about | summary | refs | log | tree | commit | diff
path: root/sys/dev
diff options
context:
space:
mode:
author Vincenzo Maffione <vmaffione@FreeBSD.org> 2018-04-12 07:20:50 +0000
committer Vincenzo Maffione <vmaffione@FreeBSD.org> 2018-04-12 07:20:50 +0000
commit 2ff91c175eca50b7d0d9da6b31eae4109c034137 (patch)
tree 15a4f8847a8cabd782f67326125c48fed4fdd27b /sys/dev
parent 66def52613043a86172a2ebe6feab214258fa2fa (diff)
Notes
Diffstat (limited to 'sys/dev')
-rw-r--r-- sys/dev/cxgbe/t4_netmap.c 10
-rw-r--r-- sys/dev/ixgbe/if_ixv.c 2
-rw-r--r-- sys/dev/ixl/ixl_pf_main.c 2
-rw-r--r-- sys/dev/ixl/ixl_txrx.c 4
-rw-r--r-- sys/dev/netmap/if_ptnet.c 34
-rw-r--r-- sys/dev/netmap/if_re_netmap.h 6
-rw-r--r-- sys/dev/netmap/if_vtnet_netmap.h 26
-rw-r--r-- sys/dev/netmap/netmap.c 1073
-rw-r--r-- sys/dev/netmap/netmap_freebsd.c 120
-rw-r--r-- sys/dev/netmap/netmap_generic.c 6
-rw-r--r-- sys/dev/netmap/netmap_kern.h 155
-rw-r--r-- sys/dev/netmap/netmap_legacy.c 428
-rw-r--r-- sys/dev/netmap/netmap_mem2.c 564
-rw-r--r-- sys/dev/netmap/netmap_mem2.h 21
-rw-r--r-- sys/dev/netmap/netmap_monitor.c 56
-rw-r--r-- sys/dev/netmap/netmap_pipe.c 321
-rw-r--r-- sys/dev/netmap/netmap_pt.c 193
-rw-r--r-- sys/dev/netmap/netmap_vale.c 931
-rw-r--r-- sys/dev/re/if_re.c 2
19 files changed, 2580 insertions, 1374 deletions
diff --git a/sys/dev/cxgbe/t4_netmap.c b/sys/dev/cxgbe/t4_netmap.c
index 45083fb9a39bb..939f090bdcde3 100644
--- a/sys/dev/cxgbe/t4_netmap.c
+++ b/sys/dev/cxgbe/t4_netmap.c
@@ -344,7 +344,7 @@ cxgbe_netmap_on(struct adapter *sc, struct vi_info *vi, struct ifnet *ifp,
for_each_nm_rxq(vi, i, nm_rxq) {
struct irq *irq = &sc->irq[vi->first_intr + i];
- kring = &na->rx_rings[nm_rxq->nid];
+ kring = na->rx_rings[nm_rxq->nid];
if (!nm_kring_pending_on(kring) ||
nm_rxq->iq_cntxt_id != INVALID_NM_RXQ_CNTXT_ID)
continue;
@@ -375,7 +375,7 @@ cxgbe_netmap_on(struct adapter *sc, struct vi_info *vi, struct ifnet *ifp,
}
for_each_nm_txq(vi, i, nm_txq) {
- kring = &na->tx_rings[nm_txq->nid];
+ kring = na->tx_rings[nm_txq->nid];
if (!nm_kring_pending_on(kring) ||
nm_txq->cntxt_id != INVALID_NM_TXQ_CNTXT_ID)
continue;
@@ -427,7 +427,7 @@ cxgbe_netmap_off(struct adapter *sc, struct vi_info *vi, struct ifnet *ifp,
for_each_nm_txq(vi, i, nm_txq) {
struct sge_qstat *spg = (void *)&nm_txq->desc[nm_txq->sidx];
- kring = &na->tx_rings[nm_txq->nid];
+ kring = na->tx_rings[nm_txq->nid];
if (!nm_kring_pending_off(kring) ||
nm_txq->cntxt_id == INVALID_NM_TXQ_CNTXT_ID)
continue;
@@ -445,7 +445,7 @@ cxgbe_netmap_off(struct adapter *sc, struct vi_info *vi, struct ifnet *ifp,
for_each_nm_rxq(vi, i, nm_rxq) {
struct irq *irq = &sc->irq[vi->first_intr + i];
- kring = &na->rx_rings[nm_rxq->nid];
+ kring = na->rx_rings[nm_rxq->nid];
if (!nm_kring_pending_off(kring) ||
nm_rxq->iq_cntxt_id == INVALID_NM_RXQ_CNTXT_ID)
continue;
@@ -933,7 +933,7 @@ t4_nm_intr(void *arg)
struct adapter *sc = vi->pi->adapter;
struct ifnet *ifp = vi->ifp;
struct netmap_adapter *na = NA(ifp);
- struct netmap_kring *kring = &na->rx_rings[nm_rxq->nid];
+ struct netmap_kring *kring = na->rx_rings[nm_rxq->nid];
struct netmap_ring *ring = kring->ring;
struct iq_desc *d = &nm_rxq->iq_desc[nm_rxq->iq_cidx];
const void *cpl;
diff --git a/sys/dev/ixgbe/if_ixv.c b/sys/dev/ixgbe/if_ixv.c
index 3041128f3420f..a019fc80341d8 100644
--- a/sys/dev/ixgbe/if_ixv.c
+++ b/sys/dev/ixgbe/if_ixv.c
@@ -1450,7 +1450,7 @@ ixv_initialize_receive_units(if_ctx_t ctx)
*/
if (ifp->if_capenable & IFCAP_NETMAP) {
struct netmap_adapter *na = NA(ifp);
- struct netmap_kring *kring = &na->rx_rings[j];
+ struct netmap_kring *kring = na->rx_rings[j];
int t = na->num_rx_desc - 1 - nm_kr_rxspace(kring);
IXGBE_WRITE_REG(hw, IXGBE_VFRDT(rxr->me), t);
diff --git a/sys/dev/ixl/ixl_pf_main.c b/sys/dev/ixl/ixl_pf_main.c
index a5b312d0a73c5..1befa463154e1 100644
--- a/sys/dev/ixl/ixl_pf_main.c
+++ b/sys/dev/ixl/ixl_pf_main.c
@@ -2240,7 +2240,7 @@ ixl_initialize_vsi(struct ixl_vsi *vsi)
/* preserve queue */
if (vsi->ifp->if_capenable & IFCAP_NETMAP) {
struct netmap_adapter *na = NA(vsi->ifp);
- struct netmap_kring *kring = &na->rx_rings[i];
+ struct netmap_kring *kring = na->rx_rings[i];
int t = na->num_rx_desc - 1 - nm_kr_rxspace(kring);
wr32(vsi->hw, I40E_QRX_TAIL(que->me), t);
} else
diff --git a/sys/dev/ixl/ixl_txrx.c b/sys/dev/ixl/ixl_txrx.c
index 3501ebc4cd508..cda3c528b9474 100644
--- a/sys/dev/ixl/ixl_txrx.c
+++ b/sys/dev/ixl/ixl_txrx.c
@@ -547,7 +547,7 @@ ixl_init_tx_ring(struct ixl_queue *que)
* netmap slot index, si
*/
if (slot) {
- int si = netmap_idx_n2k(&na->tx_rings[que->me], i);
+ int si = netmap_idx_n2k(na->tx_rings[que->me], i);
netmap_load_map(na, buf->tag, buf->map, NMB(na, slot + si));
}
#endif /* DEV_NETMAP */
@@ -1214,7 +1214,7 @@ ixl_init_rx_ring(struct ixl_queue *que)
* an mbuf, so end the block with a continue;
*/
if (slot) {
- int sj = netmap_idx_n2k(&na->rx_rings[que->me], j);
+ int sj = netmap_idx_n2k(na->rx_rings[que->me], j);
uint64_t paddr;
void *addr;
diff --git a/sys/dev/netmap/if_ptnet.c b/sys/dev/netmap/if_ptnet.c
index 1805a7f31e48d..b6059dc55cfad 100644
--- a/sys/dev/netmap/if_ptnet.c
+++ b/sys/dev/netmap/if_ptnet.c
@@ -210,8 +210,8 @@ static int ptnet_irqs_init(struct ptnet_softc *sc);
static void ptnet_irqs_fini(struct ptnet_softc *sc);
static uint32_t ptnet_nm_ptctl(if_t ifp, uint32_t cmd);
-static int ptnet_nm_config(struct netmap_adapter *na, unsigned *txr,
- unsigned *txd, unsigned *rxr, unsigned *rxd);
+static int ptnet_nm_config(struct netmap_adapter *na,
+ struct nm_config_info *info);
static void ptnet_update_vnet_hdr(struct ptnet_softc *sc);
static int ptnet_nm_register(struct netmap_adapter *na, int onoff);
static int ptnet_nm_txsync(struct netmap_kring *kring, int flags);
@@ -1104,18 +1104,20 @@ ptnet_nm_ptctl(if_t ifp, uint32_t cmd)
}
static int
-ptnet_nm_config(struct netmap_adapter *na, unsigned *txr, unsigned *txd,
- unsigned *rxr, unsigned *rxd)
+ptnet_nm_config(struct netmap_adapter *na, struct nm_config_info *info)
{
struct ptnet_softc *sc = if_getsoftc(na->ifp);
- *txr = bus_read_4(sc->iomem, PTNET_IO_NUM_TX_RINGS);
- *rxr = bus_read_4(sc->iomem, PTNET_IO_NUM_RX_RINGS);
- *txd = bus_read_4(sc->iomem, PTNET_IO_NUM_TX_SLOTS);
- *rxd = bus_read_4(sc->iomem, PTNET_IO_NUM_RX_SLOTS);
+ info->num_tx_rings = bus_read_4(sc->iomem, PTNET_IO_NUM_TX_RINGS);
+ info->num_rx_rings = bus_read_4(sc->iomem, PTNET_IO_NUM_RX_RINGS);
+ info->num_tx_descs = bus_read_4(sc->iomem, PTNET_IO_NUM_TX_SLOTS);
+ info->num_rx_descs = bus_read_4(sc->iomem, PTNET_IO_NUM_RX_SLOTS);
+ info->rx_buf_maxsize = NETMAP_BUF_SIZE(na);
- device_printf(sc->dev, "txr %u, rxr %u, txd %u, rxd %u\n",
- *txr, *rxr, *txd, *rxd);
+ device_printf(sc->dev, "txr %u, rxr %u, txd %u, rxd %u, rxbufsz %u\n",
+ info->num_tx_rings, info->num_rx_rings,
+ info->num_tx_descs, info->num_rx_descs,
+ info->rx_buf_maxsize);
return 0;
}
@@ -1133,9 +1135,9 @@ ptnet_sync_from_csb(struct ptnet_softc *sc, struct netmap_adapter *na)
struct netmap_kring *kring;
if (i < na->num_tx_rings) {
- kring = na->tx_rings + i;
+ kring = na->tx_rings[i];
} else {
- kring = na->rx_rings + i - na->num_tx_rings;
+ kring = na->rx_rings[i - na->num_tx_rings];
}
kring->rhead = kring->ring->head = ptgh->head;
kring->rcur = kring->ring->cur = ptgh->cur;
@@ -1228,7 +1230,7 @@ ptnet_nm_register(struct netmap_adapter *na, int onoff)
if (native) {
for_rx_tx(t) {
for (i = 0; i <= nma_get_nrings(na, t); i++) {
- struct netmap_kring *kring = &NMR(na, t)[i];
+ struct netmap_kring *kring = NMR(na, t)[i];
if (nm_kring_pending_on(kring)) {
kring->nr_mode = NKR_NETMAP_ON;
@@ -1243,7 +1245,7 @@ ptnet_nm_register(struct netmap_adapter *na, int onoff)
nm_clear_native_flags(na);
for_rx_tx(t) {
for (i = 0; i <= nma_get_nrings(na, t); i++) {
- struct netmap_kring *kring = &NMR(na, t)[i];
+ struct netmap_kring *kring = NMR(na, t)[i];
if (nm_kring_pending_off(kring)) {
kring->nr_mode = NKR_NETMAP_OFF;
@@ -1758,7 +1760,7 @@ ptnet_drain_transmit_queue(struct ptnet_queue *pq, unsigned int budget,
ptgh = pq->ptgh;
pthg = pq->pthg;
- kring = na->tx_rings + pq->kring_id;
+ kring = na->tx_rings[pq->kring_id];
ring = kring->ring;
lim = kring->nkr_num_slots - 1;
head = ring->head;
@@ -2021,7 +2023,7 @@ ptnet_rx_eof(struct ptnet_queue *pq, unsigned int budget, bool may_resched)
struct ptnet_csb_gh *ptgh = pq->ptgh;
struct ptnet_csb_hg *pthg = pq->pthg;
struct netmap_adapter *na = &sc->ptna->dr.up;
- struct netmap_kring *kring = na->rx_rings + pq->kring_id;
+ struct netmap_kring *kring = na->rx_rings[pq->kring_id];
struct netmap_ring *ring = kring->ring;
unsigned int const lim = kring->nkr_num_slots - 1;
unsigned int batch_count = 0;
diff --git a/sys/dev/netmap/if_re_netmap.h b/sys/dev/netmap/if_re_netmap.h
index e7dd087acc676..0e56a731ac6aa 100644
--- a/sys/dev/netmap/if_re_netmap.h
+++ b/sys/dev/netmap/if_re_netmap.h
@@ -304,7 +304,7 @@ re_netmap_tx_init(struct rl_softc *sc)
/* l points in the netmap ring, i points in the NIC ring */
for (i = 0; i < n; i++) {
uint64_t paddr;
- int l = netmap_idx_n2k(&na->tx_rings[0], i);
+ int l = netmap_idx_n2k(na->tx_rings[0], i);
void *addr = PNMB(na, slot + l, &paddr);
desc[i].rl_bufaddr_lo = htole32(RL_ADDR_LO(paddr));
@@ -330,11 +330,11 @@ re_netmap_rx_init(struct rl_softc *sc)
* Do not release the slots owned by userspace,
* and also keep one empty.
*/
- max_avail = n - 1 - nm_kr_rxspace(&na->rx_rings[0]);
+ max_avail = n - 1 - nm_kr_rxspace(na->rx_rings[0]);
for (nic_i = 0; nic_i < n; nic_i++) {
void *addr;
uint64_t paddr;
- uint32_t nm_i = netmap_idx_n2k(&na->rx_rings[0], nic_i);
+ uint32_t nm_i = netmap_idx_n2k(na->rx_rings[0], nic_i);
addr = PNMB(na, slot + nm_i, &paddr);
diff --git a/sys/dev/netmap/if_vtnet_netmap.h b/sys/dev/netmap/if_vtnet_netmap.h
index 10789c53d1f06..e4ab64d2ed976 100644
--- a/sys/dev/netmap/if_vtnet_netmap.h
+++ b/sys/dev/netmap/if_vtnet_netmap.h
@@ -383,7 +383,7 @@ vtnet_netmap_init_rx_buffers(struct SOFTC_T *sc)
if (!nm_native_on(na))
return 0;
for (r = 0; r < na->num_rx_rings; r++) {
- struct netmap_kring *kring = &na->rx_rings[r];
+ struct netmap_kring *kring = na->rx_rings[r];
struct vtnet_rxq *rxq = &sc->vtnet_rxqs[r];
struct virtqueue *vq = rxq->vtnrx_vq;
struct netmap_slot* slot;
@@ -407,29 +407,6 @@ vtnet_netmap_init_rx_buffers(struct SOFTC_T *sc)
return 1;
}
-/* Update the virtio-net device configurations. Number of queues can
- * change dinamically, by 'ethtool --set-channels $IFNAME combined $N'.
- * This is actually the only way virtio-net can currently enable
- * the multiqueue mode.
- * XXX note that we seem to lose packets if the netmap ring has more
- * slots than the queue
- */
-static int
-vtnet_netmap_config(struct netmap_adapter *na, u_int *txr, u_int *txd,
- u_int *rxr, u_int *rxd)
-{
- struct ifnet *ifp = na->ifp;
- struct SOFTC_T *sc = ifp->if_softc;
-
- *txr = *rxr = sc->vtnet_max_vq_pairs;
- *rxd = 512; // sc->vtnet_rx_nmbufs;
- *txd = *rxd; // XXX
- D("vtnet config txq=%d, txd=%d rxq=%d, rxd=%d",
- *txr, *txd, *rxr, *rxd);
-
- return 0;
-}
-
static void
vtnet_netmap_attach(struct SOFTC_T *sc)
{
@@ -443,7 +420,6 @@ vtnet_netmap_attach(struct SOFTC_T *sc)
na.nm_register = vtnet_netmap_reg;
na.nm_txsync = vtnet_netmap_txsync;
na.nm_rxsync = vtnet_netmap_rxsync;
- na.nm_config = vtnet_netmap_config;
na.nm_intr = vtnet_netmap_intr;
na.num_tx_rings = na.num_rx_rings = sc->vtnet_max_vq_pairs;
D("max rings %d", sc->vtnet_max_vq_pairs);
diff --git a/sys/dev/netmap/netmap.c b/sys/dev/netmap/netmap.c
index 3c5551bad1569..d6230dfb8ebe1 100644
--- a/sys/dev/netmap/netmap.c
+++ b/sys/dev/netmap/netmap.c
@@ -262,7 +262,7 @@ ports attached to the switch)
*
* Any network interface known to the system (including a persistent VALE
* port) can be attached to a VALE switch by issuing the
- * NETMAP_BDG_ATTACH subcommand. After the attachment, persistent VALE ports
+ * NETMAP_REQ_VALE_ATTACH command. After the attachment, persistent VALE ports
* look exactly like ephemeral VALE ports (as created in step 2 above). The
* attachment of other interfaces, instead, requires the creation of a
* netmap_bwrap_adapter. Moreover, the attached interface must be put in
@@ -591,9 +591,9 @@ void
netmap_set_ring(struct netmap_adapter *na, u_int ring_id, enum txrx t, int stopped)
{
if (stopped)
- netmap_disable_ring(NMR(na, t) + ring_id, stopped);
+ netmap_disable_ring(NMR(na, t)[ring_id], stopped);
else
- NMR(na, t)[ring_id].nkr_stopped = 0;
+ NMR(na, t)[ring_id]->nkr_stopped = 0;
}
@@ -745,39 +745,42 @@ nm_dump_buf(char *p, int len, int lim, char *dst)
int
netmap_update_config(struct netmap_adapter *na)
{
- u_int txr, txd, rxr, rxd;
+ struct nm_config_info info;
- txr = txd = rxr = rxd = 0;
+ bzero(&info, sizeof(info));
if (na->nm_config == NULL ||
- na->nm_config(na, &txr, &txd, &rxr, &rxd))
- {
+ na->nm_config(na, &info)) {
/* take whatever we had at init time */
- txr = na->num_tx_rings;
- txd = na->num_tx_desc;
- rxr = na->num_rx_rings;
- rxd = na->num_rx_desc;
+ info.num_tx_rings = na->num_tx_rings;
+ info.num_tx_descs = na->num_tx_desc;
+ info.num_rx_rings = na->num_rx_rings;
+ info.num_rx_descs = na->num_rx_desc;
+ info.rx_buf_maxsize = na->rx_buf_maxsize;
}
- if (na->num_tx_rings == txr && na->num_tx_desc == txd &&
- na->num_rx_rings == rxr && na->num_rx_desc == rxd)
+ if (na->num_tx_rings == info.num_tx_rings &&
+ na->num_tx_desc == info.num_tx_descs &&
+ na->num_rx_rings == info.num_rx_rings &&
+ na->num_rx_desc == info.num_rx_descs &&
+ na->rx_buf_maxsize == info.rx_buf_maxsize)
return 0; /* nothing changed */
- if (netmap_verbose || na->active_fds > 0) {
- D("stored config %s: txring %d x %d, rxring %d x %d",
- na->name,
- na->num_tx_rings, na->num_tx_desc,
- na->num_rx_rings, na->num_rx_desc);
- D("new config %s: txring %d x %d, rxring %d x %d",
- na->name, txr, txd, rxr, rxd);
- }
if (na->active_fds == 0) {
- D("configuration changed (but fine)");
- na->num_tx_rings = txr;
- na->num_tx_desc = txd;
- na->num_rx_rings = rxr;
- na->num_rx_desc = rxd;
+ D("configuration changed for %s: txring %d x %d, "
+ "rxring %d x %d, rxbufsz %d",
+ na->name, na->num_tx_rings, na->num_tx_desc,
+ na->num_rx_rings, na->num_rx_desc, na->rx_buf_maxsize);
+ na->num_tx_rings = info.num_tx_rings;
+ na->num_tx_desc = info.num_tx_descs;
+ na->num_rx_rings = info.num_rx_rings;
+ na->num_rx_desc = info.num_rx_descs;
+ na->rx_buf_maxsize = info.rx_buf_maxsize;
return 0;
}
- D("configuration changed while active, this is bad...");
+ D("WARNING: configuration changed for %s while active: "
+ "txring %d x %d, rxring %d x %d, rxbufsz %d",
+ na->name, info.num_tx_rings, info.num_tx_descs,
+ info.num_rx_rings, info.num_rx_descs,
+ info.rx_buf_maxsize);
return 1;
}
@@ -827,7 +830,9 @@ netmap_krings_create(struct netmap_adapter *na, u_int tailroom)
n[NR_TX] = na->num_tx_rings + 1;
n[NR_RX] = na->num_rx_rings + 1;
- len = (n[NR_TX] + n[NR_RX]) * sizeof(struct netmap_kring) + tailroom;
+ len = (n[NR_TX] + n[NR_RX]) *
+ (sizeof(struct netmap_kring) + sizeof(struct netmap_kring *))
+ + tailroom;
na->tx_rings = nm_os_malloc((size_t)len);
if (na->tx_rings == NULL) {
@@ -835,6 +840,14 @@ netmap_krings_create(struct netmap_adapter *na, u_int tailroom)
return ENOMEM;
}
na->rx_rings = na->tx_rings + n[NR_TX];
+ na->tailroom = na->rx_rings + n[NR_RX];
+
+ /* link the krings in the krings array */
+ kring = (struct netmap_kring *)((char *)na->tailroom + tailroom);
+ for (i = 0; i < n[NR_TX] + n[NR_RX]; i++) {
+ na->tx_rings[i] = kring;
+ kring++;
+ }
/*
* All fields in krings are 0 except the one initialized below.
@@ -843,9 +856,10 @@ netmap_krings_create(struct netmap_adapter *na, u_int tailroom)
for_rx_tx(t) {
ndesc = nma_get_ndesc(na, t);
for (i = 0; i < n[t]; i++) {
- kring = &NMR(na, t)[i];
+ kring = NMR(na, t)[i];
bzero(kring, sizeof(*kring));
kring->na = na;
+ kring->notify_na = na;
kring->ring_id = i;
kring->tx = t;
kring->nkr_num_slots = ndesc;
@@ -854,6 +868,8 @@ netmap_krings_create(struct netmap_adapter *na, u_int tailroom)
if (i < nma_get_nrings(na, t)) {
kring->nm_sync = (t == NR_TX ? na->nm_txsync : na->nm_rxsync);
} else {
+ if (!(na->na_flags & NAF_HOST_RINGS))
+ kring->nr_kflags |= NKR_FAKERING;
kring->nm_sync = (t == NR_TX ?
netmap_txsync_to_host:
netmap_rxsync_from_host);
@@ -874,7 +890,6 @@ netmap_krings_create(struct netmap_adapter *na, u_int tailroom)
nm_os_selinfo_init(&na->si[t]);
}
- na->tailroom = na->rx_rings + n[NR_RX];
return 0;
}
@@ -885,7 +900,7 @@ netmap_krings_create(struct netmap_adapter *na, u_int tailroom)
void
netmap_krings_delete(struct netmap_adapter *na)
{
- struct netmap_kring *kring = na->tx_rings;
+ struct netmap_kring **kring = na->tx_rings;
enum txrx t;
if (na->tx_rings == NULL) {
@@ -898,8 +913,8 @@ netmap_krings_delete(struct netmap_adapter *na)
/* we rely on the krings layout described above */
for ( ; kring != na->tailroom; kring++) {
- mtx_destroy(&kring->q_lock);
- nm_os_selinfo_uninit(&kring->si);
+ mtx_destroy(&(*kring)->q_lock);
+ nm_os_selinfo_uninit(&(*kring)->si);
}
nm_os_free(na->tx_rings);
na->tx_rings = na->rx_rings = na->tailroom = NULL;
@@ -915,7 +930,7 @@ netmap_krings_delete(struct netmap_adapter *na)
void
netmap_hw_krings_delete(struct netmap_adapter *na)
{
- struct mbq *q = &na->rx_rings[na->num_rx_rings].rx_queue;
+ struct mbq *q = &na->rx_rings[na->num_rx_rings]->rx_queue;
ND("destroy sw mbq with len %d", mbq_len(q));
mbq_purge(q);
@@ -1196,7 +1211,7 @@ nm_may_forward_down(struct netmap_kring *kring, int sync_flags)
static u_int
netmap_sw_to_nic(struct netmap_adapter *na)
{
- struct netmap_kring *kring = &na->rx_rings[na->num_rx_rings];
+ struct netmap_kring *kring = na->rx_rings[na->num_rx_rings];
struct netmap_slot *rxslot = kring->ring->slot;
u_int i, rxcur = kring->nr_hwcur;
u_int const head = kring->rhead;
@@ -1205,7 +1220,7 @@ netmap_sw_to_nic(struct netmap_adapter *na)
/* scan rings to find space, then fill as much as possible */
for (i = 0; i < na->num_tx_rings; i++) {
- struct netmap_kring *kdst = &na->tx_rings[i];
+ struct netmap_kring *kdst = na->tx_rings[i];
struct netmap_ring *rdst = kdst->ring;
u_int const dst_lim = kdst->nkr_num_slots - 1;
@@ -1443,7 +1458,7 @@ assign_mem:
* MUST BE CALLED UNDER NMG_LOCK()
*
* Get a refcounted reference to a netmap adapter attached
- * to the interface specified by nmr.
+ * to the interface specified by req.
* This is always called in the execution of an ioctl().
*
* Return ENXIO if the interface specified by the request does
@@ -1453,13 +1468,15 @@ assign_mem:
* could not be allocated.
* If successful, hold a reference to the netmap adapter.
*
- * If the interface specified by nmr is a system one, also keep
+ * If the interface specified by req is a system one, also keep
* a reference to it and return a valid *ifp.
*/
int
-netmap_get_na(struct nmreq *nmr, struct netmap_adapter **na,
- struct ifnet **ifp, struct netmap_mem_d *nmd, int create)
+netmap_get_na(struct nmreq_header *hdr,
+ struct netmap_adapter **na, struct ifnet **ifp,
+ struct netmap_mem_d *nmd, int create)
{
+ struct nmreq_register *req = (struct nmreq_register *)hdr->nr_body;
int error = 0;
struct netmap_adapter *ret = NULL;
int nmd_ref = 0;
@@ -1467,13 +1484,24 @@ netmap_get_na(struct nmreq *nmr, struct netmap_adapter **na,
*na = NULL; /* default return value */
*ifp = NULL;
+ if (hdr->nr_reqtype != NETMAP_REQ_REGISTER) {
+ return EINVAL;
+ }
+
+ if (req->nr_mode == NR_REG_PIPE_MASTER ||
+ req->nr_mode == NR_REG_PIPE_SLAVE) {
+ /* Do not accept deprecated pipe modes. */
+ D("Deprecated pipe nr_mode, use xx{yy or xx}yy syntax");
+ return EINVAL;
+ }
+
NMG_LOCK_ASSERT();
/* if the request contain a memid, try to find the
* corresponding memory region
*/
- if (nmd == NULL && nmr->nr_arg2) {
- nmd = netmap_mem_find(nmr->nr_arg2);
+ if (nmd == NULL && req->nr_mem_id) {
+ nmd = netmap_mem_find(req->nr_mem_id);
if (nmd == NULL)
return EINVAL;
/* keep the reference */
@@ -1492,22 +1520,22 @@ netmap_get_na(struct nmreq *nmr, struct netmap_adapter **na,
*/
/* try to see if this is a ptnetmap port */
- error = netmap_get_pt_host_na(nmr, na, nmd, create);
+ error = netmap_get_pt_host_na(hdr, na, nmd, create);
if (error || *na != NULL)
goto out;
/* try to see if this is a monitor port */
- error = netmap_get_monitor_na(nmr, na, nmd, create);
+ error = netmap_get_monitor_na(hdr, na, nmd, create);
if (error || *na != NULL)
goto out;
/* try to see if this is a pipe port */
- error = netmap_get_pipe_na(nmr, na, nmd, create);
+ error = netmap_get_pipe_na(hdr, na, nmd, create);
if (error || *na != NULL)
goto out;
/* try to see if this is a bridge port */
- error = netmap_get_bdg_na(nmr, na, nmd, create);
+ error = netmap_get_bdg_na(hdr, na, nmd, create);
if (error)
goto out;
@@ -1520,7 +1548,7 @@ netmap_get_na(struct nmreq *nmr, struct netmap_adapter **na,
* This may still be a tap, a veth/epair, or even a
* persistent VALE port.
*/
- *ifp = ifunit_ref(nmr->nr_name);
+ *ifp = ifunit_ref(hdr->nr_name);
if (*ifp == NULL) {
error = ENXIO;
goto out;
@@ -1765,42 +1793,27 @@ netmap_ring_reinit(struct netmap_kring *kring)
*
*/
int
-netmap_interp_ringid(struct netmap_priv_d *priv, uint16_t ringid, uint32_t flags)
+netmap_interp_ringid(struct netmap_priv_d *priv, uint32_t nr_mode,
+ uint16_t nr_ringid, uint64_t nr_flags)
{
struct netmap_adapter *na = priv->np_na;
- u_int j, i = ringid & NETMAP_RING_MASK;
- u_int reg = flags & NR_REG_MASK;
int excluded_direction[] = { NR_TX_RINGS_ONLY, NR_RX_RINGS_ONLY };
enum txrx t;
+ u_int j;
- if (reg == NR_REG_DEFAULT) {
- /* convert from old ringid to flags */
- if (ringid & NETMAP_SW_RING) {
- reg = NR_REG_SW;
- } else if (ringid & NETMAP_HW_RING) {
- reg = NR_REG_ONE_NIC;
- } else {
- reg = NR_REG_ALL_NIC;
- }
- D("deprecated API, old ringid 0x%x -> ringid %x reg %d", ringid, i, reg);
- }
-
- if ((flags & NR_PTNETMAP_HOST) && ((reg != NR_REG_ALL_NIC &&
- reg != NR_REG_PIPE_MASTER && reg != NR_REG_PIPE_SLAVE) ||
- flags & (NR_RX_RINGS_ONLY|NR_TX_RINGS_ONLY))) {
+ if ((nr_flags & NR_PTNETMAP_HOST) && ((nr_mode != NR_REG_ALL_NIC) ||
+ nr_flags & (NR_RX_RINGS_ONLY|NR_TX_RINGS_ONLY))) {
D("Error: only NR_REG_ALL_NIC supported with netmap passthrough");
return EINVAL;
}
for_rx_tx(t) {
- if (flags & excluded_direction[t]) {
+ if (nr_flags & excluded_direction[t]) {
priv->np_qfirst[t] = priv->np_qlast[t] = 0;
continue;
}
- switch (reg) {
+ switch (nr_mode) {
case NR_REG_ALL_NIC:
- case NR_REG_PIPE_MASTER:
- case NR_REG_PIPE_SLAVE:
priv->np_qfirst[t] = 0;
priv->np_qlast[t] = nma_get_nrings(na, t);
ND("ALL/PIPE: %s %d %d", nm_txrx2str(t),
@@ -1812,20 +1825,21 @@ netmap_interp_ringid(struct netmap_priv_d *priv, uint16_t ringid, uint32_t flags
D("host rings not supported");
return EINVAL;
}
- priv->np_qfirst[t] = (reg == NR_REG_SW ?
+ priv->np_qfirst[t] = (nr_mode == NR_REG_SW ?
nma_get_nrings(na, t) : 0);
priv->np_qlast[t] = nma_get_nrings(na, t) + 1;
- ND("%s: %s %d %d", reg == NR_REG_SW ? "SW" : "NIC+SW",
+ ND("%s: %s %d %d", nr_mode == NR_REG_SW ? "SW" : "NIC+SW",
nm_txrx2str(t),
priv->np_qfirst[t], priv->np_qlast[t]);
break;
case NR_REG_ONE_NIC:
- if (i >= na->num_tx_rings && i >= na->num_rx_rings) {
- D("invalid ring id %d", i);
+ if (nr_ringid >= na->num_tx_rings &&
+ nr_ringid >= na->num_rx_rings) {
+ D("invalid ring id %d", nr_ringid);
return EINVAL;
}
/* if not enough rings, use the first one */
- j = i;
+ j = nr_ringid;
if (j >= nma_get_nrings(na, t))
j = 0;
priv->np_qfirst[t] = j;
@@ -1834,11 +1848,11 @@ netmap_interp_ringid(struct netmap_priv_d *priv, uint16_t ringid, uint32_t flags
priv->np_qfirst[t], priv->np_qlast[t]);
break;
default:
- D("invalid regif type %d", reg);
+ D("invalid regif type %d", nr_mode);
return EINVAL;
}
}
- priv->np_flags = (flags & ~NR_REG_MASK) | reg;
+ priv->np_flags = nr_flags | nr_mode; // TODO
/* Allow transparent forwarding mode in the host --> nic
* direction only if all the TX hw rings have been opened. */
@@ -1854,7 +1868,7 @@ netmap_interp_ringid(struct netmap_priv_d *priv, uint16_t ringid, uint32_t flags
priv->np_qlast[NR_TX],
priv->np_qfirst[NR_RX],
priv->np_qlast[NR_RX],
- i);
+ nr_ringid);
}
return 0;
}
@@ -1865,18 +1879,19 @@ netmap_interp_ringid(struct netmap_priv_d *priv, uint16_t ringid, uint32_t flags
* for all rings is the same as a single ring.
*/
static int
-netmap_set_ringid(struct netmap_priv_d *priv, uint16_t ringid, uint32_t flags)
+netmap_set_ringid(struct netmap_priv_d *priv, uint32_t nr_mode,
+ uint16_t nr_ringid, uint64_t nr_flags)
{
struct netmap_adapter *na = priv->np_na;
int error;
enum txrx t;
- error = netmap_interp_ringid(priv, ringid, flags);
+ error = netmap_interp_ringid(priv, nr_mode, nr_ringid, nr_flags);
if (error) {
return error;
}
- priv->np_txpoll = (ringid & NETMAP_NO_TX_POLL) ? 0 : 1;
+ priv->np_txpoll = (nr_flags & NR_NO_TX_POLL) ? 0 : 1;
/* optimization: count the users registered for more than
* one ring, which are the ones sleeping on the global queue.
@@ -1933,7 +1948,7 @@ netmap_krings_get(struct netmap_priv_d *priv)
*/
for_rx_tx(t) {
for (i = priv->np_qfirst[t]; i < priv->np_qlast[t]; i++) {
- kring = &NMR(na, t)[i];
+ kring = NMR(na, t)[i];
if ((kring->nr_kflags & NKR_EXCLUSIVE) ||
(kring->users && excl))
{
@@ -1948,7 +1963,7 @@ netmap_krings_get(struct netmap_priv_d *priv)
*/
for_rx_tx(t) {
for (i = priv->np_qfirst[t]; i < priv->np_qlast[t]; i++) {
- kring = &NMR(na, t)[i];
+ kring = NMR(na, t)[i];
kring->users++;
if (excl)
kring->nr_kflags |= NKR_EXCLUSIVE;
@@ -1979,10 +1994,9 @@ netmap_krings_put(struct netmap_priv_d *priv)
priv->np_qfirst[NR_RX],
priv->np_qlast[MR_RX]);
-
for_rx_tx(t) {
for (i = priv->np_qfirst[t]; i < priv->np_qlast[t]; i++) {
- kring = &NMR(na, t)[i];
+ kring = NMR(na, t)[i];
if (excl)
kring->nr_kflags &= ~NKR_EXCLUSIVE;
kring->users--;
@@ -1992,6 +2006,12 @@ netmap_krings_put(struct netmap_priv_d *priv)
}
}
+static int
+nm_priv_rx_enabled(struct netmap_priv_d *priv)
+{
+ return (priv->np_qfirst[NR_RX] != priv->np_qlast[NR_RX]);
+}
+
/*
* possibly move the interface to netmap-mode.
* If success it returns a pointer to netmap_if, otherwise NULL.
@@ -2064,16 +2084,14 @@ netmap_krings_put(struct netmap_priv_d *priv)
*/
int
netmap_do_regif(struct netmap_priv_d *priv, struct netmap_adapter *na,
- uint16_t ringid, uint32_t flags)
+ uint32_t nr_mode, uint16_t nr_ringid, uint64_t nr_flags)
{
struct netmap_if *nifp = NULL;
int error;
NMG_LOCK_ASSERT();
- /* ring configuration may have changed, fetch from the card */
- netmap_update_config(na);
priv->np_na = na; /* store the reference */
- error = netmap_set_ringid(priv, ringid, flags);
+ error = netmap_set_ringid(priv, nr_mode, nr_ringid, nr_flags);
if (error)
goto err;
error = netmap_mem_finalize(na->nm_mem, na);
@@ -2081,27 +2099,38 @@ netmap_do_regif(struct netmap_priv_d *priv, struct netmap_adapter *na,
goto err;
if (na->active_fds == 0) {
+
+ /* cache the allocator info in the na */
+ error = netmap_mem_get_lut(na->nm_mem, &na->na_lut);
+ if (error)
+ goto err_drop_mem;
+ ND("lut %p bufs %u size %u", na->na_lut.lut, na->na_lut.objtotal,
+ na->na_lut.objsize);
+
+ /* ring configuration may have changed, fetch from the card */
+ netmap_update_config(na);
+
/*
* If this is the first registration of the adapter,
* perform sanity checks and create the in-kernel view
* of the netmap rings (the netmap krings).
*/
- if (na->ifp) {
+ if (na->ifp && nm_priv_rx_enabled(priv)) {
/* This netmap adapter is attached to an ifnet. */
unsigned nbs = netmap_mem_bufsize(na->nm_mem);
unsigned mtu = nm_os_ifnet_mtu(na->ifp);
- /* The maximum amount of bytes that a single
- * receive or transmit NIC descriptor can hold. */
- unsigned hw_max_slot_len = 4096;
- if (mtu <= hw_max_slot_len) {
+ ND("mtu %d rx_buf_maxsize %d netmap_buf_size %d",
+ mtu, na->rx_buf_maxsize, nbs);
+
+ if (mtu <= na->rx_buf_maxsize) {
/* The MTU fits a single NIC slot. We only
* Need to check that netmap buffers are
* large enough to hold an MTU. NS_MOREFRAG
* cannot be used in this case. */
if (nbs < mtu) {
nm_prerr("error: netmap buf size (%u) "
- "< device MTU (%u)", nbs, mtu);
+ "< device MTU (%u)\n", nbs, mtu);
error = EINVAL;
goto err_drop_mem;
}
@@ -2114,22 +2143,22 @@ netmap_do_regif(struct netmap_priv_d *priv, struct netmap_adapter *na,
if (!(na->na_flags & NAF_MOREFRAG)) {
nm_prerr("error: large MTU (%d) needed "
"but %s does not support "
- "NS_MOREFRAG", mtu,
+ "NS_MOREFRAG\n", mtu,
na->ifp->if_xname);
error = EINVAL;
goto err_drop_mem;
- } else if (nbs < hw_max_slot_len) {
+ } else if (nbs < na->rx_buf_maxsize) {
nm_prerr("error: using NS_MOREFRAG on "
"%s requires netmap buf size "
- ">= %u", na->ifp->if_xname,
- hw_max_slot_len);
+ ">= %u\n", na->ifp->if_xname,
+ na->rx_buf_maxsize);
error = EINVAL;
goto err_drop_mem;
} else {
nm_prinf("info: netmap application on "
"%s needs to support "
"NS_MOREFRAG "
- "(MTU=%u,netmap_buf_size=%u)",
+ "(MTU=%u,netmap_buf_size=%u)\n",
na->ifp->if_xname, mtu, nbs);
}
}
@@ -2141,7 +2170,7 @@ netmap_do_regif(struct netmap_priv_d *priv, struct netmap_adapter *na,
*/
error = na->nm_krings_create(na);
if (error)
- goto err_drop_mem;
+ goto err_put_lut;
}
@@ -2165,21 +2194,12 @@ netmap_do_regif(struct netmap_priv_d *priv, struct netmap_adapter *na,
goto err_del_rings;
}
- if (na->active_fds == 0) {
- /* cache the allocator info in the na */
- error = netmap_mem_get_lut(na->nm_mem, &na->na_lut);
- if (error)
- goto err_del_if;
- ND("lut %p bufs %u size %u", na->na_lut.lut, na->na_lut.objtotal,
- na->na_lut.objsize);
- }
-
if (nm_kring_pending(priv)) {
/* Some kring is switching mode, tell the adapter to
* react on this. */
error = na->nm_register(na, 1);
if (error)
- goto err_put_lut;
+ goto err_del_if;
}
/* Commit the reference. */
@@ -2195,9 +2215,6 @@ netmap_do_regif(struct netmap_priv_d *priv, struct netmap_adapter *na,
return 0;
-err_put_lut:
- if (na->active_fds == 0)
- memset(&na->na_lut, 0, sizeof(na->na_lut));
err_del_if:
netmap_mem_if_delete(na, nifp);
err_del_rings:
@@ -2207,6 +2224,9 @@ err_rel_excl:
err_del_krings:
if (na->active_fds == 0)
na->nm_krings_delete(na);
+err_put_lut:
+ if (na->active_fds == 0)
+ memset(&na->na_lut, 0, sizeof(na->na_lut));
err_drop_mem:
netmap_mem_drop(na);
err:
@@ -2242,246 +2262,367 @@ ring_timestamp_set(struct netmap_ring *ring)
}
}
+static int nmreq_copyin(struct nmreq_header *, int);
+static int nmreq_copyout(struct nmreq_header *, int);
+static int nmreq_checkoptions(struct nmreq_header *);
/*
* ioctl(2) support for the "netmap" device.
*
* Following a list of accepted commands:
- * - NIOCGINFO
+ * - NIOCCTRL device control API
+ * - NIOCTXSYNC sync TX rings
+ * - NIOCRXSYNC sync RX rings
* - SIOCGIFADDR just for convenience
- * - NIOCREGIF
- * - NIOCTXSYNC
- * - NIOCRXSYNC
+ * - NIOCGINFO deprecated (legacy API)
+ * - NIOCREGIF deprecated (legacy API)
*
* Return 0 on success, errno otherwise.
*/
int
-netmap_ioctl(struct netmap_priv_d *priv, u_long cmd, caddr_t data, struct thread *td)
+netmap_ioctl(struct netmap_priv_d *priv, u_long cmd, caddr_t data,
+ struct thread *td, int nr_body_is_user)
{
struct mbq q; /* packets from RX hw queues to host stack */
- struct nmreq *nmr = (struct nmreq *) data;
struct netmap_adapter *na = NULL;
struct netmap_mem_d *nmd = NULL;
struct ifnet *ifp = NULL;
int error = 0;
u_int i, qfirst, qlast;
struct netmap_if *nifp;
- struct netmap_kring *krings;
+ struct netmap_kring **krings;
int sync_flags;
enum txrx t;
- if (cmd == NIOCGINFO || cmd == NIOCREGIF) {
- /* truncate name */
- nmr->nr_name[sizeof(nmr->nr_name) - 1] = '\0';
- if (nmr->nr_version != NETMAP_API) {
- D("API mismatch for %s got %d need %d",
- nmr->nr_name,
- nmr->nr_version, NETMAP_API);
- nmr->nr_version = NETMAP_API;
+ switch (cmd) {
+ case NIOCCTRL: {
+ struct nmreq_header *hdr = (struct nmreq_header *)data;
+
+ if (hdr->nr_version != NETMAP_API) {
+ D("API mismatch for reqtype %d: got %d need %d",
+ hdr->nr_version,
+ hdr->nr_version, NETMAP_API);
+ hdr->nr_version = NETMAP_API;
}
- if (nmr->nr_version < NETMAP_MIN_API ||
- nmr->nr_version > NETMAP_MAX_API) {
+ if (hdr->nr_version < NETMAP_MIN_API ||
+ hdr->nr_version > NETMAP_MAX_API) {
return EINVAL;
}
- }
- switch (cmd) {
- case NIOCGINFO: /* return capabilities etc */
- if (nmr->nr_cmd == NETMAP_BDG_LIST) {
- error = netmap_bdg_ctl(nmr, NULL);
- break;
+ /* Make a kernel-space copy of the user-space nr_body.
+ * For convenience, the nr_body pointer and the pointers
+ * in the options list will be replaced with their
+ * kernel-space counterparts. The original pointers are
+ * saved internally and later restored by nmreq_copyout
+ */
+ error = nmreq_copyin(hdr, nr_body_is_user);
+ if (error) {
+ return error;
}
- NMG_LOCK();
- do {
- /* memsize is always valid */
- u_int memflags;
- uint64_t memsize;
+ /* Sanitize hdr->nr_name. */
+ hdr->nr_name[sizeof(hdr->nr_name) - 1] = '\0';
+
+ switch (hdr->nr_reqtype) {
+ case NETMAP_REQ_REGISTER: {
+ struct nmreq_register *req =
+ (struct nmreq_register *)hdr->nr_body;
+ /* Protect access to priv from concurrent requests. */
+ NMG_LOCK();
+ do {
+ u_int memflags;
+#ifdef WITH_EXTMEM
+ struct nmreq_option *opt;
+#endif /* WITH_EXTMEM */
+
+ if (priv->np_nifp != NULL) { /* thread already registered */
+ error = EBUSY;
+ break;
+ }
+
+#ifdef WITH_EXTMEM
+ opt = nmreq_findoption((struct nmreq_option *)hdr->nr_options,
+ NETMAP_REQ_OPT_EXTMEM);
+ if (opt != NULL) {
+ struct nmreq_opt_extmem *e =
+ (struct nmreq_opt_extmem *)opt;
+
+ error = nmreq_checkduplicate(opt);
+ if (error) {
+ opt->nro_status = error;
+ break;
+ }
+ nmd = netmap_mem_ext_create(e->nro_usrptr,
+ &e->nro_info, &error);
+ opt->nro_status = error;
+ if (nmd == NULL)
+ break;
+ }
+#endif /* WITH_EXTMEM */
+
+ if (nmd == NULL && req->nr_mem_id) {
+ /* find the allocator and get a reference */
+ nmd = netmap_mem_find(req->nr_mem_id);
+ if (nmd == NULL) {
+ error = EINVAL;
+ break;
+ }
+ }
+ /* find the interface and a reference */
+ error = netmap_get_na(hdr, &na, &ifp, nmd,
+ 1 /* create */); /* keep reference */
+ if (error)
+ break;
+ if (NETMAP_OWNED_BY_KERN(na)) {
+ error = EBUSY;
+ break;
+ }
+
+ if (na->virt_hdr_len && !(req->nr_flags & NR_ACCEPT_VNET_HDR)) {
+ error = EIO;
+ break;
+ }
- if (nmr->nr_name[0] != '\0') {
+ error = netmap_do_regif(priv, na, req->nr_mode,
+ req->nr_ringid, req->nr_flags);
+ if (error) { /* reg. failed, release priv and ref */
+ break;
+ }
+ nifp = priv->np_nifp;
+ priv->np_td = td; /* for debugging purposes */
- /* get a refcount */
- error = netmap_get_na(nmr, &na, &ifp, NULL, 1 /* create */);
+ /* return the offset of the netmap_if object */
+ req->nr_rx_rings = na->num_rx_rings;
+ req->nr_tx_rings = na->num_tx_rings;
+ req->nr_rx_slots = na->num_rx_desc;
+ req->nr_tx_slots = na->num_tx_desc;
+ error = netmap_mem_get_info(na->nm_mem, &req->nr_memsize, &memflags,
+ &req->nr_mem_id);
if (error) {
- na = NULL;
- ifp = NULL;
+ netmap_do_unregif(priv);
break;
}
- nmd = na->nm_mem; /* get memory allocator */
- } else {
- nmd = netmap_mem_find(nmr->nr_arg2 ? nmr->nr_arg2 : 1);
- if (nmd == NULL) {
- error = EINVAL;
+ if (memflags & NETMAP_MEM_PRIVATE) {
+ *(uint32_t *)(uintptr_t)&nifp->ni_flags |= NI_PRIV_MEM;
+ }
+ for_rx_tx(t) {
+ priv->np_si[t] = nm_si_user(priv, t) ?
+ &na->si[t] : &NMR(na, t)[priv->np_qfirst[t]]->si;
+ }
+
+ if (req->nr_extra_bufs) {
+ if (netmap_verbose)
+ D("requested %d extra buffers",
+ req->nr_extra_bufs);
+ req->nr_extra_bufs = netmap_extra_alloc(na,
+ &nifp->ni_bufs_head, req->nr_extra_bufs);
+ if (netmap_verbose)
+ D("got %d extra buffers", req->nr_extra_bufs);
+ }
+ req->nr_offset = netmap_mem_if_offset(na->nm_mem, nifp);
+
+ error = nmreq_checkoptions(hdr);
+ if (error) {
+ netmap_do_unregif(priv);
break;
}
+
+ /* store ifp reference so that priv destructor may release it */
+ priv->np_ifp = ifp;
+ } while (0);
+ if (error) {
+ netmap_unget_na(na, ifp);
}
+ /* release the reference from netmap_mem_find() or
+ * netmap_mem_ext_create()
+ */
+ if (nmd)
+ netmap_mem_put(nmd);
+ NMG_UNLOCK();
+ break;
+ }
- error = netmap_mem_get_info(nmd, &memsize, &memflags,
- &nmr->nr_arg2);
- if (error)
- break;
- nmr->nr_memsize = (uint32_t)memsize;
- if (na == NULL) /* only memory info */
- break;
- nmr->nr_offset = 0;
- nmr->nr_rx_slots = nmr->nr_tx_slots = 0;
- netmap_update_config(na);
- nmr->nr_rx_rings = na->num_rx_rings;
- nmr->nr_tx_rings = na->num_tx_rings;
- nmr->nr_rx_slots = na->num_rx_desc;
- nmr->nr_tx_slots = na->num_tx_desc;
- } while (0);
- netmap_unget_na(na, ifp);
- NMG_UNLOCK();
- break;
+ case NETMAP_REQ_PORT_INFO_GET: {
+ struct nmreq_port_info_get *req =
+ (struct nmreq_port_info_get *)hdr->nr_body;
- case NIOCREGIF:
- /*
- * If nmr->nr_cmd is not zero, this NIOCREGIF is not really
- * a regif operation, but a different one, specified by the
- * value of nmr->nr_cmd.
- */
- i = nmr->nr_cmd;
- if (i == NETMAP_BDG_ATTACH || i == NETMAP_BDG_DETACH
- || i == NETMAP_BDG_VNET_HDR
- || i == NETMAP_BDG_NEWIF
- || i == NETMAP_BDG_DELIF
- || i == NETMAP_BDG_POLLING_ON
- || i == NETMAP_BDG_POLLING_OFF) {
- /* possibly attach/detach NIC and VALE switch */
- error = netmap_bdg_ctl(nmr, NULL);
+ NMG_LOCK();
+ do {
+ u_int memflags;
+
+ if (hdr->nr_name[0] != '\0') {
+ /* Build a nmreq_register out of the nmreq_port_info_get,
+ * so that we can call netmap_get_na(). */
+ struct nmreq_register regreq;
+ bzero(&regreq, sizeof(regreq));
+ regreq.nr_tx_slots = req->nr_tx_slots;
+ regreq.nr_rx_slots = req->nr_rx_slots;
+ regreq.nr_tx_rings = req->nr_tx_rings;
+ regreq.nr_rx_rings = req->nr_rx_rings;
+ regreq.nr_mem_id = req->nr_mem_id;
+
+ /* get a refcount */
+ hdr->nr_reqtype = NETMAP_REQ_REGISTER;
+ hdr->nr_body = (uint64_t)&regreq;
+ error = netmap_get_na(hdr, &na, &ifp, NULL, 1 /* create */);
+ hdr->nr_reqtype = NETMAP_REQ_PORT_INFO_GET; /* reset type */
+ hdr->nr_body = (uint64_t)req; /* reset nr_body */
+ if (error) {
+ na = NULL;
+ ifp = NULL;
+ break;
+ }
+ nmd = na->nm_mem; /* get memory allocator */
+ } else {
+ nmd = netmap_mem_find(req->nr_mem_id ? req->nr_mem_id : 1);
+ if (nmd == NULL) {
+ error = EINVAL;
+ break;
+ }
+ }
+
+ error = netmap_mem_get_info(nmd, &req->nr_memsize, &memflags,
+ &req->nr_mem_id);
+ if (error)
+ break;
+ if (na == NULL) /* only memory info */
+ break;
+ req->nr_offset = 0;
+ req->nr_rx_slots = req->nr_tx_slots = 0;
+ netmap_update_config(na);
+ req->nr_rx_rings = na->num_rx_rings;
+ req->nr_tx_rings = na->num_tx_rings;
+ req->nr_rx_slots = na->num_rx_desc;
+ req->nr_tx_slots = na->num_tx_desc;
+ } while (0);
+ netmap_unget_na(na, ifp);
+ NMG_UNLOCK();
break;
- } else if (i == NETMAP_PT_HOST_CREATE || i == NETMAP_PT_HOST_DELETE) {
- /* forward the command to the ptnetmap subsystem */
- error = ptnetmap_ctl(nmr, priv->np_na);
+ }
+#ifdef WITH_VALE
+ case NETMAP_REQ_VALE_ATTACH: {
+ error = nm_bdg_ctl_attach(hdr, NULL /* userspace request */);
break;
- } else if (i == NETMAP_VNET_HDR_GET) {
- /* get vnet-header length for this netmap port */
+ }
+
+ case NETMAP_REQ_VALE_DETACH: {
+ error = nm_bdg_ctl_detach(hdr, NULL /* userspace request */);
+ break;
+ }
+
+ case NETMAP_REQ_VALE_LIST: {
+ error = netmap_bdg_list(hdr);
+ break;
+ }
+
+ case NETMAP_REQ_PORT_HDR_SET: {
+ struct nmreq_port_hdr *req =
+ (struct nmreq_port_hdr *)hdr->nr_body;
+ /* Build a nmreq_register out of the nmreq_port_hdr,
+ * so that we can call netmap_get_bdg_na(). */
+ struct nmreq_register regreq;
+ bzero(&regreq, sizeof(regreq));
+ /* For now we only support virtio-net headers, and only for
+ * VALE ports, but this may change in future. Valid lengths
+ * for the virtio-net header are 0 (no header), 10 and 12. */
+ if (req->nr_hdr_len != 0 &&
+ req->nr_hdr_len != sizeof(struct nm_vnet_hdr) &&
+ req->nr_hdr_len != 12) {
+ error = EINVAL;
+ break;
+ }
+ NMG_LOCK();
+ hdr->nr_reqtype = NETMAP_REQ_REGISTER;
+ hdr->nr_body = (uint64_t)&regreq;
+ error = netmap_get_bdg_na(hdr, &na, NULL, 0);
+ hdr->nr_reqtype = NETMAP_REQ_PORT_HDR_SET;
+ hdr->nr_body = (uint64_t)req;
+ if (na && !error) {
+ struct netmap_vp_adapter *vpna =
+ (struct netmap_vp_adapter *)na;
+ na->virt_hdr_len = req->nr_hdr_len;
+ if (na->virt_hdr_len) {
+ vpna->mfs = NETMAP_BUF_SIZE(na);
+ }
+ D("Using vnet_hdr_len %d for %p", na->virt_hdr_len, na);
+ netmap_adapter_put(na);
+ } else if (!na) {
+ error = ENXIO;
+ }
+ NMG_UNLOCK();
+ break;
+ }
+
+ case NETMAP_REQ_PORT_HDR_GET: {
+ /* Get vnet-header length for this netmap port */
+ struct nmreq_port_hdr *req =
+ (struct nmreq_port_hdr *)hdr->nr_body;
+ /* Build a nmreq_register out of the nmreq_port_hdr,
+ * so that we can call netmap_get_bdg_na(). */
+ struct nmreq_register regreq;
struct ifnet *ifp;
+ bzero(&regreq, sizeof(regreq));
NMG_LOCK();
- error = netmap_get_na(nmr, &na, &ifp, NULL, 0);
+ hdr->nr_reqtype = NETMAP_REQ_REGISTER;
+ hdr->nr_body = (uint64_t)&regreq;
+ error = netmap_get_na(hdr, &na, &ifp, NULL, 0);
+ hdr->nr_reqtype = NETMAP_REQ_PORT_HDR_GET;
+ hdr->nr_body = (uint64_t)req;
if (na && !error) {
- nmr->nr_arg1 = na->virt_hdr_len;
+ req->nr_hdr_len = na->virt_hdr_len;
}
netmap_unget_na(na, ifp);
NMG_UNLOCK();
break;
- } else if (i == NETMAP_POOLS_INFO_GET) {
- /* get information from the memory allocator */
+ }
+
+ case NETMAP_REQ_VALE_NEWIF: {
+ error = nm_vi_create(hdr);
+ break;
+ }
+
+ case NETMAP_REQ_VALE_DELIF: {
+ error = nm_vi_destroy(hdr->nr_name);
+ break;
+ }
+
+ case NETMAP_REQ_VALE_POLLING_ENABLE:
+ case NETMAP_REQ_VALE_POLLING_DISABLE: {
+ error = nm_bdg_polling(hdr);
+ break;
+ }
+#endif /* WITH_VALE */
+ case NETMAP_REQ_POOLS_INFO_GET: {
+ struct nmreq_pools_info *req =
+ (struct nmreq_pools_info *)hdr->nr_body;
+ /* Get information from the memory allocator. This
+ * netmap device must already be bound to a port.
+ * Note that hdr->nr_name is ignored. */
NMG_LOCK();
if (priv->np_na && priv->np_na->nm_mem) {
struct netmap_mem_d *nmd = priv->np_na->nm_mem;
- error = netmap_mem_pools_info_get(nmr, nmd);
+ error = netmap_mem_pools_info_get(req, nmd);
} else {
error = EINVAL;
}
NMG_UNLOCK();
break;
- } else if (i == NETMAP_POOLS_CREATE) {
- nmd = netmap_mem_ext_create(nmr, &error);
- if (nmd == NULL)
- break;
- /* reset the fields used by POOLS_CREATE to
- * avoid confusing the rest of the code
- */
- nmr->nr_cmd = 0;
- nmr->nr_arg1 = 0;
- nmr->nr_arg2 = 0;
- nmr->nr_arg3 = 0;
- } else if (i != 0) {
- D("nr_cmd must be 0 not %d", i);
+ }
+
+ default: {
error = EINVAL;
break;
}
-
- /* protect access to priv from concurrent NIOCREGIF */
- NMG_LOCK();
- do {
- u_int memflags;
- uint64_t memsize;
-
- if (priv->np_nifp != NULL) { /* thread already registered */
- error = EBUSY;
- break;
- }
-
- if (nmr->nr_arg2) {
- /* find the allocator and get a reference */
- nmd = netmap_mem_find(nmr->nr_arg2);
- if (nmd == NULL) {
- error = EINVAL;
- break;
- }
- }
- /* find the interface and a reference */
- error = netmap_get_na(nmr, &na, &ifp, nmd,
- 1 /* create */); /* keep reference */
- if (error)
- break;
- if (NETMAP_OWNED_BY_KERN(na)) {
- error = EBUSY;
- break;
- }
-
- if (na->virt_hdr_len && !(nmr->nr_flags & NR_ACCEPT_VNET_HDR)) {
- error = EIO;
- break;
- }
-
- error = netmap_do_regif(priv, na, nmr->nr_ringid, nmr->nr_flags);
- if (error) { /* reg. failed, release priv and ref */
- break;
- }
- nifp = priv->np_nifp;
- priv->np_td = td; // XXX kqueue, debugging only
-
- /* return the offset of the netmap_if object */
- nmr->nr_rx_rings = na->num_rx_rings;
- nmr->nr_tx_rings = na->num_tx_rings;
- nmr->nr_rx_slots = na->num_rx_desc;
- nmr->nr_tx_slots = na->num_tx_desc;
- error = netmap_mem_get_info(na->nm_mem, &memsize, &memflags,
- &nmr->nr_arg2);
- if (error) {
- netmap_do_unregif(priv);
- break;
- }
- nmr->nr_memsize = (uint32_t)memsize;
- if (memflags & NETMAP_MEM_PRIVATE) {
- *(uint32_t *)(uintptr_t)&nifp->ni_flags |= NI_PRIV_MEM;
- }
- for_rx_tx(t) {
- priv->np_si[t] = nm_si_user(priv, t) ?
- &na->si[t] : &NMR(na, t)[priv->np_qfirst[t]].si;
- }
-
- if (nmr->nr_arg3) {
- if (netmap_verbose)
- D("requested %d extra buffers", nmr->nr_arg3);
- nmr->nr_arg3 = netmap_extra_alloc(na,
- &nifp->ni_bufs_head, nmr->nr_arg3);
- if (netmap_verbose)
- D("got %d extra buffers", nmr->nr_arg3);
- }
- nmr->nr_offset = netmap_mem_if_offset(na->nm_mem, nifp);
-
- /* store ifp reference so that priv destructor may release it */
- priv->np_ifp = ifp;
- } while (0);
- if (error) {
- netmap_unget_na(na, ifp);
}
- /* release the reference from netmap_mem_find() or
- * netmap_mem_ext_create()
- */
- if (nmd)
- netmap_mem_put(nmd);
- NMG_UNLOCK();
+ /* Write back request body to userspace and reset the
+ * user-space pointer. */
+ error = nmreq_copyout(hdr, error);
break;
+ }
case NIOCTXSYNC:
- case NIOCRXSYNC:
+ case NIOCRXSYNC: {
nifp = priv->np_nifp;
if (nifp == NULL) {
@@ -2506,7 +2647,7 @@ netmap_ioctl(struct netmap_priv_d *priv, u_long cmd, caddr_t data, struct thread
sync_flags = priv->np_sync_flags;
for (i = qfirst; i < qlast; i++) {
- struct netmap_kring *kring = krings + i;
+ struct netmap_kring *kring = krings[i];
struct netmap_ring *ring = kring->ring;
if (unlikely(nm_kr_tryget(kring, 1, &error))) {
@@ -2549,51 +2690,292 @@ netmap_ioctl(struct netmap_priv_d *priv, u_long cmd, caddr_t data, struct thread
}
break;
+ }
-#ifdef WITH_VALE
- case NIOCCONFIG:
- error = netmap_bdg_config(nmr);
- break;
-#endif
-#ifdef __FreeBSD__
- case FIONBIO:
- case FIOASYNC:
- ND("FIONBIO/FIOASYNC are no-ops");
+ default: {
+ return netmap_ioctl_legacy(priv, cmd, data, td);
break;
+ }
+ }
+
+ return (error);
+}
- case BIOCIMMEDIATE:
- case BIOCGHDRCMPLT:
- case BIOCSHDRCMPLT:
- case BIOCSSEESENT:
- D("ignore BIOCIMMEDIATE/BIOCSHDRCMPLT/BIOCSHDRCMPLT/BIOCSSEESENT");
+/*
+ * Return the size in bytes of the request body that goes with the given
+ * nr_reqtype. NETMAP_REQ_VALE_DELIF carries no body, and request types
+ * not listed below also yield 0.
+ */
+size_t
+nmreq_size_by_type(uint16_t nr_reqtype)
+{
+ switch (nr_reqtype) {
+ case NETMAP_REQ_REGISTER:
+ return sizeof(struct nmreq_register);
+ case NETMAP_REQ_PORT_INFO_GET:
+ return sizeof(struct nmreq_port_info_get);
+ case NETMAP_REQ_VALE_ATTACH:
+ return sizeof(struct nmreq_vale_attach);
+ case NETMAP_REQ_VALE_DETACH:
+ return sizeof(struct nmreq_vale_detach);
+ case NETMAP_REQ_VALE_LIST:
+ return sizeof(struct nmreq_vale_list);
+ case NETMAP_REQ_PORT_HDR_SET:
+ case NETMAP_REQ_PORT_HDR_GET:
+ /* both header requests share one body layout */
+ return sizeof(struct nmreq_port_hdr);
+ case NETMAP_REQ_VALE_NEWIF:
+ return sizeof(struct nmreq_vale_newif);
+ case NETMAP_REQ_VALE_DELIF:
+ /* no body: only the header is used */
+ return 0;
+ case NETMAP_REQ_VALE_POLLING_ENABLE:
+ case NETMAP_REQ_VALE_POLLING_DISABLE:
+ return sizeof(struct nmreq_vale_polling);
+ case NETMAP_REQ_POOLS_INFO_GET:
+ return sizeof(struct nmreq_pools_info);
+ }
+ /* unknown request type */
+ return 0;
+}
+
+/*
+ * Return the size in bytes of the body of an option of type nro_reqtype,
+ * i.e. the option size minus the common struct nmreq_option header.
+ * Option types not listed in the switch have an empty body (0).
+ * When NETMAP_REQ_OPT_DEBUG is defined and that bit is set in nro_reqtype,
+ * the remaining bits of nro_reqtype are returned as the body size.
+ */
+static size_t
+nmreq_opt_size_by_type(uint16_t nro_reqtype)
+{
+ size_t rv = sizeof(struct nmreq_option);
+#ifdef NETMAP_REQ_OPT_DEBUG
+ /* debug options encode their own body size in the type field */
+ if (nro_reqtype & NETMAP_REQ_OPT_DEBUG)
+ return (nro_reqtype & ~NETMAP_REQ_OPT_DEBUG);
+#endif /* NETMAP_REQ_OPT_DEBUG */
+ switch (nro_reqtype) {
+#ifdef WITH_EXTMEM
+ case NETMAP_REQ_OPT_EXTMEM:
+ rv = sizeof(struct nmreq_opt_extmem);
+ break;
+#endif /* WITH_EXTMEM */
+ }
+ /* subtract the common header */
+ return rv - sizeof(struct nmreq_option);
+}
- default: /* allow device-specific ioctls */
- {
- struct ifnet *ifp = ifunit_ref(nmr->nr_name);
- if (ifp == NULL) {
- error = ENXIO;
- } else {
- struct socket so;
+/*
+ * Build a kernel-space copy of a user request (body and option list) and
+ * make hdr point at it.
+ *
+ * hdr             request header; on success nr_body and nr_options are
+ *                 replaced with pointers into a freshly allocated kernel
+ *                 buffer and nr_reserved is set, so that nmreq_copyout()
+ *                 can later restore the user pointers and free the buffer.
+ * nr_body_is_user zero when the request already lives in kernel memory;
+ *                 in that case nothing is copied and 0 is returned.
+ *
+ * Buffer layout (nmreq_copyout() depends on it):
+ *   [user nr_body][user nr_options][body]
+ *   then, for each option: [user nro_next][option header][option body]
+ * where every saved user pointer occupies one uint64_t.
+ *
+ * Returns 0 on success, otherwise an errno (EINVAL, EMSGSIZE, ENOMEM or
+ * the copyin() error). On failure hdr carries the original user pointers
+ * and nr_reserved is cleared again.
+ */
+int
+nmreq_copyin(struct nmreq_header *hdr, int nr_body_is_user)
+{
+	size_t rqsz, optsz, bufsz;
+	int error;
+	char *ker = NULL, *p;
+	struct nmreq_option **next, *src;
+	struct nmreq_option buf;
+	uint64_t *ptrs;
+
+	if (hdr->nr_reserved)
+		return EINVAL;
+
+	if (!nr_body_is_user)
+		return 0;
+
+	hdr->nr_reserved = nr_body_is_user;
+
+	/* compute the total size of the buffer */
+	rqsz = nmreq_size_by_type(hdr->nr_reqtype);
+	if (rqsz > NETMAP_REQ_MAXSIZE) {
+		error = EMSGSIZE;
+		goto out_err;
+	}
+	if ((rqsz && hdr->nr_body == (uint64_t)NULL) ||
+	    (!rqsz && hdr->nr_body != (uint64_t)NULL)) {
+		/* Request body expected, but not found; or
+		 * request body found but unexpected. */
+		error = EINVAL;
+		goto out_err;
+	}
- bzero(&so, sizeof(so));
- so.so_vnet = ifp->if_vnet;
- // so->so_proto not null.
- error = ifioctl(&so, cmd, data, td);
- if_rele(ifp);
+
+	/* The two saved user pointers are stored as uint64_t, so size
+	 * their slots as such (sizeof(void *) is too small on 32-bit). */
+	bufsz = 2 * sizeof(uint64_t) + rqsz;
+	optsz = 0;
+	for (src = (struct nmreq_option *)hdr->nr_options; src;
+	     src = (struct nmreq_option *)buf.nro_next)
+	{
+		size_t thisopt;
+
+		error = copyin(src, &buf, sizeof(*src));
+		if (error)
+			goto out_err;
+		thisopt = sizeof(*src) +
+		    nmreq_opt_size_by_type(buf.nro_reqtype);
+		optsz += thisopt;
+		/* This also bounds the walk if the user list is circular. */
+		if (rqsz + optsz > NETMAP_REQ_MAXSIZE) {
+			error = EMSGSIZE;
+			goto out_err;
		}
- break;
- }
+		/* one saved user nro_next pointer, plus the option itself */
+		bufsz += sizeof(uint64_t) + thisopt;
+	}
-#else /* linux */
- default:
- error = EOPNOTSUPP;
-#endif /* linux */
+
+	ker = nm_os_malloc(bufsz);
+	if (ker == NULL) {
+		error = ENOMEM;
+		goto out_err;
	}
+	p = ker;
- return (error);
+
+	/* make a copy of the user pointers */
+	ptrs = (uint64_t*)p;
+	*ptrs++ = hdr->nr_body;
+	*ptrs++ = hdr->nr_options;
+	p = (char *)ptrs;
+
+	/* copy the body */
+	error = copyin((void *)hdr->nr_body, p, rqsz);
+	if (error)
+		goto out_restore;
+	/* overwrite the user pointer with the in-kernel one */
+	hdr->nr_body = (uint64_t)p;
+	p += rqsz;
+
+	/* copy the options */
+	next = (struct nmreq_option **)&hdr->nr_options;
+	src = *next;
+	while (src) {
+		struct nmreq_option *opt;
+
+		/* The option list lives in user memory and may have been
+		 * changed since it was sized above: never write past the
+		 * end of the buffer. */
+		if ((size_t)(p - ker) + sizeof(uint64_t) + sizeof(*src) >
+		    bufsz) {
+			error = EMSGSIZE;
+			goto out_restore;
+		}
+
+		/* copy the option header */
+		ptrs = (uint64_t *)p;
+		opt = (struct nmreq_option *)(ptrs + 1);
+		error = copyin(src, opt, sizeof(*src));
+		if (error)
+			goto out_restore;
+		/* make a copy of the user next pointer */
+		*ptrs = opt->nro_next;
+		/* overwrite the user pointer with the in-kernel one */
+		*next = opt;
+
+		/* initialize the option as not supported.
+		 * Recognized options will update this field.
+		 */
+		opt->nro_status = EOPNOTSUPP;
+
+		p = (char *)(opt + 1);
+
+		/* copy the option body */
+		optsz = nmreq_opt_size_by_type(opt->nro_reqtype);
+		if (optsz) {
+			/* same guard as above, for the body */
+			if ((size_t)(p - ker) + optsz > bufsz) {
+				error = EMSGSIZE;
+				goto out_restore;
+			}
+			/* the option body follows the option header */
+			error = copyin(src + 1, p, optsz);
+			if (error)
+				goto out_restore;
+			p += optsz;
+		}
+
+		/* move to next option */
+		next = (struct nmreq_option **)&opt->nro_next;
+		src = *next;
+	}
+	return 0;
+
+out_restore:
+	/* put the user pointers back before releasing the kernel copy */
+	ptrs = (uint64_t *)ker;
+	hdr->nr_body = *ptrs++;
+	hdr->nr_options = *ptrs++;
+	nm_os_free(ker);
+out_err:
+	/* a failed request must not stay marked as holding kernel pointers */
+	hdr->nr_reserved = 0;
+	return error;
}
+/*
+ * Counterpart of nmreq_copyin(): write the kernel copy of the request
+ * back to user space, restore the user pointers in hdr and free the
+ * kernel buffer.
+ *
+ * hdr     request header; does nothing if hdr->nr_reserved is zero.
+ * rerror  error produced while handling the request. When non-zero the
+ *         request body and the option bodies are not copied out, but the
+ *         option headers still are.
+ *
+ * Returns rerror, or the copyout() error if rerror was zero and a copy
+ * to user space failed.
+ */
+static int
+nmreq_copyout(struct nmreq_header *hdr, int rerror)
+{
+ struct nmreq_option *src, *dst;
+ void *ker = (void *)hdr->nr_body, *bufstart;
+ uint64_t *ptrs;
+ size_t bodysz;
+ int error;
+
+ /* nothing was copied in: nothing to write back or free */
+ if (!hdr->nr_reserved)
+ return rerror;
+
+ /* restore the user pointers in the header */
+ /* the two saved user pointers are read from the two uint64_t slots
+ * immediately before the kernel body; that is also the address
+ * that is freed at the end */
+ ptrs = (uint64_t *)ker - 2;
+ bufstart = ptrs;
+ hdr->nr_body = *ptrs++;
+ /* grab the kernel-side option list before nr_options is overwritten */
+ src = (struct nmreq_option *)hdr->nr_options;
+ hdr->nr_options = *ptrs;
+
+ if (!rerror) {
+ /* copy the body */
+ bodysz = nmreq_size_by_type(hdr->nr_reqtype);
+ error = copyout(ker, (void *)hdr->nr_body, bodysz);
+ if (error) {
+ rerror = error;
+ goto out;
+ }
+ }
+
+ /* copy the options */
+ /* src walks the kernel copies, dst the matching user options */
+ dst = (struct nmreq_option *)hdr->nr_options;
+ while (src) {
+ size_t optsz;
+ uint64_t next;
+
+ /* restore the user pointer */
+ /* the user nro_next is read from the uint64_t just before
+ * the kernel option; the kernel link is kept in next */
+ next = src->nro_next;
+ ptrs = (uint64_t *)src - 1;
+ src->nro_next = *ptrs;
+
+ /* always copy the option header */
+ error = copyout(src, dst, sizeof(*src));
+ if (error) {
+ rerror = error;
+ goto out;
+ }
+
+ /* copy the option body only if there was no error */
+ if (!rerror && !src->nro_status) {
+ optsz = nmreq_opt_size_by_type(src->nro_reqtype);
+ if (optsz) {
+ error = copyout(src + 1, dst + 1, optsz);
+ if (error) {
+ rerror = error;
+ goto out;
+ }
+ }
+ }
+ src = (struct nmreq_option *)next;
+ dst = (struct nmreq_option *)*ptrs;
+ }
+
+
+out:
+ /* reached on success and on copyout failure alike */
+ hdr->nr_reserved = 0;
+ nm_os_free(bufstart);
+ return rerror;
+}
+
+/*
+ * Walk the option list starting at opt (which may be NULL) and return the
+ * first option whose nro_reqtype equals reqtype, or NULL if none matches.
+ */
+struct nmreq_option *
+nmreq_findoption(struct nmreq_option *opt, uint16_t reqtype)
+{
+	struct nmreq_option *cur = opt;
+
+	while (cur != NULL && cur->nro_reqtype != reqtype)
+		cur = (struct nmreq_option *)cur->nro_next;
+	return cur;
+}
+
+/*
+ * Look for further options of the same type as opt later in the list.
+ * Each such duplicate gets nro_status = EINVAL. Returns EINVAL if at
+ * least one duplicate was found, 0 otherwise. opt itself is not modified.
+ */
+int
+nmreq_checkduplicate(struct nmreq_option *opt) {
+	const uint16_t wanted = opt->nro_reqtype;
+	struct nmreq_option *twin;
+	int found = 0;
+
+	for (twin = nmreq_findoption((struct nmreq_option *)opt->nro_next,
+		wanted);
+	     twin != NULL;
+	     twin = nmreq_findoption((struct nmreq_option *)twin->nro_next,
+		wanted)) {
+		twin->nro_status = EINVAL;
+		found = 1;
+	}
+	return found ? EINVAL : 0;
+}
+
+/*
+ * Scan the option list attached to hdr and return EOPNOTSUPP if any
+ * option still carries the EOPNOTSUPP status, 0 otherwise.
+ */
+static int
+nmreq_checkoptions(struct nmreq_header *hdr)
+{
+	struct nmreq_option *cur = (struct nmreq_option *)hdr->nr_options;
+
+	while (cur != NULL) {
+		if (cur->nro_status == EOPNOTSUPP)
+			return EOPNOTSUPP;
+		cur = (struct nmreq_option *)cur->nro_next;
+	}
+
+	return 0;
+}
/*
* select(2) and poll(2) handlers for the "netmap" device.
@@ -2680,7 +3062,7 @@ netmap_poll(struct netmap_priv_d *priv, int events, NM_SELRECORD_T *sr)
if (want_tx) {
enum txrx t = NR_TX;
for (i = priv->np_qfirst[t]; want[t] && i < priv->np_qlast[t]; i++) {
- kring = &NMR(na, t)[i];
+ kring = NMR(na, t)[i];
/* XXX compare ring->cur and kring->tail */
if (!nm_ring_empty(kring->ring)) {
revents |= want[t];
@@ -2692,7 +3074,7 @@ netmap_poll(struct netmap_priv_d *priv, int events, NM_SELRECORD_T *sr)
enum txrx t = NR_RX;
want_rx = 0; /* look for a reason to run the handlers */
for (i = priv->np_qfirst[t]; i < priv->np_qlast[t]; i++) {
- kring = &NMR(na, t)[i];
+ kring = NMR(na, t)[i];
if (kring->ring->cur == kring->ring->tail /* try fetch new buffers */
|| kring->rhead != kring->ring->head /* release buffers */) {
want_rx = 1;
@@ -2706,9 +3088,9 @@ netmap_poll(struct netmap_priv_d *priv, int events, NM_SELRECORD_T *sr)
#ifdef linux
/* The selrecord must be unconditional on linux. */
nm_os_selrecord(sr, check_all_tx ?
- &na->si[NR_TX] : &na->tx_rings[priv->np_qfirst[NR_TX]].si);
+ &na->si[NR_TX] : &na->tx_rings[priv->np_qfirst[NR_TX]]->si);
nm_os_selrecord(sr, check_all_rx ?
- &na->si[NR_RX] : &na->rx_rings[priv->np_qfirst[NR_RX]].si);
+ &na->si[NR_RX] : &na->rx_rings[priv->np_qfirst[NR_RX]]->si);
#endif /* linux */
/*
@@ -2728,16 +3110,16 @@ flush_tx:
for (i = priv->np_qfirst[NR_TX]; i < priv->np_qlast[NR_TX]; i++) {
int found = 0;
- kring = &na->tx_rings[i];
+ kring = na->tx_rings[i];
ring = kring->ring;
/*
* Don't try to txsync this TX ring if we already found some
* space in some of the TX rings (want_tx == 0) and there are no
* TX slots in this ring that need to be flushed to the NIC
- * (cur == hwcur).
+ * (head == hwcur).
*/
- if (!send_down && !want_tx && ring->cur == kring->nr_hwcur)
+ if (!send_down && !want_tx && ring->head == kring->nr_hwcur)
continue;
if (nm_kr_tryget(kring, 1, &revents))
@@ -2774,7 +3156,7 @@ flush_tx:
if (want_tx && retry_tx && sr) {
#ifndef linux
nm_os_selrecord(sr, check_all_tx ?
- &na->si[NR_TX] : &na->tx_rings[priv->np_qfirst[NR_TX]].si);
+ &na->si[NR_TX] : &na->tx_rings[priv->np_qfirst[NR_TX]]->si);
#endif /* !linux */
retry_tx = 0;
goto flush_tx;
@@ -2791,7 +3173,7 @@ do_retry_rx:
for (i = priv->np_qfirst[NR_RX]; i < priv->np_qlast[NR_RX]; i++) {
int found = 0;
- kring = &na->rx_rings[i];
+ kring = na->rx_rings[i];
ring = kring->ring;
if (unlikely(nm_kr_tryget(kring, 1, &revents)))
@@ -2835,7 +3217,7 @@ do_retry_rx:
#ifndef linux
if (retry_rx && sr) {
nm_os_selrecord(sr, check_all_rx ?
- &na->si[NR_RX] : &na->rx_rings[priv->np_qfirst[NR_RX]].si);
+ &na->si[NR_RX] : &na->rx_rings[priv->np_qfirst[NR_RX]]->si);
}
#endif /* !linux */
if (send_down || retry_rx) {
@@ -2871,7 +3253,7 @@ nma_intr_enable(struct netmap_adapter *na, int onoff)
for_rx_tx(t) {
for (i = 0; i < nma_get_nrings(na, t); i++) {
- struct netmap_kring *kring = &NMR(na, t)[i];
+ struct netmap_kring *kring = NMR(na, t)[i];
int on = !(kring->nr_kflags & NKR_NOINTR);
if (!!onoff != !!on) {
@@ -2907,7 +3289,7 @@ nma_intr_enable(struct netmap_adapter *na, int onoff)
static int
netmap_notify(struct netmap_kring *kring, int flags)
{
- struct netmap_adapter *na = kring->na;
+ struct netmap_adapter *na = kring->notify_na;
enum txrx t = kring->tx;
nm_os_selwakeup(&kring->si);
@@ -2934,6 +3316,11 @@ netmap_attach_common(struct netmap_adapter *na)
return EINVAL;
}
+ if (!na->rx_buf_maxsize) {
+ /* Set a conservative default (larger is safer). */
+ na->rx_buf_maxsize = PAGE_SIZE;
+ }
+
#ifdef __FreeBSD__
if (na->na_flags & NAF_HOST_RINGS && na->ifp) {
na->if_input = na->ifp->if_input; /* for netmap_send_up */
@@ -3149,7 +3536,7 @@ netmap_hw_krings_create(struct netmap_adapter *na)
int ret = netmap_krings_create(na, 0);
if (ret == 0) {
/* initialize the mbq for the sw rx ring */
- mbq_safe_init(&na->rx_rings[na->num_rx_rings].rx_queue);
+ mbq_safe_init(&na->rx_rings[na->num_rx_rings]->rx_queue);
ND("initialized sw rx queue %d", na->num_rx_rings);
}
return ret;
@@ -3213,7 +3600,7 @@ netmap_transmit(struct ifnet *ifp, struct mbuf *m)
struct mbq *q;
int busy;
- kring = &na->rx_rings[na->num_rx_rings];
+ kring = na->rx_rings[na->num_rx_rings];
// XXX [Linux] we do not need this lock
// if we follow the down/configure/up protocol -gl
// mtx_lock(&na->core_lock);
@@ -3228,7 +3615,7 @@ netmap_transmit(struct ifnet *ifp, struct mbuf *m)
if (txr >= na->num_tx_rings) {
txr %= na->num_tx_rings;
}
- tx_kring = &NMR(na, NR_TX)[txr];
+ tx_kring = NMR(na, NR_TX)[txr];
if (tx_kring->nr_mode == NKR_NETMAP_OFF) {
return MBUF_TRANSMIT(na, ifp, m);
@@ -3316,7 +3703,7 @@ netmap_reset(struct netmap_adapter *na, enum txrx tx, u_int n,
if (n >= na->num_tx_rings)
return NULL;
- kring = na->tx_rings + n;
+ kring = na->tx_rings[n];
if (kring->nr_pending_mode == NKR_NETMAP_OFF) {
kring->nr_mode = NKR_NETMAP_OFF;
@@ -3328,7 +3715,7 @@ netmap_reset(struct netmap_adapter *na, enum txrx tx, u_int n,
} else {
if (n >= na->num_rx_rings)
return NULL;
- kring = na->rx_rings + n;
+ kring = na->rx_rings[n];
if (kring->nr_pending_mode == NKR_NETMAP_OFF) {
kring->nr_mode = NKR_NETMAP_OFF;
@@ -3396,7 +3783,7 @@ netmap_common_irq(struct netmap_adapter *na, u_int q, u_int *work_done)
if (q >= nma_get_nrings(na, t))
return NM_IRQ_PASS; // not a physical queue
- kring = NMR(na, t) + q;
+ kring = NMR(na, t)[q];
if (kring->nr_mode == NKR_NETMAP_OFF) {
return NM_IRQ_PASS;
diff --git a/sys/dev/netmap/netmap_freebsd.c b/sys/dev/netmap/netmap_freebsd.c
index c122dc64bed26..cc63b4b478617 100644
--- a/sys/dev/netmap/netmap_freebsd.c
+++ b/sys/dev/netmap/netmap_freebsd.c
@@ -619,6 +619,116 @@ nm_os_vi_detach(struct ifnet *ifp)
if_free(ifp);
}
+#ifdef WITH_EXTMEM
+#include <vm/vm_map.h>
+#include <vm/vm_kern.h>
+/* Descriptor of a user memory region mapped into the kernel map. */
+struct nm_os_extmem {
+ vm_object_t obj; /* VM object found at the user address */
+ vm_offset_t kva; /* kernel virtual address of the mapping */
+ vm_offset_t size; /* mapping size in bytes (obj->size << PAGE_SHIFT) */
+ /* cursor for nm_os_extmem_nextpage(): starts at kva and advances by
+ * PAGE_SIZE. NOTE(review): holds an address although the type is
+ * vm_pindex_t */
+ vm_pindex_t scan;
+};
+
+/*
+ * Tear down an external-memory descriptor: remove the kernel mapping
+ * [kva, kva + size) and free the descriptor itself.
+ * NOTE(review): no explicit vm_object_deallocate() here; presumably the
+ * object reference is dropped together with the mapping - confirm.
+ */
+void
+nm_os_extmem_delete(struct nm_os_extmem *e)
+{
+ D("freeing %lx bytes", e->size);
+ vm_map_remove(kernel_map, e->kva, e->kva + e->size);
+ nm_os_free(e);
+}
+
+/*
+ * Return the kernel address of the next page of the region and advance
+ * the scan cursor by PAGE_SIZE; return NULL once the cursor has reached
+ * the end of the mapping.
+ */
+char *
+nm_os_extmem_nextpage(struct nm_os_extmem *e)
+{
+	char *page;
+
+	if (e->scan >= e->kva + e->size)
+		return NULL;
+
+	page = (char *)e->scan;
+	e->scan += PAGE_SIZE;
+	return page;
+}
+
+/*
+ * Return non-zero when the two descriptors refer to the same VM object,
+ * zero otherwise.
+ */
+int
+nm_os_extmem_isequal(struct nm_os_extmem *e1, struct nm_os_extmem *e2)
+{
+	/* compare e1 against e2 (comparing e1 with itself is always true) */
+	return (e1->obj == e2->obj);
+}
+
+/* Return the size of the mapped region expressed in pages. */
+int
+nm_os_extmem_nr_pages(struct nm_os_extmem *e)
+{
+ return e->size >> PAGE_SHIFT;
+}
+
+/*
+ * Create an external-memory descriptor for the VM object that backs the
+ * user address p in the current process: the object is mapped into the
+ * kernel map and wired.
+ *
+ * p       user virtual address inside the region to import.
+ * pi      not used by this function.
+ * perror  if not NULL, receives 0 on success or an errno on failure.
+ *
+ * Returns the new descriptor, or NULL on failure (with *perror set to
+ * ENOMEM or EFAULT).
+ */
+struct nm_os_extmem *
+nm_os_extmem_create(unsigned long p, struct nmreq_pools_info *pi, int *perror)
+{
+	vm_map_t map;
+	vm_map_entry_t entry;
+	vm_object_t obj;
+	vm_prot_t prot;
+	vm_pindex_t index;
+	boolean_t wired;
+	struct nm_os_extmem *e = NULL;
+	int rv, error = 0;
+
+	e = nm_os_malloc(sizeof(*e));
+	if (e == NULL) {
+		error = ENOMEM;
+		goto out;
+	}
+
+	map = &curthread->td_proc->p_vmspace->vm_map;
+	rv = vm_map_lookup(&map, p, VM_PROT_RW, &entry,
+			&obj, &index, &prot, &wired);
+	if (rv != KERN_SUCCESS) {
+		D("address %lx not found", p);
+		/* report the failure instead of returning NULL with
+		 * *perror == 0 */
+		error = EFAULT;
+		goto out_free;
+	}
+	/* Take our reference on the object before ending the lookup, so
+	 * that the object cannot go away in between. */
+	e->obj = obj;
+	vm_object_reference(obj);
+	/* check that we are given the whole vm_object ? */
+	vm_map_lookup_done(map, entry);
+
+	/* wire the memory and add the vm_object to the kernel map,
+	 * to make sure that it is not freed even if the processes that
+	 * are mmap()ing it all exit
+	 */
+	e->kva = vm_map_min(kernel_map);
+	e->size = obj->size << PAGE_SHIFT;
+	rv = vm_map_find(kernel_map, obj, 0, &e->kva, e->size, 0,
+			VMFS_OPTIMAL_SPACE, VM_PROT_READ | VM_PROT_WRITE,
+			VM_PROT_READ | VM_PROT_WRITE, 0);
+	if (rv != KERN_SUCCESS) {
+		D("vm_map_find(%lx) failed", e->size);
+		error = ENOMEM;
+		goto out_rel;
+	}
+	rv = vm_map_wire(kernel_map, e->kva, e->kva + e->size,
+			VM_MAP_WIRE_SYSTEM | VM_MAP_WIRE_NOHOLES);
+	if (rv != KERN_SUCCESS) {
+		D("vm_map_wire failed");
+		error = ENOMEM;
+		goto out_rem;
+	}
+
+	e->scan = e->kva;
+
+	return e;
+
+out_rem:
+	vm_map_remove(kernel_map, e->kva, e->kva + e->size);
+	/* NOTE(review): presumably removing the mapping already drops the
+	 * object reference; clearing e->obj makes the deallocate below a
+	 * call on NULL - confirm. */
+	e->obj = NULL;
+out_rel:
+	vm_object_deallocate(e->obj);
+out_free:
+	nm_os_free(e);
+out:
+	if (perror)
+		*perror = error;
+	return NULL;
+}
+#endif /* WITH_EXTMEM */
+
/* ======================== PTNETMAP SUPPORT ========================== */
#ifdef WITH_PTNETMAP_GUEST
@@ -1151,16 +1261,10 @@ nm_os_kctx_worker_setaff(struct nm_kctx *nmk, int affinity)
}
struct nm_kctx *
-nm_os_kctx_create(struct nm_kctx_cfg *cfg, unsigned int cfgtype,
- void *opaque)
+nm_os_kctx_create(struct nm_kctx_cfg *cfg, void *opaque)
{
struct nm_kctx *nmk = NULL;
- if (cfgtype != PTNETMAP_CFGTYPE_BHYVE) {
- D("Unsupported cfgtype %u", cfgtype);
- return NULL;
- }
-
nmk = malloc(sizeof(*nmk), M_DEVBUF, M_NOWAIT | M_ZERO);
if (!nmk)
return NULL;
@@ -1429,7 +1533,7 @@ freebsd_netmap_ioctl(struct cdev *dev __unused, u_long cmd, caddr_t data,
error = ENXIO;
goto out;
}
- error = netmap_ioctl(priv, cmd, data, td);
+ error = netmap_ioctl(priv, cmd, data, td, /*nr_body_is_user=*/1);
out:
CURVNET_RESTORE();
diff --git a/sys/dev/netmap/netmap_generic.c b/sys/dev/netmap/netmap_generic.c
index 2ed251a557756..24d36d5db1b30 100644
--- a/sys/dev/netmap/netmap_generic.c
+++ b/sys/dev/netmap/netmap_generic.c
@@ -232,7 +232,7 @@ nm_os_get_mbuf(struct ifnet *ifp, int len)
+/*
+ * Iterate _k over the first _n entries of _karr, an array of kring
+ * pointers, with _i as the index.
+ * NOTE(review): the initializer loads *(_karr) before the bound is
+ * tested, and the step expression loads (_karr)[_n] after the last
+ * iteration, so the array needs a readable entry at index _n (and at
+ * index 0 even when _n is 0) - confirm every caller guarantees this.
+ */
#define for_each_kring_n(_i, _k, _karr, _n) \
- for (_k=_karr, _i = 0; _i < _n; (_k)++, (_i)++)
+ for ((_k)=*(_karr), (_i) = 0; (_i) < (_n); (_i)++, (_k) = (_karr)[(_i)])
#define for_each_tx_kring(_i, _k, _na) \
for_each_kring_n(_i, _k, (_na)->tx_rings, (_na)->num_tx_rings)
@@ -589,7 +589,7 @@ generic_mbuf_destructor(struct mbuf *m)
for (;;) {
bool match = false;
- kring = &na->tx_rings[r];
+ kring = na->tx_rings[r];
mtx_lock_spin(&kring->tx_event_lock);
if (kring->tx_event == m) {
kring->tx_event = NULL;
@@ -953,7 +953,7 @@ generic_rx_handler(struct ifnet *ifp, struct mbuf *m)
r = r % na->num_rx_rings;
}
- kring = &na->rx_rings[r];
+ kring = na->rx_rings[r];
if (kring->nr_mode == NKR_NETMAP_OFF) {
/* We must not intercept this mbuf. */
diff --git a/sys/dev/netmap/netmap_kern.h b/sys/dev/netmap/netmap_kern.h
index 3e64510913242..8fc71b8e820ef 100644
--- a/sys/dev/netmap/netmap_kern.h
+++ b/sys/dev/netmap/netmap_kern.h
@@ -77,7 +77,7 @@
#define WITH_GENERIC
#define WITH_PTNETMAP_HOST /* ptnetmap host support */
#define WITH_PTNETMAP_GUEST /* ptnetmap guest support */
-
+#define WITH_EXTMEM
#endif
#if defined(__FreeBSD__)
@@ -367,9 +367,6 @@ struct netmap_zmon_list {
* the next empty buffer as known by the hardware (next_to_check or so).
* TX rings: hwcur + hwofs coincides with next_to_send
*
- * For received packets, slot->flags is set to nkr_slot_flags
- * so we can provide a proper initial value.
- *
* The following fields are used to implement lock-free copy of packets
* from input to output ports in VALE switch:
* nkr_hwlease buffer after the last one being copied.
@@ -401,7 +398,7 @@ struct netmap_zmon_list {
struct netmap_kring {
struct netmap_ring *ring;
- uint32_t nr_hwcur;
+ uint32_t nr_hwcur; /* should be nr_hwhead */
uint32_t nr_hwtail;
/*
@@ -424,6 +421,7 @@ struct netmap_kring {
* by ptnetmap host ports)
*/
#define NKR_NOINTR 0x10 /* don't use interrupts on this ring */
+#define NKR_FAKERING 0x20 /* don't allocate/free buffers */
uint32_t nr_mode;
uint32_t nr_pending_mode;
@@ -450,7 +448,14 @@ struct netmap_kring {
NM_LOCK_T q_lock; /* protects kring and ring. */
NM_ATOMIC_T nr_busy; /* prevent concurrent syscalls */
+ /* the adapter that owns this kring */
struct netmap_adapter *na;
+
+ /* the adapter that wants to be notified when this kring has
+ * new slots available. This is usually the same as the above,
+ * but wrappers may let it point to themselves
+ */
+ struct netmap_adapter *notify_na;
/* The following fields are for VALE switch support */
struct nm_bdg_fwd *nkr_ft;
@@ -630,6 +635,15 @@ struct netmap_lut {
struct netmap_vp_adapter; // forward
+/* Struct to be filled by nm_config callbacks. */
+struct nm_config_info {
+ unsigned num_tx_rings;
+ unsigned num_rx_rings;
+ unsigned num_tx_descs;
+ unsigned num_rx_descs;
+ unsigned rx_buf_maxsize;
+};
+
/*
* The "struct netmap_adapter" extends the "struct adapter"
* (or equivalent) device descriptor.
@@ -690,8 +704,8 @@ struct netmap_adapter {
* as a contiguous chunk of memory. Each array has
* N+1 entries, for the adapter queues and for the host queue.
*/
- struct netmap_kring *tx_rings; /* array of TX rings. */
- struct netmap_kring *rx_rings; /* array of RX rings. */
+ struct netmap_kring **tx_rings; /* array of TX rings. */
+ struct netmap_kring **rx_rings; /* array of RX rings. */
void *tailroom; /* space below the rings array */
/* (used for leases) */
@@ -766,8 +780,7 @@ struct netmap_adapter {
#define NAF_FORCE_RECLAIM 2
#define NAF_CAN_FORWARD_DOWN 4
/* return configuration information */
- int (*nm_config)(struct netmap_adapter *,
- u_int *txr, u_int *txd, u_int *rxr, u_int *rxd);
+ int (*nm_config)(struct netmap_adapter *, struct nm_config_info *info);
int (*nm_krings_create)(struct netmap_adapter *);
void (*nm_krings_delete)(struct netmap_adapter *);
#ifdef WITH_VALE
@@ -787,7 +800,7 @@ struct netmap_adapter {
* Called with NMG_LOCK held.
*/
int (*nm_bdg_attach)(const char *bdg_name, struct netmap_adapter *);
- int (*nm_bdg_ctl)(struct netmap_adapter *, struct nmreq *, int);
+ int (*nm_bdg_ctl)(struct nmreq_header *, struct netmap_adapter *);
/* adapter used to attach this adapter to a VALE switch (if any) */
struct netmap_vp_adapter *na_vp;
@@ -823,7 +836,13 @@ struct netmap_adapter {
/* Offset of ethernet header for each packet. */
u_int virt_hdr_len;
- char name[64];
+ /* Max number of bytes that the NIC can store in the buffer
+ * referenced by each RX descriptor. This translates to the maximum
+ * bytes that a single netmap slot can reference. Larger packets
+ * require NS_MOREFRAG support. */
+ unsigned rx_buf_maxsize;
+
+ char name[NETMAP_REQ_IFNAMSIZ]; /* used at least by pipes */
};
static __inline u_int
@@ -856,7 +875,7 @@ nma_set_nrings(struct netmap_adapter *na, enum txrx t, u_int v)
na->num_rx_rings = v;
}
-static __inline struct netmap_kring*
+static __inline struct netmap_kring**
NMR(struct netmap_adapter *na, enum txrx t)
{
return (t == NR_TX ? na->tx_rings : na->rx_rings);
@@ -1011,12 +1030,22 @@ struct netmap_bwrap_adapter {
*/
struct netmap_priv_d *na_kpriv;
struct nm_bdg_polling_state *na_polling_state;
+ /* we overwrite the hwna->na_vp pointer, so we save
+ * here its original value, to be restored at detach
+ */
+ struct netmap_vp_adapter *saved_na_vp;
};
+int nm_bdg_ctl_attach(struct nmreq_header *hdr, void *auth_token);
+int nm_bdg_ctl_detach(struct nmreq_header *hdr, void *auth_token);
+int nm_bdg_polling(struct nmreq_header *hdr);
int netmap_bwrap_attach(const char *name, struct netmap_adapter *);
-int netmap_vi_create(struct nmreq *, int);
+int netmap_vi_create(struct nmreq_header *hdr, int);
+int nm_vi_create(struct nmreq_header *);
+int nm_vi_destroy(const char *name);
+int netmap_bdg_list(struct nmreq_header *hdr);
#else /* !WITH_VALE */
-#define netmap_vi_create(nmr, a) (EOPNOTSUPP)
+#define netmap_vi_create(hdr, a) (EOPNOTSUPP)
#endif /* WITH_VALE */
#ifdef WITH_PIPES
@@ -1024,10 +1053,12 @@ int netmap_vi_create(struct nmreq *, int);
#define NM_MAXPIPES 64 /* max number of pipes per adapter */
struct netmap_pipe_adapter {
+ /* pipe identifier is up.name */
struct netmap_adapter up;
- u_int id; /* pipe identifier */
- int role; /* either NR_REG_PIPE_MASTER or NR_REG_PIPE_SLAVE */
+#define NM_PIPE_ROLE_MASTER 0x1
+#define NM_PIPE_ROLE_SLAVE 0x2
+ int role; /* either NM_PIPE_ROLE_MASTER or NM_PIPE_ROLE_SLAVE */
struct netmap_adapter *parent; /* adapter that owns the memory */
struct netmap_pipe_adapter *peer; /* the other end of the pipe */
@@ -1195,6 +1226,7 @@ int netmap_transmit(struct ifnet *, struct mbuf *);
struct netmap_slot *netmap_reset(struct netmap_adapter *na,
enum txrx tx, u_int n, u_int new_cur);
int netmap_ring_reinit(struct netmap_kring *);
+int netmap_rings_config_get(struct netmap_adapter *, struct nm_config_info *);
/* Return codes for netmap_*x_irq. */
enum {
@@ -1255,10 +1287,10 @@ static inline void
nm_update_hostrings_mode(struct netmap_adapter *na)
{
/* Process nr_mode and nr_pending_mode for host rings. */
- na->tx_rings[na->num_tx_rings].nr_mode =
- na->tx_rings[na->num_tx_rings].nr_pending_mode;
- na->rx_rings[na->num_rx_rings].nr_mode =
- na->rx_rings[na->num_rx_rings].nr_pending_mode;
+ na->tx_rings[na->num_tx_rings]->nr_mode =
+ na->tx_rings[na->num_tx_rings]->nr_pending_mode;
+ na->rx_rings[na->num_rx_rings]->nr_mode =
+ na->rx_rings[na->num_rx_rings]->nr_pending_mode;
}
/* set/clear native flags and if_transmit/netdev_ops */
@@ -1318,6 +1350,11 @@ nm_clear_native_flags(struct netmap_adapter *na)
#endif
}
+#ifdef linux
+int netmap_linux_config(struct netmap_adapter *na,
+ struct nm_config_info *info);
+#endif /* linux */
+
/*
* nm_*sync_prologue() functions are used in ioctl/poll and ptnetmap
* kthreads.
@@ -1373,9 +1410,10 @@ uint32_t nm_rxsync_prologue(struct netmap_kring *, struct netmap_ring *);
*/
int netmap_attach_common(struct netmap_adapter *);
/* fill priv->np_[tr]xq{first,last} using the ringid and flags information
- * coming from a struct nmreq
+ * coming from a struct nmreq_register
*/
-int netmap_interp_ringid(struct netmap_priv_d *priv, uint16_t ringid, uint32_t flags);
+int netmap_interp_ringid(struct netmap_priv_d *priv, uint32_t nr_mode,
+ uint16_t nr_ringid, uint64_t nr_flags);
/* update the ring parameters (number and size of tx and rx rings).
* It calls the nm_config callback, if available.
*/
@@ -1409,12 +1447,12 @@ void netmap_disable_all_rings(struct ifnet *);
void netmap_enable_all_rings(struct ifnet *);
int netmap_do_regif(struct netmap_priv_d *priv, struct netmap_adapter *na,
- uint16_t ringid, uint32_t flags);
+ uint32_t nr_mode, uint16_t nr_ringid, uint64_t nr_flags);
void netmap_do_unregif(struct netmap_priv_d *priv);
u_int nm_bound_var(u_int *v, u_int dflt, u_int lo, u_int hi, const char *msg);
-int netmap_get_na(struct nmreq *nmr, struct netmap_adapter **na,
- struct ifnet **ifp, struct netmap_mem_d *nmd, int create);
+int netmap_get_na(struct nmreq_header *hdr, struct netmap_adapter **na,
+ struct ifnet **ifp, struct netmap_mem_d *nmd, int create);
void netmap_unget_na(struct netmap_adapter *na, struct ifnet *ifp);
int netmap_get_hw_na(struct ifnet *ifp,
struct netmap_mem_d *nmd, struct netmap_adapter **na);
@@ -1430,18 +1468,19 @@ int netmap_get_hw_na(struct ifnet *ifp,
* NM_BDG_MAXPORTS for broadcast, NM_BDG_MAXPORTS+1 to indicate
* drop.
*/
-typedef u_int (*bdg_lookup_fn_t)(struct nm_bdg_fwd *ft, uint8_t *ring_nr,
- struct netmap_vp_adapter *);
+typedef uint32_t (*bdg_lookup_fn_t)(struct nm_bdg_fwd *ft, uint8_t *ring_nr,
+ struct netmap_vp_adapter *, void *private_data);
typedef int (*bdg_config_fn_t)(struct nm_ifreq *);
typedef void (*bdg_dtor_fn_t)(const struct netmap_vp_adapter *);
+typedef void *(*bdg_update_private_data_fn_t)(void *private_data, void *callback_data, int *error);
struct netmap_bdg_ops {
bdg_lookup_fn_t lookup;
bdg_config_fn_t config;
bdg_dtor_fn_t dtor;
};
-u_int netmap_bdg_learning(struct nm_bdg_fwd *ft, uint8_t *dst_ring,
- struct netmap_vp_adapter *);
+uint32_t netmap_bdg_learning(struct nm_bdg_fwd *ft, uint8_t *dst_ring,
+ struct netmap_vp_adapter *, void *private_data);
#define NM_BRIDGES 8 /* number of bridges */
#define NM_BDG_MAXPORTS 254 /* up to 254 */
@@ -1449,45 +1488,47 @@ u_int netmap_bdg_learning(struct nm_bdg_fwd *ft, uint8_t *dst_ring,
#define NM_BDG_NOPORT (NM_BDG_MAXPORTS+1)
/* these are redefined in case of no VALE support */
-int netmap_get_bdg_na(struct nmreq *nmr, struct netmap_adapter **na,
+int netmap_get_bdg_na(struct nmreq_header *hdr, struct netmap_adapter **na,
struct netmap_mem_d *nmd, int create);
struct nm_bridge *netmap_init_bridges2(u_int);
void netmap_uninit_bridges2(struct nm_bridge *, u_int);
int netmap_init_bridges(void);
void netmap_uninit_bridges(void);
-int netmap_bdg_ctl(struct nmreq *nmr, struct netmap_bdg_ops *bdg_ops);
-int netmap_bdg_config(struct nmreq *nmr);
+int netmap_bdg_regops(const char *name, struct netmap_bdg_ops *bdg_ops, void *private_data, void *auth_token);
+int nm_bdg_update_private_data(const char *name, bdg_update_private_data_fn_t callback,
+ void *callback_data, void *auth_token);
+int netmap_bdg_config(struct nm_ifreq *nifr);
+void *netmap_bdg_create(const char *bdg_name, int *return_status);
+int netmap_bdg_destroy(const char *bdg_name, void *auth_token);
#else /* !WITH_VALE */
#define netmap_get_bdg_na(_1, _2, _3, _4) 0
#define netmap_init_bridges(_1) 0
#define netmap_uninit_bridges()
-#define netmap_bdg_ctl(_1, _2) EINVAL
+/* Stub for builds without VALE: takes the same four arguments as the
+ * WITH_VALE prototype of netmap_bdg_regops() so callers compile either way. */
+#define netmap_bdg_regops(_1, _2, _3, _4) EINVAL
#endif /* !WITH_VALE */
#ifdef WITH_PIPES
/* max number of pipes per device */
#define NM_MAXPIPES 64 /* XXX this should probably be a sysctl */
void netmap_pipe_dealloc(struct netmap_adapter *);
-int netmap_get_pipe_na(struct nmreq *nmr, struct netmap_adapter **na,
- struct netmap_mem_d *nmd, int create);
+int netmap_get_pipe_na(struct nmreq_header *hdr, struct netmap_adapter **na,
+ struct netmap_mem_d *nmd, int create);
#else /* !WITH_PIPES */
#define NM_MAXPIPES 0
#define netmap_pipe_alloc(_1, _2) 0
#define netmap_pipe_dealloc(_1)
-#define netmap_get_pipe_na(nmr, _2, _3, _4) \
- ({ int role__ = (nmr)->nr_flags & NR_REG_MASK; \
- (role__ == NR_REG_PIPE_MASTER || \
- role__ == NR_REG_PIPE_SLAVE) ? EOPNOTSUPP : 0; })
+#define netmap_get_pipe_na(hdr, _2, _3, _4) \
+ ((strchr(hdr->nr_name, '{') != NULL || strchr(hdr->nr_name, '}') != NULL) ? EOPNOTSUPP : 0)
#endif
#ifdef WITH_MONITOR
-int netmap_get_monitor_na(struct nmreq *nmr, struct netmap_adapter **na,
+int netmap_get_monitor_na(struct nmreq_header *hdr, struct netmap_adapter **na,
struct netmap_mem_d *nmd, int create);
void netmap_monitor_stop(struct netmap_adapter *na);
#else
-#define netmap_get_monitor_na(nmr, _2, _3, _4) \
- ((nmr)->nr_flags & (NR_MONITOR_TX | NR_MONITOR_RX) ? EOPNOTSUPP : 0)
+#define netmap_get_monitor_na(hdr, _2, _3, _4) \
+ (((struct nmreq_register *)hdr->nr_body)->nr_flags & (NR_MONITOR_TX | NR_MONITOR_RX) ? EOPNOTSUPP : 0)
#endif
#ifdef CONFIG_NET_NS
@@ -1508,7 +1549,11 @@ void netmap_fini(void);
int netmap_get_memory(struct netmap_priv_d* p);
void netmap_dtor(void *data);
-int netmap_ioctl(struct netmap_priv_d *priv, u_long cmd, caddr_t data, struct thread *);
+int netmap_ioctl(struct netmap_priv_d *priv, u_long cmd, caddr_t data,
+ struct thread *, int nr_body_is_user);
+int netmap_ioctl_legacy(struct netmap_priv_d *priv, u_long cmd, caddr_t data,
+ struct thread *td);
+size_t nmreq_size_by_type(uint16_t nr_reqtype);
/* netmap_adapter creation/destruction */
@@ -1871,7 +1916,7 @@ static inline int nm_kring_pending(struct netmap_priv_d *np)
for_rx_tx(t) {
for (i = np->np_qfirst[t]; i < np->np_qlast[t]; i++) {
- struct netmap_kring *kring = &NMR(na, t)[i];
+ struct netmap_kring *kring = NMR(na, t)[i];
if (kring->nr_mode != kring->nr_pending_mode) {
return 1;
}
@@ -1980,7 +2025,7 @@ void nm_os_mitigation_cleanup(struct nm_generic_mit *mit);
struct nm_bdg_fwd { /* forwarding entry for a bridge */
void *ft_buf; /* netmap or indirect buffer */
uint8_t ft_frags; /* how many fragments (only on 1st frag) */
- uint8_t _ft_port; /* dst port (unused) */
+ uint16_t ft_offset; /* dst port (unused) */
uint16_t ft_flags; /* flags, e.g. indirect */
uint16_t ft_len; /* src fragment len */
uint16_t ft_next; /* next packet to same destination */
@@ -2094,7 +2139,6 @@ struct nm_kctx_cfg {
};
/* kthread configuration */
struct nm_kctx *nm_os_kctx_create(struct nm_kctx_cfg *cfg,
- unsigned int cfgtype,
void *opaque);
int nm_os_kctx_worker_start(struct nm_kctx *);
void nm_os_kctx_worker_stop(struct nm_kctx *);
@@ -2120,19 +2164,21 @@ struct netmap_pt_host_adapter {
int (*parent_nm_notify)(struct netmap_kring *kring, int flags);
void *ptns;
};
-/* ptnetmap HOST routines */
-int netmap_get_pt_host_na(struct nmreq *nmr, struct netmap_adapter **na,
- struct netmap_mem_d * nmd, int create);
-int ptnetmap_ctl(struct nmreq *nmr, struct netmap_adapter *na);
+
+/* ptnetmap host-side routines */
+int netmap_get_pt_host_na(struct nmreq_header *hdr, struct netmap_adapter **na,
+ struct netmap_mem_d * nmd, int create);
+int ptnetmap_ctl(const char *nr_name, int create, struct netmap_adapter *na);
+
static inline int
nm_ptnetmap_host_on(struct netmap_adapter *na)
{
return na && na->na_flags & NAF_PTNETMAP_HOST;
}
#else /* !WITH_PTNETMAP_HOST */
-#define netmap_get_pt_host_na(nmr, _2, _3, _4) \
- ((nmr)->nr_flags & (NR_PTNETMAP_HOST) ? EOPNOTSUPP : 0)
-#define ptnetmap_ctl(_1, _2) EINVAL
+#define netmap_get_pt_host_na(hdr, _2, _3, _4) \
+ (((struct nmreq_register *)hdr->nr_body)->nr_flags & (NR_PTNETMAP_HOST) ? EOPNOTSUPP : 0)
+#define ptnetmap_ctl(_1, _2, _3) EINVAL
#define nm_ptnetmap_host_on(_1) EINVAL
#endif /* !WITH_PTNETMAP_HOST */
@@ -2175,4 +2221,7 @@ void ptnet_nm_krings_delete(struct netmap_adapter *na);
void ptnet_nm_dtor(struct netmap_adapter *na);
#endif /* WITH_PTNETMAP_GUEST */
+struct nmreq_option * nmreq_findoption(struct nmreq_option *, uint16_t);
+int nmreq_checkduplicate(struct nmreq_option *);
+
#endif /* _NET_NETMAP_KERN_H_ */
diff --git a/sys/dev/netmap/netmap_legacy.c b/sys/dev/netmap/netmap_legacy.c
new file mode 100644
index 0000000000000..da0d622958d9b
--- /dev/null
+++ b/sys/dev/netmap/netmap_legacy.c
@@ -0,0 +1,428 @@
+/*-
+ * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
+ *
+ * Copyright (C) 2018 Vincenzo Maffione
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+/* $FreeBSD$ */
+
+#if defined(__FreeBSD__)
+#include <sys/cdefs.h> /* prerequisite */
+#include <sys/types.h>
+#include <sys/param.h> /* defines used in kernel.h */
+#include <sys/filio.h> /* FIONBIO */
+#include <sys/malloc.h>
+#include <sys/socketvar.h> /* struct socket */
+#include <sys/socket.h> /* sockaddrs */
+#include <sys/sysctl.h>
+#include <net/if.h>
+#include <net/if_var.h>
+#include <net/bpf.h> /* BIOCIMMEDIATE */
+#include <machine/bus.h> /* bus_dmamap_* */
+#include <sys/endian.h>
+#elif defined(linux)
+#include "bsd_glue.h"
+#elif defined(__APPLE__)
+#warning OSX support is only partial
+#include "osx_glue.h"
+#elif defined (_WIN32)
+#include "win_glue.h"
+#endif
+
+/*
+ * common headers
+ */
+#include <net/netmap.h>
+#include <dev/netmap/netmap_kern.h>
+
+/*
+ * Translate the register-related fields of a legacy 'nmr' into the new-API
+ * 'req', fixing up hdr->nr_name for pipe requests.
+ *
+ * Side effects: when nmr->nr_flags carries NR_REG_DEFAULT, the register mode
+ * derived from nmr->nr_ringid is stored back into nmr->nr_flags; for pipe
+ * modes a '{N' (master) or '}N' (slave) suffix is appended to hdr->nr_name.
+ *
+ * Returns 0 on success, ENOBUFS if the pipe suffix does not fit in
+ * hdr->nr_name.
+ */
+static int
+nmreq_register_from_legacy(struct nmreq *nmr, struct nmreq_header *hdr,
+ struct nmreq_register *req)
+{
+ req->nr_offset = nmr->nr_offset;
+ req->nr_memsize = nmr->nr_memsize;
+ req->nr_tx_slots = nmr->nr_tx_slots;
+ req->nr_rx_slots = nmr->nr_rx_slots;
+ req->nr_tx_rings = nmr->nr_tx_rings;
+ req->nr_rx_rings = nmr->nr_rx_rings;
+ req->nr_mem_id = nmr->nr_arg2;
+ req->nr_ringid = nmr->nr_ringid & NETMAP_RING_MASK;
+ if ((nmr->nr_flags & NR_REG_MASK) == NR_REG_DEFAULT) {
+ /* Convert the older nmr->nr_ringid (original
+ * netmap control API) to nmr->nr_flags.
+ * The mode bits must be read from the unmasked
+ * nmr->nr_ringid: req->nr_ringid has already been
+ * reduced to the ring number by NETMAP_RING_MASK. */
+ u_int regmode = NR_REG_DEFAULT;
+ if (nmr->nr_ringid & NETMAP_SW_RING) {
+ regmode = NR_REG_SW;
+ } else if (nmr->nr_ringid & NETMAP_HW_RING) {
+ regmode = NR_REG_ONE_NIC;
+ } else {
+ regmode = NR_REG_ALL_NIC;
+ }
+ nmr->nr_flags = regmode |
+ (nmr->nr_flags & (~NR_REG_MASK));
+ }
+ req->nr_mode = nmr->nr_flags & NR_REG_MASK;
+ /* Fix nr_name, nr_mode and nr_ringid to handle pipe requests. */
+ if (req->nr_mode == NR_REG_PIPE_MASTER ||
+ req->nr_mode == NR_REG_PIPE_SLAVE) {
+ char suffix[10];
+ snprintf(suffix, sizeof(suffix), "%c%d",
+ (req->nr_mode == NR_REG_PIPE_MASTER ? '{' : '}'),
+ req->nr_ringid);
+ if (strlen(hdr->nr_name) + strlen(suffix)
+ >= sizeof(hdr->nr_name)) {
+ /* No space for the pipe suffix. */
+ return ENOBUFS;
+ }
+ strncat(hdr->nr_name, suffix, strlen(suffix));
+ req->nr_mode = NR_REG_ALL_NIC;
+ req->nr_ringid = 0;
+ }
+ req->nr_flags = nmr->nr_flags & (~NR_REG_MASK);
+ /* The poll hints also live in the unmasked legacy nr_ringid. */
+ if (nmr->nr_ringid & NETMAP_NO_TX_POLL) {
+ req->nr_flags |= NR_NO_TX_POLL;
+ }
+ if (nmr->nr_ringid & NETMAP_DO_RX_POLL) {
+ req->nr_flags |= NR_DO_RX_POLL;
+ }
+ /* nmr->nr_arg1 (nr_pipes) ignored */
+ req->nr_extra_bufs = nmr->nr_arg3;
+
+ return 0;
+}
+
+/* Convert the legacy 'nmr' struct into one of the nmreq_xyz structs
+ * (new API). The new struct is dynamically allocated.
+ *
+ * On success returns a header whose nr_body (when non-NULL) points to a
+ * separately allocated request body; the caller must release both with
+ * nm_os_free(). Returns NULL, with nothing left allocated, when an
+ * allocation fails, when the pipe suffix does not fit in the name, or
+ * for the unsupported passthrough commands.
+ *
+ * NOTE(review): the nr_cmd switch has no default case, so an unrecognized
+ * nr_cmd returns a header with nr_body == NULL and nr_reqtype never
+ * assigned here. */
+static struct nmreq_header *
+nmreq_from_legacy(struct nmreq *nmr, u_long ioctl_cmd)
+{
+ struct nmreq_header *hdr = nm_os_malloc(sizeof(*hdr));
+
+ if (hdr == NULL) {
+ goto oom;
+ }
+
+ /* Sanitize nmr->nr_name by adding the string terminator. */
+ if (ioctl_cmd == NIOCGINFO || ioctl_cmd == NIOCREGIF) {
+ nmr->nr_name[sizeof(nmr->nr_name) - 1] = '\0';
+ }
+
+ /* First prepare the request header. */
+ hdr->nr_version = NETMAP_API; /* new API */
+ strncpy(hdr->nr_name, nmr->nr_name, sizeof(nmr->nr_name));
+ hdr->nr_options = (uint64_t)NULL;
+ hdr->nr_body = (uint64_t)NULL;
+
+ switch (ioctl_cmd) {
+ case NIOCREGIF: {
+ switch (nmr->nr_cmd) {
+ case 0: {
+ /* Regular NIOCREGIF operation. */
+ struct nmreq_register *req = nm_os_malloc(sizeof(*req));
+ if (!req) { goto oom; }
+ hdr->nr_body = (uint64_t)req;
+ hdr->nr_reqtype = NETMAP_REQ_REGISTER;
+ if (nmreq_register_from_legacy(nmr, hdr, req)) {
+ goto oom;
+ }
+ break;
+ }
+ case NETMAP_BDG_ATTACH: {
+ struct nmreq_vale_attach *req = nm_os_malloc(sizeof(*req));
+ if (!req) { goto oom; }
+ hdr->nr_body = (uint64_t)req;
+ hdr->nr_reqtype = NETMAP_REQ_VALE_ATTACH;
+ if (nmreq_register_from_legacy(nmr, hdr, &req->reg)) {
+ goto oom;
+ }
+ /* Fix nr_mode, starting from nr_arg1. */
+ if (nmr->nr_arg1 & NETMAP_BDG_HOST) {
+ req->reg.nr_mode = NR_REG_NIC_SW;
+ } else {
+ req->reg.nr_mode = NR_REG_ALL_NIC;
+ }
+ break;
+ }
+ case NETMAP_BDG_DETACH: {
+ /* Check the allocation like every other case does,
+ * instead of handing a NULL body downstream. */
+ struct nmreq_vale_detach *req = nm_os_malloc(sizeof(*req));
+ if (!req) { goto oom; }
+ hdr->nr_body = (uint64_t)req;
+ hdr->nr_reqtype = NETMAP_REQ_VALE_DETACH;
+ break;
+ }
+ case NETMAP_BDG_VNET_HDR:
+ case NETMAP_VNET_HDR_GET: {
+ struct nmreq_port_hdr *req = nm_os_malloc(sizeof(*req));
+ if (!req) { goto oom; }
+ hdr->nr_body = (uint64_t)req;
+ hdr->nr_reqtype = (nmr->nr_cmd == NETMAP_BDG_VNET_HDR) ?
+ NETMAP_REQ_PORT_HDR_SET : NETMAP_REQ_PORT_HDR_GET;
+ req->nr_hdr_len = nmr->nr_arg1;
+ break;
+ }
+ case NETMAP_BDG_NEWIF : {
+ struct nmreq_vale_newif *req = nm_os_malloc(sizeof(*req));
+ if (!req) { goto oom; }
+ hdr->nr_body = (uint64_t)req;
+ hdr->nr_reqtype = NETMAP_REQ_VALE_NEWIF;
+ req->nr_tx_slots = nmr->nr_tx_slots;
+ req->nr_rx_slots = nmr->nr_rx_slots;
+ req->nr_tx_rings = nmr->nr_tx_rings;
+ req->nr_rx_rings = nmr->nr_rx_rings;
+ req->nr_mem_id = nmr->nr_arg2;
+ break;
+ }
+ case NETMAP_BDG_DELIF: {
+ /* No request body is needed: the name is in the header. */
+ hdr->nr_reqtype = NETMAP_REQ_VALE_DELIF;
+ break;
+ }
+ case NETMAP_BDG_POLLING_ON:
+ case NETMAP_BDG_POLLING_OFF: {
+ struct nmreq_vale_polling *req = nm_os_malloc(sizeof(*req));
+ if (!req) { goto oom; }
+ hdr->nr_body = (uint64_t)req;
+ hdr->nr_reqtype = (nmr->nr_cmd == NETMAP_BDG_POLLING_ON) ?
+ NETMAP_REQ_VALE_POLLING_ENABLE :
+ NETMAP_REQ_VALE_POLLING_DISABLE;
+ switch (nmr->nr_flags & NR_REG_MASK) {
+ default:
+ req->nr_mode = 0; /* invalid */
+ break;
+ case NR_REG_ONE_NIC:
+ req->nr_mode = NETMAP_POLLING_MODE_MULTI_CPU;
+ break;
+ case NR_REG_ALL_NIC:
+ req->nr_mode = NETMAP_POLLING_MODE_SINGLE_CPU;
+ break;
+ }
+ req->nr_first_cpu_id = nmr->nr_ringid & NETMAP_RING_MASK;
+ req->nr_num_polling_cpus = nmr->nr_arg1;
+ break;
+ }
+ case NETMAP_PT_HOST_CREATE:
+ case NETMAP_PT_HOST_DELETE: {
+ D("Netmap passthrough not supported yet");
+ /* No body has been allocated on this path, so only
+ * the header must be released before bailing out. */
+ nm_os_free(hdr);
+ return NULL;
+ }
+ }
+ break;
+ }
+ case NIOCGINFO: {
+ if (nmr->nr_cmd == NETMAP_BDG_LIST) {
+ struct nmreq_vale_list *req = nm_os_malloc(sizeof(*req));
+ if (!req) { goto oom; }
+ hdr->nr_body = (uint64_t)req;
+ hdr->nr_reqtype = NETMAP_REQ_VALE_LIST;
+ req->nr_bridge_idx = nmr->nr_arg1;
+ req->nr_port_idx = nmr->nr_arg2;
+ } else {
+ /* Regular NIOCGINFO. */
+ struct nmreq_port_info_get *req = nm_os_malloc(sizeof(*req));
+ if (!req) { goto oom; }
+ hdr->nr_body = (uint64_t)req;
+ hdr->nr_reqtype = NETMAP_REQ_PORT_INFO_GET;
+ req->nr_offset = nmr->nr_offset;
+ req->nr_memsize = nmr->nr_memsize;
+ req->nr_tx_slots = nmr->nr_tx_slots;
+ req->nr_rx_slots = nmr->nr_rx_slots;
+ req->nr_tx_rings = nmr->nr_tx_rings;
+ req->nr_rx_rings = nmr->nr_rx_rings;
+ req->nr_mem_id = nmr->nr_arg2;
+ }
+ break;
+ }
+ }
+
+ return hdr;
+oom:
+ /* Shared failure exit: release whatever has been allocated so far. */
+ if (hdr) {
+ if (hdr->nr_body) {
+ nm_os_free((void *)hdr->nr_body);
+ }
+ nm_os_free(hdr);
+ }
+ D("Failed to allocate memory for nmreq_xyz struct");
+
+ return NULL;
+}
+
+/* Copy the result fields of a new-API register request back into the
+ * legacy 'nmr': offset, memory size, slot and ring counts, plus the memory
+ * id (nr_arg2) and extra buffers (nr_arg3). No other field of 'nmr' is
+ * written here. */
+static void
+nmreq_register_to_legacy(const struct nmreq_register *req, struct nmreq *nmr)
+{
+ nmr->nr_offset = req->nr_offset;
+ nmr->nr_memsize = req->nr_memsize;
+ nmr->nr_tx_slots = req->nr_tx_slots;
+ nmr->nr_rx_slots = req->nr_rx_slots;
+ nmr->nr_tx_rings = req->nr_tx_rings;
+ nmr->nr_rx_rings = req->nr_rx_rings;
+ nmr->nr_arg2 = req->nr_mem_id;
+ nmr->nr_arg3 = req->nr_extra_bufs;
+}
+
+/* Convert a nmreq_xyz struct (new API) back to the legacy 'nmr' struct,
+ * copying only the fields that the legacy caller expects to see updated.
+ * Nothing is freed here: hdr and hdr->nr_body remain the caller's to
+ * release. The return value is always 0. */
+static int
+nmreq_to_legacy(struct nmreq_header *hdr, struct nmreq *nmr)
+{
+ int ret = 0;
+
+ /* We only write-back the fields that the user expects to be
+ * written back. */
+ switch (hdr->nr_reqtype) {
+ case NETMAP_REQ_REGISTER: {
+ struct nmreq_register *req =
+ (struct nmreq_register *)hdr->nr_body;
+ nmreq_register_to_legacy(req, nmr);
+ break;
+ }
+ case NETMAP_REQ_PORT_INFO_GET: {
+ struct nmreq_port_info_get *req =
+ (struct nmreq_port_info_get *)hdr->nr_body;
+ nmr->nr_offset = req->nr_offset;
+ nmr->nr_memsize = req->nr_memsize;
+ nmr->nr_tx_slots = req->nr_tx_slots;
+ nmr->nr_rx_slots = req->nr_rx_slots;
+ nmr->nr_tx_rings = req->nr_tx_rings;
+ nmr->nr_rx_rings = req->nr_rx_rings;
+ nmr->nr_arg2 = req->nr_mem_id;
+ break;
+ }
+ case NETMAP_REQ_VALE_ATTACH: {
+ /* The attach request embeds a register request in 'reg'. */
+ struct nmreq_vale_attach *req =
+ (struct nmreq_vale_attach *)hdr->nr_body;
+ nmreq_register_to_legacy(&req->reg, nmr);
+ break;
+ }
+ case NETMAP_REQ_VALE_DETACH: {
+ /* Nothing to report back. */
+ break;
+ }
+ case NETMAP_REQ_VALE_LIST: {
+ struct nmreq_vale_list *req =
+ (struct nmreq_vale_list *)hdr->nr_body;
+ /* NOTE(review): strncpy() leaves nmr->nr_name without a
+ * terminator if hdr->nr_name fills the whole destination;
+ * confirm legacy callers tolerate that. */
+ strncpy(nmr->nr_name, hdr->nr_name, sizeof(nmr->nr_name));
+ nmr->nr_arg1 = req->nr_bridge_idx;
+ nmr->nr_arg2 = req->nr_port_idx;
+ break;
+ }
+ case NETMAP_REQ_PORT_HDR_SET:
+ case NETMAP_REQ_PORT_HDR_GET: {
+ struct nmreq_port_hdr *req =
+ (struct nmreq_port_hdr *)hdr->nr_body;
+ nmr->nr_arg1 = req->nr_hdr_len;
+ break;
+ }
+ case NETMAP_REQ_VALE_NEWIF: {
+ struct nmreq_vale_newif *req =
+ (struct nmreq_vale_newif *)hdr->nr_body;
+ nmr->nr_tx_slots = req->nr_tx_slots;
+ nmr->nr_rx_slots = req->nr_rx_slots;
+ nmr->nr_tx_rings = req->nr_tx_rings;
+ nmr->nr_rx_rings = req->nr_rx_rings;
+ nmr->nr_arg2 = req->nr_mem_id;
+ break;
+ }
+ case NETMAP_REQ_VALE_DELIF:
+ case NETMAP_REQ_VALE_POLLING_ENABLE:
+ case NETMAP_REQ_VALE_POLLING_DISABLE: {
+ /* Nothing to report back. */
+ break;
+ }
+ }
+
+ return ret;
+}
+
+/* Entry point for ioctl commands of the legacy control API.
+ *
+ * NIOCGINFO and NIOCREGIF are converted to a new-API request, executed
+ * through netmap_ioctl() as NIOCCTRL, and the results are copied back into
+ * the caller's 'struct nmreq' only when that call succeeds. NIOCCONFIG
+ * (WITH_VALE only) goes to netmap_bdg_config(). On FreeBSD a few
+ * FIO*/BIOC* commands are accepted and ignored, and any other command is
+ * forwarded to ifioctl() for the interface named in the request; elsewhere
+ * unknown commands yield EOPNOTSUPP.
+ *
+ * Returns 0 on success or an errno value. */
+int
+netmap_ioctl_legacy(struct netmap_priv_d *priv, u_long cmd, caddr_t data,
+ struct thread *td)
+{
+ int error = 0;
+
+ switch (cmd) {
+ case NIOCGINFO:
+ case NIOCREGIF: {
+ /* Request for the legacy control API. Convert it to a
+ * NIOCCTRL request. */
+ struct nmreq *nmr = (struct nmreq *) data;
+ struct nmreq_header *hdr = nmreq_from_legacy(nmr, cmd);
+ if (hdr == NULL) { /* conversion failed; reported as out of memory */
+ return ENOMEM;
+ }
+ error = netmap_ioctl(priv, NIOCCTRL, (caddr_t)hdr, td,
+ /*nr_body_is_user=*/0);
+ if (error == 0) {
+ nmreq_to_legacy(hdr, nmr);
+ }
+ /* The converted request is released here, on success and
+ * on failure alike. */
+ if (hdr->nr_body) {
+ nm_os_free((void *)hdr->nr_body);
+ }
+ nm_os_free(hdr);
+ break;
+ }
+#ifdef WITH_VALE
+ case NIOCCONFIG: {
+ struct nm_ifreq *nr = (struct nm_ifreq *)data;
+ error = netmap_bdg_config(nr);
+ break;
+ }
+#endif
+#ifdef __FreeBSD__
+ case FIONBIO:
+ case FIOASYNC:
+ ND("FIONBIO/FIOASYNC are no-ops");
+ break;
+
+ case BIOCIMMEDIATE:
+ case BIOCGHDRCMPLT:
+ case BIOCSHDRCMPLT:
+ case BIOCSSEESENT:
+ D("ignore BIOCIMMEDIATE/BIOCSHDRCMPLT/BIOCSHDRCMPLT/BIOCSSEESENT");
+ break;
+
+ default: /* allow device-specific ioctls */
+ {
+ struct nmreq *nmr = (struct nmreq *)data;
+ struct ifnet *ifp = ifunit_ref(nmr->nr_name);
+ if (ifp == NULL) {
+ error = ENXIO;
+ } else {
+ /* ifioctl() is handed a zeroed on-stack socket in
+ * which only so_vnet is filled in. */
+ struct socket so;
+
+ bzero(&so, sizeof(so));
+ so.so_vnet = ifp->if_vnet;
+ // so->so_proto not null.
+ error = ifioctl(&so, cmd, data, td);
+ /* Drop the reference taken by ifunit_ref(). */
+ if_rele(ifp);
+ }
+ break;
+ }
+
+#else /* linux */
+ default:
+ error = EOPNOTSUPP;
+#endif /* linux */
+ }
+
+ return error;
+}
diff --git a/sys/dev/netmap/netmap_mem2.c b/sys/dev/netmap/netmap_mem2.c
index 1f206a1b02927..b6d2d7e817d61 100644
--- a/sys/dev/netmap/netmap_mem2.c
+++ b/sys/dev/netmap/netmap_mem2.c
@@ -110,6 +110,7 @@ struct netmap_obj_pool {
uint32_t *bitmap; /* one bit per buffer, 1 means free */
uint32_t *invalid_bitmap;/* one bit per buffer, 1 means invalid */
uint32_t bitmap_slots; /* number of uint32 entries in bitmap */
+ int alloc_done; /* we have allocated the memory */
/* ---------------------------------------------------*/
/* limits */
@@ -131,7 +132,11 @@ struct netmap_obj_pool {
};
#define NMA_LOCK_T NM_MTX_T
-
+#define NMA_LOCK_INIT(n) NM_MTX_INIT((n)->nm_mtx)
+#define NMA_LOCK_DESTROY(n) NM_MTX_DESTROY((n)->nm_mtx)
+#define NMA_LOCK(n) NM_MTX_LOCK((n)->nm_mtx)
+#define NMA_SPINLOCK(n) NM_MTX_SPINLOCK((n)->nm_mtx)
+#define NMA_UNLOCK(n) NM_MTX_UNLOCK((n)->nm_mtx)
struct netmap_mem_ops {
int (*nmd_get_lut)(struct netmap_mem_d *, struct netmap_lut*);
@@ -179,56 +184,126 @@ struct netmap_mem_d {
char name[NM_MEM_NAMESZ];
};
-/*
- * XXX need to fix the case of t0 == void
- */
-#define NMD_DEFCB(t0, name) \
-t0 \
-netmap_mem_##name(struct netmap_mem_d *nmd) \
-{ \
- return nmd->ops->nmd_##name(nmd); \
+int
+netmap_mem_get_lut(struct netmap_mem_d *nmd, struct netmap_lut *lut)
+{
+ int rv;
+
+ NMA_LOCK(nmd);
+ rv = nmd->ops->nmd_get_lut(nmd, lut);
+ NMA_UNLOCK(nmd);
+
+ return rv;
+}
+
+int
+netmap_mem_get_info(struct netmap_mem_d *nmd, uint64_t *size,
+ u_int *memflags, nm_memid_t *memid)
+{
+ int rv;
+
+ NMA_LOCK(nmd);
+ rv = nmd->ops->nmd_get_info(nmd, size, memflags, memid);
+ NMA_UNLOCK(nmd);
+
+ return rv;
+}
+
+vm_paddr_t
+netmap_mem_ofstophys(struct netmap_mem_d *nmd, vm_ooffset_t off)
+{
+ vm_paddr_t pa;
+
+#if defined(__FreeBSD__)
+ /* This function is called by netmap_dev_pager_fault(), which holds a
+ * non-sleepable lock since FreeBSD 12. Since we cannot sleep, we
+ * spin on the trylock. */
+ NMA_SPINLOCK(nmd);
+#else
+ NMA_LOCK(nmd);
+#endif
+ pa = nmd->ops->nmd_ofstophys(nmd, off);
+ NMA_UNLOCK(nmd);
+
+ return pa;
+}
+
+static int
+netmap_mem_config(struct netmap_mem_d *nmd)
+{
+ if (nmd->active) {
+ /* already in use. Not fatal, but we
+ * cannot change the configuration
+ */
+ return 0;
+ }
+
+ return nmd->ops->nmd_config(nmd);
+}
+
+ssize_t
+netmap_mem_if_offset(struct netmap_mem_d *nmd, const void *off)
+{
+ ssize_t rv;
+
+ NMA_LOCK(nmd);
+ rv = nmd->ops->nmd_if_offset(nmd, off);
+ NMA_UNLOCK(nmd);
+
+ return rv;
}
-#define NMD_DEFCB1(t0, name, t1) \
-t0 \
-netmap_mem_##name(struct netmap_mem_d *nmd, t1 a1) \
-{ \
- return nmd->ops->nmd_##name(nmd, a1); \
+static void
+netmap_mem_delete(struct netmap_mem_d *nmd)
+{
+ nmd->ops->nmd_delete(nmd);
}
-#define NMD_DEFCB3(t0, name, t1, t2, t3) \
-t0 \
-netmap_mem_##name(struct netmap_mem_d *nmd, t1 a1, t2 a2, t3 a3) \
-{ \
- return nmd->ops->nmd_##name(nmd, a1, a2, a3); \
+struct netmap_if *
+netmap_mem_if_new(struct netmap_adapter *na, struct netmap_priv_d *priv)
+{
+ struct netmap_if *nifp;
+ struct netmap_mem_d *nmd = na->nm_mem;
+
+ NMA_LOCK(nmd);
+ nifp = nmd->ops->nmd_if_new(na, priv);
+ NMA_UNLOCK(nmd);
+
+ return nifp;
}
-#define NMD_DEFNACB(t0, name) \
-t0 \
-netmap_mem_##name(struct netmap_adapter *na) \
-{ \
- return na->nm_mem->ops->nmd_##name(na); \
+void
+netmap_mem_if_delete(struct netmap_adapter *na, struct netmap_if *nif)
+{
+ struct netmap_mem_d *nmd = na->nm_mem;
+
+ NMA_LOCK(nmd);
+ nmd->ops->nmd_if_delete(na, nif);
+ NMA_UNLOCK(nmd);
}
-#define NMD_DEFNACB1(t0, name, t1) \
-t0 \
-netmap_mem_##name(struct netmap_adapter *na, t1 a1) \
-{ \
- return na->nm_mem->ops->nmd_##name(na, a1); \
+int
+netmap_mem_rings_create(struct netmap_adapter *na)
+{
+ int rv;
+ struct netmap_mem_d *nmd = na->nm_mem;
+
+ NMA_LOCK(nmd);
+ rv = nmd->ops->nmd_rings_create(na);
+ NMA_UNLOCK(nmd);
+
+ return rv;
}
-NMD_DEFCB1(int, get_lut, struct netmap_lut *);
-NMD_DEFCB3(int, get_info, uint64_t *, u_int *, uint16_t *);
-NMD_DEFCB1(vm_paddr_t, ofstophys, vm_ooffset_t);
-static int netmap_mem_config(struct netmap_mem_d *);
-NMD_DEFCB(int, config);
-NMD_DEFCB1(ssize_t, if_offset, const void *);
-NMD_DEFCB(void, delete);
+void
+netmap_mem_rings_delete(struct netmap_adapter *na)
+{
+ struct netmap_mem_d *nmd = na->nm_mem;
-NMD_DEFNACB1(struct netmap_if *, if_new, struct netmap_priv_d *);
-NMD_DEFNACB1(void, if_delete, struct netmap_if *);
-NMD_DEFNACB(int, rings_create);
-NMD_DEFNACB(void, rings_delete);
+ NMA_LOCK(nmd);
+ nmd->ops->nmd_rings_delete(na);
+ NMA_UNLOCK(nmd);
+}
static int netmap_mem_map(struct netmap_obj_pool *, struct netmap_adapter *);
static int netmap_mem_unmap(struct netmap_obj_pool *, struct netmap_adapter *);
@@ -241,12 +316,6 @@ netmap_mem_get_id(struct netmap_mem_d *nmd)
return nmd->nm_id;
}
-#define NMA_LOCK_INIT(n) NM_MTX_INIT((n)->nm_mtx)
-#define NMA_LOCK_DESTROY(n) NM_MTX_DESTROY((n)->nm_mtx)
-#define NMA_LOCK(n) NM_MTX_LOCK((n)->nm_mtx)
-#define NMA_SPINLOCK(n) NM_MTX_SPINLOCK((n)->nm_mtx)
-#define NMA_UNLOCK(n) NM_MTX_UNLOCK((n)->nm_mtx)
-
#ifdef NM_DEBUG_MEM_PUTGET
#define NM_DBG_REFC(nmd, func, line) \
nm_prinf("%s:%d mem[%d] -> %d\n", func, line, (nmd)->nm_id, (nmd)->refcount);
@@ -285,22 +354,32 @@ __netmap_mem_put(struct netmap_mem_d *nmd, const char *func, int line)
int
netmap_mem_finalize(struct netmap_mem_d *nmd, struct netmap_adapter *na)
{
+ int lasterr = 0;
if (nm_mem_assign_group(nmd, na->pdev) < 0) {
return ENOMEM;
- } else {
- NMA_LOCK(nmd);
- nmd->lasterr = nmd->ops->nmd_finalize(nmd);
- NMA_UNLOCK(nmd);
}
+ NMA_LOCK(nmd);
+
+ if (netmap_mem_config(nmd))
+ goto out;
+
+ nmd->active++;
+
+ nmd->lasterr = nmd->ops->nmd_finalize(nmd);
+
if (!nmd->lasterr && na->pdev) {
nmd->lasterr = netmap_mem_map(&nmd->pools[NETMAP_BUF_POOL], na);
- if (nmd->lasterr) {
- netmap_mem_deref(nmd, na);
- }
}
- return nmd->lasterr;
+out:
+ lasterr = nmd->lasterr;
+ NMA_UNLOCK(nmd);
+
+ if (lasterr)
+ netmap_mem_deref(nmd, na);
+
+ return lasterr;
}
static int
@@ -400,6 +479,10 @@ netmap_mem_deref(struct netmap_mem_d *nmd, struct netmap_adapter *na)
}
nmd->ops->nmd_deref(nmd);
+ nmd->active--;
+ if (!nmd->active)
+ nmd->nm_grp = -1;
+
NMA_UNLOCK(nmd);
return last_user;
}
@@ -706,14 +789,6 @@ netmap_mem2_ofstophys(struct netmap_mem_d* nmd, vm_ooffset_t offset)
vm_paddr_t pa;
struct netmap_obj_pool *p;
-#if defined(__FreeBSD__)
- /* This function is called by netmap_dev_pager_fault(), which holds a
- * non-sleepable lock since FreeBSD 12. Since we cannot sleep, we
- * spin on the trylock. */
- NMA_SPINLOCK(nmd);
-#else
- NMA_LOCK(nmd);
-#endif
p = nmd->pools;
for (i = 0; i < NETMAP_POOLS_NR; offset -= p[i].memtotal, i++) {
@@ -727,7 +802,6 @@ netmap_mem2_ofstophys(struct netmap_mem_d* nmd, vm_ooffset_t offset)
pa = vtophys(p[i].lut[offset / p[i]._objsize].vaddr);
pa.QuadPart += offset % p[i]._objsize;
#endif
- NMA_UNLOCK(nmd);
return pa;
}
/* this is only in case of errors */
@@ -738,7 +812,6 @@ netmap_mem2_ofstophys(struct netmap_mem_d* nmd, vm_ooffset_t offset)
p[NETMAP_IF_POOL].memtotal
+ p[NETMAP_RING_POOL].memtotal
+ p[NETMAP_BUF_POOL].memtotal);
- NMA_UNLOCK(nmd);
#ifndef _WIN32
return 0; /* bad address */
#else
@@ -775,10 +848,10 @@ netmap_mem2_ofstophys(struct netmap_mem_d* nmd, vm_ooffset_t offset)
PMDL
win32_build_user_vm_map(struct netmap_mem_d* nmd)
{
- int i, j;
- size_t memsize;
u_int memflags, ofs = 0;
PMDL mainMdl, tempMdl;
+ uint64_t memsize;
+ int i, j;
if (netmap_mem_get_info(nmd, &memsize, &memflags, NULL)) {
D("memory not finalised yet");
@@ -847,11 +920,10 @@ netmap_mem2_get_pool_info(struct netmap_mem_d* nmd, u_int pool, u_int *clustsize
}
static int
-netmap_mem2_get_info(struct netmap_mem_d* nmd, uint64_t* size, u_int *memflags,
- nm_memid_t *id)
+netmap_mem2_get_info(struct netmap_mem_d* nmd, uint64_t* size,
+ u_int *memflags, nm_memid_t *id)
{
int error = 0;
- NMA_LOCK(nmd);
error = netmap_mem_config(nmd);
if (error)
goto out;
@@ -872,7 +944,6 @@ netmap_mem2_get_info(struct netmap_mem_d* nmd, uint64_t* size, u_int *memflags,
if (id)
*id = nmd->nm_id;
out:
- NMA_UNLOCK(nmd);
return error;
}
@@ -916,11 +987,7 @@ netmap_obj_offset(struct netmap_obj_pool *p, const void *vaddr)
static ssize_t
netmap_mem2_if_offset(struct netmap_mem_d *nmd, const void *addr)
{
- ssize_t v;
- NMA_LOCK(nmd);
- v = netmap_if_offset(nmd, addr);
- NMA_UNLOCK(nmd);
- return v;
+ return netmap_if_offset(nmd, addr);
}
/*
@@ -1118,7 +1185,7 @@ netmap_new_bufs(struct netmap_mem_d *nmd, struct netmap_slot *slot, u_int n)
slot[i].ptr = 0;
}
- ND("allocated %d buffers, %d available, first at %d", n, p->objfree, pos);
+ ND("%s: allocated %d buffers, %d available, first at %d", p->name, n, p->objfree, pos);
return (0);
cleanup:
@@ -1163,9 +1230,11 @@ netmap_free_bufs(struct netmap_mem_d *nmd, struct netmap_slot *slot, u_int n)
u_int i;
for (i = 0; i < n; i++) {
- if (slot[i].buf_idx > 2)
+ if (slot[i].buf_idx > 1)
netmap_free_buf(nmd, slot[i].buf_idx);
}
+ ND("%s: released some buffers, available: %u",
+ p->name, p->objfree);
}
static void
@@ -1180,6 +1249,12 @@ netmap_reset_obj_allocator(struct netmap_obj_pool *p)
if (p->invalid_bitmap)
nm_os_free(p->invalid_bitmap);
p->invalid_bitmap = NULL;
+ if (!p->alloc_done) {
+ /* allocation was done by somebody else.
+ * Let them clean up after themselves.
+ */
+ return;
+ }
if (p->lut) {
u_int i;
@@ -1199,6 +1274,7 @@ netmap_reset_obj_allocator(struct netmap_obj_pool *p)
p->memtotal = 0;
p->numclusters = 0;
p->objfree = 0;
+ p->alloc_done = 0;
}
/*
@@ -1310,13 +1386,20 @@ netmap_finalize_obj_allocator(struct netmap_obj_pool *p)
size_t n;
if (p->lut) {
- /* already finalized, nothing to do */
+ /* if the lut is already there we assume that also all the
+ * clusters have already been allocated, possibly by somebody
+ * else (e.g., extmem). In the latter case, the alloc_done flag
+ * will remain at zero, so that we will not attempt to
+ * deallocate the clusters by ourselves in
+ * netmap_reset_obj_allocator.
+ */
return 0;
}
/* optimistically assume we have enough memory */
p->numclusters = p->_numclusters;
p->objtotal = p->_objtotal;
+ p->alloc_done = 1;
p->lut = nm_alloc_lut(p->objtotal);
if (p->lut == NULL) {
@@ -1426,7 +1509,7 @@ netmap_mem_reset_all(struct netmap_mem_d *nmd)
static int
netmap_mem_unmap(struct netmap_obj_pool *p, struct netmap_adapter *na)
{
- int i, lim = p->_objtotal;
+ int i, lim = p->objtotal;
struct netmap_lut *lut = &na->na_lut;
if (na == NULL || na->pdev == NULL)
@@ -1675,10 +1758,6 @@ netmap_mem2_config(struct netmap_mem_d *nmd)
{
int i;
- if (nmd->active)
- /* already in use, we cannot change the configuration */
- goto out;
-
if (!netmap_mem_params_changed(nmd->params))
goto out;
@@ -1707,19 +1786,8 @@ out:
static int
netmap_mem2_finalize(struct netmap_mem_d *nmd)
{
- int err;
-
- /* update configuration if changed */
- if (netmap_mem_config(nmd))
- goto out1;
-
- nmd->active++;
-
- if (nmd->flags & NETMAP_MEM_FINALIZED) {
- /* may happen if config is not changed */
- D("nothing to do");
+ if (nmd->flags & NETMAP_MEM_FINALIZED)
goto out;
- }
if (netmap_mem_finalize_all(nmd))
goto out;
@@ -1727,13 +1795,7 @@ netmap_mem2_finalize(struct netmap_mem_d *nmd)
nmd->lasterr = 0;
out:
- if (nmd->lasterr)
- nmd->active--;
-out1:
- err = nmd->lasterr;
-
- return err;
-
+ return nmd->lasterr;
}
static void
@@ -1782,7 +1844,7 @@ netmap_free_rings(struct netmap_adapter *na)
for_rx_tx(t) {
u_int i;
for (i = 0; i < nma_get_nrings(na, t) + 1; i++) {
- struct netmap_kring *kring = &NMR(na, t)[i];
+ struct netmap_kring *kring = NMR(na, t)[i];
struct netmap_ring *ring = kring->ring;
if (ring == NULL || kring->users > 0 || (kring->nr_kflags & NKR_NEEDRING)) {
@@ -1793,8 +1855,12 @@ netmap_free_rings(struct netmap_adapter *na)
}
if (netmap_verbose)
D("deleting ring %s", kring->name);
- if (i != nma_get_nrings(na, t) || na->na_flags & NAF_HOST_RINGS)
+ if (!(kring->nr_kflags & NKR_FAKERING)) {
+ ND("freeing bufs for %s", kring->name);
netmap_free_bufs(na->nm_mem, ring->slot, kring->nkr_num_slots);
+ } else {
+ ND("NOT freeing bufs for %s", kring->name);
+ }
netmap_ring_free(na->nm_mem, ring);
kring->ring = NULL;
}
@@ -1813,13 +1879,11 @@ netmap_mem2_rings_create(struct netmap_adapter *na)
{
enum txrx t;
- NMA_LOCK(na->nm_mem);
-
for_rx_tx(t) {
u_int i;
for (i = 0; i <= nma_get_nrings(na, t); i++) {
- struct netmap_kring *kring = &NMR(na, t)[i];
+ struct netmap_kring *kring = NMR(na, t)[i];
struct netmap_ring *ring = kring->ring;
u_int len, ndesc;
@@ -1857,14 +1921,16 @@ netmap_mem2_rings_create(struct netmap_adapter *na)
ND("%s h %d c %d t %d", kring->name,
ring->head, ring->cur, ring->tail);
ND("initializing slots for %s_ring", nm_txrx2str(txrx));
- if (i != nma_get_nrings(na, t) || (na->na_flags & NAF_HOST_RINGS)) {
+ if (!(kring->nr_kflags & NKR_FAKERING)) {
/* this is a real ring */
+ ND("allocating buffers for %s", kring->name);
if (netmap_new_bufs(na->nm_mem, ring->slot, ndesc)) {
D("Cannot allocate buffers for %s_ring", nm_txrx2str(t));
goto cleanup;
}
} else {
/* this is a fake ring, set all indices to 0 */
+ ND("NOT allocating buffers for %s", kring->name);
netmap_mem_set_ring(na->nm_mem, ring->slot, ndesc, 0);
}
/* ring info */
@@ -1873,15 +1939,11 @@ netmap_mem2_rings_create(struct netmap_adapter *na)
}
}
- NMA_UNLOCK(na->nm_mem);
-
return 0;
cleanup:
netmap_free_rings(na);
- NMA_UNLOCK(na->nm_mem);
-
return ENOMEM;
}
@@ -1889,11 +1951,7 @@ static void
netmap_mem2_rings_delete(struct netmap_adapter *na)
{
/* last instance, release bufs and rings */
- NMA_LOCK(na->nm_mem);
-
netmap_free_rings(na);
-
- NMA_UNLOCK(na->nm_mem);
}
@@ -1924,8 +1982,6 @@ netmap_mem2_if_new(struct netmap_adapter *na, struct netmap_priv_d *priv)
* to the tx and rx rings in the shared memory region.
*/
- NMA_LOCK(na->nm_mem);
-
len = sizeof(struct netmap_if) + (ntot * sizeof(ssize_t));
nifp = netmap_if_malloc(na->nm_mem, len);
if (nifp == NULL) {
@@ -1949,10 +2005,10 @@ netmap_mem2_if_new(struct netmap_adapter *na, struct netmap_priv_d *priv)
* ring, like we do for buffers? */
ssize_t ofs = 0;
- if (na->tx_rings[i].ring != NULL && i >= priv->np_qfirst[NR_TX]
+ if (na->tx_rings[i]->ring != NULL && i >= priv->np_qfirst[NR_TX]
&& i < priv->np_qlast[NR_TX]) {
ofs = netmap_ring_offset(na->nm_mem,
- na->tx_rings[i].ring) - base;
+ na->tx_rings[i]->ring) - base;
}
*(ssize_t *)(uintptr_t)&nifp->ring_ofs[i] = ofs;
}
@@ -1961,16 +2017,14 @@ netmap_mem2_if_new(struct netmap_adapter *na, struct netmap_priv_d *priv)
* ring, like we do for buffers? */
ssize_t ofs = 0;
- if (na->rx_rings[i].ring != NULL && i >= priv->np_qfirst[NR_RX]
+ if (na->rx_rings[i]->ring != NULL && i >= priv->np_qfirst[NR_RX]
&& i < priv->np_qlast[NR_RX]) {
ofs = netmap_ring_offset(na->nm_mem,
- na->rx_rings[i].ring) - base;
+ na->rx_rings[i]->ring) - base;
}
*(ssize_t *)(uintptr_t)&nifp->ring_ofs[i+n[NR_TX]] = ofs;
}
- NMA_UNLOCK(na->nm_mem);
-
return (nifp);
}
@@ -1980,21 +2034,15 @@ netmap_mem2_if_delete(struct netmap_adapter *na, struct netmap_if *nifp)
if (nifp == NULL)
/* nothing to do */
return;
- NMA_LOCK(na->nm_mem);
if (nifp->ni_bufs_head)
netmap_extra_free(na, nifp->ni_bufs_head);
netmap_if_free(na->nm_mem, nifp);
-
- NMA_UNLOCK(na->nm_mem);
}
static void
netmap_mem2_deref(struct netmap_mem_d *nmd)
{
- nmd->active--;
- if (!nmd->active)
- nmd->nm_grp = -1;
if (netmap_verbose)
D("active = %d", nmd->active);
@@ -2016,42 +2064,32 @@ struct netmap_mem_ops netmap_mem_global_ops = {
};
int
-netmap_mem_pools_info_get(struct nmreq *nmr, struct netmap_mem_d *nmd)
+netmap_mem_pools_info_get(struct nmreq_pools_info *req,
+ struct netmap_mem_d *nmd)
{
- uintptr_t *pp = (uintptr_t *)&nmr->nr_arg1;
- struct netmap_pools_info *upi = (struct netmap_pools_info *)(*pp);
- struct netmap_pools_info pi;
- uint64_t memsize;
- uint16_t memid;
int ret;
- ret = netmap_mem_get_info(nmd, &memsize, NULL, &memid);
+ ret = netmap_mem_get_info(nmd, &req->nr_memsize, NULL,
+ &req->nr_mem_id);
if (ret) {
return ret;
}
- pi.memsize = memsize;
- pi.memid = memid;
NMA_LOCK(nmd);
- pi.if_pool_offset = 0;
- pi.if_pool_objtotal = nmd->pools[NETMAP_IF_POOL].objtotal;
- pi.if_pool_objsize = nmd->pools[NETMAP_IF_POOL]._objsize;
+ req->nr_if_pool_offset = 0;
+ req->nr_if_pool_objtotal = nmd->pools[NETMAP_IF_POOL].objtotal;
+ req->nr_if_pool_objsize = nmd->pools[NETMAP_IF_POOL]._objsize;
- pi.ring_pool_offset = nmd->pools[NETMAP_IF_POOL].memtotal;
- pi.ring_pool_objtotal = nmd->pools[NETMAP_RING_POOL].objtotal;
- pi.ring_pool_objsize = nmd->pools[NETMAP_RING_POOL]._objsize;
+ req->nr_ring_pool_offset = nmd->pools[NETMAP_IF_POOL].memtotal;
+ req->nr_ring_pool_objtotal = nmd->pools[NETMAP_RING_POOL].objtotal;
+ req->nr_ring_pool_objsize = nmd->pools[NETMAP_RING_POOL]._objsize;
- pi.buf_pool_offset = nmd->pools[NETMAP_IF_POOL].memtotal +
+ req->nr_buf_pool_offset = nmd->pools[NETMAP_IF_POOL].memtotal +
nmd->pools[NETMAP_RING_POOL].memtotal;
- pi.buf_pool_objtotal = nmd->pools[NETMAP_BUF_POOL].objtotal;
- pi.buf_pool_objsize = nmd->pools[NETMAP_BUF_POOL]._objsize;
+ req->nr_buf_pool_objtotal = nmd->pools[NETMAP_BUF_POOL].objtotal;
+ req->nr_buf_pool_objsize = nmd->pools[NETMAP_BUF_POOL]._objsize;
NMA_UNLOCK(nmd);
- ret = copyout(&pi, upi, sizeof(pi));
- if (ret) {
- return ret;
- }
-
return 0;
}
@@ -2059,8 +2097,7 @@ netmap_mem_pools_info_get(struct nmreq *nmr, struct netmap_mem_d *nmd)
struct netmap_mem_ext {
struct netmap_mem_d up;
- struct page **pages;
- int nr_pages;
+ struct nm_os_extmem *os;
struct netmap_mem_ext *next, *prev;
};
@@ -2090,29 +2127,14 @@ netmap_mem_ext_unregister(struct netmap_mem_ext *e)
e->prev = e->next = NULL;
}
-static int
-netmap_mem_ext_same_pages(struct netmap_mem_ext *e, struct page **pages, int nr_pages)
-{
- int i;
-
- if (e->nr_pages != nr_pages)
- return 0;
-
- for (i = 0; i < nr_pages; i++)
- if (pages[i] != e->pages[i])
- return 0;
-
- return 1;
-}
-
static struct netmap_mem_ext *
-netmap_mem_ext_search(struct page **pages, int nr_pages)
+netmap_mem_ext_search(struct nm_os_extmem *os)
{
struct netmap_mem_ext *e;
NM_MTX_LOCK(nm_mem_ext_list_lock);
for (e = netmap_mem_ext_list; e; e = e->next) {
- if (netmap_mem_ext_same_pages(e, pages, nr_pages)) {
+ if (nm_os_extmem_isequal(e->os, os)) {
netmap_mem_get(&e->up);
break;
}
@@ -2123,18 +2145,6 @@ netmap_mem_ext_search(struct page **pages, int nr_pages)
static void
-netmap_mem_ext_free_pages(struct page **pages, int nr_pages)
-{
- int i;
-
- for (i = 0; i < nr_pages; i++) {
- kunmap(pages[i]);
- put_page(pages[i]);
- }
- nm_os_vfree(pages);
-}
-
-static void
netmap_mem_ext_delete(struct netmap_mem_d *d)
{
int i;
@@ -2151,11 +2161,8 @@ netmap_mem_ext_delete(struct netmap_mem_d *d)
p->lut = NULL;
}
}
- if (e->pages) {
- netmap_mem_ext_free_pages(e->pages, e->nr_pages);
- e->pages = NULL;
- e->nr_pages = 0;
- }
+ if (e->os)
+ nm_os_extmem_delete(e->os);
netmap_mem2_delete(d);
}
@@ -2181,117 +2188,66 @@ struct netmap_mem_ops netmap_mem_ext_ops = {
};
struct netmap_mem_d *
-netmap_mem_ext_create(struct nmreq *nmr, int *perror)
+netmap_mem_ext_create(uint64_t usrptr, struct nmreq_pools_info *pi, int *perror)
{
- uintptr_t p = *(uintptr_t *)&nmr->nr_arg1;
- struct netmap_pools_info pi;
int error = 0;
- unsigned long end, start;
- int nr_pages, res, i, j;
- struct page **pages = NULL;
+ int i, j;
struct netmap_mem_ext *nme;
char *clust;
size_t off;
-
- error = copyin((void *)p, &pi, sizeof(pi));
- if (error)
- goto out;
+ struct nm_os_extmem *os = NULL;
+ int nr_pages;
// XXX sanity checks
- if (pi.if_pool_objtotal == 0)
- pi.if_pool_objtotal = netmap_min_priv_params[NETMAP_IF_POOL].num;
- if (pi.if_pool_objsize == 0)
- pi.if_pool_objsize = netmap_min_priv_params[NETMAP_IF_POOL].size;
- if (pi.ring_pool_objtotal == 0)
- pi.ring_pool_objtotal = netmap_min_priv_params[NETMAP_RING_POOL].num;
- if (pi.ring_pool_objsize == 0)
- pi.ring_pool_objsize = netmap_min_priv_params[NETMAP_RING_POOL].size;
- if (pi.buf_pool_objtotal == 0)
- pi.buf_pool_objtotal = netmap_min_priv_params[NETMAP_BUF_POOL].num;
- if (pi.buf_pool_objsize == 0)
- pi.buf_pool_objsize = netmap_min_priv_params[NETMAP_BUF_POOL].size;
+ if (pi->nr_if_pool_objtotal == 0)
+ pi->nr_if_pool_objtotal = netmap_min_priv_params[NETMAP_IF_POOL].num;
+ if (pi->nr_if_pool_objsize == 0)
+ pi->nr_if_pool_objsize = netmap_min_priv_params[NETMAP_IF_POOL].size;
+ if (pi->nr_ring_pool_objtotal == 0)
+ pi->nr_ring_pool_objtotal = netmap_min_priv_params[NETMAP_RING_POOL].num;
+ if (pi->nr_ring_pool_objsize == 0)
+ pi->nr_ring_pool_objsize = netmap_min_priv_params[NETMAP_RING_POOL].size;
+ if (pi->nr_buf_pool_objtotal == 0)
+ pi->nr_buf_pool_objtotal = netmap_min_priv_params[NETMAP_BUF_POOL].num;
+ if (pi->nr_buf_pool_objsize == 0)
+ pi->nr_buf_pool_objsize = netmap_min_priv_params[NETMAP_BUF_POOL].size;
D("if %d %d ring %d %d buf %d %d",
- pi.if_pool_objtotal, pi.if_pool_objsize,
- pi.ring_pool_objtotal, pi.ring_pool_objsize,
- pi.buf_pool_objtotal, pi.buf_pool_objsize);
+ pi->nr_if_pool_objtotal, pi->nr_if_pool_objsize,
+ pi->nr_ring_pool_objtotal, pi->nr_ring_pool_objsize,
+ pi->nr_buf_pool_objtotal, pi->nr_buf_pool_objsize);
- end = (p + pi.memsize + PAGE_SIZE - 1) >> PAGE_SHIFT;
- start = p >> PAGE_SHIFT;
- nr_pages = end - start;
-
- pages = nm_os_vmalloc(nr_pages * sizeof(*pages));
- if (pages == NULL) {
- error = ENOMEM;
+ os = nm_os_extmem_create(usrptr, pi, &error);
+ if (os == NULL) {
+ D("os extmem creation failed");
goto out;
}
-#ifdef NETMAP_LINUX_HAVE_GUP_4ARGS
- res = get_user_pages_unlocked(
- p,
- nr_pages,
- pages,
- FOLL_WRITE | FOLL_GET | FOLL_SPLIT | FOLL_POPULATE); // XXX check other flags
-#elif defined(NETMAP_LINUX_HAVE_GUP_5ARGS)
- res = get_user_pages_unlocked(
- p,
- nr_pages,
- 1, /* write */
- 0, /* don't force */
- pages);
-#elif defined(NETMAP_LINUX_HAVE_GUP_7ARGS)
- res = get_user_pages_unlocked(
- current,
- current->mm,
- p,
- nr_pages,
- 1, /* write */
- 0, /* don't force */
- pages);
-#else
- down_read(&current->mm->mmap_sem);
- res = get_user_pages(
- current,
- current->mm,
- p,
- nr_pages,
- 1, /* write */
- 0, /* don't force */
- pages,
- NULL);
- up_read(&current->mm->mmap_sem);
-#endif /* NETMAP_LINUX_GUP */
-
- if (res < nr_pages) {
- error = EFAULT;
- goto out_unmap;
- }
-
- nme = netmap_mem_ext_search(pages, nr_pages);
+ nme = netmap_mem_ext_search(os);
if (nme) {
- netmap_mem_ext_free_pages(pages, nr_pages);
+ nm_os_extmem_delete(os);
return &nme->up;
}
D("not found, creating new");
nme = _netmap_mem_private_new(sizeof(*nme),
(struct netmap_obj_params[]){
- { pi.if_pool_objsize, pi.if_pool_objtotal },
- { pi.ring_pool_objsize, pi.ring_pool_objtotal },
- { pi.buf_pool_objsize, pi.buf_pool_objtotal }},
+ { pi->nr_if_pool_objsize, pi->nr_if_pool_objtotal },
+ { pi->nr_ring_pool_objsize, pi->nr_ring_pool_objtotal },
+ { pi->nr_buf_pool_objsize, pi->nr_buf_pool_objtotal }},
&netmap_mem_ext_ops,
&error);
if (nme == NULL)
goto out_unmap;
+ nr_pages = nm_os_extmem_nr_pages(os);
+
/* from now on pages will be released by nme destructor;
* we let res = 0 to prevent release in out_unmap below
*/
- res = 0;
- nme->pages = pages;
- nme->nr_pages = nr_pages;
- nme->up.flags |= NETMAP_MEM_EXT;
+ nme->os = os;
+ os = NULL; /* pass ownership */
- clust = kmap(*pages);
+ clust = nm_os_extmem_nextpage(nme->os);
off = 0;
for (i = 0; i < NETMAP_POOLS_NR; i++) {
struct netmap_obj_pool *p = &nme->up.pools[i];
@@ -2323,9 +2279,11 @@ netmap_mem_ext_create(struct nmreq *nmr, int *perror)
for (j = 0; j < o->num && nr_pages > 0; j++) {
size_t noff;
- size_t skip;
p->lut[j].vaddr = clust + off;
+#if !defined(linux) && !defined(_WIN32)
+ p->lut[j].paddr = vtophys(p->lut[j].vaddr);
+#endif
ND("%s %d at %p", p->name, j, p->lut[j].vaddr);
noff = off + p->_objsize;
if (noff < PAGE_SIZE) {
@@ -2333,15 +2291,16 @@ netmap_mem_ext_create(struct nmreq *nmr, int *perror)
continue;
}
ND("too big, recomputing offset...");
- skip = PAGE_SIZE - (off & PAGE_MASK);
while (noff >= PAGE_SIZE) {
- noff -= skip;
- pages++;
+ char *old_clust = clust;
+ noff -= PAGE_SIZE;
+ clust = nm_os_extmem_nextpage(nme->os);
nr_pages--;
ND("noff %zu page %p nr_pages %d", noff,
page_to_virt(*pages), nr_pages);
if (noff > 0 && !nm_isset(p->invalid_bitmap, j) &&
- (nr_pages == 0 || *pages != *(pages - 1) + 1))
+ (nr_pages == 0 ||
+ old_clust + PAGE_SIZE != clust))
{
/* out of space or non contiguous,
* drop this object
@@ -2351,11 +2310,8 @@ netmap_mem_ext_create(struct nmreq *nmr, int *perror)
}
if (nr_pages == 0)
break;
- skip = PAGE_SIZE;
}
off = noff;
- if (nr_pages > 0)
- clust = kmap(*pages);
}
p->objtotal = j;
p->numclusters = p->objtotal;
@@ -2363,12 +2319,6 @@ netmap_mem_ext_create(struct nmreq *nmr, int *perror)
ND("%d memtotal %u", j, p->memtotal);
}
- /* skip the first netmap_if, where the pools info reside */
- {
- struct netmap_obj_pool *p = &nme->up.pools[NETMAP_IF_POOL];
- p->invalid_bitmap[0] |= 1U;
- }
-
netmap_mem_ext_register(nme);
return &nme->up;
@@ -2376,10 +2326,8 @@ netmap_mem_ext_create(struct nmreq *nmr, int *perror)
out_delete:
netmap_mem_put(&nme->up);
out_unmap:
- for (i = 0; i < res; i++)
- put_page(pages[i]);
- if (res)
- nm_os_free(pages);
+ if (os)
+ nm_os_extmem_delete(os);
out:
if (perror)
*perror = error;
@@ -2504,8 +2452,6 @@ netmap_mem_pt_guest_get_info(struct netmap_mem_d *nmd, uint64_t *size,
{
int error = 0;
- NMA_LOCK(nmd);
-
error = nmd->ops->nmd_config(nmd);
if (error)
goto out;
@@ -2518,7 +2464,6 @@ netmap_mem_pt_guest_get_info(struct netmap_mem_d *nmd, uint64_t *size,
*id = nmd->nm_id;
out:
- NMA_UNLOCK(nmd);
return error;
}
@@ -2556,21 +2501,19 @@ netmap_mem_pt_guest_finalize(struct netmap_mem_d *nmd)
int i;
int error = 0;
- nmd->active++;
-
if (nmd->flags & NETMAP_MEM_FINALIZED)
goto out;
if (ptnmd->ptn_dev == NULL) {
D("ptnetmap memdev not attached");
error = ENOMEM;
- goto err;
+ goto out;
}
/* Map memory through ptnetmap-memdev BAR. */
error = nm_os_pt_memdev_iomap(ptnmd->ptn_dev, &ptnmd->nm_paddr,
&ptnmd->nm_addr, &mem_size);
if (error)
- goto err;
+ goto out;
/* Initialize the lut using the information contained in the
* ptnetmap memory device. */
@@ -2605,11 +2548,16 @@ netmap_mem_pt_guest_finalize(struct netmap_mem_d *nmd)
ptnmd->buf_lut.objsize = bufsize;
nmd->nm_totalsize = (unsigned int)mem_size;
+ /* Initialize these fields, as they are needed by
+ * netmap_mem_bufsize().
+ * XXX please improve this: why do we need this
+ * replication? Maybe nmd->pools[] should not be
+ * there for the guest allocator? */
+ nmd->pools[NETMAP_BUF_POOL]._objsize = bufsize;
+ nmd->pools[NETMAP_BUF_POOL]._objtotal = nbuffers;
+
nmd->flags |= NETMAP_MEM_FINALIZED;
out:
- return 0;
-err:
- nmd->active--;
return error;
}
@@ -2618,8 +2566,7 @@ netmap_mem_pt_guest_deref(struct netmap_mem_d *nmd)
{
struct netmap_mem_ptg *ptnmd = (struct netmap_mem_ptg *)nmd;
- nmd->active--;
- if (nmd->active <= 0 &&
+ if (nmd->active == 1 &&
(nmd->flags & NETMAP_MEM_FINALIZED)) {
nmd->flags &= ~NETMAP_MEM_FINALIZED;
/* unmap ptnetmap-memdev memory */
@@ -2661,8 +2608,6 @@ netmap_mem_pt_guest_if_new(struct netmap_adapter *na, struct netmap_priv_d *priv
struct mem_pt_if *ptif;
struct netmap_if *nifp = NULL;
- NMA_LOCK(na->nm_mem);
-
ptif = netmap_mem_pt_guest_ifp_lookup(na->nm_mem, na->ifp);
if (ptif == NULL) {
D("Error: interface %p is not in passthrough", na->ifp);
@@ -2671,7 +2616,6 @@ netmap_mem_pt_guest_if_new(struct netmap_adapter *na, struct netmap_priv_d *priv
nifp = (struct netmap_if *)((char *)(ptnmd->nm_addr) +
ptif->nifp_offset);
- NMA_UNLOCK(na->nm_mem);
out:
return nifp;
}
@@ -2681,12 +2625,10 @@ netmap_mem_pt_guest_if_delete(struct netmap_adapter *na, struct netmap_if *nifp)
{
struct mem_pt_if *ptif;
- NMA_LOCK(na->nm_mem);
ptif = netmap_mem_pt_guest_ifp_lookup(na->nm_mem, na->ifp);
if (ptif == NULL) {
D("Error: interface %p is not in passthrough", na->ifp);
}
- NMA_UNLOCK(na->nm_mem);
}
static int
@@ -2697,8 +2639,6 @@ netmap_mem_pt_guest_rings_create(struct netmap_adapter *na)
struct netmap_if *nifp;
int i, error = -1;
- NMA_LOCK(na->nm_mem);
-
ptif = netmap_mem_pt_guest_ifp_lookup(na->nm_mem, na->ifp);
if (ptif == NULL) {
D("Error: interface %p is not in passthrough", na->ifp);
@@ -2709,14 +2649,14 @@ netmap_mem_pt_guest_rings_create(struct netmap_adapter *na)
/* point each kring to the corresponding backend ring */
nifp = (struct netmap_if *)((char *)ptnmd->nm_addr + ptif->nifp_offset);
for (i = 0; i <= na->num_tx_rings; i++) {
- struct netmap_kring *kring = na->tx_rings + i;
+ struct netmap_kring *kring = na->tx_rings[i];
if (kring->ring)
continue;
kring->ring = (struct netmap_ring *)
((char *)nifp + nifp->ring_ofs[i]);
}
for (i = 0; i <= na->num_rx_rings; i++) {
- struct netmap_kring *kring = na->rx_rings + i;
+ struct netmap_kring *kring = na->rx_rings[i];
if (kring->ring)
continue;
kring->ring = (struct netmap_ring *)
@@ -2726,8 +2666,6 @@ netmap_mem_pt_guest_rings_create(struct netmap_adapter *na)
error = 0;
out:
- NMA_UNLOCK(na->nm_mem);
-
return error;
}
diff --git a/sys/dev/netmap/netmap_mem2.h b/sys/dev/netmap/netmap_mem2.h
index f0bee7a33fd53..977bf622862a0 100644
--- a/sys/dev/netmap/netmap_mem2.h
+++ b/sys/dev/netmap/netmap_mem2.h
@@ -137,12 +137,12 @@ void netmap_mem_if_delete(struct netmap_adapter *, struct netmap_if *);
int netmap_mem_rings_create(struct netmap_adapter *);
void netmap_mem_rings_delete(struct netmap_adapter *);
int netmap_mem_deref(struct netmap_mem_d *, struct netmap_adapter *);
-int netmap_mem2_get_pool_info(struct netmap_mem_d *, u_int, u_int *, u_int *);
-int netmap_mem_get_info(struct netmap_mem_d *, uint64_t *size, u_int *memflags, uint16_t *id);
+int netmap_mem2_get_pool_info(struct netmap_mem_d *, u_int, u_int *, u_int *);
+int netmap_mem_get_info(struct netmap_mem_d *, uint64_t *size,
+ u_int *memflags, nm_memid_t *id);
ssize_t netmap_mem_if_offset(struct netmap_mem_d *, const void *vaddr);
struct netmap_mem_d* netmap_mem_private_new( u_int txr, u_int txd, u_int rxr, u_int rxd,
u_int extra_bufs, u_int npipes, int* error);
-void netmap_mem_delete(struct netmap_mem_d *);
#define netmap_mem_get(d) __netmap_mem_get(d, __FUNCTION__, __LINE__)
#define netmap_mem_put(d) __netmap_mem_put(d, __FUNCTION__, __LINE__)
@@ -152,7 +152,7 @@ struct netmap_mem_d* netmap_mem_find(nm_memid_t);
unsigned netmap_mem_bufsize(struct netmap_mem_d *nmd);
#ifdef WITH_EXTMEM
-struct netmap_mem_d* netmap_mem_ext_create(struct nmreq *, int *);
+struct netmap_mem_d* netmap_mem_ext_create(uint64_t, struct nmreq_pools_info *, int *);
#else /* !WITH_EXTMEM */
#define netmap_mem_ext_create(nmr, _perr) \
({ int *perr = _perr; if (perr) *(perr) = EOPNOTSUPP; NULL; })
@@ -167,7 +167,8 @@ struct netmap_mem_d* netmap_mem_pt_guest_attach(struct ptnetmap_memdev *, uint16
int netmap_mem_pt_guest_ifp_del(struct netmap_mem_d *, struct ifnet *);
#endif /* WITH_PTNETMAP_GUEST */
-int netmap_mem_pools_info_get(struct nmreq *, struct netmap_mem_d *);
+int netmap_mem_pools_info_get(struct nmreq_pools_info *,
+ struct netmap_mem_d *);
#define NETMAP_MEM_PRIVATE 0x2 /* allocator uses private address space */
#define NETMAP_MEM_IO 0x4 /* the underlying memory is mmapped I/O */
@@ -175,4 +176,14 @@ int netmap_mem_pools_info_get(struct nmreq *, struct netmap_mem_d *);
uint32_t netmap_extra_alloc(struct netmap_adapter *, uint32_t *, uint32_t n);
+#ifdef WITH_EXTMEM
+#include <net/netmap_virt.h>
+struct nm_os_extmem; /* opaque */
+struct nm_os_extmem *nm_os_extmem_create(unsigned long, struct nmreq_pools_info *, int *perror);
+char *nm_os_extmem_nextpage(struct nm_os_extmem *);
+int nm_os_extmem_nr_pages(struct nm_os_extmem *);
+int nm_os_extmem_isequal(struct nm_os_extmem *, struct nm_os_extmem *);
+void nm_os_extmem_delete(struct nm_os_extmem *);
+#endif /* WITH_EXTMEM */
+
#endif
diff --git a/sys/dev/netmap/netmap_monitor.c b/sys/dev/netmap/netmap_monitor.c
index e7cc05f5ab0f0..f6b7c93adc768 100644
--- a/sys/dev/netmap/netmap_monitor.c
+++ b/sys/dev/netmap/netmap_monitor.c
@@ -167,8 +167,8 @@ netmap_monitor_krings_create(struct netmap_adapter *na)
if (error)
return error;
/* override the host rings callbacks */
- na->tx_rings[na->num_tx_rings].nm_sync = netmap_monitor_txsync;
- na->rx_rings[na->num_rx_rings].nm_sync = netmap_monitor_rxsync;
+ na->tx_rings[na->num_tx_rings]->nm_sync = netmap_monitor_txsync;
+ na->rx_rings[na->num_rx_rings]->nm_sync = netmap_monitor_rxsync;
return 0;
}
@@ -390,7 +390,7 @@ netmap_monitor_stop(struct netmap_adapter *na)
u_int i;
for (i = 0; i < nma_get_nrings(na, t) + 1; i++) {
- struct netmap_kring *kring = &NMR(na, t)[i];
+ struct netmap_kring *kring = NMR(na, t)[i];
struct netmap_kring *zkring;
u_int j;
@@ -456,7 +456,7 @@ netmap_monitor_reg_common(struct netmap_adapter *na, int onoff, int zmon)
}
for_rx_tx(t) {
for (i = 0; i < nma_get_nrings(na, t) + 1; i++) {
- mkring = &NMR(na, t)[i];
+ mkring = NMR(na, t)[i];
if (!nm_kring_pending_on(mkring))
continue;
mkring->nr_mode = NKR_NETMAP_ON;
@@ -466,7 +466,7 @@ netmap_monitor_reg_common(struct netmap_adapter *na, int onoff, int zmon)
if (i > nma_get_nrings(pna, s))
continue;
if (mna->flags & nm_txrx2flag(s)) {
- kring = &NMR(pna, s)[i];
+ kring = NMR(pna, s)[i];
netmap_monitor_add(mkring, kring, zmon);
}
}
@@ -478,7 +478,7 @@ netmap_monitor_reg_common(struct netmap_adapter *na, int onoff, int zmon)
na->na_flags &= ~NAF_NETMAP_ON;
for_rx_tx(t) {
for (i = 0; i < nma_get_nrings(na, t) + 1; i++) {
- mkring = &NMR(na, t)[i];
+ mkring = NMR(na, t)[i];
if (!nm_kring_pending_off(mkring))
continue;
mkring->nr_mode = NKR_NETMAP_OFF;
@@ -494,7 +494,7 @@ netmap_monitor_reg_common(struct netmap_adapter *na, int onoff, int zmon)
if (i > nma_get_nrings(pna, s))
continue;
if (mna->flags & nm_txrx2flag(s)) {
- kring = &NMR(pna, s)[i];
+ kring = NMR(pna, s)[i];
netmap_monitor_del(mkring, kring);
}
}
@@ -824,38 +824,41 @@ netmap_monitor_dtor(struct netmap_adapter *na)
}
-/* check if nmr is a request for a monitor adapter that we can satisfy */
+/* check if req is a request for a monitor adapter that we can satisfy */
int
-netmap_get_monitor_na(struct nmreq *nmr, struct netmap_adapter **na,
- struct netmap_mem_d *nmd, int create)
+netmap_get_monitor_na(struct nmreq_header *hdr, struct netmap_adapter **na,
+ struct netmap_mem_d *nmd, int create)
{
- struct nmreq pnmr;
+ struct nmreq_register *req = (struct nmreq_register *)hdr->nr_body;
+ struct nmreq_register preq;
struct netmap_adapter *pna; /* parent adapter */
struct netmap_monitor_adapter *mna;
struct ifnet *ifp = NULL;
int error;
- int zcopy = (nmr->nr_flags & NR_ZCOPY_MON);
+ int zcopy = (req->nr_flags & NR_ZCOPY_MON);
char monsuff[10] = "";
if (zcopy) {
- nmr->nr_flags |= (NR_MONITOR_TX | NR_MONITOR_RX);
+ req->nr_flags |= (NR_MONITOR_TX | NR_MONITOR_RX);
}
- if ((nmr->nr_flags & (NR_MONITOR_TX | NR_MONITOR_RX)) == 0) {
+ if ((req->nr_flags & (NR_MONITOR_TX | NR_MONITOR_RX)) == 0) {
ND("not a monitor");
return 0;
}
/* this is a request for a monitor adapter */
- ND("flags %x", nmr->nr_flags);
+ ND("flags %lx", req->nr_flags);
- /* first, try to find the adapter that we want to monitor
- * We use the same nmr, after we have turned off the monitor flags.
+ /* First, try to find the adapter that we want to monitor.
+ * We use the same req, after we have turned off the monitor flags.
* In this way we can potentially monitor everything netmap understands,
* except other monitors.
*/
- memcpy(&pnmr, nmr, sizeof(pnmr));
- pnmr.nr_flags &= ~(NR_MONITOR_TX | NR_MONITOR_RX | NR_ZCOPY_MON);
- error = netmap_get_na(&pnmr, &pna, &ifp, nmd, create);
+ memcpy(&preq, req, sizeof(preq));
+ preq.nr_flags &= ~(NR_MONITOR_TX | NR_MONITOR_RX | NR_ZCOPY_MON);
+ hdr->nr_body = (uint64_t)&preq;
+ error = netmap_get_na(hdr, &pna, &ifp, nmd, create);
+ hdr->nr_body = (uint64_t)req;
if (error) {
D("parent lookup failed: %d", error);
return error;
@@ -881,7 +884,8 @@ netmap_get_monitor_na(struct nmreq *nmr, struct netmap_adapter **na,
mna->priv.np_na = pna;
/* grab all the rings we need in the parent */
- error = netmap_interp_ringid(&mna->priv, nmr->nr_ringid, nmr->nr_flags);
+ error = netmap_interp_ringid(&mna->priv, req->nr_mode, req->nr_ringid,
+ req->nr_flags);
if (error) {
D("ringid error");
goto free_out;
@@ -892,8 +896,8 @@ netmap_get_monitor_na(struct nmreq *nmr, struct netmap_adapter **na,
snprintf(mna->up.name, sizeof(mna->up.name), "%s%s/%s%s%s", pna->name,
monsuff,
zcopy ? "z" : "",
- (nmr->nr_flags & NR_MONITOR_RX) ? "r" : "",
- (nmr->nr_flags & NR_MONITOR_TX) ? "t" : "");
+ (req->nr_flags & NR_MONITOR_RX) ? "r" : "",
+ (req->nr_flags & NR_MONITOR_TX) ? "t" : "");
/* the monitor supports the host rings iff the parent does */
mna->up.na_flags |= (pna->na_flags & NAF_HOST_RINGS);
@@ -913,10 +917,10 @@ netmap_get_monitor_na(struct nmreq *nmr, struct netmap_adapter **na,
* the parent rings, but the user may ask for a different
* number
*/
- mna->up.num_tx_desc = nmr->nr_tx_slots;
+ mna->up.num_tx_desc = req->nr_tx_slots;
nm_bound_var(&mna->up.num_tx_desc, pna->num_tx_desc,
1, NM_MONITOR_MAXSLOTS, NULL);
- mna->up.num_rx_desc = nmr->nr_rx_slots;
+ mna->up.num_rx_desc = req->nr_rx_slots;
nm_bound_var(&mna->up.num_rx_desc, pna->num_rx_desc,
1, NM_MONITOR_MAXSLOTS, NULL);
if (zcopy) {
@@ -950,7 +954,7 @@ netmap_get_monitor_na(struct nmreq *nmr, struct netmap_adapter **na,
}
/* remember the traffic directions we have to monitor */
- mna->flags = (nmr->nr_flags & (NR_MONITOR_TX | NR_MONITOR_RX | NR_ZCOPY_MON));
+ mna->flags = (req->nr_flags & (NR_MONITOR_TX | NR_MONITOR_RX | NR_ZCOPY_MON));
*na = &mna->up;
netmap_adapter_get(*na);
diff --git a/sys/dev/netmap/netmap_pipe.c b/sys/dev/netmap/netmap_pipe.c
index 48dde5382f77a..3b0fb869231b2 100644
--- a/sys/dev/netmap/netmap_pipe.c
+++ b/sys/dev/netmap/netmap_pipe.c
@@ -77,6 +77,7 @@
#ifdef WITH_PIPES
#define NM_PIPE_MAXSLOTS 4096
+#define NM_PIPE_MAXRINGS 256
static int netmap_default_pipes = 0; /* ignored, kept for compatibility */
SYSBEGIN(vars_pipes);
@@ -129,14 +130,19 @@ netmap_pipe_dealloc(struct netmap_adapter *na)
/* find a pipe endpoint with the given id among the parent's pipes */
static struct netmap_pipe_adapter *
-netmap_pipe_find(struct netmap_adapter *parent, u_int pipe_id)
+netmap_pipe_find(struct netmap_adapter *parent, const char *pipe_id)
{
int i;
struct netmap_pipe_adapter *na;
for (i = 0; i < parent->na_next_pipe; i++) {
+ const char *na_pipe_id;
na = parent->na_pipes[i];
- if (na->id == pipe_id) {
+ na_pipe_id = strrchr(na->up.name,
+ na->role == NM_PIPE_ROLE_MASTER ? '{' : '}');
+ KASSERT(na_pipe_id != NULL, ("Invalid pipe name"));
+ ++na_pipe_id;
+ if (!strcmp(na_pipe_id, pipe_id)) {
return na;
}
}
@@ -179,63 +185,46 @@ int
netmap_pipe_txsync(struct netmap_kring *txkring, int flags)
{
struct netmap_kring *rxkring = txkring->pipe;
- u_int limit; /* slots to transfer */
- u_int j, k, lim_tx = txkring->nkr_num_slots - 1,
- lim_rx = rxkring->nkr_num_slots - 1;
- int m, busy;
+ u_int k, lim = txkring->nkr_num_slots - 1;
+ int m; /* slots to transfer */
struct netmap_ring *txring = txkring->ring, *rxring = rxkring->ring;
ND("%p: %s %x -> %s", txkring, txkring->name, flags, rxkring->name);
- ND(2, "before: hwcur %d hwtail %d cur %d head %d tail %d", txkring->nr_hwcur, txkring->nr_hwtail,
+ ND(20, "TX before: hwcur %d hwtail %d cur %d head %d tail %d",
+ txkring->nr_hwcur, txkring->nr_hwtail,
txkring->rcur, txkring->rhead, txkring->rtail);
- j = rxkring->nr_hwtail; /* RX */
- k = txkring->nr_hwcur; /* TX */
m = txkring->rhead - txkring->nr_hwcur; /* new slots */
if (m < 0)
m += txkring->nkr_num_slots;
- limit = m;
- m = lim_rx; /* max avail space on destination */
- busy = j - rxkring->nr_hwcur; /* busy slots */
- if (busy < 0)
- busy += rxkring->nkr_num_slots;
- m -= busy; /* subtract busy slots */
- ND(2, "m %d limit %d", m, limit);
- if (m < limit)
- limit = m;
- if (limit == 0) {
- /* either the rxring is full, or nothing to send */
+ if (m == 0) {
+ /* nothing to send */
return 0;
}
- while (limit-- > 0) {
- struct netmap_slot *rs = &rxring->slot[j];
+ for (k = txkring->nr_hwcur; m; m--, k = nm_next(k, lim)) {
+ struct netmap_slot *rs = &rxring->slot[k];
struct netmap_slot *ts = &txring->slot[k];
- struct netmap_slot tmp;
- __builtin_prefetch(ts + 1);
+ rs->len = ts->len;
+ rs->ptr = ts->ptr;
- /* swap the slots and report the buffer change */
- tmp = *rs;
- tmp.flags |= NS_BUF_CHANGED;
- *rs = *ts;
- rs->flags |= NS_BUF_CHANGED;
- *ts = tmp;
-
- j = nm_next(j, lim_rx);
- k = nm_next(k, lim_tx);
+ if (ts->flags & NS_BUF_CHANGED) {
+ rs->buf_idx = ts->buf_idx;
+ rs->flags |= NS_BUF_CHANGED;
+ ts->flags &= ~NS_BUF_CHANGED;
+ }
}
mb(); /* make sure the slots are updated before publishing them */
- rxkring->nr_hwtail = j;
+ rxkring->nr_hwtail = k;
txkring->nr_hwcur = k;
- txkring->nr_hwtail = nm_prev(k, lim_tx);
- ND(2, "after: hwcur %d hwtail %d cur %d head %d tail %d j %d", txkring->nr_hwcur, txkring->nr_hwtail,
- txkring->rcur, txkring->rhead, txkring->rtail, j);
+ ND(20, "TX after : hwcur %d hwtail %d cur %d head %d tail %d k %d",
+ txkring->nr_hwcur, txkring->nr_hwtail,
+ txkring->rcur, txkring->rhead, txkring->rtail, k);
- mb(); /* make sure rxkring->nr_hwtail is updated before notifying */
rxkring->nm_notify(rxkring, 0);
return 0;
@@ -245,20 +234,46 @@ int
netmap_pipe_rxsync(struct netmap_kring *rxkring, int flags)
{
struct netmap_kring *txkring = rxkring->pipe;
- uint32_t oldhwcur = rxkring->nr_hwcur;
+ u_int k, lim = rxkring->nkr_num_slots - 1;
+ int m; /* slots to release */
+ struct netmap_ring *txring = txkring->ring, *rxring = rxkring->ring;
- ND("%s %x <- %s", rxkring->name, flags, txkring->name);
- rxkring->nr_hwcur = rxkring->rhead; /* recover user-relased slots */
- ND(5, "hwcur %d hwtail %d cur %d head %d tail %d", rxkring->nr_hwcur, rxkring->nr_hwtail,
+ ND("%p: %s %x -> %s", txkring, txkring->name, flags, rxkring->name);
+ ND(20, "RX before: hwcur %d hwtail %d cur %d head %d tail %d",
+ rxkring->nr_hwcur, rxkring->nr_hwtail,
rxkring->rcur, rxkring->rhead, rxkring->rtail);
- mb(); /* paired with the first mb() in txsync */
- if (oldhwcur != rxkring->nr_hwcur) {
- /* we have released some slots, notify the other end */
- mb(); /* make sure nr_hwcur is updated before notifying */
- txkring->nm_notify(txkring, 0);
+ m = rxkring->rhead - rxkring->nr_hwcur; /* released slots */
+ if (m < 0)
+ m += rxkring->nkr_num_slots;
+
+ if (m == 0) {
+ /* nothing to release */
+ return 0;
}
- return 0;
+
+ for (k = rxkring->nr_hwcur; m; m--, k = nm_next(k, lim)) {
+ struct netmap_slot *rs = &rxring->slot[k];
+ struct netmap_slot *ts = &txring->slot[k];
+
+ if (rs->flags & NS_BUF_CHANGED) {
+ /* copy the slot and report the buffer change */
+ *ts = *rs;
+ rs->flags &= ~NS_BUF_CHANGED;
+ }
+ }
+
+ mb(); /* make sure the slots are updated before publishing them */
+ txkring->nr_hwtail = nm_prev(k, lim);
+ rxkring->nr_hwcur = k;
+
+ ND(20, "RX after : hwcur %d hwtail %d cur %d head %d tail %d k %d",
+ rxkring->nr_hwcur, rxkring->nr_hwtail,
+ rxkring->rcur, rxkring->rhead, rxkring->rtail, k);
+
+ txkring->nm_notify(txkring, 0);
+
+ return 0;
}
/* Pipe endpoints are created and destroyed together, so that endpoints do not
@@ -335,8 +350,10 @@ netmap_pipe_krings_create(struct netmap_adapter *na)
for_rx_tx(t) {
enum txrx r = nm_txrx_swap(t); /* swap NR_TX <-> NR_RX */
for (i = 0; i < nma_get_nrings(na, t); i++) {
- NMR(na, t)[i].pipe = NMR(ona, r) + i;
- NMR(ona, r)[i].pipe = NMR(na, t) + i;
+ NMR(na, t)[i]->pipe = NMR(ona, r)[i];
+ NMR(ona, r)[i]->pipe = NMR(na, t)[i];
+ /* mark all peer-adapter rings as fake */
+ NMR(ona, r)[i]->nr_kflags |= NKR_FAKERING;
}
}
@@ -380,7 +397,7 @@ err:
* usr1 --> e1 e2 <-- usr2
*
* and we are either e1 or e2. Add a ref from the
- * other end and hide our rings.
+ * other end.
*/
static int
netmap_pipe_reg(struct netmap_adapter *na, int onoff)
@@ -395,7 +412,7 @@ netmap_pipe_reg(struct netmap_adapter *na, int onoff)
if (onoff) {
for_rx_tx(t) {
for (i = 0; i < nma_get_nrings(na, t); i++) {
- struct netmap_kring *kring = &NMR(na, t)[i];
+ struct netmap_kring *kring = NMR(na, t)[i];
if (nm_kring_pending_on(kring)) {
/* mark the peer ring as needed */
@@ -404,7 +421,10 @@ netmap_pipe_reg(struct netmap_adapter *na, int onoff)
}
}
- /* create all missing needed rings on the other end */
+ /* create all missing needed rings on the other end.
+ * Either our end, or the other, has been marked as
+ * fake, so the allocation will not be done twice.
+ */
error = netmap_mem_rings_create(ona);
if (error)
return error;
@@ -412,9 +432,32 @@ netmap_pipe_reg(struct netmap_adapter *na, int onoff)
/* In case of no error we put our rings in netmap mode */
for_rx_tx(t) {
for (i = 0; i < nma_get_nrings(na, t) + 1; i++) {
- struct netmap_kring *kring = &NMR(na, t)[i];
-
+ struct netmap_kring *kring = NMR(na, t)[i];
if (nm_kring_pending_on(kring)) {
+ struct netmap_kring *sring, *dring;
+
+ /* copy the buffers from the non-fake ring */
+ if (kring->nr_kflags & NKR_FAKERING) {
+ sring = kring->pipe;
+ dring = kring;
+ } else {
+ sring = kring;
+ dring = kring->pipe;
+ }
+ memcpy(dring->ring->slot,
+ sring->ring->slot,
+ sizeof(struct netmap_slot) *
+ sring->nkr_num_slots);
+ /* mark both rings as fake and needed,
+ * so that buffers will not be
+ * deleted by the standard machinery
+ * (we will delete them by ourselves in
+ * netmap_pipe_krings_delete)
+ */
+ sring->nr_kflags |=
+ (NKR_FAKERING | NKR_NEEDRING);
+ dring->nr_kflags |=
+ (NKR_FAKERING | NKR_NEEDRING);
kring->nr_mode = NKR_NETMAP_ON;
}
}
@@ -426,21 +469,13 @@ netmap_pipe_reg(struct netmap_adapter *na, int onoff)
na->na_flags &= ~NAF_NETMAP_ON;
for_rx_tx(t) {
for (i = 0; i < nma_get_nrings(na, t) + 1; i++) {
- struct netmap_kring *kring = &NMR(na, t)[i];
+ struct netmap_kring *kring = NMR(na, t)[i];
if (nm_kring_pending_off(kring)) {
kring->nr_mode = NKR_NETMAP_OFF;
- /* mark the peer ring as no longer needed by us
- * (it may still be kept if sombody else is using it)
- */
- if (kring->pipe) {
- kring->pipe->nr_kflags &= ~NKR_NEEDRING;
- }
}
}
}
- /* delete all the peer rings that are no longer needed */
- netmap_mem_rings_delete(ona);
}
if (na->active_fds) {
@@ -482,29 +517,73 @@ netmap_pipe_reg(struct netmap_adapter *na, int onoff)
* and we are either e1 or e2.
*
* In the former case we have to also delete the krings of e2;
- * in the latter case we do nothing (note that our krings
- * have already been hidden in the unregister callback).
+ * in the latter case we do nothing.
*/
static void
netmap_pipe_krings_delete(struct netmap_adapter *na)
{
struct netmap_pipe_adapter *pna =
(struct netmap_pipe_adapter *)na;
- struct netmap_adapter *ona; /* na of the other end */
+ struct netmap_adapter *sna, *ona; /* na of the other end */
+ enum txrx t;
+ int i;
if (!pna->peer_ref) {
ND("%p: case 2, kept alive by peer", na);
return;
}
+ ona = &pna->peer->up;
/* case 1) above */
ND("%p: case 1, deleting everything", na);
+ /* To avoid double-frees we zero-out all the buffers in the kernel part
+ * of each ring. The reason is this: If the user is behaving correctly,
+ * all buffers are found in exactly one slot in the userspace part of
+ * some ring. If the user is not behaving correctly, we cannot release
+ * buffers cleanly anyway. In the latter case, the allocator will
+ * return to a clean state only when all its users will close.
+ */
+ sna = na;
+cleanup:
+ for_rx_tx(t) {
+ for (i = 0; i < nma_get_nrings(sna, t) + 1; i++) {
+ struct netmap_kring *kring = NMR(sna, t)[i];
+ struct netmap_ring *ring = kring->ring;
+ uint32_t j, lim = kring->nkr_num_slots - 1;
+
+ ND("%s ring %p hwtail %u hwcur %u",
+ kring->name, ring, kring->nr_hwtail, kring->nr_hwcur);
+
+ if (ring == NULL)
+ continue;
+
+ if (kring->nr_hwtail == kring->nr_hwcur)
+ ring->slot[kring->nr_hwtail].buf_idx = 0;
+
+ for (j = nm_next(kring->nr_hwtail, lim);
+ j != kring->nr_hwcur;
+ j = nm_next(j, lim))
+ {
+ ND("%s[%d] %u", kring->name, j, ring->slot[j].buf_idx);
+ ring->slot[j].buf_idx = 0;
+ }
+ kring->nr_kflags &= ~(NKR_FAKERING | NKR_NEEDRING);
+ }
+
+ }
+ if (sna != ona && ona->tx_rings) {
+ sna = ona;
+ goto cleanup;
+ }
+
+ netmap_mem_rings_delete(na);
netmap_krings_delete(na); /* also zeroes tx_rings etc. */
- ona = &pna->peer->up;
+
if (ona->tx_rings == NULL) {
/* already deleted, we must be on a
* cleanup-after-error path */
return;
}
+ netmap_mem_rings_delete(ona);
netmap_krings_delete(ona);
}
@@ -520,7 +599,7 @@ netmap_pipe_dtor(struct netmap_adapter *na)
pna->peer_ref = 0;
netmap_adapter_put(&pna->peer->up);
}
- if (pna->role == NR_REG_PIPE_MASTER)
+ if (pna->role == NM_PIPE_ROLE_MASTER)
netmap_pipe_remove(pna->parent, pna);
if (pna->parent_ifp)
if_rele(pna->parent_ifp);
@@ -529,34 +608,55 @@ netmap_pipe_dtor(struct netmap_adapter *na)
}
int
-netmap_get_pipe_na(struct nmreq *nmr, struct netmap_adapter **na,
+netmap_get_pipe_na(struct nmreq_header *hdr, struct netmap_adapter **na,
struct netmap_mem_d *nmd, int create)
{
- struct nmreq pnmr;
+ struct nmreq_register *req = (struct nmreq_register *)hdr->nr_body;
struct netmap_adapter *pna; /* parent adapter */
- struct netmap_pipe_adapter *mna, *sna, *req;
+ struct netmap_pipe_adapter *mna, *sna, *reqna;
struct ifnet *ifp = NULL;
- u_int pipe_id;
- int role = nmr->nr_flags & NR_REG_MASK;
+ const char *pipe_id = NULL;
+ int role = 0;
int error, retries = 0;
+ char *cbra;
- ND("flags %x", nmr->nr_flags);
+ /* Try to parse the pipe syntax 'xx{yy' or 'xx}yy'. */
+ cbra = strrchr(hdr->nr_name, '{');
+ if (cbra != NULL) {
+ role = NM_PIPE_ROLE_MASTER;
+ } else {
+ cbra = strrchr(hdr->nr_name, '}');
+ if (cbra != NULL) {
+ role = NM_PIPE_ROLE_SLAVE;
+ } else {
+ ND("not a pipe");
+ return 0;
+ }
+ }
+ pipe_id = cbra + 1;
+ if (*pipe_id == '\0' || cbra == hdr->nr_name) {
+ /* Bracket is the last character, so pipe name is missing;
+ * or bracket is the first character, so base port name
+ * is missing. */
+ return EINVAL;
+ }
- if (role != NR_REG_PIPE_MASTER && role != NR_REG_PIPE_SLAVE) {
- ND("not a pipe");
- return 0;
+ if (req->nr_mode != NR_REG_ALL_NIC && req->nr_mode != NR_REG_ONE_NIC) {
+ /* We only accept modes involving hardware rings. */
+ return EINVAL;
}
- role = nmr->nr_flags & NR_REG_MASK;
/* first, try to find the parent adapter */
- bzero(&pnmr, sizeof(pnmr));
- memcpy(&pnmr.nr_name, nmr->nr_name, IFNAMSIZ);
- /* pass to parent the requested number of pipes */
- pnmr.nr_arg1 = nmr->nr_arg1;
for (;;) {
+ char nr_name_orig[NETMAP_REQ_IFNAMSIZ];
int create_error;
- error = netmap_get_na(&pnmr, &pna, &ifp, nmd, create);
+ /* Temporarily remove the pipe suffix. */
+ strncpy(nr_name_orig, hdr->nr_name, sizeof(nr_name_orig));
+ *cbra = '\0';
+ error = netmap_get_na(hdr, &pna, &ifp, nmd, create);
+ /* Restore the pipe suffix. */
+ strncpy(hdr->nr_name, nr_name_orig, sizeof(hdr->nr_name));
if (!error)
break;
if (error != ENXIO || retries++) {
@@ -565,9 +665,11 @@ netmap_get_pipe_na(struct nmreq *nmr, struct netmap_adapter **na,
}
ND("try to create a persistent vale port");
/* create a persistent vale port and try again */
+ *cbra = '\0';
NMG_UNLOCK();
- create_error = netmap_vi_create(&pnmr, 1 /* autodelete */);
+ create_error = netmap_vi_create(hdr, 1 /* autodelete */);
NMG_LOCK();
+ strncpy(hdr->nr_name, nr_name_orig, sizeof(hdr->nr_name));
if (create_error && create_error != EEXIST) {
if (create_error != EOPNOTSUPP) {
D("failed to create a persistent vale port: %d", create_error);
@@ -583,16 +685,15 @@ netmap_get_pipe_na(struct nmreq *nmr, struct netmap_adapter **na,
}
/* next, lookup the pipe id in the parent list */
- req = NULL;
- pipe_id = nmr->nr_ringid & NETMAP_RING_MASK;
+ reqna = NULL;
mna = netmap_pipe_find(pna, pipe_id);
if (mna) {
if (mna->role == role) {
- ND("found %d directly at %d", pipe_id, mna->parent_slot);
- req = mna;
+ ND("found %s directly at %d", pipe_id, mna->parent_slot);
+ reqna = mna;
} else {
- ND("found %d indirectly at %d", pipe_id, mna->parent_slot);
- req = mna->peer;
+ ND("found %s indirectly at %d", pipe_id, mna->parent_slot);
+ reqna = mna->peer;
}
/* the pipe we have found already holds a ref to the parent,
* so we need to drop the one we got from netmap_get_na()
@@ -600,7 +701,7 @@ netmap_get_pipe_na(struct nmreq *nmr, struct netmap_adapter **na,
netmap_unget_na(pna, ifp);
goto found;
}
- ND("pipe %d not found, create %d", pipe_id, create);
+ ND("pipe %s not found, create %d", pipe_id, create);
if (!create) {
error = ENODEV;
goto put_out;
@@ -614,10 +715,9 @@ netmap_get_pipe_na(struct nmreq *nmr, struct netmap_adapter **na,
error = ENOMEM;
goto put_out;
}
- snprintf(mna->up.name, sizeof(mna->up.name), "%s{%d", pna->name, pipe_id);
+ snprintf(mna->up.name, sizeof(mna->up.name), "%s{%s", pna->name, pipe_id);
- mna->id = pipe_id;
- mna->role = NR_REG_PIPE_MASTER;
+ mna->role = NM_PIPE_ROLE_MASTER;
mna->parent = pna;
mna->parent_ifp = ifp;
@@ -631,12 +731,16 @@ netmap_get_pipe_na(struct nmreq *nmr, struct netmap_adapter **na,
mna->up.na_flags |= NAF_MEM_OWNER;
mna->up.na_lut = pna->na_lut;
- mna->up.num_tx_rings = 1;
- mna->up.num_rx_rings = 1;
- mna->up.num_tx_desc = nmr->nr_tx_slots;
+ mna->up.num_tx_rings = req->nr_tx_rings;
+ nm_bound_var(&mna->up.num_tx_rings, 1,
+ 1, NM_PIPE_MAXRINGS, NULL);
+ mna->up.num_rx_rings = req->nr_rx_rings;
+ nm_bound_var(&mna->up.num_rx_rings, 1,
+ 1, NM_PIPE_MAXRINGS, NULL);
+ mna->up.num_tx_desc = req->nr_tx_slots;
nm_bound_var(&mna->up.num_tx_desc, pna->num_tx_desc,
1, NM_PIPE_MAXSLOTS, NULL);
- mna->up.num_rx_desc = nmr->nr_rx_slots;
+ mna->up.num_rx_desc = req->nr_rx_slots;
nm_bound_var(&mna->up.num_rx_desc, pna->num_rx_desc,
1, NM_PIPE_MAXSLOTS, NULL);
error = netmap_attach_common(&mna->up);
@@ -656,8 +760,11 @@ netmap_get_pipe_na(struct nmreq *nmr, struct netmap_adapter **na,
/* most fields are the same, copy from master and then fix */
*sna = *mna;
sna->up.nm_mem = netmap_mem_get(mna->up.nm_mem);
- snprintf(sna->up.name, sizeof(sna->up.name), "%s}%d", pna->name, pipe_id);
- sna->role = NR_REG_PIPE_SLAVE;
+ /* swap the number of tx/rx rings */
+ sna->up.num_tx_rings = mna->up.num_rx_rings;
+ sna->up.num_rx_rings = mna->up.num_tx_rings;
+ snprintf(sna->up.name, sizeof(sna->up.name), "%s}%s", pna->name, pipe_id);
+ sna->role = NM_PIPE_ROLE_SLAVE;
error = netmap_attach_common(&sna->up);
if (error)
goto free_sna;
@@ -674,21 +781,21 @@ netmap_get_pipe_na(struct nmreq *nmr, struct netmap_adapter **na,
if (ifp)
if_ref(ifp);
- if (role == NR_REG_PIPE_MASTER) {
- req = mna;
+ if (role == NM_PIPE_ROLE_MASTER) {
+ reqna = mna;
mna->peer_ref = 1;
netmap_adapter_get(&sna->up);
} else {
- req = sna;
+ reqna = sna;
sna->peer_ref = 1;
netmap_adapter_get(&mna->up);
}
ND("created master %p and slave %p", mna, sna);
found:
- ND("pipe %d %s at %p", pipe_id,
- (req->role == NR_REG_PIPE_MASTER ? "master" : "slave"), req);
- *na = &req->up;
+ ND("pipe %s %s at %p", pipe_id,
+ (reqna->role == NM_PIPE_ROLE_MASTER ? "master" : "slave"), reqna);
+ *na = &reqna->up;
netmap_adapter_get(*na);
/* keep the reference to the parent.
diff --git a/sys/dev/netmap/netmap_pt.c b/sys/dev/netmap/netmap_pt.c
index edb49dc504acd..cfa32b0bcf5f4 100644
--- a/sys/dev/netmap/netmap_pt.c
+++ b/sys/dev/netmap/netmap_pt.c
@@ -639,9 +639,9 @@ static struct netmap_kring *
ptnetmap_kring(struct netmap_pt_host_adapter *pth_na, int k)
{
if (k < pth_na->up.num_tx_rings) {
- return pth_na->up.tx_rings + k;
+ return pth_na->up.tx_rings[k];
}
- return pth_na->up.rx_rings + k - pth_na->up.num_tx_rings;
+ return pth_na->up.rx_rings[k - pth_na->up.num_tx_rings];
}
static int
@@ -676,8 +676,19 @@ ptnetmap_create_kctxs(struct netmap_pt_host_adapter *pth_na,
struct nm_kctx_cfg nmk_cfg;
unsigned int num_rings;
uint8_t *cfg_entries = (uint8_t *)(cfg + 1);
+ unsigned int expected_cfgtype = 0;
int k;
+#if defined(__FreeBSD__)
+ expected_cfgtype = PTNETMAP_CFGTYPE_BHYVE;
+#elif defined(linux)
+ expected_cfgtype = PTNETMAP_CFGTYPE_QEMU;
+#endif
+ if (cfg->cfgtype != expected_cfgtype) {
+ D("Unsupported cfgtype %u", cfg->cfgtype);
+ return EINVAL;
+ }
+
num_rings = pth_na->up.num_tx_rings +
pth_na->up.num_rx_rings;
@@ -695,7 +706,7 @@ ptnetmap_create_kctxs(struct netmap_pt_host_adapter *pth_na,
}
ptns->kctxs[k] = nm_os_kctx_create(&nmk_cfg,
- cfg->cfgtype, cfg_entries + k * cfg->entry_size);
+ cfg_entries + k * cfg->entry_size);
if (ptns->kctxs[k] == NULL) {
goto err;
}
@@ -761,34 +772,6 @@ ptnetmap_stop_kctx_workers(struct netmap_pt_host_adapter *pth_na)
}
}
-static struct ptnetmap_cfg *
-ptnetmap_read_cfg(struct nmreq *nmr)
-{
- uintptr_t *nmr_ptncfg = (uintptr_t *)&nmr->nr_arg1;
- struct ptnetmap_cfg *cfg;
- struct ptnetmap_cfg tmp;
- size_t cfglen;
-
- if (copyin((const void *)*nmr_ptncfg, &tmp, sizeof(tmp))) {
- D("Partial copyin() failed");
- return NULL;
- }
-
- cfglen = sizeof(tmp) + tmp.num_rings * tmp.entry_size;
- cfg = nm_os_malloc(cfglen);
- if (!cfg) {
- return NULL;
- }
-
- if (copyin((const void *)*nmr_ptncfg, cfg, cfglen)) {
- D("Full copyin() failed");
- nm_os_free(cfg);
- return NULL;
- }
-
- return cfg;
-}
-
static int nm_unused_notify(struct netmap_kring *, int);
static int nm_pt_host_notify(struct netmap_kring *, int);
@@ -864,14 +847,14 @@ ptnetmap_create(struct netmap_pt_host_adapter *pth_na,
}
for (i = 0; i < pth_na->parent->num_rx_rings; i++) {
- pth_na->up.rx_rings[i].save_notify =
- pth_na->up.rx_rings[i].nm_notify;
- pth_na->up.rx_rings[i].nm_notify = nm_pt_host_notify;
+ pth_na->up.rx_rings[i]->save_notify =
+ pth_na->up.rx_rings[i]->nm_notify;
+ pth_na->up.rx_rings[i]->nm_notify = nm_pt_host_notify;
}
for (i = 0; i < pth_na->parent->num_tx_rings; i++) {
- pth_na->up.tx_rings[i].save_notify =
- pth_na->up.tx_rings[i].nm_notify;
- pth_na->up.tx_rings[i].nm_notify = nm_pt_host_notify;
+ pth_na->up.tx_rings[i]->save_notify =
+ pth_na->up.tx_rings[i]->nm_notify;
+ pth_na->up.tx_rings[i]->nm_notify = nm_pt_host_notify;
}
#ifdef RATE
@@ -912,14 +895,14 @@ ptnetmap_delete(struct netmap_pt_host_adapter *pth_na)
pth_na->parent->na_flags = pth_na->parent_na_flags;
for (i = 0; i < pth_na->parent->num_rx_rings; i++) {
- pth_na->up.rx_rings[i].nm_notify =
- pth_na->up.rx_rings[i].save_notify;
- pth_na->up.rx_rings[i].save_notify = NULL;
+ pth_na->up.rx_rings[i]->nm_notify =
+ pth_na->up.rx_rings[i]->save_notify;
+ pth_na->up.rx_rings[i]->save_notify = NULL;
}
for (i = 0; i < pth_na->parent->num_tx_rings; i++) {
- pth_na->up.tx_rings[i].nm_notify =
- pth_na->up.tx_rings[i].save_notify;
- pth_na->up.tx_rings[i].save_notify = NULL;
+ pth_na->up.tx_rings[i]->nm_notify =
+ pth_na->up.tx_rings[i]->save_notify;
+ pth_na->up.tx_rings[i]->save_notify = NULL;
}
/* Destroy kernel contexts. */
@@ -941,66 +924,55 @@ ptnetmap_delete(struct netmap_pt_host_adapter *pth_na)
/*
* Called by netmap_ioctl().
- * Operation is indicated in nmr->nr_cmd.
+ * Operation is selected by the create argument (nonzero: create, zero: delete).
*
* Called without NMG_LOCK.
*/
int
-ptnetmap_ctl(struct nmreq *nmr, struct netmap_adapter *na)
+ptnetmap_ctl(const char *nr_name, int create, struct netmap_adapter *na)
{
- struct netmap_pt_host_adapter *pth_na;
- struct ptnetmap_cfg *cfg;
- char *name;
- int cmd, error = 0;
-
- name = nmr->nr_name;
- cmd = nmr->nr_cmd;
-
- DBG(D("name: %s", name));
+ struct netmap_pt_host_adapter *pth_na;
+ struct ptnetmap_cfg *cfg = NULL;
+ int error = 0;
- if (!nm_ptnetmap_host_on(na)) {
- D("ERROR Netmap adapter %p is not a ptnetmap host adapter", na);
- error = ENXIO;
- goto done;
- }
- pth_na = (struct netmap_pt_host_adapter *)na;
+ DBG(D("name: %s", nr_name));
- NMG_LOCK();
- switch (cmd) {
- case NETMAP_PT_HOST_CREATE:
- /* Read hypervisor configuration from userspace. */
- cfg = ptnetmap_read_cfg(nmr);
- if (!cfg)
- break;
- /* Create ptnetmap state (kctxs, ...) and switch parent
- * adapter to ptnetmap mode. */
- error = ptnetmap_create(pth_na, cfg);
- nm_os_free(cfg);
- if (error)
- break;
- /* Start kthreads. */
- error = ptnetmap_start_kctx_workers(pth_na);
- if (error)
- ptnetmap_delete(pth_na);
- break;
-
- case NETMAP_PT_HOST_DELETE:
- /* Stop kthreads. */
- ptnetmap_stop_kctx_workers(pth_na);
- /* Switch parent adapter back to normal mode and destroy
- * ptnetmap state (kthreads, ...). */
- ptnetmap_delete(pth_na);
- break;
+ if (!nm_ptnetmap_host_on(na)) {
+ D("ERROR Netmap adapter %p is not a ptnetmap host adapter",
+ na);
+ return ENXIO;
+ }
+ pth_na = (struct netmap_pt_host_adapter *)na;
- default:
- D("ERROR invalid cmd (nmr->nr_cmd) (0x%x)", cmd);
- error = EINVAL;
- break;
- }
- NMG_UNLOCK();
+ NMG_LOCK();
+ if (create) {
+ /* Read hypervisor configuration from userspace. */
+ /* TODO */
+ if (!cfg) {
+ goto out;
+ }
+ /* Create ptnetmap state (kctxs, ...) and switch parent
+ * adapter to ptnetmap mode. */
+ error = ptnetmap_create(pth_na, cfg);
+ nm_os_free(cfg);
+ if (error) {
+ goto out;
+ }
+ /* Start kthreads. */
+ error = ptnetmap_start_kctx_workers(pth_na);
+ if (error)
+ ptnetmap_delete(pth_na);
+ } else {
+ /* Stop kthreads. */
+ ptnetmap_stop_kctx_workers(pth_na);
+ /* Switch parent adapter back to normal mode and destroy
+ * ptnetmap state (kthreads, ...). */
+ ptnetmap_delete(pth_na);
+ }
+out:
+ NMG_UNLOCK();
-done:
- return error;
+ return error;
}
/* nm_notify callbacks for ptnetmap */
@@ -1048,8 +1020,7 @@ nm_unused_notify(struct netmap_kring *kring, int flags)
/* nm_config callback for bwrap */
static int
-nm_pt_host_config(struct netmap_adapter *na, u_int *txr, u_int *txd,
- u_int *rxr, u_int *rxd)
+nm_pt_host_config(struct netmap_adapter *na, struct nm_config_info *info)
{
struct netmap_pt_host_adapter *pth_na =
(struct netmap_pt_host_adapter *)na;
@@ -1061,12 +1032,11 @@ nm_pt_host_config(struct netmap_adapter *na, u_int *txr, u_int *txd,
/* forward the request */
error = netmap_update_config(parent);
- *rxr = na->num_rx_rings = parent->num_rx_rings;
- *txr = na->num_tx_rings = parent->num_tx_rings;
- *txd = na->num_tx_desc = parent->num_tx_desc;
- *rxd = na->num_rx_desc = parent->num_rx_desc;
-
- DBG(D("rxr: %d txr: %d txd: %d rxd: %d", *rxr, *txr, *txd, *rxd));
+ info->num_rx_rings = na->num_rx_rings = parent->num_rx_rings;
+ info->num_tx_rings = na->num_tx_rings = parent->num_tx_rings;
+ info->num_tx_descs = na->num_tx_desc = parent->num_tx_desc;
+ info->num_rx_descs = na->num_rx_desc = parent->num_rx_desc;
+ info->rx_buf_maxsize = na->rx_buf_maxsize = parent->rx_buf_maxsize;
return error;
}
@@ -1107,7 +1077,7 @@ nm_pt_host_krings_create(struct netmap_adapter *na)
* host rings independently on what the regif asked for:
* these rings are needed by the guest ptnetmap adapter
* anyway. */
- kring = &NMR(na, t)[nma_get_nrings(na, t)];
+ kring = NMR(na, t)[nma_get_nrings(na, t)];
kring->nr_kflags |= NKR_NEEDRING;
}
@@ -1187,17 +1157,18 @@ nm_pt_host_dtor(struct netmap_adapter *na)
/* check if nmr is a request for a ptnetmap adapter that we can satisfy */
int
-netmap_get_pt_host_na(struct nmreq *nmr, struct netmap_adapter **na,
+netmap_get_pt_host_na(struct nmreq_header *hdr, struct netmap_adapter **na,
struct netmap_mem_d *nmd, int create)
{
- struct nmreq parent_nmr;
+ struct nmreq_register *req = (struct nmreq_register *)hdr->nr_body;
+ struct nmreq_register preq;
struct netmap_adapter *parent; /* target adapter */
struct netmap_pt_host_adapter *pth_na;
struct ifnet *ifp = NULL;
int error;
/* Check if it is a request for a ptnetmap adapter */
- if ((nmr->nr_flags & (NR_PTNETMAP_HOST)) == 0) {
+ if ((req->nr_flags & (NR_PTNETMAP_HOST)) == 0) {
return 0;
}
@@ -1210,12 +1181,14 @@ netmap_get_pt_host_na(struct nmreq *nmr, struct netmap_adapter **na,
}
/* first, try to find the adapter that we want to passthrough
- * We use the same nmr, after we have turned off the ptnetmap flag.
+ * We use the same req, after we have turned off the ptnetmap flag.
* In this way we can potentially passthrough everything netmap understands.
*/
- memcpy(&parent_nmr, nmr, sizeof(parent_nmr));
- parent_nmr.nr_flags &= ~(NR_PTNETMAP_HOST);
- error = netmap_get_na(&parent_nmr, &parent, &ifp, nmd, create);
+ memcpy(&preq, req, sizeof(preq));
+ preq.nr_flags &= ~(NR_PTNETMAP_HOST);
+ hdr->nr_body = (uint64_t)&preq;
+ error = netmap_get_na(hdr, &parent, &ifp, nmd, create);
+ hdr->nr_body = (uint64_t)req;
if (error) {
D("parent lookup failed: %d", error);
goto put_out_noputparent;
diff --git a/sys/dev/netmap/netmap_vale.c b/sys/dev/netmap/netmap_vale.c
index d364699bce269..6e0748acd5300 100644
--- a/sys/dev/netmap/netmap_vale.c
+++ b/sys/dev/netmap/netmap_vale.c
@@ -166,7 +166,7 @@ SYSCTL_INT(_dev_netmap, OID_AUTO, bridge_batch, CTLFLAG_RW, &bridge_batch, 0,
"Max batch size to be used in the bridge");
SYSEND;
-static int netmap_vp_create(struct nmreq *, struct ifnet *,
+static int netmap_vp_create(struct nmreq_header *hdr, struct ifnet *,
struct netmap_mem_d *nmd, struct netmap_vp_adapter **);
static int netmap_vp_reg(struct netmap_adapter *na, int onoff);
static int netmap_bwrap_reg(struct netmap_adapter *, int onoff);
@@ -188,6 +188,9 @@ struct nm_hash_ent {
uint64_t ports;
};
+/* Holds the default callbacks */
+static struct netmap_bdg_ops default_bdg_ops = {netmap_bdg_learning, NULL, NULL};
+
/*
* nm_bridge is a descriptor for a VALE switch.
* Interfaces for a bridge are all in bdg_ports[].
@@ -201,37 +204,50 @@ struct nm_hash_ent {
* bdg_lock protects accesses to the bdg_ports array.
* This is a rw lock (or equivalent).
*/
+#define NM_BDG_IFNAMSIZ IFNAMSIZ
struct nm_bridge {
/* XXX what is the proper alignment/layout ? */
BDG_RWLOCK_T bdg_lock; /* protects bdg_ports */
int bdg_namelen;
- uint32_t bdg_active_ports; /* 0 means free */
- char bdg_basename[IFNAMSIZ];
+ uint32_t bdg_active_ports;
+ char bdg_basename[NM_BDG_IFNAMSIZ];
/* Indexes of active ports (up to active_ports)
* and all other remaining ports.
*/
- uint8_t bdg_port_index[NM_BDG_MAXPORTS];
+ uint32_t bdg_port_index[NM_BDG_MAXPORTS];
+ /* used by netmap_bdg_detach_common() */
+ uint32_t tmp_bdg_port_index[NM_BDG_MAXPORTS];
struct netmap_vp_adapter *bdg_ports[NM_BDG_MAXPORTS];
-
/*
- * The function to decide the destination port.
+ * Programmable lookup functions to figure out the destination port.
* It returns either of an index of the destination port,
* NM_BDG_BROADCAST to broadcast this packet, or NM_BDG_NOPORT not to
* forward this packet. ring_nr is the source ring index, and the
* function may overwrite this value to forward this packet to a
* different ring index.
- * This function must be set by netmap_bdg_ctl().
+ * The function is set by netmap_bdg_regops().
+ */
+ struct netmap_bdg_ops *bdg_ops;
+
+ /*
+ * Contains the data structure used by the bdg_ops.lookup function.
+ * By default it points to *ht, which is allocated on attach and used by the default lookup;
+ * otherwise it points to the data structure received by netmap_bdg_regops().
*/
- struct netmap_bdg_ops bdg_ops;
+ void *private_data;
+ struct nm_hash_ent *ht;
- /* the forwarding table, MAC+ports.
- * XXX should be changed to an argument to be passed to
- * the lookup function
+ /* Currently used to specify if the bridge is still in use while empty and
+ * if it has been put in exclusive mode by an external module, see netmap_bdg_regops()
+ * and netmap_bdg_create().
*/
- struct nm_hash_ent *ht; // allocated on attach
+#define NM_BDG_ACTIVE 1
+#define NM_BDG_EXCLUSIVE 2
+ uint8_t bdg_flags;
+
#ifdef CONFIG_NET_NS
struct net *ns;
@@ -309,18 +325,17 @@ nm_vale_name_validate(const char *name)
return -1;
}
- for (i = 0; name[i]; i++) {
+ for (i = 0; i < NM_BDG_IFNAMSIZ && name[i]; i++) {
if (name[i] == ':') {
- if (colon_pos != -1) {
- return -1;
- }
colon_pos = i;
+ break;
} else if (!nm_is_id_char(name[i])) {
return -1;
}
}
- if (i >= IFNAMSIZ) {
+ if (strlen(name) - colon_pos > IFNAMSIZ) {
+ /* interface name too long */
return -1;
}
@@ -355,7 +370,7 @@ nm_find_bridge(const char *name, int create)
for (i = 0; i < num_bridges; i++) {
struct nm_bridge *x = bridges + i;
- if (x->bdg_active_ports == 0) {
+ if ((x->bdg_flags & NM_BDG_ACTIVE) + x->bdg_active_ports == 0) {
if (create && b == NULL)
b = x; /* record empty slot */
} else if (x->bdg_namelen != namelen) {
@@ -381,7 +396,9 @@ nm_find_bridge(const char *name, int create)
for (i = 0; i < NM_BDG_MAXPORTS; i++)
b->bdg_port_index[i] = i;
/* set the default function */
- b->bdg_ops.lookup = netmap_bdg_learning;
+ b->bdg_ops = &default_bdg_ops;
+ b->private_data = b->ht;
+ b->bdg_flags = 0;
NM_BNS_GET(b);
}
return b;
@@ -395,15 +412,15 @@ static void
nm_free_bdgfwd(struct netmap_adapter *na)
{
int nrings, i;
- struct netmap_kring *kring;
+ struct netmap_kring **kring;
NMG_LOCK_ASSERT();
nrings = na->num_tx_rings;
kring = na->tx_rings;
for (i = 0; i < nrings; i++) {
- if (kring[i].nkr_ft) {
- nm_os_free(kring[i].nkr_ft);
- kring[i].nkr_ft = NULL; /* protect from freeing twice */
+ if (kring[i]->nkr_ft) {
+ nm_os_free(kring[i]->nkr_ft);
+ kring[i]->nkr_ft = NULL; /* protect from freeing twice */
}
}
}
@@ -416,7 +433,7 @@ static int
nm_alloc_bdgfwd(struct netmap_adapter *na)
{
int nrings, l, i, num_dstq;
- struct netmap_kring *kring;
+ struct netmap_kring **kring;
NMG_LOCK_ASSERT();
/* all port:rings + broadcast */
@@ -442,8 +459,23 @@ nm_alloc_bdgfwd(struct netmap_adapter *na)
dstq[j].bq_head = dstq[j].bq_tail = NM_FT_NULL;
dstq[j].bq_len = 0;
}
- kring[i].nkr_ft = ft;
+ kring[i]->nkr_ft = ft;
+ }
+ return 0;
+}
+
+static int
+netmap_bdg_free(struct nm_bridge *b)
+{
+ if ((b->bdg_flags & NM_BDG_ACTIVE) + b->bdg_active_ports != 0) {
+ return EBUSY;
}
+
+ ND("marking bridge %s as free", b->bdg_basename);
+ nm_os_free(b->ht);
+ b->bdg_ops = NULL;
+ b->bdg_flags = 0;
+ NM_BNS_PUT(b);
return 0;
}
@@ -456,7 +488,7 @@ netmap_bdg_detach_common(struct nm_bridge *b, int hw, int sw)
{
int s_hw = hw, s_sw = sw;
int i, lim =b->bdg_active_ports;
- uint8_t tmp[NM_BDG_MAXPORTS];
+ uint32_t *tmp = b->tmp_bdg_port_index;
/*
New algorithm:
@@ -473,7 +505,7 @@ netmap_bdg_detach_common(struct nm_bridge *b, int hw, int sw)
/* make a copy of the list of active ports, update it,
* and then copy back within BDG_WLOCK().
*/
- memcpy(tmp, b->bdg_port_index, sizeof(tmp));
+ memcpy(b->tmp_bdg_port_index, b->bdg_port_index, sizeof(b->tmp_bdg_port_index));
for (i = 0; (hw >= 0 || sw >= 0) && i < lim; ) {
if (hw >= 0 && tmp[i] == hw) {
ND("detach hw %d at %d", hw, i);
@@ -496,35 +528,117 @@ netmap_bdg_detach_common(struct nm_bridge *b, int hw, int sw)
}
BDG_WLOCK(b);
- if (b->bdg_ops.dtor)
- b->bdg_ops.dtor(b->bdg_ports[s_hw]);
+ if (b->bdg_ops->dtor)
+ b->bdg_ops->dtor(b->bdg_ports[s_hw]);
b->bdg_ports[s_hw] = NULL;
if (s_sw >= 0) {
b->bdg_ports[s_sw] = NULL;
}
- memcpy(b->bdg_port_index, tmp, sizeof(tmp));
+ memcpy(b->bdg_port_index, b->tmp_bdg_port_index, sizeof(b->tmp_bdg_port_index));
b->bdg_active_ports = lim;
BDG_WUNLOCK(b);
ND("now %d active ports", lim);
- if (lim == 0) {
- ND("marking bridge %s as free", b->bdg_basename);
- nm_os_free(b->ht);
- bzero(&b->bdg_ops, sizeof(b->bdg_ops));
- NM_BNS_PUT(b);
+ netmap_bdg_free(b);
+}
+
+static inline void *
+nm_bdg_get_auth_token(struct nm_bridge *b)
+{
+ return b->ht;
+}
+
+/* bridge not in exclusive mode ==> always valid
+ * bridge in exclusive mode (created through netmap_bdg_create()) ==> check authentication token
+ */
+static inline int
+nm_bdg_valid_auth_token(struct nm_bridge *b, void *auth_token)
+{
+ return !(b->bdg_flags & NM_BDG_EXCLUSIVE) || b->ht == auth_token;
+}
+
+/* Allows external modules to create bridges in exclusive mode,
+ * returns an authentication token that the external module will need
+ * to provide during nm_bdg_ctl_{attach, detach}(), netmap_bdg_regops(),
+ * and nm_bdg_update_private_data() operations.
+ * Successfully executed if ret != NULL and *return_status == 0.
+ */
+void *
+netmap_bdg_create(const char *bdg_name, int *return_status)
+{
+ struct nm_bridge *b = NULL;
+ void *ret = NULL;
+
+ NMG_LOCK();
+ b = nm_find_bridge(bdg_name, 0 /* don't create */);
+ if (b) {
+ *return_status = EEXIST;
+ goto unlock_bdg_create;
+ }
+
+ b = nm_find_bridge(bdg_name, 1 /* create */);
+ if (!b) {
+ *return_status = ENOMEM;
+ goto unlock_bdg_create;
+ }
+
+ b->bdg_flags |= NM_BDG_ACTIVE | NM_BDG_EXCLUSIVE;
+ ret = nm_bdg_get_auth_token(b);
+ *return_status = 0;
+
+unlock_bdg_create:
+ NMG_UNLOCK();
+ return ret;
+}
+
+/* Allows external modules to destroy a bridge created through
+ * netmap_bdg_create(), the bridge must be empty.
+ */
+int
+netmap_bdg_destroy(const char *bdg_name, void *auth_token)
+{
+ struct nm_bridge *b = NULL;
+ int ret = 0;
+
+ NMG_LOCK();
+ b = nm_find_bridge(bdg_name, 0 /* don't create */);
+ if (!b) {
+ ret = ENXIO;
+ goto unlock_bdg_free;
+ }
+
+ if (!nm_bdg_valid_auth_token(b, auth_token)) {
+ ret = EACCES;
+ goto unlock_bdg_free;
+ }
+ if (!(b->bdg_flags & NM_BDG_EXCLUSIVE)) {
+ ret = EINVAL;
+ goto unlock_bdg_free;
}
+
+ b->bdg_flags &= ~(NM_BDG_EXCLUSIVE | NM_BDG_ACTIVE);
+ ret = netmap_bdg_free(b);
+ if (ret) {
+ b->bdg_flags |= NM_BDG_EXCLUSIVE | NM_BDG_ACTIVE;
+ }
+
+unlock_bdg_free:
+ NMG_UNLOCK();
+ return ret;
}
+
+
/* nm_bdg_ctl callback for VALE ports */
static int
-netmap_vp_bdg_ctl(struct netmap_adapter *na, struct nmreq *nmr, int attach)
+netmap_vp_bdg_ctl(struct nmreq_header *hdr, struct netmap_adapter *na)
{
struct netmap_vp_adapter *vpna = (struct netmap_vp_adapter *)na;
struct nm_bridge *b = vpna->na_bdg;
- (void)nmr; // XXX merge ?
- if (attach)
+ if (hdr->nr_reqtype == NETMAP_REQ_VALE_ATTACH) {
return 0; /* nothing to do */
+ }
if (b) {
netmap_set_all_rings(na, 0 /* disable */);
netmap_bdg_detach_common(b, vpna->bdg_port, -1);
@@ -560,8 +674,38 @@ netmap_vp_dtor(struct netmap_adapter *na)
}
}
+/* creates a persistent VALE port */
+int
+nm_vi_create(struct nmreq_header *hdr)
+{
+ struct nmreq_vale_newif *req =
+ (struct nmreq_vale_newif *)hdr->nr_body;
+ int error = 0;
+ /* Build a nmreq_register out of the nmreq_vale_newif,
+ * so that we can call netmap_get_bdg_na(). */
+ struct nmreq_register regreq;
+ bzero(&regreq, sizeof(regreq));
+ regreq.nr_tx_slots = req->nr_tx_slots;
+ regreq.nr_rx_slots = req->nr_rx_slots;
+ regreq.nr_tx_rings = req->nr_tx_rings;
+ regreq.nr_rx_rings = req->nr_rx_rings;
+ regreq.nr_mem_id = req->nr_mem_id;
+ hdr->nr_reqtype = NETMAP_REQ_REGISTER;
+ hdr->nr_body = (uint64_t)&regreq;
+ error = netmap_vi_create(hdr, 0 /* no autodelete */);
+ hdr->nr_reqtype = NETMAP_REQ_VALE_NEWIF;
+ hdr->nr_body = (uint64_t)req;
+ /* Write back to the original struct. */
+ req->nr_tx_slots = regreq.nr_tx_slots;
+ req->nr_rx_slots = regreq.nr_rx_slots;
+ req->nr_tx_rings = regreq.nr_tx_rings;
+ req->nr_rx_rings = regreq.nr_rx_rings;
+ req->nr_mem_id = regreq.nr_mem_id;
+ return error;
+}
+
/* remove a persistent VALE port from the system */
-static int
+int
nm_vi_destroy(const char *name)
{
struct ifnet *ifp;
@@ -611,17 +755,14 @@ err:
}
static int
-nm_update_info(struct nmreq *nmr, struct netmap_adapter *na)
+nm_update_info(struct nmreq_register *req, struct netmap_adapter *na)
{
- uint64_t memsize;
- int ret;
- nmr->nr_rx_rings = na->num_rx_rings;
- nmr->nr_tx_rings = na->num_tx_rings;
- nmr->nr_rx_slots = na->num_rx_desc;
- nmr->nr_tx_slots = na->num_tx_desc;
- ret = netmap_mem_get_info(na->nm_mem, &memsize, NULL, &nmr->nr_arg2);
- nmr->nr_memsize = (uint32_t)memsize;
- return ret;
+ req->nr_rx_rings = na->num_rx_rings;
+ req->nr_tx_rings = na->num_tx_rings;
+ req->nr_rx_slots = na->num_rx_desc;
+ req->nr_tx_slots = na->num_tx_desc;
+ return netmap_mem_get_info(na->nm_mem, &req->nr_memsize, NULL,
+ &req->nr_mem_id);
}
/*
@@ -629,22 +770,30 @@ nm_update_info(struct nmreq *nmr, struct netmap_adapter *na)
* The interface will be attached to a bridge later.
*/
int
-netmap_vi_create(struct nmreq *nmr, int autodelete)
+netmap_vi_create(struct nmreq_header *hdr, int autodelete)
{
+ struct nmreq_register *req = (struct nmreq_register *)hdr->nr_body;
struct ifnet *ifp;
struct netmap_vp_adapter *vpna;
struct netmap_mem_d *nmd = NULL;
int error;
+ if (hdr->nr_reqtype != NETMAP_REQ_REGISTER) {
+ return EINVAL;
+ }
+
/* don't include VALE prefix */
- if (!strncmp(nmr->nr_name, NM_BDG_NAME, strlen(NM_BDG_NAME)))
+ if (!strncmp(hdr->nr_name, NM_BDG_NAME, strlen(NM_BDG_NAME)))
+ return EINVAL;
+ if (strlen(hdr->nr_name) >= IFNAMSIZ) {
return EINVAL;
- ifp = ifunit_ref(nmr->nr_name);
+ }
+ ifp = ifunit_ref(hdr->nr_name);
if (ifp) { /* already exist, cannot create new one */
error = EEXIST;
NMG_LOCK();
if (NM_NA_VALID(ifp)) {
- int update_err = nm_update_info(nmr, NA(ifp));
+ int update_err = nm_update_info(req, NA(ifp));
if (update_err)
error = update_err;
}
@@ -652,20 +801,20 @@ netmap_vi_create(struct nmreq *nmr, int autodelete)
if_rele(ifp);
return error;
}
- error = nm_os_vi_persist(nmr->nr_name, &ifp);
+ error = nm_os_vi_persist(hdr->nr_name, &ifp);
if (error)
return error;
NMG_LOCK();
- if (nmr->nr_arg2) {
- nmd = netmap_mem_find(nmr->nr_arg2);
+ if (req->nr_mem_id) {
+ nmd = netmap_mem_find(req->nr_mem_id);
if (nmd == NULL) {
error = EINVAL;
goto err_1;
}
}
/* netmap_vp_create creates a struct netmap_vp_adapter */
- error = netmap_vp_create(nmr, ifp, nmd, &vpna);
+ error = netmap_vp_create(hdr, ifp, nmd, &vpna);
if (error) {
D("error %d", error);
goto err_1;
@@ -679,15 +828,15 @@ netmap_vi_create(struct nmreq *nmr, int autodelete)
}
NM_ATTACH_NA(ifp, &vpna->up);
/* return the updated info */
- error = nm_update_info(nmr, &vpna->up);
+ error = nm_update_info(req, &vpna->up);
if (error) {
goto err_2;
}
- D("returning nr_arg2 %d", nmr->nr_arg2);
+ ND("returning nr_mem_id %d", req->nr_mem_id);
if (nmd)
netmap_mem_put(nmd);
NMG_UNLOCK();
- D("created %s", ifp->if_xname);
+ ND("created %s", ifp->if_xname);
return 0;
err_2:
@@ -711,16 +860,17 @@ err_1:
* (*na != NULL && return == 0).
*/
int
-netmap_get_bdg_na(struct nmreq *nmr, struct netmap_adapter **na,
+netmap_get_bdg_na(struct nmreq_header *hdr, struct netmap_adapter **na,
struct netmap_mem_d *nmd, int create)
{
- char *nr_name = nmr->nr_name;
+ char *nr_name = hdr->nr_name;
const char *ifname;
struct ifnet *ifp = NULL;
int error = 0;
struct netmap_vp_adapter *vpna, *hostna = NULL;
struct nm_bridge *b;
- int i, j, cand = -1, cand2 = -1;
+ uint32_t i, j;
+ uint32_t cand = NM_BDG_NOPORT, cand2 = NM_BDG_NOPORT;
int needed;
*na = NULL; /* default return value */
@@ -780,17 +930,17 @@ netmap_get_bdg_na(struct nmreq *nmr, struct netmap_adapter **na,
ifname = nr_name + b->bdg_namelen + 1;
ifp = ifunit_ref(ifname);
if (!ifp) {
- /* Create an ephemeral virtual port
- * This block contains all the ephemeral-specific logics
+ /* Create an ephemeral virtual port.
+ * This block contains all the ephemeral-specific logic.
*/
- if (nmr->nr_cmd) {
- /* nr_cmd must be 0 for a virtual port */
+
+ if (hdr->nr_reqtype != NETMAP_REQ_REGISTER) {
error = EINVAL;
goto out;
}
/* bdg_netmap_attach creates a struct netmap_adapter */
- error = netmap_vp_create(nmr, NULL, nmd, &vpna);
+ error = netmap_vp_create(hdr, NULL, nmd, &vpna);
if (error) {
D("error %d", error);
goto out;
@@ -798,15 +948,16 @@ netmap_get_bdg_na(struct nmreq *nmr, struct netmap_adapter **na,
/* shortcut - we can skip get_hw_na(),
* ownership check and nm_bdg_attach()
*/
+
} else {
struct netmap_adapter *hw;
/* the vale:nic syntax is only valid for some commands */
- switch (nmr->nr_cmd) {
- case NETMAP_BDG_ATTACH:
- case NETMAP_BDG_DETACH:
- case NETMAP_BDG_POLLING_ON:
- case NETMAP_BDG_POLLING_OFF:
+ switch (hdr->nr_reqtype) {
+ case NETMAP_REQ_VALE_ATTACH:
+ case NETMAP_REQ_VALE_DETACH:
+ case NETMAP_REQ_VALE_POLLING_ENABLE:
+ case NETMAP_REQ_VALE_POLLING_DISABLE:
break; /* ok */
default:
error = EINVAL;
@@ -823,8 +974,14 @@ netmap_get_bdg_na(struct nmreq *nmr, struct netmap_adapter **na,
goto out;
vpna = hw->na_vp;
hostna = hw->na_hostvp;
- if (nmr->nr_arg1 != NETMAP_BDG_HOST)
- hostna = NULL;
+ if (hdr->nr_reqtype == NETMAP_REQ_VALE_ATTACH) {
+ /* Check if we need to skip the host rings. */
+ struct nmreq_vale_attach *areq =
+ (struct nmreq_vale_attach *)hdr->nr_body;
+ if (areq->reg.nr_mode != NR_REG_NIC_SW) {
+ hostna = NULL;
+ }
+ }
}
BDG_WLOCK(b);
@@ -854,34 +1011,46 @@ out:
return error;
}
-
-/* Process NETMAP_BDG_ATTACH */
-static int
-nm_bdg_ctl_attach(struct nmreq *nmr)
+/* Process NETMAP_REQ_VALE_ATTACH.
+ */
+int
+nm_bdg_ctl_attach(struct nmreq_header *hdr, void *auth_token)
{
+ struct nmreq_vale_attach *req =
+ (struct nmreq_vale_attach *)hdr->nr_body;
+ struct netmap_vp_adapter * vpna;
struct netmap_adapter *na;
struct netmap_mem_d *nmd = NULL;
+ struct nm_bridge *b = NULL;
int error;
NMG_LOCK();
+ /* permission check for modified bridges */
+ b = nm_find_bridge(hdr->nr_name, 0 /* don't create */);
+ if (b && !nm_bdg_valid_auth_token(b, auth_token)) {
+ error = EACCES;
+ goto unlock_exit;
+ }
- if (nmr->nr_arg2) {
- nmd = netmap_mem_find(nmr->nr_arg2);
+ if (req->reg.nr_mem_id) {
+ nmd = netmap_mem_find(req->reg.nr_mem_id);
if (nmd == NULL) {
error = EINVAL;
goto unlock_exit;
}
}
- /* XXX check existing one */
- error = netmap_get_bdg_na(nmr, &na, nmd, 0);
+ /* check for existing one */
+ error = netmap_get_bdg_na(hdr, &na, nmd, 0);
if (!error) {
error = EBUSY;
goto unref_exit;
}
- error = netmap_get_bdg_na(nmr, &na, nmd, 1 /* create if not exists */);
- if (error) /* no device */
+ error = netmap_get_bdg_na(hdr, &na,
+ nmd, 1 /* create if not exists */);
+ if (error) { /* no device */
goto unlock_exit;
+ }
if (na == NULL) { /* VALE prefix missing */
error = EINVAL;
@@ -897,11 +1066,13 @@ nm_bdg_ctl_attach(struct nmreq *nmr)
/* nop for VALE ports. The bwrap needs to put the hwna
* in netmap mode (see netmap_bwrap_bdg_ctl)
*/
- error = na->nm_bdg_ctl(na, nmr, 1);
+ error = na->nm_bdg_ctl(hdr, na);
if (error)
goto unref_exit;
ND("registered %s to netmap-mode", na->name);
}
+ vpna = (struct netmap_vp_adapter *)na;
+ req->port_index = vpna->bdg_port;
NMG_UNLOCK();
return 0;
@@ -918,15 +1089,26 @@ nm_is_bwrap(struct netmap_adapter *na)
return na->nm_register == netmap_bwrap_reg;
}
-/* process NETMAP_BDG_DETACH */
-static int
-nm_bdg_ctl_detach(struct nmreq *nmr)
+/* Process NETMAP_REQ_VALE_DETACH.
+ */
+int
+nm_bdg_ctl_detach(struct nmreq_header *hdr, void *auth_token)
{
+ struct nmreq_vale_detach *nmreq_det = (void *)hdr->nr_body;
+ struct netmap_vp_adapter *vpna;
struct netmap_adapter *na;
+ struct nm_bridge *b = NULL;
int error;
NMG_LOCK();
- error = netmap_get_bdg_na(nmr, &na, NULL, 0 /* don't create */);
+ /* permission check for modified bridges */
+ b = nm_find_bridge(hdr->nr_name, 0 /* don't create */);
+ if (b && !nm_bdg_valid_auth_token(b, auth_token)) {
+ error = EACCES;
+ goto unlock_exit;
+ }
+
+ error = netmap_get_bdg_na(hdr, &na, NULL, 0 /* don't create */);
if (error) { /* no device, or another bridge or user owns the device */
goto unlock_exit;
}
@@ -938,16 +1120,27 @@ nm_bdg_ctl_detach(struct nmreq *nmr)
((struct netmap_bwrap_adapter *)na)->na_polling_state) {
/* Don't detach a NIC with polling */
error = EBUSY;
- netmap_adapter_put(na);
- goto unlock_exit;
+ goto unref_exit;
}
+
+ vpna = (struct netmap_vp_adapter *)na;
+ if (na->na_vp != vpna) {
+ /* Trying to detach the first attachment of a persistent VALE
+ * port that is attached to two bridges.
+ */
+ error = EBUSY;
+ goto unref_exit;
+ }
+ nmreq_det->port_index = vpna->bdg_port;
+
if (na->nm_bdg_ctl) {
/* remove the port from bridge. The bwrap
* also needs to put the hwna in normal mode
*/
- error = na->nm_bdg_ctl(na, nmr, 0);
+ error = na->nm_bdg_ctl(hdr, na);
}
+unref_exit:
netmap_adapter_put(na);
unlock_exit:
NMG_UNLOCK();
@@ -968,7 +1161,7 @@ struct nm_bdg_polling_state {
bool configured;
bool stopped;
struct netmap_bwrap_adapter *bna;
- u_int reg;
+ uint32_t mode;
u_int qfirst;
u_int qlast;
u_int cpu_from;
@@ -982,7 +1175,7 @@ netmap_bwrap_polling(void *data, int is_kthread)
struct nm_bdg_kthread *nbk = data;
struct netmap_bwrap_adapter *bna;
u_int qfirst, qlast, i;
- struct netmap_kring *kring0, *kring;
+ struct netmap_kring **kring0, *kring;
if (!nbk)
return;
@@ -992,7 +1185,7 @@ netmap_bwrap_polling(void *data, int is_kthread)
kring0 = NMR(bna->hwna, NR_RX);
for (i = qfirst; i < qlast; i++) {
- kring = kring0 + i;
+ kring = kring0[i];
kring->nm_notify(kring, 0);
}
}
@@ -1012,7 +1205,8 @@ nm_bdg_create_kthreads(struct nm_bdg_polling_state *bps)
kcfg.use_kthread = 1;
for (i = 0; i < bps->ncpus; i++) {
struct nm_bdg_kthread *t = bps->kthreads + i;
- int all = (bps->ncpus == 1 && bps->reg == NR_REG_ALL_NIC);
+ int all = (bps->ncpus == 1 &&
+ bps->mode == NETMAP_POLLING_MODE_SINGLE_CPU);
int affinity = bps->cpu_from + i;
t->bps = bps;
@@ -1023,7 +1217,7 @@ nm_bdg_create_kthreads(struct nm_bdg_polling_state *bps)
kcfg.type = i;
kcfg.worker_private = t;
- t->nmk = nm_os_kctx_create(&kcfg, 0, NULL);
+ t->nmk = nm_os_kctx_create(&kcfg, NULL);
if (t->nmk == NULL) {
goto cleanup;
}
@@ -1088,67 +1282,68 @@ nm_bdg_polling_stop_delete_kthreads(struct nm_bdg_polling_state *bps)
}
static int
-get_polling_cfg(struct nmreq *nmr, struct netmap_adapter *na,
- struct nm_bdg_polling_state *bps)
+get_polling_cfg(struct nmreq_vale_polling *req, struct netmap_adapter *na,
+ struct nm_bdg_polling_state *bps)
{
- int req_cpus, avail_cpus, core_from;
- u_int reg, i, qfirst, qlast;
+ unsigned int avail_cpus, core_from;
+ unsigned int qfirst, qlast;
+ uint32_t i = req->nr_first_cpu_id;
+ uint32_t req_cpus = req->nr_num_polling_cpus;
avail_cpus = nm_os_ncpus();
- req_cpus = nmr->nr_arg1;
if (req_cpus == 0) {
D("req_cpus must be > 0");
return EINVAL;
} else if (req_cpus >= avail_cpus) {
- D("for safety, we need at least one core left in the system");
+ D("Cannot use all the CPUs in the system");
return EINVAL;
}
- reg = nmr->nr_flags & NR_REG_MASK;
- i = nmr->nr_ringid & NETMAP_RING_MASK;
- /*
- * ONE_NIC: dedicate one core to one ring. If multiple cores
- * are specified, consecutive rings are also polled.
- * For example, if ringid=2 and 2 cores are given,
- * ring 2 and 3 are polled by core 2 and 3, respectively.
- * ALL_NIC: poll all the rings using a core specified by ringid.
- * the number of cores must be 1.
- */
- if (reg == NR_REG_ONE_NIC) {
+
+ if (req->nr_mode == NETMAP_POLLING_MODE_MULTI_CPU) {
+ /* Use a separate core for each ring. If nr_num_polling_cpus>1
+ * more consecutive rings are polled.
+ * For example, if nr_first_cpu_id=2 and nr_num_polling_cpus=2,
+ * ring 2 and 3 are polled by core 2 and 3, respectively. */
if (i + req_cpus > nma_get_nrings(na, NR_RX)) {
- D("only %d rings exist (ring %u-%u is given)",
- nma_get_nrings(na, NR_RX), i, i+req_cpus);
+ D("Rings %u-%u not in range (have %d rings)",
+ i, i + req_cpus, nma_get_nrings(na, NR_RX));
return EINVAL;
}
qfirst = i;
qlast = qfirst + req_cpus;
core_from = qfirst;
- } else if (reg == NR_REG_ALL_NIC) {
+
+ } else if (req->nr_mode == NETMAP_POLLING_MODE_SINGLE_CPU) {
+ /* Poll all the rings using a core specified by nr_first_cpu_id.
+ * The number of cores must be 1. */
if (req_cpus != 1) {
- D("ncpus must be 1 not %d for REG_ALL_NIC", req_cpus);
+ D("ncpus must be 1 for NETMAP_POLLING_MODE_SINGLE_CPU "
+ "(was %d)", req_cpus);
return EINVAL;
}
qfirst = 0;
qlast = nma_get_nrings(na, NR_RX);
core_from = i;
} else {
- D("reg must be ALL_NIC or ONE_NIC");
+ D("Invalid polling mode");
return EINVAL;
}
- bps->reg = reg;
+ bps->mode = req->nr_mode;
bps->qfirst = qfirst;
bps->qlast = qlast;
bps->cpu_from = core_from;
bps->ncpus = req_cpus;
D("%s qfirst %u qlast %u cpu_from %u ncpus %u",
- reg == NR_REG_ALL_NIC ? "REG_ALL_NIC" : "REG_ONE_NIC",
+ req->nr_mode == NETMAP_POLLING_MODE_MULTI_CPU ?
+ "MULTI" : "SINGLE",
qfirst, qlast, core_from, req_cpus);
return 0;
}
static int
-nm_bdg_ctl_polling_start(struct nmreq *nmr, struct netmap_adapter *na)
+nm_bdg_ctl_polling_start(struct nmreq_vale_polling *req, struct netmap_adapter *na)
{
struct nm_bdg_polling_state *bps;
struct netmap_bwrap_adapter *bna;
@@ -1166,7 +1361,7 @@ nm_bdg_ctl_polling_start(struct nmreq *nmr, struct netmap_adapter *na)
bps->configured = false;
bps->stopped = true;
- if (get_polling_cfg(nmr, na, bps)) {
+ if (get_polling_cfg(req, na, bps)) {
nm_os_free(bps);
return EINVAL;
}
@@ -1195,7 +1390,7 @@ nm_bdg_ctl_polling_start(struct nmreq *nmr, struct netmap_adapter *na)
}
static int
-nm_bdg_ctl_polling_stop(struct nmreq *nmr, struct netmap_adapter *na)
+nm_bdg_ctl_polling_stop(struct netmap_adapter *na)
{
struct netmap_bwrap_adapter *bna = (struct netmap_bwrap_adapter *)na;
struct nm_bdg_polling_state *bps;
@@ -1214,190 +1409,203 @@ nm_bdg_ctl_polling_stop(struct nmreq *nmr, struct netmap_adapter *na)
return 0;
}
-/* Called by either user's context (netmap_ioctl())
- * or external kernel modules (e.g., Openvswitch).
- * Operation is indicated in nmr->nr_cmd.
- * NETMAP_BDG_OPS that sets configure/lookup/dtor functions to the bridge
- * requires bdg_ops argument; the other commands ignore this argument.
- *
- * Called without NMG_LOCK.
- */
int
-netmap_bdg_ctl(struct nmreq *nmr, struct netmap_bdg_ops *bdg_ops)
+nm_bdg_polling(struct nmreq_header *hdr)
+{
+ struct nmreq_vale_polling *req =
+ (struct nmreq_vale_polling *)hdr->nr_body;
+ struct netmap_adapter *na = NULL;
+ int error = 0;
+
+ NMG_LOCK();
+ error = netmap_get_bdg_na(hdr, &na, NULL, /*create=*/0);
+ if (na && !error) {
+ if (!nm_is_bwrap(na)) {
+ error = EOPNOTSUPP;
+ } else if (hdr->nr_reqtype == NETMAP_BDG_POLLING_ON) {
+ error = nm_bdg_ctl_polling_start(req, na);
+ if (!error)
+ netmap_adapter_get(na);
+ } else {
+ error = nm_bdg_ctl_polling_stop(na);
+ if (!error)
+ netmap_adapter_put(na);
+ }
+ netmap_adapter_put(na);
+ } else if (!na && !error) {
+ /* Not VALE port. */
+ error = EINVAL;
+ }
+ NMG_UNLOCK();
+
+ return error;
+}
+
+/* Process NETMAP_REQ_VALE_LIST. */
+int
+netmap_bdg_list(struct nmreq_header *hdr)
{
+ struct nmreq_vale_list *req =
+ (struct nmreq_vale_list *)hdr->nr_body;
+ int namelen = strlen(hdr->nr_name);
struct nm_bridge *b, *bridges;
- struct netmap_adapter *na;
struct netmap_vp_adapter *vpna;
- char *name = nmr->nr_name;
- int cmd = nmr->nr_cmd, namelen = strlen(name);
int error = 0, i, j;
u_int num_bridges;
netmap_bns_getbridges(&bridges, &num_bridges);
- switch (cmd) {
- case NETMAP_BDG_NEWIF:
- error = netmap_vi_create(nmr, 0 /* no autodelete */);
- break;
-
- case NETMAP_BDG_DELIF:
- error = nm_vi_destroy(nmr->nr_name);
- break;
-
- case NETMAP_BDG_ATTACH:
- error = nm_bdg_ctl_attach(nmr);
- break;
-
- case NETMAP_BDG_DETACH:
- error = nm_bdg_ctl_detach(nmr);
- break;
-
- case NETMAP_BDG_LIST:
- /* this is used to enumerate bridges and ports */
- if (namelen) { /* look up indexes of bridge and port */
- if (strncmp(name, NM_BDG_NAME, strlen(NM_BDG_NAME))) {
- error = EINVAL;
- break;
- }
- NMG_LOCK();
- b = nm_find_bridge(name, 0 /* don't create */);
- if (!b) {
- error = ENOENT;
- NMG_UNLOCK();
- break;
- }
-
- error = 0;
- nmr->nr_arg1 = b - bridges; /* bridge index */
- nmr->nr_arg2 = NM_BDG_NOPORT;
- for (j = 0; j < b->bdg_active_ports; j++) {
- i = b->bdg_port_index[j];
- vpna = b->bdg_ports[i];
- if (vpna == NULL) {
- D("---AAAAAAAAARGH-------");
- continue;
- }
- /* the former and the latter identify a
- * virtual port and a NIC, respectively
- */
- if (!strcmp(vpna->up.name, name)) {
- nmr->nr_arg2 = i; /* port index */
- break;
- }
- }
- NMG_UNLOCK();
- } else {
- /* return the first non-empty entry starting from
- * bridge nr_arg1 and port nr_arg2.
- *
- * Users can detect the end of the same bridge by
- * seeing the new and old value of nr_arg1, and can
- * detect the end of all the bridge by error != 0
- */
- i = nmr->nr_arg1;
- j = nmr->nr_arg2;
-
- NMG_LOCK();
- for (error = ENOENT; i < NM_BRIDGES; i++) {
- b = bridges + i;
- for ( ; j < NM_BDG_MAXPORTS; j++) {
- if (b->bdg_ports[j] == NULL)
- continue;
- vpna = b->bdg_ports[j];
- strncpy(name, vpna->up.name, (size_t)IFNAMSIZ);
- error = 0;
- goto out;
- }
- j = 0; /* following bridges scan from 0 */
- }
- out:
- nmr->nr_arg1 = i;
- nmr->nr_arg2 = j;
- NMG_UNLOCK();
- }
- break;
-
- case NETMAP_BDG_REGOPS: /* XXX this should not be available from userspace */
- /* register callbacks to the given bridge.
- * nmr->nr_name may be just bridge's name (including ':'
- * if it is not just NM_NAME).
- */
- if (!bdg_ops) {
- error = EINVAL;
- break;
+ /* this is used to enumerate bridges and ports */
+ if (namelen) { /* look up indexes of bridge and port */
+ if (strncmp(hdr->nr_name, NM_BDG_NAME,
+ strlen(NM_BDG_NAME))) {
+ return EINVAL;
}
NMG_LOCK();
- b = nm_find_bridge(name, 0 /* don't create */);
+ b = nm_find_bridge(hdr->nr_name, 0 /* don't create */);
if (!b) {
- error = EINVAL;
- } else {
- b->bdg_ops = *bdg_ops;
+ NMG_UNLOCK();
+ return ENOENT;
}
- NMG_UNLOCK();
- break;
- case NETMAP_BDG_VNET_HDR:
- /* Valid lengths for the virtio-net header are 0 (no header),
- 10 and 12. */
- if (nmr->nr_arg1 != 0 &&
- nmr->nr_arg1 != sizeof(struct nm_vnet_hdr) &&
- nmr->nr_arg1 != 12) {
- error = EINVAL;
- break;
- }
- NMG_LOCK();
- error = netmap_get_bdg_na(nmr, &na, NULL, 0);
- if (na && !error) {
- vpna = (struct netmap_vp_adapter *)na;
- na->virt_hdr_len = nmr->nr_arg1;
- if (na->virt_hdr_len) {
- vpna->mfs = NETMAP_BUF_SIZE(na);
+ req->nr_bridge_idx = b - bridges; /* bridge index */
+ req->nr_port_idx = NM_BDG_NOPORT;
+ for (j = 0; j < b->bdg_active_ports; j++) {
+ i = b->bdg_port_index[j];
+ vpna = b->bdg_ports[i];
+ if (vpna == NULL) {
+ D("This should not happen");
+ continue;
+ }
+ /* the former and the latter identify a
+ * virtual port and a NIC, respectively
+ */
+ if (!strcmp(vpna->up.name, hdr->nr_name)) {
+ req->nr_port_idx = i; /* port index */
+ break;
}
- D("Using vnet_hdr_len %d for %p", na->virt_hdr_len, na);
- netmap_adapter_put(na);
- } else if (!na) {
- error = ENXIO;
}
NMG_UNLOCK();
- break;
+ } else {
+ /* return the first non-empty entry starting from
+ * bridge nr_bridge_idx and port nr_port_idx.
+ *
+ * Users can detect the end of the same bridge by
+ * comparing the new and old value of nr_bridge_idx, and
+ * can detect the end of all the bridges by error != 0
+ */
+ i = req->nr_bridge_idx;
+ j = req->nr_port_idx;
- case NETMAP_BDG_POLLING_ON:
- case NETMAP_BDG_POLLING_OFF:
NMG_LOCK();
- error = netmap_get_bdg_na(nmr, &na, NULL, 0);
- if (na && !error) {
- if (!nm_is_bwrap(na)) {
- error = EOPNOTSUPP;
- } else if (cmd == NETMAP_BDG_POLLING_ON) {
- error = nm_bdg_ctl_polling_start(nmr, na);
- if (!error)
- netmap_adapter_get(na);
- } else {
- error = nm_bdg_ctl_polling_stop(nmr, na);
- if (!error)
- netmap_adapter_put(na);
+ for (error = ENOENT; i < NM_BRIDGES; i++) {
+ b = bridges + i;
+ for ( ; j < NM_BDG_MAXPORTS; j++) {
+ if (b->bdg_ports[j] == NULL)
+ continue;
+ vpna = b->bdg_ports[j];
+ /* write back the VALE switch name */
+ strncpy(hdr->nr_name, vpna->up.name,
+ (size_t)IFNAMSIZ);
+ error = 0;
+ goto out;
}
- netmap_adapter_put(na);
+ j = 0; /* following bridges scan from 0 */
}
+ out:
+ req->nr_bridge_idx = i;
+ req->nr_port_idx = j;
NMG_UNLOCK();
- break;
+ }
+
+ return error;
+}
+
+/* Called by external kernel modules (e.g., Openvswitch)
+ * to set configure/lookup/dtor functions of a VALE instance.
+ * Register callbacks to the given bridge. 'name' may be just
+ * bridge's name (including ':' if it is not just NM_BDG_NAME).
+ *
+ * Called without NMG_LOCK.
+ */
+
+int
+netmap_bdg_regops(const char *name, struct netmap_bdg_ops *bdg_ops, void *private_data, void *auth_token)
+{
+ struct nm_bridge *b;
+ int error = 0;
+
+ NMG_LOCK();
+ b = nm_find_bridge(name, 0 /* don't create */);
+ if (!b) {
+ error = ENXIO;
+ goto unlock_regops;
+ }
+ if (!nm_bdg_valid_auth_token(b, auth_token)) {
+ error = EACCES;
+ goto unlock_regops;
+ }
+
+ BDG_WLOCK(b);
+ if (!bdg_ops) {
+ /* resetting the bridge */
+ bzero(b->ht, sizeof(struct nm_hash_ent) * NM_BDG_HASH);
+ b->bdg_ops = &default_bdg_ops;
+ b->private_data = b->ht;
+ } else {
+ /* modifying the bridge */
+ b->private_data = private_data;
+ b->bdg_ops = bdg_ops;
+ }
+ BDG_WUNLOCK(b);
+
+unlock_regops:
+ NMG_UNLOCK();
+ return error;
+}
+
+/* Called by external kernel modules (e.g., Openvswitch)
+ * to modify the private data previously given to regops().
+ * 'name' may be just bridge's name (including ':' if it
+ * is not just NM_BDG_NAME).
+ * Called without NMG_LOCK.
+ */
+int
+nm_bdg_update_private_data(const char *name, bdg_update_private_data_fn_t callback,
+ void *callback_data, void *auth_token)
+{
+ void *private_data = NULL;
+ struct nm_bridge *b;
+ int error = 0;
- default:
- D("invalid cmd (nmr->nr_cmd) (0x%x)", cmd);
+ NMG_LOCK();
+ b = nm_find_bridge(name, 0 /* don't create */);
+ if (!b) {
error = EINVAL;
- break;
+ goto unlock_update_priv;
+ }
+ if (!nm_bdg_valid_auth_token(b, auth_token)) {
+ error = EACCES;
+ goto unlock_update_priv;
}
+ BDG_WLOCK(b);
+ private_data = callback(b->private_data, callback_data, &error);
+ b->private_data = private_data;
+ BDG_WUNLOCK(b);
+
+unlock_update_priv:
+ NMG_UNLOCK();
return error;
}
int
-netmap_bdg_config(struct nmreq *nmr)
+netmap_bdg_config(struct nm_ifreq *nr)
{
struct nm_bridge *b;
int error = EINVAL;
NMG_LOCK();
- b = nm_find_bridge(nmr->nr_name, 0);
+ b = nm_find_bridge(nr->nifr_name, 0);
if (!b) {
NMG_UNLOCK();
return error;
@@ -1405,8 +1613,8 @@ netmap_bdg_config(struct nmreq *nmr)
NMG_UNLOCK();
/* Don't call config() with NMG_LOCK() held */
BDG_RLOCK(b);
- if (b->bdg_ops.config != NULL)
- error = b->bdg_ops.config((struct nm_ifreq *)nmr);
+ if (b->bdg_ops->config != NULL)
+ error = b->bdg_ops->config(nr);
BDG_RUNLOCK(b);
return error;
}
@@ -1436,7 +1644,7 @@ netmap_vp_krings_create(struct netmap_adapter *na)
leases = na->tailroom;
for (i = 0; i < nrx; i++) { /* Receive rings */
- na->rx_rings[i].nkr_leases = leases;
+ na->rx_rings[i]->nkr_leases = leases;
leases += na->num_rx_desc;
}
@@ -1502,6 +1710,7 @@ nm_bdg_preflush(struct netmap_kring *kring, u_int end)
ft[ft_i].ft_len = slot->len;
ft[ft_i].ft_flags = slot->flags;
+ ft[ft_i].ft_offset = 0;
ND("flags is 0x%x", slot->flags);
/* we do not use the buf changed flag, but we still need to reset it */
@@ -1606,7 +1815,7 @@ netmap_vp_reg(struct netmap_adapter *na, int onoff)
if (onoff) {
for_rx_tx(t) {
for (i = 0; i < netmap_real_rings(na, t); i++) {
- struct netmap_kring *kring = &NMR(na, t)[i];
+ struct netmap_kring *kring = NMR(na, t)[i];
if (nm_kring_pending_on(kring))
kring->nr_mode = NKR_NETMAP_ON;
@@ -1622,7 +1831,7 @@ netmap_vp_reg(struct netmap_adapter *na, int onoff)
na->na_flags &= ~NAF_NETMAP_ON;
for_rx_tx(t) {
for (i = 0; i < netmap_real_rings(na, t); i++) {
- struct netmap_kring *kring = &NMR(na, t)[i];
+ struct netmap_kring *kring = NMR(na, t)[i];
if (nm_kring_pending_off(kring))
kring->nr_mode = NKR_NETMAP_OFF;
@@ -1641,30 +1850,19 @@ netmap_vp_reg(struct netmap_adapter *na, int onoff)
* and then returns the destination port index, and the
* ring in *dst_ring (at the moment, always use ring 0)
*/
-u_int
+uint32_t
netmap_bdg_learning(struct nm_bdg_fwd *ft, uint8_t *dst_ring,
- struct netmap_vp_adapter *na)
+ struct netmap_vp_adapter *na, void *private_data)
{
- uint8_t *buf = ft->ft_buf;
- u_int buf_len = ft->ft_len;
- struct nm_hash_ent *ht = na->na_bdg->ht;
+ uint8_t *buf = ((uint8_t *)ft->ft_buf) + ft->ft_offset;
+ u_int buf_len = ft->ft_len - ft->ft_offset;
+ struct nm_hash_ent *ht = private_data;
uint32_t sh, dh;
u_int dst, mysrc = na->bdg_port;
uint64_t smac, dmac;
uint8_t indbuf[12];
- /* safety check, unfortunately we have many cases */
- if (buf_len >= 14 + na->up.virt_hdr_len) {
- /* virthdr + mac_hdr in the same slot */
- buf += na->up.virt_hdr_len;
- buf_len -= na->up.virt_hdr_len;
- } else if (buf_len == na->up.virt_hdr_len && ft->ft_flags & NS_MOREFRAG) {
- /* only header in first fragment */
- ft++;
- buf = ft->ft_buf;
- buf_len = ft->ft_len;
- } else {
- RD(5, "invalid buf format, length %d", buf_len);
+ if (buf_len < 14) {
return NM_BDG_NOPORT;
}
@@ -1803,13 +2001,23 @@ nm_bdg_flush(struct nm_bdg_fwd *ft, u_int n, struct netmap_vp_adapter *na,
uint8_t dst_ring = ring_nr; /* default, same ring as origin */
uint16_t dst_port, d_i;
struct nm_bdg_q *d;
+ struct nm_bdg_fwd *start_ft = NULL;
ND("slot %d frags %d", i, ft[i].ft_frags);
- /* Drop the packet if the virtio-net header is not into the first
- fragment nor at the very beginning of the second. */
- if (unlikely(na->up.virt_hdr_len > ft[i].ft_len))
+
+ if (na->up.virt_hdr_len < ft[i].ft_len) {
+ ft[i].ft_offset = na->up.virt_hdr_len;
+ start_ft = &ft[i];
+ } else if (na->up.virt_hdr_len == ft[i].ft_len && ft[i].ft_flags & NS_MOREFRAG) {
+ ft[i].ft_offset = ft[i].ft_len;
+ start_ft = &ft[i+1];
+ } else {
+ /* Drop the packet if the virtio-net header is neither in the first
+ * fragment nor at the very beginning of the second.
+ */
continue;
- dst_port = b->bdg_ops.lookup(&ft[i], &dst_ring, na);
+ }
+ dst_port = b->bdg_ops->lookup(start_ft, &dst_ring, na, b->private_data);
if (netmap_verbose > 255)
RD(5, "slot %d port %d -> %d", i, me, dst_port);
if (dst_port >= NM_BDG_NOPORT)
@@ -1940,7 +2148,7 @@ nm_bdg_flush(struct nm_bdg_fwd *ft, u_int n, struct netmap_vp_adapter *na,
nrings = dst_na->up.num_rx_rings;
if (dst_nr >= nrings)
dst_nr = dst_nr % nrings;
- kring = &dst_na->up.rx_rings[dst_nr];
+ kring = dst_na->up.rx_rings[dst_nr];
ring = kring->ring;
/* the destination ring may have not been opened for RX */
if (unlikely(ring == NULL || kring->nr_mode != NKR_NETMAP_ON))
@@ -2224,8 +2432,9 @@ netmap_vp_bdg_attach(const char *name, struct netmap_adapter *na)
{
struct netmap_vp_adapter *vpna = (struct netmap_vp_adapter *)na;
- if (vpna->na_bdg)
+ if (vpna->na_bdg) {
return netmap_bwrap_attach(name, na);
+ }
na->na_vp = vpna;
strncpy(na->name, name, sizeof(na->name));
na->na_hostvp = NULL;
@@ -2236,14 +2445,19 @@ netmap_vp_bdg_attach(const char *name, struct netmap_adapter *na)
* Only persistent VALE ports have a non-null ifp.
*/
static int
-netmap_vp_create(struct nmreq *nmr, struct ifnet *ifp,
- struct netmap_mem_d *nmd,
- struct netmap_vp_adapter **ret)
+netmap_vp_create(struct nmreq_header *hdr, struct ifnet *ifp,
+ struct netmap_mem_d *nmd, struct netmap_vp_adapter **ret)
{
+ struct nmreq_register *req = (struct nmreq_register *)hdr->nr_body;
struct netmap_vp_adapter *vpna;
struct netmap_adapter *na;
int error = 0;
u_int npipes = 0;
+ u_int extrabufs = 0;
+
+ if (hdr->nr_reqtype != NETMAP_REQ_REGISTER) {
+ return EINVAL;
+ }
vpna = nm_os_malloc(sizeof(*vpna));
if (vpna == NULL)
@@ -2252,31 +2466,30 @@ netmap_vp_create(struct nmreq *nmr, struct ifnet *ifp,
na = &vpna->up;
na->ifp = ifp;
- strncpy(na->name, nmr->nr_name, sizeof(na->name));
+ strncpy(na->name, hdr->nr_name, sizeof(na->name));
/* bound checking */
- na->num_tx_rings = nmr->nr_tx_rings;
+ na->num_tx_rings = req->nr_tx_rings;
nm_bound_var(&na->num_tx_rings, 1, 1, NM_BDG_MAXRINGS, NULL);
- nmr->nr_tx_rings = na->num_tx_rings; // write back
- na->num_rx_rings = nmr->nr_rx_rings;
+ req->nr_tx_rings = na->num_tx_rings; /* write back */
+ na->num_rx_rings = req->nr_rx_rings;
nm_bound_var(&na->num_rx_rings, 1, 1, NM_BDG_MAXRINGS, NULL);
- nmr->nr_rx_rings = na->num_rx_rings; // write back
- nm_bound_var(&nmr->nr_tx_slots, NM_BRIDGE_RINGSIZE,
+ req->nr_rx_rings = na->num_rx_rings; /* write back */
+ nm_bound_var(&req->nr_tx_slots, NM_BRIDGE_RINGSIZE,
1, NM_BDG_MAXSLOTS, NULL);
- na->num_tx_desc = nmr->nr_tx_slots;
- nm_bound_var(&nmr->nr_rx_slots, NM_BRIDGE_RINGSIZE,
+ na->num_tx_desc = req->nr_tx_slots;
+ nm_bound_var(&req->nr_rx_slots, NM_BRIDGE_RINGSIZE,
1, NM_BDG_MAXSLOTS, NULL);
/* validate number of pipes. We want at least 1,
* but probably can do with some more.
* So let's use 2 as default (when 0 is supplied)
*/
- npipes = nmr->nr_arg1;
nm_bound_var(&npipes, 2, 1, NM_MAXPIPES, NULL);
- nmr->nr_arg1 = npipes; /* write back */
/* validate extra bufs */
- nm_bound_var(&nmr->nr_arg3, 0, 0,
+ nm_bound_var(&extrabufs, 0, 0,
128*NM_BDG_MAXSLOTS, NULL);
- na->num_rx_desc = nmr->nr_rx_slots;
+ req->nr_extra_bufs = extrabufs; /* write back */
+ na->num_rx_desc = req->nr_rx_slots;
/* Set the mfs to a default value, as it is needed on the VALE
* mismatch datapath. XXX We should set it according to the MTU
* known to the kernel. */
@@ -2299,13 +2512,13 @@ netmap_vp_create(struct nmreq *nmr, struct ifnet *ifp,
na->nm_krings_create = netmap_vp_krings_create;
na->nm_krings_delete = netmap_vp_krings_delete;
na->nm_dtor = netmap_vp_dtor;
- D("nr_arg2 %d", nmr->nr_arg2);
+ ND("nr_mem_id %d", req->nr_mem_id);
na->nm_mem = nmd ?
netmap_mem_get(nmd):
netmap_mem_private_new(
na->num_tx_rings, na->num_tx_desc,
na->num_rx_rings, na->num_rx_desc,
- nmr->nr_arg3, npipes, &error);
+ req->nr_extra_bufs, npipes, &error);
if (na->nm_mem == NULL)
goto err;
na->nm_bdg_attach = netmap_vp_bdg_attach;
@@ -2373,8 +2586,9 @@ netmap_bwrap_dtor(struct netmap_adapter *na)
ND("na %p", na);
na->ifp = NULL;
bna->host.up.ifp = NULL;
+ hwna->na_vp = bna->saved_na_vp;
+ hwna->na_hostvp = NULL;
hwna->na_private = NULL;
- hwna->na_vp = hwna->na_hostvp = NULL;
hwna->na_flags &= ~NAF_BUSY;
netmap_adapter_put(hwna);
@@ -2414,7 +2628,7 @@ netmap_bwrap_intr_notify(struct netmap_kring *kring, int flags)
if (netmap_verbose)
D("%s %s 0x%x", na->name, kring->name, flags);
- bkring = &vpna->up.tx_rings[ring_nr];
+ bkring = vpna->up.tx_rings[ring_nr];
/* make sure the ring is not disabled */
if (nm_kr_tryget(kring, 0 /* can't sleep */, NULL)) {
@@ -2497,8 +2711,8 @@ netmap_bwrap_reg(struct netmap_adapter *na, int onoff)
/* pass down the pending ring state information */
for_rx_tx(t) {
for (i = 0; i < nma_get_nrings(na, t) + 1; i++)
- NMR(hwna, t)[i].nr_pending_mode =
- NMR(na, t)[i].nr_pending_mode;
+ NMR(hwna, t)[i]->nr_pending_mode =
+ NMR(na, t)[i]->nr_pending_mode;
}
/* forward the request to the hwna */
@@ -2509,8 +2723,8 @@ netmap_bwrap_reg(struct netmap_adapter *na, int onoff)
/* copy up the current ring state information */
for_rx_tx(t) {
for (i = 0; i < nma_get_nrings(na, t) + 1; i++) {
- struct netmap_kring *kring = &NMR(hwna, t)[i];
- NMR(na, t)[i].nr_mode = kring->nr_mode;
+ struct netmap_kring *kring = NMR(hwna, t)[i];
+ NMR(na, t)[i]->nr_mode = kring->nr_mode;
}
}
@@ -2523,15 +2737,15 @@ netmap_bwrap_reg(struct netmap_adapter *na, int onoff)
u_int i;
/* intercept the hwna nm_nofify callback on the hw rings */
for (i = 0; i < hwna->num_rx_rings; i++) {
- hwna->rx_rings[i].save_notify = hwna->rx_rings[i].nm_notify;
- hwna->rx_rings[i].nm_notify = netmap_bwrap_intr_notify;
+ hwna->rx_rings[i]->save_notify = hwna->rx_rings[i]->nm_notify;
+ hwna->rx_rings[i]->nm_notify = netmap_bwrap_intr_notify;
}
i = hwna->num_rx_rings; /* for safety */
/* save the host ring notify unconditionally */
- hwna->rx_rings[i].save_notify = hwna->rx_rings[i].nm_notify;
+ hwna->rx_rings[i]->save_notify = hwna->rx_rings[i]->nm_notify;
if (hostna->na_bdg) {
/* also intercept the host ring notify */
- hwna->rx_rings[i].nm_notify = netmap_bwrap_intr_notify;
+ hwna->rx_rings[i]->nm_notify = netmap_bwrap_intr_notify;
}
if (na->active_fds == 0)
na->na_flags |= NAF_NETMAP_ON;
@@ -2543,17 +2757,18 @@ netmap_bwrap_reg(struct netmap_adapter *na, int onoff)
/* reset all notify callbacks (including host ring) */
for (i = 0; i <= hwna->num_rx_rings; i++) {
- hwna->rx_rings[i].nm_notify = hwna->rx_rings[i].save_notify;
- hwna->rx_rings[i].save_notify = NULL;
+ hwna->rx_rings[i]->nm_notify = hwna->rx_rings[i]->save_notify;
+ hwna->rx_rings[i]->save_notify = NULL;
}
hwna->na_lut.lut = NULL;
+ hwna->na_lut.plut = NULL;
hwna->na_lut.objtotal = 0;
hwna->na_lut.objsize = 0;
/* pass ownership of the netmap rings to the hwna */
for_rx_tx(t) {
for (i = 0; i < nma_get_nrings(na, t) + 1; i++) {
- NMR(na, t)[i].ring = NULL;
+ NMR(na, t)[i]->ring = NULL;
}
}
@@ -2564,8 +2779,7 @@ netmap_bwrap_reg(struct netmap_adapter *na, int onoff)
/* nm_config callback for bwrap */
static int
-netmap_bwrap_config(struct netmap_adapter *na, u_int *txr, u_int *txd,
- u_int *rxr, u_int *rxd)
+netmap_bwrap_config(struct netmap_adapter *na, struct nm_config_info *info)
{
struct netmap_bwrap_adapter *bna =
(struct netmap_bwrap_adapter *)na;
@@ -2573,11 +2787,12 @@ netmap_bwrap_config(struct netmap_adapter *na, u_int *txr, u_int *txd,
/* forward the request */
netmap_update_config(hwna);
- /* swap the results */
- *txr = hwna->num_rx_rings;
- *txd = hwna->num_rx_desc;
- *rxr = hwna->num_tx_rings;
- *rxd = hwna->num_rx_desc;
+ /* swap the results and propagate */
+ info->num_tx_rings = hwna->num_rx_rings;
+ info->num_tx_descs = hwna->num_rx_desc;
+ info->num_rx_rings = hwna->num_tx_rings;
+ info->num_rx_descs = hwna->num_tx_desc;
+ info->rx_buf_maxsize = hwna->rx_buf_maxsize;
return 0;
}
@@ -2610,7 +2825,7 @@ netmap_bwrap_krings_create(struct netmap_adapter *na)
/* increment the usage counter for all the hwna krings */
for_rx_tx(t) {
for (i = 0; i < nma_get_nrings(hwna, t) + 1; i++) {
- NMR(hwna, t)[i].users++;
+ NMR(hwna, t)[i]->users++;
}
}
@@ -2627,8 +2842,8 @@ netmap_bwrap_krings_create(struct netmap_adapter *na)
for_rx_tx(t) {
enum txrx r = nm_txrx_swap(t); /* swap NR_TX <-> NR_RX */
for (i = 0; i < nma_get_nrings(hwna, r) + 1; i++) {
- NMR(na, t)[i].nkr_num_slots = NMR(hwna, r)[i].nkr_num_slots;
- NMR(na, t)[i].ring = NMR(hwna, r)[i].ring;
+ NMR(na, t)[i]->nkr_num_slots = NMR(hwna, r)[i]->nkr_num_slots;
+ NMR(na, t)[i]->ring = NMR(hwna, r)[i]->ring;
}
}
@@ -2638,16 +2853,16 @@ netmap_bwrap_krings_create(struct netmap_adapter *na)
* hostna
*/
hostna->tx_rings = &na->tx_rings[na->num_tx_rings];
- hostna->tx_rings[0].na = hostna;
+ hostna->tx_rings[0]->na = hostna;
hostna->rx_rings = &na->rx_rings[na->num_rx_rings];
- hostna->rx_rings[0].na = hostna;
+ hostna->rx_rings[0]->na = hostna;
}
return 0;
err_dec_users:
for_rx_tx(t) {
- NMR(hwna, t)[i].users--;
+ NMR(hwna, t)[i]->users--;
}
hwna->nm_krings_delete(hwna);
err_del_vp_rings:
@@ -2671,7 +2886,7 @@ netmap_bwrap_krings_delete(struct netmap_adapter *na)
/* decrement the usage counter for all the hwna krings */
for_rx_tx(t) {
for (i = 0; i < nma_get_nrings(hwna, t) + 1; i++) {
- NMR(hwna, t)[i].users--;
+ NMR(hwna, t)[i]->users--;
}
}
@@ -2698,7 +2913,7 @@ netmap_bwrap_notify(struct netmap_kring *kring, int flags)
(kring ? kring->name : "NULL!"),
(na ? na->name : "NULL!"),
(hwna ? hwna->name : "NULL!"));
- hw_kring = &hwna->tx_rings[ring_n];
+ hw_kring = hwna->tx_rings[ring_n];
if (nm_kr_tryget(hw_kring, 0, NULL)) {
return ENXIO;
@@ -2746,13 +2961,22 @@ put_out:
* directed to hwna.
*/
static int
-netmap_bwrap_bdg_ctl(struct netmap_adapter *na, struct nmreq *nmr, int attach)
+netmap_bwrap_bdg_ctl(struct nmreq_header *hdr, struct netmap_adapter *na)
{
struct netmap_priv_d *npriv;
struct netmap_bwrap_adapter *bna = (struct netmap_bwrap_adapter*)na;
int error = 0;
- if (attach) {
+ if (hdr->nr_reqtype == NETMAP_REQ_VALE_ATTACH) {
+ struct nmreq_vale_attach *req =
+ (struct nmreq_vale_attach *)hdr->nr_body;
+ if (req->reg.nr_ringid != 0 ||
+ (req->reg.nr_mode != NR_REG_ALL_NIC &&
+ req->reg.nr_mode != NR_REG_NIC_SW)) {
+ /* We only support attaching all the NIC rings
+ * and/or the host stack. */
+ return EINVAL;
+ }
if (NETMAP_OWNED_BY_ANY(na)) {
return EBUSY;
}
@@ -2764,7 +2988,8 @@ netmap_bwrap_bdg_ctl(struct netmap_adapter *na, struct nmreq *nmr, int attach)
if (npriv == NULL)
return ENOMEM;
npriv->np_ifp = na->ifp; /* let the priv destructor release the ref */
- error = netmap_do_regif(npriv, na, nmr->nr_ringid, nmr->nr_flags);
+ error = netmap_do_regif(npriv, na, req->reg.nr_mode,
+ req->reg.nr_ringid, req->reg.nr_flags);
if (error) {
netmap_priv_delete(npriv);
return error;
@@ -2778,8 +3003,8 @@ netmap_bwrap_bdg_ctl(struct netmap_adapter *na, struct nmreq *nmr, int attach)
bna->na_kpriv = NULL;
na->na_flags &= ~NAF_BUSY;
}
- return error;
+ return error;
}
/* attach a bridge wrapper to the 'real' device */
@@ -2837,7 +3062,9 @@ netmap_bwrap_attach(const char *nr_name, struct netmap_adapter *hwna)
bna->hwna = hwna;
netmap_adapter_get(hwna);
hwna->na_private = bna; /* weak reference */
+ bna->saved_na_vp = hwna->na_vp;
hwna->na_vp = &bna->up;
+ bna->up.up.na_vp = &(bna->up);
if (hwna->na_flags & NAF_HOST_RINGS) {
if (hwna->na_flags & NAF_SW_ONLY)
diff --git a/sys/dev/re/if_re.c b/sys/dev/re/if_re.c
index e9328b5ec1d10..737294d84e6c9 100644
--- a/sys/dev/re/if_re.c
+++ b/sys/dev/re/if_re.c
@@ -2933,7 +2933,7 @@ re_start_locked(struct ifnet *ifp)
#ifdef DEV_NETMAP
/* XXX is this necessary ? */
if (ifp->if_capenable & IFCAP_NETMAP) {
- struct netmap_kring *kring = &NA(ifp)->tx_rings[0];
+ struct netmap_kring *kring = NA(ifp)->tx_rings[0];
if (sc->rl_ldata.rl_tx_prodidx != kring->nr_hwcur) {
/* kick the tx unit */
CSR_WRITE_1(sc, sc->rl_txstart, RL_TXSTART_START);