diff options
author | Luigi Rizzo <luigi@FreeBSD.org> | 2015-07-10 05:51:36 +0000 |
---|---|---|
committer | Luigi Rizzo <luigi@FreeBSD.org> | 2015-07-10 05:51:36 +0000 |
commit | 847bf38369b6ea5abf8b6409006468cfe4f66d5e (patch) | |
tree | 2a938ad28f8fa79c60e58c3430a4c2c93631db94 | |
parent | 9d73ee0f82b756db5e53a32e55766db958d41dba (diff) | |
download | src-847bf38369b6ea5abf8b6409006468cfe4f66d5e.tar.gz src-847bf38369b6ea5abf8b6409006468cfe4f66d5e.zip |
Notes
-rw-r--r-- | sys/dev/cxgbe/t4_main.c | 7 | ||||
-rw-r--r-- | sys/dev/cxgbe/t4_netmap.c | 6 | ||||
-rw-r--r-- | sys/dev/e1000/if_em.c | 3 | ||||
-rw-r--r-- | sys/dev/e1000/if_igb.c | 3 | ||||
-rw-r--r-- | sys/dev/e1000/if_lem.c | 3 | ||||
-rw-r--r-- | sys/dev/ixgbe/if_ix.c | 3 | ||||
-rw-r--r-- | sys/dev/netmap/if_em_netmap.h | 7 | ||||
-rw-r--r-- | sys/dev/netmap/if_igb_netmap.h | 7 | ||||
-rw-r--r-- | sys/dev/netmap/if_ixl_netmap.h | 16 | ||||
-rw-r--r-- | sys/dev/netmap/if_lem_netmap.h | 7 | ||||
-rw-r--r-- | sys/dev/netmap/if_re_netmap.h | 7 | ||||
-rw-r--r-- | sys/dev/netmap/if_vtnet_netmap.h | 8 | ||||
-rw-r--r-- | sys/dev/netmap/ixgbe_netmap.h | 7 | ||||
-rw-r--r-- | sys/dev/netmap/netmap.c | 1041 | ||||
-rw-r--r-- | sys/dev/netmap/netmap_freebsd.c | 39 | ||||
-rw-r--r-- | sys/dev/netmap/netmap_generic.c | 30 | ||||
-rw-r--r-- | sys/dev/netmap/netmap_kern.h | 350 | ||||
-rw-r--r-- | sys/dev/netmap/netmap_mem2.c | 553 | ||||
-rw-r--r-- | sys/dev/netmap/netmap_mem2.h | 30 | ||||
-rw-r--r-- | sys/dev/netmap/netmap_monitor.c | 713 | ||||
-rw-r--r-- | sys/dev/netmap/netmap_pipe.c | 135 | ||||
-rw-r--r-- | sys/dev/netmap/netmap_vale.c | 350 | ||||
-rw-r--r-- | sys/dev/re/if_re.c | 1 | ||||
-rw-r--r-- | sys/net/netmap.h | 8 | ||||
-rw-r--r-- | sys/net/netmap_user.h | 155 |
25 files changed, 2034 insertions(+), 1455 deletions(-)
diff --git a/sys/dev/cxgbe/t4_main.c b/sys/dev/cxgbe/t4_main.c index 5c8805967f00..a3403ad79e62 100644 --- a/sys/dev/cxgbe/t4_main.c +++ b/sys/dev/cxgbe/t4_main.c @@ -8533,10 +8533,17 @@ static devclass_t cxgbe_devclass, cxl_devclass; DRIVER_MODULE(t4nex, pci, t4_driver, t4_devclass, mod_event, 0); MODULE_VERSION(t4nex, 1); MODULE_DEPEND(t4nex, firmware, 1, 1, 1); +#ifdef DEV_NETMAP +MODULE_DEPEND(t4nex, netmap, 1, 1, 1); +#endif /* DEV_NETMAP */ + DRIVER_MODULE(t5nex, pci, t5_driver, t5_devclass, mod_event, 0); MODULE_VERSION(t5nex, 1); MODULE_DEPEND(t5nex, firmware, 1, 1, 1); +#ifdef DEV_NETMAP +MODULE_DEPEND(t5nex, netmap, 1, 1, 1); +#endif /* DEV_NETMAP */ DRIVER_MODULE(cxgbe, t4nex, cxgbe_driver, cxgbe_devclass, 0, 0); MODULE_VERSION(cxgbe, 1); diff --git a/sys/dev/cxgbe/t4_netmap.c b/sys/dev/cxgbe/t4_netmap.c index f54a67fe94c0..a4afb8a25794 100644 --- a/sys/dev/cxgbe/t4_netmap.c +++ b/sys/dev/cxgbe/t4_netmap.c @@ -917,8 +917,6 @@ cxgbe_netmap_txsync(struct netmap_kring *kring, int flags) kring->nr_hwtail -= kring->nkr_num_slots; } - nm_txsync_finalize(kring); - return (0); } @@ -931,7 +929,7 @@ cxgbe_netmap_rxsync(struct netmap_kring *kring, int flags) struct port_info *pi = ifp->if_softc; struct adapter *sc = pi->adapter; struct sge_nm_rxq *nm_rxq = &sc->sge.nm_rxq[pi->first_nm_rxq + kring->ring_id]; - u_int const head = nm_rxsync_prologue(kring); + u_int const head = kring->rhead; u_int n; int force_update = (flags & NAF_FORCE_READ) || kring->nr_kflags & NKR_PENDINTR; @@ -993,8 +991,6 @@ cxgbe_netmap_rxsync(struct netmap_kring *kring, int flags) } } - nm_rxsync_finalize(kring); - return (0); } diff --git a/sys/dev/e1000/if_em.c b/sys/dev/e1000/if_em.c index 8032345d09ae..52b03d65ca50 100644 --- a/sys/dev/e1000/if_em.c +++ b/sys/dev/e1000/if_em.c @@ -344,6 +344,9 @@ devclass_t em_devclass; DRIVER_MODULE(em, pci, em_driver, em_devclass, 0, 0); MODULE_DEPEND(em, pci, 1, 1, 1); MODULE_DEPEND(em, ether, 1, 1, 1); +#ifdef DEV_NETMAP +MODULE_DEPEND(em, netmap, 1, 
1, 1); +#endif /* DEV_NETMAP */ /********************************************************************* * Tunable default values. diff --git a/sys/dev/e1000/if_igb.c b/sys/dev/e1000/if_igb.c index 6ac6eb63c987..384a46bc223d 100644 --- a/sys/dev/e1000/if_igb.c +++ b/sys/dev/e1000/if_igb.c @@ -322,6 +322,9 @@ static devclass_t igb_devclass; DRIVER_MODULE(igb, pci, igb_driver, igb_devclass, 0, 0); MODULE_DEPEND(igb, pci, 1, 1, 1); MODULE_DEPEND(igb, ether, 1, 1, 1); +#ifdef DEV_NETMAP +MODULE_DEPEND(igb, netmap, 1, 1, 1); +#endif /* DEV_NETMAP */ /********************************************************************* * Tunable default values. diff --git a/sys/dev/e1000/if_lem.c b/sys/dev/e1000/if_lem.c index 894a74a9db81..f34010e5c86e 100644 --- a/sys/dev/e1000/if_lem.c +++ b/sys/dev/e1000/if_lem.c @@ -286,6 +286,9 @@ extern devclass_t em_devclass; DRIVER_MODULE(lem, pci, lem_driver, em_devclass, 0, 0); MODULE_DEPEND(lem, pci, 1, 1, 1); MODULE_DEPEND(lem, ether, 1, 1, 1); +#ifdef DEV_NETMAP +MODULE_DEPEND(lem, netmap, 1, 1, 1); +#endif /* DEV_NETMAP */ /********************************************************************* * Tunable default values. 
diff --git a/sys/dev/ixgbe/if_ix.c b/sys/dev/ixgbe/if_ix.c index 77556267fa15..c8ce7445c860 100644 --- a/sys/dev/ixgbe/if_ix.c +++ b/sys/dev/ixgbe/if_ix.c @@ -246,6 +246,9 @@ DRIVER_MODULE(ix, pci, ix_driver, ix_devclass, 0, 0); MODULE_DEPEND(ix, pci, 1, 1, 1); MODULE_DEPEND(ix, ether, 1, 1, 1); +#ifdef DEV_NETMAP +MODULE_DEPEND(ix, netmap, 1, 1, 1); +#endif /* DEV_NETMAP */ /* ** TUNEABLE PARAMETERS: diff --git a/sys/dev/netmap/if_em_netmap.h b/sys/dev/netmap/if_em_netmap.h index 99eaa6f01319..eae4f8c18ca0 100644 --- a/sys/dev/netmap/if_em_netmap.h +++ b/sys/dev/netmap/if_em_netmap.h @@ -198,8 +198,6 @@ em_netmap_txsync(struct netmap_kring *kring, int flags) } } - nm_txsync_finalize(kring); - return 0; } @@ -217,7 +215,7 @@ em_netmap_rxsync(struct netmap_kring *kring, int flags) u_int nic_i; /* index into the NIC ring */ u_int n; u_int const lim = kring->nkr_num_slots - 1; - u_int const head = nm_rxsync_prologue(kring); + u_int const head = kring->rhead; int force_update = (flags & NAF_FORCE_READ) || kring->nr_kflags & NKR_PENDINTR; /* device-specific */ @@ -303,9 +301,6 @@ em_netmap_rxsync(struct netmap_kring *kring, int flags) E1000_WRITE_REG(&adapter->hw, E1000_RDT(rxr->me), nic_i); } - /* tell userspace that there might be new packets */ - nm_rxsync_finalize(kring); - return 0; ring_reset: diff --git a/sys/dev/netmap/if_igb_netmap.h b/sys/dev/netmap/if_igb_netmap.h index c73846073341..33b7b3b66547 100644 --- a/sys/dev/netmap/if_igb_netmap.h +++ b/sys/dev/netmap/if_igb_netmap.h @@ -180,8 +180,6 @@ igb_netmap_txsync(struct netmap_kring *kring, int flags) kring->nr_hwtail = nm_prev(netmap_idx_n2k(kring, nic_i), lim); } - nm_txsync_finalize(kring); - return 0; } @@ -199,7 +197,7 @@ igb_netmap_rxsync(struct netmap_kring *kring, int flags) u_int nic_i; /* index into the NIC ring */ u_int n; u_int const lim = kring->nkr_num_slots - 1; - u_int const head = nm_rxsync_prologue(kring); + u_int const head = kring->rhead; int force_update = (flags & NAF_FORCE_READ) || 
kring->nr_kflags & NKR_PENDINTR; /* device-specific */ @@ -283,9 +281,6 @@ igb_netmap_rxsync(struct netmap_kring *kring, int flags) E1000_WRITE_REG(&adapter->hw, E1000_RDT(rxr->me), nic_i); } - /* tell userspace that there might be new packets */ - nm_rxsync_finalize(kring); - return 0; ring_reset: diff --git a/sys/dev/netmap/if_ixl_netmap.h b/sys/dev/netmap/if_ixl_netmap.h index d6aff1f8c9a8..f7e7baaf6bc2 100644 --- a/sys/dev/netmap/if_ixl_netmap.h +++ b/sys/dev/netmap/if_ixl_netmap.h @@ -68,9 +68,14 @@ extern int ixl_rx_miss, ixl_rx_miss_bufs, ixl_crcstrip; * count packets that might be missed due to lost interrupts. */ SYSCTL_DECL(_dev_netmap); -int ixl_rx_miss, ixl_rx_miss_bufs, ixl_crcstrip; +/* + * The xl driver by default strips CRCs and we do not override it. + */ +int ixl_rx_miss, ixl_rx_miss_bufs, ixl_crcstrip = 1; +#if 0 SYSCTL_INT(_dev_netmap, OID_AUTO, ixl_crcstrip, - CTLFLAG_RW, &ixl_crcstrip, 0, "strip CRC on rx frames"); + CTLFLAG_RW, &ixl_crcstrip, 1, "strip CRC on rx frames"); +#endif SYSCTL_INT(_dev_netmap, OID_AUTO, ixl_rx_miss, CTLFLAG_RW, &ixl_rx_miss, 0, "potentially missed rx intr"); SYSCTL_INT(_dev_netmap, OID_AUTO, ixl_rx_miss_bufs, @@ -268,8 +273,6 @@ ixl_netmap_txsync(struct netmap_kring *kring, int flags) kring->nr_hwtail = nm_prev(netmap_idx_n2k(kring, nic_i), lim); } - nm_txsync_finalize(kring); - return 0; } @@ -297,7 +300,7 @@ ixl_netmap_rxsync(struct netmap_kring *kring, int flags) u_int nic_i; /* index into the NIC ring */ u_int n; u_int const lim = kring->nkr_num_slots - 1; - u_int const head = nm_rxsync_prologue(kring); + u_int const head = kring->rhead; int force_update = (flags & NAF_FORCE_READ) || kring->nr_kflags & NKR_PENDINTR; /* device-specific */ @@ -408,9 +411,6 @@ ixl_netmap_rxsync(struct netmap_kring *kring, int flags) wr32(vsi->hw, rxr->tail, nic_i); } - /* tell userspace that there might be new packets */ - nm_rxsync_finalize(kring); - return 0; ring_reset: diff --git a/sys/dev/netmap/if_lem_netmap.h 
b/sys/dev/netmap/if_lem_netmap.h index 50eb1f719929..0ec9b1346609 100644 --- a/sys/dev/netmap/if_lem_netmap.h +++ b/sys/dev/netmap/if_lem_netmap.h @@ -302,8 +302,6 @@ lem_netmap_txsync(struct netmap_kring *kring, int flags) kring->nr_hwtail = nm_prev(netmap_idx_n2k(kring, nic_i), lim); } - nm_txsync_finalize(kring); - return 0; } @@ -321,7 +319,7 @@ lem_netmap_rxsync(struct netmap_kring *kring, int flags) u_int nic_i; /* index into the NIC ring */ u_int n; u_int const lim = kring->nkr_num_slots - 1; - u_int const head = nm_rxsync_prologue(kring); + u_int const head = kring->rhead; int force_update = (flags & NAF_FORCE_READ) || kring->nr_kflags & NKR_PENDINTR; /* device-specific */ @@ -466,9 +464,6 @@ lem_netmap_rxsync(struct netmap_kring *kring, int flags) E1000_WRITE_REG(&adapter->hw, E1000_RDT(0), nic_i); } - /* tell userspace that there might be new packets */ - nm_rxsync_finalize(kring); - return 0; ring_reset: diff --git a/sys/dev/netmap/if_re_netmap.h b/sys/dev/netmap/if_re_netmap.h index 354f14df1c58..ac08aedd7962 100644 --- a/sys/dev/netmap/if_re_netmap.h +++ b/sys/dev/netmap/if_re_netmap.h @@ -159,8 +159,6 @@ re_netmap_txsync(struct netmap_kring *kring, int flags) } } - nm_txsync_finalize(kring); - return 0; } @@ -178,7 +176,7 @@ re_netmap_rxsync(struct netmap_kring *kring, int flags) u_int nic_i; /* index into the NIC ring */ u_int n; u_int const lim = kring->nkr_num_slots - 1; - u_int const head = nm_rxsync_prologue(kring); + u_int const head = kring->rhead; int force_update = (flags & NAF_FORCE_READ) || kring->nr_kflags & NKR_PENDINTR; /* device-specific */ @@ -273,9 +271,6 @@ re_netmap_rxsync(struct netmap_kring *kring, int flags) BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE); } - /* tell userspace that there might be new packets */ - nm_rxsync_finalize(kring); - return 0; ring_reset: diff --git a/sys/dev/netmap/if_vtnet_netmap.h b/sys/dev/netmap/if_vtnet_netmap.h index 63f4fa9aa5df..791cee56bcee 100644 --- a/sys/dev/netmap/if_vtnet_netmap.h +++ 
b/sys/dev/netmap/if_vtnet_netmap.h @@ -214,9 +214,6 @@ vtnet_netmap_txsync(struct netmap_kring *kring, int flags) virtqueue_postpone_intr(vq, VQ_POSTPONE_SHORT); } -//out: - nm_txsync_finalize(kring); - return 0; } @@ -278,7 +275,7 @@ vtnet_netmap_rxsync(struct netmap_kring *kring, int flags) // u_int nic_i; /* index into the NIC ring */ u_int n; u_int const lim = kring->nkr_num_slots - 1; - u_int const head = nm_rxsync_prologue(kring); + u_int const head = kring->rhead; int force_update = (flags & NAF_FORCE_READ) || kring->nr_kflags & NKR_PENDINTR; /* device-specific */ @@ -340,9 +337,6 @@ vtnet_netmap_rxsync(struct netmap_kring *kring, int flags) vtnet_rxq_enable_intr(rxq); } - /* tell userspace that there might be new packets. */ - nm_rxsync_finalize(kring); - ND("[C] h %d c %d t %d hwcur %d hwtail %d", ring->head, ring->cur, ring->tail, kring->nr_hwcur, kring->nr_hwtail); diff --git a/sys/dev/netmap/ixgbe_netmap.h b/sys/dev/netmap/ixgbe_netmap.h index f1f03cb6d7a7..4d5bde20a3dd 100644 --- a/sys/dev/netmap/ixgbe_netmap.h +++ b/sys/dev/netmap/ixgbe_netmap.h @@ -322,8 +322,6 @@ ixgbe_netmap_txsync(struct netmap_kring *kring, int flags) } } - nm_txsync_finalize(kring); - return 0; } @@ -351,7 +349,7 @@ ixgbe_netmap_rxsync(struct netmap_kring *kring, int flags) u_int nic_i; /* index into the NIC ring */ u_int n; u_int const lim = kring->nkr_num_slots - 1; - u_int const head = nm_rxsync_prologue(kring); + u_int const head = kring->rhead; int force_update = (flags & NAF_FORCE_READ) || kring->nr_kflags & NKR_PENDINTR; /* device-specific */ @@ -458,9 +456,6 @@ ixgbe_netmap_rxsync(struct netmap_kring *kring, int flags) IXGBE_WRITE_REG(&adapter->hw, IXGBE_RDT(rxr->me), nic_i); } - /* tell userspace that there might be new packets */ - nm_rxsync_finalize(kring); - return 0; ring_reset: diff --git a/sys/dev/netmap/netmap.c b/sys/dev/netmap/netmap.c index 11229ccf6b87..0a728bbf94e7 100644 --- a/sys/dev/netmap/netmap.c +++ b/sys/dev/netmap/netmap.c @@ -293,7 +293,7 @@ ports 
attached to the switch) * kring->nm_sync() == DEVICE_netmap_rxsync() * 2) device interrupt handler * na->nm_notify() == netmap_notify() - * - tx from host stack + * - rx from host stack * concurrently: * 1) host stack * netmap_transmit() @@ -313,31 +313,113 @@ ports attached to the switch) * * -= SYSTEM DEVICE WITH GENERIC SUPPORT =- * + * na == NA(ifp) == generic_netmap_adapter created in generic_netmap_attach() * - * - * -= VALE PORT =- - * - * - * - * -= NETMAP PIPE =- - * - * - * - * -= SYSTEM DEVICE WITH NATIVE SUPPORT, CONNECTED TO VALE, NO HOST RINGS =- - * - * - * - * -= SYSTEM DEVICE WITH NATIVE SUPPORT, CONNECTED TO VALE, WITH HOST RINGS =- - * - * - * - * -= SYSTEM DEVICE WITH GENERIC SUPPORT, CONNECTED TO VALE, NO HOST RINGS =- - * - * - * - * -= SYSTEM DEVICE WITH GENERIC SUPPORT, CONNECTED TO VALE, WITH HOST RINGS =- + * - tx from netmap userspace: + * concurrently: + * 1) ioctl(NIOCTXSYNC)/netmap_poll() in process context + * kring->nm_sync() == generic_netmap_txsync() + * linux: dev_queue_xmit() with NM_MAGIC_PRIORITY_TX + * generic_ndo_start_xmit() + * orig. dev. start_xmit + * FreeBSD: na->if_transmit() == orig. 
dev if_transmit + * 2) generic_mbuf_destructor() + * na->nm_notify() == netmap_notify() + * - rx from netmap userspace: + * 1) ioctl(NIOCRXSYNC)/netmap_poll() in process context + * kring->nm_sync() == generic_netmap_rxsync() + * mbq_safe_dequeue() + * 2) device driver + * generic_rx_handler() + * mbq_safe_enqueue() + * na->nm_notify() == netmap_notify() + * - rx from host stack: + * concurrently: + * 1) host stack + * linux: generic_ndo_start_xmit() + * netmap_transmit() + * FreeBSD: ifp->if_input() == netmap_transmit + * both: + * na->nm_notify() == netmap_notify() + * 2) ioctl(NIOCRXSYNC)/netmap_poll() in process context + * kring->nm_sync() == netmap_rxsync_from_host_compat + * netmap_rxsync_from_host(na, NULL, NULL) + * - tx to host stack: + * ioctl(NIOCTXSYNC)/netmap_poll() in process context + * kring->nm_sync() == netmap_txsync_to_host_compat + * netmap_txsync_to_host(na) + * NM_SEND_UP() + * FreeBSD: na->if_input() == ??? XXX + * linux: netif_rx() with NM_MAGIC_PRIORITY_RX * * + * -= VALE =- + * + * INCOMING: + * + * - VALE ports: + * ioctl(NIOCTXSYNC)/netmap_poll() in process context + * kring->nm_sync() == netmap_vp_txsync() + * + * - system device with native support: + * from cable: + * interrupt + * na->nm_notify() == netmap_bwrap_intr_notify(ring_nr != host ring) + * kring->nm_sync() == DEVICE_netmap_rxsync() + * netmap_vp_txsync() + * kring->nm_sync() == DEVICE_netmap_rxsync() + * from host stack: + * netmap_transmit() + * na->nm_notify() == netmap_bwrap_intr_notify(ring_nr == host ring) + * kring->nm_sync() == netmap_rxsync_from_host_compat() + * netmap_vp_txsync() + * + * - system device with generic support: + * from device driver: + * generic_rx_handler() + * na->nm_notify() == netmap_bwrap_intr_notify(ring_nr != host ring) + * kring->nm_sync() == generic_netmap_rxsync() + * netmap_vp_txsync() + * kring->nm_sync() == generic_netmap_rxsync() + * from host stack: + * netmap_transmit() + * na->nm_notify() == netmap_bwrap_intr_notify(ring_nr == host 
ring) + * kring->nm_sync() == netmap_rxsync_from_host_compat() + * netmap_vp_txsync() + * + * (all cases) --> nm_bdg_flush() + * dest_na->nm_notify() == (see below) + * + * OUTGOING: + * + * - VALE ports: + * concurrently: + * 1) ioctlNIOCRXSYNC)/netmap_poll() in process context + * kring->nm_sync() == netmap_vp_rxsync() + * 2) from nm_bdg_flush() + * na->nm_notify() == netmap_notify() + * + * - system device with native support: + * to cable: + * na->nm_notify() == netmap_bwrap_notify() + * netmap_vp_rxsync() + * kring->nm_sync() == DEVICE_netmap_txsync() + * netmap_vp_rxsync() + * to host stack: + * netmap_vp_rxsync() + * kring->nm_sync() == netmap_txsync_to_host_compat + * netmap_vp_rxsync_locked() + * + * - system device with generic adapter: + * to device driver: + * na->nm_notify() == netmap_bwrap_notify() + * netmap_vp_rxsync() + * kring->nm_sync() == generic_netmap_txsync() + * netmap_vp_rxsync() + * to host stack: + * netmap_vp_rxsync() + * kring->nm_sync() == netmap_txsync_to_host_compat + * netmap_vp_rxsync() * */ @@ -412,15 +494,6 @@ ports attached to the switch) MALLOC_DEFINE(M_NETMAP, "netmap", "Network memory map"); -/* - * The following variables are used by the drivers and replicate - * fields in the global memory pool. They only refer to buffers - * used by physical interfaces. - */ -u_int netmap_total_buffers; -u_int netmap_buf_size; -char *netmap_buffer_base; /* also address of an invalid buffer */ - /* user-controlled variables */ int netmap_verbose; @@ -446,7 +519,6 @@ SYSCTL_INT(_dev_netmap, OID_AUTO, adaptive_io, CTLFLAG_RW, int netmap_flags = 0; /* debug flags */ int netmap_fwd = 0; /* force transparent mode */ -int netmap_mmap_unreg = 0; /* allow mmap of unregistered fds */ /* * netmap_admode selects the netmap mode to use. @@ -464,7 +536,6 @@ int netmap_generic_rings = 1; /* number of queues in generic. 
*/ SYSCTL_INT(_dev_netmap, OID_AUTO, flags, CTLFLAG_RW, &netmap_flags, 0 , ""); SYSCTL_INT(_dev_netmap, OID_AUTO, fwd, CTLFLAG_RW, &netmap_fwd, 0 , ""); -SYSCTL_INT(_dev_netmap, OID_AUTO, mmap_unreg, CTLFLAG_RW, &netmap_mmap_unreg, 0, ""); SYSCTL_INT(_dev_netmap, OID_AUTO, admode, CTLFLAG_RW, &netmap_admode, 0 , ""); SYSCTL_INT(_dev_netmap, OID_AUTO, generic_mit, CTLFLAG_RW, &netmap_generic_mit, 0 , ""); SYSCTL_INT(_dev_netmap, OID_AUTO, generic_ringsize, CTLFLAG_RW, &netmap_generic_ringsize, 0 , ""); @@ -472,15 +543,6 @@ SYSCTL_INT(_dev_netmap, OID_AUTO, generic_rings, CTLFLAG_RW, &netmap_generic_rin NMG_LOCK_T netmap_global_lock; - -static void -nm_kr_get(struct netmap_kring *kr) -{ - while (NM_ATOMIC_TEST_AND_SET(&kr->nr_busy)) - tsleep(kr, 0, "NM_KR_GET", 4); -} - - /* * mark the ring as stopped, and run through the locks * to make sure other users get to see it. @@ -495,34 +557,14 @@ netmap_disable_ring(struct netmap_kring *kr) nm_kr_put(kr); } -/* stop or enable a single tx ring */ -void -netmap_set_txring(struct netmap_adapter *na, u_int ring_id, int stopped) -{ - if (stopped) - netmap_disable_ring(na->tx_rings + ring_id); - else - na->tx_rings[ring_id].nkr_stopped = 0; - /* nofify that the stopped state has changed. This is currently - *only used by bwrap to propagate the state to its own krings. - * (see netmap_bwrap_intr_notify). - */ - na->nm_notify(na, ring_id, NR_TX, NAF_DISABLE_NOTIFY); -} - -/* stop or enable a single rx ring */ +/* stop or enable a single ring */ void -netmap_set_rxring(struct netmap_adapter *na, u_int ring_id, int stopped) +netmap_set_ring(struct netmap_adapter *na, u_int ring_id, enum txrx t, int stopped) { if (stopped) - netmap_disable_ring(na->rx_rings + ring_id); + netmap_disable_ring(NMR(na, t) + ring_id); else - na->rx_rings[ring_id].nkr_stopped = 0; - /* nofify that the stopped state has changed. This is currently - *only used by bwrap to propagate the state to its own krings. - * (see netmap_bwrap_intr_notify). 
- */ - na->nm_notify(na, ring_id, NR_RX, NAF_DISABLE_NOTIFY); + NMR(na, t)[ring_id].nkr_stopped = 0; } @@ -531,20 +573,15 @@ void netmap_set_all_rings(struct netmap_adapter *na, int stopped) { int i; - u_int ntx, nrx; + enum txrx t; if (!nm_netmap_on(na)) return; - ntx = netmap_real_tx_rings(na); - nrx = netmap_real_rx_rings(na); - - for (i = 0; i < ntx; i++) { - netmap_set_txring(na, i, stopped); - } - - for (i = 0; i < nrx; i++) { - netmap_set_rxring(na, i, stopped); + for_rx_tx(t) { + for (i = 0; i < netmap_real_rings(na, t); i++) { + netmap_set_ring(na, i, t, stopped); + } } } @@ -657,7 +694,8 @@ netmap_update_config(struct netmap_adapter *na) txr = txd = rxr = rxd = 0; if (na->nm_config == NULL || - na->nm_config(na, &txr, &txd, &rxr, &rxd)) { + na->nm_config(na, &txr, &txd, &rxr, &rxd)) + { /* take whatever we had at init time */ txr = na->num_tx_rings; txd = na->num_tx_desc; @@ -738,73 +776,59 @@ netmap_krings_create(struct netmap_adapter *na, u_int tailroom) { u_int i, len, ndesc; struct netmap_kring *kring; - u_int ntx, nrx; + u_int n[NR_TXRX]; + enum txrx t; /* account for the (possibly fake) host rings */ - ntx = na->num_tx_rings + 1; - nrx = na->num_rx_rings + 1; + n[NR_TX] = na->num_tx_rings + 1; + n[NR_RX] = na->num_rx_rings + 1; - len = (ntx + nrx) * sizeof(struct netmap_kring) + tailroom; + len = (n[NR_TX] + n[NR_RX]) * sizeof(struct netmap_kring) + tailroom; na->tx_rings = malloc((size_t)len, M_DEVBUF, M_NOWAIT | M_ZERO); if (na->tx_rings == NULL) { D("Cannot allocate krings"); return ENOMEM; } - na->rx_rings = na->tx_rings + ntx; + na->rx_rings = na->tx_rings + n[NR_TX]; /* * All fields in krings are 0 except the one initialized below. * but better be explicit on important kring fields. 
*/ - ndesc = na->num_tx_desc; - for (i = 0; i < ntx; i++) { /* Transmit rings */ - kring = &na->tx_rings[i]; - bzero(kring, sizeof(*kring)); - kring->na = na; - kring->ring_id = i; - kring->nkr_num_slots = ndesc; - if (i < na->num_tx_rings) { - kring->nm_sync = na->nm_txsync; - } else if (i == na->num_tx_rings) { - kring->nm_sync = netmap_txsync_to_host_compat; - } - /* - * IMPORTANT: Always keep one slot empty. - */ - kring->rhead = kring->rcur = kring->nr_hwcur = 0; - kring->rtail = kring->nr_hwtail = ndesc - 1; - snprintf(kring->name, sizeof(kring->name) - 1, "%s TX%d", na->name, i); - ND("ktx %s h %d c %d t %d", - kring->name, kring->rhead, kring->rcur, kring->rtail); - mtx_init(&kring->q_lock, "nm_txq_lock", NULL, MTX_DEF); - init_waitqueue_head(&kring->si); - } - - ndesc = na->num_rx_desc; - for (i = 0; i < nrx; i++) { /* Receive rings */ - kring = &na->rx_rings[i]; - bzero(kring, sizeof(*kring)); - kring->na = na; - kring->ring_id = i; - kring->nkr_num_slots = ndesc; - if (i < na->num_rx_rings) { - kring->nm_sync = na->nm_rxsync; - } else if (i == na->num_rx_rings) { - kring->nm_sync = netmap_rxsync_from_host_compat; + for_rx_tx(t) { + ndesc = nma_get_ndesc(na, t); + for (i = 0; i < n[t]; i++) { + kring = &NMR(na, t)[i]; + bzero(kring, sizeof(*kring)); + kring->na = na; + kring->ring_id = i; + kring->tx = t; + kring->nkr_num_slots = ndesc; + if (i < nma_get_nrings(na, t)) { + kring->nm_sync = (t == NR_TX ? na->nm_txsync : na->nm_rxsync); + } else if (i == na->num_tx_rings) { + kring->nm_sync = (t == NR_TX ? + netmap_txsync_to_host_compat : + netmap_rxsync_from_host_compat); + } + kring->nm_notify = na->nm_notify; + kring->rhead = kring->rcur = kring->nr_hwcur = 0; + /* + * IMPORTANT: Always keep one slot empty. + */ + kring->rtail = kring->nr_hwtail = (t == NR_TX ? 
ndesc - 1 : 0); + snprintf(kring->name, sizeof(kring->name) - 1, "%s %s%d", na->name, + nm_txrx2str(t), i); + ND("ktx %s h %d c %d t %d", + kring->name, kring->rhead, kring->rcur, kring->rtail); + mtx_init(&kring->q_lock, (t == NR_TX ? "nm_txq_lock" : "nm_rxq_lock"), NULL, MTX_DEF); + init_waitqueue_head(&kring->si); } - kring->rhead = kring->rcur = kring->nr_hwcur = 0; - kring->rtail = kring->nr_hwtail = 0; - snprintf(kring->name, sizeof(kring->name) - 1, "%s RX%d", na->name, i); - ND("krx %s h %d c %d t %d", - kring->name, kring->rhead, kring->rcur, kring->rtail); - mtx_init(&kring->q_lock, "nm_rxq_lock", NULL, MTX_DEF); - init_waitqueue_head(&kring->si); + init_waitqueue_head(&na->si[t]); } - init_waitqueue_head(&na->tx_si); - init_waitqueue_head(&na->rx_si); - na->tailroom = na->rx_rings + nrx; + na->tailroom = na->rx_rings + n[NR_RX]; return 0; } @@ -829,6 +853,10 @@ void netmap_krings_delete(struct netmap_adapter *na) { struct netmap_kring *kring = na->tx_rings; + enum txrx t; + + for_rx_tx(t) + netmap_knlist_destroy(&na->si[t]); /* we rely on the krings layout described above */ for ( ; kring != na->tailroom; kring++) { @@ -858,142 +886,35 @@ netmap_hw_krings_delete(struct netmap_adapter *na) } -/* create a new netmap_if for a newly registered fd. - * If this is the first registration of the adapter, - * also create the netmap rings and their in-kernel view, - * the netmap krings. - */ -/* call with NMG_LOCK held */ -static struct netmap_if* -netmap_if_new(struct netmap_adapter *na) -{ - struct netmap_if *nifp; - - if (netmap_update_config(na)) { - /* configuration mismatch, report and fail */ - return NULL; - } - - if (na->active_fds) /* already registered */ - goto final; - - /* create and init the krings arrays. 
- * Depending on the adapter, this may also create - * the netmap rings themselves - */ - if (na->nm_krings_create(na)) - return NULL; - - /* create all missing netmap rings */ - if (netmap_mem_rings_create(na)) - goto cleanup; - -final: - - /* in all cases, create a new netmap if */ - nifp = netmap_mem_if_new(na); - if (nifp == NULL) - goto cleanup; - - return (nifp); - -cleanup: - - if (na->active_fds == 0) { - netmap_mem_rings_delete(na); - na->nm_krings_delete(na); - } - - return NULL; -} - - -/* grab a reference to the memory allocator, if we don't have one already. The - * reference is taken from the netmap_adapter registered with the priv. - */ -/* call with NMG_LOCK held */ -static int -netmap_get_memory_locked(struct netmap_priv_d* p) -{ - struct netmap_mem_d *nmd; - int error = 0; - - if (p->np_na == NULL) { - if (!netmap_mmap_unreg) - return ENODEV; - /* for compatibility with older versions of the API - * we use the global allocator when no interface has been - * registered - */ - nmd = &nm_mem; - } else { - nmd = p->np_na->nm_mem; - } - if (p->np_mref == NULL) { - error = netmap_mem_finalize(nmd, p->np_na); - if (!error) - p->np_mref = nmd; - } else if (p->np_mref != nmd) { - /* a virtual port has been registered, but previous - * syscalls already used the global allocator. 
- * We cannot continue - */ - error = ENODEV; - } - return error; -} - - -/* call with NMG_LOCK *not* held */ -int -netmap_get_memory(struct netmap_priv_d* p) -{ - int error; - NMG_LOCK(); - error = netmap_get_memory_locked(p); - NMG_UNLOCK(); - return error; -} - - -/* call with NMG_LOCK held */ -static int -netmap_have_memory_locked(struct netmap_priv_d* p) -{ - return p->np_mref != NULL; -} - - -/* call with NMG_LOCK held */ -static void -netmap_drop_memory_locked(struct netmap_priv_d* p) -{ - if (p->np_mref) { - netmap_mem_deref(p->np_mref, p->np_na); - p->np_mref = NULL; - } -} - /* - * Call nm_register(ifp,0) to stop netmap mode on the interface and + * Undo everything that was done in netmap_do_regif(). In particular, + * call nm_register(ifp,0) to stop netmap mode on the interface and * revert to normal operation. - * The second argument is the nifp to work on. In some cases it is - * not attached yet to the netmap_priv_d so we need to pass it as - * a separate argument. */ /* call with NMG_LOCK held */ +static void netmap_unset_ringid(struct netmap_priv_d *); +static void netmap_rel_exclusive(struct netmap_priv_d *); static void -netmap_do_unregif(struct netmap_priv_d *priv, struct netmap_if *nifp) +netmap_do_unregif(struct netmap_priv_d *priv) { struct netmap_adapter *na = priv->np_na; NMG_LOCK_ASSERT(); na->active_fds--; + /* release exclusive use if it was requested on regif */ + netmap_rel_exclusive(priv); if (na->active_fds <= 0) { /* last instance */ if (netmap_verbose) D("deleting last instance for %s", na->name); + +#ifdef WITH_MONITOR + /* walk through all the rings and tell any monitor + * that the port is going to exit netmap mode + */ + netmap_monitor_stop(na); +#endif /* * (TO CHECK) This function is only called * when the last reference to this file descriptor goes @@ -1014,37 +935,33 @@ netmap_do_unregif(struct netmap_priv_d *priv, struct netmap_if *nifp) * XXX The wake up now must happen during *_down(), when * we order all activities to 
stop. -gl */ - netmap_knlist_destroy(&na->tx_si); - netmap_knlist_destroy(&na->rx_si); - /* delete rings and buffers */ netmap_mem_rings_delete(na); na->nm_krings_delete(na); } + /* possibily decrement counter of tx_si/rx_si users */ + netmap_unset_ringid(priv); /* delete the nifp */ - netmap_mem_if_delete(na, nifp); -} - -/* call with NMG_LOCK held */ -static __inline int -nm_tx_si_user(struct netmap_priv_d *priv) -{ - return (priv->np_na != NULL && - (priv->np_txqlast - priv->np_txqfirst > 1)); + netmap_mem_if_delete(na, priv->np_nifp); + /* drop the allocator */ + netmap_mem_deref(na->nm_mem, na); + /* mark the priv as unregistered */ + priv->np_na = NULL; + priv->np_nifp = NULL; } /* call with NMG_LOCK held */ static __inline int -nm_rx_si_user(struct netmap_priv_d *priv) +nm_si_user(struct netmap_priv_d *priv, enum txrx t) { return (priv->np_na != NULL && - (priv->np_rxqlast - priv->np_rxqfirst > 1)); + (priv->np_qlast[t] - priv->np_qfirst[t] > 1)); } - /* * Destructor of the netmap_priv_d, called when the fd has - * no active open() and mmap(). Also called in error paths. + * no active open() and mmap(). + * Undo all the things done by NIOCREGIF. * * returns 1 if this is the last instance and we can free priv */ @@ -1066,17 +983,8 @@ netmap_dtor_locked(struct netmap_priv_d *priv) if (!na) { return 1; //XXX is it correct? 
} - netmap_do_unregif(priv, priv->np_nifp); - priv->np_nifp = NULL; - netmap_drop_memory_locked(priv); - if (priv->np_na) { - if (nm_tx_si_user(priv)) - na->tx_si_users--; - if (nm_rx_si_user(priv)) - na->rx_si_users--; - netmap_adapter_put(na); - priv->np_na = NULL; - } + netmap_do_unregif(priv); + netmap_adapter_put(na); return 1; } @@ -1148,7 +1056,7 @@ static void netmap_grab_packets(struct netmap_kring *kring, struct mbq *q, int force) { u_int const lim = kring->nkr_num_slots - 1; - u_int const head = kring->ring->head; + u_int const head = kring->rhead; u_int n; struct netmap_adapter *na = kring->na; @@ -1235,7 +1143,6 @@ void netmap_txsync_to_host(struct netmap_adapter *na) { struct netmap_kring *kring = &na->tx_rings[na->num_tx_rings]; - struct netmap_ring *ring = kring->ring; u_int const lim = kring->nkr_num_slots - 1; u_int const head = kring->rhead; struct mbq q; @@ -1246,14 +1153,12 @@ netmap_txsync_to_host(struct netmap_adapter *na) * the queue is drained in all cases. */ mbq_init(&q); - ring->cur = head; netmap_grab_packets(kring, &q, 1 /* force */); ND("have %d pkts in queue", mbq_len(&q)); kring->nr_hwcur = head; kring->nr_hwtail = head + lim; if (kring->nr_hwtail > lim) kring->nr_hwtail -= lim + 1; - nm_txsync_finalize(kring); netmap_send_up(na->ifp, &q); } @@ -1281,11 +1186,13 @@ netmap_rxsync_from_host(struct netmap_adapter *na, struct thread *td, void *pwai u_int const lim = kring->nkr_num_slots - 1; u_int const head = kring->rhead; int ret = 0; - struct mbq *q = &kring->rx_queue; + struct mbq *q = &kring->rx_queue, fq; (void)pwait; /* disable unused warnings */ (void)td; + mbq_init(&fq); /* fq holds packets to be freed */ + mbq_lock(q); /* First part: import newly received packets */ @@ -1308,7 +1215,7 @@ netmap_rxsync_from_host(struct netmap_adapter *na, struct thread *td, void *pwai slot->len = len; slot->flags = kring->nkr_slot_flags; nm_i = nm_next(nm_i, lim); - m_freem(m); + mbq_enqueue(&fq, m); } kring->nr_hwtail = nm_i; } @@ -1323,13 
+1230,15 @@ netmap_rxsync_from_host(struct netmap_adapter *na, struct thread *td, void *pwai kring->nr_hwcur = head; } - nm_rxsync_finalize(kring); - /* access copies of cur,tail in the kring */ if (kring->rcur == kring->rtail && td) /* no bufs available */ OS_selrecord(td, &kring->si); mbq_unlock(q); + + mbq_purge(&fq); + mbq_destroy(&fq); + return ret; } @@ -1363,9 +1272,11 @@ netmap_get_hw_na(struct ifnet *ifp, struct netmap_adapter **na) { /* generic support */ int i = netmap_admode; /* Take a snapshot. */ - int error = 0; struct netmap_adapter *prev_na; +#ifdef WITH_GENERIC struct netmap_generic_adapter *gna; + int error = 0; +#endif *na = NULL; /* default */ @@ -1401,6 +1312,7 @@ netmap_get_hw_na(struct ifnet *ifp, struct netmap_adapter **na) if (!NETMAP_CAPABLE(ifp) && i == NETMAP_ADMODE_NATIVE) return EOPNOTSUPP; +#ifdef WITH_GENERIC /* Otherwise, create a generic adapter and return it, * saving the previously used netmap adapter, if any. * @@ -1431,6 +1343,9 @@ netmap_get_hw_na(struct ifnet *ifp, struct netmap_adapter **na) ND("Created generic NA %p (prev %p)", gna, gna->prev); return 0; +#else /* !WITH_GENERIC */ + return EOPNOTSUPP; +#endif } @@ -1489,7 +1404,7 @@ netmap_get_na(struct nmreq *nmr, struct netmap_adapter **na, int create) return error; if (*na != NULL) /* valid match in netmap_get_bdg_na() */ - goto pipes; + goto out; /* * This must be a hardware na, lookup the name in the system. @@ -1509,14 +1424,6 @@ netmap_get_na(struct nmreq *nmr, struct netmap_adapter **na, int create) *na = ret; netmap_adapter_get(ret); -pipes: - /* - * If we are opening a pipe whose parent was not in netmap mode, - * we have to allocate the pipe array now. 
- * XXX get rid of this clumsiness (2014-03-15) - */ - error = netmap_pipe_alloc(*na, nmr); - out: if (error && ret != NULL) netmap_adapter_put(ret); @@ -1541,9 +1448,10 @@ out: * * hwcur, rhead, rtail and hwtail are reliable */ -u_int +static u_int nm_txsync_prologue(struct netmap_kring *kring) { +#define NM_ASSERT(t) if (t) { D("fail " #t); goto error; } struct netmap_ring *ring = kring->ring; u_int head = ring->head; /* read only once */ u_int cur = ring->cur; /* read only once */ @@ -1569,25 +1477,20 @@ nm_txsync_prologue(struct netmap_kring *kring) */ if (kring->rtail >= kring->rhead) { /* want rhead <= head <= rtail */ - if (head < kring->rhead || head > kring->rtail) - goto error; + NM_ASSERT(head < kring->rhead || head > kring->rtail); /* and also head <= cur <= rtail */ - if (cur < head || cur > kring->rtail) - goto error; + NM_ASSERT(cur < head || cur > kring->rtail); } else { /* here rtail < rhead */ /* we need head outside rtail .. rhead */ - if (head > kring->rtail && head < kring->rhead) - goto error; + NM_ASSERT(head > kring->rtail && head < kring->rhead); /* two cases now: head <= rtail or head >= rhead */ if (head <= kring->rtail) { /* want head <= cur <= rtail */ - if (cur < head || cur > kring->rtail) - goto error; + NM_ASSERT(cur < head || cur > kring->rtail); } else { /* head >= rhead */ /* cur must be outside rtail..head */ - if (cur > kring->rtail && cur < head) - goto error; + NM_ASSERT(cur > kring->rtail && cur < head); } } if (ring->tail != kring->rtail) { @@ -1600,12 +1503,13 @@ nm_txsync_prologue(struct netmap_kring *kring) return head; error: - RD(5, "%s kring error: hwcur %d rcur %d hwtail %d cur %d tail %d", + RD(5, "%s kring error: head %d cur %d tail %d rhead %d rcur %d rtail %d hwcur %d hwtail %d", kring->name, - kring->nr_hwcur, - kring->rcur, kring->nr_hwtail, - cur, ring->tail); + head, cur, ring->tail, + kring->rhead, kring->rcur, kring->rtail, + kring->nr_hwcur, kring->nr_hwtail); return n; +#undef NM_ASSERT } @@ -1620,14 
+1524,14 @@ error: * hwcur and hwtail are reliable. * */ -u_int +static u_int nm_rxsync_prologue(struct netmap_kring *kring) { struct netmap_ring *ring = kring->ring; uint32_t const n = kring->nkr_num_slots; uint32_t head, cur; - ND("%s kc %d kt %d h %d c %d t %d", + ND(5,"%s kc %d kt %d h %d c %d t %d", kring->name, kring->nr_hwcur, kring->nr_hwtail, ring->head, ring->cur, ring->tail); @@ -1719,7 +1623,7 @@ netmap_ring_reinit(struct netmap_kring *kring) for (i = 0; i <= lim; i++) { u_int idx = ring->slot[i].buf_idx; u_int len = ring->slot[i].len; - if (idx < 2 || idx >= netmap_total_buffers) { + if (idx < 2 || idx >= kring->na->na_lut.objtotal) { RD(5, "bad index at slot %d idx %d len %d ", i, idx, len); ring->slot[i].buf_idx = 0; ring->slot[i].len = 0; @@ -1754,6 +1658,7 @@ netmap_interp_ringid(struct netmap_priv_d *priv, uint16_t ringid, uint32_t flags struct netmap_adapter *na = priv->np_na; u_int j, i = ringid & NETMAP_RING_MASK; u_int reg = flags & NR_REG_MASK; + enum txrx t; if (reg == NR_REG_DEFAULT) { /* convert from old ringid to flags */ @@ -1770,12 +1675,12 @@ netmap_interp_ringid(struct netmap_priv_d *priv, uint16_t ringid, uint32_t flags case NR_REG_ALL_NIC: case NR_REG_PIPE_MASTER: case NR_REG_PIPE_SLAVE: - priv->np_txqfirst = 0; - priv->np_txqlast = na->num_tx_rings; - priv->np_rxqfirst = 0; - priv->np_rxqlast = na->num_rx_rings; + for_rx_tx(t) { + priv->np_qfirst[t] = 0; + priv->np_qlast[t] = nma_get_nrings(na, t); + } ND("%s %d %d", "ALL/PIPE", - priv->np_rxqfirst, priv->np_rxqlast); + priv->np_qfirst[NR_RX], priv->np_qlast[NR_RX]); break; case NR_REG_SW: case NR_REG_NIC_SW: @@ -1783,31 +1688,27 @@ netmap_interp_ringid(struct netmap_priv_d *priv, uint16_t ringid, uint32_t flags D("host rings not supported"); return EINVAL; } - priv->np_txqfirst = (reg == NR_REG_SW ? - na->num_tx_rings : 0); - priv->np_txqlast = na->num_tx_rings + 1; - priv->np_rxqfirst = (reg == NR_REG_SW ? 
- na->num_rx_rings : 0); - priv->np_rxqlast = na->num_rx_rings + 1; + for_rx_tx(t) { + priv->np_qfirst[t] = (reg == NR_REG_SW ? + nma_get_nrings(na, t) : 0); + priv->np_qlast[t] = nma_get_nrings(na, t) + 1; + } ND("%s %d %d", reg == NR_REG_SW ? "SW" : "NIC+SW", - priv->np_rxqfirst, priv->np_rxqlast); + priv->np_qfirst[NR_RX], priv->np_qlast[NR_RX]); break; case NR_REG_ONE_NIC: if (i >= na->num_tx_rings && i >= na->num_rx_rings) { D("invalid ring id %d", i); return EINVAL; } - /* if not enough rings, use the first one */ - j = i; - if (j >= na->num_tx_rings) - j = 0; - priv->np_txqfirst = j; - priv->np_txqlast = j + 1; - j = i; - if (j >= na->num_rx_rings) - j = 0; - priv->np_rxqfirst = j; - priv->np_rxqlast = j + 1; + for_rx_tx(t) { + /* if not enough rings, use the first one */ + j = i; + if (j >= nma_get_nrings(na, t)) + j = 0; + priv->np_qfirst[t] = j; + priv->np_qlast[t] = j + 1; + } break; default: D("invalid regif type %d", reg); @@ -1818,10 +1719,10 @@ netmap_interp_ringid(struct netmap_priv_d *priv, uint16_t ringid, uint32_t flags if (netmap_verbose) { D("%s: tx [%d,%d) rx [%d,%d) id %d", na->name, - priv->np_txqfirst, - priv->np_txqlast, - priv->np_rxqfirst, - priv->np_rxqlast, + priv->np_qfirst[NR_TX], + priv->np_qlast[NR_TX], + priv->np_qfirst[NR_RX], + priv->np_qlast[NR_RX], i); } return 0; @@ -1837,6 +1738,7 @@ netmap_set_ringid(struct netmap_priv_d *priv, uint16_t ringid, uint32_t flags) { struct netmap_adapter *na = priv->np_na; int error; + enum txrx t; error = netmap_interp_ringid(priv, ringid, flags); if (error) { @@ -1850,13 +1752,109 @@ netmap_set_ringid(struct netmap_priv_d *priv, uint16_t ringid, uint32_t flags) * The default netmap_notify() callback will then * avoid signaling the global queue if nobody is using it */ - if (nm_tx_si_user(priv)) - na->tx_si_users++; - if (nm_rx_si_user(priv)) - na->rx_si_users++; + for_rx_tx(t) { + if (nm_si_user(priv, t)) + na->si_users[t]++; + } return 0; } +static void +netmap_unset_ringid(struct 
netmap_priv_d *priv) +{ + struct netmap_adapter *na = priv->np_na; + enum txrx t; + + for_rx_tx(t) { + if (nm_si_user(priv, t)) + na->si_users[t]--; + priv->np_qfirst[t] = priv->np_qlast[t] = 0; + } + priv->np_flags = 0; + priv->np_txpoll = 0; +} + + +/* check that the rings we want to bind are not exclusively owned by a previous + * bind. If exclusive ownership has been requested, we also mark the rings. + */ +static int +netmap_get_exclusive(struct netmap_priv_d *priv) +{ + struct netmap_adapter *na = priv->np_na; + u_int i; + struct netmap_kring *kring; + int excl = (priv->np_flags & NR_EXCLUSIVE); + enum txrx t; + + ND("%s: grabbing tx [%d, %d) rx [%d, %d)", + na->name, + priv->np_qfirst[NR_TX], + priv->np_qlast[NR_TX], + priv->np_qfirst[NR_RX], + priv->np_qlast[NR_RX]); + + /* first round: check that all the requested rings + * are neither alread exclusively owned, nor we + * want exclusive ownership when they are already in use + */ + for_rx_tx(t) { + for (i = priv->np_qfirst[t]; i < priv->np_qlast[t]; i++) { + kring = &NMR(na, t)[i]; + if ((kring->nr_kflags & NKR_EXCLUSIVE) || + (kring->users && excl)) + { + ND("ring %s busy", kring->name); + return EBUSY; + } + } + } + + /* second round: increment usage cound and possibly + * mark as exclusive + */ + + for_rx_tx(t) { + for (i = priv->np_qfirst[t]; i < priv->np_qlast[t]; i++) { + kring = &NMR(na, t)[i]; + kring->users++; + if (excl) + kring->nr_kflags |= NKR_EXCLUSIVE; + } + } + + return 0; + +} + +/* undo netmap_get_ownership() */ +static void +netmap_rel_exclusive(struct netmap_priv_d *priv) +{ + struct netmap_adapter *na = priv->np_na; + u_int i; + struct netmap_kring *kring; + int excl = (priv->np_flags & NR_EXCLUSIVE); + enum txrx t; + + ND("%s: releasing tx [%d, %d) rx [%d, %d)", + na->name, + priv->np_qfirst[NR_TX], + priv->np_qlast[NR_TX], + priv->np_qfirst[NR_RX], + priv->np_qlast[MR_RX]); + + + for_rx_tx(t) { + for (i = priv->np_qfirst[t]; i < priv->np_qlast[t]; i++) { + kring = &NMR(na, t)[i]; + 
if (excl) + kring->nr_kflags &= ~NKR_EXCLUSIVE; + kring->users--; + } + } +} + /* * possibly move the interface to netmap-mode. * If success it returns a pointer to netmap_if, otherwise NULL. @@ -1871,9 +1869,8 @@ netmap_set_ringid(struct netmap_priv_d *priv, uint16_t ringid, uint32_t flags) * The bwrap has to override this, since it has to forward * the request to the wrapped adapter (netmap_bwrap_config). * - * XXX netmap_if_new calls this again (2014-03-15) * - * na->nm_krings_create() [by netmap_if_new] + * na->nm_krings_create() * (create and init the krings array) * * One of the following: @@ -1927,15 +1924,14 @@ netmap_set_ringid(struct netmap_priv_d *priv, uint16_t ringid, uint32_t flags) * the hwna notify callback (to get the frames * coming from outside go through the bridge). * - * XXX maybe netmap_if_new() should be merged with this (2014-03-15). * */ -struct netmap_if * +int netmap_do_regif(struct netmap_priv_d *priv, struct netmap_adapter *na, - uint16_t ringid, uint32_t flags, int *err) + uint16_t ringid, uint32_t flags) { struct netmap_if *nifp = NULL; - int error, need_mem = 0; + int error; NMG_LOCK_ASSERT(); /* ring configuration may have changed, fetch from the card */ @@ -1943,57 +1939,121 @@ netmap_do_regif(struct netmap_priv_d *priv, struct netmap_adapter *na, priv->np_na = na; /* store the reference */ error = netmap_set_ringid(priv, ringid, flags); if (error) - goto out; - /* ensure allocators are ready */ - need_mem = !netmap_have_memory_locked(priv); - if (need_mem) { - error = netmap_get_memory_locked(priv); - ND("get_memory returned %d", error); + goto err; + error = netmap_mem_finalize(na->nm_mem, na); + if (error) + goto err; + + if (na->active_fds == 0) { + /* + * If this is the first registration of the adapter, + * also create the netmap rings and their in-kernel view, + * the netmap krings. 
+ */ + + /* + * Depending on the adapter, this may also create + * the netmap rings themselves + */ + error = na->nm_krings_create(na); + if (error) + goto err_drop_mem; + + /* create all missing netmap rings */ + error = netmap_mem_rings_create(na); if (error) - goto out; + goto err_del_krings; } - /* Allocate a netmap_if and, if necessary, all the netmap_ring's */ - nifp = netmap_if_new(na); - if (nifp == NULL) { /* allocation failed */ + + /* now the kring must exist and we can check whether some + * previous bind has exclusive ownership on them + */ + error = netmap_get_exclusive(priv); + if (error) + goto err_del_rings; + + /* in all cases, create a new netmap if */ + nifp = netmap_mem_if_new(na); + if (nifp == NULL) { error = ENOMEM; - goto out; + goto err_rel_excl; } + na->active_fds++; if (!nm_netmap_on(na)) { /* Netmap not active, set the card in netmap mode * and make it use the shared buffers. */ /* cache the allocator info in the na */ - na->na_lut = netmap_mem_get_lut(na->nm_mem); - ND("%p->na_lut == %p", na, na->na_lut); - na->na_lut_objtotal = netmap_mem_get_buftotal(na->nm_mem); - na->na_lut_objsize = netmap_mem_get_bufsize(na->nm_mem); + netmap_mem_get_lut(na->nm_mem, &na->na_lut); + ND("%p->na_lut == %p", na, na->na_lut.lut); error = na->nm_register(na, 1); /* mode on */ - if (error) { - netmap_do_unregif(priv, nifp); - nifp = NULL; - } - } -out: - *err = error; - if (error) { - /* we should drop the allocator, but only - * if we were the ones who grabbed it - */ - if (need_mem) - netmap_drop_memory_locked(priv); - priv->np_na = NULL; - } - if (nifp != NULL) { - /* - * advertise that the interface is ready bt setting ni_nifp. - * The barrier is needed because readers (poll and *SYNC) - * check for priv->np_nifp != NULL without locking - */ - wmb(); /* make sure previous writes are visible to all CPUs */ - priv->np_nifp = nifp; + if (error) + goto err_del_if; } - return nifp; + + /* + * advertise that the interface is ready by setting np_nifp. 
+ * The barrier is needed because readers (poll, *SYNC and mmap) + * check for priv->np_nifp != NULL without locking + */ + mb(); /* make sure previous writes are visible to all CPUs */ + priv->np_nifp = nifp; + + return 0; + +err_del_if: + memset(&na->na_lut, 0, sizeof(na->na_lut)); + na->active_fds--; + netmap_mem_if_delete(na, nifp); +err_rel_excl: + netmap_rel_exclusive(priv); +err_del_rings: + if (na->active_fds == 0) + netmap_mem_rings_delete(na); +err_del_krings: + if (na->active_fds == 0) + na->nm_krings_delete(na); +err_drop_mem: + netmap_mem_deref(na->nm_mem, na); +err: + priv->np_na = NULL; + return error; +} + + +/* + * update kring and ring at the end of txsync. + */ +static inline void +nm_txsync_finalize(struct netmap_kring *kring) +{ + /* update ring tail to what the kernel knows */ + kring->ring->tail = kring->rtail = kring->nr_hwtail; + + /* note, head/rhead/hwcur might be behind cur/rcur + * if no carrier + */ + ND(5, "%s now hwcur %d hwtail %d head %d cur %d tail %d", + kring->name, kring->nr_hwcur, kring->nr_hwtail, + kring->rhead, kring->rcur, kring->rtail); +} + + +/* + * update kring and ring at the end of rxsync + */ +static inline void +nm_rxsync_finalize(struct netmap_kring *kring) +{ + /* tell userspace that there might be new packets */ + //struct netmap_ring *ring = kring->ring; + ND("head %d cur %d tail %d -> %d", ring->head, ring->cur, ring->tail, + kring->nr_hwtail); + kring->ring->tail = kring->rtail = kring->nr_hwtail; + /* make a copy of the state for next round */ + kring->rhead = kring->ring->head; + kring->rcur = kring->ring->cur; } @@ -2021,6 +2081,7 @@ netmap_ioctl(struct cdev *dev, u_long cmd, caddr_t data, u_int i, qfirst, qlast; struct netmap_if *nifp; struct netmap_kring *krings; + enum txrx t; (void)dev; /* UNUSED */ (void)fflag; /* UNUSED */ @@ -2108,7 +2169,7 @@ netmap_ioctl(struct cdev *dev, u_long cmd, caddr_t data, do { u_int memflags; - if (priv->np_na != NULL) { /* thread already registered */ + if (priv->np_nifp 
!= NULL) { /* thread already registered */ error = EBUSY; break; } @@ -2121,12 +2182,12 @@ netmap_ioctl(struct cdev *dev, u_long cmd, caddr_t data, error = EBUSY; break; } - nifp = netmap_do_regif(priv, na, nmr->nr_ringid, nmr->nr_flags, &error); - if (!nifp) { /* reg. failed, release priv and ref */ + error = netmap_do_regif(priv, na, nmr->nr_ringid, nmr->nr_flags); + if (error) { /* reg. failed, release priv and ref */ netmap_adapter_put(na); - priv->np_nifp = NULL; break; } + nifp = priv->np_nifp; priv->np_td = td; // XXX kqueue, debugging only /* return the offset of the netmap_if object */ @@ -2137,16 +2198,17 @@ netmap_ioctl(struct cdev *dev, u_long cmd, caddr_t data, error = netmap_mem_get_info(na->nm_mem, &nmr->nr_memsize, &memflags, &nmr->nr_arg2); if (error) { + netmap_do_unregif(priv); netmap_adapter_put(na); break; } if (memflags & NETMAP_MEM_PRIVATE) { *(uint32_t *)(uintptr_t)&nifp->ni_flags |= NI_PRIV_MEM; } - priv->np_txsi = (priv->np_txqlast - priv->np_txqfirst > 1) ? - &na->tx_si : &na->tx_rings[priv->np_txqfirst].si; - priv->np_rxsi = (priv->np_rxqlast - priv->np_rxqfirst > 1) ? - &na->rx_si : &na->rx_rings[priv->np_rxqfirst].si; + for_rx_tx(t) { + priv->np_si[t] = nm_si_user(priv, t) ? + &na->si[t] : &NMR(na, t)[priv->np_qfirst[t]].si; + } if (nmr->nr_arg3) { D("requested %d extra buffers", nmr->nr_arg3); @@ -2182,15 +2244,10 @@ netmap_ioctl(struct cdev *dev, u_long cmd, caddr_t data, break; } - if (cmd == NIOCTXSYNC) { - krings = na->tx_rings; - qfirst = priv->np_txqfirst; - qlast = priv->np_txqlast; - } else { - krings = na->rx_rings; - qfirst = priv->np_rxqfirst; - qlast = priv->np_rxqlast; - } + t = (cmd == NIOCTXSYNC ? 
NR_TX : NR_RX); + krings = NMR(na, t); + qfirst = priv->np_qfirst[t]; + qlast = priv->np_qlast[t]; for (i = qfirst; i < qlast; i++) { struct netmap_kring *kring = krings + i; @@ -2205,15 +2262,19 @@ netmap_ioctl(struct cdev *dev, u_long cmd, caddr_t data, kring->nr_hwcur); if (nm_txsync_prologue(kring) >= kring->nkr_num_slots) { netmap_ring_reinit(kring); - } else { - kring->nm_sync(kring, NAF_FORCE_RECLAIM); + } else if (kring->nm_sync(kring, NAF_FORCE_RECLAIM) == 0) { + nm_txsync_finalize(kring); } if (netmap_verbose & NM_VERB_TXSYNC) D("post txsync ring %d cur %d hwcur %d", i, kring->ring->cur, kring->nr_hwcur); } else { - kring->nm_sync(kring, NAF_FORCE_READ); + if (nm_rxsync_prologue(kring) >= kring->nkr_num_slots) { + netmap_ring_reinit(kring); + } else if (kring->nm_sync(kring, NAF_FORCE_READ) == 0) { + nm_rxsync_finalize(kring); + } microtime(&na->rx_rings[i].ring->ts); } nm_kr_put(kring); @@ -2221,9 +2282,11 @@ netmap_ioctl(struct cdev *dev, u_long cmd, caddr_t data, break; +#ifdef WITH_VALE case NIOCCONFIG: error = netmap_bdg_config(nmr); break; +#endif #ifdef __FreeBSD__ case FIONBIO: case FIOASYNC: @@ -2286,10 +2349,13 @@ netmap_poll(struct cdev *dev, int events, struct thread *td) struct netmap_priv_d *priv = NULL; struct netmap_adapter *na; struct netmap_kring *kring; - u_int i, check_all_tx, check_all_rx, want_tx, want_rx, revents = 0; + u_int i, check_all_tx, check_all_rx, want[NR_TXRX], revents = 0; +#define want_tx want[NR_TX] +#define want_rx want[NR_RX] struct mbq q; /* packets from hw queues to host stack */ void *pwait = dev; /* linux compatibility */ int is_kevent = 0; + enum txrx t; /* * In order to avoid nested locks, we need to "double check" @@ -2320,7 +2386,7 @@ netmap_poll(struct cdev *dev, int events, struct thread *td) D("No if registered"); return POLLERR; } - rmb(); /* make sure following reads are not from cache */ + mb(); /* make sure following reads are not from cache */ na = priv->np_na; @@ -2346,28 +2412,22 @@ 
netmap_poll(struct cdev *dev, int events, struct thread *td) * there are pending packets to send. The latter can be disabled * passing NETMAP_NO_TX_POLL in the NIOCREG call. */ - check_all_tx = nm_tx_si_user(priv); - check_all_rx = nm_rx_si_user(priv); + check_all_tx = nm_si_user(priv, NR_TX); + check_all_rx = nm_si_user(priv, NR_RX); /* * We start with a lock free round which is cheap if we have * slots available. If this fails, then lock and call the sync * routines. */ - for (i = priv->np_rxqfirst; want_rx && i < priv->np_rxqlast; i++) { - kring = &na->rx_rings[i]; - /* XXX compare ring->cur and kring->tail */ - if (!nm_ring_empty(kring->ring)) { - revents |= want_rx; - want_rx = 0; /* also breaks the loop */ - } - } - for (i = priv->np_txqfirst; want_tx && i < priv->np_txqlast; i++) { - kring = &na->tx_rings[i]; - /* XXX compare ring->cur and kring->tail */ - if (!nm_ring_empty(kring->ring)) { - revents |= want_tx; - want_tx = 0; /* also breaks the loop */ + for_rx_tx(t) { + for (i = priv->np_qfirst[t]; want[t] && i < priv->np_qlast[t]; i++) { + kring = &NMR(na, t)[i]; + /* XXX compare ring->cur and kring->tail */ + if (!nm_ring_empty(kring->ring)) { + revents |= want[t]; + want[t] = 0; /* also breaks the loop */ + } } } @@ -2386,7 +2446,7 @@ netmap_poll(struct cdev *dev, int events, struct thread *td) * used to skip rings with no pending transmissions. */ flush_tx: - for (i = priv->np_txqfirst; i < priv->np_txqlast; i++) { + for (i = priv->np_qfirst[NR_TX]; i < priv->np_qlast[NR_RX]; i++) { int found = 0; kring = &na->tx_rings[i]; @@ -2410,6 +2470,8 @@ flush_tx: } else { if (kring->nm_sync(kring, 0)) revents |= POLLERR; + else + nm_txsync_finalize(kring); } /* @@ -2423,12 +2485,12 @@ flush_tx: if (found) { /* notify other listeners */ revents |= want_tx; want_tx = 0; - na->nm_notify(na, i, NR_TX, 0); + kring->nm_notify(kring, 0); } } if (want_tx && retry_tx && !is_kevent) { OS_selrecord(td, check_all_tx ? 
- &na->tx_si : &na->tx_rings[priv->np_txqfirst].si); + &na->si[NR_TX] : &na->tx_rings[priv->np_qfirst[NR_TX]].si); retry_tx = 0; goto flush_tx; } @@ -2442,7 +2504,7 @@ flush_tx: int send_down = 0; /* transparent mode */ /* two rounds here for race avoidance */ do_retry_rx: - for (i = priv->np_rxqfirst; i < priv->np_rxqlast; i++) { + for (i = priv->np_qfirst[NR_RX]; i < priv->np_qlast[NR_RX]; i++) { int found = 0; kring = &na->rx_rings[i]; @@ -2454,6 +2516,12 @@ do_retry_rx: continue; } + if (nm_rxsync_prologue(kring) >= kring->nkr_num_slots) { + netmap_ring_reinit(kring); + revents |= POLLERR; + } + /* now we can use kring->rcur, rtail */ + /* * transparent mode support: collect packets * from the rxring(s). @@ -2468,17 +2536,18 @@ do_retry_rx: if (kring->nm_sync(kring, 0)) revents |= POLLERR; + else + nm_rxsync_finalize(kring); if (netmap_no_timestamp == 0 || kring->ring->flags & NR_TIMESTAMP) { microtime(&kring->ring->ts); } - /* after an rxsync we can use kring->rcur, rtail */ found = kring->rcur != kring->rtail; nm_kr_put(kring); if (found) { revents |= want_rx; retry_rx = 0; - na->nm_notify(na, i, NR_RX, 0); + kring->nm_notify(kring, 0); } } @@ -2497,7 +2566,7 @@ do_retry_rx: if (retry_rx && !is_kevent) OS_selrecord(td, check_all_rx ? 
- &na->rx_si : &na->rx_rings[priv->np_rxqfirst].si); + &na->si[NR_RX] : &na->rx_rings[priv->np_qfirst[NR_RX]].si); if (send_down > 0 || retry_rx) { retry_rx = 0; if (send_down) @@ -2523,6 +2592,8 @@ do_retry_rx: netmap_send_up(na->ifp, &q); return (revents); +#undef want_tx +#undef want_rx } @@ -2532,27 +2603,19 @@ static int netmap_hw_krings_create(struct netmap_adapter *); /* default notify callback */ static int -netmap_notify(struct netmap_adapter *na, u_int n_ring, - enum txrx tx, int flags) +netmap_notify(struct netmap_kring *kring, int flags) { - struct netmap_kring *kring; + struct netmap_adapter *na = kring->na; + enum txrx t = kring->tx; + + OS_selwakeup(&kring->si, PI_NET); + /* optimization: avoid a wake up on the global + * queue if nobody has registered for more + * than one ring + */ + if (na->si_users[t] > 0) + OS_selwakeup(&na->si[t], PI_NET); - if (tx == NR_TX) { - kring = na->tx_rings + n_ring; - OS_selwakeup(&kring->si, PI_NET); - /* optimization: avoid a wake up on the global - * queue if nobody has registered for more - * than one ring - */ - if (na->tx_si_users > 0) - OS_selwakeup(&na->tx_si, PI_NET); - } else { - kring = na->rx_rings + n_ring; - OS_selwakeup(&kring->si, PI_NET); - /* optimization: same as above */ - if (na->rx_si_users > 0) - OS_selwakeup(&na->rx_si, PI_NET); - } return 0; } @@ -2605,11 +2668,14 @@ netmap_attach_common(struct netmap_adapter *na) if (na->nm_mem == NULL) /* use the global allocator */ na->nm_mem = &nm_mem; + netmap_mem_get(na->nm_mem); +#ifdef WITH_VALE if (na->nm_bdg_attach == NULL) /* no special nm_bdg_attach callback. 
On VALE * attach, we need to interpose a bwrap */ na->nm_bdg_attach = netmap_bwrap_attach; +#endif return 0; } @@ -2626,8 +2692,8 @@ netmap_detach_common(struct netmap_adapter *na) na->nm_krings_delete(na); } netmap_pipe_dealloc(na); - if (na->na_flags & NAF_MEM_OWNER) - netmap_mem_private_delete(na->nm_mem); + if (na->nm_mem) + netmap_mem_put(na->nm_mem); bzero(na, sizeof(*na)); free(na, M_DEVBUF); } @@ -2678,7 +2744,7 @@ netmap_attach(struct netmap_adapter *arg) if (hwna == NULL) goto fail; hwna->up = *arg; - hwna->up.na_flags |= NAF_HOST_RINGS; + hwna->up.na_flags |= NAF_HOST_RINGS | NAF_NATIVE; strncpy(hwna->up.name, ifp->if_xname, sizeof(hwna->up.name)); hwna->nm_hw_register = hwna->up.nm_register; hwna->up.nm_register = netmap_hw_register; @@ -2691,7 +2757,7 @@ netmap_attach(struct netmap_adapter *arg) #ifdef linux if (ifp->netdev_ops) { /* prepare a clone of the netdev ops */ -#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 28) +#ifndef NETMAP_LINUX_HAVE_NETDEV_OPS hwna->nm_ndo.ndo_start_xmit = ifp->netdev_ops; #else hwna->nm_ndo = *ifp->netdev_ops; @@ -2702,7 +2768,7 @@ netmap_attach(struct netmap_adapter *arg) hwna->nm_eto = *ifp->ethtool_ops; } hwna->nm_eto.set_ringparam = linux_netmap_set_ringparam; -#ifdef ETHTOOL_SCHANNELS +#ifdef NETMAP_LINUX_HAVE_SET_CHANNELS hwna->nm_eto.set_channels = linux_netmap_set_channels; #endif if (arg->nm_config == NULL) { @@ -2710,17 +2776,9 @@ netmap_attach(struct netmap_adapter *arg) } #endif /* linux */ -#ifdef __FreeBSD__ if_printf(ifp, "netmap queues/slots: TX %d/%d, RX %d/%d\n", hwna->up.num_tx_rings, hwna->up.num_tx_desc, hwna->up.num_rx_rings, hwna->up.num_rx_desc); -#else - D("success for %s tx %d/%d rx %d/%d queues/slots", - hwna->up.name, - hwna->up.num_tx_rings, hwna->up.num_tx_desc, - hwna->up.num_rx_rings, hwna->up.num_rx_desc - ); -#endif return 0; fail: @@ -2788,16 +2846,19 @@ netmap_detach(struct ifnet *ifp) NMG_LOCK(); netmap_disable_all_rings(ifp); - if (!netmap_adapter_put(na)) { - /* someone is still 
using the adapter, - * tell them that the interface is gone - */ - na->ifp = NULL; - // XXX also clear NAF_NATIVE_ON ? - na->na_flags &= ~NAF_NETMAP_ON; - /* give them a chance to notice */ - netmap_enable_all_rings(ifp); + na->ifp = NULL; + na->na_flags &= ~NAF_NETMAP_ON; + /* + * if the netmap adapter is not native, somebody + * changed it, so we can not release it here. + * The NULL na->ifp will notify the new owner that + * the driver is gone. + */ + if (na->na_flags & NAF_NATIVE) { + netmap_adapter_put(na); } + /* give them a chance to notice */ + netmap_enable_all_rings(ifp); NMG_UNLOCK(); } @@ -2824,6 +2885,7 @@ netmap_transmit(struct ifnet *ifp, struct mbuf *m) struct mbq *q; int space; + kring = &na->rx_rings[na->num_rx_rings]; // XXX [Linux] we do not need this lock // if we follow the down/configure/up protocol -gl // mtx_lock(&na->core_lock); @@ -2834,7 +2896,6 @@ netmap_transmit(struct ifnet *ifp, struct mbuf *m) goto done; } - kring = &na->rx_rings[na->num_rx_rings]; q = &kring->rx_queue; // XXX reconsider long packets if we handle fragments @@ -2872,7 +2933,7 @@ done: if (m) m_freem(m); /* unconditionally wake up listeners */ - na->nm_notify(na, na->num_rx_rings, NR_RX, 0); + kring->nm_notify(kring, 0); /* this is normally netmap_notify(), but for nics * connected to a bridge it is netmap_bwrap_intr_notify(), * that possibly forwards the frames through the switch @@ -2953,7 +3014,7 @@ netmap_reset(struct netmap_adapter *na, enum txrx tx, u_int n, * We do the wakeup here, but the ring is not yet reconfigured. * However, we are under lock so there are no races. */ - na->nm_notify(na, n, tx, 0); + kring->nm_notify(kring, 0); return kring->ring->slot; } @@ -2977,6 +3038,7 @@ netmap_common_irq(struct ifnet *ifp, u_int q, u_int *work_done) { struct netmap_adapter *na = NA(ifp); struct netmap_kring *kring; + enum txrx t = (work_done ? 
NR_RX : NR_TX); q &= NETMAP_RING_MASK; @@ -2984,19 +3046,16 @@ netmap_common_irq(struct ifnet *ifp, u_int q, u_int *work_done) RD(5, "received %s queue %d", work_done ? "RX" : "TX" , q); } - if (work_done) { /* RX path */ - if (q >= na->num_rx_rings) - return; // not a physical queue - kring = na->rx_rings + q; + if (q >= nma_get_nrings(na, t)) + return; // not a physical queue + + kring = NMR(na, t) + q; + + if (t == NR_RX) { kring->nr_kflags |= NKR_PENDINTR; // XXX atomic ? - na->nm_notify(na, q, NR_RX, 0); *work_done = 1; /* do not fire napi again */ - } else { /* TX path */ - if (q >= na->num_tx_rings) - return; // not a physical queue - kring = na->tx_rings + q; - na->nm_notify(na, q, NR_TX, 0); } + kring->nm_notify(kring, 0); } @@ -3057,7 +3116,7 @@ extern struct cdevsw netmap_cdevsw; void netmap_fini(void) { - // XXX destroy_bridges() ? + netmap_uninit_bridges(); if (netmap_dev) destroy_dev(netmap_dev); netmap_mem_fini(); @@ -3087,10 +3146,14 @@ netmap_init(void) if (!netmap_dev) goto fail; - netmap_init_bridges(); + error = netmap_init_bridges(); + if (error) + goto fail; + #ifdef __FreeBSD__ nm_vi_init_index(); #endif + printf("netmap: loaded module\n"); return (0); fail: diff --git a/sys/dev/netmap/netmap_freebsd.c b/sys/dev/netmap/netmap_freebsd.c index 968724854ea6..ebac6b0efe21 100644 --- a/sys/dev/netmap/netmap_freebsd.c +++ b/sys/dev/netmap/netmap_freebsd.c @@ -24,6 +24,8 @@ */ /* $FreeBSD$ */ +#include "opt_inet.h" +#include "opt_inet6.h" #include <sys/types.h> #include <sys/module.h> @@ -148,9 +150,9 @@ nm_csum_tcpudp_ipv6(struct nm_ipv6hdr *ip6h, void *data, * Second argument is non-zero to intercept, 0 to restore */ int -netmap_catch_rx(struct netmap_adapter *na, int intercept) +netmap_catch_rx(struct netmap_generic_adapter *gna, int intercept) { - struct netmap_generic_adapter *gna = (struct netmap_generic_adapter *)na; + struct netmap_adapter *na = &gna->up.up; struct ifnet *ifp = na->ifp; if (intercept) { @@ -183,7 +185,7 @@ void 
netmap_catch_tx(struct netmap_generic_adapter *gna, int enable) { struct netmap_adapter *na = &gna->up.up; - struct ifnet *ifp = na->ifp; + struct ifnet *ifp = netmap_generic_getifp(gna); if (enable) { na->if_transmit = ifp->if_transmit; @@ -494,6 +496,7 @@ netmap_dev_pager_fault(vm_object_t object, vm_ooffset_t offset, { struct netmap_vm_handle_t *vmh = object->handle; struct netmap_priv_d *priv = vmh->priv; + struct netmap_adapter *na = priv->np_na; vm_paddr_t paddr; vm_page_t page; vm_memattr_t memattr; @@ -503,7 +506,7 @@ netmap_dev_pager_fault(vm_object_t object, vm_ooffset_t offset, object, (intmax_t)offset, prot, mres); memattr = object->memattr; pidx = OFF_TO_IDX(offset); - paddr = netmap_mem_ofstophys(priv->np_mref, offset); + paddr = netmap_mem_ofstophys(na->nm_mem, offset); if (paddr == 0) return VM_PAGER_FAIL; @@ -568,14 +571,14 @@ netmap_mmap_single(struct cdev *cdev, vm_ooffset_t *foff, error = devfs_get_cdevpriv((void**)&priv); if (error) goto err_unlock; + if (priv->np_nifp == NULL) { + error = EINVAL; + goto err_unlock; + } vmh->priv = priv; priv->np_refcount++; NMG_UNLOCK(); - error = netmap_get_memory(priv); - if (error) - goto err_deref; - obj = cdev_pager_allocate(vmh, OBJT_DEVICE, &netmap_cdev_pager_ops, objsize, prot, *foff, NULL); @@ -598,8 +601,18 @@ err_unlock: return error; } - -// XXX can we remove this ? +/* + * netmap_close() is called on every close(), but we do not need to do + * anything at that moment, since the process may have other open file + * descriptors for /dev/netmap. Instead, we pass netmap_dtor() to + * devfs_set_cdevpriv() on open(). The FreeBSD kernel will call the destructor + * when the last fd pointing to the device is closed. + * + * Unfortunately, FreeBSD does not automatically track active mmap()s on an fd, + * so we have to track them by ourselvesi (see above). The result is that + * netmap_dtor() is called when the process has no open fds and no active + * memory maps on /dev/netmap, as in linux. 
+ */ static int netmap_close(struct cdev *dev, int fflag, int devtype, struct thread *td) { @@ -673,7 +686,7 @@ static void netmap_knrdetach(struct knote *kn) { struct netmap_priv_d *priv = (struct netmap_priv_d *)kn->kn_hook; - struct selinfo *si = &priv->np_rxsi->si; + struct selinfo *si = &priv->np_si[NR_RX]->si; D("remove selinfo %p", si); knlist_remove(&si->si_note, kn, 0); @@ -683,7 +696,7 @@ static void netmap_knwdetach(struct knote *kn) { struct netmap_priv_d *priv = (struct netmap_priv_d *)kn->kn_hook; - struct selinfo *si = &priv->np_txsi->si; + struct selinfo *si = &priv->np_si[NR_TX]->si; D("remove selinfo %p", si); knlist_remove(&si->si_note, kn, 0); @@ -773,7 +786,7 @@ netmap_kqfilter(struct cdev *dev, struct knote *kn) return 1; } /* the si is indicated in the priv */ - si = (ev == EVFILT_WRITE) ? priv->np_txsi : priv->np_rxsi; + si = priv->np_si[(ev == EVFILT_WRITE) ? NR_TX : NR_RX]; // XXX lock(priv) ? kn->kn_fop = (ev == EVFILT_WRITE) ? &netmap_wfiltops : &netmap_rfiltops; diff --git a/sys/dev/netmap/netmap_generic.c b/sys/dev/netmap/netmap_generic.c index ecdb36824077..bc5b452cfaae 100644 --- a/sys/dev/netmap/netmap_generic.c +++ b/sys/dev/netmap/netmap_generic.c @@ -305,7 +305,7 @@ generic_netmap_register(struct netmap_adapter *na, int enable) } rtnl_lock(); /* Prepare to intercept incoming traffic. */ - error = netmap_catch_rx(na, 1); + error = netmap_catch_rx(gna, 1); if (error) { D("netdev_rx_handler_register() failed (%d)", error); goto register_handler; @@ -342,7 +342,7 @@ generic_netmap_register(struct netmap_adapter *na, int enable) netmap_catch_tx(gna, 0); /* Do not intercept packets on the rx path. 
*/ - netmap_catch_rx(na, 0); + netmap_catch_rx(gna, 0); rtnl_unlock(); @@ -645,8 +645,6 @@ generic_netmap_txsync(struct netmap_kring *kring, int flags) generic_netmap_tx_clean(kring); - nm_txsync_finalize(kring); - return 0; } @@ -711,7 +709,7 @@ generic_netmap_rxsync(struct netmap_kring *kring, int flags) u_int nm_i; /* index into the netmap ring */ //j, u_int n; u_int const lim = kring->nkr_num_slots - 1; - u_int const head = nm_rxsync_prologue(kring); + u_int const head = kring->rhead; int force_update = (flags & NAF_FORCE_READ) || kring->nr_kflags & NKR_PENDINTR; if (head > lim) @@ -774,8 +772,6 @@ generic_netmap_rxsync(struct netmap_kring *kring, int flags) } kring->nr_hwcur = head; } - /* tell userspace that there might be new packets. */ - nm_rxsync_finalize(kring); IFRATE(rate_ctx.new.rxsync++); return 0; @@ -784,20 +780,25 @@ generic_netmap_rxsync(struct netmap_kring *kring, int flags) static void generic_netmap_dtor(struct netmap_adapter *na) { - struct ifnet *ifp = na->ifp; struct netmap_generic_adapter *gna = (struct netmap_generic_adapter*)na; + struct ifnet *ifp = netmap_generic_getifp(gna); struct netmap_adapter *prev_na = gna->prev; if (prev_na != NULL) { D("Released generic NA %p", gna); - if_rele(na->ifp); + if_rele(ifp); netmap_adapter_put(prev_na); + if (na->ifp == NULL) { + /* + * The driver has been removed without releasing + * the reference so we need to do it here. 
+ */ + netmap_adapter_put(prev_na); + } } - if (ifp != NULL) { - WNA(ifp) = prev_na; - D("Restored native NA %p", prev_na); - na->ifp = NULL; - } + WNA(ifp) = prev_na; + D("Restored native NA %p", prev_na); + na->ifp = NULL; } /* @@ -834,6 +835,7 @@ generic_netmap_attach(struct ifnet *ifp) return ENOMEM; } na = (struct netmap_adapter *)gna; + strncpy(na->name, ifp->if_xname, sizeof(na->name)); na->ifp = ifp; na->num_tx_desc = num_tx_desc; na->num_rx_desc = num_rx_desc; diff --git a/sys/dev/netmap/netmap_kern.h b/sys/dev/netmap/netmap_kern.h index 95b3a5deda69..fd715cd1378f 100644 --- a/sys/dev/netmap/netmap_kern.h +++ b/sys/dev/netmap/netmap_kern.h @@ -34,26 +34,46 @@ #ifndef _NET_NETMAP_KERN_H_ #define _NET_NETMAP_KERN_H_ +#if defined(linux) + +#if defined(CONFIG_NETMAP_VALE) +#define WITH_VALE +#endif +#if defined(CONFIG_NETMAP_PIPE) +#define WITH_PIPES +#endif +#if defined(CONFIG_NETMAP_MONITOR) +#define WITH_MONITOR +#endif +#if defined(CONFIG_NETMAP_GENERIC) +#define WITH_GENERIC +#endif +#if defined(CONFIG_NETMAP_V1000) +#define WITH_V1000 +#endif + +#else /* not linux */ + #define WITH_VALE // comment out to disable VALE support #define WITH_PIPES #define WITH_MONITOR #define WITH_GENERIC +#endif + #if defined(__FreeBSD__) #define likely(x) __builtin_expect((long)!!(x), 1L) #define unlikely(x) __builtin_expect((long)!!(x), 0L) -#define NM_LOCK_T struct mtx +#define NM_LOCK_T struct mtx /* low level spinlock, used to protect queues */ -/* netmap global lock */ -#define NMG_LOCK_T struct sx -#define NMG_LOCK_INIT() sx_init(&netmap_global_lock, \ - "netmap global lock") -#define NMG_LOCK_DESTROY() sx_destroy(&netmap_global_lock) -#define NMG_LOCK() sx_xlock(&netmap_global_lock) -#define NMG_UNLOCK() sx_xunlock(&netmap_global_lock) -#define NMG_LOCK_ASSERT() sx_assert(&netmap_global_lock, SA_XLOCKED) +#define NM_MTX_T struct sx /* OS-specific mutex (sleepable) */ +#define NM_MTX_INIT(m) sx_init(&(m), #m) +#define NM_MTX_DESTROY(m) sx_destroy(&(m)) +#define 
NM_MTX_LOCK(m) sx_xlock(&(m)) +#define NM_MTX_UNLOCK(m) sx_xunlock(&(m)) +#define NM_MTX_ASSERT(m) sx_assert(&(m), SA_XLOCKED) #define NM_SELINFO_T struct nm_selinfo #define MBUF_LEN(m) ((m)->m_pkthdr.len) @@ -102,6 +122,8 @@ struct ethtool_ops { }; struct hrtimer { }; +#define NM_BNS_GET(b) +#define NM_BNS_PUT(b) #elif defined (linux) @@ -117,20 +139,12 @@ struct hrtimer { #define NM_ATOMIC_T volatile long unsigned int -#define NM_MTX_T struct mutex -#define NM_MTX_INIT(m, s) do { (void)s; mutex_init(&(m)); } while (0) -#define NM_MTX_DESTROY(m) do { (void)m; } while (0) +#define NM_MTX_T struct mutex /* OS-specific sleepable lock */ +#define NM_MTX_INIT(m) mutex_init(&(m)) +#define NM_MTX_DESTROY(m) do { (void)(m); } while (0) #define NM_MTX_LOCK(m) mutex_lock(&(m)) #define NM_MTX_UNLOCK(m) mutex_unlock(&(m)) -#define NM_MTX_LOCK_ASSERT(m) mutex_is_locked(&(m)) - -#define NMG_LOCK_T NM_MTX_T -#define NMG_LOCK_INIT() NM_MTX_INIT(netmap_global_lock, \ - "netmap_global_lock") -#define NMG_LOCK_DESTROY() NM_MTX_DESTROY(netmap_global_lock) -#define NMG_LOCK() NM_MTX_LOCK(netmap_global_lock) -#define NMG_UNLOCK() NM_MTX_UNLOCK(netmap_global_lock) -#define NMG_LOCK_ASSERT() NM_MTX_LOCK_ASSERT(netmap_global_lock) +#define NM_MTX_ASSERT(m) mutex_is_locked(&(m)) #ifndef DEV_NETMAP #define DEV_NETMAP @@ -152,6 +166,13 @@ struct hrtimer { #endif /* end - platform-specific code */ +#define NMG_LOCK_T NM_MTX_T +#define NMG_LOCK_INIT() NM_MTX_INIT(netmap_global_lock) +#define NMG_LOCK_DESTROY() NM_MTX_DESTROY(netmap_global_lock) +#define NMG_LOCK() NM_MTX_LOCK(netmap_global_lock) +#define NMG_UNLOCK() NM_MTX_UNLOCK(netmap_global_lock) +#define NMG_LOCK_ASSERT() NM_MTX_ASSERT(netmap_global_lock) + #define ND(format, ...) #define D(format, ...) 
\ do { \ @@ -185,6 +206,23 @@ const char *nm_dump_buf(char *p, int len, int lim, char *dst); extern NMG_LOCK_T netmap_global_lock; +enum txrx { NR_RX = 0, NR_TX = 1, NR_TXRX }; + +static __inline const char* +nm_txrx2str(enum txrx t) +{ + return (t== NR_RX ? "RX" : "TX"); +} + +static __inline enum txrx +nm_txrx_swap(enum txrx t) +{ + return (t== NR_RX ? NR_TX : NR_RX); +} + +#define for_rx_tx(t) for ((t) = 0; (t) < NR_TXRX; (t)++) + + /* * private, kernel view of a ring. Keeps track of the status of * a ring across system calls. @@ -259,6 +297,7 @@ struct netmap_kring { uint32_t nr_kflags; /* private driver flags */ #define NKR_PENDINTR 0x1 // Pending interrupt. +#define NKR_EXCLUSIVE 0x2 /* exclusive binding */ uint32_t nkr_num_slots; /* @@ -308,7 +347,10 @@ struct netmap_kring { // u_int nr_ntc; /* Emulation of a next-to-clean RX ring pointer. */ struct mbq rx_queue; /* intercepted rx mbufs. */ + uint32_t users; /* existing bindings for this ring */ + uint32_t ring_id; /* debugging */ + enum txrx tx; /* kind of ring (tx or rx) */ char name[64]; /* diagnostic */ /* [tx]sync callback for this kring. @@ -323,6 +365,7 @@ struct netmap_kring { * any of the nm_krings_create callbacks. */ int (*nm_sync)(struct netmap_kring *kring, int flags); + int (*nm_notify)(struct netmap_kring *kring, int flags); #ifdef WITH_PIPES struct netmap_kring *pipe; /* if this is a pipe ring, @@ -333,17 +376,25 @@ struct netmap_kring { */ #endif /* WITH_PIPES */ +#ifdef WITH_VALE + int (*save_notify)(struct netmap_kring *kring, int flags); +#endif + #ifdef WITH_MONITOR - /* pointer to the adapter that is monitoring this kring (if any) - */ - struct netmap_monitor_adapter *monitor; + /* array of krings that are monitoring this kring */ + struct netmap_kring **monitors; + uint32_t max_monitors; /* current size of the monitors array */ + uint32_t n_monitors; /* next unused entry in the monitor array */ /* - * Monitors work by intercepting the txsync and/or rxsync of the - * monitored krings. 
This is implemented by replacing - * the nm_sync pointer above and saving the previous - * one in save_sync below. + * Monitors work by intercepting the sync and notify callbacks of the + * monitored krings. This is implemented by replacing the pointers + * above and saving the previous ones in mon_* pointers below */ - int (*save_sync)(struct netmap_kring *kring, int flags); + int (*mon_sync)(struct netmap_kring *kring, int flags); + int (*mon_notify)(struct netmap_kring *kring, int flags); + + uint32_t mon_tail; /* last seen slot on rx */ + uint32_t mon_pos; /* index of this ring in the monitored ring array */ #endif } __attribute__((__aligned__(64))); @@ -414,8 +465,11 @@ tail->| |<-hwtail | |<-hwlease */ - -enum txrx { NR_RX = 0, NR_TX = 1 }; +struct netmap_lut { + struct lut_entry *lut; + uint32_t objtotal; /* max buffer index */ + uint32_t objsize; /* buffer size */ +}; struct netmap_vp_adapter; // forward @@ -445,11 +499,10 @@ struct netmap_adapter { * forwarding packets coming from this * interface */ -#define NAF_MEM_OWNER 8 /* the adapter is responsible for the - * deallocation of the memory allocator +#define NAF_MEM_OWNER 8 /* the adapter uses its own memory area + * that cannot be changed */ -#define NAF_NATIVE_ON 16 /* the adapter is native and the attached - * interface is in netmap mode. +#define NAF_NATIVE 16 /* the adapter is native. * Virtual ports (vale, pipe, monitor...) * should never use this flag. 
*/ @@ -469,7 +522,7 @@ struct netmap_adapter { u_int num_rx_rings; /* number of adapter receive rings */ u_int num_tx_rings; /* number of adapter transmit rings */ - u_int num_tx_desc; /* number of descriptor in each queue */ + u_int num_tx_desc; /* number of descriptor in each queue */ u_int num_rx_desc; /* tx_rings and rx_rings are private but allocated @@ -483,10 +536,10 @@ struct netmap_adapter { /* (used for leases) */ - NM_SELINFO_T tx_si, rx_si; /* global wait queues */ + NM_SELINFO_T si[NR_TXRX]; /* global wait queues */ /* count users of the global wait queues */ - int tx_si_users, rx_si_users; + int si_users[NR_TXRX]; void *pdev; /* used to store pci device */ @@ -544,6 +597,7 @@ struct netmap_adapter { int (*nm_txsync)(struct netmap_kring *kring, int flags); int (*nm_rxsync)(struct netmap_kring *kring, int flags); + int (*nm_notify)(struct netmap_kring *kring, int flags); #define NAF_FORCE_READ 1 #define NAF_FORCE_RECLAIM 2 /* return configuration information */ @@ -551,12 +605,6 @@ struct netmap_adapter { u_int *txr, u_int *txd, u_int *rxr, u_int *rxd); int (*nm_krings_create)(struct netmap_adapter *); void (*nm_krings_delete)(struct netmap_adapter *); - int (*nm_notify)(struct netmap_adapter *, - u_int ring, enum txrx, int flags); -#define NAF_DISABLE_NOTIFY 8 /* notify that the stopped state of the - * ring has changed (kring->nkr_stopped) - */ - #ifdef WITH_VALE /* * nm_bdg_attach() initializes the na_vp field to point @@ -593,9 +641,7 @@ struct netmap_adapter { * buffer addresses, and the total number of buffers. */ struct netmap_mem_d *nm_mem; - struct lut_entry *na_lut; - uint32_t na_lut_objtotal; /* max buffer index */ - uint32_t na_lut_objsize; /* buffer size */ + struct netmap_lut na_lut; /* additional information attached to this adapter * by other netmap subsystems. 
Currently used by @@ -603,16 +649,49 @@ struct netmap_adapter { */ void *na_private; -#ifdef WITH_PIPES /* array of pipes that have this adapter as a parent */ struct netmap_pipe_adapter **na_pipes; int na_next_pipe; /* next free slot in the array */ int na_max_pipes; /* size of the array */ -#endif /* WITH_PIPES */ char name[64]; }; +static __inline u_int +nma_get_ndesc(struct netmap_adapter *na, enum txrx t) +{ + return (t == NR_TX ? na->num_tx_desc : na->num_rx_desc); +} + +static __inline void +nma_set_ndesc(struct netmap_adapter *na, enum txrx t, u_int v) +{ + if (t == NR_TX) + na->num_tx_desc = v; + else + na->num_rx_desc = v; +} + +static __inline u_int +nma_get_nrings(struct netmap_adapter *na, enum txrx t) +{ + return (t == NR_TX ? na->num_tx_rings : na->num_rx_rings); +} + +static __inline void +nma_set_nrings(struct netmap_adapter *na, enum txrx t, u_int v) +{ + if (t == NR_TX) + na->num_tx_rings = v; + else + na->num_rx_rings = v; +} + +static __inline struct netmap_kring* +NMR(struct netmap_adapter *na, enum txrx t) +{ + return (t == NR_TX ? 
na->tx_rings : na->rx_rings); +} /* * If the NIC is owned by the kernel @@ -624,7 +703,6 @@ struct netmap_adapter { #define NETMAP_OWNED_BY_ANY(na) \ (NETMAP_OWNED_BY_KERN(na) || ((na)->active_fds > 0)) - /* * derived netmap adapters for various types of ports */ @@ -645,6 +723,8 @@ struct netmap_vp_adapter { /* VALE software port */ u_int virt_hdr_len; /* Maximum Frame Size, used in bdg_mismatch_datapath() */ u_int mfs; + /* Last source MAC on this port */ + uint64_t last_smac; }; @@ -689,15 +769,9 @@ struct netmap_generic_adapter { /* emulated device */ #endif /* WITH_GENERIC */ static __inline int -netmap_real_tx_rings(struct netmap_adapter *na) -{ - return na->num_tx_rings + !!(na->na_flags & NAF_HOST_RINGS); -} - -static __inline int -netmap_real_rx_rings(struct netmap_adapter *na) +netmap_real_rings(struct netmap_adapter *na, enum txrx t) { - return na->num_rx_rings + !!(na->na_flags & NAF_HOST_RINGS); + return nma_get_nrings(na, t) + !!(na->na_flags & NAF_HOST_RINGS); } #ifdef WITH_VALE @@ -751,9 +825,6 @@ struct netmap_bwrap_adapter { struct netmap_vp_adapter host; /* for host rings */ struct netmap_adapter *hwna; /* the underlying device */ - /* backup of the hwna notify callback */ - int (*save_notify)(struct netmap_adapter *, - u_int ring, enum txrx, int flags); /* backup of the hwna memory allocator */ struct netmap_mem_d *save_nmd; @@ -847,6 +918,14 @@ static __inline int nm_kr_tryget(struct netmap_kring *kr) return 0; } +static __inline void nm_kr_get(struct netmap_kring *kr) +{ + while (NM_ATOMIC_TEST_AND_SET(&kr->nr_busy)) + tsleep(kr, 0, "NM_KR_GET", 4); +} + + + /* * The following functions are used by individual drivers to @@ -896,15 +975,15 @@ const char *netmap_bdg_name(struct netmap_vp_adapter *); #endif /* WITH_VALE */ static inline int -nm_native_on(struct netmap_adapter *na) +nm_netmap_on(struct netmap_adapter *na) { - return na && na->na_flags & NAF_NATIVE_ON; + return na && na->na_flags & NAF_NETMAP_ON; } static inline int 
-nm_netmap_on(struct netmap_adapter *na) +nm_native_on(struct netmap_adapter *na) { - return na && na->na_flags & NAF_NETMAP_ON; + return nm_netmap_on(na) && (na->na_flags & NAF_NATIVE); } /* set/clear native flags and if_transmit/netdev_ops */ @@ -913,7 +992,7 @@ nm_set_native_flags(struct netmap_adapter *na) { struct ifnet *ifp = na->ifp; - na->na_flags |= (NAF_NATIVE_ON | NAF_NETMAP_ON); + na->na_flags |= NAF_NETMAP_ON; #ifdef IFCAP_NETMAP /* or FreeBSD ? */ ifp->if_capenable |= IFCAP_NETMAP; #endif @@ -940,63 +1019,13 @@ nm_clear_native_flags(struct netmap_adapter *na) ifp->netdev_ops = (void *)na->if_transmit; ifp->ethtool_ops = ((struct netmap_hw_adapter*)na)->save_ethtool; #endif - na->na_flags &= ~(NAF_NATIVE_ON | NAF_NETMAP_ON); + na->na_flags &= ~NAF_NETMAP_ON; #ifdef IFCAP_NETMAP /* or FreeBSD ? */ ifp->if_capenable &= ~IFCAP_NETMAP; #endif } -/* - * validates parameters in the ring/kring, returns a value for head - * If any error, returns ring_size to force a reinit. - */ -uint32_t nm_txsync_prologue(struct netmap_kring *); - - -/* - * validates parameters in the ring/kring, returns a value for head, - * and the 'reserved' value in the argument. - * If any error, returns ring_size lim to force a reinit. - */ -uint32_t nm_rxsync_prologue(struct netmap_kring *); - - -/* - * update kring and ring at the end of txsync. 
- */ -static inline void -nm_txsync_finalize(struct netmap_kring *kring) -{ - /* update ring tail to what the kernel knows */ - kring->ring->tail = kring->rtail = kring->nr_hwtail; - - /* note, head/rhead/hwcur might be behind cur/rcur - * if no carrier - */ - ND(5, "%s now hwcur %d hwtail %d head %d cur %d tail %d", - kring->name, kring->nr_hwcur, kring->nr_hwtail, - kring->rhead, kring->rcur, kring->rtail); -} - - -/* - * update kring and ring at the end of rxsync - */ -static inline void -nm_rxsync_finalize(struct netmap_kring *kring) -{ - /* tell userspace that there might be new packets */ - //struct netmap_ring *ring = kring->ring; - ND("head %d cur %d tail %d -> %d", ring->head, ring->cur, ring->tail, - kring->nr_hwtail); - kring->ring->tail = kring->rtail = kring->nr_hwtail; - /* make a copy of the state for next round */ - kring->rhead = kring->ring->head; - kring->rcur = kring->ring->cur; -} - - /* check/fix address and len in tx rings */ #if 1 /* debug version */ #define NM_CHECK_ADDR_LEN(_na, _a, _l) do { \ @@ -1050,14 +1079,15 @@ int netmap_krings_create(struct netmap_adapter *na, u_int tailroom); * been created using netmap_krings_create */ void netmap_krings_delete(struct netmap_adapter *na); +int netmap_rxsync_from_host(struct netmap_adapter *na, struct thread *td, void *pwait); + /* set the stopped/enabled status of ring * When stopping, they also wait for all current activity on the ring to * terminate. The status change is then notified using the na nm_notify * callback. */ -void netmap_set_txring(struct netmap_adapter *, u_int ring_id, int stopped); -void netmap_set_rxring(struct netmap_adapter *, u_int ring_id, int stopped); +void netmap_set_ring(struct netmap_adapter *, u_int ring_id, enum txrx, int stopped); /* set the stopped/enabled status of all rings of the adapter. 
*/ void netmap_set_all_rings(struct netmap_adapter *, int stopped); /* convenience wrappers for netmap_set_all_rings, used in drivers */ @@ -1066,9 +1096,9 @@ void netmap_enable_all_rings(struct ifnet *); int netmap_rxsync_from_host(struct netmap_adapter *na, struct thread *td, void *pwait); -struct netmap_if * +int netmap_do_regif(struct netmap_priv_d *priv, struct netmap_adapter *na, - uint16_t ringid, uint32_t flags, int *err); + uint16_t ringid, uint32_t flags); @@ -1088,7 +1118,7 @@ int netmap_get_hw_na(struct ifnet *ifp, struct netmap_adapter **na); * XXX in practice "unknown" might be handled same as broadcast. */ typedef u_int (*bdg_lookup_fn_t)(struct nm_bdg_fwd *ft, uint8_t *ring_nr, - const struct netmap_vp_adapter *); + struct netmap_vp_adapter *); typedef int (*bdg_config_fn_t)(struct nm_ifreq *); typedef void (*bdg_dtor_fn_t)(const struct netmap_vp_adapter *); struct netmap_bdg_ops { @@ -1098,7 +1128,7 @@ struct netmap_bdg_ops { }; u_int netmap_bdg_learning(struct nm_bdg_fwd *ft, uint8_t *dst_ring, - const struct netmap_vp_adapter *); + struct netmap_vp_adapter *); #define NM_BDG_MAXPORTS 254 /* up to 254 */ #define NM_BDG_BROADCAST NM_BDG_MAXPORTS @@ -1108,34 +1138,52 @@ u_int netmap_bdg_learning(struct nm_bdg_fwd *ft, uint8_t *dst_ring, /* these are redefined in case of no VALE support */ int netmap_get_bdg_na(struct nmreq *nmr, struct netmap_adapter **na, int create); -void netmap_init_bridges(void); +struct nm_bridge *netmap_init_bridges2(u_int); +void netmap_uninit_bridges2(struct nm_bridge *, u_int); +int netmap_init_bridges(void); +void netmap_uninit_bridges(void); int netmap_bdg_ctl(struct nmreq *nmr, struct netmap_bdg_ops *bdg_ops); int netmap_bdg_config(struct nmreq *nmr); #else /* !WITH_VALE */ #define netmap_get_bdg_na(_1, _2, _3) 0 -#define netmap_init_bridges(_1) +#define netmap_init_bridges(_1) 0 +#define netmap_uninit_bridges() #define netmap_bdg_ctl(_1, _2) EINVAL #endif /* !WITH_VALE */ #ifdef WITH_PIPES /* max number of pipes per 
device */ #define NM_MAXPIPES 64 /* XXX how many? */ -/* in case of no error, returns the actual number of pipes in nmr->nr_arg1 */ -int netmap_pipe_alloc(struct netmap_adapter *, struct nmreq *nmr); void netmap_pipe_dealloc(struct netmap_adapter *); int netmap_get_pipe_na(struct nmreq *nmr, struct netmap_adapter **na, int create); #else /* !WITH_PIPES */ #define NM_MAXPIPES 0 -#define netmap_pipe_alloc(_1, _2) EOPNOTSUPP +#define netmap_pipe_alloc(_1, _2) 0 #define netmap_pipe_dealloc(_1) -#define netmap_get_pipe_na(_1, _2, _3) 0 +#define netmap_get_pipe_na(nmr, _2, _3) \ + ({ int role__ = (nmr)->nr_flags & NR_REG_MASK; \ + (role__ == NR_REG_PIPE_MASTER || \ + role__ == NR_REG_PIPE_SLAVE) ? EOPNOTSUPP : 0; }) #endif #ifdef WITH_MONITOR int netmap_get_monitor_na(struct nmreq *nmr, struct netmap_adapter **na, int create); +void netmap_monitor_stop(struct netmap_adapter *na); #else -#define netmap_get_monitor_na(_1, _2, _3) 0 +#define netmap_get_monitor_na(nmr, _2, _3) \ + ((nmr)->nr_flags & (NR_MONITOR_TX | NR_MONITOR_RX) ? 
EOPNOTSUPP : 0) +#endif + +#ifdef CONFIG_NET_NS +struct net *netmap_bns_get(void); +void netmap_bns_put(struct net *); +void netmap_bns_getbridges(struct nm_bridge **, u_int *); +#else +#define netmap_bns_get() +#define netmap_bns_put(_1) +#define netmap_bns_getbridges(b, n) \ + do { *b = nm_bridges; *n = NM_BRIDGES; } while (0) #endif /* Various prototypes */ @@ -1186,8 +1234,8 @@ int netmap_adapter_put(struct netmap_adapter *na); /* * module variables */ -#define NETMAP_BUF_BASE(na) ((na)->na_lut[0].vaddr) -#define NETMAP_BUF_SIZE(na) ((na)->na_lut_objsize) +#define NETMAP_BUF_BASE(na) ((na)->na_lut.lut[0].vaddr) +#define NETMAP_BUF_SIZE(na) ((na)->na_lut.objsize) extern int netmap_mitigate; // XXX not really used extern int netmap_no_pendintr; extern int netmap_verbose; // XXX debugging @@ -1291,15 +1339,14 @@ netmap_reload_map(struct netmap_adapter *na, #else /* linux */ int nm_iommu_group_id(bus_dma_tag_t dev); -extern size_t netmap_mem_get_bufsize(struct netmap_mem_d *); #include <linux/dma-mapping.h> static inline void netmap_load_map(struct netmap_adapter *na, bus_dma_tag_t tag, bus_dmamap_t map, void *buf) { - if (map) { - *map = dma_map_single(na->pdev, buf, netmap_mem_get_bufsize(na->nm_mem), + if (0 && map) { + *map = dma_map_single(na->pdev, buf, na->na_lut.objsize, DMA_BIDIRECTIONAL); } } @@ -1308,7 +1355,7 @@ static inline void netmap_unload_map(struct netmap_adapter *na, bus_dma_tag_t tag, bus_dmamap_t map) { - u_int sz = netmap_mem_get_bufsize(na->nm_mem); + u_int sz = na->na_lut.objsize; if (*map) { dma_unmap_single(na->pdev, *map, sz, @@ -1320,7 +1367,7 @@ static inline void netmap_reload_map(struct netmap_adapter *na, bus_dma_tag_t tag, bus_dmamap_t map, void *buf) { - u_int sz = netmap_mem_get_bufsize(na->nm_mem); + u_int sz = na->na_lut.objsize; if (*map) { dma_unmap_single(na->pdev, *map, sz, @@ -1418,9 +1465,9 @@ struct netmap_obj_pool; static inline void * NMB(struct netmap_adapter *na, struct netmap_slot *slot) { - struct lut_entry *lut = 
na->na_lut; + struct lut_entry *lut = na->na_lut.lut; uint32_t i = slot->buf_idx; - return (unlikely(i >= na->na_lut_objtotal)) ? + return (unlikely(i >= na->na_lut.objtotal)) ? lut[0].vaddr : lut[i].vaddr; } @@ -1428,10 +1475,10 @@ static inline void * PNMB(struct netmap_adapter *na, struct netmap_slot *slot, uint64_t *pp) { uint32_t i = slot->buf_idx; - struct lut_entry *lut = na->na_lut; - void *ret = (i >= na->na_lut_objtotal) ? lut[0].vaddr : lut[i].vaddr; + struct lut_entry *lut = na->na_lut.lut; + void *ret = (i >= na->na_lut.objtotal) ? lut[0].vaddr : lut[i].vaddr; - *pp = (i >= na->na_lut_objtotal) ? lut[0].paddr : lut[i].paddr; + *pp = (i >= na->na_lut.objtotal) ? lut[0].paddr : lut[i].paddr; return ret; } @@ -1459,7 +1506,7 @@ void netmap_txsync_to_host(struct netmap_adapter *na); * If np_nifp is NULL initialization has not been performed, * so they should return an error to userspace. * - * The ref_done field is used to regulate access to the refcount in the + * The ref_done field (XXX ?) is used to regulate access to the refcount in the * memory allocator. The refcount must be incremented at most once for * each open("/dev/netmap"). The increment is performed by the first * function that calls netmap_get_memory() (currently called by @@ -1472,11 +1519,10 @@ struct netmap_priv_d { struct netmap_adapter *np_na; uint32_t np_flags; /* from the ioctl */ - u_int np_txqfirst, np_txqlast; /* range of tx rings to scan */ - u_int np_rxqfirst, np_rxqlast; /* range of rx rings to scan */ + u_int np_qfirst[NR_TXRX], + np_qlast[NR_TXRX]; /* range of tx/rx rings to scan */ uint16_t np_txpoll; /* XXX and also np_rxpoll ? */ - struct netmap_mem_d *np_mref; /* use with NMG_LOCK held */ /* np_refcount is only used on FreeBSD */ int np_refcount; /* use with NMG_LOCK held */ @@ -1484,7 +1530,7 @@ struct netmap_priv_d { * Either the local or the global one depending on the * number of rings. 
*/ - NM_SELINFO_T *np_rxsi, *np_txsi; + NM_SELINFO_T *np_si[NR_TXRX]; struct thread *np_td; /* kqueue, just debugging */ }; @@ -1507,12 +1553,20 @@ struct netmap_monitor_adapter { */ int generic_netmap_attach(struct ifnet *ifp); -int netmap_catch_rx(struct netmap_adapter *na, int intercept); +int netmap_catch_rx(struct netmap_generic_adapter *na, int intercept); void generic_rx_handler(struct ifnet *ifp, struct mbuf *m);; void netmap_catch_tx(struct netmap_generic_adapter *na, int enable); int generic_xmit_frame(struct ifnet *ifp, struct mbuf *m, void *addr, u_int len, u_int ring_nr); int generic_find_num_desc(struct ifnet *ifp, u_int *tx, u_int *rx); void generic_find_num_queues(struct ifnet *ifp, u_int *txq, u_int *rxq); +static inline struct ifnet* +netmap_generic_getifp(struct netmap_generic_adapter *gna) +{ + if (gna->prev) + return gna->prev->ifp; + + return gna->up.up.ifp; +} //#define RATE_GENERIC /* Enables communication statistics for generic. */ #ifdef RATE_GENERIC diff --git a/sys/dev/netmap/netmap_mem2.c b/sys/dev/netmap/netmap_mem2.c index c823376dbab6..023604d49535 100644 --- a/sys/dev/netmap/netmap_mem2.c +++ b/sys/dev/netmap/netmap_mem2.c @@ -71,6 +71,7 @@ struct netmap_obj_params { u_int size; u_int num; }; + struct netmap_obj_pool { char name[NETMAP_POOL_MAX_NAMSZ]; /* name of the allocator */ @@ -106,16 +107,26 @@ struct netmap_obj_pool { u_int r_objsize; }; -#ifdef linux -// XXX a mtx would suffice here 20130415 lr -#define NMA_LOCK_T struct semaphore -#else /* !linux */ -#define NMA_LOCK_T struct mtx -#endif /* linux */ +#define NMA_LOCK_T NM_MTX_T + + +struct netmap_mem_ops { + void (*nmd_get_lut)(struct netmap_mem_d *, struct netmap_lut*); + int (*nmd_get_info)(struct netmap_mem_d *, u_int *size, + u_int *memflags, uint16_t *id); -typedef int (*netmap_mem_config_t)(struct netmap_mem_d*); -typedef int (*netmap_mem_finalize_t)(struct netmap_mem_d*); -typedef void (*netmap_mem_deref_t)(struct netmap_mem_d*); + vm_paddr_t (*nmd_ofstophys)(struct 
netmap_mem_d *, vm_ooffset_t); + int (*nmd_config)(struct netmap_mem_d *); + int (*nmd_finalize)(struct netmap_mem_d *); + void (*nmd_deref)(struct netmap_mem_d *); + ssize_t (*nmd_if_offset)(struct netmap_mem_d *, const void *vaddr); + void (*nmd_delete)(struct netmap_mem_d *); + + struct netmap_if * (*nmd_if_new)(struct netmap_adapter *); + void (*nmd_if_delete)(struct netmap_adapter *, struct netmap_if *); + int (*nmd_rings_create)(struct netmap_adapter *); + void (*nmd_rings_delete)(struct netmap_adapter *); +}; typedef uint16_t nm_memid_t; @@ -126,52 +137,144 @@ struct netmap_mem_d { u_int flags; #define NETMAP_MEM_FINALIZED 0x1 /* preallocation done */ int lasterr; /* last error for curr config */ - int refcount; /* existing priv structures */ + int active; /* active users */ + int refcount; /* the three allocators */ struct netmap_obj_pool pools[NETMAP_POOLS_NR]; - netmap_mem_config_t config; /* called with NMA_LOCK held */ - netmap_mem_finalize_t finalize; /* called with NMA_LOCK held */ - netmap_mem_deref_t deref; /* called with NMA_LOCK held */ - nm_memid_t nm_id; /* allocator identifier */ int nm_grp; /* iommu groupd id */ /* list of all existing allocators, sorted by nm_id */ struct netmap_mem_d *prev, *next; + + struct netmap_mem_ops *ops; }; -/* accessor functions */ -struct lut_entry* -netmap_mem_get_lut(struct netmap_mem_d *nmd) +#define NMD_DEFCB(t0, name) \ +t0 \ +netmap_mem_##name(struct netmap_mem_d *nmd) \ +{ \ + return nmd->ops->nmd_##name(nmd); \ +} + +#define NMD_DEFCB1(t0, name, t1) \ +t0 \ +netmap_mem_##name(struct netmap_mem_d *nmd, t1 a1) \ +{ \ + return nmd->ops->nmd_##name(nmd, a1); \ +} + +#define NMD_DEFCB3(t0, name, t1, t2, t3) \ +t0 \ +netmap_mem_##name(struct netmap_mem_d *nmd, t1 a1, t2 a2, t3 a3) \ +{ \ + return nmd->ops->nmd_##name(nmd, a1, a2, a3); \ +} + +#define NMD_DEFNACB(t0, name) \ +t0 \ +netmap_mem_##name(struct netmap_adapter *na) \ +{ \ + return na->nm_mem->ops->nmd_##name(na); \ +} + +#define NMD_DEFNACB1(t0, name, 
t1) \ +t0 \ +netmap_mem_##name(struct netmap_adapter *na, t1 a1) \ +{ \ + return na->nm_mem->ops->nmd_##name(na, a1); \ +} + +NMD_DEFCB1(void, get_lut, struct netmap_lut *); +NMD_DEFCB3(int, get_info, u_int *, u_int *, uint16_t *); +NMD_DEFCB1(vm_paddr_t, ofstophys, vm_ooffset_t); +static int netmap_mem_config(struct netmap_mem_d *); +NMD_DEFCB(int, config); +NMD_DEFCB1(ssize_t, if_offset, const void *); +NMD_DEFCB(void, delete); + +NMD_DEFNACB(struct netmap_if *, if_new); +NMD_DEFNACB1(void, if_delete, struct netmap_if *); +NMD_DEFNACB(int, rings_create); +NMD_DEFNACB(void, rings_delete); + +static int netmap_mem_map(struct netmap_obj_pool *, struct netmap_adapter *); +static int netmap_mem_unmap(struct netmap_obj_pool *, struct netmap_adapter *); +static int nm_mem_assign_group(struct netmap_mem_d *, struct device *); + +#define NMA_LOCK_INIT(n) NM_MTX_INIT((n)->nm_mtx) +#define NMA_LOCK_DESTROY(n) NM_MTX_DESTROY((n)->nm_mtx) +#define NMA_LOCK(n) NM_MTX_LOCK((n)->nm_mtx) +#define NMA_UNLOCK(n) NM_MTX_UNLOCK((n)->nm_mtx) + +#ifdef NM_DEBUG_MEM_PUTGET +#define NM_DBG_REFC(nmd, func, line) \ + printf("%s:%d mem[%d] -> %d\n", func, line, (nmd)->nm_id, (nmd)->refcount); +#else +#define NM_DBG_REFC(nmd, func, line) +#endif + +#ifdef NM_DEBUG_MEM_PUTGET +void __netmap_mem_get(struct netmap_mem_d *nmd, const char *func, int line) +#else +void netmap_mem_get(struct netmap_mem_d *nmd) +#endif +{ + NMA_LOCK(nmd); + nmd->refcount++; + NM_DBG_REFC(nmd, func, line); + NMA_UNLOCK(nmd); +} + +#ifdef NM_DEBUG_MEM_PUTGET +void __netmap_mem_put(struct netmap_mem_d *nmd, const char *func, int line) +#else +void netmap_mem_put(struct netmap_mem_d *nmd) +#endif { - return nmd->pools[NETMAP_BUF_POOL].lut; + int last; + NMA_LOCK(nmd); + last = (--nmd->refcount == 0); + NM_DBG_REFC(nmd, func, line); + NMA_UNLOCK(nmd); + if (last) + netmap_mem_delete(nmd); } -u_int -netmap_mem_get_buftotal(struct netmap_mem_d *nmd) +int +netmap_mem_finalize(struct netmap_mem_d *nmd, struct netmap_adapter 
*na) { - return nmd->pools[NETMAP_BUF_POOL].objtotal; + if (nm_mem_assign_group(nmd, na->pdev) < 0) { + return ENOMEM; + } else { + nmd->ops->nmd_finalize(nmd); + } + + if (!nmd->lasterr && na->pdev) + netmap_mem_map(&nmd->pools[NETMAP_BUF_POOL], na); + + return nmd->lasterr; } -size_t -netmap_mem_get_bufsize(struct netmap_mem_d *nmd) +void +netmap_mem_deref(struct netmap_mem_d *nmd, struct netmap_adapter *na) { - return nmd->pools[NETMAP_BUF_POOL]._objsize; + NMA_LOCK(nmd); + netmap_mem_unmap(&nmd->pools[NETMAP_BUF_POOL], na); + NMA_UNLOCK(nmd); + return nmd->ops->nmd_deref(nmd); } -#ifdef linux -#define NMA_LOCK_INIT(n) sema_init(&(n)->nm_mtx, 1) -#define NMA_LOCK_DESTROY(n) -#define NMA_LOCK(n) down(&(n)->nm_mtx) -#define NMA_UNLOCK(n) up(&(n)->nm_mtx) -#else /* !linux */ -#define NMA_LOCK_INIT(n) mtx_init(&(n)->nm_mtx, "netmap memory allocator lock", NULL, MTX_DEF) -#define NMA_LOCK_DESTROY(n) mtx_destroy(&(n)->nm_mtx) -#define NMA_LOCK(n) mtx_lock(&(n)->nm_mtx) -#define NMA_UNLOCK(n) mtx_unlock(&(n)->nm_mtx) -#endif /* linux */ +/* accessor functions */ +static void +netmap_mem2_get_lut(struct netmap_mem_d *nmd, struct netmap_lut *lut) +{ + lut->lut = nmd->pools[NETMAP_BUF_POOL].lut; + lut->objtotal = nmd->pools[NETMAP_BUF_POOL].objtotal; + lut->objsize = nmd->pools[NETMAP_BUF_POOL]._objsize; +} struct netmap_obj_params netmap_params[NETMAP_POOLS_NR] = { [NETMAP_IF_POOL] = { @@ -209,9 +312,7 @@ struct netmap_obj_params netmap_min_priv_params[NETMAP_POOLS_NR] = { * running in netmap mode. * Virtual (VALE) ports will have each its own allocator. */ -static int netmap_mem_global_config(struct netmap_mem_d *nmd); -static int netmap_mem_global_finalize(struct netmap_mem_d *nmd); -static void netmap_mem_global_deref(struct netmap_mem_d *nmd); +extern struct netmap_mem_ops netmap_mem_global_ops; /* forward */ struct netmap_mem_d nm_mem = { /* Our memory allocator. 
*/ .pools = { [NETMAP_IF_POOL] = { @@ -236,24 +337,21 @@ struct netmap_mem_d nm_mem = { /* Our memory allocator. */ .nummax = 1000000, /* one million! */ }, }, - .config = netmap_mem_global_config, - .finalize = netmap_mem_global_finalize, - .deref = netmap_mem_global_deref, .nm_id = 1, .nm_grp = -1, .prev = &nm_mem, .next = &nm_mem, + + .ops = &netmap_mem_global_ops }; struct netmap_mem_d *netmap_last_mem_d = &nm_mem; /* blueprint for the private memory allocators */ -static int netmap_mem_private_config(struct netmap_mem_d *nmd); -static int netmap_mem_private_finalize(struct netmap_mem_d *nmd); -static void netmap_mem_private_deref(struct netmap_mem_d *nmd); +extern struct netmap_mem_ops netmap_mem_private_ops; /* forward */ const struct netmap_mem_d nm_blueprint = { .pools = { [NETMAP_IF_POOL] = { @@ -278,11 +376,10 @@ const struct netmap_mem_d nm_blueprint = { .nummax = 1000000, /* one million! */ }, }, - .config = netmap_mem_private_config, - .finalize = netmap_mem_private_finalize, - .deref = netmap_mem_private_deref, .flags = NETMAP_MEM_PRIVATE, + + .ops = &netmap_mem_private_ops }; /* memory allocator related sysctls */ @@ -382,8 +479,8 @@ nm_mem_assign_group(struct netmap_mem_d *nmd, struct device *dev) * First, find the allocator that contains the requested offset, * then locate the cluster through a lookup table. 
*/ -vm_paddr_t -netmap_mem_ofstophys(struct netmap_mem_d* nmd, vm_ooffset_t offset) +static vm_paddr_t +netmap_mem2_ofstophys(struct netmap_mem_d* nmd, vm_ooffset_t offset) { int i; vm_ooffset_t o = offset; @@ -414,13 +511,13 @@ netmap_mem_ofstophys(struct netmap_mem_d* nmd, vm_ooffset_t offset) return 0; // XXX bad address } -int -netmap_mem_get_info(struct netmap_mem_d* nmd, u_int* size, u_int *memflags, +static int +netmap_mem2_get_info(struct netmap_mem_d* nmd, u_int* size, u_int *memflags, nm_memid_t *id) { int error = 0; NMA_LOCK(nmd); - error = nmd->config(nmd); + error = netmap_mem_config(nmd); if (error) goto out; if (size) { @@ -487,8 +584,8 @@ netmap_obj_offset(struct netmap_obj_pool *p, const void *vaddr) netmap_obj_offset(&(n)->pools[NETMAP_BUF_POOL], (v))) -ssize_t -netmap_mem_if_offset(struct netmap_mem_d *nmd, const void *addr) +static ssize_t +netmap_mem2_if_offset(struct netmap_mem_d *nmd, const void *addr) { ssize_t v; NMA_LOCK(nmd); @@ -648,7 +745,7 @@ netmap_extra_alloc(struct netmap_adapter *na, uint32_t *head, uint32_t n) static void netmap_extra_free(struct netmap_adapter *na, uint32_t head) { - struct lut_entry *lut = na->na_lut; + struct lut_entry *lut = na->na_lut.lut; struct netmap_mem_d *nmd = na->nm_mem; struct netmap_obj_pool *p = &nmd->pools[NETMAP_BUF_POOL]; uint32_t i, cur, *buf; @@ -1081,15 +1178,15 @@ error: -void +static void netmap_mem_private_delete(struct netmap_mem_d *nmd) { if (nmd == NULL) return; if (netmap_verbose) D("deleting %p", nmd); - if (nmd->refcount > 0) - D("bug: deleting mem allocator with refcount=%d!", nmd->refcount); + if (nmd->active > 0) + D("bug: deleting mem allocator with active=%d!", nmd->active); nm_mem_release_id(nmd); if (netmap_verbose) D("done deleting %p", nmd); @@ -1110,8 +1207,10 @@ static int netmap_mem_private_finalize(struct netmap_mem_d *nmd) { int err; - nmd->refcount++; + NMA_LOCK(nmd); + nmd->active++; err = netmap_mem_finalize_all(nmd); + NMA_UNLOCK(nmd); return err; } @@ -1119,8 
+1218,10 @@ netmap_mem_private_finalize(struct netmap_mem_d *nmd) static void netmap_mem_private_deref(struct netmap_mem_d *nmd) { - if (--nmd->refcount <= 0) + NMA_LOCK(nmd); + if (--nmd->active <= 0) netmap_mem_reset_all(nmd); + NMA_UNLOCK(nmd); } @@ -1223,14 +1324,14 @@ netmap_mem_global_config(struct netmap_mem_d *nmd) { int i; - if (nmd->refcount) + if (nmd->active) /* already in use, we cannot change the configuration */ goto out; if (!netmap_memory_config_changed(nmd)) goto out; - D("reconfiguring"); + ND("reconfiguring"); if (nmd->flags & NETMAP_MEM_FINALIZED) { /* reset previous allocation */ @@ -1261,7 +1362,7 @@ netmap_mem_global_finalize(struct netmap_mem_d *nmd) if (netmap_mem_global_config(nmd)) goto out; - nmd->refcount++; + nmd->active++; if (nmd->flags & NETMAP_MEM_FINALIZED) { /* may happen if config is not changed */ @@ -1276,53 +1377,56 @@ netmap_mem_global_finalize(struct netmap_mem_d *nmd) out: if (nmd->lasterr) - nmd->refcount--; + nmd->active--; err = nmd->lasterr; return err; } +static void +netmap_mem_global_delete(struct netmap_mem_d *nmd) +{ + int i; + + for (i = 0; i < NETMAP_POOLS_NR; i++) { + netmap_destroy_obj_allocator(&nm_mem.pools[i]); + } + + NMA_LOCK_DESTROY(&nm_mem); +} + int netmap_mem_init(void) { NMA_LOCK_INIT(&nm_mem); + netmap_mem_get(&nm_mem); return (0); } void netmap_mem_fini(void) { - int i; - - for (i = 0; i < NETMAP_POOLS_NR; i++) { - netmap_destroy_obj_allocator(&nm_mem.pools[i]); - } - NMA_LOCK_DESTROY(&nm_mem); + netmap_mem_put(&nm_mem); } static void netmap_free_rings(struct netmap_adapter *na) { - struct netmap_kring *kring; - struct netmap_ring *ring; - if (!na->tx_rings) - return; - for (kring = na->tx_rings; kring != na->rx_rings; kring++) { - ring = kring->ring; - if (ring == NULL) - continue; - netmap_free_bufs(na->nm_mem, ring->slot, kring->nkr_num_slots); - netmap_ring_free(na->nm_mem, ring); - kring->ring = NULL; - } - for (/* cont'd from above */; kring != na->tailroom; kring++) { - ring = kring->ring; 
- if (ring == NULL) - continue; - netmap_free_bufs(na->nm_mem, ring->slot, kring->nkr_num_slots); - netmap_ring_free(na->nm_mem, ring); - kring->ring = NULL; + enum txrx t; + + for_rx_tx(t) { + u_int i; + for (i = 0; i < netmap_real_rings(na, t); i++) { + struct netmap_kring *kring = &NMR(na, t)[i]; + struct netmap_ring *ring = kring->ring; + + if (ring == NULL) + continue; + netmap_free_bufs(na->nm_mem, ring->slot, kring->nkr_num_slots); + netmap_ring_free(na->nm_mem, ring); + kring->ring = NULL; + } } } @@ -1333,99 +1437,63 @@ netmap_free_rings(struct netmap_adapter *na) * The kring array must follow the layout described * in netmap_krings_create(). */ -int -netmap_mem_rings_create(struct netmap_adapter *na) +static int +netmap_mem2_rings_create(struct netmap_adapter *na) { - struct netmap_ring *ring; - u_int len, ndesc; - struct netmap_kring *kring; - u_int i; + enum txrx t; NMA_LOCK(na->nm_mem); - /* transmit rings */ - for (i =0, kring = na->tx_rings; kring != na->rx_rings; kring++, i++) { - if (kring->ring) { - ND("%s %ld already created", kring->name, kring - na->tx_rings); - continue; /* already created by somebody else */ - } - ndesc = kring->nkr_num_slots; - len = sizeof(struct netmap_ring) + - ndesc * sizeof(struct netmap_slot); - ring = netmap_ring_malloc(na->nm_mem, len); - if (ring == NULL) { - D("Cannot allocate tx_ring"); - goto cleanup; - } - ND("txring at %p", ring); - kring->ring = ring; - *(uint32_t *)(uintptr_t)&ring->num_slots = ndesc; - *(int64_t *)(uintptr_t)&ring->buf_ofs = - (na->nm_mem->pools[NETMAP_IF_POOL].memtotal + - na->nm_mem->pools[NETMAP_RING_POOL].memtotal) - - netmap_ring_offset(na->nm_mem, ring); - - /* copy values from kring */ - ring->head = kring->rhead; - ring->cur = kring->rcur; - ring->tail = kring->rtail; - *(uint16_t *)(uintptr_t)&ring->nr_buf_size = - netmap_mem_bufsize(na->nm_mem); - ND("%s h %d c %d t %d", kring->name, - ring->head, ring->cur, ring->tail); - ND("initializing slots for txring"); - if (i != 
na->num_tx_rings || (na->na_flags & NAF_HOST_RINGS)) { - /* this is a real ring */ - if (netmap_new_bufs(na->nm_mem, ring->slot, ndesc)) { - D("Cannot allocate buffers for tx_ring"); - goto cleanup; - } - } else { - /* this is a fake tx ring, set all indices to 0 */ - netmap_mem_set_ring(na->nm_mem, ring->slot, ndesc, 0); - } - } + for_rx_tx(t) { + u_int i; - /* receive rings */ - for ( i = 0 /* kring cont'd from above */ ; kring != na->tailroom; kring++, i++) { - if (kring->ring) { - ND("%s %ld already created", kring->name, kring - na->rx_rings); - continue; /* already created by somebody else */ - } - ndesc = kring->nkr_num_slots; - len = sizeof(struct netmap_ring) + - ndesc * sizeof(struct netmap_slot); - ring = netmap_ring_malloc(na->nm_mem, len); - if (ring == NULL) { - D("Cannot allocate rx_ring"); - goto cleanup; - } - ND("rxring at %p", ring); - kring->ring = ring; - *(uint32_t *)(uintptr_t)&ring->num_slots = ndesc; - *(int64_t *)(uintptr_t)&ring->buf_ofs = - (na->nm_mem->pools[NETMAP_IF_POOL].memtotal + - na->nm_mem->pools[NETMAP_RING_POOL].memtotal) - - netmap_ring_offset(na->nm_mem, ring); - - /* copy values from kring */ - ring->head = kring->rhead; - ring->cur = kring->rcur; - ring->tail = kring->rtail; - *(int *)(uintptr_t)&ring->nr_buf_size = - netmap_mem_bufsize(na->nm_mem); - ND("%s h %d c %d t %d", kring->name, - ring->head, ring->cur, ring->tail); - ND("initializing slots for rxring %p", ring); - if (i != na->num_rx_rings || (na->na_flags & NAF_HOST_RINGS)) { - /* this is a real ring */ - if (netmap_new_bufs(na->nm_mem, ring->slot, ndesc)) { - D("Cannot allocate buffers for rx_ring"); + for (i = 0; i <= nma_get_nrings(na, t); i++) { + struct netmap_kring *kring = &NMR(na, t)[i]; + struct netmap_ring *ring = kring->ring; + u_int len, ndesc; + + if (ring) { + ND("%s already created", kring->name); + continue; /* already created by somebody else */ + } + ndesc = kring->nkr_num_slots; + len = sizeof(struct netmap_ring) + + ndesc * sizeof(struct 
netmap_slot); + ring = netmap_ring_malloc(na->nm_mem, len); + if (ring == NULL) { + D("Cannot allocate %s_ring", nm_txrx2str(t)); goto cleanup; } - } else { - /* this is a fake rx ring, set all indices to 1 */ - netmap_mem_set_ring(na->nm_mem, ring->slot, ndesc, 1); + ND("txring at %p", ring); + kring->ring = ring; + *(uint32_t *)(uintptr_t)&ring->num_slots = ndesc; + *(int64_t *)(uintptr_t)&ring->buf_ofs = + (na->nm_mem->pools[NETMAP_IF_POOL].memtotal + + na->nm_mem->pools[NETMAP_RING_POOL].memtotal) - + netmap_ring_offset(na->nm_mem, ring); + + /* copy values from kring */ + ring->head = kring->rhead; + ring->cur = kring->rcur; + ring->tail = kring->rtail; + *(uint16_t *)(uintptr_t)&ring->nr_buf_size = + netmap_mem_bufsize(na->nm_mem); + ND("%s h %d c %d t %d", kring->name, + ring->head, ring->cur, ring->tail); + ND("initializing slots for %s_ring", nm_txrx2str(txrx)); + if (i != nma_get_nrings(na, t) || (na->na_flags & NAF_HOST_RINGS)) { + /* this is a real ring */ + if (netmap_new_bufs(na->nm_mem, ring->slot, ndesc)) { + D("Cannot allocate buffers for %s_ring", nm_txrx2str(t)); + goto cleanup; + } + } else { + /* this is a fake ring, set all indices to 0 */ + netmap_mem_set_ring(na->nm_mem, ring->slot, ndesc, 0); + } + /* ring info */ + *(uint16_t *)(uintptr_t)&ring->ringid = kring->ring_id; + *(uint16_t *)(uintptr_t)&ring->dir = kring->tx; } } @@ -1441,8 +1509,8 @@ cleanup: return ENOMEM; } -void -netmap_mem_rings_delete(struct netmap_adapter *na) +static void +netmap_mem2_rings_delete(struct netmap_adapter *na) { /* last instance, release bufs and rings */ NMA_LOCK(na->nm_mem); @@ -1461,16 +1529,20 @@ netmap_mem_rings_delete(struct netmap_adapter *na) * (number of tx/rx rings and descs) does not change while * the interface is in netmap mode. 
*/ -struct netmap_if * -netmap_mem_if_new(struct netmap_adapter *na) +static struct netmap_if * +netmap_mem2_if_new(struct netmap_adapter *na) { struct netmap_if *nifp; ssize_t base; /* handy for relative offsets between rings and nifp */ - u_int i, len, ntx, nrx; - - /* account for the (eventually fake) host rings */ - ntx = na->num_tx_rings + 1; - nrx = na->num_rx_rings + 1; + u_int i, len, n[NR_TXRX], ntot; + enum txrx t; + + ntot = 0; + for_rx_tx(t) { + /* account for the (eventually fake) host rings */ + n[t] = nma_get_nrings(na, t) + 1; + ntot += n[t]; + } /* * the descriptor is followed inline by an array of offsets * to the tx and rx rings in the shared memory region. @@ -1478,7 +1550,7 @@ netmap_mem_if_new(struct netmap_adapter *na) NMA_LOCK(na->nm_mem); - len = sizeof(struct netmap_if) + (nrx + ntx) * sizeof(ssize_t); + len = sizeof(struct netmap_if) + (ntot * sizeof(ssize_t)); nifp = netmap_if_malloc(na->nm_mem, len); if (nifp == NULL) { NMA_UNLOCK(na->nm_mem); @@ -1496,12 +1568,12 @@ netmap_mem_if_new(struct netmap_adapter *na) * userspace to reach the ring from the nifp. 
*/ base = netmap_if_offset(na->nm_mem, nifp); - for (i = 0; i < ntx; i++) { + for (i = 0; i < n[NR_TX]; i++) { *(ssize_t *)(uintptr_t)&nifp->ring_ofs[i] = netmap_ring_offset(na->nm_mem, na->tx_rings[i].ring) - base; } - for (i = 0; i < nrx; i++) { - *(ssize_t *)(uintptr_t)&nifp->ring_ofs[i+ntx] = + for (i = 0; i < n[NR_RX]; i++) { + *(ssize_t *)(uintptr_t)&nifp->ring_ofs[i+n[NR_TX]] = netmap_ring_offset(na->nm_mem, na->rx_rings[i].ring) - base; } @@ -1510,8 +1582,8 @@ netmap_mem_if_new(struct netmap_adapter *na) return (nifp); } -void -netmap_mem_if_delete(struct netmap_adapter *na, struct netmap_if *nifp) +static void +netmap_mem2_if_delete(struct netmap_adapter *na, struct netmap_if *nifp) { if (nifp == NULL) /* nothing to do */ @@ -1528,78 +1600,39 @@ static void netmap_mem_global_deref(struct netmap_mem_d *nmd) { - nmd->refcount--; - if (!nmd->refcount) + nmd->active--; + if (!nmd->active) nmd->nm_grp = -1; if (netmap_verbose) - D("refcount = %d", nmd->refcount); + D("active = %d", nmd->active); } -int -netmap_mem_finalize(struct netmap_mem_d *nmd, struct netmap_adapter *na) -{ - if (nm_mem_assign_group(nmd, na->pdev) < 0) { - return ENOMEM; - } else { - NMA_LOCK(nmd); - nmd->finalize(nmd); - NMA_UNLOCK(nmd); - } - - if (!nmd->lasterr && na->pdev) - netmap_mem_map(&nmd->pools[NETMAP_BUF_POOL], na); - - return nmd->lasterr; -} - -void -netmap_mem_deref(struct netmap_mem_d *nmd, struct netmap_adapter *na) -{ - NMA_LOCK(nmd); - netmap_mem_unmap(&nmd->pools[NETMAP_BUF_POOL], na); - if (nmd->refcount == 1) { - u_int i; - - /* - * Reset the allocator when it falls out of use so that any - * pool resources leaked by unclean application exits are - * reclaimed. 
- */ - for (i = 0; i < NETMAP_POOLS_NR; i++) { - struct netmap_obj_pool *p; - u_int j; - - p = &nmd->pools[i]; - p->objfree = p->objtotal; - /* - * Reproduce the net effect of the M_ZERO malloc() - * and marking of free entries in the bitmap that - * occur in finalize_obj_allocator() - */ - memset(p->bitmap, - '\0', - sizeof(uint32_t) * ((p->objtotal + 31) / 32)); - - /* - * Set all the bits in the bitmap that have - * corresponding buffers to 1 to indicate they are - * free. - */ - for (j = 0; j < p->objtotal; j++) { - if (p->lut[j].vaddr != NULL) { - p->bitmap[ (j>>5) ] |= ( 1 << (j & 31) ); - } - } - } - - /* - * Per netmap_mem_finalize_all(), - * buffers 0 and 1 are reserved - */ - nmd->pools[NETMAP_BUF_POOL].objfree -= 2; - nmd->pools[NETMAP_BUF_POOL].bitmap[0] = ~3; - } - nmd->deref(nmd); - NMA_UNLOCK(nmd); -} +struct netmap_mem_ops netmap_mem_global_ops = { + .nmd_get_lut = netmap_mem2_get_lut, + .nmd_get_info = netmap_mem2_get_info, + .nmd_ofstophys = netmap_mem2_ofstophys, + .nmd_config = netmap_mem_global_config, + .nmd_finalize = netmap_mem_global_finalize, + .nmd_deref = netmap_mem_global_deref, + .nmd_delete = netmap_mem_global_delete, + .nmd_if_offset = netmap_mem2_if_offset, + .nmd_if_new = netmap_mem2_if_new, + .nmd_if_delete = netmap_mem2_if_delete, + .nmd_rings_create = netmap_mem2_rings_create, + .nmd_rings_delete = netmap_mem2_rings_delete +}; +struct netmap_mem_ops netmap_mem_private_ops = { + .nmd_get_lut = netmap_mem2_get_lut, + .nmd_get_info = netmap_mem2_get_info, + .nmd_ofstophys = netmap_mem2_ofstophys, + .nmd_config = netmap_mem_private_config, + .nmd_finalize = netmap_mem_private_finalize, + .nmd_deref = netmap_mem_private_deref, + .nmd_if_offset = netmap_mem2_if_offset, + .nmd_delete = netmap_mem_private_delete, + .nmd_if_new = netmap_mem2_if_new, + .nmd_if_delete = netmap_mem2_if_delete, + .nmd_rings_create = netmap_mem2_rings_create, + .nmd_rings_delete = netmap_mem2_rings_delete +}; diff --git a/sys/dev/netmap/netmap_mem2.h 
b/sys/dev/netmap/netmap_mem2.h index 4c620bd4a780..ef0ff96d8e7f 100644 --- a/sys/dev/netmap/netmap_mem2.h +++ b/sys/dev/netmap/netmap_mem2.h @@ -117,9 +117,7 @@ extern struct netmap_mem_d nm_mem; -struct lut_entry* netmap_mem_get_lut(struct netmap_mem_d *); -u_int netmap_mem_get_buftotal(struct netmap_mem_d *); -size_t netmap_mem_get_bufsize(struct netmap_mem_d *); +void netmap_mem_get_lut(struct netmap_mem_d *, struct netmap_lut *); vm_paddr_t netmap_mem_ofstophys(struct netmap_mem_d *, vm_ooffset_t); int netmap_mem_finalize(struct netmap_mem_d *, struct netmap_adapter *); int netmap_mem_init(void); @@ -134,12 +132,34 @@ ssize_t netmap_mem_if_offset(struct netmap_mem_d *, const void *vaddr); struct netmap_mem_d* netmap_mem_private_new(const char *name, u_int txr, u_int txd, u_int rxr, u_int rxd, u_int extra_bufs, u_int npipes, int* error); -void netmap_mem_private_delete(struct netmap_mem_d *); +void netmap_mem_delete(struct netmap_mem_d *); + +//#define NM_DEBUG_MEM_PUTGET 1 + +#ifdef NM_DEBUG_MEM_PUTGET + +#define netmap_mem_get(nmd) \ + do { \ + __netmap_mem_get(nmd, __FUNCTION__, __LINE__); \ + } while (0) + +#define netmap_mem_put(nmd) \ + do { \ + __netmap_mem_put(nmd, __FUNCTION__, __LINE__); \ + } while (0) + +void __netmap_mem_get(struct netmap_mem_d *, const char *, int); +void __netmap_mem_put(struct netmap_mem_d *, const char *, int); +#else /* !NM_DEBUG_MEM_PUTGET */ + +void netmap_mem_get(struct netmap_mem_d *); +void netmap_mem_put(struct netmap_mem_d *); + +#endif /* !NM_DEBUG_PUTGET */ #define NETMAP_MEM_PRIVATE 0x2 /* allocator uses private address space */ #define NETMAP_MEM_IO 0x4 /* the underlying memory is mmapped I/O */ uint32_t netmap_extra_alloc(struct netmap_adapter *, uint32_t *, uint32_t n); - #endif diff --git a/sys/dev/netmap/netmap_monitor.c b/sys/dev/netmap/netmap_monitor.c index 746abb524d06..be7ce440015c 100644 --- a/sys/dev/netmap/netmap_monitor.c +++ b/sys/dev/netmap/netmap_monitor.c @@ -28,7 +28,7 @@ * * Monitors * - * netmap 
monitors can be used to do zero-copy monitoring of network traffic + * netmap monitors can be used to do monitoring of network traffic * on another adapter, when the latter adapter is working in netmap mode. * * Monitors offer to userspace the same interface as any other netmap port, @@ -38,8 +38,24 @@ * monitored adapter. During registration, the user can choose if she wants * to intercept tx only, rx only, or both tx and rx traffic. * - * The monitor only sees the frames after they have been consumed in the - * monitored adapter: + * If the monitor is not able to cope with the stream of frames, excess traffic + * will be dropped. + * + * If the monitored adapter leaves netmap mode, the monitor has to be restarted. + * + * Monitors can be either zero-copy or copy-based. + * + * Copy monitors see the frames before they are consumed: + * + * - For tx traffic, this is when the application sends them, before they are + * passed down to the adapter. + * + * - For rx traffic, this is when they are received by the adapter, before + * they are sent up to the application, if any (note that, if no + * application is reading from a monitored ring, the ring will eventually + * fill up and traffic will stop). + * + * Zero-copy monitors only see the frames after they have been consumed: * * - For tx traffic, this is after the slots containing the frames have been * marked as free. Note that this may happen at a considerably delay after @@ -49,11 +65,9 @@ * has released them. In most cases, the consumer is a userspace * application which may have modified the frame contents. * - * If the monitor is not able to cope with the stream of frames, excess traffic - * will be dropped. - * - * Each ring can be monitored by at most one monitor. This may change in the - * future, if we implement monitor chaining. + * Several copy monitors may be active on any ring. Zero-copy monitors, + * instead, need exclusive access to each of the monitored rings. 
This may + * change in the future, if we implement zero-copy monitor chaining. * */ @@ -105,34 +119,319 @@ #define NM_MONITOR_MAXSLOTS 4096 -/* monitor works by replacing the nm_sync callbacks in the monitored rings. - * The actions to be performed are the same on both tx and rx rings, so we - * have collected them here +/* + ******************************************************************** + * functions common to both kind of monitors + ******************************************************************** + */ + +/* nm_sync callback for the monitor's own tx rings. + * This makes no sense and always returns error + */ +static int +netmap_monitor_txsync(struct netmap_kring *kring, int flags) +{ + RD(1, "%s %x", kring->name, flags); + return EIO; +} + +/* nm_sync callback for the monitor's own rx rings. + * Note that the lock in netmap_zmon_parent_sync only protects + * writers among themselves. Synchronization between writers + * (i.e., netmap_zmon_parent_txsync and netmap_zmon_parent_rxsync) + * and readers (i.e., netmap_zmon_rxsync) relies on memory barriers. + */ +static int +netmap_monitor_rxsync(struct netmap_kring *kring, int flags) +{ + ND("%s %x", kring->name, flags); + kring->nr_hwcur = kring->rcur; + mb(); + return 0; +} + +/* nm_krings_create callbacks for monitors. + * We could use the default netmap_hw_krings_zmon, but + * we don't need the mbq. + */ +static int +netmap_monitor_krings_create(struct netmap_adapter *na) +{ + return netmap_krings_create(na, 0); +} + +/* nm_krings_delete callback for monitors */ +static void +netmap_monitor_krings_delete(struct netmap_adapter *na) +{ + netmap_krings_delete(na); +} + + +static u_int +nm_txrx2flag(enum txrx t) +{ + return (t == NR_RX ? 
NR_MONITOR_RX : NR_MONITOR_TX); +} + +/* allocate the monitors array in the monitored kring */ +static int +nm_monitor_alloc(struct netmap_kring *kring, u_int n) +{ + size_t len; + struct netmap_kring **nm; + + if (n <= kring->max_monitors) + /* we already have more entries that requested */ + return 0; + + len = sizeof(struct netmap_kring *) * n; + nm = realloc(kring->monitors, len, M_DEVBUF, M_NOWAIT | M_ZERO); + if (nm == NULL) + return ENOMEM; + + kring->monitors = nm; + kring->max_monitors = n; + + return 0; +} + +/* deallocate the parent array in the parent adapter */ +static void +nm_monitor_dealloc(struct netmap_kring *kring) +{ + if (kring->monitors) { + if (kring->n_monitors > 0) { + D("freeing not empty monitor array for %s (%d dangling monitors)!", kring->name, + kring->n_monitors); + } + free(kring->monitors, M_DEVBUF); + kring->monitors = NULL; + kring->max_monitors = 0; + kring->n_monitors = 0; + } +} + +/* + * monitors work by replacing the nm_sync() and possibly the + * nm_notify() callbacks in the monitored rings. + */ +static int netmap_zmon_parent_txsync(struct netmap_kring *, int); +static int netmap_zmon_parent_rxsync(struct netmap_kring *, int); +static int netmap_monitor_parent_txsync(struct netmap_kring *, int); +static int netmap_monitor_parent_rxsync(struct netmap_kring *, int); +static int netmap_monitor_parent_notify(struct netmap_kring *, int); + + +/* add the monitor mkring to the list of monitors of kring. 
+ * If this is the first monitor, intercept the callbacks + */ +static int +netmap_monitor_add(struct netmap_kring *mkring, struct netmap_kring *kring, int zcopy) +{ + int error = 0; + + /* sinchronize with concurrently running nm_sync()s */ + nm_kr_get(kring); + /* make sure the monitor array exists and is big enough */ + error = nm_monitor_alloc(kring, kring->n_monitors + 1); + if (error) + goto out; + kring->monitors[kring->n_monitors] = mkring; + mkring->mon_pos = kring->n_monitors; + kring->n_monitors++; + if (kring->n_monitors == 1) { + /* this is the first monitor, intercept callbacks */ + D("%s: intercept callbacks on %s", mkring->name, kring->name); + kring->mon_sync = kring->nm_sync; + /* zcopy monitors do not override nm_notify(), but + * we save the original one regardless, so that + * netmap_monitor_del() does not need to know the + * monitor type + */ + kring->mon_notify = kring->nm_notify; + if (kring->tx == NR_TX) { + kring->nm_sync = (zcopy ? netmap_zmon_parent_txsync : + netmap_monitor_parent_txsync); + } else { + kring->nm_sync = (zcopy ? netmap_zmon_parent_rxsync : + netmap_monitor_parent_rxsync); + if (!zcopy) { + /* also intercept notify */ + kring->nm_notify = netmap_monitor_parent_notify; + kring->mon_tail = kring->nr_hwtail; + } + } + } + +out: + nm_kr_put(kring); + return error; +} + + +/* remove the monitor mkring from the list of monitors of kring. 
+ * If this is the last monitor, restore the original callbacks + */ +static void +netmap_monitor_del(struct netmap_kring *mkring, struct netmap_kring *kring) +{ + /* sinchronize with concurrently running nm_sync()s */ + nm_kr_get(kring); + kring->n_monitors--; + if (mkring->mon_pos != kring->n_monitors) { + kring->monitors[mkring->mon_pos] = kring->monitors[kring->n_monitors]; + kring->monitors[mkring->mon_pos]->mon_pos = mkring->mon_pos; + } + kring->monitors[kring->n_monitors] = NULL; + if (kring->n_monitors == 0) { + /* this was the last monitor, restore callbacks and delete monitor array */ + D("%s: restoring sync on %s: %p", mkring->name, kring->name, kring->mon_sync); + kring->nm_sync = kring->mon_sync; + kring->mon_sync = NULL; + if (kring->tx == NR_RX) { + D("%s: restoring notify on %s: %p", + mkring->name, kring->name, kring->mon_notify); + kring->nm_notify = kring->mon_notify; + kring->mon_notify = NULL; + } + nm_monitor_dealloc(kring); + } + nm_kr_put(kring); +} + + +/* This is called when the monitored adapter leaves netmap mode + * (see netmap_do_unregif). + * We need to notify the monitors that the monitored rings are gone. + * We do this by setting their mna->priv.np_na to NULL. + * Note that the rings are already stopped when this happens, so + * no monitor ring callback can be active. + */ +void +netmap_monitor_stop(struct netmap_adapter *na) +{ + enum txrx t; + + for_rx_tx(t) { + u_int i; + + for (i = 0; i < nma_get_nrings(na, t); i++) { + struct netmap_kring *kring = &NMR(na, t)[i]; + u_int j; + + for (j = 0; j < kring->n_monitors; j++) { + struct netmap_kring *mkring = + kring->monitors[j]; + struct netmap_monitor_adapter *mna = + (struct netmap_monitor_adapter *)mkring->na; + /* forget about this adapter */ + mna->priv.np_na = NULL; + } + } + } +} + + +/* common functions for the nm_register() callbacks of both kind of + * monitors. 
*/ static int -netmap_monitor_parent_sync(struct netmap_kring *kring, int flags, u_int* ringptr) +netmap_monitor_reg_common(struct netmap_adapter *na, int onoff, int zmon) { - struct netmap_monitor_adapter *mna = kring->monitor; - struct netmap_kring *mkring = &mna->up.rx_rings[kring->ring_id]; - struct netmap_ring *ring = kring->ring, *mring = mkring->ring; - int error; - int rel_slots, free_slots, busy; + struct netmap_monitor_adapter *mna = + (struct netmap_monitor_adapter *)na; + struct netmap_priv_d *priv = &mna->priv; + struct netmap_adapter *pna = priv->np_na; + struct netmap_kring *kring, *mkring; + int i; + enum txrx t; + + ND("%p: onoff %d", na, onoff); + if (onoff) { + if (pna == NULL) { + /* parent left netmap mode, fatal */ + D("%s: internal error", na->name); + return ENXIO; + } + for_rx_tx(t) { + if (mna->flags & nm_txrx2flag(t)) { + for (i = priv->np_qfirst[t]; i < priv->np_qlast[t]; i++) { + kring = &NMR(pna, t)[i]; + mkring = &na->rx_rings[i]; + netmap_monitor_add(mkring, kring, zmon); + } + } + } + na->na_flags |= NAF_NETMAP_ON; + } else { + if (pna == NULL) { + D("%s: parent left netmap mode, nothing to restore", na->name); + return 0; + } + na->na_flags &= ~NAF_NETMAP_ON; + for_rx_tx(t) { + if (mna->flags & nm_txrx2flag(t)) { + for (i = priv->np_qfirst[t]; i < priv->np_qlast[t]; i++) { + kring = &NMR(pna, t)[i]; + mkring = &na->rx_rings[i]; + netmap_monitor_del(mkring, kring); + } + } + } + } + return 0; +} + +/* + **************************************************************** + * functions specific for zero-copy monitors + **************************************************************** + */ + +/* + * Common function for both zero-copy tx and rx nm_sync() + * callbacks + */ +static int +netmap_zmon_parent_sync(struct netmap_kring *kring, int flags, enum txrx tx) +{ + struct netmap_kring *mkring = kring->monitors[0]; + struct netmap_ring *ring = kring->ring, *mring; + int error = 0; + int rel_slots, free_slots, busy, sent = 0; u_int beg, end, 
i; u_int lim = kring->nkr_num_slots - 1, - mlim = mkring->nkr_num_slots - 1; + mlim; // = mkring->nkr_num_slots - 1; + + if (mkring == NULL) { + RD(5, "NULL monitor on %s", kring->name); + return 0; + } + mring = mkring->ring; + mlim = mkring->nkr_num_slots - 1; /* get the relased slots (rel_slots) */ - beg = *ringptr; - error = kring->save_sync(kring, flags); - if (error) - return error; - end = *ringptr; + if (tx == NR_TX) { + beg = kring->nr_hwtail; + error = kring->mon_sync(kring, flags); + if (error) + return error; + end = kring->nr_hwtail; + } else { /* NR_RX */ + beg = kring->nr_hwcur; + end = kring->rhead; + } + rel_slots = end - beg; if (rel_slots < 0) rel_slots += kring->nkr_num_slots; if (!rel_slots) { - return 0; + /* no released slots, but we still need + * to call rxsync if this is a rx ring + */ + goto out_rxsync; } /* we need to lock the monitor receive ring, since it @@ -147,19 +446,18 @@ netmap_monitor_parent_sync(struct netmap_kring *kring, int flags, u_int* ringptr busy += mkring->nkr_num_slots; free_slots = mlim - busy; - if (!free_slots) { - mtx_unlock(&mkring->q_lock); - return 0; - } + if (!free_slots) + goto out; /* swap min(free_slots, rel_slots) slots */ if (free_slots < rel_slots) { beg += (rel_slots - free_slots); - if (beg > lim) - beg = 0; + if (beg >= kring->nkr_num_slots) + beg -= kring->nkr_num_slots; rel_slots = free_slots; } + sent = rel_slots; for ( ; rel_slots; rel_slots--) { struct netmap_slot *s = &ring->slot[beg]; struct netmap_slot *ms = &mring->slot[i]; @@ -168,6 +466,7 @@ netmap_monitor_parent_sync(struct netmap_kring *kring, int flags, u_int* ringptr tmp = ms->buf_idx; ms->buf_idx = s->buf_idx; s->buf_idx = tmp; + ND(5, "beg %d buf_idx %d", beg, tmp); tmp = ms->len; ms->len = s->len; @@ -182,143 +481,196 @@ netmap_monitor_parent_sync(struct netmap_kring *kring, int flags, u_int* ringptr mb(); mkring->nr_hwtail = i; +out: mtx_unlock(&mkring->q_lock); - /* notify the new frames to the monitor */ - 
mna->up.nm_notify(&mna->up, mkring->ring_id, NR_RX, 0); - return 0; + + if (sent) { + /* notify the new frames to the monitor */ + mkring->nm_notify(mkring, 0); + } + +out_rxsync: + if (tx == NR_RX) + error = kring->mon_sync(kring, flags); + + return error; } /* callback used to replace the nm_sync callback in the monitored tx rings */ static int -netmap_monitor_parent_txsync(struct netmap_kring *kring, int flags) +netmap_zmon_parent_txsync(struct netmap_kring *kring, int flags) { ND("%s %x", kring->name, flags); - return netmap_monitor_parent_sync(kring, flags, &kring->nr_hwtail); + return netmap_zmon_parent_sync(kring, flags, NR_TX); } /* callback used to replace the nm_sync callback in the monitored rx rings */ static int -netmap_monitor_parent_rxsync(struct netmap_kring *kring, int flags) +netmap_zmon_parent_rxsync(struct netmap_kring *kring, int flags) { ND("%s %x", kring->name, flags); - return netmap_monitor_parent_sync(kring, flags, &kring->rcur); + return netmap_zmon_parent_sync(kring, flags, NR_RX); } -/* nm_sync callback for the monitor's own tx rings. - * This makes no sense and always returns error - */ + static int -netmap_monitor_txsync(struct netmap_kring *kring, int flags) +netmap_zmon_reg(struct netmap_adapter *na, int onoff) { - D("%s %x", kring->name, flags); - return EIO; + return netmap_monitor_reg_common(na, onoff, 1 /* zcopy */); } -/* nm_sync callback for the monitor's own rx rings. - * Note that the lock in netmap_monitor_parent_sync only protects - * writers among themselves. Synchronization between writers - * (i.e., netmap_monitor_parent_txsync and netmap_monitor_parent_rxsync) - * and readers (i.e., netmap_monitor_rxsync) relies on memory barriers. 
- */ -static int -netmap_monitor_rxsync(struct netmap_kring *kring, int flags) +/* nm_dtor callback for monitors */ +static void +netmap_zmon_dtor(struct netmap_adapter *na) { - ND("%s %x", kring->name, flags); - kring->nr_hwcur = kring->rcur; - mb(); - nm_rxsync_finalize(kring); - return 0; + struct netmap_monitor_adapter *mna = + (struct netmap_monitor_adapter *)na; + struct netmap_priv_d *priv = &mna->priv; + struct netmap_adapter *pna = priv->np_na; + + netmap_adapter_put(pna); } -/* nm_krings_create callbacks for monitors. - * We could use the default netmap_hw_krings_monitor, but - * we don't need the mbq. +/* + **************************************************************** + * functions specific for copy monitors + **************************************************************** */ -static int -netmap_monitor_krings_create(struct netmap_adapter *na) + +static void +netmap_monitor_parent_sync(struct netmap_kring *kring, u_int first_new, int new_slots) { - return netmap_krings_create(na, 0); -} + u_int j; + for (j = 0; j < kring->n_monitors; j++) { + struct netmap_kring *mkring = kring->monitors[j]; + u_int i, mlim, beg; + int free_slots, busy, sent = 0, m; + u_int lim = kring->nkr_num_slots - 1; + struct netmap_ring *ring = kring->ring, *mring = mkring->ring; + u_int max_len = NETMAP_BUF_SIZE(mkring->na); -/* nm_register callback for monitors. - * - * On registration, replace the nm_sync callbacks in the monitored - * rings with our own, saving the previous ones in the monitored - * rings themselves, where they are used by netmap_monitor_parent_sync. - * - * On de-registration, restore the original callbacks. We need to - * stop traffic while we are doing this, since the monitored adapter may - * have already started executing a netmap_monitor_parent_sync - * and may not like the kring->save_sync pointer to become NULL. 
- */ -static int -netmap_monitor_reg(struct netmap_adapter *na, int onoff) -{ - struct netmap_monitor_adapter *mna = - (struct netmap_monitor_adapter *)na; - struct netmap_priv_d *priv = &mna->priv; - struct netmap_adapter *pna = priv->np_na; - struct netmap_kring *kring; - int i; + mlim = mkring->nkr_num_slots - 1; - ND("%p: onoff %d", na, onoff); - if (onoff) { - if (!nm_netmap_on(pna)) { - /* parent left netmap mode, fatal */ - return ENXIO; - } - if (mna->flags & NR_MONITOR_TX) { - for (i = priv->np_txqfirst; i < priv->np_txqlast; i++) { - kring = &pna->tx_rings[i]; - kring->save_sync = kring->nm_sync; - kring->nm_sync = netmap_monitor_parent_txsync; - } - } - if (mna->flags & NR_MONITOR_RX) { - for (i = priv->np_rxqfirst; i < priv->np_rxqlast; i++) { - kring = &pna->rx_rings[i]; - kring->save_sync = kring->nm_sync; - kring->nm_sync = netmap_monitor_parent_rxsync; - } - } - na->na_flags |= NAF_NETMAP_ON; - } else { - if (!nm_netmap_on(pna)) { - /* parent left netmap mode, nothing to restore */ - return 0; + /* we need to lock the monitor receive ring, since it + * is the target of bot tx and rx traffic from the monitored + * adapter + */ + mtx_lock(&mkring->q_lock); + /* get the free slots available on the monitor ring */ + i = mkring->nr_hwtail; + busy = i - mkring->nr_hwcur; + if (busy < 0) + busy += mkring->nkr_num_slots; + free_slots = mlim - busy; + + if (!free_slots) + goto out; + + /* copy min(free_slots, new_slots) slots */ + m = new_slots; + beg = first_new; + if (free_slots < m) { + beg += (m - free_slots); + if (beg >= kring->nkr_num_slots) + beg -= kring->nkr_num_slots; + m = free_slots; } - na->na_flags &= ~NAF_NETMAP_ON; - if (mna->flags & NR_MONITOR_TX) { - for (i = priv->np_txqfirst; i < priv->np_txqlast; i++) { - netmap_set_txring(pna, i, 1 /* stopped */); - kring = &pna->tx_rings[i]; - kring->nm_sync = kring->save_sync; - kring->save_sync = NULL; - netmap_set_txring(pna, i, 0 /* enabled */); + + for ( ; m; m--) { + struct netmap_slot *s = 
&ring->slot[beg]; + struct netmap_slot *ms = &mring->slot[i]; + u_int copy_len = s->len; + char *src = NMB(kring->na, s), + *dst = NMB(mkring->na, ms); + + if (unlikely(copy_len > max_len)) { + RD(5, "%s->%s: truncating %d to %d", kring->name, + mkring->name, copy_len, max_len); + copy_len = max_len; } + + memcpy(dst, src, copy_len); + ms->len = copy_len; + sent++; + + beg = nm_next(beg, lim); + i = nm_next(i, mlim); } - if (mna->flags & NR_MONITOR_RX) { - for (i = priv->np_rxqfirst; i < priv->np_rxqlast; i++) { - netmap_set_rxring(pna, i, 1 /* stopped */); - kring = &pna->rx_rings[i]; - kring->nm_sync = kring->save_sync; - kring->save_sync = NULL; - netmap_set_rxring(pna, i, 0 /* enabled */); - } + mb(); + mkring->nr_hwtail = i; + out: + mtx_unlock(&mkring->q_lock); + + if (sent) { + /* notify the new frames to the monitor */ + mkring->nm_notify(mkring, 0); } } +} + +/* callback used to replace the nm_sync callback in the monitored tx rings */ +static int +netmap_monitor_parent_txsync(struct netmap_kring *kring, int flags) +{ + u_int first_new; + int new_slots; + + /* get the new slots */ + first_new = kring->nr_hwcur; + new_slots = kring->rhead - first_new; + if (new_slots < 0) + new_slots += kring->nkr_num_slots; + if (new_slots) + netmap_monitor_parent_sync(kring, first_new, new_slots); + return kring->mon_sync(kring, flags); +} + +/* callback used to replace the nm_sync callback in the monitored rx rings */ +static int +netmap_monitor_parent_rxsync(struct netmap_kring *kring, int flags) +{ + u_int first_new; + int new_slots, error; + + /* get the new slots */ + error = kring->mon_sync(kring, flags); + if (error) + return error; + first_new = kring->mon_tail; + new_slots = kring->nr_hwtail - first_new; + if (new_slots < 0) + new_slots += kring->nkr_num_slots; + if (new_slots) + netmap_monitor_parent_sync(kring, first_new, new_slots); + kring->mon_tail = kring->nr_hwtail; return 0; } -/* nm_krings_delete callback for monitors */ -static void 
-netmap_monitor_krings_delete(struct netmap_adapter *na) + +/* callback used to replace the nm_notify() callback in the monitored rx rings */ +static int +netmap_monitor_parent_notify(struct netmap_kring *kring, int flags) { - netmap_krings_delete(na); + ND(5, "%s %x", kring->name, flags); + /* ?xsync callbacks have tryget called by their callers + * (NIOCREGIF and poll()), but here we have to call it + * by ourself + */ + if (nm_kr_tryget(kring)) + goto out; + netmap_monitor_parent_rxsync(kring, NAF_FORCE_READ); + nm_kr_put(kring); +out: + return kring->mon_notify(kring, flags); } -/* nm_dtor callback for monitors */ +static int +netmap_monitor_reg(struct netmap_adapter *na, int onoff) +{ + return netmap_monitor_reg_common(na, onoff, 0 /* no zcopy */); +} + static void netmap_monitor_dtor(struct netmap_adapter *na) { @@ -326,22 +678,7 @@ netmap_monitor_dtor(struct netmap_adapter *na) (struct netmap_monitor_adapter *)na; struct netmap_priv_d *priv = &mna->priv; struct netmap_adapter *pna = priv->np_na; - int i; - ND("%p", na); - if (nm_netmap_on(pna)) { - /* parent still in netmap mode, mark its krings as free */ - if (mna->flags & NR_MONITOR_TX) { - for (i = priv->np_txqfirst; i < priv->np_txqlast; i++) { - pna->tx_rings[i].monitor = NULL; - } - } - if (mna->flags & NR_MONITOR_RX) { - for (i = priv->np_rxqfirst; i < priv->np_rxqlast; i++) { - pna->rx_rings[i].monitor = NULL; - } - } - } netmap_adapter_put(pna); } @@ -354,6 +691,9 @@ netmap_get_monitor_na(struct nmreq *nmr, struct netmap_adapter **na, int create) struct netmap_adapter *pna; /* parent adapter */ struct netmap_monitor_adapter *mna; int i, error; + enum txrx t; + int zcopy = (nmr->nr_flags & NR_ZCOPY_MON); + char monsuff[10] = ""; if ((nmr->nr_flags & (NR_MONITOR_TX | NR_MONITOR_RX)) == 0) { ND("not a monitor"); @@ -400,44 +740,65 @@ netmap_get_monitor_na(struct nmreq *nmr, struct netmap_adapter **na, int create) D("ringid error"); goto put_out; } - if (nmr->nr_flags & NR_MONITOR_TX) { - for (i = 
mna->priv.np_txqfirst; i < mna->priv.np_txqlast; i++) { - struct netmap_kring *kring = &pna->tx_rings[i]; - if (kring->monitor) { - error = EBUSY; - D("ring busy"); - goto release_out; + if (mna->priv.np_qlast[NR_TX] - mna->priv.np_qfirst[NR_TX] == 1) { + snprintf(monsuff, 10, "-%d", mna->priv.np_qfirst[NR_TX]); + } + snprintf(mna->up.name, sizeof(mna->up.name), "%s%s/%s%s%s", pna->name, + monsuff, + zcopy ? "z" : "", + (nmr->nr_flags & NR_MONITOR_RX) ? "r" : "", + (nmr->nr_flags & NR_MONITOR_TX) ? "t" : ""); + + if (zcopy) { + /* zero copy monitors need exclusive access to the monitored rings */ + for_rx_tx(t) { + if (! (nmr->nr_flags & nm_txrx2flag(t))) + continue; + for (i = mna->priv.np_qfirst[t]; i < mna->priv.np_qlast[t]; i++) { + struct netmap_kring *kring = &NMR(pna, t)[i]; + if (kring->n_monitors > 0) { + error = EBUSY; + D("ring %s already monitored by %s", kring->name, + kring->monitors[0]->name); + goto put_out; + } } - kring->monitor = mna; } - } - if (nmr->nr_flags & NR_MONITOR_RX) { - for (i = mna->priv.np_rxqfirst; i < mna->priv.np_rxqlast; i++) { - struct netmap_kring *kring = &pna->rx_rings[i]; - if (kring->monitor) { - error = EBUSY; - D("ring busy"); - goto release_out; + mna->up.nm_register = netmap_zmon_reg; + mna->up.nm_dtor = netmap_zmon_dtor; + /* to have zero copy, we need to use the same memory allocator + * as the monitored port + */ + mna->up.nm_mem = pna->nm_mem; + mna->up.na_lut = pna->na_lut; + } else { + /* normal monitors are incompatible with zero copy ones */ + for_rx_tx(t) { + if (! 
(nmr->nr_flags & nm_txrx2flag(t))) + continue; + for (i = mna->priv.np_qfirst[t]; i < mna->priv.np_qlast[t]; i++) { + struct netmap_kring *kring = &NMR(pna, t)[i]; + if (kring->n_monitors > 0 && + kring->monitors[0]->na->nm_register == netmap_zmon_reg) + { + error = EBUSY; + D("ring busy"); + goto put_out; + } } - kring->monitor = mna; } + mna->up.nm_rxsync = netmap_monitor_rxsync; + mna->up.nm_register = netmap_monitor_reg; + mna->up.nm_dtor = netmap_monitor_dtor; } - snprintf(mna->up.name, sizeof(mna->up.name), "mon:%s", pna->name); - /* the monitor supports the host rings iff the parent does */ mna->up.na_flags = (pna->na_flags & NAF_HOST_RINGS); + /* a do-nothing txsync: monitors cannot be used to inject packets */ mna->up.nm_txsync = netmap_monitor_txsync; mna->up.nm_rxsync = netmap_monitor_rxsync; - mna->up.nm_register = netmap_monitor_reg; - mna->up.nm_dtor = netmap_monitor_dtor; mna->up.nm_krings_create = netmap_monitor_krings_create; mna->up.nm_krings_delete = netmap_monitor_krings_delete; - mna->up.nm_mem = pna->nm_mem; - mna->up.na_lut = pna->na_lut; - mna->up.na_lut_objtotal = pna->na_lut_objtotal; - mna->up.na_lut_objsize = pna->na_lut_objsize; - mna->up.num_tx_rings = 1; // XXX we don't need it, but field can't be zero /* we set the number of our rx_rings to be max(num_rx_rings, num_rx_rings) * in the parent @@ -458,7 +819,7 @@ netmap_get_monitor_na(struct nmreq *nmr, struct netmap_adapter **na, int create) error = netmap_attach_common(&mna->up); if (error) { D("attach_common error"); - goto release_out; + goto put_out; } /* remember the traffic directions we have to monitor */ @@ -478,16 +839,6 @@ netmap_get_monitor_na(struct nmreq *nmr, struct netmap_adapter **na, int create) return 0; -release_out: - D("monitor error"); - for (i = mna->priv.np_txqfirst; i < mna->priv.np_txqlast; i++) { - if (pna->tx_rings[i].monitor == mna) - pna->tx_rings[i].monitor = NULL; - } - for (i = mna->priv.np_rxqfirst; i < mna->priv.np_rxqlast; i++) { - if 
(pna->rx_rings[i].monitor == mna) - pna->rx_rings[i].monitor = NULL; - } put_out: netmap_adapter_put(pna); free(mna, M_DEVBUF); diff --git a/sys/dev/netmap/netmap_pipe.c b/sys/dev/netmap/netmap_pipe.c index 64828670c35b..3fe29bb2ff9c 100644 --- a/sys/dev/netmap/netmap_pipe.c +++ b/sys/dev/netmap/netmap_pipe.c @@ -72,51 +72,31 @@ #define NM_PIPE_MAXSLOTS 4096 -int netmap_default_pipes = 0; /* default number of pipes for each nic */ +int netmap_default_pipes = 0; /* ignored, kept for compatibility */ SYSCTL_DECL(_dev_netmap); SYSCTL_INT(_dev_netmap, OID_AUTO, default_pipes, CTLFLAG_RW, &netmap_default_pipes, 0 , ""); /* allocate the pipe array in the parent adapter */ -int -netmap_pipe_alloc(struct netmap_adapter *na, struct nmreq *nmr) +static int +nm_pipe_alloc(struct netmap_adapter *na, u_int npipes) { size_t len; - int mode = nmr->nr_flags & NR_REG_MASK; - u_int npipes; + struct netmap_pipe_adapter **npa; - if (mode == NR_REG_PIPE_MASTER || mode == NR_REG_PIPE_SLAVE) { - /* this is for our parent, not for us */ + if (npipes <= na->na_max_pipes) + /* we already have more entries that requested */ return 0; - } - - /* TODO: we can resize the array if the new - * request can accomodate the already existing pipes - */ - if (na->na_pipes) { - nmr->nr_arg1 = na->na_max_pipes; - return 0; - } - - npipes = nmr->nr_arg1; - if (npipes == 0) - npipes = netmap_default_pipes; - nm_bound_var(&npipes, 0, 0, NM_MAXPIPES, NULL); - - if (npipes == 0) { - /* really zero, nothing to alloc */ - goto out; - } + + if (npipes < na->na_next_pipe || npipes > NM_MAXPIPES) + return EINVAL; - len = sizeof(struct netmap_pipe_adapter *) * npipes; - na->na_pipes = malloc(len, M_DEVBUF, M_NOWAIT | M_ZERO); - if (na->na_pipes == NULL) + len = sizeof(struct netmap_pipe_adapter *) * npipes; + npa = realloc(na->na_pipes, len, M_DEVBUF, M_NOWAIT | M_ZERO); + if (npa == NULL) return ENOMEM; + na->na_pipes = npa; na->na_max_pipes = npipes; - na->na_next_pipe = 0; - -out: - nmr->nr_arg1 = npipes; return 
0; } @@ -126,7 +106,10 @@ void netmap_pipe_dealloc(struct netmap_adapter *na) { if (na->na_pipes) { - ND("freeing pipes for %s", na->name); + if (na->na_next_pipe > 0) { + D("freeing not empty pipe array for %s (%d dangling pipes)!", na->name, + na->na_next_pipe); + } free(na->na_pipes, M_DEVBUF); na->na_pipes = NULL; na->na_max_pipes = 0; @@ -155,8 +138,10 @@ static int netmap_pipe_add(struct netmap_adapter *parent, struct netmap_pipe_adapter *na) { if (parent->na_next_pipe >= parent->na_max_pipes) { - D("%s: no space left for pipes", parent->name); - return ENOMEM; + u_int npipes = parent->na_max_pipes ? 2*parent->na_max_pipes : 2; + int error = nm_pipe_alloc(parent, npipes); + if (error) + return error; } parent->na_pipes[parent->na_next_pipe] = na; @@ -172,8 +157,10 @@ netmap_pipe_remove(struct netmap_adapter *parent, struct netmap_pipe_adapter *na u_int n; n = --parent->na_next_pipe; if (n != na->parent_slot) { - parent->na_pipes[na->parent_slot] = - parent->na_pipes[n]; + struct netmap_pipe_adapter **p = + &parent->na_pipes[na->parent_slot]; + *p = parent->na_pipes[n]; + (*p)->parent_slot = na->parent_slot; } parent->na_pipes[n] = NULL; } @@ -208,7 +195,6 @@ netmap_pipe_txsync(struct netmap_kring *txkring, int flags) if (limit == 0) { /* either the rxring is full, or nothing to send */ - nm_txsync_finalize(txkring); /* actually useless */ return 0; } @@ -222,7 +208,9 @@ netmap_pipe_txsync(struct netmap_kring *txkring, int flags) *rs = *ts; *ts = tmp; - /* no need to report the buffer change */ + /* report the buffer change */ + ts->flags |= NS_BUF_CHANGED; + rs->flags |= NS_BUF_CHANGED; j = nm_next(j, lim_rx); k = nm_next(k, lim_tx); @@ -233,12 +221,11 @@ netmap_pipe_txsync(struct netmap_kring *txkring, int flags) txkring->nr_hwcur = k; txkring->nr_hwtail = nm_prev(k, lim_tx); - nm_txsync_finalize(txkring); ND(2, "after: hwcur %d hwtail %d cur %d head %d tail %d j %d", txkring->nr_hwcur, txkring->nr_hwtail, txkring->rcur, txkring->rhead, txkring->rtail, j); 
mb(); /* make sure rxkring->nr_hwtail is updated before notifying */ - rxkring->na->nm_notify(rxkring->na, rxkring->ring_id, NR_RX, 0); + rxkring->nm_notify(rxkring, 0); return 0; } @@ -254,12 +241,11 @@ netmap_pipe_rxsync(struct netmap_kring *rxkring, int flags) ND(5, "hwcur %d hwtail %d cur %d head %d tail %d", rxkring->nr_hwcur, rxkring->nr_hwtail, rxkring->rcur, rxkring->rhead, rxkring->rtail); mb(); /* paired with the first mb() in txsync */ - nm_rxsync_finalize(rxkring); if (oldhwcur != rxkring->nr_hwcur) { /* we have released some slots, notify the other end */ mb(); /* make sure nr_hwcur is updated before notifying */ - txkring->na->nm_notify(txkring->na, txkring->ring_id, NR_TX, 0); + txkring->nm_notify(txkring, 0); } return 0; } @@ -318,11 +304,13 @@ netmap_pipe_krings_create(struct netmap_adapter *na) (struct netmap_pipe_adapter *)na; struct netmap_adapter *ona = &pna->peer->up; int error = 0; + enum txrx t; + if (pna->peer_ref) { int i; /* case 1) above */ - D("%p: case 1, create everything", na); + ND("%p: case 1, create everything", na); error = netmap_krings_create(na, 0); if (error) goto err; @@ -338,10 +326,10 @@ netmap_pipe_krings_create(struct netmap_adapter *na) goto del_krings1; /* update our hidden ring pointers */ - for (i = 0; i < na->num_tx_rings + 1; i++) - na->tx_rings[i].save_ring = na->tx_rings[i].ring; - for (i = 0; i < na->num_rx_rings + 1; i++) - na->rx_rings[i].save_ring = na->rx_rings[i].ring; + for_rx_tx(t) { + for (i = 0; i < nma_get_nrings(na, t) + 1; i++) + NMR(na, t)[i].save_ring = NMR(na, t)[i].ring; + } /* now, create krings and rings of the other end */ error = netmap_krings_create(ona, 0); @@ -352,27 +340,28 @@ netmap_pipe_krings_create(struct netmap_adapter *na) if (error) goto del_krings2; - for (i = 0; i < ona->num_tx_rings + 1; i++) - ona->tx_rings[i].save_ring = ona->tx_rings[i].ring; - for (i = 0; i < ona->num_rx_rings + 1; i++) - ona->rx_rings[i].save_ring = ona->rx_rings[i].ring; + for_rx_tx(t) { + for (i = 0; i < 
nma_get_nrings(ona, t) + 1; i++) + NMR(ona, t)[i].save_ring = NMR(ona, t)[i].ring; + } /* cross link the krings */ - for (i = 0; i < na->num_tx_rings; i++) { - na->tx_rings[i].pipe = pna->peer->up.rx_rings + i; - na->rx_rings[i].pipe = pna->peer->up.tx_rings + i; - pna->peer->up.tx_rings[i].pipe = na->rx_rings + i; - pna->peer->up.rx_rings[i].pipe = na->tx_rings + i; + for_rx_tx(t) { + enum txrx r= nm_txrx_swap(t); /* swap NR_TX <-> NR_RX */ + for (i = 0; i < nma_get_nrings(na, t); i++) { + NMR(na, t)[i].pipe = NMR(&pna->peer->up, r) + i; + NMR(&pna->peer->up, r)[i].pipe = NMR(na, t) + i; + } } } else { int i; /* case 2) above */ /* recover the hidden rings */ ND("%p: case 2, hidden rings", na); - for (i = 0; i < na->num_tx_rings + 1; i++) - na->tx_rings[i].ring = na->tx_rings[i].save_ring; - for (i = 0; i < na->num_rx_rings + 1; i++) - na->rx_rings[i].ring = na->rx_rings[i].save_ring; + for_rx_tx(t) { + for (i = 0; i < nma_get_nrings(na, t) + 1; i++) + NMR(na, t)[i].ring = NMR(na, t)[i].save_ring; + } } return 0; @@ -423,6 +412,8 @@ netmap_pipe_reg(struct netmap_adapter *na, int onoff) { struct netmap_pipe_adapter *pna = (struct netmap_pipe_adapter *)na; + enum txrx t; + ND("%p: onoff %d", na, onoff); if (onoff) { na->na_flags |= NAF_NETMAP_ON; @@ -443,11 +434,10 @@ netmap_pipe_reg(struct netmap_adapter *na, int onoff) netmap_adapter_get(na); pna->peer->peer_ref = 1; /* hide our rings from netmap_mem_rings_delete */ - for (i = 0; i < na->num_tx_rings + 1; i++) { - na->tx_rings[i].ring = NULL; - } - for (i = 0; i < na->num_rx_rings + 1; i++) { - na->rx_rings[i].ring = NULL; + for_rx_tx(t) { + for (i = 0; i < nma_get_nrings(na, t) + 1; i++) { + NMR(na, t)[i].ring = NULL; + } } } return 0; @@ -481,6 +471,7 @@ netmap_pipe_krings_delete(struct netmap_adapter *na) (struct netmap_pipe_adapter *)na; struct netmap_adapter *ona; /* na of the other end */ int i; + enum txrx t; if (!pna->peer_ref) { ND("%p: case 2, kept alive by peer", na); @@ -496,10 +487,10 @@ 
netmap_pipe_krings_delete(struct netmap_adapter *na) * cleanup-after-error path */ return; } - for (i = 0; i < ona->num_tx_rings + 1; i++) - ona->tx_rings[i].ring = ona->tx_rings[i].save_ring; - for (i = 0; i < ona->num_rx_rings + 1; i++) - ona->rx_rings[i].ring = ona->rx_rings[i].save_ring; + for_rx_tx(t) { + for (i = 0; i < nma_get_nrings(ona, t) + 1; i++) + NMR(ona, t)[i].ring = NMR(ona, t)[i].save_ring; + } netmap_mem_rings_delete(ona); netmap_krings_delete(ona); } @@ -604,8 +595,6 @@ netmap_get_pipe_na(struct nmreq *nmr, struct netmap_adapter **na, int create) mna->up.nm_krings_delete = netmap_pipe_krings_delete; mna->up.nm_mem = pna->nm_mem; mna->up.na_lut = pna->na_lut; - mna->up.na_lut_objtotal = pna->na_lut_objtotal; - mna->up.na_lut_objsize = pna->na_lut_objsize; mna->up.num_tx_rings = 1; mna->up.num_rx_rings = 1; diff --git a/sys/dev/netmap/netmap_vale.c b/sys/dev/netmap/netmap_vale.c index 6b1fe1fdf94b..c2af263c6351 100644 --- a/sys/dev/netmap/netmap_vale.c +++ b/sys/dev/netmap/netmap_vale.c @@ -222,6 +222,10 @@ struct nm_bridge { * the lookup function, and allocated on attach */ struct nm_hash_ent ht[NM_BDG_HASH]; + +#ifdef CONFIG_NET_NS + struct net *ns; +#endif /* CONFIG_NET_NS */ }; const char* @@ -234,12 +238,14 @@ netmap_bdg_name(struct netmap_vp_adapter *vp) } +#ifndef CONFIG_NET_NS /* * XXX in principle nm_bridges could be created dynamically * Right now we have a static array and deletions are protected * by an exclusive lock. */ -struct nm_bridge nm_bridges[NM_BRIDGES]; +struct nm_bridge *nm_bridges; +#endif /* !CONFIG_NET_NS */ /* @@ -283,10 +289,13 @@ static struct nm_bridge * nm_find_bridge(const char *name, int create) { int i, l, namelen; - struct nm_bridge *b = NULL; + struct nm_bridge *b = NULL, *bridges; + u_int num_bridges; NMG_LOCK_ASSERT(); + netmap_bns_getbridges(&bridges, &num_bridges); + namelen = strlen(NM_NAME); /* base length */ l = name ? 
strlen(name) : 0; /* actual length */ if (l < namelen) { @@ -304,8 +313,8 @@ nm_find_bridge(const char *name, int create) ND("--- prefix is '%.*s' ---", namelen, name); /* lookup the name, remember empty slot if there is one */ - for (i = 0; i < NM_BRIDGES; i++) { - struct nm_bridge *x = nm_bridges + i; + for (i = 0; i < num_bridges; i++) { + struct nm_bridge *x = bridges + i; if (x->bdg_active_ports == 0) { if (create && b == NULL) @@ -318,7 +327,7 @@ nm_find_bridge(const char *name, int create) break; } } - if (i == NM_BRIDGES && b) { /* name not found, can create entry */ + if (i == num_bridges && b) { /* name not found, can create entry */ /* initialize the bridge */ strncpy(b->bdg_basename, name, namelen); ND("create new bridge %s with ports %d", b->bdg_basename, @@ -331,6 +340,7 @@ nm_find_bridge(const char *name, int create) b->bdg_ops.lookup = netmap_bdg_learning; /* reset the MAC address table */ bzero(b->ht, sizeof(struct nm_hash_ent) * NM_BDG_HASH); + NM_BNS_GET(b); } return b; } @@ -373,7 +383,7 @@ nm_alloc_bdgfwd(struct netmap_adapter *na) l += sizeof(struct nm_bdg_q) * num_dstq; l += sizeof(uint16_t) * NM_BDG_BATCH_MAX; - nrings = netmap_real_tx_rings(na); + nrings = netmap_real_rings(na, NR_TX); kring = na->tx_rings; for (i = 0; i < nrings; i++) { struct nm_bdg_fwd *ft; @@ -458,6 +468,7 @@ netmap_bdg_detach_common(struct nm_bridge *b, int hw, int sw) if (lim == 0) { ND("marking bridge %s as free", b->bdg_basename); bzero(&b->bdg_ops, sizeof(b->bdg_ops)); + NM_BNS_PUT(b); } } @@ -632,7 +643,7 @@ netmap_get_bdg_na(struct nmreq *nmr, struct netmap_adapter **na, int create) i = b->bdg_port_index[j]; vpna = b->bdg_ports[i]; // KASSERT(na != NULL); - D("checking %s", vpna->up.name); + ND("checking %s", vpna->up.name); if (!strcmp(vpna->up.name, nr_name)) { netmap_adapter_get(&vpna->up); ND("found existing if %s refs %d", nr_name) @@ -813,12 +824,15 @@ unlock_exit: int netmap_bdg_ctl(struct nmreq *nmr, struct netmap_bdg_ops *bdg_ops) { - struct nm_bridge 
*b; + struct nm_bridge *b, *bridges; struct netmap_adapter *na; struct netmap_vp_adapter *vpna; char *name = nmr->nr_name; int cmd = nmr->nr_cmd, namelen = strlen(name); int error = 0, i, j; + u_int num_bridges; + + netmap_bns_getbridges(&bridges, &num_bridges); switch (cmd) { case NETMAP_BDG_NEWIF: @@ -852,7 +866,6 @@ netmap_bdg_ctl(struct nmreq *nmr, struct netmap_bdg_ops *bdg_ops) break; } - name = name + b->bdg_namelen + 1; error = ENOENT; for (j = 0; j < b->bdg_active_ports; j++) { i = b->bdg_port_index[j]; @@ -866,7 +879,7 @@ netmap_bdg_ctl(struct nmreq *nmr, struct netmap_bdg_ops *bdg_ops) */ if (!strcmp(vpna->up.name, name)) { /* bridge index */ - nmr->nr_arg1 = b - nm_bridges; + nmr->nr_arg1 = b - bridges; nmr->nr_arg2 = i; /* port index */ error = 0; break; @@ -886,7 +899,7 @@ netmap_bdg_ctl(struct nmreq *nmr, struct netmap_bdg_ops *bdg_ops) NMG_LOCK(); for (error = ENOENT; i < NM_BRIDGES; i++) { - b = nm_bridges + i; + b = bridges + i; if (j >= b->bdg_active_ports) { j = 0; /* following bridges scan from 0 */ continue; @@ -984,7 +997,7 @@ netmap_vp_krings_create(struct netmap_adapter *na) u_int tailroom; int error, i; uint32_t *leases; - u_int nrx = netmap_real_rx_rings(na); + u_int nrx = netmap_real_rings(na, NR_RX); /* * Leases are attached to RX rings on vale ports @@ -1066,6 +1079,9 @@ nm_bdg_preflush(struct netmap_kring *kring, u_int end) ft[ft_i].ft_flags = slot->flags; ND("flags is 0x%x", slot->flags); + /* we do not use the buf changed flag, but we still need to reset it */ + slot->flags &= ~NS_BUF_CHANGED; + /* this slot goes into a list so initialize the link field */ ft[ft_i].ft_next = NM_FT_NULL; buf = ft[ft_i].ft_buf = (slot->flags & NS_INDIRECT) ? 
@@ -1180,7 +1196,7 @@ netmap_vp_reg(struct netmap_adapter *na, int onoff) */ u_int netmap_bdg_learning(struct nm_bdg_fwd *ft, uint8_t *dst_ring, - const struct netmap_vp_adapter *na) + struct netmap_vp_adapter *na) { uint8_t *buf = ft->ft_buf; u_int buf_len = ft->ft_len; @@ -1211,11 +1227,11 @@ netmap_bdg_learning(struct nm_bdg_fwd *ft, uint8_t *dst_ring, * The hash is somewhat expensive, there might be some * worthwhile optimizations here. */ - if ((buf[6] & 1) == 0) { /* valid src */ + if (((buf[6] & 1) == 0) && (na->last_smac != smac)) { /* valid src */ uint8_t *s = buf+6; sh = nm_bridge_rthash(s); // XXX hash of source /* update source port forwarding entry */ - ht[sh].mac = smac; /* XXX expire ? */ + na->last_smac = ht[sh].mac = smac; /* XXX expire ? */ ht[sh].ports = mysrc; if (netmap_verbose) D("src %02x:%02x:%02x:%02x:%02x:%02x on port %d", @@ -1229,7 +1245,6 @@ netmap_bdg_learning(struct nm_bdg_fwd *ft, uint8_t *dst_ring, } /* XXX otherwise return NM_BDG_UNKNOWN ? */ } - *dst_ring = 0; return dst; } @@ -1475,7 +1490,7 @@ retry: if (dst_na->retry && retry) { /* try to get some free slot from the previous run */ - dst_na->up.nm_notify(&dst_na->up, dst_nr, NR_RX, 0); + kring->nm_notify(kring, 0); /* actually useful only for bwraps, since there * the notify will trigger a txsync on the hwna. VALE ports * have dst_na->retry == 0 @@ -1616,7 +1631,7 @@ retry: kring->nr_hwtail = j; still_locked = 0; mtx_unlock(&kring->q_lock); - dst_na->up.nm_notify(&dst_na->up, dst_nr, NR_RX, 0); + kring->nm_notify(kring, 0); /* this is netmap_notify for VALE ports and * netmap_bwrap_notify for bwrap. 
The latter will * trigger a txsync on the underlying hwna @@ -1649,29 +1664,28 @@ netmap_vp_txsync(struct netmap_kring *kring, int flags) (struct netmap_vp_adapter *)kring->na; u_int done; u_int const lim = kring->nkr_num_slots - 1; - u_int const cur = kring->rcur; + u_int const head = kring->rhead; if (bridge_batch <= 0) { /* testing only */ - done = cur; // used all + done = head; // used all goto done; } if (!na->na_bdg) { - done = cur; + done = head; goto done; } if (bridge_batch > NM_BDG_BATCH) bridge_batch = NM_BDG_BATCH; - done = nm_bdg_preflush(kring, cur); + done = nm_bdg_preflush(kring, head); done: - if (done != cur) - D("early break at %d/ %d, tail %d", done, cur, kring->nr_hwtail); + if (done != head) + D("early break at %d/ %d, tail %d", done, head, kring->nr_hwtail); /* * packets between 'done' and 'cur' are left unsent. */ kring->nr_hwcur = done; kring->nr_hwtail = nm_prev(done, lim); - nm_txsync_finalize(kring); if (netmap_verbose) D("%s ring %d flags %d", na->up.name, kring->ring_id, flags); return 0; @@ -1687,7 +1701,7 @@ netmap_vp_rxsync_locked(struct netmap_kring *kring, int flags) struct netmap_adapter *na = kring->na; struct netmap_ring *ring = kring->ring; u_int nm_i, lim = kring->nkr_num_slots - 1; - u_int head = nm_rxsync_prologue(kring); + u_int head = kring->rhead; int n; if (head > lim) { @@ -1717,8 +1731,6 @@ netmap_vp_rxsync_locked(struct netmap_kring *kring, int flags) kring->nr_hwcur = head; } - /* tell userspace that there are new packets */ - nm_rxsync_finalize(kring); n = 0; done: return n; @@ -1804,12 +1816,13 @@ netmap_vp_create(struct nmreq *nmr, struct ifnet *ifp, struct netmap_vp_adapter na->num_rx_desc = nmr->nr_rx_slots; vpna->virt_hdr_len = 0; vpna->mfs = 1514; + vpna->last_smac = ~0llu; /*if (vpna->mfs > netmap_buf_size) TODO netmap_buf_size is zero?? 
vpna->mfs = netmap_buf_size; */ if (netmap_verbose) D("max frame size %u", vpna->mfs); - na->na_flags |= NAF_BDG_MAYSLEEP | NAF_MEM_OWNER; + na->na_flags |= NAF_BDG_MAYSLEEP; na->nm_txsync = netmap_vp_txsync; na->nm_rxsync = netmap_vp_rxsync; na->nm_register = netmap_vp_reg; @@ -1832,7 +1845,7 @@ netmap_vp_create(struct nmreq *nmr, struct ifnet *ifp, struct netmap_vp_adapter err: if (na->nm_mem != NULL) - netmap_mem_private_delete(na->nm_mem); + netmap_mem_delete(na->nm_mem); free(vpna, M_DEVBUF); return error; } @@ -1913,75 +1926,35 @@ netmap_bwrap_dtor(struct netmap_adapter *na) * The bridge wrapper then sends the packets through the bridge. */ static int -netmap_bwrap_intr_notify(struct netmap_adapter *na, u_int ring_nr, enum txrx tx, int flags) +netmap_bwrap_intr_notify(struct netmap_kring *kring, int flags) { + struct netmap_adapter *na = kring->na; struct netmap_bwrap_adapter *bna = na->na_private; - struct netmap_vp_adapter *hostna = &bna->host; - struct netmap_kring *kring, *bkring; + struct netmap_kring *bkring; struct netmap_ring *ring; - int is_host_ring = ring_nr == na->num_rx_rings; struct netmap_vp_adapter *vpna = &bna->up; + u_int ring_nr = kring->ring_id; int error = 0; if (netmap_verbose) - D("%s %s%d 0x%x", na->name, - (tx == NR_TX ? 
"TX" : "RX"), ring_nr, flags); - - if (flags & NAF_DISABLE_NOTIFY) { - /* the enabled/disabled state of the ring has changed, - * propagate the info to the wrapper (with tx/rx swapped) - */ - if (tx == NR_TX) { - netmap_set_rxring(&vpna->up, ring_nr, - na->tx_rings[ring_nr].nkr_stopped); - } else { - netmap_set_txring(&vpna->up, ring_nr, - na->rx_rings[ring_nr].nkr_stopped); - } - return 0; - } + D("%s %s 0x%x", na->name, kring->name, flags); if (!nm_netmap_on(na)) return 0; - /* we only care about receive interrupts */ - if (tx == NR_TX) - return 0; - - kring = &na->rx_rings[ring_nr]; - ring = kring->ring; + bkring = &vpna->up.tx_rings[ring_nr]; + ring = kring->ring; /* == kbkring->ring */ /* make sure the ring is not disabled */ if (nm_kr_tryget(kring)) return 0; - if (is_host_ring && hostna->na_bdg == NULL) { - error = bna->save_notify(na, ring_nr, tx, flags); - goto put_out; - } - - /* Here we expect ring->head = ring->cur = ring->tail - * because everything has been released from the previous round. - * However the ring is shared and we might have info from - * the wrong side (the tx ring). Hence we overwrite with - * the info from the rx kring. - */ if (netmap_verbose) - D("%s head %d cur %d tail %d (kring %d %d %d)", na->name, - ring->head, ring->cur, ring->tail, + D("%s head %d cur %d tail %d", na->name, kring->rhead, kring->rcur, kring->rtail); - ring->head = kring->rhead; - ring->cur = kring->rcur; - ring->tail = kring->rtail; - - if (is_host_ring) { - vpna = hostna; - ring_nr = 0; - } - /* simulate a user wakeup on the rx ring */ - /* fetch packets that have arrived. - * XXX maybe do this in a loop ? + /* simulate a user wakeup on the rx ring + * fetch packets that have arrived. */ error = kring->nm_sync(kring, 0); if (error) @@ -1992,33 +1965,18 @@ netmap_bwrap_intr_notify(struct netmap_adapter *na, u_int ring_nr, enum txrx tx, goto put_out; } - /* new packets are ring->cur to ring->tail, and the bkring - * had hwcur == ring->cur. 
So advance ring->cur to ring->tail + /* new packets are kring->rcur to kring->nr_hwtail, and the bkring + * had hwcur == bkring->rhead. So advance bkring->rhead to kring->nr_hwtail * to push all packets out. */ - ring->head = ring->cur = ring->tail; - - /* also set tail to what the bwrap expects */ - bkring = &vpna->up.tx_rings[ring_nr]; - ring->tail = bkring->nr_hwtail; // rtail too ? + bkring->rhead = bkring->rcur = kring->nr_hwtail; - /* pass packets to the switch */ - nm_txsync_prologue(bkring); // XXX error checking ? netmap_vp_txsync(bkring, flags); /* mark all buffers as released on this ring */ - ring->head = ring->cur = kring->nr_hwtail; - ring->tail = kring->rtail; + kring->rhead = kring->rcur = kring->rtail = kring->nr_hwtail; /* another call to actually release the buffers */ - if (!is_host_ring) { - error = kring->nm_sync(kring, 0); - } else { - /* mark all packets as released, as in the - * second part of netmap_rxsync_from_host() - */ - kring->nr_hwcur = kring->nr_hwtail; - nm_rxsync_finalize(kring); - } + error = kring->nm_sync(kring, 0); put_out: nm_kr_put(kring); @@ -2035,6 +1993,7 @@ netmap_bwrap_register(struct netmap_adapter *na, int onoff) struct netmap_adapter *hwna = bna->hwna; struct netmap_vp_adapter *hostna = &bna->host; int error; + enum txrx t; ND("%s %s", na->name, onoff ? 
"on" : "off"); @@ -2047,8 +2006,6 @@ netmap_bwrap_register(struct netmap_adapter *na, int onoff) * putting it in netmap mode */ hwna->na_lut = na->na_lut; - hwna->na_lut_objtotal = na->na_lut_objtotal; - hwna->na_lut_objsize = na->na_lut_objsize; if (hostna->na_bdg) { /* if the host rings have been attached to switch, @@ -2056,8 +2013,6 @@ netmap_bwrap_register(struct netmap_adapter *na, int onoff) * in the hostna also */ hostna->up.na_lut = na->na_lut; - hostna->up.na_lut_objtotal = na->na_lut_objtotal; - hostna->up.na_lut_objsize = na->na_lut_objsize; } /* cross-link the netmap rings @@ -2066,13 +2021,12 @@ netmap_bwrap_register(struct netmap_adapter *na, int onoff) * We need to do this now, after the initialization * of the kring->ring pointers */ - for (i = 0; i < na->num_rx_rings + 1; i++) { - hwna->tx_rings[i].nkr_num_slots = na->rx_rings[i].nkr_num_slots; - hwna->tx_rings[i].ring = na->rx_rings[i].ring; - } - for (i = 0; i < na->num_tx_rings + 1; i++) { - hwna->rx_rings[i].nkr_num_slots = na->tx_rings[i].nkr_num_slots; - hwna->rx_rings[i].ring = na->tx_rings[i].ring; + for_rx_tx(t) { + enum txrx r= nm_txrx_swap(t); /* swap NR_TX <-> NR_RX */ + for (i = 0; i < nma_get_nrings(na, r) + 1; i++) { + NMR(hwna, t)[i].nkr_num_slots = NMR(na, r)[i].nkr_num_slots; + NMR(hwna, t)[i].ring = NMR(na, r)[i].ring; + } } } @@ -2087,14 +2041,29 @@ netmap_bwrap_register(struct netmap_adapter *na, int onoff) netmap_vp_reg(&hostna->up, onoff); if (onoff) { - /* intercept the hwna nm_nofify callback */ - bna->save_notify = hwna->nm_notify; - hwna->nm_notify = netmap_bwrap_intr_notify; + u_int i; + /* intercept the hwna nm_nofify callback on the hw rings */ + for (i = 0; i < hwna->num_rx_rings; i++) { + hwna->rx_rings[i].save_notify = hwna->rx_rings[i].nm_notify; + hwna->rx_rings[i].nm_notify = netmap_bwrap_intr_notify; + } + i = hwna->num_rx_rings; /* for safety */ + /* save the host ring notify unconditionally */ + hwna->rx_rings[i].save_notify = hwna->rx_rings[i].nm_notify; + 
if (hostna->na_bdg) { + /* also intercept the host ring notify */ + hwna->rx_rings[i].nm_notify = netmap_bwrap_intr_notify; + } } else { - hwna->nm_notify = bna->save_notify; - hwna->na_lut = NULL; - hwna->na_lut_objtotal = 0; - hwna->na_lut_objsize = 0; + u_int i; + /* reset all notify callbacks (including host ring) */ + for (i = 0; i <= hwna->num_rx_rings; i++) { + hwna->rx_rings[i].nm_notify = hwna->rx_rings[i].save_notify; + hwna->rx_rings[i].save_notify = NULL; + } + hwna->na_lut.lut = NULL; + hwna->na_lut.objtotal = 0; + hwna->na_lut.objsize = 0; } return 0; @@ -2154,9 +2123,9 @@ netmap_bwrap_krings_create(struct netmap_adapter *na) * The corresponding krings must point back to the * hostna */ - hostna->tx_rings = na->tx_rings + na->num_tx_rings; + hostna->tx_rings = &na->tx_rings[na->num_tx_rings]; hostna->tx_rings[0].na = hostna; - hostna->rx_rings = na->rx_rings + na->num_rx_rings; + hostna->rx_rings = &na->rx_rings[na->num_rx_rings]; hostna->rx_rings[0].na = hostna; } @@ -2180,74 +2149,59 @@ netmap_bwrap_krings_delete(struct netmap_adapter *na) /* notify method for the bridge-->hwna direction */ static int -netmap_bwrap_notify(struct netmap_adapter *na, u_int ring_n, enum txrx tx, int flags) +netmap_bwrap_notify(struct netmap_kring *kring, int flags) { - struct netmap_bwrap_adapter *bna = - (struct netmap_bwrap_adapter *)na; + struct netmap_adapter *na = kring->na; + struct netmap_bwrap_adapter *bna = na->na_private; struct netmap_adapter *hwna = bna->hwna; - struct netmap_kring *kring, *hw_kring; - struct netmap_ring *ring; - u_int lim; + u_int ring_n = kring->ring_id; + u_int lim = kring->nkr_num_slots - 1; + struct netmap_kring *hw_kring; int error = 0; - if (tx == NR_TX) - return EINVAL; - - kring = &na->rx_rings[ring_n]; + ND("%s: na %s hwna %s", + (kring ? kring->name : "NULL!"), + (na ? na->name : "NULL!"), + (hwna ? 
hwna->name : "NULL!")); hw_kring = &hwna->tx_rings[ring_n]; - ring = kring->ring; - lim = kring->nkr_num_slots - 1; + + if (nm_kr_tryget(hw_kring)) + return 0; if (!nm_netmap_on(hwna)) return 0; - mtx_lock(&kring->q_lock); /* first step: simulate a user wakeup on the rx ring */ - netmap_vp_rxsync_locked(kring, flags); + netmap_vp_rxsync(kring, flags); ND("%s[%d] PRE rx(c%3d t%3d l%3d) ring(h%3d c%3d t%3d) tx(c%3d ht%3d t%3d)", na->name, ring_n, kring->nr_hwcur, kring->nr_hwtail, kring->nkr_hwlease, ring->head, ring->cur, ring->tail, hw_kring->nr_hwcur, hw_kring->nr_hwtail, hw_ring->rtail); - /* second step: the simulated user consumes all new packets */ - ring->head = ring->cur = ring->tail; - - /* third step: the new packets are sent on the tx ring + /* second step: the new packets are sent on the tx ring * (which is actually the same ring) */ - /* set tail to what the hw expects */ - ring->tail = hw_kring->rtail; - nm_txsync_prologue(&hwna->tx_rings[ring_n]); // XXX error checking ? + hw_kring->rhead = hw_kring->rcur = kring->nr_hwtail; error = hw_kring->nm_sync(hw_kring, flags); + if (error) + goto out; - /* fourth step: now we are back the rx ring */ + /* third step: now we are back the rx ring */ /* claim ownership on all hw owned bufs */ - ring->head = nm_next(ring->tail, lim); /* skip past reserved slot */ - ring->tail = kring->rtail; /* restore saved value of tail, for safety */ + kring->rhead = kring->rcur = nm_next(hw_kring->nr_hwtail, lim); /* skip past reserved slot */ - /* fifth step: the user goes to sleep again, causing another rxsync */ - netmap_vp_rxsync_locked(kring, flags); + /* fourth step: the user goes to sleep again, causing another rxsync */ + netmap_vp_rxsync(kring, flags); ND("%s[%d] PST rx(c%3d t%3d l%3d) ring(h%3d c%3d t%3d) tx(c%3d ht%3d t%3d)", na->name, ring_n, kring->nr_hwcur, kring->nr_hwtail, kring->nkr_hwlease, ring->head, ring->cur, ring->tail, hw_kring->nr_hwcur, hw_kring->nr_hwtail, hw_kring->rtail); - 
mtx_unlock(&kring->q_lock); +out: + nm_kr_put(hw_kring); return error; } -/* notify method for the bridge-->host-rings path */ -static int -netmap_bwrap_host_notify(struct netmap_adapter *na, u_int ring_n, enum txrx tx, int flags) -{ - struct netmap_bwrap_adapter *bna = na->na_private; - struct netmap_adapter *port_na = &bna->up.up; - if (tx == NR_TX || ring_n != 0) - return EINVAL; - return netmap_bwrap_notify(port_na, port_na->num_rx_rings, NR_RX, flags); -} - - /* nm_bdg_ctl callback for the bwrap. * Called on bridge-attach and detach, as an effect of vale-ctl -[ahd]. * On attach, it needs to provide a fake netmap_priv_d structure and @@ -2261,7 +2215,6 @@ netmap_bwrap_bdg_ctl(struct netmap_adapter *na, struct nmreq *nmr, int attach) { struct netmap_priv_d *npriv; struct netmap_bwrap_adapter *bna = (struct netmap_bwrap_adapter*)na; - struct netmap_if *nifp; int error = 0; if (attach) { @@ -2275,8 +2228,8 @@ netmap_bwrap_bdg_ctl(struct netmap_adapter *na, struct nmreq *nmr, int attach) npriv = malloc(sizeof(*npriv), M_DEVBUF, M_NOWAIT|M_ZERO); if (npriv == NULL) return ENOMEM; - nifp = netmap_do_regif(npriv, na, nmr->nr_ringid, nmr->nr_flags, &error); - if (!nifp) { + error = netmap_do_regif(npriv, na, nmr->nr_ringid, nmr->nr_flags); + if (error) { bzero(npriv, sizeof(*npriv)); free(npriv, M_DEVBUF); return error; @@ -2323,6 +2276,7 @@ netmap_bwrap_attach(const char *nr_name, struct netmap_adapter *hwna) struct netmap_adapter *na = NULL; struct netmap_adapter *hostna = NULL; int error = 0; + enum txrx t; /* make sure the NIC is not already in use */ if (NETMAP_OWNED_BY_ANY(hwna)) { @@ -2336,15 +2290,17 @@ netmap_bwrap_attach(const char *nr_name, struct netmap_adapter *hwna) } na = &bna->up.up; + na->na_private = bna; strncpy(na->name, nr_name, sizeof(na->name)); /* fill the ring data for the bwrap adapter with rx/tx meanings * swapped. The real cross-linking will be done during register, * when all the krings will have been created. 
*/ - na->num_rx_rings = hwna->num_tx_rings; - na->num_tx_rings = hwna->num_rx_rings; - na->num_tx_desc = hwna->num_rx_desc; - na->num_rx_desc = hwna->num_tx_desc; + for_rx_tx(t) { + enum txrx r = nm_txrx_swap(t); /* swap NR_TX <-> NR_RX */ + nma_set_nrings(na, t, nma_get_nrings(hwna, r)); + nma_set_ndesc(na, t, nma_get_ndesc(hwna, r)); + } na->nm_dtor = netmap_bwrap_dtor; na->nm_register = netmap_bwrap_register; // na->nm_txsync = netmap_bwrap_txsync; @@ -2376,13 +2332,14 @@ netmap_bwrap_attach(const char *nr_name, struct netmap_adapter *hwna) hostna = &bna->host.up; snprintf(hostna->name, sizeof(hostna->name), "%s^", nr_name); hostna->ifp = hwna->ifp; - hostna->num_tx_rings = 1; - hostna->num_tx_desc = hwna->num_rx_desc; - hostna->num_rx_rings = 1; - hostna->num_rx_desc = hwna->num_tx_desc; + for_rx_tx(t) { + enum txrx r = nm_txrx_swap(t); + nma_set_nrings(hostna, t, 1); + nma_set_ndesc(hostna, t, nma_get_ndesc(hwna, r)); + } // hostna->nm_txsync = netmap_bwrap_host_txsync; // hostna->nm_rxsync = netmap_bwrap_host_rxsync; - hostna->nm_notify = netmap_bwrap_host_notify; + hostna->nm_notify = netmap_bwrap_notify; hostna->nm_mem = na->nm_mem; hostna->na_private = bna; hostna->na_vp = &bna->up; @@ -2416,7 +2373,7 @@ netmap_bwrap_attach(const char *nr_name, struct netmap_adapter *hwna) return 0; err_free: - netmap_mem_private_delete(na->nm_mem); + netmap_mem_delete(na->nm_mem); err_put: hwna->na_vp = hwna->na_hostvp = NULL; netmap_adapter_put(hwna); @@ -2425,13 +2382,54 @@ err_put: } +struct nm_bridge * +netmap_init_bridges2(u_int n) +{ + int i; + struct nm_bridge *b; + + b = malloc(sizeof(struct nm_bridge) * n, M_DEVBUF, + M_NOWAIT | M_ZERO); + if (b == NULL) + return NULL; + for (i = 0; i < n; i++) + BDG_RWINIT(&b[i]); + return b; +} void -netmap_init_bridges(void) +netmap_uninit_bridges2(struct nm_bridge *b, u_int n) { int i; - bzero(nm_bridges, sizeof(struct nm_bridge) * NM_BRIDGES); /* safety */ - for (i = 0; i < NM_BRIDGES; i++) - BDG_RWINIT(&nm_bridges[i]); + + 
if (b == NULL) + return; + + for (i = 0; i < n; i++) + BDG_RWDESTROY(&b[i]); + free(b, M_DEVBUF); +} + +int +netmap_init_bridges(void) +{ +#ifdef CONFIG_NET_NS + return netmap_bns_register(); +#else + nm_bridges = netmap_init_bridges2(NM_BRIDGES); + if (nm_bridges == NULL) + return ENOMEM; + return 0; +#endif +} + +void +netmap_uninit_bridges(void) +{ +#ifdef CONFIG_NET_NS + netmap_bns_unregister(); +#else + netmap_uninit_bridges2(nm_bridges, NM_BRIDGES); +#endif } #endif /* WITH_VALE */ diff --git a/sys/dev/re/if_re.c b/sys/dev/re/if_re.c index a67d367cfdf9..677d397a1c7a 100644 --- a/sys/dev/re/if_re.c +++ b/sys/dev/re/if_re.c @@ -304,6 +304,7 @@ static void re_set_linkspeed (struct rl_softc *); #ifdef DEV_NETMAP /* see ixgbe.c for details */ #include <dev/netmap/if_re_netmap.h> +MODULE_DEPEND(re, netmap, 1, 1, 1); #endif /* !DEV_NETMAP */ #ifdef RE_DIAG diff --git a/sys/net/netmap.h b/sys/net/netmap.h index 1203bfb37fff..88b2957502ab 100644 --- a/sys/net/netmap.h +++ b/sys/net/netmap.h @@ -157,6 +157,11 @@ struct netmap_slot { /* * must be set whenever buf_idx is changed (as it might be * necessary to recompute the physical address and mapping) + * + * It is also set by the kernel whenever the buf_idx is + * changed internally (e.g., by pipes). Applications may + * use this information to know when they can reuse the + * contents of previously prepared buffers. */ #define NS_REPORT 0x0002 /* ask the hardware to report results */ @@ -513,6 +518,9 @@ enum { NR_REG_DEFAULT = 0, /* backward compat, should not be used. 
*/ /* monitor uses the NR_REG to select the rings to monitor */ #define NR_MONITOR_TX 0x100 #define NR_MONITOR_RX 0x200 +#define NR_ZCOPY_MON 0x400 +/* request exclusive access to the selected rings */ +#define NR_EXCLUSIVE 0x800 /* diff --git a/sys/net/netmap_user.h b/sys/net/netmap_user.h index aab6c358de73..130117db7a2e 100644 --- a/sys/net/netmap_user.h +++ b/sys/net/netmap_user.h @@ -284,6 +284,12 @@ typedef void (*nm_cb_t)(u_char *, const struct nm_pkthdr *, const u_char *d); * -NN bind individual NIC ring pair * {NN bind master side of pipe NN * }NN bind slave side of pipe NN + * a suffix starting with + and the following flags, + * in any order: + * x exclusive access + * z zero copy monitor + * t monitor tx side + * r monitor rx side * * req provides the initial values of nmreq before parsing ifname. * Remember that the ifname parsing will override the ring @@ -351,9 +357,12 @@ nm_open(const char *ifname, const struct nmreq *req, struct nm_desc *d = NULL; const struct nm_desc *parent = arg; u_int namelen; - uint32_t nr_ringid = 0, nr_flags; + uint32_t nr_ringid = 0, nr_flags, nr_reg; const char *port = NULL; - const char *errmsg = NULL; +#define MAXERRMSG 80 + char errmsg[MAXERRMSG] = ""; + enum { P_START, P_RNGSFXOK, P_GETNUM, P_FLAGS, P_FLAGSOK } p_state; + long num; if (strncmp(ifname, "netmap:", 7) && strncmp(ifname, "vale", 4)) { errno = 0; /* name not recognised, not an error */ @@ -362,60 +371,112 @@ nm_open(const char *ifname, const struct nmreq *req, if (ifname[0] == 'n') ifname += 7; /* scan for a separator */ - for (port = ifname; *port && !index("-*^{}", *port); port++) + for (port = ifname; *port && !index("-*^{}/", *port); port++) ; namelen = port - ifname; if (namelen >= sizeof(d->req.nr_name)) { - errmsg = "name too long"; + snprintf(errmsg, MAXERRMSG, "name too long"); goto fail; } - switch (*port) { - default: /* '\0', no suffix */ - nr_flags = NR_REG_ALL_NIC; - break; - case '-': /* one NIC */ - nr_flags = NR_REG_ONE_NIC; - nr_ringid = 
atoi(port + 1); - break; - case '*': /* NIC and SW, ignore port */ - nr_flags = NR_REG_NIC_SW; - if (port[1]) { - errmsg = "invalid port for nic+sw"; - goto fail; - } - break; - case '^': /* only sw ring */ - nr_flags = NR_REG_SW; - if (port[1]) { - errmsg = "invalid port for sw ring"; - goto fail; + p_state = P_START; + nr_flags = NR_REG_ALL_NIC; /* default for no suffix */ + while (*port) { + switch (p_state) { + case P_START: + switch (*port) { + case '^': /* only SW ring */ + nr_flags = NR_REG_SW; + p_state = P_RNGSFXOK; + break; + case '*': /* NIC and SW */ + nr_flags = NR_REG_NIC_SW; + p_state = P_RNGSFXOK; + break; + case '-': /* one NIC ring pair */ + nr_flags = NR_REG_ONE_NIC; + p_state = P_GETNUM; + break; + case '{': /* pipe (master endpoint) */ + nr_flags = NR_REG_PIPE_MASTER; + p_state = P_GETNUM; + break; + case '}': /* pipe (slave endoint) */ + nr_flags = NR_REG_PIPE_SLAVE; + p_state = P_GETNUM; + break; + case '/': /* start of flags */ + p_state = P_FLAGS; + break; + default: + snprintf(errmsg, MAXERRMSG, "unknown modifier: '%c'", *port); + goto fail; + } + port++; + break; + case P_RNGSFXOK: + switch (*port) { + case '/': + p_state = P_FLAGS; + break; + default: + snprintf(errmsg, MAXERRMSG, "unexpected character: '%c'", *port); + goto fail; + } + port++; + break; + case P_GETNUM: + num = strtol(port, (char **)&port, 10); + if (num < 0 || num >= NETMAP_RING_MASK) { + snprintf(errmsg, MAXERRMSG, "'%ld' out of range [0, %d)", + num, NETMAP_RING_MASK); + goto fail; + } + nr_ringid = num & NETMAP_RING_MASK; + p_state = P_RNGSFXOK; + break; + case P_FLAGS: + case P_FLAGSOK: + switch (*port) { + case 'x': + nr_flags |= NR_EXCLUSIVE; + break; + case 'z': + nr_flags |= NR_ZCOPY_MON; + break; + case 't': + nr_flags |= NR_MONITOR_TX; + break; + case 'r': + nr_flags |= NR_MONITOR_RX; + break; + default: + snprintf(errmsg, MAXERRMSG, "unrecognized flag: '%c'", *port); + goto fail; + } + port++; + p_state = P_FLAGSOK; + break; } - break; - case '{': - nr_flags 
= NR_REG_PIPE_MASTER; - nr_ringid = atoi(port + 1); - break; - case '}': - nr_flags = NR_REG_PIPE_SLAVE; - nr_ringid = atoi(port + 1); - break; } - - if (nr_ringid >= NETMAP_RING_MASK) { - errmsg = "invalid ringid"; + if (p_state != P_START && p_state != P_RNGSFXOK && p_state != P_FLAGSOK) { + snprintf(errmsg, MAXERRMSG, "unexpected end of port name"); goto fail; } - + ND("flags: %s %s %s %s", + (nr_flags & NR_EXCLUSIVE) ? "EXCLUSIVE" : "", + (nr_flags & NR_ZCOPY_MON) ? "ZCOPY_MON" : "", + (nr_flags & NR_MONITOR_TX) ? "MONITOR_TX" : "", + (nr_flags & NR_MONITOR_RX) ? "MONITOR_RX" : ""); d = (struct nm_desc *)calloc(1, sizeof(*d)); if (d == NULL) { - errmsg = "nm_desc alloc failure"; + snprintf(errmsg, MAXERRMSG, "nm_desc alloc failure"); errno = ENOMEM; return NULL; } d->self = d; /* set this early so nm_close() works */ d->fd = open("/dev/netmap", O_RDWR); if (d->fd < 0) { - errmsg = "cannot open /dev/netmap"; + snprintf(errmsg, MAXERRMSG, "cannot open /dev/netmap: %s", strerror(errno)); goto fail; } @@ -464,7 +525,7 @@ nm_open(const char *ifname, const struct nmreq *req, d->req.nr_ringid |= new_flags & (NETMAP_NO_TX_POLL | NETMAP_DO_RX_POLL); if (ioctl(d->fd, NIOCREGIF, &d->req)) { - errmsg = "NIOCREGIF failed"; + snprintf(errmsg, MAXERRMSG, "NIOCREGIF failed: %s", strerror(errno)); goto fail; } @@ -479,7 +540,7 @@ nm_open(const char *ifname, const struct nmreq *req, d->mem = mmap(0, d->memsize, PROT_WRITE | PROT_READ, MAP_SHARED, d->fd, 0); if (d->mem == MAP_FAILED) { - errmsg = "mmap failed"; + snprintf(errmsg, MAXERRMSG, "mmap failed: %s", strerror(errno)); goto fail; } d->done_mmap = 1; @@ -495,20 +556,22 @@ nm_open(const char *ifname, const struct nmreq *req, (char *)d->mem + d->memsize; } - if (d->req.nr_flags == NR_REG_SW) { /* host stack */ + nr_reg = d->req.nr_flags & NR_REG_MASK; + + if (nr_reg == NR_REG_SW) { /* host stack */ d->first_tx_ring = d->last_tx_ring = d->req.nr_tx_rings; d->first_rx_ring = d->last_rx_ring = d->req.nr_rx_rings; - } else if 
(d->req.nr_flags == NR_REG_ALL_NIC) { /* only nic */ + } else if (nr_reg == NR_REG_ALL_NIC) { /* only nic */ d->first_tx_ring = 0; d->first_rx_ring = 0; d->last_tx_ring = d->req.nr_tx_rings - 1; d->last_rx_ring = d->req.nr_rx_rings - 1; - } else if (d->req.nr_flags == NR_REG_NIC_SW) { + } else if (nr_reg == NR_REG_NIC_SW) { d->first_tx_ring = 0; d->first_rx_ring = 0; d->last_tx_ring = d->req.nr_tx_rings; d->last_rx_ring = d->req.nr_rx_rings; - } else if (d->req.nr_flags == NR_REG_ONE_NIC) { + } else if (nr_reg == NR_REG_ONE_NIC) { /* XXX check validity */ d->first_tx_ring = d->last_tx_ring = d->first_rx_ring = d->last_rx_ring = d->req.nr_ringid & NETMAP_RING_MASK; @@ -541,7 +604,7 @@ nm_open(const char *ifname, const struct nmreq *req, fail: nm_close(d); - if (errmsg) + if (errmsg[0]) D("%s %s", errmsg, ifname); if (errno == 0) errno = EINVAL; |