aboutsummaryrefslogtreecommitdiff
path: root/sys/dev/netmap/netmap_freebsd.c
diff options
context:
space:
mode:
authorLuigi Rizzo <luigi@FreeBSD.org>2016-10-16 14:13:32 +0000
committerLuigi Rizzo <luigi@FreeBSD.org>2016-10-16 14:13:32 +0000
commit37e3a6d349581b4dd0aebf24be7b1b159a698dcf (patch)
tree0e61deea141c9733af511b0485cf1fd0f2dd17ed /sys/dev/netmap/netmap_freebsd.c
parent63f6b1a75a8e6e33e4f9d65571c6a221444d3b05 (diff)
Notes
Diffstat (limited to 'sys/dev/netmap/netmap_freebsd.c')
-rw-r--r--sys/dev/netmap/netmap_freebsd.c762
1 files changed, 701 insertions, 61 deletions
diff --git a/sys/dev/netmap/netmap_freebsd.c b/sys/dev/netmap/netmap_freebsd.c
index 8490ae85670b..20ea5c8f2972 100644
--- a/sys/dev/netmap/netmap_freebsd.c
+++ b/sys/dev/netmap/netmap_freebsd.c
@@ -33,8 +33,9 @@
#include <sys/param.h> /* defines used in kernel.h */
#include <sys/poll.h> /* POLLIN, POLLOUT */
#include <sys/kernel.h> /* types used in module initialization */
-#include <sys/conf.h> /* DEV_MODULE */
+#include <sys/conf.h> /* DEV_MODULE_ORDERED */
#include <sys/endian.h>
+#include <sys/syscallsubr.h> /* kern_ioctl() */
#include <sys/rwlock.h>
@@ -50,6 +51,11 @@
#include <sys/malloc.h>
#include <sys/socket.h> /* sockaddrs */
#include <sys/selinfo.h>
+#include <sys/kthread.h> /* kthread_add() */
+#include <sys/proc.h> /* PROC_LOCK() */
+#include <sys/unistd.h> /* RFNOWAIT */
+#include <sys/sched.h> /* sched_bind() */
+#include <sys/smp.h> /* mp_maxid */
#include <net/if.h>
#include <net/if_var.h>
#include <net/if_types.h> /* IFT_ETHER */
@@ -61,13 +67,94 @@
#include <net/netmap.h>
#include <dev/netmap/netmap_kern.h>
+#include <net/netmap_virt.h>
#include <dev/netmap/netmap_mem2.h>
/* ======================== FREEBSD-SPECIFIC ROUTINES ================== */
/*
 * Initialize a netmap selinfo: set up the mutex embedded in si (si->m)
 * and initialize the knote list si->si.si_note with that mutex.
 */
+void nm_os_selinfo_init(NM_SELINFO_T *si) {
+ struct mtx *m = &si->m;
+ mtx_init(m, "nm_kn_lock", NULL, MTX_DEF);
+ knlist_init_mtx(&si->si.si_note, m);
+}
+
/*
 * Counterpart of nm_os_selinfo_init(): empty (knlist_delete) and
 * destroy (knlist_destroy) the knote list si->si.si_note, then destroy
 * the mutex si->m the list was initialized with.
 */
+void
+nm_os_selinfo_uninit(NM_SELINFO_T *si)
+{
+ /* XXX kqueue(9) needed; these will mirror knlist_init. */
+ knlist_delete(&si->si.si_note, curthread, 0 /* not locked */ );
+ knlist_destroy(&si->si.si_note);
+ /* now we don't need the mutex anymore */
+ mtx_destroy(&si->m);
+}
+
/* Take the ifnet write lock; thin OS wrapper around IFNET_WLOCK(). */
+void
+nm_os_ifnet_lock(void)
+{
+ IFNET_WLOCK();
+}
+
/* Release the ifnet write lock; thin OS wrapper around IFNET_WUNLOCK(). */
+void
+nm_os_ifnet_unlock(void)
+{
+ IFNET_WUNLOCK();
+}
+
+static int netmap_use_count = 0;
+
/*
 * Increment the module use counter (netmap_use_count).
 * The increment is a plain ++ and this function takes no lock.
 * NOTE(review): presumably callers serialize access (e.g. under the
 * global netmap lock) -- confirm.
 */
+void
+nm_os_get_module(void)
+{
+ netmap_use_count++;
+}
+
/*
 * Decrement the module use counter (netmap_use_count).
 * The decrement is a plain -- and this function takes no lock.
 * NOTE(review): presumably callers serialize access (e.g. under the
 * global netmap lock) -- confirm.
 */
+void
+nm_os_put_module(void)
+{
+ netmap_use_count--;
+}
+
/*
 * Callback registered on ifnet_arrival_event by nm_os_ifnet_init():
 * calls netmap_undo_zombie() on the interface. The arg parameter is unused.
 */
+static void
+netmap_ifnet_arrival_handler(void *arg __unused, struct ifnet *ifp)
+{
+ netmap_undo_zombie(ifp);
+}
+
/*
 * Callback registered on ifnet_departure_event by nm_os_ifnet_init():
 * calls netmap_make_zombie() on the interface. The arg parameter is unused.
 */
+static void
+netmap_ifnet_departure_handler(void *arg __unused, struct ifnet *ifp)
+{
+ netmap_make_zombie(ifp);
+}
+
+static eventhandler_tag nm_ifnet_ah_tag;
+static eventhandler_tag nm_ifnet_dh_tag;
+
/*
 * Register the ifnet arrival and departure event handlers, both with
 * priority EVENTHANDLER_PRI_ANY and a NULL argument, and remember the
 * returned tags in nm_ifnet_ah_tag / nm_ifnet_dh_tag so that
 * nm_os_ifnet_fini() can deregister them.
 * The tags are not checked here; the function always returns 0.
 */
+int
+nm_os_ifnet_init(void)
+{
+ nm_ifnet_ah_tag =
+ EVENTHANDLER_REGISTER(ifnet_arrival_event,
+ netmap_ifnet_arrival_handler,
+ NULL, EVENTHANDLER_PRI_ANY);
+ nm_ifnet_dh_tag =
+ EVENTHANDLER_REGISTER(ifnet_departure_event,
+ netmap_ifnet_departure_handler,
+ NULL, EVENTHANDLER_PRI_ANY);
+ return 0;
+}
+
/*
 * Deregister the ifnet arrival and departure event handlers using the
 * tags saved by nm_os_ifnet_init().
 */
+void
+nm_os_ifnet_fini(void)
+{
+ EVENTHANDLER_DEREGISTER(ifnet_arrival_event,
+ nm_ifnet_ah_tag);
+ EVENTHANDLER_DEREGISTER(ifnet_departure_event,
+ nm_ifnet_dh_tag);
+}
+
rawsum_t
-nm_csum_raw(uint8_t *data, size_t len, rawsum_t cur_sum)
+nm_os_csum_raw(uint8_t *data, size_t len, rawsum_t cur_sum)
{
/* TODO XXX please use the FreeBSD implementation for this. */
uint16_t *words = (uint16_t *)data;
@@ -87,7 +174,7 @@ nm_csum_raw(uint8_t *data, size_t len, rawsum_t cur_sum)
* return value is in network byte order.
*/
uint16_t
-nm_csum_fold(rawsum_t cur_sum)
+nm_os_csum_fold(rawsum_t cur_sum)
{
/* TODO XXX please use the FreeBSD implementation for this. */
while (cur_sum >> 16)
@@ -96,17 +183,17 @@ nm_csum_fold(rawsum_t cur_sum)
return htobe16((~cur_sum) & 0xFFFF);
}
-uint16_t nm_csum_ipv4(struct nm_iphdr *iph)
+uint16_t nm_os_csum_ipv4(struct nm_iphdr *iph)
{
#if 0
return in_cksum_hdr((void *)iph);
#else
- return nm_csum_fold(nm_csum_raw((uint8_t*)iph, sizeof(struct nm_iphdr), 0));
+ return nm_os_csum_fold(nm_os_csum_raw((uint8_t*)iph, sizeof(struct nm_iphdr), 0));
#endif
}
void
-nm_csum_tcpudp_ipv4(struct nm_iphdr *iph, void *data,
+nm_os_csum_tcpudp_ipv4(struct nm_iphdr *iph, void *data,
size_t datalen, uint16_t *check)
{
#ifdef INET
@@ -118,7 +205,7 @@ nm_csum_tcpudp_ipv4(struct nm_iphdr *iph, void *data,
/* Compute the checksum on TCP/UDP header + payload
* (includes the pseudo-header).
*/
- *check = nm_csum_fold(nm_csum_raw(data, datalen, 0));
+ *check = nm_os_csum_fold(nm_os_csum_raw(data, datalen, 0));
#else
static int notsupported = 0;
if (!notsupported) {
@@ -129,12 +216,12 @@ nm_csum_tcpudp_ipv4(struct nm_iphdr *iph, void *data,
}
void
-nm_csum_tcpudp_ipv6(struct nm_ipv6hdr *ip6h, void *data,
+nm_os_csum_tcpudp_ipv6(struct nm_ipv6hdr *ip6h, void *data,
size_t datalen, uint16_t *check)
{
#ifdef INET6
*check = in6_cksum_pseudo((void*)ip6h, datalen, ip6h->nexthdr, 0);
- *check = nm_csum_fold(nm_csum_raw(data, datalen, 0));
+ *check = nm_os_csum_fold(nm_os_csum_raw(data, datalen, 0));
#else
static int notsupported = 0;
if (!notsupported) {
@@ -144,13 +231,41 @@ nm_csum_tcpudp_ipv6(struct nm_ipv6hdr *ip6h, void *data,
#endif
}
+/*
+ * On FreeBSD we send up one packet at a time: the mbuf m is passed to
+ * the if_input routine stored in NA(ifp).
+ * The prev argument is not used and the return value is always NULL.
+ */
+void *
+nm_os_send_up(struct ifnet *ifp, struct mbuf *m, struct mbuf *prev)
+{
+
+ NA(ifp)->if_input(ifp, m);
+ return NULL;
+}
+
/*
 * Tell whether the mbuf carries any of the checksum/TSO offload flags
 * listed below. The result is m->m_pkthdr.csum_flags masked with those
 * CSUM_* bits: zero when none is set, non-zero (not normalized to 1)
 * otherwise.
 */
+int
+nm_os_mbuf_has_offld(struct mbuf *m)
+{
+ return m->m_pkthdr.csum_flags & (CSUM_TCP | CSUM_UDP | CSUM_SCTP |
+ CSUM_TCP_IPV6 | CSUM_UDP_IPV6 |
+ CSUM_SCTP_IPV6 | CSUM_TSO);
+}
+
/*
 * if_input replacement installed by nm_os_catch_rx() when intercepting.
 * The mbuf is first offered to generic_rx_handler(); if that returns 0
 * (mbuf not stolen) the mbuf is passed on to the original if_input
 * routine saved in gna->save_if_input.
 */
+static void
+freebsd_generic_rx_handler(struct ifnet *ifp, struct mbuf *m)
+{
+ struct netmap_generic_adapter *gna =
+ (struct netmap_generic_adapter *)NA(ifp);
+ int stolen = generic_rx_handler(ifp, m);
+
+ if (!stolen) {
+ gna->save_if_input(ifp, m);
+ }
+}
/*
* Intercept the rx routine in the standard device driver.
* Second argument is non-zero to intercept, 0 to restore
*/
int
-netmap_catch_rx(struct netmap_generic_adapter *gna, int intercept)
+nm_os_catch_rx(struct netmap_generic_adapter *gna, int intercept)
{
struct netmap_adapter *na = &gna->up.up;
struct ifnet *ifp = na->ifp;
@@ -161,7 +276,7 @@ netmap_catch_rx(struct netmap_generic_adapter *gna, int intercept)
return EINVAL; /* already set */
}
gna->save_if_input = ifp->if_input;
- ifp->if_input = generic_rx_handler;
+ ifp->if_input = freebsd_generic_rx_handler;
} else {
if (!gna->save_if_input){
D("cannot restore");
@@ -181,18 +296,20 @@ netmap_catch_rx(struct netmap_generic_adapter *gna, int intercept)
* Second argument is non-zero to intercept, 0 to restore.
* On freebsd we just intercept if_transmit.
*/
-void
-netmap_catch_tx(struct netmap_generic_adapter *gna, int enable)
+int
+nm_os_catch_tx(struct netmap_generic_adapter *gna, int intercept)
{
struct netmap_adapter *na = &gna->up.up;
struct ifnet *ifp = netmap_generic_getifp(gna);
- if (enable) {
+ if (intercept) {
na->if_transmit = ifp->if_transmit;
ifp->if_transmit = netmap_transmit;
} else {
ifp->if_transmit = na->if_transmit;
}
+
+ return 0;
}
@@ -213,40 +330,44 @@ netmap_catch_tx(struct netmap_generic_adapter *gna, int enable)
*
*/
int
-generic_xmit_frame(struct ifnet *ifp, struct mbuf *m,
- void *addr, u_int len, u_int ring_nr)
+nm_os_generic_xmit_frame(struct nm_os_gen_arg *a)
{
int ret;
+ u_int len = a->len;
+ struct ifnet *ifp = a->ifp;
+ struct mbuf *m = a->m;
+#if __FreeBSD_version < 1100000
/*
- * The mbuf should be a cluster from our special pool,
- * so we do not need to do an m_copyback but just copy
- * (and eventually, just reference the netmap buffer)
+ * Old FreeBSD versions. The mbuf has a cluster attached,
+ * we need to copy from the cluster to the netmap buffer.
*/
-
- if (GET_MBUF_REFCNT(m) != 1) {
- D("invalid refcnt %d for %p",
- GET_MBUF_REFCNT(m), m);
+ if (MBUF_REFCNT(m) != 1) {
+ D("invalid refcnt %d for %p", MBUF_REFCNT(m), m);
panic("in generic_xmit_frame");
}
- // XXX the ext_size check is unnecessary if we link the netmap buf
if (m->m_ext.ext_size < len) {
RD(5, "size %d < len %d", m->m_ext.ext_size, len);
len = m->m_ext.ext_size;
}
- if (0) { /* XXX seems to have negligible benefits */
- m->m_ext.ext_buf = m->m_data = addr;
- } else {
- bcopy(addr, m->m_data, len);
- }
+ bcopy(a->addr, m->m_data, len);
+#else /* __FreeBSD_version >= 1100000 */
+ /* New FreeBSD versions. Link the external storage to
+ * the netmap buffer, so that no copy is necessary. */
+ m->m_ext.ext_buf = m->m_data = a->addr;
+ m->m_ext.ext_size = len;
+#endif /* __FreeBSD_version >= 1100000 */
+
m->m_len = m->m_pkthdr.len = len;
- // inc refcount. All ours, we could skip the atomic
- atomic_fetchadd_int(PNT_MBUF_REFCNT(m), 1);
+
+ /* mbuf refcnt is not contended, no need to use atomic
+ * (a memory barrier is enough). */
+ SET_MBUF_REFCNT(m, 2);
M_HASHTYPE_SET(m, M_HASHTYPE_OPAQUE);
- m->m_pkthdr.flowid = ring_nr;
+ m->m_pkthdr.flowid = a->ring_nr;
m->m_pkthdr.rcvif = ifp; /* used for tx notification */
ret = NA(ifp)->if_transmit(ifp, m);
- return ret;
+ return ret ? -1 : 0;
}
@@ -263,7 +384,7 @@ netmap_getna(if_t ifp)
* way to extract the info from the ifp
*/
int
-generic_find_num_desc(struct ifnet *ifp, unsigned int *tx, unsigned int *rx)
+nm_os_generic_find_num_desc(struct ifnet *ifp, unsigned int *tx, unsigned int *rx)
{
D("called, in tx %d rx %d", *tx, *rx);
return 0;
@@ -271,16 +392,23 @@ generic_find_num_desc(struct ifnet *ifp, unsigned int *tx, unsigned int *rx)
void
-generic_find_num_queues(struct ifnet *ifp, u_int *txq, u_int *rxq)
+nm_os_generic_find_num_queues(struct ifnet *ifp, u_int *txq, u_int *rxq)
{
D("called, in txq %d rxq %d", *txq, *rxq);
*txq = netmap_generic_rings;
*rxq = netmap_generic_rings;
}
/*
 * Set the OS-dependent feature flags of a generic adapter for FreeBSD:
 * rxsg is turned on and txqdisc is turned off.
 */
+void
+nm_os_generic_set_features(struct netmap_generic_adapter *gna)
+{
+
+ gna->rxsg = 1; /* Supported through m_copydata. */
+ gna->txqdisc = 0; /* Not supported. */
+}
void
-netmap_mitigation_init(struct nm_generic_mit *mit, int idx, struct netmap_adapter *na)
+nm_os_mitigation_init(struct nm_generic_mit *mit, int idx, struct netmap_adapter *na)
{
ND("called");
mit->mit_pending = 0;
@@ -290,21 +418,21 @@ netmap_mitigation_init(struct nm_generic_mit *mit, int idx, struct netmap_adapte
void
-netmap_mitigation_start(struct nm_generic_mit *mit)
+nm_os_mitigation_start(struct nm_generic_mit *mit)
{
ND("called");
}
void
-netmap_mitigation_restart(struct nm_generic_mit *mit)
+nm_os_mitigation_restart(struct nm_generic_mit *mit)
{
ND("called");
}
int
-netmap_mitigation_active(struct nm_generic_mit *mit)
+nm_os_mitigation_active(struct nm_generic_mit *mit)
{
ND("called");
return 0;
@@ -312,7 +440,7 @@ netmap_mitigation_active(struct nm_generic_mit *mit)
void
-netmap_mitigation_cleanup(struct nm_generic_mit *mit)
+nm_os_mitigation_cleanup(struct nm_generic_mit *mit)
{
ND("called");
}
@@ -342,7 +470,7 @@ static struct {
} nm_vi_indices;
void
-nm_vi_init_index(void)
+nm_os_vi_init_index(void)
{
int i;
for (i = 0; i < NM_VI_MAX; i++)
@@ -398,7 +526,7 @@ nm_vi_free_index(uint8_t val)
* increment this refcount on if_attach().
*/
int
-nm_vi_persist(const char *name, struct ifnet **ret)
+nm_os_vi_persist(const char *name, struct ifnet **ret)
{
struct ifnet *ifp;
u_short macaddr_hi;
@@ -438,15 +566,220 @@ nm_vi_persist(const char *name, struct ifnet **ret)
*ret = ifp;
return 0;
}
+
/* unregister from the system and drop the final refcount */
void
-nm_vi_detach(struct ifnet *ifp)
+nm_os_vi_detach(struct ifnet *ifp)
{
nm_vi_free_index(((char *)IF_LLADDR(ifp))[5]);
ether_ifdetach(ifp);
if_free(ifp);
}
+/* ======================== PTNETMAP SUPPORT ========================== */
+
+#ifdef WITH_PTNETMAP_GUEST
+#include <sys/bus.h>
+#include <sys/rman.h>
+#include <machine/bus.h> /* bus_dmamap_* */
+#include <machine/resource.h>
+#include <dev/pci/pcivar.h>
+#include <dev/pci/pcireg.h>
+/*
+ * ptnetmap memory device (memdev) for freebsd guest,
+ * used to expose host netmap memory to the guest through a PCI BAR.
+ */
+
+/*
+ * ptnetmap memdev private data structure.
+ * It is the softc of ptn_memdev_driver (see the sizeof() in the
+ * driver_t declaration).
+ */
+struct ptnetmap_memdev {
+ device_t dev; /* back pointer to the device, set in attach */
+ struct resource *pci_io; /* I/O port BAR (PTNETMAP_IO_PCI_BAR), allocated in attach */
+ struct resource *pci_mem; /* memory BAR (PTNETMAP_MEM_PCI_BAR), allocated in iomap */
+ struct netmap_mem_d *nm_mem; /* guest allocator from netmap_mem_pt_guest_attach() */
+};
+
+static int ptn_memdev_probe(device_t);
+static int ptn_memdev_attach(device_t);
+static int ptn_memdev_detach(device_t);
+static int ptn_memdev_shutdown(device_t);
+
+static device_method_t ptn_memdev_methods[] = {
+ DEVMETHOD(device_probe, ptn_memdev_probe),
+ DEVMETHOD(device_attach, ptn_memdev_attach),
+ DEVMETHOD(device_detach, ptn_memdev_detach),
+ DEVMETHOD(device_shutdown, ptn_memdev_shutdown),
+ DEVMETHOD_END
+};
+
+static driver_t ptn_memdev_driver = {
+ PTNETMAP_MEMDEV_NAME,
+ ptn_memdev_methods,
+ sizeof(struct ptnetmap_memdev),
+};
+
+/* We use (SI_ORDER_MIDDLE+1) here, see DEV_MODULE_ORDERED() invocation
+ * below. */
+static devclass_t ptnetmap_devclass;
+DRIVER_MODULE_ORDERED(ptn_memdev, pci, ptn_memdev_driver, ptnetmap_devclass,
+ NULL, NULL, SI_ORDER_MIDDLE + 1);
+
+/*
+ * I/O port read/write wrappers.
+ * Some are not used, so we keep them commented out until needed
+ */
+#define ptn_ioread16(ptn_dev, reg) bus_read_2((ptn_dev)->pci_io, (reg))
+#define ptn_ioread32(ptn_dev, reg) bus_read_4((ptn_dev)->pci_io, (reg))
+#if 0
+#define ptn_ioread8(ptn_dev, reg) bus_read_1((ptn_dev)->pci_io, (reg))
+#define ptn_iowrite8(ptn_dev, reg, val) bus_write_1((ptn_dev)->pci_io, (reg), (val))
+#define ptn_iowrite16(ptn_dev, reg, val) bus_write_2((ptn_dev)->pci_io, (reg), (val))
+#define ptn_iowrite32(ptn_dev, reg, val) bus_write_4((ptn_dev)->pci_io, (reg), (val))
+#endif /* unused */
+
+/*
+ * Map host netmap memory through PCI-BAR in the guest OS,
+ * returning physical (nm_paddr) and virtual (nm_addr) addresses
+ * of the netmap memory mapped in the guest.
+ *
+ * The size is read from the PTNETMAP_IO_PCI_MEMSIZE register through
+ * ptn_dev->pci_io, then the memory BAR (PTNETMAP_MEM_PCI_BAR) is
+ * allocated with RF_ACTIVE and kept in ptn_dev->pci_mem.
+ * Returns 0 on success; if the allocation fails both outputs are
+ * zeroed and ENOMEM is returned.
+ */
+int
+nm_os_pt_memdev_iomap(struct ptnetmap_memdev *ptn_dev, vm_paddr_t *nm_paddr, void **nm_addr)
+{
+ uint32_t mem_size;
+ int rid;
+
+ D("ptn_memdev_driver iomap");
+
+ rid = PCIR_BAR(PTNETMAP_MEM_PCI_BAR);
+ mem_size = ptn_ioread32(ptn_dev, PTNETMAP_IO_PCI_MEMSIZE);
+
+ /* map memory allocator */
+ ptn_dev->pci_mem = bus_alloc_resource(ptn_dev->dev, SYS_RES_MEMORY,
+ &rid, 0, ~0, mem_size, RF_ACTIVE);
+ if (ptn_dev->pci_mem == NULL) {
+ *nm_paddr = 0;
+ *nm_addr = 0;
+ return ENOMEM;
+ }
+
+ *nm_paddr = rman_get_start(ptn_dev->pci_mem);
+ *nm_addr = rman_get_virtual(ptn_dev->pci_mem);
+
+ D("=== BAR %d start %lx len %lx mem_size %x ===",
+ PTNETMAP_MEM_PCI_BAR,
+ *nm_paddr,
+ rman_get_size(ptn_dev->pci_mem),
+ mem_size);
+ return (0);
+}
+
+/*
+ * Unmap host netmap memory: release the memory BAR resource held in
+ * ptn_dev->pci_mem, if any, and clear the pointer so that a second
+ * call (or a later detach) does not release it again.
+ */
+void
+nm_os_pt_memdev_iounmap(struct ptnetmap_memdev *ptn_dev)
+{
+ D("ptn_memdev_driver iounmap");
+
+ if (ptn_dev->pci_mem) {
+ bus_release_resource(ptn_dev->dev, SYS_RES_MEMORY,
+ PCIR_BAR(PTNETMAP_MEM_PCI_BAR), ptn_dev->pci_mem);
+ ptn_dev->pci_mem = NULL;
+ }
+}
+
+/* Device identification routine: return BUS_PROBE_DEFAULT when the PCI
+ * vendor and device IDs match PTNETMAP_PCI_VENDOR_ID and
+ * PTNETMAP_PCI_DEVICE_ID, ENXIO otherwise.
+ * On a match the device description is set to "<memdev name> PCI adapter". */
+static int
+ptn_memdev_probe(device_t dev)
+{
+ char desc[256];
+
+ if (pci_get_vendor(dev) != PTNETMAP_PCI_VENDOR_ID)
+ return (ENXIO);
+ if (pci_get_device(dev) != PTNETMAP_PCI_DEVICE_ID)
+ return (ENXIO);
+
+ snprintf(desc, sizeof(desc), "%s PCI adapter",
+ PTNETMAP_MEMDEV_NAME);
+ device_set_desc_copy(dev, desc);
+
+ return (BUS_PROBE_DEFAULT);
+}
+
+/*
+ * Device initialization routine.
+ * Enables bus mastering, allocates the I/O port BAR, reads the host
+ * memory id from the PTNETMAP_IO_PCI_HOSTID register and asks
+ * netmap_mem_pt_guest_attach() for the matching guest allocator.
+ * Returns 0 on success, ENXIO if the I/O BAR cannot be allocated, and
+ * ENOMEM if the allocator cannot be created (in that case
+ * ptn_memdev_detach() is called to release what was allocated so far).
+ */
+static int
+ptn_memdev_attach(device_t dev)
+{
+ struct ptnetmap_memdev *ptn_dev;
+ int rid;
+ uint16_t mem_id;
+
+ D("ptn_memdev_driver attach");
+
+ ptn_dev = device_get_softc(dev);
+ ptn_dev->dev = dev;
+
+ pci_enable_busmaster(dev);
+
+ rid = PCIR_BAR(PTNETMAP_IO_PCI_BAR);
+ ptn_dev->pci_io = bus_alloc_resource_any(dev, SYS_RES_IOPORT, &rid,
+ RF_ACTIVE);
+ if (ptn_dev->pci_io == NULL) {
+ device_printf(dev, "cannot map I/O space\n");
+ return (ENXIO);
+ }
+
+ mem_id = ptn_ioread16(ptn_dev, PTNETMAP_IO_PCI_HOSTID);
+
+ /* create guest allocator */
+ ptn_dev->nm_mem = netmap_mem_pt_guest_attach(ptn_dev, mem_id);
+ if (ptn_dev->nm_mem == NULL) {
+ ptn_memdev_detach(dev);
+ return (ENOMEM);
+ }
+ /* paired with the netmap_mem_put() in ptn_memdev_detach() */
+ netmap_mem_get(ptn_dev->nm_mem);
+
+ D("ptn_memdev_driver probe OK - host_id: %d", mem_id);
+
+ return (0);
+}
+
+/*
+ * Device removal routine.
+ * Drops the allocator with netmap_mem_put() and releases the memory
+ * and I/O BAR resources. Each member is checked and cleared, so the
+ * routine also works on a partially initialized softc (it is called
+ * from the attach error path). Always returns 0.
+ */
+static int
+ptn_memdev_detach(device_t dev)
+{
+ struct ptnetmap_memdev *ptn_dev;
+
+ D("ptn_memdev_driver detach");
+ ptn_dev = device_get_softc(dev);
+
+ if (ptn_dev->nm_mem) {
+ netmap_mem_put(ptn_dev->nm_mem);
+ ptn_dev->nm_mem = NULL;
+ }
+ if (ptn_dev->pci_mem) {
+ bus_release_resource(dev, SYS_RES_MEMORY,
+ PCIR_BAR(PTNETMAP_MEM_PCI_BAR), ptn_dev->pci_mem);
+ ptn_dev->pci_mem = NULL;
+ }
+ if (ptn_dev->pci_io) {
+ bus_release_resource(dev, SYS_RES_IOPORT,
+ PCIR_BAR(PTNETMAP_IO_PCI_BAR), ptn_dev->pci_io);
+ ptn_dev->pci_io = NULL;
+ }
+
+ return (0);
+}
+
/*
 * Device shutdown routine: logs the event and returns the result of
 * bus_generic_shutdown().
 */
+static int
+ptn_memdev_shutdown(device_t dev)
+{
+ D("ptn_memdev_driver shutdown");
+ return bus_generic_shutdown(dev);
+}
+
+#endif /* WITH_PTNETMAP_GUEST */
+
/*
* In order to track whether pages are still mapped, we hook into
* the standard cdev_pager and intercept the constructor and
@@ -606,7 +939,7 @@ err_unlock:
* the device (/dev/netmap) so we cannot do anything useful.
* To track close() on individual file descriptors we pass netmap_dtor() to
* devfs_set_cdevpriv() on open(). The FreeBSD kernel will call the destructor
- * when the last fd pointing to the device is closed.
+ * when the last fd pointing to the device is closed.
*
* Note that FreeBSD does not even munmap() on close() so we also have
* to track mmap() ourselves, and postpone the call to
@@ -634,26 +967,275 @@ netmap_open(struct cdev *dev, int oflags, int devtype, struct thread *td)
(void)devtype;
(void)td;
- priv = malloc(sizeof(struct netmap_priv_d), M_DEVBUF,
- M_NOWAIT | M_ZERO);
- if (priv == NULL)
- return ENOMEM;
- priv->np_refs = 1;
+ NMG_LOCK();
+ priv = netmap_priv_new();
+ if (priv == NULL) {
+ error = ENOMEM;
+ goto out;
+ }
error = devfs_set_cdevpriv(priv, netmap_dtor);
if (error) {
- free(priv, M_DEVBUF);
- } else {
- NMG_LOCK();
- netmap_use_count++;
- NMG_UNLOCK();
+ netmap_priv_delete(priv);
}
+out:
+ NMG_UNLOCK();
return error;
}
+/******************** kthread wrapper ****************/
+#include <sys/sysproto.h>
/*
 * CPU count as seen by netmap: returns mp_maxid + 1.
 * NOTE(review): presumably mp_maxid is the highest CPU id, so the
 * result is an upper bound on CPU ids rather than the number of
 * CPUs actually present -- confirm.
 */
+u_int
+nm_os_ncpus(void)
+{
+ return mp_maxid + 1;
+}
+
/*
 * Worker context embedded in struct nm_kthread: the notification
 * endpoints and the function executed by the worker thread.
 */
+struct nm_kthread_ctx {
+ struct thread *user_td; /* user-space thread that started the kthread; passed to kern_ioctl() */
+ /* notification to guest (interrupt) */
+ int irq_fd; /* ioctl fd */
+ struct nm_kth_ioctl irq_ioctl; /* ioctl arguments */
+
+ /* notification from guest */
+ void *ioevent_file; /* sleep channel used with msleep_spin()/wakeup() */
+
+ /* worker function and parameter */
+ nm_kthread_worker_fn_t worker_fn;
+ void *worker_private;
+
+ struct nm_kthread *nmk;
+
+ /* integer to manage multiple worker contexts (e.g., RX or TX on ptnetmap) */
+ long type;
+};
+
/*
 * State of one netmap kernel thread, allocated by nm_os_kthread_create()
 * and released by nm_os_kthread_delete().
 */
+struct nm_kthread {
+ struct thread *worker; /* thread filled in by kthread_add(); NULL when not running */
+ struct mtx worker_lock; /* taken around accesses to 'scheduled' */
+ uint64_t scheduled; /* pending wake_up request */
+ struct nm_kthread_ctx worker_ctx;
+ int run; /* used to stop kthread */
+ int attach_user; /* kthread attached to user_process */
+ int affinity; /* CPU passed to sched_bind(); negative means do not bind */
+};
+
/*
 * Notify the worker that there is work to do: bump nmk->scheduled
 * under worker_lock and, if a sleep channel (ioevent_file) is
 * configured, wakeup() whoever sleeps on it.
 */
+void inline
+nm_os_kthread_wakeup_worker(struct nm_kthread *nmk)
+{
+ /*
+ * There may be a race between FE and BE,
+ * which both call this function, and the worker kthread,
+ * which reads nmk->scheduled.
+ *
+ * The counter value itself is not important to us;
+ * what matters is that it has changed since the last
+ * time the kthread saw it.
+ */
+ mtx_lock(&nmk->worker_lock);
+ nmk->scheduled++;
+ if (nmk->worker_ctx.ioevent_file) {
+ wakeup(nmk->worker_ctx.ioevent_file);
+ }
+ mtx_unlock(&nmk->worker_lock);
+}
+
/*
 * Send a notification (interrupt) by issuing the configured ioctl on
 * irq_fd on behalf of user_td. Nothing is done when user_td is NULL
 * or irq_fd is not positive (so fd 0 is treated as "not configured").
 * A kern_ioctl() failure is only logged; no error is returned.
 */
+void inline
+nm_os_kthread_send_irq(struct nm_kthread *nmk)
+{
+ struct nm_kthread_ctx *ctx = &nmk->worker_ctx;
+ int err;
+
+ if (ctx->user_td && ctx->irq_fd > 0) {
+ err = kern_ioctl(ctx->user_td, ctx->irq_fd, ctx->irq_ioctl.com, (caddr_t)&ctx->irq_ioctl.data.msix);
+ if (err) {
+ D("kern_ioctl error: %d ioctl parameters: fd %d com %lu data %p",
+ err, ctx->irq_fd, ctx->irq_ioctl.com, &ctx->irq_ioctl.data);
+ }
+ }
+}
+
/*
 * Body of the kernel thread started by nm_os_kthread_start().
 * data is the struct nm_kthread. If an affinity was set the thread
 * first binds itself to that CPU, then loops while nmk->run is set:
 * without a sleep channel it calls worker_fn() back to back; with
 * one it calls worker_fn() whenever nmk->scheduled has changed since
 * the last pass and otherwise sleeps on the channel for up to hz ticks.
 * The thread terminates itself with kthread_exit() once run is cleared.
 *
 * NOTE(review): worker_lock is initialized with MTX_SPIN in
 * nm_os_kthread_create() and slept on with msleep_spin() here, but it
 * is acquired with mtx_lock()/mtx_unlock() -- confirm that this
 * pairing is intended.
 */
+static void
+nm_kthread_worker(void *data)
+{
+ struct nm_kthread *nmk = data;
+ struct nm_kthread_ctx *ctx = &nmk->worker_ctx;
+ uint64_t old_scheduled = nmk->scheduled;
+
+ if (nmk->affinity >= 0) {
+ thread_lock(curthread);
+ sched_bind(curthread, nmk->affinity);
+ thread_unlock(curthread);
+ }
+
+ while (nmk->run) {
+ /*
+ * check if the parent process dies
+ * (when kthread is attached to user process)
+ */
+ if (ctx->user_td) {
+ PROC_LOCK(curproc);
+ thread_suspend_check(0);
+ PROC_UNLOCK(curproc);
+ } else {
+ kthread_suspend_check();
+ }
+
+ /*
+ * if ioevent_file is not defined, we don't have notification
+ * mechanism and we continually execute worker_fn()
+ */
+ if (!ctx->ioevent_file) {
+ ctx->worker_fn(ctx->worker_private); /* worker body */
+ } else {
+ /* checks if there is a pending notification */
+ mtx_lock(&nmk->worker_lock);
+ if (likely(nmk->scheduled != old_scheduled)) {
+ old_scheduled = nmk->scheduled;
+ mtx_unlock(&nmk->worker_lock);
+
+ ctx->worker_fn(ctx->worker_private); /* worker body */
+
+ continue;
+ } else if (nmk->run) {
+ /* wait on event with one second timeout */
+ msleep_spin(ctx->ioevent_file, &nmk->worker_lock,
+ "nmk_ev", hz);
+ /* make the next pass see a change, so worker_fn() runs
+ * after every wakeup or timeout */
+ nmk->scheduled++;
+ }
+ mtx_unlock(&nmk->worker_lock);
+ }
+ }
+
+ kthread_exit();
+}
+
/*
 * Copy the notification endpoints from the configuration into the
 * worker context. Fields whose cfg value is zero are left untouched.
 * No file is actually opened here and the function always returns 0.
 */
+static int
+nm_kthread_open_files(struct nm_kthread *nmk, struct nm_kthread_cfg *cfg)
+{
+ /* send irq through ioctl to bhyve (vmm.ko) */
+ if (cfg->event.irqfd) {
+ nmk->worker_ctx.irq_fd = cfg->event.irqfd;
+ nmk->worker_ctx.irq_ioctl = cfg->event.ioctl;
+ }
+ /* event.ioeventfd holds the channel the worker sleeps on to wait for events */
+ if (cfg->event.ioeventfd) {
+ nmk->worker_ctx.ioevent_file = (void *)cfg->event.ioeventfd;
+ }
+
+ return 0;
+}
+
/*
 * Forget the notification endpoints set by nm_kthread_open_files():
 * irq_fd is reset to 0 and ioevent_file to NULL. Nothing is closed.
 */
+static void
+nm_kthread_close_files(struct nm_kthread *nmk)
+{
+ nmk->worker_ctx.irq_fd = 0;
+ nmk->worker_ctx.ioevent_file = NULL;
+}
+
/*
 * Record the CPU the worker should bind to. The value is only read by
 * nm_kthread_worker() when the thread starts, where a negative value
 * means "do not bind".
 */
+void
+nm_os_kthread_set_affinity(struct nm_kthread *nmk, int affinity)
+{
+ nmk->affinity = affinity;
+}
+
/*
 * Allocate and initialize a struct nm_kthread from cfg. The thread
 * itself is not created here; see nm_os_kthread_start().
 * The allocation is zeroed and uses M_NOWAIT, so it may fail.
 * Returns the new object, or NULL on allocation or setup failure.
 */
+struct nm_kthread *
+nm_os_kthread_create(struct nm_kthread_cfg *cfg)
+{
+ struct nm_kthread *nmk = NULL;
+ int error;
+
+ nmk = malloc(sizeof(*nmk), M_DEVBUF, M_NOWAIT | M_ZERO);
+ if (!nmk)
+ return NULL;
+
+ mtx_init(&nmk->worker_lock, "nm_kthread lock", NULL, MTX_SPIN);
+ nmk->worker_ctx.worker_fn = cfg->worker_fn;
+ nmk->worker_ctx.worker_private = cfg->worker_private;
+ nmk->worker_ctx.type = cfg->type;
+ nmk->affinity = -1;
+
+ /* attach kthread to user process (ptnetmap) */
+ nmk->attach_user = cfg->attach_user;
+
+ /* open event fd */
+ error = nm_kthread_open_files(nmk, cfg);
+ if (error)
+ goto err;
+
+ return nmk;
+err:
+ free(nmk, M_DEVBUF);
+ return NULL;
+}
+
/*
 * Create and start the worker thread for nmk.
 * When attach_user is set, the calling thread is recorded as user_td
 * and its process is passed to kthread_add(); otherwise a NULL process
 * is passed.
 * Returns 0 on success, EBUSY if a worker already exists, or the error
 * returned by kthread_add(). On that failure nmk->worker is reset to
 * NULL but nmk->run is left set.
 */
+int
+nm_os_kthread_start(struct nm_kthread *nmk)
+{
+ struct proc *p = NULL;
+ int error = 0;
+
+ if (nmk->worker) {
+ return EBUSY;
+ }
+
+ /* check if we want to attach kthread to user process */
+ if (nmk->attach_user) {
+ nmk->worker_ctx.user_td = curthread;
+ p = curthread->td_proc;
+ }
+
+ /* enable kthread main loop */
+ nmk->run = 1;
+ /* create kthread */
+ if((error = kthread_add(nm_kthread_worker, nmk, p,
+ &nmk->worker, RFNOWAIT /* to be checked */, 0, "nm-kthread-%ld",
+ nmk->worker_ctx.type))) {
+ goto err;
+ }
+
+ D("nm_kthread started td 0x%p", nmk->worker);
+
+ return 0;
+err:
+ D("nm_kthread start failed err %d", error);
+ nmk->worker = NULL;
+ return error;
+}
+
/*
 * Ask the worker thread to terminate: clear nmk->run, resume and wake
 * the thread so that it notices, then forget the thread pointer.
 * No-op if no worker is running. This function does not wait for the
 * thread to actually exit.
 */
+void
+nm_os_kthread_stop(struct nm_kthread *nmk)
+{
+ if (!nmk->worker) {
+ return;
+ }
+ /* tell the kthread to exit from its main loop */
+ nmk->run = 0;
+
+ /* wake up kthread if it sleeps */
+ kthread_resume(nmk->worker);
+ nm_os_kthread_wakeup_worker(nmk);
+
+ nmk->worker = NULL;
+}
+
/*
 * Release a struct nm_kthread obtained from nm_os_kthread_create().
 * A NULL argument is accepted. A running worker is stopped first, then
 * the notification endpoints are cleared and the memory is freed.
 * NOTE(review): worker_lock is not passed to mtx_destroy() before the
 * free, and nm_os_kthread_stop() does not wait for the worker to exit
 * -- confirm both are acceptable.
 */
+void
+nm_os_kthread_delete(struct nm_kthread *nmk)
+{
+ if (!nmk)
+ return;
+ if (nmk->worker) {
+ nm_os_kthread_stop(nmk);
+ }
+
+ nm_kthread_close_files(nmk);
+
+ free(nmk, M_DEVBUF);
+}
+
/******************** kqueue support ****************/
/*
- * The OS_selwakeup also needs to issue a KNOTE_UNLOCKED.
+ * nm_os_selwakeup also needs to issue a KNOTE_UNLOCKED.
* We use a non-zero argument to distinguish the call from the one
* in kevent_scan() which instead also needs to run netmap_poll().
* The knote uses a global mutex for the time being. We might
@@ -672,17 +1254,23 @@ netmap_open(struct cdev *dev, int oflags, int devtype, struct thread *td)
void
-freebsd_selwakeup(struct nm_selinfo *si, int pri)
+nm_os_selwakeup(struct nm_selinfo *si)
{
if (netmap_verbose)
D("on knote %p", &si->si.si_note);
- selwakeuppri(&si->si, pri);
+ selwakeuppri(&si->si, PI_NET);
/* use a non-zero hint to tell the notification from the
* call done in kqueue_scan() which uses 0
*/
KNOTE_UNLOCKED(&si->si.si_note, 0x100 /* notification */);
}
/* Record thread td on the selinfo embedded in si; wrapper around selrecord(). */
+void
+nm_os_selrecord(struct thread *td, struct nm_selinfo *si)
+{
+ selrecord(td, &si->si);
+}
+
static void
netmap_knrdetach(struct knote *kn)
{
@@ -728,7 +1316,7 @@ netmap_knrw(struct knote *kn, long hint, int events)
RD(5, "curthread changed %p %p", curthread, priv->np_td);
return 1;
} else {
- revents = netmap_poll((void *)priv, events, curthread);
+ revents = netmap_poll(priv, events, NULL);
return (events & revents) ? 1 : 0;
}
}
@@ -801,13 +1389,47 @@ netmap_kqfilter(struct cdev *dev, struct knote *kn)
return 0;
}
/*
 * d_poll entry point of netmap_cdevsw: fetch the per-open private data
 * with devfs_get_cdevpriv() and hand it to netmap_poll().
 * Returns POLLERR if the private data cannot be retrieved, otherwise
 * whatever netmap_poll() returns. The cdev argument is unused.
 */
+static int
+freebsd_netmap_poll(struct cdev *cdevi __unused, int events, struct thread *td)
+{
+ struct netmap_priv_d *priv;
+ if (devfs_get_cdevpriv((void **)&priv)) {
+ return POLLERR;
+ }
+ return netmap_poll(priv, events, td);
+}
+
/*
 * d_ioctl entry point of netmap_cdevsw: fetch the per-open private data
 * with devfs_get_cdevpriv() and hand the request to netmap_ioctl().
 * The call runs with the current vnet set to that of the calling
 * thread td; the vnet is restored on every exit path.
 * Returns the netmap_ioctl() result, or the devfs_get_cdevpriv() error
 * with ENOENT mapped to ENXIO. The dev and ffla arguments are unused.
 */
+static int
+freebsd_netmap_ioctl(struct cdev *dev __unused, u_long cmd, caddr_t data,
+ int ffla __unused, struct thread *td)
+{
+ int error;
+ struct netmap_priv_d *priv;
+
+ CURVNET_SET(TD_TO_VNET(td));
+ error = devfs_get_cdevpriv((void **)&priv);
+ if (error) {
+ /* XXX ENOENT should be impossible, since the priv
+ * is now created in the open */
+ if (error == ENOENT)
+ error = ENXIO;
+ goto out;
+ }
+ error = netmap_ioctl(priv, cmd, data, td);
+out:
+ CURVNET_RESTORE();
+
+ return error;
+}
+
+extern struct cdevsw netmap_cdevsw; /* XXX used in netmap.c, should go elsewhere */
struct cdevsw netmap_cdevsw = {
.d_version = D_VERSION,
.d_name = "netmap",
.d_open = netmap_open,
.d_mmap_single = netmap_mmap_single,
- .d_ioctl = netmap_ioctl,
- .d_poll = netmap_poll,
+ .d_ioctl = freebsd_netmap_ioctl,
+ .d_poll = freebsd_netmap_poll,
.d_kqfilter = netmap_kqfilter,
.d_close = netmap_close,
};
@@ -852,6 +1474,24 @@ netmap_loader(__unused struct module *module, int event, __unused void *arg)
return (error);
}
-
+#ifdef DEV_MODULE_ORDERED
+/*
+ * The netmap module contains three drivers: (i) the netmap character device
+ * driver; (ii) the ptnetmap memdev PCI device driver, (iii) the ptnet PCI
+ * device driver. The attach() routines of both (ii) and (iii) need the
+ * lock of the global allocator, and such lock is initialized in netmap_init(),
+ * which is part of (i).
+ * Therefore, we make sure that (i) is loaded before (ii) and (iii), using
+ * the 'order' parameter of driver declaration macros. For (i), we specify
+ * SI_ORDER_MIDDLE, while higher orders are used with the DRIVER_MODULE_ORDERED
+ * macros for (ii) and (iii).
+ */
+DEV_MODULE_ORDERED(netmap, netmap_loader, NULL, SI_ORDER_MIDDLE);
+#else /* !DEV_MODULE_ORDERED */
DEV_MODULE(netmap, netmap_loader, NULL);
+#endif /* DEV_MODULE_ORDERED */
+MODULE_DEPEND(netmap, pci, 1, 1, 1);
MODULE_VERSION(netmap, 1);
+/* reduce conditional code */
+// linux API, use for the knlist in FreeBSD
+/* use a private mutex for the knlist */