diff options
| author | Luigi Rizzo <luigi@FreeBSD.org> | 2016-10-16 14:13:32 +0000 |
|---|---|---|
| committer | Luigi Rizzo <luigi@FreeBSD.org> | 2016-10-16 14:13:32 +0000 |
| commit | 37e3a6d349581b4dd0aebf24be7b1b159a698dcf (patch) | |
| tree | 0e61deea141c9733af511b0485cf1fd0f2dd17ed /sys/dev/netmap/netmap_freebsd.c | |
| parent | 63f6b1a75a8e6e33e4f9d65571c6a221444d3b05 (diff) | |
Notes
Diffstat (limited to 'sys/dev/netmap/netmap_freebsd.c')
| -rw-r--r-- | sys/dev/netmap/netmap_freebsd.c | 762 |
1 files changed, 701 insertions, 61 deletions
diff --git a/sys/dev/netmap/netmap_freebsd.c b/sys/dev/netmap/netmap_freebsd.c index 8490ae85670b..20ea5c8f2972 100644 --- a/sys/dev/netmap/netmap_freebsd.c +++ b/sys/dev/netmap/netmap_freebsd.c @@ -33,8 +33,9 @@ #include <sys/param.h> /* defines used in kernel.h */ #include <sys/poll.h> /* POLLIN, POLLOUT */ #include <sys/kernel.h> /* types used in module initialization */ -#include <sys/conf.h> /* DEV_MODULE */ +#include <sys/conf.h> /* DEV_MODULE_ORDERED */ #include <sys/endian.h> +#include <sys/syscallsubr.h> /* kern_ioctl() */ #include <sys/rwlock.h> @@ -50,6 +51,11 @@ #include <sys/malloc.h> #include <sys/socket.h> /* sockaddrs */ #include <sys/selinfo.h> +#include <sys/kthread.h> /* kthread_add() */ +#include <sys/proc.h> /* PROC_LOCK() */ +#include <sys/unistd.h> /* RFNOWAIT */ +#include <sys/sched.h> /* sched_bind() */ +#include <sys/smp.h> /* mp_maxid */ #include <net/if.h> #include <net/if_var.h> #include <net/if_types.h> /* IFT_ETHER */ @@ -61,13 +67,94 @@ #include <net/netmap.h> #include <dev/netmap/netmap_kern.h> +#include <net/netmap_virt.h> #include <dev/netmap/netmap_mem2.h> /* ======================== FREEBSD-SPECIFIC ROUTINES ================== */ +void nm_os_selinfo_init(NM_SELINFO_T *si) { + struct mtx *m = &si->m; + mtx_init(m, "nm_kn_lock", NULL, MTX_DEF); + knlist_init_mtx(&si->si.si_note, m); +} + +void +nm_os_selinfo_uninit(NM_SELINFO_T *si) +{ + /* XXX kqueue(9) needed; these will mirror knlist_init. */ + knlist_delete(&si->si.si_note, curthread, 0 /* not locked */ ); + knlist_destroy(&si->si.si_note); + /* now we don't need the mutex anymore */ + mtx_destroy(&si->m); +} + +void +nm_os_ifnet_lock(void) +{ + IFNET_WLOCK(); +} + +void +nm_os_ifnet_unlock(void) +{ + IFNET_WUNLOCK(); +} + +static int netmap_use_count = 0; + +void +nm_os_get_module(void) +{ + netmap_use_count++; +} + +void +nm_os_put_module(void) +{ + netmap_use_count--; +} + +static void +netmap_ifnet_arrival_handler(void *arg __unused, struct ifnet *ifp) +{ + netmap_undo_zombie(ifp); +} + +static void +netmap_ifnet_departure_handler(void *arg __unused, struct ifnet *ifp) +{ + netmap_make_zombie(ifp); +} + +static eventhandler_tag nm_ifnet_ah_tag; +static eventhandler_tag nm_ifnet_dh_tag; + +int +nm_os_ifnet_init(void) +{ + nm_ifnet_ah_tag = + EVENTHANDLER_REGISTER(ifnet_arrival_event, + netmap_ifnet_arrival_handler, + NULL, EVENTHANDLER_PRI_ANY); + nm_ifnet_dh_tag = + EVENTHANDLER_REGISTER(ifnet_departure_event, + netmap_ifnet_departure_handler, + NULL, EVENTHANDLER_PRI_ANY); + return 0; +} + +void +nm_os_ifnet_fini(void) +{ + EVENTHANDLER_DEREGISTER(ifnet_arrival_event, + nm_ifnet_ah_tag); + EVENTHANDLER_DEREGISTER(ifnet_departure_event, + nm_ifnet_dh_tag); +} + rawsum_t -nm_csum_raw(uint8_t *data, size_t len, rawsum_t cur_sum) +nm_os_csum_raw(uint8_t *data, size_t len, rawsum_t cur_sum) { /* TODO XXX please use the FreeBSD implementation for this. */ uint16_t *words = (uint16_t *)data; @@ -87,7 +174,7 @@ nm_csum_raw(uint8_t *data, size_t len, rawsum_t cur_sum) * return value is in network byte order. */ uint16_t -nm_csum_fold(rawsum_t cur_sum) +nm_os_csum_fold(rawsum_t cur_sum) { /* TODO XXX please use the FreeBSD implementation for this. */ while (cur_sum >> 16) @@ -96,17 +183,17 @@ nm_csum_fold(rawsum_t cur_sum) return htobe16((~cur_sum) & 0xFFFF); } -uint16_t nm_csum_ipv4(struct nm_iphdr *iph) +uint16_t nm_os_csum_ipv4(struct nm_iphdr *iph) { #if 0 return in_cksum_hdr((void *)iph); #else - return nm_csum_fold(nm_csum_raw((uint8_t*)iph, sizeof(struct nm_iphdr), 0)); + return nm_os_csum_fold(nm_os_csum_raw((uint8_t*)iph, sizeof(struct nm_iphdr), 0)); #endif } void -nm_csum_tcpudp_ipv4(struct nm_iphdr *iph, void *data, +nm_os_csum_tcpudp_ipv4(struct nm_iphdr *iph, void *data, size_t datalen, uint16_t *check) { #ifdef INET @@ -118,7 +205,7 @@ nm_csum_tcpudp_ipv4(struct nm_iphdr *iph, void *data, /* Compute the checksum on TCP/UDP header + payload * (includes the pseudo-header). */ - *check = nm_csum_fold(nm_csum_raw(data, datalen, 0)); + *check = nm_os_csum_fold(nm_os_csum_raw(data, datalen, 0)); #else static int notsupported = 0; if (!notsupported) { @@ -129,12 +216,12 @@ nm_csum_tcpudp_ipv4(struct nm_iphdr *iph, void *data, } void -nm_csum_tcpudp_ipv6(struct nm_ipv6hdr *ip6h, void *data, +nm_os_csum_tcpudp_ipv6(struct nm_ipv6hdr *ip6h, void *data, size_t datalen, uint16_t *check) { #ifdef INET6 *check = in6_cksum_pseudo((void*)ip6h, datalen, ip6h->nexthdr, 0); - *check = nm_csum_fold(nm_csum_raw(data, datalen, 0)); + *check = nm_os_csum_fold(nm_os_csum_raw(data, datalen, 0)); #else static int notsupported = 0; if (!notsupported) { @@ -144,13 +231,41 @@ nm_csum_tcpudp_ipv6(struct nm_ipv6hdr *ip6h, void *data, #endif } +/* on FreeBSD we send up one packet at a time */ +void * +nm_os_send_up(struct ifnet *ifp, struct mbuf *m, struct mbuf *prev) +{ + + NA(ifp)->if_input(ifp, m); + return NULL; +} + +int +nm_os_mbuf_has_offld(struct mbuf *m) +{ + return m->m_pkthdr.csum_flags & (CSUM_TCP | CSUM_UDP | CSUM_SCTP | + CSUM_TCP_IPV6 | CSUM_UDP_IPV6 | + CSUM_SCTP_IPV6 | CSUM_TSO); +} + +static void +freebsd_generic_rx_handler(struct ifnet *ifp, struct mbuf *m) +{ + struct netmap_generic_adapter *gna = + (struct netmap_generic_adapter *)NA(ifp); + int stolen = generic_rx_handler(ifp, m); + + if (!stolen) { + gna->save_if_input(ifp, m); + } +} /* * Intercept the rx routine in the standard device driver. * Second argument is non-zero to intercept, 0 to restore */ int -netmap_catch_rx(struct netmap_generic_adapter *gna, int intercept) +nm_os_catch_rx(struct netmap_generic_adapter *gna, int intercept) { struct netmap_adapter *na = &gna->up.up; struct ifnet *ifp = na->ifp; @@ -161,7 +276,7 @@ netmap_catch_rx(struct netmap_generic_adapter *gna, int intercept) return EINVAL; /* already set */ } gna->save_if_input = ifp->if_input; - ifp->if_input = generic_rx_handler; + ifp->if_input = freebsd_generic_rx_handler; } else { if (!gna->save_if_input){ D("cannot restore"); @@ -181,18 +296,20 @@ netmap_catch_rx(struct netmap_generic_adapter *gna, int intercept) * Second argument is non-zero to intercept, 0 to restore. * On freebsd we just intercept if_transmit. */ -void -netmap_catch_tx(struct netmap_generic_adapter *gna, int enable) +int +nm_os_catch_tx(struct netmap_generic_adapter *gna, int intercept) { struct netmap_adapter *na = &gna->up.up; struct ifnet *ifp = netmap_generic_getifp(gna); - if (enable) { + if (intercept) { na->if_transmit = ifp->if_transmit; ifp->if_transmit = netmap_transmit; } else { ifp->if_transmit = na->if_transmit; } + + return 0; } @@ -213,40 +330,44 @@ netmap_catch_tx(struct netmap_generic_adapter *gna, int enable) * */ int -generic_xmit_frame(struct ifnet *ifp, struct mbuf *m, - void *addr, u_int len, u_int ring_nr) +nm_os_generic_xmit_frame(struct nm_os_gen_arg *a) { int ret; + u_int len = a->len; + struct ifnet *ifp = a->ifp; + struct mbuf *m = a->m; +#if __FreeBSD_version < 1100000 /* - * The mbuf should be a cluster from our special pool, - * so we do not need to do an m_copyback but just copy - * (and eventually, just reference the netmap buffer) + * Old FreeBSD versions. The mbuf has a cluster attached, + * we need to copy from the cluster to the netmap buffer. */ - - if (GET_MBUF_REFCNT(m) != 1) { - D("invalid refcnt %d for %p", - GET_MBUF_REFCNT(m), m); + if (MBUF_REFCNT(m) != 1) { + D("invalid refcnt %d for %p", MBUF_REFCNT(m), m); panic("in generic_xmit_frame"); } - // XXX the ext_size check is unnecessary if we link the netmap buf if (m->m_ext.ext_size < len) { RD(5, "size %d < len %d", m->m_ext.ext_size, len); len = m->m_ext.ext_size; } - if (0) { /* XXX seems to have negligible benefits */ - m->m_ext.ext_buf = m->m_data = addr; - } else { - bcopy(addr, m->m_data, len); - } + bcopy(a->addr, m->m_data, len); +#else /* __FreeBSD_version >= 1100000 */ + /* New FreeBSD versions. Link the external storage to + * the netmap buffer, so that no copy is necessary. */ + m->m_ext.ext_buf = m->m_data = a->addr; + m->m_ext.ext_size = len; +#endif /* __FreeBSD_version >= 1100000 */ + m->m_len = m->m_pkthdr.len = len; - // inc refcount. All ours, we could skip the atomic - atomic_fetchadd_int(PNT_MBUF_REFCNT(m), 1); + + /* mbuf refcnt is not contended, no need to use atomic + * (a memory barrier is enough). */ + SET_MBUF_REFCNT(m, 2); M_HASHTYPE_SET(m, M_HASHTYPE_OPAQUE); - m->m_pkthdr.flowid = ring_nr; + m->m_pkthdr.flowid = a->ring_nr; m->m_pkthdr.rcvif = ifp; /* used for tx notification */ ret = NA(ifp)->if_transmit(ifp, m); - return ret; + return ret ? -1 : 0; } @@ -263,7 +384,7 @@ netmap_getna(if_t ifp) * way to extract the info from the ifp */ int -generic_find_num_desc(struct ifnet *ifp, unsigned int *tx, unsigned int *rx) +nm_os_generic_find_num_desc(struct ifnet *ifp, unsigned int *tx, unsigned int *rx) { D("called, in tx %d rx %d", *tx, *rx); return 0; @@ -271,16 +392,23 @@ generic_find_num_desc(struct ifnet *ifp, unsigned int *tx, unsigned int *rx) void -generic_find_num_queues(struct ifnet *ifp, u_int *txq, u_int *rxq) +nm_os_generic_find_num_queues(struct ifnet *ifp, u_int *txq, u_int *rxq) { D("called, in txq %d rxq %d", *txq, *rxq); *txq = netmap_generic_rings; *rxq = netmap_generic_rings; } +void +nm_os_generic_set_features(struct netmap_generic_adapter *gna) +{ + + gna->rxsg = 1; /* Supported through m_copydata. */ + gna->txqdisc = 0; /* Not supported. */ +} void -netmap_mitigation_init(struct nm_generic_mit *mit, int idx, struct netmap_adapter *na) +nm_os_mitigation_init(struct nm_generic_mit *mit, int idx, struct netmap_adapter *na) { ND("called"); mit->mit_pending = 0; @@ -290,21 +418,21 @@ netmap_mitigation_init(struct nm_generic_mit *mit, int idx, struct netmap_adapte void -netmap_mitigation_start(struct nm_generic_mit *mit) +nm_os_mitigation_start(struct nm_generic_mit *mit) { ND("called"); } void -netmap_mitigation_restart(struct nm_generic_mit *mit) +nm_os_mitigation_restart(struct nm_generic_mit *mit) { ND("called"); } int -netmap_mitigation_active(struct nm_generic_mit *mit) +nm_os_mitigation_active(struct nm_generic_mit *mit) { ND("called"); return 0; @@ -312,7 +440,7 @@ netmap_mitigation_active(struct nm_generic_mit *mit) void -netmap_mitigation_cleanup(struct nm_generic_mit *mit) +nm_os_mitigation_cleanup(struct nm_generic_mit *mit) { ND("called"); } @@ -342,7 +470,7 @@ static struct { } nm_vi_indices; void -nm_vi_init_index(void) +nm_os_vi_init_index(void) { int i; for (i = 0; i < NM_VI_MAX; i++) @@ -398,7 +526,7 @@ nm_vi_free_index(uint8_t val) * increment this refcount on if_attach(). */ int -nm_vi_persist(const char *name, struct ifnet **ret) +nm_os_vi_persist(const char *name, struct ifnet **ret) { struct ifnet *ifp; u_short macaddr_hi; @@ -438,15 +566,220 @@ nm_vi_persist(const char *name, struct ifnet **ret) *ret = ifp; return 0; } + /* unregister from the system and drop the final refcount */ void -nm_vi_detach(struct ifnet *ifp) +nm_os_vi_detach(struct ifnet *ifp) { nm_vi_free_index(((char *)IF_LLADDR(ifp))[5]); ether_ifdetach(ifp); if_free(ifp); } +/* ======================== PTNETMAP SUPPORT ========================== */ + +#ifdef WITH_PTNETMAP_GUEST +#include <sys/bus.h> +#include <sys/rman.h> +#include <machine/bus.h> /* bus_dmamap_* */ +#include <machine/resource.h> +#include <dev/pci/pcivar.h> +#include <dev/pci/pcireg.h> +/* + * ptnetmap memory device (memdev) for freebsd guest, + * ssed to expose host netmap memory to the guest through a PCI BAR. + */ + +/* + * ptnetmap memdev private data structure + */ +struct ptnetmap_memdev { + device_t dev; + struct resource *pci_io; + struct resource *pci_mem; + struct netmap_mem_d *nm_mem; +}; + +static int ptn_memdev_probe(device_t); +static int ptn_memdev_attach(device_t); +static int ptn_memdev_detach(device_t); +static int ptn_memdev_shutdown(device_t); + +static device_method_t ptn_memdev_methods[] = { + DEVMETHOD(device_probe, ptn_memdev_probe), + DEVMETHOD(device_attach, ptn_memdev_attach), + DEVMETHOD(device_detach, ptn_memdev_detach), + DEVMETHOD(device_shutdown, ptn_memdev_shutdown), + DEVMETHOD_END +}; + +static driver_t ptn_memdev_driver = { + PTNETMAP_MEMDEV_NAME, + ptn_memdev_methods, + sizeof(struct ptnetmap_memdev), +}; + +/* We use (SI_ORDER_MIDDLE+1) here, see DEV_MODULE_ORDERED() invocation + * below. */ +static devclass_t ptnetmap_devclass; +DRIVER_MODULE_ORDERED(ptn_memdev, pci, ptn_memdev_driver, ptnetmap_devclass, + NULL, NULL, SI_ORDER_MIDDLE + 1); + +/* + * I/O port read/write wrappers. + * Some are not used, so we keep them commented out until needed + */ +#define ptn_ioread16(ptn_dev, reg) bus_read_2((ptn_dev)->pci_io, (reg)) +#define ptn_ioread32(ptn_dev, reg) bus_read_4((ptn_dev)->pci_io, (reg)) +#if 0 +#define ptn_ioread8(ptn_dev, reg) bus_read_1((ptn_dev)->pci_io, (reg)) +#define ptn_iowrite8(ptn_dev, reg, val) bus_write_1((ptn_dev)->pci_io, (reg), (val)) +#define ptn_iowrite16(ptn_dev, reg, val) bus_write_2((ptn_dev)->pci_io, (reg), (val)) +#define ptn_iowrite32(ptn_dev, reg, val) bus_write_4((ptn_dev)->pci_io, (reg), (val)) +#endif /* unused */ + +/* + * Map host netmap memory through PCI-BAR in the guest OS, + * returning physical (nm_paddr) and virtual (nm_addr) addresses + * of the netmap memory mapped in the guest. + */ +int +nm_os_pt_memdev_iomap(struct ptnetmap_memdev *ptn_dev, vm_paddr_t *nm_paddr, void **nm_addr) +{ + uint32_t mem_size; + int rid; + + D("ptn_memdev_driver iomap"); + + rid = PCIR_BAR(PTNETMAP_MEM_PCI_BAR); + mem_size = ptn_ioread32(ptn_dev, PTNETMAP_IO_PCI_MEMSIZE); + + /* map memory allocator */ + ptn_dev->pci_mem = bus_alloc_resource(ptn_dev->dev, SYS_RES_MEMORY, + &rid, 0, ~0, mem_size, RF_ACTIVE); + if (ptn_dev->pci_mem == NULL) { + *nm_paddr = 0; + *nm_addr = 0; + return ENOMEM; + } + + *nm_paddr = rman_get_start(ptn_dev->pci_mem); + *nm_addr = rman_get_virtual(ptn_dev->pci_mem); + + D("=== BAR %d start %lx len %lx mem_size %x ===", + PTNETMAP_MEM_PCI_BAR, + *nm_paddr, + rman_get_size(ptn_dev->pci_mem), + mem_size); + return (0); +} + +/* Unmap host netmap memory. */ +void +nm_os_pt_memdev_iounmap(struct ptnetmap_memdev *ptn_dev) +{ + D("ptn_memdev_driver iounmap"); + + if (ptn_dev->pci_mem) { + bus_release_resource(ptn_dev->dev, SYS_RES_MEMORY, + PCIR_BAR(PTNETMAP_MEM_PCI_BAR), ptn_dev->pci_mem); + ptn_dev->pci_mem = NULL; + } +} + +/* Device identification routine, return BUS_PROBE_DEFAULT on success, + * positive on failure */ +static int +ptn_memdev_probe(device_t dev) +{ + char desc[256]; + + if (pci_get_vendor(dev) != PTNETMAP_PCI_VENDOR_ID) + return (ENXIO); + if (pci_get_device(dev) != PTNETMAP_PCI_DEVICE_ID) + return (ENXIO); + + snprintf(desc, sizeof(desc), "%s PCI adapter", + PTNETMAP_MEMDEV_NAME); + device_set_desc_copy(dev, desc); + + return (BUS_PROBE_DEFAULT); +} + +/* Device initialization routine. */ +static int +ptn_memdev_attach(device_t dev) +{ + struct ptnetmap_memdev *ptn_dev; + int rid; + uint16_t mem_id; + + D("ptn_memdev_driver attach"); + + ptn_dev = device_get_softc(dev); + ptn_dev->dev = dev; + + pci_enable_busmaster(dev); + + rid = PCIR_BAR(PTNETMAP_IO_PCI_BAR); + ptn_dev->pci_io = bus_alloc_resource_any(dev, SYS_RES_IOPORT, &rid, + RF_ACTIVE); + if (ptn_dev->pci_io == NULL) { + device_printf(dev, "cannot map I/O space\n"); + return (ENXIO); + } + + mem_id = ptn_ioread16(ptn_dev, PTNETMAP_IO_PCI_HOSTID); + + /* create guest allocator */ + ptn_dev->nm_mem = netmap_mem_pt_guest_attach(ptn_dev, mem_id); + if (ptn_dev->nm_mem == NULL) { + ptn_memdev_detach(dev); + return (ENOMEM); + } + netmap_mem_get(ptn_dev->nm_mem); + + D("ptn_memdev_driver probe OK - host_id: %d", mem_id); + + return (0); +} + +/* Device removal routine. */ +static int +ptn_memdev_detach(device_t dev) +{ + struct ptnetmap_memdev *ptn_dev; + + D("ptn_memdev_driver detach"); + ptn_dev = device_get_softc(dev); + + if (ptn_dev->nm_mem) { + netmap_mem_put(ptn_dev->nm_mem); + ptn_dev->nm_mem = NULL; + } + if (ptn_dev->pci_mem) { + bus_release_resource(dev, SYS_RES_MEMORY, + PCIR_BAR(PTNETMAP_MEM_PCI_BAR), ptn_dev->pci_mem); + ptn_dev->pci_mem = NULL; + } + if (ptn_dev->pci_io) { + bus_release_resource(dev, SYS_RES_IOPORT, + PCIR_BAR(PTNETMAP_IO_PCI_BAR), ptn_dev->pci_io); + ptn_dev->pci_io = NULL; + } + + return (0); +} + +static int +ptn_memdev_shutdown(device_t dev) +{ + D("ptn_memdev_driver shutdown"); + return bus_generic_shutdown(dev); +} + +#endif /* WITH_PTNETMAP_GUEST */ + /* * In order to track whether pages are still mapped, we hook into * the standard cdev_pager and intercept the constructor and @@ -606,7 +939,7 @@ err_unlock: * the device (/dev/netmap) so we cannot do anything useful. * To track close() on individual file descriptors we pass netmap_dtor() to * devfs_set_cdevpriv() on open(). The FreeBSD kernel will call the destructor - * when the last fd pointing to the device is closed. + * when the last fd pointing to the device is closed. * * Note that FreeBSD does not even munmap() on close() so we also have * to track mmap() ourselves, and postpone the call to @@ -634,26 +967,275 @@ netmap_open(struct cdev *dev, int oflags, int devtype, struct thread *td) (void)devtype; (void)td; - priv = malloc(sizeof(struct netmap_priv_d), M_DEVBUF, - M_NOWAIT | M_ZERO); - if (priv == NULL) - return ENOMEM; - priv->np_refs = 1; + NMG_LOCK(); + priv = netmap_priv_new(); + if (priv == NULL) { + error = ENOMEM; + goto out; + } error = devfs_set_cdevpriv(priv, netmap_dtor); if (error) { - free(priv, M_DEVBUF); - } else { - NMG_LOCK(); - netmap_use_count++; - NMG_UNLOCK(); + netmap_priv_delete(priv); } +out: + NMG_UNLOCK(); return error; } +/******************** kthread wrapper ****************/ +#include <sys/sysproto.h> +u_int +nm_os_ncpus(void) +{ + return mp_maxid + 1; +} + +struct nm_kthread_ctx { + struct thread *user_td; /* thread user-space (kthread creator) to send ioctl */ + /* notification to guest (interrupt) */ + int irq_fd; /* ioctl fd */ + struct nm_kth_ioctl irq_ioctl; /* ioctl arguments */ + + /* notification from guest */ + void *ioevent_file; /* tsleep() argument */ + + /* worker function and parameter */ + nm_kthread_worker_fn_t worker_fn; + void *worker_private; + + struct nm_kthread *nmk; + + /* integer to manage multiple worker contexts (e.g., RX or TX on ptnetmap) */ + long type; +}; + +struct nm_kthread { + struct thread *worker; + struct mtx worker_lock; + uint64_t scheduled; /* pending wake_up request */ + struct nm_kthread_ctx worker_ctx; + int run; /* used to stop kthread */ + int attach_user; /* kthread attached to user_process */ + int affinity; +}; + +void inline +nm_os_kthread_wakeup_worker(struct nm_kthread *nmk) +{ + /* + * There may be a race between FE and BE, + * which call both this function, and worker kthread, + * that reads nmk->scheduled. + * + * For us it is not important the counter value, + * but simply that it has changed since the last + * time the kthread saw it. + */ + mtx_lock(&nmk->worker_lock); + nmk->scheduled++; + if (nmk->worker_ctx.ioevent_file) { + wakeup(nmk->worker_ctx.ioevent_file); + } + mtx_unlock(&nmk->worker_lock); +} + +void inline +nm_os_kthread_send_irq(struct nm_kthread *nmk) +{ + struct nm_kthread_ctx *ctx = &nmk->worker_ctx; + int err; + + if (ctx->user_td && ctx->irq_fd > 0) { + err = kern_ioctl(ctx->user_td, ctx->irq_fd, ctx->irq_ioctl.com, (caddr_t)&ctx->irq_ioctl.data.msix); + if (err) { + D("kern_ioctl error: %d ioctl parameters: fd %d com %lu data %p", + err, ctx->irq_fd, ctx->irq_ioctl.com, &ctx->irq_ioctl.data); + } + } +} + +static void +nm_kthread_worker(void *data) +{ + struct nm_kthread *nmk = data; + struct nm_kthread_ctx *ctx = &nmk->worker_ctx; + uint64_t old_scheduled = nmk->scheduled; + + if (nmk->affinity >= 0) { + thread_lock(curthread); + sched_bind(curthread, nmk->affinity); + thread_unlock(curthread); + } + + while (nmk->run) { + /* + * check if the parent process dies + * (when kthread is attached to user process) + */ + if (ctx->user_td) { + PROC_LOCK(curproc); + thread_suspend_check(0); + PROC_UNLOCK(curproc); + } else { + kthread_suspend_check(); + } + + /* + * if ioevent_file is not defined, we don't have notification + * mechanism and we continually execute worker_fn() + */ + if (!ctx->ioevent_file) { + ctx->worker_fn(ctx->worker_private); /* worker body */ + } else { + /* checks if there is a pending notification */ + mtx_lock(&nmk->worker_lock); + if (likely(nmk->scheduled != old_scheduled)) { + old_scheduled = nmk->scheduled; + mtx_unlock(&nmk->worker_lock); + + ctx->worker_fn(ctx->worker_private); /* worker body */ + + continue; + } else if (nmk->run) { + /* wait on event with one second timeout */ + msleep_spin(ctx->ioevent_file, &nmk->worker_lock, + "nmk_ev", hz); + nmk->scheduled++; + } + mtx_unlock(&nmk->worker_lock); + } + } + + kthread_exit(); +} + +static int +nm_kthread_open_files(struct nm_kthread *nmk, struct nm_kthread_cfg *cfg) +{ + /* send irq through ioctl to bhyve (vmm.ko) */ + if (cfg->event.irqfd) { + nmk->worker_ctx.irq_fd = cfg->event.irqfd; + nmk->worker_ctx.irq_ioctl = cfg->event.ioctl; + } + /* ring.ioeventfd contains the chan where do tsleep to wait events */ + if (cfg->event.ioeventfd) { + nmk->worker_ctx.ioevent_file = (void *)cfg->event.ioeventfd; + } + + return 0; +} + +static void +nm_kthread_close_files(struct nm_kthread *nmk) +{ + nmk->worker_ctx.irq_fd = 0; + nmk->worker_ctx.ioevent_file = NULL; +} + +void +nm_os_kthread_set_affinity(struct nm_kthread *nmk, int affinity) +{ + nmk->affinity = affinity; +} + +struct nm_kthread * +nm_os_kthread_create(struct nm_kthread_cfg *cfg) +{ + struct nm_kthread *nmk = NULL; + int error; + + nmk = malloc(sizeof(*nmk), M_DEVBUF, M_NOWAIT | M_ZERO); + if (!nmk) + return NULL; + + mtx_init(&nmk->worker_lock, "nm_kthread lock", NULL, MTX_SPIN); + nmk->worker_ctx.worker_fn = cfg->worker_fn; + nmk->worker_ctx.worker_private = cfg->worker_private; + nmk->worker_ctx.type = cfg->type; + nmk->affinity = -1; + + /* attach kthread to user process (ptnetmap) */ + nmk->attach_user = cfg->attach_user; + + /* open event fd */ + error = nm_kthread_open_files(nmk, cfg); + if (error) + goto err; + + return nmk; +err: + free(nmk, M_DEVBUF); + return NULL; +} + +int +nm_os_kthread_start(struct nm_kthread *nmk) +{ + struct proc *p = NULL; + int error = 0; + + if (nmk->worker) { + return EBUSY; + } + + /* check if we want to attach kthread to user process */ + if (nmk->attach_user) { + nmk->worker_ctx.user_td = curthread; + p = curthread->td_proc; + } + + /* enable kthread main loop */ + nmk->run = 1; + /* create kthread */ + if((error = kthread_add(nm_kthread_worker, nmk, p, + &nmk->worker, RFNOWAIT /* to be checked */, 0, "nm-kthread-%ld", + nmk->worker_ctx.type))) { + goto err; + } + + D("nm_kthread started td 0x%p", nmk->worker); + + return 0; +err: + D("nm_kthread start failed err %d", error); + nmk->worker = NULL; + return error; +} + +void +nm_os_kthread_stop(struct nm_kthread *nmk) +{ + if (!nmk->worker) { + return; + } + /* tell to kthread to exit from main loop */ + nmk->run = 0; + + /* wake up kthread if it sleeps */ + kthread_resume(nmk->worker); + nm_os_kthread_wakeup_worker(nmk); + + nmk->worker = NULL; +} + +void +nm_os_kthread_delete(struct nm_kthread *nmk) +{ + if (!nmk) + return; + if (nmk->worker) { + nm_os_kthread_stop(nmk); + } + + nm_kthread_close_files(nmk); + + free(nmk, M_DEVBUF); +} + /******************** kqueue support ****************/ /* - * The OS_selwakeup also needs to issue a KNOTE_UNLOCKED. + * nm_os_selwakeup also needs to issue a KNOTE_UNLOCKED. * We use a non-zero argument to distinguish the call from the one * in kevent_scan() which instead also needs to run netmap_poll(). * The knote uses a global mutex for the time being. We might @@ -672,17 +1254,23 @@ netmap_open(struct cdev *dev, int oflags, int devtype, struct thread *td) void -freebsd_selwakeup(struct nm_selinfo *si, int pri) +nm_os_selwakeup(struct nm_selinfo *si) { if (netmap_verbose) D("on knote %p", &si->si.si_note); - selwakeuppri(&si->si, pri); + selwakeuppri(&si->si, PI_NET); /* use a non-zero hint to tell the notification from the * call done in kqueue_scan() which uses 0 */ KNOTE_UNLOCKED(&si->si.si_note, 0x100 /* notification */); } +void +nm_os_selrecord(struct thread *td, struct nm_selinfo *si) +{ + selrecord(td, &si->si); +} + static void netmap_knrdetach(struct knote *kn) { @@ -728,7 +1316,7 @@ netmap_knrw(struct knote *kn, long hint, int events) RD(5, "curthread changed %p %p", curthread, priv->np_td); return 1; } else { - revents = netmap_poll((void *)priv, events, curthread); + revents = netmap_poll(priv, events, NULL); return (events & revents) ? 1 : 0; } } @@ -801,13 +1389,47 @@ netmap_kqfilter(struct cdev *dev, struct knote *kn) return 0; } +static int +freebsd_netmap_poll(struct cdev *cdevi __unused, int events, struct thread *td) +{ + struct netmap_priv_d *priv; + if (devfs_get_cdevpriv((void **)&priv)) { + return POLLERR; + } + return netmap_poll(priv, events, td); +} + +static int +freebsd_netmap_ioctl(struct cdev *dev __unused, u_long cmd, caddr_t data, + int ffla __unused, struct thread *td) +{ + int error; + struct netmap_priv_d *priv; + + CURVNET_SET(TD_TO_VNET(rd)); + error = devfs_get_cdevpriv((void **)&priv); + if (error) { + /* XXX ENOENT should be impossible, since the priv + * is now created in the open */ + if (error == ENOENT) + error = ENXIO; + goto out; + } + error = netmap_ioctl(priv, cmd, data, td); +out: + CURVNET_RESTORE(); + + return error; +} + +extern struct cdevsw netmap_cdevsw; /* XXX used in netmap.c, should go elsewhere */ struct cdevsw netmap_cdevsw = { .d_version = D_VERSION, .d_name = "netmap", .d_open = netmap_open, .d_mmap_single = netmap_mmap_single, - .d_ioctl = netmap_ioctl, - .d_poll = netmap_poll, + .d_ioctl = freebsd_netmap_ioctl, + .d_poll = freebsd_netmap_poll, .d_kqfilter = netmap_kqfilter, .d_close = netmap_close, }; @@ -852,6 +1474,24 @@ netmap_loader(__unused struct module *module, int event, __unused void *arg) return (error); } - +#ifdef DEV_MODULE_ORDERED +/* + * The netmap module contains three drivers: (i) the netmap character device + * driver; (ii) the ptnetmap memdev PCI device driver, (iii) the ptnet PCI + * device driver. The attach() routines of both (ii) and (iii) need the + * lock of the global allocator, and such lock is initialized in netmap_init(), + * which is part of (i). + * Therefore, we make sure that (i) is loaded before (ii) and (iii), using + * the 'order' parameter of driver declaration macros. For (i), we specify + * SI_ORDER_MIDDLE, while higher orders are used with the DRIVER_MODULE_ORDERED + * macros for (ii) and (iii). + */ +DEV_MODULE_ORDERED(netmap, netmap_loader, NULL, SI_ORDER_MIDDLE); +#else /* !DEV_MODULE_ORDERED */ DEV_MODULE(netmap, netmap_loader, NULL); +#endif /* DEV_MODULE_ORDERED */ +MODULE_DEPEND(netmap, pci, 1, 1, 1); MODULE_VERSION(netmap, 1); +/* reduce conditional code */ +// linux API, use for the knlist in FreeBSD +/* use a private mutex for the knlist */ |
