diff options
author | Luigi Rizzo <luigi@FreeBSD.org> | 2016-10-16 14:13:32 +0000 |
---|---|---|
committer | Luigi Rizzo <luigi@FreeBSD.org> | 2016-10-16 14:13:32 +0000 |
commit | 37e3a6d349581b4dd0aebf24be7b1b159a698dcf (patch) | |
tree | 0e61deea141c9733af511b0485cf1fd0f2dd17ed /sys/net | |
parent | 63f6b1a75a8e6e33e4f9d65571c6a221444d3b05 (diff) | |
download | src-test2-37e3a6d349581b4dd0aebf24be7b1b159a698dcf.tar.gz src-test2-37e3a6d349581b4dd0aebf24be7b1b159a698dcf.zip |
Notes
Diffstat (limited to 'sys/net')
-rw-r--r-- | sys/net/netmap.h | 109 | ||||
-rw-r--r-- | sys/net/netmap_user.h | 422 |
2 files changed, 489 insertions, 42 deletions
diff --git a/sys/net/netmap.h b/sys/net/netmap.h index 88b2957502ab..c3b8b9205d3d 100644 --- a/sys/net/netmap.h +++ b/sys/net/netmap.h @@ -137,6 +137,26 @@ * netmap:foo-k the k-th NIC ring pair * netmap:foo{k PIPE ring pair k, master side * netmap:foo}k PIPE ring pair k, slave side + * + * Some notes about host rings: + * + * + The RX host ring is used to store those packets that the host network + * stack is trying to transmit through a NIC queue, but only if that queue + * is currently in netmap mode. Netmap will not intercept host stack mbufs + * designated to NIC queues that are not in netmap mode. As a consequence, + * registering a netmap port with netmap:foo^ is not enough to intercept + * mbufs in the RX host ring; the netmap port should be registered with + * netmap:foo*, or another registration should be done to open at least a + * NIC TX queue in netmap mode. + * + * + Netmap is not currently able to deal with intercepted transmit mbufs which + * require offloadings like TSO, UFO, checksumming offloadings, etc. It is + * the responsibility of the user to disable those offloadings (e.g. using + * ifconfig on FreeBSD or ethtool -K on Linux) for an interface that is being + * used in netmap mode. If the offloadings are not disabled, GSO and/or + * unchecksummed packets may be dropped immediately or end up in the host RX + * ring, and will be dropped as soon as the packet reaches another netmap + * adapter. */ /* @@ -277,7 +297,11 @@ struct netmap_ring { struct timeval ts; /* (k) time of last *sync() */ /* opaque room for a mutex or similar object */ - uint8_t sem[128] __attribute__((__aligned__(NM_CACHE_ALIGN))); +#if !defined(_WIN32) || defined(__CYGWIN__) + uint8_t __attribute__((__aligned__(NM_CACHE_ALIGN))) sem[128]; +#else + uint8_t __declspec(align(NM_CACHE_ALIGN)) sem[128]; +#endif /* the slots follow. This struct has variable size */ struct netmap_slot slot[0]; /* array of slots. 
*/ @@ -496,6 +520,11 @@ struct nmreq { #define NETMAP_BDG_OFFSET NETMAP_BDG_VNET_HDR /* deprecated alias */ #define NETMAP_BDG_NEWIF 6 /* create a virtual port */ #define NETMAP_BDG_DELIF 7 /* destroy a virtual port */ +#define NETMAP_PT_HOST_CREATE 8 /* create ptnetmap kthreads */ +#define NETMAP_PT_HOST_DELETE 9 /* delete ptnetmap kthreads */ +#define NETMAP_BDG_POLLING_ON 10 /* start polling kthread */ +#define NETMAP_BDG_POLLING_OFF 11 /* delete polling kthread */ +#define NETMAP_VNET_HDR_GET 12 /* get the port virtio-net-hdr length */ uint16_t nr_arg1; /* reserve extra rings in NIOCREGIF */ #define NETMAP_BDG_HOST 1 /* attach the host stack on ATTACH */ @@ -521,7 +550,61 @@ enum { NR_REG_DEFAULT = 0, /* backward compat, should not be used. */ #define NR_ZCOPY_MON 0x400 /* request exclusive access to the selected rings */ #define NR_EXCLUSIVE 0x800 +/* request ptnetmap host support */ +#define NR_PASSTHROUGH_HOST NR_PTNETMAP_HOST /* deprecated */ +#define NR_PTNETMAP_HOST 0x1000 +#define NR_RX_RINGS_ONLY 0x2000 +#define NR_TX_RINGS_ONLY 0x4000 +/* Applications set this flag if they are able to deal with virtio-net headers, + * that is send/receive frames that start with a virtio-net header. + * If not set, NIOCREGIF will fail with netmap ports that require applications + * to use those headers. If the flag is set, the application can use the + * NETMAP_VNET_HDR_GET command to figure out the header length. */ +#define NR_ACCEPT_VNET_HDR 0x8000 + +#define NM_BDG_NAME "vale" /* prefix for bridge port name */ + +/* + * Windows does not have _IOWR(). _IO(), _IOW() and _IOR() are defined + * in ws2def.h but not sure if they are in the form we need. 
+ * XXX so we redefine them + * in a convenient way to use for DeviceIoControl signatures + */ +#ifdef _WIN32 +#undef _IO // ws2def.h +#define _WIN_NM_IOCTL_TYPE 40000 +#define _IO(_c, _n) CTL_CODE(_WIN_NM_IOCTL_TYPE, ((_n) + 0x800) , \ + METHOD_BUFFERED, FILE_ANY_ACCESS ) +#define _IO_direct(_c, _n) CTL_CODE(_WIN_NM_IOCTL_TYPE, ((_n) + 0x800) , \ + METHOD_OUT_DIRECT, FILE_ANY_ACCESS ) + +#define _IOWR(_c, _n, _s) _IO(_c, _n) + +/* We have some internal sysctl in addition to the externally visible ones */ +#define NETMAP_MMAP _IO_direct('i', 160) // note METHOD_OUT_DIRECT +#define NETMAP_POLL _IO('i', 162) + +/* and also two setsockopt for sysctl emulation */ +#define NETMAP_SETSOCKOPT _IO('i', 140) +#define NETMAP_GETSOCKOPT _IO('i', 141) + + +//These linknames are for the Netmap Core Driver +#define NETMAP_NT_DEVICE_NAME L"\\Device\\NETMAP" +#define NETMAP_DOS_DEVICE_NAME L"\\DosDevices\\netmap" +//Definition of a structure used to pass a virtual address within an IOCTL +typedef struct _MEMORY_ENTRY { + PVOID pUsermodeVirtualAddress; +} MEMORY_ENTRY, *PMEMORY_ENTRY; + +typedef struct _POLL_REQUEST_DATA { + int events; + int timeout; + int revents; +} POLL_REQUEST_DATA; + +#endif /* _WIN32 */ /* * FreeBSD uses the size value embedded in the _IOWR to determine @@ -561,4 +644,28 @@ struct nm_ifreq { char data[NM_IFRDATA_LEN]; }; +/* + * netmap kernel thread configuration + */ +/* bhyve/vmm.ko MSIX parameters for IOCTL */ +struct ptn_vmm_ioctl_msix { + uint64_t msg; + uint64_t addr; +}; + +/* IOCTL parameters */ +struct nm_kth_ioctl { + u_long com; + /* TODO: use union */ + union { + struct ptn_vmm_ioctl_msix msix; + } data; +}; + +/* Configuration of a ptnetmap ring */ +struct ptnet_ring_cfg { + uint64_t ioeventfd; /* eventfd in linux, tsleep() parameter in FreeBSD */ + uint64_t irqfd; /* eventfd in linux, ioctl fd in FreeBSD */ + struct nm_kth_ioctl ioctl; /* ioctl parameter to send irq (only used in bhyve/FreeBSD) */ +}; #endif /* _NET_NETMAP_H_ */ diff --git 
a/sys/net/netmap_user.h b/sys/net/netmap_user.h index 130117db7a2e..4ec3d941c504 100644 --- a/sys/net/netmap_user.h +++ b/sys/net/netmap_user.h @@ -1,5 +1,6 @@ /* - * Copyright (C) 2011-2014 Universita` di Pisa. All rights reserved. + * Copyright (C) 2011-2016 Universita` di Pisa + * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions @@ -65,9 +66,31 @@ #ifndef _NET_NETMAP_USER_H_ #define _NET_NETMAP_USER_H_ +#define NETMAP_DEVICE_NAME "/dev/netmap" + +#ifdef __CYGWIN__ +/* + * we can compile userspace apps with either cygwin or msvc, + * and we use _WIN32 to identify windows specific code + */ +#ifndef _WIN32 +#define _WIN32 +#endif /* _WIN32 */ + +#endif /* __CYGWIN__ */ + +#ifdef _WIN32 +#undef NETMAP_DEVICE_NAME +#define NETMAP_DEVICE_NAME "/proc/sys/DosDevices/Global/netmap" +#include <windows.h> +#include <WinDef.h> +#include <sys/cygwin.h> +#endif /* _WIN32 */ + #include <stdint.h> #include <sys/socket.h> /* apple needs sockaddr */ #include <net/if.h> /* IFNAMSIZ */ +#include <ctype.h> #ifndef likely #define likely(x) __builtin_expect(!!(x), 1) @@ -172,17 +195,23 @@ nm_ring_space(struct netmap_ring *ring) } while (0) #endif -struct nm_pkthdr { /* same as pcap_pkthdr */ +struct nm_pkthdr { /* first part is the same as pcap_pkthdr */ struct timeval ts; uint32_t caplen; uint32_t len; + + uint64_t flags; /* NM_MORE_PKTS etc */ +#define NM_MORE_PKTS 1 + struct nm_desc *d; + struct netmap_slot *slot; + uint8_t *buf; }; struct nm_stat { /* same as pcap_stat */ u_int ps_recv; u_int ps_drop; u_int ps_ifdrop; -#ifdef WIN32 +#ifdef WIN32 /* XXX or _WIN32 ? 
*/ u_int bs_capt; #endif /* WIN32 */ }; @@ -284,12 +313,14 @@ typedef void (*nm_cb_t)(u_char *, const struct nm_pkthdr *, const u_char *d); * -NN bind individual NIC ring pair * {NN bind master side of pipe NN * }NN bind slave side of pipe NN - * a suffix starting with + and the following flags, + * a suffix starting with / and the following flags, * in any order: * x exclusive access * z zero copy monitor * t monitor tx side * r monitor rx side + * R bind only RX ring(s) + * T bind only TX ring(s) * * req provides the initial values of nmreq before parsing ifname. * Remember that the ifname parsing will override the ring @@ -329,6 +360,13 @@ enum { static int nm_close(struct nm_desc *); /* + * nm_mmap() do mmap or inherit from parent if the nr_arg2 + * (memory block) matches. + */ + +static int nm_mmap(struct nm_desc *, const struct nm_desc *); + +/* * nm_inject() is the same as pcap_inject() * nm_dispatch() is the same as pcap_dispatch() * nm_nextpkt() is the same as pcap_next() @@ -338,13 +376,247 @@ static int nm_inject(struct nm_desc *, const void *, size_t); static int nm_dispatch(struct nm_desc *, int, nm_cb_t, u_char *); static u_char *nm_nextpkt(struct nm_desc *, struct nm_pkthdr *); +#ifdef _WIN32 + +intptr_t _get_osfhandle(int); /* defined in io.h in windows */ + +/* + * In windows we do not have yet native poll support, so we keep track + * of file descriptors associated to netmap ports to emulate poll on + * them and fall back on regular poll on other file descriptors. 
+ */ +struct win_netmap_fd_list { + struct win_netmap_fd_list *next; + int win_netmap_fd; + HANDLE win_netmap_handle; +}; + +/* + * list head containing all the netmap opened fd and their + * windows HANDLE counterparts + */ +static struct win_netmap_fd_list *win_netmap_fd_list_head; + +static void +win_insert_fd_record(int fd) +{ + struct win_netmap_fd_list *curr; + + for (curr = win_netmap_fd_list_head; curr; curr = curr->next) { + if (fd == curr->win_netmap_fd) { + return; + } + } + curr = calloc(1, sizeof(*curr)); + curr->next = win_netmap_fd_list_head; + curr->win_netmap_fd = fd; + curr->win_netmap_handle = IntToPtr(_get_osfhandle(fd)); + win_netmap_fd_list_head = curr; +} + +void +win_remove_fd_record(int fd) +{ + struct win_netmap_fd_list *curr = win_netmap_fd_list_head; + struct win_netmap_fd_list *prev = NULL; + for (; curr ; prev = curr, curr = curr->next) { + if (fd != curr->win_netmap_fd) + continue; + /* found the entry */ + if (prev == NULL) { /* we are freeing the first entry */ + win_netmap_fd_list_head = curr->next; + } else { + prev->next = curr->next; + } + free(curr); + break; + } +} + + +HANDLE +win_get_netmap_handle(int fd) +{ + struct win_netmap_fd_list *curr; + + for (curr = win_netmap_fd_list_head; curr; curr = curr->next) { + if (fd == curr->win_netmap_fd) { + return curr->win_netmap_handle; + } + } + return NULL; +} + +/* + * we need to wrap ioctl and mmap, at least for the netmap file descriptors + */ + +/* + * use this function only from netmap_user.h internal functions + * same as ioctl, returns 0 on success and -1 on error + */ +static int +win_nm_ioctl_internal(HANDLE h, int32_t ctlCode, void *arg) +{ + DWORD bReturn = 0, szIn, szOut; + BOOL ioctlReturnStatus; + void *inParam = arg, *outParam = arg; + + switch (ctlCode) { + case NETMAP_POLL: + szIn = sizeof(POLL_REQUEST_DATA); + szOut = sizeof(POLL_REQUEST_DATA); + break; + case NETMAP_MMAP: + szIn = 0; + szOut = sizeof(void*); + inParam = NULL; /* nothing on input */ + break; + case 
NIOCTXSYNC: + case NIOCRXSYNC: + szIn = 0; + szOut = 0; + break; + case NIOCREGIF: + szIn = sizeof(struct nmreq); + szOut = sizeof(struct nmreq); + break; + case NIOCCONFIG: + D("unsupported NIOCCONFIG!"); + return -1; + + default: /* a regular ioctl */ + D("invalid ioctl %x on netmap fd", ctlCode); + return -1; + } + + ioctlReturnStatus = DeviceIoControl(h, + ctlCode, inParam, szIn, + outParam, szOut, + &bReturn, NULL); + // XXX note windows returns 0 on error or async call, 1 on success + // we could call GetLastError() to figure out what happened + return ioctlReturnStatus ? 0 : -1; +} + +/* + * this function is what must be called from user-space programs + * same as ioctl, returns 0 on success and -1 on error + */ +static int +win_nm_ioctl(int fd, int32_t ctlCode, void *arg) +{ + HANDLE h = win_get_netmap_handle(fd); + + if (h == NULL) { + return ioctl(fd, ctlCode, arg); + } else { + return win_nm_ioctl_internal(h, ctlCode, arg); + } +} + +#define ioctl win_nm_ioctl /* from now on, within this file ... */ + +/* + * We cannot use the native mmap on windows + * The only parameter used is "fd", the other ones are just declared to + * make this signature comparable to the FreeBSD/Linux one + */ +static void * +win32_mmap_emulated(void *addr, size_t length, int prot, int flags, int fd, int32_t offset) +{ + HANDLE h = win_get_netmap_handle(fd); + + if (h == NULL) { + return mmap(addr, length, prot, flags, fd, offset); + } else { + MEMORY_ENTRY ret; + + return win_nm_ioctl_internal(h, NETMAP_MMAP, &ret) ? 
+ NULL : ret.pUsermodeVirtualAddress; + } +} + +#define mmap win32_mmap_emulated + +#include <sys/poll.h> /* XXX needed to use the structure pollfd */ + +static int +win_nm_poll(struct pollfd *fds, int nfds, int timeout) +{ + HANDLE h; + + if (nfds != 1 || fds == NULL || (h = win_get_netmap_handle(fds->fd)) == NULL) {; + return poll(fds, nfds, timeout); + } else { + POLL_REQUEST_DATA prd; + + prd.timeout = timeout; + prd.events = fds->events; + + win_nm_ioctl_internal(h, NETMAP_POLL, &prd); + if ((prd.revents == POLLERR) || (prd.revents == STATUS_TIMEOUT)) { + return -1; + } + return 1; + } +} + +#define poll win_nm_poll + +static int +win_nm_open(char* pathname, int flags) +{ + + if (strcmp(pathname, NETMAP_DEVICE_NAME) == 0) { + int fd = open(NETMAP_DEVICE_NAME, O_RDWR); + if (fd < 0) { + return -1; + } + + win_insert_fd_record(fd); + return fd; + } else { + return open(pathname, flags); + } +} + +#define open win_nm_open + +static int +win_nm_close(int fd) +{ + if (fd != -1) { + close(fd); + if (win_get_netmap_handle(fd) != NULL) { + win_remove_fd_record(fd); + } + } + return 0; +} + +#define close win_nm_close + +#endif /* _WIN32 */ + +static int +nm_is_identifier(const char *s, const char *e) +{ + for (; s != e; s++) { + if (!isalnum(*s) && *s != '_') { + return 0; + } + } + + return 1; +} /* * Try to open, return descriptor if successful, NULL otherwise. * An invalid netmap name will return errno = 0; * You can pass a pointer to a pre-filled nm_desc to add special * parameters. Flags is used as follows - * NM_OPEN_NO_MMAP use the memory from arg, only + * NM_OPEN_NO_MMAP use the memory from arg, only XXX avoid mmap * if the nr_arg2 (memory block) matches. 
* NM_OPEN_ARG1 use req.nr_arg1 from arg * NM_OPEN_ARG2 use req.nr_arg2 from arg @@ -359,20 +631,48 @@ nm_open(const char *ifname, const struct nmreq *req, u_int namelen; uint32_t nr_ringid = 0, nr_flags, nr_reg; const char *port = NULL; + const char *vpname = NULL; #define MAXERRMSG 80 char errmsg[MAXERRMSG] = ""; enum { P_START, P_RNGSFXOK, P_GETNUM, P_FLAGS, P_FLAGSOK } p_state; + int is_vale; long num; - if (strncmp(ifname, "netmap:", 7) && strncmp(ifname, "vale", 4)) { + if (strncmp(ifname, "netmap:", 7) && + strncmp(ifname, NM_BDG_NAME, strlen(NM_BDG_NAME))) { errno = 0; /* name not recognised, not an error */ return NULL; } - if (ifname[0] == 'n') + + is_vale = (ifname[0] == 'v'); + if (is_vale) { + port = index(ifname, ':'); + if (port == NULL) { + snprintf(errmsg, MAXERRMSG, + "missing ':' in vale name"); + goto fail; + } + + if (!nm_is_identifier(ifname + 4, port)) { + snprintf(errmsg, MAXERRMSG, "invalid bridge name"); + goto fail; + } + + vpname = ++port; + } else { ifname += 7; + port = ifname; + } + /* scan for a separator */ - for (port = ifname; *port && !index("-*^{}/", *port); port++) + for (; *port && !index("-*^{}/", *port); port++) ; + + if (is_vale && !nm_is_identifier(vpname, port)) { + snprintf(errmsg, MAXERRMSG, "invalid bridge port name"); + goto fail; + } + namelen = port - ifname; if (namelen >= sizeof(d->req.nr_name)) { snprintf(errmsg, MAXERRMSG, "name too long"); @@ -449,6 +749,12 @@ nm_open(const char *ifname, const struct nmreq *req, case 'r': nr_flags |= NR_MONITOR_RX; break; + case 'R': + nr_flags |= NR_RX_RINGS_ONLY; + break; + case 'T': + nr_flags |= NR_TX_RINGS_ONLY; + break; default: snprintf(errmsg, MAXERRMSG, "unrecognized flag: '%c'", *port); goto fail; @@ -462,6 +768,11 @@ nm_open(const char *ifname, const struct nmreq *req, snprintf(errmsg, MAXERRMSG, "unexpected end of port name"); goto fail; } + if ((nr_flags & NR_ZCOPY_MON) && + !(nr_flags & (NR_MONITOR_TX|NR_MONITOR_RX))) { + snprintf(errmsg, MAXERRMSG, "'z' used but 
neither 'r', nor 't' found"); + goto fail; + } ND("flags: %s %s %s %s", (nr_flags & NR_EXCLUSIVE) ? "EXCLUSIVE" : "", (nr_flags & NR_ZCOPY_MON) ? "ZCOPY_MON" : "", @@ -474,7 +785,7 @@ nm_open(const char *ifname, const struct nmreq *req, return NULL; } d->self = d; /* set this early so nm_close() works */ - d->fd = open("/dev/netmap", O_RDWR); + d->fd = open(NETMAP_DEVICE_NAME, O_RDWR); if (d->fd < 0) { snprintf(errmsg, MAXERRMSG, "cannot open /dev/netmap: %s", strerror(errno)); goto fail; @@ -487,7 +798,7 @@ nm_open(const char *ifname, const struct nmreq *req, /* these fields are overridden by ifname and flags processing */ d->req.nr_ringid |= nr_ringid; - d->req.nr_flags = nr_flags; + d->req.nr_flags |= nr_flags; memcpy(d->req.nr_name, ifname, namelen); d->req.nr_name[namelen] = '\0'; /* optionally import info from parent */ @@ -529,31 +840,10 @@ nm_open(const char *ifname, const struct nmreq *req, goto fail; } - if (IS_NETMAP_DESC(parent) && parent->mem && - parent->req.nr_arg2 == d->req.nr_arg2) { - /* do not mmap, inherit from parent */ - d->memsize = parent->memsize; - d->mem = parent->mem; - } else { - /* XXX TODO: check if memsize is too large (or there is overflow) */ - d->memsize = d->req.nr_memsize; - d->mem = mmap(0, d->memsize, PROT_WRITE | PROT_READ, MAP_SHARED, - d->fd, 0); - if (d->mem == MAP_FAILED) { - snprintf(errmsg, MAXERRMSG, "mmap failed: %s", strerror(errno)); - goto fail; - } - d->done_mmap = 1; - } - { - struct netmap_if *nifp = NETMAP_IF(d->mem, d->req.nr_offset); - struct netmap_ring *r = NETMAP_RXRING(nifp, ); - - *(struct netmap_if **)(uintptr_t)&(d->nifp) = nifp; - *(struct netmap_ring **)(uintptr_t)&d->some_ring = r; - *(void **)(uintptr_t)&d->buf_start = NETMAP_BUF(r, 0); - *(void **)(uintptr_t)&d->buf_end = - (char *)d->mem + d->memsize; + /* if parent is defined, do nm_mmap() even if NM_OPEN_NO_MMAP is set */ + if ((!(new_flags & NM_OPEN_NO_MMAP) || parent) && nm_mmap(d, parent)) { + snprintf(errmsg, MAXERRMSG, "mmap failed: %s", 
strerror(errno)); + goto fail; } nr_reg = d->req.nr_flags & NR_REG_MASK; @@ -626,14 +916,54 @@ nm_close(struct nm_desc *d) return EINVAL; if (d->done_mmap && d->mem) munmap(d->mem, d->memsize); - if (d->fd != -1) + if (d->fd != -1) { close(d->fd); + } + bzero(d, sizeof(*d)); free(d); return 0; } +static int +nm_mmap(struct nm_desc *d, const struct nm_desc *parent) +{ + //XXX TODO: check if mmap is already done + + if (IS_NETMAP_DESC(parent) && parent->mem && + parent->req.nr_arg2 == d->req.nr_arg2) { + /* do not mmap, inherit from parent */ + D("do not mmap, inherit from parent"); + d->memsize = parent->memsize; + d->mem = parent->mem; + } else { + /* XXX TODO: check if memsize is too large (or there is overflow) */ + d->memsize = d->req.nr_memsize; + d->mem = mmap(0, d->memsize, PROT_WRITE | PROT_READ, MAP_SHARED, + d->fd, 0); + if (d->mem == MAP_FAILED) { + goto fail; + } + d->done_mmap = 1; + } + { + struct netmap_if *nifp = NETMAP_IF(d->mem, d->req.nr_offset); + struct netmap_ring *r = NETMAP_RXRING(nifp, ); + + *(struct netmap_if **)(uintptr_t)&(d->nifp) = nifp; + *(struct netmap_ring **)(uintptr_t)&d->some_ring = r; + *(void **)(uintptr_t)&d->buf_start = NETMAP_BUF(r, 0); + *(void **)(uintptr_t)&d->buf_end = + (char *)d->mem + d->memsize; + } + + return 0; + +fail: + return EINVAL; +} + /* * Same prototype as pcap_inject(), only need to cast. 
*/ @@ -674,6 +1004,9 @@ nm_dispatch(struct nm_desc *d, int cnt, nm_cb_t cb, u_char *arg) { int n = d->last_rx_ring - d->first_rx_ring + 1; int c, got = 0, ri = d->cur_rx_ring; + d->hdr.buf = NULL; + d->hdr.flags = NM_MORE_PKTS; + d->hdr.d = d; if (cnt == 0) cnt = -1; @@ -690,17 +1023,24 @@ nm_dispatch(struct nm_desc *d, int cnt, nm_cb_t cb, u_char *arg) ri = d->first_rx_ring; ring = NETMAP_RXRING(d->nifp, ri); for ( ; !nm_ring_empty(ring) && cnt != got; got++) { - u_int i = ring->cur; - u_int idx = ring->slot[i].buf_idx; - u_char *buf = (u_char *)NETMAP_BUF(ring, idx); - + u_int idx, i; + if (d->hdr.buf) { /* from previous round */ + cb(arg, &d->hdr, d->hdr.buf); + } + i = ring->cur; + idx = ring->slot[i].buf_idx; + d->hdr.slot = &ring->slot[i]; + d->hdr.buf = (u_char *)NETMAP_BUF(ring, idx); // __builtin_prefetch(buf); d->hdr.len = d->hdr.caplen = ring->slot[i].len; d->hdr.ts = ring->ts; - cb(arg, &d->hdr, buf); ring->head = ring->cur = nm_ring_next(ring, i); } } + if (d->hdr.buf) { /* from previous round */ + d->hdr.flags = 0; + cb(arg, &d->hdr, d->hdr.buf); + } d->cur_rx_ring = ri; return got; } |