summaryrefslogtreecommitdiff
path: root/sys/net/netmap_user.h
diff options
context:
space:
mode:
authorLuigi Rizzo <luigi@FreeBSD.org>2016-10-16 14:13:32 +0000
committerLuigi Rizzo <luigi@FreeBSD.org>2016-10-16 14:13:32 +0000
commit37e3a6d349581b4dd0aebf24be7b1b159a698dcf (patch)
tree0e61deea141c9733af511b0485cf1fd0f2dd17ed /sys/net/netmap_user.h
parent63f6b1a75a8e6e33e4f9d65571c6a221444d3b05 (diff)
downloadsrc-test2-37e3a6d349581b4dd0aebf24be7b1b159a698dcf.tar.gz
src-test2-37e3a6d349581b4dd0aebf24be7b1b159a698dcf.zip
Notes
Diffstat (limited to 'sys/net/netmap_user.h')
-rw-r--r--sys/net/netmap_user.h422
1 files changed, 381 insertions, 41 deletions
diff --git a/sys/net/netmap_user.h b/sys/net/netmap_user.h
index 130117db7a2e..4ec3d941c504 100644
--- a/sys/net/netmap_user.h
+++ b/sys/net/netmap_user.h
@@ -1,5 +1,6 @@
/*
- * Copyright (C) 2011-2014 Universita` di Pisa. All rights reserved.
+ * Copyright (C) 2011-2016 Universita` di Pisa
+ * All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
@@ -65,9 +66,31 @@
#ifndef _NET_NETMAP_USER_H_
#define _NET_NETMAP_USER_H_
+#define NETMAP_DEVICE_NAME "/dev/netmap"
+
+#ifdef __CYGWIN__
+/*
+ * we can compile userspace apps with either cygwin or msvc,
+ * and we use _WIN32 to identify windows specific code
+ */
+#ifndef _WIN32
+#define _WIN32
+#endif /* _WIN32 */
+
+#endif /* __CYGWIN__ */
+
+#ifdef _WIN32
+#undef NETMAP_DEVICE_NAME
+#define NETMAP_DEVICE_NAME "/proc/sys/DosDevices/Global/netmap"
+#include <windows.h>
+#include <WinDef.h>
+#include <sys/cygwin.h>
+#endif /* _WIN32 */
+
#include <stdint.h>
#include <sys/socket.h> /* apple needs sockaddr */
#include <net/if.h> /* IFNAMSIZ */
+#include <ctype.h>
#ifndef likely
#define likely(x) __builtin_expect(!!(x), 1)
@@ -172,17 +195,23 @@ nm_ring_space(struct netmap_ring *ring)
} while (0)
#endif
-struct nm_pkthdr { /* same as pcap_pkthdr */
+struct nm_pkthdr { /* first part is the same as pcap_pkthdr */
struct timeval ts;
uint32_t caplen;
uint32_t len;
+
+ uint64_t flags; /* NM_MORE_PKTS etc */
+#define NM_MORE_PKTS 1
+ struct nm_desc *d;
+ struct netmap_slot *slot;
+ uint8_t *buf;
};
struct nm_stat { /* same as pcap_stat */
u_int ps_recv;
u_int ps_drop;
u_int ps_ifdrop;
-#ifdef WIN32
+#ifdef WIN32 /* XXX or _WIN32 ? */
u_int bs_capt;
#endif /* WIN32 */
};
@@ -284,12 +313,14 @@ typedef void (*nm_cb_t)(u_char *, const struct nm_pkthdr *, const u_char *d);
* -NN bind individual NIC ring pair
* {NN bind master side of pipe NN
* }NN bind slave side of pipe NN
- * a suffix starting with + and the following flags,
+ * a suffix starting with / and the following flags,
* in any order:
* x exclusive access
* z zero copy monitor
* t monitor tx side
* r monitor rx side
+ * R bind only RX ring(s)
+ * T bind only TX ring(s)
*
* req provides the initial values of nmreq before parsing ifname.
* Remember that the ifname parsing will override the ring
@@ -329,6 +360,13 @@ enum {
static int nm_close(struct nm_desc *);
/*
+ * nm_mmap() do mmap or inherit from parent if the nr_arg2
+ * (memory block) matches.
+ */
+
+static int nm_mmap(struct nm_desc *, const struct nm_desc *);
+
+/*
* nm_inject() is the same as pcap_inject()
* nm_dispatch() is the same as pcap_dispatch()
* nm_nextpkt() is the same as pcap_next()
@@ -338,13 +376,247 @@ static int nm_inject(struct nm_desc *, const void *, size_t);
static int nm_dispatch(struct nm_desc *, int, nm_cb_t, u_char *);
static u_char *nm_nextpkt(struct nm_desc *, struct nm_pkthdr *);
+#ifdef _WIN32
+
+intptr_t _get_osfhandle(int); /* defined in io.h in windows */
+
+/*
+ * In windows we do not have yet native poll support, so we keep track
+ * of file descriptors associated to netmap ports to emulate poll on
+ * them and fall back on regular poll on other file descriptors.
+ */
+struct win_netmap_fd_list {
+ struct win_netmap_fd_list *next;
+ int win_netmap_fd;
+ HANDLE win_netmap_handle;
+};
+
+/*
+ * list head containing all the netmap opened fd and their
+ * windows HANDLE counterparts
+ */
+static struct win_netmap_fd_list *win_netmap_fd_list_head;
+
+static void
+win_insert_fd_record(int fd)
+{
+ struct win_netmap_fd_list *curr;
+
+ for (curr = win_netmap_fd_list_head; curr; curr = curr->next) {
+ if (fd == curr->win_netmap_fd) {
+ return;
+ }
+ }
+ curr = calloc(1, sizeof(*curr));
+ curr->next = win_netmap_fd_list_head;
+ curr->win_netmap_fd = fd;
+ curr->win_netmap_handle = IntToPtr(_get_osfhandle(fd));
+ win_netmap_fd_list_head = curr;
+}
+
+void
+win_remove_fd_record(int fd)
+{
+ struct win_netmap_fd_list *curr = win_netmap_fd_list_head;
+ struct win_netmap_fd_list *prev = NULL;
+ for (; curr ; prev = curr, curr = curr->next) {
+ if (fd != curr->win_netmap_fd)
+ continue;
+ /* found the entry */
+ if (prev == NULL) { /* we are freeing the first entry */
+ win_netmap_fd_list_head = curr->next;
+ } else {
+ prev->next = curr->next;
+ }
+ free(curr);
+ break;
+ }
+}
+
+
+HANDLE
+win_get_netmap_handle(int fd)
+{
+ struct win_netmap_fd_list *curr;
+
+ for (curr = win_netmap_fd_list_head; curr; curr = curr->next) {
+ if (fd == curr->win_netmap_fd) {
+ return curr->win_netmap_handle;
+ }
+ }
+ return NULL;
+}
+
+/*
+ * we need to wrap ioctl and mmap, at least for the netmap file descriptors
+ */
+
+/*
+ * use this function only from netmap_user.h internal functions
+ * same as ioctl, returns 0 on success and -1 on error
+ */
+static int
+win_nm_ioctl_internal(HANDLE h, int32_t ctlCode, void *arg)
+{
+ DWORD bReturn = 0, szIn, szOut;
+ BOOL ioctlReturnStatus;
+ void *inParam = arg, *outParam = arg;
+
+ switch (ctlCode) {
+ case NETMAP_POLL:
+ szIn = sizeof(POLL_REQUEST_DATA);
+ szOut = sizeof(POLL_REQUEST_DATA);
+ break;
+ case NETMAP_MMAP:
+ szIn = 0;
+ szOut = sizeof(void*);
+ inParam = NULL; /* nothing on input */
+ break;
+ case NIOCTXSYNC:
+ case NIOCRXSYNC:
+ szIn = 0;
+ szOut = 0;
+ break;
+ case NIOCREGIF:
+ szIn = sizeof(struct nmreq);
+ szOut = sizeof(struct nmreq);
+ break;
+ case NIOCCONFIG:
+ D("unsupported NIOCCONFIG!");
+ return -1;
+
+ default: /* a regular ioctl */
+ D("invalid ioctl %x on netmap fd", ctlCode);
+ return -1;
+ }
+
+ ioctlReturnStatus = DeviceIoControl(h,
+ ctlCode, inParam, szIn,
+ outParam, szOut,
+ &bReturn, NULL);
+ // XXX note windows returns 0 on error or async call, 1 on success
+ // we could call GetLastError() to figure out what happened
+ return ioctlReturnStatus ? 0 : -1;
+}
+
+/*
+ * this function is what must be called from user-space programs
+ * same as ioctl, returns 0 on success and -1 on error
+ */
+static int
+win_nm_ioctl(int fd, int32_t ctlCode, void *arg)
+{
+ HANDLE h = win_get_netmap_handle(fd);
+
+ if (h == NULL) {
+ return ioctl(fd, ctlCode, arg);
+ } else {
+ return win_nm_ioctl_internal(h, ctlCode, arg);
+ }
+}
+
+#define ioctl win_nm_ioctl /* from now on, within this file ... */
+
+/*
+ * We cannot use the native mmap on windows
+ * The only parameter used is "fd", the other ones are just declared to
+ * make this signature comparable to the FreeBSD/Linux one
+ */
+static void *
+win32_mmap_emulated(void *addr, size_t length, int prot, int flags, int fd, int32_t offset)
+{
+ HANDLE h = win_get_netmap_handle(fd);
+
+ if (h == NULL) {
+ return mmap(addr, length, prot, flags, fd, offset);
+ } else {
+ MEMORY_ENTRY ret;
+
+ return win_nm_ioctl_internal(h, NETMAP_MMAP, &ret) ?
+ NULL : ret.pUsermodeVirtualAddress;
+ }
+}
+
+#define mmap win32_mmap_emulated
+
+#include <sys/poll.h> /* XXX needed to use the structure pollfd */
+
+static int
+win_nm_poll(struct pollfd *fds, int nfds, int timeout)
+{
+ HANDLE h;
+
+ if (nfds != 1 || fds == NULL || (h = win_get_netmap_handle(fds->fd)) == NULL) {;
+ return poll(fds, nfds, timeout);
+ } else {
+ POLL_REQUEST_DATA prd;
+
+ prd.timeout = timeout;
+ prd.events = fds->events;
+
+ win_nm_ioctl_internal(h, NETMAP_POLL, &prd);
+ if ((prd.revents == POLLERR) || (prd.revents == STATUS_TIMEOUT)) {
+ return -1;
+ }
+ return 1;
+ }
+}
+
+#define poll win_nm_poll
+
+static int
+win_nm_open(char* pathname, int flags)
+{
+
+ if (strcmp(pathname, NETMAP_DEVICE_NAME) == 0) {
+ int fd = open(NETMAP_DEVICE_NAME, O_RDWR);
+ if (fd < 0) {
+ return -1;
+ }
+
+ win_insert_fd_record(fd);
+ return fd;
+ } else {
+ return open(pathname, flags);
+ }
+}
+
+#define open win_nm_open
+
+static int
+win_nm_close(int fd)
+{
+ if (fd != -1) {
+ close(fd);
+ if (win_get_netmap_handle(fd) != NULL) {
+ win_remove_fd_record(fd);
+ }
+ }
+ return 0;
+}
+
+#define close win_nm_close
+
+#endif /* _WIN32 */
+
+static int
+nm_is_identifier(const char *s, const char *e)
+{
+ for (; s != e; s++) {
+ if (!isalnum(*s) && *s != '_') {
+ return 0;
+ }
+ }
+
+ return 1;
+}
/*
* Try to open, return descriptor if successful, NULL otherwise.
* An invalid netmap name will return errno = 0;
* You can pass a pointer to a pre-filled nm_desc to add special
* parameters. Flags is used as follows
- * NM_OPEN_NO_MMAP use the memory from arg, only
+ * NM_OPEN_NO_MMAP use the memory from arg, only XXX avoid mmap
* if the nr_arg2 (memory block) matches.
* NM_OPEN_ARG1 use req.nr_arg1 from arg
* NM_OPEN_ARG2 use req.nr_arg2 from arg
@@ -359,20 +631,48 @@ nm_open(const char *ifname, const struct nmreq *req,
u_int namelen;
uint32_t nr_ringid = 0, nr_flags, nr_reg;
const char *port = NULL;
+ const char *vpname = NULL;
#define MAXERRMSG 80
char errmsg[MAXERRMSG] = "";
enum { P_START, P_RNGSFXOK, P_GETNUM, P_FLAGS, P_FLAGSOK } p_state;
+ int is_vale;
long num;
- if (strncmp(ifname, "netmap:", 7) && strncmp(ifname, "vale", 4)) {
+ if (strncmp(ifname, "netmap:", 7) &&
+ strncmp(ifname, NM_BDG_NAME, strlen(NM_BDG_NAME))) {
errno = 0; /* name not recognised, not an error */
return NULL;
}
- if (ifname[0] == 'n')
+
+ is_vale = (ifname[0] == 'v');
+ if (is_vale) {
+ port = index(ifname, ':');
+ if (port == NULL) {
+ snprintf(errmsg, MAXERRMSG,
+ "missing ':' in vale name");
+ goto fail;
+ }
+
+ if (!nm_is_identifier(ifname + 4, port)) {
+ snprintf(errmsg, MAXERRMSG, "invalid bridge name");
+ goto fail;
+ }
+
+ vpname = ++port;
+ } else {
ifname += 7;
+ port = ifname;
+ }
+
/* scan for a separator */
- for (port = ifname; *port && !index("-*^{}/", *port); port++)
+ for (; *port && !index("-*^{}/", *port); port++)
;
+
+ if (is_vale && !nm_is_identifier(vpname, port)) {
+ snprintf(errmsg, MAXERRMSG, "invalid bridge port name");
+ goto fail;
+ }
+
namelen = port - ifname;
if (namelen >= sizeof(d->req.nr_name)) {
snprintf(errmsg, MAXERRMSG, "name too long");
@@ -449,6 +749,12 @@ nm_open(const char *ifname, const struct nmreq *req,
case 'r':
nr_flags |= NR_MONITOR_RX;
break;
+ case 'R':
+ nr_flags |= NR_RX_RINGS_ONLY;
+ break;
+ case 'T':
+ nr_flags |= NR_TX_RINGS_ONLY;
+ break;
default:
snprintf(errmsg, MAXERRMSG, "unrecognized flag: '%c'", *port);
goto fail;
@@ -462,6 +768,11 @@ nm_open(const char *ifname, const struct nmreq *req,
snprintf(errmsg, MAXERRMSG, "unexpected end of port name");
goto fail;
}
+ if ((nr_flags & NR_ZCOPY_MON) &&
+ !(nr_flags & (NR_MONITOR_TX|NR_MONITOR_RX))) {
+ snprintf(errmsg, MAXERRMSG, "'z' used but neither 'r', nor 't' found");
+ goto fail;
+ }
ND("flags: %s %s %s %s",
(nr_flags & NR_EXCLUSIVE) ? "EXCLUSIVE" : "",
(nr_flags & NR_ZCOPY_MON) ? "ZCOPY_MON" : "",
@@ -474,7 +785,7 @@ nm_open(const char *ifname, const struct nmreq *req,
return NULL;
}
d->self = d; /* set this early so nm_close() works */
- d->fd = open("/dev/netmap", O_RDWR);
+ d->fd = open(NETMAP_DEVICE_NAME, O_RDWR);
if (d->fd < 0) {
snprintf(errmsg, MAXERRMSG, "cannot open /dev/netmap: %s", strerror(errno));
goto fail;
@@ -487,7 +798,7 @@ nm_open(const char *ifname, const struct nmreq *req,
/* these fields are overridden by ifname and flags processing */
d->req.nr_ringid |= nr_ringid;
- d->req.nr_flags = nr_flags;
+ d->req.nr_flags |= nr_flags;
memcpy(d->req.nr_name, ifname, namelen);
d->req.nr_name[namelen] = '\0';
/* optionally import info from parent */
@@ -529,31 +840,10 @@ nm_open(const char *ifname, const struct nmreq *req,
goto fail;
}
- if (IS_NETMAP_DESC(parent) && parent->mem &&
- parent->req.nr_arg2 == d->req.nr_arg2) {
- /* do not mmap, inherit from parent */
- d->memsize = parent->memsize;
- d->mem = parent->mem;
- } else {
- /* XXX TODO: check if memsize is too large (or there is overflow) */
- d->memsize = d->req.nr_memsize;
- d->mem = mmap(0, d->memsize, PROT_WRITE | PROT_READ, MAP_SHARED,
- d->fd, 0);
- if (d->mem == MAP_FAILED) {
- snprintf(errmsg, MAXERRMSG, "mmap failed: %s", strerror(errno));
- goto fail;
- }
- d->done_mmap = 1;
- }
- {
- struct netmap_if *nifp = NETMAP_IF(d->mem, d->req.nr_offset);
- struct netmap_ring *r = NETMAP_RXRING(nifp, );
-
- *(struct netmap_if **)(uintptr_t)&(d->nifp) = nifp;
- *(struct netmap_ring **)(uintptr_t)&d->some_ring = r;
- *(void **)(uintptr_t)&d->buf_start = NETMAP_BUF(r, 0);
- *(void **)(uintptr_t)&d->buf_end =
- (char *)d->mem + d->memsize;
+ /* if parent is defined, do nm_mmap() even if NM_OPEN_NO_MMAP is set */
+ if ((!(new_flags & NM_OPEN_NO_MMAP) || parent) && nm_mmap(d, parent)) {
+ snprintf(errmsg, MAXERRMSG, "mmap failed: %s", strerror(errno));
+ goto fail;
}
nr_reg = d->req.nr_flags & NR_REG_MASK;
@@ -626,14 +916,54 @@ nm_close(struct nm_desc *d)
return EINVAL;
if (d->done_mmap && d->mem)
munmap(d->mem, d->memsize);
- if (d->fd != -1)
+ if (d->fd != -1) {
close(d->fd);
+ }
+
bzero(d, sizeof(*d));
free(d);
return 0;
}
+static int
+nm_mmap(struct nm_desc *d, const struct nm_desc *parent)
+{
+ //XXX TODO: check if mmap is already done
+
+ if (IS_NETMAP_DESC(parent) && parent->mem &&
+ parent->req.nr_arg2 == d->req.nr_arg2) {
+ /* do not mmap, inherit from parent */
+ D("do not mmap, inherit from parent");
+ d->memsize = parent->memsize;
+ d->mem = parent->mem;
+ } else {
+ /* XXX TODO: check if memsize is too large (or there is overflow) */
+ d->memsize = d->req.nr_memsize;
+ d->mem = mmap(0, d->memsize, PROT_WRITE | PROT_READ, MAP_SHARED,
+ d->fd, 0);
+ if (d->mem == MAP_FAILED) {
+ goto fail;
+ }
+ d->done_mmap = 1;
+ }
+ {
+ struct netmap_if *nifp = NETMAP_IF(d->mem, d->req.nr_offset);
+ struct netmap_ring *r = NETMAP_RXRING(nifp, );
+
+ *(struct netmap_if **)(uintptr_t)&(d->nifp) = nifp;
+ *(struct netmap_ring **)(uintptr_t)&d->some_ring = r;
+ *(void **)(uintptr_t)&d->buf_start = NETMAP_BUF(r, 0);
+ *(void **)(uintptr_t)&d->buf_end =
+ (char *)d->mem + d->memsize;
+ }
+
+ return 0;
+
+fail:
+ return EINVAL;
+}
+
/*
* Same prototype as pcap_inject(), only need to cast.
*/
@@ -674,6 +1004,9 @@ nm_dispatch(struct nm_desc *d, int cnt, nm_cb_t cb, u_char *arg)
{
int n = d->last_rx_ring - d->first_rx_ring + 1;
int c, got = 0, ri = d->cur_rx_ring;
+ d->hdr.buf = NULL;
+ d->hdr.flags = NM_MORE_PKTS;
+ d->hdr.d = d;
if (cnt == 0)
cnt = -1;
@@ -690,17 +1023,24 @@ nm_dispatch(struct nm_desc *d, int cnt, nm_cb_t cb, u_char *arg)
ri = d->first_rx_ring;
ring = NETMAP_RXRING(d->nifp, ri);
for ( ; !nm_ring_empty(ring) && cnt != got; got++) {
- u_int i = ring->cur;
- u_int idx = ring->slot[i].buf_idx;
- u_char *buf = (u_char *)NETMAP_BUF(ring, idx);
-
+ u_int idx, i;
+ if (d->hdr.buf) { /* from previous round */
+ cb(arg, &d->hdr, d->hdr.buf);
+ }
+ i = ring->cur;
+ idx = ring->slot[i].buf_idx;
+ d->hdr.slot = &ring->slot[i];
+ d->hdr.buf = (u_char *)NETMAP_BUF(ring, idx);
// __builtin_prefetch(buf);
d->hdr.len = d->hdr.caplen = ring->slot[i].len;
d->hdr.ts = ring->ts;
- cb(arg, &d->hdr, buf);
ring->head = ring->cur = nm_ring_next(ring, i);
}
}
+ if (d->hdr.buf) { /* from previous round */
+ d->hdr.flags = 0;
+ cb(arg, &d->hdr, d->hdr.buf);
+ }
d->cur_rx_ring = ri;
return got;
}