author: Luigi Rizzo <luigi@FreeBSD.org> 2012-02-27 19:05:01 +0000
committer: Luigi Rizzo <luigi@FreeBSD.org> 2012-02-27 19:05:01 +0000
commit: 64ae02c36579bad7d5e682589a0bc1023e359f9d (patch)
tree: a547096f4399bc66370c43d717a40e4b79eb8401 /sys/dev/netmap/netmap.c
parent: d7ccbd70099774d72fd45fa7a0b942c360dd9878 (diff)
1 files changed, 228 insertions, 169 deletions
diff --git a/sys/dev/netmap/netmap.c b/sys/dev/netmap/netmap.c
index 8dc62d8fd4ef..ae9a599ee916 100644
--- a/sys/dev/netmap/netmap.c
+++ b/sys/dev/netmap/netmap.c
@@ -1,6 +1,6 @@
 /*
  * Copyright (C) 2011 Matteo Landi, Luigi Rizzo. All rights reserved.
- * 
+ *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
@@ -9,7 +9,7 @@
  *   2. Redistributions in binary form must reproduce the above copyright
  *      notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
- * 
+ *
  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
@@ -87,10 +87,10 @@ MALLOC_DEFINE(M_NETMAP, "netmap", "Network memory map");
 /*
  * lock and unlock for the netmap memory allocator
  */
-#define NMA_LOCK()	mtx_lock(&netmap_mem_d->nm_mtx);
-#define NMA_UNLOCK()	mtx_unlock(&netmap_mem_d->nm_mtx);
+#define NMA_LOCK()	mtx_lock(&nm_mem->nm_mtx);
+#define NMA_UNLOCK()	mtx_unlock(&nm_mem->nm_mtx);
 struct netmap_mem_d;
-static struct netmap_mem_d *netmap_mem_d;	/* Our memory allocator. */
+static struct netmap_mem_d *nm_mem;	/* Our memory allocator. */
 
 u_int netmap_total_buffers;
 char *netmap_buffer_base;	/* address of an invalid buffer */
@@ -254,10 +254,10 @@ struct netmap_mem_d {
 
 /* Shorthand to compute a netmap interface offset. */
 #define netmap_if_offset(v)                                     \
-    ((char *) (v) - (char *) netmap_mem_d->nm_buffer)
+    ((char *) (v) - (char *) nm_mem->nm_buffer)
 /* .. and get a physical address given a memory offset */
 #define netmap_ofstophys(o)                                     \
-    (vtophys(netmap_mem_d->nm_buffer) + (o))
+    (vtophys(nm_mem->nm_buffer) + (o))
 
 
 /*------ netmap memory allocator -------*/
@@ -265,7 +265,7 @@ struct netmap_mem_d {
  * Request for a chunk of memory.
  *
  * Memory objects are arranged into a list, hence we need to walk this
- * list until we find an object with the needed amount of data free. 
+ * list until we find an object with the needed amount of data free.
  * This sounds like a completely inefficient implementation, but given
  * the fact that data allocation is done once, we can handle it
  * flawlessly.
@@ -279,7 +279,7 @@ netmap_malloc(size_t size, __unused const char *msg)
 	void *ret = NULL;
 
 	NMA_LOCK();
-	TAILQ_FOREACH(mem_obj, &netmap_mem_d->nm_molist, nmo_next) {
+	TAILQ_FOREACH(mem_obj, &nm_mem->nm_molist, nmo_next) {
 		if (mem_obj->nmo_used != 0 || mem_obj->nmo_size < size)
 			continue;
 
@@ -295,7 +295,7 @@ netmap_malloc(size_t size, __unused const char *msg)
 		mem_obj->nmo_size -= size;
 		mem_obj->nmo_data = (char *) mem_obj->nmo_data + size;
 		if (mem_obj->nmo_size == 0) {
-			TAILQ_REMOVE(&netmap_mem_d->nm_molist, mem_obj,
+			TAILQ_REMOVE(&nm_mem->nm_molist, mem_obj,
 				     nmo_next);
 			free(mem_obj, M_NETMAP);
 		}
@@ -328,7 +328,7 @@ netmap_free(void *addr, const char *msg)
 	}
 
 	NMA_LOCK();
-	TAILQ_FOREACH(cur, &netmap_mem_d->nm_molist, nmo_next) {
+	TAILQ_FOREACH(cur, &nm_mem->nm_molist, nmo_next) {
 		if (cur->nmo_data == addr && cur->nmo_used)
 			break;
 	}
@@ -345,7 +345,7 @@ netmap_free(void *addr, const char *msg)
 	   if present. */
 	prev = TAILQ_PREV(cur, netmap_mem_obj_h, nmo_next);
 	if (prev && prev->nmo_used == 0) {
-		TAILQ_REMOVE(&netmap_mem_d->nm_molist, cur, nmo_next);
+		TAILQ_REMOVE(&nm_mem->nm_molist, cur, nmo_next);
 		prev->nmo_size += cur->nmo_size;
 		free(cur, M_NETMAP);
 		cur = prev;
@@ -354,7 +354,7 @@ netmap_free(void *addr, const char *msg)
 	/* merge with the next one */
 	next = TAILQ_NEXT(cur, nmo_next);
 	if (next && next->nmo_used == 0) {
-		TAILQ_REMOVE(&netmap_mem_d->nm_molist, next, nmo_next);
+		TAILQ_REMOVE(&nm_mem->nm_molist, next, nmo_next);
 		cur->nmo_size += next->nmo_size;
 		free(next, M_NETMAP);
 	}
@@ -374,21 +374,24 @@ netmap_if_new(const char *ifname, struct netmap_adapter *na)
 {
 	struct netmap_if *nifp;
 	struct netmap_ring *ring;
+	struct netmap_kring *kring;
 	char *buff;
-	u_int i, len, ofs;
-	u_int n = na->num_queues + 1; /* shorthand, include stack queue */
+	u_int i, len, ofs, numdesc;
+	u_int nrx = na->num_rx_queues + 1; /* shorthand, include stack queue */
+	u_int ntx = na->num_tx_queues + 1; /* shorthand, include stack queue */
 
 	/*
 	 * the descriptor is followed inline by an array of offsets
 	 * to the tx and rx rings in the shared memory region.
 	 */
-	len = sizeof(struct netmap_if) + 2 * n * sizeof(ssize_t);
+	len = sizeof(struct netmap_if) + (nrx + ntx) * sizeof(ssize_t);
 	nifp = netmap_if_malloc(len);
 	if (nifp == NULL)
 		return (NULL);
 
 	/* initialize base fields */
-	*(int *)(uintptr_t)&nifp->ni_num_queues = na->num_queues;
+	*(int *)(uintptr_t)&nifp->ni_rx_queues = na->num_rx_queues;
+	*(int *)(uintptr_t)&nifp->ni_tx_queues = na->num_tx_queues;
 	strncpy(nifp->ni_name, ifname, IFNAMSIZ);
 
 	(na->refcount)++;	/* XXX atomic ? we are under lock */
@@ -396,16 +399,15 @@ netmap_if_new(const char *ifname, struct netmap_adapter *na)
 		goto final;
 
 	/*
-	 * If this is the first instance, allocate the shadow rings and
-	 * buffers for this card (one for each hw queue, one for the host).
+	 * First instance. Allocate the netmap rings
+	 * (one for each hw queue, one pair for the host).
 	 * The rings are contiguous, but have variable size.
 	 * The entire block is reachable at
-	 *	na->tx_rings[0].ring
+	 *	na->tx_rings[0]
 	 */
-
-	len = n * (2 * sizeof(struct netmap_ring) +
-		  (na->num_tx_desc + na->num_rx_desc) *
-		   sizeof(struct netmap_slot) );
+	len = (ntx + nrx) * sizeof(struct netmap_ring) +
+	      (ntx * na->num_tx_desc + nrx * na->num_rx_desc) *
+		   sizeof(struct netmap_slot);
 	buff = netmap_ring_malloc(len);
 	if (buff == NULL) {
 		D("failed to allocate %d bytes for %s shadow ring",
@@ -415,9 +417,8 @@ error:
 		netmap_if_free(nifp);
 		return (NULL);
 	}
-	/* do we have the bufers ? we are in need of num_tx_desc buffers for
-	 * each tx ring and num_tx_desc buffers for each rx ring. */
-	len = n * (na->num_tx_desc + na->num_rx_desc);
+	/* Check whether we have enough buffers */
+	len = ntx * na->num_tx_desc + nrx * na->num_rx_desc;
 	NMA_LOCK();
 	if (nm_buf_pool.free < len) {
 		NMA_UNLOCK();
@@ -429,11 +430,7 @@ error:
 	 * and initialize the rings. We are under NMA_LOCK().
 	 */
 	ofs = 0;
-	for (i = 0; i < n; i++) {
-		struct netmap_kring *kring;
-		int numdesc;
-
-		/* Transmit rings */
+	for (i = 0; i < ntx; i++) { /* Transmit rings */
 		kring = &na->tx_rings[i];
 		numdesc = na->num_tx_desc;
 		bzero(kring, sizeof(*kring));
@@ -459,8 +456,9 @@ error:
 
 		ofs += sizeof(struct netmap_ring) +
 			numdesc * sizeof(struct netmap_slot);
+	}
 
-		/* Receive rings */
+	for (i = 0; i < nrx; i++) { /* Receive rings */
 		kring = &na->rx_rings[i];
 		numdesc = na->num_rx_desc;
 		bzero(kring, sizeof(*kring));
@@ -480,21 +478,21 @@ error:
 			numdesc * sizeof(struct netmap_slot);
 	}
 	NMA_UNLOCK();
-	for (i = 0; i < n+1; i++) {
-		// XXX initialize the selrecord structs.
-	}
+	// XXX initialize the selrecord structs.
+
 final:
 	/*
 	 * fill the slots for the rx and tx queues. They contain the offset
 	 * between the ring and nifp, so the information is usable in
 	 * userspace to reach the ring from the nifp.
 	 */
-	for (i = 0; i < n; i++) {
-		char *base = (char *)nifp;
+	for (i = 0; i < ntx; i++) {
 		*(ssize_t *)(uintptr_t)&nifp->ring_ofs[i] =
-			(char *)na->tx_rings[i].ring - base;
-		*(ssize_t *)(uintptr_t)&nifp->ring_ofs[i+n] =
-			(char *)na->rx_rings[i].ring - base;
+			(char *)na->tx_rings[i].ring - (char *)nifp;
+	}
+	for (i = 0; i < nrx; i++) {
+		*(ssize_t *)(uintptr_t)&nifp->ring_ofs[i+ntx] =
+			(char *)na->rx_rings[i].ring - (char *)nifp;
 	}
 	return (nifp);
 }
@@ -532,17 +530,17 @@ netmap_memory_init(void)
 			    );
 		if (buf)
 			break;
-	} 
+	}
 	if (buf == NULL)
 		return (ENOMEM);
 	sz += extra_sz;
-	netmap_mem_d = malloc(sizeof(struct netmap_mem_d), M_NETMAP,
+	nm_mem = malloc(sizeof(struct netmap_mem_d), M_NETMAP,
 			      M_WAITOK | M_ZERO);
-	mtx_init(&netmap_mem_d->nm_mtx, "netmap memory allocator lock", NULL,
+	mtx_init(&nm_mem->nm_mtx, "netmap memory allocator lock", NULL,
 		 MTX_DEF);
-	TAILQ_INIT(&netmap_mem_d->nm_molist);
-	netmap_mem_d->nm_buffer = buf;
-	netmap_mem_d->nm_totalsize = sz;
+	TAILQ_INIT(&nm_mem->nm_molist);
+	nm_mem->nm_buffer = buf;
+	nm_mem->nm_totalsize = sz;
 
 	/*
 	 * A buffer takes 2k, a slot takes 8 bytes + ring overhead,
@@ -550,24 +548,24 @@ netmap_memory_init(void)
 	 * the memory for the rings, and the rest for the buffers,
 	 * and be sure we never run out.
 	 */
-	netmap_mem_d->nm_size = sz/200;
-	netmap_mem_d->nm_buf_start =
-		(netmap_mem_d->nm_size + PAGE_SIZE - 1) & ~(PAGE_SIZE-1);
-	netmap_mem_d->nm_buf_len = sz - netmap_mem_d->nm_buf_start;
+	nm_mem->nm_size = sz/200;
+	nm_mem->nm_buf_start =
+		(nm_mem->nm_size + PAGE_SIZE - 1) & ~(PAGE_SIZE-1);
+	nm_mem->nm_buf_len = sz - nm_mem->nm_buf_start;
 
-	nm_buf_pool.base = netmap_mem_d->nm_buffer;
-	nm_buf_pool.base += netmap_mem_d->nm_buf_start;
+	nm_buf_pool.base = nm_mem->nm_buffer;
+	nm_buf_pool.base += nm_mem->nm_buf_start;
 	netmap_buffer_base = nm_buf_pool.base;
 	D("netmap_buffer_base %p (offset %d)",
-		netmap_buffer_base, (int)netmap_mem_d->nm_buf_start);
+		netmap_buffer_base, (int)nm_mem->nm_buf_start);
 	/* number of buffers, they all start as free */
 
 	netmap_total_buffers = nm_buf_pool.total_buffers =
-		netmap_mem_d->nm_buf_len / NETMAP_BUF_SIZE;
+		nm_mem->nm_buf_len / NETMAP_BUF_SIZE;
 	nm_buf_pool.bufsize = NETMAP_BUF_SIZE;
 
 	D("Have %d MB, use %dKB for rings, %d buffers at %p",
-		(sz >> 20), (int)(netmap_mem_d->nm_size >> 10),
+		(sz >> 20), (int)(nm_mem->nm_size >> 10),
 		nm_buf_pool.total_buffers, nm_buf_pool.base);
 
 	/* allocate and initialize the bitmap. Entry 0 is considered
@@ -583,10 +581,10 @@ netmap_memory_init(void)
 	
 	mem_obj = malloc(sizeof(struct netmap_mem_obj), M_NETMAP,
 			 M_WAITOK | M_ZERO);
-	TAILQ_INSERT_HEAD(&netmap_mem_d->nm_molist, mem_obj, nmo_next);
+	TAILQ_INSERT_HEAD(&nm_mem->nm_molist, mem_obj, nmo_next);
 	mem_obj->nmo_used = 0;
-	mem_obj->nmo_size = netmap_mem_d->nm_size;
-	mem_obj->nmo_data = netmap_mem_d->nm_buffer;
+	mem_obj->nmo_size = nm_mem->nm_size;
+	mem_obj->nmo_data = nm_mem->nm_buffer;
 
 	return (0);
 }
@@ -603,9 +601,9 @@ netmap_memory_fini(void)
 {
 	struct netmap_mem_obj *mem_obj;
 
-	while (!TAILQ_EMPTY(&netmap_mem_d->nm_molist)) {
-		mem_obj = TAILQ_FIRST(&netmap_mem_d->nm_molist);
-		TAILQ_REMOVE(&netmap_mem_d->nm_molist, mem_obj, nmo_next);
+	while (!TAILQ_EMPTY(&nm_mem->nm_molist)) {
+		mem_obj = TAILQ_FIRST(&nm_mem->nm_molist);
+		TAILQ_REMOVE(&nm_mem->nm_molist, mem_obj, nmo_next);
 		if (mem_obj->nmo_used == 1) {
 			printf("netmap: leaked %d bytes at %p\n",
 			       (int)mem_obj->nmo_size,
@@ -613,9 +611,9 @@ netmap_memory_fini(void)
 		}
 		free(mem_obj, M_NETMAP);
 	}
-	contigfree(netmap_mem_d->nm_buffer, netmap_mem_d->nm_totalsize, M_NETMAP);
+	contigfree(nm_mem->nm_buffer, nm_mem->nm_totalsize, M_NETMAP);
 	// XXX mutex_destroy(nm_mtx);
-	free(netmap_mem_d, M_NETMAP);
+	free(nm_mem, M_NETMAP);
 }
 /*------------- end of memory allocator -----------------*/
 
@@ -647,7 +645,7 @@ netmap_dtor_locked(void *data)
 
 	na->refcount--;
 	if (na->refcount <= 0) {	/* last instance */
-		u_int i;
+		u_int i, j, lim;
 
 		D("deleting last netmap instance for %s", ifp->if_xname);
 		/*
@@ -669,24 +667,22 @@ netmap_dtor_locked(void *data)
 		/* Wake up any sleeping threads. netmap_poll will
 		 * then return POLLERR
 		 */
-		for (i = 0; i < na->num_queues + 2; i++) {
+		for (i = 0; i < na->num_tx_queues + 1; i++)
 			selwakeuppri(&na->tx_rings[i].si, PI_NET);
+		for (i = 0; i < na->num_rx_queues + 1; i++)
 			selwakeuppri(&na->rx_rings[i].si, PI_NET);
-		}
+		selwakeuppri(&na->tx_si, PI_NET);
+		selwakeuppri(&na->rx_si, PI_NET);
 		/* release all buffers */
 		NMA_LOCK();
-		for (i = 0; i < na->num_queues + 1; i++) {
-			int j, lim;
-			struct netmap_ring *ring;
-
-			ND("tx queue %d", i);
-			ring = na->tx_rings[i].ring;
+		for (i = 0; i < na->num_tx_queues + 1; i++) {
+			struct netmap_ring *ring = na->tx_rings[i].ring;
 			lim = na->tx_rings[i].nkr_num_slots;
 			for (j = 0; j < lim; j++)
 				netmap_free_buf(nifp, ring->slot[j].buf_idx);
-
-			ND("rx queue %d", i);
-			ring = na->rx_rings[i].ring;
+		}
+		for (i = 0; i < na->num_rx_queues + 1; i++) {
+			struct netmap_ring *ring = na->rx_rings[i].ring;
 			lim = na->rx_rings[i].nkr_num_slots;
 			for (j = 0; j < lim; j++)
 				netmap_free_buf(nifp, ring->slot[j].buf_idx);
@@ -708,7 +704,7 @@ netmap_dtor(void *data)
 
 	na->nm_lock(ifp, NETMAP_REG_LOCK, 0);
 	netmap_dtor_locked(data);
-	na->nm_lock(ifp, NETMAP_REG_UNLOCK, 0); 
+	na->nm_lock(ifp, NETMAP_REG_UNLOCK, 0);
 
 	if_rele(ifp);
 	bzero(priv, sizeof(*priv));	/* XXX for safety */
@@ -758,7 +754,7 @@ netmap_mmap(__unused struct cdev *dev,
 static void
 netmap_sync_to_host(struct netmap_adapter *na)
 {
-	struct netmap_kring *kring = &na->tx_rings[na->num_queues];
+	struct netmap_kring *kring = &na->tx_rings[na->num_tx_queues];
 	struct netmap_ring *ring = kring->ring;
 	struct mbuf *head = NULL, *tail = NULL, *m;
 	u_int k, n, lim = kring->nkr_num_slots - 1;
@@ -818,31 +814,37 @@ netmap_sync_to_host(struct netmap_adapter *na)
 static void
 netmap_sync_from_host(struct netmap_adapter *na, struct thread *td)
 {
-	struct netmap_kring *kring = &na->rx_rings[na->num_queues];
+	struct netmap_kring *kring = &na->rx_rings[na->num_rx_queues];
 	struct netmap_ring *ring = kring->ring;
-	int error = 1, delta;
-	u_int k = ring->cur, lim = kring->nkr_num_slots;
+	u_int j, n, lim = kring->nkr_num_slots;
+	u_int k = ring->cur, resvd = ring->reserved;
 
 	na->nm_lock(na->ifp, NETMAP_CORE_LOCK, 0);
-	if (k >= lim) /* bad value */
-		goto done;
-	delta = k - kring->nr_hwcur;
-	if (delta < 0)
-		delta += lim;
-	kring->nr_hwavail -= delta;
-	if (kring->nr_hwavail < 0)	/* error */
-		goto done;
+	if (k >= lim) {
+		netmap_ring_reinit(kring);
+		return;
+	}
+	/* new packets are already set in nr_hwavail */
+	/* skip past packets that userspace has released */
+	j = kring->nr_hwcur;
+	if (resvd > 0) {
+		if (resvd + ring->avail >= lim + 1) {
+			D("XXX invalid reserve/avail %d %d", resvd, ring->avail);
+			ring->reserved = resvd = 0; // XXX panic...
+		}
+		k = (k >= resvd) ? k - resvd : k + lim - resvd;
+        }
+	if (j != k) {
+		n = k >= j ? k - j : k + lim - j;
+		kring->nr_hwavail -= n;
 	kring->nr_hwcur = k;
-	error = 0;
-	k = ring->avail = kring->nr_hwavail;
+	}
+	k = ring->avail = kring->nr_hwavail - resvd;
 	if (k == 0 && td)
 		selrecord(td, &kring->si);
 	if (k && (netmap_verbose & NM_VERB_HOST))
 		D("%d pkts from stack", k);
-done:
 	na->nm_lock(na->ifp, NETMAP_CORE_UNLOCK, 0);
-	if (error)
-		netmap_ring_reinit(kring);
 }
 
 
@@ -907,13 +909,13 @@ netmap_ring_reinit(struct netmap_kring *kring)
 	}
 	if (errors) {
 		int pos = kring - kring->na->tx_rings;
-		int n = kring->na->num_queues + 2;
+		int n = kring->na->num_tx_queues + 1;
 
 		D("total %d errors", errors);
 		errors++;
 		D("%s %s[%d] reinit, cur %d -> %d avail %d -> %d",
 			kring->na->ifp->if_xname,
-			pos < n ?  "TX" : "RX", pos < n ? pos : pos - n, 
+			pos < n ?  "TX" : "RX", pos < n ? pos : pos - n,
 			ring->cur, kring->nr_hwcur,
 			ring->avail, kring->nr_hwavail);
 		ring->cur = kring->nr_hwcur;
@@ -933,10 +935,13 @@ netmap_set_ringid(struct netmap_priv_d *priv, u_int ringid)
 	struct ifnet *ifp = priv->np_ifp;
 	struct netmap_adapter *na = NA(ifp);
 	u_int i = ringid & NETMAP_RING_MASK;
-	/* first time we don't lock */
+	/* initially (np_qfirst == np_qlast) we don't want to lock */
 	int need_lock = (priv->np_qfirst != priv->np_qlast);
+	int lim = na->num_rx_queues;
 
-	if ( (ringid & NETMAP_HW_RING) && i >= na->num_queues) {
+	if (na->num_tx_queues > lim)
+		lim = na->num_tx_queues;
+	if ( (ringid & NETMAP_HW_RING) && i >= lim) {
 		D("invalid ring id %d", i);
 		return (EINVAL);
 	}
@@ -944,14 +949,14 @@ netmap_set_ringid(struct netmap_priv_d *priv, u_int ringid)
 		na->nm_lock(ifp, NETMAP_CORE_LOCK, 0);
 	priv->np_ringid = ringid;
 	if (ringid & NETMAP_SW_RING) {
-		priv->np_qfirst = na->num_queues;
-		priv->np_qlast = na->num_queues + 1;
+		priv->np_qfirst = NETMAP_SW_RING;
+		priv->np_qlast = 0;
 	} else if (ringid & NETMAP_HW_RING) {
 		priv->np_qfirst = i;
 		priv->np_qlast = i + 1;
 	} else {
 		priv->np_qfirst = 0;
-		priv->np_qlast = na->num_queues;
+		priv->np_qlast = NETMAP_HW_RING ;
 	}
 	priv->np_txpoll = (ringid & NETMAP_NO_TX_POLL) ? 0 : 1;
 	if (need_lock)
@@ -962,8 +967,7 @@ netmap_set_ringid(struct netmap_priv_d *priv, u_int ringid)
 		D("ringid %s set to HW RING %d", ifp->if_xname,
 			priv->np_qfirst);
 	else
-		D("ringid %s set to all %d HW RINGS", ifp->if_xname,
-			priv->np_qlast);
+		D("ringid %s set to all %d HW RINGS", ifp->if_xname, lim);
 	return 0;
 }
 
@@ -989,7 +993,7 @@ netmap_ioctl(__unused struct cdev *dev, u_long cmd, caddr_t data,
 	struct nmreq *nmr = (struct nmreq *) data;
 	struct netmap_adapter *na;
 	int error;
-	u_int i;
+	u_int i, lim;
 	struct netmap_if *nifp;
 
 	CURVNET_SET(TD_TO_VNET(td));
@@ -1004,22 +1008,36 @@ netmap_ioctl(__unused struct cdev *dev, u_long cmd, caddr_t data,
 	switch (cmd) {
 	case NIOCGINFO:		/* return capabilities etc */
 		/* memsize is always valid */
-		nmr->nr_memsize = netmap_mem_d->nm_totalsize;
+		nmr->nr_memsize = nm_mem->nm_totalsize;
 		nmr->nr_offset = 0;
-		nmr->nr_numrings = 0;
-		nmr->nr_numslots = 0;
+		nmr->nr_rx_rings = nmr->nr_tx_rings = 0;
+		nmr->nr_rx_slots = nmr->nr_tx_slots = 0;
+		if (nmr->nr_version != NETMAP_API) {
+			D("API mismatch got %d have %d",
+				nmr->nr_version, NETMAP_API);
+			nmr->nr_version = NETMAP_API;
+			error = EINVAL;
+			break;
+		}
 		if (nmr->nr_name[0] == '\0')	/* just get memory info */
 			break;
 		error = get_ifp(nmr->nr_name, &ifp); /* get a refcount */
 		if (error)
 			break;
 		na = NA(ifp); /* retrieve netmap_adapter */
-		nmr->nr_numrings = na->num_queues;
-		nmr->nr_numslots = na->num_tx_desc;
+		nmr->nr_rx_rings = na->num_rx_queues;
+		nmr->nr_tx_rings = na->num_tx_queues;
+		nmr->nr_rx_slots = na->num_rx_desc;
+		nmr->nr_tx_slots = na->num_tx_desc;
 		if_rele(ifp);	/* return the refcount */
 		break;
 
 	case NIOCREGIF:
+		if (nmr->nr_version != NETMAP_API) {
+			nmr->nr_version = NETMAP_API;
+			error = EINVAL;
+			break;
+		}
 		if (priv != NULL) {	/* thread already registered */
 			error = netmap_set_ringid(priv, nmr->nr_ringid);
 			break;
@@ -1095,9 +1113,11 @@ error:
 		}
 
 		/* return the offset of the netmap_if object */
-		nmr->nr_numrings = na->num_queues;
-		nmr->nr_numslots = na->num_tx_desc;
-		nmr->nr_memsize = netmap_mem_d->nm_totalsize;
+		nmr->nr_rx_rings = na->num_rx_queues;
+		nmr->nr_tx_rings = na->num_tx_queues;
+		nmr->nr_rx_slots = na->num_rx_desc;
+		nmr->nr_tx_slots = na->num_tx_desc;
+		nmr->nr_memsize = nm_mem->nm_totalsize;
 		nmr->nr_offset = netmap_if_offset(nifp);
 		break;
 
@@ -1120,17 +1140,19 @@ error:
 		}
 		ifp = priv->np_ifp;	/* we have a reference */
 		na = NA(ifp); /* retrieve netmap adapter */
-
-		if (priv->np_qfirst == na->num_queues) {
-			/* queues to/from host */
+		if (priv->np_qfirst == NETMAP_SW_RING) { /* host rings */
 			if (cmd == NIOCTXSYNC)
 				netmap_sync_to_host(na);
 			else
 				netmap_sync_from_host(na, NULL);
 			break;
 		}
+		/* find the last ring to scan */
+		lim = priv->np_qlast;
+		if (lim == NETMAP_HW_RING)
+		    lim = (cmd == NIOCTXSYNC) ? na->num_tx_queues : na->num_rx_queues;
 
-		for (i = priv->np_qfirst; i < priv->np_qlast; i++) {
+		for (i = priv->np_qfirst; i < lim; i++) {
 		    if (cmd == NIOCTXSYNC) {
 			struct netmap_kring *kring = &na->tx_rings[i];
 			if (netmap_verbose & NM_VERB_TXSYNC)
@@ -1195,6 +1217,7 @@ netmap_poll(__unused struct cdev *dev, int events, struct thread *td)
 	struct ifnet *ifp;
 	struct netmap_kring *kring;
 	u_int core_lock, i, check_all, want_tx, want_rx, revents = 0;
+	u_int lim_tx, lim_rx;
 	enum {NO_CL, NEED_CL, LOCKED_CL }; /* see below */
 
 	if (devfs_get_cdevpriv((void **)&priv) != 0 || priv == NULL)
@@ -1212,17 +1235,18 @@ netmap_poll(__unused struct cdev *dev, int events, struct thread *td)
 
 	na = NA(ifp); /* retrieve netmap adapter */
 
+	lim_tx = na->num_tx_queues;
+	lim_rx = na->num_rx_queues;
 	/* how many queues we are scanning */
-	i = priv->np_qfirst;
-	if (i == na->num_queues) { /* from/to host */
+	if (priv->np_qfirst == NETMAP_SW_RING) {
 		if (priv->np_txpoll || want_tx) {
 			/* push any packets up, then we are always ready */
-			kring = &na->tx_rings[i];
+			kring = &na->tx_rings[lim_tx];
 			netmap_sync_to_host(na);
 			revents |= want_tx;
 		}
 		if (want_rx) {
-			kring = &na->rx_rings[i];
+			kring = &na->rx_rings[lim_rx];
 			if (kring->ring->avail == 0)
 				netmap_sync_from_host(na, td);
 			if (kring->ring->avail > 0) {
@@ -1253,7 +1277,7 @@ netmap_poll(__unused struct cdev *dev, int events, struct thread *td)
 	 * there are pending packets to send. The latter can be disabled
 	 * passing NETMAP_NO_TX_POLL in the NIOCREG call.
 	 */
-	check_all = (i + 1 != priv->np_qlast);
+	check_all = (priv->np_qlast == NETMAP_HW_RING) && (lim_tx > 1 || lim_rx > 1);
 
 	/*
 	 * core_lock indicates what to do with the core lock.
@@ -1270,25 +1294,29 @@ netmap_poll(__unused struct cdev *dev, int events, struct thread *td)
 	 * LOCKED_CL	core lock is set, so we need to release it.
 	 */
 	core_lock = (check_all || !na->separate_locks) ? NEED_CL : NO_CL;
+	if (priv->np_qlast != NETMAP_HW_RING) {
+		lim_tx = lim_rx = priv->np_qlast;
+	}
+
 	/*
 	 * We start with a lock free round which is good if we have
 	 * data available. If this fails, then lock and call the sync
 	 * routines.
 	 */
-		for (i = priv->np_qfirst; want_rx && i < priv->np_qlast; i++) {
-			kring = &na->rx_rings[i];
-			if (kring->ring->avail > 0) {
-				revents |= want_rx;
-				want_rx = 0;	/* also breaks the loop */
-			}
+	for (i = priv->np_qfirst; want_rx && i < lim_rx; i++) {
+		kring = &na->rx_rings[i];
+		if (kring->ring->avail > 0) {
+			revents |= want_rx;
+			want_rx = 0;	/* also breaks the loop */
 		}
-		for (i = priv->np_qfirst; want_tx && i < priv->np_qlast; i++) {
-			kring = &na->tx_rings[i];
-			if (kring->ring->avail > 0) {
-				revents |= want_tx;
-				want_tx = 0;	/* also breaks the loop */
-			}
+	}
+	for (i = priv->np_qfirst; want_tx && i < lim_tx; i++) {
+		kring = &na->tx_rings[i];
+		if (kring->ring->avail > 0) {
+			revents |= want_tx;
+			want_tx = 0;	/* also breaks the loop */
 		}
+	}
 
 	/*
 	 * If we to push packets out (priv->np_txpoll) or want_tx is
@@ -1296,7 +1324,7 @@ netmap_poll(__unused struct cdev *dev, int events, struct thread *td)
 	 * to avoid that the tx rings stall).
 	 */
 	if (priv->np_txpoll || want_tx) {
-		for (i = priv->np_qfirst; i < priv->np_qlast; i++) {
+		for (i = priv->np_qfirst; i < lim_tx; i++) {
 			kring = &na->tx_rings[i];
 			/*
 			 * Skip the current ring if want_tx == 0
@@ -1340,7 +1368,7 @@ netmap_poll(__unused struct cdev *dev, int events, struct thread *td)
 	 * Do it on all rings because otherwise we starve.
 	 */
 	if (want_rx) {
-		for (i = priv->np_qfirst; i < priv->np_qlast; i++) {
+		for (i = priv->np_qfirst; i < lim_rx; i++) {
 			kring = &na->rx_rings[i];
 			if (core_lock == NEED_CL) {
 				na->nm_lock(ifp, NETMAP_CORE_LOCK, 0);
@@ -1364,12 +1392,11 @@ netmap_poll(__unused struct cdev *dev, int events, struct thread *td)
 				na->nm_lock(ifp, NETMAP_RX_UNLOCK, i);
 		}
 	}
-	if (check_all && revents == 0) {
-		i = na->num_queues + 1; /* the global queue */
+	if (check_all && revents == 0) { /* signal on the global queue */
 		if (want_tx)
-			selrecord(td, &na->tx_rings[i].si);
+			selrecord(td, &na->tx_si);
 		if (want_rx)
-			selrecord(td, &na->rx_rings[i].si);
+			selrecord(td, &na->rx_si);
 	}
 	if (core_lock == LOCKED_CL)
 		na->nm_lock(ifp, NETMAP_CORE_UNLOCK, 0);
@@ -1430,28 +1457,37 @@ netmap_lock_wrapper(struct ifnet *dev, int what, u_int queueid)
  * kring	N	is for the host stack queue
  * kring	N+1	is only used for the selinfo for all queues.
  * Return 0 on success, ENOMEM otherwise.
+ *
+ * If na->num_tx_queues is preset (nonzero) it is kept, for cards whose tx/rx queue counts differ; otherwise it is set to num_queues.
  */
 int
 netmap_attach(struct netmap_adapter *na, int num_queues)
 {
-	int n = num_queues + 2;
-	int size = sizeof(*na) + 2 * n * sizeof(struct netmap_kring);
+	int i, n, size;
 	void *buf;
 	struct ifnet *ifp = na->ifp;
-	int i;
 
 	if (ifp == NULL) {
 		D("ifp not set, giving up");
 		return EINVAL;
 	}
+	/* clear other fields ? */
 	na->refcount = 0;
-	na->num_queues = num_queues;
+	if (na->num_tx_queues == 0)
+		na->num_tx_queues = num_queues;
+	na->num_rx_queues = num_queues;
+	/* In each direction there are N+1 krings (N = hw queues in that direction):
+	 * 0..N-1	are the hardware rings
+	 * N		is the ring attached to the host stack.
+	 */
+	n = na->num_rx_queues + na->num_tx_queues + 2;
+	size = sizeof(*na) + n * sizeof(struct netmap_kring);
 
 	buf = malloc(size, M_DEVBUF, M_NOWAIT | M_ZERO);
 	if (buf) {
 		WNA(ifp) = buf;
 		na->tx_rings = (void *)((char *)buf + sizeof(*na));
-		na->rx_rings = na->tx_rings + n;
+		na->rx_rings = na->tx_rings + na->num_tx_queues + 1;
 		na->buff_size = NETMAP_BUF_SIZE;
 		bcopy(na, buf, sizeof(*na));
 		ifp->if_capabilities |= IFCAP_NETMAP;
@@ -1460,11 +1496,17 @@ netmap_attach(struct netmap_adapter *na, int num_queues)
 		if (na->nm_lock == NULL)
 			na->nm_lock = netmap_lock_wrapper;
 		mtx_init(&na->core_lock, "netmap core lock", NULL, MTX_DEF);
-		for (i = 0 ; i < num_queues; i++)
+		for (i = 0 ; i < na->num_tx_queues + 1; i++)
 			mtx_init(&na->tx_rings[i].q_lock, "netmap txq lock", NULL, MTX_DEF);
-		for (i = 0 ; i < num_queues; i++)
+		for (i = 0 ; i < na->num_rx_queues + 1; i++)
 			mtx_init(&na->rx_rings[i].q_lock, "netmap rxq lock", NULL, MTX_DEF);
 	}
+#ifdef linux
+	D("netdev_ops %p", ifp->netdev_ops);
+	/* prepare a clone of the netdev ops */
+	na->nm_ndo = *ifp->netdev_ops;
+	na->nm_ndo.ndo_start_xmit = netmap_start_linux;
+#endif
 	D("%s for %s", buf ? "ok" : "failed", ifp->if_xname);
 
 	return (buf ? 0 : ENOMEM);
@@ -1484,10 +1526,16 @@ netmap_detach(struct ifnet *ifp)
 	if (!na)
 		return;
 
-	for (i = 0; i < na->num_queues + 2; i++) {
+	for (i = 0; i < na->num_tx_queues + 1; i++) {
 		knlist_destroy(&na->tx_rings[i].si.si_note);
+		mtx_destroy(&na->tx_rings[i].q_lock);
+	}
+	for (i = 0; i < na->num_rx_queues + 1; i++) {
 		knlist_destroy(&na->rx_rings[i].si.si_note);
+		mtx_destroy(&na->rx_rings[i].q_lock);
 	}
+	knlist_destroy(&na->tx_si.si_note);
+	knlist_destroy(&na->rx_si.si_note);
 	bzero(na, sizeof(*na));
 	WNA(ifp) = NULL;
 	free(na, M_DEVBUF);
@@ -1503,7 +1551,7 @@ int
 netmap_start(struct ifnet *ifp, struct mbuf *m)
 {
 	struct netmap_adapter *na = NA(ifp);
-	struct netmap_kring *kring = &na->rx_rings[na->num_queues];
+	struct netmap_kring *kring = &na->rx_rings[na->num_rx_queues];
 	u_int i, len = MBUF_LEN(m);
 	int error = EBUSY, lim = kring->nkr_num_slots - 1;
 	struct netmap_slot *slot;
@@ -1516,8 +1564,8 @@ netmap_start(struct ifnet *ifp, struct mbuf *m)
 		D("stack ring %s full\n", ifp->if_xname);
 		goto done;	/* no space */
 	}
-	if (len > na->buff_size) {
-		D("drop packet size %d > %d", len, na->buff_size);
+	if (len > NETMAP_BUF_SIZE) {
+		D("drop packet size %d > %d", len, NETMAP_BUF_SIZE);
 		goto done;	/* too long for us */
 	}
 
@@ -1530,7 +1578,7 @@ netmap_start(struct ifnet *ifp, struct mbuf *m)
 	slot->len = len;
 	kring->nr_hwavail++;
 	if (netmap_verbose  & NM_VERB_HOST)
-		D("wake up host ring %s %d", na->ifp->if_xname, na->num_queues);
+		D("wake up host ring %s %d", na->ifp->if_xname, na->num_rx_queues);
 	selwakeuppri(&kring->si, PI_NET);
 	error = 0;
 done:
@@ -1556,21 +1604,21 @@ netmap_reset(struct netmap_adapter *na, enum txrx tx, int n,
 	u_int new_cur)
 {
 	struct netmap_kring *kring;
-	struct netmap_ring *ring;
 	int new_hwofs, lim;
 
 	if (na == NULL)
 		return NULL;	/* no netmap support here */
 	if (!(na->ifp->if_capenable & IFCAP_NETMAP))
 		return NULL;	/* nothing to reinitialize */
-	kring = tx == NR_TX ?  na->tx_rings + n : na->rx_rings + n;
-	ring = kring->ring;
-	lim = kring->nkr_num_slots - 1;
 
-	if (tx == NR_TX)
+	if (tx == NR_TX) {
+		kring = na->tx_rings + n;
 		new_hwofs = kring->nr_hwcur - new_cur;
-	else
+	} else {
+		kring = na->rx_rings + n;
 		new_hwofs = kring->nr_hwcur + kring->nr_hwavail - new_cur;
+	}
+	lim = kring->nkr_num_slots - 1;
 	if (new_hwofs > lim)
 		new_hwofs -= lim + 1;
 
@@ -1583,11 +1631,12 @@ netmap_reset(struct netmap_adapter *na, enum txrx tx, int n,
 			tx == NR_TX ? "TX" : "RX", n);
 
 	/*
+	 * Wake up sleepers on this ring's selinfo and on the global tx/rx selinfo.
 	 * We do the wakeup here, but the ring is not yet reconfigured.
 	 * However, we are under lock so there are no races.
 	 */
 	selwakeuppri(&kring->si, PI_NET);
-	selwakeuppri(&kring[na->num_queues + 1 - n].si, PI_NET);
+	selwakeuppri(tx == NR_TX ? &na->tx_si : &na->rx_si, PI_NET);
 	return kring->ring->slot;
 }
 
@@ -1603,38 +1652,48 @@ netmap_reset(struct netmap_adapter *na, enum txrx tx, int n,
  *     lock(i); wake(i); unlock(i)
  * N rings, separate locks:
  *     lock(i); wake(i); unlock(i); lock(core) wake(N+1) unlock(core)
+ * work_done is non-NULL on the RX path and NULL on the TX path.
  */
 int
 netmap_rx_irq(struct ifnet *ifp, int q, int *work_done)
 {
 	struct netmap_adapter *na;
 	struct netmap_kring *r;
+	NM_SELINFO_T *main_wq;
 
 	if (!(ifp->if_capenable & IFCAP_NETMAP))
 		return 0;
 	na = NA(ifp);
-	r = work_done ? na->rx_rings : na->tx_rings;
+	if (work_done) { /* RX path */
+		r = na->rx_rings + q;
+		r->nr_kflags |= NKR_PENDINTR;
+		main_wq = (na->num_rx_queues > 1) ? &na->tx_si : NULL;
+	} else { /* tx path */
+		r = na->tx_rings + q;
+		main_wq = (na->num_tx_queues > 1) ? &na->rx_si : NULL;
+		work_done = &q; /* dummy */
+	}
 	if (na->separate_locks) {
-		mtx_lock(&r[q].q_lock);
-		selwakeuppri(&r[q].si, PI_NET);
-		mtx_unlock(&r[q].q_lock);
-		if (na->num_queues > 1) {
+		mtx_lock(&r->q_lock);
+		selwakeuppri(&r->si, PI_NET);
+		mtx_unlock(&r->q_lock);
+		if (main_wq) {
 			mtx_lock(&na->core_lock);
-			selwakeuppri(&r[na->num_queues + 1].si, PI_NET);
+			selwakeuppri(main_wq, PI_NET);
 			mtx_unlock(&na->core_lock);
 		}
 	} else {
 		mtx_lock(&na->core_lock);
-		selwakeuppri(&r[q].si, PI_NET);
-		if (na->num_queues > 1)
-			selwakeuppri(&r[na->num_queues + 1].si, PI_NET);
+		selwakeuppri(&r->si, PI_NET);
+		if (main_wq)
+			selwakeuppri(main_wq, PI_NET);
 		mtx_unlock(&na->core_lock);
 	}
-	if (work_done)
 		*work_done = 1; /* do not fire napi again */
 	return 1;
 }
 
+
 static struct cdevsw netmap_cdevsw = {
 	.d_version = D_VERSION,
 	.d_name = "netmap",
@@ -1666,7 +1725,7 @@ netmap_init(void)
 		return (error);
 	}
 	printf("netmap: loaded module with %d Mbytes\n",
-		(int)(netmap_mem_d->nm_totalsize >> 20));
+		(int)(nm_mem->nm_totalsize >> 20));
 	netmap_dev = make_dev(&netmap_cdevsw, 0, UID_ROOT, GID_WHEEL, 0660,
 			      "netmap");
 	return (error);
author	Luigi Rizzo <luigi@FreeBSD.org>	2012-02-27 19:05:01 +0000
committer	Luigi Rizzo <luigi@FreeBSD.org>	2012-02-27 19:05:01 +0000
commit	64ae02c36579bad7d5e682589a0bc1023e359f9d (patch)
tree	a547096f4399bc66370c43d717a40e4b79eb8401 /sys/dev/netmap/netmap.c
parent	d7ccbd70099774d72fd45fa7a0b942c360dd9878 (diff)