summaryrefslogtreecommitdiff
path: root/sys/dev/netmap
diff options
context:
space:
mode:
authorLuigi Rizzo <luigi@FreeBSD.org>2016-10-16 14:13:32 +0000
committerLuigi Rizzo <luigi@FreeBSD.org>2016-10-16 14:13:32 +0000
commit37e3a6d349581b4dd0aebf24be7b1b159a698dcf (patch)
tree0e61deea141c9733af511b0485cf1fd0f2dd17ed /sys/dev/netmap
parent63f6b1a75a8e6e33e4f9d65571c6a221444d3b05 (diff)
Notes
Diffstat (limited to 'sys/dev/netmap')
-rw-r--r--sys/dev/netmap/if_ixl_netmap.h4
-rw-r--r--sys/dev/netmap/if_lem_netmap.h201
-rw-r--r--sys/dev/netmap/ixgbe_netmap.h21
-rw-r--r--sys/dev/netmap/netmap.c1252
-rw-r--r--sys/dev/netmap/netmap_freebsd.c762
-rw-r--r--sys/dev/netmap/netmap_generic.c936
-rw-r--r--sys/dev/netmap/netmap_kern.h658
-rw-r--r--sys/dev/netmap/netmap_mbq.c9
-rw-r--r--sys/dev/netmap/netmap_mbq.h18
-rw-r--r--sys/dev/netmap/netmap_mem2.c932
-rw-r--r--sys/dev/netmap/netmap_mem2.h20
-rw-r--r--sys/dev/netmap/netmap_monitor.c112
-rw-r--r--sys/dev/netmap/netmap_offloadings.c260
-rw-r--r--sys/dev/netmap/netmap_pipe.c156
-rw-r--r--sys/dev/netmap/netmap_vale.c665
15 files changed, 4404 insertions, 1602 deletions
diff --git a/sys/dev/netmap/if_ixl_netmap.h b/sys/dev/netmap/if_ixl_netmap.h
index 2c7f9be541b3..223dc06e36ab 100644
--- a/sys/dev/netmap/if_ixl_netmap.h
+++ b/sys/dev/netmap/if_ixl_netmap.h
@@ -59,7 +59,7 @@ extern int ixl_rx_miss, ixl_rx_miss_bufs, ixl_crcstrip;
/*
* device-specific sysctl variables:
*
- * ixl_crcstrip: 0: keep CRC in rx frames (default), 1: strip it.
+ * ixl_crcstrip: 0: NIC keeps CRC in rx frames, 1: NIC strips it (default).
* During regular operations the CRC is stripped, but on some
* hardware reception of frames not multiple of 64 is slower,
* so using crcstrip=0 helps in benchmarks.
@@ -73,7 +73,7 @@ SYSCTL_DECL(_dev_netmap);
*/
#if 0
SYSCTL_INT(_dev_netmap, OID_AUTO, ixl_crcstrip,
- CTLFLAG_RW, &ixl_crcstrip, 1, "strip CRC on rx frames");
+ CTLFLAG_RW, &ixl_crcstrip, 1, "NIC strips CRC on rx frames");
#endif
SYSCTL_INT(_dev_netmap, OID_AUTO, ixl_rx_miss,
CTLFLAG_RW, &ixl_rx_miss, 0, "potentially missed rx intr");
diff --git a/sys/dev/netmap/if_lem_netmap.h b/sys/dev/netmap/if_lem_netmap.h
index 0ec9b1346609..1c2afbd18f10 100644
--- a/sys/dev/netmap/if_lem_netmap.h
+++ b/sys/dev/netmap/if_lem_netmap.h
@@ -81,6 +81,22 @@ lem_netmap_reg(struct netmap_adapter *na, int onoff)
}
+static void
+lem_netmap_intr(struct netmap_adapter *na, int onoff)
+{
+ struct ifnet *ifp = na->ifp;
+ struct adapter *adapter = ifp->if_softc;
+
+ EM_CORE_LOCK(adapter);
+ if (onoff) {
+ lem_enable_intr(adapter);
+ } else {
+ lem_disable_intr(adapter);
+ }
+ EM_CORE_UNLOCK(adapter);
+}
+
+
/*
* Reconcile kernel and user view of the transmit ring.
*/
@@ -99,10 +115,6 @@ lem_netmap_txsync(struct netmap_kring *kring, int flags)
/* device-specific */
struct adapter *adapter = ifp->if_softc;
-#ifdef NIC_PARAVIRT
- struct paravirt_csb *csb = adapter->csb;
- uint64_t *csbd = (uint64_t *)(csb + 1);
-#endif /* NIC_PARAVIRT */
bus_dmamap_sync(adapter->txdma.dma_tag, adapter->txdma.dma_map,
BUS_DMASYNC_POSTREAD);
@@ -113,19 +125,6 @@ lem_netmap_txsync(struct netmap_kring *kring, int flags)
nm_i = kring->nr_hwcur;
if (nm_i != head) { /* we have new packets to send */
-#ifdef NIC_PARAVIRT
- int do_kick = 0;
- uint64_t t = 0; // timestamp
- int n = head - nm_i;
- if (n < 0)
- n += lim + 1;
- if (csb) {
- t = rdtsc(); /* last timestamp */
- csbd[16] += t - csbd[0]; /* total Wg */
- csbd[17] += n; /* Wg count */
- csbd[0] = t;
- }
-#endif /* NIC_PARAVIRT */
nic_i = netmap_idx_k2n(kring, nm_i);
while (nm_i != head) {
struct netmap_slot *slot = &ring->slot[nm_i];
@@ -166,38 +165,8 @@ lem_netmap_txsync(struct netmap_kring *kring, int flags)
bus_dmamap_sync(adapter->txdma.dma_tag, adapter->txdma.dma_map,
BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
-#ifdef NIC_PARAVIRT
- /* set unconditionally, then also kick if needed */
- if (csb) {
- t = rdtsc();
- if (csb->host_need_txkick == 2) {
- /* can compute an update of delta */
- int64_t delta = t - csbd[3];
- if (delta < 0)
- delta = -delta;
- if (csbd[8] == 0 || delta < csbd[8]) {
- csbd[8] = delta;
- csbd[9]++;
- }
- csbd[10]++;
- }
- csb->guest_tdt = nic_i;
- csbd[18] += t - csbd[0]; // total wp
- csbd[19] += n;
- }
- if (!csb || !csb->guest_csb_on || (csb->host_need_txkick & 1))
- do_kick = 1;
- if (do_kick)
-#endif /* NIC_PARAVIRT */
/* (re)start the tx unit up to slot nic_i (excluded) */
E1000_WRITE_REG(&adapter->hw, E1000_TDT(0), nic_i);
-#ifdef NIC_PARAVIRT
- if (do_kick) {
- uint64_t t1 = rdtsc();
- csbd[20] += t1 - t; // total Np
- csbd[21]++;
- }
-#endif /* NIC_PARAVIRT */
}
/*
@@ -206,93 +175,6 @@ lem_netmap_txsync(struct netmap_kring *kring, int flags)
if (ticks != kring->last_reclaim || flags & NAF_FORCE_RECLAIM || nm_kr_txempty(kring)) {
kring->last_reclaim = ticks;
/* record completed transmissions using TDH */
-#ifdef NIC_PARAVIRT
- /* host updates tdh unconditionally, and we have
- * no side effects on reads, so we can read from there
- * instead of exiting.
- */
- if (csb) {
- static int drain = 0, nodrain=0, good = 0, bad = 0, fail = 0;
- u_int x = adapter->next_tx_to_clean;
- csbd[19]++; // XXX count reclaims
- nic_i = csb->host_tdh;
- if (csb->guest_csb_on) {
- if (nic_i == x) {
- bad++;
- csbd[24]++; // failed reclaims
- /* no progress, request kick and retry */
- csb->guest_need_txkick = 1;
- mb(); // XXX barrier
- nic_i = csb->host_tdh;
- } else {
- good++;
- }
- if (nic_i != x) {
- csb->guest_need_txkick = 2;
- if (nic_i == csb->guest_tdt)
- drain++;
- else
- nodrain++;
-#if 1
- if (netmap_adaptive_io) {
- /* new mechanism: last half ring (or so)
- * released one slot at a time.
- * This effectively makes the system spin.
- *
- * Take next_to_clean + 1 as a reference.
- * tdh must be ahead or equal
- * On entry, the logical order is
- * x < tdh = nic_i
- * We first push tdh up to avoid wraps.
- * The limit is tdh-ll (half ring).
- * if tdh-256 < x we report x;
- * else we report tdh-256
- */
- u_int tdh = nic_i;
- u_int ll = csbd[15];
- u_int delta = lim/8;
- if (netmap_adaptive_io == 2 || ll > delta)
- csbd[15] = ll = delta;
- else if (netmap_adaptive_io == 1 && ll > 1) {
- csbd[15]--;
- }
-
- if (nic_i >= kring->nkr_num_slots) {
- RD(5, "bad nic_i %d on input", nic_i);
- }
- x = nm_next(x, lim);
- if (tdh < x)
- tdh += lim + 1;
- if (tdh <= x + ll) {
- nic_i = x;
- csbd[25]++; //report n + 1;
- } else {
- tdh = nic_i;
- if (tdh < ll)
- tdh += lim + 1;
- nic_i = tdh - ll;
- csbd[26]++; // report tdh - ll
- }
- }
-#endif
- } else {
- /* we stop, count whether we are idle or not */
- int bh_active = csb->host_need_txkick & 2 ? 4 : 0;
- csbd[27+ csb->host_need_txkick]++;
- if (netmap_adaptive_io == 1) {
- if (bh_active && csbd[15] > 1)
- csbd[15]--;
- else if (!bh_active && csbd[15] < lim/2)
- csbd[15]++;
- }
- bad--;
- fail++;
- }
- }
- RD(1, "drain %d nodrain %d good %d retry %d fail %d",
- drain, nodrain, good, bad, fail);
- } else
-#endif /* !NIC_PARAVIRT */
nic_i = E1000_READ_REG(&adapter->hw, E1000_TDH(0));
if (nic_i >= kring->nkr_num_slots) { /* XXX can it happen ? */
D("TDH wrap %d", nic_i);
@@ -324,21 +206,10 @@ lem_netmap_rxsync(struct netmap_kring *kring, int flags)
/* device-specific */
struct adapter *adapter = ifp->if_softc;
-#ifdef NIC_PARAVIRT
- struct paravirt_csb *csb = adapter->csb;
- uint32_t csb_mode = csb && csb->guest_csb_on;
- uint32_t do_host_rxkick = 0;
-#endif /* NIC_PARAVIRT */
if (head > lim)
return netmap_ring_reinit(kring);
-#ifdef NIC_PARAVIRT
- if (csb_mode) {
- force_update = 1;
- csb->guest_need_rxkick = 0;
- }
-#endif /* NIC_PARAVIRT */
/* XXX check sync modes */
bus_dmamap_sync(adapter->rxdma.dma_tag, adapter->rxdma.dma_map,
BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
@@ -357,23 +228,6 @@ lem_netmap_rxsync(struct netmap_kring *kring, int flags)
uint32_t staterr = le32toh(curr->status);
int len;
-#ifdef NIC_PARAVIRT
- if (csb_mode) {
- if ((staterr & E1000_RXD_STAT_DD) == 0) {
- /* don't bother to retry if more than 1 pkt */
- if (n > 1)
- break;
- csb->guest_need_rxkick = 1;
- wmb();
- staterr = le32toh(curr->status);
- if ((staterr & E1000_RXD_STAT_DD) == 0) {
- break;
- } else { /* we are good */
- csb->guest_need_rxkick = 0;
- }
- }
- } else
-#endif /* NIC_PARAVIRT */
if ((staterr & E1000_RXD_STAT_DD) == 0)
break;
len = le16toh(curr->length) - 4; // CRC
@@ -390,18 +244,6 @@ lem_netmap_rxsync(struct netmap_kring *kring, int flags)
nic_i = nm_next(nic_i, lim);
}
if (n) { /* update the state variables */
-#ifdef NIC_PARAVIRT
- if (csb_mode) {
- if (n > 1) {
- /* leave one spare buffer so we avoid rxkicks */
- nm_i = nm_prev(nm_i, lim);
- nic_i = nm_prev(nic_i, lim);
- n--;
- } else {
- csb->guest_need_rxkick = 1;
- }
- }
-#endif /* NIC_PARAVIRT */
ND("%d new packets at nic %d nm %d tail %d",
n,
adapter->next_rx_desc_to_check,
@@ -440,10 +282,6 @@ lem_netmap_rxsync(struct netmap_kring *kring, int flags)
curr->status = 0;
bus_dmamap_sync(adapter->rxtag, rxbuf->map,
BUS_DMASYNC_PREREAD);
-#ifdef NIC_PARAVIRT
- if (csb_mode && csb->host_rxkick_at == nic_i)
- do_host_rxkick = 1;
-#endif /* NIC_PARAVIRT */
nm_i = nm_next(nm_i, lim);
nic_i = nm_next(nic_i, lim);
}
@@ -455,12 +293,6 @@ lem_netmap_rxsync(struct netmap_kring *kring, int flags)
* so move nic_i back by one unit
*/
nic_i = nm_prev(nic_i, lim);
-#ifdef NIC_PARAVIRT
- /* set unconditionally, then also kick if needed */
- if (csb)
- csb->guest_rdt = nic_i;
- if (!csb_mode || do_host_rxkick)
-#endif /* NIC_PARAVIRT */
E1000_WRITE_REG(&adapter->hw, E1000_RDT(0), nic_i);
}
@@ -486,6 +318,7 @@ lem_netmap_attach(struct adapter *adapter)
na.nm_rxsync = lem_netmap_rxsync;
na.nm_register = lem_netmap_reg;
na.num_tx_rings = na.num_rx_rings = 1;
+ na.nm_intr = lem_netmap_intr;
netmap_attach(&na);
}
diff --git a/sys/dev/netmap/ixgbe_netmap.h b/sys/dev/netmap/ixgbe_netmap.h
index 0f34e7218503..7986c9965173 100644
--- a/sys/dev/netmap/ixgbe_netmap.h
+++ b/sys/dev/netmap/ixgbe_netmap.h
@@ -53,7 +53,7 @@ void ixgbe_netmap_attach(struct adapter *adapter);
/*
* device-specific sysctl variables:
*
- * ix_crcstrip: 0: keep CRC in rx frames (default), 1: strip it.
+ * ix_crcstrip: 0: NIC keeps CRC in rx frames (default), 1: NIC strips it.
* During regular operations the CRC is stripped, but on some
* hardware reception of frames not multiple of 64 is slower,
* so using crcstrip=0 helps in benchmarks.
@@ -65,7 +65,7 @@ SYSCTL_DECL(_dev_netmap);
static int ix_rx_miss, ix_rx_miss_bufs;
int ix_crcstrip;
SYSCTL_INT(_dev_netmap, OID_AUTO, ix_crcstrip,
- CTLFLAG_RW, &ix_crcstrip, 0, "strip CRC on rx frames");
+ CTLFLAG_RW, &ix_crcstrip, 0, "NIC strips CRC on rx frames");
SYSCTL_INT(_dev_netmap, OID_AUTO, ix_rx_miss,
CTLFLAG_RW, &ix_rx_miss, 0, "potentially missed rx intr");
SYSCTL_INT(_dev_netmap, OID_AUTO, ix_rx_miss_bufs,
@@ -109,6 +109,20 @@ set_crcstrip(struct ixgbe_hw *hw, int onoff)
IXGBE_WRITE_REG(hw, IXGBE_RDRXCTL, rxc);
}
+static void
+ixgbe_netmap_intr(struct netmap_adapter *na, int onoff)
+{
+ struct ifnet *ifp = na->ifp;
+ struct adapter *adapter = ifp->if_softc;
+
+ IXGBE_CORE_LOCK(adapter);
+ if (onoff) {
+ ixgbe_enable_intr(adapter); // XXX maybe ixgbe_stop ?
+ } else {
+ ixgbe_disable_intr(adapter); // XXX maybe ixgbe_stop ?
+ }
+ IXGBE_CORE_UNLOCK(adapter);
+}
/*
* Register/unregister. We are already under netmap lock.
@@ -311,7 +325,7 @@ ixgbe_netmap_txsync(struct netmap_kring *kring, int flags)
* good way.
*/
nic_i = IXGBE_READ_REG(&adapter->hw, IXGBE_IS_VF(adapter) ?
- IXGBE_VFTDH(kring->ring_id) : IXGBE_TDH(kring->ring_id));
+ IXGBE_VFTDH(kring->ring_id) : IXGBE_TDH(kring->ring_id));
if (nic_i >= kring->nkr_num_slots) { /* XXX can it happen ? */
D("TDH wrap %d", nic_i);
nic_i -= kring->nkr_num_slots;
@@ -486,6 +500,7 @@ ixgbe_netmap_attach(struct adapter *adapter)
na.nm_rxsync = ixgbe_netmap_rxsync;
na.nm_register = ixgbe_netmap_reg;
na.num_tx_rings = na.num_rx_rings = adapter->num_queues;
+ na.nm_intr = ixgbe_netmap_intr;
netmap_attach(&na);
}
diff --git a/sys/dev/netmap/netmap.c b/sys/dev/netmap/netmap.c
index aff757bdadfe..d92d342af83c 100644
--- a/sys/dev/netmap/netmap.c
+++ b/sys/dev/netmap/netmap.c
@@ -1,5 +1,9 @@
/*
- * Copyright (C) 2011-2014 Matteo Landi, Luigi Rizzo. All rights reserved.
+ * Copyright (C) 2011-2014 Matteo Landi
+ * Copyright (C) 2011-2016 Luigi Rizzo
+ * Copyright (C) 2011-2016 Giuseppe Lettieri
+ * Copyright (C) 2011-2016 Vincenzo Maffione
+ * All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
@@ -133,13 +137,12 @@ ports attached to the switch)
* > select()able file descriptor on which events are reported.
*
* Internally, we allocate a netmap_priv_d structure, that will be
- * initialized on ioctl(NIOCREGIF).
+ * initialized on ioctl(NIOCREGIF). There is one netmap_priv_d
+ * structure for each open().
*
* os-specific:
- * FreeBSD: netmap_open (netmap_freebsd.c). The priv is
- * per-thread.
- * linux: linux_netmap_open (netmap_linux.c). The priv is
- * per-open.
+ * FreeBSD: see netmap_open() (netmap_freebsd.c)
+ * linux: see linux_netmap_open() (netmap_linux.c)
*
* > 2. on each descriptor, the process issues an ioctl() to identify
* > the interface that should report events to the file descriptor.
@@ -299,18 +302,17 @@ ports attached to the switch)
* netmap_transmit()
* na->nm_notify == netmap_notify()
* 2) ioctl(NIOCRXSYNC)/netmap_poll() in process context
- * kring->nm_sync() == netmap_rxsync_from_host_compat
+ * kring->nm_sync() == netmap_rxsync_from_host
* netmap_rxsync_from_host(na, NULL, NULL)
* - tx to host stack
* ioctl(NIOCTXSYNC)/netmap_poll() in process context
- * kring->nm_sync() == netmap_txsync_to_host_compat
+ * kring->nm_sync() == netmap_txsync_to_host
* netmap_txsync_to_host(na)
- * NM_SEND_UP()
- * FreeBSD: na->if_input() == ?? XXX
+ * nm_os_send_up()
+ * FreeBSD: na->if_input() == ether_input()
* linux: netif_rx() with NM_MAGIC_PRIORITY_RX
*
*
- *
* -= SYSTEM DEVICE WITH GENERIC SUPPORT =-
*
* na == NA(ifp) == generic_netmap_adapter created in generic_netmap_attach()
@@ -319,10 +321,11 @@ ports attached to the switch)
* concurrently:
* 1) ioctl(NIOCTXSYNC)/netmap_poll() in process context
* kring->nm_sync() == generic_netmap_txsync()
- * linux: dev_queue_xmit() with NM_MAGIC_PRIORITY_TX
- * generic_ndo_start_xmit()
- * orig. dev. start_xmit
- * FreeBSD: na->if_transmit() == orig. dev if_transmit
+ * nm_os_generic_xmit_frame()
+ * linux: dev_queue_xmit() with NM_MAGIC_PRIORITY_TX
+ * ifp->ndo_start_xmit == generic_ndo_start_xmit()
+ * gna->save_start_xmit == orig. dev. start_xmit
+ * FreeBSD: na->if_transmit() == orig. dev if_transmit
* 2) generic_mbuf_destructor()
* na->nm_notify() == netmap_notify()
* - rx from netmap userspace:
@@ -333,24 +336,15 @@ ports attached to the switch)
* generic_rx_handler()
* mbq_safe_enqueue()
* na->nm_notify() == netmap_notify()
- * - rx from host stack:
- * concurrently:
+ * - rx from host stack
+ * FreeBSD: same as native
+ * Linux: same as native except:
* 1) host stack
- * linux: generic_ndo_start_xmit()
- * netmap_transmit()
- * FreeBSD: ifp->if_input() == netmap_transmit
- * both:
- * na->nm_notify() == netmap_notify()
- * 2) ioctl(NIOCRXSYNC)/netmap_poll() in process context
- * kring->nm_sync() == netmap_rxsync_from_host_compat
- * netmap_rxsync_from_host(na, NULL, NULL)
- * - tx to host stack:
- * ioctl(NIOCTXSYNC)/netmap_poll() in process context
- * kring->nm_sync() == netmap_txsync_to_host_compat
- * netmap_txsync_to_host(na)
- * NM_SEND_UP()
- * FreeBSD: na->if_input() == ??? XXX
- * linux: netif_rx() with NM_MAGIC_PRIORITY_RX
+ * dev_queue_xmit() without NM_MAGIC_PRIORITY_TX
+ * ifp->ndo_start_xmit == generic_ndo_start_xmit()
+ * netmap_transmit()
+ * na->nm_notify() == netmap_notify()
+ * - tx to host stack (same as native):
*
*
* -= VALE =-
@@ -371,7 +365,7 @@ ports attached to the switch)
* from host stack:
* netmap_transmit()
* na->nm_notify() == netmap_bwrap_intr_notify(ring_nr == host ring)
- * kring->nm_sync() == netmap_rxsync_from_host_compat()
+ * kring->nm_sync() == netmap_rxsync_from_host()
* netmap_vp_txsync()
*
* - system device with generic support:
@@ -384,7 +378,7 @@ ports attached to the switch)
* from host stack:
* netmap_transmit()
* na->nm_notify() == netmap_bwrap_intr_notify(ring_nr == host ring)
- * kring->nm_sync() == netmap_rxsync_from_host_compat()
+ * kring->nm_sync() == netmap_rxsync_from_host()
* netmap_vp_txsync()
*
* (all cases) --> nm_bdg_flush()
@@ -407,7 +401,7 @@ ports attached to the switch)
* netmap_vp_rxsync()
* to host stack:
* netmap_vp_rxsync()
- * kring->nm_sync() == netmap_txsync_to_host_compat
+ * kring->nm_sync() == netmap_txsync_to_host
* netmap_vp_rxsync_locked()
*
* - system device with generic adapter:
@@ -418,7 +412,7 @@ ports attached to the switch)
* netmap_vp_rxsync()
* to host stack:
* netmap_vp_rxsync()
- * kring->nm_sync() == netmap_txsync_to_host_compat
+ * kring->nm_sync() == netmap_txsync_to_host
* netmap_vp_rxsync()
*
*/
@@ -455,29 +449,19 @@ ports attached to the switch)
#include <sys/refcount.h>
-/* reduce conditional code */
-// linux API, use for the knlist in FreeBSD
-/* use a private mutex for the knlist */
-#define init_waitqueue_head(x) do { \
- struct mtx *m = &(x)->m; \
- mtx_init(m, "nm_kn_lock", NULL, MTX_DEF); \
- knlist_init_mtx(&(x)->si.si_note, m); \
- } while (0)
-
-#define OS_selrecord(a, b) selrecord(a, &((b)->si))
-#define OS_selwakeup(a, b) freebsd_selwakeup(a, b)
-
#elif defined(linux)
#include "bsd_glue.h"
-
-
#elif defined(__APPLE__)
#warning OSX support is only partial
#include "osx_glue.h"
+#elif defined (_WIN32)
+
+#include "win_glue.h"
+
#else
#error Unsupported platform
@@ -492,47 +476,72 @@ ports attached to the switch)
#include <dev/netmap/netmap_mem2.h>
-MALLOC_DEFINE(M_NETMAP, "netmap", "Network memory map");
-
/* user-controlled variables */
int netmap_verbose;
static int netmap_no_timestamp; /* don't timestamp on rxsync */
-
-SYSCTL_NODE(_dev, OID_AUTO, netmap, CTLFLAG_RW, 0, "Netmap args");
-SYSCTL_INT(_dev_netmap, OID_AUTO, verbose,
- CTLFLAG_RW, &netmap_verbose, 0, "Verbose mode");
-SYSCTL_INT(_dev_netmap, OID_AUTO, no_timestamp,
- CTLFLAG_RW, &netmap_no_timestamp, 0, "no_timestamp");
int netmap_mitigate = 1;
-SYSCTL_INT(_dev_netmap, OID_AUTO, mitigate, CTLFLAG_RW, &netmap_mitigate, 0, "");
int netmap_no_pendintr = 1;
-SYSCTL_INT(_dev_netmap, OID_AUTO, no_pendintr,
- CTLFLAG_RW, &netmap_no_pendintr, 0, "Always look for new received packets.");
int netmap_txsync_retry = 2;
-SYSCTL_INT(_dev_netmap, OID_AUTO, txsync_retry, CTLFLAG_RW,
- &netmap_txsync_retry, 0 , "Number of txsync loops in bridge's flush.");
-
int netmap_adaptive_io = 0;
-SYSCTL_INT(_dev_netmap, OID_AUTO, adaptive_io, CTLFLAG_RW,
- &netmap_adaptive_io, 0 , "Adaptive I/O on paravirt");
-
int netmap_flags = 0; /* debug flags */
-int netmap_fwd = 0; /* force transparent mode */
+static int netmap_fwd = 0; /* force transparent mode */
/*
* netmap_admode selects the netmap mode to use.
* Invalid values are reset to NETMAP_ADMODE_BEST
*/
-enum { NETMAP_ADMODE_BEST = 0, /* use native, fallback to generic */
+enum { NETMAP_ADMODE_BEST = 0, /* use native, fallback to generic */
NETMAP_ADMODE_NATIVE, /* either native or none */
NETMAP_ADMODE_GENERIC, /* force generic */
NETMAP_ADMODE_LAST };
static int netmap_admode = NETMAP_ADMODE_BEST;
-int netmap_generic_mit = 100*1000; /* Generic mitigation interval in nanoseconds. */
-int netmap_generic_ringsize = 1024; /* Generic ringsize. */
-int netmap_generic_rings = 1; /* number of queues in generic. */
+/* netmap_generic_mit controls mitigation of RX notifications for
+ * the generic netmap adapter. The value is a time interval in
+ * nanoseconds. */
+int netmap_generic_mit = 100*1000;
+
+/* We use by default netmap-aware qdiscs with generic netmap adapters,
+ * even if there can be a little performance hit with hardware NICs.
+ * However, using the qdisc is the safer approach, for two reasons:
+ * 1) it prevents non-fifo qdiscs to break the TX notification
+ * scheme, which is based on mbuf destructors when txqdisc is
+ * not used.
+ * 2) it makes it possible to transmit over software devices that
+ * change skb->dev, like bridge, veth, ...
+ *
+ * Anyway users looking for the best performance should
+ * use native adapters.
+ */
+int netmap_generic_txqdisc = 1;
+
+/* Default number of slots and queues for generic adapters. */
+int netmap_generic_ringsize = 1024;
+int netmap_generic_rings = 1;
+
+/* Non-zero if ptnet devices are allowed to use virtio-net headers. */
+int ptnet_vnet_hdr = 1;
+
+/*
+ * SYSCTL calls are grouped between SYSBEGIN and SYSEND to be emulated
+ * in some other operating systems
+ */
+SYSBEGIN(main_init);
+
+SYSCTL_DECL(_dev_netmap);
+SYSCTL_NODE(_dev, OID_AUTO, netmap, CTLFLAG_RW, 0, "Netmap args");
+SYSCTL_INT(_dev_netmap, OID_AUTO, verbose,
+ CTLFLAG_RW, &netmap_verbose, 0, "Verbose mode");
+SYSCTL_INT(_dev_netmap, OID_AUTO, no_timestamp,
+ CTLFLAG_RW, &netmap_no_timestamp, 0, "no_timestamp");
+SYSCTL_INT(_dev_netmap, OID_AUTO, mitigate, CTLFLAG_RW, &netmap_mitigate, 0, "");
+SYSCTL_INT(_dev_netmap, OID_AUTO, no_pendintr,
+ CTLFLAG_RW, &netmap_no_pendintr, 0, "Always look for new received packets.");
+SYSCTL_INT(_dev_netmap, OID_AUTO, txsync_retry, CTLFLAG_RW,
+ &netmap_txsync_retry, 0 , "Number of txsync loops in bridge's flush.");
+SYSCTL_INT(_dev_netmap, OID_AUTO, adaptive_io, CTLFLAG_RW,
+ &netmap_adaptive_io, 0 , "Adaptive I/O on paravirt");
SYSCTL_INT(_dev_netmap, OID_AUTO, flags, CTLFLAG_RW, &netmap_flags, 0 , "");
SYSCTL_INT(_dev_netmap, OID_AUTO, fwd, CTLFLAG_RW, &netmap_fwd, 0 , "");
@@ -540,19 +549,24 @@ SYSCTL_INT(_dev_netmap, OID_AUTO, admode, CTLFLAG_RW, &netmap_admode, 0 , "");
SYSCTL_INT(_dev_netmap, OID_AUTO, generic_mit, CTLFLAG_RW, &netmap_generic_mit, 0 , "");
SYSCTL_INT(_dev_netmap, OID_AUTO, generic_ringsize, CTLFLAG_RW, &netmap_generic_ringsize, 0 , "");
SYSCTL_INT(_dev_netmap, OID_AUTO, generic_rings, CTLFLAG_RW, &netmap_generic_rings, 0 , "");
+SYSCTL_INT(_dev_netmap, OID_AUTO, generic_txqdisc, CTLFLAG_RW, &netmap_generic_txqdisc, 0 , "");
+SYSCTL_INT(_dev_netmap, OID_AUTO, ptnet_vnet_hdr, CTLFLAG_RW, &ptnet_vnet_hdr, 0 , "");
+
+SYSEND;
NMG_LOCK_T netmap_global_lock;
-int netmap_use_count = 0; /* number of active netmap instances */
/*
* mark the ring as stopped, and run through the locks
* to make sure other users get to see it.
+ * stopped must be either NR_KR_STOPPED (for unbounded stop)
+ * of NR_KR_LOCKED (brief stop for mutual exclusion purposes)
*/
static void
-netmap_disable_ring(struct netmap_kring *kr)
+netmap_disable_ring(struct netmap_kring *kr, int stopped)
{
- kr->nkr_stopped = 1;
- nm_kr_get(kr);
+ nm_kr_stop(kr, stopped);
+ // XXX check if nm_kr_stop is sufficient
mtx_lock(&kr->q_lock);
mtx_unlock(&kr->q_lock);
nm_kr_put(kr);
@@ -563,7 +577,7 @@ void
netmap_set_ring(struct netmap_adapter *na, u_int ring_id, enum txrx t, int stopped)
{
if (stopped)
- netmap_disable_ring(NMR(na, t) + ring_id);
+ netmap_disable_ring(NMR(na, t) + ring_id, stopped);
else
NMR(na, t)[ring_id].nkr_stopped = 0;
}
@@ -590,13 +604,14 @@ netmap_set_all_rings(struct netmap_adapter *na, int stopped)
* Convenience function used in drivers. Waits for current txsync()s/rxsync()s
* to finish and prevents any new one from starting. Call this before turning
* netmap mode off, or before removing the hardware rings (e.g., on module
- * onload). As a rule of thumb for linux drivers, this should be placed near
- * each napi_disable().
+ * onload).
*/
void
netmap_disable_all_rings(struct ifnet *ifp)
{
- netmap_set_all_rings(NA(ifp), 1 /* stopped */);
+ if (NM_NA_VALID(ifp)) {
+ netmap_set_all_rings(NA(ifp), NM_KR_STOPPED);
+ }
}
/*
@@ -607,9 +622,34 @@ netmap_disable_all_rings(struct ifnet *ifp)
void
netmap_enable_all_rings(struct ifnet *ifp)
{
- netmap_set_all_rings(NA(ifp), 0 /* enabled */);
+ if (NM_NA_VALID(ifp)) {
+ netmap_set_all_rings(NA(ifp), 0 /* enabled */);
+ }
+}
+
+void
+netmap_make_zombie(struct ifnet *ifp)
+{
+ if (NM_NA_VALID(ifp)) {
+ struct netmap_adapter *na = NA(ifp);
+ netmap_set_all_rings(na, NM_KR_LOCKED);
+ na->na_flags |= NAF_ZOMBIE;
+ netmap_set_all_rings(na, 0);
+ }
}
+void
+netmap_undo_zombie(struct ifnet *ifp)
+{
+ if (NM_NA_VALID(ifp)) {
+ struct netmap_adapter *na = NA(ifp);
+ if (na->na_flags & NAF_ZOMBIE) {
+ netmap_set_all_rings(na, NM_KR_LOCKED);
+ na->na_flags &= ~NAF_ZOMBIE;
+ netmap_set_all_rings(na, 0);
+ }
+ }
+}
/*
* generic bound_checking function
@@ -727,28 +767,9 @@ netmap_update_config(struct netmap_adapter *na)
return 1;
}
-static void netmap_txsync_to_host(struct netmap_adapter *na);
-static int netmap_rxsync_from_host(struct netmap_adapter *na, struct thread *td, void *pwait);
-
-/* kring->nm_sync callback for the host tx ring */
-static int
-netmap_txsync_to_host_compat(struct netmap_kring *kring, int flags)
-{
- (void)flags; /* unused */
- netmap_txsync_to_host(kring->na);
- return 0;
-}
-
-/* kring->nm_sync callback for the host rx ring */
-static int
-netmap_rxsync_from_host_compat(struct netmap_kring *kring, int flags)
-{
- (void)flags; /* unused */
- netmap_rxsync_from_host(kring->na, NULL, NULL);
- return 0;
-}
-
-
+/* nm_sync callbacks for the host rings */
+static int netmap_txsync_to_host(struct netmap_kring *kring, int flags);
+static int netmap_rxsync_from_host(struct netmap_kring *kring, int flags);
/* create the krings array and initialize the fields common to all adapters.
* The array layout is this:
@@ -809,12 +830,14 @@ netmap_krings_create(struct netmap_adapter *na, u_int tailroom)
kring->ring_id = i;
kring->tx = t;
kring->nkr_num_slots = ndesc;
+ kring->nr_mode = NKR_NETMAP_OFF;
+ kring->nr_pending_mode = NKR_NETMAP_OFF;
if (i < nma_get_nrings(na, t)) {
kring->nm_sync = (t == NR_TX ? na->nm_txsync : na->nm_rxsync);
- } else if (i == na->num_tx_rings) {
+ } else {
kring->nm_sync = (t == NR_TX ?
- netmap_txsync_to_host_compat :
- netmap_rxsync_from_host_compat);
+ netmap_txsync_to_host:
+ netmap_rxsync_from_host);
}
kring->nm_notify = na->nm_notify;
kring->rhead = kring->rcur = kring->nr_hwcur = 0;
@@ -822,14 +845,14 @@ netmap_krings_create(struct netmap_adapter *na, u_int tailroom)
* IMPORTANT: Always keep one slot empty.
*/
kring->rtail = kring->nr_hwtail = (t == NR_TX ? ndesc - 1 : 0);
- snprintf(kring->name, sizeof(kring->name) - 1, "%s %s%d", na->name,
+ snprintf(kring->name, sizeof(kring->name) - 1, "%s %s%d", na->name,
nm_txrx2str(t), i);
ND("ktx %s h %d c %d t %d",
kring->name, kring->rhead, kring->rcur, kring->rtail);
mtx_init(&kring->q_lock, (t == NR_TX ? "nm_txq_lock" : "nm_rxq_lock"), NULL, MTX_DEF);
- init_waitqueue_head(&kring->si);
+ nm_os_selinfo_init(&kring->si);
}
- init_waitqueue_head(&na->si[t]);
+ nm_os_selinfo_init(&na->si[t]);
}
na->tailroom = na->rx_rings + n[NR_RX];
@@ -838,19 +861,6 @@ netmap_krings_create(struct netmap_adapter *na, u_int tailroom)
}
-#ifdef __FreeBSD__
-static void
-netmap_knlist_destroy(NM_SELINFO_T *si)
-{
- /* XXX kqueue(9) needed; these will mirror knlist_init. */
- knlist_delete(&si->si.si_note, curthread, 0 /* not locked */ );
- knlist_destroy(&si->si.si_note);
- /* now we don't need the mutex anymore */
- mtx_destroy(&si->m);
-}
-#endif /* __FreeBSD__ */
-
-
/* undo the actions performed by netmap_krings_create */
/* call with NMG_LOCK held */
void
@@ -860,12 +870,12 @@ netmap_krings_delete(struct netmap_adapter *na)
enum txrx t;
for_rx_tx(t)
- netmap_knlist_destroy(&na->si[t]);
+ nm_os_selinfo_uninit(&na->si[t]);
/* we rely on the krings layout described above */
for ( ; kring != na->tailroom; kring++) {
mtx_destroy(&kring->q_lock);
- netmap_knlist_destroy(&kring->si);
+ nm_os_selinfo_uninit(&kring->si);
}
free(na->tx_rings, M_DEVBUF);
na->tx_rings = na->rx_rings = na->tailroom = NULL;
@@ -878,14 +888,14 @@ netmap_krings_delete(struct netmap_adapter *na)
* them first.
*/
/* call with NMG_LOCK held */
-static void
+void
netmap_hw_krings_delete(struct netmap_adapter *na)
{
struct mbq *q = &na->rx_rings[na->num_rx_rings].rx_queue;
ND("destroy sw mbq with len %d", mbq_len(q));
mbq_purge(q);
- mbq_safe_destroy(q);
+ mbq_safe_fini(q);
netmap_krings_delete(na);
}
@@ -898,29 +908,38 @@ netmap_hw_krings_delete(struct netmap_adapter *na)
*/
/* call with NMG_LOCK held */
static void netmap_unset_ringid(struct netmap_priv_d *);
-static void netmap_rel_exclusive(struct netmap_priv_d *);
-static void
+static void netmap_krings_put(struct netmap_priv_d *);
+void
netmap_do_unregif(struct netmap_priv_d *priv)
{
struct netmap_adapter *na = priv->np_na;
NMG_LOCK_ASSERT();
na->active_fds--;
- /* release exclusive use if it was requested on regif */
- netmap_rel_exclusive(priv);
- if (na->active_fds <= 0) { /* last instance */
-
- if (netmap_verbose)
- D("deleting last instance for %s", na->name);
+ /* unset nr_pending_mode and possibly release exclusive mode */
+ netmap_krings_put(priv);
#ifdef WITH_MONITOR
+ /* XXX check whether we have to do something with monitor
+ * when rings change nr_mode. */
+ if (na->active_fds <= 0) {
/* walk through all the rings and tell any monitor
* that the port is going to exit netmap mode
*/
netmap_monitor_stop(na);
+ }
#endif
+
+ if (na->active_fds <= 0 || nm_kring_pending(priv)) {
+ na->nm_register(na, 0);
+ }
+
+ /* delete rings and buffers that are no longer needed */
+ netmap_mem_rings_delete(na);
+
+ if (na->active_fds <= 0) { /* last instance */
/*
- * (TO CHECK) This function is only called
+ * (TO CHECK) We enter here
* when the last reference to this file descriptor goes
* away. This means we cannot have any pending poll()
* or interrupt routine operating on the structure.
@@ -933,16 +952,16 @@ netmap_do_unregif(struct netmap_priv_d *priv)
* happens if the close() occurs while a concurrent
* syscall is running.
*/
- na->nm_register(na, 0); /* off, clear flags */
- /* Wake up any sleeping threads. netmap_poll will
- * then return POLLERR
- * XXX The wake up now must happen during *_down(), when
- * we order all activities to stop. -gl
- */
- /* delete rings and buffers */
- netmap_mem_rings_delete(na);
+ if (netmap_verbose)
+ D("deleting last instance for %s", na->name);
+
+ if (nm_netmap_on(na)) {
+ D("BUG: netmap on while going to delete the krings");
+ }
+
na->nm_krings_delete(na);
}
+
/* possibily decrement counter of tx_si/rx_si users */
netmap_unset_ringid(priv);
/* delete the nifp */
@@ -962,6 +981,20 @@ nm_si_user(struct netmap_priv_d *priv, enum txrx t)
(priv->np_qlast[t] - priv->np_qfirst[t] > 1));
}
+struct netmap_priv_d*
+netmap_priv_new(void)
+{
+ struct netmap_priv_d *priv;
+
+ priv = malloc(sizeof(struct netmap_priv_d), M_DEVBUF,
+ M_NOWAIT | M_ZERO);
+ if (priv == NULL)
+ return NULL;
+ priv->np_refs = 1;
+ nm_os_get_module();
+ return priv;
+}
+
/*
* Destructor of the netmap_priv_d, called when the fd is closed
* Action: undo all the things done by NIOCREGIF,
@@ -971,22 +1004,22 @@ nm_si_user(struct netmap_priv_d *priv, enum txrx t)
*
*/
/* call with NMG_LOCK held */
-int
-netmap_dtor_locked(struct netmap_priv_d *priv)
+void
+netmap_priv_delete(struct netmap_priv_d *priv)
{
struct netmap_adapter *na = priv->np_na;
/* number of active references to this fd */
if (--priv->np_refs > 0) {
- return 0;
+ return;
}
- netmap_use_count--;
- if (!na) {
- return 1; //XXX is it correct?
+ nm_os_put_module();
+ if (na) {
+ netmap_do_unregif(priv);
}
- netmap_do_unregif(priv);
- netmap_adapter_put(na);
- return 1;
+ netmap_unget_na(na, priv->np_ifp);
+ bzero(priv, sizeof(*priv)); /* for safety */
+ free(priv, M_DEVBUF);
}
@@ -995,15 +1028,10 @@ void
netmap_dtor(void *data)
{
struct netmap_priv_d *priv = data;
- int last_instance;
NMG_LOCK();
- last_instance = netmap_dtor_locked(priv);
+ netmap_priv_delete(priv);
NMG_UNLOCK();
- if (last_instance) {
- bzero(priv, sizeof(*priv)); /* for safety */
- free(priv, M_DEVBUF);
- }
}
@@ -1036,14 +1064,19 @@ static void
netmap_send_up(struct ifnet *dst, struct mbq *q)
{
struct mbuf *m;
+ struct mbuf *head = NULL, *prev = NULL;
/* send packets up, outside the lock */
while ((m = mbq_dequeue(q)) != NULL) {
if (netmap_verbose & NM_VERB_HOST)
D("sending up pkt %p size %d", m, MBUF_LEN(m));
- NM_SEND_UP(dst, m);
+ prev = nm_os_send_up(dst, m, prev);
+ if (head == NULL)
+ head = prev;
}
- mbq_destroy(q);
+ if (head)
+ nm_os_send_up(dst, NULL, head);
+ mbq_fini(q);
}
@@ -1081,6 +1114,27 @@ netmap_grab_packets(struct netmap_kring *kring, struct mbq *q, int force)
}
}
+static inline int
+_nm_may_forward(struct netmap_kring *kring)
+{
+ return ((netmap_fwd || kring->ring->flags & NR_FORWARD) &&
+ kring->na->na_flags & NAF_HOST_RINGS &&
+ kring->tx == NR_RX);
+}
+
+static inline int
+nm_may_forward_up(struct netmap_kring *kring)
+{
+ return _nm_may_forward(kring) &&
+ kring->ring_id != kring->na->num_rx_rings;
+}
+
+static inline int
+nm_may_forward_down(struct netmap_kring *kring)
+{
+ return _nm_may_forward(kring) &&
+ kring->ring_id == kring->na->num_rx_rings;
+}
/*
* Send to the NIC rings packets marked NS_FORWARD between
@@ -1107,7 +1161,7 @@ netmap_sw_to_nic(struct netmap_adapter *na)
for (; rxcur != head && !nm_ring_empty(rdst);
rxcur = nm_next(rxcur, src_lim) ) {
struct netmap_slot *src, *dst, tmp;
- u_int dst_cur = rdst->cur;
+ u_int dst_head = rdst->head;
src = &rxslot[rxcur];
if ((src->flags & NS_FORWARD) == 0 && !netmap_fwd)
@@ -1115,7 +1169,7 @@ netmap_sw_to_nic(struct netmap_adapter *na)
sent++;
- dst = &rdst->slot[dst_cur];
+ dst = &rdst->slot[dst_head];
tmp = *src;
@@ -1126,7 +1180,7 @@ netmap_sw_to_nic(struct netmap_adapter *na)
dst->len = tmp.len;
dst->flags = NS_BUF_CHANGED;
- rdst->cur = nm_next(dst_cur, dst_lim);
+ rdst->head = rdst->cur = nm_next(dst_head, dst_lim);
}
/* if (sent) XXX txsync ? */
}
@@ -1140,10 +1194,10 @@ netmap_sw_to_nic(struct netmap_adapter *na)
* can be among multiple user threads erroneously calling
* this routine concurrently.
*/
-static void
-netmap_txsync_to_host(struct netmap_adapter *na)
+static int
+netmap_txsync_to_host(struct netmap_kring *kring, int flags)
{
- struct netmap_kring *kring = &na->tx_rings[na->num_tx_rings];
+ struct netmap_adapter *na = kring->na;
u_int const lim = kring->nkr_num_slots - 1;
u_int const head = kring->rhead;
struct mbq q;
@@ -1162,6 +1216,7 @@ netmap_txsync_to_host(struct netmap_adapter *na)
kring->nr_hwtail -= lim + 1;
netmap_send_up(na->ifp, &q);
+ return 0;
}
@@ -1171,17 +1226,15 @@ netmap_txsync_to_host(struct netmap_adapter *na)
* We protect access to the kring using kring->rx_queue.lock
*
* This routine also does the selrecord if called from the poll handler
- * (we know because td != NULL).
+ * (we know because sr != NULL).
*
- * NOTE: on linux, selrecord() is defined as a macro and uses pwait
- * as an additional hidden argument.
* returns the number of packets delivered to tx queues in
* transparent mode, or a negative value if error
*/
static int
-netmap_rxsync_from_host(struct netmap_adapter *na, struct thread *td, void *pwait)
+netmap_rxsync_from_host(struct netmap_kring *kring, int flags)
{
- struct netmap_kring *kring = &na->rx_rings[na->num_rx_rings];
+ struct netmap_adapter *na = kring->na;
struct netmap_ring *ring = kring->ring;
u_int nm_i, n;
u_int const lim = kring->nkr_num_slots - 1;
@@ -1189,9 +1242,6 @@ netmap_rxsync_from_host(struct netmap_adapter *na, struct thread *td, void *pwai
int ret = 0;
struct mbq *q = &kring->rx_queue, fq;
- (void)pwait; /* disable unused warnings */
- (void)td;
-
mbq_init(&fq); /* fq holds packets to be freed */
mbq_lock(q);
@@ -1226,19 +1276,20 @@ netmap_rxsync_from_host(struct netmap_adapter *na, struct thread *td, void *pwai
*/
nm_i = kring->nr_hwcur;
if (nm_i != head) { /* something was released */
- if (netmap_fwd || kring->ring->flags & NR_FORWARD)
+ if (nm_may_forward_down(kring)) {
ret = netmap_sw_to_nic(na);
+ if (ret > 0) {
+ kring->nr_kflags |= NR_FORWARD;
+ ret = 0;
+ }
+ }
kring->nr_hwcur = head;
}
- /* access copies of cur,tail in the kring */
- if (kring->rcur == kring->rtail && td) /* no bufs available */
- OS_selrecord(td, &kring->si);
-
mbq_unlock(q);
mbq_purge(&fq);
- mbq_destroy(&fq);
+ mbq_fini(&fq);
return ret;
}
@@ -1267,17 +1318,14 @@ netmap_rxsync_from_host(struct netmap_adapter *na, struct thread *td, void *pwai
* 0 NETMAP_ADMODE_GENERIC GENERIC GENERIC
*
*/
-
+static void netmap_hw_dtor(struct netmap_adapter *); /* needed by NM_IS_NATIVE() */
int
netmap_get_hw_na(struct ifnet *ifp, struct netmap_adapter **na)
{
/* generic support */
int i = netmap_admode; /* Take a snapshot. */
struct netmap_adapter *prev_na;
-#ifdef WITH_GENERIC
- struct netmap_generic_adapter *gna;
int error = 0;
-#endif
*na = NULL; /* default */
@@ -1285,7 +1333,7 @@ netmap_get_hw_na(struct ifnet *ifp, struct netmap_adapter **na)
if (i < NETMAP_ADMODE_BEST || i >= NETMAP_ADMODE_LAST)
i = netmap_admode = NETMAP_ADMODE_BEST;
- if (NETMAP_CAPABLE(ifp)) {
+ if (NM_NA_VALID(ifp)) {
prev_na = NA(ifp);
/* If an adapter already exists, return it if
* there are active file descriptors or if
@@ -1310,10 +1358,9 @@ netmap_get_hw_na(struct ifnet *ifp, struct netmap_adapter **na)
/* If there isn't native support and netmap is not allowed
* to use generic adapters, we cannot satisfy the request.
*/
- if (!NETMAP_CAPABLE(ifp) && i == NETMAP_ADMODE_NATIVE)
+ if (!NM_IS_NATIVE(ifp) && i == NETMAP_ADMODE_NATIVE)
return EOPNOTSUPP;
-#ifdef WITH_GENERIC
/* Otherwise, create a generic adapter and return it,
* saving the previously used netmap adapter, if any.
*
@@ -1328,25 +1375,12 @@ netmap_get_hw_na(struct ifnet *ifp, struct netmap_adapter **na)
* the branches above. This ensures that we never override
* a generic adapter with another generic adapter.
*/
- prev_na = NA(ifp);
error = generic_netmap_attach(ifp);
if (error)
return error;
*na = NA(ifp);
- gna = (struct netmap_generic_adapter*)NA(ifp);
- gna->prev = prev_na; /* save old na */
- if (prev_na != NULL) {
- ifunit_ref(ifp->if_xname);
- // XXX add a refcount ?
- netmap_adapter_get(prev_na);
- }
- ND("Created generic NA %p (prev %p)", gna, gna->prev);
-
return 0;
-#else /* !WITH_GENERIC */
- return EOPNOTSUPP;
-#endif
}
@@ -1364,21 +1398,22 @@ netmap_get_hw_na(struct ifnet *ifp, struct netmap_adapter **na)
* could not be allocated.
* If successful, hold a reference to the netmap adapter.
*
- * No reference is kept on the real interface, which may then
- * disappear at any time.
+ * If the interface specified by nmr is a system one, also keep
+ * a reference to it and return a valid *ifp.
*/
int
-netmap_get_na(struct nmreq *nmr, struct netmap_adapter **na, int create)
+netmap_get_na(struct nmreq *nmr, struct netmap_adapter **na,
+ struct ifnet **ifp, int create)
{
- struct ifnet *ifp = NULL;
int error = 0;
struct netmap_adapter *ret = NULL;
*na = NULL; /* default return value */
+ *ifp = NULL;
NMG_LOCK_ASSERT();
- /* we cascade through all possible types of netmap adapter.
+ /* We cascade through all possible types of netmap adapter.
* All netmap_get_*_na() functions return an error and an na,
* with the following combinations:
*
@@ -1389,6 +1424,11 @@ netmap_get_na(struct nmreq *nmr, struct netmap_adapter **na, int create)
* !0 !NULL impossible
*/
+ /* try to see if this is a ptnetmap port */
+ error = netmap_get_pt_host_na(nmr, na, create);
+ if (error || *na != NULL)
+ return error;
+
/* try to see if this is a monitor port */
error = netmap_get_monitor_na(nmr, na, create);
if (error || *na != NULL)
@@ -1413,12 +1453,12 @@ netmap_get_na(struct nmreq *nmr, struct netmap_adapter **na, int create)
* This may still be a tap, a veth/epair, or even a
* persistent VALE port.
*/
- ifp = ifunit_ref(nmr->nr_name);
- if (ifp == NULL) {
+ *ifp = ifunit_ref(nmr->nr_name);
+ if (*ifp == NULL) {
return ENXIO;
}
- error = netmap_get_hw_na(ifp, &ret);
+ error = netmap_get_hw_na(*ifp, &ret);
if (error)
goto out;
@@ -1426,15 +1466,42 @@ netmap_get_na(struct nmreq *nmr, struct netmap_adapter **na, int create)
netmap_adapter_get(ret);
out:
- if (error && ret != NULL)
- netmap_adapter_put(ret);
-
- if (ifp)
- if_rele(ifp); /* allow live unloading of drivers modules */
+ if (error) {
+ if (ret)
+ netmap_adapter_put(ret);
+ if (*ifp) {
+ if_rele(*ifp);
+ *ifp = NULL;
+ }
+ }
return error;
}
+/* undo netmap_get_na() */
+void
+netmap_unget_na(struct netmap_adapter *na, struct ifnet *ifp)
+{
+ if (ifp)
+ if_rele(ifp);
+ if (na)
+ netmap_adapter_put(na);
+}
+
+
+#define NM_FAIL_ON(t) do { \
+ if (unlikely(t)) { \
+ RD(5, "%s: fail '" #t "' " \
+ "h %d c %d t %d " \
+ "rh %d rc %d rt %d " \
+ "hc %d ht %d", \
+ kring->name, \
+ head, cur, ring->tail, \
+ kring->rhead, kring->rcur, kring->rtail, \
+ kring->nr_hwcur, kring->nr_hwtail); \
+ return kring->nkr_num_slots; \
+ } \
+} while (0)
/*
* validate parameters on entry for *_txsync()
@@ -1449,11 +1516,9 @@ out:
*
* hwcur, rhead, rtail and hwtail are reliable
*/
-static u_int
-nm_txsync_prologue(struct netmap_kring *kring)
+u_int
+nm_txsync_prologue(struct netmap_kring *kring, struct netmap_ring *ring)
{
-#define NM_ASSERT(t) if (t) { D("fail " #t); goto error; }
- struct netmap_ring *ring = kring->ring;
u_int head = ring->head; /* read only once */
u_int cur = ring->cur; /* read only once */
u_int n = kring->nkr_num_slots;
@@ -1463,35 +1528,34 @@ nm_txsync_prologue(struct netmap_kring *kring)
kring->nr_hwcur, kring->nr_hwtail,
ring->head, ring->cur, ring->tail);
#if 1 /* kernel sanity checks; but we can trust the kring. */
- if (kring->nr_hwcur >= n || kring->rhead >= n ||
- kring->rtail >= n || kring->nr_hwtail >= n)
- goto error;
+ NM_FAIL_ON(kring->nr_hwcur >= n || kring->rhead >= n ||
+ kring->rtail >= n || kring->nr_hwtail >= n);
#endif /* kernel sanity checks */
/*
- * user sanity checks. We only use 'cur',
- * A, B, ... are possible positions for cur:
+ * user sanity checks. We only use head,
+ * A, B, ... are possible positions for head:
*
- * 0 A cur B tail C n-1
- * 0 D tail E cur F n-1
+ * 0 A rhead B rtail C n-1
+ * 0 D rtail E rhead F n-1
*
* B, F, D are valid. A, C, E are wrong
*/
if (kring->rtail >= kring->rhead) {
/* want rhead <= head <= rtail */
- NM_ASSERT(head < kring->rhead || head > kring->rtail);
+ NM_FAIL_ON(head < kring->rhead || head > kring->rtail);
/* and also head <= cur <= rtail */
- NM_ASSERT(cur < head || cur > kring->rtail);
+ NM_FAIL_ON(cur < head || cur > kring->rtail);
} else { /* here rtail < rhead */
/* we need head outside rtail .. rhead */
- NM_ASSERT(head > kring->rtail && head < kring->rhead);
+ NM_FAIL_ON(head > kring->rtail && head < kring->rhead);
/* two cases now: head <= rtail or head >= rhead */
if (head <= kring->rtail) {
/* want head <= cur <= rtail */
- NM_ASSERT(cur < head || cur > kring->rtail);
+ NM_FAIL_ON(cur < head || cur > kring->rtail);
} else { /* head >= rhead */
/* cur must be outside rtail..head */
- NM_ASSERT(cur > kring->rtail && cur < head);
+ NM_FAIL_ON(cur > kring->rtail && cur < head);
}
}
if (ring->tail != kring->rtail) {
@@ -1502,15 +1566,6 @@ nm_txsync_prologue(struct netmap_kring *kring)
kring->rhead = head;
kring->rcur = cur;
return head;
-
-error:
- RD(5, "%s kring error: head %d cur %d tail %d rhead %d rcur %d rtail %d hwcur %d hwtail %d",
- kring->name,
- head, cur, ring->tail,
- kring->rhead, kring->rcur, kring->rtail,
- kring->nr_hwcur, kring->nr_hwtail);
- return n;
-#undef NM_ASSERT
}
@@ -1525,10 +1580,9 @@ error:
* hwcur and hwtail are reliable.
*
*/
-static u_int
-nm_rxsync_prologue(struct netmap_kring *kring)
+u_int
+nm_rxsync_prologue(struct netmap_kring *kring, struct netmap_ring *ring)
{
- struct netmap_ring *ring = kring->ring;
uint32_t const n = kring->nkr_num_slots;
uint32_t head, cur;
@@ -1546,30 +1600,24 @@ nm_rxsync_prologue(struct netmap_kring *kring)
cur = kring->rcur = ring->cur; /* read only once */
head = kring->rhead = ring->head; /* read only once */
#if 1 /* kernel sanity checks */
- if (kring->nr_hwcur >= n || kring->nr_hwtail >= n)
- goto error;
+ NM_FAIL_ON(kring->nr_hwcur >= n || kring->nr_hwtail >= n);
#endif /* kernel sanity checks */
/* user sanity checks */
if (kring->nr_hwtail >= kring->nr_hwcur) {
/* want hwcur <= rhead <= hwtail */
- if (head < kring->nr_hwcur || head > kring->nr_hwtail)
- goto error;
+ NM_FAIL_ON(head < kring->nr_hwcur || head > kring->nr_hwtail);
/* and also rhead <= rcur <= hwtail */
- if (cur < head || cur > kring->nr_hwtail)
- goto error;
+ NM_FAIL_ON(cur < head || cur > kring->nr_hwtail);
} else {
/* we need rhead outside hwtail..hwcur */
- if (head < kring->nr_hwcur && head > kring->nr_hwtail)
- goto error;
+ NM_FAIL_ON(head < kring->nr_hwcur && head > kring->nr_hwtail);
/* two cases now: head <= hwtail or head >= hwcur */
if (head <= kring->nr_hwtail) {
/* want head <= cur <= hwtail */
- if (cur < head || cur > kring->nr_hwtail)
- goto error;
+ NM_FAIL_ON(cur < head || cur > kring->nr_hwtail);
} else {
/* cur must be outside hwtail..head */
- if (cur < head && cur > kring->nr_hwtail)
- goto error;
+ NM_FAIL_ON(cur < head && cur > kring->nr_hwtail);
}
}
if (ring->tail != kring->rtail) {
@@ -1579,13 +1627,6 @@ nm_rxsync_prologue(struct netmap_kring *kring)
ring->tail = kring->rtail;
}
return head;
-
-error:
- RD(5, "kring error: hwcur %d rcur %d hwtail %d head %d cur %d tail %d",
- kring->nr_hwcur,
- kring->rcur, kring->nr_hwtail,
- kring->rhead, kring->rcur, ring->tail);
- return n;
}
@@ -1659,6 +1700,7 @@ netmap_interp_ringid(struct netmap_priv_d *priv, uint16_t ringid, uint32_t flags
struct netmap_adapter *na = priv->np_na;
u_int j, i = ringid & NETMAP_RING_MASK;
u_int reg = flags & NR_REG_MASK;
+ int excluded_direction[] = { NR_TX_RINGS_ONLY, NR_RX_RINGS_ONLY };
enum txrx t;
if (reg == NR_REG_DEFAULT) {
@@ -1672,48 +1714,58 @@ netmap_interp_ringid(struct netmap_priv_d *priv, uint16_t ringid, uint32_t flags
}
D("deprecated API, old ringid 0x%x -> ringid %x reg %d", ringid, i, reg);
}
- switch (reg) {
- case NR_REG_ALL_NIC:
- case NR_REG_PIPE_MASTER:
- case NR_REG_PIPE_SLAVE:
- for_rx_tx(t) {
+
+ if ((flags & NR_PTNETMAP_HOST) && (reg != NR_REG_ALL_NIC ||
+ flags & (NR_RX_RINGS_ONLY|NR_TX_RINGS_ONLY))) {
+ D("Error: only NR_REG_ALL_NIC supported with netmap passthrough");
+ return EINVAL;
+ }
+
+ for_rx_tx(t) {
+ if (flags & excluded_direction[t]) {
+ priv->np_qfirst[t] = priv->np_qlast[t] = 0;
+ continue;
+ }
+ switch (reg) {
+ case NR_REG_ALL_NIC:
+ case NR_REG_PIPE_MASTER:
+ case NR_REG_PIPE_SLAVE:
priv->np_qfirst[t] = 0;
priv->np_qlast[t] = nma_get_nrings(na, t);
- }
- ND("%s %d %d", "ALL/PIPE",
- priv->np_qfirst[NR_RX], priv->np_qlast[NR_RX]);
- break;
- case NR_REG_SW:
- case NR_REG_NIC_SW:
- if (!(na->na_flags & NAF_HOST_RINGS)) {
- D("host rings not supported");
- return EINVAL;
- }
- for_rx_tx(t) {
+ ND("ALL/PIPE: %s %d %d", nm_txrx2str(t),
+ priv->np_qfirst[t], priv->np_qlast[t]);
+ break;
+ case NR_REG_SW:
+ case NR_REG_NIC_SW:
+ if (!(na->na_flags & NAF_HOST_RINGS)) {
+ D("host rings not supported");
+ return EINVAL;
+ }
priv->np_qfirst[t] = (reg == NR_REG_SW ?
nma_get_nrings(na, t) : 0);
priv->np_qlast[t] = nma_get_nrings(na, t) + 1;
- }
- ND("%s %d %d", reg == NR_REG_SW ? "SW" : "NIC+SW",
- priv->np_qfirst[NR_RX], priv->np_qlast[NR_RX]);
- break;
- case NR_REG_ONE_NIC:
- if (i >= na->num_tx_rings && i >= na->num_rx_rings) {
- D("invalid ring id %d", i);
- return EINVAL;
- }
- for_rx_tx(t) {
+ ND("%s: %s %d %d", reg == NR_REG_SW ? "SW" : "NIC+SW",
+ nm_txrx2str(t),
+ priv->np_qfirst[t], priv->np_qlast[t]);
+ break;
+ case NR_REG_ONE_NIC:
+ if (i >= na->num_tx_rings && i >= na->num_rx_rings) {
+ D("invalid ring id %d", i);
+ return EINVAL;
+ }
/* if not enough rings, use the first one */
j = i;
if (j >= nma_get_nrings(na, t))
j = 0;
priv->np_qfirst[t] = j;
priv->np_qlast[t] = j + 1;
+ ND("ONE_NIC: %s %d %d", nm_txrx2str(t),
+ priv->np_qfirst[t], priv->np_qlast[t]);
+ break;
+ default:
+ D("invalid regif type %d", reg);
+ return EINVAL;
}
- break;
- default:
- D("invalid regif type %d", reg);
- return EINVAL;
}
priv->np_flags = (flags & ~NR_REG_MASK) | reg;
@@ -1776,11 +1828,12 @@ netmap_unset_ringid(struct netmap_priv_d *priv)
}
-/* check that the rings we want to bind are not exclusively owned by a previous
- * bind. If exclusive ownership has been requested, we also mark the rings.
+/* Set the nr_pending_mode for the requested rings.
+ * If requested, also try to get exclusive access to the rings, provided
+ * the rings we want to bind are not exclusively owned by a previous bind.
*/
static int
-netmap_get_exclusive(struct netmap_priv_d *priv)
+netmap_krings_get(struct netmap_priv_d *priv)
{
struct netmap_adapter *na = priv->np_na;
u_int i;
@@ -1811,16 +1864,16 @@ netmap_get_exclusive(struct netmap_priv_d *priv)
}
}
- /* second round: increment usage cound and possibly
- * mark as exclusive
+ /* second round: increment usage count (possibly marking them
+ * as exclusive) and set the nr_pending_mode
*/
-
for_rx_tx(t) {
for (i = priv->np_qfirst[t]; i < priv->np_qlast[t]; i++) {
kring = &NMR(na, t)[i];
kring->users++;
if (excl)
kring->nr_kflags |= NKR_EXCLUSIVE;
+ kring->nr_pending_mode = NKR_NETMAP_ON;
}
}
@@ -1828,9 +1881,11 @@ netmap_get_exclusive(struct netmap_priv_d *priv)
}
-/* undo netmap_get_ownership() */
+/* Undo netmap_krings_get(). This is done by clearing the exclusive mode
+ * if was asked on regif, and unset the nr_pending_mode if we are the
+ * last users of the involved rings. */
static void
-netmap_rel_exclusive(struct netmap_priv_d *priv)
+netmap_krings_put(struct netmap_priv_d *priv)
{
struct netmap_adapter *na = priv->np_na;
u_int i;
@@ -1852,6 +1907,8 @@ netmap_rel_exclusive(struct netmap_priv_d *priv)
if (excl)
kring->nr_kflags &= ~NKR_EXCLUSIVE;
kring->users--;
+ if (kring->users == 0)
+ kring->nr_pending_mode = NKR_NETMAP_OFF;
}
}
}
@@ -1899,9 +1956,8 @@ netmap_rel_exclusive(struct netmap_priv_d *priv)
* (put the adapter in netmap mode)
*
* This may be one of the following:
- * (XXX these should be either all *_register or all *_reg 2014-03-15)
*
- * * netmap_hw_register (hw ports)
+ * * netmap_hw_reg (hw ports)
* checks that the ifp is still there, then calls
* the hardware specific callback;
*
@@ -1919,7 +1975,7 @@ netmap_rel_exclusive(struct netmap_priv_d *priv)
* intercept the sync callbacks of the monitored
* rings
*
- * * netmap_bwrap_register (bwraps)
+ * * netmap_bwrap_reg (bwraps)
* cross-link the bwrap and hwna rings,
* forward the request to the hwna, override
* the hwna notify callback (to get the frames
@@ -1948,7 +2004,7 @@ netmap_do_regif(struct netmap_priv_d *priv, struct netmap_adapter *na,
if (na->active_fds == 0) {
/*
* If this is the first registration of the adapter,
- * also create the netmap rings and their in-kernel view,
+ * create the in-kernel view of the netmap rings,
* the netmap krings.
*/
@@ -1960,39 +2016,48 @@ netmap_do_regif(struct netmap_priv_d *priv, struct netmap_adapter *na,
if (error)
goto err_drop_mem;
- /* create all missing netmap rings */
- error = netmap_mem_rings_create(na);
- if (error)
- goto err_del_krings;
}
- /* now the kring must exist and we can check whether some
- * previous bind has exclusive ownership on them
+ /* now the krings must exist and we can check whether some
+ * previous bind has exclusive ownership on them, and set
+ * nr_pending_mode
*/
- error = netmap_get_exclusive(priv);
+ error = netmap_krings_get(priv);
if (error)
- goto err_del_rings;
+ goto err_del_krings;
+
+ /* create all needed missing netmap rings */
+ error = netmap_mem_rings_create(na);
+ if (error)
+ goto err_rel_excl;
/* in all cases, create a new netmap if */
nifp = netmap_mem_if_new(na);
if (nifp == NULL) {
error = ENOMEM;
- goto err_rel_excl;
+ goto err_del_rings;
}
- na->active_fds++;
- if (!nm_netmap_on(na)) {
- /* Netmap not active, set the card in netmap mode
- * and make it use the shared buffers.
- */
+ if (na->active_fds == 0) {
/* cache the allocator info in the na */
- netmap_mem_get_lut(na->nm_mem, &na->na_lut);
- ND("%p->na_lut == %p", na, na->na_lut.lut);
- error = na->nm_register(na, 1); /* mode on */
- if (error)
+ error = netmap_mem_get_lut(na->nm_mem, &na->na_lut);
+ if (error)
goto err_del_if;
+ ND("lut %p bufs %u size %u", na->na_lut.lut, na->na_lut.objtotal,
+ na->na_lut.objsize);
}
+ if (nm_kring_pending(priv)) {
+ /* Some kring is switching mode, tell the adapter to
+ * react on this. */
+ error = na->nm_register(na, 1);
+ if (error)
+ goto err_put_lut;
+ }
+
+ /* Commit the reference. */
+ na->active_fds++;
+
/*
* advertise that the interface is ready by setting np_nifp.
* The barrier is needed because readers (poll, *SYNC and mmap)
@@ -2003,15 +2068,15 @@ netmap_do_regif(struct netmap_priv_d *priv, struct netmap_adapter *na,
return 0;
+err_put_lut:
+ if (na->active_fds == 0)
+ memset(&na->na_lut, 0, sizeof(na->na_lut));
err_del_if:
- memset(&na->na_lut, 0, sizeof(na->na_lut));
- na->active_fds--;
netmap_mem_if_delete(na, nifp);
err_rel_excl:
- netmap_rel_exclusive(priv);
+ netmap_krings_put(priv);
err_del_rings:
- if (na->active_fds == 0)
- netmap_mem_rings_delete(na);
+ netmap_mem_rings_delete(na);
err_del_krings:
if (na->active_fds == 0)
na->nm_krings_delete(na);
@@ -2024,41 +2089,23 @@ err:
/*
- * update kring and ring at the end of txsync.
+ * update kring and ring at the end of rxsync/txsync.
*/
static inline void
-nm_txsync_finalize(struct netmap_kring *kring)
+nm_sync_finalize(struct netmap_kring *kring)
{
- /* update ring tail to what the kernel knows */
+ /*
+ * Update ring tail to what the kernel knows
+ * After txsync: head/rhead/hwcur might be behind cur/rcur
+ * if no carrier.
+ */
kring->ring->tail = kring->rtail = kring->nr_hwtail;
- /* note, head/rhead/hwcur might be behind cur/rcur
- * if no carrier
- */
ND(5, "%s now hwcur %d hwtail %d head %d cur %d tail %d",
kring->name, kring->nr_hwcur, kring->nr_hwtail,
kring->rhead, kring->rcur, kring->rtail);
}
-
-/*
- * update kring and ring at the end of rxsync
- */
-static inline void
-nm_rxsync_finalize(struct netmap_kring *kring)
-{
- /* tell userspace that there might be new packets */
- //struct netmap_ring *ring = kring->ring;
- ND("head %d cur %d tail %d -> %d", ring->head, ring->cur, ring->tail,
- kring->nr_hwtail);
- kring->ring->tail = kring->rtail = kring->nr_hwtail;
- /* make a copy of the state for next round */
- kring->rhead = kring->ring->head;
- kring->rcur = kring->ring->cur;
-}
-
-
-
/*
* ioctl(2) support for the "netmap" device.
*
@@ -2072,21 +2119,17 @@ nm_rxsync_finalize(struct netmap_kring *kring)
* Return 0 on success, errno otherwise.
*/
int
-netmap_ioctl(struct cdev *dev, u_long cmd, caddr_t data,
- int fflag, struct thread *td)
+netmap_ioctl(struct netmap_priv_d *priv, u_long cmd, caddr_t data, struct thread *td)
{
- struct netmap_priv_d *priv = NULL;
struct nmreq *nmr = (struct nmreq *) data;
struct netmap_adapter *na = NULL;
- int error;
+ struct ifnet *ifp = NULL;
+ int error = 0;
u_int i, qfirst, qlast;
struct netmap_if *nifp;
struct netmap_kring *krings;
enum txrx t;
- (void)dev; /* UNUSED */
- (void)fflag; /* UNUSED */
-
if (cmd == NIOCGINFO || cmd == NIOCREGIF) {
/* truncate name */
nmr->nr_name[sizeof(nmr->nr_name) - 1] = '\0';
@@ -2101,15 +2144,6 @@ netmap_ioctl(struct cdev *dev, u_long cmd, caddr_t data,
return EINVAL;
}
}
- CURVNET_SET(TD_TO_VNET(td));
-
- error = devfs_get_cdevpriv((void **)&priv);
- if (error) {
- CURVNET_RESTORE();
- /* XXX ENOENT should be impossible, since the priv
- * is now created in the open */
- return (error == ENOENT ? ENXIO : error);
- }
switch (cmd) {
case NIOCGINFO: /* return capabilities etc */
@@ -2125,10 +2159,14 @@ netmap_ioctl(struct cdev *dev, u_long cmd, caddr_t data,
u_int memflags;
if (nmr->nr_name[0] != '\0') {
+
/* get a refcount */
- error = netmap_get_na(nmr, &na, 1 /* create */);
- if (error)
+ error = netmap_get_na(nmr, &na, &ifp, 1 /* create */);
+ if (error) {
+ na = NULL;
+ ifp = NULL;
break;
+ }
nmd = na->nm_mem; /* get memory allocator */
}
@@ -2145,8 +2183,8 @@ netmap_ioctl(struct cdev *dev, u_long cmd, caddr_t data,
nmr->nr_tx_rings = na->num_tx_rings;
nmr->nr_rx_slots = na->num_rx_desc;
nmr->nr_tx_slots = na->num_tx_desc;
- netmap_adapter_put(na);
} while (0);
+ netmap_unget_na(na, ifp);
NMG_UNLOCK();
break;
@@ -2156,9 +2194,25 @@ netmap_ioctl(struct cdev *dev, u_long cmd, caddr_t data,
if (i == NETMAP_BDG_ATTACH || i == NETMAP_BDG_DETACH
|| i == NETMAP_BDG_VNET_HDR
|| i == NETMAP_BDG_NEWIF
- || i == NETMAP_BDG_DELIF) {
+ || i == NETMAP_BDG_DELIF
+ || i == NETMAP_BDG_POLLING_ON
+ || i == NETMAP_BDG_POLLING_OFF) {
error = netmap_bdg_ctl(nmr, NULL);
break;
+ } else if (i == NETMAP_PT_HOST_CREATE || i == NETMAP_PT_HOST_DELETE) {
+ error = ptnetmap_ctl(nmr, priv->np_na);
+ break;
+ } else if (i == NETMAP_VNET_HDR_GET) {
+ struct ifnet *ifp;
+
+ NMG_LOCK();
+ error = netmap_get_na(nmr, &na, &ifp, 0);
+ if (na && !error) {
+ nmr->nr_arg1 = na->virt_hdr_len;
+ }
+ netmap_unget_na(na, ifp);
+ NMG_UNLOCK();
+ break;
} else if (i != 0) {
D("nr_cmd must be 0 not %d", i);
error = EINVAL;
@@ -2169,23 +2223,32 @@ netmap_ioctl(struct cdev *dev, u_long cmd, caddr_t data,
NMG_LOCK();
do {
u_int memflags;
+ struct ifnet *ifp;
if (priv->np_nifp != NULL) { /* thread already registered */
error = EBUSY;
break;
}
/* find the interface and a reference */
- error = netmap_get_na(nmr, &na, 1 /* create */); /* keep reference */
+ error = netmap_get_na(nmr, &na, &ifp,
+ 1 /* create */); /* keep reference */
if (error)
break;
if (NETMAP_OWNED_BY_KERN(na)) {
- netmap_adapter_put(na);
+ netmap_unget_na(na, ifp);
error = EBUSY;
break;
}
+
+ if (na->virt_hdr_len && !(nmr->nr_flags & NR_ACCEPT_VNET_HDR)) {
+ netmap_unget_na(na, ifp);
+ error = EIO;
+ break;
+ }
+
error = netmap_do_regif(priv, na, nmr->nr_ringid, nmr->nr_flags);
if (error) { /* reg. failed, release priv and ref */
- netmap_adapter_put(na);
+ netmap_unget_na(na, ifp);
break;
}
nifp = priv->np_nifp;
@@ -2200,7 +2263,7 @@ netmap_ioctl(struct cdev *dev, u_long cmd, caddr_t data,
&nmr->nr_arg2);
if (error) {
netmap_do_unregif(priv);
- netmap_adapter_put(na);
+ netmap_unget_na(na, ifp);
break;
}
if (memflags & NETMAP_MEM_PRIVATE) {
@@ -2212,12 +2275,17 @@ netmap_ioctl(struct cdev *dev, u_long cmd, caddr_t data,
}
if (nmr->nr_arg3) {
- D("requested %d extra buffers", nmr->nr_arg3);
+ if (netmap_verbose)
+ D("requested %d extra buffers", nmr->nr_arg3);
nmr->nr_arg3 = netmap_extra_alloc(na,
&nifp->ni_bufs_head, nmr->nr_arg3);
- D("got %d extra buffers", nmr->nr_arg3);
+ if (netmap_verbose)
+ D("got %d extra buffers", nmr->nr_arg3);
}
nmr->nr_offset = netmap_mem_if_offset(na->nm_mem, nifp);
+
+ /* store ifp reference so that priv destructor may release it */
+ priv->np_ifp = ifp;
} while (0);
NMG_UNLOCK();
break;
@@ -2240,11 +2308,6 @@ netmap_ioctl(struct cdev *dev, u_long cmd, caddr_t data,
break;
}
- if (!nm_netmap_on(na)) {
- error = ENXIO;
- break;
- }
-
t = (cmd == NIOCTXSYNC ? NR_TX : NR_RX);
krings = NMR(na, t);
qfirst = priv->np_qfirst[t];
@@ -2252,31 +2315,34 @@ netmap_ioctl(struct cdev *dev, u_long cmd, caddr_t data,
for (i = qfirst; i < qlast; i++) {
struct netmap_kring *kring = krings + i;
- if (nm_kr_tryget(kring)) {
- error = EBUSY;
- goto out;
+ struct netmap_ring *ring = kring->ring;
+
+ if (unlikely(nm_kr_tryget(kring, 1, &error))) {
+ error = (error ? EIO : 0);
+ continue;
}
+
if (cmd == NIOCTXSYNC) {
if (netmap_verbose & NM_VERB_TXSYNC)
D("pre txsync ring %d cur %d hwcur %d",
- i, kring->ring->cur,
+ i, ring->cur,
kring->nr_hwcur);
- if (nm_txsync_prologue(kring) >= kring->nkr_num_slots) {
+ if (nm_txsync_prologue(kring, ring) >= kring->nkr_num_slots) {
netmap_ring_reinit(kring);
} else if (kring->nm_sync(kring, NAF_FORCE_RECLAIM) == 0) {
- nm_txsync_finalize(kring);
+ nm_sync_finalize(kring);
}
if (netmap_verbose & NM_VERB_TXSYNC)
D("post txsync ring %d cur %d hwcur %d",
- i, kring->ring->cur,
+ i, ring->cur,
kring->nr_hwcur);
} else {
- if (nm_rxsync_prologue(kring) >= kring->nkr_num_slots) {
+ if (nm_rxsync_prologue(kring, ring) >= kring->nkr_num_slots) {
netmap_ring_reinit(kring);
} else if (kring->nm_sync(kring, NAF_FORCE_READ) == 0) {
- nm_rxsync_finalize(kring);
+ nm_sync_finalize(kring);
}
- microtime(&na->rx_rings[i].ring->ts);
+ microtime(&ring->ts);
}
nm_kr_put(kring);
}
@@ -2323,9 +2389,7 @@ netmap_ioctl(struct cdev *dev, u_long cmd, caddr_t data,
error = EOPNOTSUPP;
#endif /* linux */
}
-out:
- CURVNET_RESTORE();
return (error);
}
@@ -2345,17 +2409,15 @@ out:
* hidden argument.
*/
int
-netmap_poll(struct cdev *dev, int events, struct thread *td)
+netmap_poll(struct netmap_priv_d *priv, int events, NM_SELRECORD_T *sr)
{
- struct netmap_priv_d *priv = NULL;
struct netmap_adapter *na;
struct netmap_kring *kring;
+ struct netmap_ring *ring;
u_int i, check_all_tx, check_all_rx, want[NR_TXRX], revents = 0;
#define want_tx want[NR_TX]
#define want_rx want[NR_RX]
struct mbq q; /* packets from hw queues to host stack */
- void *pwait = dev; /* linux compatibility */
- int is_kevent = 0;
enum txrx t;
/*
@@ -2365,23 +2427,13 @@ netmap_poll(struct cdev *dev, int events, struct thread *td)
*/
int retry_tx = 1, retry_rx = 1;
- (void)pwait;
- mbq_init(&q);
-
- /*
- * XXX kevent has curthread->tp_fop == NULL,
- * so devfs_get_cdevpriv() fails. We circumvent this by passing
- * priv as the first argument, which is also useful to avoid
- * the selrecord() which are not necessary in that case.
+ /* transparent mode: send_down is 1 if we have found some
+ * packets to forward during the rx scan and we have not
+ * sent them down to the nic yet
*/
- if (devfs_get_cdevpriv((void **)&priv) != 0) {
- is_kevent = 1;
- if (netmap_verbose)
- D("called from kevent");
- priv = (struct netmap_priv_d *)dev;
- }
- if (priv == NULL)
- return POLLERR;
+ int send_down = 0;
+
+ mbq_init(&q);
if (priv->np_nifp == NULL) {
D("No if registered");
@@ -2399,7 +2451,6 @@ netmap_poll(struct cdev *dev, int events, struct thread *td)
want_tx = events & (POLLOUT | POLLWRNORM);
want_rx = events & (POLLIN | POLLRDNORM);
-
/*
* check_all_{tx|rx} are set if the card has more than one queue AND
* the file descriptor is bound to all of them. If so, we sleep on
@@ -2421,6 +2472,32 @@ netmap_poll(struct cdev *dev, int events, struct thread *td)
* slots available. If this fails, then lock and call the sync
* routines.
*/
+#if 1 /* new code: call rx if any of the rings needs to release or read buffers */
+ if (want_tx) {
+ t = NR_TX;
+ for (i = priv->np_qfirst[t]; want[t] && i < priv->np_qlast[t]; i++) {
+ kring = &NMR(na, t)[i];
+ /* XXX compare ring->cur and kring->tail */
+ if (!nm_ring_empty(kring->ring)) {
+ revents |= want[t];
+ want[t] = 0; /* also breaks the loop */
+ }
+ }
+ }
+ if (want_rx) {
+ want_rx = 0; /* look for a reason to run the handlers */
+ t = NR_RX;
+ for (i = priv->np_qfirst[t]; i < priv->np_qlast[t]; i++) {
+ kring = &NMR(na, t)[i];
+ if (kring->ring->cur == kring->ring->tail /* try fetch new buffers */
+ || kring->rhead != kring->ring->head /* release buffers */) {
+ want_rx = 1;
+ }
+ }
+ if (!want_rx)
+ revents |= events & (POLLIN | POLLRDNORM); /* we have data */
+ }
+#else /* old code */
for_rx_tx(t) {
for (i = priv->np_qfirst[t]; want[t] && i < priv->np_qlast[t]; i++) {
kring = &NMR(na, t)[i];
@@ -2431,6 +2508,7 @@ netmap_poll(struct cdev *dev, int events, struct thread *td)
}
}
}
+#endif /* old code */
/*
* If we want to push packets out (priv->np_txpoll) or
@@ -2447,32 +2525,26 @@ netmap_poll(struct cdev *dev, int events, struct thread *td)
* used to skip rings with no pending transmissions.
*/
flush_tx:
- for (i = priv->np_qfirst[NR_TX]; i < priv->np_qlast[NR_RX]; i++) {
+ for (i = priv->np_qfirst[NR_TX]; i < priv->np_qlast[NR_TX]; i++) {
int found = 0;
kring = &na->tx_rings[i];
- if (!want_tx && kring->ring->cur == kring->nr_hwcur)
+ ring = kring->ring;
+
+ if (!send_down && !want_tx && ring->cur == kring->nr_hwcur)
continue;
- /* only one thread does txsync */
- if (nm_kr_tryget(kring)) {
- /* either busy or stopped
- * XXX if the ring is stopped, sleeping would
- * be better. In current code, however, we only
- * stop the rings for brief intervals (2014-03-14)
- */
- if (netmap_verbose)
- RD(2, "%p lost race on txring %d, ok",
- priv, i);
+
+ if (nm_kr_tryget(kring, 1, &revents))
continue;
- }
- if (nm_txsync_prologue(kring) >= kring->nkr_num_slots) {
+
+ if (nm_txsync_prologue(kring, ring) >= kring->nkr_num_slots) {
netmap_ring_reinit(kring);
revents |= POLLERR;
} else {
if (kring->nm_sync(kring, 0))
revents |= POLLERR;
else
- nm_txsync_finalize(kring);
+ nm_sync_finalize(kring);
}
/*
@@ -2489,8 +2561,10 @@ flush_tx:
kring->nm_notify(kring, 0);
}
}
- if (want_tx && retry_tx && !is_kevent) {
- OS_selrecord(td, check_all_tx ?
+	/* if there were any packets to forward, we must have handled them by now */
+ send_down = 0;
+ if (want_tx && retry_tx && sr) {
+ nm_os_selrecord(sr, check_all_tx ?
&na->si[NR_TX] : &na->tx_rings[priv->np_qfirst[NR_TX]].si);
retry_tx = 0;
goto flush_tx;
@@ -2502,22 +2576,18 @@ flush_tx:
* Do it on all rings because otherwise we starve.
*/
if (want_rx) {
- int send_down = 0; /* transparent mode */
/* two rounds here for race avoidance */
do_retry_rx:
for (i = priv->np_qfirst[NR_RX]; i < priv->np_qlast[NR_RX]; i++) {
int found = 0;
kring = &na->rx_rings[i];
+ ring = kring->ring;
- if (nm_kr_tryget(kring)) {
- if (netmap_verbose)
- RD(2, "%p lost race on rxring %d, ok",
- priv, i);
+ if (unlikely(nm_kr_tryget(kring, 1, &revents)))
continue;
- }
- if (nm_rxsync_prologue(kring) >= kring->nkr_num_slots) {
+ if (nm_rxsync_prologue(kring, ring) >= kring->nkr_num_slots) {
netmap_ring_reinit(kring);
revents |= POLLERR;
}
@@ -2526,22 +2596,22 @@ do_retry_rx:
/*
* transparent mode support: collect packets
* from the rxring(s).
- * XXX NR_FORWARD should only be read on
- * physical or NIC ports
*/
- if (netmap_fwd ||kring->ring->flags & NR_FORWARD) {
+ if (nm_may_forward_up(kring)) {
ND(10, "forwarding some buffers up %d to %d",
- kring->nr_hwcur, kring->ring->cur);
+ kring->nr_hwcur, ring->cur);
netmap_grab_packets(kring, &q, netmap_fwd);
}
+ kring->nr_kflags &= ~NR_FORWARD;
if (kring->nm_sync(kring, 0))
revents |= POLLERR;
else
- nm_rxsync_finalize(kring);
+ nm_sync_finalize(kring);
+ send_down |= (kring->nr_kflags & NR_FORWARD); /* host ring only */
if (netmap_no_timestamp == 0 ||
- kring->ring->flags & NR_TIMESTAMP) {
- microtime(&kring->ring->ts);
+ ring->flags & NR_TIMESTAMP) {
+ microtime(&ring->ts);
}
found = kring->rcur != kring->rtail;
nm_kr_put(kring);
@@ -2552,22 +2622,10 @@ do_retry_rx:
}
}
- /* transparent mode XXX only during first pass ? */
- if (na->na_flags & NAF_HOST_RINGS) {
- kring = &na->rx_rings[na->num_rx_rings];
- if (check_all_rx
- && (netmap_fwd || kring->ring->flags & NR_FORWARD)) {
- /* XXX fix to use kring fields */
- if (nm_ring_empty(kring->ring))
- send_down = netmap_rxsync_from_host(na, td, dev);
- if (!nm_ring_empty(kring->ring))
- revents |= want_rx;
- }
- }
-
- if (retry_rx && !is_kevent)
- OS_selrecord(td, check_all_rx ?
+ if (retry_rx && sr) {
+ nm_os_selrecord(sr, check_all_rx ?
&na->si[NR_RX] : &na->rx_rings[priv->np_qfirst[NR_RX]].si);
+ }
if (send_down > 0 || retry_rx) {
retry_rx = 0;
if (send_down)
@@ -2582,15 +2640,14 @@ do_retry_rx:
* kring->nr_hwcur and ring->head
* are passed to the other endpoint.
*
- * In this mode we also scan the sw rxring, which in
- * turn passes packets up.
- *
- * XXX Transparent mode at the moment requires to bind all
+ * Transparent mode requires to bind all
* rings to a single file descriptor.
*/
- if (q.head && na->ifp != NULL)
+ if (q.head && !nm_kr_tryget(&na->tx_rings[na->num_tx_rings], 1, &revents)) {
netmap_send_up(na->ifp, &q);
+ nm_kr_put(&na->tx_rings[na->num_tx_rings]);
+ }
return (revents);
#undef want_tx
@@ -2600,8 +2657,6 @@ do_retry_rx:
/*-------------------- driver support routines -------------------*/
-static int netmap_hw_krings_create(struct netmap_adapter *);
-
/* default notify callback */
static int
netmap_notify(struct netmap_kring *kring, int flags)
@@ -2609,51 +2664,51 @@ netmap_notify(struct netmap_kring *kring, int flags)
struct netmap_adapter *na = kring->na;
enum txrx t = kring->tx;
- OS_selwakeup(&kring->si, PI_NET);
+ nm_os_selwakeup(&kring->si);
/* optimization: avoid a wake up on the global
* queue if nobody has registered for more
* than one ring
*/
if (na->si_users[t] > 0)
- OS_selwakeup(&na->si[t], PI_NET);
+ nm_os_selwakeup(&na->si[t]);
- return 0;
+ return NM_IRQ_COMPLETED;
}
+#if 0
+static int
+netmap_notify(struct netmap_adapter *na, u_int n_ring,
+enum txrx tx, int flags)
+{
+ if (tx == NR_TX) {
+ KeSetEvent(notes->TX_EVENT, 0, FALSE);
+ }
+ else
+ {
+ KeSetEvent(notes->RX_EVENT, 0, FALSE);
+ }
+ return 0;
+}
+#endif
/* called by all routines that create netmap_adapters.
- * Attach na to the ifp (if any) and provide defaults
- * for optional callbacks. Defaults assume that we
- * are creating an hardware netmap_adapter.
+ * provide some defaults and get a reference to the
+ * memory allocator
*/
int
netmap_attach_common(struct netmap_adapter *na)
{
- struct ifnet *ifp = na->ifp;
-
if (na->num_tx_rings == 0 || na->num_rx_rings == 0) {
D("%s: invalid rings tx %d rx %d",
na->name, na->num_tx_rings, na->num_rx_rings);
return EINVAL;
}
- /* ifp is NULL for virtual adapters (bwrap, non-persistent VALE ports,
- * pipes, monitors). For bwrap we actually have a non-null ifp for
- * use by the external modules, but that is set after this
- * function has been called.
- * XXX this is ugly, maybe split this function in two (2014-03-14)
- */
- if (ifp != NULL) {
- WNA(ifp) = na;
- /* the following is only needed for na that use the host port.
- * XXX do we have something similar for linux ?
- */
#ifdef __FreeBSD__
- na->if_input = ifp->if_input; /* for netmap_send_up */
-#endif /* __FreeBSD__ */
-
- NETMAP_SET_CAPABLE(ifp);
+ if (na->na_flags & NAF_HOST_RINGS && na->ifp) {
+ na->if_input = na->ifp->if_input; /* for netmap_send_up */
}
+#endif /* __FreeBSD__ */
if (na->nm_krings_create == NULL) {
/* we assume that we have been called by a driver,
* since other port types all provide their own
@@ -2677,6 +2732,7 @@ netmap_attach_common(struct netmap_adapter *na)
*/
na->nm_bdg_attach = netmap_bwrap_attach;
#endif
+
return 0;
}
@@ -2685,9 +2741,6 @@ netmap_attach_common(struct netmap_adapter *na)
void
netmap_detach_common(struct netmap_adapter *na)
{
- if (na->ifp != NULL)
- WNA(na->ifp) = NULL; /* XXX do we need this? */
-
if (na->tx_rings) { /* XXX should not happen */
D("freeing leftover tx_rings");
na->nm_krings_delete(na);
@@ -2699,31 +2752,52 @@ netmap_detach_common(struct netmap_adapter *na)
free(na, M_DEVBUF);
}
-/* Wrapper for the register callback provided hardware drivers.
- * na->ifp == NULL means the driver module has been
+/* Wrapper for the register callback provided netmap-enabled
+ * hardware drivers.
+ * nm_iszombie(na) means that the driver module has been
* unloaded, so we cannot call into it.
- * Note that module unloading, in our patched linux drivers,
- * happens under NMG_LOCK and after having stopped all the
- * nic rings (see netmap_detach). This provides sufficient
- * protection for the other driver-provied callbacks
- * (i.e., nm_config and nm_*xsync), that therefore don't need
- * to wrapped.
+ * nm_os_ifnet_lock() must guarantee mutual exclusion with
+ * module unloading.
*/
static int
-netmap_hw_register(struct netmap_adapter *na, int onoff)
+netmap_hw_reg(struct netmap_adapter *na, int onoff)
{
struct netmap_hw_adapter *hwna =
(struct netmap_hw_adapter*)na;
+ int error = 0;
+
+ nm_os_ifnet_lock();
+
+ if (nm_iszombie(na)) {
+ if (onoff) {
+ error = ENXIO;
+ } else if (na != NULL) {
+ na->na_flags &= ~NAF_NETMAP_ON;
+ }
+ goto out;
+ }
+
+ error = hwna->nm_hw_register(na, onoff);
- if (na->ifp == NULL)
- return onoff ? ENXIO : 0;
+out:
+ nm_os_ifnet_unlock();
- return hwna->nm_hw_register(na, onoff);
+ return error;
+}
+
+static void
+netmap_hw_dtor(struct netmap_adapter *na)
+{
+ if (nm_iszombie(na) || na->ifp == NULL)
+ return;
+
+ WNA(na->ifp) = NULL;
}
/*
- * Initialize a ``netmap_adapter`` object created by driver on attach.
+ * Allocate a ``netmap_adapter`` object, and initialize it from the
+ * 'arg' passed by the driver on attach.
* We allocate a block of memory with room for a struct netmap_adapter
* plus two sets of N+2 struct netmap_kring (where N is the number
* of hardware rings):
@@ -2732,29 +2806,31 @@ netmap_hw_register(struct netmap_adapter *na, int onoff)
* kring N+1 is only used for the selinfo for all queues. // XXX still true ?
* Return 0 on success, ENOMEM otherwise.
*/
-int
-netmap_attach(struct netmap_adapter *arg)
+static int
+_netmap_attach(struct netmap_adapter *arg, size_t size)
{
struct netmap_hw_adapter *hwna = NULL;
- // XXX when is arg == NULL ?
- struct ifnet *ifp = arg ? arg->ifp : NULL;
+ struct ifnet *ifp = NULL;
- if (arg == NULL || ifp == NULL)
+ if (arg == NULL || arg->ifp == NULL)
goto fail;
- hwna = malloc(sizeof(*hwna), M_DEVBUF, M_NOWAIT | M_ZERO);
+ ifp = arg->ifp;
+ hwna = malloc(size, M_DEVBUF, M_NOWAIT | M_ZERO);
if (hwna == NULL)
goto fail;
hwna->up = *arg;
hwna->up.na_flags |= NAF_HOST_RINGS | NAF_NATIVE;
strncpy(hwna->up.name, ifp->if_xname, sizeof(hwna->up.name));
hwna->nm_hw_register = hwna->up.nm_register;
- hwna->up.nm_register = netmap_hw_register;
+ hwna->up.nm_register = netmap_hw_reg;
if (netmap_attach_common(&hwna->up)) {
free(hwna, M_DEVBUF);
goto fail;
}
netmap_adapter_get(&hwna->up);
+ NM_ATTACH_NA(ifp, &hwna->up);
+
#ifdef linux
if (ifp->netdev_ops) {
/* prepare a clone of the netdev ops */
@@ -2762,7 +2838,7 @@ netmap_attach(struct netmap_adapter *arg)
hwna->nm_ndo.ndo_start_xmit = ifp->netdev_ops;
#else
hwna->nm_ndo = *ifp->netdev_ops;
-#endif
+#endif /* NETMAP_LINUX_HAVE_NETDEV_OPS */
}
hwna->nm_ndo.ndo_start_xmit = linux_netmap_start_xmit;
if (ifp->ethtool_ops) {
@@ -2771,11 +2847,14 @@ netmap_attach(struct netmap_adapter *arg)
hwna->nm_eto.set_ringparam = linux_netmap_set_ringparam;
#ifdef NETMAP_LINUX_HAVE_SET_CHANNELS
hwna->nm_eto.set_channels = linux_netmap_set_channels;
-#endif
+#endif /* NETMAP_LINUX_HAVE_SET_CHANNELS */
if (arg->nm_config == NULL) {
hwna->up.nm_config = netmap_linux_config;
}
#endif /* linux */
+ if (arg->nm_dtor == NULL) {
+ hwna->up.nm_dtor = netmap_hw_dtor;
+ }
if_printf(ifp, "netmap queues/slots: TX %d/%d, RX %d/%d\n",
hwna->up.num_tx_rings, hwna->up.num_tx_desc,
@@ -2784,12 +2863,57 @@ netmap_attach(struct netmap_adapter *arg)
fail:
D("fail, arg %p ifp %p na %p", arg, ifp, hwna);
- if (ifp)
- netmap_detach(ifp);
return (hwna ? EINVAL : ENOMEM);
}
+int
+netmap_attach(struct netmap_adapter *arg)
+{
+ return _netmap_attach(arg, sizeof(struct netmap_hw_adapter));
+}
+
+
+#ifdef WITH_PTNETMAP_GUEST
+int
+netmap_pt_guest_attach(struct netmap_adapter *arg,
+ void *csb,
+ unsigned int nifp_offset,
+ nm_pt_guest_ptctl_t ptctl)
+{
+ struct netmap_pt_guest_adapter *ptna;
+ struct ifnet *ifp = arg ? arg->ifp : NULL;
+ int error;
+
+ /* get allocator */
+ arg->nm_mem = netmap_mem_pt_guest_new(ifp, nifp_offset, ptctl);
+ if (arg->nm_mem == NULL)
+ return ENOMEM;
+ arg->na_flags |= NAF_MEM_OWNER;
+ error = _netmap_attach(arg, sizeof(struct netmap_pt_guest_adapter));
+ if (error)
+ return error;
+
+ /* get the netmap_pt_guest_adapter */
+ ptna = (struct netmap_pt_guest_adapter *) NA(ifp);
+ ptna->csb = csb;
+
+ /* Initialize a separate pass-through netmap adapter that is going to
+ * be used by the ptnet driver only, and so never exposed to netmap
+ * applications. We only need a subset of the available fields. */
+ memset(&ptna->dr, 0, sizeof(ptna->dr));
+ ptna->dr.up.ifp = ifp;
+ ptna->dr.up.nm_mem = ptna->hwup.up.nm_mem;
+ netmap_mem_get(ptna->dr.up.nm_mem);
+ ptna->dr.up.nm_config = ptna->hwup.up.nm_config;
+
+ ptna->backend_regifs = 0;
+
+ return 0;
+}
+#endif /* WITH_PTNETMAP_GUEST */
+
+
void
NM_DBG(netmap_adapter_get)(struct netmap_adapter *na)
{
@@ -2841,28 +2965,29 @@ void
netmap_detach(struct ifnet *ifp)
{
struct netmap_adapter *na = NA(ifp);
- int skip;
if (!na)
return;
- skip = 0;
NMG_LOCK();
- netmap_disable_all_rings(ifp);
- na->ifp = NULL;
- na->na_flags &= ~NAF_NETMAP_ON;
+ netmap_set_all_rings(na, NM_KR_LOCKED);
+ na->na_flags |= NAF_ZOMBIE;
/*
* if the netmap adapter is not native, somebody
* changed it, so we can not release it here.
- * The NULL na->ifp will notify the new owner that
+ * The NAF_ZOMBIE flag will notify the new owner that
* the driver is gone.
*/
if (na->na_flags & NAF_NATIVE) {
- skip = netmap_adapter_put(na);
+ netmap_adapter_put(na);
}
- /* give them a chance to notice */
- if (skip == 0)
- netmap_enable_all_rings(ifp);
+ /* give active users a chance to notice that NAF_ZOMBIE has been
+ * turned on, so that they can stop and return an error to userspace.
+ * Note that this becomes a NOP if there are no active users and,
+ * therefore, the put() above has deleted the na, since now NA(ifp) is
+ * NULL.
+ */
+ netmap_enable_all_rings(ifp);
NMG_UNLOCK();
}
@@ -2883,9 +3008,10 @@ int
netmap_transmit(struct ifnet *ifp, struct mbuf *m)
{
struct netmap_adapter *na = NA(ifp);
- struct netmap_kring *kring;
+ struct netmap_kring *kring, *tx_kring;
u_int len = MBUF_LEN(m);
u_int error = ENOBUFS;
+ unsigned int txr;
struct mbq *q;
int space;
@@ -2900,6 +3026,16 @@ netmap_transmit(struct ifnet *ifp, struct mbuf *m)
goto done;
}
+ txr = MBUF_TXQ(m);
+ if (txr >= na->num_tx_rings) {
+ txr %= na->num_tx_rings;
+ }
+ tx_kring = &NMR(na, NR_TX)[txr];
+
+ if (tx_kring->nr_mode == NKR_NETMAP_OFF) {
+ return MBUF_TRANSMIT(na, ifp, m);
+ }
+
q = &kring->rx_queue;
// XXX reconsider long packets if we handle fragments
@@ -2909,6 +3045,11 @@ netmap_transmit(struct ifnet *ifp, struct mbuf *m)
goto done;
}
+ if (nm_os_mbuf_has_offld(m)) {
+ RD(1, "%s drop mbuf requiring offloadings", na->name);
+ goto done;
+ }
+
/* protect against rxsync_from_host(), netmap_sw_to_nic()
* and maybe other instances of netmap_transmit (the latter
* not possible on Linux).
@@ -2951,6 +3092,8 @@ done:
* netmap_reset() is called by the driver routines when reinitializing
* a ring. The driver is in charge of locking to protect the kring.
* If native netmap mode is not set just return NULL.
+ * If native netmap mode is set, in particular, we have to set nr_mode to
+ * NKR_NETMAP_ON.
*/
struct netmap_slot *
netmap_reset(struct netmap_adapter *na, enum txrx tx, u_int n,
@@ -2975,13 +3118,26 @@ netmap_reset(struct netmap_adapter *na, enum txrx tx, u_int n,
if (tx == NR_TX) {
if (n >= na->num_tx_rings)
return NULL;
+
kring = na->tx_rings + n;
+
+ if (kring->nr_pending_mode == NKR_NETMAP_OFF) {
+ kring->nr_mode = NKR_NETMAP_OFF;
+ return NULL;
+ }
+
// XXX check whether we should use hwcur or rcur
new_hwofs = kring->nr_hwcur - new_cur;
} else {
if (n >= na->num_rx_rings)
return NULL;
kring = na->rx_rings + n;
+
+ if (kring->nr_pending_mode == NKR_NETMAP_OFF) {
+ kring->nr_mode = NKR_NETMAP_OFF;
+ return NULL;
+ }
+
new_hwofs = kring->nr_hwtail - new_cur;
}
lim = kring->nkr_num_slots - 1;
@@ -3018,6 +3174,7 @@ netmap_reset(struct netmap_adapter *na, enum txrx tx, u_int n,
* We do the wakeup here, but the ring is not yet reconfigured.
* However, we are under lock so there are no races.
*/
+ kring->nr_mode = NKR_NETMAP_ON;
kring->nm_notify(kring, 0);
return kring->ring->slot;
}
@@ -3037,10 +3194,9 @@ netmap_reset(struct netmap_adapter *na, enum txrx tx, u_int n,
* - for a nic connected to a switch, call the proper forwarding routine
* (see netmap_bwrap_intr_notify)
*/
-void
-netmap_common_irq(struct ifnet *ifp, u_int q, u_int *work_done)
+int
+netmap_common_irq(struct netmap_adapter *na, u_int q, u_int *work_done)
{
- struct netmap_adapter *na = NA(ifp);
struct netmap_kring *kring;
enum txrx t = (work_done ? NR_RX : NR_TX);
@@ -3051,15 +3207,20 @@ netmap_common_irq(struct ifnet *ifp, u_int q, u_int *work_done)
}
if (q >= nma_get_nrings(na, t))
- return; // not a physical queue
+ return NM_IRQ_PASS; // not a physical queue
kring = NMR(na, t) + q;
+ if (kring->nr_mode == NKR_NETMAP_OFF) {
+ return NM_IRQ_PASS;
+ }
+
if (t == NR_RX) {
kring->nr_kflags |= NKR_PENDINTR; // XXX atomic ?
*work_done = 1; /* do not fire napi again */
}
- kring->nm_notify(kring, 0);
+
+ return kring->nm_notify(kring, 0);
}
@@ -3067,17 +3228,17 @@ netmap_common_irq(struct ifnet *ifp, u_int q, u_int *work_done)
* Default functions to handle rx/tx interrupts from a physical device.
* "work_done" is non-null on the RX path, NULL for the TX path.
*
- * If the card is not in netmap mode, simply return 0,
+ * If the card is not in netmap mode, simply return NM_IRQ_PASS,
* so that the caller proceeds with regular processing.
- * Otherwise call netmap_common_irq() and return 1.
+ * Otherwise call netmap_common_irq().
*
* If the card is connected to a netmap file descriptor,
* do a selwakeup on the individual queue, plus one on the global one
* if needed (multiqueue card _and_ there are multiqueue listeners),
- * and return 1.
+ * and return NR_IRQ_COMPLETED.
*
* Finally, if called on rx from an interface connected to a switch,
- * calls the proper forwarding routine, and return 1.
+ * calls the proper forwarding routine.
*/
int
netmap_rx_irq(struct ifnet *ifp, u_int q, u_int *work_done)
@@ -3091,15 +3252,14 @@ netmap_rx_irq(struct ifnet *ifp, u_int q, u_int *work_done)
* nm_native_on() here.
*/
if (!nm_netmap_on(na))
- return 0;
+ return NM_IRQ_PASS;
if (na->na_flags & NAF_SKIP_INTR) {
ND("use regular interrupt");
- return 0;
+ return NM_IRQ_PASS;
}
- netmap_common_irq(ifp, q, work_done);
- return 1;
+ return netmap_common_irq(na, q, work_done);
}
@@ -3120,9 +3280,11 @@ extern struct cdevsw netmap_cdevsw;
void
netmap_fini(void)
{
- netmap_uninit_bridges();
if (netmap_dev)
destroy_dev(netmap_dev);
+ /* we assume that there are no longer netmap users */
+ nm_os_ifnet_fini();
+ netmap_uninit_bridges();
netmap_mem_fini();
NMG_LOCK_DESTROY();
printf("netmap: unloaded module.\n");
@@ -3155,9 +3317,13 @@ netmap_init(void)
goto fail;
#ifdef __FreeBSD__
- nm_vi_init_index();
+ nm_os_vi_init_index();
#endif
+ error = nm_os_ifnet_init();
+ if (error)
+ goto fail;
+
printf("netmap: loaded module\n");
return (0);
fail:
diff --git a/sys/dev/netmap/netmap_freebsd.c b/sys/dev/netmap/netmap_freebsd.c
index 8490ae85670b..20ea5c8f2972 100644
--- a/sys/dev/netmap/netmap_freebsd.c
+++ b/sys/dev/netmap/netmap_freebsd.c
@@ -33,8 +33,9 @@
#include <sys/param.h> /* defines used in kernel.h */
#include <sys/poll.h> /* POLLIN, POLLOUT */
#include <sys/kernel.h> /* types used in module initialization */
-#include <sys/conf.h> /* DEV_MODULE */
+#include <sys/conf.h> /* DEV_MODULE_ORDERED */
#include <sys/endian.h>
+#include <sys/syscallsubr.h> /* kern_ioctl() */
#include <sys/rwlock.h>
@@ -50,6 +51,11 @@
#include <sys/malloc.h>
#include <sys/socket.h> /* sockaddrs */
#include <sys/selinfo.h>
+#include <sys/kthread.h> /* kthread_add() */
+#include <sys/proc.h> /* PROC_LOCK() */
+#include <sys/unistd.h> /* RFNOWAIT */
+#include <sys/sched.h> /* sched_bind() */
+#include <sys/smp.h> /* mp_maxid */
#include <net/if.h>
#include <net/if_var.h>
#include <net/if_types.h> /* IFT_ETHER */
@@ -61,13 +67,94 @@
#include <net/netmap.h>
#include <dev/netmap/netmap_kern.h>
+#include <net/netmap_virt.h>
#include <dev/netmap/netmap_mem2.h>
/* ======================== FREEBSD-SPECIFIC ROUTINES ================== */
+void nm_os_selinfo_init(NM_SELINFO_T *si) {
+ struct mtx *m = &si->m;
+ mtx_init(m, "nm_kn_lock", NULL, MTX_DEF);
+ knlist_init_mtx(&si->si.si_note, m);
+}
+
+void
+nm_os_selinfo_uninit(NM_SELINFO_T *si)
+{
+ /* XXX kqueue(9) needed; these will mirror knlist_init. */
+ knlist_delete(&si->si.si_note, curthread, 0 /* not locked */ );
+ knlist_destroy(&si->si.si_note);
+ /* now we don't need the mutex anymore */
+ mtx_destroy(&si->m);
+}
+
+void
+nm_os_ifnet_lock(void)
+{
+ IFNET_WLOCK();
+}
+
+void
+nm_os_ifnet_unlock(void)
+{
+ IFNET_WUNLOCK();
+}
+
+static int netmap_use_count = 0;
+
+void
+nm_os_get_module(void)
+{
+ netmap_use_count++;
+}
+
+void
+nm_os_put_module(void)
+{
+ netmap_use_count--;
+}
+
+static void
+netmap_ifnet_arrival_handler(void *arg __unused, struct ifnet *ifp)
+{
+ netmap_undo_zombie(ifp);
+}
+
+static void
+netmap_ifnet_departure_handler(void *arg __unused, struct ifnet *ifp)
+{
+ netmap_make_zombie(ifp);
+}
+
+static eventhandler_tag nm_ifnet_ah_tag;
+static eventhandler_tag nm_ifnet_dh_tag;
+
+int
+nm_os_ifnet_init(void)
+{
+ nm_ifnet_ah_tag =
+ EVENTHANDLER_REGISTER(ifnet_arrival_event,
+ netmap_ifnet_arrival_handler,
+ NULL, EVENTHANDLER_PRI_ANY);
+ nm_ifnet_dh_tag =
+ EVENTHANDLER_REGISTER(ifnet_departure_event,
+ netmap_ifnet_departure_handler,
+ NULL, EVENTHANDLER_PRI_ANY);
+ return 0;
+}
+
+void
+nm_os_ifnet_fini(void)
+{
+ EVENTHANDLER_DEREGISTER(ifnet_arrival_event,
+ nm_ifnet_ah_tag);
+ EVENTHANDLER_DEREGISTER(ifnet_departure_event,
+ nm_ifnet_dh_tag);
+}
+
rawsum_t
-nm_csum_raw(uint8_t *data, size_t len, rawsum_t cur_sum)
+nm_os_csum_raw(uint8_t *data, size_t len, rawsum_t cur_sum)
{
/* TODO XXX please use the FreeBSD implementation for this. */
uint16_t *words = (uint16_t *)data;
@@ -87,7 +174,7 @@ nm_csum_raw(uint8_t *data, size_t len, rawsum_t cur_sum)
* return value is in network byte order.
*/
uint16_t
-nm_csum_fold(rawsum_t cur_sum)
+nm_os_csum_fold(rawsum_t cur_sum)
{
/* TODO XXX please use the FreeBSD implementation for this. */
while (cur_sum >> 16)
@@ -96,17 +183,17 @@ nm_csum_fold(rawsum_t cur_sum)
return htobe16((~cur_sum) & 0xFFFF);
}
-uint16_t nm_csum_ipv4(struct nm_iphdr *iph)
+uint16_t nm_os_csum_ipv4(struct nm_iphdr *iph)
{
#if 0
return in_cksum_hdr((void *)iph);
#else
- return nm_csum_fold(nm_csum_raw((uint8_t*)iph, sizeof(struct nm_iphdr), 0));
+ return nm_os_csum_fold(nm_os_csum_raw((uint8_t*)iph, sizeof(struct nm_iphdr), 0));
#endif
}
void
-nm_csum_tcpudp_ipv4(struct nm_iphdr *iph, void *data,
+nm_os_csum_tcpudp_ipv4(struct nm_iphdr *iph, void *data,
size_t datalen, uint16_t *check)
{
#ifdef INET
@@ -118,7 +205,7 @@ nm_csum_tcpudp_ipv4(struct nm_iphdr *iph, void *data,
/* Compute the checksum on TCP/UDP header + payload
* (includes the pseudo-header).
*/
- *check = nm_csum_fold(nm_csum_raw(data, datalen, 0));
+ *check = nm_os_csum_fold(nm_os_csum_raw(data, datalen, 0));
#else
static int notsupported = 0;
if (!notsupported) {
@@ -129,12 +216,12 @@ nm_csum_tcpudp_ipv4(struct nm_iphdr *iph, void *data,
}
void
-nm_csum_tcpudp_ipv6(struct nm_ipv6hdr *ip6h, void *data,
+nm_os_csum_tcpudp_ipv6(struct nm_ipv6hdr *ip6h, void *data,
size_t datalen, uint16_t *check)
{
#ifdef INET6
*check = in6_cksum_pseudo((void*)ip6h, datalen, ip6h->nexthdr, 0);
- *check = nm_csum_fold(nm_csum_raw(data, datalen, 0));
+ *check = nm_os_csum_fold(nm_os_csum_raw(data, datalen, 0));
#else
static int notsupported = 0;
if (!notsupported) {
@@ -144,13 +231,41 @@ nm_csum_tcpudp_ipv6(struct nm_ipv6hdr *ip6h, void *data,
#endif
}
+/* on FreeBSD we send up one packet at a time */
+void *
+nm_os_send_up(struct ifnet *ifp, struct mbuf *m, struct mbuf *prev)
+{
+
+ NA(ifp)->if_input(ifp, m);
+ return NULL;
+}
+
+int
+nm_os_mbuf_has_offld(struct mbuf *m)
+{
+ return m->m_pkthdr.csum_flags & (CSUM_TCP | CSUM_UDP | CSUM_SCTP |
+ CSUM_TCP_IPV6 | CSUM_UDP_IPV6 |
+ CSUM_SCTP_IPV6 | CSUM_TSO);
+}
+
+static void
+freebsd_generic_rx_handler(struct ifnet *ifp, struct mbuf *m)
+{
+ struct netmap_generic_adapter *gna =
+ (struct netmap_generic_adapter *)NA(ifp);
+ int stolen = generic_rx_handler(ifp, m);
+
+ if (!stolen) {
+ gna->save_if_input(ifp, m);
+ }
+}
/*
* Intercept the rx routine in the standard device driver.
* Second argument is non-zero to intercept, 0 to restore
*/
int
-netmap_catch_rx(struct netmap_generic_adapter *gna, int intercept)
+nm_os_catch_rx(struct netmap_generic_adapter *gna, int intercept)
{
struct netmap_adapter *na = &gna->up.up;
struct ifnet *ifp = na->ifp;
@@ -161,7 +276,7 @@ netmap_catch_rx(struct netmap_generic_adapter *gna, int intercept)
return EINVAL; /* already set */
}
gna->save_if_input = ifp->if_input;
- ifp->if_input = generic_rx_handler;
+ ifp->if_input = freebsd_generic_rx_handler;
} else {
if (!gna->save_if_input){
D("cannot restore");
@@ -181,18 +296,20 @@ netmap_catch_rx(struct netmap_generic_adapter *gna, int intercept)
* Second argument is non-zero to intercept, 0 to restore.
* On freebsd we just intercept if_transmit.
*/
-void
-netmap_catch_tx(struct netmap_generic_adapter *gna, int enable)
+int
+nm_os_catch_tx(struct netmap_generic_adapter *gna, int intercept)
{
struct netmap_adapter *na = &gna->up.up;
struct ifnet *ifp = netmap_generic_getifp(gna);
- if (enable) {
+ if (intercept) {
na->if_transmit = ifp->if_transmit;
ifp->if_transmit = netmap_transmit;
} else {
ifp->if_transmit = na->if_transmit;
}
+
+ return 0;
}
@@ -213,40 +330,44 @@ netmap_catch_tx(struct netmap_generic_adapter *gna, int enable)
*
*/
int
-generic_xmit_frame(struct ifnet *ifp, struct mbuf *m,
- void *addr, u_int len, u_int ring_nr)
+nm_os_generic_xmit_frame(struct nm_os_gen_arg *a)
{
int ret;
+ u_int len = a->len;
+ struct ifnet *ifp = a->ifp;
+ struct mbuf *m = a->m;
+#if __FreeBSD_version < 1100000
/*
- * The mbuf should be a cluster from our special pool,
- * so we do not need to do an m_copyback but just copy
- * (and eventually, just reference the netmap buffer)
+ * Old FreeBSD versions. The mbuf has a cluster attached,
+ * we need to copy from the cluster to the netmap buffer.
*/
-
- if (GET_MBUF_REFCNT(m) != 1) {
- D("invalid refcnt %d for %p",
- GET_MBUF_REFCNT(m), m);
+ if (MBUF_REFCNT(m) != 1) {
+ D("invalid refcnt %d for %p", MBUF_REFCNT(m), m);
panic("in generic_xmit_frame");
}
- // XXX the ext_size check is unnecessary if we link the netmap buf
if (m->m_ext.ext_size < len) {
RD(5, "size %d < len %d", m->m_ext.ext_size, len);
len = m->m_ext.ext_size;
}
- if (0) { /* XXX seems to have negligible benefits */
- m->m_ext.ext_buf = m->m_data = addr;
- } else {
- bcopy(addr, m->m_data, len);
- }
+ bcopy(a->addr, m->m_data, len);
+#else /* __FreeBSD_version >= 1100000 */
+ /* New FreeBSD versions. Link the external storage to
+ * the netmap buffer, so that no copy is necessary. */
+ m->m_ext.ext_buf = m->m_data = a->addr;
+ m->m_ext.ext_size = len;
+#endif /* __FreeBSD_version >= 1100000 */
+
m->m_len = m->m_pkthdr.len = len;
- // inc refcount. All ours, we could skip the atomic
- atomic_fetchadd_int(PNT_MBUF_REFCNT(m), 1);
+
+ /* mbuf refcnt is not contended, no need to use atomic
+ * (a memory barrier is enough). */
+ SET_MBUF_REFCNT(m, 2);
M_HASHTYPE_SET(m, M_HASHTYPE_OPAQUE);
- m->m_pkthdr.flowid = ring_nr;
+ m->m_pkthdr.flowid = a->ring_nr;
m->m_pkthdr.rcvif = ifp; /* used for tx notification */
ret = NA(ifp)->if_transmit(ifp, m);
- return ret;
+ return ret ? -1 : 0;
}
@@ -263,7 +384,7 @@ netmap_getna(if_t ifp)
* way to extract the info from the ifp
*/
int
-generic_find_num_desc(struct ifnet *ifp, unsigned int *tx, unsigned int *rx)
+nm_os_generic_find_num_desc(struct ifnet *ifp, unsigned int *tx, unsigned int *rx)
{
D("called, in tx %d rx %d", *tx, *rx);
return 0;
@@ -271,16 +392,23 @@ generic_find_num_desc(struct ifnet *ifp, unsigned int *tx, unsigned int *rx)
void
-generic_find_num_queues(struct ifnet *ifp, u_int *txq, u_int *rxq)
+nm_os_generic_find_num_queues(struct ifnet *ifp, u_int *txq, u_int *rxq)
{
D("called, in txq %d rxq %d", *txq, *rxq);
*txq = netmap_generic_rings;
*rxq = netmap_generic_rings;
}
+void
+nm_os_generic_set_features(struct netmap_generic_adapter *gna)
+{
+
+ gna->rxsg = 1; /* Supported through m_copydata. */
+ gna->txqdisc = 0; /* Not supported. */
+}
void
-netmap_mitigation_init(struct nm_generic_mit *mit, int idx, struct netmap_adapter *na)
+nm_os_mitigation_init(struct nm_generic_mit *mit, int idx, struct netmap_adapter *na)
{
ND("called");
mit->mit_pending = 0;
@@ -290,21 +418,21 @@ netmap_mitigation_init(struct nm_generic_mit *mit, int idx, struct netmap_adapte
void
-netmap_mitigation_start(struct nm_generic_mit *mit)
+nm_os_mitigation_start(struct nm_generic_mit *mit)
{
ND("called");
}
void
-netmap_mitigation_restart(struct nm_generic_mit *mit)
+nm_os_mitigation_restart(struct nm_generic_mit *mit)
{
ND("called");
}
int
-netmap_mitigation_active(struct nm_generic_mit *mit)
+nm_os_mitigation_active(struct nm_generic_mit *mit)
{
ND("called");
return 0;
@@ -312,7 +440,7 @@ netmap_mitigation_active(struct nm_generic_mit *mit)
void
-netmap_mitigation_cleanup(struct nm_generic_mit *mit)
+nm_os_mitigation_cleanup(struct nm_generic_mit *mit)
{
ND("called");
}
@@ -342,7 +470,7 @@ static struct {
} nm_vi_indices;
void
-nm_vi_init_index(void)
+nm_os_vi_init_index(void)
{
int i;
for (i = 0; i < NM_VI_MAX; i++)
@@ -398,7 +526,7 @@ nm_vi_free_index(uint8_t val)
* increment this refcount on if_attach().
*/
int
-nm_vi_persist(const char *name, struct ifnet **ret)
+nm_os_vi_persist(const char *name, struct ifnet **ret)
{
struct ifnet *ifp;
u_short macaddr_hi;
@@ -438,15 +566,220 @@ nm_vi_persist(const char *name, struct ifnet **ret)
*ret = ifp;
return 0;
}
+
/* unregister from the system and drop the final refcount */
void
-nm_vi_detach(struct ifnet *ifp)
+nm_os_vi_detach(struct ifnet *ifp)
{
nm_vi_free_index(((char *)IF_LLADDR(ifp))[5]);
ether_ifdetach(ifp);
if_free(ifp);
}
+/* ======================== PTNETMAP SUPPORT ========================== */
+
+#ifdef WITH_PTNETMAP_GUEST
+#include <sys/bus.h>
+#include <sys/rman.h>
+#include <machine/bus.h> /* bus_dmamap_* */
+#include <machine/resource.h>
+#include <dev/pci/pcivar.h>
+#include <dev/pci/pcireg.h>
+/*
+ * ptnetmap memory device (memdev) for freebsd guest,
+ * used to expose host netmap memory to the guest through a PCI BAR.
+ */
+
+/*
+ * ptnetmap memdev private data structure
+ */
+struct ptnetmap_memdev {
+ device_t dev;
+ struct resource *pci_io;
+ struct resource *pci_mem;
+ struct netmap_mem_d *nm_mem;
+};
+
+static int ptn_memdev_probe(device_t);
+static int ptn_memdev_attach(device_t);
+static int ptn_memdev_detach(device_t);
+static int ptn_memdev_shutdown(device_t);
+
+static device_method_t ptn_memdev_methods[] = {
+ DEVMETHOD(device_probe, ptn_memdev_probe),
+ DEVMETHOD(device_attach, ptn_memdev_attach),
+ DEVMETHOD(device_detach, ptn_memdev_detach),
+ DEVMETHOD(device_shutdown, ptn_memdev_shutdown),
+ DEVMETHOD_END
+};
+
+static driver_t ptn_memdev_driver = {
+ PTNETMAP_MEMDEV_NAME,
+ ptn_memdev_methods,
+ sizeof(struct ptnetmap_memdev),
+};
+
+/* We use (SI_ORDER_MIDDLE+1) here, see DEV_MODULE_ORDERED() invocation
+ * below. */
+static devclass_t ptnetmap_devclass;
+DRIVER_MODULE_ORDERED(ptn_memdev, pci, ptn_memdev_driver, ptnetmap_devclass,
+ NULL, NULL, SI_ORDER_MIDDLE + 1);
+
+/*
+ * I/O port read/write wrappers.
+ * Some are not used, so we keep them commented out until needed
+ */
+#define ptn_ioread16(ptn_dev, reg) bus_read_2((ptn_dev)->pci_io, (reg))
+#define ptn_ioread32(ptn_dev, reg) bus_read_4((ptn_dev)->pci_io, (reg))
+#if 0
+#define ptn_ioread8(ptn_dev, reg) bus_read_1((ptn_dev)->pci_io, (reg))
+#define ptn_iowrite8(ptn_dev, reg, val) bus_write_1((ptn_dev)->pci_io, (reg), (val))
+#define ptn_iowrite16(ptn_dev, reg, val) bus_write_2((ptn_dev)->pci_io, (reg), (val))
+#define ptn_iowrite32(ptn_dev, reg, val) bus_write_4((ptn_dev)->pci_io, (reg), (val))
+#endif /* unused */
+
+/*
+ * Map host netmap memory through PCI-BAR in the guest OS,
+ * returning physical (nm_paddr) and virtual (nm_addr) addresses
+ * of the netmap memory mapped in the guest.
+ */
+int
+nm_os_pt_memdev_iomap(struct ptnetmap_memdev *ptn_dev, vm_paddr_t *nm_paddr, void **nm_addr)
+{
+ uint32_t mem_size;
+ int rid;
+
+ D("ptn_memdev_driver iomap");
+
+ rid = PCIR_BAR(PTNETMAP_MEM_PCI_BAR);
+ mem_size = ptn_ioread32(ptn_dev, PTNETMAP_IO_PCI_MEMSIZE);
+
+ /* map memory allocator */
+ ptn_dev->pci_mem = bus_alloc_resource(ptn_dev->dev, SYS_RES_MEMORY,
+ &rid, 0, ~0, mem_size, RF_ACTIVE);
+ if (ptn_dev->pci_mem == NULL) {
+ *nm_paddr = 0;
+ *nm_addr = 0;
+ return ENOMEM;
+ }
+
+ *nm_paddr = rman_get_start(ptn_dev->pci_mem);
+ *nm_addr = rman_get_virtual(ptn_dev->pci_mem);
+
+ D("=== BAR %d start %lx len %lx mem_size %x ===",
+ PTNETMAP_MEM_PCI_BAR,
+ *nm_paddr,
+ rman_get_size(ptn_dev->pci_mem),
+ mem_size);
+ return (0);
+}
+
+/* Unmap host netmap memory. */
+void
+nm_os_pt_memdev_iounmap(struct ptnetmap_memdev *ptn_dev)
+{
+ D("ptn_memdev_driver iounmap");
+
+ if (ptn_dev->pci_mem) {
+ bus_release_resource(ptn_dev->dev, SYS_RES_MEMORY,
+ PCIR_BAR(PTNETMAP_MEM_PCI_BAR), ptn_dev->pci_mem);
+ ptn_dev->pci_mem = NULL;
+ }
+}
+
+/* Device identification routine, return BUS_PROBE_DEFAULT on success,
+ * positive on failure */
+static int
+ptn_memdev_probe(device_t dev)
+{
+ char desc[256];
+
+ if (pci_get_vendor(dev) != PTNETMAP_PCI_VENDOR_ID)
+ return (ENXIO);
+ if (pci_get_device(dev) != PTNETMAP_PCI_DEVICE_ID)
+ return (ENXIO);
+
+ snprintf(desc, sizeof(desc), "%s PCI adapter",
+ PTNETMAP_MEMDEV_NAME);
+ device_set_desc_copy(dev, desc);
+
+ return (BUS_PROBE_DEFAULT);
+}
+
+/* Device initialization routine. */
+static int
+ptn_memdev_attach(device_t dev)
+{
+ struct ptnetmap_memdev *ptn_dev;
+ int rid;
+ uint16_t mem_id;
+
+ D("ptn_memdev_driver attach");
+
+ ptn_dev = device_get_softc(dev);
+ ptn_dev->dev = dev;
+
+ pci_enable_busmaster(dev);
+
+ rid = PCIR_BAR(PTNETMAP_IO_PCI_BAR);
+ ptn_dev->pci_io = bus_alloc_resource_any(dev, SYS_RES_IOPORT, &rid,
+ RF_ACTIVE);
+ if (ptn_dev->pci_io == NULL) {
+ device_printf(dev, "cannot map I/O space\n");
+ return (ENXIO);
+ }
+
+ mem_id = ptn_ioread16(ptn_dev, PTNETMAP_IO_PCI_HOSTID);
+
+ /* create guest allocator */
+ ptn_dev->nm_mem = netmap_mem_pt_guest_attach(ptn_dev, mem_id);
+ if (ptn_dev->nm_mem == NULL) {
+ ptn_memdev_detach(dev);
+ return (ENOMEM);
+ }
+ netmap_mem_get(ptn_dev->nm_mem);
+
+ D("ptn_memdev_driver probe OK - host_id: %d", mem_id);
+
+ return (0);
+}
+
+/* Device removal routine. */
+static int
+ptn_memdev_detach(device_t dev)
+{
+ struct ptnetmap_memdev *ptn_dev;
+
+ D("ptn_memdev_driver detach");
+ ptn_dev = device_get_softc(dev);
+
+ if (ptn_dev->nm_mem) {
+ netmap_mem_put(ptn_dev->nm_mem);
+ ptn_dev->nm_mem = NULL;
+ }
+ if (ptn_dev->pci_mem) {
+ bus_release_resource(dev, SYS_RES_MEMORY,
+ PCIR_BAR(PTNETMAP_MEM_PCI_BAR), ptn_dev->pci_mem);
+ ptn_dev->pci_mem = NULL;
+ }
+ if (ptn_dev->pci_io) {
+ bus_release_resource(dev, SYS_RES_IOPORT,
+ PCIR_BAR(PTNETMAP_IO_PCI_BAR), ptn_dev->pci_io);
+ ptn_dev->pci_io = NULL;
+ }
+
+ return (0);
+}
+
+static int
+ptn_memdev_shutdown(device_t dev)
+{
+ D("ptn_memdev_driver shutdown");
+ return bus_generic_shutdown(dev);
+}
+
+#endif /* WITH_PTNETMAP_GUEST */
+
/*
* In order to track whether pages are still mapped, we hook into
* the standard cdev_pager and intercept the constructor and
@@ -606,7 +939,7 @@ err_unlock:
* the device (/dev/netmap) so we cannot do anything useful.
* To track close() on individual file descriptors we pass netmap_dtor() to
* devfs_set_cdevpriv() on open(). The FreeBSD kernel will call the destructor
- * when the last fd pointing to the device is closed.
+ * when the last fd pointing to the device is closed.
*
* Note that FreeBSD does not even munmap() on close() so we also have
* to track mmap() ourselves, and postpone the call to
@@ -634,26 +967,275 @@ netmap_open(struct cdev *dev, int oflags, int devtype, struct thread *td)
(void)devtype;
(void)td;
- priv = malloc(sizeof(struct netmap_priv_d), M_DEVBUF,
- M_NOWAIT | M_ZERO);
- if (priv == NULL)
- return ENOMEM;
- priv->np_refs = 1;
+ NMG_LOCK();
+ priv = netmap_priv_new();
+ if (priv == NULL) {
+ error = ENOMEM;
+ goto out;
+ }
error = devfs_set_cdevpriv(priv, netmap_dtor);
if (error) {
- free(priv, M_DEVBUF);
- } else {
- NMG_LOCK();
- netmap_use_count++;
- NMG_UNLOCK();
+ netmap_priv_delete(priv);
}
+out:
+ NMG_UNLOCK();
return error;
}
+/******************** kthread wrapper ****************/
+#include <sys/sysproto.h>
+u_int
+nm_os_ncpus(void)
+{
+ return mp_maxid + 1;
+}
+
+struct nm_kthread_ctx {
+ struct thread *user_td; /* thread user-space (kthread creator) to send ioctl */
+ /* notification to guest (interrupt) */
+ int irq_fd; /* ioctl fd */
+ struct nm_kth_ioctl irq_ioctl; /* ioctl arguments */
+
+ /* notification from guest */
+ void *ioevent_file; /* tsleep() argument */
+
+ /* worker function and parameter */
+ nm_kthread_worker_fn_t worker_fn;
+ void *worker_private;
+
+ struct nm_kthread *nmk;
+
+ /* integer to manage multiple worker contexts (e.g., RX or TX on ptnetmap) */
+ long type;
+};
+
+struct nm_kthread {
+ struct thread *worker;
+ struct mtx worker_lock;
+ uint64_t scheduled; /* pending wake_up request */
+ struct nm_kthread_ctx worker_ctx;
+ int run; /* used to stop kthread */
+ int attach_user; /* kthread attached to user_process */
+ int affinity;
+};
+
+void inline
+nm_os_kthread_wakeup_worker(struct nm_kthread *nmk)
+{
+ /*
+ * There may be a race between FE and BE,
+ * which call both this function, and worker kthread,
+ * that reads nmk->scheduled.
+ *
+ * For us it is not important the counter value,
+ * but simply that it has changed since the last
+ * time the kthread saw it.
+ */
+ mtx_lock(&nmk->worker_lock);
+ nmk->scheduled++;
+ if (nmk->worker_ctx.ioevent_file) {
+ wakeup(nmk->worker_ctx.ioevent_file);
+ }
+ mtx_unlock(&nmk->worker_lock);
+}
+
+void inline
+nm_os_kthread_send_irq(struct nm_kthread *nmk)
+{
+ struct nm_kthread_ctx *ctx = &nmk->worker_ctx;
+ int err;
+
+ if (ctx->user_td && ctx->irq_fd > 0) {
+ err = kern_ioctl(ctx->user_td, ctx->irq_fd, ctx->irq_ioctl.com, (caddr_t)&ctx->irq_ioctl.data.msix);
+ if (err) {
+ D("kern_ioctl error: %d ioctl parameters: fd %d com %lu data %p",
+ err, ctx->irq_fd, ctx->irq_ioctl.com, &ctx->irq_ioctl.data);
+ }
+ }
+}
+
+static void
+nm_kthread_worker(void *data)
+{
+ struct nm_kthread *nmk = data;
+ struct nm_kthread_ctx *ctx = &nmk->worker_ctx;
+ uint64_t old_scheduled = nmk->scheduled;
+
+ if (nmk->affinity >= 0) {
+ thread_lock(curthread);
+ sched_bind(curthread, nmk->affinity);
+ thread_unlock(curthread);
+ }
+
+ while (nmk->run) {
+ /*
+ * check if the parent process dies
+ * (when kthread is attached to user process)
+ */
+ if (ctx->user_td) {
+ PROC_LOCK(curproc);
+ thread_suspend_check(0);
+ PROC_UNLOCK(curproc);
+ } else {
+ kthread_suspend_check();
+ }
+
+ /*
+ * if ioevent_file is not defined, we don't have notification
+ * mechanism and we continually execute worker_fn()
+ */
+ if (!ctx->ioevent_file) {
+ ctx->worker_fn(ctx->worker_private); /* worker body */
+ } else {
+ /* checks if there is a pending notification */
+ mtx_lock(&nmk->worker_lock);
+ if (likely(nmk->scheduled != old_scheduled)) {
+ old_scheduled = nmk->scheduled;
+ mtx_unlock(&nmk->worker_lock);
+
+ ctx->worker_fn(ctx->worker_private); /* worker body */
+
+ continue;
+ } else if (nmk->run) {
+ /* wait on event with one second timeout */
+ msleep_spin(ctx->ioevent_file, &nmk->worker_lock,
+ "nmk_ev", hz);
+ nmk->scheduled++;
+ }
+ mtx_unlock(&nmk->worker_lock);
+ }
+ }
+
+ kthread_exit();
+}
+
+static int
+nm_kthread_open_files(struct nm_kthread *nmk, struct nm_kthread_cfg *cfg)
+{
+ /* send irq through ioctl to bhyve (vmm.ko) */
+ if (cfg->event.irqfd) {
+ nmk->worker_ctx.irq_fd = cfg->event.irqfd;
+ nmk->worker_ctx.irq_ioctl = cfg->event.ioctl;
+ }
+ /* ring.ioeventfd contains the chan where do tsleep to wait events */
+ if (cfg->event.ioeventfd) {
+ nmk->worker_ctx.ioevent_file = (void *)cfg->event.ioeventfd;
+ }
+
+ return 0;
+}
+
+static void
+nm_kthread_close_files(struct nm_kthread *nmk)
+{
+ nmk->worker_ctx.irq_fd = 0;
+ nmk->worker_ctx.ioevent_file = NULL;
+}
+
+void
+nm_os_kthread_set_affinity(struct nm_kthread *nmk, int affinity)
+{
+ nmk->affinity = affinity;
+}
+
+struct nm_kthread *
+nm_os_kthread_create(struct nm_kthread_cfg *cfg)
+{
+ struct nm_kthread *nmk = NULL;
+ int error;
+
+ nmk = malloc(sizeof(*nmk), M_DEVBUF, M_NOWAIT | M_ZERO);
+ if (!nmk)
+ return NULL;
+
+ mtx_init(&nmk->worker_lock, "nm_kthread lock", NULL, MTX_SPIN);
+ nmk->worker_ctx.worker_fn = cfg->worker_fn;
+ nmk->worker_ctx.worker_private = cfg->worker_private;
+ nmk->worker_ctx.type = cfg->type;
+ nmk->affinity = -1;
+
+ /* attach kthread to user process (ptnetmap) */
+ nmk->attach_user = cfg->attach_user;
+
+ /* open event fd */
+ error = nm_kthread_open_files(nmk, cfg);
+ if (error)
+ goto err;
+
+ return nmk;
+err:
+ free(nmk, M_DEVBUF);
+ return NULL;
+}
+
+int
+nm_os_kthread_start(struct nm_kthread *nmk)
+{
+ struct proc *p = NULL;
+ int error = 0;
+
+ if (nmk->worker) {
+ return EBUSY;
+ }
+
+ /* check if we want to attach kthread to user process */
+ if (nmk->attach_user) {
+ nmk->worker_ctx.user_td = curthread;
+ p = curthread->td_proc;
+ }
+
+ /* enable kthread main loop */
+ nmk->run = 1;
+ /* create kthread */
+ if((error = kthread_add(nm_kthread_worker, nmk, p,
+ &nmk->worker, RFNOWAIT /* to be checked */, 0, "nm-kthread-%ld",
+ nmk->worker_ctx.type))) {
+ goto err;
+ }
+
+ D("nm_kthread started td 0x%p", nmk->worker);
+
+ return 0;
+err:
+ D("nm_kthread start failed err %d", error);
+ nmk->worker = NULL;
+ return error;
+}
+
+void
+nm_os_kthread_stop(struct nm_kthread *nmk)
+{
+ if (!nmk->worker) {
+ return;
+ }
+ /* tell to kthread to exit from main loop */
+ nmk->run = 0;
+
+ /* wake up kthread if it sleeps */
+ kthread_resume(nmk->worker);
+ nm_os_kthread_wakeup_worker(nmk);
+
+ nmk->worker = NULL;
+}
+
+void
+nm_os_kthread_delete(struct nm_kthread *nmk)
+{
+ if (!nmk)
+ return;
+ if (nmk->worker) {
+ nm_os_kthread_stop(nmk);
+ }
+
+ nm_kthread_close_files(nmk);
+
+ free(nmk, M_DEVBUF);
+}
+
/******************** kqueue support ****************/
/*
- * The OS_selwakeup also needs to issue a KNOTE_UNLOCKED.
+ * nm_os_selwakeup also needs to issue a KNOTE_UNLOCKED.
* We use a non-zero argument to distinguish the call from the one
* in kevent_scan() which instead also needs to run netmap_poll().
* The knote uses a global mutex for the time being. We might
@@ -672,17 +1254,23 @@ netmap_open(struct cdev *dev, int oflags, int devtype, struct thread *td)
void
-freebsd_selwakeup(struct nm_selinfo *si, int pri)
+nm_os_selwakeup(struct nm_selinfo *si)
{
if (netmap_verbose)
D("on knote %p", &si->si.si_note);
- selwakeuppri(&si->si, pri);
+ selwakeuppri(&si->si, PI_NET);
/* use a non-zero hint to tell the notification from the
* call done in kqueue_scan() which uses 0
*/
KNOTE_UNLOCKED(&si->si.si_note, 0x100 /* notification */);
}
+void
+nm_os_selrecord(struct thread *td, struct nm_selinfo *si)
+{
+ selrecord(td, &si->si);
+}
+
static void
netmap_knrdetach(struct knote *kn)
{
@@ -728,7 +1316,7 @@ netmap_knrw(struct knote *kn, long hint, int events)
RD(5, "curthread changed %p %p", curthread, priv->np_td);
return 1;
} else {
- revents = netmap_poll((void *)priv, events, curthread);
+ revents = netmap_poll(priv, events, NULL);
return (events & revents) ? 1 : 0;
}
}
@@ -801,13 +1389,47 @@ netmap_kqfilter(struct cdev *dev, struct knote *kn)
return 0;
}
+static int
+freebsd_netmap_poll(struct cdev *cdevi __unused, int events, struct thread *td)
+{
+ struct netmap_priv_d *priv;
+ if (devfs_get_cdevpriv((void **)&priv)) {
+ return POLLERR;
+ }
+ return netmap_poll(priv, events, td);
+}
+
+static int
+freebsd_netmap_ioctl(struct cdev *dev __unused, u_long cmd, caddr_t data,
+ int ffla __unused, struct thread *td)
+{
+ int error;
+ struct netmap_priv_d *priv;
+
+	CURVNET_SET(TD_TO_VNET(td));
+ error = devfs_get_cdevpriv((void **)&priv);
+ if (error) {
+ /* XXX ENOENT should be impossible, since the priv
+ * is now created in the open */
+ if (error == ENOENT)
+ error = ENXIO;
+ goto out;
+ }
+ error = netmap_ioctl(priv, cmd, data, td);
+out:
+ CURVNET_RESTORE();
+
+ return error;
+}
+
+extern struct cdevsw netmap_cdevsw; /* XXX used in netmap.c, should go elsewhere */
struct cdevsw netmap_cdevsw = {
.d_version = D_VERSION,
.d_name = "netmap",
.d_open = netmap_open,
.d_mmap_single = netmap_mmap_single,
- .d_ioctl = netmap_ioctl,
- .d_poll = netmap_poll,
+ .d_ioctl = freebsd_netmap_ioctl,
+ .d_poll = freebsd_netmap_poll,
.d_kqfilter = netmap_kqfilter,
.d_close = netmap_close,
};
@@ -852,6 +1474,24 @@ netmap_loader(__unused struct module *module, int event, __unused void *arg)
return (error);
}
-
+#ifdef DEV_MODULE_ORDERED
+/*
+ * The netmap module contains three drivers: (i) the netmap character device
+ * driver; (ii) the ptnetmap memdev PCI device driver, (iii) the ptnet PCI
+ * device driver. The attach() routines of both (ii) and (iii) need the
+ * lock of the global allocator, and such lock is initialized in netmap_init(),
+ * which is part of (i).
+ * Therefore, we make sure that (i) is loaded before (ii) and (iii), using
+ * the 'order' parameter of driver declaration macros. For (i), we specify
+ * SI_ORDER_MIDDLE, while higher orders are used with the DRIVER_MODULE_ORDERED
+ * macros for (ii) and (iii).
+ */
+DEV_MODULE_ORDERED(netmap, netmap_loader, NULL, SI_ORDER_MIDDLE);
+#else /* !DEV_MODULE_ORDERED */
DEV_MODULE(netmap, netmap_loader, NULL);
+#endif /* DEV_MODULE_ORDERED */
+MODULE_DEPEND(netmap, pci, 1, 1, 1);
MODULE_VERSION(netmap, 1);
+/* reduce conditional code */
+// linux API, use for the knlist in FreeBSD
+/* use a private mutex for the knlist */
diff --git a/sys/dev/netmap/netmap_generic.c b/sys/dev/netmap/netmap_generic.c
index 85a6a9f76ea2..5cef4a29110a 100644
--- a/sys/dev/netmap/netmap_generic.c
+++ b/sys/dev/netmap/netmap_generic.c
@@ -1,5 +1,7 @@
/*
- * Copyright (C) 2013-2014 Universita` di Pisa. All rights reserved.
+ * Copyright (C) 2013-2016 Vincenzo Maffione
+ * Copyright (C) 2013-2016 Luigi Rizzo
+ * All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
@@ -83,25 +85,25 @@ __FBSDID("$FreeBSD$");
#define rtnl_lock() ND("rtnl_lock called")
#define rtnl_unlock() ND("rtnl_unlock called")
-#define MBUF_TXQ(m) ((m)->m_pkthdr.flowid)
#define MBUF_RXQ(m) ((m)->m_pkthdr.flowid)
#define smp_mb()
/*
* FreeBSD mbuf allocator/deallocator in emulation mode:
- *
+ */
+#if __FreeBSD_version < 1100000
+
+/*
+ * For older versions of FreeBSD:
+ *
* We allocate EXT_PACKET mbuf+clusters, but need to set M_NOFREE
* so that the destructor, if invoked, will not free the packet.
- * In principle we should set the destructor only on demand,
+ * In principle we should set the destructor only on demand,
* but since there might be a race we better do it on allocation.
* As a consequence, we also need to set the destructor or we
* would leak buffers.
*/
-/*
- * mbuf wrappers
- */
-
/* mbuf destructor, also need to change the type to EXT_EXTREF,
* add an M_NOFREE flag, and then clear the flag and
* chain into uma_zfree(zone_pack, mf)
@@ -112,35 +114,93 @@ __FBSDID("$FreeBSD$");
(m)->m_ext.ext_type = EXT_EXTREF; \
} while (0)
-static void
-netmap_default_mbuf_destructor(struct mbuf *m)
+static int
+void_mbuf_dtor(struct mbuf *m, void *arg1, void *arg2)
{
/* restore original mbuf */
m->m_ext.ext_buf = m->m_data = m->m_ext.ext_arg1;
m->m_ext.ext_arg1 = NULL;
m->m_ext.ext_type = EXT_PACKET;
m->m_ext.ext_free = NULL;
- if (GET_MBUF_REFCNT(m) == 0)
+ if (MBUF_REFCNT(m) == 0)
SET_MBUF_REFCNT(m, 1);
uma_zfree(zone_pack, m);
+
+ return 0;
}
static inline struct mbuf *
-netmap_get_mbuf(int len)
+nm_os_get_mbuf(struct ifnet *ifp, int len)
{
struct mbuf *m;
+
+ (void)ifp;
m = m_getcl(M_NOWAIT, MT_DATA, M_PKTHDR);
if (m) {
- m->m_flags |= M_NOFREE; /* XXXNP: Almost certainly incorrect. */
+ /* m_getcl() (mb_ctor_mbuf) has an assert that checks that
+ * M_NOFREE flag is not specified as third argument,
+ * so we have to set M_NOFREE after m_getcl(). */
+ m->m_flags |= M_NOFREE;
m->m_ext.ext_arg1 = m->m_ext.ext_buf; // XXX save
- m->m_ext.ext_free = (void *)netmap_default_mbuf_destructor;
+ m->m_ext.ext_free = (void *)void_mbuf_dtor;
m->m_ext.ext_type = EXT_EXTREF;
- ND(5, "create m %p refcnt %d", m, GET_MBUF_REFCNT(m));
+ ND(5, "create m %p refcnt %d", m, MBUF_REFCNT(m));
}
return m;
}
+#else /* __FreeBSD_version >= 1100000 */
+
+/*
+ * Newer versions of FreeBSD, using a straightforward scheme.
+ *
+ * We allocate mbufs with m_gethdr(), since the mbuf header is needed
+ * by the driver. We also attach a customly-provided external storage,
+ * which in this case is a netmap buffer. When calling m_extadd(), however
+ * we pass a NULL address, since the real address (and length) will be
+ * filled in by nm_os_generic_xmit_frame() right before calling
+ * if_transmit().
+ *
+ * The dtor function does nothing, however we need it since mb_free_ext()
+ * has a KASSERT(), checking that the mbuf dtor function is not NULL.
+ */
+
+#define SET_MBUF_DESTRUCTOR(m, fn) do { \
+ (m)->m_ext.ext_free = (void *)fn; \
+} while (0)
+
+static void void_mbuf_dtor(struct mbuf *m, void *arg1, void *arg2) { }
+
+static inline struct mbuf *
+nm_os_get_mbuf(struct ifnet *ifp, int len)
+{
+ struct mbuf *m;
+
+ (void)ifp;
+ (void)len;
+
+ m = m_gethdr(M_NOWAIT, MT_DATA);
+ if (m == NULL) {
+ return m;
+ }
+
+ m_extadd(m, NULL /* buf */, 0 /* size */, void_mbuf_dtor,
+ NULL, NULL, 0, EXT_NET_DRV);
+
+ return m;
+}
+
+#endif /* __FreeBSD_version >= 1100000 */
+#elif defined _WIN32
+
+#include "win_glue.h"
+
+#define rtnl_lock() ND("rtnl_lock called")
+#define rtnl_unlock() ND("rtnl_unlock called")
+#define MBUF_TXQ(m) 0//((m)->m_pkthdr.flowid)
+#define MBUF_RXQ(m) 0//((m)->m_pkthdr.flowid)
+#define smp_mb() //XXX: to be correctly defined
#else /* linux */
@@ -150,7 +210,12 @@ netmap_get_mbuf(int len)
#include <linux/ethtool.h> /* struct ethtool_ops, get_ringparam */
#include <linux/hrtimer.h>
-//#define REG_RESET
+static inline struct mbuf *
+nm_os_get_mbuf(struct ifnet *ifp, int len)
+{
+ return alloc_skb(ifp->needed_headroom + len +
+ ifp->needed_tailroom, GFP_ATOMIC);
+}
#endif /* linux */
@@ -161,8 +226,21 @@ netmap_get_mbuf(int len)
#include <dev/netmap/netmap_mem2.h>
+#define for_each_kring_n(_i, _k, _karr, _n) \
+ for (_k=_karr, _i = 0; _i < _n; (_k)++, (_i)++)
+
+#define for_each_tx_kring(_i, _k, _na) \
+ for_each_kring_n(_i, _k, (_na)->tx_rings, (_na)->num_tx_rings)
+#define for_each_tx_kring_h(_i, _k, _na) \
+ for_each_kring_n(_i, _k, (_na)->tx_rings, (_na)->num_tx_rings + 1)
+
+#define for_each_rx_kring(_i, _k, _na) \
+ for_each_kring_n(_i, _k, (_na)->rx_rings, (_na)->num_rx_rings)
+#define for_each_rx_kring_h(_i, _k, _na) \
+ for_each_kring_n(_i, _k, (_na)->rx_rings, (_na)->num_rx_rings + 1)
-/* ======================== usage stats =========================== */
+
+/* ======================== PERFORMANCE STATISTICS =========================== */
#ifdef RATE_GENERIC
#define IFRATE(x) x
@@ -170,6 +248,8 @@ struct rate_stats {
unsigned long txpkt;
unsigned long txsync;
unsigned long txirq;
+ unsigned long txrepl;
+ unsigned long txdrop;
unsigned long rxpkt;
unsigned long rxirq;
unsigned long rxsync;
@@ -194,6 +274,8 @@ static void rate_callback(unsigned long arg)
RATE_PRINTK(txpkt);
RATE_PRINTK(txsync);
RATE_PRINTK(txirq);
+ RATE_PRINTK(txrepl);
+ RATE_PRINTK(txdrop);
RATE_PRINTK(rxpkt);
RATE_PRINTK(rxsync);
RATE_PRINTK(rxirq);
@@ -230,94 +312,222 @@ void generic_rate(int txp, int txs, int txi, int rxp, int rxs, int rxi)
* the poller threads. Differently from netmap_rx_irq(), we check
* only NAF_NETMAP_ON instead of NAF_NATIVE_ON to enable the irq.
*/
-static void
-netmap_generic_irq(struct ifnet *ifp, u_int q, u_int *work_done)
+void
+netmap_generic_irq(struct netmap_adapter *na, u_int q, u_int *work_done)
{
- struct netmap_adapter *na = NA(ifp);
if (unlikely(!nm_netmap_on(na)))
return;
- netmap_common_irq(ifp, q, work_done);
+ netmap_common_irq(na, q, work_done);
+#ifdef RATE_GENERIC
+ if (work_done)
+ rate_ctx.new.rxirq++;
+ else
+ rate_ctx.new.txirq++;
+#endif /* RATE_GENERIC */
}
+static int
+generic_netmap_unregister(struct netmap_adapter *na)
+{
+ struct netmap_generic_adapter *gna = (struct netmap_generic_adapter *)na;
+ struct netmap_kring *kring = NULL;
+ int i, r;
+
+ if (na->active_fds == 0) {
+ D("Generic adapter %p goes off", na);
+ rtnl_lock();
+
+ na->na_flags &= ~NAF_NETMAP_ON;
+
+ /* Release packet steering control. */
+ nm_os_catch_tx(gna, 0);
+
+ /* Stop intercepting packets on the RX path. */
+ nm_os_catch_rx(gna, 0);
+
+ rtnl_unlock();
+ }
+
+ for_each_rx_kring_h(r, kring, na) {
+ if (nm_kring_pending_off(kring)) {
+ D("RX ring %d of generic adapter %p goes off", r, na);
+ kring->nr_mode = NKR_NETMAP_OFF;
+ }
+ }
+ for_each_tx_kring_h(r, kring, na) {
+ if (nm_kring_pending_off(kring)) {
+ kring->nr_mode = NKR_NETMAP_OFF;
+ D("TX ring %d of generic adapter %p goes off", r, na);
+ }
+ }
+
+ for_each_rx_kring(r, kring, na) {
+ /* Free the mbufs still pending in the RX queues,
+ * that did not end up into the corresponding netmap
+ * RX rings. */
+ mbq_safe_purge(&kring->rx_queue);
+ nm_os_mitigation_cleanup(&gna->mit[r]);
+ }
+
+ /* Decrement reference counter for the mbufs in the
+ * TX pools. These mbufs can be still pending in drivers,
+ * (e.g. this happens with virtio-net driver, which
+ * does lazy reclaiming of transmitted mbufs). */
+ for_each_tx_kring(r, kring, na) {
+ /* We must remove the destructor on the TX event,
+ * because the destructor invokes netmap code, and
+ * the netmap module may disappear before the
+ * TX event is consumed. */
+ mtx_lock_spin(&kring->tx_event_lock);
+ if (kring->tx_event) {
+ SET_MBUF_DESTRUCTOR(kring->tx_event, NULL);
+ }
+ kring->tx_event = NULL;
+ mtx_unlock_spin(&kring->tx_event_lock);
+ }
+
+ if (na->active_fds == 0) {
+ free(gna->mit, M_DEVBUF);
+
+ for_each_rx_kring(r, kring, na) {
+ mbq_safe_fini(&kring->rx_queue);
+ }
+
+ for_each_tx_kring(r, kring, na) {
+ mtx_destroy(&kring->tx_event_lock);
+ if (kring->tx_pool == NULL) {
+ continue;
+ }
+
+ for (i=0; i<na->num_tx_desc; i++) {
+ if (kring->tx_pool[i]) {
+ m_freem(kring->tx_pool[i]);
+ }
+ }
+ free(kring->tx_pool, M_DEVBUF);
+ kring->tx_pool = NULL;
+ }
+
+#ifdef RATE_GENERIC
+ if (--rate_ctx.refcount == 0) {
+ D("del_timer()");
+ del_timer(&rate_ctx.timer);
+ }
+#endif
+ }
+
+ return 0;
+}
/* Enable/disable netmap mode for a generic network interface. */
static int
generic_netmap_register(struct netmap_adapter *na, int enable)
{
struct netmap_generic_adapter *gna = (struct netmap_generic_adapter *)na;
- struct mbuf *m;
+ struct netmap_kring *kring = NULL;
int error;
int i, r;
- if (!na)
+ if (!na) {
return EINVAL;
+ }
-#ifdef REG_RESET
- error = ifp->netdev_ops->ndo_stop(ifp);
- if (error) {
- return error;
+ if (!enable) {
+ /* This is actually an unregif. */
+ return generic_netmap_unregister(na);
}
-#endif /* REG_RESET */
- if (enable) { /* Enable netmap mode. */
- /* Init the mitigation support on all the rx queues. */
+ if (na->active_fds == 0) {
+ D("Generic adapter %p goes on", na);
+ /* Do all memory allocations when (na->active_fds == 0), to
+ * simplify error management. */
+
+ /* Allocate memory for mitigation support on all the rx queues. */
gna->mit = malloc(na->num_rx_rings * sizeof(struct nm_generic_mit),
- M_DEVBUF, M_NOWAIT | M_ZERO);
+ M_DEVBUF, M_NOWAIT | M_ZERO);
if (!gna->mit) {
D("mitigation allocation failed");
error = ENOMEM;
goto out;
}
- for (r=0; r<na->num_rx_rings; r++)
- netmap_mitigation_init(&gna->mit[r], r, na);
- /* Initialize the rx queue, as generic_rx_handler() can
- * be called as soon as netmap_catch_rx() returns.
- */
- for (r=0; r<na->num_rx_rings; r++) {
- mbq_safe_init(&na->rx_rings[r].rx_queue);
+ for_each_rx_kring(r, kring, na) {
+ /* Init mitigation support. */
+ nm_os_mitigation_init(&gna->mit[r], r, na);
+
+ /* Initialize the rx queue, as generic_rx_handler() can
+ * be called as soon as nm_os_catch_rx() returns.
+ */
+ mbq_safe_init(&kring->rx_queue);
}
/*
- * Preallocate packet buffers for the tx rings.
+ * Prepare mbuf pools (parallel to the tx rings), for packet
+ * transmission. Don't preallocate the mbufs here, it's simpler
+ * to leave this task to txsync.
*/
- for (r=0; r<na->num_tx_rings; r++)
- na->tx_rings[r].tx_pool = NULL;
- for (r=0; r<na->num_tx_rings; r++) {
- na->tx_rings[r].tx_pool = malloc(na->num_tx_desc * sizeof(struct mbuf *),
- M_DEVBUF, M_NOWAIT | M_ZERO);
- if (!na->tx_rings[r].tx_pool) {
+ for_each_tx_kring(r, kring, na) {
+ kring->tx_pool = NULL;
+ }
+ for_each_tx_kring(r, kring, na) {
+ kring->tx_pool =
+ malloc(na->num_tx_desc * sizeof(struct mbuf *),
+ M_DEVBUF, M_NOWAIT | M_ZERO);
+ if (!kring->tx_pool) {
D("tx_pool allocation failed");
error = ENOMEM;
goto free_tx_pools;
}
- for (i=0; i<na->num_tx_desc; i++)
- na->tx_rings[r].tx_pool[i] = NULL;
- for (i=0; i<na->num_tx_desc; i++) {
- m = netmap_get_mbuf(NETMAP_BUF_SIZE(na));
- if (!m) {
- D("tx_pool[%d] allocation failed", i);
- error = ENOMEM;
- goto free_tx_pools;
- }
- na->tx_rings[r].tx_pool[i] = m;
- }
+ mtx_init(&kring->tx_event_lock, "tx_event_lock",
+ NULL, MTX_SPIN);
}
+ }
+
+ for_each_rx_kring_h(r, kring, na) {
+ if (nm_kring_pending_on(kring)) {
+ D("RX ring %d of generic adapter %p goes on", r, na);
+ kring->nr_mode = NKR_NETMAP_ON;
+ }
+
+ }
+ for_each_tx_kring_h(r, kring, na) {
+ if (nm_kring_pending_on(kring)) {
+ D("TX ring %d of generic adapter %p goes on", r, na);
+ kring->nr_mode = NKR_NETMAP_ON;
+ }
+ }
+
+ for_each_tx_kring(r, kring, na) {
+ /* Initialize tx_pool and tx_event. */
+ for (i=0; i<na->num_tx_desc; i++) {
+ kring->tx_pool[i] = NULL;
+ }
+
+ kring->tx_event = NULL;
+ }
+
+ if (na->active_fds == 0) {
rtnl_lock();
+
/* Prepare to intercept incoming traffic. */
- error = netmap_catch_rx(gna, 1);
+ error = nm_os_catch_rx(gna, 1);
if (error) {
- D("netdev_rx_handler_register() failed (%d)", error);
+ D("nm_os_catch_rx(1) failed (%d)", error);
goto register_handler;
}
- na->na_flags |= NAF_NETMAP_ON;
/* Make netmap control the packet steering. */
- netmap_catch_tx(gna, 1);
+ error = nm_os_catch_tx(gna, 1);
+ if (error) {
+ D("nm_os_catch_tx(1) failed (%d)", error);
+ goto catch_rx;
+ }
rtnl_unlock();
+ na->na_flags |= NAF_NETMAP_ON;
+
#ifdef RATE_GENERIC
if (rate_ctx.refcount == 0) {
D("setup_timer()");
@@ -329,73 +539,26 @@ generic_netmap_register(struct netmap_adapter *na, int enable)
}
rate_ctx.refcount++;
#endif /* RATE */
-
- } else if (na->tx_rings[0].tx_pool) {
- /* Disable netmap mode. We enter here only if the previous
- generic_netmap_register(na, 1) was successful.
- If it was not, na->tx_rings[0].tx_pool was set to NULL by the
- error handling code below. */
- rtnl_lock();
-
- na->na_flags &= ~NAF_NETMAP_ON;
-
- /* Release packet steering control. */
- netmap_catch_tx(gna, 0);
-
- /* Do not intercept packets on the rx path. */
- netmap_catch_rx(gna, 0);
-
- rtnl_unlock();
-
- /* Free the mbufs going to the netmap rings */
- for (r=0; r<na->num_rx_rings; r++) {
- mbq_safe_purge(&na->rx_rings[r].rx_queue);
- mbq_safe_destroy(&na->rx_rings[r].rx_queue);
- }
-
- for (r=0; r<na->num_rx_rings; r++)
- netmap_mitigation_cleanup(&gna->mit[r]);
- free(gna->mit, M_DEVBUF);
-
- for (r=0; r<na->num_tx_rings; r++) {
- for (i=0; i<na->num_tx_desc; i++) {
- m_freem(na->tx_rings[r].tx_pool[i]);
- }
- free(na->tx_rings[r].tx_pool, M_DEVBUF);
- }
-
-#ifdef RATE_GENERIC
- if (--rate_ctx.refcount == 0) {
- D("del_timer()");
- del_timer(&rate_ctx.timer);
- }
-#endif
- }
-
-#ifdef REG_RESET
- error = ifp->netdev_ops->ndo_open(ifp);
- if (error) {
- goto free_tx_pools;
}
-#endif
return 0;
+ /* Here (na->active_fds == 0) holds. */
+catch_rx:
+ nm_os_catch_rx(gna, 0);
register_handler:
rtnl_unlock();
free_tx_pools:
- for (r=0; r<na->num_tx_rings; r++) {
- if (na->tx_rings[r].tx_pool == NULL)
+ for_each_tx_kring(r, kring, na) {
+ mtx_destroy(&kring->tx_event_lock);
+ if (kring->tx_pool == NULL) {
continue;
- for (i=0; i<na->num_tx_desc; i++)
- if (na->tx_rings[r].tx_pool[i])
- m_freem(na->tx_rings[r].tx_pool[i]);
- free(na->tx_rings[r].tx_pool, M_DEVBUF);
- na->tx_rings[r].tx_pool = NULL;
+ }
+ free(kring->tx_pool, M_DEVBUF);
+ kring->tx_pool = NULL;
}
- for (r=0; r<na->num_rx_rings; r++) {
- netmap_mitigation_cleanup(&gna->mit[r]);
- mbq_safe_destroy(&na->rx_rings[r].rx_queue);
+ for_each_rx_kring(r, kring, na) {
+ mbq_safe_fini(&kring->rx_queue);
}
free(gna->mit, M_DEVBUF);
out:
@@ -411,13 +574,58 @@ out:
static void
generic_mbuf_destructor(struct mbuf *m)
{
- netmap_generic_irq(MBUF_IFP(m), MBUF_TXQ(m), NULL);
+ struct netmap_adapter *na = NA(GEN_TX_MBUF_IFP(m));
+ struct netmap_kring *kring;
+ unsigned int r = MBUF_TXQ(m);
+ unsigned int r_orig = r;
+
+ if (unlikely(!nm_netmap_on(na) || r >= na->num_tx_rings)) {
+ D("Error: no netmap adapter on device %p",
+ GEN_TX_MBUF_IFP(m));
+ return;
+ }
+
+ /*
+ * First, clear the event mbuf.
+ * In principle, the event 'm' should match the one stored
+	 * on ring 'r'. However we check it explicitly to stay
+ * safe against lower layers (qdisc, driver, etc.) changing
+ * MBUF_TXQ(m) under our feet. If the match is not found
+ * on 'r', we try to see if it belongs to some other ring.
+ */
+ for (;;) {
+ bool match = false;
+
+ kring = &na->tx_rings[r];
+ mtx_lock_spin(&kring->tx_event_lock);
+ if (kring->tx_event == m) {
+ kring->tx_event = NULL;
+ match = true;
+ }
+ mtx_unlock_spin(&kring->tx_event_lock);
+
+ if (match) {
+ if (r != r_orig) {
+ RD(1, "event %p migrated: ring %u --> %u",
+ m, r_orig, r);
+ }
+ break;
+ }
+
+ if (++r == na->num_tx_rings) r = 0;
+
+ if (r == r_orig) {
+ RD(1, "Cannot match event %p", m);
+ return;
+ }
+ }
+
+ /* Second, wake up clients. They will reclaim the event through
+ * txsync. */
+ netmap_generic_irq(na, r, NULL);
#ifdef __FreeBSD__
- if (netmap_verbose)
- RD(5, "Tx irq (%p) queue %d index %d" , m, MBUF_TXQ(m), (int)(uintptr_t)m->m_ext.ext_arg1);
- netmap_default_mbuf_destructor(m);
-#endif /* __FreeBSD__ */
- IFRATE(rate_ctx.new.txirq++);
+ void_mbuf_dtor(m, NULL, NULL);
+#endif
}
extern int netmap_adaptive_io;
@@ -428,7 +636,7 @@ extern int netmap_adaptive_io;
* nr_hwcur is the first unsent buffer.
*/
static u_int
-generic_netmap_tx_clean(struct netmap_kring *kring)
+generic_netmap_tx_clean(struct netmap_kring *kring, int txqdisc)
{
u_int const lim = kring->nkr_num_slots - 1;
u_int nm_i = nm_next(kring->nr_hwtail, lim);
@@ -436,20 +644,50 @@ generic_netmap_tx_clean(struct netmap_kring *kring)
u_int n = 0;
struct mbuf **tx_pool = kring->tx_pool;
+ ND("hwcur = %d, hwtail = %d", kring->nr_hwcur, kring->nr_hwtail);
+
while (nm_i != hwcur) { /* buffers not completed */
struct mbuf *m = tx_pool[nm_i];
- if (unlikely(m == NULL)) {
- /* this is done, try to replenish the entry */
- tx_pool[nm_i] = m = netmap_get_mbuf(NETMAP_BUF_SIZE(kring->na));
+ if (txqdisc) {
+ if (m == NULL) {
+ /* Nothing to do, this is going
+ * to be replenished. */
+ RD(3, "Is this happening?");
+
+ } else if (MBUF_QUEUED(m)) {
+ break; /* Not dequeued yet. */
+
+ } else if (MBUF_REFCNT(m) != 1) {
+ /* This mbuf has been dequeued but is still busy
+ * (refcount is 2).
+ * Leave it to the driver and replenish. */
+ m_freem(m);
+ tx_pool[nm_i] = NULL;
+ }
+
+ } else {
if (unlikely(m == NULL)) {
- D("mbuf allocation failed, XXX error");
- // XXX how do we proceed ? break ?
- return -ENOMEM;
+ int event_consumed;
+
+ /* This slot was used to place an event. */
+ mtx_lock_spin(&kring->tx_event_lock);
+ event_consumed = (kring->tx_event == NULL);
+ mtx_unlock_spin(&kring->tx_event_lock);
+ if (!event_consumed) {
+ /* The event has not been consumed yet,
+ * still busy in the driver. */
+ break;
+ }
+ /* The event has been consumed, we can go
+ * ahead. */
+
+ } else if (MBUF_REFCNT(m) != 1) {
+ /* This mbuf is still busy: its refcnt is 2. */
+ break;
}
- } else if (GET_MBUF_REFCNT(m) != 1) {
- break; /* This mbuf is still busy: its refcnt is 2. */
}
+
n++;
nm_i = nm_next(nm_i, lim);
#if 0 /* rate adaptation */
@@ -476,23 +714,17 @@ generic_netmap_tx_clean(struct netmap_kring *kring)
return n;
}
-
-/*
- * We have pending packets in the driver between nr_hwtail +1 and hwcur.
- * Compute a position in the middle, to be used to generate
- * a notification.
- */
+/* Compute a slot index in the middle between inf and sup. */
static inline u_int
-generic_tx_event_middle(struct netmap_kring *kring, u_int hwcur)
+ring_middle(u_int inf, u_int sup, u_int lim)
{
- u_int n = kring->nkr_num_slots;
- u_int ntc = nm_next(kring->nr_hwtail, n-1);
+ u_int n = lim + 1;
u_int e;
- if (hwcur >= ntc) {
- e = (hwcur + ntc) / 2;
+ if (sup >= inf) {
+ e = (sup + inf) / 2;
} else { /* wrap around */
- e = (hwcur + n + ntc) / 2;
+ e = (sup + n + inf) / 2;
if (e >= n) {
e -= n;
}
@@ -506,35 +738,59 @@ generic_tx_event_middle(struct netmap_kring *kring, u_int hwcur)
return e;
}
-/*
- * We have pending packets in the driver between nr_hwtail+1 and hwcur.
- * Schedule a notification approximately in the middle of the two.
- * There is a race but this is only called within txsync which does
- * a double check.
- */
static void
generic_set_tx_event(struct netmap_kring *kring, u_int hwcur)
{
+ u_int lim = kring->nkr_num_slots - 1;
struct mbuf *m;
u_int e;
+ u_int ntc = nm_next(kring->nr_hwtail, lim); /* next to clean */
- if (nm_next(kring->nr_hwtail, kring->nkr_num_slots -1) == hwcur) {
+ if (ntc == hwcur) {
return; /* all buffers are free */
}
- e = generic_tx_event_middle(kring, hwcur);
+
+ /*
+ * We have pending packets in the driver between hwtail+1
+ * and hwcur, and we have to chose one of these slot to
+ * generate a notification.
+ * There is a race but this is only called within txsync which
+ * does a double check.
+ */
+#if 0
+ /* Choose a slot in the middle, so that we don't risk ending
+ * up in a situation where the client continuously wake up,
+ * fills one or a few TX slots and go to sleep again. */
+ e = ring_middle(ntc, hwcur, lim);
+#else
+ /* Choose the first pending slot, to be safe against driver
+ * reordering mbuf transmissions. */
+ e = ntc;
+#endif
m = kring->tx_pool[e];
- ND(5, "Request Event at %d mbuf %p refcnt %d", e, m, m ? GET_MBUF_REFCNT(m) : -2 );
if (m == NULL) {
- /* This can happen if there is already an event on the netmap
- slot 'e': There is nothing to do. */
+ /* An event is already in place. */
return;
}
- kring->tx_pool[e] = NULL;
+
+ mtx_lock_spin(&kring->tx_event_lock);
+ if (kring->tx_event) {
+ /* An event is already in place. */
+ mtx_unlock_spin(&kring->tx_event_lock);
+ return;
+ }
+
SET_MBUF_DESTRUCTOR(m, generic_mbuf_destructor);
+ kring->tx_event = m;
+ mtx_unlock_spin(&kring->tx_event_lock);
+
+ kring->tx_pool[e] = NULL;
+
+ ND(5, "Request Event at %d mbuf %p refcnt %d", e, m, m ? MBUF_REFCNT(m) : -2 );
- // XXX wmb() ?
- /* Decrement the refcount an free it if we have the last one. */
+ /* Decrement the refcount. This will free it if we lose the race
+ * with the driver. */
m_freem(m);
smp_mb();
}
@@ -551,6 +807,7 @@ static int
generic_netmap_txsync(struct netmap_kring *kring, int flags)
{
struct netmap_adapter *na = kring->na;
+ struct netmap_generic_adapter *gna = (struct netmap_generic_adapter *)na;
struct ifnet *ifp = na->ifp;
struct netmap_ring *ring = kring->ring;
u_int nm_i; /* index into the netmap ring */ // j
@@ -560,8 +817,6 @@ generic_netmap_txsync(struct netmap_kring *kring, int flags)
IFRATE(rate_ctx.new.txsync++);
- // TODO: handle the case of mbuf allocation failure
-
rmb();
/*
@@ -569,72 +824,121 @@ generic_netmap_txsync(struct netmap_kring *kring, int flags)
*/
nm_i = kring->nr_hwcur;
if (nm_i != head) { /* we have new packets to send */
+ struct nm_os_gen_arg a;
+ u_int event = -1;
+
+ if (gna->txqdisc && nm_kr_txempty(kring)) {
+ /* In txqdisc mode, we ask for a delayed notification,
+ * but only when cur == hwtail, which means that the
+ * client is going to block. */
+ event = ring_middle(nm_i, head, lim);
+ ND(3, "Place txqdisc event (hwcur=%u,event=%u,"
+ "head=%u,hwtail=%u)", nm_i, event, head,
+ kring->nr_hwtail);
+ }
+
+ a.ifp = ifp;
+ a.ring_nr = ring_nr;
+ a.head = a.tail = NULL;
+
while (nm_i != head) {
struct netmap_slot *slot = &ring->slot[nm_i];
u_int len = slot->len;
void *addr = NMB(na, slot);
-
/* device-specific */
struct mbuf *m;
int tx_ret;
NM_CHECK_ADDR_LEN(na, addr, len);
- /* Tale a mbuf from the tx pool and copy in the user packet. */
+ /* Take an mbuf from the tx pool (replenishing the pool
+ * entry if necessary) and copy in the user packet. */
m = kring->tx_pool[nm_i];
- if (unlikely(!m)) {
- RD(5, "This should never happen");
- kring->tx_pool[nm_i] = m = netmap_get_mbuf(NETMAP_BUF_SIZE(na));
- if (unlikely(m == NULL)) {
- D("mbuf allocation failed");
+ if (unlikely(m == NULL)) {
+ kring->tx_pool[nm_i] = m =
+ nm_os_get_mbuf(ifp, NETMAP_BUF_SIZE(na));
+ if (m == NULL) {
+ RD(2, "Failed to replenish mbuf");
+ /* Here we could schedule a timer which
+ * retries to replenish after a while,
+ * and notifies the client when it
+ * manages to replenish some slots. In
+ * any case we break early to avoid
+ * crashes. */
break;
}
+ IFRATE(rate_ctx.new.txrepl++);
}
- /* XXX we should ask notifications when NS_REPORT is set,
- * or roughly every half frame. We can optimize this
- * by lazily requesting notifications only when a
- * transmission fails. Probably the best way is to
- * break on failures and set notifications when
- * ring->cur == ring->tail || nm_i != cur
+
+ a.m = m;
+ a.addr = addr;
+ a.len = len;
+ a.qevent = (nm_i == event);
+ /* When not in txqdisc mode, we should ask
+ * notifications when NS_REPORT is set, or roughly
+ * every half ring. To optimize this, we set a
+ * notification event when the client runs out of
+ * TX ring space, or when transmission fails. In
+ * the latter case we also break early.
*/
- tx_ret = generic_xmit_frame(ifp, m, addr, len, ring_nr);
+ tx_ret = nm_os_generic_xmit_frame(&a);
if (unlikely(tx_ret)) {
- ND(5, "start_xmit failed: err %d [nm_i %u, head %u, hwtail %u]",
- tx_ret, nm_i, head, kring->nr_hwtail);
- /*
- * No room for this mbuf in the device driver.
- * Request a notification FOR A PREVIOUS MBUF,
- * then call generic_netmap_tx_clean(kring) to do the
- * double check and see if we can free more buffers.
- * If there is space continue, else break;
- * NOTE: the double check is necessary if the problem
- * occurs in the txsync call after selrecord().
- * Also, we need some way to tell the caller that not
- * all buffers were queued onto the device (this was
- * not a problem with native netmap driver where space
- * is preallocated). The bridge has a similar problem
- * and we solve it there by dropping the excess packets.
- */
- generic_set_tx_event(kring, nm_i);
- if (generic_netmap_tx_clean(kring)) { /* space now available */
- continue;
- } else {
- break;
+ if (!gna->txqdisc) {
+ /*
+ * No room for this mbuf in the device driver.
+ * Request a notification FOR A PREVIOUS MBUF,
+ * then call generic_netmap_tx_clean(kring) to do the
+ * double check and see if we can free more buffers.
+ * If there is space continue, else break;
+ * NOTE: the double check is necessary if the problem
+ * occurs in the txsync call after selrecord().
+ * Also, we need some way to tell the caller that not
+ * all buffers were queued onto the device (this was
+ * not a problem with native netmap driver where space
+ * is preallocated). The bridge has a similar problem
+ * and we solve it there by dropping the excess packets.
+ */
+ generic_set_tx_event(kring, nm_i);
+ if (generic_netmap_tx_clean(kring, gna->txqdisc)) {
+ /* space now available */
+ continue;
+ } else {
+ break;
+ }
}
+
+ /* In txqdisc mode, the netmap-aware qdisc
+ * queue has the same length as the number of
+ * netmap slots (N). Since tail is advanced
+ * only when packets are dequeued, qdisc
+ * queue overrun cannot happen, so
+ * nm_os_generic_xmit_frame() did not fail
+ * because of that.
+ * However, packets can be dropped because
+ * carrier is off, or because our qdisc is
+ * being deactivated, or possibly for other
+ * reasons. In these cases, we just let the
+ * packet to be dropped. */
+ IFRATE(rate_ctx.new.txdrop++);
}
+
slot->flags &= ~(NS_REPORT | NS_BUF_CHANGED);
nm_i = nm_next(nm_i, lim);
- IFRATE(rate_ctx.new.txpkt ++);
+ IFRATE(rate_ctx.new.txpkt++);
}
-
- /* Update hwcur to the next slot to transmit. */
- kring->nr_hwcur = nm_i; /* not head, we could break early */
+ if (a.head != NULL) {
+ a.addr = NULL;
+ nm_os_generic_xmit_frame(&a);
+ }
+ /* Update hwcur to the next slot to transmit. Here nm_i
+ * is not necessarily head, we could break early. */
+ kring->nr_hwcur = nm_i;
}
/*
* Second, reclaim completed buffers
*/
- if (flags & NAF_FORCE_RECLAIM || nm_kr_txempty(kring)) {
+ if (!gna->txqdisc && (flags & NAF_FORCE_RECLAIM || nm_kr_txempty(kring))) {
/* No more available slots? Set a notification event
* on a netmap slot that will be cleaned in the future.
* No doublecheck is performed, since txsync() will be
@@ -642,58 +946,74 @@ generic_netmap_txsync(struct netmap_kring *kring, int flags)
*/
generic_set_tx_event(kring, nm_i);
}
- ND("tx #%d, hwtail = %d", n, kring->nr_hwtail);
- generic_netmap_tx_clean(kring);
+ generic_netmap_tx_clean(kring, gna->txqdisc);
return 0;
}
/*
- * This handler is registered (through netmap_catch_rx())
+ * This handler is registered (through nm_os_catch_rx())
* within the attached network interface
* in the RX subsystem, so that every mbuf passed up by
* the driver can be stolen to the network stack.
* Stolen packets are put in a queue where the
* generic_netmap_rxsync() callback can extract them.
+ * Returns 1 if the packet was stolen, 0 otherwise.
*/
-void
+int
generic_rx_handler(struct ifnet *ifp, struct mbuf *m)
{
struct netmap_adapter *na = NA(ifp);
struct netmap_generic_adapter *gna = (struct netmap_generic_adapter *)na;
+ struct netmap_kring *kring;
u_int work_done;
- u_int rr = MBUF_RXQ(m); // receive ring number
+ u_int r = MBUF_RXQ(m); /* receive ring number */
- if (rr >= na->num_rx_rings) {
- rr = rr % na->num_rx_rings; // XXX expensive...
+ if (r >= na->num_rx_rings) {
+ r = r % na->num_rx_rings;
+ }
+
+ kring = &na->rx_rings[r];
+
+ if (kring->nr_mode == NKR_NETMAP_OFF) {
+ /* We must not intercept this mbuf. */
+ return 0;
}
/* limit the size of the queue */
- if (unlikely(mbq_len(&na->rx_rings[rr].rx_queue) > 1024)) {
+ if (unlikely(!gna->rxsg && MBUF_LEN(m) > NETMAP_BUF_SIZE(na))) {
+ /* This may happen when GRO/LRO features are enabled for
+ * the NIC driver when the generic adapter does not
+ * support RX scatter-gather. */
+ RD(2, "Warning: driver pushed up big packet "
+ "(size=%d)", (int)MBUF_LEN(m));
+ m_freem(m);
+ } else if (unlikely(mbq_len(&kring->rx_queue) > 1024)) {
m_freem(m);
} else {
- mbq_safe_enqueue(&na->rx_rings[rr].rx_queue, m);
+ mbq_safe_enqueue(&kring->rx_queue, m);
}
if (netmap_generic_mit < 32768) {
/* no rx mitigation, pass notification up */
- netmap_generic_irq(na->ifp, rr, &work_done);
- IFRATE(rate_ctx.new.rxirq++);
+ netmap_generic_irq(na, r, &work_done);
} else {
/* same as send combining, filter notification if there is a
* pending timer, otherwise pass it up and start a timer.
*/
- if (likely(netmap_mitigation_active(&gna->mit[rr]))) {
+ if (likely(nm_os_mitigation_active(&gna->mit[r]))) {
/* Record that there is some pending work. */
- gna->mit[rr].mit_pending = 1;
+ gna->mit[r].mit_pending = 1;
} else {
- netmap_generic_irq(na->ifp, rr, &work_done);
- IFRATE(rate_ctx.new.rxirq++);
- netmap_mitigation_start(&gna->mit[rr]);
+ netmap_generic_irq(na, r, &work_done);
+ nm_os_mitigation_start(&gna->mit[r]);
}
}
+
+ /* We have intercepted the mbuf. */
+ return 1;
}
/*
@@ -713,54 +1033,23 @@ generic_netmap_rxsync(struct netmap_kring *kring, int flags)
u_int const head = kring->rhead;
int force_update = (flags & NAF_FORCE_READ) || kring->nr_kflags & NKR_PENDINTR;
+ /* Adapter-specific variables. */
+ uint16_t slot_flags = kring->nkr_slot_flags;
+ u_int nm_buf_len = NETMAP_BUF_SIZE(na);
+ struct mbq tmpq;
+ struct mbuf *m;
+ int avail; /* in bytes */
+ int mlen;
+ int copy;
+
if (head > lim)
return netmap_ring_reinit(kring);
- /*
- * First part: import newly received packets.
- */
- if (netmap_no_pendintr || force_update) {
- /* extract buffers from the rx queue, stop at most one
- * slot before nr_hwcur (stop_i)
- */
- uint16_t slot_flags = kring->nkr_slot_flags;
- u_int stop_i = nm_prev(kring->nr_hwcur, lim);
-
- nm_i = kring->nr_hwtail; /* first empty slot in the receive ring */
- for (n = 0; nm_i != stop_i; n++) {
- int len;
- void *addr = NMB(na, &ring->slot[nm_i]);
- struct mbuf *m;
-
- /* we only check the address here on generic rx rings */
- if (addr == NETMAP_BUF_BASE(na)) { /* Bad buffer */
- return netmap_ring_reinit(kring);
- }
- /*
- * Call the locked version of the function.
- * XXX Ideally we could grab a batch of mbufs at once
- * and save some locking overhead.
- */
- m = mbq_safe_dequeue(&kring->rx_queue);
- if (!m) /* no more data */
- break;
- len = MBUF_LEN(m);
- m_copydata(m, 0, len, addr);
- ring->slot[nm_i].len = len;
- ring->slot[nm_i].flags = slot_flags;
- m_freem(m);
- nm_i = nm_next(nm_i, lim);
- }
- if (n) {
- kring->nr_hwtail = nm_i;
- IFRATE(rate_ctx.new.rxpkt += n);
- }
- kring->nr_kflags &= ~NKR_PENDINTR;
- }
+ IFRATE(rate_ctx.new.rxsync++);
- // XXX should we invert the order ?
/*
- * Second part: skip past packets that userspace has released.
+ * First part: skip past packets that userspace has released.
+ * This can possibly make room for the second part.
*/
nm_i = kring->nr_hwcur;
if (nm_i != head) {
@@ -773,7 +1062,106 @@ generic_netmap_rxsync(struct netmap_kring *kring, int flags)
}
kring->nr_hwcur = head;
}
- IFRATE(rate_ctx.new.rxsync++);
+
+ /*
+ * Second part: import newly received packets.
+ */
+ if (!netmap_no_pendintr && !force_update) {
+ return 0;
+ }
+
+ nm_i = kring->nr_hwtail; /* First empty slot in the receive ring. */
+
+ /* Compute the available space (in bytes) in this netmap ring.
+ * The first slot that is not considered in is the one before
+ * nr_hwcur. */
+
+ avail = nm_prev(kring->nr_hwcur, lim) - nm_i;
+ if (avail < 0)
+ avail += lim + 1;
+ avail *= nm_buf_len;
+
+ /* First pass: While holding the lock on the RX mbuf queue,
+ * extract as many mbufs as fit in the available space,
+ * and put them in a temporary queue.
+ * To avoid performing a per-mbuf division (mlen / nm_buf_len) to
+ * update avail, we do the update in a while loop that we
+ * also use to set the RX slots, but without performing the copy. */
+ mbq_init(&tmpq);
+ mbq_lock(&kring->rx_queue);
+ for (n = 0;; n++) {
+ m = mbq_peek(&kring->rx_queue);
+ if (!m) {
+ /* No more packets from the driver. */
+ break;
+ }
+
+ mlen = MBUF_LEN(m);
+ if (mlen > avail) {
+ /* No more space in the ring. */
+ break;
+ }
+
+ mbq_dequeue(&kring->rx_queue);
+
+ while (mlen) {
+ copy = nm_buf_len;
+ if (mlen < copy) {
+ copy = mlen;
+ }
+ mlen -= copy;
+ avail -= nm_buf_len;
+
+ ring->slot[nm_i].len = copy;
+ ring->slot[nm_i].flags = slot_flags | (mlen ? NS_MOREFRAG : 0);
+ nm_i = nm_next(nm_i, lim);
+ }
+
+ mbq_enqueue(&tmpq, m);
+ }
+ mbq_unlock(&kring->rx_queue);
+
+ /* Second pass: Drain the temporary queue, going over the used RX slots,
+ * and perform the copy out of the RX queue lock. */
+ nm_i = kring->nr_hwtail;
+
+ for (;;) {
+ void *nmaddr;
+ int ofs = 0;
+ int morefrag;
+
+ m = mbq_dequeue(&tmpq);
+ if (!m) {
+ break;
+ }
+
+ do {
+ nmaddr = NMB(na, &ring->slot[nm_i]);
+ /* We only check the address here on generic rx rings. */
+ if (nmaddr == NETMAP_BUF_BASE(na)) { /* Bad buffer */
+ m_freem(m);
+ mbq_purge(&tmpq);
+ mbq_fini(&tmpq);
+ return netmap_ring_reinit(kring);
+ }
+
+ copy = ring->slot[nm_i].len;
+ m_copydata(m, ofs, copy, nmaddr);
+ ofs += copy;
+ morefrag = ring->slot[nm_i].flags & NS_MOREFRAG;
+ nm_i = nm_next(nm_i, lim);
+ } while (morefrag);
+
+ m_freem(m);
+ }
+
+ mbq_fini(&tmpq);
+
+ if (n) {
+ kring->nr_hwtail = nm_i;
+ IFRATE(rate_ctx.new.rxpkt += n);
+ }
+ kring->nr_kflags &= ~NKR_PENDINTR;
return 0;
}
@@ -787,9 +1175,8 @@ generic_netmap_dtor(struct netmap_adapter *na)
if (prev_na != NULL) {
D("Released generic NA %p", gna);
- if_rele(ifp);
netmap_adapter_put(prev_na);
- if (na->ifp == NULL) {
+ if (nm_iszombie(na)) {
/*
* The driver has been removed without releasing
* the reference so we need to do it here.
@@ -797,9 +1184,13 @@ generic_netmap_dtor(struct netmap_adapter *na)
netmap_adapter_put(prev_na);
}
}
- WNA(ifp) = prev_na;
- D("Restored native NA %p", prev_na);
+ NM_ATTACH_NA(ifp, prev_na);
+ /*
+ * netmap_detach_common(), which is called after this function,
+ * overrides WNA(ifp) if na->ifp is not NULL.
+ */
na->ifp = NULL;
+ D("Restored native NA %p", prev_na);
}
/*
@@ -823,7 +1214,7 @@ generic_netmap_attach(struct ifnet *ifp)
num_tx_desc = num_rx_desc = netmap_generic_ringsize; /* starting point */
- generic_find_num_desc(ifp, &num_tx_desc, &num_rx_desc); /* ignore errors */
+ nm_os_generic_find_num_desc(ifp, &num_tx_desc, &num_rx_desc); /* ignore errors */
ND("Netmap ring size: TX = %d, RX = %d", num_tx_desc, num_rx_desc);
if (num_tx_desc == 0 || num_rx_desc == 0) {
D("Device has no hw slots (tx %u, rx %u)", num_tx_desc, num_rx_desc);
@@ -855,12 +1246,23 @@ generic_netmap_attach(struct ifnet *ifp)
ND("[GNA] num_rx_queues(%d), real_num_rx_queues(%d)",
ifp->num_rx_queues, ifp->real_num_rx_queues);
- generic_find_num_queues(ifp, &na->num_tx_rings, &na->num_rx_rings);
+ nm_os_generic_find_num_queues(ifp, &na->num_tx_rings, &na->num_rx_rings);
retval = netmap_attach_common(na);
if (retval) {
free(gna, M_DEVBUF);
+ return retval;
}
+ gna->prev = NA(ifp); /* save old na */
+ if (gna->prev != NULL) {
+ netmap_adapter_get(gna->prev);
+ }
+ NM_ATTACH_NA(ifp, na);
+
+ nm_os_generic_set_features(gna);
+
+ D("Created generic NA %p (prev %p)", gna, gna->prev);
+
return retval;
}
diff --git a/sys/dev/netmap/netmap_kern.h b/sys/dev/netmap/netmap_kern.h
index 4aead85285fd..de21f29585e0 100644
--- a/sys/dev/netmap/netmap_kern.h
+++ b/sys/dev/netmap/netmap_kern.h
@@ -1,6 +1,7 @@
/*
- * Copyright (C) 2011-2014 Matteo Landi, Luigi Rizzo. All rights reserved.
- * Copyright (C) 2013-2014 Universita` di Pisa. All rights reserved.
+ * Copyright (C) 2011-2014 Matteo Landi, Luigi Rizzo
+ * Copyright (C) 2013-2016 Universita` di Pisa
+ * All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
@@ -48,24 +49,34 @@
#if defined(CONFIG_NETMAP_GENERIC)
#define WITH_GENERIC
#endif
-#if defined(CONFIG_NETMAP_V1000)
-#define WITH_V1000
+#if defined(CONFIG_NETMAP_PTNETMAP_GUEST)
+#define WITH_PTNETMAP_GUEST
+#endif
+#if defined(CONFIG_NETMAP_PTNETMAP_HOST)
+#define WITH_PTNETMAP_HOST
#endif
-#else /* not linux */
+#elif defined (_WIN32)
+#define WITH_VALE // comment out to disable VALE support
+#define WITH_PIPES
+#define WITH_MONITOR
+#define WITH_GENERIC
+#else /* neither linux nor windows */
#define WITH_VALE // comment out to disable VALE support
#define WITH_PIPES
#define WITH_MONITOR
#define WITH_GENERIC
+#define WITH_PTNETMAP_HOST /* ptnetmap host support */
+#define WITH_PTNETMAP_GUEST /* ptnetmap guest support */
#endif
#if defined(__FreeBSD__)
-#include <sys/selinfo.h>
#define likely(x) __builtin_expect((long)!!(x), 1L)
#define unlikely(x) __builtin_expect((long)!!(x), 0L)
+#define __user
#define NM_LOCK_T struct mtx /* low level spinlock, used to protect queues */
@@ -77,9 +88,11 @@
#define NM_MTX_ASSERT(m) sx_assert(&(m), SA_XLOCKED)
#define NM_SELINFO_T struct nm_selinfo
+#define NM_SELRECORD_T struct thread
#define MBUF_LEN(m) ((m)->m_pkthdr.len)
-#define MBUF_IFP(m) ((m)->m_pkthdr.rcvif)
-#define NM_SEND_UP(ifp, m) ((NA(ifp))->if_input)(ifp, m)
+#define MBUF_TXQ(m) ((m)->m_pkthdr.flowid)
+#define MBUF_TRANSMIT(na, ifp, m) ((na)->if_transmit(ifp, m))
+#define GEN_TX_MBUF_IFP(m) ((m)->m_pkthdr.rcvif)
#define NM_ATOMIC_T volatile int // XXX ?
/* atomic operations */
@@ -98,23 +111,20 @@ struct netmap_adapter *netmap_getna(if_t ifp);
#endif
#if __FreeBSD_version >= 1100027
-#define GET_MBUF_REFCNT(m) ((m)->m_ext.ext_cnt ? *((m)->m_ext.ext_cnt) : -1)
-#define SET_MBUF_REFCNT(m, x) *((m)->m_ext.ext_cnt) = x
-#define PNT_MBUF_REFCNT(m) ((m)->m_ext.ext_cnt)
+#define MBUF_REFCNT(m) ((m)->m_ext.ext_count)
+#define SET_MBUF_REFCNT(m, x) (m)->m_ext.ext_count = x
#else
-#define GET_MBUF_REFCNT(m) ((m)->m_ext.ref_cnt ? *((m)->m_ext.ref_cnt) : -1)
+#define MBUF_REFCNT(m) ((m)->m_ext.ref_cnt ? *((m)->m_ext.ref_cnt) : -1)
#define SET_MBUF_REFCNT(m, x) *((m)->m_ext.ref_cnt) = x
-#define PNT_MBUF_REFCNT(m) ((m)->m_ext.ref_cnt)
#endif
-MALLOC_DECLARE(M_NETMAP);
+#define MBUF_QUEUED(m) 1
struct nm_selinfo {
struct selinfo si;
struct mtx m;
};
-void freebsd_selwakeup(struct nm_selinfo *si, int pri);
// XXX linux struct, not used in FreeBSD
struct net_device_ops {
@@ -131,12 +141,16 @@ struct hrtimer {
#define NM_LOCK_T safe_spinlock_t // see bsd_glue.h
#define NM_SELINFO_T wait_queue_head_t
#define MBUF_LEN(m) ((m)->len)
-#define MBUF_IFP(m) ((m)->dev)
-#define NM_SEND_UP(ifp, m) \
- do { \
- m->priority = NM_MAGIC_PRIORITY_RX; \
- netif_rx(m); \
- } while (0)
+#define MBUF_TRANSMIT(na, ifp, m) \
+ ({ \
+ /* Avoid infinite recursion with generic. */ \
+ m->priority = NM_MAGIC_PRIORITY_TX; \
+ (((struct net_device_ops *)(na)->if_transmit)->ndo_start_xmit(m, ifp)); \
+ 0; \
+ })
+
+/* See explanation in nm_os_generic_xmit_frame. */
+#define GEN_TX_MBUF_IFP(m) ((struct ifnet *)skb_shinfo(m)->destructor_arg)
#define NM_ATOMIC_T volatile long unsigned int
@@ -159,7 +173,51 @@ struct hrtimer {
#define NM_LOCK_T IOLock *
#define NM_SELINFO_T struct selinfo
#define MBUF_LEN(m) ((m)->m_pkthdr.len)
-#define NM_SEND_UP(ifp, m) ((ifp)->if_input)(ifp, m)
+
+#elif defined (_WIN32)
+#include "../../../WINDOWS/win_glue.h"
+
+#define NM_SELRECORD_T IO_STACK_LOCATION
+#define NM_SELINFO_T win_SELINFO // see win_glue.h
+#define NM_LOCK_T win_spinlock_t // see win_glue.h
+#define NM_MTX_T KGUARDED_MUTEX /* OS-specific mutex (sleepable) */
+
+#define NM_MTX_INIT(m) KeInitializeGuardedMutex(&m);
+#define NM_MTX_DESTROY(m) do { (void)(m); } while (0)
+#define NM_MTX_LOCK(m) KeAcquireGuardedMutex(&(m))
+#define NM_MTX_UNLOCK(m) KeReleaseGuardedMutex(&(m))
+#define NM_MTX_ASSERT(m) assert(&m.Count>0)
+
+//These linknames are for the NDIS driver
+#define NETMAP_NDIS_LINKNAME_STRING L"\\DosDevices\\NMAPNDIS"
+#define NETMAP_NDIS_NTDEVICE_STRING L"\\Device\\NMAPNDIS"
+
+//Definition of internal driver-to-driver ioctl codes
+#define NETMAP_KERNEL_XCHANGE_POINTERS _IO('i', 180)
+#define NETMAP_KERNEL_SEND_SHUTDOWN_SIGNAL _IO_direct('i', 195)
+
+//Empty data structures are not permitted by MSVC compiler
+//XXX_ale, try to solve this problem
+struct net_device_ops{
+ char data[1];
+};
+typedef struct ethtool_ops{
+ char data[1];
+};
+typedef struct hrtimer{
+ KTIMER timer;
+ BOOLEAN active;
+ KDPC deferred_proc;
+};
+
+/* MSVC does not have likely/unlikely support */
+#ifdef _MSC_VER
+#define likely(x) (x)
+#define unlikely(x) (x)
+#else
+#define likely(x) __builtin_expect((long)!!(x), 1L)
+#define unlikely(x) __builtin_expect((long)!!(x), 0L)
+#endif //_MSC_VER
#else
@@ -167,6 +225,13 @@ struct hrtimer {
#endif /* end - platform-specific code */
+#ifndef _WIN32 /* support for emulated sysctl */
+#define SYSBEGIN(x)
+#define SYSEND
+#endif /* _WIN32 */
+
+#define NM_ACCESS_ONCE(x) (*(volatile __typeof__(x) *)&(x))
+
#define NMG_LOCK_T NM_MTX_T
#define NMG_LOCK_INIT() NM_MTX_INIT(netmap_global_lock)
#define NMG_LOCK_DESTROY() NM_MTX_DESTROY(netmap_global_lock)
@@ -201,8 +266,36 @@ struct nm_bdg_fwd;
struct nm_bridge;
struct netmap_priv_d;
+/* os-specific NM_SELINFO_T initialization/destruction functions */
+void nm_os_selinfo_init(NM_SELINFO_T *);
+void nm_os_selinfo_uninit(NM_SELINFO_T *);
+
const char *nm_dump_buf(char *p, int len, int lim, char *dst);
+void nm_os_selwakeup(NM_SELINFO_T *si);
+void nm_os_selrecord(NM_SELRECORD_T *sr, NM_SELINFO_T *si);
+
+int nm_os_ifnet_init(void);
+void nm_os_ifnet_fini(void);
+void nm_os_ifnet_lock(void);
+void nm_os_ifnet_unlock(void);
+
+void nm_os_get_module(void);
+void nm_os_put_module(void);
+
+void netmap_make_zombie(struct ifnet *);
+void netmap_undo_zombie(struct ifnet *);
+
+/* passes a packet up to the host stack.
+ * If the packet is sent (or dropped) immediately it returns NULL,
+ * otherwise it links the packet to prev and returns m.
+ * In this case, a final call with m=NULL and prev != NULL will send up
+ * the entire chain to the host stack.
+ */
+void *nm_os_send_up(struct ifnet *, struct mbuf *m, struct mbuf *prev);
+
+int nm_os_mbuf_has_offld(struct mbuf *m);
+
#include "netmap_mbq.h"
extern NMG_LOCK_T netmap_global_lock;
@@ -299,6 +392,19 @@ struct netmap_kring {
uint32_t nr_kflags; /* private driver flags */
#define NKR_PENDINTR 0x1 // Pending interrupt.
#define NKR_EXCLUSIVE 0x2 /* exclusive binding */
+#define NKR_FORWARD 0x4 /* (host ring only) there are
+ packets to forward
+ */
+#define NKR_NEEDRING 0x8 /* ring needed even if users==0
+ * (used internally by pipes and
+ * by ptnetmap host ports)
+ */
+
+ uint32_t nr_mode;
+ uint32_t nr_pending_mode;
+#define NKR_NETMAP_OFF 0x0
+#define NKR_NETMAP_ON 0x1
+
uint32_t nkr_num_slots;
/*
@@ -344,13 +450,14 @@ struct netmap_kring {
* store incoming mbufs in a queue that is drained by
* a rxsync.
*/
- struct mbuf **tx_pool;
- // u_int nr_ntc; /* Emulation of a next-to-clean RX ring pointer. */
- struct mbq rx_queue; /* intercepted rx mbufs. */
+ struct mbuf **tx_pool;
+ struct mbuf *tx_event; /* TX event used as a notification */
+ NM_LOCK_T tx_event_lock; /* protects the tx_event mbuf */
+ struct mbq rx_queue; /* intercepted rx mbufs. */
uint32_t users; /* existing bindings for this ring */
- uint32_t ring_id; /* debugging */
+ uint32_t ring_id; /* kring identifier */
enum txrx tx; /* kind of ring (tx or rx) */
char name[64]; /* diagnostic */
@@ -372,9 +479,6 @@ struct netmap_kring {
struct netmap_kring *pipe; /* if this is a pipe ring,
* pointer to the other end
*/
- struct netmap_ring *save_ring; /* pointer to hidden rings
- * (see netmap_pipe.c for details)
- */
#endif /* WITH_PIPES */
#ifdef WITH_VALE
@@ -397,8 +501,28 @@ struct netmap_kring {
uint32_t mon_tail; /* last seen slot on rx */
uint32_t mon_pos; /* index of this ring in the monitored ring array */
#endif
-} __attribute__((__aligned__(64)));
+}
+#ifdef _WIN32
+__declspec(align(64));
+#else
+__attribute__((__aligned__(64)));
+#endif
+/* return 1 iff the kring needs to be turned on */
+static inline int
+nm_kring_pending_on(struct netmap_kring *kring)
+{
+ return kring->nr_pending_mode == NKR_NETMAP_ON &&
+ kring->nr_mode == NKR_NETMAP_OFF;
+}
+
+/* return 1 iff the kring needs to be turned off */
+static inline int
+nm_kring_pending_off(struct netmap_kring *kring)
+{
+ return kring->nr_pending_mode == NKR_NETMAP_OFF &&
+ kring->nr_mode == NKR_NETMAP_ON;
+}
/* return the next index, with wraparound */
static inline uint32_t
@@ -514,6 +638,8 @@ struct netmap_adapter {
*/
#define NAF_HOST_RINGS 64 /* the adapter supports the host rings */
#define NAF_FORCE_NATIVE 128 /* the adapter is always NATIVE */
+#define NAF_PTNETMAP_HOST 256 /* the adapter supports ptnetmap in the host */
+#define NAF_ZOMBIE (1U<<30) /* the nic driver has been unloaded */
#define NAF_BUSY (1U<<31) /* the adapter is used internally and
* cannot be registered from userspace
*/
@@ -592,10 +718,14 @@ struct netmap_adapter {
* For hw devices this is typically a selwakeup(),
* but for NIC/host ports attached to a switch (or vice-versa)
* we also need to invoke the 'txsync' code downstream.
+ * This callback pointer is actually used only to initialize
+ * kring->nm_notify.
+ * Return values are the same as for netmap_rx_irq().
*/
void (*nm_dtor)(struct netmap_adapter *);
int (*nm_register)(struct netmap_adapter *, int onoff);
+ void (*nm_intr)(struct netmap_adapter *, int onoff);
int (*nm_txsync)(struct netmap_kring *kring, int flags);
int (*nm_rxsync)(struct netmap_kring *kring, int flags);
@@ -640,14 +770,14 @@ struct netmap_adapter {
/* memory allocator (opaque)
* We also cache a pointer to the lut_entry for translating
- * buffer addresses, and the total number of buffers.
+ * buffer addresses, the total number of buffers and the buffer size.
*/
struct netmap_mem_d *nm_mem;
struct netmap_lut na_lut;
/* additional information attached to this adapter
* by other netmap subsystems. Currently used by
- * bwrap and LINUX/v1000.
+ * bwrap, LINUX/v1000 and ptnetmap
*/
void *na_private;
@@ -656,6 +786,9 @@ struct netmap_adapter {
int na_next_pipe; /* next free slot in the array */
int na_max_pipes; /* size of the array */
+ /* Offset of ethernet header for each packet. */
+ u_int virt_hdr_len;
+
char name[64];
};
@@ -721,8 +854,6 @@ struct netmap_vp_adapter { /* VALE software port */
struct nm_bridge *na_bdg;
int retry;
- /* Offset of ethernet header for each packet. */
- u_int virt_hdr_len;
/* Maximum Frame Size, used in bdg_mismatch_datapath() */
u_int mfs;
/* Last source MAC on this port */
@@ -767,6 +898,13 @@ struct netmap_generic_adapter { /* emulated device */
#ifdef linux
netdev_tx_t (*save_start_xmit)(struct mbuf *, struct ifnet *);
#endif
+ /* Is the adapter able to use multiple RX slots to scatter
+ * each packet pushed up by the driver? */
+ int rxsg;
+
+ /* Is the transmission path controlled by a netmap-aware
+ * device queue (i.e. qdisc on linux)? */
+ int txqdisc;
};
#endif /* WITH_GENERIC */
@@ -777,7 +915,7 @@ netmap_real_rings(struct netmap_adapter *na, enum txrx t)
}
#ifdef WITH_VALE
-
+struct nm_bdg_polling_state;
/*
* Bridge wrapper for non VALE ports attached to a VALE switch.
*
@@ -827,9 +965,6 @@ struct netmap_bwrap_adapter {
struct netmap_vp_adapter host; /* for host rings */
struct netmap_adapter *hwna; /* the underlying device */
- /* backup of the hwna memory allocator */
- struct netmap_mem_d *save_nmd;
-
/*
* When we attach a physical interface to the bridge, we
* allow the controlling process to terminate, so we need
@@ -838,10 +973,10 @@ struct netmap_bwrap_adapter {
* are attached to a bridge.
*/
struct netmap_priv_d *na_kpriv;
+ struct nm_bdg_polling_state *na_polling_state;
};
int netmap_bwrap_attach(const char *name, struct netmap_adapter *);
-
#endif /* WITH_VALE */
#ifdef WITH_PIPES
@@ -876,56 +1011,122 @@ nm_kr_rxspace(struct netmap_kring *k)
return space;
}
+/* return slots reserved to tx clients */
+#define nm_kr_txspace(_k) nm_kr_rxspace(_k)
-/* True if no space in the tx ring. only valid after txsync_prologue */
+
+/* True if no space in the tx ring, only valid after txsync_prologue */
static inline int
nm_kr_txempty(struct netmap_kring *kring)
{
return kring->rcur == kring->nr_hwtail;
}
+/* True if no more completed slots in the rx ring, only valid after
+ * rxsync_prologue */
+#define nm_kr_rxempty(_k) nm_kr_txempty(_k)
/*
* protect against multiple threads using the same ring.
- * also check that the ring has not been stopped.
- * We only care for 0 or !=0 as a return code.
+ * also check that the ring has not been stopped or locked
*/
-#define NM_KR_BUSY 1
-#define NM_KR_STOPPED 2
+#define NM_KR_BUSY 1 /* some other thread is syncing the ring */
+#define NM_KR_STOPPED 2 /* unbounded stop (ifconfig down or driver unload) */
+#define NM_KR_LOCKED 3 /* bounded, brief stop for mutual exclusion */
+/* release the previously acquired right to use the *sync() methods of the ring */
static __inline void nm_kr_put(struct netmap_kring *kr)
{
NM_ATOMIC_CLEAR(&kr->nr_busy);
}
-static __inline int nm_kr_tryget(struct netmap_kring *kr)
+/* true if the ifp that backed the adapter has disappeared (e.g., the
+ * driver has been unloaded)
+ */
+static inline int nm_iszombie(struct netmap_adapter *na);
+
+/* try to obtain exclusive right to issue the *sync() operations on the ring.
+ * The right is obtained and must be later relinquished via nm_kr_put() if and
+ * only if nm_kr_tryget() returns 0.
+ * If can_sleep is 1 there are only two other possible outcomes:
+ * - the function returns NM_KR_BUSY
+ * - the function returns NM_KR_STOPPED and sets the POLLERR bit in *perr
+ * (if non-null)
+ * In both cases the caller will typically skip the ring, possibly collecting
+ * errors along the way.
+ * If the calling context does not allow sleeping, the caller must pass 0 in can_sleep.
+ * In the latter case, the function may also return NM_KR_LOCKED and leave *perr
+ * untouched: ideally, the caller should try again at a later time.
+ */
+static __inline int nm_kr_tryget(struct netmap_kring *kr, int can_sleep, int *perr)
{
+ int busy = 1, stopped;
/* check a first time without taking the lock
* to avoid starvation for nm_kr_get()
*/
- if (unlikely(kr->nkr_stopped)) {
- ND("ring %p stopped (%d)", kr, kr->nkr_stopped);
- return NM_KR_STOPPED;
+retry:
+ stopped = kr->nkr_stopped;
+ if (unlikely(stopped)) {
+ goto stop;
}
- if (unlikely(NM_ATOMIC_TEST_AND_SET(&kr->nr_busy)))
- return NM_KR_BUSY;
- /* check a second time with lock held */
- if (unlikely(kr->nkr_stopped)) {
- ND("ring %p stopped (%d)", kr, kr->nkr_stopped);
+ busy = NM_ATOMIC_TEST_AND_SET(&kr->nr_busy);
+ /* we should not return NM_KR_BUSY if the ring was
+ * actually stopped, so check another time after
+ * the barrier provided by the atomic operation
+ */
+ stopped = kr->nkr_stopped;
+ if (unlikely(stopped)) {
+ goto stop;
+ }
+
+ if (unlikely(nm_iszombie(kr->na))) {
+ stopped = NM_KR_STOPPED;
+ goto stop;
+ }
+
+ return unlikely(busy) ? NM_KR_BUSY : 0;
+
+stop:
+ if (!busy)
nm_kr_put(kr);
- return NM_KR_STOPPED;
+ if (stopped == NM_KR_STOPPED) {
+/* if POLLERR is defined we want to use it to simplify netmap_poll().
+ * Otherwise, any non-zero value will do.
+ */
+#ifdef POLLERR
+#define NM_POLLERR POLLERR
+#else
+#define NM_POLLERR 1
+#endif /* POLLERR */
+ if (perr)
+ *perr |= NM_POLLERR;
+#undef NM_POLLERR
+ } else if (can_sleep) {
+ tsleep(kr, 0, "NM_KR_TRYGET", 4);
+ goto retry;
}
- return 0;
+ return stopped;
}
-static __inline void nm_kr_get(struct netmap_kring *kr)
+/* put the ring in the 'stopped' state and wait for the current user (if any) to
+ * notice. stopped must be either NM_KR_STOPPED or NM_KR_LOCKED
+ */
+static __inline void nm_kr_stop(struct netmap_kring *kr, int stopped)
{
+ kr->nkr_stopped = stopped;
while (NM_ATOMIC_TEST_AND_SET(&kr->nr_busy))
tsleep(kr, 0, "NM_KR_GET", 4);
}
+/* restart a ring after a stop */
+static __inline void nm_kr_start(struct netmap_kring *kr)
+{
+ kr->nkr_stopped = 0;
+ nm_kr_put(kr);
+}
+
/*
* The following functions are used by individual drivers to
@@ -953,10 +1154,26 @@ struct netmap_slot *netmap_reset(struct netmap_adapter *na,
enum txrx tx, u_int n, u_int new_cur);
int netmap_ring_reinit(struct netmap_kring *);
+/* Return codes for netmap_*x_irq. */
+enum {
+ /* Driver should do normal interrupt processing, e.g. because
+ * the interface is not in netmap mode. */
+ NM_IRQ_PASS = 0,
+ /* Port is in netmap mode, and the interrupt work has been
+ * completed. The driver does not have to notify netmap
+ * again before the next interrupt. */
+ NM_IRQ_COMPLETED = -1,
+ /* Port is in netmap mode, but the interrupt work has not been
+ * completed. The driver has to make sure netmap will be
+ * notified again soon, even if no more interrupts come (e.g.
+ * on Linux the driver should not call napi_complete()). */
+ NM_IRQ_RESCHED = -2,
+};
+
/* default functions to handle rx/tx interrupts */
int netmap_rx_irq(struct ifnet *, u_int, u_int *);
#define netmap_tx_irq(_n, _q) netmap_rx_irq(_n, _q, NULL)
-void netmap_common_irq(struct ifnet *, u_int, u_int *work_done);
+int netmap_common_irq(struct netmap_adapter *, u_int, u_int *work_done);
#ifdef WITH_VALE
@@ -986,35 +1203,74 @@ nm_native_on(struct netmap_adapter *na)
return nm_netmap_on(na) && (na->na_flags & NAF_NATIVE);
}
+static inline int
+nm_iszombie(struct netmap_adapter *na)
+{
+ return na == NULL || (na->na_flags & NAF_ZOMBIE);
+}
+
+static inline void
+nm_update_hostrings_mode(struct netmap_adapter *na)
+{
+ /* Process nr_mode and nr_pending_mode for host rings. */
+ na->tx_rings[na->num_tx_rings].nr_mode =
+ na->tx_rings[na->num_tx_rings].nr_pending_mode;
+ na->rx_rings[na->num_rx_rings].nr_mode =
+ na->rx_rings[na->num_rx_rings].nr_pending_mode;
+}
+
/* set/clear native flags and if_transmit/netdev_ops */
static inline void
nm_set_native_flags(struct netmap_adapter *na)
{
struct ifnet *ifp = na->ifp;
+ /* We do the setup for intercepting packets only if we are the
+	 * first user of this adapter. */
+ if (na->active_fds > 0) {
+ return;
+ }
+
na->na_flags |= NAF_NETMAP_ON;
#ifdef IFCAP_NETMAP /* or FreeBSD ? */
ifp->if_capenable |= IFCAP_NETMAP;
#endif
-#ifdef __FreeBSD__
+#if defined (__FreeBSD__)
na->if_transmit = ifp->if_transmit;
ifp->if_transmit = netmap_transmit;
+#elif defined (_WIN32)
+ (void)ifp; /* prevent a warning */
+ //XXX_ale can we just comment those?
+ //na->if_transmit = ifp->if_transmit;
+ //ifp->if_transmit = netmap_transmit;
#else
na->if_transmit = (void *)ifp->netdev_ops;
ifp->netdev_ops = &((struct netmap_hw_adapter *)na)->nm_ndo;
((struct netmap_hw_adapter *)na)->save_ethtool = ifp->ethtool_ops;
ifp->ethtool_ops = &((struct netmap_hw_adapter*)na)->nm_eto;
#endif
+ nm_update_hostrings_mode(na);
}
-
static inline void
nm_clear_native_flags(struct netmap_adapter *na)
{
struct ifnet *ifp = na->ifp;
-#ifdef __FreeBSD__
+ /* We undo the setup for intercepting packets only if we are the
+	 * last user of this adapter. */
+ if (na->active_fds > 0) {
+ return;
+ }
+
+ nm_update_hostrings_mode(na);
+
+#if defined(__FreeBSD__)
ifp->if_transmit = na->if_transmit;
+#elif defined(_WIN32)
+ (void)ifp; /* prevent a warning */
+ //XXX_ale can we just comment those?
+ //ifp->if_transmit = na->if_transmit;
#else
ifp->netdev_ops = (void *)na->if_transmit;
ifp->ethtool_ops = ((struct netmap_hw_adapter*)na)->save_ethtool;
@@ -1025,6 +1281,28 @@ nm_clear_native_flags(struct netmap_adapter *na)
#endif
}
+/*
+ * nm_*sync_prologue() functions are used in ioctl/poll and ptnetmap
+ * kthreads.
+ * We need netmap_ring* parameter, because in ptnetmap it is decoupled
+ * from host kring.
+ * The user-space ring pointers (head/cur/tail) are shared through
+ * CSB between host and guest.
+ */
+
+/*
+ * validates parameters in the ring/kring, returns a value for head
+ * If any error, returns ring_size to force a reinit.
+ */
+uint32_t nm_txsync_prologue(struct netmap_kring *, struct netmap_ring *);
+
+
+/*
+ * validates parameters in the ring/kring, returns a value for head
+ * If any error, returns ring_size lim to force a reinit.
+ */
+uint32_t nm_rxsync_prologue(struct netmap_kring *, struct netmap_ring *);
+
/* check/fix address and len in tx rings */
#if 1 /* debug version */
@@ -1080,6 +1358,9 @@ int netmap_krings_create(struct netmap_adapter *na, u_int tailroom);
*/
void netmap_krings_delete(struct netmap_adapter *na);
+int netmap_hw_krings_create(struct netmap_adapter *na);
+void netmap_hw_krings_delete(struct netmap_adapter *na);
+
/* set the stopped/enabled status of ring
* When stopping, they also wait for all current activity on the ring to
* terminate. The status change is then notified using the na nm_notify
@@ -1088,16 +1369,18 @@ void netmap_krings_delete(struct netmap_adapter *na);
void netmap_set_ring(struct netmap_adapter *, u_int ring_id, enum txrx, int stopped);
/* set the stopped/enabled status of all rings of the adapter. */
void netmap_set_all_rings(struct netmap_adapter *, int stopped);
-/* convenience wrappers for netmap_set_all_rings, used in drivers */
+/* convenience wrappers for netmap_set_all_rings */
void netmap_disable_all_rings(struct ifnet *);
void netmap_enable_all_rings(struct ifnet *);
int netmap_do_regif(struct netmap_priv_d *priv, struct netmap_adapter *na,
uint16_t ringid, uint32_t flags);
-
+void netmap_do_unregif(struct netmap_priv_d *priv);
u_int nm_bound_var(u_int *v, u_int dflt, u_int lo, u_int hi, const char *msg);
-int netmap_get_na(struct nmreq *nmr, struct netmap_adapter **na, int create);
+int netmap_get_na(struct nmreq *nmr, struct netmap_adapter **na,
+ struct ifnet **ifp, int create);
+void netmap_unget_na(struct netmap_adapter *na, struct ifnet *ifp);
int netmap_get_hw_na(struct ifnet *ifp, struct netmap_adapter **na);
@@ -1124,12 +1407,11 @@ struct netmap_bdg_ops {
u_int netmap_bdg_learning(struct nm_bdg_fwd *ft, uint8_t *dst_ring,
struct netmap_vp_adapter *);
+#define NM_BRIDGES 8 /* number of bridges */
#define NM_BDG_MAXPORTS 254 /* up to 254 */
#define NM_BDG_BROADCAST NM_BDG_MAXPORTS
#define NM_BDG_NOPORT (NM_BDG_MAXPORTS+1)
-#define NM_NAME "vale" /* prefix for bridge port name */
-
/* these are redefined in case of no VALE support */
int netmap_get_bdg_na(struct nmreq *nmr, struct netmap_adapter **na, int create);
struct nm_bridge *netmap_init_bridges2(u_int);
@@ -1181,14 +1463,13 @@ void netmap_bns_getbridges(struct nm_bridge **, u_int *);
#endif
/* Various prototypes */
-int netmap_poll(struct cdev *dev, int events, struct thread *td);
+int netmap_poll(struct netmap_priv_d *, int events, NM_SELRECORD_T *td);
int netmap_init(void);
void netmap_fini(void);
int netmap_get_memory(struct netmap_priv_d* p);
void netmap_dtor(void *data);
-int netmap_dtor_locked(struct netmap_priv_d *priv);
-int netmap_ioctl(struct cdev *dev, u_long cmd, caddr_t data, int fflag, struct thread *td);
+int netmap_ioctl(struct netmap_priv_d *priv, u_long cmd, caddr_t data, struct thread *);
/* netmap_adapter creation/destruction */
@@ -1228,8 +1509,8 @@ int netmap_adapter_put(struct netmap_adapter *na);
/*
* module variables
*/
-#define NETMAP_BUF_BASE(na) ((na)->na_lut.lut[0].vaddr)
-#define NETMAP_BUF_SIZE(na) ((na)->na_lut.objsize)
+#define NETMAP_BUF_BASE(_na) ((_na)->na_lut.lut[0].vaddr)
+#define NETMAP_BUF_SIZE(_na) ((_na)->na_lut.objsize)
extern int netmap_mitigate; // XXX not really used
extern int netmap_no_pendintr;
extern int netmap_verbose; // XXX debugging
@@ -1245,10 +1526,12 @@ enum { /* verbose flags */
};
extern int netmap_txsync_retry;
+extern int netmap_adaptive_io;
+extern int netmap_flags;
extern int netmap_generic_mit;
extern int netmap_generic_ringsize;
extern int netmap_generic_rings;
-extern int netmap_use_count;
+extern int netmap_generic_txqdisc;
/*
* NA returns a pointer to the struct netmap adapter from the ifp,
@@ -1257,37 +1540,27 @@ extern int netmap_use_count;
#define NA(_ifp) ((struct netmap_adapter *)WNA(_ifp))
/*
- * Macros to determine if an interface is netmap capable or netmap enabled.
- * See the magic field in struct netmap_adapter.
- */
-#ifdef __FreeBSD__
-/*
- * on FreeBSD just use if_capabilities and if_capenable.
- */
-#define NETMAP_CAPABLE(ifp) (NA(ifp) && \
- (ifp)->if_capabilities & IFCAP_NETMAP )
-
-#define NETMAP_SET_CAPABLE(ifp) \
- (ifp)->if_capabilities |= IFCAP_NETMAP
-
-#else /* linux */
-
-/*
- * on linux:
- * we check if NA(ifp) is set and its first element has a related
+ * On old versions of FreeBSD, NA(ifp) is a pspare. On linux we
+ * overload another pointer in the netdev.
+ *
+ * We check if NA(ifp) is set and its first element has a related
* magic value. The capenable is within the struct netmap_adapter.
*/
#define NETMAP_MAGIC 0x52697a7a
-#define NETMAP_CAPABLE(ifp) (NA(ifp) && \
+#define NM_NA_VALID(ifp) (NA(ifp) && \
((uint32_t)(uintptr_t)NA(ifp) ^ NA(ifp)->magic) == NETMAP_MAGIC )
-#define NETMAP_SET_CAPABLE(ifp) \
- NA(ifp)->magic = ((uint32_t)(uintptr_t)NA(ifp)) ^ NETMAP_MAGIC
+#define NM_ATTACH_NA(ifp, na) do { \
+ WNA(ifp) = na; \
+ if (NA(ifp)) \
+ NA(ifp)->magic = \
+ ((uint32_t)(uintptr_t)NA(ifp)) ^ NETMAP_MAGIC; \
+} while(0)
-#endif /* linux */
+#define NM_IS_NATIVE(ifp) (NM_NA_VALID(ifp) && NA(ifp)->nm_dtor == netmap_hw_dtor)
-#ifdef __FreeBSD__
+#if defined(__FreeBSD__)
/* Assigns the device IOMMU domain to an allocator.
* Returns -ENOMEM in case the domain is different */
@@ -1331,6 +1604,8 @@ netmap_reload_map(struct netmap_adapter *na,
}
}
+#elif defined(_WIN32)
+
#else /* linux */
int nm_iommu_group_id(bus_dma_tag_t dev);
@@ -1341,8 +1616,8 @@ netmap_load_map(struct netmap_adapter *na,
bus_dma_tag_t tag, bus_dmamap_t map, void *buf)
{
if (0 && map) {
- *map = dma_map_single(na->pdev, buf, na->na_lut.objsize,
- DMA_BIDIRECTIONAL);
+ *map = dma_map_single(na->pdev, buf, NETMAP_BUF_SIZE(na),
+ DMA_BIDIRECTIONAL);
}
}
@@ -1350,11 +1625,11 @@ static inline void
netmap_unload_map(struct netmap_adapter *na,
bus_dma_tag_t tag, bus_dmamap_t map)
{
- u_int sz = na->na_lut.objsize;
+ u_int sz = NETMAP_BUF_SIZE(na);
if (*map) {
dma_unmap_single(na->pdev, *map, sz,
- DMA_BIDIRECTIONAL);
+ DMA_BIDIRECTIONAL);
}
}
@@ -1362,7 +1637,7 @@ static inline void
netmap_reload_map(struct netmap_adapter *na,
bus_dma_tag_t tag, bus_dmamap_t map, void *buf)
{
- u_int sz = na->na_lut.objsize;
+ u_int sz = NETMAP_BUF_SIZE(na);
if (*map) {
dma_unmap_single(na->pdev, *map, sz,
@@ -1473,7 +1748,11 @@ PNMB(struct netmap_adapter *na, struct netmap_slot *slot, uint64_t *pp)
struct lut_entry *lut = na->na_lut.lut;
void *ret = (i >= na->na_lut.objtotal) ? lut[0].vaddr : lut[i].vaddr;
+#ifndef _WIN32
*pp = (i >= na->na_lut.objtotal) ? lut[0].paddr : lut[i].paddr;
+#else
+ *pp = (i >= na->na_lut.objtotal) ? (uint64_t)lut[0].paddr.QuadPart : (uint64_t)lut[i].paddr.QuadPart;
+#endif
return ret;
}
@@ -1497,8 +1776,9 @@ struct netmap_priv_d {
struct netmap_if * volatile np_nifp; /* netmap if descriptor. */
struct netmap_adapter *np_na;
+ struct ifnet *np_ifp;
uint32_t np_flags; /* from the ioctl */
- u_int np_qfirst[NR_TXRX],
+ u_int np_qfirst[NR_TXRX],
np_qlast[NR_TXRX]; /* range of tx/rx rings to scan */
uint16_t np_txpoll; /* XXX and also np_rxpoll ? */
@@ -1512,6 +1792,26 @@ struct netmap_priv_d {
struct thread *np_td; /* kqueue, just debugging */
};
+struct netmap_priv_d *netmap_priv_new(void);
+void netmap_priv_delete(struct netmap_priv_d *);
+
+static inline int nm_kring_pending(struct netmap_priv_d *np)
+{
+ struct netmap_adapter *na = np->np_na;
+ enum txrx t;
+ int i;
+
+ for_rx_tx(t) {
+ for (i = np->np_qfirst[t]; i < np->np_qlast[t]; i++) {
+ struct netmap_kring *kring = &NMR(na, t)[i];
+ if (kring->nr_mode != kring->nr_pending_mode) {
+ return 1;
+ }
+ }
+ }
+ return 0;
+}
+
#ifdef WITH_MONITOR
struct netmap_monitor_adapter {
@@ -1530,13 +1830,36 @@ struct netmap_monitor_adapter {
* native netmap support.
*/
int generic_netmap_attach(struct ifnet *ifp);
+int generic_rx_handler(struct ifnet *ifp, struct mbuf *m);
+
+int nm_os_catch_rx(struct netmap_generic_adapter *gna, int intercept);
+int nm_os_catch_tx(struct netmap_generic_adapter *gna, int intercept);
+
+/*
+ * the generic transmit routine is passed a structure to optionally
+ * build a queue of descriptors, in an OS-specific way.
+ * The payload is at addr, if non-null, and the routine should send or queue
+ * the packet, returning 0 if successful, 1 on failure.
+ *
+ * At the end, if head is non-null, there will be an additional call
+ * to the function with addr = NULL; this should tell the OS-specific
+ * routine to send the queue and free any resources. Failure is ignored.
+ */
+struct nm_os_gen_arg {
+ struct ifnet *ifp;
+ void *m; /* os-specific mbuf-like object */
+ void *head, *tail; /* tailq, if the OS-specific routine needs to build one */
+ void *addr; /* payload of current packet */
+ u_int len; /* packet length */
+	u_int ring_nr; /* transmit ring index */
+ u_int qevent; /* in txqdisc mode, place an event on this mbuf */
+};
+
+int nm_os_generic_xmit_frame(struct nm_os_gen_arg *);
+int nm_os_generic_find_num_desc(struct ifnet *ifp, u_int *tx, u_int *rx);
+void nm_os_generic_find_num_queues(struct ifnet *ifp, u_int *txq, u_int *rxq);
+void nm_os_generic_set_features(struct netmap_generic_adapter *gna);
-int netmap_catch_rx(struct netmap_generic_adapter *na, int intercept);
-void generic_rx_handler(struct ifnet *ifp, struct mbuf *m);;
-void netmap_catch_tx(struct netmap_generic_adapter *na, int enable);
-int generic_xmit_frame(struct ifnet *ifp, struct mbuf *m, void *addr, u_int len, u_int ring_nr);
-int generic_find_num_desc(struct ifnet *ifp, u_int *tx, u_int *rx);
-void generic_find_num_queues(struct ifnet *ifp, u_int *txq, u_int *rxq);
static inline struct ifnet*
netmap_generic_getifp(struct netmap_generic_adapter *gna)
{
@@ -1546,6 +1869,8 @@ netmap_generic_getifp(struct netmap_generic_adapter *gna)
return gna->up.up.ifp;
}
+void netmap_generic_irq(struct netmap_adapter *na, u_int q, u_int *work_done);
+
//#define RATE_GENERIC /* Enables communication statistics for generic. */
#ifdef RATE_GENERIC
void generic_rate(int txp, int txs, int txi, int rxp, int rxs, int rxi);
@@ -1558,16 +1883,16 @@ void generic_rate(int txp, int txs, int txi, int rxp, int rxs, int rxi);
* to reduce the number of interrupt requests/selwakeup
* to clients on incoming packets.
*/
-void netmap_mitigation_init(struct nm_generic_mit *mit, int idx,
+void nm_os_mitigation_init(struct nm_generic_mit *mit, int idx,
struct netmap_adapter *na);
-void netmap_mitigation_start(struct nm_generic_mit *mit);
-void netmap_mitigation_restart(struct nm_generic_mit *mit);
-int netmap_mitigation_active(struct nm_generic_mit *mit);
-void netmap_mitigation_cleanup(struct nm_generic_mit *mit);
+void nm_os_mitigation_start(struct nm_generic_mit *mit);
+void nm_os_mitigation_restart(struct nm_generic_mit *mit);
+int nm_os_mitigation_active(struct nm_generic_mit *mit);
+void nm_os_mitigation_cleanup(struct nm_generic_mit *mit);
+#else /* !WITH_GENERIC */
+#define generic_netmap_attach(ifp) (EOPNOTSUPP)
#endif /* WITH_GENERIC */
-
-
/* Shared declarations for the VALE switch. */
/*
@@ -1656,22 +1981,111 @@ struct nm_ipv6hdr {
*/
#define rawsum_t uint32_t
-rawsum_t nm_csum_raw(uint8_t *data, size_t len, rawsum_t cur_sum);
-uint16_t nm_csum_ipv4(struct nm_iphdr *iph);
-void nm_csum_tcpudp_ipv4(struct nm_iphdr *iph, void *data,
+rawsum_t nm_os_csum_raw(uint8_t *data, size_t len, rawsum_t cur_sum);
+uint16_t nm_os_csum_ipv4(struct nm_iphdr *iph);
+void nm_os_csum_tcpudp_ipv4(struct nm_iphdr *iph, void *data,
size_t datalen, uint16_t *check);
-void nm_csum_tcpudp_ipv6(struct nm_ipv6hdr *ip6h, void *data,
+void nm_os_csum_tcpudp_ipv6(struct nm_ipv6hdr *ip6h, void *data,
size_t datalen, uint16_t *check);
-uint16_t nm_csum_fold(rawsum_t cur_sum);
+uint16_t nm_os_csum_fold(rawsum_t cur_sum);
void bdg_mismatch_datapath(struct netmap_vp_adapter *na,
struct netmap_vp_adapter *dst_na,
- struct nm_bdg_fwd *ft_p, struct netmap_ring *ring,
+ const struct nm_bdg_fwd *ft_p,
+ struct netmap_ring *dst_ring,
u_int *j, u_int lim, u_int *howmany);
/* persistent virtual port routines */
-int nm_vi_persist(const char *, struct ifnet **);
-void nm_vi_detach(struct ifnet *);
-void nm_vi_init_index(void);
+int nm_os_vi_persist(const char *, struct ifnet **);
+void nm_os_vi_detach(struct ifnet *);
+void nm_os_vi_init_index(void);
+
+/*
+ * kernel thread routines
+ */
+struct nm_kthread; /* OS-specific kthread - opaque */
+typedef void (*nm_kthread_worker_fn_t)(void *data);
+
+/* kthread configuration */
+struct nm_kthread_cfg {
+ long type; /* kthread type/identifier */
+ struct ptnet_ring_cfg event; /* event/ioctl fd */
+ nm_kthread_worker_fn_t worker_fn; /* worker function */
+ void *worker_private;/* worker parameter */
+ int attach_user; /* attach kthread to user process */
+};
+/* kthread configuration */
+struct nm_kthread *nm_os_kthread_create(struct nm_kthread_cfg *cfg);
+int nm_os_kthread_start(struct nm_kthread *);
+void nm_os_kthread_stop(struct nm_kthread *);
+void nm_os_kthread_delete(struct nm_kthread *);
+void nm_os_kthread_wakeup_worker(struct nm_kthread *nmk);
+void nm_os_kthread_send_irq(struct nm_kthread *);
+void nm_os_kthread_set_affinity(struct nm_kthread *, int);
+u_int nm_os_ncpus(void);
+
+#ifdef WITH_PTNETMAP_HOST
+/*
+ * netmap adapter for host ptnetmap ports
+ */
+struct netmap_pt_host_adapter {
+ struct netmap_adapter up;
+
+ struct netmap_adapter *parent;
+ int (*parent_nm_notify)(struct netmap_kring *kring, int flags);
+ void *ptns;
+};
+/* ptnetmap HOST routines */
+int netmap_get_pt_host_na(struct nmreq *nmr, struct netmap_adapter **na, int create);
+int ptnetmap_ctl(struct nmreq *nmr, struct netmap_adapter *na);
+static inline int
+nm_ptnetmap_host_on(struct netmap_adapter *na)
+{
+ return na && na->na_flags & NAF_PTNETMAP_HOST;
+}
+#else /* !WITH_PTNETMAP_HOST */
+#define netmap_get_pt_host_na(nmr, _2, _3) \
+ ((nmr)->nr_flags & (NR_PTNETMAP_HOST) ? EOPNOTSUPP : 0)
+#define ptnetmap_ctl(_1, _2) EINVAL
+#define nm_ptnetmap_host_on(_1) EINVAL
+#endif /* !WITH_PTNETMAP_HOST */
+
+#ifdef WITH_PTNETMAP_GUEST
+/* ptnetmap GUEST routines */
+
+typedef uint32_t (*nm_pt_guest_ptctl_t)(struct ifnet *, uint32_t);
+
+/*
+ * netmap adapter for guest ptnetmap ports
+ */
+struct netmap_pt_guest_adapter {
+ /* The netmap adapter to be used by netmap applications.
+ * This field must be the first, to allow upcast. */
+ struct netmap_hw_adapter hwup;
+
+ /* The netmap adapter to be used by the driver. */
+ struct netmap_hw_adapter dr;
+
+ void *csb;
+
+ /* Reference counter to track users of backend netmap port: the
+ * network stack and netmap clients.
+ * Used to decide when we need (de)allocate krings/rings and
+ * start (stop) ptnetmap kthreads. */
+ int backend_regifs;
+
+};
+
+int netmap_pt_guest_attach(struct netmap_adapter *, void *,
+ unsigned int, nm_pt_guest_ptctl_t);
+struct ptnet_ring;
+bool netmap_pt_guest_txsync(struct ptnet_ring *ptring, struct netmap_kring *kring,
+ int flags);
+bool netmap_pt_guest_rxsync(struct ptnet_ring *ptring, struct netmap_kring *kring,
+ int flags);
+int ptnet_nm_krings_create(struct netmap_adapter *na);
+void ptnet_nm_krings_delete(struct netmap_adapter *na);
+void ptnet_nm_dtor(struct netmap_adapter *na);
+#endif /* WITH_PTNETMAP_GUEST */
#endif /* _NET_NETMAP_KERN_H_ */
diff --git a/sys/dev/netmap/netmap_mbq.c b/sys/dev/netmap/netmap_mbq.c
index 503f5a13aa95..3eb971b74561 100644
--- a/sys/dev/netmap/netmap_mbq.c
+++ b/sys/dev/netmap/netmap_mbq.c
@@ -1,5 +1,6 @@
/*
- * Copyright (C) 2013-2014 Vincenzo Maffione. All rights reserved.
+ * Copyright (C) 2013-2014 Vincenzo Maffione
+ * All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
@@ -30,6 +31,8 @@
#ifdef linux
#include "bsd_glue.h"
+#elif defined (_WIN32)
+#include "win_glue.h"
#else /* __FreeBSD__ */
#include <sys/param.h>
#include <sys/lock.h>
@@ -152,12 +155,12 @@ void mbq_safe_purge(struct mbq *q)
}
-void mbq_safe_destroy(struct mbq *q)
+void mbq_safe_fini(struct mbq *q)
{
mtx_destroy(&q->lock);
}
-void mbq_destroy(struct mbq *q)
+void mbq_fini(struct mbq *q)
{
}
diff --git a/sys/dev/netmap/netmap_mbq.h b/sys/dev/netmap/netmap_mbq.h
index 455ca8a2c3ac..9dafa8b1149b 100644
--- a/sys/dev/netmap/netmap_mbq.h
+++ b/sys/dev/netmap/netmap_mbq.h
@@ -1,5 +1,6 @@
/*
- * Copyright (C) 2013-2014 Vincenzo Maffione. All rights reserved.
+ * Copyright (C) 2013-2014 Vincenzo Maffione
+ * All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
@@ -40,6 +41,8 @@
/* XXX probably rely on a previous definition of SPINLOCK_T */
#ifdef linux
#define SPINLOCK_T safe_spinlock_t
+#elif defined (_WIN32)
+#define SPINLOCK_T win_spinlock_t
#else
#define SPINLOCK_T struct mtx
#endif
@@ -52,16 +55,21 @@ struct mbq {
SPINLOCK_T lock;
};
-/* XXX "destroy" does not match "init" as a name.
- * We should also clarify whether init can be used while
+/* We should clarify whether init can be used while
* holding a lock, and whether mbq_safe_destroy() is a NOP.
*/
void mbq_init(struct mbq *q);
-void mbq_destroy(struct mbq *q);
+void mbq_fini(struct mbq *q);
void mbq_enqueue(struct mbq *q, struct mbuf *m);
struct mbuf *mbq_dequeue(struct mbq *q);
void mbq_purge(struct mbq *q);
+static inline struct mbuf *
+mbq_peek(struct mbq *q)
+{
+ return q->head ? q->head : NULL;
+}
+
static inline void
mbq_lock(struct mbq *q)
{
@@ -76,7 +84,7 @@ mbq_unlock(struct mbq *q)
void mbq_safe_init(struct mbq *q);
-void mbq_safe_destroy(struct mbq *q);
+void mbq_safe_fini(struct mbq *q);
void mbq_safe_enqueue(struct mbq *q, struct mbuf *m);
struct mbuf *mbq_safe_dequeue(struct mbq *q);
void mbq_safe_purge(struct mbq *q);
diff --git a/sys/dev/netmap/netmap_mem2.c b/sys/dev/netmap/netmap_mem2.c
index fd0c06bb8b57..b54c9813c33f 100644
--- a/sys/dev/netmap/netmap_mem2.c
+++ b/sys/dev/netmap/netmap_mem2.c
@@ -1,5 +1,8 @@
/*
- * Copyright (C) 2012-2014 Matteo Landi, Luigi Rizzo, Giuseppe Lettieri. All rights reserved.
+ * Copyright (C) 2012-2014 Matteo Landi
+ * Copyright (C) 2012-2016 Luigi Rizzo
+ * Copyright (C) 2012-2016 Giuseppe Lettieri
+ * All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
@@ -37,6 +40,7 @@ __FBSDID("$FreeBSD$");
#include <sys/types.h>
#include <sys/malloc.h>
+#include <sys/kernel.h> /* MALLOC_DEFINE */
#include <sys/proc.h>
#include <vm/vm.h> /* vtophys */
#include <vm/pmap.h> /* vtophys */
@@ -48,13 +52,26 @@ __FBSDID("$FreeBSD$");
#include <net/vnet.h>
#include <machine/bus.h> /* bus_dmamap_* */
+/* M_NETMAP only used in here */
+MALLOC_DECLARE(M_NETMAP);
+MALLOC_DEFINE(M_NETMAP, "netmap", "Network memory map");
+
#endif /* __FreeBSD__ */
+#ifdef _WIN32
+#include <win_glue.h>
+#endif
+
#include <net/netmap.h>
#include <dev/netmap/netmap_kern.h>
+#include <net/netmap_virt.h>
#include "netmap_mem2.h"
-#define NETMAP_BUF_MAX_NUM 20*4096*2 /* large machine */
+#ifdef _WIN32_USE_SMALL_GENERIC_DEVICES_MEMORY
+#define NETMAP_BUF_MAX_NUM 8*4096 /* if too big takes too much time to allocate */
+#else
+#define NETMAP_BUF_MAX_NUM 20*4096*2 /* large machine */
+#endif
#define NETMAP_POOL_MAX_NAMSZ 32
@@ -111,7 +128,7 @@ struct netmap_obj_pool {
struct netmap_mem_ops {
- void (*nmd_get_lut)(struct netmap_mem_d *, struct netmap_lut*);
+ int (*nmd_get_lut)(struct netmap_mem_d *, struct netmap_lut*);
int (*nmd_get_info)(struct netmap_mem_d *, u_int *size,
u_int *memflags, uint16_t *id);
@@ -130,6 +147,39 @@ struct netmap_mem_ops {
typedef uint16_t nm_memid_t;
+/*
+ * Shared info for netmap allocator
+ *
+ * Each allocator contains this structure as its first netmap_if.
+ * In this way, we can share the same details about the allocator
+ * with the VM.
+ * Used in ptnetmap.
+ */
+struct netmap_mem_shared_info {
+#ifndef _WIN32
+ struct netmap_if up; /* ends with a 0-sized array, which VSC does not like */
+#else /* !_WIN32 */
+ char up[sizeof(struct netmap_if)];
+#endif /* !_WIN32 */
+ uint64_t features;
+#define NMS_FEAT_BUF_POOL 0x0001
+#define NMS_FEAT_MEMSIZE 0x0002
+
+ uint32_t buf_pool_offset;
+ uint32_t buf_pool_objtotal;
+ uint32_t buf_pool_objsize;
+ uint32_t totalsize;
+};
+
+#define NMS_NAME "nms_info"
+#define NMS_VERSION 1
+static const struct netmap_if nms_if_blueprint = {
+ .ni_name = NMS_NAME,
+ .ni_version = NMS_VERSION,
+ .ni_tx_rings = 0,
+ .ni_rx_rings = 0
+};
+
struct netmap_mem_d {
NMA_LOCK_T nm_mtx; /* protect the allocator */
u_int nm_totalsize; /* shorthand */
@@ -151,6 +201,9 @@ struct netmap_mem_d {
struct netmap_mem_ops *ops;
};
+/*
+ * XXX need to fix the case of t0 == void
+ */
#define NMD_DEFCB(t0, name) \
t0 \
netmap_mem_##name(struct netmap_mem_d *nmd) \
@@ -186,7 +239,7 @@ netmap_mem_##name(struct netmap_adapter *na, t1 a1) \
return na->nm_mem->ops->nmd_##name(na, a1); \
}
-NMD_DEFCB1(void, get_lut, struct netmap_lut *);
+NMD_DEFCB1(int, get_lut, struct netmap_lut *);
NMD_DEFCB3(int, get_info, u_int *, u_int *, uint16_t *);
NMD_DEFCB1(vm_paddr_t, ofstophys, vm_ooffset_t);
static int netmap_mem_config(struct netmap_mem_d *);
@@ -201,7 +254,7 @@ NMD_DEFNACB(void, rings_delete);
static int netmap_mem_map(struct netmap_obj_pool *, struct netmap_adapter *);
static int netmap_mem_unmap(struct netmap_obj_pool *, struct netmap_adapter *);
-static int nm_mem_assign_group(struct netmap_mem_d *, device_t);
+static int nm_mem_assign_group(struct netmap_mem_d *, struct device *);
#define NMA_LOCK_INIT(n) NM_MTX_INIT((n)->nm_mtx)
#define NMA_LOCK_DESTROY(n) NM_MTX_DESTROY((n)->nm_mtx)
@@ -248,7 +301,9 @@ netmap_mem_finalize(struct netmap_mem_d *nmd, struct netmap_adapter *na)
if (nm_mem_assign_group(nmd, na->pdev) < 0) {
return ENOMEM;
} else {
- nmd->ops->nmd_finalize(nmd);
+ NMA_LOCK(nmd);
+ nmd->lasterr = nmd->ops->nmd_finalize(nmd);
+ NMA_UNLOCK(nmd);
}
if (!nmd->lasterr && na->pdev)
@@ -257,26 +312,83 @@ netmap_mem_finalize(struct netmap_mem_d *nmd, struct netmap_adapter *na)
return nmd->lasterr;
}
+static int netmap_mem_init_shared_info(struct netmap_mem_d *nmd);
+
void
netmap_mem_deref(struct netmap_mem_d *nmd, struct netmap_adapter *na)
{
NMA_LOCK(nmd);
netmap_mem_unmap(&nmd->pools[NETMAP_BUF_POOL], na);
+ if (nmd->active == 1) {
+ u_int i;
+
+ /*
+ * Reset the allocator when it falls out of use so that any
+ * pool resources leaked by unclean application exits are
+ * reclaimed.
+ */
+ for (i = 0; i < NETMAP_POOLS_NR; i++) {
+ struct netmap_obj_pool *p;
+ u_int j;
+
+ p = &nmd->pools[i];
+ p->objfree = p->objtotal;
+ /*
+ * Reproduce the net effect of the M_ZERO malloc()
+ * and marking of free entries in the bitmap that
+ * occur in finalize_obj_allocator()
+ */
+ memset(p->bitmap,
+ '\0',
+ sizeof(uint32_t) * ((p->objtotal + 31) / 32));
+
+ /*
+ * Set all the bits in the bitmap that have
+ * corresponding buffers to 1 to indicate they are
+ * free.
+ */
+ for (j = 0; j < p->objtotal; j++) {
+ if (p->lut[j].vaddr != NULL) {
+ p->bitmap[ (j>>5) ] |= ( 1 << (j & 31) );
+ }
+ }
+ }
+
+ /*
+ * Per netmap_mem_finalize_all(),
+ * buffers 0 and 1 are reserved
+ */
+ nmd->pools[NETMAP_BUF_POOL].objfree -= 2;
+ if (nmd->pools[NETMAP_BUF_POOL].bitmap) {
+ /* XXX This check is a workaround that prevents a
+ * NULL pointer crash which currently happens only
+ * with ptnetmap guests. Also,
+ * netmap_mem_init_shared_info must not be called
+ * by ptnetmap guest. */
+ nmd->pools[NETMAP_BUF_POOL].bitmap[0] = ~3;
+
+ /* expose info to the ptnetmap guest */
+ netmap_mem_init_shared_info(nmd);
+ }
+ }
+ nmd->ops->nmd_deref(nmd);
+
NMA_UNLOCK(nmd);
- return nmd->ops->nmd_deref(nmd);
}
/* accessor functions */
-static void
+static int
netmap_mem2_get_lut(struct netmap_mem_d *nmd, struct netmap_lut *lut)
{
lut->lut = nmd->pools[NETMAP_BUF_POOL].lut;
lut->objtotal = nmd->pools[NETMAP_BUF_POOL].objtotal;
lut->objsize = nmd->pools[NETMAP_BUF_POOL]._objsize;
+
+ return 0;
}
-struct netmap_obj_params netmap_params[NETMAP_POOLS_NR] = {
+static struct netmap_obj_params netmap_params[NETMAP_POOLS_NR] = {
[NETMAP_IF_POOL] = {
.size = 1024,
.num = 100,
@@ -291,10 +403,10 @@ struct netmap_obj_params netmap_params[NETMAP_POOLS_NR] = {
},
};
-struct netmap_obj_params netmap_min_priv_params[NETMAP_POOLS_NR] = {
+static struct netmap_obj_params netmap_min_priv_params[NETMAP_POOLS_NR] = {
[NETMAP_IF_POOL] = {
.size = 1024,
- .num = 1,
+ .num = 2,
},
[NETMAP_RING_POOL] = {
.size = 5*PAGE_SIZE,
@@ -348,11 +460,12 @@ struct netmap_mem_d nm_mem = { /* Our memory allocator. */
};
-struct netmap_mem_d *netmap_last_mem_d = &nm_mem;
+static struct netmap_mem_d *netmap_last_mem_d = &nm_mem;
/* blueprint for the private memory allocators */
extern struct netmap_mem_ops netmap_mem_private_ops; /* forward */
-const struct netmap_mem_d nm_blueprint = {
+/* XXX clang is not happy about using name as a print format */
+static const struct netmap_mem_d nm_blueprint = {
.pools = {
[NETMAP_IF_POOL] = {
.name = "%s_if",
@@ -388,6 +501,8 @@ const struct netmap_mem_d nm_blueprint = {
#define DECLARE_SYSCTLS(id, name) \
+ SYSBEGIN(mem2_ ## name); \
+ SYSCTL_DECL(_dev_netmap); /* leave it here, easier for porting */ \
SYSCTL_INT(_dev_netmap, OID_AUTO, name##_size, \
CTLFLAG_RW, &netmap_params[id].size, 0, "Requested size of netmap " STRINGIFY(name) "s"); \
SYSCTL_INT(_dev_netmap, OID_AUTO, name##_curr_size, \
@@ -401,22 +516,21 @@ const struct netmap_mem_d nm_blueprint = {
"Default size of private netmap " STRINGIFY(name) "s"); \
SYSCTL_INT(_dev_netmap, OID_AUTO, priv_##name##_num, \
CTLFLAG_RW, &netmap_min_priv_params[id].num, 0, \
- "Default number of private netmap " STRINGIFY(name) "s")
+ "Default number of private netmap " STRINGIFY(name) "s"); \
+ SYSEND
-SYSCTL_DECL(_dev_netmap);
DECLARE_SYSCTLS(NETMAP_IF_POOL, if);
DECLARE_SYSCTLS(NETMAP_RING_POOL, ring);
DECLARE_SYSCTLS(NETMAP_BUF_POOL, buf);
+/* call with NMA_LOCK(&nm_mem) held */
static int
-nm_mem_assign_id(struct netmap_mem_d *nmd)
+nm_mem_assign_id_locked(struct netmap_mem_d *nmd)
{
nm_memid_t id;
struct netmap_mem_d *scan = netmap_last_mem_d;
int error = ENOMEM;
- NMA_LOCK(&nm_mem);
-
do {
/* we rely on unsigned wrap around */
id = scan->nm_id + 1;
@@ -435,10 +549,22 @@ nm_mem_assign_id(struct netmap_mem_d *nmd)
}
} while (scan != netmap_last_mem_d);
- NMA_UNLOCK(&nm_mem);
return error;
}
+/* call with NMA_LOCK(&nm_mem) *not* held */
+static int
+nm_mem_assign_id(struct netmap_mem_d *nmd)
+{
+ int ret;
+
+ NMA_LOCK(&nm_mem);
+ ret = nm_mem_assign_id_locked(nmd);
+ NMA_UNLOCK(&nm_mem);
+
+ return ret;
+}
+
static void
nm_mem_release_id(struct netmap_mem_d *nmd)
{
@@ -456,7 +582,7 @@ nm_mem_release_id(struct netmap_mem_d *nmd)
}
static int
-nm_mem_assign_group(struct netmap_mem_d *nmd, device_t dev)
+nm_mem_assign_group(struct netmap_mem_d *nmd, struct device *dev)
{
int err = 0, id;
id = nm_iommu_group_id(dev);
@@ -494,8 +620,13 @@ netmap_mem2_ofstophys(struct netmap_mem_d* nmd, vm_ooffset_t offset)
if (offset >= p[i].memtotal)
continue;
// now lookup the cluster's address
+#ifndef _WIN32
pa = vtophys(p[i].lut[offset / p[i]._objsize].vaddr) +
offset % p[i]._objsize;
+#else
+ pa = vtophys(p[i].lut[offset / p[i]._objsize].vaddr);
+ pa.QuadPart += offset % p[i]._objsize;
+#endif
NMA_UNLOCK(nmd);
return pa;
}
@@ -508,7 +639,110 @@ netmap_mem2_ofstophys(struct netmap_mem_d* nmd, vm_ooffset_t offset)
+ p[NETMAP_RING_POOL].memtotal
+ p[NETMAP_BUF_POOL].memtotal);
NMA_UNLOCK(nmd);
+#ifndef _WIN32
return 0; // XXX bad address
+#else
+ vm_paddr_t res;
+ res.QuadPart = 0;
+ return res;
+#endif
+}
+
+#ifdef _WIN32
+
+/*
+ * win32_build_virtual_memory_for_userspace
+ *
+ * This function gets all the objects making up the pools and maps
+ * a contiguous virtual memory space for userspace.
+ * It works this way
+ * 1 - allocate a Memory Descriptor List as wide as the sum
+ * of the memory needed for the pools
+ * 2 - cycle all the objects in every pool and for every object do
+ *
+ * 2a - cycle all the objects in every pool, get the list
+ * of the physical address descriptors
+ * 2b - calculate the offset in the array of page descriptors in the
+ * main MDL
+ * 2c - copy the descriptors of the object in the main MDL
+ *
+ * 3 - return the resulting MDL that needs to be mapped in userland
+ *
+ * In this way we will have an MDL that describes all the memory for the
+ * objects in a single object
+*/
+
+PMDL
+win32_build_user_vm_map(struct netmap_mem_d* nmd)
+{
+ int i, j;
+ u_int memsize, memflags, ofs = 0;
+ PMDL mainMdl, tempMdl;
+
+ if (netmap_mem_get_info(nmd, &memsize, &memflags, NULL)) {
+ D("memory not finalised yet");
+ return NULL;
+ }
+
+ mainMdl = IoAllocateMdl(NULL, memsize, FALSE, FALSE, NULL);
+ if (mainMdl == NULL) {
+ D("failed to allocate mdl");
+ return NULL;
+ }
+
+ NMA_LOCK(nmd);
+ for (i = 0; i < NETMAP_POOLS_NR; i++) {
+ struct netmap_obj_pool *p = &nmd->pools[i];
+ int clsz = p->_clustsize;
+ int clobjs = p->_clustentries; /* objects per cluster */
+ int mdl_len = sizeof(PFN_NUMBER) * BYTES_TO_PAGES(clsz);
+ PPFN_NUMBER pSrc, pDst;
+
+ /* each pool has a different cluster size so we need to reallocate */
+ tempMdl = IoAllocateMdl(p->lut[0].vaddr, clsz, FALSE, FALSE, NULL);
+ if (tempMdl == NULL) {
+ NMA_UNLOCK(nmd);
+ D("fail to allocate tempMdl");
+ IoFreeMdl(mainMdl);
+ return NULL;
+ }
+ pSrc = MmGetMdlPfnArray(tempMdl);
+ /* create one entry per cluster, the lut[] has one entry per object */
+ for (j = 0; j < p->numclusters; j++, ofs += clsz) {
+ pDst = &MmGetMdlPfnArray(mainMdl)[BYTES_TO_PAGES(ofs)];
+ MmInitializeMdl(tempMdl, p->lut[j*clobjs].vaddr, clsz);
+ MmBuildMdlForNonPagedPool(tempMdl); /* compute physical page addresses */
+ RtlCopyMemory(pDst, pSrc, mdl_len); /* copy the page descriptors */
+ mainMdl->MdlFlags = tempMdl->MdlFlags; /* XXX what is in here ? */
+ }
+ IoFreeMdl(tempMdl);
+ }
+ NMA_UNLOCK(nmd);
+ return mainMdl;
+}
+
+#endif /* _WIN32 */
+
+/*
+ * helper function for OS-specific mmap routines (currently only windows).
+ * Given an nmd and a pool index, returns the cluster size and number of clusters.
+ * Returns 0 if memory is finalised and the pool is valid, otherwise 1.
+ * It should be called under NMA_LOCK(nmd) otherwise the underlying info can change.
+ */
+
+int
+netmap_mem2_get_pool_info(struct netmap_mem_d* nmd, u_int pool, u_int *clustsize, u_int *numclusters)
+{
+ if (!nmd || !clustsize || !numclusters || pool >= NETMAP_POOLS_NR)
+ return 1; /* invalid arguments */
+ // NMA_LOCK_ASSERT(nmd);
+ if (!(nmd->flags & NETMAP_MEM_FINALIZED)) {
+ *clustsize = *numclusters = 0;
+ return 1; /* not ready yet */
+ }
+ *clustsize = nmd->pools[pool]._clustsize;
+ *numclusters = nmd->pools[pool].numclusters;
+ return 0; /* success */
}
static int
@@ -578,12 +812,6 @@ netmap_obj_offset(struct netmap_obj_pool *p, const void *vaddr)
((n)->pools[NETMAP_IF_POOL].memtotal + \
netmap_obj_offset(&(n)->pools[NETMAP_RING_POOL], (v)))
-#define netmap_buf_offset(n, v) \
- ((n)->pools[NETMAP_IF_POOL].memtotal + \
- (n)->pools[NETMAP_RING_POOL].memtotal + \
- netmap_obj_offset(&(n)->pools[NETMAP_BUF_POOL], (v)))
-
-
static ssize_t
netmap_mem2_if_offset(struct netmap_mem_d *nmd, const void *addr)
{
@@ -602,7 +830,7 @@ static void *
netmap_obj_malloc(struct netmap_obj_pool *p, u_int len, uint32_t *start, uint32_t *index)
{
uint32_t i = 0; /* index in the bitmap */
- uint32_t mask, j; /* slot counter */
+ uint32_t mask, j = 0; /* slot counter */
void *vaddr = NULL;
if (len > p->_objsize) {
@@ -636,7 +864,7 @@ netmap_obj_malloc(struct netmap_obj_pool *p, u_int len, uint32_t *start, uint32_
if (index)
*index = i * 32 + j;
}
- ND("%s allocator: allocated object @ [%d][%d]: vaddr %p", i, j, vaddr);
+ ND("%s allocator: allocated object @ [%d][%d]: vaddr %p",p->name, i, j, vaddr);
if (start)
*start = i;
@@ -733,7 +961,7 @@ netmap_extra_alloc(struct netmap_adapter *na, uint32_t *head, uint32_t n)
*head = cur; /* restore */
break;
}
- RD(5, "allocate buffer %d -> %d", *head, cur);
+ ND(5, "allocate buffer %d -> %d", *head, cur);
*p = cur; /* link to previous head */
}
@@ -750,7 +978,7 @@ netmap_extra_free(struct netmap_adapter *na, uint32_t head)
struct netmap_obj_pool *p = &nmd->pools[NETMAP_BUF_POOL];
uint32_t i, cur, *buf;
- D("freeing the extra list");
+ ND("freeing the extra list");
for (i = 0; head >=2 && head < p->objtotal; i++) {
cur = head;
buf = lut[head].vaddr;
@@ -761,7 +989,8 @@ netmap_extra_free(struct netmap_adapter *na, uint32_t head)
}
if (head != 0)
D("breaking with head %d", head);
- D("freed %d buffers", i);
+ if (netmap_verbose)
+ D("freed %d buffers", i);
}
@@ -846,7 +1075,6 @@ netmap_reset_obj_allocator(struct netmap_obj_pool *p)
p->bitmap = NULL;
if (p->lut) {
u_int i;
- size_t sz = p->_clustsize;
/*
* Free each cluster allocated in
@@ -856,7 +1084,7 @@ netmap_reset_obj_allocator(struct netmap_obj_pool *p)
*/
for (i = 0; i < p->objtotal; i += p->_clustentries) {
if (p->lut[i].vaddr)
- contigfree(p->lut[i].vaddr, sz, M_NETMAP);
+ contigfree(p->lut[i].vaddr, p->_clustsize, M_NETMAP);
}
bzero(p->lut, sizeof(struct lut_entry) * p->objtotal);
#ifdef linux
@@ -973,6 +1201,18 @@ netmap_config_obj_allocator(struct netmap_obj_pool *p, u_int objtotal, u_int obj
return 0;
}
+static struct lut_entry *
+nm_alloc_lut(u_int nobj)
+{
+ size_t n = sizeof(struct lut_entry) * nobj;
+ struct lut_entry *lut;
+#ifdef linux
+ lut = vmalloc(n);
+#else
+ lut = malloc(n, M_NETMAP, M_NOWAIT | M_ZERO);
+#endif
+ return lut;
+}
/* call with NMA_LOCK held */
static int
@@ -985,14 +1225,9 @@ netmap_finalize_obj_allocator(struct netmap_obj_pool *p)
p->numclusters = p->_numclusters;
p->objtotal = p->_objtotal;
- n = sizeof(struct lut_entry) * p->objtotal;
-#ifdef linux
- p->lut = vmalloc(n);
-#else
- p->lut = malloc(n, M_NETMAP, M_NOWAIT | M_ZERO);
-#endif
+ p->lut = nm_alloc_lut(p->objtotal);
if (p->lut == NULL) {
- D("Unable to create lookup table (%d bytes) for '%s'", (int)n, p->name);
+ D("Unable to create lookup table for '%s'", p->name);
goto clean;
}
@@ -1015,6 +1250,13 @@ netmap_finalize_obj_allocator(struct netmap_obj_pool *p)
int lim = i + p->_clustentries;
char *clust;
+ /*
+ * XXX Note, we only need contigmalloc() for buffers attached
+ * to native interfaces. In all other cases (nifp, netmap rings
+ * and even buffers for VALE ports or emulated interfaces) we
+ * can live with standard malloc, because the hardware will not
+ * access the pages directly.
+ */
clust = contigmalloc(n, M_NETMAP, M_NOWAIT | M_ZERO,
(size_t)0, -1UL, PAGE_SIZE, 0);
if (clust == NULL) {
@@ -1108,10 +1350,15 @@ netmap_mem_unmap(struct netmap_obj_pool *p, struct netmap_adapter *na)
if (na->pdev == NULL)
return 0;
-#ifdef __FreeBSD__
+#if defined(__FreeBSD__)
(void)i;
(void)lim;
D("unsupported on FreeBSD");
+
+#elif defined(_WIN32)
+ (void)i;
+ (void)lim;
+ D("unsupported on Windows"); //XXX_ale, really?
#else /* linux */
for (i = 2; i < lim; i++) {
netmap_unload_map(na, (bus_dma_tag_t) na->pdev, &p->lut[i].paddr);
@@ -1124,8 +1371,10 @@ netmap_mem_unmap(struct netmap_obj_pool *p, struct netmap_adapter *na)
static int
netmap_mem_map(struct netmap_obj_pool *p, struct netmap_adapter *na)
{
-#ifdef __FreeBSD__
+#if defined(__FreeBSD__)
D("unsupported on FreeBSD");
+#elif defined(_WIN32)
+ D("unsupported on Windows"); //XXX_ale, really?
#else /* linux */
int i, lim = p->_objtotal;
@@ -1142,6 +1391,30 @@ netmap_mem_map(struct netmap_obj_pool *p, struct netmap_adapter *na)
}
static int
+netmap_mem_init_shared_info(struct netmap_mem_d *nmd)
+{
+ struct netmap_mem_shared_info *nms_info;
+ ssize_t base;
+
+ /* Use the first slot in IF_POOL */
+ nms_info = netmap_if_malloc(nmd, sizeof(*nms_info));
+ if (nms_info == NULL) {
+ return ENOMEM;
+ }
+
+ base = netmap_if_offset(nmd, nms_info);
+
+ memcpy(&nms_info->up, &nms_if_blueprint, sizeof(nms_if_blueprint));
+ nms_info->buf_pool_offset = nmd->pools[NETMAP_IF_POOL].memtotal + nmd->pools[NETMAP_RING_POOL].memtotal;
+ nms_info->buf_pool_objtotal = nmd->pools[NETMAP_BUF_POOL].objtotal;
+ nms_info->buf_pool_objsize = nmd->pools[NETMAP_BUF_POOL]._objsize;
+ nms_info->totalsize = nmd->nm_totalsize;
+ nms_info->features = NMS_FEAT_BUF_POOL | NMS_FEAT_MEMSIZE;
+
+ return 0;
+}
+
+static int
netmap_mem_finalize_all(struct netmap_mem_d *nmd)
{
int i;
@@ -1160,6 +1433,11 @@ netmap_mem_finalize_all(struct netmap_mem_d *nmd)
nmd->pools[NETMAP_BUF_POOL].bitmap[0] = ~3;
nmd->flags |= NETMAP_MEM_FINALIZED;
+ /* expose info to the ptnetmap guest */
+ nmd->lasterr = netmap_mem_init_shared_info(nmd);
+ if (nmd->lasterr)
+ goto error;
+
if (netmap_verbose)
D("interfaces %d KB, rings %d KB, buffers %d MB",
nmd->pools[NETMAP_IF_POOL].memtotal >> 10,
@@ -1207,10 +1485,9 @@ static int
netmap_mem_private_finalize(struct netmap_mem_d *nmd)
{
int err;
- NMA_LOCK(nmd);
- nmd->active++;
err = netmap_mem_finalize_all(nmd);
- NMA_UNLOCK(nmd);
+ if (!err)
+ nmd->active++;
return err;
}
@@ -1218,10 +1495,8 @@ netmap_mem_private_finalize(struct netmap_mem_d *nmd)
static void
netmap_mem_private_deref(struct netmap_mem_d *nmd)
{
- NMA_LOCK(nmd);
if (--nmd->active <= 0)
netmap_mem_reset_all(nmd);
- NMA_UNLOCK(nmd);
}
@@ -1238,7 +1513,7 @@ netmap_mem_private_new(const char *name, u_int txr, u_int txd,
u_int v, maxd;
d = malloc(sizeof(struct netmap_mem_d),
- M_DEVBUF, M_NOWAIT | M_ZERO);
+ M_DEVBUF, M_NOWAIT | M_ZERO);
if (d == NULL) {
err = ENOMEM;
goto error;
@@ -1357,10 +1632,10 @@ static int
netmap_mem_global_finalize(struct netmap_mem_d *nmd)
{
int err;
-
+
/* update configuration if changed */
if (netmap_mem_global_config(nmd))
- goto out;
+ return nmd->lasterr;
nmd->active++;
@@ -1417,13 +1692,17 @@ netmap_free_rings(struct netmap_adapter *na)
for_rx_tx(t) {
u_int i;
- for (i = 0; i < netmap_real_rings(na, t); i++) {
+ for (i = 0; i < nma_get_nrings(na, t) + 1; i++) {
struct netmap_kring *kring = &NMR(na, t)[i];
struct netmap_ring *ring = kring->ring;
- if (ring == NULL)
+ if (ring == NULL || kring->users > 0 || (kring->nr_kflags & NKR_NEEDRING)) {
+ ND("skipping ring %s (ring %p, users %d)",
+ kring->name, ring, kring->users);
continue;
- netmap_free_bufs(na->nm_mem, ring->slot, kring->nkr_num_slots);
+ }
+ if (i != nma_get_nrings(na, t) || na->na_flags & NAF_HOST_RINGS)
+ netmap_free_bufs(na->nm_mem, ring->slot, kring->nkr_num_slots);
netmap_ring_free(na->nm_mem, ring);
kring->ring = NULL;
}
@@ -1452,9 +1731,10 @@ netmap_mem2_rings_create(struct netmap_adapter *na)
struct netmap_ring *ring = kring->ring;
u_int len, ndesc;
- if (ring) {
- ND("%s already created", kring->name);
- continue; /* already created by somebody else */
+ if (ring || (!kring->users && !(kring->nr_kflags & NKR_NEEDRING))) {
+ 		/* unneeded, or already created by somebody else */
+ ND("skipping ring %s", kring->name);
+ continue;
}
ndesc = kring->nkr_num_slots;
len = sizeof(struct netmap_ring) +
@@ -1569,10 +1849,22 @@ netmap_mem2_if_new(struct netmap_adapter *na)
*/
base = netmap_if_offset(na->nm_mem, nifp);
for (i = 0; i < n[NR_TX]; i++) {
+ if (na->tx_rings[i].ring == NULL) {
+ // XXX maybe use the offset of an error ring,
+ // like we do for buffers?
+ *(ssize_t *)(uintptr_t)&nifp->ring_ofs[i] = 0;
+ continue;
+ }
*(ssize_t *)(uintptr_t)&nifp->ring_ofs[i] =
netmap_ring_offset(na->nm_mem, na->tx_rings[i].ring) - base;
}
for (i = 0; i < n[NR_RX]; i++) {
+ if (na->rx_rings[i].ring == NULL) {
+ // XXX maybe use the offset of an error ring,
+ // like we do for buffers?
+ *(ssize_t *)(uintptr_t)&nifp->ring_ofs[i+n[NR_TX]] = 0;
+ continue;
+ }
*(ssize_t *)(uintptr_t)&nifp->ring_ofs[i+n[NR_TX]] =
netmap_ring_offset(na->nm_mem, na->rx_rings[i].ring) - base;
}
@@ -1636,3 +1928,531 @@ struct netmap_mem_ops netmap_mem_private_ops = {
.nmd_rings_create = netmap_mem2_rings_create,
.nmd_rings_delete = netmap_mem2_rings_delete
};
+
+#ifdef WITH_PTNETMAP_GUEST
+struct mem_pt_if {
+ struct mem_pt_if *next;
+ struct ifnet *ifp;
+ unsigned int nifp_offset;
+ nm_pt_guest_ptctl_t ptctl;
+};
+
+/* Netmap allocator for ptnetmap guests. */
+struct netmap_mem_ptg {
+ struct netmap_mem_d up;
+
+ vm_paddr_t nm_paddr; /* physical address in the guest */
+ void *nm_addr; /* virtual address in the guest */
+ struct netmap_lut buf_lut; /* lookup table for BUF pool in the guest */
+ nm_memid_t nm_host_id; /* allocator identifier in the host */
+ struct ptnetmap_memdev *ptn_dev;
+ struct mem_pt_if *pt_ifs; /* list of interfaces in passthrough */
+};
+
+/* Link a passthrough interface to a passthrough netmap allocator. */
+static int
+netmap_mem_pt_guest_ifp_add(struct netmap_mem_d *nmd, struct ifnet *ifp,
+ unsigned int nifp_offset,
+ nm_pt_guest_ptctl_t ptctl)
+{
+ struct netmap_mem_ptg *ptnmd = (struct netmap_mem_ptg *)nmd;
+ struct mem_pt_if *ptif = malloc(sizeof(*ptif), M_NETMAP,
+ M_NOWAIT | M_ZERO);
+
+ if (!ptif) {
+ return ENOMEM;
+ }
+
+ NMA_LOCK(nmd);
+
+ ptif->ifp = ifp;
+ ptif->nifp_offset = nifp_offset;
+ ptif->ptctl = ptctl;
+
+ if (ptnmd->pt_ifs) {
+ ptif->next = ptnmd->pt_ifs;
+ }
+ ptnmd->pt_ifs = ptif;
+
+ NMA_UNLOCK(nmd);
+
+ D("added (ifp=%p,nifp_offset=%u)", ptif->ifp, ptif->nifp_offset);
+
+ return 0;
+}
+
+/* Called with NMA_LOCK(nmd) held. */
+static struct mem_pt_if *
+netmap_mem_pt_guest_ifp_lookup(struct netmap_mem_d *nmd, struct ifnet *ifp)
+{
+ struct netmap_mem_ptg *ptnmd = (struct netmap_mem_ptg *)nmd;
+ struct mem_pt_if *curr;
+
+ for (curr = ptnmd->pt_ifs; curr; curr = curr->next) {
+ if (curr->ifp == ifp) {
+ return curr;
+ }
+ }
+
+ return NULL;
+}
+
+/* Unlink a passthrough interface from a passthrough netmap allocator. */
+int
+netmap_mem_pt_guest_ifp_del(struct netmap_mem_d *nmd, struct ifnet *ifp)
+{
+ struct netmap_mem_ptg *ptnmd = (struct netmap_mem_ptg *)nmd;
+ struct mem_pt_if *prev = NULL;
+ struct mem_pt_if *curr;
+ int ret = -1;
+
+ NMA_LOCK(nmd);
+
+ for (curr = ptnmd->pt_ifs; curr; curr = curr->next) {
+ if (curr->ifp == ifp) {
+ if (prev) {
+ prev->next = curr->next;
+ } else {
+ ptnmd->pt_ifs = curr->next;
+ }
+ D("removed (ifp=%p,nifp_offset=%u)",
+ curr->ifp, curr->nifp_offset);
+ free(curr, M_NETMAP);
+ ret = 0;
+ break;
+ }
+ prev = curr;
+ }
+
+ NMA_UNLOCK(nmd);
+
+ return ret;
+}
+
+/* Read allocator info from the first netmap_if (only on finalize) */
+static int
+netmap_mem_pt_guest_read_shared_info(struct netmap_mem_d *nmd)
+{
+ struct netmap_mem_ptg *ptnmd = (struct netmap_mem_ptg *)nmd;
+ struct netmap_mem_shared_info *nms_info;
+ uint32_t bufsize;
+ uint32_t nbuffers;
+ char *vaddr;
+ vm_paddr_t paddr;
+ int i;
+
+ nms_info = (struct netmap_mem_shared_info *)ptnmd->nm_addr;
+ if (strncmp(nms_info->up.ni_name, NMS_NAME, sizeof(NMS_NAME)) != 0) {
+ D("error, the first slot does not contain shared info");
+ return EINVAL;
+ }
+ /* check features mem_shared info */
+ if ((nms_info->features & (NMS_FEAT_BUF_POOL | NMS_FEAT_MEMSIZE)) !=
+ (NMS_FEAT_BUF_POOL | NMS_FEAT_MEMSIZE)) {
+ D("error, the shared info does not contain BUF_POOL and MEMSIZE");
+ return EINVAL;
+ }
+
+ bufsize = nms_info->buf_pool_objsize;
+ nbuffers = nms_info->buf_pool_objtotal;
+
+ /* allocate the lut */
+ if (ptnmd->buf_lut.lut == NULL) {
+ D("allocating lut");
+ ptnmd->buf_lut.lut = nm_alloc_lut(nbuffers);
+ if (ptnmd->buf_lut.lut == NULL) {
+ D("lut allocation failed");
+ return ENOMEM;
+ }
+ }
+
+ /* we have physically contiguous memory mapped through PCI BAR */
+ vaddr = (char *)(ptnmd->nm_addr) + nms_info->buf_pool_offset;
+ paddr = ptnmd->nm_paddr + nms_info->buf_pool_offset;
+
+ for (i = 0; i < nbuffers; i++) {
+ ptnmd->buf_lut.lut[i].vaddr = vaddr;
+ ptnmd->buf_lut.lut[i].paddr = paddr;
+ vaddr += bufsize;
+ paddr += bufsize;
+ }
+
+ ptnmd->buf_lut.objtotal = nbuffers;
+ ptnmd->buf_lut.objsize = bufsize;
+
+ nmd->nm_totalsize = nms_info->totalsize;
+
+ return 0;
+}
+
+static int
+netmap_mem_pt_guest_get_lut(struct netmap_mem_d *nmd, struct netmap_lut *lut)
+{
+ struct netmap_mem_ptg *ptnmd = (struct netmap_mem_ptg *)nmd;
+
+ if (!(nmd->flags & NETMAP_MEM_FINALIZED)) {
+ return EINVAL;
+ }
+
+ *lut = ptnmd->buf_lut;
+ return 0;
+}
+
+static int
+netmap_mem_pt_guest_get_info(struct netmap_mem_d *nmd, u_int *size,
+ u_int *memflags, uint16_t *id)
+{
+ int error = 0;
+
+ NMA_LOCK(nmd);
+
+ error = nmd->ops->nmd_config(nmd);
+ if (error)
+ goto out;
+
+ if (size)
+ *size = nmd->nm_totalsize;
+ if (memflags)
+ *memflags = nmd->flags;
+ if (id)
+ *id = nmd->nm_id;
+
+out:
+ NMA_UNLOCK(nmd);
+
+ return error;
+}
+
+static vm_paddr_t
+netmap_mem_pt_guest_ofstophys(struct netmap_mem_d *nmd, vm_ooffset_t off)
+{
+ struct netmap_mem_ptg *ptnmd = (struct netmap_mem_ptg *)nmd;
+ vm_paddr_t paddr;
+ /* if the offset is valid, just return csb->base_addr + off */
+ paddr = (vm_paddr_t)(ptnmd->nm_paddr + off);
+ ND("off %lx padr %lx", off, (unsigned long)paddr);
+ return paddr;
+}
+
+static int
+netmap_mem_pt_guest_config(struct netmap_mem_d *nmd)
+{
+ /* nothing to do, we are configured on creation
+ * and configuration never changes thereafter
+ */
+ return 0;
+}
+
+static int
+netmap_mem_pt_guest_finalize(struct netmap_mem_d *nmd)
+{
+ struct netmap_mem_ptg *ptnmd = (struct netmap_mem_ptg *)nmd;
+ int error = 0;
+
+ nmd->active++;
+
+ if (nmd->flags & NETMAP_MEM_FINALIZED)
+ goto out;
+
+ if (ptnmd->ptn_dev == NULL) {
+ D("ptnetmap memdev not attached");
+ error = ENOMEM;
+ goto err;
+ }
+ /* map memory through ptnetmap-memdev BAR */
+ error = nm_os_pt_memdev_iomap(ptnmd->ptn_dev, &ptnmd->nm_paddr,
+ &ptnmd->nm_addr);
+ if (error)
+ goto err;
+
+	/* read allocator info and create lut */
+ error = netmap_mem_pt_guest_read_shared_info(nmd);
+ if (error)
+ goto err;
+
+ nmd->flags |= NETMAP_MEM_FINALIZED;
+out:
+ return 0;
+err:
+ nmd->active--;
+ return error;
+}
+
+static void
+netmap_mem_pt_guest_deref(struct netmap_mem_d *nmd)
+{
+ struct netmap_mem_ptg *ptnmd = (struct netmap_mem_ptg *)nmd;
+
+ nmd->active--;
+ if (nmd->active <= 0 &&
+ (nmd->flags & NETMAP_MEM_FINALIZED)) {
+ nmd->flags &= ~NETMAP_MEM_FINALIZED;
+ /* unmap ptnetmap-memdev memory */
+ if (ptnmd->ptn_dev) {
+ nm_os_pt_memdev_iounmap(ptnmd->ptn_dev);
+ }
+ ptnmd->nm_addr = 0;
+ ptnmd->nm_paddr = 0;
+ }
+}
+
+static ssize_t
+netmap_mem_pt_guest_if_offset(struct netmap_mem_d *nmd, const void *vaddr)
+{
+ struct netmap_mem_ptg *ptnmd = (struct netmap_mem_ptg *)nmd;
+
+ return (const char *)(vaddr) - (char *)(ptnmd->nm_addr);
+}
+
+static void
+netmap_mem_pt_guest_delete(struct netmap_mem_d *nmd)
+{
+ if (nmd == NULL)
+ return;
+ if (netmap_verbose)
+ D("deleting %p", nmd);
+ if (nmd->active > 0)
+ D("bug: deleting mem allocator with active=%d!", nmd->active);
+ nm_mem_release_id(nmd);
+ if (netmap_verbose)
+ D("done deleting %p", nmd);
+ NMA_LOCK_DESTROY(nmd);
+ free(nmd, M_DEVBUF);
+}
+
+static struct netmap_if *
+netmap_mem_pt_guest_if_new(struct netmap_adapter *na)
+{
+ struct netmap_mem_ptg *ptnmd = (struct netmap_mem_ptg *)na->nm_mem;
+ struct mem_pt_if *ptif;
+ struct netmap_if *nifp = NULL;
+
+ NMA_LOCK(na->nm_mem);
+
+ ptif = netmap_mem_pt_guest_ifp_lookup(na->nm_mem, na->ifp);
+ if (ptif == NULL) {
+ D("Error: interface %p is not in passthrough", na->ifp);
+ goto out;
+ }
+
+ nifp = (struct netmap_if *)((char *)(ptnmd->nm_addr) +
+ ptif->nifp_offset);
+ NMA_UNLOCK(na->nm_mem);
+out:
+ return nifp;
+}
+
+static void
+netmap_mem_pt_guest_if_delete(struct netmap_adapter *na, struct netmap_if *nifp)
+{
+ struct mem_pt_if *ptif;
+
+ NMA_LOCK(na->nm_mem);
+
+ ptif = netmap_mem_pt_guest_ifp_lookup(na->nm_mem, na->ifp);
+ if (ptif == NULL) {
+ D("Error: interface %p is not in passthrough", na->ifp);
+ goto out;
+ }
+
+ ptif->ptctl(na->ifp, PTNETMAP_PTCTL_IFDELETE);
+out:
+ NMA_UNLOCK(na->nm_mem);
+}
+
+static int
+netmap_mem_pt_guest_rings_create(struct netmap_adapter *na)
+{
+ struct netmap_mem_ptg *ptnmd = (struct netmap_mem_ptg *)na->nm_mem;
+ struct mem_pt_if *ptif;
+ struct netmap_if *nifp;
+ int i, error = -1;
+
+ NMA_LOCK(na->nm_mem);
+
+ ptif = netmap_mem_pt_guest_ifp_lookup(na->nm_mem, na->ifp);
+ if (ptif == NULL) {
+ D("Error: interface %p is not in passthrough", na->ifp);
+ goto out;
+ }
+
+
+ /* point each kring to the corresponding backend ring */
+ nifp = (struct netmap_if *)((char *)ptnmd->nm_addr + ptif->nifp_offset);
+ for (i = 0; i <= na->num_tx_rings; i++) {
+ struct netmap_kring *kring = na->tx_rings + i;
+ if (kring->ring)
+ continue;
+ kring->ring = (struct netmap_ring *)
+ ((char *)nifp + nifp->ring_ofs[i]);
+ }
+ for (i = 0; i <= na->num_rx_rings; i++) {
+ struct netmap_kring *kring = na->rx_rings + i;
+ if (kring->ring)
+ continue;
+ kring->ring = (struct netmap_ring *)
+ ((char *)nifp +
+ nifp->ring_ofs[i + na->num_tx_rings + 1]);
+ }
+
+ //error = ptif->ptctl->nm_ptctl(ifp, PTNETMAP_PTCTL_RINGSCREATE);
+ error = 0;
+out:
+ NMA_UNLOCK(na->nm_mem);
+
+ return error;
+}
+
+static void
+netmap_mem_pt_guest_rings_delete(struct netmap_adapter *na)
+{
+ /* TODO: remove?? */
+#if 0
+ struct netmap_mem_ptg *ptnmd = (struct netmap_mem_ptg *)na->nm_mem;
+ struct mem_pt_if *ptif = netmap_mem_pt_guest_ifp_lookup(na->nm_mem,
+ na->ifp);
+#endif
+}
+
+static struct netmap_mem_ops netmap_mem_pt_guest_ops = {
+ .nmd_get_lut = netmap_mem_pt_guest_get_lut,
+ .nmd_get_info = netmap_mem_pt_guest_get_info,
+ .nmd_ofstophys = netmap_mem_pt_guest_ofstophys,
+ .nmd_config = netmap_mem_pt_guest_config,
+ .nmd_finalize = netmap_mem_pt_guest_finalize,
+ .nmd_deref = netmap_mem_pt_guest_deref,
+ .nmd_if_offset = netmap_mem_pt_guest_if_offset,
+ .nmd_delete = netmap_mem_pt_guest_delete,
+ .nmd_if_new = netmap_mem_pt_guest_if_new,
+ .nmd_if_delete = netmap_mem_pt_guest_if_delete,
+ .nmd_rings_create = netmap_mem_pt_guest_rings_create,
+ .nmd_rings_delete = netmap_mem_pt_guest_rings_delete
+};
+
+/* Called with NMA_LOCK(&nm_mem) held. */
+static struct netmap_mem_d *
+netmap_mem_pt_guest_find_hostid(nm_memid_t host_id)
+{
+ struct netmap_mem_d *mem = NULL;
+ struct netmap_mem_d *scan = netmap_last_mem_d;
+
+ do {
+ /* find ptnetmap allocator through host ID */
+ if (scan->ops->nmd_deref == netmap_mem_pt_guest_deref &&
+ ((struct netmap_mem_ptg *)(scan))->nm_host_id == host_id) {
+ mem = scan;
+ break;
+ }
+ scan = scan->next;
+ } while (scan != netmap_last_mem_d);
+
+ return mem;
+}
+
+/* Called with NMA_LOCK(&nm_mem) held. */
+static struct netmap_mem_d *
+netmap_mem_pt_guest_create(nm_memid_t host_id)
+{
+ struct netmap_mem_ptg *ptnmd;
+ int err = 0;
+
+ ptnmd = malloc(sizeof(struct netmap_mem_ptg),
+ M_DEVBUF, M_NOWAIT | M_ZERO);
+ if (ptnmd == NULL) {
+ err = ENOMEM;
+ goto error;
+ }
+
+ ptnmd->up.ops = &netmap_mem_pt_guest_ops;
+ ptnmd->nm_host_id = host_id;
+ ptnmd->pt_ifs = NULL;
+
+ /* Assign new id in the guest (We have the lock) */
+ err = nm_mem_assign_id_locked(&ptnmd->up);
+ if (err)
+ goto error;
+
+ ptnmd->up.flags &= ~NETMAP_MEM_FINALIZED;
+ ptnmd->up.flags |= NETMAP_MEM_IO;
+
+ NMA_LOCK_INIT(&ptnmd->up);
+
+ return &ptnmd->up;
+error:
+ netmap_mem_pt_guest_delete(&ptnmd->up);
+ return NULL;
+}
+
+/*
+ * find host id in guest allocators and create guest allocator
+ * if it is not there
+ */
+static struct netmap_mem_d *
+netmap_mem_pt_guest_get(nm_memid_t host_id)
+{
+ struct netmap_mem_d *nmd;
+
+ NMA_LOCK(&nm_mem);
+ nmd = netmap_mem_pt_guest_find_hostid(host_id);
+ if (nmd == NULL) {
+ nmd = netmap_mem_pt_guest_create(host_id);
+ }
+ NMA_UNLOCK(&nm_mem);
+
+ return nmd;
+}
+
+/*
+ * The guest allocator can be created by ptnetmap_memdev (during the device
+ * attach) or by ptnetmap device (e1000/virtio), during the netmap_attach.
+ *
+ * The order is not important (we have different order in LINUX and FreeBSD).
+ * The first one, creates the device, and the second one simply attaches it.
+ */
+
+/* Called when ptnetmap_memdev is attaching, to attach a new allocator in
+ * the guest */
+struct netmap_mem_d *
+netmap_mem_pt_guest_attach(struct ptnetmap_memdev *ptn_dev, nm_memid_t host_id)
+{
+ struct netmap_mem_d *nmd;
+ struct netmap_mem_ptg *ptnmd;
+
+ nmd = netmap_mem_pt_guest_get(host_id);
+
+ /* assign this device to the guest allocator */
+ if (nmd) {
+ ptnmd = (struct netmap_mem_ptg *)nmd;
+ ptnmd->ptn_dev = ptn_dev;
+ }
+
+ return nmd;
+}
+
+/* Called when ptnetmap device (virtio/e1000) is attaching */
+struct netmap_mem_d *
+netmap_mem_pt_guest_new(struct ifnet *ifp,
+ unsigned int nifp_offset,
+ nm_pt_guest_ptctl_t ptctl)
+{
+ struct netmap_mem_d *nmd;
+ nm_memid_t host_id;
+
+ if (ifp == NULL || ptctl == NULL) {
+ return NULL;
+ }
+
+ /* Get the host id allocator. */
+ host_id = ptctl(ifp, PTNETMAP_PTCTL_HOSTMEMID);
+
+ nmd = netmap_mem_pt_guest_get(host_id);
+
+ if (nmd) {
+ netmap_mem_pt_guest_ifp_add(nmd, ifp, nifp_offset,
+ ptctl);
+ }
+
+ return nmd;
+}
+
+#endif /* WITH_PTNETMAP_GUEST */
diff --git a/sys/dev/netmap/netmap_mem2.h b/sys/dev/netmap/netmap_mem2.h
index ef0ff96d8e7f..7f4c5e9e9624 100644
--- a/sys/dev/netmap/netmap_mem2.h
+++ b/sys/dev/netmap/netmap_mem2.h
@@ -1,5 +1,8 @@
/*
- * Copyright (C) 2012-2014 Matteo Landi, Luigi Rizzo, Giuseppe Lettieri. All rights reserved.
+ * Copyright (C) 2012-2014 Matteo Landi
+ * Copyright (C) 2012-2016 Luigi Rizzo
+ * Copyright (C) 2012-2016 Giuseppe Lettieri
+ * All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
@@ -117,8 +120,11 @@
extern struct netmap_mem_d nm_mem;
-void netmap_mem_get_lut(struct netmap_mem_d *, struct netmap_lut *);
+int netmap_mem_get_lut(struct netmap_mem_d *, struct netmap_lut *);
vm_paddr_t netmap_mem_ofstophys(struct netmap_mem_d *, vm_ooffset_t);
+#ifdef _WIN32
+PMDL win32_build_user_vm_map(struct netmap_mem_d* nmd);
+#endif
int netmap_mem_finalize(struct netmap_mem_d *, struct netmap_adapter *);
int netmap_mem_init(void);
void netmap_mem_fini(void);
@@ -127,6 +133,7 @@ void netmap_mem_if_delete(struct netmap_adapter *, struct netmap_if *);
int netmap_mem_rings_create(struct netmap_adapter *);
void netmap_mem_rings_delete(struct netmap_adapter *);
void netmap_mem_deref(struct netmap_mem_d *, struct netmap_adapter *);
+int netmap_mem2_get_pool_info(struct netmap_mem_d *, u_int, u_int *, u_int *);
int netmap_mem_get_info(struct netmap_mem_d *, u_int *size, u_int *memflags, uint16_t *id);
ssize_t netmap_mem_if_offset(struct netmap_mem_d *, const void *vaddr);
struct netmap_mem_d* netmap_mem_private_new(const char *name,
@@ -157,6 +164,15 @@ void netmap_mem_put(struct netmap_mem_d *);
#endif /* !NM_DEBUG_PUTGET */
+#ifdef WITH_PTNETMAP_GUEST
+struct netmap_mem_d* netmap_mem_pt_guest_new(struct ifnet *,
+ unsigned int nifp_offset,
+ nm_pt_guest_ptctl_t);
+struct ptnetmap_memdev;
+struct netmap_mem_d* netmap_mem_pt_guest_attach(struct ptnetmap_memdev *, uint16_t);
+int netmap_mem_pt_guest_ifp_del(struct netmap_mem_d *, struct ifnet *);
+#endif /* WITH_PTNETMAP_GUEST */
+
#define NETMAP_MEM_PRIVATE 0x2 /* allocator uses private address space */
#define NETMAP_MEM_IO 0x4 /* the underlying memory is mmapped I/O */
diff --git a/sys/dev/netmap/netmap_monitor.c b/sys/dev/netmap/netmap_monitor.c
index c303952417ff..5b4f9cdf61c0 100644
--- a/sys/dev/netmap/netmap_monitor.c
+++ b/sys/dev/netmap/netmap_monitor.c
@@ -1,5 +1,6 @@
/*
- * Copyright (C) 2014 Giuseppe Lettieri. All rights reserved.
+ * Copyright (C) 2014-2016 Giuseppe Lettieri
+ * All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
@@ -101,6 +102,8 @@
#warning OSX support is only partial
#include "osx_glue.h"
+#elif defined(_WIN32)
+#include "win_glue.h"
#else
#error Unsupported platform
@@ -151,13 +154,17 @@ netmap_monitor_rxsync(struct netmap_kring *kring, int flags)
}
/* nm_krings_create callbacks for monitors.
- * We could use the default netmap_hw_krings_zmon, but
- * we don't need the mbq.
*/
static int
netmap_monitor_krings_create(struct netmap_adapter *na)
{
- return netmap_krings_create(na, 0);
+ int error = netmap_krings_create(na, 0);
+ if (error)
+ return error;
+ /* override the host rings callbacks */
+ na->tx_rings[na->num_tx_rings].nm_sync = netmap_monitor_txsync;
+ na->rx_rings[na->num_rx_rings].nm_sync = netmap_monitor_rxsync;
+ return 0;
}
/* nm_krings_delete callback for monitors */
@@ -186,7 +193,11 @@ nm_monitor_alloc(struct netmap_kring *kring, u_int n)
return 0;
len = sizeof(struct netmap_kring *) * n;
+#ifndef _WIN32
nm = realloc(kring->monitors, len, M_DEVBUF, M_NOWAIT | M_ZERO);
+#else
+ nm = realloc(kring->monitors, len, sizeof(struct netmap_kring *)*kring->max_monitors);
+#endif
if (nm == NULL)
return ENOMEM;
@@ -229,10 +240,10 @@ static int netmap_monitor_parent_notify(struct netmap_kring *, int);
static int
netmap_monitor_add(struct netmap_kring *mkring, struct netmap_kring *kring, int zcopy)
{
- int error = 0;
+ int error = NM_IRQ_COMPLETED;
/* sinchronize with concurrently running nm_sync()s */
- nm_kr_get(kring);
+ nm_kr_stop(kring, NM_KR_LOCKED);
/* make sure the monitor array exists and is big enough */
error = nm_monitor_alloc(kring, kring->n_monitors + 1);
if (error)
@@ -242,7 +253,7 @@ netmap_monitor_add(struct netmap_kring *mkring, struct netmap_kring *kring, int
kring->n_monitors++;
if (kring->n_monitors == 1) {
/* this is the first monitor, intercept callbacks */
- D("%s: intercept callbacks on %s", mkring->name, kring->name);
+ ND("%s: intercept callbacks on %s", mkring->name, kring->name);
kring->mon_sync = kring->nm_sync;
/* zcopy monitors do not override nm_notify(), but
* we save the original one regardless, so that
@@ -265,7 +276,7 @@ netmap_monitor_add(struct netmap_kring *mkring, struct netmap_kring *kring, int
}
out:
- nm_kr_put(kring);
+ nm_kr_start(kring);
return error;
}
@@ -277,7 +288,7 @@ static void
netmap_monitor_del(struct netmap_kring *mkring, struct netmap_kring *kring)
{
/* sinchronize with concurrently running nm_sync()s */
- nm_kr_get(kring);
+ nm_kr_stop(kring, NM_KR_LOCKED);
kring->n_monitors--;
if (mkring->mon_pos != kring->n_monitors) {
kring->monitors[mkring->mon_pos] = kring->monitors[kring->n_monitors];
@@ -286,18 +297,18 @@ netmap_monitor_del(struct netmap_kring *mkring, struct netmap_kring *kring)
kring->monitors[kring->n_monitors] = NULL;
if (kring->n_monitors == 0) {
/* this was the last monitor, restore callbacks and delete monitor array */
- D("%s: restoring sync on %s: %p", mkring->name, kring->name, kring->mon_sync);
+ ND("%s: restoring sync on %s: %p", mkring->name, kring->name, kring->mon_sync);
kring->nm_sync = kring->mon_sync;
kring->mon_sync = NULL;
if (kring->tx == NR_RX) {
- D("%s: restoring notify on %s: %p",
+ ND("%s: restoring notify on %s: %p",
mkring->name, kring->name, kring->mon_notify);
kring->nm_notify = kring->mon_notify;
kring->mon_notify = NULL;
}
nm_monitor_dealloc(kring);
}
- nm_kr_put(kring);
+ nm_kr_start(kring);
}
@@ -316,7 +327,7 @@ netmap_monitor_stop(struct netmap_adapter *na)
for_rx_tx(t) {
u_int i;
- for (i = 0; i < nma_get_nrings(na, t); i++) {
+ for (i = 0; i < nma_get_nrings(na, t) + 1; i++) {
struct netmap_kring *kring = &NMR(na, t)[i];
u_int j;
@@ -360,23 +371,32 @@ netmap_monitor_reg_common(struct netmap_adapter *na, int onoff, int zmon)
for (i = priv->np_qfirst[t]; i < priv->np_qlast[t]; i++) {
kring = &NMR(pna, t)[i];
mkring = &na->rx_rings[i];
- netmap_monitor_add(mkring, kring, zmon);
+ if (nm_kring_pending_on(mkring)) {
+ netmap_monitor_add(mkring, kring, zmon);
+ mkring->nr_mode = NKR_NETMAP_ON;
+ }
}
}
}
na->na_flags |= NAF_NETMAP_ON;
} else {
- if (pna == NULL) {
- D("%s: parent left netmap mode, nothing to restore", na->name);
- return 0;
- }
- na->na_flags &= ~NAF_NETMAP_ON;
+ if (na->active_fds == 0)
+ na->na_flags &= ~NAF_NETMAP_ON;
for_rx_tx(t) {
if (mna->flags & nm_txrx2flag(t)) {
for (i = priv->np_qfirst[t]; i < priv->np_qlast[t]; i++) {
- kring = &NMR(pna, t)[i];
mkring = &na->rx_rings[i];
- netmap_monitor_del(mkring, kring);
+ if (nm_kring_pending_off(mkring)) {
+ mkring->nr_mode = NKR_NETMAP_OFF;
+ /* we cannot access the parent krings if the parent
+ * has left netmap mode. This is signaled by a NULL
+ * pna pointer
+ */
+ if (pna) {
+ kring = &NMR(pna, t)[i];
+ netmap_monitor_del(mkring, kring);
+ }
+ }
}
}
}
@@ -652,17 +672,27 @@ netmap_monitor_parent_rxsync(struct netmap_kring *kring, int flags)
static int
netmap_monitor_parent_notify(struct netmap_kring *kring, int flags)
{
+ int (*notify)(struct netmap_kring*, int);
ND(5, "%s %x", kring->name, flags);
/* ?xsync callbacks have tryget called by their callers
* (NIOCREGIF and poll()), but here we have to call it
* by ourself
*/
- if (nm_kr_tryget(kring))
- goto out;
- netmap_monitor_parent_rxsync(kring, NAF_FORCE_READ);
+ if (nm_kr_tryget(kring, 0, NULL)) {
+ /* in all cases, just skip the sync */
+ return NM_IRQ_COMPLETED;
+ }
+ if (kring->n_monitors > 0) {
+ netmap_monitor_parent_rxsync(kring, NAF_FORCE_READ);
+ notify = kring->mon_notify;
+ } else {
+ /* we are no longer monitoring this ring, so both
+ * mon_sync and mon_notify are NULL
+ */
+ notify = kring->nm_notify;
+ }
nm_kr_put(kring);
-out:
- return kring->mon_notify(kring, flags);
+ return notify(kring, flags);
}
@@ -691,18 +721,25 @@ netmap_get_monitor_na(struct nmreq *nmr, struct netmap_adapter **na, int create)
struct nmreq pnmr;
struct netmap_adapter *pna; /* parent adapter */
struct netmap_monitor_adapter *mna;
+ struct ifnet *ifp = NULL;
int i, error;
enum txrx t;
int zcopy = (nmr->nr_flags & NR_ZCOPY_MON);
char monsuff[10] = "";
if ((nmr->nr_flags & (NR_MONITOR_TX | NR_MONITOR_RX)) == 0) {
+ if (nmr->nr_flags & NR_ZCOPY_MON) {
+ /* the flag makes no sense unless you are
+ * creating a monitor
+ */
+ return EINVAL;
+ }
ND("not a monitor");
return 0;
}
/* this is a request for a monitor adapter */
- D("flags %x", nmr->nr_flags);
+ ND("flags %x", nmr->nr_flags);
mna = malloc(sizeof(*mna), M_DEVBUF, M_NOWAIT | M_ZERO);
if (mna == NULL) {
@@ -716,13 +753,14 @@ netmap_get_monitor_na(struct nmreq *nmr, struct netmap_adapter **na, int create)
* except other monitors.
*/
memcpy(&pnmr, nmr, sizeof(pnmr));
- pnmr.nr_flags &= ~(NR_MONITOR_TX | NR_MONITOR_RX);
- error = netmap_get_na(&pnmr, &pna, create);
+ pnmr.nr_flags &= ~(NR_MONITOR_TX | NR_MONITOR_RX | NR_ZCOPY_MON);
+ error = netmap_get_na(&pnmr, &pna, &ifp, create);
if (error) {
D("parent lookup failed: %d", error);
+ free(mna, M_DEVBUF);
return error;
}
- D("found parent: %s", pna->name);
+ ND("found parent: %s", pna->name);
if (!nm_netmap_on(pna)) {
/* parent not in netmap mode */
@@ -829,19 +867,17 @@ netmap_get_monitor_na(struct nmreq *nmr, struct netmap_adapter **na, int create)
*na = &mna->up;
netmap_adapter_get(*na);
- /* write the configuration back */
- nmr->nr_tx_rings = mna->up.num_tx_rings;
- nmr->nr_rx_rings = mna->up.num_rx_rings;
- nmr->nr_tx_slots = mna->up.num_tx_desc;
- nmr->nr_rx_slots = mna->up.num_rx_desc;
-
/* keep the reference to the parent */
- D("monitor ok");
+ ND("monitor ok");
+
+ /* drop the reference to the ifp, if any */
+ if (ifp)
+ if_rele(ifp);
return 0;
put_out:
- netmap_adapter_put(pna);
+ netmap_unget_na(pna, ifp);
free(mna, M_DEVBUF);
return error;
}
diff --git a/sys/dev/netmap/netmap_offloadings.c b/sys/dev/netmap/netmap_offloadings.c
index dadc1dcbc14c..f8da672ffa53 100644
--- a/sys/dev/netmap/netmap_offloadings.c
+++ b/sys/dev/netmap/netmap_offloadings.c
@@ -1,5 +1,6 @@
/*
- * Copyright (C) 2014 Vincenzo Maffione. All rights reserved.
+ * Copyright (C) 2014-2015 Vincenzo Maffione
+ * All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
@@ -31,9 +32,9 @@
#include <sys/types.h>
#include <sys/errno.h>
#include <sys/param.h> /* defines used in kernel.h */
-#include <sys/malloc.h> /* types used in module initialization */
#include <sys/kernel.h> /* types used in module initialization */
#include <sys/sockio.h>
+#include <sys/malloc.h>
#include <sys/socketvar.h> /* struct socket */
#include <sys/socket.h> /* sockaddrs */
#include <net/if.h>
@@ -64,21 +65,21 @@
/* This routine is called by bdg_mismatch_datapath() when it finishes
* accumulating bytes for a segment, in order to fix some fields in the
* segment headers (which still contain the same content as the header
- * of the original GSO packet). 'buf' points to the beginning (e.g.
- * the ethernet header) of the segment, and 'len' is its length.
+ * of the original GSO packet). 'pkt' points to the beginning of the IP
+ * header of the segment, while 'len' is the length of the IP packet.
*/
-static void gso_fix_segment(uint8_t *buf, size_t len, u_int idx,
- u_int segmented_bytes, u_int last_segment,
- u_int tcp, u_int iphlen)
+static void
+gso_fix_segment(uint8_t *pkt, size_t len, u_int ipv4, u_int iphlen, u_int tcp,
+ u_int idx, u_int segmented_bytes, u_int last_segment)
{
- struct nm_iphdr *iph = (struct nm_iphdr *)(buf + 14);
- struct nm_ipv6hdr *ip6h = (struct nm_ipv6hdr *)(buf + 14);
+ struct nm_iphdr *iph = (struct nm_iphdr *)(pkt);
+ struct nm_ipv6hdr *ip6h = (struct nm_ipv6hdr *)(pkt);
uint16_t *check = NULL;
uint8_t *check_data = NULL;
- if (iphlen == 20) {
+ if (ipv4) {
/* Set the IPv4 "Total Length" field. */
- iph->tot_len = htobe16(len-14);
+ iph->tot_len = htobe16(len);
ND("ip total length %u", be16toh(ip->tot_len));
/* Set the IPv4 "Identification" field. */
@@ -87,15 +88,15 @@ static void gso_fix_segment(uint8_t *buf, size_t len, u_int idx,
/* Compute and insert the IPv4 header checksum. */
iph->check = 0;
- iph->check = nm_csum_ipv4(iph);
+ iph->check = nm_os_csum_ipv4(iph);
ND("IP csum %x", be16toh(iph->check));
- } else {/* if (iphlen == 40) */
+ } else {
/* Set the IPv6 "Payload Len" field. */
- ip6h->payload_len = htobe16(len-14-iphlen);
+ ip6h->payload_len = htobe16(len-iphlen);
}
if (tcp) {
- struct nm_tcphdr *tcph = (struct nm_tcphdr *)(buf + 14 + iphlen);
+ struct nm_tcphdr *tcph = (struct nm_tcphdr *)(pkt + iphlen);
/* Set the TCP sequence number. */
tcph->seq = htobe32(be32toh(tcph->seq) + segmented_bytes);
@@ -110,10 +111,10 @@ static void gso_fix_segment(uint8_t *buf, size_t len, u_int idx,
check = &tcph->check;
check_data = (uint8_t *)tcph;
} else { /* UDP */
- struct nm_udphdr *udph = (struct nm_udphdr *)(buf + 14 + iphlen);
+ struct nm_udphdr *udph = (struct nm_udphdr *)(pkt + iphlen);
/* Set the UDP 'Length' field. */
- udph->len = htobe16(len-14-iphlen);
+ udph->len = htobe16(len-iphlen);
check = &udph->check;
check_data = (uint8_t *)udph;
@@ -121,48 +122,80 @@ static void gso_fix_segment(uint8_t *buf, size_t len, u_int idx,
/* Compute and insert TCP/UDP checksum. */
*check = 0;
- if (iphlen == 20)
- nm_csum_tcpudp_ipv4(iph, check_data, len-14-iphlen, check);
+ if (ipv4)
+ nm_os_csum_tcpudp_ipv4(iph, check_data, len-iphlen, check);
else
- nm_csum_tcpudp_ipv6(ip6h, check_data, len-14-iphlen, check);
+ nm_os_csum_tcpudp_ipv6(ip6h, check_data, len-iphlen, check);
ND("TCP/UDP csum %x", be16toh(*check));
}
+static int
+vnet_hdr_is_bad(struct nm_vnet_hdr *vh)
+{
+ uint8_t gso_type = vh->gso_type & ~VIRTIO_NET_HDR_GSO_ECN;
+
+ return (
+ (gso_type != VIRTIO_NET_HDR_GSO_NONE &&
+ gso_type != VIRTIO_NET_HDR_GSO_TCPV4 &&
+ gso_type != VIRTIO_NET_HDR_GSO_UDP &&
+ gso_type != VIRTIO_NET_HDR_GSO_TCPV6)
+ ||
+ (vh->flags & ~(VIRTIO_NET_HDR_F_NEEDS_CSUM
+ | VIRTIO_NET_HDR_F_DATA_VALID))
+ );
+}
/* The VALE mismatch datapath implementation. */
-void bdg_mismatch_datapath(struct netmap_vp_adapter *na,
- struct netmap_vp_adapter *dst_na,
- struct nm_bdg_fwd *ft_p, struct netmap_ring *ring,
- u_int *j, u_int lim, u_int *howmany)
+void
+bdg_mismatch_datapath(struct netmap_vp_adapter *na,
+ struct netmap_vp_adapter *dst_na,
+ const struct nm_bdg_fwd *ft_p,
+ struct netmap_ring *dst_ring,
+ u_int *j, u_int lim, u_int *howmany)
{
- struct netmap_slot *slot = NULL;
+ struct netmap_slot *dst_slot = NULL;
struct nm_vnet_hdr *vh = NULL;
- /* Number of source slots to process. */
- u_int frags = ft_p->ft_frags;
- struct nm_bdg_fwd *ft_end = ft_p + frags;
+ const struct nm_bdg_fwd *ft_end = ft_p + ft_p->ft_frags;
/* Source and destination pointers. */
uint8_t *dst, *src;
size_t src_len, dst_len;
+ /* Indices and counters for the destination ring. */
u_int j_start = *j;
+ u_int j_cur = j_start;
u_int dst_slots = 0;
- /* If the source port uses the offloadings, while destination doesn't,
- * we grab the source virtio-net header and do the offloadings here.
- */
- if (na->virt_hdr_len && !dst_na->virt_hdr_len) {
- vh = (struct nm_vnet_hdr *)ft_p->ft_buf;
+ if (unlikely(ft_p == ft_end)) {
+ RD(3, "No source slots to process");
+ return;
}
/* Init source and dest pointers. */
src = ft_p->ft_buf;
src_len = ft_p->ft_len;
- slot = &ring->slot[*j];
- dst = NMB(&dst_na->up, slot);
+ dst_slot = &dst_ring->slot[j_cur];
+ dst = NMB(&dst_na->up, dst_slot);
dst_len = src_len;
+ /* If the source port uses the offloadings, while destination doesn't,
+ * we grab the source virtio-net header and do the offloadings here.
+ */
+ if (na->up.virt_hdr_len && !dst_na->up.virt_hdr_len) {
+ vh = (struct nm_vnet_hdr *)src;
+ /* Initial sanity check on the source virtio-net header. If
+ * something seems wrong, just drop the packet. */
+ if (src_len < na->up.virt_hdr_len) {
+ RD(3, "Short src vnet header, dropping");
+ return;
+ }
+ if (vnet_hdr_is_bad(vh)) {
+ RD(3, "Bad src vnet header, dropping");
+ return;
+ }
+ }
+
/* We are processing the first input slot and there is a mismatch
* between source and destination virt_hdr_len (SHL and DHL).
 * When a client is using virtio-net headers, the header length
@@ -185,14 +218,14 @@ void bdg_mismatch_datapath(struct netmap_vp_adapter *na,
* 12 | 0 | doesn't exist
* 12 | 10 | copied from the first 10 bytes of source header
*/
- bzero(dst, dst_na->virt_hdr_len);
- if (na->virt_hdr_len && dst_na->virt_hdr_len)
+ bzero(dst, dst_na->up.virt_hdr_len);
+ if (na->up.virt_hdr_len && dst_na->up.virt_hdr_len)
memcpy(dst, src, sizeof(struct nm_vnet_hdr));
/* Skip the virtio-net headers. */
- src += na->virt_hdr_len;
- src_len -= na->virt_hdr_len;
- dst += dst_na->virt_hdr_len;
- dst_len = dst_na->virt_hdr_len + src_len;
+ src += na->up.virt_hdr_len;
+ src_len -= na->up.virt_hdr_len;
+ dst += dst_na->up.virt_hdr_len;
+ dst_len = dst_na->up.virt_hdr_len + src_len;
/* Here it could be dst_len == 0 (which implies src_len == 0),
* so we avoid passing a zero length fragment.
@@ -214,16 +247,27 @@ void bdg_mismatch_datapath(struct netmap_vp_adapter *na,
u_int gso_idx = 0;
/* Payload data bytes segmented so far (e.g. TCP data bytes). */
u_int segmented_bytes = 0;
+ /* Is this an IPv4 or IPv6 GSO packet? */
+ u_int ipv4 = 0;
/* Length of the IP header (20 if IPv4, 40 if IPv6). */
u_int iphlen = 0;
+ /* Length of the Ethernet header (18 if 802.1q, otherwise 14). */
+ u_int ethhlen = 14;
/* Is this a TCP or an UDP GSO packet? */
u_int tcp = ((vh->gso_type & ~VIRTIO_NET_HDR_GSO_ECN)
== VIRTIO_NET_HDR_GSO_UDP) ? 0 : 1;
/* Segment the GSO packet contained into the input slots (frags). */
- while (ft_p != ft_end) {
+ for (;;) {
size_t copy;
+ if (dst_slots >= *howmany) {
+ /* We still have work to do, but we've run out of
+ * dst slots, so we have to drop the packet. */
+ RD(3, "Not enough slots, dropping GSO packet");
+ return;
+ }
+
/* Grab the GSO header if we don't have it. */
if (!gso_hdr) {
uint16_t ethertype;
@@ -231,28 +275,75 @@ void bdg_mismatch_datapath(struct netmap_vp_adapter *na,
gso_hdr = src;
/* Look at the 'Ethertype' field to see if this packet
- * is IPv4 or IPv6.
- */
- ethertype = be16toh(*((uint16_t *)(gso_hdr + 12)));
- if (ethertype == 0x0800)
- iphlen = 20;
- else /* if (ethertype == 0x86DD) */
- iphlen = 40;
+ * is IPv4 or IPv6, taking into account VLAN
+ * encapsulation. */
+ for (;;) {
+ if (src_len < ethhlen) {
+ RD(3, "Short GSO fragment [eth], dropping");
+ return;
+ }
+ ethertype = be16toh(*((uint16_t *)
+ (gso_hdr + ethhlen - 2)));
+ if (ethertype != 0x8100) /* not 802.1q */
+ break;
+ ethhlen += 4;
+ }
+ switch (ethertype) {
+ case 0x0800: /* IPv4 */
+ {
+ struct nm_iphdr *iph = (struct nm_iphdr *)
+ (gso_hdr + ethhlen);
+
+ if (src_len < ethhlen + 20) {
+ RD(3, "Short GSO fragment "
+ "[IPv4], dropping");
+ return;
+ }
+ ipv4 = 1;
+ iphlen = 4 * (iph->version_ihl & 0x0F);
+ break;
+ }
+ case 0x86DD: /* IPv6 */
+ ipv4 = 0;
+ iphlen = 40;
+ break;
+ default:
+ RD(3, "Unsupported ethertype, "
+ "dropping GSO packet");
+ return;
+ }
ND(3, "type=%04x", ethertype);
+ if (src_len < ethhlen + iphlen) {
+ RD(3, "Short GSO fragment [IP], dropping");
+ return;
+ }
+
/* Compute gso_hdr_len. For TCP we need to read the
* content of the 'Data Offset' field.
*/
if (tcp) {
- struct nm_tcphdr *tcph =
- (struct nm_tcphdr *)&gso_hdr[14+iphlen];
+ struct nm_tcphdr *tcph = (struct nm_tcphdr *)
+ (gso_hdr + ethhlen + iphlen);
- gso_hdr_len = 14 + iphlen + 4*(tcph->doff >> 4);
- } else
- gso_hdr_len = 14 + iphlen + 8; /* UDP */
+ if (src_len < ethhlen + iphlen + 20) {
+ RD(3, "Short GSO fragment "
+ "[TCP], dropping");
+ return;
+ }
+ gso_hdr_len = ethhlen + iphlen +
+ 4 * (tcph->doff >> 4);
+ } else {
+ gso_hdr_len = ethhlen + iphlen + 8; /* UDP */
+ }
+
+ if (src_len < gso_hdr_len) {
+ RD(3, "Short GSO fragment [TCP/UDP], dropping");
+ return;
+ }
ND(3, "gso_hdr_len %u gso_mtu %d", gso_hdr_len,
- dst_na->mfs);
+ dst_na->mfs);
/* Advance source pointers. */
src += gso_hdr_len;
@@ -263,7 +354,6 @@ void bdg_mismatch_datapath(struct netmap_vp_adapter *na,
break;
src = ft_p->ft_buf;
src_len = ft_p->ft_len;
- continue;
}
}
@@ -289,25 +379,24 @@ void bdg_mismatch_datapath(struct netmap_vp_adapter *na,
/* After raw segmentation, we must fix some header
* fields and compute checksums, in a protocol dependent
* way. */
- gso_fix_segment(dst, gso_bytes, gso_idx,
- segmented_bytes,
- src_len == 0 && ft_p + 1 == ft_end,
- tcp, iphlen);
+ gso_fix_segment(dst + ethhlen, gso_bytes - ethhlen,
+ ipv4, iphlen, tcp,
+ gso_idx, segmented_bytes,
+ src_len == 0 && ft_p + 1 == ft_end);
ND("frame %u completed with %d bytes", gso_idx, (int)gso_bytes);
- slot->len = gso_bytes;
- slot->flags = 0;
- segmented_bytes += gso_bytes - gso_hdr_len;
-
+ dst_slot->len = gso_bytes;
+ dst_slot->flags = 0;
dst_slots++;
-
- /* Next destination slot. */
- *j = nm_next(*j, lim);
- slot = &ring->slot[*j];
- dst = NMB(&dst_na->up, slot);
+ segmented_bytes += gso_bytes - gso_hdr_len;
gso_bytes = 0;
gso_idx++;
+
+ /* Next destination slot. */
+ j_cur = nm_next(j_cur, lim);
+ dst_slot = &dst_ring->slot[j_cur];
+ dst = NMB(&dst_na->up, dst_slot);
}
/* Next input slot. */
@@ -342,10 +431,10 @@ void bdg_mismatch_datapath(struct netmap_vp_adapter *na,
/* Init/update the packet checksum if needed. */
if (vh && (vh->flags & VIRTIO_NET_HDR_F_NEEDS_CSUM)) {
if (!dst_slots)
- csum = nm_csum_raw(src + vh->csum_start,
+ csum = nm_os_csum_raw(src + vh->csum_start,
src_len - vh->csum_start, 0);
else
- csum = nm_csum_raw(src, src_len, csum);
+ csum = nm_os_csum_raw(src, src_len, csum);
}
/* Round to a multiple of 64 */
@@ -359,44 +448,43 @@ void bdg_mismatch_datapath(struct netmap_vp_adapter *na,
} else {
memcpy(dst, src, (int)src_len);
}
- slot->len = dst_len;
-
+ dst_slot->len = dst_len;
dst_slots++;
/* Next destination slot. */
- *j = nm_next(*j, lim);
- slot = &ring->slot[*j];
- dst = NMB(&dst_na->up, slot);
+ j_cur = nm_next(j_cur, lim);
+ dst_slot = &dst_ring->slot[j_cur];
+ dst = NMB(&dst_na->up, dst_slot);
/* Next source slot. */
ft_p++;
src = ft_p->ft_buf;
dst_len = src_len = ft_p->ft_len;
-
}
/* Finalize (fold) the checksum if needed. */
if (check && vh && (vh->flags & VIRTIO_NET_HDR_F_NEEDS_CSUM)) {
- *check = nm_csum_fold(csum);
+ *check = nm_os_csum_fold(csum);
}
ND(3, "using %u dst_slots", dst_slots);
- /* A second pass on the desitations slots to set the slot flags,
+ /* A second pass on the destination slots to set the slot flags,
* using the right number of destination slots.
*/
- while (j_start != *j) {
- slot = &ring->slot[j_start];
- slot->flags = (dst_slots << 8)| NS_MOREFRAG;
+ while (j_start != j_cur) {
+ dst_slot = &dst_ring->slot[j_start];
+ dst_slot->flags = (dst_slots << 8)| NS_MOREFRAG;
j_start = nm_next(j_start, lim);
}
/* Clear NS_MOREFRAG flag on last entry. */
- slot->flags = (dst_slots << 8);
+ dst_slot->flags = (dst_slots << 8);
}
- /* Update howmany. */
+ /* Update howmany and j. This is to commit the use of
+ * those slots in the destination ring. */
if (unlikely(dst_slots > *howmany)) {
- dst_slots = *howmany;
- D("Slot allocation error: Should never happen");
+ D("Slot allocation error: This is a bug");
}
+ *j = j_cur;
*howmany -= dst_slots;
}
diff --git a/sys/dev/netmap/netmap_pipe.c b/sys/dev/netmap/netmap_pipe.c
index 67e840248c88..f0f1b524300a 100644
--- a/sys/dev/netmap/netmap_pipe.c
+++ b/sys/dev/netmap/netmap_pipe.c
@@ -1,5 +1,6 @@
/*
- * Copyright (C) 2014 Giuseppe Lettieri. All rights reserved.
+ * Copyright (C) 2014-2016 Giuseppe Lettieri
+ * All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
@@ -54,6 +55,9 @@
#warning OSX support is only partial
#include "osx_glue.h"
+#elif defined(_WIN32)
+#include "win_glue.h"
+
#else
#error Unsupported platform
@@ -72,9 +76,11 @@
#define NM_PIPE_MAXSLOTS 4096
-int netmap_default_pipes = 0; /* ignored, kept for compatibility */
+static int netmap_default_pipes = 0; /* ignored, kept for compatibility */
+SYSBEGIN(vars_pipes);
SYSCTL_DECL(_dev_netmap);
SYSCTL_INT(_dev_netmap, OID_AUTO, default_pipes, CTLFLAG_RW, &netmap_default_pipes, 0 , "");
+SYSEND;
/* allocate the pipe array in the parent adapter */
static int
@@ -91,7 +97,11 @@ nm_pipe_alloc(struct netmap_adapter *na, u_int npipes)
return EINVAL;
len = sizeof(struct netmap_pipe_adapter *) * npipes;
+#ifndef _WIN32
npa = realloc(na->na_pipes, len, M_DEVBUF, M_NOWAIT | M_ZERO);
+#else
+ npa = realloc(na->na_pipes, len, sizeof(struct netmap_pipe_adapter *)*na->na_max_pipes);
+#endif
if (npa == NULL)
return ENOMEM;
@@ -199,7 +209,7 @@ netmap_pipe_txsync(struct netmap_kring *txkring, int flags)
}
while (limit-- > 0) {
- struct netmap_slot *rs = &rxkring->save_ring->slot[j];
+ struct netmap_slot *rs = &rxkring->ring->slot[j];
struct netmap_slot *ts = &txkring->ring->slot[k];
struct netmap_slot tmp;
@@ -295,7 +305,7 @@ netmap_pipe_rxsync(struct netmap_kring *rxkring, int flags)
* usr1 --> e1 --> e2
*
* and we are e2. e1 is certainly registered and our
- * krings already exist, but they may be hidden.
+ * krings already exist. Nothing to do.
*/
static int
netmap_pipe_krings_create(struct netmap_adapter *na)
@@ -310,65 +320,28 @@ netmap_pipe_krings_create(struct netmap_adapter *na)
int i;
/* case 1) above */
- ND("%p: case 1, create everything", na);
+ D("%p: case 1, create both ends", na);
error = netmap_krings_create(na, 0);
if (error)
goto err;
- /* we also create all the rings, since we need to
- * update the save_ring pointers.
- * netmap_mem_rings_create (called by our caller)
- * will not create the rings again
- */
-
- error = netmap_mem_rings_create(na);
- if (error)
- goto del_krings1;
-
- /* update our hidden ring pointers */
- for_rx_tx(t) {
- for (i = 0; i < nma_get_nrings(na, t) + 1; i++)
- NMR(na, t)[i].save_ring = NMR(na, t)[i].ring;
- }
-
- /* now, create krings and rings of the other end */
+ /* create the krings of the other end */
error = netmap_krings_create(ona, 0);
if (error)
- goto del_rings1;
-
- error = netmap_mem_rings_create(ona);
- if (error)
- goto del_krings2;
-
- for_rx_tx(t) {
- for (i = 0; i < nma_get_nrings(ona, t) + 1; i++)
- NMR(ona, t)[i].save_ring = NMR(ona, t)[i].ring;
- }
+ goto del_krings1;
/* cross link the krings */
for_rx_tx(t) {
- enum txrx r= nm_txrx_swap(t); /* swap NR_TX <-> NR_RX */
+ enum txrx r = nm_txrx_swap(t); /* swap NR_TX <-> NR_RX */
for (i = 0; i < nma_get_nrings(na, t); i++) {
NMR(na, t)[i].pipe = NMR(&pna->peer->up, r) + i;
NMR(&pna->peer->up, r)[i].pipe = NMR(na, t) + i;
}
}
- } else {
- int i;
- /* case 2) above */
- /* recover the hidden rings */
- ND("%p: case 2, hidden rings", na);
- for_rx_tx(t) {
- for (i = 0; i < nma_get_nrings(na, t) + 1; i++)
- NMR(na, t)[i].ring = NMR(na, t)[i].save_ring;
- }
+
}
return 0;
-del_krings2:
- netmap_krings_delete(ona);
-del_rings1:
- netmap_mem_rings_delete(na);
del_krings1:
netmap_krings_delete(na);
err:
@@ -383,7 +356,8 @@ err:
*
* usr1 --> e1 --> e2
*
- * and we are e1. Nothing special to do.
+ * and we are e1. Create the needed rings of the
+ * other end.
*
* 1.b) state is
*
@@ -412,14 +386,65 @@ netmap_pipe_reg(struct netmap_adapter *na, int onoff)
{
struct netmap_pipe_adapter *pna =
(struct netmap_pipe_adapter *)na;
+ struct netmap_adapter *ona = &pna->peer->up;
+ int i, error = 0;
enum txrx t;
ND("%p: onoff %d", na, onoff);
if (onoff) {
- na->na_flags |= NAF_NETMAP_ON;
+ for_rx_tx(t) {
+ for (i = 0; i < nma_get_nrings(na, t) + 1; i++) {
+ struct netmap_kring *kring = &NMR(na, t)[i];
+
+ if (nm_kring_pending_on(kring)) {
+ /* mark the partner ring as needed */
+ kring->pipe->nr_kflags |= NKR_NEEDRING;
+ }
+ }
+ }
+
+ /* create all missing needed rings on the other end */
+ error = netmap_mem_rings_create(ona);
+ if (error)
+ return error;
+
+ /* In case of no error we put our rings in netmap mode */
+ for_rx_tx(t) {
+ for (i = 0; i < nma_get_nrings(na, t) + 1; i++) {
+ struct netmap_kring *kring = &NMR(na, t)[i];
+
+ if (nm_kring_pending_on(kring)) {
+ kring->nr_mode = NKR_NETMAP_ON;
+ }
+ }
+ }
+ if (na->active_fds == 0)
+ na->na_flags |= NAF_NETMAP_ON;
} else {
- na->na_flags &= ~NAF_NETMAP_ON;
+ if (na->active_fds == 0)
+ na->na_flags &= ~NAF_NETMAP_ON;
+ for_rx_tx(t) {
+ for (i = 0; i < nma_get_nrings(na, t) + 1; i++) {
+ struct netmap_kring *kring = &NMR(na, t)[i];
+
+ if (nm_kring_pending_off(kring)) {
+ kring->nr_mode = NKR_NETMAP_OFF;
+ /* mark the peer ring as no longer needed by us
+					 * (it may still be kept if somebody else is using it
+ */
+ kring->pipe->nr_kflags &= ~NKR_NEEDRING;
+ }
+ }
+ }
+ /* delete all the peer rings that are no longer needed */
+ netmap_mem_rings_delete(ona);
+ }
+
+ if (na->active_fds) {
+ D("active_fds %d", na->active_fds);
+ return 0;
}
+
if (pna->peer_ref) {
ND("%p: case 1.a or 2.a, nothing to do", na);
return 0;
@@ -429,18 +454,11 @@ netmap_pipe_reg(struct netmap_adapter *na, int onoff)
pna->peer->peer_ref = 0;
netmap_adapter_put(na);
} else {
- int i;
ND("%p: case 2.b, grab peer", na);
netmap_adapter_get(na);
pna->peer->peer_ref = 1;
- /* hide our rings from netmap_mem_rings_delete */
- for_rx_tx(t) {
- for (i = 0; i < nma_get_nrings(na, t) + 1; i++) {
- NMR(na, t)[i].ring = NULL;
- }
- }
}
- return 0;
+ return error;
}
/* netmap_pipe_krings_delete.
@@ -470,8 +488,6 @@ netmap_pipe_krings_delete(struct netmap_adapter *na)
struct netmap_pipe_adapter *pna =
(struct netmap_pipe_adapter *)na;
struct netmap_adapter *ona; /* na of the other end */
- int i;
- enum txrx t;
if (!pna->peer_ref) {
ND("%p: case 2, kept alive by peer", na);
@@ -480,18 +496,12 @@ netmap_pipe_krings_delete(struct netmap_adapter *na)
/* case 1) above */
ND("%p: case 1, deleting everyhing", na);
netmap_krings_delete(na); /* also zeroes tx_rings etc. */
- /* restore the ring to be deleted on the peer */
ona = &pna->peer->up;
if (ona->tx_rings == NULL) {
		/* already deleted, we must be on a
* cleanup-after-error path */
return;
}
- for_rx_tx(t) {
- for (i = 0; i < nma_get_nrings(ona, t) + 1; i++)
- NMR(ona, t)[i].ring = NMR(ona, t)[i].save_ring;
- }
- netmap_mem_rings_delete(ona);
netmap_krings_delete(ona);
}
@@ -519,6 +529,7 @@ netmap_get_pipe_na(struct nmreq *nmr, struct netmap_adapter **na, int create)
struct nmreq pnmr;
struct netmap_adapter *pna; /* parent adapter */
struct netmap_pipe_adapter *mna, *sna, *req;
+ struct ifnet *ifp = NULL;
u_int pipe_id;
int role = nmr->nr_flags & NR_REG_MASK;
int error;
@@ -536,7 +547,7 @@ netmap_get_pipe_na(struct nmreq *nmr, struct netmap_adapter **na, int create)
memcpy(&pnmr.nr_name, nmr->nr_name, IFNAMSIZ);
/* pass to parent the requested number of pipes */
pnmr.nr_arg1 = nmr->nr_arg1;
- error = netmap_get_na(&pnmr, &pna, create);
+ error = netmap_get_na(&pnmr, &pna, &ifp, create);
if (error) {
ND("parent lookup failed: %d", error);
return error;
@@ -652,16 +663,15 @@ found:
*na = &req->up;
netmap_adapter_get(*na);
- /* write the configuration back */
- nmr->nr_tx_rings = req->up.num_tx_rings;
- nmr->nr_rx_rings = req->up.num_rx_rings;
- nmr->nr_tx_slots = req->up.num_tx_desc;
- nmr->nr_rx_slots = req->up.num_rx_desc;
-
/* keep the reference to the parent.
* It will be released by the req destructor
*/
+ /* drop the ifp reference, if any */
+ if (ifp) {
+ if_rele(ifp);
+ }
+
return 0;
free_sna:
@@ -671,7 +681,7 @@ unregister_mna:
free_mna:
free(mna, M_DEVBUF);
put_out:
- netmap_adapter_put(pna);
+ netmap_unget_na(pna, ifp);
return error;
}
diff --git a/sys/dev/netmap/netmap_vale.c b/sys/dev/netmap/netmap_vale.c
index ddd7334a8378..2d2c807681d2 100644
--- a/sys/dev/netmap/netmap_vale.c
+++ b/sys/dev/netmap/netmap_vale.c
@@ -1,5 +1,6 @@
/*
- * Copyright (C) 2013-2014 Universita` di Pisa. All rights reserved.
+ * Copyright (C) 2013-2016 Universita` di Pisa
+ * All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
@@ -101,6 +102,9 @@ __FBSDID("$FreeBSD$");
#warning OSX support is only partial
#include "osx_glue.h"
+#elif defined(_WIN32)
+#include "win_glue.h"
+
#else
#error Unsupported platform
@@ -119,7 +123,7 @@ __FBSDID("$FreeBSD$");
/*
* system parameters (most of them in netmap_kern.h)
- * NM_NAME prefix for switch port names, default "vale"
+ * NM_BDG_NAME prefix for switch port names, default "vale"
* NM_BDG_MAXPORTS number of ports
* NM_BRIDGES max number of switches in the system.
* XXX should become a sysctl or tunable
@@ -144,7 +148,6 @@ __FBSDID("$FreeBSD$");
#define NM_BDG_BATCH_MAX (NM_BDG_BATCH + NM_MULTISEG)
/* NM_FT_NULL terminates a list of slots in the ft */
#define NM_FT_NULL NM_BDG_BATCH_MAX
-#define NM_BRIDGES 8 /* number of bridges */
/*
@@ -152,14 +155,15 @@ __FBSDID("$FreeBSD$");
* used in the bridge. The actual value may be larger as the
* last packet in the block may overflow the size.
*/
-int bridge_batch = NM_BDG_BATCH; /* bridge batch size */
+static int bridge_batch = NM_BDG_BATCH; /* bridge batch size */
+SYSBEGIN(vars_vale);
SYSCTL_DECL(_dev_netmap);
SYSCTL_INT(_dev_netmap, OID_AUTO, bridge_batch, CTLFLAG_RW, &bridge_batch, 0 , "");
-
+SYSEND;
static int netmap_vp_create(struct nmreq *, struct ifnet *, struct netmap_vp_adapter **);
static int netmap_vp_reg(struct netmap_adapter *na, int onoff);
-static int netmap_bwrap_register(struct netmap_adapter *, int onoff);
+static int netmap_bwrap_reg(struct netmap_adapter *, int onoff);
/*
* For each output interface, nm_bdg_q is used to construct a list.
@@ -213,7 +217,7 @@ struct nm_bridge {
* forward this packet. ring_nr is the source ring index, and the
* function may overwrite this value to forward this packet to a
* different ring index.
- * This function must be set by netmap_bdgctl().
+ * This function must be set by netmap_bdg_ctl().
*/
struct netmap_bdg_ops bdg_ops;
@@ -244,7 +248,7 @@ netmap_bdg_name(struct netmap_vp_adapter *vp)
* Right now we have a static array and deletions are protected
* by an exclusive lock.
*/
-struct nm_bridge *nm_bridges;
+static struct nm_bridge *nm_bridges;
#endif /* !CONFIG_NET_NS */
@@ -278,6 +282,45 @@ pkt_copy(void *_src, void *_dst, int l)
}
+static int
+nm_is_id_char(const char c)
+{
+ return (c >= 'a' && c <= 'z') ||
+ (c >= 'A' && c <= 'Z') ||
+ (c >= '0' && c <= '9') ||
+ (c == '_');
+}
+
+/* Validate the name of a VALE bridge port and return the
+ * position of the ":" character. */
+static int
+nm_vale_name_validate(const char *name)
+{
+ int colon_pos = -1;
+ int i;
+
+ if (!name || strlen(name) < strlen(NM_BDG_NAME)) {
+ return -1;
+ }
+
+ for (i = 0; name[i]; i++) {
+ if (name[i] == ':') {
+ if (colon_pos != -1) {
+ return -1;
+ }
+ colon_pos = i;
+ } else if (!nm_is_id_char(name[i])) {
+ return -1;
+ }
+ }
+
+ if (i >= IFNAMSIZ) {
+ return -1;
+ }
+
+ return colon_pos;
+}
+
/*
* locate a bridge among the existing ones.
* MUST BE CALLED WITH NMG_LOCK()
@@ -288,7 +331,7 @@ pkt_copy(void *_src, void *_dst, int l)
static struct nm_bridge *
nm_find_bridge(const char *name, int create)
{
- int i, l, namelen;
+ int i, namelen;
struct nm_bridge *b = NULL, *bridges;
u_int num_bridges;
@@ -296,21 +339,11 @@ nm_find_bridge(const char *name, int create)
netmap_bns_getbridges(&bridges, &num_bridges);
- namelen = strlen(NM_NAME); /* base length */
- l = name ? strlen(name) : 0; /* actual length */
- if (l < namelen) {
+ namelen = nm_vale_name_validate(name);
+ if (namelen < 0) {
D("invalid bridge name %s", name ? name : NULL);
return NULL;
}
- for (i = namelen + 1; i < l; i++) {
- if (name[i] == ':') {
- namelen = i;
- break;
- }
- }
- if (namelen >= IFNAMSIZ)
- namelen = IFNAMSIZ;
- ND("--- prefix is '%.*s' ---", namelen, name);
/* lookup the name, remember empty slot if there is one */
for (i = 0; i < num_bridges; i++) {
@@ -479,6 +512,7 @@ netmap_vp_bdg_ctl(struct netmap_adapter *na, struct nmreq *nmr, int attach)
struct netmap_vp_adapter *vpna = (struct netmap_vp_adapter *)na;
struct nm_bridge *b = vpna->na_bdg;
+ (void)nmr; // XXX merge ?
if (attach)
return 0; /* nothing to do */
if (b) {
@@ -518,7 +552,7 @@ nm_vi_destroy(const char *name)
return ENXIO;
NMG_LOCK();
/* make sure this is actually a VALE port */
- if (!NETMAP_CAPABLE(ifp) || NA(ifp)->nm_register != netmap_vp_reg) {
+ if (!NM_NA_VALID(ifp) || NA(ifp)->nm_register != netmap_vp_reg) {
error = EINVAL;
goto err;
}
@@ -535,7 +569,7 @@ nm_vi_destroy(const char *name)
*/
if_rele(ifp);
netmap_detach(ifp);
- nm_vi_detach(ifp);
+ nm_os_vi_detach(ifp);
return 0;
err:
@@ -556,14 +590,14 @@ nm_vi_create(struct nmreq *nmr)
int error;
/* don't include VALE prefix */
- if (!strncmp(nmr->nr_name, NM_NAME, strlen(NM_NAME)))
+ if (!strncmp(nmr->nr_name, NM_BDG_NAME, strlen(NM_BDG_NAME)))
return EINVAL;
ifp = ifunit_ref(nmr->nr_name);
if (ifp) { /* already exist, cannot create new one */
if_rele(ifp);
return EEXIST;
}
- error = nm_vi_persist(nmr->nr_name, &ifp);
+ error = nm_os_vi_persist(nmr->nr_name, &ifp);
if (error)
return error;
@@ -572,12 +606,13 @@ nm_vi_create(struct nmreq *nmr)
error = netmap_vp_create(nmr, ifp, &vpna);
if (error) {
D("error %d", error);
- nm_vi_detach(ifp);
+ nm_os_vi_detach(ifp);
return error;
}
/* persist-specific routines */
vpna->up.nm_bdg_ctl = netmap_vp_bdg_ctl;
netmap_adapter_get(&vpna->up);
+ NM_ATTACH_NA(ifp, &vpna->up);
NMG_UNLOCK();
D("created %s", ifp->if_xname);
return 0;
@@ -608,7 +643,7 @@ netmap_get_bdg_na(struct nmreq *nmr, struct netmap_adapter **na, int create)
/* first try to see if this is a bridge port. */
NMG_LOCK_ASSERT();
- if (strncmp(nr_name, NM_NAME, sizeof(NM_NAME) - 1)) {
+ if (strncmp(nr_name, NM_BDG_NAME, sizeof(NM_BDG_NAME) - 1)) {
return 0; /* no error, but no VALE prefix */
}
@@ -693,7 +728,6 @@ netmap_get_bdg_na(struct nmreq *nmr, struct netmap_adapter **na, int create)
goto out;
vpna = hw->na_vp;
hostna = hw->na_hostvp;
- if_rele(ifp);
if (nmr->nr_arg1 != NETMAP_BDG_HOST)
hostna = NULL;
}
@@ -768,6 +802,11 @@ unlock_exit:
return error;
}
+static inline int
+nm_is_bwrap(struct netmap_adapter *na)
+{
+ return na->nm_register == netmap_bwrap_reg;
+}
/* process NETMAP_BDG_DETACH */
static int
@@ -785,8 +824,13 @@ nm_bdg_ctl_detach(struct nmreq *nmr)
if (na == NULL) { /* VALE prefix missing */
error = EINVAL;
goto unlock_exit;
+ } else if (nm_is_bwrap(na) &&
+ ((struct netmap_bwrap_adapter *)na)->na_polling_state) {
+ /* Don't detach a NIC with polling */
+ error = EBUSY;
+ netmap_adapter_put(na);
+ goto unlock_exit;
}
-
if (na->nm_bdg_ctl) {
/* remove the port from bridge. The bwrap
* also needs to put the hwna in normal mode
@@ -801,6 +845,267 @@ unlock_exit:
}
+struct nm_bdg_polling_state;
+struct
+nm_bdg_kthread {
+ struct nm_kthread *nmk;
+ u_int qfirst;
+ u_int qlast;
+ struct nm_bdg_polling_state *bps;
+};
+
+struct nm_bdg_polling_state {
+ bool configured;
+ bool stopped;
+ struct netmap_bwrap_adapter *bna;
+ u_int reg;
+ u_int qfirst;
+ u_int qlast;
+ u_int cpu_from;
+ u_int ncpus;
+ struct nm_bdg_kthread *kthreads;
+};
+
+static void
+netmap_bwrap_polling(void *data)
+{
+ struct nm_bdg_kthread *nbk = data;
+ struct netmap_bwrap_adapter *bna;
+ u_int qfirst, qlast, i;
+ struct netmap_kring *kring0, *kring;
+
+ if (!nbk)
+ return;
+ qfirst = nbk->qfirst;
+ qlast = nbk->qlast;
+ bna = nbk->bps->bna;
+ kring0 = NMR(bna->hwna, NR_RX);
+
+ for (i = qfirst; i < qlast; i++) {
+ kring = kring0 + i;
+ kring->nm_notify(kring, 0);
+ }
+}
+
+static int
+nm_bdg_create_kthreads(struct nm_bdg_polling_state *bps)
+{
+ struct nm_kthread_cfg kcfg;
+ int i, j;
+
+ bps->kthreads = malloc(sizeof(struct nm_bdg_kthread) * bps->ncpus,
+ M_DEVBUF, M_NOWAIT | M_ZERO);
+ if (bps->kthreads == NULL)
+ return ENOMEM;
+
+ bzero(&kcfg, sizeof(kcfg));
+ kcfg.worker_fn = netmap_bwrap_polling;
+ for (i = 0; i < bps->ncpus; i++) {
+ struct nm_bdg_kthread *t = bps->kthreads + i;
+ int all = (bps->ncpus == 1 && bps->reg == NR_REG_ALL_NIC);
+ int affinity = bps->cpu_from + i;
+
+ t->bps = bps;
+ t->qfirst = all ? bps->qfirst /* must be 0 */: affinity;
+ t->qlast = all ? bps->qlast : t->qfirst + 1;
+ D("kthread %d a:%u qf:%u ql:%u", i, affinity, t->qfirst,
+ t->qlast);
+
+ kcfg.type = i;
+ kcfg.worker_private = t;
+ t->nmk = nm_os_kthread_create(&kcfg);
+ if (t->nmk == NULL) {
+ goto cleanup;
+ }
+ nm_os_kthread_set_affinity(t->nmk, affinity);
+ }
+ return 0;
+
+cleanup:
+ for (j = 0; j < i; j++) {
+ struct nm_bdg_kthread *t = bps->kthreads + i;
+ nm_os_kthread_delete(t->nmk);
+ }
+ free(bps->kthreads, M_DEVBUF);
+ return EFAULT;
+}
+
+/* a version of ptnetmap_start_kthreads() */
+static int
+nm_bdg_polling_start_kthreads(struct nm_bdg_polling_state *bps)
+{
+ int error, i, j;
+
+ if (!bps) {
+ D("polling is not configured");
+ return EFAULT;
+ }
+ bps->stopped = false;
+
+ for (i = 0; i < bps->ncpus; i++) {
+ struct nm_bdg_kthread *t = bps->kthreads + i;
+ error = nm_os_kthread_start(t->nmk);
+ if (error) {
+ D("error in nm_kthread_start()");
+ goto cleanup;
+ }
+ }
+ return 0;
+
+cleanup:
+ for (j = 0; j < i; j++) {
+ struct nm_bdg_kthread *t = bps->kthreads + i;
+ nm_os_kthread_stop(t->nmk);
+ }
+ bps->stopped = true;
+ return error;
+}
+
+static void
+nm_bdg_polling_stop_delete_kthreads(struct nm_bdg_polling_state *bps)
+{
+ int i;
+
+ if (!bps)
+ return;
+
+ for (i = 0; i < bps->ncpus; i++) {
+ struct nm_bdg_kthread *t = bps->kthreads + i;
+ nm_os_kthread_stop(t->nmk);
+ nm_os_kthread_delete(t->nmk);
+ }
+ bps->stopped = true;
+}
+
+static int
+get_polling_cfg(struct nmreq *nmr, struct netmap_adapter *na,
+ struct nm_bdg_polling_state *bps)
+{
+ int req_cpus, avail_cpus, core_from;
+ u_int reg, i, qfirst, qlast;
+
+ avail_cpus = nm_os_ncpus();
+ req_cpus = nmr->nr_arg1;
+
+ if (req_cpus == 0) {
+ D("req_cpus must be > 0");
+ return EINVAL;
+ } else if (req_cpus >= avail_cpus) {
+ D("for safety, we need at least one core left in the system");
+ return EINVAL;
+ }
+ reg = nmr->nr_flags & NR_REG_MASK;
+ i = nmr->nr_ringid & NETMAP_RING_MASK;
+ /*
+ * ONE_NIC: dedicate one core to one ring. If multiple cores
+ * are specified, consecutive rings are also polled.
+ * For example, if ringid=2 and 2 cores are given,
+ * ring 2 and 3 are polled by core 2 and 3, respectively.
+ * ALL_NIC: poll all the rings using a core specified by ringid.
+ * the number of cores must be 1.
+ */
+ if (reg == NR_REG_ONE_NIC) {
+ if (i + req_cpus > nma_get_nrings(na, NR_RX)) {
+ D("only %d rings exist (ring %u-%u is given)",
+ nma_get_nrings(na, NR_RX), i, i+req_cpus);
+ return EINVAL;
+ }
+ qfirst = i;
+ qlast = qfirst + req_cpus;
+ core_from = qfirst;
+ } else if (reg == NR_REG_ALL_NIC) {
+ if (req_cpus != 1) {
+ D("ncpus must be 1 not %d for REG_ALL_NIC", req_cpus);
+ return EINVAL;
+ }
+ qfirst = 0;
+ qlast = nma_get_nrings(na, NR_RX);
+ core_from = i;
+ } else {
+ D("reg must be ALL_NIC or ONE_NIC");
+ return EINVAL;
+ }
+
+ bps->reg = reg;
+ bps->qfirst = qfirst;
+ bps->qlast = qlast;
+ bps->cpu_from = core_from;
+ bps->ncpus = req_cpus;
+ D("%s qfirst %u qlast %u cpu_from %u ncpus %u",
+ reg == NR_REG_ALL_NIC ? "REG_ALL_NIC" : "REG_ONE_NIC",
+ qfirst, qlast, core_from, req_cpus);
+ return 0;
+}
+
+static int
+nm_bdg_ctl_polling_start(struct nmreq *nmr, struct netmap_adapter *na)
+{
+ struct nm_bdg_polling_state *bps;
+ struct netmap_bwrap_adapter *bna;
+ int error;
+
+ bna = (struct netmap_bwrap_adapter *)na;
+ if (bna->na_polling_state) {
+ D("ERROR adapter already in polling mode");
+ return EFAULT;
+ }
+
+ bps = malloc(sizeof(*bps), M_DEVBUF, M_NOWAIT | M_ZERO);
+ if (!bps)
+ return ENOMEM;
+ bps->configured = false;
+ bps->stopped = true;
+
+ if (get_polling_cfg(nmr, na, bps)) {
+ free(bps, M_DEVBUF);
+ return EINVAL;
+ }
+
+ if (nm_bdg_create_kthreads(bps)) {
+ free(bps, M_DEVBUF);
+ return EFAULT;
+ }
+
+ bps->configured = true;
+ bna->na_polling_state = bps;
+ bps->bna = bna;
+
+ /* disable interrupt if possible */
+ if (bna->hwna->nm_intr)
+ bna->hwna->nm_intr(bna->hwna, 0);
+ /* start kthread now */
+ error = nm_bdg_polling_start_kthreads(bps);
+ if (error) {
+ D("ERROR nm_bdg_polling_start_kthread()");
+ free(bps->kthreads, M_DEVBUF);
+ free(bps, M_DEVBUF);
+ bna->na_polling_state = NULL;
+ if (bna->hwna->nm_intr)
+ bna->hwna->nm_intr(bna->hwna, 1);
+ }
+ return error;
+}
+
+static int
+nm_bdg_ctl_polling_stop(struct nmreq *nmr, struct netmap_adapter *na)
+{
+ struct netmap_bwrap_adapter *bna = (struct netmap_bwrap_adapter *)na;
+ struct nm_bdg_polling_state *bps;
+
+ if (!bna->na_polling_state) {
+ D("ERROR adapter is not in polling mode");
+ return EFAULT;
+ }
+ bps = bna->na_polling_state;
+ nm_bdg_polling_stop_delete_kthreads(bna->na_polling_state);
+ bps->configured = false;
+ free(bps, M_DEVBUF);
+ bna->na_polling_state = NULL;
+ /* reenable interrupt */
+ if (bna->hwna->nm_intr)
+ bna->hwna->nm_intr(bna->hwna, 1);
+ return 0;
+}
/* Called by either user's context (netmap_ioctl())
* or external kernel modules (e.g., Openvswitch).
@@ -843,7 +1148,7 @@ netmap_bdg_ctl(struct nmreq *nmr, struct netmap_bdg_ops *bdg_ops)
case NETMAP_BDG_LIST:
/* this is used to enumerate bridges and ports */
if (namelen) { /* look up indexes of bridge and port */
- if (strncmp(name, NM_NAME, strlen(NM_NAME))) {
+ if (strncmp(name, NM_BDG_NAME, strlen(NM_BDG_NAME))) {
error = EINVAL;
break;
}
@@ -855,7 +1160,9 @@ netmap_bdg_ctl(struct nmreq *nmr, struct netmap_bdg_ops *bdg_ops)
break;
}
- error = ENOENT;
+ error = 0;
+ nmr->nr_arg1 = b - bridges; /* bridge index */
+ nmr->nr_arg2 = NM_BDG_NOPORT;
for (j = 0; j < b->bdg_active_ports; j++) {
i = b->bdg_port_index[j];
vpna = b->bdg_ports[i];
@@ -867,10 +1174,7 @@ netmap_bdg_ctl(struct nmreq *nmr, struct netmap_bdg_ops *bdg_ops)
* virtual port and a NIC, respectively
*/
if (!strcmp(vpna->up.name, name)) {
- /* bridge index */
- nmr->nr_arg1 = b - bridges;
nmr->nr_arg2 = i; /* port index */
- error = 0;
break;
}
}
@@ -937,10 +1241,34 @@ netmap_bdg_ctl(struct nmreq *nmr, struct netmap_bdg_ops *bdg_ops)
error = netmap_get_bdg_na(nmr, &na, 0);
if (na && !error) {
vpna = (struct netmap_vp_adapter *)na;
- vpna->virt_hdr_len = nmr->nr_arg1;
- if (vpna->virt_hdr_len)
+ na->virt_hdr_len = nmr->nr_arg1;
+ if (na->virt_hdr_len) {
vpna->mfs = NETMAP_BUF_SIZE(na);
- D("Using vnet_hdr_len %d for %p", vpna->virt_hdr_len, vpna);
+ }
+ D("Using vnet_hdr_len %d for %p", na->virt_hdr_len, na);
+ netmap_adapter_put(na);
+ } else if (!na) {
+ error = ENXIO;
+ }
+ NMG_UNLOCK();
+ break;
+
+ case NETMAP_BDG_POLLING_ON:
+ case NETMAP_BDG_POLLING_OFF:
+ NMG_LOCK();
+ error = netmap_get_bdg_na(nmr, &na, 0);
+ if (na && !error) {
+ if (!nm_is_bwrap(na)) {
+ error = EOPNOTSUPP;
+ } else if (cmd == NETMAP_BDG_POLLING_ON) {
+ error = nm_bdg_ctl_polling_start(nmr, na);
+ if (!error)
+ netmap_adapter_get(na);
+ } else {
+ error = nm_bdg_ctl_polling_stop(nmr, na);
+ if (!error)
+ netmap_adapter_put(na);
+ }
netmap_adapter_put(na);
}
NMG_UNLOCK();
@@ -1097,10 +1425,12 @@ nm_bdg_preflush(struct netmap_kring *kring, u_int end)
ft_i = nm_bdg_flush(ft, ft_i, na, ring_nr);
}
if (frags > 1) {
- D("truncate incomplete fragment at %d (%d frags)", ft_i, frags);
- // ft_i > 0, ft[ft_i-1].flags has NS_MOREFRAG
- ft[ft_i - 1].ft_frags &= ~NS_MOREFRAG;
- ft[ft_i - frags].ft_frags = frags - 1;
+ /* Here ft_i > 0, ft[ft_i-1].flags has NS_MOREFRAG, and we
+ * have to fix frags count. */
+ frags--;
+ ft[ft_i - 1].ft_flags &= ~NS_MOREFRAG;
+ ft[ft_i - frags].ft_frags = frags;
+ D("Truncate incomplete fragment at %d (%d frags)", ft_i, frags);
}
if (ft_i)
ft_i = nm_bdg_flush(ft, ft_i, na, ring_nr);
@@ -1157,6 +1487,8 @@ netmap_vp_reg(struct netmap_adapter *na, int onoff)
{
struct netmap_vp_adapter *vpna =
(struct netmap_vp_adapter*)na;
+ enum txrx t;
+ int i;
/* persistent ports may be put in netmap mode
* before being attached to a bridge
@@ -1164,12 +1496,30 @@ netmap_vp_reg(struct netmap_adapter *na, int onoff)
if (vpna->na_bdg)
BDG_WLOCK(vpna->na_bdg);
if (onoff) {
- na->na_flags |= NAF_NETMAP_ON;
+ for_rx_tx(t) {
+ for (i = 0; i < nma_get_nrings(na, t) + 1; i++) {
+ struct netmap_kring *kring = &NMR(na, t)[i];
+
+ if (nm_kring_pending_on(kring))
+ kring->nr_mode = NKR_NETMAP_ON;
+ }
+ }
+ if (na->active_fds == 0)
+ na->na_flags |= NAF_NETMAP_ON;
/* XXX on FreeBSD, persistent VALE ports should also
* toggle IFCAP_NETMAP in na->ifp (2014-03-16)
*/
} else {
- na->na_flags &= ~NAF_NETMAP_ON;
+ if (na->active_fds == 0)
+ na->na_flags &= ~NAF_NETMAP_ON;
+ for_rx_tx(t) {
+ for (i = 0; i < nma_get_nrings(na, t) + 1; i++) {
+ struct netmap_kring *kring = &NMR(na, t)[i];
+
+ if (nm_kring_pending_off(kring))
+ kring->nr_mode = NKR_NETMAP_OFF;
+ }
+ }
}
if (vpna->na_bdg)
BDG_WUNLOCK(vpna->na_bdg);
@@ -1193,13 +1543,14 @@ netmap_bdg_learning(struct nm_bdg_fwd *ft, uint8_t *dst_ring,
uint32_t sh, dh;
u_int dst, mysrc = na->bdg_port;
uint64_t smac, dmac;
+ uint8_t indbuf[12];
/* safety check, unfortunately we have many cases */
- if (buf_len >= 14 + na->virt_hdr_len) {
+ if (buf_len >= 14 + na->up.virt_hdr_len) {
/* virthdr + mac_hdr in the same slot */
- buf += na->virt_hdr_len;
- buf_len -= na->virt_hdr_len;
- } else if (buf_len == na->virt_hdr_len && ft->ft_flags & NS_MOREFRAG) {
+ buf += na->up.virt_hdr_len;
+ buf_len -= na->up.virt_hdr_len;
+ } else if (buf_len == na->up.virt_hdr_len && ft->ft_flags & NS_MOREFRAG) {
/* only header in first fragment */
ft++;
buf = ft->ft_buf;
@@ -1208,6 +1559,14 @@ netmap_bdg_learning(struct nm_bdg_fwd *ft, uint8_t *dst_ring,
RD(5, "invalid buf format, length %d", buf_len);
return NM_BDG_NOPORT;
}
+
+ if (ft->ft_flags & NS_INDIRECT) {
+ if (copyin(buf, indbuf, sizeof(indbuf))) {
+ return NM_BDG_NOPORT;
+ }
+ buf = indbuf;
+ }
+
dmac = le64toh(*(uint64_t *)(buf)) & 0xffffffffffff;
smac = le64toh(*(uint64_t *)(buf + 4));
smac >>= 16;
@@ -1321,7 +1680,7 @@ nm_bdg_flush(struct nm_bdg_fwd *ft, u_int n, struct netmap_vp_adapter *na,
struct nm_bdg_q *dst_ents, *brddst;
uint16_t num_dsts = 0, *dsts;
struct nm_bridge *b = na->na_bdg;
- u_int i, j, me = na->bdg_port;
+ u_int i, me = na->bdg_port;
/*
* The work area (pointed by ft) is followed by an array of
@@ -1341,7 +1700,7 @@ nm_bdg_flush(struct nm_bdg_fwd *ft, u_int n, struct netmap_vp_adapter *na,
ND("slot %d frags %d", i, ft[i].ft_frags);
/* Drop the packet if the virtio-net header is not into the first
fragment nor at the very beginning of the second. */
- if (unlikely(na->virt_hdr_len > ft[i].ft_len))
+ if (unlikely(na->up.virt_hdr_len > ft[i].ft_len))
continue;
dst_port = b->bdg_ops.lookup(&ft[i], &dst_ring, na);
if (netmap_verbose > 255)
@@ -1382,6 +1741,7 @@ nm_bdg_flush(struct nm_bdg_fwd *ft, u_int n, struct netmap_vp_adapter *na,
*/
brddst = dst_ents + NM_BDG_BROADCAST * NM_BDG_MAXRINGS;
if (brddst->bq_head != NM_FT_NULL) {
+ u_int j;
for (j = 0; likely(j < b->bdg_active_ports); j++) {
uint16_t d_i;
i = b->bdg_port_index[j];
@@ -1441,8 +1801,9 @@ nm_bdg_flush(struct nm_bdg_fwd *ft, u_int n, struct netmap_vp_adapter *na,
*/
needed = d->bq_len + brddst->bq_len;
- if (unlikely(dst_na->virt_hdr_len != na->virt_hdr_len)) {
- RD(3, "virt_hdr_mismatch, src %d dst %d", na->virt_hdr_len, dst_na->virt_hdr_len);
+ if (unlikely(dst_na->up.virt_hdr_len != na->up.virt_hdr_len)) {
+ RD(3, "virt_hdr_mismatch, src %d dst %d", na->up.virt_hdr_len,
+ dst_na->up.virt_hdr_len);
/* There is a virtio-net header/offloadings mismatch between
* source and destination. The slower mismatch datapath will
* be used to cope with all the mismatches.
@@ -1803,7 +2164,6 @@ netmap_vp_create(struct nmreq *nmr, struct ifnet *ifp, struct netmap_vp_adapter
nm_bound_var(&nmr->nr_arg3, 0, 0,
128*NM_BDG_MAXSLOTS, NULL);
na->num_rx_desc = nmr->nr_rx_slots;
- vpna->virt_hdr_len = 0;
vpna->mfs = 1514;
vpna->last_smac = ~0llu;
/*if (vpna->mfs > netmap_buf_size) TODO netmap_buf_size is zero??
@@ -1880,19 +2240,17 @@ netmap_bwrap_dtor(struct netmap_adapter *na)
{
struct netmap_bwrap_adapter *bna = (struct netmap_bwrap_adapter*)na;
struct netmap_adapter *hwna = bna->hwna;
+ struct nm_bridge *b = bna->up.na_bdg,
+ *bh = bna->host.na_bdg;
+
+ if (b) {
+ netmap_bdg_detach_common(b, bna->up.bdg_port,
+ (bh ? bna->host.bdg_port : -1));
+ }
ND("na %p", na);
- /* drop reference to hwna->ifp.
- * If we don't do this, netmap_detach_common(na)
- * will think it has set NA(na->ifp) to NULL
- */
na->ifp = NULL;
- /* for safety, also drop the possible reference
- * in the hostna
- */
bna->host.up.ifp = NULL;
-
- hwna->nm_mem = bna->save_nmd;
hwna->na_private = NULL;
hwna->na_vp = hwna->na_hostvp = NULL;
hwna->na_flags &= ~NAF_BUSY;
@@ -1916,7 +2274,8 @@ netmap_bwrap_dtor(struct netmap_adapter *na)
* (part as a receive ring, part as a transmit ring).
*
* callback that overwrites the hwna notify callback.
- * Packets come from the outside or from the host stack and are put on an hwna rx ring.
+ * Packets come from the outside or from the host stack and are put on an
+ * hwna rx ring.
* The bridge wrapper then sends the packets through the bridge.
*/
static int
@@ -1927,19 +2286,18 @@ netmap_bwrap_intr_notify(struct netmap_kring *kring, int flags)
struct netmap_kring *bkring;
struct netmap_vp_adapter *vpna = &bna->up;
u_int ring_nr = kring->ring_id;
- int error = 0;
+ int ret = NM_IRQ_COMPLETED;
+ int error;
if (netmap_verbose)
D("%s %s 0x%x", na->name, kring->name, flags);
- if (!nm_netmap_on(na))
- return 0;
-
bkring = &vpna->up.tx_rings[ring_nr];
/* make sure the ring is not disabled */
- if (nm_kr_tryget(kring))
- return 0;
+ if (nm_kr_tryget(kring, 0 /* can't sleep */, NULL)) {
+ return EIO;
+ }
if (netmap_verbose)
D("%s head %d cur %d tail %d", na->name,
@@ -1951,9 +2309,10 @@ netmap_bwrap_intr_notify(struct netmap_kring *kring, int flags)
error = kring->nm_sync(kring, 0);
if (error)
goto put_out;
- if (kring->nr_hwcur == kring->nr_hwtail && netmap_verbose) {
- D("how strange, interrupt with no packets on %s",
- na->name);
+ if (kring->nr_hwcur == kring->nr_hwtail) {
+ if (netmap_verbose)
+ D("how strange, interrupt with no packets on %s",
+ na->name);
goto put_out;
}
@@ -1970,28 +2329,32 @@ netmap_bwrap_intr_notify(struct netmap_kring *kring, int flags)
/* another call to actually release the buffers */
error = kring->nm_sync(kring, 0);
+ /* The second rxsync may have further advanced hwtail. If this happens,
+ * return NM_IRQ_RESCHED, otherwise just return NM_IRQ_COMPLETED. */
+ if (kring->rcur != kring->nr_hwtail) {
+ ret = NM_IRQ_RESCHED;
+ }
put_out:
nm_kr_put(kring);
- return error;
+
+ return error ? error : ret;
}
/* nm_register callback for bwrap */
static int
-netmap_bwrap_register(struct netmap_adapter *na, int onoff)
+netmap_bwrap_reg(struct netmap_adapter *na, int onoff)
{
struct netmap_bwrap_adapter *bna =
(struct netmap_bwrap_adapter *)na;
struct netmap_adapter *hwna = bna->hwna;
struct netmap_vp_adapter *hostna = &bna->host;
- int error;
+ int error, i;
enum txrx t;
ND("%s %s", na->name, onoff ? "on" : "off");
if (onoff) {
- int i;
-
/* netmap_do_regif has been called on the bwrap na.
* We need to pass the information about the
* memory allocator down to the hwna before
@@ -2010,16 +2373,32 @@ netmap_bwrap_register(struct netmap_adapter *na, int onoff)
/* cross-link the netmap rings
* The original number of rings comes from hwna,
* rx rings on one side equals tx rings on the other.
- * We need to do this now, after the initialization
- * of the kring->ring pointers
*/
for_rx_tx(t) {
- enum txrx r= nm_txrx_swap(t); /* swap NR_TX <-> NR_RX */
- for (i = 0; i < nma_get_nrings(na, r) + 1; i++) {
- NMR(hwna, t)[i].nkr_num_slots = NMR(na, r)[i].nkr_num_slots;
- NMR(hwna, t)[i].ring = NMR(na, r)[i].ring;
+ enum txrx r = nm_txrx_swap(t); /* swap NR_TX <-> NR_RX */
+ for (i = 0; i < nma_get_nrings(hwna, r) + 1; i++) {
+ NMR(hwna, r)[i].ring = NMR(na, t)[i].ring;
}
}
+
+ if (na->na_flags & NAF_HOST_RINGS) {
+ struct netmap_adapter *hna = &hostna->up;
+ /* the hostna rings are the host rings of the bwrap.
+ * The corresponding krings must point back to the
+ * hostna
+ */
+ hna->tx_rings = &na->tx_rings[na->num_tx_rings];
+ hna->tx_rings[0].na = hna;
+ hna->rx_rings = &na->rx_rings[na->num_rx_rings];
+ hna->rx_rings[0].na = hna;
+ }
+ }
+
+ /* pass down the pending ring state information */
+ for_rx_tx(t) {
+ for (i = 0; i < nma_get_nrings(na, t) + 1; i++)
+ NMR(hwna, t)[i].nr_pending_mode =
+ NMR(na, t)[i].nr_pending_mode;
}
/* forward the request to the hwna */
@@ -2027,6 +2406,13 @@ netmap_bwrap_register(struct netmap_adapter *na, int onoff)
if (error)
return error;
+ /* copy up the current ring state information */
+ for_rx_tx(t) {
+ for (i = 0; i < nma_get_nrings(na, t) + 1; i++)
+ NMR(na, t)[i].nr_mode =
+ NMR(hwna, t)[i].nr_mode;
+ }
+
/* impersonate a netmap_vp_adapter */
netmap_vp_reg(na, onoff);
if (hostna->na_bdg)
@@ -2046,8 +2432,14 @@ netmap_bwrap_register(struct netmap_adapter *na, int onoff)
/* also intercept the host ring notify */
hwna->rx_rings[i].nm_notify = netmap_bwrap_intr_notify;
}
+ if (na->active_fds == 0)
+ na->na_flags |= NAF_NETMAP_ON;
} else {
u_int i;
+
+ if (na->active_fds == 0)
+ na->na_flags &= ~NAF_NETMAP_ON;
+
/* reset all notify callbacks (including host ring) */
for (i = 0; i <= hwna->num_rx_rings; i++) {
hwna->rx_rings[i].nm_notify = hwna->rx_rings[i].save_notify;
@@ -2089,8 +2481,8 @@ netmap_bwrap_krings_create(struct netmap_adapter *na)
struct netmap_bwrap_adapter *bna =
(struct netmap_bwrap_adapter *)na;
struct netmap_adapter *hwna = bna->hwna;
- struct netmap_adapter *hostna = &bna->host.up;
- int error;
+ int i, error = 0;
+ enum txrx t;
ND("%s", na->name);
@@ -2102,26 +2494,23 @@ netmap_bwrap_krings_create(struct netmap_adapter *na)
/* also create the hwna krings */
error = hwna->nm_krings_create(hwna);
if (error) {
- netmap_vp_krings_delete(na);
- return error;
+ goto err_del_vp_rings;
}
- /* the connection between the bwrap krings and the hwna krings
- * will be perfomed later, in the nm_register callback, since
- * now the kring->ring pointers have not been initialized yet
- */
- if (na->na_flags & NAF_HOST_RINGS) {
- /* the hostna rings are the host rings of the bwrap.
- * The corresponding krings must point back to the
- * hostna
- */
- hostna->tx_rings = &na->tx_rings[na->num_tx_rings];
- hostna->tx_rings[0].na = hostna;
- hostna->rx_rings = &na->rx_rings[na->num_rx_rings];
- hostna->rx_rings[0].na = hostna;
+ /* get each ring slot number from the corresponding hwna ring */
+ for_rx_tx(t) {
+ enum txrx r = nm_txrx_swap(t); /* swap NR_TX <-> NR_RX */
+ for (i = 0; i < nma_get_nrings(hwna, r) + 1; i++) {
+ NMR(na, t)[i].nkr_num_slots = NMR(hwna, r)[i].nkr_num_slots;
+ }
}
return 0;
+
+err_del_vp_rings:
+ netmap_vp_krings_delete(na);
+
+ return error;
}
@@ -2149,7 +2538,7 @@ netmap_bwrap_notify(struct netmap_kring *kring, int flags)
u_int ring_n = kring->ring_id;
u_int lim = kring->nkr_num_slots - 1;
struct netmap_kring *hw_kring;
- int error = 0;
+ int error;
ND("%s: na %s hwna %s",
(kring ? kring->name : "NULL!"),
@@ -2157,11 +2546,10 @@ netmap_bwrap_notify(struct netmap_kring *kring, int flags)
(hwna ? hwna->name : "NULL!"));
hw_kring = &hwna->tx_rings[ring_n];
- if (nm_kr_tryget(hw_kring))
- return 0;
+ if (nm_kr_tryget(hw_kring, 0, NULL)) {
+ return ENXIO;
+ }
- if (!nm_netmap_on(hwna))
- return 0;
/* first step: simulate a user wakeup on the rx ring */
netmap_vp_rxsync(kring, flags);
ND("%s[%d] PRE rx(c%3d t%3d l%3d) ring(h%3d c%3d t%3d) tx(c%3d ht%3d t%3d)",
@@ -2175,7 +2563,7 @@ netmap_bwrap_notify(struct netmap_kring *kring, int flags)
hw_kring->rhead = hw_kring->rcur = kring->nr_hwtail;
error = hw_kring->nm_sync(hw_kring, flags);
if (error)
- goto out;
+ goto put_out;
/* third step: now we are back the rx ring */
/* claim ownership on all hw owned bufs */
@@ -2188,9 +2576,10 @@ netmap_bwrap_notify(struct netmap_kring *kring, int flags)
kring->nr_hwcur, kring->nr_hwtail, kring->nkr_hwlease,
ring->head, ring->cur, ring->tail,
hw_kring->nr_hwcur, hw_kring->nr_hwtail, hw_kring->rtail);
-out:
+put_out:
nm_kr_put(hw_kring);
- return error;
+
+ return error ? error : NM_IRQ_COMPLETED;
}
@@ -2217,44 +2606,23 @@ netmap_bwrap_bdg_ctl(struct netmap_adapter *na, struct nmreq *nmr, int attach)
/* nothing to do */
return 0;
}
- npriv = malloc(sizeof(*npriv), M_DEVBUF, M_NOWAIT|M_ZERO);
+ npriv = netmap_priv_new();
if (npriv == NULL)
return ENOMEM;
- error = netmap_do_regif(npriv, na, nmr->nr_ringid, nmr->nr_flags);
+ npriv->np_ifp = na->ifp; /* let the priv destructor release the ref */
+ error = netmap_do_regif(npriv, na, 0, NR_REG_NIC_SW);
if (error) {
- bzero(npriv, sizeof(*npriv));
- free(npriv, M_DEVBUF);
+ netmap_priv_delete(npriv);
return error;
}
bna->na_kpriv = npriv;
na->na_flags |= NAF_BUSY;
} else {
- int last_instance;
-
if (na->active_fds == 0) /* not registered */
return EINVAL;
- last_instance = netmap_dtor_locked(bna->na_kpriv);
- if (!last_instance) {
- D("--- error, trying to detach an entry with active mmaps");
- error = EINVAL;
- } else {
- struct nm_bridge *b = bna->up.na_bdg,
- *bh = bna->host.na_bdg;
- npriv = bna->na_kpriv;
- bna->na_kpriv = NULL;
- D("deleting priv");
-
- bzero(npriv, sizeof(*npriv));
- free(npriv, M_DEVBUF);
- if (b) {
- /* XXX the bwrap dtor should take care
- * of this (2014-06-16)
- */
- netmap_bdg_detach_common(b, bna->up.bdg_port,
- (bh ? bna->host.bdg_port : -1));
- }
- na->na_flags &= ~NAF_BUSY;
- }
+ netmap_priv_delete(bna->na_kpriv);
+ bna->na_kpriv = NULL;
+ na->na_flags &= ~NAF_BUSY;
}
return error;
@@ -2282,6 +2650,8 @@ netmap_bwrap_attach(const char *nr_name, struct netmap_adapter *hwna)
}
na = &bna->up.up;
+ /* make bwrap ifp point to the real ifp */
+ na->ifp = hwna->ifp;
na->na_private = bna;
strncpy(na->name, nr_name, sizeof(na->name));
/* fill the ring data for the bwrap adapter with rx/tx meanings
@@ -2294,7 +2664,7 @@ netmap_bwrap_attach(const char *nr_name, struct netmap_adapter *hwna)
nma_set_ndesc(na, t, nma_get_ndesc(hwna, r));
}
na->nm_dtor = netmap_bwrap_dtor;
- na->nm_register = netmap_bwrap_register;
+ na->nm_register = netmap_bwrap_reg;
// na->nm_txsync = netmap_bwrap_txsync;
// na->nm_rxsync = netmap_bwrap_rxsync;
na->nm_config = netmap_bwrap_config;
@@ -2303,13 +2673,8 @@ netmap_bwrap_attach(const char *nr_name, struct netmap_adapter *hwna)
na->nm_notify = netmap_bwrap_notify;
na->nm_bdg_ctl = netmap_bwrap_bdg_ctl;
na->pdev = hwna->pdev;
- na->nm_mem = netmap_mem_private_new(na->name,
- na->num_tx_rings, na->num_tx_desc,
- na->num_rx_rings, na->num_rx_desc,
- 0, 0, &error);
- na->na_flags |= NAF_MEM_OWNER;
- if (na->nm_mem == NULL)
- goto err_put;
+ na->nm_mem = hwna->nm_mem;
+ na->virt_hdr_len = hwna->virt_hdr_len;
bna->up.retry = 1; /* XXX maybe this should depend on the hwna */
bna->hwna = hwna;
@@ -2349,24 +2714,10 @@ netmap_bwrap_attach(const char *nr_name, struct netmap_adapter *hwna)
if (error) {
goto err_free;
}
- /* make bwrap ifp point to the real ifp
- * NOTE: netmap_attach_common() interprets a non-NULL na->ifp
- * as a request to make the ifp point to the na. Since we
- * do not want to change the na already pointed to by hwna->ifp,
- * the following assignment has to be delayed until now
- */
- na->ifp = hwna->ifp;
hwna->na_flags |= NAF_BUSY;
- /* make hwna point to the allocator we are actually using,
- * so that monitors will be able to find it
- */
- bna->save_nmd = hwna->nm_mem;
- hwna->nm_mem = na->nm_mem;
return 0;
err_free:
- netmap_mem_delete(na->nm_mem);
-err_put:
hwna->na_vp = hwna->na_hostvp = NULL;
netmap_adapter_put(hwna);
free(bna, M_DEVBUF);