Diffstat (limited to 'sys/netinet')
-rw-r--r--  sys/netinet/cc/cc.c                |    2
-rw-r--r--  sys/netinet/in_fib_algo.c          |    2
-rw-r--r--  sys/netinet/in_mcast.c             |  113
-rw-r--r--  sys/netinet/in_pcb.c               |   13
-rw-r--r--  sys/netinet/in_proto.c             |    2
-rw-r--r--  sys/netinet/libalias/alias_db.c    |    2
-rw-r--r--  sys/netinet/raw_ip.c               |    4
-rw-r--r--  sys/netinet/siftr.c                |    2
-rw-r--r--  sys/netinet/tcp_hpts.c             |  933
-rw-r--r--  sys/netinet/tcp_hpts.h             |   50
-rw-r--r--  sys/netinet/tcp_hpts_internal.h    |  184
-rw-r--r--  sys/netinet/tcp_hpts_test.c        | 1682
-rw-r--r--  sys/netinet/tcp_input.c            |    2
-rw-r--r--  sys/netinet/tcp_lro.c              |    9
-rw-r--r--  sys/netinet/tcp_lro_hpts.c         |    3
-rw-r--r--  sys/netinet/tcp_output.c           |    2
-rw-r--r--  sys/netinet/tcp_stacks/bbr.c       |  139
-rw-r--r--  sys/netinet/tcp_stacks/rack.c      |  536
-rw-r--r--  sys/netinet/tcp_stacks/tcp_rack.h  |    1
-rw-r--r--  sys/netinet/tcp_subr.c             |    2
-rw-r--r--  sys/netinet/tcp_syncache.c         |   52
-rw-r--r--  sys/netinet/tcp_timer.c            |    7
-rw-r--r--  sys/netinet/udp_usrreq.c           |   49
-rw-r--r--  sys/netinet/udp_var.h              |    1
24 files changed, 2767 insertions, 1025 deletions
| diff --git a/sys/netinet/cc/cc.c b/sys/netinet/cc/cc.c index c20a20cd983d..bc06616dbf93 100644 --- a/sys/netinet/cc/cc.c +++ b/sys/netinet/cc/cc.c @@ -271,7 +271,7 @@ cc_check_default(struct cc_algo *remove_cc)   * Initialise CC subsystem on system boot.   */  static void -cc_init(void) +cc_init(void *dummy __unused)  {  	CC_LIST_LOCK_INIT();  	STAILQ_INIT(&cc_list); diff --git a/sys/netinet/in_fib_algo.c b/sys/netinet/in_fib_algo.c index 123dacb409e7..95621c300064 100644 --- a/sys/netinet/in_fib_algo.c +++ b/sys/netinet/in_fib_algo.c @@ -767,7 +767,7 @@ struct fib_lookup_module flm_radix4 = {  };  static void -fib4_algo_init(void) +fib4_algo_init(void *dummy __unused)  {  	fib_module_register(&flm_bsearch4); diff --git a/sys/netinet/in_mcast.c b/sys/netinet/in_mcast.c index f5b20c49ffd2..ba112afbf002 100644 --- a/sys/netinet/in_mcast.c +++ b/sys/netinet/in_mcast.c @@ -159,9 +159,6 @@ static struct ip_moptions *  static int	inp_get_source_filters(struct inpcb *, struct sockopt *);  static int	inp_join_group(struct inpcb *, struct sockopt *);  static int	inp_leave_group(struct inpcb *, struct sockopt *); -static struct ifnet * -		inp_lookup_mcast_ifp(const struct inpcb *, -		    const struct sockaddr_in *, const struct in_addr);  static int	inp_block_unblock_source(struct inpcb *, struct sockopt *);  static int	inp_set_multicast_if(struct inpcb *, struct sockopt *);  static int	inp_set_source_filters(struct inpcb *, struct sockopt *); @@ -1832,69 +1829,55 @@ inp_getmoptions(struct inpcb *inp, struct sockopt *sopt)  }  /* - * Look up the ifnet to use for a multicast group membership, - * given the IPv4 address of an interface, and the IPv4 group address. - * - * This routine exists to support legacy multicast applications - * which do not understand that multicast memberships are scoped to - * specific physical links in the networking stack, or which need - * to join link-scope groups before IPv4 addresses are configured. - * - * Use this socket's current FIB number for any required FIB lookup. - * If ina is INADDR_ANY, look up the group address in the unicast FIB, - * and use its ifp; usually, this points to the default next-hop. - * - * If the FIB lookup fails, attempt to use the first non-loopback - * interface with multicast capability in the system as a - * last resort. The legacy IPv4 ASM API requires that we do - * this in order to allow groups to be joined when the routing - * table has not yet been populated during boot. - * - * Returns NULL if no ifp could be found, otherwise return referenced ifp. + * Look up the ifnet to join a multicast group membership via legacy + * IP_ADD_MEMBERSHIP or via more modern MCAST_JOIN_GROUP.   * - * FUTURE: Implement IPv4 source-address selection. + * If the interface index was specified explicitly, just use it.  If the + * address was specified (legacy), try to find matching interface.  Else + * (index == 0 && no address) do a route lookup.  If that fails for a modern + * MCAST_JOIN_GROUP return failure, for legacy IP_ADD_MEMBERSHIP find first + * multicast capable interface.   
*/  static struct ifnet * -inp_lookup_mcast_ifp(const struct inpcb *inp, -    const struct sockaddr_in *gsin, const struct in_addr ina) +inp_lookup_mcast_ifp(const struct inpcb *inp, const struct in_addr maddr, +const struct in_addr *ina, const u_int index)  {  	struct ifnet *ifp;  	struct nhop_object *nh;  	NET_EPOCH_ASSERT(); -	KASSERT(inp != NULL, ("%s: inp must not be NULL", __func__)); -	KASSERT(gsin->sin_family == AF_INET, ("%s: not AF_INET", __func__)); -	KASSERT(IN_MULTICAST(ntohl(gsin->sin_addr.s_addr)), -	    ("%s: not multicast", __func__)); -	ifp = NULL; -	if (!in_nullhost(ina)) { -		INADDR_TO_IFP(ina, ifp); +	if (index != 0) +		return (ifnet_byindex_ref(index)); + +	if (ina != NULL && !in_nullhost(*ina)) { +		INADDR_TO_IFP(*ina, ifp);  		if (ifp != NULL)  			if_ref(ifp); -	} else { -		nh = fib4_lookup(inp->inp_inc.inc_fibnum, gsin->sin_addr, 0, NHR_NONE, 0); -		if (nh != NULL) { -			ifp = nh->nh_ifp; -			if_ref(ifp); -		} else { -			struct in_ifaddr *ia; -			struct ifnet *mifp; - -			mifp = NULL; -			CK_STAILQ_FOREACH(ia, &V_in_ifaddrhead, ia_link) { -				mifp = ia->ia_ifp; -				if (!(mifp->if_flags & IFF_LOOPBACK) && -				     (mifp->if_flags & IFF_MULTICAST)) { -					ifp = mifp; -					if_ref(ifp); -					break; -				} +		return (ifp); +	} + +	nh = fib4_lookup(inp->inp_inc.inc_fibnum, maddr, 0, NHR_NONE, 0); +	if (nh != NULL) { +		ifp = nh->nh_ifp; +		if_ref(ifp); +		return (ifp); +	} + +	if (ina != NULL) { +		struct in_ifaddr *ia; + +		CK_STAILQ_FOREACH(ia, &V_in_ifaddrhead, ia_link) { +			if (!(ia->ia_ifp->if_flags & IFF_LOOPBACK) && +			     (ia->ia_ifp->if_flags & IFF_MULTICAST)) { +				ifp = ia->ia_ifp; +				if_ref(ifp); +				return (ifp);  			}  		}  	} -	return (ifp); +	return (NULL);  }  /* @@ -1926,13 +1909,13 @@ inp_join_group(struct inpcb *inp, struct sockopt *sopt)  	switch (sopt->sopt_name) {  	case IP_ADD_MEMBERSHIP: {  		struct ip_mreqn mreqn; +		bool mreq; -		if (sopt->sopt_valsize == sizeof(struct ip_mreqn)) -			error = sooptcopyin(sopt, &mreqn, -			    sizeof(struct ip_mreqn), sizeof(struct ip_mreqn)); -		else -			error = sooptcopyin(sopt, &mreqn, -			    sizeof(struct ip_mreq), sizeof(struct ip_mreq)); +		mreq = (sopt->sopt_valsize != sizeof(struct ip_mreqn)); + +		error = sooptcopyin(sopt, &mreqn, +		    mreq ? sizeof(struct ip_mreq) : sizeof(struct ip_mreqn), +		    mreq ? sizeof(struct ip_mreq) : sizeof(struct ip_mreqn));  		if (error)  			return (error); @@ -1943,12 +1926,9 @@ inp_join_group(struct inpcb *inp, struct sockopt *sopt)  			return (EINVAL);  		NET_EPOCH_ENTER(et); -		if (sopt->sopt_valsize == sizeof(struct ip_mreqn) && -		    mreqn.imr_ifindex != 0) -			ifp = ifnet_byindex_ref(mreqn.imr_ifindex); -		else -			ifp = inp_lookup_mcast_ifp(inp, &gsa->sin, -			    mreqn.imr_address); +		ifp = inp_lookup_mcast_ifp(inp, mreqn.imr_multiaddr, +		    mreq ? &mreqn.imr_address : NULL, +		    mreq ? 
0 : mreqn.imr_ifindex);  		NET_EPOCH_EXIT(et);  		break;  	} @@ -1971,8 +1951,8 @@ inp_join_group(struct inpcb *inp, struct sockopt *sopt)  		ssa->sin.sin_addr = mreqs.imr_sourceaddr;  		NET_EPOCH_ENTER(et); -		ifp = inp_lookup_mcast_ifp(inp, &gsa->sin, -		    mreqs.imr_interface); +		ifp = inp_lookup_mcast_ifp(inp, mreqs.imr_multiaddr, +		    &mreqs.imr_interface, 0);  		NET_EPOCH_EXIT(et);  		CTR3(KTR_IGMPV3, "%s: imr_interface = 0x%08x, ifp = %p",  		    __func__, ntohl(mreqs.imr_interface.s_addr), ifp); @@ -2013,7 +1993,8 @@ inp_join_group(struct inpcb *inp, struct sockopt *sopt)  			return (EINVAL);  		NET_EPOCH_ENTER(et); -		ifp = ifnet_byindex_ref(gsr.gsr_interface); +		ifp = inp_lookup_mcast_ifp(inp, gsa->sin.sin_addr, NULL, +		    gsr.gsr_interface);  		NET_EPOCH_EXIT(et);  		if (ifp == NULL)  			return (EADDRNOTAVAIL); diff --git a/sys/netinet/in_pcb.c b/sys/netinet/in_pcb.c index dbe48242381d..712ff28768dc 100644 --- a/sys/netinet/in_pcb.c +++ b/sys/netinet/in_pcb.c @@ -2665,10 +2665,13 @@ in_pcbinshash(struct inpcb *inp)  	    INP_PCBPORTHASH(inp->inp_lport, pcbinfo->ipi_porthashmask)];  	/* -	 * Add entry to load balance group. -	 * Only do this if SO_REUSEPORT_LB is set. +	 * Ignore SO_REUSEPORT_LB if the socket is connected.  Really this case +	 * should be an error, but for UDP sockets it is not, and some +	 * applications erroneously set it on connected UDP sockets, so we can't +	 * change this without breaking compatibility.  	 */ -	if ((inp->inp_socket->so_options & SO_REUSEPORT_LB) != 0) { +	if (!connected && +	    (inp->inp_socket->so_options & SO_REUSEPORT_LB) != 0) {  		int error = in_pcbinslbgrouphash(inp, M_NODOM);  		if (error != 0)  			return (error); @@ -2770,6 +2773,10 @@ in_pcbrehash(struct inpcb *inp)  		connected = !in_nullhost(inp->inp_faddr);  	} +	/* See the comment in in_pcbinshash(). */ +	if (connected && (inp->inp_flags & INP_INLBGROUP) != 0) +		in_pcbremlbgrouphash(inp); +  	/*  	 * When rehashing, the caller must ensure that either the new or the old  	 * foreign address was unspecified. 
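As context for the in_mcast.c change above: inp_lookup_mcast_ifp() now serves both join styles with a single precedence (explicit interface index, then interface address, then a route lookup on the group, then, for the legacy API only, the first multicast-capable interface). A minimal userland sketch of the two setsockopt() forms that reach this path follows; the function names, group address handling, and error handling are illustrative and are not part of the commit.

#include <sys/socket.h>
#include <netinet/in.h>
#include <arpa/inet.h>
#include <stdint.h>
#include <string.h>

/*
 * Legacy join (IP_ADD_MEMBERSHIP): the kernel picks the interface from
 * imr_ifindex if set, else imr_address, else a route lookup on the group,
 * else the first multicast-capable interface in the system.
 */
static int
join_legacy(int fd, const char *group, unsigned int ifindex)
{
	struct ip_mreqn mreqn;

	memset(&mreqn, 0, sizeof(mreqn));
	if (inet_pton(AF_INET, group, &mreqn.imr_multiaddr) != 1)
		return (-1);
	mreqn.imr_ifindex = ifindex;	/* 0 lets the kernel choose */
	return (setsockopt(fd, IPPROTO_IP, IP_ADD_MEMBERSHIP, &mreqn,
	    sizeof(mreqn)));
}

/*
 * Modern join (MCAST_JOIN_GROUP): with gr_interface == 0 the kernel falls
 * back to the route lookup only; there is no "first capable interface"
 * fallback, so the join fails if no route to the group exists.
 */
static int
join_modern(int fd, const char *group, uint32_t ifindex)
{
	struct group_req greq;
	struct sockaddr_in *sin = (struct sockaddr_in *)&greq.gr_group;

	memset(&greq, 0, sizeof(greq));
	greq.gr_interface = ifindex;
	sin->sin_family = AF_INET;
	sin->sin_len = sizeof(*sin);
	if (inet_pton(AF_INET, group, &sin->sin_addr) != 1)
		return (-1);
	return (setsockopt(fd, IPPROTO_IP, MCAST_JOIN_GROUP, &greq,
	    sizeof(greq)));
}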
diff --git a/sys/netinet/in_proto.c b/sys/netinet/in_proto.c index db46da6022c5..42a6cf0b5810 100644 --- a/sys/netinet/in_proto.c +++ b/sys/netinet/in_proto.c @@ -108,6 +108,8 @@ SYSCTL_NODE(_net_inet, IPPROTO_ICMP, icmp, CTLFLAG_RW | CTLFLAG_MPSAFE, 0,      "ICMP");  SYSCTL_NODE(_net_inet, IPPROTO_UDP, udp, CTLFLAG_RW | CTLFLAG_MPSAFE, 0,      "UDP"); +SYSCTL_NODE(_net_inet, IPPROTO_UDPLITE, udplite, CTLFLAG_RW | CTLFLAG_MPSAFE, 0, +    "UDP-Lite");  SYSCTL_NODE(_net_inet, IPPROTO_TCP, tcp, CTLFLAG_RW | CTLFLAG_MPSAFE, 0,      "TCP");  #if defined(SCTP) || defined(SCTP_SUPPORT) diff --git a/sys/netinet/libalias/alias_db.c b/sys/netinet/libalias/alias_db.c index c143d74a2f45..41f0a328daec 100644 --- a/sys/netinet/libalias/alias_db.c +++ b/sys/netinet/libalias/alias_db.c @@ -2181,7 +2181,7 @@ LibAliasInit(struct libalias *la)  #undef malloc	/* XXX: ugly */  		la = malloc(sizeof *la, M_ALIAS, M_WAITOK | M_ZERO);  #else -		la = calloc(sizeof *la, 1); +		la = calloc(1, sizeof *la);  		if (la == NULL)  			return (la);  #endif diff --git a/sys/netinet/raw_ip.c b/sys/netinet/raw_ip.c index 66070faf97e9..bfe608be6b36 100644 --- a/sys/netinet/raw_ip.c +++ b/sys/netinet/raw_ip.c @@ -680,7 +680,6 @@ rip_ctloutput(struct socket *so, struct sockopt *sopt)  			break;  		case IP_DUMMYNET3:	/* generic dummynet v.3 functions */ -		case IP_DUMMYNET_GET:  			if (ip_dn_ctl_ptr != NULL)  				error = ip_dn_ctl_ptr(sopt);  			else @@ -747,9 +746,6 @@ rip_ctloutput(struct socket *so, struct sockopt *sopt)  			break;  		case IP_DUMMYNET3:	/* generic dummynet v.3 functions */ -		case IP_DUMMYNET_CONFIGURE: -		case IP_DUMMYNET_DEL: -		case IP_DUMMYNET_FLUSH:  			if (ip_dn_ctl_ptr != NULL)  				error = ip_dn_ctl_ptr(sopt);  			else diff --git a/sys/netinet/siftr.c b/sys/netinet/siftr.c index 374b5595fcbc..5b89ca026e85 100644 --- a/sys/netinet/siftr.c +++ b/sys/netinet/siftr.c @@ -519,7 +519,7 @@ siftr_pkt_manager_thread(void *arg)  			if (log_buf != NULL) {  				alq_post_flags(siftr_alq, log_buf, 0);  			} -			for (;cnt > 0; cnt--) { +			for (; cnt > 0; cnt--) {  				pkt_node = STAILQ_FIRST(&tmp_pkt_queue);  				STAILQ_REMOVE_HEAD(&tmp_pkt_queue, nodes);  				free(pkt_node, M_SIFTR_PKTNODE); diff --git a/sys/netinet/tcp_hpts.c b/sys/netinet/tcp_hpts.c index 63bbe4bba11b..c54459bb5f01 100644 --- a/sys/netinet/tcp_hpts.c +++ b/sys/netinet/tcp_hpts.c @@ -39,15 +39,14 @@   * First, and probably the main thing its used by Rack and BBR, it can   * be used to call tcp_output() of a transport stack at some time in the future.   * The normal way this is done is that tcp_output() of the stack schedules - * itself to be called again by calling tcp_hpts_insert(tcpcb, slot). The - * slot is the time from now that the stack wants to be called but it - * must be converted to tcp_hpts's notion of slot. This is done with - * one of the macros HPTS_MS_TO_SLOTS or HPTS_USEC_TO_SLOTS. So a typical + * itself to be called again by calling tcp_hpts_insert(tcpcb, usecs). The + * usecs is the time from now that the stack wants to be called and is + * passing time directly in microseconds. So a typical   * call from the tcp_output() routine might look like:   * - * tcp_hpts_insert(tp, HPTS_USEC_TO_SLOTS(550)); + * tcp_hpts_insert(tp, 550, NULL);   * - * The above would schedule tcp_output() to be called in 550 useconds. + * The above would schedule tcp_output() to be called in 550 microseconds.   
* Note that if using this mechanism the stack will want to add near   * its top a check to prevent unwanted calls (from user land or the   * arrival of incoming ack's). So it would add something like: @@ -149,27 +148,44 @@  #include <netinet/tcpip.h>  #include <netinet/cc/cc.h>  #include <netinet/tcp_hpts.h> +#include <netinet/tcp_hpts_internal.h>  #include <netinet/tcp_log_buf.h>  #ifdef tcp_offload  #include <netinet/tcp_offload.h>  #endif -/* - * The hpts uses a 102400 wheel. The wheel - * defines the time in 10 usec increments (102400 x 10). - * This gives a range of 10usec - 1024ms to place - * an entry within. If the user requests more than - * 1.024 second, a remaineder is attached and the hpts - * when seeing the remainder will re-insert the - * inpcb forward in time from where it is until - * the remainder is zero. - */ +/* Global instance for TCP HPTS */ +struct tcp_hptsi *tcp_hptsi_pace; + +/* Default function table for production use. */ +const struct tcp_hptsi_funcs tcp_hptsi_default_funcs = { +	.microuptime = microuptime, +	.swi_add = swi_add, +	.swi_remove = swi_remove, +	.swi_sched = swi_sched, +	.intr_event_bind = intr_event_bind, +	.intr_event_bind_ithread_cpuset = intr_event_bind_ithread_cpuset, +	.callout_init = callout_init, +	.callout_reset_sbt_on = callout_reset_sbt_on, +	._callout_stop_safe = _callout_stop_safe, +}; -#define NUM_OF_HPTSI_SLOTS 102400 +#ifdef TCP_HPTS_KTEST +#define microuptime pace->funcs->microuptime +#define swi_add pace->funcs->swi_add +#define swi_remove pace->funcs->swi_remove +#define swi_sched pace->funcs->swi_sched +#define intr_event_bind pace->funcs->intr_event_bind +#define intr_event_bind_ithread_cpuset pace->funcs->intr_event_bind_ithread_cpuset +#define callout_init pace->funcs->callout_init +#define callout_reset_sbt_on pace->funcs->callout_reset_sbt_on +#define _callout_stop_safe pace->funcs->_callout_stop_safe +#endif -/* The number of connections after which the dynamic sleep logic kicks in. */ -#define DEFAULT_CONNECTION_THRESHOLD 100 +static MALLOC_DEFINE(M_TCPHPTS, "tcp_hpts", "TCP hpts"); + +static void tcp_hpts_thread(void *ctx);  /*   * When using the hpts, a TCP stack must make sure @@ -204,87 +220,22 @@   *   * When we are in the "new" mode i.e. conn_cnt > conn_cnt_thresh   * then we do a dynamic adjustment on the time we sleep. - * Our threshold is if the lateness of the first client served (in ticks) is + * Our threshold is if the lateness of the first client served (in slots) is   * greater than or equal too slots_indicate_more_sleep (10ms - * or 10000 ticks). If we were that late, the actual sleep time - * is adjusted down by 50%. If the ticks_ran is less than - * slots_indicate_more_sleep (100 ticks or 1000usecs). + * or 10000 slots). If we were that late, the actual sleep time + * is adjusted down by 50%. If the slots_ran is less than + * slots_indicate_more_sleep (100 slots or 1000usecs).   
*   */ -/* Each hpts has its own p_mtx which is used for locking */ -#define	HPTS_MTX_ASSERT(hpts)	mtx_assert(&(hpts)->p_mtx, MA_OWNED) -#define	HPTS_LOCK(hpts)		mtx_lock(&(hpts)->p_mtx) -#define	HPTS_TRYLOCK(hpts)	mtx_trylock(&(hpts)->p_mtx) -#define	HPTS_UNLOCK(hpts)	mtx_unlock(&(hpts)->p_mtx) -struct tcp_hpts_entry { -	/* Cache line 0x00 */ -	struct mtx p_mtx;	/* Mutex for hpts */ -	struct timeval p_mysleep;	/* Our min sleep time */ -	uint64_t syscall_cnt; -	uint64_t sleeping;	/* What the actual sleep was (if sleeping) */ -	uint16_t p_hpts_active; /* Flag that says hpts is awake  */ -	uint8_t p_wheel_complete; /* have we completed the wheel arc walk? */ -	uint32_t p_curtick;	/* Tick in 10 us the hpts is going to */ -	uint32_t p_runningslot; /* Current tick we are at if we are running */ -	uint32_t p_prev_slot;	/* Previous slot we were on */ -	uint32_t p_cur_slot;	/* Current slot in wheel hpts is draining */ -	uint32_t p_nxt_slot;	/* The next slot outside the current range of -				 * slots that the hpts is running on. */ -	int32_t p_on_queue_cnt;	/* Count on queue in this hpts */ -	uint32_t p_lasttick;	/* Last tick before the current one */ -	uint8_t p_direct_wake :1, /* boolean */ -		p_on_min_sleep:1, /* boolean */ -		p_hpts_wake_scheduled:1, /* boolean */ -		hit_callout_thresh:1, -		p_avail:4; -	uint8_t p_fill[3];	  /* Fill to 32 bits */ -	/* Cache line 0x40 */ -	struct hptsh { -		TAILQ_HEAD(, tcpcb)	head; -		uint32_t		count; -		uint32_t		gencnt; -	} *p_hptss;			/* Hptsi wheel */ -	uint32_t p_hpts_sleep_time;	/* Current sleep interval having a max -					 * of 255ms */ -	uint32_t overidden_sleep;	/* what was overrided by min-sleep for logging */ -	uint32_t saved_lasttick;	/* for logging */ -	uint32_t saved_curtick;		/* for logging */ -	uint32_t saved_curslot;		/* for logging */ -	uint32_t saved_prev_slot;       /* for logging */ -	uint32_t p_delayed_by;	/* How much were we delayed by */ -	/* Cache line 0x80 */ -	struct sysctl_ctx_list hpts_ctx; -	struct sysctl_oid *hpts_root; -	struct intr_event *ie; -	void *ie_cookie; -	uint16_t p_num;		/* The hpts number one per cpu */ -	uint16_t p_cpu;		/* The hpts CPU */ -	/* There is extra space in here */ -	/* Cache line 0x100 */ -	struct callout co __aligned(CACHE_LINE_SIZE); -}               __aligned(CACHE_LINE_SIZE); - -static struct tcp_hptsi { -	struct cpu_group **grps; -	struct tcp_hpts_entry **rp_ent;	/* Array of hptss */ -	uint32_t *cts_last_ran; -	uint32_t grp_cnt; -	uint32_t rp_num_hptss;	/* Number of hpts threads */ -} tcp_pace; - -static MALLOC_DEFINE(M_TCPHPTS, "tcp_hpts", "TCP hpts");  #ifdef RSS -static int tcp_bind_threads = 1; +int tcp_bind_threads = 1;  #else -static int tcp_bind_threads = 2; +int tcp_bind_threads = 2;  #endif  static int tcp_use_irq_cpu = 0;  static int hpts_does_tp_logging = 0; - -static int32_t tcp_hptsi(struct tcp_hpts_entry *hpts, bool from_callout); -static void tcp_hpts_thread(void *ctx); - +static int32_t tcp_hpts_precision = 120;  int32_t tcp_min_hptsi_time = DEFAULT_MIN_SLEEP;  static int conn_cnt_thresh = DEFAULT_CONNECTION_THRESHOLD;  static int32_t dynamic_min_sleep = DYNAMIC_MIN_SLEEP; @@ -295,23 +246,6 @@ SYSCTL_NODE(_net_inet_tcp, OID_AUTO, hpts, CTLFLAG_RW | CTLFLAG_MPSAFE, 0,  SYSCTL_NODE(_net_inet_tcp_hpts, OID_AUTO, stats, CTLFLAG_RD | CTLFLAG_MPSAFE, 0,      "TCP Hpts statistics"); -#define	timersub(tvp, uvp, vvp)						\ -	do {								\ -		(vvp)->tv_sec = (tvp)->tv_sec - (uvp)->tv_sec;		\ -		(vvp)->tv_usec = (tvp)->tv_usec - (uvp)->tv_usec;	\ -		if ((vvp)->tv_usec < 0) {				\ -			
(vvp)->tv_sec--;				\ -			(vvp)->tv_usec += 1000000;			\ -		}							\ -	} while (0) - -static int32_t tcp_hpts_precision = 120; - -static struct hpts_domain_info { -	int count; -	int cpu[MAXCPU]; -} hpts_domains[MAXMEMDOM]; -  counter_u64_t hpts_hopelessly_behind;  SYSCTL_COUNTER_U64(_net_inet_tcp_hpts_stats, OID_AUTO, hopeless, CTLFLAG_RD, @@ -459,14 +393,14 @@ SYSCTL_INT(_net_inet_tcp_hpts, OID_AUTO, nowake_over_thresh, CTLFLAG_RW,      &tcp_hpts_no_wake_over_thresh, 0,      "When we are over the threshold on the pacer do we prohibit wakeups?"); -static uint16_t -hpts_random_cpu(void) +uint16_t +tcp_hptsi_random_cpu(struct tcp_hptsi *pace)  {  	uint16_t cpuid;  	uint32_t ran;  	ran = arc4random(); -	cpuid = (((ran & 0xffff) % mp_ncpus) % tcp_pace.rp_num_hptss); +	cpuid = (((ran & 0xffff) % mp_ncpus) % pace->rp_num_hptss);  	return (cpuid);  } @@ -487,13 +421,11 @@ tcp_hpts_log(struct tcp_hpts_entry *hpts, struct tcpcb *tp, struct timeval *tv,  		log.u_bbr.flex2 = hpts->p_cur_slot;  		log.u_bbr.flex3 = hpts->p_prev_slot;  		log.u_bbr.flex4 = idx; -		log.u_bbr.flex5 = hpts->p_curtick;  		log.u_bbr.flex6 = hpts->p_on_queue_cnt;  		log.u_bbr.flex7 = hpts->p_cpu;  		log.u_bbr.flex8 = (uint8_t)from_callout;  		log.u_bbr.inflight = slots_to_run;  		log.u_bbr.applimited = hpts->overidden_sleep; -		log.u_bbr.delivered = hpts->saved_curtick;  		log.u_bbr.timeStamp = tcp_tv_to_usec(tv);  		log.u_bbr.epoch = hpts->saved_curslot;  		log.u_bbr.lt_epoch = hpts->saved_prev_slot; @@ -510,11 +442,67 @@ tcp_hpts_log(struct tcp_hpts_entry *hpts, struct tcpcb *tp, struct timeval *tv,  	}  } +/* + * Timeout handler for the HPTS sleep callout. It immediately schedules the SWI + * for the HPTS entry to run. + */  static void -tcp_wakehpts(struct tcp_hpts_entry *hpts) +tcp_hpts_sleep_timeout(void *arg)  { +#ifdef TCP_HPTS_KTEST +	struct tcp_hptsi *pace; +#endif +	struct tcp_hpts_entry *hpts; + +	hpts = (struct tcp_hpts_entry *)arg; +#ifdef TCP_HPTS_KTEST +	pace = hpts->p_hptsi; +#endif +	swi_sched(hpts->ie_cookie, 0); +} + +/* + * Reset the HPTS callout timer with the provided timeval. Returns the results + * of the callout_reset_sbt_on() function. + */ +static int +tcp_hpts_sleep(struct tcp_hpts_entry *hpts, struct timeval *tv) +{ +#ifdef TCP_HPTS_KTEST +	struct tcp_hptsi *pace; +#endif +	sbintime_t sb; + +#ifdef TCP_HPTS_KTEST +	pace = hpts->p_hptsi; +#endif + +	/* Store off to make visible the actual sleep time */ +	hpts->sleeping = tv->tv_usec; + +	sb = tvtosbt(*tv); +	return (callout_reset_sbt_on( +		    &hpts->co, sb, 0, tcp_hpts_sleep_timeout, hpts, hpts->p_cpu, +		    (C_DIRECT_EXEC | C_PREL(tcp_hpts_precision)))); +} + +/* + * Schedules the SWI for the HTPS entry to run, if not already scheduled or + * running. 
+ */ +void +tcp_hpts_wake(struct tcp_hpts_entry *hpts) +{ +#ifdef TCP_HPTS_KTEST +	struct tcp_hptsi *pace; +#endif +  	HPTS_MTX_ASSERT(hpts); +#ifdef TCP_HPTS_KTEST +	pace = hpts->p_hptsi; +#endif +  	if (tcp_hpts_no_wake_over_thresh && (hpts->p_on_queue_cnt >= conn_cnt_thresh)) {  		hpts->p_direct_wake = 0;  		return; @@ -526,15 +514,6 @@ tcp_wakehpts(struct tcp_hpts_entry *hpts)  }  static void -hpts_timeout_swi(void *arg) -{ -	struct tcp_hpts_entry *hpts; - -	hpts = (struct tcp_hpts_entry *)arg; -	swi_sched(hpts->ie_cookie, 0); -} - -static void  tcp_hpts_insert_internal(struct tcpcb *tp, struct tcp_hpts_entry *hpts)  {  	struct inpcb *inp = tptoinpcb(tp); @@ -562,13 +541,13 @@ tcp_hpts_insert_internal(struct tcpcb *tp, struct tcp_hpts_entry *hpts)  }  static struct tcp_hpts_entry * -tcp_hpts_lock(struct tcpcb *tp) +tcp_hpts_lock(struct tcp_hptsi *pace, struct tcpcb *tp)  {  	struct tcp_hpts_entry *hpts;  	INP_LOCK_ASSERT(tptoinpcb(tp)); -	hpts = tcp_pace.rp_ent[tp->t_hpts_cpu]; +	hpts = pace->rp_ent[tp->t_hpts_cpu];  	HPTS_LOCK(hpts);  	return (hpts); @@ -595,11 +574,10 @@ tcp_hpts_release(struct tcpcb *tp)   * and has never received a first packet.   */  void -tcp_hpts_init(struct tcpcb *tp) +__tcp_hpts_init(struct tcp_hptsi *pace, struct tcpcb *tp)  { -  	if (__predict_true(tp->t_hpts_cpu == HPTS_CPU_NONE)) { -		tp->t_hpts_cpu = hpts_random_cpu(); +		tp->t_hpts_cpu = tcp_hptsi_random_cpu(pace);  		MPASS(!(tp->t_flags2 & TF2_HPTS_CPU_SET));  	}  } @@ -611,14 +589,14 @@ tcp_hpts_init(struct tcpcb *tp)   * INP lock and then get the hpts lock.   */  void -tcp_hpts_remove(struct tcpcb *tp) +__tcp_hpts_remove(struct tcp_hptsi *pace, struct tcpcb *tp)  {  	struct tcp_hpts_entry *hpts;  	struct hptsh *hptsh;  	INP_WLOCK_ASSERT(tptoinpcb(tp)); -	hpts = tcp_hpts_lock(tp); +	hpts = tcp_hpts_lock(pace, tp);  	if (tp->t_in_hpts == IHPTS_ONQUEUE) {  		hptsh = &hpts->p_hptss[tp->t_hpts_slot];  		tp->t_hpts_request = 0; @@ -662,23 +640,19 @@ hpts_slot(uint32_t wheel_slot, uint32_t plus)  {  	/*  	 * Given a slot on the wheel, what slot -	 * is that plus ticks out? +	 * is that plus slots out?  	 */ -	KASSERT(wheel_slot < NUM_OF_HPTSI_SLOTS, ("Invalid tick %u not on wheel", wheel_slot)); +	KASSERT(wheel_slot < NUM_OF_HPTSI_SLOTS, ("Invalid slot %u not on wheel", wheel_slot));  	return ((wheel_slot + plus) % NUM_OF_HPTSI_SLOTS);  }  static inline int -tick_to_wheel(uint32_t cts_in_wticks) +cts_to_wheel(uint32_t cts)  {  	/* -	 * Given a timestamp in ticks (so by -	 * default to get it to a real time one -	 * would multiply by 10.. i.e the number -	 * of ticks in a slot) map it to our limited -	 * space wheel. +	 * Given a timestamp in useconds map it to our limited space wheel.  	 */ -	return (cts_in_wticks % NUM_OF_HPTSI_SLOTS); +	return ((cts / HPTS_USECS_PER_SLOT) % NUM_OF_HPTSI_SLOTS);  }  static inline int @@ -721,7 +695,7 @@ max_slots_available(struct tcp_hpts_entry *hpts, uint32_t wheel_slot, uint32_t *  	if ((hpts->p_hpts_active == 1) &&  	    (hpts->p_wheel_complete == 0)) {  		end_slot = hpts->p_runningslot; -		/* Back up one tick */ +		/* Back up one slot */  		if (end_slot == 0)  			end_slot = NUM_OF_HPTSI_SLOTS - 1;  		else @@ -734,7 +708,7 @@ max_slots_available(struct tcp_hpts_entry *hpts, uint32_t wheel_slot, uint32_t *  		 * not active, or we have  		 * completed the pass over  		 * the wheel, we can use the -		 * prev tick and subtract one from it. This puts us +		 * prev slot and subtract one from it. This puts us  		 * as far out as possible on the wheel.  		 
*/  		end_slot = hpts->p_prev_slot; @@ -747,7 +721,7 @@ max_slots_available(struct tcp_hpts_entry *hpts, uint32_t wheel_slot, uint32_t *  		/*  		 * Now we have close to the full wheel left minus the  		 * time it has been since the pacer went to sleep. Note -		 * that wheel_tick, passed in, should be the current time +		 * that wheel_slot, passed in, should be the current time  		 * from the perspective of the caller, mapped to the wheel.  		 */  		if (hpts->p_prev_slot != wheel_slot) @@ -824,7 +798,7 @@ max_slots_available(struct tcp_hpts_entry *hpts, uint32_t wheel_slot, uint32_t *  #ifdef INVARIANTS  static void  check_if_slot_would_be_wrong(struct tcp_hpts_entry *hpts, struct tcpcb *tp, -    uint32_t hptsslot, int line) +    uint32_t hptsslot)  {  	/*  	 * Sanity checks for the pacer with invariants @@ -855,12 +829,13 @@ check_if_slot_would_be_wrong(struct tcp_hpts_entry *hpts, struct tcpcb *tp,  }  #endif -uint32_t -tcp_hpts_insert_diag(struct tcpcb *tp, uint32_t slot, int32_t line, struct hpts_diag *diag) +void +__tcp_hpts_insert(struct tcp_hptsi *pace, struct tcpcb *tp, uint32_t usecs, +	struct hpts_diag *diag)  {  	struct tcp_hpts_entry *hpts;  	struct timeval tv; -	uint32_t slot_on, wheel_cts, last_slot, need_new_to = 0; +	uint32_t slot, wheel_cts, last_slot, need_new_to = 0;  	int32_t wheel_slot, maxslots;  	bool need_wakeup = false; @@ -869,11 +844,13 @@ tcp_hpts_insert_diag(struct tcpcb *tp, uint32_t slot, int32_t line, struct hpts_  	MPASS(!(tp->t_in_hpts == IHPTS_ONQUEUE));  	/* +	 * Convert microseconds to slots for internal use.  	 * We now return the next-slot the hpts will be on, beyond its  	 * current run (if up) or where it was when it stopped if it is  	 * sleeping.  	 */ -	hpts = tcp_hpts_lock(tp); +	slot = HPTS_USEC_TO_SLOTS(usecs); +	hpts = tcp_hpts_lock(pace, tp);  	microuptime(&tv);  	if (diag) {  		memset(diag, 0, sizeof(struct hpts_diag)); @@ -882,8 +859,6 @@ tcp_hpts_insert_diag(struct tcpcb *tp, uint32_t slot, int32_t line, struct hpts_  		diag->p_runningslot = hpts->p_runningslot;  		diag->p_nxt_slot = hpts->p_nxt_slot;  		diag->p_cur_slot = hpts->p_cur_slot; -		diag->p_curtick = hpts->p_curtick; -		diag->p_lasttick = hpts->p_lasttick;  		diag->slot_req = slot;  		diag->p_on_min_sleep = hpts->p_on_min_sleep;  		diag->hpts_sleep_time = hpts->p_hpts_sleep_time; @@ -910,17 +885,15 @@ tcp_hpts_insert_diag(struct tcpcb *tp, uint32_t slot, int32_t line, struct hpts_  			 * timeout is not 1.  			 */  			hpts->p_direct_wake = 1; -			tcp_wakehpts(hpts); +			tcp_hpts_wake(hpts);  		} -		slot_on = hpts->p_nxt_slot;  		HPTS_UNLOCK(hpts); -		return (slot_on); +		return;  	} -	/* Get the current time relative to the wheel */ -	wheel_cts = tcp_tv_to_hpts_slot(&tv); -	/* Map it onto the wheel */ -	wheel_slot = tick_to_wheel(wheel_cts); +	/* Get the current time stamp and map it onto the wheel */ +	wheel_cts = tcp_tv_to_usec(&tv); +	wheel_slot = cts_to_wheel(wheel_cts);  	/* Now what's the max we can place it at? 
*/  	maxslots = max_slots_available(hpts, wheel_slot, &last_slot);  	if (diag) { @@ -952,11 +925,11 @@ tcp_hpts_insert_diag(struct tcpcb *tp, uint32_t slot, int32_t line, struct hpts_  		tp->t_hpts_slot = last_slot;  	}  	if (diag) { -		diag->slot_remaining = tp->t_hpts_request; +		diag->time_remaining = tp->t_hpts_request;  		diag->inp_hptsslot = tp->t_hpts_slot;  	}  #ifdef INVARIANTS -	check_if_slot_would_be_wrong(hpts, tp, tp->t_hpts_slot, line); +	check_if_slot_would_be_wrong(hpts, tp, tp->t_hpts_slot);  #endif  	if (__predict_true(tp->t_in_hpts != IHPTS_MOVING))  		tcp_hpts_insert_internal(tp, hpts); @@ -995,12 +968,12 @@ tcp_hpts_insert_diag(struct tcpcb *tp, uint32_t slot, int32_t line, struct hpts_  	}  	/*  	 * Now how far is the hpts sleeping to? if active is 1, its -	 * up and ticking we do nothing, otherwise we may need to +	 * up and running we do nothing, otherwise we may need to  	 * reschedule its callout if need_new_to is set from above.  	 */  	if (need_wakeup) {  		hpts->p_direct_wake = 1; -		tcp_wakehpts(hpts); +		tcp_hpts_wake(hpts);  		if (diag) {  			diag->need_new_to = 0;  			diag->co_ret = 0xffff0000; @@ -1008,7 +981,6 @@ tcp_hpts_insert_diag(struct tcpcb *tp, uint32_t slot, int32_t line, struct hpts_  	} else if (need_new_to) {  		int32_t co_ret;  		struct timeval tv; -		sbintime_t sb;  		tv.tv_sec = 0;  		tv.tv_usec = 0; @@ -1016,24 +988,18 @@ tcp_hpts_insert_diag(struct tcpcb *tp, uint32_t slot, int32_t line, struct hpts_  			tv.tv_sec++;  			need_new_to -= HPTS_USEC_IN_SEC;  		} -		tv.tv_usec = need_new_to; -		sb = tvtosbt(tv); -		co_ret = callout_reset_sbt_on(&hpts->co, sb, 0, -					      hpts_timeout_swi, hpts, hpts->p_cpu, -					      (C_DIRECT_EXEC | C_PREL(tcp_hpts_precision))); +		tv.tv_usec = need_new_to; /* XXX: Why is this sleeping over the max? */ +		co_ret = tcp_hpts_sleep(hpts, &tv);  		if (diag) {  			diag->need_new_to = need_new_to;  			diag->co_ret = co_ret;  		}  	} -	slot_on = hpts->p_nxt_slot;  	HPTS_UNLOCK(hpts); - -	return (slot_on);  }  static uint16_t -hpts_cpuid(struct tcpcb *tp, int *failed) +hpts_cpuid(struct tcp_hptsi *pace, struct tcpcb *tp, int *failed)  {  	struct inpcb *inp = tptoinpcb(tp);  	u_int cpuid; @@ -1060,7 +1026,7 @@ hpts_cpuid(struct tcpcb *tp, int *failed)  #ifdef RSS  	cpuid = rss_hash2cpuid(inp->inp_flowid, inp->inp_flowtype);  	if (cpuid == NETISR_CPUID_NONE) -		return (hpts_random_cpu()); +		return (tcp_hptsi_random_cpu(pace));  	else  		return (cpuid);  #endif @@ -1071,7 +1037,7 @@ hpts_cpuid(struct tcpcb *tp, int *failed)  	 */  	if (inp->inp_flowtype == M_HASHTYPE_NONE) {  		counter_u64_add(cpu_uses_random, 1); -		return (hpts_random_cpu()); +		return (tcp_hptsi_random_cpu(pace));  	}  	/*  	 * Hash to a thread based on the flowid.  
If we are using numa, @@ -1086,7 +1052,7 @@ hpts_cpuid(struct tcpcb *tp, int *failed)  #ifdef NUMA  	} else {  		/* Hash into the cpu's that use that domain */ -		di = &hpts_domains[inp->inp_numa_domain]; +		di = &pace->domains[inp->inp_numa_domain];  		cpuid = di->cpu[inp->inp_flowid % di->count];  	}  #endif @@ -1118,9 +1084,16 @@ tcp_hpts_set_max_sleep(struct tcp_hpts_entry *hpts, int wrap_loop_cnt)  	}  } -static int32_t +static bool +tcp_hpts_different_slots(uint32_t cts, uint32_t cts_last_run) +{ +	return ((cts / HPTS_USECS_PER_SLOT) != (cts_last_run / HPTS_USECS_PER_SLOT)); +} + +int32_t  tcp_hptsi(struct tcp_hpts_entry *hpts, bool from_callout)  { +	struct tcp_hptsi *pace;  	struct tcpcb *tp;  	struct timeval tv;  	int32_t slots_to_run, i, error; @@ -1130,6 +1103,7 @@ tcp_hptsi(struct tcp_hpts_entry *hpts, bool from_callout)  	int32_t wrap_loop_cnt = 0;  	int32_t slot_pos_of_endpoint = 0;  	int32_t orig_exit_slot; +	uint32_t cts, cts_last_run;  	bool completed_measure, seen_endpoint;  	completed_measure = false; @@ -1137,32 +1111,34 @@ tcp_hptsi(struct tcp_hpts_entry *hpts, bool from_callout)  	HPTS_MTX_ASSERT(hpts);  	NET_EPOCH_ASSERT(); + +	pace = hpts->p_hptsi; +	MPASS(pace != NULL); +  	/* record previous info for any logging */ -	hpts->saved_lasttick = hpts->p_lasttick; -	hpts->saved_curtick = hpts->p_curtick;  	hpts->saved_curslot = hpts->p_cur_slot;  	hpts->saved_prev_slot = hpts->p_prev_slot; -	hpts->p_lasttick = hpts->p_curtick; -	hpts->p_curtick = tcp_gethptstick(&tv); -	tcp_pace.cts_last_ran[hpts->p_num] = tcp_tv_to_usec(&tv); -	orig_exit_slot = hpts->p_cur_slot = tick_to_wheel(hpts->p_curtick); +	microuptime(&tv); +	cts_last_run = pace->cts_last_ran[hpts->p_cpu]; +	pace->cts_last_ran[hpts->p_cpu] = cts = tcp_tv_to_usec(&tv); + +	orig_exit_slot = hpts->p_cur_slot = cts_to_wheel(cts);  	if ((hpts->p_on_queue_cnt == 0) || -	    (hpts->p_lasttick == hpts->p_curtick)) { +	    !tcp_hpts_different_slots(cts, cts_last_run)) {  		/* -		 * No time has yet passed, -		 * or nothing to do. +		 * Not enough time has yet passed or nothing to do.  		 */  		hpts->p_prev_slot = hpts->p_cur_slot; -		hpts->p_lasttick = hpts->p_curtick;  		goto no_run;  	}  again:  	hpts->p_wheel_complete = 0;  	HPTS_MTX_ASSERT(hpts);  	slots_to_run = hpts_slots_diff(hpts->p_prev_slot, hpts->p_cur_slot); -	if (((hpts->p_curtick - hpts->p_lasttick) > (NUM_OF_HPTSI_SLOTS - 1)) && -	    (hpts->p_on_queue_cnt != 0)) { +	if ((hpts->p_on_queue_cnt != 0) && +	    ((cts - cts_last_run) > +	     ((NUM_OF_HPTSI_SLOTS-1) * HPTS_USECS_PER_SLOT))) {  		/*  		 * Wheel wrap is occuring, basically we  		 * are behind and the distance between @@ -1238,7 +1214,7 @@ again:  		uint32_t runningslot;  		/* -		 * Calculate our delay, if there are no extra ticks there +		 * Calculate our delay, if there are no extra slots there  		 * was not any (i.e. if slots_to_run == 1, no delay).  		 */  		hpts->p_delayed_by = (slots_to_run - (i + 1)) * @@ -1391,7 +1367,7 @@ again:  				 * gets added to the hpts (not this one)  				 * :-)  				 */ -				tcp_set_hpts(tp); +				__tcp_set_hpts(pace, tp);  			}  			CURVNET_SET(inp->inp_vnet);  			/* Lets do any logging that we might want to */ @@ -1450,10 +1426,12 @@ no_one:  	hpts->p_delayed_by = 0;  	/*  	 * Check to see if we took an excess amount of time and need to run -	 * more ticks (if we did not hit eno-bufs). +	 * more slots (if we did not hit eno-bufs).  	 
*/  	hpts->p_prev_slot = hpts->p_cur_slot; -	hpts->p_lasttick = hpts->p_curtick; +	microuptime(&tv); +	cts_last_run = cts; +	cts = tcp_tv_to_usec(&tv);  	if (!from_callout || (loop_cnt > max_pacer_loops)) {  		/*  		 * Something is serious slow we have @@ -1465,7 +1443,7 @@ no_one:  		 * can never catch up :(  		 *  		 * We will just lie to this thread -		 * and let it thing p_curtick is +		 * and let it think p_curslot is  		 * correct. When it next awakens  		 * it will find itself further behind.  		 */ @@ -1473,20 +1451,19 @@ no_one:  			counter_u64_add(hpts_hopelessly_behind, 1);  		goto no_run;  	} -	hpts->p_curtick = tcp_gethptstick(&tv); -	hpts->p_cur_slot = tick_to_wheel(hpts->p_curtick); + +	hpts->p_cur_slot = cts_to_wheel(cts);  	if (!seen_endpoint) {  		/* We saw no endpoint but we may be looping */  		orig_exit_slot = hpts->p_cur_slot;  	} -	if ((wrap_loop_cnt < 2) && -	    (hpts->p_lasttick != hpts->p_curtick)) { +	if ((wrap_loop_cnt < 2) && tcp_hpts_different_slots(cts, cts_last_run)) {  		counter_u64_add(hpts_loops, 1);  		loop_cnt++;  		goto again;  	}  no_run: -	tcp_pace.cts_last_ran[hpts->p_num] = tcp_tv_to_usec(&tv); +	pace->cts_last_ran[hpts->p_cpu] = cts;  	/*  	 * Set flag to tell that we are done for  	 * any slot input that happens during @@ -1494,25 +1471,36 @@ no_run:  	 */  	hpts->p_wheel_complete = 1;  	/* -	 * Now did we spend too long running input and need to run more ticks? -	 * Note that if wrap_loop_cnt < 2 then we should have the conditions -	 * in the KASSERT's true. But if the wheel is behind i.e. wrap_loop_cnt -	 * is greater than 2, then the condtion most likely are *not* true. -	 * Also if we are called not from the callout, we don't run the wheel -	 * multiple times so the slots may not align either. -	 */ -	KASSERT(((hpts->p_prev_slot == hpts->p_cur_slot) || -		 (wrap_loop_cnt >= 2) || !from_callout), -		("H:%p p_prev_slot:%u not equal to p_cur_slot:%u", hpts, -		 hpts->p_prev_slot, hpts->p_cur_slot)); -	KASSERT(((hpts->p_lasttick == hpts->p_curtick) -		 || (wrap_loop_cnt >= 2) || !from_callout), -		("H:%p p_lasttick:%u not equal to p_curtick:%u", hpts, -		 hpts->p_lasttick, hpts->p_curtick)); -	if (from_callout && (hpts->p_lasttick != hpts->p_curtick)) { -		hpts->p_curtick = tcp_gethptstick(&tv); +	* If enough time has elapsed that we should be processing the next +	* slot(s), then we should have kept running and not marked the wheel as +	* complete. +	* +	* But there are several other conditions where we would have stopped +	* processing, so the prev/cur slots and cts variables won't match. +	* These conditions are: +	* +	* - Calls not from callouts don't run multiple times +	* - The wheel is empty +	* - We've processed more than max_pacer_loops times +	* - We've wrapped more than 2 times +	* +	* This assert catches when the logic above has violated this design. +	* +	*/ +	KASSERT((!from_callout || (hpts->p_on_queue_cnt == 0) || +		 (loop_cnt > max_pacer_loops) || (wrap_loop_cnt >= 2) || +		 ((hpts->p_prev_slot == hpts->p_cur_slot) && +		  !tcp_hpts_different_slots(cts, cts_last_run))), +		("H:%p Shouldn't be done! 
prev_slot:%u, cur_slot:%u, " +		 "cts_last_run:%u, cts:%u, loop_cnt:%d, wrap_loop_cnt:%d", +		 hpts, hpts->p_prev_slot, hpts->p_cur_slot, +		 cts_last_run, cts, loop_cnt, wrap_loop_cnt)); + +	if (from_callout && tcp_hpts_different_slots(cts, cts_last_run)) { +		microuptime(&tv); +		cts = tcp_tv_to_usec(&tv); +		hpts->p_cur_slot = cts_to_wheel(cts);  		counter_u64_add(hpts_loops, 1); -		hpts->p_cur_slot = tick_to_wheel(hpts->p_curtick);  		goto again;  	} @@ -1526,16 +1514,16 @@ no_run:  }  void -tcp_set_hpts(struct tcpcb *tp) +__tcp_set_hpts(struct tcp_hptsi *pace, struct tcpcb *tp)  {  	struct tcp_hpts_entry *hpts;  	int failed;  	INP_WLOCK_ASSERT(tptoinpcb(tp)); -	hpts = tcp_hpts_lock(tp); +	hpts = tcp_hpts_lock(pace, tp);  	if (tp->t_in_hpts == IHPTS_NONE && !(tp->t_flags2 & TF2_HPTS_CPU_SET)) { -		tp->t_hpts_cpu = hpts_cpuid(tp, &failed); +		tp->t_hpts_cpu = hpts_cpuid(pace, tp, &failed);  		if (failed == 0)  			tp->t_flags2 |= TF2_HPTS_CPU_SET;  	} @@ -1543,33 +1531,35 @@ tcp_set_hpts(struct tcpcb *tp)  }  static struct tcp_hpts_entry * -tcp_choose_hpts_to_run(void) +tcp_choose_hpts_to_run(struct tcp_hptsi *pace)  { +	struct timeval tv;  	int i, oldest_idx, start, end;  	uint32_t cts, time_since_ran, calc; -	cts = tcp_get_usecs(NULL); +	microuptime(&tv); +	cts = tcp_tv_to_usec(&tv);  	time_since_ran = 0;  	/* Default is all one group */  	start = 0; -	end = tcp_pace.rp_num_hptss; +	end = pace->rp_num_hptss;  	/*  	 * If we have more than one L3 group figure out which one  	 * this CPU is in.  	 */ -	if (tcp_pace.grp_cnt > 1) { -		for (i = 0; i < tcp_pace.grp_cnt; i++) { -			if (CPU_ISSET(curcpu, &tcp_pace.grps[i]->cg_mask)) { -				start = tcp_pace.grps[i]->cg_first; -				end = (tcp_pace.grps[i]->cg_last + 1); +	if (pace->grp_cnt > 1) { +		for (i = 0; i < pace->grp_cnt; i++) { +			if (CPU_ISSET(curcpu, &pace->grps[i]->cg_mask)) { +				start = pace->grps[i]->cg_first; +				end = (pace->grps[i]->cg_last + 1);  				break;  			}  		}  	}  	oldest_idx = -1;  	for (i = start; i < end; i++) { -		if (TSTMP_GT(cts, tcp_pace.cts_last_ran[i])) -			calc = cts - tcp_pace.cts_last_ran[i]; +		if (TSTMP_GT(cts, pace->cts_last_ran[i])) +			calc = cts - pace->cts_last_ran[i];  		else  			calc = 0;  		if (calc > time_since_ran) { @@ -1578,9 +1568,9 @@ tcp_choose_hpts_to_run(void)  		}  	}  	if (oldest_idx >= 0) -		return(tcp_pace.rp_ent[oldest_idx]); +		return(pace->rp_ent[oldest_idx]);  	else -		return(tcp_pace.rp_ent[(curcpu % tcp_pace.rp_num_hptss)]); +		return(pace->rp_ent[(curcpu % pace->rp_num_hptss)]);  }  static void @@ -1588,9 +1578,9 @@ __tcp_run_hpts(void)  {  	struct epoch_tracker et;  	struct tcp_hpts_entry *hpts; -	int ticks_ran; +	int slots_ran; -	hpts = tcp_choose_hpts_to_run(); +	hpts = tcp_choose_hpts_to_run(tcp_hptsi_pace);  	if (hpts->p_hpts_active) {  		/* Already active */ @@ -1606,12 +1596,11 @@ __tcp_run_hpts(void)  	hpts->syscall_cnt++;  	counter_u64_add(hpts_direct_call, 1);  	hpts->p_hpts_active = 1; -	ticks_ran = tcp_hptsi(hpts, false); +	slots_ran = tcp_hptsi(hpts, false);  	/* We may want to adjust the sleep values here */  	if (hpts->p_on_queue_cnt >= conn_cnt_thresh) { -		if (ticks_ran > slots_indicate_less_sleep) { +		if (slots_ran > slots_indicate_less_sleep) {  			struct timeval tv; -			sbintime_t sb;  			hpts->p_mysleep.tv_usec /= 2;  			if (hpts->p_mysleep.tv_usec < dynamic_min_sleep) @@ -1635,13 +1624,8 @@ __tcp_run_hpts(void)  			 * the dynamic value and set the on_min_sleep  			 * flag so we will not be awoken.  			 
*/ -			sb = tvtosbt(tv); -			/* Store off to make visible the actual sleep time */ -			hpts->sleeping = tv.tv_usec; -			callout_reset_sbt_on(&hpts->co, sb, 0, -					     hpts_timeout_swi, hpts, hpts->p_cpu, -					     (C_DIRECT_EXEC | C_PREL(tcp_hpts_precision))); -		} else if (ticks_ran < slots_indicate_more_sleep) { +			(void)tcp_hpts_sleep(hpts, &tv); +		} else if (slots_ran < slots_indicate_more_sleep) {  			/* For the further sleep, don't reschedule  hpts */  			hpts->p_mysleep.tv_usec *= 2;  			if (hpts->p_mysleep.tv_usec > dynamic_max_sleep) @@ -1658,17 +1642,22 @@ out_with_mtx:  static void  tcp_hpts_thread(void *ctx)  { +#ifdef TCP_HPTS_KTEST +	struct tcp_hptsi *pace; +#endif  	struct tcp_hpts_entry *hpts;  	struct epoch_tracker et;  	struct timeval tv; -	sbintime_t sb; -	int ticks_ran; +	int slots_ran;  	hpts = (struct tcp_hpts_entry *)ctx; +#ifdef TCP_HPTS_KTEST +	pace = hpts->p_hptsi; +#endif  	HPTS_LOCK(hpts);  	if (hpts->p_direct_wake) {  		/* Signaled by input or output with low occupancy count. */ -		callout_stop(&hpts->co); +		_callout_stop_safe(&hpts->co, 0);  		counter_u64_add(hpts_direct_awakening, 1);  	} else {  		/* Timed out, the normal case. */ @@ -1721,7 +1710,7 @@ tcp_hpts_thread(void *ctx)  	}  	hpts->sleeping = 0;  	hpts->p_hpts_active = 1; -	ticks_ran = tcp_hptsi(hpts, true); +	slots_ran = tcp_hptsi(hpts, true);  	tv.tv_sec = 0;  	tv.tv_usec = hpts->p_hpts_sleep_time * HPTS_USECS_PER_SLOT;  	if ((hpts->p_on_queue_cnt > conn_cnt_thresh) && (hpts->hit_callout_thresh == 0)) { @@ -1737,11 +1726,11 @@ tcp_hpts_thread(void *ctx)  			 * Only adjust sleep time if we were  			 * called from the callout i.e. direct_wake == 0.  			 */ -			if (ticks_ran < slots_indicate_more_sleep) { +			if (slots_ran < slots_indicate_more_sleep) {  				hpts->p_mysleep.tv_usec *= 2;  				if (hpts->p_mysleep.tv_usec > dynamic_max_sleep)  					hpts->p_mysleep.tv_usec = dynamic_max_sleep; -			} else if (ticks_ran > slots_indicate_less_sleep) { +			} else if (slots_ran > slots_indicate_less_sleep) {  				hpts->p_mysleep.tv_usec /= 2;  				if (hpts->p_mysleep.tv_usec < dynamic_min_sleep)  					hpts->p_mysleep.tv_usec = dynamic_min_sleep; @@ -1797,18 +1786,11 @@ tcp_hpts_thread(void *ctx)  	hpts->p_hpts_active = 0;  back_to_sleep:  	hpts->p_direct_wake = 0; -	sb = tvtosbt(tv); -	/* Store off to make visible the actual sleep time */ -	hpts->sleeping = tv.tv_usec; -	callout_reset_sbt_on(&hpts->co, sb, 0, -			     hpts_timeout_swi, hpts, hpts->p_cpu, -			     (C_DIRECT_EXEC | C_PREL(tcp_hpts_precision))); +	(void)tcp_hpts_sleep(hpts, &tv);  	NET_EPOCH_EXIT(et);  	HPTS_UNLOCK(hpts);  } -#undef	timersub -  static int32_t  hpts_count_level(struct cpu_group *cg)  { @@ -1845,57 +1827,63 @@ hpts_gather_grps(struct cpu_group **grps, int32_t *at, int32_t max, struct cpu_g  	}  } -static void -tcp_hpts_mod_load(void) +/* + * Initialize a tcp_hptsi structure. This performs the core initialization + * without starting threads. + */ +struct tcp_hptsi* +tcp_hptsi_create(const struct tcp_hptsi_funcs *funcs, bool enable_sysctl)  { +	struct tcp_hptsi *pace;  	struct cpu_group *cpu_top; -	int32_t error __diagused; -	int32_t i, j, bound = 0, created = 0; +	uint32_t i, j, cts; +	int32_t count;  	size_t sz, asz;  	struct timeval tv; -	sbintime_t sb;  	struct tcp_hpts_entry *hpts; -	struct pcpu *pc;  	char unit[16];  	uint32_t ncpus = mp_ncpus ? 
mp_ncpus : MAXCPU; -	int count, domain; +	KASSERT(funcs != NULL, ("funcs is NULL")); + +	/* Allocate the main structure */ +	pace = malloc(sizeof(struct tcp_hptsi), M_TCPHPTS, M_WAITOK | M_ZERO); +	if (pace == NULL) +		return (NULL); + +	memset(pace, 0, sizeof(*pace)); +	pace->funcs = funcs; + +	/* Setup CPU topology information */  #ifdef SMP  	cpu_top = smp_topo();  #else  	cpu_top = NULL;  #endif -	tcp_pace.rp_num_hptss = ncpus; -	hpts_hopelessly_behind = counter_u64_alloc(M_WAITOK); -	hpts_loops = counter_u64_alloc(M_WAITOK); -	back_tosleep = counter_u64_alloc(M_WAITOK); -	combined_wheel_wrap = counter_u64_alloc(M_WAITOK); -	wheel_wrap = counter_u64_alloc(M_WAITOK); -	hpts_wake_timeout = counter_u64_alloc(M_WAITOK); -	hpts_direct_awakening = counter_u64_alloc(M_WAITOK); -	hpts_back_tosleep = counter_u64_alloc(M_WAITOK); -	hpts_direct_call = counter_u64_alloc(M_WAITOK); -	cpu_uses_flowid = counter_u64_alloc(M_WAITOK); -	cpu_uses_random = counter_u64_alloc(M_WAITOK); +	pace->rp_num_hptss = ncpus; -	sz = (tcp_pace.rp_num_hptss * sizeof(struct tcp_hpts_entry *)); -	tcp_pace.rp_ent = malloc(sz, M_TCPHPTS, M_WAITOK | M_ZERO); -	sz = (sizeof(uint32_t) * tcp_pace.rp_num_hptss); -	tcp_pace.cts_last_ran = malloc(sz, M_TCPHPTS, M_WAITOK); -	tcp_pace.grp_cnt = 0; +	/* Allocate hpts entry array */ +	sz = (pace->rp_num_hptss * sizeof(struct tcp_hpts_entry *)); +	pace->rp_ent = malloc(sz, M_TCPHPTS, M_WAITOK | M_ZERO); + +	/* Allocate timestamp tracking array */ +	sz = (sizeof(uint32_t) * pace->rp_num_hptss); +	pace->cts_last_ran = malloc(sz, M_TCPHPTS, M_WAITOK); + +	/* Setup CPU groups */  	if (cpu_top == NULL) { -		tcp_pace.grp_cnt = 1; +		pace->grp_cnt = 1;  	} else {  		/* Find out how many cache level 3 domains we have */  		count = 0; -		tcp_pace.grp_cnt = hpts_count_level(cpu_top); -		if (tcp_pace.grp_cnt == 0) { -			tcp_pace.grp_cnt = 1; +		pace->grp_cnt = hpts_count_level(cpu_top); +		if (pace->grp_cnt == 0) { +			pace->grp_cnt = 1;  		} -		sz = (tcp_pace.grp_cnt * sizeof(struct cpu_group *)); -		tcp_pace.grps = malloc(sz, M_TCPHPTS, M_WAITOK); +		sz = (pace->grp_cnt * sizeof(struct cpu_group *)); +		pace->grps = malloc(sz, M_TCPHPTS, M_WAITOK);  		/* Now populate the groups */ -		if (tcp_pace.grp_cnt == 1) { +		if (pace->grp_cnt == 1) {  			/*  			 * All we need is the top level all cpu's are in  			 * the same cache so when we use grp[0]->cg_mask @@ -1903,193 +1891,290 @@ tcp_hpts_mod_load(void)  			 * all cpu's in it. The level here is probably  			 * zero which is ok.  			 */ -			tcp_pace.grps[0] = cpu_top; +			pace->grps[0] = cpu_top;  		} else {  			/*  			 * Here we must find all the level three cache domains  			 * and setup our pointers to them.  			 */  			count = 0; -			hpts_gather_grps(tcp_pace.grps, &count, tcp_pace.grp_cnt, cpu_top); +			hpts_gather_grps(pace->grps, &count, pace->grp_cnt, cpu_top);  		}  	} + +	/* Cache the current time for initializing the hpts entries */ +	microuptime(&tv); +	cts = tcp_tv_to_usec(&tv); + +	/* Initialize each hpts entry */  	asz = sizeof(struct hptsh) * NUM_OF_HPTSI_SLOTS; -	for (i = 0; i < tcp_pace.rp_num_hptss; i++) { -		tcp_pace.rp_ent[i] = malloc(sizeof(struct tcp_hpts_entry), +	for (i = 0; i < pace->rp_num_hptss; i++) { +		pace->rp_ent[i] = malloc(sizeof(struct tcp_hpts_entry),  		    M_TCPHPTS, M_WAITOK | M_ZERO); -		tcp_pace.rp_ent[i]->p_hptss = malloc(asz, M_TCPHPTS, M_WAITOK); -		hpts = tcp_pace.rp_ent[i]; -		/* -		 * Init all the hpts structures that are not specifically -		 * zero'd by the allocations. 
Also lets attach them to the -		 * appropriate sysctl block as well. -		 */ -		mtx_init(&hpts->p_mtx, "tcp_hpts_lck", -		    "hpts", MTX_DEF | MTX_DUPOK); -		for (j = 0; j < NUM_OF_HPTSI_SLOTS; j++) { -			TAILQ_INIT(&hpts->p_hptss[j].head); -			hpts->p_hptss[j].count = 0; -			hpts->p_hptss[j].gencnt = 0; -		} -		sysctl_ctx_init(&hpts->hpts_ctx); -		sprintf(unit, "%d", i); -		hpts->hpts_root = SYSCTL_ADD_NODE(&hpts->hpts_ctx, -		    SYSCTL_STATIC_CHILDREN(_net_inet_tcp_hpts), -		    OID_AUTO, -		    unit, -		    CTLFLAG_RW | CTLFLAG_MPSAFE, 0, -		    ""); -		SYSCTL_ADD_INT(&hpts->hpts_ctx, -		    SYSCTL_CHILDREN(hpts->hpts_root), -		    OID_AUTO, "out_qcnt", CTLFLAG_RD, -		    &hpts->p_on_queue_cnt, 0, -		    "Count TCB's awaiting output processing"); -		SYSCTL_ADD_U16(&hpts->hpts_ctx, -		    SYSCTL_CHILDREN(hpts->hpts_root), -		    OID_AUTO, "active", CTLFLAG_RD, -		    &hpts->p_hpts_active, 0, -		    "Is the hpts active"); -		SYSCTL_ADD_UINT(&hpts->hpts_ctx, -		    SYSCTL_CHILDREN(hpts->hpts_root), -		    OID_AUTO, "curslot", CTLFLAG_RD, -		    &hpts->p_cur_slot, 0, -		    "What the current running pacers goal"); -		SYSCTL_ADD_UINT(&hpts->hpts_ctx, -		    SYSCTL_CHILDREN(hpts->hpts_root), -		    OID_AUTO, "runtick", CTLFLAG_RD, -		    &hpts->p_runningslot, 0, -		    "What the running pacers current slot is"); -		SYSCTL_ADD_UINT(&hpts->hpts_ctx, -		    SYSCTL_CHILDREN(hpts->hpts_root), -		    OID_AUTO, "curtick", CTLFLAG_RD, -		    &hpts->p_curtick, 0, -		    "What the running pacers last tick mapped to the wheel was"); -		SYSCTL_ADD_UINT(&hpts->hpts_ctx, -		    SYSCTL_CHILDREN(hpts->hpts_root), -		    OID_AUTO, "lastran", CTLFLAG_RD, -		    &tcp_pace.cts_last_ran[i], 0, -		    "The last usec tick that this hpts ran"); -		SYSCTL_ADD_LONG(&hpts->hpts_ctx, -		    SYSCTL_CHILDREN(hpts->hpts_root), -		    OID_AUTO, "cur_min_sleep", CTLFLAG_RD, -		    &hpts->p_mysleep.tv_usec, -		    "What the running pacers is using for p_mysleep.tv_usec"); -		SYSCTL_ADD_U64(&hpts->hpts_ctx, -		    SYSCTL_CHILDREN(hpts->hpts_root), -		    OID_AUTO, "now_sleeping", CTLFLAG_RD, -		    &hpts->sleeping, 0, -		    "What the running pacers is actually sleeping for"); -		SYSCTL_ADD_U64(&hpts->hpts_ctx, -		    SYSCTL_CHILDREN(hpts->hpts_root), -		    OID_AUTO, "syscall_cnt", CTLFLAG_RD, -		    &hpts->syscall_cnt, 0, -		    "How many times we had syscalls on this hpts"); +		pace->rp_ent[i]->p_hptss = malloc(asz, M_TCPHPTS, +		    M_WAITOK | M_ZERO); +		hpts = pace->rp_ent[i]; +		/* Basic initialization */  		hpts->p_hpts_sleep_time = hpts_sleep_max; -		hpts->p_num = i; -		hpts->p_curtick = tcp_gethptstick(&tv); -		tcp_pace.cts_last_ran[i] = tcp_tv_to_usec(&tv); -		hpts->p_prev_slot = hpts->p_cur_slot = tick_to_wheel(hpts->p_curtick); -		hpts->p_cpu = 0xffff; +		hpts->p_cpu = i; +		pace->cts_last_ran[i] = cts; +		hpts->p_cur_slot = cts_to_wheel(cts); +		hpts->p_prev_slot = hpts->p_cur_slot;  		hpts->p_nxt_slot = hpts_slot(hpts->p_cur_slot, 1);  		callout_init(&hpts->co, 1); +		hpts->p_hptsi = pace; +		mtx_init(&hpts->p_mtx, "tcp_hpts_lck", "hpts", +		    MTX_DEF | MTX_DUPOK); +		for (j = 0; j < NUM_OF_HPTSI_SLOTS; j++) { +			TAILQ_INIT(&hpts->p_hptss[j].head); +		} + +		/* Setup SYSCTL if requested */ +		if (enable_sysctl) { +			sysctl_ctx_init(&hpts->hpts_ctx); +			sprintf(unit, "%d", i); +			hpts->hpts_root = SYSCTL_ADD_NODE(&hpts->hpts_ctx, +			    SYSCTL_STATIC_CHILDREN(_net_inet_tcp_hpts), +			    OID_AUTO, +			    unit, +			    CTLFLAG_RW | CTLFLAG_MPSAFE, 0, +			    ""); +			SYSCTL_ADD_INT(&hpts->hpts_ctx, +			   
 SYSCTL_CHILDREN(hpts->hpts_root), +			    OID_AUTO, "out_qcnt", CTLFLAG_RD, +			    &hpts->p_on_queue_cnt, 0, +			    "Count TCB's awaiting output processing"); +			SYSCTL_ADD_U16(&hpts->hpts_ctx, +			    SYSCTL_CHILDREN(hpts->hpts_root), +			    OID_AUTO, "active", CTLFLAG_RD, +			    &hpts->p_hpts_active, 0, +			    "Is the hpts active"); +			SYSCTL_ADD_UINT(&hpts->hpts_ctx, +			    SYSCTL_CHILDREN(hpts->hpts_root), +			    OID_AUTO, "curslot", CTLFLAG_RD, +			    &hpts->p_cur_slot, 0, +			    "What the current running pacers goal"); +			SYSCTL_ADD_UINT(&hpts->hpts_ctx, +			    SYSCTL_CHILDREN(hpts->hpts_root), +			    OID_AUTO, "runslot", CTLFLAG_RD, +			    &hpts->p_runningslot, 0, +			    "What the running pacers current slot is"); +			SYSCTL_ADD_UINT(&hpts->hpts_ctx, +			    SYSCTL_CHILDREN(hpts->hpts_root), +			    OID_AUTO, "lastran", CTLFLAG_RD, +			    &pace->cts_last_ran[i], 0, +			    "The last usec timestamp that this hpts ran"); +			SYSCTL_ADD_LONG(&hpts->hpts_ctx, +			    SYSCTL_CHILDREN(hpts->hpts_root), +			    OID_AUTO, "cur_min_sleep", CTLFLAG_RD, +			    &hpts->p_mysleep.tv_usec, +			    "What the running pacers is using for p_mysleep.tv_usec"); +			SYSCTL_ADD_U64(&hpts->hpts_ctx, +			    SYSCTL_CHILDREN(hpts->hpts_root), +			    OID_AUTO, "now_sleeping", CTLFLAG_RD, +			    &hpts->sleeping, 0, +			    "What the running pacers is actually sleeping for"); +			SYSCTL_ADD_U64(&hpts->hpts_ctx, +			    SYSCTL_CHILDREN(hpts->hpts_root), +			    OID_AUTO, "syscall_cnt", CTLFLAG_RD, +			    &hpts->syscall_cnt, 0, +			    "How many times we had syscalls on this hpts"); +		}  	} -	/* Don't try to bind to NUMA domains if we don't have any */ -	if (vm_ndomains == 1 && tcp_bind_threads == 2) -		tcp_bind_threads = 0; -	/* -	 * Now lets start ithreads to handle the hptss. -	 */ -	for (i = 0; i < tcp_pace.rp_num_hptss; i++) { -		hpts = tcp_pace.rp_ent[i]; -		hpts->p_cpu = i; +	return (pace); +} + +/* + * Create threads for a tcp_hptsi structure and starts timers for the current + * (minimum) sleep interval. 
+ */ +void +tcp_hptsi_start(struct tcp_hptsi *pace) +{ +	struct tcp_hpts_entry *hpts; +	struct pcpu *pc; +	struct timeval tv; +	uint32_t i, j; +	int count, domain; +	int error __diagused; + +	KASSERT(pace != NULL, ("tcp_hptsi_start: pace is NULL")); + +	/* Start threads for each hpts entry */ +	for (i = 0; i < pace->rp_num_hptss; i++) { +		hpts = pace->rp_ent[i]; + +		KASSERT(hpts->ie_cookie == NULL, +		    ("tcp_hptsi_start: hpts[%d]->ie_cookie is not NULL", i));  		error = swi_add(&hpts->ie, "hpts",  		    tcp_hpts_thread, (void *)hpts,  		    SWI_NET, INTR_MPSAFE, &hpts->ie_cookie);  		KASSERT(error == 0, -			("Can't add hpts:%p i:%d err:%d", -			 hpts, i, error)); -		created++; -		hpts->p_mysleep.tv_sec = 0; -		hpts->p_mysleep.tv_usec = tcp_min_hptsi_time; +		    ("Can't add hpts:%p i:%d err:%d", hpts, i, error)); +  		if (tcp_bind_threads == 1) { -			if (intr_event_bind(hpts->ie, i) == 0) -				bound++; +			(void)intr_event_bind(hpts->ie, i);  		} else if (tcp_bind_threads == 2) {  			/* Find the group for this CPU (i) and bind into it */ -			for (j = 0; j < tcp_pace.grp_cnt; j++) { -				if (CPU_ISSET(i, &tcp_pace.grps[j]->cg_mask)) { +			for (j = 0; j < pace->grp_cnt; j++) { +				if (CPU_ISSET(i, &pace->grps[j]->cg_mask)) {  					if (intr_event_bind_ithread_cpuset(hpts->ie, -						&tcp_pace.grps[j]->cg_mask) == 0) { -						bound++; +					    &pace->grps[j]->cg_mask) == 0) {  						pc = pcpu_find(i);  						domain = pc->pc_domain; -						count = hpts_domains[domain].count; -						hpts_domains[domain].cpu[count] = i; -						hpts_domains[domain].count++; +						count = pace->domains[domain].count; +						pace->domains[domain].cpu[count] = i; +						pace->domains[domain].count++;  						break;  					}  				}  			}  		} + +		hpts->p_mysleep.tv_sec = 0; +		hpts->p_mysleep.tv_usec = tcp_min_hptsi_time;  		tv.tv_sec = 0;  		tv.tv_usec = hpts->p_hpts_sleep_time * HPTS_USECS_PER_SLOT; -		hpts->sleeping = tv.tv_usec; -		sb = tvtosbt(tv); -		callout_reset_sbt_on(&hpts->co, sb, 0, -				     hpts_timeout_swi, hpts, hpts->p_cpu, -				     (C_DIRECT_EXEC | C_PREL(tcp_hpts_precision))); -	} -	/* -	 * If we somehow have an empty domain, fall back to choosing -	 * among all htps threads. -	 */ -	for (i = 0; i < vm_ndomains; i++) { -		if (hpts_domains[i].count == 0) { -			tcp_bind_threads = 0; -			break; -		} +		(void)tcp_hpts_sleep(hpts, &tv);  	} -	tcp_hpts_softclock = __tcp_run_hpts; -	tcp_lro_hpts_init(); -	printf("TCP Hpts created %d swi interrupt threads and bound %d to %s\n", -	    created, bound, -	    tcp_bind_threads == 2 ? "NUMA domains" : "cpus");  } -static void -tcp_hpts_mod_unload(void) +/* + * Stop all callouts/threads for a tcp_hptsi structure. 
+ */ +void +tcp_hptsi_stop(struct tcp_hptsi *pace)  { +	struct tcp_hpts_entry *hpts;  	int rv __diagused; +	uint32_t i; -	tcp_lro_hpts_uninit(); -	atomic_store_ptr(&tcp_hpts_softclock, NULL); +	KASSERT(pace != NULL, ("tcp_hptsi_stop: pace is NULL")); -	for (int i = 0; i < tcp_pace.rp_num_hptss; i++) { -		struct tcp_hpts_entry *hpts = tcp_pace.rp_ent[i]; +	for (i = 0; i < pace->rp_num_hptss; i++) { +		hpts = pace->rp_ent[i]; +		KASSERT(hpts != NULL, ("tcp_hptsi_stop: hpts[%d] is NULL", i)); +		KASSERT(hpts->ie_cookie != NULL, +		    ("tcp_hptsi_stop: hpts[%d]->ie_cookie is NULL", i)); -		rv = callout_drain(&hpts->co); +		rv = _callout_stop_safe(&hpts->co, CS_DRAIN);  		MPASS(rv != 0);  		rv = swi_remove(hpts->ie_cookie);  		MPASS(rv == 0); +		hpts->ie_cookie = NULL; +	} +} -		rv = sysctl_ctx_free(&hpts->hpts_ctx); -		MPASS(rv == 0); +/* + * Destroy a tcp_hptsi structure initialized by tcp_hptsi_create. + */ +void +tcp_hptsi_destroy(struct tcp_hptsi *pace) +{ +	struct tcp_hpts_entry *hpts; +	uint32_t i; + +	KASSERT(pace != NULL, ("tcp_hptsi_destroy: pace is NULL")); +	KASSERT(pace->rp_ent != NULL, ("tcp_hptsi_destroy: pace->rp_ent is NULL")); + +	/* Cleanup each hpts entry */ +	for (i = 0; i < pace->rp_num_hptss; i++) { +		hpts = pace->rp_ent[i]; +		if (hpts != NULL) { +			/* Cleanup SYSCTL if it was initialized */ +			if (hpts->hpts_root != NULL) { +				sysctl_ctx_free(&hpts->hpts_ctx); +			} -		mtx_destroy(&hpts->p_mtx); -		free(hpts->p_hptss, M_TCPHPTS); -		free(hpts, M_TCPHPTS); +			mtx_destroy(&hpts->p_mtx); +			free(hpts->p_hptss, M_TCPHPTS); +			free(hpts, M_TCPHPTS); +		}  	} -	free(tcp_pace.rp_ent, M_TCPHPTS); -	free(tcp_pace.cts_last_ran, M_TCPHPTS); +	/* Cleanup main arrays */ +	free(pace->rp_ent, M_TCPHPTS); +	free(pace->cts_last_ran, M_TCPHPTS);  #ifdef SMP -	free(tcp_pace.grps, M_TCPHPTS); +	free(pace->grps, M_TCPHPTS);  #endif +	/* Free the main structure */ +	free(pace, M_TCPHPTS); +} + +static int +tcp_hpts_mod_load(void) +{ +	int i; + +	/* Don't try to bind to NUMA domains if we don't have any */ +	if (vm_ndomains == 1 && tcp_bind_threads == 2) +		tcp_bind_threads = 0; + +	/* Create the tcp_hptsi structure */ +	tcp_hptsi_pace = tcp_hptsi_create(&tcp_hptsi_default_funcs, true); +	if (tcp_hptsi_pace == NULL) +		return (ENOMEM); + +	/* Initialize global counters */ +	hpts_hopelessly_behind = counter_u64_alloc(M_WAITOK); +	hpts_loops = counter_u64_alloc(M_WAITOK); +	back_tosleep = counter_u64_alloc(M_WAITOK); +	combined_wheel_wrap = counter_u64_alloc(M_WAITOK); +	wheel_wrap = counter_u64_alloc(M_WAITOK); +	hpts_wake_timeout = counter_u64_alloc(M_WAITOK); +	hpts_direct_awakening = counter_u64_alloc(M_WAITOK); +	hpts_back_tosleep = counter_u64_alloc(M_WAITOK); +	hpts_direct_call = counter_u64_alloc(M_WAITOK); +	cpu_uses_flowid = counter_u64_alloc(M_WAITOK); +	cpu_uses_random = counter_u64_alloc(M_WAITOK); + +	/* Start the threads */ +	tcp_hptsi_start(tcp_hptsi_pace); + +	/* Enable the global HPTS softclock function */ +	tcp_hpts_softclock = __tcp_run_hpts; + +	/* Initialize LRO HPTS */ +	tcp_lro_hpts_init(); + +	/* +	 * If we somehow have an empty domain, fall back to choosing among all +	 * HPTS threads. +	 */ +	for (i = 0; i < vm_ndomains; i++) { +		if (tcp_hptsi_pace->domains[i].count == 0) { +			tcp_bind_threads = 0; +			break; +		} +	} + +	printf("TCP HPTS started %u (%s) swi interrupt threads\n", +		tcp_hptsi_pace->rp_num_hptss, (tcp_bind_threads == 0) ? +		 "(unbounded)" : +		 (tcp_bind_threads == 1 ? 
"per-cpu" : "per-NUMA-domain")); + +	return (0); +} + +static void +tcp_hpts_mod_unload(void) +{ +	tcp_lro_hpts_uninit(); + +	/* Disable the global HPTS softclock function */ +	atomic_store_ptr(&tcp_hpts_softclock, NULL); + +	tcp_hptsi_stop(tcp_hptsi_pace); +	tcp_hptsi_destroy(tcp_hptsi_pace); +	tcp_hptsi_pace = NULL; + +	/* Cleanup global counters */  	counter_u64_free(hpts_hopelessly_behind);  	counter_u64_free(hpts_loops);  	counter_u64_free(back_tosleep); @@ -2104,13 +2189,11 @@ tcp_hpts_mod_unload(void)  }  static int -tcp_hpts_modevent(module_t mod, int what, void *arg) +tcp_hpts_mod_event(module_t mod, int what, void *arg)  { -  	switch (what) {  	case MOD_LOAD: -		tcp_hpts_mod_load(); -		return (0); +		return (tcp_hpts_mod_load());  	case MOD_QUIESCE:  		/*  		 * Since we are a dependency of TCP stack modules, they should @@ -2130,7 +2213,7 @@ tcp_hpts_modevent(module_t mod, int what, void *arg)  static moduledata_t tcp_hpts_module = {  	.name = "tcphpts", -	.evhand = tcp_hpts_modevent, +	.evhand = tcp_hpts_mod_event,  };  DECLARE_MODULE(tcphpts, tcp_hpts_module, SI_SUB_SOFTINTR, SI_ORDER_ANY); diff --git a/sys/netinet/tcp_hpts.h b/sys/netinet/tcp_hpts.h index 6172baf2a062..6b05f9701ac2 100644 --- a/sys/netinet/tcp_hpts.h +++ b/sys/netinet/tcp_hpts.h @@ -28,19 +28,11 @@  /* Number of useconds represented by an hpts slot */  #define HPTS_USECS_PER_SLOT 10 -#define HPTS_MS_TO_SLOTS(x) ((x * 100) + 1) -#define HPTS_USEC_TO_SLOTS(x) ((x+9) /10)  #define HPTS_USEC_IN_SEC 1000000  #define HPTS_MSEC_IN_SEC 1000  #define HPTS_USEC_IN_MSEC 1000  static inline uint32_t -tcp_tv_to_hpts_slot(const struct timeval *sv) -{ -	return ((sv->tv_sec * 100000) + (sv->tv_usec / HPTS_USECS_PER_SLOT)); -} - -static inline uint32_t  tcp_tv_to_usec(const struct timeval *sv)  {  	return ((uint32_t) ((sv->tv_sec * HPTS_USEC_IN_SEC) + sv->tv_usec)); @@ -66,7 +58,7 @@ struct hpts_diag {  	uint32_t p_runningslot;		/* bbr->inflight */  	uint32_t slot_req;		/* bbr->flex3 x */  	uint32_t inp_hptsslot;		/* bbr->flex4 x */ -	uint32_t slot_remaining;	/* bbr->flex5 x */ +	uint32_t time_remaining;	/* bbr->flex5 x */  	uint32_t have_slept;		/* bbr->epoch x */  	uint32_t hpts_sleep_time;	/* bbr->applimited x */  	uint32_t yet_to_sleep;		/* bbr->lt_epoch x */ @@ -75,8 +67,6 @@ struct hpts_diag {  	uint32_t maxslots;		/* bbr->delRate x */  	uint32_t wheel_cts;		/* bbr->rttProp x */  	int32_t co_ret; 		/* bbr->pkts_out x */ -	uint32_t p_curtick;		/* upper bbr->cur_del_rate */ -	uint32_t p_lasttick;		/* lower bbr->cur_del_rate */  	uint8_t p_on_min_sleep; 	/* bbr->flex8 x */  }; @@ -92,13 +82,18 @@ struct hpts_diag {  #ifdef _KERNEL +extern struct tcp_hptsi *tcp_hptsi_pace; +  /*   * The following are the definitions for the kernel HPTS interface for managing   * the HPTS ring and the TCBs on it.  */ -void tcp_hpts_init(struct tcpcb *); -void tcp_hpts_remove(struct tcpcb *); +void __tcp_hpts_init(struct tcp_hptsi *pace, struct tcpcb *); +#define tcp_hpts_init(tp) __tcp_hpts_init(tcp_hptsi_pace, tp) + +void __tcp_hpts_remove(struct tcp_hptsi *pace, struct tcpcb *); +#define tcp_hpts_remove(tp) __tcp_hpts_remove(tcp_hptsi_pace, tp)  static inline bool  tcp_in_hpts(struct tcpcb *tp) @@ -132,12 +127,13 @@ tcp_in_hpts(struct tcpcb *tp)   * that INP_WLOCK() or from destroying your TCB where again   * you should already have the INP_WLOCK().   
*/ -uint32_t tcp_hpts_insert_diag(struct tcpcb *tp, uint32_t slot, int32_t line, -    struct hpts_diag *diag); -#define	tcp_hpts_insert(inp, slot)	\ -	tcp_hpts_insert_diag((inp), (slot), __LINE__, NULL) +void __tcp_hpts_insert(struct tcp_hptsi *pace, struct tcpcb *tp, uint32_t usecs, +	struct hpts_diag *diag); +#define	tcp_hpts_insert(tp, usecs, diag)	\ +	__tcp_hpts_insert(tcp_hptsi_pace, (tp), (usecs), (diag)) -void tcp_set_hpts(struct tcpcb *tp); +void __tcp_set_hpts(struct tcp_hptsi *pace, struct tcpcb *tp); +#define tcp_set_hpts(tp) __tcp_set_hpts(tcp_hptsi_pace, tp)  extern int32_t tcp_min_hptsi_time; @@ -147,17 +143,6 @@ get_hpts_min_sleep_time(void)  	return (tcp_min_hptsi_time + HPTS_USECS_PER_SLOT);  } -static inline uint32_t -tcp_gethptstick(struct timeval *sv) -{ -	struct timeval tv; - -	if (sv == NULL) -		sv = &tv; -	microuptime(sv); -	return (tcp_tv_to_hpts_slot(sv)); -} -  static inline uint64_t  tcp_get_u64_usecs(struct timeval *tv)  { @@ -180,12 +165,5 @@ tcp_get_usecs(struct timeval *tv)  	return (tcp_tv_to_usec(tv));  } -/* - * LRO HPTS initialization and uninitialization, only for internal use by the - * HPTS code. - */ -void tcp_lro_hpts_init(void); -void tcp_lro_hpts_uninit(void); -  #endif /* _KERNEL */  #endif /* __tcp_hpts_h__ */ diff --git a/sys/netinet/tcp_hpts_internal.h b/sys/netinet/tcp_hpts_internal.h new file mode 100644 index 000000000000..8b33e03a6981 --- /dev/null +++ b/sys/netinet/tcp_hpts_internal.h @@ -0,0 +1,184 @@ +/*- + * Copyright (c) 2025 Netflix, Inc. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + *    notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + *    notice, this list of conditions and the following disclaimer in the + *    documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#ifndef __tcp_hpts_internal_h__ +#define __tcp_hpts_internal_h__ + +/* + * TCP High Precision Timer System (HPTS) - Internal Definitions + * + * This header contains internal structures, constants, and interfaces that are + * implemented in tcp_hpts.c but exposed to enable comprehensive unit testing of + * the HPTS subsystem. + */ + +#if defined(_KERNEL) + +/* + * The hpts uses a 102400 wheel. The wheel + * defines the time in 10 usec increments (102400 x 10). + * This gives a range of 10usec - 1024ms to place + * an entry within. 
If the user requests more than
+ * 1.024 second, a remainder is attached and the hpts
+ * when seeing the remainder will re-insert the
+ * inpcb forward in time from where it is until
+ * the remainder is zero.
+ */
+
+#define NUM_OF_HPTSI_SLOTS 102400
+
+/* The number of connections after which the dynamic sleep logic kicks in. */
+#define DEFAULT_CONNECTION_THRESHOLD 100
+
+/*
+ * The hpts uses a 102400 wheel. The wheel
+ * defines the time in 10 usec increments (102400 x 10).
+ * This gives a range of 10usec - 1024ms to place
+ * an entry within. If the user requests more than
+ * 1.024 second, a remainder is attached and the hpts
+ * when seeing the remainder will re-insert the
+ * inpcb forward in time from where it is until
+ * the remainder is zero.
+ */
+
+#define NUM_OF_HPTSI_SLOTS 102400
+
+/* Convert microseconds to HPTS slots */
+#define HPTS_USEC_TO_SLOTS(x) ((x+9) /10)
+
+/* The number of connections after which the dynamic sleep logic kicks in. */
+#define DEFAULT_CONNECTION_THRESHOLD 100
+
+extern int tcp_bind_threads; 		/* Thread binding configuration
+					 * (0=none, 1=cpu, 2=numa) */
+
+/*
+ * Abstraction layer controlling time, interrupts and callouts.
+ */
+struct tcp_hptsi_funcs {
+	void (*microuptime)(struct timeval *tv);
+	int (*swi_add)(struct intr_event **eventp, const char *name,
+		driver_intr_t handler, void *arg, int pri, enum intr_type flags,
+		void **cookiep);
+	int (*swi_remove)(void *cookie);
+	void (*swi_sched)(void *cookie, int flags);
+	int (*intr_event_bind)(struct intr_event *ie, int cpu);
+	int (*intr_event_bind_ithread_cpuset)(struct intr_event *ie,
+		struct _cpuset *mask);
+	void (*callout_init)(struct callout *c, int mpsafe);
+	int (*callout_reset_sbt_on)(struct callout *c, sbintime_t sbt,
+		sbintime_t precision, void (*func)(void *), void *arg, int cpu,
+		int flags);
+	int (*_callout_stop_safe)(struct callout *c, int flags);
+};
+
+/* Default function table for system operation */
+extern const struct tcp_hptsi_funcs tcp_hptsi_default_funcs;
+
+/* Each hpts has its own p_mtx which is used for locking */
+#define	HPTS_MTX_ASSERT(hpts)	mtx_assert(&(hpts)->p_mtx, MA_OWNED)
+#define	HPTS_LOCK(hpts)		mtx_lock(&(hpts)->p_mtx)
+#define	HPTS_TRYLOCK(hpts)	mtx_trylock(&(hpts)->p_mtx)
+#define	HPTS_UNLOCK(hpts)	mtx_unlock(&(hpts)->p_mtx)
+
+struct tcp_hpts_entry {
+	/* Cache line 0x00 */
+	struct mtx p_mtx;		/* Mutex for hpts */
+	struct timeval p_mysleep;	/* Our min sleep time */
+	uint64_t syscall_cnt;
+	uint64_t sleeping;		/* What the actual sleep was (if sleeping) */
+	uint16_t p_hpts_active; 	/* Flag that says hpts is awake  */
+	uint8_t p_wheel_complete; 	/* have we completed the wheel arc walk? */
+	uint32_t p_runningslot; 	/* Current slot we are at if we are running */
+	uint32_t p_prev_slot;		/* Previous slot we were on */
+	uint32_t p_cur_slot;		/* Current slot in wheel hpts is draining */
+	uint32_t p_nxt_slot;		/* The next slot outside the current range
+					 * of slots that the hpts is running on. 
*/ +	int32_t p_on_queue_cnt;		/* Count on queue in this hpts */ +	uint8_t p_direct_wake :1, 	/* boolean */ +		p_on_min_sleep:1, 	/* boolean */ +		p_hpts_wake_scheduled:1,/* boolean */ +		hit_callout_thresh:1, +		p_avail:4; +	uint8_t p_fill[3];		/* Fill to 32 bits */ +	/* Cache line 0x40 */ +	struct hptsh { +		TAILQ_HEAD(, tcpcb)	head; +		uint32_t		count; +		uint32_t		gencnt; +	} *p_hptss;			/* Hptsi wheel */ +	uint32_t p_hpts_sleep_time;	/* Current sleep interval having a max +					 * of 255ms */ +	uint32_t overidden_sleep;	/* what was overrided by min-sleep for logging */ +	uint32_t saved_curslot;		/* for logging */ +	uint32_t saved_prev_slot;	/* for logging */ +	uint32_t p_delayed_by;		/* How much were we delayed by */ +	/* Cache line 0x80 */ +	struct sysctl_ctx_list hpts_ctx; +	struct sysctl_oid *hpts_root; +	struct intr_event *ie; +	void *ie_cookie; +	uint16_t p_cpu;			/* The hpts CPU */ +	struct tcp_hptsi *p_hptsi;	/* Back pointer to parent hptsi structure */ +	/* There is extra space in here */ +	/* Cache line 0x100 */ +	struct callout co __aligned(CACHE_LINE_SIZE); +}               __aligned(CACHE_LINE_SIZE); + +struct tcp_hptsi { +	struct cpu_group **grps; +	struct tcp_hpts_entry **rp_ent;	/* Array of hptss */ +	uint32_t *cts_last_ran; +	uint32_t grp_cnt; +	uint32_t rp_num_hptss;		/* Number of hpts threads */ +	struct hpts_domain_info { +		int count; +		int cpu[MAXCPU]; +	} domains[MAXMEMDOM];		/* Per-NUMA domain CPU assignments */ +	const struct tcp_hptsi_funcs *funcs;	/* Function table for testability */ +}; + +/* + * Core tcp_hptsi structure manipulation functions. + */ +struct tcp_hptsi* tcp_hptsi_create(const struct tcp_hptsi_funcs *funcs, +	bool enable_sysctl); +void tcp_hptsi_destroy(struct tcp_hptsi *pace); +void tcp_hptsi_start(struct tcp_hptsi *pace); +void tcp_hptsi_stop(struct tcp_hptsi *pace); +uint16_t tcp_hptsi_random_cpu(struct tcp_hptsi *pace); +int32_t tcp_hptsi(struct tcp_hpts_entry *hpts, bool from_callout); + +void tcp_hpts_wake(struct tcp_hpts_entry *hpts); + +/* + * LRO HPTS initialization and uninitialization, only for internal use by the + * HPTS code. + */ +void tcp_lro_hpts_init(void); +void tcp_lro_hpts_uninit(void); + +#endif /* defined(_KERNEL) */ +#endif /* __tcp_hpts_internal_h__ */ diff --git a/sys/netinet/tcp_hpts_test.c b/sys/netinet/tcp_hpts_test.c new file mode 100644 index 000000000000..c5dc9cb5b03b --- /dev/null +++ b/sys/netinet/tcp_hpts_test.c @@ -0,0 +1,1682 @@ +/*- + * SPDX-License-Identifier: BSD-2-Clause + * + * Copyright (c) 2025 Netflix, Inc. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + *    notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + *    notice, this list of conditions and the following disclaimer in the + *    documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include <tests/ktest.h> +#include <sys/cdefs.h> +#include "opt_inet.h" +#include <sys/param.h> +#include <sys/bus.h> +#include <sys/interrupt.h> +#include <sys/errno.h> +#include <sys/malloc.h> +#include <sys/mutex.h> +#include <sys/refcount.h> +#include <sys/socket.h> +#include <sys/sysctl.h> +#include <sys/systm.h> + +#include <netinet/in.h> +#include <netinet/tcp.h> +#include <netinet/in_pcb.h> +#include <netinet/tcp_seq.h> +#include <netinet/tcp_var.h> +#include <netinet/tcp_hpts.h> +#include <netinet/tcp_hpts_internal.h> +#include <dev/tcp_log/tcp_log_dev.h> +#include <netinet/tcp_log_buf.h> + +#undef tcp_hpts_init +#undef tcp_hpts_remove +#undef tcp_hpts_insert +#undef tcp_set_hpts + +/* Custom definitions that take the tcp_hptsi */ +#define tcp_hpts_init(pace, tp) __tcp_hpts_init((pace), (tp)) +#define tcp_hpts_remove(pace, tp) __tcp_hpts_remove((pace), (tp)) +#define	tcp_hpts_insert(pace, tp, usecs, diag)	\ +	__tcp_hpts_insert((pace), (tp), (usecs), (diag)) +#define tcp_set_hpts(pace, tp) __tcp_set_hpts((pace), (tp)) + +static MALLOC_DEFINE(M_TCPHPTS, "tcp_hpts_test", "TCP hpts test"); + +static int test_exit_on_failure = true; +SYSCTL_NODE(_net_inet_tcp, OID_AUTO, hpts_test, CTLFLAG_RW | CTLFLAG_MPSAFE, 0, +    "TCP HPTS test controls"); +SYSCTL_INT(_net_inet_tcp_hpts_test, OID_AUTO, exit_on_failure, CTLFLAG_RW, +    &test_exit_on_failure, 0, +    "Exit HPTS test immediately on first failure (1) or continue running all tests (0)"); + +#define KTEST_VERIFY(x) do { \ +	if (!(x)) { \ +		KTEST_ERR(ctx, "FAIL: %s", #x); \ +		if (test_exit_on_failure) \ +			return (EINVAL); \ +	} else { \ +		KTEST_LOG(ctx, "PASS: %s", #x); \ +	} \ +} while (0) + +#define KTEST_EQUAL(x, y) do { \ +	if ((x) != (y)) { \ +		KTEST_ERR(ctx, "FAIL: %s != %s (%d != %d)", #x, #y, (x), (y)); \ +		if (test_exit_on_failure) \ +			return (EINVAL); \ +	} else { \ +		KTEST_LOG(ctx, "PASS: %s == %s", #x, #y); \ +	} \ +} while (0) + +#define KTEST_NEQUAL(x, y) do { \ +	if ((x) == (y)) { \ +		KTEST_ERR(ctx, "FAIL: %s == %s (%d == %d)", #x, #y, (x), (y)); \ +		if (test_exit_on_failure) \ +			return (EINVAL); \ +	} else { \ +		KTEST_LOG(ctx, "PASS: %s != %s", #x, #y); \ +	} \ +} while (0) + +#define KTEST_GREATER_THAN(x, y) do { \ +	if ((x) <= (y)) { \ +		KTEST_ERR(ctx, "FAIL: %s <= %s (%d <= %d)", #x, #y, (x), (y)); \ +		if (test_exit_on_failure) \ +			return (EINVAL); \ +	} else { \ +		KTEST_LOG(ctx, "PASS: %s > %s", #x, #y); \ +	} \ +} while (0) + +#define KTEST_VERIFY_RET(x, y) do { \ +	if (!(x)) { \ +		KTEST_ERR(ctx, "FAIL: %s", #x); \ +		if (test_exit_on_failure) \ +			return (y); \ +	} else { \ +		KTEST_LOG(ctx, "PASS: %s", #x); \ +	} \ +} while (0) + +#ifdef TCP_HPTS_KTEST + +static void +dump_hpts_entry(struct ktest_test_context *ctx, struct tcp_hpts_entry *hpts) +{ +	KTEST_LOG(ctx, "tcp_hpts_entry(%p)", hpts); +	KTEST_LOG(ctx, "  p_cur_slot: %u", hpts->p_cur_slot); +	KTEST_LOG(ctx, "  p_prev_slot: %u", hpts->p_prev_slot); +	KTEST_LOG(ctx, "  p_nxt_slot: %u", hpts->p_nxt_slot); +	
KTEST_LOG(ctx, "  p_runningslot: %u", hpts->p_runningslot); +	KTEST_LOG(ctx, "  p_on_queue_cnt: %d", hpts->p_on_queue_cnt); +	KTEST_LOG(ctx, "  p_hpts_active: %u", hpts->p_hpts_active); +	KTEST_LOG(ctx, "  p_wheel_complete: %u", hpts->p_wheel_complete); +	KTEST_LOG(ctx, "  p_direct_wake: %u", hpts->p_direct_wake); +	KTEST_LOG(ctx, "  p_on_min_sleep: %u", hpts->p_on_min_sleep); +	KTEST_LOG(ctx, "  p_hpts_wake_scheduled: %u", hpts->p_hpts_wake_scheduled); +	KTEST_LOG(ctx, "  hit_callout_thresh: %u", hpts->hit_callout_thresh); +	KTEST_LOG(ctx, "  p_hpts_sleep_time: %u", hpts->p_hpts_sleep_time); +	KTEST_LOG(ctx, "  p_delayed_by: %u", hpts->p_delayed_by); +	KTEST_LOG(ctx, "  overidden_sleep: %u", hpts->overidden_sleep); +	KTEST_LOG(ctx, "  saved_curslot: %u", hpts->saved_curslot); +	KTEST_LOG(ctx, "  saved_prev_slot: %u", hpts->saved_prev_slot); +	KTEST_LOG(ctx, "  syscall_cnt: %lu", hpts->syscall_cnt); +	KTEST_LOG(ctx, "  sleeping: %lu", hpts->sleeping); +	KTEST_LOG(ctx, "  p_cpu: %u", hpts->p_cpu); +	KTEST_LOG(ctx, "  ie_cookie: %p", hpts->ie_cookie); +	KTEST_LOG(ctx, "  p_hptsi: %p", hpts->p_hptsi); +	KTEST_LOG(ctx, "  p_mysleep: %ld.%06ld", hpts->p_mysleep.tv_sec, hpts->p_mysleep.tv_usec); +} + +static void +dump_tcpcb(struct tcpcb *tp) +{ +	struct ktest_test_context *ctx = tp->t_fb_ptr; +	struct inpcb *inp = &tp->t_inpcb; + +	KTEST_LOG(ctx, "tcp_control_block(%p)", tp); + +	/* HPTS-specific fields */ +	KTEST_LOG(ctx, "  t_in_hpts: %d", tp->t_in_hpts); +	KTEST_LOG(ctx, "  t_hpts_cpu: %u", tp->t_hpts_cpu); +	KTEST_LOG(ctx, "  t_hpts_slot: %d", tp->t_hpts_slot); +	KTEST_LOG(ctx, "  t_hpts_gencnt: %u", tp->t_hpts_gencnt); +	KTEST_LOG(ctx, "  t_hpts_request: %u", tp->t_hpts_request); + +	/* LRO CPU field */ +	KTEST_LOG(ctx, "  t_lro_cpu: %u", tp->t_lro_cpu); + +	/* TCP flags that affect HPTS */ +	KTEST_LOG(ctx, "  t_flags2: 0x%x", tp->t_flags2); +	KTEST_LOG(ctx, "    TF2_HPTS_CPU_SET: %s", (tp->t_flags2 & TF2_HPTS_CPU_SET) ? "YES" : "NO"); +	KTEST_LOG(ctx, "    TF2_HPTS_CALLS: %s", (tp->t_flags2 & TF2_HPTS_CALLS) ? "YES" : "NO"); +	KTEST_LOG(ctx, "    TF2_SUPPORTS_MBUFQ: %s", (tp->t_flags2 & TF2_SUPPORTS_MBUFQ) ? "YES" : "NO"); + +	/* Input PCB fields that HPTS uses */ +	KTEST_LOG(ctx, "  inp_flags: 0x%x", inp->inp_flags); +	KTEST_LOG(ctx, "    INP_DROPPED: %s", (inp->inp_flags & INP_DROPPED) ? "YES" : "NO"); +	KTEST_LOG(ctx, "  inp_flowid: 0x%x", inp->inp_flowid); +	KTEST_LOG(ctx, "  inp_flowtype: %u", inp->inp_flowtype); +	KTEST_LOG(ctx, "  inp_numa_domain: %d", inp->inp_numa_domain); +} + +/* Enum for call counting indices */ +enum test_call_counts { +	CCNT_MICROUPTIME = 0, +	CCNT_SWI_ADD, +	CCNT_SWI_REMOVE, +	CCNT_SWI_SCHED, +	CCNT_INTR_EVENT_BIND, +	CCNT_INTR_EVENT_BIND_CPUSET, +	CCNT_CALLOUT_INIT, +	CCNT_CALLOUT_RESET_SBT_ON, +	CCNT_CALLOUT_STOP_SAFE, +	CCNT_TCP_OUTPUT, +	CCNT_TCP_TFB_DO_QUEUED_SEGMENTS, +	CCNT_MAX +}; + +static uint32_t call_counts[CCNT_MAX]; + +static uint64_t test_time_usec = 0; + +/* + * Reset all test global variables to a clean state. 
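+ * Clears the per-hook call counters and rewinds the simulated clock
+ * (test_time_usec) back to zero before each test case runs.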
+ */ +static void +test_hpts_init(void) +{ +	memset(call_counts, 0, sizeof(call_counts)); +	test_time_usec = 0; +} + +static void +test_microuptime(struct timeval *tv) +{ +	call_counts[CCNT_MICROUPTIME]++; +	tv->tv_sec = test_time_usec / 1000000; +	tv->tv_usec = test_time_usec % 1000000; +} + +static int +test_swi_add(struct intr_event **eventp, const char *name, +    driver_intr_t handler, void *arg, int pri, enum intr_type flags, +    void **cookiep) +{ +	call_counts[CCNT_SWI_ADD]++; +	/* Simulate successful SWI creation */ +	*eventp = (struct intr_event *)0xfeedface; /* Mock event */ +	*cookiep = (void *)0xdeadbeef; /* Mock cookie */ +	return (0); +} + +static int +test_swi_remove(void *cookie) +{ +	call_counts[CCNT_SWI_REMOVE]++; +	/* Simulate successful removal */ +	return (0); +} + +static void +test_swi_sched(void *cookie, int flags) +{ +	call_counts[CCNT_SWI_SCHED]++; +	/* Simulate successful SWI scheduling */ +} + +static int +test_intr_event_bind(struct intr_event *ie, int cpu) +{ +	call_counts[CCNT_INTR_EVENT_BIND]++; +	/* Simulate successful binding */ +	return (0); +} + +static int +test_intr_event_bind_ithread_cpuset(struct intr_event *ie, struct _cpuset *mask) +{ +	call_counts[CCNT_INTR_EVENT_BIND_CPUSET]++; +	/* Simulate successful cpuset binding */ +	return (0); +} + +static void +test_callout_init(struct callout *c, int mpsafe) +{ +	call_counts[CCNT_CALLOUT_INIT]++; +	memset(c, 0, sizeof(*c)); +} + +static int +test_callout_reset_sbt_on(struct callout *c, sbintime_t sbt, sbintime_t precision, +    void (*func)(void *), void *arg, int cpu, int flags) +{ +	call_counts[CCNT_CALLOUT_RESET_SBT_ON]++; +	/* Return 1 to simulate successful timer scheduling */ +	return (1); +} + +static int +test_callout_stop_safe(struct callout *c, int flags) +{ +	call_counts[CCNT_CALLOUT_STOP_SAFE]++; +	/* Return 1 to simulate successful timer stopping */ +	return (1); +} + +static const struct tcp_hptsi_funcs test_funcs = { +	.microuptime = test_microuptime, +	.swi_add = test_swi_add, +	.swi_remove = test_swi_remove, +	.swi_sched = test_swi_sched, +	.intr_event_bind = test_intr_event_bind, +	.intr_event_bind_ithread_cpuset = test_intr_event_bind_ithread_cpuset, +	.callout_init = test_callout_init, +	.callout_reset_sbt_on = test_callout_reset_sbt_on, +	._callout_stop_safe = test_callout_stop_safe, +}; + +#define TP_REMOVE_FROM_HPTS(tp) tp->bits_spare +#define TP_LOG_TEST(tp) tp->t_log_state_set + +static int +test_tcp_output(struct tcpcb *tp) +{ +	struct ktest_test_context *ctx = tp->t_fb_ptr; +	struct tcp_hptsi *pace = (struct tcp_hptsi*)tp->t_tfo_pending; +	struct tcp_hpts_entry *hpts = pace->rp_ent[tp->t_hpts_cpu]; + +	call_counts[CCNT_TCP_OUTPUT]++; +	if (TP_LOG_TEST(tp)) { +		KTEST_LOG(ctx, "=> tcp_output(%p)", tp); +		dump_tcpcb(tp); +		dump_hpts_entry(ctx, hpts); +	} + +	if ((TP_REMOVE_FROM_HPTS(tp) & 1) != 0) { +		if (TP_LOG_TEST(tp)) +			KTEST_LOG(ctx, "=> tcp_hpts_remove(%p)", tp); +		tcp_hpts_remove(pace, tp); +	} + +	if ((TP_REMOVE_FROM_HPTS(tp) & 2) != 0) { +		INP_WUNLOCK(&tp->t_inpcb); /* tcp_output unlocks on error */ +		return (-1); /* Simulate tcp_output error */ +	} + +	return (0); +} + +static int +test_tfb_do_queued_segments(struct tcpcb *tp, int flag) +{ +	struct ktest_test_context *ctx = tp->t_fb_ptr; +	struct tcp_hptsi *pace = (struct tcp_hptsi*)tp->t_tfo_pending; +	struct tcp_hpts_entry *hpts = pace->rp_ent[tp->t_hpts_cpu]; + +	call_counts[CCNT_TCP_TFB_DO_QUEUED_SEGMENTS]++; +	KTEST_LOG(ctx, "=> tfb_do_queued_segments(%p, %d)", tp, flag); +	dump_tcpcb(tp); +	
dump_hpts_entry(ctx, hpts); + +	if ((TP_REMOVE_FROM_HPTS(tp) & 1) != 0) { +		if (TP_LOG_TEST(tp)) +			KTEST_LOG(ctx, "=> tcp_hpts_remove(%p)", tp); +		tcp_hpts_remove(pace, tp); +	} + +	if ((TP_REMOVE_FROM_HPTS(tp) & 2) != 0) { +		INP_WUNLOCK(&tp->t_inpcb); /* do_queued_segments unlocks on error */ +		return (-1); /* Simulate do_queued_segments error */ +	} + +	return (0); +} + +static struct tcp_function_block test_tcp_fb = { +	.tfb_tcp_block_name = "hpts_test_tcp", +	.tfb_tcp_output = test_tcp_output, +	.tfb_do_queued_segments = test_tfb_do_queued_segments, +}; + +/* + * Create a minimally initialized tcpcb that can be safely inserted into HPTS. + * This function allocates and initializes all the fields that HPTS code + * reads or writes. + */ +static struct tcpcb * +test_hpts_create_tcpcb(struct ktest_test_context *ctx, struct tcp_hptsi *pace) +{ +	struct tcpcb *tp; + +	tp = malloc(sizeof(struct tcpcb), M_TCPHPTS, M_WAITOK | M_ZERO); +	if (tp) { +		rw_init_flags(&tp->t_inpcb.inp_lock, "test-inp", +			RW_RECURSE | RW_DUPOK); +		refcount_init(&tp->t_inpcb.inp_refcount, 1); +		tp->t_inpcb.inp_pcbinfo = &V_tcbinfo; +		tp->t_fb = &test_tcp_fb; +		tp->t_hpts_cpu = HPTS_CPU_NONE; +		STAILQ_INIT(&tp->t_inqueue); +		tcp_hpts_init(pace, tp); + +		/* Stuff some pointers in the tcb for test purposes. */ +		tp->t_fb_ptr = ctx; +		tp->t_tfo_pending = (unsigned int*)pace; +	} + +	return (tp); +} + +/* + * Free a test tcpcb created by test_hpts_create_tcpcb() + */ +static void +test_hpts_free_tcpcb(struct tcpcb *tp) +{ +	if (tp == NULL) +		return; + +	INP_LOCK_DESTROY(&tp->t_inpcb); +	free(tp, M_TCPHPTS); +} + +/* + * *********************************************** + * * KTEST functions for testing the HPTS module * + * *********************************************** + */ + +/* + * Validates that the HPTS module is properly loaded and initialized by checking + * that the minimum HPTS time is configured. + */ +KTEST_FUNC(module_load) +{ +	test_hpts_init(); +	KTEST_NEQUAL(tcp_min_hptsi_time, 0); +	KTEST_VERIFY(tcp_bind_threads >= 0 && tcp_bind_threads <= 2); +	KTEST_NEQUAL(tcp_hptsi_pace, NULL); +	return (0); +} + +/* + * Validates the creation and destruction of tcp_hptsi structures, ensuring + * proper initialization of internal fields and clean destruction. + */ +KTEST_FUNC(hptsi_create_destroy) +{ +	struct tcp_hptsi *pace; + +	test_hpts_init(); + +	pace = tcp_hptsi_create(&test_funcs, false); +	KTEST_NEQUAL(pace, NULL); +	KTEST_NEQUAL(pace->rp_ent, NULL); +	KTEST_NEQUAL(pace->cts_last_ran, NULL); +	KTEST_VERIFY(pace->rp_num_hptss > 0); +	KTEST_VERIFY(pace->rp_num_hptss <= MAXCPU); /* Reasonable upper bound */ +	KTEST_VERIFY(pace->grp_cnt >= 1); /* At least one group */ +	KTEST_EQUAL(pace->funcs, &test_funcs); /* Verify function pointer was set */ + +	/* Verify individual HPTS entries are properly initialized */ +	for (uint32_t i = 0; i < pace->rp_num_hptss; i++) { +		KTEST_NEQUAL(pace->rp_ent[i], NULL); +		KTEST_EQUAL(pace->rp_ent[i]->p_cpu, i); +		KTEST_EQUAL(pace->rp_ent[i]->p_hptsi, pace); +		KTEST_EQUAL(pace->rp_ent[i]->p_on_queue_cnt, 0); +	} + +	tcp_hptsi_destroy(pace); + +	return (0); +} + +/* + * Validates that tcp_hptsi structures can be started and stopped properly, + * including verification that threads are created during start and cleaned up + * during stop operations. 
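+ *
+ * After tcp_hptsi_start() an entry has its SWI handler installed (a non-NULL
+ * ie_cookie); tcp_hptsi_stop() removes the handler again before destroy.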
+ */ +KTEST_FUNC(hptsi_start_stop) +{ +	struct tcp_hptsi *pace; + +	test_hpts_init(); + +	pace = tcp_hptsi_create(&test_funcs, false); +	KTEST_NEQUAL(pace, NULL); + +	tcp_hptsi_start(pace); + +	/* Verify that entries have threads started */ +	struct tcp_hpts_entry *hpts = pace->rp_ent[0]; +	KTEST_NEQUAL(hpts->ie_cookie, NULL);  /* Should have SWI handler */ +	KTEST_EQUAL(hpts->p_hptsi, pace);     /* Should point to our pace */ + +	tcp_hptsi_stop(pace); +	tcp_hptsi_destroy(pace); + +	return (0); +} + +/* + * Validates that multiple tcp_hptsi instances can coexist independently, with + * different configurations and CPU assignments without interfering with each + * other. + */ +KTEST_FUNC(hptsi_independence) +{ +	struct tcp_hptsi *pace1, *pace2; +	uint16_t cpu1, cpu2; + +	test_hpts_init(); + +	pace1 = tcp_hptsi_create(&test_funcs, false); +	pace2 = tcp_hptsi_create(&test_funcs, false); +	KTEST_NEQUAL(pace1, NULL); +	KTEST_NEQUAL(pace2, NULL); +	KTEST_NEQUAL(pace2->rp_ent, NULL); + +	cpu1 = tcp_hptsi_random_cpu(pace1); +	cpu2 = tcp_hptsi_random_cpu(pace2); +	KTEST_VERIFY(cpu1 < pace1->rp_num_hptss); +	KTEST_VERIFY(cpu2 < pace2->rp_num_hptss); + +	/* Verify both instances have independent entry arrays */ +	KTEST_NEQUAL(pace1->rp_ent, pace2->rp_ent); +	/* Verify they may have different CPU counts but both reasonable */ +	KTEST_VERIFY(pace1->rp_num_hptss > 0 && pace1->rp_num_hptss <= MAXCPU); +	KTEST_VERIFY(pace2->rp_num_hptss > 0 && pace2->rp_num_hptss <= MAXCPU); + +	tcp_hptsi_destroy(pace1); +	tcp_hptsi_destroy(pace2); + +	return (0); +} + +/* + * Validates that custom function injection works correctly, ensuring that + * test-specific implementations of microuptime and others are properly + * called by the HPTS system. + */ +KTEST_FUNC(function_injection) +{ +	struct tcp_hptsi *pace; + +	test_hpts_init(); + +	pace = tcp_hptsi_create(&test_funcs, false); +	KTEST_NEQUAL(pace, NULL); +	KTEST_EQUAL(pace->funcs, &test_funcs); +	KTEST_VERIFY(call_counts[CCNT_MICROUPTIME] > 0); +	KTEST_VERIFY(call_counts[CCNT_CALLOUT_INIT] > 0); + +	tcp_hptsi_start(pace); +	KTEST_VERIFY(call_counts[CCNT_SWI_ADD] > 0); +	KTEST_VERIFY(tcp_bind_threads == 0 || +	    call_counts[CCNT_INTR_EVENT_BIND] > 0 || +	    call_counts[CCNT_INTR_EVENT_BIND_CPUSET] > 0); +	KTEST_VERIFY(call_counts[CCNT_CALLOUT_RESET_SBT_ON] > 0); + +	tcp_hptsi_stop(pace); +	KTEST_VERIFY(call_counts[CCNT_CALLOUT_STOP_SAFE] > 0); +	KTEST_VERIFY(call_counts[CCNT_SWI_REMOVE] > 0); + +	tcp_hptsi_destroy(pace); + +	/* Verify we have a reasonable balance of create/destroy calls */ +	KTEST_EQUAL(call_counts[CCNT_SWI_ADD], call_counts[CCNT_SWI_REMOVE]); +	KTEST_VERIFY(call_counts[CCNT_CALLOUT_RESET_SBT_ON] <= call_counts[CCNT_CALLOUT_STOP_SAFE]); + +	return (0); +} + +/* + * Validates that a tcpcb can be properly initialized for HPTS compatibility, + * ensuring all required fields are set correctly and function pointers are + * valid for safe HPTS operations. 
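+ *
+ * A freshly created test tcpcb is expected to sit off the wheel (IHPTS_NONE)
+ * with no HPTS flags, slot, or deferred request carried over.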
+ */ +KTEST_FUNC(tcpcb_initialization) +{ +	struct tcp_hptsi *pace; +	struct tcpcb *tp; + +	test_hpts_init(); + +	pace = tcp_hptsi_create(&test_funcs, false); +	KTEST_NEQUAL(pace, NULL); +	tcp_hptsi_start(pace); + +	/* Verify the tcpcb is properly initialized for HPTS */ +	tp = test_hpts_create_tcpcb(ctx, pace); +	KTEST_NEQUAL(tp, NULL); +	KTEST_NEQUAL(tp->t_fb, NULL); +	KTEST_NEQUAL(tp->t_fb->tfb_tcp_output, NULL); +	KTEST_NEQUAL(tp->t_fb->tfb_do_queued_segments, NULL); +	KTEST_EQUAL(tp->t_in_hpts, IHPTS_NONE); +	KTEST_EQUAL((tp->t_flags2 & (TF2_HPTS_CPU_SET | TF2_HPTS_CALLS)), 0); + +	/* Verify that HPTS-specific fields are initialized */ +	KTEST_EQUAL(tp->t_hpts_gencnt, 0); +	KTEST_EQUAL(tp->t_hpts_slot, 0); +	KTEST_EQUAL(tp->t_hpts_request, 0); +	KTEST_EQUAL(tp->t_lro_cpu, 0); +	KTEST_VERIFY(tp->t_hpts_cpu < pace->rp_num_hptss); +	KTEST_EQUAL(tp->t_inpcb.inp_refcount, 1); +	KTEST_VERIFY(!(tp->t_inpcb.inp_flags & INP_DROPPED)); + +	test_hpts_free_tcpcb(tp); +	tcp_hptsi_stop(pace); +	tcp_hptsi_destroy(pace); + +	return (0); +} + +/* + * Validates that tcpcb structures can be successfully inserted into and removed + * from the HPTS wheel, with proper state tracking and slot assignment during + * the process. + */ +KTEST_FUNC(tcpcb_insertion) +{ +	struct tcp_hptsi *pace; +	struct tcpcb *tp; +	struct tcp_hpts_entry *hpts; +	uint32_t timeout_usecs = 10; + +	test_hpts_init(); + +	pace = tcp_hptsi_create(&test_funcs, false); +	KTEST_NEQUAL(pace, NULL); +	tcp_hptsi_start(pace); + +	tp = test_hpts_create_tcpcb(ctx, pace); +	KTEST_NEQUAL(tp, NULL); +	KTEST_EQUAL(tp->t_in_hpts, IHPTS_NONE); +	KTEST_EQUAL((tp->t_flags2 & TF2_HPTS_CALLS), 0); + +	INP_WLOCK(&tp->t_inpcb); +	tp->t_flags2 |= TF2_HPTS_CALLS; +	KTEST_EQUAL(call_counts[CCNT_SWI_SCHED], 0); +	tcp_hpts_insert(pace, tp, timeout_usecs, NULL); +	KTEST_EQUAL(tp->t_in_hpts, IHPTS_ONQUEUE); +	INP_WUNLOCK(&tp->t_inpcb); +	KTEST_EQUAL(call_counts[CCNT_TCP_OUTPUT], 0); +	KTEST_EQUAL(call_counts[CCNT_SWI_SCHED], 1); +	KTEST_VERIFY(tcp_in_hpts(tp)); +	KTEST_VERIFY(tp->t_hpts_slot >= 0); +	KTEST_VERIFY(tp->t_hpts_slot < NUM_OF_HPTSI_SLOTS); + +	hpts = pace->rp_ent[tp->t_hpts_cpu]; +	KTEST_EQUAL(hpts->p_on_queue_cnt, 1); +	KTEST_EQUAL(tp->t_hpts_request, 0); +	KTEST_EQUAL(tp->t_hpts_slot, HPTS_USEC_TO_SLOTS(timeout_usecs)); +	//KTEST_EQUAL(tp->t_hpts_gencnt, 1); + +	INP_WLOCK(&tp->t_inpcb); +	tcp_hpts_remove(pace, tp); +	KTEST_EQUAL(tp->t_in_hpts, IHPTS_NONE); +	INP_WUNLOCK(&tp->t_inpcb); +	KTEST_EQUAL(call_counts[CCNT_TCP_OUTPUT], 0); +	KTEST_VERIFY(!tcp_in_hpts(tp)); + +	KTEST_EQUAL(hpts->p_on_queue_cnt, 0); + +	test_hpts_free_tcpcb(tp); +	tcp_hptsi_stop(pace); +	tcp_hptsi_destroy(pace); + +	return (0); +} + +/* + * Validates the core HPTS timer functionality by verifying that scheduled + * tcpcb entries trigger tcp_output calls at appropriate times, simulating + * real-world timer-driven TCP processing. 
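+ *
+ * Example of the slot arithmetic exercised here: with HPTS_USECS_PER_SLOT of
+ * 10, a 500 usec request maps to slot 50, so advancing the fake clock by only
+ * 499 usec (slot 49) must not fire tcp_output(), while one more usec must.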
+ */ +KTEST_FUNC(timer_functionality) +{ +	struct epoch_tracker et; +	struct tcp_hptsi *pace; +	struct tcp_hpts_entry *hpts; +	struct tcpcb *tp; +	int32_t slots_ran; +	uint32_t i; + +	test_hpts_init(); + +	pace = tcp_hptsi_create(&test_funcs, false); +	KTEST_NEQUAL(pace, NULL); +	tcp_hptsi_start(pace); + +	for (i = 0; i < pace->rp_num_hptss; i++) +		dump_hpts_entry(ctx, pace->rp_ent[i]); + +	/* Create and insert the tcpcb into the HPTS wheel to wait for 500 usec */ +	tp = test_hpts_create_tcpcb(ctx, pace); +	KTEST_NEQUAL(tp, NULL); +	dump_tcpcb(tp); +	TP_LOG_TEST(tp) = 1; /* Enable logging for this tcpcb */ + +	KTEST_LOG(ctx, "=> tcp_hpts_insert(%p)", tp); +	INP_WLOCK(&tp->t_inpcb); +	tp->t_flags2 |= TF2_HPTS_CALLS; /* Mark as needing HPTS processing */ +	tcp_hpts_insert(pace, tp, 500, NULL); +	INP_WUNLOCK(&tp->t_inpcb); + +	dump_tcpcb(tp); +	for (i = 0; i < pace->rp_num_hptss; i++) +		dump_hpts_entry(ctx, pace->rp_ent[i]); + +	hpts = pace->rp_ent[tp->t_hpts_cpu]; +	KTEST_EQUAL(hpts->p_on_queue_cnt, 1); +	KTEST_EQUAL(hpts->p_prev_slot, 0); +	KTEST_EQUAL(hpts->p_cur_slot, 0); +	KTEST_EQUAL(hpts->p_runningslot, 0); +	KTEST_EQUAL(hpts->p_nxt_slot, 1); +	KTEST_EQUAL(hpts->p_hpts_active, 0); + +	KTEST_EQUAL(tp->t_in_hpts, IHPTS_ONQUEUE); +	KTEST_EQUAL(tp->t_hpts_request, 0); +	KTEST_EQUAL(tp->t_hpts_slot, HPTS_USEC_TO_SLOTS(500)); + +	/* Set our test flag to indicate the tcpcb should be removed from the +	 * wheel when tcp_output is called. */ +	TP_REMOVE_FROM_HPTS(tp) = 1; + +	/* Test early exit condition: advance time by insufficient amount */ +	KTEST_LOG(ctx, "Testing early exit with insufficient time advancement"); +	test_time_usec += 1; /* Very small advancement - should cause early exit */ +	HPTS_LOCK(hpts); +	NET_EPOCH_ENTER(et); +	slots_ran = tcp_hptsi(hpts, true); +	HPTS_UNLOCK(hpts); +	NET_EPOCH_EXIT(et); + +	/* Should return 0 slots due to insufficient time advancement */ +	KTEST_EQUAL(slots_ran, 0); +	KTEST_EQUAL(call_counts[CCNT_TCP_OUTPUT], 0); /* No processing should occur */ +	KTEST_EQUAL(tp->t_in_hpts, IHPTS_ONQUEUE); /* Connection still queued */ + +	/* Wait for 498 more usecs and trigger the HPTS workers and verify +	 * nothing happens yet (total 499 usec) */ +	KTEST_EQUAL(call_counts[CCNT_TCP_OUTPUT], 0); +	test_time_usec += 498; +	for (i = 0; i < pace->rp_num_hptss; i++) { +		KTEST_LOG(ctx, "=> tcp_hptsi(%p)", pace->rp_ent[i]); +		HPTS_LOCK(pace->rp_ent[i]); +		NET_EPOCH_ENTER(et); +		slots_ran = tcp_hptsi(pace->rp_ent[i], true); +		HPTS_UNLOCK(pace->rp_ent[i]); +		NET_EPOCH_EXIT(et); + +		dump_hpts_entry(ctx, pace->rp_ent[i]); +		KTEST_VERIFY(slots_ran >= 0); +		KTEST_EQUAL(pace->rp_ent[i]->p_prev_slot, 49); +		KTEST_EQUAL(pace->rp_ent[i]->p_cur_slot, 49); +	} + +	dump_tcpcb(tp); +	KTEST_EQUAL(call_counts[CCNT_TCP_OUTPUT], 0); +	KTEST_EQUAL(tp->t_in_hpts, IHPTS_ONQUEUE); +	KTEST_EQUAL(tp->t_hpts_request, 0); +	KTEST_EQUAL(tp->t_hpts_slot, HPTS_USEC_TO_SLOTS(500)); +	KTEST_EQUAL(hpts->p_on_queue_cnt, 1); + +	/* Wait for 1 more usec and trigger the HPTS workers and verify it +	 * triggers tcp_output this time */ +	KTEST_EQUAL(call_counts[CCNT_TCP_OUTPUT], 0); +	test_time_usec += 1; +	for (i = 0; i < pace->rp_num_hptss; i++) { +		KTEST_LOG(ctx, "=> tcp_hptsi(%p)", pace->rp_ent[i]); +		HPTS_LOCK(pace->rp_ent[i]); +		NET_EPOCH_ENTER(et); +		slots_ran = tcp_hptsi(pace->rp_ent[i], true); +		HPTS_UNLOCK(pace->rp_ent[i]); +		NET_EPOCH_EXIT(et); + +		dump_hpts_entry(ctx, pace->rp_ent[i]); +		KTEST_VERIFY(slots_ran >= 0); +		KTEST_EQUAL(pace->rp_ent[i]->p_prev_slot, 50); +		
KTEST_EQUAL(pace->rp_ent[i]->p_cur_slot, 50); +	} + +	dump_tcpcb(tp); +	KTEST_EQUAL(call_counts[CCNT_TCP_OUTPUT], 1); +	KTEST_EQUAL(tp->t_in_hpts, IHPTS_NONE); +	KTEST_EQUAL(hpts->p_on_queue_cnt, 0); + +	test_hpts_free_tcpcb(tp); +	tcp_hptsi_stop(pace); +	tcp_hptsi_destroy(pace); + +	return (0); +} + +/* + * Validates HPTS scalability by creating and inserting a LOT of tcpcbs into + * the HPTS wheel, testing performance under high load conditions. + */ +KTEST_FUNC(scalability_tcpcbs) +{ +	struct tcp_hptsi *pace; +	struct tcpcb **tcpcbs; +	uint32_t i, num_tcpcbs = 100000, total_queued = 0; + +	test_hpts_init(); + +	pace = tcp_hptsi_create(&test_funcs, false); +	KTEST_NEQUAL(pace, NULL); +	tcp_hptsi_start(pace); + +	/* Allocate array to hold pointers to all tcpcbs */ +	tcpcbs = malloc(num_tcpcbs * sizeof(struct tcpcb *), M_TCPHPTS, M_WAITOK | M_ZERO); +	KTEST_VERIFY_RET(tcpcbs != NULL, ENOMEM); + +	/* Create a LOT of tcpcbs */ +	KTEST_LOG(ctx, "Creating %u tcpcbs...", num_tcpcbs); +	for (i = 0; i < num_tcpcbs; i++) { +		tcpcbs[i] = test_hpts_create_tcpcb(ctx, pace); +		if (tcpcbs[i] == NULL) { +			KTEST_ERR(ctx, "FAIL: tcpcbs[i] == NULL"); +			return (EINVAL); +		} +	} + +	/* Insert all created tcpcbs into HPTS */ +	KTEST_LOG(ctx, "Inserting all tcpcbs into HPTS..."); +	for (i = 0; i < num_tcpcbs; i++) { +		INP_WLOCK(&tcpcbs[i]->t_inpcb); +		tcpcbs[i]->t_flags2 |= TF2_HPTS_CALLS; +		/* Insert with varying future timeouts to distribute across slots */ +		tcp_hpts_insert(pace, tcpcbs[i], 100 + (i % 1000), NULL); +		INP_WUNLOCK(&tcpcbs[i]->t_inpcb); +	} + +	/* Verify total queue counts across all CPUs */ +	for (i = 0; i < pace->rp_num_hptss; i++) { +		total_queued += pace->rp_ent[i]->p_on_queue_cnt; +	} +	KTEST_EQUAL(total_queued, num_tcpcbs); + +	for (i = 0; i < pace->rp_num_hptss; i++) +		dump_hpts_entry(ctx, pace->rp_ent[i]); + +	/* Remove all tcpcbs from HPTS */ +	KTEST_LOG(ctx, "Removing all tcpcbs from HPTS..."); +	for (i = 0; i < num_tcpcbs; i++) { +		INP_WLOCK(&tcpcbs[i]->t_inpcb); +		if (tcpcbs[i]->t_in_hpts != IHPTS_NONE) { +			tcp_hpts_remove(pace, tcpcbs[i]); +		} +		INP_WUNLOCK(&tcpcbs[i]->t_inpcb); +	} + +	/* Verify all queues are now empty */ +	for (i = 0; i < pace->rp_num_hptss; i++) { +		if (pace->rp_ent[i]->p_on_queue_cnt != 0) { +			KTEST_ERR(ctx, "FAIL: pace->rp_ent[i]->p_on_queue_cnt != 0"); +			return (EINVAL); +		} +	} + +	for (i = 0; i < num_tcpcbs; i++) { +		test_hpts_free_tcpcb(tcpcbs[i]); +	} +	free(tcpcbs, M_TCPHPTS); +	tcp_hptsi_stop(pace); +	tcp_hptsi_destroy(pace); + +	return (0); +} + +/* + * Validates wheel wrap scenarios where the timer falls significantly behind + * and needs to process more than one full wheel revolution worth of slots. 
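+ *
+ * The fake clock is advanced by more than a full wheel's worth of slots in a
+ * single step, so one tcp_hptsi() call has to walk the entire wheel and drain
+ * every queued connection.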
+ */ +KTEST_FUNC(wheel_wrap_recovery) +{ +	struct epoch_tracker et; +	struct tcp_hptsi *pace; +	struct tcpcb **tcpcbs; +	uint32_t i, timeout_usecs, num_tcpcbs = 500; +	int32_t slots_ran; + +	test_hpts_init(); + +	pace = tcp_hptsi_create(&test_funcs, false); +	KTEST_NEQUAL(pace, NULL); +	tcp_hptsi_start(pace); + +	/* Allocate array to hold pointers to tcpcbs */ +	tcpcbs = malloc(num_tcpcbs * sizeof(struct tcpcb *), M_TCPHPTS, M_WAITOK | M_ZERO); +	KTEST_VERIFY_RET(tcpcbs != NULL, ENOMEM); + +	/* Create tcpcbs and insert them across many slots */ +	for (i = 0; i < num_tcpcbs; i++) { +		tcpcbs[i] = test_hpts_create_tcpcb(ctx, pace); +		KTEST_NEQUAL(tcpcbs[i], NULL); +		TP_REMOVE_FROM_HPTS(tcpcbs[i]) = 1; + +		timeout_usecs = ((i * NUM_OF_HPTSI_SLOTS) / num_tcpcbs) * HPTS_USECS_PER_SLOT; /* Spread across slots */ + +		INP_WLOCK(&tcpcbs[i]->t_inpcb); +		tcpcbs[i]->t_flags2 |= TF2_HPTS_CALLS; +		tcp_hpts_insert(pace, tcpcbs[i], timeout_usecs, NULL); +		INP_WUNLOCK(&tcpcbs[i]->t_inpcb); +	} + +	/* Fast forward time significantly to trigger wheel wrap */ +	test_time_usec += (NUM_OF_HPTSI_SLOTS + 5000) * HPTS_USECS_PER_SLOT; + +	for (i = 0; i < pace->rp_num_hptss; i++) { +		KTEST_LOG(ctx, "=> tcp_hptsi(%u)", i); +		KTEST_NEQUAL(pace->rp_ent[i]->p_on_queue_cnt, 0); + +		HPTS_LOCK(pace->rp_ent[i]); +		NET_EPOCH_ENTER(et); +		slots_ran = tcp_hptsi(pace->rp_ent[i], true); +		HPTS_UNLOCK(pace->rp_ent[i]); +		NET_EPOCH_EXIT(et); + +		KTEST_EQUAL(slots_ran, NUM_OF_HPTSI_SLOTS-1); /* Should process all slots */ +		KTEST_EQUAL(pace->rp_ent[i]->p_on_queue_cnt, 0); +		KTEST_NEQUAL(pace->rp_ent[i]->p_cur_slot, +			pace->rp_ent[i]->p_prev_slot); +	} + +	/* Cleanup */ +	for (i = 0; i < num_tcpcbs; i++) { +		INP_WLOCK(&tcpcbs[i]->t_inpcb); +		if (tcpcbs[i]->t_in_hpts != IHPTS_NONE) { +			tcp_hpts_remove(pace, tcpcbs[i]); +		} +		INP_WUNLOCK(&tcpcbs[i]->t_inpcb); +		test_hpts_free_tcpcb(tcpcbs[i]); +	} +	free(tcpcbs, M_TCPHPTS); +	tcp_hptsi_stop(pace); +	tcp_hptsi_destroy(pace); + +	return (0); +} + +/* + * Validates proper handling of tcpcbs in the IHPTS_MOVING state, which occurs + * when a tcpcb is being processed by the HPTS thread but gets removed. 
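+ *
+ * A tcpcb left in IHPTS_MOVING with t_hpts_slot set to -1 should be cleaned
+ * up by the wheel walk itself rather than generating a tcp_output() call.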
+ */ +KTEST_FUNC(tcpcb_moving_state) +{ +	struct epoch_tracker et; +	struct tcp_hptsi *pace; +	struct tcpcb *tp1, *tp2; +	struct tcp_hpts_entry *hpts; +	int32_t slots_ran; + +	test_hpts_init(); + +	pace = tcp_hptsi_create(&test_funcs, false); +	KTEST_NEQUAL(pace, NULL); +	tcp_hptsi_start(pace); + +	/* Create two tcpcbs on the same CPU/slot */ +	tp1 = test_hpts_create_tcpcb(ctx, pace); +	tp2 = test_hpts_create_tcpcb(ctx, pace); +	KTEST_NEQUAL(tp1, NULL); +	KTEST_NEQUAL(tp2, NULL); + +	/* Force them to the same CPU for predictable testing */ +	tp1->t_hpts_cpu = 0; +	tp2->t_hpts_cpu = 0; + +	/* Insert both into the same slot */ +	INP_WLOCK(&tp1->t_inpcb); +	tp1->t_flags2 |= TF2_HPTS_CALLS; +	tcp_hpts_insert(pace, tp1, 100, NULL); +	INP_WUNLOCK(&tp1->t_inpcb); + +	INP_WLOCK(&tp2->t_inpcb); +	tp2->t_flags2 |= TF2_HPTS_CALLS; +	tcp_hpts_insert(pace, tp2, 100, NULL); +	INP_WUNLOCK(&tp2->t_inpcb); + +	hpts = pace->rp_ent[0]; + +	/* Manually transition tp1 to MOVING state to simulate race condition */ +	HPTS_LOCK(hpts); +	tp1->t_in_hpts = IHPTS_MOVING; +	tp1->t_hpts_slot = -1; /* Mark for removal */ +	HPTS_UNLOCK(hpts); + +	/* Set time and run HPTS to process the moving state */ +	test_time_usec += 100; +	HPTS_LOCK(hpts); +	NET_EPOCH_ENTER(et); +	slots_ran = tcp_hptsi(hpts, true); +	HPTS_UNLOCK(hpts); +	NET_EPOCH_EXIT(et); + +	KTEST_VERIFY(slots_ran >= 0); +	KTEST_EQUAL(call_counts[CCNT_TCP_OUTPUT], 1); /* Shouldn't call on both */ + +	/* tp1 should be cleaned up and removed */ +	KTEST_EQUAL(tp1->t_in_hpts, IHPTS_NONE); +	/* tp2 should have been processed normally */ +	KTEST_EQUAL(tp2->t_in_hpts, IHPTS_NONE); + +	test_hpts_free_tcpcb(tp1); +	test_hpts_free_tcpcb(tp2); +	tcp_hptsi_stop(pace); +	tcp_hptsi_destroy(pace); + +	return (0); +} + +/* + * Validates that tcpcbs with deferred requests (t_hpts_request > 0) are + * properly handled and re-inserted into appropriate future slots after + * the wheel processes enough slots to accommodate the original request. 
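+ *
+ * Timeouts longer than the wheel leave the overflow in t_hpts_request; each
+ * full pass of the wheel can retire at most NUM_OF_HPTSI_SLOTS slots of that
+ * overflow, so very large requests take several passes to drain.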
+ */ +KTEST_FUNC(deferred_requests) +{ +	struct epoch_tracker et; +	struct tcp_hptsi *pace; +	struct tcpcb *tp, *tp2; +	struct tcp_hpts_entry *hpts; +	uint32_t large_timeout_usecs = (NUM_OF_HPTSI_SLOTS + 5000) * HPTS_USECS_PER_SLOT; /* Beyond wheel capacity */ +	uint32_t huge_timeout_usecs = (NUM_OF_HPTSI_SLOTS * 3) * HPTS_USECS_PER_SLOT; /* 3x wheel capacity */ +	uint32_t initial_request; +	int32_t slots_ran; + +	test_hpts_init(); + +	pace = tcp_hptsi_create(&test_funcs, false); +	KTEST_NEQUAL(pace, NULL); +	tcp_hptsi_start(pace); + +	tp = test_hpts_create_tcpcb(ctx, pace); +	KTEST_NEQUAL(tp, NULL); + +	/* Insert with a request that exceeds current wheel capacity */ +	INP_WLOCK(&tp->t_inpcb); +	tp->t_flags2 |= TF2_HPTS_CALLS; +	tcp_hpts_insert(pace, tp, large_timeout_usecs, NULL); +	INP_WUNLOCK(&tp->t_inpcb); + +	/* Verify it was inserted with a deferred request */ +	dump_tcpcb(tp); +	KTEST_EQUAL(tp->t_in_hpts, IHPTS_ONQUEUE); +	KTEST_VERIFY(tp->t_hpts_request > 0); +	KTEST_VERIFY(tp->t_hpts_slot < NUM_OF_HPTSI_SLOTS); + +	hpts = pace->rp_ent[tp->t_hpts_cpu]; + +	/* Advance time to process deferred requests */ +	test_time_usec += NUM_OF_HPTSI_SLOTS * HPTS_USECS_PER_SLOT; + +	/* Process the wheel to handle deferred requests */ +	HPTS_LOCK(hpts); +	NET_EPOCH_ENTER(et); +	slots_ran = tcp_hptsi(hpts, true); +	HPTS_UNLOCK(hpts); +	NET_EPOCH_EXIT(et); + +	dump_hpts_entry(ctx, hpts); +	KTEST_GREATER_THAN(slots_ran, 0); +	dump_tcpcb(tp); +	KTEST_EQUAL(tp->t_hpts_request, 0); + +	/* Test incremental deferred request processing over multiple cycles */ +	KTEST_LOG(ctx, "Testing incremental deferred request processing"); + +	/* Create a new connection with an even larger request */ +	tp2 = test_hpts_create_tcpcb(ctx, pace); +	KTEST_NEQUAL(tp2, NULL); +	tp2->t_hpts_cpu = tp->t_hpts_cpu; /* Same CPU for predictable testing */ + +	INP_WLOCK(&tp2->t_inpcb); +	tp2->t_flags2 |= TF2_HPTS_CALLS; +	tcp_hpts_insert(pace, tp2, huge_timeout_usecs, NULL); +	INP_WUNLOCK(&tp2->t_inpcb); + +	/* Verify initial deferred request */ +	initial_request = tp2->t_hpts_request; +	KTEST_VERIFY(initial_request > NUM_OF_HPTSI_SLOTS); + +	/* Process one wheel cycle - should reduce but not eliminate request */ +	test_time_usec += NUM_OF_HPTSI_SLOTS * HPTS_USECS_PER_SLOT; +	HPTS_LOCK(hpts); +	NET_EPOCH_ENTER(et); +	slots_ran = tcp_hptsi(hpts, true); +	HPTS_UNLOCK(hpts); +	NET_EPOCH_EXIT(et); + +	/* Request should be reduced but not zero */ +	KTEST_GREATER_THAN(initial_request, tp2->t_hpts_request); +	KTEST_VERIFY(tp2->t_hpts_request > 0); +	KTEST_EQUAL(tp2->t_in_hpts, IHPTS_ONQUEUE); /* Still queued */ + +	/* For huge_timeout_usecs = NUM_OF_HPTSI_SLOTS * 3 * HPTS_USECS_PER_SLOT, we need ~3 cycles to complete. +	 * Each cycle can reduce the request by at most NUM_OF_HPTSI_SLOTS. 
*/ +	test_time_usec += NUM_OF_HPTSI_SLOTS * HPTS_USECS_PER_SLOT; +	HPTS_LOCK(hpts); +	NET_EPOCH_ENTER(et); +	slots_ran = tcp_hptsi(hpts, true); +	HPTS_UNLOCK(hpts); +	NET_EPOCH_EXIT(et); + +	/* After second cycle, request should be reduced significantly (likely by ~NUM_OF_HPTSI_SLOTS) */ +	KTEST_VERIFY(tp2->t_hpts_request < initial_request); +	KTEST_VERIFY(tp2->t_hpts_request > 0); /* But not yet zero for such a large request */ + +	/* Clean up second connection */ +	INP_WLOCK(&tp2->t_inpcb); +	if (tp2->t_in_hpts != IHPTS_NONE) { +		tcp_hpts_remove(pace, tp2); +	} +	INP_WUNLOCK(&tp2->t_inpcb); +	test_hpts_free_tcpcb(tp2); + +	/* Clean up */ +	INP_WLOCK(&tp->t_inpcb); +	if (tp->t_in_hpts != IHPTS_NONE) { +		tcp_hpts_remove(pace, tp); +	} +	INP_WUNLOCK(&tp->t_inpcb); +	test_hpts_free_tcpcb(tp); +	tcp_hptsi_stop(pace); +	tcp_hptsi_destroy(pace); + +	return (0); +} + +/* + * Validates CPU assignment and affinity mechanisms, including flowid-based + * assignment, random fallback scenarios, and explicit CPU setting. Tests + * the actual cpu assignment logic in hpts_cpuid via tcp_set_hpts. + */ +KTEST_FUNC(cpu_assignment) +{ +	struct tcp_hptsi *pace; +	struct tcpcb *tp1, *tp2, *tp2_dup, *tp3; + +	test_hpts_init(); + +	pace = tcp_hptsi_create(&test_funcs, false); +	KTEST_NEQUAL(pace, NULL); + +	/* Test random CPU assignment (no flowid) */ +	tp1 = test_hpts_create_tcpcb(ctx, pace); +	KTEST_NEQUAL(tp1, NULL); +	tp1->t_inpcb.inp_flowtype = M_HASHTYPE_NONE; +	INP_WLOCK(&tp1->t_inpcb); +	tcp_set_hpts(pace, tp1); +	INP_WUNLOCK(&tp1->t_inpcb); +	KTEST_VERIFY(tp1->t_hpts_cpu < pace->rp_num_hptss); +	KTEST_VERIFY(tp1->t_flags2 & TF2_HPTS_CPU_SET); + +	/* Test flowid-based assignment */ +	tp2 = test_hpts_create_tcpcb(ctx, pace); +	KTEST_NEQUAL(tp2, NULL); +	tp2->t_inpcb.inp_flowtype = M_HASHTYPE_RSS_TCP_IPV4; +	tp2->t_inpcb.inp_flowid = 12345; +	INP_WLOCK(&tp2->t_inpcb); +	tcp_set_hpts(pace, tp2); +	INP_WUNLOCK(&tp2->t_inpcb); +	KTEST_VERIFY(tp2->t_hpts_cpu < pace->rp_num_hptss); +	KTEST_VERIFY(tp2->t_flags2 & TF2_HPTS_CPU_SET); + +	/* With the same flowid, should get same CPU assignment */ +	tp2_dup = test_hpts_create_tcpcb(ctx, pace); +	KTEST_NEQUAL(tp2_dup, NULL); +	tp2_dup->t_inpcb.inp_flowtype = M_HASHTYPE_RSS_TCP_IPV4; +	tp2_dup->t_inpcb.inp_flowid = 12345; +	INP_WLOCK(&tp2_dup->t_inpcb); +	tcp_set_hpts(pace, tp2_dup); +	INP_WUNLOCK(&tp2_dup->t_inpcb); +	KTEST_EQUAL(tp2_dup->t_hpts_cpu, tp2->t_hpts_cpu); + +	/* Test explicit CPU setting */ +	tp3 = test_hpts_create_tcpcb(ctx, pace); +	KTEST_NEQUAL(tp3, NULL); +	tp3->t_hpts_cpu = 1; /* Assume we have at least 2 CPUs */ +	tp3->t_flags2 |= TF2_HPTS_CPU_SET; +	INP_WLOCK(&tp3->t_inpcb); +	tcp_set_hpts(pace, tp3); +	INP_WUNLOCK(&tp3->t_inpcb); +	KTEST_EQUAL(tp3->t_hpts_cpu, 1); + +	test_hpts_free_tcpcb(tp1); +	test_hpts_free_tcpcb(tp2); +	test_hpts_free_tcpcb(tp2_dup); +	test_hpts_free_tcpcb(tp3); +	tcp_hptsi_destroy(pace); + +	return (0); +} + +/* + * Validates edge cases in slot calculation including boundary conditions + * around slot 0, maximum slots, and slot wrapping arithmetic. 
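+ *
+ * HPTS_USEC_TO_SLOTS() rounds up, so a 1 usec timeout already lands one slot
+ * ahead, while (NUM_OF_HPTSI_SLOTS - 1) * HPTS_USECS_PER_SLOT exercises the
+ * largest timeout that can be represented without wrapping the wheel.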
+ */ +KTEST_FUNC(slot_boundary_conditions) +{ +	struct tcp_hptsi *pace; +	struct tcpcb *tp; + +	test_hpts_init(); + +	pace = tcp_hptsi_create(&test_funcs, false); +	KTEST_NEQUAL(pace, NULL); +	tcp_hptsi_start(pace); + +	/* Test insertion at slot 0 */ +	tp = test_hpts_create_tcpcb(ctx, pace); +	KTEST_NEQUAL(tp, NULL); +	INP_WLOCK(&tp->t_inpcb); +	tp->t_flags2 |= TF2_HPTS_CALLS; +	tcp_hpts_insert(pace, tp, 0, NULL); /* Should insert immediately (0 timeout) */ +	INP_WUNLOCK(&tp->t_inpcb); +	KTEST_EQUAL(tp->t_in_hpts, IHPTS_ONQUEUE); +	KTEST_VERIFY(tp->t_hpts_slot < NUM_OF_HPTSI_SLOTS); + +	INP_WLOCK(&tp->t_inpcb); +	tcp_hpts_remove(pace, tp); +	INP_WUNLOCK(&tp->t_inpcb); + +	/* Test insertion at maximum slot value */ +	INP_WLOCK(&tp->t_inpcb); +	tp->t_flags2 |= TF2_HPTS_CALLS; +	tcp_hpts_insert(pace, tp, (NUM_OF_HPTSI_SLOTS - 1) * HPTS_USECS_PER_SLOT, NULL); +	INP_WUNLOCK(&tp->t_inpcb); +	KTEST_EQUAL(tp->t_in_hpts, IHPTS_ONQUEUE); + +	INP_WLOCK(&tp->t_inpcb); +	tcp_hpts_remove(pace, tp); +	INP_WUNLOCK(&tp->t_inpcb); + +	/* Test very small timeout values */ +	INP_WLOCK(&tp->t_inpcb); +	tp->t_flags2 |= TF2_HPTS_CALLS; +	tcp_hpts_insert(pace, tp, 1, NULL); +	INP_WUNLOCK(&tp->t_inpcb); +	KTEST_EQUAL(tp->t_in_hpts, IHPTS_ONQUEUE); +	KTEST_EQUAL(tp->t_hpts_slot, HPTS_USEC_TO_SLOTS(1)); /* Should convert 1 usec to slot */ + +	INP_WLOCK(&tp->t_inpcb); +	tcp_hpts_remove(pace, tp); +	INP_WUNLOCK(&tp->t_inpcb); + +	test_hpts_free_tcpcb(tp); +	tcp_hptsi_stop(pace); +	tcp_hptsi_destroy(pace); + +	return (0); +} + +/* + * Validates HPTS behavior under high load conditions, including proper + * processing of many connections and connection count tracking. + */ +KTEST_FUNC(dynamic_sleep_adjustment) +{ +	struct epoch_tracker et; +	struct tcp_hptsi *pace; +	struct tcpcb **tcpcbs; +	struct tcp_hpts_entry *hpts; +	uint32_t i, num_tcpcbs = DEFAULT_CONNECTION_THRESHOLD + 50; +	int32_t slots_ran; + +	test_hpts_init(); + +	pace = tcp_hptsi_create(&test_funcs, false); +	KTEST_NEQUAL(pace, NULL); +	tcp_hptsi_start(pace); + +	/* Create many connections to exceed threshold */ +	tcpcbs = malloc(num_tcpcbs * sizeof(struct tcpcb *), M_TCPHPTS, M_WAITOK | M_ZERO); +	KTEST_VERIFY_RET(tcpcbs != NULL, ENOMEM); + +	for (i = 0; i < num_tcpcbs; i++) { +		tcpcbs[i] = test_hpts_create_tcpcb(ctx, pace); +		KTEST_NEQUAL(tcpcbs[i], NULL); +		tcpcbs[i]->t_hpts_cpu = 0; /* Force all to CPU 0 */ +		INP_WLOCK(&tcpcbs[i]->t_inpcb); +		tcpcbs[i]->t_flags2 |= TF2_HPTS_CALLS; +		TP_REMOVE_FROM_HPTS(tcpcbs[i]) = 1; /* Will be removed after output */ +		tcp_hpts_insert(pace, tcpcbs[i], 100, NULL); +		INP_WUNLOCK(&tcpcbs[i]->t_inpcb); +	} + +	hpts = pace->rp_ent[0]; +	dump_hpts_entry(ctx, hpts); + +	/* Verify we're above threshold */ +	KTEST_GREATER_THAN(hpts->p_on_queue_cnt, DEFAULT_CONNECTION_THRESHOLD); + +	/* Run HPTS to process many connections */ +	test_time_usec += 100; +	HPTS_LOCK(hpts); +	NET_EPOCH_ENTER(et); +	slots_ran = tcp_hptsi(hpts, true); +	HPTS_UNLOCK(hpts); +	NET_EPOCH_EXIT(et); + +	/* Verify HPTS processed slots and connections correctly */ +	KTEST_GREATER_THAN(slots_ran, 0); +	KTEST_EQUAL(call_counts[CCNT_TCP_OUTPUT], num_tcpcbs); + +	/* Verify all connections were removed from queue */ +	KTEST_EQUAL(hpts->p_on_queue_cnt, 0); + +	/* Cleanup */ +	for (i = 0; i < num_tcpcbs; i++) { +		test_hpts_free_tcpcb(tcpcbs[i]); +	} +	free(tcpcbs, M_TCPHPTS); +	tcp_hptsi_stop(pace); +	tcp_hptsi_destroy(pace); + +	return (0); +} + +/* + * Validates handling of concurrent insert/remove operations and race conditions + * between HPTS 
processing and user operations. + */ +KTEST_FUNC(concurrent_operations) +{ +	struct tcp_hptsi *pace; +	struct tcpcb *tp1, *tp2; +	struct tcp_hpts_entry *hpts; + +	test_hpts_init(); + +	pace = tcp_hptsi_create(&test_funcs, false); +	KTEST_NEQUAL(pace, NULL); +	tcp_hptsi_start(pace); + +	tp1 = test_hpts_create_tcpcb(ctx, pace); +	tp2 = test_hpts_create_tcpcb(ctx, pace); +	KTEST_NEQUAL(tp1, NULL); +	KTEST_NEQUAL(tp2, NULL); + +	/* Force all to CPU 0 */ +	tp1->t_hpts_cpu = 0; +	tp2->t_hpts_cpu = 0; + +	/* Insert tp1 */ +	INP_WLOCK(&tp1->t_inpcb); +	tp1->t_flags2 |= TF2_HPTS_CALLS; +	tcp_hpts_insert(pace, tp1, 100, NULL); +	INP_WUNLOCK(&tp1->t_inpcb); + +	/* Insert tp2 into same slot */ +	INP_WLOCK(&tp2->t_inpcb); +	tp2->t_flags2 |= TF2_HPTS_CALLS; +	tcp_hpts_insert(pace, tp2, 100, NULL); +	INP_WUNLOCK(&tp2->t_inpcb); + +	/* Verify both are inserted */ +	KTEST_EQUAL(tp1->t_in_hpts, IHPTS_ONQUEUE); +	KTEST_EQUAL(tp2->t_in_hpts, IHPTS_ONQUEUE); + +	/* Verify they're both assigned to the same slot */ +	KTEST_EQUAL(tp1->t_hpts_slot, tp2->t_hpts_slot); + +	/* Verify queue count reflects both connections */ +	KTEST_EQUAL(tp1->t_hpts_cpu, tp2->t_hpts_cpu); /* Should be on same CPU */ +	hpts = pace->rp_ent[tp1->t_hpts_cpu]; +	KTEST_EQUAL(hpts->p_on_queue_cnt, 2); + +	/* Remove tp1 while tp2 is still there */ +	INP_WLOCK(&tp1->t_inpcb); +	tcp_hpts_remove(pace, tp1); +	INP_WUNLOCK(&tp1->t_inpcb); + +	/* Verify tp1 removed, tp2 still there */ +	KTEST_EQUAL(tp1->t_in_hpts, IHPTS_NONE); +	KTEST_EQUAL(tp2->t_in_hpts, IHPTS_ONQUEUE); + +	/* Verify queue count decreased by one */ +	KTEST_EQUAL(hpts->p_on_queue_cnt, 1); + +	/* Remove tp2 */ +	INP_WLOCK(&tp2->t_inpcb); +	tcp_hpts_remove(pace, tp2); +	INP_WUNLOCK(&tp2->t_inpcb); + +	KTEST_EQUAL(tp2->t_in_hpts, IHPTS_NONE); + +	/* Verify queue is now completely empty */ +	KTEST_EQUAL(hpts->p_on_queue_cnt, 0); + +	test_hpts_free_tcpcb(tp1); +	test_hpts_free_tcpcb(tp2); +	tcp_hptsi_stop(pace); +	tcp_hptsi_destroy(pace); + +	return (0); +} + +/* + * Validates the queued segments processing path via tfb_do_queued_segments, + * which is an alternative to direct tcp_output calls. 
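+ *
+ * A connection flagged with TF2_SUPPORTS_MBUFQ and holding packets on its
+ * t_inqueue is expected to be handed to tfb_do_queued_segments() when its
+ * slot fires, rather than to the stack's tfb_tcp_output().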
+ */ +KTEST_FUNC(queued_segments_processing) +{ +	struct epoch_tracker et; +	struct tcp_hptsi *pace; +	struct tcpcb *tp; +	struct tcp_hpts_entry *hpts; +	struct mbuf *fake_mbuf; +	int32_t slots_ran; + +	test_hpts_init(); + +	pace = tcp_hptsi_create(&test_funcs, false); +	KTEST_NEQUAL(pace, NULL); +	tcp_hptsi_start(pace); + +	tp = test_hpts_create_tcpcb(ctx, pace); +	KTEST_NEQUAL(tp, NULL); + +	/* Create a minimal fake mbuf that has valid STAILQ pointers */ +	fake_mbuf = malloc(sizeof(struct mbuf), M_TCPHPTS, M_WAITOK | M_ZERO); +	KTEST_NEQUAL(fake_mbuf, NULL); + +	/* Set up for queued segments path */ +	tp->t_flags2 |= (TF2_HPTS_CALLS | TF2_SUPPORTS_MBUFQ); +	STAILQ_INSERT_TAIL(&tp->t_inqueue, fake_mbuf, m_stailqpkt); + +	INP_WLOCK(&tp->t_inpcb); +	tcp_hpts_insert(pace, tp, 100, NULL); +	INP_WUNLOCK(&tp->t_inpcb); + +	hpts = pace->rp_ent[tp->t_hpts_cpu]; + +	/* Run HPTS and verify queued segments path is taken */ +	test_time_usec += 100; +	HPTS_LOCK(hpts); +	NET_EPOCH_ENTER(et); +	slots_ran = tcp_hptsi(hpts, true); +	HPTS_UNLOCK(hpts); +	NET_EPOCH_EXIT(et); + +	KTEST_VERIFY(slots_ran >= 0); +	KTEST_EQUAL(call_counts[CCNT_TCP_TFB_DO_QUEUED_SEGMENTS], 1); + +	/* Connection should be removed from HPTS after processing */ +	KTEST_EQUAL(tp->t_in_hpts, IHPTS_NONE); + +	/* Clean up the fake mbuf if it's still in the queue */ +	if (!STAILQ_EMPTY(&tp->t_inqueue)) { +		struct mbuf *m = STAILQ_FIRST(&tp->t_inqueue); +		STAILQ_REMOVE_HEAD(&tp->t_inqueue, m_stailqpkt); +		free(m, M_TCPHPTS); +	} + +	test_hpts_free_tcpcb(tp); +	tcp_hptsi_stop(pace); +	tcp_hptsi_destroy(pace); + +	return (0); +} + +/* + * Validates the direct wake mechanism and wake inhibition logic when + * the connection count exceeds thresholds. + */ +KTEST_FUNC(direct_wake_mechanism) +{ +	struct tcp_hptsi *pace; +	struct tcpcb *tp; +	struct tcp_hpts_entry *hpts; + +	test_hpts_init(); + +	pace = tcp_hptsi_create(&test_funcs, false); +	KTEST_NEQUAL(pace, NULL); +	tcp_hptsi_start(pace); + +	tp = test_hpts_create_tcpcb(ctx, pace); +	KTEST_NEQUAL(tp, NULL); +	hpts = pace->rp_ent[tp->t_hpts_cpu]; + +	/* Test direct wake when not over threshold */ +	HPTS_LOCK(hpts); +	hpts->p_on_queue_cnt = 50; /* Below threshold */ +	hpts->p_hpts_wake_scheduled = 0; +	tcp_hpts_wake(hpts); +	KTEST_EQUAL(hpts->p_hpts_wake_scheduled, 1); +	KTEST_EQUAL(call_counts[CCNT_SWI_SCHED], 1); +	HPTS_UNLOCK(hpts); + +	/* Reset for next test */ +	hpts->p_hpts_wake_scheduled = 0; +	call_counts[CCNT_SWI_SCHED] = 0; + +	/* Test wake inhibition when over threshold */ +	HPTS_LOCK(hpts); +	hpts->p_on_queue_cnt = 200; /* Above threshold */ +	hpts->p_direct_wake = 1; /* Request direct wake */ +	tcp_hpts_wake(hpts); +	KTEST_EQUAL(hpts->p_hpts_wake_scheduled, 0); /* Should be inhibited */ +	KTEST_EQUAL(hpts->p_direct_wake, 0); /* Should be cleared */ +	KTEST_EQUAL(call_counts[CCNT_SWI_SCHED], 0); /* No SWI scheduled */ +	HPTS_UNLOCK(hpts); + +	test_hpts_free_tcpcb(tp); +	tcp_hptsi_stop(pace); +	tcp_hptsi_destroy(pace); + +	return (0); +} + +/* + * Validates HPTS collision detection when attempting to run HPTS while + * it's already active. 
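+ *
+ * With p_hpts_active already set, a non-callout invocation of tcp_hptsi()
+ * should back off and report zero slots processed.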
+ */ +KTEST_FUNC(hpts_collision_detection) +{ +	struct epoch_tracker et; +	struct tcp_hptsi *pace; +	struct tcp_hpts_entry *hpts; +	int32_t slots_ran; + +	test_hpts_init(); + +	pace = tcp_hptsi_create(&test_funcs, false); +	KTEST_NEQUAL(pace, NULL); +	tcp_hptsi_start(pace); + +	hpts = pace->rp_ent[0]; + +	/* Mark HPTS as active */ +	HPTS_LOCK(hpts); +	hpts->p_hpts_active = 1; +	HPTS_UNLOCK(hpts); + +	/* Attempt to run HPTS again - should detect collision */ +	HPTS_LOCK(hpts); +	NET_EPOCH_ENTER(et); +	slots_ran = tcp_hptsi(hpts, false); /* from_callout = false */ +	HPTS_UNLOCK(hpts); +	NET_EPOCH_EXIT(et); + +	/* Should return 0 indicating no work done due to collision */ +	KTEST_EQUAL(slots_ran, 0); + +	tcp_hptsi_stop(pace); +	tcp_hptsi_destroy(pace); + +	return (0); +} + +/* + * Validates generation count handling for race condition detection between + * HPTS processing and connection insertion/removal operations. + */ +KTEST_FUNC(generation_count_validation) +{ +	struct epoch_tracker et; +	struct tcp_hptsi *pace; +	struct tcp_hpts_entry *hpts; +	struct tcpcb *tp1, *tp2; +	uint32_t initial_gencnt, slot_to_test = 10; +	uint32_t timeout_usecs = slot_to_test * HPTS_USECS_PER_SLOT; +	uint32_t tp2_original_gencnt; +	int32_t slots_ran; + +	test_hpts_init(); + +	pace = tcp_hptsi_create(&test_funcs, false); +	KTEST_NEQUAL(pace, NULL); +	tcp_hptsi_start(pace); + +	hpts = pace->rp_ent[0]; + +	/* Record initial generation count for the test slot */ +	initial_gencnt = hpts->p_hptss[slot_to_test].gencnt; + +	/* Create and insert first connection */ +	tp1 = test_hpts_create_tcpcb(ctx, pace); +	KTEST_NEQUAL(tp1, NULL); +	tp1->t_hpts_cpu = 0; /* Force to CPU 0 */ + +	INP_WLOCK(&tp1->t_inpcb); +	tp1->t_flags2 |= TF2_HPTS_CALLS; +	tcp_hpts_insert(pace, tp1, timeout_usecs, NULL); +	INP_WUNLOCK(&tp1->t_inpcb); + +	/* Verify connection stored the generation count */ +	KTEST_EQUAL(tp1->t_in_hpts, IHPTS_ONQUEUE); +	KTEST_EQUAL(tp1->t_hpts_slot, slot_to_test); +	KTEST_EQUAL(tp1->t_hpts_gencnt, initial_gencnt); + +	/* Create second connection but don't insert yet */ +	tp2 = test_hpts_create_tcpcb(ctx, pace); +	KTEST_NEQUAL(tp2, NULL); +	tp2->t_hpts_cpu = 0; /* Force to CPU 0 */ + +	/* Force generation count increment by processing the slot */ +	test_time_usec += (slot_to_test + 1) * HPTS_USECS_PER_SLOT; +	HPTS_LOCK(hpts); +	NET_EPOCH_ENTER(et); +	slots_ran = tcp_hptsi(hpts, true); +	HPTS_UNLOCK(hpts); +	NET_EPOCH_EXIT(et); + +	/* Verify processing occurred */ +	KTEST_VERIFY(slots_ran > 0); +	KTEST_EQUAL(call_counts[CCNT_TCP_OUTPUT], 1); + +	/* Verify generation count was incremented */ +	KTEST_EQUAL(hpts->p_hptss[slot_to_test].gencnt, initial_gencnt + 1); + +	/* Verify first connection was processed and removed */ +	KTEST_EQUAL(tp1->t_in_hpts, IHPTS_NONE); + +	/* Insert second connection and record its generation count */ +	INP_WLOCK(&tp2->t_inpcb); +	tp2->t_flags2 |= TF2_HPTS_CALLS; +	tcp_hpts_insert(pace, tp2, timeout_usecs, NULL); +	INP_WUNLOCK(&tp2->t_inpcb); + +	/* Verify connection was inserted successfully */ +	KTEST_EQUAL(tp2->t_in_hpts, IHPTS_ONQUEUE); + +	/* Record the generation count that tp2 received */ +	tp2_original_gencnt = tp2->t_hpts_gencnt; + +	/* Test generation count mismatch detection during processing */ +	/* Manually set stale generation count to simulate race condition */ +	tp2->t_hpts_gencnt = tp2_original_gencnt + 100; /* Force a mismatch */ + +	/* Process the slot to trigger generation count validation */ +	test_time_usec += (slot_to_test + 1) * HPTS_USECS_PER_SLOT; +	HPTS_LOCK(hpts); +	
NET_EPOCH_ENTER(et); +	slots_ran = tcp_hptsi(hpts, true); +	HPTS_UNLOCK(hpts); +	NET_EPOCH_EXIT(et); + +	/* Connection should be processed despite generation count mismatch */ +	KTEST_EQUAL(tp2->t_in_hpts, IHPTS_NONE); /* Processed and released */ + +	/* The key test: HPTS should handle mismatched generation counts gracefully */ +	KTEST_VERIFY(slots_ran > 0); /* Processing should still occur */ + +	test_hpts_free_tcpcb(tp1); +	test_hpts_free_tcpcb(tp2); +	tcp_hptsi_stop(pace); +	tcp_hptsi_destroy(pace); + +	return (0); +} + +static const struct ktest_test_info tests[] = { +	KTEST_INFO(module_load), +	KTEST_INFO(hptsi_create_destroy), +	KTEST_INFO(hptsi_start_stop), +	KTEST_INFO(hptsi_independence), +	KTEST_INFO(function_injection), +	KTEST_INFO(tcpcb_initialization), +	KTEST_INFO(tcpcb_insertion), +	KTEST_INFO(timer_functionality), +	KTEST_INFO(scalability_tcpcbs), +	KTEST_INFO(wheel_wrap_recovery), +	KTEST_INFO(tcpcb_moving_state), +	KTEST_INFO(deferred_requests), +	KTEST_INFO(cpu_assignment), +	KTEST_INFO(slot_boundary_conditions), +	KTEST_INFO(dynamic_sleep_adjustment), +	KTEST_INFO(concurrent_operations), +	KTEST_INFO(queued_segments_processing), +	KTEST_INFO(direct_wake_mechanism), +	KTEST_INFO(hpts_collision_detection), +	KTEST_INFO(generation_count_validation), +}; + +#else /* TCP_HPTS_KTEST */ + +/* + * Stub to indicate that the TCP HPTS ktest is not enabled. + */ +KTEST_FUNC(module_load_without_tests) +{ +	KTEST_LOG(ctx, "Warning: TCP HPTS ktest is not enabled"); +	return (0); +} + +static const struct ktest_test_info tests[] = { +	KTEST_INFO(module_load_without_tests), +}; + +#endif + +KTEST_MODULE_DECLARE(ktest_tcphpts, tests); +KTEST_MODULE_DEPEND(ktest_tcphpts, tcphpts); diff --git a/sys/netinet/tcp_input.c b/sys/netinet/tcp_input.c index dd27ec77c1af..2146b0cac48f 100644 --- a/sys/netinet/tcp_input.c +++ b/sys/netinet/tcp_input.c @@ -219,7 +219,7 @@ SYSCTL_INT(_net_inet_tcp, OID_AUTO, recvbuf_auto, CTLFLAG_VNET | CTLFLAG_RW,      &VNET_NAME(tcp_do_autorcvbuf), 0,      "Enable automatic receive buffer sizing"); -VNET_DEFINE(int, tcp_autorcvbuf_max) = 2*1024*1024; +VNET_DEFINE(int, tcp_autorcvbuf_max) = 8*1024*1024;  SYSCTL_INT(_net_inet_tcp, OID_AUTO, recvbuf_max, CTLFLAG_VNET | CTLFLAG_RW,      &VNET_NAME(tcp_autorcvbuf_max), 0,      "Max size of automatic receive buffer"); diff --git a/sys/netinet/tcp_lro.c b/sys/netinet/tcp_lro.c index 64efa4bf060f..9b5baf115855 100644 --- a/sys/netinet/tcp_lro.c +++ b/sys/netinet/tcp_lro.c @@ -1475,10 +1475,11 @@ tcp_lro_queue_mbuf(struct lro_ctrl *lc, struct mbuf *mb)   	}  	/* create sequence number */ -	lc->lro_mbuf_data[lc->lro_mbuf_count].seq = -	    (((uint64_t)M_HASHTYPE_GET(mb)) << 56) | -	    (((uint64_t)mb->m_pkthdr.flowid) << 24) | -	    ((uint64_t)lc->lro_mbuf_count); +	lc->lro_mbuf_data[lc->lro_mbuf_count].seq = lc->lro_mbuf_count; +	if (M_HASHTYPE_ISHASH(mb)) +		lc->lro_mbuf_data[lc->lro_mbuf_count].seq |= +		    (((uint64_t)M_HASHTYPE_GET(mb)) << 56) | +		    (((uint64_t)mb->m_pkthdr.flowid) << 24);  	/* enter mbuf */  	lc->lro_mbuf_data[lc->lro_mbuf_count].mb = mb; diff --git a/sys/netinet/tcp_lro_hpts.c b/sys/netinet/tcp_lro_hpts.c index 43587285fe26..ac1a27a4290a 100644 --- a/sys/netinet/tcp_lro_hpts.c +++ b/sys/netinet/tcp_lro_hpts.c @@ -29,6 +29,8 @@  #include "opt_inet6.h"  #include <sys/param.h> +#include <sys/bus.h> +#include <sys/interrupt.h>  #include <sys/systm.h>  #include <sys/kernel.h>  #include <sys/malloc.h> @@ -62,6 +64,7 @@  #include <netinet/tcp_lro.h>  #include <netinet/tcp_var.h>  #include 
<netinet/tcp_hpts.h> +#include <netinet/tcp_hpts_internal.h>  #ifdef TCP_BLACKBOX  #include <netinet/tcp_log_buf.h>  #endif diff --git a/sys/netinet/tcp_output.c b/sys/netinet/tcp_output.c index 2dfb7faf56e3..208f72c4661c 100644 --- a/sys/netinet/tcp_output.c +++ b/sys/netinet/tcp_output.c @@ -123,7 +123,7 @@ SYSCTL_INT(_net_inet_tcp, OID_AUTO, sendbuf_inc, CTLFLAG_VNET | CTLFLAG_RW,  	&VNET_NAME(tcp_autosndbuf_inc), 0,  	"Incrementor step size of automatic send buffer"); -VNET_DEFINE(int, tcp_autosndbuf_max) = 2*1024*1024; +VNET_DEFINE(int, tcp_autosndbuf_max) = 8*1024*1024;  SYSCTL_INT(_net_inet_tcp, OID_AUTO, sendbuf_max, CTLFLAG_VNET | CTLFLAG_RW,  	&VNET_NAME(tcp_autosndbuf_max), 0,  	"Max size of automatic send buffer"); diff --git a/sys/netinet/tcp_stacks/bbr.c b/sys/netinet/tcp_stacks/bbr.c index f2d7867df9b4..10383bc0801e 100644 --- a/sys/netinet/tcp_stacks/bbr.c +++ b/sys/netinet/tcp_stacks/bbr.c @@ -477,10 +477,10 @@ bbr_log_rtt_shrinks(struct tcp_bbr *bbr, uint32_t cts, uint32_t applied,  		    uint16_t set);  static struct bbr_sendmap *  bbr_find_lowest_rsm(struct tcp_bbr *bbr); -static __inline uint32_t +static inline uint32_t  bbr_get_rtt(struct tcp_bbr *bbr, int32_t rtt_type);  static void -bbr_log_to_start(struct tcp_bbr *bbr, uint32_t cts, uint32_t to, int32_t slot, +bbr_log_to_start(struct tcp_bbr *bbr, uint32_t cts, uint32_t to, int32_t pacing_delay,  		 uint8_t which);  static void  bbr_log_timer_var(struct tcp_bbr *bbr, int mode, uint32_t cts, @@ -489,7 +489,7 @@ bbr_log_timer_var(struct tcp_bbr *bbr, int mode, uint32_t cts,  static void  bbr_log_hpts_diag(struct tcp_bbr *bbr, uint32_t cts, struct hpts_diag *diag);  static void -bbr_log_type_bbrsnd(struct tcp_bbr *bbr, uint32_t len, uint32_t slot, +bbr_log_type_bbrsnd(struct tcp_bbr *bbr, uint32_t len, uint32_t pacing_delay,  		    uint32_t del_by, uint32_t cts, uint32_t sloton,  		    uint32_t prev_delay);  static void @@ -724,7 +724,7 @@ bbr_minseg(struct tcp_bbr *bbr)  }  static void -bbr_start_hpts_timer(struct tcp_bbr *bbr, struct tcpcb *tp, uint32_t cts, int32_t frm, int32_t slot, uint32_t tot_len) +bbr_start_hpts_timer(struct tcp_bbr *bbr, struct tcpcb *tp, uint32_t cts, int32_t frm, int32_t pacing_delay, uint32_t tot_len)  {  	struct inpcb *inp = tptoinpcb(tp);  	struct hpts_diag diag; @@ -751,40 +751,40 @@ bbr_start_hpts_timer(struct tcp_bbr *bbr, struct tcpcb *tp, uint32_t cts, int32_  	bbr->r_ctl.rc_timer_exp = 0;  	prev_delay = bbr->r_ctl.rc_last_delay_val;  	if (bbr->r_ctl.rc_last_delay_val && -	    (slot == 0)) { +	    (pacing_delay == 0)) {  		/*  		 * If a previous pacer delay was in place we  		 * are not coming from the output side (where  		 * we calculate a delay, more likely a timer).  		 */ -		slot = bbr->r_ctl.rc_last_delay_val; +		pacing_delay = bbr->r_ctl.rc_last_delay_val;  		if (TSTMP_GT(cts, bbr->rc_pacer_started)) {  			/* Compensate for time passed  */  			delay_calc = cts - bbr->rc_pacer_started; -			if (delay_calc <= slot) -				slot -= delay_calc; +			if (delay_calc <= pacing_delay) +				pacing_delay -= delay_calc;  		}  	}  	/* Do we have early to make up for by pushing out the pacing time? 
*/  	if (bbr->r_agg_early_set) { -		bbr_log_pacing_delay_calc(bbr, 0, bbr->r_ctl.rc_agg_early, cts, slot, 0, bbr->r_agg_early_set, 2); -		slot += bbr->r_ctl.rc_agg_early; +		bbr_log_pacing_delay_calc(bbr, 0, bbr->r_ctl.rc_agg_early, cts, pacing_delay, 0, bbr->r_agg_early_set, 2); +		pacing_delay += bbr->r_ctl.rc_agg_early;  		bbr->r_ctl.rc_agg_early = 0;  		bbr->r_agg_early_set = 0;  	}  	/* Are we running a total debt that needs to be compensated for? */  	if (bbr->r_ctl.rc_hptsi_agg_delay) { -		if (slot > bbr->r_ctl.rc_hptsi_agg_delay) { +		if (pacing_delay > bbr->r_ctl.rc_hptsi_agg_delay) {  			/* We nuke the delay */ -			slot -= bbr->r_ctl.rc_hptsi_agg_delay; +			pacing_delay -= bbr->r_ctl.rc_hptsi_agg_delay;  			bbr->r_ctl.rc_hptsi_agg_delay = 0;  		} else {  			/* We nuke some of the delay, put in a minimal 100usecs  */ -			bbr->r_ctl.rc_hptsi_agg_delay -= slot; -			bbr->r_ctl.rc_last_delay_val = slot = 100; +			bbr->r_ctl.rc_hptsi_agg_delay -= pacing_delay; +			bbr->r_ctl.rc_last_delay_val = pacing_delay = 100;  		}  	} -	bbr->r_ctl.rc_last_delay_val = slot; +	bbr->r_ctl.rc_last_delay_val = pacing_delay;  	hpts_timeout = bbr_timer_start(tp, bbr, cts);  	if (tp->t_flags & TF_DELACK) {  		if (bbr->rc_in_persist == 0) { @@ -810,7 +810,7 @@ bbr_start_hpts_timer(struct tcp_bbr *bbr, struct tcpcb *tp, uint32_t cts, int32_  		bbr->r_ctl.rc_hpts_flags = PACE_TMR_DELACK;  		hpts_timeout = delayed_ack;  	} -	if (slot) { +	if (pacing_delay) {  		/* Mark that we have a pacing timer up */  		BBR_STAT_INC(bbr_paced_segments);  		bbr->r_ctl.rc_hpts_flags |= PACE_PKT_OUTPUT; @@ -820,7 +820,7 @@ bbr_start_hpts_timer(struct tcp_bbr *bbr, struct tcpcb *tp, uint32_t cts, int32_  	 * wheel, we resort to a keep-alive timer if its configured.  	 */  	if ((hpts_timeout == 0) && -	    (slot == 0)) { +	    (pacing_delay == 0)) {  		if ((V_tcp_always_keepalive || inp->inp_socket->so_options & SO_KEEPALIVE) &&  		    (tp->t_state <= TCPS_CLOSING)) {  			/* @@ -849,7 +849,7 @@ bbr_start_hpts_timer(struct tcp_bbr *bbr, struct tcpcb *tp, uint32_t cts, int32_  		if (left < hpts_timeout)  			hpts_timeout = left;  	} -	if (bbr->r_ctl.rc_incr_tmrs && slot && +	if (bbr->r_ctl.rc_incr_tmrs && pacing_delay &&  	    (bbr->r_ctl.rc_hpts_flags & (PACE_TMR_TLP|PACE_TMR_RXT))) {  		/*  		 * If configured to do so, and the timer is either @@ -867,7 +867,7 @@ bbr_start_hpts_timer(struct tcp_bbr *bbr, struct tcpcb *tp, uint32_t cts, int32_  		 * this extra delay but this is easier and being more  		 * conservative is probably better.  		 */ -		hpts_timeout += slot; +		hpts_timeout += pacing_delay;  	}  	if (hpts_timeout) {  		/* @@ -879,10 +879,10 @@ bbr_start_hpts_timer(struct tcp_bbr *bbr, struct tcpcb *tp, uint32_t cts, int32_  		bbr->r_ctl.rc_timer_exp = cts + hpts_timeout;  	} else  		bbr->r_ctl.rc_timer_exp = 0; -	if ((slot) && +	if ((pacing_delay) &&  	    (bbr->rc_use_google ||  	     bbr->output_error_seen || -	     (slot <= hpts_timeout))  ) { +	     (pacing_delay <= hpts_timeout))  ) {  		/*  		 * Tell LRO that it can queue packets while  		 * we pace. 
@@ -900,17 +900,15 @@ bbr_start_hpts_timer(struct tcp_bbr *bbr, struct tcpcb *tp, uint32_t cts, int32_  			tp->t_flags2 &= ~TF2_DONT_SACK_QUEUE;  		bbr->rc_pacer_started = cts; -		(void)tcp_hpts_insert_diag(tp, HPTS_USEC_TO_SLOTS(slot), -					   __LINE__, &diag); +		tcp_hpts_insert(tp, pacing_delay, &diag);  		bbr->rc_timer_first = 0;  		bbr->bbr_timer_src = frm; -		bbr_log_to_start(bbr, cts, hpts_timeout, slot, 1); +		bbr_log_to_start(bbr, cts, hpts_timeout, pacing_delay, 1);  		bbr_log_hpts_diag(bbr, cts, &diag);  	} else if (hpts_timeout) { -		(void)tcp_hpts_insert_diag(tp, HPTS_USEC_TO_SLOTS(hpts_timeout), -					   __LINE__, &diag); +		tcp_hpts_insert(tp, hpts_timeout, &diag);  		/* -		 * We add the flag here as well if the slot is set, +		 * We add the flag here as well if the pacing delay is set,  		 * since hpts will call in to clear the queue first before  		 * calling the output routine (which does our timers).  		 * We don't want to set the flag if its just a timer @@ -919,7 +917,7 @@ bbr_start_hpts_timer(struct tcp_bbr *bbr, struct tcpcb *tp, uint32_t cts, int32_  		 * on a keep-alive timer and a request comes in for  		 * more data.  		 */ -		if (slot) +		if (pacing_delay)  			bbr->rc_pacer_started = cts;  		if ((bbr->r_ctl.rc_hpts_flags & PACE_TMR_RACK) &&  		    (bbr->rc_cwnd_limited == 0)) { @@ -936,12 +934,12 @@ bbr_start_hpts_timer(struct tcp_bbr *bbr, struct tcpcb *tp, uint32_t cts, int32_  			    TF2_DONT_SACK_QUEUE);  		}  		bbr->bbr_timer_src = frm; -		bbr_log_to_start(bbr, cts, hpts_timeout, slot, 0); +		bbr_log_to_start(bbr, cts, hpts_timeout, pacing_delay, 0);  		bbr_log_hpts_diag(bbr, cts, &diag);  		bbr->rc_timer_first = 1;  	}  	bbr->rc_tmr_stopped = 0; -	bbr_log_type_bbrsnd(bbr, tot_len, slot, delay_calc, cts, frm, prev_delay); +	bbr_log_type_bbrsnd(bbr, tot_len, pacing_delay, delay_calc, cts, frm, prev_delay);  }  static void @@ -1033,8 +1031,8 @@ bbr_timer_audit(struct tcpcb *tp, struct tcp_bbr *bbr, uint32_t cts, struct sock  	}  	/*  	 * Ok the timer originally started is not what we want now. We will -	 * force the hpts to be stopped if any, and restart with the slot -	 * set to what was in the saved slot. +	 * force the hpts to be stopped if any, and restart with the pacing +	 * delay set to what was in the saved delay.  	 
*/  wrong_timer:  	if ((bbr->r_ctl.rc_hpts_flags & PACE_PKT_OUTPUT) == 0) { @@ -1843,7 +1841,7 @@ bbr_counter_destroy(void)  } -static __inline void +static inline void  bbr_fill_in_logging_data(struct tcp_bbr *bbr, struct tcp_log_bbr *l, uint32_t cts)  {  	memset(l, 0, sizeof(union tcp_log_stackspecific)); @@ -2397,7 +2395,7 @@ bbr_log_hpts_diag(struct tcp_bbr *bbr, uint32_t cts, struct hpts_diag *diag)  		log.u_bbr.flex2 = diag->p_cur_slot;  		log.u_bbr.flex3 = diag->slot_req;  		log.u_bbr.flex4 = diag->inp_hptsslot; -		log.u_bbr.flex5 = diag->slot_remaining; +		log.u_bbr.flex5 = diag->time_remaining;  		log.u_bbr.flex6 = diag->need_new_to;  		log.u_bbr.flex7 = diag->p_hpts_active;  		log.u_bbr.flex8 = diag->p_on_min_sleep; @@ -2411,9 +2409,6 @@ bbr_log_hpts_diag(struct tcp_bbr *bbr, uint32_t cts, struct hpts_diag *diag)  		log.u_bbr.bw_inuse = diag->wheel_slot;  		log.u_bbr.rttProp = diag->wheel_cts;  		log.u_bbr.delRate = diag->maxslots; -		log.u_bbr.cur_del_rate = diag->p_curtick; -		log.u_bbr.cur_del_rate <<= 32; -		log.u_bbr.cur_del_rate |= diag->p_lasttick;  		TCP_LOG_EVENTP(bbr->rc_tp, NULL,  		    &bbr->rc_inp->inp_socket->so_rcv,  		    &bbr->rc_inp->inp_socket->so_snd, @@ -2473,7 +2468,7 @@ bbr_log_pacing_delay_calc(struct tcp_bbr *bbr, uint16_t gain, uint32_t len,  }  static void -bbr_log_to_start(struct tcp_bbr *bbr, uint32_t cts, uint32_t to, int32_t slot, uint8_t which) +bbr_log_to_start(struct tcp_bbr *bbr, uint32_t cts, uint32_t to, int32_t pacing_delay, uint8_t which)  {  	if (tcp_bblogging_on(bbr->rc_tp)) {  		union tcp_log_stackspecific log; @@ -2483,7 +2478,7 @@ bbr_log_to_start(struct tcp_bbr *bbr, uint32_t cts, uint32_t to, int32_t slot, u  		log.u_bbr.flex1 = bbr->bbr_timer_src;  		log.u_bbr.flex2 = to;  		log.u_bbr.flex3 = bbr->r_ctl.rc_hpts_flags; -		log.u_bbr.flex4 = slot; +		log.u_bbr.flex4 = pacing_delay;  		log.u_bbr.flex5 = bbr->rc_tp->t_hpts_slot;  		log.u_bbr.flex6 = TICKS_2_USEC(bbr->rc_tp->t_rxtcur);  		log.u_bbr.pkts_out = bbr->rc_tp->t_flags2; @@ -2733,13 +2728,13 @@ bbr_type_log_hdwr_pacing(struct tcp_bbr *bbr, const struct ifnet *ifp,  }  static void -bbr_log_type_bbrsnd(struct tcp_bbr *bbr, uint32_t len, uint32_t slot, uint32_t del_by, uint32_t cts, uint32_t line, uint32_t prev_delay) +bbr_log_type_bbrsnd(struct tcp_bbr *bbr, uint32_t len, uint32_t pacing_delay, uint32_t del_by, uint32_t cts, uint32_t line, uint32_t prev_delay)  {  	if (tcp_bblogging_on(bbr->rc_tp)) {  		union tcp_log_stackspecific log;  		bbr_fill_in_logging_data(bbr, &log.u_bbr, cts); -		log.u_bbr.flex1 = slot; +		log.u_bbr.flex1 = pacing_delay;  		log.u_bbr.flex2 = del_by;  		log.u_bbr.flex3 = prev_delay;  		log.u_bbr.flex4 = line; @@ -4211,7 +4206,7 @@ bbr_calc_thresh_tlp(struct tcpcb *tp, struct tcp_bbr *bbr,  /*   * Return one of three RTTs to use (in microseconds).   
*/ -static __inline uint32_t +static inline uint32_t  bbr_get_rtt(struct tcp_bbr *bbr, int32_t rtt_type)  {  	uint32_t f_rtt; @@ -4375,7 +4370,7 @@ bbr_timeout_rack(struct tcpcb *tp, struct tcp_bbr *bbr, uint32_t cts)  	return (0);  } -static __inline void +static inline void  bbr_clone_rsm(struct tcp_bbr *bbr, struct bbr_sendmap *nrsm, struct bbr_sendmap *rsm, uint32_t start)  {  	int idx; @@ -5205,7 +5200,7 @@ bbr_process_timers(struct tcpcb *tp, struct tcp_bbr *bbr, uint32_t cts, uint8_t  		left = bbr->r_ctl.rc_timer_exp - cts;  		ret = -3;  		bbr_log_to_processing(bbr, cts, ret, left, hpts_calling); -		tcp_hpts_insert(tp, HPTS_USEC_TO_SLOTS(left)); +		tcp_hpts_insert(tp, left, NULL);  		return (1);  	}  	bbr->rc_tmr_stopped = 0; @@ -5254,7 +5249,7 @@ bbr_timer_cancel(struct tcp_bbr *bbr, int32_t line, uint32_t cts)  				else  					time_since_send = 0;  				if (bbr->r_ctl.rc_last_delay_val > time_since_send) { -					/* Cut down our slot time */ +					/* Cut down our pacing_delay time */  					bbr->r_ctl.rc_last_delay_val -= time_since_send;  				} else {  					bbr->r_ctl.rc_last_delay_val = 0; @@ -5888,7 +5883,7 @@ bbr_log_output(struct tcp_bbr *bbr, struct tcpcb *tp, struct tcpopt *to, int32_t  	 * sequence 1 for 10 bytes. In such an example the r_start would be  	 * 1 (starting sequence) but the r_end would be r_start+len i.e. 11.  	 * This means that r_end is actually the first sequence for the next -	 * slot (11). +	 * pacing delay (11).  	 *  	 */  	INP_WLOCK_ASSERT(tptoinpcb(tp)); @@ -11856,7 +11851,7 @@ bbr_output_wtime(struct tcpcb *tp, const struct timeval *tv)  	struct bbr_sendmap *rsm = NULL;  	int32_t tso, mtu;  	struct tcpopt to; -	int32_t slot = 0; +	int32_t pacing_delay = 0;  	struct inpcb *inp;  	struct sockbuf *sb;  	bool hpts_calling; @@ -11986,8 +11981,7 @@ bbr_output_wtime(struct tcpcb *tp, const struct timeval *tv)  			delay_calc -= bbr->r_ctl.rc_last_delay_val;  		else {  			/* -			 * We are early setup to adjust -			 * our slot time. +			 * We are early setup to adjust out pacing delay.  			 
*/  			uint64_t merged_val; @@ -12104,7 +12098,7 @@ again:  #endif  	error = 0;  	tso = 0; -	slot = 0; +	pacing_delay = 0;  	mtu = 0;  	sendwin = min(tp->snd_wnd, tp->snd_cwnd);  	sb_offset = tp->snd_max - tp->snd_una; @@ -12126,7 +12120,7 @@ recheck_resend:  			tot_len = tp->t_maxseg;  			if (hpts_calling)  				/* Retry in a ms */ -				slot = 1001; +				pacing_delay = 1001;  			goto just_return_nolock;  		}  		TAILQ_INSERT_TAIL(&bbr->r_ctl.rc_free, rsm, r_next); @@ -12699,9 +12693,9 @@ just_return:  	SOCK_SENDBUF_UNLOCK(so);  just_return_nolock:  	if (tot_len) -		slot = bbr_get_pacing_delay(bbr, bbr->r_ctl.rc_bbr_hptsi_gain, tot_len, cts, 0); +		pacing_delay = bbr_get_pacing_delay(bbr, bbr->r_ctl.rc_bbr_hptsi_gain, tot_len, cts, 0);  	if (bbr->rc_no_pacing) -		slot = 0; +		pacing_delay = 0;  	if (tot_len == 0) {  		if ((ctf_outstanding(tp) + min((bbr->r_ctl.rc_high_rwnd/2), bbr_minseg(bbr))) >=  		    tp->snd_wnd) { @@ -12751,7 +12745,7 @@ just_return_nolock:  	/* Dont update the time if we did not send */  	bbr->r_ctl.rc_last_delay_val = 0;  	bbr->rc_output_starts_timer = 1; -	bbr_start_hpts_timer(bbr, tp, cts, 9, slot, tot_len); +	bbr_start_hpts_timer(bbr, tp, cts, 9, pacing_delay, tot_len);  	bbr_log_type_just_return(bbr, cts, tot_len, hpts_calling, app_limited, p_maxseg, len);  	if (SEQ_LT(tp->snd_nxt, tp->snd_max)) {  		/* Make sure snd_nxt is drug up */ @@ -12787,7 +12781,7 @@ send:  				flags &= ~TH_FIN;  				if ((len == 0) && ((tp->t_flags & TF_ACKNOW) == 0)) {  					/* Lets not send this */ -					slot = 0; +					pacing_delay = 0;  					goto just_return;  				}  			} @@ -13053,7 +13047,7 @@ send:  		/*  		 * We have outstanding data, don't send a fin by itself!.  		 */ -		slot = 0; +		pacing_delay = 0;  		goto just_return;  	}  	/* @@ -13763,7 +13757,7 @@ nomore:  				if (tp->snd_cwnd < maxseg)  					tp->snd_cwnd = maxseg;  			} -			slot = (bbr_error_base_paceout + 1) << bbr->oerror_cnt; +			pacing_delay = (bbr_error_base_paceout + 1) << bbr->oerror_cnt;  			BBR_STAT_INC(bbr_saw_enobuf);  			if (bbr->bbr_hdrw_pacing)  				counter_u64_add(bbr_hdwr_pacing_enobuf, 1); @@ -13812,18 +13806,18 @@ nomore:  				}  				/*  				 * Nuke all other things that can interfere -				 * with slot +				 * with pacing delay  				 */  				if ((tot_len + len) && (len >= tp->t_maxseg)) { -					slot = bbr_get_pacing_delay(bbr, +					pacing_delay = bbr_get_pacing_delay(bbr,  					    bbr->r_ctl.rc_bbr_hptsi_gain,  					    (tot_len + len), cts, 0); -					if (slot < bbr_error_base_paceout) -						slot = (bbr_error_base_paceout + 2) << bbr->oerror_cnt; +					if (pacing_delay < bbr_error_base_paceout) +						pacing_delay = (bbr_error_base_paceout + 2) << bbr->oerror_cnt;  				} else -					slot = (bbr_error_base_paceout + 2) << bbr->oerror_cnt; +					pacing_delay = (bbr_error_base_paceout + 2) << bbr->oerror_cnt;  				bbr->rc_output_starts_timer = 1; -				bbr_start_hpts_timer(bbr, tp, cts, 10, slot, +				bbr_start_hpts_timer(bbr, tp, cts, 10, pacing_delay,  				    tot_len);  				return (error);  			} @@ -13841,9 +13835,9 @@ nomore:  			}  			/* FALLTHROUGH */  		default: -			slot = (bbr_error_base_paceout + 3) << bbr->oerror_cnt; +			pacing_delay = (bbr_error_base_paceout + 3) << bbr->oerror_cnt;  			bbr->rc_output_starts_timer = 1; -			bbr_start_hpts_timer(bbr, tp, cts, 11, slot, 0); +			bbr_start_hpts_timer(bbr, tp, cts, 11, pacing_delay, 0);  			return (error);  		}  #ifdef STATS @@ -13981,12 +13975,12 @@ skip_again:  		tcp_log_end_status(tp, TCP_EI_STATUS_SERVER_RST);  	if (((flags & (TH_RST | TH_SYN | 
TH_FIN)) == 0) && tot_len) {  		/* -		 * Calculate/Re-Calculate the hptsi slot in usecs based on +		 * Calculate/Re-Calculate the hptsi timeout in usecs based on  		 * what we have sent so far  		 */ -		slot = bbr_get_pacing_delay(bbr, bbr->r_ctl.rc_bbr_hptsi_gain, tot_len, cts, 0); +		pacing_delay = bbr_get_pacing_delay(bbr, bbr->r_ctl.rc_bbr_hptsi_gain, tot_len, cts, 0);  		if (bbr->rc_no_pacing) -			slot = 0; +			pacing_delay = 0;  	}  	tp->t_flags &= ~(TF_ACKNOW | TF_DELACK);  enobufs: @@ -13999,8 +13993,8 @@ enobufs:  	    (more_to_rxt ||  	     ((bbr->r_ctl.rc_resend = bbr_check_recovery_mode(tp, bbr, cts)) != NULL))) {  		/* Rack cheats and shotguns out all rxt's 1ms apart */ -		if (slot > 1000) -			slot = 1000; +		if (pacing_delay > 1000) +			pacing_delay = 1000;  	}  	if (bbr->bbr_hdrw_pacing && (bbr->hw_pacing_set == 0)) {  		/* @@ -14014,7 +14008,7 @@ enobufs:  			tcp_bbr_tso_size_check(bbr, cts);  		}  	} -	bbr_start_hpts_timer(bbr, tp, cts, 12, slot, tot_len); +	bbr_start_hpts_timer(bbr, tp, cts, 12, pacing_delay, tot_len);  	if (SEQ_LT(tp->snd_nxt, tp->snd_max)) {  		/* Make sure snd_nxt is drug up */  		tp->snd_nxt = tp->snd_max; @@ -14132,8 +14126,7 @@ bbr_switch_failed(struct tcpcb *tp)  		}  	} else  		toval = HPTS_USECS_PER_SLOT; -	(void)tcp_hpts_insert_diag(tp, HPTS_USEC_TO_SLOTS(toval), -				   __LINE__, &diag); +	tcp_hpts_insert(tp, toval, &diag);  	bbr_log_hpts_diag(bbr, cts, &diag);  } diff --git a/sys/netinet/tcp_stacks/rack.c b/sys/netinet/tcp_stacks/rack.c index 11ef5ba706c5..9ed26d5a617b 100644 --- a/sys/netinet/tcp_stacks/rack.c +++ b/sys/netinet/tcp_stacks/rack.c @@ -204,10 +204,6 @@ static int32_t rack_dnd_default = 0;		/* For rr_conf = 3, what is the default fo  static int32_t rack_rxt_controls = 0;  static int32_t rack_fill_cw_state = 0;  static uint8_t rack_req_measurements = 1; -/* Attack threshold detections */ -static uint32_t rack_highest_sack_thresh_seen = 0; -static uint32_t rack_highest_move_thresh_seen = 0; -static uint32_t rack_merge_out_sacks_on_attack = 0;  static int32_t rack_enable_hw_pacing = 0; /* Due to CCSP keep it off by default */  static int32_t rack_hw_rate_caps = 0; /* 1; */  static int32_t rack_hw_rate_cap_per = 0;	/* 0 -- off  */ @@ -223,7 +219,6 @@ static int32_t rack_default_pacing_divisor = 250;  static uint16_t rack_pacing_min_seg = 0;  static int32_t rack_timely_off = 0; -static uint32_t sad_seg_size_per = 800;	/* 80.0 % */  static int32_t rack_pkt_delay = 1000;  static int32_t rack_send_a_lot_in_prr = 1;  static int32_t rack_min_to = 1000;	/* Number of microsecond  min timeout */ @@ -250,11 +245,11 @@ static int32_t rack_non_rxt_use_cr = 0; /* does a non-rxt in recovery use the co  static int32_t rack_persist_min = 250000;	/* 250usec */  static int32_t rack_persist_max = 2000000;	/* 2 Second in usec's */  static int32_t rack_honors_hpts_min_to =  1;	/* Do we honor the hpts minimum time out for pacing timers */ -static uint32_t rack_max_reduce = 10;		/* Percent we can reduce slot by */ +static uint32_t rack_max_reduce = 10;		/* Percent we can reduce pacing delay by */  static int32_t rack_sack_not_required = 1;	/* set to one to allow non-sack to use rack */  static int32_t rack_limit_time_with_srtt = 0;  static int32_t rack_autosndbuf_inc = 20;	/* In percentage form */ -static int32_t rack_enobuf_hw_boost_mult = 0;	/* How many times the hw rate we boost slot using time_between */ +static int32_t rack_enobuf_hw_boost_mult = 0;	/* How many times the hw rate we boost pacing delay using time_between */  static int32_t 
rack_enobuf_hw_max = 12000;	/* 12 ms in usecs */  static int32_t rack_enobuf_hw_min = 10000;	/* 10 ms in usecs */  static int32_t rack_hw_rwnd_factor = 2;		/* How many max_segs the rwnd must be before we hold off sending */ @@ -278,7 +273,7 @@ static int32_t rack_hptsi_segments = 40;  static int32_t rack_rate_sample_method = USE_RTT_LOW;  static int32_t rack_pace_every_seg = 0;  static int32_t rack_delayed_ack_time = 40000;	/* 40ms in usecs */ -static int32_t rack_slot_reduction = 4; +static int32_t rack_pacing_delay_reduction = 4;  static int32_t rack_wma_divisor = 8;		/* For WMA calculation */  static int32_t rack_cwnd_block_ends_measure = 0;  static int32_t rack_rwnd_block_ends_measure = 0; @@ -399,18 +394,6 @@ counter_u64_t rack_extended_rfo;  counter_u64_t rack_sack_proc_all;  counter_u64_t rack_sack_proc_short;  counter_u64_t rack_sack_proc_restart; -counter_u64_t rack_sack_attacks_detected; -counter_u64_t rack_sack_attacks_reversed; -counter_u64_t rack_sack_attacks_suspect; -counter_u64_t rack_sack_used_next_merge; -counter_u64_t rack_sack_splits; -counter_u64_t rack_sack_used_prev_merge; -counter_u64_t rack_sack_skipped_acked; -counter_u64_t rack_ack_total; -counter_u64_t rack_express_sack; -counter_u64_t rack_sack_total; -counter_u64_t rack_move_none; -counter_u64_t rack_move_some;  counter_u64_t rack_input_idle_reduces;  counter_u64_t rack_collapsed_win; @@ -478,7 +461,7 @@ rack_log_alt_to_to_cancel(struct tcp_rack *rack,      uint16_t flex7, uint8_t mod);  static void -rack_log_pacing_delay_calc(struct tcp_rack *rack, uint32_t len, uint32_t slot, +rack_log_pacing_delay_calc(struct tcp_rack *rack, uint32_t len, uint32_t pacing_delay,     uint64_t bw_est, uint64_t bw, uint64_t len_time, int method, int line,     struct rack_sendmap *rsm, uint8_t quality);  static struct rack_sendmap * @@ -834,18 +817,6 @@ sysctl_rack_clear(SYSCTL_HANDLER_ARGS)  		counter_u64_zero(rack_rxt_clamps_cwnd_uniq);  		counter_u64_zero(rack_multi_single_eq);  		counter_u64_zero(rack_proc_non_comp_ack); -		counter_u64_zero(rack_sack_attacks_detected); -		counter_u64_zero(rack_sack_attacks_reversed); -		counter_u64_zero(rack_sack_attacks_suspect); -		counter_u64_zero(rack_sack_used_next_merge); -		counter_u64_zero(rack_sack_used_prev_merge); -		counter_u64_zero(rack_sack_splits); -		counter_u64_zero(rack_sack_skipped_acked); -		counter_u64_zero(rack_ack_total); -		counter_u64_zero(rack_express_sack); -		counter_u64_zero(rack_sack_total); -		counter_u64_zero(rack_move_none); -		counter_u64_zero(rack_move_some);  		counter_u64_zero(rack_try_scwnd);  		counter_u64_zero(rack_collapsed_win);  		counter_u64_zero(rack_collapsed_win_rxt); @@ -872,7 +843,6 @@ static void  rack_init_sysctls(void)  {  	struct sysctl_oid *rack_counters; -	struct sysctl_oid *rack_attack;  	struct sysctl_oid *rack_pacing;  	struct sysctl_oid *rack_timely;  	struct sysctl_oid *rack_timers; @@ -883,12 +853,6 @@ rack_init_sysctls(void)  	struct sysctl_oid *rack_probertt;  	struct sysctl_oid *rack_hw_pacing; -	rack_attack = SYSCTL_ADD_NODE(&rack_sysctl_ctx, -	    SYSCTL_CHILDREN(rack_sysctl_root), -	    OID_AUTO, -	    "sack_attack", -	    CTLFLAG_RW | CTLFLAG_MPSAFE, 0, -	    "Rack Sack Attack Counters and Controls");  	rack_counters = SYSCTL_ADD_NODE(&rack_sysctl_ctx,  	    SYSCTL_CHILDREN(rack_sysctl_root),  	    OID_AUTO, @@ -1107,7 +1071,7 @@ rack_init_sysctls(void)  	SYSCTL_ADD_S32(&rack_sysctl_ctx,  	    SYSCTL_CHILDREN(rack_pacing),  	    OID_AUTO, "burst_reduces", CTLFLAG_RW, -	    &rack_slot_reduction, 4, +	    
&rack_pacing_delay_reduction, 4,  	    "When doing only burst mitigation what is the reduce divisor");  	SYSCTL_ADD_S32(&rack_sysctl_ctx,  	    SYSCTL_CHILDREN(rack_sysctl_root), @@ -1399,7 +1363,7 @@ rack_init_sysctls(void)  	    SYSCTL_CHILDREN(rack_timers),  	    OID_AUTO, "hpts_max_reduce", CTLFLAG_RW,  	    &rack_max_reduce, 10, -	    "Max percentage we will reduce slot by for pacing when we are behind"); +	    "Max percentage we will reduce pacing delay by for pacing when we are behind");  	SYSCTL_ADD_U32(&rack_sysctl_ctx,  	    SYSCTL_CHILDREN(rack_timers),  	    OID_AUTO, "persmin", CTLFLAG_RW, @@ -1535,11 +1499,6 @@ rack_init_sysctls(void)  	    "Do not disturb default for rack_rrr = 3");  	SYSCTL_ADD_S32(&rack_sysctl_ctx,  	    SYSCTL_CHILDREN(rack_misc), -	    OID_AUTO, "sad_seg_per", CTLFLAG_RW, -	    &sad_seg_size_per, 800, -	    "Percentage of segment size needed in a sack 800 = 80.0?"); -	SYSCTL_ADD_S32(&rack_sysctl_ctx, -	    SYSCTL_CHILDREN(rack_misc),  	    OID_AUTO, "rxt_controls", CTLFLAG_RW,  	    &rack_rxt_controls, 0,  	    "Retransmit sending size controls (valid  values 0, 1, 2 default=1)?"); @@ -1619,85 +1578,6 @@ rack_init_sysctls(void)  	    &rack_autosndbuf_inc, 20,  	    "What percentage should rack scale up its snd buffer by?"); - -	/* Sack Attacker detection stuff */ -	SYSCTL_ADD_U32(&rack_sysctl_ctx, -	    SYSCTL_CHILDREN(rack_attack), -	    OID_AUTO, "merge_out", CTLFLAG_RW, -	    &rack_merge_out_sacks_on_attack, 0, -	    "Do we merge the sendmap when we decide we are being attacked?"); - -	SYSCTL_ADD_U32(&rack_sysctl_ctx, -	    SYSCTL_CHILDREN(rack_attack), -	    OID_AUTO, "detect_highsackratio", CTLFLAG_RW, -	    &rack_highest_sack_thresh_seen, 0, -	    "Highest sack to ack ratio seen"); -	SYSCTL_ADD_U32(&rack_sysctl_ctx, -	    SYSCTL_CHILDREN(rack_attack), -	    OID_AUTO, "detect_highmoveratio", CTLFLAG_RW, -	    &rack_highest_move_thresh_seen, 0, -	    "Highest move to non-move ratio seen"); -	rack_ack_total = counter_u64_alloc(M_WAITOK); -	SYSCTL_ADD_COUNTER_U64(&rack_sysctl_ctx, -	    SYSCTL_CHILDREN(rack_attack), -	    OID_AUTO, "acktotal", CTLFLAG_RD, -	    &rack_ack_total, -	    "Total number of Ack's"); -	rack_express_sack = counter_u64_alloc(M_WAITOK); -	SYSCTL_ADD_COUNTER_U64(&rack_sysctl_ctx, -	    SYSCTL_CHILDREN(rack_attack), -	    OID_AUTO, "exp_sacktotal", CTLFLAG_RD, -	    &rack_express_sack, -	    "Total expresss number of Sack's"); -	rack_sack_total = counter_u64_alloc(M_WAITOK); -	SYSCTL_ADD_COUNTER_U64(&rack_sysctl_ctx, -	    SYSCTL_CHILDREN(rack_attack), -	    OID_AUTO, "sacktotal", CTLFLAG_RD, -	    &rack_sack_total, -	    "Total number of SACKs"); -	rack_move_none = counter_u64_alloc(M_WAITOK); -	SYSCTL_ADD_COUNTER_U64(&rack_sysctl_ctx, -	    SYSCTL_CHILDREN(rack_attack), -	    OID_AUTO, "move_none", CTLFLAG_RD, -	    &rack_move_none, -	    "Total number of SACK index reuse of positions under threshold"); -	rack_move_some = counter_u64_alloc(M_WAITOK); -	SYSCTL_ADD_COUNTER_U64(&rack_sysctl_ctx, -	    SYSCTL_CHILDREN(rack_attack), -	    OID_AUTO, "move_some", CTLFLAG_RD, -	    &rack_move_some, -	    "Total number of SACK index reuse of positions over threshold"); -	rack_sack_attacks_detected = counter_u64_alloc(M_WAITOK); -	SYSCTL_ADD_COUNTER_U64(&rack_sysctl_ctx, -	    SYSCTL_CHILDREN(rack_attack), -	    OID_AUTO, "attacks", CTLFLAG_RD, -	    &rack_sack_attacks_detected, -	    "Total number of SACK attackers that had sack disabled"); -	rack_sack_attacks_reversed = counter_u64_alloc(M_WAITOK); -	
SYSCTL_ADD_COUNTER_U64(&rack_sysctl_ctx, -	    SYSCTL_CHILDREN(rack_attack), -	    OID_AUTO, "reversed", CTLFLAG_RD, -	    &rack_sack_attacks_reversed, -	    "Total number of SACK attackers that were later determined false positive"); -	rack_sack_attacks_suspect = counter_u64_alloc(M_WAITOK); -	SYSCTL_ADD_COUNTER_U64(&rack_sysctl_ctx, -	    SYSCTL_CHILDREN(rack_attack), -	    OID_AUTO, "suspect", CTLFLAG_RD, -	    &rack_sack_attacks_suspect, -	    "Total number of SACKs that triggered early detection"); - -	rack_sack_used_next_merge = counter_u64_alloc(M_WAITOK); -	SYSCTL_ADD_COUNTER_U64(&rack_sysctl_ctx, -	    SYSCTL_CHILDREN(rack_attack), -	    OID_AUTO, "nextmerge", CTLFLAG_RD, -	    &rack_sack_used_next_merge, -	    "Total number of times we used the next merge"); -	rack_sack_used_prev_merge = counter_u64_alloc(M_WAITOK); -	SYSCTL_ADD_COUNTER_U64(&rack_sysctl_ctx, -	    SYSCTL_CHILDREN(rack_attack), -	    OID_AUTO, "prevmerge", CTLFLAG_RD, -	    &rack_sack_used_prev_merge, -	    "Total number of times we used the prev merge");  	/* Counters */  	rack_total_bytes = counter_u64_alloc(M_WAITOK);  	SYSCTL_ADD_COUNTER_U64(&rack_sysctl_ctx, @@ -1908,18 +1788,6 @@ rack_init_sysctls(void)  	    OID_AUTO, "sack_short", CTLFLAG_RD,  	    &rack_sack_proc_short,  	    "Total times we took shortcut for sack processing"); -	rack_sack_skipped_acked = counter_u64_alloc(M_WAITOK); -	SYSCTL_ADD_COUNTER_U64(&rack_sysctl_ctx, -	    SYSCTL_CHILDREN(rack_attack), -	    OID_AUTO, "skipacked", CTLFLAG_RD, -	    &rack_sack_skipped_acked, -	    "Total number of times we skipped previously sacked"); -	rack_sack_splits = counter_u64_alloc(M_WAITOK); -	SYSCTL_ADD_COUNTER_U64(&rack_sysctl_ctx, -	    SYSCTL_CHILDREN(rack_attack), -	    OID_AUTO, "ofsplit", CTLFLAG_RD, -	    &rack_sack_splits, -	    "Total number of times we did the old fashion tree split");  	rack_input_idle_reduces = counter_u64_alloc(M_WAITOK);  	SYSCTL_ADD_COUNTER_U64(&rack_sysctl_ctx,  	    SYSCTL_CHILDREN(rack_counters), @@ -2700,7 +2568,7 @@ rack_log_retran_reason(struct tcp_rack *rack, struct rack_sendmap *rsm, uint32_t  }  static void -rack_log_to_start(struct tcp_rack *rack, uint32_t cts, uint32_t to, int32_t slot, uint8_t which) +rack_log_to_start(struct tcp_rack *rack, uint32_t cts, uint32_t to, int32_t pacing_delay, uint8_t which)  {  	if (tcp_bblogging_on(rack->rc_tp)) {  		union tcp_log_stackspecific log; @@ -2710,7 +2578,7 @@ rack_log_to_start(struct tcp_rack *rack, uint32_t cts, uint32_t to, int32_t slot  		log.u_bbr.flex1 = rack->rc_tp->t_srtt;  		log.u_bbr.flex2 = to;  		log.u_bbr.flex3 = rack->r_ctl.rc_hpts_flags; -		log.u_bbr.flex4 = slot; +		log.u_bbr.flex4 = pacing_delay;  		log.u_bbr.flex5 = rack->rc_tp->t_hpts_slot;  		log.u_bbr.flex6 = rack->rc_tp->t_rxtcur;  		log.u_bbr.flex7 = rack->rc_in_persist; @@ -3034,14 +2902,14 @@ rack_log_progress_event(struct tcp_rack *rack, struct tcpcb *tp, uint32_t tick,  }  static void -rack_log_type_bbrsnd(struct tcp_rack *rack, uint32_t len, uint32_t slot, uint32_t cts, struct timeval *tv, int line) +rack_log_type_bbrsnd(struct tcp_rack *rack, uint32_t len, uint32_t pacing_delay, uint32_t cts, struct timeval *tv, int line)  {  	if (rack_verbose_logging && tcp_bblogging_on(rack->rc_tp)) {  		union tcp_log_stackspecific log;  		memset(&log, 0, sizeof(log));  		log.u_bbr.inhpts = tcp_in_hpts(rack->rc_tp); -		log.u_bbr.flex1 = slot; +		log.u_bbr.flex1 = pacing_delay;  		if (rack->rack_no_prr)  			log.u_bbr.flex2 = 0;  		else @@ -3139,7 +3007,7 @@ rack_log_type_pacing_sizes(struct tcpcb *tp, 
struct tcp_rack *rack, uint32_t arg  }  static void -rack_log_type_just_return(struct tcp_rack *rack, uint32_t cts, uint32_t tlen, uint32_t slot, +rack_log_type_just_return(struct tcp_rack *rack, uint32_t cts, uint32_t tlen, uint32_t pacing_delay,  			  uint8_t hpts_calling, int reason, uint32_t cwnd_to_use)  {  	if (tcp_bblogging_on(rack->rc_tp)) { @@ -3148,7 +3016,7 @@ rack_log_type_just_return(struct tcp_rack *rack, uint32_t cts, uint32_t tlen, ui  		memset(&log, 0, sizeof(log));  		log.u_bbr.inhpts = tcp_in_hpts(rack->rc_tp); -		log.u_bbr.flex1 = slot; +		log.u_bbr.flex1 = pacing_delay;  		log.u_bbr.flex2 = rack->r_ctl.rc_hpts_flags;  		log.u_bbr.flex4 = reason;  		if (rack->rack_no_prr) @@ -3319,16 +3187,6 @@ rack_counter_destroy(void)  	counter_u64_free(rack_hw_pace_lost);  	counter_u64_free(rack_non_fto_send);  	counter_u64_free(rack_extended_rfo); -	counter_u64_free(rack_ack_total); -	counter_u64_free(rack_express_sack); -	counter_u64_free(rack_sack_total); -	counter_u64_free(rack_move_none); -	counter_u64_free(rack_move_some); -	counter_u64_free(rack_sack_attacks_detected); -	counter_u64_free(rack_sack_attacks_reversed); -	counter_u64_free(rack_sack_attacks_suspect); -	counter_u64_free(rack_sack_used_next_merge); -	counter_u64_free(rack_sack_used_prev_merge);  	counter_u64_free(rack_tlp_tot);  	counter_u64_free(rack_tlp_newdata);  	counter_u64_free(rack_tlp_retran); @@ -3351,8 +3209,6 @@ rack_counter_destroy(void)  	counter_u64_free(rack_sack_proc_all);  	counter_u64_free(rack_sack_proc_restart);  	counter_u64_free(rack_sack_proc_short); -	counter_u64_free(rack_sack_skipped_acked); -	counter_u64_free(rack_sack_splits);  	counter_u64_free(rack_input_idle_reduces);  	counter_u64_free(rack_collapsed_win);  	counter_u64_free(rack_collapsed_win_rxt); @@ -4730,7 +4586,7 @@ rack_make_timely_judgement(struct tcp_rack *rack, uint32_t rtt, int32_t rtt_diff  	return (timely_says);  } -static __inline int +static inline int  rack_in_gp_window(struct tcpcb *tp, struct rack_sendmap *rsm)  {  	if (SEQ_GEQ(rsm->r_start, tp->gput_seq) && @@ -4767,7 +4623,7 @@ rack_in_gp_window(struct tcpcb *tp, struct rack_sendmap *rsm)  	return (0);  } -static __inline void +static inline void  rack_mark_in_gp_win(struct tcpcb *tp, struct rack_sendmap *rsm)  { @@ -4784,7 +4640,7 @@ rack_mark_in_gp_win(struct tcpcb *tp, struct rack_sendmap *rsm)  		rsm->r_flags &= ~RACK_IN_GP_WIN;  } -static __inline void +static inline void  rack_clear_gp_marks(struct tcpcb *tp, struct tcp_rack *rack)  {  	/* A GP measurement is ending, clear all marks on the send map*/ @@ -4802,7 +4658,7 @@ rack_clear_gp_marks(struct tcpcb *tp, struct tcp_rack *rack)  } -static __inline void +static inline void  rack_tend_gp_marks(struct tcpcb *tp, struct tcp_rack *rack)  {  	struct rack_sendmap *rsm = NULL; @@ -6482,7 +6338,7 @@ rack_log_hpts_diag(struct tcp_rack *rack, uint32_t cts,  		log.u_bbr.flex2 = diag->p_cur_slot;  		log.u_bbr.flex3 = diag->slot_req;  		log.u_bbr.flex4 = diag->inp_hptsslot; -		log.u_bbr.flex5 = diag->slot_remaining; +		log.u_bbr.flex5 = diag->time_remaining;  		log.u_bbr.flex6 = diag->need_new_to;  		log.u_bbr.flex7 = diag->p_hpts_active;  		log.u_bbr.flex8 = diag->p_on_min_sleep; @@ -6497,9 +6353,6 @@ rack_log_hpts_diag(struct tcp_rack *rack, uint32_t cts,  		log.u_bbr.rttProp = diag->wheel_cts;  		log.u_bbr.timeStamp = cts;  		log.u_bbr.delRate = diag->maxslots; -		log.u_bbr.cur_del_rate = diag->p_curtick; -		log.u_bbr.cur_del_rate <<= 32; -		log.u_bbr.cur_del_rate |= diag->p_lasttick;  		TCP_LOG_EVENTP(rack->rc_tp, 
NULL,  		    &rack->rc_inp->inp_socket->so_rcv,  		    &rack->rc_inp->inp_socket->so_snd, @@ -6532,14 +6385,14 @@ rack_log_wakeup(struct tcpcb *tp, struct tcp_rack *rack, struct sockbuf *sb, uin  static void  rack_start_hpts_timer (struct tcp_rack *rack, struct tcpcb *tp, uint32_t cts, -      int32_t slot, uint32_t tot_len_this_send, int sup_rack) +      int32_t usecs, uint32_t tot_len_this_send, int sup_rack)  {  	struct hpts_diag diag;  	struct inpcb *inp = tptoinpcb(tp);  	struct timeval tv;  	uint32_t delayed_ack = 0;  	uint32_t hpts_timeout; -	uint32_t entry_slot = slot; +	uint32_t entry_usecs = usecs;  	uint8_t stopped;  	uint32_t left = 0;  	uint32_t us_cts; @@ -6560,7 +6413,7 @@ rack_start_hpts_timer (struct tcp_rack *rack, struct tcpcb *tp, uint32_t cts,  	rack->r_ctl.rc_hpts_flags = 0;  	us_cts = tcp_get_usecs(&tv);  	/* Now early/late accounting */ -	rack_log_pacing_delay_calc(rack, entry_slot, slot, 0, 0, 0, 26, __LINE__, NULL, 0); +	rack_log_pacing_delay_calc(rack, entry_usecs, usecs, 0, 0, 0, 26, __LINE__, NULL, 0);  	if (rack->r_early && (rack->rc_ack_can_sendout_data == 0)) {  		/*  		 * We have a early carry over set, @@ -6571,7 +6424,7 @@ rack_start_hpts_timer (struct tcp_rack *rack, struct tcpcb *tp, uint32_t cts,  		 * penalize the next timer for being awoke  		 * by an ack aka the rc_agg_early (non-paced mode).  		 */ -		slot += rack->r_ctl.rc_agg_early; +		usecs += rack->r_ctl.rc_agg_early;  		rack->r_early = 0;  		rack->r_ctl.rc_agg_early = 0;  	} @@ -6583,29 +6436,29 @@ rack_start_hpts_timer (struct tcp_rack *rack, struct tcpcb *tp, uint32_t cts,  		 * really depends on what  		 * the current pacing time is.  		 */ -		if (rack->r_ctl.rc_agg_delayed >= slot) { +		if (rack->r_ctl.rc_agg_delayed >= usecs) {  			/*  			 * We can't compensate for it all.  			 * And we have to have some time  			 * on the clock. We always have a min -			 * 10 slots (10 x 10 i.e. 100 usecs). +			 * 10 HPTS timer units (10 x 10 i.e. 100 usecs).  			 
*/ -			if (slot <= HPTS_USECS_PER_SLOT) { +			if (usecs <= HPTS_USECS_PER_SLOT) {  				/* We gain delay */ -				rack->r_ctl.rc_agg_delayed += (HPTS_USECS_PER_SLOT - slot); -				slot = HPTS_USECS_PER_SLOT; +				rack->r_ctl.rc_agg_delayed += (HPTS_USECS_PER_SLOT - usecs); +				usecs = HPTS_USECS_PER_SLOT;  			} else {  				/* We take off some */ -				rack->r_ctl.rc_agg_delayed -= (slot - HPTS_USECS_PER_SLOT); -				slot = HPTS_USECS_PER_SLOT; +				rack->r_ctl.rc_agg_delayed -= (usecs - HPTS_USECS_PER_SLOT); +				usecs = HPTS_USECS_PER_SLOT;  			}  		} else { -			slot -= rack->r_ctl.rc_agg_delayed; +			usecs -= rack->r_ctl.rc_agg_delayed;  			rack->r_ctl.rc_agg_delayed = 0;  			/* Make sure we have 100 useconds at minimum */ -			if (slot < HPTS_USECS_PER_SLOT) { -				rack->r_ctl.rc_agg_delayed = HPTS_USECS_PER_SLOT - slot; -				slot = HPTS_USECS_PER_SLOT; +			if (usecs < HPTS_USECS_PER_SLOT) { +				rack->r_ctl.rc_agg_delayed = HPTS_USECS_PER_SLOT - usecs; +				usecs = HPTS_USECS_PER_SLOT;  			}  			if (rack->r_ctl.rc_agg_delayed == 0)  				rack->r_late = 0; @@ -6614,17 +6467,17 @@ rack_start_hpts_timer (struct tcp_rack *rack, struct tcpcb *tp, uint32_t cts,  		/* r_use_hpts_min is on and so is DGP */  		uint32_t max_red; -		max_red = (slot * rack->r_ctl.max_reduction) / 100; +		max_red = (usecs * rack->r_ctl.max_reduction) / 100;  		if (max_red >= rack->r_ctl.rc_agg_delayed) { -			slot -= rack->r_ctl.rc_agg_delayed; +			usecs -= rack->r_ctl.rc_agg_delayed;  			rack->r_ctl.rc_agg_delayed = 0;  		} else { -			slot -= max_red; +			usecs -= max_red;  			rack->r_ctl.rc_agg_delayed -= max_red;  		}  	}  	if ((rack->r_use_hpts_min == 1) && -	    (slot > 0) && +	    (usecs > 0) &&  	    (rack->dgp_on == 1)) {  		/*  		 * We are enforcing a min pacing timer @@ -6633,8 +6486,8 @@ rack_start_hpts_timer (struct tcp_rack *rack, struct tcpcb *tp, uint32_t cts,  		uint32_t min;  		min = get_hpts_min_sleep_time(); -		if (min > slot) { -			slot = min; +		if (min > usecs) { +			usecs = min;  		}  	}  	hpts_timeout = rack_timer_start(tp, rack, cts, sup_rack); @@ -6652,7 +6505,7 @@ rack_start_hpts_timer (struct tcp_rack *rack, struct tcpcb *tp, uint32_t cts,  	 * wheel, we resort to a keep-alive timer if its configured.  	 */  	if ((hpts_timeout == 0) && -	    (slot == 0)) { +	    (usecs == 0)) {  		if ((V_tcp_always_keepalive || inp->inp_socket->so_options & SO_KEEPALIVE) &&  		    (tp->t_state <= TCPS_CLOSING)) {  			/* @@ -6709,10 +6562,10 @@ rack_start_hpts_timer (struct tcp_rack *rack, struct tcpcb *tp, uint32_t cts,  			hpts_timeout = 0x7ffffffe;  		rack->r_ctl.rc_timer_exp = cts + hpts_timeout;  	} -	rack_log_pacing_delay_calc(rack, entry_slot, slot, hpts_timeout, 0, 0, 27, __LINE__, NULL, 0); +	rack_log_pacing_delay_calc(rack, entry_usecs, usecs, hpts_timeout, 0, 0, 27, __LINE__, NULL, 0);  	if ((rack->gp_ready == 0) &&  	    (rack->use_fixed_rate == 0) && -	    (hpts_timeout < slot) && +	    (hpts_timeout < usecs) &&  	    (rack->r_ctl.rc_hpts_flags & (PACE_TMR_TLP|PACE_TMR_RXT))) {  		/*  		 * We have no good estimate yet for the @@ -6722,7 +6575,7 @@ rack_start_hpts_timer (struct tcp_rack *rack, struct tcpcb *tp, uint32_t cts,  		 * pace that long since we know the calculation  		 * so far is not accurate.  		 */ -		slot = hpts_timeout; +		usecs = hpts_timeout;  	}  	/**  	 * Turn off all the flags for queuing by default. The @@ -6754,11 +6607,11 @@ rack_start_hpts_timer (struct tcp_rack *rack, struct tcpcb *tp, uint32_t cts,  	 * so LRO can call into us.  	 
*/  	tp->t_flags2 &= ~(TF2_DONT_SACK_QUEUE|TF2_MBUF_QUEUE_READY); -	if (slot) { +	if (usecs) {  		rack->r_ctl.rc_hpts_flags |= PACE_PKT_OUTPUT; -		rack->r_ctl.rc_last_output_to = us_cts + slot; +		rack->r_ctl.rc_last_output_to = us_cts + usecs;  		/* -		 * A pacing timer (slot) is being set, in +		 * A pacing timer (usecs microseconds) is being set, in  		 * such a case we cannot send (we are blocked by  		 * the timer). So lets tell LRO that it should not  		 * wake us unless there is a SACK. Note this only @@ -6799,20 +6652,18 @@ rack_start_hpts_timer (struct tcp_rack *rack, struct tcpcb *tp, uint32_t cts,  		}  		if ((rack->use_rack_rr) &&  		    (rack->r_rr_config < 2) && -		    ((hpts_timeout) && (hpts_timeout < slot))) { +		    ((hpts_timeout) && (hpts_timeout < usecs))) {  			/*  			 * Arrange for the hpts to kick back in after the  			 * t-o if the t-o does not cause a send.  			 */ -			(void)tcp_hpts_insert_diag(tp, HPTS_USEC_TO_SLOTS(hpts_timeout), -						   __LINE__, &diag); +			tcp_hpts_insert(tp, hpts_timeout, &diag);  			rack_log_hpts_diag(rack, us_cts, &diag, &tv); -			rack_log_to_start(rack, cts, hpts_timeout, slot, 0); +			rack_log_to_start(rack, cts, hpts_timeout, usecs, 0);  		} else { -			(void)tcp_hpts_insert_diag(tp, HPTS_USEC_TO_SLOTS(slot), -						   __LINE__, &diag); +			tcp_hpts_insert(tp, usecs, &diag);  			rack_log_hpts_diag(rack, us_cts, &diag, &tv); -			rack_log_to_start(rack, cts, hpts_timeout, slot, 1); +			rack_log_to_start(rack, cts, hpts_timeout, usecs, 1);  		}  	} else if (hpts_timeout) {  		/* @@ -6824,22 +6675,21 @@ rack_start_hpts_timer (struct tcp_rack *rack, struct tcpcb *tp, uint32_t cts,  		 * at the start of this block) are good enough.  		 */  		rack->r_ctl.rc_hpts_flags &= ~PACE_PKT_OUTPUT; -		(void)tcp_hpts_insert_diag(tp, HPTS_USEC_TO_SLOTS(hpts_timeout), -					   __LINE__, &diag); +		tcp_hpts_insert(tp, hpts_timeout, &diag);  		rack_log_hpts_diag(rack, us_cts, &diag, &tv); -		rack_log_to_start(rack, cts, hpts_timeout, slot, 0); +		rack_log_to_start(rack, cts, hpts_timeout, usecs, 0);  	} else {  		/* No timer starting */  #ifdef INVARIANTS  		if (SEQ_GT(tp->snd_max, tp->snd_una)) { -			panic("tp:%p rack:%p tlts:%d cts:%u slot:%u pto:%u -- no timer started?", -			    tp, rack, tot_len_this_send, cts, slot, hpts_timeout); +			panic("tp:%p rack:%p tlts:%d cts:%u usecs:%u pto:%u -- no timer started?", +			    tp, rack, tot_len_this_send, cts, usecs, hpts_timeout);  		}  #endif  	}  	rack->rc_tmr_stopped = 0; -	if (slot) -		rack_log_type_bbrsnd(rack, tot_len_this_send, slot, us_cts, &tv, __LINE__); +	if (usecs) +		rack_log_type_bbrsnd(rack, tot_len_this_send, usecs, us_cts, &tv, __LINE__);  }  static void @@ -6870,6 +6720,18 @@ rack_mark_lost(struct tcpcb *tp,  	}  } +static inline void +rack_mark_nolonger_lost(struct tcp_rack *rack, struct rack_sendmap *rsm) +{ +	KASSERT((rack->r_ctl.rc_considered_lost >= (rsm->r_end - rsm->r_start)), +		("rsm:%p rack:%p rc_considered_lost goes negative", rsm,  rack)); +	rsm->r_flags &= ~RACK_WAS_LOST; +	if (rack->r_ctl.rc_considered_lost >= (rsm->r_end - rsm->r_start)) +		rack->r_ctl.rc_considered_lost -= rsm->r_end - rsm->r_start; +	else +		rack->r_ctl.rc_considered_lost = 0; +} +  /*   * RACK Timer, here we simply do logging and house keeping.   
* the normal rack_output() function will call the @@ -7011,7 +6873,7 @@ rack_setup_offset_for_rsm(struct tcp_rack *rack, struct rack_sendmap *src_rsm, s  	rsm->orig_t_space = M_TRAILINGROOM(rsm->m);  } -static __inline void +static inline void  rack_clone_rsm(struct tcp_rack *rack, struct rack_sendmap *nrsm,  	       struct rack_sendmap *rsm, uint32_t start)  { @@ -8016,7 +7878,7 @@ rack_process_timers(struct tcpcb *tp, struct tcp_rack *rack, uint32_t cts, uint8  		rack->rc_tp->t_flags2 &= ~TF2_DONT_SACK_QUEUE;  		ret = -3;  		left = rack->r_ctl.rc_timer_exp - cts; -		tcp_hpts_insert(tp, HPTS_MS_TO_SLOTS(left)); +		tcp_hpts_insert(tp, left, NULL);  		rack_log_to_processing(rack, cts, ret, left);  		return (1);  	} @@ -8136,13 +7998,7 @@ rack_update_rsm(struct tcpcb *tp, struct tcp_rack *rack,  		 * remove the lost desgination and reduce the  		 * bytes considered lost.  		 */ -		rsm->r_flags &= ~RACK_WAS_LOST; -		KASSERT((rack->r_ctl.rc_considered_lost >= (rsm->r_end - rsm->r_start)), -			("rsm:%p rack:%p rc_considered_lost goes negative", rsm,  rack)); -		if (rack->r_ctl.rc_considered_lost >= (rsm->r_end - rsm->r_start)) -			rack->r_ctl.rc_considered_lost -= rsm->r_end - rsm->r_start; -		else -			rack->r_ctl.rc_considered_lost = 0; +		rack_mark_nolonger_lost(rack, rsm);  	}  	idx = rsm->r_rtr_cnt - 1;  	rsm->r_tim_lastsent[idx] = ts; @@ -9498,6 +9354,11 @@ do_rest_ofb:  				if (rsm->r_flags & RACK_WAS_LOST) {  					int my_chg; +					/* +					 * Note here we do not use our rack_mark_nolonger_lost() function +					 * since we are moving our data pointer around and the +					 * ack'ed side is already not considered lost. +					 */  					my_chg = (nrsm->r_end - nrsm->r_start);  					KASSERT((rack->r_ctl.rc_considered_lost >= my_chg),  						("rsm:%p rack:%p rc_considered_lost goes negative", rsm,  rack)); @@ -9537,7 +9398,6 @@ do_rest_ofb:  					goto out;  				}  				rack_log_map_chg(tp, rack, &stack_map, rsm, next, MAP_SACK_M1, end, __LINE__); -				counter_u64_add(rack_sack_used_next_merge, 1);  				/* Postion for the next block */  				start = next->r_end;  				rsm = tqhash_next(rack->r_ctl.tqh, next); @@ -9569,7 +9429,6 @@ do_rest_ofb:  					 */  					goto out;  				} -				counter_u64_add(rack_sack_splits, 1);  				rack_clone_rsm(rack, nrsm, rsm, start);  				rsm->r_just_ret = 0;  #ifndef INVARIANTS @@ -9591,7 +9450,6 @@ do_rest_ofb:  			}  		} else {  			/* Already sacked this piece */ -			counter_u64_add(rack_sack_skipped_acked, 1);  			if (end == rsm->r_end) {  				/* Done with block */  				rsm = tqhash_next(rack->r_ctl.tqh, rsm); @@ -9665,16 +9523,11 @@ do_rest_ofb:  			changed += (rsm->r_end - rsm->r_start);  			/* You get a count for acking a whole segment or more */  			if (rsm->r_flags & RACK_WAS_LOST) { -				int my_chg; - -				my_chg = (rsm->r_end - rsm->r_start); -				rsm->r_flags &= ~RACK_WAS_LOST; -				KASSERT((rack->r_ctl.rc_considered_lost >= my_chg), -					("rsm:%p rack:%p rc_considered_lost goes negative", rsm,  rack)); -				if (my_chg <= rack->r_ctl.rc_considered_lost) -					rack->r_ctl.rc_considered_lost -= my_chg; -				else -					rack->r_ctl.rc_considered_lost = 0; +				/* +				 * Here we can use the inline function since +				 * the rsm is truly marked lost and now no longer lost. 
+				 */ +				rack_mark_nolonger_lost(rack, rsm);  			}  			rack->r_ctl.rc_sacked += (rsm->r_end - rsm->r_start);  			if (rsm->r_in_tmap) /* should be true */ @@ -9696,8 +9549,6 @@ do_rest_ofb:  				rsm->r_in_tmap = 0;  			}  			rack_log_map_chg(tp, rack, NULL, rsm, NULL, MAP_SACK_M3, end, __LINE__); -		} else { -			counter_u64_add(rack_sack_skipped_acked, 1);  		}  		if (end == rsm->r_end) {  			/* This block only - done, setup for next */ @@ -9857,6 +9708,10 @@ do_rest_ofb:  			if (rsm->r_flags & RACK_WAS_LOST) {  				int my_chg; +				/* +				 * Note here we are using hookery again so we can't +				 * use our rack_mark_nolonger_lost() function. +				 */  				my_chg = (nrsm->r_end - nrsm->r_start);  				KASSERT((rack->r_ctl.rc_considered_lost >= my_chg),  					("rsm:%p rack:%p rc_considered_lost goes negative", rsm,  rack)); @@ -9872,7 +9727,6 @@ do_rest_ofb:  			}  			rack_log_map_chg(tp, rack, prev, &stack_map, rsm, MAP_SACK_M4, end, __LINE__);  			rsm = prev; -			counter_u64_add(rack_sack_used_prev_merge, 1);  		} else {  			/**  			 * This is the case where our previous @@ -9937,7 +9791,6 @@ do_rest_ofb:  			 * rsm      |---|         (acked)  			 * nrsm         |------|  (not acked)  			 */ -			counter_u64_add(rack_sack_splits, 1);  			rack_clone_rsm(rack, nrsm, rsm, end);  			rsm->r_flags &= (~RACK_HAS_FIN);  			rsm->r_just_ret = 0; @@ -9958,16 +9811,10 @@ do_rest_ofb:  			rack_update_rtt(tp, rack, rsm, to, cts, SACKED, 0);  			changed += (rsm->r_end - rsm->r_start);  			if (rsm->r_flags & RACK_WAS_LOST) { -				int my_chg; - -				my_chg = (rsm->r_end - rsm->r_start); -				rsm->r_flags &= ~RACK_WAS_LOST; -				KASSERT((rack->r_ctl.rc_considered_lost >= my_chg), -					("rsm:%p rack:%p rc_considered_lost goes negative", rsm,  rack)); -				if (my_chg <= rack->r_ctl.rc_considered_lost) -					rack->r_ctl.rc_considered_lost -= my_chg; -				else -					rack->r_ctl.rc_considered_lost = 0; +				/* +				 * Here it is safe to use our function. +				 */ +				rack_mark_nolonger_lost(rack, rsm);  			}  			rack->r_ctl.rc_sacked += (rsm->r_end - rsm->r_start); @@ -9991,11 +9838,6 @@ do_rest_ofb:  				rsm->r_in_tmap = 0;  			}  		} -	} else if (start != end){ -		/* -		 * The block was already acked. -		 */ -		counter_u64_add(rack_sack_skipped_acked, 1);  	}  out:  	if (rsm && @@ -10368,13 +10210,7 @@ more:  			 * and yet before retransmitting we get an ack  			 * which can happen due to reordering.  			 */ -			rsm->r_flags &= ~RACK_WAS_LOST; -			KASSERT((rack->r_ctl.rc_considered_lost >= (rsm->r_end - rsm->r_start)), -				("rsm:%p rack:%p rc_considered_lost goes negative", rsm,  rack)); -			if (rack->r_ctl.rc_considered_lost >= (rsm->r_end - rsm->r_start)) -				rack->r_ctl.rc_considered_lost -= rsm->r_end - rsm->r_start; -			else -				rack->r_ctl.rc_considered_lost = 0; +			rack_mark_nolonger_lost(rack, rsm);  		}  		rack_log_map_chg(tp, rack, NULL, rsm, NULL, MAP_FREE, rsm->r_end, __LINE__);  		rack->r_ctl.rc_holes_rxt -= rsm->r_rtr_bytes; @@ -10482,12 +10318,7 @@ more:  		 * which can happen due to reordering. In this  		 * case its only a partial ack of the send.  		 
*/ -		KASSERT((rack->r_ctl.rc_considered_lost >= (th_ack - rsm->r_start)), -			("rsm:%p rack:%p rc_considered_lost goes negative th_ack:%u", rsm,  rack, th_ack)); -		if (rack->r_ctl.rc_considered_lost >= (th_ack - rsm->r_start)) -			rack->r_ctl.rc_considered_lost -= th_ack - rsm->r_start; -		else -			rack->r_ctl.rc_considered_lost = 0; +		rack_mark_nolonger_lost(rack, rsm);  	}  	/*  	 * Clear the dup ack count for @@ -10799,17 +10630,6 @@ rack_log_ack(struct tcpcb *tp, struct tcpopt *to, struct tcphdr *th, int entered  	changed = 0;  	th_ack = th->th_ack;  	segsiz = ctf_fixed_maxseg(rack->rc_tp); -	if (BYTES_THIS_ACK(tp, th) >=  segsiz) { -		/* -		 * You only get credit for -		 * MSS and greater (and you get extra -		 * credit for larger cum-ack moves). -		 */ -		int ac; - -		ac = BYTES_THIS_ACK(tp, th) / ctf_fixed_maxseg(rack->rc_tp); -		counter_u64_add(rack_ack_total, ac); -	}  	if (SEQ_GT(th_ack, tp->snd_una)) {  		rack_log_progress_event(rack, tp, ticks, PROGRESS_UPDATE, __LINE__);  		tp->t_acktime = ticks; @@ -10881,8 +10701,8 @@ rack_log_ack(struct tcpcb *tp, struct tcpopt *to, struct tcphdr *th, int entered  	if (sacks_seen != NULL)  		*sacks_seen = num_sack_blks;  	if (num_sack_blks == 0) { -		/* Nothing to sack, but we need to update counts */ -		goto out_with_totals; +		/* Nothing to sack */ +		goto out;  	}  	/* Its a sack of some sort */  	if (num_sack_blks < 2) { @@ -10905,7 +10725,7 @@ rack_log_ack(struct tcpcb *tp, struct tcpopt *to, struct tcphdr *th, int entered  	 */  again:  	if (num_sack_blks == 0) -		goto out_with_totals; +		goto out;  	if (num_sack_blks > 1) {  		for (i = 0; i < num_sack_blks; i++) {  			for (j = i + 1; j < num_sack_blks; j++) { @@ -10958,19 +10778,7 @@ do_sack_work:  			changed += acked;  		}  		if (num_sack_blks == 1) { -			/* -			 * This is what we would expect from -			 * a normal implementation to happen -			 * after we have retransmitted the FR, -			 * i.e the sack-filter pushes down -			 * to 1 block and the next to be retransmitted -			 * is the sequence in the sack block (has more -			 * are acked). Count this as ACK'd data to boost -			 * up the chances of recovering any false positives. -			 */ -			counter_u64_add(rack_ack_total, (acked / ctf_fixed_maxseg(rack->rc_tp))); -			counter_u64_add(rack_express_sack, 1); -			goto out_with_totals; +			goto out;  		} else {  			/*  			 * Start the loop through the @@ -10979,7 +10787,6 @@ do_sack_work:  			loop_start = 1;  		}  	} -	counter_u64_add(rack_sack_total, 1);  	rsm = rack->r_ctl.rc_sacklast;  	for (i = loop_start; i < num_sack_blks; i++) {  		acked = rack_proc_sack_blk(tp, rack, &sack_blocks[i], to, &rsm, cts,  segsiz); @@ -10988,18 +10795,6 @@ do_sack_work:  			changed += acked;  		}  	} -out_with_totals: -	if (num_sack_blks > 1) { -		/* -		 * You get an extra stroke if -		 * you have more than one sack-blk, this -		 * could be where we are skipping forward -		 * and the sack-filter is still working, or -		 * it could be an attacker constantly -		 * moving us. 
-		 */ -		counter_u64_add(rack_move_some, 1); -	}  out:  	if (changed) {  		/* Something changed cancel the rack timer */ @@ -14377,8 +14172,7 @@ rack_switch_failed(struct tcpcb *tp)  		}  	} else  		toval = HPTS_USECS_PER_SLOT; -	(void)tcp_hpts_insert_diag(tp, HPTS_USEC_TO_SLOTS(toval), -				   __LINE__, &diag); +	tcp_hpts_insert(tp, toval, &diag);  	rack_log_hpts_diag(rack, cts, &diag, &tv);  } @@ -14720,7 +14514,6 @@ rack_init(struct tcpcb *tp, void **ptr)  	rack->r_ctl.rc_prr_sendalot = rack_send_a_lot_in_prr;  	rack->r_ctl.rc_min_to = rack_min_to;  	microuptime(&rack->r_ctl.act_rcv_time); -	rack->r_ctl.rc_last_time_decay = rack->r_ctl.act_rcv_time;  	rack->r_ctl.rack_per_of_gp_ss = rack_per_of_gp_ss;  	if (rack_hw_up_only)  		rack->r_up_only = 1; @@ -14973,8 +14766,7 @@ rack_init(struct tcpcb *tp, void **ptr)  				if (tov) {  					struct hpts_diag diag; -					(void)tcp_hpts_insert_diag(tp, HPTS_USEC_TO_SLOTS(tov), -								   __LINE__, &diag); +					tcp_hpts_insert(tp, tov, &diag);  					rack_log_hpts_diag(rack, us_cts, &diag, &rack->r_ctl.act_rcv_time);  				}  			} @@ -16367,7 +16159,7 @@ rack_do_segment_nounlock(struct tcpcb *tp, struct mbuf *m, struct tcphdr *th,  	struct rack_sendmap *rsm;  	int32_t prev_state = 0;  	int no_output = 0; -	int slot_remaining = 0; +	int time_remaining = 0;  #ifdef TCP_ACCOUNTING  	int ack_val_set = 0xf;  #endif @@ -16416,7 +16208,7 @@ rack_do_segment_nounlock(struct tcpcb *tp, struct mbuf *m, struct tcphdr *th,  		 * could be, if a sack is present, we want to be awoken and  		 * so should process the packets.  		 */ -		slot_remaining = rack->r_ctl.rc_last_output_to - us_cts; +		time_remaining = rack->r_ctl.rc_last_output_to - us_cts;  		if (rack->rc_tp->t_flags2 & TF2_DONT_SACK_QUEUE) {  			no_output = 1;  		} else { @@ -16436,7 +16228,7 @@ rack_do_segment_nounlock(struct tcpcb *tp, struct mbuf *m, struct tcphdr *th,  			     (*ts_ptr == TCP_LRO_TS_OPTION)))  				no_output = 1;  		} -		if ((no_output == 1) && (slot_remaining < tcp_min_hptsi_time)) { +		if ((no_output == 1) && (time_remaining < tcp_min_hptsi_time)) {  			/*  			 * It is unrealistic to think we can pace in less than  			 * the minimum granularity of the pacer (def:250usec). So @@ -16919,10 +16711,10 @@ do_output_now:  			   (tcp_in_hpts(rack->rc_tp) == 0)) {  			/*  			 * We are not in hpts and we had a pacing timer up. Use -			 * the remaining time (slot_remaining) to restart the timer. +			 * the remaining time (time_remaining) to restart the timer.  			 
*/ -			KASSERT ((slot_remaining != 0), ("slot remaining is zero for rack:%p tp:%p", rack, tp)); -			rack_start_hpts_timer(rack, tp, cts, slot_remaining, 0, 0); +			KASSERT ((time_remaining != 0), ("slot remaining is zero for rack:%p tp:%p", rack, tp)); +			rack_start_hpts_timer(rack, tp, cts, time_remaining, 0, 0);  			rack_free_trim(rack);  		}  		/* Clear the flag, it may have been cleared by output but we may not have  */ @@ -17102,7 +16894,7 @@ check_it:  }  static void -rack_log_pacing_delay_calc (struct tcp_rack *rack, uint32_t len, uint32_t slot, +rack_log_pacing_delay_calc (struct tcp_rack *rack, uint32_t len, uint32_t pacing_delay,  			   uint64_t bw_est, uint64_t bw, uint64_t len_time, int method,  			   int line, struct rack_sendmap *rsm, uint8_t quality)  { @@ -17125,7 +16917,7 @@ rack_log_pacing_delay_calc (struct tcp_rack *rack, uint32_t len, uint32_t slot,  			}  		}  		memset(&log, 0, sizeof(log)); -		log.u_bbr.flex1 = slot; +		log.u_bbr.flex1 = pacing_delay;  		log.u_bbr.flex2 = len;  		log.u_bbr.flex3 = rack->r_ctl.rc_pace_min_segs;  		log.u_bbr.flex4 = rack->r_ctl.rc_pace_max_segs; @@ -17284,25 +17076,25 @@ rack_arrive_at_discounted_rate(struct tcp_rack *rack, uint64_t window_input, uin  }  static int32_t -pace_to_fill_cwnd(struct tcp_rack *rack, int32_t slot, uint32_t len, uint32_t segsiz, int *capped, uint64_t *rate_wanted, uint8_t non_paced) +pace_to_fill_cwnd(struct tcp_rack *rack, int32_t pacing_delay, uint32_t len, uint32_t segsiz, int *capped, uint64_t *rate_wanted, uint8_t non_paced)  {  	uint64_t lentim, fill_bw;  	rack->r_via_fill_cw = 0;  	if (ctf_flight_size(rack->rc_tp, rack->r_ctl.rc_sacked) > rack->r_ctl.cwnd_to_use) -		return (slot); +		return (pacing_delay);  	if ((ctf_outstanding(rack->rc_tp) + (segsiz-1)) > rack->rc_tp->snd_wnd) -		return (slot); +		return (pacing_delay);  	if (rack->r_ctl.rc_last_us_rtt == 0) -		return (slot); +		return (pacing_delay);  	if (rack->rc_pace_fill_if_rttin_range &&  	    (rack->r_ctl.rc_last_us_rtt >=  	     (get_filter_value_small(&rack->r_ctl.rc_gp_min_rtt) * rack->rtt_limit_mul))) {  		/* The rtt is huge, N * smallest, lets not fill */ -		return (slot); +		return (pacing_delay);  	}  	if (rack->r_ctl.fillcw_cap && *rate_wanted >= rack->r_ctl.fillcw_cap) -		return (slot); +		return (pacing_delay);  	/*  	 * first lets calculate the b/w based on the last us-rtt  	 * and the the smallest send window. @@ -17368,7 +17160,7 @@ at_lt_bw:  	if (non_paced)  		*rate_wanted = fill_bw;  	if ((fill_bw < RACK_MIN_BW) || (fill_bw < *rate_wanted)) -		return (slot); +		return (pacing_delay);  	rack->r_via_fill_cw = 1;  	if (rack->r_rack_hw_rate_caps &&  	    (rack->r_ctl.crte != NULL)) { @@ -17423,19 +17215,19 @@ at_lt_bw:  	lentim = (uint64_t)(len) * (uint64_t)HPTS_USEC_IN_SEC;  	lentim /= fill_bw;  	*rate_wanted = fill_bw; -	if (non_paced || (lentim < slot)) { -		rack_log_pacing_delay_calc(rack, len, slot, fill_bw, +	if (non_paced || (lentim < pacing_delay)) { +		rack_log_pacing_delay_calc(rack, len, pacing_delay, fill_bw,  					   0, lentim, 12, __LINE__, NULL, 0);  		return ((int32_t)lentim);  	} else -		return (slot); +		return (pacing_delay);  }  static int32_t  rack_get_pacing_delay(struct tcp_rack *rack, struct tcpcb *tp, uint32_t len, struct rack_sendmap *rsm, uint32_t segsiz, int line)  {  	uint64_t srtt; -	int32_t slot = 0; +	int32_t pacing_delay = 0;  	int can_start_hw_pacing = 1;  	int err;  	int pace_one; @@ -17483,25 +17275,25 @@ rack_get_pacing_delay(struct tcp_rack *rack, struct tcpcb *tp, uint32_t len, str  		 * cwnd. 
Which in that case we are just waiting for  		 * a ACK.  		 */ -		slot = len / tr_perms; +		pacing_delay = len / tr_perms;  		/* Now do we reduce the time so we don't run dry? */ -		if (slot && rack_slot_reduction) { -			reduce = (slot / rack_slot_reduction); -			if (reduce < slot) { -				slot -= reduce; +		if (pacing_delay && rack_pacing_delay_reduction) { +			reduce = (pacing_delay / rack_pacing_delay_reduction); +			if (reduce < pacing_delay) { +				pacing_delay -= reduce;  			} else -				slot = 0; +				pacing_delay = 0;  		} else  			reduce = 0; -		slot *= HPTS_USEC_IN_MSEC; +		pacing_delay *= HPTS_USEC_IN_MSEC;  		if (rack->rc_pace_to_cwnd) {  			uint64_t rate_wanted = 0; -			slot = pace_to_fill_cwnd(rack, slot, len, segsiz, NULL, &rate_wanted, 1); +			pacing_delay = pace_to_fill_cwnd(rack, pacing_delay, len, segsiz, NULL, &rate_wanted, 1);  			rack->rc_ack_can_sendout_data = 1; -			rack_log_pacing_delay_calc(rack, len, slot, rate_wanted, 0, 0, 14, __LINE__, NULL, 0); +			rack_log_pacing_delay_calc(rack, len, pacing_delay, rate_wanted, 0, 0, 14, __LINE__, NULL, 0);  		} else -			rack_log_pacing_delay_calc(rack, len, slot, tr_perms, reduce, 0, 7, __LINE__, NULL, 0); +			rack_log_pacing_delay_calc(rack, len, pacing_delay, tr_perms, reduce, 0, 7, __LINE__, NULL, 0);  		/*******************************************************/  		/* RRS: We insert non-paced call to stats here for len */  		/*******************************************************/ @@ -17575,7 +17367,7 @@ rack_get_pacing_delay(struct tcp_rack *rack, struct tcpcb *tp, uint32_t len, str  		segs *= oh;  		lentim = (uint64_t)(len + segs) * (uint64_t)HPTS_USEC_IN_SEC;  		res = lentim / rate_wanted; -		slot = (uint32_t)res; +		pacing_delay = (uint32_t)res;  		if (rack_hw_rate_min &&  		    (rate_wanted < rack_hw_rate_min)) {  			can_start_hw_pacing = 0; @@ -17635,7 +17427,7 @@ rack_get_pacing_delay(struct tcp_rack *rack, struct tcpcb *tp, uint32_t len, str  			 * We want to pace at our rate *or* faster to  			 * fill the cwnd to the max if its not full.  			 
*/ -			slot = pace_to_fill_cwnd(rack, slot, (len+segs), segsiz, &capped, &rate_wanted, 0); +			pacing_delay = pace_to_fill_cwnd(rack, pacing_delay, (len+segs), segsiz, &capped, &rate_wanted, 0);  			/* Re-check to make sure we are not exceeding our max b/w */  			if ((rack->r_ctl.crte != NULL) &&  			    (tcp_hw_highest_rate(rack->r_ctl.crte) < rate_wanted)) { @@ -17786,15 +17578,15 @@ rack_get_pacing_delay(struct tcp_rack *rack, struct tcpcb *tp, uint32_t len, str  				srtt = rack->rc_tp->t_srtt;  			else  				srtt = RACK_INITIAL_RTO * HPTS_USEC_IN_MSEC;	/* its in ms convert */ -			if (srtt < (uint64_t)slot) { -				rack_log_pacing_delay_calc(rack, srtt, slot, rate_wanted, bw_est, lentim, 99, __LINE__, NULL, 0); -				slot = srtt; +			if (srtt < (uint64_t)pacing_delay) { +				rack_log_pacing_delay_calc(rack, srtt, pacing_delay, rate_wanted, bw_est, lentim, 99, __LINE__, NULL, 0); +				pacing_delay = srtt;  			}  		}  		/*******************************************************************/  		/* RRS: We insert paced call to stats here for len and rate_wanted */  		/*******************************************************************/ -		rack_log_pacing_delay_calc(rack, len, slot, rate_wanted, bw_est, lentim, 2, __LINE__, rsm, 0); +		rack_log_pacing_delay_calc(rack, len, pacing_delay, rate_wanted, bw_est, lentim, 2, __LINE__, rsm, 0);  	}  	if (rack->r_ctl.crte && (rack->r_ctl.crte->rs_num_enobufs > 0)) {  		/* @@ -17811,9 +17603,9 @@ rack_get_pacing_delay(struct tcp_rack *rack, struct tcpcb *tp, uint32_t len, str  			hw_boost_delay = rack_enobuf_hw_max;  		else if (hw_boost_delay < rack_enobuf_hw_min)  			hw_boost_delay = rack_enobuf_hw_min; -		slot += hw_boost_delay; +		pacing_delay += hw_boost_delay;  	} -	return (slot); +	return (pacing_delay);  }  static void @@ -18482,7 +18274,7 @@ rack_fast_rsm_output(struct tcpcb *tp, struct tcp_rack *rack, struct rack_sendma  	struct tcpopt to;  	u_char opt[TCP_MAXOLEN];  	uint32_t hdrlen, optlen; -	int32_t slot, segsiz, max_val, tso = 0, error = 0, ulen = 0; +	int32_t pacing_delay, segsiz, max_val, tso = 0, error = 0, ulen = 0;  	uint16_t flags;  	uint32_t if_hw_tsomaxsegcount = 0, startseq;  	uint32_t if_hw_tsomaxsegsize; @@ -18688,9 +18480,9 @@ rack_fast_rsm_output(struct tcpcb *tp, struct tcp_rack *rack, struct rack_sendma  	}  	if (rack->r_ctl.crte != NULL) {  		/* See if we can send via the hw queue */ -		slot = rack_check_queue_level(rack, tp, tv, cts, len, segsiz); +		pacing_delay = rack_check_queue_level(rack, tp, tv, cts, len, segsiz);  		/* If there is nothing in queue (no pacing time) we can send via the hw queue */ -		if (slot == 0) +		if (pacing_delay == 0)  			ip_sendflag = 0;  	}  	tcp_set_flags(th, flags); @@ -18955,20 +18747,20 @@ rack_fast_rsm_output(struct tcpcb *tp, struct tcp_rack *rack, struct rack_sendma  				rack_log_queue_level(tp, rack, len, tv, cts);  		} else  			tcp_trace_point(rack->rc_tp, TCP_TP_ENOBUF); -		slot = ((1 + rack->rc_enobuf) * HPTS_USEC_IN_MSEC); +		pacing_delay = ((1 + rack->rc_enobuf) * HPTS_USEC_IN_MSEC);  		if (rack->rc_enobuf < 0x7f)  			rack->rc_enobuf++; -		if (slot < (10 * HPTS_USEC_IN_MSEC)) -			slot = 10 * HPTS_USEC_IN_MSEC; +		if (pacing_delay < (10 * HPTS_USEC_IN_MSEC)) +			pacing_delay = 10 * HPTS_USEC_IN_MSEC;  		if (rack->r_ctl.crte != NULL) {  			counter_u64_add(rack_saw_enobuf_hw, 1);  			tcp_rl_log_enobuf(rack->r_ctl.crte);  		}  		counter_u64_add(rack_saw_enobuf, 1);  	} else { -		slot = rack_get_pacing_delay(rack, tp, len, NULL, segsiz, __LINE__); +		pacing_delay = 
rack_get_pacing_delay(rack, tp, len, NULL, segsiz, __LINE__);  	} -	rack_start_hpts_timer(rack, tp, cts, slot, len, 0); +	rack_start_hpts_timer(rack, tp, cts, pacing_delay, len, 0);  #ifdef TCP_ACCOUNTING  	crtsc = get_cyclecount();  	if (tp->t_flags2 & TF2_TCP_ACCOUNTING) { @@ -19071,7 +18863,7 @@ rack_fast_output(struct tcpcb *tp, struct tcp_rack *rack, uint64_t ts_val,  #ifdef TCP_ACCOUNTING  	int cnt_thru = 1;  #endif -	int32_t slot, segsiz, len, max_val, tso = 0, sb_offset, error, ulen = 0; +	int32_t pacing_delay, segsiz, len, max_val, tso = 0, sb_offset, error, ulen = 0;  	uint16_t flags;  	uint32_t s_soff;  	uint32_t if_hw_tsomaxsegcount = 0, startseq; @@ -19519,8 +19311,8 @@ again:  	}  	tp->t_flags &= ~(TF_ACKNOW | TF_DELACK);  	counter_u64_add(rack_fto_send, 1); -	slot = rack_get_pacing_delay(rack, tp, *tot_len, NULL, segsiz, __LINE__); -	rack_start_hpts_timer(rack, tp, cts, slot, *tot_len, 0); +	pacing_delay = rack_get_pacing_delay(rack, tp, *tot_len, NULL, segsiz, __LINE__); +	rack_start_hpts_timer(rack, tp, cts, pacing_delay, *tot_len, 0);  #ifdef TCP_ACCOUNTING  	crtsc = get_cyclecount();  	if (tp->t_flags2 & TF2_TCP_ACCOUNTING) { @@ -19707,7 +19499,7 @@ rack_output(struct tcpcb *tp)  	struct rack_sendmap *rsm = NULL;  	int32_t tso, mtu;  	struct tcpopt to; -	int32_t slot = 0; +	int32_t pacing_delay = 0;  	int32_t sup_rack = 0;  	uint32_t cts, ms_cts, delayed, early;  	uint32_t add_flag = RACK_SENT_SP; @@ -20070,7 +19862,7 @@ again:  		if (rsm == NULL) {  			if (hpts_calling)  				/* Retry in a ms */ -				slot = (1 * HPTS_USEC_IN_MSEC); +				pacing_delay = (1 * HPTS_USEC_IN_MSEC);  			so = inp->inp_socket;  			sb = &so->so_snd;  			goto just_return_nolock; @@ -20877,7 +20669,7 @@ just_return_nolock:  		}  		if (tot_len_this_send > 0) {  			rack->r_ctl.fsb.recwin = recwin; -			slot = rack_get_pacing_delay(rack, tp, tot_len_this_send, NULL, segsiz, __LINE__); +			pacing_delay = rack_get_pacing_delay(rack, tp, tot_len_this_send, NULL, segsiz, __LINE__);  			if ((error == 0) &&  			    rack_use_rfo &&  			    ((flags & (TH_SYN|TH_FIN)) == 0) && @@ -21060,8 +20852,8 @@ just_return_nolock:  			/* Yes lets make sure to move to persist before timer-start */  			rack_enter_persist(tp, rack, rack->r_ctl.rc_rcvtime, tp->snd_una);  		} -		rack_start_hpts_timer(rack, tp, cts, slot, tot_len_this_send, sup_rack); -		rack_log_type_just_return(rack, cts, tot_len_this_send, slot, hpts_calling, app_limited, cwnd_to_use); +		rack_start_hpts_timer(rack, tp, cts, pacing_delay, tot_len_this_send, sup_rack); +		rack_log_type_just_return(rack, cts, tot_len_this_send, pacing_delay, hpts_calling, app_limited, cwnd_to_use);  	}  #ifdef NETFLIX_SHARED_CWND  	if ((sbavail(sb) == 0) && @@ -21100,8 +20892,8 @@ send:  		 * we come around to again, the flag will be clear.  		 
*/  		check_done = 1; -		slot = rack_check_queue_level(rack, tp, &tv, cts, len, segsiz); -		if (slot) { +		pacing_delay = rack_check_queue_level(rack, tp, &tv, cts, len, segsiz); +		if (pacing_delay) {  			rack->r_ctl.rc_agg_delayed = 0;  			rack->r_ctl.rc_agg_early = 0;  			rack->r_early = 0; @@ -22358,11 +22150,11 @@ nomore:  					rack_log_queue_level(tp, rack, len, &tv, cts);  			} else  				tcp_trace_point(rack->rc_tp, TCP_TP_ENOBUF); -			slot = ((1 + rack->rc_enobuf) * HPTS_USEC_IN_MSEC); +			pacing_delay = ((1 + rack->rc_enobuf) * HPTS_USEC_IN_MSEC);  			if (rack->rc_enobuf < 0x7f)  				rack->rc_enobuf++; -			if (slot < (10 * HPTS_USEC_IN_MSEC)) -				slot = 10 * HPTS_USEC_IN_MSEC; +			if (pacing_delay < (10 * HPTS_USEC_IN_MSEC)) +				pacing_delay = 10 * HPTS_USEC_IN_MSEC;  			if (rack->r_ctl.crte != NULL) {  				counter_u64_add(rack_saw_enobuf_hw, 1);  				tcp_rl_log_enobuf(rack->r_ctl.crte); @@ -22389,8 +22181,8 @@ nomore:  					goto again;  				}  			} -			slot = 10 * HPTS_USEC_IN_MSEC; -			rack_start_hpts_timer(rack, tp, cts, slot, 0, 0); +			pacing_delay = 10 * HPTS_USEC_IN_MSEC; +			rack_start_hpts_timer(rack, tp, cts, pacing_delay, 0, 0);  #ifdef TCP_ACCOUNTING  			crtsc = get_cyclecount();  			if (tp->t_flags2 & TF2_TCP_ACCOUNTING) { @@ -22412,8 +22204,8 @@ nomore:  			}  			/* FALLTHROUGH */  		default: -			slot = 10 * HPTS_USEC_IN_MSEC; -			rack_start_hpts_timer(rack, tp, cts, slot, 0, 0); +			pacing_delay = 10 * HPTS_USEC_IN_MSEC; +			rack_start_hpts_timer(rack, tp, cts, pacing_delay, 0, 0);  #ifdef TCP_ACCOUNTING  			crtsc = get_cyclecount();  			if (tp->t_flags2 & TF2_TCP_ACCOUNTING) { @@ -22456,18 +22248,18 @@ enobufs:  		/*  		 * We don't send again after sending a RST.  		 */ -		slot = 0; +		pacing_delay = 0;  		sendalot = 0;  		if (error == 0)  			tcp_log_end_status(tp, TCP_EI_STATUS_SERVER_RST); -	} else if ((slot == 0) && (sendalot == 0) && tot_len_this_send) { +	} else if ((pacing_delay == 0) && (sendalot == 0) && tot_len_this_send) {  		/*  		 * Get our pacing rate, if an error  		 * occurred in sending (ENOBUF) we would  		 * hit the else if with slot preset. Other  		 * errors return.  		 
*/ -		slot = rack_get_pacing_delay(rack, tp, tot_len_this_send, rsm, segsiz, __LINE__); +		pacing_delay = rack_get_pacing_delay(rack, tp, tot_len_this_send, rsm, segsiz, __LINE__);  	}  	/* We have sent clear the flag */  	rack->r_ent_rec_ns = 0; @@ -22499,7 +22291,7 @@ enobufs:  		 */  		tp->t_flags &= ~(TF_WASCRECOVERY|TF_WASFRECOVERY);  	} -	if (slot) { +	if (pacing_delay) {  		/* set the rack tcb into the slot N */  		if ((error == 0) &&  		    rack_use_rfo && @@ -22564,7 +22356,7 @@ skip_all_send:  	/* Assure when we leave that snd_nxt will point to top */  	if (SEQ_GT(tp->snd_max, tp->snd_nxt))  		tp->snd_nxt = tp->snd_max; -	rack_start_hpts_timer(rack, tp, cts, slot, tot_len_this_send, 0); +	rack_start_hpts_timer(rack, tp, cts, pacing_delay, tot_len_this_send, 0);  #ifdef TCP_ACCOUNTING  	crtsc = get_cyclecount() - ts_val;  	if (tot_len_this_send) { diff --git a/sys/netinet/tcp_stacks/tcp_rack.h b/sys/netinet/tcp_stacks/tcp_rack.h index 144b4fabf7eb..cac17d9aeb50 100644 --- a/sys/netinet/tcp_stacks/tcp_rack.h +++ b/sys/netinet/tcp_stacks/tcp_rack.h @@ -462,7 +462,6 @@ struct rack_control {  	uint64_t rc_gp_output_ts; /* chg*/  	uint64_t rc_gp_cumack_ts; /* chg*/  	struct timeval act_rcv_time; -	struct timeval rc_last_time_decay;	/* SAD time decay happened here */  	uint64_t gp_bw;  	uint64_t init_rate;  #ifdef NETFLIX_SHARED_CWND diff --git a/sys/netinet/tcp_subr.c b/sys/netinet/tcp_subr.c index c817c79881d6..b6f428b279b3 100644 --- a/sys/netinet/tcp_subr.c +++ b/sys/netinet/tcp_subr.c @@ -607,7 +607,7 @@ tcp_recv_udp_tunneled_packet(struct mbuf *m, int off, struct inpcb *inp,  		}  	}  	m->m_pkthdr.tcp_tun_port = port = uh->uh_sport; -	bcopy(th, uh, m->m_len - off); +	bcopy(th, uh, m->m_len - off - sizeof(struct udphdr));  	m->m_len -= sizeof(struct udphdr);  	m->m_pkthdr.len -= sizeof(struct udphdr);  	/* diff --git a/sys/netinet/tcp_syncache.c b/sys/netinet/tcp_syncache.c index 2bb99596f965..3a7755e9f09e 100644 --- a/sys/netinet/tcp_syncache.c +++ b/sys/netinet/tcp_syncache.c @@ -1046,6 +1046,8 @@ abort:   *   * On syncache_socket() success the newly created socket   * has its underlying inp locked. + * + * *lsop is updated, if and only if 1 is returned.   
*/  int  syncache_expand(struct in_conninfo *inc, struct tcpopt *to, struct tcphdr *th, @@ -1094,12 +1096,14 @@ syncache_expand(struct in_conninfo *inc, struct tcpopt *to, struct tcphdr *th,  				 */  				SCH_UNLOCK(sch);  				TCPSTAT_INC(tcps_sc_spurcookie); -				if ((s = tcp_log_addrs(inc, th, NULL, NULL))) +				if ((s = tcp_log_addrs(inc, th, NULL, NULL))) {  					log(LOG_DEBUG, "%s; %s: Spurious ACK, "  					    "segment rejected "  					    "(syncookies disabled)\n",  					    s, __func__); -				goto failed; +					free(s, M_TCPLOG); +				} +				return (0);  			}  			if (sch->sch_last_overflow <  			    time_uptime - SYNCOOKIE_LIFETIME) { @@ -1109,12 +1113,14 @@ syncache_expand(struct in_conninfo *inc, struct tcpopt *to, struct tcphdr *th,  				 */  				SCH_UNLOCK(sch);  				TCPSTAT_INC(tcps_sc_spurcookie); -				if ((s = tcp_log_addrs(inc, th, NULL, NULL))) +				if ((s = tcp_log_addrs(inc, th, NULL, NULL))) {  					log(LOG_DEBUG, "%s; %s: Spurious ACK, "  					    "segment rejected "  					    "(no syncache entry)\n",  					    s, __func__); -				goto failed; +					free(s, M_TCPLOG); +				} +				return (0);  			}  			SCH_UNLOCK(sch);  		} @@ -1128,11 +1134,13 @@ syncache_expand(struct in_conninfo *inc, struct tcpopt *to, struct tcphdr *th,  			TCPSTAT_INC(tcps_sc_recvcookie);  		} else {  			TCPSTAT_INC(tcps_sc_failcookie); -			if ((s = tcp_log_addrs(inc, th, NULL, NULL))) +			if ((s = tcp_log_addrs(inc, th, NULL, NULL))) {  				log(LOG_DEBUG, "%s; %s: Segment failed "  				    "SYNCOOKIE authentication, segment rejected "  				    "(probably spoofed)\n", s, __func__); -			goto failed; +				free(s, M_TCPLOG); +			} +			return (0);  		}  #if defined(IPSEC_SUPPORT) || defined(TCP_SIGNATURE)  		/* If received ACK has MD5 signature, check it. */ @@ -1160,7 +1168,7 @@ syncache_expand(struct in_conninfo *inc, struct tcpopt *to, struct tcphdr *th,  		/*  		 * If listening socket requested TCP digests, check that  		 * received ACK has signature and it is correct. -		 * If not, drop the ACK and leave sc entry in th cache, +		 * If not, drop the ACK and leave sc entry in the cache,  		 * because SYN was received with correct signature.  		 */  		if (sc->sc_flags & SCF_SIGNATURE) { @@ -1206,9 +1214,9 @@ syncache_expand(struct in_conninfo *inc, struct tcpopt *to, struct tcphdr *th,  				    "%s; %s: SEG.TSval %u < TS.Recent %u, "  				    "segment dropped\n", s, __func__,  				    to->to_tsval, sc->sc_tsreflect); -				free(s, M_TCPLOG);  			}  			SCH_UNLOCK(sch); +			free(s, M_TCPLOG);  			return (-1);  /* Do not send RST */  		} @@ -1225,7 +1233,6 @@ syncache_expand(struct in_conninfo *inc, struct tcpopt *to, struct tcphdr *th,  				    "expected, segment processed normally\n",  				    s, __func__);  				free(s, M_TCPLOG); -				s = NULL;  			}  		} @@ -1285,7 +1292,8 @@ syncache_expand(struct in_conninfo *inc, struct tcpopt *to, struct tcphdr *th,  				    "segment rejected\n",  				    s, __func__, th->th_ack, sc->sc_iss + 1);  			SCH_UNLOCK(sch); -			goto failed; +			free(s, M_TCPLOG); +			return (0);  /* Do send RST, do not free sc. 
*/  		}  		TAILQ_REMOVE(&sch->sch_bucket, sc, sc_hash); @@ -1311,16 +1319,6 @@ syncache_expand(struct in_conninfo *inc, struct tcpopt *to, struct tcphdr *th,  	if (sc != &scs)  		syncache_free(sc);  	return (1); -failed: -	if (sc != NULL) { -		TCPSTATES_DEC(TCPS_SYN_RECEIVED); -		if (sc != &scs) -			syncache_free(sc); -	} -	if (s != NULL) -		free(s, M_TCPLOG); -	*lsop = NULL; -	return (0);  }  static struct socket * @@ -1382,6 +1380,7 @@ syncache_add(struct in_conninfo *inc, struct tcpopt *to, struct tcphdr *th,  	struct tcpcb *tp;  	struct socket *rv = NULL;  	struct syncache *sc = NULL; +	struct ucred *cred;  	struct syncache_head *sch;  	struct mbuf *ipopts = NULL;  	u_int ltflags; @@ -1410,6 +1409,7 @@ syncache_add(struct in_conninfo *inc, struct tcpopt *to, struct tcphdr *th,  	 */  	KASSERT(SOLISTENING(so), ("%s: %p not listening", __func__, so));  	tp = sototcpcb(so); +	cred = V_tcp_syncache.see_other ? NULL : crhold(so->so_cred);  #ifdef INET6  	if (inc->inc_flags & INC_ISIPV6) { @@ -1638,16 +1638,16 @@ syncache_add(struct in_conninfo *inc, struct tcpopt *to, struct tcphdr *th,  	/*  	 * sc_cred is only used in syncache_pcblist() to list TCP endpoints in  	 * TCPS_SYN_RECEIVED state when V_tcp_syncache.see_other is false. -	 * Therefore, store the credentials and take a reference count only -	 * when needed: +	 * Therefore, store the credentials only when needed:  	 * - sc is allocated from the zone and not using the on stack instance.  	 * - the sysctl variable net.inet.tcp.syncache.see_other is false.  	 * The reference count is decremented when a zone allocated sc is  	 * freed in syncache_free().  	 */ -	if (sc != &scs && !V_tcp_syncache.see_other) -		sc->sc_cred = crhold(so->so_cred); -	else +	if (sc != &scs && !V_tcp_syncache.see_other) { +		sc->sc_cred = cred; +		cred = NULL; +	} else  		sc->sc_cred = NULL;  	sc->sc_port = port;  	sc->sc_ipopts = ipopts; @@ -1785,6 +1785,8 @@ donenoprobe:  		tcp_fastopen_decrement_counter(tfo_pending);  tfo_expanded: +	if (cred != NULL) +		crfree(cred);  	if (sc == NULL || sc == &scs) {  #ifdef MAC  		mac_syncache_destroy(&maclabel); diff --git a/sys/netinet/tcp_timer.c b/sys/netinet/tcp_timer.c index 3b9fe7a317b0..57c57666fa3a 100644 --- a/sys/netinet/tcp_timer.c +++ b/sys/netinet/tcp_timer.c @@ -513,9 +513,12 @@ tcp_timer_persist(struct tcpcb *tp)  	if (progdrop || (tp->t_rxtshift >= V_tcp_retries &&  	    (ticks - tp->t_rcvtime >= tcp_maxpersistidle ||  	     ticks - tp->t_rcvtime >= TCP_REXMTVAL(tp) * tcp_totbackoff))) { -		if (!progdrop) +		if (progdrop) { +			tcp_log_end_status(tp, TCP_EI_STATUS_PROGRESS); +		} else {  			TCPSTAT_INC(tcps_persistdrop); -		tcp_log_end_status(tp, TCP_EI_STATUS_PERSIST_MAX); +			tcp_log_end_status(tp, TCP_EI_STATUS_PERSIST_MAX); +		}  		goto dropit;  	}  	/* diff --git a/sys/netinet/udp_usrreq.c b/sys/netinet/udp_usrreq.c index cea8a916679b..f1d952037d5a 100644 --- a/sys/netinet/udp_usrreq.c +++ b/sys/netinet/udp_usrreq.c @@ -787,7 +787,8 @@ udplite_ctlinput(struct icmp *icmp)  static int  udp_pcblist(SYSCTL_HANDLER_ARGS)  { -	struct inpcb_iterator inpi = INP_ALL_ITERATOR(&V_udbinfo, +	struct inpcbinfo *pcbinfo = udp_get_inpcbinfo(arg2); +	struct inpcb_iterator inpi = INP_ALL_ITERATOR(pcbinfo,  	    INPLOOKUP_RLOCKPCB);  	struct xinpgen xig;  	struct inpcb *inp; @@ -799,7 +800,7 @@ udp_pcblist(SYSCTL_HANDLER_ARGS)  	if (req->oldptr == 0) {  		int n; -		n = V_udbinfo.ipi_count; +		n = pcbinfo->ipi_count;  		n += imax(n / 8, 10);  		req->oldidx = 2 * (sizeof xig) + n * sizeof(struct xinpcb);  		return (0); 
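udp_pcblist() is now driven by the sysctl node's arg2 instead of hard-coding V_udbinfo, so a single handler can walk either the UDP or the UDP-Lite pcb database; the hunks below finish that conversion and register a second node for UDP-Lite. A minimal kernel-side sketch of the arg2-to-pcbinfo selection this relies on, assuming udp_get_inpcbinfo() simply maps the protocol number onto the two VNET pcbinfo instances declared in udp_var.h (the function name below is hypothetical and illustrative only):

/*
 * Illustrative only: mirrors what udp_get_inpcbinfo() is assumed to do
 * with the IPPROTO_UDP / IPPROTO_UDPLITE value passed via arg2.
 */
static __inline struct inpcbinfo *
example_udp_pcbinfo(int protocol)
{
	/* UDP-Lite endpoints live in ulitecbinfo, plain UDP in udbinfo. */
	return ((protocol == IPPROTO_UDPLITE) ?
	    &V_ulitecbinfo : &V_udbinfo);
}

With the lookup keyed off arg2, the existing net.inet.udp.pcblist node passes IPPROTO_UDP, the new net.inet.udplite.pcblist node passes IPPROTO_UDPLITE, and both share udp_pcblist() otherwise unchanged.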
@@ -810,8 +811,8 @@ udp_pcblist(SYSCTL_HANDLER_ARGS)  	bzero(&xig, sizeof(xig));  	xig.xig_len = sizeof xig; -	xig.xig_count = V_udbinfo.ipi_count; -	xig.xig_gen = V_udbinfo.ipi_gencnt; +	xig.xig_count = pcbinfo->ipi_count; +	xig.xig_gen = pcbinfo->ipi_gencnt;  	xig.xig_sogen = so_gencnt;  	error = SYSCTL_OUT(req, &xig, sizeof xig);  	if (error) @@ -838,9 +839,9 @@ udp_pcblist(SYSCTL_HANDLER_ARGS)  		 * that something happened while we were processing this  		 * request, and it might be necessary to retry.  		 */ -		xig.xig_gen = V_udbinfo.ipi_gencnt; +		xig.xig_gen = pcbinfo->ipi_gencnt;  		xig.xig_sogen = so_gencnt; -		xig.xig_count = V_udbinfo.ipi_count; +		xig.xig_count = pcbinfo->ipi_count;  		error = SYSCTL_OUT(req, &xig, sizeof xig);  	} @@ -848,10 +849,15 @@ udp_pcblist(SYSCTL_HANDLER_ARGS)  }  SYSCTL_PROC(_net_inet_udp, UDPCTL_PCBLIST, pcblist, -    CTLTYPE_OPAQUE | CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, 0, +    CTLTYPE_OPAQUE | CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, IPPROTO_UDP,      udp_pcblist, "S,xinpcb",      "List of active UDP sockets"); +SYSCTL_PROC(_net_inet_udplite, OID_AUTO, pcblist, +    CTLTYPE_OPAQUE | CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, IPPROTO_UDPLITE, +    udp_pcblist, "S,xinpcb", +    "List of active UDP-Lite sockets"); +  #ifdef INET  static int  udp_getcred(SYSCTL_HANDLER_ARGS) @@ -1166,7 +1172,19 @@ udp_send(struct socket *so, int flags, struct mbuf *m, struct sockaddr *addr,  	else  		INP_RLOCK(inp);  	NET_EPOCH_ENTER(et); +#ifdef INET6 +	if ((flags & PRUS_IPV6) != 0) { +		if ((inp->in6p_outputopts != NULL) && +		    (inp->in6p_outputopts->ip6po_tclass != -1)) +			tos = (u_char)inp->in6p_outputopts->ip6po_tclass; +		else +			tos = 0; +	} else { +		tos = inp->inp_ip_tos; +	} +#else  	tos = inp->inp_ip_tos; +#endif  	if (control != NULL) {  		/*  		 * XXX: Currently, we assume all the optional information is @@ -1190,6 +1208,23 @@ udp_send(struct socket *so, int flags, struct mbuf *m, struct sockaddr *addr,  			error = udp_v4mapped_pktinfo(cm, &src, inp, flags);  			if (error != 0)  				break; +			if (((flags & PRUS_IPV6) != 0) && +			    (cm->cmsg_level == IPPROTO_IPV6) && +			    (cm->cmsg_type == IPV6_TCLASS)) { +				int tclass; + +				if (cm->cmsg_len != CMSG_LEN(sizeof(int))) { +					error = EINVAL; +					break; +				} +				tclass = *(int *)CMSG_DATA(cm); +				if (tclass < -1 || tclass > 255) { +					error = EINVAL; +					break; +				} +				if (tclass != -1) +					tos = (u_char)tclass; +			}  #endif  			if (cm->cmsg_level != IPPROTO_IP)  				continue; diff --git a/sys/netinet/udp_var.h b/sys/netinet/udp_var.h index 3895f365db3c..3ae08fc0b8f0 100644 --- a/sys/netinet/udp_var.h +++ b/sys/netinet/udp_var.h @@ -147,6 +147,7 @@ void	kmod_udpstat_inc(int statnum);  	} while (0)  SYSCTL_DECL(_net_inet_udp); +SYSCTL_DECL(_net_inet_udplite);  VNET_DECLARE(struct inpcbinfo, udbinfo);  VNET_DECLARE(struct inpcbinfo, ulitecbinfo); | 
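The udp_send() changes above let an AF_INET6 UDP socket sending to a v4-mapped destination control the IPv4 TOS byte, either from the sticky IPV6_TCLASS option (ip6po_tclass) or from a per-packet IPV6_TCLASS ancillary object; a cmsg with a bad length or a value outside -1..255 is rejected with EINVAL, and a value of -1 leaves the socket default in place. A hedged userland sketch of the kind of sender this now honors; the ::ffff:192.0.2.1 destination and the discard port are placeholders:

#include <sys/socket.h>
#include <netinet/in.h>
#include <arpa/inet.h>
#include <err.h>
#include <string.h>
#include <unistd.h>

int
main(void)
{
	int s, off = 0, tclass = 0x20;		/* example traffic class */
	struct sockaddr_in6 dst;
	struct iovec iov;
	struct msghdr msg;
	struct cmsghdr *cm;
	char payload[] = "hello";
	union {
		char buf[CMSG_SPACE(sizeof(int))];
		struct cmsghdr align;
	} cbuf;

	if ((s = socket(AF_INET6, SOCK_DGRAM, 0)) == -1)
		err(1, "socket");
	/* Allow v4-mapped destinations on this AF_INET6 socket. */
	if (setsockopt(s, IPPROTO_IPV6, IPV6_V6ONLY, &off, sizeof(off)) == -1)
		err(1, "setsockopt");

	memset(&dst, 0, sizeof(dst));
	dst.sin6_family = AF_INET6;
	dst.sin6_len = sizeof(dst);
	dst.sin6_port = htons(9);		/* placeholder: discard */
	if (inet_pton(AF_INET6, "::ffff:192.0.2.1", &dst.sin6_addr) != 1)
		errx(1, "inet_pton");

	iov.iov_base = payload;
	iov.iov_len = sizeof(payload);
	memset(&msg, 0, sizeof(msg));
	msg.msg_name = &dst;
	msg.msg_namelen = sizeof(dst);
	msg.msg_iov = &iov;
	msg.msg_iovlen = 1;
	msg.msg_control = cbuf.buf;
	msg.msg_controllen = sizeof(cbuf.buf);

	/* Per-packet traffic class; the kernel maps it onto the IPv4 TOS. */
	cm = CMSG_FIRSTHDR(&msg);
	cm->cmsg_level = IPPROTO_IPV6;
	cm->cmsg_type = IPV6_TCLASS;
	cm->cmsg_len = CMSG_LEN(sizeof(int));
	memcpy(CMSG_DATA(cm), &tclass, sizeof(tclass));

	if (sendmsg(s, &msg, 0) == -1)
		err(1, "sendmsg");
	close(s);
	return (0);
}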

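Returning to the HPTS churn in the rack.c hunks earlier in this diff: tcp_hpts_insert_diag() is gone and callers no longer convert their delay with HPTS_USEC_TO_SLOTS()/HPTS_MS_TO_SLOTS(); tcp_hpts_insert() now takes the pacing delay directly in microseconds plus an optional struct hpts_diag pointer, matching the slot-to-pacing_delay renaming throughout rack. A kernel-side sketch of the new calling convention, assuming a (struct tcpcb *, uint32_t usecs, struct hpts_diag *) prototype from tcp_hpts.h; the helper name is hypothetical:

/*
 * Hypothetical helper: queue a connection on HPTS using the
 * microsecond-based interface.  Very small requests are rounded up to
 * one slot's worth of time before handing them to the pacer.
 */
static void
example_queue_on_hpts(struct tcpcb *tp, uint32_t delay_usecs,
    struct hpts_diag *diag)
{
	if (delay_usecs < HPTS_USECS_PER_SLOT)
		delay_usecs = HPTS_USECS_PER_SLOT;
	/* diag may be NULL when the caller has no use for diagnostics. */
	tcp_hpts_insert(tp, delay_usecs, diag);
}

Callers that want the diagnostics, such as rack_switch_failed() above, pass a stack struct hpts_diag and feed it to rack_log_hpts_diag(); others, like rack_process_timers(), simply pass NULL.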