Diffstat (limited to 'sys/netinet')
113 files changed, 6221 insertions, 6355 deletions
diff --git a/sys/netinet/cc/cc.c b/sys/netinet/cc/cc.c index 9a8ca760fa28..d85ad4e9f4fd 100644 --- a/sys/netinet/cc/cc.c +++ b/sys/netinet/cc/cc.c @@ -392,6 +392,7 @@ void newreno_cc_post_recovery(struct cc_var *ccv) { int pipe; + uint32_t mss = tcp_fixed_maxseg(ccv->tp); if (IN_FASTRECOVERY(CCV(ccv, t_flags))) { /* @@ -400,20 +401,14 @@ newreno_cc_post_recovery(struct cc_var *ccv) * approximately snd_ssthresh outstanding data. But in case we * would be inclined to send a burst, better to do it via the * slow start mechanism. - * - * XXXLAS: Find a way to do this without needing curack */ - if (V_tcp_do_newsack) - pipe = tcp_compute_pipe(ccv->ccvc.tcp); - else - pipe = CCV(ccv, snd_max) - ccv->curack; + pipe = tcp_compute_pipe(ccv->tp); if (pipe < CCV(ccv, snd_ssthresh)) /* * Ensure that cwnd does not collapse to 1 MSS under * adverse conditions. Implements RFC6582 */ - CCV(ccv, snd_cwnd) = max(pipe, CCV(ccv, t_maxseg)) + - CCV(ccv, t_maxseg); + CCV(ccv, snd_cwnd) = max(pipe, mss) + mss; else CCV(ccv, snd_cwnd) = CCV(ccv, snd_ssthresh); } @@ -440,7 +435,7 @@ newreno_cc_after_idle(struct cc_var *ccv) * maximum of the former ssthresh or 3/4 of the old cwnd, to * not exit slow-start prematurely. */ - rw = tcp_compute_initwnd(tcp_fixed_maxseg(ccv->ccvc.tcp)); + rw = tcp_compute_initwnd(tcp_fixed_maxseg(ccv->tp)); CCV(ccv, snd_ssthresh) = max(CCV(ccv, snd_ssthresh), CCV(ccv, snd_cwnd)-(CCV(ccv, snd_cwnd)>>2)); @@ -449,15 +444,14 @@ newreno_cc_after_idle(struct cc_var *ccv) } /* - * Perform any necessary tasks before we enter congestion recovery. - */ -void -newreno_cc_cong_signal(struct cc_var *ccv, ccsignal_t type) + * Get a new congestion window size on a multiplicative decrease event. + * */ +u_int +newreno_cc_cwnd_on_multiplicative_decrease(struct cc_var *ccv, uint32_t mss) { - uint32_t cwin, factor, mss, pipe; + uint32_t cwin, factor; cwin = CCV(ccv, snd_cwnd); - mss = tcp_fixed_maxseg(ccv->ccvc.tcp); /* * Other TCP congestion controls use newreno_cong_signal(), but * with their own private cc_data. Make sure the cc_data is used @@ -465,12 +459,24 @@ newreno_cc_cong_signal(struct cc_var *ccv, ccsignal_t type) */ factor = V_newreno_beta; + return max(((uint64_t)cwin * (uint64_t)factor) / (100ULL * (uint64_t)mss), 2) * mss; +} + +/* + * Perform any necessary tasks before we enter congestion recovery. + */ +void +newreno_cc_cong_signal(struct cc_var *ccv, ccsignal_t type) +{ + uint32_t cwin, mss, pipe; + + mss = tcp_fixed_maxseg(ccv->tp); + /* Catch algos which mistakenly leak private signal types. 
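[Note, not part of the patch] The helper factored out above computes the multiplicative decrease as cwnd scaled by the beta percentage (V_newreno_beta, 50 by default), clamped to at least two segments and rounded down to whole segments. A minimal stand-alone sketch of that arithmetic, with illustrative names and example values:

#include <stdint.h>
#include <stdio.h>

/*
 * Sketch of newreno_cc_cwnd_on_multiplicative_decrease(): scale cwnd by
 * beta/100, clamp to a minimum of 2 segments, round down to whole segments.
 */
static uint32_t
decrease_cwnd(uint32_t cwnd, uint32_t beta, uint32_t mss)
{
	uint64_t segs;

	segs = ((uint64_t)cwnd * beta) / (100ULL * mss);
	if (segs < 2)
		segs = 2;
	return ((uint32_t)(segs * mss));
}

int
main(void)
{
	/* cwnd = 100000 bytes, beta = 50, MSS = 1460: 34 segments = 49640 bytes. */
	printf("%u\n", (unsigned)decrease_cwnd(100000, 50, 1460));
	return (0);
}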
*/ KASSERT((type & CC_SIGPRIVMASK) == 0, ("%s: congestion signal type 0x%08x is private\n", __func__, type)); - cwin = max(((uint64_t)cwin * (uint64_t)factor) / (100ULL * (uint64_t)mss), - 2) * mss; + cwin = newreno_cc_cwnd_on_multiplicative_decrease(ccv, mss); switch (type) { case CC_NDUPACK: @@ -489,13 +495,7 @@ newreno_cc_cong_signal(struct cc_var *ccv, ccsignal_t type) break; case CC_RTO: if (CCV(ccv, t_rxtshift) == 1) { - if (V_tcp_do_newsack) { - pipe = tcp_compute_pipe(ccv->ccvc.tcp); - } else { - pipe = CCV(ccv, snd_max) - - CCV(ccv, snd_fack) + - CCV(ccv, sackhint.sack_bytes_rexmit); - } + pipe = tcp_compute_pipe(ccv->tp); CCV(ccv, snd_ssthresh) = max(2, min(CCV(ccv, snd_wnd), pipe) / 2 / mss) * mss; } @@ -506,78 +506,110 @@ newreno_cc_cong_signal(struct cc_var *ccv, ccsignal_t type) } } -void -newreno_cc_ack_received(struct cc_var *ccv, ccsignal_t type) +u_int +newreno_cc_cwnd_in_cong_avoid(struct cc_var *ccv) { - if (type == CC_ACK && !IN_RECOVERY(CCV(ccv, t_flags)) && - (ccv->flags & CCF_CWND_LIMITED)) { - u_int cw = CCV(ccv, snd_cwnd); - u_int incr = CCV(ccv, t_maxseg); + u_int cw = CCV(ccv, snd_cwnd); + u_int incr = tcp_fixed_maxseg(ccv->tp); + + KASSERT(cw > CCV(ccv, snd_ssthresh), + ("congestion control state not in congestion avoidance\n")); + /* + * Regular in-order ACK, open the congestion window. + * The congestion control state we're in is congestion avoidance. + * + * Check if ABC (RFC 3465) is enabled. + * cong avoid: cwnd > ssthresh + * + * cong avoid and ABC (RFC 3465): + * Grow cwnd linearly by maxseg per RTT for each + * cwnd worth of ACKed data. + * + * cong avoid without ABC (RFC 5681): + * Grow cwnd linearly by approximately maxseg per RTT using + * maxseg^2 / cwnd per ACK as the increment. + * If cwnd > maxseg^2, fix the cwnd increment at 1 byte to + * avoid capping cwnd. + */ + if (V_tcp_do_rfc3465) { + if (ccv->flags & CCF_ABC_SENTAWND) + ccv->flags &= ~CCF_ABC_SENTAWND; + else + incr = 0; + } else + incr = max((incr * incr / cw), 1); + /* ABC is on by default, so incr equals 0 frequently. */ + if (incr > 0) + return min(cw + incr, TCP_MAXWIN << CCV(ccv, snd_scale)); + else + return cw; +} + +u_int +newreno_cc_cwnd_in_slow_start(struct cc_var *ccv) +{ + u_int cw = CCV(ccv, snd_cwnd); + u_int mss = tcp_fixed_maxseg(ccv->tp); + u_int incr = mss; + + KASSERT(cw <= CCV(ccv, snd_ssthresh), + ("congestion control state not in slow start\n")); + + /* + * Regular in-order ACK, open the congestion window. + * The congestion control state we're in is slow start. + * + * slow start: cwnd <= ssthresh + * + * slow start and ABC (RFC 3465): + * Grow cwnd exponentially by the amount of data + * ACKed capping the max increment per ACK to + * (abc_l_var * maxseg) bytes. + * + * slow start without ABC (RFC 5681): + * Grow cwnd exponentially by maxseg per ACK. + */ + if (V_tcp_do_rfc3465) { /* - * Regular in-order ACK, open the congestion window. - * Method depends on which congestion control state we're - * in (slow start or cong avoid) and if ABC (RFC 3465) is - * enabled. - * - * slow start: cwnd <= ssthresh - * cong avoid: cwnd > ssthresh - * - * slow start and ABC (RFC 3465): - * Grow cwnd exponentially by the amount of data - * ACKed capping the max increment per ACK to - * (abc_l_var * maxseg) bytes. - * - * slow start without ABC (RFC 5681): - * Grow cwnd exponentially by maxseg per ACK. - * - * cong avoid and ABC (RFC 3465): - * Grow cwnd linearly by maxseg per RTT for each - * cwnd worth of ACKed data. + * In slow-start with ABC enabled and no RTO in sight? 
+ * (Must not use abc_l_var > 1 if slow starting after + * an RTO. On RTO, snd_nxt = snd_una, so the + * snd_nxt == snd_max check is sufficient to + * handle this). * - * cong avoid without ABC (RFC 5681): - * Grow cwnd linearly by approximately maxseg per RTT using - * maxseg^2 / cwnd per ACK as the increment. - * If cwnd > maxseg^2, fix the cwnd increment at 1 byte to - * avoid capping cwnd. + * XXXLAS: Find a way to signal SS after RTO that + * doesn't rely on tcpcb vars. */ - if (cw > CCV(ccv, snd_ssthresh)) { - if (V_tcp_do_rfc3465) { - if (ccv->flags & CCF_ABC_SENTAWND) - ccv->flags &= ~CCF_ABC_SENTAWND; - else - incr = 0; - } else - incr = max((incr * incr / cw), 1); - } else if (V_tcp_do_rfc3465) { - /* - * In slow-start with ABC enabled and no RTO in sight? - * (Must not use abc_l_var > 1 if slow starting after - * an RTO. On RTO, snd_nxt = snd_una, so the - * snd_nxt == snd_max check is sufficient to - * handle this). - * - * XXXLAS: Find a way to signal SS after RTO that - * doesn't rely on tcpcb vars. - */ - uint16_t abc_val; - - if (ccv->flags & CCF_USE_LOCAL_ABC) - abc_val = ccv->labc; - else - abc_val = V_tcp_abc_l_var; - if (CCV(ccv, snd_nxt) == CCV(ccv, snd_max)) - incr = min(ccv->bytes_this_ack, - ccv->nsegs * abc_val * - CCV(ccv, t_maxseg)); - else - incr = min(ccv->bytes_this_ack, CCV(ccv, t_maxseg)); + uint16_t abc_val; + if (ccv->flags & CCF_USE_LOCAL_ABC) + abc_val = ccv->labc; + else + abc_val = V_tcp_abc_l_var; + if (CCV(ccv, snd_nxt) == CCV(ccv, snd_max)) + incr = min(ccv->bytes_this_ack, + ccv->nsegs * abc_val * mss); + else + incr = min(ccv->bytes_this_ack, mss); + } + /* ABC is on by default, so incr equals 0 frequently. */ + if (incr > 0) + return min(cw + incr, TCP_MAXWIN << CCV(ccv, snd_scale)); + else + return cw; +} + +void +newreno_cc_ack_received(struct cc_var *ccv, ccsignal_t type) +{ + if (type == CC_ACK && !IN_RECOVERY(CCV(ccv, t_flags)) && + (ccv->flags & CCF_CWND_LIMITED)) { + if (CCV(ccv, snd_cwnd) > CCV(ccv, snd_ssthresh)) { + CCV(ccv, snd_cwnd) = newreno_cc_cwnd_in_cong_avoid(ccv); + } else { + CCV(ccv, snd_cwnd) = newreno_cc_cwnd_in_slow_start(ccv); } - /* ABC is on by default, so incr equals 0 frequently. */ - if (incr > 0) - CCV(ccv, snd_cwnd) = min(cw + incr, - TCP_MAXWIN << CCV(ccv, snd_scale)); } } diff --git a/sys/netinet/cc/cc.h b/sys/netinet/cc/cc.h index aac0825e5fe1..890bea69a14b 100644 --- a/sys/netinet/cc/cc.h +++ b/sys/netinet/cc/cc.h @@ -87,21 +87,12 @@ int cc_deregister_algo(struct cc_algo *remove_cc); #endif /* _KERNEL */ #if defined(_KERNEL) || defined(_WANT_TCPCB) -/* - * Wrapper around transport structs that contain same-named congestion - * control variables. Allows algos to be shared amongst multiple CC aware - * transprots. - */ struct cc_var { void *cc_data; /* Per-connection private CC algorithm data. */ int bytes_this_ack; /* # bytes acked by the current ACK. */ tcp_seq curack; /* Most recent ACK. */ uint32_t flags; /* Flags for cc_var (see below) */ - int type; /* Indicates which ptr is valid in ccvc. */ - union ccv_container { - struct tcpcb *tcp; - struct sctp_nets *sctp; - } ccvc; + struct tcpcb *tp; /* Pointer to tcpcb */ uint16_t nsegs; /* # segments coalesced into current chain. */ uint8_t labc; /* Dont use system abc use passed in */ }; @@ -113,10 +104,10 @@ struct cc_var { #define CCF_ACKNOW 0x0008 /* Will this ack be sent now? */ #define CCF_IPHDR_CE 0x0010 /* Does this packet set CE bit? */ #define CCF_TCPHDR_CWR 0x0020 /* Does this packet set CWR bit? 
*/ -#define CCF_MAX_CWND 0x0040 /* Have we reached maximum cwnd? */ -#define CCF_CHG_MAX_CWND 0x0080 /* CUBIC max_cwnd changed, for K */ -#define CCF_USR_IWND 0x0100 /* User specified initial window */ -#define CCF_USR_IWND_INIT_NSEG 0x0200 /* Convert segs to bytes on conn init */ +#define CCF_UNUSED1 0x0040 +#define CCF_UNUSED2 0x0080 +#define CCF_UNUSED3 0x0100 +#define CCF_UNUSED4 0x0200 #define CCF_HYSTART_ALLOWED 0x0400 /* If the CC supports it Hystart is allowed */ #define CCF_HYSTART_CAN_SH_CWND 0x0800 /* Can hystart when going CSS -> CA slam the cwnd */ #define CCF_HYSTART_CONS_SSTH 0x1000 /* Should hystart use the more conservative ssthresh */ @@ -240,6 +231,9 @@ void newreno_cc_post_recovery(struct cc_var *); void newreno_cc_after_idle(struct cc_var *); void newreno_cc_cong_signal(struct cc_var *, ccsignal_t); void newreno_cc_ack_received(struct cc_var *, ccsignal_t); +u_int newreno_cc_cwnd_on_multiplicative_decrease(struct cc_var *ccv, uint32_t mss); +u_int newreno_cc_cwnd_in_cong_avoid(struct cc_var *ccv); +u_int newreno_cc_cwnd_in_slow_start(struct cc_var *ccv); /* Called to temporarily keep an algo from going away during change */ void cc_refer(struct cc_algo *algo); diff --git a/sys/netinet/cc/cc_cdg.c b/sys/netinet/cc/cc_cdg.c index 1e9236f878d4..5b1df76e71a2 100644 --- a/sys/netinet/cc/cc_cdg.c +++ b/sys/netinet/cc/cc_cdg.c @@ -57,6 +57,7 @@ #include <sys/malloc.h> #include <sys/module.h> #include <sys/queue.h> +#include <sys/prng.h> #include <sys/socket.h> #include <sys/socketvar.h> #include <sys/sysctl.h> @@ -294,7 +295,7 @@ cdg_cb_init(struct cc_var *ccv, void *ptr) { struct cdg *cdg_data; - INP_WLOCK_ASSERT(tptoinpcb(ccv->ccvc.tcp)); + INP_WLOCK_ASSERT(tptoinpcb(ccv->tp)); if (ptr == NULL) { cdg_data = malloc(sizeof(struct cdg), M_CC_MEM, M_NOWAIT); if (cdg_data == NULL) @@ -415,27 +416,28 @@ cdg_window_increase(struct cc_var *ccv, int new_measurement) { struct cdg *cdg_data; int incr, s_w_incr; + uint32_t mss = tcp_fixed_maxseg(ccv->tp); cdg_data = ccv->cc_data; incr = s_w_incr = 0; if (CCV(ccv, snd_cwnd) <= CCV(ccv, snd_ssthresh)) { /* Slow start. */ - incr = CCV(ccv, t_maxseg); + incr = mss; s_w_incr = incr; cdg_data->window_incr = cdg_data->rtt_count = 0; } else { /* Congestion avoidance. */ if (new_measurement) { - s_w_incr = CCV(ccv, t_maxseg); + s_w_incr = mss; if (V_cdg_alpha_inc == 0) { - incr = CCV(ccv, t_maxseg); + incr = mss; } else { if (++cdg_data->rtt_count >= V_cdg_alpha_inc) { cdg_data->window_incr++; cdg_data->rtt_count = 0; } - incr = CCV(ccv, t_maxseg) * + incr = mss * cdg_data->window_incr; } } @@ -507,7 +509,8 @@ cdg_cong_signal(struct cc_var *ccv, ccsignal_t signal_type) static inline int prob_backoff(long qtrend) { - int backoff, idx, p; + int backoff, idx; + uint32_t p; backoff = (qtrend > ((MAXGRAD * V_cdg_exp_backoff_scale) << D_P_E)); @@ -519,8 +522,8 @@ prob_backoff(long qtrend) idx = qtrend; /* Backoff probability proportional to rate of queue growth. 
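[Note, not part of the patch] The probabilistic-backoff hunks above replace random(), whose range tops out at INT_MAX, with the kernel's prng32(), so the threshold is now scaled against UINT32_MAX before the comparison. A userland sketch of the same pattern, with arc4random() standing in for the kernel-only prng32():

#include <stdbool.h>
#include <stdint.h>
#include <stdlib.h>

/*
 * Back off with probability prob (0.0 to 1.0) by comparing a uniform
 * 32-bit random value against a threshold scaled to the full 32-bit
 * range, mirroring the "prng32() < p" test in the patch. Edge cases
 * near prob == 1.0 are ignored; this is illustrative only.
 */
static bool
should_backoff(double prob)
{
	uint32_t threshold;

	threshold = (uint32_t)(prob * (double)UINT32_MAX);
	return (arc4random() < threshold);
}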
*/ - p = (INT_MAX / (1 << EXP_PREC)) * probexp[idx]; - backoff = (random() < p); + p = (UINT32_MAX / (1 << EXP_PREC)) * probexp[idx]; + backoff = (prng32() < p); } return (backoff); diff --git a/sys/netinet/cc/cc_chd.c b/sys/netinet/cc/cc_chd.c index 52048a7c05ae..1d440f43578f 100644 --- a/sys/netinet/cc/cc_chd.c +++ b/sys/netinet/cc/cc_chd.c @@ -58,6 +58,7 @@ #include <sys/limits.h> #include <sys/malloc.h> #include <sys/module.h> +#include <sys/prng.h> #include <sys/queue.h> #include <sys/socket.h> #include <sys/socketvar.h> @@ -85,8 +86,8 @@ */ #define CC_CHD_DELAY 0x02000000 -/* Largest possible number returned by random(). */ -#define RANDOM_MAX INT_MAX +/* Largest possible number returned by prng32(). */ +#define RANDOM_MAX UINT32_MAX static void chd_ack_received(struct cc_var *ccv, ccsignal_t ack_type); static void chd_cb_destroy(struct cc_var *ccv); @@ -146,10 +147,11 @@ static __inline void chd_window_decrease(struct cc_var *ccv) { unsigned long win; + uint32_t mss = tcp_fixed_maxseg(ccv->tp); - win = min(CCV(ccv, snd_wnd), CCV(ccv, snd_cwnd)) / CCV(ccv, t_maxseg); + win = min(CCV(ccv, snd_wnd), CCV(ccv, snd_cwnd)) / mss; win -= max((win / 2), 1); - CCV(ccv, snd_ssthresh) = max(win, 2) * CCV(ccv, t_maxseg); + CCV(ccv, snd_ssthresh) = max(win, 2) * mss; } /* @@ -159,9 +161,9 @@ chd_window_decrease(struct cc_var *ccv) static __inline int should_backoff(int qdly, int maxqdly, struct chd *chd_data) { - unsigned long p, rand; + uint32_t rand, p; - rand = random(); + rand = prng32(); if (qdly < V_chd_qthresh) { chd_data->loss_compete = 0; @@ -189,6 +191,7 @@ chd_window_increase(struct cc_var *ccv, int new_measurement) { struct chd *chd_data; int incr; + uint32_t mss = tcp_fixed_maxseg(ccv->tp); chd_data = ccv->cc_data; incr = 0; @@ -200,23 +203,22 @@ chd_window_increase(struct cc_var *ccv, int new_measurement) if (CCV(ccv, snd_nxt) == CCV(ccv, snd_max)) { /* Not due to RTO. */ incr = min(ccv->bytes_this_ack, - V_tcp_abc_l_var * CCV(ccv, t_maxseg)); + V_tcp_abc_l_var * mss); } else { /* Due to RTO. */ - incr = min(ccv->bytes_this_ack, - CCV(ccv, t_maxseg)); + incr = min(ccv->bytes_this_ack, mss); } } else - incr = CCV(ccv, t_maxseg); + incr = mss; } else { /* Congestion avoidance. */ if (V_tcp_do_rfc3465) { if (ccv->flags & CCF_ABC_SENTAWND) { ccv->flags &= ~CCF_ABC_SENTAWND; - incr = CCV(ccv, t_maxseg); + incr = mss; } } else if (new_measurement) - incr = CCV(ccv, t_maxseg); + incr = mss; } if (chd_data->shadow_w > 0) { @@ -321,7 +323,7 @@ chd_cb_init(struct cc_var *ccv, void *ptr) { struct chd *chd_data; - INP_WLOCK_ASSERT(tptoinpcb(ccv->ccvc.tcp)); + INP_WLOCK_ASSERT(tptoinpcb(ccv->tp)); if (ptr == NULL) { chd_data = malloc(sizeof(struct chd), M_CC_MEM, M_NOWAIT); if (chd_data == NULL) @@ -379,8 +381,9 @@ chd_cong_signal(struct cc_var *ccv, ccsignal_t signal_type) } if (chd_data->shadow_w > 0) { + uint32_t mss = tcp_fixed_maxseg(ccv->tp); chd_data->shadow_w = max(chd_data->shadow_w / - CCV(ccv, t_maxseg) / 2, 2) * CCV(ccv, t_maxseg); + mss / 2, 2) * mss; } ENTER_FASTRECOVERY(CCV(ccv, t_flags)); break; diff --git a/sys/netinet/cc/cc_cubic.c b/sys/netinet/cc/cc_cubic.c index b4050326ae31..b3e15009244d 100644 --- a/sys/netinet/cc/cc_cubic.c +++ b/sys/netinet/cc/cc_cubic.c @@ -38,7 +38,7 @@ /* * An implementation of the CUBIC congestion control algorithm for FreeBSD, - * based on the Internet Draft "draft-rhee-tcpm-cubic-02" by Rhee, Xu and Ha. + * based on the Internet RFC9438 by Xu, Ha, Rhee, Goel, and Eggert. 
* Originally released as part of the NewTCP research project at Swinburne * University of Technology's Centre for Advanced Internet Architectures, * Melbourne, Australia, which was made possible in part by a grant from the @@ -81,7 +81,7 @@ static void cubic_conn_init(struct cc_var *ccv); static int cubic_mod_init(void); static void cubic_post_recovery(struct cc_var *ccv); static void cubic_record_rtt(struct cc_var *ccv); -static void cubic_ssthresh_update(struct cc_var *ccv, uint32_t maxseg); +static uint32_t cubic_get_ssthresh(struct cc_var *ccv, uint32_t maxseg); static void cubic_after_idle(struct cc_var *ccv); static size_t cubic_data_sz(void); static void cubic_newround(struct cc_var *ccv, uint32_t round_cnt); @@ -125,7 +125,7 @@ cubic_log_hystart_event(struct cc_var *ccv, struct cubic *cubicd, uint8_t mod, u if (hystart_bblogs == 0) return; - tp = ccv->ccvc.tcp; + tp = ccv->tp; if (tcp_bblogging_on(tp)) { union tcp_log_stackspecific log; struct timeval tv; @@ -168,7 +168,8 @@ cubic_does_slow_start(struct cc_var *ccv, struct cubic *cubicd) * doesn't rely on tcpcb vars. */ u_int cw = CCV(ccv, snd_cwnd); - u_int incr = CCV(ccv, t_maxseg); + uint32_t mss = tcp_fixed_maxseg(ccv->tp); + u_int incr = mss; uint16_t abc_val; cubicd->flags |= CUBICFLAG_IN_SLOWSTART; @@ -216,10 +217,9 @@ cubic_does_slow_start(struct cc_var *ccv, struct cubic *cubicd) } if (CCV(ccv, snd_nxt) == CCV(ccv, snd_max)) incr = min(ccv->bytes_this_ack, - ccv->nsegs * abc_val * - CCV(ccv, t_maxseg)); + ccv->nsegs * abc_val * mss); else - incr = min(ccv->bytes_this_ack, CCV(ccv, t_maxseg)); + incr = min(ccv->bytes_this_ack, mss); /* Only if Hystart is enabled will the flag get set */ if (cubicd->flags & CUBICFLAG_HYSTART_IN_CSS) { @@ -236,9 +236,11 @@ static void cubic_ack_received(struct cc_var *ccv, ccsignal_t type) { struct cubic *cubic_data; - unsigned long W_est, W_cubic; + uint32_t W_est, W_cubic, cwin, target, incr; int usecs_since_epoch; + uint32_t mss = tcp_fixed_maxseg(ccv->tp); + cwin = CCV(ccv, snd_cwnd); cubic_data = ccv->cc_data; cubic_record_rtt(ccv); @@ -249,7 +251,7 @@ cubic_ack_received(struct cc_var *ccv, ccsignal_t type) if (type == CC_ACK && !IN_RECOVERY(CCV(ccv, t_flags)) && (ccv->flags & CCF_CWND_LIMITED)) { /* Use the logic in NewReno ack_received() for slow start. 
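[Note, not part of the patch] The slow-start paths above (NewReno and CUBIC alike) cap the per-ACK growth under ABC (RFC 3465) at abc_l_var segments per coalesced ACK chain. A minimal sketch of that cap, with illustrative names:

#include <stdint.h>

/*
 * Grow by the bytes newly acknowledged, but by no more than
 * nsegs * abc_l_var segments, as in the ABC-limited slow start above.
 */
static uint32_t
ss_increment(uint32_t bytes_this_ack, uint16_t nsegs, uint16_t abc_l_var,
    uint32_t mss)
{
	uint32_t cap = (uint32_t)nsegs * abc_l_var * mss;

	return (bytes_this_ack < cap ? bytes_this_ack : cap);
}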
*/ - if (CCV(ccv, snd_cwnd) <= CCV(ccv, snd_ssthresh) || + if (cwin <= CCV(ccv, snd_ssthresh) || cubic_data->min_rtt_usecs == TCPTV_SRTTBASE) { cubic_does_slow_start(ccv, cubic_data); } else { @@ -264,21 +266,32 @@ cubic_ack_received(struct cc_var *ccv, ccsignal_t type) cubic_data->flags &= ~CUBICFLAG_HYSTART_ENABLED; cubic_log_hystart_event(ccv, cubic_data, 11, CCV(ccv, snd_ssthresh)); } - if ((cubic_data->flags & CUBICFLAG_RTO_EVENT) && - (cubic_data->flags & CUBICFLAG_IN_SLOWSTART)) { - /* RFC8312 Section 4.7 */ - cubic_data->flags &= ~(CUBICFLAG_RTO_EVENT | - CUBICFLAG_IN_SLOWSTART); - cubic_data->W_max = CCV(ccv, snd_cwnd); - cubic_data->t_epoch = ticks; - cubic_data->K = 0; - } else if (cubic_data->flags & (CUBICFLAG_IN_SLOWSTART | + if (cubic_data->flags & (CUBICFLAG_IN_SLOWSTART | + CUBICFLAG_CONG_EVENT | CUBICFLAG_IN_APPLIMIT)) { + /* + * At the beginning of the current congestion + * avoidance stage, The epoch variables + * (t_epoch, cwnd_epoch, K) are updated in the + * following three cases: + * 1) just exited the slow start + * 2) after a congestion event + * 3) application-limited + */ + cubic_data->t_epoch = ticks; + cubic_data->cwnd_epoch = cwin; + cubic_data->K = cubic_k(cubic_data->W_max / mss, + cubic_data->cwnd_epoch / mss); cubic_data->flags &= ~(CUBICFLAG_IN_SLOWSTART | + CUBICFLAG_CONG_EVENT | CUBICFLAG_IN_APPLIMIT); - cubic_data->t_epoch = ticks; - cubic_data->K = cubic_k(cubic_data->W_max / - CCV(ccv, t_maxseg)); + + if (cubic_data->flags & CUBICFLAG_RTO_EVENT) { + /* RFC9438 Section 4.8: Timeout */ + cubic_data->flags &= ~CUBICFLAG_RTO_EVENT; + cubic_data->W_max = cwin; + cubic_data->K = 0; + } } usecs_since_epoch = (ticks - cubic_data->t_epoch) * tick; if (usecs_since_epoch < 0) { @@ -288,52 +301,35 @@ cubic_ack_received(struct cc_var *ccv, ccsignal_t type) usecs_since_epoch = INT_MAX; cubic_data->t_epoch = ticks - INT_MAX; } + W_est = tf_cwnd(ccv); /* - * The mean RTT is used to best reflect the equations in - * the I-D. Using min_rtt in the tf_cwnd calculation - * causes W_est to grow much faster than it should if the - * RTT is dominated by network buffering rather than - * propagation delay. + * The mean RTT is used to best reflect the equations. */ - W_est = tf_cwnd(usecs_since_epoch, cubic_data->mean_rtt_usecs, - cubic_data->W_max, CCV(ccv, t_maxseg)); - W_cubic = cubic_cwnd(usecs_since_epoch + cubic_data->mean_rtt_usecs, cubic_data->W_max, - CCV(ccv, t_maxseg), + mss, cubic_data->K); - ccv->flags &= ~CCF_ABC_SENTAWND; - if (W_cubic < W_est) { + /* RFC9438 Section 4.3: Reno-friendly region */ + CCV(ccv, snd_cwnd) = W_est; + cubic_data->flags |= CUBICFLAG_IN_TF; + } else { /* - * TCP-friendly region, follow tf - * cwnd growth. - */ - if (CCV(ccv, snd_cwnd) < W_est) - CCV(ccv, snd_cwnd) = ulmin(W_est, INT_MAX); - } else if (CCV(ccv, snd_cwnd) < W_cubic) { - /* - * Concave or convex region, follow CUBIC - * cwnd growth. - * Only update snd_cwnd, if it doesn't shrink. + * RFC9438 Section 4.4 or 4.5: + * Concave or Convex Region */ - CCV(ccv, snd_cwnd) = ulmin(W_cubic, INT_MAX); - } - - /* - * If we're not in slow start and we're probing for a - * new cwnd limit at the start of a connection - * (happens when hostcache has a relevant entry), - * keep updating our current estimate of the - * W_max. 
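[Note, not part of the patch] The congestion-avoidance path above follows RFC 9438: K is derived from the gap between W_max and the cwnd recorded at the start of the epoch, and the cubic function is evaluated at the elapsed epoch time plus one mean RTT. A floating-point sketch of the two equations in units of segments (the kernel uses fixed point with CUBIC_SHIFT precision; C is the constant 0.4):

#include <math.h>

/* Figure 2 of RFC 9438: K = cbrt((W_max - cwnd_epoch) / C); zero if cwnd_epoch >= W_max. */
static double
cubic_K(double wmax_segs, double cwnd_epoch_segs)
{
	if (wmax_segs <= cwnd_epoch_segs)
		return (0.0);
	return (cbrt((wmax_segs - cwnd_epoch_segs) / 0.4));
}

/* Figure 1 of RFC 9438: W_cubic(t) = C * (t - K)^3 + W_max, t in seconds. */
static double
cubic_W(double t_secs, double wmax_segs, double cwnd_epoch_segs)
{
	double K = cubic_K(wmax_segs, cwnd_epoch_segs);

	return (0.4 * pow(t_secs - K, 3.0) + wmax_segs);
}

As in the hunk above, the per-ACK target derived from W_cubic is clamped to at most 1.5 times the current cwnd before the increment is applied.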
- */ - if (((cubic_data->flags & CUBICFLAG_CONG_EVENT) == 0) && - cubic_data->W_max < CCV(ccv, snd_cwnd)) { - cubic_data->W_max = CCV(ccv, snd_cwnd); - cubic_data->K = cubic_k(cubic_data->W_max / - CCV(ccv, t_maxseg)); + if (W_cubic < cwin) { + target = cwin; + } else if (W_cubic > ((cwin * 3) >> 1)) { + target = (cwin * 3) >> 1; + } else { + target = W_cubic; + } + incr = (((target - cwin) << CUBIC_SHIFT) / + cwin * mss) >> CUBIC_SHIFT; + CCV(ccv, snd_cwnd) = cwin + incr; } } } else if (type == CC_ACK && !IN_RECOVERY(CCV(ccv, t_flags)) && @@ -350,12 +346,11 @@ cubic_ack_received(struct cc_var *ccv, ccsignal_t type) static void cubic_after_idle(struct cc_var *ccv) { - struct cubic *cubic_data; - - cubic_data = ccv->cc_data; + struct cubic *cubic_data = ccv->cc_data; + uint32_t mss = tcp_fixed_maxseg(ccv->tp); cubic_data->W_max = ulmax(cubic_data->W_max, CCV(ccv, snd_cwnd)); - cubic_data->K = cubic_k(cubic_data->W_max / CCV(ccv, t_maxseg)); + cubic_data->K = cubic_k(cubic_data->W_max / mss, cubic_data->cwnd_epoch / mss); if ((cubic_data->flags & CUBICFLAG_HYSTART_ENABLED) == 0) { /* * Re-enable hystart if we have been idle. @@ -385,7 +380,7 @@ cubic_cb_init(struct cc_var *ccv, void *ptr) { struct cubic *cubic_data; - INP_WLOCK_ASSERT(tptoinpcb(ccv->ccvc.tcp)); + INP_WLOCK_ASSERT(tptoinpcb(ccv->tp)); if (ptr == NULL) { cubic_data = malloc(sizeof(struct cubic), M_CC_MEM, M_NOWAIT|M_ZERO); if (cubic_data == NULL) @@ -394,7 +389,9 @@ cubic_cb_init(struct cc_var *ccv, void *ptr) cubic_data = ptr; /* Init some key variables with sensible defaults. */ - cubic_data->t_epoch = ticks; + cubic_data->t_epoch = 0; + cubic_data->cwnd_epoch = 0; + cubic_data->K = 0; cubic_data->min_rtt_usecs = TCPTV_SRTTBASE; cubic_data->mean_rtt_usecs = 1; @@ -421,10 +418,10 @@ static void cubic_cong_signal(struct cc_var *ccv, ccsignal_t type) { struct cubic *cubic_data; - uint32_t mss, pipe; + uint32_t mss, pipe, ssthresh; cubic_data = ccv->cc_data; - mss = tcp_fixed_maxseg(ccv->ccvc.tcp); + mss = tcp_fixed_maxseg(ccv->tp); switch (type) { case CC_NDUPACK: @@ -436,10 +433,13 @@ cubic_cong_signal(struct cc_var *ccv, ccsignal_t type) } if (!IN_FASTRECOVERY(CCV(ccv, t_flags))) { if (!IN_CONGRECOVERY(CCV(ccv, t_flags))) { - cubic_ssthresh_update(ccv, mss); + ssthresh = cubic_get_ssthresh(ccv, mss); + CCV(ccv, snd_ssthresh) = max(ssthresh, 2 * mss); + /* + * The congestion flag will recalculate K at the + * beginning of the congestion avoidance stage. + */ cubic_data->flags |= CUBICFLAG_CONG_EVENT; - cubic_data->t_epoch = ticks; - cubic_data->K = cubic_k(cubic_data->W_max / mss); } ENTER_RECOVERY(CCV(ccv, t_flags)); } @@ -453,17 +453,20 @@ cubic_cong_signal(struct cc_var *ccv, ccsignal_t type) cubic_log_hystart_event(ccv, cubic_data, 9, CCV(ccv, snd_ssthresh)); } if (!IN_CONGRECOVERY(CCV(ccv, t_flags))) { - cubic_ssthresh_update(ccv, mss); + ssthresh = cubic_get_ssthresh(ccv, mss); + CCV(ccv, snd_ssthresh) = max(ssthresh, 2 * mss); + CCV(ccv, snd_cwnd) = max(ssthresh, mss); + /* + * The congestion flag will recalculate K at the + * beginning of the congestion avoidance stage. + */ cubic_data->flags |= CUBICFLAG_CONG_EVENT; - cubic_data->t_epoch = ticks; - cubic_data->K = cubic_k(cubic_data->W_max / mss); - CCV(ccv, snd_cwnd) = CCV(ccv, snd_ssthresh); ENTER_CONGRECOVERY(CCV(ccv, t_flags)); } break; case CC_RTO: - /* RFC8312 Section 4.7 */ + /* RFC9438 Section 4.8: Timeout */ if (CCV(ccv, t_rxtshift) == 1) { /* * Remember the state only for the first RTO event. 
This @@ -473,34 +476,25 @@ cubic_cong_signal(struct cc_var *ccv, ccsignal_t type) */ cubic_data->undo_t_epoch = cubic_data->t_epoch; cubic_data->undo_cwnd_epoch = cubic_data->cwnd_epoch; - cubic_data->undo_W_est = cubic_data->W_est; - cubic_data->undo_cwnd_prior = cubic_data->cwnd_prior; cubic_data->undo_W_max = cubic_data->W_max; cubic_data->undo_K = cubic_data->K; - if (V_tcp_do_newsack) { - pipe = tcp_compute_pipe(ccv->ccvc.tcp); - } else { - pipe = CCV(ccv, snd_max) - - CCV(ccv, snd_fack) + - CCV(ccv, sackhint.sack_bytes_rexmit); - } + pipe = tcp_compute_pipe(ccv->tp); CCV(ccv, snd_ssthresh) = max(2, (((uint64_t)min(CCV(ccv, snd_wnd), pipe) * CUBIC_BETA) >> CUBIC_SHIFT) / mss) * mss; } - cubic_data->flags |= CUBICFLAG_CONG_EVENT | CUBICFLAG_RTO_EVENT; - cubic_data->undo_W_max = cubic_data->W_max; - cubic_data->num_cong_events++; + /* + * The RTO flag will recalculate K at the + * beginning of the congestion avoidance stage. + */ + cubic_data->flags |= CUBICFLAG_RTO_EVENT; CCV(ccv, snd_cwnd) = mss; break; case CC_RTO_ERR: - cubic_data->flags &= ~(CUBICFLAG_CONG_EVENT | CUBICFLAG_RTO_EVENT); - cubic_data->num_cong_events--; + cubic_data->flags &= ~CUBICFLAG_RTO_EVENT; cubic_data->K = cubic_data->undo_K; - cubic_data->cwnd_prior = cubic_data->undo_cwnd_prior; cubic_data->W_max = cubic_data->undo_W_max; - cubic_data->W_est = cubic_data->undo_W_est; cubic_data->cwnd_epoch = cubic_data->undo_cwnd_epoch; cubic_data->t_epoch = cubic_data->undo_t_epoch; break; @@ -521,7 +515,7 @@ cubic_conn_init(struct cc_var *ccv) * this here bad things happen when entries from the TCP hostcache * get used. */ - cubic_data->W_max = CCV(ccv, snd_cwnd); + cubic_data->W_max = UINT_MAX; } static int @@ -538,6 +532,7 @@ cubic_post_recovery(struct cc_var *ccv) { struct cubic *cubic_data; int pipe; + uint32_t mss = tcp_fixed_maxseg(ccv->tp); cubic_data = ccv->cc_data; pipe = 0; @@ -547,26 +542,19 @@ cubic_post_recovery(struct cc_var *ccv) * If inflight data is less than ssthresh, set cwnd * conservatively to avoid a burst of data, as suggested in * the NewReno RFC. Otherwise, use the CUBIC method. - * - * XXXLAS: Find a way to do this without needing curack */ - if (V_tcp_do_newsack) - pipe = tcp_compute_pipe(ccv->ccvc.tcp); - else - pipe = CCV(ccv, snd_max) - ccv->curack; - + pipe = tcp_compute_pipe(ccv->tp); if (pipe < CCV(ccv, snd_ssthresh)) /* * Ensure that cwnd does not collapse to 1 MSS under * adverse conditions. Implements RFC6582 */ - CCV(ccv, snd_cwnd) = max(pipe, CCV(ccv, t_maxseg)) + - CCV(ccv, t_maxseg); + CCV(ccv, snd_cwnd) = max(pipe, mss) + mss; else /* Update cwnd based on beta and adjusted W_max. */ CCV(ccv, snd_cwnd) = max(((uint64_t)cubic_data->W_max * CUBIC_BETA) >> CUBIC_SHIFT, - 2 * CCV(ccv, t_maxseg)); + 2 * mss); } /* Calculate the average RTT between congestion epochs. */ @@ -592,7 +580,7 @@ cubic_record_rtt(struct cc_var *ccv) /* Ignore srtt until a min number of samples have been taken. */ if (CCV(ccv, t_rttupdated) >= CUBIC_MIN_RTT_SAMPLES) { cubic_data = ccv->cc_data; - t_srtt_usecs = tcp_get_srtt(ccv->ccvc.tcp, + t_srtt_usecs = tcp_get_srtt(ccv->tp, TCP_TMR_GRANULARITY_USEC); /* * Record the current SRTT as our minrtt if it's the smallest @@ -627,40 +615,36 @@ cubic_record_rtt(struct cc_var *ccv) } /* - * Update the ssthresh in the event of congestion. + * Return the new value for ssthresh in the event of a congestion. 
*/ -static void -cubic_ssthresh_update(struct cc_var *ccv, uint32_t maxseg) +static uint32_t +cubic_get_ssthresh(struct cc_var *ccv, uint32_t maxseg) { struct cubic *cubic_data; - uint32_t ssthresh; - uint32_t cwnd; + uint32_t cwnd, pipe; cubic_data = ccv->cc_data; cwnd = CCV(ccv, snd_cwnd); - /* Fast convergence heuristic. */ + /* RFC9438 Section 4.7: Fast convergence */ if (cwnd < cubic_data->W_max) { cwnd = ((uint64_t)cwnd * CUBIC_FC_FACTOR) >> CUBIC_SHIFT; } - cubic_data->undo_W_max = cubic_data->W_max; cubic_data->W_max = cwnd; - /* - * On the first congestion event, set ssthresh to cwnd * 0.5 - * and reduce W_max to cwnd * beta. This aligns the cubic concave - * region appropriately. On subsequent congestion events, set - * ssthresh to cwnd * beta. - */ - if ((cubic_data->flags & CUBICFLAG_CONG_EVENT) == 0) { - ssthresh = cwnd >> 1; - cubic_data->W_max = ((uint64_t)cwnd * - CUBIC_BETA) >> CUBIC_SHIFT; + if (cubic_data->flags & CUBICFLAG_IN_TF) { + /* If in the TCP friendly region, follow what newreno does. */ + return (newreno_cc_cwnd_on_multiplicative_decrease(ccv, maxseg)); + } else { - ssthresh = ((uint64_t)cwnd * - CUBIC_BETA) >> CUBIC_SHIFT; + /* + * RFC9438 Section 4.6: Multiplicative Decrease + * Outside the TCP friendly region, set ssthresh to the size of + * inflight_size * beta. + */ + pipe = tcp_compute_pipe(ccv->tp); + return ((pipe * CUBIC_BETA) >> CUBIC_SHIFT); } - CCV(ccv, snd_ssthresh) = max(ssthresh, 2 * maxseg); } static void diff --git a/sys/netinet/cc/cc_cubic.h b/sys/netinet/cc/cc_cubic.h index ce6c2a6633d7..c31506d26b00 100644 --- a/sys/netinet/cc/cc_cubic.h +++ b/sys/netinet/cc/cc_cubic.h @@ -83,26 +83,28 @@ #define CUBICFLAG_RTO_EVENT 0x00000008 /* RTO experienced */ #define CUBICFLAG_HYSTART_ENABLED 0x00000010 /* Hystart++ is enabled */ #define CUBICFLAG_HYSTART_IN_CSS 0x00000020 /* We are in Hystart++ CSS */ +#define CUBICFLAG_IN_TF 0x00000040 /* We are in TCP friendly region */ /* Kernel only bits */ #ifdef _KERNEL struct cubic { - /* CUBIC K in fixed point form with CUBIC_SHIFT worth of precision. */ + /* + * CUBIC K in fixed point form with CUBIC_SHIFT worth of precision. + * Also means the time period in seconds it takes to increase the + * congestion window size at the beginning of the current congestion + * avoidance stage to W_max. + */ int64_t K; /* Sum of RTT samples across an epoch in usecs. */ int64_t sum_rtt_usecs; - /* Size of cwnd just before cwnd was reduced in the last congestion event */ - uint64_t W_max; - /* An estimate for the congestion window in the Reno-friendly region */ - uint64_t W_est; - /* The cwnd at the beginning of the current congestion avoidance stage */ - uint64_t cwnd_epoch; - /* - * Size of cwnd at the time of setting ssthresh most recently, - * either upon exiting the first slow start, or just before cwnd - * was reduced in the last congestion event - */ - uint64_t cwnd_prior; + /* Size of cwnd (in bytes) just before cwnd was reduced in the last congestion event. */ + uint32_t W_max; + /* An estimate (in bytes) for the congestion window in the Reno-friendly region */ + uint32_t W_est; + /* An estimate (in bytes) for the congestion window in the CUBIC region */ + uint32_t W_cubic; + /* The cwnd (in bytes) at the beginning of the current congestion avoidance stage. */ + uint32_t cwnd_epoch; /* various flags */ uint32_t flags; /* Minimum observed rtt in usecs. 
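[Note, not part of the patch] Two constants drive the new cubic_get_ssthresh() above: outside the Reno-friendly region ssthresh becomes beta_cubic (0.7, CUBIC_BETA in fixed point) of the data currently in flight, and fast convergence first shrinks the recorded W_max by (1 + beta_cubic)/2 = 0.85 when the window stopped growing before reaching the previous W_max. A floating-point sketch of both steps, illustrative only:

/* ssthresh on multiplicative decrease outside the Reno-friendly region. */
static double
md_ssthresh(double pipe_bytes)
{
	return (0.7 * pipe_bytes);
}

/*
 * RFC 9438 Section 4.7 fast convergence: remember a reduced W_max if
 * the last epoch never reached the previous W_max.
 */
static double
fast_convergence(double cwnd, double prev_wmax)
{
	if (cwnd < prev_wmax)
		return (cwnd * (1.0 + 0.7) / 2.0);
	return (cwnd);
}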
*/ @@ -117,12 +119,8 @@ struct cubic { int undo_t_epoch; /* Few variables to restore the state after RTO_ERR */ int64_t undo_K; - uint64_t undo_cwnd_prior; - uint64_t undo_W_max; - uint64_t undo_W_est; - uint64_t undo_cwnd_epoch; - /* Number of congestion events experienced */ - uint64_t num_cong_events; + uint32_t undo_W_max; + uint32_t undo_cwnd_epoch; uint32_t css_baseline_minrtt; uint32_t css_current_round_minrtt; uint32_t css_lastround_minrtt; @@ -141,60 +139,103 @@ struct cubic { extern int hz; /* - * Implementation based on the formulae found in the CUBIC Internet Draft - * "draft-ietf-tcpm-cubic-04". + * Implementation based on the formulas in RFC9438. * */ -static __inline float -theoretical_cubic_k(double wmax_pkts) + +/* + * Returns K, the time period in seconds it takes to increase the congestion + * window size at the beginning of the current congestion avoidance stage to + * W_max. + */ +static inline float +theoretical_cubic_k(uint32_t wmax_segs, uint32_t cwnd_epoch_segs) { double C; C = 0.4; + if (wmax_segs <= cwnd_epoch_segs) + return 0.0; - return (pow((wmax_pkts * 0.3) / C, (1.0 / 3.0)) * pow(2, CUBIC_SHIFT)); + /* + * Figure 2: K = ((W_max - cwnd_epoch) / C)^(1/3) + */ + return (pow((wmax_segs - cwnd_epoch_segs) / C, (1.0 / 3.0)) * pow(2, CUBIC_SHIFT)); } -static __inline unsigned long -theoretical_cubic_cwnd(int ticks_since_epoch, unsigned long wmax, uint32_t smss) +/* + * Returns the congestion window in segments at time t in seconds based on the + * cubic increase function, where t is the elapsed time in seconds from the + * beginning of the current congestion avoidance stage, as described in RFC9438 + * Section 4.2. + */ +static inline unsigned long +theoretical_cubic_cwnd(int ticks_elapsed, uint32_t wmax_segs, uint32_t cwnd_epoch_segs) { - double C, wmax_pkts; + double C, t; + float K; C = 0.4; - wmax_pkts = wmax / (double)smss; + t = ticks_elapsed / (double)hz; + K = theoretical_cubic_k(wmax_segs, cwnd_epoch_segs); - return (smss * (wmax_pkts + - (C * pow(ticks_since_epoch / (double)hz - - theoretical_cubic_k(wmax_pkts) / pow(2, CUBIC_SHIFT), 3.0)))); + /* + * Figure 1: W_cubic(t) = C * (t - K)^3 + W_max + */ + return (C * pow(t - K / pow(2, CUBIC_SHIFT), 3.0) + wmax_segs); } -static __inline unsigned long -theoretical_reno_cwnd(int ticks_since_epoch, int rtt_ticks, unsigned long wmax, - uint32_t smss) +/* + * Returns estimated Reno congestion window in segments. + */ +static inline unsigned long +theoretical_reno_cwnd(int ticks_elapsed, int rtt_ticks, uint32_t wmax_segs) { - return ((wmax * 0.5) + ((ticks_since_epoch / (float)rtt_ticks) * smss)); + return (wmax_segs * 0.5 + ticks_elapsed / (float)rtt_ticks); } -static __inline unsigned long -theoretical_tf_cwnd(int ticks_since_epoch, int rtt_ticks, unsigned long wmax, - uint32_t smss) +/* + * Returns an estimate for the congestion window in segments in the + * Reno-friendly region -- that is, an estimate for the congestion window of + * Reno, as described in RFC9438 Section 4.3, where: + * cwnd: Current congestion window in segments. + * cwnd_prior: Size of cwnd in segments at the time of setting ssthresh most + * recently, either upon exiting the first slow start or just before + * cwnd was reduced in the last congestion event. + * W_est: An estimate for the congestion window in segments in the Reno-friendly + * region -- that is, an estimate for the congestion window of Reno. 
+ */ +static inline unsigned long +theoretical_tf_cwnd(unsigned long W_est, unsigned long segs_acked, unsigned long cwnd, + unsigned long cwnd_prior) { + float cubic_alpha, cubic_beta; - return ((wmax * 0.7) + ((3 * 0.3) / (2 - 0.3) * - (ticks_since_epoch / (float)rtt_ticks) * smss)); + /* RFC9438 Section 4.6: The parameter β_cubic SHOULD be set to 0.7. */ + cubic_beta = 0.7; + + if (W_est >= cwnd_prior) + cubic_alpha = 1.0; + else + cubic_alpha = (3.0 * (1.0 - cubic_beta)) / (1.0 + cubic_beta); + + /* + * Figure 4: W_est = W_est + α_cubic * segments_acked / cwnd + */ + return (W_est + cubic_alpha * segs_acked / cwnd); } #endif /* !_KERNEL */ /* * Compute the CUBIC K value used in the cwnd calculation, using an - * implementation of eqn 2 in the I-D. The method used - * here is adapted from Apple Computer Technical Report #KT-32. + * implementation mentioned in Figure. 2 of RFC9438. + * The method used here is adapted from Apple Computer Technical Report #KT-32. */ -static __inline int64_t -cubic_k(unsigned long wmax_pkts) +static inline int64_t +cubic_k(uint32_t wmax_segs, uint32_t cwnd_epoch_segs) { int64_t s, K; uint16_t p; @@ -202,8 +243,13 @@ cubic_k(unsigned long wmax_pkts) K = s = 0; p = 0; - /* (wmax * beta)/C with CUBIC_SHIFT worth of precision. */ - s = ((wmax_pkts * ONE_SUB_CUBIC_BETA) << CUBIC_SHIFT) / CUBIC_C_FACTOR; + /* Handle the corner case where W_max <= cwnd_epoch */ + if (wmax_segs <= cwnd_epoch_segs) { + return 0; + } + + /* (wmax - cwnd_epoch) / C with CUBIC_SHIFT worth of precision. */ + s = ((wmax_segs - cwnd_epoch_segs) << (2 * CUBIC_SHIFT)) / CUBIC_C_FACTOR; /* Rebase s to be between 1 and 1/8 with a shift of CUBIC_SHIFT. */ while (s >= 256) { @@ -224,13 +270,14 @@ cubic_k(unsigned long wmax_pkts) } /* - * Compute the new cwnd value using an implementation of eqn 1 from the I-D. + * Compute and return the new cwnd value in bytes using an implementation + * mentioned in Figure. 1 of RFC9438. * Thanks to Kip Macy for help debugging this function. * * XXXLAS: Characterise bounds for overflow. */ -static __inline unsigned long -cubic_cwnd(int usecs_since_epoch, unsigned long wmax, uint32_t smss, int64_t K) +static inline uint32_t +cubic_cwnd(int usecs_since_epoch, uint32_t wmax, uint32_t smss, int64_t K) { int64_t cwnd; @@ -249,7 +296,7 @@ cubic_cwnd(int usecs_since_epoch, unsigned long wmax, uint32_t smss, int64_t K) cwnd *= (cwnd * cwnd); /* - * C(t - K)^3 + wmax + * Figure 1: C * (t - K)^3 + wmax * The down shift by CUBIC_SHIFT_4 is because cwnd has 4 lots of * CUBIC_SHIFT included in the value. 3 from the cubing of cwnd above, * and an extra from multiplying through by CUBIC_C_FACTOR. @@ -264,46 +311,13 @@ cubic_cwnd(int usecs_since_epoch, unsigned long wmax, uint32_t smss, int64_t K) } /* - * Compute an approximation of the NewReno cwnd some number of usecs after a - * congestion event. RTT should be the average RTT estimate for the path - * measured over the previous congestion epoch and wmax is the value of cwnd at - * the last congestion event. The "TCP friendly" concept in the CUBIC I-D is - * rather tricky to understand and it turns out this function is not required. - * It is left here for reference. - * - * XXX: Not used + * Compute the "TCP friendly" cwnd by newreno in congestion avoidance state. 
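[Note, not part of the patch] A quick numeric check of cubic_k() with illustrative values: with W_max = 100 segments, cwnd_epoch = 60 segments and C = 0.4, K = ((100 - 60) / 0.4)^(1/3) = 100^(1/3), roughly 4.64 seconds; the function returns that value carried in its fixed-point representation with CUBIC_SHIFT bits of precision.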
*/ -static __inline unsigned long -reno_cwnd(int usecs_since_epoch, int rtt_usecs, unsigned long wmax, - uint32_t smss) +static inline uint32_t +tf_cwnd(struct cc_var *ccv) { - - /* - * For NewReno, beta = 0.5, therefore: W_tcp(t) = wmax*0.5 + t/RTT - * W_tcp(t) deals with cwnd/wmax in pkts, so because our cwnd is in - * bytes, we have to multiply by smss. - */ - return (((wmax * RENO_BETA) + (((usecs_since_epoch * smss) - << CUBIC_SHIFT) / rtt_usecs)) >> CUBIC_SHIFT); -} - -/* - * Compute an approximation of the "TCP friendly" cwnd some number of usecs - * after a congestion event that is designed to yield the same average cwnd as - * NewReno while using CUBIC's beta of 0.7. RTT should be the average RTT - * estimate for the path measured over the previous congestion epoch and wmax is - * the value of cwnd at the last congestion event. - */ -static __inline unsigned long -tf_cwnd(int usecs_since_epoch, int rtt_usecs, unsigned long wmax, - uint32_t smss) -{ - - /* Equation 4 of I-D. */ - return (((wmax * CUBIC_BETA) + - (((THREE_X_PT3 * (unsigned long)usecs_since_epoch * - (unsigned long)smss) << CUBIC_SHIFT) / (TWO_SUB_PT3 * rtt_usecs))) - >> CUBIC_SHIFT); + /* newreno is "TCP friendly" */ + return newreno_cc_cwnd_in_cong_avoid(ccv); } #endif /* _NETINET_CC_CUBIC_H_ */ diff --git a/sys/netinet/cc/cc_dctcp.c b/sys/netinet/cc/cc_dctcp.c index 374db98c5e60..757bc005edb4 100644 --- a/sys/netinet/cc/cc_dctcp.c +++ b/sys/netinet/cc/cc_dctcp.c @@ -108,6 +108,7 @@ dctcp_ack_received(struct cc_var *ccv, ccsignal_t type) { struct dctcp *dctcp_data; int bytes_acked = 0; + uint32_t mss = tcp_fixed_maxseg(ccv->tp); dctcp_data = ccv->cc_data; @@ -125,7 +126,7 @@ dctcp_ack_received(struct cc_var *ccv, ccsignal_t type) newreno_cc_ack_received(ccv, type); if (type == CC_DUPACK) - bytes_acked = min(ccv->bytes_this_ack, CCV(ccv, t_maxseg)); + bytes_acked = min(ccv->bytes_this_ack, mss); if (type == CC_ACK) bytes_acked = ccv->bytes_this_ack; @@ -138,16 +139,16 @@ dctcp_ack_received(struct cc_var *ccv, ccsignal_t type) //XXRMS: For fluid-model DCTCP, update //cwnd here during for RTT fairness if (!dctcp_data->ece_prev - && bytes_acked > CCV(ccv, t_maxseg)) { + && bytes_acked > mss) { dctcp_data->bytes_ecn += - (bytes_acked - CCV(ccv, t_maxseg)); + (bytes_acked - mss); } else dctcp_data->bytes_ecn += bytes_acked; dctcp_data->ece_prev = 1; } else { if (dctcp_data->ece_prev - && bytes_acked > CCV(ccv, t_maxseg)) - dctcp_data->bytes_ecn += CCV(ccv, t_maxseg); + && bytes_acked > mss) + dctcp_data->bytes_ecn += mss; dctcp_data->ece_prev = 0; } dctcp_data->ece_curr = 0; @@ -201,7 +202,7 @@ dctcp_cb_init(struct cc_var *ccv, void *ptr) { struct dctcp *dctcp_data; - INP_WLOCK_ASSERT(tptoinpcb(ccv->ccvc.tcp)); + INP_WLOCK_ASSERT(tptoinpcb(ccv->tp)); if (ptr == NULL) { dctcp_data = malloc(sizeof(struct dctcp), M_CC_MEM, M_NOWAIT|M_ZERO); if (dctcp_data == NULL) @@ -245,7 +246,7 @@ dctcp_cong_signal(struct cc_var *ccv, ccsignal_t type) if (CCV(ccv, t_flags2) & TF2_ECN_PERMIT) { dctcp_data = ccv->cc_data; cwin = CCV(ccv, snd_cwnd); - mss = tcp_fixed_maxseg(ccv->ccvc.tcp); + mss = tcp_fixed_maxseg(ccv->tp); switch (type) { case CC_NDUPACK: @@ -293,19 +294,13 @@ dctcp_cong_signal(struct cc_var *ccv, ccsignal_t type) break; case CC_RTO: if (CCV(ccv, t_rxtshift) == 1) { - if (V_tcp_do_newsack) { - pipe = tcp_compute_pipe(ccv->ccvc.tcp); - } else { - pipe = CCV(ccv, snd_max) - - CCV(ccv, snd_fack) + - CCV(ccv, sackhint.sack_bytes_rexmit); - } + pipe = tcp_compute_pipe(ccv->tp); CCV(ccv, snd_ssthresh) = max(2, min(CCV(ccv, 
snd_wnd), pipe) / 2 / mss) * mss; } CCV(ccv, snd_cwnd) = mss; dctcp_update_alpha(ccv); - dctcp_data->save_sndnxt += CCV(ccv, t_maxseg); + dctcp_data->save_sndnxt += mss; dctcp_data->num_cong_events++; break; default: diff --git a/sys/netinet/cc/cc_hd.c b/sys/netinet/cc/cc_hd.c index 82486563f97e..def1580d8ffb 100644 --- a/sys/netinet/cc/cc_hd.c +++ b/sys/netinet/cc/cc_hd.c @@ -59,6 +59,7 @@ #include <sys/limits.h> #include <sys/malloc.h> #include <sys/module.h> +#include <sys/prng.h> #include <sys/queue.h> #include <sys/socket.h> #include <sys/socketvar.h> @@ -77,8 +78,8 @@ #include <netinet/khelp/h_ertt.h> -/* Largest possible number returned by random(). */ -#define RANDOM_MAX INT_MAX +/* Largest possible number returned by prng32(). */ +#define RANDOM_MAX UINT32_MAX static void hd_ack_received(struct cc_var *ccv, ccsignal_t ack_type); static int hd_mod_init(void); @@ -128,7 +129,7 @@ should_backoff(int qdly, int maxqdly) p = (RANDOM_MAX / 100) * V_hd_pmax; } - return (random() < p); + return (prng32() < p); } /* diff --git a/sys/netinet/cc/cc_htcp.c b/sys/netinet/cc/cc_htcp.c index 41c552a3bfa0..569495144d50 100644 --- a/sys/netinet/cc/cc_htcp.c +++ b/sys/netinet/cc/cc_htcp.c @@ -193,6 +193,7 @@ static void htcp_ack_received(struct cc_var *ccv, ccsignal_t type) { struct htcp *htcp_data; + uint32_t mss = tcp_fixed_maxseg(ccv->tp); htcp_data = ccv->cc_data; htcp_record_rtt(ccv); @@ -220,7 +221,7 @@ htcp_ack_received(struct cc_var *ccv, ccsignal_t type) if (V_tcp_do_rfc3465) { /* Increment cwnd by alpha segments. */ CCV(ccv, snd_cwnd) += htcp_data->alpha * - CCV(ccv, t_maxseg); + mss; ccv->flags &= ~CCF_ABC_SENTAWND; } else /* @@ -230,8 +231,8 @@ htcp_ack_received(struct cc_var *ccv, ccsignal_t type) */ CCV(ccv, snd_cwnd) += (((htcp_data->alpha << HTCP_SHIFT) / (max(1, - CCV(ccv, snd_cwnd) / CCV(ccv, t_maxseg)))) * - CCV(ccv, t_maxseg)) >> HTCP_SHIFT; + CCV(ccv, snd_cwnd) / mss))) * + mss) >> HTCP_SHIFT; } } } @@ -253,7 +254,7 @@ htcp_cb_init(struct cc_var *ccv, void *ptr) { struct htcp *htcp_data; - INP_WLOCK_ASSERT(tptoinpcb(ccv->ccvc.tcp)); + INP_WLOCK_ASSERT(tptoinpcb(ccv->tp)); if (ptr == NULL) { htcp_data = malloc(sizeof(struct htcp), M_CC_MEM, M_NOWAIT); if (htcp_data == NULL) @@ -284,7 +285,7 @@ htcp_cong_signal(struct cc_var *ccv, ccsignal_t type) uint32_t mss, pipe; htcp_data = ccv->cc_data; - mss = tcp_fixed_maxseg(ccv->ccvc.tcp); + mss = tcp_fixed_maxseg(ccv->tp); switch (type) { case CC_NDUPACK: @@ -324,13 +325,7 @@ htcp_cong_signal(struct cc_var *ccv, ccsignal_t type) case CC_RTO: if (CCV(ccv, t_rxtshift) == 1) { - if (V_tcp_do_newsack) { - pipe = tcp_compute_pipe(ccv->ccvc.tcp); - } else { - pipe = CCV(ccv, snd_max) - - CCV(ccv, snd_fack) + - CCV(ccv, sackhint.sack_bytes_rexmit); - } + pipe = tcp_compute_pipe(ccv->tp); CCV(ccv, snd_ssthresh) = max(2, min(CCV(ccv, snd_wnd), pipe) / 2 / mss) * mss; } @@ -370,6 +365,7 @@ htcp_post_recovery(struct cc_var *ccv) { int pipe; struct htcp *htcp_data; + uint32_t mss = tcp_fixed_maxseg(ccv->tp); pipe = 0; htcp_data = ccv->cc_data; @@ -379,25 +375,18 @@ htcp_post_recovery(struct cc_var *ccv) * If inflight data is less than ssthresh, set cwnd * conservatively to avoid a burst of data, as suggested in the * NewReno RFC. Otherwise, use the HTCP method. 
- * - * XXXLAS: Find a way to do this without needing curack */ - if (V_tcp_do_newsack) - pipe = tcp_compute_pipe(ccv->ccvc.tcp); - else - pipe = CCV(ccv, snd_max) - ccv->curack; - + pipe = tcp_compute_pipe(ccv->tp); if (pipe < CCV(ccv, snd_ssthresh)) /* * Ensure that cwnd down not collape to 1 MSS under * adverse conditions. Implements RFC6582 */ - CCV(ccv, snd_cwnd) = max(pipe, CCV(ccv, t_maxseg)) + - CCV(ccv, t_maxseg); + CCV(ccv, snd_cwnd) = max(pipe, mss) + mss; else CCV(ccv, snd_cwnd) = max(1, ((htcp_data->beta * - htcp_data->prev_cwnd / CCV(ccv, t_maxseg)) - >> HTCP_SHIFT)) * CCV(ccv, t_maxseg); + htcp_data->prev_cwnd / mss) + >> HTCP_SHIFT)) * mss; } } @@ -451,7 +440,7 @@ htcp_recalc_alpha(struct cc_var *ccv) */ if (V_htcp_rtt_scaling) alpha = max(1, (min(max(HTCP_MINROWE, - (tcp_get_srtt(ccv->ccvc.tcp, TCP_TMR_GRANULARITY_TICKS) << HTCP_SHIFT) / + (tcp_get_srtt(ccv->tp, TCP_TMR_GRANULARITY_TICKS) << HTCP_SHIFT) / htcp_rtt_ref), HTCP_MAXROWE) * alpha) >> HTCP_SHIFT); @@ -502,18 +491,18 @@ htcp_record_rtt(struct cc_var *ccv) * or minrtt is currently equal to its initialised value. Ignore SRTT * until a min number of samples have been taken. */ - if ((tcp_get_srtt(ccv->ccvc.tcp, TCP_TMR_GRANULARITY_TICKS) < htcp_data->minrtt || + if ((tcp_get_srtt(ccv->tp, TCP_TMR_GRANULARITY_TICKS) < htcp_data->minrtt || htcp_data->minrtt == TCPTV_SRTTBASE) && (CCV(ccv, t_rttupdated) >= HTCP_MIN_RTT_SAMPLES)) - htcp_data->minrtt = tcp_get_srtt(ccv->ccvc.tcp, TCP_TMR_GRANULARITY_TICKS); + htcp_data->minrtt = tcp_get_srtt(ccv->tp, TCP_TMR_GRANULARITY_TICKS); /* * Record the current SRTT as our maxrtt if it's the largest we've * seen. Ignore SRTT until a min number of samples have been taken. */ - if (tcp_get_srtt(ccv->ccvc.tcp, TCP_TMR_GRANULARITY_TICKS) > htcp_data->maxrtt + if (tcp_get_srtt(ccv->tp, TCP_TMR_GRANULARITY_TICKS) > htcp_data->maxrtt && CCV(ccv, t_rttupdated) >= HTCP_MIN_RTT_SAMPLES) - htcp_data->maxrtt = tcp_get_srtt(ccv->ccvc.tcp, TCP_TMR_GRANULARITY_TICKS); + htcp_data->maxrtt = tcp_get_srtt(ccv->tp, TCP_TMR_GRANULARITY_TICKS); } /* diff --git a/sys/netinet/cc/cc_module.h b/sys/netinet/cc/cc_module.h index e01d7a124ad4..606b2b66e7e5 100644 --- a/sys/netinet/cc/cc_module.h +++ b/sys/netinet/cc/cc_module.h @@ -43,18 +43,7 @@ #ifndef _NETINET_CC_MODULE_H_ #define _NETINET_CC_MODULE_H_ -/* - * Allows a CC algorithm to manipulate a commonly named CC variable regardless - * of the transport protocol and associated C struct. - * XXXLAS: Out of action until the work to support SCTP is done. - * -#define CCV(ccv, what) \ -(*( \ - (ccv)->type == IPPROTO_TCP ? 
&(ccv)->ccvc.tcp->what : \ - &(ccv)->ccvc.sctp->what \ -)) - */ -#define CCV(ccv, what) (ccv)->ccvc.tcp->what +#define CCV(ccv, what) (ccv)->tp->what #define DECLARE_CC_MODULE(ccname, ccalgo) \ static moduledata_t cc_##ccname = { \ diff --git a/sys/netinet/cc/cc_newreno.c b/sys/netinet/cc/cc_newreno.c index aa20e2c64f7d..de7b878152b0 100644 --- a/sys/netinet/cc/cc_newreno.c +++ b/sys/netinet/cc/cc_newreno.c @@ -135,7 +135,7 @@ newreno_log_hystart_event(struct cc_var *ccv, struct newreno *nreno, uint8_t mod if (hystart_bblogs == 0) return; - tp = ccv->ccvc.tcp; + tp = ccv->tp; if (tcp_bblogging_on(tp)) { union tcp_log_stackspecific log; struct timeval tv; @@ -175,7 +175,7 @@ newreno_cb_init(struct cc_var *ccv, void *ptr) { struct newreno *nreno; - INP_WLOCK_ASSERT(tptoinpcb(ccv->ccvc.tcp)); + INP_WLOCK_ASSERT(tptoinpcb(ccv->tp)); if (ptr == NULL) { ccv->cc_data = malloc(sizeof(struct newreno), M_CC_MEM, M_NOWAIT); if (ccv->cc_data == NULL) @@ -215,12 +215,13 @@ static void newreno_ack_received(struct cc_var *ccv, ccsignal_t type) { struct newreno *nreno; + uint32_t mss = tcp_fixed_maxseg(ccv->tp); nreno = ccv->cc_data; if (type == CC_ACK && !IN_RECOVERY(CCV(ccv, t_flags)) && (ccv->flags & CCF_CWND_LIMITED)) { u_int cw = CCV(ccv, snd_cwnd); - u_int incr = CCV(ccv, t_maxseg); + u_int incr = mss; /* * Regular in-order ACK, open the congestion window. @@ -324,10 +325,9 @@ newreno_ack_received(struct cc_var *ccv, ccsignal_t type) } if (CCV(ccv, snd_nxt) == CCV(ccv, snd_max)) incr = min(ccv->bytes_this_ack, - ccv->nsegs * abc_val * - CCV(ccv, t_maxseg)); + ccv->nsegs * abc_val * mss); else - incr = min(ccv->bytes_this_ack, CCV(ccv, t_maxseg)); + incr = min(ccv->bytes_this_ack, mss); /* Only if Hystart is enabled will the flag get set */ if (nreno->newreno_flags & CC_NEWRENO_HYSTART_IN_CSS) { @@ -369,9 +369,9 @@ newreno_cong_signal(struct cc_var *ccv, ccsignal_t type) uint32_t beta, beta_ecn, cwin, factor, mss, pipe; cwin = CCV(ccv, snd_cwnd); - mss = tcp_fixed_maxseg(ccv->ccvc.tcp); + mss = tcp_fixed_maxseg(ccv->tp); nreno = ccv->cc_data; - beta = (nreno == NULL) ? V_newreno_beta : nreno->beta;; + beta = (nreno == NULL) ? V_newreno_beta : nreno->beta; beta_ecn = (nreno == NULL) ? 
V_newreno_beta_ecn : nreno->beta_ecn; /* * Note that we only change the backoff for ECN if the @@ -428,13 +428,7 @@ newreno_cong_signal(struct cc_var *ccv, ccsignal_t type) break; case CC_RTO: if (CCV(ccv, t_rxtshift) == 1) { - if (V_tcp_do_newsack) { - pipe = tcp_compute_pipe(ccv->ccvc.tcp); - } else { - pipe = CCV(ccv, snd_max) - - CCV(ccv, snd_fack) + - CCV(ccv, sackhint.sack_bytes_rexmit); - } + pipe = tcp_compute_pipe(ccv->tp); CCV(ccv, snd_ssthresh) = max(2, ((uint64_t)min(CCV(ccv, snd_wnd), pipe) * (uint64_t)factor) / @@ -456,7 +450,7 @@ newreno_ctl_output(struct cc_var *ccv, struct sockopt *sopt, void *buf) if (sopt->sopt_valsize != sizeof(struct cc_newreno_opts)) return (EMSGSIZE); - if (CC_ALGO(ccv->ccvc.tcp) != &newreno_cc_algo) + if (CC_ALGO(ccv->tp) != &newreno_cc_algo) return (ENOPROTOOPT); nreno = (struct newreno *)ccv->cc_data; diff --git a/sys/netinet/cc/cc_vegas.c b/sys/netinet/cc/cc_vegas.c index ecd42c1a0f53..2e24a717f869 100644 --- a/sys/netinet/cc/cc_vegas.c +++ b/sys/netinet/cc/cc_vegas.c @@ -129,6 +129,7 @@ vegas_ack_received(struct cc_var *ccv, ccsignal_t ack_type) struct ertt *e_t; struct vegas *vegas_data; long actual_tx_rate, expected_tx_rate, ndiff; + uint32_t mss = tcp_fixed_maxseg(ccv->tp); e_t = khelp_get_osd(&CCV(ccv, t_osd), ertt_id); vegas_data = ccv->cc_data; @@ -139,7 +140,7 @@ vegas_ack_received(struct cc_var *ccv, ccsignal_t ack_type) actual_tx_rate = e_t->bytes_tx_in_marked_rtt / e_t->markedpkt_rtt; ndiff = (expected_tx_rate - actual_tx_rate) * - e_t->minrtt / CCV(ccv, t_maxseg); + e_t->minrtt / mss; if (ndiff < V_vegas_alpha) { if (CCV(ccv, snd_cwnd) <= @@ -150,8 +151,7 @@ vegas_ack_received(struct cc_var *ccv, ccsignal_t ack_type) } else { vegas_data->slow_start_toggle = 0; CCV(ccv, snd_cwnd) = - min(CCV(ccv, snd_cwnd) + - CCV(ccv, t_maxseg), + min(CCV(ccv, snd_cwnd) + mss, TCP_MAXWIN << CCV(ccv, snd_scale)); } } else if (ndiff > V_vegas_beta) { @@ -184,7 +184,7 @@ vegas_cb_init(struct cc_var *ccv, void *ptr) { struct vegas *vegas_data; - INP_WLOCK_ASSERT(tptoinpcb(ccv->ccvc.tcp)); + INP_WLOCK_ASSERT(tptoinpcb(ccv->tp)); if (ptr == NULL) { vegas_data = malloc(sizeof(struct vegas), M_CC_MEM, M_NOWAIT); if (vegas_data == NULL) @@ -207,6 +207,7 @@ vegas_cong_signal(struct cc_var *ccv, ccsignal_t signal_type) { struct vegas *vegas_data; int presignalrecov; + uint32_t mss = tcp_fixed_maxseg(ccv->tp); vegas_data = ccv->cc_data; @@ -218,8 +219,8 @@ vegas_cong_signal(struct cc_var *ccv, ccsignal_t signal_type) switch((int)signal_type) { case CC_VEGAS_RATE: if (!IN_RECOVERY(CCV(ccv, t_flags))) { - CCV(ccv, snd_cwnd) = max(2 * CCV(ccv, t_maxseg), - CCV(ccv, snd_cwnd) - CCV(ccv, t_maxseg)); + CCV(ccv, snd_cwnd) = max(2 * mss, + CCV(ccv, snd_cwnd) - mss); if (CCV(ccv, snd_cwnd) < CCV(ccv, snd_ssthresh)) /* Exit slow start. 
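[Note, not part of the patch] The Vegas decision visible above converts the gap between the expected and actual transmit rates into whole segments before comparing it with the alpha and beta thresholds. A minimal sketch of that decision, with hypothetical parameter names:

/*
 * Rates are in bytes per time unit and base_rtt is the minimum RTT in
 * the same unit. Returns +1 to grow cwnd by one MSS, -1 to shrink it,
 * 0 to leave it alone.
 */
static int
vegas_adjust(long expected_rate, long actual_rate, long base_rtt,
    long mss, long alpha, long beta)
{
	long ndiff = (expected_rate - actual_rate) * base_rtt / mss;

	if (ndiff < alpha)
		return (1);
	if (ndiff > beta)
		return (-1);
	return (0);
}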
*/ CCV(ccv, snd_ssthresh) = CCV(ccv, snd_cwnd); diff --git a/sys/netinet/dccp.h b/sys/netinet/dccp.h index 4fb6a0d2ab3e..da83a1b06861 100644 --- a/sys/netinet/dccp.h +++ b/sys/netinet/dccp.h @@ -64,7 +64,7 @@ struct dccphdr { uint8_t seq[6]; } longseq; } d_seqno; -}; +} __packed; #define d_seqno_short d_seqno.shortseq; #define d_seqno_long d_seqno.longseq.seq; diff --git a/sys/netinet/icmp6.h b/sys/netinet/icmp6.h index 4368fd2a0fcf..7845b682f3e4 100644 --- a/sys/netinet/icmp6.h +++ b/sys/netinet/icmp6.h @@ -63,6 +63,8 @@ #ifndef _NETINET_ICMP6_H_ #define _NETINET_ICMP6_H_ +#include <sys/stdint.h> + #define ICMPV6_PLD_MAXLEN 1232 /* IPV6_MMTU - sizeof(struct ip6_hdr) - sizeof(struct icmp6_hdr) */ @@ -307,7 +309,8 @@ struct nd_opt_hdr { /* Neighbor discovery option header */ #define ND_OPT_ROUTE_INFO 24 /* RFC 4191 */ #define ND_OPT_RDNSS 25 /* RFC 6106 */ #define ND_OPT_DNSSL 31 /* RFC 6106 */ -#define ND_OPT_MAX 31 +#define ND_OPT_PREF64 38 /* RFC 8781 */ +#define ND_OPT_MAX 38 struct nd_opt_prefix_info { /* prefix information */ u_int8_t nd_opt_pi_type; @@ -373,6 +376,14 @@ struct nd_opt_dnssl { /* DNSSL option (RFC 6106) */ /* followed by list of DNS search domains */ } __packed; +struct nd_opt_pref64 { /* PREF64 option (RFC 8781) */ + uint8_t nd_opt_pref64_type; + uint8_t nd_opt_pref64_len; + /* bits 0-12 are the SL, bits 13-15 are the PLC */ + uint16_t nd_opt_pref64_sl_plc; + char nd_opt_prefix[12]; +} __packed; + /* * icmp6 namelookup */ @@ -641,7 +652,7 @@ VNET_PCPUSTAT_DECLARE(struct icmp6stat, icmp6stat); #define ICMP6STAT_INC2(name, type) \ do { \ MIB_SDT_PROBE2(icmp6, count, name, 1, type); \ - VNET_PCPUSTAT_ADD(struct icmp6stat, icmp6stat, name, 1); \ + VNET_PCPUSTAT_ADD(struct icmp6stat, icmp6stat, name[type], 1); \ } while (0) /* diff --git a/sys/netinet/icmp_var.h b/sys/netinet/icmp_var.h index b1f2b0ebf911..d6b75e482e35 100644 --- a/sys/netinet/icmp_var.h +++ b/sys/netinet/icmp_var.h @@ -104,11 +104,10 @@ extern int badport_bandlim(int); #define BANDLIM_ICMP_UNREACH 0 #define BANDLIM_ICMP_ECHO 1 #define BANDLIM_ICMP_TSTAMP 2 -#define BANDLIM_RST_CLOSEDPORT 3 /* No connection, and no listeners */ -#define BANDLIM_RST_OPENPORT 4 /* No connection, listener */ -#define BANDLIM_ICMP6_UNREACH 5 -#define BANDLIM_SCTP_OOTB 6 -#define BANDLIM_MAX 7 +#define BANDLIM_TCP_RST 3 +#define BANDLIM_ICMP6_UNREACH 4 +#define BANDLIM_SCTP_OOTB 5 +#define BANDLIM_MAX 6 #endif #endif diff --git a/sys/netinet/if_ether.c b/sys/netinet/if_ether.c index 543e6a3922fb..dc6ef343662d 100644 --- a/sys/netinet/if_ether.c +++ b/sys/netinet/if_ether.c @@ -56,6 +56,7 @@ #include <net/if_dl.h> #include <net/if_private.h> #include <net/if_types.h> +#include <net/if_bridgevar.h> #include <net/netisr.h> #include <net/ethernet.h> #include <net/route.h> @@ -155,11 +156,12 @@ SYSCTL_INT(_net_link_ether_inet, OID_AUTO, max_log_per_second, */ #define MAX_GARP_RETRANSMITS 16 static int sysctl_garp_rexmit(SYSCTL_HANDLER_ARGS); -static int garp_rexmit_count = 0; /* GARP retransmission setting. */ +VNET_DEFINE_STATIC(int, garp_rexmit_count) = 0; /* GARP retransmission setting. 
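[Note, not part of the patch] The new nd_opt_pref64 structure above packs, per RFC 8781, a 13-bit Scaled Lifetime (the valid lifetime divided by 8 seconds) and a 3-bit Prefix Length Code into one 16-bit field. A sketch of unpacking it on receipt, with illustrative names:

#include <stdint.h>
#include <arpa/inet.h>

static void
pref64_unpack(uint16_t sl_plc_net, uint32_t *lifetime_secs, uint8_t *plc)
{
	uint16_t v = ntohs(sl_plc_net);

	*lifetime_secs = (uint32_t)(v >> 3) * 8;	/* scaled lifetime, units of 8 s */
	*plc = v & 0x07;				/* prefix length code */
}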
*/ +#define V_garp_rexmit_count VNET(garp_rexmit_count) SYSCTL_PROC(_net_link_ether_inet, OID_AUTO, garp_rexmit_count, - CTLTYPE_INT|CTLFLAG_RW|CTLFLAG_MPSAFE, - &garp_rexmit_count, 0, sysctl_garp_rexmit, "I", + CTLTYPE_INT|CTLFLAG_RW|CTLFLAG_MPSAFE|CTLFLAG_VNET, + &VNET_NAME(garp_rexmit_count), 0, sysctl_garp_rexmit, "I", "Number of times to retransmit GARP packets;" " 0 to disable, maximum of 16"); @@ -831,7 +833,7 @@ in_arpinput(struct mbuf *m) * when we have clusters of interfaces). */ CK_LIST_FOREACH(ia, INADDR_HASH(itaddr.s_addr), ia_hash) { - if (((bridged && ia->ia_ifp->if_bridge == ifp->if_bridge) || + if (((bridged && bridge_same_p(ia->ia_ifp->if_bridge, ifp->if_bridge)) || ia->ia_ifp == ifp) && itaddr.s_addr == ia->ia_addr.sin_addr.s_addr && (ia->ia_ifa.ifa_carp == NULL || @@ -841,7 +843,7 @@ in_arpinput(struct mbuf *m) } } CK_LIST_FOREACH(ia, INADDR_HASH(isaddr.s_addr), ia_hash) - if (((bridged && ia->ia_ifp->if_bridge == ifp->if_bridge) || + if (((bridged && bridge_same_p(ia->ia_ifp->if_bridge, ifp->if_bridge)) || ia->ia_ifp == ifp) && isaddr.s_addr == ia->ia_addr.sin_addr.s_addr) { ifa_ref(&ia->ia_ifa); @@ -849,7 +851,7 @@ in_arpinput(struct mbuf *m) } #define BDG_MEMBER_MATCHES_ARP(addr, ifp, ia) \ - (ia->ia_ifp->if_bridge == ifp->if_softc && \ + (bridge_get_softc_p(ia->ia_ifp) == ifp->if_softc && \ !bcmp(IF_LLADDR(ia->ia_ifp), IF_LLADDR(ifp), ifp->if_addrlen) && \ addr == ia->ia_addr.sin_addr.s_addr) /* @@ -1352,6 +1354,7 @@ sysctl_garp_rexmit(SYSCTL_HANDLER_ARGS) static void garp_rexmit(void *arg) { + struct epoch_tracker et; struct in_ifaddr *ia = arg; if (callout_pending(&ia->ia_garp_timer) || @@ -1361,6 +1364,7 @@ garp_rexmit(void *arg) return; } + NET_EPOCH_ENTER(et); CURVNET_SET(ia->ia_ifa.ifa_ifp->if_vnet); /* @@ -1377,7 +1381,7 @@ garp_rexmit(void *arg) * the callout to retransmit another GARP packet. 
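[Note, not part of the patch] garp_rexmit_count is converted above into a per-VNET variable so each network stack instance (jail/vnet) carries its own setting: the storage is declared with VNET_DEFINE_STATIC, always accessed through a V_ accessor macro, and exported through a sysctl flagged CTLFLAG_VNET. The general pattern looks like the sketch below, an illustrative knob rather than anything in the commit; the usual kernel headers (sys/param.h, sys/kernel.h, sys/sysctl.h, net/vnet.h) are assumed:

/* Per-vnet storage plus the conventional V_ accessor. */
VNET_DEFINE_STATIC(int, example_knob) = 0;
#define	V_example_knob	VNET(example_knob)

/* CTLFLAG_VNET makes the sysctl resolve to the caller's vnet instance. */
SYSCTL_INT(_net_link_ether_inet, OID_AUTO, example_knob,
    CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(example_knob), 0,
    "Illustrative per-vnet tunable");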
*/ ++ia->ia_garp_count; - if (ia->ia_garp_count >= garp_rexmit_count) { + if (ia->ia_garp_count >= V_garp_rexmit_count) { ifa_free(&ia->ia_ifa); } else { int rescheduled; @@ -1392,6 +1396,7 @@ garp_rexmit(void *arg) } CURVNET_RESTORE(); + NET_EPOCH_EXIT(et); } /* @@ -1444,7 +1449,7 @@ arp_ifinit(struct ifnet *ifp, struct ifaddr *ifa) NET_EPOCH_ENTER(et); arp_announce_ifaddr(ifp, dst_in->sin_addr, IF_LLADDR(ifp)); NET_EPOCH_EXIT(et); - if (garp_rexmit_count > 0) { + if (V_garp_rexmit_count > 0) { garp_timer_start(ifa); } @@ -1506,7 +1511,7 @@ vnet_arp_init(void) #endif } VNET_SYSINIT(vnet_arp_init, SI_SUB_PROTO_DOMAIN, SI_ORDER_SECOND, - vnet_arp_init, 0); + vnet_arp_init, NULL); #ifdef VIMAGE /* diff --git a/sys/netinet/igmp.c b/sys/netinet/igmp.c index 454668c2aadc..299f3c2e02bb 100644 --- a/sys/netinet/igmp.c +++ b/sys/netinet/igmp.c @@ -402,32 +402,43 @@ out: static int sysctl_igmp_default_version(SYSCTL_HANDLER_ARGS) { + struct epoch_tracker et; int error; int new; + struct igmp_ifsoftc *igi; error = sysctl_wire_old_buffer(req, sizeof(int)); if (error) return (error); - IGMP_LOCK(); - new = V_igmp_default_version; error = sysctl_handle_int(oidp, &new, 0, req); if (error || !req->newptr) - goto out_locked; + return (error); - if (new < IGMP_VERSION_1 || new > IGMP_VERSION_3) { - error = EINVAL; - goto out_locked; - } + if (new < IGMP_VERSION_1 || new > IGMP_VERSION_3) + return (EINVAL); + + IN_MULTI_LIST_LOCK(); + IGMP_LOCK(); + NET_EPOCH_ENTER(et); - CTR2(KTR_IGMPV3, "change igmp_default_version from %d to %d", - V_igmp_default_version, new); + if (V_igmp_default_version != new) { + CTR2(KTR_IGMPV3, "change igmp_default_version from %d to %d", + V_igmp_default_version, new); - V_igmp_default_version = new; + V_igmp_default_version = new; -out_locked: + LIST_FOREACH(igi, &V_igi_head, igi_link) { + if (igi->igi_version > V_igmp_default_version){ + igmp_set_version(igi, V_igmp_default_version); + } + } + } + + NET_EPOCH_EXIT(et); + IN_MULTI_LIST_UNLOCK(); IGMP_UNLOCK(); return (error); } @@ -1471,6 +1482,7 @@ igmp_input(struct mbuf **mp, int *offp, int proto) m = *mp; ifp = m->m_pkthdr.rcvif; *mp = NULL; + M_ASSERTMAPPED(m); IGMPSTAT_INC(igps_rcv_total); diff --git a/sys/netinet/in.c b/sys/netinet/in.c index 940b197d9e95..963449d4b4b1 100644 --- a/sys/netinet/in.c +++ b/sys/netinet/in.c @@ -57,6 +57,7 @@ #include <net/if_llatbl.h> #include <net/if_private.h> #include <net/if_types.h> +#include <net/if_bridgevar.h> #include <net/route.h> #include <net/route/nhop.h> #include <net/route/route_ctl.h> @@ -102,13 +103,13 @@ VNET_DEFINE(bool, ip_allow_net240) = false; #define V_ip_allow_net240 VNET(ip_allow_net240) SYSCTL_BOOL(_net_inet_ip, OID_AUTO, allow_net240, CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(ip_allow_net240), 0, - "Allow use of Experimental addresses, aka Class E (240/4)"); + "Allow forwarding of and ICMP response to Experimental addresses, aka Class E (240/4)"); /* see https://datatracker.ietf.org/doc/draft-schoen-intarea-unicast-240 */ VNET_DEFINE(bool, ip_allow_net0) = false; SYSCTL_BOOL(_net_inet_ip, OID_AUTO, allow_net0, CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(ip_allow_net0), 0, - "Allow use of addresses in network 0/8"); + "Allow forwarding of and ICMP response to addresses in network 0/8"); /* see https://datatracker.ietf.org/doc/draft-schoen-intarea-unicast-0 */ VNET_DEFINE(uint32_t, in_loopback_mask) = IN_LOOPBACK_MASK_DFLT; @@ -127,10 +128,10 @@ static struct sx in_control_sx; SX_SYSINIT(in_control_sx, &in_control_sx, "in_control"); /* - * Return 1 if an internet address is for 
a ``local'' host + * Return true if an internet address is for a ``local'' host * (one to which we have a connection). */ -int +bool in_localaddr(struct in_addr in) { u_long i = ntohl(in.s_addr); @@ -140,14 +141,14 @@ in_localaddr(struct in_addr in) CK_STAILQ_FOREACH(ia, &V_in_ifaddrhead, ia_link) { if ((i & ia->ia_subnetmask) == ia->ia_subnet) - return (1); + return (true); } - return (0); + return (false); } /* - * Return 1 if an internet address is for the local host and configured + * Return true if an internet address is for the local host and configured * on one of its interfaces. */ bool @@ -185,9 +186,9 @@ in_localip_fib(struct in_addr in, uint16_t fib) } /* - * Return 1 if an internet address is configured on an interface. + * Return true if an internet address is configured on an interface. */ -int +bool in_ifhasaddr(struct ifnet *ifp, struct in_addr in) { struct ifaddr *ifa; @@ -200,10 +201,10 @@ in_ifhasaddr(struct ifnet *ifp, struct in_addr in) continue; ia = (struct in_ifaddr *)ifa; if (ia->ia_addr.sin_addr.s_addr == in.s_addr) - return (1); + return (true); } - return (0); + return (false); } /* @@ -271,18 +272,19 @@ in_findlocal(uint32_t fibnum, bool loopback_ok) * that may not be forwarded, or whether datagrams to that destination * may be forwarded. */ -int +bool in_canforward(struct in_addr in) { u_long i = ntohl(in.s_addr); - if (IN_MULTICAST(i) || IN_LINKLOCAL(i) || IN_LOOPBACK(i)) - return (0); + if (IN_MULTICAST(i) || IN_LINKLOCAL(i) || IN_LOOPBACK(i) || + in_nullhost(in)) + return (false); if (IN_EXPERIMENTAL(i) && !V_ip_allow_net240) - return (0); + return (false); if (IN_ZERONET(i) && !V_ip_allow_net0) - return (0); - return (1); + return (false); + return (true); } /* @@ -442,6 +444,27 @@ in_control_ioctl(u_long cmd, void *data, struct ifnet *ifp, } int +in_mask2len(struct in_addr *mask) +{ + int x, y; + u_char *p; + + p = (u_char *)mask; + for (x = 0; x < sizeof(*mask); x++) { + if (p[x] != 0xff) + break; + } + y = 0; + if (x < sizeof(*mask)) { + for (y = 0; y < 8; y++) { + if ((p[x] & (0x80 >> y)) == 0) + break; + } + } + return (x * 8 + y); +} + +int in_control(struct socket *so, u_long cmd, void *data, struct ifnet *ifp, struct thread *td) { @@ -497,6 +520,13 @@ in_aifaddr_ioctl(u_long cmd, caddr_t data, struct ifnet *ifp, struct ucred *cred #endif /* + * Check if bridge wants to allow adding addrs to member interfaces. + */ + if (ifp->if_bridge && bridge_member_ifaddrs_p && + !bridge_member_ifaddrs_p()) + return (EINVAL); + + /* * See whether address already exist. */ iaIsFirst = true; @@ -1222,7 +1252,7 @@ in_ifscrub_all(void) { struct ifnet *ifp; struct ifaddr *ifa, *nifa; - struct ifaliasreq ifr; + struct ifreq ifr; IFNET_RLOCK(); CK_STAILQ_FOREACH(ifp, &V_ifnet, if_link) { @@ -1237,9 +1267,7 @@ in_ifscrub_all(void) * cleanly remove addresses and everything attached. */ bzero(&ifr, sizeof(ifr)); - ifr.ifra_addr = *ifa->ifa_addr; - if (ifa->ifa_dstaddr) - ifr.ifra_broadaddr = *ifa->ifa_dstaddr; + ifr.ifr_addr = *ifa->ifa_addr; (void)in_control(NULL, SIOCDIFADDR, (caddr_t)&ifr, ifp, NULL); } @@ -1250,7 +1278,7 @@ in_ifscrub_all(void) IFNET_RUNLOCK(); } -int +bool in_ifaddr_broadcast(struct in_addr in, struct in_ifaddr *ia) { @@ -1259,7 +1287,8 @@ in_ifaddr_broadcast(struct in_addr in, struct in_ifaddr *ia) * Optionally check for old-style (host 0) broadcast, but * taking into account that RFC 3021 obsoletes it. 
*/ - (V_broadcast_lowest && ia->ia_subnetmask != IN_RFC3021_MASK && + __predict_false(V_broadcast_lowest && + ia->ia_subnetmask != IN_RFC3021_MASK && ntohl(in.s_addr) == ia->ia_subnet)) && /* * Check for an all one subnetmask. These @@ -1270,33 +1299,28 @@ in_ifaddr_broadcast(struct in_addr in, struct in_ifaddr *ia) } /* - * Return 1 if the address might be a local broadcast address. + * Return true if the address might be a local broadcast address. */ -int -in_broadcast(struct in_addr in, struct ifnet *ifp) +bool +in_ifnet_broadcast(struct in_addr in, struct ifnet *ifp) { struct ifaddr *ifa; - int found; NET_EPOCH_ASSERT(); - if (in.s_addr == INADDR_BROADCAST || - in.s_addr == INADDR_ANY) - return (1); + if (in_broadcast(in)) + return (true); if ((ifp->if_flags & IFF_BROADCAST) == 0) - return (0); - found = 0; + return (false); /* * Look through the list of addresses for a match * with a broadcast address. */ CK_STAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) if (ifa->ifa_addr->sa_family == AF_INET && - in_ifaddr_broadcast(in, (struct in_ifaddr *)ifa)) { - found = 1; - break; - } - return (found); + in_ifaddr_broadcast(in, (struct in_ifaddr *)ifa)) + return (true); + return (false); } /* @@ -1473,9 +1497,6 @@ in_lltable_new(struct in_addr addr4, u_int flags) return (&lle->base); } -#define IN_ARE_MASKED_ADDR_EQUAL(d, a, m) ( \ - ((((d).s_addr ^ (a).s_addr) & (m).s_addr)) == 0 ) - static int in_lltable_match_prefix(const struct sockaddr *saddr, const struct sockaddr *smask, u_int flags, struct llentry *lle) diff --git a/sys/netinet/in.h b/sys/netinet/in.h index f4fc41178399..3f2c388548ec 100644 --- a/sys/netinet/in.h +++ b/sys/netinet/in.h @@ -672,13 +672,13 @@ int getsourcefilter(int, uint32_t, struct sockaddr *, socklen_t, struct ifnet; struct mbuf; /* forward declarations for Standard C */ struct in_ifaddr; -int in_broadcast(struct in_addr, struct ifnet *); -int in_ifaddr_broadcast(struct in_addr, struct in_ifaddr *); -int in_canforward(struct in_addr); -int in_localaddr(struct in_addr); +bool in_ifnet_broadcast(struct in_addr, struct ifnet *); +bool in_ifaddr_broadcast(struct in_addr, struct in_ifaddr *); +bool in_canforward(struct in_addr); +bool in_localaddr(struct in_addr); bool in_localip(struct in_addr); bool in_localip_fib(struct in_addr, uint16_t); -int in_ifhasaddr(struct ifnet *, struct in_addr); +bool in_ifhasaddr(struct ifnet *, struct in_addr); struct in_ifaddr *in_findlocal(uint32_t, bool); int inet_aton(const char *, struct in_addr *); /* in libkern */ char *inet_ntoa_r(struct in_addr ina, char *buf); /* in libkern */ @@ -686,6 +686,13 @@ char *inet_ntop(int, const void *, char *, socklen_t); /* in libkern */ int inet_pton(int af, const char *, void *); /* in libkern */ void in_ifdetach(struct ifnet *); +static inline bool +in_broadcast(struct in_addr in) +{ + return (in.s_addr == __htonl(INADDR_BROADCAST) || + in.s_addr == __htonl(INADDR_ANY)); +} + #define in_hosteq(s, t) ((s).s_addr == (t).s_addr) #define in_nullhost(x) ((x).s_addr == INADDR_ANY) #define in_allhosts(x) ((x).s_addr == htonl(INADDR_ALLHOSTS_GROUP)) diff --git a/sys/netinet/in_fib_dxr.c b/sys/netinet/in_fib_dxr.c index e7eede53ea51..b889131b544b 100644 --- a/sys/netinet/in_fib_dxr.c +++ b/sys/netinet/in_fib_dxr.c @@ -1,7 +1,7 @@ /*- * SPDX-License-Identifier: BSD-2-Clause * - * Copyright (c) 2012-2022 Marko Zec + * Copyright (c) 2012-2024 Marko Zec * Copyright (c) 2005, 2018 University of Zagreb * Copyright (c) 2005 International Computer Science Institute * @@ -68,9 +68,6 @@ CTASSERT(DXR_TRIE_BITS >= 16 
&& DXR_TRIE_BITS <= 24); -/* DXR2: two-stage primary trie, instead of a single direct lookup table */ -#define DXR2 - #if DXR_TRIE_BITS > 16 #define DXR_D 16 #else @@ -317,7 +314,6 @@ range_lookup(struct range_entry_long *rt, struct direct_entry de, uint32_t dst) ntohl(key.addr4.s_addr))]); \ } -#ifdef DXR2 #if DXR_TRIE_BITS > 16 DXR_LOOKUP_DEFINE(16) #endif @@ -328,23 +324,16 @@ DXR_LOOKUP_DEFINE(12) DXR_LOOKUP_DEFINE(11) DXR_LOOKUP_DEFINE(10) DXR_LOOKUP_DEFINE(9) -#endif /* DXR2 */ static int inline dxr_lookup(struct dxr *dxr, uint32_t dst) { struct direct_entry de; -#ifdef DXR2 uint16_t *dt = dxr->d; struct direct_entry *xt = dxr->x; de = xt[(dt[dst >> dxr->d_shift] << dxr->x_shift) + ((dst >> DXR_RANGE_SHIFT) & dxr->x_mask)]; -#else /* !DXR2 */ - struct direct_entry *dt = dxr->d; - - de = dt[dst >> DXR_RANGE_SHIFT]; -#endif /* !DXR2 */ if (__predict_true(de.fragments == FRAGS_MARK_HIT)) return (de.base); return (range_lookup(dxr->r, de, dst)); @@ -474,8 +463,7 @@ chunk_ref(struct dxr_aux *da, uint32_t chunk) cdp->cd_max_size = size; cdp->cd_base = fdesc->base; LIST_INSERT_HEAD(&da->all_chunks, cdp, cd_all_le); - KASSERT(cdp->cd_base + cdp->cd_max_size == da->rtbl_top, - ("dxr: %s %d", __FUNCTION__, __LINE__)); + MPASS(cdp->cd_base + cdp->cd_max_size == da->rtbl_top); } cdp->cd_hash = hash; @@ -497,8 +485,11 @@ chunk_ref(struct dxr_aux *da, uint32_t chunk) da->range_tbl = realloc(da->range_tbl, sizeof(*da->range_tbl) * da->rtbl_size + FRAGS_PREF_SHORT, M_DXRAUX, M_NOWAIT); - if (da->range_tbl == NULL) + if (da->range_tbl == NULL) { + FIB_PRINTF(LOG_NOTICE, da->fd, + "Unable to allocate DXR range table"); return (1); + } } return (0); @@ -522,7 +513,7 @@ chunk_unref(struct dxr_aux *da, uint32_t chunk) sizeof(struct range_entry_long) * size) == 0) break; - KASSERT(cdp != NULL, ("dxr: dangling chunk")); + MPASS(cdp != NULL); if (--cdp->cd_refcnt > 0) return; @@ -533,8 +524,7 @@ chunk_unref(struct dxr_aux *da, uint32_t chunk) /* Attempt to merge with the preceding chunk, if empty */ cdp2 = LIST_NEXT(cdp, cd_all_le); if (cdp2 != NULL && cdp2->cd_cur_size == 0) { - KASSERT(cdp2->cd_base + cdp2->cd_max_size == cdp->cd_base, - ("dxr: %s %d", __FUNCTION__, __LINE__)); + MPASS(cdp2->cd_base + cdp2->cd_max_size == cdp->cd_base); LIST_REMOVE(cdp, cd_all_le); LIST_REMOVE(cdp2, cd_hash_le); cdp2->cd_max_size += cdp->cd_max_size; @@ -545,8 +535,7 @@ chunk_unref(struct dxr_aux *da, uint32_t chunk) /* Attempt to merge with the subsequent chunk, if empty */ cdp2 = LIST_PREV(cdp, &da->all_chunks, chunk_desc, cd_all_le); if (cdp2 != NULL && cdp2->cd_cur_size == 0) { - KASSERT(cdp->cd_base + cdp->cd_max_size == cdp2->cd_base, - ("dxr: %s %d", __FUNCTION__, __LINE__)); + MPASS(cdp->cd_base + cdp->cd_max_size == cdp2->cd_base); LIST_REMOVE(cdp, cd_all_le); LIST_REMOVE(cdp2, cd_hash_le); cdp2->cd_max_size += cdp->cd_max_size; @@ -557,8 +546,7 @@ chunk_unref(struct dxr_aux *da, uint32_t chunk) if (cdp->cd_base + cdp->cd_max_size == da->rtbl_top) { /* Free the chunk on the top of the range heap, trim the heap */ - KASSERT(cdp == LIST_FIRST(&da->all_chunks), - ("dxr: %s %d", __FUNCTION__, __LINE__)); + MPASS(cdp == LIST_FIRST(&da->all_chunks)); da->rtbl_top -= cdp->cd_max_size; da->unused_chunks_size -= cdp->cd_max_size; LIST_REMOVE(cdp, cd_all_le); @@ -572,7 +560,6 @@ chunk_unref(struct dxr_aux *da, uint32_t chunk) LIST_INSERT_HEAD(&da->unused_chunks[i], cdp, cd_hash_le); } -#ifdef DXR2 static uint32_t trie_hash(struct dxr_aux *da, uint32_t dxr_x, uint32_t index) { @@ -632,8 +619,11 @@ trie_ref(struct dxr_aux 
*da, uint32_t index) da->xtbl_size += XTBL_SIZE_INCR; da->x_tbl = realloc(da->x_tbl, sizeof(*da->x_tbl) * da->xtbl_size, M_DXRAUX, M_NOWAIT); - if (da->x_tbl == NULL) + if (da->x_tbl == NULL) { + FIB_PRINTF(LOG_NOTICE, da->fd, + "Unable to allocate DXR extension table"); return (-1); + } } return(tp->td_index); } @@ -668,7 +658,6 @@ trie_unref(struct dxr_aux *da, uint32_t index) } } while (tp != NULL); } -#endif static void heap_inject(struct dxr_aux *da, uint32_t start, uint32_t end, uint32_t preflen, @@ -862,21 +851,23 @@ dxr_build(struct dxr *dxr) uint32_t r_size, dxr_tot_size; uint32_t i, m, range_rebuild = 0; uint32_t range_frag; -#ifdef DXR2 struct trie_desc *tp; uint32_t d_tbl_size, dxr_x, d_size, x_size; uint32_t ti, trie_rebuild = 0, prev_size = 0; uint32_t trie_frag; -#endif - KASSERT(dxr->d == NULL, ("dxr: d not free")); + MPASS(dxr->d == NULL); if (da == NULL) { da = malloc(sizeof(*dxr->aux), M_DXRAUX, M_NOWAIT); - if (da == NULL) + if (da == NULL) { + FIB_PRINTF(LOG_NOTICE, dxr->fd, + "Unable to allocate DXR aux struct"); return; + } dxr->aux = da; da->fibnum = dxr->fibnum; + da->fd = dxr->fd; da->refcnt = 1; LIST_INIT(&da->all_chunks); LIST_INIT(&da->all_trie); @@ -894,20 +885,23 @@ dxr_build(struct dxr *dxr) if (da->range_tbl == NULL) { da->range_tbl = malloc(sizeof(*da->range_tbl) * da->rtbl_size + FRAGS_PREF_SHORT, M_DXRAUX, M_NOWAIT); - if (da->range_tbl == NULL) + if (da->range_tbl == NULL) { + FIB_PRINTF(LOG_NOTICE, da->fd, + "Unable to allocate DXR range table"); return; + } range_rebuild = 1; } -#ifdef DXR2 if (da->x_tbl == NULL) { da->x_tbl = malloc(sizeof(*da->x_tbl) * da->xtbl_size, M_DXRAUX, M_NOWAIT); - if (da->x_tbl == NULL) + if (da->x_tbl == NULL) { + FIB_PRINTF(LOG_NOTICE, da->fd, + "Unable to allocate DXR extension table"); return; + } trie_rebuild = 1; } -#endif - da->fd = dxr->fd; microuptime(&t0); @@ -959,7 +953,6 @@ range_build: r_size = sizeof(*da->range_tbl) * da->rtbl_top; microuptime(&t1); -#ifdef DXR2 if (range_rebuild || abs(fls(da->prefixes) - fls(da->trie_rebuilt_prefixes)) > 1) trie_rebuild = 1; @@ -1033,15 +1026,13 @@ dxr2_try_squeeze: goto dxr2_try_squeeze; } microuptime(&t2); -#else /* !DXR2 */ - dxr_tot_size = sizeof(da->direct_tbl) + r_size; - t2 = t1; -#endif dxr->d = malloc(dxr_tot_size, M_DXRLPM, M_NOWAIT); - if (dxr->d == NULL) + if (dxr->d == NULL) { + FIB_PRINTF(LOG_NOTICE, da->fd, + "Unable to allocate DXR lookup table"); return; -#ifdef DXR2 + } memcpy(dxr->d, da->d_tbl, d_size); dxr->x = ((char *) dxr->d) + d_size; memcpy(dxr->x, da->x_tbl, x_size); @@ -1049,10 +1040,6 @@ dxr2_try_squeeze: dxr->d_shift = 32 - da->d_bits; dxr->x_shift = dxr_x; dxr->x_mask = 0xffffffffU >> (32 - dxr_x); -#else /* !DXR2 */ - memcpy(dxr->d, da->direct_tbl, sizeof(da->direct_tbl)); - dxr->r = ((char *) dxr->d) + sizeof(da->direct_tbl); -#endif memcpy(dxr->r, da->range_tbl, r_size); if (da->updates_low <= da->updates_high) @@ -1062,43 +1049,31 @@ dxr2_try_squeeze: da->updates_high = 0; microuptime(&t3); -#ifdef DXR2 FIB_PRINTF(LOG_INFO, da->fd, "D%dX%dR, %d prefixes, %d nhops (max)", da->d_bits, dxr_x, rinfo.num_prefixes, rinfo.num_nhops); -#else - FIB_PRINTF(LOG_INFO, da->fd, "D%dR, %d prefixes, %d nhops (max)", - DXR_D, rinfo.num_prefixes, rinfo.num_nhops); -#endif i = dxr_tot_size * 100; if (rinfo.num_prefixes) i /= rinfo.num_prefixes; FIB_PRINTF(LOG_INFO, da->fd, "%d.%02d KBytes, %d.%02d Bytes/prefix", dxr_tot_size / 1024, dxr_tot_size * 100 / 1024 % 100, i / 100, i % 100); -#ifdef DXR2 FIB_PRINTF(LOG_INFO, da->fd, "%d.%02d%% trie, %d.%02d%% range 
fragmentation", trie_frag / 100, trie_frag % 100, range_frag / 100, range_frag % 100); -#else - FIB_PRINTF(LOG_INFO, da->fd, "%d.%01d%% range fragmentation", - range_frag / 100, range_frag % 100); -#endif i = (t1.tv_sec - t0.tv_sec) * 1000000 + t1.tv_usec - t0.tv_usec; FIB_PRINTF(LOG_INFO, da->fd, "range table %s in %u.%03u ms", range_rebuild ? "rebuilt" : "updated", i / 1000, i % 1000); -#ifdef DXR2 i = (t2.tv_sec - t1.tv_sec) * 1000000 + t2.tv_usec - t1.tv_usec; FIB_PRINTF(LOG_INFO, da->fd, "trie %s in %u.%03u ms", trie_rebuild ? "rebuilt" : "updated", i / 1000, i % 1000); -#endif i = (t3.tv_sec - t2.tv_sec) * 1000000 + t3.tv_usec - t2.tv_usec; FIB_PRINTF(LOG_INFO, da->fd, "snapshot forked in %u.%03u ms", i / 1000, i % 1000); } /* - * Glue functions for attaching to FreeBSD 13 fib_algo infrastructure. + * Glue functions for attaching to the FIB_ALGO infrastructure. */ static struct nhop_object * @@ -1118,11 +1093,15 @@ dxr_init(uint32_t fibnum, struct fib_data *fd, void *old_data, void **data) struct dxr *dxr; dxr = malloc(sizeof(*dxr), M_DXRAUX, M_NOWAIT); - if (dxr == NULL) + if (dxr == NULL) { + FIB_PRINTF(LOG_NOTICE, fd, + "Unable to allocate DXR container struct"); return (FLM_REBUILD); + } /* Check whether we may reuse the old auxiliary structures */ - if (old_dxr != NULL && old_dxr->aux != NULL) { + if (old_dxr != NULL && old_dxr->aux != NULL && + old_dxr->aux->fd == fd) { da = old_dxr->aux; atomic_add_int(&da->refcnt, 1); } @@ -1140,14 +1119,11 @@ static void dxr_destroy(void *data) { struct dxr *dxr = data; - struct dxr_aux *da; + struct dxr_aux *da = dxr->aux; struct chunk_desc *cdp; struct trie_desc *tp; - if (dxr->d != NULL) - free(dxr->d, M_DXRLPM); - - da = dxr->aux; + free(dxr->d, M_DXRLPM); free(dxr, M_DXRAUX); if (da == NULL || atomic_fetchadd_int(&da->refcnt, -1) > 1) @@ -1179,7 +1155,6 @@ static void * choose_lookup_fn(struct dxr_aux *da) { -#ifdef DXR2 switch (da->d_bits) { #if DXR_TRIE_BITS > 16 case 16: @@ -1200,7 +1175,6 @@ choose_lookup_fn(struct dxr_aux *da) case 9: return (dxr_fib_lookup_9); } -#endif /* DXR2 */ return (dxr_fib_lookup); } @@ -1213,17 +1187,12 @@ dxr_dump_end(void *data, struct fib_dp *dp) dxr_build(dxr); da = dxr->aux; - if (da == NULL) + if (da == NULL || dxr->d == NULL) return (FLM_REBUILD); - /* Structural limit exceeded, hard error */ if (da->rtbl_top >= BASE_MAX) return (FLM_ERROR); - /* A malloc(,, M_NOWAIT) failed somewhere, retry later */ - if (dxr->d == NULL) - return (FLM_REBUILD); - dp->f = choose_lookup_fn(da); dp->arg = dxr; @@ -1260,13 +1229,14 @@ dxr_change_rib_batch(struct rib_head *rnh, struct fib_change_queue *q, int update_delta = 0; #endif - KASSERT(data != NULL, ("%s: NULL data", __FUNCTION__)); - KASSERT(q != NULL, ("%s: NULL q", __FUNCTION__)); - KASSERT(q->count < q->size, ("%s: q->count %d q->size %d", - __FUNCTION__, q->count, q->size)); + MPASS(data != NULL); + MPASS(q != NULL); + MPASS(q->count < q->size); da = dxr->aux; - KASSERT(da != NULL, ("%s: NULL dxr->aux", __FUNCTION__)); + MPASS(da != NULL); + MPASS(da->fd == dxr->fd); + MPASS(da->refcnt > 0); FIB_PRINTF(LOG_INFO, da->fd, "processing %d update(s)", q->count); for (ui = 0; ui < q->count; ui++) { @@ -1299,8 +1269,7 @@ dxr_change_rib_batch(struct rib_head *rnh, struct fib_change_queue *q, #ifdef INVARIANTS fib_get_rtable_info(fib_get_rh(da->fd), &rinfo); - KASSERT(da->prefixes + update_delta == rinfo.num_prefixes, - ("%s: update count mismatch", __FUNCTION__)); + MPASS(da->prefixes + update_delta == rinfo.num_prefixes); #endif res = dxr_init(0, dxr->fd, data, 
(void **) &new_dxr); @@ -1315,7 +1284,6 @@ dxr_change_rib_batch(struct rib_head *rnh, struct fib_change_queue *q, return (FLM_ERROR); } - /* A malloc(,, M_NOWAIT) failed somewhere, retry later */ if (new_dxr->d == NULL) { dxr_destroy(new_dxr); return (FLM_REBUILD); @@ -1329,6 +1297,7 @@ dxr_change_rib_batch(struct rib_head *rnh, struct fib_change_queue *q, return (FLM_SUCCESS); } + FIB_PRINTF(LOG_NOTICE, dxr->fd, "fib_set_datapath_ptr() failed"); dxr_destroy(new_dxr); return (FLM_REBUILD); } diff --git a/sys/netinet/in_jail.c b/sys/netinet/in_jail.c index 9b6b8f670df1..7b2120f575ca 100644 --- a/sys/netinet/in_jail.c +++ b/sys/netinet/in_jail.c @@ -97,13 +97,7 @@ prison_qcmp_v4(const void *ip1, const void *ip2) bool prison_valid_v4(const void *ip) { - in_addr_t ia = ((const struct in_addr *)ip)->s_addr; - - /* - * We do not have to care about byte order for these - * checks so we will do them in NBO. - */ - return (ia != INADDR_ANY && ia != INADDR_BROADCAST); + return (!in_broadcast(*(const struct in_addr *)ip)); } /* diff --git a/sys/netinet/in_kdtrace.c b/sys/netinet/in_kdtrace.c index 2a53b11c3be2..de2a98ce541c 100644 --- a/sys/netinet/in_kdtrace.c +++ b/sys/netinet/in_kdtrace.c @@ -36,7 +36,7 @@ SDT_PROVIDER_DEFINE(tcp); SDT_PROVIDER_DEFINE(udp); SDT_PROVIDER_DEFINE(udplite); -#ifndef KDTRACE_NO_MIB_SDT +#ifdef KDTRACE_MIB_SDT #define MIB_PROBE_IP(name) \ SDT_PROBE_DEFINE1(mib, ip, count, name, \ "int") @@ -286,6 +286,8 @@ MIB_PROBE_TCP(tcps_sc_unreach); MIB_PROBE_TCP(tcps_sc_zonefail); MIB_PROBE_TCP(tcps_sc_sendcookie); MIB_PROBE_TCP(tcps_sc_recvcookie); +MIB_PROBE_TCP(tcps_sc_spurcookie); +MIB_PROBE_TCP(tcps_sc_failcookie); MIB_PROBE_TCP(tcps_hc_added); MIB_PROBE_TCP(tcps_hc_bucketoverflow); @@ -294,6 +296,7 @@ MIB_PROBE_TCP(tcps_finwait2_drops); MIB_PROBE_TCP(tcps_sack_recovery_episode); MIB_PROBE_TCP(tcps_sack_rexmits); +MIB_PROBE_TCP(tcps_sack_rexmits_tso); MIB_PROBE_TCP(tcps_sack_rexmit_bytes); MIB_PROBE_TCP(tcps_sack_rcv_blocks); MIB_PROBE_TCP(tcps_sack_send_blocks); @@ -338,6 +341,101 @@ MIB_PROBE_TCP(tcps_ecn_sndect1); MIB_PROBE_TCP(tcps_tlpresends); MIB_PROBE_TCP(tcps_tlpresend_bytes); +MIB_PROBE_TCP(tcps_rcvghostack); +MIB_PROBE_TCP(tcps_rcvacktooold); + +#define MIB_PROBE_IPSEC(name) SDT_PROBE_DEFINE1(mib, ipsec, count, name, "int") + +MIB_PROBE_IPSEC(ips_in_polvio); +MIB_PROBE_IPSEC(ips_in_nomem); +MIB_PROBE_IPSEC(ips_in_inval); +MIB_PROBE_IPSEC(ips_out_polvio); +MIB_PROBE_IPSEC(ips_out_nosa); +MIB_PROBE_IPSEC(ips_out_nomem); +MIB_PROBE_IPSEC(ips_out_noroute); +MIB_PROBE_IPSEC(ips_out_inval); +MIB_PROBE_IPSEC(ips_out_bundlesa); + +MIB_PROBE_IPSEC(ips_spdcache_hits); +MIB_PROBE_IPSEC(ips_spdcache_misses); + +MIB_PROBE_IPSEC(ips_clcopied); +MIB_PROBE_IPSEC(ips_mbinserted); +MIB_PROBE_IPSEC(ips_input_front); +MIB_PROBE_IPSEC(ips_input_middle); +MIB_PROBE_IPSEC(ips_input_end); + +#define MIB_PROBE_ESP(name) SDT_PROBE_DEFINE1(mib, esp, count, name, "int") +#define MIB_PROBE2_ESP(name) SDT_PROBE_DEFINE2(mib, esp, count, name, "int", "int") + +MIB_PROBE_ESP(esps_hdrops); +MIB_PROBE_ESP(esps_nopf); +MIB_PROBE_ESP(esps_notdb); +MIB_PROBE_ESP(esps_badkcr); +MIB_PROBE_ESP(esps_qfull); +MIB_PROBE_ESP(esps_noxform); +MIB_PROBE_ESP(esps_badilen); +MIB_PROBE_ESP(esps_wrap); +MIB_PROBE_ESP(esps_badenc); +MIB_PROBE_ESP(esps_badauth); +MIB_PROBE_ESP(esps_replay); +MIB_PROBE_ESP(esps_input); +MIB_PROBE_ESP(esps_output); +MIB_PROBE_ESP(esps_invalid); +MIB_PROBE_ESP(esps_ibytes); +MIB_PROBE_ESP(esps_obytes); +MIB_PROBE_ESP(esps_toobig); +MIB_PROBE_ESP(esps_pdrops); +MIB_PROBE_ESP(esps_crypto); 
+MIB_PROBE_ESP(esps_tunnel); +MIB_PROBE2_ESP(esps_hist); + +#define MIB_PROBE_AH(name) SDT_PROBE_DEFINE1(mib, ah, count, name, "int") +#define MIB_PROBE_AH2(name) SDT_PROBE_DEFINE2(mib, ah, count, name, "int", "int") + +MIB_PROBE_AH(ahs_hdrops); +MIB_PROBE_AH(ahs_nopf); +MIB_PROBE_AH(ahs_notdb); +MIB_PROBE_AH(ahs_badkcr); +MIB_PROBE_AH(ahs_badauth); +MIB_PROBE_AH(ahs_noxform); +MIB_PROBE_AH(ahs_qfull); +MIB_PROBE_AH(ahs_wrap); +MIB_PROBE_AH(ahs_replay); +MIB_PROBE_AH(ahs_badauthl); +MIB_PROBE_AH(ahs_input); +MIB_PROBE_AH(ahs_output); +MIB_PROBE_AH(ahs_invalid); +MIB_PROBE_AH(ahs_ibytes); +MIB_PROBE_AH(ahs_obytes); +MIB_PROBE_AH(ahs_toobig); +MIB_PROBE_AH(ahs_pdrops); +MIB_PROBE_AH(ahs_crypto); +MIB_PROBE_AH(ahs_tunnel); +MIB_PROBE_AH2(ahs_hist); + +#define MIB_PROBE_IPCOMP(name) SDT_PROBE_DEFINE1(mib, ipcomp, count, name, "int") +#define MIB_PROBE_IPCOMP2(name) SDT_PROBE_DEFINE2(mib, ipcomp, count, name, "int", "int") + +MIB_PROBE_IPCOMP(ipcomps_hdrops); +MIB_PROBE_IPCOMP(ipcomps_nopf); +MIB_PROBE_IPCOMP(ipcomps_notdb); +MIB_PROBE_IPCOMP(ipcomps_badkcr); +MIB_PROBE_IPCOMP(ipcomps_qfull); +MIB_PROBE_IPCOMP(ipcomps_noxform); +MIB_PROBE_IPCOMP(ipcomps_wrap); +MIB_PROBE_IPCOMP(ipcomps_input); +MIB_PROBE_IPCOMP(ipcomps_output); +MIB_PROBE_IPCOMP(ipcomps_invalid); +MIB_PROBE_IPCOMP(ipcomps_ibytes); +MIB_PROBE_IPCOMP(ipcomps_obytes); +MIB_PROBE_IPCOMP(ipcomps_toobig); +MIB_PROBE_IPCOMP(ipcomps_pdrops); +MIB_PROBE_IPCOMP(ipcomps_crypto); +MIB_PROBE_IPCOMP2(ipcomps_hist); +MIB_PROBE_IPCOMP(ipcomps_threshold); +MIB_PROBE_IPCOMP(ipcomps_uncompr); + #endif SDT_PROBE_DEFINE6_XLATE(ip, , , receive, diff --git a/sys/netinet/in_kdtrace.h b/sys/netinet/in_kdtrace.h index 780839299993..a203b660d777 100644 --- a/sys/netinet/in_kdtrace.h +++ b/sys/netinet/in_kdtrace.h @@ -54,7 +54,7 @@ SDT_PROVIDER_DECLARE(tcp); SDT_PROVIDER_DECLARE(udp); SDT_PROVIDER_DECLARE(udplite); -#ifndef KDTRACE_NO_MIB_SDT +#ifdef KDTRACE_MIB_SDT SDT_PROVIDER_DECLARE(mib); SDT_PROBE_DECLARE(mib, ip, count, ips_total); @@ -278,6 +278,8 @@ SDT_PROBE_DECLARE(mib, tcp, count, tcps_sc_unreach); SDT_PROBE_DECLARE(mib, tcp, count, tcps_sc_zonefail); SDT_PROBE_DECLARE(mib, tcp, count, tcps_sc_sendcookie); SDT_PROBE_DECLARE(mib, tcp, count, tcps_sc_recvcookie); +SDT_PROBE_DECLARE(mib, tcp, count, tcps_sc_spurcookie); +SDT_PROBE_DECLARE(mib, tcp, count, tcps_sc_failcookie); SDT_PROBE_DECLARE(mib, tcp, count, tcps_hc_added); SDT_PROBE_DECLARE(mib, tcp, count, tcps_hc_bucketoverflow); @@ -286,6 +288,7 @@ SDT_PROBE_DECLARE(mib, tcp, count, tcps_finwait2_drops); SDT_PROBE_DECLARE(mib, tcp, count, tcps_sack_recovery_episode); SDT_PROBE_DECLARE(mib, tcp, count, tcps_sack_rexmits); +SDT_PROBE_DECLARE(mib, tcp, count, tcps_sack_rexmits_tso); SDT_PROBE_DECLARE(mib, tcp, count, tcps_sack_rexmit_bytes); SDT_PROBE_DECLARE(mib, tcp, count, tcps_sack_rcv_blocks); SDT_PROBE_DECLARE(mib, tcp, count, tcps_sack_send_blocks); @@ -329,6 +332,91 @@ SDT_PROBE_DECLARE(mib, tcp, count, tcps_ecn_sndect1); SDT_PROBE_DECLARE(mib, tcp, count, tcps_tlpresends); SDT_PROBE_DECLARE(mib, tcp, count, tcps_tlpresend_bytes); + +SDT_PROBE_DECLARE(mib, tcp, count, tcps_rcvghostack); +SDT_PROBE_DECLARE(mib, tcp, count, tcps_rcvacktooold); + +SDT_PROBE_DECLARE(mib, ipsec, count, ips_in_polvio); +SDT_PROBE_DECLARE(mib, ipsec, count, ips_in_nomem); +SDT_PROBE_DECLARE(mib, ipsec, count, ips_in_inval); +SDT_PROBE_DECLARE(mib, ipsec, count, ips_out_polvio); +SDT_PROBE_DECLARE(mib, ipsec, count, ips_out_nosa); +SDT_PROBE_DECLARE(mib, ipsec, count, ips_out_nomem); +SDT_PROBE_DECLARE(mib, 
ipsec, count, ips_out_noroute); +SDT_PROBE_DECLARE(mib, ipsec, count, ips_out_inval); +SDT_PROBE_DECLARE(mib, ipsec, count, ips_out_bundlesa); + +SDT_PROBE_DECLARE(mib, ipsec, count, ips_spdcache_hits); +SDT_PROBE_DECLARE(mib, ipsec, count, ips_spdcache_misses); + +SDT_PROBE_DECLARE(mib, ipsec, count, ips_clcopied); +SDT_PROBE_DECLARE(mib, ipsec, count, ips_mbinserted); +SDT_PROBE_DECLARE(mib, ipsec, count, ips_input_front); +SDT_PROBE_DECLARE(mib, ipsec, count, ips_input_middle); +SDT_PROBE_DECLARE(mib, ipsec, count, ips_input_end); + +SDT_PROBE_DECLARE(mib, esp, count, esps_hdrops); +SDT_PROBE_DECLARE(mib, esp, count, esps_nopf); +SDT_PROBE_DECLARE(mib, esp, count, esps_notdb); +SDT_PROBE_DECLARE(mib, esp, count, esps_badkcr); +SDT_PROBE_DECLARE(mib, esp, count, esps_qfull); +SDT_PROBE_DECLARE(mib, esp, count, esps_noxform); +SDT_PROBE_DECLARE(mib, esp, count, esps_badilen); +SDT_PROBE_DECLARE(mib, esp, count, esps_wrap); +SDT_PROBE_DECLARE(mib, esp, count, esps_badenc); +SDT_PROBE_DECLARE(mib, esp, count, esps_badauth); +SDT_PROBE_DECLARE(mib, esp, count, esps_replay); +SDT_PROBE_DECLARE(mib, esp, count, esps_input); +SDT_PROBE_DECLARE(mib, esp, count, esps_output); +SDT_PROBE_DECLARE(mib, esp, count, esps_invalid); +SDT_PROBE_DECLARE(mib, esp, count, esps_ibytes); +SDT_PROBE_DECLARE(mib, esp, count, esps_obytes); +SDT_PROBE_DECLARE(mib, esp, count, esps_toobig); +SDT_PROBE_DECLARE(mib, esp, count, esps_pdrops); +SDT_PROBE_DECLARE(mib, esp, count, esps_crypto); +SDT_PROBE_DECLARE(mib, esp, count, esps_tunnel); +SDT_PROBE_DECLARE(mib, esp, count, esps_hist); + +SDT_PROBE_DECLARE(mib, ah, count, ahs_hdrops); +SDT_PROBE_DECLARE(mib, ah, count, ahs_nopf); +SDT_PROBE_DECLARE(mib, ah, count, ahs_notdb); +SDT_PROBE_DECLARE(mib, ah, count, ahs_badkcr); +SDT_PROBE_DECLARE(mib, ah, count, ahs_badauth); +SDT_PROBE_DECLARE(mib, ah, count, ahs_noxform); +SDT_PROBE_DECLARE(mib, ah, count, ahs_qfull); +SDT_PROBE_DECLARE(mib, ah, count, ahs_wrap); +SDT_PROBE_DECLARE(mib, ah, count, ahs_replay); +SDT_PROBE_DECLARE(mib, ah, count, ahs_badauthl); +SDT_PROBE_DECLARE(mib, ah, count, ahs_input); +SDT_PROBE_DECLARE(mib, ah, count, ahs_output); +SDT_PROBE_DECLARE(mib, ah, count, ahs_invalid); +SDT_PROBE_DECLARE(mib, ah, count, ahs_ibytes); +SDT_PROBE_DECLARE(mib, ah, count, ahs_obytes); +SDT_PROBE_DECLARE(mib, ah, count, ahs_toobig); +SDT_PROBE_DECLARE(mib, ah, count, ahs_pdrops); +SDT_PROBE_DECLARE(mib, ah, count, ahs_crypto); +SDT_PROBE_DECLARE(mib, ah, count, ahs_tunnel); +SDT_PROBE_DECLARE(mib, ah, count, ahs_hist); + +SDT_PROBE_DECLARE(mib, ipcomp, count, ipcomps_hdrops); +SDT_PROBE_DECLARE(mib, ipcomp, count, ipcomps_nopf); +SDT_PROBE_DECLARE(mib, ipcomp, count, ipcomps_notdb); +SDT_PROBE_DECLARE(mib, ipcomp, count, ipcomps_badkcr); +SDT_PROBE_DECLARE(mib, ipcomp, count, ipcomps_qfull); +SDT_PROBE_DECLARE(mib, ipcomp, count, ipcomps_noxform); +SDT_PROBE_DECLARE(mib, ipcomp, count, ipcomps_wrap); +SDT_PROBE_DECLARE(mib, ipcomp, count, ipcomps_input); +SDT_PROBE_DECLARE(mib, ipcomp, count, ipcomps_output); +SDT_PROBE_DECLARE(mib, ipcomp, count, ipcomps_invalid); +SDT_PROBE_DECLARE(mib, ipcomp, count, ipcomps_ibytes); +SDT_PROBE_DECLARE(mib, ipcomp, count, ipcomps_obytes); +SDT_PROBE_DECLARE(mib, ipcomp, count, ipcomps_toobig); +SDT_PROBE_DECLARE(mib, ipcomp, count, ipcomps_pdrops); +SDT_PROBE_DECLARE(mib, ipcomp, count, ipcomps_crypto); +SDT_PROBE_DECLARE(mib, ipcomp, count, ipcomps_hist); +SDT_PROBE_DECLARE(mib, ipcomp, count, ipcomps_threshold); +SDT_PROBE_DECLARE(mib, ipcomp, count, ipcomps_uncompr); + 
#endif SDT_PROBE_DECLARE(ip, , , receive); diff --git a/sys/netinet/in_pcb.c b/sys/netinet/in_pcb.c index 1a341d421f31..dbe48242381d 100644 --- a/sys/netinet/in_pcb.c +++ b/sys/netinet/in_pcb.c @@ -139,7 +139,7 @@ VNET_DEFINE(int, ipport_randomized) = 1; static struct inpcb *in_pcblookup_hash_locked(struct inpcbinfo *pcbinfo, struct in_addr faddr, u_int fport_arg, struct in_addr laddr, u_int lport_arg, - int lookupflags, uint8_t numa_domain); + int lookupflags, uint8_t numa_domain, int fib); #define RANGECHK(var, min, max) \ if ((var) < (min)) { (var) = (min); } \ @@ -232,7 +232,15 @@ in_pcbhashseed_init(void) V_in_pcbhashseed = arc4random(); } VNET_SYSINIT(in_pcbhashseed_init, SI_SUB_PROTO_DOMAIN, SI_ORDER_FIRST, - in_pcbhashseed_init, 0); + in_pcbhashseed_init, NULL); + +#ifdef INET +VNET_DEFINE_STATIC(int, connect_inaddr_wild) = 0; +#define V_connect_inaddr_wild VNET(connect_inaddr_wild) +SYSCTL_INT(_net_inet_ip, OID_AUTO, connect_inaddr_wild, + CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(connect_inaddr_wild), 0, + "Allow connecting to INADDR_ANY or INADDR_BROADCAST for connect(2)"); +#endif static void in_pcbremhash(struct inpcb *); @@ -245,9 +253,8 @@ static void in_pcbremhash(struct inpcb *); */ static struct inpcblbgroup * -in_pcblbgroup_alloc(struct inpcblbgrouphead *hdr, struct ucred *cred, - u_char vflag, uint16_t port, const union in_dependaddr *addr, int size, - uint8_t numa_domain) +in_pcblbgroup_alloc(struct ucred *cred, u_char vflag, uint16_t port, + const union in_dependaddr *addr, int size, uint8_t numa_domain, int fib) { struct inpcblbgroup *grp; size_t bytes; @@ -256,13 +263,14 @@ in_pcblbgroup_alloc(struct inpcblbgrouphead *hdr, struct ucred *cred, grp = malloc(bytes, M_PCB, M_ZERO | M_NOWAIT); if (grp == NULL) return (NULL); + LIST_INIT(&grp->il_pending); grp->il_cred = crhold(cred); grp->il_vflag = vflag; grp->il_lport = port; grp->il_numa_domain = numa_domain; + grp->il_fibnum = fib; grp->il_dependladdr = *addr; grp->il_inpsiz = size; - CK_LIST_INSERT_HEAD(hdr, grp, il_list); return (grp); } @@ -279,21 +287,82 @@ in_pcblbgroup_free_deferred(epoch_context_t ctx) static void in_pcblbgroup_free(struct inpcblbgroup *grp) { + KASSERT(LIST_EMPTY(&grp->il_pending), + ("local group %p still has pending inps", grp)); CK_LIST_REMOVE(grp, il_list); NET_EPOCH_CALL(in_pcblbgroup_free_deferred, &grp->il_epoch_ctx); } static struct inpcblbgroup * +in_pcblbgroup_find(struct inpcb *inp) +{ + struct inpcbinfo *pcbinfo; + struct inpcblbgroup *grp; + struct inpcblbgrouphead *hdr; + + INP_LOCK_ASSERT(inp); + + pcbinfo = inp->inp_pcbinfo; + INP_HASH_LOCK_ASSERT(pcbinfo); + + hdr = &pcbinfo->ipi_lbgrouphashbase[ + INP_PCBPORTHASH(inp->inp_lport, pcbinfo->ipi_lbgrouphashmask)]; + CK_LIST_FOREACH(grp, hdr, il_list) { + struct inpcb *inp1; + + for (unsigned int i = 0; i < grp->il_inpcnt; i++) { + if (inp == grp->il_inp[i]) + goto found; + } + LIST_FOREACH(inp1, &grp->il_pending, inp_lbgroup_list) { + if (inp == inp1) + goto found; + } + } +found: + return (grp); +} + +static void +in_pcblbgroup_insert(struct inpcblbgroup *grp, struct inpcb *inp) +{ + KASSERT(grp->il_inpcnt < grp->il_inpsiz, + ("invalid local group size %d and count %d", grp->il_inpsiz, + grp->il_inpcnt)); + INP_WLOCK_ASSERT(inp); + + if (inp->inp_socket->so_proto->pr_listen != pr_listen_notsupp && + !SOLISTENING(inp->inp_socket)) { + /* + * If this is a TCP socket, it should not be visible to lbgroup + * lookups until listen() has been called. 
+ */ + LIST_INSERT_HEAD(&grp->il_pending, inp, inp_lbgroup_list); + grp->il_pendcnt++; + } else { + grp->il_inp[grp->il_inpcnt] = inp; + + /* + * Synchronize with in_pcblookup_lbgroup(): make sure that we + * don't expose a null slot to the lookup path. + */ + atomic_store_rel_int(&grp->il_inpcnt, grp->il_inpcnt + 1); + } + + inp->inp_flags |= INP_INLBGROUP; +} + +static struct inpcblbgroup * in_pcblbgroup_resize(struct inpcblbgrouphead *hdr, struct inpcblbgroup *old_grp, int size) { struct inpcblbgroup *grp; int i; - grp = in_pcblbgroup_alloc(hdr, old_grp->il_cred, old_grp->il_vflag, + grp = in_pcblbgroup_alloc(old_grp->il_cred, old_grp->il_vflag, old_grp->il_lport, &old_grp->il_dependladdr, size, - old_grp->il_numa_domain); + old_grp->il_numa_domain, old_grp->il_fibnum); if (grp == NULL) return (NULL); @@ -304,35 +373,16 @@ in_pcblbgroup_resize(struct inpcblbgrouphead *hdr, for (i = 0; i < old_grp->il_inpcnt; ++i) grp->il_inp[i] = old_grp->il_inp[i]; grp->il_inpcnt = old_grp->il_inpcnt; + CK_LIST_INSERT_HEAD(hdr, grp, il_list); + LIST_SWAP(&old_grp->il_pending, &grp->il_pending, inpcb, + inp_lbgroup_list); + grp->il_pendcnt = old_grp->il_pendcnt; + old_grp->il_pendcnt = 0; in_pcblbgroup_free(old_grp); return (grp); } /* - * PCB at index 'i' is removed from the group. Pull up the ones below il_inp[i] - * and shrink group if possible. - */ -static void -in_pcblbgroup_reorder(struct inpcblbgrouphead *hdr, struct inpcblbgroup **grpp, - int i) -{ - struct inpcblbgroup *grp, *new_grp; - - grp = *grpp; - for (; i + 1 < grp->il_inpcnt; ++i) - grp->il_inp[i] = grp->il_inp[i + 1]; - grp->il_inpcnt--; - - if (grp->il_inpsiz > INPCBLBGROUP_SIZMIN && - grp->il_inpcnt <= grp->il_inpsiz / 4) { - /* Shrink this group. */ - new_grp = in_pcblbgroup_resize(hdr, grp, grp->il_inpsiz / 2); - if (new_grp != NULL) - *grpp = new_grp; - } -} - -/* * Add PCB to load balance group for SO_REUSEPORT_LB option. */ static int @@ -344,12 +394,16 @@ in_pcbinslbgrouphash(struct inpcb *inp, uint8_t numa_domain) struct inpcblbgrouphead *hdr; struct inpcblbgroup *grp; uint32_t idx; + int fib; pcbinfo = inp->inp_pcbinfo; INP_WLOCK_ASSERT(inp); INP_HASH_WLOCK_ASSERT(pcbinfo); + fib = (inp->inp_flags & INP_BOUNDFIB) != 0 ? + inp->inp_inc.inc_fibnum : RT_ALL_FIBS; + #ifdef INET6 /* * Don't allow IPv4 mapped INET6 wild socket. @@ -368,6 +422,7 @@ in_pcbinslbgrouphash(struct inpcb *inp, uint8_t numa_domain) grp->il_vflag == inp->inp_vflag && grp->il_lport == inp->inp_lport && grp->il_numa_domain == numa_domain && + grp->il_fibnum == fib && memcmp(&grp->il_dependladdr, &inp->inp_inc.inc_ie.ie_dependladdr, sizeof(grp->il_dependladdr)) == 0) { @@ -376,12 +431,14 @@ in_pcbinslbgrouphash(struct inpcb *inp, uint8_t numa_domain) } if (grp == NULL) { /* Create new load balance group. */ - grp = in_pcblbgroup_alloc(hdr, inp->inp_cred, inp->inp_vflag, + grp = in_pcblbgroup_alloc(inp->inp_cred, inp->inp_vflag, inp->inp_lport, &inp->inp_inc.inc_ie.ie_dependladdr, - INPCBLBGROUP_SIZMIN, numa_domain); + INPCBLBGROUP_SIZMIN, numa_domain, fib); if (grp == NULL) - return (ENOBUFS); - } else if (grp->il_inpcnt == grp->il_inpsiz) { + return (ENOMEM); + in_pcblbgroup_insert(grp, inp); + CK_LIST_INSERT_HEAD(hdr, grp, il_list); + } else if (grp->il_inpcnt + grp->il_pendcnt == grp->il_inpsiz) { if (grp->il_inpsiz >= INPCBLBGROUP_SIZMAX) { if (ratecheck(&lastprint, &interval)) printf("lb group port %d, limit reached\n", @@ -392,16 +449,11 @@ in_pcbinslbgrouphash(struct inpcb *inp, uint8_t numa_domain) /* Expand this local group. 
*/ grp = in_pcblbgroup_resize(hdr, grp, grp->il_inpsiz * 2); if (grp == NULL) - return (ENOBUFS); + return (ENOMEM); + in_pcblbgroup_insert(grp, inp); + } else { + in_pcblbgroup_insert(grp, inp); } - - KASSERT(grp->il_inpcnt < grp->il_inpsiz, - ("invalid local group size %d and count %d", grp->il_inpsiz, - grp->il_inpcnt)); - - grp->il_inp[grp->il_inpcnt] = inp; - grp->il_inpcnt++; - inp->inp_flags |= INP_INLBGROUP; return (0); } @@ -414,6 +466,7 @@ in_pcbremlbgrouphash(struct inpcb *inp) struct inpcbinfo *pcbinfo; struct inpcblbgrouphead *hdr; struct inpcblbgroup *grp; + struct inpcb *inp1; int i; pcbinfo = inp->inp_pcbinfo; @@ -429,27 +482,40 @@ in_pcbremlbgrouphash(struct inpcb *inp) if (grp->il_inp[i] != inp) continue; - if (grp->il_inpcnt == 1) { + if (grp->il_inpcnt == 1 && + LIST_EMPTY(&grp->il_pending)) { /* We are the last, free this local group. */ in_pcblbgroup_free(grp); } else { - /* Pull up inpcbs, shrink group if possible. */ - in_pcblbgroup_reorder(hdr, &grp, i); + grp->il_inp[i] = + grp->il_inp[grp->il_inpcnt - 1]; + + /* + * Synchronize with in_pcblookup_lbgroup(). + */ + atomic_store_rel_int(&grp->il_inpcnt, + grp->il_inpcnt - 1); } inp->inp_flags &= ~INP_INLBGROUP; return; } + LIST_FOREACH(inp1, &grp->il_pending, inp_lbgroup_list) { + if (inp == inp1) { + LIST_REMOVE(inp, inp_lbgroup_list); + grp->il_pendcnt--; + inp->inp_flags &= ~INP_INLBGROUP; + return; + } + } } - KASSERT(0, ("%s: did not find %p", __func__, inp)); + __assert_unreachable(); } int in_pcblbgroup_numa(struct inpcb *inp, int arg) { struct inpcbinfo *pcbinfo; - struct inpcblbgrouphead *hdr; - struct inpcblbgroup *grp; - int err, i; + int error; uint8_t numa_domain; switch (arg) { @@ -465,33 +531,20 @@ in_pcblbgroup_numa(struct inpcb *inp, int arg) numa_domain = arg; } - err = 0; pcbinfo = inp->inp_pcbinfo; INP_WLOCK_ASSERT(inp); INP_HASH_WLOCK(pcbinfo); - hdr = &pcbinfo->ipi_lbgrouphashbase[ - INP_PCBPORTHASH(inp->inp_lport, pcbinfo->ipi_lbgrouphashmask)]; - CK_LIST_FOREACH(grp, hdr, il_list) { - for (i = 0; i < grp->il_inpcnt; ++i) { - if (grp->il_inp[i] != inp) - continue; - - if (grp->il_numa_domain == numa_domain) { - goto abort_with_hash_wlock; - } - - /* Remove it from the old group. */ - in_pcbremlbgrouphash(inp); - - /* Add it to the new group based on numa domain. */ - in_pcbinslbgrouphash(inp, numa_domain); - goto abort_with_hash_wlock; - } + if (in_pcblbgroup_find(inp) != NULL) { + /* Remove it from the old group. */ + in_pcbremlbgrouphash(inp); + /* Add it to the new group based on numa domain. */ + in_pcbinslbgrouphash(inp, numa_domain); + error = 0; + } else { + error = ENOENT; } - err = ENOENT; -abort_with_hash_wlock: INP_HASH_WUNLOCK(pcbinfo); - return (err); + return (error); } /* Make sure it is safe to use hashinit(9) on CK_LIST. 
*/ @@ -523,7 +576,6 @@ in_pcbinfo_init(struct inpcbinfo *pcbinfo, struct inpcbstorage *pcbstor, pcbinfo->ipi_lbgrouphashbase = hashinit(porthash_nelements, M_PCB, &pcbinfo->ipi_lbgrouphashmask); pcbinfo->ipi_zone = pcbstor->ips_zone; - pcbinfo->ipi_portzone = pcbstor->ips_portzone; pcbinfo->ipi_smr = uma_zone_get_smr(pcbinfo->ipi_zone); } @@ -559,10 +611,6 @@ in_pcbstorage_init(void *arg) pcbstor->ips_zone = uma_zcreate(pcbstor->ips_zone_name, pcbstor->ips_size, NULL, NULL, pcbstor->ips_pcbinit, inpcb_fini, UMA_ALIGN_CACHE, UMA_ZONE_SMR); - pcbstor->ips_portzone = uma_zcreate(pcbstor->ips_portzone_name, - sizeof(struct inpcbport), NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0); - uma_zone_set_smr(pcbstor->ips_portzone, - uma_zone_get_smr(pcbstor->ips_zone)); } /* @@ -574,7 +622,6 @@ in_pcbstorage_destroy(void *arg) struct inpcbstorage *pcbstor = arg; uma_zdestroy(pcbstor->ips_zone); - uma_zdestroy(pcbstor->ips_portzone); } /* @@ -666,7 +713,8 @@ out: #ifdef INET int -in_pcbbind(struct inpcb *inp, struct sockaddr_in *sin, struct ucred *cred) +in_pcbbind(struct inpcb *inp, struct sockaddr_in *sin, int flags, + struct ucred *cred) { int anonport, error; @@ -681,13 +729,15 @@ in_pcbbind(struct inpcb *inp, struct sockaddr_in *sin, struct ucred *cred) return (EINVAL); anonport = sin == NULL || sin->sin_port == 0; error = in_pcbbind_setup(inp, sin, &inp->inp_laddr.s_addr, - &inp->inp_lport, cred); + &inp->inp_lport, flags, cred); if (error) return (error); - if (in_pcbinshash(inp) != 0) { + if (__predict_false((error = in_pcbinshash(inp)) != 0)) { + MPASS(inp->inp_socket->so_options & SO_REUSEPORT_LB); inp->inp_laddr.s_addr = INADDR_ANY; inp->inp_lport = 0; - return (EAGAIN); + inp->inp_flags &= ~INP_BOUNDFIB; + return (error); } if (anonport) inp->inp_flags |= INP_ANONPORT; @@ -703,8 +753,9 @@ in_pcbbind(struct inpcb *inp, struct sockaddr_in *sin, struct ucred *cred) * lsa can be NULL for IPv6. 
*/ int -in_pcb_lport_dest(struct inpcb *inp, struct sockaddr *lsa, u_short *lportp, - struct sockaddr *fsa, u_short fport, struct ucred *cred, int lookupflags) +in_pcb_lport_dest(const struct inpcb *inp, struct sockaddr *lsa, + u_short *lportp, struct sockaddr *fsa, u_short fport, struct ucred *cred, + int lookupflags) { struct inpcbinfo *pcbinfo; struct inpcb *tmpinp; @@ -775,7 +826,6 @@ in_pcb_lport_dest(struct inpcb *inp, struct sockaddr *lsa, u_short *lportp, #endif tmpinp = NULL; - lport = *lportp; if (V_ipport_randomized) *lastport = first + (arc4random() % (last - first)); @@ -795,26 +845,28 @@ in_pcb_lport_dest(struct inpcb *inp, struct sockaddr *lsa, u_short *lportp, if (lsa->sa_family == AF_INET) { tmpinp = in_pcblookup_hash_locked(pcbinfo, faddr, fport, laddr, lport, lookupflags, - M_NODOM); + M_NODOM, RT_ALL_FIBS); } #endif #ifdef INET6 if (lsa->sa_family == AF_INET6) { tmpinp = in6_pcblookup_hash_locked(pcbinfo, faddr6, fport, laddr6, lport, lookupflags, - M_NODOM); + M_NODOM, RT_ALL_FIBS); } #endif } else { #ifdef INET6 if ((inp->inp_vflag & INP_IPV6) != 0) { tmpinp = in6_pcblookup_local(pcbinfo, - &inp->in6p_laddr, lport, lookupflags, cred); + &inp->in6p_laddr, lport, RT_ALL_FIBS, + lookupflags, cred); #ifdef INET if (tmpinp == NULL && (inp->inp_vflag & INP_IPV4)) tmpinp = in_pcblookup_local(pcbinfo, - laddr, lport, lookupflags, cred); + laddr, lport, RT_ALL_FIBS, + lookupflags, cred); #endif } #endif @@ -823,7 +875,7 @@ in_pcb_lport_dest(struct inpcb *inp, struct sockaddr *lsa, u_short *lportp, #endif #ifdef INET tmpinp = in_pcblookup_local(pcbinfo, laddr, - lport, lookupflags, cred); + lport, RT_ALL_FIBS, lookupflags, cred); #endif } } while (tmpinp != NULL); @@ -854,6 +906,99 @@ in_pcb_lport(struct inpcb *inp, struct in_addr *laddrp, u_short *lportp, #ifdef INET /* + * Determine whether the inpcb can be bound to the specified address/port tuple. + */ +static int +in_pcbbind_avail(struct inpcb *inp, const struct in_addr laddr, + const u_short lport, const int fib, int sooptions, int lookupflags, + struct ucred *cred) +{ + int reuseport, reuseport_lb; + + INP_LOCK_ASSERT(inp); + INP_HASH_LOCK_ASSERT(inp->inp_pcbinfo); + + reuseport = (sooptions & SO_REUSEPORT); + reuseport_lb = (sooptions & SO_REUSEPORT_LB); + + if (IN_MULTICAST(ntohl(laddr.s_addr))) { + /* + * Treat SO_REUSEADDR as SO_REUSEPORT for multicast; + * allow complete duplication of binding if + * SO_REUSEPORT is set, or if SO_REUSEADDR is set + * and a multicast address is bound on both + * new and duplicated sockets. + */ + if ((sooptions & (SO_REUSEADDR | SO_REUSEPORT)) != 0) + reuseport = SO_REUSEADDR | SO_REUSEPORT; + /* + * XXX: How to deal with SO_REUSEPORT_LB here? + * Treat same as SO_REUSEPORT for now. + */ + if ((sooptions & (SO_REUSEADDR | SO_REUSEPORT_LB)) != 0) + reuseport_lb = SO_REUSEADDR | SO_REUSEPORT_LB; + } else if (!in_nullhost(laddr)) { + struct sockaddr_in sin; + + memset(&sin, 0, sizeof(sin)); + sin.sin_family = AF_INET; + sin.sin_len = sizeof(sin); + sin.sin_addr = laddr; + + /* + * Is the address a local IP address? + * If INP_BINDANY is set, then the socket may be bound + * to any endpoint address, local or not. 
+ */ + if ((inp->inp_flags & INP_BINDANY) == 0 && + ifa_ifwithaddr_check((const struct sockaddr *)&sin) == 0) + return (EADDRNOTAVAIL); + } + + if (lport != 0) { + struct inpcb *t; + + if (ntohs(lport) <= V_ipport_reservedhigh && + ntohs(lport) >= V_ipport_reservedlow && + priv_check_cred(cred, PRIV_NETINET_RESERVEDPORT)) + return (EACCES); + + if (!IN_MULTICAST(ntohl(laddr.s_addr)) && + priv_check_cred(inp->inp_cred, PRIV_NETINET_REUSEPORT) != 0) { + /* + * If a socket owned by a different user is already + * bound to this port, fail. In particular, SO_REUSE* + * can only be used to share a port among sockets owned + * by the same user. + * + * However, we can share a port with a connected socket + * which has a unique 4-tuple. + */ + t = in_pcblookup_local(inp->inp_pcbinfo, laddr, lport, + RT_ALL_FIBS, INPLOOKUP_WILDCARD, cred); + if (t != NULL && + (inp->inp_socket->so_type != SOCK_STREAM || + in_nullhost(t->inp_faddr)) && + (inp->inp_cred->cr_uid != t->inp_cred->cr_uid)) + return (EADDRINUSE); + } + t = in_pcblookup_local(inp->inp_pcbinfo, laddr, lport, fib, + lookupflags, cred); + if (t != NULL && ((reuseport | reuseport_lb) & + t->inp_socket->so_options) == 0) { +#ifdef INET6 + if (!in_nullhost(laddr) || + !in_nullhost(t->inp_laddr) || + (inp->inp_vflag & INP_IPV6PROTO) == 0 || + (t->inp_vflag & INP_IPV6PROTO) == 0) +#endif + return (EADDRINUSE); + } + } + return (0); +} + +/* * Set up a bind operation on a PCB, performing port allocation * as required, but do not actually modify the PCB. Callers can * either complete the bind by setting inp_laddr/inp_lport and @@ -864,31 +1009,26 @@ in_pcb_lport(struct inpcb *inp, struct in_addr *laddrp, u_short *lportp, */ int in_pcbbind_setup(struct inpcb *inp, struct sockaddr_in *sin, in_addr_t *laddrp, - u_short *lportp, struct ucred *cred) + u_short *lportp, int flags, struct ucred *cred) { struct socket *so = inp->inp_socket; - struct inpcbinfo *pcbinfo = inp->inp_pcbinfo; struct in_addr laddr; u_short lport = 0; - int lookupflags = 0, reuseport = (so->so_options & SO_REUSEPORT); - int error; - - /* - * XXX: Maybe we could let SO_REUSEPORT_LB set SO_REUSEPORT bit here - * so that we don't have to add to the (already messy) code below. - */ - int reuseport_lb = (so->so_options & SO_REUSEPORT_LB); + int error, fib, lookupflags, sooptions; /* * No state changes, so read locks are sufficient here. */ INP_LOCK_ASSERT(inp); - INP_HASH_LOCK_ASSERT(pcbinfo); + INP_HASH_LOCK_ASSERT(inp->inp_pcbinfo); laddr.s_addr = *laddrp; if (sin != NULL && laddr.s_addr != INADDR_ANY) return (EINVAL); - if ((so->so_options & (SO_REUSEADDR|SO_REUSEPORT|SO_REUSEPORT_LB)) == 0) + + lookupflags = 0; + sooptions = atomic_load_int(&so->so_options); + if ((sooptions & (SO_REUSEADDR | SO_REUSEPORT | SO_REUSEPORT_LB)) == 0) lookupflags = INPLOOKUP_WILDCARD; if (sin == NULL) { if ((error = prison_local_ip4(cred, &laddr)) != 0) @@ -908,79 +1048,16 @@ in_pcbbind_setup(struct inpcb *inp, struct sockaddr_in *sin, in_addr_t *laddrp, return (EINVAL); lport = sin->sin_port; } - /* NB: lport is left as 0 if the port isn't being changed. */ - if (IN_MULTICAST(ntohl(sin->sin_addr.s_addr))) { - /* - * Treat SO_REUSEADDR as SO_REUSEPORT for multicast; - * allow complete duplication of binding if - * SO_REUSEPORT is set, or if SO_REUSEADDR is set - * and a multicast address is bound on both - * new and duplicated sockets. - */ - if ((so->so_options & (SO_REUSEADDR|SO_REUSEPORT)) != 0) - reuseport = SO_REUSEADDR|SO_REUSEPORT; - /* - * XXX: How to deal with SO_REUSEPORT_LB here? 
- * Treat same as SO_REUSEPORT for now. - */ - if ((so->so_options & - (SO_REUSEADDR|SO_REUSEPORT_LB)) != 0) - reuseport_lb = SO_REUSEADDR|SO_REUSEPORT_LB; - } else if (sin->sin_addr.s_addr != INADDR_ANY) { - sin->sin_port = 0; /* yech... */ - bzero(&sin->sin_zero, sizeof(sin->sin_zero)); - /* - * Is the address a local IP address? - * If INP_BINDANY is set, then the socket may be bound - * to any endpoint address, local or not. - */ - if ((inp->inp_flags & INP_BINDANY) == 0 && - ifa_ifwithaddr_check((struct sockaddr *)sin) == 0) - return (EADDRNOTAVAIL); - } laddr = sin->sin_addr; - if (lport) { - struct inpcb *t; - - /* GROSS */ - if (ntohs(lport) <= V_ipport_reservedhigh && - ntohs(lport) >= V_ipport_reservedlow && - priv_check_cred(cred, PRIV_NETINET_RESERVEDPORT)) - return (EACCES); - if (!IN_MULTICAST(ntohl(sin->sin_addr.s_addr)) && - priv_check_cred(inp->inp_cred, PRIV_NETINET_REUSEPORT) != 0) { - t = in_pcblookup_local(pcbinfo, sin->sin_addr, - lport, INPLOOKUP_WILDCARD, cred); - /* - * XXX - * This entire block sorely needs a rewrite. - */ - if (t != NULL && - (so->so_type != SOCK_STREAM || - ntohl(t->inp_faddr.s_addr) == INADDR_ANY) && - (ntohl(sin->sin_addr.s_addr) != INADDR_ANY || - ntohl(t->inp_laddr.s_addr) != INADDR_ANY || - (t->inp_socket->so_options & SO_REUSEPORT) || - (t->inp_socket->so_options & SO_REUSEPORT_LB) == 0) && - (inp->inp_cred->cr_uid != - t->inp_cred->cr_uid)) - return (EADDRINUSE); - } - t = in_pcblookup_local(pcbinfo, sin->sin_addr, - lport, lookupflags, cred); - if (t != NULL && (reuseport & t->inp_socket->so_options) == 0 && - (reuseport_lb & t->inp_socket->so_options) == 0) { -#ifdef INET6 - if (ntohl(sin->sin_addr.s_addr) != - INADDR_ANY || - ntohl(t->inp_laddr.s_addr) != - INADDR_ANY || - (inp->inp_vflag & INP_IPV6PROTO) == 0 || - (t->inp_vflag & INP_IPV6PROTO) == 0) -#endif - return (EADDRINUSE); - } - } + + fib = (flags & INPBIND_FIB) != 0 ? inp->inp_inc.inc_fibnum : + RT_ALL_FIBS; + + /* See if this address/port combo is available. */ + error = in_pcbbind_avail(inp, laddr, lport, fib, sooptions, + lookupflags, cred); + if (error != 0) + return (error); } if (*lportp != 0) lport = *lportp; @@ -991,6 +1068,8 @@ in_pcbbind_setup(struct inpcb *inp, struct sockaddr_in *sin, in_addr_t *laddrp, } *laddrp = laddr.s_addr; *lportp = lport; + if ((flags & INPBIND_FIB) != 0) + inp->inp_flags |= INP_BOUNDFIB; return (0); } @@ -1001,48 +1080,105 @@ in_pcbbind_setup(struct inpcb *inp, struct sockaddr_in *sin, in_addr_t *laddrp, * then pick one. 
*/ int -in_pcbconnect(struct inpcb *inp, struct sockaddr_in *sin, struct ucred *cred, - bool rehash __unused) +in_pcbconnect(struct inpcb *inp, struct sockaddr_in *sin, struct ucred *cred) { - u_short lport, fport; - in_addr_t laddr, faddr; - int anonport, error; + struct in_addr laddr, faddr; + u_short lport; + int error; + bool anonport; INP_WLOCK_ASSERT(inp); INP_HASH_WLOCK_ASSERT(inp->inp_pcbinfo); KASSERT(in_nullhost(inp->inp_faddr), ("%s: inp is already connected", __func__)); + KASSERT(sin->sin_family == AF_INET, + ("%s: invalid address family for %p", __func__, sin)); + KASSERT(sin->sin_len == sizeof(*sin), + ("%s: invalid address length for %p", __func__, sin)); - lport = inp->inp_lport; - laddr = inp->inp_laddr.s_addr; - anonport = (lport == 0); - error = in_pcbconnect_setup(inp, sin, &laddr, &lport, &faddr, &fport, - cred); - if (error) - return (error); + if (sin->sin_port == 0) + return (EADDRNOTAVAIL); - inp->inp_faddr.s_addr = faddr; - inp->inp_fport = fport; - - /* Do the initial binding of the local address if required. */ - if (inp->inp_laddr.s_addr == INADDR_ANY && inp->inp_lport == 0) { - inp->inp_lport = lport; - inp->inp_laddr.s_addr = laddr; - if (in_pcbinshash(inp) != 0) { - inp->inp_laddr.s_addr = inp->inp_faddr.s_addr = - INADDR_ANY; - inp->inp_lport = inp->inp_fport = 0; - return (EAGAIN); - } - } else { - inp->inp_lport = lport; - inp->inp_laddr.s_addr = laddr; - if ((inp->inp_flags & INP_INHASHLIST) != 0) - in_pcbrehash(inp); - else - in_pcbinshash(inp); - } + anonport = (inp->inp_lport == 0); + if (__predict_false(in_broadcast(sin->sin_addr))) { + if (!V_connect_inaddr_wild || CK_STAILQ_EMPTY(&V_in_ifaddrhead)) + return (ENETUNREACH); + /* + * If the destination address is INADDR_ANY, use the primary + * local address. If the supplied address is INADDR_BROADCAST, + * and the primary interface supports broadcast, choose the + * broadcast address for that interface. 
+ */ + if (in_nullhost(sin->sin_addr)) { + faddr = + IA_SIN(CK_STAILQ_FIRST(&V_in_ifaddrhead))->sin_addr; + if ((error = prison_get_ip4(cred, &faddr)) != 0) + return (error); + } else if (sin->sin_addr.s_addr == INADDR_BROADCAST && + CK_STAILQ_FIRST(&V_in_ifaddrhead)->ia_ifp->if_flags + & IFF_BROADCAST) { + faddr = satosin(&CK_STAILQ_FIRST( + &V_in_ifaddrhead)->ia_broadaddr)->sin_addr; + } else + faddr = sin->sin_addr; + } else + faddr = sin->sin_addr; + + if (in_nullhost(inp->inp_laddr)) { + error = in_pcbladdr(inp, &faddr, &laddr, cred); + if (error) + return (error); + } else + laddr = inp->inp_laddr; + + if (anonport) { + struct sockaddr_in lsin = { + .sin_family = AF_INET, + .sin_addr = laddr, + }; + struct sockaddr_in fsin = { + .sin_family = AF_INET, + .sin_addr = faddr, + }; + + error = in_pcb_lport_dest(inp, (struct sockaddr *)&lsin, + &lport, (struct sockaddr *)&fsin, sin->sin_port, cred, + INPLOOKUP_WILDCARD); + if (error) + return (error); + } else if (in_pcblookup_hash_locked(inp->inp_pcbinfo, faddr, + sin->sin_port, laddr, inp->inp_lport, 0, M_NODOM, RT_ALL_FIBS) != + NULL) + return (EADDRINUSE); + else + lport = inp->inp_lport; + + MPASS(!in_nullhost(inp->inp_laddr) || inp->inp_lport != 0 || + !(inp->inp_flags & INP_INHASHLIST)); + + inp->inp_faddr = faddr; + inp->inp_fport = sin->sin_port; + inp->inp_laddr = laddr; + inp->inp_lport = lport; + + if ((inp->inp_flags & INP_INHASHLIST) == 0) { + error = in_pcbinshash(inp); + MPASS(error == 0); + } else + in_pcbrehash(inp); +#ifdef ROUTE_MPATH + if (CALC_FLOWID_OUTBOUND) { + uint32_t hash_val, hash_type; + + hash_val = fib4_calc_software_hash(inp->inp_laddr, + inp->inp_faddr, 0, sin->sin_port, + inp->inp_socket->so_proto->pr_protocol, &hash_type); + + inp->inp_flowid = hash_val; + inp->inp_flowtype = hash_type; + } +#endif if (anonport) inp->inp_flags |= INP_ANONPORT; return (0); @@ -1053,8 +1189,8 @@ in_pcbconnect(struct inpcb *inp, struct sockaddr_in *sin, struct ucred *cred, * of connect. Take jails into account as well. */ int -in_pcbladdr(struct inpcb *inp, struct in_addr *faddr, struct in_addr *laddr, - struct ucred *cred) +in_pcbladdr(const struct inpcb *inp, struct in_addr *faddr, + struct in_addr *laddr, struct ucred *cred) { struct ifaddr *ifa; struct sockaddr *sa; @@ -1072,6 +1208,27 @@ in_pcbladdr(struct inpcb *inp, struct in_addr *faddr, struct in_addr *laddr, if (!prison_saddrsel_ip4(cred, laddr)) return (0); + /* + * If the destination address is multicast and an outgoing + * interface has been set as a multicast option, prefer the + * address of that interface as our source address. + */ + if (IN_MULTICAST(ntohl(faddr->s_addr)) && inp->inp_moptions != NULL && + inp->inp_moptions->imo_multicast_ifp != NULL) { + struct ifnet *ifp = inp->inp_moptions->imo_multicast_ifp; + struct in_ifaddr *ia; + + CK_STAILQ_FOREACH(ia, &V_in_ifaddrhead, ia_link) { + if (ia->ia_ifp == ifp && + prison_check_ip4(cred, &ia->ia_addr.sin_addr) == 0) + break; + } + if (ia == NULL) + return (EADDRNOTAVAIL); + *laddr = ia->ia_addr.sin_addr; + return (0); + } + error = 0; nh = NULL; @@ -1259,135 +1416,6 @@ done: return (error); } -/* - * Set up for a connect from a socket to the specified address. - * On entry, *laddrp and *lportp should contain the current local - * address and port for the PCB; these are updated to the values - * that should be placed in inp_laddr and inp_lport to complete - * the connect. - * - * On success, *faddrp and *fportp will be set to the remote address - * and port. These are not updated in the error case. 
- */ -int -in_pcbconnect_setup(struct inpcb *inp, struct sockaddr_in *sin, - in_addr_t *laddrp, u_short *lportp, in_addr_t *faddrp, u_short *fportp, - struct ucred *cred) -{ - struct in_ifaddr *ia; - struct in_addr laddr, faddr; - u_short lport, fport; - int error; - - KASSERT(sin->sin_family == AF_INET, - ("%s: invalid address family for %p", __func__, sin)); - KASSERT(sin->sin_len == sizeof(*sin), - ("%s: invalid address length for %p", __func__, sin)); - - /* - * Because a global state change doesn't actually occur here, a read - * lock is sufficient. - */ - NET_EPOCH_ASSERT(); - INP_LOCK_ASSERT(inp); - INP_HASH_LOCK_ASSERT(inp->inp_pcbinfo); - - if (sin->sin_port == 0) - return (EADDRNOTAVAIL); - laddr.s_addr = *laddrp; - lport = *lportp; - faddr = sin->sin_addr; - fport = sin->sin_port; -#ifdef ROUTE_MPATH - if (CALC_FLOWID_OUTBOUND) { - uint32_t hash_val, hash_type; - - hash_val = fib4_calc_software_hash(laddr, faddr, 0, fport, - inp->inp_socket->so_proto->pr_protocol, &hash_type); - - inp->inp_flowid = hash_val; - inp->inp_flowtype = hash_type; - } -#endif - if (!CK_STAILQ_EMPTY(&V_in_ifaddrhead)) { - /* - * If the destination address is INADDR_ANY, - * use the primary local address. - * If the supplied address is INADDR_BROADCAST, - * and the primary interface supports broadcast, - * choose the broadcast address for that interface. - */ - if (faddr.s_addr == INADDR_ANY) { - faddr = - IA_SIN(CK_STAILQ_FIRST(&V_in_ifaddrhead))->sin_addr; - if ((error = prison_get_ip4(cred, &faddr)) != 0) - return (error); - } else if (faddr.s_addr == (u_long)INADDR_BROADCAST) { - if (CK_STAILQ_FIRST(&V_in_ifaddrhead)->ia_ifp->if_flags & - IFF_BROADCAST) - faddr = satosin(&CK_STAILQ_FIRST( - &V_in_ifaddrhead)->ia_broadaddr)->sin_addr; - } - } - if (laddr.s_addr == INADDR_ANY) { - error = in_pcbladdr(inp, &faddr, &laddr, cred); - /* - * If the destination address is multicast and an outgoing - * interface has been set as a multicast option, prefer the - * address of that interface as our source address. 
- */ - if (IN_MULTICAST(ntohl(faddr.s_addr)) && - inp->inp_moptions != NULL) { - struct ip_moptions *imo; - struct ifnet *ifp; - - imo = inp->inp_moptions; - if (imo->imo_multicast_ifp != NULL) { - ifp = imo->imo_multicast_ifp; - CK_STAILQ_FOREACH(ia, &V_in_ifaddrhead, ia_link) { - if (ia->ia_ifp == ifp && - prison_check_ip4(cred, - &ia->ia_addr.sin_addr) == 0) - break; - } - if (ia == NULL) - error = EADDRNOTAVAIL; - else { - laddr = ia->ia_addr.sin_addr; - error = 0; - } - } - } - if (error) - return (error); - } - - if (lport != 0) { - if (in_pcblookup_hash_locked(inp->inp_pcbinfo, faddr, - fport, laddr, lport, 0, M_NODOM) != NULL) - return (EADDRINUSE); - } else { - struct sockaddr_in lsin, fsin; - - bzero(&lsin, sizeof(lsin)); - bzero(&fsin, sizeof(fsin)); - lsin.sin_family = AF_INET; - lsin.sin_addr = laddr; - fsin.sin_family = AF_INET; - fsin.sin_addr = faddr; - error = in_pcb_lport_dest(inp, (struct sockaddr *) &lsin, - &lport, (struct sockaddr *)& fsin, fport, cred, - INPLOOKUP_WILDCARD); - if (error) - return (error); - } - *laddrp = laddr.s_addr; - *lportp = lport; - *faddrp = faddr.s_addr; - *fportp = fport; - return (0); -} - void in_pcbdisconnect(struct inpcb *inp) { @@ -1407,6 +1435,26 @@ in_pcbdisconnect(struct inpcb *inp) } #endif /* INET */ +void +in_pcblisten(struct inpcb *inp) +{ + struct inpcblbgroup *grp; + + INP_WLOCK_ASSERT(inp); + + if ((inp->inp_flags & INP_INLBGROUP) != 0) { + struct inpcbinfo *pcbinfo; + + pcbinfo = inp->inp_pcbinfo; + INP_HASH_WLOCK(pcbinfo); + grp = in_pcblbgroup_find(inp); + LIST_REMOVE(inp, inp_lbgroup_list); + grp->il_pendcnt--; + in_pcblbgroup_insert(grp, inp); + INP_HASH_WUNLOCK(pcbinfo); + } +} + /* * inpcb hash lookups are protected by SMR section. * @@ -1697,6 +1745,23 @@ in_pcbrele(struct inpcb *inp, const inp_lookup_t lock) } /* + * Dereference and rlock inp, for which the caller must own the + * reference. Returns true if inp no longer usable, false otherwise. + */ +bool +in_pcbrele_rlock(struct inpcb *inp) +{ + INP_RLOCK(inp); + if (in_pcbrele_rlocked(inp)) + return (true); + if ((inp->inp_flags & INP_FREED) != 0) { + INP_RUNLOCK(inp); + return (true); + } + return (false); +} + +/* * Unconditionally schedule an inpcb to be freed by decrementing its * reference count, which should occur only after the inpcb has been detached * from its socket. If another thread holds a temporary reference (acquired @@ -1915,7 +1980,7 @@ restart: #define INP_LOOKUP_MAPPED_PCB_COST 3 struct inpcb * in_pcblookup_local(struct inpcbinfo *pcbinfo, struct in_addr laddr, - u_short lport, int lookupflags, struct ucred *cred) + u_short lport, int fib, int lookupflags, struct ucred *cred) { struct inpcb *inp; #ifdef INET6 @@ -1927,6 +1992,9 @@ in_pcblookup_local(struct inpcbinfo *pcbinfo, struct in_addr laddr, KASSERT((lookupflags & ~(INPLOOKUP_WILDCARD)) == 0, ("%s: invalid lookup flags %d", __func__, lookupflags)); + KASSERT(fib == RT_ALL_FIBS || (fib >= 0 && fib < V_rt_numfibs), + ("%s: invalid fib %d", __func__, fib)); + INP_HASH_LOCK_ASSERT(pcbinfo); if ((lookupflags & INPLOOKUP_WILDCARD) == 0) { @@ -1945,7 +2013,8 @@ in_pcblookup_local(struct inpcbinfo *pcbinfo, struct in_addr laddr, #endif if (inp->inp_faddr.s_addr == INADDR_ANY && inp->inp_laddr.s_addr == laddr.s_addr && - inp->inp_lport == lport) { + inp->inp_lport == lport && (fib == RT_ALL_FIBS || + inp->inp_inc.inc_fibnum == fib)) { /* * Found? 
*/ @@ -1959,68 +2028,58 @@ in_pcblookup_local(struct inpcbinfo *pcbinfo, struct in_addr laddr, */ return (NULL); } else { - struct inpcbporthead *porthash; - struct inpcbport *phd; + struct inpcbhead *porthash; struct inpcb *match = NULL; + /* - * Best fit PCB lookup. - * - * First see if this local port is in use by looking on the - * port hash list. + * Port is in use by one or more PCBs. Look for best + * fit. */ porthash = &pcbinfo->ipi_porthashbase[INP_PCBPORTHASH(lport, pcbinfo->ipi_porthashmask)]; - CK_LIST_FOREACH(phd, porthash, phd_hash) { - if (phd->phd_port == lport) - break; - } - if (phd != NULL) { + CK_LIST_FOREACH(inp, porthash, inp_portlist) { + if (inp->inp_lport != lport) + continue; + if (!prison_equal_ip4(inp->inp_cred->cr_prison, + cred->cr_prison)) + continue; + if (fib != RT_ALL_FIBS && + inp->inp_inc.inc_fibnum != fib) + continue; + wildcard = 0; +#ifdef INET6 + /* XXX inp locking */ + if ((inp->inp_vflag & INP_IPV4) == 0) + continue; /* - * Port is in use by one or more PCBs. Look for best - * fit. + * We never select the PCB that has INP_IPV6 flag and + * is bound to :: if we have another PCB which is bound + * to 0.0.0.0. If a PCB has the INP_IPV6 flag, then we + * set its cost higher than IPv4 only PCBs. + * + * Note that the case only happens when a socket is + * bound to ::, under the condition that the use of the + * mapped address is allowed. */ - CK_LIST_FOREACH(inp, &phd->phd_pcblist, inp_portlist) { - wildcard = 0; - if (!prison_equal_ip4(inp->inp_cred->cr_prison, - cred->cr_prison)) - continue; -#ifdef INET6 - /* XXX inp locking */ - if ((inp->inp_vflag & INP_IPV4) == 0) - continue; - /* - * We never select the PCB that has - * INP_IPV6 flag and is bound to :: if - * we have another PCB which is bound - * to 0.0.0.0. If a PCB has the - * INP_IPV6 flag, then we set its cost - * higher than IPv4 only PCBs. - * - * Note that the case only happens - * when a socket is bound to ::, under - * the condition that the use of the - * mapped address is allowed. 
- */ - if ((inp->inp_vflag & INP_IPV6) != 0) - wildcard += INP_LOOKUP_MAPPED_PCB_COST; + if ((inp->inp_vflag & INP_IPV6) != 0) + wildcard += INP_LOOKUP_MAPPED_PCB_COST; #endif - if (inp->inp_faddr.s_addr != INADDR_ANY) + if (inp->inp_faddr.s_addr != INADDR_ANY) + wildcard++; + if (inp->inp_laddr.s_addr != INADDR_ANY) { + if (laddr.s_addr == INADDR_ANY) + wildcard++; + else if (inp->inp_laddr.s_addr != laddr.s_addr) + continue; + } else { + if (laddr.s_addr != INADDR_ANY) wildcard++; - if (inp->inp_laddr.s_addr != INADDR_ANY) { - if (laddr.s_addr == INADDR_ANY) - wildcard++; - else if (inp->inp_laddr.s_addr != laddr.s_addr) - continue; - } else { - if (laddr.s_addr != INADDR_ANY) - wildcard++; - } - if (wildcard < matchwild) { - match = inp; - matchwild = wildcard; - if (matchwild == 0) - break; - } + } + if (wildcard < matchwild) { + match = inp; + matchwild = wildcard; + if (matchwild == 0) + break; } } return (match); @@ -2029,21 +2088,25 @@ in_pcblookup_local(struct inpcbinfo *pcbinfo, struct in_addr laddr, #undef INP_LOOKUP_MAPPED_PCB_COST static bool -in_pcblookup_lb_numa_match(const struct inpcblbgroup *grp, int domain) +in_pcblookup_lb_match(const struct inpcblbgroup *grp, int domain, int fib) { - return (domain == M_NODOM || domain == grp->il_numa_domain); + return ((domain == M_NODOM || domain == grp->il_numa_domain) && + (fib == RT_ALL_FIBS || fib == grp->il_fibnum)); } static struct inpcb * in_pcblookup_lbgroup(const struct inpcbinfo *pcbinfo, const struct in_addr *faddr, uint16_t fport, const struct in_addr *laddr, - uint16_t lport, int domain) + uint16_t lport, int domain, int fib) { const struct inpcblbgrouphead *hdr; struct inpcblbgroup *grp; struct inpcblbgroup *jail_exact, *jail_wild, *local_exact, *local_wild; + struct inpcb *inp; + u_int count; INP_HASH_LOCK_ASSERT(pcbinfo); + NET_EPOCH_ASSERT(); hdr = &pcbinfo->ipi_lbgrouphashbase[ INP_PCBPORTHASH(lport, pcbinfo->ipi_lbgrouphashmask)]; @@ -2073,20 +2136,20 @@ in_pcblookup_lbgroup(const struct inpcbinfo *pcbinfo, if (grp->il_laddr.s_addr == laddr->s_addr) { if (injail) { jail_exact = grp; - if (in_pcblookup_lb_numa_match(grp, domain)) + if (in_pcblookup_lb_match(grp, domain, fib)) /* This is a perfect match. */ goto out; } else if (local_exact == NULL || - in_pcblookup_lb_numa_match(grp, domain)) { + in_pcblookup_lb_match(grp, domain, fib)) { local_exact = grp; } } else if (grp->il_laddr.s_addr == INADDR_ANY) { if (injail) { if (jail_wild == NULL || - in_pcblookup_lb_numa_match(grp, domain)) + in_pcblookup_lb_match(grp, domain, fib)) jail_wild = grp; } else if (local_wild == NULL || - in_pcblookup_lb_numa_match(grp, domain)) { + in_pcblookup_lb_match(grp, domain, fib)) { local_wild = grp; } } @@ -2102,9 +2165,17 @@ in_pcblookup_lbgroup(const struct inpcbinfo *pcbinfo, grp = local_wild; if (grp == NULL) return (NULL); + out: - return (grp->il_inp[INP_PCBLBGROUP_PKTHASH(faddr, lport, fport) % - grp->il_inpcnt]); + /* + * Synchronize with in_pcblbgroup_insert(). 
+ */ + count = atomic_load_acq_int(&grp->il_inpcnt); + if (count == 0) + return (NULL); + inp = grp->il_inp[INP_PCBLBGROUP_PKTHASH(faddr, lport, fport) % count]; + KASSERT(inp != NULL, ("%s: inp == NULL", __func__)); + return (inp); } static bool @@ -2150,7 +2221,7 @@ typedef enum { static inp_lookup_match_t in_pcblookup_wild_match(const struct inpcb *inp, struct in_addr laddr, - u_short lport) + u_short lport, int fib) { #ifdef INET6 /* XXX inp locking */ @@ -2159,6 +2230,8 @@ in_pcblookup_wild_match(const struct inpcb *inp, struct in_addr laddr, #endif if (inp->inp_faddr.s_addr != INADDR_ANY || inp->inp_lport != lport) return (INPLOOKUP_MATCH_NONE); + if (fib != RT_ALL_FIBS && inp->inp_inc.inc_fibnum != fib) + return (INPLOOKUP_MATCH_NONE); if (inp->inp_laddr.s_addr == INADDR_ANY) return (INPLOOKUP_MATCH_WILD); if (inp->inp_laddr.s_addr == laddr.s_addr) @@ -2169,9 +2242,8 @@ in_pcblookup_wild_match(const struct inpcb *inp, struct in_addr laddr, #define INP_LOOKUP_AGAIN ((struct inpcb *)(uintptr_t)-1) static struct inpcb * -in_pcblookup_hash_wild_smr(struct inpcbinfo *pcbinfo, struct in_addr faddr, - u_short fport, struct in_addr laddr, u_short lport, - const inp_lookup_t lockflags) +in_pcblookup_hash_wild_smr(struct inpcbinfo *pcbinfo, struct in_addr laddr, + u_short lport, int fib, const inp_lookup_t lockflags) { struct inpcbhead *head; struct inpcb *inp; @@ -2184,12 +2256,12 @@ in_pcblookup_hash_wild_smr(struct inpcbinfo *pcbinfo, struct in_addr faddr, CK_LIST_FOREACH(inp, head, inp_hash_wild) { inp_lookup_match_t match; - match = in_pcblookup_wild_match(inp, laddr, lport); + match = in_pcblookup_wild_match(inp, laddr, lport, fib); if (match == INPLOOKUP_MATCH_NONE) continue; if (__predict_true(inp_smr_lock(inp, lockflags))) { - match = in_pcblookup_wild_match(inp, laddr, lport); + match = in_pcblookup_wild_match(inp, laddr, lport, fib); if (match != INPLOOKUP_MATCH_NONE && prison_check_ip4_locked(inp->inp_cred->cr_prison, &laddr) == 0) @@ -2207,8 +2279,8 @@ in_pcblookup_hash_wild_smr(struct inpcbinfo *pcbinfo, struct in_addr faddr, } static struct inpcb * -in_pcblookup_hash_wild_locked(struct inpcbinfo *pcbinfo, struct in_addr faddr, - u_short fport, struct in_addr laddr, u_short lport) +in_pcblookup_hash_wild_locked(struct inpcbinfo *pcbinfo, struct in_addr laddr, + u_short lport, int fib) { struct inpcbhead *head; struct inpcb *inp, *local_wild, *local_exact, *jail_wild; @@ -2235,7 +2307,7 @@ in_pcblookup_hash_wild_locked(struct inpcbinfo *pcbinfo, struct in_addr faddr, inp_lookup_match_t match; bool injail; - match = in_pcblookup_wild_match(inp, laddr, lport); + match = in_pcblookup_wild_match(inp, laddr, lport, fib); if (match == INPLOOKUP_MATCH_NONE) continue; @@ -2288,12 +2360,12 @@ in_pcblookup_hash_wild_locked(struct inpcbinfo *pcbinfo, struct in_addr faddr, static struct inpcb * in_pcblookup_hash_locked(struct inpcbinfo *pcbinfo, struct in_addr faddr, u_int fport_arg, struct in_addr laddr, u_int lport_arg, int lookupflags, - uint8_t numa_domain) + uint8_t numa_domain, int fib) { struct inpcb *inp; const u_short fport = fport_arg, lport = lport_arg; - KASSERT((lookupflags & ~INPLOOKUP_WILDCARD) == 0, + KASSERT((lookupflags & ~(INPLOOKUP_WILDCARD | INPLOOKUP_FIB)) == 0, ("%s: invalid lookup flags %d", __func__, lookupflags)); KASSERT(faddr.s_addr != INADDR_ANY, ("%s: invalid foreign address", __func__)); @@ -2307,10 +2379,10 @@ in_pcblookup_hash_locked(struct inpcbinfo *pcbinfo, struct in_addr faddr, if ((lookupflags & INPLOOKUP_WILDCARD) != 0) { inp = 
in_pcblookup_lbgroup(pcbinfo, &faddr, fport, - &laddr, lport, numa_domain); + &laddr, lport, numa_domain, fib); if (inp == NULL) { - inp = in_pcblookup_hash_wild_locked(pcbinfo, faddr, - fport, laddr, lport); + inp = in_pcblookup_hash_wild_locked(pcbinfo, laddr, + lport, fib); } } @@ -2320,7 +2392,7 @@ in_pcblookup_hash_locked(struct inpcbinfo *pcbinfo, struct in_addr faddr, static struct inpcb * in_pcblookup_hash(struct inpcbinfo *pcbinfo, struct in_addr faddr, u_int fport, struct in_addr laddr, u_int lport, int lookupflags, - uint8_t numa_domain) + uint8_t numa_domain, int fib) { struct inpcb *inp; const inp_lookup_t lockflags = lookupflags & INPLOOKUP_LOCKMASK; @@ -2330,7 +2402,7 @@ in_pcblookup_hash(struct inpcbinfo *pcbinfo, struct in_addr faddr, INP_HASH_WLOCK(pcbinfo); inp = in_pcblookup_hash_locked(pcbinfo, faddr, fport, laddr, lport, - lookupflags & ~INPLOOKUP_LOCKMASK, numa_domain); + lookupflags & ~INPLOOKUP_LOCKMASK, numa_domain, fib); if (inp != NULL && !inp_trylock(inp, lockflags)) { in_pcbref(inp); INP_HASH_WUNLOCK(pcbinfo); @@ -2347,7 +2419,7 @@ in_pcblookup_hash(struct inpcbinfo *pcbinfo, struct in_addr faddr, static struct inpcb * in_pcblookup_hash_smr(struct inpcbinfo *pcbinfo, struct in_addr faddr, u_int fport_arg, struct in_addr laddr, u_int lport_arg, int lookupflags, - uint8_t numa_domain) + uint8_t numa_domain, int fib) { struct inpcb *inp; const inp_lookup_t lockflags = lookupflags & INPLOOKUP_LOCKMASK; @@ -2377,27 +2449,27 @@ in_pcblookup_hash_smr(struct inpcbinfo *pcbinfo, struct in_addr faddr, * out from under us. Fall back to a precise search. */ return (in_pcblookup_hash(pcbinfo, faddr, fport, laddr, lport, - lookupflags, numa_domain)); + lookupflags, numa_domain, fib)); } if ((lookupflags & INPLOOKUP_WILDCARD) != 0) { inp = in_pcblookup_lbgroup(pcbinfo, &faddr, fport, - &laddr, lport, numa_domain); + &laddr, lport, numa_domain, fib); if (inp != NULL) { if (__predict_true(inp_smr_lock(inp, lockflags))) { if (__predict_true(in_pcblookup_wild_match(inp, - laddr, lport) != INPLOOKUP_MATCH_NONE)) + laddr, lport, fib) != INPLOOKUP_MATCH_NONE)) return (inp); inp_unlock(inp, lockflags); } inp = INP_LOOKUP_AGAIN; } else { - inp = in_pcblookup_hash_wild_smr(pcbinfo, faddr, fport, - laddr, lport, lockflags); + inp = in_pcblookup_hash_wild_smr(pcbinfo, laddr, lport, + fib, lockflags); } if (inp == INP_LOOKUP_AGAIN) { return (in_pcblookup_hash(pcbinfo, faddr, fport, laddr, - lport, lookupflags, numa_domain)); + lport, lookupflags, numa_domain, fib)); } } @@ -2414,10 +2486,13 @@ in_pcblookup_hash_smr(struct inpcbinfo *pcbinfo, struct in_addr faddr, struct inpcb * in_pcblookup(struct inpcbinfo *pcbinfo, struct in_addr faddr, u_int fport, struct in_addr laddr, u_int lport, int lookupflags, - struct ifnet *ifp __unused) + struct ifnet *ifp) { + int fib; + + fib = (lookupflags & INPLOOKUP_FIB) ? if_getfib(ifp) : RT_ALL_FIBS; return (in_pcblookup_hash_smr(pcbinfo, faddr, fport, laddr, lport, - lookupflags, M_NODOM)); + lookupflags, M_NODOM, fib)); } struct inpcb * @@ -2425,8 +2500,12 @@ in_pcblookup_mbuf(struct inpcbinfo *pcbinfo, struct in_addr faddr, u_int fport, struct in_addr laddr, u_int lport, int lookupflags, struct ifnet *ifp __unused, struct mbuf *m) { + int fib; + + M_ASSERTPKTHDR(m); + fib = (lookupflags & INPLOOKUP_FIB) ? 
M_GETFIB(m) : RT_ALL_FIBS; return (in_pcblookup_hash_smr(pcbinfo, faddr, fport, laddr, lport, - lookupflags, m->m_pkthdr.numa_domain)); + lookupflags, m->m_pkthdr.numa_domain, fib)); } #endif /* INET */ @@ -2546,14 +2625,16 @@ _in6_pcbinshash_wild(struct inpcbhead *pcbhash, struct inpcb *inp) /* * Insert PCB onto various hash lists. + * + * With normal sockets this function shall not fail, so it could return void. + * But for SO_REUSEPORT_LB it may need to allocate memory with locks held, + * that's the only condition when it can fail. */ int in_pcbinshash(struct inpcb *inp) { - struct inpcbhead *pcbhash; - struct inpcbporthead *pcbporthash; + struct inpcbhead *pcbhash, *pcbporthash; struct inpcbinfo *pcbinfo = inp->inp_pcbinfo; - struct inpcbport *phd; uint32_t hash; bool connected; @@ -2594,31 +2675,6 @@ in_pcbinshash(struct inpcb *inp) } /* - * Go through port list and look for a head for this lport. - */ - CK_LIST_FOREACH(phd, pcbporthash, phd_hash) { - if (phd->phd_port == inp->inp_lport) - break; - } - - /* - * If none exists, malloc one and tack it on. - */ - if (phd == NULL) { - phd = uma_zalloc_smr(pcbinfo->ipi_portzone, M_NOWAIT); - if (phd == NULL) { - if ((inp->inp_flags & INP_INLBGROUP) != 0) - in_pcbremlbgrouphash(inp); - return (ENOMEM); - } - phd->phd_port = inp->inp_lport; - CK_LIST_INIT(&phd->phd_pcblist); - CK_LIST_INSERT_HEAD(pcbporthash, phd, phd_hash); - } - inp->inp_phd = phd; - CK_LIST_INSERT_HEAD(&phd->phd_pcblist, inp, inp_portlist); - - /* * The PCB may have been disconnected in the past. Before we can safely * make it visible in the hash table, we must wait for all readers which * may be traversing this PCB to finish. @@ -2638,6 +2694,7 @@ in_pcbinshash(struct inpcb *inp) #endif _in_pcbinshash_wild(pcbhash, inp); } + CK_LIST_INSERT_HEAD(pcbporthash, inp, inp_portlist); inp->inp_flags |= INP_INHASHLIST; return (0); @@ -2646,7 +2703,6 @@ in_pcbinshash(struct inpcb *inp) void in_pcbremhash_locked(struct inpcb *inp) { - struct inpcbport *phd = inp->inp_phd; INP_WLOCK_ASSERT(inp); INP_HASH_WLOCK_ASSERT(inp->inp_pcbinfo); @@ -2669,10 +2725,6 @@ in_pcbremhash_locked(struct inpcb *inp) CK_LIST_REMOVE(inp, inp_hash_exact); } CK_LIST_REMOVE(inp, inp_portlist); - if (CK_LIST_FIRST(&phd->phd_pcblist) == NULL) { - CK_LIST_REMOVE(phd, phd_hash); - uma_zfree_smr(inp->inp_pcbinfo->ipi_portzone, phd); - } inp->inp_flags &= ~INP_INHASHLIST; } @@ -3183,8 +3235,7 @@ db_print_inpcb(struct inpcb *inp, const char *name, int indent) } db_print_indent(indent); - db_printf("inp_phd: %p inp_gencnt: %ju\n", inp->inp_phd, - (uintmax_t)inp->inp_gencnt); + db_printf("inp_gencnt: %ju\n", (uintmax_t)inp->inp_gencnt); } DB_SHOW_COMMAND(inpcb, db_show_inpcb) diff --git a/sys/netinet/in_pcb.h b/sys/netinet/in_pcb.h index a4b4075b3501..9e0618e87601 100644 --- a/sys/netinet/in_pcb.h +++ b/sys/netinet/in_pcb.h @@ -64,7 +64,6 @@ * protocol-specific control block) are stored here. 
*/ CK_LIST_HEAD(inpcbhead, inpcb); -CK_LIST_HEAD(inpcbporthead, inpcbport); CK_LIST_HEAD(inpcblbgrouphead, inpcblbgroup); typedef uint64_t inp_gen_t; @@ -167,7 +166,10 @@ struct inpcbpolicy; struct m_snd_tag; struct inpcb { /* Cache line #1 (amd64) */ - CK_LIST_ENTRY(inpcb) inp_hash_exact; /* hash table linkage */ + union { + CK_LIST_ENTRY(inpcb) inp_hash_exact; /* hash table linkage */ + LIST_ENTRY(inpcb) inp_lbgroup_list; /* lb group list */ + }; CK_LIST_ENTRY(inpcb) inp_hash_wild; /* hash table linkage */ struct rwlock inp_lock; /* Cache line #2 (amd64) */ @@ -218,7 +220,6 @@ struct inpcb { short in6p_hops; }; CK_LIST_ENTRY(inpcb) inp_portlist; /* (r:e/w:h) port list */ - struct inpcbport *inp_phd; /* (r:e/w:h) head of this list */ inp_gen_t inp_gencnt; /* (c) generation count */ void *spare_ptr; /* Spare pointer. */ rt_gen_t inp_rt_cookie; /* generation for route entry */ @@ -302,6 +303,30 @@ struct sockopt_parameters { char sop_optval[]; }; +#ifdef _SYS_KTLS_H_ +struct xktls_session { + uint32_t tsz; /* total sz of elm, next elm is at this+tsz */ + uint32_t fsz; /* size of the struct up to keys */ + uint64_t inp_gencnt; + kvaddr_t so_pcb; + struct in_conninfo coninf; + u_short rx_vlan_id; + struct xktls_session_onedir rcv; + struct xktls_session_onedir snd; +/* + * Next are + * - keydata for rcv, first cipher of length rcv.cipher_key_len, then + * authentication of length rcv.auth_key_len; + * - driver data (string) of length rcv.drv_st_len, if the rcv session is + * offloaded to ifnet rcv.ifnet; + * - keydata for snd, first cipher of length snd.cipher_key_len, then + * authentication of length snd.auth_key_len; + * - driver data (string) of length snd.drv_st_len, if the snd session is + * offloaded to ifnet snd.ifnet; + */ +}; +#endif /* _SYS_KTLS_H_ */ + #ifdef _KERNEL int sysctl_setsockopt(SYSCTL_HANDLER_ARGS, struct inpcbinfo *pcbinfo, int (*ctloutput_set)(struct inpcb *, struct sockopt *)); @@ -367,7 +392,7 @@ struct inpcbinfo { /* * Global hash of inpcbs, hashed by only local port number. */ - struct inpcbporthead *ipi_porthashbase; /* (h) */ + struct inpcbhead *ipi_porthashbase; /* (h) */ u_long ipi_porthashmask; /* (h) */ /* @@ -389,11 +414,9 @@ struct inpcbinfo { */ struct inpcbstorage { uma_zone_t ips_zone; - uma_zone_t ips_portzone; uma_init ips_pcbinit; size_t ips_size; const char * ips_zone_name; - const char * ips_portzone_name; const char * ips_infolock_name; const char * ips_hashlock_name; }; @@ -411,7 +434,6 @@ static struct inpcbstorage prot = { \ .ips_size = sizeof(struct ppcb), \ .ips_pcbinit = prot##_inpcb_init, \ .ips_zone_name = zname, \ - .ips_portzone_name = zname " ports", \ .ips_infolock_name = iname, \ .ips_hashlock_name = hname, \ }; \ @@ -420,28 +442,6 @@ SYSINIT(prot##_inpcbstorage_init, SI_SUB_PROTO_DOMAIN, \ SYSUNINIT(prot##_inpcbstorage_uninit, SI_SUB_PROTO_DOMAIN, \ SI_ORDER_SECOND, in_pcbstorage_destroy, &prot) -/* - * Load balance groups used for the SO_REUSEPORT_LB socket option. Each group - * (or unique address:port combination) can be re-used at most - * INPCBLBGROUP_SIZMAX (256) times. The inpcbs are stored in il_inp which - * is dynamically resized as processes bind/unbind to that specific group. 
- */ -struct inpcblbgroup { - CK_LIST_ENTRY(inpcblbgroup) il_list; - struct epoch_context il_epoch_ctx; - struct ucred *il_cred; - uint16_t il_lport; /* (c) */ - u_char il_vflag; /* (c) */ - uint8_t il_numa_domain; - uint32_t il_pad2; - union in_dependaddr il_dependladdr; /* (c) */ -#define il_laddr il_dependladdr.id46_addr.ia46_addr4 -#define il6_laddr il_dependladdr.id6_addr - uint32_t il_inpsiz; /* max count in il_inp[] (h) */ - uint32_t il_inpcnt; /* cur count in il_inp[] (h) */ - struct inpcb *il_inp[]; /* (h) */ -}; - #define INP_LOCK_DESTROY(inp) rw_destroy(&(inp)->inp_lock) #define INP_RLOCK(inp) rw_rlock(&(inp)->inp_lock) #define INP_WLOCK(inp) rw_wlock(&(inp)->inp_lock) @@ -571,7 +571,7 @@ void inp_4tuple_get(struct inpcb *inp, uint32_t *laddr, uint16_t *lp, #define INP_DROPPED 0x04000000 /* protocol drop flag */ #define INP_SOCKREF 0x08000000 /* strong socket reference */ #define INP_RESERVED_0 0x10000000 /* reserved field */ -#define INP_RESERVED_1 0x20000000 /* reserved field */ +#define INP_BOUNDFIB 0x20000000 /* Bound to a specific FIB. */ #define IN6P_RFC2292 0x40000000 /* used RFC2292 API on the socket */ #define IN6P_MTU 0x80000000 /* receive path MTU */ @@ -617,10 +617,11 @@ typedef enum { INPLOOKUP_WILDCARD = 0x00000001, /* Allow wildcard sockets. */ INPLOOKUP_RLOCKPCB = 0x00000002, /* Return inpcb read-locked. */ INPLOOKUP_WLOCKPCB = 0x00000004, /* Return inpcb write-locked. */ + INPLOOKUP_FIB = 0x00000008, /* inp must be from same FIB. */ } inp_lookup_t; #define INPLOOKUP_MASK (INPLOOKUP_WILDCARD | INPLOOKUP_RLOCKPCB | \ - INPLOOKUP_WLOCKPCB) + INPLOOKUP_WLOCKPCB | INPLOOKUP_FIB) #define INPLOOKUP_LOCKMASK (INPLOOKUP_RLOCKPCB | INPLOOKUP_WLOCKPCB) #define sotoinpcb(so) ((struct inpcb *)(so)->so_pcb) @@ -658,20 +659,18 @@ void in_pcbstorage_destroy(void *); void in_pcbpurgeif0(struct inpcbinfo *, struct ifnet *); int in_pcballoc(struct socket *, struct inpcbinfo *); -int in_pcbbind(struct inpcb *, struct sockaddr_in *, struct ucred *); +#define INPBIND_FIB 0x0001 /* bind to the PCB's FIB only */ +int in_pcbbind(struct inpcb *, struct sockaddr_in *, int, struct ucred *); int in_pcbbind_setup(struct inpcb *, struct sockaddr_in *, in_addr_t *, - u_short *, struct ucred *); -int in_pcbconnect(struct inpcb *, struct sockaddr_in *, struct ucred *, - bool); -int in_pcbconnect_setup(struct inpcb *, struct sockaddr_in *, in_addr_t *, - u_short *, in_addr_t *, u_short *, struct ucred *); + u_short *, int, struct ucred *); +int in_pcbconnect(struct inpcb *, struct sockaddr_in *, struct ucred *); void in_pcbdisconnect(struct inpcb *); void in_pcbdrop(struct inpcb *); void in_pcbfree(struct inpcb *); -int in_pcbinshash(struct inpcb *); -int in_pcbladdr(struct inpcb *, struct in_addr *, struct in_addr *, +int in_pcbladdr(const struct inpcb *, struct in_addr *, struct in_addr *, struct ucred *); int in_pcblbgroup_numa(struct inpcb *, int arg); +void in_pcblisten(struct inpcb *); struct inpcb * in_pcblookup(struct inpcbinfo *, struct in_addr, u_int, struct in_addr, u_int, int, struct ifnet *); @@ -679,11 +678,10 @@ struct inpcb * in_pcblookup_mbuf(struct inpcbinfo *, struct in_addr, u_int, struct in_addr, u_int, int, struct ifnet *, struct mbuf *); void in_pcbref(struct inpcb *); -void in_pcbrehash(struct inpcb *); -void in_pcbremhash_locked(struct inpcb *); bool in_pcbrele(struct inpcb *, inp_lookup_t); bool in_pcbrele_rlocked(struct inpcb *); bool in_pcbrele_wlocked(struct inpcb *); +bool in_pcbrele_rlock(struct inpcb *inp); typedef bool inp_match_t(const struct inpcb *, void *); 
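A minimal sketch of how a transport input path could opt into the new FIB-aware lookup; the function and argument names below are illustrative assumptions, only the in_pcblookup() call and the INPLOOKUP_* flags come from the patch (in_pcblookup() derives the FIB from if_getfib(ifp), in_pcblookup_mbuf() from M_GETFIB(m)):

	/*
	 * Sketch: restrict a wildcard lookup to PCBs bound to the FIB of the
	 * receive interface.  Ports are in network byte order, as taken from
	 * the transport header.
	 */
	static struct inpcb *
	example_demux(struct inpcbinfo *pcbinfo, const struct ip *ip,
	    uint16_t sport, uint16_t dport, struct ifnet *ifp)
	{
		return (in_pcblookup(pcbinfo, ip->ip_src, sport, ip->ip_dst,
		    dport, INPLOOKUP_WILDCARD | INPLOOKUP_RLOCKPCB |
		    INPLOOKUP_FIB, ifp));
	}

Without INPLOOKUP_FIB the lookup behaves as before: fib is RT_ALL_FIBS and inc_fibnum is ignored by the match functions.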
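The struct inpcblbgroup below backs the SO_REUSEPORT_LB socket option: per the patch, a PCB that has bound with SO_REUSEPORT_LB but not yet called listen(2) sits on the new il_pending list and is only hashed into il_inp[] once in_pcblisten() moves it over. For context, a minimal user-space listener joining such a group looks like this (plain socket API, nothing in it is added by this patch):

	#include <sys/socket.h>
	#include <netinet/in.h>
	#include <err.h>
	#include <stdint.h>
	#include <string.h>

	static int
	lb_listener(uint16_t port)
	{
		struct sockaddr_in sin;
		int s, one = 1;

		if ((s = socket(AF_INET, SOCK_STREAM, 0)) == -1)
			err(1, "socket");
		/* Must be set before bind() so the socket joins the group. */
		if (setsockopt(s, SOL_SOCKET, SO_REUSEPORT_LB, &one,
		    sizeof(one)) == -1)
			err(1, "setsockopt");
		memset(&sin, 0, sizeof(sin));
		sin.sin_family = AF_INET;
		sin.sin_len = sizeof(sin);
		sin.sin_port = htons(port);
		sin.sin_addr.s_addr = htonl(INADDR_ANY);
		if (bind(s, (struct sockaddr *)&sin, sizeof(sin)) == -1)
			err(1, "bind");
		/*
		 * Incoming connections are distributed across the listening
		 * members of the group by hashing the 4-tuple.
		 */
		if (listen(s, 128) == -1)
			err(1, "listen");
		return (s);
	}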
struct inpcb_iterator { diff --git a/sys/netinet/in_pcb_var.h b/sys/netinet/in_pcb_var.h index 655fd03ee9ba..7e8a1626ab40 100644 --- a/sys/netinet/in_pcb_var.h +++ b/sys/netinet/in_pcb_var.h @@ -50,16 +50,37 @@ int inp_trylock(struct inpcb *inp, const inp_lookup_t lock); bool inp_smr_lock(struct inpcb *, const inp_lookup_t); int in_pcb_lport(struct inpcb *, struct in_addr *, u_short *, struct ucred *, int); -int in_pcb_lport_dest(struct inpcb *inp, struct sockaddr *lsa, +int in_pcb_lport_dest(const struct inpcb *inp, struct sockaddr *lsa, u_short *lportp, struct sockaddr *fsa, u_short fport, struct ucred *cred, int lookupflags); -struct inpcb * in_pcblookup_local(struct inpcbinfo *, struct in_addr, u_short, - int, struct ucred *); +struct inpcb *in_pcblookup_local(struct inpcbinfo *, struct in_addr, u_short, + int, int, struct ucred *); +int in_pcbinshash(struct inpcb *); +void in_pcbrehash(struct inpcb *); +void in_pcbremhash_locked(struct inpcb *); -struct inpcbport { - struct inpcbhead phd_pcblist; - CK_LIST_ENTRY(inpcbport) phd_hash; - u_short phd_port; +/* + * Load balance groups used for the SO_REUSEPORT_LB socket option. Each group + * (or unique address:port combination) can be re-used at most + * INPCBLBGROUP_SIZMAX (256) times. The inpcbs are stored in il_inp which + * is dynamically resized as processes bind/unbind to that specific group. + */ +struct inpcblbgroup { + CK_LIST_ENTRY(inpcblbgroup) il_list; + LIST_HEAD(, inpcb) il_pending; /* PCBs waiting for listen() */ + struct epoch_context il_epoch_ctx; + struct ucred *il_cred; + uint16_t il_lport; /* (c) */ + u_char il_vflag; /* (c) */ + uint8_t il_numa_domain; + int il_fibnum; + union in_dependaddr il_dependladdr; /* (c) */ +#define il_laddr il_dependladdr.id46_addr.ia46_addr4 +#define il6_laddr il_dependladdr.id6_addr + uint32_t il_inpsiz; /* max count in il_inp[] (h) */ + uint32_t il_inpcnt; /* cur count in il_inp[] (h) */ + uint32_t il_pendcnt; /* cur count in il_pending (h) */ + struct inpcb *il_inp[]; /* (h) */ }; #endif /* !_NETINET_IN_PCB_VAR_H_ */ diff --git a/sys/netinet/in_prot.c b/sys/netinet/in_prot.c index 204f4f60456e..69f0f3694096 100644 --- a/sys/netinet/in_prot.c +++ b/sys/netinet/in_prot.c @@ -26,21 +26,17 @@ */ /* - * System calls related to processes and protection + * Helpers related to visibility and protection of sockets and inpcb. */ -#include <sys/cdefs.h> -#include "opt_inet.h" -#include "opt_inet6.h" - -#include <sys/param.h> #include <sys/systm.h> +#include <sys/jail.h> #include <sys/kernel.h> #include <sys/lock.h> #include <sys/mutex.h> +#include <sys/priv.h> #include <sys/proc.h> #include <sys/socket.h> -#include <sys/jail.h> #include <netinet/in.h> #include <netinet/in_pcb.h> @@ -72,3 +68,16 @@ cr_canseeinpcb(struct ucred *cred, struct inpcb *inp) return (0); } + +bool +cr_canexport_ktlskeys(struct thread *td, struct inpcb *inp) +{ + int error; + + if (cr_canseeinpcb(td->td_ucred, inp) == 0 && + cr_xids_subset(td->td_ucred, inp->inp_cred)) + return (true); + error = priv_check(td, PRIV_NETINET_KTLSKEYS); + return (error == 0); + +} diff --git a/sys/netinet/in_rmx.c b/sys/netinet/in_rmx.c index aedfd0bc08c7..b8599143b991 100644 --- a/sys/netinet/in_rmx.c +++ b/sys/netinet/in_rmx.c @@ -71,7 +71,7 @@ rib4_set_nh_pfxflags(u_int fibnum, const struct sockaddr *addr, const struct soc * add these routes to support some cases with active-active * load balancing. Given that, retain this support. 
*/ - if (in_broadcast(addr4->sin_addr, nh->nh_ifp)) + if (in_ifnet_broadcast(addr4->sin_addr, nh->nh_ifp)) is_broadcast = true; } else if (mask4->sin_addr.s_addr == 0) nhop_set_pxtype_flag(nh, NHF_DEFAULT); diff --git a/sys/netinet/in_systm.h b/sys/netinet/in_systm.h index 2750733335bb..e2f553ec461c 100644 --- a/sys/netinet/in_systm.h +++ b/sys/netinet/in_systm.h @@ -32,6 +32,8 @@ #ifndef _NETINET_IN_SYSTM_H_ #define _NETINET_IN_SYSTM_H_ +#include <sys/types.h> + /* * Miscellaneous internetwork * definitions for kernel. @@ -56,8 +58,10 @@ typedef u_int32_t n_time; /* ms since 00:00 UTC, byte rev */ #ifdef _KERNEL struct inpcb; struct ucred; +struct thread; int cr_canseeinpcb(struct ucred *cred, struct inpcb *inp); +bool cr_canexport_ktlskeys(struct thread *td, struct inpcb *inp); uint32_t iptime(void); #endif diff --git a/sys/netinet/in_var.h b/sys/netinet/in_var.h index 09d3cd050fc3..1f6f6edb9219 100644 --- a/sys/netinet/in_var.h +++ b/sys/netinet/in_var.h @@ -97,6 +97,11 @@ struct in_ifaddr { #define IN_LNAOF(in, ifa) \ ((ntohl((in).s_addr) & ~((struct in_ifaddr *)(ifa)->ia_subnetmask)) +#ifdef _KERNEL +#define IN_ARE_MASKED_ADDR_EQUAL(d, a, m) ( \ + ((((d).s_addr ^ (a).s_addr) & (m).s_addr)) == 0 ) +#endif + #define LLTABLE(ifp) \ ((struct in_ifinfo *)(ifp)->if_afdata[AF_INET])->ii_llt /* @@ -454,6 +459,7 @@ int in_joingroup_locked(struct ifnet *, const struct in_addr *, int in_leavegroup(struct in_multi *, /*const*/ struct in_mfilter *); int in_leavegroup_locked(struct in_multi *, /*const*/ struct in_mfilter *); +int in_mask2len(struct in_addr *); int in_control(struct socket *, u_long, void *, struct ifnet *, struct thread *); int in_control_ioctl(u_long, void *, struct ifnet *, diff --git a/sys/netinet/ip.h b/sys/netinet/ip.h index 8d205ba07cf5..6de41a7e79fa 100644 --- a/sys/netinet/ip.h +++ b/sys/netinet/ip.h @@ -33,7 +33,8 @@ #ifndef _NETINET_IP_H_ #define _NETINET_IP_H_ -#include <sys/cdefs.h> +#include <sys/types.h> +#include <netinet/in.h> /* * Definitions for internet protocol version 4. @@ -66,7 +67,7 @@ struct ip { u_char ip_p; /* protocol */ u_short ip_sum; /* checksum */ struct in_addr ip_src,ip_dst; /* source and dest address */ -} __packed __aligned(2); +} __packed; #define IP_MAXPACKET 65535 /* maximum packet size */ @@ -186,7 +187,7 @@ struct ip_timestamp { uint32_t ipt_time; /* network format */ } ipt_ta[1]; } ipt_timestamp; -}; +} __packed; /* Flag bits for ipt_flg. */ #define IPOPT_TS_TSONLY 0 /* timestamps only */ diff --git a/sys/netinet/ip_carp.c b/sys/netinet/ip_carp.c index ddbc13e7c878..d3d7957cf087 100644 --- a/sys/netinet/ip_carp.c +++ b/sys/netinet/ip_carp.c @@ -37,6 +37,7 @@ #include <sys/systm.h> #include <sys/devctl.h> #include <sys/jail.h> +#include <sys/kassert.h> #include <sys/kernel.h> #include <sys/limits.h> #include <sys/malloc.h> @@ -95,7 +96,8 @@ static MALLOC_DEFINE(M_CARP, "CARP", "CARP addresses"); struct carp_softc { struct ifnet *sc_carpdev; /* Pointer to parent ifnet. */ struct ifaddr **sc_ifas; /* Our ifaddrs. */ - struct sockaddr_dl sc_addr; /* Our link level address. */ + carp_version_t sc_version; /* carp or VRRPv3 */ + uint8_t sc_addr[ETHER_ADDR_LEN]; /* Our link level address. */ struct callout sc_ad_tmo; /* Advertising timeout. */ #ifdef INET struct callout sc_md_tmo; /* Master down timeout. 
*/ @@ -106,11 +108,25 @@ struct carp_softc { struct mtx sc_mtx; int sc_vhid; - int sc_advskew; - int sc_advbase; - struct in_addr sc_carpaddr; - struct in6_addr sc_carpaddr6; - + union { + struct { /* sc_version == CARP_VERSION_CARP */ + int sc_advskew; + int sc_advbase; + struct in_addr sc_carpaddr; + struct in6_addr sc_carpaddr6; + uint64_t sc_counter; + bool sc_init_counter; +#define CARP_HMAC_PAD 64 + unsigned char sc_key[CARP_KEY_LEN]; + unsigned char sc_pad[CARP_HMAC_PAD]; + SHA1_CTX sc_sha1; + }; + struct { /* sc_version == CARP_VERSION_VRRPv3 */ + uint8_t sc_vrrp_prio; + uint16_t sc_vrrp_adv_inter; + uint16_t sc_vrrp_master_inter; + }; + }; int sc_naddrs; int sc_naddrs6; int sc_ifasiz; @@ -121,15 +137,6 @@ struct carp_softc { int sc_sendad_success; #define CARP_SENDAD_MIN_SUCCESS 3 - int sc_init_counter; - uint64_t sc_counter; - - /* authentication */ -#define CARP_HMAC_PAD 64 - unsigned char sc_key[CARP_KEY_LEN]; - unsigned char sc_pad[CARP_HMAC_PAD]; - SHA1_CTX sc_sha1; - TAILQ_ENTRY(carp_softc) sc_list; /* On the carp_if list. */ LIST_ENTRY(carp_softc) sc_next; /* On the global list. */ }; @@ -166,6 +173,9 @@ struct carpkreq { /* Everything above this is identical to carpreq */ struct in_addr carpr_addr; struct in6_addr carpr_addr6; + carp_version_t carpr_version; + uint8_t carpr_vrrp_priority; + uint16_t carpr_vrrp_adv_inter; }; /* @@ -196,8 +206,6 @@ struct carpkreq { * * Known issues with locking: * - * - Sending ad, we put the pointer to the softc in an mtag, and no reference - * counting is done on the softc. * - On module unload we may race (?) with packet processing thread * dereferencing our function pointers. */ @@ -325,8 +333,9 @@ SYSCTL_VNET_PCPUSTAT(_net_inet_carp, OID_AUTO, stats, struct carpstats, 0 : ((sc)->sc_advskew + V_carp_demotion))) static void carp_input_c(struct mbuf *, struct carp_header *, sa_family_t, int); +static void vrrp_input_c(struct mbuf *, int, sa_family_t, int, int, uint16_t); static struct carp_softc - *carp_alloc(struct ifnet *); + *carp_alloc(struct ifnet *, carp_version_t, int); static void carp_destroy(struct carp_softc *); static struct carp_if *carp_alloc_if(struct ifnet *); @@ -337,8 +346,8 @@ static void carp_setrun(struct carp_softc *, sa_family_t); static void carp_master_down(void *); static void carp_master_down_locked(struct carp_softc *, const char* reason); -static void carp_send_ad(void *); static void carp_send_ad_locked(struct carp_softc *); +static void vrrp_send_ad_locked(struct carp_softc *); static void carp_addroute(struct carp_softc *); static void carp_ifa_addroute(struct ifaddr *); static void carp_delroute(struct carp_softc *); @@ -346,7 +355,7 @@ static void carp_ifa_delroute(struct ifaddr *); static void carp_send_ad_all(void *, int); static void carp_demote_adj(int, char *); -static LIST_HEAD(, carp_softc) carp_list; +static LIST_HEAD(, carp_softc) carp_list = LIST_HEAD_INITIALIZER(carp_list); static struct mtx carp_mtx; static struct sx carp_sx; static struct task carp_sendall_task = @@ -373,7 +382,7 @@ carp_is_supported_if(if_t ifp) static void carp_hmac_prepare(struct carp_softc *sc) { - uint8_t version = CARP_VERSION, type = CARP_ADVERTISEMENT; + uint8_t version = CARP_VERSION_CARP, type = CARP_ADVERTISEMENT; uint8_t vhid = sc->sc_vhid & 0xff; struct ifaddr *ifa; int i, found; @@ -385,6 +394,7 @@ carp_hmac_prepare(struct carp_softc *sc) #endif CARP_LOCK_ASSERT(sc); + MPASS(sc->sc_version == CARP_VERSION_CARP); /* Compute ipad from key. 
*/ bzero(sc->sc_pad, sizeof(sc->sc_pad)); @@ -478,6 +488,22 @@ carp_hmac_verify(struct carp_softc *sc, uint32_t counter[2], return (bcmp(md, md2, sizeof(md2))); } +static int +vrrp_checksum_verify(struct mbuf *m, int off, int len, uint16_t phdrcksum) +{ + uint16_t cksum; + + /* + * Note that VRRPv3 checksums are different from CARP checksums. + * Carp just calculates the checksum over the packet. + * VRRPv3 includes the pseudo-header checksum as well. + */ + cksum = in_cksum_skip(m, off + len, off); + cksum -= phdrcksum; + + return (cksum); +} + /* * process input packet. * we have rearranged checks order compared to the rfc, @@ -488,9 +514,11 @@ static int carp_input(struct mbuf **mp, int *offp, int proto) { struct mbuf *m = *mp; - struct ip *ip = mtod(m, struct ip *); - struct carp_header *ch; - int iplen, len; + struct ip *ip; + struct vrrpv3_header *vh; + int iplen; + int minlen; + int totlen; iplen = *offp; *mp = NULL; @@ -502,60 +530,92 @@ carp_input(struct mbuf **mp, int *offp, int proto) return (IPPROTO_DONE); } - iplen = ip->ip_hl << 2; - - if (m->m_pkthdr.len < iplen + sizeof(*ch)) { + /* Ensure we have enough header to figure out the version. */ + if (m->m_pkthdr.len < iplen + sizeof(*vh)) { CARPSTATS_INC(carps_badlen); - CARP_DEBUG("%s: received len %zd < sizeof(struct carp_header) " + CARP_DEBUG("%s: received len %zd < sizeof(struct vrrpv3_header) " "on %s\n", __func__, m->m_len - sizeof(struct ip), if_name(m->m_pkthdr.rcvif)); m_freem(m); return (IPPROTO_DONE); } - if (iplen + sizeof(*ch) < m->m_len) { - if ((m = m_pullup(m, iplen + sizeof(*ch))) == NULL) { + if (m->m_len < iplen + sizeof(*vh)) { + if ((m = m_pullup(m, iplen + sizeof(*vh))) == NULL) { CARPSTATS_INC(carps_hdrops); - CARP_DEBUG("%s: pullup failed\n", __func__); + CARP_DEBUG("%s():%d: pullup failed\n", __func__, __LINE__); return (IPPROTO_DONE); } - ip = mtod(m, struct ip *); } - ch = (struct carp_header *)((char *)ip + iplen); + ip = mtod(m, struct ip *); + totlen = ntohs(ip->ip_len); + vh = (struct vrrpv3_header *)((char *)ip + iplen); - /* - * verify that the received packet length is - * equal to the CARP header - */ - len = iplen + sizeof(*ch); - if (len > m->m_pkthdr.len) { + switch (vh->vrrp_version) { + case CARP_VERSION_CARP: + minlen = sizeof(struct carp_header); + break; + case CARP_VERSION_VRRPv3: + minlen = sizeof(struct vrrpv3_header); + break; + default: + CARPSTATS_INC(carps_badver); + CARP_DEBUG("%s: unsupported version %d on %s\n", __func__, + vh->vrrp_version, if_name(m->m_pkthdr.rcvif)); + m_freem(m); + return (IPPROTO_DONE); + } + + /* And now check the length again but with the real minimal length. 
*/ + if (m->m_pkthdr.len < iplen + minlen) { CARPSTATS_INC(carps_badlen); - CARP_DEBUG("%s: packet too short %d on %s\n", __func__, - m->m_pkthdr.len, + CARP_DEBUG("%s: received len %zd < %d " + "on %s\n", __func__, m->m_len - sizeof(struct ip), + iplen + minlen, if_name(m->m_pkthdr.rcvif)); m_freem(m); return (IPPROTO_DONE); } - if ((m = m_pullup(m, len)) == NULL) { - CARPSTATS_INC(carps_hdrops); - return (IPPROTO_DONE); + if (m->m_len < iplen + minlen) { + if ((m = m_pullup(m, iplen + minlen)) == NULL) { + CARPSTATS_INC(carps_hdrops); + CARP_DEBUG("%s():%d: pullup failed\n", __func__, __LINE__); + return (IPPROTO_DONE); + } + ip = mtod(m, struct ip *); + vh = (struct vrrpv3_header *)((char *)ip + iplen); } - ip = mtod(m, struct ip *); - ch = (struct carp_header *)((char *)ip + iplen); - /* verify the CARP checksum */ - m->m_data += iplen; - if (in_cksum(m, len - iplen)) { - CARPSTATS_INC(carps_badsum); - CARP_DEBUG("%s: checksum failed on %s\n", __func__, - if_name(m->m_pkthdr.rcvif)); - m_freem(m); - return (IPPROTO_DONE); + switch (vh->vrrp_version) { + case CARP_VERSION_CARP: { + struct carp_header *ch; + + /* verify the CARP checksum */ + if (in_cksum_skip(m, totlen, iplen)) { + CARPSTATS_INC(carps_badsum); + CARP_DEBUG("%s: checksum failed on %s\n", __func__, + if_name(m->m_pkthdr.rcvif)); + m_freem(m); + break; + } + ch = (struct carp_header *)((char *)ip + iplen); + carp_input_c(m, ch, AF_INET, ip->ip_ttl); + break; + } + case CARP_VERSION_VRRPv3: { + uint16_t phdrcksum; + + phdrcksum = in_pseudo(ip->ip_src.s_addr, ip->ip_dst.s_addr, + htonl((u_short)(totlen - iplen) + ip->ip_p)); + vrrp_input_c(m, iplen, AF_INET, ip->ip_ttl, totlen - iplen, + phdrcksum); + break; + } + default: + KASSERT(false, ("Unsupported version %d", vh->vrrp_version)); } - m->m_data -= iplen; - carp_input_c(m, ch, AF_INET, ip->ip_ttl); return (IPPROTO_DONE); } #endif @@ -566,8 +626,8 @@ carp6_input(struct mbuf **mp, int *offp, int proto) { struct mbuf *m = *mp; struct ip6_hdr *ip6 = mtod(m, struct ip6_hdr *); - struct carp_header *ch; - u_int len; + struct vrrpv3_header *vh; + u_int len, minlen; CARPSTATS_INC(carps_ipackets6); @@ -585,10 +645,9 @@ carp6_input(struct mbuf **mp, int *offp, int proto) return (IPPROTO_DONE); } - /* verify that we have a complete carp packet */ - if (m->m_len < *offp + sizeof(*ch)) { + if (m->m_len < *offp + sizeof(*vh)) { len = m->m_len; - m = m_pullup(m, *offp + sizeof(*ch)); + m = m_pullup(m, *offp + sizeof(*vh)); if (m == NULL) { CARPSTATS_INC(carps_badlen); CARP_DEBUG("%s: packet size %u too small\n", __func__, len); @@ -596,20 +655,73 @@ carp6_input(struct mbuf **mp, int *offp, int proto) } ip6 = mtod(m, struct ip6_hdr *); } - ch = (struct carp_header *)(mtod(m, char *) + *offp); + vh = (struct vrrpv3_header *)(mtod(m, char *) + *offp); - /* verify the CARP checksum */ - m->m_data += *offp; - if (in_cksum(m, sizeof(*ch))) { - CARPSTATS_INC(carps_badsum); - CARP_DEBUG("%s: checksum failed, on %s\n", __func__, + switch (vh->vrrp_version) { + case CARP_VERSION_CARP: + minlen = sizeof(struct carp_header); + break; + case CARP_VERSION_VRRPv3: + minlen = sizeof(struct vrrpv3_header); + break; + default: + CARPSTATS_INC(carps_badver); + CARP_DEBUG("%s: unsupported version %d on %s\n", __func__, + vh->vrrp_version, if_name(m->m_pkthdr.rcvif)); + m_freem(m); + return (IPPROTO_DONE); + } + + /* And now check the length again but with the real minimal length. 
*/ + if (m->m_pkthdr.len < sizeof(*ip6) + minlen) { + CARPSTATS_INC(carps_badlen); + CARP_DEBUG("%s: received len %zd < %zd " + "on %s\n", __func__, m->m_len - sizeof(struct ip), + sizeof(*ip6) + minlen, if_name(m->m_pkthdr.rcvif)); m_freem(m); return (IPPROTO_DONE); } - m->m_data -= *offp; - carp_input_c(m, ch, AF_INET6, ip6->ip6_hlim); + if (m->m_len < sizeof(*ip6) + minlen) { + if ((m = m_pullup(m, sizeof(*ip6) + minlen)) == NULL) { + CARPSTATS_INC(carps_hdrops); + CARP_DEBUG("%s():%d: pullup failed\n", __func__, __LINE__); + return (IPPROTO_DONE); + } + ip6 = mtod(m, struct ip6_hdr *); + vh = (struct vrrpv3_header *)mtodo(m, sizeof(*ip6)); + } + + switch (vh->vrrp_version) { + case CARP_VERSION_CARP: { + struct carp_header *ch; + + /* verify the CARP checksum */ + if (in_cksum_skip(m, *offp + sizeof(struct carp_header), + *offp)) { + CARPSTATS_INC(carps_badsum); + CARP_DEBUG("%s: checksum failed, on %s\n", __func__, + if_name(m->m_pkthdr.rcvif)); + m_freem(m); + break; + } + ch = (struct carp_header *)((char *)ip6 + sizeof(*ip6)); + carp_input_c(m, ch, AF_INET6, ip6->ip6_hlim); + break; + } + case CARP_VERSION_VRRPv3: { + uint16_t phdrcksum; + + phdrcksum = in6_cksum_pseudo(ip6, ntohs(ip6->ip6_plen), + ip6->ip6_nxt, 0); + vrrp_input_c(m, sizeof(*ip6), AF_INET6, ip6->ip6_hlim, + ntohs(ip6->ip6_plen), phdrcksum); + break; + } + default: + KASSERT(false, ("Unsupported version %d", vh->vrrp_version)); + } return (IPPROTO_DONE); } #endif /* INET6 */ @@ -629,7 +741,7 @@ carp6_input(struct mbuf **mp, int *offp, int proto) * The VHID test is outside this mini-function. */ static int -carp_source_is_self(struct mbuf *m, struct ifaddr *ifa, sa_family_t af) +carp_source_is_self(const struct mbuf *m, struct ifaddr *ifa, sa_family_t af) { #ifdef INET struct ip *ip4; @@ -659,16 +771,12 @@ carp_source_is_self(struct mbuf *m, struct ifaddr *ifa, sa_family_t af) return (0); } -static void -carp_input_c(struct mbuf *m, struct carp_header *ch, sa_family_t af, int ttl) +static struct ifaddr * +carp_find_ifa(const struct mbuf *m, sa_family_t af, uint8_t vhid) { struct ifnet *ifp = m->m_pkthdr.rcvif; struct ifaddr *ifa, *match; - struct carp_softc *sc; - uint64_t tmp_counter; - struct timeval sc_tv, ch_tv; int error; - bool multicast = false; NET_EPOCH_ASSERT(); @@ -688,9 +796,9 @@ carp_input_c(struct mbuf *m, struct carp_header *ch, sa_family_t af, int ttl) IFNET_FOREACH_IFA(ifp, ifa) { if (match == NULL && ifa->ifa_carp != NULL && ifa->ifa_addr->sa_family == af && - ifa->ifa_carp->sc_vhid == ch->carp_vhid) + ifa->ifa_carp->sc_vhid == vhid) match = ifa; - if (ch->carp_vhid == 0 && carp_source_is_self(m, ifa, af)) + if (vhid == 0 && carp_source_is_self(m, ifa, af)) error = ELOOP; } ifa = error ? NULL : match; @@ -705,12 +813,37 @@ carp_input_c(struct mbuf *m, struct carp_header *ch, sa_family_t af, int ttl) } else { CARPSTATS_INC(carps_badvhid); } + } + + return (ifa); +} + +static void +carp_input_c(struct mbuf *m, struct carp_header *ch, sa_family_t af, int ttl) +{ + struct ifnet *ifp = m->m_pkthdr.rcvif; + struct ifaddr *ifa; + struct carp_softc *sc; + uint64_t tmp_counter; + struct timeval sc_tv, ch_tv; + bool multicast = false; + + NET_EPOCH_ASSERT(); + MPASS(ch->carp_version == CARP_VERSION_CARP); + + ifa = carp_find_ifa(m, af, ch->carp_vhid); + if (ifa == NULL) { m_freem(m); return; } + sc = ifa->ifa_carp; + CARP_LOCK(sc); + /* verify the CARP version. 
*/ - if (ch->carp_version != CARP_VERSION) { + if (sc->sc_version != CARP_VERSION_CARP) { + CARP_UNLOCK(sc); + CARPSTATS_INC(carps_badver); CARP_DEBUG("%s: invalid version %d\n", if_name(ifp), ch->carp_version); @@ -719,10 +852,8 @@ carp_input_c(struct mbuf *m, struct carp_header *ch, sa_family_t af, int ttl) return; } - sc = ifa->ifa_carp; - CARP_LOCK(sc); if (ifa->ifa_addr->sa_family == AF_INET) { - multicast = IN_MULTICAST(sc->sc_carpaddr.s_addr); + multicast = IN_MULTICAST(ntohl(sc->sc_carpaddr.s_addr)); } else { multicast = IN6_IS_ADDR_MULTICAST(&sc->sc_carpaddr6); } @@ -749,7 +880,7 @@ carp_input_c(struct mbuf *m, struct carp_header *ch, sa_family_t af, int ttl) /* XXX Replay protection goes here */ - sc->sc_init_counter = 0; + sc->sc_init_counter = false; sc->sc_counter = tmp_counter; sc_tv.tv_sec = sc->sc_advbase; @@ -809,11 +940,133 @@ out: m_freem(m); } +static void +vrrp_input_c(struct mbuf *m, int off, sa_family_t af, int ttl, + int len, uint16_t phdrcksum) +{ + struct vrrpv3_header *vh = mtodo(m, off); + struct ifnet *ifp = m->m_pkthdr.rcvif; + struct ifaddr *ifa; + struct carp_softc *sc; + + NET_EPOCH_ASSERT(); + MPASS(vh->vrrp_version == CARP_VERSION_VRRPv3); + + ifa = carp_find_ifa(m, af, vh->vrrp_vrtid); + if (ifa == NULL) { + m_freem(m); + return; + } + + sc = ifa->ifa_carp; + CARP_LOCK(sc); + + ifa_free(ifa); + + /* verify the CARP version. */ + if (sc->sc_version != CARP_VERSION_VRRPv3) { + CARP_UNLOCK(sc); + + CARPSTATS_INC(carps_badver); + CARP_DEBUG("%s: invalid version %d\n", if_name(ifp), + vh->vrrp_version); + m_freem(m); + return; + } + + /* verify that the IP TTL is 255. */ + if (ttl != CARP_DFLTTL) { + CARPSTATS_INC(carps_badttl); + CARP_DEBUG("%s: received ttl %d != 255 on %s\n", __func__, + ttl, if_name(m->m_pkthdr.rcvif)); + goto out; + } + + if (vrrp_checksum_verify(m, off, len, phdrcksum)) { + CARPSTATS_INC(carps_badsum); + CARP_DEBUG("%s: incorrect checksum for VRID %u@%s\n", __func__, + sc->sc_vhid, if_name(ifp)); + goto out; + } + + /* RFC9568, 7.1 Receiving VRRP packets. */ + if (sc->sc_vrrp_prio == 255) { + CARP_DEBUG("%s: our priority is 255. Ignore peer announcement.\n", + __func__); + goto out; + } + + /* XXX TODO Check IP address payload. */ + + sc->sc_vrrp_master_inter = ntohs(vh->vrrp_max_adver_int); + + switch (sc->sc_state) { + case INIT: + break; + case MASTER: + /* + * If we receive an advertisement from a master who's going to + * be more frequent than us, go into BACKUP state. + * Same if the peer has a higher priority than us. + */ + if (ntohs(vh->vrrp_max_adver_int) < sc->sc_vrrp_adv_inter || + vh->vrrp_priority > sc->sc_vrrp_prio) { + callout_stop(&sc->sc_ad_tmo); + carp_set_state(sc, BACKUP, + "more frequent advertisement received"); + carp_setrun(sc, 0); + carp_delroute(sc); + } + break; + case BACKUP: + /* + * If we're pre-empting masters who advertise slower than us, + * and this one claims to be slower, treat him as down. + */ + if (V_carp_preempt && (ntohs(vh->vrrp_max_adver_int) > sc->sc_vrrp_adv_inter + || vh->vrrp_priority < sc->sc_vrrp_prio)) { + carp_master_down_locked(sc, + "preempting a slower master"); + break; + } + + /* + * Otherwise, we reset the counter and wait for the next + * advertisement. 
+ */ + carp_setrun(sc, af); + break; + } + +out: + CARP_UNLOCK(sc); + m_freem(m); +} + static int -carp_prepare_ad(struct mbuf *m, struct carp_softc *sc, struct carp_header *ch) +carp_tag(struct carp_softc *sc, struct mbuf *m) { struct m_tag *mtag; + /* Tag packet for carp_output */ + if ((mtag = m_tag_get(PACKET_TAG_CARP, sizeof(sc->sc_vhid), + M_NOWAIT)) == NULL) { + m_freem(m); + CARPSTATS_INC(carps_onomem); + return (ENOMEM); + } + bcopy(&sc->sc_vhid, mtag + 1, sizeof(sc->sc_vhid)); + m_tag_prepend(m, mtag); + + return (0); +} + +static void +carp_prepare_ad(struct mbuf *m, struct carp_softc *sc, struct carp_header *ch) +{ + + MPASS(sc->sc_version == CARP_VERSION_CARP); + if (sc->sc_init_counter) { /* this could also be seconds since unix epoch */ sc->sc_counter = arc4random(); @@ -826,18 +1079,19 @@ carp_prepare_ad(struct mbuf *m, struct carp_softc *sc, struct carp_header *ch) ch->carp_counter[1] = htonl(sc->sc_counter&0xffffffff); carp_hmac_generate(sc, ch->carp_counter, ch->carp_md); +} - /* Tag packet for carp_output */ - if ((mtag = m_tag_get(PACKET_TAG_CARP, sizeof(struct carp_softc *), - M_NOWAIT)) == NULL) { - m_freem(m); - CARPSTATS_INC(carps_onomem); - return (ENOMEM); +static inline void +send_ad_locked(struct carp_softc *sc) +{ + switch (sc->sc_version) { + case CARP_VERSION_CARP: + carp_send_ad_locked(sc); + break; + case CARP_VERSION_VRRPv3: + vrrp_send_ad_locked(sc); + break; } - bcopy(&sc, mtag + 1, sizeof(sc)); - m_tag_prepend(m, mtag); - - return (0); } /* @@ -856,7 +1110,7 @@ carp_send_ad_all(void *ctx __unused, int pending __unused) if (sc->sc_state == MASTER) { CARP_LOCK(sc); CURVNET_SET(sc->sc_carpdev->if_vnet); - carp_send_ad_locked(sc); + send_ad_locked(sc); CURVNET_RESTORE(); CARP_UNLOCK(sc); } @@ -866,7 +1120,7 @@ carp_send_ad_all(void *ctx __unused, int pending __unused) /* Send a periodic advertisement, executed in callout context. 
*/ static void -carp_send_ad(void *v) +carp_callout(void *v) { struct carp_softc *sc = v; struct epoch_tracker et; @@ -874,7 +1128,7 @@ carp_send_ad(void *v) NET_EPOCH_ENTER(et); CARP_LOCK_ASSERT(sc); CURVNET_SET(sc->sc_carpdev->if_vnet); - carp_send_ad_locked(sc); + send_ad_locked(sc); CURVNET_RESTORE(); CARP_UNLOCK(sc); NET_EPOCH_EXIT(et); @@ -958,12 +1212,13 @@ carp_send_ad_locked(struct carp_softc *sc) NET_EPOCH_ASSERT(); CARP_LOCK_ASSERT(sc); + MPASS(sc->sc_version == CARP_VERSION_CARP); advskew = DEMOTE_ADVSKEW(sc); tv.tv_sec = sc->sc_advbase; tv.tv_usec = advskew * 1000000 / 256; - ch.carp_version = CARP_VERSION; + ch.carp_version = CARP_VERSION_CARP; ch.carp_type = CARP_ADVERTISEMENT; ch.carp_vhid = sc->sc_vhid; ch.carp_advbase = sc->sc_advbase; @@ -988,7 +1243,7 @@ carp_send_ad_locked(struct carp_softc *sc) m->m_pkthdr.rcvif = NULL; m->m_len = len; M_ALIGN(m, m->m_len); - if (IN_MULTICAST(sc->sc_carpaddr.s_addr)) + if (IN_MULTICAST(ntohl(sc->sc_carpaddr.s_addr))) m->m_flags |= M_MCAST; ip = mtod(m, struct ip *); ip->ip_v = IPVERSION; @@ -999,7 +1254,7 @@ carp_send_ad_locked(struct carp_softc *sc) ip->ip_ttl = CARP_DFLTTL; ip->ip_p = IPPROTO_CARP; ip->ip_sum = 0; - ip_fillid(ip); + ip_fillid(ip, V_ip_random_id); ifa = carp_best_ifa(AF_INET, sc->sc_carpdev); if (ifa != NULL) { @@ -1012,7 +1267,9 @@ carp_send_ad_locked(struct carp_softc *sc) ch_ptr = (struct carp_header *)(&ip[1]); bcopy(&ch, ch_ptr, sizeof(ch)); - if (carp_prepare_ad(m, sc, ch_ptr)) + carp_prepare_ad(m, sc, ch_ptr); + if (IN_MULTICAST(ntohl(sc->sc_carpaddr.s_addr)) && + carp_tag(sc, m) != 0) goto resched; m->m_data += sizeof(*ip); @@ -1072,7 +1329,9 @@ carp_send_ad_locked(struct carp_softc *sc) ch_ptr = (struct carp_header *)(&ip6[1]); bcopy(&ch, ch_ptr, sizeof(ch)); - if (carp_prepare_ad(m, sc, ch_ptr)) + carp_prepare_ad(m, sc, ch_ptr); + if (IN6_IS_ADDR_MULTICAST(&sc->sc_carpaddr6) && + carp_tag(sc, m) != 0) goto resched; m->m_data += sizeof(*ip6); @@ -1087,7 +1346,188 @@ carp_send_ad_locked(struct carp_softc *sc) #endif /* INET6 */ resched: - callout_reset(&sc->sc_ad_tmo, tvtohz(&tv), carp_send_ad, sc); + callout_reset(&sc->sc_ad_tmo, tvtohz(&tv), carp_callout, sc); +} + +static void +vrrp_send_ad_locked(struct carp_softc *sc) +{ + struct vrrpv3_header *vh_ptr; + struct ifaddr *ifa; + struct mbuf *m; + int len; + struct vrrpv3_header vh = { + .vrrp_version = CARP_VERSION_VRRPv3, + .vrrp_type = VRRP_TYPE_ADVERTISEMENT, + .vrrp_vrtid = sc->sc_vhid, + .vrrp_priority = sc->sc_vrrp_prio, + .vrrp_count_addr = 0, + .vrrp_max_adver_int = htons(sc->sc_vrrp_adv_inter), + .vrrp_checksum = 0, + }; + + NET_EPOCH_ASSERT(); + CARP_LOCK_ASSERT(sc); + MPASS(sc->sc_version == CARP_VERSION_VRRPv3); + +#ifdef INET + if (sc->sc_naddrs) { + struct ip *ip; + + m = m_gethdr(M_NOWAIT, MT_DATA); + if (m == NULL) { + CARPSTATS_INC(carps_onomem); + goto resched; + } + len = sizeof(*ip) + sizeof(vh); + m->m_pkthdr.len = len; + m->m_pkthdr.rcvif = NULL; + m->m_len = len; + M_ALIGN(m, m->m_len); + m->m_flags |= M_MCAST; + ip = mtod(m, struct ip *); + ip->ip_v = IPVERSION; + ip->ip_hl = sizeof(*ip) >> 2; + ip->ip_tos = V_carp_dscp << IPTOS_DSCP_OFFSET; + ip->ip_off = htons(IP_DF); + ip->ip_ttl = CARP_DFLTTL; + ip->ip_p = IPPROTO_CARP; + ip->ip_sum = 0; + ip_fillid(ip, V_ip_random_id); + + ifa = carp_best_ifa(AF_INET, sc->sc_carpdev); + if (ifa != NULL) { + ip->ip_src.s_addr = + ifatoia(ifa)->ia_addr.sin_addr.s_addr; + ifa_free(ifa); + } else + ip->ip_src.s_addr = 0; + ip->ip_dst.s_addr = htonl(INADDR_CARP_GROUP); + + /* Include the IP addresses 
in the announcement. */ + for (int i = 0; i < (sc->sc_naddrs + sc->sc_naddrs6); i++) { + struct sockaddr_in *in; + + MPASS(sc->sc_ifas[i] != NULL); + if (sc->sc_ifas[i]->ifa_addr->sa_family != AF_INET) + continue; + + in = (struct sockaddr_in *)sc->sc_ifas[i]->ifa_addr; + + if (m_append(m, sizeof(in->sin_addr), + (caddr_t)&in->sin_addr) != 1) { + m_freem(m); + goto resched; + } + + vh.vrrp_count_addr++; + len += sizeof(in->sin_addr); + } + ip->ip_len = htons(len); + + vh_ptr = (struct vrrpv3_header *)mtodo(m, sizeof(*ip)); + bcopy(&vh, vh_ptr, sizeof(vh)); + + vh_ptr->vrrp_checksum = in_pseudo(ip->ip_src.s_addr, + ip->ip_dst.s_addr, + htonl((uint16_t)(len - sizeof(*ip)) + ip->ip_p)); + vh_ptr->vrrp_checksum = in_cksum_skip(m, len, sizeof(*ip)); + + if (carp_tag(sc, m)) + goto resched; + + CARPSTATS_INC(carps_opackets); + + carp_send_ad_error(sc, ip_output(m, NULL, NULL, IP_RAWOUTPUT, + &sc->sc_carpdev->if_carp->cif_imo, NULL)); + } +#endif +#ifdef INET6 + if (sc->sc_naddrs6) { + struct ip6_hdr *ip6; + + m = m_gethdr(M_NOWAIT, MT_DATA); + if (m == NULL) { + CARPSTATS_INC(carps_onomem); + goto resched; + } + len = sizeof(*ip6) + sizeof(vh); + m->m_pkthdr.len = len; + m->m_pkthdr.rcvif = NULL; + m->m_len = len; + M_ALIGN(m, m->m_len); + m->m_flags |= M_MCAST; + ip6 = mtod(m, struct ip6_hdr *); + bzero(ip6, sizeof(*ip6)); + ip6->ip6_vfc |= IPV6_VERSION; + /* Traffic class isn't defined in ip6 struct instead + * it gets offset into flowid field */ + ip6->ip6_flow |= htonl(V_carp_dscp << (IPV6_FLOWLABEL_LEN + + IPTOS_DSCP_OFFSET)); + ip6->ip6_hlim = CARP_DFLTTL; + ip6->ip6_nxt = IPPROTO_CARP; + + /* set the source address */ + ifa = carp_best_ifa(AF_INET6, sc->sc_carpdev); + if (ifa != NULL) { + bcopy(IFA_IN6(ifa), &ip6->ip6_src, + sizeof(struct in6_addr)); + ifa_free(ifa); + } else + /* This should never happen with IPv6. */ + bzero(&ip6->ip6_src, sizeof(struct in6_addr)); + + /* Set the multicast destination. */ + bzero(&ip6->ip6_dst, sizeof(ip6->ip6_dst)); + ip6->ip6_dst.s6_addr16[0] = IPV6_ADDR_INT16_MLL; + ip6->ip6_dst.s6_addr8[15] = 0x12; + + /* Include the IP addresses in the announcement. 
*/ + len = sizeof(vh); + for (int i = 0; i < (sc->sc_naddrs + sc->sc_naddrs6); i++) { + struct sockaddr_in6 *in6; + + MPASS(sc->sc_ifas[i] != NULL); + if (sc->sc_ifas[i]->ifa_addr->sa_family != AF_INET6) + continue; + + in6 = (struct sockaddr_in6 *)sc->sc_ifas[i]->ifa_addr; + + if (m_append(m, sizeof(in6->sin6_addr), + (char *)&in6->sin6_addr) != 1) { + m_freem(m); + goto resched; + } + + vh.vrrp_count_addr++; + len += sizeof(in6->sin6_addr); + } + ip6->ip6_plen = htonl(len); + + vh_ptr = (struct vrrpv3_header *)mtodo(m, sizeof(*ip6)); + bcopy(&vh, vh_ptr, sizeof(vh)); + + vh_ptr->vrrp_checksum = in6_cksum_pseudo(ip6, len, ip6->ip6_nxt, 0); + vh_ptr->vrrp_checksum = in_cksum_skip(m, len + sizeof(*ip6), sizeof(*ip6)); + + if (in6_setscope(&ip6->ip6_dst, sc->sc_carpdev, NULL) != 0) { + m_freem(m); + CARP_DEBUG("%s: in6_setscope failed\n", __func__); + goto resched; + } + + if (carp_tag(sc, m)) + goto resched; + CARPSTATS_INC(carps_opackets6); + + carp_send_ad_error(sc, ip6_output(m, NULL, NULL, 0, + &sc->sc_carpdev->if_carp->cif_im6o, NULL, NULL)); + } +#endif + +resched: + callout_reset(&sc->sc_ad_tmo, sc->sc_vrrp_adv_inter * hz / 100, + carp_callout, sc); } static void @@ -1178,7 +1618,7 @@ carp_send_arp(struct carp_softc *sc) if (ifa->ifa_addr->sa_family != AF_INET) continue; addr = ((struct sockaddr_in *)ifa->ifa_addr)->sin_addr; - arp_announce_ifaddr(sc->sc_carpdev, addr, LLADDR(&sc->sc_addr)); + arp_announce_ifaddr(sc->sc_carpdev, addr, sc->sc_addr); } } @@ -1188,7 +1628,7 @@ carp_iamatch(struct ifaddr *ifa, uint8_t **enaddr) struct carp_softc *sc = ifa->ifa_carp; if (sc->sc_state == MASTER) { - *enaddr = LLADDR(&sc->sc_addr); + *enaddr = sc->sc_addr; return (1); } @@ -1246,6 +1686,7 @@ char * carp_macmatch6(struct ifnet *ifp, struct mbuf *m, const struct in6_addr *taddr) { struct ifaddr *ifa; + char *mac = NULL; NET_EPOCH_ASSERT(); @@ -1256,18 +1697,26 @@ carp_macmatch6(struct ifnet *ifp, struct mbuf *m, const struct in6_addr *taddr) struct m_tag *mtag; mtag = m_tag_get(PACKET_TAG_CARP, - sizeof(struct carp_softc *), M_NOWAIT); - if (mtag == NULL) - /* Better a bit than nothing. */ - return (LLADDR(&sc->sc_addr)); + sizeof(sc->sc_vhid) + sizeof(sc->sc_addr), + M_NOWAIT); + if (mtag == NULL) { + CARPSTATS_INC(carps_onomem); + break; + } + /* carp_output expects sc_vhid first. */ + bcopy(&sc->sc_vhid, mtag + 1, sizeof(sc->sc_vhid)); + /* + * Save sc_addr into mtag data after sc_vhid to avoid + * possible access to destroyed softc. + */ + mac = (char *)(mtag + 1) + sizeof(sc->sc_vhid); + bcopy(sc->sc_addr, mac, sizeof(sc->sc_addr)); - bcopy(&sc, mtag + 1, sizeof(sc)); m_tag_prepend(m, mtag); - - return (LLADDR(&sc->sc_addr)); + break; } - return (NULL); + return (mac); } #endif /* INET6 */ @@ -1286,7 +1735,7 @@ carp_forus(struct ifnet *ifp, u_char *dhost) * CARP_LOCK() is not here, since would protect nothing, but * cause deadlock with if_bridge, calling this under its lock. 
*/ - if (sc->sc_state == MASTER && !bcmp(dhost, LLADDR(&sc->sc_addr), + if (sc->sc_state == MASTER && !bcmp(dhost, sc->sc_addr, ETHER_ADDR_LEN)) { CIF_UNLOCK(ifp->if_carp); return (1); @@ -1327,7 +1776,7 @@ carp_master_down_locked(struct carp_softc *sc, const char *reason) switch (sc->sc_state) { case BACKUP: carp_set_state(sc, MASTER, reason); - carp_send_ad_locked(sc); + send_ad_locked(sc); #ifdef INET carp_send_arp(sc); #endif @@ -1357,6 +1806,7 @@ static void carp_setrun(struct carp_softc *sc, sa_family_t af) { struct timeval tv; + int timeout; CARP_LOCK_ASSERT(sc); @@ -1373,40 +1823,63 @@ carp_setrun(struct carp_softc *sc, sa_family_t af) break; case BACKUP: callout_stop(&sc->sc_ad_tmo); - tv.tv_sec = 3 * sc->sc_advbase; - tv.tv_usec = sc->sc_advskew * 1000000 / 256; + + switch (sc->sc_version) { + case CARP_VERSION_CARP: + tv.tv_sec = 3 * sc->sc_advbase; + tv.tv_usec = sc->sc_advskew * 1000000 / 256; + timeout = tvtohz(&tv); + break; + case CARP_VERSION_VRRPv3: + /* skew time */ + timeout = (256 - sc->sc_vrrp_prio) * + sc->sc_vrrp_master_inter / 256; + timeout += (3 * sc->sc_vrrp_master_inter); + timeout *= hz; + timeout /= 100; /* master interval is in centiseconds */ + break; + } switch (af) { #ifdef INET case AF_INET: - callout_reset(&sc->sc_md_tmo, tvtohz(&tv), + callout_reset(&sc->sc_md_tmo, timeout, carp_master_down, sc); break; #endif #ifdef INET6 case AF_INET6: - callout_reset(&sc->sc_md6_tmo, tvtohz(&tv), + callout_reset(&sc->sc_md6_tmo, timeout, carp_master_down, sc); break; #endif default: #ifdef INET if (sc->sc_naddrs) - callout_reset(&sc->sc_md_tmo, tvtohz(&tv), + callout_reset(&sc->sc_md_tmo, timeout, carp_master_down, sc); #endif #ifdef INET6 if (sc->sc_naddrs6) - callout_reset(&sc->sc_md6_tmo, tvtohz(&tv), + callout_reset(&sc->sc_md6_tmo, timeout, carp_master_down, sc); #endif break; } break; case MASTER: - tv.tv_sec = sc->sc_advbase; - tv.tv_usec = sc->sc_advskew * 1000000 / 256; - callout_reset(&sc->sc_ad_tmo, tvtohz(&tv), - carp_send_ad, sc); + switch (sc->sc_version) { + case CARP_VERSION_CARP: + tv.tv_sec = sc->sc_advbase; + tv.tv_usec = sc->sc_advskew * 1000000 / 256; + callout_reset(&sc->sc_ad_tmo, tvtohz(&tv), + carp_callout, sc); + break; + case CARP_VERSION_VRRPv3: + callout_reset(&sc->sc_ad_tmo, + sc->sc_vrrp_adv_inter * hz / 100, + carp_callout, sc); + break; + } break; } } @@ -1559,7 +2032,7 @@ int carp_output(struct ifnet *ifp, struct mbuf *m, const struct sockaddr *sa) { struct m_tag *mtag; - struct carp_softc *sc; + int vhid; if (!sa) return (0); @@ -1581,20 +2054,7 @@ carp_output(struct ifnet *ifp, struct mbuf *m, const struct sockaddr *sa) if (mtag == NULL) return (0); - bcopy(mtag + 1, &sc, sizeof(sc)); - - switch (sa->sa_family) { - case AF_INET: - if (! IN_MULTICAST(ntohl(sc->sc_carpaddr.s_addr))) - return (0); - break; - case AF_INET6: - if (! IN6_IS_ADDR_MULTICAST(&sc->sc_carpaddr6)) - return (0); - break; - default: - panic("Unknown af"); - } + bcopy(mtag + 1, &vhid, sizeof(vhid)); /* Set the source MAC address to the Virtual Router MAC Address. 
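For readers less familiar with RFC 9568 timer terms, the BACKUP-state timeout assembled above is the Master_Down_Interval: Skew_Time = ((256 - Priority) * Master_Adver_Interval) / 256 plus three advertisement intervals, all in centiseconds, converted to callout ticks at the end. A minimal restatement as a helper (vrrp_master_down_ticks() is purely illustrative and not part of the change):

	static int
	vrrp_master_down_ticks(uint8_t prio, uint16_t master_inter)
	{
		int timeout;

		timeout = (256 - prio) * master_inter / 256;	/* Skew_Time */
		timeout += 3 * master_inter;	/* 3 * Master_Adver_Interval */
		return (timeout * hz / 100);	/* centiseconds -> ticks */
	}

With the defaults set in carp_alloc() later in this diff (priority 100, interval 100 cs) this gives 60 + 300 = 360 cs, i.e. a backup declares the master down after roughly 3.6 seconds of silence.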
*/ switch (ifp->if_type) { @@ -1609,7 +2069,7 @@ carp_output(struct ifnet *ifp, struct mbuf *m, const struct sockaddr *sa) eh->ether_shost[2] = 0x5e; eh->ether_shost[3] = 0; eh->ether_shost[4] = 1; - eh->ether_shost[5] = sc->sc_vhid; + eh->ether_shost[5] = vhid; } break; default: @@ -1622,7 +2082,7 @@ carp_output(struct ifnet *ifp, struct mbuf *m, const struct sockaddr *sa) } static struct carp_softc* -carp_alloc(struct ifnet *ifp) +carp_alloc(struct ifnet *ifp, carp_version_t version, int vhid) { struct carp_softc *sc; struct carp_if *cif; @@ -1632,20 +2092,31 @@ carp_alloc(struct ifnet *ifp) if ((cif = ifp->if_carp) == NULL) cif = carp_alloc_if(ifp); - sc = malloc(sizeof(*sc), M_CARP, M_WAITOK|M_ZERO); - - sc->sc_advbase = CARP_DFLTINTV; - sc->sc_vhid = -1; /* required setting */ - sc->sc_init_counter = 1; - sc->sc_state = INIT; - - sc->sc_ifasiz = sizeof(struct ifaddr *); + sc = malloc(sizeof(*sc), M_CARP, M_WAITOK); + *sc = (struct carp_softc ){ + .sc_vhid = vhid, + .sc_version = version, + .sc_state = INIT, + .sc_carpdev = ifp, + .sc_ifasiz = sizeof(struct ifaddr *), + .sc_addr = { 0, 0, 0x5e, 0, 1, vhid }, + }; sc->sc_ifas = malloc(sc->sc_ifasiz, M_CARP, M_WAITOK|M_ZERO); - sc->sc_carpdev = ifp; - sc->sc_carpaddr.s_addr = htonl(INADDR_CARP_GROUP); - sc->sc_carpaddr6.s6_addr16[0] = IPV6_ADDR_INT16_MLL; - sc->sc_carpaddr6.s6_addr8[15] = 0x12; + switch (version) { + case CARP_VERSION_CARP: + sc->sc_advbase = CARP_DFLTINTV; + sc->sc_init_counter = true; + sc->sc_carpaddr.s_addr = htonl(INADDR_CARP_GROUP); + sc->sc_carpaddr6.s6_addr16[0] = IPV6_ADDR_INT16_MLL; + sc->sc_carpaddr6.s6_addr8[15] = 0x12; + break; + case CARP_VERSION_VRRPv3: + sc->sc_vrrp_adv_inter = 100; + sc->sc_vrrp_master_inter = sc->sc_vrrp_adv_inter; + sc->sc_vrrp_prio = 100; + break; + } CARP_LOCK_INIT(sc); #ifdef INET @@ -1770,12 +2241,19 @@ carp_carprcp(void *arg, struct carp_softc *sc, int priv) CARP_LOCK(sc); carpr->carpr_state = sc->sc_state; carpr->carpr_vhid = sc->sc_vhid; - carpr->carpr_advbase = sc->sc_advbase; - carpr->carpr_advskew = sc->sc_advskew; - if (priv) - bcopy(sc->sc_key, carpr->carpr_key, sizeof(carpr->carpr_key)); - else - bzero(carpr->carpr_key, sizeof(carpr->carpr_key)); + switch (sc->sc_version) { + case CARP_VERSION_CARP: + carpr->carpr_advbase = sc->sc_advbase; + carpr->carpr_advskew = sc->sc_advskew; + if (priv) + bcopy(sc->sc_key, carpr->carpr_key, + sizeof(carpr->carpr_key)); + else + bzero(carpr->carpr_key, sizeof(carpr->carpr_key)); + break; + case CARP_VERSION_VRRPv3: + break; + } CARP_UNLOCK(sc); return (true); @@ -1788,9 +2266,21 @@ carp_ioctl_set(if_t ifp, struct carpkreq *carpr) struct carp_softc *sc = NULL; int error = 0; + if (carpr->carpr_vhid <= 0 || carpr->carpr_vhid > CARP_MAXVHID) + return (EINVAL); - if (carpr->carpr_vhid <= 0 || carpr->carpr_vhid > CARP_MAXVHID || - carpr->carpr_advbase < 0 || carpr->carpr_advskew < 0) { + switch (carpr->carpr_version) { + case CARP_VERSION_CARP: + if (carpr->carpr_advbase != 0 && (carpr->carpr_advbase > 255 || + carpr->carpr_advbase < CARP_DFLTINTV)) + return (EINVAL); + if (carpr->carpr_advskew < 0 || carpr->carpr_advskew >= 255) + return (EINVAL); + break; + case CARP_VERSION_VRRPv3: + /* XXXGL: shouldn't we check anything? 
*/ + break; + default: return (EINVAL); } @@ -1799,41 +2289,37 @@ carp_ioctl_set(if_t ifp, struct carpkreq *carpr) if (sc->sc_vhid == carpr->carpr_vhid) break; } - if (sc == NULL) { - sc = carp_alloc(ifp); - CARP_LOCK(sc); - sc->sc_vhid = carpr->carpr_vhid; - LLADDR(&sc->sc_addr)[0] = 0; - LLADDR(&sc->sc_addr)[1] = 0; - LLADDR(&sc->sc_addr)[2] = 0x5e; - LLADDR(&sc->sc_addr)[3] = 0; - LLADDR(&sc->sc_addr)[4] = 1; - LLADDR(&sc->sc_addr)[5] = sc->sc_vhid; - } else - CARP_LOCK(sc); - if (carpr->carpr_advbase > 0) { - if (carpr->carpr_advbase > 255 || - carpr->carpr_advbase < CARP_DFLTINTV) { - error = EINVAL; - goto out; + + if (sc == NULL) + sc = carp_alloc(ifp, carpr->carpr_version, carpr->carpr_vhid); + else if (sc->sc_version != carpr->carpr_version) + return (EINVAL); + + CARP_LOCK(sc); + switch (sc->sc_version) { + case CARP_VERSION_CARP: + if (carpr->carpr_advbase != 0) + sc->sc_advbase = carpr->carpr_advbase; + sc->sc_advskew = carpr->carpr_advskew; + if (carpr->carpr_addr.s_addr != INADDR_ANY) + sc->sc_carpaddr = carpr->carpr_addr; + if (!IN6_IS_ADDR_UNSPECIFIED(&carpr->carpr_addr6)) { + memcpy(&sc->sc_carpaddr6, &carpr->carpr_addr6, + sizeof(sc->sc_carpaddr6)); } - sc->sc_advbase = carpr->carpr_advbase; - } - if (carpr->carpr_advskew >= 255) { - error = EINVAL; - goto out; - } - sc->sc_advskew = carpr->carpr_advskew; - if (carpr->carpr_addr.s_addr != INADDR_ANY) - sc->sc_carpaddr = carpr->carpr_addr; - if (! IN6_IS_ADDR_UNSPECIFIED(&carpr->carpr_addr6)) { - memcpy(&sc->sc_carpaddr6, &carpr->carpr_addr6, - sizeof(sc->sc_carpaddr6)); - } - if (carpr->carpr_key[0] != '\0') { - bcopy(carpr->carpr_key, sc->sc_key, sizeof(sc->sc_key)); - carp_hmac_prepare(sc); + if (carpr->carpr_key[0] != '\0') { + bcopy(carpr->carpr_key, sc->sc_key, sizeof(sc->sc_key)); + carp_hmac_prepare(sc); + } + break; + case CARP_VERSION_VRRPv3: + if (carpr->carpr_vrrp_priority != 0) + sc->sc_vrrp_prio = carpr->carpr_vrrp_priority; + if (carpr->carpr_vrrp_adv_inter) + sc->sc_vrrp_adv_inter = carpr->carpr_vrrp_adv_inter; + break; } + if (sc->sc_state != INIT && carpr->carpr_state != sc->sc_state) { switch (carpr->carpr_state) { @@ -1854,8 +2340,6 @@ carp_ioctl_set(if_t ifp, struct carpkreq *carpr) break; } } - -out: CARP_UNLOCK(sc); return (error); @@ -1910,7 +2394,9 @@ int carp_ioctl(struct ifreq *ifr, u_long cmd, struct thread *td) { struct carpreq carpr; - struct carpkreq carprk = { }; + struct carpkreq carprk = { + .carpr_version = CARP_VERSION_CARP, + }; struct ifnet *ifp; int error = 0; @@ -2034,7 +2520,8 @@ carp_attach(struct ifaddr *ifa, int vhid) CARP_LOCK(sc); sc->sc_ifas[index - 1] = ifa; ifa->ifa_carp = sc; - carp_hmac_prepare(sc); + if (sc->sc_version == CARP_VERSION_CARP) + carp_hmac_prepare(sc); carp_sc_state(sc); CARP_UNLOCK(sc); @@ -2087,7 +2574,8 @@ carp_detach(struct ifaddr *ifa, bool keep_cif) ifa->ifa_carp = NULL; ifa_free(ifa); - carp_hmac_prepare(sc); + if (sc->sc_version == CARP_VERSION_CARP) + carp_hmac_prepare(sc); carp_sc_state(sc); if (!keep_cif && sc->sc_naddrs == 0 && sc->sc_naddrs6 == 0) @@ -2279,13 +2767,23 @@ carp_nl_send(void *arg, struct carp_softc *sc, int priv) nlattr_add_u32(nw, CARP_NL_VHID, sc->sc_vhid); nlattr_add_u32(nw, CARP_NL_STATE, sc->sc_state); - nlattr_add_s32(nw, CARP_NL_ADVBASE, sc->sc_advbase); - nlattr_add_s32(nw, CARP_NL_ADVSKEW, sc->sc_advskew); - nlattr_add_in_addr(nw, CARP_NL_ADDR, &sc->sc_carpaddr); - nlattr_add_in6_addr(nw, CARP_NL_ADDR6, &sc->sc_carpaddr6); - - if (priv) - nlattr_add(nw, CARP_NL_KEY, sizeof(sc->sc_key), sc->sc_key); + nlattr_add_u8(nw, 
CARP_NL_VERSION, sc->sc_version); + switch (sc->sc_version) { + case CARP_VERSION_CARP: + nlattr_add_s32(nw, CARP_NL_ADVBASE, sc->sc_advbase); + nlattr_add_s32(nw, CARP_NL_ADVSKEW, sc->sc_advskew); + nlattr_add_in_addr(nw, CARP_NL_ADDR, &sc->sc_carpaddr); + nlattr_add_in6_addr(nw, CARP_NL_ADDR6, &sc->sc_carpaddr6); + if (priv) + nlattr_add(nw, CARP_NL_KEY, sizeof(sc->sc_key), + sc->sc_key); + break; + case CARP_VERSION_VRRPv3: + nlattr_add_u8(nw, CARP_NL_VRRP_PRIORITY, sc->sc_vrrp_prio); + nlattr_add_u16(nw, CARP_NL_VRRP_ADV_INTER, + sc->sc_vrrp_adv_inter); + break; + } CARP_UNLOCK(sc); @@ -2307,11 +2805,12 @@ struct nl_carp_parsed { char key[CARP_KEY_LEN]; struct in_addr addr; struct in6_addr addr6; + carp_version_t version; + uint8_t vrrp_prio; + uint16_t vrrp_adv_inter; }; -#define _IN(_field) offsetof(struct genlmsghdr, _field) #define _OUT(_field) offsetof(struct nl_carp_parsed, _field) - static const struct nlattr_parser nla_p_set[] = { { .type = CARP_NL_VHID, .off = _OUT(vhid), .cb = nlattr_get_uint32 }, { .type = CARP_NL_STATE, .off = _OUT(state), .cb = nlattr_get_uint32 }, @@ -2322,11 +2821,11 @@ static const struct nlattr_parser nla_p_set[] = { { .type = CARP_NL_ADDR, .off = _OUT(addr), .cb = nlattr_get_in_addr }, { .type = CARP_NL_ADDR6, .off = _OUT(addr6), .cb = nlattr_get_in6_addr }, { .type = CARP_NL_IFNAME, .off = _OUT(ifname), .cb = nlattr_get_string }, + { .type = CARP_NL_VERSION, .off = _OUT(version), .cb = nlattr_get_uint8 }, + { .type = CARP_NL_VRRP_PRIORITY, .off = _OUT(vrrp_prio), .cb = nlattr_get_uint8 }, + { .type = CARP_NL_VRRP_ADV_INTER, .off = _OUT(vrrp_adv_inter), .cb = nlattr_get_uint16 }, }; -static const struct nlfield_parser nlf_p_set[] = { -}; -NL_DECLARE_PARSER(carp_parser, struct genlmsghdr, nlf_p_set, nla_p_set); -#undef _IN +NL_DECLARE_PARSER(carp_parser, struct genlmsghdr, nlf_p_empty, nla_p_set); #undef _OUT @@ -2393,12 +2892,24 @@ carp_nl_set(struct nlmsghdr *hdr, struct nl_pstate *npt) return (EINVAL); if (attrs.state > CARP_MAXSTATE) return (EINVAL); - if (attrs.advbase < 0 || attrs.advskew < 0) - return (EINVAL); - if (attrs.advbase > 255) - return (EINVAL); - if (attrs.advskew >= 255) + if (attrs.version == 0) /* compat with pre-VRRPv3 */ + attrs.version = CARP_VERSION_CARP; + switch (attrs.version) { + case CARP_VERSION_CARP: + if (attrs.advbase < 0 || attrs.advskew < 0) + return (EINVAL); + if (attrs.advbase > 255) + return (EINVAL); + if (attrs.advskew >= 255) + return (EINVAL); + break; + case CARP_VERSION_VRRPv3: + if (attrs.vrrp_adv_inter > VRRP_MAX_INTERVAL) + return (EINVAL); + break; + default: return (EINVAL); + } NET_EPOCH_ENTER(et); if (attrs.ifname != NULL) @@ -2418,12 +2929,20 @@ carp_nl_set(struct nlmsghdr *hdr, struct nl_pstate *npt) carpr.carpr_count = 1; carpr.carpr_vhid = attrs.vhid; carpr.carpr_state = attrs.state; - carpr.carpr_advbase = attrs.advbase; - carpr.carpr_advskew = attrs.advskew; - carpr.carpr_addr = attrs.addr; - carpr.carpr_addr6 = attrs.addr6; - - memcpy(&carpr.carpr_key, &attrs.key, sizeof(attrs.key)); + carpr.carpr_version = attrs.version; + switch (attrs.version) { + case CARP_VERSION_CARP: + carpr.carpr_advbase = attrs.advbase; + carpr.carpr_advskew = attrs.advskew; + carpr.carpr_addr = attrs.addr; + carpr.carpr_addr6 = attrs.addr6; + memcpy(&carpr.carpr_key, &attrs.key, sizeof(attrs.key)); + break; + case CARP_VERSION_VRRPv3: + carpr.carpr_vrrp_priority = attrs.vrrp_prio; + carpr.carpr_vrrp_adv_inter = attrs.vrrp_adv_inter; + break; + } sx_xlock(&carp_sx); error = carp_ioctl_set(ifp, &carpr); @@ -2457,26 
+2976,25 @@ static const struct genl_cmd carp_cmds[] = { }, }; +static uint16_t carp_family_id; static void carp_nl_register(void) { bool ret __diagused; - int family_id __diagused; NL_VERIFY_PARSERS(all_parsers); - family_id = genl_register_family(CARP_NL_FAMILY_NAME, 0, 2, + carp_family_id = genl_register_family(CARP_NL_FAMILY_NAME, 0, 2, CARP_NL_CMD_MAX); - MPASS(family_id != 0); + MPASS(carp_family_id != 0); - ret = genl_register_cmds(CARP_NL_FAMILY_NAME, carp_cmds, - NL_ARRAY_LEN(carp_cmds)); + ret = genl_register_cmds(carp_family_id, carp_cmds, nitems(carp_cmds)); MPASS(ret); } static void carp_nl_unregister(void) { - genl_unregister_family(CARP_NL_FAMILY_NAME); + genl_unregister_family(carp_family_id); } static void @@ -2525,7 +3043,6 @@ carp_mod_load(void) mtx_init(&carp_mtx, "carp_mtx", NULL, MTX_DEF); sx_init(&carp_sx, "carp_sx"); - LIST_INIT(&carp_list); carp_get_vhid_p = carp_get_vhid; carp_forus_p = carp_forus; carp_output_p = carp_output; diff --git a/sys/netinet/ip_carp.h b/sys/netinet/ip_carp.h index 0c22e9434797..dc3d9a68b43b 100644 --- a/sys/netinet/ip_carp.h +++ b/sys/netinet/ip_carp.h @@ -31,6 +31,7 @@ #ifndef _IP_CARP_H #define _IP_CARP_H +#ifdef _KERNEL /* * The CARP header layout is as follows: * @@ -77,14 +78,53 @@ struct carp_header { unsigned char carp_md[20]; /* SHA1 HMAC */ } __packed; -#ifdef CTASSERT CTASSERT(sizeof(struct carp_header) == 36); + +/* + * The VRRPv3 header layout is as follows: + * See RFC9568, 5.1. VRRP Packet Format + * + * 0 1 2 3 + * 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * |Version| Type | Virtual Rtr ID| Priority |Count IPvX Addr| + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * |(rsvd) | Max Adver Int | Checksum | + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * | | + * + + + * | IPvX Address(es) | + * + + + * + + + * + + + * + + + * | | + * + + + * | | + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * + */ + +struct vrrpv3_header { +#if BYTE_ORDER == LITTLE_ENDIAN + uint8_t vrrp_type:4, + vrrp_version:4; +#endif +#if BYTE_ORDER == BIG_ENDIAN + uint8_t vrrp_version:4, + vrrp_type:4; #endif + uint8_t vrrp_vrtid; + uint8_t vrrp_priority; + uint8_t vrrp_count_addr; + uint16_t vrrp_max_adver_int; + uint16_t vrrp_checksum; +} __packed; -#define CARP_DFLTTL 255 +CTASSERT(sizeof(struct vrrpv3_header) == 8); +#endif /* _KERNEL */ -/* carp_version */ -#define CARP_VERSION 2 +#define CARP_DFLTTL 255 /* carp_type */ #define CARP_ADVERTISEMENT 0x01 @@ -94,6 +134,8 @@ CTASSERT(sizeof(struct carp_header) == 36); /* carp_advbase */ #define CARP_DFLTINTV 1 +#define VRRP_TYPE_ADVERTISEMENT 0x01 +#define VRRP_MAX_INTERVAL (0x1000 - 1) /* * Statistics. 
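Since the on-wire Max Adver Int field is only 12 bits wide and counts centiseconds, the cap above works out to 4095 cs, just under 41 seconds; the VRRPv3 default programmed in carp_alloc() is 100 cs, i.e. one advertisement per second. A compile-time restatement in the CTASSERT style this header already uses (illustrative only):

	CTASSERT(VRRP_MAX_INTERVAL == 4095);	/* 0x1000 - 1, ~40.95 s */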
*/ @@ -136,6 +178,11 @@ struct carpreq { #define SIOCSVH _IOWR('i', 245, struct ifreq) #define SIOCGVH _IOWR('i', 246, struct ifreq) +typedef enum carp_version { + CARP_VERSION_CARP = 2, + CARP_VERSION_VRRPv3 = 3, +} carp_version_t; + #ifdef _KERNEL int carp_ioctl(struct ifreq *, u_long, struct thread *); int carp_attach(struct ifaddr *, int); diff --git a/sys/netinet/ip_carp_nl.h b/sys/netinet/ip_carp_nl.h index 89720af3e0dc..de4c0367c1d3 100644 --- a/sys/netinet/ip_carp_nl.h +++ b/sys/netinet/ip_carp_nl.h @@ -32,6 +32,9 @@ enum carp_nl_type_t { CARP_NL_ADDR = 7, /* in_addr_t */ CARP_NL_ADDR6 = 8, /* in6_addr_t */ CARP_NL_IFNAME = 9, /* string */ + CARP_NL_VERSION = 10, /* u8 */ + CARP_NL_VRRP_PRIORITY = 11, /* u8 */ + CARP_NL_VRRP_ADV_INTER = 12, /* u16, 12-bit field in centiseconds*/ }; #endif diff --git a/sys/netinet/ip_divert.c b/sys/netinet/ip_divert.c index 6bc76e0be111..5a561814cdb5 100644 --- a/sys/netinet/ip_divert.c +++ b/sys/netinet/ip_divert.c @@ -538,7 +538,7 @@ div_output_inbound(int family, struct socket *so, struct mbuf *m, */ if (IN_MULTICAST(ntohl(ip->ip_dst.s_addr))) m->m_flags |= M_MCAST; - else if (in_broadcast(ip->ip_dst, m->m_pkthdr.rcvif)) + else if (in_ifnet_broadcast(ip->ip_dst, m->m_pkthdr.rcvif)) m->m_flags |= M_BCAST; netisr_queue_src(NETISR_IP, (uintptr_t)so, m); DIVSTAT_INC(inbound); diff --git a/sys/netinet/ip_fastfwd.c b/sys/netinet/ip_fastfwd.c index 6d34ba4f5420..9b81760e58f3 100644 --- a/sys/netinet/ip_fastfwd.c +++ b/sys/netinet/ip_fastfwd.c @@ -278,14 +278,12 @@ ip_tryforward(struct mbuf *m) */ if ((m->m_flags & (M_BCAST|M_MCAST)) || (m->m_pkthdr.rcvif->if_flags & IFF_LOOPBACK) || - ntohl(ip->ip_src.s_addr) == (u_long)INADDR_BROADCAST || - ntohl(ip->ip_dst.s_addr) == (u_long)INADDR_BROADCAST || + in_broadcast(ip->ip_src) || + in_broadcast(ip->ip_dst) || IN_MULTICAST(ntohl(ip->ip_src.s_addr)) || IN_MULTICAST(ntohl(ip->ip_dst.s_addr)) || IN_LINKLOCAL(ntohl(ip->ip_src.s_addr)) || - IN_LINKLOCAL(ntohl(ip->ip_dst.s_addr)) || - ip->ip_src.s_addr == INADDR_ANY || - ip->ip_dst.s_addr == INADDR_ANY ) + IN_LINKLOCAL(ntohl(ip->ip_dst.s_addr)) ) return m; /* @@ -401,7 +399,7 @@ passin: if (!PFIL_HOOKED_OUT(V_inet_pfil_head)) goto passout; - if (pfil_mbuf_out(V_inet_pfil_head, &m, nh->nh_ifp, + if (pfil_mbuf_fwd(V_inet_pfil_head, &m, nh->nh_ifp, NULL) != PFIL_PASS) goto drop; diff --git a/sys/netinet/ip_fw.h b/sys/netinet/ip_fw.h index 2b59e46b5bcc..c440223b81f8 100644 --- a/sys/netinet/ip_fw.h +++ b/sys/netinet/ip_fw.h @@ -75,6 +75,10 @@ typedef struct _ip_fw3_opheader { uint16_t reserved[2]; /* Align to 64-bit boundary */ } ip_fw3_opheader; +#define IP_FW3_OPVER_0 0 +#define IP_FW3_OPVER_1 1 /* 32bit rulenum */ +#define IP_FW3_OPVER IP_FW3_OPVER_1 + /* IP_FW3 opcodes */ #define IP_FW_TABLE_XADD 86 /* add entry */ #define IP_FW_TABLE_XDEL 87 /* delete entry */ @@ -109,6 +113,7 @@ typedef struct _ip_fw3_opheader { #define IP_FW_DUMP_SOPTCODES 116 /* Dump available sopts/versions */ #define IP_FW_DUMP_SRVOBJECTS 117 /* Dump existing named objects */ +#define IP_FW_SKIPTO_CACHE 118 /* Manage skipto cache */ #define IP_FW_NAT64STL_CREATE 130 /* Create stateless NAT64 instance */ #define IP_FW_NAT64STL_DESTROY 131 /* Destroy stateless NAT64 instance */ @@ -211,8 +216,8 @@ enum ipfw_opcodes { /* arguments (4 byte each) */ O_VERREVPATH, /* none */ O_VERSRCREACH, /* none */ - O_PROBE_STATE, /* none */ - O_KEEP_STATE, /* none */ + O_PROBE_STATE, /* v0:arg1=kidx, v1:kidx=kidx */ + O_KEEP_STATE, /* v0:arg1=kidx, v1:kidx=kidx */ O_LIMIT, /* ipfw_insn_limit */ O_LIMIT_PARENT, 
/* dyn_type, not an opcode. */ @@ -223,12 +228,13 @@ enum ipfw_opcodes { /* arguments (4 byte each) */ O_LOG, /* ipfw_insn_log */ O_PROB, /* u32 = match probability */ - O_CHECK_STATE, /* none */ + O_CHECK_STATE, /* v0:arg1=kidx, v1:kidx=kidx */ O_ACCEPT, /* none */ O_DENY, /* none */ O_REJECT, /* arg1=icmp arg (same as deny) */ O_COUNT, /* none */ - O_SKIPTO, /* arg1=next rule number */ + O_SKIPTO, /* v0:arg1=next rule number */ + /* v1:kidx= next rule number */ O_PIPE, /* arg1=pipe number */ O_QUEUE, /* arg1=queue number */ O_DIVERT, /* arg1=port number */ @@ -242,8 +248,10 @@ enum ipfw_opcodes { /* arguments (4 byte each) */ * More opcodes. */ O_IPSEC, /* has ipsec history */ - O_IP_SRC_LOOKUP, /* arg1=table number, u32=value */ + O_IP_SRC_LOOKUP, /* v0:arg1=table number, u32=value */ + /* v1:kidx=name, u32=value, arg1=key */ O_IP_DST_LOOKUP, /* arg1=table number, u32=value */ + /* v1:kidx=name, u32=value, arg1=key */ O_ANTISPOOF, /* none */ O_JAIL, /* u32 = id */ O_ALTQ, /* u32 = altq classif. qid */ @@ -278,23 +286,27 @@ enum ipfw_opcodes { /* arguments (4 byte each) */ O_SOCKARG, /* socket argument */ - O_CALLRETURN, /* arg1=called rule number */ + O_CALLRETURN, /* v0:arg1=called rule number */ + /* v1:kidx=called rule number */ O_FORWARD_IP6, /* fwd sockaddr_in6 */ O_DSCP, /* 2 u32 = DSCP mask */ O_SETDSCP, /* arg1=DSCP value */ - O_IP_FLOW_LOOKUP, /* arg1=table number, u32=value */ + O_IP_FLOW_LOOKUP, /* v0:arg1=table number, u32=value */ + /* v1:kidx=name, u32=value */ - O_EXTERNAL_ACTION, /* arg1=id of external action handler */ - O_EXTERNAL_INSTANCE, /* arg1=id of eaction handler instance */ + O_EXTERNAL_ACTION, /* v0:arg1=id of external action handler */ + /* v1:kidx=id of external action handler */ + O_EXTERNAL_INSTANCE, /* v0:arg1=id of eaction handler instance */ + /* v1:kidx=id of eaction handler instance */ O_EXTERNAL_DATA, /* variable length data */ O_SKIP_ACTION, /* none */ O_TCPMSS, /* arg1=MSS value */ - O_MAC_SRC_LOOKUP, /* arg1=table number, u32=value */ - O_MAC_DST_LOOKUP, /* arg1=table number, u32=value */ + O_MAC_SRC_LOOKUP, /* kidx=name, u32=value, arg1=key */ + O_MAC_DST_LOOKUP, /* kidx=name, u32=value, arg1=key */ O_SETMARK, /* u32 = value */ O_MARK, /* 2 u32 = value, bitmask */ @@ -303,22 +315,6 @@ enum ipfw_opcodes { /* arguments (4 byte each) */ }; /* - * Defines key types used by lookup instruction - */ -enum ipfw_table_lookup_type { - LOOKUP_DST_IP, - LOOKUP_SRC_IP, - LOOKUP_DST_PORT, - LOOKUP_SRC_PORT, - LOOKUP_UID, - LOOKUP_JAIL, - LOOKUP_DSCP, - LOOKUP_DST_MAC, - LOOKUP_SRC_MAC, - LOOKUP_MARK, -}; - -/* * The extension header are filtered only for presence using a bit * vector with a flag for each header. */ @@ -392,6 +388,11 @@ typedef struct _ipfw_insn_u32 { u_int32_t d[1]; /* one or more */ } ipfw_insn_u32; +typedef struct _ipfw_insn_kidx { + ipfw_insn o; + uint32_t kidx; +} ipfw_insn_kidx; + /* * This is used to store IP addr-mask pairs. 
*/ @@ -401,6 +402,47 @@ typedef struct _ipfw_insn_ip { struct in_addr mask; } ipfw_insn_ip; +typedef struct _ipfw_insn_table { + ipfw_insn o; /* arg1 is optional lookup key */ + uint32_t kidx; /* table name index */ + uint32_t value; /* table value */ +} ipfw_insn_table; + +#define IPFW_LOOKUP_TYPE_MASK 0x00FF +#define IPFW_LOOKUP_TYPE(insn) ((insn)->arg1 & IPFW_LOOKUP_TYPE_MASK) +#define IPFW_SET_LOOKUP_TYPE(insn, type) do { \ + (insn)->arg1 &= ~IPFW_LOOKUP_TYPE_MASK; \ + (insn)->arg1 |= (type) & IPFW_LOOKUP_TYPE_MASK; \ +} while (0) + +/* + * Defines key types used by lookup instruction + */ +enum ipfw_table_lookup_type { + LOOKUP_NONE = 0, + LOOKUP_DST_IP, + LOOKUP_SRC_IP, + LOOKUP_DST_PORT, + LOOKUP_SRC_PORT, + LOOKUP_UID, + LOOKUP_JAIL, + LOOKUP_DSCP, + LOOKUP_DST_MAC, + LOOKUP_SRC_MAC, + LOOKUP_MARK, + LOOKUP_RULENUM, +}; + +enum ipfw_return_type { + RETURN_NEXT_RULENUM = 0, + RETURN_NEXT_RULE, +}; + +enum ipfw_skipto_cache_op { + SKIPTO_CACHE_DISABLE = 0, + SKIPTO_CACHE_ENABLE, +}; + /* * This is used to forward to a given address (ip). */ @@ -434,7 +476,8 @@ typedef struct _ipfw_insn_if { union { struct in_addr ip; int glob; - uint16_t kidx; + uint16_t kidx_v0; + uint32_t kidx; } p; char name[IFNAMSIZ]; } ipfw_insn_if; @@ -452,6 +495,7 @@ typedef struct _ipfw_insn_altq { */ typedef struct _ipfw_insn_limit { ipfw_insn o; + u_int32_t kidx; u_int8_t _pad; u_int8_t limit_mask; /* combination of DYN_* below */ #define DYN_SRC_ADDR 0x1 @@ -462,6 +506,9 @@ typedef struct _ipfw_insn_limit { u_int16_t conn_limit; } ipfw_insn_limit; +/* MAC/InfiniBand/etc address length */ +#define IPFW_MAX_L2_ADDR_LEN 20 + /* * This is used for log instructions. */ @@ -471,6 +518,22 @@ typedef struct _ipfw_insn_log { u_int32_t log_left; /* how many left to log */ } ipfw_insn_log; +/* ipfw_insn_log->o.arg1 bitmasks */ +#define IPFW_LOG_DEFAULT 0x0000 +#define IPFW_LOG_SYSLOG (1 << 15) +#define IPFW_LOG_IPFW0 (1 << 14) +#define IPFW_LOG_RTSOCK (1 << 13) + +typedef struct _ipfwlog_rtsock_hdr_v2 { + uint32_t rulenum; + uint32_t tablearg; + ipfw_insn cmd; + u_char ether_shost[IPFW_MAX_L2_ADDR_LEN]; + u_char ether_dhost[IPFW_MAX_L2_ADDR_LEN]; + uint32_t mark; + char comment[0]; +} ipfwlog_rtsock_hdr_v2; + /* Legacy NAT structures, compat only */ #ifndef _KERNEL /* @@ -604,6 +667,10 @@ typedef struct _ipfw_insn_icmp6 { */ } ipfw_insn_icmp6; +/* Convert pointer to instruction with specified type */ +#define insntod(p, type) ((ipfw_insn_ ## type *)(p)) +#define insntoc(p, type) ((const ipfw_insn_ ## type *)(p)) + /* * Here we have the structure representing an ipfw rule. * @@ -719,30 +786,29 @@ struct ipfw_flow_id { /* * Dynamic ipfw rule. */ -typedef struct _ipfw_dyn_rule ipfw_dyn_rule; - -struct _ipfw_dyn_rule { - ipfw_dyn_rule *next; /* linked list of rules. 
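The new ipfw_insn_kidx and ipfw_insn_table layouts above move the table name index out of the 16-bit arg1 field; in the v1 encoding arg1's low byte instead selects the lookup key via IPFW_LOOKUP_TYPE. A hypothetical consumer-side sketch, assuming a v1 O_IP_SRC_LOOKUP instruction laid out as ipfw_insn_table:

	/* Hypothetical example: inspect a v1 O_IP_SRC_LOOKUP instruction. */
	static void
	show_lookup(const ipfw_insn *cmd)
	{
		const ipfw_insn_table *ti = insntoc(cmd, table);

		printf("lookup key %d, table kidx %u\n",
		    IPFW_LOOKUP_TYPE(cmd), ti->kidx);
	}

The optional value match and the table value type (see the IPFW_TVALUE_TYPE macros further down) ride in the remaining arg1 byte and the u32 operand, so a 16-bit table limit no longer constrains the instruction.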
*/ - struct ip_fw *rule; /* pointer to rule */ - /* 'rule' is used to pass up the rule number (from the parent) */ +#define IPFW_DYN_ORPHANED 0x40000 /* state's parent rule was deleted */ - ipfw_dyn_rule *parent; /* pointer to parent rule */ - u_int64_t pcnt; /* packet match counter */ - u_int64_t bcnt; /* byte match counter */ +typedef struct _ipfw_dyn_rule { struct ipfw_flow_id id; /* (masked) flow id */ - u_int32_t expire; /* expire time */ - u_int32_t bucket; /* which bucket in hash table */ - u_int32_t state; /* state of this rule (typically a + uint8_t set; + uint8_t type; /* rule type */ + uint16_t pad; + uint32_t expire; /* expire time */ + uint32_t rulenum; /* parent's rule number */ + uint32_t kidx; /* index of named object */ + uint64_t pcnt; /* packet match counter */ + uint64_t bcnt; /* byte match counter */ + uint32_t hashval; /* hash value */ + union { + uint32_t state; /* state of this rule (typically a * combination of TCP flags) */ -#define IPFW_DYN_ORPHANED 0x40000 /* state's parent rule was deleted */ - u_int32_t ack_fwd; /* most recent ACKs in forward */ - u_int32_t ack_rev; /* and reverse directions (used */ + uint32_t count; /* number of linked states */ + }; + uint32_t ack_fwd; /* most recent ACKs in forward */ + uint32_t ack_rev; /* and reverse directions (used */ /* to generate keepalives) */ - u_int16_t dyn_type; /* rule type */ - u_int16_t count; /* refcount */ - u_int16_t kidx; /* index of named object */ -} __packed __aligned(8); +} __packed __aligned(8) ipfw_dyn_rule; /* * Definitions for IP option names. @@ -794,16 +860,6 @@ struct _ipfw_dyn_rule { #define IPFW_VTYPE_NH6 0x00000400 /* IPv6 nexthop */ #define IPFW_VTYPE_MARK 0x00000800 /* [fw]mark */ -/* MAC/InfiniBand/etc address length */ -#define IPFW_MAX_L2_ADDR_LEN 20 - -typedef struct _ipfw_table_entry { - in_addr_t addr; /* network address */ - u_int32_t value; /* value */ - u_int16_t tbl; /* table number */ - u_int8_t masklen; /* mask length */ -} ipfw_table_entry; - typedef struct _ipfw_table_xentry { uint16_t len; /* Total entry length */ uint8_t type; /* entry type */ @@ -819,13 +875,6 @@ typedef struct _ipfw_table_xentry { } ipfw_table_xentry; #define IPFW_TCF_INET 0x01 /* CIDR flags: IPv4 record */ -typedef struct _ipfw_table { - u_int32_t size; /* size of entries in bytes */ - u_int32_t cnt; /* # of entries */ - u_int16_t tbl; /* table number */ - ipfw_table_entry ent[0]; /* entries */ -} ipfw_table; - typedef struct _ipfw_xtable { ip_fw3_opheader opheader; /* IP_FW3 opcode */ uint32_t size; /* size of entries in bytes */ @@ -865,10 +914,10 @@ typedef struct _ipfw_obj_data { /* Object name TLV */ typedef struct _ipfw_obj_ntlv { ipfw_obj_tlv head; /* TLV header */ - uint16_t idx; /* Name index */ + uint32_t idx; /* Name index */ uint8_t set; /* set, if applicable */ uint8_t type; /* object type, if applicable */ - uint32_t spare; /* unused */ + uint16_t spare; /* unused */ char name[64]; /* Null-terminated name */ } ipfw_obj_ntlv; @@ -891,19 +940,40 @@ struct tflow_entry { } a; }; +#define IPFW_TVALUE_TYPE_MASK 0xFF00 +#define IPFW_TVALUE_TYPE(insn) (((insn)->arg1 & IPFW_TVALUE_TYPE_MASK) >> 8) +#define IPFW_SET_TVALUE_TYPE(insn, type) do { \ + (insn)->arg1 &= ~IPFW_TVALUE_TYPE_MASK; \ + (insn)->arg1 |= ((type) << 8) & IPFW_TVALUE_TYPE_MASK; \ +} while (0) + +enum ipfw_table_value_type { + TVALUE_TAG = 0, + TVALUE_PIPE, + TVALUE_DIVERT, + TVALUE_SKIPTO, + TVALUE_NETGRAPH, + TVALUE_FIB, + TVALUE_NAT, + TVALUE_NH4, + TVALUE_DSCP, + TVALUE_LIMIT, + TVALUE_MARK, +}; + /* 64-byte structure 
representing multi-field table value */ typedef struct _ipfw_table_value { uint32_t tag; /* O_TAG/O_TAGGED */ - uint32_t pipe; /* O_PIPE/O_QUEUE */ + uint16_t pipe; /* O_PIPE/O_QUEUE */ uint16_t divert; /* O_DIVERT/O_TEE */ - uint16_t skipto; /* skipto, CALLRET */ + uint32_t skipto; /* skipto, CALLRET */ uint32_t netgraph; /* O_NETGRAPH/O_NGTEE */ - uint32_t fib; /* O_SETFIB */ uint32_t nat; /* O_NAT */ uint32_t nh4; + uint16_t fib; /* O_SETFIB */ uint8_t dscp; uint8_t spare0; - uint16_t kidx; /* value kernel index */ + uint32_t kidx; /* value kernel index */ struct in6_addr nh6; uint32_t limit; /* O_LIMIT */ uint32_t zoneid; /* scope zone id for nh6 */ @@ -918,8 +988,7 @@ typedef struct _ipfw_obj_tentry { uint8_t masklen; /* mask length */ uint8_t result; /* request result */ uint8_t spare0; - uint16_t idx; /* Table name index */ - uint16_t spare1; + uint32_t idx; /* Table name index */ union { /* Longest field needs to be aligned by 8-byte boundary */ struct in_addr addr; /* IPv4 address */ @@ -966,8 +1035,8 @@ typedef struct _ipfw_obj_ctlv { typedef struct _ipfw_range_tlv { ipfw_obj_tlv head; /* TLV header */ uint32_t flags; /* Range flags */ - uint16_t start_rule; /* Range start */ - uint16_t end_rule; /* Range end */ + uint32_t start_rule; /* Range start */ + uint32_t end_rule; /* Range end */ uint32_t set; /* Range set to match */ uint32_t new_set; /* New set to move/swap to */ } ipfw_range_tlv; @@ -979,7 +1048,7 @@ typedef struct _ipfw_range_tlv { #define IPFW_RCFLAG_USER (IPFW_RCFLAG_RANGE | IPFW_RCFLAG_ALL | \ IPFW_RCFLAG_SET | IPFW_RCFLAG_DYNAMIC) /* Internally used flags */ -#define IPFW_RCFLAG_DEFAULT 0x0100 /* Do not skip defaul rule */ +#define IPFW_RCFLAG_DEFAULT 0x0100 /* Do not skip default rule */ typedef struct _ipfw_ta_tinfo { uint32_t flags; /* Format flags */ @@ -1051,10 +1120,16 @@ typedef struct _ipfw_ta_info { uint64_t spare1; } ipfw_ta_info; +typedef struct _ipfw_cmd_header { /* control command header */ + ip_fw3_opheader opheader; /* IP_FW3 opcode */ + uint32_t size; /* Total size (incl. 
header) */ + uint32_t cmd; /* command */ +} ipfw_cmd_header; + typedef struct _ipfw_obj_header { ip_fw3_opheader opheader; /* IP_FW3 opcode */ - uint32_t spare; - uint16_t idx; /* object name index */ + uint32_t idx; /* object name index */ + uint16_t spare; uint8_t objtype; /* object type */ uint8_t objsubtype; /* object subtype */ ipfw_obj_ntlv ntlv; /* object name tlv */ diff --git a/sys/netinet/ip_gre.c b/sys/netinet/ip_gre.c index c9356edb0608..01a6ef4cd670 100644 --- a/sys/netinet/ip_gre.c +++ b/sys/netinet/ip_gre.c @@ -534,7 +534,7 @@ in_gre_output(struct mbuf *m, int af, int hlen) #ifdef INET6 case AF_INET6: gi->gi_ip.ip_tos = 0; /* XXX */ - ip_fillid(&gi->gi_ip); + ip_fillid(&gi->gi_ip, V_ip_random_id); break; #endif } diff --git a/sys/netinet/ip_icmp.c b/sys/netinet/ip_icmp.c index 1149796c0db3..71b75d18efd0 100644 --- a/sys/netinet/ip_icmp.c +++ b/sys/netinet/ip_icmp.c @@ -88,7 +88,7 @@ SYSCTL_PROC(_net_inet_icmp, ICMPCTL_ICMPLIM, icmplim, CTLTYPE_UINT | &sysctl_icmplim_and_jitter, "IU", "Maximum number of ICMP responses per second"); -VNET_DEFINE_STATIC(int, icmplim_curr_jitter) = 0; +VNET_DEFINE_STATIC(int, icmplim_curr_jitter[BANDLIM_MAX]) = {0}; #define V_icmplim_curr_jitter VNET(icmplim_curr_jitter) VNET_DEFINE_STATIC(u_int, icmplim_jitter) = 16; #define V_icmplim_jitter VNET(icmplim_jitter) @@ -635,15 +635,10 @@ icmp_input(struct mbuf **mp, int *offp, int proto) */ if (icmplen < ICMP_MASKLEN) break; - switch (ip->ip_dst.s_addr) { - case INADDR_BROADCAST: - case INADDR_ANY: + if (in_broadcast(ip->ip_dst)) icmpdst.sin_addr = ip->ip_src; - break; - - default: + else icmpdst.sin_addr = ip->ip_dst; - } ia = (struct in_ifaddr *)ifaof_ifpforaddr( (struct sockaddr *)&icmpdst, m->m_pkthdr.rcvif); if (ia == NULL) @@ -788,10 +783,11 @@ icmp_reflect(struct mbuf *m) if (IN_MULTICAST(ntohl(ip->ip_src.s_addr)) || (IN_EXPERIMENTAL(ntohl(ip->ip_src.s_addr)) && !V_ip_allow_net240) || - (IN_ZERONET(ntohl(ip->ip_src.s_addr)) && !V_ip_allow_net0) ) { + (IN_ZERONET(ntohl(ip->ip_src.s_addr)) && !V_ip_allow_net0) || + in_nullhost(ip->ip_src) ) { m_freem(m); /* Bad return address */ ICMPSTAT_INC(icps_badaddr); - goto done; /* Ip_output() will check for broadcast */ + goto done; /* ip_output() will check for broadcast */ } t = ip->ip_dst; @@ -1094,28 +1090,29 @@ ip_next_mtu(int mtu, int dir) * the 'final' error, but it doesn't make sense to solve the printing * delay with more complex code. 
*/ -VNET_DEFINE_STATIC(struct counter_rate, icmp_rates[BANDLIM_MAX]); +VNET_DEFINE_STATIC(struct counter_rate *, icmp_rates[BANDLIM_MAX]); #define V_icmp_rates VNET(icmp_rates) static const char *icmp_rate_descrs[BANDLIM_MAX] = { [BANDLIM_ICMP_UNREACH] = "icmp unreach", [BANDLIM_ICMP_ECHO] = "icmp ping", [BANDLIM_ICMP_TSTAMP] = "icmp tstamp", - [BANDLIM_RST_CLOSEDPORT] = "closed port RST", - [BANDLIM_RST_OPENPORT] = "open port RST", + [BANDLIM_TCP_RST] = "tcp reset", [BANDLIM_ICMP6_UNREACH] = "icmp6 unreach", [BANDLIM_SCTP_OOTB] = "sctp ootb", }; static void -icmplim_new_jitter(void) +icmplim_new_jitter(int which) { /* * Adjust limit +/- to jitter the measurement to deny a side-channel * port scan as in https://dl.acm.org/doi/10.1145/3372297.3417280 */ + KASSERT(which >= 0 && which < BANDLIM_MAX, + ("%s: which %d", __func__, which)); if (V_icmplim_jitter > 0) - V_icmplim_curr_jitter = + V_icmplim_curr_jitter[which] = arc4random_uniform(V_icmplim_jitter * 2 + 1) - V_icmplim_jitter; } @@ -1144,11 +1141,13 @@ sysctl_icmplim_and_jitter(SYSCTL_HANDLER_ARGS) error = EINVAL; else { V_icmplim_jitter = new; - icmplim_new_jitter(); + for (int i = 0; i < BANDLIM_MAX; i++) { + icmplim_new_jitter(i); + } } } } - MPASS(V_icmplim + V_icmplim_curr_jitter >= 0); + MPASS(V_icmplim == 0 || V_icmplim > V_icmplim_jitter); return (error); } @@ -1158,10 +1157,9 @@ icmp_bandlimit_init(void) { for (int i = 0; i < BANDLIM_MAX; i++) { - V_icmp_rates[i].cr_rate = counter_u64_alloc(M_WAITOK); - V_icmp_rates[i].cr_ticks = ticks; + V_icmp_rates[i] = counter_rate_alloc(M_WAITOK, 1); + icmplim_new_jitter(i); } - icmplim_new_jitter(); } VNET_SYSINIT(icmp_bandlimit, SI_SUB_PROTO_DOMAIN, SI_ORDER_ANY, icmp_bandlimit_init, NULL); @@ -1172,7 +1170,7 @@ icmp_bandlimit_uninit(void) { for (int i = 0; i < BANDLIM_MAX; i++) - counter_u64_free(V_icmp_rates[i].cr_rate); + counter_rate_free(V_icmp_rates[i]); } VNET_SYSUNINIT(icmp_bandlimit, SI_SUB_PROTO_DOMAIN, SI_ORDER_THIRD, icmp_bandlimit_uninit, NULL); @@ -1189,15 +1187,15 @@ badport_bandlim(int which) KASSERT(which >= 0 && which < BANDLIM_MAX, ("%s: which %d", __func__, which)); - pps = counter_ratecheck(&V_icmp_rates[which], V_icmplim + - V_icmplim_curr_jitter); + pps = counter_ratecheck(V_icmp_rates[which], V_icmplim + + V_icmplim_curr_jitter[which]); if (pps > 0) { if (V_icmplim_output) log(LOG_NOTICE, "Limiting %s response from %jd to %d packets/sec\n", icmp_rate_descrs[which], (intmax_t )pps, - V_icmplim + V_icmplim_curr_jitter); - icmplim_new_jitter(); + V_icmplim + V_icmplim_curr_jitter[which]); + icmplim_new_jitter(which); } if (pps == -1) return (-1); diff --git a/sys/netinet/ip_id.c b/sys/netinet/ip_id.c index 12dd6c8bf972..738b7eceb448 100644 --- a/sys/netinet/ip_id.c +++ b/sys/netinet/ip_id.c @@ -97,9 +97,9 @@ * user wants to, we can turn on random ID generation. */ VNET_DEFINE_STATIC(int, ip_rfc6864) = 1; -VNET_DEFINE_STATIC(int, ip_do_randomid) = 0; #define V_ip_rfc6864 VNET(ip_rfc6864) -#define V_ip_do_randomid VNET(ip_do_randomid) + +VNET_DEFINE(int, ip_random_id) = 0; /* * Random ID state engine. 
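It may help to spell out the arithmetic in icmplim_new_jitter() above, using the default icmplim_jitter of 16 declared earlier in this file (illustration only):

	arc4random_uniform(2 * 16 + 1)	-> uniform over 0 .. 32
	(0 .. 32) - 16			-> uniform over -16 .. +16

so each band's enforced limit floats within [icmplim - 16, icmplim + 16] responses per second. Because the offset is now stored per band and re-drawn only when that band's limit trips, the bands jitter independently of one another.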
@@ -126,7 +126,7 @@ VNET_DEFINE_STATIC(struct mtx, ip_id_mtx); VNET_DEFINE_STATIC(counter_u64_t, ip_id); #define V_ip_id VNET(ip_id) -static int sysctl_ip_randomid(SYSCTL_HANDLER_ARGS); +static int sysctl_ip_random_id(SYSCTL_HANDLER_ARGS); static int sysctl_ip_id_change(SYSCTL_HANDLER_ARGS); static void ip_initid(int); static uint16_t ip_randomid(void); @@ -136,7 +136,7 @@ static void ipid_sysuninit(void); SYSCTL_DECL(_net_inet_ip); SYSCTL_PROC(_net_inet_ip, OID_AUTO, random_id, CTLTYPE_INT | CTLFLAG_VNET | CTLFLAG_RW | CTLFLAG_MPSAFE, - &VNET_NAME(ip_do_randomid), 0, sysctl_ip_randomid, "IU", + &VNET_NAME(ip_random_id), 0, sysctl_ip_random_id, "IU", "Assign random ip_id values"); SYSCTL_INT(_net_inet_ip, OID_AUTO, rfc6864, CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(ip_rfc6864), 0, @@ -151,22 +151,22 @@ SYSCTL_INT(_net_inet_ip, OID_AUTO, random_id_total, CTLFLAG_RD | CTLFLAG_VNET, &VNET_NAME(random_id_total), 0, "Count of IP IDs created"); static int -sysctl_ip_randomid(SYSCTL_HANDLER_ARGS) +sysctl_ip_random_id(SYSCTL_HANDLER_ARGS) { int error, new; - new = V_ip_do_randomid; + new = V_ip_random_id; error = sysctl_handle_int(oidp, &new, 0, req); if (error || req->newptr == NULL) return (error); if (new != 0 && new != 1) return (EINVAL); - if (new == V_ip_do_randomid) + if (new == V_ip_random_id) return (0); - if (new == 1 && V_ip_do_randomid == 0) + if (new == 1 && V_ip_random_id == 0) ip_initid(8192); /* We don't free memory when turning random ID off, due to race. */ - V_ip_do_randomid = new; + V_ip_random_id = new; return (0); } @@ -238,7 +238,7 @@ ip_randomid(void) } void -ip_fillid(struct ip *ip) +ip_fillid(struct ip *ip, bool do_randomid) { /* @@ -249,7 +249,7 @@ ip_fillid(struct ip *ip) */ if (V_ip_rfc6864 && (ip->ip_off & htons(IP_DF)) == htons(IP_DF)) ip->ip_id = 0; - else if (V_ip_do_randomid) + else if (do_randomid) ip->ip_id = ip_randomid(); else { counter_u64_add(V_ip_id, 1); diff --git a/sys/netinet/ip_input.c b/sys/netinet/ip_input.c index 82d7acdd0710..4d614dfeb0a2 100644 --- a/sys/netinet/ip_input.c +++ b/sys/netinet/ip_input.c @@ -521,11 +521,6 @@ ip_input(struct mbuf *m) goto bad; } } - /* The unspecified address can appear only as a src address - RFC1122 */ - if (__predict_false(ntohl(ip->ip_dst.s_addr) == INADDR_ANY)) { - IPSTAT_INC(ips_badaddr); - goto bad; - } if (m->m_pkthdr.csum_flags & CSUM_IP_CHECKED) { sum = !(m->m_pkthdr.csum_flags & CSUM_IP_VALID); @@ -641,6 +636,17 @@ tooshort: } } passin: + /* + * The unspecified address can appear only as a src address - RFC1122. + * + * The check is deferred to here to give firewalls a chance to block + * (and log) such packets. ip_tryforward() will not process such + * packets. + */ + if (__predict_false(ntohl(ip->ip_dst.s_addr) == INADDR_ANY)) { + IPSTAT_INC(ips_badaddr); + goto bad; + } /* * Process options and, if not destined for us, @@ -783,9 +789,7 @@ passin: */ goto ours; } - if (ip->ip_dst.s_addr == (u_long)INADDR_BROADCAST) - goto ours; - if (ip->ip_dst.s_addr == INADDR_ANY) + if (in_broadcast(ip->ip_dst)) goto ours; /* RFC 3927 2.7: Do not forward packets to or from IN_LINKLOCAL. 
*/ if (IN_LINKLOCAL(ntohl(ip->ip_dst.s_addr)) || @@ -920,7 +924,7 @@ ip_forward(struct mbuf *m, int srcrt) NET_EPOCH_ASSERT(); - if (m->m_flags & (M_BCAST|M_MCAST) || in_canforward(ip->ip_dst) == 0) { + if (m->m_flags & (M_BCAST|M_MCAST) || !in_canforward(ip->ip_dst)) { IPSTAT_INC(ips_cantforward); m_freem(m); return; @@ -942,6 +946,18 @@ ip_forward(struct mbuf *m, int srcrt) flowid = m->m_pkthdr.flowid; ro.ro_nh = fib4_lookup(M_GETFIB(m), ip->ip_dst, 0, NHR_REF, flowid); if (ro.ro_nh != NULL) { + if (ro.ro_nh->nh_flags & (NHF_BLACKHOLE | NHF_BROADCAST)) { + IPSTAT_INC(ips_cantforward); + m_freem(m); + NH_FREE(ro.ro_nh); + return; + } + if (ro.ro_nh->nh_flags & NHF_REJECT) { + IPSTAT_INC(ips_cantforward); + NH_FREE(ro.ro_nh); + icmp_error(m, ICMP_UNREACH, ICMP_UNREACH_HOST, 0, 0); + return; + } ia = ifatoia(ro.ro_nh->nh_ifa); } else ia = NULL; diff --git a/sys/netinet/ip_mroute.c b/sys/netinet/ip_mroute.c index b864a4db5abc..d30bd42ec578 100644 --- a/sys/netinet/ip_mroute.c +++ b/sys/netinet/ip_mroute.c @@ -139,6 +139,13 @@ static MALLOC_DEFINE(M_MRTABLE, "mroutetbl", "multicast forwarding cache"); * structures. */ +static struct sx __exclusive_cache_line mrouter_teardown; +#define MRW_TEARDOWN_WLOCK() sx_xlock(&mrouter_teardown) +#define MRW_TEARDOWN_WUNLOCK() sx_xunlock(&mrouter_teardown) +#define MRW_TEARDOWN_LOCK_INIT() \ + sx_init(&mrouter_teardown, "IPv4 multicast forwarding teardown") +#define MRW_TEARDOWN_LOCK_DESTROY() sx_destroy(&mrouter_teardown) + static struct rwlock mrouter_lock; #define MRW_RLOCK() rw_rlock(&mrouter_lock) #define MRW_WLOCK() rw_wlock(&mrouter_lock) @@ -692,15 +699,18 @@ ip_mrouter_init(struct socket *so, int version) if (version != 1) return ENOPROTOOPT; + MRW_TEARDOWN_WLOCK(); MRW_WLOCK(); if (ip_mrouter_unloading) { MRW_WUNLOCK(); + MRW_TEARDOWN_WUNLOCK(); return ENOPROTOOPT; } if (V_ip_mrouter != NULL) { MRW_WUNLOCK(); + MRW_TEARDOWN_WUNLOCK(); return EADDRINUSE; } @@ -708,6 +718,7 @@ ip_mrouter_init(struct socket *so, int version) HASH_NOWAIT); if (V_mfchashtbl == NULL) { MRW_WUNLOCK(); + MRW_TEARDOWN_WUNLOCK(); return (ENOMEM); } @@ -717,6 +728,7 @@ ip_mrouter_init(struct socket *so, int version) M_NOWAIT, &V_bw_upcalls_ring_mtx); if (!V_bw_upcalls_ring) { MRW_WUNLOCK(); + MRW_TEARDOWN_WUNLOCK(); return (ENOMEM); } @@ -736,6 +748,7 @@ ip_mrouter_init(struct socket *so, int version) mtx_init(&V_buf_ring_mtx, "mroute buf_ring mtx", NULL, MTX_DEF); MRW_WUNLOCK(); + MRW_TEARDOWN_WUNLOCK(); CTR1(KTR_IPMF, "%s: done", __func__); @@ -754,8 +767,12 @@ X_ip_mrouter_done(void) vifi_t vifi; struct bw_upcall *bu; - if (V_ip_mrouter == NULL) + MRW_TEARDOWN_WLOCK(); + + if (V_ip_mrouter == NULL) { + MRW_TEARDOWN_WUNLOCK(); return (EINVAL); + } /* * Detach/disable hooks to the reset of the system. @@ -768,7 +785,7 @@ X_ip_mrouter_done(void) * Wait for all epoch sections to complete to ensure * V_ip_mrouter = NULL is visible to others. 
*/ - epoch_wait_preempt(net_epoch_preempt); + NET_EPOCH_WAIT(); /* Stop and drain task queue */ taskqueue_block(V_task_queue); @@ -830,6 +847,7 @@ X_ip_mrouter_done(void) mtx_destroy(&V_buf_ring_mtx); MRW_WUNLOCK(); + MRW_TEARDOWN_WUNLOCK(); /* * Now drop our claim on promiscuous multicast on the interfaces recorded @@ -1311,6 +1329,8 @@ X_ip_mforward(struct ip *ip, struct ifnet *ifp, struct mbuf *m, u_long hash; int hlen; + M_ASSERTMAPPED(m); + CTR3(KTR_IPMF, "ip_mforward: delete mfc orig 0x%08x group %lx ifp %p", ntohl(ip->ip_src.s_addr), (u_long)ntohl(ip->ip_dst.s_addr), ifp); @@ -1562,6 +1582,7 @@ ip_mdq(struct mbuf *m, struct ifnet *ifp, struct mfc *rt, vifi_t xmt_vif) vifi_t vifi; int plen = ntohs(ip->ip_len); + M_ASSERTMAPPED(m); MRW_LOCK_ASSERT(); NET_EPOCH_ASSERT(); @@ -1745,6 +1766,7 @@ phyint_send(struct ip *ip, struct vif *vifp, struct mbuf *m) int hlen = ip->ip_hl << 2; MRW_LOCK_ASSERT(); + M_ASSERTMAPPED(m); /* * Make a new reference to the packet; make sure that @@ -2444,7 +2466,7 @@ pim_register_send_rp(struct ip *ip, struct vif *vifp, struct mbuf *mb_copy, ip_outer->ip_tos = ip->ip_tos; if (ip->ip_off & htons(IP_DF)) ip_outer->ip_off |= htons(IP_DF); - ip_fillid(ip_outer); + ip_fillid(ip_outer, V_ip_random_id); pimhdr = (struct pim_encap_pimhdr *)((caddr_t)ip_outer + sizeof(pim_encap_iphdr)); *pimhdr = pim_encap_pimhdr; @@ -2717,6 +2739,9 @@ sysctl_mfctable(SYSCTL_HANDLER_ARGS) return (error); MRW_RLOCK(); + if (V_mfchashtbl == NULL) + goto out_locked; + for (i = 0; i < mfchashsize; i++) { LIST_FOREACH(rt, &V_mfchashtbl[i], mfc_hash) { error = SYSCTL_OUT(req, rt, sizeof(struct mfc)); @@ -2805,6 +2830,7 @@ ip_mroute_modevent(module_t mod, int type, void *unused) switch (type) { case MOD_LOAD: + MRW_TEARDOWN_LOCK_INIT(); MRW_LOCK_INIT(); if_detach_event_tag = EVENTHANDLER_REGISTER(ifnet_departure_event, @@ -2876,6 +2902,7 @@ ip_mroute_modevent(module_t mod, int type, void *unused) rsvp_input_p = NULL; MRW_LOCK_DESTROY(); + MRW_TEARDOWN_LOCK_DESTROY(); break; default: diff --git a/sys/netinet/ip_options.c b/sys/netinet/ip_options.c index 41f77a7491f2..a9d6836d9e97 100644 --- a/sys/netinet/ip_options.c +++ b/sys/netinet/ip_options.c @@ -514,6 +514,8 @@ ip_insertoptions(struct mbuf *m, struct mbuf *opt, int *phlen) *phlen = 0; return (m); /* XXX should fail */ } + KASSERT((m->m_flags & M_EXTPG) == 0, ("%s: mbuf %p is unmapped", + __func__, m)); if (p->ipopt_dst.s_addr) ip->ip_dst = p->ipopt_dst; if (!M_WRITABLE(m) || M_LEADINGSPACE(m) < optlen) { diff --git a/sys/netinet/ip_output.c b/sys/netinet/ip_output.c index 28fb651a0bc9..ec6ba8d92015 100644 --- a/sys/netinet/ip_output.c +++ b/sys/netinet/ip_output.c @@ -112,13 +112,19 @@ ip_output_pfil(struct mbuf **mp, struct ifnet *ifp, int flags, struct mbuf *m; struct in_addr odst; struct ip *ip; + int ret; m = *mp; ip = mtod(m, struct ip *); /* Run through list of hooks for output packets. 
*/ odst.s_addr = ip->ip_dst.s_addr; - switch (pfil_mbuf_out(V_inet_pfil_head, mp, ifp, inp)) { + if (flags & IP_FORWARDING) + ret = pfil_mbuf_fwd(V_inet_pfil_head, mp, ifp, inp); + else + ret = pfil_mbuf_out(V_inet_pfil_head, mp, ifp, inp); + + switch (ret) { case PFIL_DROPPED: *error = EACCES; /* FALLTHROUGH */ @@ -323,7 +329,7 @@ ip_output(struct mbuf *m, struct mbuf *opt, struct route *ro, int flags, const struct sockaddr *gw; struct in_ifaddr *ia = NULL; struct in_addr src; - int isbroadcast; + bool isbroadcast; uint16_t ip_len, ip_off; struct route iproute; uint32_t fibnum; @@ -362,7 +368,7 @@ ip_output(struct mbuf *m, struct mbuf *opt, struct route *ro, int flags, if ((flags & (IP_FORWARDING|IP_RAWOUTPUT)) == 0) { ip->ip_v = IPVERSION; ip->ip_hl = hlen >> 2; - ip_fillid(ip); + ip_fillid(ip, V_ip_random_id); } else { /* Header already set, fetch hlen from there */ hlen = ip->ip_hl << 2; @@ -428,7 +434,7 @@ again: ifp = ia->ia_ifp; mtu = ifp->if_mtu; ip->ip_ttl = 1; - isbroadcast = 1; + isbroadcast = true; src = IA_SIN(ia)->sin_addr; } else if (flags & IP_ROUTETOIF) { if ((ia = ifatoia(ifa_ifwithdstaddr(sintosa(dst), @@ -443,7 +449,8 @@ again: mtu = ifp->if_mtu; ip->ip_ttl = 1; isbroadcast = ifp->if_flags & IFF_BROADCAST ? - in_ifaddr_broadcast(dst->sin_addr, ia) : 0; + (in_broadcast(ip->ip_dst) || + in_ifaddr_broadcast(dst->sin_addr, ia)) : 0; src = IA_SIN(ia)->sin_addr; } else if (IN_MULTICAST(ntohl(ip->ip_dst.s_addr)) && imo != NULL && imo->imo_multicast_ifp != NULL) { @@ -454,7 +461,7 @@ again: ifp = imo->imo_multicast_ifp; mtu = ifp->if_mtu; IFP_TO_IA(ifp, ia); - isbroadcast = 0; /* fool gcc */ + isbroadcast = false; /* Interface may have no addresses. */ if (ia != NULL) src = IA_SIN(ia)->sin_addr; @@ -496,10 +503,13 @@ again: gw = &nh->gw_sa; if (nh->nh_flags & NHF_HOST) isbroadcast = (nh->nh_flags & NHF_BROADCAST); - else if ((ifp->if_flags & IFF_BROADCAST) && (gw->sa_family == AF_INET)) - isbroadcast = in_ifaddr_broadcast(((const struct sockaddr_in *)gw)->sin_addr, ia); + else if ((ifp->if_flags & IFF_BROADCAST) && + (gw->sa_family == AF_INET)) + isbroadcast = in_broadcast(ip->ip_dst) || + in_ifaddr_broadcast( + ((const struct sockaddr_in *)gw)->sin_addr, ia); else - isbroadcast = 0; + isbroadcast = false; mtu = nh->nh_mtu; src = IA_SIN(ia)->sin_addr; } else { @@ -527,11 +537,12 @@ again: gw = &nh->gw_sa; ia = ifatoia(nh->nh_ifa); src = IA_SIN(ia)->sin_addr; - isbroadcast = (((nh->nh_flags & (NHF_HOST | NHF_BROADCAST)) == + isbroadcast = ((nh->nh_flags & (NHF_HOST | NHF_BROADCAST)) == (NHF_HOST | NHF_BROADCAST)) || ((ifp->if_flags & IFF_BROADCAST) && (gw->sa_family == AF_INET) && - in_ifaddr_broadcast(((const struct sockaddr_in *)gw)->sin_addr, ia))); + (in_broadcast(ip->ip_dst) || in_ifaddr_broadcast( + ((const struct sockaddr_in *)gw)->sin_addr, ia))); } /* Catch a possible divide by zero later. */ @@ -667,18 +678,19 @@ again: sendit: #if defined(IPSEC) || defined(IPSEC_SUPPORT) if (IPSEC_ENABLED(ipv4)) { - m = mb_unmapped_to_ext(m); - if (m == NULL) { - IPSTAT_INC(ips_odropped); - error = ENOBUFS; - goto bad; - } - if ((error = IPSEC_OUTPUT(ipv4, m, inp)) != 0) { + struct ip ip_hdr; + + if ((error = IPSEC_OUTPUT(ipv4, ifp, m, inp, mtu)) != 0) { if (error == EINPROGRESS) error = 0; goto done; } + + /* Update variables that are affected by ipsec4_output(). */ + m_copydata(m, 0, sizeof(ip_hdr), (char *)&ip_hdr); + hlen = ip_hdr.ip_hl << 2; } + /* * Check if there was a route for this packet; return error if not. 
*/ @@ -687,9 +699,6 @@ sendit: error = EHOSTUNREACH; goto bad; } - /* Update variables that are affected by ipsec4_output(). */ - ip = mtod(m, struct ip *); - hlen = ip->ip_hl << 2; #endif /* IPSEC */ /* Jump over all PFIL processing if hooks are not active. */ @@ -731,11 +740,20 @@ sendit: /* Ensure the packet data is mapped if the interface requires it. */ if ((ifp->if_capenable & IFCAP_MEXTPG) == 0) { - m = mb_unmapped_to_ext(m); - if (m == NULL) { + struct mbuf *m1; + + error = mb_unmapped_to_ext(m, &m1); + if (error != 0) { + if (error == EINVAL) { + if_printf(ifp, "TLS packet\n"); + /* XXXKIB */ + } else if (error == ENOMEM) { + error = ENOBUFS; + } IPSTAT_INC(ips_odropped); - error = ENOBUFS; - goto bad; + goto done; + } else { + m = m1; } } @@ -841,7 +859,7 @@ sendit: done: return (error); - bad: +bad: m_freem(m); goto done; } @@ -1081,10 +1099,22 @@ ip_ctloutput(struct socket *so, struct sockopt *sopt) sopt->sopt_dir == SOPT_SET) { switch (sopt->sopt_name) { case SO_SETFIB: + error = sooptcopyin(sopt, &optval, + sizeof(optval), sizeof(optval)); + if (error != 0) + break; + INP_WLOCK(inp); - inp->inp_inc.inc_fibnum = so->so_fibnum; + if ((inp->inp_flags & INP_BOUNDFIB) != 0 && + optval != so->so_fibnum) { + INP_WUNLOCK(inp); + error = EISCONN; + break; + } + error = sosetfib(inp->inp_socket, optval); + if (error == 0) + inp->inp_inc.inc_fibnum = optval; INP_WUNLOCK(inp); - error = 0; break; case SO_MAX_PACING_RATE: #ifdef RATELIMIT diff --git a/sys/netinet/ip_reass.c b/sys/netinet/ip_reass.c index a95780aa2f27..177069f5e010 100644 --- a/sys/netinet/ip_reass.c +++ b/sys/netinet/ip_reass.c @@ -670,6 +670,11 @@ ipreass_drain(void) VNET_LIST_RUNLOCK(); } +static void +ipreass_drain_lowmem(void *arg __unused, int flags __unused) +{ + ipreass_drain(); +} /* * Initialize IP reassembly structures. 
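Stepping back briefly to the ip_ctloutput() hunk above: SO_SETFIB handled at the IP layer now goes through sosetfib() and refuses to move a FIB-bound socket (INP_BOUNDFIB) onto a different FIB, failing with EISCONN. From userland the option is exercised as before; a minimal sketch (illustrative only, FIB number 2 is an arbitrary example):

	#include <sys/types.h>
	#include <sys/socket.h>
	#include <err.h>

	/* Pin an already-created socket "s" to FIB 2. */
	static void
	set_fib(int s)
	{
		int fib = 2;

		if (setsockopt(s, SOL_SOCKET, SO_SETFIB,
		    &fib, sizeof(fib)) == -1)
			err(1, "SO_SETFIB");	/* EISCONN if bound to another FIB */
	}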
@@ -711,10 +716,10 @@ ipreass_init(void) maxfrags = IP_MAXFRAGS; EVENTHANDLER_REGISTER(nmbclusters_change, ipreass_zone_change, NULL, EVENTHANDLER_PRI_ANY); - EVENTHANDLER_REGISTER(vm_lowmem, ipreass_drain, NULL, + EVENTHANDLER_REGISTER(vm_lowmem, ipreass_drain_lowmem, NULL, + LOWMEM_PRI_DEFAULT); + EVENTHANDLER_REGISTER(mbuf_lowmem, ipreass_drain_lowmem, NULL, LOWMEM_PRI_DEFAULT); - EVENTHANDLER_REGISTER(mbuf_lowmem, ipreass_drain, NULL, - LOWMEM_PRI_DEFAULT); } /* diff --git a/sys/netinet/ip_var.h b/sys/netinet/ip_var.h index 0f2ed8c43e64..f782ebc53eb0 100644 --- a/sys/netinet/ip_var.h +++ b/sys/netinet/ip_var.h @@ -47,7 +47,7 @@ struct ipovly { u_short ih_len; /* protocol length */ struct in_addr ih_src; /* source internet address */ struct in_addr ih_dst; /* destination internet address */ -}; +} __packed; #ifdef _KERNEL /* @@ -204,6 +204,7 @@ extern int (*legal_vif_num)(int); extern u_long (*ip_mcast_src)(int); VNET_DECLARE(int, rsvp_on); VNET_DECLARE(int, drop_redirect); +VNET_DECLARE(int, ip_random_id); #define V_ip_id VNET(ip_id) #define V_ip_defttl VNET(ip_defttl) @@ -216,6 +217,7 @@ VNET_DECLARE(int, drop_redirect); #define V_ip_mrouter VNET(ip_mrouter) #define V_rsvp_on VNET(rsvp_on) #define V_drop_redirect VNET(drop_redirect) +#define V_ip_random_id VNET(ip_random_id) void inp_freemoptions(struct ip_moptions *); int inp_getmoptions(struct inpcb *, struct sockopt *); @@ -235,7 +237,7 @@ struct mbuf * ip_reass(struct mbuf *); void ip_savecontrol(struct inpcb *, struct mbuf **, struct ip *, struct mbuf *); -void ip_fillid(struct ip *); +void ip_fillid(struct ip *, bool); int rip_ctloutput(struct socket *, struct sockopt *); int ipip_input(struct mbuf **, int *, int); int rsvp_input(struct mbuf **, int *, int); diff --git a/sys/netinet/libalias/alias.c b/sys/netinet/libalias/alias.c index 9bf6b82b9369..6758813f6a21 100644 --- a/sys/netinet/libalias/alias.c +++ b/sys/netinet/libalias/alias.c @@ -183,12 +183,12 @@ a timeout period. 
*/ /* Local prototypes */ -static void TcpMonitorIn(u_char, struct alias_link *); +static void TcpMonitorIn(uint16_t, struct alias_link *); -static void TcpMonitorOut(u_char, struct alias_link *); +static void TcpMonitorOut(uint16_t, struct alias_link *); static void -TcpMonitorIn(u_char th_flags, struct alias_link *lnk) +TcpMonitorIn(uint16_t th_flags, struct alias_link *lnk) { switch (GetStateIn(lnk)) { case ALIAS_TCP_STATE_NOT_CONNECTED: @@ -205,7 +205,7 @@ TcpMonitorIn(u_char th_flags, struct alias_link *lnk) } static void -TcpMonitorOut(u_char th_flags, struct alias_link *lnk) +TcpMonitorOut(uint16_t th_flags, struct alias_link *lnk) { switch (GetStateOut(lnk)) { case ALIAS_TCP_STATE_NOT_CONNECTED: @@ -290,13 +290,14 @@ IcmpAliasIn1(struct libalias *la, struct ip *pip) { struct alias_link *lnk; struct icmp *ic; + int ret; LIBALIAS_LOCK_ASSERT(la); ic = (struct icmp *)ip_next(pip); /* Get source address from ICMP data field and restore original data */ - lnk = FindIcmpIn(la, pip->ip_src, pip->ip_dst, ic->icmp_id, 1); - if (lnk != NULL) { + ret = FindIcmpIn(la, pip->ip_src, pip->ip_dst, ic->icmp_id, 1, &lnk); + if (ret == PKT_ALIAS_OK) { u_short original_id; int accumulate; @@ -319,10 +320,8 @@ IcmpAliasIn1(struct libalias *la, struct ip *pip) &original_address, &pip->ip_dst, 2); pip->ip_dst = original_address; } - - return (PKT_ALIAS_OK); } - return (PKT_ALIAS_IGNORED); + return (ret); } /* @@ -337,6 +336,7 @@ IcmpAliasIn2(struct libalias *la, struct ip *pip) struct udphdr *ud; struct tcphdr *tc; struct alias_link *lnk; + int ret; LIBALIAS_LOCK_ASSERT(la); ic = (struct icmp *)ip_next(pip); @@ -346,18 +346,26 @@ IcmpAliasIn2(struct libalias *la, struct ip *pip) tc = (struct tcphdr *)ip_next(ip); ic2 = (struct icmp *)ip_next(ip); - if (ip->ip_p == IPPROTO_UDP) - lnk = FindUdpTcpIn(la, ip->ip_dst, ip->ip_src, + if (ip->ip_p == IPPROTO_UDP) { + ret = FindUdpTcpIn(la, ip->ip_dst, ip->ip_src, ud->uh_dport, ud->uh_sport, - IPPROTO_UDP, 0); - else if (ip->ip_p == IPPROTO_TCP) - lnk = FindUdpTcpIn(la, ip->ip_dst, ip->ip_src, + IPPROTO_UDP, 0, &lnk); + if (ret != PKT_ALIAS_OK) + return (ret); + } else if (ip->ip_p == IPPROTO_TCP) { + ret = FindUdpTcpIn(la, ip->ip_dst, ip->ip_src, tc->th_dport, tc->th_sport, - IPPROTO_TCP, 0); - else if (ip->ip_p == IPPROTO_ICMP) { - if (ic2->icmp_type == ICMP_ECHO || ic2->icmp_type == ICMP_TSTAMP) - lnk = FindIcmpIn(la, ip->ip_dst, ip->ip_src, ic2->icmp_id, 0); - else + IPPROTO_TCP, 0, &lnk); + if (ret != PKT_ALIAS_OK) + return (ret); + } else if (ip->ip_p == IPPROTO_ICMP) { + if (ic2->icmp_type == ICMP_ECHO || + ic2->icmp_type == ICMP_TSTAMP) { + ret = FindIcmpIn(la, ip->ip_dst, ip->ip_src, + ic2->icmp_id, 0, &lnk); + if (ret != PKT_ALIAS_OK) + return (ret); + } else lnk = NULL; } else lnk = NULL; @@ -479,13 +487,15 @@ IcmpAliasOut1(struct libalias *la, struct ip *pip, int create) { struct alias_link *lnk; struct icmp *ic; + int ret; LIBALIAS_LOCK_ASSERT(la); ic = (struct icmp *)ip_next(pip); /* Save overwritten data for when echo packet returns */ - lnk = FindIcmpOut(la, pip->ip_src, pip->ip_dst, ic->icmp_id, create); - if (lnk != NULL) { + ret = FindIcmpOut(la, pip->ip_src, pip->ip_dst, ic->icmp_id, create, + &lnk); + if (ret == PKT_ALIAS_OK) { u_short alias_id; int accumulate; @@ -508,10 +518,8 @@ IcmpAliasOut1(struct libalias *la, struct ip *pip, int create) &alias_address, &pip->ip_src, 2); pip->ip_src = alias_address; } - - return (PKT_ALIAS_OK); } - return (PKT_ALIAS_IGNORED); + return (ret); } /* @@ -526,6 +534,7 @@ IcmpAliasOut2(struct libalias *la, 
struct ip *pip) struct udphdr *ud; struct tcphdr *tc; struct alias_link *lnk; + int ret; LIBALIAS_LOCK_ASSERT(la); ic = (struct icmp *)ip_next(pip); @@ -535,18 +544,26 @@ IcmpAliasOut2(struct libalias *la, struct ip *pip) tc = (struct tcphdr *)ip_next(ip); ic2 = (struct icmp *)ip_next(ip); - if (ip->ip_p == IPPROTO_UDP) - lnk = FindUdpTcpOut(la, ip->ip_dst, ip->ip_src, + if (ip->ip_p == IPPROTO_UDP) { + ret = FindUdpTcpOut(la, ip->ip_dst, ip->ip_src, ud->uh_dport, ud->uh_sport, - IPPROTO_UDP, 0); - else if (ip->ip_p == IPPROTO_TCP) - lnk = FindUdpTcpOut(la, ip->ip_dst, ip->ip_src, + IPPROTO_UDP, 0, &lnk); + if (ret != PKT_ALIAS_OK) + return (ret); + } else if (ip->ip_p == IPPROTO_TCP) { + ret = FindUdpTcpOut(la, ip->ip_dst, ip->ip_src, tc->th_dport, tc->th_sport, - IPPROTO_TCP, 0); - else if (ip->ip_p == IPPROTO_ICMP) { - if (ic2->icmp_type == ICMP_ECHO || ic2->icmp_type == ICMP_TSTAMP) - lnk = FindIcmpOut(la, ip->ip_dst, ip->ip_src, ic2->icmp_id, 0); - else + IPPROTO_TCP, 0, &lnk); + if (ret != PKT_ALIAS_OK) + return (ret); + } else if (ip->ip_p == IPPROTO_ICMP) { + if (ic2->icmp_type == ICMP_ECHO || + ic2->icmp_type == ICMP_TSTAMP) { + ret = FindIcmpOut(la, ip->ip_dst, ip->ip_src, + ic2->icmp_id, 0, &lnk); + if (ret != PKT_ALIAS_OK) + return (ret); + } else lnk = NULL; } else lnk = NULL; @@ -661,14 +678,15 @@ ProtoAliasIn(struct libalias *la, struct in_addr ip_src, struct ip *pip, u_char ip_p, u_short *ip_sum) { struct alias_link *lnk; + int ret; LIBALIAS_LOCK_ASSERT(la); /* Return if proxy-only mode is enabled */ if (la->packetAliasMode & PKT_ALIAS_PROXY_ONLY) return (PKT_ALIAS_OK); - lnk = FindProtoIn(la, ip_src, pip->ip_dst, ip_p); - if (lnk != NULL) { + ret = FindProtoIn(la, ip_src, pip->ip_dst, ip_p, &lnk); + if (ret == PKT_ALIAS_OK) { struct in_addr original_address; original_address = GetOriginalAddress(lnk); @@ -677,10 +695,8 @@ ProtoAliasIn(struct libalias *la, struct in_addr ip_src, DifferentialChecksum(ip_sum, &original_address, &pip->ip_dst, 2); pip->ip_dst = original_address; - - return (PKT_ALIAS_OK); } - return (PKT_ALIAS_IGNORED); + return (ret); } /* @@ -693,6 +709,7 @@ ProtoAliasOut(struct libalias *la, struct ip *pip, struct in_addr ip_dst, u_char ip_p, u_short *ip_sum, int create) { struct alias_link *lnk; + int ret; LIBALIAS_LOCK_ASSERT(la); @@ -703,8 +720,8 @@ ProtoAliasOut(struct libalias *la, struct ip *pip, if (!create) return (PKT_ALIAS_IGNORED); - lnk = FindProtoOut(la, pip->ip_src, ip_dst, ip_p); - if (lnk != NULL) { + ret = FindProtoOut(la, pip->ip_src, ip_dst, ip_p, &lnk); + if (ret == PKT_ALIAS_OK) { struct in_addr alias_address; alias_address = GetAliasAddress(lnk); @@ -713,10 +730,8 @@ ProtoAliasOut(struct libalias *la, struct ip *pip, DifferentialChecksum(ip_sum, &alias_address, &pip->ip_src, 2); pip->ip_src = alias_address; - - return (PKT_ALIAS_OK); } - return (PKT_ALIAS_IGNORED); + return (ret); } #define MF_ISSET(_pip) (ntohs((_pip)->ip_off) & IP_MF) @@ -745,6 +760,7 @@ UdpAliasIn(struct libalias *la, struct ip *pip) { struct udphdr *ud; struct alias_link *lnk; + int ret; LIBALIAS_LOCK_ASSERT(la); @@ -752,10 +768,12 @@ UdpAliasIn(struct libalias *la, struct ip *pip) if (ud == NULL) return (PKT_ALIAS_IGNORED); - lnk = FindUdpTcpIn(la, pip->ip_src, pip->ip_dst, + ret = FindUdpTcpIn(la, pip->ip_src, pip->ip_dst, ud->uh_sport, ud->uh_dport, - IPPROTO_UDP, !(la->packetAliasMode & PKT_ALIAS_PROXY_ONLY)); - if (lnk != NULL) { + IPPROTO_UDP, !(la->packetAliasMode & PKT_ALIAS_PROXY_ONLY), &lnk); + if (ret != PKT_ALIAS_OK) + return (ret); + { struct in_addr 
alias_address; struct in_addr original_address; struct in_addr proxy_address; @@ -828,7 +846,6 @@ UdpAliasIn(struct libalias *la, struct ip *pip) return (PKT_ALIAS_OK); } - return (PKT_ALIAS_IGNORED); } static int @@ -840,7 +857,7 @@ UdpAliasOut(struct libalias *la, struct ip *pip, int maxpacketsize, int create) struct in_addr proxy_server_address; u_short dest_port; u_short proxy_server_port; - int proxy_type; + int proxy_type, ret; LIBALIAS_LOCK_ASSERT(la); @@ -877,10 +894,12 @@ UdpAliasOut(struct libalias *la, struct ip *pip, int maxpacketsize, int create) pip->ip_dst = proxy_server_address; ud->uh_dport = proxy_server_port; } - lnk = FindUdpTcpOut(la, pip->ip_src, pip->ip_dst, + ret = FindUdpTcpOut(la, pip->ip_src, pip->ip_dst, ud->uh_sport, ud->uh_dport, - IPPROTO_UDP, create); - if (lnk != NULL) { + IPPROTO_UDP, create, &lnk); + if (ret != PKT_ALIAS_OK) + return (ret); + { u_short alias_port; struct in_addr alias_address; struct alias_data ad = { @@ -930,7 +949,6 @@ UdpAliasOut(struct libalias *la, struct ip *pip, int maxpacketsize, int create) return (PKT_ALIAS_OK); } - return (PKT_ALIAS_IGNORED); } static int @@ -939,6 +957,7 @@ TcpAliasIn(struct libalias *la, struct ip *pip) struct tcphdr *tc; struct alias_link *lnk; size_t dlen; + int ret; LIBALIAS_LOCK_ASSERT(la); @@ -947,11 +966,12 @@ TcpAliasIn(struct libalias *la, struct ip *pip) return (PKT_ALIAS_IGNORED); tc = (struct tcphdr *)ip_next(pip); - lnk = FindUdpTcpIn(la, pip->ip_src, pip->ip_dst, + ret = FindUdpTcpIn(la, pip->ip_src, pip->ip_dst, tc->th_sport, tc->th_dport, IPPROTO_TCP, - !(la->packetAliasMode & PKT_ALIAS_PROXY_ONLY)); - if (lnk != NULL) { + !(la->packetAliasMode & PKT_ALIAS_PROXY_ONLY), + &lnk); + if (ret == PKT_ALIAS_OK) { struct in_addr alias_address; struct in_addr original_address; struct in_addr proxy_address; @@ -1053,17 +1073,17 @@ TcpAliasIn(struct libalias *la, struct ip *pip) /* Monitor TCP connection state */ tc = (struct tcphdr *)ip_next(pip); - TcpMonitorIn(tc->th_flags, lnk); + TcpMonitorIn(__tcp_get_flags(tc), lnk); return (PKT_ALIAS_OK); } - return (PKT_ALIAS_IGNORED); + return (ret); } static int TcpAliasOut(struct libalias *la, struct ip *pip, int maxpacketsize, int create) { - int proxy_type; + int proxy_type, ret; u_short dest_port; u_short proxy_server_port; size_t dlen; @@ -1108,12 +1128,12 @@ TcpAliasOut(struct libalias *la, struct ip *pip, int maxpacketsize, int create) accumulate -= twowords(&pip->ip_dst); ADJUST_CHECKSUM(accumulate, pip->ip_sum); } - lnk = FindUdpTcpOut(la, pip->ip_src, pip->ip_dst, + ret = FindUdpTcpOut(la, pip->ip_src, pip->ip_dst, tc->th_sport, tc->th_dport, - IPPROTO_TCP, create); - if (lnk == NULL) - return (PKT_ALIAS_IGNORED); - if (lnk != NULL) { + IPPROTO_TCP, create, &lnk); + if (ret != PKT_ALIAS_OK) + return (ret); + { u_short alias_port; struct in_addr alias_address; int accumulate; @@ -1142,7 +1162,7 @@ TcpAliasOut(struct libalias *la, struct ip *pip, int maxpacketsize, int create) /* Monitor TCP connection state */ tc = (struct tcphdr *)ip_next(pip); - TcpMonitorOut(tc->th_flags, lnk); + TcpMonitorOut(__tcp_get_flags(tc), lnk); /* Walk out chain. 
*/ find_handler(OUT, TCP, la, pip, &ad); @@ -1177,7 +1197,6 @@ TcpAliasOut(struct libalias *la, struct ip *pip, int maxpacketsize, int create) return (PKT_ALIAS_OK); } - return (PKT_ALIAS_IGNORED); } /* Fragment Handling @@ -1581,17 +1600,24 @@ LibAliasUnaliasOut(struct libalias *la, ic = (struct icmp *)ip_next(pip); /* Find a link */ - if (pip->ip_p == IPPROTO_UDP) - lnk = FindUdpTcpIn(la, pip->ip_dst, pip->ip_src, + if (pip->ip_p == IPPROTO_UDP) { + iresult = FindUdpTcpIn(la, pip->ip_dst, pip->ip_src, ud->uh_dport, ud->uh_sport, - IPPROTO_UDP, 0); - else if (pip->ip_p == IPPROTO_TCP) - lnk = FindUdpTcpIn(la, pip->ip_dst, pip->ip_src, + IPPROTO_UDP, 0, &lnk); + if (iresult != PKT_ALIAS_OK) + goto getout; + } else if (pip->ip_p == IPPROTO_TCP) { + iresult = FindUdpTcpIn(la, pip->ip_dst, pip->ip_src, tc->th_dport, tc->th_sport, - IPPROTO_TCP, 0); - else if (pip->ip_p == IPPROTO_ICMP) - lnk = FindIcmpIn(la, pip->ip_dst, pip->ip_src, ic->icmp_id, 0); - else + IPPROTO_TCP, 0, &lnk); + if (iresult != PKT_ALIAS_OK) + goto getout; + } else if (pip->ip_p == IPPROTO_ICMP) { + iresult = FindIcmpIn(la, pip->ip_dst, pip->ip_src, + ic->icmp_id, 0, &lnk); + if (iresult != PKT_ALIAS_OK) + goto getout; + } else lnk = NULL; /* Change it from an aliased packet to an unaliased packet */ diff --git a/sys/netinet/libalias/alias.h b/sys/netinet/libalias/alias.h index 706184552429..96d8ceec28be 100644 --- a/sys/netinet/libalias/alias.h +++ b/sys/netinet/libalias/alias.h @@ -227,6 +227,26 @@ struct mbuf *m_megapullup(struct mbuf *, int); */ #define PKT_ALIAS_UNREGISTERED_CGN 0x400 +/* + * When this bit is set, UDP uses endpoint-independent mapping (EIM), as per + * RFC 4787 ("full cone" NAT of RFC 3489). All packets from the same internal + * address:port are mapped to the same NAT address:port, regardless of their + * destination address:port. If filtering rules allow, and if + * PKT_ALIAS_DENY_INCOMING is unset, any other external address:port can also + * send to the internal address:port through its mapped NAT address:port. This + * is more compatible with applications, and can reduce the need for port + * forwarding, but less scalable as each NAT address:port can only be + * concurrently used by at most one internal address:port. + * + * When this bit is unset, UDP packets use endpoint-dependent mapping (EDM) + * ("symmetric" NAT). Each connection from a particular internal address:port + * to different external addresses:ports is mapped to a random and + * unpredictable NAT address:port. Two applications behind EDM NATs can only + * connect to each other by port forwarding on the NAT, or tunnelling through + * an in-between server. + */ +#define PKT_ALIAS_UDP_EIM 0x800 + /* Function return codes.
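A userland consumer would opt into the endpoint-independent mapping behaviour described above through the normal libalias mode interface. A hedged sketch; the instance and address setup are generic libalias usage and only PKT_ALIAS_UDP_EIM is new in this change:

    #include <netinet/in.h>
    #include <alias.h>

    struct libalias *la;
    struct in_addr nat_addr;    /* public NAT address, filled in elsewhere */

    la = LibAliasInit(NULL);
    LibAliasSetAddress(la, nat_addr);
    /* Keep every UDP flow from one internal addr:port on one NAT addr:port. */
    LibAliasSetMode(la, PKT_ALIAS_UDP_EIM, PKT_ALIAS_UDP_EIM);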
*/ #define PKT_ALIAS_ERROR -1 #define PKT_ALIAS_OK 1 diff --git a/sys/netinet/libalias/alias_db.c b/sys/netinet/libalias/alias_db.c index 167201fa1b8f..c143d74a2f45 100644 --- a/sys/netinet/libalias/alias_db.c +++ b/sys/netinet/libalias/alias_db.c @@ -28,13 +28,13 @@ #include <sys/cdefs.h> #ifdef _KERNEL -#include <machine/stdarg.h> #include <sys/param.h> #include <sys/kernel.h> #include <sys/systm.h> #include <sys/lock.h> #include <sys/module.h> #include <sys/rwlock.h> +#include <sys/stdarg.h> #include <sys/syslog.h> #else #include <stdarg.h> @@ -93,6 +93,8 @@ DECLARE_MODULE(alias, alias_mod, SI_SUB_DRIVERS, SI_ORDER_SECOND); SPLAY_GENERATE(splay_out, alias_link, all.out, cmp_out); SPLAY_GENERATE(splay_in, group_in, in, cmp_in); +SPLAY_GENERATE(splay_internal_endpoint, alias_link, all.internal_endpoint, + cmp_internal_endpoint); static struct group_in * StartPointIn(struct libalias *la, @@ -235,6 +237,19 @@ GetNewPort(struct libalias *la, struct alias_link *lnk, int alias_port_param) max_trials = GET_NEW_PORT_MAX_ATTEMPTS; + if ((la->packetAliasMode & PKT_ALIAS_UDP_EIM) && + lnk->link_type == LINK_UDP) { + /* Try reuse the same alias address:port for all destinations + * from the same internal address:port, as per RFC 4787. + */ + struct alias_link *search_result = FindLinkByInternalEndpoint( + la, lnk->src_addr, lnk->src_port, lnk->link_type); + if (search_result != NULL) { + lnk->alias_port = search_result->alias_port; + return (0); + } + } + /* * When the PKT_ALIAS_SAME_PORTS option is chosen, * the first try will be the actual source port. If @@ -254,10 +269,18 @@ GetNewPort(struct libalias *la, struct alias_link *lnk, int alias_port_param) if (grp == NULL) break; + /* As per RFC 4787, UDP cannot share the same alias port among + * multiple internal endpoints + */ + if ((la->packetAliasMode & PKT_ALIAS_UDP_EIM) && + lnk->link_type == LINK_UDP) + continue; + LIST_FOREACH(search_result, &grp->full, all.in) { - if (lnk->dst_addr.s_addr == search_result->dst_addr.s_addr && + if (lnk->dst_addr.s_addr == + search_result->dst_addr.s_addr && lnk->dst_port == search_result->dst_port) - break; /* found match */ + break; /* found match */ } if (search_result == NULL) break; @@ -496,6 +519,10 @@ DeleteLink(struct alias_link **plnk, int deletePermanent) /* Adjust input table pointers */ LIST_REMOVE(lnk, all.in); + /* Adjust "internal endpoint" table pointer */ + SPLAY_REMOVE(splay_internal_endpoint, + &la->linkSplayInternalEndpoint, lnk); + /* Remove intermediate node, if empty */ grp = StartPointIn(la, lnk->alias_addr, lnk->alias_port, lnk->link_type, 0); if (grp != NULL && @@ -696,6 +723,10 @@ AddLink(struct libalias *la, struct in_addr src_addr, struct in_addr dst_addr, LIST_INSERT_HEAD(&grp->partial, lnk, all.in); else LIST_INSERT_HEAD(&grp->full, lnk, all.in); + + /* Set up pointers for "internal endpoint" lookup table */ + SPLAY_INSERT(splay_internal_endpoint, + &la->linkSplayInternalEndpoint, lnk); } break; } @@ -868,8 +899,18 @@ _FindLinkIn(struct libalias *la, struct in_addr dst_addr, case 0: LIST_FOREACH(lnk, &grp->full, all.in) { if (lnk->dst_addr.s_addr == dst_addr.s_addr && - lnk->dst_port == dst_port) - return (UseLink(la, lnk)); + lnk->dst_port == dst_port) { + struct alias_link *found; + + found = UseLink(la, lnk); + if (found != NULL) + return (found); + /* link expired */ + grp = StartPointIn(la, alias_addr, alias_port, link_type, 0); + if (grp == NULL) + return (NULL); + break; + } } break; case LINK_UNKNOWN_DEST_PORT: @@ -954,6 +995,14 @@ FindLinkIn(struct libalias *la, struct 
in_addr dst_addr, lnk = _FindLinkIn(la, dst_addr, alias_addr, dst_port, alias_port, link_type, replace_partial_links); + if (lnk == NULL && + (la->packetAliasMode & PKT_ALIAS_UDP_EIM) && + link_type == LINK_UDP && + !(la->packetAliasMode & PKT_ALIAS_DENY_INCOMING)) { + lnk = _FindLinkIn(la, ANY_ADDR, alias_addr, 0, alias_port, + link_type, replace_partial_links); + } + if (lnk == NULL) { /* * The following allows permanent links to be specified as @@ -970,6 +1019,20 @@ FindLinkIn(struct libalias *la, struct in_addr dst_addr, return (lnk); } +static struct alias_link * +FindLinkByInternalEndpoint(struct libalias *la, struct in_addr src_addr, + u_short src_port, + int link_type) +{ + struct alias_link needle = { + .src_addr = src_addr, + .src_port = src_port, + .link_type = link_type + }; + LIBALIAS_LOCK_ASSERT(la); + return SPLAY_FIND(splay_internal_endpoint, &la->linkSplayInternalEndpoint, &needle); +} + /* External routines for finding/adding links -- "external" means outside alias_db.c, but within alias*.c -- @@ -986,15 +1049,19 @@ FindLinkIn(struct libalias *la, struct in_addr dst_addr, (prototypes in alias_local.h) */ -struct alias_link * +int FindIcmpIn(struct libalias *la, struct in_addr dst_addr, struct in_addr alias_addr, u_short id_alias, - int create) + int create, + struct alias_link **lnkp) { struct alias_link *lnk; LIBALIAS_LOCK_ASSERT(la); + + *lnkp = NULL; + lnk = FindLinkIn(la, dst_addr, alias_addr, NO_DEST_PORT, id_alias, LINK_ICMP, 0); @@ -1005,19 +1072,26 @@ FindIcmpIn(struct libalias *la, struct in_addr dst_addr, lnk = AddLink(la, target_addr, dst_addr, alias_addr, id_alias, NO_DEST_PORT, id_alias, LINK_ICMP); + if (lnk == NULL) + return (PKT_ALIAS_ERROR); } - return (lnk); + *lnkp = lnk; + return (lnk != NULL ? PKT_ALIAS_OK : PKT_ALIAS_IGNORED); } -struct alias_link * +int FindIcmpOut(struct libalias *la, struct in_addr src_addr, struct in_addr dst_addr, u_short id, - int create) + int create, + struct alias_link **lnkp) { struct alias_link *lnk; LIBALIAS_LOCK_ASSERT(la); + + *lnkp = NULL; + lnk = FindLinkOut(la, src_addr, dst_addr, id, NO_DEST_PORT, LINK_ICMP, 0); @@ -1028,8 +1102,11 @@ FindIcmpOut(struct libalias *la, struct in_addr src_addr, lnk = AddLink(la, src_addr, dst_addr, alias_addr, id, NO_DEST_PORT, GET_ALIAS_ID, LINK_ICMP); + if (lnk == NULL) + return (PKT_ALIAS_ERROR); } - return (lnk); + *lnkp = lnk; + return (lnk != NULL ? PKT_ALIAS_OK : PKT_ALIAS_IGNORED); } struct alias_link * @@ -1083,18 +1160,21 @@ FindFragmentPtr(struct libalias *la, struct in_addr dst_addr, LINK_FRAGMENT_PTR, 0); } -struct alias_link * +int FindProtoIn(struct libalias *la, struct in_addr dst_addr, struct in_addr alias_addr, - u_char proto) + u_char proto, + struct alias_link **lnkp) { struct alias_link *lnk; LIBALIAS_LOCK_ASSERT(la); + + *lnkp = NULL; + lnk = FindLinkIn(la, dst_addr, alias_addr, NO_DEST_PORT, 0, proto, 1); - if (lnk == NULL && !(la->packetAliasMode & PKT_ALIAS_DENY_INCOMING)) { struct in_addr target_addr; @@ -1102,22 +1182,28 @@ FindProtoIn(struct libalias *la, struct in_addr dst_addr, lnk = AddLink(la, target_addr, dst_addr, alias_addr, NO_SRC_PORT, NO_DEST_PORT, 0, proto); + if (lnk == NULL) + return (PKT_ALIAS_ERROR); } - return (lnk); + *lnkp = lnk; + return (lnk != NULL ? 
PKT_ALIAS_OK : PKT_ALIAS_IGNORED); } -struct alias_link * +int FindProtoOut(struct libalias *la, struct in_addr src_addr, struct in_addr dst_addr, - u_char proto) + u_char proto, + struct alias_link **lnkp) { struct alias_link *lnk; LIBALIAS_LOCK_ASSERT(la); + + *lnkp = NULL; + lnk = FindLinkOut(la, src_addr, dst_addr, NO_SRC_PORT, NO_DEST_PORT, proto, 1); - if (lnk == NULL) { struct in_addr alias_addr; @@ -1125,22 +1211,29 @@ FindProtoOut(struct libalias *la, struct in_addr src_addr, lnk = AddLink(la, src_addr, dst_addr, alias_addr, NO_SRC_PORT, NO_DEST_PORT, 0, proto); + if (lnk == NULL) + return (PKT_ALIAS_ERROR); } - return (lnk); + *lnkp = lnk; + return (lnk != NULL ? PKT_ALIAS_OK : PKT_ALIAS_IGNORED); } -struct alias_link * +int FindUdpTcpIn(struct libalias *la, struct in_addr dst_addr, struct in_addr alias_addr, u_short dst_port, u_short alias_port, u_char proto, - int create) + int create, + struct alias_link **lnkp) { int link_type; struct alias_link *lnk; LIBALIAS_LOCK_ASSERT(la); + + *lnkp = NULL; + switch (proto) { case IPPROTO_UDP: link_type = LINK_UDP; @@ -1149,8 +1242,7 @@ FindUdpTcpIn(struct libalias *la, struct in_addr dst_addr, link_type = LINK_TCP; break; default: - return (NULL); - break; + return (PKT_ALIAS_IGNORED); } lnk = FindLinkIn(la, dst_addr, alias_addr, @@ -1164,22 +1256,30 @@ FindUdpTcpIn(struct libalias *la, struct in_addr dst_addr, lnk = AddLink(la, target_addr, dst_addr, alias_addr, alias_port, dst_port, alias_port, link_type); + if (lnk == NULL) + return (PKT_ALIAS_ERROR); + } - return (lnk); + *lnkp = lnk; + return (lnk != NULL ? PKT_ALIAS_OK : PKT_ALIAS_IGNORED); } -struct alias_link * +int FindUdpTcpOut(struct libalias *la, struct in_addr src_addr, struct in_addr dst_addr, u_short src_port, u_short dst_port, u_char proto, - int create) + int create, + struct alias_link **lnkp) { int link_type; struct alias_link *lnk; LIBALIAS_LOCK_ASSERT(la); + + *lnkp = NULL; + switch (proto) { case IPPROTO_UDP: link_type = LINK_UDP; @@ -1188,12 +1288,10 @@ FindUdpTcpOut(struct libalias *la, struct in_addr src_addr, link_type = LINK_TCP; break; default: - return (NULL); - break; + return (PKT_ALIAS_IGNORED); } lnk = FindLinkOut(la, src_addr, dst_addr, src_port, dst_port, link_type, create); - if (lnk == NULL && create) { struct in_addr alias_addr; @@ -1201,8 +1299,11 @@ FindUdpTcpOut(struct libalias *la, struct in_addr src_addr, lnk = AddLink(la, src_addr, dst_addr, alias_addr, src_port, dst_port, GET_ALIAS_PORT, link_type); + if (lnk == NULL) + return (PKT_ALIAS_ERROR); } - return (lnk); + *lnkp = lnk; + return (lnk != NULL ? PKT_ALIAS_OK : PKT_ALIAS_IGNORED); } struct alias_link * @@ -2100,6 +2201,7 @@ LibAliasInit(struct libalias *la) SPLAY_INIT(&la->linkSplayIn); SPLAY_INIT(&la->linkSplayOut); + SPLAY_INIT(&la->linkSplayInternalEndpoint); LIST_INIT(&la->pptpList); TAILQ_INIT(&la->checkExpire); #ifdef _KERNEL diff --git a/sys/netinet/libalias/alias_db.h b/sys/netinet/libalias/alias_db.h index 35858099bce2..7175d0a50f4b 100644 --- a/sys/netinet/libalias/alias_db.h +++ b/sys/netinet/libalias/alias_db.h @@ -208,12 +208,14 @@ static struct in_addr const ANY_ADDR = { INADDR_ANY }; stored in the auxiliary space. Pointers to unresolved fragments can also be stored. - The link records support two independent chainings. Lookup + The link records support several independent chainings. Lookup tables for input and out tables hold the initial pointers the link chains. On input, the lookup table indexes on alias port and link type. 
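The Find*In/Out helpers converted above no longer return a bare link pointer; they return PKT_ALIAS_OK, PKT_ALIAS_IGNORED or PKT_ALIAS_ERROR and hand the link back through an output parameter, so callers can tell "no matching link" apart from an allocation failure in AddLink(). The calling pattern the converted sites follow, in sketch form (variable names are illustrative):

    struct alias_link *lnk;
    int ret;

    ret = FindUdpTcpIn(la, pip->ip_src, pip->ip_dst,
        ud->uh_sport, ud->uh_dport, IPPROTO_UDP, 1, &lnk);
    if (ret != PKT_ALIAS_OK)
            return (ret);   /* IGNORED: no link; ERROR: link allocation failed */
    /* ... rewrite the packet using lnk ... */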
On output, the lookup table indexes on source address, destination address, source port, destination - port and link type. + port and link type. A internal_endpoint table is used for + endpoint-independent mapping, and indexes on source address, + source port and link type. */ /* used to save changes to ACK/sequence numbers */ @@ -292,6 +294,7 @@ struct alias_link { struct { SPLAY_ENTRY(alias_link) out; LIST_ENTRY (alias_link) in; + SPLAY_ENTRY(alias_link) internal_endpoint; } all; struct { LIST_ENTRY (alias_link) list; @@ -374,25 +377,38 @@ cmp_in(struct group_in *a, struct group_in *b) { } SPLAY_PROTOTYPE(splay_in, group_in, in, cmp_in); +static inline int +cmp_internal_endpoint(struct alias_link *a, struct alias_link *b) { + int i = a->link_type - b->link_type; + if (i != 0) return (i); + if (a->src_addr.s_addr > b->src_addr.s_addr) return (1); + if (a->src_addr.s_addr < b->src_addr.s_addr) return (-1); + i = a->src_port - b->src_port; + return (i); +} +SPLAY_PROTOTYPE(splay_internal_endpoint, alias_link, all.internal_endpoint, + cmp_internal_endpoint); + /* Internal routines for finding, deleting and adding links Port Allocation: - GetNewPort() -- find and reserve new alias port number - GetSocket() -- try to allocate a socket for a given port + GetNewPort() -- find and reserve new alias port number + GetSocket() -- try to allocate a socket for a given port Link creation and deletion: - CleanupAliasData() - remove all link chains from lookup table - CleanupLink() - look for a stale link - DeleteLink() - remove link - AddLink() - add link - ReLink() - change link + CleanupAliasData() - remove all link chains from lookup table + CleanupLink() - look for a stale link + DeleteLink() - remove link + AddLink() - add link + ReLink() - change link Link search: - FindLinkOut() - find link for outgoing packets - FindLinkIn() - find link for incoming packets + FindLinkOut() - find link for outgoing packets + FindLinkIn() - find link for incoming packets + FindLinkByInternalEndpoint() - find link by a packet's internal endpoint Port search: - FindNewPortGroup() - find an available group of ports + FindNewPortGroup() - find an available group of ports */ /* Local prototypes */ @@ -417,6 +433,9 @@ FindLinkOut(struct libalias *, struct in_addr, struct in_addr, u_short, u_short, static struct alias_link * FindLinkIn(struct libalias *, struct in_addr, struct in_addr, u_short, u_short, int, int); +static struct alias_link * +FindLinkByInternalEndpoint(struct libalias *, struct in_addr, u_short, int); + static u_short _RandomPort(struct libalias *la); #define GET_NEW_PORT_MAX_ATTEMPTS 20 diff --git a/sys/netinet/libalias/alias_ftp.c b/sys/netinet/libalias/alias_ftp.c index 4a0b616ccf27..4119221e9b35 100644 --- a/sys/netinet/libalias/alias_ftp.c +++ b/sys/netinet/libalias/alias_ftp.c @@ -752,7 +752,7 @@ NewFtpMessage(struct libalias *la, struct ip *pip, /* Compute TCP checksum for revised packet */ tc->th_sum = 0; #ifdef _KERNEL - tc->th_x2 = (TH_RES1 >> 8); + tcp_set_flags(tc, tcp_get_flags(tc) | TH_RES1); #else tc->th_sum = TcpChecksum(pip); #endif diff --git a/sys/netinet/libalias/alias_irc.c b/sys/netinet/libalias/alias_irc.c index 3ef336b7333d..30cee74fff21 100644 --- a/sys/netinet/libalias/alias_irc.c +++ b/sys/netinet/libalias/alias_irc.c @@ -360,9 +360,9 @@ AliasHandleIrcOut(struct libalias *la, * matter, and this would probably allow it through * at least _some_ firewalls. 
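The alias_ftp.c hunk above, and the matching ones in alias_irc.c, alias_proxy.c, alias_skinny.c and alias_smedia.c below, stop writing th_x2 directly and go through the TCP flag accessors, which expose th_x2 and th_flags as a single 16-bit value; as the surrounding #ifdef _KERNEL blocks suggest, TH_RES1 marks the segment so the checksum is recomputed later instead of being calculated in place. The idiom, spelled out:

    uint16_t flags;

    flags = tcp_get_flags(tc);      /* 16-bit view spanning th_x2 and th_flags */
    flags |= TH_RES1;               /* mark segment: checksum still to be redone */
    tcp_set_flags(tc, flags);       /* writes both th_x2 and th_flags back */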
*/ - dcc_lnk = FindUdpTcpOut(la, true_addr, destaddr, + (void)FindUdpTcpOut(la, true_addr, destaddr, true_port, 0, - IPPROTO_TCP, 1); + IPPROTO_TCP, 1, &dcc_lnk); DBprintf(("Got a DCC link\n")); if (dcc_lnk) { struct in_addr alias_address; /* Address from aliasing */ @@ -456,7 +456,7 @@ AliasHandleIrcOut(struct libalias *la, /* Compute TCP checksum for revised packet */ tc->th_sum = 0; #ifdef _KERNEL - tc->th_x2 = (TH_RES1 >> 8); + tcp_set_flags(tc, tcp_get_flags(tc) | TH_RES1); #else tc->th_sum = TcpChecksum(pip); #endif diff --git a/sys/netinet/libalias/alias_local.h b/sys/netinet/libalias/alias_local.h index 7b82621a105b..7c1dcb0c8eb0 100644 --- a/sys/netinet/libalias/alias_local.h +++ b/sys/netinet/libalias/alias_local.h @@ -94,10 +94,12 @@ struct libalias { * if no aliasing link already exists */ struct in_addr targetAddress; /* Lookup table of pointers to chains of link records. - * Each link record is doubly indexed into input and - * output lookup tables. */ + * Each link record is indexed into input, + * output and "internal endpoint" lookup tables. */ SPLAY_HEAD(splay_out, alias_link) linkSplayOut; SPLAY_HEAD(splay_in, group_in) linkSplayIn; + SPLAY_HEAD(splay_internal_endpoint, alias_link) + linkSplayInternalEndpoint; LIST_HEAD (, alias_link) pptpList; /* HouseKeeping */ TAILQ_HEAD (, alias_link) checkExpire; @@ -237,12 +239,12 @@ struct alias_link * AddLink(struct libalias *la, struct in_addr src_addr, struct in_addr dst_addr, struct in_addr alias_addr, u_short src_port, u_short dst_port, int alias_param, int link_type); -struct alias_link * +int FindIcmpIn(struct libalias *la, struct in_addr _dst_addr, struct in_addr _alias_addr, - u_short _id_alias, int _create); -struct alias_link * + u_short _id_alias, int _create, struct alias_link **_lnkp); +int FindIcmpOut(struct libalias *la, struct in_addr _src_addr, struct in_addr _dst_addr, - u_short _id, int _create); + u_short _id, int _create, struct alias_link **_lnkp); struct alias_link * FindFragmentIn1(struct libalias *la, struct in_addr _dst_addr, struct in_addr _alias_addr, u_short _ip_id); @@ -253,18 +255,20 @@ struct alias_link * AddFragmentPtrLink(struct libalias *la, struct in_addr _dst_addr, u_short _ip_id); struct alias_link * FindFragmentPtr(struct libalias *la, struct in_addr _dst_addr, u_short _ip_id); -struct alias_link * +int FindProtoIn(struct libalias *la, struct in_addr _dst_addr, struct in_addr _alias_addr, - u_char _proto); -struct alias_link * + u_char _proto, struct alias_link **_lnkp); +int FindProtoOut(struct libalias *la, struct in_addr _src_addr, struct in_addr _dst_addr, - u_char _proto); -struct alias_link * + u_char _proto, struct alias_link **_lnkp); +int FindUdpTcpIn(struct libalias *la, struct in_addr _dst_addr, struct in_addr _alias_addr, - u_short _dst_port, u_short _alias_port, u_char _proto, int _create); -struct alias_link * + u_short _dst_port, u_short _alias_port, u_char _proto, int _create, + struct alias_link **_lnkp); +int FindUdpTcpOut(struct libalias *la, struct in_addr _src_addr, struct in_addr _dst_addr, - u_short _src_port, u_short _dst_port, u_char _proto, int _create); + u_short _src_port, u_short _dst_port, u_char _proto, int _create, + struct alias_link **_lnkp); struct alias_link * AddPptp(struct libalias *la, struct in_addr _src_addr, struct in_addr _dst_addr, struct in_addr _alias_addr, u_int16_t _src_call_id); diff --git a/sys/netinet/libalias/alias_proxy.c b/sys/netinet/libalias/alias_proxy.c index dd685bed760d..0ff4b87b5000 100644 --- a/sys/netinet/libalias/alias_proxy.c 
+++ b/sys/netinet/libalias/alias_proxy.c @@ -366,7 +366,7 @@ ProxyEncodeTcpStream(struct alias_link *lnk, tc->th_sum = 0; #ifdef _KERNEL - tc->th_x2 = (TH_RES1 >> 8); + tcp_set_flags(tc, tcp_get_flags(tc) | TH_RES1); #else tc->th_sum = TcpChecksum(pip); #endif diff --git a/sys/netinet/libalias/alias_sctp.c b/sys/netinet/libalias/alias_sctp.c index 6781c33f5edb..5ccf31697b42 100644 --- a/sys/netinet/libalias/alias_sctp.c +++ b/sys/netinet/libalias/alias_sctp.c @@ -72,12 +72,12 @@ #ifdef _KERNEL -#include <machine/stdarg.h> #include <sys/param.h> #include <sys/gsb_crc32.h> #include <sys/systm.h> #include <sys/kernel.h> #include <sys/module.h> +#include <sys/stdarg.h> #include <sys/syslog.h> #include <netinet/libalias/alias_sctp.h> #include <netinet/libalias/alias.h> diff --git a/sys/netinet/libalias/alias_skinny.c b/sys/netinet/libalias/alias_skinny.c index 47d66a474fb4..fd9e15d3ad40 100644 --- a/sys/netinet/libalias/alias_skinny.c +++ b/sys/netinet/libalias/alias_skinny.c @@ -214,7 +214,7 @@ alias_skinny_reg_msg(struct RegisterMessage *reg_msg, struct ip *pip, tc->th_sum = 0; #ifdef _KERNEL - tc->th_x2 = (TH_RES1 >> 8); + tcp_set_flags(tc, tcp_get_flags(tc) | TH_RES1); #else tc->th_sum = TcpChecksum(pip); #endif @@ -257,7 +257,7 @@ alias_skinny_port_msg(struct IpPortMessage *port_msg, struct ip *pip, tc->th_sum = 0; #ifdef _KERNEL - tc->th_x2 = (TH_RES1 >> 8); + tcp_set_flags(tc, tcp_get_flags(tc) | TH_RES1); #else tc->th_sum = TcpChecksum(pip); #endif @@ -279,15 +279,15 @@ alias_skinny_opnrcvch_ack(struct libalias *la, struct OpenReceiveChannelAck *opn *localIpAddr = (u_int32_t)opnrcvch_ack->ipAddr; null_addr.s_addr = INADDR_ANY; - opnrcv_lnk = FindUdpTcpOut(la, pip->ip_src, null_addr, + (void)FindUdpTcpOut(la, pip->ip_src, null_addr, htons((u_short) opnrcvch_ack->port), 0, - IPPROTO_UDP, 1); + IPPROTO_UDP, 1, &opnrcv_lnk); opnrcvch_ack->ipAddr = (u_int32_t)GetAliasAddress(opnrcv_lnk).s_addr; opnrcvch_ack->port = (u_int32_t)ntohs(GetAliasPort(opnrcv_lnk)); tc->th_sum = 0; #ifdef _KERNEL - tc->th_x2 = (TH_RES1 >> 8); + tcp_set_flags(tc, tcp_get_flags(tc) | TH_RES1); #else tc->th_sum = TcpChecksum(pip); #endif diff --git a/sys/netinet/libalias/alias_smedia.c b/sys/netinet/libalias/alias_smedia.c index 6c67e0d8f006..badd75a45c61 100644 --- a/sys/netinet/libalias/alias_smedia.c +++ b/sys/netinet/libalias/alias_smedia.c @@ -402,7 +402,7 @@ alias_rtsp_out(struct libalias *la, struct ip *pip, tc->th_sum = 0; #ifdef _KERNEL - tc->th_x2 = (TH_RES1 >> 8); + tcp_set_flags(tc, tcp_get_flags(tc) | TH_RES1); #else tc->th_sum = TcpChecksum(pip); #endif @@ -435,8 +435,8 @@ alias_pna_out(struct libalias *la, struct ip *pip, if ((ntohs(msg_id) == 1) || (ntohs(msg_id) == 7)) { memcpy(&port, work, 2); - pna_links = FindUdpTcpOut(la, pip->ip_src, GetDestAddress(lnk), - port, 0, IPPROTO_UDP, 1); + (void)FindUdpTcpOut(la, pip->ip_src, GetDestAddress(lnk), + port, 0, IPPROTO_UDP, 1, &pna_links); if (pna_links != NULL) { #ifndef NO_FW_PUNCH /* Punch hole in firewall */ @@ -449,7 +449,7 @@ alias_pna_out(struct libalias *la, struct ip *pip, /* Compute TCP checksum for revised packet */ tc->th_sum = 0; #ifdef _KERNEL - tc->th_x2 = (TH_RES1 >> 8); + tcp_set_flags(tc, tcp_get_flags(tc) | TH_RES1); #else tc->th_sum = TcpChecksum(pip); #endif diff --git a/sys/netinet/libalias/libalias.3 b/sys/netinet/libalias/libalias.3 index b4d123682f0b..1b8ecc14059d 100644 --- a/sys/netinet/libalias/libalias.3 +++ b/sys/netinet/libalias/libalias.3 @@ -23,7 +23,7 @@ .\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 
POSSIBILITY OF .\" SUCH DAMAGE. .\" -.Dd May 31, 2021 +.Dd November 29, 2024 .Dt LIBALIAS 3 .Os .Sh NAME @@ -200,11 +200,11 @@ is closed. .It Dv PKT_ALIAS_UNREGISTERED_ONLY If this mode bit is set, traffic on the local network which does not originate from unregistered address spaces will be ignored. -Standard Class A, B and C unregistered addresses are: +The standard private IP address ranges are: .Pp -10.0.0.0 -> 10.255.255.255 (Class A subnet) -172.16.0.0 -> 172.31.255.255 (Class B subnets) -192.168.0.0 -> 192.168.255.255 (Class C subnets) +10.0.0.0 -> 10.255.255.255 (/8) +172.16.0.0 -> 172.31.255.255 (/12) +192.168.0.0 -> 192.168.255.255 (/16) .Pp This option is useful in the case that the packet aliasing host has both registered and unregistered subnets on different interfaces. @@ -270,6 +270,26 @@ See section in .Xr ipfw 8 for more details. +.It Dv PKT_ALIAS_UDP_EIM +When this bit is set, UDP uses endpoint-independent mapping (EIM), as per +RFC 4787 ("full cone" NAT of RFC 3489). +All packets from the same internal address:port are mapped to the same NAT +address:port, regardless of their destination address:port. +If filtering rules allow, and if +.Em PKT_ALIAS_DENY_INCOMING +is unset, any other external address:port can +also send to the internal address:port through its mapped NAT address:port. +This is more compatible with applications, and can reduce the need for port +forwarding, but less scalable as each NAT address:port can only be +concurrently used by at most one internal address:port. +.Pp +When this bit is unset, UDP packets use endpoint-dependent mapping (EDM) +("symmetric" NAT). +Each connection from a particular internal address:port to different +external addresses:ports is mapped to a random and unpredictable NAT +address:port. +Two applications behind EDM NATs can only connect to each other +by port forwarding on the NAT, or tunnelling through an in-between server. .El .Ed .Pp diff --git a/sys/netinet/pim.h b/sys/netinet/pim.h index 98230fc6ae2d..4744ffc7e9d8 100644 --- a/sys/netinet/pim.h +++ b/sys/netinet/pim.h @@ -71,7 +71,7 @@ struct pim { #endif /* ! _PIM_VT */ uint8_t pim_reserved; /* Reserved */ uint16_t pim_cksum; /* IP-style checksum */ -}; +} __packed; /* KAME-related name backward compatibility */ #define pim_ver pim_vers #define pim_rsv pim_reserved diff --git a/sys/netinet/raw_ip.c b/sys/netinet/raw_ip.c index a6bef1c7e275..66070faf97e9 100644 --- a/sys/netinet/raw_ip.c +++ b/sys/netinet/raw_ip.c @@ -30,7 +30,6 @@ * SUCH DAMAGE.
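Marking wire-format structures __packed, as done here for struct pim and earlier for struct ipovly, pins the C layout to the on-wire layout regardless of the target's alignment rules. A compile-time check is a cheap way to document that expectation; the 4-byte size below is the PIM fixed header (version/type, reserved, checksum) and the assertion itself is illustrative, not part of this change:

    #include <netinet/pim.h>

    /* The PIM fixed header occupies exactly 4 octets on the wire. */
    _Static_assert(sizeof(struct pim) == 4, "struct pim must match the wire format");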
*/ -#include <sys/cdefs.h> #include "opt_inet.h" #include "opt_inet6.h" #include "opt_ipsec.h" @@ -50,6 +49,7 @@ #include <sys/signalvar.h> #include <sys/socket.h> #include <sys/socketvar.h> +#include <sys/stdarg.h> #include <sys/sx.h> #include <sys/sysctl.h> #include <sys/systm.h> @@ -75,7 +75,6 @@ #include <netipsec/ipsec_support.h> -#include <machine/stdarg.h> #include <security/mac/mac_framework.h> extern ipproto_input_t *ip_protox[]; @@ -128,6 +127,12 @@ int (*ip_rsvp_vif)(struct socket *, struct sockopt *); void (*ip_rsvp_force_done)(struct socket *); #endif /* INET */ +#define V_rip_bind_all_fibs VNET(rip_bind_all_fibs) +VNET_DEFINE(int, rip_bind_all_fibs) = 1; +SYSCTL_INT(_net_inet_raw, OID_AUTO, bind_all_fibs, CTLFLAG_VNET | CTLFLAG_RDTUN, + &VNET_NAME(rip_bind_all_fibs), 0, + "Bound sockets receive traffic from all FIBs"); + u_long rip_sendspace = 9216; SYSCTL_ULONG(_net_inet_raw, OID_AUTO, maxdgram, CTLFLAG_RW, &rip_sendspace, 0, "Maximum outgoing raw IP datagram size"); @@ -302,7 +307,9 @@ rip_input(struct mbuf **mp, int *offp, int proto) struct mbuf *m = *mp; struct inpcb *inp; struct sockaddr_in ripsrc; - int appended; + int appended, fib; + + M_ASSERTPKTHDR(m); *mp = NULL; appended = 0; @@ -312,6 +319,7 @@ rip_input(struct mbuf **mp, int *offp, int proto) ripsrc.sin_family = AF_INET; ripsrc.sin_addr = ctx.ip->ip_src; + fib = M_GETFIB(m); ifp = m->m_pkthdr.rcvif; inpi.hash = INP_PCBHASH_RAW(proto, ctx.ip->ip_src.s_addr, @@ -326,6 +334,12 @@ rip_input(struct mbuf **mp, int *offp, int proto) */ continue; } + if (V_rip_bind_all_fibs == 0 && fib != inp->inp_inc.inc_fibnum) + /* + * Sockets bound to a specific FIB can only receive + * packets from that FIB. + */ + continue; appended += rip_append(inp, ctx.ip, m, &ripsrc); } @@ -343,6 +357,9 @@ rip_input(struct mbuf **mp, int *offp, int proto) * and fall through into normal filter path if so. */ continue; + if (V_rip_bind_all_fibs == 0 && fib != inp->inp_inc.inc_fibnum) + continue; + /* * If this raw socket has multicast state, and we * have received a multicast, check if this socket @@ -584,7 +601,7 @@ rip_send(struct socket *so, int pruflags, struct mbuf *m, struct sockaddr *nam, * but we got this limitation from the beginning of history. */ if (ip->ip_id == 0) - ip_fillid(ip); + ip_fillid(ip, V_ip_random_id); /* * XXX prevent ip_output from overwriting header fields. @@ -625,8 +642,6 @@ rip_send(struct socket *so, int pruflags, struct mbuf *m, struct sockaddr *nam, * * When adding new socket options here, make sure to add access control * checks here as necessary. - * - * XXX-BZ inp locking? 
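The new net.inet.raw.bind_all_fibs tunable defaults to 1, the historical behaviour; when set to 0, a raw socket only receives packets whose mbuf FIB matches the socket's own FIB, and the SO_SETFIB handling just below is what attaches a socket to a FIB in the first place. A short userland sketch, error handling omitted:

    #include <sys/socket.h>
    #include <netinet/in.h>

    int s, fib = 1;

    s = socket(AF_INET, SOCK_RAW, IPPROTO_ICMP);
    /* With bind_all_fibs=0 this socket now sees only FIB 1 traffic. */
    setsockopt(s, SOL_SOCKET, SO_SETFIB, &fib, sizeof(fib));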
*/ int rip_ctloutput(struct socket *so, struct sockopt *sopt) @@ -635,11 +650,10 @@ rip_ctloutput(struct socket *so, struct sockopt *sopt) int error, optval; if (sopt->sopt_level != IPPROTO_IP) { - if ((sopt->sopt_level == SOL_SOCKET) && - (sopt->sopt_name == SO_SETFIB)) { - inp->inp_inc.inc_fibnum = so->so_fibnum; - return (0); - } + if (sopt->sopt_dir == SOPT_SET && + sopt->sopt_level == SOL_SOCKET && + sopt->sopt_name == SO_SETFIB) + return (ip_ctloutput(so, sopt)); return (EINVAL); } @@ -707,10 +721,12 @@ rip_ctloutput(struct socket *so, struct sockopt *sopt) sizeof optval); if (error) break; + INP_WLOCK(inp); if (optval) inp->inp_flags |= INP_HDRINCL; else inp->inp_flags &= ~INP_HDRINCL; + INP_WUNLOCK(inp); break; case IP_FW3: /* generic ipfw v.3 functions */ diff --git a/sys/netinet/sctp_asconf.c b/sys/netinet/sctp_asconf.c index 3a30b0ba3740..6b98557c45a6 100644 --- a/sys/netinet/sctp_asconf.c +++ b/sys/netinet/sctp_asconf.c @@ -1313,13 +1313,13 @@ sctp_asconf_queue_mgmt(struct sctp_tcb *stcb, struct sctp_ifa *ifa, #ifdef SCTP_DEBUG if (SCTP_BASE_SYSCTL(sctp_debug_on) & SCTP_DEBUG_ASCONF2) { if (type == SCTP_ADD_IP_ADDRESS) { - SCTP_PRINTF("asconf_queue_mgmt: inserted asconf ADD_IP_ADDRESS: "); + SCTPDBG(SCTP_DEBUG_ASCONF2, "asconf_queue_mgmt: inserted asconf ADD_IP_ADDRESS: "); SCTPDBG_ADDR(SCTP_DEBUG_ASCONF2, &ifa->address.sa); } else if (type == SCTP_DEL_IP_ADDRESS) { - SCTP_PRINTF("asconf_queue_mgmt: appended asconf DEL_IP_ADDRESS: "); + SCTPDBG(SCTP_DEBUG_ASCONF2, "asconf_queue_mgmt: appended asconf DEL_IP_ADDRESS: "); SCTPDBG_ADDR(SCTP_DEBUG_ASCONF2, &ifa->address.sa); } else { - SCTP_PRINTF("asconf_queue_mgmt: appended asconf SET_PRIM_ADDR: "); + SCTPDBG(SCTP_DEBUG_ASCONF2, "asconf_queue_mgmt: appended asconf SET_PRIM_ADDR: "); SCTPDBG_ADDR(SCTP_DEBUG_ASCONF2, &ifa->address.sa); } } diff --git a/sys/netinet/sctp_bsd_addr.c b/sys/netinet/sctp_bsd_addr.c index a91b0dde5967..ac715d8298ec 100644 --- a/sys/netinet/sctp_bsd_addr.c +++ b/sys/netinet/sctp_bsd_addr.c @@ -117,25 +117,26 @@ sctp_gather_internal_ifa_flags(struct sctp_ifa *ifa) { struct in6_ifaddr *ifa6; + KASSERT(ifa->address.sa.sa_family == AF_INET6, + ("sctp_gather_internal_ifa_flags() called with address family %u", + ifa->address.sa.sa_family)); ifa6 = (struct in6_ifaddr *)ifa->ifa; ifa->flags = ifa6->ia6_flags; - if (!MODULE_GLOBAL(ip6_use_deprecated)) { - if (ifa->flags & - IN6_IFF_DEPRECATED) { + if (MODULE_GLOBAL(ip6_use_deprecated)) { + ifa->localifa_flags &= ~SCTP_ADDR_IFA_UNUSEABLE; + } else { + if (ifa->flags & IN6_IFF_DEPRECATED) { ifa->localifa_flags |= SCTP_ADDR_IFA_UNUSEABLE; } else { ifa->localifa_flags &= ~SCTP_ADDR_IFA_UNUSEABLE; } - } else { - ifa->localifa_flags &= ~SCTP_ADDR_IFA_UNUSEABLE; } - if (ifa->flags & - (IN6_IFF_DETACHED | - IN6_IFF_ANYCAST | - IN6_IFF_NOTREADY)) { + if (ifa->flags & (IN6_IFF_DETACHED | IN6_IFF_DUPLICATED)) { + ifa->localifa_flags |= SCTP_ADDR_IFA_UNUSEABLE; + } + /* Right now, do not support IPv6 anycast addresses */ + if (ifa->flags & IN6_IFF_ANYCAST) { ifa->localifa_flags |= SCTP_ADDR_IFA_UNUSEABLE; - } else { - ifa->localifa_flags &= ~SCTP_ADDR_IFA_UNUSEABLE; } } #endif /* INET6 */ @@ -338,8 +339,8 @@ sctp_addr_change(struct ifaddr *ifa, int cmd) (void *)ifa, ifa->ifa_addr, ifa_flags, 1); } else { sctp_del_addr_from_vrf(SCTP_DEFAULT_VRFID, ifa->ifa_addr, - ifa->ifa_ifp->if_index, - ifa->ifa_ifp->if_xname); + (void *)ifa->ifa_ifp, + ifa->ifa_ifp->if_index); /* * We don't bump refcount here so when it completes the diff --git a/sys/netinet/sctp_header.h 
b/sys/netinet/sctp_header.h index 9696c4e954ba..c9fd0341f83a 100644 --- a/sys/netinet/sctp_header.h +++ b/sys/netinet/sctp_header.h @@ -83,7 +83,7 @@ struct sctp_supported_addr_param { /* heartbeat info parameter */ struct sctp_heartbeat_info_param { struct sctp_paramhdr ph; - uint32_t time_value_1; + time_t time_value_1; uint32_t time_value_2; uint32_t random_value1; uint32_t random_value2; diff --git a/sys/netinet/sctp_indata.c b/sys/netinet/sctp_indata.c index a30fd95fef30..693de313b970 100644 --- a/sys/netinet/sctp_indata.c +++ b/sys/netinet/sctp_indata.c @@ -746,21 +746,6 @@ sctp_build_readq_entry_from_ctl(struct sctp_queued_to_read *nc, struct sctp_queu nc->do_not_ref_stcb = control->do_not_ref_stcb; } -static void -sctp_reset_a_control(struct sctp_queued_to_read *control, - struct sctp_inpcb *inp, uint32_t tsn) -{ - control->fsn_included = tsn; - if (control->on_read_q) { - /* - * We have to purge it from there, hopefully this will work - * :-) - */ - TAILQ_REMOVE(&inp->read_queue, control, next); - control->on_read_q = 0; - } -} - static int sctp_handle_old_unordered_data(struct sctp_tcb *stcb, struct sctp_association *asoc, @@ -1922,7 +1907,8 @@ sctp_process_a_data_chunk(struct sctp_tcb *stcb, struct sctp_association *asoc, SCTP_SNPRINTF(msg, sizeof(msg), "Duplicate MID=%8.8x detected.", mid); goto err_out; } else { - if ((tsn == control->fsn_included + 1) && + if ((control->first_frag_seen) && + (tsn == control->fsn_included + 1) && (control->end_added == 0)) { SCTP_SNPRINTF(msg, sizeof(msg), "Illegal message sequence, missing end for MID: %8.8x", @@ -5241,6 +5227,10 @@ sctp_kick_prsctp_reorder_queue(struct sctp_tcb *stcb, uint32_t mid; int need_reasm_check = 0; + KASSERT(stcb != NULL, ("stcb == NULL")); + SCTP_TCB_LOCK_ASSERT(stcb); + SCTP_INP_READ_LOCK_ASSERT(stcb->sctp_ep); + asoc = &stcb->asoc; mid = strmin->last_mid_delivered; /* @@ -5278,11 +5268,9 @@ sctp_kick_prsctp_reorder_queue(struct sctp_tcb *stcb, /* deliver it to at least the delivery-q */ if (stcb->sctp_socket) { sctp_mark_non_revokable(asoc, control->sinfo_tsn); - sctp_add_to_readq(stcb->sctp_ep, stcb, - control, - &stcb->sctp_socket->so_rcv, - 1, SCTP_READ_LOCK_HELD, - SCTP_SO_NOT_LOCKED); + sctp_add_to_readq(stcb->sctp_ep, stcb, control, + &stcb->sctp_socket->so_rcv, 1, + SCTP_READ_LOCK_HELD, SCTP_SO_NOT_LOCKED); } } else { /* Its a fragmented message */ @@ -5352,8 +5340,7 @@ sctp_kick_prsctp_reorder_queue(struct sctp_tcb *stcb, strmin->last_mid_delivered = control->mid; if (stcb->sctp_socket) { sctp_mark_non_revokable(asoc, control->sinfo_tsn); - sctp_add_to_readq(stcb->sctp_ep, stcb, - control, + sctp_add_to_readq(stcb->sctp_ep, stcb, control, &stcb->sctp_socket->so_rcv, 1, SCTP_READ_LOCK_HELD, SCTP_SO_NOT_LOCKED); } @@ -5394,6 +5381,11 @@ sctp_flush_reassm_for_str_seq(struct sctp_tcb *stcb, * it can be delivered... But for now we just dump everything on the * queue. 
*/ + + KASSERT(stcb != NULL, ("stcb == NULL")); + SCTP_TCB_LOCK_ASSERT(stcb); + SCTP_INP_READ_LOCK_ASSERT(stcb->sctp_ep); + if (!asoc->idata_supported && !ordered && control->first_frag_seen && SCTP_TSN_GT(control->fsn_included, cumtsn)) { @@ -5424,12 +5416,25 @@ sctp_flush_reassm_for_str_seq(struct sctp_tcb *stcb, sctp_free_a_chunk(stcb, chk, SCTP_SO_NOT_LOCKED); } if (!TAILQ_EMPTY(&control->reasm)) { - /* This has to be old data, unordered */ + KASSERT(!asoc->idata_supported, + ("Reassembly queue not empty for I-DATA")); + KASSERT(!ordered, + ("Reassembly queue not empty for ordered data")); if (control->data) { sctp_m_freem(control->data); control->data = NULL; } - sctp_reset_a_control(control, stcb->sctp_ep, cumtsn); + control->fsn_included = 0xffffffff; + control->first_frag_seen = 0; + control->last_frag_seen = 0; + if (control->on_read_q) { + /* + * We have to purge it from there, hopefully this + * will work :-) + */ + TAILQ_REMOVE(&stcb->sctp_ep->read_queue, control, next); + control->on_read_q = 0; + } chk = TAILQ_FIRST(&control->reasm); if (chk->rec.data.rcv_flags & SCTP_DATA_FIRST_FRAG) { TAILQ_REMOVE(&control->reasm, chk, sctp_next); diff --git a/sys/netinet/sctp_input.c b/sys/netinet/sctp_input.c index a55ef5ac1eab..dc31ffbc2161 100644 --- a/sys/netinet/sctp_input.c +++ b/sys/netinet/sctp_input.c @@ -2329,7 +2329,7 @@ sctp_handle_cookie_echo(struct mbuf *m, int iphlen, int offset, } ep = &(*inp_p)->sctp_ep; /* which cookie is it? */ - if ((cookie->time_entered.tv_sec < (long)ep->time_of_secret_change) && + if ((cookie->time_entered.tv_sec < ep->time_of_secret_change) && (ep->current_secret_number != ep->last_secret_number)) { /* it's the old cookie */ (void)sctp_hmac_m(SCTP_HMAC, @@ -2352,7 +2352,7 @@ sctp_handle_cookie_echo(struct mbuf *m, int iphlen, int offset, /* compare the received digest with the computed digest */ if (timingsafe_bcmp(calc_sig, sig, SCTP_SIGNATURE_SIZE) != 0) { /* try the old cookie? */ - if ((cookie->time_entered.tv_sec == (long)ep->time_of_secret_change) && + if ((cookie->time_entered.tv_sec == ep->time_of_secret_change) && (ep->current_secret_number != ep->last_secret_number)) { /* compute digest with old */ (void)sctp_hmac_m(SCTP_HMAC, @@ -4231,6 +4231,8 @@ sctp_handle_packet_dropped(struct sctp_pktdrop_chunk *cp, SCTP_STAT_INCR(sctps_pdrpmbda); } } else { + desc.tsn_ifany = htonl(0); + memset(desc.data_bytes, 0, SCTP_NUM_DB_TO_VERIFY); if (pktdrp_flags & SCTP_FROM_MIDDLE_BOX) { SCTP_STAT_INCR(sctps_pdrpmbct); } diff --git a/sys/netinet/sctp_os_bsd.h b/sys/netinet/sctp_os_bsd.h index eb0caec942e9..9cec02aa6a07 100644 --- a/sys/netinet/sctp_os_bsd.h +++ b/sys/netinet/sctp_os_bsd.h @@ -342,7 +342,7 @@ typedef struct callout sctp_os_timer_t; } while(0) /* Other m_pkthdr type things */ -#define SCTP_IS_IT_BROADCAST(dst, m) ((m->m_flags & M_PKTHDR) ? in_broadcast(dst, m->m_pkthdr.rcvif) : 0) +#define SCTP_IS_IT_BROADCAST(dst, m) ((m->m_flags & M_PKTHDR) ? 
in_ifnet_broadcast(dst, m->m_pkthdr.rcvif) : 0) #define SCTP_IS_IT_LOOPBACK(m) ((m->m_flags & M_PKTHDR) && ((m->m_pkthdr.rcvif == NULL) || (m->m_pkthdr.rcvif->if_type == IFT_LOOP))) /* This converts any input packet header diff --git a/sys/netinet/sctp_output.c b/sys/netinet/sctp_output.c index a8facff6b917..e4bdb4291972 100644 --- a/sys/netinet/sctp_output.c +++ b/sys/netinet/sctp_output.c @@ -3655,8 +3655,7 @@ sctp_process_cmsgs_for_init(struct sctp_tcb *stcb, struct mbuf *control, int *er sin.sin_len = sizeof(struct sockaddr_in); sin.sin_port = stcb->rport; m_copydata(control, cmsg_data_off, sizeof(struct in_addr), (caddr_t)&sin.sin_addr); - if ((sin.sin_addr.s_addr == INADDR_ANY) || - (sin.sin_addr.s_addr == INADDR_BROADCAST) || + if (in_broadcast(sin.sin_addr) || IN_MULTICAST(ntohl(sin.sin_addr.s_addr))) { *error = EINVAL; return (1); @@ -3687,8 +3686,7 @@ sctp_process_cmsgs_for_init(struct sctp_tcb *stcb, struct mbuf *control, int *er #ifdef INET if (IN6_IS_ADDR_V4MAPPED(&sin6.sin6_addr)) { in6_sin6_2_sin(&sin, &sin6); - if ((sin.sin_addr.s_addr == INADDR_ANY) || - (sin.sin_addr.s_addr == INADDR_BROADCAST) || + if (in_broadcast(sin.sin_addr) || IN_MULTICAST(ntohl(sin.sin_addr.s_addr))) { *error = EINVAL; return (1); @@ -4073,7 +4071,7 @@ sctp_lowlevel_chunk_output(struct sctp_inpcb *inp, ip->ip_off = htons(0); } /* FreeBSD has a function for ip_id's */ - ip_fillid(ip); + ip_fillid(ip, V_ip_random_id); ip->ip_ttl = inp->ip_inp.inp.inp_ip_ttl; ip->ip_len = htons(packet_length); @@ -6705,7 +6703,9 @@ sctp_sendall_iterator(struct sctp_inpcb *inp, struct sctp_tcb *stcb, void *ptr, } else { m = sctp_get_mbuf_for_msg(sizeof(struct sctp_paramhdr), 0, M_NOWAIT, 1, MT_DATA); - SCTP_BUF_LEN(m) = sizeof(struct sctp_paramhdr); + if (m != NULL) { + SCTP_BUF_LEN(m) = sizeof(struct sctp_paramhdr); + } } if (m != NULL) { struct sctp_paramhdr *ph; @@ -6909,10 +6909,20 @@ static int sctp_sendall(struct sctp_inpcb *inp, struct uio *uio, struct mbuf *m, struct sctp_nonpad_sndrcvinfo *srcv) { - int ret; struct sctp_copy_all *ca; + struct mbuf *mat; + ssize_t sndlen; + int ret; - if (uio->uio_resid > (ssize_t)SCTP_BASE_SYSCTL(sctp_sendall_limit)) { + if (uio != NULL) { + sndlen = uio->uio_resid; + } else { + sndlen = 0; + for (mat = m; mat; mat = SCTP_BUF_NEXT(mat)) { + sndlen += SCTP_BUF_LEN(mat); + } + } + if (sndlen > (ssize_t)SCTP_BASE_SYSCTL(sctp_sendall_limit)) { /* You must not be larger than the limit! */ return (EMSGSIZE); } @@ -6924,12 +6934,10 @@ sctp_sendall(struct sctp_inpcb *inp, struct uio *uio, struct mbuf *m, return (ENOMEM); } memset(ca, 0, sizeof(struct sctp_copy_all)); - ca->inp = inp; if (srcv != NULL) { memcpy(&ca->sndrcv, srcv, sizeof(struct sctp_nonpad_sndrcvinfo)); } - /* Serialize. 
*/ SCTP_INP_WLOCK(inp); if ((inp->sctp_flags & SCTP_PCB_FLAGS_SND_ITERATOR_UP) != 0) { @@ -6940,15 +6948,14 @@ sctp_sendall(struct sctp_inpcb *inp, struct uio *uio, struct mbuf *m, } inp->sctp_flags |= SCTP_PCB_FLAGS_SND_ITERATOR_UP; SCTP_INP_WUNLOCK(inp); - /* * take off the sendall flag, it would be bad if we failed to do * this :-0 */ ca->sndrcv.sinfo_flags &= ~SCTP_SENDALL; /* get length and mbuf chain */ - if (uio) { - ca->sndlen = uio->uio_resid; + ca->sndlen = sndlen; + if (uio != NULL) { ca->m = sctp_copy_out_all(uio, ca->sndlen); if (ca->m == NULL) { SCTP_FREE(ca, SCTP_M_COPYAL); @@ -6960,20 +6967,14 @@ sctp_sendall(struct sctp_inpcb *inp, struct uio *uio, struct mbuf *m, return (ENOMEM); } } else { - /* Gather the length of the send */ - struct mbuf *mat; - - ca->sndlen = 0; - for (mat = m; mat; mat = SCTP_BUF_NEXT(mat)) { - ca->sndlen += SCTP_BUF_LEN(mat); - } + ca->m = m; } ret = sctp_initiate_iterator(NULL, sctp_sendall_iterator, NULL, SCTP_PCB_ANY_FLAGS, SCTP_PCB_ANY_FEATURES, SCTP_ASOC_ANY_STATE, (void *)ca, 0, sctp_sendall_completes, inp, 1); - if (ret) { + if (ret != 0) { SCTP_INP_WLOCK(inp); inp->sctp_flags &= ~SCTP_PCB_FLAGS_SND_ITERATOR_UP; SCTP_INP_WUNLOCK(inp); @@ -11196,7 +11197,7 @@ sctp_send_resp_msg(struct sockaddr *src, struct sockaddr *dst, ip->ip_hl = (sizeof(struct ip) >> 2); ip->ip_tos = 0; ip->ip_off = htons(IP_DF); - ip_fillid(ip); + ip_fillid(ip, V_ip_random_id); ip->ip_ttl = MODULE_GLOBAL(ip_defttl); if (port) { ip->ip_p = IPPROTO_UDP; @@ -11424,7 +11425,7 @@ sctp_send_hb(struct sctp_tcb *stcb, struct sctp_nets *net, int so_locked) /* Fill out hb parameter */ hb->heartbeat.hb_info.ph.param_type = htons(SCTP_HEARTBEAT_INFO); hb->heartbeat.hb_info.ph.param_length = htons(sizeof(struct sctp_heartbeat_info_param)); - hb->heartbeat.hb_info.time_value_1 = (uint32_t)now.tv_sec; + hb->heartbeat.hb_info.time_value_1 = now.tv_sec; hb->heartbeat.hb_info.time_value_2 = now.tv_usec; /* Did our user request this one, put it in */ hb->heartbeat.hb_info.addr_family = (uint8_t)net->ro._l_addr.sa.sa_family; @@ -13910,15 +13911,15 @@ sctp_v4src_match_nexthop(struct sctp_ifa *sifa, sctp_route_t *ro) mask = (struct sockaddr_in *)(ifa->ifa_netmask); sin = &sifa->address.sin; srcnetaddr.s_addr = (sin->sin_addr.s_addr & mask->sin_addr.s_addr); - SCTPDBG(SCTP_DEBUG_OUTPUT1, "match_nexthop4: src address is "); + SCTPDBG(SCTP_DEBUG_OUTPUT2, "match_nexthop4: src address is "); SCTPDBG_ADDR(SCTP_DEBUG_OUTPUT2, &sifa->address.sa); - SCTPDBG(SCTP_DEBUG_OUTPUT1, "network address is %x\n", srcnetaddr.s_addr); + SCTPDBG(SCTP_DEBUG_OUTPUT2, "network address is %x\n", srcnetaddr.s_addr); sin = &ro->ro_nh->gw4_sa; gwnetaddr.s_addr = (sin->sin_addr.s_addr & mask->sin_addr.s_addr); - SCTPDBG(SCTP_DEBUG_OUTPUT1, "match_nexthop4: nexthop is "); + SCTPDBG(SCTP_DEBUG_OUTPUT2, "match_nexthop4: nexthop is "); SCTPDBG_ADDR(SCTP_DEBUG_OUTPUT2, &ro->ro_nh->gw_sa); - SCTPDBG(SCTP_DEBUG_OUTPUT1, "network address is %x\n", gwnetaddr.s_addr); + SCTPDBG(SCTP_DEBUG_OUTPUT2, "network address is %x\n", gwnetaddr.s_addr); if (srcnetaddr.s_addr == gwnetaddr.s_addr) { return (1); } diff --git a/sys/netinet/sctp_pcb.c b/sys/netinet/sctp_pcb.c index 1509ac13901e..2092f20e3c22 100644 --- a/sys/netinet/sctp_pcb.c +++ b/sys/netinet/sctp_pcb.c @@ -192,21 +192,16 @@ sctp_find_ifn(void *ifn, uint32_t ifn_index) struct sctp_ifn *sctp_ifnp; struct sctp_ifnlist *hash_ifn_head; - /* - * We assume the lock is held for the addresses if that's wrong - * problems could occur :-) - */ SCTP_IPI_ADDR_LOCK_ASSERT(); + KASSERT(ifn != 
NULL, ("sctp_find_ifn(NULL, %u) called", ifn_index)); hash_ifn_head = &SCTP_BASE_INFO(vrf_ifn_hash)[(ifn_index & SCTP_BASE_INFO(vrf_ifn_hashmark))]; LIST_FOREACH(sctp_ifnp, hash_ifn_head, next_bucket) { - if (sctp_ifnp->ifn_index == ifn_index) { - return (sctp_ifnp); - } - if (sctp_ifnp->ifn_p && ifn && (sctp_ifnp->ifn_p == ifn)) { - return (sctp_ifnp); + if (sctp_ifnp->ifn_index == ifn_index && + sctp_ifnp->ifn_p == ifn) { + break; } } - return (NULL); + return (sctp_ifnp); } struct sctp_vrf * @@ -239,7 +234,7 @@ sctp_free_vrf(struct sctp_vrf *vrf) } } -void +static void sctp_free_ifn(struct sctp_ifn *sctp_ifnp) { if (SCTP_DECREMENT_AND_CHECK_REFCOUNT(&sctp_ifnp->refcount)) { @@ -253,17 +248,6 @@ sctp_free_ifn(struct sctp_ifn *sctp_ifnp) } void -sctp_update_ifn_mtu(uint32_t ifn_index, uint32_t mtu) -{ - struct sctp_ifn *sctp_ifnp; - - sctp_ifnp = sctp_find_ifn((void *)NULL, ifn_index); - if (sctp_ifnp != NULL) { - sctp_ifnp->ifn_mtu = mtu; - } -} - -void sctp_free_ifa(struct sctp_ifa *sctp_ifap) { if (SCTP_DECREMENT_AND_CHECK_REFCOUNT(&sctp_ifap->refcount)) { @@ -277,123 +261,30 @@ sctp_free_ifa(struct sctp_ifa *sctp_ifap) } static void -sctp_delete_ifn(struct sctp_ifn *sctp_ifnp, int hold_addr_lock) +sctp_delete_ifn(struct sctp_ifn *sctp_ifnp) { - struct sctp_ifn *found; - found = sctp_find_ifn(sctp_ifnp->ifn_p, sctp_ifnp->ifn_index); - if (found == NULL) { + SCTP_IPI_ADDR_WLOCK_ASSERT(); + if (sctp_find_ifn(sctp_ifnp->ifn_p, sctp_ifnp->ifn_index) == NULL) { /* Not in the list.. sorry */ return; } - if (hold_addr_lock == 0) { - SCTP_IPI_ADDR_WLOCK(); - } else { - SCTP_IPI_ADDR_WLOCK_ASSERT(); - } LIST_REMOVE(sctp_ifnp, next_bucket); LIST_REMOVE(sctp_ifnp, next_ifn); - if (hold_addr_lock == 0) { - SCTP_IPI_ADDR_WUNLOCK(); - } /* Take away the reference, and possibly free it */ sctp_free_ifn(sctp_ifnp); } -void -sctp_mark_ifa_addr_down(uint32_t vrf_id, struct sockaddr *addr, - const char *if_name, uint32_t ifn_index) -{ - struct sctp_vrf *vrf; - struct sctp_ifa *sctp_ifap; - - SCTP_IPI_ADDR_RLOCK(); - vrf = sctp_find_vrf(vrf_id); - if (vrf == NULL) { - SCTPDBG(SCTP_DEBUG_PCB4, "Can't find vrf_id 0x%x\n", vrf_id); - goto out; - } - sctp_ifap = sctp_find_ifa_by_addr(addr, vrf->vrf_id, SCTP_ADDR_LOCKED); - if (sctp_ifap == NULL) { - SCTPDBG(SCTP_DEBUG_PCB4, "Can't find sctp_ifap for address\n"); - goto out; - } - if (sctp_ifap->ifn_p == NULL) { - SCTPDBG(SCTP_DEBUG_PCB4, "IFA has no IFN - can't mark unusable\n"); - goto out; - } - if (if_name) { - if (strncmp(if_name, sctp_ifap->ifn_p->ifn_name, SCTP_IFNAMSIZ) != 0) { - SCTPDBG(SCTP_DEBUG_PCB4, "IFN %s of IFA not the same as %s\n", - sctp_ifap->ifn_p->ifn_name, if_name); - goto out; - } - } else { - if (sctp_ifap->ifn_p->ifn_index != ifn_index) { - SCTPDBG(SCTP_DEBUG_PCB4, "IFA owned by ifn_index:%d down command for ifn_index:%d - ignored\n", - sctp_ifap->ifn_p->ifn_index, ifn_index); - goto out; - } - } - - sctp_ifap->localifa_flags &= (~SCTP_ADDR_VALID); - sctp_ifap->localifa_flags |= SCTP_ADDR_IFA_UNUSEABLE; -out: - SCTP_IPI_ADDR_RUNLOCK(); -} - -void -sctp_mark_ifa_addr_up(uint32_t vrf_id, struct sockaddr *addr, - const char *if_name, uint32_t ifn_index) -{ - struct sctp_vrf *vrf; - struct sctp_ifa *sctp_ifap; - - SCTP_IPI_ADDR_RLOCK(); - vrf = sctp_find_vrf(vrf_id); - if (vrf == NULL) { - SCTPDBG(SCTP_DEBUG_PCB4, "Can't find vrf_id 0x%x\n", vrf_id); - goto out; - } - sctp_ifap = sctp_find_ifa_by_addr(addr, vrf->vrf_id, SCTP_ADDR_LOCKED); - if (sctp_ifap == NULL) { - SCTPDBG(SCTP_DEBUG_PCB4, "Can't find sctp_ifap for address\n"); - goto 
out; - } - if (sctp_ifap->ifn_p == NULL) { - SCTPDBG(SCTP_DEBUG_PCB4, "IFA has no IFN - can't mark unusable\n"); - goto out; - } - if (if_name) { - if (strncmp(if_name, sctp_ifap->ifn_p->ifn_name, SCTP_IFNAMSIZ) != 0) { - SCTPDBG(SCTP_DEBUG_PCB4, "IFN %s of IFA not the same as %s\n", - sctp_ifap->ifn_p->ifn_name, if_name); - goto out; - } - } else { - if (sctp_ifap->ifn_p->ifn_index != ifn_index) { - SCTPDBG(SCTP_DEBUG_PCB4, "IFA owned by ifn_index:%d down command for ifn_index:%d - ignored\n", - sctp_ifap->ifn_p->ifn_index, ifn_index); - goto out; - } - } - - sctp_ifap->localifa_flags &= (~SCTP_ADDR_IFA_UNUSEABLE); - sctp_ifap->localifa_flags |= SCTP_ADDR_VALID; -out: - SCTP_IPI_ADDR_RUNLOCK(); -} - /*- * Add an ifa to an ifn. * Register the interface as necessary. - * NOTE: ADDR write lock MUST be held. */ static void sctp_add_ifa_to_ifn(struct sctp_ifn *sctp_ifnp, struct sctp_ifa *sctp_ifap) { int ifa_af; + SCTP_IPI_ADDR_WLOCK_ASSERT(); LIST_INSERT_HEAD(&sctp_ifnp->ifalist, sctp_ifap, next_ifa); sctp_ifap->ifn_p = sctp_ifnp; atomic_add_int(&sctp_ifap->ifn_p->refcount, 1); @@ -424,11 +315,11 @@ sctp_add_ifa_to_ifn(struct sctp_ifn *sctp_ifnp, struct sctp_ifa *sctp_ifap) * Remove an ifa from its ifn. * If no more addresses exist, remove the ifn too. Otherwise, re-register * the interface based on the remaining address families left. - * NOTE: ADDR write lock MUST be held. */ static void sctp_remove_ifa_from_ifn(struct sctp_ifa *sctp_ifap) { + SCTP_IPI_ADDR_WLOCK_ASSERT(); LIST_REMOVE(sctp_ifap, next_ifa); if (sctp_ifap->ifn_p) { /* update address counts */ @@ -450,7 +341,7 @@ sctp_remove_ifa_from_ifn(struct sctp_ifa *sctp_ifap) if (LIST_EMPTY(&sctp_ifap->ifn_p->ifalist)) { /* remove the ifn, possibly freeing it */ - sctp_delete_ifn(sctp_ifap->ifn_p, SCTP_ADDR_LOCKED); + sctp_delete_ifn(sctp_ifap->ifn_p); } else { /* re-register address family type, if needed */ if ((sctp_ifap->ifn_p->num_v6 == 0) && @@ -479,7 +370,6 @@ sctp_add_addr_to_vrf(uint32_t vrf_id, void *ifn, uint32_t ifn_index, struct sctp_ifalist *hash_addr_head; struct sctp_ifnlist *hash_ifn_head; uint32_t hash_of_addr; - int new_ifn_af = 0; #ifdef SCTP_DEBUG SCTPDBG(SCTP_DEBUG_PCB4, "vrf_id 0x%x: adding address: ", vrf_id); @@ -543,59 +433,74 @@ sctp_add_addr_to_vrf(uint32_t vrf_id, void *ifn, uint32_t ifn_index, LIST_INSERT_HEAD(hash_ifn_head, sctp_ifnp, next_bucket); LIST_INSERT_HEAD(&vrf->ifnlist, sctp_ifnp, next_ifn); atomic_add_int(&SCTP_BASE_INFO(ipi_count_ifns), 1); - new_ifn_af = 1; } sctp_ifap = sctp_find_ifa_by_addr(addr, vrf->vrf_id, SCTP_ADDR_LOCKED); - if (sctp_ifap) { - /* Hmm, it already exists? 
*/ - if ((sctp_ifap->ifn_p) && - (sctp_ifap->ifn_p->ifn_index == ifn_index)) { - SCTPDBG(SCTP_DEBUG_PCB4, "Using existing ifn %s (0x%x) for ifa %p\n", - sctp_ifap->ifn_p->ifn_name, ifn_index, - (void *)sctp_ifap); - if (new_ifn_af) { - /* Remove the created one that we don't want */ - sctp_delete_ifn(sctp_ifnp, SCTP_ADDR_LOCKED); - } - if (sctp_ifap->localifa_flags & SCTP_BEING_DELETED) { - /* easy to solve, just switch back to active */ - SCTPDBG(SCTP_DEBUG_PCB4, "Clearing deleted ifa flag\n"); - sctp_ifap->localifa_flags = SCTP_ADDR_VALID; - sctp_ifap->ifn_p = sctp_ifnp; - atomic_add_int(&sctp_ifap->ifn_p->refcount, 1); - } - exit_stage_left: - SCTP_IPI_ADDR_WUNLOCK(); - if (new_sctp_ifnp != NULL) { - SCTP_FREE(new_sctp_ifnp, SCTP_M_IFN); - } - SCTP_FREE(new_sctp_ifap, SCTP_M_IFA); - return (sctp_ifap); - } else { - if (sctp_ifap->ifn_p) { + if (sctp_ifap != NULL) { + /* The address being added is already or still known. */ + if (sctp_ifap->ifn_p != NULL) { + if (sctp_ifap->ifn_p->ifn_index == ifn_index && + sctp_ifap->ifn_p->ifn_p == ifn) { + SCTPDBG(SCTP_DEBUG_PCB4, + "Using existing ifn %s (0x%x) for ifa %p\n", + sctp_ifap->ifn_p->ifn_name, ifn_index, + (void *)sctp_ifap); + if (new_sctp_ifnp == NULL) { + /* Remove the created one not used. */ + sctp_delete_ifn(sctp_ifnp); + } + if (sctp_ifap->localifa_flags & SCTP_BEING_DELETED) { + /* Switch back to active. */ + SCTPDBG(SCTP_DEBUG_PCB4, + "Clearing deleted ifa flag\n"); + sctp_ifap->localifa_flags = SCTP_ADDR_VALID; +#ifdef INET6 + if (sctp_ifap->address.sa.sa_family == AF_INET6) { + sctp_gather_internal_ifa_flags(sctp_ifap); + } +#endif + sctp_ifap->ifn_p = sctp_ifnp; + atomic_add_int(&sctp_ifap->ifn_p->refcount, 1); + } + } else { /* * The last IFN gets the address, remove the - * old one + * old one. */ - SCTPDBG(SCTP_DEBUG_PCB4, "Moving ifa %p from %s (0x%x) to %s (0x%x)\n", - (void *)sctp_ifap, sctp_ifap->ifn_p->ifn_name, + SCTPDBG(SCTP_DEBUG_PCB4, + "Moving ifa %p from %s (0x%x) to %s (0x%x)\n", + (void *)sctp_ifap, + sctp_ifap->ifn_p->ifn_name, sctp_ifap->ifn_p->ifn_index, if_name, ifn_index); /* remove the address from the old ifn */ sctp_remove_ifa_from_ifn(sctp_ifap); /* move the address over to the new ifn */ sctp_add_ifa_to_ifn(sctp_ifnp, sctp_ifap); - goto exit_stage_left; - } else { - /* repair ifnp which was NULL ? */ - sctp_ifap->localifa_flags = SCTP_ADDR_VALID; - SCTPDBG(SCTP_DEBUG_PCB4, "Repairing ifn %p for ifa %p\n", - (void *)sctp_ifnp, (void *)sctp_ifap); - sctp_add_ifa_to_ifn(sctp_ifnp, sctp_ifap); } - goto exit_stage_left; + } else { + /* Repair ifn_p, which was NULL... 
*/ + sctp_ifap->localifa_flags = SCTP_ADDR_VALID; +#ifdef INET6 + if (sctp_ifap->address.sa.sa_family == AF_INET6) { + sctp_gather_internal_ifa_flags(sctp_ifap); + } +#endif + SCTPDBG(SCTP_DEBUG_PCB4, + "Repairing ifn %p for ifa %p\n", + (void *)sctp_ifnp, (void *)sctp_ifap); + sctp_add_ifa_to_ifn(sctp_ifnp, sctp_ifap); + } + SCTP_IPI_ADDR_WUNLOCK(); + if (new_sctp_ifnp != NULL) { + SCTP_FREE(new_sctp_ifnp, SCTP_M_IFN); } + SCTP_FREE(new_sctp_ifap, SCTP_M_IFA); + return (sctp_ifap); } + KASSERT(sctp_ifnp != NULL, + ("sctp_add_addr_to_vrf: sctp_ifnp == NULL")); + KASSERT(sctp_ifap == NULL, + ("sctp_add_addr_to_vrf: sctp_ifap (%p) != NULL", sctp_ifap)); sctp_ifap = new_sctp_ifap; memset(sctp_ifap, 0, sizeof(struct sctp_ifa)); sctp_ifap->ifn_p = sctp_ifnp; @@ -605,6 +510,11 @@ sctp_add_addr_to_vrf(uint32_t vrf_id, void *ifn, uint32_t ifn_index, memcpy(&sctp_ifap->address, addr, addr->sa_len); sctp_ifap->localifa_flags = SCTP_ADDR_VALID | SCTP_ADDR_DEFER_USE; sctp_ifap->flags = ifa_flags; +#ifdef INET6 + if (addr->sa_family == AF_INET6) { + sctp_gather_internal_ifa_flags(sctp_ifap); + } +#endif /* Set scope */ switch (sctp_ifap->address.sa.sa_family) { #ifdef INET @@ -621,8 +531,8 @@ sctp_add_addr_to_vrf(uint32_t vrf_id, void *ifn, uint32_t ifn_index, sctp_ifap->src_is_priv = 1; } sctp_ifnp->num_v4++; - if (new_ifn_af) - new_ifn_af = AF_INET; + if (new_sctp_ifnp == NULL) + sctp_ifnp->registered_af = AF_INET; break; } #endif @@ -641,13 +551,12 @@ sctp_add_addr_to_vrf(uint32_t vrf_id, void *ifn, uint32_t ifn_index, sctp_ifap->src_is_priv = 1; } sctp_ifnp->num_v6++; - if (new_ifn_af) - new_ifn_af = AF_INET6; + if (new_sctp_ifnp == NULL) + sctp_ifnp->registered_af = AF_INET6; break; } #endif default: - new_ifn_af = 0; break; } hash_of_addr = sctp_get_ifa_hash_val(&sctp_ifap->address.sa); @@ -663,9 +572,6 @@ sctp_add_addr_to_vrf(uint32_t vrf_id, void *ifn, uint32_t ifn_index, sctp_ifnp->ifa_count++; vrf->total_ifa_count++; atomic_add_int(&SCTP_BASE_INFO(ipi_count_ifas), 1); - if (new_ifn_af) { - sctp_ifnp->registered_af = new_ifn_af; - } SCTP_IPI_ADDR_WUNLOCK(); if (new_sctp_ifnp != NULL) { SCTP_FREE(new_sctp_ifnp, SCTP_M_IFN); @@ -687,8 +593,7 @@ sctp_add_addr_to_vrf(uint32_t vrf_id, void *ifn, uint32_t ifn_index, */ SCTPDBG(SCTP_DEBUG_PCB4, "Lost an address change?\n"); /* Opps, must decrement the count */ - sctp_del_addr_from_vrf(vrf_id, addr, ifn_index, - if_name); + sctp_del_addr_from_vrf(vrf_id, addr, ifn, ifn_index); return (NULL); } SCTP_INCR_LADDR_COUNT(); @@ -713,16 +618,17 @@ sctp_add_addr_to_vrf(uint32_t vrf_id, void *ifn, uint32_t ifn_index, void sctp_del_addr_from_vrf(uint32_t vrf_id, struct sockaddr *addr, - uint32_t ifn_index, const char *if_name) + void *ifn, uint32_t ifn_index) { struct sctp_vrf *vrf; - struct sctp_ifa *sctp_ifap = NULL; + struct sctp_ifa *sctp_ifap; SCTP_IPI_ADDR_WLOCK(); vrf = sctp_find_vrf(vrf_id); if (vrf == NULL) { SCTPDBG(SCTP_DEBUG_PCB4, "Can't find vrf_id 0x%x\n", vrf_id); - goto out_now; + SCTP_IPI_ADDR_WUNLOCK(); + return; } #ifdef SCTP_DEBUG @@ -730,38 +636,21 @@ sctp_del_addr_from_vrf(uint32_t vrf_id, struct sockaddr *addr, SCTPDBG_ADDR(SCTP_DEBUG_PCB4, addr); #endif sctp_ifap = sctp_find_ifa_by_addr(addr, vrf->vrf_id, SCTP_ADDR_LOCKED); - if (sctp_ifap) { + if (sctp_ifap != NULL) { /* Validate the delete */ if (sctp_ifap->ifn_p) { - int valid = 0; - - /*- - * The name has priority over the ifn_index - * if its given. 
- */ - if (if_name) { - if (strncmp(if_name, sctp_ifap->ifn_p->ifn_name, SCTP_IFNAMSIZ) == 0) { - /* They match its a correct delete */ - valid = 1; - } - } - if (!valid) { - /* last ditch check ifn_index */ - if (ifn_index == sctp_ifap->ifn_p->ifn_index) { - valid = 1; - } - } - if (!valid) { - SCTPDBG(SCTP_DEBUG_PCB4, "ifn:%d ifname:%s does not match addresses\n", - ifn_index, ((if_name == NULL) ? "NULL" : if_name)); - SCTPDBG(SCTP_DEBUG_PCB4, "ifn:%d ifname:%s - ignoring delete\n", - sctp_ifap->ifn_p->ifn_index, sctp_ifap->ifn_p->ifn_name); + if (ifn_index != sctp_ifap->ifn_p->ifn_index || + ifn != sctp_ifap->ifn_p->ifn_p) { + SCTPDBG(SCTP_DEBUG_PCB4, "ifn:%d (%p) ifname:%s - ignoring delete\n", + sctp_ifap->ifn_p->ifn_index, + sctp_ifap->ifn_p->ifn_p, + sctp_ifap->ifn_p->ifn_name); SCTP_IPI_ADDR_WUNLOCK(); return; } } SCTPDBG(SCTP_DEBUG_PCB4, "Deleting ifa %p\n", (void *)sctp_ifap); - sctp_ifap->localifa_flags &= SCTP_ADDR_VALID; + sctp_ifap->localifa_flags &= ~SCTP_ADDR_VALID; /* * We don't set the flag. This means that the structure will * hang around in EP's that have bound specific to it until @@ -778,13 +667,12 @@ sctp_del_addr_from_vrf(uint32_t vrf_id, struct sockaddr *addr, else { SCTPDBG(SCTP_DEBUG_PCB4, "Del Addr-ifn:%d Could not find address:", ifn_index); - SCTPDBG_ADDR(SCTP_DEBUG_PCB1, addr); + SCTPDBG_ADDR(SCTP_DEBUG_PCB4, addr); } #endif -out_now: SCTP_IPI_ADDR_WUNLOCK(); - if (sctp_ifap) { + if (sctp_ifap != NULL) { struct sctp_laddr *wi; wi = SCTP_ZONE_GET(SCTP_BASE_INFO(ipi_zone_laddr), struct sctp_laddr); @@ -2570,7 +2458,7 @@ sctp_inpcb_alloc(struct socket *so, uint32_t vrf_id) /* Setup the initial secret */ (void)SCTP_GETTIME_TIMEVAL(&time); - m->time_of_secret_change = (unsigned int)time.tv_sec; + m->time_of_secret_change = time.tv_sec; for (i = 0; i < SCTP_NUMBER_OF_SECRETS; i++) { m->secret_key[0][i] = sctp_select_initial_TSN(m); @@ -3177,7 +3065,7 @@ continue_anyway: /* GAK, more FIXME IFA lock? */ if (ifa->localifa_flags & SCTP_ADDR_IFA_UNUSEABLE) { /* Can't bind a non-existent addr. 
*/ - error = EINVAL; + error = EADDRNOTAVAIL; SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_PCB, error); goto out; } @@ -4257,8 +4145,7 @@ sctp_aloc_assoc_locked(struct sctp_inpcb *inp, struct sockaddr *firstaddr, sin = (struct sockaddr_in *)firstaddr; if ((ntohs(sin->sin_port) == 0) || - (sin->sin_addr.s_addr == INADDR_ANY) || - (sin->sin_addr.s_addr == INADDR_BROADCAST) || + in_broadcast(sin->sin_addr) || IN_MULTICAST(ntohl(sin->sin_addr.s_addr)) || ((inp->sctp_flags & SCTP_PCB_FLAGS_BOUND_V6) && (SCTP_IPV6_V6ONLY(inp) != 0))) { @@ -4360,7 +4247,6 @@ sctp_aloc_assoc_locked(struct sctp_inpcb *inp, struct sockaddr *firstaddr, LIST_REMOVE(stcb, sctp_asocs); LIST_REMOVE(stcb, sctp_tcbasocidhash); SCTP_ZONE_FREE(SCTP_BASE_INFO(ipi_zone_asoc), stcb); - SCTP_INP_WUNLOCK(inp); SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_PCB, ENOBUFS); *error = ENOBUFS; return (NULL); @@ -4541,7 +4427,7 @@ sctp_del_remote_addr(struct sctp_tcb *stcb, struct sockaddr *remaddr) } static bool -sctp_is_in_timewait(uint32_t tag, uint16_t lport, uint16_t rport, uint32_t now) +sctp_is_in_timewait(uint32_t tag, uint16_t lport, uint16_t rport, time_t now) { struct sctpvtaghead *chain; struct sctp_tagblock *twait_block; @@ -4563,7 +4449,7 @@ sctp_is_in_timewait(uint32_t tag, uint16_t lport, uint16_t rport, uint32_t now) } static void -sctp_set_vtag_block(struct sctp_timewait *vtag_block, uint32_t time, +sctp_set_vtag_block(struct sctp_timewait *vtag_block, time_t time, uint32_t tag, uint16_t lport, uint16_t rport) { vtag_block->tv_sec_at_expire = time; @@ -4578,13 +4464,13 @@ sctp_add_vtag_to_timewait(uint32_t tag, uint16_t lport, uint16_t rport) struct sctpvtaghead *chain; struct sctp_tagblock *twait_block; struct timeval now; - uint32_t time; + time_t time; int i; bool set; SCTP_INP_INFO_WLOCK_ASSERT(); (void)SCTP_GETTIME_TIMEVAL(&now); - time = (uint32_t)now.tv_sec + SCTP_BASE_SYSCTL(sctp_vtag_time_wait); + time = now.tv_sec + SCTP_BASE_SYSCTL(sctp_vtag_time_wait); chain = &SCTP_BASE_INFO(vtag_timewait)[(tag % SCTP_STACK_VTAG_HASH_SIZE)]; set = false; LIST_FOREACH(twait_block, chain, sctp_nxt_tagblock) { @@ -4596,7 +4482,7 @@ sctp_add_vtag_to_timewait(uint32_t tag, uint16_t lport, uint16_t rport) continue; } if ((twait_block->vtag_block[i].v_tag != 0) && - (twait_block->vtag_block[i].tv_sec_at_expire < (uint32_t)now.tv_sec)) { + (twait_block->vtag_block[i].tv_sec_at_expire < now.tv_sec)) { if (set) { /* Audit expires this guy */ sctp_set_vtag_block(twait_block->vtag_block + i, 0, 0, 0, 0); @@ -6136,8 +6022,7 @@ sctp_load_addresses_from_init(struct sctp_tcb *stcb, struct mbuf *m, /* Skip multi-cast addresses */ goto next_param; } - if ((sin.sin_addr.s_addr == INADDR_BROADCAST) || - (sin.sin_addr.s_addr == INADDR_ANY)) { + if (in_broadcast(sin.sin_addr)) { goto next_param; } sa = (struct sockaddr *)&sin; @@ -6745,7 +6630,7 @@ sctp_is_vtag_good(uint32_t tag, uint16_t lport, uint16_t rport, struct timeval * return (false); } } - return (!sctp_is_in_timewait(tag, lport, rport, (uint32_t)now->tv_sec)); + return (!sctp_is_in_timewait(tag, lport, rport, now->tv_sec)); } static void @@ -6952,7 +6837,7 @@ sctp_drain_mbufs(struct sctp_tcb *stcb) } static void -sctp_drain(void) +sctp_drain(void *arg __unused, int flags __unused) { struct epoch_tracker et; diff --git a/sys/netinet/sctp_pcb.h b/sys/netinet/sctp_pcb.h index e57e13654073..2bec2bc32d4e 100644 --- a/sys/netinet/sctp_pcb.h +++ b/sys/netinet/sctp_pcb.h @@ -130,7 +130,7 @@ struct sctp_block_entry { }; struct sctp_timewait { - uint32_t tv_sec_at_expire; /* the 
seconds from boot to expire */ + time_t tv_sec_at_expire; /* the seconds from boot to expire */ uint32_t v_tag; /* the vtag that can not be reused */ uint16_t lport; /* the local port used in vtag */ uint16_t rport; /* the remote port used in vtag */ @@ -263,8 +263,8 @@ struct sctp_base_info { * access /dev/random. */ struct sctp_pcb { - unsigned int time_of_secret_change; /* number of seconds from - * timeval.tv_sec */ + time_t time_of_secret_change; /* number of seconds from + * timeval.tv_sec */ uint32_t secret_key[SCTP_HOW_MANY_SECRETS][SCTP_NUMBER_OF_SECRETS]; unsigned int size_of_a_cookie; @@ -487,18 +487,6 @@ struct sctp_vrf *sctp_allocate_vrf(int vrfid); struct sctp_vrf *sctp_find_vrf(uint32_t vrfid); void sctp_free_vrf(struct sctp_vrf *vrf); -/*- - * Change address state, can be used if - * O/S supports telling transports about - * changes to IFA/IFN's (link layer triggers). - * If a ifn goes down, we will do src-addr-selection - * and NOT use that, as a source address. This does - * not stop the routing system from routing out - * that interface, but we won't put it as a source. - */ -void sctp_mark_ifa_addr_down(uint32_t vrf_id, struct sockaddr *addr, const char *if_name, uint32_t ifn_index); -void sctp_mark_ifa_addr_up(uint32_t vrf_id, struct sockaddr *addr, const char *if_name, uint32_t ifn_index); - struct sctp_ifa * sctp_add_addr_to_vrf(uint32_t vrfid, void *ifn, uint32_t ifn_index, uint32_t ifn_type, @@ -506,14 +494,11 @@ sctp_add_addr_to_vrf(uint32_t vrfid, void *ifa, struct sockaddr *addr, uint32_t ifa_flags, int dynamic_add); -void sctp_update_ifn_mtu(uint32_t ifn_index, uint32_t mtu); - -void sctp_free_ifn(struct sctp_ifn *sctp_ifnp); void sctp_free_ifa(struct sctp_ifa *sctp_ifap); void sctp_del_addr_from_vrf(uint32_t vrfid, struct sockaddr *addr, - uint32_t ifn_index, const char *if_name); + void *ifn, uint32_t ifn_index); struct sctp_nets *sctp_findnet(struct sctp_tcb *, struct sockaddr *); diff --git a/sys/netinet/sctp_syscalls.c b/sys/netinet/sctp_syscalls.c index d67e260b6f99..9d85576e2592 100644 --- a/sys/netinet/sctp_syscalls.c +++ b/sys/netinet/sctp_syscalls.c @@ -141,13 +141,14 @@ sys_sctp_peeloff(struct thread *td, struct sctp_peeloff_args *uap) { struct file *headfp, *nfp = NULL; struct socket *head, *so; + struct filecaps fcaps; cap_rights_t rights; u_int fflag; int error, fd; AUDIT_ARG_FD(uap->sd); - error = getsock(td, uap->sd, cap_rights_init_one(&rights, CAP_PEELOFF), - &headfp); + error = getsock_cap(td, uap->sd, + cap_rights_init_one(&rights, CAP_PEELOFF), &headfp, &fcaps); if (error != 0) goto done2; fflag = atomic_load_int(&headfp->f_flag); @@ -165,7 +166,7 @@ sys_sctp_peeloff(struct thread *td, struct sctp_peeloff_args *uap) * but that is ok. 
*/ - error = falloc(td, &nfp, &fd, 0); + error = falloc_caps(td, &nfp, &fd, 0, &fcaps); if (error != 0) goto done; td->td_retval[0] = fd; diff --git a/sys/netinet/sctp_sysctl.c b/sys/netinet/sctp_sysctl.c index a4be3471e2fd..bd2f23f40727 100644 --- a/sys/netinet/sctp_sysctl.c +++ b/sys/netinet/sctp_sysctl.c @@ -265,6 +265,10 @@ sctp_sysctl_copy_out_local_addresses(struct sctp_inpcb *inp, struct sctp_tcb *st if (sctp_is_addr_restricted(stcb, sctp_ifa)) { continue; } + } else { + if (sctp_ifa->localifa_flags & SCTP_ADDR_IFA_UNUSEABLE) { + continue; + } } switch (sctp_ifa->address.sa.sa_family) { #ifdef INET @@ -894,7 +898,7 @@ sctp_sysctl_handle_trace_log_clear(SYSCTL_HANDLER_ARGS) return (error); \ } \ SYSCTL_PROC(_net_inet_sctp, OID_AUTO, mib_name, flags, NULL, 0, \ - sctp_sysctl_handle_##mib_name, "UI", prefix##_DESC) + sctp_sysctl_handle_##mib_name, "IU", prefix##_DESC) #define SCTP_UINT_SYSCTL_RDTUN(mib_name, var_name, prefix) \ SYSCTL_UINT(_net_inet_sctp, OID_AUTO, mib_name, \ diff --git a/sys/netinet/sctp_usrreq.c b/sys/netinet/sctp_usrreq.c index 3b0da87edce3..94d57225c20b 100644 --- a/sys/netinet/sctp_usrreq.c +++ b/sys/netinet/sctp_usrreq.c @@ -361,8 +361,9 @@ sctp_getcred(SYSCTL_HANDLER_ARGS) /* FIX, for non-bsd is this right? */ vrf_id = SCTP_DEFAULT_VRFID; + if (req->newptr == NULL) + return (EINVAL); error = priv_check(req->td, PRIV_NETINET_GETCRED); - if (error) return (error); @@ -843,8 +844,10 @@ sctp_shutdown(struct socket *so, enum shutdown_how how) } sctp_free_a_readq(stcb, control); } else { - stcb->asoc.size_on_all_streams += - control->length; + if (stcb != NULL) { + stcb->asoc.size_on_all_streams += + control->length; + } } } SOCK_UNLOCK(so); @@ -7514,7 +7517,6 @@ sctp_peeraddr(struct socket *so, struct sockaddr *sa) .pr_control = in_control, \ .pr_close = sctp_close, \ .pr_detach = sctp_close, \ - .pr_sopoll = sopoll_generic, \ .pr_disconnect = sctp_disconnect, \ .pr_listen = sctp_listen, \ .pr_peeraddr = sctp_peeraddr, \ diff --git a/sys/netinet/sctputil.c b/sys/netinet/sctputil.c index b23efd9c8968..ddfa71d5c7ed 100644 --- a/sys/netinet/sctputil.c +++ b/sys/netinet/sctputil.c @@ -1944,7 +1944,7 @@ sctp_timeout_handler(void *t) type, inp, stcb, net)); SCTP_STAT_INCR(sctps_timosecret); (void)SCTP_GETTIME_TIMEVAL(&tv); - inp->sctp_ep.time_of_secret_change = (unsigned int)tv.tv_sec; + inp->sctp_ep.time_of_secret_change = tv.tv_sec; inp->sctp_ep.last_secret_number = inp->sctp_ep.current_secret_number; inp->sctp_ep.current_secret_number++; @@ -2289,19 +2289,19 @@ sctp_timer_start(int t_type, struct sctp_inpcb *inp, struct sctp_tcb *stcb, } else { to_ticks = net->RTO; } - rndval = sctp_select_initial_TSN(&inp->sctp_ep); - jitter = rndval % to_ticks; - if (to_ticks > 1) { - to_ticks >>= 1; - } - if (jitter < (UINT32_MAX - to_ticks)) { - to_ticks += jitter; - } else { - to_ticks = UINT32_MAX; - } if (!((net->dest_state & SCTP_ADDR_UNCONFIRMED) && (net->dest_state & SCTP_ADDR_REACHABLE)) && ((net->dest_state & SCTP_ADDR_PF) == 0)) { + if (to_ticks > 1) { + rndval = sctp_select_initial_TSN(&inp->sctp_ep); + jitter = rndval % to_ticks; + to_ticks >>= 1; + if (jitter < (UINT32_MAX - to_ticks)) { + to_ticks += jitter; + } else { + to_ticks = UINT32_MAX; + } + } if (net->heart_beat_delay < (UINT32_MAX - to_ticks)) { to_ticks += net->heart_beat_delay; } else { @@ -6634,8 +6634,7 @@ sctp_connectx_helper_add(struct sctp_tcb *stcb, struct sockaddr *addr, case AF_INET: incr = sizeof(struct sockaddr_in); sin = (struct sockaddr_in *)sa; - if ((sin->sin_addr.s_addr == INADDR_ANY) || - 
(sin->sin_addr.s_addr == INADDR_BROADCAST) || + if (in_broadcast(sin->sin_addr) || IN_MULTICAST(ntohl(sin->sin_addr.s_addr))) { SCTP_LTRACE_ERR_RET(NULL, stcb, NULL, SCTP_FROM_SCTPUTIL, EINVAL); (void)sctp_free_assoc(inp, stcb, SCTP_NORMAL_PROC, diff --git a/sys/netinet/siftr.c b/sys/netinet/siftr.c index bf0cdc2ac4cc..374b5595fcbc 100644 --- a/sys/netinet/siftr.c +++ b/sys/netinet/siftr.c @@ -273,6 +273,7 @@ static struct mtx siftr_pkt_queue_mtx; static struct mtx siftr_pkt_mgr_mtx; static struct thread *siftr_pkt_manager_thr = NULL; static char direction[2] = {'i','o'}; +static eventhandler_tag siftr_shutdown_tag; /* Required function prototypes. */ static int siftr_sysctl_enabled_handler(SYSCTL_HANDLER_ARGS); @@ -596,9 +597,6 @@ siftr_findinpcb(int ipver, struct ip *ip, struct mbuf *m, uint16_t sport, { struct inpcb *inp; - /* We need the tcbinfo lock. */ - INP_INFO_WUNLOCK_ASSERT(&V_tcbinfo); - if (dir == PFIL_IN) inp = (ipver == INP_IPV4 ? in_pcblookup(&V_tcbinfo, ip->ip_src, sport, ip->ip_dst, @@ -1310,6 +1308,7 @@ static int deinit_siftr(void) { /* Cleanup. */ + EVENTHANDLER_DEREGISTER(shutdown_pre_sync, siftr_shutdown_tag); siftr_manage_ops(SIFTR_DISABLE); hashdestroy(counter_hash, M_SIFTR, siftr_hashmask); mtx_destroy(&siftr_pkt_queue_mtx); @@ -1324,8 +1323,8 @@ deinit_siftr(void) static int init_siftr(void) { - EVENTHANDLER_REGISTER(shutdown_pre_sync, siftr_shutdown_handler, NULL, - SHUTDOWN_PRI_FIRST); + siftr_shutdown_tag = EVENTHANDLER_REGISTER(shutdown_pre_sync, + siftr_shutdown_handler, NULL, SHUTDOWN_PRI_FIRST); /* Initialise our flow counter hash table. */ counter_hash = hashinit(SIFTR_EXPECTED_MAX_TCP_FLOWS, M_SIFTR, diff --git a/sys/netinet/tcp.h b/sys/netinet/tcp.h index c97a3e04d9b6..41a49b318cd5 100644 --- a/sys/netinet/tcp.h +++ b/sys/netinet/tcp.h @@ -71,13 +71,13 @@ struct tcphdr { #define TH_RES3 0x200 #define TH_RES2 0x400 #define TH_RES1 0x800 -#define TH_FLAGS (TH_FIN|TH_SYN|TH_RST|TH_PUSH|TH_ACK|TH_URG|TH_ECE|TH_CWR) +#define TH_FLAGS (TH_FIN|TH_SYN|TH_RST|TH_PUSH|TH_ACK|TH_URG|TH_ECE|TH_CWR|TH_AE) #define PRINT_TH_FLAGS "\20\1FIN\2SYN\3RST\4PUSH\5ACK\6URG\7ECE\10CWR\11AE" u_short th_win; /* window */ u_short th_sum; /* checksum */ u_short th_urp; /* urgent pointer */ -}; +} __packed; static __inline uint16_t __tcp_get_flags(const struct tcphdr *th) @@ -166,8 +166,6 @@ __tcp_set_flags(struct tcphdr *th, uint16_t flags) #define TCP_MAX_WINSHIFT 14 /* maximum window shift */ -#define TCP_MAXBURST 4 /* maximum segments in a burst */ - #define TCP_MAXHLEN (0xf<<2) /* max length of header in bytes */ #define TCP_MAXOLEN (TCP_MAXHLEN - sizeof(struct tcphdr)) /* max space left for options */ @@ -182,176 +180,169 @@ __tcp_set_flags(struct tcphdr *th, uint16_t flags) * values and are not masked together. Some values appear to be * bitmasks for historical reasons. 
*/ -#define TCP_NODELAY 1 /* don't delay send to coalesce packets */ +#define TCP_NODELAY 1 /* don't delay send to coalesce packets */ #if __BSD_VISIBLE -#define TCP_MAXSEG 2 /* set maximum segment size */ -#define TCP_NOPUSH 4 /* don't push last block of write */ -#define TCP_NOOPT 8 /* don't use TCP options */ -#define TCP_MD5SIG 16 /* use MD5 digests (RFC2385) */ -#define TCP_INFO 32 /* retrieve tcp_info structure */ -#define TCP_STATS 33 /* retrieve stats blob structure */ -#define TCP_LOG 34 /* configure event logging for connection */ -#define TCP_LOGBUF 35 /* retrieve event log for connection */ -#define TCP_LOGID 36 /* configure log ID to correlate connections */ -#define TCP_LOGDUMP 37 /* dump connection log events to device */ -#define TCP_LOGDUMPID 38 /* dump events from connections with same ID to - device */ -#define TCP_TXTLS_ENABLE 39 /* TLS framing and encryption for transmit */ -#define TCP_TXTLS_MODE 40 /* Transmit TLS mode */ -#define TCP_RXTLS_ENABLE 41 /* TLS framing and encryption for receive */ -#define TCP_RXTLS_MODE 42 /* Receive TLS mode */ -#define TCP_IWND_NB 43 /* Override initial window (units: bytes) */ -#define TCP_IWND_NSEG 44 /* Override initial window (units: MSS segs) */ +#define TCP_MAXSEG 2 /* set maximum segment size */ +#define TCP_NOPUSH 4 /* don't push last block of write */ +#define TCP_NOOPT 8 /* don't use TCP options */ +#define TCP_MD5SIG 16 /* use MD5 digests (RFC2385) */ +#define TCP_INFO 32 /* retrieve tcp_info structure */ +#define TCP_STATS 33 /* retrieve stats blob structure */ +#define TCP_LOG 34 /* configure event logging for connection */ +#define TCP_LOGBUF 35 /* retrieve event log for connection */ +#define TCP_LOGID 36 /* configure log ID to correlate connections */ +#define TCP_LOGDUMP 37 /* dump connection log events to device */ +#define TCP_LOGDUMPID 38 /* dump events from connections with same ID to + device */ +#define TCP_TXTLS_ENABLE 39 /* TLS framing and encryption for transmit */ +#define TCP_TXTLS_MODE 40 /* Transmit TLS mode */ +#define TCP_RXTLS_ENABLE 41 /* TLS framing and encryption for receive */ +#define TCP_RXTLS_MODE 42 /* Receive TLS mode */ +#define TCP_IWND_NB 43 /* Override initial window (units: bytes) */ +#define TCP_IWND_NSEG 44 /* Override initial window (units: MSS segs) */ #ifdef _KERNEL -#define TCP_USE_DDP 45 /* Use direct data placement for so_rcvbuf */ +#define TCP_USE_DDP 45 /* Use direct data placement for so_rcvbuf */ #endif -#define TCP_LOGID_CNT 46 /* get number of connections with the same ID */ -#define TCP_LOG_TAG 47 /* configure tag for grouping logs */ -#define TCP_USER_LOG 48 /* userspace log event */ -#define TCP_CONGESTION 64 /* get/set congestion control algorithm */ -#define TCP_CCALGOOPT 65 /* get/set cc algorithm specific options */ -#define TCP_MAXUNACKTIME 68 /* maximum time without making progress (sec) */ -#define TCP_MAXPEAKRATE 69 /* maximum peak rate allowed (kbps) */ -#define TCP_IDLE_REDUCE 70 /* Reduce cwnd on idle input */ -#define TCP_REMOTE_UDP_ENCAPS_PORT 71 /* Enable TCP over UDP tunneling via the specified port */ -#define TCP_DELACK 72 /* socket option for delayed ack */ -#define TCP_FIN_IS_RST 73 /* A fin from the peer is treated has a RST */ -#define TCP_LOG_LIMIT 74 /* Limit to number of records in tcp-log */ -#define TCP_SHARED_CWND_ALLOWED 75 /* Use of a shared cwnd is allowed */ -#define TCP_PROC_ACCOUNTING 76 /* Do accounting on tcp cpu usage and counts */ -#define TCP_USE_CMP_ACKS 77 /* The transport can handle the Compressed mbuf acks */ -#define 
TCP_PERF_INFO 78 /* retrieve accounting counters */ -#define TCP_KEEPINIT 128 /* N, time to establish connection */ -#define TCP_KEEPIDLE 256 /* L,N,X start keeplives after this period */ -#define TCP_KEEPINTVL 512 /* L,N interval between keepalives */ -#define TCP_KEEPCNT 1024 /* L,N number of keepalives before close */ -#define TCP_FASTOPEN 1025 /* enable TFO / was created via TFO */ -#define TCP_PCAP_OUT 2048 /* number of output packets to keep */ -#define TCP_PCAP_IN 4096 /* number of input packets to keep */ -#define TCP_FUNCTION_BLK 8192 /* Set the tcp function pointers to the specified stack */ -#define TCP_FUNCTION_ALIAS 8193 /* Get the current tcp function pointer name alias */ +#define TCP_LOGID_CNT 46 /* get number of connections with the same ID */ +#define TCP_LOG_TAG 47 /* configure tag for grouping logs */ +#define TCP_USER_LOG 48 /* userspace log event */ +#define TCP_CONGESTION 64 /* get/set congestion control algorithm */ +#define TCP_CCALGOOPT 65 /* get/set cc algorithm specific options */ +#define TCP_MAXUNACKTIME 68 /* maximum time without making progress (sec) */ +/* unused 69 */ +#define TCP_IDLE_REDUCE 70 /* Reduce cwnd on idle input */ +#define TCP_REMOTE_UDP_ENCAPS_PORT 71 /* Enable TCP over UDP tunneling via the specified port */ +#define TCP_DELACK 72 /* socket option for delayed ack */ +#define TCP_FIN_IS_RST 73 /* A fin from the peer is treated has a RST */ +#define TCP_LOG_LIMIT 74 /* Limit to number of records in tcp-log */ +#define TCP_SHARED_CWND_ALLOWED 75 /* Use of a shared cwnd is allowed */ +#define TCP_PROC_ACCOUNTING 76 /* Do accounting on tcp cpu usage and counts */ +#define TCP_USE_CMP_ACKS 77 /* The transport can handle the Compressed mbuf acks */ +#define TCP_PERF_INFO 78 /* retrieve accounting counters */ +#define TCP_KEEPINIT 128 /* N, time to establish connection */ +#define TCP_KEEPIDLE 256 /* L,N,X start keeplives after this period */ +#define TCP_KEEPINTVL 512 /* L,N interval between keepalives */ +#define TCP_KEEPCNT 1024 /* L,N number of keepalives before close */ +#define TCP_FASTOPEN 1025 /* enable TFO / was created via TFO */ +/* unused 2048 was TCP_PCAP_OUT */ +/* unused 4096 was TCP_PCAP_IN */ +#define TCP_FUNCTION_BLK 8192 /* Set the tcp function pointers to the specified stack */ +#define TCP_FUNCTION_ALIAS 8193 /* Get the current tcp function pointer name alias */ /* Options for Rack and BBR */ -#define TCP_REUSPORT_LB_NUMA 1026 /* set listen socket numa domain */ -#define TCP_RACK_MBUF_QUEUE 1050 /* Do we allow mbuf queuing if supported */ -#define TCP_RACK_PROP 1051 /* Not used */ -#define TCP_RACK_TLP_REDUCE 1052 /* RACK TLP cwnd reduction (bool) */ -#define TCP_RACK_PACE_REDUCE 1053 /* RACK Pacingv reduction factor (divisor) */ -#define TCP_RACK_PACE_MAX_SEG 1054 /* Max TSO size we will send */ -#define TCP_RACK_PACE_ALWAYS 1055 /* Use the always pace method */ -#define TCP_RACK_PROP_RATE 1056 /* Not used */ -#define TCP_RACK_PRR_SENDALOT 1057 /* Allow PRR to send more than one seg */ -#define TCP_RACK_MIN_TO 1058 /* Minimum time between rack t-o's in ms */ -#define TCP_RACK_EARLY_RECOV 1059 /* Not used */ -#define TCP_RACK_EARLY_SEG 1060 /* If early recovery max segments */ -#define TCP_RACK_REORD_THRESH 1061 /* RACK reorder threshold (shift amount) */ -#define TCP_RACK_REORD_FADE 1062 /* Does reordering fade after ms time */ -#define TCP_RACK_TLP_THRESH 1063 /* RACK TLP theshold i.e. srtt+(srtt/N) */ -#define TCP_RACK_PKT_DELAY 1064 /* RACK added ms i.e. 
rack-rtt + reord + N */ -#define TCP_RACK_TLP_INC_VAR 1065 /* Does TLP include rtt variance in t-o */ -#define TCP_BBR_IWINTSO 1067 /* Initial TSO window for BBRs first sends */ -#define TCP_BBR_RECFORCE 1068 /* Enter recovery force out a segment disregard pacer no longer valid */ -#define TCP_BBR_STARTUP_PG 1069 /* Startup pacing gain */ -#define TCP_BBR_DRAIN_PG 1070 /* Drain pacing gain */ -#define TCP_BBR_RWND_IS_APP 1071 /* Rwnd limited is considered app limited */ -#define TCP_BBR_PROBE_RTT_INT 1072 /* How long in useconds between probe-rtt */ -#define TCP_BBR_ONE_RETRAN 1073 /* Is only one segment allowed out during retran */ -#define TCP_BBR_STARTUP_LOSS_EXIT 1074 /* Do we exit a loss during startup if not 20% incr */ -#define TCP_BBR_USE_LOWGAIN 1075 /* lower the gain in PROBE_BW enable */ -#define TCP_BBR_LOWGAIN_THRESH 1076 /* Unused after 2.3 morphs to TSLIMITS >= 2.3 */ -#define TCP_BBR_TSLIMITS 1076 /* Do we use experimental Timestamp limiting for our algo */ -#define TCP_BBR_LOWGAIN_HALF 1077 /* Unused after 2.3 */ -#define TCP_BBR_PACE_OH 1077 /* Reused in 4.2 for pacing overhead setting */ -#define TCP_BBR_LOWGAIN_FD 1078 /* Unused after 2.3 */ -#define TCP_BBR_HOLD_TARGET 1078 /* For 4.3 on */ -#define TCP_BBR_USEDEL_RATE 1079 /* Enable use of delivery rate for loss recovery */ -#define TCP_BBR_MIN_RTO 1080 /* Min RTO in milliseconds */ -#define TCP_BBR_MAX_RTO 1081 /* Max RTO in milliseconds */ -#define TCP_BBR_REC_OVER_HPTS 1082 /* Recovery override htps settings 0/1/3 */ -#define TCP_BBR_UNLIMITED 1083 /* Not used before 2.3 and morphs to algorithm >= 2.3 */ -#define TCP_BBR_ALGORITHM 1083 /* What measurement algo does BBR use netflix=0, google=1 */ -#define TCP_BBR_DRAIN_INC_EXTRA 1084 /* Does the 3/4 drain target include the extra gain */ -#define TCP_BBR_STARTUP_EXIT_EPOCH 1085 /* what epoch gets us out of startup */ -#define TCP_BBR_PACE_PER_SEC 1086 -#define TCP_BBR_PACE_DEL_TAR 1087 -#define TCP_BBR_PACE_SEG_MAX 1088 -#define TCP_BBR_PACE_SEG_MIN 1089 -#define TCP_BBR_PACE_CROSS 1090 -#define TCP_RACK_IDLE_REDUCE_HIGH 1092 /* Reduce the highest cwnd seen to IW on idle */ -#define TCP_RACK_MIN_PACE 1093 /* Do we enforce rack min pace time */ -#define TCP_RACK_MIN_PACE_SEG 1094 /* If so what is the seg threshould */ -#define TCP_RACK_GP_INCREASE 1094 /* After 4.1 its the GP increase in older rack */ -#define TCP_RACK_TLP_USE 1095 -#define TCP_BBR_ACK_COMP_ALG 1096 /* Not used */ -#define TCP_BBR_TMR_PACE_OH 1096 /* Recycled in 4.2 */ -#define TCP_BBR_EXTRA_GAIN 1097 -#define TCP_RACK_DO_DETECTION 1097 /* Recycle of extra gain for rack, attack detection */ -#define TCP_BBR_RACK_RTT_USE 1098 /* what RTT should we use 0, 1, or 2? */ -#define TCP_BBR_RETRAN_WTSO 1099 -#define TCP_DATA_AFTER_CLOSE 1100 -#define TCP_BBR_PROBE_RTT_GAIN 1101 -#define TCP_BBR_PROBE_RTT_LEN 1102 -#define TCP_BBR_SEND_IWND_IN_TSO 1103 /* Do we burst out whole iwin size chunks at start? */ -#define TCP_BBR_USE_RACK_RR 1104 /* Do we use the rack rapid recovery for pacing rxt's */ -#define TCP_BBR_USE_RACK_CHEAT TCP_BBR_USE_RACK_RR /* Compat. 
*/ -#define TCP_BBR_HDWR_PACE 1105 /* Enable/disable hardware pacing */ -#define TCP_BBR_UTTER_MAX_TSO 1106 /* Do we enforce an utter max TSO size */ -#define TCP_BBR_EXTRA_STATE 1107 /* Special exit-persist catch up */ -#define TCP_BBR_FLOOR_MIN_TSO 1108 /* The min tso size */ -#define TCP_BBR_MIN_TOPACEOUT 1109 /* Do we suspend pacing until */ -#define TCP_BBR_TSTMP_RAISES 1110 /* Can a timestamp measurement raise the b/w */ -#define TCP_BBR_POLICER_DETECT 1111 /* Turn on/off google mode policer detection */ -#define TCP_BBR_RACK_INIT_RATE 1112 /* Set an initial pacing rate for when we have no b/w in kbits per sec */ -#define TCP_RACK_RR_CONF 1113 /* Rack rapid recovery configuration control*/ -#define TCP_RACK_CHEAT_NOT_CONF_RATE TCP_RACK_RR_CONF -#define TCP_RACK_GP_INCREASE_CA 1114 /* GP increase for Congestion Avoidance */ -#define TCP_RACK_GP_INCREASE_SS 1115 /* GP increase for Slow Start */ -#define TCP_RACK_GP_INCREASE_REC 1116 /* GP increase for Recovery */ -#define TCP_RACK_FORCE_MSEG 1117 /* Override to use the user set max-seg value */ -#define TCP_RACK_PACE_RATE_CA 1118 /* Pacing rate for Congestion Avoidance */ -#define TCP_RACK_PACE_RATE_SS 1119 /* Pacing rate for Slow Start */ -#define TCP_RACK_PACE_RATE_REC 1120 /* Pacing rate for Recovery */ -#define TCP_NO_PRR 1122 /* If pacing, don't use prr */ -#define TCP_RACK_NONRXT_CFG_RATE 1123 /* In recovery does a non-rxt use the cfg rate */ -#define TCP_SHARED_CWND_ENABLE 1124 /* Use a shared cwnd if allowed */ -#define TCP_TIMELY_DYN_ADJ 1125 /* Do we attempt dynamic multipler adjustment with timely. */ -#define TCP_RACK_NO_PUSH_AT_MAX 1126 /* For timely do not push if we are over max rtt */ -#define TCP_RACK_PACE_TO_FILL 1127 /* If we are not in recovery, always pace to fill the cwnd in 1 RTT */ -#define TCP_SHARED_CWND_TIME_LIMIT 1128 /* we should limit to low time values the scwnd life */ -#define TCP_RACK_PROFILE 1129 /* Select a profile that sets multiple options */ -#define TCP_HDWR_RATE_CAP 1130 /* Allow hardware rates to cap pacing rate */ -#define TCP_PACING_RATE_CAP 1131 /* Highest rate allowed in pacing in bytes per second (uint64_t) */ -#define TCP_HDWR_UP_ONLY 1132 /* Allow the pacing rate to climb but not descend (with the exception of fill-cw */ -#define TCP_RACK_ABC_VAL 1133 /* Set a local ABC value different then the system default */ -#define TCP_REC_ABC_VAL 1134 /* Do we use the ABC value for recovery or the override one from sysctl */ -#define TCP_RACK_MEASURE_CNT 1135 /* How many measurements are required in GP pacing */ -#define TCP_DEFER_OPTIONS 1136 /* Defer options until the proper number of measurements occur, does not defer TCP_RACK_MEASURE_CNT */ -#define TCP_FAST_RSM_HACK 1137 /* Not used in modern stacks */ -#define TCP_RACK_PACING_BETA 1138 /* Changing the beta for pacing */ -#define TCP_RACK_PACING_BETA_ECN 1139 /* Changing the beta for ecn with pacing */ -#define TCP_RACK_TIMER_SLOP 1140 /* Set or get the timer slop used */ -#define TCP_RACK_DSACK_OPT 1141 /* How do we setup rack timer DSACK options bit 1/2 */ -#define TCP_RACK_ENABLE_HYSTART 1142 /* Do we allow hystart in the CC modules */ -#define TCP_RACK_SET_RXT_OPTIONS 1143 /* Set the bits in the retransmit options */ -#define TCP_RACK_HI_BETA 1144 /* Turn on/off high beta */ -#define TCP_RACK_SPLIT_LIMIT 1145 /* Set a split limit for split allocations */ -#define TCP_RACK_PACING_DIVISOR 1146 /* Pacing divisor given to rate-limit code for burst sizing */ -#define TCP_RACK_PACE_MIN_SEG 1147 /* Pacing min seg size rack will use */ -#define 
TCP_RACK_DGP_IN_REC 1148 /* Do we use full DGP in recovery? */ -#define TCP_POLICER_DETECT 1149 /* Do we apply a thresholds to rack to detect and compensate for policers? */ -#define TCP_RXT_CLAMP TCP_POLICER_DETECT -#define TCP_HYBRID_PACING 1150 /* Hybrid pacing enablement */ -#define TCP_PACING_DND 1151 /* When pacing with rr_config=3 can sacks disturb us */ -#define TCP_SS_EEXIT 1152 /* Do we do early exit from slowtart if no b/w growth */ -#define TCP_DGP_UPPER_BOUNDS 1153 /* SS and CA upper bound in percentage */ -#define TCP_NO_TIMELY 1154 /* Disable/enable Timely */ -#define TCP_HONOR_HPTS_MIN 1155 /* Do we honor hpts min to */ -#define TCP_REC_IS_DYN 1156 /* Do we allow timely to change recovery multiplier? */ -#define TCP_SIDECHAN_DIS 1157 /* Disable/enable the side-channel */ -#define TCP_FILLCW_RATE_CAP 1158 /* Set a cap for DGP's fillcw */ -#define TCP_POLICER_MSS 1159 /* Policer MSS requirement */ -#define TCP_STACK_SPEC_INFO 1160 /* Get stack specific information (if present) */ -#define RACK_CSPR_IS_FCC 1161 -#define TCP_GP_USE_LTBW 1162 /* how we use lt_bw 0=not, 1=min, 2=max */ +#define TCP_REUSPORT_LB_NUMA 1026 /* set listen socket numa domain */ +#define TCP_RACK_MBUF_QUEUE 1050 /* Do we allow mbuf queuing if supported */ +/* unused 1051 */ +#define TCP_RACK_TLP_REDUCE 1052 /* RACK TLP cwnd reduction (bool) */ +/* unused 1053 */ +#define TCP_RACK_PACE_MAX_SEG 1054 /* Max TSO size we will send */ +#define TCP_RACK_PACE_ALWAYS 1055 /* Use the always pace method */ +/* unused 1056 */ +#define TCP_RACK_PRR_SENDALOT 1057 /* Allow PRR to send more than one seg */ +#define TCP_RACK_MIN_TO 1058 /* Minimum time between rack t-o's in ms */ +/* unused 1059 */ +#define TCP_RACK_EARLY_SEG 1060 /* If early recovery max segments */ +#define TCP_RACK_REORD_THRESH 1061 /* RACK reorder threshold (shift amount) */ +#define TCP_RACK_REORD_FADE 1062 /* Does reordering fade after ms time */ +#define TCP_RACK_TLP_THRESH 1063 /* RACK TLP theshold i.e. srtt+(srtt/N) */ +#define TCP_RACK_PKT_DELAY 1064 /* RACK added ms i.e. rack-rtt + reord + N */ +/* unused 1065 */ +/* unused 1066 */ +#define TCP_BBR_IWINTSO 1067 /* Initial TSO window for BBRs first sends */ +/* unused 1068 */ +#define TCP_BBR_STARTUP_PG 1069 /* Startup pacing gain */ +#define TCP_BBR_DRAIN_PG 1070 /* Drain pacing gain */ +/* unused 1071 */ +#define TCP_BBR_PROBE_RTT_INT 1072 /* How long in useconds between probe-rtt */ +/* unused 1073 */ +#define TCP_BBR_STARTUP_LOSS_EXIT 1074 /* Do we exit a loss during startup if not 20% incr */ +/* unused 1075 */ +#define TCP_BBR_TSLIMITS 1076 /* Do we use experimental Timestamp limiting for our algo */ +#define TCP_BBR_PACE_OH 1077 /* pacing overhead setting */ +/* unused 1078 */ +#define TCP_BBR_USEDEL_RATE 1079 /* Enable use of delivery rate for loss recovery */ +#define TCP_BBR_MIN_RTO 1080 /* Min RTO in milliseconds */ +#define TCP_BBR_MAX_RTO 1081 /* Max RTO in milliseconds */ +/* unused 1082 */ +#define TCP_BBR_ALGORITHM 1083 /* What measurement algo does BBR use netflix=0, google=1 */ +/* unused 1084 */ +/* unused 1085 */ +#define TCP_BBR_PACE_PER_SEC 1086 +#define TCP_BBR_PACE_DEL_TAR 1087 +#define TCP_BBR_PACE_SEG_MAX 1088 +#define TCP_BBR_PACE_SEG_MIN 1089 +#define TCP_BBR_PACE_CROSS 1090 +/* unused 1091 */ +/* unused 1092 */ +/* unused 1093 */ +/* unused 1094 */ +#define TCP_RACK_TLP_USE 1095 +#define TCP_BBR_TMR_PACE_OH 1096 /* ??? 
*/ +#define TCP_RACK_DO_DETECTION 1097 /* Recycle of extra gain for rack, attack detection */ +#define TCP_BBR_RACK_RTT_USE 1098 /* what RTT should we use 0, 1, or 2? */ +#define TCP_BBR_RETRAN_WTSO 1099 +#define TCP_DATA_AFTER_CLOSE 1100 +#define TCP_BBR_PROBE_RTT_GAIN 1101 +#define TCP_BBR_PROBE_RTT_LEN 1102 +#define TCP_BBR_SEND_IWND_IN_TSO 1103 /* Do we burst out whole iwin size chunks at start? */ +#define TCP_BBR_USE_RACK_RR 1104 /* Do we use the rack rapid recovery for pacing rxt's */ +#define TCP_BBR_USE_RACK_CHEAT TCP_BBR_USE_RACK_RR /* Compat. */ +#define TCP_BBR_HDWR_PACE 1105 /* Enable/disable hardware pacing */ +#define TCP_BBR_UTTER_MAX_TSO 1106 /* Do we enforce an utter max TSO size */ +#define TCP_BBR_EXTRA_STATE 1107 /* Special exit-persist catch up */ +#define TCP_BBR_FLOOR_MIN_TSO 1108 /* The min tso size */ +#define TCP_BBR_MIN_TOPACEOUT 1109 /* Do we suspend pacing until */ +#define TCP_BBR_TSTMP_RAISES 1110 /* Can a timestamp measurement raise the b/w */ +#define TCP_BBR_POLICER_DETECT 1111 /* Turn on/off google mode policer detection */ +#define TCP_BBR_RACK_INIT_RATE 1112 /* Set an initial pacing rate for when we have no b/w in kbits per sec */ +#define TCP_RACK_RR_CONF 1113 /* Rack rapid recovery configuration control*/ +#define TCP_RACK_GP_INCREASE_CA 1114 /* GP increase for Congestion Avoidance */ +#define TCP_RACK_GP_INCREASE_SS 1115 /* GP increase for Slow Start */ +#define TCP_RACK_GP_INCREASE_REC 1116 /* GP increase for Recovery */ +#define TCP_RACK_FORCE_MSEG 1117 /* Override to use the user set max-seg value */ +#define TCP_RACK_PACE_RATE_CA 1118 /* Pacing rate for Congestion Avoidance */ +#define TCP_RACK_PACE_RATE_SS 1119 /* Pacing rate for Slow Start */ +#define TCP_RACK_PACE_RATE_REC 1120 /* Pacing rate for Recovery */ +#define TCP_NO_PRR 1122 /* If pacing, don't use prr */ +#define TCP_RACK_NONRXT_CFG_RATE 1123 /* In recovery does a non-rxt use the cfg rate */ +#define TCP_SHARED_CWND_ENABLE 1124 /* Use a shared cwnd if allowed */ +#define TCP_TIMELY_DYN_ADJ 1125 /* Do we attempt dynamic multipler adjustment with timely. 
*/ +#define TCP_RACK_NO_PUSH_AT_MAX 1126 /* For timely do not push if we are over max rtt */ +#define TCP_RACK_PACE_TO_FILL 1127 /* If we are not in recovery, always pace to fill the cwnd in 1 RTT */ +#define TCP_SHARED_CWND_TIME_LIMIT 1128 /* we should limit to low time values the scwnd life */ +#define TCP_RACK_PROFILE 1129 /* Select a profile that sets multiple options */ +#define TCP_HDWR_RATE_CAP 1130 /* Allow hardware rates to cap pacing rate */ +#define TCP_PACING_RATE_CAP 1131 /* Highest rate allowed in pacing in bytes per second (uint64_t) */ +#define TCP_HDWR_UP_ONLY 1132 /* Allow the pacing rate to climb but not descend (with the exception of fill-cw */ +#define TCP_RACK_ABC_VAL 1133 /* Set a local ABC value different then the system default */ +#define TCP_REC_ABC_VAL 1134 /* Do we use the ABC value for recovery or the override one from sysctl */ +#define TCP_RACK_MEASURE_CNT 1135 /* How many measurements are required in GP pacing */ +#define TCP_DEFER_OPTIONS 1136 /* Defer options until the proper number of measurements occur, does not defer TCP_RACK_MEASURE_CNT */ +/* unused 1137 */ +#define TCP_RACK_PACING_BETA 1138 /* Changing the beta for pacing */ +#define TCP_RACK_PACING_BETA_ECN 1139 /* Changing the beta for ecn with pacing */ +#define TCP_RACK_TIMER_SLOP 1140 /* Set or get the timer slop used */ +#define TCP_RACK_DSACK_OPT 1141 /* How do we setup rack timer DSACK options bit 1/2 */ +#define TCP_RACK_ENABLE_HYSTART 1142 /* Do we allow hystart in the CC modules */ +#define TCP_RACK_SET_RXT_OPTIONS 1143 /* Set the bits in the retransmit options */ +#define TCP_RACK_HI_BETA 1144 /* Turn on/off high beta */ +#define TCP_RACK_SPLIT_LIMIT 1145 /* Set a split limit for split allocations */ +#define TCP_RACK_PACING_DIVISOR 1146 /* Pacing divisor given to rate-limit code for burst sizing */ +#define TCP_RACK_PACE_MIN_SEG 1147 /* Pacing min seg size rack will use */ +#define TCP_RACK_DGP_IN_REC 1148 /* Do we use full DGP in recovery? */ +/* unused 1149 */ +#define TCP_HYBRID_PACING 1150 /* Hybrid pacing enablement */ +#define TCP_PACING_DND 1151 /* When pacing with rr_config=3 can sacks disturb us */ +#define TCP_SS_EEXIT 1152 /* Do we do early exit from slowtart if no b/w growth */ +#define TCP_DGP_UPPER_BOUNDS 1153 /* SS and CA upper bound in percentage */ +#define TCP_NO_TIMELY 1154 /* Disable/enable Timely */ +#define TCP_HONOR_HPTS_MIN 1155 /* Do we honor hpts min to */ +#define TCP_REC_IS_DYN 1156 /* Do we allow timely to change recovery multiplier? */ +#define TCP_SIDECHAN_DIS 1157 /* Disable/enable the side-channel */ +#define TCP_FILLCW_RATE_CAP 1158 /* Set a cap for DGP's fillcw */ +/* unused 1159 */ +#define TCP_STACK_SPEC_INFO 1160 /* Get stack specific information (if present) */ +#define RACK_CSPR_IS_FCC 1161 +#define TCP_GP_USE_LTBW 1162 /* how we use lt_bw 0=not, 1=min, 2=max */ /* Start of reserved space for third-party user-settable options. 
*/ diff --git a/sys/netinet/tcp_hostcache.c b/sys/netinet/tcp_hostcache.c index ed90a9ba7196..dbc966acc56b 100644 --- a/sys/netinet/tcp_hostcache.c +++ b/sys/netinet/tcp_hostcache.c @@ -80,7 +80,6 @@ #include <sys/sbuf.h> #include <sys/smr.h> #include <sys/socket.h> -#include <sys/socketvar.h> #include <sys/sysctl.h> #include <net/vnet.h> @@ -100,23 +99,23 @@ struct hc_head { struct hc_metrics { /* housekeeping */ - CK_SLIST_ENTRY(hc_metrics) rmx_q; + CK_SLIST_ENTRY(hc_metrics) hc_q; struct in_addr ip4; /* IP address */ struct in6_addr ip6; /* IP6 address */ uint32_t ip6_zoneid; /* IPv6 scope zone id */ /* endpoint specific values for tcp */ - uint32_t rmx_mtu; /* MTU for this path */ - uint32_t rmx_ssthresh; /* outbound gateway buffer limit */ - uint32_t rmx_rtt; /* estimated round trip time */ - uint32_t rmx_rttvar; /* estimated rtt variance */ - uint32_t rmx_cwnd; /* congestion window */ - uint32_t rmx_sendpipe; /* outbound delay-bandwidth product */ - uint32_t rmx_recvpipe; /* inbound delay-bandwidth product */ + uint32_t hc_mtu; /* MTU for this path */ + uint32_t hc_ssthresh; /* outbound gateway buffer limit */ + uint32_t hc_rtt; /* estimated round trip time */ + uint32_t hc_rttvar; /* estimated rtt variance */ + uint32_t hc_cwnd; /* congestion window */ + uint32_t hc_sendpipe; /* outbound delay-bandwidth product */ + uint32_t hc_recvpipe; /* inbound delay-bandwidth product */ /* TCP hostcache internal data */ - int rmx_expire; /* lifetime for object */ + int hc_expire; /* lifetime for object */ #ifdef TCP_HC_COUNTERS - u_long rmx_hits; /* number of hits */ - u_long rmx_updates; /* number of updates */ + u_long hc_hits; /* number of hits */ + u_long hc_updates; /* number of updates */ #endif }; @@ -147,7 +146,7 @@ VNET_DEFINE_STATIC(struct tcp_hostcache, tcp_hostcache); VNET_DEFINE_STATIC(struct callout, tcp_hc_callout); #define V_tcp_hc_callout VNET(tcp_hc_callout) -static struct hc_metrics *tcp_hc_lookup(struct in_conninfo *); +static struct hc_metrics *tcp_hc_lookup(const struct in_conninfo *); static int sysctl_tcp_hc_list(SYSCTL_HANDLER_ARGS); static int sysctl_tcp_hc_histo(SYSCTL_HANDLER_ARGS); static int sysctl_tcp_hc_purgenow(SYSCTL_HANDLER_ARGS); @@ -313,7 +312,7 @@ tcp_hc_destroy(void) * Internal function: compare cache entry to a connection. */ static bool -tcp_hc_cmp(struct hc_metrics *hc_entry, struct in_conninfo *inc) +tcp_hc_cmp(struct hc_metrics *hc_entry, const struct in_conninfo *inc) { if (inc->inc_flags & INC_ISIPV6) { @@ -335,7 +334,7 @@ tcp_hc_cmp(struct hc_metrics *hc_entry, struct in_conninfo *inc) * On success returns in SMR section. */ static struct hc_metrics * -tcp_hc_lookup(struct in_conninfo *inc) +tcp_hc_lookup(const struct in_conninfo *inc) { struct hc_head *hc_head; struct hc_metrics *hc_entry; @@ -348,17 +347,17 @@ tcp_hc_lookup(struct in_conninfo *inc) * Iterate through entries in bucket row looking for a match. */ smr_enter(V_tcp_hostcache.smr); - CK_SLIST_FOREACH(hc_entry, &hc_head->hch_bucket, rmx_q) + CK_SLIST_FOREACH(hc_entry, &hc_head->hch_bucket, hc_q) if (tcp_hc_cmp(hc_entry, inc)) break; if (hc_entry != NULL) { - if (atomic_load_int(&hc_entry->rmx_expire) != + if (atomic_load_int(&hc_entry->hc_expire) != V_tcp_hostcache.expire) - atomic_store_int(&hc_entry->rmx_expire, + atomic_store_int(&hc_entry->hc_expire, V_tcp_hostcache.expire); #ifdef TCP_HC_COUNTERS - hc_entry->rmx_hits++; + hc_entry->hc_hits++; #endif } else smr_exit(V_tcp_hostcache.smr); @@ -372,7 +371,8 @@ tcp_hc_lookup(struct in_conninfo *inc) * a value is not set. 
*/ void -tcp_hc_get(struct in_conninfo *inc, struct hc_metrics_lite *hc_metrics_lite) +tcp_hc_get(const struct in_conninfo *inc, + struct hc_metrics_lite *hc_metrics_lite) { struct hc_metrics *hc_entry; @@ -394,13 +394,13 @@ tcp_hc_get(struct in_conninfo *inc, struct hc_metrics_lite *hc_metrics_lite) return; } - hc_metrics_lite->rmx_mtu = atomic_load_32(&hc_entry->rmx_mtu); - hc_metrics_lite->rmx_ssthresh = atomic_load_32(&hc_entry->rmx_ssthresh); - hc_metrics_lite->rmx_rtt = atomic_load_32(&hc_entry->rmx_rtt); - hc_metrics_lite->rmx_rttvar = atomic_load_32(&hc_entry->rmx_rttvar); - hc_metrics_lite->rmx_cwnd = atomic_load_32(&hc_entry->rmx_cwnd); - hc_metrics_lite->rmx_sendpipe = atomic_load_32(&hc_entry->rmx_sendpipe); - hc_metrics_lite->rmx_recvpipe = atomic_load_32(&hc_entry->rmx_recvpipe); + hc_metrics_lite->hc_mtu = atomic_load_32(&hc_entry->hc_mtu); + hc_metrics_lite->hc_ssthresh = atomic_load_32(&hc_entry->hc_ssthresh); + hc_metrics_lite->hc_rtt = atomic_load_32(&hc_entry->hc_rtt); + hc_metrics_lite->hc_rttvar = atomic_load_32(&hc_entry->hc_rttvar); + hc_metrics_lite->hc_cwnd = atomic_load_32(&hc_entry->hc_cwnd); + hc_metrics_lite->hc_sendpipe = atomic_load_32(&hc_entry->hc_sendpipe); + hc_metrics_lite->hc_recvpipe = atomic_load_32(&hc_entry->hc_recvpipe); smr_exit(V_tcp_hostcache.smr); } @@ -411,7 +411,7 @@ tcp_hc_get(struct in_conninfo *inc, struct hc_metrics_lite *hc_metrics_lite) * set. */ uint32_t -tcp_hc_getmtu(struct in_conninfo *inc) +tcp_hc_getmtu(const struct in_conninfo *inc) { struct hc_metrics *hc_entry; uint32_t mtu; @@ -424,7 +424,7 @@ tcp_hc_getmtu(struct in_conninfo *inc) return (0); } - mtu = atomic_load_32(&hc_entry->rmx_mtu); + mtu = atomic_load_32(&hc_entry->hc_mtu); smr_exit(V_tcp_hostcache.smr); return (mtu); @@ -435,9 +435,9 @@ tcp_hc_getmtu(struct in_conninfo *inc) * Creates a new entry if none was found. */ void -tcp_hc_updatemtu(struct in_conninfo *inc, uint32_t mtu) +tcp_hc_updatemtu(const struct in_conninfo *inc, uint32_t mtu) { - struct hc_metrics_lite hcml = { .rmx_mtu = mtu }; + struct hc_metrics_lite hcml = { .hc_mtu = mtu }; return (tcp_hc_update(inc, &hcml)); } @@ -447,7 +447,7 @@ tcp_hc_updatemtu(struct in_conninfo *inc, uint32_t mtu) * Creates a new entry if none was found. 
*/ void -tcp_hc_update(struct in_conninfo *inc, struct hc_metrics_lite *hcml) +tcp_hc_update(const struct in_conninfo *inc, struct hc_metrics_lite *hcml) { struct hc_head *hc_head; struct hc_metrics *hc_entry, *hc_prev; @@ -461,20 +461,20 @@ tcp_hc_update(struct in_conninfo *inc, struct hc_metrics_lite *hcml) hc_prev = NULL; THC_LOCK(hc_head); - CK_SLIST_FOREACH(hc_entry, &hc_head->hch_bucket, rmx_q) { + CK_SLIST_FOREACH(hc_entry, &hc_head->hch_bucket, hc_q) { if (tcp_hc_cmp(hc_entry, inc)) break; - if (CK_SLIST_NEXT(hc_entry, rmx_q) != NULL) + if (CK_SLIST_NEXT(hc_entry, hc_q) != NULL) hc_prev = hc_entry; } if (hc_entry != NULL) { - if (atomic_load_int(&hc_entry->rmx_expire) != + if (atomic_load_int(&hc_entry->hc_expire) != V_tcp_hostcache.expire) - atomic_store_int(&hc_entry->rmx_expire, + atomic_store_int(&hc_entry->hc_expire, V_tcp_hostcache.expire); #ifdef TCP_HC_COUNTERS - hc_entry->rmx_updates++; + hc_entry->hc_updates++; #endif new = false; } else { @@ -492,18 +492,18 @@ tcp_hc_update(struct in_conninfo *inc, struct hc_metrics_lite *hcml) atomic_load_int(&V_tcp_hostcache.cache_count) >= V_tcp_hostcache.cache_limit) { if (hc_prev != NULL) { - hc_entry = CK_SLIST_NEXT(hc_prev, rmx_q); - KASSERT(CK_SLIST_NEXT(hc_entry, rmx_q) == NULL, + hc_entry = CK_SLIST_NEXT(hc_prev, hc_q); + KASSERT(CK_SLIST_NEXT(hc_entry, hc_q) == NULL, ("%s: %p is not one to last", __func__, hc_prev)); - CK_SLIST_REMOVE_AFTER(hc_prev, rmx_q); + CK_SLIST_REMOVE_AFTER(hc_prev, hc_q); } else if ((hc_entry = CK_SLIST_FIRST(&hc_head->hch_bucket)) != NULL) { - KASSERT(CK_SLIST_NEXT(hc_entry, rmx_q) == NULL, + KASSERT(CK_SLIST_NEXT(hc_entry, hc_q) == NULL, ("%s: %p is not the only element", __func__, hc_entry)); CK_SLIST_REMOVE_HEAD(&hc_head->hch_bucket, - rmx_q); + hc_q); } else { THC_UNLOCK(hc_head); return; @@ -536,7 +536,7 @@ tcp_hc_update(struct in_conninfo *inc, struct hc_metrics_lite *hcml) hc_entry->ip6_zoneid = inc->inc6_zoneid; } else hc_entry->ip4 = inc->inc_faddr; - hc_entry->rmx_expire = V_tcp_hostcache.expire; + hc_entry->hc_expire = V_tcp_hostcache.expire; new = true; } @@ -544,60 +544,60 @@ tcp_hc_update(struct in_conninfo *inc, struct hc_metrics_lite *hcml) * Fill in data. Use atomics, since an existing entry is * accessible by readers in SMR section. 
*/ - if (hcml->rmx_mtu != 0) { - atomic_store_32(&hc_entry->rmx_mtu, hcml->rmx_mtu); + if (hcml->hc_mtu != 0) { + atomic_store_32(&hc_entry->hc_mtu, hcml->hc_mtu); } - if (hcml->rmx_rtt != 0) { - if (hc_entry->rmx_rtt == 0) - v = hcml->rmx_rtt; + if (hcml->hc_rtt != 0) { + if (hc_entry->hc_rtt == 0) + v = hcml->hc_rtt; else - v = ((uint64_t)hc_entry->rmx_rtt + - (uint64_t)hcml->rmx_rtt) / 2; - atomic_store_32(&hc_entry->rmx_rtt, v); + v = ((uint64_t)hc_entry->hc_rtt + + (uint64_t)hcml->hc_rtt) / 2; + atomic_store_32(&hc_entry->hc_rtt, v); TCPSTAT_INC(tcps_cachedrtt); } - if (hcml->rmx_rttvar != 0) { - if (hc_entry->rmx_rttvar == 0) - v = hcml->rmx_rttvar; + if (hcml->hc_rttvar != 0) { + if (hc_entry->hc_rttvar == 0) + v = hcml->hc_rttvar; else - v = ((uint64_t)hc_entry->rmx_rttvar + - (uint64_t)hcml->rmx_rttvar) / 2; - atomic_store_32(&hc_entry->rmx_rttvar, v); + v = ((uint64_t)hc_entry->hc_rttvar + + (uint64_t)hcml->hc_rttvar) / 2; + atomic_store_32(&hc_entry->hc_rttvar, v); TCPSTAT_INC(tcps_cachedrttvar); } - if (hcml->rmx_ssthresh != 0) { - if (hc_entry->rmx_ssthresh == 0) - v = hcml->rmx_ssthresh; + if (hcml->hc_ssthresh != 0) { + if (hc_entry->hc_ssthresh == 0) + v = hcml->hc_ssthresh; else - v = (hc_entry->rmx_ssthresh + hcml->rmx_ssthresh) / 2; - atomic_store_32(&hc_entry->rmx_ssthresh, v); + v = (hc_entry->hc_ssthresh + hcml->hc_ssthresh) / 2; + atomic_store_32(&hc_entry->hc_ssthresh, v); TCPSTAT_INC(tcps_cachedssthresh); } - if (hcml->rmx_cwnd != 0) { - if (hc_entry->rmx_cwnd == 0) - v = hcml->rmx_cwnd; + if (hcml->hc_cwnd != 0) { + if (hc_entry->hc_cwnd == 0) + v = hcml->hc_cwnd; else - v = ((uint64_t)hc_entry->rmx_cwnd + - (uint64_t)hcml->rmx_cwnd) / 2; - atomic_store_32(&hc_entry->rmx_cwnd, v); + v = ((uint64_t)hc_entry->hc_cwnd + + (uint64_t)hcml->hc_cwnd) / 2; + atomic_store_32(&hc_entry->hc_cwnd, v); /* TCPSTAT_INC(tcps_cachedcwnd); */ } - if (hcml->rmx_sendpipe != 0) { - if (hc_entry->rmx_sendpipe == 0) - v = hcml->rmx_sendpipe; + if (hcml->hc_sendpipe != 0) { + if (hc_entry->hc_sendpipe == 0) + v = hcml->hc_sendpipe; else - v = ((uint64_t)hc_entry->rmx_sendpipe + - (uint64_t)hcml->rmx_sendpipe) /2; - atomic_store_32(&hc_entry->rmx_sendpipe, v); + v = ((uint64_t)hc_entry->hc_sendpipe + + (uint64_t)hcml->hc_sendpipe) /2; + atomic_store_32(&hc_entry->hc_sendpipe, v); /* TCPSTAT_INC(tcps_cachedsendpipe); */ } - if (hcml->rmx_recvpipe != 0) { - if (hc_entry->rmx_recvpipe == 0) - v = hcml->rmx_recvpipe; + if (hcml->hc_recvpipe != 0) { + if (hc_entry->hc_recvpipe == 0) + v = hcml->hc_recvpipe; else - v = ((uint64_t)hc_entry->rmx_recvpipe + - (uint64_t)hcml->rmx_recvpipe) /2; - atomic_store_32(&hc_entry->rmx_recvpipe, v); + v = ((uint64_t)hc_entry->hc_recvpipe + + (uint64_t)hcml->hc_recvpipe) /2; + atomic_store_32(&hc_entry->hc_recvpipe, v); /* TCPSTAT_INC(tcps_cachedrecvpipe); */ } @@ -605,17 +605,17 @@ tcp_hc_update(struct in_conninfo *inc, struct hc_metrics_lite *hcml) * Put it upfront. 
*/ if (new) { - CK_SLIST_INSERT_HEAD(&hc_head->hch_bucket, hc_entry, rmx_q); + CK_SLIST_INSERT_HEAD(&hc_head->hch_bucket, hc_entry, hc_q); hc_head->hch_length++; KASSERT(hc_head->hch_length <= V_tcp_hostcache.bucket_limit, ("tcp_hostcache: bucket length too high at %p", hc_head)); atomic_add_int(&V_tcp_hostcache.cache_count, 1); TCPSTAT_INC(tcps_hc_added); } else if (hc_entry != CK_SLIST_FIRST(&hc_head->hch_bucket)) { - KASSERT(CK_SLIST_NEXT(hc_prev, rmx_q) == hc_entry, + KASSERT(CK_SLIST_NEXT(hc_prev, hc_q) == hc_entry, ("%s: %p next is not %p", __func__, hc_prev, hc_entry)); - CK_SLIST_REMOVE_AFTER(hc_prev, rmx_q); - CK_SLIST_INSERT_HEAD(&hc_head->hch_bucket, hc_entry, rmx_q); + CK_SLIST_REMOVE_AFTER(hc_prev, hc_q); + CK_SLIST_INSERT_HEAD(&hc_head->hch_bucket, hc_entry, hc_q); } THC_UNLOCK(hc_head); } @@ -668,7 +668,7 @@ sysctl_tcp_hc_list(SYSCTL_HANDLER_ARGS) for (i = 0; i < V_tcp_hostcache.hashsize; i++) { THC_LOCK(&V_tcp_hostcache.hashbase[i]); CK_SLIST_FOREACH(hc_entry, - &V_tcp_hostcache.hashbase[i].hch_bucket, rmx_q) { + &V_tcp_hostcache.hashbase[i].hch_bucket, hc_q) { sbuf_printf(&sb, "%-15s %5u %8u %6lums %6lums %8u %8u %8u " #ifdef TCP_HC_COUNTERS @@ -682,20 +682,20 @@ sysctl_tcp_hc_list(SYSCTL_HANDLER_ARGS) #else "IPv6?", #endif - hc_entry->rmx_mtu, - hc_entry->rmx_ssthresh, - msec((u_long)hc_entry->rmx_rtt * + hc_entry->hc_mtu, + hc_entry->hc_ssthresh, + msec((u_long)hc_entry->hc_rtt * (RTM_RTTUNIT / (hz * TCP_RTT_SCALE))), - msec((u_long)hc_entry->rmx_rttvar * + msec((u_long)hc_entry->hc_rttvar * (RTM_RTTUNIT / (hz * TCP_RTTVAR_SCALE))), - hc_entry->rmx_cwnd, - hc_entry->rmx_sendpipe, - hc_entry->rmx_recvpipe, + hc_entry->hc_cwnd, + hc_entry->hc_sendpipe, + hc_entry->hc_recvpipe, #ifdef TCP_HC_COUNTERS - hc_entry->rmx_hits, - hc_entry->rmx_updates, + hc_entry->hc_hits, + hc_entry->hc_updates, #endif - hc_entry->rmx_expire); + hc_entry->hc_expire); } THC_UNLOCK(&V_tcp_hostcache.hashbase[i]); sbuf_drain(&sb); @@ -762,33 +762,33 @@ tcp_hc_purge_internal(int all) head = &V_tcp_hostcache.hashbase[i]; hc_prev = NULL; THC_LOCK(head); - CK_SLIST_FOREACH_SAFE(hc_entry, &head->hch_bucket, rmx_q, + CK_SLIST_FOREACH_SAFE(hc_entry, &head->hch_bucket, hc_q, hc_next) { KASSERT(head->hch_length > 0 && head->hch_length <= V_tcp_hostcache.bucket_limit, ("tcp_hostcache: " "bucket length out of range at %u: %u", i, head->hch_length)); if (all || - atomic_load_int(&hc_entry->rmx_expire) <= 0) { + atomic_load_int(&hc_entry->hc_expire) <= 0) { if (hc_prev != NULL) { KASSERT(hc_entry == - CK_SLIST_NEXT(hc_prev, rmx_q), + CK_SLIST_NEXT(hc_prev, hc_q), ("%s: %p is not next to %p", __func__, hc_entry, hc_prev)); - CK_SLIST_REMOVE_AFTER(hc_prev, rmx_q); + CK_SLIST_REMOVE_AFTER(hc_prev, hc_q); } else { KASSERT(hc_entry == CK_SLIST_FIRST(&head->hch_bucket), ("%s: %p is not first", __func__, hc_entry)); CK_SLIST_REMOVE_HEAD(&head->hch_bucket, - rmx_q); + hc_q); } uma_zfree_smr(V_tcp_hostcache.zone, hc_entry); head->hch_length--; atomic_subtract_int(&V_tcp_hostcache.cache_count, 1); } else { - atomic_subtract_int(&hc_entry->rmx_expire, + atomic_subtract_int(&hc_entry->hc_expire, V_tcp_hostcache.prune); hc_prev = hc_entry; } diff --git a/sys/netinet/tcp_hpts.c b/sys/netinet/tcp_hpts.c index 85341cab0750..b60cdf45af52 100644 --- a/sys/netinet/tcp_hpts.c +++ b/sys/netinet/tcp_hpts.c @@ -173,6 +173,7 @@ /* Each hpts has its own p_mtx which is used for locking */ #define HPTS_MTX_ASSERT(hpts) mtx_assert(&(hpts)->p_mtx, MA_OWNED) #define HPTS_LOCK(hpts) mtx_lock(&(hpts)->p_mtx) +#define HPTS_TRYLOCK(hpts) 
mtx_trylock(&(hpts)->p_mtx) #define HPTS_UNLOCK(hpts) mtx_unlock(&(hpts)->p_mtx) struct tcp_hpts_entry { /* Cache line 0x00 */ @@ -239,7 +240,7 @@ static int tcp_bind_threads = 2; static int tcp_use_irq_cpu = 0; static int hpts_does_tp_logging = 0; -static int32_t tcp_hptsi(struct tcp_hpts_entry *hpts, int from_callout); +static int32_t tcp_hptsi(struct tcp_hpts_entry *hpts, bool from_callout); static void tcp_hpts_thread(void *ctx); int32_t tcp_min_hptsi_time = DEFAULT_MIN_SLEEP; @@ -430,40 +431,42 @@ hpts_random_cpu(void) static void tcp_hpts_log(struct tcp_hpts_entry *hpts, struct tcpcb *tp, struct timeval *tv, - int slots_to_run, int idx, int from_callout) + int slots_to_run, int idx, bool from_callout) { - union tcp_log_stackspecific log; - /* - * Unused logs are - * 64 bit - delRate, rttProp, bw_inuse - * 16 bit - cwnd_gain - * 8 bit - bbr_state, bbr_substate, inhpts; - */ - memset(&log.u_bbr, 0, sizeof(log.u_bbr)); - log.u_bbr.flex1 = hpts->p_nxt_slot; - log.u_bbr.flex2 = hpts->p_cur_slot; - log.u_bbr.flex3 = hpts->p_prev_slot; - log.u_bbr.flex4 = idx; - log.u_bbr.flex5 = hpts->p_curtick; - log.u_bbr.flex6 = hpts->p_on_queue_cnt; - log.u_bbr.flex7 = hpts->p_cpu; - log.u_bbr.flex8 = (uint8_t)from_callout; - log.u_bbr.inflight = slots_to_run; - log.u_bbr.applimited = hpts->overidden_sleep; - log.u_bbr.delivered = hpts->saved_curtick; - log.u_bbr.timeStamp = tcp_tv_to_usectick(tv); - log.u_bbr.epoch = hpts->saved_curslot; - log.u_bbr.lt_epoch = hpts->saved_prev_slot; - log.u_bbr.pkts_out = hpts->p_delayed_by; - log.u_bbr.lost = hpts->p_hpts_sleep_time; - log.u_bbr.pacing_gain = hpts->p_cpu; - log.u_bbr.pkt_epoch = hpts->p_runningslot; - log.u_bbr.use_lt_bw = 1; - TCP_LOG_EVENTP(tp, NULL, - &tptosocket(tp)->so_rcv, - &tptosocket(tp)->so_snd, - BBR_LOG_HPTSDIAG, 0, - 0, &log, false, tv); + if (hpts_does_tp_logging && tcp_bblogging_on(tp)) { + union tcp_log_stackspecific log; + /* + * Unused logs are + * 64 bit - delRate, rttProp, bw_inuse + * 16 bit - cwnd_gain + * 8 bit - bbr_state, bbr_substate, inhpts; + */ + memset(&log, 0, sizeof(log)); + log.u_bbr.flex1 = hpts->p_nxt_slot; + log.u_bbr.flex2 = hpts->p_cur_slot; + log.u_bbr.flex3 = hpts->p_prev_slot; + log.u_bbr.flex4 = idx; + log.u_bbr.flex5 = hpts->p_curtick; + log.u_bbr.flex6 = hpts->p_on_queue_cnt; + log.u_bbr.flex7 = hpts->p_cpu; + log.u_bbr.flex8 = (uint8_t)from_callout; + log.u_bbr.inflight = slots_to_run; + log.u_bbr.applimited = hpts->overidden_sleep; + log.u_bbr.delivered = hpts->saved_curtick; + log.u_bbr.timeStamp = tcp_tv_to_usectick(tv); + log.u_bbr.epoch = hpts->saved_curslot; + log.u_bbr.lt_epoch = hpts->saved_prev_slot; + log.u_bbr.pkts_out = hpts->p_delayed_by; + log.u_bbr.lost = hpts->p_hpts_sleep_time; + log.u_bbr.pacing_gain = hpts->p_cpu; + log.u_bbr.pkt_epoch = hpts->p_runningslot; + log.u_bbr.use_lt_bw = 1; + TCP_LOG_EVENTP(tp, NULL, + &tptosocket(tp)->so_rcv, + &tptosocket(tp)->so_snd, + BBR_LOG_HPTSDIAG, 0, + 0, &log, false, tv); + } } static void @@ -1075,7 +1078,7 @@ tcp_hpts_set_max_sleep(struct tcp_hpts_entry *hpts, int wrap_loop_cnt) } static int32_t -tcp_hptsi(struct tcp_hpts_entry *hpts, int from_callout) +tcp_hptsi(struct tcp_hpts_entry *hpts, bool from_callout) { struct tcpcb *tp; struct timeval tv; @@ -1086,7 +1089,10 @@ tcp_hptsi(struct tcp_hpts_entry *hpts, int from_callout) int32_t wrap_loop_cnt = 0; int32_t slot_pos_of_endpoint = 0; int32_t orig_exit_slot; - int8_t completed_measure = 0, seen_endpoint = 0; + bool completed_measure, seen_endpoint; + + completed_measure = false; + seen_endpoint = 
false; HPTS_MTX_ASSERT(hpts); NET_EPOCH_ASSERT(); @@ -1251,11 +1257,11 @@ again: } /* For debugging */ - if (seen_endpoint == 0) { - seen_endpoint = 1; + if (!seen_endpoint) { + seen_endpoint = true; orig_exit_slot = slot_pos_of_endpoint = runningslot; - } else if (completed_measure == 0) { + } else if (!completed_measure) { /* Record the new position */ orig_exit_slot = runningslot; } @@ -1349,9 +1355,7 @@ again: } CURVNET_SET(inp->inp_vnet); /* Lets do any logging that we might want to */ - if (hpts_does_tp_logging && tcp_bblogging_on(tp)) { - tcp_hpts_log(hpts, tp, &tv, slots_to_run, i, from_callout); - } + tcp_hpts_log(hpts, tp, &tv, slots_to_run, i, from_callout); if (tp->t_fb_ptr != NULL) { kern_prefetch(tp->t_fb_ptr, &did_prefetch); @@ -1369,24 +1373,20 @@ again: * cause a call to output if it is needed so we do * not need a second call to tcp_output(). So we do * one or the other but not both. + * + * XXXGL: some KPI abuse here. tfb_do_queued_segments + * returns unlocked with positive error (always 1) and + * tcp_output returns unlocked with negative error. */ tp->t_flags2 |= TF2_HPTS_CALLS; if ((tp->t_flags2 & TF2_SUPPORTS_MBUFQ) && - !STAILQ_EMPTY(&tp->t_inqueue)) { - error = (*tp->t_fb->tfb_do_queued_segments)(tp, 0); - /* - * A non-zero return for input queue processing - * is the lock is released and most likely the - * inp is gone. - */ - if (error) - goto skip_pacing; - } else + !STAILQ_EMPTY(&tp->t_inqueue)) + error = -(*tp->t_fb->tfb_do_queued_segments)(tp, + 0); + else error = tcp_output(tp); - if (error < 0) - goto skip_pacing; - INP_WUNLOCK(inp); - skip_pacing: + if (__predict_true(error >= 0)) + INP_WUNLOCK(inp); CURVNET_RESTORE(); } if (seen_endpoint) { @@ -1397,7 +1397,7 @@ again: * is where we calculated the end of our cycle to * be when we first entered. */ - completed_measure = 1; + completed_measure = true; } HPTS_LOCK(hpts); hpts->p_runningslot++; @@ -1414,7 +1414,7 @@ no_one: */ hpts->p_prev_slot = hpts->p_cur_slot; hpts->p_lasttick = hpts->p_curtick; - if ((from_callout == 0) || (loop_cnt > max_pacer_loops)) { + if (!from_callout || (loop_cnt > max_pacer_loops)) { /* * Something is serious slow we have * looped through processing the wheel @@ -1435,7 +1435,7 @@ no_one: } hpts->p_curtick = tcp_gethptstick(&tv); hpts->p_cur_slot = tick_to_wheel(hpts->p_curtick); - if (seen_endpoint == 0) { + if (!seen_endpoint) { /* We saw no endpoint but we may be looping */ orig_exit_slot = hpts->p_cur_slot; } @@ -1462,11 +1462,11 @@ no_run: * multiple times so the slots may not align either. 
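[Editor's sketch] The XXXGL note above records the convention the loop now relies on: tfb_do_queued_segments() returns with the inpcb unlocked and a positive error (always 1), while tcp_output() signals the same situation with a negative error, so negating the former lets one error >= 0 test decide whether INP_WUNLOCK() is still the caller's job. A small stand-alone sketch of that pattern; queued_segments(), output() and unlock() are stand-ins, not the kernel functions:

#include <stdbool.h>
#include <stdio.h>

/*
 * Illustrative stand-ins for the two callees in the hunk above:
 * queued_segments() reports "I already dropped the lock" with a
 * positive value, output() reports it with a negative value.
 */
static int queued_segments(bool drop_lock) { return (drop_lock ? 1 : 0); }
static int output(bool drop_lock) { return (drop_lock ? -1 : 0); }

static bool locked;
static void unlock(void) { locked = false; }

/*
 * Negating the first convention folds both onto one sign, so a single
 * "error >= 0" test decides whether the caller still owns the lock,
 * which is the pattern the hunk switches to.
 */
static void
run_once(bool use_queue, bool callee_drops_lock)
{
	int error;

	locked = true;
	if (use_queue)
		error = -queued_segments(callee_drops_lock);
	else
		error = output(callee_drops_lock);
	if (error >= 0)
		unlock();		/* callee left the lock to us */
	printf("error=%d still-locked=%d\n", error, locked);
}

int
main(void)
{
	run_once(true, false);		/* queue path, lock retained */
	run_once(true, true);		/* queue path, callee dropped lock */
	run_once(false, true);		/* output path, callee dropped lock */
	return (0);
}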
*/ KASSERT(((hpts->p_prev_slot == hpts->p_cur_slot) || - (wrap_loop_cnt >= 2) || (from_callout == 0)), + (wrap_loop_cnt >= 2) || !from_callout), ("H:%p p_prev_slot:%u not equal to p_cur_slot:%u", hpts, hpts->p_prev_slot, hpts->p_cur_slot)); KASSERT(((hpts->p_lasttick == hpts->p_curtick) - || (wrap_loop_cnt >= 2) || (from_callout == 0)), + || (wrap_loop_cnt >= 2) || !from_callout), ("H:%p p_lasttick:%u not equal to p_curtick:%u", hpts, hpts->p_lasttick, hpts->p_curtick)); if (from_callout && (hpts->p_lasttick != hpts->p_curtick)) { @@ -1476,7 +1476,7 @@ no_run: goto again; } - if (from_callout){ + if (from_callout) { tcp_hpts_set_max_sleep(hpts, wrap_loop_cnt); } if (seen_endpoint) @@ -1486,7 +1486,7 @@ no_run: } void -__tcp_set_hpts(struct tcpcb *tp, int32_t line) +tcp_set_hpts(struct tcpcb *tp) { struct tcp_hpts_entry *hpts; int failed; @@ -1499,7 +1499,7 @@ __tcp_set_hpts(struct tcpcb *tp, int32_t line) if (failed == 0) tp->t_flags2 |= TF2_HPTS_CPU_SET; } - mtx_unlock(&hpts->p_mtx); + HPTS_UNLOCK(hpts); } static struct tcp_hpts_entry * @@ -1556,7 +1556,7 @@ __tcp_run_hpts(void) /* Already active */ return; } - if (mtx_trylock(&hpts->p_mtx) == 0) { + if (!HPTS_TRYLOCK(hpts)) { /* Someone else got the lock */ return; } @@ -1566,7 +1566,7 @@ __tcp_run_hpts(void) hpts->syscall_cnt++; counter_u64_add(hpts_direct_call, 1); hpts->p_hpts_active = 1; - ticks_ran = tcp_hptsi(hpts, 0); + ticks_ran = tcp_hptsi(hpts, false); /* We may want to adjust the sleep values here */ if (hpts->p_on_queue_cnt >= conn_cnt_thresh) { if (ticks_ran > ticks_indicate_less_sleep) { @@ -1611,8 +1611,7 @@ __tcp_run_hpts(void) } hpts->p_hpts_active = 0; out_with_mtx: - HPTS_MTX_ASSERT(hpts); - mtx_unlock(&hpts->p_mtx); + HPTS_UNLOCK(hpts); NET_EPOCH_EXIT(et); } @@ -1626,7 +1625,7 @@ tcp_hpts_thread(void *ctx) int ticks_ran; hpts = (struct tcp_hpts_entry *)ctx; - mtx_lock(&hpts->p_mtx); + HPTS_LOCK(hpts); if (hpts->p_direct_wake) { /* Signaled by input or output with low occupancy count. */ callout_stop(&hpts->co); @@ -1636,7 +1635,7 @@ tcp_hpts_thread(void *ctx) counter_u64_add(hpts_wake_timeout, 1); if (callout_pending(&hpts->co) || !callout_active(&hpts->co)) { - mtx_unlock(&hpts->p_mtx); + HPTS_UNLOCK(hpts); return; } } @@ -1682,7 +1681,7 @@ tcp_hpts_thread(void *ctx) } hpts->sleeping = 0; hpts->p_hpts_active = 1; - ticks_ran = tcp_hptsi(hpts, 1); + ticks_ran = tcp_hptsi(hpts, true); tv.tv_sec = 0; tv.tv_usec = hpts->p_hpts_sleep_time * HPTS_TICKS_PER_SLOT; if ((hpts->p_on_queue_cnt > conn_cnt_thresh) && (hpts->hit_callout_thresh == 0)) { @@ -1765,7 +1764,7 @@ back_to_sleep: hpts_timeout_swi, hpts, hpts->p_cpu, (C_DIRECT_EXEC | C_PREL(tcp_hpts_precision))); NET_EPOCH_EXIT(et); - mtx_unlock(&hpts->p_mtx); + HPTS_UNLOCK(hpts); } #undef timersub diff --git a/sys/netinet/tcp_hpts.h b/sys/netinet/tcp_hpts.h index b097a2b98db9..f5856ed8e688 100644 --- a/sys/netinet/tcp_hpts.h +++ b/sys/netinet/tcp_hpts.h @@ -149,8 +149,7 @@ uint32_t tcp_hpts_insert_diag(struct tcpcb *tp, uint32_t slot, int32_t line, #define tcp_hpts_insert(inp, slot) \ tcp_hpts_insert_diag((inp), (slot), __LINE__, NULL) -void __tcp_set_hpts(struct tcpcb *tp, int32_t line); -#define tcp_set_hpts(a) __tcp_set_hpts(a, __LINE__) +void tcp_set_hpts(struct tcpcb *tp); void tcp_set_inp_to_drop(struct inpcb *inp, uint16_t reason); @@ -165,25 +164,25 @@ extern int32_t tcp_min_hptsi_time; * The following functions should also be available * to userspace as well. 
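[Editor's sketch] As the comment notes, these conversion helpers are meant to be usable from userspace too. Here is a stand-alone restatement of the two simplest ones from the tcp_hpts.h hunk: struct timeval to pacing slots (tcp_tv_to_hptstick() multiplies seconds by 100000, which is where the assumed 10-microsecond slot width below comes from) and to whole microseconds (tcp_tv_to_usectick()). The local function and macro names are restatements, not the kernel header itself:

#include <stdint.h>
#include <stdio.h>
#include <sys/time.h>

/* Assumed from the header: 100000 slots per second, i.e. 10 us each. */
#define HPTS_TICKS_PER_SLOT	10
#define HPTS_USEC_IN_SEC	1000000

/* struct timeval -> pacing slots, mirroring tcp_tv_to_hptstick(). */
static uint32_t
tv_to_hptstick(const struct timeval *sv)
{
	return ((uint32_t)(sv->tv_sec * 100000 +
	    sv->tv_usec / HPTS_TICKS_PER_SLOT));
}

/* struct timeval -> whole microseconds, mirroring tcp_tv_to_usectick(). */
static uint32_t
tv_to_usectick(const struct timeval *sv)
{
	return ((uint32_t)(sv->tv_sec * HPTS_USEC_IN_SEC + sv->tv_usec));
}

int
main(void)
{
	struct timeval tv = { .tv_sec = 2, .tv_usec = 345678 };

	printf("slots=%u usecs=%u\n", tv_to_hptstick(&tv),
	    tv_to_usectick(&tv));
	return (0);
}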
*/ -static __inline uint32_t +static inline uint32_t tcp_tv_to_hptstick(const struct timeval *sv) { return ((sv->tv_sec * 100000) + (sv->tv_usec / HPTS_TICKS_PER_SLOT)); } -static __inline uint32_t +static inline uint32_t tcp_tv_to_usectick(const struct timeval *sv) { return ((uint32_t) ((sv->tv_sec * HPTS_USEC_IN_SEC) + sv->tv_usec)); } -static __inline uint32_t +static inline uint32_t tcp_tv_to_mssectick(const struct timeval *sv) { return ((uint32_t) ((sv->tv_sec * HPTS_MSEC_IN_SEC) + (sv->tv_usec/HPTS_USEC_IN_MSEC))); } -static __inline uint64_t +static inline uint64_t tcp_tv_to_lusectick(const struct timeval *sv) { return ((uint64_t)((sv->tv_sec * HPTS_USEC_IN_SEC) + sv->tv_usec)); @@ -199,7 +198,7 @@ get_hpts_min_sleep_time(void) return (tcp_min_hptsi_time + HPTS_TICKS_PER_SLOT); } -static __inline uint32_t +static inline uint32_t tcp_gethptstick(struct timeval *sv) { struct timeval tv; @@ -210,7 +209,7 @@ tcp_gethptstick(struct timeval *sv) return (tcp_tv_to_hptstick(sv)); } -static __inline uint64_t +static inline uint64_t tcp_get_u64_usecs(struct timeval *tv) { struct timeval tvd; @@ -221,7 +220,7 @@ tcp_get_u64_usecs(struct timeval *tv) return (tcp_tv_to_lusectick(tv)); } -static __inline uint32_t +static inline uint32_t tcp_get_usecs(struct timeval *tv) { struct timeval tvd; diff --git a/sys/netinet/tcp_input.c b/sys/netinet/tcp_input.c index 3fda6e903738..de428ae1af6f 100644 --- a/sys/netinet/tcp_input.c +++ b/sys/netinet/tcp_input.c @@ -112,9 +112,6 @@ #include <netinet/tcpip.h> #include <netinet/cc/cc.h> #include <netinet/tcp_fastopen.h> -#ifdef TCPPCAP -#include <netinet/tcp_pcap.h> -#endif #include <netinet/tcp_syncache.h> #ifdef TCP_OFFLOAD #include <netinet/tcp_offload.h> @@ -135,6 +132,11 @@ SYSCTL_INT(_net_inet_tcp, OID_AUTO, log_in_vain, CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(tcp_log_in_vain), 0, "Log all incoming TCP segments to closed ports"); +VNET_DEFINE(int, tcp_bind_all_fibs) = 1; +SYSCTL_INT(_net_inet_tcp, OID_AUTO, bind_all_fibs, CTLFLAG_VNET | CTLFLAG_RDTUN, + &VNET_NAME(tcp_bind_all_fibs), 0, + "Bound sockets receive traffic from all FIBs"); + VNET_DEFINE(int, blackhole) = 0; #define V_blackhole VNET(blackhole) SYSCTL_INT(_net_inet_tcp, OID_AUTO, blackhole, CTLFLAG_VNET | CTLFLAG_RW, @@ -202,6 +204,11 @@ SYSCTL_INT(_net_inet_tcp, OID_AUTO, insecure_rst, CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(tcp_insecure_rst), 0, "Follow RFC793 instead of RFC5961 criteria for accepting RST packets"); +VNET_DEFINE(int, tcp_insecure_ack) = 0; +SYSCTL_INT(_net_inet_tcp, OID_AUTO, insecure_ack, CTLFLAG_VNET | CTLFLAG_RW, + &VNET_NAME(tcp_insecure_ack), 0, + "Follow RFC793 criteria for validating SEG.ACK"); + VNET_DEFINE(int, tcp_recvspace) = 1024*64; #define V_tcp_recvspace VNET(tcp_recvspace) SYSCTL_INT(_net_inet_tcp, TCPCTL_RECVSPACE, recvspace, CTLFLAG_VNET | CTLFLAG_RW, @@ -363,11 +370,11 @@ cc_conn_init(struct tcpcb *tp) tcp_hc_get(&inp->inp_inc, &metrics); maxseg = tcp_maxseg(tp); - if (tp->t_srtt == 0 && (rtt = metrics.rmx_rtt)) { + if (tp->t_srtt == 0 && (rtt = metrics.hc_rtt)) { tp->t_srtt = rtt; TCPSTAT_INC(tcps_usedrtt); - if (metrics.rmx_rttvar) { - tp->t_rttvar = metrics.rmx_rttvar; + if (metrics.hc_rttvar) { + tp->t_rttvar = metrics.hc_rttvar; TCPSTAT_INC(tcps_usedrttvar); } else { /* default variation is +- 1 rtt */ @@ -376,16 +383,16 @@ cc_conn_init(struct tcpcb *tp) } TCPT_RANGESET(tp->t_rxtcur, ((tp->t_srtt >> 2) + tp->t_rttvar) >> 1, - tp->t_rttmin, TCPTV_REXMTMAX); + tp->t_rttmin, tcp_rexmit_max); } - if (metrics.rmx_ssthresh) { + if (metrics.hc_ssthresh) { /* * 
There's some sort of gateway or interface * buffer limit on the path. Use this to set * the slow start threshold, but set the * threshold to no less than 2*mss. */ - tp->snd_ssthresh = max(2 * maxseg, metrics.rmx_ssthresh); + tp->snd_ssthresh = max(2 * maxseg, metrics.hc_ssthresh); TCPSTAT_INC(tcps_usedssthresh); } @@ -439,10 +446,7 @@ cc_cong_signal(struct tcpcb *tp, struct tcphdr *th, uint32_t type) case CC_RTO: tp->t_dupacks = 0; tp->t_bytes_acked = 0; - if ((tp->t_rxtshift > 1) || - !((tp->t_flags & TF_SACK_PERMIT) && - (!TAILQ_EMPTY(&tp->snd_holes)))) - EXIT_RECOVERY(tp->t_flags); + EXIT_RECOVERY(tp->t_flags); if (tp->t_flags2 & TF2_ECN_PERMIT) tp->t_flags2 |= TF2_ECN_SND_CWR; break; @@ -458,6 +462,7 @@ cc_cong_signal(struct tcpcb *tp, struct tcphdr *th, uint32_t type) ENTER_CONGRECOVERY(tp->t_flags); tp->snd_nxt = tp->snd_max; tp->t_flags &= ~TF_PREVVALID; + tp->t_rxtshift = 0; tp->t_badrxtwin = 0; break; } @@ -562,8 +567,6 @@ int tcp6_input_with_port(struct mbuf **mp, int *offp, int proto, uint16_t port) { struct mbuf *m; - struct in6_ifaddr *ia6; - struct ip6_hdr *ip6; m = *mp; if (m->m_len < *offp + sizeof(struct tcphdr)) { @@ -575,19 +578,6 @@ tcp6_input_with_port(struct mbuf **mp, int *offp, int proto, uint16_t port) } } - /* - * draft-itojun-ipv6-tcp-to-anycast - * better place to put this in? - */ - ip6 = mtod(m, struct ip6_hdr *); - ia6 = in6ifa_ifwithaddr(&ip6->ip6_dst, 0 /* XXX */, false); - if (ia6 && (ia6->ia6_flags & IN6_IFF_ANYCAST)) { - icmp6_error(m, ICMP6_DST_UNREACH, ICMP6_DST_UNREACH_ADDR, - (caddr_t)&ip6->ip6_dst - (caddr_t)ip6); - *mp = NULL; - return (IPPROTO_DONE); - } - *mp = m; return (tcp_input_with_port(mp, offp, proto, port)); } @@ -631,6 +621,7 @@ tcp_input_with_port(struct mbuf **mp, int *offp, int proto, uint16_t port) #endif /* INET6 */ struct tcpopt to; /* options in this segment */ char *s = NULL; /* address and port logging */ + bool closed_port = false; /* segment is hitting a closed port */ NET_EPOCH_ASSERT(); @@ -831,8 +822,10 @@ tcp_input_with_port(struct mbuf **mp, int *offp, int proto, uint16_t port) */ lookupflag = INPLOOKUP_WILDCARD | ((thflags & (TH_ACK|TH_SYN)) == TH_SYN ? - INPLOOKUP_RLOCKPCB : INPLOOKUP_WLOCKPCB); + INPLOOKUP_RLOCKPCB : INPLOOKUP_WLOCKPCB) | + (V_tcp_bind_all_fibs ? 0 : INPLOOKUP_FIB); findpcb: + tp = NULL; #ifdef INET6 if (isipv6 && fwd_tag != NULL) { struct sockaddr_in6 *next_hop6; @@ -915,24 +908,8 @@ findpcb: log(LOG_INFO, "%s; %s: Connection attempt " "to closed port\n", s, __func__); } - /* - * When blackholing do not respond with a RST but - * completely ignore the segment and drop it. - */ - if (((V_blackhole == 1 && (thflags & TH_SYN)) || - V_blackhole == 2) && (V_blackhole_local || ( -#ifdef INET6 - isipv6 ? !in6_localaddr(&ip6->ip6_src) : -#endif -#ifdef INET - !in_localip(ip->ip_src) -#else - true -#endif - ))) - goto dropunlock; - - rstreason = BANDLIM_RST_CLOSEDPORT; + rstreason = BANDLIM_TCP_RST; + closed_port = true; goto dropwithreset; } INP_LOCK_ASSERT(inp); @@ -1023,12 +1000,14 @@ findpcb: * down or it is in the CLOSED state. Either way we drop the * segment and send an appropriate response. */ - rstreason = BANDLIM_RST_CLOSEDPORT; + rstreason = BANDLIM_TCP_RST; + closed_port = true; goto dropwithreset; } if ((tp->t_port != port) && (tp->t_state > TCPS_LISTEN)) { - rstreason = BANDLIM_RST_CLOSEDPORT; + rstreason = BANDLIM_TCP_RST; + closed_port = true; goto dropwithreset; } @@ -1080,6 +1059,8 @@ findpcb: * socket appended to the listen queue in SYN_RECEIVED state. 
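[Editor's sketch] The cc_conn_init() hunk above seeds t_srtt and t_rttvar from the host cache and recomputes the retransmission timer, now bounded by tcp_rexmit_max instead of TCPTV_REXMTMAX. Ignoring the kernel's fixed-point scaling (TCP_RTT_SCALE / TCP_RTTVAR_SCALE), the underlying idea is the RFC 6298 rule RTO = SRTT + 4 * RTTVAR clamped between a floor and a ceiling. A sketch with millisecond values; RTO_MIN_MS and RTO_MAX_MS are illustrative bounds, not the kernel defaults:

#include <stdint.h>
#include <stdio.h>

#define RTO_MIN_MS	1000	/* illustrative floor (t_rttmin's role) */
#define RTO_MAX_MS	64000	/* illustrative ceiling (tcp_rexmit_max's role) */

/* Clamp helper playing the role of TCPT_RANGESET(). */
static uint32_t
rangeset(uint32_t value, uint32_t lo, uint32_t hi)
{
	if (value < lo)
		return (lo);
	if (value > hi)
		return (hi);
	return (value);
}

/*
 * RFC 6298-style RTO from a smoothed RTT and an RTT variance estimate,
 * as seeded here from the cached hc_rtt / hc_rttvar values.
 */
static uint32_t
rto_from_cached(uint32_t srtt_ms, uint32_t rttvar_ms)
{
	return (rangeset(srtt_ms + 4 * rttvar_ms, RTO_MIN_MS, RTO_MAX_MS));
}

int
main(void)
{
	/* 40 ms RTT with 10 ms variance: 80 ms, raised to the 1000 ms floor. */
	printf("rto = %u ms\n", rto_from_cached(40, 10));
	/* 200 ms RTT with 300 ms variance: 200 + 1200 = 1400 ms. */
	printf("rto = %u ms\n", rto_from_cached(200, 300));
	return (0);
}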
*/ if ((thflags & (TH_RST|TH_ACK|TH_SYN)) == TH_ACK) { + int result; + /* * Parse the TCP options here because * syncookies need access to the reflected @@ -1089,8 +1070,8 @@ findpcb: /* * NB: syncache_expand() doesn't unlock inp. */ - rstreason = syncache_expand(&inc, &to, th, &so, m, port); - if (rstreason < 0) { + result = syncache_expand(&inc, &to, th, &so, m, port); + if (result < 0) { /* * A failing TCP MD5 signature comparison * must result in the segment being dropped @@ -1098,7 +1079,7 @@ findpcb: * to the sender. */ goto dropunlock; - } else if (rstreason == 0) { + } else if (result == 0) { /* * No syncache entry, or ACK was not for our * SYN/ACK. Do our protection against double @@ -1117,7 +1098,7 @@ findpcb: * of the failure cause. */ INP_WUNLOCK(inp); - rstreason = BANDLIM_RST_OPENPORT; + rstreason = BANDLIM_TCP_RST; lookupflag &= ~INPLOOKUP_WILDCARD; goto findpcb; } @@ -1208,7 +1189,7 @@ tfo_socket_result: s, __func__); syncache_badack(&inc, port); /* XXX: Not needed! */ TCPSTAT_INC(tcps_badsyn); - rstreason = BANDLIM_RST_OPENPORT; + rstreason = BANDLIM_TCP_RST; goto dropwithreset; } /* @@ -1284,7 +1265,7 @@ tfo_socket_result: "Connection attempt to deprecated " "IPv6 address rejected\n", s, __func__); - rstreason = BANDLIM_RST_OPENPORT; + rstreason = BANDLIM_TCP_RST; goto dropwithreset; } } @@ -1298,7 +1279,7 @@ tfo_socket_result: * global or subnet broad- or multicast address. * Note that it is quite possible to receive unicast * link-layer packets with a broadcast IP address. Use - * in_broadcast() to find them. + * in_ifnet_broadcast() to find them. */ if (m->m_flags & (M_BCAST|M_MCAST)) { if ((s = tcp_log_addrs(&inc, th, NULL, NULL))) @@ -1343,7 +1324,7 @@ tfo_socket_result: if (IN_MULTICAST(ntohl(ip->ip_dst.s_addr)) || IN_MULTICAST(ntohl(ip->ip_src.s_addr)) || ip->ip_src.s_addr == htonl(INADDR_BROADCAST) || - in_broadcast(ip->ip_dst, m->m_pkthdr.rcvif)) { + in_ifnet_broadcast(ip->ip_dst, m->m_pkthdr.rcvif)) { if ((s = tcp_log_addrs(&inc, th, NULL, NULL))) log(LOG_DEBUG, "%s; %s: Listen socket: " "Connection attempt from/to broad- " @@ -1368,15 +1349,6 @@ tfo_socket_result: * Only the listen socket is unlocked by syncache_add(). */ return (IPPROTO_DONE); - } else if (tp->t_state == TCPS_LISTEN) { - /* - * When a listen socket is torn down the SO_ACCEPTCONN - * flag is removed first while connections are drained - * from the accept queue in a unlock/lock cycle of the - * ACCEPT_LOCK, opening a race condition allowing a SYN - * attempt go through unhandled. - */ - goto dropunlock; } #if defined(IPSEC_SUPPORT) || defined(TCP_SIGNATURE) if (tp->t_flags & TF_SIGNATURE) { @@ -1410,15 +1382,28 @@ tfo_socket_result: return (IPPROTO_DONE); dropwithreset: + /* + * When blackholing do not respond with a RST but + * completely ignore the segment and drop it. + */ + if (rstreason == BANDLIM_TCP_RST && + ((!closed_port && V_blackhole == 3) || + (closed_port && + ((V_blackhole == 1 && (thflags & TH_SYN)) || V_blackhole > 1))) && + (V_blackhole_local || ( +#ifdef INET6 + isipv6 ? !in6_localip(&ip6->ip6_src) : +#endif +#ifdef INET + !in_localip(ip->ip_src) +#else + true +#endif + ))) + goto dropunlock; TCP_PROBE5(receive, NULL, tp, m, tp, th); - - if (inp != NULL) { - tcp_dropwithreset(m, th, tp, tlen, rstreason); - INP_UNLOCK(inp); - } else - tcp_dropwithreset(m, th, NULL, tlen, rstreason); + tcp_dropwithreset(m, th, tp, tlen, rstreason); m = NULL; /* mbuf chain got consumed. 
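[Editor's sketch] The consolidated dropwithreset path above gates the RST behind the net.inet.tcp.blackhole level, whether the segment hit a closed port, and whether it was a SYN. A stand-alone restatement of just that predicate; the rstreason and local-address conditions from the hunk are omitted for brevity, and blackhole_drop() is a made-up name:

#include <stdbool.h>
#include <stdio.h>

#define TH_SYN	0x02

/*
 * Suppress the RST (drop silently) when
 *   - blackhole == 3 and the segment hit an open port, or
 *   - the port was closed and either blackhole > 1, or blackhole == 1
 *     and the offending segment was a SYN,
 * mirroring the condition in the dropwithreset hunk above.
 */
static bool
blackhole_drop(int blackhole, bool closed_port, int thflags)
{
	return ((!closed_port && blackhole == 3) ||
	    (closed_port &&
	    ((blackhole == 1 && (thflags & TH_SYN)) || blackhole > 1)));
}

int
main(void)
{
	printf("%d\n", blackhole_drop(1, true, TH_SYN));	/* 1: drop */
	printf("%d\n", blackhole_drop(1, true, 0));		/* 0: send RST */
	printf("%d\n", blackhole_drop(2, true, 0));		/* 1: drop */
	printf("%d\n", blackhole_drop(3, false, 0));		/* 1: drop */
	return (0);
}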
*/ - goto drop; dropunlock: if (m != NULL) @@ -1460,7 +1445,7 @@ drop: * is at least 3/8 of the current socket buffer size. * 3. receive buffer size has not hit maximal automatic size; * - * If all of the criteria are met we increaset the socket buffer + * If all of the criteria are met, we increase the socket buffer * by a 1/2 (bounded by the max). This allows us to keep ahead * of slow-start but also makes it so our peer never gets limited * by our rwnd which we then open up causing a burst. @@ -1514,7 +1499,7 @@ tcp_handle_wakeup(struct tcpcb *tp) struct socket *so = tptosocket(tp); tp->t_flags &= ~TF_WAKESOR; - SOCKBUF_LOCK_ASSERT(&so->so_rcv); + SOCK_RECVBUF_LOCK_ASSERT(so); sorwakeup_locked(so); } } @@ -1537,7 +1522,9 @@ tcp_do_segment(struct tcpcb *tp, struct mbuf *m, struct tcphdr *th, struct tcpopt to; int tfo_syn; u_int maxseg = 0; + bool no_data; + no_data = (tlen == 0); thflags = tcp_get_flags(th); tp->sackhint.last_sack_ack = 0; sack_changed = SACK_NOCHANGE; @@ -1550,10 +1537,6 @@ tcp_do_segment(struct tcpcb *tp, struct mbuf *m, struct tcphdr *th, KASSERT(tp->t_state != TCPS_TIME_WAIT, ("%s: TCPS_TIME_WAIT", __func__)); -#ifdef TCPPCAP - /* Save segment, if requested. */ - tcp_pcap_add(th, m, &(tp->t_inpkts)); -#endif TCP_LOG_EVENT(tp, th, &so->so_rcv, &so->so_snd, TCP_LOG_IN, 0, tlen, NULL, true); @@ -1615,7 +1598,14 @@ tcp_do_segment(struct tcpcb *tp, struct mbuf *m, struct tcphdr *th, tcp_dooptions(&to, (u_char *)(th + 1), (th->th_off << 2) - sizeof(struct tcphdr), (thflags & TH_SYN) ? TO_SYN : 0); - + if (tp->t_flags2 & TF2_PROC_SACK_PROHIBIT) { + /* + * We don't look at sack's from the + * peer because the MSS is too small which + * can subject us to an attack. + */ + to.to_flags &= ~TOF_SACK; + } #if defined(IPSEC_SUPPORT) || defined(TCP_SIGNATURE) if ((tp->t_flags & TF_SIGNATURE) != 0 && (to.to_flags & TOF_SIGNATURE) == 0) { @@ -1633,11 +1623,6 @@ tcp_do_segment(struct tcpcb *tp, struct mbuf *m, struct tcphdr *th, to.to_tsecr -= tp->ts_offset; if (TSTMP_GT(to.to_tsecr, tcp_ts_getticks())) { to.to_tsecr = 0; - } else if (tp->t_rxtshift == 1 && - tp->t_flags & TF_PREVVALID && - tp->t_badrxtwin != 0 && - TSTMP_LT(to.to_tsecr, tp->t_badrxtwin)) { - cc_cong_signal(tp, th, CC_RTO_ERR); } } /* @@ -1778,7 +1763,7 @@ tcp_do_segment(struct tcpcb *tp, struct mbuf *m, struct tcphdr *th, tp->ts_recent = to.to_tsval; } - if (tlen == 0) { + if (no_data) { if (SEQ_GT(th->th_ack, tp->snd_una) && SEQ_LEQ(th->th_ack, tp->snd_max) && !IN_RECOVERY(tp->t_flags) && @@ -1790,15 +1775,17 @@ tcp_do_segment(struct tcpcb *tp, struct mbuf *m, struct tcphdr *th, TCPSTAT_INC(tcps_predack); /* - * "bad retransmit" recovery without timestamps. + * "bad retransmit" recovery. */ - if ((to.to_flags & TOF_TS) == 0 && - tp->t_rxtshift == 1 && + if (tp->t_rxtshift == 1 && tp->t_flags & TF_PREVVALID && tp->t_badrxtwin != 0 && - TSTMP_LT(ticks, tp->t_badrxtwin)) { + (((to.to_flags & TOF_TS) != 0 && + to.to_tsecr != 0 && + TSTMP_LT(to.to_tsecr, tp->t_badrxtwin)) || + ((to.to_flags & TOF_TS) == 0 && + TSTMP_LT(ticks, tp->t_badrxtwin)))) cc_cong_signal(tp, th, CC_RTO_ERR); - } /* * Recalculate the transmit timer / rtt. @@ -1934,7 +1921,7 @@ tcp_do_segment(struct tcpcb *tp, struct mbuf *m, struct tcphdr *th, newsize = tcp_autorcvbuf(m, th, so, tp, tlen); /* Add data to socket buffer. 
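[Editor's sketch] The header-prediction hunk above unifies the "bad retransmit" detection so the timestamp-based and tick-based checks share one branch: after the first RTO retransmission (with a valid baseline), the RTO is treated as spurious when the echoed timestamp, or the current tick count when timestamps are absent, is still older than t_badrxtwin. A hedged restatement as a pure function; the parameter names are illustrative, not the tcpcb fields:

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define TSTMP_LT(a, b)	((int32_t)((a) - (b)) < 0)	/* wrapping compare */

/*
 * True when a first retransmission timeout should be judged spurious,
 * following the unified condition in the hunk above.
 */
static bool
rto_was_spurious(int rxtshift, bool prev_valid, uint32_t badrxtwin,
    bool have_ts, uint32_t tsecr, uint32_t ticks_now)
{
	if (rxtshift != 1 || !prev_valid || badrxtwin == 0)
		return (false);
	if (have_ts && tsecr != 0)
		return (TSTMP_LT(tsecr, badrxtwin));
	if (!have_ts)
		return (TSTMP_LT(ticks_now, badrxtwin));
	return (false);
}

int
main(void)
{
	/* Echoed timestamp predates the bad-retransmit window: spurious. */
	printf("%d\n", rto_was_spurious(1, true, 1000, true, 900, 0));
	/* No timestamps and enough ticks have passed: genuine loss. */
	printf("%d\n", rto_was_spurious(1, true, 1000, false, 0, 1500));
	return (0);
}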
*/ - SOCKBUF_LOCK(&so->so_rcv); + SOCK_RECVBUF_LOCK(so); if (so->so_rcv.sb_state & SBS_CANTRCVMORE) { m_freem(m); } else { @@ -1985,7 +1972,7 @@ tcp_do_segment(struct tcpcb *tp, struct mbuf *m, struct tcphdr *th, if ((thflags & TH_ACK) && (SEQ_LEQ(th->th_ack, tp->snd_una) || SEQ_GT(th->th_ack, tp->snd_max))) { - rstreason = BANDLIM_RST_OPENPORT; + rstreason = BANDLIM_TCP_RST; tcp_log_end_status(tp, TCP_EI_STATUS_RST_IN_FRONT); goto dropwithreset; } @@ -1998,7 +1985,7 @@ tcp_do_segment(struct tcpcb *tp, struct mbuf *m, struct tcphdr *th, * FIN, or a RST. */ if ((thflags & (TH_SYN|TH_ACK)) == (TH_SYN|TH_ACK)) { - rstreason = BANDLIM_RST_OPENPORT; + rstreason = BANDLIM_TCP_RST; tcp_log_end_status(tp, TCP_EI_STATUS_RST_IN_FRONT); goto dropwithreset; } else if (thflags & TH_SYN) { @@ -2200,10 +2187,7 @@ tcp_do_segment(struct tcpcb *tp, struct mbuf *m, struct tcphdr *th, } } else { TCPSTAT_INC(tcps_badrst); - /* Send challenge ACK. */ - tcp_respond(tp, mtod(m, void *), th, m, - tp->rcv_nxt, tp->snd_nxt, TH_ACK); - tp->last_ack_sent = tp->rcv_nxt; + tcp_send_challenge_ack(tp, th, m); m = NULL; } } @@ -2225,10 +2209,7 @@ tcp_do_segment(struct tcpcb *tp, struct mbuf *m, struct tcphdr *th, rstreason = BANDLIM_UNLIMITED; } else { tcp_ecn_input_syn_sent(tp, thflags, iptos); - /* Send challenge ACK. */ - tcp_respond(tp, mtod(m, void *), th, m, tp->rcv_nxt, - tp->snd_nxt, TH_ACK); - tp->last_ack_sent = tp->rcv_nxt; + tcp_send_challenge_ack(tp, th, m); m = NULL; } goto drop; @@ -2272,7 +2253,7 @@ tcp_do_segment(struct tcpcb *tp, struct mbuf *m, struct tcphdr *th, * for the "LAND" DoS attack. */ if (tp->t_state == TCPS_SYN_RECEIVED && SEQ_LT(th->th_seq, tp->irs)) { - rstreason = BANDLIM_RST_OPENPORT; + rstreason = BANDLIM_TCP_RST; tcp_log_end_status(tp, TCP_EI_STATUS_RST_IN_FRONT); goto dropwithreset; } @@ -2435,6 +2416,42 @@ tcp_do_segment(struct tcpcb *tp, struct mbuf *m, struct tcphdr *th, /* * Ack processing. */ + if (SEQ_GEQ(tp->snd_una, tp->iss + (TCP_MAXWIN << tp->snd_scale))) { + /* Checking SEG.ACK against ISS is definitely redundant. */ + tp->t_flags2 |= TF2_NO_ISS_CHECK; + } + if (!V_tcp_insecure_ack) { + tcp_seq seq_min; + bool ghost_ack_check; + + if (tp->t_flags2 & TF2_NO_ISS_CHECK) { + /* Check for too old ACKs (RFC 5961, Section 5.2). */ + seq_min = tp->snd_una - tp->max_sndwnd; + ghost_ack_check = false; + } else { + if (SEQ_GT(tp->iss + 1, tp->snd_una - tp->max_sndwnd)) { + /* Checking for ghost ACKs is stricter. */ + seq_min = tp->iss + 1; + ghost_ack_check = true; + } else { + /* + * Checking for too old ACKs (RFC 5961, + * Section 5.2) is stricter. 
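[Editor's sketch] The new block above (which continues just below) picks the stricter of two lower bounds for an acceptable SEG.ACK, either SND.UNA - MAX.SND.WND per RFC 5961 Section 5.2 or ISS + 1 to catch ghost ACKs for data never sent, and answers anything below it with a challenge ACK. A stand-alone sketch of the acceptance test for the branch where the ISS check still applies, using the usual modular sequence comparison; ack_acceptable() is an illustrative name:

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

typedef uint32_t tcp_seq;

/* Modular 32-bit sequence comparison, as the SEQ_LT()/SEQ_GT() macros do. */
#define SEQ_LT(a, b)	((int32_t)((a) - (b)) < 0)
#define SEQ_GT(a, b)	((int32_t)((a) - (b)) > 0)

/*
 * Reject (and in the kernel, challenge-ACK) any SEG.ACK below the
 * stricter of the two floors chosen in the hunk above.
 */
static bool
ack_acceptable(tcp_seq th_ack, tcp_seq snd_una, tcp_seq iss,
    uint32_t max_sndwnd)
{
	tcp_seq seq_min;

	if (SEQ_GT(iss + 1, snd_una - max_sndwnd))
		seq_min = iss + 1;		/* ghost-ACK check is stricter */
	else
		seq_min = snd_una - max_sndwnd;	/* too-old-ACK check is stricter */
	return (!SEQ_LT(th_ack, seq_min));
}

int
main(void)
{
	tcp_seq iss = 1000, snd_una = 5000;
	uint32_t max_sndwnd = 2000;

	printf("%d\n", ack_acceptable(4000, snd_una, iss, max_sndwnd)); /* 1: inside window */
	printf("%d\n", ack_acceptable(2500, snd_una, iss, max_sndwnd)); /* 0: older than snd_una - max_sndwnd */
	printf("%d\n", ack_acceptable(900, snd_una, iss, max_sndwnd));  /* 0: even below iss + 1 */
	return (0);
}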
+ */ + seq_min = tp->snd_una - tp->max_sndwnd; + ghost_ack_check = false; + } + } + if (SEQ_LT(th->th_ack, seq_min)) { + if (ghost_ack_check) + TCPSTAT_INC(tcps_rcvghostack); + else + TCPSTAT_INC(tcps_rcvacktooold); + tcp_send_challenge_ack(tp, th, m); + m = NULL; + goto drop; + } + } switch (tp->t_state) { /* * In SYN_RECEIVED state, the ack ACKs our SYN, so enter @@ -2549,7 +2566,7 @@ tcp_do_segment(struct tcpcb *tp, struct mbuf *m, struct tcphdr *th, if (SEQ_LEQ(th->th_ack, tp->snd_una)) { maxseg = tcp_maxseg(tp); - if (tlen == 0 && + if (no_data && (tiwin == tp->snd_wnd || (tp->t_flags & TF_SACK_PERMIT))) { /* @@ -2618,26 +2635,30 @@ tcp_do_segment(struct tcpcb *tp, struct mbuf *m, struct tcphdr *th, tcp_do_prr_ack(tp, th, &to, sack_changed, &maxseg); } else if (tcp_is_sack_recovery(tp, &to) && - IN_FASTRECOVERY(tp->t_flags)) { + IN_FASTRECOVERY(tp->t_flags) && + (tp->snd_nxt == tp->snd_max)) { int awnd; /* * Compute the amount of data in flight first. * We can inject new data into the pipe iff - * we have less than 1/2 the original window's + * we have less than ssthresh * worth of data in flight. */ - if (V_tcp_do_newsack) { - awnd = tcp_compute_pipe(tp); - } else { - awnd = (tp->snd_nxt - tp->snd_fack) + - tp->sackhint.sack_bytes_rexmit; - } + awnd = tcp_compute_pipe(tp); if (awnd < tp->snd_ssthresh) { - tp->snd_cwnd += maxseg; + tp->snd_cwnd += imax(maxseg, + imin(2 * maxseg, + tp->sackhint.delivered_data)); if (tp->snd_cwnd > tp->snd_ssthresh) tp->snd_cwnd = tp->snd_ssthresh; } + } else if (tcp_is_sack_recovery(tp, &to) && + IN_FASTRECOVERY(tp->t_flags) && + SEQ_LT(tp->snd_nxt, tp->snd_max)) { + tp->snd_cwnd += imax(maxseg, + imin(2 * maxseg, + tp->sackhint.delivered_data)); } else { tp->snd_cwnd += maxseg; } @@ -2661,14 +2682,13 @@ enter_recovery: tcp_seq onxt = tp->snd_nxt; /* - * If we're doing sack, or prr, check - * to see if we're already in sack + * If we're doing sack, check to + * see if we're already in sack * recovery. If we're not doing sack, * check to see if we're in newreno * recovery. */ - if (V_tcp_do_prr || - (tp->t_flags & TF_SACK_PERMIT)) { + if (tcp_is_sack_recovery(tp, &to)) { if (IN_FASTRECOVERY(tp->t_flags)) { tp->t_dupacks = 0; break; @@ -2688,30 +2708,40 @@ enter_recovery: tp->t_rtttime = 0; if (V_tcp_do_prr) { /* - * snd_ssthresh is already updated by - * cc_cong_signal. + * snd_ssthresh and snd_recover are + * already updated by cc_cong_signal. */ if (tcp_is_sack_recovery(tp, &to)) { /* - * Exclude Limited Transmit + * Include Limited Transmit * segments here */ tp->sackhint.prr_delivered = - maxseg; + imin(tp->snd_max - th->th_ack, + (tp->snd_limited + 1) * maxseg); } else { tp->sackhint.prr_delivered = - imin(tp->snd_max - tp->snd_una, - imin(INT_MAX / 65536, - tp->t_dupacks) * maxseg); + maxseg; } tp->sackhint.recover_fs = max(1, tp->snd_nxt - tp->snd_una); } + tp->snd_limited = 0; if (tcp_is_sack_recovery(tp, &to)) { TCPSTAT_INC(tcps_sack_recovery_episode); - tp->snd_recover = tp->snd_nxt; - tp->snd_cwnd = maxseg; + /* + * When entering LR after RTO due to + * Duplicate ACKs, retransmit existing + * holes from the scoreboard. 
+ */ + tcp_resend_sackholes(tp); + /* Avoid inflating cwnd in tcp_output */ + tp->snd_nxt = tp->snd_max; + tp->snd_cwnd = tcp_compute_pipe(tp) + + maxseg; (void) tcp_output(tp); + /* Set cwnd to the expected flightsize */ + tp->snd_cwnd = tp->snd_ssthresh; if (SEQ_GT(th->th_ack, tp->snd_una)) { goto resume_partialack; } @@ -2752,18 +2782,23 @@ enter_recovery: __func__)); if (tp->t_dupacks == 1) tp->snd_limited = 0; - tp->snd_cwnd = - (tp->snd_nxt - tp->snd_una) + + if ((tp->snd_nxt == tp->snd_max) && + (tp->t_rxtshift == 0)) + tp->snd_cwnd = + SEQ_SUB(tp->snd_nxt, + tp->snd_una) - + tcp_sack_adjust(tp); + tp->snd_cwnd += (tp->t_dupacks - tp->snd_limited) * - maxseg; + maxseg - tcp_sack_adjust(tp); /* * Only call tcp_output when there * is new data available to be sent * or we need to send an ACK. */ - SOCKBUF_LOCK(&so->so_snd); + SOCK_SENDBUF_LOCK(so); avail = sbavail(&so->so_snd); - SOCKBUF_UNLOCK(&so->so_snd); + SOCK_SENDBUF_UNLOCK(so); if (tp->t_flags & TF_ACKNOW || (avail >= SEQ_SUB(tp->snd_nxt, tp->snd_una))) { @@ -2774,9 +2809,11 @@ enter_recovery: KASSERT((tp->t_dupacks == 2 && tp->snd_limited == 0) || (sent == maxseg + 1 && - tp->t_flags & TF_SENTFIN), - ("%s: sent too much", - __func__)); + tp->t_flags & TF_SENTFIN) || + (sent < 2 * maxseg && + tp->t_flags & TF_NODELAY), + ("%s: sent too much: %u>%u", + __func__, sent, maxseg)); tp->snd_limited = 2; } else if (sent > 0) { ++tp->snd_limited; @@ -2802,7 +2839,9 @@ enter_recovery: * counted as dupacks here. */ if (tcp_is_sack_recovery(tp, &to) && - (sack_changed != SACK_NOCHANGE)) { + (((tp->t_rxtshift == 0) && (sack_changed != SACK_NOCHANGE)) || + ((tp->t_rxtshift > 0) && (sack_changed == SACK_NEWLOSS))) && + (tp->snd_nxt == tp->snd_max)) { tp->t_dupacks++; /* limit overhead by setting maxseg last */ if (!IN_FASTRECOVERY(tp->t_flags) && @@ -2938,7 +2977,7 @@ process_ACK: tcp_xmit_timer(tp, ticks - tp->t_rtttime); } - SOCKBUF_LOCK(&so->so_snd); + SOCK_SENDBUF_LOCK(so); /* * Clear t_acktime if remote side has ACKd all data in the * socket buffer and FIN (if applicable). @@ -2969,7 +3008,7 @@ process_ACK: * skip rest of ACK processing. */ if (acked == 0) { - SOCKBUF_UNLOCK(&so->so_snd); + SOCK_SENDBUF_UNLOCK(so); goto step6; } @@ -3009,9 +3048,8 @@ process_ACK: SEQ_GEQ(th->th_ack, tp->snd_recover)) { cc_post_recovery(tp, th); } - if (tp->t_flags & TF_SACK_PERMIT) { - if (SEQ_GT(tp->snd_una, tp->snd_recover)) - tp->snd_recover = tp->snd_una; + if (SEQ_GT(tp->snd_una, tp->snd_recover)) { + tp->snd_recover = tp->snd_una; } if (SEQ_LT(tp->snd_nxt, tp->snd_una)) tp->snd_nxt = tp->snd_una; @@ -3084,8 +3122,7 @@ step6: (tp->snd_wl1 == th->th_seq && (SEQ_LT(tp->snd_wl2, th->th_ack) || (tp->snd_wl2 == th->th_ack && tiwin > tp->snd_wnd))))) { /* keep track of pure window updates */ - if (tlen == 0 && - tp->snd_wl2 == th->th_ack && tiwin > tp->snd_wnd) + if (no_data && tp->snd_wl2 == th->th_ack && tiwin > tp->snd_wnd) TCPSTAT_INC(tcps_rcvwinupd); tp->snd_wnd = tiwin; tp->snd_wl1 = th->th_seq; @@ -3106,11 +3143,11 @@ step6: * soreceive. It's hard to imagine someone * actually wanting to send this much urgent data. 
*/ - SOCKBUF_LOCK(&so->so_rcv); + SOCK_RECVBUF_LOCK(so); if (th->th_urp + sbavail(&so->so_rcv) > sb_max) { th->th_urp = 0; /* XXX */ thflags &= ~TH_URG; /* XXX */ - SOCKBUF_UNLOCK(&so->so_rcv); /* XXX */ + SOCK_RECVBUF_UNLOCK(so); /* XXX */ goto dodata; /* XXX */ } /* @@ -3136,7 +3173,7 @@ step6: sohasoutofband(so); tp->t_oobflags &= ~(TCPOOB_HAVEDATA | TCPOOB_HADDATA); } - SOCKBUF_UNLOCK(&so->so_rcv); + SOCK_RECVBUF_UNLOCK(so); /* * Remove out of band data so doesn't get presented to user. * This can happen independent of advancing the URG pointer, @@ -3209,7 +3246,7 @@ dodata: /* XXX */ thflags = tcp_get_flags(th) & TH_FIN; TCPSTAT_INC(tcps_rcvpack); TCPSTAT_ADD(tcps_rcvbyte, tlen); - SOCKBUF_LOCK(&so->so_rcv); + SOCK_RECVBUF_LOCK(so); if (so->so_rcv.sb_state & SBS_CANTRCVMORE) m_freem(m); else @@ -3395,7 +3432,7 @@ dropafterack: if (tp->t_state == TCPS_SYN_RECEIVED && (thflags & TH_ACK) && (SEQ_GT(tp->snd_una, th->th_ack) || SEQ_GT(th->th_ack, tp->snd_max)) ) { - rstreason = BANDLIM_RST_OPENPORT; + rstreason = BANDLIM_TCP_RST; tcp_log_end_status(tp, TCP_EI_STATUS_RST_IN_FRONT); goto dropwithreset; } @@ -3466,7 +3503,7 @@ tcp_dropwithreset(struct mbuf *m, struct tcphdr *th, struct tcpcb *tp, if (IN_MULTICAST(ntohl(ip->ip_dst.s_addr)) || IN_MULTICAST(ntohl(ip->ip_src.s_addr)) || ip->ip_src.s_addr == htonl(INADDR_BROADCAST) || - in_broadcast(ip->ip_dst, m->m_pkthdr.rcvif)) + in_ifnet_broadcast(ip->ip_dst, m->m_pkthdr.rcvif)) goto drop; } #endif @@ -3703,7 +3740,7 @@ tcp_xmit_timer(struct tcpcb *tp, int rtt) * the minimum feasible timer (which is 2 ticks). */ TCPT_RANGESET(tp->t_rxtcur, TCP_REXMTVAL(tp), - max(tp->t_rttmin, rtt + 2), TCPTV_REXMTMAX); + max(tp->t_rttmin, rtt + 2), tcp_rexmit_max); /* * We received an ack for a packet that wasn't retransmitted; @@ -3818,19 +3855,16 @@ tcp_mss_update(struct tcpcb *tp, int offer, int mtuoffer, offer = max(offer, V_tcp_minmss); } - /* - * rmx information is now retrieved from tcp_hostcache. - */ - tcp_hc_get(&inp->inp_inc, &metrics); - if (metricptr != NULL) - bcopy(&metrics, metricptr, sizeof(struct hc_metrics_lite)); + if (metricptr == NULL) + metricptr = &metrics; + tcp_hc_get(&inp->inp_inc, metricptr); /* * If there's a discovered mtu in tcp hostcache, use it. * Else, use the link mtu. */ - if (metrics.rmx_mtu) - mss = min(metrics.rmx_mtu, maxmtu) - min_protoh; + if (metricptr->hc_mtu) + mss = min(metricptr->hc_mtu, maxmtu) - min_protoh; else { #ifdef INET6 if (isipv6) { @@ -3883,6 +3917,17 @@ tcp_mss_update(struct tcpcb *tp, int offer, int mtuoffer, mss = max(mss, 64); tp->t_maxseg = mss; + if (tp->t_maxseg < V_tcp_mssdflt) { + /* + * The MSS is so small we should not process incoming + * SACK's since we are subject to attack in such a + * case. + */ + tp->t_flags2 |= TF2_PROC_SACK_PROHIBIT; + } else { + tp->t_flags2 &= ~TF2_PROC_SACK_PROHIBIT; + } + } void @@ -3910,9 +3955,9 @@ tcp_mss(struct tcpcb *tp, int offer) * if the mss is larger than the socket buffer, decrease the mss. 
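[Editor's sketch] Right after this comment, tcp_mss() clamps the MSS to the send buffer (preferring a cached hc_sendpipe size while the buffer is still at its default) and in the opposite case sizes the buffer in whole segments. A small sketch of that fitting step; fit_mss_and_buffer() and the sb_max_adj cap are illustrative, and the round-up to a multiple of the MSS is an assumption about the unshown part of the function rather than something visible in this hunk:

#include <stdint.h>
#include <stdio.h>

/* Same rounding as the sys/param.h roundup() macro. */
#define roundup(x, y)	((((x) + ((y) - 1)) / (y)) * (y))

/*
 * If the buffer is smaller than the MSS, shrink the MSS to the buffer;
 * otherwise round the buffer up to a whole number of segments, capped
 * by a hypothetical maximum, so full segments fit exactly.
 */
static void
fit_mss_and_buffer(uint32_t *mss, uint32_t *bufsize, uint32_t sb_max_adj)
{
	if (*bufsize < *mss) {
		*mss = *bufsize;
	} else {
		*bufsize = roundup(*bufsize, *mss);
		if (*bufsize > sb_max_adj)
			*bufsize = sb_max_adj;
	}
}

int
main(void)
{
	uint32_t mss = 1460, buf = 65536;

	fit_mss_and_buffer(&mss, &buf, 2 * 1024 * 1024);
	printf("mss=%u bufsize=%u\n", mss, buf);	/* 65700 = 45 * 1460 */
	return (0);
}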
*/ so = inp->inp_socket; - SOCKBUF_LOCK(&so->so_snd); - if ((so->so_snd.sb_hiwat == V_tcp_sendspace) && metrics.rmx_sendpipe) - bufsize = metrics.rmx_sendpipe; + SOCK_SENDBUF_LOCK(so); + if ((so->so_snd.sb_hiwat == V_tcp_sendspace) && metrics.hc_sendpipe) + bufsize = metrics.hc_sendpipe; else bufsize = so->so_snd.sb_hiwat; if (bufsize < mss) @@ -3924,7 +3969,7 @@ tcp_mss(struct tcpcb *tp, int offer) if (bufsize > so->so_snd.sb_hiwat) (void)sbreserve_locked(so, SO_SND, bufsize, NULL); } - SOCKBUF_UNLOCK(&so->so_snd); + SOCK_SENDBUF_UNLOCK(so); /* * Sanity check: make sure that maxseg will be large * enough to allow some data on segments even if the @@ -3934,10 +3979,20 @@ tcp_mss(struct tcpcb *tp, int offer) * XXXGL: shouldn't we reserve space for IP/IPv6 options? */ tp->t_maxseg = max(mss, 64); + if (tp->t_maxseg < V_tcp_mssdflt) { + /* + * The MSS is so small we should not process incoming + * SACK's since we are subject to attack in such a + * case. + */ + tp->t_flags2 |= TF2_PROC_SACK_PROHIBIT; + } else { + tp->t_flags2 &= ~TF2_PROC_SACK_PROHIBIT; + } - SOCKBUF_LOCK(&so->so_rcv); - if ((so->so_rcv.sb_hiwat == V_tcp_recvspace) && metrics.rmx_recvpipe) - bufsize = metrics.rmx_recvpipe; + SOCK_RECVBUF_LOCK(so); + if ((so->so_rcv.sb_hiwat == V_tcp_recvspace) && metrics.hc_recvpipe) + bufsize = metrics.hc_recvpipe; else bufsize = so->so_rcv.sb_hiwat; if (bufsize > mss) { @@ -3947,7 +4002,7 @@ tcp_mss(struct tcpcb *tp, int offer) if (bufsize > so->so_rcv.sb_hiwat) (void)sbreserve_locked(so, SO_RCV, bufsize, NULL); } - SOCKBUF_UNLOCK(&so->so_rcv); + SOCK_RECVBUF_UNLOCK(so); /* Check the interface for TSO capabilities. */ if (cap.ifcap & CSUM_TSO) { @@ -3955,6 +4010,8 @@ tcp_mss(struct tcpcb *tp, int offer) tp->t_tsomax = cap.tsomax; tp->t_tsomaxsegcount = cap.tsomaxsegcount; tp->t_tsomaxsegsize = cap.tsomaxsegsize; + if (cap.ipsec_tso) + tp->t_flags2 |= TF2_IPSEC_TSO; } } @@ -4022,11 +4079,7 @@ tcp_do_prr_ack(struct tcpcb *tp, struct tcphdr *th, struct tcpopt *to, (IN_CONGRECOVERY(tp->t_flags) && !IN_FASTRECOVERY(tp->t_flags))) { del_data = tp->sackhint.delivered_data; - if (V_tcp_do_newsack) - pipe = tcp_compute_pipe(tp); - else - pipe = (tp->snd_nxt - tp->snd_fack) + - tp->sackhint.sack_bytes_rexmit; + pipe = tcp_compute_pipe(tp); } else { if (tp->sackhint.prr_delivered < (tcprexmtthresh * maxseg + tp->snd_recover - tp->snd_una)) { @@ -4075,9 +4128,7 @@ tcp_do_prr_ack(struct tcpcb *tp, struct tcphdr *th, struct tcpopt *to, */ if (IN_FASTRECOVERY(tp->t_flags)) { if (tcp_is_sack_recovery(tp, to)) { - tp->snd_cwnd = tp->snd_nxt - tp->snd_recover + - tp->sackhint.sack_bytes_rexmit + - (snd_cnt * maxseg); + tp->snd_cwnd = pipe - del_data + (snd_cnt * maxseg); } else { tp->snd_cwnd = (tp->snd_max - tp->snd_una) + (snd_cnt * maxseg); @@ -4105,17 +4156,19 @@ tcp_newreno_partial_ack(struct tcpcb *tp, struct tcphdr *th) tcp_timer_activate(tp, TT_REXMT, 0); tp->t_rtttime = 0; - tp->snd_nxt = th->th_ack; - /* - * Set snd_cwnd to one segment beyond acknowledged offset. - * (tp->snd_una has not yet been updated when this function is called.) - */ - tp->snd_cwnd = maxseg + BYTES_THIS_ACK(tp, th); - tp->t_flags |= TF_ACKNOW; - (void) tcp_output(tp); - tp->snd_cwnd = ocwnd; - if (SEQ_GT(onxt, tp->snd_nxt)) - tp->snd_nxt = onxt; + if (IN_FASTRECOVERY(tp->t_flags)) { + tp->snd_nxt = th->th_ack; + /* + * Set snd_cwnd to one segment beyond acknowledged offset. + * (tp->snd_una has not yet been updated when this function is called.) 
+ */ + tp->snd_cwnd = maxseg + BYTES_THIS_ACK(tp, th); + tp->t_flags |= TF_ACKNOW; + (void) tcp_output(tp); + tp->snd_cwnd = ocwnd; + if (SEQ_GT(onxt, tp->snd_nxt)) + tp->snd_nxt = onxt; + } /* * Partial window deflation. Relies on fact that tp->snd_una * not updated yet. @@ -4130,14 +4183,19 @@ tcp_newreno_partial_ack(struct tcpcb *tp, struct tcphdr *th) int tcp_compute_pipe(struct tcpcb *tp) { - if (tp->t_fb->tfb_compute_pipe == NULL) { - return (tp->snd_max - tp->snd_una + + int pipe; + + if (tp->t_fb->tfb_compute_pipe != NULL) { + pipe = (*tp->t_fb->tfb_compute_pipe)(tp); + } else if (V_tcp_do_newsack) { + pipe = tp->snd_max - tp->snd_una + tp->sackhint.sack_bytes_rexmit - tp->sackhint.sacked_bytes - - tp->sackhint.lost_bytes); + tp->sackhint.lost_bytes; } else { - return((*tp->t_fb->tfb_compute_pipe)(tp)); + pipe = tp->snd_nxt - tp->snd_fack + tp->sackhint.sack_bytes_rexmit; } + return (imax(pipe, 0)); } uint32_t diff --git a/sys/netinet/tcp_log_buf.c b/sys/netinet/tcp_log_buf.c index 7b937958a4fb..e24790ece43d 100644 --- a/sys/netinet/tcp_log_buf.c +++ b/sys/netinet/tcp_log_buf.c @@ -29,6 +29,7 @@ #include <sys/cdefs.h> #include "opt_inet.h" +#include "opt_ddb.h" #include <sys/param.h> #include <sys/arb.h> #include <sys/hash.h> @@ -43,11 +44,18 @@ #include <sys/socket.h> #include <sys/socketvar.h> #include <sys/sysctl.h> +#ifdef DDB +#include <sys/time.h> +#endif #include <sys/tree.h> #include <sys/stats.h> /* Must come after qmath.h and tree.h */ #include <sys/counter.h> #include <dev/tcp_log/tcp_log_dev.h> +#ifdef DDB +#include <ddb/ddb.h> +#endif + #include <net/if.h> #include <net/if_var.h> #include <net/vnet.h> @@ -1840,35 +1848,36 @@ retry: log_buf->tlb_txbuf.tls_sb_ccc = 0; } /* Copy values from tp to the log entry. */ -#define COPY_STAT(f) log_buf->tlb_ ## f = tp->f -#define COPY_STAT_T(f) log_buf->tlb_ ## f = tp->t_ ## f - COPY_STAT_T(state); - COPY_STAT_T(starttime); - COPY_STAT(iss); - COPY_STAT_T(flags); - COPY_STAT(snd_una); - COPY_STAT(snd_max); - COPY_STAT(snd_cwnd); - COPY_STAT(snd_nxt); - COPY_STAT(snd_recover); - COPY_STAT(snd_wnd); - COPY_STAT(snd_ssthresh); - COPY_STAT_T(srtt); - COPY_STAT_T(rttvar); - COPY_STAT(rcv_up); - COPY_STAT(rcv_adv); - COPY_STAT(rcv_nxt); - COPY_STAT(rcv_wnd); - COPY_STAT_T(dupacks); - COPY_STAT_T(segqlen); - COPY_STAT(snd_numholes); - COPY_STAT(snd_scale); - COPY_STAT(rcv_scale); - COPY_STAT_T(flags2); - COPY_STAT_T(fbyte_in); - COPY_STAT_T(fbyte_out); -#undef COPY_STAT -#undef COPY_STAT_T + log_buf->tlb_state = tp->t_state; + log_buf->tlb_starttime = tp->t_starttime; + log_buf->tlb_iss = tp->iss; + log_buf->tlb_flags = tp->t_flags; + log_buf->tlb_snd_una = tp->snd_una; + log_buf->tlb_snd_max = tp->snd_max; + log_buf->tlb_snd_cwnd = tp->snd_cwnd; + log_buf->tlb_snd_nxt = tp->snd_nxt; + log_buf->tlb_snd_recover = tp->snd_recover; + log_buf->tlb_snd_wnd = tp->snd_wnd; + log_buf->tlb_snd_ssthresh = tp->snd_ssthresh; + log_buf->tlb_srtt = tp->t_srtt; + log_buf->tlb_rttvar = tp->t_rttvar; + log_buf->tlb_rcv_up = tp->rcv_up; + log_buf->tlb_rcv_adv = tp->rcv_adv; + log_buf->tlb_flags2 = tp->t_flags2; + log_buf->tlb_rcv_nxt = tp->rcv_nxt; + log_buf->tlb_rcv_wnd = tp->rcv_wnd; + log_buf->tlb_dupacks = tp->t_dupacks; + log_buf->tlb_segqlen = tp->t_segqlen; + log_buf->tlb_snd_numholes = tp->snd_numholes; + log_buf->tlb_flex1 = 0; + log_buf->tlb_flex2 = 0; + log_buf->tlb_fbyte_in = tp->t_fbyte_in; + log_buf->tlb_fbyte_out = tp->t_fbyte_out; + log_buf->tlb_snd_scale = tp->snd_scale; + log_buf->tlb_rcv_scale = tp->rcv_scale; + log_buf->_pad[0] = 0; + 
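[Editor's sketch] The tcp_compute_pipe() rewrite earlier in this chunk folds the two variants behind one function and clamps the result at zero; its V_tcp_do_newsack branch counts bytes between snd_una and snd_max, plus retransmitted-but-unacknowledged SACK bytes, minus bytes the peer already SACKed and bytes scored as lost. A stand-alone restatement with plain integers; compute_pipe() here is a local sketch, not the kernel function:

#include <stdint.h>
#include <stdio.h>

static int imax(int a, int b) { return (a > b ? a : b); }

/*
 * Estimate of data in flight in the spirit of the newsack branch of
 * the tcp_compute_pipe() hunk above, clamped at zero as imax() does.
 */
static int
compute_pipe(uint32_t snd_max, uint32_t snd_una, int sack_bytes_rexmit,
    int sacked_bytes, int lost_bytes)
{
	int pipe;

	pipe = (int)(snd_max - snd_una) + sack_bytes_rexmit -
	    sacked_bytes - lost_bytes;
	return (imax(pipe, 0));
}

int
main(void)
{
	/* 30000 bytes outstanding, 1460 retransmitted, 4380 SACKed, 2920 lost. */
	printf("pipe = %d\n", compute_pipe(130000, 100000, 1460, 4380, 2920));
	return (0);
}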
log_buf->_pad[1] = 0; + log_buf->_pad[2] = 0; /* Copy stack-specific info. */ if (stackinfo != NULL) { memcpy(&log_buf->tlb_stackinfo, stackinfo, @@ -2869,10 +2878,11 @@ tcp_log_sendfile(struct socket *so, off_t offset, size_t nbytes, int flags) /* double check log state now that we have the lock */ if (inp->inp_flags & INP_DROPPED) goto done; - if (tp->_t_logstate != TCP_LOG_STATE_OFF) { + if (tcp_bblogging_on(tp)) { struct timeval tv; tcp_log_eventspecific_t log; + memset(&log, 0, sizeof(log)); microuptime(&tv); log.u_sf.offset = offset; log.u_sf.length = nbytes; @@ -2970,3 +2980,370 @@ skip_closed_req: done: INP_WUNLOCK(inp); } + +#ifdef DDB +static void +db_print_indent(int indent) +{ + int i; + + for (i = 0; i < indent; i++) + db_printf(" "); +} + +static void +db_print_tcphdr(struct tcp_log_buffer *tlm_buf) +{ + struct sackblk sack; + struct tcphdr *th; + int cnt, i, j, opt, optlen, num_sacks; + uint32_t val, ecr; + uint16_t mss; + uint16_t flags; + + if ((tlm_buf->tlb_eventflags & TLB_FLAG_HDR) == 0) { + return; + } + th = &tlm_buf->tlb_th; + flags = tcp_get_flags(th); + if (flags & TH_FIN) { + db_printf("F"); + } + if (flags & TH_SYN) { + db_printf("S"); + } + if (flags & TH_RST) { + db_printf("R"); + } + if (flags & TH_PUSH) { + db_printf("P"); + } + if (flags & TH_ACK) { + db_printf("."); + } + if (flags & TH_URG) { + db_printf("U"); + } + if (flags & TH_ECE) { + db_printf("E"); + } + if (flags & TH_CWR) { + db_printf("W"); + } + if (flags & TH_AE) { + db_printf("A"); + } + db_printf(" %u:%u(%u)", ntohl(th->th_seq), + ntohl(th->th_seq) + tlm_buf->tlb_len, tlm_buf->tlb_len); + if (flags & TH_ACK) { + db_printf(" ack %u", ntohl(th->th_ack)); + } + db_printf(" win %u", ntohs(th->th_win)); + if (flags & TH_URG) { + db_printf(" urg %u", ntohs(th->th_urp)); + } + cnt = (th->th_off << 2) - sizeof(struct tcphdr); + if (cnt > 0) { + db_printf(" <"); + for (i = 0; i < cnt; i += optlen) { + opt = tlm_buf->tlb_opts[i]; + if (opt == TCPOPT_EOL || opt == TCPOPT_NOP) { + optlen = 1; + } else { + if (cnt - i < 2) { + break; + } + optlen = tlm_buf->tlb_opts[i + 1]; + if (optlen < 2 || optlen > cnt - i) { + break; + } + } + if (i > 0) { + db_printf(","); + } + switch (opt) { + case TCPOPT_EOL: + db_printf("eol"); + break; + case TCPOPT_NOP: + db_printf("nop"); + break; + case TCPOPT_MAXSEG: + if (optlen != TCPOLEN_MAXSEG) { + break; + } + bcopy(tlm_buf->tlb_opts + i + 2, &mss, + sizeof(uint16_t)); + db_printf("mss %u", ntohs(mss)); + break; + case TCPOPT_WINDOW: + if (optlen != TCPOLEN_WINDOW) { + break; + } + db_printf("wscale %u", + tlm_buf->tlb_opts[i + 2]); + break; + case TCPOPT_SACK_PERMITTED: + if (optlen != TCPOLEN_SACK_PERMITTED) { + break; + } + db_printf("sackOK"); + break; + case TCPOPT_SACK: + if (optlen == TCPOLEN_SACKHDR || + (optlen - 2) % TCPOLEN_SACK != 0) { + break; + } + num_sacks = (optlen - 2) / TCPOLEN_SACK; + db_printf("sack"); + for (j = 0; j < num_sacks; j++) { + bcopy(tlm_buf->tlb_opts + i + 2 + + j * TCPOLEN_SACK, &sack, + TCPOLEN_SACK); + db_printf(" %u:%u", ntohl(sack.start), + ntohl(sack.end)); + } + break; + case TCPOPT_TIMESTAMP: + if (optlen != TCPOLEN_TIMESTAMP) { + break; + } + bcopy(tlm_buf->tlb_opts + i + 2, &val, + sizeof(uint32_t)); + bcopy(tlm_buf->tlb_opts + i + 6, &ecr, + sizeof(uint32_t)); + db_printf("TS val %u ecr %u", ntohl(val), + ntohl(ecr)); + break; + case TCPOPT_SIGNATURE: + db_printf("md5"); + if (optlen > 2) { + db_printf(" "); + } + for (j = 0; j < optlen - 2; j++) { + db_printf("%02x", + tlm_buf->tlb_opts[i + 2 + j]); + } + break; + case 
TCPOPT_FAST_OPEN: + db_printf("FO"); + if (optlen > 2) { + db_printf(" "); + } + for (j = 0; j < optlen - 2; j++) { + db_printf("%02x", + tlm_buf->tlb_opts[i + 2 + j]); + } + break; + default: + db_printf("opt=%u len=%u", opt, optlen); + break; + } + } + db_printf(">"); + } +} +static void +db_print_pru(struct tcp_log_buffer *tlm_buf) +{ + switch (tlm_buf->tlb_flex1) { + case PRU_ATTACH: + db_printf("ATTACH"); + break; + case PRU_DETACH: + db_printf("DETACH"); + break; + case PRU_BIND: + db_printf("BIND"); + break; + case PRU_LISTEN: + db_printf("LISTEN"); + break; + case PRU_CONNECT: + db_printf("CONNECT"); + break; + case PRU_ACCEPT: + db_printf("ACCEPT"); + break; + case PRU_DISCONNECT: + db_printf("DISCONNECT"); + break; + case PRU_SHUTDOWN: + db_printf("SHUTDOWN"); + break; + case PRU_RCVD: + db_printf("RCVD"); + break; + case PRU_SEND: + db_printf("SEND"); + break; + case PRU_ABORT: + db_printf("ABORT"); + break; + case PRU_CONTROL: + db_printf("CONTROL"); + break; + case PRU_SENSE: + db_printf("SENSE"); + break; + case PRU_RCVOOB: + db_printf("RCVOOB"); + break; + case PRU_SENDOOB: + db_printf("SENDOOB"); + break; + case PRU_SOCKADDR: + db_printf("SOCKADDR"); + break; + case PRU_PEERADDR: + db_printf("PEERADDR"); + break; + case PRU_CONNECT2: + db_printf("CONNECT2"); + break; + case PRU_FASTTIMO: + db_printf("FASTTIMO"); + break; + case PRU_SLOWTIMO: + db_printf("SLOWTIMO"); + break; + case PRU_PROTORCV: + db_printf("PROTORCV"); + break; + case PRU_PROTOSEND: + db_printf("PROTOSEND"); + break; + case PRU_SEND_EOF: + db_printf("SEND_EOF"); + break; + case PRU_SOSETLABEL: + db_printf("SOSETLABEL"); + break; + case PRU_CLOSE: + db_printf("CLOSE"); + break; + case PRU_FLUSH: + db_printf("FLUSH"); + break; + default: + db_printf("Unknown PRU (%u)", tlm_buf->tlb_flex1); + break; + } + if (tlm_buf->tlb_errno >= 0) { + db_printf(", error: %d", tlm_buf->tlb_errno); + } +} + +static void +db_print_rto(struct tcp_log_buffer *tlm_buf) +{ + tt_what what; + tt_which which; + + what = (tlm_buf->tlb_flex1 & 0xffffff00) >> 8; + which = tlm_buf->tlb_flex1 & 0x000000ff; + switch (what) { + case TT_PROCESSING: + db_printf("Processing "); + break; + case TT_PROCESSED: + db_printf("Processed "); + break; + case TT_STARTING: + db_printf("Starting "); + break; + case TT_STOPPING: + db_printf("Stopping "); + break; + default: + db_printf("Unknown operation (%u) for ", what); + break; + } + switch (which) { + case TT_REXMT: + db_printf("Retransmission "); + break; + case TT_PERSIST: + db_printf("Persist "); + break; + case TT_KEEP: + db_printf("Keepalive "); + break; + case TT_2MSL: + db_printf("2 MSL "); + break; + case TT_DELACK: + db_printf("Delayed ACK "); + break; + default: + db_printf("Unknown (%u) ", which); + break; + } + db_printf("timer"); + if (what == TT_STARTING) { + db_printf(": %u ms", tlm_buf->tlb_flex2); + } +} + +static void +db_print_usersend(struct tcp_log_buffer *tlm_buf) +{ + if ((tlm_buf->tlb_eventflags & TLB_FLAG_RXBUF) == 0) { + return; + } + if ((tlm_buf->tlb_eventflags & TLB_FLAG_TXBUF) == 0) { + return; + } + db_printf("usersend: rcv.acc: %u rcv.ccc: %u snd.acc: %u snd.ccc: %u", + tlm_buf->tlb_rxbuf.tls_sb_acc, tlm_buf->tlb_rxbuf.tls_sb_ccc, + tlm_buf->tlb_txbuf.tls_sb_acc, tlm_buf->tlb_txbuf.tls_sb_ccc); +} + +void +db_print_bblog_entries(struct tcp_log_stailq *log_entries, int indent) +{ + struct tcp_log_mem *log_entry; + struct tcp_log_buffer *tlm_buf, *prev_tlm_buf; + int64_t delta_t; + + indent += 2; + prev_tlm_buf = NULL; + STAILQ_FOREACH(log_entry, log_entries, tlm_queue) 
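[Editor's sketch] db_print_tcphdr() above walks the raw options block defensively: EOL and NOP advance by one byte, every other kind must carry a length byte of at least 2 that stays inside the block, and anything malformed ends the walk. The same pattern as a stand-alone program; the option constants are spelled out locally with their standard values:

#include <stdint.h>
#include <stdio.h>

#define TCPOPT_EOL		0
#define TCPOPT_NOP		1
#define TCPOPT_MAXSEG		2
#define TCPOPT_WINDOW		3
#define TCPOPT_SACK_PERMITTED	4
#define TCPOPT_TIMESTAMP	8

/*
 * Bounds-checked walk over a TCP options block, following the loop
 * structure of db_print_tcphdr() above.
 */
static void
walk_options(const uint8_t *opts, int cnt)
{
	int i, opt, optlen;

	for (i = 0; i < cnt; i += optlen) {
		opt = opts[i];
		if (opt == TCPOPT_EOL || opt == TCPOPT_NOP) {
			optlen = 1;
		} else {
			if (cnt - i < 2)
				break;
			optlen = opts[i + 1];
			if (optlen < 2 || optlen > cnt - i)
				break;
		}
		printf("option %d, length %d\n", opt, optlen);
	}
}

int
main(void)
{
	/* mss 1460, nop, wscale 7, sackOK, timestamps, trailing EOL. */
	const uint8_t opts[] = {
		TCPOPT_MAXSEG, 4, 0x05, 0xb4,
		TCPOPT_NOP,
		TCPOPT_WINDOW, 3, 7,
		TCPOPT_SACK_PERMITTED, 2,
		TCPOPT_TIMESTAMP, 10, 0, 0, 0, 1, 0, 0, 0, 0,
		TCPOPT_EOL,
	};

	walk_options(opts, (int)sizeof(opts));
	return (0);
}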
{ + db_print_indent(indent); + tlm_buf = &log_entry->tlm_buf; + if (prev_tlm_buf == NULL) { + db_printf(" 0.000 "); + } else { + delta_t = sbttoms(tvtosbt(tlm_buf->tlb_tv) - + tvtosbt(prev_tlm_buf->tlb_tv)); + db_printf("+%u.%03u ", (uint32_t)(delta_t / 1000), + (uint32_t)(delta_t % 1000)); + } + switch (tlm_buf->tlb_eventid) { + case TCP_LOG_IN: + db_printf("< "); + db_print_tcphdr(tlm_buf); + break; + case TCP_LOG_OUT: + db_printf("> "); + db_print_tcphdr(tlm_buf); + break; + case TCP_LOG_RTO: + db_print_rto(tlm_buf); + break; + case TCP_LOG_PRU: + db_print_pru(tlm_buf); + break; + case TCP_LOG_USERSEND: + db_print_usersend(tlm_buf); + break; + default: + break; + } + db_printf("\n"); + prev_tlm_buf = tlm_buf; + if (db_pager_quit) + break; + } +} +#endif diff --git a/sys/netinet/tcp_log_buf.h b/sys/netinet/tcp_log_buf.h index 38f66e69b093..3e7eef8a1cda 100644 --- a/sys/netinet/tcp_log_buf.h +++ b/sys/netinet/tcp_log_buf.h @@ -60,14 +60,6 @@ struct tcp_log_verbose uint8_t _pad[4]; } ALIGN_TCP_LOG; -/* Internal RACK state variables. */ -struct tcp_log_rack -{ - uint32_t tlr_rack_rtt; /* rc_rack_rtt */ - uint8_t tlr_state; /* Internal RACK state */ - uint8_t _pad[3]; /* Padding */ -}; - struct tcp_log_bbr { uint64_t cur_del_rate; uint64_t delRate; @@ -126,7 +118,6 @@ struct tcp_log_sendfile { */ union tcp_log_stackspecific { - struct tcp_log_rack u_rack; struct tcp_log_bbr u_bbr; struct tcp_log_sendfile u_sf; struct tcp_log_raw u_raw; /* "raw" log access */ @@ -185,7 +176,6 @@ struct tcp_log_buffer uint8_t _pad[3]; /* Padding */ /* Per-stack info */ union tcp_log_stackspecific tlb_stackinfo; -#define tlb_rack tlb_stackinfo.u_rack /* The packet */ uint32_t tlb_len; /* The packet's data length */ @@ -201,14 +191,14 @@ enum tcp_log_events { TCP_LOG_OUT, /* Transmit (without other event) 2 */ TCP_LOG_RTO, /* Retransmit timeout 3 */ TCP_LOG_SB_WAKE, /* Awaken socket buffer 4 */ - TCP_LOG_BAD_RETRAN, /* Detected bad retransmission 5 */ + TCP_UNUSED_5, /* Detected bad retransmission 5 */ TCP_LOG_PRR, /* Doing PRR 6 */ - TCP_LOG_REORDER, /* Detected reorder 7 */ + TCP_UNUSED_7, /* Detected reorder 7 */ TCP_LOG_HPTS, /* Hpts sending a packet 8 */ BBR_LOG_BBRUPD, /* We updated BBR info 9 */ BBR_LOG_BBRSND, /* We did a slot calculation and sending is done 10 */ BBR_LOG_ACKCLEAR, /* A ack clears all outstanding 11 */ - BBR_LOG_INQUEUE, /* The tcb had a packet input to it 12 */ + TCP_UNUSED_12, /* The tcb had a packet input to it 12 */ BBR_LOG_TIMERSTAR, /* Start a timer 13 */ BBR_LOG_TIMERCANC, /* Cancel a timer 14 */ BBR_LOG_ENTREC, /* Entered recovery 15 */ @@ -219,18 +209,18 @@ enum tcp_log_events { BBR_LOG_BBRRTT, /* BBR RTT is updated 20 */ BBR_LOG_JUSTRET, /* We just returned out of output 21 */ BBR_LOG_STATE, /* A BBR state change occurred 22 */ - BBR_LOG_PKT_EPOCH, /* A BBR packet epoch occurred 23 */ - BBR_LOG_PERSIST, /* BBR changed to/from a persists 24 */ - TCP_LOG_FLOWEND, /* End of a flow 25 */ - BBR_LOG_RTO, /* BBR's timeout includes BBR info 26 */ - BBR_LOG_DOSEG_DONE, /* hpts do_segment completes 27 */ - BBR_LOG_EXIT_GAIN, /* hpts do_segment completes 28 */ - BBR_LOG_THRESH_CALC, /* Doing threshold calculation 29 */ + BBR_LOG_PKT_EPOCH, /* A BBR packet epoch occurred 23 */ + BBR_LOG_PERSIST, /* BBR changed to/from a persists 24 */ + TCP_LOG_FLOWEND, /* End of a flow 25 */ + BBR_LOG_RTO, /* BBR's timeout includes BBR info 26 */ + BBR_LOG_DOSEG_DONE, /* hpts do_segment completes 27 */ + BBR_LOG_EXIT_GAIN, /* hpts do_segment completes 28 */ + BBR_LOG_THRESH_CALC, /* Doing threshold 
calculation 29 */ TCP_LOG_MAPCHG, /* Map Changes to the sendmap 30 */ - TCP_LOG_USERSEND, /* User level sends data 31 */ + TCP_LOG_USERSEND, /* User level sends data 31 */ BBR_RSM_CLEARED, /* RSM cleared of ACK flags 32 */ - BBR_LOG_STATE_TARGET, /* Log of target at state 33 */ - BBR_LOG_TIME_EPOCH, /* A timed based Epoch occurred 34 */ + BBR_LOG_STATE_TARGET, /* Log of target at state 33 */ + BBR_LOG_TIME_EPOCH, /* A timed based Epoch occurred 34 */ BBR_LOG_TO_PROCESS, /* A to was processed 35 */ BBR_LOG_BBRTSO, /* TSO update 36 */ BBR_LOG_HPTSDIAG, /* Hpts diag insert 37 */ @@ -245,7 +235,7 @@ enum tcp_log_events { BBR_LOG_REDUCE, /* old bbr log reduce for 4.1 and earlier 46*/ TCP_LOG_RTT, /* A rtt (in useconds) is being sampled and applied to the srtt algo 47 */ BBR_LOG_SETTINGS_CHG, /* Settings changed for loss response 48 */ - BBR_LOG_SRTT_GAIN_EVENT, /* SRTT gaining -- now not used 49 */ + TCP_UNUSED_49, /* SRTT gaining -- now not used 49 */ TCP_LOG_REASS, /* Reassembly buffer logging 50 */ TCP_HDWR_PACE_SIZE, /* TCP pacing size set (rl and rack uses this) 51 */ BBR_LOG_HDWR_PACE, /* TCP Hardware pacing log 52 */ @@ -253,23 +243,23 @@ enum tcp_log_events { TCP_LOG_CONNEND, /* End of connection 54 */ TCP_LOG_LRO, /* LRO entry 55 */ TCP_SACK_FILTER_RES, /* Results of SACK Filter 56 */ - TCP_SAD_DETECT, /* Sack Attack Detection 57 */ + TCP_UNUSED_57, /* Sack Attack Detection 57 */ TCP_TIMELY_WORK, /* Logs regarding Timely CC tweaks 58 */ - TCP_LOG_USER_EVENT, /* User space event data 59 */ + TCP_UNUSED_59, /* User space event data 59 */ TCP_LOG_SENDFILE, /* sendfile() logging for TCP connections 60 */ - TCP_LOG_REQ_T, /* logging of request tracking 61 */ - TCP_LOG_ACCOUNTING, /* Log of TCP Accounting data 62 */ - TCP_LOG_FSB, /* FSB information 63 */ + TCP_LOG_REQ_T, /* logging of request tracking 61 */ + TCP_LOG_ACCOUNTING, /* Log of TCP Accounting data 62 */ + TCP_LOG_FSB, /* FSB information 63 */ RACK_DSACK_HANDLING, /* Handling of DSACK in rack for reordering window 64 */ - TCP_HYSTART, /* TCP Hystart logging 65 */ - TCP_CHG_QUERY, /* Change query during fnc_init() 66 */ - TCP_RACK_LOG_COLLAPSE, /* Window collapse by peer 67 */ - TCP_RACK_TP_TRIGGERED, /* A rack tracepoint is triggered 68 */ - TCP_HYBRID_PACING_LOG, /* Hybrid pacing log 69 */ + TCP_HYSTART, /* TCP Hystart logging 65 */ + TCP_CHG_QUERY, /* Change query during fnc_init() 66 */ + TCP_RACK_LOG_COLLAPSE, /* Window collapse by peer 67 */ + TCP_RACK_TP_TRIGGERED, /* A rack tracepoint is triggered 68 */ + TCP_HYBRID_PACING_LOG, /* Hybrid pacing log 69 */ TCP_LOG_PRU, /* TCP protocol user request 70 */ - TCP_POLICER_DET, /* TCP Policer detectionn 71 */ - TCP_PCM_MEASURE, /* TCP Path Capacity Measurement 72 */ - TCP_LOG_END /* End (keep at end) 72 */ + TCP_UNUSED_71, /* old TCP Policer detectionn, not used 71 */ + TCP_PCM_MEASURE, /* TCP Path Capacity Measurement 72 */ + TCP_LOG_END /* End (keep at end) 73 */ }; enum tcp_log_states { @@ -549,12 +539,12 @@ struct tcpcb; NULL, NULL, 0, NULL); \ } while (0) #endif /* TCP_LOG_FORCEVERBOSE */ +/* Assumes/requires the caller has already checked tcp_bblogging_on(tp). 
*/ #define TCP_LOG_EVENTP(tp, th, rxbuf, txbuf, eventid, errornum, len, stackinfo, th_hostorder, tv) \ do { \ - if (tcp_bblogging_on(tp)) \ - tcp_log_event(tp, th, rxbuf, txbuf, eventid, \ - errornum, len, stackinfo, th_hostorder, \ - NULL, NULL, 0, tv); \ + KASSERT(tcp_bblogging_on(tp), ("bblogging is off")); \ + tcp_log_event(tp, th, rxbuf, txbuf, eventid, errornum, len, \ + stackinfo, th_hostorder, NULL, NULL, 0, tv); \ } while (0) #ifdef TCP_BLACKBOX @@ -580,6 +570,9 @@ void tcp_log_flowend(struct tcpcb *tp); void tcp_log_sendfile(struct socket *so, off_t offset, size_t nbytes, int flags); int tcp_log_apply_ratio(struct tcpcb *tp, int ratio); +#ifdef DDB +void db_print_bblog_entries(struct tcp_log_stailq *log_entries, int indent); +#endif #else /* !TCP_BLACKBOX */ #define tcp_log_verbose (false) diff --git a/sys/netinet/tcp_lro.c b/sys/netinet/tcp_lro.c index 921d28f82517..10afed17bf3b 100644 --- a/sys/netinet/tcp_lro.c +++ b/sys/netinet/tcp_lro.c @@ -83,6 +83,7 @@ static MALLOC_DEFINE(M_LRO, "LRO", "LRO control structures"); static void tcp_lro_rx_done(struct lro_ctrl *lc); static int tcp_lro_rx_common(struct lro_ctrl *lc, struct mbuf *m, uint32_t csum, bool use_hash); +static void tcp_lro_flush(struct lro_ctrl *lc, struct lro_entry *le); SYSCTL_NODE(_net_inet_tcp, OID_AUTO, lro, CTLFLAG_RW | CTLFLAG_MPSAFE, 0, "TCP LRO"); @@ -175,7 +176,7 @@ tcp_lro_init_args(struct lro_ctrl *lc, struct ifnet *ifp, { struct lro_entry *le; size_t size; - unsigned i, elements; + unsigned i; lc->lro_bad_csum = 0; lc->lro_queued = 0; @@ -190,11 +191,7 @@ tcp_lro_init_args(struct lro_ctrl *lc, struct ifnet *ifp, LIST_INIT(&lc->lro_active); /* create hash table to accelerate entry lookup */ - if (lro_entries > lro_mbufs) - elements = lro_entries; - else - elements = lro_mbufs; - lc->lro_hash = phashinit_flags(elements, M_LRO, &lc->lro_hashsz, + lc->lro_hash = phashinit_flags(lro_entries, M_LRO, &lc->lro_hashsz, HASH_NOWAIT); if (lc->lro_hash == NULL) { memset(lc, 0, sizeof(*lc)); @@ -599,7 +596,7 @@ tcp_lro_rx_done(struct lro_ctrl *lc) static void tcp_lro_flush_active(struct lro_ctrl *lc) { - struct lro_entry *le; + struct lro_entry *le, *le_tmp; /* * Walk through the list of le entries, and @@ -611,7 +608,7 @@ tcp_lro_flush_active(struct lro_ctrl *lc) * is being freed. This is ok it will just get * reallocated again like it was new. 
*/ - LIST_FOREACH(le, &lc->lro_active, next) { + LIST_FOREACH_SAFE(le, &lc->lro_active, next, le_tmp) { if (le->m_head != NULL) { tcp_lro_active_remove(le); tcp_lro_flush(lc, le); @@ -1108,7 +1105,7 @@ again: } } -void +static void tcp_lro_flush(struct lro_ctrl *lc, struct lro_entry *le) { diff --git a/sys/netinet/tcp_lro.h b/sys/netinet/tcp_lro.h index b4b5e3f811e4..a94eca665eb5 100644 --- a/sys/netinet/tcp_lro.h +++ b/sys/netinet/tcp_lro.h @@ -216,7 +216,6 @@ int tcp_lro_init(struct lro_ctrl *); int tcp_lro_init_args(struct lro_ctrl *, struct ifnet *, unsigned, unsigned); void tcp_lro_free(struct lro_ctrl *); void tcp_lro_flush_inactive(struct lro_ctrl *, const struct timeval *); -void tcp_lro_flush(struct lro_ctrl *, struct lro_entry *); void tcp_lro_flush_all(struct lro_ctrl *); extern int (*tcp_lro_flush_tcphpts)(struct lro_ctrl *, struct lro_entry *); int tcp_lro_rx(struct lro_ctrl *, struct mbuf *, uint32_t); diff --git a/sys/netinet/tcp_lro_hpts.c b/sys/netinet/tcp_lro_hpts.c index cd757d5a6164..7e756285da45 100644 --- a/sys/netinet/tcp_lro_hpts.c +++ b/sys/netinet/tcp_lro_hpts.c @@ -39,6 +39,7 @@ #include <net/if.h> #include <net/if_var.h> +#include <net/if_private.h> #include <net/ethernet.h> #include <net/bpf.h> #include <net/vnet.h> @@ -61,7 +62,9 @@ #include <netinet/tcp_lro.h> #include <netinet/tcp_var.h> #include <netinet/tcp_hpts.h> +#ifdef TCP_BLACKBOX #include <netinet/tcp_log_buf.h> +#endif static void build_ack_entry(struct tcp_ackent *ae, struct tcphdr *th, struct mbuf *m, @@ -147,6 +150,7 @@ tcp_lro_check_wake_status(struct tcpcb *tp) return (false); } +#ifdef TCP_BLACKBOX static void tcp_lro_log(struct tcpcb *tp, const struct lro_ctrl *lc, const struct lro_entry *le, const struct mbuf *m, @@ -196,6 +200,7 @@ tcp_lro_log(struct tcpcb *tp, const struct lro_ctrl *lc, TCP_LOG_LRO, 0, 0, &log, false, &tv); } } +#endif static struct mbuf * tcp_lro_get_last_if_ackcmp(struct lro_ctrl *lc, struct lro_entry *le, @@ -208,7 +213,9 @@ tcp_lro_get_last_if_ackcmp(struct lro_ctrl *lc, struct lro_entry *le, m = STAILQ_LAST(&tp->t_inqueue, mbuf, m_stailqpkt); if (m != NULL && (m->m_flags & M_ACKCMP) != 0) { if (M_TRAILINGSPACE(m) >= sizeof(struct tcp_ackent)) { +#ifdef TCP_BLACKBOX tcp_lro_log(tp, lc, le, NULL, 23, 0, 0, 0, 0); +#endif *new_m = 0; counter_u64_add(tcp_extra_mbuf, 1); return (m); @@ -219,7 +226,9 @@ tcp_lro_get_last_if_ackcmp(struct lro_ctrl *lc, struct lro_entry *le, } } /* Decide mbuf size. */ +#ifdef TCP_BLACKBOX tcp_lro_log(tp, lc, le, NULL, 21, 0, 0, 0, 0); +#endif if (tp->t_flags2 & TF2_MBUF_L_ACKS) m = m_getcl(M_NOWAIT, MT_DATA, M_ACKCMP | M_PKTHDR); else @@ -611,13 +620,19 @@ _tcp_lro_flush_tcphpts(struct lro_ctrl *lc, struct lro_entry *le) * ack will be required. */ cmp = NULL; +#ifdef TCP_BLACKBOX tcp_lro_log(tp, lc, le, NULL, 25, 0, 0, 0, 0); +#endif } else if (mv_to != NULL) { /* We are asked to move pp up */ pp = &mv_to->m_nextpkt; +#ifdef TCP_BLACKBOX tcp_lro_log(tp, lc, le, NULL, 24, 0, 0, 0, 0); } else tcp_lro_log(tp, lc, le, NULL, 26, 0, 0, 0, 0); +#else + } +#endif } /* Update "m_last_mbuf", if any. */ if (pp == &le->m_head) @@ -628,7 +643,9 @@ _tcp_lro_flush_tcphpts(struct lro_ctrl *lc, struct lro_entry *le) /* Check if any data mbufs left. 
*/ if (le->m_head != NULL) { counter_u64_add(tcp_inp_lro_direct_queue, 1); +#ifdef TCP_BLACKBOX tcp_lro_log(tp, lc, le, NULL, 22, 1, tp->t_flags2, 0, 1); +#endif tcp_queue_pkts(tp, le); } if (should_wake) { diff --git a/sys/netinet/tcp_output.c b/sys/netinet/tcp_output.c index 2bbc9414197c..bc5b42ee6f2c 100644 --- a/sys/netinet/tcp_output.c +++ b/sys/netinet/tcp_output.c @@ -85,9 +85,6 @@ #include <netinet/tcpip.h> #include <netinet/cc/cc.h> #include <netinet/tcp_fastopen.h> -#ifdef TCPPCAP -#include <netinet/tcp_pcap.h> -#endif #ifdef TCP_OFFLOAD #include <netinet/tcp_offload.h> #endif @@ -201,11 +198,9 @@ tcp_default_output(struct tcpcb *tp) struct tcphdr *th; u_char opt[TCP_MAXOLEN]; unsigned ipoptlen, optlen, hdrlen, ulen; -#if defined(IPSEC) || defined(IPSEC_SUPPORT) unsigned ipsec_optlen = 0; -#endif int idle, sendalot, curticks; - int sack_rxmit, sack_bytes_rxmt; + int sack_bytes_rxmt; struct sackhole *p; int tso, mtu; struct tcpopt to; @@ -213,9 +208,7 @@ tcp_default_output(struct tcpcb *tp) struct tcp_log_buffer *lgb; unsigned int wanted_cookie = 0; unsigned int dont_sendalot = 0; -#if 0 - int maxburst = TCP_MAXBURST; -#endif + bool sack_rxmit; #ifdef INET6 struct ip6_hdr *ip6 = NULL; const bool isipv6 = (inp->inp_vflag & INP_IPV6) != 0; @@ -264,19 +257,22 @@ tcp_default_output(struct tcpcb *tp) } } again: + sendwin = 0; /* * If we've recently taken a timeout, snd_max will be greater than * snd_nxt. There may be SACK information that allows us to avoid * resending already delivered data. Adjust snd_nxt accordingly. */ if ((tp->t_flags & TF_SACK_PERMIT) && - SEQ_LT(tp->snd_nxt, tp->snd_max)) - tcp_sack_adjust(tp); + (tp->sackhint.nexthole != NULL) && + !IN_FASTRECOVERY(tp->t_flags)) { + sendwin = tcp_sack_adjust(tp); + } sendalot = 0; tso = 0; mtu = 0; off = tp->snd_nxt - tp->snd_una; - sendwin = min(tp->snd_wnd, tp->snd_cwnd); + sendwin = min(tp->snd_wnd, tp->snd_cwnd + sendwin); flags = tcp_outflags[tp->t_state]; /* @@ -289,16 +285,19 @@ again: /* * Still in sack recovery , reset rxmit flag to zero. */ - sack_rxmit = 0; sack_bytes_rxmt = 0; len = 0; - p = NULL; - if ((tp->t_flags & TF_SACK_PERMIT) && IN_FASTRECOVERY(tp->t_flags) && + if ((tp->t_flags & TF_SACK_PERMIT) && + (IN_FASTRECOVERY(tp->t_flags) || + (SEQ_LT(tp->snd_nxt, tp->snd_max) && (tp->t_dupacks >= tcprexmtthresh))) && (p = tcp_sack_output(tp, &sack_bytes_rxmt))) { - uint32_t cwin; + int32_t cwin; - cwin = - imax(min(tp->snd_wnd, tp->snd_cwnd) - sack_bytes_rxmt, 0); + if (IN_FASTRECOVERY(tp->t_flags)) { + cwin = imax(sendwin - tcp_compute_pipe(tp), 0); + } else { + cwin = imax(sendwin - off, 0); + } /* Do not retransmit SACK segments beyond snd_recover */ if (SEQ_GT(p->end, tp->snd_recover)) { /* @@ -314,23 +313,42 @@ again: * moves past p->rxmit. */ p = NULL; + sack_rxmit = false; goto after_sack_rexmit; } else { /* Can rexmit part of the current hole */ - len = ((int32_t)ulmin(cwin, - SEQ_SUB(tp->snd_recover, p->rxmit))); + len = SEQ_SUB(tp->snd_recover, p->rxmit); + if (cwin <= len) { + len = cwin; + } else { + sendalot = 1; + } } } else { - len = ((int32_t)ulmin(cwin, - SEQ_SUB(p->end, p->rxmit))); + len = SEQ_SUB(p->end, p->rxmit); + if (cwin <= len) { + len = cwin; + } else { + sendalot = 1; + } } + /* we could have transmitted from the scoreboard, + * but sendwin (expected flightsize) - pipe didn't + * allow any transmission. + * Bypass recalculating the possible transmission + * length further down by setting sack_rxmit. + * Wouldn't be here if there would have been + * nothing in the scoreboard to transmit. 
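To make the reworked send-window bookkeeping above concrete, here is a small worked example with made-up numbers (illustrative only, not taken from the change): in fast recovery the SACK retransmit budget is the expected flightsize minus what tcp_compute_pipe() reports as already outstanding.

	#include <stdio.h>

	int
	main(void)
	{
		int snd_wnd  = 64000;	/* peer's advertised window */
		int snd_cwnd = 20000;	/* congestion window */
		int sacked   = 0;	/* tcp_sack_adjust() contribution (zero in fast recovery) */
		int pipe     = 17000;	/* tcp_compute_pipe(): data still in flight */

		int sendwin = snd_wnd < snd_cwnd + sacked ? snd_wnd : snd_cwnd + sacked;
		int cwin = sendwin - pipe > 0 ? sendwin - pipe : 0;

		/* Prints cwin = 3000: only that much of the SACK hole may go out now. */
		printf("cwin = %d\n", cwin);
		return (0);
	}
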
+ */ if (len > 0) { off = SEQ_SUB(p->rxmit, tp->snd_una); KASSERT(off >= 0,("%s: sack block to the left of una : %d", __func__, off)); - sack_rxmit = 1; - sendalot = 1; } + sack_rxmit = true; + } else { + p = NULL; + sack_rxmit = false; } after_sack_rexmit: /* @@ -342,7 +360,7 @@ after_sack_rexmit: if (tp->t_flags & TF_NEEDSYN) flags |= TH_SYN; - SOCKBUF_LOCK(&so->so_snd); + SOCK_SENDBUF_LOCK(so); /* * If in persist timeout with window of 0, send 1 byte. * Otherwise, if window is small but nonzero @@ -391,36 +409,18 @@ after_sack_rexmit: * If sack_rxmit is true we are retransmitting from the scoreboard * in which case len is already set. */ - if (sack_rxmit == 0) { - if (sack_bytes_rxmt == 0) { - len = ((int32_t)min(sbavail(&so->so_snd), sendwin) - - off); + if (!sack_rxmit) { + if ((sack_bytes_rxmt == 0) || SEQ_LT(tp->snd_nxt, tp->snd_max)) { + len = imin(sbavail(&so->so_snd), sendwin) - off; } else { - int32_t cwin; - /* * We are inside of a SACK recovery episode and are * sending new data, having retransmitted all the * data possible in the scoreboard. */ - len = ((int32_t)min(sbavail(&so->so_snd), tp->snd_wnd) - - off); - /* - * Don't remove this (len > 0) check ! - * We explicitly check for len > 0 here (although it - * isn't really necessary), to work around a gcc - * optimization issue - to force gcc to compute - * len above. Without this check, the computation - * of len is bungled by the optimizer. - */ - if (len > 0) { - cwin = tp->snd_cwnd - imax(0, (int32_t) - (tp->snd_nxt - tp->snd_recover)) - - sack_bytes_rxmt; - if (cwin < 0) - cwin = 0; - len = imin(len, cwin); - } + len = imax( + imin(sbavail(&so->so_snd), sendwin) - + imax(tcp_compute_pipe(tp), off), 0); } } @@ -515,8 +515,8 @@ after_sack_rexmit: * hardware). * * TSO may only be used if we are in a pure bulk sending state. The - * presence of TCP-MD5, SACK retransmits, SACK advertizements and - * IP options prevent using TSO. With TSO the TCP header is the same + * presence of TCP-MD5, IP options (IPsec), and possibly SACK + * retransmits prevent using TSO. With TSO the TCP header is the same * (except for the sequence number) for all generated packets. This * makes it impossible to transmit any options which vary per generated * segment or packet. @@ -553,15 +553,15 @@ after_sack_rexmit: offsetof(struct ipoption, ipopt_list); else ipoptlen = 0; -#if defined(IPSEC) || defined(IPSEC_SUPPORT) ipoptlen += ipsec_optlen; -#endif if ((tp->t_flags & TF_TSO) && V_tcp_do_tso && len > tp->t_maxseg && (tp->t_port == 0) && ((tp->t_flags & TF_SIGNATURE) == 0) && - tp->rcv_numsacks == 0 && sack_rxmit == 0 && - ipoptlen == 0 && !(flags & TH_SYN)) + (!sack_rxmit || V_tcp_sack_tso) && + (ipoptlen == 0 || (ipoptlen == ipsec_optlen && + (tp->t_flags2 & TF2_IPSEC_TSO) != 0)) && + !(flags & TH_SYN)) tso = 1; if (SEQ_LT((sack_rxmit ? p->rxmit : tp->snd_nxt) + len, @@ -754,11 +754,11 @@ dontupdate: * No reason to send a segment, just return. */ just_return: - SOCKBUF_UNLOCK(&so->so_snd); + SOCK_SENDBUF_UNLOCK(so); return (0); send: - SOCKBUF_LOCK_ASSERT(&so->so_snd); + SOCK_SENDBUF_LOCK_ASSERT(so); if (len > 0) { if (len >= tp->t_maxseg) tp->t_flags2 |= TF2_PLPMTU_MAXSEGSNT; @@ -888,7 +888,7 @@ send: if (tp->t_port) { if (V_tcp_udp_tunneling_port == 0) { /* The port was removed?? 
*/ - SOCKBUF_UNLOCK(&so->so_snd); + SOCK_SENDBUF_UNLOCK(so); return (EHOSTUNREACH); } hdrlen += sizeof(struct udphdr); @@ -917,7 +917,7 @@ send: * overflowing or exceeding the maximum length * allowed by the network interface: */ - KASSERT(ipoptlen == 0, + KASSERT(ipoptlen == ipsec_optlen, ("%s: TSO can't do IP options", __func__)); /* @@ -926,8 +926,8 @@ send: */ if (if_hw_tsomax != 0) { /* compute maximum TSO length */ - max_len = (if_hw_tsomax - hdrlen - - max_linkhdr); + max_len = if_hw_tsomax - hdrlen - + ipsec_optlen - max_linkhdr; if (max_len <= 0) { len = 0; } else if (len > max_len) { @@ -941,7 +941,7 @@ send: * fractional unless the send sockbuf can be * emptied: */ - max_len = (tp->t_maxseg - optlen); + max_len = tp->t_maxseg - optlen - ipsec_optlen; if (((uint32_t)off + (uint32_t)len) < sbavail(&so->so_snd)) { moff = len % max_len; @@ -980,9 +980,9 @@ send: * byte of the payload can be put into the * TCP segment. */ - SOCKBUF_UNLOCK(&so->so_snd); + SOCK_SENDBUF_UNLOCK(so); error = EMSGSIZE; - sack_rxmit = 0; + sack_rxmit = false; goto out; } len = tp->t_maxseg - optlen - ipoptlen; @@ -1037,6 +1037,9 @@ send: TCPSTAT_ADD(tcps_sndrexmitbyte, len); if (sack_rxmit) { TCPSTAT_INC(tcps_sack_rexmits); + if (tso) { + TCPSTAT_INC(tcps_sack_rexmits_tso); + } TCPSTAT_ADD(tcps_sack_rexmit_bytes, len); } #ifdef STATS @@ -1059,9 +1062,9 @@ send: m = m_gethdr(M_NOWAIT, MT_DATA); if (m == NULL) { - SOCKBUF_UNLOCK(&so->so_snd); + SOCK_SENDBUF_UNLOCK(so); error = ENOBUFS; - sack_rxmit = 0; + sack_rxmit = false; goto out; } @@ -1080,13 +1083,18 @@ send: sbsndptr_adv(&so->so_snd, mb, len); m->m_len += len; } else { + int32_t old_len; + if (SEQ_LT(tp->snd_nxt, tp->snd_max)) msb = NULL; else msb = &so->so_snd; + old_len = len; m->m_next = tcp_m_copym(mb, moff, &len, if_hw_tsomaxsegcount, if_hw_tsomaxsegsize, msb, hw_tls); + if (old_len != len) + flags &= ~TH_FIN; if (len <= (tp->t_maxseg - optlen)) { /* * Must have ran out of mbufs for the copy @@ -1097,10 +1105,10 @@ send: tso = 0; } if (m->m_next == NULL) { - SOCKBUF_UNLOCK(&so->so_snd); + SOCK_SENDBUF_UNLOCK(so); (void) m_free(m); error = ENOBUFS; - sack_rxmit = 0; + sack_rxmit = false; goto out; } } @@ -1114,9 +1122,9 @@ send: if (((uint32_t)off + (uint32_t)len == sbused(&so->so_snd)) && !(flags & TH_SYN)) flags |= TH_PUSH; - SOCKBUF_UNLOCK(&so->so_snd); + SOCK_SENDBUF_UNLOCK(so); } else { - SOCKBUF_UNLOCK(&so->so_snd); + SOCK_SENDBUF_UNLOCK(so); if (tp->t_flags & TF_ACKNOW) TCPSTAT_INC(tcps_sndacks); else if (flags & (TH_SYN|TH_FIN|TH_RST)) @@ -1129,7 +1137,7 @@ send: m = m_gethdr(M_NOWAIT, MT_DATA); if (m == NULL) { error = ENOBUFS; - sack_rxmit = 0; + sack_rxmit = false; goto out; } #ifdef INET6 @@ -1141,7 +1149,7 @@ send: m->m_data += max_linkhdr; m->m_len = hdrlen; } - SOCKBUF_UNLOCK_ASSERT(&so->so_snd); + SOCK_SENDBUF_UNLOCK_ASSERT(so); m->m_pkthdr.rcvif = (struct ifnet *)0; #ifdef MAC mac_inpcb_create_mbuf(inp, m); @@ -1226,7 +1234,7 @@ send: * case, since we know we aren't doing a retransmission. * (retransmit and persist are mutually exclusive...) */ - if (sack_rxmit == 0) { + if (!sack_rxmit) { if (len || (flags & (TH_SYN|TH_FIN)) || tcp_timer_active(tp, TT_PERSIST)) th->th_seq = htonl(tp->snd_nxt); @@ -1258,7 +1266,6 @@ send: bcopy(opt, th + 1, optlen); th->th_off = (sizeof (struct tcphdr) + optlen) >> 2; } - tcp_set_flags(th, flags); /* * Calculate receive window. Don't shrink window, * but avoid silly window syndrome. 
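With IPsec-aware TSO (TF2_IPSEC_TSO), both the burst limit handed to the driver and the advertised tso_segsz in the surrounding hunks subtract the per-packet ESP overhead. A standalone arithmetic sketch with assumed values; all numbers here, including the ESP overhead, are illustrative only.

	#include <stdio.h>

	int
	main(void)
	{
		unsigned if_hw_tsomax = 65535;	/* NIC TSO burst limit */
		unsigned hdrlen       = 52;	/* IP + TCP header incl. options */
		unsigned max_linkhdr  = 16;	/* reserved link-layer header space */
		unsigned ipsec_optlen = 73;	/* assumed ESP overhead for this SA */
		unsigned t_maxseg     = 1448;
		unsigned optlen       = 12;	/* TCP options actually sent (e.g. timestamps) */

		/* Largest TSO burst the driver may be handed. */
		unsigned max_len = if_hw_tsomax - hdrlen - ipsec_optlen - max_linkhdr;
		/* Size of each segment the NIC will cut, advertised via tso_segsz. */
		unsigned tso_segsz = t_maxseg - optlen - ipsec_optlen;

		printf("max_len = %u, tso_segsz = %u\n", max_len, tso_segsz);
		return (0);
	}
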
@@ -1303,8 +1310,8 @@ send: tp->t_flags &= ~TF_RXWIN0SENT; if (SEQ_GT(tp->snd_up, tp->snd_nxt)) { th->th_urp = htons((u_short)(tp->snd_up - tp->snd_nxt)); - th->th_flags |= TH_URG; - } else + flags |= TH_URG; + } else { /* * If no urgent pointer to send, then we pull * the urgent pointer to the left edge of the send window @@ -1312,6 +1319,8 @@ send: * number wraparound. */ tp->snd_up = tp->snd_una; /* drag it along */ + } + tcp_set_flags(th, flags); /* * Put TCP length in extended header, and then @@ -1390,10 +1399,10 @@ send: * The TCP pseudo header checksum is always provided. */ if (tso) { - KASSERT(len > tp->t_maxseg - optlen, + KASSERT(len > tp->t_maxseg - optlen - ipsec_optlen, ("%s: len <= tso_segsz", __func__)); m->m_pkthdr.csum_flags |= CSUM_TSO; - m->m_pkthdr.tso_segsz = tp->t_maxseg - optlen; + m->m_pkthdr.tso_segsz = tp->t_maxseg - optlen - ipsec_optlen; } KASSERT(len + hdrlen == m_length(m, NULL), @@ -1454,10 +1463,6 @@ send: TCP_PROBE5(send, NULL, tp, ip6, tp, th); -#ifdef TCPPCAP - /* Save packet, if requested. */ - tcp_pcap_add(th, m, &(tp->t_outpkts)); -#endif /* TODO: IPv6 IP6TOS_ECT bit on */ error = ip6_output(m, inp->in6p_outputopts, &inp->inp_route6, @@ -1500,11 +1505,6 @@ send: TCP_PROBE5(send, NULL, tp, ip, tp, th); -#ifdef TCPPCAP - /* Save packet, if requested. */ - tcp_pcap_add(th, m, &(tp->t_outpkts)); -#endif - error = ip_output(m, inp->inp_options, &inp->inp_route, ((so->so_options & SO_DONTROUTE) ? IP_ROUTETOIF : 0), 0, inp); @@ -1633,11 +1633,20 @@ timer: tp->snd_max = tp->snd_nxt + xlen; } if ((error == 0) && - (TCPS_HAVEESTABLISHED(tp->t_state) && - (tp->t_flags & TF_SACK_PERMIT) && - tp->rcv_numsacks > 0)) { - /* Clean up any DSACK's sent */ - tcp_clean_dsack_blocks(tp); + (tp->rcv_numsacks > 0) && + TCPS_HAVEESTABLISHED(tp->t_state) && + (tp->t_flags & TF_SACK_PERMIT)) { + /* Clean up any DSACK's sent */ + tcp_clean_dsack_blocks(tp); + } + if ((error == 0) && + sack_rxmit && + SEQ_LT(tp->snd_nxt, SEQ_MIN(p->rxmit, p->end))) { + /* + * When transmitting from SACK scoreboard + * after an RTO, pull snd_nxt along. + */ + tp->snd_nxt = SEQ_MIN(p->rxmit, p->end); } if (error) { /* @@ -1672,7 +1681,7 @@ timer: if (IN_RECOVERY(tp->t_flags)) tp->sackhint.prr_out -= len; } - SOCKBUF_UNLOCK_ASSERT(&so->so_snd); /* Check gotos. */ + SOCK_SENDBUF_UNLOCK_ASSERT(so); /* Check gotos. */ switch (error) { case EACCES: case EPERM: @@ -1680,7 +1689,7 @@ timer: return (error); case ENOBUFS: TCP_XMIT_TIMER_ASSERT(tp, len, flags); - tp->snd_cwnd = tp->t_maxseg; + tp->snd_cwnd = tcp_maxseg(tp); return (0); case EMSGSIZE: /* diff --git a/sys/netinet/tcp_pcap.c b/sys/netinet/tcp_pcap.c deleted file mode 100644 index f26287bd7f03..000000000000 --- a/sys/netinet/tcp_pcap.c +++ /dev/null @@ -1,452 +0,0 @@ -/*- - * Copyright (c) 2015 - * Jonathan Looney. All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. 
- * - * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - */ - -#include <sys/queue.h> -#include <sys/param.h> -#include <sys/types.h> -#include <sys/socket.h> -#include <sys/socketvar.h> -#include <sys/sysctl.h> -#include <sys/systm.h> -#include <sys/mbuf.h> -#include <sys/eventhandler.h> -#include <machine/atomic.h> -#include <netinet/in.h> -#include <netinet/in_pcb.h> -#include <netinet/tcp_var.h> -#include <netinet/tcp_pcap.h> - -#define M_LEADINGSPACE_NOWRITE(m) \ - ((m)->m_data - M_START(m)) - -int tcp_pcap_aggressive_free = 1; -static int tcp_pcap_clusters_referenced_cur = 0; -static int tcp_pcap_clusters_referenced_max = 0; - -SYSCTL_INT(_net_inet_tcp, OID_AUTO, tcp_pcap_aggressive_free, - CTLFLAG_RW, &tcp_pcap_aggressive_free, 0, - "Free saved packets when the memory system comes under pressure"); -SYSCTL_INT(_net_inet_tcp, OID_AUTO, tcp_pcap_clusters_referenced_cur, - CTLFLAG_RD, &tcp_pcap_clusters_referenced_cur, 0, - "Number of clusters currently referenced on TCP PCAP queues"); -SYSCTL_INT(_net_inet_tcp, OID_AUTO, tcp_pcap_clusters_referenced_max, - CTLFLAG_RW, &tcp_pcap_clusters_referenced_max, 0, - "Maximum number of clusters allowed to be referenced on TCP PCAP " - "queues"); - -static int tcp_pcap_alloc_reuse_ext = 0; -static int tcp_pcap_alloc_reuse_mbuf = 0; -static int tcp_pcap_alloc_new_mbuf = 0; -SYSCTL_INT(_net_inet_tcp, OID_AUTO, tcp_pcap_alloc_reuse_ext, - CTLFLAG_RD, &tcp_pcap_alloc_reuse_ext, 0, - "Number of mbufs with external storage reused for the TCP PCAP " - "functionality"); -SYSCTL_INT(_net_inet_tcp, OID_AUTO, tcp_pcap_alloc_reuse_mbuf, - CTLFLAG_RD, &tcp_pcap_alloc_reuse_mbuf, 0, - "Number of mbufs with internal storage reused for the TCP PCAP " - "functionality"); -SYSCTL_INT(_net_inet_tcp, OID_AUTO, tcp_pcap_alloc_new_mbuf, - CTLFLAG_RD, &tcp_pcap_alloc_new_mbuf, 0, - "Number of new mbufs allocated for the TCP PCAP functionality"); - -VNET_DEFINE(int, tcp_pcap_packets) = 0; -#define V_tcp_pcap_packets VNET(tcp_pcap_packets) -SYSCTL_INT(_net_inet_tcp, OID_AUTO, tcp_pcap_packets, - CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(tcp_pcap_packets), 0, - "Default number of packets saved per direction per TCPCB"); - -/* Initialize the values. */ -static void -tcp_pcap_max_set(void) -{ - - tcp_pcap_clusters_referenced_max = nmbclusters / 4; -} - -void -tcp_pcap_init(void) -{ - - tcp_pcap_max_set(); - EVENTHANDLER_REGISTER(nmbclusters_change, tcp_pcap_max_set, - NULL, EVENTHANDLER_PRI_ANY); -} - -/* - * If we are below the maximum allowed cluster references, - * increment the reference count and return TRUE. Otherwise, - * leave the reference count alone and return FALSE. 
- */ -static __inline bool -tcp_pcap_take_cluster_reference(void) -{ - if (atomic_fetchadd_int(&tcp_pcap_clusters_referenced_cur, 1) >= - tcp_pcap_clusters_referenced_max) { - atomic_add_int(&tcp_pcap_clusters_referenced_cur, -1); - return FALSE; - } - return TRUE; -} - -/* - * For all the external entries in m, apply the given adjustment. - * This can be used to adjust the counter when an mbuf chain is - * copied or freed. - */ -static __inline void -tcp_pcap_adj_cluster_reference(struct mbuf *m, int adj) -{ - while (m) { - if (m->m_flags & M_EXT) - atomic_add_int(&tcp_pcap_clusters_referenced_cur, adj); - - m = m->m_next; - } -} - -/* - * Free all mbufs in a chain, decrementing the reference count as - * necessary. - * - * Functions in this file should use this instead of m_freem() when - * they are freeing mbuf chains that may contain clusters that were - * already included in tcp_pcap_clusters_referenced_cur. - */ -static void -tcp_pcap_m_freem(struct mbuf *mb) -{ - while (mb != NULL) { - if (mb->m_flags & M_EXT) - atomic_subtract_int(&tcp_pcap_clusters_referenced_cur, - 1); - mb = m_free(mb); - } -} - -/* - * Copy data from m to n, where n cannot fit all the data we might - * want from m. - * - * Prioritize data like this: - * 1. TCP header - * 2. IP header - * 3. Data - */ -static void -tcp_pcap_copy_bestfit(struct tcphdr *th, struct mbuf *m, struct mbuf *n) -{ - struct mbuf *m_cur = m; - int bytes_to_copy=0, trailing_data, skip=0, tcp_off; - - /* Below, we assume these will be non-NULL. */ - KASSERT(th, ("%s: called with th == NULL", __func__)); - KASSERT(m, ("%s: called with m == NULL", __func__)); - KASSERT(n, ("%s: called with n == NULL", __func__)); - - /* We assume this initialization occurred elsewhere. */ - KASSERT(n->m_len == 0, ("%s: called with n->m_len=%d (expected 0)", - __func__, n->m_len)); - KASSERT(n->m_data == M_START(n), - ("%s: called with n->m_data != M_START(n)", __func__)); - - /* - * Calculate the size of the TCP header. We use this often - * enough that it is worth just calculating at the start. - */ - tcp_off = th->th_off << 2; - - /* Trim off leading empty mbufs. */ - while (m && m->m_len == 0) - m = m->m_next; - - if (m) { - m_cur = m; - } - else { - /* - * No data? Highly unusual. We would expect to at - * least see a TCP header in the mbuf. - * As we have a pointer to the TCP header, I guess - * we should just copy that. (???) - */ -fallback: - bytes_to_copy = tcp_off; - if (bytes_to_copy > M_SIZE(n)) - bytes_to_copy = M_SIZE(n); - bcopy(th, n->m_data, bytes_to_copy); - n->m_len = bytes_to_copy; - return; - } - - /* - * Find TCP header. Record the total number of bytes up to, - * and including, the TCP header. - */ - while (m_cur) { - if ((caddr_t) th >= (caddr_t) m_cur->m_data && - (caddr_t) th < (caddr_t) (m_cur->m_data + m_cur->m_len)) - break; - bytes_to_copy += m_cur->m_len; - m_cur = m_cur->m_next; - } - if (m_cur) - bytes_to_copy += (caddr_t) th - (caddr_t) m_cur->m_data; - else - goto fallback; - bytes_to_copy += tcp_off; - - /* - * If we already want to copy more bytes than we can hold - * in the destination mbuf, skip leading bytes and copy - * what we can. - * - * Otherwise, consider trailing data. - */ - if (bytes_to_copy > M_SIZE(n)) { - skip = bytes_to_copy - M_SIZE(n); - bytes_to_copy = M_SIZE(n); - } - else { - /* - * Determine how much trailing data is in the chain. 
- * We start with the length of this mbuf (the one - * containing th) and subtract the size of the TCP - * header (tcp_off) and the size of the data prior - * to th (th - m_cur->m_data). - * - * This *should not* be negative, as the TCP code - * should put the whole TCP header in a single - * mbuf. But, it isn't a problem if it is. We will - * simple work off our negative balance as we look - * at subsequent mbufs. - */ - trailing_data = m_cur->m_len - tcp_off; - trailing_data -= (caddr_t) th - (caddr_t) m_cur->m_data; - m_cur = m_cur->m_next; - while (m_cur) { - trailing_data += m_cur->m_len; - m_cur = m_cur->m_next; - } - if ((bytes_to_copy + trailing_data) > M_SIZE(n)) - bytes_to_copy = M_SIZE(n); - else - bytes_to_copy += trailing_data; - } - - m_copydata(m, skip, bytes_to_copy, n->m_data); - n->m_len = bytes_to_copy; -} - -void -tcp_pcap_add(struct tcphdr *th, struct mbuf *m, struct mbufq *queue) -{ - struct mbuf *n = NULL, *mhead; - - KASSERT(th, ("%s: called with th == NULL", __func__)); - KASSERT(m, ("%s: called with m == NULL", __func__)); - KASSERT(queue, ("%s: called with queue == NULL", __func__)); - - /* We only care about data packets. */ - while (m && m->m_type != MT_DATA) - m = m->m_next; - - /* We only need to do something if we still have an mbuf. */ - if (!m) - return; - - /* If we are not saving mbufs, return now. */ - if (queue->mq_maxlen == 0) - return; - - /* - * Check to see if we will need to recycle mbufs. - * - * If we need to get rid of mbufs to stay below - * our packet count, try to reuse the mbuf. Once - * we already have a new mbuf (n), then we can - * simply free subsequent mbufs. - * - * Note that most of the logic in here is to deal - * with the reuse. If we are fine with constant - * mbuf allocs/deallocs, we could ditch this logic. - * But, it only seems to make sense to reuse - * mbufs we already have. - */ - while (mbufq_full(queue)) { - mhead = mbufq_dequeue(queue); - - if (n) { - tcp_pcap_m_freem(mhead); - } - else { - /* - * If this held an external cluster, try to - * detach the cluster. But, if we held the - * last reference, go through the normal - * free-ing process. - */ - if (mhead->m_flags & M_EXTPG) { - /* Don't mess around with these. */ - tcp_pcap_m_freem(mhead); - continue; - } else if (mhead->m_flags & M_EXT) { - switch (mhead->m_ext.ext_type) { - case EXT_SFBUF: - /* Don't mess around with these. */ - tcp_pcap_m_freem(mhead); - continue; - default: - if (atomic_fetchadd_int( - mhead->m_ext.ext_cnt, -1) == 1) - { - /* - * We held the last reference - * on this cluster. Restore - * the reference count and put - * it back in the pool. - */ - *(mhead->m_ext.ext_cnt) = 1; - tcp_pcap_m_freem(mhead); - continue; - } - /* - * We were able to cleanly free the - * reference. - */ - atomic_subtract_int( - &tcp_pcap_clusters_referenced_cur, - 1); - tcp_pcap_alloc_reuse_ext++; - break; - } - } else { - tcp_pcap_alloc_reuse_mbuf++; - } - - n = mhead; - tcp_pcap_m_freem(n->m_next); - m_init(n, M_NOWAIT, MT_DATA, 0); - } - } - - /* Check to see if we need to get a new mbuf. */ - if (!n) { - if (!(n = m_get(M_NOWAIT, MT_DATA))) - return; - tcp_pcap_alloc_new_mbuf++; - } - - /* - * What are we dealing with? If a cluster, attach it. Otherwise, - * try to copy the data from the beginning of the mbuf to the - * end of data. (There may be data between the start of the data - * area and the current data pointer. We want to get this, because - * it may contain header information that is useful.) 
- * In cases where that isn't possible, settle for what we can - * get. - */ - if ((m->m_flags & (M_EXT | M_EXTPG)) && - tcp_pcap_take_cluster_reference()) { - n->m_data = m->m_data; - n->m_len = m->m_len; - mb_dupcl(n, m); - } - else if (((m->m_data + m->m_len) - M_START(m)) <= M_SIZE(n)) { - /* - * At this point, n is guaranteed to be a normal mbuf - * with no cluster and no packet header. Because the - * logic in this code block requires this, the assert - * is here to catch any instances where someone - * changes the logic to invalidate that assumption. - */ - KASSERT((n->m_flags & (M_EXT | M_PKTHDR)) == 0, - ("%s: Unexpected flags (%#x) for mbuf", - __func__, n->m_flags)); - n->m_data = n->m_dat + M_LEADINGSPACE_NOWRITE(m); - n->m_len = m->m_len; - if (m->m_flags & M_EXTPG) - m_copydata(m, 0, m->m_len, n->m_data); - else - bcopy(M_START(m), n->m_dat, - m->m_len + M_LEADINGSPACE_NOWRITE(m)); - } - else { - /* - * This is the case where we need to "settle for what - * we can get". The most probable way to this code - * path is that we've already taken references to the - * maximum number of mbuf clusters we can, and the data - * is too long to fit in an mbuf's internal storage. - * Try for a "best fit". - */ - tcp_pcap_copy_bestfit(th, m, n); - - /* Don't try to get additional data. */ - goto add_to_queue; - } - - if (m->m_next) { - n->m_next = m_copym(m->m_next, 0, M_COPYALL, M_NOWAIT); - tcp_pcap_adj_cluster_reference(n->m_next, 1); - } - -add_to_queue: - /* Add the new mbuf to the list. */ - if (mbufq_enqueue(queue, n)) { - /* This shouldn't happen. If INVARIANTS is defined, panic. */ - KASSERT(0, ("%s: mbufq was unexpectedly full!", __func__)); - tcp_pcap_m_freem(n); - } -} - -void -tcp_pcap_drain(struct mbufq *queue) -{ - struct mbuf *m; - while ((m = mbufq_dequeue(queue))) - tcp_pcap_m_freem(m); -} - -void -tcp_pcap_tcpcb_init(struct tcpcb *tp) -{ - mbufq_init(&(tp->t_inpkts), V_tcp_pcap_packets); - mbufq_init(&(tp->t_outpkts), V_tcp_pcap_packets); -} - -void -tcp_pcap_set_sock_max(struct mbufq *queue, int newval) -{ - queue->mq_maxlen = newval; - while (queue->mq_len > queue->mq_maxlen) - tcp_pcap_m_freem(mbufq_dequeue(queue)); -} - -int -tcp_pcap_get_sock_max(struct mbufq *queue) -{ - return queue->mq_maxlen; -} diff --git a/sys/netinet/tcp_pcap.h b/sys/netinet/tcp_pcap.h deleted file mode 100644 index 8250c06d4ce0..000000000000 --- a/sys/netinet/tcp_pcap.h +++ /dev/null @@ -1,39 +0,0 @@ -/*- - * Copyright (c) 2015 - * Jonathan Looney. All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - */ - -#ifndef _NETINET_TCP_PCAP_H_ -#define _NETINET_TCP_PCAP_H_ - -void tcp_pcap_init(void); -void tcp_pcap_add(struct tcphdr *th, struct mbuf *m, struct mbufq *queue); -void tcp_pcap_drain(struct mbufq *queue); -void tcp_pcap_tcpcb_init(struct tcpcb *tp); -void tcp_pcap_set_sock_max(struct mbufq *queue, int newval); -int tcp_pcap_get_sock_max(struct mbufq *queue); - -extern int tcp_pcap_aggressive_free; - -#endif /* _NETINET_TCP_PCAP_H_ */ diff --git a/sys/netinet/tcp_ratelimit.c b/sys/netinet/tcp_ratelimit.c index 1834c702c493..a0e837cc7d76 100644 --- a/sys/netinet/tcp_ratelimit.c +++ b/sys/netinet/tcp_ratelimit.c @@ -246,10 +246,10 @@ const uint64_t desired_rates[] = { #define RS_ONE_GIGABIT_PERSEC 1000000000 #define RS_TEN_GIGABIT_PERSEC 10000000000 -static struct head_tcp_rate_set int_rs; +static struct head_tcp_rate_set int_rs = CK_LIST_HEAD_INITIALIZER(); static struct mtx rs_mtx; -uint32_t rs_number_alive; -uint32_t rs_number_dead; +uint32_t rs_number_alive = 0; +uint32_t rs_number_dead = 0; static uint32_t rs_floor_mss = 0; static uint32_t wait_time_floor = 8000; /* 8 ms */ static uint32_t rs_hw_floor_mss = 16; @@ -1298,6 +1298,12 @@ tcp_rl_ifnet_departure(void *arg __unused, struct ifnet *ifp) NET_EPOCH_EXIT(et); } +void +tcp_rl_release_ifnet(struct ifnet *ifp) +{ + tcp_rl_ifnet_departure(NULL, ifp); +} + static void tcp_rl_shutdown(void *arg __unused, int howto __unused) { @@ -1772,9 +1778,6 @@ static eventhandler_tag rl_shutdown_start; static void tcp_rs_init(void *st __unused) { - CK_LIST_INIT(&int_rs); - rs_number_alive = 0; - rs_number_dead = 0; mtx_init(&rs_mtx, "tcp_rs_mtx", "rsmtx", MTX_DEF); rl_ifnet_departs = EVENTHANDLER_REGISTER(ifnet_departure_event, tcp_rl_ifnet_departure, diff --git a/sys/netinet/tcp_ratelimit.h b/sys/netinet/tcp_ratelimit.h index cd540d1164e1..0ce42dea0d90 100644 --- a/sys/netinet/tcp_ratelimit.h +++ b/sys/netinet/tcp_ratelimit.h @@ -94,6 +94,8 @@ CK_LIST_HEAD(head_tcp_rate_set, tcp_rate_set); #ifndef ETHERNET_SEGMENT_SIZE #define ETHERNET_SEGMENT_SIZE 1514 #endif +struct tcpcb; + #ifdef RATELIMIT #define DETAILED_RATELIMIT_SYSCTL 1 /* * Undefine this if you don't want @@ -131,6 +133,9 @@ tcp_get_pacing_burst_size_w_divisor(struct tcpcb *tp, uint64_t bw, uint32_t segs void tcp_rl_log_enobuf(const struct tcp_hwrate_limit_table *rte); +void +tcp_rl_release_ifnet(struct ifnet *ifp); + #else static inline const struct tcp_hwrate_limit_table * tcp_set_pacing_rate(struct tcpcb *tp, struct ifnet *ifp, @@ -218,6 +223,10 @@ tcp_rl_log_enobuf(const struct tcp_hwrate_limit_table *rte) { } +static inline void +tcp_rl_release_ifnet(struct ifnet *ifp) +{ +} #endif /* diff --git a/sys/netinet/tcp_reass.c b/sys/netinet/tcp_reass.c index 5768d90a9337..5f73e83dc8a9 100644 --- a/sys/netinet/tcp_reass.c +++ b/sys/netinet/tcp_reass.c @@ -957,7 +957,7 @@ new_entry: flags = tcp_get_flags(th) & TH_FIN; TCPSTAT_INC(tcps_rcvoopack); TCPSTAT_ADD(tcps_rcvoobyte, *tlenp); - SOCKBUF_LOCK(&so->so_rcv); + SOCK_RECVBUF_LOCK(so); if (so->so_rcv.sb_state & 
SBS_CANTRCVMORE) { m_freem(m); } else { @@ -1058,7 +1058,7 @@ present: #endif return (0); } - SOCKBUF_LOCK(&so->so_rcv); + SOCK_RECVBUF_LOCK(so); do { tp->rcv_nxt += q->tqe_len; flags = q->tqe_flags & TH_FIN; diff --git a/sys/netinet/tcp_sack.c b/sys/netinet/tcp_sack.c index f59cc5fe0d0b..90d789f0e224 100644 --- a/sys/netinet/tcp_sack.c +++ b/sys/netinet/tcp_sack.c @@ -137,6 +137,11 @@ SYSCTL_INT(_net_inet_tcp_sack, OID_AUTO, lrd, CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(tcp_do_lrd), 1, "Perform Lost Retransmission Detection"); +VNET_DEFINE(int, tcp_sack_tso) = 0; +SYSCTL_INT(_net_inet_tcp_sack, OID_AUTO, tso, CTLFLAG_VNET | CTLFLAG_RW, + &VNET_NAME(tcp_sack_tso), 0, + "Allow TSO during SACK loss recovery"); + VNET_DEFINE(int, tcp_sack_maxholes) = 128; SYSCTL_INT(_net_inet_tcp_sack, OID_AUTO, maxholes, CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(tcp_sack_maxholes), 0, @@ -558,6 +563,7 @@ tcp_sack_doack(struct tcpcb *tp, struct tcpopt *to, tcp_seq th_ack) int i, j, num_sack_blks; sackstatus_t sack_changed; int delivered_data, left_edge_delta; + int maxseg = tp->t_maxseg - MAX_TCPOPTLEN; tcp_seq loss_hiack = 0; int loss_thresh = 0; @@ -604,7 +610,9 @@ tcp_sack_doack(struct tcpcb *tp, struct tcpopt *to, tcp_seq th_ack) SEQ_GT(sack.start, th_ack) && SEQ_LT(sack.start, tp->snd_max) && SEQ_GT(sack.end, tp->snd_una) && - SEQ_LEQ(sack.end, tp->snd_max)) { + SEQ_LEQ(sack.end, tp->snd_max) && + ((sack.end - sack.start) >= maxseg || + SEQ_GEQ(sack.end, tp->snd_max))) { sack_blocks[num_sack_blks++] = sack; } else if (SEQ_LEQ(sack.start, th_ack) && SEQ_LEQ(sack.end, th_ack)) { @@ -645,8 +653,6 @@ tcp_sack_doack(struct tcpcb *tp, struct tcpopt *to, tcp_seq th_ack) * scoreboard). */ tp->snd_fack = SEQ_MAX(tp->snd_una, th_ack); - tp->sackhint.sacked_bytes = 0; /* reset */ - tp->sackhint.hole_bytes = 0; } /* * In the while-loop below, incoming SACK blocks (sack_blocks[]) and @@ -862,12 +868,26 @@ tcp_sack_doack(struct tcpcb *tp, struct tcpopt *to, tcp_seq th_ack) } } - KASSERT(!(TAILQ_EMPTY(&tp->snd_holes) && (tp->sackhint.hole_bytes != 0)), - ("SACK scoreboard empty, but accounting non-zero\n")); - + KASSERT(delivered_data >= 0, ("delivered_data < 0")); KASSERT(notlost_bytes <= tp->sackhint.hole_bytes, ("SACK: more bytes marked notlost than in scoreboard holes")); + if (TAILQ_EMPTY(&tp->snd_holes)) { + KASSERT(tp->sackhint.hole_bytes == 0, + ("SACK scoreboard empty, but accounting non-zero\n")); + tp->sackhint.sack_bytes_rexmit = 0; + tp->sackhint.sacked_bytes = 0; + tp->sackhint.lost_bytes = 0; + } else { + KASSERT(tp->sackhint.hole_bytes > 0, + ("SACK scoreboard not empty, but has no bytes\n")); + tp->sackhint.delivered_data = delivered_data; + tp->sackhint.sacked_bytes += delivered_data - left_edge_delta; + KASSERT((tp->sackhint.sacked_bytes >= 0), ("sacked_bytes < 0")); + tp->sackhint.lost_bytes = tp->sackhint.hole_bytes - + notlost_bytes; + } + if (!(to->to_flags & TOF_SACK)) /* * If this ACK did not contain any @@ -878,11 +898,6 @@ tcp_sack_doack(struct tcpcb *tp, struct tcpopt *to, tcp_seq th_ack) * for RFC6675 rescue retransmission. 
*/ sack_changed = SACK_NOCHANGE; - tp->sackhint.delivered_data = delivered_data; - tp->sackhint.sacked_bytes += delivered_data - left_edge_delta; - tp->sackhint.lost_bytes = tp->sackhint.hole_bytes - notlost_bytes; - KASSERT((delivered_data >= 0), ("delivered_data < 0")); - KASSERT((tp->sackhint.sacked_bytes >= 0), ("sacked_bytes < 0")); return (sack_changed); } @@ -953,16 +968,15 @@ tcp_sack_partialack(struct tcpcb *tp, struct tcphdr *th, u_int *maxsegp) /* Send one or 2 segments based on how much new data was acked. */ if ((BYTES_THIS_ACK(tp, th) / maxseg) >= 2) num_segs = 2; - if (V_tcp_do_newsack) { - tp->snd_cwnd = imax(tp->snd_nxt - th->th_ack + - tp->sackhint.sack_bytes_rexmit - - tp->sackhint.sacked_bytes - - tp->sackhint.lost_bytes, maxseg) + - num_segs * maxseg; - } else { + if (tp->snd_nxt == tp->snd_max) { tp->snd_cwnd = (tp->sackhint.sack_bytes_rexmit + - imax(0, tp->snd_nxt - tp->snd_recover) + - num_segs * maxseg); + (tp->snd_nxt - tp->snd_recover) + num_segs * maxseg); + } else { + /* + * Since cwnd is not the expected flightsize during + * SACK LR, not deflating cwnd allows the partial + * ACKed amount to be sent. + */ } if (tp->snd_cwnd > tp->snd_ssthresh) tp->snd_cwnd = tp->snd_ssthresh; @@ -998,7 +1012,7 @@ tcp_sack_partialack(struct tcpcb *tp, struct tcphdr *th, u_int *maxsegp) highdata--; highdata = SEQ_MIN(highdata, tp->snd_recover); if (SEQ_LT(th->th_ack, highdata)) { - tp->snd_fack = th->th_ack; + tp->snd_fack = SEQ_MAX(th->th_ack, tp->snd_fack); if ((temp = tcp_sackhole_insert(tp, SEQ_MAX(th->th_ack, highdata - maxseg), highdata, NULL)) != NULL) { tp->sackhint.hole_bytes += @@ -1068,41 +1082,47 @@ tcp_sack_output(struct tcpcb *tp, int *sack_bytes_rexmt) * After a timeout, the SACK list may be rebuilt. This SACK information * should be used to avoid retransmitting SACKed data. This function * traverses the SACK list to see if snd_nxt should be moved forward. + * In addition, cwnd will be inflated by the sacked bytes traversed when + * moving snd_nxt forward. This prevents a traffic burst after the final + * full ACK, and also keeps ACKs coming back. 
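As a concrete illustration of that return value: suppose the scoreboard holds holes [1000,2000) and [3000,4000), snd_fack is 5000, and snd_nxt sits at 2000. The walk skips the SACKed range [2000,3000), moves snd_nxt to 3000, and reports 1000 bytes for the caller to credit against cwnd. A simplified model of the walk with these hypothetical numbers (ordinary integers instead of modular sequence space):

	#include <stdio.h>

	struct hole { unsigned start, end; };

	int
	main(void)
	{
		/* Illustrative scoreboard: SACKed data lives between the holes. */
		struct hole holes[] = { { 1000, 2000 }, { 3000, 4000 } };
		unsigned nholes = 2, snd_fack = 5000, snd_nxt = 2000, sacked = 0, i;

		for (i = 0; i + 1 < nholes && snd_nxt >= holes[i].end; i++) {
			sacked += holes[i + 1].start - holes[i].end;	/* SACKed span skipped */
			if (snd_nxt < holes[i + 1].start) {
				snd_nxt = holes[i + 1].start;	/* jump over delivered data */
				break;
			}
		}
		if (i + 1 == nholes && snd_nxt >= holes[i].end)
			snd_nxt = snd_fack;	/* past the last hole entirely */

		/* Prints snd_nxt=3000 sacked=1000. */
		printf("snd_nxt=%u sacked=%u\n", snd_nxt, sacked);
		return (0);
	}
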
*/ -void +int tcp_sack_adjust(struct tcpcb *tp) { + int sacked = 0; struct sackhole *p, *cur = TAILQ_FIRST(&tp->snd_holes); INP_WLOCK_ASSERT(tptoinpcb(tp)); if (cur == NULL) { /* No holes */ - return; + return (0); } if (SEQ_GEQ(tp->snd_nxt, tp->snd_fack)) { /* We're already beyond any SACKed blocks */ - return; + return (tp->sackhint.sacked_bytes); } - /*- + /* * Two cases for which we want to advance snd_nxt: * i) snd_nxt lies between end of one hole and beginning of another * ii) snd_nxt lies between end of last hole and snd_fack */ while ((p = TAILQ_NEXT(cur, scblink)) != NULL) { if (SEQ_LT(tp->snd_nxt, cur->end)) { - return; + return (sacked); } + sacked += p->start - cur->end; if (SEQ_GEQ(tp->snd_nxt, p->start)) { cur = p; } else { tp->snd_nxt = p->start; - return; + return (sacked); } } if (SEQ_LT(tp->snd_nxt, cur->end)) { - return; + return (sacked); } tp->snd_nxt = tp->snd_fack; + return (tp->sackhint.sacked_bytes); } /* diff --git a/sys/netinet/tcp_stacks/bbr.c b/sys/netinet/tcp_stacks/bbr.c index 946b65cda6a5..d2636f01714e 100644 --- a/sys/netinet/tcp_stacks/bbr.c +++ b/sys/netinet/tcp_stacks/bbr.c @@ -607,7 +607,7 @@ activate_rxt: TCPT_RANGESET_NOSLOP(to, tov, (bbr->r_ctl.rc_min_rto_ms * MS_IN_USEC), (bbr->rc_max_rto_sec * USECS_IN_SECOND)); - bbr_log_timer_var(bbr, 2, cts, 0, srtt, 0, to); + bbr_log_timer_var(bbr, 2, cts, 0, bbr_get_rtt(bbr, BBR_SRTT), 0, to); return (to); } return (0); @@ -978,14 +978,6 @@ bbr_timer_audit(struct tcpcb *tp, struct tcp_bbr *bbr, uint32_t cts, struct sock * and we do */ return; - } else if (sbavail(&inp->inp_socket->so_snd) && - (tmr_up == PACE_TMR_RXT)) { - /* - * if we hit enobufs then we would expect the - * possibility of nothing outstanding and the RXT up - * (and the hptsi timer). - */ - return; } else if (((V_tcp_always_keepalive || inp->inp_socket->so_options & SO_KEEPALIVE) && (tp->t_state <= TCPS_CLOSING)) && @@ -2356,11 +2348,11 @@ bbr_log_to_event(struct tcp_bbr *bbr, uint32_t cts, int32_t to_num) log.u_bbr.flex1 = bbr->bbr_timer_src; log.u_bbr.flex2 = 0; log.u_bbr.flex3 = bbr->r_ctl.rc_hpts_flags; - ar = (uint64_t)(bbr->r_ctl.rc_resend); + ar = (uintptr_t)(bbr->r_ctl.rc_resend); ar >>= 32; ar &= 0x00000000ffffffff; log.u_bbr.flex4 = (uint32_t)ar; - ar = (uint64_t)bbr->r_ctl.rc_resend; + ar = (uintptr_t)bbr->r_ctl.rc_resend; ar &= 0x00000000ffffffff; log.u_bbr.flex5 = (uint32_t)ar; log.u_bbr.flex6 = TICKS_2_USEC(bbr->rc_tp->t_rxtcur); @@ -2718,12 +2710,13 @@ bbr_type_log_hdwr_pacing(struct tcp_bbr *bbr, const struct ifnet *ifp, { if (tcp_bblogging_on(bbr->rc_tp)) { union tcp_log_stackspecific log; + uint64_t ifp64 = (uintptr_t)ifp; bbr_fill_in_logging_data(bbr, &log.u_bbr, cts); log.u_bbr.flex1 = ((hw_rate >> 32) & 0x00000000ffffffff); log.u_bbr.flex2 = (hw_rate & 0x00000000ffffffff); - log.u_bbr.flex3 = (((uint64_t)ifp >> 32) & 0x00000000ffffffff); - log.u_bbr.flex4 = ((uint64_t)ifp & 0x00000000ffffffff); + log.u_bbr.flex3 = ((ifp64 >> 32) & 0x00000000ffffffff); + log.u_bbr.flex4 = (ifp64 & 0x00000000ffffffff); log.u_bbr.bw_inuse = rate; log.u_bbr.flex5 = line; log.u_bbr.flex6 = error; @@ -2992,9 +2985,6 @@ use_initial_window: /* We should not be at 0, go to the initial window then */ goto use_initial_window; } - if (bw < 1) - /* Probably should panic */ - bw = 1; if (bw < min_bw) bw = min_bw; return (bw); @@ -3842,7 +3832,7 @@ bbr_post_recovery(struct tcpcb *tp) else if (bbr->r_ctl.rc_delivered == 0) lr2use = 1000; else { - lr2use = bbr->r_ctl.rc_lost * 1000; + lr2use = (uint64_t)bbr->r_ctl.rc_lost * (uint64_t)1000; lr2use /= 
bbr->r_ctl.rc_delivered; } lr2use += bbr->r_ctl.recovery_lr; @@ -4613,7 +4603,7 @@ need_retran: */ if (collapsed_win == 0) { rsm = TAILQ_LAST_FAST(&bbr->r_ctl.rc_map, bbr_sendmap, r_next); - if (rsm && (BBR_ACKED | BBR_HAS_FIN)) { + if (rsm && (rsm->r_flags & (BBR_ACKED | BBR_HAS_FIN))) { rsm = bbr_find_high_nonack(bbr, rsm); } if (rsm == NULL) { @@ -5134,6 +5124,16 @@ bbr_timeout_rxt(struct tcpcb *tp, struct tcp_bbr *bbr, uint32_t cts) tp->t_flags2 |= TF2_PLPMTU_PMTUD; tp->t_flags2 &= ~TF2_PLPMTU_BLACKHOLE; tp->t_maxseg = tp->t_pmtud_saved_maxseg; + if (tp->t_maxseg < V_tcp_mssdflt) { + /* + * The MSS is so small we should not + * process incoming SACK's since we are + * subject to attack in such a case. + */ + tp->t_flags2 |= TF2_PROC_SACK_PROHIBIT; + } else { + tp->t_flags2 &= ~TF2_PROC_SACK_PROHIBIT; + } KMOD_TCPSTAT_INC(tcps_pmtud_blackhole_failed); } } @@ -5542,7 +5542,7 @@ lost_rate: bbr_type_log_hdwr_pacing(bbr, bbr->r_ctl.crte->ptbl->rs_ifp, rate, - ((bbr->r_ctl.crte == NULL) ? 0 : bbr->r_ctl.crte->rate), + bbr->r_ctl.crte->rate, __LINE__, cts, error); @@ -6318,8 +6318,6 @@ tcp_bbr_xmit_timer_commit(struct tcp_bbr *bbr, struct tcpcb *tp, uint32_t cts) } /* Round it up */ rtt_ticks = USEC_2_TICKS((rtt + (USECS_IN_MSEC - 1))); - if (rtt_ticks == 0) - rtt_ticks = 1; if (tp->t_srtt != 0) { /* * srtt is stored as fixed point with 5 bits after the @@ -6703,7 +6701,7 @@ bbr_update_bbr_info(struct tcp_bbr *bbr, struct bbr_sendmap *rsm, uint32_t rtt, bbr_log_rtt_shrinks(bbr, cts, 0, rtt, __LINE__, BBR_RTTS_NEWRTT, 0); bbr_set_reduced_rtt(bbr, cts, __LINE__); } - bbr_log_type_bbrrttprop(bbr, rtt, (rsm ? rsm->r_end : 0), uts, cts, + bbr_log_type_bbrrttprop(bbr, rtt, rsm->r_end, uts, cts, match, rsm->r_start, rsm->r_flags); apply_filter_min_small(&bbr->r_ctl.rc_rttprop, rtt, cts); if (old_rttprop != bbr_get_rtt(bbr, BBR_RTT_PROP)) { @@ -6783,8 +6781,6 @@ bbr_update_rtt(struct tcpcb *tp, struct tcp_bbr *bbr, t = cts - rsm->r_tim_lastsent[0]; else t = 1; - if ((int)t <= 0) - t = 1; bbr->r_ctl.rc_last_rtt = t; bbr_update_bbr_info(bbr, rsm, t, cts, to->to_tsecr, 0, BBR_RTT_BY_EXACTMATCH, rsm->r_tim_lastsent[0], ack_type, to); @@ -6825,8 +6821,6 @@ bbr_update_rtt(struct tcpcb *tp, struct tcp_bbr *bbr, t = cts - rsm->r_tim_lastsent[i]; else t = 1; - if ((int)t <= 0) - t = 1; bbr->r_ctl.rc_last_rtt = t; bbr_update_bbr_info(bbr, rsm, t, cts, to->to_tsecr, uts, BBR_RTT_BY_TSMATCHING, rsm->r_tim_lastsent[i], ack_type, to); @@ -7313,11 +7307,9 @@ bbr_log_ack(struct tcpcb *tp, struct tcpopt *to, struct tcphdr *th, p_maxseg = min(bbr->r_ctl.rc_pace_max_segs, maxseg); th_ack = th->th_ack; if (SEQ_GT(th_ack, tp->snd_una)) { - acked = th_ack - tp->snd_una; bbr_log_progress_event(bbr, tp, ticks, PROGRESS_UPDATE, __LINE__); bbr->rc_tp->t_acktime = ticks; - } else - acked = 0; + } if (SEQ_LEQ(th_ack, tp->snd_una)) { /* Only sent here for sack processing */ goto proc_sack; @@ -7556,7 +7548,7 @@ proc_sack: * Sort the SACK blocks so we can update the rack scoreboard with * just one pass. 
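One of the quieter fixes earlier in this file is the lr2use computation in bbr_post_recovery(): rc_lost is a 32-bit counter, so multiplying by 1000 before widening could wrap and understate the loss ratio; casting to uint64_t first avoids that. A tiny standalone illustration (the 6 MB loss figure is made up):

	#include <stdio.h>
	#include <stdint.h>

	int
	main(void)
	{
		uint32_t rc_lost = 6000000;	/* assumed bytes lost in this recovery */

		/* Old expression: the 32-bit multiply wraps before it is widened. */
		uint64_t wrong = rc_lost * 1000u;
		/* New expression: widen first, then multiply. */
		uint64_t right = (uint64_t)rc_lost * (uint64_t)1000;

		/* Prints wrong=1705032704 right=6000000000. */
		printf("wrong=%llu right=%llu\n",
		    (unsigned long long)wrong, (unsigned long long)right);
		return (0);
	}
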
*/ - new_sb = sack_filter_blks(&bbr->r_ctl.bbr_sf, sack_blocks, + new_sb = sack_filter_blks(tp, &bbr->r_ctl.bbr_sf, sack_blocks, num_sack_blks, th->th_ack); ctf_log_sack_filter(bbr->rc_tp, new_sb, sack_blocks); BBR_STAT_ADD(bbr_sack_blocks, num_sack_blks); @@ -7700,6 +7692,43 @@ bbr_process_ack(struct mbuf *m, struct tcphdr *th, struct socket *so, bbr = (struct tcp_bbr *)tp->t_fb_ptr; lost = bbr->r_ctl.rc_lost; nsegs = max(1, m->m_pkthdr.lro_nsegs); + if (SEQ_GEQ(tp->snd_una, tp->iss + (65535 << tp->snd_scale))) { + /* Checking SEG.ACK against ISS is definitely redundant. */ + tp->t_flags2 |= TF2_NO_ISS_CHECK; + } + if (!V_tcp_insecure_ack) { + tcp_seq seq_min; + bool ghost_ack_check; + + if (tp->t_flags2 & TF2_NO_ISS_CHECK) { + /* Check for too old ACKs (RFC 5961, Section 5.2). */ + seq_min = tp->snd_una - tp->max_sndwnd; + ghost_ack_check = false; + } else { + if (SEQ_GT(tp->iss + 1, tp->snd_una - tp->max_sndwnd)) { + /* Checking for ghost ACKs is stricter. */ + seq_min = tp->iss + 1; + ghost_ack_check = true; + } else { + /* + * Checking for too old ACKs (RFC 5961, + * Section 5.2) is stricter. + */ + seq_min = tp->snd_una - tp->max_sndwnd; + ghost_ack_check = false; + } + } + if (SEQ_LT(th->th_ack, seq_min)) { + if (ghost_ack_check) + TCPSTAT_INC(tcps_rcvghostack); + else + TCPSTAT_INC(tcps_rcvacktooold); + /* Send challenge ACK. */ + ctf_do_dropafterack(m, tp, th, thflags, tlen, ret_val); + bbr->r_wanted_output = 1; + return (1); + } + } if (SEQ_GT(th->th_ack, tp->snd_max)) { ctf_do_dropafterack(m, tp, th, thflags, tlen, ret_val); bbr->r_wanted_output = 1; @@ -7775,7 +7804,7 @@ bbr_process_ack(struct mbuf *m, struct tcphdr *th, struct socket *so, (int)(ticks - tp->t_badrxtwin) < 0) bbr_cong_signal(tp, th, CC_RTO_ERR, NULL); } - SOCKBUF_LOCK(&so->so_snd); + SOCK_SENDBUF_LOCK(so); acked_amount = min(acked, (int)sbavail(&so->so_snd)); tp->snd_wnd -= acked_amount; mfree = sbcut_locked(&so->so_snd, acked_amount); @@ -8247,7 +8276,7 @@ bbr_process_data(struct mbuf *m, struct tcphdr *th, struct socket *so, thflags = tcp_get_flags(th) & TH_FIN; KMOD_TCPSTAT_ADD(tcps_rcvpack, (int)nsegs); KMOD_TCPSTAT_ADD(tcps_rcvbyte, tlen); - SOCKBUF_LOCK(&so->so_rcv); + SOCK_RECVBUF_LOCK(so); if (so->so_rcv.sb_state & SBS_CANTRCVMORE) m_freem(m); else @@ -8480,7 +8509,7 @@ bbr_do_fastnewdata(struct mbuf *m, struct tcphdr *th, struct socket *so, newsize = tcp_autorcvbuf(m, th, so, tp, tlen); /* Add data to socket buffer. 
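The ACK validation added above follows RFC 5961 section 5.2: an ACK is only acceptable if it is not older than snd_una - max_sndwnd and, until the window has moved far enough past the ISS, not below iss + 1 (the ghost-ACK case). A self-contained sketch of the bound selection, assuming the path where TF2_NO_ISS_CHECK is not yet set and using made-up sequence numbers:

	#include <stdio.h>
	#include <stdint.h>
	#include <stdbool.h>

	/* Modular sequence-number compares, mirroring the kernel's SEQ_GT()/SEQ_LT(). */
	#define SEQ_GT(a, b)	((int32_t)((a) - (b)) > 0)
	#define SEQ_LT(a, b)	((int32_t)((a) - (b)) < 0)

	int
	main(void)
	{
		uint32_t iss = 1000, snd_una = 5000, max_sndwnd = 65535;
		uint32_t th_ack = 900;		/* ACK below the ISS: a "ghost" ACK */
		uint32_t seq_min;
		bool ghost_ack_check;

		if (SEQ_GT(iss + 1, snd_una - max_sndwnd)) {
			seq_min = iss + 1;		/* ghost-ACK bound is stricter */
			ghost_ack_check = true;
		} else {
			seq_min = snd_una - max_sndwnd;	/* RFC 5961 5.2 too-old bound */
			ghost_ack_check = false;
		}
		if (SEQ_LT(th_ack, seq_min))
			printf("drop and send challenge ACK (%s)\n",
			    ghost_ack_check ? "ghost ACK" : "too old");
		return (0);
	}
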
*/ - SOCKBUF_LOCK(&so->so_rcv); + SOCK_RECVBUF_LOCK(so); if (so->so_rcv.sb_state & SBS_CANTRCVMORE) { m_freem(m); } else { @@ -8734,7 +8763,7 @@ bbr_do_syn_sent(struct mbuf *m, struct tcphdr *th, struct socket *so, (SEQ_LEQ(th->th_ack, tp->iss) || SEQ_GT(th->th_ack, tp->snd_max))) { tcp_log_end_status(tp, TCP_EI_STATUS_RST_IN_FRONT); - ctf_do_dropwithreset(m, tp, th, BANDLIM_RST_OPENPORT, tlen); + ctf_do_dropwithreset(m, tp, th, BANDLIM_TCP_RST, tlen); return (1); } if ((thflags & (TH_ACK | TH_RST)) == (TH_ACK | TH_RST)) { @@ -8936,7 +8965,7 @@ bbr_do_syn_recv(struct mbuf *m, struct tcphdr *th, struct socket *so, (SEQ_LEQ(th->th_ack, tp->snd_una) || SEQ_GT(th->th_ack, tp->snd_max))) { tcp_log_end_status(tp, TCP_EI_STATUS_RST_IN_FRONT); - ctf_do_dropwithreset(m, tp, th, BANDLIM_RST_OPENPORT, tlen); + ctf_do_dropwithreset(m, tp, th, BANDLIM_TCP_RST, tlen); return (1); } if (tp->t_flags & TF_FASTOPEN) { @@ -8948,7 +8977,7 @@ bbr_do_syn_recv(struct mbuf *m, struct tcphdr *th, struct socket *so, */ if ((thflags & (TH_SYN | TH_ACK)) == (TH_SYN | TH_ACK)) { tcp_log_end_status(tp, TCP_EI_STATUS_RST_IN_FRONT); - ctf_do_dropwithreset(m, tp, th, BANDLIM_RST_OPENPORT, tlen); + ctf_do_dropwithreset(m, tp, th, BANDLIM_TCP_RST, tlen); return (1); } else if (thflags & TH_SYN) { /* non-initial SYN is ignored */ @@ -8981,7 +9010,7 @@ bbr_do_syn_recv(struct mbuf *m, struct tcphdr *th, struct socket *so, */ if (SEQ_LT(th->th_seq, tp->irs)) { tcp_log_end_status(tp, TCP_EI_STATUS_RST_IN_FRONT); - ctf_do_dropwithreset(m, tp, th, BANDLIM_RST_OPENPORT, tlen); + ctf_do_dropwithreset(m, tp, th, BANDLIM_TCP_RST, tlen); return (1); } if (ctf_drop_checks(to, m, th, tp, &tlen, &thflags, &drop_hdrlen, &ret_val)) { @@ -9259,7 +9288,7 @@ bbr_do_established(struct mbuf *m, struct tcphdr *th, struct socket *so, if (sbavail(&so->so_snd)) { if (ctf_progress_timeout_check(tp, true)) { bbr_log_progress_event(bbr, tp, tick, PROGRESS_DROP, __LINE__); - ctf_do_dropwithreset_conn(m, tp, th, BANDLIM_RST_OPENPORT, tlen); + ctf_do_dropwithreset_conn(m, tp, th, BANDLIM_TCP_RST, tlen); return (1); } } @@ -9356,7 +9385,7 @@ bbr_do_close_wait(struct mbuf *m, struct tcphdr *th, struct socket *so, if (sbavail(&so->so_snd)) { if (ctf_progress_timeout_check(tp, true)) { bbr_log_progress_event(bbr, tp, tick, PROGRESS_DROP, __LINE__); - ctf_do_dropwithreset_conn(m, tp, th, BANDLIM_RST_OPENPORT, tlen); + ctf_do_dropwithreset_conn(m, tp, th, BANDLIM_TCP_RST, tlen); return (1); } } @@ -9506,7 +9535,7 @@ bbr_do_fin_wait_1(struct mbuf *m, struct tcphdr *th, struct socket *so, if (sbavail(&so->so_snd)) { if (ctf_progress_timeout_check(tp, true)) { bbr_log_progress_event(bbr, tp, tick, PROGRESS_DROP, __LINE__); - ctf_do_dropwithreset_conn(m, tp, th, BANDLIM_RST_OPENPORT, tlen); + ctf_do_dropwithreset_conn(m, tp, th, BANDLIM_TCP_RST, tlen); return (1); } } @@ -9608,7 +9637,7 @@ bbr_do_closing(struct mbuf *m, struct tcphdr *th, struct socket *so, if (sbavail(&so->so_snd)) { if (ctf_progress_timeout_check(tp, true)) { bbr_log_progress_event(bbr, tp, tick, PROGRESS_DROP, __LINE__); - ctf_do_dropwithreset_conn(m, tp, th, BANDLIM_RST_OPENPORT, tlen); + ctf_do_dropwithreset_conn(m, tp, th, BANDLIM_TCP_RST, tlen); return (1); } } @@ -9710,7 +9739,7 @@ bbr_do_lastack(struct mbuf *m, struct tcphdr *th, struct socket *so, if (sbavail(&so->so_snd)) { if (ctf_progress_timeout_check(tp, true)) { bbr_log_progress_event(bbr, tp, tick, PROGRESS_DROP, __LINE__); - ctf_do_dropwithreset_conn(m, tp, th, BANDLIM_RST_OPENPORT, tlen); + ctf_do_dropwithreset_conn(m, tp, th, 
BANDLIM_TCP_RST, tlen); return (1); } } @@ -9819,7 +9848,7 @@ bbr_do_fin_wait_2(struct mbuf *m, struct tcphdr *th, struct socket *so, if (sbavail(&so->so_snd)) { if (ctf_progress_timeout_check(tp, true)) { bbr_log_progress_event(bbr, tp, tick, PROGRESS_DROP, __LINE__); - ctf_do_dropwithreset_conn(m, tp, th, BANDLIM_RST_OPENPORT, tlen); + ctf_do_dropwithreset_conn(m, tp, th, BANDLIM_TCP_RST, tlen); return (1); } } @@ -10121,7 +10150,7 @@ bbr_init(struct tcpcb *tp, void **ptr) tcp_change_time_units(tp, TCP_TMR_GRANULARITY_TICKS); TCPT_RANGESET(tp->t_rxtcur, ((tp->t_srtt >> 2) + tp->t_rttvar) >> 1, - tp->t_rttmin, TCPTV_REXMTMAX); + tp->t_rttmin, tcp_rexmit_max); bbr_start_hpts_timer(bbr, tp, cts, 5, 0, 0); return (0); } @@ -10269,10 +10298,6 @@ bbr_substate_change(struct tcp_bbr *bbr, uint32_t cts, int32_t line, int dolog) bbr->r_ctl.bbr_smallest_srtt_state2 = bbr->r_ctl.bbr_smallest_srtt_this_state; } bbr->rc_bbr_substate++; - if (bbr->rc_bbr_substate >= BBR_SUBSTATE_COUNT) { - /* Cycle back to first state-> gain */ - bbr->rc_bbr_substate = 0; - } if (bbr_state_val(bbr) == BBR_SUB_GAIN) { /* * We enter the gain(5/4) cycle (possibly less if @@ -11323,7 +11348,14 @@ bbr_do_segment_nounlock(struct tcpcb *tp, struct mbuf *m, struct tcphdr *th, tcp_dooptions(&to, (u_char *)(th + 1), (th->th_off << 2) - sizeof(struct tcphdr), (thflags & TH_SYN) ? TO_SYN : 0); - + if (tp->t_flags2 & TF2_PROC_SACK_PROHIBIT) { + /* + * We don't look at sack's from the + * peer because the MSS is too small which + * can subject us to an attack. + */ + to.to_flags &= ~TOF_SACK; + } /* * If timestamps were negotiated during SYN/ACK and a * segment without a timestamp is received, silently drop @@ -11478,7 +11510,7 @@ bbr_do_segment_nounlock(struct tcpcb *tp, struct mbuf *m, struct tcphdr *th, if ((tp->t_state == TCPS_SYN_SENT) && (thflags & TH_ACK) && (SEQ_LEQ(th->th_ack, tp->iss) || SEQ_GT(th->th_ack, tp->snd_max))) { tcp_log_end_status(tp, TCP_EI_STATUS_RST_IN_FRONT); - ctf_do_dropwithreset_conn(m, tp, th, BANDLIM_RST_OPENPORT, tlen); + ctf_do_dropwithreset_conn(m, tp, th, BANDLIM_TCP_RST, tlen); return (1); } if (tiwin > bbr->r_ctl.rc_high_rwnd) @@ -12083,7 +12115,7 @@ again: len = 0; rsm = NULL; if (flags & TH_RST) { - SOCKBUF_LOCK(sb); + SOCK_SENDBUF_LOCK(so); goto send; } recheck_resend: @@ -12150,7 +12182,7 @@ recheck_resend: } else { /* Retransmitting SYN */ rsm = NULL; - SOCKBUF_LOCK(sb); + SOCK_SENDBUF_LOCK(so); goto send; } } else @@ -12249,7 +12281,7 @@ recheck_resend: kern_prefetch(end_rsm, &prefetch_rsm); prefetch_rsm = 1; } - SOCKBUF_LOCK(sb); + SOCK_SENDBUF_LOCK(so); /* * If snd_nxt == snd_max and we have transmitted a FIN, the * sb_offset will be > 0 even if so_snd.sb_cc is 0, resulting in a @@ -12564,7 +12596,6 @@ recheck_resend: (len > maxseg) && (tp->t_port == 0) && ((tp->t_flags & TF_SIGNATURE) == 0) && - tp->rcv_numsacks == 0 && ipoptlen == 0) tso = 1; @@ -12667,7 +12698,7 @@ recheck_resend: * No reason to send a segment, just return. */ just_return: - SOCKBUF_UNLOCK(sb); + SOCK_SENDBUF_UNLOCK(so); just_return_nolock: if (tot_len) slot = bbr_get_pacing_delay(bbr, bbr->r_ctl.rc_bbr_hptsi_gain, tot_len, cts, 0); @@ -12775,7 +12806,7 @@ send: len--; } } - SOCKBUF_LOCK_ASSERT(sb); + SOCK_SENDBUF_LOCK_ASSERT(so); if (len > 0) { if ((tp->snd_una == tp->snd_max) && (bbr_calc_time(cts, bbr->r_ctl.rc_went_idle_time) >= bbr_rtt_probe_time)) { @@ -12891,7 +12922,7 @@ send: if (tp->t_port) { if (V_tcp_udp_tunneling_port == 0) { /* The port was removed?? 
*/ - SOCKBUF_UNLOCK(&so->so_snd); + SOCK_SENDBUF_UNLOCK(so); return (EHOSTUNREACH); } hdrlen += sizeof(struct udphdr); @@ -12982,7 +13013,7 @@ send: * byte of the payload can be put into the * TCP segment. */ - SOCKBUF_UNLOCK(&so->so_snd); + SOCK_SENDBUF_UNLOCK(so); error = EMSGSIZE; sack_rxmit = 0; goto out; @@ -13052,7 +13083,7 @@ send: if (m == NULL) { BBR_STAT_INC(bbr_failed_mbuf_aloc); bbr_log_enobuf_jmp(bbr, len, cts, __LINE__, len, 0, 0); - SOCKBUF_UNLOCK(sb); + SOCK_SENDBUF_UNLOCK(so); error = ENOBUFS; sack_rxmit = 0; goto out; @@ -13096,7 +13127,7 @@ send: * is the only thing to do. */ BBR_STAT_INC(bbr_offset_drop); - SOCKBUF_UNLOCK(sb); + SOCK_SENDBUF_UNLOCK(so); (void)m_free(m); return (-EFAULT); /* tcp_drop() */ } @@ -13156,7 +13187,7 @@ send: tso = 0; } if (m->m_next == NULL) { - SOCKBUF_UNLOCK(sb); + SOCK_SENDBUF_UNLOCK(so); (void)m_free(m); error = ENOBUFS; sack_rxmit = 0; @@ -13192,9 +13223,9 @@ send: !(flags & TH_SYN)) { flags |= TH_PUSH; } - SOCKBUF_UNLOCK(sb); + SOCK_SENDBUF_UNLOCK(so); } else { - SOCKBUF_UNLOCK(sb); + SOCK_SENDBUF_UNLOCK(so); if (tp->t_flags & TF_ACKNOW) KMOD_TCPSTAT_INC(tcps_sndacks); else if (flags & (TH_SYN | TH_FIN | TH_RST)) @@ -13220,7 +13251,7 @@ send: m->m_data += max_linkhdr; m->m_len = hdrlen; } - SOCKBUF_UNLOCK_ASSERT(sb); + SOCK_SENDBUF_UNLOCK_ASSERT(so); m->m_pkthdr.rcvif = (struct ifnet *)0; #ifdef MAC mac_inpcb_create_mbuf(inp, m); @@ -13712,7 +13743,7 @@ nomore: * Everything else will just have to retransmit with the timer * (no pacer). */ - SOCKBUF_UNLOCK_ASSERT(sb); + SOCK_SENDBUF_UNLOCK_ASSERT(so); BBR_STAT_INC(bbr_saw_oerr); /* Clear all delay/early tracks */ bbr->r_ctl.rc_hptsi_agg_delay = 0; @@ -13773,6 +13804,16 @@ nomore: if (old_maxseg <= tp->t_maxseg) { /* Huh it did not shrink? */ tp->t_maxseg = old_maxseg - 40; + if (tp->t_maxseg < V_tcp_mssdflt) { + /* + * The MSS is so small we should not + * process incoming SACK's since we are + * subject to attack in such a case. 
+ */ + tp->t_flags2 |= TF2_PROC_SACK_PROHIBIT; + } else { + tp->t_flags2 &= ~TF2_PROC_SACK_PROHIBIT; + } bbr_log_msgsize_fail(bbr, tp, len, maxseg, mtu, 0, tso, cts); } /* @@ -13802,6 +13843,7 @@ nomore: case ENETUNREACH: if (TCPS_HAVERCVDSYN(tp->t_state)) { tp->t_softerror = error; + error = 0; } /* FALLTHROUGH */ default: @@ -14116,7 +14158,7 @@ struct tcp_function_block __tcp_bbr = { .tfb_tcp_mtu_chg = bbr_mtu_chg, .tfb_pru_options = bbr_pru_options, .tfb_switch_failed = bbr_switch_failed, - .tfb_flags = TCP_FUNC_OUTPUT_CANDROP, + .tfb_flags = TCP_FUNC_OUTPUT_CANDROP | TCP_FUNC_DEFAULT_OK, }; /* @@ -14149,10 +14191,8 @@ bbr_set_sockopt(struct tcpcb *tp, struct sockopt *sopt) case TCP_BBR_ALGORITHM: case TCP_BBR_TSLIMITS: case TCP_BBR_IWINTSO: - case TCP_BBR_RECFORCE: case TCP_BBR_STARTUP_PG: case TCP_BBR_DRAIN_PG: - case TCP_BBR_RWND_IS_APP: case TCP_BBR_PROBE_RTT_INT: case TCP_BBR_PROBE_RTT_GAIN: case TCP_BBR_PROBE_RTT_LEN: @@ -14526,6 +14566,7 @@ bbr_get_sockopt(struct tcpcb *tp, struct sockopt *sopt) { struct inpcb *inp = tptoinpcb(tp); struct tcp_bbr *bbr; + uint64_t loptval; int32_t error, optval; bbr = (struct tcp_bbr *)tp->t_fb_ptr; @@ -14586,7 +14627,7 @@ bbr_get_sockopt(struct tcpcb *tp, struct sockopt *sopt) optval = bbr->rc_loss_exit; break; case TCP_BBR_USEDEL_RATE: - error = EINVAL; + loptval = get_filter_value(&bbr->r_ctl.rc_delrate); break; case TCP_BBR_MIN_RTO: optval = bbr->r_ctl.rc_min_rto_ms; @@ -14670,7 +14711,10 @@ bbr_get_sockopt(struct tcpcb *tp, struct sockopt *sopt) break; } INP_WUNLOCK(inp); - error = sooptcopyout(sopt, &optval, sizeof optval); + if (sopt->sopt_name == TCP_BBR_USEDEL_RATE) + error = sooptcopyout(sopt, &loptval, sizeof loptval); + else + error = sooptcopyout(sopt, &optval, sizeof optval); return (error); } diff --git a/sys/netinet/tcp_stacks/rack.c b/sys/netinet/tcp_stacks/rack.c index 669d213e58fb..5280f18dc983 100644 --- a/sys/netinet/tcp_stacks/rack.c +++ b/sys/netinet/tcp_stacks/rack.c @@ -40,7 +40,6 @@ #endif #include <sys/lock.h> #include <sys/malloc.h> -#include <sys/lock.h> #include <sys/mutex.h> #include <sys/mbuf.h> #include <sys/proc.h> /* for proc0 declaration */ @@ -193,20 +192,12 @@ static int32_t rack_tlp_use_greater = 1; static int32_t rack_reorder_thresh = 2; static int32_t rack_reorder_fade = 60000000; /* 0 - never fade, def 60,000,000 * - 60 seconds */ -static uint16_t rack_policer_rxt_thresh= 0; /* 499 = 49.9%, 0 is off */ -static uint8_t rack_policer_avg_thresh = 0; /* 3.2 */ -static uint8_t rack_policer_med_thresh = 0; /* 1 - 16 */ -static uint16_t rack_policer_bucket_reserve = 20; /* How much % is reserved in the bucket */ -static uint64_t rack_pol_min_bw = 125000; /* 1mbps in Bytes per sec */ -static uint32_t rack_policer_data_thresh = 64000; /* 64,000 bytes must be sent before we engage */ -static uint32_t rack_policing_do_bw_comp = 1; static uint32_t rack_pcm_every_n_rounds = 100; static uint32_t rack_pcm_blast = 0; static uint32_t rack_pcm_is_enabled = 1; -static uint8_t rack_req_del_mss = 18; /* How many segments need to be sent in a recovery episode to do policer_detection */ static uint8_t rack_ssthresh_rest_rto_rec = 0; /* Do we restore ssthresh when we have rec -> rto -> rec */ -static uint32_t rack_gp_gain_req = 1200; /* Amount percent wise required to gain to record a round has "gaining" */ +static uint32_t rack_gp_gain_req = 1200; /* Amount percent wise required to gain to record a round as "gaining" */ static uint32_t rack_rnd_cnt_req = 0x10005; /* Default number of rounds if we are below rack_gp_gain_req 
where we exit ss */ @@ -220,7 +211,6 @@ static uint32_t rack_highest_sack_thresh_seen = 0; static uint32_t rack_highest_move_thresh_seen = 0; static uint32_t rack_merge_out_sacks_on_attack = 0; static int32_t rack_enable_hw_pacing = 0; /* Due to CCSP keep it off by default */ -static int32_t rack_hw_pace_extra_slots = 0; /* 2 extra MSS time betweens */ static int32_t rack_hw_rate_caps = 0; /* 1; */ static int32_t rack_hw_rate_cap_per = 0; /* 0 -- off */ static int32_t rack_hw_rate_min = 0; /* 1500000;*/ @@ -271,7 +261,7 @@ static int32_t rack_enobuf_hw_max = 12000; /* 12 ms in usecs */ static int32_t rack_enobuf_hw_min = 10000; /* 10 ms in usecs */ static int32_t rack_hw_rwnd_factor = 2; /* How many max_segs the rwnd must be before we hold off sending */ static int32_t rack_hw_check_queue = 0; /* Do we always pre-check queue depth of a hw queue */ -static int32_t rack_full_buffer_discount = 10; + /* * Currently regular tcp has a rto_min of 30ms * the backoff goes 12 times so that ends up @@ -364,8 +354,6 @@ static int32_t rack_timely_dec_clear = 6; /* Do we clear decrement count at a va static int32_t rack_timely_max_push_rise = 3; /* One round of pushing */ static int32_t rack_timely_max_push_drop = 3; /* Three round of pushing */ static int32_t rack_timely_min_segs = 4; /* 4 segment minimum */ -static int32_t rack_use_max_for_nobackoff = 0; -static int32_t rack_timely_int_timely_only = 0; /* do interim timely's only use the timely algo (no b/w changes)? */ static int32_t rack_timely_no_stopping = 0; static int32_t rack_down_raise_thresh = 100; static int32_t rack_req_segs = 1; @@ -392,7 +380,6 @@ counter_u64_t rack_tlp_retran; counter_u64_t rack_tlp_retran_bytes; counter_u64_t rack_to_tot; counter_u64_t rack_hot_alloc; -counter_u64_t tcp_policer_detected; counter_u64_t rack_to_alloc; counter_u64_t rack_to_alloc_hard; counter_u64_t rack_to_alloc_emerg; @@ -536,7 +523,7 @@ static int32_t rack_output(struct tcpcb *tp); static uint32_t rack_proc_sack_blk(struct tcpcb *tp, struct tcp_rack *rack, struct sackblk *sack, struct tcpopt *to, struct rack_sendmap **prsm, - uint32_t cts, int *no_extra, int *moved_two, uint32_t segsiz); + uint32_t cts, uint32_t segsiz); static void rack_post_recovery(struct tcpcb *tp, uint32_t th_seq); static void rack_remxt_tmr(struct tcpcb *tp); static int rack_set_sockopt(struct tcpcb *tp, struct sockopt *sopt); @@ -558,9 +545,6 @@ rack_do_close_wait(struct mbuf *m, struct tcphdr *th, struct socket *so, struct tcpcb *tp, struct tcpopt *to, int32_t drop_hdrlen, int32_t tlen, uint32_t tiwin, int32_t thflags, int32_t nxt_pkt, uint8_t iptos); -static void -rack_peg_rxt(struct tcp_rack *rack, struct rack_sendmap *rsm, uint32_t segsiz); - static int rack_do_closing(struct mbuf *m, struct tcphdr *th, struct socket *so, struct tcpcb *tp, struct tcpopt *to, int32_t drop_hdrlen, @@ -633,9 +617,10 @@ rack_swap_beta_values(struct tcp_rack *rack, uint8_t flex8) { struct sockopt sopt; struct cc_newreno_opts opt; - struct newreno old; struct tcpcb *tp; - int error, failed = 0; + uint32_t old_beta; + uint32_t old_beta_ecn; + int error = 0, failed = 0; tp = rack->rc_tp; if (tp->t_cc == NULL) { @@ -663,33 +648,34 @@ rack_swap_beta_values(struct tcp_rack *rack, uint8_t flex8) failed = 3; goto out; } - old.beta = opt.val; + old_beta = opt.val; opt.name = CC_NEWRENO_BETA_ECN; error = CC_ALGO(tp)->ctl_output(&tp->t_ccv, &sopt, &opt); if (error) { failed = 4; goto out; } - old.beta_ecn = opt.val; + old_beta_ecn = opt.val; /* Now lets set in the values we have stored */ sopt.sopt_dir = 
SOPT_SET; opt.name = CC_NEWRENO_BETA; - opt.val = rack->r_ctl.rc_saved_beta.beta; + opt.val = rack->r_ctl.rc_saved_beta; error = CC_ALGO(tp)->ctl_output(&tp->t_ccv, &sopt, &opt); if (error) { failed = 5; goto out; } opt.name = CC_NEWRENO_BETA_ECN; - opt.val = rack->r_ctl.rc_saved_beta.beta_ecn; + opt.val = rack->r_ctl.rc_saved_beta_ecn; error = CC_ALGO(tp)->ctl_output(&tp->t_ccv, &sopt, &opt); if (error) { failed = 6; goto out; } /* Save off the values for restoral */ - memcpy(&rack->r_ctl.rc_saved_beta, &old, sizeof(struct newreno)); + rack->r_ctl.rc_saved_beta = old_beta; + rack->r_ctl.rc_saved_beta_ecn = old_beta_ecn; out: if (rack_verbose_logging && tcp_bblogging_on(rack->rc_tp)) { union tcp_log_stackspecific log; @@ -697,13 +683,13 @@ out: struct newreno *ptr; ptr = ((struct newreno *)tp->t_ccv.cc_data); - memset(&log.u_bbr, 0, sizeof(log.u_bbr)); + memset(&log, 0, sizeof(log)); log.u_bbr.timeStamp = tcp_get_usecs(&tv); log.u_bbr.flex1 = ptr->beta; log.u_bbr.flex2 = ptr->beta_ecn; log.u_bbr.flex3 = ptr->newreno_flags; - log.u_bbr.flex4 = rack->r_ctl.rc_saved_beta.beta; - log.u_bbr.flex5 = rack->r_ctl.rc_saved_beta.beta_ecn; + log.u_bbr.flex4 = rack->r_ctl.rc_saved_beta; + log.u_bbr.flex5 = rack->r_ctl.rc_saved_beta_ecn; log.u_bbr.flex6 = failed; log.u_bbr.flex7 = rack->gp_ready; log.u_bbr.flex7 <<= 1; @@ -898,7 +884,6 @@ rack_init_sysctls(void) struct sysctl_oid *rack_measure; struct sysctl_oid *rack_probertt; struct sysctl_oid *rack_hw_pacing; - struct sysctl_oid *rack_policing; rack_attack = SYSCTL_ADD_NODE(&rack_sysctl_ctx, SYSCTL_CHILDREN(rack_sysctl_root), @@ -952,7 +937,7 @@ rack_init_sysctls(void) SYSCTL_ADD_U32(&rack_sysctl_ctx, SYSCTL_CHILDREN(rack_probertt), OID_AUTO, "time_between", CTLFLAG_RW, - & rack_time_between_probertt, 96000000, + &rack_time_between_probertt, 96000000, "How many useconds between the lowest rtt falling must past before we enter probertt"); SYSCTL_ADD_U32(&rack_sysctl_ctx, SYSCTL_CHILDREN(rack_probertt), @@ -1068,11 +1053,6 @@ rack_init_sysctls(void) "Do we not use timely in DGP?"); SYSCTL_ADD_S32(&rack_sysctl_ctx, SYSCTL_CHILDREN(rack_pacing), - OID_AUTO, "fullbufdisc", CTLFLAG_RW, - &rack_full_buffer_discount, 10, - "What percentage b/w reduction over the GP estimate for a full buffer (default=0 off)?"); - SYSCTL_ADD_S32(&rack_sysctl_ctx, - SYSCTL_CHILDREN(rack_pacing), OID_AUTO, "fillcw", CTLFLAG_RW, &rack_fill_cw_state, 0, "Enable fillcw on new connections (default=0 off)?"); @@ -1213,11 +1193,6 @@ rack_init_sysctls(void) OID_AUTO, "up_only", CTLFLAG_RW, &rack_hw_up_only, 0, "Do we allow hw pacing to lower the rate selected?"); - SYSCTL_ADD_S32(&rack_sysctl_ctx, - SYSCTL_CHILDREN(rack_hw_pacing), - OID_AUTO, "extra_mss_precise", CTLFLAG_RW, - &rack_hw_pace_extra_slots, 0, - "If the rates between software and hardware match precisely how many extra time_betweens do we get?"); rack_timely = SYSCTL_ADD_NODE(&rack_sysctl_ctx, SYSCTL_CHILDREN(rack_sysctl_root), OID_AUTO, @@ -1313,16 +1288,6 @@ rack_init_sysctls(void) "Rack timely when setting the cwnd what is the min num segments"); SYSCTL_ADD_S32(&rack_sysctl_ctx, SYSCTL_CHILDREN(rack_timely), - OID_AUTO, "noback_max", CTLFLAG_RW, - &rack_use_max_for_nobackoff, 0, - "Rack timely when deciding if to backoff on a loss, do we use under max rtt else min"); - SYSCTL_ADD_S32(&rack_sysctl_ctx, - SYSCTL_CHILDREN(rack_timely), - OID_AUTO, "interim_timely_only", CTLFLAG_RW, - &rack_timely_int_timely_only, 0, - "Rack timely when doing interim timely's do we only do timely (no b/w consideration)"); - 
SYSCTL_ADD_S32(&rack_sysctl_ctx, - SYSCTL_CHILDREN(rack_timely), OID_AUTO, "nonstop", CTLFLAG_RW, &rack_timely_no_stopping, 0, "Rack timely don't stop increase"); @@ -1551,53 +1516,6 @@ rack_init_sysctls(void) OID_AUTO, "hystartplusplus", CTLFLAG_RW, &rack_do_hystart, 0, "Should RACK enable HyStart++ on connections?"); - /* Policer detection */ - rack_policing = SYSCTL_ADD_NODE(&rack_sysctl_ctx, - SYSCTL_CHILDREN(rack_sysctl_root), - OID_AUTO, - "policing", - CTLFLAG_RW | CTLFLAG_MPSAFE, 0, - "policer detection"); - SYSCTL_ADD_U16(&rack_sysctl_ctx, - SYSCTL_CHILDREN(rack_policing), - OID_AUTO, "rxt_thresh", CTLFLAG_RW, - &rack_policer_rxt_thresh, 0, - "Percentage of retransmits we need to be a possible policer (499 = 49.9 percent)"); - SYSCTL_ADD_U8(&rack_sysctl_ctx, - SYSCTL_CHILDREN(rack_policing), - OID_AUTO, "avg_thresh", CTLFLAG_RW, - &rack_policer_avg_thresh, 0, - "What threshold of average retransmits needed to recover a lost packet (1 - 169 aka 21 = 2.1)?"); - SYSCTL_ADD_U8(&rack_sysctl_ctx, - SYSCTL_CHILDREN(rack_policing), - OID_AUTO, "med_thresh", CTLFLAG_RW, - &rack_policer_med_thresh, 0, - "What threshold of Median retransmits needed to recover a lost packet (1 - 16)?"); - SYSCTL_ADD_U32(&rack_sysctl_ctx, - SYSCTL_CHILDREN(rack_policing), - OID_AUTO, "data_thresh", CTLFLAG_RW, - &rack_policer_data_thresh, 64000, - "How many bytes must have gotten through before we can start doing policer detection?"); - SYSCTL_ADD_U32(&rack_sysctl_ctx, - SYSCTL_CHILDREN(rack_policing), - OID_AUTO, "bwcomp", CTLFLAG_RW, - &rack_policing_do_bw_comp, 1, - "Do we raise up low b/w so that at least pace_max_seg can be sent in the srtt?"); - SYSCTL_ADD_U8(&rack_sysctl_ctx, - SYSCTL_CHILDREN(rack_policing), - OID_AUTO, "recmss", CTLFLAG_RW, - &rack_req_del_mss, 18, - "How many MSS must be delivered during recovery to engage policer detection?"); - SYSCTL_ADD_U16(&rack_sysctl_ctx, - SYSCTL_CHILDREN(rack_policing), - OID_AUTO, "res_div", CTLFLAG_RW, - &rack_policer_bucket_reserve, 20, - "What percentage is reserved in the policer bucket?"); - SYSCTL_ADD_U64(&rack_sysctl_ctx, - SYSCTL_CHILDREN(rack_policing), - OID_AUTO, "min_comp_bw", CTLFLAG_RW, - &rack_pol_min_bw, 125000, - "Do we have a min b/w for b/w compensation (0 = no)?"); /* Misc rack controls */ rack_misc = SYSCTL_ADD_NODE(&rack_sysctl_ctx, SYSCTL_CHILDREN(rack_sysctl_root), @@ -1880,13 +1798,6 @@ rack_init_sysctls(void) OID_AUTO, "alloc_hot", CTLFLAG_RD, &rack_hot_alloc, "Total allocations from the top of our list"); - tcp_policer_detected = counter_u64_alloc(M_WAITOK); - SYSCTL_ADD_COUNTER_U64(&rack_sysctl_ctx, - SYSCTL_CHILDREN(rack_counters), - OID_AUTO, "policer_detected", CTLFLAG_RD, - &tcp_policer_detected, - "Total policer_detections"); - rack_to_alloc = counter_u64_alloc(M_WAITOK); SYSCTL_ADD_COUNTER_U64(&rack_sysctl_ctx, SYSCTL_CHILDREN(rack_counters), @@ -2643,6 +2554,7 @@ rack_log_hdwr_pacing(struct tcp_rack *rack, union tcp_log_stackspecific log; struct timeval tv; const struct ifnet *ifp; + uint64_t ifp64; memset(&log, 0, sizeof(log)); log.u_bbr.flex1 = ((hw_rate >> 32) & 0x00000000ffffffff); @@ -2655,8 +2567,9 @@ rack_log_hdwr_pacing(struct tcp_rack *rack, } else ifp = NULL; if (ifp) { - log.u_bbr.flex3 = (((uint64_t)ifp >> 32) & 0x00000000ffffffff); - log.u_bbr.flex4 = ((uint64_t)ifp & 0x00000000ffffffff); + ifp64 = (uintptr_t)ifp; + log.u_bbr.flex3 = ((ifp64 >> 32) & 0x00000000ffffffff); + log.u_bbr.flex4 = (ifp64 & 0x00000000ffffffff); } log.u_bbr.timeStamp = tcp_get_usecs(&tv); log.u_bbr.bw_inuse = rate; @@ -2752,8 +2665,6 
@@ rack_log_retran_reason(struct tcp_rack *rack, struct rack_sendmap *rsm, uint32_t union tcp_log_stackspecific log; struct timeval tv; - if (rack->sack_attack_disable > 0) - goto log_anyway; if ((mod != 1) && (rack_verbose_logging == 0)) { /* * We get 3 values currently for mod @@ -2766,8 +2677,7 @@ rack_log_retran_reason(struct tcp_rack *rack, struct rack_sendmap *rsm, uint32_t */ return; } -log_anyway: - memset(&log.u_bbr, 0, sizeof(log.u_bbr)); + memset(&log, 0, sizeof(log)); log.u_bbr.flex1 = tsused; log.u_bbr.flex2 = thresh; log.u_bbr.flex3 = rsm->r_flags; @@ -2798,7 +2708,7 @@ rack_log_to_start(struct tcp_rack *rack, uint32_t cts, uint32_t to, int32_t slot union tcp_log_stackspecific log; struct timeval tv; - memset(&log.u_bbr, 0, sizeof(log.u_bbr)); + memset(&log, 0, sizeof(log)); log.u_bbr.flex1 = rack->rc_tp->t_srtt; log.u_bbr.flex2 = to; log.u_bbr.flex3 = rack->r_ctl.rc_hpts_flags; @@ -2841,7 +2751,7 @@ rack_log_to_event(struct tcp_rack *rack, int32_t to_num, struct rack_sendmap *rs union tcp_log_stackspecific log; struct timeval tv; - memset(&log.u_bbr, 0, sizeof(log.u_bbr)); + memset(&log, 0, sizeof(log)); log.u_bbr.inhpts = tcp_in_hpts(rack->rc_tp); log.u_bbr.flex8 = to_num; log.u_bbr.flex1 = rack->r_ctl.rc_rack_min_rtt; @@ -2881,12 +2791,12 @@ rack_log_map_chg(struct tcpcb *tp, struct tcp_rack *rack, union tcp_log_stackspecific log; struct timeval tv; - memset(&log.u_bbr, 0, sizeof(log.u_bbr)); + memset(&log, 0, sizeof(log)); log.u_bbr.flex8 = flag; log.u_bbr.inhpts = tcp_in_hpts(rack->rc_tp); - log.u_bbr.cur_del_rate = (uint64_t)prev; - log.u_bbr.delRate = (uint64_t)rsm; - log.u_bbr.rttProp = (uint64_t)next; + log.u_bbr.cur_del_rate = (uintptr_t)prev; + log.u_bbr.delRate = (uintptr_t)rsm; + log.u_bbr.rttProp = (uintptr_t)next; log.u_bbr.flex7 = 0; if (prev) { log.u_bbr.flex1 = prev->r_start; @@ -2929,7 +2839,7 @@ rack_log_rtt_upd(struct tcpcb *tp, struct tcp_rack *rack, uint32_t t, uint32_t l if (tcp_bblogging_on(tp)) { union tcp_log_stackspecific log; struct timeval tv; - memset(&log.u_bbr, 0, sizeof(log.u_bbr)); + memset(&log, 0, sizeof(log)); log.u_bbr.inhpts = tcp_in_hpts(rack->rc_tp); log.u_bbr.flex1 = t; log.u_bbr.flex2 = len; @@ -3007,13 +2917,8 @@ rack_log_rtt_sample(struct tcp_rack *rack, uint32_t rtt) /* Convert our ms to a microsecond */ memset(&log, 0, sizeof(log)); log.u_bbr.flex1 = rtt; - log.u_bbr.flex2 = rack->r_ctl.ack_count; - log.u_bbr.flex3 = rack->r_ctl.sack_count; - log.u_bbr.flex4 = rack->r_ctl.sack_noextra_move; - log.u_bbr.flex5 = rack->r_ctl.sack_moved_extra; log.u_bbr.flex6 = rack->rc_tp->t_rxtcur; log.u_bbr.flex7 = 1; - log.u_bbr.flex8 = rack->sack_attack_disable; log.u_bbr.timeStamp = tcp_get_usecs(&tv); log.u_bbr.inflight = ctf_flight_size(rack->rc_tp, rack->r_ctl.rc_sacked); log.u_bbr.pkts_out = rack->r_ctl.rc_out_at_rto; @@ -3107,7 +3012,7 @@ rack_log_progress_event(struct tcp_rack *rack, struct tcpcb *tp, uint32_t tick, union tcp_log_stackspecific log; struct timeval tv; - memset(&log.u_bbr, 0, sizeof(log.u_bbr)); + memset(&log, 0, sizeof(log)); log.u_bbr.inhpts = tcp_in_hpts(rack->rc_tp); log.u_bbr.flex1 = line; log.u_bbr.flex2 = tick; @@ -3136,7 +3041,7 @@ rack_log_type_bbrsnd(struct tcp_rack *rack, uint32_t len, uint32_t slot, uint32_ if (rack_verbose_logging && tcp_bblogging_on(rack->rc_tp)) { union tcp_log_stackspecific log; - memset(&log.u_bbr, 0, sizeof(log.u_bbr)); + memset(&log, 0, sizeof(log)); log.u_bbr.inhpts = tcp_in_hpts(rack->rc_tp); log.u_bbr.flex1 = slot; if (rack->rack_no_prr) @@ -3144,7 +3049,6 @@ 
rack_log_type_bbrsnd(struct tcp_rack *rack, uint32_t len, uint32_t slot, uint32_ else log.u_bbr.flex2 = rack->r_ctl.rc_prr_sndcnt; log.u_bbr.flex4 = rack->r_ctl.rc_hpts_flags; - log.u_bbr.flex5 = rack->r_ctl.ack_during_sd; log.u_bbr.flex6 = line; log.u_bbr.flex7 = (0x0000ffff & rack->r_ctl.rc_hpts_flags); log.u_bbr.flex8 = rack->rc_in_persist; @@ -3244,7 +3148,7 @@ rack_log_type_just_return(struct tcp_rack *rack, uint32_t cts, uint32_t tlen, ui union tcp_log_stackspecific log; struct timeval tv; - memset(&log.u_bbr, 0, sizeof(log.u_bbr)); + memset(&log, 0, sizeof(log)); log.u_bbr.inhpts = tcp_in_hpts(rack->rc_tp); log.u_bbr.flex1 = slot; log.u_bbr.flex2 = rack->r_ctl.rc_hpts_flags; @@ -3280,7 +3184,7 @@ rack_log_to_cancel(struct tcp_rack *rack, int32_t hpts_removed, int line, uint32 if (tcp_bblogging_on(rack->rc_tp)) { union tcp_log_stackspecific log; - memset(&log.u_bbr, 0, sizeof(log.u_bbr)); + memset(&log, 0, sizeof(log)); log.u_bbr.inhpts = tcp_in_hpts(rack->rc_tp); log.u_bbr.flex1 = line; log.u_bbr.flex2 = rack->r_ctl.rc_last_output_to; @@ -3325,7 +3229,7 @@ rack_log_alt_to_to_cancel(struct tcp_rack *rack, /* No you can't use 1, its for the real to cancel */ return; } - memset(&log.u_bbr, 0, sizeof(log.u_bbr)); + memset(&log, 0, sizeof(log)); log.u_bbr.timeStamp = tcp_get_usecs(&tv); log.u_bbr.flex1 = flex1; log.u_bbr.flex2 = flex2; @@ -3350,7 +3254,7 @@ rack_log_to_processing(struct tcp_rack *rack, uint32_t cts, int32_t ret, int32_t union tcp_log_stackspecific log; struct timeval tv; - memset(&log.u_bbr, 0, sizeof(log.u_bbr)); + memset(&log, 0, sizeof(log)); log.u_bbr.flex1 = timers; log.u_bbr.flex2 = ret; log.u_bbr.flex3 = rack->r_ctl.rc_timer_exp; @@ -3380,7 +3284,7 @@ rack_log_to_prr(struct tcp_rack *rack, int frm, int orig_cwnd, int line) union tcp_log_stackspecific log; struct timeval tv; - memset(&log.u_bbr, 0, sizeof(log.u_bbr)); + memset(&log, 0, sizeof(log)); log.u_bbr.flex1 = rack->r_ctl.rc_prr_out; log.u_bbr.flex2 = rack->r_ctl.rc_prr_recovery_fs; if (rack->rack_no_prr) @@ -3406,40 +3310,6 @@ rack_log_to_prr(struct tcp_rack *rack, int frm, int orig_cwnd, int line) } } -#ifdef TCP_SAD_DETECTION -static void -rack_log_sad(struct tcp_rack *rack, int event) -{ - if (tcp_bblogging_on(rack->rc_tp)) { - union tcp_log_stackspecific log; - struct timeval tv; - - memset(&log.u_bbr, 0, sizeof(log.u_bbr)); - log.u_bbr.flex1 = rack->r_ctl.sack_count; - log.u_bbr.flex2 = rack->r_ctl.ack_count; - log.u_bbr.flex3 = rack->r_ctl.sack_moved_extra; - log.u_bbr.flex4 = rack->r_ctl.sack_noextra_move; - log.u_bbr.flex5 = rack->r_ctl.rc_num_maps_alloced; - log.u_bbr.flex6 = tcp_sack_to_ack_thresh; - log.u_bbr.pkts_out = tcp_sack_to_move_thresh; - log.u_bbr.lt_epoch = (tcp_force_detection << 8); - log.u_bbr.lt_epoch |= rack->do_detection; - log.u_bbr.applimited = tcp_map_minimum; - log.u_bbr.flex7 = rack->sack_attack_disable; - log.u_bbr.flex8 = event; - log.u_bbr.bbr_state = rack->rc_suspicious; - log.u_bbr.timeStamp = tcp_get_usecs(&tv); - log.u_bbr.inflight = ctf_flight_size(rack->rc_tp, rack->r_ctl.rc_sacked); - log.u_bbr.delivered = tcp_sad_decay_val; - TCP_LOG_EVENTP(rack->rc_tp, NULL, - &rack->rc_inp->inp_socket->so_rcv, - &rack->rc_inp->inp_socket->so_snd, - TCP_SAD_DETECT, 0, - 0, &log, false, &tv); - } -} -#endif - static void rack_counter_destroy(void) { @@ -3470,7 +3340,6 @@ rack_counter_destroy(void) counter_u64_free(rack_saw_enobuf_hw); counter_u64_free(rack_saw_enetunreach); counter_u64_free(rack_hot_alloc); - counter_u64_free(tcp_policer_detected); counter_u64_free(rack_to_alloc); 
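Several hunks above and below replace memset(&log.u_bbr, 0, sizeof(log.u_bbr)) with memset(&log, 0, sizeof(log)). A minimal standalone sketch of why zeroing the whole on-stack union differs from zeroing only one member; the types here (union log_blob) are hypothetical and are not the kernel's union tcp_log_stackspecific:

	#include <string.h>

	union log_blob {
		struct {
			int flex1;
			int flex2;
		} u_bbr;
		unsigned char raw[64];	/* some other, larger member */
	};

	static void
	log_example(void)
	{
		union log_blob log;

		/* Clears all 64 bytes of the union's storage. */
		memset(&log, 0, sizeof(log));

		/*
		 * memset(&log.u_bbr, 0, sizeof(log.u_bbr)) would clear only
		 * sizeof(log.u_bbr) bytes, leaving the remainder of the
		 * union uninitialized if u_bbr is not its largest member.
		 */
		log.u_bbr.flex1 = 1;
		log.u_bbr.flex2 = 2;
		(void)log;
	}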
counter_u64_free(rack_to_alloc_hard); counter_u64_free(rack_to_alloc_emerg); @@ -3549,7 +3418,6 @@ static struct rack_sendmap * rack_alloc_full_limit(struct tcp_rack *rack) { if ((V_tcp_map_entries_limit > 0) && - (rack->do_detection == 0) && (rack->r_ctl.rc_num_maps_alloced >= V_tcp_map_entries_limit)) { counter_u64_add(rack_to_alloc_limited, 1); if (!rack->alloc_limit_reported) { @@ -3570,7 +3438,6 @@ rack_alloc_limit(struct tcp_rack *rack, uint8_t limit_type) if (limit_type) { /* currently there is only one limit type */ if (rack->r_ctl.rc_split_limit > 0 && - (rack->do_detection == 0) && rack->r_ctl.rc_num_split_allocs >= rack->r_ctl.rc_split_limit) { counter_u64_add(rack_split_limited, 1); if (!rack->alloc_limit_reported) { @@ -3578,17 +3445,6 @@ rack_alloc_limit(struct tcp_rack *rack, uint8_t limit_type) counter_u64_add(rack_alloc_limited_conns, 1); } return (NULL); -#ifdef TCP_SAD_DETECTION - } else if ((tcp_sad_limit != 0) && - (rack->do_detection == 1) && - (rack->r_ctl.rc_num_split_allocs >= tcp_sad_limit)) { - counter_u64_add(rack_split_limited, 1); - if (!rack->alloc_limit_reported) { - rack->alloc_limit_reported = 1; - counter_u64_add(rack_alloc_limited_conns, 1); - } - return (NULL); -#endif } } @@ -3623,16 +3479,16 @@ static void rack_free(struct tcp_rack *rack, struct rack_sendmap *rsm) { if (rsm->r_flags & RACK_APP_LIMITED) { - if (rack->r_ctl.rc_app_limited_cnt > 0) { - rack->r_ctl.rc_app_limited_cnt--; - } + KASSERT((rack->r_ctl.rc_app_limited_cnt > 0), + ("app_cnt %u, rsm %p", rack->r_ctl.rc_app_limited_cnt, rsm)); + rack->r_ctl.rc_app_limited_cnt--; } if (rsm->r_limit_type) { /* currently there is only one limit type */ rack->r_ctl.rc_num_split_allocs--; } if (rsm == rack->r_ctl.rc_first_appl) { - rack->r_ctl.cleared_app_ack_seq = rsm->r_start + (rsm->r_end - rsm->r_start); + rack->r_ctl.cleared_app_ack_seq = rsm->r_end; rack->r_ctl.cleared_app_ack = 1; if (rack->r_ctl.rc_app_limited_cnt == 0) rack->r_ctl.rc_first_appl = NULL; @@ -3697,8 +3553,7 @@ rack_get_measure_window(struct tcpcb *tp, struct tcp_rack *rack) * earlier. * * So lets calculate the BDP with the "known" b/w using - * the SRTT has our rtt and then multiply it by the - * goal. + * the SRTT as our rtt and then multiply it by the goal. 
*/ bw = rack_get_bw(rack); srtt = (uint64_t)tp->t_srtt; @@ -4261,7 +4116,7 @@ rack_log_rtt_shrinks(struct tcp_rack *rack, uint32_t us_cts, union tcp_log_stackspecific log; struct timeval tv; - memset(&log.u_bbr, 0, sizeof(log.u_bbr)); + memset(&log, 0, sizeof(log)); log.u_bbr.flex1 = line; log.u_bbr.flex2 = rack->r_ctl.rc_time_probertt_starts; log.u_bbr.flex3 = rack->r_ctl.rc_lower_rtt_us_cts; @@ -5007,7 +4862,7 @@ rack_log_gp_calc(struct tcp_rack *rack, uint32_t add_part, uint32_t sub_part, ui union tcp_log_stackspecific log; struct timeval tv; - memset(&log.u_bbr, 0, sizeof(log.u_bbr)); + memset(&log, 0, sizeof(log)); log.u_bbr.timeStamp = tcp_get_usecs(&tv); log.u_bbr.flex1 = add_part; log.u_bbr.flex2 = sub_part; @@ -5357,7 +5212,7 @@ rack_do_goodput_measurement(struct tcpcb *tp, struct tcp_rack *rack, union tcp_log_stackspecific log; struct timeval tv; - memset(&log.u_bbr, 0, sizeof(log.u_bbr)); + memset(&log, 0, sizeof(log)); log.u_bbr.timeStamp = tcp_get_usecs(&tv); log.u_bbr.flex1 = rack->r_ctl.current_round; log.u_bbr.flex2 = rack->r_ctl.last_rnd_of_gp_rise; @@ -5393,7 +5248,7 @@ rack_do_goodput_measurement(struct tcpcb *tp, struct tcp_rack *rack, union tcp_log_stackspecific log; struct timeval tv; - memset(&log.u_bbr, 0, sizeof(log.u_bbr)); + memset(&log, 0, sizeof(log)); log.u_bbr.timeStamp = tcp_get_usecs(&tv); log.u_bbr.flex1 = rack->r_ctl.current_round; log.u_bbr.flex2 = (uint32_t)gp_est; @@ -5583,7 +5438,7 @@ skip_measurement: rack_log_pacing_delay_calc(rack, tp->gput_seq, tp->gput_ack, - (uint64_t)rsm, + (uintptr_t)rsm, tp->gput_ts, (((uint64_t)rack->r_ctl.rc_app_limited_cnt << 32) | (uint64_t)rack->r_ctl.rc_gp_output_ts), 9, @@ -5676,7 +5531,7 @@ rack_ack_received(struct tcpcb *tp, struct tcp_rack *rack, uint32_t th_ack, uint union tcp_log_stackspecific log; struct timeval tv; - memset(&log.u_bbr, 0, sizeof(log.u_bbr)); + memset(&log, 0, sizeof(log)); log.u_bbr.timeStamp = tcp_get_usecs(&tv); log.u_bbr.flex1 = th_ack; log.u_bbr.flex2 = tp->t_ccv.flags; @@ -5756,459 +5611,12 @@ tcp_rack_partialack(struct tcpcb *tp) rack->r_wanted_output = 1; } -static inline uint64_t -rack_get_rxt_per(uint64_t snds, uint64_t rxts) -{ - uint64_t rxt_per; - - if (snds > 0) { - rxt_per = rxts * 1000; - rxt_per /= snds; - } else { - /* This is an unlikely path */ - if (rxts) { - /* Its the max it was all re-transmits */ - rxt_per = 0xffffffffffffffff; - } else { - rxt_per = 0; - } - } - return (rxt_per); -} - -static void -policer_detection_log(struct tcp_rack *rack, uint32_t flex1, uint32_t flex2, uint32_t flex3, uint32_t flex4, uint8_t flex8) -{ - if (tcp_bblogging_on(rack->rc_tp)) { - union tcp_log_stackspecific log; - struct timeval tv; - - memset(&log.u_bbr, 0, sizeof(log.u_bbr)); - log.u_bbr.timeStamp = tcp_get_usecs(&tv); - log.u_bbr.flex1 = flex1; - log.u_bbr.flex2 = flex2; - log.u_bbr.flex3 = flex3; - log.u_bbr.flex4 = flex4; - log.u_bbr.flex5 = rack->r_ctl.current_policer_bucket; - log.u_bbr.flex6 = rack->r_ctl.policer_bucket_size; - log.u_bbr.flex7 = 0; - log.u_bbr.flex8 = flex8; - log.u_bbr.bw_inuse = rack->r_ctl.policer_bw; - log.u_bbr.applimited = rack->r_ctl.current_round; - log.u_bbr.epoch = rack->r_ctl.policer_max_seg; - log.u_bbr.delivered = (uint32_t)rack->r_ctl.bytes_acked_in_recovery; - log.u_bbr.cur_del_rate = rack->rc_tp->t_sndbytes; - log.u_bbr.delRate = rack->rc_tp->t_snd_rxt_bytes; - log.u_bbr.rttProp = rack->r_ctl.gp_bw; - log.u_bbr.bbr_state = rack->rc_policer_detected; - log.u_bbr.bbr_substate = 0; - log.u_bbr.inflight = ctf_flight_size(rack->rc_tp, 
rack->r_ctl.rc_sacked); - log.u_bbr.use_lt_bw = rack->policer_detect_on; - log.u_bbr.lt_epoch = 0; - log.u_bbr.pkts_out = 0; - tcp_log_event(rack->rc_tp, NULL, NULL, NULL, TCP_POLICER_DET, 0, - 0, &log, false, NULL, NULL, 0, &tv); - } - -} - -static void -policer_detection(struct tcpcb *tp, struct tcp_rack *rack, int post_recovery) -{ - /* - * Rack excess rxt accounting is turned on. If we - * are above a threshold of rxt's in at least N - * rounds, then back off the cwnd and ssthresh - * to fit into the long-term b/w. - */ - - uint32_t pkts, mid, med, alt_med, avg, segsiz, tot_retran_pkt_count = 0; - uint32_t cnt_of_mape_rxt = 0; - uint64_t snds, rxts, rxt_per, tim, del, del_bw; - int i; - struct timeval tv; - - - /* - * First is there enough packets delivered during recovery to make - * a determiniation of b/w? - */ - segsiz = min(ctf_fixed_maxseg(tp), rack->r_ctl.rc_pace_min_segs); - if ((rack->rc_policer_detected == 0) && - (rack->r_ctl.policer_del_mss > 0) && - ((uint32_t)rack->r_ctl.policer_del_mss > ((rack->r_ctl.bytes_acked_in_recovery + segsiz - 1)/segsiz))) { - /* - * Not enough data sent in recovery for initial detection. Once - * we have deteced a policer we allow less than the threshold (polcer_del_mss) - * amount of data in a recovery to let us fall through and double check - * our policer settings and possibly expand or collapse the bucket size and - * the polcier b/w. - * - * Once you are declared to be policed. this block of code cannot be - * reached, instead blocks further down will re-check the policer detection - * triggers and possibly reset the measurements if somehow we have let the - * policer bucket size grow too large. - */ - if (rack_verbose_logging && tcp_bblogging_on(rack->rc_tp)) { - policer_detection_log(rack, rack->r_ctl.policer_del_mss, - ((rack->r_ctl.bytes_acked_in_recovery + segsiz - 1)/segsiz), - rack->r_ctl.bytes_acked_in_recovery, segsiz, 18); - } - return; - } - tcp_get_usecs(&tv); - tim = tcp_tv_to_lusectick(&tv) - rack->r_ctl.time_entered_recovery; - del = rack->r_ctl.bytes_acked_in_recovery; - if (tim > 0) - del_bw = (del * (uint64_t)1000000) / tim; - else - del_bw = 0; - /* B/W compensation? */ - - if (rack->r_ctl.pol_bw_comp && ((rack->r_ctl.policer_bw > 0) || - (del_bw > 0))) { - /* - * Sanity check now that the data is in. How long does it - * take for us to pace out two of our policer_max_seg's? - * - * If it is longer than the RTT then we are set - * too slow, maybe because of not enough data - * sent during recovery. - */ - uint64_t lentime, res, srtt, max_delbw, alt_bw; - - srtt = (uint64_t)rack_grab_rtt(tp, rack); - if ((tp->t_srtt > 0) && (srtt > tp->t_srtt)) - srtt = tp->t_srtt; - lentime = rack->r_ctl.policer_max_seg * (uint64_t)HPTS_USEC_IN_SEC * 2; - if (del_bw > rack->r_ctl.policer_bw) { - max_delbw = del_bw; - } else { - max_delbw = rack->r_ctl.policer_bw; - } - res = lentime / max_delbw; - if ((srtt > 0) && (res > srtt)) { - /* - * At this rate we can not get two policer_maxsegs - * out before the ack arrives back. - * - * Lets at least get it raised up so that - * we can be a bit faster than that if possible. 
- */ - lentime = (rack->r_ctl.policer_max_seg * 2); - tim = srtt; - alt_bw = (lentime * (uint64_t)HPTS_USEC_IN_SEC) / tim; - if (alt_bw > max_delbw) { - uint64_t cap_alt_bw; - - cap_alt_bw = (max_delbw + (max_delbw * rack->r_ctl.pol_bw_comp)); - if ((rack_pol_min_bw > 0) && (cap_alt_bw < rack_pol_min_bw)) { - /* We place a min on the cap which defaults to 1Mbps */ - cap_alt_bw = rack_pol_min_bw; - } - if (alt_bw <= cap_alt_bw) { - /* It should be */ - del_bw = alt_bw; - policer_detection_log(rack, - (uint32_t)tim, - rack->r_ctl.policer_max_seg, - 0, - 0, - 16); - } else { - /* - * This is an odd case where likely the RTT is very very - * low. And yet it is still being policed. We don't want - * to get more than (rack_policing_do_bw_comp+1) x del-rate - * where del-rate is what we got in recovery for either the - * first Policer Detection(PD) or this PD we are on now. - */ - del_bw = cap_alt_bw; - policer_detection_log(rack, - (uint32_t)tim, - rack->r_ctl.policer_max_seg, - (uint32_t)max_delbw, - (rack->r_ctl.pol_bw_comp + 1), - 16); - } - } - } - } - snds = tp->t_sndbytes - rack->r_ctl.last_policer_sndbytes; - rxts = tp->t_snd_rxt_bytes - rack->r_ctl.last_policer_snd_rxt_bytes; - rxt_per = rack_get_rxt_per(snds, rxts); - /* Figure up the average and median */ - for(i = 0; i < RETRAN_CNT_SIZE; i++) { - if (rack->r_ctl.rc_cnt_of_retran[i] > 0) { - tot_retran_pkt_count += (i + 1) * rack->r_ctl.rc_cnt_of_retran[i]; - cnt_of_mape_rxt += rack->r_ctl.rc_cnt_of_retran[i]; - } - } - if (cnt_of_mape_rxt) - avg = (tot_retran_pkt_count * 10)/cnt_of_mape_rxt; - else - avg = 0; - alt_med = med = 0; - mid = tot_retran_pkt_count/2; - for(i = 0; i < RETRAN_CNT_SIZE; i++) { - pkts = (i + 1) * rack->r_ctl.rc_cnt_of_retran[i]; - if (mid > pkts) { - mid -= pkts; - continue; - } - med = (i + 1); - break; - } - mid = cnt_of_mape_rxt / 2; - for(i = 0; i < RETRAN_CNT_SIZE; i++) { - if (mid > rack->r_ctl.rc_cnt_of_retran[i]) { - mid -= rack->r_ctl.rc_cnt_of_retran[i]; - continue; - } - alt_med = (i + 1); - break; - } - if (rack->r_ctl.policer_alt_median) { - /* Swap the medians */ - uint32_t swap; - - swap = med; - med = alt_med; - alt_med = swap; - } - if (rack_verbose_logging && tcp_bblogging_on(rack->rc_tp)) { - union tcp_log_stackspecific log; - struct timeval tv; - - memset(&log.u_bbr, 0, sizeof(log.u_bbr)); - log.u_bbr.timeStamp = tcp_get_usecs(&tv); - log.u_bbr.flex1 = avg; - log.u_bbr.flex2 = med; - log.u_bbr.flex3 = (uint32_t)rxt_per; - log.u_bbr.flex4 = rack->r_ctl.policer_avg_threshold; - log.u_bbr.flex5 = rack->r_ctl.policer_med_threshold; - log.u_bbr.flex6 = rack->r_ctl.policer_rxt_threshold; - log.u_bbr.flex7 = rack->r_ctl.policer_alt_median; - log.u_bbr.flex8 = 1; - log.u_bbr.delivered = rack->r_ctl.policer_bucket_size; - log.u_bbr.applimited = rack->r_ctl.current_round; - log.u_bbr.epoch = rack->r_ctl.policer_max_seg; - log.u_bbr.bw_inuse = del_bw; - log.u_bbr.cur_del_rate = rxts; - log.u_bbr.delRate = snds; - log.u_bbr.rttProp = rack->r_ctl.gp_bw; - log.u_bbr.bbr_state = rack->rc_policer_detected; - log.u_bbr.bbr_substate = 0; - log.u_bbr.inflight = ctf_flight_size(rack->rc_tp, rack->r_ctl.rc_sacked); - log.u_bbr.use_lt_bw = rack->policer_detect_on; - log.u_bbr.lt_epoch = (uint32_t)tim; - log.u_bbr.pkts_out = rack->r_ctl.bytes_acked_in_recovery; - tcp_log_event(tp, NULL, NULL, NULL, TCP_POLICER_DET, 0, - 0, &log, false, NULL, NULL, 0, &tv); - } - if (med == RETRAN_CNT_SIZE) { - /* - * If the median is the maximum, then what we - * likely have here is a network breakage. 
Either that - * or we are so unlucky that all of our traffic is being - * dropped and having to be retransmitted the maximum times - * and this just is not how a policer works. - * - * If it is truely a policer eventually we will come - * through and it won't be the maximum. - */ - return; - } - /* Has enough rounds progressed for us to re-measure? */ - if ((rxt_per >= (uint64_t)rack->r_ctl.policer_rxt_threshold) && - (avg >= rack->r_ctl.policer_avg_threshold) && - (med >= rack->r_ctl.policer_med_threshold)) { - /* - * We hit all thresholds that indicate we are - * being policed. Now we may be doing this from a rack timeout - * which then means the rest of recovery will hopefully go - * smoother as we pace. At the end of recovery we will - * fall back in here and reset the values using the - * results of the entire recovery episode (we could also - * hit this as we exit recovery as well which means only - * one time in here). - * - * This is done explicitly that if we hit the thresholds - * again in a second recovery we overwrite the values. We do - * that because over time, as we pace the policer_bucket_size may - * continue to grow. This then provides more and more times when - * we are not pacing to the policer rate. This lets us compensate - * for when we hit a false positive and those flows continue to - * increase. However if its a real policer we will then get over its - * limit, over time, again and thus end up back here hitting the - * thresholds again. - * - * The alternative to this is to instead whenever we pace due to - * policing in rack_policed_sending we could add the amount len paced to the - * idle_snd_una value (which decreases the amount in last_amount_before_rec - * since that is always [th_ack - idle_snd_una]). This would then prevent - * the polcier_bucket_size from growing in additional recovery episodes - * Which would then mean false postives would be pretty much stuck - * after things got back to normal (assuming that what caused the - * false positive was a small network outage). - * - */ - tcp_trace_point(rack->rc_tp, TCP_TP_POLICER_DET); - if (rack->rc_policer_detected == 0) { - /* - * Increment the stat that tells us we identified - * a policer only once. Note that if we ever allow - * the flag to be cleared (reverted) then we need - * to adjust this to not do multi-counting. 
- */ - counter_u64_add(tcp_policer_detected, 1); - } - rack->r_ctl.last_policer_sndbytes = tp->t_sndbytes; - rack->r_ctl.last_policer_snd_rxt_bytes = tp->t_snd_rxt_bytes; - rack->r_ctl.policer_bw = del_bw; - rack->r_ctl.policer_max_seg = tcp_get_pacing_burst_size_w_divisor(rack->rc_tp, - rack->r_ctl.policer_bw, - min(ctf_fixed_maxseg(rack->rc_tp), - rack->r_ctl.rc_pace_min_segs), - 0, NULL, - NULL, rack->r_ctl.pace_len_divisor); - /* Now what about the policer bucket size */ - rack->r_ctl.policer_bucket_size = rack->r_ctl.last_amount_before_rec; - if (rack->r_ctl.policer_bucket_size < rack->r_ctl.policer_max_seg) { - /* We must be able to send our max-seg or else chaos ensues */ - rack->r_ctl.policer_bucket_size = rack->r_ctl.policer_max_seg * 2; - } - if (rack->rc_policer_detected == 0) - rack->r_ctl.current_policer_bucket = 0; - if (tcp_bblogging_on(rack->rc_tp)) { - union tcp_log_stackspecific log; - struct timeval tv; - - memset(&log.u_bbr, 0, sizeof(log.u_bbr)); - log.u_bbr.timeStamp = tcp_get_usecs(&tv); - log.u_bbr.flex1 = avg; - log.u_bbr.flex2 = med; - log.u_bbr.flex3 = rxt_per; - log.u_bbr.flex4 = rack->r_ctl.policer_avg_threshold; - log.u_bbr.flex5 = rack->r_ctl.policer_med_threshold; - log.u_bbr.flex6 = rack->r_ctl.policer_rxt_threshold; - log.u_bbr.flex7 = rack->r_ctl.policer_alt_median; - log.u_bbr.flex8 = 2; - log.u_bbr.applimited = rack->r_ctl.current_round; - log.u_bbr.bw_inuse = del_bw; - log.u_bbr.delivered = rack->r_ctl.policer_bucket_size; - log.u_bbr.cur_del_rate = rxts; - log.u_bbr.delRate = snds; - log.u_bbr.rttProp = rack->r_ctl.gp_bw; - log.u_bbr.bbr_state = rack->rc_policer_detected; - log.u_bbr.bbr_substate = 0; - log.u_bbr.inflight = ctf_flight_size(rack->rc_tp, rack->r_ctl.rc_sacked); - log.u_bbr.use_lt_bw = rack->policer_detect_on; - log.u_bbr.epoch = rack->r_ctl.policer_max_seg; - log.u_bbr.lt_epoch = (uint32_t)tim; - log.u_bbr.pkts_out = rack->r_ctl.bytes_acked_in_recovery; - tcp_log_event(tp, NULL, NULL, NULL, TCP_POLICER_DET, 0, - 0, &log, false, NULL, NULL, 0, &tv); - /* - * Put out an added log, 19, for the sole purpose - * of getting the txt/rxt so that we can benchmark - * in read-bbrlog the ongoing rxt rate after our - * policer invocation in the HYSTART announcments. - */ - memset(&log.u_bbr, 0, sizeof(log.u_bbr)); - log.u_bbr.timeStamp = tcp_tv_to_usectick(&tv); - log.u_bbr.flex1 = alt_med; - log.u_bbr.flex8 = 19; - log.u_bbr.cur_del_rate = tp->t_sndbytes; - log.u_bbr.delRate = tp->t_snd_rxt_bytes; - tcp_log_event(tp, NULL, NULL, NULL, TCP_POLICER_DET, 0, - 0, &log, false, NULL, NULL, 0, &tv); - } - /* Turn off any fast output, thats ended */ - rack->r_fast_output = 0; - /* Mark the time for credits */ - rack->r_ctl.last_sendtime = tcp_get_u64_usecs(NULL); - if (rack->r_rr_config < 2) { - /* - * We need to be stricter on the RR config so - * the pacing has priority. - */ - rack->r_rr_config = 2; - } - policer_detection_log(rack, - rack->r_ctl.idle_snd_una, - rack->r_ctl.ack_for_idle, - 0, - (uint32_t)tim, - 14); - rack->rc_policer_detected = 1; - } else if ((rack->rc_policer_detected == 1) && - (post_recovery == 1)) { - /* - * If we are exiting recovery and have already detected - * we need to possibly update the values. - * - * First: Update the idle -> recovery sent value. 
- */ - uint32_t srtt; - - if (rack->r_ctl.last_amount_before_rec > rack->r_ctl.policer_bucket_size) { - rack->r_ctl.policer_bucket_size = rack->r_ctl.last_amount_before_rec; - } - srtt = (uint64_t)rack_grab_rtt(tp, rack); - if ((tp->t_srtt > 0) && (srtt > tp->t_srtt)) - srtt = tp->t_srtt; - if ((srtt != 0) && - (tim < (uint64_t)srtt)) { - /* - * Not long enough. - */ - if (rack_verbose_logging) - policer_detection_log(rack, - (uint32_t)tim, - 0, - 0, - 0, - 15); - return; - } - /* - * Finally update the b/w if its grown. - */ - if (del_bw > rack->r_ctl.policer_bw) { - rack->r_ctl.policer_bw = del_bw; - rack->r_ctl.policer_max_seg = tcp_get_pacing_burst_size_w_divisor(rack->rc_tp, - rack->r_ctl.policer_bw, - min(ctf_fixed_maxseg(rack->rc_tp), - rack->r_ctl.rc_pace_min_segs), - 0, NULL, - NULL, rack->r_ctl.pace_len_divisor); - if (rack->r_ctl.policer_bucket_size < rack->r_ctl.policer_max_seg) { - /* We must be able to send our max-seg or else chaos ensues */ - rack->r_ctl.policer_bucket_size = rack->r_ctl.policer_max_seg * 2; - } - } - policer_detection_log(rack, - rack->r_ctl.idle_snd_una, - rack->r_ctl.ack_for_idle, - 0, - (uint32_t)tim, - 3); - } -} - static void rack_exit_recovery(struct tcpcb *tp, struct tcp_rack *rack, int how) { - /* now check with the policer if on */ - if (rack->policer_detect_on == 1) { - policer_detection(tp, rack, 1); - } /* - * Now exit recovery, note we must do the idle set after the policer_detection - * to get the amount acked prior to recovery correct. + * Now exit recovery. */ - rack->r_ctl.idle_snd_una = tp->snd_una; EXIT_RECOVERY(tp->t_flags); } @@ -6238,7 +5646,7 @@ rack_post_recovery(struct tcpcb *tp, uint32_t th_ack) union tcp_log_stackspecific log; struct timeval tv; - memset(&log.u_bbr, 0, sizeof(log.u_bbr)); + memset(&log, 0, sizeof(log)); log.u_bbr.timeStamp = tcp_get_usecs(&tv); log.u_bbr.flex1 = th_ack; log.u_bbr.flex2 = tp->t_ccv.flags; @@ -6314,69 +5722,11 @@ rack_cong_signal(struct tcpcb *tp, uint32_t type, uint32_t ack, int line) tp->t_flags &= ~TF_WASFRECOVERY; tp->t_flags &= ~TF_WASCRECOVERY; if (!IN_FASTRECOVERY(tp->t_flags)) { - struct rack_sendmap *rsm; - struct timeval tv; - uint32_t segsiz; - /* Check if this is the end of the initial Start-up i.e. initial slow-start */ if (rack->rc_initial_ss_comp == 0) { /* Yep it is the end of the initial slowstart */ rack->rc_initial_ss_comp = 1; } - microuptime(&tv); - rack->r_ctl.time_entered_recovery = tcp_tv_to_lusectick(&tv); - if (SEQ_GEQ(ack, tp->snd_una)) { - /* - * The ack is above snd_una. Lets see - * if we can establish a postive distance from - * our idle mark. - */ - rack->r_ctl.ack_for_idle = ack; - if (SEQ_GT(ack, rack->r_ctl.idle_snd_una)) { - rack->r_ctl.last_amount_before_rec = ack - rack->r_ctl.idle_snd_una; - } else { - /* No data thru yet */ - rack->r_ctl.last_amount_before_rec = 0; - } - } else if (SEQ_GT(tp->snd_una, rack->r_ctl.idle_snd_una)) { - /* - * The ack is out of order and behind the snd_una. It may - * have contained SACK information which we processed else - * we would have rejected it. - */ - rack->r_ctl.ack_for_idle = tp->snd_una; - rack->r_ctl.last_amount_before_rec = tp->snd_una - rack->r_ctl.idle_snd_una; - } else { - rack->r_ctl.ack_for_idle = ack; - rack->r_ctl.last_amount_before_rec = 0; - } - if (rack->rc_policer_detected) { - /* - * If we are being policed and we have a loss, it - * means our bucket is now empty. This can happen - * where some other flow on the same host sends - * that this connection is not aware of. 
- */ - rack->r_ctl.current_policer_bucket = 0; - if (rack_verbose_logging) - policer_detection_log(rack, rack->r_ctl.last_amount_before_rec, 0, 0, 0, 4); - if (rack->r_ctl.last_amount_before_rec > rack->r_ctl.policer_bucket_size) { - rack->r_ctl.policer_bucket_size = rack->r_ctl.last_amount_before_rec; - } - } - memset(rack->r_ctl.rc_cnt_of_retran, 0, sizeof(rack->r_ctl.rc_cnt_of_retran)); - segsiz = min(ctf_fixed_maxseg(tp), rack->r_ctl.rc_pace_min_segs); - TAILQ_FOREACH(rsm, &rack->r_ctl.rc_tmap, r_tnext) { - /* - * Go through the outstanding and re-peg - * any that should have been left in the - * retransmit list (on a double recovery). - */ - if (rsm->r_act_rxt_cnt > 0) { - rack_peg_rxt(rack, rsm, segsiz); - } - } - rack->r_ctl.bytes_acked_in_recovery = 0; rack->r_ctl.rc_prr_delivered = 0; rack->r_ctl.rc_prr_out = 0; rack->r_fast_output = 0; @@ -6411,8 +5761,6 @@ rack_cong_signal(struct tcpcb *tp, uint32_t type, uint32_t ack, int line) rack->r_fast_output = 0; if (IN_RECOVERY(tp->t_flags)) rack_exit_recovery(tp, rack, 2); - rack->r_ctl.bytes_acked_in_recovery = 0; - rack->r_ctl.time_entered_recovery = 0; orig_cwnd = tp->snd_cwnd; rack_log_to_prr(rack, 16, orig_cwnd, line); if (CC_ALGO(tp)->cong_signal == NULL) { @@ -6443,7 +5791,7 @@ rack_cong_signal(struct tcpcb *tp, uint32_t type, uint32_t ack, int line) tp->t_badrxtwin = 0; break; } - if ((CC_ALGO(tp)->cong_signal != NULL) && + if ((CC_ALGO(tp)->cong_signal != NULL) && (type != CC_RTO)){ tp->t_ccv.curack = ack; CC_ALGO(tp)->cong_signal(&tp->t_ccv, type); @@ -6554,7 +5902,7 @@ rack_calc_thresh_rack(struct tcp_rack *rack, uint32_t srtt, uint32_t cts, int li * * If reorder-fade is configured, then we track the last time we saw * re-ordering occur. If we reach the point where enough time as - * passed we no longer consider reordering has occuring. + * passed we no longer consider reordering as occurring. * * Or if reorder-face is 0, then once we see reordering we consider * the connection to alway be subject to reordering and just set lro @@ -6812,7 +6160,6 @@ rack_timer_start(struct tcpcb *tp, struct tcp_rack *rack, uint32_t cts, int sup_ } rack->rc_on_min_to = 0; if ((tp->t_state < TCPS_ESTABLISHED) || - (rack->sack_attack_disable > 0) || ((tp->t_flags & TF_SACK_PERMIT) == 0)) { goto activate_rxt; } @@ -6884,16 +6231,6 @@ activate_rxt: goto activate_rxt; } } - if (rack->sack_attack_disable) { - /* - * We don't want to do - * any TLP's if you are an attacker. - * Though if you are doing what - * is expected you may still have - * SACK-PASSED marks. - */ - goto activate_rxt; - } /* Convert from ms to usecs */ if ((rsm->r_flags & RACK_SACK_PASSED) || (rsm->r_flags & RACK_RWND_COLLAPSED) || @@ -7008,7 +6345,7 @@ activate_tlp: if (to < rack_tlp_min) { to = rack_tlp_min; } - if (to > TICKS_2_USEC(TCPTV_REXMTMAX)) { + if (to > TICKS_2_USEC(tcp_rexmit_max)) { /* * If the TLP time works out to larger than the max * RTO lets not do TLP.. just RTO. 
@@ -7124,7 +6461,6 @@ rack_exit_persist(struct tcpcb *tp, struct tcp_rack *rack, uint32_t cts) rack->lt_bw_up = 1; rack->r_persist_lt_bw_off = 0; } - rack->r_ctl.idle_snd_una = tp->snd_una; rack->rc_in_persist = 0; rack->r_ctl.rc_went_idle_time = 0; tp->t_rxtshift = 0; @@ -7143,7 +6479,7 @@ rack_log_hpts_diag(struct tcp_rack *rack, uint32_t cts, if (rack_verbose_logging && tcp_bblogging_on(rack->rc_tp)) { union tcp_log_stackspecific log; - memset(&log.u_bbr, 0, sizeof(log.u_bbr)); + memset(&log, 0, sizeof(log)); log.u_bbr.flex1 = diag->p_nxt_slot; log.u_bbr.flex2 = diag->p_cur_slot; log.u_bbr.flex3 = diag->slot_req; @@ -7182,7 +6518,7 @@ rack_log_wakeup(struct tcpcb *tp, struct tcp_rack *rack, struct sockbuf *sb, uin union tcp_log_stackspecific log; struct timeval tv; - memset(&log.u_bbr, 0, sizeof(log.u_bbr)); + memset(&log, 0, sizeof(log)); log.u_bbr.flex1 = sb->sb_flags; log.u_bbr.flex2 = len; log.u_bbr.flex3 = sb->sb_state; @@ -7304,25 +6640,6 @@ rack_start_hpts_timer (struct tcp_rack *rack, struct tcpcb *tp, uint32_t cts, } } hpts_timeout = rack_timer_start(tp, rack, cts, sup_rack); -#ifdef TCP_SAD_DETECTION - if (rack->sack_attack_disable && - (rack->r_ctl.ack_during_sd > 0) && - (slot < tcp_sad_pacing_interval)) { - /* - * We have a potential attacker on - * the line. We have possibly some - * (or now) pacing time set. We want to - * slow down the processing of sacks by some - * amount (if it is an attacker). Set the default - * slot for attackers in place (unless the original - * interval is longer). Its stored in - * micro-seconds, so lets convert to msecs. - */ - slot = tcp_sad_pacing_interval; - rack_log_type_bbrsnd(rack, tot_len_this_send, slot, us_cts, &tv, __LINE__); - rack->r_ctl.ack_during_sd = 0; - } -#endif if (tp->t_flags & TF_DELACK) { delayed_ack = TICKS_2_USEC(tcp_delacktime); rack->r_ctl.rc_hpts_flags |= PACE_TMR_DELACK; @@ -7472,11 +6789,7 @@ rack_start_hpts_timer (struct tcp_rack *rack, struct tcpcb *tp, uint32_t cts, tp->t_flags2 |= TF2_DONT_SACK_QUEUE; } } - /* For sack attackers we want to ignore sack */ - if (rack->sack_attack_disable == 1) { - tp->t_flags2 |= (TF2_DONT_SACK_QUEUE | - TF2_MBUF_QUEUE_READY); - } else if (rack->rc_ack_can_sendout_data) { + if (rack->rc_ack_can_sendout_data) { /* * Ahh but wait, this is that special case * where the pacing timer can be disturbed @@ -7608,16 +6921,6 @@ rack_timeout_rack(struct tcpcb *tp, struct tcp_rack *rack, uint32_t cts) 0, 0, 0); return (1); } - if ((rack->policer_detect_on == 1) && - (rack->rc_policer_detected == 0)) { - /* - * We do this early if we have not - * deteceted to attempt to detect - * quicker. Normally we want to do this - * as recovery exits (and we will again). 
- */ - policer_detection(tp, rack, 0); - } return (0); } @@ -7740,6 +7043,9 @@ rack_clone_rsm(struct tcp_rack *rack, struct rack_sendmap *nrsm, /* Push bit must go to the right edge as well */ if (rsm->r_flags & RACK_HAD_PUSH) rsm->r_flags &= ~RACK_HAD_PUSH; + /* Update the count if app limited */ + if (nrsm->r_flags & RACK_APP_LIMITED) + rack->r_ctl.rc_app_limited_cnt++; /* Clone over the state of the hw_tls flag */ nrsm->r_hw_tls = rsm->r_hw_tls; /* @@ -7791,7 +7097,7 @@ rack_merge_rsm(struct tcp_rack *rack, l_rsm->r_flags |= RACK_TLP; if (r_rsm->r_flags & RACK_RWND_COLLAPSED) l_rsm->r_flags |= RACK_RWND_COLLAPSED; - if ((r_rsm->r_flags & RACK_APP_LIMITED) && + if ((r_rsm->r_flags & RACK_APP_LIMITED) && ((l_rsm->r_flags & RACK_APP_LIMITED) == 0)) { /* * If both are app-limited then let the @@ -8281,11 +7587,8 @@ rack_remxt_tmr(struct tcpcb *tp) rack->r_ctl.rc_resend = tqhash_min(rack->r_ctl.tqh); if (rack->r_ctl.rc_resend != NULL) rack->r_ctl.rc_resend->r_flags |= RACK_TO_REXT; - if ((((tp->t_flags & TF_SACK_PERMIT) == 0) -#ifdef TCP_SAD_DETECTION - || (rack->sack_attack_disable != 0) -#endif - ) && ((tp->t_flags & TF_SENTFIN) == 0)) { + if (((tp->t_flags & TF_SACK_PERMIT) == 0) && + ((tp->t_flags & TF_SENTFIN) == 0)) { /* * For non-sack customers new data * needs to go out as retransmits until @@ -8583,6 +7886,16 @@ drop_it: tp->t_flags2 |= TF2_PLPMTU_PMTUD; tp->t_flags2 &= ~TF2_PLPMTU_BLACKHOLE; tp->t_maxseg = tp->t_pmtud_saved_maxseg; + if (tp->t_maxseg < V_tcp_mssdflt) { + /* + * The MSS is so small we should not + * process incoming SACK's since we are + * subject to attack in such a case. + */ + tp->t_flags2 |= TF2_PROC_SACK_PROHIBIT; + } else { + tp->t_flags2 &= ~TF2_PROC_SACK_PROHIBIT; + } KMOD_TCPSTAT_INC(tcps_pmtud_blackhole_failed); } } @@ -8720,6 +8033,7 @@ skip_time_check: ret = rack_timeout_rack(tp, rack, cts); } else if (timers & PACE_TMR_TLP) { rack->r_ctl.rc_tlp_rxt_last_time = cts; + rack->r_fast_output = 0; ret = rack_timeout_tlp(tp, rack, cts, doing_tlp); } else if (timers & PACE_TMR_RXT) { rack->r_ctl.rc_tlp_rxt_last_time = cts; @@ -8799,86 +8113,6 @@ rack_stop_all_timers(struct tcpcb *tp, struct tcp_rack *rack) } } -/* - * We maintain an array fo 16 (RETRAN_CNT_SIZE) entries. This - * array is zeroed at the start of recovery. Each time a segment - * is retransmitted, we translate that into a number of packets - * (based on segsiz) and based on how many times its been retransmitted - * increment by the number of packets the counter that represents - * retansmitted N times. Index 0 is retransmitted 1 time, index 1 - * is retransmitted 2 times etc. - * - * So for example when we send a 4344 byte transmission with a 1448 - * byte segsize, and its the third time we have retransmitted this - * segment, we would add to the rc_cnt_of_retran[2] the value of - * 3. That represents 3 MSS were retransmitted 3 times (index is - * the number of times retranmitted minus 1). - */ -static void -rack_peg_rxt(struct tcp_rack *rack, struct rack_sendmap *rsm, uint32_t segsiz) -{ - int idx; - uint32_t peg; - - peg = ((rsm->r_end - rsm->r_start) + segsiz) - 1; - peg /= segsiz; - idx = rsm->r_act_rxt_cnt - 1; - if (idx >= RETRAN_CNT_SIZE) - idx = RETRAN_CNT_SIZE - 1; - /* Max of a uint16_t retransmits in a bucket */ - if ((rack->r_ctl.rc_cnt_of_retran[idx] + peg) < 0xffff) - rack->r_ctl.rc_cnt_of_retran[idx] += peg; - else - rack->r_ctl.rc_cnt_of_retran[idx] = 0xffff; -} - -/* - * We maintain an array fo 16 (RETRAN_CNT_SIZE) entries. This - * array is zeroed at the start of recovery. 
Each time a segment - * is retransmitted, we translate that into a number of packets - * (based on segsiz) and based on how many times its been retransmitted - * increment by the number of packets the counter that represents - * retansmitted N times. Index 0 is retransmitted 1 time, index 1 - * is retransmitted 2 times etc. - * - * The rack_unpeg_rxt is used when we go to retransmit a segment - * again. Basically if the segment had previously been retransmitted - * say 3 times (as our previous example illustrated in the comment - * above rack_peg_rxt() prior to calling that and incrementing - * r_ack_rxt_cnt we would have called rack_unpeg_rxt() that would - * subtract back the previous add from its last rxt (in this - * example r_act_cnt would have been 2 for 2 retransmissions. So - * we would have subtracted 3 from rc_cnt_of_reetran[1] to remove - * those 3 segments. You will see this in the rack_update_rsm() - * below where we do: - * if (rsm->r_act_rxt_cnt > 0) { - * rack_unpeg_rxt(rack, rsm, segsiz); - * } - * rsm->r_act_rxt_cnt++; - * rack_peg_rxt(rack, rsm, segsiz); - * - * This effectively moves the count from rc_cnt_of_retran[1] to - * rc_cnt_of_retran[2]. - */ -static void -rack_unpeg_rxt(struct tcp_rack *rack, struct rack_sendmap *rsm, uint32_t segsiz) -{ - int idx; - uint32_t peg; - - idx = rsm->r_act_rxt_cnt - 1; - if (idx >= RETRAN_CNT_SIZE) - idx = RETRAN_CNT_SIZE - 1; - peg = ((rsm->r_end - rsm->r_start) + segsiz) - 1; - peg /= segsiz; - if (peg < rack->r_ctl.rc_cnt_of_retran[idx]) - rack->r_ctl.rc_cnt_of_retran[idx] -= peg; - else { - /* TSNH */ - rack->r_ctl.rc_cnt_of_retran[idx] = 0; - } -} - static void rack_update_rsm(struct tcpcb *tp, struct tcp_rack *rack, struct rack_sendmap *rsm, uint64_t ts, uint32_t add_flag, int segsiz) @@ -8890,13 +8124,8 @@ rack_update_rsm(struct tcpcb *tp, struct tcp_rack *rack, rsm->r_rtr_cnt = RACK_NUM_OF_RETRANS; rsm->r_flags |= RACK_OVERMAX; } - if (rsm->r_act_rxt_cnt > 0) { - /* Drop the count back for this, its retransmitting again */ - rack_unpeg_rxt(rack, rsm, segsiz); - } rsm->r_act_rxt_cnt++; /* Peg the count/index */ - rack_peg_rxt(rack, rsm, segsiz); rack_log_retran_reason(rack, rsm, __LINE__, 0, 2); rsm->r_dupack = 0; if ((rsm->r_rtr_cnt > 1) && ((rsm->r_flags & RACK_TLP) == 0)) { @@ -8909,7 +8138,7 @@ rack_update_rsm(struct tcpcb *tp, struct tcp_rack *rack, * remove the lost desgination and reduce the * bytes considered lost. 
*/ - rsm->r_flags &= ~RACK_WAS_LOST; + rsm->r_flags &= ~RACK_WAS_LOST; KASSERT((rack->r_ctl.rc_considered_lost >= (rsm->r_end - rsm->r_start)), ("rsm:%p rack:%p rc_considered_lost goes negative", rsm, rack)); if (rack->r_ctl.rc_considered_lost >= (rsm->r_end - rsm->r_start)) @@ -9604,7 +8833,7 @@ rack_apply_updated_usrtt(struct tcp_rack *rack, uint32_t us_rtt, uint32_t us_cts val = rack_probertt_lower_within * rack_time_between_probertt; val /= 100; - if ((rack->in_probe_rtt == 0) && + if ((rack->in_probe_rtt == 0) && (rack->rc_skip_timely == 0) && ((us_cts - rack->r_ctl.rc_lower_rtt_us_cts) >= (rack_time_between_probertt - val))) { rack_enter_probertt(rack, us_cts); @@ -10092,40 +9321,19 @@ is_rsm_inside_declared_tlp_block(struct tcp_rack *rack, struct rack_sendmap *rsm static uint32_t rack_proc_sack_blk(struct tcpcb *tp, struct tcp_rack *rack, struct sackblk *sack, struct tcpopt *to, struct rack_sendmap **prsm, uint32_t cts, - int *no_extra, - int *moved_two, uint32_t segsiz) + uint32_t segsiz) { uint32_t start, end, changed = 0; struct rack_sendmap stack_map; struct rack_sendmap *rsm, *nrsm, *prev, *next; int insret __diagused; int32_t used_ref = 1; - int moved = 0; -#ifdef TCP_SAD_DETECTION - int allow_segsiz; - int first_time_through = 1; -#endif - int noextra = 0; int can_use_hookery = 0; start = sack->start; end = sack->end; rsm = *prsm; -#ifdef TCP_SAD_DETECTION - /* - * There are a strange number of proxys and meddle boxes in the world - * that seem to cut up segments on different boundaries. This gets us - * smaller sacks that are still ok in terms of it being an attacker. - * We use the base segsiz to calculate an allowable smallness but - * also enforce a min on the segsiz in case it is an attacker playing - * games with MSS. So basically if the sack arrives and it is - * larger than a worse case 960 bytes, we don't classify the guy - * as supicious. - */ - allow_segsiz = max(segsiz, 1200) * sad_seg_size_per; - allow_segsiz /= 1000; -#endif do_rest_ofb: if ((rsm == NULL) || (SEQ_LT(end, rsm->r_start)) || @@ -10137,105 +9345,11 @@ do_rest_ofb: */ used_ref = 0; rsm = tqhash_find(rack->r_ctl.tqh, start); - moved++; } if (rsm == NULL) { /* TSNH */ goto out; } -#ifdef TCP_SAD_DETECTION - /* Now we must check for suspicous activity */ - if ((first_time_through == 1) && - ((end - start) < min((rsm->r_end - rsm->r_start), allow_segsiz)) && - ((rsm->r_flags & RACK_PMTU_CHG) == 0) && - ((rsm->r_flags & RACK_TLP) == 0)) { - /* - * Its less than a full MSS or the segment being acked - * this should only happen if the rsm in question had the - * r_just_ret flag set <and> the end matches the end of - * the rsm block. - * - * Note we do not look at segments that have had TLP's on - * them since we can get un-reported rwnd collapses that - * basically we TLP on and then we get back a sack block - * that goes from the start to only a small way. - * - */ - int loss, ok; - - ok = 0; - if (SEQ_GEQ(end, rsm->r_end)) { - if (rsm->r_just_ret == 1) { - /* This was at the end of a send which is ok */ - ok = 1; - } else { - /* A bit harder was it the end of our segment */ - int segs, len; - - len = (rsm->r_end - rsm->r_start); - segs = len / segsiz; - segs *= segsiz; - if ((segs + (rsm->r_end - start)) == len) { - /* - * So this last bit was the - * end of our send if we cut it - * up into segsiz pieces so its ok. - */ - ok = 1; - } - } - } - if (ok == 0) { - /* - * This guy is doing something suspicious - * lets start detection. 
- */ - if (rack->rc_suspicious == 0) { - tcp_trace_point(rack->rc_tp, TCP_TP_SAD_SUSPECT); - counter_u64_add(rack_sack_attacks_suspect, 1); - rack->rc_suspicious = 1; - rack_log_sad(rack, 4); - if (tcp_bblogging_on(rack->rc_tp)) { - union tcp_log_stackspecific log; - struct timeval tv; - - memset(&log.u_bbr, 0, sizeof(log.u_bbr)); - log.u_bbr.flex1 = end; - log.u_bbr.flex2 = start; - log.u_bbr.flex3 = rsm->r_end; - log.u_bbr.flex4 = rsm->r_start; - log.u_bbr.flex5 = segsiz; - log.u_bbr.flex6 = rsm->r_fas; - log.u_bbr.flex7 = rsm->r_bas; - log.u_bbr.flex8 = 5; - log.u_bbr.pkts_out = rsm->r_flags; - log.u_bbr.bbr_state = rack->rc_suspicious; - log.u_bbr.bbr_substate = rsm->r_just_ret; - log.u_bbr.timeStamp = tcp_get_usecs(&tv); - log.u_bbr.inflight = ctf_flight_size(rack->rc_tp, rack->r_ctl.rc_sacked); - TCP_LOG_EVENTP(rack->rc_tp, NULL, - &rack->rc_inp->inp_socket->so_rcv, - &rack->rc_inp->inp_socket->so_snd, - TCP_SAD_DETECTION, 0, - 0, &log, false, &tv); - } - } - /* You loose some ack count every time you sack - * a small bit that is not butting to the end of - * what we have sent. This is because we never - * send small bits unless its the end of the sb. - * Anyone sending a sack that is not at the end - * is thus very very suspicious. - */ - loss = (segsiz/2) / (end - start); - if (loss < rack->r_ctl.ack_count) - rack->r_ctl.ack_count -= loss; - else - rack->r_ctl.ack_count = 0; - } - } - first_time_through = 0; -#endif /* Ok we have an ACK for some piece of this rsm */ if (rsm->r_start != start) { if ((rsm->r_flags & RACK_ACKED) == 0) { @@ -10332,7 +9446,6 @@ do_rest_ofb: * use to update all the gizmos. */ /* Copy up our fudge block */ - noextra++; nrsm = &stack_map; memcpy(nrsm, rsm, sizeof(struct rack_sendmap)); /* Now adjust our tree blocks */ @@ -10383,9 +9496,6 @@ do_rest_ofb: if (rack->app_limited_needs_set) rack_need_set_test(tp, rack, nrsm, tp->snd_una, __LINE__, RACK_USE_END); changed += (nrsm->r_end - nrsm->r_start); - /* You get a count for acking a whole segment or more */ - if ((nrsm->r_end - nrsm->r_start) >= segsiz) - rack->r_ctl.ack_count += ((nrsm->r_end - nrsm->r_start) / segsiz); rack->r_ctl.rc_sacked += (nrsm->r_end - nrsm->r_start); if (rsm->r_flags & RACK_WAS_LOST) { int my_chg; @@ -10463,7 +9573,6 @@ do_rest_ofb: } counter_u64_add(rack_sack_splits, 1); rack_clone_rsm(rack, nrsm, rsm, start); - moved++; rsm->r_just_ret = 0; #ifndef INVARIANTS (void)tqhash_insert(rack->r_ctl.tqh, nrsm); @@ -10485,14 +9594,12 @@ do_rest_ofb: } else { /* Already sacked this piece */ counter_u64_add(rack_sack_skipped_acked, 1); - moved++; if (end == rsm->r_end) { /* Done with block */ rsm = tqhash_next(rack->r_ctl.tqh, rsm); goto out; } else if (SEQ_LT(end, rsm->r_end)) { /* A partial sack to a already sacked block */ - moved++; rsm = tqhash_next(rack->r_ctl.tqh, rsm); goto out; } else { @@ -10559,8 +9666,6 @@ do_rest_ofb: rack_update_rtt(tp, rack, rsm, to, cts, SACKED, 0); changed += (rsm->r_end - rsm->r_start); /* You get a count for acking a whole segment or more */ - if ((rsm->r_end - rsm->r_start) >= segsiz) - rack->r_ctl.ack_count += ((rsm->r_end - rsm->r_start) / segsiz); if (rsm->r_flags & RACK_WAS_LOST) { int my_chg; @@ -10595,7 +9700,6 @@ do_rest_ofb: rack_log_map_chg(tp, rack, NULL, rsm, NULL, MAP_SACK_M3, end, __LINE__); } else { counter_u64_add(rack_sack_skipped_acked, 1); - moved++; } if (end == rsm->r_end) { /* This block only - done, setup for next */ @@ -10693,7 +9797,6 @@ do_rest_ofb: * Note if either prev/rsm is a TLP we don't * do this. 
*/ - noextra++; nrsm = &stack_map; memcpy(nrsm, rsm, sizeof(struct rack_sendmap)); tqhash_update_end(rack->r_ctl.tqh, prev, end); @@ -10752,10 +9855,6 @@ do_rest_ofb: if (rack->app_limited_needs_set) rack_need_set_test(tp, rack, nrsm, tp->snd_una, __LINE__, RACK_USE_END); changed += (nrsm->r_end - nrsm->r_start); - /* You get a count for acking a whole segment or more */ - if ((nrsm->r_end - nrsm->r_start) >= segsiz) - rack->r_ctl.ack_count += ((nrsm->r_end - nrsm->r_start) / segsiz); - rack->r_ctl.rc_sacked += (nrsm->r_end - nrsm->r_start); if (rsm->r_flags & RACK_WAS_LOST) { int my_chg; @@ -10842,7 +9941,6 @@ do_rest_ofb: */ counter_u64_add(rack_sack_splits, 1); rack_clone_rsm(rack, nrsm, rsm, end); - moved++; rsm->r_flags &= (~RACK_HAS_FIN); rsm->r_just_ret = 0; #ifndef INVARIANTS @@ -10861,9 +9959,6 @@ do_rest_ofb: rack_log_retran_reason(rack, nrsm, __LINE__, 0, 2); rack_update_rtt(tp, rack, rsm, to, cts, SACKED, 0); changed += (rsm->r_end - rsm->r_start); - /* You get a count for acking a whole segment or more */ - if ((rsm->r_end - rsm->r_start) >= segsiz) - rack->r_ctl.ack_count += ((rsm->r_end - rsm->r_start) / segsiz); if (rsm->r_flags & RACK_WAS_LOST) { int my_chg; @@ -10903,7 +9998,6 @@ do_rest_ofb: * The block was already acked. */ counter_u64_add(rack_sack_skipped_acked, 1); - moved++; } out: if (rsm && @@ -10940,7 +10034,6 @@ out: if (next->r_flags & RACK_ACKED) { /* yep this and next can be merged */ rsm = rack_merge_rsm(rack, rsm, next); - noextra++; next = tqhash_next(rack->r_ctl.tqh, rsm); } else break; @@ -10972,7 +10065,6 @@ out: if (prev->r_flags & RACK_ACKED) { /* yep the previous and this can be merged */ rsm = rack_merge_rsm(rack, prev, rsm); - noextra++; prev = tqhash_prev(rack->r_ctl.tqh, rsm); } else break; @@ -10986,12 +10078,6 @@ out: /* Save off the next one for quick reference. */ nrsm = tqhash_find(rack->r_ctl.tqh, end); *prsm = rack->r_ctl.rc_sacklast = nrsm; - /* Pass back the moved. */ - *moved_two = moved; - *no_extra = noextra; - if (IN_RECOVERY(tp->t_flags)) { - rack->r_ctl.bytes_acked_in_recovery += changed; - } return (changed); } @@ -11030,66 +10116,6 @@ rack_peer_reneges(struct tcp_rack *rack, struct rack_sendmap *rsm, tcp_seq th_ac } -static void -rack_do_decay(struct tcp_rack *rack) -{ - struct timeval res; - -#define timersub(tvp, uvp, vvp) \ - do { \ - (vvp)->tv_sec = (tvp)->tv_sec - (uvp)->tv_sec; \ - (vvp)->tv_usec = (tvp)->tv_usec - (uvp)->tv_usec; \ - if ((vvp)->tv_usec < 0) { \ - (vvp)->tv_sec--; \ - (vvp)->tv_usec += 1000000; \ - } \ - } while (0) - - timersub(&rack->r_ctl.act_rcv_time, &rack->r_ctl.rc_last_time_decay, &res); -#undef timersub - - rack->r_ctl.input_pkt++; - if ((rack->rc_in_persist) || - (res.tv_sec >= 1) || - (rack->rc_tp->snd_max == rack->rc_tp->snd_una)) { - /* - * Check for decay of non-SAD, - * we want all SAD detection metrics to - * decay 1/4 per second (or more) passed. - * Current default is 800 so it decays - * 80% every second. - */ -#ifdef TCP_SAD_DETECTION - uint32_t pkt_delta; - - pkt_delta = rack->r_ctl.input_pkt - rack->r_ctl.saved_input_pkt; -#endif - /* Update our saved tracking values */ - rack->r_ctl.saved_input_pkt = rack->r_ctl.input_pkt; - rack->r_ctl.rc_last_time_decay = rack->r_ctl.act_rcv_time; - /* Now do we escape without decay? */ -#ifdef TCP_SAD_DETECTION - if (rack->rc_in_persist || - (rack->rc_tp->snd_max == rack->rc_tp->snd_una) || - (pkt_delta < tcp_sad_low_pps)){ - /* - * We don't decay idle connections - * or ones that have a low input pps. 
- */ - return; - } - /* Decay the counters */ - rack->r_ctl.ack_count = ctf_decay_count(rack->r_ctl.ack_count, - tcp_sad_decay_val); - rack->r_ctl.sack_count = ctf_decay_count(rack->r_ctl.sack_count, - tcp_sad_decay_val); - rack->r_ctl.sack_moved_extra = ctf_decay_count(rack->r_ctl.sack_moved_extra, - tcp_sad_decay_val); - rack->r_ctl.sack_noextra_move = ctf_decay_count(rack->r_ctl.sack_noextra_move, - tcp_sad_decay_val); -#endif - } -} static void inline rack_rsm_sender_update(struct tcp_rack *rack, struct tcpcb *tp, struct rack_sendmap *rsm, uint8_t from) @@ -11197,7 +10223,7 @@ rack_process_to_cumack(struct tcpcb *tp, struct tcp_rack *rack, register uint32_ * If we have some sack blocks in the filter * lets prune them out by calling sfb with no blocks. */ - sack_filter_blks(&rack->r_ctl.rack_sf, NULL, 0, th_ack); + sack_filter_blks(tp, &rack->r_ctl.rack_sf, NULL, 0, th_ack); } if (SEQ_GT(th_ack, tp->snd_una)) { /* Clear any app ack remembered settings */ @@ -11344,7 +10370,7 @@ more: * and yet before retransmitting we get an ack * which can happen due to reordering. */ - rsm->r_flags &= ~RACK_WAS_LOST; + rsm->r_flags &= ~RACK_WAS_LOST; KASSERT((rack->r_ctl.rc_considered_lost >= (rsm->r_end - rsm->r_start)), ("rsm:%p rack:%p rc_considered_lost goes negative", rsm, rack)); if (rack->r_ctl.rc_considered_lost >= (rsm->r_end - rsm->r_start)) @@ -11366,10 +10392,6 @@ more: rsm->r_in_tmap = 0; } newly_acked = 1; - if (((rsm->r_flags & RACK_ACKED) == 0) && - (IN_RECOVERY(tp->t_flags))) { - rack->r_ctl.bytes_acked_in_recovery += (rsm->r_end - rsm->r_start); - } if (rsm->r_flags & RACK_ACKED) { /* * It was acked on the scoreboard -- remove @@ -11452,10 +10474,6 @@ more: */ rack->r_ctl.rc_sacked -= (th_ack - rsm->r_start); } else { - if (((rsm->r_flags & RACK_ACKED) == 0) && - (IN_RECOVERY(tp->t_flags))) { - rack->r_ctl.bytes_acked_in_recovery += (th_ack - rsm->r_start); - } rack_update_pcm_ack(rack, 1, rsm->r_start, th_ack); } /* And what about the lost flag? */ @@ -11606,192 +10624,11 @@ rack_handle_might_revert(struct tcpcb *tp, struct tcp_rack *rack) tp->snd_ssthresh = rack->r_ctl.rto_ssthresh; } } - rack->r_ctl.bytes_acked_in_recovery = 0; - rack->r_ctl.time_entered_recovery = 0; } rack->r_might_revert = 0; } } -#ifdef TCP_SAD_DETECTION - -static void -rack_merge_out_sacks(struct tcp_rack *rack) -{ - struct rack_sendmap *cur, *next, *rsm, *trsm = NULL; - - cur = tqhash_min(rack->r_ctl.tqh); - while(cur) { - next = tqhash_next(rack->r_ctl.tqh, cur); - /* - * The idea is to go through all and merge back - * together the pieces sent together, - */ - if ((next != NULL) && - (cur->r_tim_lastsent[0] == next->r_tim_lastsent[0])) { - rack_merge_rsm(rack, cur, next); - } else { - cur = next; - } - } - /* - * now treat it like a rxt event, everything is outstanding - * and sent nothing acvked and dupacks are all zero. If this - * is not an attacker it will have to dupack its way through - * it all. 
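The rack_do_decay() routine removed here bumped input_pkt on every call and, once the connection was in persist, idle, or at least one second had elapsed since the last decay (measured with a locally defined timersub()), scaled the SAD counters down with ctf_decay_count() using tcp_sad_decay_val (default 800 per the removed comment). A minimal sketch of the elapsed-time gate and of a multiplicative decay follows; decay_counter_ex() is a hypothetical stand-in and the val/1000 scaling is an assumption inferred from that comment, not a statement of what ctf_decay_count() actually does.

#include <stdint.h>
#include <sys/time.h>

/* Assumed semantics: keep cnt * val / 1000 (val == 800 keeps roughly 80%). */
static uint32_t
decay_counter_ex(uint32_t cnt, uint32_t val)
{
	return ((uint32_t)(((uint64_t)cnt * val) / 1000));
}

/* The same subtraction the removed timersub() macro performed. */
static int
one_second_elapsed_ex(const struct timeval *now, const struct timeval *last)
{
	struct timeval res;

	res.tv_sec = now->tv_sec - last->tv_sec;
	res.tv_usec = now->tv_usec - last->tv_usec;
	if (res.tv_usec < 0) {
		res.tv_sec--;
		res.tv_usec += 1000000;
	}
	return (res.tv_sec >= 1);
}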
- */ - TAILQ_INIT(&rack->r_ctl.rc_tmap); - TQHASH_FOREACH(rsm, rack->r_ctl.tqh) { - rsm->r_dupack = 0; - /* We must re-add it back to the tlist */ - if (trsm == NULL) { - TAILQ_INSERT_HEAD(&rack->r_ctl.rc_tmap, rsm, r_tnext); - } else { - TAILQ_INSERT_AFTER(&rack->r_ctl.rc_tmap, trsm, rsm, r_tnext); - } - rsm->r_in_tmap = 1; - trsm = rsm; - rsm->r_flags &= ~(RACK_ACKED | RACK_SACK_PASSED | RACK_WAS_SACKPASS | RACK_RWND_COLLAPSED); - } - sack_filter_clear(&rack->r_ctl.rack_sf, rack->rc_tp->snd_una); -} - -static void -rack_do_detection(struct tcpcb *tp, struct tcp_rack *rack, uint32_t bytes_this_ack, uint32_t segsiz) -{ - int do_detection = 0; - - if (rack->sack_attack_disable || rack->rc_suspicious) { - /* - * If we have been disabled we must detect - * to possibly reverse it. Or if the guy has - * sent in suspicious sacks we want to do detection too. - */ - do_detection = 1; - - } else if ((rack->do_detection || tcp_force_detection) && - (tcp_sack_to_ack_thresh > 0) && - (tcp_sack_to_move_thresh > 0) && - (rack->r_ctl.rc_num_maps_alloced > tcp_map_minimum)) { - /* - * We only detect here if: - * 1) System wide forcing is on <or> do_detection is on - * <and> - * 2) We have thresholds for move and ack (set one to 0 and we are off) - * <and> - * 3) We have maps allocated larger than our min (500). - */ - do_detection = 1; - } - if (do_detection > 0) { - /* - * We have thresholds set to find - * possible attackers and disable sack. - * Check them. - */ - uint64_t ackratio, moveratio, movetotal; - - /* Log detecting */ - rack_log_sad(rack, 1); - /* Do we establish a ack ratio */ - if ((rack->r_ctl.sack_count > tcp_map_minimum) || - (rack->rc_suspicious == 1) || - (rack->sack_attack_disable > 0)) { - ackratio = (uint64_t)(rack->r_ctl.sack_count); - ackratio *= (uint64_t)(1000); - if (rack->r_ctl.ack_count) - ackratio /= (uint64_t)(rack->r_ctl.ack_count); - else { - /* We can hit this due to ack totals degregation (via small sacks) */ - ackratio = 1000; - } - } else { - /* - * No ack ratio needed if we have not - * seen more sacks then the number of map entries. - * The exception to that is if we have disabled sack then - * we need to find a ratio. - */ - ackratio = 0; - } - - if ((rack->sack_attack_disable == 0) && - (ackratio > rack_highest_sack_thresh_seen)) - rack_highest_sack_thresh_seen = (uint32_t)ackratio; - /* Do we establish a move ratio? */ - if ((rack->r_ctl.sack_moved_extra > tcp_map_minimum) || - (rack->rc_suspicious == 1) || - (rack->sack_attack_disable > 0)) { - /* - * We need to have more sack moves than maps - * allocated to have a move ratio considered. - */ - movetotal = rack->r_ctl.sack_moved_extra; - movetotal += rack->r_ctl.sack_noextra_move; - moveratio = rack->r_ctl.sack_moved_extra; - moveratio *= (uint64_t)1000; - if (movetotal) - moveratio /= movetotal; - else { - /* No moves, thats pretty good */ - moveratio = 0; - } - } else { - /* - * Not enough moves have occured to consider - * if we are out of whack in that ratio. - * The exception to that is if we have disabled sack then - * we need to find a ratio. - */ - moveratio = 0; - } - if ((rack->sack_attack_disable == 0) && - (moveratio > rack_highest_move_thresh_seen)) - rack_highest_move_thresh_seen = (uint32_t)moveratio; - /* Now the tests */ - if (rack->sack_attack_disable == 0) { - /* Not disabled, do we need to disable? 
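rack_merge_out_sacks(), deleted above, walked the send map in sequence order, merged adjacent entries whose first transmit timestamps (r_tim_lastsent[0]) matched, and then rebuilt the transmit queue with the ACKED/SACK_PASSED state cleared so an honest peer would have to dup-ack its way through the outstanding data again. The fragment below only mirrors the coalescing walk over a singly linked list; the seg_ex type and merge helper are invented for the example, while the real code merges tqhash entries with rack_merge_rsm().

#include <stddef.h>
#include <stdint.h>

struct seg_ex {
	struct seg_ex	*next;
	uint64_t	 first_send_ts;	/* stand-in for r_tim_lastsent[0] */
	uint32_t	 start;
	uint32_t	 end;
};

/* Absorb nxt into cur and unlink it (no freeing in this sketch). */
static void
merge_into_ex(struct seg_ex *cur, struct seg_ex *nxt)
{
	cur->end = nxt->end;
	cur->next = nxt->next;
}

/* Coalesce neighbours that were part of the same original send. */
static void
coalesce_same_send_ex(struct seg_ex *head)
{
	struct seg_ex *cur = head;

	while (cur != NULL) {
		struct seg_ex *nxt = cur->next;

		if (nxt != NULL && cur->first_send_ts == nxt->first_send_ts)
			merge_into_ex(cur, nxt);	/* stay on cur, re-check new next */
		else
			cur = nxt;
	}
}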
*/ - if ((ackratio > tcp_sack_to_ack_thresh) && - (moveratio > tcp_sack_to_move_thresh)) { - /* Disable sack processing */ - tcp_trace_point(rack->rc_tp, TCP_TP_SAD_TRIGGERED); - rack->sack_attack_disable = 1; - /* set it so we have the built in delay */ - rack->r_ctl.ack_during_sd = 1; - if (rack_merge_out_sacks_on_attack) - rack_merge_out_sacks(rack); - counter_u64_add(rack_sack_attacks_detected, 1); - tcp_trace_point(rack->rc_tp, TCP_TP_SAD_TRIGGERED); - /* Clamp the cwnd at flight size */ - rack->r_ctl.rc_saved_cwnd = rack->rc_tp->snd_cwnd; - rack->rc_tp->snd_cwnd = ctf_flight_size(rack->rc_tp, rack->r_ctl.rc_sacked); - rack_log_sad(rack, 2); - } - } else { - /* We are sack-disabled check for false positives */ - if ((ackratio <= tcp_restoral_thresh) || - ((rack_merge_out_sacks_on_attack == 0) && - (rack->rc_suspicious == 0) && - (rack->r_ctl.rc_num_maps_alloced <= (tcp_map_minimum/2)))) { - rack->sack_attack_disable = 0; - rack_log_sad(rack, 3); - /* Restart counting */ - rack->r_ctl.sack_count = 0; - rack->r_ctl.sack_moved_extra = 0; - rack->r_ctl.sack_noextra_move = 1; - rack->rc_suspicious = 0; - rack->r_ctl.ack_count = max(1, - (bytes_this_ack / segsiz)); - - counter_u64_add(rack_sack_attacks_reversed, 1); - /* Restore the cwnd */ - if (rack->r_ctl.rc_saved_cwnd > rack->rc_tp->snd_cwnd) - rack->rc_tp->snd_cwnd = rack->r_ctl.rc_saved_cwnd; - } - } - } -} -#endif static int rack_note_dsack(struct tcp_rack *rack, tcp_seq start, tcp_seq end) @@ -11948,9 +10785,9 @@ rack_log_ack(struct tcpcb *tp, struct tcpopt *to, struct tcphdr *th, int entered register uint32_t th_ack; int32_t i, j, k, num_sack_blks = 0; uint32_t cts, acked, ack_point; - int loop_start = 0, moved_two = 0, no_extra = 0; + int loop_start = 0; uint32_t tsused; - uint32_t segsiz, o_cnt; + uint32_t segsiz; INP_WLOCK_ASSERT(tptoinpcb(tp)); @@ -11963,8 +10800,6 @@ rack_log_ack(struct tcpcb *tp, struct tcpopt *to, struct tcphdr *th, int entered rsm = tqhash_min(rack->r_ctl.tqh); changed = 0; th_ack = th->th_ack; - if (rack->sack_attack_disable == 0) - rack_do_decay(rack); segsiz = ctf_fixed_maxseg(rack->rc_tp); if (BYTES_THIS_ACK(tp, th) >= segsiz) { /* @@ -11975,17 +10810,8 @@ rack_log_ack(struct tcpcb *tp, struct tcpopt *to, struct tcphdr *th, int entered int ac; ac = BYTES_THIS_ACK(tp, th) / ctf_fixed_maxseg(rack->rc_tp); - rack->r_ctl.ack_count += ac; counter_u64_add(rack_ack_total, ac); } - if (rack->r_ctl.ack_count > 0xfff00000) { - /* - * reduce the number to keep us under - * a uint32_t. - */ - rack->r_ctl.ack_count /= 2; - rack->r_ctl.sack_count /= 2; - } if (SEQ_GT(th_ack, tp->snd_una)) { rack_log_progress_event(rack, tp, ticks, PROGRESS_UPDATE, __LINE__); tp->t_acktime = ticks; @@ -12051,52 +10877,16 @@ rack_log_ack(struct tcpcb *tp, struct tcpopt *to, struct tcphdr *th, int entered * Sort the SACK blocks so we can update the rack scoreboard with * just one pass. 
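The detection logic removed in this hunk boiled down to two per-mille ratios: SACK blocks processed per ACK counted, and "extra" scoreboard moves per total move decisions, each compared with a sysctl threshold before SACK processing was disabled, plus a restoral check on the ack ratio afterwards. The helpers below restate only that ratio arithmetic with example-local names; thresholds such as tcp_sack_to_ack_thresh and tcp_sack_to_move_thresh are passed in rather than read from the real sysctls.

#include <stdint.h>

/* SACKs seen per 1000 ACKs; saturate when no ACKs have been counted. */
static uint64_t
sad_ack_ratio_ex(uint64_t sack_count, uint64_t ack_count)
{
	if (ack_count == 0)
		return (1000);
	return ((sack_count * 1000) / ack_count);
}

/* "Extra" scoreboard moves per 1000 total moves. */
static uint64_t
sad_move_ratio_ex(uint64_t moved_extra, uint64_t noextra_move)
{
	uint64_t total = moved_extra + noextra_move;

	if (total == 0)
		return (0);
	return ((moved_extra * 1000) / total);
}

/* Both ratios had to exceed their thresholds before SACK was disabled. */
static int
sad_should_disable_ex(uint64_t ackratio, uint64_t moveratio,
    uint64_t ack_thresh, uint64_t move_thresh)
{
	return ((ackratio > ack_thresh) && (moveratio > move_thresh));
}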
*/ - o_cnt = num_sack_blks; - num_sack_blks = sack_filter_blks(&rack->r_ctl.rack_sf, sack_blocks, + num_sack_blks = sack_filter_blks(tp, &rack->r_ctl.rack_sf, sack_blocks, num_sack_blks, th->th_ack); ctf_log_sack_filter(rack->rc_tp, num_sack_blks, sack_blocks); if (sacks_seen != NULL) *sacks_seen = num_sack_blks; if (num_sack_blks == 0) { /* Nothing to sack, but we need to update counts */ - if ((o_cnt == 1) && - (*dsack_seen != 1)) - rack->r_ctl.sack_count++; - else if (o_cnt > 1) - rack->r_ctl.sack_count++; goto out_with_totals; } - if (rack->sack_attack_disable) { - /* - * An attacker disablement is in place, for - * every sack block that is not at least a full MSS - * count up sack_count. - */ - for (i = 0; i < num_sack_blks; i++) { - if ((sack_blocks[i].end - sack_blocks[i].start) < segsiz) { - rack->r_ctl.sack_count++; - } - if (rack->r_ctl.sack_count > 0xfff00000) { - /* - * reduce the number to keep us under - * a uint32_t. - */ - rack->r_ctl.ack_count /= 2; - rack->r_ctl.sack_count /= 2; - } - } - goto out; - } /* Its a sack of some sort */ - rack->r_ctl.sack_count += num_sack_blks; - if (rack->r_ctl.sack_count > 0xfff00000) { - /* - * reduce the number to keep us under - * a uint32_t. - */ - rack->r_ctl.ack_count /= 2; - rack->r_ctl.sack_count /= 2; - } if (num_sack_blks < 2) { /* Only one, we don't need to sort */ goto do_sack_work; @@ -12164,7 +10954,7 @@ do_sack_work: * We probably did the FR and the next * SACK in continues as we would expect. */ - acked = rack_proc_sack_blk(tp, rack, &sack_blocks[0], to, &rsm, cts, &no_extra, &moved_two, segsiz); + acked = rack_proc_sack_blk(tp, rack, &sack_blocks[0], to, &rsm, cts, segsiz); if (acked) { rack->r_wanted_output = 1; changed += acked; @@ -12180,40 +10970,8 @@ do_sack_work: * are acked). Count this as ACK'd data to boost * up the chances of recovering any false positives. */ - rack->r_ctl.ack_count += (acked / ctf_fixed_maxseg(rack->rc_tp)); counter_u64_add(rack_ack_total, (acked / ctf_fixed_maxseg(rack->rc_tp))); counter_u64_add(rack_express_sack, 1); - if (rack->r_ctl.ack_count > 0xfff00000) { - /* - * reduce the number to keep us under - * a uint32_t. - */ - rack->r_ctl.ack_count /= 2; - rack->r_ctl.sack_count /= 2; - } - if (moved_two) { - /* - * If we did not get a SACK for at least a MSS and - * had to move at all, or if we moved more than our - * threshold, it counts against the "extra" move. - */ - rack->r_ctl.sack_moved_extra += moved_two; - rack->r_ctl.sack_noextra_move += no_extra; - counter_u64_add(rack_move_some, 1); - } else { - /* - * else we did not have to move - * any more than we would expect. - */ - rack->r_ctl.sack_noextra_move += no_extra; - rack->r_ctl.sack_noextra_move++; - counter_u64_add(rack_move_none, 1); - } - if ((rack->r_ctl.sack_moved_extra > 0xfff00000) || - (rack->r_ctl.sack_noextra_move > 0xfff00000)) { - rack->r_ctl.sack_moved_extra /= 2; - rack->r_ctl.sack_noextra_move /= 2; - } goto out_with_totals; } else { /* @@ -12226,57 +10984,11 @@ do_sack_work: counter_u64_add(rack_sack_total, 1); rsm = rack->r_ctl.rc_sacklast; for (i = loop_start; i < num_sack_blks; i++) { - acked = rack_proc_sack_blk(tp, rack, &sack_blocks[i], to, &rsm, cts, &no_extra, &moved_two, segsiz); + acked = rack_proc_sack_blk(tp, rack, &sack_blocks[i], to, &rsm, cts, segsiz); if (acked) { rack->r_wanted_output = 1; changed += acked; } - if (moved_two) { - /* - * If we did not get a SACK for at least a MSS and - * had to move at all, or if we moved more than our - * threshold, it counts against the "extra" move. 
- */ - rack->r_ctl.sack_moved_extra += moved_two; - rack->r_ctl.sack_noextra_move += no_extra; - counter_u64_add(rack_move_some, 1); - } else { - /* - * else we did not have to move - * any more than we would expect. - */ - rack->r_ctl.sack_noextra_move += no_extra; - rack->r_ctl.sack_noextra_move++; - counter_u64_add(rack_move_none, 1); - } - if ((rack->r_ctl.sack_moved_extra > 0xfff00000) || - (rack->r_ctl.sack_noextra_move > 0xfff00000)) { - rack->r_ctl.sack_moved_extra /= 2; - rack->r_ctl.sack_noextra_move /= 2; - } - if (moved_two && (acked < ctf_fixed_maxseg(rack->rc_tp))) { - /* - * If the SACK was not a full MSS then - * we add to sack_count the number of - * MSS's (or possibly more than - * a MSS if its a TSO send) we had to skip by. - */ - rack->r_ctl.sack_count += moved_two; - if (rack->r_ctl.sack_count > 0xfff00000) { - rack->r_ctl.ack_count /= 2; - rack->r_ctl.sack_count /= 2; - } - counter_u64_add(rack_sack_total, moved_two); - } - /* - * Now we need to setup for the next - * round. First we make sure we won't - * exceed the size of our uint32_t on - * the various counts, and then clear out - * moved_two. - */ - moved_two = 0; - no_extra = 0; } out_with_totals: if (num_sack_blks > 1) { @@ -12288,13 +11000,9 @@ out_with_totals: * it could be an attacker constantly * moving us. */ - rack->r_ctl.sack_moved_extra++; counter_u64_add(rack_move_some, 1); } out: -#ifdef TCP_SAD_DETECTION - rack_do_detection(tp, rack, BYTES_THIS_ACK(tp, th), ctf_fixed_maxseg(rack->rc_tp)); -#endif if (changed) { /* Something changed cancel the rack timer */ rack_timer_cancel(tp, rack, rack->r_ctl.rc_rcvtime, __LINE__); @@ -12358,7 +11066,7 @@ rack_strike_dupack(struct tcp_rack *rack, tcp_seq th_ack) * We need to skip anything already set * to be retransmitted. */ - if ((rsm->r_dupack >= DUP_ACK_THRESHOLD) || + if ((rsm->r_dupack >= DUP_ACK_THRESHOLD) || (rsm->r_flags & RACK_MUST_RXT)) { rsm = TAILQ_NEXT(rsm, r_tnext); continue; @@ -13061,10 +11769,45 @@ rack_process_ack(struct mbuf *m, struct tcphdr *th, struct socket *so, INP_WLOCK_ASSERT(tptoinpcb(tp)); rack = (struct tcp_rack *)tp->t_fb_ptr; + if (SEQ_GEQ(tp->snd_una, tp->iss + (65535 << tp->snd_scale))) { + /* Checking SEG.ACK against ISS is definitely redundant. */ + tp->t_flags2 |= TF2_NO_ISS_CHECK; + } + if (!V_tcp_insecure_ack) { + tcp_seq seq_min; + bool ghost_ack_check; + + if (tp->t_flags2 & TF2_NO_ISS_CHECK) { + /* Check for too old ACKs (RFC 5961, Section 5.2). */ + seq_min = tp->snd_una - tp->max_sndwnd; + ghost_ack_check = false; + } else { + if (SEQ_GT(tp->iss + 1, tp->snd_una - tp->max_sndwnd)) { + /* Checking for ghost ACKs is stricter. */ + seq_min = tp->iss + 1; + ghost_ack_check = true; + } else { + /* + * Checking for too old ACKs (RFC 5961, + * Section 5.2) is stricter. + */ + seq_min = tp->snd_una - tp->max_sndwnd; + ghost_ack_check = false; + } + } + if (SEQ_LT(th->th_ack, seq_min)) { + if (ghost_ack_check) + TCPSTAT_INC(tcps_rcvghostack); + else + TCPSTAT_INC(tcps_rcvacktooold); + /* Send challenge ACK. 
*/ + ctf_do_dropafterack(m, tp, th, thflags, tlen, ret_val); + rack->r_wanted_output = 1; + return (1); + } + } if (SEQ_GT(th->th_ack, tp->snd_max)) { - __ctf_do_dropafterack(m, tp, th, thflags, tlen, ret_val, - &rack->r_ctl.challenge_ack_ts, - &rack->r_ctl.challenge_ack_cnt); + ctf_do_dropafterack(m, tp, th, thflags, tlen, ret_val); rack->r_wanted_output = 1; return (1); } @@ -13092,24 +11835,6 @@ rack_process_ack(struct mbuf *m, struct tcphdr *th, struct socket *so, } rack_log_ack(tp, to, th, ((in_rec == 0) && IN_FASTRECOVERY(tp->t_flags)), dup_ack_struck, &dsack_seen, &sacks_seen); - if ((rack->sack_attack_disable > 0) && - (th->th_ack == tp->snd_una) && - (tiwin == tp->snd_wnd) && - (orig_tlen == 0) && - (dsack_seen == 0) && - (sacks_seen > 0)) { - /* - * If sacks have been disabled we may - * want to strike a dup-ack "ignoring" the - * sack as long as the sack was not a "dsack". Note - * that if no sack is sent (TOF_SACK is off) then the - * normal dsack code above rack_log_ack() would have - * already struck. So this is just to catch the case - * were we are ignoring sacks from this guy due to - * it being a suspected attacker. - */ - rack_strike_dupack(rack, th->th_ack); - } } if (__predict_false(SEQ_LEQ(th->th_ack, tp->snd_una))) { @@ -13248,7 +11973,7 @@ rack_process_ack(struct mbuf *m, struct tcphdr *th, struct socket *so, /* Must be non-newreno (cubic) getting too ahead of itself */ tp->snd_cwnd = p_cwnd; } - SOCKBUF_LOCK(&so->so_snd); + SOCK_SENDBUF_LOCK(so); acked_amount = min(acked, (int)sbavail(&so->so_snd)); tp->snd_wnd -= acked_amount; mfree = sbcut_locked(&so->so_snd, acked_amount); @@ -13289,8 +12014,6 @@ rack_process_ack(struct mbuf *m, struct tcphdr *th, struct socket *so, if (tp->snd_una == tp->snd_max) { /* Nothing left outstanding */ tp->t_flags &= ~TF_PREVVALID; - rack->r_ctl.idle_snd_una = tp->snd_una; - rack->r_ctl.rc_went_idle_time = tcp_get_usecs(NULL); if (rack->r_ctl.rc_went_idle_time == 0) rack->r_ctl.rc_went_idle_time = 1; rack->r_ctl.retran_during_recovery = 0; @@ -13348,7 +12071,7 @@ rack_log_collapse(struct tcp_rack *rack, uint32_t cnt, uint32_t split, uint32_t if (rsm == NULL) log.u_bbr.rttProp = 0; else - log.u_bbr.rttProp = (uint64_t)rsm; + log.u_bbr.rttProp = (uintptr_t)rsm; log.u_bbr.timeStamp = tcp_get_usecs(&tv); log.u_bbr.inflight = ctf_flight_size(rack->rc_tp, rack->r_ctl.rc_sacked); TCP_LOG_EVENTP(rack->rc_tp, NULL, @@ -13636,7 +12359,7 @@ rack_process_data(struct mbuf *m, struct tcphdr *th, struct socket *so, thflags = tcp_get_flags(th) & TH_FIN; KMOD_TCPSTAT_ADD(tcps_rcvpack, nsegs); KMOD_TCPSTAT_ADD(tcps_rcvbyte, tlen); - SOCKBUF_LOCK(&so->so_rcv); + SOCK_RECVBUF_LOCK(so); if (so->so_rcv.sb_state & SBS_CANTRCVMORE) { m_freem(m); } else { @@ -13878,7 +12601,7 @@ rack_do_fastnewdata(struct mbuf *m, struct tcphdr *th, struct socket *so, newsize = tcp_autorcvbuf(m, th, so, tp, tlen); /* Add data to socket buffer. 
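The code added above sets TF2_NO_ISS_CHECK once a full maximum window beyond the ISS has been acknowledged, and otherwise picks whichever ACK floor is stricter: the RFC 5961 Section 5.2 too-old-ACK bound (snd_una - max_sndwnd) or the ghost-ACK bound (iss + 1). Anything below the chosen floor is counted (tcps_rcvghostack or tcps_rcvacktooold) and answered with a challenge ACK. The sketch below shows only the floor selection; it uses plain unsigned integers and omits the SEQ_*() wraparound comparisons, so it is not a drop-in replacement for the real check.

#include <stdbool.h>
#include <stdint.h>

struct ack_floor_ex {
	uint32_t seq_min;	/* lowest acceptable SEG.ACK */
	bool	 ghost_check;	/* true when the floor came from ISS + 1 */
};

static struct ack_floor_ex
pick_ack_floor_ex(uint32_t iss, uint32_t snd_una, uint32_t max_sndwnd,
    bool no_iss_check)
{
	struct ack_floor_ex f;
	uint32_t window_floor = snd_una - max_sndwnd;

	if (no_iss_check || window_floor >= iss + 1) {
		/* Too-old-ACK check (RFC 5961, Section 5.2) is stricter. */
		f.seq_min = window_floor;
		f.ghost_check = false;
	} else {
		/* Checking for ghost ACKs below ISS + 1 is stricter. */
		f.seq_min = iss + 1;
		f.ghost_check = true;
	}
	return (f);
}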
*/ - SOCKBUF_LOCK(&so->so_rcv); + SOCK_RECVBUF_LOCK(so); if (so->so_rcv.sb_state & SBS_CANTRCVMORE) { m_freem(m); } else { @@ -14037,7 +12760,7 @@ rack_fastack(struct mbuf *m, struct tcphdr *th, struct socket *so, struct mbuf *mfree; rack_ack_received(tp, rack, th->th_ack, nsegs, CC_ACK, 0); - SOCKBUF_LOCK(&so->so_snd); + SOCK_SENDBUF_LOCK(so); mfree = sbcut_locked(&so->so_snd, acked); tp->snd_una = th->th_ack; /* Note we want to hold the sb lock through the sendmap adjust */ @@ -14103,7 +12826,6 @@ rack_fastack(struct mbuf *m, struct tcphdr *th, struct socket *so, rack->r_ctl.retran_during_recovery = 0; rack->rc_suspicious = 0; rack->r_ctl.dsack_byte_cnt = 0; - rack->r_ctl.idle_snd_una = tp->snd_una; rack->r_ctl.rc_went_idle_time = tcp_get_usecs(NULL); if (rack->r_ctl.rc_went_idle_time == 0) rack->r_ctl.rc_went_idle_time = 1; @@ -14154,7 +12876,7 @@ rack_do_syn_sent(struct mbuf *m, struct tcphdr *th, struct socket *so, (SEQ_LEQ(th->th_ack, tp->iss) || SEQ_GT(th->th_ack, tp->snd_max))) { tcp_log_end_status(tp, TCP_EI_STATUS_RST_IN_FRONT); - ctf_do_dropwithreset(m, tp, th, BANDLIM_RST_OPENPORT, tlen); + ctf_do_dropwithreset(m, tp, th, BANDLIM_TCP_RST, tlen); return (1); } if ((thflags & (TH_ACK | TH_RST)) == (TH_ACK | TH_RST)) { @@ -14363,14 +13085,12 @@ rack_do_syn_recv(struct mbuf *m, struct tcphdr *th, struct socket *so, ctf_calc_rwin(so, tp); if ((thflags & TH_RST) || (tp->t_fin_is_rst && (thflags & TH_FIN))) - return (__ctf_process_rst(m, th, so, tp, - &rack->r_ctl.challenge_ack_ts, - &rack->r_ctl.challenge_ack_cnt)); + return (ctf_process_rst(m, th, so, tp)); if ((thflags & TH_ACK) && (SEQ_LEQ(th->th_ack, tp->snd_una) || SEQ_GT(th->th_ack, tp->snd_max))) { tcp_log_end_status(tp, TCP_EI_STATUS_RST_IN_FRONT); - ctf_do_dropwithreset(m, tp, th, BANDLIM_RST_OPENPORT, tlen); + ctf_do_dropwithreset(m, tp, th, BANDLIM_TCP_RST, tlen); return (1); } if (tp->t_flags & TF_FASTOPEN) { @@ -14383,7 +13103,7 @@ rack_do_syn_recv(struct mbuf *m, struct tcphdr *th, struct socket *so, */ if ((thflags & (TH_SYN | TH_ACK)) == (TH_SYN | TH_ACK)) { tcp_log_end_status(tp, TCP_EI_STATUS_RST_IN_FRONT); - ctf_do_dropwithreset(m, tp, th, BANDLIM_RST_OPENPORT, tlen); + ctf_do_dropwithreset(m, tp, th, BANDLIM_TCP_RST, tlen); return (1); } else if (thflags & TH_SYN) { /* non-initial SYN is ignored */ @@ -14417,12 +13137,10 @@ rack_do_syn_recv(struct mbuf *m, struct tcphdr *th, struct socket *so, */ if (SEQ_LT(th->th_seq, tp->irs)) { tcp_log_end_status(tp, TCP_EI_STATUS_RST_IN_FRONT); - ctf_do_dropwithreset(m, tp, th, BANDLIM_RST_OPENPORT, tlen); + ctf_do_dropwithreset(m, tp, th, BANDLIM_TCP_RST, tlen); return (1); } - if (_ctf_drop_checks(to, m, th, tp, &tlen, &thflags, &drop_hdrlen, &ret_val, - &rack->r_ctl.challenge_ack_ts, - &rack->r_ctl.challenge_ack_cnt)) { + if (ctf_drop_checks(to, m, th, tp, &tlen, &thflags, &drop_hdrlen, &ret_val)) { return (ret_val); } /* @@ -14611,9 +13329,7 @@ rack_do_established(struct mbuf *m, struct tcphdr *th, struct socket *so, if ((thflags & TH_RST) || (tp->t_fin_is_rst && (thflags & TH_FIN))) - return (__ctf_process_rst(m, th, so, tp, - &rack->r_ctl.challenge_ack_ts, - &rack->r_ctl.challenge_ack_cnt)); + return (ctf_process_rst(m, th, so, tp)); /* * RFC5961 Section 4.2 Send challenge ACK for any SYN in @@ -14632,9 +13348,7 @@ rack_do_established(struct mbuf *m, struct tcphdr *th, struct socket *so, if (ctf_ts_check(m, th, tp, tlen, thflags, &ret_val)) return (ret_val); } - if (_ctf_drop_checks(to, m, th, tp, &tlen, &thflags, &drop_hdrlen, &ret_val, - &rack->r_ctl.challenge_ack_ts, 
- &rack->r_ctl.challenge_ack_cnt)) { + if (ctf_drop_checks(to, m, th, tp, &tlen, &thflags, &drop_hdrlen, &ret_val)) { return (ret_val); } /* @@ -14686,7 +13400,7 @@ rack_do_established(struct mbuf *m, struct tcphdr *th, struct socket *so, if (sbavail(&so->so_snd)) { if (ctf_progress_timeout_check(tp, true)) { rack_log_progress_event(rack, tp, tick, PROGRESS_DROP, __LINE__); - ctf_do_dropwithreset_conn(m, tp, th, BANDLIM_RST_OPENPORT, tlen); + ctf_do_dropwithreset_conn(m, tp, th, BANDLIM_TCP_RST, tlen); return (1); } } @@ -14707,15 +13421,11 @@ rack_do_close_wait(struct mbuf *m, struct tcphdr *th, struct socket *so, { int32_t ret_val = 0; int32_t orig_tlen = tlen; - struct tcp_rack *rack; - rack = (struct tcp_rack *)tp->t_fb_ptr; ctf_calc_rwin(so, tp); if ((thflags & TH_RST) || (tp->t_fin_is_rst && (thflags & TH_FIN))) - return (__ctf_process_rst(m, th, so, tp, - &rack->r_ctl.challenge_ack_ts, - &rack->r_ctl.challenge_ack_cnt)); + return (ctf_process_rst(m, th, so, tp)); /* * RFC5961 Section 4.2 Send challenge ACK for any SYN in * synchronized state. @@ -14733,9 +13443,7 @@ rack_do_close_wait(struct mbuf *m, struct tcphdr *th, struct socket *so, if (ctf_ts_check(m, th, tp, tlen, thflags, &ret_val)) return (ret_val); } - if (_ctf_drop_checks(to, m, th, tp, &tlen, &thflags, &drop_hdrlen, &ret_val, - &rack->r_ctl.challenge_ack_ts, - &rack->r_ctl.challenge_ack_cnt)) { + if (ctf_drop_checks(to, m, th, tp, &tlen, &thflags, &drop_hdrlen, &ret_val)) { return (ret_val); } /* @@ -14788,7 +13496,7 @@ rack_do_close_wait(struct mbuf *m, struct tcphdr *th, struct socket *so, if (ctf_progress_timeout_check(tp, true)) { rack_log_progress_event((struct tcp_rack *)tp->t_fb_ptr, tp, tick, PROGRESS_DROP, __LINE__); - ctf_do_dropwithreset_conn(m, tp, th, BANDLIM_RST_OPENPORT, tlen); + ctf_do_dropwithreset_conn(m, tp, th, BANDLIM_TCP_RST, tlen); return (1); } } @@ -14837,16 +13545,12 @@ rack_do_fin_wait_1(struct mbuf *m, struct tcphdr *th, struct socket *so, int32_t ret_val = 0; int32_t orig_tlen = tlen; int32_t ourfinisacked = 0; - struct tcp_rack *rack; - rack = (struct tcp_rack *)tp->t_fb_ptr; ctf_calc_rwin(so, tp); if ((thflags & TH_RST) || (tp->t_fin_is_rst && (thflags & TH_FIN))) - return (__ctf_process_rst(m, th, so, tp, - &rack->r_ctl.challenge_ack_ts, - &rack->r_ctl.challenge_ack_cnt)); + return (ctf_process_rst(m, th, so, tp)); /* * RFC5961 Section 4.2 Send challenge ACK for any SYN in * synchronized state. 
@@ -14864,9 +13568,7 @@ rack_do_fin_wait_1(struct mbuf *m, struct tcphdr *th, struct socket *so, if (ctf_ts_check(m, th, tp, tlen, thflags, &ret_val)) return (ret_val); } - if (_ctf_drop_checks(to, m, th, tp, &tlen, &thflags, &drop_hdrlen, &ret_val, - &rack->r_ctl.challenge_ack_ts, - &rack->r_ctl.challenge_ack_cnt)) { + if (ctf_drop_checks(to, m, th, tp, &tlen, &thflags, &drop_hdrlen, &ret_val)) { return (ret_val); } /* @@ -14944,7 +13646,7 @@ rack_do_fin_wait_1(struct mbuf *m, struct tcphdr *th, struct socket *so, if (ctf_progress_timeout_check(tp, true)) { rack_log_progress_event((struct tcp_rack *)tp->t_fb_ptr, tp, tick, PROGRESS_DROP, __LINE__); - ctf_do_dropwithreset_conn(m, tp, th, BANDLIM_RST_OPENPORT, tlen); + ctf_do_dropwithreset_conn(m, tp, th, BANDLIM_TCP_RST, tlen); return (1); } } @@ -14965,16 +13667,12 @@ rack_do_closing(struct mbuf *m, struct tcphdr *th, struct socket *so, int32_t ret_val = 0; int32_t orig_tlen = tlen; int32_t ourfinisacked = 0; - struct tcp_rack *rack; - rack = (struct tcp_rack *)tp->t_fb_ptr; ctf_calc_rwin(so, tp); if ((thflags & TH_RST) || (tp->t_fin_is_rst && (thflags & TH_FIN))) - return (__ctf_process_rst(m, th, so, tp, - &rack->r_ctl.challenge_ack_ts, - &rack->r_ctl.challenge_ack_cnt)); + return (ctf_process_rst(m, th, so, tp)); /* * RFC5961 Section 4.2 Send challenge ACK for any SYN in * synchronized state. @@ -14992,9 +13690,7 @@ rack_do_closing(struct mbuf *m, struct tcphdr *th, struct socket *so, if (ctf_ts_check(m, th, tp, tlen, thflags, &ret_val)) return (ret_val); } - if (_ctf_drop_checks(to, m, th, tp, &tlen, &thflags, &drop_hdrlen, &ret_val, - &rack->r_ctl.challenge_ack_ts, - &rack->r_ctl.challenge_ack_cnt)) { + if (ctf_drop_checks(to, m, th, tp, &tlen, &thflags, &drop_hdrlen, &ret_val)) { return (ret_val); } /* @@ -15051,7 +13747,7 @@ rack_do_closing(struct mbuf *m, struct tcphdr *th, struct socket *so, if (ctf_progress_timeout_check(tp, true)) { rack_log_progress_event((struct tcp_rack *)tp->t_fb_ptr, tp, tick, PROGRESS_DROP, __LINE__); - ctf_do_dropwithreset_conn(m, tp, th, BANDLIM_RST_OPENPORT, tlen); + ctf_do_dropwithreset_conn(m, tp, th, BANDLIM_TCP_RST, tlen); return (1); } } @@ -15072,16 +13768,12 @@ rack_do_lastack(struct mbuf *m, struct tcphdr *th, struct socket *so, int32_t ret_val = 0; int32_t orig_tlen; int32_t ourfinisacked = 0; - struct tcp_rack *rack; - rack = (struct tcp_rack *)tp->t_fb_ptr; ctf_calc_rwin(so, tp); if ((thflags & TH_RST) || (tp->t_fin_is_rst && (thflags & TH_FIN))) - return (__ctf_process_rst(m, th, so, tp, - &rack->r_ctl.challenge_ack_ts, - &rack->r_ctl.challenge_ack_cnt)); + return (ctf_process_rst(m, th, so, tp)); /* * RFC5961 Section 4.2 Send challenge ACK for any SYN in * synchronized state. 
@@ -15100,9 +13792,7 @@ rack_do_lastack(struct mbuf *m, struct tcphdr *th, struct socket *so, return (ret_val); } orig_tlen = tlen; - if (_ctf_drop_checks(to, m, th, tp, &tlen, &thflags, &drop_hdrlen, &ret_val, - &rack->r_ctl.challenge_ack_ts, - &rack->r_ctl.challenge_ack_cnt)) { + if (ctf_drop_checks(to, m, th, tp, &tlen, &thflags, &drop_hdrlen, &ret_val)) { return (ret_val); } /* @@ -15159,7 +13849,7 @@ rack_do_lastack(struct mbuf *m, struct tcphdr *th, struct socket *so, if (ctf_progress_timeout_check(tp, true)) { rack_log_progress_event((struct tcp_rack *)tp->t_fb_ptr, tp, tick, PROGRESS_DROP, __LINE__); - ctf_do_dropwithreset_conn(m, tp, th, BANDLIM_RST_OPENPORT, tlen); + ctf_do_dropwithreset_conn(m, tp, th, BANDLIM_TCP_RST, tlen); return (1); } } @@ -15180,17 +13870,13 @@ rack_do_fin_wait_2(struct mbuf *m, struct tcphdr *th, struct socket *so, int32_t ret_val = 0; int32_t orig_tlen = tlen; int32_t ourfinisacked = 0; - struct tcp_rack *rack; - rack = (struct tcp_rack *)tp->t_fb_ptr; ctf_calc_rwin(so, tp); /* Reset receive buffer auto scaling when not in bulk receive mode. */ if ((thflags & TH_RST) || (tp->t_fin_is_rst && (thflags & TH_FIN))) - return (__ctf_process_rst(m, th, so, tp, - &rack->r_ctl.challenge_ack_ts, - &rack->r_ctl.challenge_ack_cnt)); + return (ctf_process_rst(m, th, so, tp)); /* * RFC5961 Section 4.2 Send challenge ACK for any SYN in * synchronized state. @@ -15208,9 +13894,7 @@ rack_do_fin_wait_2(struct mbuf *m, struct tcphdr *th, struct socket *so, if (ctf_ts_check(m, th, tp, tlen, thflags, &ret_val)) return (ret_val); } - if (_ctf_drop_checks(to, m, th, tp, &tlen, &thflags, &drop_hdrlen, &ret_val, - &rack->r_ctl.challenge_ack_ts, - &rack->r_ctl.challenge_ack_cnt)) { + if (ctf_drop_checks(to, m, th, tp, &tlen, &thflags, &drop_hdrlen, &ret_val)) { return (ret_val); } /* @@ -15269,7 +13953,7 @@ rack_do_fin_wait_2(struct mbuf *m, struct tcphdr *th, struct socket *so, if (ctf_progress_timeout_check(tp, true)) { rack_log_progress_event((struct tcp_rack *)tp->t_fb_ptr, tp, tick, PROGRESS_DROP, __LINE__); - ctf_do_dropwithreset_conn(m, tp, th, BANDLIM_RST_OPENPORT, tlen); + ctf_do_dropwithreset_conn(m, tp, th, BANDLIM_TCP_RST, tlen); return (1); } } @@ -15545,7 +14229,7 @@ rack_log_chg_info(struct tcpcb *tp, struct tcp_rack *rack, uint8_t mod, union tcp_log_stackspecific log; struct timeval tv; - memset(&log.u_bbr, 0, sizeof(log.u_bbr)); + memset(&log, 0, sizeof(log)); log.u_bbr.timeStamp = tcp_get_usecs(&tv); log.u_bbr.flex8 = mod; log.u_bbr.flex1 = flex1; @@ -15860,36 +14544,6 @@ rack_init_outstanding(struct tcpcb *tp, struct tcp_rack *rack, uint32_t us_cts, return (0); } -static void -rack_translate_policer_detect(struct tcp_rack *rack, uint32_t optval) -{ - /* - * P = Percent of retransmits 499 = 49.9% - * A = Average number 1 (.1%) -> 169 (16.9%) - * M = Median number of retrans 1 - 16 - * MMMM MMMM AAAA AAAA PPPP PPPP PPPP PPPP - * - */ - uint16_t per, upp; - - per = optval & 0x0000ffff; - rack->r_ctl.policer_rxt_threshold = (uint32_t)(per & 0xffff); - upp = ((optval & 0xffff0000) >> 16); - rack->r_ctl.policer_avg_threshold = (0x00ff & upp); - rack->r_ctl.policer_med_threshold = ((upp >> 8) & 0x00ff); - if ((rack->r_ctl.policer_rxt_threshold > 0) && - (rack->r_ctl.policer_avg_threshold > 0) && - (rack->r_ctl.policer_med_threshold > 0)) { - rack->policer_detect_on = 1; - } else { - rack->policer_detect_on = 0; - } - rack->r_ctl.saved_policer_val = optval; - policer_detection_log(rack, optval, - rack->r_ctl.policer_avg_threshold, - rack->r_ctl.policer_med_threshold, - 
rack->r_ctl.policer_rxt_threshold, 11); -} static int32_t rack_init(struct tcpcb *tp, void **ptr) @@ -15957,21 +14611,9 @@ rack_init(struct tcpcb *tp, void **ptr) rack->r_ctl.rc_split_limit = V_tcp_map_split_limit; /* We want abe like behavior as well */ - rack->r_ctl.rc_saved_beta.newreno_flags |= CC_NEWRENO_BETA_ECN_ENABLED; rack->r_ctl.rc_reorder_fade = rack_reorder_fade; rack->rc_allow_data_af_clo = rack_ignore_data_after_close; rack->r_ctl.rc_tlp_threshold = rack_tlp_thresh; - rack->r_ctl.policer_del_mss = rack_req_del_mss; - if ((rack_policer_rxt_thresh > 0) && - (rack_policer_avg_thresh > 0) && - (rack_policer_med_thresh > 0)) { - rack->r_ctl.policer_rxt_threshold = rack_policer_rxt_thresh; - rack->r_ctl.policer_avg_threshold = rack_policer_avg_thresh; - rack->r_ctl.policer_med_threshold = rack_policer_med_thresh; - rack->policer_detect_on = 1; - } else { - rack->policer_detect_on = 0; - } if (rack_fill_cw_state) rack->rc_pace_to_cwnd = 1; if (rack_pacing_min_seg) @@ -16008,13 +14650,13 @@ rack_init(struct tcpcb *tp, void **ptr) rack->r_ctl.max_reduction = rack_max_reduce; rack->rc_force_max_seg = 0; TAILQ_INIT(&rack->r_ctl.opt_list); - rack->r_ctl.rc_saved_beta.beta = V_newreno_beta_ecn; - rack->r_ctl.rc_saved_beta.beta_ecn = V_newreno_beta_ecn; + rack->r_ctl.rc_saved_beta = V_newreno_beta_ecn; + rack->r_ctl.rc_saved_beta_ecn = V_newreno_beta_ecn; if (rack_hibeta_setting) { rack->rack_hibeta = 1; if ((rack_hibeta_setting >= 50) && (rack_hibeta_setting <= 100)) { - rack->r_ctl.rc_saved_beta.beta = rack_hibeta_setting; + rack->r_ctl.rc_saved_beta = rack_hibeta_setting; rack->r_ctl.saved_hibeta = rack_hibeta_setting; } } else { @@ -16028,7 +14670,6 @@ rack_init(struct tcpcb *tp, void **ptr) rack->r_ctl.last_tm_mark = 0xffffffffffffffff; rack->r_ctl.rc_reorder_shift = rack_reorder_thresh; rack->r_ctl.rc_pkt_delay = rack_pkt_delay; - rack->r_ctl.pol_bw_comp = rack_policing_do_bw_comp; rack->r_ctl.rc_tlp_cwnd_reduce = rack_lower_cwnd_at_tlp; rack->r_ctl.rc_lowest_us_rtt = 0xffffffff; rack->r_ctl.rc_highest_us_rtt = 0; @@ -16064,7 +14705,6 @@ rack_init(struct tcpcb *tp, void **ptr) if (rack_honors_hpts_min_to) rack->r_use_hpts_min = 1; if (tp->snd_una != 0) { - rack->r_ctl.idle_snd_una = tp->snd_una; rack->rc_sendvars_notset = 0; /* * Make sure any TCP timers are not running. 
@@ -16115,7 +14755,6 @@ rack_init(struct tcpcb *tp, void **ptr) rack->r_ctl.rc_lower_rtt_us_cts = us_cts; rack->r_ctl.rc_time_of_last_probertt = us_cts; rack->r_ctl.rc_went_idle_time = us_cts; - rack->r_ctl.challenge_ack_ts = tcp_ts_getticks() - (tcp_ack_war_time_window + 1); rack->r_ctl.rc_time_probertt_starts = 0; rack->r_ctl.gp_rnd_thresh = rack_rnd_cnt_req & 0xff; @@ -16142,11 +14781,6 @@ rack_init(struct tcpcb *tp, void **ptr) rack->rack_hdw_pace_ena = 1; if (rack_hw_rate_caps) rack->r_rack_hw_rate_caps = 1; -#ifdef TCP_SAD_DETECTION - rack->do_detection = 1; -#else - rack->do_detection = 0; -#endif if (rack_non_rxt_use_cr) rack->rack_rec_nonrxt_use_cr = 1; /* Lets setup the fsb block */ @@ -16485,16 +15119,16 @@ rack_fini(struct tcpcb *tp, int32_t tcb_is_purged) union tcp_log_stackspecific log; struct timeval tv; - memset(&log.u_bbr, 0, sizeof(log.u_bbr)); + memset(&log, 0, sizeof(log)); log.u_bbr.flex8 = 10; log.u_bbr.flex1 = rack->r_ctl.rc_num_maps_alloced; log.u_bbr.flex2 = rack->rc_free_cnt; log.u_bbr.flex3 = cnt_free; log.u_bbr.inflight = ctf_flight_size(rack->rc_tp, rack->r_ctl.rc_sacked); rsm = tqhash_min(rack->r_ctl.tqh); - log.u_bbr.delRate = (uint64_t)rsm; + log.u_bbr.delRate = (uintptr_t)rsm; rsm = TAILQ_FIRST(&rack->r_ctl.rc_free); - log.u_bbr.cur_del_rate = (uint64_t)rsm; + log.u_bbr.cur_del_rate = (uintptr_t)rsm; log.u_bbr.timeStamp = tcp_get_usecs(&tv); log.u_bbr.pkt_epoch = __LINE__; (void)tcp_log_event(tp, NULL, NULL, NULL, TCP_LOG_OUT, ERRNO_UNK, @@ -16605,12 +15239,6 @@ rack_timer_audit(struct tcpcb *tp, struct tcp_rack *rack, struct sockbuf *sb) if (tmr_up == PACE_TMR_DELACK) /* We are supposed to have delayed ack up and we do */ return; - } else if (sbavail(&tptosocket(tp)->so_snd) && (tmr_up == PACE_TMR_RXT)) { - /* - * if we hit enobufs then we would expect the possibility - * of nothing outstanding and the RXT up (and the hptsi timer). 
- */ - return; } else if (((V_tcp_always_keepalive || rack->rc_inp->inp_socket->so_options & SO_KEEPALIVE) && (tp->t_state <= TCPS_CLOSING)) && @@ -16735,7 +15363,7 @@ rack_log_input_packet(struct tcpcb *tp, struct tcp_rack *rack, struct tcp_ackent tcp_req = tcp_req_find_req_for_seq(tp, ae->ack); } #endif - memset(&log.u_bbr, 0, sizeof(log.u_bbr)); + memset(&log, 0, sizeof(log)); log.u_bbr.inhpts = tcp_in_hpts(rack->rc_tp); if (rack->rack_no_prr == 0) log.u_bbr.flex1 = rack->r_ctl.rc_prr_sndcnt; @@ -16938,7 +15566,7 @@ rack_log_pcm(struct tcp_rack *rack, uint8_t mod, uint32_t flex1, uint32_t flex2, struct timeval tv; (void)tcp_get_usecs(&tv); - memset(&log.u_bbr, 0, sizeof(log.u_bbr)); + memset(&log, 0, sizeof(log)); log.u_bbr.timeStamp = tcp_tv_to_usectick(&tv); log.u_bbr.inflight = ctf_flight_size(rack->rc_tp, rack->r_ctl.rc_sacked); log.u_bbr.flex8 = mod; @@ -17021,7 +15649,7 @@ rack_new_round_setup(struct tcpcb *tp, struct tcp_rack *rack, uint32_t high_seq) union tcp_log_stackspecific log; struct timeval tv; - memset(&log.u_bbr, 0, sizeof(log.u_bbr)); + memset(&log, 0, sizeof(log)); log.u_bbr.timeStamp = tcp_get_usecs(&tv); log.u_bbr.flex1 = rack->r_ctl.current_round; log.u_bbr.flex2 = rack->r_ctl.last_rnd_of_gp_rise; @@ -17164,10 +15792,7 @@ rack_do_compressed_ack_processing(struct tcpcb *tp, struct socket *so, struct mb /* Case C */ ae->ack_val_set = ACK_RWND; } - if (rack->sack_attack_disable > 0) { - rack_log_type_bbrsnd(rack, 0, 0, cts, tv, __LINE__); - rack->r_ctl.ack_during_sd++; - } + rack_log_type_bbrsnd(rack, 0, 0, cts, tv, __LINE__); rack_log_input_packet(tp, rack, ae, ae->ack_val_set, high_seq); /* Validate timestamp */ if (ae->flags & HAS_TSTMP) { @@ -17278,7 +15903,7 @@ rack_do_compressed_ack_processing(struct tcpcb *tp, struct socket *so, struct mb * ack is beyond the largest seq we sent. */ if ((tp->t_flags & TF_ACKNOW) == 0) { - ctf_ack_war_checks(tp, &rack->r_ctl.challenge_ack_ts, &rack->r_ctl.challenge_ack_cnt); + ctf_ack_war_checks(tp); if (tp->t_flags && TF_ACKNOW) rack->r_wanted_output = 1; } @@ -17374,28 +15999,6 @@ rack_do_compressed_ack_processing(struct tcpcb *tp, struct socket *so, struct mb * since cum-ack moved forward. */ rack->probe_not_answered = 0; - if (rack->sack_attack_disable == 0) - rack_do_decay(rack); - if (acked >= segsiz) { - /* - * You only get credit for - * MSS and greater (and you get extra - * credit for larger cum-ack moves). - */ - int ac; - - ac = acked / segsiz; - rack->r_ctl.ack_count += ac; - counter_u64_add(rack_ack_total, ac); - } - if (rack->r_ctl.ack_count > 0xfff00000) { - /* - * reduce the number to keep us under - * a uint32_t. - */ - rack->r_ctl.ack_count /= 2; - rack->r_ctl.sack_count /= 2; - } if (tp->t_flags & TF_NEEDSYN) { /* * T/TCP: Connection was half-synchronized, and our SYN has @@ -17409,16 +16012,6 @@ rack_do_compressed_ack_processing(struct tcpcb *tp, struct socket *so, struct mb } if (acked > sbavail(&so->so_snd)) acked_amount = sbavail(&so->so_snd); -#ifdef TCP_SAD_DETECTION - /* - * We only care on a cum-ack move if we are in a sack-disabled - * state. We have already added in to the ack_count, and we never - * would disable on a cum-ack move, so we only care to do the - * detection if it may "undo" it, i.e. we were in disabled already. 
- */ - if (rack->sack_attack_disable) - rack_do_detection(tp, rack, acked_amount, segsiz); -#endif if (IN_FASTRECOVERY(tp->t_flags) && (rack->rack_no_prr == 0)) rack_update_prr(tp, rack, acked_amount, high_seq); @@ -17489,7 +16082,7 @@ rack_do_compressed_ack_processing(struct tcpcb *tp, struct socket *so, struct mb /* Must be non-newreno (cubic) getting too ahead of itself */ tp->snd_cwnd = p_cwnd; } - SOCKBUF_LOCK(&so->so_snd); + SOCK_SENDBUF_LOCK(so); mfree = sbcut_locked(&so->so_snd, acked_amount); tp->snd_una = high_seq; /* Note we want to hold the sb lock through the sendmap adjust */ @@ -17933,7 +16526,14 @@ rack_do_segment_nounlock(struct tcpcb *tp, struct mbuf *m, struct tcphdr *th, __func__)); KASSERT(tp->t_state != TCPS_TIME_WAIT, ("%s: TCPS_TIME_WAIT", __func__)); - + if (tp->t_flags2 & TF2_PROC_SACK_PROHIBIT) { + /* + * We don't look at sack's from the + * peer because the MSS is too small which + * can subject us to an attack. + */ + to.to_flags &= ~TOF_SACK; + } if ((tp->t_state >= TCPS_FIN_WAIT_1) && (tp->t_flags & TF_GPUTINPROG)) { /* @@ -17972,7 +16572,7 @@ rack_do_segment_nounlock(struct tcpcb *tp, struct mbuf *m, struct tcphdr *th, tcp_req = tcp_req_find_req_for_seq(tp, th->th_ack); } #endif - memset(&log.u_bbr, 0, sizeof(log.u_bbr)); + memset(&log, 0, sizeof(log)); log.u_bbr.inhpts = tcp_in_hpts(rack->rc_tp); if (rack->rack_no_prr == 0) log.u_bbr.flex1 = rack->r_ctl.rc_prr_sndcnt; @@ -18042,10 +16642,7 @@ rack_do_segment_nounlock(struct tcpcb *tp, struct mbuf *m, struct tcphdr *th, /* Remove ack required flag if set, we have one */ if (thflags & TH_ACK) rack->rc_ack_required = 0; - if (rack->sack_attack_disable > 0) { - rack->r_ctl.ack_during_sd++; - rack_log_type_bbrsnd(rack, 0, 0, cts, tv, __LINE__); - } + rack_log_type_bbrsnd(rack, 0, 0, cts, tv, __LINE__); if ((thflags & TH_SYN) && (thflags & TH_FIN) && V_drop_synfin) { way_out = 4; retval = 0; @@ -18059,7 +16656,7 @@ rack_do_segment_nounlock(struct tcpcb *tp, struct mbuf *m, struct tcphdr *th, if ((tp->t_state == TCPS_SYN_SENT) && (thflags & TH_ACK) && (SEQ_LEQ(th->th_ack, tp->iss) || SEQ_GT(th->th_ack, tp->snd_max))) { tcp_log_end_status(tp, TCP_EI_STATUS_RST_IN_FRONT); - ctf_do_dropwithreset(m, tp, th, BANDLIM_RST_OPENPORT, tlen); + ctf_do_dropwithreset(m, tp, th, BANDLIM_TCP_RST, tlen); #ifdef TCP_ACCOUNTING sched_unpin(); #endif @@ -18323,7 +16920,7 @@ do_output_now: } else if ((nxt_pkt == 0) && (tp->t_flags & TF_ACKNOW)) { goto do_output_now; } else if ((no_output == 1) && - (nxt_pkt == 0) && + (nxt_pkt == 0) && (tcp_in_hpts(rack->rc_tp) == 0)) { /* * We are not in hpts and we had a pacing timer up. 
Use @@ -18441,7 +17038,6 @@ tcp_rack_output(struct tcpcb *tp, struct tcp_rack *rack, uint32_t tsused) struct rack_sendmap *rsm = NULL; int32_t idx; uint32_t srtt = 0, thresh = 0, ts_low = 0; - int no_sack = 0; /* Return the next guy to be re-transmitted */ if (tqhash_empty(rack->r_ctl.tqh)) { @@ -18464,11 +17060,7 @@ tcp_rack_output(struct tcpcb *tp, struct tcp_rack *rack, uint32_t tsused) return (NULL); } check_it: - if (((rack->rc_tp->t_flags & TF_SACK_PERMIT) == 0) || - (rack->sack_attack_disable > 0)) { - no_sack = 1; - } - if ((no_sack > 0) && + if (((rack->rc_tp->t_flags & TF_SACK_PERMIT) == 0) && (rsm->r_dupack >= DUP_ACK_THRESHOLD)) { /* * No sack so we automatically do the 3 strikes and @@ -18498,8 +17090,7 @@ check_it: return (NULL); } if ((rsm->r_dupack >= DUP_ACK_THRESHOLD) || - ((rsm->r_flags & RACK_SACK_PASSED) && - (rack->sack_attack_disable == 0))) { + ((rsm->r_flags & RACK_SACK_PASSED))) { /* * We have passed the dup-ack threshold <or> * a SACK has indicated this is missing. @@ -18589,6 +17180,12 @@ rack_log_pacing_delay_calc (struct tcp_rack *rack, uint32_t len, uint32_t slot, log.u_bbr.cwnd_gain |= rack->rc_gp_saw_ss; log.u_bbr.cwnd_gain <<= 1; log.u_bbr.cwnd_gain |= rack->rc_gp_saw_ca; + log.u_bbr.cwnd_gain <<= 1; + log.u_bbr.cwnd_gain |= rack->use_fixed_rate; + log.u_bbr.cwnd_gain <<= 1; + log.u_bbr.cwnd_gain |= rack->rc_always_pace; + log.u_bbr.cwnd_gain <<= 1; + log.u_bbr.cwnd_gain |= rack->gp_ready; log.u_bbr.bbr_substate = quality; log.u_bbr.bbr_state = rack->dgp_on; log.u_bbr.bbr_state <<= 1; @@ -18755,7 +17352,7 @@ at_lt_bw: union tcp_log_stackspecific log; struct timeval tv; - memset(&log.u_bbr, 0, sizeof(log.u_bbr)); + memset(&log, 0, sizeof(log)); log.u_bbr.timeStamp = tcp_get_usecs(&tv); log.u_bbr.flex1 = rack_bw_multipler; log.u_bbr.flex2 = len; @@ -18839,122 +17436,11 @@ at_lt_bw: return (slot); } -static uint32_t -rack_policer_check_send(struct tcp_rack *rack, uint32_t len, uint32_t segsiz, uint32_t *needs) -{ - uint64_t calc; - - rack->rc_policer_should_pace = 0; - calc = rack_policer_bucket_reserve * rack->r_ctl.policer_bucket_size; - calc /= 100; - /* - * Now lets look at if we want more than is in the bucket <or> - * we want more than is reserved in the bucket. - */ - if (rack_verbose_logging > 0) - policer_detection_log(rack, len, segsiz, calc, rack->r_ctl.current_policer_bucket, 8); - if ((calc > rack->r_ctl.current_policer_bucket) || - (len >= (rack->r_ctl.current_policer_bucket - calc))) { - /* - * We may want to pace depending on if we are going - * into the reserve or not. - */ - uint32_t newlen; - - if (calc > rack->r_ctl.current_policer_bucket) { - /* - * This will eat into the reserve if we - * don't have room at all some lines - * below will catch it. - */ - newlen = rack->r_ctl.policer_max_seg; - rack->rc_policer_should_pace = 1; - } else { - /* - * We have all of the reserve plus something in the bucket - * that we can give out. - */ - newlen = rack->r_ctl.current_policer_bucket - calc; - if (newlen < rack->r_ctl.policer_max_seg) { - /* - * Into the reserve to get a full policer_max_seg - * so we set the len to that and eat into - * the reserve. If we go over the code - * below will make us wait. 
- */ - newlen = rack->r_ctl.policer_max_seg; - rack->rc_policer_should_pace = 1; - } - } - if (newlen > rack->r_ctl.current_policer_bucket) { - /* We have to wait some */ - *needs = newlen - rack->r_ctl.current_policer_bucket; - return (0); - } - if (rack_verbose_logging > 0) - policer_detection_log(rack, len, segsiz, newlen, 0, 9); - len = newlen; - } /* else we have all len available above the reserve */ - if (rack_verbose_logging > 0) - policer_detection_log(rack, len, segsiz, calc, 0, 10); - return (len); -} - -static uint32_t -rack_policed_sending(struct tcp_rack *rack, struct tcpcb *tp, uint32_t len, uint32_t segsiz, int call_line) -{ - /* - * Given a send of len, and a token bucket set at current_policer_bucket_size - * are we close enough to the end of the bucket that we need to pace? If so - * calculate out a time and return it. Otherwise subtract the tokens from - * the bucket. - */ - uint64_t calc; - - if ((rack->r_ctl.policer_bw == 0) || - (rack->r_ctl.policer_bucket_size < segsiz)) { - /* - * We should have an estimate here... - */ - return (0); - } - calc = (uint64_t)rack_policer_bucket_reserve * (uint64_t)rack->r_ctl.policer_bucket_size; - calc /= 100; - if ((rack->r_ctl.current_policer_bucket < len) || - (rack->rc_policer_should_pace == 1) || - ((rack->r_ctl.current_policer_bucket - len) <= (uint32_t)calc)) { - /* we need to pace */ - uint64_t lentim, res; - uint32_t slot; - - lentim = (uint64_t)len * (uint64_t)HPTS_USEC_IN_SEC; - res = lentim / rack->r_ctl.policer_bw; - slot = (uint32_t)res; - if (rack->r_ctl.current_policer_bucket > len) - rack->r_ctl.current_policer_bucket -= len; - else - rack->r_ctl.current_policer_bucket = 0; - policer_detection_log(rack, len, slot, (uint32_t)rack_policer_bucket_reserve, call_line, 5); - rack->rc_policer_should_pace = 0; - return(slot); - } - /* Just take tokens out of the bucket and let rack do whatever it would have */ - policer_detection_log(rack, len, 0, (uint32_t)rack_policer_bucket_reserve, call_line, 6); - if (len < rack->r_ctl.current_policer_bucket) { - rack->r_ctl.current_policer_bucket -= len; - } else { - rack->r_ctl.current_policer_bucket = 0; - } - return (0); -} - - static int32_t rack_get_pacing_delay(struct tcp_rack *rack, struct tcpcb *tp, uint32_t len, struct rack_sendmap *rsm, uint32_t segsiz, int line) { uint64_t srtt; int32_t slot = 0; - int32_t minslot = 0; int can_start_hw_pacing = 1; int err; int pace_one; @@ -18964,25 +17450,6 @@ rack_get_pacing_delay(struct tcp_rack *rack, struct tcpcb *tp, uint32_t len, str pace_one = 1; else pace_one = 0; - if (rack->rc_policer_detected == 1) { - /* - * A policer has been detected and we - * have all of our data (policer-bw and - * policer bucket size) calculated. Call - * into the function to find out if we are - * overriding the time. - */ - slot = rack_policed_sending(rack, tp, len, segsiz, line); - if (slot) { - uint64_t logbw; - - logbw = rack->r_ctl.current_policer_bucket; - logbw <<= 32; - logbw |= rack->r_ctl.policer_bucket_size; - rack_log_pacing_delay_calc(rack, len, slot, rack->r_ctl.policer_bw, logbw, 0, 89, __LINE__, NULL, 0); - return(slot); - } - } if (rack->rc_always_pace == 0) { /* * We use the most optimistic possible cwnd/srtt for @@ -18992,7 +17459,7 @@ rack_get_pacing_delay(struct tcp_rack *rack, struct tcpcb *tp, uint32_t len, str * the peer to have a gap in data sending. 
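The two policer helpers removed above (together with the idle-time credit routine removed later in this diff) implemented a simple token bucket: each send spends its length in tokens, a configurable percentage of the bucket is held in reserve, a send that would dip into the reserve is paced at the measured policer bandwidth, and idle time refills the bucket up to its depth. The sketch below captures only that arithmetic; every name is example-local and USEC_PER_SEC_EX stands in for HPTS_USEC_IN_SEC.

#include <stdint.h>

#define USEC_PER_SEC_EX 1000000ULL

struct policer_ex {
	uint64_t bw;		/* measured policer rate, bytes/sec */
	uint32_t bucket_size;	/* bucket depth, bytes */
	uint32_t bucket;	/* tokens currently available, bytes */
	uint32_t reserve_pct;	/* share of the bucket held in reserve */
};

/* Microseconds needed to pace a send of len bytes at the policer rate. */
static uint32_t
policer_pace_usecs_ex(const struct policer_ex *p, uint32_t len)
{
	if (p->bw == 0)
		return (0);
	return ((uint32_t)(((uint64_t)len * USEC_PER_SEC_EX) / p->bw));
}

/* Spend tokens; return a pacing delay when the send dips into the reserve. */
static uint32_t
policer_send_ex(struct policer_ex *p, uint32_t len)
{
	uint64_t reserve = ((uint64_t)p->reserve_pct * p->bucket_size) / 100;
	uint32_t delay = 0;

	if ((p->bucket < len) || ((uint64_t)(p->bucket - len) <= reserve))
		delay = policer_pace_usecs_ex(p, len);
	p->bucket = (p->bucket > len) ? (p->bucket - len) : 0;
	return (delay);
}

/* Credit back tokens for idle time, capped at the bucket depth. */
static void
policer_idle_refill_ex(struct policer_ex *p, uint64_t idle_usecs)
{
	uint64_t credit = (idle_usecs * p->bw) / USEC_PER_SEC_EX;

	if (credit >= (uint64_t)(p->bucket_size - p->bucket))
		p->bucket = p->bucket_size;
	else
		p->bucket += (uint32_t)credit;
}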
*/ uint64_t cwnd, tr_perms = 0; - int32_t reduce = 0; + int32_t reduce; old_method: /* @@ -19029,7 +17496,8 @@ rack_get_pacing_delay(struct tcp_rack *rack, struct tcpcb *tp, uint32_t len, str slot -= reduce; } else slot = 0; - } + } else + reduce = 0; slot *= HPTS_USEC_IN_MSEC; if (rack->rc_pace_to_cwnd) { uint64_t rate_wanted = 0; @@ -19079,8 +17547,8 @@ rack_get_pacing_delay(struct tcp_rack *rack, struct tcpcb *tp, uint32_t len, str rack->r_ctl.rc_last_us_rtt, 88, __LINE__, NULL, gain); } - if ((bw_est == 0) || (rate_wanted == 0) || - ((rack->gp_ready == 0) && (rack->use_fixed_rate == 0))) { + if (((bw_est == 0) || (rate_wanted == 0) || (rack->gp_ready == 0)) && + (rack->use_fixed_rate == 0)) { /* * No way yet to make a b/w estimate or * our raise is set incorrectly. @@ -19305,11 +17773,6 @@ rack_get_pacing_delay(struct tcp_rack *rack, struct tcpcb *tp, uint32_t len, str } } } - if (minslot && (minslot > slot)) { - rack_log_pacing_delay_calc(rack, minslot, slot, rack->r_ctl.crte->rate, bw_est, lentim, - 98, __LINE__, NULL, 0); - slot = minslot; - } done_w_hdwr: if (rack_limit_time_with_srtt && (rack->use_fixed_rate == 0) && @@ -19536,7 +17999,7 @@ start_set: rack_log_pacing_delay_calc(rack, tp->gput_seq, tp->gput_ack, - (uint64_t)my_rsm, + (uintptr_t)my_rsm, tp->gput_ts, (((uint64_t)rack->r_ctl.rc_app_limited_cnt << 32) | (uint64_t)rack->r_ctl.rc_gp_output_ts), 9, @@ -19589,7 +18052,7 @@ use_latest: rack_log_pacing_delay_calc(rack, tp->gput_seq, tp->gput_ack, - (uint64_t)my_rsm, + (uintptr_t)my_rsm, tp->gput_ts, (((uint64_t)rack->r_ctl.rc_app_limited_cnt << 32) | (uint64_t)rack->r_ctl.rc_gp_output_ts), 9, __LINE__, NULL, 0); @@ -19647,7 +18110,7 @@ rack_log_fsb(struct tcp_rack *rack, struct tcpcb *tp, struct socket *so, uint32_ union tcp_log_stackspecific log; struct timeval tv; - memset(&log.u_bbr, 0, sizeof(log.u_bbr)); + memset(&log, 0, sizeof(log)); log.u_bbr.inhpts = tcp_in_hpts(rack->rc_tp); log.u_bbr.flex1 = error; log.u_bbr.flex2 = flags; @@ -19912,7 +18375,7 @@ rack_log_queue_level(struct tcpcb *tp, struct tcp_rack *rack, err = in_pcbquery_txrlevel(rack->rc_inp, &p_queue); err = in_pcbquery_txrtlmt(rack->rc_inp, &p_rate); #endif - memset(&log.u_bbr, 0, sizeof(log.u_bbr)); + memset(&log, 0, sizeof(log)); log.u_bbr.inhpts = tcp_in_hpts(rack->rc_tp); log.u_bbr.flex1 = p_rate; log.u_bbr.flex2 = p_queue; @@ -20365,7 +18828,7 @@ rack_fast_rsm_output(struct tcpcb *tp, struct tcp_rack *rack, struct rack_sendma counter_u64_add(rack_collapsed_win_rxt, 1); counter_u64_add(rack_collapsed_win_rxt_bytes, (rsm->r_end - rsm->r_start)); } - memset(&log.u_bbr, 0, sizeof(log.u_bbr)); + memset(&log, 0, sizeof(log)); log.u_bbr.inhpts = tcp_in_hpts(rack->rc_tp); if (rack->rack_no_prr) log.u_bbr.flex1 = 0; @@ -20388,7 +18851,7 @@ rack_fast_rsm_output(struct tcpcb *tp, struct tcp_rack *rack, struct rack_sendma log.u_bbr.pkts_out = tp->t_maxseg; log.u_bbr.timeStamp = cts; log.u_bbr.inflight = ctf_flight_size(rack->rc_tp, rack->r_ctl.rc_sacked); - if (rsm && (rsm->r_rtr_cnt > 0)) { + if (rsm->r_rtr_cnt > 0) { /* * When we have a retransmit we want to log the * burst at send and flight at send from before. 
@@ -20405,7 +18868,7 @@ rack_fast_rsm_output(struct tcpcb *tp, struct tcp_rack *rack, struct rack_sendma } log.u_bbr.lt_epoch = rack->r_ctl.cwnd_to_use; log.u_bbr.delivered = 0; - log.u_bbr.rttProp = (uint64_t)rsm; + log.u_bbr.rttProp = (uintptr_t)rsm; log.u_bbr.delRate = rsm->r_flags; log.u_bbr.delRate <<= 31; log.u_bbr.delRate |= rack->r_must_retran; @@ -20515,11 +18978,7 @@ rack_fast_rsm_output(struct tcpcb *tp, struct tcp_rack *rack, struct rack_sendma crtsc = get_cyclecount(); if (tp->t_flags2 & TF2_TCP_ACCOUNTING) { tp->tcp_cnt_counters[SND_OUT_DATA] += cnt_thru; - } - if (tp->t_flags2 & TF2_TCP_ACCOUNTING) { tp->tcp_proc_time[SND_OUT_DATA] += (crtsc - ts_val); - } - if (tp->t_flags2 & TF2_TCP_ACCOUNTING) { tp->tcp_cnt_counters[CNT_OF_MSS_OUT] += ((len + segsiz - 1) / segsiz); } sched_unpin(); @@ -20588,7 +19047,7 @@ rack_sndbuf_autoscale(struct tcp_rack *rack) static int rack_fast_output(struct tcpcb *tp, struct tcp_rack *rack, uint64_t ts_val, - uint32_t cts, uint32_t ms_cts, struct timeval *tv, long tot_len, int *send_err) + uint32_t cts, uint32_t ms_cts, struct timeval *tv, long *tot_len, int *send_err, int line) { /* * Enter to do fast output. We are given that the sched_pin is @@ -20761,7 +19220,7 @@ again: } if (rack->r_ctl.fsb.rfo_apply_push && (len == rack->r_ctl.fsb.left_to_send)) { - tcp_set_flags(th, flags | TH_PUSH); + flags |= TH_PUSH; add_flag |= RACK_HAD_PUSH; } if ((m->m_next == NULL) || (len <= 0)){ @@ -20918,7 +19377,7 @@ again: if (tcp_bblogging_on(rack->rc_tp)) { union tcp_log_stackspecific log; - memset(&log.u_bbr, 0, sizeof(log.u_bbr)); + memset(&log, 0, sizeof(log)); log.u_bbr.inhpts = tcp_in_hpts(rack->rc_tp); if (rack->rack_no_prr) log.u_bbr.flex1 = 0; @@ -20940,11 +19399,11 @@ again: log.u_bbr.inflight = ctf_flight_size(rack->rc_tp, rack->r_ctl.rc_sacked); log.u_bbr.flex5 = log.u_bbr.inflight; log.u_bbr.lt_epoch = rack->r_ctl.cwnd_to_use; - log.u_bbr.delivered = 0; + log.u_bbr.delivered = rack->r_ctl.fsb.left_to_send; log.u_bbr.rttProp = 0; log.u_bbr.delRate = rack->r_must_retran; log.u_bbr.delRate <<= 1; - log.u_bbr.pkt_epoch = __LINE__; + log.u_bbr.pkt_epoch = line; /* For fast output no retrans so just inflight and how many mss we send */ log.u_bbr.flex5 = log.u_bbr.inflight; log.u_bbr.bbr_substate = (uint8_t)((len + segsiz - 1)/segsiz); @@ -21008,7 +19467,6 @@ again: } rack_log_output(tp, &to, len, tp->snd_max, flags, error, rack_to_usec_ts(tv), NULL, add_flag, s_mb, s_soff, rack->r_ctl.fsb.hw_tls, segsiz); - m = NULL; if (tp->snd_una == tp->snd_max) { rack->r_ctl.rc_tlp_rxt_last_time = cts; rack_log_progress_event(rack, tp, ticks, PROGRESS_START, __LINE__); @@ -21018,7 +19476,7 @@ again: tcp_account_for_send(tp, len, 0, 0, rack->r_ctl.fsb.hw_tls); rack->forced_ack = 0; /* If we send something zap the FA flag */ - tot_len += len; + *tot_len += len; if ((tp->t_flags & TF_GPUTINPROG) == 0) rack_start_gp_measurement(tp, rack, tp->snd_max, sb_offset); tp->snd_max += len; @@ -21043,9 +19501,9 @@ again: rack->r_fast_output = 0; rack->r_ctl.fsb.left_to_send = 0; /* At the end of fast_output scale up the sb */ - SOCKBUF_LOCK(&rack->rc_inp->inp_socket->so_snd); + SOCK_SENDBUF_LOCK(rack->rc_inp->inp_socket); rack_sndbuf_autoscale(rack); - SOCKBUF_UNLOCK(&rack->rc_inp->inp_socket->so_snd); + SOCK_SENDBUF_UNLOCK(rack->rc_inp->inp_socket); } if (tp->t_rtttime == 0) { tp->t_rtttime = ticks; @@ -21054,6 +19512,7 @@ again: } if ((rack->r_ctl.fsb.left_to_send >= segsiz) && (max_val > len) && + (*tot_len < rack->r_ctl.rc_pace_max_segs) && (tso == 0)) { max_val -= len; 
len = segsiz; @@ -21065,18 +19524,14 @@ again: } tp->t_flags &= ~(TF_ACKNOW | TF_DELACK); counter_u64_add(rack_fto_send, 1); - slot = rack_get_pacing_delay(rack, tp, tot_len, NULL, segsiz, __LINE__); - rack_start_hpts_timer(rack, tp, cts, slot, tot_len, 0); + slot = rack_get_pacing_delay(rack, tp, *tot_len, NULL, segsiz, __LINE__); + rack_start_hpts_timer(rack, tp, cts, slot, *tot_len, 0); #ifdef TCP_ACCOUNTING crtsc = get_cyclecount(); if (tp->t_flags2 & TF2_TCP_ACCOUNTING) { tp->tcp_cnt_counters[SND_OUT_DATA] += cnt_thru; - } - if (tp->t_flags2 & TF2_TCP_ACCOUNTING) { tp->tcp_proc_time[SND_OUT_DATA] += (crtsc - ts_val); - } - if (tp->t_flags2 & TF2_TCP_ACCOUNTING) { - tp->tcp_cnt_counters[CNT_OF_MSS_OUT] += ((tot_len + segsiz - 1) / segsiz); + tp->tcp_cnt_counters[CNT_OF_MSS_OUT] += ((*tot_len + segsiz - 1) / segsiz); } sched_unpin(); #endif @@ -21189,25 +19644,6 @@ restart: return (NULL); } -static void -rack_credit_back_policer_idle_time(struct tcp_rack *rack, uint64_t idle_t, int line) -{ - /* - * We were idle some time (idle_t) and so our policer bucket - * needs to grow. It can go no higher than policer_bucket_size. - */ - uint64_t len; - - len = idle_t * rack->r_ctl.policer_bw; - len /= HPTS_USEC_IN_SEC; - rack->r_ctl.current_policer_bucket += (uint32_t)len; - if (rack->r_ctl.policer_bucket_size < rack->r_ctl.current_policer_bucket) { - rack->r_ctl.current_policer_bucket = rack->r_ctl.policer_bucket_size; - } - if (rack_verbose_logging > 0) - policer_detection_log(rack, (uint32_t)len, line, (uint32_t)idle_t, 0, 7); -} - static inline void rack_validate_sizes(struct tcp_rack *rack, int32_t *len, int32_t segsiz, uint32_t pace_max_seg) { @@ -21425,8 +19861,6 @@ rack_output(struct tcpcb *tp) crtsc = get_cyclecount(); if (tp->t_flags2 & TF2_TCP_ACCOUNTING) { tp->tcp_proc_time[SND_BLOCKED] += (crtsc - ts_val); - } - if (tp->t_flags2 & TF2_TCP_ACCOUNTING) { tp->tcp_cnt_counters[SND_BLOCKED]++; } sched_unpin(); @@ -21459,20 +19893,36 @@ rack_output(struct tcpcb *tp) TCPS_HAVEESTABLISHED(tp->t_state)) { rack_set_state(tp, rack); } + segsiz = min(ctf_fixed_maxseg(tp), rack->r_ctl.rc_pace_min_segs); + minseg = segsiz; + if (rack->r_ctl.rc_pace_max_segs == 0) + pace_max_seg = rack->rc_user_set_max_segs * segsiz; + else + pace_max_seg = rack->r_ctl.rc_pace_max_segs; if ((rack->r_fast_output) && (doing_tlp == 0) && (tp->rcv_numsacks == 0)) { int ret; error = 0; - ret = rack_fast_output(tp, rack, ts_val, cts, ms_cts, &tv, tot_len_this_send, &error); - if (ret >= 0) + ret = rack_fast_output(tp, rack, ts_val, cts, ms_cts, &tv, &tot_len_this_send, &error, __LINE__); + if (ret > 0) return(ret); else if (error) { inp = rack->rc_inp; so = inp->inp_socket; sb = &so->so_snd; goto nomore; + } else { + /* Return == 0, if there is more we can send tot_len wise fall through and send */ + if (tot_len_this_send >= pace_max_seg) + return (ret); +#ifdef TCP_ACCOUNTING + /* We need to re-pin since fast_output un-pined */ + sched_pin(); + ts_val = get_cyclecount(); +#endif + /* Fall back out so we can send any more that may bring us to pace_max_seg */ } } inp = rack->rc_inp; @@ -21486,10 +19936,11 @@ rack_output(struct tcpcb *tp) (tp->t_state == TCPS_SYN_SENT)) && SEQ_GT(tp->snd_max, tp->snd_una) && /* initial SYN or SYN|ACK sent */ (tp->t_rxtshift == 0)) { /* not a retransmit */ - cwnd_to_use = rack->r_ctl.cwnd_to_use = tp->snd_cwnd; - so = inp->inp_socket; - sb = &so->so_snd; - goto just_return_nolock; + rack_start_hpts_timer(rack, tp, cts, 0, 0, 0); +#ifdef TCP_ACCOUNTING + sched_unpin(); +#endif + return (0); 
} /* * Determine length of data that should be transmitted, and flags @@ -21524,35 +19975,14 @@ rack_output(struct tcpcb *tp) rack_exit_probertt(rack, cts); } } - } - if(rack->policer_detect_on) { - /* - * If we are doing policer detetion we at a minium - * record the time but if possible add back to - * the bucket based on the idle time. - */ - uint64_t idle_t, u64_cts; - - segsiz = min(ctf_fixed_maxseg(tp), - rack->r_ctl.rc_pace_min_segs); - u64_cts = tcp_tv_to_lusectick(&tv); - if ((rack->rc_policer_detected == 1) && - (rack->r_ctl.policer_bucket_size > segsiz) && - (rack->r_ctl.policer_bw > 0) && - (u64_cts > rack->r_ctl.last_sendtime)) { - /* We are being policed add back the time */ - idle_t = u64_cts - rack->r_ctl.last_sendtime; - rack_credit_back_policer_idle_time(rack, idle_t, __LINE__); - } - rack->r_ctl.last_sendtime = u64_cts; - } + } else + tot_idle = 0; if (rack_use_fsb && (rack->r_ctl.fsb.tcp_ip_hdr) && (rack->r_fsb_inited == 0) && (rack->r_state != TCPS_CLOSED)) rack_init_fsb_block(tp, rack, tcp_outflags[tp->t_state]); if (rack->rc_sendvars_notset == 1) { - rack->r_ctl.idle_snd_una = tp->snd_una; rack->rc_sendvars_notset = 0; /* * Make sure any TCP timers (keep-alive) is not running. @@ -21599,12 +20029,6 @@ again: ms_cts = tcp_tv_to_mssectick(&tv); tso = 0; mtu = 0; - segsiz = min(ctf_fixed_maxseg(tp), rack->r_ctl.rc_pace_min_segs); - minseg = segsiz; - if (rack->r_ctl.rc_pace_max_segs == 0) - pace_max_seg = rack->rc_user_set_max_segs * segsiz; - else - pace_max_seg = rack->r_ctl.rc_pace_max_segs; if (TCPS_HAVEESTABLISHED(tp->t_state) && (rack->r_ctl.pcm_max_seg == 0)) { /* @@ -21620,7 +20044,7 @@ again: rack->r_ctl.pcm_max_seg = ctf_fixed_maxseg(tp) * 10; } } - if ((rack->r_ctl.pcm_max_seg != 0) && (rack->pcm_needed == 1)) { + if ((rack->r_ctl.pcm_max_seg != 0) && (rack->pcm_needed == 1)) { uint32_t rw_avail, cwa; if (tp->snd_wnd > ctf_outstanding(tp)) @@ -21664,7 +20088,7 @@ again: len = 0; rsm = NULL; if (flags & TH_RST) { - SOCKBUF_LOCK(&inp->inp_socket->so_snd); + SOCK_SENDBUF_LOCK(inp->inp_socket); so = inp->inp_socket; sb = &so->so_snd; goto send; @@ -21841,7 +20265,6 @@ again: * as long as we are not retransmiting. */ if ((rsm == NULL) && - (rack->do_detection == 0) && (V_tcp_map_entries_limit > 0) && (rack->r_ctl.rc_num_maps_alloced >= V_tcp_map_entries_limit)) { counter_u64_add(rack_to_alloc_limited, 1); @@ -21869,19 +20292,10 @@ again: ((rsm->r_flags & RACK_HAS_FIN) == 0)) { int ret; - if ((rack->rc_policer_detected == 1) && - (rack->r_ctl.policer_bucket_size > segsiz) && - (rack->r_ctl.policer_bw > 0)) { - /* Check to see if there is room */ - if (rack->r_ctl.current_policer_bucket < len) { - goto skip_fast_output; - } - } ret = rack_fast_rsm_output(tp, rack, rsm, ts_val, cts, ms_cts, &tv, len, doing_tlp); if (ret == 0) return (0); } -skip_fast_output: so = inp->inp_socket; sb = &so->so_snd; if (do_a_prefetch == 0) { @@ -21933,7 +20347,7 @@ skip_fast_output: kern_prefetch(end_rsm, &prefetch_rsm); prefetch_rsm = 1; } - SOCKBUF_LOCK(sb); + SOCK_SENDBUF_LOCK(so); if ((sack_rxmit == 0) && (TCPS_HAVEESTABLISHED(tp->t_state) || (tp->t_flags & TF_FASTOPEN))) { @@ -22072,43 +20486,6 @@ skip_fast_output: prefetch_so_done = 1; } orig_len = len; - if ((rack->rc_policer_detected == 1) && - (rack->r_ctl.policer_bucket_size > segsiz) && - (rack->r_ctl.policer_bw > 0) && - (len > 0)) { - /* - * Ok we believe we have a policer watching - * what we send, can we send len? If not can - * we tune it down to a smaller value? 
- */ - uint32_t plen, buck_needs; - - plen = rack_policer_check_send(rack, len, segsiz, &buck_needs); - if (plen == 0) { - /* - * We are not allowed to send. How long - * do we need to pace for i.e. how long - * before len is available to send? - */ - uint64_t lentime; - - lentime = buck_needs; - lentime *= HPTS_USEC_IN_SEC; - lentime /= rack->r_ctl.policer_bw; - slot = (uint32_t)lentime; - tot_len_this_send = 0; - SOCKBUF_UNLOCK(sb); - if (rack_verbose_logging > 0) - policer_detection_log(rack, len, slot, buck_needs, 0, 12); - rack_start_hpts_timer(rack, tp, cts, slot, 0, 0); - rack_log_type_just_return(rack, cts, 0, slot, hpts_calling, 0, cwnd_to_use); - goto just_return_clean; - } - if (plen < len) { - sendalot = 0; - len = plen; - } - } /* * Lop off SYN bit if it has already been sent. However, if this is * SYN-SENT state and if segment contains data and if we don't know @@ -22308,7 +20685,7 @@ skip_fast_output: if ((tp->t_flags & TF_TSO) && V_tcp_do_tso && len > segsiz && (tp->t_port == 0) && ((tp->t_flags & TF_SIGNATURE) == 0) && - tp->rcv_numsacks == 0 && sack_rxmit == 0 && + sack_rxmit == 0 && ipoptlen == 0) tso = 1; { @@ -22480,7 +20857,7 @@ dontupdate: * No reason to send a segment, just return. */ just_return: - SOCKBUF_UNLOCK(sb); + SOCK_SENDBUF_UNLOCK(so); just_return_nolock: { int app_limited = CTF_JR_SENT_DATA; @@ -22507,14 +20884,13 @@ just_return_nolock: rack->r_ctl.fsb.recwin = recwin; slot = rack_get_pacing_delay(rack, tp, tot_len_this_send, NULL, segsiz, __LINE__); if ((error == 0) && - (rack->rc_policer_detected == 0) && rack_use_rfo && ((flags & (TH_SYN|TH_FIN)) == 0) && (ipoptlen == 0) && - (tp->rcv_numsacks == 0) && rack->r_fsb_inited && TCPS_HAVEESTABLISHED(tp->t_state) && ((IN_RECOVERY(tp->t_flags)) == 0) && + (doing_tlp == 0) && (rack->r_must_retran == 0) && ((tp->t_flags & TF_NEEDFIN) == 0) && (len > 0) && (orig_len > 0) && @@ -22656,7 +21032,7 @@ just_return_nolock: } else log = 1; } - /* Mark the last packet has app limited */ + /* Mark the last packet as app limited */ rsm = tqhash_max(rack->r_ctl.tqh); if (rsm && ((rsm->r_flags & RACK_APP_LIMITED) == 0)) { if (rack->r_ctl.rc_app_limited_cnt == 0) @@ -22692,7 +21068,6 @@ just_return_nolock: rack_start_hpts_timer(rack, tp, cts, slot, tot_len_this_send, sup_rack); rack_log_type_just_return(rack, cts, tot_len_this_send, slot, hpts_calling, app_limited, cwnd_to_use); } -just_return_clean: #ifdef NETFLIX_SHARED_CWND if ((sbavail(sb) == 0) && rack->r_ctl.rc_scw) { @@ -22705,19 +21080,13 @@ just_return_clean: crtsc = get_cyclecount(); if (tp->t_flags2 & TF2_TCP_ACCOUNTING) { tp->tcp_cnt_counters[SND_OUT_DATA]++; - } - if (tp->t_flags2 & TF2_TCP_ACCOUNTING) { tp->tcp_proc_time[SND_OUT_DATA] += (crtsc - ts_val); - } - if (tp->t_flags2 & TF2_TCP_ACCOUNTING) { tp->tcp_cnt_counters[CNT_OF_MSS_OUT] += ((tot_len_this_send + segsiz - 1) / segsiz); } } else { crtsc = get_cyclecount(); if (tp->t_flags2 & TF2_TCP_ACCOUNTING) { tp->tcp_cnt_counters[SND_LIMITED]++; - } - if (tp->t_flags2 & TF2_TCP_ACCOUNTING) { tp->tcp_proc_time[SND_LIMITED] += (crtsc - ts_val); } } @@ -22742,7 +21111,7 @@ send: rack->r_ctl.rc_agg_early = 0; rack->r_early = 0; rack->r_late = 0; - SOCKBUF_UNLOCK(&so->so_snd); + SOCK_SENDBUF_UNLOCK(so); goto skip_all_send; } } @@ -22759,7 +21128,8 @@ send: * is acked first. 
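The hunks above and the earlier ones remove the policer token-bucket machinery (the policed-sending check, the idle-time credit, and this pre-send test). For context, a hedged user-space model of the two conversions that code performed — crediting tokens back for idle time and turning a token shortfall into a wait — might look like this; the names are invented and this is not the removed kernel code:

#include <stdint.h>

#define USECS_PER_SEC 1000000ULL

struct toy_bucket {
        uint64_t bw;            /* policed bandwidth, bytes/sec */
        uint32_t size;          /* bucket capacity, bytes */
        uint32_t tokens;        /* currently available bytes */
};

/* Idle for idle_usecs: refill tokens at bw, capped at the bucket size. */
static void
bucket_credit_idle(struct toy_bucket *b, uint64_t idle_usecs)
{
        uint64_t add;

        add = (idle_usecs * b->bw) / USECS_PER_SEC;
        if (b->tokens + add > b->size)
                b->tokens = b->size;
        else
                b->tokens += (uint32_t)add;
}

/* Short "deficit" bytes: how long until the policer would allow them? */
static uint32_t
bucket_wait_usecs(const struct toy_bucket *b, uint32_t deficit)
{
        if (b->bw == 0)
                return (0);     /* no estimate yet, nothing to wait on */
        return ((uint32_t)(((uint64_t)deficit * USECS_PER_SEC) / b->bw));
}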
*/ flags &= ~TH_FIN; - if ((sbused(sb) == (tp->snd_max - tp->snd_una)) && + if (TCPS_HAVEESTABLISHED(tp->t_state) && + (sbused(sb) == (tp->snd_max - tp->snd_una)) && ((tp->snd_max - tp->snd_una) <= segsiz)) { /* * Ok less than or right at a MSS is @@ -22958,13 +21328,11 @@ send: if (tp->t_port) { if (V_tcp_udp_tunneling_port == 0) { /* The port was removed?? */ - SOCKBUF_UNLOCK(&so->so_snd); + SOCK_SENDBUF_UNLOCK(so); #ifdef TCP_ACCOUNTING crtsc = get_cyclecount(); if (tp->t_flags2 & TF2_TCP_ACCOUNTING) { tp->tcp_cnt_counters[SND_OUT_FAIL]++; - } - if (tp->t_flags2 & TF2_TCP_ACCOUNTING) { tp->tcp_proc_time[SND_OUT_FAIL] += (crtsc - ts_val); } sched_unpin(); @@ -23016,7 +21384,8 @@ send: if (max_len <= 0) { len = 0; } else if (len > max_len) { - sendalot = 1; + if (doing_tlp == 0) + sendalot = 1; len = max_len; mark = 2; } @@ -23061,7 +21430,7 @@ send: * byte of the payload can be put into the * TCP segment. */ - SOCKBUF_UNLOCK(&so->so_snd); + SOCK_SENDBUF_UNLOCK(so); error = EMSGSIZE; sack_rxmit = 0; goto out; @@ -23144,7 +21513,7 @@ send: m = m_gethdr(M_NOWAIT, MT_DATA); if (m == NULL) { - SOCKBUF_UNLOCK(sb); + SOCK_SENDBUF_UNLOCK(so); error = ENOBUFS; sack_rxmit = 0; goto out; @@ -23202,7 +21571,7 @@ send: tso = 0; } if (m->m_next == NULL) { - SOCKBUF_UNLOCK(sb); + SOCK_SENDBUF_UNLOCK(so); (void)m_free(m); error = ENOBUFS; sack_rxmit = 0; @@ -23245,10 +21614,9 @@ send: flags |= TH_PUSH; add_flag |= RACK_HAD_PUSH; } - - SOCKBUF_UNLOCK(sb); + SOCK_SENDBUF_UNLOCK(so); } else { - SOCKBUF_UNLOCK(sb); + SOCK_SENDBUF_UNLOCK(so); if (tp->t_flags & TF_ACKNOW) KMOD_TCPSTAT_INC(tcps_sndacks); else if (flags & (TH_SYN | TH_FIN | TH_RST)) @@ -23271,7 +21639,7 @@ send: m->m_data += max_linkhdr; m->m_len = hdrlen; } - SOCKBUF_UNLOCK_ASSERT(sb); + SOCK_SENDBUF_UNLOCK_ASSERT(so); m->m_pkthdr.rcvif = (struct ifnet *)0; #ifdef MAC mac_inpcb_create_mbuf(inp, m); @@ -23538,7 +21906,7 @@ send: if (tcp_bblogging_on(rack->rc_tp)) { union tcp_log_stackspecific log; - memset(&log.u_bbr, 0, sizeof(log.u_bbr)); + memset(&log, 0, sizeof(log)); log.u_bbr.inhpts = tcp_in_hpts(rack->rc_tp); if (rack->rack_no_prr) log.u_bbr.flex1 = 0; @@ -23592,7 +21960,7 @@ send: } log.u_bbr.lt_epoch = cwnd_to_use; log.u_bbr.delivered = sendalot; - log.u_bbr.rttProp = (uint64_t)rsm; + log.u_bbr.rttProp = (uintptr_t)rsm; log.u_bbr.pkt_epoch = __LINE__; if (rsm) { log.u_bbr.delRate = rsm->r_flags; @@ -23714,6 +22082,8 @@ out: * In transmit state, time the transmission and arrange for the * retransmit. In persist state, just set snd_max. 
*/ + if ((rsm == NULL) && doing_tlp) + add_flag |= RACK_TLP; rack_log_output(tp, &to, len, rack_seq, (uint8_t) flags, error, rack_to_usec_ts(&tv), rsm, add_flag, s_mb, s_moff, hw_tls, segsiz); @@ -23800,15 +22170,14 @@ out: rack->r_ctl.rc_prr_sndcnt = 0; } sub_from_prr = 0; - if (doing_tlp) { - /* Make sure the TLP is added */ - add_flag |= RACK_TLP; - } else if (rsm) { - /* If its a resend without TLP then it must not have the flag */ - rsm->r_flags &= ~RACK_TLP; - } - - + if (rsm != NULL) { + if (doing_tlp) + /* Make sure the TLP is added */ + rsm->r_flags |= RACK_TLP; + else + /* If its a resend without TLP then it must not have the flag */ + rsm->r_flags &= ~RACK_TLP; + } if ((error == 0) && (len > 0) && (tp->snd_una == tp->snd_max)) @@ -23933,7 +22302,7 @@ out: len = n_len; sb_offset = tp->snd_max - tp->snd_una; /* Re-lock for the next spin */ - SOCKBUF_LOCK(sb); + SOCK_SENDBUF_LOCK(so); goto send; } } else { @@ -23952,7 +22321,7 @@ out: len = n_len; sb_offset = tp->snd_max - tp->snd_una; /* Re-lock for the next spin */ - SOCKBUF_LOCK(sb); + SOCK_SENDBUF_LOCK(so); goto send; } } @@ -23982,8 +22351,6 @@ nomore: crtsc = get_cyclecount(); if (tp->t_flags2 & TF2_TCP_ACCOUNTING) { tp->tcp_cnt_counters[SND_OUT_FAIL]++; - } - if (tp->t_flags2 & TF2_TCP_ACCOUNTING) { tp->tcp_proc_time[SND_OUT_FAIL] += (crtsc - ts_val); } sched_unpin(); @@ -24037,8 +22404,6 @@ nomore: crtsc = get_cyclecount(); if (tp->t_flags2 & TF2_TCP_ACCOUNTING) { tp->tcp_cnt_counters[SND_OUT_FAIL]++; - } - if (tp->t_flags2 & TF2_TCP_ACCOUNTING) { tp->tcp_proc_time[SND_OUT_FAIL] += (crtsc - ts_val); } sched_unpin(); @@ -24046,11 +22411,13 @@ nomore: return (error); case ENETUNREACH: counter_u64_add(rack_saw_enetunreach, 1); + /* FALLTHROUGH */ case EHOSTDOWN: case EHOSTUNREACH: case ENETDOWN: if (TCPS_HAVERCVDSYN(tp->t_state)) { tp->t_softerror = error; + error = 0; } /* FALLTHROUGH */ default: @@ -24060,8 +22427,6 @@ nomore: crtsc = get_cyclecount(); if (tp->t_flags2 & TF2_TCP_ACCOUNTING) { tp->tcp_cnt_counters[SND_OUT_FAIL]++; - } - if (tp->t_flags2 & TF2_TCP_ACCOUNTING) { tp->tcp_proc_time[SND_OUT_FAIL] += (crtsc - ts_val); } sched_unpin(); @@ -24150,8 +22515,7 @@ enobufs: ((flags & (TH_SYN|TH_FIN)) == 0) && (rsm == NULL) && (ipoptlen == 0) && - (tp->rcv_numsacks == 0) && - (rack->rc_policer_detected == 0) && + (doing_tlp == 0) && rack->r_fsb_inited && TCPS_HAVEESTABLISHED(tp->t_state) && ((IN_RECOVERY(tp->t_flags)) == 0) && @@ -24178,8 +22542,8 @@ enobufs: rack_use_rfo && ((flags & (TH_SYN|TH_FIN)) == 0) && (rsm == NULL) && + (doing_tlp == 0) && (ipoptlen == 0) && - (tp->rcv_numsacks == 0) && (rack->r_must_retran == 0) && rack->r_fsb_inited && TCPS_HAVEESTABLISHED(tp->t_state) && @@ -24195,7 +22559,7 @@ enobufs: segsiz, pace_max_seg, hw_tls, flags); if (rack->r_fast_output) { error = 0; - ret = rack_fast_output(tp, rack, ts_val, cts, ms_cts, &tv, tot_len_this_send, &error); + ret = rack_fast_output(tp, rack, ts_val, cts, ms_cts, &tv, &tot_len_this_send, &error, __LINE__); if (ret >= 0) return (ret); else if (error) @@ -24215,18 +22579,12 @@ skip_all_send: if (tot_len_this_send) { if (tp->t_flags2 & TF2_TCP_ACCOUNTING) { tp->tcp_cnt_counters[SND_OUT_DATA]++; - } - if (tp->t_flags2 & TF2_TCP_ACCOUNTING) { tp->tcp_proc_time[SND_OUT_DATA] += crtsc; - } - if (tp->t_flags2 & TF2_TCP_ACCOUNTING) { tp->tcp_cnt_counters[CNT_OF_MSS_OUT] += ((tot_len_this_send + segsiz - 1) /segsiz); } } else { if (tp->t_flags2 & TF2_TCP_ACCOUNTING) { tp->tcp_cnt_counters[SND_OUT_ACK]++; - } - if (tp->t_flags2 & TF2_TCP_ACCOUNTING) { 
tp->tcp_proc_time[SND_OUT_ACK] += crtsc; } } @@ -24562,28 +22920,7 @@ process_hybrid_pacing(struct tcp_rack *rack, struct tcp_hybrid_req *hybrid) static int rack_stack_information(struct tcpcb *tp, struct stack_specific_info *si) { - /* - * Gather rack specific information. - */ - struct tcp_rack *rack; - - rack = (struct tcp_rack *)tp->t_fb_ptr; /* We pulled a SSI info log out what was there */ - policer_detection_log(rack, rack->rc_highly_buffered, 0, 0, 0, 20); - if (rack->policer_detect_on) { - si->policer_detection_enabled = 1; - if (rack->rc_policer_detected) { - si->policer_detected = 1; - si->policer_bucket_size = rack->r_ctl.policer_bucket_size; - si->policer_last_bw = rack->r_ctl.policer_bw; - } else { - si->policer_detected = 0; - si->policer_bucket_size = 0; - si->policer_last_bw = 0; - } - si->current_round = rack->r_ctl.current_round; - si->highly_buffered = rack->rc_highly_buffered; - } si->bytes_transmitted = tp->t_sndbytes; si->bytes_retransmitted = tp->t_snd_rxt_bytes; return (0); @@ -24603,7 +22940,7 @@ rack_process_option(struct tcpcb *tp, struct tcp_rack *rack, int sopt_name, switch (sopt_name) { case TCP_RACK_SET_RXT_OPTIONS: - if ((optval >= 0) && (optval <= 2)) { + if (optval <= 2) { rack_init_retransmit_value(rack, optval); } else { /* @@ -24650,7 +22987,7 @@ rack_process_option(struct tcpcb *tp, struct tcp_rack *rack, int sopt_name, rack->r_ctl.saved_hibeta = optval; if (rack->rc_pacing_cc_set) rack_undo_cc_pacing(rack); - rack->r_ctl.rc_saved_beta.beta = optval; + rack->r_ctl.rc_saved_beta = optval; } if (rack->rc_pacing_cc_set == 0) rack_set_cc_pacing(rack); @@ -24701,8 +23038,7 @@ rack_process_option(struct tcpcb *tp, struct tcp_rack *rack, int sopt_name, * Not pacing yet so set it into our local * rack pcb storage. */ - rack->r_ctl.rc_saved_beta.beta_ecn = optval; - rack->r_ctl.rc_saved_beta.newreno_flags = CC_NEWRENO_BETA_ECN_ENABLED; + rack->r_ctl.rc_saved_beta_ecn = optval; } break; case TCP_DEFER_OPTIONS: @@ -24814,36 +23150,6 @@ rack_process_option(struct tcpcb *tp, struct tcp_rack *rack, int sopt_name, case TCP_RACK_DGP_IN_REC: error = EINVAL; break; - case TCP_POLICER_DETECT: /* URL:pol_det */ - RACK_OPTS_INC(tcp_pol_detect); - rack_translate_policer_detect(rack, optval); - break; - case TCP_POLICER_MSS: - RACK_OPTS_INC(tcp_pol_mss); - rack->r_ctl.policer_del_mss = (uint8_t)optval; - if (optval & 0x00000100) { - /* - * Value is setup like so: - * VVVV VVVV VVVV VVVV VVVV VVAI MMMM MMMM - * Where MMMM MMMM is MSS setting - * I (9th bit) is the Postive value that - * says it is being set (if its 0 then the - * upper bits 11 - 32 have no meaning. - * This allows setting it off with - * 0x000001MM. - * - * The 10th bit is used to turn on the - * alternate median (not the expanded one). 
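A hedged decoder for the packed option value described in the removed comment above; the struct and function names are invented for illustration only.

#include <stdint.h>

/*
 * Layout per the removed comment: VVVV VVVV VVVV VVVV VVVV VVAI MMMM MMMM
 *   bits 0-7   (M): MSS value
 *   bit  8     (I): set flag; if clear the upper bits carry no meaning
 *   bit  9     (A): use the alternate median
 *   bits 10-31 (V): bandwidth-compensation value
 */
struct policer_mss_opt {
        uint8_t  del_mss;
        uint8_t  alt_median;
        uint32_t bw_comp;
};

static void
decode_policer_mss(uint32_t optval, struct policer_mss_opt *o)
{
        o->del_mss = (uint8_t)optval;
        o->bw_comp = (optval & 0x00000100) ? (optval >> 10) : 0;
        o->alt_median = (optval & 0x00000200) ? 1 : 0;
}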
- * - */ - rack->r_ctl.pol_bw_comp = (optval >> 10); - } - if (optval & 0x00000200) { - rack->r_ctl.policer_alt_median = 1; - } else { - rack->r_ctl.policer_alt_median = 0; - } - break; case TCP_RACK_PACE_TO_FILL: RACK_OPTS_INC(tcp_fillcw); if (optval == 0) @@ -24929,11 +23235,7 @@ rack_process_option(struct tcpcb *tp, struct tcp_rack *rack, int sopt_name, } break; case TCP_RACK_DO_DETECTION: - RACK_OPTS_INC(tcp_rack_do_detection); - if (optval == 0) - rack->do_detection = 0; - else - rack->do_detection = 1; + error = EINVAL; break; case TCP_RACK_TLP_USE: if ((optval < TLP_USE_ID) || (optval > TLP_USE_TWO_TWO)) { @@ -25462,7 +23764,7 @@ rack_inherit(struct tcpcb *tp, struct inpcb *parent) if (src->rack_hibeta != dest->rack_hibeta) { cnt++; if (src->rack_hibeta) { - dest->r_ctl.rc_saved_beta.beta = src->r_ctl.rc_saved_beta.beta; + dest->r_ctl.rc_saved_beta = src->r_ctl.rc_saved_beta; dest->rack_hibeta = 1; } else { dest->rack_hibeta = 0; @@ -25474,12 +23776,8 @@ rack_inherit(struct tcpcb *tp, struct inpcb *parent) cnt++; } /* TCP_RACK_PACING_BETA_ECN */ - if (dest->r_ctl.rc_saved_beta.beta_ecn != src->r_ctl.rc_saved_beta.beta_ecn) { - dest->r_ctl.rc_saved_beta.beta_ecn = src->r_ctl.rc_saved_beta.beta_ecn; - cnt++; - } - if (dest->r_ctl.rc_saved_beta.newreno_flags != src->r_ctl.rc_saved_beta.newreno_flags) { - dest->r_ctl.rc_saved_beta.newreno_flags = src->r_ctl.rc_saved_beta.newreno_flags; + if (dest->r_ctl.rc_saved_beta_ecn != src->r_ctl.rc_saved_beta_ecn) { + dest->r_ctl.rc_saved_beta_ecn = src->r_ctl.rc_saved_beta_ecn; cnt++; } /* We do not do TCP_DEFER_OPTIONS */ @@ -25514,43 +23812,6 @@ rack_inherit(struct tcpcb *tp, struct inpcb *parent) dest->r_limit_scw = src->r_limit_scw; cnt++; } - /* TCP_POLICER_DETECT */ - if (dest->r_ctl.policer_rxt_threshold != src->r_ctl.policer_rxt_threshold) { - dest->r_ctl.policer_rxt_threshold = src->r_ctl.policer_rxt_threshold; - cnt++; - } - if (dest->r_ctl.policer_avg_threshold != src->r_ctl.policer_avg_threshold) { - dest->r_ctl.policer_avg_threshold = src->r_ctl.policer_avg_threshold; - cnt++; - } - if (dest->r_ctl.policer_med_threshold != src->r_ctl.policer_med_threshold) { - dest->r_ctl.policer_med_threshold = src->r_ctl.policer_med_threshold; - cnt++; - } - if (dest->policer_detect_on != src->policer_detect_on) { - dest->policer_detect_on = src->policer_detect_on; - cnt++; - } - - if (dest->r_ctl.saved_policer_val != src->r_ctl.saved_policer_val) { - dest->r_ctl.saved_policer_val = src->r_ctl.saved_policer_val; - cnt++; - } - /* TCP_POLICER_MSS */ - if (dest->r_ctl.policer_del_mss != src->r_ctl.policer_del_mss) { - dest->r_ctl.policer_del_mss = src->r_ctl.policer_del_mss; - cnt++; - } - - if (dest->r_ctl.pol_bw_comp != src->r_ctl.pol_bw_comp) { - dest->r_ctl.pol_bw_comp = src->r_ctl.pol_bw_comp; - cnt++; - } - - if (dest->r_ctl.policer_alt_median != src->r_ctl.policer_alt_median) { - dest->r_ctl.policer_alt_median = src->r_ctl.policer_alt_median; - cnt++; - } /* TCP_RACK_PACE_TO_FILL */ if (dest->rc_pace_to_cwnd != src->rc_pace_to_cwnd) { dest->rc_pace_to_cwnd = src->rc_pace_to_cwnd; @@ -25625,11 +23886,6 @@ rack_inherit(struct tcpcb *tp, struct inpcb *parent) dest->r_ctl.rack_per_of_gp_ca = src->r_ctl.rack_per_of_gp_ca; cnt++; } - /* TCP_RACK_DO_DETECTION */ - if (dest->do_detection != src->do_detection) { - dest->do_detection = src->do_detection; - cnt++; - } /* TCP_RACK_TLP_USE */ if (dest->rack_tlp_threshold_use != src->rack_tlp_threshold_use) { dest->rack_tlp_threshold_use = src->rack_tlp_threshold_use; @@ -25908,7 +24164,7 @@ static struct 
tcp_function_block __tcp_rack = { .tfb_compute_pipe = rack_compute_pipe, .tfb_stack_info = rack_stack_information, .tfb_inherit = rack_inherit, - .tfb_flags = TCP_FUNC_OUTPUT_CANDROP, + .tfb_flags = TCP_FUNC_OUTPUT_CANDROP | TCP_FUNC_DEFAULT_OK, }; @@ -26007,8 +24263,6 @@ rack_set_sockopt(struct tcpcb *tp, struct sockopt *sopt) case TCP_RACK_PACING_BETA_ECN: /* URL:pacing_beta_ecn */ case TCP_RACK_PACE_TO_FILL: /* URL:fillcw */ /* End pacing related */ - case TCP_POLICER_DETECT: /* URL:pol_det */ - case TCP_POLICER_MSS: /* URL:pol_mss */ case TCP_DELACK: /* URL:delack (in base TCP i.e. tcp_hints along with cc etc ) */ case TCP_RACK_PRR_SENDALOT: /* URL:prr_sendalot */ case TCP_RACK_MIN_TO: /* URL:min_to */ @@ -26020,7 +24274,6 @@ rack_set_sockopt(struct tcpcb *tp, struct sockopt *sopt) case TCP_RACK_TLP_USE: /* URL:tlp_use */ case TCP_BBR_RACK_RTT_USE: /* URL:rttuse */ case TCP_BBR_USE_RACK_RR: /* URL:rackrr */ - case TCP_RACK_DO_DETECTION: /* URL:detect */ case TCP_NO_PRR: /* URL:noprr */ case TCP_TIMELY_DYN_ADJ: /* URL:dynamic */ case TCP_DATA_AFTER_CLOSE: /* no URL */ @@ -26199,20 +24452,34 @@ rack_get_sockopt(struct tcpcb *tp, struct sockopt *sopt) * when you exit recovery. */ case TCP_RACK_PACING_BETA: + if (strcmp(tp->t_cc->name, CCALGONAME_NEWRENO) != 0) + error = EINVAL; + else if (rack->rc_pacing_cc_set == 0) + optval = rack->r_ctl.rc_saved_beta; + else { + /* + * Reach out into the CC data and report back what + * I have previously set. Yeah it looks hackish but + * we don't want to report the saved values. + */ + if (tp->t_ccv.cc_data) + optval = ((struct newreno *)tp->t_ccv.cc_data)->beta; + else + error = EINVAL; + } break; - /* - * Beta_ecn is the congestion control value for NewReno that influences how - * much of a backoff happens when a ECN mark is detected. It is normally set - * to 80 for 80% i.e. the cwnd is reduced by 20% of its previous value when - * you exit recovery. Note that classic ECN has a beta of 50, it is only - * ABE Ecn that uses this "less" value, but we do too with pacing :) - */ - + /* + * Beta_ecn is the congestion control value for NewReno that influences how + * much of a backoff happens when a ECN mark is detected. It is normally set + * to 80 for 80% i.e. the cwnd is reduced by 20% of its previous value when + * you exit recovery. 
Note that classic ECN has a beta of 50, it is only + * ABE Ecn that uses this "less" value, but we do too with pacing :) + */ case TCP_RACK_PACING_BETA_ECN: if (strcmp(tp->t_cc->name, CCALGONAME_NEWRENO) != 0) error = EINVAL; else if (rack->rc_pacing_cc_set == 0) - optval = rack->r_ctl.rc_saved_beta.beta_ecn; + optval = rack->r_ctl.rc_saved_beta_ecn; else { /* * Reach out into the CC data and report back what @@ -26253,12 +24520,6 @@ rack_get_sockopt(struct tcpcb *tp, struct sockopt *sopt) case TCP_RACK_HI_BETA: optval = rack->rack_hibeta; break; - case TCP_POLICER_MSS: - optval = rack->r_ctl.policer_del_mss; - break; - case TCP_POLICER_DETECT: - optval = rack->r_ctl.saved_policer_val; - break; case TCP_DEFER_OPTIONS: optval = rack->defer_options; break; @@ -26327,7 +24588,7 @@ rack_get_sockopt(struct tcpcb *tp, struct sockopt *sopt) } break; case TCP_RACK_DO_DETECTION: - optval = rack->do_detection; + error = EINVAL; break; case TCP_RACK_MBUF_QUEUE: /* Now do we use the LRO mbuf-queue feature */ diff --git a/sys/netinet/tcp_stacks/rack_bbr_common.c b/sys/netinet/tcp_stacks/rack_bbr_common.c index 4a4a8af2bd78..d1c4ba58bf55 100644 --- a/sys/netinet/tcp_stacks/rack_bbr_common.c +++ b/sys/netinet/tcp_stacks/rack_bbr_common.c @@ -361,26 +361,15 @@ ctf_process_inbound_raw(struct tcpcb *tp, struct mbuf *m, int has_pkt) int32_t retval, nxt_pkt, tlen, off; int etype = 0; uint16_t drop_hdrlen; - uint8_t iptos, no_vn=0; + uint8_t iptos; inp = tptoinpcb(tp); INP_WLOCK_ASSERT(inp); NET_EPOCH_ASSERT(); - - if (m) - ifp = m_rcvif(m); - else - ifp = NULL; - if (ifp == NULL) { - /* - * We probably should not work around - * but kassert, since lro alwasy sets rcvif. - */ - no_vn = 1; - goto skip_vnet; - } + KASSERT(m != NULL, ("ctf_process_inbound_raw: m == NULL")); + ifp = m_rcvif(m); + KASSERT(ifp != NULL, ("ctf_process_inbound_raw: ifp == NULL")); CURVNET_SET(ifp->if_vnet); -skip_vnet: tcp_get_usecs(&tv); while (m) { m_save = m->m_nextpkt; @@ -466,19 +455,15 @@ skip_vnet: m_freem(m); m = m_save; } - if (no_vn == 0) { - CURVNET_RESTORE(); - } + CURVNET_RESTORE(); INP_UNLOCK_ASSERT(inp); - return(retval); + return (retval); } skipped_pkt: m = m_save; } - if (no_vn == 0) { - CURVNET_RESTORE(); - } - return(retval); + CURVNET_RESTORE(); + return (0); } int @@ -532,28 +517,19 @@ ctf_do_dropwithreset(struct mbuf *m, struct tcpcb *tp, struct tcphdr *th, } void -ctf_ack_war_checks(struct tcpcb *tp, uint32_t *ts, uint32_t *cnt) +ctf_ack_war_checks(struct tcpcb *tp) { - if ((ts != NULL) && (cnt != NULL) && - (tcp_ack_war_time_window > 0) && - (tcp_ack_war_cnt > 0)) { - /* We are possibly doing ack war prevention */ - uint32_t cts; - - /* - * We use a msec tick here which gives us - * roughly 49 days. We don't need the - * precision of a microsecond timestamp which - * would only give us hours. - */ - cts = tcp_ts_getticks(); - if (TSTMP_LT((*ts), cts)) { - /* Timestamp is in the past */ - *cnt = 0; - *ts = (cts + tcp_ack_war_time_window); + sbintime_t now; + + if ((V_tcp_ack_war_time_window > 0) && (V_tcp_ack_war_cnt > 0)) { + now = getsbinuptime(); + if (tp->t_challenge_ack_end < now) { + tp->t_challenge_ack_cnt = 0; + tp->t_challenge_ack_end = now + + V_tcp_ack_war_time_window * SBT_1MS; } - if (*cnt < tcp_ack_war_cnt) { - *cnt = (*cnt + 1); + if (tp->t_challenge_ack_cnt < V_tcp_ack_war_cnt) { + tp->t_challenge_ack_cnt++; tp->t_flags |= TF_ACKNOW; } else tp->t_flags &= ~TF_ACKNOW; @@ -568,10 +544,9 @@ ctf_ack_war_checks(struct tcpcb *tp, uint32_t *ts, uint32_t *cnt) * TCB is still valid and locked. 
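The rewritten ctf_ack_war_checks() above counts challenge ACKs against a per-connection window kept in sbintime_t. A simplified user-space model of the same allow/deny decision, using a millisecond clock and invented types rather than the kernel's, looks roughly like this:

#include <stdint.h>

struct ack_war_state {
        uint64_t window_end_ms;
        uint32_t cnt;
};

/*
 * Allow at most "limit" challenge ACKs per "window_ms" milliseconds,
 * resetting the count when the window expires. A disabled limiter
 * (window or limit of zero) is treated here as always-allow.
 */
static int
challenge_ack_allowed(struct ack_war_state *st, uint64_t now_ms,
    uint32_t window_ms, uint32_t limit)
{
        if (window_ms == 0 || limit == 0)
                return (1);
        if (st->window_end_ms < now_ms) {
                st->cnt = 0;
                st->window_end_ms = now_ms + window_ms;
        }
        if (st->cnt < limit) {
                st->cnt++;
                return (1);
        }
        return (0);
}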
*/ int -_ctf_drop_checks(struct tcpopt *to, struct mbuf *m, struct tcphdr *th, - struct tcpcb *tp, int32_t *tlenp, - int32_t *thf, int32_t *drop_hdrlen, int32_t *ret_val, - uint32_t *ts, uint32_t *cnt) +ctf_drop_checks(struct tcpopt *to, struct mbuf *m, struct tcphdr *th, + struct tcpcb *tp, int32_t *tlenp, + int32_t *thf, int32_t *drop_hdrlen, int32_t *ret_val) { int32_t todrop; int32_t thflags; @@ -605,7 +580,7 @@ _ctf_drop_checks(struct tcpopt *to, struct mbuf *m, struct tcphdr *th, * Send an ACK to resynchronize and drop any data. * But keep on processing for RST or ACK. */ - ctf_ack_war_checks(tp, ts, cnt); + ctf_ack_war_checks(tp); todrop = tlen; KMOD_TCPSTAT_INC(tcps_rcvduppack); KMOD_TCPSTAT_ADD(tcps_rcvdupbyte, todrop); @@ -621,7 +596,7 @@ _ctf_drop_checks(struct tcpopt *to, struct mbuf *m, struct tcphdr *th, * ACK now, as the next in-sequence segment * will clear the DSACK block again */ - ctf_ack_war_checks(tp, ts, cnt); + ctf_ack_war_checks(tp); if (tp->t_flags & TF_ACKNOW) tcp_update_sack_list(tp, th->th_seq, th->th_seq + todrop); @@ -653,10 +628,10 @@ _ctf_drop_checks(struct tcpopt *to, struct mbuf *m, struct tcphdr *th, * ack. */ if (tp->rcv_wnd == 0 && th->th_seq == tp->rcv_nxt) { - ctf_ack_war_checks(tp, ts, cnt); + ctf_ack_war_checks(tp); KMOD_TCPSTAT_INC(tcps_rcvwinprobe); } else { - __ctf_do_dropafterack(m, tp, th, thflags, tlen, ret_val, ts, cnt); + ctf_do_dropafterack(m, tp, th, thflags, tlen, ret_val); return (1); } } else @@ -677,7 +652,7 @@ _ctf_drop_checks(struct tcpopt *to, struct mbuf *m, struct tcphdr *th, * and valid. */ void -__ctf_do_dropafterack(struct mbuf *m, struct tcpcb *tp, struct tcphdr *th, int32_t thflags, int32_t tlen, int32_t *ret_val, uint32_t *ts, uint32_t *cnt) +ctf_do_dropafterack(struct mbuf *m, struct tcpcb *tp, struct tcphdr *th, int32_t thflags, int32_t tlen, int32_t *ret_val) { /* * Generate an ACK dropping incoming segment if it occupies sequence @@ -697,11 +672,11 @@ __ctf_do_dropafterack(struct mbuf *m, struct tcpcb *tp, struct tcphdr *th, int32 (SEQ_GT(tp->snd_una, th->th_ack) || SEQ_GT(th->th_ack, tp->snd_max))) { *ret_val = 1; - ctf_do_dropwithreset(m, tp, th, BANDLIM_RST_OPENPORT, tlen); + ctf_do_dropwithreset(m, tp, th, BANDLIM_TCP_RST, tlen); return; } else *ret_val = 0; - ctf_ack_war_checks(tp, ts, cnt); + ctf_ack_war_checks(tp); if (m) m_freem(m); } @@ -720,8 +695,8 @@ ctf_do_drop(struct mbuf *m, struct tcpcb *tp) } int -__ctf_process_rst(struct mbuf *m, struct tcphdr *th, struct socket *so, - struct tcpcb *tp, uint32_t *ts, uint32_t *cnt) +ctf_process_rst(struct mbuf *m, struct tcphdr *th, struct socket *so, + struct tcpcb *tp) { /* * RFC5961 Section 3.2 @@ -768,40 +743,8 @@ __ctf_process_rst(struct mbuf *m, struct tcphdr *th, struct socket *so, dropped = 1; ctf_do_drop(m, tp); } else { - int send_challenge; - KMOD_TCPSTAT_INC(tcps_badrst); - if ((ts != NULL) && (cnt != NULL) && - (tcp_ack_war_time_window > 0) && - (tcp_ack_war_cnt > 0)) { - /* We are possibly preventing an ack-rst war prevention */ - uint32_t cts; - - /* - * We use a msec tick here which gives us - * roughly 49 days. We don't need the - * precision of a microsecond timestamp which - * would only give us hours. - */ - cts = tcp_ts_getticks(); - if (TSTMP_LT((*ts), cts)) { - /* Timestamp is in the past */ - *cnt = 0; - *ts = (cts + tcp_ack_war_time_window); - } - if (*cnt < tcp_ack_war_cnt) { - *cnt = (*cnt + 1); - send_challenge = 1; - } else - send_challenge = 0; - } else - send_challenge = 1; - if (send_challenge) { - /* Send challenge ACK. 
*/ - tcp_respond(tp, mtod(m, void *), th, m, - tp->rcv_nxt, tp->snd_nxt, TH_ACK); - tp->last_ack_sent = tp->rcv_nxt; - } + tcp_send_challenge_ack(tp, th, m); } } else { m_freem(m); diff --git a/sys/netinet/tcp_stacks/rack_bbr_common.h b/sys/netinet/tcp_stacks/rack_bbr_common.h index 9e5fbe675a3a..6a8a056d89b0 100644 --- a/sys/netinet/tcp_stacks/rack_bbr_common.h +++ b/sys/netinet/tcp_stacks/rack_bbr_common.h @@ -89,19 +89,15 @@ int ctf_do_queued_segments(struct tcpcb *tp, int have_pkt); uint32_t ctf_outstanding(struct tcpcb *tp); uint32_t ctf_flight_size(struct tcpcb *tp, uint32_t rc_sacked); int -_ctf_drop_checks(struct tcpopt *to, struct mbuf *m, struct tcphdr *th, +ctf_drop_checks(struct tcpopt *to, struct mbuf *m, struct tcphdr *th, struct tcpcb *tp, int32_t *tlenp, - int32_t *thf, int32_t *drop_hdrlen, int32_t *ret_val, - uint32_t *ts, uint32_t *cnt); -void ctf_ack_war_checks(struct tcpcb *tp, uint32_t *ts, uint32_t *cnt); -#define ctf_drop_checks(a, b, c, d, e, f, g, h) _ctf_drop_checks(a, b, c, d, e, f, g, h, NULL, NULL) + int32_t *thf, int32_t *drop_hdrlen, int32_t *ret_val); +void ctf_ack_war_checks(struct tcpcb *tp); void -__ctf_do_dropafterack(struct mbuf *m, struct tcpcb *tp, +ctf_do_dropafterack(struct mbuf *m, struct tcpcb *tp, struct tcphdr *th, int32_t thflags, int32_t tlen, - int32_t *ret_val, uint32_t *ts, uint32_t *cnt); - -#define ctf_do_dropafterack(a, b, c, d, e, f) __ctf_do_dropafterack(a, b, c, d, e, f, NULL, NULL) + int32_t *ret_val); void ctf_do_dropwithreset(struct mbuf *m, struct tcpcb *tp, @@ -110,9 +106,8 @@ void ctf_do_drop(struct mbuf *m, struct tcpcb *tp); int -__ctf_process_rst(struct mbuf *m, struct tcphdr *th, - struct socket *so, struct tcpcb *tp, uint32_t *ts, uint32_t *cnt); -#define ctf_process_rst(m, t, s, p) __ctf_process_rst(m, t, s, p, NULL, NULL) +ctf_process_rst(struct mbuf *m, struct tcphdr *th, + struct socket *so, struct tcpcb *tp); void ctf_challenge_ack(struct mbuf *m, struct tcphdr *th, diff --git a/sys/netinet/tcp_stacks/rack_pcm.c b/sys/netinet/tcp_stacks/rack_pcm.c index 09e90da88895..b0e300847c4a 100644 --- a/sys/netinet/tcp_stacks/rack_pcm.c +++ b/sys/netinet/tcp_stacks/rack_pcm.c @@ -241,7 +241,7 @@ skip_ack_accounting: for (i=0; i<rack->r_ctl.pcm_i.cnt; i++) { e = &rack->r_ctl.pcm_s[i]; - memset(&log.u_bbr, 0, sizeof(log.u_bbr)); + memset(&log, 0, sizeof(log)); log.u_bbr.timeStamp = tcp_tv_to_usectick(&tv); log.u_bbr.inflight = ctf_flight_size(rack->rc_tp, rack->r_ctl.rc_sacked); log.u_bbr.flex8 = 1; diff --git a/sys/netinet/tcp_stacks/sack_filter.c b/sys/netinet/tcp_stacks/sack_filter.c index e82fcee2ffac..fc9ee8454a1e 100644 --- a/sys/netinet/tcp_stacks/sack_filter.c +++ b/sys/netinet/tcp_stacks/sack_filter.c @@ -35,7 +35,13 @@ #include <sys/sockopt.h> #endif #include <netinet/in.h> +#ifdef _KERNEL #include <netinet/in_pcb.h> +#else +struct inpcb { + uint32_t stuff; +}; +#endif #include <netinet/tcp.h> #include <netinet/tcp_var.h> #include <netinet/tcp_seq.h> @@ -86,9 +92,9 @@ uint64_t cnt_used_oldsack = 0; int highest_used=0; int over_written=0; int empty_avail=0; -int no_collapse = 0; FILE *out = NULL; FILE *in = NULL; + #endif #define sack_blk_used(sf, i) ((1 << i) & sf->sf_bits) @@ -96,6 +102,13 @@ FILE *in = NULL; #define sack_blk_clr(sf, i) (~(1 << i) & sf->sf_bits) #ifndef _KERNEL + +static u_int tcp_fixed_maxseg(const struct tcpcb *tp) +{ + /* Lets pretend their are timestamps on for user space */ + return (tp->t_maxseg - 12); +} + static #endif void @@ -118,7 +131,7 @@ sack_filter_prune(struct sack_filter *sf, tcp_seq 
th_ack) /* start with the oldest */ for (i = 0; i < SACK_FILTER_BLOCKS; i++) { if (sack_blk_used(sf, i)) { - if (SEQ_GT(th_ack, sf->sf_blks[i].end)) { + if (SEQ_GEQ(th_ack, sf->sf_blks[i].end)) { /* This block is consumed */ sf->sf_bits = sack_blk_clr(sf, i); sf->sf_used--; @@ -143,23 +156,27 @@ sack_filter_prune(struct sack_filter *sf, tcp_seq th_ack) * if part of it is on the board. */ static int32_t -is_sack_on_board(struct sack_filter *sf, struct sackblk *b) +is_sack_on_board(struct sack_filter *sf, struct sackblk *b, int32_t segmax, uint32_t snd_max) { int32_t i, cnt; + int span_cnt = 0; + uint32_t span_start, span_end; + if (SEQ_LT(b->start, sf->sf_ack)) { + /* Behind cum-ack update */ + b->start = sf->sf_ack; + } + if (SEQ_LT(b->end, sf->sf_ack)) { + /* End back behind too */ + b->end = sf->sf_ack; + } + if (b->start == b->end) { + return(1); + } + span_start = b->start; + span_end = b->end; for (i = sf->sf_cur, cnt=0; cnt < SACK_FILTER_BLOCKS; cnt++) { if (sack_blk_used(sf, i)) { - if (SEQ_LT(b->start, sf->sf_ack)) { - /* Behind cum-ack update */ - b->start = sf->sf_ack; - } - if (SEQ_LT(b->end, sf->sf_ack)) { - /* End back behind too */ - b->end = sf->sf_ack; - } - if (b->start == b->end) { - return(1); - } /* Jonathans Rule 1 */ if (SEQ_LEQ(sf->sf_blks[i].start, b->start) && SEQ_GEQ(sf->sf_blks[i].end, b->end)) { @@ -184,6 +201,15 @@ is_sack_on_board(struct sack_filter *sf, struct sackblk *b) * board |---| * sack |---| */ + if ((b->end != snd_max) && + (span_cnt < 2) && + ((b->end - b->start) < segmax)) { + /* + * Too small for us to mess with so we + * pretend its on the board. + */ + return (1); + } goto nxt_blk; } /* Jonathans Rule 3 */ @@ -194,6 +220,16 @@ is_sack_on_board(struct sack_filter *sf, struct sackblk *b) * board |---| * sack |---| */ + if ((b->end != snd_max) && + (sf->sf_blks[i].end != snd_max) && + (span_cnt < 2) && + ((b->end - b->start) < segmax)) { + /* + * Too small for us to mess with so we + * pretend its on the board. + */ + return (1); + } goto nxt_blk; } if (SEQ_LEQ(sf->sf_blks[i].start, b->start)) { @@ -207,12 +243,36 @@ is_sack_on_board(struct sack_filter *sf, struct sackblk *b) * sack |--------------| * * up with this one (we have part of it). + * * 1) Update the board block to the new end * and * 2) Update the start of this block to my end. + * + * We only do this if the new piece is large enough. */ + if (((b->end != snd_max) || (sf->sf_blks[i].end == snd_max)) && + (span_cnt == 0) && + ((b->end - sf->sf_blks[i].end) < segmax)) { + /* + * Too small for us to mess with so we + * pretend its on the board. + */ + return (1); + } b->start = sf->sf_blks[i].end; sf->sf_blks[i].end = b->end; + if (span_cnt == 0) { + span_start = sf->sf_blks[i].start; + span_end = sf->sf_blks[i].end; + } else { + if (SEQ_LT(span_start, sf->sf_blks[i].start)) { + span_start = sf->sf_blks[i].start; + } + if (SEQ_GT(span_end, sf->sf_blks[i].end)) { + span_end = sf->sf_blks[i].end; + } + } + span_cnt++; goto nxt_blk; } if (SEQ_GEQ(sf->sf_blks[i].end, b->end)) { @@ -224,12 +284,36 @@ is_sack_on_board(struct sack_filter *sf, struct sackblk *b) * <or> * board |----| * sack |----------| + * * 1) Update the board block to the new start * and * 2) Update the start of this block to my end. + * + * We only do this if the new piece is large enough. */ + if (((b->end != snd_max) || (sf->sf_blks[i].end == snd_max)) && + (span_cnt == 0) && + ((sf->sf_blks[i].start - b->start) < segmax)) { + /* + * Too small for us to mess with so we + * pretend its on the board. 
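The segmax and snd_max arguments added above let the filter ignore SACK blocks smaller than one segment unless they end at snd_max, so a tail probe (for example a TLP) still gets through. A minimal illustration of just that size test, with invented names, ignoring sequence wraparound and the span tracking the real code also performs:

#include <stdint.h>

static int
sack_block_too_small(uint32_t start, uint32_t end, uint32_t segmax,
    uint32_t snd_max)
{
        if (end == snd_max)
                return (0);     /* tail block, always worth keeping */
        return ((end - start) < segmax);
}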
+ */ + return (1); + } b->end = sf->sf_blks[i].start; sf->sf_blks[i].start = b->start; + if (span_cnt == 0) { + span_start = sf->sf_blks[i].start; + span_end = sf->sf_blks[i].end; + } else { + if (SEQ_LT(span_start, sf->sf_blks[i].start)) { + span_start = sf->sf_blks[i].start; + } + if (SEQ_GT(span_end, sf->sf_blks[i].end)) { + span_end = sf->sf_blks[i].end; + } + } + span_cnt++; goto nxt_blk; } } @@ -238,46 +322,23 @@ is_sack_on_board(struct sack_filter *sf, struct sackblk *b) i %= SACK_FILTER_BLOCKS; } /* Did we totally consume it in pieces? */ - if (b->start != b->end) - return(0); - else - return(1); -} - -static int32_t -sack_filter_old(struct sack_filter *sf, struct sackblk *in, int numblks) -{ - int32_t num, i; - struct sackblk blkboard[TCP_MAX_SACK]; - /* - * An old sack has arrived. It may contain data - * we do not have. We might not have it since - * we could have had a lost ack <or> we might have the - * entire thing on our current board. We want to prune - * off anything we have. With this function though we - * won't add to the board. - */ - for( i = 0, num = 0; i<numblks; i++ ) { - if (is_sack_on_board(sf, &in[i])) { -#ifndef _KERNEL - cnt_skipped_oldsack++; -#endif - continue; + if (b->start != b->end) { + if ((b->end != snd_max) && + ((b->end - b->start) < segmax) && + ((span_end - span_start) < segmax)) { + /* + * Too small for us to mess with so we + * pretend its on the board. + */ + return (1); } - /* Did not find it (or found only - * a piece of it). Copy it to - * our outgoing board. + return(0); + } else { + /* + * It was all consumed by the board. */ - memcpy(&blkboard[num], &in[i], sizeof(struct sackblk)); -#ifndef _KERNEL - cnt_used_oldsack++; -#endif - num++; - } - if (num) { - memcpy(in, blkboard, (num * sizeof(struct sackblk))); + return(1); } - return (num); } /* @@ -303,54 +364,53 @@ sack_move_to_empty(struct sack_filter *sf, uint32_t idx) } static int32_t -sack_filter_new(struct sack_filter *sf, struct sackblk *in, int numblks, tcp_seq th_ack) +sack_filter_run(struct sack_filter *sf, struct sackblk *in, int numblks, tcp_seq th_ack, int32_t segmax, uint32_t snd_max) { struct sackblk blkboard[TCP_MAX_SACK]; - int32_t num, i; + int32_t num, i, room, at; /* * First lets trim the old and possibly * throw any away we have. */ for(i=0, num=0; i<numblks; i++) { - if (is_sack_on_board(sf, &in[i])) + if (is_sack_on_board(sf, &in[i], segmax, snd_max)) continue; memcpy(&blkboard[num], &in[i], sizeof(struct sackblk)); num++; } - if (num == 0) + if (num == 0) { return(num); + } - /* Now what we are left with is either - * completely merged on to the board - * from the above steps, or is new - * and need to be added to the board - * with the last one updated to current. - * - * First copy it out, we want to return that - * to our caller for processing. + /* + * Calculate the space we have in the filter table. */ - memcpy(in, blkboard, (num * sizeof(struct sackblk))); - numblks = num; - /* Now go through and add to our board as needed */ - for(i=(num-1); i>=0; i--) { - if (is_sack_on_board(sf, &blkboard[i])) { - continue; + room = SACK_FILTER_BLOCKS - sf->sf_used; + if (room < 1) + return (0); + /* + * Now lets walk through our filtered blkboard (the previous loop + * trimmed off anything on the board we already have so anything + * in blkboard is unique and not seen before) if there is room we copy + * it back out and place a new entry on our board. 
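sack_filter_run() above works in two passes: discard incoming blocks the board already covers, then admit the survivors only while the filter table has free slots. A toy sketch of that shape, with invented types and a caller-supplied coverage test, not the kernel routine:

#include <stdint.h>

#define TOY_MAX_SACK    4       /* stands in for TCP_MAX_SACK */

struct toy_blk {
        uint32_t start;
        uint32_t end;
};

/*
 * Pass 1: keep only blocks not already covered by the board.
 * Pass 2: copy the survivors back out, but no more than "room" of them,
 * where room is how many free entries the filter table still has.
 * Returns the number of blocks the caller should go on to process.
 */
static int
filter_then_admit(struct toy_blk *in, int numblks, int room,
    int (*covered)(const struct toy_blk *))
{
        struct toy_blk fresh[TOY_MAX_SACK];
        int i, num, at;

        for (i = 0, num = 0; i < numblks && num < TOY_MAX_SACK; i++) {
                if (covered(&in[i]))
                        continue;
                fresh[num++] = in[i];
        }
        if (num == 0 || room < 1)
                return (0);
        for (i = 0, at = 0; i < num && room > 0; i++, room--)
                in[at++] = fresh[i];
        return (at);
}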
+ */ + for(i=0, at=0; i<num; i++) { + if (room == 0) { + /* Can't copy out any more, no more room */ + break; } - /* Add this guy its not listed */ + /* Copy it out to the outbound */ + memcpy(&in[at], &blkboard[i], sizeof(struct sackblk)); + at++; + room--; + /* now lets add it to our sack-board */ sf->sf_cur++; sf->sf_cur %= SACK_FILTER_BLOCKS; if ((sack_blk_used(sf, sf->sf_cur)) && (sf->sf_used < SACK_FILTER_BLOCKS)) { sack_move_to_empty(sf, sf->sf_cur); } -#ifndef _KERNEL - if (sack_blk_used(sf, sf->sf_cur)) { - over_written++; - if (sf->sf_used < SACK_FILTER_BLOCKS) - empty_avail++; - } -#endif - memcpy(&sf->sf_blks[sf->sf_cur], &in[i], sizeof(struct sackblk)); + memcpy(&sf->sf_blks[sf->sf_cur], &blkboard[i], sizeof(struct sackblk)); if (sack_blk_used(sf, sf->sf_cur) == 0) { sf->sf_used++; #ifndef _KERNEL @@ -360,7 +420,26 @@ sack_filter_new(struct sack_filter *sf, struct sackblk *in, int numblks, tcp_seq sf->sf_bits = sack_blk_set(sf, sf->sf_cur); } } - return(numblks); + return(at); +} + +/* + * Collapse entry src into entry into + * and free up the src entry afterwards. + */ +static void +sack_collapse(struct sack_filter *sf, int32_t src, int32_t into) +{ + if (SEQ_LT(sf->sf_blks[src].start, sf->sf_blks[into].start)) { + /* src has a lower starting point */ + sf->sf_blks[into].start = sf->sf_blks[src].start; + } + if (SEQ_GT(sf->sf_blks[src].end, sf->sf_blks[into].end)) { + /* src has a higher ending point */ + sf->sf_blks[into].end = sf->sf_blks[src].end; + } + sf->sf_bits = sack_blk_clr(sf, src); + sf->sf_used--; } /* @@ -415,25 +494,6 @@ sack_blocks_overlap_or_meet(struct sack_filter *sf, struct sackblk *sb, uint32_t return (-1); } -/* - * Collapse entry src into entry into - * and free up the src entry afterwards. - */ -static void -sack_collapse(struct sack_filter *sf, int32_t src, int32_t into) -{ - if (SEQ_LT(sf->sf_blks[src].start, sf->sf_blks[into].start)) { - /* src has a lower starting point */ - sf->sf_blks[into].start = sf->sf_blks[src].start; - } - if (SEQ_GT(sf->sf_blks[src].end, sf->sf_blks[into].end)) { - /* src has a higher ending point */ - sf->sf_blks[into].end = sf->sf_blks[src].end; - } - sf->sf_bits = sack_blk_clr(sf, src); - sf->sf_used--; -} - static void sack_board_collapse(struct sack_filter *sf) { @@ -485,9 +545,12 @@ sack_filter_dump(FILE *out, struct sack_filter *sf) for(i=0; i<SACK_FILTER_BLOCKS; i++) { if (sack_blk_used(sf, i)) { - fprintf(out, "Entry:%d start:%u end:%u\n", i, - sf->sf_blks[i].start, - sf->sf_blks[i].end); + fprintf(out, "Entry:%d start:%u end:%u the block is %s\n", + i, + sf->sf_blks[i].start, + sf->sf_blks[i].end, + (sack_blk_used(sf, i) ? 
"USED" : "NOT-USED") + ); } } } @@ -497,10 +560,11 @@ sack_filter_dump(FILE *out, struct sack_filter *sf) static #endif int -sack_filter_blks(struct sack_filter *sf, struct sackblk *in, int numblks, +sack_filter_blks(struct tcpcb *tp, struct sack_filter *sf, struct sackblk *in, int numblks, tcp_seq th_ack) { int32_t i, ret; + int32_t segmax; if (numblks > TCP_MAX_SACK) { #ifdef _KERNEL @@ -510,14 +574,10 @@ sack_filter_blks(struct sack_filter *sf, struct sackblk *in, int numblks, #endif return(numblks); } -#ifndef _KERNEL - if ((sf->sf_used > 1) && (no_collapse == 0)) - sack_board_collapse(sf); - -#else if (sf->sf_used > 1) sack_board_collapse(sf); -#endif + + segmax = tcp_fixed_maxseg(tp); if ((sf->sf_used == 0) && numblks) { /* * We are brand new add the blocks in @@ -527,7 +587,15 @@ sack_filter_blks(struct sack_filter *sf, struct sackblk *in, int numblks, int cnt_added = 0; sf->sf_ack = th_ack; - for(i=(numblks-1), sf->sf_cur=0; i >= 0; i--) { + for(i=0, sf->sf_cur=0; i<numblks; i++) { + if ((in[i].end != tp->snd_max) && + ((in[i].end - in[i].start) < segmax)) { + /* + * We do not accept blocks less than a MSS minus all + * possible options space that is not at max_seg. + */ + continue; + } memcpy(&sf->sf_blks[sf->sf_cur], &in[i], sizeof(struct sackblk)); sf->sf_bits = sack_blk_set(sf, sf->sf_cur); sf->sf_cur++; @@ -548,11 +616,9 @@ sack_filter_blks(struct sack_filter *sf, struct sackblk *in, int numblks, sack_filter_prune(sf, th_ack); } if (numblks) { - if (SEQ_GEQ(th_ack, sf->sf_ack)) { - ret = sack_filter_new(sf, in, numblks, th_ack); - } else { - ret = sack_filter_old(sf, in, numblks); - } + ret = sack_filter_run(sf, in, numblks, th_ack, segmax, tp->snd_max); + if (sf->sf_used > 1) + sack_board_collapse(sf); } else ret = 0; return (ret); @@ -625,7 +691,8 @@ main(int argc, char **argv) char buffer[512]; struct sackblk blks[TCP_MAX_SACK]; FILE *err; - tcp_seq th_ack, snd_una, snd_max = 0; + tcp_seq th_ack; + struct tcpcb tp; struct sack_filter sf; int32_t numblks,i; int snd_una_set=0; @@ -638,10 +705,13 @@ main(int argc, char **argv) in = stdin; out = stdout; - while ((i = getopt(argc, argv, "ndIi:o:?h")) != -1) { + memset(&tp, 0, sizeof(tp)); + tp.t_maxseg = 1460; + + while ((i = getopt(argc, argv, "dIi:o:?hS:")) != -1) { switch (i) { - case 'n': - no_collapse = 1; + case 'S': + tp.t_maxseg = strtol(optarg, NULL, 0); break; case 'd': detailed_dump = 1; @@ -666,7 +736,7 @@ main(int argc, char **argv) default: case '?': case 'h': - fprintf(stderr, "Use %s [ -i infile -o outfile -I]\n", argv[0]); + fprintf(stderr, "Use %s [ -i infile -o outfile -I -S maxseg -n -d ]\n", argv[0]); return(0); break; }; @@ -679,28 +749,28 @@ main(int argc, char **argv) while (fgets(buffer, sizeof(buffer), in) != NULL) { sprintf(line_buf[line_buf_at], "%s", buffer); line_buf_at++; - if (strncmp(buffer, "QUIT", 4) == 0) { + if (strncmp(buffer, "quit", 4) == 0) { break; - } else if (strncmp(buffer, "DUMP", 4) == 0) { + } else if (strncmp(buffer, "dump", 4) == 0) { sack_filter_dump(out, &sf); - } else if (strncmp(buffer, "MAX:", 4) == 0) { - snd_max = strtoul(&buffer[4], NULL, 0); - } else if (strncmp(buffer, "COMMIT", 6) == 0) { + } else if (strncmp(buffer, "max:", 4) == 0) { + tp.snd_max = strtoul(&buffer[4], NULL, 0); + } else if (strncmp(buffer, "commit", 6) == 0) { int nn, ii; if (numblks) { uint32_t szof, tot_chg; + printf("Dumping line buffer (lines:%d)\n", line_buf_at); for(ii=0; ii<line_buf_at; ii++) { fprintf(out, "%s", line_buf[ii]); } - fprintf(out, "------------------------------------\n"); - nn = 
sack_filter_blks(&sf, blks, numblks, th_ack); + fprintf(out, "------------------------------------ call sfb() nb:%d\n", numblks); + nn = sack_filter_blks(&tp, &sf, blks, numblks, th_ack); saved += numblks - nn; tot_sack_blks += numblks; - fprintf(out, "ACK:%u\n", sf.sf_ack); for(ii=0, tot_chg=0; ii<nn; ii++) { szof = blks[ii].end - blks[ii].start; tot_chg += szof; - fprintf(out, "SACK:%u:%u [%u]\n", + fprintf(out, "sack:%u:%u [%u]\n", blks[ii].start, blks[ii].end, szof); } @@ -715,7 +785,7 @@ main(int argc, char **argv) memset(line_buf, 0, sizeof(line_buf)); line_buf_at=0; numblks = 0; - } else if (strncmp(buffer, "CHG:", 4) == 0) { + } else if (strncmp(buffer, "chg:", 4) == 0) { sack_chg = strtoul(&buffer[4], NULL, 0); if ((sack_chg != chg_remembered) && (sack_chg > chg_remembered)){ @@ -724,20 +794,21 @@ main(int argc, char **argv) ); } sack_chg = chg_remembered = 0; - } else if (strncmp(buffer, "RXT", 3) == 0) { - sack_filter_clear(&sf, snd_una); - } else if (strncmp(buffer, "ACK:", 4) == 0) { + } else if (strncmp(buffer, "rxt", 3) == 0) { + sack_filter_clear(&sf, tp.snd_una); + } else if (strncmp(buffer, "ack:", 4) == 0) { th_ack = strtoul(&buffer[4], NULL, 0); if (snd_una_set == 0) { - snd_una = th_ack; + tp.snd_una = th_ack; snd_una_set = 1; - } else if (SEQ_GT(th_ack, snd_una)) { - snd_una = th_ack; + } else if (SEQ_GT(th_ack, tp.snd_una)) { + tp.snd_una = th_ack; } - } else if (strncmp(buffer, "EXIT", 4) == 0) { - sack_filter_clear(&sf, snd_una); + sack_filter_blks(&tp, &sf, NULL, 0, th_ack); + } else if (strncmp(buffer, "exit", 4) == 0) { + sack_filter_clear(&sf, tp.snd_una); sack_chg = chg_remembered = 0; - } else if (strncmp(buffer, "SACK:", 5) == 0) { + } else if (strncmp(buffer, "sack:", 5) == 0) { char *end=NULL; uint32_t start; uint32_t endv; @@ -749,8 +820,8 @@ main(int argc, char **argv) fprintf(out, "--Sack invalid skip 0 start:%u : ??\n", start); continue; } - if (SEQ_GT(endv, snd_max)) - snd_max = endv; + if (SEQ_GT(endv, tp.snd_max)) + tp.snd_max = endv; if (SEQ_LT(endv, start)) { fprintf(out, "--Sack invalid skip 1 endv:%u < start:%u\n", endv, start); continue; @@ -762,7 +833,7 @@ main(int argc, char **argv) blks[numblks].start = start; blks[numblks].end = endv; numblks++; - } else if (strncmp(buffer, "REJ:n:n", 4) == 0) { + } else if (strncmp(buffer, "rej:n:n", 4) == 0) { struct sackblk in; char *end=NULL; @@ -772,18 +843,63 @@ main(int argc, char **argv) sack_filter_reject(&sf, &in); } else fprintf(out, "Invalid input END:A:B\n"); - } else if (strncmp(buffer, "HELP", 4) == 0) { + } else if (strncmp(buffer, "save", 4) == 0) { + FILE *io; + + io = fopen("sack_setup.bin", "w+"); + if (io != NULL) { + if (fwrite(&sf, sizeof(sf), 1, io) != 1) { + printf("Failed to write out sf data\n"); + unlink("sack_setup.bin"); + goto outwrite; + } + if (fwrite(&tp, sizeof(tp), 1, io) != 1) { + printf("Failed to write out tp data\n"); + unlink("sack_setup.bin"); + } else + printf("Save completed\n"); + outwrite: + fclose(io); + } else { + printf("failed to open sack_setup.bin for writting .. 
sorry\n"); + } + } else if (strncmp(buffer, "restore", 7) == 0) { + FILE *io; + + io = fopen("sack_setup.bin", "r"); + if (io != NULL) { + if (fread(&sf, sizeof(sf), 1, io) != 1) { + printf("Failed to read out sf data\n"); + goto outread; + } + if (fread(&tp, sizeof(tp), 1, io) != 1) { + printf("Failed to read out tp data\n"); + } else { + printf("Restore completed\n"); + sack_filter_dump(out, &sf); + } + outread: + fclose(io); + } else { + printf("can't open sack_setup.bin -- sorry no load\n"); + } + + } else if (strncmp(buffer, "help", 4) == 0) { +help: fprintf(out, "You can input:\n"); - fprintf(out, "SACK:S:E -- to define a sack block\n"); - fprintf(out, "RXT -- to clear the filter without changing the remembered\n"); - fprintf(out, "EXIT -- To clear the sack filter and start all fresh\n"); - fprintf(out, "ACK:N -- To advance the cum-ack to N\n"); - fprintf(out, "MAX:N -- To set send-max to N\n"); - fprintf(out, "COMMIT -- To apply the sack you built to the filter and dump the filter\n"); - fprintf(out, "DUMP -- To display the current contents of the sack filter\n"); - fprintf(out, "QUIT -- To exit this program\n"); + fprintf(out, "sack:S:E -- to define a sack block\n"); + fprintf(out, "rxt -- to clear the filter without changing the remembered\n"); + fprintf(out, "save -- save current state to sack_setup.bin\n"); + fprintf(out, "restore -- restore state from sack_setup.bin\n"); + fprintf(out, "exit -- To clear the sack filter and start all fresh\n"); + fprintf(out, "ack:N -- To advance the cum-ack to N\n"); + fprintf(out, "max:N -- To set send-max to N\n"); + fprintf(out, "commit -- To apply the sack you built to the filter and dump the filter\n"); + fprintf(out, "dump -- To display the current contents of the sack filter\n"); + fprintf(out, "quit -- To exit this program\n"); } else { fprintf(out, "Command %s unknown\n", buffer); + goto help; } memset(buffer, 0, sizeof(buffer)); } diff --git a/sys/netinet/tcp_stacks/sack_filter.h b/sys/netinet/tcp_stacks/sack_filter.h index fe34b1e3ca9b..b12fcf84567c 100644 --- a/sys/netinet/tcp_stacks/sack_filter.h +++ b/sys/netinet/tcp_stacks/sack_filter.h @@ -25,19 +25,84 @@ * SUCH DAMAGE. */ -/* - * Seven entry's is carefully choosen to - * fit in one cache line. We can easily - * change this to 15 (but it gets very - * little extra filtering). To change it - * to be larger than 15 would require either - * sf_bits becoming a uint32_t and then you - * could go to 31.. or change it to a full - * bitstring.. It is really doubtful you - * will get much benefit beyond 7, in testing - * there was a small amount but very very small. +/** + * + * The Sack filter is designed to do two functions, first it trys to reduce + * the processing of sacks. Consider that often times you get something like + * + * ack 1 (sack 100:200) + * ack 1 (sack 100:300) + * ack 1 (sack(100:400) + * + * You really want to process the 100:200 and then on the next sack process + * only 200:300 (the new data) and then finally on the third 300:400. The filter + * removes from your processing routines the already processed sack information so + * that after the filter completes you only have "new" sacks that you have not + * processed. This saves computation time so you do not need to worry about + * previously processed sack information. + * + * The second thing that the sack filter does is help protect against malicious + * attackers that are trying to attack any linked lists (or other data structures) + * that are used in sack processing. 
Consider an attacker sending in sacks for + every other byte of data outstanding. This could in theory drastically split + up any scoreboard you are maintaining and make you search through a very large + linked list (or other structure), eating up CPU. If you split far enough and + fracture your data structure enough, you could potentially be crippled by a malicious + peer. The filter handles this by filtering out sacks that are smaller than an MSS. + We do this because generally a packet (aka MSS) should be kept whole. The only place + we allow a smaller SACK is when the SACK touches the end of our socket buffer. This allows + TLP to still work properly and yet protects us from splitting. The filter also only allows + a set number of splits (defined in SACK_FILTER_BLOCKS). If more than that many sack locations + are being sent, we discard additional ones until the earlier holes are filled up. The maximum + the current filter supports is 15, which we have moved to since we want to be as generous as + possible in allowing for loss. However, in previous testing of the filter it was found + that there was very little benefit from moving from 7 to 15 sack points, though in + that previous set of tests we would just discard earlier information in the filter. Now + that we no longer discard information, and instead drop new sack data, we have raised + the value to the maximum, i.e. 15. To expand beyond 15, one would have to either increase + the size of sf_bits to a uint32_t, which would allow a maximum of 31 splits, or + move to a true bitstring. Doing so, however, further increases your exposure to + sack attacks: the larger the number of splits (filter blocks) that are allowed, + the larger your processing arrays, as well as the filter itself, will grow. + * + * Note that this protection does not prevent an attacker from asking for a 20 byte + * MSS; that protection must be done elsewhere during the negotiation of the connection + * and is done now by simply ignoring sacks from connections with too small an MSS, which + * prevents sack from working and thus makes the connection less efficient but protects + * the system from harm. + * + * We may actually want to consider dropping the size of the array back to 7 to further + * protect the system, which would be a more cautious approach. + * + * TCP Developer information: + * + * Using the sack filter is actually pretty simple. All you do is the normal sorting + * and sanity checks of your sacks, but then after that you call out to sack_filter_blks(), + * passing in the tcpcb, the sack-filter you are using (memory you have allocated), the + * pointer to the sackblk array, and how many sorted valid blocks there are, as well + * as what the new th_ack point is. The filter will return to you the number of + * blocks left after filtering. It will reshape the blocks based on the previous + * sacks you have received and processed. If sack_filter_blks() returns 0 then no + * new sack data is present to be processed. + * + * Whenever you reach the point of snd_una == snd_max, you should call sack_filter_clear() with + * the snd_una point. You also need to call this if you invalidate your sack array for any + * reason (such as RTOs or MTU changes or some other event that makes you think all + * data is now un-acknowledged). You can also call sack_filter_blks(tp, sf, NULL, 0, th_ack) to + * advance the cum-ack point. You can use sack_filter_blks_used(sf) to determine if you have filter blocks as + * well. 
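
As a concrete illustration of the calling pattern just described (this sketch is not part of the change: my_sf, my_parse_and_sort_sack_blocks() and my_handle_sacked_range() are hypothetical stand-ins for a stack's own state and helpers, while the sack_filter_*() calls match the prototypes declared below; blks[] is sized for the at most four SACK blocks an ACK can carry):

	struct sackblk blks[4];
	int i, nblks;

	nblks = my_parse_and_sort_sack_blocks(to, blks);
	nblks = sack_filter_blks(tp, &my_sf, blks, nblks, th_ack);
	for (i = 0; i < nblks; i++)
		my_handle_sacked_range(tp, blks[i].start, blks[i].end);
	if (tp->snd_una == tp->snd_max)
		sack_filter_clear(&my_sf, tp->snd_una);

A return of 0 from sack_filter_blks() simply means the ACK carried no new SACK information, so the scoreboard does not need to be touched.
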
Putting those two calls together, anytime the cum-ack moves forward you probably want to + * do: + * if (sack_filter_blks_used(sf)) + * sack_filter_blks(tp, sf, NULL, 0, th_ack); + * + * If for some reason you have run the sack-filter and something goes wrong (you cannot, for example, + * allocate space to split your sack-array), you can "undo" the data within the sack filter by calling + * sack_filter_reject(sf, in), passing in the list of blocks to be "removed" from the sack-filter. + * You can see an example of this use in bbr.c, though rack.c has never found it needed. + * */ -#define SACK_FILTER_BLOCKS 7 + +#define SACK_FILTER_BLOCKS 15 struct sack_filter { tcp_seq sf_ack; @@ -48,7 +113,7 @@ struct sack_filter { }; #ifdef _KERNEL void sack_filter_clear(struct sack_filter *sf, tcp_seq seq); -int sack_filter_blks(struct sack_filter *sf, struct sackblk *in, int numblks, +int sack_filter_blks(struct tcpcb *tp, struct sack_filter *sf, struct sackblk *in, int numblks, tcp_seq th_ack); void sack_filter_reject(struct sack_filter *sf, struct sackblk *in); static inline uint8_t sack_filter_blks_used(struct sack_filter *sf) diff --git a/sys/netinet/tcp_stacks/tcp_bbr.h b/sys/netinet/tcp_stacks/tcp_bbr.h index f88efe3c9ef9..10ddd12bda75 100644 --- a/sys/netinet/tcp_stacks/tcp_bbr.h +++ b/sys/netinet/tcp_stacks/tcp_bbr.h @@ -347,8 +347,6 @@ struct bbr_log_sysctl_out { /* * Locking for the rack control block. * a) Locked by INP_WLOCK - * b) Locked by the hpts-mutex - * */ #define BBR_STATE_STARTUP 0x01 #define BBR_STATE_DRAIN 0x02 diff --git a/sys/netinet/tcp_stacks/tcp_rack.h b/sys/netinet/tcp_stacks/tcp_rack.h index 708b437b32ed..144b4fabf7eb 100644 --- a/sys/netinet/tcp_stacks/tcp_rack.h +++ b/sys/netinet/tcp_stacks/tcp_rack.h @@ -199,7 +199,6 @@ struct rack_opts_stats { uint64_t tcp_rack_min_pace_seg; uint64_t tcp_rack_pace_rate_ca; uint64_t tcp_rack_rr; - uint64_t tcp_rack_do_detection; uint64_t tcp_rack_rrr_no_conf_rate; uint64_t tcp_initial_rate; uint64_t tcp_initial_win; @@ -328,8 +327,6 @@ extern counter_u64_t rack_opts_arry[RACK_OPTS_SIZE]; /* * Locking for the rack control block. * a) Locked by INP_WLOCK - * b) Locked by the hpts-mutex - * */ #define RACK_GP_HIST 4 /* How much goodput history do we maintain? 
*/ #define RETRAN_CNT_SIZE 16 @@ -436,7 +433,6 @@ struct rack_control { uint32_t rc_rcvtime; /* When we last received data */ uint32_t rc_num_split_allocs; /* num split map entries allocated */ uint32_t rc_split_limit; /* Limit from control var can be set by socket opt */ - uint32_t rack_avg_rec_sends; uint32_t rc_last_output_to; uint32_t rc_went_idle_time; @@ -458,16 +454,11 @@ struct rack_control { uint16_t rack_per_of_gp_rec; /* 100 = 100%, so from 65536 = 655 x bw, 0=off */ uint16_t rack_per_of_gp_probertt; /* 100 = 100%, so from 65536 = 655 x bw, 0=off */ uint32_t rc_high_rwnd; - uint32_t ack_count; - uint32_t sack_count; - uint32_t sack_noextra_move; - uint32_t sack_moved_extra; struct rack_rtt_sample rack_rs; const struct tcp_hwrate_limit_table *crte; uint32_t rc_agg_early; uint32_t rc_agg_delayed; uint32_t rc_tlp_rxt_last_time; - uint32_t rc_saved_cwnd; uint64_t rc_gp_output_ts; /* chg*/ uint64_t rc_gp_cumack_ts; /* chg*/ struct timeval act_rcv_time; @@ -489,12 +480,6 @@ struct rack_control { int32_t rc_rtt_diff; /* Timely style rtt diff of our gp_srtt */ uint64_t last_tmit_time_acked; /* Holds the last cumack point's last send time */ /* Recovery stats */ - uint64_t time_entered_recovery; - uint64_t bytes_acked_in_recovery; - /* Policer Detection */ - uint64_t last_policer_sndbytes; - uint64_t last_policer_snd_rxt_bytes; - uint64_t policer_bw; uint64_t last_sendtime; uint64_t last_gpest; @@ -507,19 +492,9 @@ struct rack_control { uint32_t gp_rnd_thresh; uint32_t ss_hi_fs; uint32_t gate_to_fs; - uint32_t policer_max_seg; - uint32_t pol_bw_comp; - uint16_t policer_rxt_threshold; - uint8_t policer_avg_threshold; - uint8_t policer_med_threshold; uint32_t pcm_max_seg; uint32_t last_pcm_round; uint32_t pcm_idle_rounds; - uint32_t current_policer_bucket; - uint32_t policer_bucket_size; - uint32_t idle_snd_una; - uint32_t ack_for_idle; - uint32_t last_amount_before_rec; uint32_t rc_gp_srtt; /* Current GP srtt */ uint32_t rc_prev_gp_srtt; /* Previous RTT */ @@ -558,22 +533,17 @@ struct rack_control { uint32_t rc_last_timeout_snduna; uint32_t last_tlp_acked_start; uint32_t last_tlp_acked_end; - uint32_t challenge_ack_ts; - uint32_t challenge_ack_cnt; uint32_t rc_min_to; /* Socket option value Lock(a) */ uint32_t rc_pkt_delay; /* Socket option value Lock(a) */ uint32_t persist_lost_ends; - uint32_t ack_during_sd; - uint32_t input_pkt; - uint32_t saved_input_pkt; - uint32_t saved_policer_val; /* The encoded value we used to setup policer detection */ uint32_t cleared_app_ack_seq; uint32_t last_rcv_tstmp_for_rtt; uint32_t last_time_of_arm_rcv; uint32_t rto_ssthresh; - struct newreno rc_saved_beta; /* - * For newreno cc: - * rc_saved_cc are the values we have had + uint32_t rc_saved_beta; + uint32_t rc_saved_beta_ecn; /* + * For newreno cc: rc_saved_beta and + * rc_saved_beta_ecn are the values we have had * set by the user, if pacing is not happening * (i.e. its early and we have not turned on yet * or it was turned off). 
The minute pacing @@ -586,7 +556,6 @@ struct rack_control { uint16_t rc_cnt_of_retran[RETRAN_CNT_SIZE]; uint16_t rc_early_recovery_segs; /* Socket option value Lock(a) */ uint16_t rc_reorder_shift; /* Socket option value Lock(a) */ - uint8_t policer_del_mss; /* How many mss during recovery for policer detection */ uint8_t rack_per_upper_bound_ss; uint8_t rack_per_upper_bound_ca; uint8_t cleared_app_ack; @@ -598,12 +567,9 @@ struct rack_control { uint8_t rc_tlp_cwnd_reduce; /* Socket option value Lock(a) */ uint8_t rc_prr_sendalot;/* Socket option value Lock(a) */ uint8_t rc_rate_sample_method; - uint8_t policer_alt_median; /* Alternate median for policer detection */ - uint8_t full_dgp_in_rec; /* Flag to say if we do full DGP in recovery */ uint8_t client_suggested_maxseg; /* Not sure what to do with this yet */ uint8_t use_gp_not_last; uint8_t pacing_method; /* If pace_always, what type of pacing */ - uint8_t already_had_a_excess; }; #endif @@ -611,27 +577,6 @@ struct rack_control { #define RACK_DGP_PACING 0x01 #define RACK_REG_PACING 0x02 -/* DGP with no buffer level mitigations */ -#define DGP_LEVEL0 0 - -/* - * DGP with buffer level mitigation where BL:4 caps fillcw and BL:5 - * turns off fillcw. - */ -#define DGP_LEVEL1 1 - -/* - * DGP with buffer level mitigation where BL:3 caps fillcw and BL:4 turns off fillcw - * and BL:5 reduces by 10% - */ -#define DGP_LEVEL2 2 - -/* - * DGP with buffer level mitigation where BL:2 caps fillcw and BL:3 turns off - * fillcw BL:4 reduces by 10% and BL:5 reduces by 20% - */ -#define DGP_LEVEL3 3 - /* Hybrid pacing log defines */ #define HYBRID_LOG_NO_ROOM 0 /* No room for the clients request */ #define HYBRID_LOG_TURNED_OFF 1 /* Turned off hybrid pacing */ @@ -650,12 +595,7 @@ struct rack_control { #define HYBRID_LOG_EXTEND 14 /* We extended the end */ #define HYBRID_LOG_SENT_LOST 15 /* A closing sent/lost report */ -#define LOST_ZERO 1 /* Zero it out */ -#define LOST_ADD 2 /* Add to it */ -#define LOST_SUB 3 /* Sub from it */ - #define RACK_TIMELY_CNT_BOOST 5 /* At 5th increase boost */ -#define RACK_MINRTT_FILTER_TIM 10 /* Seconds */ #define RACK_HYSTART_OFF 0 #define RACK_HYSTART_ON 1 /* hystart++ on */ @@ -672,7 +612,6 @@ struct rack_control { struct tcp_rack { /* First cache line 0x00 */ - TAILQ_ENTRY(tcp_rack) r_hpts; /* hptsi queue next Lock(b) */ int32_t(*r_substate) (struct mbuf *, struct tcphdr *, struct socket *, struct tcpcb *, struct tcpopt *, int32_t, int32_t, uint32_t, int, int, uint8_t); /* Lock(a) */ @@ -790,8 +729,7 @@ struct tcp_rack { set_pacing_done_a_iw : 1, use_rack_rr : 1, alloc_limit_reported : 1, - sack_attack_disable : 1, - do_detection : 1, + rack_avail : 2, rc_force_max_seg : 1; uint8_t r_early : 1, r_late : 1, @@ -801,12 +739,9 @@ struct tcp_rack { r_collapse_point_valid : 1, dgp_on : 1; uint16_t rto_from_rec: 1, - avail_bit: 1, + avail_bit: 4, pcm_in_progress: 1, pcm_needed: 1, - policer_detect_on: 1, /* Are we detecting policers? 
*/ - rc_policer_detected : 1, /* We are beiing policed */ - rc_policer_should_pace : 1, /* The sizing algo thinks we should pace */ rc_sendvars_notset : 1, /* Inside rack_init send variables (snd_max/una etc) were not set */ rc_gp_rtt_set : 1, rc_gp_dyn_mul : 1, diff --git a/sys/netinet/tcp_subr.c b/sys/netinet/tcp_subr.c index 312740ccf599..db415f6bdf03 100644 --- a/sys/netinet/tcp_subr.c +++ b/sys/netinet/tcp_subr.c @@ -58,7 +58,6 @@ #include <sys/refcount.h> #include <sys/mbuf.h> #include <sys/priv.h> -#include <sys/proc.h> #include <sys/sdt.h> #include <sys/socket.h> #include <sys/socketvar.h> @@ -110,9 +109,6 @@ #include <netinet/tcpip.h> #include <netinet/tcp_fastopen.h> #include <netinet/tcp_accounting.h> -#ifdef TCPPCAP -#include <netinet/tcp_pcap.h> -#endif #ifdef TCP_OFFLOAD #include <netinet/tcp_offload.h> #endif @@ -139,68 +135,14 @@ VNET_DEFINE(int, tcp_mssdflt) = TCP_MSS; VNET_DEFINE(int, tcp_v6mssdflt) = TCP6_MSS; #endif -#ifdef TCP_SAD_DETECTION -/* Sack attack detection thresholds and such */ -SYSCTL_NODE(_net_inet_tcp, OID_AUTO, sack_attack, - CTLFLAG_RW | CTLFLAG_MPSAFE, 0, - "Sack Attack detection thresholds"); -int32_t tcp_force_detection = 0; -SYSCTL_INT(_net_inet_tcp_sack_attack, OID_AUTO, force_detection, - CTLFLAG_RW, - &tcp_force_detection, 0, - "Do we force detection even if the INP has it off?"); -int32_t tcp_sad_limit = 10000; -SYSCTL_INT(_net_inet_tcp_sack_attack, OID_AUTO, limit, - CTLFLAG_RW, - &tcp_sad_limit, 10000, - "If SaD is enabled, what is the limit to sendmap entries (0 = unlimited)?"); -int32_t tcp_sack_to_ack_thresh = 700; /* 70 % */ -SYSCTL_INT(_net_inet_tcp_sack_attack, OID_AUTO, sack_to_ack_thresh, - CTLFLAG_RW, - &tcp_sack_to_ack_thresh, 700, - "Percentage of sacks to acks we must see above (10.1 percent is 101)?"); -int32_t tcp_sack_to_move_thresh = 600; /* 60 % */ -SYSCTL_INT(_net_inet_tcp_sack_attack, OID_AUTO, move_thresh, - CTLFLAG_RW, - &tcp_sack_to_move_thresh, 600, - "Percentage of sack moves we must see above (10.1 percent is 101)"); -int32_t tcp_restoral_thresh = 450; /* 45 % (sack:2:ack -25%) (mv:ratio -15%) **/ -SYSCTL_INT(_net_inet_tcp_sack_attack, OID_AUTO, restore_thresh, - CTLFLAG_RW, - &tcp_restoral_thresh, 450, - "Percentage of sack to ack percentage we must see below to restore(10.1 percent is 101)"); -int32_t tcp_sad_decay_val = 800; -SYSCTL_INT(_net_inet_tcp_sack_attack, OID_AUTO, decay_per, - CTLFLAG_RW, - &tcp_sad_decay_val, 800, - "The decay percentage (10.1 percent equals 101 )"); -int32_t tcp_map_minimum = 500; -SYSCTL_INT(_net_inet_tcp_sack_attack, OID_AUTO, nummaps, - CTLFLAG_RW, - &tcp_map_minimum, 500, - "Number of Map enteries before we start detection"); -int32_t tcp_sad_pacing_interval = 2000; -SYSCTL_INT(_net_inet_tcp_sack_attack, OID_AUTO, sad_pacing_int, - CTLFLAG_RW, - &tcp_sad_pacing_interval, 2000, - "What is the minimum pacing interval for a classified attacker?"); - -int32_t tcp_sad_low_pps = 100; -SYSCTL_INT(_net_inet_tcp_sack_attack, OID_AUTO, sad_low_pps, - CTLFLAG_RW, - &tcp_sad_low_pps, 100, - "What is the input pps that below which we do not decay?"); -#endif -uint32_t tcp_ack_war_time_window = 1000; +VNET_DEFINE(uint32_t, tcp_ack_war_time_window) = 1000; SYSCTL_UINT(_net_inet_tcp, OID_AUTO, ack_war_timewindow, - CTLFLAG_RW, - &tcp_ack_war_time_window, 1000, - "If the tcp_stack does ack-war prevention how many milliseconds are in its time window?"); -uint32_t tcp_ack_war_cnt = 5; -SYSCTL_UINT(_net_inet_tcp, OID_AUTO, ack_war_cnt, - CTLFLAG_RW, - &tcp_ack_war_cnt, 5, - "If the tcp_stack does ack-war 
prevention how many acks can be sent in its time window?"); + CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(tcp_ack_war_time_window), 0, + "Time interval in ms used to limit the number (ack_war_cnt) of challenge ACKs sent per TCP connection"); +VNET_DEFINE(uint32_t, tcp_ack_war_cnt) = 5; +SYSCTL_UINT(_net_inet_tcp, OID_AUTO, ack_war_cnt, CTLFLAG_VNET | CTLFLAG_RW, + &VNET_NAME(tcp_ack_war_cnt), 0, + "Maximum number of challenge ACKs sent per TCP connection during the time interval (ack_war_timewindow)"); struct rwlock tcp_function_lock; @@ -411,6 +353,7 @@ static struct tcp_function_block tcp_def_funcblk = { .tfb_tcp_fb_init = tcp_default_fb_init, .tfb_tcp_fb_fini = tcp_default_fb_fini, .tfb_switch_failed = tcp_default_switch_failed, + .tfb_flags = TCP_FUNC_DEFAULT_OK, }; static int tcp_fb_cnt = 0; @@ -446,23 +389,25 @@ static struct tcp_function_block * find_tcp_functions_locked(struct tcp_function_set *fs) { struct tcp_function *f; - struct tcp_function_block *blk=NULL; + struct tcp_function_block *blk = NULL; + rw_assert(&tcp_function_lock, RA_LOCKED); TAILQ_FOREACH(f, &t_functions, tf_next) { if (strcmp(f->tf_name, fs->function_set_name) == 0) { blk = f->tf_fb; break; } } - return(blk); + return (blk); } static struct tcp_function_block * find_tcp_fb_locked(struct tcp_function_block *blk, struct tcp_function **s) { - struct tcp_function_block *rblk=NULL; + struct tcp_function_block *rblk = NULL; struct tcp_function *f; + rw_assert(&tcp_function_lock, RA_LOCKED); TAILQ_FOREACH(f, &t_functions, tf_next) { if (f->tf_fb == blk) { rblk = blk; @@ -485,7 +430,7 @@ find_and_ref_tcp_functions(struct tcp_function_set *fs) if (blk) refcount_acquire(&blk->tfb_refcnt); rw_runlock(&tcp_function_lock); - return(blk); + return (blk); } struct tcp_function_block * @@ -498,7 +443,7 @@ find_and_ref_tcp_fb(struct tcp_function_block *blk) if (rblk) refcount_acquire(&rblk->tfb_refcnt); rw_runlock(&tcp_function_lock); - return(rblk); + return (rblk); } /* Find a matching alias for the given tcp_function_block. */ @@ -568,8 +513,7 @@ tcp_switch_back_to_default(struct tcpcb *tp) tfb = NULL; } /* Does the stack accept this connection? 
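
(The tfb_tcp_handoff_ok callback is now mandatory, as enforced by the registration check further down in this change, which is why the NULL test below can go away. As a sketch only, not taken from this change, a stack that always accepts a hand-off would supply something like:

	static int
	example_handoff_ok(struct tcpcb *tp)
	{
		return (0);
	}

where returning zero accepts the connection and any non-zero value rejects it.)
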
*/ - if (tfb != NULL && tfb->tfb_tcp_handoff_ok != NULL && - (*tfb->tfb_tcp_handoff_ok)(tp)) { + if (tfb != NULL && (*tfb->tfb_tcp_handoff_ok)(tp)) { refcount_release(&tfb->tfb_refcnt); tfb = NULL; } @@ -603,11 +547,9 @@ tcp_switch_back_to_default(struct tcpcb *tp) /* there always should be a default */ panic("Can't refer to tcp_def_funcblk"); } - if (tfb->tfb_tcp_handoff_ok != NULL) { - if ((*tfb->tfb_tcp_handoff_ok) (tp)) { - /* The default stack cannot say no */ - panic("Default stack rejects a new session?"); - } + if ((*tfb->tfb_tcp_handoff_ok)(tp)) { + /* The default stack cannot say no */ + panic("Default stack rejects a new session?"); } if (tfb->tfb_tcp_fb_init != NULL && (*tfb->tfb_tcp_fb_init)(tp, &ptr)) { @@ -702,7 +644,7 @@ out: static int sysctl_net_inet_default_tcp_functions(SYSCTL_HANDLER_ARGS) { - int error=ENOENT; + int error = ENOENT; struct tcp_function_set fs; struct tcp_function_block *blk; @@ -720,7 +662,7 @@ sysctl_net_inet_default_tcp_functions(SYSCTL_HANDLER_ARGS) /* Check for error or no change */ if (error != 0 || req->newptr == NULL) - return(error); + return (error); rw_wlock(&tcp_function_lock); blk = find_tcp_functions_locked(&fs); @@ -729,6 +671,10 @@ sysctl_net_inet_default_tcp_functions(SYSCTL_HANDLER_ARGS) error = ENOENT; goto done; } + if ((blk->tfb_flags & TCP_FUNC_DEFAULT_OK) == 0) { + error = EINVAL; + goto done; + } V_tcp_func_set_ptr = blk; done: rw_wunlock(&tcp_function_lock); @@ -1086,10 +1032,6 @@ tcp_default_fb_init(struct tcpcb *tp, void **ptr) /* We don't use the pointer */ *ptr = NULL; - KASSERT(tp->t_state < TCPS_TIME_WAIT, - ("%s: connection %p in unexpected state %d", __func__, tp, - tp->t_state)); - /* Make sure we get no interesting mbuf queuing behavior */ /* All mbuf queue/ack compress flags should be off */ tcp_lro_features_off(tp); @@ -1106,7 +1048,8 @@ tcp_default_fb_init(struct tcpcb *tp, void **ptr) if (tp->t_rxtshift == 0) tp->t_rxtcur = rexmt; else - TCPT_RANGESET(tp->t_rxtcur, rexmt, tp->t_rttmin, TCPTV_REXMTMAX); + TCPT_RANGESET(tp->t_rxtcur, rexmt, tp->t_rttmin, + tcp_rexmit_max); /* * Nothing to do for ESTABLISHED or LISTEN states. And, we don't @@ -1225,80 +1168,83 @@ int register_tcp_functions_as_names(struct tcp_function_block *blk, int wait, const char *names[], int *num_names) { - struct tcp_function *n; + struct tcp_function *f[TCP_FUNCTION_NAME_NUM_MAX]; struct tcp_function_set fs; - int error, i; + int error, i, num_registered; - KASSERT(names != NULL && *num_names > 0, - ("%s: Called with 0-length name list", __func__)); KASSERT(names != NULL, ("%s: Called with NULL name list", __func__)); + KASSERT(*num_names > 0, + ("%s: Called with non-positive length of name list", __func__)); KASSERT(rw_initialized(&tcp_function_lock), ("%s: called too early", __func__)); + if (*num_names > TCP_FUNCTION_NAME_NUM_MAX) { + /* Too many names. */ + *num_names = 0; + return (E2BIG); + } if ((blk->tfb_tcp_output == NULL) || (blk->tfb_tcp_do_segment == NULL) || (blk->tfb_tcp_ctloutput == NULL) || + (blk->tfb_tcp_handoff_ok == NULL) || (strlen(blk->tfb_tcp_block_name) == 0)) { - /* - * These functions are required and you - * need a name. - */ + /* These functions are required and a name is needed. 
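
For reference, a minimal caller of this registration interface could look roughly as follows; this is a sketch only, not part of the change: the function block and the names are hypothetical, the wait argument (M_WAITOK or M_NOWAIT) is passed through to the allocations, and the name count may not exceed TCP_FUNCTION_NAME_NUM_MAX:

	static const char *example_names[] = { "example", "example_alias" };
	int error, num_names;

	num_names = nitems(example_names);
	error = register_tcp_functions_as_names(&example_tcp_fb, M_WAITOK,
	    example_names, &num_names);
	if (error != 0)
		printf("example stack registration failed: %d\n", error);

On failure the routine now removes every name it had already inserted, so the caller never has to clean up a partially registered set.
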
*/ *num_names = 0; return (EINVAL); } - if (blk->tfb_flags & TCP_FUNC_BEING_REMOVED) { - *num_names = 0; - return (EINVAL); + for (i = 0; i < *num_names; i++) { + f[i] = malloc(sizeof(struct tcp_function), M_TCPFUNCTIONS, wait); + if (f[i] == NULL) { + while (--i >= 0) + free(f[i], M_TCPFUNCTIONS); + *num_names = 0; + return (ENOMEM); + } } + num_registered = 0; + rw_wlock(&tcp_function_lock); + if (find_tcp_fb_locked(blk, NULL) != NULL) { + /* A TCP function block can only be registered once. */ + error = EALREADY; + goto cleanup; + } + if (blk->tfb_flags & TCP_FUNC_BEING_REMOVED) { + error = EINVAL; + goto cleanup; + } refcount_init(&blk->tfb_refcnt, 0); blk->tfb_id = atomic_fetchadd_int(&next_tcp_stack_id, 1); for (i = 0; i < *num_names; i++) { - n = malloc(sizeof(struct tcp_function), M_TCPFUNCTIONS, wait); - if (n == NULL) { - error = ENOMEM; - goto cleanup; - } - n->tf_fb = blk; - (void)strlcpy(fs.function_set_name, names[i], sizeof(fs.function_set_name)); - rw_wlock(&tcp_function_lock); if (find_tcp_functions_locked(&fs) != NULL) { /* Duplicate name space not allowed */ - rw_wunlock(&tcp_function_lock); - free(n, M_TCPFUNCTIONS); error = EALREADY; goto cleanup; } - (void)strlcpy(n->tf_name, names[i], sizeof(n->tf_name)); - TAILQ_INSERT_TAIL(&t_functions, n, tf_next); + f[i]->tf_fb = blk; + (void)strlcpy(f[i]->tf_name, names[i], sizeof(f[i]->tf_name)); + TAILQ_INSERT_TAIL(&t_functions, f[i], tf_next); tcp_fb_cnt++; - rw_wunlock(&tcp_function_lock); + num_registered++; } - return(0); + rw_wunlock(&tcp_function_lock); + return (0); cleanup: - /* - * Deregister the names we just added. Because registration failed - * for names[i], we don't need to deregister that name. - */ - *num_names = i; - rw_wlock(&tcp_function_lock); - while (--i >= 0) { - TAILQ_FOREACH(n, &t_functions, tf_next) { - if (!strncmp(n->tf_name, names[i], - TCP_FUNCTION_NAME_LEN_MAX)) { - TAILQ_REMOVE(&t_functions, n, tf_next); - tcp_fb_cnt--; - n->tf_fb = NULL; - free(n, M_TCPFUNCTIONS); - break; - } + /* Remove the entries just added. */ + for (i = 0; i < *num_names; i++) { + if (i < num_registered) { + TAILQ_REMOVE(&t_functions, f[i], tf_next); + tcp_fb_cnt--; } + f[i]->tf_fb = NULL; + free(f[i], M_TCPFUNCTIONS); } rw_wunlock(&tcp_function_lock); + *num_names = num_registered; return (error); } @@ -1432,7 +1378,7 @@ deregister_tcp_functions(struct tcp_function_block *blk, bool quiesce, } static void -tcp_drain(void) +tcp_drain(void *ctx __unused, int flags __unused) { struct epoch_tracker et; VNET_ITERATOR_DECL(vnet_iter); @@ -1464,13 +1410,6 @@ tcp_drain(void) #ifdef TCP_BLACKBOX tcp_log_drain(tcpb); #endif -#ifdef TCPPCAP - if (tcp_pcap_aggressive_free) { - /* Free the TCP PCAP queues. 
*/ - tcp_pcap_drain(&(tcpb->t_inpkts)); - tcp_pcap_drain(&(tcpb->t_outpkts)); - } -#endif } } CURVNET_RESTORE(); @@ -1512,6 +1451,8 @@ tcp_vnet_init(void *arg __unused) VNET_PCPUSTAT_ALLOC(tcpstat, M_WAITOK); V_tcp_msl = TCPTV_MSL; + V_tcp_msl_local = TCPTV_MSL_LOCAL; + arc4rand(&V_ts_offset_secret, sizeof(V_ts_offset_secret), 0); } VNET_SYSINIT(tcp_vnet_init, SI_SUB_PROTO_DOMAIN, SI_ORDER_FOURTH, tcp_vnet_init, NULL); @@ -1530,11 +1471,8 @@ tcp_init(void *arg __unused) tcp_keepintvl = TCPTV_KEEPINTVL; tcp_maxpersistidle = TCPTV_KEEP_IDLE; tcp_rexmit_initial = TCPTV_RTOBASE; - if (tcp_rexmit_initial < 1) - tcp_rexmit_initial = 1; tcp_rexmit_min = TCPTV_MIN; - if (tcp_rexmit_min < 1) - tcp_rexmit_min = 1; + tcp_rexmit_max = TCPTV_REXMTMAX; tcp_persmin = TCPTV_PERSMIN; tcp_persmax = TCPTV_PERSMAX; tcp_rexmit_slop = TCPTV_CPU_VAR; @@ -1549,7 +1487,6 @@ tcp_init(void *arg __unused) /* Initialize the TCP logging data. */ tcp_log_init(); #endif - arc4rand(&V_ts_offset_secret, sizeof(V_ts_offset_secret), 0); if (tcp_soreceive_stream) { #ifdef INET @@ -1583,9 +1520,6 @@ tcp_init(void *arg __unused) tcp_bad_csums = counter_u64_alloc(M_WAITOK); tcp_pacing_failures = counter_u64_alloc(M_WAITOK); tcp_dgp_failures = counter_u64_alloc(M_WAITOK); -#ifdef TCPPCAP - tcp_pcap_init(); -#endif hashsize = tcp_tcbhashsize; if (hashsize == 0) { @@ -1640,24 +1574,10 @@ SYSINIT(tcp_init, SI_SUB_PROTO_DOMAIN, SI_ORDER_THIRD, tcp_init, NULL); static void tcp_destroy(void *unused __unused) { - int n; #ifdef TCP_HHOOK int error; #endif - /* - * All our processes are gone, all our sockets should be cleaned - * up, which means, we should be past the tcp_discardcb() calls. - * Sleep to let all tcpcb timers really disappear and cleanup. - */ - for (;;) { - INP_INFO_WLOCK(&V_tcbinfo); - n = V_tcbinfo.ipi_count; - INP_INFO_WUNLOCK(&V_tcbinfo); - if (n == 0) - break; - pause("tcpdes", hz / 10); - } tcp_hc_destroy(); syncache_destroy(); in_pcbinfo_destroy(&V_tcbinfo); @@ -1793,6 +1713,7 @@ tcpip_maketemplate(struct inpcb *inp) * * NOTE: If m != NULL, then th must point to *inside* the mbuf. */ + void tcp_respond(struct tcpcb *tp, void *ipgen, struct tcphdr *th, struct mbuf *m, tcp_seq ack, tcp_seq seq, uint16_t flags) @@ -2160,7 +2081,7 @@ tcp_respond(struct tcpcb *tp, void *ipgen, struct tcphdr *th, struct mbuf *m, union tcp_log_stackspecific log; struct timeval tv; - memset(&log.u_bbr, 0, sizeof(log.u_bbr)); + memset(&log, 0, sizeof(log)); log.u_bbr.inhpts = tcp_in_hpts(tp); log.u_bbr.flex8 = 4; log.u_bbr.pkts_out = tp->t_maxseg; @@ -2226,12 +2147,53 @@ tcp_respond(struct tcpcb *tp, void *ipgen, struct tcphdr *th, struct mbuf *m, } /* + * Send a challenge ack (no data, no SACK option), but not more than + * V_tcp_ack_war_cnt per V_tcp_ack_war_time_window (per TCP connection). + */ +void +tcp_send_challenge_ack(struct tcpcb *tp, struct tcphdr *th, struct mbuf *m) +{ + sbintime_t now; + bool send_challenge_ack; + + if (V_tcp_ack_war_time_window == 0 || V_tcp_ack_war_cnt == 0) { + /* ACK war protection is disabled. */ + send_challenge_ack = true; + } else { + /* Start new epoch, if the previous one is already over. */ + now = getsbinuptime(); + if (tp->t_challenge_ack_end < now) { + tp->t_challenge_ack_cnt = 0; + tp->t_challenge_ack_end = now + + V_tcp_ack_war_time_window * SBT_1MS; + } + /* + * Send a challenge ACK, if less than tcp_ack_war_cnt have been + * sent in the current epoch. 
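
For illustration only, not part of the change: with the defaults established above, V_tcp_ack_war_cnt = 5 and V_tcp_ack_war_time_window = 1000, at most five challenge ACKs are sent within any one 1000 ms epoch. The logic below is a plain fixed-window counter; a standalone sketch of the same shape, using hypothetical names and millisecond arithmetic instead of sbintime_t, would be:

	static bool
	challenge_ack_allowed(uint64_t now_ms, uint64_t *epoch_end_ms,
	    uint32_t *cnt, uint32_t limit, uint32_t window_ms)
	{
		if (*epoch_end_ms < now_ms) {
			*cnt = 0;
			*epoch_end_ms = now_ms + window_ms;
		}
		if (*cnt >= limit)
			return (false);
		(*cnt)++;
		return (true);
	}
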
+ */ + if (tp->t_challenge_ack_cnt < V_tcp_ack_war_cnt) { + send_challenge_ack = true; + tp->t_challenge_ack_cnt++; + } else { + send_challenge_ack = false; + } + } + if (send_challenge_ack) { + tcp_respond(tp, mtod(m, void *), th, m, tp->rcv_nxt, + tp->snd_nxt, TH_ACK); + tp->last_ack_sent = tp->rcv_nxt; + } +} + +/* * Create a new TCP control block, making an empty reassembly queue and hooking * it to the argument protocol control block. The `inp' parameter must have * come from the zone allocator set up by tcpcbstor declaration. + * The caller can provide a pointer to a tcpcb of the listener to inherit the + * TCP function block from the listener. */ struct tcpcb * -tcp_newtcpcb(struct inpcb *inp) +tcp_newtcpcb(struct inpcb *inp, struct tcpcb *listening_tcb) { struct tcpcb *tp = intotcpcb(inp); #ifdef INET6 @@ -2246,17 +2208,38 @@ tcp_newtcpcb(struct inpcb *inp) bzero(&tp->t_start_zero, t_zero_size); /* Initialise cc_var struct for this tcpcb. */ - tp->t_ccv.type = IPPROTO_TCP; - tp->t_ccv.ccvc.tcp = tp; + tp->t_ccv.tp = tp; rw_rlock(&tcp_function_lock); - tp->t_fb = V_tcp_func_set_ptr; + if (listening_tcb != NULL) { + INP_LOCK_ASSERT(tptoinpcb(listening_tcb)); + KASSERT(listening_tcb->t_fb != NULL, + ("tcp_newtcpcb: listening_tcb->t_fb is NULL")); + if (listening_tcb->t_fb->tfb_flags & TCP_FUNC_BEING_REMOVED) { + rw_runlock(&tcp_function_lock); + return (NULL); + } + tp->t_fb = listening_tcb->t_fb; + } else { + tp->t_fb = V_tcp_func_set_ptr; + } refcount_acquire(&tp->t_fb->tfb_refcnt); + KASSERT((tp->t_fb->tfb_flags & TCP_FUNC_BEING_REMOVED) == 0, + ("tcp_newtcpcb: using TFB being removed")); rw_runlock(&tcp_function_lock); - /* - * Use the current system default CC algorithm. - */ - cc_attach(tp, CC_DEFAULT_ALGO()); - + CC_LIST_RLOCK(); + if (listening_tcb != NULL) { + if (CC_ALGO(listening_tcb)->flags & CC_MODULE_BEING_REMOVED) { + CC_LIST_RUNLOCK(); + if (tp->t_fb->tfb_tcp_fb_fini) + (*tp->t_fb->tfb_tcp_fb_fini)(tp, 1); + refcount_release(&tp->t_fb->tfb_refcnt); + return (NULL); + } + CC_ALGO(tp) = CC_ALGO(listening_tcb); + } else + CC_ALGO(tp) = CC_DEFAULT_ALGO(); + cc_refer(CC_ALGO(tp)); + CC_LIST_RUNLOCK(); if (CC_ALGO(tp)->cb_init != NULL) if (CC_ALGO(tp)->cb_init(&tp->t_ccv, NULL) > 0) { cc_detach(tp); @@ -2268,6 +2251,10 @@ tcp_newtcpcb(struct inpcb *inp) #ifdef TCP_HHOOK if (khelp_init_osd(HELPER_CLASS_TCP, &tp->t_osd)) { + if (CC_ALGO(tp)->cb_destroy != NULL) + CC_ALGO(tp)->cb_destroy(&tp->t_ccv); + CC_DATA(tp) = NULL; + cc_detach(tp); if (tp->t_fb->tfb_tcp_fb_fini) (*tp->t_fb->tfb_tcp_fb_fini)(tp, 1); refcount_release(&tp->t_fb->tfb_refcnt); @@ -2289,7 +2276,8 @@ tcp_newtcpcb(struct inpcb *inp) tp->t_hpts_cpu = HPTS_CPU_NONE; tp->t_lro_cpu = HPTS_CPU_NONE; - callout_init_rw(&tp->t_callout, &inp->inp_lock, CALLOUT_RETURNUNLOCKED); + callout_init_rw(&tp->t_callout, &inp->inp_lock, + CALLOUT_TRYLOCK | CALLOUT_RETURNUNLOCKED); for (int i = 0; i < TT_N; i++) tp->t_timers[i] = SBT_MAX; @@ -2331,12 +2319,6 @@ tcp_newtcpcb(struct inpcb *inp) * which may match an IPv4-mapped IPv6 address. */ inp->inp_ip_ttl = V_ip_defttl; -#ifdef TCPPCAP - /* - * Init the TCP PCAP queues. - */ - tcp_pcap_tcpcb_init(tp); -#endif #ifdef TCP_BLACKBOX /* Initialize the per-TCPCB log data. 
*/ tcp_log_tcpcbinit(tp); @@ -2344,6 +2326,13 @@ tcp_newtcpcb(struct inpcb *inp) tp->t_pacing_rate = -1; if (tp->t_fb->tfb_tcp_fb_init) { if ((*tp->t_fb->tfb_tcp_fb_init)(tp, &tp->t_fb_ptr)) { + if (CC_ALGO(tp)->cb_destroy != NULL) + CC_ALGO(tp)->cb_destroy(&tp->t_ccv); + CC_DATA(tp) = NULL; + cc_detach(tp); +#ifdef TCP_HHOOK + khelp_destroy_osd(&tp->t_osd); +#endif refcount_release(&tp->t_fb->tfb_refcnt); return (NULL); } @@ -2406,11 +2395,6 @@ tcp_discardcb(struct tcpcb *tp) if (tp->t_flags & TF_TOE) tcp_offload_detach(tp); #endif -#ifdef TCPPCAP - /* Free the TCP PCAP queues. */ - tcp_pcap_drain(&(tp->t_inpkts)); - tcp_pcap_drain(&(tp->t_outpkts)); -#endif /* Allow the CC algorithm to clean up after itself. */ if (CC_ALGO(tp)->cb_destroy != NULL) @@ -2456,10 +2440,8 @@ tcp_discardcb(struct tcpcb *tp) * XXXRRS: Updating must be after the stack fini() since * that may be converting some internal representation of * say srtt etc into the general one used by other stacks. - * Lets also at least protect against the so being NULL - * as RW stated below. */ - if ((tp->t_rttupdated >= 4) && (so != NULL)) { + if (tp->t_rttupdated >= 4) { struct hc_metrics_lite metrics; uint32_t ssthresh; @@ -2469,9 +2451,6 @@ tcp_discardcb(struct tcpcb *tp) * are satisfied. This gives us better new start value * for the congestion avoidance for new connections. * ssthresh is only set if packet loss occurred on a session. - * - * XXXRW: 'so' may be NULL here, and/or socket buffer may be - * being torn down. Ideally this code would not use 'so'. */ ssthresh = tp->snd_ssthresh; if (ssthresh != 0 && ssthresh < so->so_snd.sb_hiwat / 2) { @@ -2494,13 +2473,13 @@ tcp_discardcb(struct tcpcb *tp) ); } else ssthresh = 0; - metrics.rmx_ssthresh = ssthresh; + metrics.hc_ssthresh = ssthresh; - metrics.rmx_rtt = tp->t_srtt; - metrics.rmx_rttvar = tp->t_rttvar; - metrics.rmx_cwnd = tp->snd_cwnd; - metrics.rmx_sendpipe = 0; - metrics.rmx_recvpipe = 0; + metrics.hc_rtt = tp->t_srtt; + metrics.hc_rttvar = tp->t_rttvar; + metrics.hc_cwnd = tp->snd_cwnd; + metrics.hc_sendpipe = 0; + metrics.hc_recvpipe = 0; tcp_hc_update(&inp->inp_inc, &metrics); } @@ -2680,6 +2659,272 @@ SYSCTL_PROC(_net_inet_tcp, TCPCTL_PCBLIST, pcblist, NULL, 0, tcp_pcblist, "S,xtcpcb", "List of active TCP connections"); +#define SND_TAG_STATUS_MAXLEN 128 + +#ifdef KERN_TLS + +static struct sx ktlslist_lock; +SX_SYSINIT(ktlslistlock, &ktlslist_lock, "ktlslist"); +static uint64_t ktls_glob_gen = 1; + +static int +tcp_ktlslist_locked(SYSCTL_HANDLER_ARGS, bool export_keys) +{ + struct xinpgen xig; + struct inpcb *inp; + struct socket *so; + struct ktls_session *ksr, *kss; + char *buf; + struct xktls_session *xktls; + uint64_t ipi_gencnt; + size_t buflen, len, sz; + u_int cnt; + int error; + bool ek, p; + + sx_assert(&ktlslist_lock, SA_XLOCKED); + if (req->newptr != NULL) + return (EPERM); + + len = 0; + cnt = 0; + ipi_gencnt = V_tcbinfo.ipi_gencnt; + bzero(&xig, sizeof(xig)); + xig.xig_len = sizeof(xig); + xig.xig_gen = ktls_glob_gen++; + xig.xig_sogen = so_gencnt; + + struct inpcb_iterator inpi = INP_ALL_ITERATOR(&V_tcbinfo, + INPLOOKUP_RLOCKPCB); + while ((inp = inp_next(&inpi)) != NULL) { + if (inp->inp_gencnt > ipi_gencnt || + cr_canseeinpcb(req->td->td_ucred, inp) != 0) + continue; + + so = inp->inp_socket; + if (so != NULL && so->so_gencnt <= xig.xig_sogen) { + p = false; + ek = export_keys && cr_canexport_ktlskeys( + req->td, inp); + ksr = so->so_rcv.sb_tls_info; + if (ksr != NULL) { + ksr->gen = xig.xig_gen; + p = true; + if (ek) { + sz = SIZE_T_MAX; + 
ktls_session_copy_keys(ksr, + NULL, &sz); + len += sz; + } + if (ksr->snd_tag != NULL && + ksr->snd_tag->sw->snd_tag_status_str != + NULL) { + sz = SND_TAG_STATUS_MAXLEN; + in_pcbref(inp); + INP_RUNLOCK(inp); + error = ksr->snd_tag->sw-> + snd_tag_status_str( + ksr->snd_tag, NULL, &sz); + if (in_pcbrele_rlock(inp)) + return (EDEADLK); + if (error == 0) + len += sz; + } + } + kss = so->so_snd.sb_tls_info; + if (kss != NULL) { + kss->gen = xig.xig_gen; + p = true; + if (ek) { + sz = SIZE_T_MAX; + ktls_session_copy_keys(kss, + NULL, &sz); + len += sz; + } + if (kss->snd_tag != NULL && + kss->snd_tag->sw->snd_tag_status_str != + NULL) { + sz = SND_TAG_STATUS_MAXLEN; + in_pcbref(inp); + INP_RUNLOCK(inp); + error = kss->snd_tag->sw-> + snd_tag_status_str( + kss->snd_tag, NULL, &sz); + if (in_pcbrele_rlock(inp)) + return (EDEADLK); + if (error == 0) + len += sz; + } + } + if (p) { + len += sizeof(*xktls); + len = roundup2(len, __alignof(struct + xktls_session)); + } + } + } + if (req->oldptr == NULL) { + len += 2 * sizeof(xig); + len += 3 * len / 4; + req->oldidx = len; + return (0); + } + + if ((error = sysctl_wire_old_buffer(req, 0)) != 0) + return (error); + + error = SYSCTL_OUT(req, &xig, sizeof xig); + if (error != 0) + return (error); + + buflen = roundup2(sizeof(*xktls) + 2 * TLS_MAX_PARAM_SIZE + + 2 * SND_TAG_STATUS_MAXLEN, __alignof(struct xktls_session)); + buf = malloc(buflen, M_TEMP, M_WAITOK | M_ZERO); + struct inpcb_iterator inpi1 = INP_ALL_ITERATOR(&V_tcbinfo, + INPLOOKUP_RLOCKPCB); + while ((inp = inp_next(&inpi1)) != NULL) { + if (inp->inp_gencnt > ipi_gencnt || + cr_canseeinpcb(req->td->td_ucred, inp) != 0) + continue; + + so = inp->inp_socket; + if (so == NULL) + continue; + + p = false; + ek = export_keys && cr_canexport_ktlskeys(req->td, inp); + ksr = so->so_rcv.sb_tls_info; + kss = so->so_snd.sb_tls_info; + xktls = (struct xktls_session *)buf; + if (ksr != NULL && ksr->gen == xig.xig_gen) { + p = true; + ktls_session_to_xktls_onedir(ksr, ek, &xktls->rcv); + } + if (kss != NULL && kss->gen == xig.xig_gen) { + p = true; + ktls_session_to_xktls_onedir(kss, ek, &xktls->snd); + } + if (!p) + continue; + + xktls->inp_gencnt = inp->inp_gencnt; + xktls->so_pcb = (kvaddr_t)inp; + memcpy(&xktls->coninf, &inp->inp_inc, sizeof(xktls->coninf)); + len = sizeof(*xktls); + if (ksr != NULL && ksr->gen == xig.xig_gen) { + if (ek) { + sz = buflen - len; + ktls_session_copy_keys(ksr, buf + len, &sz); + len += sz; + } else { + xktls->rcv.cipher_key_len = 0; + xktls->rcv.auth_key_len = 0; + } + if (ksr->snd_tag != NULL && + ksr->snd_tag->sw->snd_tag_status_str != NULL) { + sz = SND_TAG_STATUS_MAXLEN; + in_pcbref(inp); + INP_RUNLOCK(inp); + error = ksr->snd_tag->sw->snd_tag_status_str( + ksr->snd_tag, buf + len, &sz); + if (in_pcbrele_rlock(inp)) + return (EDEADLK); + if (error == 0) { + xktls->rcv.drv_st_len = sz; + len += sz; + } + } + } + if (kss != NULL && kss->gen == xig.xig_gen) { + if (ek) { + sz = buflen - len; + ktls_session_copy_keys(kss, buf + len, &sz); + len += sz; + } else { + xktls->snd.cipher_key_len = 0; + xktls->snd.auth_key_len = 0; + } + if (kss->snd_tag != NULL && + kss->snd_tag->sw->snd_tag_status_str != NULL) { + sz = SND_TAG_STATUS_MAXLEN; + in_pcbref(inp); + INP_RUNLOCK(inp); + error = kss->snd_tag->sw->snd_tag_status_str( + kss->snd_tag, buf + len, &sz); + if (in_pcbrele_rlock(inp)) + return (EDEADLK); + if (error == 0) { + xktls->snd.drv_st_len = sz; + len += sz; + } + } + } + len = roundup2(len, __alignof(*xktls)); + xktls->tsz = len; + xktls->fsz = sizeof(*xktls); + + 
error = SYSCTL_OUT(req, xktls, len); + if (error != 0) { + INP_RUNLOCK(inp); + break; + } + cnt++; + } + + if (error == 0) { + xig.xig_sogen = so_gencnt; + xig.xig_count = cnt; + error = SYSCTL_OUT(req, &xig, sizeof(xig)); + } + + zfree(buf, M_TEMP); + return (error); +} + +static int +tcp_ktlslist1(SYSCTL_HANDLER_ARGS, bool export_keys) +{ + int repeats, error; + + for (repeats = 0; repeats < 100; repeats++) { + if (sx_xlock_sig(&ktlslist_lock)) + return (EINTR); + error = tcp_ktlslist_locked(oidp, arg1, arg2, req, + export_keys); + sx_xunlock(&ktlslist_lock); + if (error != EDEADLK) + break; + if (sig_intr() != 0) { + error = EINTR; + break; + } + req->oldidx = 0; + } + return (error); +} + +static int +tcp_ktlslist_nokeys(SYSCTL_HANDLER_ARGS) +{ + return (tcp_ktlslist1(oidp, arg1, arg2, req, false)); +} + +static int +tcp_ktlslist_wkeys(SYSCTL_HANDLER_ARGS) +{ + return (tcp_ktlslist1(oidp, arg1, arg2, req, true)); +} + +SYSCTL_PROC(_net_inet_tcp, TCPCTL_KTLSLIST, ktlslist, + CTLTYPE_OPAQUE | CTLFLAG_RD | CTLFLAG_MPSAFE, + NULL, 0, tcp_ktlslist_nokeys, "S,xktls_session", + "List of active kTLS sessions for TCP connections"); +SYSCTL_PROC(_net_inet_tcp, TCPCTL_KTLSLIST_WKEYS, ktlslist_wkeys, + CTLTYPE_OPAQUE | CTLFLAG_RD | CTLFLAG_MPSAFE, + NULL, 0, tcp_ktlslist_wkeys, "S,xktls_session", + "List of active kTLS sessions for TCP connections with keys"); +#endif /* KERN_TLS */ + #ifdef INET static int tcp_getcred(SYSCTL_HANDLER_ARGS) @@ -2690,6 +2935,8 @@ tcp_getcred(SYSCTL_HANDLER_ARGS) struct inpcb *inp; int error; + if (req->newptr == NULL) + return (EINVAL); error = priv_check(req->td, PRIV_NETINET_GETCRED); if (error) return (error); @@ -2732,6 +2979,8 @@ tcp6_getcred(SYSCTL_HANDLER_ARGS) int mapped = 0; #endif + if (req->newptr == NULL) + return (EINVAL); error = priv_check(req->td, PRIV_NETINET_GETCRED); if (error) return (error); @@ -3318,11 +3567,22 @@ tcp_mtudisc(struct inpcb *inp, int mtuoffer) tcp_mss_update(tp, -1, mtuoffer, NULL, NULL); so = inp->inp_socket; - SOCKBUF_LOCK(&so->so_snd); + SOCK_SENDBUF_LOCK(so); /* If the mss is larger than the socket buffer, decrease the mss. */ - if (so->so_snd.sb_hiwat < tp->t_maxseg) + if (so->so_snd.sb_hiwat < tp->t_maxseg) { tp->t_maxseg = so->so_snd.sb_hiwat; - SOCKBUF_UNLOCK(&so->so_snd); + if (tp->t_maxseg < V_tcp_mssdflt) { + /* + * The MSS is so small we should not process incoming + * SACK's since we are subject to attack in such a + * case. 
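
For illustration (a hypothetical consumer, not shown in this change): the flag set below is meant to be honored on the input side before SACK blocks are processed, with a guard along the lines of

	if ((tp->t_flags2 & TF2_PROC_SACK_PROHIBIT) == 0)
		my_process_sack_option(tp, &to, th_ack);

With the default net.inet.tcp.mssdflt of TCP_MSS (536 bytes), anything that forces t_maxseg below that value turns the prohibition on here, and the else branch clears it again once t_maxseg is at least that large.
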
+ */ + tp->t_flags2 |= TF2_PROC_SACK_PROHIBIT; + } else { + tp->t_flags2 &= ~TF2_PROC_SACK_PROHIBIT; + } + } + SOCK_SENDBUF_UNLOCK(so); TCPSTAT_INC(tcps_mturesent); tp->t_rtttime = 0; @@ -3378,6 +3638,9 @@ tcp_maxmtu(struct in_conninfo *inc, struct tcp_ifcap *cap) cap->tsomax = ifp->if_hw_tsomax; cap->tsomaxsegcount = ifp->if_hw_tsomaxsegcount; cap->tsomaxsegsize = ifp->if_hw_tsomaxsegsize; + /* XXXKIB IFCAP2_IPSEC_OFFLOAD_TSO */ + cap->ipsec_tso = (ifp->if_capenable2 & + IFCAP2_BIT(IFCAP2_IPSEC_OFFLOAD)) != 0; } } } @@ -3417,6 +3680,7 @@ tcp_maxmtu6(struct in_conninfo *inc, struct tcp_ifcap *cap) cap->tsomax = ifp->if_hw_tsomax; cap->tsomaxsegcount = ifp->if_hw_tsomaxsegcount; cap->tsomaxsegsize = ifp->if_hw_tsomaxsegsize; + cap->ipsec_tso = false; /* XXXKIB */ } } } @@ -3454,8 +3718,19 @@ tcp6_use_min_mtu(struct tcpcb *tp) opt = inp->in6p_outputopts; if (opt != NULL && opt->ip6po_minmtu == IP6PO_MINMTU_ALL && - tp->t_maxseg > TCP6_MSS) + tp->t_maxseg > TCP6_MSS) { tp->t_maxseg = TCP6_MSS; + if (tp->t_maxseg < V_tcp_mssdflt) { + /* + * The MSS is so small we should not process incoming + * SACK's since we are subject to attack in such a + * case. + */ + tp->t_flags2 |= TF2_PROC_SACK_PROHIBIT; + } else { + tp->t_flags2 &= ~TF2_PROC_SACK_PROHIBIT; + } + } } } #endif /* INET6 */ @@ -3507,7 +3782,6 @@ tcp_maxseg(const struct tcpcb *tp) if (tp->t_flags & TF_SACK_PERMIT) optlen += PADTCPOLEN(TCPOLEN_SACK_PERMITTED); } -#undef PAD optlen = min(optlen, TCP_MAXOLEN); return (tp->t_maxseg - optlen); } @@ -3529,7 +3803,6 @@ tcp_fixed_maxseg(const struct tcpcb *tp) * for cc modules to figure out what the modulo of the * cwnd should be. */ -#define PAD(len) ((((len) / 4) + !!((len) % 4)) * 4) if (TCPS_HAVEESTABLISHED(tp->t_state)) { if (tp->t_flags & TF_RCVD_TSTMP) optlen = TCPOLEN_TSTAMP_APPA; @@ -3537,23 +3810,22 @@ tcp_fixed_maxseg(const struct tcpcb *tp) optlen = 0; #if defined(IPSEC_SUPPORT) || defined(TCP_SIGNATURE) if (tp->t_flags & TF_SIGNATURE) - optlen += PAD(TCPOLEN_SIGNATURE); + optlen += PADTCPOLEN(TCPOLEN_SIGNATURE); #endif } else { if (tp->t_flags & TF_REQ_TSTMP) optlen = TCPOLEN_TSTAMP_APPA; else - optlen = PAD(TCPOLEN_MAXSEG); + optlen = PADTCPOLEN(TCPOLEN_MAXSEG); if (tp->t_flags & TF_REQ_SCALE) - optlen += PAD(TCPOLEN_WINDOW); + optlen += PADTCPOLEN(TCPOLEN_WINDOW); #if defined(IPSEC_SUPPORT) || defined(TCP_SIGNATURE) if (tp->t_flags & TF_SIGNATURE) - optlen += PAD(TCPOLEN_SIGNATURE); + optlen += PADTCPOLEN(TCPOLEN_SIGNATURE); #endif if (tp->t_flags & TF_SACK_PERMIT) - optlen += PAD(TCPOLEN_SACK_PERMITTED); + optlen += PADTCPOLEN(TCPOLEN_SACK_PERMITTED); } -#undef PAD optlen = min(optlen, TCP_MAXOLEN); return (tp->t_maxseg - optlen); } @@ -4353,7 +4625,7 @@ tcp_req_log_req_info(struct tcpcb *tp, struct tcp_sendfile_track *req, union tcp_log_stackspecific log; struct timeval tv; - memset(&log.u_bbr, 0, sizeof(log.u_bbr)); + memset(&log, 0, sizeof(log)); log.u_bbr.inhpts = tcp_in_hpts(tp); log.u_bbr.flex8 = val; log.u_bbr.rttProp = req->timestamp; @@ -4408,7 +4680,7 @@ tcp_req_check_for_stale_entries(struct tcpcb *tp, uint64_t ts, int rm_oldest) uint64_t time_delta, oldest_delta; int i, oldest, oldest_set = 0, cnt_rm = 0; - for(i = 0; i < MAX_TCP_TRK_REQ; i++) { + for (i = 0; i < MAX_TCP_TRK_REQ; i++) { ent = &tp->t_tcpreq_info[i]; if (ent->flags != TCP_TRK_TRACK_FLG_USED) { /* @@ -4451,15 +4723,15 @@ tcp_req_check_for_stale_entries(struct tcpcb *tp, uint64_t ts, int rm_oldest) int tcp_req_check_for_comp(struct tcpcb *tp, tcp_seq ack_point) { - int i, ret=0; + int i, ret = 0; struct 
tcp_sendfile_track *ent; /* Clean up any old closed end requests that are now completed */ if (tp->t_tcpreq_req == 0) - return(0); + return (0); if (tp->t_tcpreq_closed == 0) - return(0); - for(i = 0; i < MAX_TCP_TRK_REQ; i++) { + return (0); + for (i = 0; i < MAX_TCP_TRK_REQ; i++) { ent = &tp->t_tcpreq_info[i]; /* Skip empty ones */ if (ent->flags == TCP_TRK_TRACK_FLG_EMPTY) @@ -4482,11 +4754,11 @@ int tcp_req_is_entry_comp(struct tcpcb *tp, struct tcp_sendfile_track *ent, tcp_seq ack_point) { if (tp->t_tcpreq_req == 0) - return(-1); + return (-1); if (tp->t_tcpreq_closed == 0) - return(-1); + return (-1); if (ent->flags == TCP_TRK_TRACK_FLG_EMPTY) - return(-1); + return (-1); if (SEQ_GEQ(ack_point, ent->end_seq)) { return (1); } @@ -4508,7 +4780,7 @@ tcp_req_find_a_req_that_is_completed_by(struct tcpcb *tp, tcp_seq th_ack, int *i /* none open */ return (NULL); } - for(i = 0; i < MAX_TCP_TRK_REQ; i++) { + for (i = 0; i < MAX_TCP_TRK_REQ; i++) { ent = &tp->t_tcpreq_info[i]; if (ent->flags == TCP_TRK_TRACK_FLG_EMPTY) continue; @@ -4532,7 +4804,7 @@ tcp_req_find_req_for_seq(struct tcpcb *tp, tcp_seq seq) /* none open */ return (NULL); } - for(i = 0; i < MAX_TCP_TRK_REQ; i++) { + for (i = 0; i < MAX_TCP_TRK_REQ; i++) { ent = &tp->t_tcpreq_info[i]; tcp_req_log_req_info(tp, ent, i, TCP_TRK_REQ_LOG_SEARCH, (uint64_t)seq, 0); @@ -4580,7 +4852,7 @@ tcp_req_alloc_req_full(struct tcpcb *tp, struct tcp_snd_req *req, uint64_t ts, i (tp->t_tcpreq_req >= MAX_TCP_TRK_REQ)); /* Check to see if this is a duplicate of one not started */ if (tp->t_tcpreq_req) { - for(i = 0, allocated = 0; i < MAX_TCP_TRK_REQ; i++) { + for (i = 0, allocated = 0; i < MAX_TCP_TRK_REQ; i++) { fil = &tp->t_tcpreq_info[i]; if ((fil->flags & TCP_TRK_TRACK_FLG_USED) == 0) continue; @@ -4595,20 +4867,20 @@ tcp_req_alloc_req_full(struct tcpcb *tp, struct tcp_snd_req *req, uint64_t ts, i * a 4xx of some sort and its going to age * out, lets not duplicate it. 
*/ - return(fil); + return (fil); } } } /* Ok if there is no room at the inn we are in trouble */ if (tp->t_tcpreq_req >= MAX_TCP_TRK_REQ) { tcp_trace_point(tp, TCP_TP_REQ_LOG_FAIL); - for(i = 0; i < MAX_TCP_TRK_REQ; i++) { + for (i = 0; i < MAX_TCP_TRK_REQ; i++) { tcp_req_log_req_info(tp, &tp->t_tcpreq_info[i], i, TCP_TRK_REQ_LOG_ALLOCFAIL, 0, 0); } return (NULL); } - for(i = 0, allocated = 0; i < MAX_TCP_TRK_REQ; i++) { + for (i = 0, allocated = 0; i < MAX_TCP_TRK_REQ; i++) { fil = &tp->t_tcpreq_info[i]; if (fil->flags == TCP_TRK_TRACK_FLG_EMPTY) { allocated = 1; diff --git a/sys/netinet/tcp_syncache.c b/sys/netinet/tcp_syncache.c index c21dbbb58e31..80e6b53d10df 100644 --- a/sys/netinet/tcp_syncache.c +++ b/sys/netinet/tcp_syncache.c @@ -114,14 +114,6 @@ SYSCTL_INT(_net_inet_tcp, OID_AUTO, syncookies_only, CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(tcp_syncookiesonly), 0, "Use only TCP SYN cookies"); -VNET_DEFINE_STATIC(int, functions_inherit_listen_socket_stack) = 1; -#define V_functions_inherit_listen_socket_stack \ - VNET(functions_inherit_listen_socket_stack) -SYSCTL_INT(_net_inet_tcp, OID_AUTO, functions_inherit_listen_socket_stack, - CTLFLAG_VNET | CTLFLAG_RW, - &VNET_NAME(functions_inherit_listen_socket_stack), 0, - "Inherit listen socket's stack"); - #ifdef TCP_OFFLOAD #define ADDED_BY_TOE(sc) ((sc)->sc_tod != NULL) #endif @@ -139,17 +131,18 @@ static void syncache_timer(void *); static uint32_t syncookie_mac(struct in_conninfo *, tcp_seq, uint8_t, uint8_t *, uintptr_t); static tcp_seq syncookie_generate(struct syncache_head *, struct syncache *); -static struct syncache - *syncookie_lookup(struct in_conninfo *, struct syncache_head *, - struct syncache *, struct tcphdr *, struct tcpopt *, - struct socket *, uint16_t); +static bool syncookie_expand(struct in_conninfo *, + const struct syncache_head *, struct syncache *, + struct tcphdr *, struct tcpopt *, struct socket *, + uint16_t); static void syncache_pause(struct in_conninfo *); static void syncache_unpause(void *); static void syncookie_reseed(void *); #ifdef INVARIANTS -static int syncookie_cmp(struct in_conninfo *inc, struct syncache_head *sch, - struct syncache *sc, struct tcphdr *th, struct tcpopt *to, - struct socket *lso, uint16_t port); +static void syncookie_cmp(struct in_conninfo *, + const struct syncache_head *, struct syncache *, + struct tcphdr *, struct tcpopt *, struct socket *, + uint16_t); #endif /* @@ -215,7 +208,7 @@ sysctl_net_inet_tcp_syncache_rexmtlimit_check(SYSCTL_HANDLER_ARGS) SYSCTL_PROC(_net_inet_tcp_syncache, OID_AUTO, rexmtlimit, CTLFLAG_VNET | CTLTYPE_UINT | CTLFLAG_RW | CTLFLAG_NEEDGIANT, &VNET_NAME(tcp_syncache.rexmt_limit), 0, - sysctl_net_inet_tcp_syncache_rexmtlimit_check, "UI", + sysctl_net_inet_tcp_syncache_rexmtlimit_check, "IU", "Limit on SYN/ACK retransmissions"); VNET_DEFINE(int, tcp_sc_rst_sock_fail) = 1; @@ -237,7 +230,7 @@ syncache_free(struct syncache *sc) { if (sc->sc_ipopts) - (void) m_free(sc->sc_ipopts); + (void)m_free(sc->sc_ipopts); if (sc->sc_cred) crfree(sc->sc_cred); #ifdef MAC @@ -450,7 +443,7 @@ syncache_timeout(struct syncache *sc, struct syncache_head *sch, int docallout) else TCPT_RANGESET(rexmt, tcp_rexmit_initial * tcp_backoff[sc->sc_rxmits], - tcp_rexmit_min, TCPTV_REXMTMAX); + tcp_rexmit_min, tcp_rexmit_max); sc->sc_rxttime = ticks + rexmt; sc->sc_rxmits++; if (TSTMP_LT(sc->sc_rxttime, sch->sch_nextc)) { @@ -535,10 +528,16 @@ syncache_timer(void *xsch) } NET_EPOCH_ENTER(et); - syncache_respond(sc, NULL, TH_SYN|TH_ACK); + if (syncache_respond(sc, NULL, TH_SYN|TH_ACK) 
== 0) { + syncache_timeout(sc, sch, 0); + TCPSTAT_INC(tcps_sndacks); + TCPSTAT_INC(tcps_sndtotal); + TCPSTAT_INC(tcps_sc_retransmitted); + } else { + syncache_drop(sc, sch); + TCPSTAT_INC(tcps_sc_dropped); + } NET_EPOCH_EXIT(et); - TCPSTAT_INC(tcps_sc_retransmitted); - syncache_timeout(sc, sch, 0); } if (!TAILQ_EMPTY(&(sch)->sch_bucket)) callout_reset(&(sch)->sch_timer, (sch)->sch_nextc - tick, @@ -696,7 +695,13 @@ syncache_chkrst(struct in_conninfo *inc, struct tcphdr *th, struct mbuf *m, "sending challenge ACK\n", s, __func__, th->th_seq, sc->sc_irs + 1, sc->sc_wnd); - syncache_respond(sc, m, TH_ACK); + if (syncache_respond(sc, m, TH_ACK) == 0) { + TCPSTAT_INC(tcps_sndacks); + TCPSTAT_INC(tcps_sndtotal); + } else { + syncache_drop(sc, sch); + TCPSTAT_INC(tcps_sc_dropped); + } } } else { if ((s = tcp_log_addrs(inc, th, NULL, NULL))) @@ -777,7 +782,6 @@ done: static struct socket * syncache_socket(struct syncache *sc, struct socket *lso, struct mbuf *m) { - struct tcp_function_block *blk; struct inpcb *inp = NULL; struct socket *so; struct tcpcb *tp; @@ -802,7 +806,7 @@ syncache_socket(struct syncache *sc, struct socket *lso, struct mbuf *m) goto allocfail; } inp = sotoinpcb(so); - if ((tp = tcp_newtcpcb(inp)) == NULL) { + if ((tp = tcp_newtcpcb(inp, sototcpcb(lso))) == NULL) { in_pcbfree(inp); sodealloc(so); goto allocfail; @@ -895,7 +899,7 @@ syncache_socket(struct syncache *sc, struct socket *lso, struct mbuf *m) sin.sin_port = sc->sc_inc.inc_fport; bzero((caddr_t)sin.sin_zero, sizeof(sin.sin_zero)); INP_HASH_WLOCK(&V_tcbinfo); - error = in_pcbconnect(inp, &sin, thread0.td_ucred, false); + error = in_pcbconnect(inp, &sin, thread0.td_ucred); INP_HASH_WUNLOCK(&V_tcbinfo); if (error != 0) goto abort; @@ -912,37 +916,6 @@ syncache_socket(struct syncache *sc, struct socket *lso, struct mbuf *m) tp->t_port = sc->sc_port; tcp_rcvseqinit(tp); tcp_sendseqinit(tp); - blk = sototcpcb(lso)->t_fb; - if (V_functions_inherit_listen_socket_stack && blk != tp->t_fb) { - /* - * Our parents t_fb was not the default, - * we need to release our ref on tp->t_fb and - * pickup one on the new entry. - */ - struct tcp_function_block *rblk; - void *ptr = NULL; - - rblk = find_and_ref_tcp_fb(blk); - KASSERT(rblk != NULL, - ("cannot find blk %p out of syncache?", blk)); - - if (rblk->tfb_tcp_fb_init == NULL || - (*rblk->tfb_tcp_fb_init)(tp, &ptr) == 0) { - /* Release the old stack */ - if (tp->t_fb->tfb_tcp_fb_fini != NULL) - (*tp->t_fb->tfb_tcp_fb_fini)(tp, 0); - refcount_release(&tp->t_fb->tfb_refcnt); - /* Now set in all the pointers */ - tp->t_fb = rblk; - tp->t_fb_ptr = ptr; - } else { - /* - * Initialization failed. Release the reference count on - * the looked up default stack. 
- */ - refcount_release(&rblk->tfb_refcnt); - } - } tp->snd_wl1 = sc->sc_irs; tp->snd_max = tp->iss + 1; tp->snd_nxt = tp->iss + 1; @@ -1053,6 +1026,7 @@ allocfail: return (NULL); abort: + tcp_discardcb(tp); in_pcbfree(inp); sodealloc(so); if ((s = tcp_log_addrs(&sc->sc_inc, NULL, NULL, NULL))) { @@ -1123,6 +1097,7 @@ syncache_expand(struct in_conninfo *inc, struct tcpopt *to, struct tcphdr *th, */ if (locked && !V_tcp_syncookies) { SCH_UNLOCK(sch); + TCPSTAT_INC(tcps_sc_spurcookie); if ((s = tcp_log_addrs(inc, th, NULL, NULL))) log(LOG_DEBUG, "%s; %s: Spurious ACK, " "segment rejected (syncookies disabled)\n", @@ -1132,17 +1107,21 @@ syncache_expand(struct in_conninfo *inc, struct tcpopt *to, struct tcphdr *th, if (locked && !V_tcp_syncookiesonly && sch->sch_last_overflow < time_uptime - SYNCOOKIE_LIFETIME) { SCH_UNLOCK(sch); + TCPSTAT_INC(tcps_sc_spurcookie); if ((s = tcp_log_addrs(inc, th, NULL, NULL))) log(LOG_DEBUG, "%s; %s: Spurious ACK, " "segment rejected (no syncache entry)\n", s, __func__); goto failed; } - bzero(&scs, sizeof(scs)); - sc = syncookie_lookup(inc, sch, &scs, th, to, *lsop, port); if (locked) SCH_UNLOCK(sch); - if (sc == NULL) { + bzero(&scs, sizeof(scs)); + if (syncookie_expand(inc, sch, &scs, th, to, *lsop, port)) { + sc = &scs; + TCPSTAT_INC(tcps_sc_recvcookie); + } else { + TCPSTAT_INC(tcps_sc_failcookie); if ((s = tcp_log_addrs(inc, th, NULL, NULL))) log(LOG_DEBUG, "%s; %s: Segment failed " "SYNCOOKIE authentication, segment rejected " @@ -1399,10 +1378,9 @@ syncache_add(struct in_conninfo *inc, struct tcpopt *to, struct tcphdr *th, int autoflowlabel = 0; #endif #ifdef MAC - struct label *maclabel; + struct label *maclabel = NULL; #endif struct syncache scs; - struct ucred *cred; uint64_t tfo_response_cookie; unsigned int *tfo_pending = NULL; int tfo_cookie_valid = 0; @@ -1419,7 +1397,6 @@ syncache_add(struct in_conninfo *inc, struct tcpopt *to, struct tcphdr *th, */ KASSERT(SOLISTENING(so), ("%s: %p not listening", __func__, so)); tp = sototcpcb(so); - cred = V_tcp_syncache.see_other ? NULL : crhold(so->so_cred); #ifdef INET6 if (inc->inc_flags & INC_ISIPV6) { @@ -1549,7 +1526,7 @@ syncache_add(struct in_conninfo *inc, struct tcpopt *to, struct tcphdr *th, * forget it and use the new one we've been given. */ if (sc->sc_ipopts) - (void) m_free(sc->sc_ipopts); + (void)m_free(sc->sc_ipopts); sc->sc_ipopts = ipopts; } /* @@ -1565,7 +1542,7 @@ syncache_add(struct in_conninfo *inc, struct tcpopt *to, struct tcphdr *th, */ if (sc->sc_flags & SCF_ECN_MASK) { sc->sc_flags &= ~SCF_ECN_MASK; - sc->sc_flags = tcp_ecn_syncache_add(tcp_get_flags(th), iptos); + sc->sc_flags |= tcp_ecn_syncache_add(tcp_get_flags(th), iptos); } #ifdef MAC /* @@ -1588,56 +1565,54 @@ syncache_add(struct in_conninfo *inc, struct tcpopt *to, struct tcphdr *th, syncache_timeout(sc, sch, 1); TCPSTAT_INC(tcps_sndacks); TCPSTAT_INC(tcps_sndtotal); + } else { + syncache_drop(sc, sch); + TCPSTAT_INC(tcps_sc_dropped); } SCH_UNLOCK(sch); goto donenoprobe; } - if (tfo_cookie_valid) { - bzero(&scs, sizeof(scs)); - sc = &scs; - goto skip_alloc; - } - + KASSERT(sc == NULL, ("sc(%p) != NULL", sc)); /* * Skip allocating a syncache entry if we are just going to discard * it later. */ - if (!locked) { + if (!locked || tfo_cookie_valid) { bzero(&scs, sizeof(scs)); sc = &scs; - } else - sc = uma_zalloc(V_tcp_syncache.zone, M_NOWAIT | M_ZERO); - if (sc == NULL) { - /* - * The zone allocator couldn't provide more entries. - * Treat this as if the cache was full; drop the oldest - * entry and insert the new one. 
- */ - TCPSTAT_INC(tcps_sc_zonefail); - if ((sc = TAILQ_LAST(&sch->sch_bucket, sch_head)) != NULL) { - sch->sch_last_overflow = time_uptime; - syncache_drop(sc, sch); - syncache_pause(inc); - } + } else { sc = uma_zalloc(V_tcp_syncache.zone, M_NOWAIT | M_ZERO); if (sc == NULL) { - if (V_tcp_syncookies) { - bzero(&scs, sizeof(scs)); - sc = &scs; - } else { - KASSERT(locked, - ("%s: bucket unexpectedly unlocked", - __func__)); - SCH_UNLOCK(sch); - if (ipopts) - (void) m_free(ipopts); - goto done; + /* + * The zone allocator couldn't provide more entries. + * Treat this as if the cache was full; drop the oldest + * entry and insert the new one. + */ + TCPSTAT_INC(tcps_sc_zonefail); + sc = TAILQ_LAST(&sch->sch_bucket, sch_head); + if (sc != NULL) { + sch->sch_last_overflow = time_uptime; + syncache_drop(sc, sch); + syncache_pause(inc); + } + sc = uma_zalloc(V_tcp_syncache.zone, M_NOWAIT | M_ZERO); + if (sc == NULL) { + if (V_tcp_syncookies) { + bzero(&scs, sizeof(scs)); + sc = &scs; + } else { + KASSERT(locked, + ("%s: bucket unexpectedly unlocked", + __func__)); + SCH_UNLOCK(sch); + goto done; + } } } } -skip_alloc: + KASSERT(sc != NULL, ("sc == NULL")); if (!tfo_cookie_valid && tfo_response_cookie_valid) sc->sc_tfo_cookie = &tfo_response_cookie; @@ -1647,9 +1622,21 @@ skip_alloc: #ifdef MAC sc->sc_label = maclabel; #endif - sc->sc_cred = cred; + /* + * sc_cred is only used in syncache_pcblist() to list TCP endpoints in + * TCPS_SYN_RECEIVED state when V_tcp_syncache.see_other is false. + * Therefore, store the credentials and take a reference count only + * when needed: + * - sc is allocated from the zone and not using the on stack instance. + * - the sysctl variable net.inet.tcp.syncache.see_other is false. + * The reference count is decremented when a zone allocated sc is + * freed in syncache_free(). + */ + if (sc != &scs && !V_tcp_syncache.see_other) + sc->sc_cred = crhold(so->so_cred); + else + sc->sc_cred = NULL; sc->sc_port = port; - cred = NULL; sc->sc_ipopts = ipopts; bcopy(inc, &sc->sc_inc, sizeof(struct in_conninfo)); sc->sc_ip_tos = ip_tos; @@ -1759,9 +1746,7 @@ skip_alloc: * Do a standard 3-way handshake. */ if (syncache_respond(sc, m, TH_SYN|TH_ACK) == 0) { - if (V_tcp_syncookies && V_tcp_syncookiesonly && sc != &scs) - syncache_free(sc); - else if (sc != &scs) + if (sc != &scs) syncache_insert(sc, sch); /* locks and unlocks sch */ TCPSTAT_INC(tcps_sndacks); TCPSTAT_INC(tcps_sndtotal); @@ -1787,12 +1772,13 @@ donenoprobe: tcp_fastopen_decrement_counter(tfo_pending); tfo_expanded: - if (cred != NULL) - crfree(cred); + if (sc == NULL || sc == &scs) { #ifdef MAC - if (sc == &scs) mac_syncache_destroy(&maclabel); #endif + if (ipopts) + (void)m_free(ipopts); + } return (rv); } @@ -2271,8 +2257,8 @@ syncookie_generate(struct syncache_head *sch, struct syncache *sc) return (iss); } -static struct syncache * -syncookie_lookup(struct in_conninfo *inc, struct syncache_head *sch, +static bool +syncookie_expand(struct in_conninfo *inc, const struct syncache_head *sch, struct syncache *sc, struct tcphdr *th, struct tcpopt *to, struct socket *lso, uint16_t port) { @@ -2302,7 +2288,7 @@ syncookie_lookup(struct in_conninfo *inc, struct syncache_head *sch, /* The recomputed hash matches the ACK if this was a genuine cookie. */ if ((ack & ~0xff) != (hash & ~0xff)) - return (NULL); + return (false); /* Fill in the syncache values. 
*/ sc->sc_flags = 0; @@ -2362,47 +2348,47 @@ syncookie_lookup(struct in_conninfo *inc, struct syncache_head *sch, sc->sc_port = port; - TCPSTAT_INC(tcps_sc_recvcookie); - return (sc); + return (true); } #ifdef INVARIANTS -static int -syncookie_cmp(struct in_conninfo *inc, struct syncache_head *sch, +static void +syncookie_cmp(struct in_conninfo *inc, const struct syncache_head *sch, struct syncache *sc, struct tcphdr *th, struct tcpopt *to, struct socket *lso, uint16_t port) { - struct syncache scs, *scx; + struct syncache scs; char *s; bzero(&scs, sizeof(scs)); - scx = syncookie_lookup(inc, sch, &scs, th, to, lso, port); + if (syncookie_expand(inc, sch, &scs, th, to, lso, port) && + (sc->sc_peer_mss != scs.sc_peer_mss || + sc->sc_requested_r_scale != scs.sc_requested_r_scale || + sc->sc_requested_s_scale != scs.sc_requested_s_scale || + (sc->sc_flags & SCF_SACK) != (scs.sc_flags & SCF_SACK))) { - if ((s = tcp_log_addrs(inc, th, NULL, NULL)) == NULL) - return (0); + if ((s = tcp_log_addrs(inc, th, NULL, NULL)) == NULL) + return; - if (scx != NULL) { - if (sc->sc_peer_mss != scx->sc_peer_mss) + if (sc->sc_peer_mss != scs.sc_peer_mss) log(LOG_DEBUG, "%s; %s: mss different %i vs %i\n", - s, __func__, sc->sc_peer_mss, scx->sc_peer_mss); + s, __func__, sc->sc_peer_mss, scs.sc_peer_mss); - if (sc->sc_requested_r_scale != scx->sc_requested_r_scale) + if (sc->sc_requested_r_scale != scs.sc_requested_r_scale) log(LOG_DEBUG, "%s; %s: rwscale different %i vs %i\n", s, __func__, sc->sc_requested_r_scale, - scx->sc_requested_r_scale); + scs.sc_requested_r_scale); - if (sc->sc_requested_s_scale != scx->sc_requested_s_scale) + if (sc->sc_requested_s_scale != scs.sc_requested_s_scale) log(LOG_DEBUG, "%s; %s: swscale different %i vs %i\n", s, __func__, sc->sc_requested_s_scale, - scx->sc_requested_s_scale); + scs.sc_requested_s_scale); - if ((sc->sc_flags & SCF_SACK) != (scx->sc_flags & SCF_SACK)) + if ((sc->sc_flags & SCF_SACK) != (scs.sc_flags & SCF_SACK)) log(LOG_DEBUG, "%s; %s: SACK different\n", s, __func__); - } - if (s != NULL) free(s, M_TCPLOG); - return (0); + } } #endif /* INVARIANTS */ diff --git a/sys/netinet/tcp_syncache.h b/sys/netinet/tcp_syncache.h index 9f3beebf16af..55e062e35a54 100644 --- a/sys/netinet/tcp_syncache.h +++ b/sys/netinet/tcp_syncache.h @@ -67,7 +67,7 @@ struct syncache { u_int8_t sc_requested_s_scale:4, sc_requested_r_scale:4; u_int16_t sc_flags; -#if defined(TCP_OFFLOAD) || !defined(TCP_OFFLOAD_DISABLE) +#if defined(TCP_OFFLOAD) struct toedev *sc_tod; /* entry added by this TOE */ void *sc_todctx; /* TOE driver context */ #endif @@ -127,7 +127,9 @@ struct tcp_syncache { u_int cache_limit; u_int rexmt_limit; uint32_t hash_secret; +#ifdef VIMAGE struct vnet *vnet; +#endif struct syncookie_secret secret; struct mtx pause_mtx; struct callout pause_co; diff --git a/sys/netinet/tcp_timer.c b/sys/netinet/tcp_timer.c index 785f68be5621..32ce3001929c 100644 --- a/sys/netinet/tcp_timer.c +++ b/sys/netinet/tcp_timer.c @@ -74,39 +74,33 @@ #include <netinet/tcpip.h> int tcp_persmin; -SYSCTL_PROC(_net_inet_tcp, OID_AUTO, persmin, - CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_NEEDGIANT, +SYSCTL_PROC(_net_inet_tcp, OID_AUTO, persmin, CTLTYPE_INT | CTLFLAG_RW, &tcp_persmin, 0, sysctl_msec_to_ticks, "I", "minimum persistence interval"); int tcp_persmax; -SYSCTL_PROC(_net_inet_tcp, OID_AUTO, persmax, - CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_NEEDGIANT, +SYSCTL_PROC(_net_inet_tcp, OID_AUTO, persmax, CTLTYPE_INT | CTLFLAG_RW, &tcp_persmax, 0, sysctl_msec_to_ticks, "I", "maximum persistence interval"); int 
tcp_keepinit; -SYSCTL_PROC(_net_inet_tcp, TCPCTL_KEEPINIT, keepinit, - CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_NEEDGIANT, +SYSCTL_PROC(_net_inet_tcp, TCPCTL_KEEPINIT, keepinit, CTLTYPE_INT | CTLFLAG_RW, &tcp_keepinit, 0, sysctl_msec_to_ticks, "I", "time to establish connection"); int tcp_keepidle; -SYSCTL_PROC(_net_inet_tcp, TCPCTL_KEEPIDLE, keepidle, - CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_NEEDGIANT, +SYSCTL_PROC(_net_inet_tcp, TCPCTL_KEEPIDLE, keepidle, CTLTYPE_INT | CTLFLAG_RW, &tcp_keepidle, 0, sysctl_msec_to_ticks, "I", "time before keepalive probes begin"); int tcp_keepintvl; SYSCTL_PROC(_net_inet_tcp, TCPCTL_KEEPINTVL, keepintvl, - CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_NEEDGIANT, - &tcp_keepintvl, 0, sysctl_msec_to_ticks, "I", + CTLTYPE_INT | CTLFLAG_RW, &tcp_keepintvl, 0, sysctl_msec_to_ticks, "I", "time between keepalive probes"); int tcp_delacktime; SYSCTL_PROC(_net_inet_tcp, TCPCTL_DELACKTIME, delacktime, - CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_NEEDGIANT, - &tcp_delacktime, 0, sysctl_msec_to_ticks, "I", + CTLTYPE_INT | CTLFLAG_RW, &tcp_delacktime, 0, sysctl_msec_to_ticks, "I", "Time before a delayed ACK is sent"); VNET_DEFINE(int, tcp_msl); @@ -115,21 +109,29 @@ SYSCTL_PROC(_net_inet_tcp, OID_AUTO, msl, &VNET_NAME(tcp_msl), 0, sysctl_msec_to_ticks, "I", "Maximum segment lifetime"); +VNET_DEFINE(int, tcp_msl_local); +SYSCTL_PROC(_net_inet_tcp, OID_AUTO, msl_local, + CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_VNET, + &VNET_NAME(tcp_msl_local), 0, sysctl_msec_to_ticks, "I", + "Maximum segment lifetime for local communication"); + int tcp_rexmit_initial; -SYSCTL_PROC(_net_inet_tcp, OID_AUTO, rexmit_initial, - CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_NEEDGIANT, +SYSCTL_PROC(_net_inet_tcp, OID_AUTO, rexmit_initial, CTLTYPE_INT | CTLFLAG_RW, &tcp_rexmit_initial, 0, sysctl_msec_to_ticks, "I", "Initial Retransmission Timeout"); int tcp_rexmit_min; -SYSCTL_PROC(_net_inet_tcp, OID_AUTO, rexmit_min, - CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_NEEDGIANT, +SYSCTL_PROC(_net_inet_tcp, OID_AUTO, rexmit_min, CTLTYPE_INT | CTLFLAG_RW, &tcp_rexmit_min, 0, sysctl_msec_to_ticks, "I", "Minimum Retransmission Timeout"); +int tcp_rexmit_max; +SYSCTL_PROC(_net_inet_tcp, OID_AUTO, rexmit_max, CTLTYPE_INT | CTLFLAG_RW, + &tcp_rexmit_max, 0, sysctl_msec_to_ticks, "I", + "Maximum Retransmission Timeout"); + int tcp_rexmit_slop; -SYSCTL_PROC(_net_inet_tcp, OID_AUTO, rexmit_slop, - CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_NEEDGIANT, +SYSCTL_PROC(_net_inet_tcp, OID_AUTO, rexmit_slop, CTLTYPE_INT | CTLFLAG_RW, &tcp_rexmit_slop, 0, sysctl_msec_to_ticks, "I", "Retransmission Timer Slop"); @@ -144,8 +146,7 @@ SYSCTL_INT(_net_inet_tcp, OID_AUTO, fast_finwait2_recycle, CTLFLAG_RW, "Recycle closed FIN_WAIT_2 connections faster"); int tcp_finwait2_timeout; -SYSCTL_PROC(_net_inet_tcp, OID_AUTO, finwait2_timeout, - CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_NEEDGIANT, +SYSCTL_PROC(_net_inet_tcp, OID_AUTO, finwait2_timeout, CTLTYPE_INT | CTLFLAG_RW, &tcp_finwait2_timeout, 0, sysctl_msec_to_ticks, "I", "FIN-WAIT2 timeout"); @@ -162,8 +163,7 @@ SYSCTL_INT(_net_inet_tcp, OID_AUTO, rexmit_drop_options, CTLFLAG_RW, "Drop TCP options from 3rd and later retransmitted SYN"); int tcp_maxunacktime = TCPTV_MAXUNACKTIME; -SYSCTL_PROC(_net_inet_tcp, OID_AUTO, maxunacktime, - CTLTYPE_INT|CTLFLAG_RW | CTLFLAG_NEEDGIANT, +SYSCTL_PROC(_net_inet_tcp, OID_AUTO, maxunacktime, CTLTYPE_INT | CTLFLAG_RW, &tcp_maxunacktime, 0, sysctl_msec_to_ticks, "I", "Maximum time (in ms) that a session can linger without making progress"); @@ -629,8 +629,7 @@ tcp_timer_rexmt(struct tcpcb *tp) rexmt = 
tcp_rexmit_initial * tcp_backoff[tp->t_rxtshift]; else rexmt = TCP_REXMTVAL(tp) * tcp_backoff[tp->t_rxtshift]; - TCPT_RANGESET(tp->t_rxtcur, rexmt, - tp->t_rttmin, TCPTV_REXMTMAX); + TCPT_RANGESET(tp->t_rxtcur, rexmt, tp->t_rttmin, tcp_rexmit_max); /* * We enter the path for PLMTUD if connection is established or, if @@ -756,6 +755,16 @@ tcp_timer_rexmt(struct tcpcb *tp) tp->t_flags2 |= TF2_PLPMTU_PMTUD; tp->t_flags2 &= ~TF2_PLPMTU_BLACKHOLE; tp->t_maxseg = tp->t_pmtud_saved_maxseg; + if (tp->t_maxseg < V_tcp_mssdflt) { + /* + * The MSS is so small we should not + * process incoming SACK's since we are + * subject to attack in such a case. + */ + tp->t_flags2 |= TF2_PROC_SACK_PROHIBIT; + } else { + tp->t_flags2 &= ~TF2_PROC_SACK_PROHIBIT; + } TCPSTAT_INC(tcps_pmtud_blackhole_failed); /* * Reset the slow-start flight size as it @@ -800,7 +809,9 @@ tcp_timer_rexmt(struct tcpcb *tp) */ tp->t_rtttime = 0; - cc_cong_signal(tp, NULL, CC_RTO); + /* Do not overwrite the snd_cwnd on SYN retransmissions. */ + if (tp->t_state != TCPS_SYN_SENT) + cc_cong_signal(tp, NULL, CC_RTO); NET_EPOCH_ENTER(et); rv = tcp_output_locked(tp); NET_EPOCH_EXIT(et); @@ -864,12 +875,8 @@ tcp_timer_enter(void *xtp) struct inpcb *inp = tptoinpcb(tp); sbintime_t precision; tt_which which; - bool tp_valid; INP_WLOCK_ASSERT(inp); - MPASS((curthread->td_pflags & TDP_INTCPCALLOUT) == 0); - - curthread->td_pflags |= TDP_INTCPCALLOUT; which = tcp_timer_next(tp, NULL); MPASS(which < TT_N); @@ -877,8 +884,7 @@ tcp_timer_enter(void *xtp) tp->t_precisions[which] = 0; tcp_bblog_timer(tp, which, TT_PROCESSING, 0); - tp_valid = tcp_timersw[which](tp); - if (tp_valid) { + if (tcp_timersw[which](tp)) { tcp_bblog_timer(tp, which, TT_PROCESSED, 0); if ((which = tcp_timer_next(tp, &precision)) != TT_N) { MPASS(tp->t_state > TCPS_CLOSED); @@ -888,8 +894,6 @@ tcp_timer_enter(void *xtp) } INP_WUNLOCK(inp); } - - curthread->td_pflags &= ~TDP_INTCPCALLOUT; } /* @@ -939,35 +943,26 @@ tcp_timer_active(struct tcpcb *tp, tt_which which) /* * Stop all timers associated with tcpcb. - * * Called when tcpcb moves to TCPS_CLOSED. - * - * XXXGL: unfortunately our callout(9) is not able to fully stop a locked - * callout even when only two threads are involved: the callout itself and the - * thread that does callout_stop(). See where softclock_call_cc() swaps the - * callwheel lock to callout lock and then checks cc_exec_cancel(). This is - * the race window. If it happens, the tcp_timer_enter() won't be executed, - * however pcb lock will be locked and released, hence we can't free memory. - * Until callout(9) is improved, just keep retrying. In my profiling I've seen - * such event happening less than 1 time per hour with 20-30 Gbit/s of traffic. */ void tcp_timer_stop(struct tcpcb *tp) { - struct inpcb *inp = tptoinpcb(tp); - INP_WLOCK_ASSERT(inp); - - if (curthread->td_pflags & TDP_INTCPCALLOUT) { - int stopped __diagused; + INP_WLOCK_ASSERT(tptoinpcb(tp)); - stopped = callout_stop(&tp->t_callout); - MPASS(stopped == 0); - for (tt_which i = 0; i < TT_N; i++) - tp->t_timers[i] = SBT_MAX; - } else while(__predict_false(callout_stop(&tp->t_callout) == 0)) { - INP_WUNLOCK(inp); - kern_yield(PRI_UNCHANGED); - INP_WLOCK(inp); - } + /* + * We don't check return value from callout_stop(). There are two + * reasons why it can return 0. First, a legitimate one: we could have + * been called from the callout itself. Second, callout(9) has a bug. 
+ * It can race internally in softclock_call_cc(), when callout has + * already completed, but cc_exec_curr still points at the callout. + */ + (void )callout_stop(&tp->t_callout); + /* + * In case of being called from callout itself, we must make sure that + * we don't reschedule. + */ + for (tt_which i = 0; i < TT_N; i++) + tp->t_timers[i] = SBT_MAX; } diff --git a/sys/netinet/tcp_timer.h b/sys/netinet/tcp_timer.h index a3ca268417ba..34a0f1375463 100644 --- a/sys/netinet/tcp_timer.h +++ b/sys/netinet/tcp_timer.h @@ -32,6 +32,8 @@ #ifndef _NETINET_TCP_TIMER_H_ #define _NETINET_TCP_TIMER_H_ +#ifdef _KERNEL + /* * The TCPT_REXMT timer is used to force retransmissions. * The TCP has the TCPT_REXMT timer set whenever segments @@ -71,21 +73,22 @@ /* * Time constants. */ -#define TCPTV_MSL ( 30*hz) /* max seg lifetime (hah!) */ +#define TCPTV_MSL MSEC_2_TICKS(30000) /* max seg lifetime (hah!) */ +#define TCPTV_MSL_LOCAL MSEC_2_TICKS(10) /* max seg lifetime for local comm */ #define TCPTV_SRTTBASE 0 /* base roundtrip time; if 0, no idea yet */ -#define TCPTV_RTOBASE ( 1*hz) /* assumed RTO if no info */ +#define TCPTV_RTOBASE MSEC_2_TICKS(1000) /* assumed RTO if no info */ -#define TCPTV_PERSMIN ( 5*hz) /* minimum persist interval */ -#define TCPTV_PERSMAX ( 60*hz) /* maximum persist interval */ +#define TCPTV_PERSMIN MSEC_2_TICKS(5000) /* minimum persist interval */ +#define TCPTV_PERSMAX MSEC_2_TICKS(60000) /* maximum persist interval */ -#define TCPTV_KEEP_INIT ( 75*hz) /* initial connect keepalive */ -#define TCPTV_KEEP_IDLE (120*60*hz) /* dflt time before probing */ -#define TCPTV_KEEPINTVL ( 75*hz) /* default probe interval */ +#define TCPTV_KEEP_INIT MSEC_2_TICKS(75000) /* initial connect keepalive */ +#define TCPTV_KEEP_IDLE MSEC_2_TICKS(120*60*1000) /* dflt time before probing */ +#define TCPTV_KEEPINTVL MSEC_2_TICKS(75000) /* default probe interval */ #define TCPTV_KEEPCNT 8 /* max probes before drop */ #define TCPTV_MAXUNACKTIME 0 /* max time without making progress */ -#define TCPTV_FINWAIT2_TIMEOUT (60*hz) /* FIN_WAIT_2 timeout if no receiver */ +#define TCPTV_FINWAIT2_TIMEOUT MSEC_2_TICKS(60000) /* FIN_WAIT_2 timeout if no receiver */ /* * Minimum retransmit timer is 3 ticks, for algorithmic stability. @@ -107,15 +110,13 @@ * The prior minimum of 1*hz (1 second) badly breaks throughput on any * networks faster then a modem that has minor (e.g. 1%) packet loss. */ -#define TCPTV_MIN ( hz/33 ) /* minimum allowable value */ -#define TCPTV_CPU_VAR ( hz/5 ) /* cpu variance allowed (200ms) */ -#define TCPTV_REXMTMAX ( 64*hz) /* max allowable REXMT value */ - -#define TCPTV_TWTRUNC 8 /* RTO factor to truncate TW */ +#define TCPTV_MIN MSEC_2_TICKS(30) /* minimum allowable value */ +#define TCPTV_CPU_VAR MSEC_2_TICKS(200) /* cpu variance allowed (200ms) */ +#define TCPTV_REXMTMAX MSEC_2_TICKS(64000) /* max allowable REXMT value */ #define TCP_MAXRXTSHIFT 12 /* maximum retransmits */ -#define TCPTV_DELACK ( hz/25 ) /* 40ms timeout */ +#define TCPTV_DELACK MSEC_2_TICKS(40) /* 40ms timeout */ /* * If we exceed this number of retransmits for a single segment, we'll consider @@ -135,8 +136,6 @@ (tv) = (tvmax); \ } while(0) -#ifdef _KERNEL - #define TP_KEEPINIT(tp) ((tp)->t_keepinit ? (tp)->t_keepinit : tcp_keepinit) #define TP_KEEPIDLE(tp) ((tp)->t_keepidle ? (tp)->t_keepidle : tcp_keepidle) #define TP_KEEPINTVL(tp) ((tp)->t_keepintvl ? 
(tp)->t_keepintvl : tcp_keepintvl) @@ -165,6 +164,7 @@ extern int tcp_maxunacktime; /* max time without making progress */ extern int tcp_maxpersistidle; extern int tcp_rexmit_initial; extern int tcp_rexmit_min; +extern int tcp_rexmit_max; extern int tcp_rexmit_slop; extern int tcp_ttl; /* time to live for TCP segs */ extern int tcp_backoff[]; @@ -184,6 +184,8 @@ VNET_DECLARE(int, tcp_v6pmtud_blackhole_mss); #define V_tcp_v6pmtud_blackhole_mss VNET(tcp_v6pmtud_blackhole_mss) VNET_DECLARE(int, tcp_msl); #define V_tcp_msl VNET(tcp_msl) +VNET_DECLARE(int, tcp_msl_local); +#define V_tcp_msl_local VNET(tcp_msl_local) #endif /* _KERNEL */ diff --git a/sys/netinet/tcp_timewait.c b/sys/netinet/tcp_timewait.c index 8d77db275310..c095fc8f7765 100644 --- a/sys/netinet/tcp_timewait.c +++ b/sys/netinet/tcp_timewait.c @@ -87,12 +87,52 @@ #include <security/mac/mac_framework.h> -VNET_DEFINE_STATIC(bool, nolocaltimewait) = true; +VNET_DEFINE_STATIC(bool, nolocaltimewait) = false; #define V_nolocaltimewait VNET(nolocaltimewait) -SYSCTL_BOOL(_net_inet_tcp, OID_AUTO, nolocaltimewait, - CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(nolocaltimewait), true, + +static int +sysctl_net_inet_tcp_nolocaltimewait(SYSCTL_HANDLER_ARGS) +{ + int error; + bool new; + + new = V_nolocaltimewait; + error = sysctl_handle_bool(oidp, &new, 0, req); + if (error == 0 && req->newptr) { + V_nolocaltimewait = new; + gone_in(16, "net.inet.tcp.nolocaltimewait is obsolete." + " Use net.inet.tcp.local_msl instead.\n"); + } + return (error); +} + +SYSCTL_PROC(_net_inet_tcp, OID_AUTO, nolocaltimewait, + CTLFLAG_VNET | CTLFLAG_RW | CTLTYPE_U8, + &VNET_NAME(nolocaltimewait), 0, sysctl_net_inet_tcp_nolocaltimewait, "CU", "Do not create TCP TIME_WAIT state for local connections"); +static u_int +tcp_eff_msl(struct tcpcb *tp) +{ + struct inpcb *inp = tptoinpcb(tp); +#ifdef INET6 + bool isipv6 = inp->inp_inc.inc_flags & INC_ISIPV6; +#endif + + if ( +#ifdef INET6 + isipv6 ? in6_localip(&inp->in6p_faddr) : +#endif +#ifdef INET + in_localip(inp->inp_faddr)) +#else + false) +#endif + return (V_tcp_msl_local); + else + return (V_tcp_msl); +} + /* * Move a TCP connection into TIME_WAIT state. * inp is locked, and is unlocked before returning. @@ -127,7 +167,7 @@ tcp_twstart(struct tcpcb *tp) if (V_nolocaltimewait && ( #ifdef INET6 - isipv6 ? in6_localaddr(&inp->in6p_faddr) : + isipv6 ? in6_localip(&inp->in6p_faddr) : #endif #ifdef INET in_localip(inp->inp_faddr) @@ -140,7 +180,7 @@ tcp_twstart(struct tcpcb *tp) return; } - tcp_timer_activate(tp, TT_2MSL, 2 * V_tcp_msl); + tcp_timer_activate(tp, TT_2MSL, 2 * tcp_eff_msl(tp)); INP_WUNLOCK(inp); } @@ -283,7 +323,7 @@ tcp_twcheck(struct inpcb *inp, struct tcpopt *to, struct tcphdr *th, if (thflags & TH_FIN) { seq = th->th_seq + tlen + (thflags & TH_SYN ? 
1 : 0); if (seq + 1 == tp->rcv_nxt) - tcp_timer_activate(tp, TT_2MSL, 2 * V_tcp_msl); + tcp_timer_activate(tp, TT_2MSL, 2 * tcp_eff_msl(tp)); } /* diff --git a/sys/netinet/tcp_usrreq.c b/sys/netinet/tcp_usrreq.c index abdc2de545e9..687b0d538666 100644 --- a/sys/netinet/tcp_usrreq.c +++ b/sys/netinet/tcp_usrreq.c @@ -95,9 +95,6 @@ #include <netinet/cc/cc.h> #include <netinet/tcp_fastopen.h> #include <netinet/tcp_hpts.h> -#ifdef TCPPCAP -#include <netinet/tcp_pcap.h> -#endif #ifdef TCP_OFFLOAD #include <netinet/tcp_offload.h> #endif @@ -149,7 +146,7 @@ tcp_bblog_pru(struct tcpcb *tp, uint32_t pru, int error) } /* - * TCP attaches to socket via pru_attach(), reserving space, + * TCP attaches to socket via pr_attach(), reserving space, * and an internet control block. */ static int @@ -172,19 +169,13 @@ tcp_usr_attach(struct socket *so, int proto, struct thread *td) if (error) goto out; inp = sotoinpcb(so); - tp = tcp_newtcpcb(inp); + tp = tcp_newtcpcb(inp, NULL); if (tp == NULL) { error = ENOBUFS; in_pcbfree(inp); goto out; } tp->t_state = TCPS_CLOSED; - /* Can we inherit anything from the listener? */ - if ((so->so_listen != NULL) && - (so->so_listen->so_pcb != NULL) && - (tp->t_fb->tfb_inherit != NULL)) { - (*tp->t_fb->tfb_inherit)(tp, sotoinpcb(so->so_listen)); - } tcp_bblog_pru(tp, PRU_ATTACH, error); INP_WUNLOCK(inp); TCPSTATES_INC(TCPS_CLOSED); @@ -268,7 +259,8 @@ tcp_usr_bind(struct socket *so, struct sockaddr *nam, struct thread *td) goto out; } INP_HASH_WLOCK(&V_tcbinfo); - error = in_pcbbind(inp, sinp, td->td_ucred); + error = in_pcbbind(inp, sinp, V_tcp_bind_all_fibs ? 0 : INPBIND_FIB, + td->td_ucred); INP_HASH_WUNLOCK(&V_tcbinfo); out: tcp_bblog_pru(tp, PRU_BIND, error); @@ -336,13 +328,14 @@ tcp6_usr_bind(struct socket *so, struct sockaddr *nam, struct thread *td) } inp->inp_vflag |= INP_IPV4; inp->inp_vflag &= ~INP_IPV6; - error = in_pcbbind(inp, &sin, td->td_ucred); + error = in_pcbbind(inp, &sin, 0, td->td_ucred); INP_HASH_WUNLOCK(&V_tcbinfo); goto out; } } #endif - error = in6_pcbbind(inp, sin6, td->td_ucred); + error = in6_pcbbind(inp, sin6, V_tcp_bind_all_fibs ? 0 : INPBIND_FIB, + td->td_ucred); INP_HASH_WUNLOCK(&V_tcbinfo); out: if (error != 0) @@ -361,9 +354,10 @@ out: static int tcp_usr_listen(struct socket *so, int backlog, struct thread *td) { - int error = 0; struct inpcb *inp; struct tcpcb *tp; + int error = 0; + bool already_listening; inp = sotoinpcb(so); KASSERT(inp != NULL, ("tcp_usr_listen: inp == NULL")); @@ -375,6 +369,7 @@ tcp_usr_listen(struct socket *so, int backlog, struct thread *td) tp = intotcpcb(inp); SOCK_LOCK(so); + already_listening = SOLISTENING(so); error = solisten_proto_check(so); if (error != 0) { SOCK_UNLOCK(so); @@ -382,7 +377,8 @@ tcp_usr_listen(struct socket *so, int backlog, struct thread *td) } if (inp->inp_lport == 0) { INP_HASH_WLOCK(&V_tcbinfo); - error = in_pcbbind(inp, NULL, td->td_ucred); + error = in_pcbbind(inp, NULL, + V_tcp_bind_all_fibs ? 
0 : INPBIND_FIB, td->td_ucred); INP_HASH_WUNLOCK(&V_tcbinfo); } if (error == 0) { @@ -396,7 +392,11 @@ tcp_usr_listen(struct socket *so, int backlog, struct thread *td) solisten_proto_abort(so); } SOCK_UNLOCK(so); + if (already_listening) + goto out; + if (error == 0) + in_pcblisten(inp); if (tp->t_flags & TF_FASTOPEN) tp->t_tfo_pending = tcp_fastopen_alloc_counter(); @@ -412,10 +412,11 @@ out: static int tcp6_usr_listen(struct socket *so, int backlog, struct thread *td) { - int error = 0; struct inpcb *inp; struct tcpcb *tp; u_char vflagsav; + int error = 0; + bool already_listening; inp = sotoinpcb(so); KASSERT(inp != NULL, ("tcp6_usr_listen: inp == NULL")); @@ -429,6 +430,7 @@ tcp6_usr_listen(struct socket *so, int backlog, struct thread *td) vflagsav = inp->inp_vflag; SOCK_LOCK(so); + already_listening = SOLISTENING(so); error = solisten_proto_check(so); if (error != 0) { SOCK_UNLOCK(so); @@ -439,7 +441,8 @@ tcp6_usr_listen(struct socket *so, int backlog, struct thread *td) inp->inp_vflag &= ~INP_IPV4; if ((inp->inp_flags & IN6P_IPV6_V6ONLY) == 0) inp->inp_vflag |= INP_IPV4; - error = in6_pcbbind(inp, NULL, td->td_ucred); + error = in6_pcbbind(inp, NULL, + V_tcp_bind_all_fibs ? 0 : INPBIND_FIB, td->td_ucred); } INP_HASH_WUNLOCK(&V_tcbinfo); if (error == 0) { @@ -453,7 +456,11 @@ tcp6_usr_listen(struct socket *so, int backlog, struct thread *td) solisten_proto_abort(so); } SOCK_UNLOCK(so); + if (already_listening) + goto out; + if (error == 0) + in_pcblisten(inp); if (tp->t_flags & TF_FASTOPEN) tp->t_tfo_pending = tcp_fastopen_alloc_counter(); @@ -584,7 +591,7 @@ tcp6_usr_connect(struct socket *so, struct sockaddr *nam, struct thread *td) goto out; } if (SOLISTENING(so)) { - error = EINVAL; + error = EOPNOTSUPP; goto out; } #ifdef INET @@ -691,28 +698,22 @@ tcp_usr_disconnect(struct socket *so) struct inpcb *inp; struct tcpcb *tp = NULL; struct epoch_tracker et; - int error = 0; NET_EPOCH_ENTER(et); inp = sotoinpcb(so); KASSERT(inp != NULL, ("tcp_usr_disconnect: inp == NULL")); INP_WLOCK(inp); - if (inp->inp_flags & INP_DROPPED) { - INP_WUNLOCK(inp); - NET_EPOCH_EXIT(et); - return (ECONNRESET); - } tp = intotcpcb(inp); if (tp->t_state == TCPS_TIME_WAIT) goto out; tcp_disconnect(tp); out: - tcp_bblog_pru(tp, PRU_DISCONNECT, error); + tcp_bblog_pru(tp, PRU_DISCONNECT, 0); TCP_PROBE2(debug__user, tp, PRU_DISCONNECT); INP_WUNLOCK(inp); NET_EPOCH_EXIT(et); - return (error); + return (0); } #ifdef INET @@ -906,8 +907,8 @@ out: /* * Do a send by putting data in output queue and updating urgent * marker if URG set. Possibly send more data. Unlike the other - * pru_*() routines, the mbuf chains are our responsibility. We - * must either enqueue them or free them. The other pru_* routines + * pr_*() routines, the mbuf chains are our responsibility. We + * must either enqueue them or free them. The other pr_*() routines * generally are caller-frees. */ static int @@ -1131,9 +1132,9 @@ tcp_usr_send(struct socket *so, int flags, struct mbuf *m, /* * XXXRW: PRUS_EOF not implemented with PRUS_OOB? 
*/ - SOCKBUF_LOCK(&so->so_snd); + SOCK_SENDBUF_LOCK(so); if (sbspace(&so->so_snd) < -512) { - SOCKBUF_UNLOCK(&so->so_snd); + SOCK_SENDBUF_UNLOCK(so); error = ENOBUFS; goto out; } @@ -1148,7 +1149,7 @@ tcp_usr_send(struct socket *so, int flags, struct mbuf *m, if (tp->t_acktime == 0) tp->t_acktime = ticks; sbappendstream_locked(&so->so_snd, m, flags); - SOCKBUF_UNLOCK(&so->so_snd); + SOCK_SENDBUF_UNLOCK(so); m = NULL; if (nam && tp->t_state < TCPS_SYN_SENT) { /* @@ -1243,9 +1244,9 @@ tcp_usr_ready(struct socket *so, struct mbuf *m, int count) } tp = intotcpcb(inp); - SOCKBUF_LOCK(&so->so_snd); + SOCK_SENDBUF_LOCK(so); error = sbready(&so->so_snd, m, count); - SOCKBUF_UNLOCK(&so->so_snd); + SOCK_SENDBUF_UNLOCK(so); if (error) { INP_WUNLOCK(inp); return (error); @@ -1418,6 +1419,7 @@ struct protosw tcp_protosw = { .pr_rcvd = tcp_usr_rcvd, .pr_rcvoob = tcp_usr_rcvoob, .pr_send = tcp_usr_send, + .pr_sendfile_wait = sendfile_wait_generic, .pr_ready = tcp_usr_ready, .pr_shutdown = tcp_usr_shutdown, .pr_sockaddr = in_getsockaddr, @@ -1446,6 +1448,7 @@ struct protosw tcp6_protosw = { .pr_rcvd = tcp_usr_rcvd, .pr_rcvoob = tcp_usr_rcvoob, .pr_send = tcp_usr_send, + .pr_sendfile_wait = sendfile_wait_generic, .pr_ready = tcp_usr_ready, .pr_shutdown = tcp_usr_shutdown, .pr_sockaddr = in6_mapped_sockaddr, @@ -1475,9 +1478,11 @@ tcp_connect(struct tcpcb *tp, struct sockaddr_in *sin, struct thread *td) (SS_ISCONNECTING | SS_ISCONNECTED | SS_ISDISCONNECTING | SS_ISDISCONNECTED)) != 0)) return (EISCONN); + if (__predict_false((so->so_options & SO_REUSEPORT_LB) != 0)) + return (EOPNOTSUPP); INP_HASH_WLOCK(&V_tcbinfo); - error = in_pcbconnect(inp, sin, td->td_ucred, true); + error = in_pcbconnect(inp, sin, td->td_ucred); INP_HASH_WUNLOCK(&V_tcbinfo); if (error != 0) return (error); @@ -1515,8 +1520,11 @@ tcp6_connect(struct tcpcb *tp, struct sockaddr_in6 *sin6, struct thread *td) INP_WLOCK_ASSERT(inp); if (__predict_false((so->so_state & - (SS_ISCONNECTING | SS_ISCONNECTED)) != 0)) + (SS_ISCONNECTING | SS_ISCONNECTED | SS_ISDISCONNECTING | + SS_ISDISCONNECTED)) != 0)) return (EISCONN); + if (__predict_false((so->so_options & SO_REUSEPORT_LB) != 0)) + return (EOPNOTSUPP); INP_HASH_WLOCK(&V_tcbinfo); error = in6_pcbconnect(inp, sin6, td->td_ucred, true); @@ -1709,11 +1717,7 @@ tcp_ctloutput_set(struct inpcb *inp, struct sockopt *sopt) default: return (error); } - INP_WLOCK(inp); - if (inp->inp_flags & INP_DROPPED) { - INP_WUNLOCK(inp); - return (ECONNRESET); - } + INP_WLOCK_RECHECK(inp); } else if (sopt->sopt_name == TCP_FUNCTION_BLK) { /* * Protect the TCP option TCP_FUNCTION_BLK so @@ -1728,8 +1732,7 @@ tcp_ctloutput_set(struct inpcb *inp, struct sockopt *sopt) if (error) return (error); - INP_WLOCK(inp); - tp = intotcpcb(inp); + INP_WLOCK_RECHECK(inp); blk = find_and_ref_tcp_functions(&fsn); if (blk == NULL) { @@ -1742,32 +1745,17 @@ tcp_ctloutput_set(struct inpcb *inp, struct sockopt *sopt) INP_WUNLOCK(inp); return (0); } - if (tp->t_state != TCPS_CLOSED) { - /* - * The user has advanced the state - * past the initial point, we may not - * be able to switch. - */ - if (blk->tfb_tcp_handoff_ok != NULL) { - /* - * Does the stack provide a - * query mechanism, if so it may - * still be possible? 
- */ - error = (*blk->tfb_tcp_handoff_ok)(tp); - } else - error = EINVAL; - if (error) { - refcount_release(&blk->tfb_refcnt); - INP_WUNLOCK(inp); - return(error); - } - } if (blk->tfb_flags & TCP_FUNC_BEING_REMOVED) { refcount_release(&blk->tfb_refcnt); INP_WUNLOCK(inp); return (ENOENT); } + error = (*blk->tfb_tcp_handoff_ok)(tp); + if (error) { + refcount_release(&blk->tfb_refcnt); + INP_WUNLOCK(inp); + return (error); + } /* * Ensure the new stack takes ownership with a * clean slate on peak rate threshold. @@ -1983,7 +1971,7 @@ no_mem_needed: tp = intotcpcb(inp); if (ptr != NULL) memset(ptr, 0, mem_sz); - cc_mem.ccvc.tcp = tp; + cc_mem.tp = tp; /* * We once again hold a write lock over the tcb so it's * safe to do these things without ordering concerns. @@ -2203,9 +2191,19 @@ unlock_and_done: INP_WLOCK_RECHECK(inp); if (optval > 0 && optval <= tp->t_maxseg && - optval + 40 >= V_tcp_minmss) + optval + 40 >= V_tcp_minmss) { tp->t_maxseg = optval; - else + if (tp->t_maxseg < V_tcp_mssdflt) { + /* + * The MSS is so small we should not process incoming + * SACK's since we are subject to attack in such a + * case. + */ + tp->t_flags2 |= TF2_PROC_SACK_PROHIBIT; + } else { + tp->t_flags2 &= ~TF2_PROC_SACK_PROHIBIT; + } + } else error = EINVAL; goto unlock_and_done; @@ -2348,26 +2346,6 @@ unlock_and_done: TP_MAXIDLE(tp)); goto unlock_and_done; -#ifdef TCPPCAP - case TCP_PCAP_OUT: - case TCP_PCAP_IN: - INP_WUNLOCK(inp); - error = sooptcopyin(sopt, &optval, sizeof optval, - sizeof optval); - if (error) - return (error); - - INP_WLOCK_RECHECK(inp); - if (optval >= 0) - tcp_pcap_set_sock_max( - (sopt->sopt_name == TCP_PCAP_OUT) ? - &(tp->t_outpkts) : &(tp->t_inpkts), - optval); - else - error = EINVAL; - goto unlock_and_done; -#endif - case TCP_FASTOPEN: { struct tcp_fastopen tfo_optval; @@ -2598,16 +2576,6 @@ unhold: INP_WUNLOCK(inp); error = sooptcopyout(sopt, &ui, sizeof(ui)); break; -#ifdef TCPPCAP - case TCP_PCAP_OUT: - case TCP_PCAP_IN: - optval = tcp_pcap_get_sock_max( - (sopt->sopt_name == TCP_PCAP_OUT) ? - &(tp->t_outpkts) : &(tp->t_inpkts)); - INP_WUNLOCK(inp); - error = sooptcopyout(sopt, &optval, sizeof optval); - break; -#endif case TCP_FASTOPEN: optval = tp->t_flags & TF_FASTOPEN; INP_WUNLOCK(inp); @@ -2892,6 +2860,14 @@ db_print_tflags(u_int t_flags) db_printf("%sTF_PREVVALID", comma ? ", " : ""); comma = 1; } + if (t_flags & TF_WAKESOR) { + db_printf("%sTF_WAKESOR", comma ? ", " : ""); + comma = 1; + } + if (t_flags & TF_GPUTINPROG) { + db_printf("%sTF_GPUTINPROG", comma ? ", " : ""); + comma = 1; + } if (t_flags & TF_MORETOCOME) { db_printf("%sTF_MORETOCOME", comma ? ", " : ""); comma = 1; @@ -2912,18 +2888,10 @@ db_print_tflags(u_int t_flags) db_printf("%sTF_FASTRECOVERY", comma ? ", " : ""); comma = 1; } - if (t_flags & TF_CONGRECOVERY) { - db_printf("%sTF_CONGRECOVERY", comma ? ", " : ""); - comma = 1; - } if (t_flags & TF_WASFRECOVERY) { db_printf("%sTF_WASFRECOVERY", comma ? ", " : ""); comma = 1; } - if (t_flags & TF_WASCRECOVERY) { - db_printf("%sTF_WASCRECOVERY", comma ? ", " : ""); - comma = 1; - } if (t_flags & TF_SIGNATURE) { db_printf("%sTF_SIGNATURE", comma ? ", " : ""); comma = 1; @@ -2936,6 +2904,30 @@ db_print_tflags(u_int t_flags) db_printf("%sTF_TSO", comma ? ", " : ""); comma = 1; } + if (t_flags & TF_TOE) { + db_printf("%sTF_TOE", comma ? ", " : ""); + comma = 1; + } + if (t_flags & TF_CLOSED) { + db_printf("%sTF_CLOSED", comma ? ", " : ""); + comma = 1; + } + if (t_flags & TF_SENTSYN) { + db_printf("%sTF_SENTSYN", comma ? 
", " : ""); + comma = 1; + } + if (t_flags & TF_LRD) { + db_printf("%sTF_LRD", comma ? ", " : ""); + comma = 1; + } + if (t_flags & TF_CONGRECOVERY) { + db_printf("%sTF_CONGRECOVERY", comma ? ", " : ""); + comma = 1; + } + if (t_flags & TF_WASCRECOVERY) { + db_printf("%sTF_WASCRECOVERY", comma ? ", " : ""); + comma = 1; + } if (t_flags & TF_FASTOPEN) { db_printf("%sTF_FASTOPEN", comma ? ", " : ""); comma = 1; @@ -2984,10 +2976,62 @@ db_print_tflags2(u_int t_flags2) db_printf("%sTF2_ACE_PERMIT", comma ? ", " : ""); comma = 1; } + if (t_flags2 & TF2_HPTS_CPU_SET) { + db_printf("%sTF2_HPTS_CPU_SET", comma ? ", " : ""); + comma = 1; + } if (t_flags2 & TF2_FBYTES_COMPLETE) { db_printf("%sTF2_FBYTES_COMPLETE", comma ? ", " : ""); comma = 1; } + if (t_flags2 & TF2_ECN_USE_ECT1) { + db_printf("%sTF2_ECN_USE_ECT1", comma ? ", " : ""); + comma = 1; + } + if (t_flags2 & TF2_TCP_ACCOUNTING) { + db_printf("%sTF2_TCP_ACCOUNTING", comma ? ", " : ""); + comma = 1; + } + if (t_flags2 & TF2_HPTS_CALLS) { + db_printf("%sTF2_HPTS_CALLS", comma ? ", " : ""); + comma = 1; + } + if (t_flags2 & TF2_MBUF_L_ACKS) { + db_printf("%sTF2_MBUF_L_ACKS", comma ? ", " : ""); + comma = 1; + } + if (t_flags2 & TF2_MBUF_ACKCMP) { + db_printf("%sTF2_MBUF_ACKCMP", comma ? ", " : ""); + comma = 1; + } + if (t_flags2 & TF2_SUPPORTS_MBUFQ) { + db_printf("%sTF2_SUPPORTS_MBUFQ", comma ? ", " : ""); + comma = 1; + } + if (t_flags2 & TF2_MBUF_QUEUE_READY) { + db_printf("%sTF2_MBUF_QUEUE_READY", comma ? ", " : ""); + comma = 1; + } + if (t_flags2 & TF2_DONT_SACK_QUEUE) { + db_printf("%sTF2_DONT_SACK_QUEUE", comma ? ", " : ""); + comma = 1; + } + if (t_flags2 & TF2_CANNOT_DO_ECN) { + db_printf("%sTF2_CANNOT_DO_ECN", comma ? ", " : ""); + comma = 1; + } + if (t_flags2 & TF2_PROC_SACK_PROHIBIT) { + db_printf("%sTF2_PROC_SACK_PROHIBIT", comma ? ", " : ""); + comma = 1; + } + if (t_flags2 & TF2_IPSEC_TSO) { + db_printf("%sTF2_IPSEC_TSO", comma ? ", " : ""); + comma = 1; + } + if (t_flags2 & TF2_NO_ISS_CHECK) { + db_printf("%sTF2_NO_ISS_CHECK", comma ? 
", " : ""); + comma = 1; + } } static void @@ -3007,7 +3051,44 @@ db_print_toobflags(char t_oobflags) } static void -db_print_tcpcb(struct tcpcb *tp, const char *name, int indent) +db_print_bblog_state(int state) +{ + switch (state) { + case TCP_LOG_STATE_RATIO_OFF: + db_printf("TCP_LOG_STATE_RATIO_OFF"); + break; + case TCP_LOG_STATE_CLEAR: + db_printf("TCP_LOG_STATE_CLEAR"); + break; + case TCP_LOG_STATE_OFF: + db_printf("TCP_LOG_STATE_OFF"); + break; + case TCP_LOG_STATE_TAIL: + db_printf("TCP_LOG_STATE_TAIL"); + break; + case TCP_LOG_STATE_HEAD: + db_printf("TCP_LOG_STATE_HEAD"); + break; + case TCP_LOG_STATE_HEAD_AUTO: + db_printf("TCP_LOG_STATE_HEAD_AUTO"); + break; + case TCP_LOG_STATE_CONTINUAL: + db_printf("TCP_LOG_STATE_CONTINUAL"); + break; + case TCP_LOG_STATE_TAIL_AUTO: + db_printf("TCP_LOG_STATE_TAIL_AUTO"); + break; + case TCP_LOG_VIA_BBPOINTS: + db_printf("TCP_LOG_STATE_BBPOINTS"); + break; + default: + db_printf("UNKNOWN(%d)", state); + break; + } +} + +static void +db_print_tcpcb(struct tcpcb *tp, const char *name, int indent, bool show_bblog) { db_print_indent(indent); @@ -3117,18 +3198,68 @@ db_print_tcpcb(struct tcpcb *tp, const char *name, int indent) db_print_indent(indent); db_printf("t_rttlow: %d rfbuf_ts: %u rfbuf_cnt: %d\n", tp->t_rttlow, tp->rfbuf_ts, tp->rfbuf_cnt); + + db_print_indent(indent); + db_printf("t_fb.tfb_tcp_block_name: %s\n", tp->t_fb->tfb_tcp_block_name); + + db_print_indent(indent); + db_printf("t_cc.name: %s\n", tp->t_cc->name); + + db_print_indent(indent); + db_printf("_t_logstate: %d (", tp->_t_logstate); + db_print_bblog_state(tp->_t_logstate); + db_printf(")\n"); + + db_print_indent(indent); + db_printf("t_lognum: %d t_loglimit: %d t_logsn: %u\n", + tp->t_lognum, tp->t_loglimit, tp->t_logsn); + + if (show_bblog) { +#ifdef TCP_BLACKBOX + db_print_bblog_entries(&tp->t_logs, indent); +#else + db_print_indent(indent); + db_printf("BBLog not supported\n"); +#endif + } } DB_SHOW_COMMAND(tcpcb, db_show_tcpcb) { struct tcpcb *tp; + bool show_bblog; if (!have_addr) { db_printf("usage: show tcpcb <addr>\n"); return; } + show_bblog = strchr(modif, 'b') != NULL; tp = (struct tcpcb *)addr; - db_print_tcpcb(tp, "tcpcb", 0); + db_print_tcpcb(tp, "tcpcb", 0, show_bblog); +} + +DB_SHOW_ALL_COMMAND(tcpcbs, db_show_all_tcpcbs) +{ + VNET_ITERATOR_DECL(vnet_iter); + struct inpcb *inp; + bool only_locked, show_bblog; + + only_locked = strchr(modif, 'l') != NULL; + show_bblog = strchr(modif, 'b') != NULL; + VNET_FOREACH(vnet_iter) { + CURVNET_SET(vnet_iter); + CK_LIST_FOREACH(inp, &V_tcbinfo.ipi_listhead, inp_list) { + if (only_locked && + inp->inp_lock.rw_lock == RW_UNLOCKED) + continue; + db_print_tcpcb(intotcpcb(inp), "tcpcb", 0, show_bblog); + if (db_pager_quit) + break; + } + CURVNET_RESTORE(); + if (db_pager_quit) + break; + } } #endif diff --git a/sys/netinet/tcp_var.h b/sys/netinet/tcp_var.h index a339f52c2ffa..059b2aff689d 100644 --- a/sys/netinet/tcp_var.h +++ b/sys/netinet/tcp_var.h @@ -452,7 +452,6 @@ struct tcpcb { tcp_seq gput_seq; /* Outbound measurement seq */ tcp_seq gput_ack; /* Inbound measurement ack */ int32_t t_stats_gput_prev; /* XXXLAS: Prev gput measurement */ - uint32_t t_maxpeakrate; /* max peak rate set by user, bytes/s */ uint32_t t_sndtlppack; /* tail loss probe packets sent */ uint64_t t_sndtlpbyte; /* total tail loss probe bytes sent */ uint64_t t_sndbytes; /* total bytes sent */ @@ -465,6 +464,11 @@ struct tcpcb { /* TCP Fast Open */ uint8_t t_tfo_client_cookie_len; /* TFO client cookie length */ uint32_t t_end_info_status; /* 
Status flag of end info */ + sbintime_t t_challenge_ack_end; /* End of the challenge ack epoch */ + uint32_t t_challenge_ack_cnt; /* Number of challenge ACKs sent in + * current epoch + */ + unsigned int *t_tfo_pending; /* TFO server pending counter */ union { uint8_t client[TCP_FASTOPEN_MAX_COOKIE_LEN]; @@ -495,10 +499,6 @@ struct tcpcb { uint64_t tcp_cnt_counters[TCP_NUM_CNT_COUNTERS]; uint64_t tcp_proc_time[TCP_NUM_CNT_COUNTERS]; #endif -#ifdef TCPPCAP - struct mbufq t_inpkts; /* List of saved input packets. */ - struct mbufq t_outpkts; /* List of saved output packets. */ -#endif }; #endif /* _KERNEL || _WANT_TCPCB */ @@ -528,27 +528,16 @@ typedef enum { /* Minimum map entries limit value, if set */ #define TCP_MIN_MAP_ENTRIES_LIMIT 128 -/* - * TODO: We yet need to brave plowing in - * to tcp_input() and the pru_usrreq() block. - * Right now these go to the old standards which - * are somewhat ok, but in the long term may - * need to be changed. If we do tackle tcp_input() - * then we need to get rid of the tcp_do_segment() - * function below. - */ /* Flags for tcp functions */ #define TCP_FUNC_BEING_REMOVED 0x01 /* Can no longer be referenced */ #define TCP_FUNC_OUTPUT_CANDROP 0x02 /* tfb_tcp_output may ask tcp_drop */ +#define TCP_FUNC_DEFAULT_OK 0x04 /* Can be used as default */ /** - * Adding a tfb_tcp_handoff_ok function allows the socket - * option to change stacks to query you even if the - * connection is in a later stage. You return 0 to - * say you can take over and run your stack, you return - * non-zero (an error number) to say no you can't. - * If the function is undefined you can only change - * in the early states (before connect or listen). + * tfb_tcp_handoff_ok is a mandatory function allowing + * to query a stack, if it can take over a tcpcb. + * You return 0 to say you can take over and run your stack, + * you return non-zero (an error number) to say no you can't. * * tfb_tcp_fb_init is used to allow the new stack to * setup its control block. Among the things it must @@ -637,6 +626,9 @@ struct tcp_function_block { uint8_t tfb_id; }; +/* Maximum number of names each TCP function block can be registered with. 
*/ +#define TCP_FUNCTION_NAME_NUM_MAX 8 + struct tcp_function { TAILQ_ENTRY(tcp_function) tf_next; char tf_name[TCP_FUNCTION_NAME_LEN_MAX]; @@ -846,6 +838,9 @@ tcp_packets_this_ack(struct tcpcb *tp, tcp_seq ack) #define TF2_MBUF_QUEUE_READY 0x00020000 /* Inputs can be queued */ #define TF2_DONT_SACK_QUEUE 0x00040000 /* Don't wake on sack */ #define TF2_CANNOT_DO_ECN 0x00080000 /* The stack does not do ECN */ +#define TF2_PROC_SACK_PROHIBIT 0x00100000 /* Due to small MSS size do not process sack's */ +#define TF2_IPSEC_TSO 0x00200000 /* IPSEC + TSO supported */ +#define TF2_NO_ISS_CHECK 0x00400000 /* Don't check SEG.ACK against ISS */ /* * Structure to hold TCP options that are only used during segment @@ -883,13 +878,13 @@ struct tcpopt { #define TO_SYN 0x01 /* parse SYN-only options */ struct hc_metrics_lite { /* must stay in sync with hc_metrics */ - uint32_t rmx_mtu; /* MTU for this path */ - uint32_t rmx_ssthresh; /* outbound gateway buffer limit */ - uint32_t rmx_rtt; /* estimated round trip time */ - uint32_t rmx_rttvar; /* estimated rtt variance */ - uint32_t rmx_cwnd; /* congestion window */ - uint32_t rmx_sendpipe; /* outbound delay-bandwidth product */ - uint32_t rmx_recvpipe; /* inbound delay-bandwidth product */ + uint32_t hc_mtu; /* MTU for this path */ + uint32_t hc_ssthresh; /* outbound gateway buffer limit */ + uint32_t hc_rtt; /* estimated round trip time */ + uint32_t hc_rttvar; /* estimated rtt variance */ + uint32_t hc_cwnd; /* congestion window */ + uint32_t hc_sendpipe; /* outbound delay-bandwidth product */ + uint32_t hc_recvpipe; /* inbound delay-bandwidth product */ }; #ifndef _NETINET_IN_PCB_H_ @@ -932,9 +927,12 @@ struct in_conninfo; + (tp)->t_rttvar) >> TCP_DELTA_SHIFT) /* - * TCP statistics. - * Many of these should be kept per connection, - * but that's inconvenient at the moment. + * Global (per-VNET) TCP statistics. The below structure represents what we + * export to the userland, but in the kernel we have an array of counter_u64_t + * with as many elements as there are members in the structure. The counters + * shall be increased by TCPSTAT_INC() or KMOD_TCPSTAT_INC(). Adding a new + * counter also requires adding corresponding SDT probes into in_kdtrace.h and + * into in_kdtrace.c. 
*/ struct tcpstat { uint64_t tcps_connattempt; /* connections initiated */ @@ -1020,6 +1018,8 @@ struct tcpstat { uint64_t tcps_sc_zonefail; /* zalloc() failed */ uint64_t tcps_sc_sendcookie; /* SYN cookie sent */ uint64_t tcps_sc_recvcookie; /* SYN cookie received */ + uint64_t tcps_sc_spurcookie; /* SYN cookie spurious, rejected */ + uint64_t tcps_sc_failcookie; /* SYN cookie failed, rejected */ uint64_t tcps_hc_added; /* entry added to hostcache */ uint64_t tcps_hc_bucketoverflow;/* hostcache per bucket limit hit */ @@ -1029,6 +1029,7 @@ struct tcpstat { /* SACK related stats */ uint64_t tcps_sack_recovery_episode; /* SACK recovery episodes */ uint64_t tcps_sack_rexmits; /* SACK rexmit segments */ + uint64_t tcps_sack_rexmits_tso; /* SACK rexmit TSO chunks */ uint64_t tcps_sack_rexmit_bytes; /* SACK rexmit bytes */ uint64_t tcps_sack_rcv_blocks; /* SACK blocks (options) received */ uint64_t tcps_sack_send_blocks; /* SACK blocks (options) sent */ @@ -1086,15 +1087,17 @@ struct tcpstat { uint64_t tcps_tlpresends; /* number of tlp resends */ uint64_t tcps_tlpresend_bytes; /* number of bytes resent by tlp */ + /* SEG.ACK validation failures */ + uint64_t tcps_rcvghostack; /* received ACK for data never sent */ + uint64_t tcps_rcvacktooold; /* received ACK for data too long ago */ - uint64_t _pad[4]; /* 4 TBD placeholder for STABLE */ + + uint64_t _pad[1]; /* 1 TBD placeholder for STABLE */ }; #define tcps_rcvmemdrop tcps_rcvreassfull /* compat */ #ifdef _KERNEL -#define TI_UNLOCKED 1 -#define TI_RLOCKED 2 #include <sys/counter.h> #include <netinet/in_kdtrace.h> @@ -1236,6 +1239,9 @@ struct tcp_function_info { #define TCPCTL_SACK 14 /* Selective Acknowledgement,rfc 2018 */ #define TCPCTL_DROP 15 /* drop tcp connection */ #define TCPCTL_STATES 16 /* connection counts by TCP state */ +#define TCPCTL_KTLSLIST 17 /* connections with active ktls + session */ +#define TCPCTL_KTLSLIST_WKEYS 18 /* KTLSLIST with key data exported */ #ifdef _KERNEL #ifdef SYSCTL_DECL @@ -1254,9 +1260,12 @@ VNET_DECLARE(int, tcp_log_in_vain); VNET_DECLARE(int, drop_synfin); VNET_DECLARE(int, path_mtu_discovery); VNET_DECLARE(int, tcp_abc_l_var); +VNET_DECLARE(uint32_t, tcp_ack_war_cnt); +VNET_DECLARE(uint32_t, tcp_ack_war_time_window); VNET_DECLARE(int, tcp_autorcvbuf_max); VNET_DECLARE(int, tcp_autosndbuf_inc); VNET_DECLARE(int, tcp_autosndbuf_max); +VNET_DECLARE(int, tcp_bind_all_fibs); VNET_DECLARE(int, tcp_delack_enabled); VNET_DECLARE(int, tcp_do_autorcvbuf); VNET_DECLARE(int, tcp_do_autosndbuf); @@ -1277,6 +1286,7 @@ VNET_DECLARE(int, tcp_ecn_maxretries); VNET_DECLARE(int, tcp_initcwnd_segments); VNET_DECLARE(int, tcp_insecure_rst); VNET_DECLARE(int, tcp_insecure_syn); +VNET_DECLARE(int, tcp_insecure_ack); VNET_DECLARE(uint32_t, tcp_map_entries_limit); VNET_DECLARE(uint32_t, tcp_map_split_limit); VNET_DECLARE(int, tcp_minmss); @@ -1290,6 +1300,7 @@ VNET_DECLARE(int, tcp_retries); VNET_DECLARE(int, tcp_sack_globalholes); VNET_DECLARE(int, tcp_sack_globalmaxholes); VNET_DECLARE(int, tcp_sack_maxholes); +VNET_DECLARE(int, tcp_sack_tso); VNET_DECLARE(int, tcp_sc_rst_sock_fail); VNET_DECLARE(int, tcp_sendspace); VNET_DECLARE(int, tcp_udp_tunneling_overhead); @@ -1303,9 +1314,12 @@ VNET_DECLARE(struct inpcbinfo, tcbinfo); #define V_path_mtu_discovery VNET(path_mtu_discovery) #define V_tcbinfo VNET(tcbinfo) #define V_tcp_abc_l_var VNET(tcp_abc_l_var) +#define V_tcp_ack_war_cnt VNET(tcp_ack_war_cnt) +#define V_tcp_ack_war_time_window VNET(tcp_ack_war_time_window) #define V_tcp_autorcvbuf_max VNET(tcp_autorcvbuf_max) 
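
The run of VNET_DECLARE()/V_ macros above picks up two knobs for challenge-ACK throttling, tcp_ack_war_cnt and tcp_ack_war_time_window, which pair with the t_challenge_ack_end and t_challenge_ack_cnt fields added to struct tcpcb earlier in this hunk and with the newly exported tcp_send_challenge_ack(). As a rough illustration of the windowed-budget idea those names suggest, here is a minimal standalone sketch; the function, constants, and exact policy below are invented for the example and are not the kernel's implementation.

/*
 * Illustrative only: allow at most ack_war_cnt challenge ACKs per
 * ack_war_time_window milliseconds for one connection, resetting the
 * budget when a new epoch starts.
 */
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

struct conn {
	uint64_t challenge_ack_end;	/* end of the current epoch, in ms */
	uint32_t challenge_ack_cnt;	/* challenge ACKs sent in this epoch */
};

static const uint32_t ack_war_cnt = 5;		  /* cf. V_tcp_ack_war_cnt */
static const uint64_t ack_war_time_window = 1000; /* ms, cf. V_tcp_ack_war_time_window */

static bool
may_send_challenge_ack(struct conn *c, uint64_t now_ms)
{
	if (now_ms >= c->challenge_ack_end) {
		/* A new epoch begins: reset the per-connection budget. */
		c->challenge_ack_end = now_ms + ack_war_time_window;
		c->challenge_ack_cnt = 0;
	}
	if (c->challenge_ack_cnt < ack_war_cnt) {
		c->challenge_ack_cnt++;
		return (true);
	}
	return (false);		/* budget exhausted, suppress the ACK */
}

int
main(void)
{
	struct conn c = { 0, 0 };

	for (uint64_t t = 0; t < 2000; t += 100)
		printf("t=%4ums: %s\n", (unsigned)t,
		    may_send_challenge_ack(&c, t) ? "send" : "suppress");
	return (0);
}

Running the sketch shows five ACKs allowed at the start of each one-second window and the rest suppressed, which is the shape of behaviour the new tcpcb fields make possible.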
#define V_tcp_autosndbuf_inc VNET(tcp_autosndbuf_inc) #define V_tcp_autosndbuf_max VNET(tcp_autosndbuf_max) +#define V_tcp_bind_all_fibs VNET(tcp_bind_all_fibs) #define V_tcp_delack_enabled VNET(tcp_delack_enabled) #define V_tcp_do_autorcvbuf VNET(tcp_do_autorcvbuf) #define V_tcp_do_autosndbuf VNET(tcp_do_autosndbuf) @@ -1323,6 +1337,7 @@ VNET_DECLARE(struct inpcbinfo, tcbinfo); #define V_tcp_initcwnd_segments VNET(tcp_initcwnd_segments) #define V_tcp_insecure_rst VNET(tcp_insecure_rst) #define V_tcp_insecure_syn VNET(tcp_insecure_syn) +#define V_tcp_insecure_ack VNET(tcp_insecure_ack) #define V_tcp_map_entries_limit VNET(tcp_map_entries_limit) #define V_tcp_map_split_limit VNET(tcp_map_split_limit) #define V_tcp_minmss VNET(tcp_minmss) @@ -1336,6 +1351,7 @@ VNET_DECLARE(struct inpcbinfo, tcbinfo); #define V_tcp_sack_globalholes VNET(tcp_sack_globalholes) #define V_tcp_sack_globalmaxholes VNET(tcp_sack_globalmaxholes) #define V_tcp_sack_maxholes VNET(tcp_sack_maxholes) +#define V_tcp_sack_tso VNET(tcp_sack_tso) #define V_tcp_sc_rst_sock_fail VNET(tcp_sc_rst_sock_fail) #define V_tcp_sendspace VNET(tcp_sendspace) #define V_tcp_udp_tunneling_overhead VNET(tcp_udp_tunneling_overhead) @@ -1417,19 +1433,6 @@ extern counter_u64_t tcp_comp_total; extern counter_u64_t tcp_uncomp_total; extern counter_u64_t tcp_bad_csums; -#ifdef TCP_SAD_DETECTION -/* Various SACK attack thresholds */ -extern int32_t tcp_force_detection; -extern int32_t tcp_sad_limit; -extern int32_t tcp_sack_to_ack_thresh; -extern int32_t tcp_sack_to_move_thresh; -extern int32_t tcp_restoral_thresh; -extern int32_t tcp_sad_decay_val; -extern int32_t tcp_sad_pacing_interval; -extern int32_t tcp_sad_low_pps; -extern int32_t tcp_map_minimum; -extern int32_t tcp_attack_on_turns_on_logging; -#endif extern uint32_t tcp_ack_war_time_window; extern uint32_t tcp_ack_war_cnt; @@ -1442,6 +1445,7 @@ struct tcp_ifcap { u_int tsomax; u_int tsomaxsegcount; u_int tsomaxsegsize; + bool ipsec_tso; }; uint32_t tcp_maxmtu(struct in_conninfo *, struct tcp_ifcap *); uint32_t tcp_maxmtu6(struct in_conninfo *, struct tcp_ifcap *); @@ -1454,11 +1458,12 @@ void tcp_mss_update(struct tcpcb *, int, int, struct hc_metrics_lite *, void tcp_mss(struct tcpcb *, int); int tcp_mssopt(struct in_conninfo *); struct tcpcb * - tcp_newtcpcb(struct inpcb *); + tcp_newtcpcb(struct inpcb *, struct tcpcb *); int tcp_default_output(struct tcpcb *); void tcp_state_change(struct tcpcb *, int); void tcp_respond(struct tcpcb *, void *, struct tcphdr *, struct mbuf *, tcp_seq, tcp_seq, uint16_t); +void tcp_send_challenge_ack(struct tcpcb *, struct tcphdr *, struct mbuf *); bool tcp_twcheck(struct inpcb *, struct tcpopt *, struct tcphdr *, struct mbuf *, int); void tcp_setpersist(struct tcpcb *); @@ -1477,10 +1482,10 @@ void tcp_hc_init(void); #ifdef VIMAGE void tcp_hc_destroy(void); #endif -void tcp_hc_get(struct in_conninfo *, struct hc_metrics_lite *); -uint32_t tcp_hc_getmtu(struct in_conninfo *); -void tcp_hc_updatemtu(struct in_conninfo *, uint32_t); -void tcp_hc_update(struct in_conninfo *, struct hc_metrics_lite *); +void tcp_hc_get(const struct in_conninfo *, struct hc_metrics_lite *); +uint32_t tcp_hc_getmtu(const struct in_conninfo *); +void tcp_hc_updatemtu(const struct in_conninfo *, uint32_t); +void tcp_hc_update(const struct in_conninfo *, struct hc_metrics_lite *); void cc_after_idle(struct tcpcb *tp); extern struct protosw tcp_protosw; /* shared for TOE */ @@ -1497,7 +1502,7 @@ void tcp_update_sack_list(struct tcpcb *tp, tcp_seq rcv_laststart, tcp_seq 
rcv_lastend); void tcp_clean_dsack_blocks(struct tcpcb *tp); void tcp_clean_sackreport(struct tcpcb *tp); -void tcp_sack_adjust(struct tcpcb *tp); +int tcp_sack_adjust(struct tcpcb *tp); struct sackhole *tcp_sack_output(struct tcpcb *tp, int *sack_bytes_rexmt); void tcp_do_prr_ack(struct tcpcb *, struct tcphdr *, struct tcpopt *, sackstatus_t, u_int *); diff --git a/sys/netinet/toecore.c b/sys/netinet/toecore.c index 76aadad9a3b9..4203029ff7c3 100644 --- a/sys/netinet/toecore.c +++ b/sys/netinet/toecore.c @@ -525,7 +525,7 @@ toe_connect_failed(struct toedev *tod, struct inpcb *inp, int err) /* * Temporary failure during offload, take this PCB back. * Detach from the TOE driver and do the rest of what - * TCP's pru_connect would have done if the connection + * TCP's pr_connect() would have done if the connection * wasn't offloaded. */ diff --git a/sys/netinet/toecore.h b/sys/netinet/toecore.h index 612c2fe1caf5..843b261ec162 100644 --- a/sys/netinet/toecore.h +++ b/sys/netinet/toecore.h @@ -66,7 +66,7 @@ struct toedev { void (*tod_input)(struct toedev *, struct tcpcb *, struct mbuf *); /* - * This is called by the kernel during pru_rcvd for an offloaded TCP + * This is called by the kernel during pr_rcvd() for an offloaded TCP * connection and provides an opportunity for the TOE driver to manage * its rx window and credits. */ diff --git a/sys/netinet/udp.h b/sys/netinet/udp.h index edff456ba70e..010f2210b516 100644 --- a/sys/netinet/udp.h +++ b/sys/netinet/udp.h @@ -44,7 +44,7 @@ struct udphdr { u_short uh_dport; /* destination port */ u_short uh_ulen; /* udp length */ u_short uh_sum; /* udp checksum */ -}; +} __packed; /* * User-settable options (used with setsockopt). diff --git a/sys/netinet/udp_usrreq.c b/sys/netinet/udp_usrreq.c index 9dad79e95b04..dafbaf6dc672 100644 --- a/sys/netinet/udp_usrreq.c +++ b/sys/netinet/udp_usrreq.c @@ -105,6 +105,11 @@ * Per RFC 3828, July, 2004. */ +VNET_DEFINE(int, udp_bind_all_fibs) = 1; +SYSCTL_INT(_net_inet_udp, OID_AUTO, bind_all_fibs, CTLFLAG_VNET | CTLFLAG_RDTUN, + &VNET_NAME(udp_bind_all_fibs), 0, + "Bound sockets receive traffic from all FIBs"); + /* * BSD 4.2 defaulted the udp checksum to be off. Turning off udp checksums * removes the only data integrity mechanism for packets and malformed @@ -359,10 +364,12 @@ udp_multi_input(struct mbuf *m, int proto, struct sockaddr_in *udp_in) #endif struct inpcb *inp; struct mbuf *n; - int appends = 0; + int appends = 0, fib; MPASS(ip->ip_hl == sizeof(struct ip) >> 2); + fib = M_GETFIB(m); + while ((inp = inp_next(&inpi)) != NULL) { /* * XXXRW: Because we weren't holding either the inpcb @@ -370,6 +377,14 @@ udp_multi_input(struct mbuf *m, int proto, struct sockaddr_in *udp_in) * before, we should probably recheck now that the * inpcb lock is held. */ + + if (V_udp_bind_all_fibs == 0 && fib != inp->inp_inc.inc_fibnum) + /* + * Sockets bound to a specific FIB can only receive + * packets from that FIB. + */ + continue; + /* * Handle socket delivery policy for any-source * and source-specific multicast. 
[RFC3678] @@ -453,7 +468,7 @@ udp_input(struct mbuf **mp, int *offp, int proto) struct sockaddr_in udp_in[2]; struct mbuf *m; struct m_tag *fwd_tag; - int cscov_partial, iphlen; + int cscov_partial, iphlen, lookupflags; m = *mp; iphlen = *offp; @@ -544,12 +559,12 @@ udp_input(struct mbuf **mp, int *offp, int proto) char b[offsetof(struct ipovly, ih_src)]; struct ipovly *ipov = (struct ipovly *)ip; - bcopy(ipov, b, sizeof(b)); + memcpy(b, ipov, sizeof(b)); bzero(ipov, sizeof(ipov->ih_x1)); ipov->ih_len = (proto == IPPROTO_UDP) ? uh->uh_ulen : htons(ip_len); uh_sum = in_cksum(m, len + sizeof (struct ip)); - bcopy(b, ipov, sizeof(b)); + memcpy(ipov, b, sizeof(b)); } if (uh_sum) { UDPSTAT_INC(udps_badsum); @@ -568,14 +583,18 @@ udp_input(struct mbuf **mp, int *offp, int proto) } if (IN_MULTICAST(ntohl(ip->ip_dst.s_addr)) || - in_broadcast(ip->ip_dst, ifp)) + in_ifnet_broadcast(ip->ip_dst, ifp)) return (udp_multi_input(m, proto, udp_in)); pcbinfo = udp_get_inpcbinfo(proto); /* * Locate pcb for datagram. - * + */ + lookupflags = INPLOOKUP_RLOCKPCB | + (V_udp_bind_all_fibs ? 0 : INPLOOKUP_FIB); + + /* * Grab info from PACKET_TAG_IPFORWARD tag prepended to the chain. */ if ((m->m_flags & M_IP_NEXTHOP) && @@ -589,7 +608,7 @@ udp_input(struct mbuf **mp, int *offp, int proto) * Already got one like this? */ inp = in_pcblookup_mbuf(pcbinfo, ip->ip_src, uh->uh_sport, - ip->ip_dst, uh->uh_dport, INPLOOKUP_RLOCKPCB, ifp, m); + ip->ip_dst, uh->uh_dport, lookupflags, ifp, m); if (!inp) { /* * It's new. Try to find the ambushing socket. @@ -599,8 +618,8 @@ udp_input(struct mbuf **mp, int *offp, int proto) inp = in_pcblookup(pcbinfo, ip->ip_src, uh->uh_sport, next_hop->sin_addr, next_hop->sin_port ? htons(next_hop->sin_port) : - uh->uh_dport, INPLOOKUP_WILDCARD | - INPLOOKUP_RLOCKPCB, ifp); + uh->uh_dport, INPLOOKUP_WILDCARD | lookupflags, + ifp); } /* Remove the tag from the packet. We don't need it anymore. */ m_tag_delete(m, fwd_tag); @@ -608,7 +627,7 @@ udp_input(struct mbuf **mp, int *offp, int proto) } else inp = in_pcblookup_mbuf(pcbinfo, ip->ip_src, uh->uh_sport, ip->ip_dst, uh->uh_dport, INPLOOKUP_WILDCARD | - INPLOOKUP_RLOCKPCB, ifp, m); + lookupflags, ifp, m); if (inp == NULL) { if (V_udp_log_in_vain) { char src[INET_ADDRSTRLEN]; @@ -825,6 +844,8 @@ udp_getcred(SYSCTL_HANDLER_ARGS) struct inpcb *inp; int error; + if (req->newptr == NULL) + return (EINVAL); error = priv_check(req->td, PRIV_NETINET_GETCRED); if (error) return (error); @@ -1021,6 +1042,8 @@ udp_v4mapped_pktinfo(struct cmsghdr *cm, struct sockaddr_in * src, struct in6_pktinfo *pktinfo; struct in_addr ia; + NET_EPOCH_ASSERT(); + if ((flags & PRUS_IPV6) == 0) return (0); @@ -1042,18 +1065,14 @@ udp_v4mapped_pktinfo(struct cmsghdr *cm, struct sockaddr_in * src, /* Validate the interface index if specified. */ if (pktinfo->ipi6_ifindex) { - struct epoch_tracker et; - - NET_EPOCH_ENTER(et); ifp = ifnet_byindex(pktinfo->ipi6_ifindex); - NET_EPOCH_EXIT(et); /* XXXGL: unsafe ifp */ if (ifp == NULL) return (ENXIO); } else ifp = NULL; if (ifp != NULL && !IN6_IS_ADDR_UNSPECIFIED(&pktinfo->ipi6_addr)) { ia.s_addr = pktinfo->ipi6_addr.s6_addr32[3]; - if (in_ifhasaddr(ifp, ia) == 0) + if (!in_ifhasaddr(ifp, ia)) return (EADDRNOTAVAIL); } @@ -1116,10 +1135,9 @@ udp_send(struct socket *so, int flags, struct mbuf *m, struct sockaddr *addr, sin = (struct sockaddr_in *)addr; /* - * udp_send() may need to temporarily bind or connect the current - * inpcb. As such, we don't know up front whether we will need the - * pcbinfo lock or not. 
Do any work to decide what is needed up - * front before acquiring any locks. + * udp_send() may need to bind the current inpcb. As such, we don't + * know up front whether we will need the pcbinfo lock or not. Do any + * work to decide what is needed up front before acquiring any locks. * * We will need network epoch in either case, to safely lookup into * pcb hash. @@ -1243,7 +1261,7 @@ udp_send(struct socket *so, int flags, struct mbuf *m, struct sockaddr *addr, } INP_HASH_WLOCK(pcbinfo); error = in_pcbbind_setup(inp, &src, &laddr.s_addr, &lport, - td->td_ucred); + V_udp_bind_all_fibs ? 0 : INPBIND_FIB, td->td_ucred); INP_HASH_WUNLOCK(pcbinfo); if ((flags & PRUS_IPV6) != 0) inp->inp_vflag = vflagsav; @@ -1273,66 +1291,37 @@ udp_send(struct socket *so, int flags, struct mbuf *m, struct sockaddr *addr, error = prison_remote_ip4(td->td_ucred, &sin->sin_addr); if (error) goto release; - /* - * If a local address or port hasn't yet been selected, or if - * the destination address needs to be rewritten due to using - * a special INADDR_ constant, invoke in_pcbconnect_setup() - * to do the heavy lifting. Once a port is selected, we - * commit the binding back to the socket; we also commit the - * binding of the address if in jail. - * - * If we already have a valid binding and we're not - * requesting a destination address rewrite, use a fast path. + * sendto(2) on unconnected UDP socket results in implicit + * binding to INADDR_ANY and anonymous port. This has two + * side effects: + * 1) after first sendto(2) the socket will receive datagrams + * destined to the selected port. + * 2) subsequent sendto(2) calls will use the same source port. */ - if (inp->inp_laddr.s_addr == INADDR_ANY || - inp->inp_lport == 0 || - sin->sin_addr.s_addr == INADDR_ANY || - sin->sin_addr.s_addr == INADDR_BROADCAST) { - if ((flags & PRUS_IPV6) != 0) { - vflagsav = inp->inp_vflag; - inp->inp_vflag |= INP_IPV4; - inp->inp_vflag &= ~INP_IPV6; - } + if (inp->inp_lport == 0) { + struct sockaddr_in wild = { + .sin_family = AF_INET, + .sin_len = sizeof(struct sockaddr_in), + }; + INP_HASH_WLOCK(pcbinfo); - error = in_pcbconnect_setup(inp, sin, &laddr.s_addr, - &lport, &faddr.s_addr, &fport, td->td_ucred); - if ((flags & PRUS_IPV6) != 0) - inp->inp_vflag = vflagsav; - if (error) { - INP_HASH_WUNLOCK(pcbinfo); + error = in_pcbbind(inp, &wild, V_udp_bind_all_fibs ? + 0 : INPBIND_FIB, td->td_ucred); + INP_HASH_WUNLOCK(pcbinfo); + if (error) + goto release; + lport = inp->inp_lport; + laddr = inp->inp_laddr; + } + if (laddr.s_addr == INADDR_ANY) { + error = in_pcbladdr(inp, &sin->sin_addr, &laddr, + td->td_ucred); + if (error) goto release; - } - - /* - * XXXRW: Why not commit the port if the address is - * !INADDR_ANY? - */ - /* Commit the local port if newly assigned. */ - if (inp->inp_laddr.s_addr == INADDR_ANY && - inp->inp_lport == 0) { - INP_WLOCK_ASSERT(inp); - /* - * Remember addr if jailed, to prevent - * rebinding. 
- */ - if (prison_flag(td->td_ucred, PR_IP4)) - inp->inp_laddr = laddr; - inp->inp_lport = lport; - error = in_pcbinshash(inp); - INP_HASH_WUNLOCK(pcbinfo); - if (error != 0) { - inp->inp_lport = 0; - error = EAGAIN; - goto release; - } - inp->inp_flags |= INP_ANONPORT; - } else - INP_HASH_WUNLOCK(pcbinfo); - } else { - faddr = sin->sin_addr; - fport = sin->sin_port; } + faddr = sin->sin_addr; + fport = sin->sin_port; } else { INP_LOCK_ASSERT(inp); faddr = inp->inp_faddr; @@ -1592,7 +1581,8 @@ udp_bind(struct socket *so, struct sockaddr *nam, struct thread *td) INP_WLOCK(inp); INP_HASH_WLOCK(pcbinfo); - error = in_pcbbind(inp, sinp, td->td_ucred); + error = in_pcbbind(inp, sinp, V_udp_bind_all_fibs ? 0 : INPBIND_FIB, + td->td_ucred); INP_HASH_WUNLOCK(pcbinfo); INP_WUNLOCK(inp); return (error); @@ -1648,7 +1638,7 @@ udp_connect(struct socket *so, struct sockaddr *nam, struct thread *td) } NET_EPOCH_ENTER(et); INP_HASH_WLOCK(pcbinfo); - error = in_pcbconnect(inp, sin, td->td_ucred, true); + error = in_pcbconnect(inp, sin, td->td_ucred); INP_HASH_WUNLOCK(pcbinfo); NET_EPOCH_EXIT(et); if (error == 0) diff --git a/sys/netinet/udp_var.h b/sys/netinet/udp_var.h index 0d70bad91df4..3895f365db3c 100644 --- a/sys/netinet/udp_var.h +++ b/sys/netinet/udp_var.h @@ -120,7 +120,7 @@ struct udpcb { void *u_tun_ctx; /* Tunneling callback context. */ }; -#define intoudpcb(ip) __containerof((inp), struct udpcb, u_inpcb) +#define intoudpcb(ip) __containerof((ip), struct udpcb, u_inpcb) #define sotoudpcb(so) (intoudpcb(sotoinpcb(so))) VNET_PCPUSTAT_DECLARE(struct udpstat, udpstat); @@ -155,13 +155,15 @@ VNET_DECLARE(struct inpcbinfo, ulitecbinfo); extern u_long udp_sendspace; extern u_long udp_recvspace; -VNET_DECLARE(int, udp_cksum); +VNET_DECLARE(int, udp_bind_all_fibs); VNET_DECLARE(int, udp_blackhole); VNET_DECLARE(bool, udp_blackhole_local); +VNET_DECLARE(int, udp_cksum); VNET_DECLARE(int, udp_log_in_vain); -#define V_udp_cksum VNET(udp_cksum) +#define V_udp_bind_all_fibs VNET(udp_bind_all_fibs) #define V_udp_blackhole VNET(udp_blackhole) #define V_udp_blackhole_local VNET(udp_blackhole_local) +#define V_udp_cksum VNET(udp_cksum) #define V_udp_log_in_vain VNET(udp_log_in_vain) VNET_DECLARE(int, zero_checksum_port); |
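
The udp_var.h hunk also quietly corrects intoudpcb(): the old definition expanded to __containerof((inp), ...), referencing whatever token named inp happened to be visible at the call site instead of the macro's own ip parameter. The standalone sketch below, with invented structure names, shows the container-of pattern written against the parameter, which is what the fixed macro now does; it is a simplified userland analogue, not the kernel's __containerof.

/*
 * A container-of macro must compute the enclosing structure from its own
 * argument, so it works no matter what the caller's variable is called.
 */
#include <stddef.h>
#include <stdio.h>

#define	containerof(ptr, type, member) \
	((type *)((char *)(ptr) - offsetof(type, member)))

struct in_pcb_like {
	int	refcount;
};

struct udp_pcb_like {
	struct in_pcb_like u_inpcb;	/* embedded "inpcb"-style member */
	int	u_flags;
};

/* Correct form: uses the macro parameter, as the patched intoudpcb() does. */
#define	intoudpcb_like(ip)	containerof((ip), struct udp_pcb_like, u_inpcb)

int
main(void)
{
	struct udp_pcb_like up = { .u_inpcb = { .refcount = 1 }, .u_flags = 7 };
	struct in_pcb_like *some_inp = &up.u_inpcb;	/* any name works now */

	printf("u_flags via container-of: %d\n",
	    intoudpcb_like(some_inp)->u_flags);
	return (0);
}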