aboutsummaryrefslogtreecommitdiff
path: root/sys/netinet
diff options
context:
space:
mode:
Diffstat (limited to 'sys/netinet')
-rw-r--r--sys/netinet/in_mcast.c113
-rw-r--r--sys/netinet/in_pcb.c172
-rw-r--r--sys/netinet/in_pcb.h24
-rw-r--r--sys/netinet/in_proto.c2
-rw-r--r--sys/netinet/libalias/alias_db.c2
-rw-r--r--sys/netinet/raw_ip.c4
-rw-r--r--sys/netinet/siftr.c2
-rw-r--r--sys/netinet/tcp.h1
-rw-r--r--sys/netinet/tcp_hpts_test.c20
-rw-r--r--sys/netinet/tcp_input.c7
-rw-r--r--sys/netinet/tcp_output.c2
-rw-r--r--sys/netinet/tcp_stacks/bbr.c8
-rw-r--r--sys/netinet/tcp_stacks/rack.c284
-rw-r--r--sys/netinet/tcp_stacks/tcp_rack.h1
-rw-r--r--sys/netinet/tcp_subr.c2
-rw-r--r--sys/netinet/tcp_syncache.c66
-rw-r--r--sys/netinet/tcp_syncache.h1
-rw-r--r--sys/netinet/tcp_timer.c7
-rw-r--r--sys/netinet/tcp_usrreq.c286
-rw-r--r--sys/netinet/tcp_var.h29
-rw-r--r--sys/netinet/udp_usrreq.c49
-rw-r--r--sys/netinet/udp_var.h1
22 files changed, 288 insertions, 795 deletions
diff --git a/sys/netinet/in_mcast.c b/sys/netinet/in_mcast.c
index f5b20c49ffd2..ba112afbf002 100644
--- a/sys/netinet/in_mcast.c
+++ b/sys/netinet/in_mcast.c
@@ -159,9 +159,6 @@ static struct ip_moptions *
static int inp_get_source_filters(struct inpcb *, struct sockopt *);
static int inp_join_group(struct inpcb *, struct sockopt *);
static int inp_leave_group(struct inpcb *, struct sockopt *);
-static struct ifnet *
- inp_lookup_mcast_ifp(const struct inpcb *,
- const struct sockaddr_in *, const struct in_addr);
static int inp_block_unblock_source(struct inpcb *, struct sockopt *);
static int inp_set_multicast_if(struct inpcb *, struct sockopt *);
static int inp_set_source_filters(struct inpcb *, struct sockopt *);
@@ -1832,69 +1829,55 @@ inp_getmoptions(struct inpcb *inp, struct sockopt *sopt)
}
/*
- * Look up the ifnet to use for a multicast group membership,
- * given the IPv4 address of an interface, and the IPv4 group address.
- *
- * This routine exists to support legacy multicast applications
- * which do not understand that multicast memberships are scoped to
- * specific physical links in the networking stack, or which need
- * to join link-scope groups before IPv4 addresses are configured.
- *
- * Use this socket's current FIB number for any required FIB lookup.
- * If ina is INADDR_ANY, look up the group address in the unicast FIB,
- * and use its ifp; usually, this points to the default next-hop.
- *
- * If the FIB lookup fails, attempt to use the first non-loopback
- * interface with multicast capability in the system as a
- * last resort. The legacy IPv4 ASM API requires that we do
- * this in order to allow groups to be joined when the routing
- * table has not yet been populated during boot.
- *
- * Returns NULL if no ifp could be found, otherwise return referenced ifp.
+ * Look up the ifnet on which to join a multicast group, via legacy
+ * IP_ADD_MEMBERSHIP or via the more modern MCAST_JOIN_GROUP.
*
- * FUTURE: Implement IPv4 source-address selection.
+ * If the interface index was specified explicitly, just use it. If an
+ * address was specified (legacy), try to find the matching interface.
+ * Otherwise (index == 0 and no address) do a route lookup. If that fails,
+ * return failure when no address argument was supplied (MCAST_JOIN_GROUP);
+ * for legacy requests use the first non-loopback multicast interface.
*/
static struct ifnet *
-inp_lookup_mcast_ifp(const struct inpcb *inp,
- const struct sockaddr_in *gsin, const struct in_addr ina)
+inp_lookup_mcast_ifp(const struct inpcb *inp, const struct in_addr maddr,
+const struct in_addr *ina, const u_int index)
{
struct ifnet *ifp;
struct nhop_object *nh;
NET_EPOCH_ASSERT();
- KASSERT(inp != NULL, ("%s: inp must not be NULL", __func__));
- KASSERT(gsin->sin_family == AF_INET, ("%s: not AF_INET", __func__));
- KASSERT(IN_MULTICAST(ntohl(gsin->sin_addr.s_addr)),
- ("%s: not multicast", __func__));
- ifp = NULL;
- if (!in_nullhost(ina)) {
- INADDR_TO_IFP(ina, ifp);
+ if (index != 0)
+ return (ifnet_byindex_ref(index));
+
+ if (ina != NULL && !in_nullhost(*ina)) {
+ INADDR_TO_IFP(*ina, ifp);
if (ifp != NULL)
if_ref(ifp);
- } else {
- nh = fib4_lookup(inp->inp_inc.inc_fibnum, gsin->sin_addr, 0, NHR_NONE, 0);
- if (nh != NULL) {
- ifp = nh->nh_ifp;
- if_ref(ifp);
- } else {
- struct in_ifaddr *ia;
- struct ifnet *mifp;
-
- mifp = NULL;
- CK_STAILQ_FOREACH(ia, &V_in_ifaddrhead, ia_link) {
- mifp = ia->ia_ifp;
- if (!(mifp->if_flags & IFF_LOOPBACK) &&
- (mifp->if_flags & IFF_MULTICAST)) {
- ifp = mifp;
- if_ref(ifp);
- break;
- }
+ return (ifp);
+ }
+
+ nh = fib4_lookup(inp->inp_inc.inc_fibnum, maddr, 0, NHR_NONE, 0);
+ if (nh != NULL) {
+ ifp = nh->nh_ifp;
+ if_ref(ifp);
+ return (ifp);
+ }
+
+ if (ina != NULL) {
+ struct in_ifaddr *ia;
+
+ CK_STAILQ_FOREACH(ia, &V_in_ifaddrhead, ia_link) {
+ if (!(ia->ia_ifp->if_flags & IFF_LOOPBACK) &&
+ (ia->ia_ifp->if_flags & IFF_MULTICAST)) {
+ ifp = ia->ia_ifp;
+ if_ref(ifp);
+ return (ifp);
}
}
}
- return (ifp);
+ return (NULL);
}
/*
@@ -1926,13 +1909,13 @@ inp_join_group(struct inpcb *inp, struct sockopt *sopt)
switch (sopt->sopt_name) {
case IP_ADD_MEMBERSHIP: {
struct ip_mreqn mreqn;
+ bool mreq;
- if (sopt->sopt_valsize == sizeof(struct ip_mreqn))
- error = sooptcopyin(sopt, &mreqn,
- sizeof(struct ip_mreqn), sizeof(struct ip_mreqn));
- else
- error = sooptcopyin(sopt, &mreqn,
- sizeof(struct ip_mreq), sizeof(struct ip_mreq));
+ mreq = (sopt->sopt_valsize != sizeof(struct ip_mreqn));
+
+ error = sooptcopyin(sopt, &mreqn,
+ mreq ? sizeof(struct ip_mreq) : sizeof(struct ip_mreqn),
+ mreq ? sizeof(struct ip_mreq) : sizeof(struct ip_mreqn));
if (error)
return (error);
@@ -1943,12 +1926,9 @@ inp_join_group(struct inpcb *inp, struct sockopt *sopt)
return (EINVAL);
NET_EPOCH_ENTER(et);
- if (sopt->sopt_valsize == sizeof(struct ip_mreqn) &&
- mreqn.imr_ifindex != 0)
- ifp = ifnet_byindex_ref(mreqn.imr_ifindex);
- else
- ifp = inp_lookup_mcast_ifp(inp, &gsa->sin,
- mreqn.imr_address);
+ ifp = inp_lookup_mcast_ifp(inp, mreqn.imr_multiaddr,
+ mreq ? &mreqn.imr_address : NULL,
+ mreq ? 0 : mreqn.imr_ifindex);
NET_EPOCH_EXIT(et);
break;
}
@@ -1971,8 +1951,8 @@ inp_join_group(struct inpcb *inp, struct sockopt *sopt)
ssa->sin.sin_addr = mreqs.imr_sourceaddr;
NET_EPOCH_ENTER(et);
- ifp = inp_lookup_mcast_ifp(inp, &gsa->sin,
- mreqs.imr_interface);
+ ifp = inp_lookup_mcast_ifp(inp, mreqs.imr_multiaddr,
+ &mreqs.imr_interface, 0);
NET_EPOCH_EXIT(et);
CTR3(KTR_IGMPV3, "%s: imr_interface = 0x%08x, ifp = %p",
__func__, ntohl(mreqs.imr_interface.s_addr), ifp);
@@ -2013,7 +1993,8 @@ inp_join_group(struct inpcb *inp, struct sockopt *sopt)
return (EINVAL);
NET_EPOCH_ENTER(et);
- ifp = ifnet_byindex_ref(gsr.gsr_interface);
+ ifp = inp_lookup_mcast_ifp(inp, gsa->sin.sin_addr, NULL,
+ gsr.gsr_interface);
NET_EPOCH_EXIT(et);
if (ifp == NULL)
return (EADDRNOTAVAIL);
diff --git a/sys/netinet/in_pcb.c b/sys/netinet/in_pcb.c
index dbe48242381d..b7dae78fb2c2 100644
--- a/sys/netinet/in_pcb.c
+++ b/sys/netinet/in_pcb.c
@@ -2665,10 +2665,13 @@ in_pcbinshash(struct inpcb *inp)
INP_PCBPORTHASH(inp->inp_lport, pcbinfo->ipi_porthashmask)];
/*
- * Add entry to load balance group.
- * Only do this if SO_REUSEPORT_LB is set.
+ * Ignore SO_REUSEPORT_LB if the socket is connected. Really this case
+ * should be an error, but for UDP sockets it is not, and some
+ * applications erroneously set it on connected UDP sockets, so we can't
+ * change this without breaking compatibility.
*/
- if ((inp->inp_socket->so_options & SO_REUSEPORT_LB) != 0) {
+ if (!connected &&
+ (inp->inp_socket->so_options & SO_REUSEPORT_LB) != 0) {
int error = in_pcbinslbgrouphash(inp, M_NODOM);
if (error != 0)
return (error);
@@ -2770,6 +2773,10 @@ in_pcbrehash(struct inpcb *inp)
connected = !in_nullhost(inp->inp_faddr);
}
+ /* See the comment in in_pcbinshash(). */
+ if (connected && (inp->inp_flags & INP_INLBGROUP) != 0)
+ in_pcbremlbgrouphash(inp);
+
/*
* When rehashing, the caller must ensure that either the new or the old
* foreign address was unspecified.
@@ -3051,143 +3058,7 @@ db_print_inconninfo(struct in_conninfo *inc, const char *name, int indent)
ntohs(inc->inc_fport));
}
-static void
-db_print_inpflags(int inp_flags)
-{
- int comma;
-
- comma = 0;
- if (inp_flags & INP_RECVOPTS) {
- db_printf("%sINP_RECVOPTS", comma ? ", " : "");
- comma = 1;
- }
- if (inp_flags & INP_RECVRETOPTS) {
- db_printf("%sINP_RECVRETOPTS", comma ? ", " : "");
- comma = 1;
- }
- if (inp_flags & INP_RECVDSTADDR) {
- db_printf("%sINP_RECVDSTADDR", comma ? ", " : "");
- comma = 1;
- }
- if (inp_flags & INP_ORIGDSTADDR) {
- db_printf("%sINP_ORIGDSTADDR", comma ? ", " : "");
- comma = 1;
- }
- if (inp_flags & INP_HDRINCL) {
- db_printf("%sINP_HDRINCL", comma ? ", " : "");
- comma = 1;
- }
- if (inp_flags & INP_HIGHPORT) {
- db_printf("%sINP_HIGHPORT", comma ? ", " : "");
- comma = 1;
- }
- if (inp_flags & INP_LOWPORT) {
- db_printf("%sINP_LOWPORT", comma ? ", " : "");
- comma = 1;
- }
- if (inp_flags & INP_ANONPORT) {
- db_printf("%sINP_ANONPORT", comma ? ", " : "");
- comma = 1;
- }
- if (inp_flags & INP_RECVIF) {
- db_printf("%sINP_RECVIF", comma ? ", " : "");
- comma = 1;
- }
- if (inp_flags & INP_MTUDISC) {
- db_printf("%sINP_MTUDISC", comma ? ", " : "");
- comma = 1;
- }
- if (inp_flags & INP_RECVTTL) {
- db_printf("%sINP_RECVTTL", comma ? ", " : "");
- comma = 1;
- }
- if (inp_flags & INP_DONTFRAG) {
- db_printf("%sINP_DONTFRAG", comma ? ", " : "");
- comma = 1;
- }
- if (inp_flags & INP_RECVTOS) {
- db_printf("%sINP_RECVTOS", comma ? ", " : "");
- comma = 1;
- }
- if (inp_flags & IN6P_IPV6_V6ONLY) {
- db_printf("%sIN6P_IPV6_V6ONLY", comma ? ", " : "");
- comma = 1;
- }
- if (inp_flags & IN6P_PKTINFO) {
- db_printf("%sIN6P_PKTINFO", comma ? ", " : "");
- comma = 1;
- }
- if (inp_flags & IN6P_HOPLIMIT) {
- db_printf("%sIN6P_HOPLIMIT", comma ? ", " : "");
- comma = 1;
- }
- if (inp_flags & IN6P_HOPOPTS) {
- db_printf("%sIN6P_HOPOPTS", comma ? ", " : "");
- comma = 1;
- }
- if (inp_flags & IN6P_DSTOPTS) {
- db_printf("%sIN6P_DSTOPTS", comma ? ", " : "");
- comma = 1;
- }
- if (inp_flags & IN6P_RTHDR) {
- db_printf("%sIN6P_RTHDR", comma ? ", " : "");
- comma = 1;
- }
- if (inp_flags & IN6P_RTHDRDSTOPTS) {
- db_printf("%sIN6P_RTHDRDSTOPTS", comma ? ", " : "");
- comma = 1;
- }
- if (inp_flags & IN6P_TCLASS) {
- db_printf("%sIN6P_TCLASS", comma ? ", " : "");
- comma = 1;
- }
- if (inp_flags & IN6P_AUTOFLOWLABEL) {
- db_printf("%sIN6P_AUTOFLOWLABEL", comma ? ", " : "");
- comma = 1;
- }
- if (inp_flags & INP_ONESBCAST) {
- db_printf("%sINP_ONESBCAST", comma ? ", " : "");
- comma = 1;
- }
- if (inp_flags & INP_DROPPED) {
- db_printf("%sINP_DROPPED", comma ? ", " : "");
- comma = 1;
- }
- if (inp_flags & INP_SOCKREF) {
- db_printf("%sINP_SOCKREF", comma ? ", " : "");
- comma = 1;
- }
- if (inp_flags & IN6P_RFC2292) {
- db_printf("%sIN6P_RFC2292", comma ? ", " : "");
- comma = 1;
- }
- if (inp_flags & IN6P_MTU) {
- db_printf("IN6P_MTU%s", comma ? ", " : "");
- comma = 1;
- }
-}
-
-static void
-db_print_inpvflag(u_char inp_vflag)
-{
- int comma;
-
- comma = 0;
- if (inp_vflag & INP_IPV4) {
- db_printf("%sINP_IPV4", comma ? ", " : "");
- comma = 1;
- }
- if (inp_vflag & INP_IPV6) {
- db_printf("%sINP_IPV6", comma ? ", " : "");
- comma = 1;
- }
- if (inp_vflag & INP_IPV6PROTO) {
- db_printf("%sINP_IPV6PROTO", comma ? ", " : "");
- comma = 1;
- }
-}
-
-static void
+void
db_print_inpcb(struct inpcb *inp, const char *name, int indent)
{
@@ -3197,38 +3068,39 @@ db_print_inpcb(struct inpcb *inp, const char *name, int indent)
indent += 2;
db_print_indent(indent);
- db_printf("inp_flow: 0x%x\n", inp->inp_flow);
+ db_printf("inp_flow: 0x%x inp_label: %p\n", inp->inp_flow,
+ inp->inp_label);
db_print_inconninfo(&inp->inp_inc, "inp_conninfo", indent);
db_print_indent(indent);
- db_printf("inp_label: %p inp_flags: 0x%x (",
- inp->inp_label, inp->inp_flags);
- db_print_inpflags(inp->inp_flags);
- db_printf(")\n");
+ db_printf("inp_flags: 0x%b\n", inp->inp_flags, INP_FLAGS_BITS);
db_print_indent(indent);
- db_printf("inp_sp: %p inp_vflag: 0x%x (", inp->inp_sp,
- inp->inp_vflag);
- db_print_inpvflag(inp->inp_vflag);
- db_printf(")\n");
+ db_printf("inp_flags2: 0x%b\n", inp->inp_flags2, INP_FLAGS2_BITS);
+
+ db_print_indent(indent);
+ db_printf("inp_sp: %p inp_vflag: 0x%b\n", inp->inp_sp,
+ inp->inp_vflag, INP_VFLAGS_BITS);
db_print_indent(indent);
db_printf("inp_ip_ttl: %d inp_ip_p: %d inp_ip_minttl: %d\n",
inp->inp_ip_ttl, inp->inp_ip_p, inp->inp_ip_minttl);
- db_print_indent(indent);
#ifdef INET6
if (inp->inp_vflag & INP_IPV6) {
+ db_print_indent(indent);
db_printf("in6p_options: %p in6p_outputopts: %p "
"in6p_moptions: %p\n", inp->in6p_options,
inp->in6p_outputopts, inp->in6p_moptions);
+ db_print_indent(indent);
db_printf("in6p_icmp6filt: %p in6p_cksum %d "
"in6p_hops %u\n", inp->in6p_icmp6filt, inp->in6p_cksum,
inp->in6p_hops);
} else
#endif
{
+ db_print_indent(indent);
db_printf("inp_ip_tos: %d inp_ip_options: %p "
"inp_ip_moptions: %p\n", inp->inp_ip_tos,
inp->inp_options, inp->inp_moptions);
diff --git a/sys/netinet/in_pcb.h b/sys/netinet/in_pcb.h
index 9e0618e87601..975b8129c70d 100644
--- a/sys/netinet/in_pcb.h
+++ b/sys/netinet/in_pcb.h
@@ -539,6 +539,9 @@ void inp_4tuple_get(struct inpcb *inp, uint32_t *laddr, uint16_t *lp,
#define INP_IPV6 0x2
#define INP_IPV6PROTO 0x4 /* opened under IPv6 protocol */
+/* inp_vflag description for use with printf(9) %b identifier. */
+#define INP_VFLAGS_BITS "\20\1INP_IPV4\2INP_IPV6\3INP_IPV6PROTO"
+
/*
* Flags for inp_flags.
*/
@@ -582,6 +585,17 @@ void inp_4tuple_get(struct inpcb *inp, uint32_t *laddr, uint16_t *lp,
IN6P_TCLASS|IN6P_AUTOFLOWLABEL|IN6P_RFC2292|\
IN6P_MTU)
+/* inp_flags description for use with printf(9) %b identifier. */
+#define INP_FLAGS_BITS "\20" \
+ "\1INP_RECVOPTS\2INP_RECVRETOPTS\3INP_RECVDSTADDR\4INP_HDRINCL" \
+ "\5INP_HIGHPORT\6INP_LOWPORT\7INP_ANONPORT\10INP_RECVIF" \
+ "\11INP_MTUDISC\12INP_FREED\13INP_RECVTTL\14INP_DONTFRAG" \
+ "\15INP_BINDANY\16INP_INHASHLIST\17INP_RECVTOS\20IN6P_IPV6_V6ONLY" \
+ "\21IN6P_PKTINFO\22IN6P_HOPLIMIT\23IN6P_HOPOPTS\24IN6P_DSTOPTS" \
+ "\25IN6P_RTHDR\26IN6P_RTHDRDSTOPTS\27IN6P_TCLASS\30IN6P_AUTOFLOWLABEL" \
+ "\31INP_INLBGROUP\32INP_ONESBCAST\33INP_DROPPED\34INP_SOCKREF" \
+ "\35INP_RESERVED_0\36INP_BOUNDFIB\37IN6P_RFC2292\40IN6P_MTU"
+
/*
* Flags for inp_flags2.
*/
@@ -610,6 +624,13 @@ void inp_4tuple_get(struct inpcb *inp, uint32_t *laddr, uint16_t *lp,
#define INP_2PCP_MASK (INP_2PCP_BIT0 | INP_2PCP_BIT1 | INP_2PCP_BIT2)
#define INP_2PCP_SHIFT 18 /* shift PCP field in/out of inp_flags2 */
+/* inp_flags2 description for use with printf(9) %b identifier. */
+#define INP_FLAGS2_BITS "\20" \
+ "\11INP_RECVFLOWID\12INP_RECVRSSBUCKETID" \
+ "\13INP_RATE_LIMIT_CHANGED\14INP_ORIGDSTADDR" \
+ "\22INP_2PCP_SET\23INP_2PCP_BIT0\24INP_2PCP_BIT1" \
+ "\25INP_2PCP_BIT2"
+
/*
* Flags passed to in_pcblookup*(), inp_smr_lock() and inp_next().
*/
@@ -730,6 +751,9 @@ int in_pcbquery_txrlevel(struct inpcb *, uint32_t *);
void in_pcboutput_txrtlmt(struct inpcb *, struct ifnet *, struct mbuf *);
void in_pcboutput_eagain(struct inpcb *);
#endif
+#ifdef DDB
+void db_print_inpcb(struct inpcb *, const char *, int);
+#endif
#endif /* _KERNEL */
#endif /* !_NETINET_IN_PCB_H_ */
diff --git a/sys/netinet/in_proto.c b/sys/netinet/in_proto.c
index db46da6022c5..42a6cf0b5810 100644
--- a/sys/netinet/in_proto.c
+++ b/sys/netinet/in_proto.c
@@ -108,6 +108,8 @@ SYSCTL_NODE(_net_inet, IPPROTO_ICMP, icmp, CTLFLAG_RW | CTLFLAG_MPSAFE, 0,
"ICMP");
SYSCTL_NODE(_net_inet, IPPROTO_UDP, udp, CTLFLAG_RW | CTLFLAG_MPSAFE, 0,
"UDP");
+SYSCTL_NODE(_net_inet, IPPROTO_UDPLITE, udplite, CTLFLAG_RW | CTLFLAG_MPSAFE, 0,
+ "UDP-Lite");
SYSCTL_NODE(_net_inet, IPPROTO_TCP, tcp, CTLFLAG_RW | CTLFLAG_MPSAFE, 0,
"TCP");
#if defined(SCTP) || defined(SCTP_SUPPORT)
diff --git a/sys/netinet/libalias/alias_db.c b/sys/netinet/libalias/alias_db.c
index c143d74a2f45..41f0a328daec 100644
--- a/sys/netinet/libalias/alias_db.c
+++ b/sys/netinet/libalias/alias_db.c
@@ -2181,7 +2181,7 @@ LibAliasInit(struct libalias *la)
#undef malloc /* XXX: ugly */
la = malloc(sizeof *la, M_ALIAS, M_WAITOK | M_ZERO);
#else
- la = calloc(sizeof *la, 1);
+ la = calloc(1, sizeof *la);
if (la == NULL)
return (la);
#endif
diff --git a/sys/netinet/raw_ip.c b/sys/netinet/raw_ip.c
index 66070faf97e9..bfe608be6b36 100644
--- a/sys/netinet/raw_ip.c
+++ b/sys/netinet/raw_ip.c
@@ -680,7 +680,6 @@ rip_ctloutput(struct socket *so, struct sockopt *sopt)
break;
case IP_DUMMYNET3: /* generic dummynet v.3 functions */
- case IP_DUMMYNET_GET:
if (ip_dn_ctl_ptr != NULL)
error = ip_dn_ctl_ptr(sopt);
else
@@ -747,9 +746,6 @@ rip_ctloutput(struct socket *so, struct sockopt *sopt)
break;
case IP_DUMMYNET3: /* generic dummynet v.3 functions */
- case IP_DUMMYNET_CONFIGURE:
- case IP_DUMMYNET_DEL:
- case IP_DUMMYNET_FLUSH:
if (ip_dn_ctl_ptr != NULL)
error = ip_dn_ctl_ptr(sopt);
else
diff --git a/sys/netinet/siftr.c b/sys/netinet/siftr.c
index 374b5595fcbc..5b89ca026e85 100644
--- a/sys/netinet/siftr.c
+++ b/sys/netinet/siftr.c
@@ -519,7 +519,7 @@ siftr_pkt_manager_thread(void *arg)
if (log_buf != NULL) {
alq_post_flags(siftr_alq, log_buf, 0);
}
- for (;cnt > 0; cnt--) {
+ for (; cnt > 0; cnt--) {
pkt_node = STAILQ_FIRST(&tmp_pkt_queue);
STAILQ_REMOVE_HEAD(&tmp_pkt_queue, nodes);
free(pkt_node, M_SIFTR_PKTNODE);
diff --git a/sys/netinet/tcp.h b/sys/netinet/tcp.h
index 41a49b318cd5..cab7d1c5e726 100644
--- a/sys/netinet/tcp.h
+++ b/sys/netinet/tcp.h
@@ -541,7 +541,6 @@ struct tcp_log_user {
#define TCP_HYBRID_PACING_H_MS 0x0008 /* A client hint for maxseg is present */
#define TCP_HYBRID_PACING_ENABLE 0x0010 /* We are enabling hybrid pacing else disable */
#define TCP_HYBRID_PACING_S_MSS 0x0020 /* Clent wants us to set the mss overriding gp est in CU */
-#define TCP_HAS_PLAYOUT_MS 0x0040 /* The client included the chunk playout milliseconds: deprecate */
/* the below are internal only flags */
#define TCP_HYBRID_PACING_USER_MASK 0x0FFF /* Non-internal flags mask */
#define TCP_HYBRID_PACING_SETMSS 0x1000 /* Internal flag that tells us we set the mss on this entry */
diff --git a/sys/netinet/tcp_hpts_test.c b/sys/netinet/tcp_hpts_test.c
index bab5827e0572..c5dc9cb5b03b 100644
--- a/sys/netinet/tcp_hpts_test.c
+++ b/sys/netinet/tcp_hpts_test.c
@@ -27,6 +27,7 @@
#include <tests/ktest.h>
#include <sys/cdefs.h>
+#include "opt_inet.h"
#include <sys/param.h>
#include <sys/bus.h>
#include <sys/interrupt.h>
@@ -119,6 +120,8 @@ SYSCTL_INT(_net_inet_tcp_hpts_test, OID_AUTO, exit_on_failure, CTLFLAG_RW,
} \
} while (0)
+#ifdef TCP_HPTS_KTEST
+
static void
dump_hpts_entry(struct ktest_test_context *ctx, struct tcp_hpts_entry *hpts)
{
@@ -1658,5 +1661,22 @@ static const struct ktest_test_info tests[] = {
KTEST_INFO(generation_count_validation),
};
+#else /* TCP_HPTS_KTEST */
+
+/*
+ * Stub to indicate that the TCP HPTS ktest is not enabled.
+ */
+KTEST_FUNC(module_load_without_tests)
+{
+ KTEST_LOG(ctx, "Warning: TCP HPTS ktest is not enabled");
+ return (0);
+}
+
+static const struct ktest_test_info tests[] = {
+ KTEST_INFO(module_load_without_tests),
+};
+
+#endif
+
KTEST_MODULE_DECLARE(ktest_tcphpts, tests);
KTEST_MODULE_DEPEND(ktest_tcphpts, tcphpts);
diff --git a/sys/netinet/tcp_input.c b/sys/netinet/tcp_input.c
index dd27ec77c1af..9c58c2815d13 100644
--- a/sys/netinet/tcp_input.c
+++ b/sys/netinet/tcp_input.c
@@ -219,7 +219,7 @@ SYSCTL_INT(_net_inet_tcp, OID_AUTO, recvbuf_auto, CTLFLAG_VNET | CTLFLAG_RW,
&VNET_NAME(tcp_do_autorcvbuf), 0,
"Enable automatic receive buffer sizing");
-VNET_DEFINE(int, tcp_autorcvbuf_max) = 2*1024*1024;
+VNET_DEFINE(int, tcp_autorcvbuf_max) = 8*1024*1024;
SYSCTL_INT(_net_inet_tcp, OID_AUTO, recvbuf_max, CTLFLAG_VNET | CTLFLAG_RW,
&VNET_NAME(tcp_autorcvbuf_max), 0,
"Max size of automatic receive buffer");
@@ -1192,11 +1192,10 @@ tfo_socket_result:
if (thflags & TH_ACK) {
if ((s = tcp_log_addrs(&inc, th, NULL, NULL)))
log(LOG_DEBUG, "%s; %s: Listen socket: "
- "SYN|ACK invalid, segment rejected\n",
+ "SYN|ACK invalid, segment ignored\n",
s, __func__);
- syncache_badack(&inc, port); /* XXX: Not needed! */
TCPSTAT_INC(tcps_badsyn);
- goto dropwithreset;
+ goto dropunlock;
}
/*
* If the drop_synfin option is enabled, drop all
diff --git a/sys/netinet/tcp_output.c b/sys/netinet/tcp_output.c
index 2dfb7faf56e3..208f72c4661c 100644
--- a/sys/netinet/tcp_output.c
+++ b/sys/netinet/tcp_output.c
@@ -123,7 +123,7 @@ SYSCTL_INT(_net_inet_tcp, OID_AUTO, sendbuf_inc, CTLFLAG_VNET | CTLFLAG_RW,
&VNET_NAME(tcp_autosndbuf_inc), 0,
"Incrementor step size of automatic send buffer");
-VNET_DEFINE(int, tcp_autosndbuf_max) = 2*1024*1024;
+VNET_DEFINE(int, tcp_autosndbuf_max) = 8*1024*1024;
SYSCTL_INT(_net_inet_tcp, OID_AUTO, sendbuf_max, CTLFLAG_VNET | CTLFLAG_RW,
&VNET_NAME(tcp_autosndbuf_max), 0,
"Max size of automatic send buffer");
diff --git a/sys/netinet/tcp_stacks/bbr.c b/sys/netinet/tcp_stacks/bbr.c
index 66983edcdd73..10383bc0801e 100644
--- a/sys/netinet/tcp_stacks/bbr.c
+++ b/sys/netinet/tcp_stacks/bbr.c
@@ -477,7 +477,7 @@ bbr_log_rtt_shrinks(struct tcp_bbr *bbr, uint32_t cts, uint32_t applied,
uint16_t set);
static struct bbr_sendmap *
bbr_find_lowest_rsm(struct tcp_bbr *bbr);
-static __inline uint32_t
+static inline uint32_t
bbr_get_rtt(struct tcp_bbr *bbr, int32_t rtt_type);
static void
bbr_log_to_start(struct tcp_bbr *bbr, uint32_t cts, uint32_t to, int32_t pacing_delay,
@@ -1841,7 +1841,7 @@ bbr_counter_destroy(void)
}
-static __inline void
+static inline void
bbr_fill_in_logging_data(struct tcp_bbr *bbr, struct tcp_log_bbr *l, uint32_t cts)
{
memset(l, 0, sizeof(union tcp_log_stackspecific));
@@ -4206,7 +4206,7 @@ bbr_calc_thresh_tlp(struct tcpcb *tp, struct tcp_bbr *bbr,
/*
* Return one of three RTTs to use (in microseconds).
*/
-static __inline uint32_t
+static inline uint32_t
bbr_get_rtt(struct tcp_bbr *bbr, int32_t rtt_type)
{
uint32_t f_rtt;
@@ -4370,7 +4370,7 @@ bbr_timeout_rack(struct tcpcb *tp, struct tcp_bbr *bbr, uint32_t cts)
return (0);
}
-static __inline void
+static inline void
bbr_clone_rsm(struct tcp_bbr *bbr, struct bbr_sendmap *nrsm, struct bbr_sendmap *rsm, uint32_t start)
{
int idx;
diff --git a/sys/netinet/tcp_stacks/rack.c b/sys/netinet/tcp_stacks/rack.c
index c7962b57a69e..9ed26d5a617b 100644
--- a/sys/netinet/tcp_stacks/rack.c
+++ b/sys/netinet/tcp_stacks/rack.c
@@ -204,10 +204,6 @@ static int32_t rack_dnd_default = 0; /* For rr_conf = 3, what is the default fo
static int32_t rack_rxt_controls = 0;
static int32_t rack_fill_cw_state = 0;
static uint8_t rack_req_measurements = 1;
-/* Attack threshold detections */
-static uint32_t rack_highest_sack_thresh_seen = 0;
-static uint32_t rack_highest_move_thresh_seen = 0;
-static uint32_t rack_merge_out_sacks_on_attack = 0;
static int32_t rack_enable_hw_pacing = 0; /* Due to CCSP keep it off by default */
static int32_t rack_hw_rate_caps = 0; /* 1; */
static int32_t rack_hw_rate_cap_per = 0; /* 0 -- off */
@@ -223,7 +219,6 @@ static int32_t rack_default_pacing_divisor = 250;
static uint16_t rack_pacing_min_seg = 0;
static int32_t rack_timely_off = 0;
-static uint32_t sad_seg_size_per = 800; /* 80.0 % */
static int32_t rack_pkt_delay = 1000;
static int32_t rack_send_a_lot_in_prr = 1;
static int32_t rack_min_to = 1000; /* Number of microsecond min timeout */
@@ -399,18 +394,6 @@ counter_u64_t rack_extended_rfo;
counter_u64_t rack_sack_proc_all;
counter_u64_t rack_sack_proc_short;
counter_u64_t rack_sack_proc_restart;
-counter_u64_t rack_sack_attacks_detected;
-counter_u64_t rack_sack_attacks_reversed;
-counter_u64_t rack_sack_attacks_suspect;
-counter_u64_t rack_sack_used_next_merge;
-counter_u64_t rack_sack_splits;
-counter_u64_t rack_sack_used_prev_merge;
-counter_u64_t rack_sack_skipped_acked;
-counter_u64_t rack_ack_total;
-counter_u64_t rack_express_sack;
-counter_u64_t rack_sack_total;
-counter_u64_t rack_move_none;
-counter_u64_t rack_move_some;
counter_u64_t rack_input_idle_reduces;
counter_u64_t rack_collapsed_win;
@@ -834,18 +817,6 @@ sysctl_rack_clear(SYSCTL_HANDLER_ARGS)
counter_u64_zero(rack_rxt_clamps_cwnd_uniq);
counter_u64_zero(rack_multi_single_eq);
counter_u64_zero(rack_proc_non_comp_ack);
- counter_u64_zero(rack_sack_attacks_detected);
- counter_u64_zero(rack_sack_attacks_reversed);
- counter_u64_zero(rack_sack_attacks_suspect);
- counter_u64_zero(rack_sack_used_next_merge);
- counter_u64_zero(rack_sack_used_prev_merge);
- counter_u64_zero(rack_sack_splits);
- counter_u64_zero(rack_sack_skipped_acked);
- counter_u64_zero(rack_ack_total);
- counter_u64_zero(rack_express_sack);
- counter_u64_zero(rack_sack_total);
- counter_u64_zero(rack_move_none);
- counter_u64_zero(rack_move_some);
counter_u64_zero(rack_try_scwnd);
counter_u64_zero(rack_collapsed_win);
counter_u64_zero(rack_collapsed_win_rxt);
@@ -872,7 +843,6 @@ static void
rack_init_sysctls(void)
{
struct sysctl_oid *rack_counters;
- struct sysctl_oid *rack_attack;
struct sysctl_oid *rack_pacing;
struct sysctl_oid *rack_timely;
struct sysctl_oid *rack_timers;
@@ -883,12 +853,6 @@ rack_init_sysctls(void)
struct sysctl_oid *rack_probertt;
struct sysctl_oid *rack_hw_pacing;
- rack_attack = SYSCTL_ADD_NODE(&rack_sysctl_ctx,
- SYSCTL_CHILDREN(rack_sysctl_root),
- OID_AUTO,
- "sack_attack",
- CTLFLAG_RW | CTLFLAG_MPSAFE, 0,
- "Rack Sack Attack Counters and Controls");
rack_counters = SYSCTL_ADD_NODE(&rack_sysctl_ctx,
SYSCTL_CHILDREN(rack_sysctl_root),
OID_AUTO,
@@ -1535,11 +1499,6 @@ rack_init_sysctls(void)
"Do not disturb default for rack_rrr = 3");
SYSCTL_ADD_S32(&rack_sysctl_ctx,
SYSCTL_CHILDREN(rack_misc),
- OID_AUTO, "sad_seg_per", CTLFLAG_RW,
- &sad_seg_size_per, 800,
- "Percentage of segment size needed in a sack 800 = 80.0?");
- SYSCTL_ADD_S32(&rack_sysctl_ctx,
- SYSCTL_CHILDREN(rack_misc),
OID_AUTO, "rxt_controls", CTLFLAG_RW,
&rack_rxt_controls, 0,
"Retransmit sending size controls (valid values 0, 1, 2 default=1)?");
@@ -1619,85 +1578,6 @@ rack_init_sysctls(void)
&rack_autosndbuf_inc, 20,
"What percentage should rack scale up its snd buffer by?");
-
- /* Sack Attacker detection stuff */
- SYSCTL_ADD_U32(&rack_sysctl_ctx,
- SYSCTL_CHILDREN(rack_attack),
- OID_AUTO, "merge_out", CTLFLAG_RW,
- &rack_merge_out_sacks_on_attack, 0,
- "Do we merge the sendmap when we decide we are being attacked?");
-
- SYSCTL_ADD_U32(&rack_sysctl_ctx,
- SYSCTL_CHILDREN(rack_attack),
- OID_AUTO, "detect_highsackratio", CTLFLAG_RW,
- &rack_highest_sack_thresh_seen, 0,
- "Highest sack to ack ratio seen");
- SYSCTL_ADD_U32(&rack_sysctl_ctx,
- SYSCTL_CHILDREN(rack_attack),
- OID_AUTO, "detect_highmoveratio", CTLFLAG_RW,
- &rack_highest_move_thresh_seen, 0,
- "Highest move to non-move ratio seen");
- rack_ack_total = counter_u64_alloc(M_WAITOK);
- SYSCTL_ADD_COUNTER_U64(&rack_sysctl_ctx,
- SYSCTL_CHILDREN(rack_attack),
- OID_AUTO, "acktotal", CTLFLAG_RD,
- &rack_ack_total,
- "Total number of Ack's");
- rack_express_sack = counter_u64_alloc(M_WAITOK);
- SYSCTL_ADD_COUNTER_U64(&rack_sysctl_ctx,
- SYSCTL_CHILDREN(rack_attack),
- OID_AUTO, "exp_sacktotal", CTLFLAG_RD,
- &rack_express_sack,
- "Total expresss number of Sack's");
- rack_sack_total = counter_u64_alloc(M_WAITOK);
- SYSCTL_ADD_COUNTER_U64(&rack_sysctl_ctx,
- SYSCTL_CHILDREN(rack_attack),
- OID_AUTO, "sacktotal", CTLFLAG_RD,
- &rack_sack_total,
- "Total number of SACKs");
- rack_move_none = counter_u64_alloc(M_WAITOK);
- SYSCTL_ADD_COUNTER_U64(&rack_sysctl_ctx,
- SYSCTL_CHILDREN(rack_attack),
- OID_AUTO, "move_none", CTLFLAG_RD,
- &rack_move_none,
- "Total number of SACK index reuse of positions under threshold");
- rack_move_some = counter_u64_alloc(M_WAITOK);
- SYSCTL_ADD_COUNTER_U64(&rack_sysctl_ctx,
- SYSCTL_CHILDREN(rack_attack),
- OID_AUTO, "move_some", CTLFLAG_RD,
- &rack_move_some,
- "Total number of SACK index reuse of positions over threshold");
- rack_sack_attacks_detected = counter_u64_alloc(M_WAITOK);
- SYSCTL_ADD_COUNTER_U64(&rack_sysctl_ctx,
- SYSCTL_CHILDREN(rack_attack),
- OID_AUTO, "attacks", CTLFLAG_RD,
- &rack_sack_attacks_detected,
- "Total number of SACK attackers that had sack disabled");
- rack_sack_attacks_reversed = counter_u64_alloc(M_WAITOK);
- SYSCTL_ADD_COUNTER_U64(&rack_sysctl_ctx,
- SYSCTL_CHILDREN(rack_attack),
- OID_AUTO, "reversed", CTLFLAG_RD,
- &rack_sack_attacks_reversed,
- "Total number of SACK attackers that were later determined false positive");
- rack_sack_attacks_suspect = counter_u64_alloc(M_WAITOK);
- SYSCTL_ADD_COUNTER_U64(&rack_sysctl_ctx,
- SYSCTL_CHILDREN(rack_attack),
- OID_AUTO, "suspect", CTLFLAG_RD,
- &rack_sack_attacks_suspect,
- "Total number of SACKs that triggered early detection");
-
- rack_sack_used_next_merge = counter_u64_alloc(M_WAITOK);
- SYSCTL_ADD_COUNTER_U64(&rack_sysctl_ctx,
- SYSCTL_CHILDREN(rack_attack),
- OID_AUTO, "nextmerge", CTLFLAG_RD,
- &rack_sack_used_next_merge,
- "Total number of times we used the next merge");
- rack_sack_used_prev_merge = counter_u64_alloc(M_WAITOK);
- SYSCTL_ADD_COUNTER_U64(&rack_sysctl_ctx,
- SYSCTL_CHILDREN(rack_attack),
- OID_AUTO, "prevmerge", CTLFLAG_RD,
- &rack_sack_used_prev_merge,
- "Total number of times we used the prev merge");
/* Counters */
rack_total_bytes = counter_u64_alloc(M_WAITOK);
SYSCTL_ADD_COUNTER_U64(&rack_sysctl_ctx,
@@ -1908,18 +1788,6 @@ rack_init_sysctls(void)
OID_AUTO, "sack_short", CTLFLAG_RD,
&rack_sack_proc_short,
"Total times we took shortcut for sack processing");
- rack_sack_skipped_acked = counter_u64_alloc(M_WAITOK);
- SYSCTL_ADD_COUNTER_U64(&rack_sysctl_ctx,
- SYSCTL_CHILDREN(rack_attack),
- OID_AUTO, "skipacked", CTLFLAG_RD,
- &rack_sack_skipped_acked,
- "Total number of times we skipped previously sacked");
- rack_sack_splits = counter_u64_alloc(M_WAITOK);
- SYSCTL_ADD_COUNTER_U64(&rack_sysctl_ctx,
- SYSCTL_CHILDREN(rack_attack),
- OID_AUTO, "ofsplit", CTLFLAG_RD,
- &rack_sack_splits,
- "Total number of times we did the old fashion tree split");
rack_input_idle_reduces = counter_u64_alloc(M_WAITOK);
SYSCTL_ADD_COUNTER_U64(&rack_sysctl_ctx,
SYSCTL_CHILDREN(rack_counters),
@@ -3319,16 +3187,6 @@ rack_counter_destroy(void)
counter_u64_free(rack_hw_pace_lost);
counter_u64_free(rack_non_fto_send);
counter_u64_free(rack_extended_rfo);
- counter_u64_free(rack_ack_total);
- counter_u64_free(rack_express_sack);
- counter_u64_free(rack_sack_total);
- counter_u64_free(rack_move_none);
- counter_u64_free(rack_move_some);
- counter_u64_free(rack_sack_attacks_detected);
- counter_u64_free(rack_sack_attacks_reversed);
- counter_u64_free(rack_sack_attacks_suspect);
- counter_u64_free(rack_sack_used_next_merge);
- counter_u64_free(rack_sack_used_prev_merge);
counter_u64_free(rack_tlp_tot);
counter_u64_free(rack_tlp_newdata);
counter_u64_free(rack_tlp_retran);
@@ -3351,8 +3209,6 @@ rack_counter_destroy(void)
counter_u64_free(rack_sack_proc_all);
counter_u64_free(rack_sack_proc_restart);
counter_u64_free(rack_sack_proc_short);
- counter_u64_free(rack_sack_skipped_acked);
- counter_u64_free(rack_sack_splits);
counter_u64_free(rack_input_idle_reduces);
counter_u64_free(rack_collapsed_win);
counter_u64_free(rack_collapsed_win_rxt);
@@ -4730,7 +4586,7 @@ rack_make_timely_judgement(struct tcp_rack *rack, uint32_t rtt, int32_t rtt_diff
return (timely_says);
}
-static __inline int
+static inline int
rack_in_gp_window(struct tcpcb *tp, struct rack_sendmap *rsm)
{
if (SEQ_GEQ(rsm->r_start, tp->gput_seq) &&
@@ -4767,7 +4623,7 @@ rack_in_gp_window(struct tcpcb *tp, struct rack_sendmap *rsm)
return (0);
}
-static __inline void
+static inline void
rack_mark_in_gp_win(struct tcpcb *tp, struct rack_sendmap *rsm)
{
@@ -4784,7 +4640,7 @@ rack_mark_in_gp_win(struct tcpcb *tp, struct rack_sendmap *rsm)
rsm->r_flags &= ~RACK_IN_GP_WIN;
}
-static __inline void
+static inline void
rack_clear_gp_marks(struct tcpcb *tp, struct tcp_rack *rack)
{
/* A GP measurement is ending, clear all marks on the send map*/
@@ -4802,7 +4658,7 @@ rack_clear_gp_marks(struct tcpcb *tp, struct tcp_rack *rack)
}
-static __inline void
+static inline void
rack_tend_gp_marks(struct tcpcb *tp, struct tcp_rack *rack)
{
struct rack_sendmap *rsm = NULL;
@@ -6864,6 +6720,18 @@ rack_mark_lost(struct tcpcb *tp,
}
}
+static inline void
+rack_mark_nolonger_lost(struct tcp_rack *rack, struct rack_sendmap *rsm)
+{
+ KASSERT((rack->r_ctl.rc_considered_lost >= (rsm->r_end - rsm->r_start)),
+ ("rsm:%p rack:%p rc_considered_lost goes negative", rsm, rack));
+ rsm->r_flags &= ~RACK_WAS_LOST;
+ if (rack->r_ctl.rc_considered_lost >= (rsm->r_end - rsm->r_start))
+ rack->r_ctl.rc_considered_lost -= rsm->r_end - rsm->r_start;
+ else
+ rack->r_ctl.rc_considered_lost = 0;
+}
+
/*
* RACK Timer, here we simply do logging and house keeping.
* the normal rack_output() function will call the
@@ -7005,7 +6873,7 @@ rack_setup_offset_for_rsm(struct tcp_rack *rack, struct rack_sendmap *src_rsm, s
rsm->orig_t_space = M_TRAILINGROOM(rsm->m);
}
-static __inline void
+static inline void
rack_clone_rsm(struct tcp_rack *rack, struct rack_sendmap *nrsm,
struct rack_sendmap *rsm, uint32_t start)
{
@@ -8130,13 +7998,7 @@ rack_update_rsm(struct tcpcb *tp, struct tcp_rack *rack,
* remove the lost desgination and reduce the
* bytes considered lost.
*/
- rsm->r_flags &= ~RACK_WAS_LOST;
- KASSERT((rack->r_ctl.rc_considered_lost >= (rsm->r_end - rsm->r_start)),
- ("rsm:%p rack:%p rc_considered_lost goes negative", rsm, rack));
- if (rack->r_ctl.rc_considered_lost >= (rsm->r_end - rsm->r_start))
- rack->r_ctl.rc_considered_lost -= rsm->r_end - rsm->r_start;
- else
- rack->r_ctl.rc_considered_lost = 0;
+ rack_mark_nolonger_lost(rack, rsm);
}
idx = rsm->r_rtr_cnt - 1;
rsm->r_tim_lastsent[idx] = ts;
@@ -9492,6 +9354,11 @@ do_rest_ofb:
if (rsm->r_flags & RACK_WAS_LOST) {
int my_chg;
+ /*
+ * Note here we do not use our rack_mark_nolonger_lost() function
+ * since we are moving our data pointer around and the
+ * ack'ed side is already not considered lost.
+ */
my_chg = (nrsm->r_end - nrsm->r_start);
KASSERT((rack->r_ctl.rc_considered_lost >= my_chg),
("rsm:%p rack:%p rc_considered_lost goes negative", rsm, rack));
@@ -9531,7 +9398,6 @@ do_rest_ofb:
goto out;
}
rack_log_map_chg(tp, rack, &stack_map, rsm, next, MAP_SACK_M1, end, __LINE__);
- counter_u64_add(rack_sack_used_next_merge, 1);
/* Postion for the next block */
start = next->r_end;
rsm = tqhash_next(rack->r_ctl.tqh, next);
@@ -9563,7 +9429,6 @@ do_rest_ofb:
*/
goto out;
}
- counter_u64_add(rack_sack_splits, 1);
rack_clone_rsm(rack, nrsm, rsm, start);
rsm->r_just_ret = 0;
#ifndef INVARIANTS
@@ -9585,7 +9450,6 @@ do_rest_ofb:
}
} else {
/* Already sacked this piece */
- counter_u64_add(rack_sack_skipped_acked, 1);
if (end == rsm->r_end) {
/* Done with block */
rsm = tqhash_next(rack->r_ctl.tqh, rsm);
@@ -9659,16 +9523,11 @@ do_rest_ofb:
changed += (rsm->r_end - rsm->r_start);
/* You get a count for acking a whole segment or more */
if (rsm->r_flags & RACK_WAS_LOST) {
- int my_chg;
-
- my_chg = (rsm->r_end - rsm->r_start);
- rsm->r_flags &= ~RACK_WAS_LOST;
- KASSERT((rack->r_ctl.rc_considered_lost >= my_chg),
- ("rsm:%p rack:%p rc_considered_lost goes negative", rsm, rack));
- if (my_chg <= rack->r_ctl.rc_considered_lost)
- rack->r_ctl.rc_considered_lost -= my_chg;
- else
- rack->r_ctl.rc_considered_lost = 0;
+ /*
+ * Here we can use the inline function since
+ * the rsm is truly marked lost and now no longer lost.
+ */
+ rack_mark_nolonger_lost(rack, rsm);
}
rack->r_ctl.rc_sacked += (rsm->r_end - rsm->r_start);
if (rsm->r_in_tmap) /* should be true */
@@ -9690,8 +9549,6 @@ do_rest_ofb:
rsm->r_in_tmap = 0;
}
rack_log_map_chg(tp, rack, NULL, rsm, NULL, MAP_SACK_M3, end, __LINE__);
- } else {
- counter_u64_add(rack_sack_skipped_acked, 1);
}
if (end == rsm->r_end) {
/* This block only - done, setup for next */
@@ -9851,6 +9708,10 @@ do_rest_ofb:
if (rsm->r_flags & RACK_WAS_LOST) {
int my_chg;
+ /*
+ * Note here we are using hookery again so we can't
+ * use our rack_mark_nolonger_lost() function.
+ */
my_chg = (nrsm->r_end - nrsm->r_start);
KASSERT((rack->r_ctl.rc_considered_lost >= my_chg),
("rsm:%p rack:%p rc_considered_lost goes negative", rsm, rack));
@@ -9866,7 +9727,6 @@ do_rest_ofb:
}
rack_log_map_chg(tp, rack, prev, &stack_map, rsm, MAP_SACK_M4, end, __LINE__);
rsm = prev;
- counter_u64_add(rack_sack_used_prev_merge, 1);
} else {
/**
* This is the case where our previous
@@ -9931,7 +9791,6 @@ do_rest_ofb:
* rsm |---| (acked)
* nrsm |------| (not acked)
*/
- counter_u64_add(rack_sack_splits, 1);
rack_clone_rsm(rack, nrsm, rsm, end);
rsm->r_flags &= (~RACK_HAS_FIN);
rsm->r_just_ret = 0;
@@ -9952,16 +9811,10 @@ do_rest_ofb:
rack_update_rtt(tp, rack, rsm, to, cts, SACKED, 0);
changed += (rsm->r_end - rsm->r_start);
if (rsm->r_flags & RACK_WAS_LOST) {
- int my_chg;
-
- my_chg = (rsm->r_end - rsm->r_start);
- rsm->r_flags &= ~RACK_WAS_LOST;
- KASSERT((rack->r_ctl.rc_considered_lost >= my_chg),
- ("rsm:%p rack:%p rc_considered_lost goes negative", rsm, rack));
- if (my_chg <= rack->r_ctl.rc_considered_lost)
- rack->r_ctl.rc_considered_lost -= my_chg;
- else
- rack->r_ctl.rc_considered_lost = 0;
+ /*
+ * Here it is safe to use our function.
+ */
+ rack_mark_nolonger_lost(rack, rsm);
}
rack->r_ctl.rc_sacked += (rsm->r_end - rsm->r_start);
@@ -9985,11 +9838,6 @@ do_rest_ofb:
rsm->r_in_tmap = 0;
}
}
- } else if (start != end){
- /*
- * The block was already acked.
- */
- counter_u64_add(rack_sack_skipped_acked, 1);
}
out:
if (rsm &&
@@ -10362,13 +10210,7 @@ more:
* and yet before retransmitting we get an ack
* which can happen due to reordering.
*/
- rsm->r_flags &= ~RACK_WAS_LOST;
- KASSERT((rack->r_ctl.rc_considered_lost >= (rsm->r_end - rsm->r_start)),
- ("rsm:%p rack:%p rc_considered_lost goes negative", rsm, rack));
- if (rack->r_ctl.rc_considered_lost >= (rsm->r_end - rsm->r_start))
- rack->r_ctl.rc_considered_lost -= rsm->r_end - rsm->r_start;
- else
- rack->r_ctl.rc_considered_lost = 0;
+ rack_mark_nolonger_lost(rack, rsm);
}
rack_log_map_chg(tp, rack, NULL, rsm, NULL, MAP_FREE, rsm->r_end, __LINE__);
rack->r_ctl.rc_holes_rxt -= rsm->r_rtr_bytes;
@@ -10476,12 +10318,7 @@ more:
* which can happen due to reordering. In this
* case its only a partial ack of the send.
*/
- KASSERT((rack->r_ctl.rc_considered_lost >= (th_ack - rsm->r_start)),
- ("rsm:%p rack:%p rc_considered_lost goes negative th_ack:%u", rsm, rack, th_ack));
- if (rack->r_ctl.rc_considered_lost >= (th_ack - rsm->r_start))
- rack->r_ctl.rc_considered_lost -= th_ack - rsm->r_start;
- else
- rack->r_ctl.rc_considered_lost = 0;
+ rack_mark_nolonger_lost(rack, rsm);
}
/*
* Clear the dup ack count for
@@ -10793,17 +10630,6 @@ rack_log_ack(struct tcpcb *tp, struct tcpopt *to, struct tcphdr *th, int entered
changed = 0;
th_ack = th->th_ack;
segsiz = ctf_fixed_maxseg(rack->rc_tp);
- if (BYTES_THIS_ACK(tp, th) >= segsiz) {
- /*
- * You only get credit for
- * MSS and greater (and you get extra
- * credit for larger cum-ack moves).
- */
- int ac;
-
- ac = BYTES_THIS_ACK(tp, th) / ctf_fixed_maxseg(rack->rc_tp);
- counter_u64_add(rack_ack_total, ac);
- }
if (SEQ_GT(th_ack, tp->snd_una)) {
rack_log_progress_event(rack, tp, ticks, PROGRESS_UPDATE, __LINE__);
tp->t_acktime = ticks;
@@ -10875,8 +10701,8 @@ rack_log_ack(struct tcpcb *tp, struct tcpopt *to, struct tcphdr *th, int entered
if (sacks_seen != NULL)
*sacks_seen = num_sack_blks;
if (num_sack_blks == 0) {
- /* Nothing to sack, but we need to update counts */
- goto out_with_totals;
+ /* Nothing to sack */
+ goto out;
}
/* Its a sack of some sort */
if (num_sack_blks < 2) {
@@ -10899,7 +10725,7 @@ rack_log_ack(struct tcpcb *tp, struct tcpopt *to, struct tcphdr *th, int entered
*/
again:
if (num_sack_blks == 0)
- goto out_with_totals;
+ goto out;
if (num_sack_blks > 1) {
for (i = 0; i < num_sack_blks; i++) {
for (j = i + 1; j < num_sack_blks; j++) {
@@ -10952,19 +10778,7 @@ do_sack_work:
changed += acked;
}
if (num_sack_blks == 1) {
- /*
- * This is what we would expect from
- * a normal implementation to happen
- * after we have retransmitted the FR,
- * i.e the sack-filter pushes down
- * to 1 block and the next to be retransmitted
- * is the sequence in the sack block (has more
- * are acked). Count this as ACK'd data to boost
- * up the chances of recovering any false positives.
- */
- counter_u64_add(rack_ack_total, (acked / ctf_fixed_maxseg(rack->rc_tp)));
- counter_u64_add(rack_express_sack, 1);
- goto out_with_totals;
+ goto out;
} else {
/*
* Start the loop through the
@@ -10973,7 +10787,6 @@ do_sack_work:
loop_start = 1;
}
}
- counter_u64_add(rack_sack_total, 1);
rsm = rack->r_ctl.rc_sacklast;
for (i = loop_start; i < num_sack_blks; i++) {
acked = rack_proc_sack_blk(tp, rack, &sack_blocks[i], to, &rsm, cts, segsiz);
@@ -10982,18 +10795,6 @@ do_sack_work:
changed += acked;
}
}
-out_with_totals:
- if (num_sack_blks > 1) {
- /*
- * You get an extra stroke if
- * you have more than one sack-blk, this
- * could be where we are skipping forward
- * and the sack-filter is still working, or
- * it could be an attacker constantly
- * moving us.
- */
- counter_u64_add(rack_move_some, 1);
- }
out:
if (changed) {
/* Something changed cancel the rack timer */
@@ -14713,7 +14514,6 @@ rack_init(struct tcpcb *tp, void **ptr)
rack->r_ctl.rc_prr_sendalot = rack_send_a_lot_in_prr;
rack->r_ctl.rc_min_to = rack_min_to;
microuptime(&rack->r_ctl.act_rcv_time);
- rack->r_ctl.rc_last_time_decay = rack->r_ctl.act_rcv_time;
rack->r_ctl.rack_per_of_gp_ss = rack_per_of_gp_ss;
if (rack_hw_up_only)
rack->r_up_only = 1;
diff --git a/sys/netinet/tcp_stacks/tcp_rack.h b/sys/netinet/tcp_stacks/tcp_rack.h
index 144b4fabf7eb..cac17d9aeb50 100644
--- a/sys/netinet/tcp_stacks/tcp_rack.h
+++ b/sys/netinet/tcp_stacks/tcp_rack.h
@@ -462,7 +462,6 @@ struct rack_control {
uint64_t rc_gp_output_ts; /* chg*/
uint64_t rc_gp_cumack_ts; /* chg*/
struct timeval act_rcv_time;
- struct timeval rc_last_time_decay; /* SAD time decay happened here */
uint64_t gp_bw;
uint64_t init_rate;
#ifdef NETFLIX_SHARED_CWND
diff --git a/sys/netinet/tcp_subr.c b/sys/netinet/tcp_subr.c
index c817c79881d6..b6f428b279b3 100644
--- a/sys/netinet/tcp_subr.c
+++ b/sys/netinet/tcp_subr.c
@@ -607,7 +607,7 @@ tcp_recv_udp_tunneled_packet(struct mbuf *m, int off, struct inpcb *inp,
}
}
m->m_pkthdr.tcp_tun_port = port = uh->uh_sport;
- bcopy(th, uh, m->m_len - off);
+ bcopy(th, uh, m->m_len - off - sizeof(struct udphdr));
m->m_len -= sizeof(struct udphdr);
m->m_pkthdr.len -= sizeof(struct udphdr);
/*
diff --git a/sys/netinet/tcp_syncache.c b/sys/netinet/tcp_syncache.c
index f842a5678fa1..fa7035771714 100644
--- a/sys/netinet/tcp_syncache.c
+++ b/sys/netinet/tcp_syncache.c
@@ -713,23 +713,6 @@ done:
}
void
-syncache_badack(struct in_conninfo *inc, uint16_t port)
-{
- struct syncache *sc;
- struct syncache_head *sch;
-
- if (syncache_cookiesonly())
- return;
- sc = syncache_lookup(inc, &sch); /* returns locked sch */
- SCH_LOCK_ASSERT(sch);
- if ((sc != NULL) && (sc->sc_port == port)) {
- syncache_drop(sc, sch);
- TCPSTAT_INC(tcps_sc_badack);
- }
- SCH_UNLOCK(sch);
-}
-
-void
syncache_unreach(struct in_conninfo *inc, tcp_seq th_seq, uint16_t port)
{
struct syncache *sc;
@@ -1046,6 +1029,8 @@ abort:
*
* On syncache_socket() success the newly created socket
* has its underlying inp locked.
+ *
+ * *lsop is updated, if and only if 1 is returned.
*/
int
syncache_expand(struct in_conninfo *inc, struct tcpopt *to, struct tcphdr *th,
@@ -1094,12 +1079,14 @@ syncache_expand(struct in_conninfo *inc, struct tcpopt *to, struct tcphdr *th,
*/
SCH_UNLOCK(sch);
TCPSTAT_INC(tcps_sc_spurcookie);
- if ((s = tcp_log_addrs(inc, th, NULL, NULL)))
+ if ((s = tcp_log_addrs(inc, th, NULL, NULL))) {
log(LOG_DEBUG, "%s; %s: Spurious ACK, "
"segment rejected "
"(syncookies disabled)\n",
s, __func__);
- goto failed;
+ free(s, M_TCPLOG);
+ }
+ return (0);
}
if (sch->sch_last_overflow <
time_uptime - SYNCOOKIE_LIFETIME) {
@@ -1109,12 +1096,14 @@ syncache_expand(struct in_conninfo *inc, struct tcpopt *to, struct tcphdr *th,
*/
SCH_UNLOCK(sch);
TCPSTAT_INC(tcps_sc_spurcookie);
- if ((s = tcp_log_addrs(inc, th, NULL, NULL)))
+ if ((s = tcp_log_addrs(inc, th, NULL, NULL))) {
log(LOG_DEBUG, "%s; %s: Spurious ACK, "
"segment rejected "
"(no syncache entry)\n",
s, __func__);
- goto failed;
+ free(s, M_TCPLOG);
+ }
+ return (0);
}
SCH_UNLOCK(sch);
}
@@ -1128,11 +1117,13 @@ syncache_expand(struct in_conninfo *inc, struct tcpopt *to, struct tcphdr *th,
TCPSTAT_INC(tcps_sc_recvcookie);
} else {
TCPSTAT_INC(tcps_sc_failcookie);
- if ((s = tcp_log_addrs(inc, th, NULL, NULL)))
+ if ((s = tcp_log_addrs(inc, th, NULL, NULL))) {
log(LOG_DEBUG, "%s; %s: Segment failed "
"SYNCOOKIE authentication, segment rejected "
"(probably spoofed)\n", s, __func__);
- goto failed;
+ free(s, M_TCPLOG);
+ }
+ return (0);
}
#if defined(IPSEC_SUPPORT) || defined(TCP_SIGNATURE)
/* If received ACK has MD5 signature, check it. */
@@ -1160,7 +1151,7 @@ syncache_expand(struct in_conninfo *inc, struct tcpopt *to, struct tcphdr *th,
/*
* If listening socket requested TCP digests, check that
* received ACK has signature and it is correct.
- * If not, drop the ACK and leave sc entry in th cache,
+ * If not, drop the ACK and leave sc entry in the cache,
* because SYN was received with correct signature.
*/
if (sc->sc_flags & SCF_SIGNATURE) {
@@ -1206,9 +1197,9 @@ syncache_expand(struct in_conninfo *inc, struct tcpopt *to, struct tcphdr *th,
"%s; %s: SEG.TSval %u < TS.Recent %u, "
"segment dropped\n", s, __func__,
to->to_tsval, sc->sc_tsreflect);
- free(s, M_TCPLOG);
}
SCH_UNLOCK(sch);
+ free(s, M_TCPLOG);
return (-1); /* Do not send RST */
}
@@ -1225,7 +1216,6 @@ syncache_expand(struct in_conninfo *inc, struct tcpopt *to, struct tcphdr *th,
"expected, segment processed normally\n",
s, __func__);
free(s, M_TCPLOG);
- s = NULL;
}
}
@@ -1312,16 +1302,6 @@ syncache_expand(struct in_conninfo *inc, struct tcpopt *to, struct tcphdr *th,
if (sc != &scs)
syncache_free(sc);
return (1);
-failed:
- if (sc != NULL) {
- TCPSTATES_DEC(TCPS_SYN_RECEIVED);
- if (sc != &scs)
- syncache_free(sc);
- }
- if (s != NULL)
- free(s, M_TCPLOG);
- *lsop = NULL;
- return (0);
}
static struct socket *
@@ -1383,6 +1363,7 @@ syncache_add(struct in_conninfo *inc, struct tcpopt *to, struct tcphdr *th,
struct tcpcb *tp;
struct socket *rv = NULL;
struct syncache *sc = NULL;
+ struct ucred *cred;
struct syncache_head *sch;
struct mbuf *ipopts = NULL;
u_int ltflags;
@@ -1411,6 +1392,7 @@ syncache_add(struct in_conninfo *inc, struct tcpopt *to, struct tcphdr *th,
*/
KASSERT(SOLISTENING(so), ("%s: %p not listening", __func__, so));
tp = sototcpcb(so);
+ cred = V_tcp_syncache.see_other ? NULL : crhold(so->so_cred);
#ifdef INET6
if (inc->inc_flags & INC_ISIPV6) {
@@ -1639,16 +1621,16 @@ syncache_add(struct in_conninfo *inc, struct tcpopt *to, struct tcphdr *th,
/*
* sc_cred is only used in syncache_pcblist() to list TCP endpoints in
* TCPS_SYN_RECEIVED state when V_tcp_syncache.see_other is false.
- * Therefore, store the credentials and take a reference count only
- * when needed:
+ * Therefore, store the credentials only when needed:
* - sc is allocated from the zone and not using the on stack instance.
* - the sysctl variable net.inet.tcp.syncache.see_other is false.
* The reference count is decremented when a zone allocated sc is
* freed in syncache_free().
*/
- if (sc != &scs && !V_tcp_syncache.see_other)
- sc->sc_cred = crhold(so->so_cred);
- else
+ if (sc != &scs && !V_tcp_syncache.see_other) {
+ sc->sc_cred = cred;
+ cred = NULL;
+ } else
sc->sc_cred = NULL;
sc->sc_port = port;
sc->sc_ipopts = ipopts;
@@ -1786,6 +1768,8 @@ donenoprobe:
tcp_fastopen_decrement_counter(tfo_pending);
tfo_expanded:
+ if (cred != NULL)
+ crfree(cred);
if (sc == NULL || sc == &scs) {
#ifdef MAC
mac_syncache_destroy(&maclabel);
diff --git a/sys/netinet/tcp_syncache.h b/sys/netinet/tcp_syncache.h
index 37f6ff3d6ca9..c916b4de6ae0 100644
--- a/sys/netinet/tcp_syncache.h
+++ b/sys/netinet/tcp_syncache.h
@@ -45,7 +45,6 @@ struct socket * syncache_add(struct in_conninfo *, struct tcpopt *,
void *, void *, uint8_t, uint16_t);
void syncache_chkrst(struct in_conninfo *, struct tcphdr *, struct mbuf *,
uint16_t);
-void syncache_badack(struct in_conninfo *, uint16_t);
int syncache_pcblist(struct sysctl_req *);
struct syncache {
diff --git a/sys/netinet/tcp_timer.c b/sys/netinet/tcp_timer.c
index 3b9fe7a317b0..57c57666fa3a 100644
--- a/sys/netinet/tcp_timer.c
+++ b/sys/netinet/tcp_timer.c
@@ -513,9 +513,12 @@ tcp_timer_persist(struct tcpcb *tp)
if (progdrop || (tp->t_rxtshift >= V_tcp_retries &&
(ticks - tp->t_rcvtime >= tcp_maxpersistidle ||
ticks - tp->t_rcvtime >= TCP_REXMTVAL(tp) * tcp_totbackoff))) {
- if (!progdrop)
+ if (progdrop) {
+ tcp_log_end_status(tp, TCP_EI_STATUS_PROGRESS);
+ } else {
TCPSTAT_INC(tcps_persistdrop);
- tcp_log_end_status(tp, TCP_EI_STATUS_PERSIST_MAX);
+ tcp_log_end_status(tp, TCP_EI_STATUS_PERSIST_MAX);
+ }
goto dropit;
}
/*
diff --git a/sys/netinet/tcp_usrreq.c b/sys/netinet/tcp_usrreq.c
index 98c934955121..4d1a6455d09e 100644
--- a/sys/netinet/tcp_usrreq.c
+++ b/sys/netinet/tcp_usrreq.c
@@ -2799,258 +2799,6 @@ db_print_tstate(int t_state)
}
static void
-db_print_tflags(u_int t_flags)
-{
- int comma;
-
- comma = 0;
- if (t_flags & TF_ACKNOW) {
- db_printf("%sTF_ACKNOW", comma ? ", " : "");
- comma = 1;
- }
- if (t_flags & TF_DELACK) {
- db_printf("%sTF_DELACK", comma ? ", " : "");
- comma = 1;
- }
- if (t_flags & TF_NODELAY) {
- db_printf("%sTF_NODELAY", comma ? ", " : "");
- comma = 1;
- }
- if (t_flags & TF_NOOPT) {
- db_printf("%sTF_NOOPT", comma ? ", " : "");
- comma = 1;
- }
- if (t_flags & TF_SENTFIN) {
- db_printf("%sTF_SENTFIN", comma ? ", " : "");
- comma = 1;
- }
- if (t_flags & TF_REQ_SCALE) {
- db_printf("%sTF_REQ_SCALE", comma ? ", " : "");
- comma = 1;
- }
- if (t_flags & TF_RCVD_SCALE) {
- db_printf("%sTF_RECVD_SCALE", comma ? ", " : "");
- comma = 1;
- }
- if (t_flags & TF_REQ_TSTMP) {
- db_printf("%sTF_REQ_TSTMP", comma ? ", " : "");
- comma = 1;
- }
- if (t_flags & TF_RCVD_TSTMP) {
- db_printf("%sTF_RCVD_TSTMP", comma ? ", " : "");
- comma = 1;
- }
- if (t_flags & TF_SACK_PERMIT) {
- db_printf("%sTF_SACK_PERMIT", comma ? ", " : "");
- comma = 1;
- }
- if (t_flags & TF_NEEDSYN) {
- db_printf("%sTF_NEEDSYN", comma ? ", " : "");
- comma = 1;
- }
- if (t_flags & TF_NEEDFIN) {
- db_printf("%sTF_NEEDFIN", comma ? ", " : "");
- comma = 1;
- }
- if (t_flags & TF_NOPUSH) {
- db_printf("%sTF_NOPUSH", comma ? ", " : "");
- comma = 1;
- }
- if (t_flags & TF_PREVVALID) {
- db_printf("%sTF_PREVVALID", comma ? ", " : "");
- comma = 1;
- }
- if (t_flags & TF_WAKESOR) {
- db_printf("%sTF_WAKESOR", comma ? ", " : "");
- comma = 1;
- }
- if (t_flags & TF_GPUTINPROG) {
- db_printf("%sTF_GPUTINPROG", comma ? ", " : "");
- comma = 1;
- }
- if (t_flags & TF_MORETOCOME) {
- db_printf("%sTF_MORETOCOME", comma ? ", " : "");
- comma = 1;
- }
- if (t_flags & TF_SONOTCONN) {
- db_printf("%sTF_SONOTCONN", comma ? ", " : "");
- comma = 1;
- }
- if (t_flags & TF_LASTIDLE) {
- db_printf("%sTF_LASTIDLE", comma ? ", " : "");
- comma = 1;
- }
- if (t_flags & TF_RXWIN0SENT) {
- db_printf("%sTF_RXWIN0SENT", comma ? ", " : "");
- comma = 1;
- }
- if (t_flags & TF_FASTRECOVERY) {
- db_printf("%sTF_FASTRECOVERY", comma ? ", " : "");
- comma = 1;
- }
- if (t_flags & TF_WASFRECOVERY) {
- db_printf("%sTF_WASFRECOVERY", comma ? ", " : "");
- comma = 1;
- }
- if (t_flags & TF_SIGNATURE) {
- db_printf("%sTF_SIGNATURE", comma ? ", " : "");
- comma = 1;
- }
- if (t_flags & TF_FORCEDATA) {
- db_printf("%sTF_FORCEDATA", comma ? ", " : "");
- comma = 1;
- }
- if (t_flags & TF_TSO) {
- db_printf("%sTF_TSO", comma ? ", " : "");
- comma = 1;
- }
- if (t_flags & TF_TOE) {
- db_printf("%sTF_TOE", comma ? ", " : "");
- comma = 1;
- }
- if (t_flags & TF_CLOSED) {
- db_printf("%sTF_CLOSED", comma ? ", " : "");
- comma = 1;
- }
- if (t_flags & TF_SENTSYN) {
- db_printf("%sTF_SENTSYN", comma ? ", " : "");
- comma = 1;
- }
- if (t_flags & TF_LRD) {
- db_printf("%sTF_LRD", comma ? ", " : "");
- comma = 1;
- }
- if (t_flags & TF_CONGRECOVERY) {
- db_printf("%sTF_CONGRECOVERY", comma ? ", " : "");
- comma = 1;
- }
- if (t_flags & TF_WASCRECOVERY) {
- db_printf("%sTF_WASCRECOVERY", comma ? ", " : "");
- comma = 1;
- }
- if (t_flags & TF_FASTOPEN) {
- db_printf("%sTF_FASTOPEN", comma ? ", " : "");
- comma = 1;
- }
-}
-
-static void
-db_print_tflags2(u_int t_flags2)
-{
- int comma;
-
- comma = 0;
- if (t_flags2 & TF2_PLPMTU_BLACKHOLE) {
- db_printf("%sTF2_PLPMTU_BLACKHOLE", comma ? ", " : "");
- comma = 1;
- }
- if (t_flags2 & TF2_PLPMTU_PMTUD) {
- db_printf("%sTF2_PLPMTU_PMTUD", comma ? ", " : "");
- comma = 1;
- }
- if (t_flags2 & TF2_PLPMTU_MAXSEGSNT) {
- db_printf("%sTF2_PLPMTU_MAXSEGSNT", comma ? ", " : "");
- comma = 1;
- }
- if (t_flags2 & TF2_LOG_AUTO) {
- db_printf("%sTF2_LOG_AUTO", comma ? ", " : "");
- comma = 1;
- }
- if (t_flags2 & TF2_DROP_AF_DATA) {
- db_printf("%sTF2_DROP_AF_DATA", comma ? ", " : "");
- comma = 1;
- }
- if (t_flags2 & TF2_ECN_PERMIT) {
- db_printf("%sTF2_ECN_PERMIT", comma ? ", " : "");
- comma = 1;
- }
- if (t_flags2 & TF2_ECN_SND_CWR) {
- db_printf("%sTF2_ECN_SND_CWR", comma ? ", " : "");
- comma = 1;
- }
- if (t_flags2 & TF2_ECN_SND_ECE) {
- db_printf("%sTF2_ECN_SND_ECE", comma ? ", " : "");
- comma = 1;
- }
- if (t_flags2 & TF2_ACE_PERMIT) {
- db_printf("%sTF2_ACE_PERMIT", comma ? ", " : "");
- comma = 1;
- }
- if (t_flags2 & TF2_HPTS_CPU_SET) {
- db_printf("%sTF2_HPTS_CPU_SET", comma ? ", " : "");
- comma = 1;
- }
- if (t_flags2 & TF2_FBYTES_COMPLETE) {
- db_printf("%sTF2_FBYTES_COMPLETE", comma ? ", " : "");
- comma = 1;
- }
- if (t_flags2 & TF2_ECN_USE_ECT1) {
- db_printf("%sTF2_ECN_USE_ECT1", comma ? ", " : "");
- comma = 1;
- }
- if (t_flags2 & TF2_TCP_ACCOUNTING) {
- db_printf("%sTF2_TCP_ACCOUNTING", comma ? ", " : "");
- comma = 1;
- }
- if (t_flags2 & TF2_HPTS_CALLS) {
- db_printf("%sTF2_HPTS_CALLS", comma ? ", " : "");
- comma = 1;
- }
- if (t_flags2 & TF2_MBUF_L_ACKS) {
- db_printf("%sTF2_MBUF_L_ACKS", comma ? ", " : "");
- comma = 1;
- }
- if (t_flags2 & TF2_MBUF_ACKCMP) {
- db_printf("%sTF2_MBUF_ACKCMP", comma ? ", " : "");
- comma = 1;
- }
- if (t_flags2 & TF2_SUPPORTS_MBUFQ) {
- db_printf("%sTF2_SUPPORTS_MBUFQ", comma ? ", " : "");
- comma = 1;
- }
- if (t_flags2 & TF2_MBUF_QUEUE_READY) {
- db_printf("%sTF2_MBUF_QUEUE_READY", comma ? ", " : "");
- comma = 1;
- }
- if (t_flags2 & TF2_DONT_SACK_QUEUE) {
- db_printf("%sTF2_DONT_SACK_QUEUE", comma ? ", " : "");
- comma = 1;
- }
- if (t_flags2 & TF2_CANNOT_DO_ECN) {
- db_printf("%sTF2_CANNOT_DO_ECN", comma ? ", " : "");
- comma = 1;
- }
- if (t_flags2 & TF2_PROC_SACK_PROHIBIT) {
- db_printf("%sTF2_PROC_SACK_PROHIBIT", comma ? ", " : "");
- comma = 1;
- }
- if (t_flags2 & TF2_IPSEC_TSO) {
- db_printf("%sTF2_IPSEC_TSO", comma ? ", " : "");
- comma = 1;
- }
- if (t_flags2 & TF2_NO_ISS_CHECK) {
- db_printf("%sTF2_NO_ISS_CHECK", comma ? ", " : "");
- comma = 1;
- }
-}
-
-static void
-db_print_toobflags(char t_oobflags)
-{
- int comma;
-
- comma = 0;
- if (t_oobflags & TCPOOB_HAVEDATA) {
- db_printf("%sTCPOOB_HAVEDATA", comma ? ", " : "");
- comma = 1;
- }
- if (t_oobflags & TCPOOB_HADDATA) {
- db_printf("%sTCPOOB_HADDATA", comma ? ", " : "");
- comma = 1;
- }
-}
-
-static void
db_print_bblog_state(int state)
{
switch (state) {
@@ -3088,7 +2836,8 @@ db_print_bblog_state(int state)
}
static void
-db_print_tcpcb(struct tcpcb *tp, const char *name, int indent, bool show_bblog)
+db_print_tcpcb(struct tcpcb *tp, const char *name, int indent, bool show_bblog,
+ bool show_inpcb)
{
db_print_indent(indent);
@@ -3096,6 +2845,9 @@ db_print_tcpcb(struct tcpcb *tp, const char *name, int indent, bool show_bblog)
indent += 2;
+ if (show_inpcb)
+ db_print_inpcb(tptoinpcb(tp), "t_inpcb", indent);
+
db_print_indent(indent);
db_printf("t_segq first: %p t_segqlen: %d t_dupacks: %d\n",
TAILQ_FIRST(&tp->t_segq), tp->t_segqlen, tp->t_dupacks);
@@ -3110,14 +2862,10 @@ db_print_tcpcb(struct tcpcb *tp, const char *name, int indent, bool show_bblog)
db_printf(")\n");
db_print_indent(indent);
- db_printf("t_flags: 0x%x (", tp->t_flags);
- db_print_tflags(tp->t_flags);
- db_printf(")\n");
+ db_printf("t_flags: 0x%b\n", tp->t_flags, TF_BITS);
db_print_indent(indent);
- db_printf("t_flags2: 0x%x (", tp->t_flags2);
- db_print_tflags2(tp->t_flags2);
- db_printf(")\n");
+ db_printf("t_flags2: 0x%b\n", tp->t_flags2, TF2_BITS);
db_print_indent(indent);
db_printf("snd_una: 0x%08x snd_max: 0x%08x snd_nxt: 0x%08x\n",
@@ -3164,9 +2912,8 @@ db_print_tcpcb(struct tcpcb *tp, const char *name, int indent, bool show_bblog)
tp->t_rttupdated, tp->max_sndwnd, tp->t_softerror);
db_print_indent(indent);
- db_printf("t_oobflags: 0x%x (", tp->t_oobflags);
- db_print_toobflags(tp->t_oobflags);
- db_printf(") t_iobc: 0x%02x\n", tp->t_iobc);
+ db_printf("t_oobflags: 0x%b t_iobc: 0x%02x\n", tp->t_oobflags,
+ TCPOOB_BITS, tp->t_iobc);
db_print_indent(indent);
db_printf("snd_scale: %u rcv_scale: %u request_r_scale: %u\n",
@@ -3227,33 +2974,36 @@ db_print_tcpcb(struct tcpcb *tp, const char *name, int indent, bool show_bblog)
DB_SHOW_COMMAND(tcpcb, db_show_tcpcb)
{
struct tcpcb *tp;
- bool show_bblog;
+ bool show_bblog, show_inpcb;
if (!have_addr) {
- db_printf("usage: show tcpcb <addr>\n");
+ db_printf("usage: show tcpcb[/bi] <addr>\n");
return;
}
show_bblog = strchr(modif, 'b') != NULL;
+ show_inpcb = strchr(modif, 'i') != NULL;
tp = (struct tcpcb *)addr;
-
- db_print_tcpcb(tp, "tcpcb", 0, show_bblog);
+ db_print_tcpcb(tp, "tcpcb", 0, show_bblog, show_inpcb);
}
DB_SHOW_ALL_COMMAND(tcpcbs, db_show_all_tcpcbs)
{
VNET_ITERATOR_DECL(vnet_iter);
struct inpcb *inp;
- bool only_locked, show_bblog;
+ struct tcpcb *tp;
+ bool only_locked, show_bblog, show_inpcb;
only_locked = strchr(modif, 'l') != NULL;
show_bblog = strchr(modif, 'b') != NULL;
+ show_inpcb = strchr(modif, 'i') != NULL;
VNET_FOREACH(vnet_iter) {
CURVNET_SET(vnet_iter);
CK_LIST_FOREACH(inp, &V_tcbinfo.ipi_listhead, inp_list) {
if (only_locked &&
inp->inp_lock.rw_lock == RW_UNLOCKED)
continue;
- db_print_tcpcb(intotcpcb(inp), "tcpcb", 0, show_bblog);
+ tp = intotcpcb(inp);
+ db_print_tcpcb(tp, "tcpcb", 0, show_bblog, show_inpcb);
if (db_pager_quit)
break;
}
diff --git a/sys/netinet/tcp_var.h b/sys/netinet/tcp_var.h
index c3be95c80798..f9297be46af7 100644
--- a/sys/netinet/tcp_var.h
+++ b/sys/netinet/tcp_var.h
@@ -795,6 +795,17 @@ tcp_packets_this_ack(struct tcpcb *tp, tcp_seq ack)
#define TF_WASCRECOVERY 0x40000000 /* was in congestion recovery */
#define TF_FASTOPEN 0x80000000 /* TCP Fast Open indication */
+/* t_flags description for use with printf(9) %b identifier. */
+#define TF_BITS "\20" \
+ "\1TF_ACKNOW\2TF_DELACK\3TF_NODELAY\4TF_NOOPT" \
+ "\5TF_SENTFIN\6TF_REQ_SCALE\7TF_RCVD_SCALE\10TF_REQ_TSTMP" \
+ "\11TF_RCVD_TSTMP\12TF_SACK_PERMIT\13TF_NEEDSYN\14TF_NEEDFIN" \
+ "\15TF_NOPUSH\16TF_PREVVALID\17TF_WAKESOR\20TF_GPUTINPROG" \
+ "\21TF_MORETOCOME\22TF_SONOTCONN\23TF_LASTIDLE\24TF_RXWIN0SENT" \
+ "\25TF_FASTRECOVERY\26TF_WASFRECOVERY\27TF_SIGNATURE\30TF_FORCEDATA" \
+ "\31TF_TSO\32TF_TOE\33TF_CLOSED\34TF_SENTSYN" \
+ "\35TF_LRD\36TF_CONGRECOVERY\37TF_WASCRECOVERY\40TF_FASTOPEN"
+
#define IN_FASTRECOVERY(t_flags) (t_flags & TF_FASTRECOVERY)
#define ENTER_FASTRECOVERY(t_flags) t_flags |= TF_FASTRECOVERY
#define EXIT_FASTRECOVERY(t_flags) t_flags &= ~TF_FASTRECOVERY
@@ -815,6 +826,9 @@ tcp_packets_this_ack(struct tcpcb *tp, tcp_seq ack)
#define TCPOOB_HAVEDATA 0x01
#define TCPOOB_HADDATA 0x02
+/* t_oobflags description for use with printf(9) %b identifier. */
+#define TCPOOB_BITS "\20\1TCPOOB_HAVEDATA\2TCPOOB_HADDATA"
+
/*
* Flags for the extended TCP flags field, t_flags2
*/
@@ -842,6 +856,21 @@ tcp_packets_this_ack(struct tcpcb *tp, tcp_seq ack)
#define TF2_IPSEC_TSO 0x00200000 /* IPSEC + TSO supported */
#define TF2_NO_ISS_CHECK 0x00400000 /* Don't check SEG.ACK against ISS */
+/* t_flags2 description for use with printf(9) %b identifier. */
+#define TF2_BITS "\20" \
+ "\1TF2_PLPMTU_BLACKHOLE\2TF2_PLPMTU_PMTUD" \
+ "\3TF2_PLPMTU_MAXSEGSNT\4TF2_LOG_AUTO" \
+ "\5TF2_DROP_AF_DATA\6TF2_ECN_PERMIT" \
+ "\7TF2_ECN_SND_CWR\10TF2_ECN_SND_ECE" \
+ "\11TF2_ACE_PERMIT\12TF2_HPTS_CPU_SET" \
+ "\13TF2_FBYTES_COMPLETE\14TF2_ECN_USE_ECT1" \
+ "\15TF2_TCP_ACCOUNTING\16TF2_HPTS_CALLS" \
+ "\17TF2_MBUF_L_ACKS\20TF2_MBUF_ACKCMP" \
+ "\21TF2_SUPPORTS_MBUFQ\22TF2_MBUF_QUEUE_READY" \
+ "\23TF2_DONT_SACK_QUEUE\24TF2_CANNOT_DO_ECN" \
+ "\25TF2_PROC_SACK_PROHIBIT\26TF2_IPSEC_TSO" \
+ "\27TF2_NO_ISS_CHECK"
+
/*
* Structure to hold TCP options that are only used during segment
* processing (in tcp_input), but not held in the tcpcb.
diff --git a/sys/netinet/udp_usrreq.c b/sys/netinet/udp_usrreq.c
index cea8a916679b..f1d952037d5a 100644
--- a/sys/netinet/udp_usrreq.c
+++ b/sys/netinet/udp_usrreq.c
@@ -787,7 +787,8 @@ udplite_ctlinput(struct icmp *icmp)
static int
udp_pcblist(SYSCTL_HANDLER_ARGS)
{
- struct inpcb_iterator inpi = INP_ALL_ITERATOR(&V_udbinfo,
+ struct inpcbinfo *pcbinfo = udp_get_inpcbinfo(arg2);
+ struct inpcb_iterator inpi = INP_ALL_ITERATOR(pcbinfo,
INPLOOKUP_RLOCKPCB);
struct xinpgen xig;
struct inpcb *inp;
@@ -799,7 +800,7 @@ udp_pcblist(SYSCTL_HANDLER_ARGS)
if (req->oldptr == 0) {
int n;
- n = V_udbinfo.ipi_count;
+ n = pcbinfo->ipi_count;
n += imax(n / 8, 10);
req->oldidx = 2 * (sizeof xig) + n * sizeof(struct xinpcb);
return (0);
@@ -810,8 +811,8 @@ udp_pcblist(SYSCTL_HANDLER_ARGS)
bzero(&xig, sizeof(xig));
xig.xig_len = sizeof xig;
- xig.xig_count = V_udbinfo.ipi_count;
- xig.xig_gen = V_udbinfo.ipi_gencnt;
+ xig.xig_count = pcbinfo->ipi_count;
+ xig.xig_gen = pcbinfo->ipi_gencnt;
xig.xig_sogen = so_gencnt;
error = SYSCTL_OUT(req, &xig, sizeof xig);
if (error)
@@ -838,9 +839,9 @@ udp_pcblist(SYSCTL_HANDLER_ARGS)
* that something happened while we were processing this
* request, and it might be necessary to retry.
*/
- xig.xig_gen = V_udbinfo.ipi_gencnt;
+ xig.xig_gen = pcbinfo->ipi_gencnt;
xig.xig_sogen = so_gencnt;
- xig.xig_count = V_udbinfo.ipi_count;
+ xig.xig_count = pcbinfo->ipi_count;
error = SYSCTL_OUT(req, &xig, sizeof xig);
}
@@ -848,10 +849,15 @@ udp_pcblist(SYSCTL_HANDLER_ARGS)
}
SYSCTL_PROC(_net_inet_udp, UDPCTL_PCBLIST, pcblist,
- CTLTYPE_OPAQUE | CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, 0,
+ CTLTYPE_OPAQUE | CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, IPPROTO_UDP,
udp_pcblist, "S,xinpcb",
"List of active UDP sockets");
+SYSCTL_PROC(_net_inet_udplite, OID_AUTO, pcblist,
+ CTLTYPE_OPAQUE | CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, IPPROTO_UDPLITE,
+ udp_pcblist, "S,xinpcb",
+ "List of active UDP-Lite sockets");
+
#ifdef INET
static int
udp_getcred(SYSCTL_HANDLER_ARGS)
@@ -1166,7 +1172,19 @@ udp_send(struct socket *so, int flags, struct mbuf *m, struct sockaddr *addr,
else
INP_RLOCK(inp);
NET_EPOCH_ENTER(et);
+#ifdef INET6
+ if ((flags & PRUS_IPV6) != 0) {
+ if ((inp->in6p_outputopts != NULL) &&
+ (inp->in6p_outputopts->ip6po_tclass != -1))
+ tos = (u_char)inp->in6p_outputopts->ip6po_tclass;
+ else
+ tos = 0;
+ } else {
+ tos = inp->inp_ip_tos;
+ }
+#else
tos = inp->inp_ip_tos;
+#endif
if (control != NULL) {
/*
* XXX: Currently, we assume all the optional information is
@@ -1190,6 +1208,23 @@ udp_send(struct socket *so, int flags, struct mbuf *m, struct sockaddr *addr,
error = udp_v4mapped_pktinfo(cm, &src, inp, flags);
if (error != 0)
break;
+ if (((flags & PRUS_IPV6) != 0) &&
+ (cm->cmsg_level == IPPROTO_IPV6) &&
+ (cm->cmsg_type == IPV6_TCLASS)) {
+ int tclass;
+
+ if (cm->cmsg_len != CMSG_LEN(sizeof(int))) {
+ error = EINVAL;
+ break;
+ }
+ tclass = *(int *)CMSG_DATA(cm);
+ if (tclass < -1 || tclass > 255) {
+ error = EINVAL;
+ break;
+ }
+ if (tclass != -1)
+ tos = (u_char)tclass;
+ }
#endif
if (cm->cmsg_level != IPPROTO_IP)
continue;
diff --git a/sys/netinet/udp_var.h b/sys/netinet/udp_var.h
index 3895f365db3c..3ae08fc0b8f0 100644
--- a/sys/netinet/udp_var.h
+++ b/sys/netinet/udp_var.h
@@ -147,6 +147,7 @@ void kmod_udpstat_inc(int statnum);
} while (0)
SYSCTL_DECL(_net_inet_udp);
+SYSCTL_DECL(_net_inet_udplite);
VNET_DECLARE(struct inpcbinfo, udbinfo);
VNET_DECLARE(struct inpcbinfo, ulitecbinfo);