diff options
Diffstat (limited to 'sys/netinet')
| -rw-r--r-- | sys/netinet/in_mcast.c | 113 | ||||
| -rw-r--r-- | sys/netinet/in_pcb.c | 172 | ||||
| -rw-r--r-- | sys/netinet/in_pcb.h | 24 | ||||
| -rw-r--r-- | sys/netinet/in_proto.c | 2 | ||||
| -rw-r--r-- | sys/netinet/libalias/alias_db.c | 2 | ||||
| -rw-r--r-- | sys/netinet/raw_ip.c | 4 | ||||
| -rw-r--r-- | sys/netinet/siftr.c | 2 | ||||
| -rw-r--r-- | sys/netinet/tcp.h | 1 | ||||
| -rw-r--r-- | sys/netinet/tcp_hpts_test.c | 20 | ||||
| -rw-r--r-- | sys/netinet/tcp_input.c | 7 | ||||
| -rw-r--r-- | sys/netinet/tcp_output.c | 2 | ||||
| -rw-r--r-- | sys/netinet/tcp_stacks/bbr.c | 8 | ||||
| -rw-r--r-- | sys/netinet/tcp_stacks/rack.c | 284 | ||||
| -rw-r--r-- | sys/netinet/tcp_stacks/tcp_rack.h | 1 | ||||
| -rw-r--r-- | sys/netinet/tcp_subr.c | 2 | ||||
| -rw-r--r-- | sys/netinet/tcp_syncache.c | 66 | ||||
| -rw-r--r-- | sys/netinet/tcp_syncache.h | 1 | ||||
| -rw-r--r-- | sys/netinet/tcp_timer.c | 7 | ||||
| -rw-r--r-- | sys/netinet/tcp_usrreq.c | 286 | ||||
| -rw-r--r-- | sys/netinet/tcp_var.h | 29 | ||||
| -rw-r--r-- | sys/netinet/udp_usrreq.c | 49 | ||||
| -rw-r--r-- | sys/netinet/udp_var.h | 1 |
22 files changed, 288 insertions, 795 deletions
diff --git a/sys/netinet/in_mcast.c b/sys/netinet/in_mcast.c index f5b20c49ffd2..ba112afbf002 100644 --- a/sys/netinet/in_mcast.c +++ b/sys/netinet/in_mcast.c @@ -159,9 +159,6 @@ static struct ip_moptions * static int inp_get_source_filters(struct inpcb *, struct sockopt *); static int inp_join_group(struct inpcb *, struct sockopt *); static int inp_leave_group(struct inpcb *, struct sockopt *); -static struct ifnet * - inp_lookup_mcast_ifp(const struct inpcb *, - const struct sockaddr_in *, const struct in_addr); static int inp_block_unblock_source(struct inpcb *, struct sockopt *); static int inp_set_multicast_if(struct inpcb *, struct sockopt *); static int inp_set_source_filters(struct inpcb *, struct sockopt *); @@ -1832,69 +1829,55 @@ inp_getmoptions(struct inpcb *inp, struct sockopt *sopt) } /* - * Look up the ifnet to use for a multicast group membership, - * given the IPv4 address of an interface, and the IPv4 group address. - * - * This routine exists to support legacy multicast applications - * which do not understand that multicast memberships are scoped to - * specific physical links in the networking stack, or which need - * to join link-scope groups before IPv4 addresses are configured. - * - * Use this socket's current FIB number for any required FIB lookup. - * If ina is INADDR_ANY, look up the group address in the unicast FIB, - * and use its ifp; usually, this points to the default next-hop. - * - * If the FIB lookup fails, attempt to use the first non-loopback - * interface with multicast capability in the system as a - * last resort. The legacy IPv4 ASM API requires that we do - * this in order to allow groups to be joined when the routing - * table has not yet been populated during boot. - * - * Returns NULL if no ifp could be found, otherwise return referenced ifp. + * Look up the ifnet to join a multicast group membership via legacy + * IP_ADD_MEMBERSHIP or via more modern MCAST_JOIN_GROUP. * - * FUTURE: Implement IPv4 source-address selection. + * If the interface index was specified explicitly, just use it. If the + * address was specified (legacy), try to find matching interface. Else + * (index == 0 && no address) do a route lookup. If that fails for a modern + * MCAST_JOIN_GROUP return failure, for legacy IP_ADD_MEMBERSHIP find first + * multicast capable interface. */ static struct ifnet * -inp_lookup_mcast_ifp(const struct inpcb *inp, - const struct sockaddr_in *gsin, const struct in_addr ina) +inp_lookup_mcast_ifp(const struct inpcb *inp, const struct in_addr maddr, +const struct in_addr *ina, const u_int index) { struct ifnet *ifp; struct nhop_object *nh; NET_EPOCH_ASSERT(); - KASSERT(inp != NULL, ("%s: inp must not be NULL", __func__)); - KASSERT(gsin->sin_family == AF_INET, ("%s: not AF_INET", __func__)); - KASSERT(IN_MULTICAST(ntohl(gsin->sin_addr.s_addr)), - ("%s: not multicast", __func__)); - ifp = NULL; - if (!in_nullhost(ina)) { - INADDR_TO_IFP(ina, ifp); + if (index != 0) + return (ifnet_byindex_ref(index)); + + if (ina != NULL && !in_nullhost(*ina)) { + INADDR_TO_IFP(*ina, ifp); if (ifp != NULL) if_ref(ifp); - } else { - nh = fib4_lookup(inp->inp_inc.inc_fibnum, gsin->sin_addr, 0, NHR_NONE, 0); - if (nh != NULL) { - ifp = nh->nh_ifp; - if_ref(ifp); - } else { - struct in_ifaddr *ia; - struct ifnet *mifp; - - mifp = NULL; - CK_STAILQ_FOREACH(ia, &V_in_ifaddrhead, ia_link) { - mifp = ia->ia_ifp; - if (!(mifp->if_flags & IFF_LOOPBACK) && - (mifp->if_flags & IFF_MULTICAST)) { - ifp = mifp; - if_ref(ifp); - break; - } + return (ifp); + } + + nh = fib4_lookup(inp->inp_inc.inc_fibnum, maddr, 0, NHR_NONE, 0); + if (nh != NULL) { + ifp = nh->nh_ifp; + if_ref(ifp); + return (ifp); + } + + if (ina != NULL) { + struct in_ifaddr *ia; + + CK_STAILQ_FOREACH(ia, &V_in_ifaddrhead, ia_link) { + if (!(ia->ia_ifp->if_flags & IFF_LOOPBACK) && + (ia->ia_ifp->if_flags & IFF_MULTICAST)) { + ifp = ia->ia_ifp; + if_ref(ifp); + return (ifp); } } } - return (ifp); + return (NULL); } /* @@ -1926,13 +1909,13 @@ inp_join_group(struct inpcb *inp, struct sockopt *sopt) switch (sopt->sopt_name) { case IP_ADD_MEMBERSHIP: { struct ip_mreqn mreqn; + bool mreq; - if (sopt->sopt_valsize == sizeof(struct ip_mreqn)) - error = sooptcopyin(sopt, &mreqn, - sizeof(struct ip_mreqn), sizeof(struct ip_mreqn)); - else - error = sooptcopyin(sopt, &mreqn, - sizeof(struct ip_mreq), sizeof(struct ip_mreq)); + mreq = (sopt->sopt_valsize != sizeof(struct ip_mreqn)); + + error = sooptcopyin(sopt, &mreqn, + mreq ? sizeof(struct ip_mreq) : sizeof(struct ip_mreqn), + mreq ? sizeof(struct ip_mreq) : sizeof(struct ip_mreqn)); if (error) return (error); @@ -1943,12 +1926,9 @@ inp_join_group(struct inpcb *inp, struct sockopt *sopt) return (EINVAL); NET_EPOCH_ENTER(et); - if (sopt->sopt_valsize == sizeof(struct ip_mreqn) && - mreqn.imr_ifindex != 0) - ifp = ifnet_byindex_ref(mreqn.imr_ifindex); - else - ifp = inp_lookup_mcast_ifp(inp, &gsa->sin, - mreqn.imr_address); + ifp = inp_lookup_mcast_ifp(inp, mreqn.imr_multiaddr, + mreq ? &mreqn.imr_address : NULL, + mreq ? 0 : mreqn.imr_ifindex); NET_EPOCH_EXIT(et); break; } @@ -1971,8 +1951,8 @@ inp_join_group(struct inpcb *inp, struct sockopt *sopt) ssa->sin.sin_addr = mreqs.imr_sourceaddr; NET_EPOCH_ENTER(et); - ifp = inp_lookup_mcast_ifp(inp, &gsa->sin, - mreqs.imr_interface); + ifp = inp_lookup_mcast_ifp(inp, mreqs.imr_multiaddr, + &mreqs.imr_interface, 0); NET_EPOCH_EXIT(et); CTR3(KTR_IGMPV3, "%s: imr_interface = 0x%08x, ifp = %p", __func__, ntohl(mreqs.imr_interface.s_addr), ifp); @@ -2013,7 +1993,8 @@ inp_join_group(struct inpcb *inp, struct sockopt *sopt) return (EINVAL); NET_EPOCH_ENTER(et); - ifp = ifnet_byindex_ref(gsr.gsr_interface); + ifp = inp_lookup_mcast_ifp(inp, gsa->sin.sin_addr, NULL, + gsr.gsr_interface); NET_EPOCH_EXIT(et); if (ifp == NULL) return (EADDRNOTAVAIL); diff --git a/sys/netinet/in_pcb.c b/sys/netinet/in_pcb.c index dbe48242381d..b7dae78fb2c2 100644 --- a/sys/netinet/in_pcb.c +++ b/sys/netinet/in_pcb.c @@ -2665,10 +2665,13 @@ in_pcbinshash(struct inpcb *inp) INP_PCBPORTHASH(inp->inp_lport, pcbinfo->ipi_porthashmask)]; /* - * Add entry to load balance group. - * Only do this if SO_REUSEPORT_LB is set. + * Ignore SO_REUSEPORT_LB if the socket is connected. Really this case + * should be an error, but for UDP sockets it is not, and some + * applications erroneously set it on connected UDP sockets, so we can't + * change this without breaking compatibility. */ - if ((inp->inp_socket->so_options & SO_REUSEPORT_LB) != 0) { + if (!connected && + (inp->inp_socket->so_options & SO_REUSEPORT_LB) != 0) { int error = in_pcbinslbgrouphash(inp, M_NODOM); if (error != 0) return (error); @@ -2770,6 +2773,10 @@ in_pcbrehash(struct inpcb *inp) connected = !in_nullhost(inp->inp_faddr); } + /* See the comment in in_pcbinshash(). */ + if (connected && (inp->inp_flags & INP_INLBGROUP) != 0) + in_pcbremlbgrouphash(inp); + /* * When rehashing, the caller must ensure that either the new or the old * foreign address was unspecified. @@ -3051,143 +3058,7 @@ db_print_inconninfo(struct in_conninfo *inc, const char *name, int indent) ntohs(inc->inc_fport)); } -static void -db_print_inpflags(int inp_flags) -{ - int comma; - - comma = 0; - if (inp_flags & INP_RECVOPTS) { - db_printf("%sINP_RECVOPTS", comma ? ", " : ""); - comma = 1; - } - if (inp_flags & INP_RECVRETOPTS) { - db_printf("%sINP_RECVRETOPTS", comma ? ", " : ""); - comma = 1; - } - if (inp_flags & INP_RECVDSTADDR) { - db_printf("%sINP_RECVDSTADDR", comma ? ", " : ""); - comma = 1; - } - if (inp_flags & INP_ORIGDSTADDR) { - db_printf("%sINP_ORIGDSTADDR", comma ? ", " : ""); - comma = 1; - } - if (inp_flags & INP_HDRINCL) { - db_printf("%sINP_HDRINCL", comma ? ", " : ""); - comma = 1; - } - if (inp_flags & INP_HIGHPORT) { - db_printf("%sINP_HIGHPORT", comma ? ", " : ""); - comma = 1; - } - if (inp_flags & INP_LOWPORT) { - db_printf("%sINP_LOWPORT", comma ? ", " : ""); - comma = 1; - } - if (inp_flags & INP_ANONPORT) { - db_printf("%sINP_ANONPORT", comma ? ", " : ""); - comma = 1; - } - if (inp_flags & INP_RECVIF) { - db_printf("%sINP_RECVIF", comma ? ", " : ""); - comma = 1; - } - if (inp_flags & INP_MTUDISC) { - db_printf("%sINP_MTUDISC", comma ? ", " : ""); - comma = 1; - } - if (inp_flags & INP_RECVTTL) { - db_printf("%sINP_RECVTTL", comma ? ", " : ""); - comma = 1; - } - if (inp_flags & INP_DONTFRAG) { - db_printf("%sINP_DONTFRAG", comma ? ", " : ""); - comma = 1; - } - if (inp_flags & INP_RECVTOS) { - db_printf("%sINP_RECVTOS", comma ? ", " : ""); - comma = 1; - } - if (inp_flags & IN6P_IPV6_V6ONLY) { - db_printf("%sIN6P_IPV6_V6ONLY", comma ? ", " : ""); - comma = 1; - } - if (inp_flags & IN6P_PKTINFO) { - db_printf("%sIN6P_PKTINFO", comma ? ", " : ""); - comma = 1; - } - if (inp_flags & IN6P_HOPLIMIT) { - db_printf("%sIN6P_HOPLIMIT", comma ? ", " : ""); - comma = 1; - } - if (inp_flags & IN6P_HOPOPTS) { - db_printf("%sIN6P_HOPOPTS", comma ? ", " : ""); - comma = 1; - } - if (inp_flags & IN6P_DSTOPTS) { - db_printf("%sIN6P_DSTOPTS", comma ? ", " : ""); - comma = 1; - } - if (inp_flags & IN6P_RTHDR) { - db_printf("%sIN6P_RTHDR", comma ? ", " : ""); - comma = 1; - } - if (inp_flags & IN6P_RTHDRDSTOPTS) { - db_printf("%sIN6P_RTHDRDSTOPTS", comma ? ", " : ""); - comma = 1; - } - if (inp_flags & IN6P_TCLASS) { - db_printf("%sIN6P_TCLASS", comma ? ", " : ""); - comma = 1; - } - if (inp_flags & IN6P_AUTOFLOWLABEL) { - db_printf("%sIN6P_AUTOFLOWLABEL", comma ? ", " : ""); - comma = 1; - } - if (inp_flags & INP_ONESBCAST) { - db_printf("%sINP_ONESBCAST", comma ? ", " : ""); - comma = 1; - } - if (inp_flags & INP_DROPPED) { - db_printf("%sINP_DROPPED", comma ? ", " : ""); - comma = 1; - } - if (inp_flags & INP_SOCKREF) { - db_printf("%sINP_SOCKREF", comma ? ", " : ""); - comma = 1; - } - if (inp_flags & IN6P_RFC2292) { - db_printf("%sIN6P_RFC2292", comma ? ", " : ""); - comma = 1; - } - if (inp_flags & IN6P_MTU) { - db_printf("IN6P_MTU%s", comma ? ", " : ""); - comma = 1; - } -} - -static void -db_print_inpvflag(u_char inp_vflag) -{ - int comma; - - comma = 0; - if (inp_vflag & INP_IPV4) { - db_printf("%sINP_IPV4", comma ? ", " : ""); - comma = 1; - } - if (inp_vflag & INP_IPV6) { - db_printf("%sINP_IPV6", comma ? ", " : ""); - comma = 1; - } - if (inp_vflag & INP_IPV6PROTO) { - db_printf("%sINP_IPV6PROTO", comma ? ", " : ""); - comma = 1; - } -} - -static void +void db_print_inpcb(struct inpcb *inp, const char *name, int indent) { @@ -3197,38 +3068,39 @@ db_print_inpcb(struct inpcb *inp, const char *name, int indent) indent += 2; db_print_indent(indent); - db_printf("inp_flow: 0x%x\n", inp->inp_flow); + db_printf("inp_flow: 0x%x inp_label: %p\n", inp->inp_flow, + inp->inp_label); db_print_inconninfo(&inp->inp_inc, "inp_conninfo", indent); db_print_indent(indent); - db_printf("inp_label: %p inp_flags: 0x%x (", - inp->inp_label, inp->inp_flags); - db_print_inpflags(inp->inp_flags); - db_printf(")\n"); + db_printf("inp_flags: 0x%b\n", inp->inp_flags, INP_FLAGS_BITS); db_print_indent(indent); - db_printf("inp_sp: %p inp_vflag: 0x%x (", inp->inp_sp, - inp->inp_vflag); - db_print_inpvflag(inp->inp_vflag); - db_printf(")\n"); + db_printf("inp_flags2: 0x%b\n", inp->inp_flags2, INP_FLAGS2_BITS); + + db_print_indent(indent); + db_printf("inp_sp: %p inp_vflag: 0x%b\n", inp->inp_sp, + inp->inp_vflag, INP_VFLAGS_BITS); db_print_indent(indent); db_printf("inp_ip_ttl: %d inp_ip_p: %d inp_ip_minttl: %d\n", inp->inp_ip_ttl, inp->inp_ip_p, inp->inp_ip_minttl); - db_print_indent(indent); #ifdef INET6 if (inp->inp_vflag & INP_IPV6) { + db_print_indent(indent); db_printf("in6p_options: %p in6p_outputopts: %p " "in6p_moptions: %p\n", inp->in6p_options, inp->in6p_outputopts, inp->in6p_moptions); + db_print_indent(indent); db_printf("in6p_icmp6filt: %p in6p_cksum %d " "in6p_hops %u\n", inp->in6p_icmp6filt, inp->in6p_cksum, inp->in6p_hops); } else #endif { + db_print_indent(indent); db_printf("inp_ip_tos: %d inp_ip_options: %p " "inp_ip_moptions: %p\n", inp->inp_ip_tos, inp->inp_options, inp->inp_moptions); diff --git a/sys/netinet/in_pcb.h b/sys/netinet/in_pcb.h index 9e0618e87601..975b8129c70d 100644 --- a/sys/netinet/in_pcb.h +++ b/sys/netinet/in_pcb.h @@ -539,6 +539,9 @@ void inp_4tuple_get(struct inpcb *inp, uint32_t *laddr, uint16_t *lp, #define INP_IPV6 0x2 #define INP_IPV6PROTO 0x4 /* opened under IPv6 protocol */ +/* inp_vflags description for use with printf(9) %b identifier. */ +#define INP_VFLAGS_BITS "\20\1INP_IPV4\2INP_IPV6\3INP_IPV6PROTO" + /* * Flags for inp_flags. */ @@ -582,6 +585,17 @@ void inp_4tuple_get(struct inpcb *inp, uint32_t *laddr, uint16_t *lp, IN6P_TCLASS|IN6P_AUTOFLOWLABEL|IN6P_RFC2292|\ IN6P_MTU) +/* inp_flags description for use with printf(9) %b identifier. */ +#define INP_FLAGS_BITS "\20" \ + "\1INP_RECVOPTS\2INP_RECVRETOPTS\3INP_RECVDSTADDR\4INP_HDRINCL" \ + "\5INP_HIGHPORT\6INP_LOWPORT\7INP_ANONPORT\10INP_RECVIF" \ + "\11INP_MTUDISC\12INP_FREED\13INP_RECVTTL\14INP_DONTFRAG" \ + "\15INP_BINDANY\16INP_INHASHLIST\17INP_RECVTOS\20IN6P_IPV6_V6ONLY" \ + "\21IN6P_PKTINFO\22IN6P_HOPLIMIT\23IN6P_HOPOPTS\24IN6P_DSTOPTS" \ + "\25IN6P_RTHDR\26IN6P_RTHDRDSTOPTS\27IN6P_TCLASS\30IN6P_AUTOFLOWLABEL" \ + "\31INP_INLBGROUP\32INP_ONESBCAST\33INP_DROPPED\34INP_SOCKREF" \ + "\35INP_RESERVED_0\36INP_BOUNDFIB\37IN6P_RFC2292\40IN6P_MTU" + /* * Flags for inp_flags2. */ @@ -610,6 +624,13 @@ void inp_4tuple_get(struct inpcb *inp, uint32_t *laddr, uint16_t *lp, #define INP_2PCP_MASK (INP_2PCP_BIT0 | INP_2PCP_BIT1 | INP_2PCP_BIT2) #define INP_2PCP_SHIFT 18 /* shift PCP field in/out of inp_flags2 */ +/* inp_flags2 description for use with printf(9) %b identifier. */ +#define INP_FLAGS2_BITS "\20" \ + "\11INP_RECVFLOWID\12INP_RECVRSSBUCKETID" \ + "\13INP_RATE_LIMIT_CHANGED\14INP_ORIGDSTADDR" \ + "\22INP_2PCP_SET\23INP_2PCP_BIT0\24INP_2PCP_BIT1" \ + "\25INP_2PCP_BIT2" + /* * Flags passed to in_pcblookup*(), inp_smr_lock() and inp_next(). */ @@ -730,6 +751,9 @@ int in_pcbquery_txrlevel(struct inpcb *, uint32_t *); void in_pcboutput_txrtlmt(struct inpcb *, struct ifnet *, struct mbuf *); void in_pcboutput_eagain(struct inpcb *); #endif +#ifdef DDB +void db_print_inpcb(struct inpcb *, const char *, int); +#endif #endif /* _KERNEL */ #endif /* !_NETINET_IN_PCB_H_ */ diff --git a/sys/netinet/in_proto.c b/sys/netinet/in_proto.c index db46da6022c5..42a6cf0b5810 100644 --- a/sys/netinet/in_proto.c +++ b/sys/netinet/in_proto.c @@ -108,6 +108,8 @@ SYSCTL_NODE(_net_inet, IPPROTO_ICMP, icmp, CTLFLAG_RW | CTLFLAG_MPSAFE, 0, "ICMP"); SYSCTL_NODE(_net_inet, IPPROTO_UDP, udp, CTLFLAG_RW | CTLFLAG_MPSAFE, 0, "UDP"); +SYSCTL_NODE(_net_inet, IPPROTO_UDPLITE, udplite, CTLFLAG_RW | CTLFLAG_MPSAFE, 0, + "UDP-Lite"); SYSCTL_NODE(_net_inet, IPPROTO_TCP, tcp, CTLFLAG_RW | CTLFLAG_MPSAFE, 0, "TCP"); #if defined(SCTP) || defined(SCTP_SUPPORT) diff --git a/sys/netinet/libalias/alias_db.c b/sys/netinet/libalias/alias_db.c index c143d74a2f45..41f0a328daec 100644 --- a/sys/netinet/libalias/alias_db.c +++ b/sys/netinet/libalias/alias_db.c @@ -2181,7 +2181,7 @@ LibAliasInit(struct libalias *la) #undef malloc /* XXX: ugly */ la = malloc(sizeof *la, M_ALIAS, M_WAITOK | M_ZERO); #else - la = calloc(sizeof *la, 1); + la = calloc(1, sizeof *la); if (la == NULL) return (la); #endif diff --git a/sys/netinet/raw_ip.c b/sys/netinet/raw_ip.c index 66070faf97e9..bfe608be6b36 100644 --- a/sys/netinet/raw_ip.c +++ b/sys/netinet/raw_ip.c @@ -680,7 +680,6 @@ rip_ctloutput(struct socket *so, struct sockopt *sopt) break; case IP_DUMMYNET3: /* generic dummynet v.3 functions */ - case IP_DUMMYNET_GET: if (ip_dn_ctl_ptr != NULL) error = ip_dn_ctl_ptr(sopt); else @@ -747,9 +746,6 @@ rip_ctloutput(struct socket *so, struct sockopt *sopt) break; case IP_DUMMYNET3: /* generic dummynet v.3 functions */ - case IP_DUMMYNET_CONFIGURE: - case IP_DUMMYNET_DEL: - case IP_DUMMYNET_FLUSH: if (ip_dn_ctl_ptr != NULL) error = ip_dn_ctl_ptr(sopt); else diff --git a/sys/netinet/siftr.c b/sys/netinet/siftr.c index 374b5595fcbc..5b89ca026e85 100644 --- a/sys/netinet/siftr.c +++ b/sys/netinet/siftr.c @@ -519,7 +519,7 @@ siftr_pkt_manager_thread(void *arg) if (log_buf != NULL) { alq_post_flags(siftr_alq, log_buf, 0); } - for (;cnt > 0; cnt--) { + for (; cnt > 0; cnt--) { pkt_node = STAILQ_FIRST(&tmp_pkt_queue); STAILQ_REMOVE_HEAD(&tmp_pkt_queue, nodes); free(pkt_node, M_SIFTR_PKTNODE); diff --git a/sys/netinet/tcp.h b/sys/netinet/tcp.h index 41a49b318cd5..cab7d1c5e726 100644 --- a/sys/netinet/tcp.h +++ b/sys/netinet/tcp.h @@ -541,7 +541,6 @@ struct tcp_log_user { #define TCP_HYBRID_PACING_H_MS 0x0008 /* A client hint for maxseg is present */ #define TCP_HYBRID_PACING_ENABLE 0x0010 /* We are enabling hybrid pacing else disable */ #define TCP_HYBRID_PACING_S_MSS 0x0020 /* Clent wants us to set the mss overriding gp est in CU */ -#define TCP_HAS_PLAYOUT_MS 0x0040 /* The client included the chunk playout milliseconds: deprecate */ /* the below are internal only flags */ #define TCP_HYBRID_PACING_USER_MASK 0x0FFF /* Non-internal flags mask */ #define TCP_HYBRID_PACING_SETMSS 0x1000 /* Internal flag that tells us we set the mss on this entry */ diff --git a/sys/netinet/tcp_hpts_test.c b/sys/netinet/tcp_hpts_test.c index bab5827e0572..c5dc9cb5b03b 100644 --- a/sys/netinet/tcp_hpts_test.c +++ b/sys/netinet/tcp_hpts_test.c @@ -27,6 +27,7 @@ #include <tests/ktest.h> #include <sys/cdefs.h> +#include "opt_inet.h" #include <sys/param.h> #include <sys/bus.h> #include <sys/interrupt.h> @@ -119,6 +120,8 @@ SYSCTL_INT(_net_inet_tcp_hpts_test, OID_AUTO, exit_on_failure, CTLFLAG_RW, } \ } while (0) +#ifdef TCP_HPTS_KTEST + static void dump_hpts_entry(struct ktest_test_context *ctx, struct tcp_hpts_entry *hpts) { @@ -1658,5 +1661,22 @@ static const struct ktest_test_info tests[] = { KTEST_INFO(generation_count_validation), }; +#else /* TCP_HPTS_KTEST */ + +/* + * Stub to indicate that the TCP HPTS ktest is not enabled. + */ +KTEST_FUNC(module_load_without_tests) +{ + KTEST_LOG(ctx, "Warning: TCP HPTS ktest is not enabled"); + return (0); +} + +static const struct ktest_test_info tests[] = { + KTEST_INFO(module_load_without_tests), +}; + +#endif + KTEST_MODULE_DECLARE(ktest_tcphpts, tests); KTEST_MODULE_DEPEND(ktest_tcphpts, tcphpts); diff --git a/sys/netinet/tcp_input.c b/sys/netinet/tcp_input.c index dd27ec77c1af..9c58c2815d13 100644 --- a/sys/netinet/tcp_input.c +++ b/sys/netinet/tcp_input.c @@ -219,7 +219,7 @@ SYSCTL_INT(_net_inet_tcp, OID_AUTO, recvbuf_auto, CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(tcp_do_autorcvbuf), 0, "Enable automatic receive buffer sizing"); -VNET_DEFINE(int, tcp_autorcvbuf_max) = 2*1024*1024; +VNET_DEFINE(int, tcp_autorcvbuf_max) = 8*1024*1024; SYSCTL_INT(_net_inet_tcp, OID_AUTO, recvbuf_max, CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(tcp_autorcvbuf_max), 0, "Max size of automatic receive buffer"); @@ -1192,11 +1192,10 @@ tfo_socket_result: if (thflags & TH_ACK) { if ((s = tcp_log_addrs(&inc, th, NULL, NULL))) log(LOG_DEBUG, "%s; %s: Listen socket: " - "SYN|ACK invalid, segment rejected\n", + "SYN|ACK invalid, segment ignored\n", s, __func__); - syncache_badack(&inc, port); /* XXX: Not needed! */ TCPSTAT_INC(tcps_badsyn); - goto dropwithreset; + goto dropunlock; } /* * If the drop_synfin option is enabled, drop all diff --git a/sys/netinet/tcp_output.c b/sys/netinet/tcp_output.c index 2dfb7faf56e3..208f72c4661c 100644 --- a/sys/netinet/tcp_output.c +++ b/sys/netinet/tcp_output.c @@ -123,7 +123,7 @@ SYSCTL_INT(_net_inet_tcp, OID_AUTO, sendbuf_inc, CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(tcp_autosndbuf_inc), 0, "Incrementor step size of automatic send buffer"); -VNET_DEFINE(int, tcp_autosndbuf_max) = 2*1024*1024; +VNET_DEFINE(int, tcp_autosndbuf_max) = 8*1024*1024; SYSCTL_INT(_net_inet_tcp, OID_AUTO, sendbuf_max, CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(tcp_autosndbuf_max), 0, "Max size of automatic send buffer"); diff --git a/sys/netinet/tcp_stacks/bbr.c b/sys/netinet/tcp_stacks/bbr.c index 66983edcdd73..10383bc0801e 100644 --- a/sys/netinet/tcp_stacks/bbr.c +++ b/sys/netinet/tcp_stacks/bbr.c @@ -477,7 +477,7 @@ bbr_log_rtt_shrinks(struct tcp_bbr *bbr, uint32_t cts, uint32_t applied, uint16_t set); static struct bbr_sendmap * bbr_find_lowest_rsm(struct tcp_bbr *bbr); -static __inline uint32_t +static inline uint32_t bbr_get_rtt(struct tcp_bbr *bbr, int32_t rtt_type); static void bbr_log_to_start(struct tcp_bbr *bbr, uint32_t cts, uint32_t to, int32_t pacing_delay, @@ -1841,7 +1841,7 @@ bbr_counter_destroy(void) } -static __inline void +static inline void bbr_fill_in_logging_data(struct tcp_bbr *bbr, struct tcp_log_bbr *l, uint32_t cts) { memset(l, 0, sizeof(union tcp_log_stackspecific)); @@ -4206,7 +4206,7 @@ bbr_calc_thresh_tlp(struct tcpcb *tp, struct tcp_bbr *bbr, /* * Return one of three RTTs to use (in microseconds). */ -static __inline uint32_t +static inline uint32_t bbr_get_rtt(struct tcp_bbr *bbr, int32_t rtt_type) { uint32_t f_rtt; @@ -4370,7 +4370,7 @@ bbr_timeout_rack(struct tcpcb *tp, struct tcp_bbr *bbr, uint32_t cts) return (0); } -static __inline void +static inline void bbr_clone_rsm(struct tcp_bbr *bbr, struct bbr_sendmap *nrsm, struct bbr_sendmap *rsm, uint32_t start) { int idx; diff --git a/sys/netinet/tcp_stacks/rack.c b/sys/netinet/tcp_stacks/rack.c index c7962b57a69e..9ed26d5a617b 100644 --- a/sys/netinet/tcp_stacks/rack.c +++ b/sys/netinet/tcp_stacks/rack.c @@ -204,10 +204,6 @@ static int32_t rack_dnd_default = 0; /* For rr_conf = 3, what is the default fo static int32_t rack_rxt_controls = 0; static int32_t rack_fill_cw_state = 0; static uint8_t rack_req_measurements = 1; -/* Attack threshold detections */ -static uint32_t rack_highest_sack_thresh_seen = 0; -static uint32_t rack_highest_move_thresh_seen = 0; -static uint32_t rack_merge_out_sacks_on_attack = 0; static int32_t rack_enable_hw_pacing = 0; /* Due to CCSP keep it off by default */ static int32_t rack_hw_rate_caps = 0; /* 1; */ static int32_t rack_hw_rate_cap_per = 0; /* 0 -- off */ @@ -223,7 +219,6 @@ static int32_t rack_default_pacing_divisor = 250; static uint16_t rack_pacing_min_seg = 0; static int32_t rack_timely_off = 0; -static uint32_t sad_seg_size_per = 800; /* 80.0 % */ static int32_t rack_pkt_delay = 1000; static int32_t rack_send_a_lot_in_prr = 1; static int32_t rack_min_to = 1000; /* Number of microsecond min timeout */ @@ -399,18 +394,6 @@ counter_u64_t rack_extended_rfo; counter_u64_t rack_sack_proc_all; counter_u64_t rack_sack_proc_short; counter_u64_t rack_sack_proc_restart; -counter_u64_t rack_sack_attacks_detected; -counter_u64_t rack_sack_attacks_reversed; -counter_u64_t rack_sack_attacks_suspect; -counter_u64_t rack_sack_used_next_merge; -counter_u64_t rack_sack_splits; -counter_u64_t rack_sack_used_prev_merge; -counter_u64_t rack_sack_skipped_acked; -counter_u64_t rack_ack_total; -counter_u64_t rack_express_sack; -counter_u64_t rack_sack_total; -counter_u64_t rack_move_none; -counter_u64_t rack_move_some; counter_u64_t rack_input_idle_reduces; counter_u64_t rack_collapsed_win; @@ -834,18 +817,6 @@ sysctl_rack_clear(SYSCTL_HANDLER_ARGS) counter_u64_zero(rack_rxt_clamps_cwnd_uniq); counter_u64_zero(rack_multi_single_eq); counter_u64_zero(rack_proc_non_comp_ack); - counter_u64_zero(rack_sack_attacks_detected); - counter_u64_zero(rack_sack_attacks_reversed); - counter_u64_zero(rack_sack_attacks_suspect); - counter_u64_zero(rack_sack_used_next_merge); - counter_u64_zero(rack_sack_used_prev_merge); - counter_u64_zero(rack_sack_splits); - counter_u64_zero(rack_sack_skipped_acked); - counter_u64_zero(rack_ack_total); - counter_u64_zero(rack_express_sack); - counter_u64_zero(rack_sack_total); - counter_u64_zero(rack_move_none); - counter_u64_zero(rack_move_some); counter_u64_zero(rack_try_scwnd); counter_u64_zero(rack_collapsed_win); counter_u64_zero(rack_collapsed_win_rxt); @@ -872,7 +843,6 @@ static void rack_init_sysctls(void) { struct sysctl_oid *rack_counters; - struct sysctl_oid *rack_attack; struct sysctl_oid *rack_pacing; struct sysctl_oid *rack_timely; struct sysctl_oid *rack_timers; @@ -883,12 +853,6 @@ rack_init_sysctls(void) struct sysctl_oid *rack_probertt; struct sysctl_oid *rack_hw_pacing; - rack_attack = SYSCTL_ADD_NODE(&rack_sysctl_ctx, - SYSCTL_CHILDREN(rack_sysctl_root), - OID_AUTO, - "sack_attack", - CTLFLAG_RW | CTLFLAG_MPSAFE, 0, - "Rack Sack Attack Counters and Controls"); rack_counters = SYSCTL_ADD_NODE(&rack_sysctl_ctx, SYSCTL_CHILDREN(rack_sysctl_root), OID_AUTO, @@ -1535,11 +1499,6 @@ rack_init_sysctls(void) "Do not disturb default for rack_rrr = 3"); SYSCTL_ADD_S32(&rack_sysctl_ctx, SYSCTL_CHILDREN(rack_misc), - OID_AUTO, "sad_seg_per", CTLFLAG_RW, - &sad_seg_size_per, 800, - "Percentage of segment size needed in a sack 800 = 80.0?"); - SYSCTL_ADD_S32(&rack_sysctl_ctx, - SYSCTL_CHILDREN(rack_misc), OID_AUTO, "rxt_controls", CTLFLAG_RW, &rack_rxt_controls, 0, "Retransmit sending size controls (valid values 0, 1, 2 default=1)?"); @@ -1619,85 +1578,6 @@ rack_init_sysctls(void) &rack_autosndbuf_inc, 20, "What percentage should rack scale up its snd buffer by?"); - - /* Sack Attacker detection stuff */ - SYSCTL_ADD_U32(&rack_sysctl_ctx, - SYSCTL_CHILDREN(rack_attack), - OID_AUTO, "merge_out", CTLFLAG_RW, - &rack_merge_out_sacks_on_attack, 0, - "Do we merge the sendmap when we decide we are being attacked?"); - - SYSCTL_ADD_U32(&rack_sysctl_ctx, - SYSCTL_CHILDREN(rack_attack), - OID_AUTO, "detect_highsackratio", CTLFLAG_RW, - &rack_highest_sack_thresh_seen, 0, - "Highest sack to ack ratio seen"); - SYSCTL_ADD_U32(&rack_sysctl_ctx, - SYSCTL_CHILDREN(rack_attack), - OID_AUTO, "detect_highmoveratio", CTLFLAG_RW, - &rack_highest_move_thresh_seen, 0, - "Highest move to non-move ratio seen"); - rack_ack_total = counter_u64_alloc(M_WAITOK); - SYSCTL_ADD_COUNTER_U64(&rack_sysctl_ctx, - SYSCTL_CHILDREN(rack_attack), - OID_AUTO, "acktotal", CTLFLAG_RD, - &rack_ack_total, - "Total number of Ack's"); - rack_express_sack = counter_u64_alloc(M_WAITOK); - SYSCTL_ADD_COUNTER_U64(&rack_sysctl_ctx, - SYSCTL_CHILDREN(rack_attack), - OID_AUTO, "exp_sacktotal", CTLFLAG_RD, - &rack_express_sack, - "Total expresss number of Sack's"); - rack_sack_total = counter_u64_alloc(M_WAITOK); - SYSCTL_ADD_COUNTER_U64(&rack_sysctl_ctx, - SYSCTL_CHILDREN(rack_attack), - OID_AUTO, "sacktotal", CTLFLAG_RD, - &rack_sack_total, - "Total number of SACKs"); - rack_move_none = counter_u64_alloc(M_WAITOK); - SYSCTL_ADD_COUNTER_U64(&rack_sysctl_ctx, - SYSCTL_CHILDREN(rack_attack), - OID_AUTO, "move_none", CTLFLAG_RD, - &rack_move_none, - "Total number of SACK index reuse of positions under threshold"); - rack_move_some = counter_u64_alloc(M_WAITOK); - SYSCTL_ADD_COUNTER_U64(&rack_sysctl_ctx, - SYSCTL_CHILDREN(rack_attack), - OID_AUTO, "move_some", CTLFLAG_RD, - &rack_move_some, - "Total number of SACK index reuse of positions over threshold"); - rack_sack_attacks_detected = counter_u64_alloc(M_WAITOK); - SYSCTL_ADD_COUNTER_U64(&rack_sysctl_ctx, - SYSCTL_CHILDREN(rack_attack), - OID_AUTO, "attacks", CTLFLAG_RD, - &rack_sack_attacks_detected, - "Total number of SACK attackers that had sack disabled"); - rack_sack_attacks_reversed = counter_u64_alloc(M_WAITOK); - SYSCTL_ADD_COUNTER_U64(&rack_sysctl_ctx, - SYSCTL_CHILDREN(rack_attack), - OID_AUTO, "reversed", CTLFLAG_RD, - &rack_sack_attacks_reversed, - "Total number of SACK attackers that were later determined false positive"); - rack_sack_attacks_suspect = counter_u64_alloc(M_WAITOK); - SYSCTL_ADD_COUNTER_U64(&rack_sysctl_ctx, - SYSCTL_CHILDREN(rack_attack), - OID_AUTO, "suspect", CTLFLAG_RD, - &rack_sack_attacks_suspect, - "Total number of SACKs that triggered early detection"); - - rack_sack_used_next_merge = counter_u64_alloc(M_WAITOK); - SYSCTL_ADD_COUNTER_U64(&rack_sysctl_ctx, - SYSCTL_CHILDREN(rack_attack), - OID_AUTO, "nextmerge", CTLFLAG_RD, - &rack_sack_used_next_merge, - "Total number of times we used the next merge"); - rack_sack_used_prev_merge = counter_u64_alloc(M_WAITOK); - SYSCTL_ADD_COUNTER_U64(&rack_sysctl_ctx, - SYSCTL_CHILDREN(rack_attack), - OID_AUTO, "prevmerge", CTLFLAG_RD, - &rack_sack_used_prev_merge, - "Total number of times we used the prev merge"); /* Counters */ rack_total_bytes = counter_u64_alloc(M_WAITOK); SYSCTL_ADD_COUNTER_U64(&rack_sysctl_ctx, @@ -1908,18 +1788,6 @@ rack_init_sysctls(void) OID_AUTO, "sack_short", CTLFLAG_RD, &rack_sack_proc_short, "Total times we took shortcut for sack processing"); - rack_sack_skipped_acked = counter_u64_alloc(M_WAITOK); - SYSCTL_ADD_COUNTER_U64(&rack_sysctl_ctx, - SYSCTL_CHILDREN(rack_attack), - OID_AUTO, "skipacked", CTLFLAG_RD, - &rack_sack_skipped_acked, - "Total number of times we skipped previously sacked"); - rack_sack_splits = counter_u64_alloc(M_WAITOK); - SYSCTL_ADD_COUNTER_U64(&rack_sysctl_ctx, - SYSCTL_CHILDREN(rack_attack), - OID_AUTO, "ofsplit", CTLFLAG_RD, - &rack_sack_splits, - "Total number of times we did the old fashion tree split"); rack_input_idle_reduces = counter_u64_alloc(M_WAITOK); SYSCTL_ADD_COUNTER_U64(&rack_sysctl_ctx, SYSCTL_CHILDREN(rack_counters), @@ -3319,16 +3187,6 @@ rack_counter_destroy(void) counter_u64_free(rack_hw_pace_lost); counter_u64_free(rack_non_fto_send); counter_u64_free(rack_extended_rfo); - counter_u64_free(rack_ack_total); - counter_u64_free(rack_express_sack); - counter_u64_free(rack_sack_total); - counter_u64_free(rack_move_none); - counter_u64_free(rack_move_some); - counter_u64_free(rack_sack_attacks_detected); - counter_u64_free(rack_sack_attacks_reversed); - counter_u64_free(rack_sack_attacks_suspect); - counter_u64_free(rack_sack_used_next_merge); - counter_u64_free(rack_sack_used_prev_merge); counter_u64_free(rack_tlp_tot); counter_u64_free(rack_tlp_newdata); counter_u64_free(rack_tlp_retran); @@ -3351,8 +3209,6 @@ rack_counter_destroy(void) counter_u64_free(rack_sack_proc_all); counter_u64_free(rack_sack_proc_restart); counter_u64_free(rack_sack_proc_short); - counter_u64_free(rack_sack_skipped_acked); - counter_u64_free(rack_sack_splits); counter_u64_free(rack_input_idle_reduces); counter_u64_free(rack_collapsed_win); counter_u64_free(rack_collapsed_win_rxt); @@ -4730,7 +4586,7 @@ rack_make_timely_judgement(struct tcp_rack *rack, uint32_t rtt, int32_t rtt_diff return (timely_says); } -static __inline int +static inline int rack_in_gp_window(struct tcpcb *tp, struct rack_sendmap *rsm) { if (SEQ_GEQ(rsm->r_start, tp->gput_seq) && @@ -4767,7 +4623,7 @@ rack_in_gp_window(struct tcpcb *tp, struct rack_sendmap *rsm) return (0); } -static __inline void +static inline void rack_mark_in_gp_win(struct tcpcb *tp, struct rack_sendmap *rsm) { @@ -4784,7 +4640,7 @@ rack_mark_in_gp_win(struct tcpcb *tp, struct rack_sendmap *rsm) rsm->r_flags &= ~RACK_IN_GP_WIN; } -static __inline void +static inline void rack_clear_gp_marks(struct tcpcb *tp, struct tcp_rack *rack) { /* A GP measurement is ending, clear all marks on the send map*/ @@ -4802,7 +4658,7 @@ rack_clear_gp_marks(struct tcpcb *tp, struct tcp_rack *rack) } -static __inline void +static inline void rack_tend_gp_marks(struct tcpcb *tp, struct tcp_rack *rack) { struct rack_sendmap *rsm = NULL; @@ -6864,6 +6720,18 @@ rack_mark_lost(struct tcpcb *tp, } } +static inline void +rack_mark_nolonger_lost(struct tcp_rack *rack, struct rack_sendmap *rsm) +{ + KASSERT((rack->r_ctl.rc_considered_lost >= (rsm->r_end - rsm->r_start)), + ("rsm:%p rack:%p rc_considered_lost goes negative", rsm, rack)); + rsm->r_flags &= ~RACK_WAS_LOST; + if (rack->r_ctl.rc_considered_lost >= (rsm->r_end - rsm->r_start)) + rack->r_ctl.rc_considered_lost -= rsm->r_end - rsm->r_start; + else + rack->r_ctl.rc_considered_lost = 0; +} + /* * RACK Timer, here we simply do logging and house keeping. * the normal rack_output() function will call the @@ -7005,7 +6873,7 @@ rack_setup_offset_for_rsm(struct tcp_rack *rack, struct rack_sendmap *src_rsm, s rsm->orig_t_space = M_TRAILINGROOM(rsm->m); } -static __inline void +static inline void rack_clone_rsm(struct tcp_rack *rack, struct rack_sendmap *nrsm, struct rack_sendmap *rsm, uint32_t start) { @@ -8130,13 +7998,7 @@ rack_update_rsm(struct tcpcb *tp, struct tcp_rack *rack, * remove the lost desgination and reduce the * bytes considered lost. */ - rsm->r_flags &= ~RACK_WAS_LOST; - KASSERT((rack->r_ctl.rc_considered_lost >= (rsm->r_end - rsm->r_start)), - ("rsm:%p rack:%p rc_considered_lost goes negative", rsm, rack)); - if (rack->r_ctl.rc_considered_lost >= (rsm->r_end - rsm->r_start)) - rack->r_ctl.rc_considered_lost -= rsm->r_end - rsm->r_start; - else - rack->r_ctl.rc_considered_lost = 0; + rack_mark_nolonger_lost(rack, rsm); } idx = rsm->r_rtr_cnt - 1; rsm->r_tim_lastsent[idx] = ts; @@ -9492,6 +9354,11 @@ do_rest_ofb: if (rsm->r_flags & RACK_WAS_LOST) { int my_chg; + /* + * Note here we do not use our rack_mark_nolonger_lost() function + * since we are moving our data pointer around and the + * ack'ed side is already not considered lost. + */ my_chg = (nrsm->r_end - nrsm->r_start); KASSERT((rack->r_ctl.rc_considered_lost >= my_chg), ("rsm:%p rack:%p rc_considered_lost goes negative", rsm, rack)); @@ -9531,7 +9398,6 @@ do_rest_ofb: goto out; } rack_log_map_chg(tp, rack, &stack_map, rsm, next, MAP_SACK_M1, end, __LINE__); - counter_u64_add(rack_sack_used_next_merge, 1); /* Postion for the next block */ start = next->r_end; rsm = tqhash_next(rack->r_ctl.tqh, next); @@ -9563,7 +9429,6 @@ do_rest_ofb: */ goto out; } - counter_u64_add(rack_sack_splits, 1); rack_clone_rsm(rack, nrsm, rsm, start); rsm->r_just_ret = 0; #ifndef INVARIANTS @@ -9585,7 +9450,6 @@ do_rest_ofb: } } else { /* Already sacked this piece */ - counter_u64_add(rack_sack_skipped_acked, 1); if (end == rsm->r_end) { /* Done with block */ rsm = tqhash_next(rack->r_ctl.tqh, rsm); @@ -9659,16 +9523,11 @@ do_rest_ofb: changed += (rsm->r_end - rsm->r_start); /* You get a count for acking a whole segment or more */ if (rsm->r_flags & RACK_WAS_LOST) { - int my_chg; - - my_chg = (rsm->r_end - rsm->r_start); - rsm->r_flags &= ~RACK_WAS_LOST; - KASSERT((rack->r_ctl.rc_considered_lost >= my_chg), - ("rsm:%p rack:%p rc_considered_lost goes negative", rsm, rack)); - if (my_chg <= rack->r_ctl.rc_considered_lost) - rack->r_ctl.rc_considered_lost -= my_chg; - else - rack->r_ctl.rc_considered_lost = 0; + /* + * Here we can use the inline function since + * the rsm is truly marked lost and now no longer lost. + */ + rack_mark_nolonger_lost(rack, rsm); } rack->r_ctl.rc_sacked += (rsm->r_end - rsm->r_start); if (rsm->r_in_tmap) /* should be true */ @@ -9690,8 +9549,6 @@ do_rest_ofb: rsm->r_in_tmap = 0; } rack_log_map_chg(tp, rack, NULL, rsm, NULL, MAP_SACK_M3, end, __LINE__); - } else { - counter_u64_add(rack_sack_skipped_acked, 1); } if (end == rsm->r_end) { /* This block only - done, setup for next */ @@ -9851,6 +9708,10 @@ do_rest_ofb: if (rsm->r_flags & RACK_WAS_LOST) { int my_chg; + /* + * Note here we are using hookery again so we can't + * use our rack_mark_nolonger_lost() function. + */ my_chg = (nrsm->r_end - nrsm->r_start); KASSERT((rack->r_ctl.rc_considered_lost >= my_chg), ("rsm:%p rack:%p rc_considered_lost goes negative", rsm, rack)); @@ -9866,7 +9727,6 @@ do_rest_ofb: } rack_log_map_chg(tp, rack, prev, &stack_map, rsm, MAP_SACK_M4, end, __LINE__); rsm = prev; - counter_u64_add(rack_sack_used_prev_merge, 1); } else { /** * This is the case where our previous @@ -9931,7 +9791,6 @@ do_rest_ofb: * rsm |---| (acked) * nrsm |------| (not acked) */ - counter_u64_add(rack_sack_splits, 1); rack_clone_rsm(rack, nrsm, rsm, end); rsm->r_flags &= (~RACK_HAS_FIN); rsm->r_just_ret = 0; @@ -9952,16 +9811,10 @@ do_rest_ofb: rack_update_rtt(tp, rack, rsm, to, cts, SACKED, 0); changed += (rsm->r_end - rsm->r_start); if (rsm->r_flags & RACK_WAS_LOST) { - int my_chg; - - my_chg = (rsm->r_end - rsm->r_start); - rsm->r_flags &= ~RACK_WAS_LOST; - KASSERT((rack->r_ctl.rc_considered_lost >= my_chg), - ("rsm:%p rack:%p rc_considered_lost goes negative", rsm, rack)); - if (my_chg <= rack->r_ctl.rc_considered_lost) - rack->r_ctl.rc_considered_lost -= my_chg; - else - rack->r_ctl.rc_considered_lost = 0; + /* + * Here it is safe to use our function. + */ + rack_mark_nolonger_lost(rack, rsm); } rack->r_ctl.rc_sacked += (rsm->r_end - rsm->r_start); @@ -9985,11 +9838,6 @@ do_rest_ofb: rsm->r_in_tmap = 0; } } - } else if (start != end){ - /* - * The block was already acked. - */ - counter_u64_add(rack_sack_skipped_acked, 1); } out: if (rsm && @@ -10362,13 +10210,7 @@ more: * and yet before retransmitting we get an ack * which can happen due to reordering. */ - rsm->r_flags &= ~RACK_WAS_LOST; - KASSERT((rack->r_ctl.rc_considered_lost >= (rsm->r_end - rsm->r_start)), - ("rsm:%p rack:%p rc_considered_lost goes negative", rsm, rack)); - if (rack->r_ctl.rc_considered_lost >= (rsm->r_end - rsm->r_start)) - rack->r_ctl.rc_considered_lost -= rsm->r_end - rsm->r_start; - else - rack->r_ctl.rc_considered_lost = 0; + rack_mark_nolonger_lost(rack, rsm); } rack_log_map_chg(tp, rack, NULL, rsm, NULL, MAP_FREE, rsm->r_end, __LINE__); rack->r_ctl.rc_holes_rxt -= rsm->r_rtr_bytes; @@ -10476,12 +10318,7 @@ more: * which can happen due to reordering. In this * case its only a partial ack of the send. */ - KASSERT((rack->r_ctl.rc_considered_lost >= (th_ack - rsm->r_start)), - ("rsm:%p rack:%p rc_considered_lost goes negative th_ack:%u", rsm, rack, th_ack)); - if (rack->r_ctl.rc_considered_lost >= (th_ack - rsm->r_start)) - rack->r_ctl.rc_considered_lost -= th_ack - rsm->r_start; - else - rack->r_ctl.rc_considered_lost = 0; + rack_mark_nolonger_lost(rack, rsm); } /* * Clear the dup ack count for @@ -10793,17 +10630,6 @@ rack_log_ack(struct tcpcb *tp, struct tcpopt *to, struct tcphdr *th, int entered changed = 0; th_ack = th->th_ack; segsiz = ctf_fixed_maxseg(rack->rc_tp); - if (BYTES_THIS_ACK(tp, th) >= segsiz) { - /* - * You only get credit for - * MSS and greater (and you get extra - * credit for larger cum-ack moves). - */ - int ac; - - ac = BYTES_THIS_ACK(tp, th) / ctf_fixed_maxseg(rack->rc_tp); - counter_u64_add(rack_ack_total, ac); - } if (SEQ_GT(th_ack, tp->snd_una)) { rack_log_progress_event(rack, tp, ticks, PROGRESS_UPDATE, __LINE__); tp->t_acktime = ticks; @@ -10875,8 +10701,8 @@ rack_log_ack(struct tcpcb *tp, struct tcpopt *to, struct tcphdr *th, int entered if (sacks_seen != NULL) *sacks_seen = num_sack_blks; if (num_sack_blks == 0) { - /* Nothing to sack, but we need to update counts */ - goto out_with_totals; + /* Nothing to sack */ + goto out; } /* Its a sack of some sort */ if (num_sack_blks < 2) { @@ -10899,7 +10725,7 @@ rack_log_ack(struct tcpcb *tp, struct tcpopt *to, struct tcphdr *th, int entered */ again: if (num_sack_blks == 0) - goto out_with_totals; + goto out; if (num_sack_blks > 1) { for (i = 0; i < num_sack_blks; i++) { for (j = i + 1; j < num_sack_blks; j++) { @@ -10952,19 +10778,7 @@ do_sack_work: changed += acked; } if (num_sack_blks == 1) { - /* - * This is what we would expect from - * a normal implementation to happen - * after we have retransmitted the FR, - * i.e the sack-filter pushes down - * to 1 block and the next to be retransmitted - * is the sequence in the sack block (has more - * are acked). Count this as ACK'd data to boost - * up the chances of recovering any false positives. - */ - counter_u64_add(rack_ack_total, (acked / ctf_fixed_maxseg(rack->rc_tp))); - counter_u64_add(rack_express_sack, 1); - goto out_with_totals; + goto out; } else { /* * Start the loop through the @@ -10973,7 +10787,6 @@ do_sack_work: loop_start = 1; } } - counter_u64_add(rack_sack_total, 1); rsm = rack->r_ctl.rc_sacklast; for (i = loop_start; i < num_sack_blks; i++) { acked = rack_proc_sack_blk(tp, rack, &sack_blocks[i], to, &rsm, cts, segsiz); @@ -10982,18 +10795,6 @@ do_sack_work: changed += acked; } } -out_with_totals: - if (num_sack_blks > 1) { - /* - * You get an extra stroke if - * you have more than one sack-blk, this - * could be where we are skipping forward - * and the sack-filter is still working, or - * it could be an attacker constantly - * moving us. - */ - counter_u64_add(rack_move_some, 1); - } out: if (changed) { /* Something changed cancel the rack timer */ @@ -14713,7 +14514,6 @@ rack_init(struct tcpcb *tp, void **ptr) rack->r_ctl.rc_prr_sendalot = rack_send_a_lot_in_prr; rack->r_ctl.rc_min_to = rack_min_to; microuptime(&rack->r_ctl.act_rcv_time); - rack->r_ctl.rc_last_time_decay = rack->r_ctl.act_rcv_time; rack->r_ctl.rack_per_of_gp_ss = rack_per_of_gp_ss; if (rack_hw_up_only) rack->r_up_only = 1; diff --git a/sys/netinet/tcp_stacks/tcp_rack.h b/sys/netinet/tcp_stacks/tcp_rack.h index 144b4fabf7eb..cac17d9aeb50 100644 --- a/sys/netinet/tcp_stacks/tcp_rack.h +++ b/sys/netinet/tcp_stacks/tcp_rack.h @@ -462,7 +462,6 @@ struct rack_control { uint64_t rc_gp_output_ts; /* chg*/ uint64_t rc_gp_cumack_ts; /* chg*/ struct timeval act_rcv_time; - struct timeval rc_last_time_decay; /* SAD time decay happened here */ uint64_t gp_bw; uint64_t init_rate; #ifdef NETFLIX_SHARED_CWND diff --git a/sys/netinet/tcp_subr.c b/sys/netinet/tcp_subr.c index c817c79881d6..b6f428b279b3 100644 --- a/sys/netinet/tcp_subr.c +++ b/sys/netinet/tcp_subr.c @@ -607,7 +607,7 @@ tcp_recv_udp_tunneled_packet(struct mbuf *m, int off, struct inpcb *inp, } } m->m_pkthdr.tcp_tun_port = port = uh->uh_sport; - bcopy(th, uh, m->m_len - off); + bcopy(th, uh, m->m_len - off - sizeof(struct udphdr)); m->m_len -= sizeof(struct udphdr); m->m_pkthdr.len -= sizeof(struct udphdr); /* diff --git a/sys/netinet/tcp_syncache.c b/sys/netinet/tcp_syncache.c index f842a5678fa1..fa7035771714 100644 --- a/sys/netinet/tcp_syncache.c +++ b/sys/netinet/tcp_syncache.c @@ -713,23 +713,6 @@ done: } void -syncache_badack(struct in_conninfo *inc, uint16_t port) -{ - struct syncache *sc; - struct syncache_head *sch; - - if (syncache_cookiesonly()) - return; - sc = syncache_lookup(inc, &sch); /* returns locked sch */ - SCH_LOCK_ASSERT(sch); - if ((sc != NULL) && (sc->sc_port == port)) { - syncache_drop(sc, sch); - TCPSTAT_INC(tcps_sc_badack); - } - SCH_UNLOCK(sch); -} - -void syncache_unreach(struct in_conninfo *inc, tcp_seq th_seq, uint16_t port) { struct syncache *sc; @@ -1046,6 +1029,8 @@ abort: * * On syncache_socket() success the newly created socket * has its underlying inp locked. + * + * *lsop is updated, if and only if 1 is returned. */ int syncache_expand(struct in_conninfo *inc, struct tcpopt *to, struct tcphdr *th, @@ -1094,12 +1079,14 @@ syncache_expand(struct in_conninfo *inc, struct tcpopt *to, struct tcphdr *th, */ SCH_UNLOCK(sch); TCPSTAT_INC(tcps_sc_spurcookie); - if ((s = tcp_log_addrs(inc, th, NULL, NULL))) + if ((s = tcp_log_addrs(inc, th, NULL, NULL))) { log(LOG_DEBUG, "%s; %s: Spurious ACK, " "segment rejected " "(syncookies disabled)\n", s, __func__); - goto failed; + free(s, M_TCPLOG); + } + return (0); } if (sch->sch_last_overflow < time_uptime - SYNCOOKIE_LIFETIME) { @@ -1109,12 +1096,14 @@ syncache_expand(struct in_conninfo *inc, struct tcpopt *to, struct tcphdr *th, */ SCH_UNLOCK(sch); TCPSTAT_INC(tcps_sc_spurcookie); - if ((s = tcp_log_addrs(inc, th, NULL, NULL))) + if ((s = tcp_log_addrs(inc, th, NULL, NULL))) { log(LOG_DEBUG, "%s; %s: Spurious ACK, " "segment rejected " "(no syncache entry)\n", s, __func__); - goto failed; + free(s, M_TCPLOG); + } + return (0); } SCH_UNLOCK(sch); } @@ -1128,11 +1117,13 @@ syncache_expand(struct in_conninfo *inc, struct tcpopt *to, struct tcphdr *th, TCPSTAT_INC(tcps_sc_recvcookie); } else { TCPSTAT_INC(tcps_sc_failcookie); - if ((s = tcp_log_addrs(inc, th, NULL, NULL))) + if ((s = tcp_log_addrs(inc, th, NULL, NULL))) { log(LOG_DEBUG, "%s; %s: Segment failed " "SYNCOOKIE authentication, segment rejected " "(probably spoofed)\n", s, __func__); - goto failed; + free(s, M_TCPLOG); + } + return (0); } #if defined(IPSEC_SUPPORT) || defined(TCP_SIGNATURE) /* If received ACK has MD5 signature, check it. */ @@ -1160,7 +1151,7 @@ syncache_expand(struct in_conninfo *inc, struct tcpopt *to, struct tcphdr *th, /* * If listening socket requested TCP digests, check that * received ACK has signature and it is correct. - * If not, drop the ACK and leave sc entry in th cache, + * If not, drop the ACK and leave sc entry in the cache, * because SYN was received with correct signature. */ if (sc->sc_flags & SCF_SIGNATURE) { @@ -1206,9 +1197,9 @@ syncache_expand(struct in_conninfo *inc, struct tcpopt *to, struct tcphdr *th, "%s; %s: SEG.TSval %u < TS.Recent %u, " "segment dropped\n", s, __func__, to->to_tsval, sc->sc_tsreflect); - free(s, M_TCPLOG); } SCH_UNLOCK(sch); + free(s, M_TCPLOG); return (-1); /* Do not send RST */ } @@ -1225,7 +1216,6 @@ syncache_expand(struct in_conninfo *inc, struct tcpopt *to, struct tcphdr *th, "expected, segment processed normally\n", s, __func__); free(s, M_TCPLOG); - s = NULL; } } @@ -1312,16 +1302,6 @@ syncache_expand(struct in_conninfo *inc, struct tcpopt *to, struct tcphdr *th, if (sc != &scs) syncache_free(sc); return (1); -failed: - if (sc != NULL) { - TCPSTATES_DEC(TCPS_SYN_RECEIVED); - if (sc != &scs) - syncache_free(sc); - } - if (s != NULL) - free(s, M_TCPLOG); - *lsop = NULL; - return (0); } static struct socket * @@ -1383,6 +1363,7 @@ syncache_add(struct in_conninfo *inc, struct tcpopt *to, struct tcphdr *th, struct tcpcb *tp; struct socket *rv = NULL; struct syncache *sc = NULL; + struct ucred *cred; struct syncache_head *sch; struct mbuf *ipopts = NULL; u_int ltflags; @@ -1411,6 +1392,7 @@ syncache_add(struct in_conninfo *inc, struct tcpopt *to, struct tcphdr *th, */ KASSERT(SOLISTENING(so), ("%s: %p not listening", __func__, so)); tp = sototcpcb(so); + cred = V_tcp_syncache.see_other ? NULL : crhold(so->so_cred); #ifdef INET6 if (inc->inc_flags & INC_ISIPV6) { @@ -1639,16 +1621,16 @@ syncache_add(struct in_conninfo *inc, struct tcpopt *to, struct tcphdr *th, /* * sc_cred is only used in syncache_pcblist() to list TCP endpoints in * TCPS_SYN_RECEIVED state when V_tcp_syncache.see_other is false. - * Therefore, store the credentials and take a reference count only - * when needed: + * Therefore, store the credentials only when needed: * - sc is allocated from the zone and not using the on stack instance. * - the sysctl variable net.inet.tcp.syncache.see_other is false. * The reference count is decremented when a zone allocated sc is * freed in syncache_free(). */ - if (sc != &scs && !V_tcp_syncache.see_other) - sc->sc_cred = crhold(so->so_cred); - else + if (sc != &scs && !V_tcp_syncache.see_other) { + sc->sc_cred = cred; + cred = NULL; + } else sc->sc_cred = NULL; sc->sc_port = port; sc->sc_ipopts = ipopts; @@ -1786,6 +1768,8 @@ donenoprobe: tcp_fastopen_decrement_counter(tfo_pending); tfo_expanded: + if (cred != NULL) + crfree(cred); if (sc == NULL || sc == &scs) { #ifdef MAC mac_syncache_destroy(&maclabel); diff --git a/sys/netinet/tcp_syncache.h b/sys/netinet/tcp_syncache.h index 37f6ff3d6ca9..c916b4de6ae0 100644 --- a/sys/netinet/tcp_syncache.h +++ b/sys/netinet/tcp_syncache.h @@ -45,7 +45,6 @@ struct socket * syncache_add(struct in_conninfo *, struct tcpopt *, void *, void *, uint8_t, uint16_t); void syncache_chkrst(struct in_conninfo *, struct tcphdr *, struct mbuf *, uint16_t); -void syncache_badack(struct in_conninfo *, uint16_t); int syncache_pcblist(struct sysctl_req *); struct syncache { diff --git a/sys/netinet/tcp_timer.c b/sys/netinet/tcp_timer.c index 3b9fe7a317b0..57c57666fa3a 100644 --- a/sys/netinet/tcp_timer.c +++ b/sys/netinet/tcp_timer.c @@ -513,9 +513,12 @@ tcp_timer_persist(struct tcpcb *tp) if (progdrop || (tp->t_rxtshift >= V_tcp_retries && (ticks - tp->t_rcvtime >= tcp_maxpersistidle || ticks - tp->t_rcvtime >= TCP_REXMTVAL(tp) * tcp_totbackoff))) { - if (!progdrop) + if (progdrop) { + tcp_log_end_status(tp, TCP_EI_STATUS_PROGRESS); + } else { TCPSTAT_INC(tcps_persistdrop); - tcp_log_end_status(tp, TCP_EI_STATUS_PERSIST_MAX); + tcp_log_end_status(tp, TCP_EI_STATUS_PERSIST_MAX); + } goto dropit; } /* diff --git a/sys/netinet/tcp_usrreq.c b/sys/netinet/tcp_usrreq.c index 98c934955121..4d1a6455d09e 100644 --- a/sys/netinet/tcp_usrreq.c +++ b/sys/netinet/tcp_usrreq.c @@ -2799,258 +2799,6 @@ db_print_tstate(int t_state) } static void -db_print_tflags(u_int t_flags) -{ - int comma; - - comma = 0; - if (t_flags & TF_ACKNOW) { - db_printf("%sTF_ACKNOW", comma ? ", " : ""); - comma = 1; - } - if (t_flags & TF_DELACK) { - db_printf("%sTF_DELACK", comma ? ", " : ""); - comma = 1; - } - if (t_flags & TF_NODELAY) { - db_printf("%sTF_NODELAY", comma ? ", " : ""); - comma = 1; - } - if (t_flags & TF_NOOPT) { - db_printf("%sTF_NOOPT", comma ? ", " : ""); - comma = 1; - } - if (t_flags & TF_SENTFIN) { - db_printf("%sTF_SENTFIN", comma ? ", " : ""); - comma = 1; - } - if (t_flags & TF_REQ_SCALE) { - db_printf("%sTF_REQ_SCALE", comma ? ", " : ""); - comma = 1; - } - if (t_flags & TF_RCVD_SCALE) { - db_printf("%sTF_RECVD_SCALE", comma ? ", " : ""); - comma = 1; - } - if (t_flags & TF_REQ_TSTMP) { - db_printf("%sTF_REQ_TSTMP", comma ? ", " : ""); - comma = 1; - } - if (t_flags & TF_RCVD_TSTMP) { - db_printf("%sTF_RCVD_TSTMP", comma ? ", " : ""); - comma = 1; - } - if (t_flags & TF_SACK_PERMIT) { - db_printf("%sTF_SACK_PERMIT", comma ? ", " : ""); - comma = 1; - } - if (t_flags & TF_NEEDSYN) { - db_printf("%sTF_NEEDSYN", comma ? ", " : ""); - comma = 1; - } - if (t_flags & TF_NEEDFIN) { - db_printf("%sTF_NEEDFIN", comma ? ", " : ""); - comma = 1; - } - if (t_flags & TF_NOPUSH) { - db_printf("%sTF_NOPUSH", comma ? ", " : ""); - comma = 1; - } - if (t_flags & TF_PREVVALID) { - db_printf("%sTF_PREVVALID", comma ? ", " : ""); - comma = 1; - } - if (t_flags & TF_WAKESOR) { - db_printf("%sTF_WAKESOR", comma ? ", " : ""); - comma = 1; - } - if (t_flags & TF_GPUTINPROG) { - db_printf("%sTF_GPUTINPROG", comma ? ", " : ""); - comma = 1; - } - if (t_flags & TF_MORETOCOME) { - db_printf("%sTF_MORETOCOME", comma ? ", " : ""); - comma = 1; - } - if (t_flags & TF_SONOTCONN) { - db_printf("%sTF_SONOTCONN", comma ? ", " : ""); - comma = 1; - } - if (t_flags & TF_LASTIDLE) { - db_printf("%sTF_LASTIDLE", comma ? ", " : ""); - comma = 1; - } - if (t_flags & TF_RXWIN0SENT) { - db_printf("%sTF_RXWIN0SENT", comma ? ", " : ""); - comma = 1; - } - if (t_flags & TF_FASTRECOVERY) { - db_printf("%sTF_FASTRECOVERY", comma ? ", " : ""); - comma = 1; - } - if (t_flags & TF_WASFRECOVERY) { - db_printf("%sTF_WASFRECOVERY", comma ? ", " : ""); - comma = 1; - } - if (t_flags & TF_SIGNATURE) { - db_printf("%sTF_SIGNATURE", comma ? ", " : ""); - comma = 1; - } - if (t_flags & TF_FORCEDATA) { - db_printf("%sTF_FORCEDATA", comma ? ", " : ""); - comma = 1; - } - if (t_flags & TF_TSO) { - db_printf("%sTF_TSO", comma ? ", " : ""); - comma = 1; - } - if (t_flags & TF_TOE) { - db_printf("%sTF_TOE", comma ? ", " : ""); - comma = 1; - } - if (t_flags & TF_CLOSED) { - db_printf("%sTF_CLOSED", comma ? ", " : ""); - comma = 1; - } - if (t_flags & TF_SENTSYN) { - db_printf("%sTF_SENTSYN", comma ? ", " : ""); - comma = 1; - } - if (t_flags & TF_LRD) { - db_printf("%sTF_LRD", comma ? ", " : ""); - comma = 1; - } - if (t_flags & TF_CONGRECOVERY) { - db_printf("%sTF_CONGRECOVERY", comma ? ", " : ""); - comma = 1; - } - if (t_flags & TF_WASCRECOVERY) { - db_printf("%sTF_WASCRECOVERY", comma ? ", " : ""); - comma = 1; - } - if (t_flags & TF_FASTOPEN) { - db_printf("%sTF_FASTOPEN", comma ? ", " : ""); - comma = 1; - } -} - -static void -db_print_tflags2(u_int t_flags2) -{ - int comma; - - comma = 0; - if (t_flags2 & TF2_PLPMTU_BLACKHOLE) { - db_printf("%sTF2_PLPMTU_BLACKHOLE", comma ? ", " : ""); - comma = 1; - } - if (t_flags2 & TF2_PLPMTU_PMTUD) { - db_printf("%sTF2_PLPMTU_PMTUD", comma ? ", " : ""); - comma = 1; - } - if (t_flags2 & TF2_PLPMTU_MAXSEGSNT) { - db_printf("%sTF2_PLPMTU_MAXSEGSNT", comma ? ", " : ""); - comma = 1; - } - if (t_flags2 & TF2_LOG_AUTO) { - db_printf("%sTF2_LOG_AUTO", comma ? ", " : ""); - comma = 1; - } - if (t_flags2 & TF2_DROP_AF_DATA) { - db_printf("%sTF2_DROP_AF_DATA", comma ? ", " : ""); - comma = 1; - } - if (t_flags2 & TF2_ECN_PERMIT) { - db_printf("%sTF2_ECN_PERMIT", comma ? ", " : ""); - comma = 1; - } - if (t_flags2 & TF2_ECN_SND_CWR) { - db_printf("%sTF2_ECN_SND_CWR", comma ? ", " : ""); - comma = 1; - } - if (t_flags2 & TF2_ECN_SND_ECE) { - db_printf("%sTF2_ECN_SND_ECE", comma ? ", " : ""); - comma = 1; - } - if (t_flags2 & TF2_ACE_PERMIT) { - db_printf("%sTF2_ACE_PERMIT", comma ? ", " : ""); - comma = 1; - } - if (t_flags2 & TF2_HPTS_CPU_SET) { - db_printf("%sTF2_HPTS_CPU_SET", comma ? ", " : ""); - comma = 1; - } - if (t_flags2 & TF2_FBYTES_COMPLETE) { - db_printf("%sTF2_FBYTES_COMPLETE", comma ? ", " : ""); - comma = 1; - } - if (t_flags2 & TF2_ECN_USE_ECT1) { - db_printf("%sTF2_ECN_USE_ECT1", comma ? ", " : ""); - comma = 1; - } - if (t_flags2 & TF2_TCP_ACCOUNTING) { - db_printf("%sTF2_TCP_ACCOUNTING", comma ? ", " : ""); - comma = 1; - } - if (t_flags2 & TF2_HPTS_CALLS) { - db_printf("%sTF2_HPTS_CALLS", comma ? ", " : ""); - comma = 1; - } - if (t_flags2 & TF2_MBUF_L_ACKS) { - db_printf("%sTF2_MBUF_L_ACKS", comma ? ", " : ""); - comma = 1; - } - if (t_flags2 & TF2_MBUF_ACKCMP) { - db_printf("%sTF2_MBUF_ACKCMP", comma ? ", " : ""); - comma = 1; - } - if (t_flags2 & TF2_SUPPORTS_MBUFQ) { - db_printf("%sTF2_SUPPORTS_MBUFQ", comma ? ", " : ""); - comma = 1; - } - if (t_flags2 & TF2_MBUF_QUEUE_READY) { - db_printf("%sTF2_MBUF_QUEUE_READY", comma ? ", " : ""); - comma = 1; - } - if (t_flags2 & TF2_DONT_SACK_QUEUE) { - db_printf("%sTF2_DONT_SACK_QUEUE", comma ? ", " : ""); - comma = 1; - } - if (t_flags2 & TF2_CANNOT_DO_ECN) { - db_printf("%sTF2_CANNOT_DO_ECN", comma ? ", " : ""); - comma = 1; - } - if (t_flags2 & TF2_PROC_SACK_PROHIBIT) { - db_printf("%sTF2_PROC_SACK_PROHIBIT", comma ? ", " : ""); - comma = 1; - } - if (t_flags2 & TF2_IPSEC_TSO) { - db_printf("%sTF2_IPSEC_TSO", comma ? ", " : ""); - comma = 1; - } - if (t_flags2 & TF2_NO_ISS_CHECK) { - db_printf("%sTF2_NO_ISS_CHECK", comma ? ", " : ""); - comma = 1; - } -} - -static void -db_print_toobflags(char t_oobflags) -{ - int comma; - - comma = 0; - if (t_oobflags & TCPOOB_HAVEDATA) { - db_printf("%sTCPOOB_HAVEDATA", comma ? ", " : ""); - comma = 1; - } - if (t_oobflags & TCPOOB_HADDATA) { - db_printf("%sTCPOOB_HADDATA", comma ? ", " : ""); - comma = 1; - } -} - -static void db_print_bblog_state(int state) { switch (state) { @@ -3088,7 +2836,8 @@ db_print_bblog_state(int state) } static void -db_print_tcpcb(struct tcpcb *tp, const char *name, int indent, bool show_bblog) +db_print_tcpcb(struct tcpcb *tp, const char *name, int indent, bool show_bblog, + bool show_inpcb) { db_print_indent(indent); @@ -3096,6 +2845,9 @@ db_print_tcpcb(struct tcpcb *tp, const char *name, int indent, bool show_bblog) indent += 2; + if (show_inpcb) + db_print_inpcb(tptoinpcb(tp), "t_inpcb", indent); + db_print_indent(indent); db_printf("t_segq first: %p t_segqlen: %d t_dupacks: %d\n", TAILQ_FIRST(&tp->t_segq), tp->t_segqlen, tp->t_dupacks); @@ -3110,14 +2862,10 @@ db_print_tcpcb(struct tcpcb *tp, const char *name, int indent, bool show_bblog) db_printf(")\n"); db_print_indent(indent); - db_printf("t_flags: 0x%x (", tp->t_flags); - db_print_tflags(tp->t_flags); - db_printf(")\n"); + db_printf("t_flags: 0x%b\n", tp->t_flags, TF_BITS); db_print_indent(indent); - db_printf("t_flags2: 0x%x (", tp->t_flags2); - db_print_tflags2(tp->t_flags2); - db_printf(")\n"); + db_printf("t_flags2: 0x%b\n", tp->t_flags2, TF2_BITS); db_print_indent(indent); db_printf("snd_una: 0x%08x snd_max: 0x%08x snd_nxt: 0x%08x\n", @@ -3164,9 +2912,8 @@ db_print_tcpcb(struct tcpcb *tp, const char *name, int indent, bool show_bblog) tp->t_rttupdated, tp->max_sndwnd, tp->t_softerror); db_print_indent(indent); - db_printf("t_oobflags: 0x%x (", tp->t_oobflags); - db_print_toobflags(tp->t_oobflags); - db_printf(") t_iobc: 0x%02x\n", tp->t_iobc); + db_printf("t_oobflags: 0x%b t_iobc: 0x%02x\n", tp->t_oobflags, + TCPOOB_BITS, tp->t_iobc); db_print_indent(indent); db_printf("snd_scale: %u rcv_scale: %u request_r_scale: %u\n", @@ -3227,33 +2974,36 @@ db_print_tcpcb(struct tcpcb *tp, const char *name, int indent, bool show_bblog) DB_SHOW_COMMAND(tcpcb, db_show_tcpcb) { struct tcpcb *tp; - bool show_bblog; + bool show_bblog, show_inpcb; if (!have_addr) { - db_printf("usage: show tcpcb <addr>\n"); + db_printf("usage: show tcpcb[/bi] <addr>\n"); return; } show_bblog = strchr(modif, 'b') != NULL; + show_inpcb = strchr(modif, 'i') != NULL; tp = (struct tcpcb *)addr; - - db_print_tcpcb(tp, "tcpcb", 0, show_bblog); + db_print_tcpcb(tp, "tcpcb", 0, show_bblog, show_inpcb); } DB_SHOW_ALL_COMMAND(tcpcbs, db_show_all_tcpcbs) { VNET_ITERATOR_DECL(vnet_iter); struct inpcb *inp; - bool only_locked, show_bblog; + struct tcpcb *tp; + bool only_locked, show_bblog, show_inpcb; only_locked = strchr(modif, 'l') != NULL; show_bblog = strchr(modif, 'b') != NULL; + show_inpcb = strchr(modif, 'i') != NULL; VNET_FOREACH(vnet_iter) { CURVNET_SET(vnet_iter); CK_LIST_FOREACH(inp, &V_tcbinfo.ipi_listhead, inp_list) { if (only_locked && inp->inp_lock.rw_lock == RW_UNLOCKED) continue; - db_print_tcpcb(intotcpcb(inp), "tcpcb", 0, show_bblog); + tp = intotcpcb(inp); + db_print_tcpcb(tp, "tcpcb", 0, show_bblog, show_inpcb); if (db_pager_quit) break; } diff --git a/sys/netinet/tcp_var.h b/sys/netinet/tcp_var.h index c3be95c80798..f9297be46af7 100644 --- a/sys/netinet/tcp_var.h +++ b/sys/netinet/tcp_var.h @@ -795,6 +795,17 @@ tcp_packets_this_ack(struct tcpcb *tp, tcp_seq ack) #define TF_WASCRECOVERY 0x40000000 /* was in congestion recovery */ #define TF_FASTOPEN 0x80000000 /* TCP Fast Open indication */ +/* t_flags description for use with printf(9) %b identifier. */ +#define TF_BITS "\20" \ + "\1TF_ACKNOW\2TF_DELACK\3TF_NODELAY\4TF_NOOPT" \ + "\5TF_SENTFIN\6TF_REQ_SCALE\7TF_RCVD_SCALE\10TF_REQ_TSTMP" \ + "\11TF_RCVD_TSTMP\12TF_SACK_PERMIT\13TF_NEEDSYN\14TF_NEEDFIN" \ + "\15TF_NOPUSH\16TF_PREVVALID\17TF_WAKESOR\20TF_GPUTINPROG" \ + "\21TF_MORETOCOME\22TF_SONOTCONN\23TF_LASTIDLE\24TF_RXWIN0SENT" \ + "\25TF_FASTRECOVERY\26TF_WASFRECOVERY\27TF_SIGNATURE\30TF_FORCEDATA" \ + "\31TF_TSO\32TF_TOE\33TF_CLOSED\34TF_SENTSYN" \ + "\35TF_LRD\36TF_CONGRECOVERY\37TF_WASCRECOVERY\40TF_FASTOPEN" + #define IN_FASTRECOVERY(t_flags) (t_flags & TF_FASTRECOVERY) #define ENTER_FASTRECOVERY(t_flags) t_flags |= TF_FASTRECOVERY #define EXIT_FASTRECOVERY(t_flags) t_flags &= ~TF_FASTRECOVERY @@ -815,6 +826,9 @@ tcp_packets_this_ack(struct tcpcb *tp, tcp_seq ack) #define TCPOOB_HAVEDATA 0x01 #define TCPOOB_HADDATA 0x02 +/* t_oobflags description for use with printf(9) %b identifier. */ +#define TCPOOB_BITS "\20\1TCPOOB_HAVEDATA\2TCPOOB_HADDATA" + /* * Flags for the extended TCP flags field, t_flags2 */ @@ -842,6 +856,21 @@ tcp_packets_this_ack(struct tcpcb *tp, tcp_seq ack) #define TF2_IPSEC_TSO 0x00200000 /* IPSEC + TSO supported */ #define TF2_NO_ISS_CHECK 0x00400000 /* Don't check SEG.ACK against ISS */ +/* t_flags2 description for use with printf(9) %b identifier. */ +#define TF2_BITS "\20" \ + "\1TF2_PLPMTU_BLACKHOLE\2TF2_PLPMTU_PMTUD" \ + "\3TF2_PLPMTU_MAXSEGSNT\4TF2_LOG_AUTO" \ + "\5TF2_DROP_AF_DATA\6TF2_ECN_PERMIT" \ + "\7TF2_ECN_SND_CWR\10TF2_ECN_SND_ECE" \ + "\11TF2_ACE_PERMIT\12TF2_HPTS_CPU_SET" \ + "\13TF2_FBYTES_COMPLETE\14TF2_ECN_USE_ECT1" \ + "\15TF2_TCP_ACCOUNTING\16TF2_HPTS_CALLS" \ + "\17TF2_MBUF_L_ACKS\20TF2_MBUF_ACKCMP" \ + "\21TF2_SUPPORTS_MBUFQ\22TF2_MBUF_QUEUE_READY" \ + "\23TF2_DONT_SACK_QUEUE\24TF2_CANNOT_DO_ECN" \ + "\25TF2_PROC_SACK_PROHIBIT\26TF2_IPSEC_TSO" \ + "\27TF2_NO_ISS_CHECK" + /* * Structure to hold TCP options that are only used during segment * processing (in tcp_input), but not held in the tcpcb. diff --git a/sys/netinet/udp_usrreq.c b/sys/netinet/udp_usrreq.c index cea8a916679b..f1d952037d5a 100644 --- a/sys/netinet/udp_usrreq.c +++ b/sys/netinet/udp_usrreq.c @@ -787,7 +787,8 @@ udplite_ctlinput(struct icmp *icmp) static int udp_pcblist(SYSCTL_HANDLER_ARGS) { - struct inpcb_iterator inpi = INP_ALL_ITERATOR(&V_udbinfo, + struct inpcbinfo *pcbinfo = udp_get_inpcbinfo(arg2); + struct inpcb_iterator inpi = INP_ALL_ITERATOR(pcbinfo, INPLOOKUP_RLOCKPCB); struct xinpgen xig; struct inpcb *inp; @@ -799,7 +800,7 @@ udp_pcblist(SYSCTL_HANDLER_ARGS) if (req->oldptr == 0) { int n; - n = V_udbinfo.ipi_count; + n = pcbinfo->ipi_count; n += imax(n / 8, 10); req->oldidx = 2 * (sizeof xig) + n * sizeof(struct xinpcb); return (0); @@ -810,8 +811,8 @@ udp_pcblist(SYSCTL_HANDLER_ARGS) bzero(&xig, sizeof(xig)); xig.xig_len = sizeof xig; - xig.xig_count = V_udbinfo.ipi_count; - xig.xig_gen = V_udbinfo.ipi_gencnt; + xig.xig_count = pcbinfo->ipi_count; + xig.xig_gen = pcbinfo->ipi_gencnt; xig.xig_sogen = so_gencnt; error = SYSCTL_OUT(req, &xig, sizeof xig); if (error) @@ -838,9 +839,9 @@ udp_pcblist(SYSCTL_HANDLER_ARGS) * that something happened while we were processing this * request, and it might be necessary to retry. */ - xig.xig_gen = V_udbinfo.ipi_gencnt; + xig.xig_gen = pcbinfo->ipi_gencnt; xig.xig_sogen = so_gencnt; - xig.xig_count = V_udbinfo.ipi_count; + xig.xig_count = pcbinfo->ipi_count; error = SYSCTL_OUT(req, &xig, sizeof xig); } @@ -848,10 +849,15 @@ udp_pcblist(SYSCTL_HANDLER_ARGS) } SYSCTL_PROC(_net_inet_udp, UDPCTL_PCBLIST, pcblist, - CTLTYPE_OPAQUE | CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, 0, + CTLTYPE_OPAQUE | CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, IPPROTO_UDP, udp_pcblist, "S,xinpcb", "List of active UDP sockets"); +SYSCTL_PROC(_net_inet_udplite, OID_AUTO, pcblist, + CTLTYPE_OPAQUE | CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, IPPROTO_UDPLITE, + udp_pcblist, "S,xinpcb", + "List of active UDP-Lite sockets"); + #ifdef INET static int udp_getcred(SYSCTL_HANDLER_ARGS) @@ -1166,7 +1172,19 @@ udp_send(struct socket *so, int flags, struct mbuf *m, struct sockaddr *addr, else INP_RLOCK(inp); NET_EPOCH_ENTER(et); +#ifdef INET6 + if ((flags & PRUS_IPV6) != 0) { + if ((inp->in6p_outputopts != NULL) && + (inp->in6p_outputopts->ip6po_tclass != -1)) + tos = (u_char)inp->in6p_outputopts->ip6po_tclass; + else + tos = 0; + } else { + tos = inp->inp_ip_tos; + } +#else tos = inp->inp_ip_tos; +#endif if (control != NULL) { /* * XXX: Currently, we assume all the optional information is @@ -1190,6 +1208,23 @@ udp_send(struct socket *so, int flags, struct mbuf *m, struct sockaddr *addr, error = udp_v4mapped_pktinfo(cm, &src, inp, flags); if (error != 0) break; + if (((flags & PRUS_IPV6) != 0) && + (cm->cmsg_level == IPPROTO_IPV6) && + (cm->cmsg_type == IPV6_TCLASS)) { + int tclass; + + if (cm->cmsg_len != CMSG_LEN(sizeof(int))) { + error = EINVAL; + break; + } + tclass = *(int *)CMSG_DATA(cm); + if (tclass < -1 || tclass > 255) { + error = EINVAL; + break; + } + if (tclass != -1) + tos = (u_char)tclass; + } #endif if (cm->cmsg_level != IPPROTO_IP) continue; diff --git a/sys/netinet/udp_var.h b/sys/netinet/udp_var.h index 3895f365db3c..3ae08fc0b8f0 100644 --- a/sys/netinet/udp_var.h +++ b/sys/netinet/udp_var.h @@ -147,6 +147,7 @@ void kmod_udpstat_inc(int statnum); } while (0) SYSCTL_DECL(_net_inet_udp); +SYSCTL_DECL(_net_inet_udplite); VNET_DECLARE(struct inpcbinfo, udbinfo); VNET_DECLARE(struct inpcbinfo, ulitecbinfo); |
