Diffstat (limited to 'sys/netinet')
-rw-r--r--   sys/netinet/icmp_var.h                    |  9
-rw-r--r--   sys/netinet/in_fib_dxr.c                  |  6
-rw-r--r--   sys/netinet/in_pcb.c                      | 17
-rw-r--r--   sys/netinet/in_pcb.h                      |  1
-rw-r--r--   sys/netinet/ip_icmp.c                     |  3
-rw-r--r--   sys/netinet/tcp_hpts.c                    | 73
-rw-r--r--   sys/netinet/tcp_hpts.h                    | 17
-rw-r--r--   sys/netinet/tcp_input.c                   | 48
-rw-r--r--   sys/netinet/tcp_log_buf.c                 |  2
-rw-r--r--   sys/netinet/tcp_log_buf.h                 | 14
-rw-r--r--   sys/netinet/tcp_lro.c                     | 12
-rw-r--r--   sys/netinet/tcp_sack.c                    |  2
-rw-r--r--   sys/netinet/tcp_stacks/bbr.c              | 40
-rw-r--r--   sys/netinet/tcp_stacks/rack.c             | 80
-rw-r--r--   sys/netinet/tcp_stacks/rack_bbr_common.c  |  2
-rw-r--r--   sys/netinet/tcp_stacks/rack_pcm.c         |  4
-rw-r--r--   sys/netinet/tcp_stacks/sack_filter.c      |  8
-rw-r--r--   sys/netinet/tcp_stacks/sack_filter.h      |  2
-rw-r--r--   sys/netinet/tcp_subr.c                    | 67
-rw-r--r--   sys/netinet/tcp_timer.c                   |  4
-rw-r--r--   sys/netinet/tcp_usrreq.c                  |  8
-rw-r--r--   sys/netinet/tcp_var.h                     | 26
22 files changed, 247 insertions, 198 deletions
diff --git a/sys/netinet/icmp_var.h b/sys/netinet/icmp_var.h index b1f2b0ebf911..d6b75e482e35 100644 --- a/sys/netinet/icmp_var.h +++ b/sys/netinet/icmp_var.h @@ -104,11 +104,10 @@ extern int badport_bandlim(int); #define BANDLIM_ICMP_UNREACH 0 #define BANDLIM_ICMP_ECHO 1 #define BANDLIM_ICMP_TSTAMP 2 -#define BANDLIM_RST_CLOSEDPORT 3 /* No connection, and no listeners */ -#define BANDLIM_RST_OPENPORT 4 /* No connection, listener */ -#define BANDLIM_ICMP6_UNREACH 5 -#define BANDLIM_SCTP_OOTB 6 -#define BANDLIM_MAX 7 +#define BANDLIM_TCP_RST 3 +#define BANDLIM_ICMP6_UNREACH 4 +#define BANDLIM_SCTP_OOTB 5 +#define BANDLIM_MAX 6 #endif #endif diff --git a/sys/netinet/in_fib_dxr.c b/sys/netinet/in_fib_dxr.c index b889131b544b..538cd43a88a3 100644 --- a/sys/netinet/in_fib_dxr.c +++ b/sys/netinet/in_fib_dxr.c @@ -345,7 +345,7 @@ initheap(struct dxr_aux *da, uint32_t dst_u32, uint32_t chunk) struct heap_entry *fhp = &da->heap[0]; struct rtentry *rt; struct route_nhop_data rnd; - + da->heap_index = 0; da->dst.sin_addr.s_addr = htonl(dst_u32); rt = fib4_lookup_rt(da->fibnum, da->dst.sin_addr, 0, NHR_UNLOCKED, @@ -1143,7 +1143,7 @@ dxr_destroy(void *data) free(da, M_DXRAUX); } -static void +static void epoch_dxr_destroy(epoch_context_t ctx) { struct dxr *dxr = __containerof(ctx, struct dxr, epoch_ctx); @@ -1202,7 +1202,7 @@ dxr_dump_end(void *data, struct fib_dp *dp) static enum flm_op_result dxr_dump_rib_item(struct rtentry *rt, void *data) { - + return (FLM_SUCCESS); } diff --git a/sys/netinet/in_pcb.c b/sys/netinet/in_pcb.c index bccd4b84561a..dbe48242381d 100644 --- a/sys/netinet/in_pcb.c +++ b/sys/netinet/in_pcb.c @@ -1745,6 +1745,23 @@ in_pcbrele(struct inpcb *inp, const inp_lookup_t lock) } /* + * Dereference and rlock inp, for which the caller must own the + * reference. Returns true if inp no longer usable, false otherwise. + */ +bool +in_pcbrele_rlock(struct inpcb *inp) +{ + INP_RLOCK(inp); + if (in_pcbrele_rlocked(inp)) + return (true); + if ((inp->inp_flags & INP_FREED) != 0) { + INP_RUNLOCK(inp); + return (true); + } + return (false); +} + +/* * Unconditionally schedule an inpcb to be freed by decrementing its * reference count, which should occur only after the inpcb has been detached * from its socket. 
If another thread holds a temporary reference (acquired diff --git a/sys/netinet/in_pcb.h b/sys/netinet/in_pcb.h index 57cf15ca37fc..9e0618e87601 100644 --- a/sys/netinet/in_pcb.h +++ b/sys/netinet/in_pcb.h @@ -681,6 +681,7 @@ void in_pcbref(struct inpcb *); bool in_pcbrele(struct inpcb *, inp_lookup_t); bool in_pcbrele_rlocked(struct inpcb *); bool in_pcbrele_wlocked(struct inpcb *); +bool in_pcbrele_rlock(struct inpcb *inp); typedef bool inp_match_t(const struct inpcb *, void *); struct inpcb_iterator { diff --git a/sys/netinet/ip_icmp.c b/sys/netinet/ip_icmp.c index cb4b6df57c57..71b75d18efd0 100644 --- a/sys/netinet/ip_icmp.c +++ b/sys/netinet/ip_icmp.c @@ -1097,8 +1097,7 @@ static const char *icmp_rate_descrs[BANDLIM_MAX] = { [BANDLIM_ICMP_UNREACH] = "icmp unreach", [BANDLIM_ICMP_ECHO] = "icmp ping", [BANDLIM_ICMP_TSTAMP] = "icmp tstamp", - [BANDLIM_RST_CLOSEDPORT] = "closed port RST", - [BANDLIM_RST_OPENPORT] = "open port RST", + [BANDLIM_TCP_RST] = "tcp reset", [BANDLIM_ICMP6_UNREACH] = "icmp6 unreach", [BANDLIM_SCTP_OOTB] = "sctp ootb", }; diff --git a/sys/netinet/tcp_hpts.c b/sys/netinet/tcp_hpts.c index 91f8251589e4..b60cdf45af52 100644 --- a/sys/netinet/tcp_hpts.c +++ b/sys/netinet/tcp_hpts.c @@ -433,38 +433,40 @@ static void tcp_hpts_log(struct tcp_hpts_entry *hpts, struct tcpcb *tp, struct timeval *tv, int slots_to_run, int idx, bool from_callout) { - union tcp_log_stackspecific log; - /* - * Unused logs are - * 64 bit - delRate, rttProp, bw_inuse - * 16 bit - cwnd_gain - * 8 bit - bbr_state, bbr_substate, inhpts; - */ - memset(&log, 0, sizeof(log)); - log.u_bbr.flex1 = hpts->p_nxt_slot; - log.u_bbr.flex2 = hpts->p_cur_slot; - log.u_bbr.flex3 = hpts->p_prev_slot; - log.u_bbr.flex4 = idx; - log.u_bbr.flex5 = hpts->p_curtick; - log.u_bbr.flex6 = hpts->p_on_queue_cnt; - log.u_bbr.flex7 = hpts->p_cpu; - log.u_bbr.flex8 = (uint8_t)from_callout; - log.u_bbr.inflight = slots_to_run; - log.u_bbr.applimited = hpts->overidden_sleep; - log.u_bbr.delivered = hpts->saved_curtick; - log.u_bbr.timeStamp = tcp_tv_to_usectick(tv); - log.u_bbr.epoch = hpts->saved_curslot; - log.u_bbr.lt_epoch = hpts->saved_prev_slot; - log.u_bbr.pkts_out = hpts->p_delayed_by; - log.u_bbr.lost = hpts->p_hpts_sleep_time; - log.u_bbr.pacing_gain = hpts->p_cpu; - log.u_bbr.pkt_epoch = hpts->p_runningslot; - log.u_bbr.use_lt_bw = 1; - TCP_LOG_EVENTP(tp, NULL, - &tptosocket(tp)->so_rcv, - &tptosocket(tp)->so_snd, - BBR_LOG_HPTSDIAG, 0, - 0, &log, false, tv); + if (hpts_does_tp_logging && tcp_bblogging_on(tp)) { + union tcp_log_stackspecific log; + /* + * Unused logs are + * 64 bit - delRate, rttProp, bw_inuse + * 16 bit - cwnd_gain + * 8 bit - bbr_state, bbr_substate, inhpts; + */ + memset(&log, 0, sizeof(log)); + log.u_bbr.flex1 = hpts->p_nxt_slot; + log.u_bbr.flex2 = hpts->p_cur_slot; + log.u_bbr.flex3 = hpts->p_prev_slot; + log.u_bbr.flex4 = idx; + log.u_bbr.flex5 = hpts->p_curtick; + log.u_bbr.flex6 = hpts->p_on_queue_cnt; + log.u_bbr.flex7 = hpts->p_cpu; + log.u_bbr.flex8 = (uint8_t)from_callout; + log.u_bbr.inflight = slots_to_run; + log.u_bbr.applimited = hpts->overidden_sleep; + log.u_bbr.delivered = hpts->saved_curtick; + log.u_bbr.timeStamp = tcp_tv_to_usectick(tv); + log.u_bbr.epoch = hpts->saved_curslot; + log.u_bbr.lt_epoch = hpts->saved_prev_slot; + log.u_bbr.pkts_out = hpts->p_delayed_by; + log.u_bbr.lost = hpts->p_hpts_sleep_time; + log.u_bbr.pacing_gain = hpts->p_cpu; + log.u_bbr.pkt_epoch = hpts->p_runningslot; + log.u_bbr.use_lt_bw = 1; + TCP_LOG_EVENTP(tp, NULL, + &tptosocket(tp)->so_rcv, + 
&tptosocket(tp)->so_snd, + BBR_LOG_HPTSDIAG, 0, + 0, &log, false, tv); + } } static void @@ -1353,10 +1355,7 @@ again: } CURVNET_SET(inp->inp_vnet); /* Lets do any logging that we might want to */ - if (hpts_does_tp_logging && tcp_bblogging_on(tp)) { - tcp_hpts_log(hpts, tp, &tv, slots_to_run, i, - from_callout); - } + tcp_hpts_log(hpts, tp, &tv, slots_to_run, i, from_callout); if (tp->t_fb_ptr != NULL) { kern_prefetch(tp->t_fb_ptr, &did_prefetch); @@ -1487,7 +1486,7 @@ no_run: } void -__tcp_set_hpts(struct tcpcb *tp, int32_t line) +tcp_set_hpts(struct tcpcb *tp) { struct tcp_hpts_entry *hpts; int failed; diff --git a/sys/netinet/tcp_hpts.h b/sys/netinet/tcp_hpts.h index b097a2b98db9..f5856ed8e688 100644 --- a/sys/netinet/tcp_hpts.h +++ b/sys/netinet/tcp_hpts.h @@ -149,8 +149,7 @@ uint32_t tcp_hpts_insert_diag(struct tcpcb *tp, uint32_t slot, int32_t line, #define tcp_hpts_insert(inp, slot) \ tcp_hpts_insert_diag((inp), (slot), __LINE__, NULL) -void __tcp_set_hpts(struct tcpcb *tp, int32_t line); -#define tcp_set_hpts(a) __tcp_set_hpts(a, __LINE__) +void tcp_set_hpts(struct tcpcb *tp); void tcp_set_inp_to_drop(struct inpcb *inp, uint16_t reason); @@ -165,25 +164,25 @@ extern int32_t tcp_min_hptsi_time; * The following functions should also be available * to userspace as well. */ -static __inline uint32_t +static inline uint32_t tcp_tv_to_hptstick(const struct timeval *sv) { return ((sv->tv_sec * 100000) + (sv->tv_usec / HPTS_TICKS_PER_SLOT)); } -static __inline uint32_t +static inline uint32_t tcp_tv_to_usectick(const struct timeval *sv) { return ((uint32_t) ((sv->tv_sec * HPTS_USEC_IN_SEC) + sv->tv_usec)); } -static __inline uint32_t +static inline uint32_t tcp_tv_to_mssectick(const struct timeval *sv) { return ((uint32_t) ((sv->tv_sec * HPTS_MSEC_IN_SEC) + (sv->tv_usec/HPTS_USEC_IN_MSEC))); } -static __inline uint64_t +static inline uint64_t tcp_tv_to_lusectick(const struct timeval *sv) { return ((uint64_t)((sv->tv_sec * HPTS_USEC_IN_SEC) + sv->tv_usec)); @@ -199,7 +198,7 @@ get_hpts_min_sleep_time(void) return (tcp_min_hptsi_time + HPTS_TICKS_PER_SLOT); } -static __inline uint32_t +static inline uint32_t tcp_gethptstick(struct timeval *sv) { struct timeval tv; @@ -210,7 +209,7 @@ tcp_gethptstick(struct timeval *sv) return (tcp_tv_to_hptstick(sv)); } -static __inline uint64_t +static inline uint64_t tcp_get_u64_usecs(struct timeval *tv) { struct timeval tvd; @@ -221,7 +220,7 @@ tcp_get_u64_usecs(struct timeval *tv) return (tcp_tv_to_lusectick(tv)); } -static __inline uint32_t +static inline uint32_t tcp_get_usecs(struct timeval *tv) { struct timeval tvd; diff --git a/sys/netinet/tcp_input.c b/sys/netinet/tcp_input.c index 7c032e13f37a..de428ae1af6f 100644 --- a/sys/netinet/tcp_input.c +++ b/sys/netinet/tcp_input.c @@ -621,6 +621,7 @@ tcp_input_with_port(struct mbuf **mp, int *offp, int proto, uint16_t port) #endif /* INET6 */ struct tcpopt to; /* options in this segment */ char *s = NULL; /* address and port logging */ + bool closed_port = false; /* segment is hitting a closed port */ NET_EPOCH_ASSERT(); @@ -907,7 +908,8 @@ findpcb: log(LOG_INFO, "%s; %s: Connection attempt " "to closed port\n", s, __func__); } - rstreason = BANDLIM_RST_CLOSEDPORT; + rstreason = BANDLIM_TCP_RST; + closed_port = true; goto dropwithreset; } INP_LOCK_ASSERT(inp); @@ -998,12 +1000,14 @@ findpcb: * down or it is in the CLOSED state. Either way we drop the * segment and send an appropriate response. 
*/ - rstreason = BANDLIM_RST_CLOSEDPORT; + rstreason = BANDLIM_TCP_RST; + closed_port = true; goto dropwithreset; } if ((tp->t_port != port) && (tp->t_state > TCPS_LISTEN)) { - rstreason = BANDLIM_RST_CLOSEDPORT; + rstreason = BANDLIM_TCP_RST; + closed_port = true; goto dropwithreset; } @@ -1055,6 +1059,8 @@ findpcb: * socket appended to the listen queue in SYN_RECEIVED state. */ if ((thflags & (TH_RST|TH_ACK|TH_SYN)) == TH_ACK) { + int result; + /* * Parse the TCP options here because * syncookies need access to the reflected @@ -1064,8 +1070,8 @@ findpcb: /* * NB: syncache_expand() doesn't unlock inp. */ - rstreason = syncache_expand(&inc, &to, th, &so, m, port); - if (rstreason < 0) { + result = syncache_expand(&inc, &to, th, &so, m, port); + if (result < 0) { /* * A failing TCP MD5 signature comparison * must result in the segment being dropped @@ -1073,7 +1079,7 @@ findpcb: * to the sender. */ goto dropunlock; - } else if (rstreason == 0) { + } else if (result == 0) { /* * No syncache entry, or ACK was not for our * SYN/ACK. Do our protection against double @@ -1092,7 +1098,7 @@ findpcb: * of the failure cause. */ INP_WUNLOCK(inp); - rstreason = BANDLIM_RST_OPENPORT; + rstreason = BANDLIM_TCP_RST; lookupflag &= ~INPLOOKUP_WILDCARD; goto findpcb; } @@ -1183,7 +1189,7 @@ tfo_socket_result: s, __func__); syncache_badack(&inc, port); /* XXX: Not needed! */ TCPSTAT_INC(tcps_badsyn); - rstreason = BANDLIM_RST_OPENPORT; + rstreason = BANDLIM_TCP_RST; goto dropwithreset; } /* @@ -1259,7 +1265,7 @@ tfo_socket_result: "Connection attempt to deprecated " "IPv6 address rejected\n", s, __func__); - rstreason = BANDLIM_RST_OPENPORT; + rstreason = BANDLIM_TCP_RST; goto dropwithreset; } } @@ -1380,9 +1386,10 @@ dropwithreset: * When blackholing do not respond with a RST but * completely ignore the segment and drop it. */ - if (((rstreason == BANDLIM_RST_OPENPORT && V_blackhole == 3) || - (rstreason == BANDLIM_RST_CLOSEDPORT && - ((V_blackhole == 1 && (thflags & TH_SYN)) || V_blackhole > 1))) && + if (rstreason == BANDLIM_TCP_RST && + ((!closed_port && V_blackhole == 3) || + (closed_port && + ((V_blackhole == 1 && (thflags & TH_SYN)) || V_blackhole > 1))) && (V_blackhole_local || ( #ifdef INET6 isipv6 ? !in6_localip(&ip6->ip6_src) : @@ -1515,7 +1522,9 @@ tcp_do_segment(struct tcpcb *tp, struct mbuf *m, struct tcphdr *th, struct tcpopt to; int tfo_syn; u_int maxseg = 0; + bool no_data; + no_data = (tlen == 0); thflags = tcp_get_flags(th); tp->sackhint.last_sack_ack = 0; sack_changed = SACK_NOCHANGE; @@ -1754,7 +1763,7 @@ tcp_do_segment(struct tcpcb *tp, struct mbuf *m, struct tcphdr *th, tp->ts_recent = to.to_tsval; } - if (tlen == 0) { + if (no_data) { if (SEQ_GT(th->th_ack, tp->snd_una) && SEQ_LEQ(th->th_ack, tp->snd_max) && !IN_RECOVERY(tp->t_flags) && @@ -1963,7 +1972,7 @@ tcp_do_segment(struct tcpcb *tp, struct mbuf *m, struct tcphdr *th, if ((thflags & TH_ACK) && (SEQ_LEQ(th->th_ack, tp->snd_una) || SEQ_GT(th->th_ack, tp->snd_max))) { - rstreason = BANDLIM_RST_OPENPORT; + rstreason = BANDLIM_TCP_RST; tcp_log_end_status(tp, TCP_EI_STATUS_RST_IN_FRONT); goto dropwithreset; } @@ -1976,7 +1985,7 @@ tcp_do_segment(struct tcpcb *tp, struct mbuf *m, struct tcphdr *th, * FIN, or a RST. 
*/ if ((thflags & (TH_SYN|TH_ACK)) == (TH_SYN|TH_ACK)) { - rstreason = BANDLIM_RST_OPENPORT; + rstreason = BANDLIM_TCP_RST; tcp_log_end_status(tp, TCP_EI_STATUS_RST_IN_FRONT); goto dropwithreset; } else if (thflags & TH_SYN) { @@ -2244,7 +2253,7 @@ tcp_do_segment(struct tcpcb *tp, struct mbuf *m, struct tcphdr *th, * for the "LAND" DoS attack. */ if (tp->t_state == TCPS_SYN_RECEIVED && SEQ_LT(th->th_seq, tp->irs)) { - rstreason = BANDLIM_RST_OPENPORT; + rstreason = BANDLIM_TCP_RST; tcp_log_end_status(tp, TCP_EI_STATUS_RST_IN_FRONT); goto dropwithreset; } @@ -2557,7 +2566,7 @@ tcp_do_segment(struct tcpcb *tp, struct mbuf *m, struct tcphdr *th, if (SEQ_LEQ(th->th_ack, tp->snd_una)) { maxseg = tcp_maxseg(tp); - if (tlen == 0 && + if (no_data && (tiwin == tp->snd_wnd || (tp->t_flags & TF_SACK_PERMIT))) { /* @@ -3113,8 +3122,7 @@ step6: (tp->snd_wl1 == th->th_seq && (SEQ_LT(tp->snd_wl2, th->th_ack) || (tp->snd_wl2 == th->th_ack && tiwin > tp->snd_wnd))))) { /* keep track of pure window updates */ - if (tlen == 0 && - tp->snd_wl2 == th->th_ack && tiwin > tp->snd_wnd) + if (no_data && tp->snd_wl2 == th->th_ack && tiwin > tp->snd_wnd) TCPSTAT_INC(tcps_rcvwinupd); tp->snd_wnd = tiwin; tp->snd_wl1 = th->th_seq; @@ -3424,7 +3432,7 @@ dropafterack: if (tp->t_state == TCPS_SYN_RECEIVED && (thflags & TH_ACK) && (SEQ_GT(tp->snd_una, th->th_ack) || SEQ_GT(th->th_ack, tp->snd_max)) ) { - rstreason = BANDLIM_RST_OPENPORT; + rstreason = BANDLIM_TCP_RST; tcp_log_end_status(tp, TCP_EI_STATUS_RST_IN_FRONT); goto dropwithreset; } diff --git a/sys/netinet/tcp_log_buf.c b/sys/netinet/tcp_log_buf.c index 75d693bc019b..e24790ece43d 100644 --- a/sys/netinet/tcp_log_buf.c +++ b/sys/netinet/tcp_log_buf.c @@ -2878,7 +2878,7 @@ tcp_log_sendfile(struct socket *so, off_t offset, size_t nbytes, int flags) /* double check log state now that we have the lock */ if (inp->inp_flags & INP_DROPPED) goto done; - if (tp->_t_logstate != TCP_LOG_STATE_OFF) { + if (tcp_bblogging_on(tp)) { struct timeval tv; tcp_log_eventspecific_t log; diff --git a/sys/netinet/tcp_log_buf.h b/sys/netinet/tcp_log_buf.h index fef32e16b2e4..f8c064b6a104 100644 --- a/sys/netinet/tcp_log_buf.h +++ b/sys/netinet/tcp_log_buf.h @@ -377,12 +377,12 @@ extern int32_t tcp_trace_point_count; /* * Returns true if any sort of BB logging is enabled, - * commonly used throughout the codebase. + * commonly used throughout the codebase. */ static inline int tcp_bblogging_on(struct tcpcb *tp) { - if (tp->_t_logstate <= TCP_LOG_STATE_OFF) + if (tp->_t_logstate <= TCP_LOG_STATE_OFF) return (0); if (tp->_t_logstate == TCP_LOG_VIA_BBPOINTS) return (0); @@ -427,7 +427,7 @@ tcp_set_bblog_state(struct tcpcb *tp, uint8_t ls, uint8_t bbpoint) } } -static inline uint32_t +static inline uint32_t tcp_get_bblog_state(struct tcpcb *tp) { return (tp->_t_logstate); @@ -539,12 +539,12 @@ struct tcpcb; NULL, NULL, 0, NULL); \ } while (0) #endif /* TCP_LOG_FORCEVERBOSE */ +/* Assumes/requires the caller has already checked tcp_bblogging_on(tp). 
*/ #define TCP_LOG_EVENTP(tp, th, rxbuf, txbuf, eventid, errornum, len, stackinfo, th_hostorder, tv) \ do { \ - if (tcp_bblogging_on(tp)) \ - tcp_log_event(tp, th, rxbuf, txbuf, eventid, \ - errornum, len, stackinfo, th_hostorder, \ - NULL, NULL, 0, tv); \ + KASSERT(tcp_bblogging_on(tp), ("bblogging is off")); \ + tcp_log_event(tp, th, rxbuf, txbuf, eventid, errornum, len, \ + stackinfo, th_hostorder, NULL, NULL, 0, tv); \ } while (0) #ifdef TCP_BLACKBOX diff --git a/sys/netinet/tcp_lro.c b/sys/netinet/tcp_lro.c index 10afed17bf3b..7512679bd4e9 100644 --- a/sys/netinet/tcp_lro.c +++ b/sys/netinet/tcp_lro.c @@ -1301,9 +1301,9 @@ tcp_lro_rx_common(struct lro_ctrl *lc, struct mbuf *m, uint32_t csum, bool use_h return (TCP_LRO_CANNOT); #endif if (((m->m_pkthdr.csum_flags & (CSUM_DATA_VALID | CSUM_PSEUDO_HDR)) != - ((CSUM_DATA_VALID | CSUM_PSEUDO_HDR))) || + ((CSUM_DATA_VALID | CSUM_PSEUDO_HDR))) || (m->m_pkthdr.csum_data != 0xffff)) { - /* + /* * The checksum either did not have hardware offload * or it was a bad checksum. We can't LRO such * a packet. @@ -1334,7 +1334,7 @@ tcp_lro_rx_common(struct lro_ctrl *lc, struct mbuf *m, uint32_t csum, bool use_h #endif /* If no hardware or arrival stamp on the packet add timestamp */ if ((m->m_flags & (M_TSTMP_LRO | M_TSTMP)) == 0) { - m->m_pkthdr.rcv_tstmp = bintime2ns(&lc->lro_last_queue_time); + m->m_pkthdr.rcv_tstmp = bintime2ns(&lc->lro_last_queue_time); m->m_flags |= M_TSTMP_LRO; } @@ -1429,9 +1429,9 @@ tcp_lro_rx(struct lro_ctrl *lc, struct mbuf *m, uint32_t csum) int error; if (((m->m_pkthdr.csum_flags & (CSUM_DATA_VALID | CSUM_PSEUDO_HDR)) != - ((CSUM_DATA_VALID | CSUM_PSEUDO_HDR))) || + ((CSUM_DATA_VALID | CSUM_PSEUDO_HDR))) || (m->m_pkthdr.csum_data != 0xffff)) { - /* + /* * The checksum either did not have hardware offload * or it was a bad checksum. We can't LRO such * a packet. @@ -1481,7 +1481,7 @@ tcp_lro_queue_mbuf(struct lro_ctrl *lc, struct mbuf *mb) ((mb->m_flags & M_TSTMP) == 0)) { /* Add in an LRO time since no hardware */ binuptime(&lc->lro_last_queue_time); - mb->m_pkthdr.rcv_tstmp = bintime2ns(&lc->lro_last_queue_time); + mb->m_pkthdr.rcv_tstmp = bintime2ns(&lc->lro_last_queue_time); mb->m_flags |= M_TSTMP_LRO; } diff --git a/sys/netinet/tcp_sack.c b/sys/netinet/tcp_sack.c index 90d789f0e224..4405098a8620 100644 --- a/sys/netinet/tcp_sack.c +++ b/sys/netinet/tcp_sack.c @@ -744,7 +744,7 @@ tcp_sack_doack(struct tcpcb *tp, struct tcpopt *to, tcp_seq th_ack) while (cur != NULL) { if (!(sblkp >= sack_blocks)) { if (((loss_sblks >= tcprexmtthresh) || - (loss_thresh > (tcprexmtthresh-1)*tp->t_maxseg))) + (loss_thresh > (tcprexmtthresh-1)*tp->t_maxseg))) break; loss_thresh += loss_hiack - cur->end; loss_hiack = cur->start; diff --git a/sys/netinet/tcp_stacks/bbr.c b/sys/netinet/tcp_stacks/bbr.c index e2cfec5c9275..b232d3f08fe6 100644 --- a/sys/netinet/tcp_stacks/bbr.c +++ b/sys/netinet/tcp_stacks/bbr.c @@ -5126,8 +5126,8 @@ bbr_timeout_rxt(struct tcpcb *tp, struct tcp_bbr *bbr, uint32_t cts) tp->t_maxseg = tp->t_pmtud_saved_maxseg; if (tp->t_maxseg < V_tcp_mssdflt) { /* - * The MSS is so small we should not - * process incoming SACK's since we are + * The MSS is so small we should not + * process incoming SACK's since we are * subject to attack in such a case. 
*/ tp->t_flags2 |= TF2_PROC_SACK_PROHIBIT; @@ -8763,7 +8763,7 @@ bbr_do_syn_sent(struct mbuf *m, struct tcphdr *th, struct socket *so, (SEQ_LEQ(th->th_ack, tp->iss) || SEQ_GT(th->th_ack, tp->snd_max))) { tcp_log_end_status(tp, TCP_EI_STATUS_RST_IN_FRONT); - ctf_do_dropwithreset(m, tp, th, BANDLIM_RST_OPENPORT, tlen); + ctf_do_dropwithreset(m, tp, th, BANDLIM_TCP_RST, tlen); return (1); } if ((thflags & (TH_ACK | TH_RST)) == (TH_ACK | TH_RST)) { @@ -8965,7 +8965,7 @@ bbr_do_syn_recv(struct mbuf *m, struct tcphdr *th, struct socket *so, (SEQ_LEQ(th->th_ack, tp->snd_una) || SEQ_GT(th->th_ack, tp->snd_max))) { tcp_log_end_status(tp, TCP_EI_STATUS_RST_IN_FRONT); - ctf_do_dropwithreset(m, tp, th, BANDLIM_RST_OPENPORT, tlen); + ctf_do_dropwithreset(m, tp, th, BANDLIM_TCP_RST, tlen); return (1); } if (tp->t_flags & TF_FASTOPEN) { @@ -8977,7 +8977,7 @@ bbr_do_syn_recv(struct mbuf *m, struct tcphdr *th, struct socket *so, */ if ((thflags & (TH_SYN | TH_ACK)) == (TH_SYN | TH_ACK)) { tcp_log_end_status(tp, TCP_EI_STATUS_RST_IN_FRONT); - ctf_do_dropwithreset(m, tp, th, BANDLIM_RST_OPENPORT, tlen); + ctf_do_dropwithreset(m, tp, th, BANDLIM_TCP_RST, tlen); return (1); } else if (thflags & TH_SYN) { /* non-initial SYN is ignored */ @@ -9010,7 +9010,7 @@ bbr_do_syn_recv(struct mbuf *m, struct tcphdr *th, struct socket *so, */ if (SEQ_LT(th->th_seq, tp->irs)) { tcp_log_end_status(tp, TCP_EI_STATUS_RST_IN_FRONT); - ctf_do_dropwithreset(m, tp, th, BANDLIM_RST_OPENPORT, tlen); + ctf_do_dropwithreset(m, tp, th, BANDLIM_TCP_RST, tlen); return (1); } if (ctf_drop_checks(to, m, th, tp, &tlen, &thflags, &drop_hdrlen, &ret_val)) { @@ -9288,7 +9288,7 @@ bbr_do_established(struct mbuf *m, struct tcphdr *th, struct socket *so, if (sbavail(&so->so_snd)) { if (ctf_progress_timeout_check(tp, true)) { bbr_log_progress_event(bbr, tp, tick, PROGRESS_DROP, __LINE__); - ctf_do_dropwithreset_conn(m, tp, th, BANDLIM_RST_OPENPORT, tlen); + ctf_do_dropwithreset_conn(m, tp, th, BANDLIM_TCP_RST, tlen); return (1); } } @@ -9385,7 +9385,7 @@ bbr_do_close_wait(struct mbuf *m, struct tcphdr *th, struct socket *so, if (sbavail(&so->so_snd)) { if (ctf_progress_timeout_check(tp, true)) { bbr_log_progress_event(bbr, tp, tick, PROGRESS_DROP, __LINE__); - ctf_do_dropwithreset_conn(m, tp, th, BANDLIM_RST_OPENPORT, tlen); + ctf_do_dropwithreset_conn(m, tp, th, BANDLIM_TCP_RST, tlen); return (1); } } @@ -9535,7 +9535,7 @@ bbr_do_fin_wait_1(struct mbuf *m, struct tcphdr *th, struct socket *so, if (sbavail(&so->so_snd)) { if (ctf_progress_timeout_check(tp, true)) { bbr_log_progress_event(bbr, tp, tick, PROGRESS_DROP, __LINE__); - ctf_do_dropwithreset_conn(m, tp, th, BANDLIM_RST_OPENPORT, tlen); + ctf_do_dropwithreset_conn(m, tp, th, BANDLIM_TCP_RST, tlen); return (1); } } @@ -9637,7 +9637,7 @@ bbr_do_closing(struct mbuf *m, struct tcphdr *th, struct socket *so, if (sbavail(&so->so_snd)) { if (ctf_progress_timeout_check(tp, true)) { bbr_log_progress_event(bbr, tp, tick, PROGRESS_DROP, __LINE__); - ctf_do_dropwithreset_conn(m, tp, th, BANDLIM_RST_OPENPORT, tlen); + ctf_do_dropwithreset_conn(m, tp, th, BANDLIM_TCP_RST, tlen); return (1); } } @@ -9739,7 +9739,7 @@ bbr_do_lastack(struct mbuf *m, struct tcphdr *th, struct socket *so, if (sbavail(&so->so_snd)) { if (ctf_progress_timeout_check(tp, true)) { bbr_log_progress_event(bbr, tp, tick, PROGRESS_DROP, __LINE__); - ctf_do_dropwithreset_conn(m, tp, th, BANDLIM_RST_OPENPORT, tlen); + ctf_do_dropwithreset_conn(m, tp, th, BANDLIM_TCP_RST, tlen); return (1); } } @@ -9848,7 +9848,7 @@ 
bbr_do_fin_wait_2(struct mbuf *m, struct tcphdr *th, struct socket *so, if (sbavail(&so->so_snd)) { if (ctf_progress_timeout_check(tp, true)) { bbr_log_progress_event(bbr, tp, tick, PROGRESS_DROP, __LINE__); - ctf_do_dropwithreset_conn(m, tp, th, BANDLIM_RST_OPENPORT, tlen); + ctf_do_dropwithreset_conn(m, tp, th, BANDLIM_TCP_RST, tlen); return (1); } } @@ -10141,7 +10141,7 @@ bbr_init(struct tcpcb *tp, void **ptr) * flags. */ bbr_stop_all_timers(tp, bbr); - /* + /* * Validate the timers are not in usec, if they are convert. * BBR should in theory move to USEC and get rid of a * lot of the TICKS_2 calls.. but for now we stay @@ -11510,7 +11510,7 @@ bbr_do_segment_nounlock(struct tcpcb *tp, struct mbuf *m, struct tcphdr *th, if ((tp->t_state == TCPS_SYN_SENT) && (thflags & TH_ACK) && (SEQ_LEQ(th->th_ack, tp->iss) || SEQ_GT(th->th_ack, tp->snd_max))) { tcp_log_end_status(tp, TCP_EI_STATUS_RST_IN_FRONT); - ctf_do_dropwithreset_conn(m, tp, th, BANDLIM_RST_OPENPORT, tlen); + ctf_do_dropwithreset_conn(m, tp, th, BANDLIM_TCP_RST, tlen); return (1); } if (tiwin > bbr->r_ctl.rc_high_rwnd) @@ -11544,7 +11544,7 @@ bbr_do_segment_nounlock(struct tcpcb *tp, struct mbuf *m, struct tcphdr *th, bbr_check_bbr_for_state(bbr, cts, __LINE__, (bbr->r_ctl.rc_lost - lost)); if (nxt_pkt == 0) { if ((bbr->r_wanted_output != 0) || - (tp->t_flags & TF_ACKNOW)) { + (tp->t_flags & TF_ACKNOW)) { bbr->rc_output_starts_timer = 0; did_out = 1; @@ -13172,11 +13172,7 @@ send: mb, moff, &len, if_hw_tsomaxsegcount, if_hw_tsomaxsegsize, msb, - ((rsm == NULL) ? hw_tls : 0) -#ifdef NETFLIX_COPY_ARGS - , NULL, NULL -#endif - ); + ((rsm == NULL) ? hw_tls : 0)); if (len <= maxseg) { /* * Must have ran out of mbufs for the copy @@ -13806,8 +13802,8 @@ nomore: tp->t_maxseg = old_maxseg - 40; if (tp->t_maxseg < V_tcp_mssdflt) { /* - * The MSS is so small we should not - * process incoming SACK's since we are + * The MSS is so small we should not + * process incoming SACK's since we are * subject to attack in such a case. 
*/ tp->t_flags2 |= TF2_PROC_SACK_PROHIBIT; diff --git a/sys/netinet/tcp_stacks/rack.c b/sys/netinet/tcp_stacks/rack.c index 8e05498863b9..940a4024bb73 100644 --- a/sys/netinet/tcp_stacks/rack.c +++ b/sys/netinet/tcp_stacks/rack.c @@ -40,7 +40,6 @@ #endif #include <sys/lock.h> #include <sys/malloc.h> -#include <sys/lock.h> #include <sys/mutex.h> #include <sys/mbuf.h> #include <sys/proc.h> /* for proc0 declaration */ @@ -198,7 +197,7 @@ static uint32_t rack_pcm_blast = 0; static uint32_t rack_pcm_is_enabled = 1; static uint8_t rack_ssthresh_rest_rto_rec = 0; /* Do we restore ssthresh when we have rec -> rto -> rec */ -static uint32_t rack_gp_gain_req = 1200; /* Amount percent wise required to gain to record a round has "gaining" */ +static uint32_t rack_gp_gain_req = 1200; /* Amount percent wise required to gain to record a round as "gaining" */ static uint32_t rack_rnd_cnt_req = 0x10005; /* Default number of rounds if we are below rack_gp_gain_req where we exit ss */ @@ -938,7 +937,7 @@ rack_init_sysctls(void) SYSCTL_ADD_U32(&rack_sysctl_ctx, SYSCTL_CHILDREN(rack_probertt), OID_AUTO, "time_between", CTLFLAG_RW, - & rack_time_between_probertt, 96000000, + &rack_time_between_probertt, 96000000, "How many useconds between the lowest rtt falling must past before we enter probertt"); SYSCTL_ADD_U32(&rack_sysctl_ctx, SYSCTL_CHILDREN(rack_probertt), @@ -3480,9 +3479,9 @@ static void rack_free(struct tcp_rack *rack, struct rack_sendmap *rsm) { if (rsm->r_flags & RACK_APP_LIMITED) { - if (rack->r_ctl.rc_app_limited_cnt > 0) { - rack->r_ctl.rc_app_limited_cnt--; - } + KASSERT((rack->r_ctl.rc_app_limited_cnt > 0), + ("app_cnt %u, rsm %p", rack->r_ctl.rc_app_limited_cnt, rsm)); + rack->r_ctl.rc_app_limited_cnt--; } if (rsm->r_limit_type) { /* currently there is only one limit type */ @@ -3554,8 +3553,7 @@ rack_get_measure_window(struct tcpcb *tp, struct tcp_rack *rack) * earlier. * * So lets calculate the BDP with the "known" b/w using - * the SRTT has our rtt and then multiply it by the - * goal. + * the SRTT as our rtt and then multiply it by the goal. */ bw = rack_get_bw(rack); srtt = (uint64_t)tp->t_srtt; @@ -5793,7 +5791,7 @@ rack_cong_signal(struct tcpcb *tp, uint32_t type, uint32_t ack, int line) tp->t_badrxtwin = 0; break; } - if ((CC_ALGO(tp)->cong_signal != NULL) && + if ((CC_ALGO(tp)->cong_signal != NULL) && (type != CC_RTO)){ tp->t_ccv.curack = ack; CC_ALGO(tp)->cong_signal(&tp->t_ccv, type); @@ -5904,7 +5902,7 @@ rack_calc_thresh_rack(struct tcp_rack *rack, uint32_t srtt, uint32_t cts, int li * * If reorder-fade is configured, then we track the last time we saw * re-ordering occur. If we reach the point where enough time as - * passed we no longer consider reordering has occuring. + * passed we no longer consider reordering as occurring. 
* * Or if reorder-face is 0, then once we see reordering we consider * the connection to alway be subject to reordering and just set lro @@ -7045,6 +7043,9 @@ rack_clone_rsm(struct tcp_rack *rack, struct rack_sendmap *nrsm, /* Push bit must go to the right edge as well */ if (rsm->r_flags & RACK_HAD_PUSH) rsm->r_flags &= ~RACK_HAD_PUSH; + /* Update the count if app limited */ + if (nrsm->r_flags & RACK_APP_LIMITED) + rack->r_ctl.rc_app_limited_cnt++; /* Clone over the state of the hw_tls flag */ nrsm->r_hw_tls = rsm->r_hw_tls; /* @@ -7096,7 +7097,7 @@ rack_merge_rsm(struct tcp_rack *rack, l_rsm->r_flags |= RACK_TLP; if (r_rsm->r_flags & RACK_RWND_COLLAPSED) l_rsm->r_flags |= RACK_RWND_COLLAPSED; - if ((r_rsm->r_flags & RACK_APP_LIMITED) && + if ((r_rsm->r_flags & RACK_APP_LIMITED) && ((l_rsm->r_flags & RACK_APP_LIMITED) == 0)) { /* * If both are app-limited then let the @@ -7887,8 +7888,8 @@ drop_it: tp->t_maxseg = tp->t_pmtud_saved_maxseg; if (tp->t_maxseg < V_tcp_mssdflt) { /* - * The MSS is so small we should not - * process incoming SACK's since we are + * The MSS is so small we should not + * process incoming SACK's since we are * subject to attack in such a case. */ tp->t_flags2 |= TF2_PROC_SACK_PROHIBIT; @@ -8137,7 +8138,7 @@ rack_update_rsm(struct tcpcb *tp, struct tcp_rack *rack, * remove the lost desgination and reduce the * bytes considered lost. */ - rsm->r_flags &= ~RACK_WAS_LOST; + rsm->r_flags &= ~RACK_WAS_LOST; KASSERT((rack->r_ctl.rc_considered_lost >= (rsm->r_end - rsm->r_start)), ("rsm:%p rack:%p rc_considered_lost goes negative", rsm, rack)); if (rack->r_ctl.rc_considered_lost >= (rsm->r_end - rsm->r_start)) @@ -8832,7 +8833,7 @@ rack_apply_updated_usrtt(struct tcp_rack *rack, uint32_t us_rtt, uint32_t us_cts val = rack_probertt_lower_within * rack_time_between_probertt; val /= 100; - if ((rack->in_probe_rtt == 0) && + if ((rack->in_probe_rtt == 0) && (rack->rc_skip_timely == 0) && ((us_cts - rack->r_ctl.rc_lower_rtt_us_cts) >= (rack_time_between_probertt - val))) { rack_enter_probertt(rack, us_cts); @@ -10369,7 +10370,7 @@ more: * and yet before retransmitting we get an ack * which can happen due to reordering. */ - rsm->r_flags &= ~RACK_WAS_LOST; + rsm->r_flags &= ~RACK_WAS_LOST; KASSERT((rack->r_ctl.rc_considered_lost >= (rsm->r_end - rsm->r_start)), ("rsm:%p rack:%p rc_considered_lost goes negative", rsm, rack)); if (rack->r_ctl.rc_considered_lost >= (rsm->r_end - rsm->r_start)) @@ -11065,7 +11066,7 @@ rack_strike_dupack(struct tcp_rack *rack, tcp_seq th_ack) * We need to skip anything already set * to be retransmitted. 
*/ - if ((rsm->r_dupack >= DUP_ACK_THRESHOLD) || + if ((rsm->r_dupack >= DUP_ACK_THRESHOLD) || (rsm->r_flags & RACK_MUST_RXT)) { rsm = TAILQ_NEXT(rsm, r_tnext); continue; @@ -12875,7 +12876,7 @@ rack_do_syn_sent(struct mbuf *m, struct tcphdr *th, struct socket *so, (SEQ_LEQ(th->th_ack, tp->iss) || SEQ_GT(th->th_ack, tp->snd_max))) { tcp_log_end_status(tp, TCP_EI_STATUS_RST_IN_FRONT); - ctf_do_dropwithreset(m, tp, th, BANDLIM_RST_OPENPORT, tlen); + ctf_do_dropwithreset(m, tp, th, BANDLIM_TCP_RST, tlen); return (1); } if ((thflags & (TH_ACK | TH_RST)) == (TH_ACK | TH_RST)) { @@ -13089,7 +13090,7 @@ rack_do_syn_recv(struct mbuf *m, struct tcphdr *th, struct socket *so, (SEQ_LEQ(th->th_ack, tp->snd_una) || SEQ_GT(th->th_ack, tp->snd_max))) { tcp_log_end_status(tp, TCP_EI_STATUS_RST_IN_FRONT); - ctf_do_dropwithreset(m, tp, th, BANDLIM_RST_OPENPORT, tlen); + ctf_do_dropwithreset(m, tp, th, BANDLIM_TCP_RST, tlen); return (1); } if (tp->t_flags & TF_FASTOPEN) { @@ -13102,7 +13103,7 @@ rack_do_syn_recv(struct mbuf *m, struct tcphdr *th, struct socket *so, */ if ((thflags & (TH_SYN | TH_ACK)) == (TH_SYN | TH_ACK)) { tcp_log_end_status(tp, TCP_EI_STATUS_RST_IN_FRONT); - ctf_do_dropwithreset(m, tp, th, BANDLIM_RST_OPENPORT, tlen); + ctf_do_dropwithreset(m, tp, th, BANDLIM_TCP_RST, tlen); return (1); } else if (thflags & TH_SYN) { /* non-initial SYN is ignored */ @@ -13136,7 +13137,7 @@ rack_do_syn_recv(struct mbuf *m, struct tcphdr *th, struct socket *so, */ if (SEQ_LT(th->th_seq, tp->irs)) { tcp_log_end_status(tp, TCP_EI_STATUS_RST_IN_FRONT); - ctf_do_dropwithreset(m, tp, th, BANDLIM_RST_OPENPORT, tlen); + ctf_do_dropwithreset(m, tp, th, BANDLIM_TCP_RST, tlen); return (1); } if (ctf_drop_checks(to, m, th, tp, &tlen, &thflags, &drop_hdrlen, &ret_val)) { @@ -13399,7 +13400,7 @@ rack_do_established(struct mbuf *m, struct tcphdr *th, struct socket *so, if (sbavail(&so->so_snd)) { if (ctf_progress_timeout_check(tp, true)) { rack_log_progress_event(rack, tp, tick, PROGRESS_DROP, __LINE__); - ctf_do_dropwithreset_conn(m, tp, th, BANDLIM_RST_OPENPORT, tlen); + ctf_do_dropwithreset_conn(m, tp, th, BANDLIM_TCP_RST, tlen); return (1); } } @@ -13495,7 +13496,7 @@ rack_do_close_wait(struct mbuf *m, struct tcphdr *th, struct socket *so, if (ctf_progress_timeout_check(tp, true)) { rack_log_progress_event((struct tcp_rack *)tp->t_fb_ptr, tp, tick, PROGRESS_DROP, __LINE__); - ctf_do_dropwithreset_conn(m, tp, th, BANDLIM_RST_OPENPORT, tlen); + ctf_do_dropwithreset_conn(m, tp, th, BANDLIM_TCP_RST, tlen); return (1); } } @@ -13645,7 +13646,7 @@ rack_do_fin_wait_1(struct mbuf *m, struct tcphdr *th, struct socket *so, if (ctf_progress_timeout_check(tp, true)) { rack_log_progress_event((struct tcp_rack *)tp->t_fb_ptr, tp, tick, PROGRESS_DROP, __LINE__); - ctf_do_dropwithreset_conn(m, tp, th, BANDLIM_RST_OPENPORT, tlen); + ctf_do_dropwithreset_conn(m, tp, th, BANDLIM_TCP_RST, tlen); return (1); } } @@ -13746,7 +13747,7 @@ rack_do_closing(struct mbuf *m, struct tcphdr *th, struct socket *so, if (ctf_progress_timeout_check(tp, true)) { rack_log_progress_event((struct tcp_rack *)tp->t_fb_ptr, tp, tick, PROGRESS_DROP, __LINE__); - ctf_do_dropwithreset_conn(m, tp, th, BANDLIM_RST_OPENPORT, tlen); + ctf_do_dropwithreset_conn(m, tp, th, BANDLIM_TCP_RST, tlen); return (1); } } @@ -13848,7 +13849,7 @@ rack_do_lastack(struct mbuf *m, struct tcphdr *th, struct socket *so, if (ctf_progress_timeout_check(tp, true)) { rack_log_progress_event((struct tcp_rack *)tp->t_fb_ptr, tp, tick, PROGRESS_DROP, __LINE__); - 
ctf_do_dropwithreset_conn(m, tp, th, BANDLIM_RST_OPENPORT, tlen); + ctf_do_dropwithreset_conn(m, tp, th, BANDLIM_TCP_RST, tlen); return (1); } } @@ -13952,7 +13953,7 @@ rack_do_fin_wait_2(struct mbuf *m, struct tcphdr *th, struct socket *so, if (ctf_progress_timeout_check(tp, true)) { rack_log_progress_event((struct tcp_rack *)tp->t_fb_ptr, tp, tick, PROGRESS_DROP, __LINE__); - ctf_do_dropwithreset_conn(m, tp, th, BANDLIM_RST_OPENPORT, tlen); + ctf_do_dropwithreset_conn(m, tp, th, BANDLIM_TCP_RST, tlen); return (1); } } @@ -14637,9 +14638,6 @@ rack_init(struct tcpcb *tp, void **ptr) if (rack->r_ctl.pcm_s == NULL) { rack->r_ctl.pcm_i.cnt_alloc = 0; } -#ifdef NETFLIX_STATS - rack->r_ctl.side_chan_dis_mask = tcp_sidechannel_disable_mask; -#endif rack->r_ctl.rack_per_upper_bound_ss = (uint8_t)rack_per_upper_bound_ss; rack->r_ctl.rack_per_upper_bound_ca = (uint8_t)rack_per_upper_bound_ca; if (rack_enable_shared_cwnd) @@ -15563,7 +15561,7 @@ rack_log_pcm(struct tcp_rack *rack, uint8_t mod, uint32_t flex1, uint32_t flex2, if (tcp_bblogging_on(rack->rc_tp)) { union tcp_log_stackspecific log; struct timeval tv; - + (void)tcp_get_usecs(&tv); memset(&log, 0, sizeof(log)); log.u_bbr.timeStamp = tcp_tv_to_usectick(&tv); @@ -16655,7 +16653,7 @@ rack_do_segment_nounlock(struct tcpcb *tp, struct mbuf *m, struct tcphdr *th, if ((tp->t_state == TCPS_SYN_SENT) && (thflags & TH_ACK) && (SEQ_LEQ(th->th_ack, tp->iss) || SEQ_GT(th->th_ack, tp->snd_max))) { tcp_log_end_status(tp, TCP_EI_STATUS_RST_IN_FRONT); - ctf_do_dropwithreset(m, tp, th, BANDLIM_RST_OPENPORT, tlen); + ctf_do_dropwithreset(m, tp, th, BANDLIM_TCP_RST, tlen); #ifdef TCP_ACCOUNTING sched_unpin(); #endif @@ -16919,7 +16917,7 @@ do_output_now: } else if ((nxt_pkt == 0) && (tp->t_flags & TF_ACKNOW)) { goto do_output_now; } else if ((no_output == 1) && - (nxt_pkt == 0) && + (nxt_pkt == 0) && (tcp_in_hpts(rack->rc_tp) == 0)) { /* * We are not in hpts and we had a pacing timer up. 
Use @@ -17546,7 +17544,7 @@ rack_get_pacing_delay(struct tcp_rack *rack, struct tcpcb *tp, uint32_t len, str rack->r_ctl.rc_last_us_rtt, 88, __LINE__, NULL, gain); } - if (((bw_est == 0) || (rate_wanted == 0) || (rack->gp_ready == 0)) && + if (((bw_est == 0) || (rate_wanted == 0) || (rack->gp_ready == 0)) && (rack->use_fixed_rate == 0)) { /* * No way yet to make a b/w estimate or @@ -17986,7 +17984,7 @@ start_set: tp->gput_ack = tp->gput_seq + rack_get_measure_window(tp, rack); rack->r_ctl.rc_gp_cumack_ts = 0; if ((rack->r_ctl.cleared_app_ack == 1) && - (SEQ_GEQ(rack->r_ctl.cleared_app_ack, tp->gput_seq))) { + (SEQ_GEQ(tp->gput_seq, rack->r_ctl.cleared_app_ack_seq))) { /* * We just cleared an application limited period * so the next seq out needs to skip the first @@ -19914,7 +19912,7 @@ rack_output(struct tcpcb *tp) goto nomore; } else { /* Return == 0, if there is more we can send tot_len wise fall through and send */ - if (tot_len_this_send >= pace_max_seg) + if (tot_len_this_send >= pace_max_seg) return (ret); #ifdef TCP_ACCOUNTING /* We need to re-pin since fast_output un-pined */ @@ -20043,7 +20041,7 @@ again: rack->r_ctl.pcm_max_seg = ctf_fixed_maxseg(tp) * 10; } } - if ((rack->r_ctl.pcm_max_seg != 0) && (rack->pcm_needed == 1)) { + if ((rack->r_ctl.pcm_max_seg != 0) && (rack->pcm_needed == 1)) { uint32_t rw_avail, cwa; if (tp->snd_wnd > ctf_outstanding(tp)) @@ -21031,7 +21029,7 @@ just_return_nolock: } else log = 1; } - /* Mark the last packet has app limited */ + /* Mark the last packet as app limited */ rsm = tqhash_max(rack->r_ctl.tqh); if (rsm && ((rsm->r_flags & RACK_APP_LIMITED) == 0)) { if (rack->r_ctl.rc_app_limited_cnt == 0) @@ -21555,11 +21553,7 @@ send: m->m_next = tcp_m_copym( mb, moff, &len, if_hw_tsomaxsegcount, if_hw_tsomaxsegsize, msb, - ((rsm == NULL) ? hw_tls : 0) -#ifdef NETFLIX_COPY_ARGS - , &s_mb, &s_moff -#endif - ); + ((rsm == NULL) ? hw_tls : 0)); if (len <= (tp->t_maxseg - optlen)) { /* * Must have ran out of mbufs for the copy diff --git a/sys/netinet/tcp_stacks/rack_bbr_common.c b/sys/netinet/tcp_stacks/rack_bbr_common.c index da26b8cb1f9b..d1c4ba58bf55 100644 --- a/sys/netinet/tcp_stacks/rack_bbr_common.c +++ b/sys/netinet/tcp_stacks/rack_bbr_common.c @@ -672,7 +672,7 @@ ctf_do_dropafterack(struct mbuf *m, struct tcpcb *tp, struct tcphdr *th, int32_t (SEQ_GT(tp->snd_una, th->th_ack) || SEQ_GT(th->th_ack, tp->snd_max))) { *ret_val = 1; - ctf_do_dropwithreset(m, tp, th, BANDLIM_RST_OPENPORT, tlen); + ctf_do_dropwithreset(m, tp, th, BANDLIM_TCP_RST, tlen); return; } else *ret_val = 0; diff --git a/sys/netinet/tcp_stacks/rack_pcm.c b/sys/netinet/tcp_stacks/rack_pcm.c index b0e300847c4a..101e6826536c 100644 --- a/sys/netinet/tcp_stacks/rack_pcm.c +++ b/sys/netinet/tcp_stacks/rack_pcm.c @@ -172,7 +172,7 @@ rack_update_pcm_ack(struct tcp_rack *rack, int was_cumack, uint32_t start, uint3 goto skip_ack_accounting; } /* - * Record ACK data. + * Record ACK data. 
*/ ack_arrival = tcp_tv_to_lusectick(&rack->r_ctl.act_rcv_time); if (SEQ_GT(end, rack->r_ctl.pcm_i.eseq)) { @@ -305,7 +305,7 @@ skip_ack_accounting: 0, &log, false, NULL, NULL, 0, &tv); } } - /* + /* * Here we need a lot to be added including: * 1) Some form of measurement, where if we think the measurement * is valid we iterate over the PCM data and come up with a path diff --git a/sys/netinet/tcp_stacks/sack_filter.c b/sys/netinet/tcp_stacks/sack_filter.c index fc9ee8454a1e..2b70548f3cc6 100644 --- a/sys/netinet/tcp_stacks/sack_filter.c +++ b/sys/netinet/tcp_stacks/sack_filter.c @@ -400,7 +400,7 @@ sack_filter_run(struct sack_filter *sf, struct sackblk *in, int numblks, tcp_seq break; } /* Copy it out to the outbound */ - memcpy(&in[at], &blkboard[i], sizeof(struct sackblk)); + memcpy(&in[at], &blkboard[i], sizeof(struct sackblk)); at++; room--; /* now lets add it to our sack-board */ @@ -588,7 +588,7 @@ sack_filter_blks(struct tcpcb *tp, struct sack_filter *sf, struct sackblk *in, i sf->sf_ack = th_ack; for(i=0, sf->sf_cur=0; i<numblks; i++) { - if ((in[i].end != tp->snd_max) && + if ((in[i].end != tp->snd_max) && ((in[i].end - in[i].start) < segmax)) { /* * We do not accept blocks less than a MSS minus all @@ -707,7 +707,7 @@ main(int argc, char **argv) out = stdout; memset(&tp, 0, sizeof(tp)); tp.t_maxseg = 1460; - + while ((i = getopt(argc, argv, "dIi:o:?hS:")) != -1) { switch (i) { case 'S': @@ -883,7 +883,7 @@ main(int argc, char **argv) } else { printf("can't open sack_setup.bin -- sorry no load\n"); } - + } else if (strncmp(buffer, "help", 4) == 0) { help: fprintf(out, "You can input:\n"); diff --git a/sys/netinet/tcp_stacks/sack_filter.h b/sys/netinet/tcp_stacks/sack_filter.h index b12fcf84567c..a1c0684a4359 100644 --- a/sys/netinet/tcp_stacks/sack_filter.h +++ b/sys/netinet/tcp_stacks/sack_filter.h @@ -42,7 +42,7 @@ * previously processed sack information. * * The second thing that the sack filter does is help protect against malicious - * attackers that are trying to attack any linked lists (or other data structures) + * attackers that are trying to attack any linked lists (or other data structures) * that are used in sack processing. Consider an attacker sending in sacks for * every other byte of data outstanding. 
This could in theory drastically split * up any scoreboard you are maintaining and make you search through a very large diff --git a/sys/netinet/tcp_subr.c b/sys/netinet/tcp_subr.c index cd42a67294a6..26e7e53d540c 100644 --- a/sys/netinet/tcp_subr.c +++ b/sys/netinet/tcp_subr.c @@ -2720,9 +2720,15 @@ tcp_ktlslist_locked(SYSCTL_HANDLER_ARGS, bool export_keys) ksr->snd_tag->sw->snd_tag_status_str != NULL) { sz = SND_TAG_STATUS_MAXLEN; - ksr->snd_tag->sw->snd_tag_status_str( + in_pcbref(inp); + INP_RUNLOCK(inp); + error = ksr->snd_tag->sw-> + snd_tag_status_str( ksr->snd_tag, NULL, &sz); - len += sz; + if (in_pcbrele_rlock(inp)) + return (EDEADLK); + if (error == 0) + len += sz; } } kss = so->so_snd.sb_tls_info; @@ -2739,9 +2745,15 @@ tcp_ktlslist_locked(SYSCTL_HANDLER_ARGS, bool export_keys) kss->snd_tag->sw->snd_tag_status_str != NULL) { sz = SND_TAG_STATUS_MAXLEN; - kss->snd_tag->sw->snd_tag_status_str( + in_pcbref(inp); + INP_RUNLOCK(inp); + error = kss->snd_tag->sw-> + snd_tag_status_str( kss->snd_tag, NULL, &sz); - len += sz; + if (in_pcbrele_rlock(inp)) + return (EDEADLK); + if (error == 0) + len += sz; } } if (p) { @@ -2811,9 +2823,16 @@ tcp_ktlslist_locked(SYSCTL_HANDLER_ARGS, bool export_keys) if (ksr->snd_tag != NULL && ksr->snd_tag->sw->snd_tag_status_str != NULL) { sz = SND_TAG_STATUS_MAXLEN; - ksr->snd_tag->sw->snd_tag_status_str( + in_pcbref(inp); + INP_RUNLOCK(inp); + error = ksr->snd_tag->sw->snd_tag_status_str( ksr->snd_tag, buf + len, &sz); - len += sz; + if (in_pcbrele_rlock(inp)) + return (EDEADLK); + if (error == 0) { + xktls->rcv.drv_st_len = sz; + len += sz; + } } } if (kss != NULL && kss->gen == xig.xig_gen) { @@ -2828,9 +2847,16 @@ tcp_ktlslist_locked(SYSCTL_HANDLER_ARGS, bool export_keys) if (kss->snd_tag != NULL && kss->snd_tag->sw->snd_tag_status_str != NULL) { sz = SND_TAG_STATUS_MAXLEN; - kss->snd_tag->sw->snd_tag_status_str( + in_pcbref(inp); + INP_RUNLOCK(inp); + error = kss->snd_tag->sw->snd_tag_status_str( kss->snd_tag, buf + len, &sz); - len += sz; + if (in_pcbrele_rlock(inp)) + return (EDEADLK); + if (error == 0) { + xktls->snd.drv_st_len = sz; + len += sz; + } } } len = roundup2(len, __alignof(*xktls)); @@ -2858,12 +2884,23 @@ tcp_ktlslist_locked(SYSCTL_HANDLER_ARGS, bool export_keys) static int tcp_ktlslist1(SYSCTL_HANDLER_ARGS, bool export_keys) { - int res; - - sx_xlock(&ktlslist_lock); - res = tcp_ktlslist_locked(oidp, arg1, arg2, req, export_keys); - sx_xunlock(&ktlslist_lock); - return (res); + int repeats, error; + + for (repeats = 0; repeats < 100; repeats++) { + if (sx_xlock_sig(&ktlslist_lock)) + return (EINTR); + error = tcp_ktlslist_locked(oidp, arg1, arg2, req, + export_keys); + sx_xunlock(&ktlslist_lock); + if (error != EDEADLK) + break; + if (sig_intr() != 0) { + error = EINTR; + break; + } + req->oldidx = 0; + } + return (error); } static int @@ -4500,7 +4537,7 @@ tcp_change_time_units(struct tcpcb *tp, int granularity) panic("Unknown granularity:%d tp:%p", granularity, tp); } -#endif +#endif } void diff --git a/sys/netinet/tcp_timer.c b/sys/netinet/tcp_timer.c index 32ce3001929c..3b9fe7a317b0 100644 --- a/sys/netinet/tcp_timer.c +++ b/sys/netinet/tcp_timer.c @@ -757,8 +757,8 @@ tcp_timer_rexmt(struct tcpcb *tp) tp->t_maxseg = tp->t_pmtud_saved_maxseg; if (tp->t_maxseg < V_tcp_mssdflt) { /* - * The MSS is so small we should not - * process incoming SACK's since we are + * The MSS is so small we should not + * process incoming SACK's since we are * subject to attack in such a case. 
*/ tp->t_flags2 |= TF2_PROC_SACK_PROHIBIT; diff --git a/sys/netinet/tcp_usrreq.c b/sys/netinet/tcp_usrreq.c index 687b0d538666..70e4c04b79e5 100644 --- a/sys/netinet/tcp_usrreq.c +++ b/sys/netinet/tcp_usrreq.c @@ -1768,9 +1768,9 @@ tcp_ctloutput_set(struct inpcb *inp, struct sockopt *sopt) /* * Release the ref count the lookup * acquired. - */ + */ refcount_release(&blk->tfb_refcnt); - /* + /* * Now there is a chance that the * init() function mucked with some * things before it failed, such as @@ -1800,7 +1800,7 @@ tcp_ctloutput_set(struct inpcb *inp, struct sockopt *sopt) * new one already. */ refcount_release(&tp->t_fb->tfb_refcnt); - /* + /* * Set in the new stack. */ tp->t_fb = blk; @@ -1934,7 +1934,7 @@ tcp_set_cc_mod(struct inpcb *inp, struct sockopt *sopt) CC_LIST_RUNLOCK(); return(ESRCH); } - /* + /* * With a reference the algorithm cannot be removed * so we hold a reference through the change process. */ diff --git a/sys/netinet/tcp_var.h b/sys/netinet/tcp_var.h index 059b2aff689d..b90f65e83cb1 100644 --- a/sys/netinet/tcp_var.h +++ b/sys/netinet/tcp_var.h @@ -182,7 +182,7 @@ struct tcp_sendfile_track { * snd_una). When the response comes back indicating * that there was data (return value 1), then the caller * can build a sendmap entry based on the range and the - * times. The next query would then be done at the + * times. The next query would then be done at the * newly created sendmap_end. Repeated until sendmap_end == snd_max. * * Flags in sendmap_flags are defined below as well. @@ -197,7 +197,7 @@ struct tcp_sendfile_track { * The rack_times are a misc collection of information that * the old stack might possibly fill in. Of course its possible * that an old stack may not have a piece of information. If so - * then setting that value to zero is advised. Setting any + * then setting that value to zero is advised. Setting any * timestamp passed should only place a zero in it when it * is unfilled. This may mean that a time is off by a micro-second * but this is ok in the grand scheme of things. @@ -205,13 +205,13 @@ struct tcp_sendfile_track { * When switching stacks it is desireable to get as much information * from the old stack to the new stack as possible. Though not always * will the stack be compatible in the types of information. The - * init() function needs to take care when it begins changing + * init() function needs to take care when it begins changing * things such as inp_flags2 and the timer units to position these * changes at a point where it is unlikely they will fail after * making such changes. A stack optionally can have an "undo" - * function + * function * - * To transfer information to the old stack from the new in + * To transfer information to the old stack from the new in * respect to LRO and the inp_flags2, the new stack should set * the inp_flags2 to what it supports. The old stack in its * fini() function should call the tcp_handle_orphaned_packets() @@ -544,13 +544,13 @@ typedef enum { * do is: * a) Make sure that the inp_flags2 is setup correctly * for LRO. There are two flags that the previous - * stack may have set INP_MBUF_ACKCMP and + * stack may have set INP_MBUF_ACKCMP and * INP_SUPPORTS_MBUFQ. If the new stack does not * support these it *should* clear the flags. * b) Make sure that the timers are in the proper * granularity that the stack wants. The stack * should check the t_tmr_granularity field. 
Currently - * there are two values that it may hold + * there are two values that it may hold * TCP_TMR_GRANULARITY_TICKS and TCP_TMR_GRANULARITY_USEC. * Use the functions tcp_timer_convert(tp, granularity); * to move the timers to the correct format for your stack. @@ -558,14 +558,14 @@ typedef enum { * The new stack may also optionally query the tfb_chg_query * function if the old stack has one. The new stack may ask * for one of three entries and can also state to the old - * stack its support for the INP_MBUF_ACKCMP and + * stack its support for the INP_MBUF_ACKCMP and * INP_SUPPORTS_MBUFQ. This is important since if there are * queued ack's without that statement the old stack will * be forced to discard the queued acks. The requests that * can be made for information by the new stacks are: * * Note also that the tfb_tcp_fb_init() when called can - * determine if a query is needed by looking at the + * determine if a query is needed by looking at the * value passed in the ptr. The ptr is designed to be * set in with any allocated memory, but the address * of the condtion (ptr == &tp->t_fb_ptr) will be @@ -573,17 +573,17 @@ typedef enum { * setup of a tcb (which means no query would be needed). * If, however, the value is not t_fb_ptr, then the caller * is in the middle of a stack switch and is the new stack. - * A query would be appropriate (if the new stack support + * A query would be appropriate (if the new stack support * the query mechanism). * * TCP_QUERY_SENDMAP - Query of outstanding data. * TCP_QUERY_TIMERS_UP - Query about running timers. - * TCP_SUPPORTED_LRO - Declaration in req_param of - * the inp_flags2 supported by + * TCP_SUPPORTED_LRO - Declaration in req_param of + * the inp_flags2 supported by * the new stack. * TCP_QUERY_RACK_TIMES - Enquire about various timestamps * and states the old stack may be in. - * + * * tfb_tcp_fb_fini is changed to add a flag to tell * the old stack if the tcb is being destroyed or * not. A one in the flag means the TCB is being |
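
A few illustrative sketches of the patterns this change establishes follow.

The new in_pcbrele_rlock() helper added in in_pcb.c re-acquires the read lock and drops the caller's reference in one step, returning true when the inpcb is no longer usable. Below is a minimal sketch of the caller pattern it enables, as used by tcp_ktlslist_locked(); some_sleepable_query() is a hypothetical stand-in for the snd_tag_status_str() callbacks, and the EDEADLK convention is the one tcp_ktlslist1() turns into a bounded retry of the whole scan:

static int
example_query(struct inpcb *inp)
{
	int error;

	INP_RLOCK_ASSERT(inp);
	in_pcbref(inp);			/* keep inp alive across the unlock */
	INP_RUNLOCK(inp);

	error = some_sleepable_query();	/* hypothetical; may sleep */

	/*
	 * Re-lock and release our reference.  A true return means the
	 * inpcb was freed or marked INP_FREED while we slept, and the
	 * lock is not held on return.
	 */
	if (in_pcbrele_rlock(inp))
		return (EDEADLK);	/* caller restarts the scan */
	return (error);
}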
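
Collapsing BANDLIM_RST_CLOSEDPORT and BANDLIM_RST_OPENPORT into a single BANDLIM_TCP_RST moves the open/closed-port distinction into the new closed_port flag in tcp_input(). A hedged restatement of the resulting net.inet.tcp.blackhole test (the helper name is invented for illustration):

/* True when the RST should be suppressed rather than sent. */
static bool
blackhole_suppresses_rst(int blackhole, bool closed_port, bool syn)
{
	if (!closed_port)
		return (blackhole == 3);	/* open port: mode 3 only */
	/* closed port: mode 1 blackholes SYNs, modes 2 and up everything */
	return ((blackhole == 1 && syn) || blackhole > 1);
}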
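
TCP_LOG_EVENTP() no longer tests the logging state itself; it now asserts that black-box logging is enabled, so every caller must perform the tcp_bblogging_on() check first, exactly as tcp_hpts_log() now does. A minimal sketch of the new calling convention (the event id and log fields are placeholders):

if (tcp_bblogging_on(tp)) {
	union tcp_log_stackspecific log;
	struct timeval tv;

	memset(&log, 0, sizeof(log));
	log.u_bbr.timeStamp = tcp_get_usecs(&tv);	/* also fills tv */
	TCP_LOG_EVENTP(tp, NULL, &tptosocket(tp)->so_rcv,
	    &tptosocket(tp)->so_snd, BBR_LOG_HPTSDIAG, 0,
	    0, &log, false, &tv);
}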
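
The tcp_var.h commentary explains how a stack's tfb_tcp_fb_init() can distinguish a fresh attach from a mid-connection stack switch and query the outgoing stack. A sketch under the conventions described there, assuming the stock struct tcp_query_resp layout; treat the details as illustrative rather than a verbatim API:

static int
example_fb_init(struct tcpcb *tp, void **ptr)
{
	struct tcp_query_resp qr;

	/*
	 * ptr == &tp->t_fb_ptr means initial setup of the tcb; any
	 * other value means we are the new stack in a live switch,
	 * and tp->t_fb still points at the outgoing stack.
	 */
	if (ptr != &tp->t_fb_ptr && tp->t_fb->tfb_chg_query != NULL) {
		memset(&qr, 0, sizeof(qr));
		qr.req = TCP_QUERY_TIMERS_UP;	/* ask about running timers */
		if ((*tp->t_fb->tfb_chg_query)(tp, &qr) == 1) {
			/* re-arm the reported timers in our granularity */
		}
	}
	return (0);
}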