diff options
Diffstat (limited to 'sys/netinet')
-rw-r--r-- | sys/netinet/icmp_var.h | 9 | ||||
-rw-r--r-- | sys/netinet/in_pcb.c | 17 | ||||
-rw-r--r-- | sys/netinet/in_pcb.h | 1 | ||||
-rw-r--r-- | sys/netinet/ip_icmp.c | 3 | ||||
-rw-r--r-- | sys/netinet/tcp_hpts.c | 73 | ||||
-rw-r--r-- | sys/netinet/tcp_hpts.h | 17 | ||||
-rw-r--r-- | sys/netinet/tcp_input.c | 48 | ||||
-rw-r--r-- | sys/netinet/tcp_log_buf.c | 2 | ||||
-rw-r--r-- | sys/netinet/tcp_log_buf.h | 8 | ||||
-rw-r--r-- | sys/netinet/tcp_stacks/bbr.c | 22 | ||||
-rw-r--r-- | sys/netinet/tcp_stacks/rack.c | 63 | ||||
-rw-r--r-- | sys/netinet/tcp_stacks/rack_bbr_common.c | 2 | ||||
-rw-r--r-- | sys/netinet/tcp_subr.c | 65 |
13 files changed, 195 insertions, 135 deletions
diff --git a/sys/netinet/icmp_var.h b/sys/netinet/icmp_var.h index b1f2b0ebf911..d6b75e482e35 100644 --- a/sys/netinet/icmp_var.h +++ b/sys/netinet/icmp_var.h @@ -104,11 +104,10 @@ extern int badport_bandlim(int); #define BANDLIM_ICMP_UNREACH 0 #define BANDLIM_ICMP_ECHO 1 #define BANDLIM_ICMP_TSTAMP 2 -#define BANDLIM_RST_CLOSEDPORT 3 /* No connection, and no listeners */ -#define BANDLIM_RST_OPENPORT 4 /* No connection, listener */ -#define BANDLIM_ICMP6_UNREACH 5 -#define BANDLIM_SCTP_OOTB 6 -#define BANDLIM_MAX 7 +#define BANDLIM_TCP_RST 3 +#define BANDLIM_ICMP6_UNREACH 4 +#define BANDLIM_SCTP_OOTB 5 +#define BANDLIM_MAX 6 #endif #endif diff --git a/sys/netinet/in_pcb.c b/sys/netinet/in_pcb.c index bccd4b84561a..dbe48242381d 100644 --- a/sys/netinet/in_pcb.c +++ b/sys/netinet/in_pcb.c @@ -1745,6 +1745,23 @@ in_pcbrele(struct inpcb *inp, const inp_lookup_t lock) } /* + * Dereference and rlock inp, for which the caller must own the + * reference. Returns true if inp no longer usable, false otherwise. + */ +bool +in_pcbrele_rlock(struct inpcb *inp) +{ + INP_RLOCK(inp); + if (in_pcbrele_rlocked(inp)) + return (true); + if ((inp->inp_flags & INP_FREED) != 0) { + INP_RUNLOCK(inp); + return (true); + } + return (false); +} + +/* * Unconditionally schedule an inpcb to be freed by decrementing its * reference count, which should occur only after the inpcb has been detached * from its socket. If another thread holds a temporary reference (acquired diff --git a/sys/netinet/in_pcb.h b/sys/netinet/in_pcb.h index 57cf15ca37fc..9e0618e87601 100644 --- a/sys/netinet/in_pcb.h +++ b/sys/netinet/in_pcb.h @@ -681,6 +681,7 @@ void in_pcbref(struct inpcb *); bool in_pcbrele(struct inpcb *, inp_lookup_t); bool in_pcbrele_rlocked(struct inpcb *); bool in_pcbrele_wlocked(struct inpcb *); +bool in_pcbrele_rlock(struct inpcb *inp); typedef bool inp_match_t(const struct inpcb *, void *); struct inpcb_iterator { diff --git a/sys/netinet/ip_icmp.c b/sys/netinet/ip_icmp.c index cb4b6df57c57..71b75d18efd0 100644 --- a/sys/netinet/ip_icmp.c +++ b/sys/netinet/ip_icmp.c @@ -1097,8 +1097,7 @@ static const char *icmp_rate_descrs[BANDLIM_MAX] = { [BANDLIM_ICMP_UNREACH] = "icmp unreach", [BANDLIM_ICMP_ECHO] = "icmp ping", [BANDLIM_ICMP_TSTAMP] = "icmp tstamp", - [BANDLIM_RST_CLOSEDPORT] = "closed port RST", - [BANDLIM_RST_OPENPORT] = "open port RST", + [BANDLIM_TCP_RST] = "tcp reset", [BANDLIM_ICMP6_UNREACH] = "icmp6 unreach", [BANDLIM_SCTP_OOTB] = "sctp ootb", }; diff --git a/sys/netinet/tcp_hpts.c b/sys/netinet/tcp_hpts.c index 91f8251589e4..b60cdf45af52 100644 --- a/sys/netinet/tcp_hpts.c +++ b/sys/netinet/tcp_hpts.c @@ -433,38 +433,40 @@ static void tcp_hpts_log(struct tcp_hpts_entry *hpts, struct tcpcb *tp, struct timeval *tv, int slots_to_run, int idx, bool from_callout) { - union tcp_log_stackspecific log; - /* - * Unused logs are - * 64 bit - delRate, rttProp, bw_inuse - * 16 bit - cwnd_gain - * 8 bit - bbr_state, bbr_substate, inhpts; - */ - memset(&log, 0, sizeof(log)); - log.u_bbr.flex1 = hpts->p_nxt_slot; - log.u_bbr.flex2 = hpts->p_cur_slot; - log.u_bbr.flex3 = hpts->p_prev_slot; - log.u_bbr.flex4 = idx; - log.u_bbr.flex5 = hpts->p_curtick; - log.u_bbr.flex6 = hpts->p_on_queue_cnt; - log.u_bbr.flex7 = hpts->p_cpu; - log.u_bbr.flex8 = (uint8_t)from_callout; - log.u_bbr.inflight = slots_to_run; - log.u_bbr.applimited = hpts->overidden_sleep; - log.u_bbr.delivered = hpts->saved_curtick; - log.u_bbr.timeStamp = tcp_tv_to_usectick(tv); - log.u_bbr.epoch = hpts->saved_curslot; - log.u_bbr.lt_epoch = hpts->saved_prev_slot; - log.u_bbr.pkts_out = hpts->p_delayed_by; - log.u_bbr.lost = hpts->p_hpts_sleep_time; - log.u_bbr.pacing_gain = hpts->p_cpu; - log.u_bbr.pkt_epoch = hpts->p_runningslot; - log.u_bbr.use_lt_bw = 1; - TCP_LOG_EVENTP(tp, NULL, - &tptosocket(tp)->so_rcv, - &tptosocket(tp)->so_snd, - BBR_LOG_HPTSDIAG, 0, - 0, &log, false, tv); + if (hpts_does_tp_logging && tcp_bblogging_on(tp)) { + union tcp_log_stackspecific log; + /* + * Unused logs are + * 64 bit - delRate, rttProp, bw_inuse + * 16 bit - cwnd_gain + * 8 bit - bbr_state, bbr_substate, inhpts; + */ + memset(&log, 0, sizeof(log)); + log.u_bbr.flex1 = hpts->p_nxt_slot; + log.u_bbr.flex2 = hpts->p_cur_slot; + log.u_bbr.flex3 = hpts->p_prev_slot; + log.u_bbr.flex4 = idx; + log.u_bbr.flex5 = hpts->p_curtick; + log.u_bbr.flex6 = hpts->p_on_queue_cnt; + log.u_bbr.flex7 = hpts->p_cpu; + log.u_bbr.flex8 = (uint8_t)from_callout; + log.u_bbr.inflight = slots_to_run; + log.u_bbr.applimited = hpts->overidden_sleep; + log.u_bbr.delivered = hpts->saved_curtick; + log.u_bbr.timeStamp = tcp_tv_to_usectick(tv); + log.u_bbr.epoch = hpts->saved_curslot; + log.u_bbr.lt_epoch = hpts->saved_prev_slot; + log.u_bbr.pkts_out = hpts->p_delayed_by; + log.u_bbr.lost = hpts->p_hpts_sleep_time; + log.u_bbr.pacing_gain = hpts->p_cpu; + log.u_bbr.pkt_epoch = hpts->p_runningslot; + log.u_bbr.use_lt_bw = 1; + TCP_LOG_EVENTP(tp, NULL, + &tptosocket(tp)->so_rcv, + &tptosocket(tp)->so_snd, + BBR_LOG_HPTSDIAG, 0, + 0, &log, false, tv); + } } static void @@ -1353,10 +1355,7 @@ again: } CURVNET_SET(inp->inp_vnet); /* Lets do any logging that we might want to */ - if (hpts_does_tp_logging && tcp_bblogging_on(tp)) { - tcp_hpts_log(hpts, tp, &tv, slots_to_run, i, - from_callout); - } + tcp_hpts_log(hpts, tp, &tv, slots_to_run, i, from_callout); if (tp->t_fb_ptr != NULL) { kern_prefetch(tp->t_fb_ptr, &did_prefetch); @@ -1487,7 +1486,7 @@ no_run: } void -__tcp_set_hpts(struct tcpcb *tp, int32_t line) +tcp_set_hpts(struct tcpcb *tp) { struct tcp_hpts_entry *hpts; int failed; diff --git a/sys/netinet/tcp_hpts.h b/sys/netinet/tcp_hpts.h index b097a2b98db9..f5856ed8e688 100644 --- a/sys/netinet/tcp_hpts.h +++ b/sys/netinet/tcp_hpts.h @@ -149,8 +149,7 @@ uint32_t tcp_hpts_insert_diag(struct tcpcb *tp, uint32_t slot, int32_t line, #define tcp_hpts_insert(inp, slot) \ tcp_hpts_insert_diag((inp), (slot), __LINE__, NULL) -void __tcp_set_hpts(struct tcpcb *tp, int32_t line); -#define tcp_set_hpts(a) __tcp_set_hpts(a, __LINE__) +void tcp_set_hpts(struct tcpcb *tp); void tcp_set_inp_to_drop(struct inpcb *inp, uint16_t reason); @@ -165,25 +164,25 @@ extern int32_t tcp_min_hptsi_time; * The following functions should also be available * to userspace as well. */ -static __inline uint32_t +static inline uint32_t tcp_tv_to_hptstick(const struct timeval *sv) { return ((sv->tv_sec * 100000) + (sv->tv_usec / HPTS_TICKS_PER_SLOT)); } -static __inline uint32_t +static inline uint32_t tcp_tv_to_usectick(const struct timeval *sv) { return ((uint32_t) ((sv->tv_sec * HPTS_USEC_IN_SEC) + sv->tv_usec)); } -static __inline uint32_t +static inline uint32_t tcp_tv_to_mssectick(const struct timeval *sv) { return ((uint32_t) ((sv->tv_sec * HPTS_MSEC_IN_SEC) + (sv->tv_usec/HPTS_USEC_IN_MSEC))); } -static __inline uint64_t +static inline uint64_t tcp_tv_to_lusectick(const struct timeval *sv) { return ((uint64_t)((sv->tv_sec * HPTS_USEC_IN_SEC) + sv->tv_usec)); @@ -199,7 +198,7 @@ get_hpts_min_sleep_time(void) return (tcp_min_hptsi_time + HPTS_TICKS_PER_SLOT); } -static __inline uint32_t +static inline uint32_t tcp_gethptstick(struct timeval *sv) { struct timeval tv; @@ -210,7 +209,7 @@ tcp_gethptstick(struct timeval *sv) return (tcp_tv_to_hptstick(sv)); } -static __inline uint64_t +static inline uint64_t tcp_get_u64_usecs(struct timeval *tv) { struct timeval tvd; @@ -221,7 +220,7 @@ tcp_get_u64_usecs(struct timeval *tv) return (tcp_tv_to_lusectick(tv)); } -static __inline uint32_t +static inline uint32_t tcp_get_usecs(struct timeval *tv) { struct timeval tvd; diff --git a/sys/netinet/tcp_input.c b/sys/netinet/tcp_input.c index 7c032e13f37a..de428ae1af6f 100644 --- a/sys/netinet/tcp_input.c +++ b/sys/netinet/tcp_input.c @@ -621,6 +621,7 @@ tcp_input_with_port(struct mbuf **mp, int *offp, int proto, uint16_t port) #endif /* INET6 */ struct tcpopt to; /* options in this segment */ char *s = NULL; /* address and port logging */ + bool closed_port = false; /* segment is hitting a closed port */ NET_EPOCH_ASSERT(); @@ -907,7 +908,8 @@ findpcb: log(LOG_INFO, "%s; %s: Connection attempt " "to closed port\n", s, __func__); } - rstreason = BANDLIM_RST_CLOSEDPORT; + rstreason = BANDLIM_TCP_RST; + closed_port = true; goto dropwithreset; } INP_LOCK_ASSERT(inp); @@ -998,12 +1000,14 @@ findpcb: * down or it is in the CLOSED state. Either way we drop the * segment and send an appropriate response. */ - rstreason = BANDLIM_RST_CLOSEDPORT; + rstreason = BANDLIM_TCP_RST; + closed_port = true; goto dropwithreset; } if ((tp->t_port != port) && (tp->t_state > TCPS_LISTEN)) { - rstreason = BANDLIM_RST_CLOSEDPORT; + rstreason = BANDLIM_TCP_RST; + closed_port = true; goto dropwithreset; } @@ -1055,6 +1059,8 @@ findpcb: * socket appended to the listen queue in SYN_RECEIVED state. */ if ((thflags & (TH_RST|TH_ACK|TH_SYN)) == TH_ACK) { + int result; + /* * Parse the TCP options here because * syncookies need access to the reflected @@ -1064,8 +1070,8 @@ findpcb: /* * NB: syncache_expand() doesn't unlock inp. */ - rstreason = syncache_expand(&inc, &to, th, &so, m, port); - if (rstreason < 0) { + result = syncache_expand(&inc, &to, th, &so, m, port); + if (result < 0) { /* * A failing TCP MD5 signature comparison * must result in the segment being dropped @@ -1073,7 +1079,7 @@ findpcb: * to the sender. */ goto dropunlock; - } else if (rstreason == 0) { + } else if (result == 0) { /* * No syncache entry, or ACK was not for our * SYN/ACK. Do our protection against double @@ -1092,7 +1098,7 @@ findpcb: * of the failure cause. */ INP_WUNLOCK(inp); - rstreason = BANDLIM_RST_OPENPORT; + rstreason = BANDLIM_TCP_RST; lookupflag &= ~INPLOOKUP_WILDCARD; goto findpcb; } @@ -1183,7 +1189,7 @@ tfo_socket_result: s, __func__); syncache_badack(&inc, port); /* XXX: Not needed! */ TCPSTAT_INC(tcps_badsyn); - rstreason = BANDLIM_RST_OPENPORT; + rstreason = BANDLIM_TCP_RST; goto dropwithreset; } /* @@ -1259,7 +1265,7 @@ tfo_socket_result: "Connection attempt to deprecated " "IPv6 address rejected\n", s, __func__); - rstreason = BANDLIM_RST_OPENPORT; + rstreason = BANDLIM_TCP_RST; goto dropwithreset; } } @@ -1380,9 +1386,10 @@ dropwithreset: * When blackholing do not respond with a RST but * completely ignore the segment and drop it. */ - if (((rstreason == BANDLIM_RST_OPENPORT && V_blackhole == 3) || - (rstreason == BANDLIM_RST_CLOSEDPORT && - ((V_blackhole == 1 && (thflags & TH_SYN)) || V_blackhole > 1))) && + if (rstreason == BANDLIM_TCP_RST && + ((!closed_port && V_blackhole == 3) || + (closed_port && + ((V_blackhole == 1 && (thflags & TH_SYN)) || V_blackhole > 1))) && (V_blackhole_local || ( #ifdef INET6 isipv6 ? !in6_localip(&ip6->ip6_src) : @@ -1515,7 +1522,9 @@ tcp_do_segment(struct tcpcb *tp, struct mbuf *m, struct tcphdr *th, struct tcpopt to; int tfo_syn; u_int maxseg = 0; + bool no_data; + no_data = (tlen == 0); thflags = tcp_get_flags(th); tp->sackhint.last_sack_ack = 0; sack_changed = SACK_NOCHANGE; @@ -1754,7 +1763,7 @@ tcp_do_segment(struct tcpcb *tp, struct mbuf *m, struct tcphdr *th, tp->ts_recent = to.to_tsval; } - if (tlen == 0) { + if (no_data) { if (SEQ_GT(th->th_ack, tp->snd_una) && SEQ_LEQ(th->th_ack, tp->snd_max) && !IN_RECOVERY(tp->t_flags) && @@ -1963,7 +1972,7 @@ tcp_do_segment(struct tcpcb *tp, struct mbuf *m, struct tcphdr *th, if ((thflags & TH_ACK) && (SEQ_LEQ(th->th_ack, tp->snd_una) || SEQ_GT(th->th_ack, tp->snd_max))) { - rstreason = BANDLIM_RST_OPENPORT; + rstreason = BANDLIM_TCP_RST; tcp_log_end_status(tp, TCP_EI_STATUS_RST_IN_FRONT); goto dropwithreset; } @@ -1976,7 +1985,7 @@ tcp_do_segment(struct tcpcb *tp, struct mbuf *m, struct tcphdr *th, * FIN, or a RST. */ if ((thflags & (TH_SYN|TH_ACK)) == (TH_SYN|TH_ACK)) { - rstreason = BANDLIM_RST_OPENPORT; + rstreason = BANDLIM_TCP_RST; tcp_log_end_status(tp, TCP_EI_STATUS_RST_IN_FRONT); goto dropwithreset; } else if (thflags & TH_SYN) { @@ -2244,7 +2253,7 @@ tcp_do_segment(struct tcpcb *tp, struct mbuf *m, struct tcphdr *th, * for the "LAND" DoS attack. */ if (tp->t_state == TCPS_SYN_RECEIVED && SEQ_LT(th->th_seq, tp->irs)) { - rstreason = BANDLIM_RST_OPENPORT; + rstreason = BANDLIM_TCP_RST; tcp_log_end_status(tp, TCP_EI_STATUS_RST_IN_FRONT); goto dropwithreset; } @@ -2557,7 +2566,7 @@ tcp_do_segment(struct tcpcb *tp, struct mbuf *m, struct tcphdr *th, if (SEQ_LEQ(th->th_ack, tp->snd_una)) { maxseg = tcp_maxseg(tp); - if (tlen == 0 && + if (no_data && (tiwin == tp->snd_wnd || (tp->t_flags & TF_SACK_PERMIT))) { /* @@ -3113,8 +3122,7 @@ step6: (tp->snd_wl1 == th->th_seq && (SEQ_LT(tp->snd_wl2, th->th_ack) || (tp->snd_wl2 == th->th_ack && tiwin > tp->snd_wnd))))) { /* keep track of pure window updates */ - if (tlen == 0 && - tp->snd_wl2 == th->th_ack && tiwin > tp->snd_wnd) + if (no_data && tp->snd_wl2 == th->th_ack && tiwin > tp->snd_wnd) TCPSTAT_INC(tcps_rcvwinupd); tp->snd_wnd = tiwin; tp->snd_wl1 = th->th_seq; @@ -3424,7 +3432,7 @@ dropafterack: if (tp->t_state == TCPS_SYN_RECEIVED && (thflags & TH_ACK) && (SEQ_GT(tp->snd_una, th->th_ack) || SEQ_GT(th->th_ack, tp->snd_max)) ) { - rstreason = BANDLIM_RST_OPENPORT; + rstreason = BANDLIM_TCP_RST; tcp_log_end_status(tp, TCP_EI_STATUS_RST_IN_FRONT); goto dropwithreset; } diff --git a/sys/netinet/tcp_log_buf.c b/sys/netinet/tcp_log_buf.c index 75d693bc019b..e24790ece43d 100644 --- a/sys/netinet/tcp_log_buf.c +++ b/sys/netinet/tcp_log_buf.c @@ -2878,7 +2878,7 @@ tcp_log_sendfile(struct socket *so, off_t offset, size_t nbytes, int flags) /* double check log state now that we have the lock */ if (inp->inp_flags & INP_DROPPED) goto done; - if (tp->_t_logstate != TCP_LOG_STATE_OFF) { + if (tcp_bblogging_on(tp)) { struct timeval tv; tcp_log_eventspecific_t log; diff --git a/sys/netinet/tcp_log_buf.h b/sys/netinet/tcp_log_buf.h index fef32e16b2e4..3e7eef8a1cda 100644 --- a/sys/netinet/tcp_log_buf.h +++ b/sys/netinet/tcp_log_buf.h @@ -539,12 +539,12 @@ struct tcpcb; NULL, NULL, 0, NULL); \ } while (0) #endif /* TCP_LOG_FORCEVERBOSE */ +/* Assumes/requires the caller has already checked tcp_bblogging_on(tp). */ #define TCP_LOG_EVENTP(tp, th, rxbuf, txbuf, eventid, errornum, len, stackinfo, th_hostorder, tv) \ do { \ - if (tcp_bblogging_on(tp)) \ - tcp_log_event(tp, th, rxbuf, txbuf, eventid, \ - errornum, len, stackinfo, th_hostorder, \ - NULL, NULL, 0, tv); \ + KASSERT(tcp_bblogging_on(tp), ("bblogging is off")); \ + tcp_log_event(tp, th, rxbuf, txbuf, eventid, errornum, len, \ + stackinfo, th_hostorder, NULL, NULL, 0, tv); \ } while (0) #ifdef TCP_BLACKBOX diff --git a/sys/netinet/tcp_stacks/bbr.c b/sys/netinet/tcp_stacks/bbr.c index e2cfec5c9275..d2636f01714e 100644 --- a/sys/netinet/tcp_stacks/bbr.c +++ b/sys/netinet/tcp_stacks/bbr.c @@ -8763,7 +8763,7 @@ bbr_do_syn_sent(struct mbuf *m, struct tcphdr *th, struct socket *so, (SEQ_LEQ(th->th_ack, tp->iss) || SEQ_GT(th->th_ack, tp->snd_max))) { tcp_log_end_status(tp, TCP_EI_STATUS_RST_IN_FRONT); - ctf_do_dropwithreset(m, tp, th, BANDLIM_RST_OPENPORT, tlen); + ctf_do_dropwithreset(m, tp, th, BANDLIM_TCP_RST, tlen); return (1); } if ((thflags & (TH_ACK | TH_RST)) == (TH_ACK | TH_RST)) { @@ -8965,7 +8965,7 @@ bbr_do_syn_recv(struct mbuf *m, struct tcphdr *th, struct socket *so, (SEQ_LEQ(th->th_ack, tp->snd_una) || SEQ_GT(th->th_ack, tp->snd_max))) { tcp_log_end_status(tp, TCP_EI_STATUS_RST_IN_FRONT); - ctf_do_dropwithreset(m, tp, th, BANDLIM_RST_OPENPORT, tlen); + ctf_do_dropwithreset(m, tp, th, BANDLIM_TCP_RST, tlen); return (1); } if (tp->t_flags & TF_FASTOPEN) { @@ -8977,7 +8977,7 @@ bbr_do_syn_recv(struct mbuf *m, struct tcphdr *th, struct socket *so, */ if ((thflags & (TH_SYN | TH_ACK)) == (TH_SYN | TH_ACK)) { tcp_log_end_status(tp, TCP_EI_STATUS_RST_IN_FRONT); - ctf_do_dropwithreset(m, tp, th, BANDLIM_RST_OPENPORT, tlen); + ctf_do_dropwithreset(m, tp, th, BANDLIM_TCP_RST, tlen); return (1); } else if (thflags & TH_SYN) { /* non-initial SYN is ignored */ @@ -9010,7 +9010,7 @@ bbr_do_syn_recv(struct mbuf *m, struct tcphdr *th, struct socket *so, */ if (SEQ_LT(th->th_seq, tp->irs)) { tcp_log_end_status(tp, TCP_EI_STATUS_RST_IN_FRONT); - ctf_do_dropwithreset(m, tp, th, BANDLIM_RST_OPENPORT, tlen); + ctf_do_dropwithreset(m, tp, th, BANDLIM_TCP_RST, tlen); return (1); } if (ctf_drop_checks(to, m, th, tp, &tlen, &thflags, &drop_hdrlen, &ret_val)) { @@ -9288,7 +9288,7 @@ bbr_do_established(struct mbuf *m, struct tcphdr *th, struct socket *so, if (sbavail(&so->so_snd)) { if (ctf_progress_timeout_check(tp, true)) { bbr_log_progress_event(bbr, tp, tick, PROGRESS_DROP, __LINE__); - ctf_do_dropwithreset_conn(m, tp, th, BANDLIM_RST_OPENPORT, tlen); + ctf_do_dropwithreset_conn(m, tp, th, BANDLIM_TCP_RST, tlen); return (1); } } @@ -9385,7 +9385,7 @@ bbr_do_close_wait(struct mbuf *m, struct tcphdr *th, struct socket *so, if (sbavail(&so->so_snd)) { if (ctf_progress_timeout_check(tp, true)) { bbr_log_progress_event(bbr, tp, tick, PROGRESS_DROP, __LINE__); - ctf_do_dropwithreset_conn(m, tp, th, BANDLIM_RST_OPENPORT, tlen); + ctf_do_dropwithreset_conn(m, tp, th, BANDLIM_TCP_RST, tlen); return (1); } } @@ -9535,7 +9535,7 @@ bbr_do_fin_wait_1(struct mbuf *m, struct tcphdr *th, struct socket *so, if (sbavail(&so->so_snd)) { if (ctf_progress_timeout_check(tp, true)) { bbr_log_progress_event(bbr, tp, tick, PROGRESS_DROP, __LINE__); - ctf_do_dropwithreset_conn(m, tp, th, BANDLIM_RST_OPENPORT, tlen); + ctf_do_dropwithreset_conn(m, tp, th, BANDLIM_TCP_RST, tlen); return (1); } } @@ -9637,7 +9637,7 @@ bbr_do_closing(struct mbuf *m, struct tcphdr *th, struct socket *so, if (sbavail(&so->so_snd)) { if (ctf_progress_timeout_check(tp, true)) { bbr_log_progress_event(bbr, tp, tick, PROGRESS_DROP, __LINE__); - ctf_do_dropwithreset_conn(m, tp, th, BANDLIM_RST_OPENPORT, tlen); + ctf_do_dropwithreset_conn(m, tp, th, BANDLIM_TCP_RST, tlen); return (1); } } @@ -9739,7 +9739,7 @@ bbr_do_lastack(struct mbuf *m, struct tcphdr *th, struct socket *so, if (sbavail(&so->so_snd)) { if (ctf_progress_timeout_check(tp, true)) { bbr_log_progress_event(bbr, tp, tick, PROGRESS_DROP, __LINE__); - ctf_do_dropwithreset_conn(m, tp, th, BANDLIM_RST_OPENPORT, tlen); + ctf_do_dropwithreset_conn(m, tp, th, BANDLIM_TCP_RST, tlen); return (1); } } @@ -9848,7 +9848,7 @@ bbr_do_fin_wait_2(struct mbuf *m, struct tcphdr *th, struct socket *so, if (sbavail(&so->so_snd)) { if (ctf_progress_timeout_check(tp, true)) { bbr_log_progress_event(bbr, tp, tick, PROGRESS_DROP, __LINE__); - ctf_do_dropwithreset_conn(m, tp, th, BANDLIM_RST_OPENPORT, tlen); + ctf_do_dropwithreset_conn(m, tp, th, BANDLIM_TCP_RST, tlen); return (1); } } @@ -11510,7 +11510,7 @@ bbr_do_segment_nounlock(struct tcpcb *tp, struct mbuf *m, struct tcphdr *th, if ((tp->t_state == TCPS_SYN_SENT) && (thflags & TH_ACK) && (SEQ_LEQ(th->th_ack, tp->iss) || SEQ_GT(th->th_ack, tp->snd_max))) { tcp_log_end_status(tp, TCP_EI_STATUS_RST_IN_FRONT); - ctf_do_dropwithreset_conn(m, tp, th, BANDLIM_RST_OPENPORT, tlen); + ctf_do_dropwithreset_conn(m, tp, th, BANDLIM_TCP_RST, tlen); return (1); } if (tiwin > bbr->r_ctl.rc_high_rwnd) diff --git a/sys/netinet/tcp_stacks/rack.c b/sys/netinet/tcp_stacks/rack.c index 8e05498863b9..834e1347a152 100644 --- a/sys/netinet/tcp_stacks/rack.c +++ b/sys/netinet/tcp_stacks/rack.c @@ -40,7 +40,6 @@ #endif #include <sys/lock.h> #include <sys/malloc.h> -#include <sys/lock.h> #include <sys/mutex.h> #include <sys/mbuf.h> #include <sys/proc.h> /* for proc0 declaration */ @@ -198,7 +197,7 @@ static uint32_t rack_pcm_blast = 0; static uint32_t rack_pcm_is_enabled = 1; static uint8_t rack_ssthresh_rest_rto_rec = 0; /* Do we restore ssthresh when we have rec -> rto -> rec */ -static uint32_t rack_gp_gain_req = 1200; /* Amount percent wise required to gain to record a round has "gaining" */ +static uint32_t rack_gp_gain_req = 1200; /* Amount percent wise required to gain to record a round as "gaining" */ static uint32_t rack_rnd_cnt_req = 0x10005; /* Default number of rounds if we are below rack_gp_gain_req where we exit ss */ @@ -938,7 +937,7 @@ rack_init_sysctls(void) SYSCTL_ADD_U32(&rack_sysctl_ctx, SYSCTL_CHILDREN(rack_probertt), OID_AUTO, "time_between", CTLFLAG_RW, - & rack_time_between_probertt, 96000000, + &rack_time_between_probertt, 96000000, "How many useconds between the lowest rtt falling must past before we enter probertt"); SYSCTL_ADD_U32(&rack_sysctl_ctx, SYSCTL_CHILDREN(rack_probertt), @@ -3480,9 +3479,9 @@ static void rack_free(struct tcp_rack *rack, struct rack_sendmap *rsm) { if (rsm->r_flags & RACK_APP_LIMITED) { - if (rack->r_ctl.rc_app_limited_cnt > 0) { - rack->r_ctl.rc_app_limited_cnt--; - } + KASSERT((rack->r_ctl.rc_app_limited_cnt > 0), + ("app_cnt %u, rsm %p", rack->r_ctl.rc_app_limited_cnt, rsm)); + rack->r_ctl.rc_app_limited_cnt--; } if (rsm->r_limit_type) { /* currently there is only one limit type */ @@ -3554,8 +3553,7 @@ rack_get_measure_window(struct tcpcb *tp, struct tcp_rack *rack) * earlier. * * So lets calculate the BDP with the "known" b/w using - * the SRTT has our rtt and then multiply it by the - * goal. + * the SRTT as our rtt and then multiply it by the goal. */ bw = rack_get_bw(rack); srtt = (uint64_t)tp->t_srtt; @@ -5793,7 +5791,7 @@ rack_cong_signal(struct tcpcb *tp, uint32_t type, uint32_t ack, int line) tp->t_badrxtwin = 0; break; } - if ((CC_ALGO(tp)->cong_signal != NULL) && + if ((CC_ALGO(tp)->cong_signal != NULL) && (type != CC_RTO)){ tp->t_ccv.curack = ack; CC_ALGO(tp)->cong_signal(&tp->t_ccv, type); @@ -5904,7 +5902,7 @@ rack_calc_thresh_rack(struct tcp_rack *rack, uint32_t srtt, uint32_t cts, int li * * If reorder-fade is configured, then we track the last time we saw * re-ordering occur. If we reach the point where enough time as - * passed we no longer consider reordering has occuring. + * passed we no longer consider reordering as occurring. * * Or if reorder-face is 0, then once we see reordering we consider * the connection to alway be subject to reordering and just set lro @@ -7045,6 +7043,9 @@ rack_clone_rsm(struct tcp_rack *rack, struct rack_sendmap *nrsm, /* Push bit must go to the right edge as well */ if (rsm->r_flags & RACK_HAD_PUSH) rsm->r_flags &= ~RACK_HAD_PUSH; + /* Update the count if app limited */ + if (nrsm->r_flags & RACK_APP_LIMITED) + rack->r_ctl.rc_app_limited_cnt++; /* Clone over the state of the hw_tls flag */ nrsm->r_hw_tls = rsm->r_hw_tls; /* @@ -7096,7 +7097,7 @@ rack_merge_rsm(struct tcp_rack *rack, l_rsm->r_flags |= RACK_TLP; if (r_rsm->r_flags & RACK_RWND_COLLAPSED) l_rsm->r_flags |= RACK_RWND_COLLAPSED; - if ((r_rsm->r_flags & RACK_APP_LIMITED) && + if ((r_rsm->r_flags & RACK_APP_LIMITED) && ((l_rsm->r_flags & RACK_APP_LIMITED) == 0)) { /* * If both are app-limited then let the @@ -8137,7 +8138,7 @@ rack_update_rsm(struct tcpcb *tp, struct tcp_rack *rack, * remove the lost desgination and reduce the * bytes considered lost. */ - rsm->r_flags &= ~RACK_WAS_LOST; + rsm->r_flags &= ~RACK_WAS_LOST; KASSERT((rack->r_ctl.rc_considered_lost >= (rsm->r_end - rsm->r_start)), ("rsm:%p rack:%p rc_considered_lost goes negative", rsm, rack)); if (rack->r_ctl.rc_considered_lost >= (rsm->r_end - rsm->r_start)) @@ -8832,7 +8833,7 @@ rack_apply_updated_usrtt(struct tcp_rack *rack, uint32_t us_rtt, uint32_t us_cts val = rack_probertt_lower_within * rack_time_between_probertt; val /= 100; - if ((rack->in_probe_rtt == 0) && + if ((rack->in_probe_rtt == 0) && (rack->rc_skip_timely == 0) && ((us_cts - rack->r_ctl.rc_lower_rtt_us_cts) >= (rack_time_between_probertt - val))) { rack_enter_probertt(rack, us_cts); @@ -10369,7 +10370,7 @@ more: * and yet before retransmitting we get an ack * which can happen due to reordering. */ - rsm->r_flags &= ~RACK_WAS_LOST; + rsm->r_flags &= ~RACK_WAS_LOST; KASSERT((rack->r_ctl.rc_considered_lost >= (rsm->r_end - rsm->r_start)), ("rsm:%p rack:%p rc_considered_lost goes negative", rsm, rack)); if (rack->r_ctl.rc_considered_lost >= (rsm->r_end - rsm->r_start)) @@ -11065,7 +11066,7 @@ rack_strike_dupack(struct tcp_rack *rack, tcp_seq th_ack) * We need to skip anything already set * to be retransmitted. */ - if ((rsm->r_dupack >= DUP_ACK_THRESHOLD) || + if ((rsm->r_dupack >= DUP_ACK_THRESHOLD) || (rsm->r_flags & RACK_MUST_RXT)) { rsm = TAILQ_NEXT(rsm, r_tnext); continue; @@ -12875,7 +12876,7 @@ rack_do_syn_sent(struct mbuf *m, struct tcphdr *th, struct socket *so, (SEQ_LEQ(th->th_ack, tp->iss) || SEQ_GT(th->th_ack, tp->snd_max))) { tcp_log_end_status(tp, TCP_EI_STATUS_RST_IN_FRONT); - ctf_do_dropwithreset(m, tp, th, BANDLIM_RST_OPENPORT, tlen); + ctf_do_dropwithreset(m, tp, th, BANDLIM_TCP_RST, tlen); return (1); } if ((thflags & (TH_ACK | TH_RST)) == (TH_ACK | TH_RST)) { @@ -13089,7 +13090,7 @@ rack_do_syn_recv(struct mbuf *m, struct tcphdr *th, struct socket *so, (SEQ_LEQ(th->th_ack, tp->snd_una) || SEQ_GT(th->th_ack, tp->snd_max))) { tcp_log_end_status(tp, TCP_EI_STATUS_RST_IN_FRONT); - ctf_do_dropwithreset(m, tp, th, BANDLIM_RST_OPENPORT, tlen); + ctf_do_dropwithreset(m, tp, th, BANDLIM_TCP_RST, tlen); return (1); } if (tp->t_flags & TF_FASTOPEN) { @@ -13102,7 +13103,7 @@ rack_do_syn_recv(struct mbuf *m, struct tcphdr *th, struct socket *so, */ if ((thflags & (TH_SYN | TH_ACK)) == (TH_SYN | TH_ACK)) { tcp_log_end_status(tp, TCP_EI_STATUS_RST_IN_FRONT); - ctf_do_dropwithreset(m, tp, th, BANDLIM_RST_OPENPORT, tlen); + ctf_do_dropwithreset(m, tp, th, BANDLIM_TCP_RST, tlen); return (1); } else if (thflags & TH_SYN) { /* non-initial SYN is ignored */ @@ -13136,7 +13137,7 @@ rack_do_syn_recv(struct mbuf *m, struct tcphdr *th, struct socket *so, */ if (SEQ_LT(th->th_seq, tp->irs)) { tcp_log_end_status(tp, TCP_EI_STATUS_RST_IN_FRONT); - ctf_do_dropwithreset(m, tp, th, BANDLIM_RST_OPENPORT, tlen); + ctf_do_dropwithreset(m, tp, th, BANDLIM_TCP_RST, tlen); return (1); } if (ctf_drop_checks(to, m, th, tp, &tlen, &thflags, &drop_hdrlen, &ret_val)) { @@ -13399,7 +13400,7 @@ rack_do_established(struct mbuf *m, struct tcphdr *th, struct socket *so, if (sbavail(&so->so_snd)) { if (ctf_progress_timeout_check(tp, true)) { rack_log_progress_event(rack, tp, tick, PROGRESS_DROP, __LINE__); - ctf_do_dropwithreset_conn(m, tp, th, BANDLIM_RST_OPENPORT, tlen); + ctf_do_dropwithreset_conn(m, tp, th, BANDLIM_TCP_RST, tlen); return (1); } } @@ -13495,7 +13496,7 @@ rack_do_close_wait(struct mbuf *m, struct tcphdr *th, struct socket *so, if (ctf_progress_timeout_check(tp, true)) { rack_log_progress_event((struct tcp_rack *)tp->t_fb_ptr, tp, tick, PROGRESS_DROP, __LINE__); - ctf_do_dropwithreset_conn(m, tp, th, BANDLIM_RST_OPENPORT, tlen); + ctf_do_dropwithreset_conn(m, tp, th, BANDLIM_TCP_RST, tlen); return (1); } } @@ -13645,7 +13646,7 @@ rack_do_fin_wait_1(struct mbuf *m, struct tcphdr *th, struct socket *so, if (ctf_progress_timeout_check(tp, true)) { rack_log_progress_event((struct tcp_rack *)tp->t_fb_ptr, tp, tick, PROGRESS_DROP, __LINE__); - ctf_do_dropwithreset_conn(m, tp, th, BANDLIM_RST_OPENPORT, tlen); + ctf_do_dropwithreset_conn(m, tp, th, BANDLIM_TCP_RST, tlen); return (1); } } @@ -13746,7 +13747,7 @@ rack_do_closing(struct mbuf *m, struct tcphdr *th, struct socket *so, if (ctf_progress_timeout_check(tp, true)) { rack_log_progress_event((struct tcp_rack *)tp->t_fb_ptr, tp, tick, PROGRESS_DROP, __LINE__); - ctf_do_dropwithreset_conn(m, tp, th, BANDLIM_RST_OPENPORT, tlen); + ctf_do_dropwithreset_conn(m, tp, th, BANDLIM_TCP_RST, tlen); return (1); } } @@ -13848,7 +13849,7 @@ rack_do_lastack(struct mbuf *m, struct tcphdr *th, struct socket *so, if (ctf_progress_timeout_check(tp, true)) { rack_log_progress_event((struct tcp_rack *)tp->t_fb_ptr, tp, tick, PROGRESS_DROP, __LINE__); - ctf_do_dropwithreset_conn(m, tp, th, BANDLIM_RST_OPENPORT, tlen); + ctf_do_dropwithreset_conn(m, tp, th, BANDLIM_TCP_RST, tlen); return (1); } } @@ -13952,7 +13953,7 @@ rack_do_fin_wait_2(struct mbuf *m, struct tcphdr *th, struct socket *so, if (ctf_progress_timeout_check(tp, true)) { rack_log_progress_event((struct tcp_rack *)tp->t_fb_ptr, tp, tick, PROGRESS_DROP, __LINE__); - ctf_do_dropwithreset_conn(m, tp, th, BANDLIM_RST_OPENPORT, tlen); + ctf_do_dropwithreset_conn(m, tp, th, BANDLIM_TCP_RST, tlen); return (1); } } @@ -16655,7 +16656,7 @@ rack_do_segment_nounlock(struct tcpcb *tp, struct mbuf *m, struct tcphdr *th, if ((tp->t_state == TCPS_SYN_SENT) && (thflags & TH_ACK) && (SEQ_LEQ(th->th_ack, tp->iss) || SEQ_GT(th->th_ack, tp->snd_max))) { tcp_log_end_status(tp, TCP_EI_STATUS_RST_IN_FRONT); - ctf_do_dropwithreset(m, tp, th, BANDLIM_RST_OPENPORT, tlen); + ctf_do_dropwithreset(m, tp, th, BANDLIM_TCP_RST, tlen); #ifdef TCP_ACCOUNTING sched_unpin(); #endif @@ -16919,7 +16920,7 @@ do_output_now: } else if ((nxt_pkt == 0) && (tp->t_flags & TF_ACKNOW)) { goto do_output_now; } else if ((no_output == 1) && - (nxt_pkt == 0) && + (nxt_pkt == 0) && (tcp_in_hpts(rack->rc_tp) == 0)) { /* * We are not in hpts and we had a pacing timer up. Use @@ -17546,7 +17547,7 @@ rack_get_pacing_delay(struct tcp_rack *rack, struct tcpcb *tp, uint32_t len, str rack->r_ctl.rc_last_us_rtt, 88, __LINE__, NULL, gain); } - if (((bw_est == 0) || (rate_wanted == 0) || (rack->gp_ready == 0)) && + if (((bw_est == 0) || (rate_wanted == 0) || (rack->gp_ready == 0)) && (rack->use_fixed_rate == 0)) { /* * No way yet to make a b/w estimate or @@ -17986,7 +17987,7 @@ start_set: tp->gput_ack = tp->gput_seq + rack_get_measure_window(tp, rack); rack->r_ctl.rc_gp_cumack_ts = 0; if ((rack->r_ctl.cleared_app_ack == 1) && - (SEQ_GEQ(rack->r_ctl.cleared_app_ack, tp->gput_seq))) { + (SEQ_GEQ(tp->gput_seq, rack->r_ctl.cleared_app_ack_seq))) { /* * We just cleared an application limited period * so the next seq out needs to skip the first @@ -20043,7 +20044,7 @@ again: rack->r_ctl.pcm_max_seg = ctf_fixed_maxseg(tp) * 10; } } - if ((rack->r_ctl.pcm_max_seg != 0) && (rack->pcm_needed == 1)) { + if ((rack->r_ctl.pcm_max_seg != 0) && (rack->pcm_needed == 1)) { uint32_t rw_avail, cwa; if (tp->snd_wnd > ctf_outstanding(tp)) @@ -21031,7 +21032,7 @@ just_return_nolock: } else log = 1; } - /* Mark the last packet has app limited */ + /* Mark the last packet as app limited */ rsm = tqhash_max(rack->r_ctl.tqh); if (rsm && ((rsm->r_flags & RACK_APP_LIMITED) == 0)) { if (rack->r_ctl.rc_app_limited_cnt == 0) diff --git a/sys/netinet/tcp_stacks/rack_bbr_common.c b/sys/netinet/tcp_stacks/rack_bbr_common.c index da26b8cb1f9b..d1c4ba58bf55 100644 --- a/sys/netinet/tcp_stacks/rack_bbr_common.c +++ b/sys/netinet/tcp_stacks/rack_bbr_common.c @@ -672,7 +672,7 @@ ctf_do_dropafterack(struct mbuf *m, struct tcpcb *tp, struct tcphdr *th, int32_t (SEQ_GT(tp->snd_una, th->th_ack) || SEQ_GT(th->th_ack, tp->snd_max))) { *ret_val = 1; - ctf_do_dropwithreset(m, tp, th, BANDLIM_RST_OPENPORT, tlen); + ctf_do_dropwithreset(m, tp, th, BANDLIM_TCP_RST, tlen); return; } else *ret_val = 0; diff --git a/sys/netinet/tcp_subr.c b/sys/netinet/tcp_subr.c index cd42a67294a6..db415f6bdf03 100644 --- a/sys/netinet/tcp_subr.c +++ b/sys/netinet/tcp_subr.c @@ -2720,9 +2720,15 @@ tcp_ktlslist_locked(SYSCTL_HANDLER_ARGS, bool export_keys) ksr->snd_tag->sw->snd_tag_status_str != NULL) { sz = SND_TAG_STATUS_MAXLEN; - ksr->snd_tag->sw->snd_tag_status_str( + in_pcbref(inp); + INP_RUNLOCK(inp); + error = ksr->snd_tag->sw-> + snd_tag_status_str( ksr->snd_tag, NULL, &sz); - len += sz; + if (in_pcbrele_rlock(inp)) + return (EDEADLK); + if (error == 0) + len += sz; } } kss = so->so_snd.sb_tls_info; @@ -2739,9 +2745,15 @@ tcp_ktlslist_locked(SYSCTL_HANDLER_ARGS, bool export_keys) kss->snd_tag->sw->snd_tag_status_str != NULL) { sz = SND_TAG_STATUS_MAXLEN; - kss->snd_tag->sw->snd_tag_status_str( + in_pcbref(inp); + INP_RUNLOCK(inp); + error = kss->snd_tag->sw-> + snd_tag_status_str( kss->snd_tag, NULL, &sz); - len += sz; + if (in_pcbrele_rlock(inp)) + return (EDEADLK); + if (error == 0) + len += sz; } } if (p) { @@ -2811,9 +2823,16 @@ tcp_ktlslist_locked(SYSCTL_HANDLER_ARGS, bool export_keys) if (ksr->snd_tag != NULL && ksr->snd_tag->sw->snd_tag_status_str != NULL) { sz = SND_TAG_STATUS_MAXLEN; - ksr->snd_tag->sw->snd_tag_status_str( + in_pcbref(inp); + INP_RUNLOCK(inp); + error = ksr->snd_tag->sw->snd_tag_status_str( ksr->snd_tag, buf + len, &sz); - len += sz; + if (in_pcbrele_rlock(inp)) + return (EDEADLK); + if (error == 0) { + xktls->rcv.drv_st_len = sz; + len += sz; + } } } if (kss != NULL && kss->gen == xig.xig_gen) { @@ -2828,9 +2847,16 @@ tcp_ktlslist_locked(SYSCTL_HANDLER_ARGS, bool export_keys) if (kss->snd_tag != NULL && kss->snd_tag->sw->snd_tag_status_str != NULL) { sz = SND_TAG_STATUS_MAXLEN; - kss->snd_tag->sw->snd_tag_status_str( + in_pcbref(inp); + INP_RUNLOCK(inp); + error = kss->snd_tag->sw->snd_tag_status_str( kss->snd_tag, buf + len, &sz); - len += sz; + if (in_pcbrele_rlock(inp)) + return (EDEADLK); + if (error == 0) { + xktls->snd.drv_st_len = sz; + len += sz; + } } } len = roundup2(len, __alignof(*xktls)); @@ -2858,12 +2884,23 @@ tcp_ktlslist_locked(SYSCTL_HANDLER_ARGS, bool export_keys) static int tcp_ktlslist1(SYSCTL_HANDLER_ARGS, bool export_keys) { - int res; - - sx_xlock(&ktlslist_lock); - res = tcp_ktlslist_locked(oidp, arg1, arg2, req, export_keys); - sx_xunlock(&ktlslist_lock); - return (res); + int repeats, error; + + for (repeats = 0; repeats < 100; repeats++) { + if (sx_xlock_sig(&ktlslist_lock)) + return (EINTR); + error = tcp_ktlslist_locked(oidp, arg1, arg2, req, + export_keys); + sx_xunlock(&ktlslist_lock); + if (error != EDEADLK) + break; + if (sig_intr() != 0) { + error = EINTR; + break; + } + req->oldidx = 0; + } + return (error); } static int |