Diffstat (limited to 'sys/netinet')
-rw-r--r--  sys/netinet/in_pcb.c               |  13
-rw-r--r--  sys/netinet/in_proto.c             |   2
-rw-r--r--  sys/netinet/tcp_stacks/bbr.c       |   8
-rw-r--r--  sys/netinet/tcp_stacks/rack.c      | 284
-rw-r--r--  sys/netinet/tcp_stacks/tcp_rack.h  |   1
-rw-r--r--  sys/netinet/tcp_subr.c             |   2
-rw-r--r--  sys/netinet/tcp_syncache.c         |   2
-rw-r--r--  sys/netinet/tcp_timer.c            |   7
-rw-r--r--  sys/netinet/udp_usrreq.c           |  20
-rw-r--r--  sys/netinet/udp_var.h              |   1
10 files changed, 79 insertions, 261 deletions
diff --git a/sys/netinet/in_pcb.c b/sys/netinet/in_pcb.c
index dbe48242381d..712ff28768dc 100644
--- a/sys/netinet/in_pcb.c
+++ b/sys/netinet/in_pcb.c
@@ -2665,10 +2665,13 @@ in_pcbinshash(struct inpcb *inp)
 	    INP_PCBPORTHASH(inp->inp_lport, pcbinfo->ipi_porthashmask)];
 
 	/*
-	 * Add entry to load balance group.
-	 * Only do this if SO_REUSEPORT_LB is set.
+	 * Ignore SO_REUSEPORT_LB if the socket is connected. Really this case
+	 * should be an error, but for UDP sockets it is not, and some
+	 * applications erroneously set it on connected UDP sockets, so we can't
+	 * change this without breaking compatibility.
 	 */
-	if ((inp->inp_socket->so_options & SO_REUSEPORT_LB) != 0) {
+	if (!connected &&
+	    (inp->inp_socket->so_options & SO_REUSEPORT_LB) != 0) {
 		int error = in_pcbinslbgrouphash(inp, M_NODOM);
 		if (error != 0)
 			return (error);
@@ -2770,6 +2773,10 @@ in_pcbrehash(struct inpcb *inp)
 		connected = !in_nullhost(inp->inp_faddr);
 	}
 
+	/* See the comment in in_pcbinshash(). */
+	if (connected && (inp->inp_flags & INP_INLBGROUP) != 0)
+		in_pcbremlbgrouphash(inp);
+
 	/*
 	 * When rehashing, the caller must ensure that either the new or the old
 	 * foreign address was unspecified.
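As an aside, here is a minimal userspace sketch (my illustration, not part of the patch) of the tolerated-but-erroneous pattern the new comment describes: a UDP socket that sets SO_REUSEPORT_LB and then connect()s. With the change above such a socket is simply kept out of the load-balance group when it is hashed or rehashed, rather than failing. The port number is arbitrary.

/*
 * Hypothetical example: set SO_REUSEPORT_LB on a UDP socket, then
 * connect it.  Per the comment added in in_pcbinshash(), the kernel
 * now silently skips the LB group for the connected socket instead
 * of treating this as an error.
 */
#include <sys/socket.h>
#include <netinet/in.h>
#include <err.h>
#include <unistd.h>

int
main(void)
{
	struct sockaddr_in sin = {
		.sin_len = sizeof(sin),
		.sin_family = AF_INET,
		.sin_port = htons(5353),		/* arbitrary */
		.sin_addr = { .s_addr = htonl(INADDR_LOOPBACK) },
	};
	int s, one = 1;

	if ((s = socket(AF_INET, SOCK_DGRAM, 0)) == -1)
		err(1, "socket");
	/* The erroneous-but-common pattern called out in the comment. */
	if (setsockopt(s, SOL_SOCKET, SO_REUSEPORT_LB, &one, sizeof(one)) == -1)
		err(1, "setsockopt(SO_REUSEPORT_LB)");
	if (connect(s, (struct sockaddr *)&sin, sizeof(sin)) == -1)
		err(1, "connect");
	close(s);
	return (0);
}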
diff --git a/sys/netinet/in_proto.c b/sys/netinet/in_proto.c
index db46da6022c5..42a6cf0b5810 100644
--- a/sys/netinet/in_proto.c
+++ b/sys/netinet/in_proto.c
@@ -108,6 +108,8 @@ SYSCTL_NODE(_net_inet, IPPROTO_ICMP, icmp, CTLFLAG_RW | CTLFLAG_MPSAFE, 0,
     "ICMP");
 SYSCTL_NODE(_net_inet, IPPROTO_UDP, udp, CTLFLAG_RW | CTLFLAG_MPSAFE, 0,
     "UDP");
+SYSCTL_NODE(_net_inet, IPPROTO_UDPLITE, udplite, CTLFLAG_RW | CTLFLAG_MPSAFE, 0,
+    "UDP-Lite");
 SYSCTL_NODE(_net_inet, IPPROTO_TCP, tcp, CTLFLAG_RW | CTLFLAG_MPSAFE, 0,
     "TCP");
 #if defined(SCTP) || defined(SCTP_SUPPORT)
diff --git a/sys/netinet/tcp_stacks/bbr.c b/sys/netinet/tcp_stacks/bbr.c
index 66983edcdd73..10383bc0801e 100644
--- a/sys/netinet/tcp_stacks/bbr.c
+++ b/sys/netinet/tcp_stacks/bbr.c
@@ -477,7 +477,7 @@ bbr_log_rtt_shrinks(struct tcp_bbr *bbr, uint32_t cts, uint32_t applied,
 		    uint16_t set);
 static struct bbr_sendmap *
 	    bbr_find_lowest_rsm(struct tcp_bbr *bbr);
-static __inline uint32_t
+static inline uint32_t
 bbr_get_rtt(struct tcp_bbr *bbr, int32_t rtt_type);
 static void
 bbr_log_to_start(struct tcp_bbr *bbr, uint32_t cts, uint32_t to, int32_t pacing_delay,
@@ -1841,7 +1841,7 @@ bbr_counter_destroy(void)
 }
 
 
-static __inline void
+static inline void
 bbr_fill_in_logging_data(struct tcp_bbr *bbr, struct tcp_log_bbr *l, uint32_t cts)
 {
 	memset(l, 0, sizeof(union tcp_log_stackspecific));
@@ -4206,7 +4206,7 @@ bbr_calc_thresh_tlp(struct tcpcb *tp, struct tcp_bbr *bbr,
 /*
  * Return one of three RTTs to use (in microseconds).
  */
-static __inline uint32_t
+static inline uint32_t
 bbr_get_rtt(struct tcp_bbr *bbr, int32_t rtt_type)
 {
 	uint32_t f_rtt;
@@ -4370,7 +4370,7 @@ bbr_timeout_rack(struct tcpcb *tp, struct tcp_bbr *bbr, uint32_t cts)
 	return (0);
 }
 
-static __inline void
+static inline void
 bbr_clone_rsm(struct tcp_bbr *bbr, struct bbr_sendmap *nrsm, struct bbr_sendmap *rsm, uint32_t start)
 {
 	int idx;
diff --git a/sys/netinet/tcp_stacks/rack.c b/sys/netinet/tcp_stacks/rack.c
index c7962b57a69e..9ed26d5a617b 100644
--- a/sys/netinet/tcp_stacks/rack.c
+++ b/sys/netinet/tcp_stacks/rack.c
@@ -204,10 +204,6 @@ static int32_t rack_dnd_default = 0;	/* For rr_conf = 3, what is the default fo
 static int32_t rack_rxt_controls = 0;
 static int32_t rack_fill_cw_state = 0;
 static uint8_t rack_req_measurements = 1;
-/* Attack threshold detections */
-static uint32_t rack_highest_sack_thresh_seen = 0;
-static uint32_t rack_highest_move_thresh_seen = 0;
-static uint32_t rack_merge_out_sacks_on_attack = 0;
 static int32_t rack_enable_hw_pacing = 0; /* Due to CCSP keep it off by default */
 static int32_t rack_hw_rate_caps = 0; /* 1; */
 static int32_t rack_hw_rate_cap_per = 0;	/* 0 -- off  */
@@ -223,7 +219,6 @@ static int32_t rack_default_pacing_divisor = 250;
 
 static uint16_t rack_pacing_min_seg = 0;
 static int32_t rack_timely_off = 0;
-static uint32_t sad_seg_size_per = 800;	/* 80.0 % */
 static int32_t rack_pkt_delay = 1000;
 static int32_t rack_send_a_lot_in_prr = 1;
 static int32_t rack_min_to = 1000;	/* Number of microsecond min timeout */
@@ -399,18 +394,6 @@ counter_u64_t rack_extended_rfo;
 counter_u64_t rack_sack_proc_all;
 counter_u64_t rack_sack_proc_short;
 counter_u64_t rack_sack_proc_restart;
-counter_u64_t rack_sack_attacks_detected;
-counter_u64_t rack_sack_attacks_reversed;
-counter_u64_t rack_sack_attacks_suspect;
-counter_u64_t rack_sack_used_next_merge;
-counter_u64_t rack_sack_splits;
-counter_u64_t rack_sack_used_prev_merge;
-counter_u64_t rack_sack_skipped_acked;
-counter_u64_t rack_ack_total;
-counter_u64_t rack_express_sack;
-counter_u64_t rack_sack_total;
-counter_u64_t rack_move_none;
-counter_u64_t rack_move_some;
 counter_u64_t rack_input_idle_reduces;
 counter_u64_t rack_collapsed_win;
@@ -834,18 +817,6 @@ sysctl_rack_clear(SYSCTL_HANDLER_ARGS)
 		counter_u64_zero(rack_rxt_clamps_cwnd_uniq);
 		counter_u64_zero(rack_multi_single_eq);
 		counter_u64_zero(rack_proc_non_comp_ack);
-		counter_u64_zero(rack_sack_attacks_detected);
-		counter_u64_zero(rack_sack_attacks_reversed);
-		counter_u64_zero(rack_sack_attacks_suspect);
-		counter_u64_zero(rack_sack_used_next_merge);
-		counter_u64_zero(rack_sack_used_prev_merge);
-		counter_u64_zero(rack_sack_splits);
-		counter_u64_zero(rack_sack_skipped_acked);
-		counter_u64_zero(rack_ack_total);
-		counter_u64_zero(rack_express_sack);
-		counter_u64_zero(rack_sack_total);
-		counter_u64_zero(rack_move_none);
-		counter_u64_zero(rack_move_some);
 		counter_u64_zero(rack_try_scwnd);
 		counter_u64_zero(rack_collapsed_win);
 		counter_u64_zero(rack_collapsed_win_rxt);
@@ -872,7 +843,6 @@ static void
 rack_init_sysctls(void)
 {
 	struct sysctl_oid *rack_counters;
-	struct sysctl_oid *rack_attack;
 	struct sysctl_oid *rack_pacing;
 	struct sysctl_oid *rack_timely;
 	struct sysctl_oid *rack_timers;
@@ -883,12 +853,6 @@ rack_init_sysctls(void)
 	struct sysctl_oid *rack_probertt;
 	struct sysctl_oid *rack_hw_pacing;
 
-	rack_attack = SYSCTL_ADD_NODE(&rack_sysctl_ctx,
-	    SYSCTL_CHILDREN(rack_sysctl_root),
-	    OID_AUTO,
-	    "sack_attack",
-	    CTLFLAG_RW | CTLFLAG_MPSAFE, 0,
-	    "Rack Sack Attack Counters and Controls");
 	rack_counters = SYSCTL_ADD_NODE(&rack_sysctl_ctx,
 	    SYSCTL_CHILDREN(rack_sysctl_root),
 	    OID_AUTO,
@@ -1535,11 +1499,6 @@ rack_init_sysctls(void)
 	    "Do not disturb default for rack_rrr = 3");
 	SYSCTL_ADD_S32(&rack_sysctl_ctx,
 	    SYSCTL_CHILDREN(rack_misc),
-	    OID_AUTO, "sad_seg_per", CTLFLAG_RW,
-	    &sad_seg_size_per, 800,
-	    "Percentage of segment size needed in a sack 800 = 80.0?");
-	SYSCTL_ADD_S32(&rack_sysctl_ctx,
-	    SYSCTL_CHILDREN(rack_misc),
 	    OID_AUTO, "rxt_controls", CTLFLAG_RW,
 	    &rack_rxt_controls, 0,
 	    "Retransmit sending size controls (valid values 0, 1, 2 default=1)?");
@@ -1619,85 +1578,6 @@ rack_init_sysctls(void)
 	    &rack_autosndbuf_inc, 20,
 	    "What percentage should rack scale up its snd buffer by?");
-
-	/* Sack Attacker detection stuff */
-	SYSCTL_ADD_U32(&rack_sysctl_ctx,
-	    SYSCTL_CHILDREN(rack_attack),
-	    OID_AUTO, "merge_out", CTLFLAG_RW,
-	    &rack_merge_out_sacks_on_attack, 0,
-	    "Do we merge the sendmap when we decide we are being attacked?");
-
-	SYSCTL_ADD_U32(&rack_sysctl_ctx,
-	    SYSCTL_CHILDREN(rack_attack),
-	    OID_AUTO, "detect_highsackratio", CTLFLAG_RW,
-	    &rack_highest_sack_thresh_seen, 0,
-	    "Highest sack to ack ratio seen");
-	SYSCTL_ADD_U32(&rack_sysctl_ctx,
-	    SYSCTL_CHILDREN(rack_attack),
-	    OID_AUTO, "detect_highmoveratio", CTLFLAG_RW,
-	    &rack_highest_move_thresh_seen, 0,
-	    "Highest move to non-move ratio seen");
-	rack_ack_total = counter_u64_alloc(M_WAITOK);
-	SYSCTL_ADD_COUNTER_U64(&rack_sysctl_ctx,
-	    SYSCTL_CHILDREN(rack_attack),
-	    OID_AUTO, "acktotal", CTLFLAG_RD,
-	    &rack_ack_total,
-	    "Total number of Ack's");
-	rack_express_sack = counter_u64_alloc(M_WAITOK);
-	SYSCTL_ADD_COUNTER_U64(&rack_sysctl_ctx,
-	    SYSCTL_CHILDREN(rack_attack),
-	    OID_AUTO, "exp_sacktotal", CTLFLAG_RD,
-	    &rack_express_sack,
-	    "Total expresss number of Sack's");
-	rack_sack_total = counter_u64_alloc(M_WAITOK);
-	SYSCTL_ADD_COUNTER_U64(&rack_sysctl_ctx,
-	    SYSCTL_CHILDREN(rack_attack),
-	    OID_AUTO, "sacktotal", CTLFLAG_RD,
-	    &rack_sack_total,
-	    "Total number of SACKs");
-	rack_move_none = counter_u64_alloc(M_WAITOK);
-	SYSCTL_ADD_COUNTER_U64(&rack_sysctl_ctx,
-	    SYSCTL_CHILDREN(rack_attack),
-	    OID_AUTO, "move_none", CTLFLAG_RD,
-	    &rack_move_none,
-	    "Total number of SACK index reuse of positions under threshold");
-	rack_move_some = counter_u64_alloc(M_WAITOK);
-	SYSCTL_ADD_COUNTER_U64(&rack_sysctl_ctx,
-	    SYSCTL_CHILDREN(rack_attack),
-	    OID_AUTO, "move_some", CTLFLAG_RD,
-	    &rack_move_some,
-	    "Total number of SACK index reuse of positions over threshold");
-	rack_sack_attacks_detected = counter_u64_alloc(M_WAITOK);
-	SYSCTL_ADD_COUNTER_U64(&rack_sysctl_ctx,
-	    SYSCTL_CHILDREN(rack_attack),
-	    OID_AUTO, "attacks", CTLFLAG_RD,
-	    &rack_sack_attacks_detected,
-	    "Total number of SACK attackers that had sack disabled");
-	rack_sack_attacks_reversed = counter_u64_alloc(M_WAITOK);
-	SYSCTL_ADD_COUNTER_U64(&rack_sysctl_ctx,
-	    SYSCTL_CHILDREN(rack_attack),
-	    OID_AUTO, "reversed", CTLFLAG_RD,
-	    &rack_sack_attacks_reversed,
-	    "Total number of SACK attackers that were later determined false positive");
-	rack_sack_attacks_suspect = counter_u64_alloc(M_WAITOK);
-	SYSCTL_ADD_COUNTER_U64(&rack_sysctl_ctx,
-	    SYSCTL_CHILDREN(rack_attack),
-	    OID_AUTO, "suspect", CTLFLAG_RD,
-	    &rack_sack_attacks_suspect,
-	    "Total number of SACKs that triggered early detection");
-
-	rack_sack_used_next_merge = counter_u64_alloc(M_WAITOK);
-	SYSCTL_ADD_COUNTER_U64(&rack_sysctl_ctx,
-	    SYSCTL_CHILDREN(rack_attack),
-	    OID_AUTO, "nextmerge", CTLFLAG_RD,
-	    &rack_sack_used_next_merge,
-	    "Total number of times we used the next merge");
-	rack_sack_used_prev_merge = counter_u64_alloc(M_WAITOK);
-	SYSCTL_ADD_COUNTER_U64(&rack_sysctl_ctx,
-	    SYSCTL_CHILDREN(rack_attack),
-	    OID_AUTO, "prevmerge", CTLFLAG_RD,
-	    &rack_sack_used_prev_merge,
-	    "Total number of times we used the prev merge");
 	/* Counters */
 	rack_total_bytes = counter_u64_alloc(M_WAITOK);
 	SYSCTL_ADD_COUNTER_U64(&rack_sysctl_ctx,
@@ -1908,18 +1788,6 @@ rack_init_sysctls(void)
 	    OID_AUTO, "sack_short", CTLFLAG_RD,
 	    &rack_sack_proc_short,
 	    "Total times we took shortcut for sack processing");
-	rack_sack_skipped_acked = counter_u64_alloc(M_WAITOK);
-	SYSCTL_ADD_COUNTER_U64(&rack_sysctl_ctx,
-	    SYSCTL_CHILDREN(rack_attack),
-	    OID_AUTO, "skipacked", CTLFLAG_RD,
-	    &rack_sack_skipped_acked,
-	    "Total number of times we skipped previously sacked");
-	rack_sack_splits = counter_u64_alloc(M_WAITOK);
-	SYSCTL_ADD_COUNTER_U64(&rack_sysctl_ctx,
-	    SYSCTL_CHILDREN(rack_attack),
-	    OID_AUTO, "ofsplit", CTLFLAG_RD,
-	    &rack_sack_splits,
-	    "Total number of times we did the old fashion tree split");
 	rack_input_idle_reduces = counter_u64_alloc(M_WAITOK);
 	SYSCTL_ADD_COUNTER_U64(&rack_sysctl_ctx,
 	    SYSCTL_CHILDREN(rack_counters),
@@ -3319,16 +3187,6 @@ rack_counter_destroy(void)
 	counter_u64_free(rack_hw_pace_lost);
 	counter_u64_free(rack_non_fto_send);
 	counter_u64_free(rack_extended_rfo);
-	counter_u64_free(rack_ack_total);
-	counter_u64_free(rack_express_sack);
-	counter_u64_free(rack_sack_total);
-	counter_u64_free(rack_move_none);
-	counter_u64_free(rack_move_some);
-	counter_u64_free(rack_sack_attacks_detected);
-	counter_u64_free(rack_sack_attacks_reversed);
-	counter_u64_free(rack_sack_attacks_suspect);
-	counter_u64_free(rack_sack_used_next_merge);
-	counter_u64_free(rack_sack_used_prev_merge);
 	counter_u64_free(rack_tlp_tot);
 	counter_u64_free(rack_tlp_newdata);
 	counter_u64_free(rack_tlp_retran);
@@ -3351,8 +3209,6 @@ rack_counter_destroy(void)
 	counter_u64_free(rack_sack_proc_all);
 	counter_u64_free(rack_sack_proc_restart);
 	counter_u64_free(rack_sack_proc_short);
-	counter_u64_free(rack_sack_skipped_acked);
-	counter_u64_free(rack_sack_splits);
 	counter_u64_free(rack_input_idle_reduces);
 	counter_u64_free(rack_collapsed_win);
 	counter_u64_free(rack_collapsed_win_rxt);
@@ -4730,7 +4586,7 @@ rack_make_timely_judgement(struct tcp_rack *rack, uint32_t rtt, int32_t rtt_diff
 	return (timely_says);
 }
 
-static __inline int
+static inline int
 rack_in_gp_window(struct tcpcb *tp, struct rack_sendmap *rsm)
 {
 	if (SEQ_GEQ(rsm->r_start, tp->gput_seq) &&
@@ -4767,7 +4623,7 @@ rack_in_gp_window(struct tcpcb *tp, struct rack_sendmap *rsm)
 	return (0);
 }
 
-static __inline void
+static inline void
 rack_mark_in_gp_win(struct tcpcb *tp, struct rack_sendmap *rsm)
 {
 
@@ -4784,7 +4640,7 @@ rack_mark_in_gp_win(struct tcpcb *tp, struct rack_sendmap *rsm)
 		rsm->r_flags &= ~RACK_IN_GP_WIN;
 }
 
-static __inline void
+static inline void
 rack_clear_gp_marks(struct tcpcb *tp, struct tcp_rack *rack)
 {
 	/* A GP measurement is ending, clear all marks on the send map*/
@@ -4802,7 +4658,7 @@ rack_clear_gp_marks(struct tcpcb *tp, struct tcp_rack *rack)
 }
 
 
-static __inline void
+static inline void
 rack_tend_gp_marks(struct tcpcb *tp, struct tcp_rack *rack)
 {
 	struct rack_sendmap *rsm = NULL;
@@ -6864,6 +6720,18 @@ rack_mark_lost(struct tcpcb *tp,
 	}
 }
 
+static inline void
+rack_mark_nolonger_lost(struct tcp_rack *rack, struct rack_sendmap *rsm)
+{
+	KASSERT((rack->r_ctl.rc_considered_lost >= (rsm->r_end - rsm->r_start)),
+	    ("rsm:%p rack:%p rc_considered_lost goes negative", rsm, rack));
+	rsm->r_flags &= ~RACK_WAS_LOST;
+	if (rack->r_ctl.rc_considered_lost >= (rsm->r_end - rsm->r_start))
+		rack->r_ctl.rc_considered_lost -= rsm->r_end - rsm->r_start;
+	else
+		rack->r_ctl.rc_considered_lost = 0;
+}
+
 /*
  * RACK Timer, here we simply do logging and house keeping.
  * the normal rack_output() function will call the
@@ -7005,7 +6873,7 @@ rack_setup_offset_for_rsm(struct tcp_rack *rack, struct rack_sendmap *src_rsm, s
 	rsm->orig_t_space = M_TRAILINGROOM(rsm->m);
 }
 
-static __inline void
+static inline void
 rack_clone_rsm(struct tcp_rack *rack, struct rack_sendmap *nrsm,
 	       struct rack_sendmap *rsm, uint32_t start)
 {
@@ -8130,13 +7998,7 @@ rack_update_rsm(struct tcpcb *tp, struct tcp_rack *rack,
		 * remove the lost desgination and reduce the
		 * bytes considered lost.
		 */
-		rsm->r_flags &= ~RACK_WAS_LOST;
-		KASSERT((rack->r_ctl.rc_considered_lost >= (rsm->r_end - rsm->r_start)),
-		    ("rsm:%p rack:%p rc_considered_lost goes negative", rsm, rack));
-		if (rack->r_ctl.rc_considered_lost >= (rsm->r_end - rsm->r_start))
-			rack->r_ctl.rc_considered_lost -= rsm->r_end - rsm->r_start;
-		else
-			rack->r_ctl.rc_considered_lost = 0;
+		rack_mark_nolonger_lost(rack, rsm);
 	}
 	idx = rsm->r_rtr_cnt - 1;
 	rsm->r_tim_lastsent[idx] = ts;
@@ -9492,6 +9354,11 @@ do_rest_ofb:
 			if (rsm->r_flags & RACK_WAS_LOST) {
 				int my_chg;
 
+				/*
+				 * Note here we do not use our rack_mark_nolonger_lost() function
+				 * since we are moving our data pointer around and the
+				 * ack'ed side is already not considered lost.
+				 */
 				my_chg = (nrsm->r_end - nrsm->r_start);
 				KASSERT((rack->r_ctl.rc_considered_lost >= my_chg),
 				    ("rsm:%p rack:%p rc_considered_lost goes negative", rsm, rack));
@@ -9531,7 +9398,6 @@ do_rest_ofb:
 				goto out;
 			}
 			rack_log_map_chg(tp, rack, &stack_map, rsm, next, MAP_SACK_M1, end, __LINE__);
-			counter_u64_add(rack_sack_used_next_merge, 1);
 			/* Postion for the next block */
 			start = next->r_end;
 			rsm = tqhash_next(rack->r_ctl.tqh, next);
@@ -9563,7 +9429,6 @@ do_rest_ofb:
 				 */
 				goto out;
 			}
-			counter_u64_add(rack_sack_splits, 1);
 			rack_clone_rsm(rack, nrsm, rsm, start);
 			rsm->r_just_ret = 0;
 #ifndef INVARIANTS
@@ -9585,7 +9450,6 @@ do_rest_ofb:
 			}
 		} else {
 			/* Already sacked this piece */
-			counter_u64_add(rack_sack_skipped_acked, 1);
 			if (end == rsm->r_end) {
 				/* Done with block */
 				rsm = tqhash_next(rack->r_ctl.tqh, rsm);
@@ -9659,16 +9523,11 @@ do_rest_ofb:
 		changed += (rsm->r_end - rsm->r_start);
 		/* You get a count for acking a whole segment or more */
 		if (rsm->r_flags & RACK_WAS_LOST) {
-			int my_chg;
-
-			my_chg = (rsm->r_end - rsm->r_start);
-			rsm->r_flags &= ~RACK_WAS_LOST;
-			KASSERT((rack->r_ctl.rc_considered_lost >= my_chg),
-			    ("rsm:%p rack:%p rc_considered_lost goes negative", rsm, rack));
-			if (my_chg <= rack->r_ctl.rc_considered_lost)
-				rack->r_ctl.rc_considered_lost -= my_chg;
-			else
-				rack->r_ctl.rc_considered_lost = 0;
+			/*
+			 * Here we can use the inline function since
+			 * the rsm is truly marked lost and now no longer lost.
+			 */
+			rack_mark_nolonger_lost(rack, rsm);
 		}
 		rack->r_ctl.rc_sacked += (rsm->r_end - rsm->r_start);
 		if (rsm->r_in_tmap) /* should be true */
@@ -9690,8 +9549,6 @@ do_rest_ofb:
 			rsm->r_in_tmap = 0;
 		}
 		rack_log_map_chg(tp, rack, NULL, rsm, NULL, MAP_SACK_M3, end, __LINE__);
-	} else {
-		counter_u64_add(rack_sack_skipped_acked, 1);
 	}
 	if (end == rsm->r_end) {
 		/* This block only - done, setup for next */
@@ -9851,6 +9708,10 @@ do_rest_ofb:
 			if (rsm->r_flags & RACK_WAS_LOST) {
 				int my_chg;
 
+				/*
+				 * Note here we are using hookery again so we can't
+				 * use our rack_mark_nolonger_lost() function.
+				 */
 				my_chg = (nrsm->r_end - nrsm->r_start);
 				KASSERT((rack->r_ctl.rc_considered_lost >= my_chg),
 				    ("rsm:%p rack:%p rc_considered_lost goes negative", rsm, rack));
@@ -9866,7 +9727,6 @@ do_rest_ofb:
 			}
 			rack_log_map_chg(tp, rack, prev, &stack_map, rsm, MAP_SACK_M4, end, __LINE__);
 			rsm = prev;
-			counter_u64_add(rack_sack_used_prev_merge, 1);
 		} else {
 			/**
 			 * This is the case where our previous
@@ -9931,7 +9791,6 @@ do_rest_ofb:
 			 *  rsm             |---|         (acked)
 			 *  nrsm                |------|  (not acked)
 			 */
-			counter_u64_add(rack_sack_splits, 1);
 			rack_clone_rsm(rack, nrsm, rsm, end);
 			rsm->r_flags &= (~RACK_HAS_FIN);
 			rsm->r_just_ret = 0;
@@ -9952,16 +9811,10 @@ do_rest_ofb:
 		rack_update_rtt(tp, rack, rsm, to, cts, SACKED, 0);
 		changed += (rsm->r_end - rsm->r_start);
 		if (rsm->r_flags & RACK_WAS_LOST) {
-			int my_chg;
-
-			my_chg = (rsm->r_end - rsm->r_start);
-			rsm->r_flags &= ~RACK_WAS_LOST;
-			KASSERT((rack->r_ctl.rc_considered_lost >= my_chg),
-			    ("rsm:%p rack:%p rc_considered_lost goes negative", rsm, rack));
-			if (my_chg <= rack->r_ctl.rc_considered_lost)
-				rack->r_ctl.rc_considered_lost -= my_chg;
-			else
-				rack->r_ctl.rc_considered_lost = 0;
+			/*
+			 * Here it is safe to use our function.
+			 */
+			rack_mark_nolonger_lost(rack, rsm);
 		}
 		rack->r_ctl.rc_sacked += (rsm->r_end - rsm->r_start);
 
@@ -9985,11 +9838,6 @@ do_rest_ofb:
 			rsm->r_in_tmap = 0;
 		}
 	}
-	} else if (start != end){
-		/*
-		 * The block was already acked.
-		 */
-		counter_u64_add(rack_sack_skipped_acked, 1);
 	}
 out:
 	if (rsm &&
@@ -10362,13 +10210,7 @@ more:
		 * and yet before retransmitting we get an ack
		 * which can happen due to reordering.
		 */
-		rsm->r_flags &= ~RACK_WAS_LOST;
-		KASSERT((rack->r_ctl.rc_considered_lost >= (rsm->r_end - rsm->r_start)),
-		    ("rsm:%p rack:%p rc_considered_lost goes negative", rsm, rack));
-		if (rack->r_ctl.rc_considered_lost >= (rsm->r_end - rsm->r_start))
-			rack->r_ctl.rc_considered_lost -= rsm->r_end - rsm->r_start;
-		else
-			rack->r_ctl.rc_considered_lost = 0;
+		rack_mark_nolonger_lost(rack, rsm);
 	}
 	rack_log_map_chg(tp, rack, NULL, rsm, NULL, MAP_FREE, rsm->r_end, __LINE__);
 	rack->r_ctl.rc_holes_rxt -= rsm->r_rtr_bytes;
@@ -10476,12 +10318,7 @@ more:
		 * which can happen due to reordering. In this
		 * case its only a partial ack of the send.
		 */
-		KASSERT((rack->r_ctl.rc_considered_lost >= (th_ack - rsm->r_start)),
-		    ("rsm:%p rack:%p rc_considered_lost goes negative th_ack:%u", rsm, rack, th_ack));
-		if (rack->r_ctl.rc_considered_lost >= (th_ack - rsm->r_start))
-			rack->r_ctl.rc_considered_lost -= th_ack - rsm->r_start;
-		else
-			rack->r_ctl.rc_considered_lost = 0;
+		rack_mark_nolonger_lost(rack, rsm);
 	}
 	/*
 	 * Clear the dup ack count for
@@ -10793,17 +10630,6 @@ rack_log_ack(struct tcpcb *tp, struct tcpopt *to, struct tcphdr *th, int entered
 	changed = 0;
 	th_ack = th->th_ack;
 	segsiz = ctf_fixed_maxseg(rack->rc_tp);
-	if (BYTES_THIS_ACK(tp, th) >= segsiz) {
-		/*
-		 * You only get credit for
-		 * MSS and greater (and you get extra
-		 * credit for larger cum-ack moves).
-		 */
-		int ac;
-
-		ac = BYTES_THIS_ACK(tp, th) / ctf_fixed_maxseg(rack->rc_tp);
-		counter_u64_add(rack_ack_total, ac);
-	}
 	if (SEQ_GT(th_ack, tp->snd_una)) {
 		rack_log_progress_event(rack, tp, ticks, PROGRESS_UPDATE, __LINE__);
 		tp->t_acktime = ticks;
@@ -10875,8 +10701,8 @@ rack_log_ack(struct tcpcb *tp, struct tcpopt *to, struct tcphdr *th, int entered
 	if (sacks_seen != NULL)
 		*sacks_seen = num_sack_blks;
 	if (num_sack_blks == 0) {
-		/* Nothing to sack, but we need to update counts */
-		goto out_with_totals;
+		/* Nothing to sack */
+		goto out;
 	}
 	/* Its a sack of some sort */
 	if (num_sack_blks < 2) {
@@ -10899,7 +10725,7 @@ rack_log_ack(struct tcpcb *tp, struct tcpopt *to, struct tcphdr *th, int entered
	 */
again:
 	if (num_sack_blks == 0)
-		goto out_with_totals;
+		goto out;
 	if (num_sack_blks > 1) {
 		for (i = 0; i < num_sack_blks; i++) {
 			for (j = i + 1; j < num_sack_blks; j++) {
@@ -10952,19 +10778,7 @@ do_sack_work:
 			changed += acked;
 		}
 		if (num_sack_blks == 1) {
-			/*
-			 * This is what we would expect from
-			 * a normal implementation to happen
-			 * after we have retransmitted the FR,
-			 * i.e the sack-filter pushes down
-			 * to 1 block and the next to be retransmitted
-			 * is the sequence in the sack block (has more
-			 * are acked). Count this as ACK'd data to boost
-			 * up the chances of recovering any false positives.
-			 */
-			counter_u64_add(rack_ack_total, (acked / ctf_fixed_maxseg(rack->rc_tp)));
-			counter_u64_add(rack_express_sack, 1);
-			goto out_with_totals;
+			goto out;
 		} else {
 			/*
 			 * Start the loop through the
@@ -10973,7 +10787,6 @@ do_sack_work:
 			loop_start = 1;
 		}
 	}
-	counter_u64_add(rack_sack_total, 1);
 	rsm = rack->r_ctl.rc_sacklast;
 	for (i = loop_start; i < num_sack_blks; i++) {
 		acked = rack_proc_sack_blk(tp, rack, &sack_blocks[i], to, &rsm, cts, segsiz);
@@ -10982,18 +10795,6 @@ do_sack_work:
 			changed += acked;
 		}
 	}
-out_with_totals:
-	if (num_sack_blks > 1) {
-		/*
-		 * You get an extra stroke if
-		 * you have more than one sack-blk, this
-		 * could be where we are skipping forward
-		 * and the sack-filter is still working, or
-		 * it could be an attacker constantly
-		 * moving us.
-		 */
-		counter_u64_add(rack_move_some, 1);
-	}
out:
 	if (changed) {
 		/* Something changed cancel the rack timer */
@@ -14713,7 +14514,6 @@ rack_init(struct tcpcb *tp, void **ptr)
 	rack->r_ctl.rc_prr_sendalot = rack_send_a_lot_in_prr;
 	rack->r_ctl.rc_min_to = rack_min_to;
 	microuptime(&rack->r_ctl.act_rcv_time);
-	rack->r_ctl.rc_last_time_decay = rack->r_ctl.act_rcv_time;
 	rack->r_ctl.rack_per_of_gp_ss = rack_per_of_gp_ss;
 	if (rack_hw_up_only)
 		rack->r_up_only = 1;
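For readers following the rack.c changes, here is a standalone userspace restatement (my illustration, not kernel code) of the bookkeeping that the new rack_mark_nolonger_lost() inline consolidates: clear the "was lost" flag and subtract the segment's bytes from the running considered-lost total, saturating at zero rather than wrapping. assert() stands in for KASSERT, and the struct is a simplified stand-in for struct rack_sendmap.

#include <assert.h>
#include <stdint.h>
#include <stdio.h>

#define WAS_LOST	0x01	/* stand-in for RACK_WAS_LOST */

struct seg {
	uint32_t start;		/* stand-ins for rsm->r_start / rsm->r_end */
	uint32_t end;
	uint32_t flags;
};

static inline void
mark_nolonger_lost(uint32_t *considered_lost, struct seg *s)
{
	uint32_t len = s->end - s->start;

	assert(*considered_lost >= len);	/* mirrors the KASSERT */
	s->flags &= ~WAS_LOST;
	if (*considered_lost >= len)
		*considered_lost -= len;
	else
		*considered_lost = 0;		/* saturate, never wrap */
}

int
main(void)
{
	uint32_t considered_lost = 3000;
	struct seg s = { .start = 1000, .end = 2460, .flags = WAS_LOST };

	mark_nolonger_lost(&considered_lost, &s);
	printf("remaining considered lost: %u\n", considered_lost); /* 1540 */
	return (0);
}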
diff --git a/sys/netinet/tcp_stacks/tcp_rack.h b/sys/netinet/tcp_stacks/tcp_rack.h
index 144b4fabf7eb..cac17d9aeb50 100644
--- a/sys/netinet/tcp_stacks/tcp_rack.h
+++ b/sys/netinet/tcp_stacks/tcp_rack.h
@@ -462,7 +462,6 @@ struct rack_control {
 	uint64_t rc_gp_output_ts; /* chg*/
 	uint64_t rc_gp_cumack_ts; /* chg*/
 	struct timeval act_rcv_time;
-	struct timeval rc_last_time_decay;	/* SAD time decay happened here */
 	uint64_t gp_bw;
 	uint64_t init_rate;
 #ifdef NETFLIX_SHARED_CWND
diff --git a/sys/netinet/tcp_subr.c b/sys/netinet/tcp_subr.c
index c817c79881d6..b6f428b279b3 100644
--- a/sys/netinet/tcp_subr.c
+++ b/sys/netinet/tcp_subr.c
@@ -607,7 +607,7 @@ tcp_recv_udp_tunneled_packet(struct mbuf *m, int off, struct inpcb *inp,
 		}
 	}
 	m->m_pkthdr.tcp_tun_port = port = uh->uh_sport;
-	bcopy(th, uh, m->m_len - off);
+	bcopy(th, uh, m->m_len - off - sizeof(struct udphdr));
 	m->m_len -= sizeof(struct udphdr);
 	m->m_pkthdr.len -= sizeof(struct udphdr);
 	/*
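A userspace sketch (my illustration, not from the patch) of what the tcp_subr.c one-liner fixes: when the UDP encapsulation header is stripped, the TCP header and payload slide down over it, so the copy length must be the bytes remaining after the UDP header. The old "m_len - off" read sizeof(struct udphdr) bytes past the end of the packet data. memmove() stands in for the kernel bcopy(), which likewise tolerates overlapping buffers.

#include <assert.h>
#include <string.h>

#define UDP_HDRLEN 8	/* sizeof(struct udphdr) */

static size_t
strip_udp_header(unsigned char *pkt, size_t pktlen, size_t off)
{
	/* pkt+off is the UDP header; the TCP segment follows it. */
	unsigned char *uh = pkt + off;
	unsigned char *th = uh + UDP_HDRLEN;

	/* The corrected length: everything after the UDP header. */
	memmove(uh, th, pktlen - off - UDP_HDRLEN);
	return (pktlen - UDP_HDRLEN);
}

int
main(void)
{
	/* 20-byte fake IP header, 8-byte UDP header, 11-byte payload. */
	unsigned char pkt[64] = "IPHDR---------------UDPHDR--TCP-SEGMENT";
	size_t newlen = strip_udp_header(pkt, 40, 20);

	assert(newlen == 32);
	assert(memcmp(pkt + 20, "TCP-SEGMENT", 11) == 0);
	return (0);
}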
diff --git a/sys/netinet/tcp_syncache.c b/sys/netinet/tcp_syncache.c
index be20fb44a820..3cb538f7054d 100644
--- a/sys/netinet/tcp_syncache.c
+++ b/sys/netinet/tcp_syncache.c
@@ -1168,7 +1168,7 @@ syncache_expand(struct in_conninfo *inc, struct tcpopt *to, struct tcphdr *th,
	/*
	 * If listening socket requested TCP digests, check that
	 * received ACK has signature and it is correct.
-	 * If not, drop the ACK and leave sc entry in th cache,
+	 * If not, drop the ACK and leave sc entry in the cache,
	 * because SYN was received with correct signature.
	 */
	if (sc->sc_flags & SCF_SIGNATURE) {
diff --git a/sys/netinet/tcp_timer.c b/sys/netinet/tcp_timer.c
index 3b9fe7a317b0..57c57666fa3a 100644
--- a/sys/netinet/tcp_timer.c
+++ b/sys/netinet/tcp_timer.c
@@ -513,9 +513,12 @@ tcp_timer_persist(struct tcpcb *tp)
 	if (progdrop || (tp->t_rxtshift >= V_tcp_retries &&
 	    (ticks - tp->t_rcvtime >= tcp_maxpersistidle ||
 	    ticks - tp->t_rcvtime >= TCP_REXMTVAL(tp) * tcp_totbackoff))) {
-		if (!progdrop)
+		if (progdrop) {
+			tcp_log_end_status(tp, TCP_EI_STATUS_PROGRESS);
+		} else {
 			TCPSTAT_INC(tcps_persistdrop);
-		tcp_log_end_status(tp, TCP_EI_STATUS_PERSIST_MAX);
+			tcp_log_end_status(tp, TCP_EI_STATUS_PERSIST_MAX);
+		}
 		goto dropit;
 	}
 	/*
diff --git a/sys/netinet/udp_usrreq.c b/sys/netinet/udp_usrreq.c
index cea8a916679b..04d01099d54a 100644
--- a/sys/netinet/udp_usrreq.c
+++ b/sys/netinet/udp_usrreq.c
@@ -787,7 +787,8 @@ udplite_ctlinput(struct icmp *icmp)
 static int
 udp_pcblist(SYSCTL_HANDLER_ARGS)
 {
-	struct inpcb_iterator inpi = INP_ALL_ITERATOR(&V_udbinfo,
+	struct inpcbinfo *pcbinfo = udp_get_inpcbinfo(arg2);
+	struct inpcb_iterator inpi = INP_ALL_ITERATOR(pcbinfo,
 	    INPLOOKUP_RLOCKPCB);
 	struct xinpgen xig;
 	struct inpcb *inp;
@@ -799,7 +800,7 @@ udp_pcblist(SYSCTL_HANDLER_ARGS)
 	if (req->oldptr == 0) {
 		int n;
 
-		n = V_udbinfo.ipi_count;
+		n = pcbinfo->ipi_count;
 		n += imax(n / 8, 10);
 		req->oldidx = 2 * (sizeof xig) + n * sizeof(struct xinpcb);
 		return (0);
@@ -810,8 +811,8 @@ udp_pcblist(SYSCTL_HANDLER_ARGS)
 
 	bzero(&xig, sizeof(xig));
 	xig.xig_len = sizeof xig;
-	xig.xig_count = V_udbinfo.ipi_count;
-	xig.xig_gen = V_udbinfo.ipi_gencnt;
+	xig.xig_count = pcbinfo->ipi_count;
+	xig.xig_gen = pcbinfo->ipi_gencnt;
 	xig.xig_sogen = so_gencnt;
 	error = SYSCTL_OUT(req, &xig, sizeof xig);
 	if (error)
@@ -838,9 +839,9 @@ udp_pcblist(SYSCTL_HANDLER_ARGS)
		 * that something happened while we were processing this
		 * request, and it might be necessary to retry.
		 */
-		xig.xig_gen = V_udbinfo.ipi_gencnt;
+		xig.xig_gen = pcbinfo->ipi_gencnt;
 		xig.xig_sogen = so_gencnt;
-		xig.xig_count = V_udbinfo.ipi_count;
+		xig.xig_count = pcbinfo->ipi_count;
 		error = SYSCTL_OUT(req, &xig, sizeof xig);
 	}
 
@@ -848,10 +849,15 @@ udp_pcblist(SYSCTL_HANDLER_ARGS)
 }
 
 SYSCTL_PROC(_net_inet_udp, UDPCTL_PCBLIST, pcblist,
-    CTLTYPE_OPAQUE | CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, 0,
+    CTLTYPE_OPAQUE | CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, IPPROTO_UDP,
     udp_pcblist, "S,xinpcb",
     "List of active UDP sockets");
+SYSCTL_PROC(_net_inet_udplite, OID_AUTO, pcblist,
+    CTLTYPE_OPAQUE | CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, IPPROTO_UDPLITE,
+    udp_pcblist, "S,xinpcb",
+    "List of active UDP-Lite sockets");
+
 #ifdef INET
 static int
 udp_getcred(SYSCTL_HANDLER_ARGS)
diff --git a/sys/netinet/udp_var.h b/sys/netinet/udp_var.h
index 3895f365db3c..3ae08fc0b8f0 100644
--- a/sys/netinet/udp_var.h
+++ b/sys/netinet/udp_var.h
@@ -147,6 +147,7 @@ void kmod_udpstat_inc(int statnum);
 } while (0)
 
 SYSCTL_DECL(_net_inet_udp);
+SYSCTL_DECL(_net_inet_udplite);
 VNET_DECLARE(struct inpcbinfo, udbinfo);
 VNET_DECLARE(struct inpcbinfo, ulitecbinfo);
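Finally, a sketch (my example, not from the patch) of consuming the new net.inet.udplite.pcblist sysctl from userland, in the same style netstat(1) uses for net.inet.udp.pcblist: both lists are struct xinpcb records bracketed by struct xinpgen header/trailer records. Field names (inp_inc, inc_lport) are assumed to match recent FreeBSD in_pcb.h.

#include <sys/types.h>
#include <sys/socket.h>
#include <sys/socketvar.h>
#include <sys/sysctl.h>
#include <arpa/inet.h>
#include <netinet/in.h>
#include <netinet/in_pcb.h>
#include <err.h>
#include <stdio.h>
#include <stdlib.h>

int
main(void)
{
	const char *name = "net.inet.udplite.pcblist";
	size_t len = 0;
	char *buf;
	int count = 0;

	/* First call sizes the buffer, second call fetches the records. */
	if (sysctlbyname(name, NULL, &len, NULL, 0) == -1)
		err(1, "sysctlbyname(%s)", name);
	if ((buf = malloc(len)) == NULL)
		err(1, "malloc");
	if (sysctlbyname(name, buf, &len, NULL, 0) == -1)
		err(1, "sysctlbyname(%s)", name);

	/*
	 * Walk the xinpgen/xinpcb chain the way netstat does: skip the
	 * leading xinpgen, stop at the trailing one.
	 */
	struct xinpgen *xig = (struct xinpgen *)buf;

	for (xig = (struct xinpgen *)((char *)xig + xig->xig_len);
	    xig->xig_len > sizeof(struct xinpgen);
	    xig = (struct xinpgen *)((char *)xig + xig->xig_len)) {
		struct xinpcb *xip = (struct xinpcb *)xig;

		printf("udplite pcb: local port %u\n",
		    ntohs(xip->inp_inc.inc_lport));
		count++;
	}
	printf("%d UDP-Lite pcb(s)\n", count);
	free(buf);
	return (0);
}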
