diff options
Diffstat (limited to 'sys/netinet')
| -rw-r--r-- | sys/netinet/in_pcb.c | 13 | ||||
| -rw-r--r-- | sys/netinet/in_proto.c | 2 | ||||
| -rw-r--r-- | sys/netinet/libalias/alias_db.c | 2 | ||||
| -rw-r--r-- | sys/netinet/raw_ip.c | 4 | ||||
| -rw-r--r-- | sys/netinet/tcp_stacks/bbr.c | 8 | ||||
| -rw-r--r-- | sys/netinet/tcp_stacks/rack.c | 284 | ||||
| -rw-r--r-- | sys/netinet/tcp_stacks/tcp_rack.h | 1 | ||||
| -rw-r--r-- | sys/netinet/tcp_subr.c | 2 | ||||
| -rw-r--r-- | sys/netinet/tcp_syncache.c | 16 | ||||
| -rw-r--r-- | sys/netinet/tcp_timer.c | 7 | ||||
| -rw-r--r-- | sys/netinet/udp_usrreq.c | 49 | ||||
| -rw-r--r-- | sys/netinet/udp_var.h | 1 | 
12 files changed, 118 insertions, 271 deletions
| diff --git a/sys/netinet/in_pcb.c b/sys/netinet/in_pcb.c index dbe48242381d..712ff28768dc 100644 --- a/sys/netinet/in_pcb.c +++ b/sys/netinet/in_pcb.c @@ -2665,10 +2665,13 @@ in_pcbinshash(struct inpcb *inp)  	    INP_PCBPORTHASH(inp->inp_lport, pcbinfo->ipi_porthashmask)];  	/* -	 * Add entry to load balance group. -	 * Only do this if SO_REUSEPORT_LB is set. +	 * Ignore SO_REUSEPORT_LB if the socket is connected.  Really this case +	 * should be an error, but for UDP sockets it is not, and some +	 * applications erroneously set it on connected UDP sockets, so we can't +	 * change this without breaking compatibility.  	 */ -	if ((inp->inp_socket->so_options & SO_REUSEPORT_LB) != 0) { +	if (!connected && +	    (inp->inp_socket->so_options & SO_REUSEPORT_LB) != 0) {  		int error = in_pcbinslbgrouphash(inp, M_NODOM);  		if (error != 0)  			return (error); @@ -2770,6 +2773,10 @@ in_pcbrehash(struct inpcb *inp)  		connected = !in_nullhost(inp->inp_faddr);  	} +	/* See the comment in in_pcbinshash(). */ +	if (connected && (inp->inp_flags & INP_INLBGROUP) != 0) +		in_pcbremlbgrouphash(inp); +  	/*  	 * When rehashing, the caller must ensure that either the new or the old  	 * foreign address was unspecified. diff --git a/sys/netinet/in_proto.c b/sys/netinet/in_proto.c index db46da6022c5..42a6cf0b5810 100644 --- a/sys/netinet/in_proto.c +++ b/sys/netinet/in_proto.c @@ -108,6 +108,8 @@ SYSCTL_NODE(_net_inet, IPPROTO_ICMP, icmp, CTLFLAG_RW | CTLFLAG_MPSAFE, 0,      "ICMP");  SYSCTL_NODE(_net_inet, IPPROTO_UDP, udp, CTLFLAG_RW | CTLFLAG_MPSAFE, 0,      "UDP"); +SYSCTL_NODE(_net_inet, IPPROTO_UDPLITE, udplite, CTLFLAG_RW | CTLFLAG_MPSAFE, 0, +    "UDP-Lite");  SYSCTL_NODE(_net_inet, IPPROTO_TCP, tcp, CTLFLAG_RW | CTLFLAG_MPSAFE, 0,      "TCP");  #if defined(SCTP) || defined(SCTP_SUPPORT) diff --git a/sys/netinet/libalias/alias_db.c b/sys/netinet/libalias/alias_db.c index c143d74a2f45..41f0a328daec 100644 --- a/sys/netinet/libalias/alias_db.c +++ b/sys/netinet/libalias/alias_db.c @@ -2181,7 +2181,7 @@ LibAliasInit(struct libalias *la)  #undef malloc	/* XXX: ugly */  		la = malloc(sizeof *la, M_ALIAS, M_WAITOK | M_ZERO);  #else -		la = calloc(sizeof *la, 1); +		la = calloc(1, sizeof *la);  		if (la == NULL)  			return (la);  #endif diff --git a/sys/netinet/raw_ip.c b/sys/netinet/raw_ip.c index 66070faf97e9..bfe608be6b36 100644 --- a/sys/netinet/raw_ip.c +++ b/sys/netinet/raw_ip.c @@ -680,7 +680,6 @@ rip_ctloutput(struct socket *so, struct sockopt *sopt)  			break;  		case IP_DUMMYNET3:	/* generic dummynet v.3 functions */ -		case IP_DUMMYNET_GET:  			if (ip_dn_ctl_ptr != NULL)  				error = ip_dn_ctl_ptr(sopt);  			else @@ -747,9 +746,6 @@ rip_ctloutput(struct socket *so, struct sockopt *sopt)  			break;  		case IP_DUMMYNET3:	/* generic dummynet v.3 functions */ -		case IP_DUMMYNET_CONFIGURE: -		case IP_DUMMYNET_DEL: -		case IP_DUMMYNET_FLUSH:  			if (ip_dn_ctl_ptr != NULL)  				error = ip_dn_ctl_ptr(sopt);  			else diff --git a/sys/netinet/tcp_stacks/bbr.c b/sys/netinet/tcp_stacks/bbr.c index 66983edcdd73..10383bc0801e 100644 --- a/sys/netinet/tcp_stacks/bbr.c +++ b/sys/netinet/tcp_stacks/bbr.c @@ -477,7 +477,7 @@ bbr_log_rtt_shrinks(struct tcp_bbr *bbr, uint32_t cts, uint32_t applied,  		    uint16_t set);  static struct bbr_sendmap *  bbr_find_lowest_rsm(struct tcp_bbr *bbr); -static __inline uint32_t +static inline uint32_t  bbr_get_rtt(struct tcp_bbr *bbr, int32_t rtt_type);  static void  bbr_log_to_start(struct tcp_bbr *bbr, uint32_t cts, uint32_t to, int32_t pacing_delay, @@ -1841,7 +1841,7 @@ bbr_counter_destroy(void)  } -static __inline void +static inline void  bbr_fill_in_logging_data(struct tcp_bbr *bbr, struct tcp_log_bbr *l, uint32_t cts)  {  	memset(l, 0, sizeof(union tcp_log_stackspecific)); @@ -4206,7 +4206,7 @@ bbr_calc_thresh_tlp(struct tcpcb *tp, struct tcp_bbr *bbr,  /*   * Return one of three RTTs to use (in microseconds).   */ -static __inline uint32_t +static inline uint32_t  bbr_get_rtt(struct tcp_bbr *bbr, int32_t rtt_type)  {  	uint32_t f_rtt; @@ -4370,7 +4370,7 @@ bbr_timeout_rack(struct tcpcb *tp, struct tcp_bbr *bbr, uint32_t cts)  	return (0);  } -static __inline void +static inline void  bbr_clone_rsm(struct tcp_bbr *bbr, struct bbr_sendmap *nrsm, struct bbr_sendmap *rsm, uint32_t start)  {  	int idx; diff --git a/sys/netinet/tcp_stacks/rack.c b/sys/netinet/tcp_stacks/rack.c index c7962b57a69e..9ed26d5a617b 100644 --- a/sys/netinet/tcp_stacks/rack.c +++ b/sys/netinet/tcp_stacks/rack.c @@ -204,10 +204,6 @@ static int32_t rack_dnd_default = 0;		/* For rr_conf = 3, what is the default fo  static int32_t rack_rxt_controls = 0;  static int32_t rack_fill_cw_state = 0;  static uint8_t rack_req_measurements = 1; -/* Attack threshold detections */ -static uint32_t rack_highest_sack_thresh_seen = 0; -static uint32_t rack_highest_move_thresh_seen = 0; -static uint32_t rack_merge_out_sacks_on_attack = 0;  static int32_t rack_enable_hw_pacing = 0; /* Due to CCSP keep it off by default */  static int32_t rack_hw_rate_caps = 0; /* 1; */  static int32_t rack_hw_rate_cap_per = 0;	/* 0 -- off  */ @@ -223,7 +219,6 @@ static int32_t rack_default_pacing_divisor = 250;  static uint16_t rack_pacing_min_seg = 0;  static int32_t rack_timely_off = 0; -static uint32_t sad_seg_size_per = 800;	/* 80.0 % */  static int32_t rack_pkt_delay = 1000;  static int32_t rack_send_a_lot_in_prr = 1;  static int32_t rack_min_to = 1000;	/* Number of microsecond  min timeout */ @@ -399,18 +394,6 @@ counter_u64_t rack_extended_rfo;  counter_u64_t rack_sack_proc_all;  counter_u64_t rack_sack_proc_short;  counter_u64_t rack_sack_proc_restart; -counter_u64_t rack_sack_attacks_detected; -counter_u64_t rack_sack_attacks_reversed; -counter_u64_t rack_sack_attacks_suspect; -counter_u64_t rack_sack_used_next_merge; -counter_u64_t rack_sack_splits; -counter_u64_t rack_sack_used_prev_merge; -counter_u64_t rack_sack_skipped_acked; -counter_u64_t rack_ack_total; -counter_u64_t rack_express_sack; -counter_u64_t rack_sack_total; -counter_u64_t rack_move_none; -counter_u64_t rack_move_some;  counter_u64_t rack_input_idle_reduces;  counter_u64_t rack_collapsed_win; @@ -834,18 +817,6 @@ sysctl_rack_clear(SYSCTL_HANDLER_ARGS)  		counter_u64_zero(rack_rxt_clamps_cwnd_uniq);  		counter_u64_zero(rack_multi_single_eq);  		counter_u64_zero(rack_proc_non_comp_ack); -		counter_u64_zero(rack_sack_attacks_detected); -		counter_u64_zero(rack_sack_attacks_reversed); -		counter_u64_zero(rack_sack_attacks_suspect); -		counter_u64_zero(rack_sack_used_next_merge); -		counter_u64_zero(rack_sack_used_prev_merge); -		counter_u64_zero(rack_sack_splits); -		counter_u64_zero(rack_sack_skipped_acked); -		counter_u64_zero(rack_ack_total); -		counter_u64_zero(rack_express_sack); -		counter_u64_zero(rack_sack_total); -		counter_u64_zero(rack_move_none); -		counter_u64_zero(rack_move_some);  		counter_u64_zero(rack_try_scwnd);  		counter_u64_zero(rack_collapsed_win);  		counter_u64_zero(rack_collapsed_win_rxt); @@ -872,7 +843,6 @@ static void  rack_init_sysctls(void)  {  	struct sysctl_oid *rack_counters; -	struct sysctl_oid *rack_attack;  	struct sysctl_oid *rack_pacing;  	struct sysctl_oid *rack_timely;  	struct sysctl_oid *rack_timers; @@ -883,12 +853,6 @@ rack_init_sysctls(void)  	struct sysctl_oid *rack_probertt;  	struct sysctl_oid *rack_hw_pacing; -	rack_attack = SYSCTL_ADD_NODE(&rack_sysctl_ctx, -	    SYSCTL_CHILDREN(rack_sysctl_root), -	    OID_AUTO, -	    "sack_attack", -	    CTLFLAG_RW | CTLFLAG_MPSAFE, 0, -	    "Rack Sack Attack Counters and Controls");  	rack_counters = SYSCTL_ADD_NODE(&rack_sysctl_ctx,  	    SYSCTL_CHILDREN(rack_sysctl_root),  	    OID_AUTO, @@ -1535,11 +1499,6 @@ rack_init_sysctls(void)  	    "Do not disturb default for rack_rrr = 3");  	SYSCTL_ADD_S32(&rack_sysctl_ctx,  	    SYSCTL_CHILDREN(rack_misc), -	    OID_AUTO, "sad_seg_per", CTLFLAG_RW, -	    &sad_seg_size_per, 800, -	    "Percentage of segment size needed in a sack 800 = 80.0?"); -	SYSCTL_ADD_S32(&rack_sysctl_ctx, -	    SYSCTL_CHILDREN(rack_misc),  	    OID_AUTO, "rxt_controls", CTLFLAG_RW,  	    &rack_rxt_controls, 0,  	    "Retransmit sending size controls (valid  values 0, 1, 2 default=1)?"); @@ -1619,85 +1578,6 @@ rack_init_sysctls(void)  	    &rack_autosndbuf_inc, 20,  	    "What percentage should rack scale up its snd buffer by?"); - -	/* Sack Attacker detection stuff */ -	SYSCTL_ADD_U32(&rack_sysctl_ctx, -	    SYSCTL_CHILDREN(rack_attack), -	    OID_AUTO, "merge_out", CTLFLAG_RW, -	    &rack_merge_out_sacks_on_attack, 0, -	    "Do we merge the sendmap when we decide we are being attacked?"); - -	SYSCTL_ADD_U32(&rack_sysctl_ctx, -	    SYSCTL_CHILDREN(rack_attack), -	    OID_AUTO, "detect_highsackratio", CTLFLAG_RW, -	    &rack_highest_sack_thresh_seen, 0, -	    "Highest sack to ack ratio seen"); -	SYSCTL_ADD_U32(&rack_sysctl_ctx, -	    SYSCTL_CHILDREN(rack_attack), -	    OID_AUTO, "detect_highmoveratio", CTLFLAG_RW, -	    &rack_highest_move_thresh_seen, 0, -	    "Highest move to non-move ratio seen"); -	rack_ack_total = counter_u64_alloc(M_WAITOK); -	SYSCTL_ADD_COUNTER_U64(&rack_sysctl_ctx, -	    SYSCTL_CHILDREN(rack_attack), -	    OID_AUTO, "acktotal", CTLFLAG_RD, -	    &rack_ack_total, -	    "Total number of Ack's"); -	rack_express_sack = counter_u64_alloc(M_WAITOK); -	SYSCTL_ADD_COUNTER_U64(&rack_sysctl_ctx, -	    SYSCTL_CHILDREN(rack_attack), -	    OID_AUTO, "exp_sacktotal", CTLFLAG_RD, -	    &rack_express_sack, -	    "Total expresss number of Sack's"); -	rack_sack_total = counter_u64_alloc(M_WAITOK); -	SYSCTL_ADD_COUNTER_U64(&rack_sysctl_ctx, -	    SYSCTL_CHILDREN(rack_attack), -	    OID_AUTO, "sacktotal", CTLFLAG_RD, -	    &rack_sack_total, -	    "Total number of SACKs"); -	rack_move_none = counter_u64_alloc(M_WAITOK); -	SYSCTL_ADD_COUNTER_U64(&rack_sysctl_ctx, -	    SYSCTL_CHILDREN(rack_attack), -	    OID_AUTO, "move_none", CTLFLAG_RD, -	    &rack_move_none, -	    "Total number of SACK index reuse of positions under threshold"); -	rack_move_some = counter_u64_alloc(M_WAITOK); -	SYSCTL_ADD_COUNTER_U64(&rack_sysctl_ctx, -	    SYSCTL_CHILDREN(rack_attack), -	    OID_AUTO, "move_some", CTLFLAG_RD, -	    &rack_move_some, -	    "Total number of SACK index reuse of positions over threshold"); -	rack_sack_attacks_detected = counter_u64_alloc(M_WAITOK); -	SYSCTL_ADD_COUNTER_U64(&rack_sysctl_ctx, -	    SYSCTL_CHILDREN(rack_attack), -	    OID_AUTO, "attacks", CTLFLAG_RD, -	    &rack_sack_attacks_detected, -	    "Total number of SACK attackers that had sack disabled"); -	rack_sack_attacks_reversed = counter_u64_alloc(M_WAITOK); -	SYSCTL_ADD_COUNTER_U64(&rack_sysctl_ctx, -	    SYSCTL_CHILDREN(rack_attack), -	    OID_AUTO, "reversed", CTLFLAG_RD, -	    &rack_sack_attacks_reversed, -	    "Total number of SACK attackers that were later determined false positive"); -	rack_sack_attacks_suspect = counter_u64_alloc(M_WAITOK); -	SYSCTL_ADD_COUNTER_U64(&rack_sysctl_ctx, -	    SYSCTL_CHILDREN(rack_attack), -	    OID_AUTO, "suspect", CTLFLAG_RD, -	    &rack_sack_attacks_suspect, -	    "Total number of SACKs that triggered early detection"); - -	rack_sack_used_next_merge = counter_u64_alloc(M_WAITOK); -	SYSCTL_ADD_COUNTER_U64(&rack_sysctl_ctx, -	    SYSCTL_CHILDREN(rack_attack), -	    OID_AUTO, "nextmerge", CTLFLAG_RD, -	    &rack_sack_used_next_merge, -	    "Total number of times we used the next merge"); -	rack_sack_used_prev_merge = counter_u64_alloc(M_WAITOK); -	SYSCTL_ADD_COUNTER_U64(&rack_sysctl_ctx, -	    SYSCTL_CHILDREN(rack_attack), -	    OID_AUTO, "prevmerge", CTLFLAG_RD, -	    &rack_sack_used_prev_merge, -	    "Total number of times we used the prev merge");  	/* Counters */  	rack_total_bytes = counter_u64_alloc(M_WAITOK);  	SYSCTL_ADD_COUNTER_U64(&rack_sysctl_ctx, @@ -1908,18 +1788,6 @@ rack_init_sysctls(void)  	    OID_AUTO, "sack_short", CTLFLAG_RD,  	    &rack_sack_proc_short,  	    "Total times we took shortcut for sack processing"); -	rack_sack_skipped_acked = counter_u64_alloc(M_WAITOK); -	SYSCTL_ADD_COUNTER_U64(&rack_sysctl_ctx, -	    SYSCTL_CHILDREN(rack_attack), -	    OID_AUTO, "skipacked", CTLFLAG_RD, -	    &rack_sack_skipped_acked, -	    "Total number of times we skipped previously sacked"); -	rack_sack_splits = counter_u64_alloc(M_WAITOK); -	SYSCTL_ADD_COUNTER_U64(&rack_sysctl_ctx, -	    SYSCTL_CHILDREN(rack_attack), -	    OID_AUTO, "ofsplit", CTLFLAG_RD, -	    &rack_sack_splits, -	    "Total number of times we did the old fashion tree split");  	rack_input_idle_reduces = counter_u64_alloc(M_WAITOK);  	SYSCTL_ADD_COUNTER_U64(&rack_sysctl_ctx,  	    SYSCTL_CHILDREN(rack_counters), @@ -3319,16 +3187,6 @@ rack_counter_destroy(void)  	counter_u64_free(rack_hw_pace_lost);  	counter_u64_free(rack_non_fto_send);  	counter_u64_free(rack_extended_rfo); -	counter_u64_free(rack_ack_total); -	counter_u64_free(rack_express_sack); -	counter_u64_free(rack_sack_total); -	counter_u64_free(rack_move_none); -	counter_u64_free(rack_move_some); -	counter_u64_free(rack_sack_attacks_detected); -	counter_u64_free(rack_sack_attacks_reversed); -	counter_u64_free(rack_sack_attacks_suspect); -	counter_u64_free(rack_sack_used_next_merge); -	counter_u64_free(rack_sack_used_prev_merge);  	counter_u64_free(rack_tlp_tot);  	counter_u64_free(rack_tlp_newdata);  	counter_u64_free(rack_tlp_retran); @@ -3351,8 +3209,6 @@ rack_counter_destroy(void)  	counter_u64_free(rack_sack_proc_all);  	counter_u64_free(rack_sack_proc_restart);  	counter_u64_free(rack_sack_proc_short); -	counter_u64_free(rack_sack_skipped_acked); -	counter_u64_free(rack_sack_splits);  	counter_u64_free(rack_input_idle_reduces);  	counter_u64_free(rack_collapsed_win);  	counter_u64_free(rack_collapsed_win_rxt); @@ -4730,7 +4586,7 @@ rack_make_timely_judgement(struct tcp_rack *rack, uint32_t rtt, int32_t rtt_diff  	return (timely_says);  } -static __inline int +static inline int  rack_in_gp_window(struct tcpcb *tp, struct rack_sendmap *rsm)  {  	if (SEQ_GEQ(rsm->r_start, tp->gput_seq) && @@ -4767,7 +4623,7 @@ rack_in_gp_window(struct tcpcb *tp, struct rack_sendmap *rsm)  	return (0);  } -static __inline void +static inline void  rack_mark_in_gp_win(struct tcpcb *tp, struct rack_sendmap *rsm)  { @@ -4784,7 +4640,7 @@ rack_mark_in_gp_win(struct tcpcb *tp, struct rack_sendmap *rsm)  		rsm->r_flags &= ~RACK_IN_GP_WIN;  } -static __inline void +static inline void  rack_clear_gp_marks(struct tcpcb *tp, struct tcp_rack *rack)  {  	/* A GP measurement is ending, clear all marks on the send map*/ @@ -4802,7 +4658,7 @@ rack_clear_gp_marks(struct tcpcb *tp, struct tcp_rack *rack)  } -static __inline void +static inline void  rack_tend_gp_marks(struct tcpcb *tp, struct tcp_rack *rack)  {  	struct rack_sendmap *rsm = NULL; @@ -6864,6 +6720,18 @@ rack_mark_lost(struct tcpcb *tp,  	}  } +static inline void +rack_mark_nolonger_lost(struct tcp_rack *rack, struct rack_sendmap *rsm) +{ +	KASSERT((rack->r_ctl.rc_considered_lost >= (rsm->r_end - rsm->r_start)), +		("rsm:%p rack:%p rc_considered_lost goes negative", rsm,  rack)); +	rsm->r_flags &= ~RACK_WAS_LOST; +	if (rack->r_ctl.rc_considered_lost >= (rsm->r_end - rsm->r_start)) +		rack->r_ctl.rc_considered_lost -= rsm->r_end - rsm->r_start; +	else +		rack->r_ctl.rc_considered_lost = 0; +} +  /*   * RACK Timer, here we simply do logging and house keeping.   * the normal rack_output() function will call the @@ -7005,7 +6873,7 @@ rack_setup_offset_for_rsm(struct tcp_rack *rack, struct rack_sendmap *src_rsm, s  	rsm->orig_t_space = M_TRAILINGROOM(rsm->m);  } -static __inline void +static inline void  rack_clone_rsm(struct tcp_rack *rack, struct rack_sendmap *nrsm,  	       struct rack_sendmap *rsm, uint32_t start)  { @@ -8130,13 +7998,7 @@ rack_update_rsm(struct tcpcb *tp, struct tcp_rack *rack,  		 * remove the lost desgination and reduce the  		 * bytes considered lost.  		 */ -		rsm->r_flags &= ~RACK_WAS_LOST; -		KASSERT((rack->r_ctl.rc_considered_lost >= (rsm->r_end - rsm->r_start)), -			("rsm:%p rack:%p rc_considered_lost goes negative", rsm,  rack)); -		if (rack->r_ctl.rc_considered_lost >= (rsm->r_end - rsm->r_start)) -			rack->r_ctl.rc_considered_lost -= rsm->r_end - rsm->r_start; -		else -			rack->r_ctl.rc_considered_lost = 0; +		rack_mark_nolonger_lost(rack, rsm);  	}  	idx = rsm->r_rtr_cnt - 1;  	rsm->r_tim_lastsent[idx] = ts; @@ -9492,6 +9354,11 @@ do_rest_ofb:  				if (rsm->r_flags & RACK_WAS_LOST) {  					int my_chg; +					/* +					 * Note here we do not use our rack_mark_nolonger_lost() function +					 * since we are moving our data pointer around and the +					 * ack'ed side is already not considered lost. +					 */  					my_chg = (nrsm->r_end - nrsm->r_start);  					KASSERT((rack->r_ctl.rc_considered_lost >= my_chg),  						("rsm:%p rack:%p rc_considered_lost goes negative", rsm,  rack)); @@ -9531,7 +9398,6 @@ do_rest_ofb:  					goto out;  				}  				rack_log_map_chg(tp, rack, &stack_map, rsm, next, MAP_SACK_M1, end, __LINE__); -				counter_u64_add(rack_sack_used_next_merge, 1);  				/* Postion for the next block */  				start = next->r_end;  				rsm = tqhash_next(rack->r_ctl.tqh, next); @@ -9563,7 +9429,6 @@ do_rest_ofb:  					 */  					goto out;  				} -				counter_u64_add(rack_sack_splits, 1);  				rack_clone_rsm(rack, nrsm, rsm, start);  				rsm->r_just_ret = 0;  #ifndef INVARIANTS @@ -9585,7 +9450,6 @@ do_rest_ofb:  			}  		} else {  			/* Already sacked this piece */ -			counter_u64_add(rack_sack_skipped_acked, 1);  			if (end == rsm->r_end) {  				/* Done with block */  				rsm = tqhash_next(rack->r_ctl.tqh, rsm); @@ -9659,16 +9523,11 @@ do_rest_ofb:  			changed += (rsm->r_end - rsm->r_start);  			/* You get a count for acking a whole segment or more */  			if (rsm->r_flags & RACK_WAS_LOST) { -				int my_chg; - -				my_chg = (rsm->r_end - rsm->r_start); -				rsm->r_flags &= ~RACK_WAS_LOST; -				KASSERT((rack->r_ctl.rc_considered_lost >= my_chg), -					("rsm:%p rack:%p rc_considered_lost goes negative", rsm,  rack)); -				if (my_chg <= rack->r_ctl.rc_considered_lost) -					rack->r_ctl.rc_considered_lost -= my_chg; -				else -					rack->r_ctl.rc_considered_lost = 0; +				/* +				 * Here we can use the inline function since +				 * the rsm is truly marked lost and now no longer lost. +				 */ +				rack_mark_nolonger_lost(rack, rsm);  			}  			rack->r_ctl.rc_sacked += (rsm->r_end - rsm->r_start);  			if (rsm->r_in_tmap) /* should be true */ @@ -9690,8 +9549,6 @@ do_rest_ofb:  				rsm->r_in_tmap = 0;  			}  			rack_log_map_chg(tp, rack, NULL, rsm, NULL, MAP_SACK_M3, end, __LINE__); -		} else { -			counter_u64_add(rack_sack_skipped_acked, 1);  		}  		if (end == rsm->r_end) {  			/* This block only - done, setup for next */ @@ -9851,6 +9708,10 @@ do_rest_ofb:  			if (rsm->r_flags & RACK_WAS_LOST) {  				int my_chg; +				/* +				 * Note here we are using hookery again so we can't +				 * use our rack_mark_nolonger_lost() function. +				 */  				my_chg = (nrsm->r_end - nrsm->r_start);  				KASSERT((rack->r_ctl.rc_considered_lost >= my_chg),  					("rsm:%p rack:%p rc_considered_lost goes negative", rsm,  rack)); @@ -9866,7 +9727,6 @@ do_rest_ofb:  			}  			rack_log_map_chg(tp, rack, prev, &stack_map, rsm, MAP_SACK_M4, end, __LINE__);  			rsm = prev; -			counter_u64_add(rack_sack_used_prev_merge, 1);  		} else {  			/**  			 * This is the case where our previous @@ -9931,7 +9791,6 @@ do_rest_ofb:  			 * rsm      |---|         (acked)  			 * nrsm         |------|  (not acked)  			 */ -			counter_u64_add(rack_sack_splits, 1);  			rack_clone_rsm(rack, nrsm, rsm, end);  			rsm->r_flags &= (~RACK_HAS_FIN);  			rsm->r_just_ret = 0; @@ -9952,16 +9811,10 @@ do_rest_ofb:  			rack_update_rtt(tp, rack, rsm, to, cts, SACKED, 0);  			changed += (rsm->r_end - rsm->r_start);  			if (rsm->r_flags & RACK_WAS_LOST) { -				int my_chg; - -				my_chg = (rsm->r_end - rsm->r_start); -				rsm->r_flags &= ~RACK_WAS_LOST; -				KASSERT((rack->r_ctl.rc_considered_lost >= my_chg), -					("rsm:%p rack:%p rc_considered_lost goes negative", rsm,  rack)); -				if (my_chg <= rack->r_ctl.rc_considered_lost) -					rack->r_ctl.rc_considered_lost -= my_chg; -				else -					rack->r_ctl.rc_considered_lost = 0; +				/* +				 * Here it is safe to use our function. +				 */ +				rack_mark_nolonger_lost(rack, rsm);  			}  			rack->r_ctl.rc_sacked += (rsm->r_end - rsm->r_start); @@ -9985,11 +9838,6 @@ do_rest_ofb:  				rsm->r_in_tmap = 0;  			}  		} -	} else if (start != end){ -		/* -		 * The block was already acked. -		 */ -		counter_u64_add(rack_sack_skipped_acked, 1);  	}  out:  	if (rsm && @@ -10362,13 +10210,7 @@ more:  			 * and yet before retransmitting we get an ack  			 * which can happen due to reordering.  			 */ -			rsm->r_flags &= ~RACK_WAS_LOST; -			KASSERT((rack->r_ctl.rc_considered_lost >= (rsm->r_end - rsm->r_start)), -				("rsm:%p rack:%p rc_considered_lost goes negative", rsm,  rack)); -			if (rack->r_ctl.rc_considered_lost >= (rsm->r_end - rsm->r_start)) -				rack->r_ctl.rc_considered_lost -= rsm->r_end - rsm->r_start; -			else -				rack->r_ctl.rc_considered_lost = 0; +			rack_mark_nolonger_lost(rack, rsm);  		}  		rack_log_map_chg(tp, rack, NULL, rsm, NULL, MAP_FREE, rsm->r_end, __LINE__);  		rack->r_ctl.rc_holes_rxt -= rsm->r_rtr_bytes; @@ -10476,12 +10318,7 @@ more:  		 * which can happen due to reordering. In this  		 * case its only a partial ack of the send.  		 */ -		KASSERT((rack->r_ctl.rc_considered_lost >= (th_ack - rsm->r_start)), -			("rsm:%p rack:%p rc_considered_lost goes negative th_ack:%u", rsm,  rack, th_ack)); -		if (rack->r_ctl.rc_considered_lost >= (th_ack - rsm->r_start)) -			rack->r_ctl.rc_considered_lost -= th_ack - rsm->r_start; -		else -			rack->r_ctl.rc_considered_lost = 0; +		rack_mark_nolonger_lost(rack, rsm);  	}  	/*  	 * Clear the dup ack count for @@ -10793,17 +10630,6 @@ rack_log_ack(struct tcpcb *tp, struct tcpopt *to, struct tcphdr *th, int entered  	changed = 0;  	th_ack = th->th_ack;  	segsiz = ctf_fixed_maxseg(rack->rc_tp); -	if (BYTES_THIS_ACK(tp, th) >=  segsiz) { -		/* -		 * You only get credit for -		 * MSS and greater (and you get extra -		 * credit for larger cum-ack moves). -		 */ -		int ac; - -		ac = BYTES_THIS_ACK(tp, th) / ctf_fixed_maxseg(rack->rc_tp); -		counter_u64_add(rack_ack_total, ac); -	}  	if (SEQ_GT(th_ack, tp->snd_una)) {  		rack_log_progress_event(rack, tp, ticks, PROGRESS_UPDATE, __LINE__);  		tp->t_acktime = ticks; @@ -10875,8 +10701,8 @@ rack_log_ack(struct tcpcb *tp, struct tcpopt *to, struct tcphdr *th, int entered  	if (sacks_seen != NULL)  		*sacks_seen = num_sack_blks;  	if (num_sack_blks == 0) { -		/* Nothing to sack, but we need to update counts */ -		goto out_with_totals; +		/* Nothing to sack */ +		goto out;  	}  	/* Its a sack of some sort */  	if (num_sack_blks < 2) { @@ -10899,7 +10725,7 @@ rack_log_ack(struct tcpcb *tp, struct tcpopt *to, struct tcphdr *th, int entered  	 */  again:  	if (num_sack_blks == 0) -		goto out_with_totals; +		goto out;  	if (num_sack_blks > 1) {  		for (i = 0; i < num_sack_blks; i++) {  			for (j = i + 1; j < num_sack_blks; j++) { @@ -10952,19 +10778,7 @@ do_sack_work:  			changed += acked;  		}  		if (num_sack_blks == 1) { -			/* -			 * This is what we would expect from -			 * a normal implementation to happen -			 * after we have retransmitted the FR, -			 * i.e the sack-filter pushes down -			 * to 1 block and the next to be retransmitted -			 * is the sequence in the sack block (has more -			 * are acked). Count this as ACK'd data to boost -			 * up the chances of recovering any false positives. -			 */ -			counter_u64_add(rack_ack_total, (acked / ctf_fixed_maxseg(rack->rc_tp))); -			counter_u64_add(rack_express_sack, 1); -			goto out_with_totals; +			goto out;  		} else {  			/*  			 * Start the loop through the @@ -10973,7 +10787,6 @@ do_sack_work:  			loop_start = 1;  		}  	} -	counter_u64_add(rack_sack_total, 1);  	rsm = rack->r_ctl.rc_sacklast;  	for (i = loop_start; i < num_sack_blks; i++) {  		acked = rack_proc_sack_blk(tp, rack, &sack_blocks[i], to, &rsm, cts,  segsiz); @@ -10982,18 +10795,6 @@ do_sack_work:  			changed += acked;  		}  	} -out_with_totals: -	if (num_sack_blks > 1) { -		/* -		 * You get an extra stroke if -		 * you have more than one sack-blk, this -		 * could be where we are skipping forward -		 * and the sack-filter is still working, or -		 * it could be an attacker constantly -		 * moving us. -		 */ -		counter_u64_add(rack_move_some, 1); -	}  out:  	if (changed) {  		/* Something changed cancel the rack timer */ @@ -14713,7 +14514,6 @@ rack_init(struct tcpcb *tp, void **ptr)  	rack->r_ctl.rc_prr_sendalot = rack_send_a_lot_in_prr;  	rack->r_ctl.rc_min_to = rack_min_to;  	microuptime(&rack->r_ctl.act_rcv_time); -	rack->r_ctl.rc_last_time_decay = rack->r_ctl.act_rcv_time;  	rack->r_ctl.rack_per_of_gp_ss = rack_per_of_gp_ss;  	if (rack_hw_up_only)  		rack->r_up_only = 1; diff --git a/sys/netinet/tcp_stacks/tcp_rack.h b/sys/netinet/tcp_stacks/tcp_rack.h index 144b4fabf7eb..cac17d9aeb50 100644 --- a/sys/netinet/tcp_stacks/tcp_rack.h +++ b/sys/netinet/tcp_stacks/tcp_rack.h @@ -462,7 +462,6 @@ struct rack_control {  	uint64_t rc_gp_output_ts; /* chg*/  	uint64_t rc_gp_cumack_ts; /* chg*/  	struct timeval act_rcv_time; -	struct timeval rc_last_time_decay;	/* SAD time decay happened here */  	uint64_t gp_bw;  	uint64_t init_rate;  #ifdef NETFLIX_SHARED_CWND diff --git a/sys/netinet/tcp_subr.c b/sys/netinet/tcp_subr.c index c817c79881d6..b6f428b279b3 100644 --- a/sys/netinet/tcp_subr.c +++ b/sys/netinet/tcp_subr.c @@ -607,7 +607,7 @@ tcp_recv_udp_tunneled_packet(struct mbuf *m, int off, struct inpcb *inp,  		}  	}  	m->m_pkthdr.tcp_tun_port = port = uh->uh_sport; -	bcopy(th, uh, m->m_len - off); +	bcopy(th, uh, m->m_len - off - sizeof(struct udphdr));  	m->m_len -= sizeof(struct udphdr);  	m->m_pkthdr.len -= sizeof(struct udphdr);  	/* diff --git a/sys/netinet/tcp_syncache.c b/sys/netinet/tcp_syncache.c index be20fb44a820..3a7755e9f09e 100644 --- a/sys/netinet/tcp_syncache.c +++ b/sys/netinet/tcp_syncache.c @@ -1168,7 +1168,7 @@ syncache_expand(struct in_conninfo *inc, struct tcpopt *to, struct tcphdr *th,  		/*  		 * If listening socket requested TCP digests, check that  		 * received ACK has signature and it is correct. -		 * If not, drop the ACK and leave sc entry in th cache, +		 * If not, drop the ACK and leave sc entry in the cache,  		 * because SYN was received with correct signature.  		 */  		if (sc->sc_flags & SCF_SIGNATURE) { @@ -1380,6 +1380,7 @@ syncache_add(struct in_conninfo *inc, struct tcpopt *to, struct tcphdr *th,  	struct tcpcb *tp;  	struct socket *rv = NULL;  	struct syncache *sc = NULL; +	struct ucred *cred;  	struct syncache_head *sch;  	struct mbuf *ipopts = NULL;  	u_int ltflags; @@ -1408,6 +1409,7 @@ syncache_add(struct in_conninfo *inc, struct tcpopt *to, struct tcphdr *th,  	 */  	KASSERT(SOLISTENING(so), ("%s: %p not listening", __func__, so));  	tp = sototcpcb(so); +	cred = V_tcp_syncache.see_other ? NULL : crhold(so->so_cred);  #ifdef INET6  	if (inc->inc_flags & INC_ISIPV6) { @@ -1636,16 +1638,16 @@ syncache_add(struct in_conninfo *inc, struct tcpopt *to, struct tcphdr *th,  	/*  	 * sc_cred is only used in syncache_pcblist() to list TCP endpoints in  	 * TCPS_SYN_RECEIVED state when V_tcp_syncache.see_other is false. -	 * Therefore, store the credentials and take a reference count only -	 * when needed: +	 * Therefore, store the credentials only when needed:  	 * - sc is allocated from the zone and not using the on stack instance.  	 * - the sysctl variable net.inet.tcp.syncache.see_other is false.  	 * The reference count is decremented when a zone allocated sc is  	 * freed in syncache_free().  	 */ -	if (sc != &scs && !V_tcp_syncache.see_other) -		sc->sc_cred = crhold(so->so_cred); -	else +	if (sc != &scs && !V_tcp_syncache.see_other) { +		sc->sc_cred = cred; +		cred = NULL; +	} else  		sc->sc_cred = NULL;  	sc->sc_port = port;  	sc->sc_ipopts = ipopts; @@ -1783,6 +1785,8 @@ donenoprobe:  		tcp_fastopen_decrement_counter(tfo_pending);  tfo_expanded: +	if (cred != NULL) +		crfree(cred);  	if (sc == NULL || sc == &scs) {  #ifdef MAC  		mac_syncache_destroy(&maclabel); diff --git a/sys/netinet/tcp_timer.c b/sys/netinet/tcp_timer.c index 3b9fe7a317b0..57c57666fa3a 100644 --- a/sys/netinet/tcp_timer.c +++ b/sys/netinet/tcp_timer.c @@ -513,9 +513,12 @@ tcp_timer_persist(struct tcpcb *tp)  	if (progdrop || (tp->t_rxtshift >= V_tcp_retries &&  	    (ticks - tp->t_rcvtime >= tcp_maxpersistidle ||  	     ticks - tp->t_rcvtime >= TCP_REXMTVAL(tp) * tcp_totbackoff))) { -		if (!progdrop) +		if (progdrop) { +			tcp_log_end_status(tp, TCP_EI_STATUS_PROGRESS); +		} else {  			TCPSTAT_INC(tcps_persistdrop); -		tcp_log_end_status(tp, TCP_EI_STATUS_PERSIST_MAX); +			tcp_log_end_status(tp, TCP_EI_STATUS_PERSIST_MAX); +		}  		goto dropit;  	}  	/* diff --git a/sys/netinet/udp_usrreq.c b/sys/netinet/udp_usrreq.c index cea8a916679b..f1d952037d5a 100644 --- a/sys/netinet/udp_usrreq.c +++ b/sys/netinet/udp_usrreq.c @@ -787,7 +787,8 @@ udplite_ctlinput(struct icmp *icmp)  static int  udp_pcblist(SYSCTL_HANDLER_ARGS)  { -	struct inpcb_iterator inpi = INP_ALL_ITERATOR(&V_udbinfo, +	struct inpcbinfo *pcbinfo = udp_get_inpcbinfo(arg2); +	struct inpcb_iterator inpi = INP_ALL_ITERATOR(pcbinfo,  	    INPLOOKUP_RLOCKPCB);  	struct xinpgen xig;  	struct inpcb *inp; @@ -799,7 +800,7 @@ udp_pcblist(SYSCTL_HANDLER_ARGS)  	if (req->oldptr == 0) {  		int n; -		n = V_udbinfo.ipi_count; +		n = pcbinfo->ipi_count;  		n += imax(n / 8, 10);  		req->oldidx = 2 * (sizeof xig) + n * sizeof(struct xinpcb);  		return (0); @@ -810,8 +811,8 @@ udp_pcblist(SYSCTL_HANDLER_ARGS)  	bzero(&xig, sizeof(xig));  	xig.xig_len = sizeof xig; -	xig.xig_count = V_udbinfo.ipi_count; -	xig.xig_gen = V_udbinfo.ipi_gencnt; +	xig.xig_count = pcbinfo->ipi_count; +	xig.xig_gen = pcbinfo->ipi_gencnt;  	xig.xig_sogen = so_gencnt;  	error = SYSCTL_OUT(req, &xig, sizeof xig);  	if (error) @@ -838,9 +839,9 @@ udp_pcblist(SYSCTL_HANDLER_ARGS)  		 * that something happened while we were processing this  		 * request, and it might be necessary to retry.  		 */ -		xig.xig_gen = V_udbinfo.ipi_gencnt; +		xig.xig_gen = pcbinfo->ipi_gencnt;  		xig.xig_sogen = so_gencnt; -		xig.xig_count = V_udbinfo.ipi_count; +		xig.xig_count = pcbinfo->ipi_count;  		error = SYSCTL_OUT(req, &xig, sizeof xig);  	} @@ -848,10 +849,15 @@ udp_pcblist(SYSCTL_HANDLER_ARGS)  }  SYSCTL_PROC(_net_inet_udp, UDPCTL_PCBLIST, pcblist, -    CTLTYPE_OPAQUE | CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, 0, +    CTLTYPE_OPAQUE | CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, IPPROTO_UDP,      udp_pcblist, "S,xinpcb",      "List of active UDP sockets"); +SYSCTL_PROC(_net_inet_udplite, OID_AUTO, pcblist, +    CTLTYPE_OPAQUE | CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, IPPROTO_UDPLITE, +    udp_pcblist, "S,xinpcb", +    "List of active UDP-Lite sockets"); +  #ifdef INET  static int  udp_getcred(SYSCTL_HANDLER_ARGS) @@ -1166,7 +1172,19 @@ udp_send(struct socket *so, int flags, struct mbuf *m, struct sockaddr *addr,  	else  		INP_RLOCK(inp);  	NET_EPOCH_ENTER(et); +#ifdef INET6 +	if ((flags & PRUS_IPV6) != 0) { +		if ((inp->in6p_outputopts != NULL) && +		    (inp->in6p_outputopts->ip6po_tclass != -1)) +			tos = (u_char)inp->in6p_outputopts->ip6po_tclass; +		else +			tos = 0; +	} else { +		tos = inp->inp_ip_tos; +	} +#else  	tos = inp->inp_ip_tos; +#endif  	if (control != NULL) {  		/*  		 * XXX: Currently, we assume all the optional information is @@ -1190,6 +1208,23 @@ udp_send(struct socket *so, int flags, struct mbuf *m, struct sockaddr *addr,  			error = udp_v4mapped_pktinfo(cm, &src, inp, flags);  			if (error != 0)  				break; +			if (((flags & PRUS_IPV6) != 0) && +			    (cm->cmsg_level == IPPROTO_IPV6) && +			    (cm->cmsg_type == IPV6_TCLASS)) { +				int tclass; + +				if (cm->cmsg_len != CMSG_LEN(sizeof(int))) { +					error = EINVAL; +					break; +				} +				tclass = *(int *)CMSG_DATA(cm); +				if (tclass < -1 || tclass > 255) { +					error = EINVAL; +					break; +				} +				if (tclass != -1) +					tos = (u_char)tclass; +			}  #endif  			if (cm->cmsg_level != IPPROTO_IP)  				continue; diff --git a/sys/netinet/udp_var.h b/sys/netinet/udp_var.h index 3895f365db3c..3ae08fc0b8f0 100644 --- a/sys/netinet/udp_var.h +++ b/sys/netinet/udp_var.h @@ -147,6 +147,7 @@ void	kmod_udpstat_inc(int statnum);  	} while (0)  SYSCTL_DECL(_net_inet_udp); +SYSCTL_DECL(_net_inet_udplite);  VNET_DECLARE(struct inpcbinfo, udbinfo);  VNET_DECLARE(struct inpcbinfo, ulitecbinfo); | 
