diff options
| author | Lawrence Stewart <lstewart@FreeBSD.org> | 2011-01-24 23:08:38 +0000 | 
|---|---|---|
| committer | Lawrence Stewart <lstewart@FreeBSD.org> | 2011-01-24 23:08:38 +0000 | 
| commit | 050570efa79efcc9cf5adeb545f1a679c8dc377b (patch) | |
| tree | 8ecd0666e2a0cc863323b0b79a0e980cfd705e8d | |
| parent | 5f3b301a43c68d4a8eac808098c750af879fe6fb (diff) | |
Notes
| -rw-r--r-- | sys/modules/khelp/Makefile | 2 | ||||
| -rw-r--r-- | sys/modules/khelp/h_ertt/Makefile | 9 | ||||
| -rw-r--r-- | sys/netinet/khelp/h_ertt.c | 545 | ||||
| -rw-r--r-- | sys/netinet/khelp/h_ertt.h | 89 | 
4 files changed, 644 insertions, 1 deletions
diff --git a/sys/modules/khelp/Makefile b/sys/modules/khelp/Makefile index 4dfb082ac351..1901331a3c2f 100644 --- a/sys/modules/khelp/Makefile +++ b/sys/modules/khelp/Makefile @@ -1,5 +1,5 @@  # $FreeBSD$ -SUBDIR= +SUBDIR=	h_ertt  .include <bsd.subdir.mk> diff --git a/sys/modules/khelp/h_ertt/Makefile b/sys/modules/khelp/h_ertt/Makefile new file mode 100644 index 000000000000..33d8c7b0e765 --- /dev/null +++ b/sys/modules/khelp/h_ertt/Makefile @@ -0,0 +1,9 @@ +# $FreeBSD$ + +.include <bsd.own.mk> + +.PATH: ${.CURDIR}/../../../netinet/khelp +KMOD=	h_ertt +SRCS=	h_ertt.c + +.include <bsd.kmod.mk> diff --git a/sys/netinet/khelp/h_ertt.c b/sys/netinet/khelp/h_ertt.c new file mode 100644 index 000000000000..b5dcfe045ab2 --- /dev/null +++ b/sys/netinet/khelp/h_ertt.c @@ -0,0 +1,545 @@ +/*- + * Copyright (c) 2009-2010 + * 	Swinburne University of Technology, Melbourne, Australia + * Copyright (c) 2010 Lawrence Stewart <lstewart@freebsd.org> + * Copyright (c) 2010-2011 The FreeBSD Foundation + * All rights reserved. + * + * This software was developed at the Centre for Advanced Internet + * Architectures, Swinburne University, by David Hayes, made possible in part by + * a grant from the Cisco University Research Program Fund at Community + * Foundation Silicon Valley. + * + * Portions of this software were developed at the Centre for Advanced + * Internet Architectures, Swinburne University of Technology, Melbourne, + * Australia by David Hayes under sponsorship from the FreeBSD Foundation. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + *    notice, this list of conditions and the following disclaimer. + * 2. 
Redistributions in binary form must reproduce the above copyright + *    notice, this list of conditions and the following disclaimer in the + *    documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include <sys/cdefs.h> +__FBSDID("$FreeBSD$"); + +#include <sys/param.h> +#include <sys/kernel.h> +#include <sys/mbuf.h> +#include <sys/module.h> +#include <sys/hhook.h> +#include <sys/khelp.h> +#include <sys/module_khelp.h> +#include <sys/socket.h> +#include <sys/sockopt.h> + +#include <net/vnet.h> + +#include <netinet/in.h> +#include <netinet/in_pcb.h> +#include <netinet/tcp_seq.h> +#include <netinet/tcp_var.h> + +#include <netinet/khelp/h_ertt.h> + +#include <vm/uma.h> + +uma_zone_t txseginfo_zone; + +/* Smoothing factor for delayed ack guess. */ +#define	DLYACK_SMOOTH	5 + +/* Max number of time stamp errors allowed in a session. 
*/ +#define	MAX_TS_ERR	10 + +static int ertt_packet_measurement_hook(int hhook_type, int hhook_id, +    void *udata, void *ctx_data, void *hdata, struct osd *hosd); +static int ertt_add_tx_segment_info_hook(int hhook_type, int hhook_id, +    void *udata, void *ctx_data, void *hdata, struct osd *hosd); +static int ertt_mod_init(void); +static int ertt_mod_destroy(void); +static int ertt_uma_ctor(void *mem, int size, void *arg, int flags); +static void ertt_uma_dtor(void *mem, int size, void *arg); + +/* + * Contains information about the sent segment for comparison with the + * corresponding ack. + */ +struct txseginfo { +	/* Segment length. */ +	long		len; +	/* Segment sequence number. */ +	tcp_seq		seq; +	/* Time stamp indicating when the packet was sent. */ +	uint32_t	tx_ts; +	/* Last received receiver ts (if the TCP option is used). */ +	uint32_t	rx_ts; +	uint32_t	flags; +	TAILQ_ENTRY (txseginfo) txsegi_lnk; +}; + +/* Flags for struct txseginfo. */ +#define	TXSI_TSO		0x01 /* TSO was used for this entry. */ +#define	TXSI_RTT_MEASURE_START	0x02 /* Start a per RTT measurement. */ +#define	TXSI_RX_MEASURE_END	0x04 /* Measure the rx rate until this txsi. */ + +struct helper ertt_helper = { +	.mod_init = ertt_mod_init, +	.mod_destroy = ertt_mod_destroy, +	.h_flags = HELPER_NEEDS_OSD, +	.h_classes = HELPER_CLASS_TCP +}; + +/* Define the helper hook info required by ERTT. */ +struct hookinfo ertt_hooks[] = { +	{ +		.hook_type = HHOOK_TYPE_TCP, +		.hook_id = HHOOK_TCP_EST_IN, +		.hook_udata = NULL, +		.hook_func = &ertt_packet_measurement_hook +	}, +	{ +		.hook_type = HHOOK_TYPE_TCP, +		.hook_id = HHOOK_TCP_EST_OUT, +		.hook_udata = NULL, +		.hook_func = &ertt_add_tx_segment_info_hook +	} +}; + +/* Flags to indicate how marked_packet_rtt should handle this txsi. */ +#define	MULTI_ACK		0x01 /* More than this txsi is acked. */ +#define	OLD_TXSI		0x02 /* TXSI is old according to timestamps. */ +#define	CORRECT_ACK		0X04 /* Acks this TXSI. 
*/ +#define	FORCED_MEASUREMENT	0X08 /* Force an RTT measurement. */ + +/* + * This function measures the RTT of a particular segment/ack pair, or the next + * closest if this will yield an inaccurate result due to delayed acking or + * other issues. + */ +static void inline +marked_packet_rtt(struct txseginfo *txsi, struct ertt *e_t, struct tcpcb *tp, +    uint32_t *pmeasurenext, int *pmeasurenext_len, int *prtt_bytes_adjust, +    int mflag) +{ + +	/* +	 * If we can't measure this one properly due to delayed acking adjust +	 * byte counters and flag to measure next txsi. Note that since the +	 * marked packet's transmitted bytes are measured we need to subtract the +	 * transmitted bytes. Then pretend the next txsi was marked. +	 */ +	if (mflag & (MULTI_ACK|OLD_TXSI)) { +		*pmeasurenext = txsi->tx_ts; +		*pmeasurenext_len = txsi->len; +		*prtt_bytes_adjust += *pmeasurenext_len; +	} else { +		if (mflag & FORCED_MEASUREMENT) { +			e_t->markedpkt_rtt = ticks - *pmeasurenext + 1; +			e_t->bytes_tx_in_marked_rtt = e_t->bytes_tx_in_rtt + +			    *pmeasurenext_len - *prtt_bytes_adjust; +		} else { +			e_t->markedpkt_rtt = ticks - txsi->tx_ts + 1; +			e_t->bytes_tx_in_marked_rtt = e_t->bytes_tx_in_rtt - +			    *prtt_bytes_adjust; +		} +		e_t->marked_snd_cwnd = tp->snd_cwnd; + +		/* +		 * Reset the ERTT_MEASUREMENT_IN_PROGRESS flag to indicate to +		 * add_tx_segment_info that a new measurement should be started. +		 */ +		e_t->flags &= ~ERTT_MEASUREMENT_IN_PROGRESS; +		/* +		 * Set ERTT_NEW_MEASUREMENT to tell the congestion control +		 * algorithm that a new marked RTT measurement has been made +		 * and is available for use. +		 */ +		e_t->flags |= ERTT_NEW_MEASUREMENT; + +		if (tp->t_flags & TF_TSO) { +			/* Temporarily disable TSO to aid a new measurement. */ +			tp->t_flags &= ~TF_TSO; +			/* Keep track that we've disabled it. 
*/ +			e_t->flags |= ERTT_TSO_DISABLED; +		} +	} +} + +/* + * Ertt_packet_measurements uses a small amount of state kept on each packet + * sent to match incoming acknowledgements. This enables more accurate and + * secure round trip time measurements. The resulting measurement is used for + * congestion control algorithms which require a more accurate time. + * Ertt_packet_measurements is called via the helper hook in tcp_input.c + */ +static int +ertt_packet_measurement_hook(int hhook_type, int hhook_id, void *udata, +    void *ctx_data, void *hdata, struct osd *hosd) +{ +	struct ertt *e_t; +	struct tcpcb *tp; +	struct tcphdr *th; +	struct tcpopt *to; +	struct tcp_hhook_data *thdp; +	struct txseginfo *txsi; +	int acked, measurenext_len, multiack, new_sacked_bytes, rtt_bytes_adjust; +	uint32_t measurenext, rts; +	tcp_seq ack; + +	KASSERT(ctx_data != NULL, ("%s: ctx_data is NULL!", __func__)); +	KASSERT(hdata != NULL, ("%s: hdata is NULL!", __func__)); + +	e_t = (struct ertt *)hdata; +	thdp = ctx_data; +	tp = thdp->tp; +	th = thdp->th; +	to = thdp->to; +	new_sacked_bytes = (tp->sackhint.last_sack_ack != 0); +	measurenext = measurenext_len = multiack = rts = rtt_bytes_adjust = 0; +	acked = th->th_ack - tp->snd_una; + +	INP_WLOCK_ASSERT(tp->t_inpcb); + +	/* Packet has provided new acknowledgements. */ +	if (acked > 0 || new_sacked_bytes) { +		if (acked == 0 && new_sacked_bytes) { +			/* Use last sacked data. */ +			ack = tp->sackhint.last_sack_ack; +		} else +			ack = th->th_ack; + +		txsi = TAILQ_FIRST(&e_t->txsegi_q); +		while (txsi != NULL) { +			rts = 0; + +			/* Acknowledgement is acking more than this txsi. 
*/ +			if (SEQ_GT(ack, txsi->seq + txsi->len)) { +				if (txsi->flags & TXSI_RTT_MEASURE_START || +				    measurenext) { +					marked_packet_rtt(txsi, e_t, tp, +					    &measurenext, &measurenext_len, +					    &rtt_bytes_adjust, MULTI_ACK); +				} +				TAILQ_REMOVE(&e_t->txsegi_q, txsi, txsegi_lnk); +				uma_zfree(txseginfo_zone, txsi); +				txsi = TAILQ_FIRST(&e_t->txsegi_q); +				continue; +			} + +			/* +			 * Guess if delayed acks are being used by the receiver. +			 * +			 * XXXDH: A simple heuristic that could be improved +			 */ +			if (!new_sacked_bytes) { +				if (acked > tp->t_maxseg) { +					e_t->dlyack_rx += +					    (e_t->dlyack_rx < DLYACK_SMOOTH) ? +					    1 : 0; +					multiack = 1; +				} else if (acked > txsi->len) { +					multiack = 1; +					e_t->dlyack_rx += +					    (e_t->dlyack_rx < DLYACK_SMOOTH) ? +					    1 : 0; +				} else if (acked == tp->t_maxseg || +					   acked == txsi->len) { +					e_t->dlyack_rx -= +					    (e_t->dlyack_rx > 0) ? 1 : 0; +				} +				/* Otherwise leave dlyack_rx the way it was. */ +			} + +			/* +			 * Time stamps are only to help match the txsi with the +			 * received acknowledgements. +			 */ +			if (e_t->timestamp_errors < MAX_TS_ERR && +			    (to->to_flags & TOF_TS) != 0 && to->to_tsecr) { +				/* +				 * Note: All packets sent with the offload will +				 * have the same time stamp. If we are sending +				 * on a fast interface and the t_maxseg is much +				 * smaller than one tick, this will be fine. The +				 * time stamp would be the same whether we were +				 * using tso or not. However, if the interface +				 * is slow, this will cause problems with the +				 * calculations. If the interface is slow, there +				 * is no reason to be using tso, and it should +				 * be turned off. +				 */ +				/* +				 * If there are too many time stamp errors, time +				 * stamps won't be trusted. +				 */ +				rts = to->to_tsecr; +				/* Before this packet. 
*/ +				if (!e_t->dlyack_rx && TSTMP_LT(rts, txsi->tx_ts)) +					/* When delayed acking is used, the +					 * reflected time stamp is of the first +					 * packet and thus may be before +					 * txsi->tx_ts. +					 */ +					break; +				if (TSTMP_GT(rts, txsi->tx_ts)) { +					/* +					 * If reflected time stamp is later than +					 * tx_tsi, then this txsi is old. +					 */ +					if (txsi->flags & TXSI_RTT_MEASURE_START +					    || measurenext) { +						marked_packet_rtt(txsi, e_t, tp, +						    &measurenext, &measurenext_len, +						    &rtt_bytes_adjust, OLD_TXSI); +					} +					TAILQ_REMOVE(&e_t->txsegi_q, txsi, +					    txsegi_lnk); +					uma_zfree(txseginfo_zone, txsi); +					txsi = TAILQ_FIRST(&e_t->txsegi_q); +					continue; +				} +				if (rts == txsi->tx_ts && +				    TSTMP_LT(to->to_tsval, txsi->rx_ts)) { +					/* +					 * Segment received before sent! +					 * Something is wrong with the received +					 * timestamps so increment errors. If +					 * this keeps up we will ignore +					 * timestamps. +					 */ +					e_t->timestamp_errors++; +				} +			} +			/* +			 * Acknowledging a sequence number before this txsi. +			 * If it is an old txsi that may have had the same seq +			 * numbers, it should have been removed if time stamps +			 * are being used. +			 */ +			if (SEQ_LEQ(ack, txsi->seq)) +				break; /* Before first packet in txsi. */ + +			/* +			 * Only ack > txsi->seq and ack <= txsi->seq+txsi->len +			 * past this point. +			 * +			 * If delayed acks are being used, an acknowledgement +			 * for a single segment will have been delayed by the +			 * receiver and will yield an inaccurate measurement. In +			 * this case, we only make the measurement if more than +			 * one segment is being acknowledged or sack is +			 * currently being used. +			 */ +			if (!e_t->dlyack_rx || multiack || new_sacked_bytes) { +				/* Make an accurate new measurement. 
*/ +				e_t->rtt = ticks - txsi->tx_ts + 1; + +				if (e_t->rtt < e_t->minrtt || e_t->minrtt == 0) +					e_t->minrtt = e_t->rtt; + +				if (e_t->rtt > e_t->maxrtt || e_t->maxrtt == 0) +					e_t->maxrtt = e_t->rtt; +			} + +			if (txsi->flags & TXSI_RTT_MEASURE_START || measurenext) +				marked_packet_rtt(txsi, e_t, tp, +				    &measurenext, &measurenext_len, +				    &rtt_bytes_adjust, CORRECT_ACK); + +			if (txsi->flags & TXSI_TSO) { +				txsi->len -= acked; +				if (txsi->len > 0) { +					/* +					 * This presumes ack for first bytes in +					 * txsi, this may not be true but it +					 * shouldn't cause problems for the +					 * timing. +					 * +					 * We remeasure RTT even though we only +					 * have a single txsi. The rationale +					 * behind this is that it is better to +					 * have a slightly inaccurate +					 * measurement than no additional +					 * measurement for the rest of the bulk +					 * transfer. TSO is only used on +					 * high speed interface cards, so the +					 * packets should be transmitted at line +					 * rate back to back with little +					 * difference in transmission times (in +					 * ticks). +					 */ +					txsi->seq += acked; +					/* +					 * Reset txsi measure flag so we don't +					 * use it for another RTT measurement. +					 */ +					txsi->flags &= ~TXSI_RTT_MEASURE_START; +					/* +					 * There is still more data to be acked +					 * from tso bulk transmission, so we +					 * won't remove it from the TAILQ yet. +					 */ +					break; +				} +			} + +			TAILQ_REMOVE(&e_t->txsegi_q, txsi, txsegi_lnk); +			uma_zfree(txseginfo_zone, txsi); +			break; +		} + +		if (measurenext) { +			/* +			 * We need to do an RTT measurement. It won't be the best +			 * if we do it here. +			 */ +			marked_packet_rtt(txsi, e_t, tp, +			    &measurenext, &measurenext_len, +			    &rtt_bytes_adjust, FORCED_MEASUREMENT); +		} +	} + +	return (0); +} + +/* + * Add information about a transmitted segment to a list. 
+ * This is called via the helper hook in tcp_output.c + */ +static int +ertt_add_tx_segment_info_hook(int hhook_type, int hhook_id, void *udata, +    void *ctx_data, void *hdata, struct osd *hosd) +{ +	struct ertt *e_t; +	struct tcpcb *tp; +	struct tcphdr *th; +	struct tcpopt *to; +	struct tcp_hhook_data *thdp; +	struct txseginfo *txsi; +	long len; +	int tso; + +	KASSERT(ctx_data != NULL, ("%s: ctx_data is NULL!", __func__)); +	KASSERT(hdata != NULL, ("%s: hdata is NULL!", __func__)); + +	e_t = (struct ertt *)hdata; +	thdp = ctx_data; +	tp = thdp->tp; +	th = thdp->th; +	to = thdp->to; +	len = thdp->len; +	tso = thdp->tso; + +	INP_WLOCK_ASSERT(tp->t_inpcb); + +	if (len > 0) { +		txsi = uma_zalloc(txseginfo_zone, M_NOWAIT); +		if (txsi != NULL) { +			/* Construct txsi setting the necessary flags. */ +			txsi->flags = 0; /* Needs to be initialised. */ +			txsi->seq = ntohl(th->th_seq); +			txsi->len = len; +			if (tso) +				txsi->flags |= TXSI_TSO; +			else if (e_t->flags & ERTT_TSO_DISABLED) { +				tp->t_flags |= TF_TSO; +				e_t->flags &= ~ERTT_TSO_DISABLED; +			} + +			if (e_t->flags & ERTT_MEASUREMENT_IN_PROGRESS) { +				e_t->bytes_tx_in_rtt += len; +			} else { +				txsi->flags |= TXSI_RTT_MEASURE_START; +				e_t->flags |= ERTT_MEASUREMENT_IN_PROGRESS; +				e_t->bytes_tx_in_rtt = len; +			} + +			if (((tp->t_flags & TF_NOOPT) == 0) && +			    (to->to_flags & TOF_TS)) { +				txsi->tx_ts = ntohl(to->to_tsval) - +				    tp->ts_offset; +				txsi->rx_ts = ntohl(to->to_tsecr); +			} else { +				txsi->tx_ts = ticks; +				txsi->rx_ts = 0; /* No received time stamp. 
*/ +			} +			TAILQ_INSERT_TAIL(&e_t->txsegi_q, txsi, txsegi_lnk); +		} +	} + +	return (0); +} + +static int +ertt_mod_init(void) +{ + +	txseginfo_zone = uma_zcreate("ertt_txseginfo", sizeof(struct txseginfo), +	    NULL, NULL, NULL, NULL, 0, 0); + +	return (0); +} + +static int +ertt_mod_destroy(void) +{ + +	uma_zdestroy(txseginfo_zone); + +	return (0); +} + +static int +ertt_uma_ctor(void *mem, int size, void *arg, int flags) +{ +	struct ertt *e_t; + +	e_t = mem; + +	TAILQ_INIT(&e_t->txsegi_q); +	e_t->timestamp_errors = 0; +	e_t->minrtt = 0; +	e_t->maxrtt = 0; +	e_t->rtt = 0; +	e_t->flags = 0; +	e_t->dlyack_rx = 0; +	e_t->bytes_tx_in_rtt = 0; +	e_t->markedpkt_rtt = 0; + +	return (0); +} + +static void +ertt_uma_dtor(void *mem, int size, void *arg) +{ +	struct ertt *e_t; +	struct txseginfo *n_txsi, *txsi; + +	e_t = mem; +	txsi = TAILQ_FIRST(&e_t->txsegi_q); +	while (txsi != NULL) { +		n_txsi = TAILQ_NEXT(txsi, txsegi_lnk); +		uma_zfree(txseginfo_zone, txsi); +		txsi = n_txsi; +	} +} + +KHELP_DECLARE_MOD_UMA(ertt, &ertt_helper, ertt_hooks, 1, sizeof(struct ertt), +    ertt_uma_ctor, ertt_uma_dtor); diff --git a/sys/netinet/khelp/h_ertt.h b/sys/netinet/khelp/h_ertt.h new file mode 100644 index 000000000000..8051a6c0fa78 --- /dev/null +++ b/sys/netinet/khelp/h_ertt.h @@ -0,0 +1,89 @@ +/*- + * Copyright (c) 2009-2010 + * 	Swinburne University of Technology, Melbourne, Australia + * Copyright (c) 2010 Lawrence Stewart <lstewart@freebsd.org> + * All rights reserved. + * + * This software was developed at the Centre for Advanced Internet + * Architectures, Swinburne University, by David Hayes, made possible in part by + * a grant from the Cisco University Research Program Fund at Community + * Foundation Silicon Valley. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. 
Redistributions of source code must retain the above copyright + *    notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + *    notice, this list of conditions and the following disclaimer in the + *    documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * $FreeBSD$ + */ + +/* + * The ERTT (Enhanced Round Trip Time) Khelp module calculates an estimate of + * the instantaneous TCP RTT which, for example, is used by delay-based + * congestion control schemes. When the module is loaded, ERTT data is + * calculated for each active TCP connection and encapsulated within a + * "struct ertt". + * + * This software was first released in 2010 by David Hayes and Lawrence Stewart + * whilst working on the NewTCP research project at Swinburne University's + * Centre for Advanced Internet Architectures, Melbourne, Australia, which was + * made possible in part by a grant from the Cisco University Research Program + * Fund at Community Foundation Silicon Valley. Testing and development was + * further assisted by a grant from the FreeBSD Foundation. 
More details are + * available at: + *   http://caia.swin.edu.au/urp/newtcp/ + */ + +#ifndef	_NETINET_KHELP_H_ERTT_ +#define	_NETINET_KHELP_H_ERTT_ + +struct txseginfo; + +/* Structure used as the ertt data block. */ +struct ertt { +	/* Information about transmitted segments to aid in RTT calculation. */ +	TAILQ_HEAD(txseginfo_head, txseginfo) txsegi_q; +	/* Bytes TX so far in marked RTT. */ +	long		bytes_tx_in_rtt; +	/* Final version of above. */ +	long		bytes_tx_in_marked_rtt; +	/* cwnd for marked RTT. */ +	unsigned long	marked_snd_cwnd; +	/* Per-packet measured RTT. */ +	int		rtt; +	/* Maximum RTT measured. */ +	int		maxrtt; +	/* Minimum RTT measured. */ +	int		minrtt; +	/* Guess if the receiver is using delayed ack. */ +	int		dlyack_rx; +	/* Keep track of inconsistencies in packet timestamps. */ +	int		timestamp_errors; +	/* RTT for a marked packet. */ +	int		markedpkt_rtt; +	/* Flags to signal conditions between hook function calls. */ +	uint32_t	flags; +}; + +/* Flags for struct ertt. */ +#define	ERTT_NEW_MEASUREMENT		0x01 +#define	ERTT_MEASUREMENT_IN_PROGRESS	0x02 +#define	ERTT_TSO_DISABLED		0x04 + +#endif /* _NETINET_KHELP_H_ERTT_ */  | 
