summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r-- sys/netinet/tcp_output.c 129
-rw-r--r-- sys/netinet/tcp_syncache.c 32
2 files changed, 106 insertions, 55 deletions
diff --git a/sys/netinet/tcp_output.c b/sys/netinet/tcp_output.c
index 37a4ef1f94dd..c6ce9e28536e 100644
--- a/sys/netinet/tcp_output.c
+++ b/sys/netinet/tcp_output.c
@@ -545,19 +545,6 @@ send:
(void)memcpy(opt + 2, &mss, sizeof(mss));
optlen = TCPOLEN_MAXSEG;
- /*
- * If this is the first SYN of connection (not a SYN
- * ACK), include SACK_PERMIT_HDR option. If this is a
- * SYN ACK, include SACK_PERMIT_HDR option if peer has
- * already done so. This is only for active connect,
- * since the syncache takes care of the passive connect.
- */
- if (tp->sack_enable && ((flags & TH_ACK) == 0 ||
- (tp->t_flags & TF_SACK_PERMIT))) {
- *((u_int32_t *) (opt + optlen)) =
- htonl(TCPOPT_SACK_PERMIT_HDR);
- optlen += 4;
- }
if ((tp->t_flags & TF_REQ_SCALE) &&
((flags & TH_ACK) == 0 ||
(tp->t_flags & TF_RCVD_SCALE))) {
@@ -589,33 +576,6 @@ send:
optlen += TCPOLEN_TSTAMP_APPA;
}
- /*
- * Send SACKs if necessary. This should be the last option processed.
- * Only as many SACKs are sent as are permitted by the maximum options
- * size. No more than three SACKs are sent.
- */
- if (tp->sack_enable && tp->t_state == TCPS_ESTABLISHED &&
- (tp->t_flags & (TF_SACK_PERMIT|TF_NOOPT)) == TF_SACK_PERMIT &&
- tp->rcv_numsacks) {
- u_int32_t *lp = (u_int32_t *)(opt + optlen);
- u_int32_t *olp = lp++;
- int count = 0; /* actual number of SACKs inserted */
- int maxsack = (MAX_TCPOPTLEN - (optlen + 4))/TCPOLEN_SACK;
-
- tcpstat.tcps_sack_send_blocks++;
- maxsack = min(maxsack, TCP_MAX_SACK);
- for (i = 0; (i < tp->rcv_numsacks && count < maxsack); i++) {
- struct sackblk sack = tp->sackblks[i];
- if (sack.start == 0 && sack.end == 0)
- continue;
- *lp++ = htonl(sack.start);
- *lp++ = htonl(sack.end);
- count++;
- }
- *olp = htonl(TCPOPT_SACK_HDR|(TCPOLEN_SACK*count+2));
- optlen += TCPOLEN_SACK*count + 4; /* including leading NOPs */
- }
-
#ifdef TCP_SIGNATURE
#ifdef INET6
if (!isipv6)
@@ -632,14 +592,93 @@ send:
for (i = 0; i < TCP_SIGLEN; i++)
*bp++ = 0;
optlen += TCPOLEN_SIGNATURE;
-
- /* Terminate options list and maintain 32-bit alignment. */
- *bp++ = TCPOPT_NOP;
- *bp++ = TCPOPT_EOL;
- optlen += 2;
}
#endif /* TCP_SIGNATURE */
+ if (tp->sack_enable && ((tp->t_flags & TF_NOOPT) == 0)) {
+ /*
+ * Tack on the SACK permitted option *last*.
+ * And do padding of options after tacking this on.
+ * This is because if MSS, TS, WinScale and Signatures are
+ * all present, we have just 2 bytes left for the SACK
+ * permitted option, which is just enough.
+ */
+ /*
+ * If this is the first SYN of connection (not a SYN
+ * ACK), include SACK permitted option. If this is a
+ * SYN ACK, include SACK permitted option if peer has
+ * already done so. This is only for active connect,
+ * since the syncache takes care of the passive connect.
+ */
+ if ((flags & TH_SYN) &&
+ (!(flags & TH_ACK) || (tp->t_flags & TF_SACK_PERMIT))) {
+ u_char *bp;
+ bp = (u_char *)opt + optlen;
+
+ *bp++ = TCPOPT_SACK_PERMITTED;
+ *bp++ = TCPOLEN_SACK_PERMITTED;
+ optlen += TCPOLEN_SACK_PERMITTED;
+ }
+
+ /*
+ * Send SACKs if necessary. This should be the last
+ * option processed. Only as many SACKs are sent as
+ * are permitted by the maximum options size.
+ *
+ * In general, SACK blocks consume 8*n+2 bytes.
+ * So a full size SACK blocks option is 34 bytes
+ * (to generate 4 SACK blocks). At a minimum,
+ * we need 10 bytes (to generate 1 SACK block).
+ * If TCP Timestamps (12 bytes) and TCP Signatures
+ * (18 bytes) are both present, we'll have just
+ * 10 bytes left for SACK options: 40 - (12 + 18).
+ */
+ if (TCPS_HAVEESTABLISHED(tp->t_state) &&
+ (tp->t_flags & TF_SACK_PERMIT) && tp->rcv_numsacks > 0 &&
+ MAX_TCPOPTLEN - optlen - 2 >= TCPOLEN_SACK) {
+ int nsack, sackoptlen, padlen;
+ u_char *bp = (u_char *)opt + optlen;
+ u_int32_t *lp;
+
+ nsack = (MAX_TCPOPTLEN - optlen - 2) / TCPOLEN_SACK;
+ nsack = min(nsack, tp->rcv_numsacks);
+ sackoptlen = (2 + nsack * TCPOLEN_SACK);
+
+ /*
+ * First we need to pad options so that the
+ * SACK blocks can start at a 4-byte boundary
+ * (sack option and length are at a 2 byte offset).
+ */
+ padlen = (MAX_TCPOPTLEN - optlen - sackoptlen) % 4;
+ optlen += padlen;
+ while (padlen-- > 0)
+ *bp++ = TCPOPT_NOP;
+
+ tcpstat.tcps_sack_send_blocks++;
+ *bp++ = TCPOPT_SACK;
+ *bp++ = sackoptlen;
+ lp = (u_int32_t *)bp;
+ for (i = 0; i < nsack; i++) {
+ struct sackblk sack = tp->sackblks[i];
+ *lp++ = htonl(sack.start);
+ *lp++ = htonl(sack.end);
+ }
+ optlen += sackoptlen;
+ }
+ }
+
+ /* Pad TCP options to a 4 byte boundary */
+ if (optlen < MAX_TCPOPTLEN && (optlen % sizeof(u_int32_t))) {
+ int pad = sizeof(u_int32_t) - (optlen % sizeof(u_int32_t));
+ u_char *bp = (u_char *)opt + optlen;
+
+ optlen += pad;
+ while (pad) {
+ *bp++ = TCPOPT_EOL;
+ pad--;
+ }
+ }
+
hdrlen += optlen;
#ifdef INET6
diff --git a/sys/netinet/tcp_syncache.c b/sys/netinet/tcp_syncache.c
index b95efb66c20a..1e46d7063399 100644
--- a/sys/netinet/tcp_syncache.c
+++ b/sys/netinet/tcp_syncache.c
@@ -983,9 +983,14 @@ syncache_add(inc, to, th, sop, m)
* XXX Currently we always record the option by default and will
* attempt to use it in syncache_respond().
*/
+#if 1
+ if (tp->t_flags & TF_SIGNATURE)
+ sc->sc_flags |= SCF_SIGNATURE;
+#else
if (to->to_flags & TOF_SIGNATURE)
sc->sc_flags = SCF_SIGNATURE;
#endif
+#endif
if (to->to_flags & TOF_SACK)
sc->sc_flags |= SCF_SACK;
@@ -1051,10 +1056,12 @@ syncache_respond(sc, m)
((sc->sc_flags & SCF_WINSCALE) ? 4 : 0) +
((sc->sc_flags & SCF_TIMESTAMP) ? TCPOLEN_TSTAMP_APPA : 0);
#ifdef TCP_SIGNATURE
- optlen += (sc->sc_flags & SCF_SIGNATURE) ?
- TCPOLEN_SIGNATURE + 2 : 0;
+ if (sc->sc_flags & SCF_SIGNATURE)
+ optlen += TCPOLEN_SIGNATURE;
#endif
- optlen += ((sc->sc_flags & SCF_SACK) ? 4 : 0);
+ if (sc->sc_flags & SCF_SACK)
+ optlen += TCPOLEN_SACK_PERMITTED;
+ optlen = roundup2(optlen, 4);
}
tlen = hlen + sizeof(struct tcphdr) + optlen;
@@ -1175,16 +1182,21 @@ syncache_respond(sc, m)
*bp++ = 0;
tcp_signature_compute(m, sizeof(struct ip), 0, optlen,
optp + 2, IPSEC_DIR_OUTBOUND);
- *bp++ = TCPOPT_NOP;
- *bp++ = TCPOPT_EOL;
- optp += TCPOLEN_SIGNATURE + 2;
+ optp += TCPOLEN_SIGNATURE;
}
#endif /* TCP_SIGNATURE */
- if (sc->sc_flags & SCF_SACK) {
- *(u_int32_t *)optp = htonl(TCPOPT_SACK_PERMIT_HDR);
- optp += 4;
- }
+ if (sc->sc_flags & SCF_SACK) {
+ *optp++ = TCPOPT_SACK_PERMITTED;
+ *optp++ = TCPOLEN_SACK_PERMITTED;
+ }
+
+ {
+ /* Pad TCP options to a 4 byte boundary */
+ int padlen = optlen - (optp - (u_int8_t *)(th + 1));
+ while (padlen-- > 0)
+ *optp++ = TCPOPT_EOL;
+ }
}
#ifdef INET6