aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorMichael Tuexen <tuexen@FreeBSD.org>2021-04-18 14:08:08 +0000
committerMichael Tuexen <tuexen@FreeBSD.org>2021-04-18 14:16:42 +0000
commit9e644c23000c2f5028b235f6263d17ffb24d3605 (patch)
tree846286d2c178f8c442c7bd711ceb0a50a9c06f9e
parent136f6b6c0cc1343a7637c3250ff9dd0eced4b4d0 (diff)
downloadsrc-9e644c23000c2f5028b235f6263d17ffb24d3605.tar.gz
src-9e644c23000c2f5028b235f6263d17ffb24d3605.zip
-rw-r--r--share/man/man4/tcp.415
-rw-r--r--sys/netinet/tcp.h1
-rw-r--r--sys/netinet/tcp_input.c47
-rw-r--r--sys/netinet/tcp_output.c80
-rw-r--r--sys/netinet/tcp_stacks/bbr.c38
-rw-r--r--sys/netinet/tcp_stacks/rack.c26
-rw-r--r--sys/netinet/tcp_subr.c462
-rw-r--r--sys/netinet/tcp_syncache.c127
-rw-r--r--sys/netinet/tcp_syncache.h12
-rw-r--r--sys/netinet/tcp_timewait.c84
-rw-r--r--sys/netinet/tcp_usrreq.c30
-rw-r--r--sys/netinet/tcp_var.h27
-rw-r--r--sys/netinet/toecore.c4
-rw-r--r--sys/netinet6/tcp6_var.h2
-rw-r--r--sys/sys/mbuf.h1
-rw-r--r--usr.bin/netstat/inet.c4
-rw-r--r--usr.bin/sockstat/sockstat.16
-rw-r--r--usr.bin/sockstat/sockstat.c13
18 files changed, 821 insertions, 158 deletions
diff --git a/share/man/man4/tcp.4 b/share/man/man4/tcp.4
index cbb8021226fe..873cfe4b822a 100644
--- a/share/man/man4/tcp.4
+++ b/share/man/man4/tcp.4
@@ -34,7 +34,7 @@
.\" From: @(#)tcp.4 8.1 (Berkeley) 6/5/93
.\" $FreeBSD$
.\"
-.Dd April 17, 2021
+.Dd April 18, 2021
.Dt TCP 4
.Os
.Sh NAME
@@ -329,6 +329,9 @@ currently executing.
This is typically used after a process or thread inherits a listen
socket from its parent, and sets its CPU affinity to a particular core.
.El
+.It Dv TCP_REMOTE_UDP_ENCAPS_PORT
+Set and get the remote UDP encapsulation port.
+It can only be set on a closed TCP socket.
.El
.Pp
The option level for the
@@ -755,6 +758,16 @@ A CSV list of template_spec=percent key-value pairs which controls the per
template sampling rates when
.Xr stats 3
sampling is enabled.
+.It Va udp_tunneling_port
+The local UDP encapsulation port.
+A value of 0 indicates that UDP encapsulation is disabled.
+The default is 0.
+.It Va udp_tunneling_overhead
+The overhead taken into account when using UDP encapsulation.
+Since MSS clamping by middleboxes will most likely not work, values larger than
+8 (the size of the UDP header) are also supported.
+Supported values are between 8 and 1024.
+The default is 8.
.El
.Sh ERRORS
A socket operation may fail with one of the following errors returned:
diff --git a/sys/netinet/tcp.h b/sys/netinet/tcp.h
index 0b71bd4658f8..d2bf1f8431fd 100644
--- a/sys/netinet/tcp.h
+++ b/sys/netinet/tcp.h
@@ -183,6 +183,7 @@ struct tcphdr {
#define TCP_RXTLS_MODE 42 /* Receive TLS mode */
#define TCP_CONGESTION 64 /* get/set congestion control algorithm */
#define TCP_CCALGOOPT 65 /* get/set cc algorithm specific options */
+#define TCP_REMOTE_UDP_ENCAPS_PORT 71 /* Enable TCP over UDP tunneling via the specified port */
#define TCP_DELACK 72 /* socket option for delayed ack */
#define TCP_FIN_IS_RST 73 /* A fin from the peer is treated has a RST */
#define TCP_LOG_LIMIT 74 /* Limit to number of records in tcp-log */
diff --git a/sys/netinet/tcp_input.c b/sys/netinet/tcp_input.c
index ed184de4a4bf..8592f3313725 100644
--- a/sys/netinet/tcp_input.c
+++ b/sys/netinet/tcp_input.c
@@ -123,6 +123,7 @@ __FBSDID("$FreeBSD$");
#ifdef TCP_OFFLOAD
#include <netinet/tcp_offload.h>
#endif
+#include <netinet/udp.h>
#include <netipsec/ipsec_support.h>
@@ -567,7 +568,7 @@ cc_ecnpkt_handler(struct tcpcb *tp, struct tcphdr *th, uint8_t iptos)
*/
#ifdef INET6
int
-tcp6_input(struct mbuf **mp, int *offp, int proto)
+tcp6_input_with_port(struct mbuf **mp, int *offp, int proto, uint16_t port)
{
struct mbuf *m;
struct in6_ifaddr *ia6;
@@ -597,12 +598,19 @@ tcp6_input(struct mbuf **mp, int *offp, int proto)
}
*mp = m;
- return (tcp_input(mp, offp, proto));
+ return (tcp_input_with_port(mp, offp, proto, port));
+}
+
+/*
+ * IPv6 TCP input entry point for segments that were not received through
+ * the UDP tunneling path: forwards to tcp6_input_with_port() with port 0.
+ */
+int
+tcp6_input(struct mbuf **mp, int *offp, int proto)
+{
+	const uint16_t no_tunnel_port = 0;
+
+	return (tcp6_input_with_port(mp, offp, proto, no_tunnel_port));
}
#endif /* INET6 */
int
-tcp_input(struct mbuf **mp, int *offp, int proto)
+tcp_input_with_port(struct mbuf **mp, int *offp, int proto, uint16_t port)
{
struct mbuf *m = *mp;
struct tcphdr *th = NULL;
@@ -659,6 +667,8 @@ tcp_input(struct mbuf **mp, int *offp, int proto)
ip6 = mtod(m, struct ip6_hdr *);
th = (struct tcphdr *)((caddr_t)ip6 + off0);
tlen = sizeof(*ip6) + ntohs(ip6->ip6_plen) - off0;
+ if (port)
+ goto skip6_csum;
if (m->m_pkthdr.csum_flags & CSUM_DATA_VALID_IPV6) {
if (m->m_pkthdr.csum_flags & CSUM_PSEUDO_HDR)
th->th_sum = m->m_pkthdr.csum_data;
@@ -672,7 +682,7 @@ tcp_input(struct mbuf **mp, int *offp, int proto)
TCPSTAT_INC(tcps_rcvbadsum);
goto drop;
}
-
+ skip6_csum:
/*
* Be proactive about unspecified IPv6 address in source.
* As we use all-zero to indicate unbounded/unconnected pcb,
@@ -713,6 +723,8 @@ tcp_input(struct mbuf **mp, int *offp, int proto)
tlen = ntohs(ip->ip_len) - off0;
iptos = ip->ip_tos;
+ if (port)
+ goto skip_csum;
if (m->m_pkthdr.csum_flags & CSUM_DATA_VALID) {
if (m->m_pkthdr.csum_flags & CSUM_PSEUDO_HDR)
th->th_sum = m->m_pkthdr.csum_data;
@@ -742,8 +754,8 @@ tcp_input(struct mbuf **mp, int *offp, int proto)
ip->ip_v = IPVERSION;
ip->ip_hl = off0 >> 2;
}
-
- if (th->th_sum) {
+ skip_csum:
+ if (th->th_sum && (port == 0)) {
TCPSTAT_INC(tcps_rcvbadsum);
goto drop;
}
@@ -1004,6 +1016,11 @@ findpcb:
goto dropwithreset;
}
+ if ((tp->t_port != port) && (tp->t_state > TCPS_LISTEN)) {
+ rstreason = BANDLIM_RST_CLOSEDPORT;
+ goto dropwithreset;
+ }
+
#ifdef TCP_OFFLOAD
if (tp->t_flags & TF_TOE) {
tcp_offload_input(tp, m);
@@ -1074,7 +1091,7 @@ findpcb:
* NB: syncache_expand() doesn't unlock
* inp and tcpinfo locks.
*/
- rstreason = syncache_expand(&inc, &to, th, &so, m);
+ rstreason = syncache_expand(&inc, &to, th, &so, m, port);
if (rstreason < 0) {
/*
* A failing TCP MD5 signature comparison
@@ -1156,7 +1173,7 @@ tfo_socket_result:
* causes.
*/
if (thflags & TH_RST) {
- syncache_chkrst(&inc, th, m);
+ syncache_chkrst(&inc, th, m, port);
goto dropunlock;
}
/*
@@ -1178,7 +1195,7 @@ tfo_socket_result:
log(LOG_DEBUG, "%s; %s: Listen socket: "
"SYN|ACK invalid, segment rejected\n",
s, __func__);
- syncache_badack(&inc); /* XXX: Not needed! */
+ syncache_badack(&inc, port); /* XXX: Not needed! */
TCPSTAT_INC(tcps_badsyn);
rstreason = BANDLIM_RST_OPENPORT;
goto dropwithreset;
@@ -1337,7 +1354,7 @@ tfo_socket_result:
TCP_PROBE3(debug__input, tp, th, m);
tcp_dooptions(&to, optp, optlen, TO_SYN);
if ((so = syncache_add(&inc, &to, th, inp, so, m, NULL, NULL,
- iptos)) != NULL)
+ iptos, port)) != NULL)
goto tfo_socket_result;
/*
@@ -1468,6 +1485,12 @@ tcp_autorcvbuf(struct mbuf *m, struct tcphdr *th, struct socket *so,
return (newsize);
}
+/*
+ * TCP input entry point for segments that were not received through the
+ * UDP tunneling path: forwards to tcp_input_with_port() with port 0.
+ */
+int
+tcp_input(struct mbuf **mp, int *offp, int proto)
+{
+	const uint16_t no_tunnel_port = 0;
+
+	return (tcp_input_with_port(mp, offp, proto, no_tunnel_port));
+}
+
void
tcp_handle_wakeup(struct tcpcb *tp, struct socket *so)
{
@@ -3672,11 +3695,13 @@ tcp_mss_update(struct tcpcb *tp, int offer, int mtuoffer,
sizeof (struct ip6_hdr) + sizeof (struct tcphdr) :
sizeof (struct tcpiphdr);
#else
- const size_t min_protoh = sizeof(struct tcpiphdr);
+ size_t min_protoh = sizeof(struct tcpiphdr);
#endif
INP_WLOCK_ASSERT(tp->t_inpcb);
+ if (tp->t_port)
+ min_protoh += V_tcp_udp_tunneling_overhead;
if (mtuoffer != -1) {
KASSERT(offer == -1, ("%s: conflict", __func__));
offer = mtuoffer - min_protoh;
diff --git a/sys/netinet/tcp_output.c b/sys/netinet/tcp_output.c
index e23cdc749e98..5bda2be14df0 100644
--- a/sys/netinet/tcp_output.c
+++ b/sys/netinet/tcp_output.c
@@ -101,6 +101,8 @@ __FBSDID("$FreeBSD$");
#include <netipsec/ipsec_support.h>
+#include <netinet/udp.h>
+#include <netinet/udp_var.h>
#include <machine/in_cksum.h>
#include <security/mac/mac_framework.h>
@@ -207,7 +209,7 @@ tcp_output(struct tcpcb *tp)
#endif
struct tcphdr *th;
u_char opt[TCP_MAXOLEN];
- unsigned ipoptlen, optlen, hdrlen;
+ unsigned ipoptlen, optlen, hdrlen, ulen;
#if defined(IPSEC) || defined(IPSEC_SUPPORT)
unsigned ipsec_optlen = 0;
#endif
@@ -216,6 +218,7 @@ tcp_output(struct tcpcb *tp)
struct sackhole *p;
int tso, mtu;
struct tcpopt to;
+ struct udphdr *udp = NULL;
unsigned int wanted_cookie = 0;
unsigned int dont_sendalot = 0;
#if 0
@@ -558,6 +561,7 @@ after_sack_rexmit:
#endif
if ((tp->t_flags & TF_TSO) && V_tcp_do_tso && len > tp->t_maxseg &&
+ (tp->t_port == 0) &&
((tp->t_flags & TF_SIGNATURE) == 0) &&
tp->rcv_numsacks == 0 && sack_rxmit == 0 &&
ipoptlen == 0 && !(flags & TH_SYN))
@@ -800,6 +804,8 @@ send:
/* Maximum segment size. */
if (flags & TH_SYN) {
to.to_mss = tcp_mssopt(&tp->t_inpcb->inp_inc);
+ if (tp->t_port)
+ to.to_mss -= V_tcp_udp_tunneling_overhead;
to.to_flags |= TOF_MSS;
/*
@@ -887,7 +893,14 @@ send:
!(to.to_flags & TOF_FASTOPEN))
len = 0;
}
-
+ if (tp->t_port) {
+ if (V_tcp_udp_tunneling_port == 0) {
+ /* The port was removed?? */
+ SOCKBUF_UNLOCK(&so->so_snd);
+ return (EHOSTUNREACH);
+ }
+ hdrlen += sizeof(struct udphdr);
+ }
/*
* Adjust data length if insertion of options will
* bump the packet length beyond the t_maxseg length.
@@ -1140,8 +1153,17 @@ send:
#ifdef INET6
if (isipv6) {
ip6 = mtod(m, struct ip6_hdr *);
- th = (struct tcphdr *)(ip6 + 1);
- tcpip_fillheaders(tp->t_inpcb, ip6, th);
+ if (tp->t_port) {
+ udp = (struct udphdr *)((caddr_t)ip6 + ipoptlen + sizeof(struct ip6_hdr));
+ udp->uh_sport = htons(V_tcp_udp_tunneling_port);
+ udp->uh_dport = tp->t_port;
+ ulen = hdrlen + len - sizeof(struct ip6_hdr);
+ udp->uh_ulen = htons(ulen);
+ th = (struct tcphdr *)(udp + 1);
+ } else {
+ th = (struct tcphdr *)(ip6 + 1);
+ }
+ tcpip_fillheaders(tp->t_inpcb, tp->t_port, ip6, th);
} else
#endif /* INET6 */
{
@@ -1149,8 +1171,16 @@ send:
#ifdef TCPDEBUG
ipov = (struct ipovly *)ip;
#endif
- th = (struct tcphdr *)(ip + 1);
- tcpip_fillheaders(tp->t_inpcb, ip, th);
+ if (tp->t_port) {
+ udp = (struct udphdr *)((caddr_t)ip + ipoptlen + sizeof(struct ip));
+ udp->uh_sport = htons(V_tcp_udp_tunneling_port);
+ udp->uh_dport = tp->t_port;
+ ulen = hdrlen + len - sizeof(struct ip);
+ udp->uh_ulen = htons(ulen);
+ th = (struct tcphdr *)(udp + 1);
+ } else
+ th = (struct tcphdr *)(ip + 1);
+ tcpip_fillheaders(tp->t_inpcb, tp->t_port, ip, th);
}
/*
@@ -1309,7 +1339,6 @@ send:
* checksum extended header and data.
*/
m->m_pkthdr.len = hdrlen + len; /* in6_cksum() need this */
- m->m_pkthdr.csum_data = offsetof(struct tcphdr, th_sum);
#if defined(IPSEC_SUPPORT) || defined(TCP_SIGNATURE)
if (to.to_flags & TOF_SIGNATURE) {
@@ -1336,9 +1365,19 @@ send:
* There is no need to fill in ip6_plen right now.
* It will be filled later by ip6_output.
*/
- m->m_pkthdr.csum_flags = CSUM_TCP_IPV6;
- th->th_sum = in6_cksum_pseudo(ip6, sizeof(struct tcphdr) +
- optlen + len, IPPROTO_TCP, 0);
+ if (tp->t_port) {
+ m->m_pkthdr.csum_flags = CSUM_UDP_IPV6;
+ m->m_pkthdr.csum_data = offsetof(struct udphdr, uh_sum);
+ udp->uh_sum = in6_cksum_pseudo(ip6, ulen, IPPROTO_UDP, 0);
+ th->th_sum = htons(0);
+ UDPSTAT_INC(udps_opackets);
+ } else {
+ m->m_pkthdr.csum_flags = CSUM_TCP_IPV6;
+ m->m_pkthdr.csum_data = offsetof(struct tcphdr, th_sum);
+ th->th_sum = in6_cksum_pseudo(ip6,
+ sizeof(struct tcphdr) + optlen + len, IPPROTO_TCP,
+ 0);
+ }
}
#endif
#if defined(INET6) && defined(INET)
@@ -1346,9 +1385,20 @@ send:
#endif
#ifdef INET
{
- m->m_pkthdr.csum_flags = CSUM_TCP;
- th->th_sum = in_pseudo(ip->ip_src.s_addr, ip->ip_dst.s_addr,
- htons(sizeof(struct tcphdr) + IPPROTO_TCP + len + optlen));
+ if (tp->t_port) {
+ m->m_pkthdr.csum_flags = CSUM_UDP;
+ m->m_pkthdr.csum_data = offsetof(struct udphdr, uh_sum);
+ udp->uh_sum = in_pseudo(ip->ip_src.s_addr,
+ ip->ip_dst.s_addr, htons(ulen + IPPROTO_UDP));
+ th->th_sum = htons(0);
+ UDPSTAT_INC(udps_opackets);
+ } else {
+ m->m_pkthdr.csum_flags = CSUM_TCP;
+ m->m_pkthdr.csum_data = offsetof(struct tcphdr, th_sum);
+ th->th_sum = in_pseudo(ip->ip_src.s_addr,
+ ip->ip_dst.s_addr, htons(sizeof(struct tcphdr) +
+ IPPROTO_TCP + len + optlen));
+ }
/* IP version must be set here for ipv4/ipv6 checking later */
KASSERT(ip->ip_v == IPVERSION,
@@ -1473,8 +1523,10 @@ send:
* NB: Don't set DF on small MTU/MSS to have a safe fallback.
*/
if (V_path_mtu_discovery && tp->t_maxseg > V_tcp_minmss) {
- ip->ip_off |= htons(IP_DF);
tp->t_flags2 |= TF2_PLPMTU_PMTUD;
+ if (tp->t_port == 0 || len < V_tcp_minmss) {
+ ip->ip_off |= htons(IP_DF);
+ }
} else {
tp->t_flags2 &= ~TF2_PLPMTU_PMTUD;
}
diff --git a/sys/netinet/tcp_stacks/bbr.c b/sys/netinet/tcp_stacks/bbr.c
index 673dee911c87..febac7ad424c 100644
--- a/sys/netinet/tcp_stacks/bbr.c
+++ b/sys/netinet/tcp_stacks/bbr.c
@@ -11969,14 +11969,10 @@ bbr_output_wtime(struct tcpcb *tp, const struct timeval *tv)
#endif
struct tcp_bbr *bbr;
struct tcphdr *th;
-#ifdef NETFLIX_TCPOUDP
struct udphdr *udp = NULL;
-#endif
u_char opt[TCP_MAXOLEN];
unsigned ipoptlen, optlen, hdrlen;
-#ifdef NETFLIX_TCPOUDP
unsigned ulen;
-#endif
uint32_t bbr_seq;
uint32_t delay_calc=0;
uint8_t doing_tlp = 0;
@@ -12991,10 +12987,8 @@ send:
/* Maximum segment size. */
if (flags & TH_SYN) {
to.to_mss = tcp_mssopt(&inp->inp_inc);
-#ifdef NETFLIX_TCPOUDP
if (tp->t_port)
to.to_mss -= V_tcp_udp_tunneling_overhead;
-#endif
to.to_flags |= TOF_MSS;
/*
* On SYN or SYN|ACK transmits on TFO connections,
@@ -13063,7 +13057,6 @@ send:
!(to.to_flags & TOF_FASTOPEN))
len = 0;
}
-#ifdef NETFLIX_TCPOUDP
if (tp->t_port) {
if (V_tcp_udp_tunneling_port == 0) {
/* The port was removed?? */
@@ -13072,7 +13065,6 @@ send:
}
hdrlen += sizeof(struct udphdr);
}
-#endif
#ifdef INET6
if (isipv6)
ipoptlen = ip6_optlen(tp->t_inpcb);
@@ -13408,7 +13400,6 @@ send:
#ifdef INET6
if (isipv6) {
ip6 = mtod(m, struct ip6_hdr *);
-#ifdef NETFLIX_TCPOUDP
if (tp->t_port) {
udp = (struct udphdr *)((caddr_t)ip6 + ipoptlen + sizeof(struct ip6_hdr));
udp->uh_sport = htons(V_tcp_udp_tunneling_port);
@@ -13417,17 +13408,9 @@ send:
udp->uh_ulen = htons(ulen);
th = (struct tcphdr *)(udp + 1);
} else {
-#endif
th = (struct tcphdr *)(ip6 + 1);
-
-#ifdef NETFLIX_TCPOUDP
}
-#endif
- tcpip_fillheaders(inp,
-#ifdef NETFLIX_TCPOUDP
- tp->t_port,
-#endif
- ip6, th);
+ tcpip_fillheaders(inp, tp->t_port, ip6, th);
} else
#endif /* INET6 */
{
@@ -13435,7 +13418,6 @@ send:
#ifdef TCPDEBUG
ipov = (struct ipovly *)ip;
#endif
-#ifdef NETFLIX_TCPOUDP
if (tp->t_port) {
udp = (struct udphdr *)((caddr_t)ip + ipoptlen + sizeof(struct ip));
udp->uh_sport = htons(V_tcp_udp_tunneling_port);
@@ -13443,14 +13425,10 @@ send:
ulen = hdrlen + len - sizeof(struct ip);
udp->uh_ulen = htons(ulen);
th = (struct tcphdr *)(udp + 1);
- } else
-#endif
+ } else {
th = (struct tcphdr *)(ip + 1);
- tcpip_fillheaders(inp,
-#ifdef NETFLIX_TCPOUDP
- tp->t_port,
-#endif
- ip, th);
+ }
+ tcpip_fillheaders(inp, tp->t_port, ip, th);
}
/*
* If we are doing retransmissions, then snd_nxt will not reflect
@@ -13600,7 +13578,6 @@ send:
* ip6_plen is not need to be filled now, and will be filled
* in ip6_output.
*/
-#ifdef NETFLIX_TCPOUDP
if (tp->t_port) {
m->m_pkthdr.csum_flags = CSUM_UDP_IPV6;
m->m_pkthdr.csum_data = offsetof(struct udphdr, uh_sum);
@@ -13608,14 +13585,11 @@ send:
th->th_sum = htons(0);
UDPSTAT_INC(udps_opackets);
} else {
-#endif
csum_flags = m->m_pkthdr.csum_flags = CSUM_TCP_IPV6;
m->m_pkthdr.csum_data = offsetof(struct tcphdr, th_sum);
th->th_sum = in6_cksum_pseudo(ip6, sizeof(struct tcphdr) +
optlen + len, IPPROTO_TCP, 0);
-#ifdef NETFLIX_TCPOUDP
}
-#endif
}
#endif
#if defined(INET6) && defined(INET)
@@ -13623,7 +13597,6 @@ send:
#endif
#ifdef INET
{
-#ifdef NETFLIX_TCPOUDP
if (tp->t_port) {
m->m_pkthdr.csum_flags = CSUM_UDP;
m->m_pkthdr.csum_data = offsetof(struct udphdr, uh_sum);
@@ -13632,15 +13605,12 @@ send:
th->th_sum = htons(0);
UDPSTAT_INC(udps_opackets);
} else {
-#endif
csum_flags = m->m_pkthdr.csum_flags = CSUM_TCP;
m->m_pkthdr.csum_data = offsetof(struct tcphdr, th_sum);
th->th_sum = in_pseudo(ip->ip_src.s_addr,
ip->ip_dst.s_addr, htons(sizeof(struct tcphdr) +
IPPROTO_TCP + len + optlen));
-#ifdef NETFLIX_TCPOUDP
}
-#endif
/* IP version must be set here for ipv4/ipv6 checking later */
KASSERT(ip->ip_v == IPVERSION,
("%s: IP version incorrect: %d", __func__, ip->ip_v));
diff --git a/sys/netinet/tcp_stacks/rack.c b/sys/netinet/tcp_stacks/rack.c
index 0079bf8b6400..d2093e1afab7 100644
--- a/sys/netinet/tcp_stacks/rack.c
+++ b/sys/netinet/tcp_stacks/rack.c
@@ -13008,10 +13008,8 @@ send:
if (flags & TH_SYN) {
tp->snd_nxt = tp->iss;
to.to_mss = tcp_mssopt(&inp->inp_inc);
-#ifdef NETFLIX_TCPOUDP
if (tp->t_port)
to.to_mss -= V_tcp_udp_tunneling_overhead;
-#endif
to.to_flags |= TOF_MSS;
/*
@@ -13088,7 +13086,6 @@ send:
!(to.to_flags & TOF_FASTOPEN))
len = 0;
}
-#ifdef NETFLIX_TCPOUDP
if (tp->t_port) {
if (V_tcp_udp_tunneling_port == 0) {
/* The port was removed?? */
@@ -13097,7 +13094,6 @@ send:
}
hdrlen += sizeof(struct udphdr);
}
-#endif
#ifdef INET6
if (isipv6)
ipoptlen = ip6_optlen(tp->t_inpcb);
@@ -13372,7 +13368,6 @@ send:
#ifdef INET6
if (isipv6) {
ip6 = mtod(m, struct ip6_hdr *);
-#ifdef NETFLIX_TCPOUDP
if (tp->t_port) {
udp = (struct udphdr *)((caddr_t)ip6 + ipoptlen + sizeof(struct ip6_hdr));
udp->uh_sport = htons(V_tcp_udp_tunneling_port);
@@ -13380,14 +13375,10 @@ send:
ulen = hdrlen + len - sizeof(struct ip6_hdr);
udp->uh_ulen = htons(ulen);
th = (struct tcphdr *)(udp + 1);
- } else
-#endif
+ } else {
th = (struct tcphdr *)(ip6 + 1);
- tcpip_fillheaders(inp,
-#ifdef NETFLIX_TCPOUDP
- tp->t_port,
-#endif
- ip6, th);
+ }
+ tcpip_fillheaders(inp, tp->t_port, ip6, th);
} else
#endif /* INET6 */
{
@@ -13395,7 +13386,6 @@ send:
#ifdef TCPDEBUG
ipov = (struct ipovly *)ip;
#endif
-#ifdef NETFLIX_TCPOUDP
if (tp->t_port) {
udp = (struct udphdr *)((caddr_t)ip + ipoptlen + sizeof(struct ip));
udp->uh_sport = htons(V_tcp_udp_tunneling_port);
@@ -13403,14 +13393,10 @@ send:
ulen = hdrlen + len - sizeof(struct ip);
udp->uh_ulen = htons(ulen);
th = (struct tcphdr *)(udp + 1);
- } else
-#endif
+ } else {
th = (struct tcphdr *)(ip + 1);
- tcpip_fillheaders(inp,
-#ifdef NETFLIX_TCPOUDP
- tp->t_port,
-#endif
- ip, th);
+ }
+ tcpip_fillheaders(inp, tp->t_port, ip, th);
}
/*
* Fill in fields, remembering maximum advertised window for use in
diff --git a/sys/netinet/tcp_subr.c b/sys/netinet/tcp_subr.c
index e973555efbcb..1ebc7357def3 100644
--- a/sys/netinet/tcp_subr.c
+++ b/sys/netinet/tcp_subr.c
@@ -126,6 +126,8 @@ __FBSDID("$FreeBSD$");
#ifdef TCP_OFFLOAD
#include <netinet/tcp_offload.h>
#endif
+#include <netinet/udp.h>
+#include <netinet/udp_var.h>
#include <netipsec/ipsec_support.h>
@@ -501,6 +503,80 @@ tcp_switch_back_to_default(struct tcpcb *tp)
}
}
+/*
+ * Input handler for UDP datagrams received on the TCP-over-UDP tunneling
+ * socket (installed with udp_set_kernel_tunneling() by
+ * tcp_over_udp_start()).
+ *
+ *   m    received packet; on error it is freed here, otherwise it is
+ *        handed to the TCP input routine.
+ *   off  offset of the UDP header from the start of the mbuf data.
+ *   inp, sa, ctx  not used by this handler.
+ *
+ * The UDP header is removed by sliding the TCP segment down over it, the
+ * IP length field is reduced by the size of the UDP header, and the packet
+ * is passed to tcp_input_with_port()/tcp6_input_with_port() together with
+ * the peer's UDP source port (left in network byte order).
+ */
+static void
+tcp_recv_udp_tunneled_packet(struct mbuf *m, int off, struct inpcb *inp,
+    const struct sockaddr *sa, void *ctx)
+{
+	struct ip *iph;
+#ifdef INET6
+	struct ip6_hdr *ip6;
+#endif
+	struct udphdr *uh;
+	struct tcphdr *th;
+	int thlen;
+	uint16_t port;
+
+	TCPSTAT_INC(tcps_tunneled_pkts);
+	if ((m->m_flags & M_PKTHDR) == 0) {
+		/* Can't handle one that is not a pkt hdr */
+		TCPSTAT_INC(tcps_tunneled_errs);
+		goto out;
+	}
+	/* First make the UDP header plus a minimal TCP header contiguous. */
+	thlen = sizeof(struct tcphdr);
+	if (m->m_len < off + sizeof(struct udphdr) + thlen &&
+	    (m = m_pullup(m, off + sizeof(struct udphdr) + thlen)) == NULL) {
+		TCPSTAT_INC(tcps_tunneled_errs);
+		goto out;
+	}
+	iph = mtod(m, struct ip *);
+	uh = (struct udphdr *)((caddr_t)iph + off);
+	th = (struct tcphdr *)(uh + 1);
+	/* th_off is readable now; pull up the TCP header including options. */
+	thlen = th->th_off << 2;
+	if (m->m_len < off + sizeof(struct udphdr) + thlen) {
+		m = m_pullup(m, off + sizeof(struct udphdr) + thlen);
+		if (m == NULL) {
+			TCPSTAT_INC(tcps_tunneled_errs);
+			goto out;
+		} else {
+			/* m_pullup() may have moved the data; recompute. */
+			iph = mtod(m, struct ip *);
+			uh = (struct udphdr *)((caddr_t)iph + off);
+			th = (struct tcphdr *)(uh + 1);
+		}
+	}
+	m->m_pkthdr.tcp_tun_port = port = uh->uh_sport;
+	/*
+	 * Slide the TCP segment down over the UDP header.  Only the bytes
+	 * that follow the UDP header in this mbuf are moved: th already
+	 * points sizeof(struct udphdr) past uh, so copying (m_len - off)
+	 * bytes would read past the end of the valid data.  The two regions
+	 * overlap, which bcopy() permits.
+	 */
+	bcopy(th, uh, m->m_len - off - sizeof(struct udphdr));
+	m->m_len -= sizeof(struct udphdr);
+	m->m_pkthdr.len -= sizeof(struct udphdr);
+	/*
+	 * We use the same algorithm for both UDP and TCP for c-sum, so the
+	 * code in tcp_input will skip the checksum.  Hence nothing is done
+	 * with the flag (m->m_pkthdr.csum_flags).
+	 */
+	switch (iph->ip_v) {
+#ifdef INET
+	case IPVERSION:
+		iph->ip_len = htons(ntohs(iph->ip_len) - sizeof(struct udphdr));
+		tcp_input_with_port(&m, &off, IPPROTO_TCP, port);
+		break;
+#endif
+#ifdef INET6
+	case IPV6_VERSION >> 4:
+		ip6 = mtod(m, struct ip6_hdr *);
+		ip6->ip6_plen = htons(ntohs(ip6->ip6_plen) - sizeof(struct udphdr));
+		tcp6_input_with_port(&m, &off, IPPROTO_TCP, port);
+		break;
+#endif
+	default:
+		/* Neither a supported IPv4 nor IPv6 header: drop. */
+		goto out;
+	}
+	return;
+out:
+	m_freem(m);
+}
+
static int
sysctl_net_inet_default_tcp_functions(SYSCTL_HANDLER_ARGS)
{
@@ -598,6 +674,183 @@ SYSCTL_PROC(_net_inet_tcp, OID_AUTO, functions_available,
NULL, 0, sysctl_net_inet_list_available, "A",
"list available TCP Function sets");
+VNET_DEFINE(int, tcp_udp_tunneling_port) = TCP_TUNNELING_PORT_DEFAULT;	/* local UDP port for TCP-over-UDP; 0 is treated as "no port set" */
+
+#ifdef INET
+VNET_DEFINE(struct socket *, udp4_tun_socket) = NULL;	/* IPv4 tunneling socket; NULL while tunneling is stopped */
+#define V_udp4_tun_socket VNET(udp4_tun_socket)
+#endif
+#ifdef INET6
+VNET_DEFINE(struct socket *, udp6_tun_socket) = NULL;	/* IPv6 tunneling socket; NULL while tunneling is stopped */
+#define V_udp6_tun_socket VNET(udp6_tun_socket)
+#endif
+
+/*
+ * Close the IPv4 and/or IPv6 UDP tunneling sockets, if they exist, and
+ * reset the corresponding per-VNET pointers to NULL.  Calling this when no
+ * socket is open does nothing.
+ *
+ * The original note here states that the sysctl caller is assumed to hold
+ * the inp info lock for writing.
+ * NOTE(review): the sysctl handler in this file calls this function without
+ * visibly taking any lock -- confirm how callers are serialized.
+ */
+static void
+tcp_over_udp_stop(void)
+{
+#ifdef INET
+	struct socket *so4;
+#endif
+#ifdef INET6
+	struct socket *so6;
+#endif
+
+#ifdef INET
+	so4 = V_udp4_tun_socket;
+	if (so4 != NULL) {
+		soclose(so4);
+		V_udp4_tun_socket = NULL;
+	}
+#endif
+#ifdef INET6
+	so6 = V_udp6_tun_socket;
+	if (so6 != NULL) {
+		soclose(so6);
+		V_udp6_tun_socket = NULL;
+	}
+#endif
+}
+
+/*
+ * Create the kernel UDP socket(s) used for TCP-over-UDP tunneling, install
+ * the tunneling input and ICMP handlers on them, and bind them to the
+ * currently configured tunneling port.
+ *
+ * Returns 0 on success, EINVAL when no port is configured, EALREADY when a
+ * tunneling socket already exists, or the error reported by socreate(),
+ * udp_set_kernel_tunneling() or sobind().  On any of those failures every
+ * socket created so far is closed again via tcp_over_udp_stop().
+ *
+ * The original note here states that the sysctl caller is assumed to hold
+ * the inp info lock for writing.
+ */
+static int
+tcp_over_udp_start(void)
+{
+	uint16_t port;
+	int ret;
+#ifdef INET
+	struct sockaddr_in sin;
+#endif
+#ifdef INET6
+	struct sockaddr_in6 sin6;
+#endif
+
+	/* The configured port is kept in host byte order. */
+	port = V_tcp_udp_tunneling_port;
+	if (port == 0) {
+		/* Must have a port set */
+		return (EINVAL);
+	}
+	/* Already running -- must stop first */
+#ifdef INET
+	if (V_udp4_tun_socket != NULL)
+		return (EALREADY);
+#endif
+#ifdef INET6
+	if (V_udp6_tun_socket != NULL)
+		return (EALREADY);
+#endif
+
+#ifdef INET
+	ret = socreate(PF_INET, &V_udp4_tun_socket, SOCK_DGRAM, IPPROTO_UDP,
+	    curthread->td_ucred, curthread);
+	if (ret != 0)
+		goto fail;
+	/* Call the special UDP hook. */
+	ret = udp_set_kernel_tunneling(V_udp4_tun_socket,
+	    tcp_recv_udp_tunneled_packet, tcp_ctlinput_viaudp, NULL);
+	if (ret != 0)
+		goto fail;
+	/* Ok, we have a socket, bind it to the port. */
+	memset(&sin, 0, sizeof(struct sockaddr_in));
+	sin.sin_len = sizeof(struct sockaddr_in);
+	sin.sin_family = AF_INET;
+	sin.sin_port = htons(port);
+	ret = sobind(V_udp4_tun_socket, (struct sockaddr *)&sin, curthread);
+	if (ret != 0)
+		goto fail;
+#endif
+#ifdef INET6
+	ret = socreate(PF_INET6, &V_udp6_tun_socket, SOCK_DGRAM, IPPROTO_UDP,
+	    curthread->td_ucred, curthread);
+	if (ret != 0)
+		goto fail;
+	/* Call the special UDP hook. */
+	ret = udp_set_kernel_tunneling(V_udp6_tun_socket,
+	    tcp_recv_udp_tunneled_packet, tcp6_ctlinput_viaudp, NULL);
+	if (ret != 0)
+		goto fail;
+	/* Ok, we have a socket, bind it to the port. */
+	memset(&sin6, 0, sizeof(struct sockaddr_in6));
+	sin6.sin6_len = sizeof(struct sockaddr_in6);
+	sin6.sin6_family = AF_INET6;
+	sin6.sin6_port = htons(port);
+	ret = sobind(V_udp6_tun_socket, (struct sockaddr *)&sin6, curthread);
+	if (ret != 0)
+		goto fail;
+#endif
+	return (0);
+
+fail:
+	/* Undo whatever was set up before the failing step. */
+	tcp_over_udp_stop();
+	return (ret);
+}
+
+/*
+ * Sysctl handler for net.inet.tcp.udp_tunneling_port.
+ *
+ * A read reports the current port.  A write validates the new value
+ * against TCP_TUNNELING_PORT_MIN/TCP_TUNNELING_PORT_MAX (EINVAL when out of
+ * range), stores it, stops the tunneling sockets if a port was previously
+ * set, and starts them again when the new port is non-zero.
+ *
+ * If starting fails, the stored port is reset to 0: tcp_over_udp_start()
+ * closes its sockets on failure, so leaving the new port in place would
+ * report tunneling as configured while nothing is listening.  The error
+ * from tcp_over_udp_start() is returned to the caller.
+ */
+static int
+sysctl_net_inet_tcp_udp_tunneling_port_check(SYSCTL_HANDLER_ARGS)
+{
+	int error;
+	uint32_t old, new;
+
+	old = V_tcp_udp_tunneling_port;
+	new = old;
+	error = sysctl_handle_int(oidp, &new, 0, req);
+	if ((error != 0) || (req->newptr == NULL))
+		return (error);
+	if ((new < TCP_TUNNELING_PORT_MIN) ||
+	    (new > TCP_TUNNELING_PORT_MAX))
+		return (EINVAL);
+
+	V_tcp_udp_tunneling_port = new;
+	if (old != 0) {
+		tcp_over_udp_stop();
+	}
+	if (new != 0) {
+		error = tcp_over_udp_start();
+		if (error != 0) {
+			/* No socket is listening; do not keep the port set. */
+			V_tcp_udp_tunneling_port = 0;
+		}
+	}
+	return (error);
+}
+
+SYSCTL_PROC(_net_inet_tcp, OID_AUTO, udp_tunneling_port,
+    CTLFLAG_VNET | CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE,
+    &VNET_NAME(tcp_udp_tunneling_port),
+    0, &sysctl_net_inet_tcp_udp_tunneling_port_check, "IU",
+    "Tunneling port for tcp over udp");
+
+/* Overhead subtracted from the MSS when a connection uses UDP tunneling. */
+VNET_DEFINE(int, tcp_udp_tunneling_overhead) = TCP_TUNNELING_OVERHEAD_DEFAULT;
+
+/*
+ * Sysctl handler for net.inet.tcp.udp_tunneling_overhead.
+ *
+ * A read reports the current value.  A write is accepted only when the new
+ * value lies within TCP_TUNNELING_OVERHEAD_MIN..TCP_TUNNELING_OVERHEAD_MAX;
+ * otherwise EINVAL is returned and the stored value is left untouched.
+ */
+static int
+sysctl_net_inet_tcp_udp_tunneling_overhead_check(SYSCTL_HANDLER_ARGS)
+{
+	int error, requested;
+
+	requested = V_tcp_udp_tunneling_overhead;
+	error = sysctl_handle_int(oidp, &requested, 0, req);
+	if (error != 0 || !req->newptr)
+		return (error);
+	if (requested < TCP_TUNNELING_OVERHEAD_MIN ||
+	    requested > TCP_TUNNELING_OVERHEAD_MAX)
+		return (EINVAL);
+	V_tcp_udp_tunneling_overhead = requested;
+	return (0);
+}
+
+SYSCTL_PROC(_net_inet_tcp, OID_AUTO, udp_tunneling_overhead,
+    CTLFLAG_VNET | CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE,
+    &VNET_NAME(tcp_udp_tunneling_overhead),
+    0, &sysctl_net_inet_tcp_udp_tunneling_overhead_check, "IU",
+    "MSS reduction when using tcp over udp");
+
/*
* Exports one (struct tcp_function_info) for each alias/name.
*/
@@ -1314,7 +1567,7 @@ tcp_fini(void *xtp)
* of the tcpcb each time to conserve mbufs.
*/
void
-tcpip_fillheaders(struct inpcb *inp, void *ip_ptr, void *tcp_ptr)
+tcpip_fillheaders(struct inpcb *inp, uint16_t port, void *ip_ptr, void *tcp_ptr)
{
struct tcphdr *th = (struct tcphdr *)tcp_ptr;
@@ -1329,7 +1582,10 @@ tcpip_fillheaders(struct inpcb *inp, void *ip_ptr, void *tcp_ptr)
(inp->inp_flow & IPV6_FLOWINFO_MASK);
ip6->ip6_vfc = (ip6->ip6_vfc & ~IPV6_VERSION_MASK) |
(IPV6_VERSION & IPV6_VERSION_MASK);
- ip6->ip6_nxt = IPPROTO_TCP;
+ if (port == 0)
+ ip6->ip6_nxt = IPPROTO_TCP;
+ else
+ ip6->ip6_nxt = IPPROTO_UDP;
ip6->ip6_plen = htons(sizeof(struct tcphdr));
ip6->ip6_src = inp->in6p_laddr;
ip6->ip6_dst = inp->in6p_faddr;
@@ -1351,7 +1607,10 @@ tcpip_fillheaders(struct inpcb *inp, void *ip_ptr, void *tcp_ptr)
ip->ip_off = 0;
ip->ip_ttl = inp->inp_ip_ttl;
ip->ip_sum = 0;
- ip->ip_p = IPPROTO_TCP;
+ if (port == 0)
+ ip->ip_p = IPPROTO_TCP;
+ else
+ ip->ip_p = IPPROTO_UDP;
ip->ip_src = inp->inp_laddr;
ip->ip_dst = inp->inp_faddr;
}
@@ -1381,7 +1640,7 @@ tcpip_maketemplate(struct inpcb *inp)
t = malloc(sizeof(*t), M_TEMP, M_NOWAIT);
if (t == NULL)
return (NULL);
- tcpip_fillheaders(inp, (void *)&t->tt_ipgen, (void *)&t->tt_t);
+ tcpip_fillheaders(inp, 0, (void *)&t->tt_ipgen, (void *)&t->tt_t);
return (t);
}
@@ -1407,14 +1666,16 @@ tcp_respond(struct tcpcb *tp, void *ipgen, struct tcphdr *th, struct mbuf *m,
struct inpcb *inp;
struct ip *ip;
struct mbuf *optm;
+ struct udphdr *uh = NULL;
struct tcphdr *nth;
u_char *optp;
#ifdef INET6
struct ip6_hdr *ip6;
int isipv6;
#endif /* INET6 */
- int optlen, tlen, win;
+ int optlen, tlen, win, ulen;
bool incl_opts;
+ uint16_t port;
KASSERT(tp != NULL || m != NULL, ("tcp_respond: tp and m both NULL"));
NET_EPOCH_ASSERT();
@@ -1432,6 +1693,19 @@ tcp_respond(struct tcpcb *tp, void *ipgen, struct tcphdr *th, struct mbuf *m,
} else
inp = NULL;
+ if (m != NULL) {
+#ifdef INET6
+ if (isipv6 && ip6 && (ip6->ip6_nxt == IPPROTO_UDP))
+ port = m->m_pkthdr.tcp_tun_port;
+ else
+#endif
+ if (ip && (ip->ip_p == IPPROTO_UDP))
+ port = m->m_pkthdr.tcp_tun_port;
+ else
+ port = 0;
+ } else
+ port = tp->t_port;
+
incl_opts = false;
win = 0;
if (tp != NULL) {
@@ -1454,16 +1728,30 @@ tcp_respond(struct tcpcb *tp, void *ipgen, struct tcphdr *th, struct mbuf *m,
sizeof(struct ip6_hdr));
ip6 = mtod(m, struct ip6_hdr *);
nth = (struct tcphdr *)(ip6 + 1);
+ if (port) {
+ /* Insert a UDP header */
+ uh = (struct udphdr *)nth;
+ uh->uh_sport = htons(V_tcp_udp_tunneling_port);
+ uh->uh_dport = port;
+ nth = (struct tcphdr *)(uh + 1);
+ }
} else
#endif /* INET6 */
{
bcopy((caddr_t)ip, mtod(m, caddr_t), sizeof(struct ip));
ip = mtod(m, struct ip *);
nth = (struct tcphdr *)(ip + 1);
+ if (port) {
+ /* Insert a UDP header */
+ uh = (struct udphdr *)nth;
+ uh->uh_sport = htons(V_tcp_udp_tunneling_port);
+ uh->uh_dport = port;
+ nth = (struct tcphdr *)(uh + 1);
+ }
}
bcopy((caddr_t)th, (caddr_t)nth, sizeof(struct tcphdr));
flags = TH_ACK;
- } else if (!M_WRITABLE(m)) {
+ } else if ((!M_WRITABLE(m)) || (port != 0)) {
struct mbuf *n;
/* Can't reuse 'm', allocate a new mbuf. */
@@ -1489,6 +1777,13 @@ tcp_respond(struct tcpcb *tp, void *ipgen, struct tcphdr *th, struct mbuf *m,
ip6 = mtod(n, struct ip6_hdr *);
xchg(ip6->ip6_dst, ip6->ip6_src, struct in6_addr);
nth = (struct tcphdr *)(ip6 + 1);
+ if (port) {
+ /* Insert a UDP header */
+ uh = (struct udphdr *)nth;
+ uh->uh_sport = htons(V_tcp_udp_tunneling_port);
+ uh->uh_dport = port;
+ nth = (struct tcphdr *)(uh + 1);
+ }
} else
#endif /* INET6 */
{
@@ -1496,6 +1791,13 @@ tcp_respond(struct tcpcb *tp, void *ipgen, struct tcphdr *th, struct mbuf *m,
ip = mtod(n, struct ip *);
xchg(ip->ip_dst.s_addr, ip->ip_src.s_addr, uint32_t);
nth = (struct tcphdr *)(ip + 1);
+ if (port) {
+ /* Insert a UDP header */
+ uh = (struct udphdr *)nth;
+ uh->uh_sport = htons(V_tcp_udp_tunneling_port);
+ uh->uh_dport = port;
+ nth = (struct tcphdr *)(uh + 1);
+ }
}
bcopy((caddr_t)th, (caddr_t)nth, sizeof(struct tcphdr));
xchg(nth->th_dport, nth->th_sport, uint16_t);
@@ -1544,6 +1846,8 @@ tcp_respond(struct tcpcb *tp, void *ipgen, struct tcphdr *th, struct mbuf *m,
#ifdef INET
tlen = sizeof (struct tcpiphdr);
#endif
+ if (port)
+ tlen += sizeof (struct udphdr);
#ifdef INVARIANTS
m->m_len = 0;
KASSERT(M_TRAILINGSPACE(m) >= tlen,
@@ -1587,9 +1891,16 @@ tcp_respond(struct tcpcb *tp, void *ipgen, struct tcphdr *th, struct mbuf *m,
optlen = 0;
#ifdef INET6
if (isipv6) {
+ if (uh) {
+ ulen = tlen - sizeof(struct ip6_hdr);
+ uh->uh_ulen = htons(ulen);
+ }
ip6->ip6_flow = 0;
ip6->ip6_vfc = IPV6_VERSION;
- ip6->ip6_nxt = IPPROTO_TCP;
+ if (port)
+ ip6->ip6_nxt = IPPROTO_UDP;
+ else
+ ip6->ip6_nxt = IPPROTO_TCP;
ip6->ip6_plen = htons(tlen - sizeof(*ip6));
}
#endif
@@ -1598,8 +1909,17 @@ tcp_respond(struct tcpcb *tp, void *ipgen, struct tcphdr *th, struct mbuf *m,
#endif
#ifdef INET
{
+ if (uh) {
+ ulen = tlen - sizeof(struct ip);
+ uh->uh_ulen = htons(ulen);
+ }
ip->ip_len = htons(tlen);
ip->ip_ttl = V_ip_defttl;
+ if (port) {
+ ip->ip_p = IPPROTO_UDP;
+ } else {
+ ip->ip_p = IPPROTO_TCP;
+ }
if (V_path_mtu_discovery)
ip->ip_off |= htons(IP_DF);
}
@@ -1643,12 +1963,19 @@ tcp_respond(struct tcpcb *tp, void *ipgen, struct tcphdr *th, struct mbuf *m,
}
#endif
- m->m_pkthdr.csum_data = offsetof(struct tcphdr, th_sum);
#ifdef INET6
if (isipv6) {
- m->m_pkthdr.csum_flags = CSUM_TCP_IPV6;
- nth->th_sum = in6_cksum_pseudo(ip6,
- tlen - sizeof(struct ip6_hdr), IPPROTO_TCP, 0);
+ if (port) {
+ m->m_pkthdr.csum_flags = CSUM_UDP_IPV6;
+ m->m_pkthdr.csum_data = offsetof(struct udphdr, uh_sum);
+ uh->uh_sum = in6_cksum_pseudo(ip6, ulen, IPPROTO_UDP, 0);
+ nth->th_sum = 0;
+ } else {
+ m->m_pkthdr.csum_flags = CSUM_TCP_IPV6;
+ m->m_pkthdr.csum_data = offsetof(struct tcphdr, th_sum);
+ nth->th_sum = in6_cksum_pseudo(ip6,
+ tlen - sizeof(struct ip6_hdr), IPPROTO_TCP, 0);
+ }
ip6->ip6_hlim = in6_selecthlim(tp != NULL ? tp->t_inpcb :
NULL, NULL);
}
@@ -1658,9 +1985,18 @@ tcp_respond(struct tcpcb *tp, void *ipgen, struct tcphdr *th, struct mbuf *m,
#endif
#ifdef INET
{
- m->m_pkthdr.csum_flags = CSUM_TCP;
- nth->th_sum = in_pseudo(ip->ip_src.s_addr, ip->ip_dst.s_addr,
- htons((u_short)(tlen - sizeof(struct ip) + ip->ip_p)));
+ if (port) {
+ uh->uh_sum = in_pseudo(ip->ip_src.s_addr, ip->ip_dst.s_addr,
+ htons(ulen + IPPROTO_UDP));
+ m->m_pkthdr.csum_flags = CSUM_UDP;
+ m->m_pkthdr.csum_data = offsetof(struct udphdr, uh_sum);
+ nth->th_sum = 0;
+ } else {
+ m->m_pkthdr.csum_flags = CSUM_TCP;
+ m->m_pkthdr.csum_data = offsetof(struct tcphdr, th_sum);
+ nth->th_sum = in_pseudo(ip->ip_src.s_addr, ip->ip_dst.s_addr,
+ htons((u_short)(tlen - sizeof(struct ip) + ip->ip_p)));
+ }
}
#endif /* INET */
#ifdef TCPDEBUG
@@ -2460,8 +2796,8 @@ SYSCTL_PROC(_net_inet6_tcp6, OID_AUTO, getcred,
#endif /* INET6 */
#ifdef INET
-void
-tcp_ctlinput(int cmd, struct sockaddr *sa, void *vip)
+static void
+tcp_ctlinput_with_port(int cmd, struct sockaddr *sa, void *vip, uint16_t port)
{
struct ip *ip = vip;
struct tcphdr *th;
@@ -2515,6 +2851,9 @@ tcp_ctlinput(int cmd, struct sockaddr *sa, void *vip)
!(inp->inp_flags & INP_DROPPED) &&
!(inp->inp_socket == NULL)) {
tp = intotcpcb(inp);
+ if (tp->t_port != port) {
+ goto out;
+ }
if (SEQ_GEQ(ntohl(icmp_tcp_seq), tp->snd_una) &&
SEQ_LT(ntohl(icmp_tcp_seq), tp->snd_max)) {
if (cmd == PRC_MSGSIZE) {
@@ -2561,17 +2900,61 @@ tcp_ctlinput(int cmd, struct sockaddr *sa, void *vip)
inc.inc_lport = th->th_sport;
inc.inc_faddr = faddr;
inc.inc_laddr = ip->ip_src;
- syncache_unreach(&inc, icmp_tcp_seq);
+ syncache_unreach(&inc, icmp_tcp_seq, port);
}
out:
if (inp != NULL)
INP_WUNLOCK(inp);
}
+
+void
+tcp_ctlinput(int cmd, struct sockaddr *sa, void *vip)
+{
+ tcp_ctlinput_with_port(cmd, sa, vip, htons(0)); /* port 0 matches only connections whose t_port is 0, i.e. no UDP encapsulation */
+}
+
+void
+tcp_ctlinput_viaudp(int cmd, struct sockaddr *sa, void *vip, void *unused)
+{
+ /* ICMP handler for TCP tunneled over UDP: vip points at the inner IP header, which is followed by a UDP header and then the TCP header. The 'unused' argument is ignored. */
+ struct ip *outer_ip, *inner_ip;
+ struct icmp *icmp;
+ struct udphdr *udp;
+ struct tcphdr *th, ttemp;
+ int i_hlen, o_len;
+ uint16_t port;
+
+ inner_ip = (struct ip *)vip;
+ icmp = (struct icmp *)((caddr_t)inner_ip -
+ (sizeof(struct icmp) - sizeof(struct ip))); /* presumably steps back over the ICMP fields that precede the embedded IP header */
+ outer_ip = (struct ip *)((caddr_t)icmp - sizeof(struct ip)); /* NOTE(review): assumes an outer IP header of exactly sizeof(struct ip) sits right before the ICMP header -- confirm */
+ i_hlen = inner_ip->ip_hl << 2; /* inner IP header length in bytes */
+ o_len = ntohs(outer_ip->ip_len);
+ if (o_len <
+ (sizeof(struct ip) + 8 + i_hlen + sizeof(struct udphdr) + offsetof(struct tcphdr, th_ack))) { /* 8: presumably the ICMP header length -- confirm */
+ /* Not enough data present */
+ return;
+ }
+ /* Strip the inner UDP header by copying the TCP header up on top of it. */
+ udp = (struct udphdr *)(((caddr_t)inner_ip) + i_hlen);
+ if (ntohs(udp->uh_sport) != V_tcp_udp_tunneling_port) { /* ignore unless the inner UDP source port is our tunneling port */
+ return;
+ }
+ port = udp->uh_dport; /* remote encapsulation port, left in network byte order */
+ th = (struct tcphdr *)(udp + 1);
+ memcpy(&ttemp, th, sizeof(struct tcphdr)); /* go through ttemp: th starts inside the region written below, so source and destination overlap */
+ memcpy(udp, &ttemp, sizeof(struct tcphdr)); /* NOTE(review): the length check above only covers the TCP header up to th_ack, yet a full struct tcphdr is copied -- confirm the remaining bytes are always readable */
+ /* Shrink the outer IP total length by the size of the removed UDP header. */
+ o_len -= sizeof(struct udphdr);
+ outer_ip->ip_len = htons(o_len);
+ /* Now call in to the normal handling code */
+ tcp_ctlinput_with_port(cmd, sa, vip, port);
+}
#endif /* INET */
#ifdef INET6
-void
-tcp6_ctlinput(int cmd, struct sockaddr *sa, void *d)
+static void
+tcp6_ctlinput_with_port(int cmd, struct sockaddr *sa, void *d, uint16_t port)
{
struct in6_addr *dst;
struct inpcb *(*notify)(struct inpcb *, int) = tcp_notify;
@@ -2661,6 +3044,9 @@ tcp6_ctlinput(int cmd, struct sockaddr *sa, void *d)
!(inp->inp_flags & INP_DROPPED) &&
!(inp->inp_socket == NULL)) {
tp = intotcpcb(inp);
+ if (tp->t_port != port) {
+ goto out;
+ }
if (SEQ_GEQ(ntohl(icmp_tcp_seq), tp->snd_una) &&
SEQ_LT(ntohl(icmp_tcp_seq), tp->snd_max)) {
if (cmd == PRC_MSGSIZE) {
@@ -2710,12 +3096,45 @@ tcp6_ctlinput(int cmd, struct sockaddr *sa, void *d)
inc.inc_lport = t_ports.th_sport;
inc.inc6_faddr = *dst;
inc.inc6_laddr = ip6->ip6_src;
- syncache_unreach(&inc, icmp_tcp_seq);
+ syncache_unreach(&inc, icmp_tcp_seq, port);
}
out:
if (inp != NULL)
INP_WUNLOCK(inp);
}
+
+void
+tcp6_ctlinput(int cmd, struct sockaddr *sa, void *d)
+{
+ tcp6_ctlinput_with_port(cmd, sa, d, htons(0)); /* port 0 matches only connections whose t_port is 0, i.e. no UDP encapsulation */
+}
+
+void
+tcp6_ctlinput_viaudp(int cmd, struct sockaddr *sa, void *d, void *unused)
+{
+ struct ip6ctlparam *ip6cp;
+ struct mbuf *m;
+ struct udphdr *udp;
+ uint16_t port;
+
+ ip6cp = (struct ip6ctlparam *)d; /* IPv6 counterpart of tcp_ctlinput_viaudp(); 'unused' is ignored */
+ m = m_pulldown(ip6cp->ip6c_m, ip6cp->ip6c_off, sizeof(struct udphdr), NULL); /* get the UDP header at ip6c_off as contiguous bytes */
+ if (m == NULL) {
+ return;
+ }
+ udp = mtod(m, struct udphdr *);
+ if (ntohs(udp->uh_sport) != V_tcp_udp_tunneling_port) { /* ignore unless the UDP source port is our tunneling port */
+ return;
+ }
+ port = udp->uh_dport; /* remote encapsulation port, left in network byte order */
+ m_adj(m, sizeof(struct udphdr)); /* trim the UDP header from the front of m */
+ if ((m->m_flags & M_PKTHDR) == 0) { /* m carries no packet header, so the length kept in ip6c_m's pkthdr is adjusted by hand */
+ ip6cp->ip6c_m->m_pkthdr.len -= sizeof(struct udphdr);
+ }
+ /* Now call in to the normal handling code */
+ tcp6_ctlinput_with_port(cmd, sa, d, port);
+}
+
#endif /* INET6 */
static uint32_t
@@ -3448,11 +3867,13 @@ void
tcp_inptoxtp(const struct inpcb *inp, struct xtcpcb *xt)
{
struct tcpcb *tp = intotcpcb(inp);
+ struct tcptw *tw = intotw(inp);
sbintime_t now;
bzero(xt, sizeof(*xt));
if (inp->inp_flags & INP_TIMEWAIT) {
xt->t_state = TCPS_TIME_WAIT;
+ xt->xt_encaps_port = tw->t_port;
} else {
xt->t_state = tp->t_state;
xt->t_logstate = tp->t_logstate;
@@ -3484,6 +3905,7 @@ tcp_inptoxtp(const struct inpcb *inp, struct xtcpcb *xt)
#undef COPYTIMER
xt->t_rcvtime = 1000 * (ticks - tp->t_rcvtime) / hz;
+ xt->xt_encaps_port = tp->t_port;
bcopy(tp->t_fb->tfb_tcp_block_name, xt->xt_stack,
TCP_FUNCTION_NAME_LEN_MAX);
bcopy(CC_ALGO(tp)->name, xt->xt_cc,
diff --git a/sys/netinet/tcp_syncache.c b/sys/netinet/tcp_syncache.c
index 4cd8411af8d5..35d9c091ab96 100644
--- a/sys/netinet/tcp_syncache.c
+++ b/sys/netinet/tcp_syncache.c
@@ -96,6 +96,8 @@ __FBSDID("$FreeBSD$");
#ifdef TCP_OFFLOAD
#include <netinet/toecore.h>
#endif
+#include <netinet/udp.h>
+#include <netinet/udp_var.h>
#include <netipsec/ipsec_support.h>
@@ -143,14 +145,14 @@ static tcp_seq syncookie_generate(struct syncache_head *, struct syncache *);
static struct syncache
*syncookie_lookup(struct in_conninfo *, struct syncache_head *,
struct syncache *, struct tcphdr *, struct tcpopt *,
- struct socket *);
+ struct socket *, uint16_t);
static void syncache_pause(struct in_conninfo *);
static void syncache_unpause(void *);
static void syncookie_reseed(void *);
#ifdef INVARIANTS
static int syncookie_cmp(struct in_conninfo *inc, struct syncache_head *sch,
struct syncache *sc, struct tcphdr *th, struct tcpopt *to,
- struct socket *lso);
+ struct socket *lso, uint16_t port);
#endif
/*
@@ -610,7 +612,8 @@ syncache_lookup(struct in_conninfo *inc, struct syncache_head **schp)
* If required send a challenge ACK.
*/
void
-syncache_chkrst(struct in_conninfo *inc, struct tcphdr *th, struct mbuf *m)
+syncache_chkrst(struct in_conninfo *inc, struct tcphdr *th, struct mbuf *m,
+ uint16_t port)
{
struct syncache *sc;
struct syncache_head *sch;
@@ -650,6 +653,16 @@ syncache_chkrst(struct in_conninfo *inc, struct tcphdr *th, struct mbuf *m)
goto done;
}
+ /* The remote UDP encaps port does not match. */
+ if (sc->sc_port != port) {
+ if ((s = tcp_log_addrs(inc, th, NULL, NULL)))
+ log(LOG_DEBUG, "%s; %s: Spurious RST with matching "
+ "syncache entry but non-matching UDP encaps port, "
+ "segment ignored\n", s, __func__);
+ TCPSTAT_INC(tcps_badrst);
+ goto done;
+ }
+
/*
* If the RST bit is set, check the sequence number to see
* if this is a valid reset segment.
@@ -716,7 +729,7 @@ done:
}
void
-syncache_badack(struct in_conninfo *inc)
+syncache_badack(struct in_conninfo *inc, uint16_t port)
{
struct syncache *sc;
struct syncache_head *sch;
@@ -725,7 +738,7 @@ syncache_badack(struct in_conninfo *inc)
return;
sc = syncache_lookup(inc, &sch); /* returns locked sch */
SCH_LOCK_ASSERT(sch);
- if (sc != NULL) {
+ if ((sc != NULL) && (sc->sc_port == port)) {
syncache_drop(sc, sch);
TCPSTAT_INC(tcps_sc_badack);
}
@@ -733,7 +746,7 @@ syncache_badack(struct in_conninfo *inc)
}
void
-syncache_unreach(struct in_conninfo *inc, tcp_seq th_seq)
+syncache_unreach(struct in_conninfo *inc, tcp_seq th_seq, uint16_t port)
{
struct syncache *sc;
struct syncache_head *sch;
@@ -745,6 +758,10 @@ syncache_unreach(struct in_conninfo *inc, tcp_seq th_seq)
if (sc == NULL)
goto done;
+ /* If the port != sc_port, then it's a bogus ICMP msg */
+ if (port != sc->sc_port)
+ goto done;
+
/* If the sequence number != sc_iss, then it's a bogus ICMP msg */
if (ntohl(th_seq) != sc->sc_iss)
goto done;
@@ -951,6 +968,7 @@ syncache_socket(struct syncache *sc, struct socket *lso, struct mbuf *m)
tcp_state_change(tp, TCPS_SYN_RECEIVED);
tp->iss = sc->sc_iss;
tp->irs = sc->sc_irs;
+ tp->t_port = sc->sc_port;
tcp_rcvseqinit(tp);
tcp_sendseqinit(tp);
blk = sototcpcb(lso)->t_fb;
@@ -1071,7 +1089,7 @@ abort2:
*/
int
syncache_expand(struct in_conninfo *inc, struct tcpopt *to, struct tcphdr *th,
- struct socket **lsop, struct mbuf *m)
+ struct socket **lsop, struct mbuf *m, uint16_t port)
{
struct syncache *sc;
struct syncache_head *sch;
@@ -1099,7 +1117,7 @@ syncache_expand(struct in_conninfo *inc, struct tcpopt *to, struct tcphdr *th,
* values with the reconstructed values from the cookie.
*/
if (sc != NULL)
- syncookie_cmp(inc, sch, sc, th, to, *lsop);
+ syncookie_cmp(inc, sch, sc, th, to, *lsop, port);
#endif
if (sc == NULL) {
@@ -1133,7 +1151,7 @@ syncache_expand(struct in_conninfo *inc, struct tcpopt *to, struct tcphdr *th,
goto failed;
}
bzero(&scs, sizeof(scs));
- sc = syncookie_lookup(inc, sch, &scs, th, to, *lsop);
+ sc = syncookie_lookup(inc, sch, &scs, th, to, *lsop, port);
if (locked)
SCH_UNLOCK(sch);
if (sc == NULL) {
@@ -1160,6 +1178,10 @@ syncache_expand(struct in_conninfo *inc, struct tcpopt *to, struct tcphdr *th,
}
#endif /* TCP_SIGNATURE */
} else {
+ if (sc->sc_port != port) {
+ SCH_UNLOCK(sch);
+ return (0);
+ }
#if defined(IPSEC_SUPPORT) || defined(TCP_SIGNATURE)
/*
* If listening socket requested TCP digests, check that
@@ -1380,7 +1402,7 @@ syncache_tfo_expand(struct syncache *sc, struct socket *lso, struct mbuf *m,
struct socket *
syncache_add(struct in_conninfo *inc, struct tcpopt *to, struct tcphdr *th,
struct inpcb *inp, struct socket *so, struct mbuf *m, void *tod,
- void *todctx, uint8_t iptos)
+ void *todctx, uint8_t iptos, uint16_t port)
{
struct tcpcb *tp;
struct socket *rv = NULL;
@@ -1640,6 +1662,7 @@ skip_alloc:
sc->sc_label = maclabel;
#endif
sc->sc_cred = cred;
+ sc->sc_port = port;
cred = NULL;
sc->sc_ipopts = ipopts;
bcopy(inc, &sc->sc_inc, sizeof(struct in_conninfo));
@@ -1797,8 +1820,9 @@ syncache_respond(struct syncache *sc, const struct mbuf *m0, int flags)
struct ip *ip = NULL;
struct mbuf *m;
struct tcphdr *th = NULL;
+ struct udphdr *udp = NULL;
int optlen, error = 0; /* Make compiler happy */
- u_int16_t hlen, tlen, mssopt;
+ u_int16_t hlen, tlen, mssopt, ulen;
struct tcpopt to;
#ifdef INET6
struct ip6_hdr *ip6 = NULL;
@@ -1812,9 +1836,14 @@ syncache_respond(struct syncache *sc, const struct mbuf *m0, int flags)
#endif
sizeof(struct ip);
tlen = hlen + sizeof(struct tcphdr);
-
+ if (sc->sc_port) {
+ tlen += sizeof(struct udphdr);
+ }
/* Determine MSS we advertize to other end of connection. */
- mssopt = max(tcp_mssopt(&sc->sc_inc), V_tcp_minmss);
+ mssopt = tcp_mssopt(&sc->sc_inc);
+ if (sc->sc_port)
+ mssopt -= V_tcp_udp_tunneling_overhead;
+ mssopt = max(mssopt, V_tcp_minmss);
/* XXX: Assume that the entire packet will fit in a header mbuf. */
KASSERT(max_linkhdr + tlen + TCP_MAXOLEN <= MHLEN,
@@ -1836,7 +1865,6 @@ syncache_respond(struct syncache *sc, const struct mbuf *m0, int flags)
if (sc->sc_inc.inc_flags & INC_ISIPV6) {
ip6 = mtod(m, struct ip6_hdr *);
ip6->ip6_vfc = IPV6_VERSION;
- ip6->ip6_nxt = IPPROTO_TCP;
ip6->ip6_src = sc->sc_inc.inc6_laddr;
ip6->ip6_dst = sc->sc_inc.inc6_faddr;
ip6->ip6_plen = htons(tlen - hlen);
@@ -1844,9 +1872,18 @@ syncache_respond(struct syncache *sc, const struct mbuf *m0, int flags)
/* Zero out traffic class and flow label. */
ip6->ip6_flow &= ~IPV6_FLOWINFO_MASK;
ip6->ip6_flow |= sc->sc_flowlabel;
+ if (sc->sc_port != 0) {
+ ip6->ip6_nxt = IPPROTO_UDP;
+ udp = (struct udphdr *)(ip6 + 1);
+ udp->uh_sport = htons(V_tcp_udp_tunneling_port);
+ udp->uh_dport = sc->sc_port;
+ ulen = (tlen - sizeof(struct ip6_hdr));
+ th = (struct tcphdr *)(udp + 1);
+ } else {
+ ip6->ip6_nxt = IPPROTO_TCP;
+ th = (struct tcphdr *)(ip6 + 1);
+ }
ip6->ip6_flow |= htonl(sc->sc_ip_tos << 20);
-
- th = (struct tcphdr *)(ip6 + 1);
}
#endif
#if defined(INET6) && defined(INET)
@@ -1861,7 +1898,6 @@ syncache_respond(struct syncache *sc, const struct mbuf *m0, int flags)
ip->ip_id = 0;
ip->ip_off = 0;
ip->ip_sum = 0;
- ip->ip_p = IPPROTO_TCP;
ip->ip_src = sc->sc_inc.inc_laddr;
ip->ip_dst = sc->sc_inc.inc_faddr;
ip->ip_ttl = sc->sc_ip_ttl;
@@ -1876,8 +1912,17 @@ syncache_respond(struct syncache *sc, const struct mbuf *m0, int flags)
*/
if (V_path_mtu_discovery && ((sc->sc_flags & SCF_UNREACH) == 0))
ip->ip_off |= htons(IP_DF);
-
- th = (struct tcphdr *)(ip + 1);
+ if (sc->sc_port == 0) {
+ ip->ip_p = IPPROTO_TCP;
+ th = (struct tcphdr *)(ip + 1);
+ } else {
+ ip->ip_p = IPPROTO_UDP;
+ udp = (struct udphdr *)(ip + 1);
+ udp->uh_sport = htons(V_tcp_udp_tunneling_port);
+ udp->uh_dport = sc->sc_port;
+ ulen = (tlen - sizeof(struct ip));
+ th = (struct tcphdr *)(udp + 1);
+ }
}
#endif /* INET */
th->th_sport = sc->sc_inc.inc_lport;
@@ -1957,8 +2002,11 @@ syncache_respond(struct syncache *sc, const struct mbuf *m0, int flags)
} else
optlen = 0;
+ if (udp) {
+ ulen += optlen;
+ udp->uh_ulen = htons(ulen);
+ }
M_SETFIB(m, sc->sc_inc.inc_fibnum);
- m->m_pkthdr.csum_data = offsetof(struct tcphdr, th_sum);
/*
* If we have peer's SYN and it has a flowid, then let's assign it to
* our SYN|ACK. ip6_output() and ip_output() will not assign flowid
@@ -1970,9 +2018,18 @@ syncache_respond(struct syncache *sc, const struct mbuf *m0, int flags)
}
#ifdef INET6
if (sc->sc_inc.inc_flags & INC_ISIPV6) {
- m->m_pkthdr.csum_flags = CSUM_TCP_IPV6;
- th->th_sum = in6_cksum_pseudo(ip6, tlen + optlen - hlen,
- IPPROTO_TCP, 0);
+ if (sc->sc_port) {
+ m->m_pkthdr.csum_flags = CSUM_UDP_IPV6;
+ m->m_pkthdr.csum_data = offsetof(struct udphdr, uh_sum);
+ udp->uh_sum = in6_cksum_pseudo(ip6, ulen,
+ IPPROTO_UDP, 0);
+ th->th_sum = htons(0);
+ } else {
+ m->m_pkthdr.csum_flags = CSUM_TCP_IPV6;
+ m->m_pkthdr.csum_data = offsetof(struct tcphdr, th_sum);
+ th->th_sum = in6_cksum_pseudo(ip6, tlen + optlen - hlen,
+ IPPROTO_TCP, 0);
+ }
ip6->ip6_hlim = sc->sc_ip_ttl;
#ifdef TCP_OFFLOAD
if (ADDED_BY_TOE(sc)) {
@@ -1992,9 +2049,18 @@ syncache_respond(struct syncache *sc, const struct mbuf *m0, int flags)
#endif
#ifdef INET
{
- m->m_pkthdr.csum_flags = CSUM_TCP;
- th->th_sum = in_pseudo(ip->ip_src.s_addr, ip->ip_dst.s_addr,
- htons(tlen + optlen - hlen + IPPROTO_TCP));
+ if (sc->sc_port) {
+ m->m_pkthdr.csum_flags = CSUM_UDP;
+ m->m_pkthdr.csum_data = offsetof(struct udphdr, uh_sum);
+ udp->uh_sum = in_pseudo(ip->ip_src.s_addr,
+ ip->ip_dst.s_addr, htons(ulen + IPPROTO_UDP));
+ th->th_sum = htons(0);
+ } else {
+ m->m_pkthdr.csum_flags = CSUM_TCP;
+ m->m_pkthdr.csum_data = offsetof(struct tcphdr, th_sum);
+ th->th_sum = in_pseudo(ip->ip_src.s_addr, ip->ip_dst.s_addr,
+ htons(tlen + optlen - hlen + IPPROTO_TCP));
+ }
#ifdef TCP_OFFLOAD
if (ADDED_BY_TOE(sc)) {
struct toedev *tod = sc->sc_tod;
@@ -2224,7 +2290,7 @@ syncookie_generate(struct syncache_head *sch, struct syncache *sc)
static struct syncache *
syncookie_lookup(struct in_conninfo *inc, struct syncache_head *sch,
struct syncache *sc, struct tcphdr *th, struct tcpopt *to,
- struct socket *lso)
+ struct socket *lso, uint16_t port)
{
uint32_t hash;
uint8_t *secbits;
@@ -2310,6 +2376,8 @@ syncookie_lookup(struct in_conninfo *inc, struct syncache_head *sch,
sc->sc_rxmits = 0;
+ sc->sc_port = port;
+
TCPSTAT_INC(tcps_sc_recvcookie);
return (sc);
}
@@ -2318,13 +2386,13 @@ syncookie_lookup(struct in_conninfo *inc, struct syncache_head *sch,
static int
syncookie_cmp(struct in_conninfo *inc, struct syncache_head *sch,
struct syncache *sc, struct tcphdr *th, struct tcpopt *to,
- struct socket *lso)
+ struct socket *lso, uint16_t port)
{
struct syncache scs, *scx;
char *s;
bzero(&scs, sizeof(scs));
- scx = syncookie_lookup(inc, sch, &scs, th, to, lso);
+ scx = syncookie_lookup(inc, sch, &scs, th, to, lso, port);
if ((s = tcp_log_addrs(inc, th, NULL, NULL)) == NULL)
return (0);
@@ -2510,6 +2578,7 @@ syncache_pcblist(struct sysctl_req *req)
xt.xt_inp.inp_vflag = INP_IPV6;
else
xt.xt_inp.inp_vflag = INP_IPV4;
+ xt.xt_encaps_port = sc->sc_port;
bcopy(&sc->sc_inc, &xt.xt_inp.inp_inc,
sizeof (struct in_conninfo));
error = SYSCTL_OUT(req, &xt, sizeof xt);
diff --git a/sys/netinet/tcp_syncache.h b/sys/netinet/tcp_syncache.h
index 03e34a89c112..a16a80c483d5 100644
--- a/sys/netinet/tcp_syncache.h
+++ b/sys/netinet/tcp_syncache.h
@@ -40,14 +40,15 @@ void syncache_init(void);
#ifdef VIMAGE
void syncache_destroy(void);
#endif
-void syncache_unreach(struct in_conninfo *, tcp_seq);
+void syncache_unreach(struct in_conninfo *, tcp_seq, uint16_t);
int syncache_expand(struct in_conninfo *, struct tcpopt *,
- struct tcphdr *, struct socket **, struct mbuf *);
+ struct tcphdr *, struct socket **, struct mbuf *, uint16_t);
struct socket * syncache_add(struct in_conninfo *, struct tcpopt *,
struct tcphdr *, struct inpcb *, struct socket *, struct mbuf *,
- void *, void *, uint8_t);
-void syncache_chkrst(struct in_conninfo *, struct tcphdr *, struct mbuf *);
-void syncache_badack(struct in_conninfo *);
+ void *, void *, uint8_t, uint16_t);
+void syncache_chkrst(struct in_conninfo *, struct tcphdr *, struct mbuf *,
+ uint16_t);
+void syncache_badack(struct in_conninfo *, uint16_t);
int syncache_pcblist(struct sysctl_req *);
struct syncache {
@@ -55,6 +56,7 @@ struct syncache {
struct in_conninfo sc_inc; /* addresses */
int sc_rxttime; /* retransmit time */
u_int16_t sc_rxmits; /* retransmit counter */
+ u_int16_t sc_port; /* remote UDP encaps port */
u_int32_t sc_tsreflect; /* timestamp to reflect */
u_int32_t sc_tsoff; /* ts offset w/ syncookies */
u_int32_t sc_flowlabel; /* IPv6 flowlabel */
diff --git a/sys/netinet/tcp_timewait.c b/sys/netinet/tcp_timewait.c
index f98927b196fc..b62386ddca05 100644
--- a/sys/netinet/tcp_timewait.c
+++ b/sys/netinet/tcp_timewait.c
@@ -93,6 +93,8 @@ __FBSDID("$FreeBSD$");
#include <netinet6/ip6protosw.h>
#endif
+#include <netinet/udp.h>
+#include <netinet/udp_var.h>
#include <machine/in_cksum.h>
#include <security/mac/mac_framework.h>
@@ -318,6 +320,7 @@ tcp_twstart(struct tcpcb *tp)
}
tw->snd_nxt = tp->snd_nxt;
+ tw->t_port = tp->t_port;
tw->rcv_nxt = tp->rcv_nxt;
tw->iss = tp->iss;
tw->irs = tp->irs;
@@ -436,6 +439,7 @@ tcp_twcheck(struct inpcb *inp, struct tcpopt *to, struct tcphdr *th,
* while in TIME_WAIT, drop the old connection
* and start over if the sequence numbers
* are above the previous ones.
+ * Allow UDP port number changes in this case.
*/
if ((thflags & TH_SYN) && SEQ_GT(th->th_seq, tw->rcv_nxt)) {
tcp_twclose(tw, 0);
@@ -443,6 +447,25 @@ tcp_twcheck(struct inpcb *inp, struct tcpopt *to, struct tcphdr *th,
}
/*
+ * Send RST if UDP port numbers don't match
+ */
+ if (tw->t_port != m->m_pkthdr.tcp_tun_port) {
+ if (th->th_flags & TH_ACK) {
+ tcp_respond(NULL, mtod(m, void *), th, m,
+ (tcp_seq)0, th->th_ack, TH_RST);
+ } else {
+ if (th->th_flags & TH_SYN)
+ tlen++;
+ if (th->th_flags & TH_FIN)
+ tlen++;
+ tcp_respond(NULL, mtod(m, void *), th, m,
+ th->th_seq+tlen, (tcp_seq)0, TH_RST|TH_ACK);
+ }
+ INP_WUNLOCK(inp);
+ return (0);
+ }
+
+ /*
* Drop the segment if it does not contain an ACK.
*/
if ((thflags & TH_ACK) == 0)
@@ -555,13 +578,14 @@ tcp_twrespond(struct tcptw *tw, int flags)
#ifdef INET
struct ip *ip = NULL;
#endif
- u_int hdrlen, optlen;
+ u_int hdrlen, optlen, ulen;
int error = 0; /* Keep compiler happy */
struct tcpopt to;
#ifdef INET6
struct ip6_hdr *ip6 = NULL;
int isipv6 = inp->inp_inc.inc_flags & INC_ISIPV6;
#endif
+ struct udphdr *udp = NULL;
hdrlen = 0; /* Keep compiler happy */
INP_WLOCK_ASSERT(inp);
@@ -579,8 +603,16 @@ tcp_twrespond(struct tcptw *tw, int flags)
if (isipv6) {
hdrlen = sizeof(struct ip6_hdr) + sizeof(struct tcphdr);
ip6 = mtod(m, struct ip6_hdr *);
- th = (struct tcphdr *)(ip6 + 1);
- tcpip_fillheaders(inp, ip6, th);
+ if (tw->t_port) {
+ udp = (struct udphdr *)(ip6 + 1);
+ hdrlen += sizeof(struct udphdr);
+ udp->uh_sport = htons(V_tcp_udp_tunneling_port);
+ udp->uh_dport = tw->t_port;
+ ulen = (hdrlen - sizeof(struct ip6_hdr));
+ th = (struct tcphdr *)(udp + 1);
+ } else
+ th = (struct tcphdr *)(ip6 + 1);
+ tcpip_fillheaders(inp, tw->t_port, ip6, th);
}
#endif
#if defined(INET6) && defined(INET)
@@ -590,8 +622,16 @@ tcp_twrespond(struct tcptw *tw, int flags)
{
hdrlen = sizeof(struct tcpiphdr);
ip = mtod(m, struct ip *);
- th = (struct tcphdr *)(ip + 1);
- tcpip_fillheaders(inp, ip, th);
+ if (tw->t_port) {
+ udp = (struct udphdr *)(ip + 1);
+ hdrlen += sizeof(struct udphdr);
+ udp->uh_sport = htons(V_tcp_udp_tunneling_port);
+ udp->uh_dport = tw->t_port;
+ ulen = (hdrlen - sizeof(struct ip));
+ th = (struct tcphdr *)(udp + 1);
+ } else
+ th = (struct tcphdr *)(ip + 1);
+ tcpip_fillheaders(inp, tw->t_port, ip, th);
}
#endif
to.to_flags = 0;
@@ -607,6 +647,10 @@ tcp_twrespond(struct tcptw *tw, int flags)
}
optlen = tcp_addoptions(&to, (u_char *)(th + 1));
+ if (udp) {
+ ulen += optlen;
+ udp->uh_ulen = htons(ulen);
+ }
m->m_len = hdrlen + optlen;
m->m_pkthdr.len = m->m_len;
@@ -618,12 +662,19 @@ tcp_twrespond(struct tcptw *tw, int flags)
th->th_flags = flags;
th->th_win = htons(tw->last_win);
- m->m_pkthdr.csum_data = offsetof(struct tcphdr, th_sum);
#ifdef INET6
if (isipv6) {
- m->m_pkthdr.csum_flags = CSUM_TCP_IPV6;
- th->th_sum = in6_cksum_pseudo(ip6,
- sizeof(struct tcphdr) + optlen, IPPROTO_TCP, 0);
+ if (tw->t_port) {
+ m->m_pkthdr.csum_flags = CSUM_UDP_IPV6;
+ m->m_pkthdr.csum_data = offsetof(struct udphdr, uh_sum);
+ udp->uh_sum = in6_cksum_pseudo(ip6, ulen, IPPROTO_UDP, 0);
+ th->th_sum = htons(0);
+ } else {
+ m->m_pkthdr.csum_flags = CSUM_TCP_IPV6;
+ m->m_pkthdr.csum_data = offsetof(struct tcphdr, th_sum);
+ th->th_sum = in6_cksum_pseudo(ip6,
+ sizeof(struct tcphdr) + optlen, IPPROTO_TCP, 0);
+ }
ip6->ip6_hlim = in6_selecthlim(inp, NULL);
TCP_PROBE5(send, NULL, NULL, ip6, NULL, th);
error = ip6_output(m, inp->in6p_outputopts, NULL,
@@ -635,9 +686,18 @@ tcp_twrespond(struct tcptw *tw, int flags)
#endif
#ifdef INET
{
- m->m_pkthdr.csum_flags = CSUM_TCP;
- th->th_sum = in_pseudo(ip->ip_src.s_addr, ip->ip_dst.s_addr,
- htons(sizeof(struct tcphdr) + optlen + IPPROTO_TCP));
+ if (tw->t_port) {
+ m->m_pkthdr.csum_flags = CSUM_UDP;
+ m->m_pkthdr.csum_data = offsetof(struct udphdr, uh_sum);
+ udp->uh_sum = in_pseudo(ip->ip_src.s_addr,
+ ip->ip_dst.s_addr, htons(ulen + IPPROTO_UDP));
+ th->th_sum = htons(0);
+ } else {
+ m->m_pkthdr.csum_flags = CSUM_TCP;
+ m->m_pkthdr.csum_data = offsetof(struct tcphdr, th_sum);
+ th->th_sum = in_pseudo(ip->ip_src.s_addr, ip->ip_dst.s_addr,
+ htons(sizeof(struct tcphdr) + optlen + IPPROTO_TCP));
+ }
ip->ip_len = htons(m->m_pkthdr.len);
if (V_path_mtu_discovery)
ip->ip_off |= htons(IP_DF);
diff --git a/sys/netinet/tcp_usrreq.c b/sys/netinet/tcp_usrreq.c
index 31b580bfafcc..c4cfb5ea199f 100644
--- a/sys/netinet/tcp_usrreq.c
+++ b/sys/netinet/tcp_usrreq.c
@@ -2049,6 +2049,31 @@ unlock_and_done:
}
goto unlock_and_done;
+ case TCP_REMOTE_UDP_ENCAPS_PORT:
+ INP_WUNLOCK(inp);
+ error = sooptcopyin(sopt, &optval, sizeof optval,
+ sizeof optval);
+ if (error)
+ return (error);
+ if ((optval < TCP_TUNNELING_PORT_MIN) ||
+ (optval > TCP_TUNNELING_PORT_MAX)) {
+ /* Its got to be in range */
+ return (EINVAL);
+ }
+ if ((V_tcp_udp_tunneling_port == 0) && (optval != 0)) {
+ /* You have to have enabled a UDP tunneling port first */
+ return (EINVAL);
+ }
+ INP_WLOCK_RECHECK(inp);
+ if (tp->t_state != TCPS_CLOSED) {
+ /* You can't change after you are connected */
+ error = EINVAL;
+ } else {
+ /* Ok we are all good set the port */
+ tp->t_port = htons(optval);
+ }
+ goto unlock_and_done;
+
case TCP_MAXSEG:
INP_WUNLOCK(inp);
error = sooptcopyin(sopt, &optval, sizeof optval,
@@ -2388,6 +2413,11 @@ unlock_and_done:
INP_WUNLOCK(inp);
error = sooptcopyout(sopt, &optval, sizeof optval);
break;
+ case TCP_REMOTE_UDP_ENCAPS_PORT:
+ optval = ntohs(tp->t_port);
+ INP_WUNLOCK(inp);
+ error = sooptcopyout(sopt, &optval, sizeof optval);
+ break;
case TCP_NOOPT:
optval = tp->t_flags & TF_NOOPT;
INP_WUNLOCK(inp);
diff --git a/sys/netinet/tcp_var.h b/sys/netinet/tcp_var.h
index 4d28cab80d89..dfd2f239d007 100644
--- a/sys/netinet/tcp_var.h
+++ b/sys/netinet/tcp_var.h
@@ -282,6 +282,16 @@ struct tcptemp {
struct tcphdr tt_t;
};
+/* Allowed range and default for the TCP/UDP tunneling port (0 means no tunneling port is enabled) */
+#define TCP_TUNNELING_PORT_MIN 0
+#define TCP_TUNNELING_PORT_MAX 65535
+#define TCP_TUNNELING_PORT_DEFAULT 0
+
+/* Allowed range and default for the TCP/UDP tunneling overhead; the minimum is the size of the UDP header */
+#define TCP_TUNNELING_OVERHEAD_MIN sizeof(struct udphdr)
+#define TCP_TUNNELING_OVERHEAD_MAX 1024
+#define TCP_TUNNELING_OVERHEAD_DEFAULT TCP_TUNNELING_OVERHEAD_MIN
+
/* Minimum map entries limit value, if set */
#define TCP_MIN_MAP_ENTRIES_LIMIT 128
@@ -502,6 +512,8 @@ struct in_conninfo;
struct tcptw {
struct inpcb *tw_inpcb; /* XXX back pointer to internet pcb */
+ uint32_t t_port:16, /* UDP port number if TCPoUDP */
+ t_unused:16;
tcp_seq snd_nxt;
tcp_seq rcv_nxt;
tcp_seq iss;
@@ -678,7 +690,10 @@ struct tcpstat {
uint64_t tcps_pmtud_blackhole_activated_min_mss; /* BH at min MSS Count */
uint64_t tcps_pmtud_blackhole_failed; /* Black Hole Failure Count */
- uint64_t _pad[12]; /* 6 UTO, 6 TBD */
+ uint64_t tcps_tunneled_pkts; /* Packets encap's in UDP received */
+ uint64_t tcps_tunneled_errs; /* Packets that had errors that were UDP encaped */
+
+ uint64_t _pad[10]; /* 6 UTO, 6 TBD */
};
#define tcps_rcvmemdrop tcps_rcvreassfull /* compat */
@@ -776,7 +791,9 @@ struct xtcpcb {
uint32_t t_rcv_wnd; /* (s) */
uint32_t t_snd_wnd; /* (s) */
uint32_t xt_ecn; /* (s) */
- int32_t spare32[26];
+ uint16_t xt_encaps_port; /* (s) */
+ int16_t spare16;
+ int32_t spare32[25];
} __aligned(8);
#ifdef _KERNEL
@@ -867,6 +884,8 @@ VNET_DECLARE(int, tcp_sack_globalmaxholes);
VNET_DECLARE(int, tcp_sack_maxholes);
VNET_DECLARE(int, tcp_sc_rst_sock_fail);
VNET_DECLARE(int, tcp_sendspace);
+VNET_DECLARE(int, tcp_udp_tunneling_overhead);
+VNET_DECLARE(int, tcp_udp_tunneling_port);
VNET_DECLARE(struct inpcbhead, tcb);
VNET_DECLARE(struct inpcbinfo, tcbinfo);
@@ -929,6 +948,7 @@ void tcp_twstart(struct tcpcb *);
void tcp_twclose(struct tcptw *, int);
void tcp_ctlinput(int, struct sockaddr *, void *);
int tcp_ctloutput(struct socket *, struct sockopt *);
+void tcp_ctlinput_viaudp(int, struct sockaddr *, void *, void *);
struct tcpcb *
tcp_drop(struct tcpcb *, int);
void tcp_drain(void);
@@ -963,6 +983,7 @@ void hhook_run_tcp_est_in(struct tcpcb *tp,
int tcp_input(struct mbuf **, int *, int);
int tcp_autorcvbuf(struct mbuf *, struct tcphdr *, struct socket *,
struct tcpcb *, int);
+int tcp_input_with_port(struct mbuf **, int *, int, uint16_t);
void tcp_handle_wakeup(struct tcpcb *, struct socket *);
void tcp_do_segment(struct mbuf *, struct tcphdr *,
struct socket *, struct tcpcb *, int, int, uint8_t);
@@ -1033,7 +1054,7 @@ void tcp_setpersist(struct tcpcb *);
void tcp_slowtimo(void);
struct tcptemp *
tcpip_maketemplate(struct inpcb *);
-void tcpip_fillheaders(struct inpcb *, void *, void *);
+void tcpip_fillheaders(struct inpcb *, uint16_t, void *, void *);
void tcp_timer_activate(struct tcpcb *, uint32_t, u_int);
int tcp_timer_suspend(struct tcpcb *, uint32_t);
void tcp_timers_unsuspend(struct tcpcb *, uint32_t);
diff --git a/sys/netinet/toecore.c b/sys/netinet/toecore.c
index 480aa64c1bf7..d8d499a6fde3 100644
--- a/sys/netinet/toecore.c
+++ b/sys/netinet/toecore.c
@@ -352,7 +352,7 @@ toe_syncache_add(struct in_conninfo *inc, struct tcpopt *to, struct tcphdr *th,
INP_RLOCK_ASSERT(inp);
(void )syncache_add(inc, to, th, inp, inp->inp_socket, NULL, tod,
- todctx, iptos);
+ todctx, iptos, htons(0));
}
int
@@ -362,7 +362,7 @@ toe_syncache_expand(struct in_conninfo *inc, struct tcpopt *to,
NET_EPOCH_ASSERT();
- return (syncache_expand(inc, to, th, lsop, NULL));
+ return (syncache_expand(inc, to, th, lsop, NULL, htons(0)));
}
/*
diff --git a/sys/netinet6/tcp6_var.h b/sys/netinet6/tcp6_var.h
index 7c758fbd3479..2e411963676e 100644
--- a/sys/netinet6/tcp6_var.h
+++ b/sys/netinet6/tcp6_var.h
@@ -74,8 +74,10 @@ VNET_DECLARE(int, tcp_v6mssdflt); /* XXX */
struct ip6_hdr;
void tcp6_ctlinput(int, struct sockaddr *, void *);
+void tcp6_ctlinput_viaudp(int, struct sockaddr *, void *, void *);
void tcp6_init(void);
int tcp6_input(struct mbuf **, int *, int);
+int tcp6_input_with_port(struct mbuf **, int *, int, uint16_t);
extern struct pr_usrreqs tcp6_usrreqs;
diff --git a/sys/sys/mbuf.h b/sys/sys/mbuf.h
index 371ae8feae46..dd92103cb1fd 100644
--- a/sys/sys/mbuf.h
+++ b/sys/sys/mbuf.h
@@ -198,6 +198,7 @@ struct pkthdr {
} PH_loc;
};
#define ether_vtag PH_per.sixteen[0]
+#define tcp_tun_port PH_per.sixteen[0] /* outbound */
#define PH_vt PH_per
#define vt_nrecs sixteen[0] /* mld and v6-ND */
#define tso_segsz PH_per.sixteen[1] /* inbound after LRO */
diff --git a/usr.bin/netstat/inet.c b/usr.bin/netstat/inet.c
index 95b0d8931b26..49478c4a9247 100644
--- a/usr.bin/netstat/inet.c
+++ b/usr.bin/netstat/inet.c
@@ -664,6 +664,10 @@ tcp_stats(u_long off, const char *name, int af1 __unused, int proto __unused)
"{N:(for} {:received-ack-bytes/%ju} {N:/byte%s})\n");
p(tcps_rcvdupack, "\t\t{:received-duplicate-acks/%ju} "
"{N:/duplicate ack%s}\n");
+ p(tcps_tunneled_pkts, "\t\t{:received-udp-tunneled-pkts/%ju} "
+ "{N:/UDP tunneled pkt%s}\n");
+ p(tcps_tunneled_errs, "\t\t{:received-bad-udp-tunneled-pkts/%ju} "
+ "{N:/UDP tunneled pkt cnt with error%s}\n");
p(tcps_rcvacktoomuch, "\t\t{:received-acks-for-unsent-data/%ju} "
"{N:/ack%s for unsent data}\n");
p2(tcps_rcvpack, tcps_rcvbyte, "\t\t"
diff --git a/usr.bin/sockstat/sockstat.1 b/usr.bin/sockstat/sockstat.1
index 8521c50348c9..f602ad467f9f 100644
--- a/usr.bin/sockstat/sockstat.1
+++ b/usr.bin/sockstat/sockstat.1
@@ -27,7 +27,7 @@
.\"
.\" $FreeBSD$
.\"
-.Dd December 30, 2020
+.Dd March 28, 2021
.Dt SOCKSTAT 1
.Os
.Sh NAME
@@ -98,7 +98,7 @@ Display the protocol state, if applicable.
This is currently only implemented for SCTP and TCP.
.It Fl U
Display the remote UDP encapsulation port number, if applicable.
-This is currently only implemented for SCTP.
+This is currently only implemented for SCTP and TCP.
.It Fl u
Show
.Dv AF_LOCAL
@@ -163,7 +163,7 @@ The address the foreign end of the socket is bound to (see
.It Li ENCAPS
The remote UDP encapsulation port number if
.Fl U
-is specified (only for SCTP).
+is specified (only for SCTP or TCP).
.It Li PATH STATE
The path state if
.Fl s
diff --git a/usr.bin/sockstat/sockstat.c b/usr.bin/sockstat/sockstat.c
index 26f31d96b8e0..109b254b7438 100644
--- a/usr.bin/sockstat/sockstat.c
+++ b/usr.bin/sockstat/sockstat.c
@@ -710,6 +710,8 @@ gather_inet(int proto)
sockaddr(&faddr->address, sock->family,
&xip->in6p_faddr, xip->inp_fport);
}
+ if (proto == IPPROTO_TCP)
+ faddr->encaps_port = xtp->xt_encaps_port;
laddr->next = NULL;
faddr->next = NULL;
sock->laddr = laddr;
@@ -1087,10 +1089,13 @@ displaysock(struct sock *s, int pos)
}
if (opt_U) {
if (faddr != NULL &&
- s->proto == IPPROTO_SCTP &&
- s->state != SCTP_CLOSED &&
- s->state != SCTP_BOUND &&
- s->state != SCTP_LISTEN) {
+ ((s->proto == IPPROTO_SCTP &&
+ s->state != SCTP_CLOSED &&
+ s->state != SCTP_BOUND &&
+ s->state != SCTP_LISTEN) ||
+ (s->proto == IPPROTO_TCP &&
+ s->state != TCPS_CLOSED &&
+ s->state != TCPS_LISTEN))) {
while (pos < offset)
pos += xprintf(" ");
pos += xprintf("%u",