aboutsummaryrefslogtreecommitdiff
path: root/sys/dev/netmap/netmap_offloadings.c
diff options
context:
space:
mode:
author Luigi Rizzo <luigi@FreeBSD.org> 2016-10-16 14:13:32 +0000
committer Luigi Rizzo <luigi@FreeBSD.org> 2016-10-16 14:13:32 +0000
commit 37e3a6d349581b4dd0aebf24be7b1b159a698dcf (patch)
tree 0e61deea141c9733af511b0485cf1fd0f2dd17ed /sys/dev/netmap/netmap_offloadings.c
parent 63f6b1a75a8e6e33e4f9d65571c6a221444d3b05 (diff)
Notes
Diffstat (limited to 'sys/dev/netmap/netmap_offloadings.c')
-rw-r--r-- sys/dev/netmap/netmap_offloadings.c 260
1 files changed, 174 insertions, 86 deletions
diff --git a/sys/dev/netmap/netmap_offloadings.c b/sys/dev/netmap/netmap_offloadings.c
index dadc1dcbc14c..f8da672ffa53 100644
--- a/sys/dev/netmap/netmap_offloadings.c
+++ b/sys/dev/netmap/netmap_offloadings.c
@@ -1,5 +1,6 @@
/*
- * Copyright (C) 2014 Vincenzo Maffione. All rights reserved.
+ * Copyright (C) 2014-2015 Vincenzo Maffione
+ * All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
@@ -31,9 +32,9 @@
#include <sys/types.h>
#include <sys/errno.h>
#include <sys/param.h> /* defines used in kernel.h */
-#include <sys/malloc.h> /* types used in module initialization */
#include <sys/kernel.h> /* types used in module initialization */
#include <sys/sockio.h>
+#include <sys/malloc.h>
#include <sys/socketvar.h> /* struct socket */
#include <sys/socket.h> /* sockaddrs */
#include <net/if.h>
@@ -64,21 +65,21 @@
/* This routine is called by bdg_mismatch_datapath() when it finishes
* accumulating bytes for a segment, in order to fix some fields in the
* segment headers (which still contain the same content as the header
- * of the original GSO packet). 'buf' points to the beginning (e.g.
- * the ethernet header) of the segment, and 'len' is its length.
+ * of the original GSO packet). 'pkt' points to the beginning of the IP
+ * header of the segment, while 'len' is the length of the IP packet.
*/
-static void gso_fix_segment(uint8_t *buf, size_t len, u_int idx,
- u_int segmented_bytes, u_int last_segment,
- u_int tcp, u_int iphlen)
+static void
+gso_fix_segment(uint8_t *pkt, size_t len, u_int ipv4, u_int iphlen, u_int tcp,
+ u_int idx, u_int segmented_bytes, u_int last_segment)
{
- struct nm_iphdr *iph = (struct nm_iphdr *)(buf + 14);
- struct nm_ipv6hdr *ip6h = (struct nm_ipv6hdr *)(buf + 14);
+ struct nm_iphdr *iph = (struct nm_iphdr *)(pkt);
+ struct nm_ipv6hdr *ip6h = (struct nm_ipv6hdr *)(pkt);
uint16_t *check = NULL;
uint8_t *check_data = NULL;
- if (iphlen == 20) {
+ if (ipv4) {
/* Set the IPv4 "Total Length" field. */
- iph->tot_len = htobe16(len-14);
+ iph->tot_len = htobe16(len);
ND("ip total length %u", be16toh(ip->tot_len));
/* Set the IPv4 "Identification" field. */
@@ -87,15 +88,15 @@ static void gso_fix_segment(uint8_t *buf, size_t len, u_int idx,
/* Compute and insert the IPv4 header checksum. */
iph->check = 0;
- iph->check = nm_csum_ipv4(iph);
+ iph->check = nm_os_csum_ipv4(iph);
ND("IP csum %x", be16toh(iph->check));
- } else {/* if (iphlen == 40) */
+ } else {
/* Set the IPv6 "Payload Len" field. */
- ip6h->payload_len = htobe16(len-14-iphlen);
+ ip6h->payload_len = htobe16(len-iphlen);
}
if (tcp) {
- struct nm_tcphdr *tcph = (struct nm_tcphdr *)(buf + 14 + iphlen);
+ struct nm_tcphdr *tcph = (struct nm_tcphdr *)(pkt + iphlen);
/* Set the TCP sequence number. */
tcph->seq = htobe32(be32toh(tcph->seq) + segmented_bytes);
@@ -110,10 +111,10 @@ static void gso_fix_segment(uint8_t *buf, size_t len, u_int idx,
check = &tcph->check;
check_data = (uint8_t *)tcph;
} else { /* UDP */
- struct nm_udphdr *udph = (struct nm_udphdr *)(buf + 14 + iphlen);
+ struct nm_udphdr *udph = (struct nm_udphdr *)(pkt + iphlen);
/* Set the UDP 'Length' field. */
- udph->len = htobe16(len-14-iphlen);
+ udph->len = htobe16(len-iphlen);
check = &udph->check;
check_data = (uint8_t *)udph;
@@ -121,48 +122,80 @@ static void gso_fix_segment(uint8_t *buf, size_t len, u_int idx,
/* Compute and insert TCP/UDP checksum. */
*check = 0;
- if (iphlen == 20)
- nm_csum_tcpudp_ipv4(iph, check_data, len-14-iphlen, check);
+ if (ipv4)
+ nm_os_csum_tcpudp_ipv4(iph, check_data, len-iphlen, check);
else
- nm_csum_tcpudp_ipv6(ip6h, check_data, len-14-iphlen, check);
+ nm_os_csum_tcpudp_ipv6(ip6h, check_data, len-iphlen, check);
ND("TCP/UDP csum %x", be16toh(*check));
}
+static int
+vnet_hdr_is_bad(struct nm_vnet_hdr *vh) /* vh is only read here: its gso_type and flags fields */
+{
+ uint8_t gso_type = vh->gso_type & ~VIRTIO_NET_HDR_GSO_ECN; /* clear the ECN bit so only the remaining type bits are compared */
+ /* Result is 1 ("bad") if either test below is true, 0 otherwise. */
+ return (
+ (gso_type != VIRTIO_NET_HDR_GSO_NONE && /* test 1: GSO type is none of the four accepted values */
+ gso_type != VIRTIO_NET_HDR_GSO_TCPV4 &&
+ gso_type != VIRTIO_NET_HDR_GSO_UDP &&
+ gso_type != VIRTIO_NET_HDR_GSO_TCPV6)
+ ||
+ (vh->flags & ~(VIRTIO_NET_HDR_F_NEEDS_CSUM /* test 2: some flag bit other than NEEDS_CSUM / DATA_VALID is set */
+ | VIRTIO_NET_HDR_F_DATA_VALID))
+ );
+}
/* The VALE mismatch datapath implementation. */
-void bdg_mismatch_datapath(struct netmap_vp_adapter *na,
- struct netmap_vp_adapter *dst_na,
- struct nm_bdg_fwd *ft_p, struct netmap_ring *ring,
- u_int *j, u_int lim, u_int *howmany)
+void
+bdg_mismatch_datapath(struct netmap_vp_adapter *na,
+ struct netmap_vp_adapter *dst_na,
+ const struct nm_bdg_fwd *ft_p,
+ struct netmap_ring *dst_ring,
+ u_int *j, u_int lim, u_int *howmany)
{
- struct netmap_slot *slot = NULL;
+ struct netmap_slot *dst_slot = NULL;
struct nm_vnet_hdr *vh = NULL;
- /* Number of source slots to process. */
- u_int frags = ft_p->ft_frags;
- struct nm_bdg_fwd *ft_end = ft_p + frags;
+ const struct nm_bdg_fwd *ft_end = ft_p + ft_p->ft_frags;
/* Source and destination pointers. */
uint8_t *dst, *src;
size_t src_len, dst_len;
+ /* Indices and counters for the destination ring. */
u_int j_start = *j;
+ u_int j_cur = j_start;
u_int dst_slots = 0;
- /* If the source port uses the offloadings, while destination doesn't,
- * we grab the source virtio-net header and do the offloadings here.
- */
- if (na->virt_hdr_len && !dst_na->virt_hdr_len) {
- vh = (struct nm_vnet_hdr *)ft_p->ft_buf;
+ if (unlikely(ft_p == ft_end)) {
+ RD(3, "No source slots to process");
+ return;
}
/* Init source and dest pointers. */
src = ft_p->ft_buf;
src_len = ft_p->ft_len;
- slot = &ring->slot[*j];
- dst = NMB(&dst_na->up, slot);
+ dst_slot = &dst_ring->slot[j_cur];
+ dst = NMB(&dst_na->up, dst_slot);
dst_len = src_len;
+ /* If the source port uses the offloadings, while destination doesn't,
+ * we grab the source virtio-net header and do the offloadings here.
+ */
+ if (na->up.virt_hdr_len && !dst_na->up.virt_hdr_len) {
+ vh = (struct nm_vnet_hdr *)src;
+ /* Initial sanity check on the source virtio-net header. If
+ * something seems wrong, just drop the packet. */
+ if (src_len < na->up.virt_hdr_len) {
+ RD(3, "Short src vnet header, dropping");
+ return;
+ }
+ if (vnet_hdr_is_bad(vh)) {
+ RD(3, "Bad src vnet header, dropping");
+ return;
+ }
+ }
+
/* We are processing the first input slot and there is a mismatch
* between source and destination virt_hdr_len (SHL and DHL).
* When a client is using virtio-net headers, the header length
@@ -185,14 +218,14 @@ void bdg_mismatch_datapath(struct netmap_vp_adapter *na,
* 12 | 0 | doesn't exist
* 12 | 10 | copied from the first 10 bytes of source header
*/
- bzero(dst, dst_na->virt_hdr_len);
- if (na->virt_hdr_len && dst_na->virt_hdr_len)
+ bzero(dst, dst_na->up.virt_hdr_len);
+ if (na->up.virt_hdr_len && dst_na->up.virt_hdr_len)
memcpy(dst, src, sizeof(struct nm_vnet_hdr));
/* Skip the virtio-net headers. */
- src += na->virt_hdr_len;
- src_len -= na->virt_hdr_len;
- dst += dst_na->virt_hdr_len;
- dst_len = dst_na->virt_hdr_len + src_len;
+ src += na->up.virt_hdr_len;
+ src_len -= na->up.virt_hdr_len;
+ dst += dst_na->up.virt_hdr_len;
+ dst_len = dst_na->up.virt_hdr_len + src_len;
/* Here it could be dst_len == 0 (which implies src_len == 0),
* so we avoid passing a zero length fragment.
@@ -214,16 +247,27 @@ void bdg_mismatch_datapath(struct netmap_vp_adapter *na,
u_int gso_idx = 0;
/* Payload data bytes segmented so far (e.g. TCP data bytes). */
u_int segmented_bytes = 0;
+ /* Is this an IPv4 or IPv6 GSO packet? */
+ u_int ipv4 = 0;
/* Length of the IP header (20 if IPv4, 40 if IPv6). */
u_int iphlen = 0;
+ /* Length of the Ethernet header (18 if 802.1q, otherwise 14). */
+ u_int ethhlen = 14;
/* Is this a TCP or a UDP GSO packet? */
u_int tcp = ((vh->gso_type & ~VIRTIO_NET_HDR_GSO_ECN)
== VIRTIO_NET_HDR_GSO_UDP) ? 0 : 1;
/* Segment the GSO packet contained in the input slots (frags). */
- while (ft_p != ft_end) {
+ for (;;) {
size_t copy;
+ if (dst_slots >= *howmany) {
+ /* We still have work to do, but we've run out of
+ * dst slots, so we have to drop the packet. */
+ RD(3, "Not enough slots, dropping GSO packet");
+ return;
+ }
+
/* Grab the GSO header if we don't have it. */
if (!gso_hdr) {
uint16_t ethertype;
@@ -231,28 +275,75 @@ void bdg_mismatch_datapath(struct netmap_vp_adapter *na,
gso_hdr = src;
/* Look at the 'Ethertype' field to see if this packet
- * is IPv4 or IPv6.
- */
- ethertype = be16toh(*((uint16_t *)(gso_hdr + 12)));
- if (ethertype == 0x0800)
- iphlen = 20;
- else /* if (ethertype == 0x86DD) */
- iphlen = 40;
+ * is IPv4 or IPv6, taking into account VLAN
+ * encapsulation. */
+ for (;;) {
+ if (src_len < ethhlen) {
+ RD(3, "Short GSO fragment [eth], dropping");
+ return;
+ }
+ ethertype = be16toh(*((uint16_t *)
+ (gso_hdr + ethhlen - 2)));
+ if (ethertype != 0x8100) /* not 802.1q */
+ break;
+ ethhlen += 4;
+ }
+ switch (ethertype) {
+ case 0x0800: /* IPv4 */
+ {
+ struct nm_iphdr *iph = (struct nm_iphdr *)
+ (gso_hdr + ethhlen);
+
+ if (src_len < ethhlen + 20) {
+ RD(3, "Short GSO fragment "
+ "[IPv4], dropping");
+ return;
+ }
+ ipv4 = 1;
+ iphlen = 4 * (iph->version_ihl & 0x0F);
+ break;
+ }
+ case 0x86DD: /* IPv6 */
+ ipv4 = 0;
+ iphlen = 40;
+ break;
+ default:
+ RD(3, "Unsupported ethertype, "
+ "dropping GSO packet");
+ return;
+ }
ND(3, "type=%04x", ethertype);
+ if (src_len < ethhlen + iphlen) {
+ RD(3, "Short GSO fragment [IP], dropping");
+ return;
+ }
+
/* Compute gso_hdr_len. For TCP we need to read the
* content of the 'Data Offset' field.
*/
if (tcp) {
- struct nm_tcphdr *tcph =
- (struct nm_tcphdr *)&gso_hdr[14+iphlen];
+ struct nm_tcphdr *tcph = (struct nm_tcphdr *)
+ (gso_hdr + ethhlen + iphlen);
- gso_hdr_len = 14 + iphlen + 4*(tcph->doff >> 4);
- } else
- gso_hdr_len = 14 + iphlen + 8; /* UDP */
+ if (src_len < ethhlen + iphlen + 20) {
+ RD(3, "Short GSO fragment "
+ "[TCP], dropping");
+ return;
+ }
+ gso_hdr_len = ethhlen + iphlen +
+ 4 * (tcph->doff >> 4);
+ } else {
+ gso_hdr_len = ethhlen + iphlen + 8; /* UDP */
+ }
+
+ if (src_len < gso_hdr_len) {
+ RD(3, "Short GSO fragment [TCP/UDP], dropping");
+ return;
+ }
ND(3, "gso_hdr_len %u gso_mtu %d", gso_hdr_len,
- dst_na->mfs);
+ dst_na->mfs);
/* Advance source pointers. */
src += gso_hdr_len;
@@ -263,7 +354,6 @@ void bdg_mismatch_datapath(struct netmap_vp_adapter *na,
break;
src = ft_p->ft_buf;
src_len = ft_p->ft_len;
- continue;
}
}
@@ -289,25 +379,24 @@ void bdg_mismatch_datapath(struct netmap_vp_adapter *na,
/* After raw segmentation, we must fix some header
* fields and compute checksums, in a protocol dependent
* way. */
- gso_fix_segment(dst, gso_bytes, gso_idx,
- segmented_bytes,
- src_len == 0 && ft_p + 1 == ft_end,
- tcp, iphlen);
+ gso_fix_segment(dst + ethhlen, gso_bytes - ethhlen,
+ ipv4, iphlen, tcp,
+ gso_idx, segmented_bytes,
+ src_len == 0 && ft_p + 1 == ft_end);
ND("frame %u completed with %d bytes", gso_idx, (int)gso_bytes);
- slot->len = gso_bytes;
- slot->flags = 0;
- segmented_bytes += gso_bytes - gso_hdr_len;
-
+ dst_slot->len = gso_bytes;
+ dst_slot->flags = 0;
dst_slots++;
-
- /* Next destination slot. */
- *j = nm_next(*j, lim);
- slot = &ring->slot[*j];
- dst = NMB(&dst_na->up, slot);
+ segmented_bytes += gso_bytes - gso_hdr_len;
gso_bytes = 0;
gso_idx++;
+
+ /* Next destination slot. */
+ j_cur = nm_next(j_cur, lim);
+ dst_slot = &dst_ring->slot[j_cur];
+ dst = NMB(&dst_na->up, dst_slot);
}
/* Next input slot. */
@@ -342,10 +431,10 @@ void bdg_mismatch_datapath(struct netmap_vp_adapter *na,
/* Init/update the packet checksum if needed. */
if (vh && (vh->flags & VIRTIO_NET_HDR_F_NEEDS_CSUM)) {
if (!dst_slots)
- csum = nm_csum_raw(src + vh->csum_start,
+ csum = nm_os_csum_raw(src + vh->csum_start,
src_len - vh->csum_start, 0);
else
- csum = nm_csum_raw(src, src_len, csum);
+ csum = nm_os_csum_raw(src, src_len, csum);
}
/* Round to a multiple of 64 */
@@ -359,44 +448,43 @@ void bdg_mismatch_datapath(struct netmap_vp_adapter *na,
} else {
memcpy(dst, src, (int)src_len);
}
- slot->len = dst_len;
-
+ dst_slot->len = dst_len;
dst_slots++;
/* Next destination slot. */
- *j = nm_next(*j, lim);
- slot = &ring->slot[*j];
- dst = NMB(&dst_na->up, slot);
+ j_cur = nm_next(j_cur, lim);
+ dst_slot = &dst_ring->slot[j_cur];
+ dst = NMB(&dst_na->up, dst_slot);
/* Next source slot. */
ft_p++;
src = ft_p->ft_buf;
dst_len = src_len = ft_p->ft_len;
-
}
/* Finalize (fold) the checksum if needed. */
if (check && vh && (vh->flags & VIRTIO_NET_HDR_F_NEEDS_CSUM)) {
- *check = nm_csum_fold(csum);
+ *check = nm_os_csum_fold(csum);
}
ND(3, "using %u dst_slots", dst_slots);
- /* A second pass on the desitations slots to set the slot flags,
+ /* A second pass on the destination slots to set the slot flags,
* using the right number of destination slots.
*/
- while (j_start != *j) {
- slot = &ring->slot[j_start];
- slot->flags = (dst_slots << 8)| NS_MOREFRAG;
+ while (j_start != j_cur) {
+ dst_slot = &dst_ring->slot[j_start];
+ dst_slot->flags = (dst_slots << 8)| NS_MOREFRAG;
j_start = nm_next(j_start, lim);
}
/* Clear NS_MOREFRAG flag on last entry. */
- slot->flags = (dst_slots << 8);
+ dst_slot->flags = (dst_slots << 8);
}
- /* Update howmany. */
+ /* Update howmany and j. This is to commit the use of
+ * those slots in the destination ring. */
if (unlikely(dst_slots > *howmany)) {
- dst_slots = *howmany;
- D("Slot allocation error: Should never happen");
+ D("Slot allocation error: This is a bug");
}
+ *j = j_cur;
*howmany -= dst_slots;
}