aboutsummaryrefslogtreecommitdiff
path: root/sys/dev/netmap/netmap_offloadings.c
diff options
context:
space:
mode:
authorLuigi Rizzo <luigi@FreeBSD.org>2014-02-15 04:53:04 +0000
committerLuigi Rizzo <luigi@FreeBSD.org>2014-02-15 04:53:04 +0000
commitf0ea3689a9c1c27067145ed902811149e78cc4fa (patch)
tree5f40d56905d46741e85cd83a0278b12363e3e2a7 /sys/dev/netmap/netmap_offloadings.c
parent53bf5ef829d5fd312db3851ce6cb589173b744e1 (diff)
Notes
Diffstat (limited to 'sys/dev/netmap/netmap_offloadings.c')
-rw-r--r--sys/dev/netmap/netmap_offloadings.c401
1 files changed, 401 insertions, 0 deletions
diff --git a/sys/dev/netmap/netmap_offloadings.c b/sys/dev/netmap/netmap_offloadings.c
new file mode 100644
index 0000000000000..a776a24245773
--- /dev/null
+++ b/sys/dev/netmap/netmap_offloadings.c
@@ -0,0 +1,401 @@
+/*
+ * Copyright (C) 2014 Vincenzo Maffione. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+/* $FreeBSD$ */
+
+#if defined(__FreeBSD__)
+#include <sys/cdefs.h> /* prerequisite */
+
+#include <sys/types.h>
+#include <sys/errno.h>
+#include <sys/param.h> /* defines used in kernel.h */
+#include <sys/kernel.h> /* types used in module initialization */
+#include <sys/sockio.h>
+#include <sys/socketvar.h> /* struct socket */
+#include <sys/socket.h> /* sockaddrs */
+#include <net/if.h>
+#include <net/if_var.h>
+#include <machine/bus.h> /* bus_dmamap_* */
+#include <sys/endian.h>
+
+#elif defined(linux)
+
+#include "bsd_glue.h"
+
+#elif defined(__APPLE__)
+
+#warning OSX support is only partial
+#include "osx_glue.h"
+
+#else
+
+#error Unsupported platform
+
+#endif /* unsupported */
+
+#include <net/netmap.h>
+#include <dev/netmap/netmap_kern.h>
+
+
+
+/* This routine is called by bdg_mismatch_datapath() when it finishes
+ * accumulating bytes for a segment, in order to fix some fields in the
+ * segment headers (which still contain the same content as the header
+ * of the original GSO packet). 'buf' points to the beginning (e.g.
+ * the ethernet header) of the segment, and 'len' is its length.
+ */
+static void gso_fix_segment(uint8_t *buf, size_t len, u_int idx,
+ u_int segmented_bytes, u_int last_segment,
+ u_int tcp, u_int iphlen)
+{
+ struct nm_iphdr *iph = (struct nm_iphdr *)(buf + 14);
+ struct nm_ipv6hdr *ip6h = (struct nm_ipv6hdr *)(buf + 14);
+ uint16_t *check = NULL;
+ uint8_t *check_data = NULL;
+
+ if (iphlen == 20) {
+ /* Set the IPv4 "Total Length" field. */
+ iph->tot_len = htobe16(len-14);
+ ND("ip total length %u", be16toh(ip->tot_len));
+
+ /* Set the IPv4 "Identification" field. */
+ iph->id = htobe16(be16toh(iph->id) + idx);
+ ND("ip identification %u", be16toh(iph->id));
+
+ /* Compute and insert the IPv4 header checksum. */
+ iph->check = 0;
+ iph->check = nm_csum_ipv4(iph);
+ ND("IP csum %x", be16toh(iph->check));
+ } else {/* if (iphlen == 40) */
+ /* Set the IPv6 "Payload Len" field. */
+ ip6h->payload_len = htobe16(len-14-iphlen);
+ }
+
+ if (tcp) {
+ struct nm_tcphdr *tcph = (struct nm_tcphdr *)(buf + 14 + iphlen);
+
+ /* Set the TCP sequence number. */
+ tcph->seq = htobe32(be32toh(tcph->seq) + segmented_bytes);
+ ND("tcp seq %u", be32toh(tcph->seq));
+
+ /* Zero the PSH and FIN TCP flags if this is not the last
+ segment. */
+ if (!last_segment)
+ tcph->flags &= ~(0x8 | 0x1);
+ ND("last_segment %u", last_segment);
+
+ check = &tcph->check;
+ check_data = (uint8_t *)tcph;
+ } else { /* UDP */
+ struct nm_udphdr *udph = (struct nm_udphdr *)(buf + 14 + iphlen);
+
+ /* Set the UDP 'Length' field. */
+ udph->len = htobe16(len-14-iphlen);
+
+ check = &udph->check;
+ check_data = (uint8_t *)udph;
+ }
+
+ /* Compute and insert TCP/UDP checksum. */
+ *check = 0;
+ if (iphlen == 20)
+ nm_csum_tcpudp_ipv4(iph, check_data, len-14-iphlen, check);
+ else
+ nm_csum_tcpudp_ipv6(ip6h, check_data, len-14-iphlen, check);
+
+ ND("TCP/UDP csum %x", be16toh(*check));
+}
+
+
+/* The VALE mismatch datapath implementation. */
+void bdg_mismatch_datapath(struct netmap_vp_adapter *na,
+ struct netmap_vp_adapter *dst_na,
+ struct nm_bdg_fwd *ft_p, struct netmap_ring *ring,
+ u_int *j, u_int lim, u_int *howmany)
+{
+ struct netmap_slot *slot = NULL;
+ struct nm_vnet_hdr *vh = NULL;
+ /* Number of source slots to process. */
+ u_int frags = ft_p->ft_frags;
+ struct nm_bdg_fwd *ft_end = ft_p + frags;
+
+ /* Source and destination pointers. */
+ uint8_t *dst, *src;
+ size_t src_len, dst_len;
+
+ u_int j_start = *j;
+ u_int dst_slots = 0;
+
+ /* If the source port uses the offloadings, while destination doesn't,
+ * we grab the source virtio-net header and do the offloadings here.
+ */
+ if (na->virt_hdr_len && !dst_na->virt_hdr_len) {
+ vh = (struct nm_vnet_hdr *)ft_p->ft_buf;
+ }
+
+ /* Init source and dest pointers. */
+ src = ft_p->ft_buf;
+ src_len = ft_p->ft_len;
+ slot = &ring->slot[*j];
+ dst = BDG_NMB(&dst_na->up, slot);
+ dst_len = src_len;
+
+ /* We are processing the first input slot and there is a mismatch
+ * between source and destination virt_hdr_len (SHL and DHL).
+ * When the a client is using virtio-net headers, the header length
+ * can be:
+ * - 10: the header corresponds to the struct nm_vnet_hdr
+ * - 12: the first 10 bytes correspond to the struct
+ * virtio_net_hdr, and the last 2 bytes store the
+ * "mergeable buffers" info, which is an optional
+ * hint that can be zeroed for compability
+ *
+ * The destination header is therefore built according to the
+ * following table:
+ *
+ * SHL | DHL | destination header
+ * -----------------------------
+ * 0 | 10 | zero
+ * 0 | 12 | zero
+ * 10 | 0 | doesn't exist
+ * 10 | 12 | first 10 bytes are copied from source header, last 2 are zero
+ * 12 | 0 | doesn't exist
+ * 12 | 10 | copied from the first 10 bytes of source header
+ */
+ bzero(dst, dst_na->virt_hdr_len);
+ if (na->virt_hdr_len && dst_na->virt_hdr_len)
+ memcpy(dst, src, sizeof(struct nm_vnet_hdr));
+ /* Skip the virtio-net headers. */
+ src += na->virt_hdr_len;
+ src_len -= na->virt_hdr_len;
+ dst += dst_na->virt_hdr_len;
+ dst_len = dst_na->virt_hdr_len + src_len;
+
+ /* Here it could be dst_len == 0 (which implies src_len == 0),
+ * so we avoid passing a zero length fragment.
+ */
+ if (dst_len == 0) {
+ ft_p++;
+ src = ft_p->ft_buf;
+ src_len = ft_p->ft_len;
+ dst_len = src_len;
+ }
+
+ if (vh && vh->gso_type != VIRTIO_NET_HDR_GSO_NONE) {
+ u_int gso_bytes = 0;
+ /* Length of the GSO packet header. */
+ u_int gso_hdr_len = 0;
+ /* Pointer to the GSO packet header. Assume it is in a single fragment. */
+ uint8_t *gso_hdr = NULL;
+ /* Index of the current segment. */
+ u_int gso_idx = 0;
+ /* Payload data bytes segmented so far (e.g. TCP data bytes). */
+ u_int segmented_bytes = 0;
+ /* Length of the IP header (20 if IPv4, 40 if IPv6). */
+ u_int iphlen = 0;
+ /* Is this a TCP or an UDP GSO packet? */
+ u_int tcp = ((vh->gso_type & ~VIRTIO_NET_HDR_GSO_ECN)
+ == VIRTIO_NET_HDR_GSO_UDP) ? 0 : 1;
+
+ /* Segment the GSO packet contained into the input slots (frags). */
+ while (ft_p != ft_end) {
+ size_t copy;
+
+ /* Grab the GSO header if we don't have it. */
+ if (!gso_hdr) {
+ uint16_t ethertype;
+
+ gso_hdr = src;
+
+ /* Look at the 'Ethertype' field to see if this packet
+ * is IPv4 or IPv6.
+ */
+ ethertype = be16toh(*((uint16_t *)(gso_hdr + 12)));
+ if (ethertype == 0x0800)
+ iphlen = 20;
+ else /* if (ethertype == 0x86DD) */
+ iphlen = 40;
+ ND(3, "type=%04x", ethertype);
+
+ /* Compute gso_hdr_len. For TCP we need to read the
+ * content of the 'Data Offset' field.
+ */
+ if (tcp) {
+ struct nm_tcphdr *tcph =
+ (struct nm_tcphdr *)&gso_hdr[14+iphlen];
+
+ gso_hdr_len = 14 + iphlen + 4*(tcph->doff >> 4);
+ } else
+ gso_hdr_len = 14 + iphlen + 8; /* UDP */
+
+ ND(3, "gso_hdr_len %u gso_mtu %d", gso_hdr_len,
+ dst_na->mfs);
+
+ /* Advance source pointers. */
+ src += gso_hdr_len;
+ src_len -= gso_hdr_len;
+ if (src_len == 0) {
+ ft_p++;
+ if (ft_p == ft_end)
+ break;
+ src = ft_p->ft_buf;
+ src_len = ft_p->ft_len;
+ continue;
+ }
+ }
+
+ /* Fill in the header of the current segment. */
+ if (gso_bytes == 0) {
+ memcpy(dst, gso_hdr, gso_hdr_len);
+ gso_bytes = gso_hdr_len;
+ }
+
+ /* Fill in data and update source and dest pointers. */
+ copy = src_len;
+ if (gso_bytes + copy > dst_na->mfs)
+ copy = dst_na->mfs - gso_bytes;
+ memcpy(dst + gso_bytes, src, copy);
+ gso_bytes += copy;
+ src += copy;
+ src_len -= copy;
+
+ /* A segment is complete or we have processed all the
+ the GSO payload bytes. */
+ if (gso_bytes >= dst_na->mfs ||
+ (src_len == 0 && ft_p + 1 == ft_end)) {
+ /* After raw segmentation, we must fix some header
+ * fields and compute checksums, in a protocol dependent
+ * way. */
+ gso_fix_segment(dst, gso_bytes, gso_idx,
+ segmented_bytes,
+ src_len == 0 && ft_p + 1 == ft_end,
+ tcp, iphlen);
+
+ ND("frame %u completed with %d bytes", gso_idx, (int)gso_bytes);
+ slot->len = gso_bytes;
+ slot->flags = 0;
+ segmented_bytes += gso_bytes - gso_hdr_len;
+
+ dst_slots++;
+
+ /* Next destination slot. */
+ *j = nm_next(*j, lim);
+ slot = &ring->slot[*j];
+ dst = BDG_NMB(&dst_na->up, slot);
+
+ gso_bytes = 0;
+ gso_idx++;
+ }
+
+ /* Next input slot. */
+ if (src_len == 0) {
+ ft_p++;
+ if (ft_p == ft_end)
+ break;
+ src = ft_p->ft_buf;
+ src_len = ft_p->ft_len;
+ }
+ }
+ ND(3, "%d bytes segmented", segmented_bytes);
+
+ } else {
+ /* Address of a checksum field into a destination slot. */
+ uint16_t *check = NULL;
+ /* Accumulator for an unfolded checksum. */
+ rawsum_t csum = 0;
+
+ /* Process a non-GSO packet. */
+
+ /* Init 'check' if necessary. */
+ if (vh && (vh->flags & VIRTIO_NET_HDR_F_NEEDS_CSUM)) {
+ if (unlikely(vh->csum_offset + vh->csum_start > src_len))
+ D("invalid checksum request");
+ else
+ check = (uint16_t *)(dst + vh->csum_start +
+ vh->csum_offset);
+ }
+
+ while (ft_p != ft_end) {
+ /* Init/update the packet checksum if needed. */
+ if (vh && (vh->flags & VIRTIO_NET_HDR_F_NEEDS_CSUM)) {
+ if (!dst_slots)
+ csum = nm_csum_raw(src + vh->csum_start,
+ src_len - vh->csum_start, 0);
+ else
+ csum = nm_csum_raw(src, src_len, csum);
+ }
+
+ /* Round to a multiple of 64 */
+ src_len = (src_len + 63) & ~63;
+
+ if (ft_p->ft_flags & NS_INDIRECT) {
+ if (copyin(src, dst, src_len)) {
+ /* Invalid user pointer, pretend len is 0. */
+ dst_len = 0;
+ }
+ } else {
+ memcpy(dst, src, (int)src_len);
+ }
+ slot->len = dst_len;
+
+ dst_slots++;
+
+ /* Next destination slot. */
+ *j = nm_next(*j, lim);
+ slot = &ring->slot[*j];
+ dst = BDG_NMB(&dst_na->up, slot);
+
+ /* Next source slot. */
+ ft_p++;
+ src = ft_p->ft_buf;
+ dst_len = src_len = ft_p->ft_len;
+
+ }
+
+ /* Finalize (fold) the checksum if needed. */
+ if (check && vh && (vh->flags & VIRTIO_NET_HDR_F_NEEDS_CSUM)) {
+ *check = nm_csum_fold(csum);
+ }
+ ND(3, "using %u dst_slots", dst_slots);
+
+ /* A second pass on the desitations slots to set the slot flags,
+ * using the right number of destination slots.
+ */
+ while (j_start != *j) {
+ slot = &ring->slot[j_start];
+ slot->flags = (dst_slots << 8)| NS_MOREFRAG;
+ j_start = nm_next(j_start, lim);
+ }
+ /* Clear NS_MOREFRAG flag on last entry. */
+ slot->flags = (dst_slots << 8);
+ }
+
+ /* Update howmany. */
+ if (unlikely(dst_slots > *howmany)) {
+ dst_slots = *howmany;
+ D("Slot allocation error: Should never happen");
+ }
+ *howmany -= dst_slots;
+}