netmap_offloadings.c revision 330897
1/*-
2 * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
3 *
4 * Copyright (C) 2014 Vincenzo Maffione. All rights reserved.
5 *
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions
8 * are met:
9 *   1. Redistributions of source code must retain the above copyright
10 *      notice, this list of conditions and the following disclaimer.
11 *   2. Redistributions in binary form must reproduce the above copyright
12 *      notice, this list of conditions and the following disclaimer in the
13 *      documentation and/or other materials provided with the distribution.
14 *
15 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
16 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
17 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
18 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
19 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
20 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
21 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
22 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
23 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
24 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
25 * SUCH DAMAGE.
26 */
27
28/* $FreeBSD: stable/11/sys/dev/netmap/netmap_offloadings.c 330897 2018-03-14 03:19:51Z eadler $ */
29
30#if defined(__FreeBSD__)
31#include <sys/cdefs.h> /* prerequisite */
32
33#include <sys/types.h>
34#include <sys/errno.h>
35#include <sys/param.h>	/* defines used in kernel.h */
36#include <sys/malloc.h>	/* types used in module initialization */
37#include <sys/kernel.h>	/* types used in module initialization */
38#include <sys/sockio.h>
39#include <sys/socketvar.h>	/* struct socket */
40#include <sys/socket.h> /* sockaddrs */
41#include <net/if.h>
42#include <net/if_var.h>
43#include <machine/bus.h>	/* bus_dmamap_* */
44#include <sys/endian.h>
45
46#elif defined(linux)
47
48#include "bsd_glue.h"
49
50#elif defined(__APPLE__)
51
52#warning OSX support is only partial
53#include "osx_glue.h"
54
55#else
56
57#error	Unsupported platform
58
59#endif /* unsupported */
60
61#include <net/netmap.h>
62#include <dev/netmap/netmap_kern.h>
63
64
65
66/* This routine is called by bdg_mismatch_datapath() when it finishes
67 * accumulating bytes for a segment, in order to fix some fields in the
68 * segment headers (which still contain the same content as the header
69 * of the original GSO packet). 'buf' points to the beginning (e.g.
70 * the ethernet header) of the segment, and 'len' is its length.
71 */
72static void gso_fix_segment(uint8_t *buf, size_t len, u_int idx,
73			    u_int segmented_bytes, u_int last_segment,
74			    u_int tcp, u_int iphlen)
75{
76	struct nm_iphdr *iph = (struct nm_iphdr *)(buf + 14);
77	struct nm_ipv6hdr *ip6h = (struct nm_ipv6hdr *)(buf + 14);
78	uint16_t *check = NULL;
79	uint8_t *check_data = NULL;
80
81	if (iphlen == 20) {
82		/* Set the IPv4 "Total Length" field. */
83		iph->tot_len = htobe16(len-14);
84		ND("ip total length %u", be16toh(ip->tot_len));
85
86		/* Set the IPv4 "Identification" field. */
87		iph->id = htobe16(be16toh(iph->id) + idx);
88		ND("ip identification %u", be16toh(iph->id));
89
90		/* Compute and insert the IPv4 header checksum. */
91		iph->check = 0;
92		iph->check = nm_csum_ipv4(iph);
93		ND("IP csum %x", be16toh(iph->check));
94	} else {/* if (iphlen == 40) */
95		/* Set the IPv6 "Payload Len" field. */
96		ip6h->payload_len = htobe16(len-14-iphlen);
97	}
98
99	if (tcp) {
100		struct nm_tcphdr *tcph = (struct nm_tcphdr *)(buf + 14 + iphlen);
101
102		/* Set the TCP sequence number. */
103		tcph->seq = htobe32(be32toh(tcph->seq) + segmented_bytes);
104		ND("tcp seq %u", be32toh(tcph->seq));
105
106		/* Zero the PSH and FIN TCP flags if this is not the last
107		   segment. */
108		if (!last_segment)
109			tcph->flags &= ~(0x8 | 0x1);
110		ND("last_segment %u", last_segment);
111
112		check = &tcph->check;
113		check_data = (uint8_t *)tcph;
114	} else { /* UDP */
115		struct nm_udphdr *udph = (struct nm_udphdr *)(buf + 14 + iphlen);
116
117		/* Set the UDP 'Length' field. */
118		udph->len = htobe16(len-14-iphlen);
119
120		check = &udph->check;
121		check_data = (uint8_t *)udph;
122	}
123
124	/* Compute and insert TCP/UDP checksum. */
125	*check = 0;
126	if (iphlen == 20)
127		nm_csum_tcpudp_ipv4(iph, check_data, len-14-iphlen, check);
128	else
129		nm_csum_tcpudp_ipv6(ip6h, check_data, len-14-iphlen, check);
130
131	ND("TCP/UDP csum %x", be16toh(*check));
132}
133
134
135/* The VALE mismatch datapath implementation. */
136void bdg_mismatch_datapath(struct netmap_vp_adapter *na,
137			   struct netmap_vp_adapter *dst_na,
138			   struct nm_bdg_fwd *ft_p, struct netmap_ring *ring,
139			   u_int *j, u_int lim, u_int *howmany)
140{
141	struct netmap_slot *slot = NULL;
142	struct nm_vnet_hdr *vh = NULL;
143	/* Number of source slots to process. */
144	u_int frags = ft_p->ft_frags;
145	struct nm_bdg_fwd *ft_end = ft_p + frags;
146
147	/* Source and destination pointers. */
148	uint8_t *dst, *src;
149	size_t src_len, dst_len;
150
151	u_int j_start = *j;
152	u_int dst_slots = 0;
153
154	/* If the source port uses the offloadings, while destination doesn't,
155	 * we grab the source virtio-net header and do the offloadings here.
156	 */
157	if (na->virt_hdr_len && !dst_na->virt_hdr_len) {
158		vh = (struct nm_vnet_hdr *)ft_p->ft_buf;
159	}
160
161	/* Init source and dest pointers. */
162	src = ft_p->ft_buf;
163	src_len = ft_p->ft_len;
164	slot = &ring->slot[*j];
165	dst = NMB(&dst_na->up, slot);
166	dst_len = src_len;
167
168	/* We are processing the first input slot and there is a mismatch
169	 * between source and destination virt_hdr_len (SHL and DHL).
170	 * When the a client is using virtio-net headers, the header length
171	 * can be:
172	 *    - 10: the header corresponds to the struct nm_vnet_hdr
173	 *    - 12: the first 10 bytes correspond to the struct
174	 *          virtio_net_hdr, and the last 2 bytes store the
175	 *          "mergeable buffers" info, which is an optional
176	 *	    hint that can be zeroed for compatibility
177	 *
178	 * The destination header is therefore built according to the
179	 * following table:
180	 *
181	 * SHL | DHL | destination header
182	 * -----------------------------
183	 *   0 |  10 | zero
184	 *   0 |  12 | zero
185	 *  10 |   0 | doesn't exist
186	 *  10 |  12 | first 10 bytes are copied from source header, last 2 are zero
187	 *  12 |   0 | doesn't exist
188	 *  12 |  10 | copied from the first 10 bytes of source header
189	 */
190	bzero(dst, dst_na->virt_hdr_len);
191	if (na->virt_hdr_len && dst_na->virt_hdr_len)
192		memcpy(dst, src, sizeof(struct nm_vnet_hdr));
193	/* Skip the virtio-net headers. */
194	src += na->virt_hdr_len;
195	src_len -= na->virt_hdr_len;
196	dst += dst_na->virt_hdr_len;
197	dst_len = dst_na->virt_hdr_len + src_len;
198
199	/* Here it could be dst_len == 0 (which implies src_len == 0),
200	 * so we avoid passing a zero length fragment.
201	 */
202	if (dst_len == 0) {
203		ft_p++;
204		src = ft_p->ft_buf;
205		src_len = ft_p->ft_len;
206		dst_len = src_len;
207	}
208
209	if (vh && vh->gso_type != VIRTIO_NET_HDR_GSO_NONE) {
210		u_int gso_bytes = 0;
211		/* Length of the GSO packet header. */
212		u_int gso_hdr_len = 0;
213		/* Pointer to the GSO packet header. Assume it is in a single fragment. */
214		uint8_t *gso_hdr = NULL;
215		/* Index of the current segment. */
216		u_int gso_idx = 0;
217		/* Payload data bytes segmented so far (e.g. TCP data bytes). */
218		u_int segmented_bytes = 0;
219		/* Length of the IP header (20 if IPv4, 40 if IPv6). */
220		u_int iphlen = 0;
221		/* Is this a TCP or an UDP GSO packet? */
222		u_int tcp = ((vh->gso_type & ~VIRTIO_NET_HDR_GSO_ECN)
223				== VIRTIO_NET_HDR_GSO_UDP) ? 0 : 1;
224
225		/* Segment the GSO packet contained into the input slots (frags). */
226		while (ft_p != ft_end) {
227			size_t copy;
228
229			/* Grab the GSO header if we don't have it. */
230			if (!gso_hdr) {
231				uint16_t ethertype;
232
233				gso_hdr = src;
234
235				/* Look at the 'Ethertype' field to see if this packet
236				 * is IPv4 or IPv6.
237				 */
238				ethertype = be16toh(*((uint16_t *)(gso_hdr  + 12)));
239				if (ethertype == 0x0800)
240					iphlen = 20;
241				else /* if (ethertype == 0x86DD) */
242					iphlen = 40;
243				ND(3, "type=%04x", ethertype);
244
245				/* Compute gso_hdr_len. For TCP we need to read the
246				 * content of the 'Data Offset' field.
247				 */
248				if (tcp) {
249					struct nm_tcphdr *tcph =
250						(struct nm_tcphdr *)&gso_hdr[14+iphlen];
251
252					gso_hdr_len = 14 + iphlen + 4*(tcph->doff >> 4);
253				} else
254					gso_hdr_len = 14 + iphlen + 8; /* UDP */
255
256				ND(3, "gso_hdr_len %u gso_mtu %d", gso_hdr_len,
257								dst_na->mfs);
258
259				/* Advance source pointers. */
260				src += gso_hdr_len;
261				src_len -= gso_hdr_len;
262				if (src_len == 0) {
263					ft_p++;
264					if (ft_p == ft_end)
265						break;
266					src = ft_p->ft_buf;
267					src_len = ft_p->ft_len;
268					continue;
269				}
270			}
271
272			/* Fill in the header of the current segment. */
273			if (gso_bytes == 0) {
274				memcpy(dst, gso_hdr, gso_hdr_len);
275				gso_bytes = gso_hdr_len;
276			}
277
278			/* Fill in data and update source and dest pointers. */
279			copy = src_len;
280			if (gso_bytes + copy > dst_na->mfs)
281				copy = dst_na->mfs - gso_bytes;
282			memcpy(dst + gso_bytes, src, copy);
283			gso_bytes += copy;
284			src += copy;
285			src_len -= copy;
286
287			/* A segment is complete or we have processed all the
288			   the GSO payload bytes. */
289			if (gso_bytes >= dst_na->mfs ||
290				(src_len == 0 && ft_p + 1 == ft_end)) {
291				/* After raw segmentation, we must fix some header
292				 * fields and compute checksums, in a protocol dependent
293				 * way. */
294				gso_fix_segment(dst, gso_bytes, gso_idx,
295						segmented_bytes,
296						src_len == 0 && ft_p + 1 == ft_end,
297						tcp, iphlen);
298
299				ND("frame %u completed with %d bytes", gso_idx, (int)gso_bytes);
300				slot->len = gso_bytes;
301				slot->flags = 0;
302				segmented_bytes += gso_bytes - gso_hdr_len;
303
304				dst_slots++;
305
306				/* Next destination slot. */
307				*j = nm_next(*j, lim);
308				slot = &ring->slot[*j];
309				dst = NMB(&dst_na->up, slot);
310
311				gso_bytes = 0;
312				gso_idx++;
313			}
314
315			/* Next input slot. */
316			if (src_len == 0) {
317				ft_p++;
318				if (ft_p == ft_end)
319					break;
320				src = ft_p->ft_buf;
321				src_len = ft_p->ft_len;
322			}
323		}
324		ND(3, "%d bytes segmented", segmented_bytes);
325
326	} else {
327		/* Address of a checksum field into a destination slot. */
328		uint16_t *check = NULL;
329		/* Accumulator for an unfolded checksum. */
330		rawsum_t csum = 0;
331
332		/* Process a non-GSO packet. */
333
334		/* Init 'check' if necessary. */
335		if (vh && (vh->flags & VIRTIO_NET_HDR_F_NEEDS_CSUM)) {
336			if (unlikely(vh->csum_offset + vh->csum_start > src_len))
337				D("invalid checksum request");
338			else
339				check = (uint16_t *)(dst + vh->csum_start +
340						vh->csum_offset);
341		}
342
343		while (ft_p != ft_end) {
344			/* Init/update the packet checksum if needed. */
345			if (vh && (vh->flags & VIRTIO_NET_HDR_F_NEEDS_CSUM)) {
346				if (!dst_slots)
347					csum = nm_csum_raw(src + vh->csum_start,
348								src_len - vh->csum_start, 0);
349				else
350					csum = nm_csum_raw(src, src_len, csum);
351			}
352
353			/* Round to a multiple of 64 */
354			src_len = (src_len + 63) & ~63;
355
356			if (ft_p->ft_flags & NS_INDIRECT) {
357				if (copyin(src, dst, src_len)) {
358					/* Invalid user pointer, pretend len is 0. */
359					dst_len = 0;
360				}
361			} else {
362				memcpy(dst, src, (int)src_len);
363			}
364			slot->len = dst_len;
365
366			dst_slots++;
367
368			/* Next destination slot. */
369			*j = nm_next(*j, lim);
370			slot = &ring->slot[*j];
371			dst = NMB(&dst_na->up, slot);
372
373			/* Next source slot. */
374			ft_p++;
375			src = ft_p->ft_buf;
376			dst_len = src_len = ft_p->ft_len;
377
378		}
379
380		/* Finalize (fold) the checksum if needed. */
381		if (check && vh && (vh->flags & VIRTIO_NET_HDR_F_NEEDS_CSUM)) {
382			*check = nm_csum_fold(csum);
383		}
384		ND(3, "using %u dst_slots", dst_slots);
385
386		/* A second pass on the desitations slots to set the slot flags,
387		 * using the right number of destination slots.
388		 */
389		while (j_start != *j) {
390			slot = &ring->slot[j_start];
391			slot->flags = (dst_slots << 8)| NS_MOREFRAG;
392			j_start = nm_next(j_start, lim);
393		}
394		/* Clear NS_MOREFRAG flag on last entry. */
395		slot->flags = (dst_slots << 8);
396	}
397
398	/* Update howmany. */
399	if (unlikely(dst_slots > *howmany)) {
400		dst_slots = *howmany;
401		D("Slot allocation error: Should never happen");
402	}
403	*howmany -= dst_slots;
404}
405