/* netmap_offloadings.c revision 330897 */
1/*- 2 * SPDX-License-Identifier: BSD-2-Clause-FreeBSD 3 * 4 * Copyright (C) 2014 Vincenzo Maffione. All rights reserved. 5 * 6 * Redistribution and use in source and binary forms, with or without 7 * modification, are permitted provided that the following conditions 8 * are met: 9 * 1. Redistributions of source code must retain the above copyright 10 * notice, this list of conditions and the following disclaimer. 11 * 2. Redistributions in binary form must reproduce the above copyright 12 * notice, this list of conditions and the following disclaimer in the 13 * documentation and/or other materials provided with the distribution. 14 * 15 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 16 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 17 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 18 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 19 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 20 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 21 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 22 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 23 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 24 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 25 * SUCH DAMAGE. 
26 */ 27 28/* $FreeBSD: stable/11/sys/dev/netmap/netmap_offloadings.c 330897 2018-03-14 03:19:51Z eadler $ */ 29 30#if defined(__FreeBSD__) 31#include <sys/cdefs.h> /* prerequisite */ 32 33#include <sys/types.h> 34#include <sys/errno.h> 35#include <sys/param.h> /* defines used in kernel.h */ 36#include <sys/malloc.h> /* types used in module initialization */ 37#include <sys/kernel.h> /* types used in module initialization */ 38#include <sys/sockio.h> 39#include <sys/socketvar.h> /* struct socket */ 40#include <sys/socket.h> /* sockaddrs */ 41#include <net/if.h> 42#include <net/if_var.h> 43#include <machine/bus.h> /* bus_dmamap_* */ 44#include <sys/endian.h> 45 46#elif defined(linux) 47 48#include "bsd_glue.h" 49 50#elif defined(__APPLE__) 51 52#warning OSX support is only partial 53#include "osx_glue.h" 54 55#else 56 57#error Unsupported platform 58 59#endif /* unsupported */ 60 61#include <net/netmap.h> 62#include <dev/netmap/netmap_kern.h> 63 64 65 66/* This routine is called by bdg_mismatch_datapath() when it finishes 67 * accumulating bytes for a segment, in order to fix some fields in the 68 * segment headers (which still contain the same content as the header 69 * of the original GSO packet). 'buf' points to the beginning (e.g. 70 * the ethernet header) of the segment, and 'len' is its length. 71 */ 72static void gso_fix_segment(uint8_t *buf, size_t len, u_int idx, 73 u_int segmented_bytes, u_int last_segment, 74 u_int tcp, u_int iphlen) 75{ 76 struct nm_iphdr *iph = (struct nm_iphdr *)(buf + 14); 77 struct nm_ipv6hdr *ip6h = (struct nm_ipv6hdr *)(buf + 14); 78 uint16_t *check = NULL; 79 uint8_t *check_data = NULL; 80 81 if (iphlen == 20) { 82 /* Set the IPv4 "Total Length" field. */ 83 iph->tot_len = htobe16(len-14); 84 ND("ip total length %u", be16toh(ip->tot_len)); 85 86 /* Set the IPv4 "Identification" field. 
*/ 87 iph->id = htobe16(be16toh(iph->id) + idx); 88 ND("ip identification %u", be16toh(iph->id)); 89 90 /* Compute and insert the IPv4 header checksum. */ 91 iph->check = 0; 92 iph->check = nm_csum_ipv4(iph); 93 ND("IP csum %x", be16toh(iph->check)); 94 } else {/* if (iphlen == 40) */ 95 /* Set the IPv6 "Payload Len" field. */ 96 ip6h->payload_len = htobe16(len-14-iphlen); 97 } 98 99 if (tcp) { 100 struct nm_tcphdr *tcph = (struct nm_tcphdr *)(buf + 14 + iphlen); 101 102 /* Set the TCP sequence number. */ 103 tcph->seq = htobe32(be32toh(tcph->seq) + segmented_bytes); 104 ND("tcp seq %u", be32toh(tcph->seq)); 105 106 /* Zero the PSH and FIN TCP flags if this is not the last 107 segment. */ 108 if (!last_segment) 109 tcph->flags &= ~(0x8 | 0x1); 110 ND("last_segment %u", last_segment); 111 112 check = &tcph->check; 113 check_data = (uint8_t *)tcph; 114 } else { /* UDP */ 115 struct nm_udphdr *udph = (struct nm_udphdr *)(buf + 14 + iphlen); 116 117 /* Set the UDP 'Length' field. */ 118 udph->len = htobe16(len-14-iphlen); 119 120 check = &udph->check; 121 check_data = (uint8_t *)udph; 122 } 123 124 /* Compute and insert TCP/UDP checksum. */ 125 *check = 0; 126 if (iphlen == 20) 127 nm_csum_tcpudp_ipv4(iph, check_data, len-14-iphlen, check); 128 else 129 nm_csum_tcpudp_ipv6(ip6h, check_data, len-14-iphlen, check); 130 131 ND("TCP/UDP csum %x", be16toh(*check)); 132} 133 134 135/* The VALE mismatch datapath implementation. */ 136void bdg_mismatch_datapath(struct netmap_vp_adapter *na, 137 struct netmap_vp_adapter *dst_na, 138 struct nm_bdg_fwd *ft_p, struct netmap_ring *ring, 139 u_int *j, u_int lim, u_int *howmany) 140{ 141 struct netmap_slot *slot = NULL; 142 struct nm_vnet_hdr *vh = NULL; 143 /* Number of source slots to process. */ 144 u_int frags = ft_p->ft_frags; 145 struct nm_bdg_fwd *ft_end = ft_p + frags; 146 147 /* Source and destination pointers. 
*/ 148 uint8_t *dst, *src; 149 size_t src_len, dst_len; 150 151 u_int j_start = *j; 152 u_int dst_slots = 0; 153 154 /* If the source port uses the offloadings, while destination doesn't, 155 * we grab the source virtio-net header and do the offloadings here. 156 */ 157 if (na->virt_hdr_len && !dst_na->virt_hdr_len) { 158 vh = (struct nm_vnet_hdr *)ft_p->ft_buf; 159 } 160 161 /* Init source and dest pointers. */ 162 src = ft_p->ft_buf; 163 src_len = ft_p->ft_len; 164 slot = &ring->slot[*j]; 165 dst = NMB(&dst_na->up, slot); 166 dst_len = src_len; 167 168 /* We are processing the first input slot and there is a mismatch 169 * between source and destination virt_hdr_len (SHL and DHL). 170 * When the a client is using virtio-net headers, the header length 171 * can be: 172 * - 10: the header corresponds to the struct nm_vnet_hdr 173 * - 12: the first 10 bytes correspond to the struct 174 * virtio_net_hdr, and the last 2 bytes store the 175 * "mergeable buffers" info, which is an optional 176 * hint that can be zeroed for compatibility 177 * 178 * The destination header is therefore built according to the 179 * following table: 180 * 181 * SHL | DHL | destination header 182 * ----------------------------- 183 * 0 | 10 | zero 184 * 0 | 12 | zero 185 * 10 | 0 | doesn't exist 186 * 10 | 12 | first 10 bytes are copied from source header, last 2 are zero 187 * 12 | 0 | doesn't exist 188 * 12 | 10 | copied from the first 10 bytes of source header 189 */ 190 bzero(dst, dst_na->virt_hdr_len); 191 if (na->virt_hdr_len && dst_na->virt_hdr_len) 192 memcpy(dst, src, sizeof(struct nm_vnet_hdr)); 193 /* Skip the virtio-net headers. */ 194 src += na->virt_hdr_len; 195 src_len -= na->virt_hdr_len; 196 dst += dst_na->virt_hdr_len; 197 dst_len = dst_na->virt_hdr_len + src_len; 198 199 /* Here it could be dst_len == 0 (which implies src_len == 0), 200 * so we avoid passing a zero length fragment. 
201 */ 202 if (dst_len == 0) { 203 ft_p++; 204 src = ft_p->ft_buf; 205 src_len = ft_p->ft_len; 206 dst_len = src_len; 207 } 208 209 if (vh && vh->gso_type != VIRTIO_NET_HDR_GSO_NONE) { 210 u_int gso_bytes = 0; 211 /* Length of the GSO packet header. */ 212 u_int gso_hdr_len = 0; 213 /* Pointer to the GSO packet header. Assume it is in a single fragment. */ 214 uint8_t *gso_hdr = NULL; 215 /* Index of the current segment. */ 216 u_int gso_idx = 0; 217 /* Payload data bytes segmented so far (e.g. TCP data bytes). */ 218 u_int segmented_bytes = 0; 219 /* Length of the IP header (20 if IPv4, 40 if IPv6). */ 220 u_int iphlen = 0; 221 /* Is this a TCP or an UDP GSO packet? */ 222 u_int tcp = ((vh->gso_type & ~VIRTIO_NET_HDR_GSO_ECN) 223 == VIRTIO_NET_HDR_GSO_UDP) ? 0 : 1; 224 225 /* Segment the GSO packet contained into the input slots (frags). */ 226 while (ft_p != ft_end) { 227 size_t copy; 228 229 /* Grab the GSO header if we don't have it. */ 230 if (!gso_hdr) { 231 uint16_t ethertype; 232 233 gso_hdr = src; 234 235 /* Look at the 'Ethertype' field to see if this packet 236 * is IPv4 or IPv6. 237 */ 238 ethertype = be16toh(*((uint16_t *)(gso_hdr + 12))); 239 if (ethertype == 0x0800) 240 iphlen = 20; 241 else /* if (ethertype == 0x86DD) */ 242 iphlen = 40; 243 ND(3, "type=%04x", ethertype); 244 245 /* Compute gso_hdr_len. For TCP we need to read the 246 * content of the 'Data Offset' field. 247 */ 248 if (tcp) { 249 struct nm_tcphdr *tcph = 250 (struct nm_tcphdr *)&gso_hdr[14+iphlen]; 251 252 gso_hdr_len = 14 + iphlen + 4*(tcph->doff >> 4); 253 } else 254 gso_hdr_len = 14 + iphlen + 8; /* UDP */ 255 256 ND(3, "gso_hdr_len %u gso_mtu %d", gso_hdr_len, 257 dst_na->mfs); 258 259 /* Advance source pointers. */ 260 src += gso_hdr_len; 261 src_len -= gso_hdr_len; 262 if (src_len == 0) { 263 ft_p++; 264 if (ft_p == ft_end) 265 break; 266 src = ft_p->ft_buf; 267 src_len = ft_p->ft_len; 268 continue; 269 } 270 } 271 272 /* Fill in the header of the current segment. 
*/ 273 if (gso_bytes == 0) { 274 memcpy(dst, gso_hdr, gso_hdr_len); 275 gso_bytes = gso_hdr_len; 276 } 277 278 /* Fill in data and update source and dest pointers. */ 279 copy = src_len; 280 if (gso_bytes + copy > dst_na->mfs) 281 copy = dst_na->mfs - gso_bytes; 282 memcpy(dst + gso_bytes, src, copy); 283 gso_bytes += copy; 284 src += copy; 285 src_len -= copy; 286 287 /* A segment is complete or we have processed all the 288 the GSO payload bytes. */ 289 if (gso_bytes >= dst_na->mfs || 290 (src_len == 0 && ft_p + 1 == ft_end)) { 291 /* After raw segmentation, we must fix some header 292 * fields and compute checksums, in a protocol dependent 293 * way. */ 294 gso_fix_segment(dst, gso_bytes, gso_idx, 295 segmented_bytes, 296 src_len == 0 && ft_p + 1 == ft_end, 297 tcp, iphlen); 298 299 ND("frame %u completed with %d bytes", gso_idx, (int)gso_bytes); 300 slot->len = gso_bytes; 301 slot->flags = 0; 302 segmented_bytes += gso_bytes - gso_hdr_len; 303 304 dst_slots++; 305 306 /* Next destination slot. */ 307 *j = nm_next(*j, lim); 308 slot = &ring->slot[*j]; 309 dst = NMB(&dst_na->up, slot); 310 311 gso_bytes = 0; 312 gso_idx++; 313 } 314 315 /* Next input slot. */ 316 if (src_len == 0) { 317 ft_p++; 318 if (ft_p == ft_end) 319 break; 320 src = ft_p->ft_buf; 321 src_len = ft_p->ft_len; 322 } 323 } 324 ND(3, "%d bytes segmented", segmented_bytes); 325 326 } else { 327 /* Address of a checksum field into a destination slot. */ 328 uint16_t *check = NULL; 329 /* Accumulator for an unfolded checksum. */ 330 rawsum_t csum = 0; 331 332 /* Process a non-GSO packet. */ 333 334 /* Init 'check' if necessary. */ 335 if (vh && (vh->flags & VIRTIO_NET_HDR_F_NEEDS_CSUM)) { 336 if (unlikely(vh->csum_offset + vh->csum_start > src_len)) 337 D("invalid checksum request"); 338 else 339 check = (uint16_t *)(dst + vh->csum_start + 340 vh->csum_offset); 341 } 342 343 while (ft_p != ft_end) { 344 /* Init/update the packet checksum if needed. 
*/ 345 if (vh && (vh->flags & VIRTIO_NET_HDR_F_NEEDS_CSUM)) { 346 if (!dst_slots) 347 csum = nm_csum_raw(src + vh->csum_start, 348 src_len - vh->csum_start, 0); 349 else 350 csum = nm_csum_raw(src, src_len, csum); 351 } 352 353 /* Round to a multiple of 64 */ 354 src_len = (src_len + 63) & ~63; 355 356 if (ft_p->ft_flags & NS_INDIRECT) { 357 if (copyin(src, dst, src_len)) { 358 /* Invalid user pointer, pretend len is 0. */ 359 dst_len = 0; 360 } 361 } else { 362 memcpy(dst, src, (int)src_len); 363 } 364 slot->len = dst_len; 365 366 dst_slots++; 367 368 /* Next destination slot. */ 369 *j = nm_next(*j, lim); 370 slot = &ring->slot[*j]; 371 dst = NMB(&dst_na->up, slot); 372 373 /* Next source slot. */ 374 ft_p++; 375 src = ft_p->ft_buf; 376 dst_len = src_len = ft_p->ft_len; 377 378 } 379 380 /* Finalize (fold) the checksum if needed. */ 381 if (check && vh && (vh->flags & VIRTIO_NET_HDR_F_NEEDS_CSUM)) { 382 *check = nm_csum_fold(csum); 383 } 384 ND(3, "using %u dst_slots", dst_slots); 385 386 /* A second pass on the desitations slots to set the slot flags, 387 * using the right number of destination slots. 388 */ 389 while (j_start != *j) { 390 slot = &ring->slot[j_start]; 391 slot->flags = (dst_slots << 8)| NS_MOREFRAG; 392 j_start = nm_next(j_start, lim); 393 } 394 /* Clear NS_MOREFRAG flag on last entry. */ 395 slot->flags = (dst_slots << 8); 396 } 397 398 /* Update howmany. */ 399 if (unlikely(dst_slots > *howmany)) { 400 dst_slots = *howmany; 401 D("Slot allocation error: Should never happen"); 402 } 403 *howmany -= dst_slots; 404} 405