/* pkt-gen.c revision 274259 */
1/* 2 * Copyright (C) 2011-2014 Matteo Landi, Luigi Rizzo. All rights reserved. 3 * Copyright (C) 2013-2014 Universita` di Pisa. All rights reserved. 4 * 5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted provided that the following conditions 7 * are met: 8 * 1. Redistributions of source code must retain the above copyright 9 * notice, this list of conditions and the following disclaimer. 10 * 2. Redistributions in binary form must reproduce the above copyright 11 * notice, this list of conditions and the following disclaimer in the 12 * documentation and/or other materials provided with the distribution. 13 * 14 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 15 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 16 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 17 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 18 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 19 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 20 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 21 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 22 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 23 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 24 * SUCH DAMAGE. 25 */ 26 27/* 28 * $FreeBSD: stable/10/tools/tools/netmap/pkt-gen.c 274259 2014-11-08 00:42:11Z gnn $ 29 * $Id: pkt-gen.c 12346 2013-06-12 17:36:25Z luigi $ 30 * 31 * Example program to show how to build a multithreaded packet 32 * source/sink using the netmap device. 33 * 34 * In this example we create a programmable number of threads 35 * to take care of all the queues of the interface used to 36 * send or receive traffic. 
37 * 38 */ 39 40// #define TRASH_VHOST_HDR 41 42#define _GNU_SOURCE /* for CPU_SET() */ 43#include <stdio.h> 44#define NETMAP_WITH_LIBS 45#include <net/netmap_user.h> 46 47 48#include <ctype.h> // isprint() 49#include <unistd.h> // sysconf() 50#include <sys/poll.h> 51#include <arpa/inet.h> /* ntohs */ 52#include <sys/sysctl.h> /* sysctl */ 53#include <ifaddrs.h> /* getifaddrs */ 54#include <net/ethernet.h> 55#include <netinet/in.h> 56#include <netinet/ip.h> 57#include <netinet/udp.h> 58 59#include <pthread.h> 60 61#ifndef NO_PCAP 62#include <pcap/pcap.h> 63#endif 64 65#ifdef linux 66 67#define cpuset_t cpu_set_t 68 69#define ifr_flagshigh ifr_flags /* only the low 16 bits here */ 70#define IFF_PPROMISC IFF_PROMISC /* IFF_PPROMISC does not exist */ 71#include <linux/ethtool.h> 72#include <linux/sockios.h> 73 74#define CLOCK_REALTIME_PRECISE CLOCK_REALTIME 75#include <netinet/ether.h> /* ether_aton */ 76#include <linux/if_packet.h> /* sockaddr_ll */ 77#endif /* linux */ 78 79#ifdef __FreeBSD__ 80#include <sys/endian.h> /* le64toh */ 81#include <machine/param.h> 82 83#include <pthread_np.h> /* pthread w/ affinity */ 84#include <sys/cpuset.h> /* cpu_set */ 85#include <net/if_dl.h> /* LLADDR */ 86#endif /* __FreeBSD__ */ 87 88#ifdef __APPLE__ 89 90#define cpuset_t uint64_t // XXX 91static inline void CPU_ZERO(cpuset_t *p) 92{ 93 *p = 0; 94} 95 96static inline void CPU_SET(uint32_t i, cpuset_t *p) 97{ 98 *p |= 1<< (i & 0x3f); 99} 100 101#define pthread_setaffinity_np(a, b, c) ((void)a, 0) 102 103#define ifr_flagshigh ifr_flags // XXX 104#define IFF_PPROMISC IFF_PROMISC 105#include <net/if_dl.h> /* LLADDR */ 106#define clock_gettime(a,b) \ 107 do {struct timespec t0 = {0,0}; *(b) = t0; } while (0) 108#endif /* __APPLE__ */ 109 110const char *default_payload="netmap pkt-gen DIRECT payload\n" 111 "http://info.iet.unipi.it/~luigi/netmap/ "; 112 113const char *indirect_payload="netmap pkt-gen indirect payload\n" 114 "http://info.iet.unipi.it/~luigi/netmap/ "; 115 116int 
verbose = 0; 117 118#define SKIP_PAYLOAD 1 /* do not check payload. XXX unused */ 119 120 121#define VIRT_HDR_1 10 /* length of a base vnet-hdr */ 122#define VIRT_HDR_2 12 /* length of the extenede vnet-hdr */ 123#define VIRT_HDR_MAX VIRT_HDR_2 124struct virt_header { 125 uint8_t fields[VIRT_HDR_MAX]; 126}; 127 128#define MAX_BODYSIZE 16384 129 130struct pkt { 131 struct virt_header vh; 132 struct ether_header eh; 133 struct ip ip; 134 struct udphdr udp; 135 uint8_t body[MAX_BODYSIZE]; // XXX hardwired 136} __attribute__((__packed__)); 137 138struct ip_range { 139 char *name; 140 uint32_t start, end; /* same as struct in_addr */ 141 uint16_t port0, port1; 142}; 143 144struct mac_range { 145 char *name; 146 struct ether_addr start, end; 147}; 148 149/* ifname can be netmap:foo-xxxx */ 150#define MAX_IFNAMELEN 64 /* our buffer for ifname */ 151//#define MAX_PKTSIZE 1536 152#define MAX_PKTSIZE MAX_BODYSIZE /* XXX: + IP_HDR + ETH_HDR */ 153 154/* compact timestamp to fit into 60 byte packet. 
(enough to obtain RTT) */ 155struct tstamp { 156 uint32_t sec; 157 uint32_t nsec; 158}; 159 160/* 161 * global arguments for all threads 162 */ 163 164struct glob_arg { 165 struct ip_range src_ip; 166 struct ip_range dst_ip; 167 struct mac_range dst_mac; 168 struct mac_range src_mac; 169 int pkt_size; 170 int burst; 171 int forever; 172 int npackets; /* total packets to send */ 173 int frags; /* fragments per packet */ 174 int nthreads; 175 int cpus; 176 int options; /* testing */ 177#define OPT_PREFETCH 1 178#define OPT_ACCESS 2 179#define OPT_COPY 4 180#define OPT_MEMCPY 8 181#define OPT_TS 16 /* add a timestamp */ 182#define OPT_INDIRECT 32 /* use indirect buffers, tx only */ 183#define OPT_DUMP 64 /* dump rx/tx traffic */ 184#define OPT_MONITOR_TX 128 185#define OPT_MONITOR_RX 256 186 int dev_type; 187#ifndef NO_PCAP 188 pcap_t *p; 189#endif 190 191 int tx_rate; 192 struct timespec tx_period; 193 194 int affinity; 195 int main_fd; 196 struct nm_desc *nmd; 197 int report_interval; /* milliseconds between prints */ 198 void *(*td_body)(void *); 199 void *mmap_addr; 200 char ifname[MAX_IFNAMELEN]; 201 char *nmr_config; 202 int dummy_send; 203 int virt_header; /* send also the virt_header */ 204 int extra_bufs; /* goes in nr_arg3 */ 205 char *packet_file; /* -P option */ 206}; 207enum dev_type { DEV_NONE, DEV_NETMAP, DEV_PCAP, DEV_TAP }; 208 209 210/* 211 * Arguments for a new thread. The same structure is used by 212 * the source and the sink 213 */ 214struct targ { 215 struct glob_arg *g; 216 int used; 217 int completed; 218 int cancel; 219 int fd; 220 struct nm_desc *nmd; 221 volatile uint64_t count; 222 struct timespec tic, toc; 223 int me; 224 pthread_t thread; 225 int affinity; 226 227 struct pkt pkt; 228 void *frame; 229}; 230 231 232/* 233 * extract the extremes from a range of ipv4 addresses. 
234 * addr_lo[-addr_hi][:port_lo[-port_hi]] 235 */ 236static void 237extract_ip_range(struct ip_range *r) 238{ 239 char *ap, *pp; 240 struct in_addr a; 241 242 if (verbose) 243 D("extract IP range from %s", r->name); 244 r->port0 = r->port1 = 0; 245 r->start = r->end = 0; 246 247 /* the first - splits start/end of range */ 248 ap = index(r->name, '-'); /* do we have ports ? */ 249 if (ap) { 250 *ap++ = '\0'; 251 } 252 /* grab the initial values (mandatory) */ 253 pp = index(r->name, ':'); 254 if (pp) { 255 *pp++ = '\0'; 256 r->port0 = r->port1 = strtol(pp, NULL, 0); 257 }; 258 inet_aton(r->name, &a); 259 r->start = r->end = ntohl(a.s_addr); 260 if (ap) { 261 pp = index(ap, ':'); 262 if (pp) { 263 *pp++ = '\0'; 264 if (*pp) 265 r->port1 = strtol(pp, NULL, 0); 266 } 267 if (*ap) { 268 inet_aton(ap, &a); 269 r->end = ntohl(a.s_addr); 270 } 271 } 272 if (r->port0 > r->port1) { 273 uint16_t tmp = r->port0; 274 r->port0 = r->port1; 275 r->port1 = tmp; 276 } 277 if (r->start > r->end) { 278 uint32_t tmp = r->start; 279 r->start = r->end; 280 r->end = tmp; 281 } 282 { 283 struct in_addr a; 284 char buf1[16]; // one ip address 285 286 a.s_addr = htonl(r->end); 287 strncpy(buf1, inet_ntoa(a), sizeof(buf1)); 288 a.s_addr = htonl(r->start); 289 if (1) 290 D("range is %s:%d to %s:%d", 291 inet_ntoa(a), r->port0, buf1, r->port1); 292 } 293} 294 295static void 296extract_mac_range(struct mac_range *r) 297{ 298 if (verbose) 299 D("extract MAC range from %s", r->name); 300 bcopy(ether_aton(r->name), &r->start, 6); 301 bcopy(ether_aton(r->name), &r->end, 6); 302#if 0 303 bcopy(targ->src_mac, eh->ether_shost, 6); 304 p = index(targ->g->src_mac, '-'); 305 if (p) 306 targ->src_mac_range = atoi(p+1); 307 308 bcopy(ether_aton(targ->g->dst_mac), targ->dst_mac, 6); 309 bcopy(targ->dst_mac, eh->ether_dhost, 6); 310 p = index(targ->g->dst_mac, '-'); 311 if (p) 312 targ->dst_mac_range = atoi(p+1); 313#endif 314 if (verbose) 315 D("%s starts at %s", r->name, ether_ntoa(&r->start)); 316} 317 
318static struct targ *targs; 319static int global_nthreads; 320 321/* control-C handler */ 322static void 323sigint_h(int sig) 324{ 325 int i; 326 327 (void)sig; /* UNUSED */ 328 D("received control-C on thread %p", pthread_self()); 329 for (i = 0; i < global_nthreads; i++) { 330 targs[i].cancel = 1; 331 } 332 signal(SIGINT, SIG_DFL); 333} 334 335/* sysctl wrapper to return the number of active CPUs */ 336static int 337system_ncpus(void) 338{ 339 int ncpus; 340#if defined (__FreeBSD__) 341 int mib[2] = { CTL_HW, HW_NCPU }; 342 size_t len = sizeof(mib); 343 sysctl(mib, 2, &ncpus, &len, NULL, 0); 344#elif defined(linux) 345 ncpus = sysconf(_SC_NPROCESSORS_ONLN); 346#else /* others */ 347 ncpus = 1; 348#endif /* others */ 349 return (ncpus); 350} 351 352#ifdef __linux__ 353#define sockaddr_dl sockaddr_ll 354#define sdl_family sll_family 355#define AF_LINK AF_PACKET 356#define LLADDR(s) s->sll_addr; 357#include <linux/if_tun.h> 358#define TAP_CLONEDEV "/dev/net/tun" 359#endif /* __linux__ */ 360 361#ifdef __FreeBSD__ 362#include <net/if_tun.h> 363#define TAP_CLONEDEV "/dev/tap" 364#endif /* __FreeBSD */ 365 366#ifdef __APPLE__ 367// #warning TAP not supported on apple ? 368#include <net/if_utun.h> 369#define TAP_CLONEDEV "/dev/tap" 370#endif /* __APPLE__ */ 371 372 373/* 374 * parse the vale configuration in conf and put it in nmr. 375 * Return the flag set if necessary. 376 * The configuration may consist of 0 to 4 numbers separated 377 * by commas: #tx-slots,#rx-slots,#tx-rings,#rx-rings. 378 * Missing numbers or zeroes stand for default values. 379 * As an additional convenience, if exactly one number 380 * is specified, then this is assigned to both #tx-slots and #rx-slots. 381 * If there is no 4th number, then the 3rd is assigned to both #tx-rings 382 * and #rx-rings. 
383 */ 384int 385parse_nmr_config(const char* conf, struct nmreq *nmr) 386{ 387 char *w, *tok; 388 int i, v; 389 390 nmr->nr_tx_rings = nmr->nr_rx_rings = 0; 391 nmr->nr_tx_slots = nmr->nr_rx_slots = 0; 392 if (conf == NULL || ! *conf) 393 return 0; 394 w = strdup(conf); 395 for (i = 0, tok = strtok(w, ","); tok; i++, tok = strtok(NULL, ",")) { 396 v = atoi(tok); 397 switch (i) { 398 case 0: 399 nmr->nr_tx_slots = nmr->nr_rx_slots = v; 400 break; 401 case 1: 402 nmr->nr_rx_slots = v; 403 break; 404 case 2: 405 nmr->nr_tx_rings = nmr->nr_rx_rings = v; 406 break; 407 case 3: 408 nmr->nr_rx_rings = v; 409 break; 410 default: 411 D("ignored config: %s", tok); 412 break; 413 } 414 } 415 D("txr %d txd %d rxr %d rxd %d", 416 nmr->nr_tx_rings, nmr->nr_tx_slots, 417 nmr->nr_rx_rings, nmr->nr_rx_slots); 418 free(w); 419 return (nmr->nr_tx_rings || nmr->nr_tx_slots || 420 nmr->nr_rx_rings || nmr->nr_rx_slots) ? 421 NM_OPEN_RING_CFG : 0; 422} 423 424 425/* 426 * locate the src mac address for our interface, put it 427 * into the user-supplied buffer. return 0 if ok, -1 on error. 428 */ 429static int 430source_hwaddr(const char *ifname, char *buf) 431{ 432 struct ifaddrs *ifaphead, *ifap; 433 int l = sizeof(ifap->ifa_name); 434 435 if (getifaddrs(&ifaphead) != 0) { 436 D("getifaddrs %s failed", ifname); 437 return (-1); 438 } 439 440 for (ifap = ifaphead; ifap; ifap = ifap->ifa_next) { 441 struct sockaddr_dl *sdl = 442 (struct sockaddr_dl *)ifap->ifa_addr; 443 uint8_t *mac; 444 445 if (!sdl || sdl->sdl_family != AF_LINK) 446 continue; 447 if (strncmp(ifap->ifa_name, ifname, l) != 0) 448 continue; 449 mac = (uint8_t *)LLADDR(sdl); 450 sprintf(buf, "%02x:%02x:%02x:%02x:%02x:%02x", 451 mac[0], mac[1], mac[2], 452 mac[3], mac[4], mac[5]); 453 if (verbose) 454 D("source hwaddr %s", buf); 455 break; 456 } 457 freeifaddrs(ifaphead); 458 return ifap ? 0 : 1; 459} 460 461 462/* set the thread affinity. 
*/ 463static int 464setaffinity(pthread_t me, int i) 465{ 466 cpuset_t cpumask; 467 468 if (i == -1) 469 return 0; 470 471 /* Set thread affinity affinity.*/ 472 CPU_ZERO(&cpumask); 473 CPU_SET(i, &cpumask); 474 475 if (pthread_setaffinity_np(me, sizeof(cpuset_t), &cpumask) != 0) { 476 D("Unable to set affinity: %s", strerror(errno)); 477 return 1; 478 } 479 return 0; 480} 481 482/* Compute the checksum of the given ip header. */ 483static uint16_t 484checksum(const void *data, uint16_t len, uint32_t sum) 485{ 486 const uint8_t *addr = data; 487 uint32_t i; 488 489 /* Checksum all the pairs of bytes first... */ 490 for (i = 0; i < (len & ~1U); i += 2) { 491 sum += (u_int16_t)ntohs(*((u_int16_t *)(addr + i))); 492 if (sum > 0xFFFF) 493 sum -= 0xFFFF; 494 } 495 /* 496 * If there's a single byte left over, checksum it, too. 497 * Network byte order is big-endian, so the remaining byte is 498 * the high byte. 499 */ 500 if (i < len) { 501 sum += addr[i] << 8; 502 if (sum > 0xFFFF) 503 sum -= 0xFFFF; 504 } 505 return sum; 506} 507 508static u_int16_t 509wrapsum(u_int32_t sum) 510{ 511 sum = ~sum & 0xFFFF; 512 return (htons(sum)); 513} 514 515/* Check the payload of the packet for errors (use it for debug). 516 * Look for consecutive ascii representations of the size of the packet. 517 */ 518static void 519dump_payload(char *p, int len, struct netmap_ring *ring, int cur) 520{ 521 char buf[128]; 522 int i, j, i0; 523 524 /* get the length in ASCII of the length of the packet. */ 525 526 printf("ring %p cur %5d [buf %6d flags 0x%04x len %5d]\n", 527 ring, cur, ring->slot[cur].buf_idx, 528 ring->slot[cur].flags, len); 529 /* hexdump routine */ 530 for (i = 0; i < len; ) { 531 memset(buf, sizeof(buf), ' '); 532 sprintf(buf, "%5d: ", i); 533 i0 = i; 534 for (j=0; j < 16 && i < len; i++, j++) 535 sprintf(buf+7+j*3, "%02x ", (uint8_t)(p[i])); 536 i = i0; 537 for (j=0; j < 16 && i < len; i++, j++) 538 sprintf(buf+7+j + 48, "%c", 539 isprint(p[i]) ? 
p[i] : '.'); 540 printf("%s\n", buf); 541 } 542} 543 544/* 545 * Fill a packet with some payload. 546 * We create a UDP packet so the payload starts at 547 * 14+20+8 = 42 bytes. 548 */ 549#ifdef __linux__ 550#define uh_sport source 551#define uh_dport dest 552#define uh_ulen len 553#define uh_sum check 554#endif /* linux */ 555 556/* 557 * increment the addressed in the packet, 558 * starting from the least significant field. 559 * DST_IP DST_PORT SRC_IP SRC_PORT 560 */ 561static void 562update_addresses(struct pkt *pkt, struct glob_arg *g) 563{ 564 uint32_t a; 565 uint16_t p; 566 struct ip *ip = &pkt->ip; 567 struct udphdr *udp = &pkt->udp; 568 569 do { 570 p = ntohs(udp->uh_sport); 571 if (p < g->src_ip.port1) { /* just inc, no wrap */ 572 udp->uh_sport = htons(p + 1); 573 break; 574 } 575 udp->uh_sport = htons(g->src_ip.port0); 576 577 a = ntohl(ip->ip_src.s_addr); 578 if (a < g->src_ip.end) { /* just inc, no wrap */ 579 ip->ip_src.s_addr = htonl(a + 1); 580 break; 581 } 582 ip->ip_src.s_addr = htonl(g->src_ip.start); 583 584 udp->uh_sport = htons(g->src_ip.port0); 585 p = ntohs(udp->uh_dport); 586 if (p < g->dst_ip.port1) { /* just inc, no wrap */ 587 udp->uh_dport = htons(p + 1); 588 break; 589 } 590 udp->uh_dport = htons(g->dst_ip.port0); 591 592 a = ntohl(ip->ip_dst.s_addr); 593 if (a < g->dst_ip.end) { /* just inc, no wrap */ 594 ip->ip_dst.s_addr = htonl(a + 1); 595 break; 596 } 597 ip->ip_dst.s_addr = htonl(g->dst_ip.start); 598 } while (0); 599 // update checksum 600} 601 602/* 603 * initialize one packet and prepare for the next one. 604 * The copy could be done better instead of repeating it each time. 605 */ 606static void 607initialize_packet(struct targ *targ) 608{ 609 struct pkt *pkt = &targ->pkt; 610 struct ether_header *eh; 611 struct ip *ip; 612 struct udphdr *udp; 613 uint16_t paylen = targ->g->pkt_size - sizeof(*eh) - sizeof(struct ip); 614 const char *payload = targ->g->options & OPT_INDIRECT ? 
615 indirect_payload : default_payload; 616 int i, l0 = strlen(payload); 617 618 char errbuf[PCAP_ERRBUF_SIZE]; 619 pcap_t *file; 620 struct pcap_pkthdr *header; 621 const unsigned char *packet; 622 623 /* Read a packet from a PCAP file if asked. */ 624 if (targ->g->packet_file != NULL) { 625 if ((file = pcap_open_offline(targ->g->packet_file, 626 errbuf)) == NULL) 627 D("failed to open pcap file %s", 628 targ->g->packet_file); 629 if (pcap_next_ex(file, &header, &packet) < 0) 630 D("failed to read packet from %s", 631 targ->g->packet_file); 632 if ((targ->frame = malloc(header->caplen)) == NULL) 633 D("out of memory"); 634 bcopy(packet, (unsigned char *)targ->frame, header->caplen); 635 targ->g->pkt_size = header->caplen; 636 pcap_close(file); 637 return; 638 } 639 640 /* create a nice NUL-terminated string */ 641 for (i = 0; i < paylen; i += l0) { 642 if (l0 > paylen - i) 643 l0 = paylen - i; // last round 644 bcopy(payload, pkt->body + i, l0); 645 } 646 pkt->body[i-1] = '\0'; 647 ip = &pkt->ip; 648 649 /* prepare the headers */ 650 ip->ip_v = IPVERSION; 651 ip->ip_hl = 5; 652 ip->ip_id = 0; 653 ip->ip_tos = IPTOS_LOWDELAY; 654 ip->ip_len = ntohs(targ->g->pkt_size - sizeof(*eh)); 655 ip->ip_id = 0; 656 ip->ip_off = htons(IP_DF); /* Don't fragment */ 657 ip->ip_ttl = IPDEFTTL; 658 ip->ip_p = IPPROTO_UDP; 659 ip->ip_dst.s_addr = htonl(targ->g->dst_ip.start); 660 ip->ip_src.s_addr = htonl(targ->g->src_ip.start); 661 ip->ip_sum = wrapsum(checksum(ip, sizeof(*ip), 0)); 662 663 664 udp = &pkt->udp; 665 udp->uh_sport = htons(targ->g->src_ip.port0); 666 udp->uh_dport = htons(targ->g->dst_ip.port0); 667 udp->uh_ulen = htons(paylen); 668 /* Magic: taken from sbin/dhclient/packet.c */ 669 udp->uh_sum = wrapsum(checksum(udp, sizeof(*udp), 670 checksum(pkt->body, 671 paylen - sizeof(*udp), 672 checksum(&ip->ip_src, 2 * sizeof(ip->ip_src), 673 IPPROTO_UDP + (u_int32_t)ntohs(udp->uh_ulen) 674 ) 675 ) 676 )); 677 678 eh = &pkt->eh; 679 bcopy(&targ->g->src_mac.start, 
eh->ether_shost, 6); 680 bcopy(&targ->g->dst_mac.start, eh->ether_dhost, 6); 681 eh->ether_type = htons(ETHERTYPE_IP); 682 683 bzero(&pkt->vh, sizeof(pkt->vh)); 684#ifdef TRASH_VHOST_HDR 685 /* set bogus content */ 686 pkt->vh.fields[0] = 0xff; 687 pkt->vh.fields[1] = 0xff; 688 pkt->vh.fields[2] = 0xff; 689 pkt->vh.fields[3] = 0xff; 690 pkt->vh.fields[4] = 0xff; 691 pkt->vh.fields[5] = 0xff; 692#endif /* TRASH_VHOST_HDR */ 693 // dump_payload((void *)pkt, targ->g->pkt_size, NULL, 0); 694} 695 696static void 697set_vnet_hdr_len(struct targ *t) 698{ 699 int err, l = t->g->virt_header; 700 struct nmreq req; 701 702 if (l == 0) 703 return; 704 705 memset(&req, 0, sizeof(req)); 706 bcopy(t->nmd->req.nr_name, req.nr_name, sizeof(req.nr_name)); 707 req.nr_version = NETMAP_API; 708 req.nr_cmd = NETMAP_BDG_VNET_HDR; 709 req.nr_arg1 = l; 710 err = ioctl(t->fd, NIOCREGIF, &req); 711 if (err) { 712 D("Unable to set vnet header length %d", l); 713 } 714} 715 716 717/* 718 * create and enqueue a batch of packets on a ring. 719 * On the last one set NS_REPORT to tell the driver to generate 720 * an interrupt when done. 
721 */ 722static int 723send_packets(struct netmap_ring *ring, struct pkt *pkt, void *frame, 724 int size, struct glob_arg *g, u_int count, int options, 725 u_int nfrags) 726{ 727 u_int n, sent, cur = ring->cur; 728 u_int fcnt; 729 730 n = nm_ring_space(ring); 731 if (n < count) 732 count = n; 733 if (count < nfrags) { 734 D("truncating packet, no room for frags %d %d", 735 count, nfrags); 736 } 737#if 0 738 if (options & (OPT_COPY | OPT_PREFETCH) ) { 739 for (sent = 0; sent < count; sent++) { 740 struct netmap_slot *slot = &ring->slot[cur]; 741 char *p = NETMAP_BUF(ring, slot->buf_idx); 742 743 __builtin_prefetch(p); 744 cur = nm_ring_next(ring, cur); 745 } 746 cur = ring->cur; 747 } 748#endif 749 for (fcnt = nfrags, sent = 0; sent < count; sent++) { 750 struct netmap_slot *slot = &ring->slot[cur]; 751 char *p = NETMAP_BUF(ring, slot->buf_idx); 752 753 slot->flags = 0; 754 if (options & OPT_INDIRECT) { 755 slot->flags |= NS_INDIRECT; 756 slot->ptr = (uint64_t)frame; 757 } else if (options & OPT_COPY) { 758 nm_pkt_copy(frame, p, size); 759 if (fcnt == nfrags) 760 update_addresses(pkt, g); 761 } else if (options & OPT_MEMCPY) { 762 memcpy(p, frame, size); 763 if (fcnt == nfrags) 764 update_addresses(pkt, g); 765 } else if (options & OPT_PREFETCH) { 766 __builtin_prefetch(p); 767 } 768 if (options & OPT_DUMP) 769 dump_payload(p, size, ring, cur); 770 slot->len = size; 771 if (--fcnt > 0) 772 slot->flags |= NS_MOREFRAG; 773 else 774 fcnt = nfrags; 775 if (sent == count - 1) { 776 slot->flags &= ~NS_MOREFRAG; 777 slot->flags |= NS_REPORT; 778 } 779 cur = nm_ring_next(ring, cur); 780 } 781 ring->head = ring->cur = cur; 782 783 return (sent); 784} 785 786/* 787 * Send a packet, and wait for a response. 788 * The payload (after UDP header, ofs 42) has a 4-byte sequence 789 * followed by a struct timeval (or bintime?) 790 */ 791#define PAY_OFS 42 /* where in the pkt... 
*/ 792 793static void * 794pinger_body(void *data) 795{ 796 struct targ *targ = (struct targ *) data; 797 struct pollfd pfd = { .fd = targ->fd, .events = POLLIN }; 798 struct netmap_if *nifp = targ->nmd->nifp; 799 int i, rx = 0, n = targ->g->npackets; 800 void *frame; 801 int size; 802 uint32_t sent = 0; 803 struct timespec ts, now, last_print; 804 uint32_t count = 0, min = 1000000000, av = 0; 805 806 frame = &targ->pkt; 807 frame += sizeof(targ->pkt.vh) - targ->g->virt_header; 808 size = targ->g->pkt_size + targ->g->virt_header; 809 810 if (targ->g->nthreads > 1) { 811 D("can only ping with 1 thread"); 812 return NULL; 813 } 814 815 clock_gettime(CLOCK_REALTIME_PRECISE, &last_print); 816 now = last_print; 817 while (n == 0 || (int)sent < n) { 818 struct netmap_ring *ring = NETMAP_TXRING(nifp, 0); 819 struct netmap_slot *slot; 820 char *p; 821 for (i = 0; i < 1; i++) { /* XXX why the loop for 1 pkt ? */ 822 slot = &ring->slot[ring->cur]; 823 slot->len = size; 824 p = NETMAP_BUF(ring, slot->buf_idx); 825 826 if (nm_ring_empty(ring)) { 827 D("-- ouch, cannot send"); 828 } else { 829 struct tstamp *tp; 830 nm_pkt_copy(frame, p, size); 831 clock_gettime(CLOCK_REALTIME_PRECISE, &ts); 832 bcopy(&sent, p+42, sizeof(sent)); 833 tp = (struct tstamp *)(p+46); 834 tp->sec = (uint32_t)ts.tv_sec; 835 tp->nsec = (uint32_t)ts.tv_nsec; 836 sent++; 837 ring->head = ring->cur = nm_ring_next(ring, ring->cur); 838 } 839 } 840 /* should use a parameter to decide how often to send */ 841 if (poll(&pfd, 1, 3000) <= 0) { 842 D("poll error/timeout on queue %d: %s", targ->me, 843 strerror(errno)); 844 continue; 845 } 846 /* see what we got back */ 847 for (i = targ->nmd->first_tx_ring; 848 i <= targ->nmd->last_tx_ring; i++) { 849 ring = NETMAP_RXRING(nifp, i); 850 while (!nm_ring_empty(ring)) { 851 uint32_t seq; 852 struct tstamp *tp; 853 slot = &ring->slot[ring->cur]; 854 p = NETMAP_BUF(ring, slot->buf_idx); 855 856 clock_gettime(CLOCK_REALTIME_PRECISE, &now); 857 bcopy(p+42, &seq, 
sizeof(seq)); 858 tp = (struct tstamp *)(p+46); 859 ts.tv_sec = (time_t)tp->sec; 860 ts.tv_nsec = (long)tp->nsec; 861 ts.tv_sec = now.tv_sec - ts.tv_sec; 862 ts.tv_nsec = now.tv_nsec - ts.tv_nsec; 863 if (ts.tv_nsec < 0) { 864 ts.tv_nsec += 1000000000; 865 ts.tv_sec--; 866 } 867 if (1) D("seq %d/%d delta %d.%09d", seq, sent, 868 (int)ts.tv_sec, (int)ts.tv_nsec); 869 if (ts.tv_nsec < (int)min) 870 min = ts.tv_nsec; 871 count ++; 872 av += ts.tv_nsec; 873 ring->head = ring->cur = nm_ring_next(ring, ring->cur); 874 rx++; 875 } 876 } 877 //D("tx %d rx %d", sent, rx); 878 //usleep(100000); 879 ts.tv_sec = now.tv_sec - last_print.tv_sec; 880 ts.tv_nsec = now.tv_nsec - last_print.tv_nsec; 881 if (ts.tv_nsec < 0) { 882 ts.tv_nsec += 1000000000; 883 ts.tv_sec--; 884 } 885 if (ts.tv_sec >= 1) { 886 D("count %d min %d av %d", 887 count, min, av/count); 888 count = 0; 889 av = 0; 890 min = 100000000; 891 last_print = now; 892 } 893 } 894 return NULL; 895} 896 897 898/* 899 * reply to ping requests 900 */ 901static void * 902ponger_body(void *data) 903{ 904 struct targ *targ = (struct targ *) data; 905 struct pollfd pfd = { .fd = targ->fd, .events = POLLIN }; 906 struct netmap_if *nifp = targ->nmd->nifp; 907 struct netmap_ring *txring, *rxring; 908 int i, rx = 0, sent = 0, n = targ->g->npackets; 909 910 if (targ->g->nthreads > 1) { 911 D("can only reply ping with 1 thread"); 912 return NULL; 913 } 914 D("understood ponger %d but don't know how to do it", n); 915 while (n == 0 || sent < n) { 916 uint32_t txcur, txavail; 917//#define BUSYWAIT 918#ifdef BUSYWAIT 919 ioctl(pfd.fd, NIOCRXSYNC, NULL); 920#else 921 if (poll(&pfd, 1, 1000) <= 0) { 922 D("poll error/timeout on queue %d: %s", targ->me, 923 strerror(errno)); 924 continue; 925 } 926#endif 927 txring = NETMAP_TXRING(nifp, 0); 928 txcur = txring->cur; 929 txavail = nm_ring_space(txring); 930 /* see what we got back */ 931 for (i = targ->nmd->first_rx_ring; i <= targ->nmd->last_rx_ring; i++) { 932 rxring = NETMAP_RXRING(nifp, 
i); 933 while (!nm_ring_empty(rxring)) { 934 uint16_t *spkt, *dpkt; 935 uint32_t cur = rxring->cur; 936 struct netmap_slot *slot = &rxring->slot[cur]; 937 char *src, *dst; 938 src = NETMAP_BUF(rxring, slot->buf_idx); 939 //D("got pkt %p of size %d", src, slot->len); 940 rxring->head = rxring->cur = nm_ring_next(rxring, cur); 941 rx++; 942 if (txavail == 0) 943 continue; 944 dst = NETMAP_BUF(txring, 945 txring->slot[txcur].buf_idx); 946 /* copy... */ 947 dpkt = (uint16_t *)dst; 948 spkt = (uint16_t *)src; 949 nm_pkt_copy(src, dst, slot->len); 950 dpkt[0] = spkt[3]; 951 dpkt[1] = spkt[4]; 952 dpkt[2] = spkt[5]; 953 dpkt[3] = spkt[0]; 954 dpkt[4] = spkt[1]; 955 dpkt[5] = spkt[2]; 956 txring->slot[txcur].len = slot->len; 957 /* XXX swap src dst mac */ 958 txcur = nm_ring_next(txring, txcur); 959 txavail--; 960 sent++; 961 } 962 } 963 txring->head = txring->cur = txcur; 964 targ->count = sent; 965#ifdef BUSYWAIT 966 ioctl(pfd.fd, NIOCTXSYNC, NULL); 967#endif 968 //D("tx %d rx %d", sent, rx); 969 } 970 return NULL; 971} 972 973static __inline int 974timespec_ge(const struct timespec *a, const struct timespec *b) 975{ 976 977 if (a->tv_sec > b->tv_sec) 978 return (1); 979 if (a->tv_sec < b->tv_sec) 980 return (0); 981 if (a->tv_nsec >= b->tv_nsec) 982 return (1); 983 return (0); 984} 985 986static __inline struct timespec 987timeval2spec(const struct timeval *a) 988{ 989 struct timespec ts = { 990 .tv_sec = a->tv_sec, 991 .tv_nsec = a->tv_usec * 1000 992 }; 993 return ts; 994} 995 996static __inline struct timeval 997timespec2val(const struct timespec *a) 998{ 999 struct timeval tv = { 1000 .tv_sec = a->tv_sec, 1001 .tv_usec = a->tv_nsec / 1000 1002 }; 1003 return tv; 1004} 1005 1006 1007static __inline struct timespec 1008timespec_add(struct timespec a, struct timespec b) 1009{ 1010 struct timespec ret = { a.tv_sec + b.tv_sec, a.tv_nsec + b.tv_nsec }; 1011 if (ret.tv_nsec >= 1000000000) { 1012 ret.tv_sec++; 1013 ret.tv_nsec -= 1000000000; 1014 } 1015 return ret; 1016} 
1017 1018static __inline struct timespec 1019timespec_sub(struct timespec a, struct timespec b) 1020{ 1021 struct timespec ret = { a.tv_sec - b.tv_sec, a.tv_nsec - b.tv_nsec }; 1022 if (ret.tv_nsec < 0) { 1023 ret.tv_sec--; 1024 ret.tv_nsec += 1000000000; 1025 } 1026 return ret; 1027} 1028 1029 1030/* 1031 * wait until ts, either busy or sleeping if more than 1ms. 1032 * Return wakeup time. 1033 */ 1034static struct timespec 1035wait_time(struct timespec ts) 1036{ 1037 for (;;) { 1038 struct timespec w, cur; 1039 clock_gettime(CLOCK_REALTIME_PRECISE, &cur); 1040 w = timespec_sub(ts, cur); 1041 if (w.tv_sec < 0) 1042 return cur; 1043 else if (w.tv_sec > 0 || w.tv_nsec > 1000000) 1044 poll(NULL, 0, 1); 1045 } 1046} 1047 1048static void * 1049sender_body(void *data) 1050{ 1051 struct targ *targ = (struct targ *) data; 1052 struct pollfd pfd = { .fd = targ->fd, .events = POLLOUT }; 1053 struct netmap_if *nifp; 1054 struct netmap_ring *txring; 1055 int i, n = targ->g->npackets / targ->g->nthreads; 1056 int64_t sent = 0; 1057 int options = targ->g->options | OPT_COPY; 1058 struct timespec nexttime = { 0, 0}; // XXX silence compiler 1059 int rate_limit = targ->g->tx_rate; 1060 struct pkt *pkt = &targ->pkt; 1061 void *frame; 1062 int size; 1063 1064 if (targ->frame == NULL) { 1065 frame = pkt; 1066 frame += sizeof(pkt->vh) - targ->g->virt_header; 1067 size = targ->g->pkt_size + targ->g->virt_header; 1068 } else { 1069 frame = targ->frame; 1070 size = targ->g->pkt_size; 1071 } 1072 1073 D("start, fd %d main_fd %d", targ->fd, targ->g->main_fd); 1074 if (setaffinity(targ->thread, targ->affinity)) 1075 goto quit; 1076 1077 /* main loop.*/ 1078 clock_gettime(CLOCK_REALTIME_PRECISE, &targ->tic); 1079 if (rate_limit) { 1080 targ->tic = timespec_add(targ->tic, (struct timespec){2,0}); 1081 targ->tic.tv_nsec = 0; 1082 wait_time(targ->tic); 1083 nexttime = targ->tic; 1084 } 1085 if (targ->g->dev_type == DEV_TAP) { 1086 D("writing to file desc %d", targ->g->main_fd); 1087 1088 for (i 
= 0; !targ->cancel && (n == 0 || sent < n); i++) { 1089 if (write(targ->g->main_fd, frame, size) != -1) 1090 sent++; 1091 update_addresses(pkt, targ->g); 1092 if (i > 10000) { 1093 targ->count = sent; 1094 i = 0; 1095 } 1096 } 1097#ifndef NO_PCAP 1098 } else if (targ->g->dev_type == DEV_PCAP) { 1099 pcap_t *p = targ->g->p; 1100 1101 for (i = 0; !targ->cancel && (n == 0 || sent < n); i++) { 1102 if (pcap_inject(p, frame, size) != -1) 1103 sent++; 1104 update_addresses(pkt, targ->g); 1105 if (i > 10000) { 1106 targ->count = sent; 1107 i = 0; 1108 } 1109 } 1110#endif /* NO_PCAP */ 1111 } else { 1112 int tosend = 0; 1113 int frags = targ->g->frags; 1114 1115 nifp = targ->nmd->nifp; 1116 while (!targ->cancel && (n == 0 || sent < n)) { 1117 1118 if (rate_limit && tosend <= 0) { 1119 tosend = targ->g->burst; 1120 nexttime = timespec_add(nexttime, targ->g->tx_period); 1121 wait_time(nexttime); 1122 } 1123 1124 /* 1125 * wait for available room in the send queue(s) 1126 */ 1127 if (poll(&pfd, 1, 2000) <= 0) { 1128 if (targ->cancel) 1129 break; 1130 D("poll error/timeout on queue %d: %s", targ->me, 1131 strerror(errno)); 1132 // goto quit; 1133 } 1134 if (pfd.revents & POLLERR) { 1135 D("poll error"); 1136 goto quit; 1137 } 1138 /* 1139 * scan our queues and send on those with room 1140 */ 1141 if (options & OPT_COPY && sent > 100000 && !(targ->g->options & OPT_COPY) ) { 1142 D("drop copy"); 1143 options &= ~OPT_COPY; 1144 } 1145 for (i = targ->nmd->first_tx_ring; i <= targ->nmd->last_tx_ring; i++) { 1146 int m, limit = rate_limit ? 
tosend : targ->g->burst; 1147 if (n > 0 && n - sent < limit) 1148 limit = n - sent; 1149 txring = NETMAP_TXRING(nifp, i); 1150 if (nm_ring_empty(txring)) 1151 continue; 1152 if (frags > 1) 1153 limit = ((limit + frags - 1) / frags) * frags; 1154 1155 m = send_packets(txring, pkt, frame, size, targ->g, 1156 limit, options, frags); 1157 ND("limit %d tail %d frags %d m %d", 1158 limit, txring->tail, frags, m); 1159 sent += m; 1160 targ->count = sent; 1161 if (rate_limit) { 1162 tosend -= m; 1163 if (tosend <= 0) 1164 break; 1165 } 1166 } 1167 } 1168 /* flush any remaining packets */ 1169 D("flush tail %d head %d on thread %p", 1170 txring->tail, txring->head, 1171 pthread_self()); 1172 ioctl(pfd.fd, NIOCTXSYNC, NULL); 1173 1174 /* final part: wait all the TX queues to be empty. */ 1175 for (i = targ->nmd->first_tx_ring; i <= targ->nmd->last_tx_ring; i++) { 1176 txring = NETMAP_TXRING(nifp, i); 1177 while (nm_tx_pending(txring)) { 1178 RD(5, "pending tx tail %d head %d on ring %d", 1179 txring->tail, txring->head, i); 1180 ioctl(pfd.fd, NIOCTXSYNC, NULL); 1181 usleep(1); /* wait 1 tick */ 1182 } 1183 } 1184 } /* end DEV_NETMAP */ 1185 1186 clock_gettime(CLOCK_REALTIME_PRECISE, &targ->toc); 1187 targ->completed = 1; 1188 targ->count = sent; 1189 1190quit: 1191 /* reset the ``used`` flag. 
*/ 1192 targ->used = 0; 1193 1194 return (NULL); 1195} 1196 1197 1198#ifndef NO_PCAP 1199static void 1200receive_pcap(u_char *user, const struct pcap_pkthdr * h, 1201 const u_char * bytes) 1202{ 1203 int *count = (int *)user; 1204 (void)h; /* UNUSED */ 1205 (void)bytes; /* UNUSED */ 1206 (*count)++; 1207} 1208#endif /* !NO_PCAP */ 1209 1210static int 1211receive_packets(struct netmap_ring *ring, u_int limit, int dump) 1212{ 1213 u_int cur, rx, n; 1214 1215 cur = ring->cur; 1216 n = nm_ring_space(ring); 1217 if (n < limit) 1218 limit = n; 1219 for (rx = 0; rx < limit; rx++) { 1220 struct netmap_slot *slot = &ring->slot[cur]; 1221 char *p = NETMAP_BUF(ring, slot->buf_idx); 1222 1223 if (dump) 1224 dump_payload(p, slot->len, ring, cur); 1225 1226 cur = nm_ring_next(ring, cur); 1227 } 1228 ring->head = ring->cur = cur; 1229 1230 return (rx); 1231} 1232 1233static void * 1234receiver_body(void *data) 1235{ 1236 struct targ *targ = (struct targ *) data; 1237 struct pollfd pfd = { .fd = targ->fd, .events = POLLIN }; 1238 struct netmap_if *nifp; 1239 struct netmap_ring *rxring; 1240 int i; 1241 uint64_t received = 0; 1242 1243 if (setaffinity(targ->thread, targ->affinity)) 1244 goto quit; 1245 1246 D("reading from %s fd %d main_fd %d", 1247 targ->g->ifname, targ->fd, targ->g->main_fd); 1248 /* unbounded wait for the first packet. */ 1249 for (;!targ->cancel;) { 1250 i = poll(&pfd, 1, 1000); 1251 if (i > 0 && !(pfd.revents & POLLERR)) 1252 break; 1253 RD(1, "waiting for initial packets, poll returns %d %d", 1254 i, pfd.revents); 1255 } 1256 /* main loop, exit after 1s silence */ 1257 clock_gettime(CLOCK_REALTIME_PRECISE, &targ->tic); 1258 if (targ->g->dev_type == DEV_TAP) { 1259 while (!targ->cancel) { 1260 char buf[MAX_BODYSIZE]; 1261 /* XXX should we poll ? */ 1262 if (read(targ->g->main_fd, buf, sizeof(buf)) > 0) 1263 targ->count++; 1264 } 1265#ifndef NO_PCAP 1266 } else if (targ->g->dev_type == DEV_PCAP) { 1267 while (!targ->cancel) { 1268 /* XXX should we poll ? 
*/ 1269 pcap_dispatch(targ->g->p, targ->g->burst, receive_pcap, 1270 (u_char *)&targ->count); 1271 } 1272#endif /* !NO_PCAP */ 1273 } else { 1274 int dump = targ->g->options & OPT_DUMP; 1275 1276 nifp = targ->nmd->nifp; 1277 while (!targ->cancel) { 1278 /* Once we started to receive packets, wait at most 1 seconds 1279 before quitting. */ 1280 if (poll(&pfd, 1, 1 * 1000) <= 0 && !targ->g->forever) { 1281 clock_gettime(CLOCK_REALTIME_PRECISE, &targ->toc); 1282 targ->toc.tv_sec -= 1; /* Subtract timeout time. */ 1283 goto out; 1284 } 1285 1286 if (pfd.revents & POLLERR) { 1287 D("poll err"); 1288 goto quit; 1289 } 1290 1291 for (i = targ->nmd->first_rx_ring; i <= targ->nmd->last_rx_ring; i++) { 1292 int m; 1293 1294 rxring = NETMAP_RXRING(nifp, i); 1295 if (nm_ring_empty(rxring)) 1296 continue; 1297 1298 m = receive_packets(rxring, targ->g->burst, dump); 1299 received += m; 1300 } 1301 targ->count = received; 1302 } 1303 } 1304 1305 clock_gettime(CLOCK_REALTIME_PRECISE, &targ->toc); 1306 1307out: 1308 targ->completed = 1; 1309 targ->count = received; 1310 1311quit: 1312 /* reset the ``used`` flag. */ 1313 targ->used = 0; 1314 1315 return (NULL); 1316} 1317 1318/* very crude code to print a number in normalized form. 1319 * Caller has to make sure that the buffer is large enough. 
1320 */ 1321static const char * 1322norm(char *buf, double val) 1323{ 1324 char *units[] = { "", "K", "M", "G", "T" }; 1325 u_int i; 1326 1327 for (i = 0; val >=1000 && i < sizeof(units)/sizeof(char *) - 1; i++) 1328 val /= 1000; 1329 sprintf(buf, "%.2f %s", val, units[i]); 1330 return buf; 1331} 1332 1333static void 1334tx_output(uint64_t sent, int size, double delta) 1335{ 1336 double bw, raw_bw, pps; 1337 char b1[40], b2[80], b3[80]; 1338 1339 printf("Sent %llu packets, %d bytes each, in %.2f seconds.\n", 1340 (unsigned long long)sent, size, delta); 1341 if (delta == 0) 1342 delta = 1e-6; 1343 if (size < 60) /* correct for min packet size */ 1344 size = 60; 1345 pps = sent / delta; 1346 bw = (8.0 * size * sent) / delta; 1347 /* raw packets have4 bytes crc + 20 bytes framing */ 1348 raw_bw = (8.0 * (size + 24) * sent) / delta; 1349 1350 printf("Speed: %spps Bandwidth: %sbps (raw %sbps)\n", 1351 norm(b1, pps), norm(b2, bw), norm(b3, raw_bw) ); 1352} 1353 1354 1355static void 1356rx_output(uint64_t received, double delta) 1357{ 1358 double pps; 1359 char b1[40]; 1360 1361 printf("Received %llu packets, in %.2f seconds.\n", 1362 (unsigned long long) received, delta); 1363 1364 if (delta == 0) 1365 delta = 1e-6; 1366 pps = received / delta; 1367 printf("Speed: %spps\n", norm(b1, pps)); 1368} 1369 1370static void 1371usage(void) 1372{ 1373 const char *cmd = "pkt-gen"; 1374 fprintf(stderr, 1375 "Usage:\n" 1376 "%s arguments\n" 1377 "\t-i interface interface name\n" 1378 "\t-f function tx rx ping pong\n" 1379 "\t-n count number of iterations (can be 0)\n" 1380 "\t-t pkts_to_send also forces tx mode\n" 1381 "\t-r pkts_to_receive also forces rx mode\n" 1382 "\t-l pkt_size in bytes excluding CRC\n" 1383 "\t-d dst_ip[:port[-dst_ip:port]] single or range\n" 1384 "\t-s src_ip[:port[-src_ip:port]] single or range\n" 1385 "\t-D dst-mac\n" 1386 "\t-S src-mac\n" 1387 "\t-a cpu_id use setaffinity\n" 1388 "\t-b burst size testing, mostly\n" 1389 "\t-c cores cores to use\n" 1390 
"\t-p threads processes/threads to use\n" 1391 "\t-T report_ms milliseconds between reports\n" 1392 "\t-P use libpcap instead of netmap\n" 1393 "\t-w wait_for_link_time in seconds\n" 1394 "\t-R rate in packets per second\n" 1395 "\t-X dump payload\n" 1396 "\t-H len add empty virtio-net-header with size 'len'\n" 1397 "\t-P file load packet from pcap file" 1398 "", 1399 cmd); 1400 1401 exit(0); 1402} 1403 1404static void 1405start_threads(struct glob_arg *g) 1406{ 1407 int i; 1408 1409 targs = calloc(g->nthreads, sizeof(*targs)); 1410 /* 1411 * Now create the desired number of threads, each one 1412 * using a single descriptor. 1413 */ 1414 for (i = 0; i < g->nthreads; i++) { 1415 struct targ *t = &targs[i]; 1416 1417 bzero(t, sizeof(*t)); 1418 t->fd = -1; /* default, with pcap */ 1419 t->g = g; 1420 1421 if (g->dev_type == DEV_NETMAP) { 1422 struct nm_desc nmd = *g->nmd; /* copy, we overwrite ringid */ 1423 uint64_t nmd_flags = 0; 1424 nmd.self = &nmd; 1425 1426 if (g->nthreads > 1) { 1427 if (nmd.req.nr_flags != NR_REG_ALL_NIC) { 1428 D("invalid nthreads mode %d", nmd.req.nr_flags); 1429 continue; 1430 } 1431 nmd.req.nr_flags = NR_REG_ONE_NIC; 1432 nmd.req.nr_ringid = i; 1433 } 1434 /* Only touch one of the rings (rx is already ok) */ 1435 if (g->td_body == receiver_body) 1436 nmd_flags |= NETMAP_NO_TX_POLL; 1437 1438 /* register interface. Override ifname and ringid etc. 
*/ 1439 if (g->options & OPT_MONITOR_TX) 1440 nmd.req.nr_flags |= NR_MONITOR_TX; 1441 if (g->options & OPT_MONITOR_RX) 1442 nmd.req.nr_flags |= NR_MONITOR_RX; 1443 1444 t->nmd = nm_open(t->g->ifname, NULL, nmd_flags | 1445 NM_OPEN_IFNAME | NM_OPEN_NO_MMAP, &nmd); 1446 if (t->nmd == NULL) { 1447 D("Unable to open %s: %s", 1448 t->g->ifname, strerror(errno)); 1449 continue; 1450 } 1451 t->fd = t->nmd->fd; 1452 set_vnet_hdr_len(t); 1453 1454 } else { 1455 targs[i].fd = g->main_fd; 1456 } 1457 t->used = 1; 1458 t->me = i; 1459 if (g->affinity >= 0) { 1460 if (g->affinity < g->cpus) 1461 t->affinity = g->affinity; 1462 else 1463 t->affinity = i % g->cpus; 1464 } else { 1465 t->affinity = -1; 1466 } 1467 /* default, init packets */ 1468 initialize_packet(t); 1469 1470 if (pthread_create(&t->thread, NULL, g->td_body, t) == -1) { 1471 D("Unable to create thread %d: %s", i, strerror(errno)); 1472 t->used = 0; 1473 } 1474 } 1475} 1476 1477static void 1478main_thread(struct glob_arg *g) 1479{ 1480 int i; 1481 1482 uint64_t prev = 0; 1483 uint64_t count = 0; 1484 double delta_t; 1485 struct timeval tic, toc; 1486 1487 gettimeofday(&toc, NULL); 1488 for (;;) { 1489 struct timeval now, delta; 1490 uint64_t pps, usec, my_count, npkts; 1491 int done = 0; 1492 1493 delta.tv_sec = g->report_interval/1000; 1494 delta.tv_usec = (g->report_interval%1000)*1000; 1495 select(0, NULL, NULL, NULL, &delta); 1496 gettimeofday(&now, NULL); 1497 timersub(&now, &toc, &toc); 1498 my_count = 0; 1499 for (i = 0; i < g->nthreads; i++) { 1500 my_count += targs[i].count; 1501 if (targs[i].used == 0) 1502 done++; 1503 } 1504 usec = toc.tv_sec* 1000000 + toc.tv_usec; 1505 if (usec < 10000) 1506 continue; 1507 npkts = my_count - prev; 1508 pps = (npkts*1000000 + usec/2) / usec; 1509 D("%llu pps (%llu pkts in %llu usec)", 1510 (unsigned long long)pps, 1511 (unsigned long long)npkts, 1512 (unsigned long long)usec); 1513 prev = my_count; 1514 toc = now; 1515 if (done == g->nthreads) 1516 break; 1517 } 1518 
1519 timerclear(&tic); 1520 timerclear(&toc); 1521 for (i = 0; i < g->nthreads; i++) { 1522 struct timespec t_tic, t_toc; 1523 /* 1524 * Join active threads, unregister interfaces and close 1525 * file descriptors. 1526 */ 1527 if (targs[i].used) 1528 pthread_join(targs[i].thread, NULL); 1529 close(targs[i].fd); 1530 1531 if (targs[i].completed == 0) 1532 D("ouch, thread %d exited with error", i); 1533 1534 /* 1535 * Collect threads output and extract information about 1536 * how long it took to send all the packets. 1537 */ 1538 count += targs[i].count; 1539 t_tic = timeval2spec(&tic); 1540 t_toc = timeval2spec(&toc); 1541 if (!timerisset(&tic) || timespec_ge(&targs[i].tic, &t_tic)) 1542 tic = timespec2val(&targs[i].tic); 1543 if (!timerisset(&toc) || timespec_ge(&targs[i].toc, &t_toc)) 1544 toc = timespec2val(&targs[i].toc); 1545 } 1546 1547 /* print output. */ 1548 timersub(&toc, &tic, &toc); 1549 delta_t = toc.tv_sec + 1e-6* toc.tv_usec; 1550 if (g->td_body == sender_body) 1551 tx_output(count, g->pkt_size, delta_t); 1552 else 1553 rx_output(count, delta_t); 1554 1555 if (g->dev_type == DEV_NETMAP) { 1556 munmap(g->nmd->mem, g->nmd->req.nr_memsize); 1557 close(g->main_fd); 1558 } 1559} 1560 1561 1562struct sf { 1563 char *key; 1564 void *f; 1565}; 1566 1567static struct sf func[] = { 1568 { "tx", sender_body }, 1569 { "rx", receiver_body }, 1570 { "ping", pinger_body }, 1571 { "pong", ponger_body }, 1572 { NULL, NULL } 1573}; 1574 1575static int 1576tap_alloc(char *dev) 1577{ 1578 struct ifreq ifr; 1579 int fd, err; 1580 char *clonedev = TAP_CLONEDEV; 1581 1582 (void)err; 1583 (void)dev; 1584 /* Arguments taken by the function: 1585 * 1586 * char *dev: the name of an interface (or '\0'). MUST have enough 1587 * space to hold the interface name if '\0' is passed 1588 * int flags: interface flags (eg, IFF_TUN etc.) 
1589 */ 1590 1591#ifdef __FreeBSD__ 1592 if (dev[3]) { /* tapSomething */ 1593 static char buf[128]; 1594 snprintf(buf, sizeof(buf), "/dev/%s", dev); 1595 clonedev = buf; 1596 } 1597#endif 1598 /* open the device */ 1599 if( (fd = open(clonedev, O_RDWR)) < 0 ) { 1600 return fd; 1601 } 1602 D("%s open successful", clonedev); 1603 1604 /* preparation of the struct ifr, of type "struct ifreq" */ 1605 memset(&ifr, 0, sizeof(ifr)); 1606 1607#ifdef linux 1608 ifr.ifr_flags = IFF_TAP | IFF_NO_PI; 1609 1610 if (*dev) { 1611 /* if a device name was specified, put it in the structure; otherwise, 1612 * the kernel will try to allocate the "next" device of the 1613 * specified type */ 1614 strncpy(ifr.ifr_name, dev, IFNAMSIZ); 1615 } 1616 1617 /* try to create the device */ 1618 if( (err = ioctl(fd, TUNSETIFF, (void *) &ifr)) < 0 ) { 1619 D("failed to to a TUNSETIFF: %s", strerror(errno)); 1620 close(fd); 1621 return err; 1622 } 1623 1624 /* if the operation was successful, write back the name of the 1625 * interface to the variable "dev", so the caller can know 1626 * it. 
Note that the caller MUST reserve space in *dev (see calling 1627 * code below) */ 1628 strcpy(dev, ifr.ifr_name); 1629 D("new name is %s", dev); 1630#endif /* linux */ 1631 1632 /* this is the special file descriptor that the caller will use to talk 1633 * with the virtual interface */ 1634 return fd; 1635} 1636 1637int 1638main(int arc, char **argv) 1639{ 1640 int i; 1641 1642 struct glob_arg g; 1643 1644 int ch; 1645 int wait_link = 2; 1646 int devqueues = 1; /* how many device queues */ 1647 1648 bzero(&g, sizeof(g)); 1649 1650 g.main_fd = -1; 1651 g.td_body = receiver_body; 1652 g.report_interval = 1000; /* report interval */ 1653 g.affinity = -1; 1654 /* ip addresses can also be a range x.x.x.x-x.x.x.y */ 1655 g.src_ip.name = "10.0.0.1"; 1656 g.dst_ip.name = "10.1.0.1"; 1657 g.dst_mac.name = "ff:ff:ff:ff:ff:ff"; 1658 g.src_mac.name = NULL; 1659 g.pkt_size = 60; 1660 g.burst = 512; // default 1661 g.nthreads = 1; 1662 g.cpus = 1; 1663 g.forever = 1; 1664 g.tx_rate = 0; 1665 g.frags = 1; 1666 g.nmr_config = ""; 1667 g.virt_header = 0; 1668 1669 while ( (ch = getopt(arc, argv, 1670 "a:f:F:n:i:Il:d:s:D:S:b:c:o:p:T:w:WvR:XC:H:e:m:P:")) != -1) { 1671 struct sf *fn; 1672 1673 switch(ch) { 1674 default: 1675 D("bad option %c %s", ch, optarg); 1676 usage(); 1677 break; 1678 1679 case 'n': 1680 g.npackets = atoi(optarg); 1681 break; 1682 1683 case 'F': 1684 i = atoi(optarg); 1685 if (i < 1 || i > 63) { 1686 D("invalid frags %d [1..63], ignore", i); 1687 break; 1688 } 1689 g.frags = i; 1690 break; 1691 1692 case 'f': 1693 for (fn = func; fn->key; fn++) { 1694 if (!strcmp(fn->key, optarg)) 1695 break; 1696 } 1697 if (fn->key) 1698 g.td_body = fn->f; 1699 else 1700 D("unrecognised function %s", optarg); 1701 break; 1702 1703 case 'o': /* data generation options */ 1704 g.options = atoi(optarg); 1705 break; 1706 1707 case 'a': /* force affinity */ 1708 g.affinity = atoi(optarg); 1709 break; 1710 1711 case 'i': /* interface */ 1712 /* a prefix of tap: netmap: or pcap: 
forces the mode. 1713 * otherwise we guess 1714 */ 1715 D("interface is %s", optarg); 1716 if (strlen(optarg) > MAX_IFNAMELEN - 8) { 1717 D("ifname too long %s", optarg); 1718 break; 1719 } 1720 strcpy(g.ifname, optarg); 1721 if (!strcmp(optarg, "null")) { 1722 g.dev_type = DEV_NETMAP; 1723 g.dummy_send = 1; 1724 } else if (!strncmp(optarg, "tap:", 4)) { 1725 g.dev_type = DEV_TAP; 1726 strcpy(g.ifname, optarg + 4); 1727 } else if (!strncmp(optarg, "pcap:", 5)) { 1728 g.dev_type = DEV_PCAP; 1729 strcpy(g.ifname, optarg + 5); 1730 } else if (!strncmp(optarg, "netmap:", 7) || 1731 !strncmp(optarg, "vale", 4)) { 1732 g.dev_type = DEV_NETMAP; 1733 } else if (!strncmp(optarg, "tap", 3)) { 1734 g.dev_type = DEV_TAP; 1735 } else { /* prepend netmap: */ 1736 g.dev_type = DEV_NETMAP; 1737 sprintf(g.ifname, "netmap:%s", optarg); 1738 } 1739 break; 1740 1741 case 'I': 1742 g.options |= OPT_INDIRECT; /* XXX use indirect buffer */ 1743 break; 1744 1745 case 'l': /* pkt_size */ 1746 g.pkt_size = atoi(optarg); 1747 break; 1748 1749 case 'd': 1750 g.dst_ip.name = optarg; 1751 break; 1752 1753 case 's': 1754 g.src_ip.name = optarg; 1755 break; 1756 1757 case 'T': /* report interval */ 1758 g.report_interval = atoi(optarg); 1759 break; 1760 1761 case 'w': 1762 wait_link = atoi(optarg); 1763 break; 1764 1765 case 'W': /* XXX changed default */ 1766 g.forever = 0; /* do not exit rx even with no traffic */ 1767 break; 1768 1769 case 'b': /* burst */ 1770 g.burst = atoi(optarg); 1771 break; 1772 case 'c': 1773 g.cpus = atoi(optarg); 1774 break; 1775 case 'p': 1776 g.nthreads = atoi(optarg); 1777 break; 1778 1779 case 'D': /* destination mac */ 1780 g.dst_mac.name = optarg; 1781 break; 1782 1783 case 'S': /* source mac */ 1784 g.src_mac.name = optarg; 1785 break; 1786 case 'v': 1787 verbose++; 1788 break; 1789 case 'R': 1790 g.tx_rate = atoi(optarg); 1791 break; 1792 case 'X': 1793 g.options |= OPT_DUMP; 1794 break; 1795 case 'C': 1796 g.nmr_config = strdup(optarg); 1797 break; 1798 case 
'H': 1799 g.virt_header = atoi(optarg); 1800 break; 1801 case 'e': /* extra bufs */ 1802 g.extra_bufs = atoi(optarg); 1803 break; 1804 case 'm': 1805 if (strcmp(optarg, "tx") == 0) { 1806 g.options |= OPT_MONITOR_TX; 1807 } else if (strcmp(optarg, "rx") == 0) { 1808 g.options |= OPT_MONITOR_RX; 1809 } else { 1810 D("unrecognized monitor mode %s", optarg); 1811 } 1812 break; 1813 case 'P': 1814 g.packet_file = strdup(optarg); 1815 break; 1816 } 1817 1818 } 1819 1820 if (g.ifname == NULL) { 1821 D("missing ifname"); 1822 usage(); 1823 } 1824 1825 i = system_ncpus(); 1826 if (g.cpus < 0 || g.cpus > i) { 1827 D("%d cpus is too high, have only %d cpus", g.cpus, i); 1828 usage(); 1829 } 1830 if (g.cpus == 0) 1831 g.cpus = i; 1832 1833 if (g.pkt_size < 16 || g.pkt_size > MAX_PKTSIZE) { 1834 D("bad pktsize %d [16..%d]\n", g.pkt_size, MAX_PKTSIZE); 1835 usage(); 1836 } 1837 1838 if (g.src_mac.name == NULL) { 1839 static char mybuf[20] = "00:00:00:00:00:00"; 1840 /* retrieve source mac address. */ 1841 if (source_hwaddr(g.ifname, mybuf) == -1) { 1842 D("Unable to retrieve source mac"); 1843 // continue, fail later 1844 } 1845 g.src_mac.name = mybuf; 1846 } 1847 /* extract address ranges */ 1848 extract_ip_range(&g.src_ip); 1849 extract_ip_range(&g.dst_ip); 1850 extract_mac_range(&g.src_mac); 1851 extract_mac_range(&g.dst_mac); 1852 1853 if (g.src_ip.start != g.src_ip.end || 1854 g.src_ip.port0 != g.src_ip.port1 || 1855 g.dst_ip.start != g.dst_ip.end || 1856 g.dst_ip.port0 != g.dst_ip.port1) 1857 g.options |= OPT_COPY; 1858 1859 if (g.virt_header != 0 && g.virt_header != VIRT_HDR_1 1860 && g.virt_header != VIRT_HDR_2) { 1861 D("bad virtio-net-header length"); 1862 usage(); 1863 } 1864 1865 if (g.dev_type == DEV_TAP) { 1866 D("want to use tap %s", g.ifname); 1867 g.main_fd = tap_alloc(g.ifname); 1868 if (g.main_fd < 0) { 1869 D("cannot open tap %s", g.ifname); 1870 usage(); 1871 } 1872#ifndef NO_PCAP 1873 } else if (g.dev_type == DEV_PCAP) { 1874 char 
pcap_errbuf[PCAP_ERRBUF_SIZE]; 1875 1876 pcap_errbuf[0] = '\0'; // init the buffer 1877 g.p = pcap_open_live(g.ifname, 256 /* XXX */, 1, 100, pcap_errbuf); 1878 if (g.p == NULL) { 1879 D("cannot open pcap on %s", g.ifname); 1880 usage(); 1881 } 1882 g.main_fd = pcap_fileno(g.p); 1883 D("using pcap on %s fileno %d", g.ifname, g.main_fd); 1884#endif /* !NO_PCAP */ 1885 } else if (g.dummy_send) { /* but DEV_NETMAP */ 1886 D("using a dummy send routine"); 1887 } else { 1888 struct nmreq base_nmd; 1889 1890 bzero(&base_nmd, sizeof(base_nmd)); 1891 1892 parse_nmr_config(g.nmr_config, &base_nmd); 1893 if (g.extra_bufs) { 1894 base_nmd.nr_arg3 = g.extra_bufs; 1895 } 1896 1897 /* 1898 * Open the netmap device using nm_open(). 1899 * 1900 * protocol stack and may cause a reset of the card, 1901 * which in turn may take some time for the PHY to 1902 * reconfigure. We do the open here to have time to reset. 1903 */ 1904 g.nmd = nm_open(g.ifname, &base_nmd, 0, NULL); 1905 if (g.nmd == NULL) { 1906 D("Unable to open %s: %s", g.ifname, strerror(errno)); 1907 goto out; 1908 } 1909 g.main_fd = g.nmd->fd; 1910 D("mapped %dKB at %p", g.nmd->req.nr_memsize>>10, g.nmd->mem); 1911 1912 /* get num of queues in tx or rx */ 1913 if (g.td_body == sender_body) 1914 devqueues = g.nmd->req.nr_tx_rings; 1915 else 1916 devqueues = g.nmd->req.nr_rx_rings; 1917 1918 /* validate provided nthreads. 
*/ 1919 if (g.nthreads < 1 || g.nthreads > devqueues) { 1920 D("bad nthreads %d, have %d queues", g.nthreads, devqueues); 1921 // continue, fail later 1922 } 1923 1924 if (verbose) { 1925 struct netmap_if *nifp = g.nmd->nifp; 1926 struct nmreq *req = &g.nmd->req; 1927 1928 D("nifp at offset %d, %d tx %d rx region %d", 1929 req->nr_offset, req->nr_tx_rings, req->nr_rx_rings, 1930 req->nr_arg2); 1931 for (i = 0; i <= req->nr_tx_rings; i++) { 1932 struct netmap_ring *ring = NETMAP_TXRING(nifp, i); 1933 D(" TX%d at 0x%lx slots %d", i, 1934 (char *)ring - (char *)nifp, ring->num_slots); 1935 } 1936 for (i = 0; i <= req->nr_rx_rings; i++) { 1937 struct netmap_ring *ring = NETMAP_RXRING(nifp, i); 1938 D(" RX%d at 0x%lx slots %d", i, 1939 (char *)ring - (char *)nifp, ring->num_slots); 1940 } 1941 } 1942 1943 /* Print some debug information. */ 1944 fprintf(stdout, 1945 "%s %s: %d queues, %d threads and %d cpus.\n", 1946 (g.td_body == sender_body) ? "Sending on" : "Receiving from", 1947 g.ifname, 1948 devqueues, 1949 g.nthreads, 1950 g.cpus); 1951 if (g.td_body == sender_body) { 1952 fprintf(stdout, "%s -> %s (%s -> %s)\n", 1953 g.src_ip.name, g.dst_ip.name, 1954 g.src_mac.name, g.dst_mac.name); 1955 } 1956 1957out: 1958 /* Exit if something went wrong. */ 1959 if (g.main_fd < 0) { 1960 D("aborting"); 1961 usage(); 1962 } 1963 } 1964 1965 1966 if (g.options) { 1967 D("--- SPECIAL OPTIONS:%s%s%s%s%s\n", 1968 g.options & OPT_PREFETCH ? " prefetch" : "", 1969 g.options & OPT_ACCESS ? " access" : "", 1970 g.options & OPT_MEMCPY ? " memcpy" : "", 1971 g.options & OPT_INDIRECT ? " indirect" : "", 1972 g.options & OPT_COPY ? 
" copy" : ""); 1973 } 1974 1975 g.tx_period.tv_sec = g.tx_period.tv_nsec = 0; 1976 if (g.tx_rate > 0) { 1977 /* try to have at least something every second, 1978 * reducing the burst size to some 0.01s worth of data 1979 * (but no less than one full set of fragments) 1980 */ 1981 uint64_t x; 1982 int lim = (g.tx_rate)/300; 1983 if (g.burst > lim) 1984 g.burst = lim; 1985 if (g.burst < g.frags) 1986 g.burst = g.frags; 1987 x = ((uint64_t)1000000000 * (uint64_t)g.burst) / (uint64_t) g.tx_rate; 1988 g.tx_period.tv_nsec = x; 1989 g.tx_period.tv_sec = g.tx_period.tv_nsec / 1000000000; 1990 g.tx_period.tv_nsec = g.tx_period.tv_nsec % 1000000000; 1991 } 1992 if (g.td_body == sender_body) 1993 D("Sending %d packets every %ld.%09ld s", 1994 g.burst, g.tx_period.tv_sec, g.tx_period.tv_nsec); 1995 /* Wait for PHY reset. */ 1996 D("Wait %d secs for phy reset", wait_link); 1997 sleep(wait_link); 1998 D("Ready..."); 1999 2000 /* Install ^C handler. */ 2001 global_nthreads = g.nthreads; 2002 signal(SIGINT, sigint_h); 2003 2004 start_threads(&g); 2005 main_thread(&g); 2006 return 0; 2007} 2008 2009/* end of file */ 2010