pci_virtio_net.c revision 250083
1/*- 2 * Copyright (c) 2011 NetApp, Inc. 3 * All rights reserved. 4 * 5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted provided that the following conditions 7 * are met: 8 * 1. Redistributions of source code must retain the above copyright 9 * notice, this list of conditions and the following disclaimer. 10 * 2. Redistributions in binary form must reproduce the above copyright 11 * notice, this list of conditions and the following disclaimer in the 12 * documentation and/or other materials provided with the distribution. 13 * 14 * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND 15 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 16 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 17 * ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE 18 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 19 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 20 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 21 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 22 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 23 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 24 * SUCH DAMAGE. 25 * 26 * $FreeBSD: head/usr.sbin/bhyve/pci_virtio_net.c 250083 2013-04-30 00:36:16Z neel $ 27 */ 28 29#include <sys/cdefs.h> 30__FBSDID("$FreeBSD: head/usr.sbin/bhyve/pci_virtio_net.c 250083 2013-04-30 00:36:16Z neel $"); 31 32#include <sys/param.h> 33#include <sys/linker_set.h> 34#include <sys/select.h> 35#include <sys/uio.h> 36#include <sys/ioctl.h> 37 38#include <errno.h> 39#include <fcntl.h> 40#include <stdio.h> 41#include <stdlib.h> 42#include <stdint.h> 43#include <string.h> 44#include <strings.h> 45#include <unistd.h> 46#include <assert.h> 47#include <md5.h> 48#include <pthread.h> 49#include <pthread_np.h> 50 51#include "bhyverun.h" 52#include "pci_emul.h" 53#include "mevent.h" 54#include "virtio.h" 55 56#define VTNET_RINGSZ 1024 57 58#define VTNET_MAXSEGS 32 59 60/* 61 * PCI config-space register offsets 62 */ 63#define VTNET_R_CFG0 24 64#define VTNET_R_CFG1 25 65#define VTNET_R_CFG2 26 66#define VTNET_R_CFG3 27 67#define VTNET_R_CFG4 28 68#define VTNET_R_CFG5 29 69#define VTNET_R_CFG6 30 70#define VTNET_R_CFG7 31 71#define VTNET_R_MAX 31 72 73#define VTNET_REGSZ VTNET_R_MAX+1 74 75/* 76 * Host capabilities 77 */ 78#define VTNET_S_HOSTCAPS \ 79 ( 0x00000020 | /* host supplies MAC */ \ 80 0x00008000 | /* host can merge Rx buffers */ \ 81 0x00010000 ) /* config status available */ 82 83/* 84 * Queue definitions. 85 */ 86#define VTNET_RXQ 0 87#define VTNET_TXQ 1 88#define VTNET_CTLQ 2 89 90#define VTNET_MAXQ 3 91 92static int use_msix = 1; 93 94struct vring_hqueue { 95 /* Internal state */ 96 uint16_t hq_size; 97 uint16_t hq_cur_aidx; /* trails behind 'avail_idx' */ 98 99 /* Host-context pointers to the queue */ 100 struct virtio_desc *hq_dtable; 101 uint16_t *hq_avail_flags; 102 uint16_t *hq_avail_idx; /* monotonically increasing */ 103 uint16_t *hq_avail_ring; 104 105 uint16_t *hq_used_flags; 106 uint16_t *hq_used_idx; /* monotonically increasing */ 107 struct virtio_used *hq_used_ring; 108}; 109 110/* 111 * Fixed network header size 112 */ 113struct virtio_net_rxhdr { 114 uint8_t vrh_flags; 115 uint8_t vrh_gso_type; 116 uint16_t vrh_hdr_len; 117 uint16_t vrh_gso_size; 118 uint16_t vrh_csum_start; 119 uint16_t vrh_csum_offset; 120 uint16_t vrh_bufs; 121} __packed; 122 123/* 124 * Debug printf 125 */ 126static int pci_vtnet_debug; 127#define DPRINTF(params) if (pci_vtnet_debug) printf params 128#define WPRINTF(params) printf params 129 130/* 131 * Per-device softc 132 */ 133struct pci_vtnet_softc { 134 struct pci_devinst *vsc_pi; 135 pthread_mutex_t vsc_mtx; 136 struct mevent *vsc_mevp; 137 138 int vsc_curq; 139 int vsc_status; 140 int vsc_isr; 141 int vsc_tapfd; 142 int vsc_rx_ready; 143 int resetting; 144 145 uint32_t vsc_features; 146 uint8_t vsc_macaddr[6]; 147 148 uint64_t vsc_pfn[VTNET_MAXQ]; 149 struct vring_hqueue vsc_hq[VTNET_MAXQ]; 150 uint16_t vsc_msix_table_idx[VTNET_MAXQ]; 151 152 pthread_mutex_t rx_mtx; 153 int rx_in_progress; 154 155 pthread_t tx_tid; 156 pthread_mutex_t tx_mtx; 157 pthread_cond_t tx_cond; 158 int tx_in_progress; 159}; 160#define vtnet_ctx(sc) ((sc)->vsc_pi->pi_vmctx) 161 162/* 163 * Return the size of IO BAR that maps virtio header and device specific 164 * region. The size would vary depending on whether MSI-X is enabled or 165 * not. 166 */ 167static uint64_t 168pci_vtnet_iosize(struct pci_devinst *pi) 169{ 170 if (pci_msix_enabled(pi)) 171 return (VTNET_REGSZ); 172 else 173 return (VTNET_REGSZ - (VTCFG_R_CFG1 - VTCFG_R_MSIX)); 174} 175 176/* 177 * Return the number of available descriptors in the vring taking care 178 * of the 16-bit index wraparound. 179 */ 180static int 181hq_num_avail(struct vring_hqueue *hq) 182{ 183 uint16_t ndesc; 184 185 /* 186 * We're just computing (a-b) mod 2^16 187 * 188 * The only glitch here is that in standard C, 189 * uint16_t promotes to (signed) int when int has 190 * more than 16 bits (pretty much always now), so 191 * we have to force it back to unsigned. 192 */ 193 ndesc = (unsigned)*hq->hq_avail_idx - (unsigned)hq->hq_cur_aidx; 194 195 assert(ndesc <= hq->hq_size); 196 197 return (ndesc); 198} 199 200static uint16_t 201pci_vtnet_qsize(int qnum) 202{ 203 /* XXX no ctl queue currently */ 204 if (qnum == VTNET_CTLQ) { 205 return (0); 206 } 207 208 /* XXX fixed currently. Maybe different for tx/rx/ctl */ 209 return (VTNET_RINGSZ); 210} 211 212static void 213pci_vtnet_ring_reset(struct pci_vtnet_softc *sc, int ring) 214{ 215 struct vring_hqueue *hq; 216 217 assert(ring < VTNET_MAXQ); 218 219 hq = &sc->vsc_hq[ring]; 220 221 /* 222 * Reset all soft state 223 */ 224 hq->hq_cur_aidx = 0; 225} 226 227/* 228 * If the transmit thread is active then stall until it is done. 229 */ 230static void 231pci_vtnet_txwait(struct pci_vtnet_softc *sc) 232{ 233 234 pthread_mutex_lock(&sc->tx_mtx); 235 while (sc->tx_in_progress) { 236 pthread_mutex_unlock(&sc->tx_mtx); 237 usleep(10000); 238 pthread_mutex_lock(&sc->tx_mtx); 239 } 240 pthread_mutex_unlock(&sc->tx_mtx); 241} 242 243/* 244 * If the receive thread is active then stall until it is done. 245 */ 246static void 247pci_vtnet_rxwait(struct pci_vtnet_softc *sc) 248{ 249 250 pthread_mutex_lock(&sc->rx_mtx); 251 while (sc->rx_in_progress) { 252 pthread_mutex_unlock(&sc->rx_mtx); 253 usleep(10000); 254 pthread_mutex_lock(&sc->rx_mtx); 255 } 256 pthread_mutex_unlock(&sc->rx_mtx); 257} 258 259static void 260pci_vtnet_update_status(struct pci_vtnet_softc *sc, uint32_t value) 261{ 262 263 if (value == 0) { 264 DPRINTF(("vtnet: device reset requested !\n")); 265 266 sc->resetting = 1; 267 268 /* 269 * Wait for the transmit and receive threads to finish their 270 * processing. 271 */ 272 pci_vtnet_txwait(sc); 273 pci_vtnet_rxwait(sc); 274 275 sc->vsc_rx_ready = 0; 276 pci_vtnet_ring_reset(sc, VTNET_RXQ); 277 pci_vtnet_ring_reset(sc, VTNET_TXQ); 278 279 sc->resetting = 0; 280 } 281 282 sc->vsc_status = value; 283} 284 285/* 286 * Called to send a buffer chain out to the tap device 287 */ 288static void 289pci_vtnet_tap_tx(struct pci_vtnet_softc *sc, struct iovec *iov, int iovcnt, 290 int len) 291{ 292 char pad[60]; 293 294 if (sc->vsc_tapfd == -1) 295 return; 296 297 /* 298 * If the length is < 60, pad out to that and add the 299 * extra zero'd segment to the iov. It is guaranteed that 300 * there is always an extra iov available by the caller. 301 */ 302 if (len < 60) { 303 memset(pad, 0, 60 - len); 304 iov[iovcnt].iov_base = pad; 305 iov[iovcnt].iov_len = 60 - len; 306 iovcnt++; 307 } 308 (void) writev(sc->vsc_tapfd, iov, iovcnt); 309} 310 311/* 312 * Called when there is read activity on the tap file descriptor. 313 * Each buffer posted by the guest is assumed to be able to contain 314 * an entire ethernet frame + rx header. 315 * MP note: the dummybuf is only used for discarding frames, so there 316 * is no need for it to be per-vtnet or locked. 317 */ 318static uint8_t dummybuf[2048]; 319 320static void 321pci_vtnet_tap_rx(struct pci_vtnet_softc *sc) 322{ 323 struct virtio_desc *vd; 324 struct virtio_used *vu; 325 struct vring_hqueue *hq; 326 struct virtio_net_rxhdr *vrx; 327 uint8_t *buf; 328 int i; 329 int len; 330 int ndescs; 331 int didx, uidx, aidx; /* descriptor, avail and used index */ 332 333 /* 334 * Should never be called without a valid tap fd 335 */ 336 assert(sc->vsc_tapfd != -1); 337 338 /* 339 * But, will be called when the rx ring hasn't yet 340 * been set up or the guest is resetting the device. 341 */ 342 if (!sc->vsc_rx_ready || sc->resetting) { 343 /* 344 * Drop the packet and try later. 345 */ 346 (void) read(sc->vsc_tapfd, dummybuf, sizeof(dummybuf)); 347 return; 348 } 349 350 /* 351 * Calculate the number of available rx buffers 352 */ 353 hq = &sc->vsc_hq[VTNET_RXQ]; 354 355 ndescs = hq_num_avail(hq); 356 357 if (ndescs == 0) { 358 /* 359 * Drop the packet and try later 360 */ 361 (void) read(sc->vsc_tapfd, dummybuf, sizeof(dummybuf)); 362 return; 363 } 364 365 aidx = hq->hq_cur_aidx; 366 uidx = *hq->hq_used_idx; 367 for (i = 0; i < ndescs; i++) { 368 /* 369 * 'aidx' indexes into the an array of descriptor indexes 370 */ 371 didx = hq->hq_avail_ring[aidx % hq->hq_size]; 372 assert(didx >= 0 && didx < hq->hq_size); 373 374 vd = &hq->hq_dtable[didx]; 375 376 /* 377 * Get a pointer to the rx header, and use the 378 * data immediately following it for the packet buffer. 379 */ 380 vrx = paddr_guest2host(vtnet_ctx(sc), vd->vd_addr, vd->vd_len); 381 buf = (uint8_t *)(vrx + 1); 382 383 len = read(sc->vsc_tapfd, buf, 384 vd->vd_len - sizeof(struct virtio_net_rxhdr)); 385 386 if (len < 0 && errno == EWOULDBLOCK) { 387 break; 388 } 389 390 /* 391 * The only valid field in the rx packet header is the 392 * number of buffers, which is always 1 without TSO 393 * support. 394 */ 395 memset(vrx, 0, sizeof(struct virtio_net_rxhdr)); 396 vrx->vrh_bufs = 1; 397 398 /* 399 * Write this descriptor into the used ring 400 */ 401 vu = &hq->hq_used_ring[uidx % hq->hq_size]; 402 vu->vu_idx = didx; 403 vu->vu_tlen = len + sizeof(struct virtio_net_rxhdr); 404 uidx++; 405 aidx++; 406 } 407 408 /* 409 * Update the used pointer, and signal an interrupt if allowed 410 */ 411 *hq->hq_used_idx = uidx; 412 hq->hq_cur_aidx = aidx; 413 414 if ((*hq->hq_avail_flags & VRING_AVAIL_F_NO_INTERRUPT) == 0) { 415 if (use_msix) { 416 pci_generate_msix(sc->vsc_pi, 417 sc->vsc_msix_table_idx[VTNET_RXQ]); 418 } else { 419 sc->vsc_isr |= 1; 420 pci_generate_msi(sc->vsc_pi, 0); 421 } 422 } 423} 424 425static void 426pci_vtnet_tap_callback(int fd, enum ev_type type, void *param) 427{ 428 struct pci_vtnet_softc *sc = param; 429 430 pthread_mutex_lock(&sc->rx_mtx); 431 sc->rx_in_progress = 1; 432 pci_vtnet_tap_rx(sc); 433 sc->rx_in_progress = 0; 434 pthread_mutex_unlock(&sc->rx_mtx); 435 436} 437 438static void 439pci_vtnet_ping_rxq(struct pci_vtnet_softc *sc) 440{ 441 /* 442 * A qnotify means that the rx process can now begin 443 */ 444 if (sc->vsc_rx_ready == 0) { 445 sc->vsc_rx_ready = 1; 446 } 447} 448 449static void 450pci_vtnet_proctx(struct pci_vtnet_softc *sc, struct vring_hqueue *hq) 451{ 452 struct iovec iov[VTNET_MAXSEGS + 1]; 453 struct virtio_desc *vd; 454 struct virtio_used *vu; 455 int i; 456 int plen; 457 int tlen; 458 int uidx, aidx, didx; 459 460 uidx = *hq->hq_used_idx; 461 aidx = hq->hq_cur_aidx; 462 didx = hq->hq_avail_ring[aidx % hq->hq_size]; 463 assert(didx >= 0 && didx < hq->hq_size); 464 465 vd = &hq->hq_dtable[didx]; 466 467 /* 468 * Run through the chain of descriptors, ignoring the 469 * first header descriptor. However, include the header 470 * length in the total length that will be put into the 471 * used queue. 472 */ 473 tlen = vd->vd_len; 474 vd = &hq->hq_dtable[vd->vd_next]; 475 476 for (i = 0, plen = 0; 477 i < VTNET_MAXSEGS; 478 i++, vd = &hq->hq_dtable[vd->vd_next]) { 479 iov[i].iov_base = paddr_guest2host(vtnet_ctx(sc), 480 vd->vd_addr, vd->vd_len); 481 iov[i].iov_len = vd->vd_len; 482 plen += vd->vd_len; 483 tlen += vd->vd_len; 484 485 if ((vd->vd_flags & VRING_DESC_F_NEXT) == 0) 486 break; 487 } 488 assert(i < VTNET_MAXSEGS); 489 490 DPRINTF(("virtio: packet send, %d bytes, %d segs\n\r", plen, i + 1)); 491 pci_vtnet_tap_tx(sc, iov, i + 1, plen); 492 493 /* 494 * Return this chain back to the host 495 */ 496 vu = &hq->hq_used_ring[uidx % hq->hq_size]; 497 vu->vu_idx = didx; 498 vu->vu_tlen = tlen; 499 hq->hq_cur_aidx = aidx + 1; 500 *hq->hq_used_idx = uidx + 1; 501} 502 503static void 504pci_vtnet_ping_txq(struct pci_vtnet_softc *sc) 505{ 506 struct vring_hqueue *hq = &sc->vsc_hq[VTNET_TXQ]; 507 int ndescs; 508 509 /* 510 * Calculate number of ring entries to process 511 */ 512 ndescs = hq_num_avail(hq); 513 514 if (ndescs == 0) 515 return; 516 517 /* Signal the tx thread for processing */ 518 pthread_mutex_lock(&sc->tx_mtx); 519 if (sc->tx_in_progress == 0) 520 pthread_cond_signal(&sc->tx_cond); 521 pthread_mutex_unlock(&sc->tx_mtx); 522} 523 524/* 525 * Thread which will handle processing of TX desc 526 */ 527static void * 528pci_vtnet_tx_thread(void *param) 529{ 530 struct pci_vtnet_softc *sc = (struct pci_vtnet_softc *) param; 531 struct vring_hqueue *hq; 532 int i, ndescs, needintr,error; 533 534 needintr = 0; 535 hq = &sc->vsc_hq[VTNET_TXQ]; 536 537 /* 538 * Let us wait till the tx queue pointers get initialised & 539 * first tx signaled 540 */ 541 pthread_mutex_lock(&sc->tx_mtx); 542 error = pthread_cond_wait(&sc->tx_cond, &sc->tx_mtx); 543 assert(error == 0); 544 545 for (;;) { 546 pthread_mutex_lock(&sc->tx_mtx); 547 for (;;) { 548 if (sc->resetting) { 549 ndescs = 0; 550 needintr = 0; 551 } else 552 ndescs = hq_num_avail(hq); 553 554 if (ndescs != 0) 555 break; 556 557 if (needintr) { 558 /* 559 * Generate an interrupt if able 560 */ 561 if ((*hq->hq_avail_flags & 562 VRING_AVAIL_F_NO_INTERRUPT) == 0) { 563 if (use_msix) { 564 pci_generate_msix(sc->vsc_pi, 565 sc->vsc_msix_table_idx[VTNET_TXQ]); 566 } else { 567 sc->vsc_isr |= 1; 568 pci_generate_msi(sc->vsc_pi, 0); 569 } 570 } 571 } 572 needintr = 0; 573 sc->tx_in_progress = 0; 574 error = pthread_cond_wait(&sc->tx_cond, &sc->tx_mtx); 575 assert(error == 0); 576 } 577 sc->tx_in_progress = 1; 578 pthread_mutex_unlock(&sc->tx_mtx); 579 580 while (ndescs > 0) { 581 /* 582 * Run through all the entries, placing them into 583 * iovecs and sending when an end-of-packet is found 584 */ 585 for (i = 0; i < ndescs; i++) 586 pci_vtnet_proctx(sc, hq); 587 needintr = 1; 588 ndescs = hq_num_avail(hq); 589 } 590 } 591} 592 593static void 594pci_vtnet_ping_ctlq(struct pci_vtnet_softc *sc) 595{ 596 597 DPRINTF(("vtnet: control qnotify!\n\r")); 598} 599 600static void 601pci_vtnet_ring_init(struct pci_vtnet_softc *sc, uint64_t pfn) 602{ 603 struct vring_hqueue *hq; 604 int qnum = sc->vsc_curq; 605 606 assert(qnum < VTNET_MAXQ); 607 608 sc->vsc_pfn[qnum] = pfn << VRING_PFN; 609 610 /* 611 * Set up host pointers to the various parts of the 612 * queue 613 */ 614 hq = &sc->vsc_hq[qnum]; 615 hq->hq_size = pci_vtnet_qsize(qnum); 616 617 hq->hq_dtable = paddr_guest2host(vtnet_ctx(sc), pfn << VRING_PFN, 618 vring_size(hq->hq_size)); 619 hq->hq_avail_flags = (uint16_t *)(hq->hq_dtable + hq->hq_size); 620 hq->hq_avail_idx = hq->hq_avail_flags + 1; 621 hq->hq_avail_ring = hq->hq_avail_flags + 2; 622 hq->hq_used_flags = (uint16_t *)roundup2((uintptr_t)hq->hq_avail_ring, 623 VRING_ALIGN); 624 hq->hq_used_idx = hq->hq_used_flags + 1; 625 hq->hq_used_ring = (struct virtio_used *)(hq->hq_used_flags + 2); 626 627 /* 628 * Initialize queue indexes 629 */ 630 hq->hq_cur_aidx = 0; 631} 632 633static int 634pci_vtnet_init(struct vmctx *ctx, struct pci_devinst *pi, char *opts) 635{ 636 MD5_CTX mdctx; 637 unsigned char digest[16]; 638 char nstr[80]; 639 char tname[MAXCOMLEN + 1]; 640 struct pci_vtnet_softc *sc; 641 const char *env_msi; 642 643 sc = malloc(sizeof(struct pci_vtnet_softc)); 644 memset(sc, 0, sizeof(struct pci_vtnet_softc)); 645 646 pi->pi_arg = sc; 647 sc->vsc_pi = pi; 648 649 pthread_mutex_init(&sc->vsc_mtx, NULL); 650 651 /* 652 * Use MSI if set by user 653 */ 654 if ((env_msi = getenv("BHYVE_USE_MSI")) != NULL) { 655 if (strcasecmp(env_msi, "yes") == 0) 656 use_msix = 0; 657 } 658 659 /* 660 * Attempt to open the tap device 661 */ 662 sc->vsc_tapfd = -1; 663 if (opts != NULL) { 664 char tbuf[80]; 665 666 strcpy(tbuf, "/dev/"); 667 strlcat(tbuf, opts, sizeof(tbuf)); 668 669 sc->vsc_tapfd = open(tbuf, O_RDWR); 670 if (sc->vsc_tapfd == -1) { 671 WPRINTF(("open of tap device %s failed\n", tbuf)); 672 } else { 673 /* 674 * Set non-blocking and register for read 675 * notifications with the event loop 676 */ 677 int opt = 1; 678 if (ioctl(sc->vsc_tapfd, FIONBIO, &opt) < 0) { 679 WPRINTF(("tap device O_NONBLOCK failed\n")); 680 close(sc->vsc_tapfd); 681 sc->vsc_tapfd = -1; 682 } 683 684 sc->vsc_mevp = mevent_add(sc->vsc_tapfd, 685 EVF_READ, 686 pci_vtnet_tap_callback, 687 sc); 688 if (sc->vsc_mevp == NULL) { 689 WPRINTF(("Could not register event\n")); 690 close(sc->vsc_tapfd); 691 sc->vsc_tapfd = -1; 692 } 693 } 694 } 695 696 /* 697 * The MAC address is the standard NetApp OUI of 00-a0-98, 698 * followed by an MD5 of the vm name. The slot/func number is 699 * prepended to this for slots other than 1:0, so that 700 * a bootloader can netboot from the equivalent of slot 1. 701 */ 702 if (pi->pi_slot == 1 && pi->pi_func == 0) { 703 strncpy(nstr, vmname, sizeof(nstr)); 704 } else { 705 snprintf(nstr, sizeof(nstr), "%d-%d-%s", pi->pi_slot, 706 pi->pi_func, vmname); 707 } 708 709 MD5Init(&mdctx); 710 MD5Update(&mdctx, nstr, strlen(nstr)); 711 MD5Final(digest, &mdctx); 712 713 sc->vsc_macaddr[0] = 0x00; 714 sc->vsc_macaddr[1] = 0xa0; 715 sc->vsc_macaddr[2] = 0x98; 716 sc->vsc_macaddr[3] = digest[0]; 717 sc->vsc_macaddr[4] = digest[1]; 718 sc->vsc_macaddr[5] = digest[2]; 719 720 /* initialize config space */ 721 pci_set_cfgdata16(pi, PCIR_DEVICE, VIRTIO_DEV_NET); 722 pci_set_cfgdata16(pi, PCIR_VENDOR, VIRTIO_VENDOR); 723 pci_set_cfgdata8(pi, PCIR_CLASS, PCIC_NETWORK); 724 pci_set_cfgdata16(pi, PCIR_SUBDEV_0, VIRTIO_TYPE_NET); 725 726 if (use_msix) { 727 /* MSI-X support */ 728 int i; 729 730 for (i = 0; i < VTNET_MAXQ; i++) 731 sc->vsc_msix_table_idx[i] = VIRTIO_MSI_NO_VECTOR; 732 733 /* 734 * BAR 1 used to map MSI-X table and PBA 735 */ 736 if (pci_emul_add_msixcap(pi, VTNET_MAXQ, 1)) 737 return (1); 738 } else { 739 /* MSI support */ 740 pci_emul_add_msicap(pi, 1); 741 } 742 743 pci_emul_alloc_bar(pi, 0, PCIBAR_IO, VTNET_REGSZ); 744 745 sc->resetting = 0; 746 747 sc->rx_in_progress = 0; 748 pthread_mutex_init(&sc->rx_mtx, NULL); 749 750 /* 751 * Initialize tx semaphore & spawn TX processing thread 752 * As of now, only one thread for TX desc processing is 753 * spawned. 754 */ 755 sc->tx_in_progress = 0; 756 pthread_mutex_init(&sc->tx_mtx, NULL); 757 pthread_cond_init(&sc->tx_cond, NULL); 758 pthread_create(&sc->tx_tid, NULL, pci_vtnet_tx_thread, (void *)sc); 759 snprintf(tname, sizeof(tname), "%s vtnet%d tx", vmname, pi->pi_slot); 760 pthread_set_name_np(sc->tx_tid, tname); 761 762 return (0); 763} 764 765/* 766 * Function pointer array to handle queue notifications 767 */ 768static void (*pci_vtnet_qnotify[VTNET_MAXQ])(struct pci_vtnet_softc *) = { 769 pci_vtnet_ping_rxq, 770 pci_vtnet_ping_txq, 771 pci_vtnet_ping_ctlq 772}; 773 774static uint64_t 775vtnet_adjust_offset(struct pci_devinst *pi, uint64_t offset) 776{ 777 /* 778 * Device specific offsets used by guest would change based on 779 * whether MSI-X capability is enabled or not 780 */ 781 if (!pci_msix_enabled(pi)) { 782 if (offset >= VTCFG_R_MSIX) 783 return (offset + (VTCFG_R_CFG1 - VTCFG_R_MSIX)); 784 } 785 786 return (offset); 787} 788 789static void 790pci_vtnet_write(struct vmctx *ctx, int vcpu, struct pci_devinst *pi, 791 int baridx, uint64_t offset, int size, uint64_t value) 792{ 793 struct pci_vtnet_softc *sc = pi->pi_arg; 794 void *ptr; 795 796 if (use_msix) { 797 if (baridx == pci_msix_table_bar(pi) || 798 baridx == pci_msix_pba_bar(pi)) { 799 pci_emul_msix_twrite(pi, offset, size, value); 800 return; 801 } 802 } 803 804 assert(baridx == 0); 805 806 if (offset + size > pci_vtnet_iosize(pi)) { 807 DPRINTF(("vtnet_write: 2big, offset %ld size %d\n", 808 offset, size)); 809 return; 810 } 811 812 pthread_mutex_lock(&sc->vsc_mtx); 813 814 offset = vtnet_adjust_offset(pi, offset); 815 816 switch (offset) { 817 case VTCFG_R_GUESTCAP: 818 assert(size == 4); 819 sc->vsc_features = value & VTNET_S_HOSTCAPS; 820 break; 821 case VTCFG_R_PFN: 822 assert(size == 4); 823 pci_vtnet_ring_init(sc, value); 824 break; 825 case VTCFG_R_QSEL: 826 assert(size == 2); 827 assert(value < VTNET_MAXQ); 828 sc->vsc_curq = value; 829 break; 830 case VTCFG_R_QNOTIFY: 831 assert(size == 2); 832 assert(value < VTNET_MAXQ); 833 (*pci_vtnet_qnotify[value])(sc); 834 break; 835 case VTCFG_R_STATUS: 836 assert(size == 1); 837 pci_vtnet_update_status(sc, value); 838 break; 839 case VTCFG_R_CFGVEC: 840 assert(size == 2); 841 sc->vsc_msix_table_idx[VTNET_CTLQ] = value; 842 break; 843 case VTCFG_R_QVEC: 844 assert(size == 2); 845 assert(sc->vsc_curq != VTNET_CTLQ); 846 sc->vsc_msix_table_idx[sc->vsc_curq] = value; 847 break; 848 case VTNET_R_CFG0: 849 case VTNET_R_CFG1: 850 case VTNET_R_CFG2: 851 case VTNET_R_CFG3: 852 case VTNET_R_CFG4: 853 case VTNET_R_CFG5: 854 assert((size + offset) <= (VTNET_R_CFG5 + 1)); 855 ptr = &sc->vsc_macaddr[offset - VTNET_R_CFG0]; 856 /* 857 * The driver is allowed to change the MAC address 858 */ 859 sc->vsc_macaddr[offset - VTNET_R_CFG0] = value; 860 if (size == 1) { 861 *(uint8_t *) ptr = value; 862 } else if (size == 2) { 863 *(uint16_t *) ptr = value; 864 } else { 865 *(uint32_t *) ptr = value; 866 } 867 break; 868 case VTCFG_R_HOSTCAP: 869 case VTCFG_R_QNUM: 870 case VTCFG_R_ISR: 871 case VTNET_R_CFG6: 872 case VTNET_R_CFG7: 873 DPRINTF(("vtnet: write to readonly reg %ld\n\r", offset)); 874 break; 875 default: 876 DPRINTF(("vtnet: unknown i/o write offset %ld\n\r", offset)); 877 value = 0; 878 break; 879 } 880 881 pthread_mutex_unlock(&sc->vsc_mtx); 882} 883 884uint64_t 885pci_vtnet_read(struct vmctx *ctx, int vcpu, struct pci_devinst *pi, 886 int baridx, uint64_t offset, int size) 887{ 888 struct pci_vtnet_softc *sc = pi->pi_arg; 889 void *ptr; 890 uint64_t value; 891 892 if (use_msix) { 893 if (baridx == pci_msix_table_bar(pi) || 894 baridx == pci_msix_pba_bar(pi)) { 895 return (pci_emul_msix_tread(pi, offset, size)); 896 } 897 } 898 899 assert(baridx == 0); 900 901 if (offset + size > pci_vtnet_iosize(pi)) { 902 DPRINTF(("vtnet_read: 2big, offset %ld size %d\n", 903 offset, size)); 904 return (0); 905 } 906 907 pthread_mutex_lock(&sc->vsc_mtx); 908 909 offset = vtnet_adjust_offset(pi, offset); 910 911 switch (offset) { 912 case VTCFG_R_HOSTCAP: 913 assert(size == 4); 914 value = VTNET_S_HOSTCAPS; 915 break; 916 case VTCFG_R_GUESTCAP: 917 assert(size == 4); 918 value = sc->vsc_features; /* XXX never read ? */ 919 break; 920 case VTCFG_R_PFN: 921 assert(size == 4); 922 value = sc->vsc_pfn[sc->vsc_curq] >> VRING_PFN; 923 break; 924 case VTCFG_R_QNUM: 925 assert(size == 2); 926 value = pci_vtnet_qsize(sc->vsc_curq); 927 break; 928 case VTCFG_R_QSEL: 929 assert(size == 2); 930 value = sc->vsc_curq; /* XXX never read ? */ 931 break; 932 case VTCFG_R_QNOTIFY: 933 assert(size == 2); 934 value = sc->vsc_curq; /* XXX never read ? */ 935 break; 936 case VTCFG_R_STATUS: 937 assert(size == 1); 938 value = sc->vsc_status; 939 break; 940 case VTCFG_R_ISR: 941 assert(size == 1); 942 value = sc->vsc_isr; 943 sc->vsc_isr = 0; /* a read clears this flag */ 944 break; 945 case VTCFG_R_CFGVEC: 946 assert(size == 2); 947 value = sc->vsc_msix_table_idx[VTNET_CTLQ]; 948 break; 949 case VTCFG_R_QVEC: 950 assert(size == 2); 951 assert(sc->vsc_curq != VTNET_CTLQ); 952 value = sc->vsc_msix_table_idx[sc->vsc_curq]; 953 break; 954 case VTNET_R_CFG0: 955 case VTNET_R_CFG1: 956 case VTNET_R_CFG2: 957 case VTNET_R_CFG3: 958 case VTNET_R_CFG4: 959 case VTNET_R_CFG5: 960 assert((size + offset) <= (VTNET_R_CFG5 + 1)); 961 ptr = &sc->vsc_macaddr[offset - VTNET_R_CFG0]; 962 if (size == 1) { 963 value = *(uint8_t *) ptr; 964 } else if (size == 2) { 965 value = *(uint16_t *) ptr; 966 } else { 967 value = *(uint32_t *) ptr; 968 } 969 break; 970 case VTNET_R_CFG6: 971 assert(size != 4); 972 value = 0x01; /* XXX link always up */ 973 break; 974 case VTNET_R_CFG7: 975 assert(size == 1); 976 value = 0; /* XXX link status in LSB */ 977 break; 978 default: 979 DPRINTF(("vtnet: unknown i/o read offset %ld\n\r", offset)); 980 value = 0; 981 break; 982 } 983 984 pthread_mutex_unlock(&sc->vsc_mtx); 985 986 return (value); 987} 988 989struct pci_devemu pci_de_vnet = { 990 .pe_emu = "virtio-net", 991 .pe_init = pci_vtnet_init, 992 .pe_barwrite = pci_vtnet_write, 993 .pe_barread = pci_vtnet_read 994}; 995PCI_EMUL_SET(pci_de_vnet); 996