pci_virtio_net.c revision 250197
1/*- 2 * Copyright (c) 2011 NetApp, Inc. 3 * All rights reserved. 4 * 5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted provided that the following conditions 7 * are met: 8 * 1. Redistributions of source code must retain the above copyright 9 * notice, this list of conditions and the following disclaimer. 10 * 2. Redistributions in binary form must reproduce the above copyright 11 * notice, this list of conditions and the following disclaimer in the 12 * documentation and/or other materials provided with the distribution. 13 * 14 * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND 15 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 16 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 17 * ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE 18 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 19 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 20 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 21 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 22 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 23 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 24 * SUCH DAMAGE. 25 * 26 * $FreeBSD: head/usr.sbin/bhyve/pci_virtio_net.c 250197 2013-05-03 01:16:18Z neel $ 27 */ 28 29#include <sys/cdefs.h> 30__FBSDID("$FreeBSD: head/usr.sbin/bhyve/pci_virtio_net.c 250197 2013-05-03 01:16:18Z neel $"); 31 32#include <sys/param.h> 33#include <sys/linker_set.h> 34#include <sys/select.h> 35#include <sys/uio.h> 36#include <sys/ioctl.h> 37 38#include <errno.h> 39#include <fcntl.h> 40#include <stdio.h> 41#include <stdlib.h> 42#include <stdint.h> 43#include <string.h> 44#include <strings.h> 45#include <unistd.h> 46#include <assert.h> 47#include <md5.h> 48#include <pthread.h> 49#include <pthread_np.h> 50 51#include "bhyverun.h" 52#include "pci_emul.h" 53#include "mevent.h" 54#include "virtio.h" 55 56#define VTNET_RINGSZ 1024 57 58#define VTNET_MAXSEGS 32 59 60/* 61 * PCI config-space register offsets 62 */ 63#define VTNET_R_CFG0 24 64#define VTNET_R_CFG1 25 65#define VTNET_R_CFG2 26 66#define VTNET_R_CFG3 27 67#define VTNET_R_CFG4 28 68#define VTNET_R_CFG5 29 69#define VTNET_R_CFG6 30 70#define VTNET_R_CFG7 31 71#define VTNET_R_MAX 31 72 73#define VTNET_REGSZ VTNET_R_MAX+1 74 75/* 76 * Host capabilities 77 */ 78#define VTNET_S_HOSTCAPS \ 79 ( 0x00000020 | /* host supplies MAC */ \ 80 0x00008000 | /* host can merge Rx buffers */ \ 81 0x00010000 | /* config status available */ \ 82 VIRTIO_F_NOTIFY_ON_EMPTY) 83 84/* 85 * Queue definitions. 86 */ 87#define VTNET_RXQ 0 88#define VTNET_TXQ 1 89#define VTNET_CTLQ 2 90 91#define VTNET_MAXQ 3 92 93static int use_msix = 1; 94 95struct vring_hqueue { 96 /* Internal state */ 97 uint16_t hq_size; 98 uint16_t hq_cur_aidx; /* trails behind 'avail_idx' */ 99 100 /* Host-context pointers to the queue */ 101 struct virtio_desc *hq_dtable; 102 uint16_t *hq_avail_flags; 103 uint16_t *hq_avail_idx; /* monotonically increasing */ 104 uint16_t *hq_avail_ring; 105 106 uint16_t *hq_used_flags; 107 uint16_t *hq_used_idx; /* monotonically increasing */ 108 struct virtio_used *hq_used_ring; 109}; 110 111/* 112 * Fixed network header size 113 */ 114struct virtio_net_rxhdr { 115 uint8_t vrh_flags; 116 uint8_t vrh_gso_type; 117 uint16_t vrh_hdr_len; 118 uint16_t vrh_gso_size; 119 uint16_t vrh_csum_start; 120 uint16_t vrh_csum_offset; 121 uint16_t vrh_bufs; 122} __packed; 123 124/* 125 * Debug printf 126 */ 127static int pci_vtnet_debug; 128#define DPRINTF(params) if (pci_vtnet_debug) printf params 129#define WPRINTF(params) printf params 130 131/* 132 * Per-device softc 133 */ 134struct pci_vtnet_softc { 135 struct pci_devinst *vsc_pi; 136 pthread_mutex_t vsc_mtx; 137 struct mevent *vsc_mevp; 138 139 int vsc_curq; 140 int vsc_status; 141 int vsc_isr; 142 int vsc_tapfd; 143 int vsc_rx_ready; 144 int resetting; 145 146 uint32_t vsc_features; 147 uint8_t vsc_macaddr[6]; 148 149 uint64_t vsc_pfn[VTNET_MAXQ]; 150 struct vring_hqueue vsc_hq[VTNET_MAXQ]; 151 uint16_t vsc_msix_table_idx[VTNET_MAXQ]; 152 153 pthread_mutex_t rx_mtx; 154 int rx_in_progress; 155 156 pthread_t tx_tid; 157 pthread_mutex_t tx_mtx; 158 pthread_cond_t tx_cond; 159 int tx_in_progress; 160}; 161#define vtnet_ctx(sc) ((sc)->vsc_pi->pi_vmctx) 162#define notify_on_empty(sc) ((sc)->vsc_features & VIRTIO_F_NOTIFY_ON_EMPTY) 163 164/* 165 * Return the size of IO BAR that maps virtio header and device specific 166 * region. The size would vary depending on whether MSI-X is enabled or 167 * not. 168 */ 169static uint64_t 170pci_vtnet_iosize(struct pci_devinst *pi) 171{ 172 if (pci_msix_enabled(pi)) 173 return (VTNET_REGSZ); 174 else 175 return (VTNET_REGSZ - (VTCFG_R_CFG1 - VTCFG_R_MSIX)); 176} 177 178/* 179 * Return the number of available descriptors in the vring taking care 180 * of the 16-bit index wraparound. 181 */ 182static int 183hq_num_avail(struct vring_hqueue *hq) 184{ 185 uint16_t ndesc; 186 187 /* 188 * We're just computing (a-b) mod 2^16 189 * 190 * The only glitch here is that in standard C, 191 * uint16_t promotes to (signed) int when int has 192 * more than 16 bits (pretty much always now), so 193 * we have to force it back to unsigned. 194 */ 195 ndesc = (unsigned)*hq->hq_avail_idx - (unsigned)hq->hq_cur_aidx; 196 197 assert(ndesc <= hq->hq_size); 198 199 return (ndesc); 200} 201 202static uint16_t 203pci_vtnet_qsize(int qnum) 204{ 205 /* XXX no ctl queue currently */ 206 if (qnum == VTNET_CTLQ) { 207 return (0); 208 } 209 210 /* XXX fixed currently. Maybe different for tx/rx/ctl */ 211 return (VTNET_RINGSZ); 212} 213 214static void 215pci_vtnet_ring_reset(struct pci_vtnet_softc *sc, int ring) 216{ 217 struct vring_hqueue *hq; 218 219 assert(ring < VTNET_MAXQ); 220 221 hq = &sc->vsc_hq[ring]; 222 223 /* 224 * Reset all soft state 225 */ 226 hq->hq_cur_aidx = 0; 227} 228 229/* 230 * If the transmit thread is active then stall until it is done. 231 */ 232static void 233pci_vtnet_txwait(struct pci_vtnet_softc *sc) 234{ 235 236 pthread_mutex_lock(&sc->tx_mtx); 237 while (sc->tx_in_progress) { 238 pthread_mutex_unlock(&sc->tx_mtx); 239 usleep(10000); 240 pthread_mutex_lock(&sc->tx_mtx); 241 } 242 pthread_mutex_unlock(&sc->tx_mtx); 243} 244 245/* 246 * If the receive thread is active then stall until it is done. 247 */ 248static void 249pci_vtnet_rxwait(struct pci_vtnet_softc *sc) 250{ 251 252 pthread_mutex_lock(&sc->rx_mtx); 253 while (sc->rx_in_progress) { 254 pthread_mutex_unlock(&sc->rx_mtx); 255 usleep(10000); 256 pthread_mutex_lock(&sc->rx_mtx); 257 } 258 pthread_mutex_unlock(&sc->rx_mtx); 259} 260 261static void 262pci_vtnet_update_status(struct pci_vtnet_softc *sc, uint32_t value) 263{ 264 int i; 265 266 if (value == 0) { 267 DPRINTF(("vtnet: device reset requested !\n")); 268 269 sc->resetting = 1; 270 271 /* 272 * Wait for the transmit and receive threads to finish their 273 * processing. 274 */ 275 pci_vtnet_txwait(sc); 276 pci_vtnet_rxwait(sc); 277 278 sc->vsc_rx_ready = 0; 279 pci_vtnet_ring_reset(sc, VTNET_RXQ); 280 pci_vtnet_ring_reset(sc, VTNET_TXQ); 281 282 for (i = 0; i < VTNET_MAXQ; i++) 283 sc->vsc_msix_table_idx[i] = VIRTIO_MSI_NO_VECTOR; 284 285 sc->vsc_isr = 0; 286 sc->vsc_features = 0; 287 288 sc->resetting = 0; 289 } 290 291 sc->vsc_status = value; 292} 293 294static void 295vtnet_generate_interrupt(struct pci_vtnet_softc *sc, int qidx) 296{ 297 298 if (use_msix) { 299 pci_generate_msix(sc->vsc_pi, sc->vsc_msix_table_idx[qidx]); 300 } else { 301 sc->vsc_isr |= 1; 302 pci_generate_msi(sc->vsc_pi, 0); 303 } 304} 305 306/* 307 * Called to send a buffer chain out to the tap device 308 */ 309static void 310pci_vtnet_tap_tx(struct pci_vtnet_softc *sc, struct iovec *iov, int iovcnt, 311 int len) 312{ 313 char pad[60]; 314 315 if (sc->vsc_tapfd == -1) 316 return; 317 318 /* 319 * If the length is < 60, pad out to that and add the 320 * extra zero'd segment to the iov. It is guaranteed that 321 * there is always an extra iov available by the caller. 322 */ 323 if (len < 60) { 324 memset(pad, 0, 60 - len); 325 iov[iovcnt].iov_base = pad; 326 iov[iovcnt].iov_len = 60 - len; 327 iovcnt++; 328 } 329 (void) writev(sc->vsc_tapfd, iov, iovcnt); 330} 331 332/* 333 * Called when there is read activity on the tap file descriptor. 334 * Each buffer posted by the guest is assumed to be able to contain 335 * an entire ethernet frame + rx header. 336 * MP note: the dummybuf is only used for discarding frames, so there 337 * is no need for it to be per-vtnet or locked. 338 */ 339static uint8_t dummybuf[2048]; 340 341static void 342pci_vtnet_tap_rx(struct pci_vtnet_softc *sc) 343{ 344 struct virtio_desc *vd; 345 struct virtio_used *vu; 346 struct vring_hqueue *hq; 347 struct virtio_net_rxhdr *vrx; 348 uint8_t *buf; 349 int i; 350 int len; 351 int ndescs; 352 int didx, uidx, aidx; /* descriptor, avail and used index */ 353 354 /* 355 * Should never be called without a valid tap fd 356 */ 357 assert(sc->vsc_tapfd != -1); 358 359 /* 360 * But, will be called when the rx ring hasn't yet 361 * been set up or the guest is resetting the device. 362 */ 363 if (!sc->vsc_rx_ready || sc->resetting) { 364 /* 365 * Drop the packet and try later. 366 */ 367 (void) read(sc->vsc_tapfd, dummybuf, sizeof(dummybuf)); 368 return; 369 } 370 371 /* 372 * Calculate the number of available rx buffers 373 */ 374 hq = &sc->vsc_hq[VTNET_RXQ]; 375 376 ndescs = hq_num_avail(hq); 377 378 if (ndescs == 0) { 379 /* 380 * Drop the packet and try later 381 */ 382 (void) read(sc->vsc_tapfd, dummybuf, sizeof(dummybuf)); 383 384 if (notify_on_empty(sc)) 385 vtnet_generate_interrupt(sc, VTNET_RXQ); 386 387 return; 388 } 389 390 aidx = hq->hq_cur_aidx; 391 uidx = *hq->hq_used_idx; 392 for (i = 0; i < ndescs; i++) { 393 /* 394 * 'aidx' indexes into the an array of descriptor indexes 395 */ 396 didx = hq->hq_avail_ring[aidx % hq->hq_size]; 397 assert(didx >= 0 && didx < hq->hq_size); 398 399 vd = &hq->hq_dtable[didx]; 400 401 /* 402 * Get a pointer to the rx header, and use the 403 * data immediately following it for the packet buffer. 404 */ 405 vrx = paddr_guest2host(vtnet_ctx(sc), vd->vd_addr, vd->vd_len); 406 buf = (uint8_t *)(vrx + 1); 407 408 len = read(sc->vsc_tapfd, buf, 409 vd->vd_len - sizeof(struct virtio_net_rxhdr)); 410 411 if (len < 0 && errno == EWOULDBLOCK) { 412 break; 413 } 414 415 /* 416 * The only valid field in the rx packet header is the 417 * number of buffers, which is always 1 without TSO 418 * support. 419 */ 420 memset(vrx, 0, sizeof(struct virtio_net_rxhdr)); 421 vrx->vrh_bufs = 1; 422 423 /* 424 * Write this descriptor into the used ring 425 */ 426 vu = &hq->hq_used_ring[uidx % hq->hq_size]; 427 vu->vu_idx = didx; 428 vu->vu_tlen = len + sizeof(struct virtio_net_rxhdr); 429 uidx++; 430 aidx++; 431 } 432 433 /* 434 * Update the used pointer, and signal an interrupt if allowed 435 */ 436 *hq->hq_used_idx = uidx; 437 hq->hq_cur_aidx = aidx; 438 439 if ((*hq->hq_avail_flags & VRING_AVAIL_F_NO_INTERRUPT) == 0) 440 vtnet_generate_interrupt(sc, VTNET_RXQ); 441} 442 443static void 444pci_vtnet_tap_callback(int fd, enum ev_type type, void *param) 445{ 446 struct pci_vtnet_softc *sc = param; 447 448 pthread_mutex_lock(&sc->rx_mtx); 449 sc->rx_in_progress = 1; 450 pci_vtnet_tap_rx(sc); 451 sc->rx_in_progress = 0; 452 pthread_mutex_unlock(&sc->rx_mtx); 453 454} 455 456static void 457pci_vtnet_ping_rxq(struct pci_vtnet_softc *sc) 458{ 459 /* 460 * A qnotify means that the rx process can now begin 461 */ 462 if (sc->vsc_rx_ready == 0) { 463 sc->vsc_rx_ready = 1; 464 } 465} 466 467static void 468pci_vtnet_proctx(struct pci_vtnet_softc *sc, struct vring_hqueue *hq) 469{ 470 struct iovec iov[VTNET_MAXSEGS + 1]; 471 struct virtio_desc *vd; 472 struct virtio_used *vu; 473 int i; 474 int plen; 475 int tlen; 476 int uidx, aidx, didx; 477 478 uidx = *hq->hq_used_idx; 479 aidx = hq->hq_cur_aidx; 480 didx = hq->hq_avail_ring[aidx % hq->hq_size]; 481 assert(didx >= 0 && didx < hq->hq_size); 482 483 vd = &hq->hq_dtable[didx]; 484 485 /* 486 * Run through the chain of descriptors, ignoring the 487 * first header descriptor. However, include the header 488 * length in the total length that will be put into the 489 * used queue. 490 */ 491 tlen = vd->vd_len; 492 vd = &hq->hq_dtable[vd->vd_next]; 493 494 for (i = 0, plen = 0; 495 i < VTNET_MAXSEGS; 496 i++, vd = &hq->hq_dtable[vd->vd_next]) { 497 iov[i].iov_base = paddr_guest2host(vtnet_ctx(sc), 498 vd->vd_addr, vd->vd_len); 499 iov[i].iov_len = vd->vd_len; 500 plen += vd->vd_len; 501 tlen += vd->vd_len; 502 503 if ((vd->vd_flags & VRING_DESC_F_NEXT) == 0) 504 break; 505 } 506 assert(i < VTNET_MAXSEGS); 507 508 DPRINTF(("virtio: packet send, %d bytes, %d segs\n\r", plen, i + 1)); 509 pci_vtnet_tap_tx(sc, iov, i + 1, plen); 510 511 /* 512 * Return this chain back to the host 513 */ 514 vu = &hq->hq_used_ring[uidx % hq->hq_size]; 515 vu->vu_idx = didx; 516 vu->vu_tlen = tlen; 517 hq->hq_cur_aidx = aidx + 1; 518 *hq->hq_used_idx = uidx + 1; 519} 520 521static void 522pci_vtnet_ping_txq(struct pci_vtnet_softc *sc) 523{ 524 struct vring_hqueue *hq = &sc->vsc_hq[VTNET_TXQ]; 525 int ndescs; 526 527 /* 528 * Calculate number of ring entries to process 529 */ 530 ndescs = hq_num_avail(hq); 531 532 if (ndescs == 0) 533 return; 534 535 /* Signal the tx thread for processing */ 536 pthread_mutex_lock(&sc->tx_mtx); 537 if (sc->tx_in_progress == 0) 538 pthread_cond_signal(&sc->tx_cond); 539 pthread_mutex_unlock(&sc->tx_mtx); 540} 541 542/* 543 * Thread which will handle processing of TX desc 544 */ 545static void * 546pci_vtnet_tx_thread(void *param) 547{ 548 struct pci_vtnet_softc *sc = (struct pci_vtnet_softc *) param; 549 struct vring_hqueue *hq; 550 int i, ndescs, error; 551 552 hq = &sc->vsc_hq[VTNET_TXQ]; 553 554 /* 555 * Let us wait till the tx queue pointers get initialised & 556 * first tx signaled 557 */ 558 pthread_mutex_lock(&sc->tx_mtx); 559 error = pthread_cond_wait(&sc->tx_cond, &sc->tx_mtx); 560 assert(error == 0); 561 562 for (;;) { 563 pthread_mutex_lock(&sc->tx_mtx); 564 for (;;) { 565 if (sc->resetting) 566 ndescs = 0; 567 else 568 ndescs = hq_num_avail(hq); 569 570 if (ndescs != 0) 571 break; 572 573 sc->tx_in_progress = 0; 574 error = pthread_cond_wait(&sc->tx_cond, &sc->tx_mtx); 575 assert(error == 0); 576 } 577 sc->tx_in_progress = 1; 578 pthread_mutex_unlock(&sc->tx_mtx); 579 580 while (ndescs > 0) { 581 /* 582 * Run through all the entries, placing them into 583 * iovecs and sending when an end-of-packet is found 584 */ 585 for (i = 0; i < ndescs; i++) 586 pci_vtnet_proctx(sc, hq); 587 588 ndescs = hq_num_avail(hq); 589 } 590 591 /* 592 * Generate an interrupt if needed. 593 */ 594 if (notify_on_empty(sc) || 595 (*hq->hq_avail_flags & VRING_AVAIL_F_NO_INTERRUPT) == 0) 596 vtnet_generate_interrupt(sc, VTNET_TXQ); 597 } 598} 599 600static void 601pci_vtnet_ping_ctlq(struct pci_vtnet_softc *sc) 602{ 603 604 DPRINTF(("vtnet: control qnotify!\n\r")); 605} 606 607static void 608pci_vtnet_ring_init(struct pci_vtnet_softc *sc, uint64_t pfn) 609{ 610 struct vring_hqueue *hq; 611 int qnum = sc->vsc_curq; 612 613 assert(qnum < VTNET_MAXQ); 614 615 sc->vsc_pfn[qnum] = pfn << VRING_PFN; 616 617 /* 618 * Set up host pointers to the various parts of the 619 * queue 620 */ 621 hq = &sc->vsc_hq[qnum]; 622 hq->hq_size = pci_vtnet_qsize(qnum); 623 624 hq->hq_dtable = paddr_guest2host(vtnet_ctx(sc), pfn << VRING_PFN, 625 vring_size(hq->hq_size)); 626 hq->hq_avail_flags = (uint16_t *)(hq->hq_dtable + hq->hq_size); 627 hq->hq_avail_idx = hq->hq_avail_flags + 1; 628 hq->hq_avail_ring = hq->hq_avail_flags + 2; 629 hq->hq_used_flags = (uint16_t *)roundup2((uintptr_t)hq->hq_avail_ring, 630 VRING_ALIGN); 631 hq->hq_used_idx = hq->hq_used_flags + 1; 632 hq->hq_used_ring = (struct virtio_used *)(hq->hq_used_flags + 2); 633 634 /* 635 * Initialize queue indexes 636 */ 637 hq->hq_cur_aidx = 0; 638} 639 640static int 641pci_vtnet_init(struct vmctx *ctx, struct pci_devinst *pi, char *opts) 642{ 643 MD5_CTX mdctx; 644 unsigned char digest[16]; 645 char nstr[80]; 646 char tname[MAXCOMLEN + 1]; 647 struct pci_vtnet_softc *sc; 648 const char *env_msi; 649 650 sc = malloc(sizeof(struct pci_vtnet_softc)); 651 memset(sc, 0, sizeof(struct pci_vtnet_softc)); 652 653 pi->pi_arg = sc; 654 sc->vsc_pi = pi; 655 656 pthread_mutex_init(&sc->vsc_mtx, NULL); 657 658 /* 659 * Use MSI if set by user 660 */ 661 if ((env_msi = getenv("BHYVE_USE_MSI")) != NULL) { 662 if (strcasecmp(env_msi, "yes") == 0) 663 use_msix = 0; 664 } 665 666 /* 667 * Attempt to open the tap device 668 */ 669 sc->vsc_tapfd = -1; 670 if (opts != NULL) { 671 char tbuf[80]; 672 673 strcpy(tbuf, "/dev/"); 674 strlcat(tbuf, opts, sizeof(tbuf)); 675 676 sc->vsc_tapfd = open(tbuf, O_RDWR); 677 if (sc->vsc_tapfd == -1) { 678 WPRINTF(("open of tap device %s failed\n", tbuf)); 679 } else { 680 /* 681 * Set non-blocking and register for read 682 * notifications with the event loop 683 */ 684 int opt = 1; 685 if (ioctl(sc->vsc_tapfd, FIONBIO, &opt) < 0) { 686 WPRINTF(("tap device O_NONBLOCK failed\n")); 687 close(sc->vsc_tapfd); 688 sc->vsc_tapfd = -1; 689 } 690 691 sc->vsc_mevp = mevent_add(sc->vsc_tapfd, 692 EVF_READ, 693 pci_vtnet_tap_callback, 694 sc); 695 if (sc->vsc_mevp == NULL) { 696 WPRINTF(("Could not register event\n")); 697 close(sc->vsc_tapfd); 698 sc->vsc_tapfd = -1; 699 } 700 } 701 } 702 703 /* 704 * The MAC address is the standard NetApp OUI of 00-a0-98, 705 * followed by an MD5 of the vm name. The slot/func number is 706 * prepended to this for slots other than 1:0, so that 707 * a bootloader can netboot from the equivalent of slot 1. 708 */ 709 if (pi->pi_slot == 1 && pi->pi_func == 0) { 710 strncpy(nstr, vmname, sizeof(nstr)); 711 } else { 712 snprintf(nstr, sizeof(nstr), "%d-%d-%s", pi->pi_slot, 713 pi->pi_func, vmname); 714 } 715 716 MD5Init(&mdctx); 717 MD5Update(&mdctx, nstr, strlen(nstr)); 718 MD5Final(digest, &mdctx); 719 720 sc->vsc_macaddr[0] = 0x00; 721 sc->vsc_macaddr[1] = 0xa0; 722 sc->vsc_macaddr[2] = 0x98; 723 sc->vsc_macaddr[3] = digest[0]; 724 sc->vsc_macaddr[4] = digest[1]; 725 sc->vsc_macaddr[5] = digest[2]; 726 727 /* initialize config space */ 728 pci_set_cfgdata16(pi, PCIR_DEVICE, VIRTIO_DEV_NET); 729 pci_set_cfgdata16(pi, PCIR_VENDOR, VIRTIO_VENDOR); 730 pci_set_cfgdata8(pi, PCIR_CLASS, PCIC_NETWORK); 731 pci_set_cfgdata16(pi, PCIR_SUBDEV_0, VIRTIO_TYPE_NET); 732 733 if (use_msix) { 734 /* MSI-X support */ 735 int i; 736 737 for (i = 0; i < VTNET_MAXQ; i++) 738 sc->vsc_msix_table_idx[i] = VIRTIO_MSI_NO_VECTOR; 739 740 /* 741 * BAR 1 used to map MSI-X table and PBA 742 */ 743 if (pci_emul_add_msixcap(pi, VTNET_MAXQ, 1)) 744 return (1); 745 } else { 746 /* MSI support */ 747 pci_emul_add_msicap(pi, 1); 748 } 749 750 pci_emul_alloc_bar(pi, 0, PCIBAR_IO, VTNET_REGSZ); 751 752 sc->resetting = 0; 753 754 sc->rx_in_progress = 0; 755 pthread_mutex_init(&sc->rx_mtx, NULL); 756 757 /* 758 * Initialize tx semaphore & spawn TX processing thread 759 * As of now, only one thread for TX desc processing is 760 * spawned. 761 */ 762 sc->tx_in_progress = 0; 763 pthread_mutex_init(&sc->tx_mtx, NULL); 764 pthread_cond_init(&sc->tx_cond, NULL); 765 pthread_create(&sc->tx_tid, NULL, pci_vtnet_tx_thread, (void *)sc); 766 snprintf(tname, sizeof(tname), "%s vtnet%d tx", vmname, pi->pi_slot); 767 pthread_set_name_np(sc->tx_tid, tname); 768 769 return (0); 770} 771 772/* 773 * Function pointer array to handle queue notifications 774 */ 775static void (*pci_vtnet_qnotify[VTNET_MAXQ])(struct pci_vtnet_softc *) = { 776 pci_vtnet_ping_rxq, 777 pci_vtnet_ping_txq, 778 pci_vtnet_ping_ctlq 779}; 780 781static uint64_t 782vtnet_adjust_offset(struct pci_devinst *pi, uint64_t offset) 783{ 784 /* 785 * Device specific offsets used by guest would change based on 786 * whether MSI-X capability is enabled or not 787 */ 788 if (!pci_msix_enabled(pi)) { 789 if (offset >= VTCFG_R_MSIX) 790 return (offset + (VTCFG_R_CFG1 - VTCFG_R_MSIX)); 791 } 792 793 return (offset); 794} 795 796static void 797pci_vtnet_write(struct vmctx *ctx, int vcpu, struct pci_devinst *pi, 798 int baridx, uint64_t offset, int size, uint64_t value) 799{ 800 struct pci_vtnet_softc *sc = pi->pi_arg; 801 void *ptr; 802 803 if (use_msix) { 804 if (baridx == pci_msix_table_bar(pi) || 805 baridx == pci_msix_pba_bar(pi)) { 806 pci_emul_msix_twrite(pi, offset, size, value); 807 return; 808 } 809 } 810 811 assert(baridx == 0); 812 813 if (offset + size > pci_vtnet_iosize(pi)) { 814 DPRINTF(("vtnet_write: 2big, offset %ld size %d\n", 815 offset, size)); 816 return; 817 } 818 819 pthread_mutex_lock(&sc->vsc_mtx); 820 821 offset = vtnet_adjust_offset(pi, offset); 822 823 switch (offset) { 824 case VTCFG_R_GUESTCAP: 825 assert(size == 4); 826 sc->vsc_features = value & VTNET_S_HOSTCAPS; 827 break; 828 case VTCFG_R_PFN: 829 assert(size == 4); 830 pci_vtnet_ring_init(sc, value); 831 break; 832 case VTCFG_R_QSEL: 833 assert(size == 2); 834 assert(value < VTNET_MAXQ); 835 sc->vsc_curq = value; 836 break; 837 case VTCFG_R_QNOTIFY: 838 assert(size == 2); 839 assert(value < VTNET_MAXQ); 840 (*pci_vtnet_qnotify[value])(sc); 841 break; 842 case VTCFG_R_STATUS: 843 assert(size == 1); 844 pci_vtnet_update_status(sc, value); 845 break; 846 case VTCFG_R_CFGVEC: 847 assert(size == 2); 848 sc->vsc_msix_table_idx[VTNET_CTLQ] = value; 849 break; 850 case VTCFG_R_QVEC: 851 assert(size == 2); 852 assert(sc->vsc_curq != VTNET_CTLQ); 853 sc->vsc_msix_table_idx[sc->vsc_curq] = value; 854 break; 855 case VTNET_R_CFG0: 856 case VTNET_R_CFG1: 857 case VTNET_R_CFG2: 858 case VTNET_R_CFG3: 859 case VTNET_R_CFG4: 860 case VTNET_R_CFG5: 861 assert((size + offset) <= (VTNET_R_CFG5 + 1)); 862 ptr = &sc->vsc_macaddr[offset - VTNET_R_CFG0]; 863 /* 864 * The driver is allowed to change the MAC address 865 */ 866 sc->vsc_macaddr[offset - VTNET_R_CFG0] = value; 867 if (size == 1) { 868 *(uint8_t *) ptr = value; 869 } else if (size == 2) { 870 *(uint16_t *) ptr = value; 871 } else { 872 *(uint32_t *) ptr = value; 873 } 874 break; 875 case VTCFG_R_HOSTCAP: 876 case VTCFG_R_QNUM: 877 case VTCFG_R_ISR: 878 case VTNET_R_CFG6: 879 case VTNET_R_CFG7: 880 DPRINTF(("vtnet: write to readonly reg %ld\n\r", offset)); 881 break; 882 default: 883 DPRINTF(("vtnet: unknown i/o write offset %ld\n\r", offset)); 884 value = 0; 885 break; 886 } 887 888 pthread_mutex_unlock(&sc->vsc_mtx); 889} 890 891uint64_t 892pci_vtnet_read(struct vmctx *ctx, int vcpu, struct pci_devinst *pi, 893 int baridx, uint64_t offset, int size) 894{ 895 struct pci_vtnet_softc *sc = pi->pi_arg; 896 void *ptr; 897 uint64_t value; 898 899 if (use_msix) { 900 if (baridx == pci_msix_table_bar(pi) || 901 baridx == pci_msix_pba_bar(pi)) { 902 return (pci_emul_msix_tread(pi, offset, size)); 903 } 904 } 905 906 assert(baridx == 0); 907 908 if (offset + size > pci_vtnet_iosize(pi)) { 909 DPRINTF(("vtnet_read: 2big, offset %ld size %d\n", 910 offset, size)); 911 return (0); 912 } 913 914 pthread_mutex_lock(&sc->vsc_mtx); 915 916 offset = vtnet_adjust_offset(pi, offset); 917 918 switch (offset) { 919 case VTCFG_R_HOSTCAP: 920 assert(size == 4); 921 value = VTNET_S_HOSTCAPS; 922 break; 923 case VTCFG_R_GUESTCAP: 924 assert(size == 4); 925 value = sc->vsc_features; /* XXX never read ? */ 926 break; 927 case VTCFG_R_PFN: 928 assert(size == 4); 929 value = sc->vsc_pfn[sc->vsc_curq] >> VRING_PFN; 930 break; 931 case VTCFG_R_QNUM: 932 assert(size == 2); 933 value = pci_vtnet_qsize(sc->vsc_curq); 934 break; 935 case VTCFG_R_QSEL: 936 assert(size == 2); 937 value = sc->vsc_curq; /* XXX never read ? */ 938 break; 939 case VTCFG_R_QNOTIFY: 940 assert(size == 2); 941 value = sc->vsc_curq; /* XXX never read ? */ 942 break; 943 case VTCFG_R_STATUS: 944 assert(size == 1); 945 value = sc->vsc_status; 946 break; 947 case VTCFG_R_ISR: 948 assert(size == 1); 949 value = sc->vsc_isr; 950 sc->vsc_isr = 0; /* a read clears this flag */ 951 break; 952 case VTCFG_R_CFGVEC: 953 assert(size == 2); 954 value = sc->vsc_msix_table_idx[VTNET_CTLQ]; 955 break; 956 case VTCFG_R_QVEC: 957 assert(size == 2); 958 assert(sc->vsc_curq != VTNET_CTLQ); 959 value = sc->vsc_msix_table_idx[sc->vsc_curq]; 960 break; 961 case VTNET_R_CFG0: 962 case VTNET_R_CFG1: 963 case VTNET_R_CFG2: 964 case VTNET_R_CFG3: 965 case VTNET_R_CFG4: 966 case VTNET_R_CFG5: 967 assert((size + offset) <= (VTNET_R_CFG5 + 1)); 968 ptr = &sc->vsc_macaddr[offset - VTNET_R_CFG0]; 969 if (size == 1) { 970 value = *(uint8_t *) ptr; 971 } else if (size == 2) { 972 value = *(uint16_t *) ptr; 973 } else { 974 value = *(uint32_t *) ptr; 975 } 976 break; 977 case VTNET_R_CFG6: 978 assert(size != 4); 979 value = 0x01; /* XXX link always up */ 980 break; 981 case VTNET_R_CFG7: 982 assert(size == 1); 983 value = 0; /* XXX link status in LSB */ 984 break; 985 default: 986 DPRINTF(("vtnet: unknown i/o read offset %ld\n\r", offset)); 987 value = 0; 988 break; 989 } 990 991 pthread_mutex_unlock(&sc->vsc_mtx); 992 993 return (value); 994} 995 996struct pci_devemu pci_de_vnet = { 997 .pe_emu = "virtio-net", 998 .pe_init = pci_vtnet_init, 999 .pe_barwrite = pci_vtnet_write, 1000 .pe_barread = pci_vtnet_read 1001}; 1002PCI_EMUL_SET(pci_de_vnet); 1003