pci_virtio_net.c revision 249917
1141104Sharti/*- 294589Sobrien * Copyright (c) 2011 NetApp, Inc. 394589Sobrien * All rights reserved. 45814Sjkh * 51590Srgrimes * Redistribution and use in source and binary forms, with or without 61590Srgrimes * modification, are permitted provided that the following conditions 71590Srgrimes * are met: 81590Srgrimes * 1. Redistributions of source code must retain the above copyright 91590Srgrimes * notice, this list of conditions and the following disclaimer. 101590Srgrimes * 2. Redistributions in binary form must reproduce the above copyright 111590Srgrimes * notice, this list of conditions and the following disclaimer in the 121590Srgrimes * documentation and/or other materials provided with the distribution. 131590Srgrimes * 141590Srgrimes * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND 151590Srgrimes * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 161590Srgrimes * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 171590Srgrimes * ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE 181590Srgrimes * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 191590Srgrimes * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 201590Srgrimes * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 211590Srgrimes * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 221590Srgrimes * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 231590Srgrimes * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 241590Srgrimes * SUCH DAMAGE. 251590Srgrimes * 261590Srgrimes * $FreeBSD: head/usr.sbin/bhyve/pci_virtio_net.c 249917 2013-04-26 05:13:48Z grehan $ 271590Srgrimes */ 281590Srgrimes 291590Srgrimes#include <sys/cdefs.h> 301590Srgrimes__FBSDID("$FreeBSD: head/usr.sbin/bhyve/pci_virtio_net.c 249917 2013-04-26 05:13:48Z grehan $"); 311590Srgrimes 321590Srgrimes#include <sys/param.h> 331590Srgrimes#include <sys/linker_set.h> 341590Srgrimes#include <sys/select.h> 351590Srgrimes#include <sys/uio.h> 361590Srgrimes#include <sys/ioctl.h> 371590Srgrimes 3862833Swsanchez#include <errno.h> 3962833Swsanchez#include <fcntl.h> 401590Srgrimes#include <stdio.h> 411590Srgrimes#include <stdlib.h> 4262833Swsanchez#include <stdint.h> 4394587Sobrien#include <string.h> 441590Srgrimes#include <strings.h> 4535483Simp#include <unistd.h> 46103503Sjmallett#include <assert.h> 4735483Simp#include <md5.h> 4835483Simp#include <pthread.h> 491590Srgrimes#include <pthread_np.h> 501590Srgrimes 511590Srgrimes#include "bhyverun.h" 521590Srgrimes#include "pci_emul.h" 531590Srgrimes#include "mevent.h" 541590Srgrimes#include "virtio.h" 551590Srgrimes 561590Srgrimes#define VTNET_RINGSZ 1024 571590Srgrimes 581590Srgrimes#define VTNET_MAXSEGS 32 591590Srgrimes 601590Srgrimes/* 611590Srgrimes * PCI config-space register offsets 621590Srgrimes */ 631590Srgrimes#define VTNET_R_CFG0 24 641590Srgrimes#define VTNET_R_CFG1 25 651590Srgrimes#define VTNET_R_CFG2 26 661590Srgrimes#define VTNET_R_CFG3 27 671590Srgrimes#define VTNET_R_CFG4 28 681590Srgrimes#define VTNET_R_CFG5 29 691590Srgrimes#define VTNET_R_CFG6 30 701590Srgrimes#define VTNET_R_CFG7 31 711590Srgrimes#define VTNET_R_MAX 31 721590Srgrimes 731590Srgrimes#define VTNET_REGSZ VTNET_R_MAX+1 741590Srgrimes 751590Srgrimes/* 761590Srgrimes * Host capabilities 771590Srgrimes */ 781590Srgrimes#define VTNET_S_HOSTCAPS \ 791590Srgrimes ( 0x00000020 | /* host supplies MAC */ \ 801590Srgrimes 0x00008000 | /* host can merge Rx buffers */ \ 811590Srgrimes 0x00010000 ) /* config status available */ 821590Srgrimes 831590Srgrimes/* 841590Srgrimes * Queue definitions. 851590Srgrimes */ 8694594Sobrien#define VTNET_RXQ 0 871590Srgrimes#define VTNET_TXQ 1 881590Srgrimes#define VTNET_CTLQ 2 891590Srgrimes 901590Srgrimes#define VTNET_MAXQ 3 911590Srgrimes 921590Srgrimesstatic int use_msix = 1; 931590Srgrimes 941590Srgrimesstruct vring_hqueue { 951590Srgrimes /* Internal state */ 961590Srgrimes uint16_t hq_size; 971590Srgrimes uint16_t hq_cur_aidx; /* trails behind 'avail_idx' */ 981590Srgrimes 991590Srgrimes /* Host-context pointers to the queue */ 1001590Srgrimes struct virtio_desc *hq_dtable; 1011590Srgrimes uint16_t *hq_avail_flags; 1021590Srgrimes uint16_t *hq_avail_idx; /* monotonically increasing */ 1031590Srgrimes uint16_t *hq_avail_ring; 1041590Srgrimes 1051590Srgrimes uint16_t *hq_used_flags; 106141104Sharti uint16_t *hq_used_idx; /* monotonically increasing */ 1071590Srgrimes struct virtio_used *hq_used_ring; 108107447Sru}; 109104475Sphk 110107447Sru/* 1111590Srgrimes * Fixed network header size 112141104Sharti */ 11394506Scharnierstruct virtio_net_rxhdr { 1145814Sjkh uint8_t vrh_flags; 1151590Srgrimes uint8_t vrh_gso_type; 1165814Sjkh uint16_t vrh_hdr_len; 117141104Sharti uint16_t vrh_gso_size; 11880381Ssheldonh uint16_t vrh_csum_start; 11994506Scharnier uint16_t vrh_csum_offset; 120141104Sharti uint16_t vrh_bufs; 121141104Sharti} __packed; 122142457Sharti 123141104Sharti/* 1241590Srgrimes * Debug printf 125141104Sharti */ 126141104Shartistatic int pci_vtnet_debug; 1271590Srgrimes#define DPRINTF(params) if (pci_vtnet_debug) printf params 128141104Sharti#define WPRINTF(params) printf params 129141104Sharti 1301590Srgrimes/* 131141104Sharti * Per-device softc 132141104Sharti */ 133141104Shartistruct pci_vtnet_softc { 134141104Sharti struct pci_devinst *vsc_pi; 1351590Srgrimes pthread_mutex_t vsc_mtx; 136137202Sharti struct mevent *vsc_mevp; 137137202Sharti 1381590Srgrimes int vsc_curq; 1398874Srgrimes int vsc_status; 1401590Srgrimes int vsc_isr; 1411590Srgrimes int vsc_tapfd; 1421590Srgrimes int vsc_rx_ready; 143103503Sjmallett int vsc_rxpend; 144103503Sjmallett int tx_in_progress; 145103503Sjmallett int resetting; 1461590Srgrimes 14718730Ssteve uint32_t vsc_features; 14818730Ssteve uint8_t vsc_macaddr[6]; 14918730Ssteve 15018730Ssteve uint64_t vsc_pfn[VTNET_MAXQ]; 151138232Sharti struct vring_hqueue vsc_hq[VTNET_MAXQ]; 1521590Srgrimes uint16_t vsc_msix_table_idx[VTNET_MAXQ]; 1531590Srgrimes pthread_t tx_tid; 1541590Srgrimes pthread_mutex_t tx_mtx; 1551590Srgrimes pthread_cond_t tx_cond; 1561590Srgrimes}; 1571590Srgrimes#define vtnet_ctx(sc) ((sc)->vsc_pi->pi_vmctx) 1581590Srgrimes 1591590Srgrimes/* 1601590Srgrimes * Return the size of IO BAR that maps virtio header and device specific 1611590Srgrimes * region. The size would vary depending on whether MSI-X is enabled or 1621590Srgrimes * not. 1631590Srgrimes */ 1641590Srgrimesstatic uint64_t 1651590Srgrimespci_vtnet_iosize(struct pci_devinst *pi) 1661590Srgrimes{ 167103503Sjmallett if (pci_msix_enabled(pi)) 168103503Sjmallett return (VTNET_REGSZ); 169103503Sjmallett else 170103503Sjmallett return (VTNET_REGSZ - (VTCFG_R_CFG1 - VTCFG_R_MSIX)); 1711590Srgrimes} 1721590Srgrimes 17368898Skris/* 174138232Sharti * Return the number of available descriptors in the vring taking care 17568898Skris * of the 16-bit index wraparound. 17668898Skris */ 17768898Skrisstatic int 17868898Skrishq_num_avail(struct vring_hqueue *hq) 1791590Srgrimes{ 1801590Srgrimes uint16_t ndesc; 181138228Sharti 1821590Srgrimes /* 1831590Srgrimes * We're just computing (a-b) mod 2^16 1841590Srgrimes * 1851590Srgrimes * The only glitch here is that in standard C, 1861590Srgrimes * uint16_t promotes to (signed) int when int has 1871590Srgrimes * more than 16 bits (pretty much always now), so 1881590Srgrimes * we have to force it back to unsigned. 189136841Sru */ 1901590Srgrimes ndesc = (unsigned)*hq->hq_avail_idx - (unsigned)hq->hq_cur_aidx; 1911590Srgrimes 1921590Srgrimes assert(ndesc <= hq->hq_size); 1931590Srgrimes 1941590Srgrimes return (ndesc); 1951590Srgrimes} 1961590Srgrimes 1971590Srgrimesstatic uint16_t 1981590Srgrimespci_vtnet_qsize(int qnum) 1991590Srgrimes{ 20018730Ssteve /* XXX no ctl queue currently */ 20118730Ssteve if (qnum == VTNET_CTLQ) { 2021590Srgrimes return (0); 20318730Ssteve } 2041590Srgrimes 2051590Srgrimes /* XXX fixed currently. Maybe different for tx/rx/ctl */ 2061590Srgrimes return (VTNET_RINGSZ); 20764739Sgreen} 20864739Sgreen 20964739Sgreenstatic void 21064739Sgreenpci_vtnet_ring_reset(struct pci_vtnet_softc *sc, int ring) 21164739Sgreen{ 21264739Sgreen struct vring_hqueue *hq; 21364739Sgreen 21464739Sgreen assert(ring < VTNET_MAXQ); 21564739Sgreen 2161590Srgrimes hq = &sc->vsc_hq[ring]; 217138079Sharti 218138079Sharti /* 219138079Sharti * Reset all soft state 220138079Sharti */ 221138079Sharti hq->hq_cur_aidx = 0; 2221590Srgrimes} 2231590Srgrimes 224139062Shartistatic void 22518730Sstevepci_vtnet_update_status(struct pci_vtnet_softc *sc, uint32_t value) 226138916Sharti{ 227138916Sharti 228138916Sharti if (value == 0) { 229138916Sharti DPRINTF(("vtnet: device reset requested !\n")); 23018730Ssteve 2311590Srgrimes /* Wait for TX thread to complete pending desc processing */ 232137572Sphk sc->resetting = 1; 233104475Sphk pthread_mutex_lock(&sc->tx_mtx); 234104475Sphk 235104475Sphk while (sc->tx_in_progress) { 2361590Srgrimes pthread_mutex_unlock(&sc->tx_mtx); 2371590Srgrimes usleep(10000); 2381590Srgrimes pthread_mutex_lock(&sc->tx_mtx); 2391590Srgrimes } 24018730Ssteve 2411590Srgrimes pthread_mutex_unlock(&sc->tx_mtx); 242141252Sharti 2431590Srgrimes pci_vtnet_ring_reset(sc, VTNET_RXQ); 2441590Srgrimes pci_vtnet_ring_reset(sc, VTNET_TXQ); 2451590Srgrimes sc->vsc_rx_ready = 0; 246137202Sharti sc->resetting = 0; 247137202Sharti } 248138232Sharti 24918730Ssteve sc->vsc_status = value; 2501590Srgrimes} 251137252Sharti 252137252Sharti/* 253137252Sharti * Called to send a buffer chain out to the tap device 2548874Srgrimes */ 255138916Shartistatic void 256138916Shartipci_vtnet_tap_tx(struct pci_vtnet_softc *sc, struct iovec *iov, int iovcnt, 257138916Sharti int len) 2581590Srgrimes{ 259138916Sharti char pad[60]; 2601590Srgrimes 261137606Sphk if (sc->vsc_tapfd == -1) 262137606Sphk return; 263137606Sphk 2641590Srgrimes /* 265137605Sharti * If the length is < 60, pad out to that and add the 266137605Sharti * extra zero'd segment to the iov. It is guaranteed that 267137605Sharti * there is always an extra iov available by the caller. 2681590Srgrimes */ 26918730Ssteve if (len < 60) { 2701590Srgrimes memset(pad, 0, 60 - len); 2711590Srgrimes iov[iovcnt].iov_base = pad; 27218730Ssteve iov[iovcnt].iov_len = 60 - len; 2731590Srgrimes iovcnt++; 27418730Ssteve } 2751590Srgrimes (void) writev(sc->vsc_tapfd, iov, iovcnt); 2761590Srgrimes} 2771590Srgrimes 27818730Ssteve/* 27918730Ssteve * Called when there is read activity on the tap file descriptor. 28018730Ssteve * Each buffer posted by the guest is assumed to be able to contain 28118730Ssteve * an entire ethernet frame + rx header. 28218730Ssteve * MP note: the dummybuf is only used for discarding frames, so there 28318730Ssteve * is no need for it to be per-vtnet or locked. 284103503Sjmallett */ 28518730Sstevestatic uint8_t dummybuf[2048]; 286138232Sharti 28718730Sstevestatic void 28818730Sstevepci_vtnet_tap_rx(struct pci_vtnet_softc *sc) 28918730Ssteve{ 29018730Ssteve struct virtio_desc *vd; 29118730Ssteve struct virtio_used *vu; 29218730Ssteve struct vring_hqueue *hq; 29318730Ssteve struct virtio_net_rxhdr *vrx; 294103503Sjmallett uint8_t *buf; 295103503Sjmallett int i; 29618730Ssteve int len; 29718730Ssteve int ndescs; 29892921Simp int didx, uidx, aidx; /* descriptor, avail and used index */ 29992921Simp 30092921Simp /* 30192921Simp * Should never be called without a valid tap fd 30292921Simp */ 30392921Simp assert(sc->vsc_tapfd != -1); 30492921Simp 30592921Simp /* 30692921Simp * But, will be called when the rx ring hasn't yet 30792921Simp * been set up. 30892921Simp */ 30992921Simp if (sc->vsc_rx_ready == 0) { 310138228Sharti /* 31192921Simp * Drop the packet and try later. 31292921Simp */ 3131590Srgrimes (void) read(sc->vsc_tapfd, dummybuf, sizeof(dummybuf)); 314137605Sharti return; 315137605Sharti } 316137605Sharti 317137605Sharti /* 318137605Sharti * Calculate the number of available rx buffers 319137605Sharti */ 320137605Sharti hq = &sc->vsc_hq[VTNET_RXQ]; 321137605Sharti 322137605Sharti ndescs = hq_num_avail(hq); 323137605Sharti 324137605Sharti if (ndescs == 0) { 325137605Sharti /* 326137605Sharti * Need to wait for host notification to read 3271590Srgrimes */ 3281590Srgrimes if (sc->vsc_rxpend == 0) { 3291590Srgrimes WPRINTF(("vtnet: no rx descriptors !\n")); 330137252Sharti sc->vsc_rxpend = 1; 3311590Srgrimes } 3321590Srgrimes 3331590Srgrimes /* 3341590Srgrimes * Drop the packet and try later 3351590Srgrimes */ 3361590Srgrimes (void) read(sc->vsc_tapfd, dummybuf, sizeof(dummybuf)); 3371590Srgrimes return; 3381590Srgrimes } 3391590Srgrimes 3401590Srgrimes aidx = hq->hq_cur_aidx; 341104696Sjmallett uidx = *hq->hq_used_idx; 3421590Srgrimes for (i = 0; i < ndescs; i++) { 343138264Sharti /* 344138232Sharti * 'aidx' indexes into the an array of descriptor indexes 345137202Sharti */ 346137252Sharti didx = hq->hq_avail_ring[aidx % hq->hq_size]; 347137252Sharti assert(didx >= 0 && didx < hq->hq_size); 3481590Srgrimes 349138232Sharti vd = &hq->hq_dtable[didx]; 3501590Srgrimes 3511590Srgrimes /* 3521590Srgrimes * Get a pointer to the rx header, and use the 3531590Srgrimes * data immediately following it for the packet buffer. 3541590Srgrimes */ 355137252Sharti vrx = paddr_guest2host(vtnet_ctx(sc), vd->vd_addr, vd->vd_len); 3561590Srgrimes buf = (uint8_t *)(vrx + 1); 3571590Srgrimes 3581590Srgrimes len = read(sc->vsc_tapfd, buf, 3591590Srgrimes vd->vd_len - sizeof(struct virtio_net_rxhdr)); 3601590Srgrimes 3611590Srgrimes if (len < 0 && errno == EWOULDBLOCK) { 3621590Srgrimes break; 3638874Srgrimes } 3641590Srgrimes 3651590Srgrimes /* 3661590Srgrimes * The only valid field in the rx packet header is the 367104696Sjmallett * number of buffers, which is always 1 without TSO 3681590Srgrimes * support. 36918730Ssteve */ 37018730Ssteve memset(vrx, 0, sizeof(struct virtio_net_rxhdr)); 3718874Srgrimes vrx->vrh_bufs = 1; 372137605Sharti 373137605Sharti /* 374137605Sharti * Write this descriptor into the used ring 375137605Sharti */ 376103545Sjmallett vu = &hq->hq_used_ring[uidx % hq->hq_size]; 377138916Sharti vu->vu_idx = didx; 3781590Srgrimes vu->vu_tlen = len + sizeof(struct virtio_net_rxhdr); 3791590Srgrimes uidx++; 3801590Srgrimes aidx++; 3811590Srgrimes } 3821590Srgrimes 3831590Srgrimes /* 3841590Srgrimes * Update the used pointer, and signal an interrupt if allowed 38518730Ssteve */ 3861590Srgrimes *hq->hq_used_idx = uidx; 38718730Ssteve hq->hq_cur_aidx = aidx; 3881590Srgrimes 3898874Srgrimes if ((*hq->hq_avail_flags & VRING_AVAIL_F_NO_INTERRUPT) == 0) { 3901590Srgrimes if (use_msix) { 3911590Srgrimes pci_generate_msix(sc->vsc_pi, 3921590Srgrimes sc->vsc_msix_table_idx[VTNET_RXQ]); 3931590Srgrimes } else { 39438520Scracauer sc->vsc_isr |= 1; 3951590Srgrimes pci_generate_msi(sc->vsc_pi, 0); 3968874Srgrimes } 3971590Srgrimes } 3981590Srgrimes} 3991590Srgrimes 4001590Srgrimesstatic void 4011590Srgrimespci_vtnet_tap_callback(int fd, enum ev_type type, void *param) 402137605Sharti{ 4031590Srgrimes struct pci_vtnet_softc *sc = param; 40418730Ssteve 40518730Ssteve pthread_mutex_lock(&sc->vsc_mtx); 40618730Ssteve pci_vtnet_tap_rx(sc); 40718730Ssteve pthread_mutex_unlock(&sc->vsc_mtx); 4081590Srgrimes 409138232Sharti} 410138232Sharti 411138232Shartistatic void 4121590Srgrimespci_vtnet_ping_rxq(struct pci_vtnet_softc *sc) 413138232Sharti{ 41418730Ssteve /* 4155814Sjkh * A qnotify means that the rx process can now begin 416138916Sharti */ 4171590Srgrimes if (sc->vsc_rx_ready == 0) { 41818730Ssteve sc->vsc_rx_ready = 1; 419138232Sharti } 42018730Ssteve 42118730Ssteve /* 4221590Srgrimes * If the rx queue was empty, attempt to receive a 4231590Srgrimes * packet that was previously blocked due to no rx bufs 4241590Srgrimes * available 4251590Srgrimes */ 4261590Srgrimes if (sc->vsc_rxpend) { 4271590Srgrimes WPRINTF(("vtnet: rx resumed\n\r")); 4281590Srgrimes sc->vsc_rxpend = 0; 4291590Srgrimes pci_vtnet_tap_rx(sc); 4301590Srgrimes } 4311590Srgrimes} 4321590Srgrimes 4331590Srgrimesstatic void 4341590Srgrimespci_vtnet_proctx(struct pci_vtnet_softc *sc, struct vring_hqueue *hq) 4351590Srgrimes{ 4361590Srgrimes struct iovec iov[VTNET_MAXSEGS + 1]; 4371590Srgrimes struct virtio_desc *vd; 4381590Srgrimes struct virtio_used *vu; 439138561Sharti int i; 4401590Srgrimes int plen; 441138264Sharti int tlen; 442138561Sharti int uidx, aidx, didx; 4431590Srgrimes 4441590Srgrimes uidx = *hq->hq_used_idx; 4451590Srgrimes aidx = hq->hq_cur_aidx; 4461590Srgrimes didx = hq->hq_avail_ring[aidx % hq->hq_size]; 4471590Srgrimes assert(didx >= 0 && didx < hq->hq_size); 4481590Srgrimes 4491590Srgrimes vd = &hq->hq_dtable[didx]; 4501590Srgrimes 4511590Srgrimes /* 4521590Srgrimes * Run through the chain of descriptors, ignoring the 4531590Srgrimes * first header descriptor. However, include the header 4541590Srgrimes * length in the total length that will be put into the 4551590Srgrimes * used queue. 4561590Srgrimes */ 4571590Srgrimes tlen = vd->vd_len; 45894594Sobrien vd = &hq->hq_dtable[vd->vd_next]; 4591590Srgrimes 4601590Srgrimes for (i = 0, plen = 0; 4611590Srgrimes i < VTNET_MAXSEGS; 4621590Srgrimes i++, vd = &hq->hq_dtable[vd->vd_next]) { 4631590Srgrimes iov[i].iov_base = paddr_guest2host(vtnet_ctx(sc), 4641590Srgrimes vd->vd_addr, vd->vd_len); 4651590Srgrimes iov[i].iov_len = vd->vd_len; 4661590Srgrimes plen += vd->vd_len; 4671590Srgrimes tlen += vd->vd_len; 4681590Srgrimes 4691590Srgrimes if ((vd->vd_flags & VRING_DESC_F_NEXT) == 0) 4701590Srgrimes break; 4711590Srgrimes } 4721590Srgrimes assert(i < VTNET_MAXSEGS); 473104696Sjmallett 4741590Srgrimes DPRINTF(("virtio: packet send, %d bytes, %d segs\n\r", plen, i + 1)); 475141258Sharti pci_vtnet_tap_tx(sc, iov, i + 1, plen); 476141258Sharti 477141258Sharti /* 478141258Sharti * Return this chain back to the host 479141258Sharti */ 480141258Sharti vu = &hq->hq_used_ring[uidx % hq->hq_size]; 481141258Sharti vu->vu_idx = didx; 482141258Sharti vu->vu_tlen = tlen; 483141258Sharti hq->hq_cur_aidx = aidx + 1; 484141258Sharti *hq->hq_used_idx = uidx + 1; 485141258Sharti 486141258Sharti} 487141258Sharti 488141258Shartistatic void 489142457Shartipci_vtnet_ping_txq(struct pci_vtnet_softc *sc) 4901590Srgrimes{ 49118730Ssteve struct vring_hqueue *hq = &sc->vsc_hq[VTNET_TXQ]; 4921590Srgrimes int ndescs; 49318730Ssteve 4948874Srgrimes /* 4951590Srgrimes * Calculate number of ring entries to process 496138916Sharti */ 497138232Sharti ndescs = hq_num_avail(hq); 4981590Srgrimes 499138232Sharti if (ndescs == 0) 5001590Srgrimes return; 5011590Srgrimes 502103508Sjmallett /* Signal the tx thread for processing */ 503103545Sjmallett pthread_mutex_lock(&sc->tx_mtx); 504138232Sharti if (sc->tx_in_progress == 0) 505138232Sharti pthread_cond_signal(&sc->tx_cond); 5061590Srgrimes pthread_mutex_unlock(&sc->tx_mtx); 5071590Srgrimes} 5081590Srgrimes 5091590Srgrimes/* 5101590Srgrimes * Thread which will handle processing of TX desc 5111590Srgrimes */ 5121590Srgrimesstatic void * 513138916Shartipci_vtnet_tx_thread(void *param) 514142457Sharti{ 515142457Sharti struct pci_vtnet_softc *sc = (struct pci_vtnet_softc *) param; 516142457Sharti struct vring_hqueue *hq; 517142457Sharti int i, ndescs, needintr,error; 518142457Sharti 519142457Sharti needintr = 0; 520138264Sharti hq = &sc->vsc_hq[VTNET_TXQ]; 5211590Srgrimes 5221590Srgrimes /* 5231590Srgrimes * Let us wait till the tx queue pointers get initialised & 5241590Srgrimes * first tx signaled 525132839Sharti */ 526132839Sharti pthread_mutex_lock(&sc->tx_mtx); 5271590Srgrimes error = pthread_cond_wait(&sc->tx_cond, &sc->tx_mtx); 528132839Sharti assert(error == 0); 529132839Sharti 530132839Sharti for (;;) { 531132839Sharti pthread_mutex_lock(&sc->tx_mtx); 53260569Swill for (;;) { 533132839Sharti if (sc->resetting) { 534132839Sharti ndescs = 0; 535132839Sharti needintr = 0; 5361590Srgrimes } else 537132839Sharti ndescs = hq_num_avail(hq); 538132839Sharti 539132839Sharti if (ndescs != 0) 540132839Sharti break; 541132839Sharti 542132839Sharti if (needintr) { 543132839Sharti /* 544132839Sharti * Generate an interrupt if able 545132839Sharti */ 546132839Sharti if ((*hq->hq_avail_flags & 547132839Sharti VRING_AVAIL_F_NO_INTERRUPT) == 0) { 548132839Sharti if (use_msix) { 5491590Srgrimes pci_generate_msix(sc->vsc_pi, 5501590Srgrimes sc->vsc_msix_table_idx[VTNET_TXQ]); 5511590Srgrimes } else { 5521590Srgrimes sc->vsc_isr |= 1; 553138232Sharti pci_generate_msi(sc->vsc_pi, 0); 5541590Srgrimes } 5551590Srgrimes } 5561590Srgrimes } 55718730Ssteve needintr = 0; 5581590Srgrimes sc->tx_in_progress = 0; 55918730Ssteve error = pthread_cond_wait(&sc->tx_cond, &sc->tx_mtx); 5601590Srgrimes assert(error == 0); 5611590Srgrimes } 5621590Srgrimes sc->tx_in_progress = 1; 5631590Srgrimes pthread_mutex_unlock(&sc->tx_mtx); 5641590Srgrimes 5651590Srgrimes while (ndescs > 0) { 56618730Ssteve /* 5671590Srgrimes * Run through all the entries, placing them into 5681590Srgrimes * iovecs and sending when an end-of-packet is found 5691590Srgrimes */ 5701590Srgrimes for (i = 0; i < ndescs; i++) 5711590Srgrimes pci_vtnet_proctx(sc, hq); 5721590Srgrimes needintr = 1; 5731590Srgrimes ndescs = hq_num_avail(hq); 5741590Srgrimes } 5751590Srgrimes } 57618730Ssteve} 5771590Srgrimes 57818730Sstevestatic void 57918730Sstevepci_vtnet_ping_ctlq(struct pci_vtnet_softc *sc) 58018730Ssteve{ 5811590Srgrimes 58218730Ssteve DPRINTF(("vtnet: control qnotify!\n\r")); 5831590Srgrimes} 5841590Srgrimes 58518730Sstevestatic void 5861590Srgrimespci_vtnet_ring_init(struct pci_vtnet_softc *sc, uint64_t pfn) 5871590Srgrimes{ 5881590Srgrimes struct vring_hqueue *hq; 5891590Srgrimes int qnum = sc->vsc_curq; 5901590Srgrimes 5911590Srgrimes assert(qnum < VTNET_MAXQ); 5921590Srgrimes 5931590Srgrimes sc->vsc_pfn[qnum] = pfn << VRING_PFN; 5941590Srgrimes 5951590Srgrimes /* 59618730Ssteve * Set up host pointers to the various parts of the 5971590Srgrimes * queue 59818730Ssteve */ 59918730Ssteve hq = &sc->vsc_hq[qnum]; 6001590Srgrimes hq->hq_size = pci_vtnet_qsize(qnum); 6011590Srgrimes 6021590Srgrimes hq->hq_dtable = paddr_guest2host(vtnet_ctx(sc), pfn << VRING_PFN, 6031590Srgrimes vring_size(hq->hq_size)); 6041590Srgrimes hq->hq_avail_flags = (uint16_t *)(hq->hq_dtable + hq->hq_size); 6051590Srgrimes hq->hq_avail_idx = hq->hq_avail_flags + 1; 6061590Srgrimes hq->hq_avail_ring = hq->hq_avail_flags + 2; 6071590Srgrimes hq->hq_used_flags = (uint16_t *)roundup2((uintptr_t)hq->hq_avail_ring, 6081590Srgrimes VRING_ALIGN); 6091590Srgrimes hq->hq_used_idx = hq->hq_used_flags + 1; 6101590Srgrimes hq->hq_used_ring = (struct virtio_used *)(hq->hq_used_flags + 2); 6111590Srgrimes 6121590Srgrimes /* 6131590Srgrimes * Initialize queue indexes 6141590Srgrimes */ 6151590Srgrimes hq->hq_cur_aidx = 0; 6168874Srgrimes} 61718730Ssteve 6188874Srgrimesstatic int 6191590Srgrimespci_vtnet_init(struct vmctx *ctx, struct pci_devinst *pi, char *opts) 6201590Srgrimes{ 6211590Srgrimes MD5_CTX mdctx; 6221590Srgrimes unsigned char digest[16]; 6231590Srgrimes char nstr[80]; 6241590Srgrimes char tname[MAXCOMLEN + 1]; 625138264Sharti struct pci_vtnet_softc *sc; 62618730Ssteve const char *env_msi; 6271590Srgrimes 6281590Srgrimes sc = malloc(sizeof(struct pci_vtnet_softc)); 62918730Ssteve memset(sc, 0, sizeof(struct pci_vtnet_softc)); 6301590Srgrimes 6311590Srgrimes pi->pi_arg = sc; 63218730Ssteve sc->vsc_pi = pi; 6331590Srgrimes 634138232Sharti pthread_mutex_init(&sc->vsc_mtx, NULL); 6351590Srgrimes 6361590Srgrimes /* 6371590Srgrimes * Use MSI if set by user 6381590Srgrimes */ 6391590Srgrimes if ((env_msi = getenv("BHYVE_USE_MSI")) != NULL) { 6401590Srgrimes if (strcasecmp(env_msi, "yes") == 0) 6411590Srgrimes use_msix = 0; 6421590Srgrimes } 6431590Srgrimes 6441590Srgrimes /* 6451590Srgrimes * Attempt to open the tap device 6461590Srgrimes */ 6471590Srgrimes sc->vsc_tapfd = -1; 6481590Srgrimes if (opts != NULL) { 6491590Srgrimes char tbuf[80]; 6501590Srgrimes 6511590Srgrimes strcpy(tbuf, "/dev/"); 652104696Sjmallett strlcat(tbuf, opts, sizeof(tbuf)); 6531590Srgrimes 654142457Sharti sc->vsc_tapfd = open(tbuf, O_RDWR); 655142457Sharti if (sc->vsc_tapfd == -1) { 656138232Sharti WPRINTF(("open of tap device %s failed\n", tbuf)); 657142457Sharti } else { 658142457Sharti /* 659142457Sharti * Set non-blocking and register for read 660142457Sharti * notifications with the event loop 661142457Sharti */ 662138232Sharti int opt = 1; 6631590Srgrimes if (ioctl(sc->vsc_tapfd, FIONBIO, &opt) < 0) { 6641590Srgrimes WPRINTF(("tap device O_NONBLOCK failed\n")); 66518730Ssteve close(sc->vsc_tapfd); 6661590Srgrimes sc->vsc_tapfd = -1; 6671590Srgrimes } 66818730Ssteve 66918730Ssteve sc->vsc_mevp = mevent_add(sc->vsc_tapfd, 67018730Ssteve EVF_READ, 67118730Ssteve pci_vtnet_tap_callback, 67218730Ssteve sc); 67318730Ssteve if (sc->vsc_mevp == NULL) { 67418730Ssteve WPRINTF(("Could not register event\n")); 67518730Ssteve close(sc->vsc_tapfd); 67618730Ssteve sc->vsc_tapfd = -1; 67718730Ssteve } 67818730Ssteve } 67918730Ssteve } 680104696Sjmallett 68118730Ssteve /* 682138232Sharti * The MAC address is the standard NetApp OUI of 00-a0-98, 68318730Ssteve * followed by an MD5 of the vm name. The slot/func number is 684137202Sharti * prepended to this for slots other than 1:0, so that 68518730Ssteve * a bootloader can netboot from the equivalent of slot 1. 68618730Ssteve */ 68718730Ssteve if (pi->pi_slot == 1 && pi->pi_func == 0) { 688138232Sharti strncpy(nstr, vmname, sizeof(nstr)); 68918730Ssteve } else { 69018730Ssteve snprintf(nstr, sizeof(nstr), "%d-%d-%s", pi->pi_slot, 691138232Sharti pi->pi_func, vmname); 69218730Ssteve } 693138232Sharti 69418730Ssteve MD5Init(&mdctx); 69518730Ssteve MD5Update(&mdctx, nstr, strlen(nstr)); 69618730Ssteve MD5Final(digest, &mdctx); 69718730Ssteve 69818730Ssteve sc->vsc_macaddr[0] = 0x00; 69918730Ssteve sc->vsc_macaddr[1] = 0xa0; 7001590Srgrimes sc->vsc_macaddr[2] = 0x98; 7011590Srgrimes sc->vsc_macaddr[3] = digest[0]; 7021590Srgrimes sc->vsc_macaddr[4] = digest[1]; 7031590Srgrimes sc->vsc_macaddr[5] = digest[2]; 7041590Srgrimes 7051590Srgrimes /* initialize config space */ 7061590Srgrimes pci_set_cfgdata16(pi, PCIR_DEVICE, VIRTIO_DEV_NET); 7071590Srgrimes pci_set_cfgdata16(pi, PCIR_VENDOR, VIRTIO_VENDOR); 7081590Srgrimes pci_set_cfgdata8(pi, PCIR_CLASS, PCIC_NETWORK); 7091590Srgrimes pci_set_cfgdata16(pi, PCIR_SUBDEV_0, VIRTIO_TYPE_NET); 7101590Srgrimes 7111590Srgrimes if (use_msix) { 7121590Srgrimes /* MSI-X support */ 7131590Srgrimes int i; 7141590Srgrimes 7151590Srgrimes for (i = 0; i < VTNET_MAXQ; i++) 7161590Srgrimes sc->vsc_msix_table_idx[i] = VIRTIO_MSI_NO_VECTOR; 7171590Srgrimes 7181590Srgrimes /* 7191590Srgrimes * BAR 1 used to map MSI-X table and PBA 7201590Srgrimes */ 7211590Srgrimes if (pci_emul_add_msixcap(pi, VTNET_MAXQ, 1)) 7221590Srgrimes return (1); 723104696Sjmallett } else { 7241590Srgrimes /* MSI support */ 72518730Ssteve pci_emul_add_msicap(pi, 1); 7261590Srgrimes } 72718730Ssteve 72818730Ssteve pci_emul_alloc_bar(pi, 0, PCIBAR_IO, VTNET_REGSZ); 72918730Ssteve 7301590Srgrimes /* 7311590Srgrimes * Initialize tx semaphore & spawn TX processing thread 7321590Srgrimes * As of now, only one thread for TX desc processing is 7331590Srgrimes * spawned. 7341590Srgrimes */ 7351590Srgrimes sc->tx_in_progress = 0; 7361590Srgrimes sc->resetting = 0; 7371590Srgrimes pthread_mutex_init(&sc->tx_mtx, NULL); 7381590Srgrimes pthread_cond_init(&sc->tx_cond, NULL); 73918730Ssteve pthread_create(&sc->tx_tid, NULL, pci_vtnet_tx_thread, (void *)sc); 7401590Srgrimes snprintf(tname, sizeof(tname), "%s vtnet%d tx", vmname, pi->pi_slot); 741138232Sharti pthread_set_name_np(sc->tx_tid, tname); 7421590Srgrimes 7431590Srgrimes 74418730Ssteve return (0); 7451590Srgrimes} 7461590Srgrimes 7471590Srgrimes/* 7481590Srgrimes * Function pointer array to handle queue notifications 74918730Ssteve */ 75018730Sstevestatic void (*pci_vtnet_qnotify[VTNET_MAXQ])(struct pci_vtnet_softc *) = { 75118730Ssteve pci_vtnet_ping_rxq, 75218730Ssteve pci_vtnet_ping_txq, 75318730Ssteve pci_vtnet_ping_ctlq 7541590Srgrimes}; 75518730Ssteve 75618730Sstevestatic uint64_t 75718730Sstevevtnet_adjust_offset(struct pci_devinst *pi, uint64_t offset) 7581590Srgrimes{ 75918730Ssteve /* 7601590Srgrimes * Device specific offsets used by guest would change based on 7611590Srgrimes * whether MSI-X capability is enabled or not 7621590Srgrimes */ 7631590Srgrimes if (!pci_msix_enabled(pi)) { 7641590Srgrimes if (offset >= VTCFG_R_MSIX) 7651590Srgrimes return (offset + (VTCFG_R_CFG1 - VTCFG_R_MSIX)); 7668874Srgrimes } 7671590Srgrimes 76818730Ssteve return (offset); 76918730Ssteve} 7701590Srgrimes 7711590Srgrimesstatic void 7721590Srgrimespci_vtnet_write(struct vmctx *ctx, int vcpu, struct pci_devinst *pi, 7738874Srgrimes int baridx, uint64_t offset, int size, uint64_t value) 77418730Ssteve{ 7751590Srgrimes struct pci_vtnet_softc *sc = pi->pi_arg; 7761590Srgrimes void *ptr; 7771590Srgrimes 7781590Srgrimes if (use_msix) { 7791590Srgrimes if (baridx == pci_msix_table_bar(pi) || 78018730Ssteve baridx == pci_msix_pba_bar(pi)) { 78194582Sobrien pci_emul_msix_twrite(pi, offset, size, value); 78294582Sobrien return; 7831590Srgrimes } 7841590Srgrimes } 7851590Srgrimes 7861590Srgrimes assert(baridx == 0); 78718730Ssteve 788103545Sjmallett if (offset + size > pci_vtnet_iosize(pi)) { 78918730Ssteve DPRINTF(("vtnet_write: 2big, offset %ld size %d\n", 7901590Srgrimes offset, size)); 79118730Ssteve return; 7921590Srgrimes } 7931590Srgrimes 794138232Sharti pthread_mutex_lock(&sc->vsc_mtx); 79518730Ssteve 79618730Ssteve offset = vtnet_adjust_offset(pi, offset); 7971590Srgrimes 7981590Srgrimes switch (offset) { 79918730Ssteve case VTCFG_R_GUESTCAP: 8001590Srgrimes assert(size == 4); 8011590Srgrimes sc->vsc_features = value & VTNET_S_HOSTCAPS; 8021590Srgrimes break; 80318730Ssteve case VTCFG_R_PFN: 8041590Srgrimes assert(size == 4); 8051590Srgrimes pci_vtnet_ring_init(sc, value); 806138232Sharti break; 8071590Srgrimes case VTCFG_R_QSEL: 80818730Ssteve assert(size == 2); 809103545Sjmallett assert(value < VTNET_MAXQ); 8101590Srgrimes sc->vsc_curq = value; 81118730Ssteve break; 8121590Srgrimes case VTCFG_R_QNOTIFY: 8131590Srgrimes assert(size == 2); 814138232Sharti assert(value < VTNET_MAXQ); 815137252Sharti (*pci_vtnet_qnotify[value])(sc); 8161590Srgrimes break; 817138916Sharti case VTCFG_R_STATUS: 818138232Sharti assert(size == 1); 8191590Srgrimes pci_vtnet_update_status(sc, value); 82018730Ssteve break; 8211590Srgrimes case VTCFG_R_CFGVEC: 8221590Srgrimes assert(size == 2); 8231590Srgrimes sc->vsc_msix_table_idx[VTNET_CTLQ] = value; 8241590Srgrimes break; 8251590Srgrimes case VTCFG_R_QVEC: 826137252Sharti assert(size == 2); 8271590Srgrimes assert(sc->vsc_curq != VTNET_CTLQ); 82818730Ssteve sc->vsc_msix_table_idx[sc->vsc_curq] = value; 8291590Srgrimes break; 8301590Srgrimes case VTNET_R_CFG0: 831138232Sharti case VTNET_R_CFG1: 8321590Srgrimes case VTNET_R_CFG2: 83318730Ssteve case VTNET_R_CFG3: 834103545Sjmallett case VTNET_R_CFG4: 83518730Ssteve case VTNET_R_CFG5: 83618730Ssteve assert((size + offset) <= (VTNET_R_CFG5 + 1)); 83718730Ssteve ptr = &sc->vsc_macaddr[offset - VTNET_R_CFG0]; 83818730Ssteve /* 83918730Ssteve * The driver is allowed to change the MAC address 84018730Ssteve */ 84118730Ssteve sc->vsc_macaddr[offset - VTNET_R_CFG0] = value; 8421590Srgrimes if (size == 1) { 84318730Ssteve *(uint8_t *) ptr = value; 8441590Srgrimes } else if (size == 2) { 84518730Ssteve *(uint16_t *) ptr = value; 846138916Sharti } else { 84718730Ssteve *(uint32_t *) ptr = value; 848137252Sharti } 84918730Ssteve break; 85018730Ssteve case VTCFG_R_HOSTCAP: 851103545Sjmallett case VTCFG_R_QNUM: 85218730Ssteve case VTCFG_R_ISR: 853138232Sharti case VTNET_R_CFG6: 85418730Ssteve case VTNET_R_CFG7: 8551590Srgrimes DPRINTF(("vtnet: write to readonly reg %ld\n\r", offset)); 8561590Srgrimes break; 85718730Ssteve default: 8581590Srgrimes DPRINTF(("vtnet: unknown i/o write offset %ld\n\r", offset)); 8591590Srgrimes value = 0; 860138232Sharti break; 8611590Srgrimes } 8621590Srgrimes 863138232Sharti pthread_mutex_unlock(&sc->vsc_mtx); 8641590Srgrimes} 8651590Srgrimes 8661590Srgrimesuint64_t 8671590Srgrimespci_vtnet_read(struct vmctx *ctx, int vcpu, struct pci_devinst *pi, 8681590Srgrimes int baridx, uint64_t offset, int size) 8691590Srgrimes{ 8701590Srgrimes struct pci_vtnet_softc *sc = pi->pi_arg; 871138564Sharti void *ptr; 872138564Sharti uint64_t value; 87318730Ssteve 87418730Ssteve if (use_msix) { 87518730Ssteve if (baridx == pci_msix_table_bar(pi) || 87618730Ssteve baridx == pci_msix_pba_bar(pi)) { 87718730Ssteve return (pci_emul_msix_tread(pi, offset, size)); 87818730Ssteve } 87918730Ssteve } 88018730Ssteve 88118730Ssteve assert(baridx == 0); 88218730Ssteve 88318730Ssteve if (offset + size > pci_vtnet_iosize(pi)) { 88418730Ssteve DPRINTF(("vtnet_read: 2big, offset %ld size %d\n", 88518730Ssteve offset, size)); 88618730Ssteve return (0); 88718730Ssteve } 88818730Ssteve 88918730Ssteve pthread_mutex_lock(&sc->vsc_mtx); 89018730Ssteve 891104108Sjmallett offset = vtnet_adjust_offset(pi, offset); 892104108Sjmallett 8931590Srgrimes switch (offset) { 8941590Srgrimes case VTCFG_R_HOSTCAP: 8951590Srgrimes assert(size == 4); 8961590Srgrimes value = VTNET_S_HOSTCAPS; 8971590Srgrimes break; 8988874Srgrimes case VTCFG_R_GUESTCAP: 8991590Srgrimes assert(size == 4); 9001590Srgrimes value = sc->vsc_features; /* XXX never read ? */ 9011590Srgrimes break; 90218730Ssteve case VTCFG_R_PFN: 9031590Srgrimes assert(size == 4); 9041590Srgrimes value = sc->vsc_pfn[sc->vsc_curq] >> VRING_PFN; 9051590Srgrimes break; 9061590Srgrimes case VTCFG_R_QNUM: 9071590Srgrimes assert(size == 2); 9081590Srgrimes value = pci_vtnet_qsize(sc->vsc_curq); 9091590Srgrimes break; 91069527Swill case VTCFG_R_QSEL: 911138916Sharti assert(size == 2); 912138264Sharti value = sc->vsc_curq; /* XXX never read ? */ 9131590Srgrimes break; 9141590Srgrimes case VTCFG_R_QNOTIFY: 91518730Ssteve assert(size == 2); 91669531Swill value = sc->vsc_curq; /* XXX never read ? */ 91718730Ssteve break; 9181590Srgrimes case VTCFG_R_STATUS: 91969531Swill assert(size == 1); 9201590Srgrimes value = sc->vsc_status; 9211590Srgrimes break; 92218730Ssteve case VTCFG_R_ISR: 9231590Srgrimes assert(size == 1); 9241590Srgrimes value = sc->vsc_isr; 9251590Srgrimes sc->vsc_isr = 0; /* a read clears this flag */ 9261590Srgrimes break; 9271590Srgrimes case VTCFG_R_CFGVEC: 9281590Srgrimes assert(size == 2); 9291590Srgrimes value = sc->vsc_msix_table_idx[VTNET_CTLQ]; 9301590Srgrimes break; 9311590Srgrimes case VTCFG_R_QVEC: 9321590Srgrimes assert(size == 2); 9331590Srgrimes assert(sc->vsc_curq != VTNET_CTLQ); 9341590Srgrimes value = sc->vsc_msix_table_idx[sc->vsc_curq]; 9358874Srgrimes break; 93668898Skris case VTNET_R_CFG0: 9371590Srgrimes case VTNET_R_CFG1: 9381590Srgrimes case VTNET_R_CFG2: 9391590Srgrimes case VTNET_R_CFG3: 94018730Ssteve case VTNET_R_CFG4: 9411590Srgrimes case VTNET_R_CFG5: 9421590Srgrimes assert((size + offset) <= (VTNET_R_CFG5 + 1)); 9431590Srgrimes ptr = &sc->vsc_macaddr[offset - VTNET_R_CFG0]; 9441590Srgrimes if (size == 1) { 9451590Srgrimes value = *(uint8_t *) ptr; 9461590Srgrimes } else if (size == 2) { 947104696Sjmallett value = *(uint16_t *) ptr; 9481590Srgrimes } else { 9491590Srgrimes value = *(uint32_t *) ptr; 9501590Srgrimes } 9511590Srgrimes break; 9521590Srgrimes case VTNET_R_CFG6: 9531590Srgrimes assert(size != 4); 9541590Srgrimes value = 0x01; /* XXX link always up */ 9551590Srgrimes break; 9561590Srgrimes case VTNET_R_CFG7: 9571590Srgrimes assert(size == 1); 958104696Sjmallett value = 0; /* XXX link status in LSB */ 9591590Srgrimes break; 9601590Srgrimes default: 96118730Ssteve DPRINTF(("vtnet: unknown i/o read offset %ld\n\r", offset)); 9621590Srgrimes value = 0; 963138264Sharti break; 9641590Srgrimes } 9651590Srgrimes 9661590Srgrimes pthread_mutex_unlock(&sc->vsc_mtx); 9671590Srgrimes 9681590Srgrimes return (value); 9691590Srgrimes} 9708874Srgrimes 9711590Srgrimesstruct pci_devemu pci_de_vnet = { 972138232Sharti .pe_emu = "virtio-net", 973138232Sharti .pe_init = pci_vtnet_init, 9741590Srgrimes .pe_barwrite = pci_vtnet_write, 9751590Srgrimes .pe_barread = pci_vtnet_read 9761590Srgrimes}; 9771590SrgrimesPCI_EMUL_SET(pci_de_vnet); 9781590Srgrimes