1221828Sgrehan/*-
2221828Sgrehan * Copyright (c) 2011 NetApp, Inc.
3221828Sgrehan * All rights reserved.
4221828Sgrehan *
5221828Sgrehan * Redistribution and use in source and binary forms, with or without
6221828Sgrehan * modification, are permitted provided that the following conditions
7221828Sgrehan * are met:
8221828Sgrehan * 1. Redistributions of source code must retain the above copyright
9221828Sgrehan *    notice, this list of conditions and the following disclaimer.
10221828Sgrehan * 2. Redistributions in binary form must reproduce the above copyright
11221828Sgrehan *    notice, this list of conditions and the following disclaimer in the
12221828Sgrehan *    documentation and/or other materials provided with the distribution.
13221828Sgrehan *
14221828Sgrehan * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
15221828Sgrehan * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16221828Sgrehan * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
17221828Sgrehan * ARE DISCLAIMED.  IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
18221828Sgrehan * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19221828Sgrehan * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
20221828Sgrehan * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21221828Sgrehan * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
22221828Sgrehan * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
23221828Sgrehan * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
24221828Sgrehan * SUCH DAMAGE.
25221828Sgrehan *
26221828Sgrehan * $FreeBSD$
27221828Sgrehan */
28221828Sgrehan
29221828Sgrehan#include <sys/cdefs.h>
30221828Sgrehan__FBSDID("$FreeBSD$");
31221828Sgrehan
32221828Sgrehan#include <sys/param.h>
33221828Sgrehan#include <sys/linker_set.h>
34221828Sgrehan#include <sys/select.h>
35221828Sgrehan#include <sys/uio.h>
36221828Sgrehan#include <sys/ioctl.h>
37252682Sgrehan#include <net/ethernet.h>
38221828Sgrehan
39221828Sgrehan#include <errno.h>
40221828Sgrehan#include <fcntl.h>
41221828Sgrehan#include <stdio.h>
42221828Sgrehan#include <stdlib.h>
43221828Sgrehan#include <stdint.h>
44221828Sgrehan#include <string.h>
45221828Sgrehan#include <strings.h>
46221828Sgrehan#include <unistd.h>
47221828Sgrehan#include <assert.h>
48221828Sgrehan#include <md5.h>
49221828Sgrehan#include <pthread.h>
50249917Sgrehan#include <pthread_np.h>
51221828Sgrehan
52244167Sgrehan#include "bhyverun.h"
53221828Sgrehan#include "pci_emul.h"
54221828Sgrehan#include "mevent.h"
55221828Sgrehan#include "virtio.h"
56221828Sgrehan
/* Queue size assigned to each virtqueue at init time. */
#define	VTNET_RINGSZ	1024

/* Maximum number of iovec segments fetched for one TX descriptor chain. */
#define	VTNET_MAXSEGS	32
60221828Sgrehan
/*
 * virtio-net feature bits.  Only the subset collected in
 * VTNET_S_HOSTCAPS (below) is offered by this device model.
 */
#define	VIRTIO_NET_F_CSUM	(1 <<  0)	/* host handles partial cksum */
#define	VIRTIO_NET_F_GUEST_CSUM	(1 <<  1)	/* guest handles partial cksum */
#define	VIRTIO_NET_F_MAC	(1 <<  5)	/* host supplies MAC */
#define	VIRTIO_NET_F_GSO_DEPREC	(1 <<  6)	/* deprecated: host handles GSO */
#define	VIRTIO_NET_F_GUEST_TSO4	(1 <<  7)	/* guest can rcv TSOv4 */
#define	VIRTIO_NET_F_GUEST_TSO6	(1 <<  8)	/* guest can rcv TSOv6 */
#define	VIRTIO_NET_F_GUEST_ECN	(1 <<  9)	/* guest can rcv TSO with ECN */
#define	VIRTIO_NET_F_GUEST_UFO	(1 << 10)	/* guest can rcv UFO */
#define	VIRTIO_NET_F_HOST_TSO4	(1 << 11)	/* host can rcv TSOv4 */
#define	VIRTIO_NET_F_HOST_TSO6	(1 << 12)	/* host can rcv TSOv6 */
#define	VIRTIO_NET_F_HOST_ECN	(1 << 13)	/* host can rcv TSO with ECN */
#define	VIRTIO_NET_F_HOST_UFO	(1 << 14)	/* host can rcv UFO */
#define	VIRTIO_NET_F_MRG_RXBUF	(1 << 15)	/* host can merge RX buffers */
#define	VIRTIO_NET_F_STATUS	(1 << 16)	/* config status field available */
#define	VIRTIO_NET_F_CTRL_VQ	(1 << 17)	/* control channel available */
#define	VIRTIO_NET_F_CTRL_RX	(1 << 18)	/* control channel RX mode support */
#define	VIRTIO_NET_F_CTRL_VLAN	(1 << 19)	/* control channel VLAN filtering */
#define	VIRTIO_NET_F_GUEST_ANNOUNCE \
				(1 << 21)	/* guest can send gratuitous pkts */

/* The capability mask placed in vtnet_vi_consts. */
#define	VTNET_S_HOSTCAPS						\
	(VIRTIO_NET_F_MAC | VIRTIO_NET_F_MRG_RXBUF |			\
	 VIRTIO_NET_F_STATUS | VIRTIO_F_NOTIFY_ON_EMPTY)
87221828Sgrehan
/*
 * Device-specific configuration "registers".  The guest reads this
 * structure through pci_vtnet_cfgread() and may overwrite the MAC
 * bytes through pci_vtnet_cfgwrite().
 */
struct virtio_net_config {
	uint8_t		mac[6];		/* station address */
	uint16_t	status;		/* set to 1 ("link always up") at init */
} __packed;
95221828Sgrehan
/*
 * Virtqueue indices.  The control queue has an index reserved but is
 * not yet supported, so only VTNET_MAXQ - 1 queues are instantiated.
 */
#define	VTNET_RXQ	0
#define	VTNET_TXQ	1
#define	VTNET_CTLQ	2	/* NB: not yet supported */

#define	VTNET_MAXQ	3
104221828Sgrehan
/*
 * Fixed-size header placed in front of every received frame.  The
 * receive path zeroes the whole header and then sets vrh_bufs to 1.
 */
struct virtio_net_rxhdr {
	uint8_t		vrh_flags;
	uint8_t		vrh_gso_type;
	uint16_t	vrh_hdr_len;
	uint16_t	vrh_gso_size;
	uint16_t	vrh_csum_start;
	uint16_t	vrh_csum_offset;
	uint16_t	vrh_bufs;	/* number of buffers used by the frame */
} __packed;
117221828Sgrehan
/*
 * Debug printf.
 *
 * DPRINTF((fmt, ...)) prints only while pci_vtnet_debug is non-zero;
 * WPRINTF((fmt, ...)) always prints.  Both take a parenthesised printf
 * argument list.
 *
 * DPRINTF is wrapped in do { } while (0) so that it behaves as a single
 * statement and cannot capture an "else" that follows the call site.
 */
static int pci_vtnet_debug;
#define DPRINTF(params) do { if (pci_vtnet_debug) printf params; } while (0)
#define WPRINTF(params) printf params
124221828Sgrehan
125221828Sgrehan/*
126221828Sgrehan * Per-device softc
127221828Sgrehan */
128221828Sgrehanstruct pci_vtnet_softc {
129253440Sgrehan	struct virtio_softc vsc_vs;
130253440Sgrehan	struct vqueue_info vsc_queues[VTNET_MAXQ - 1];
131221828Sgrehan	pthread_mutex_t vsc_mtx;
132221828Sgrehan	struct mevent	*vsc_mevp;
133221828Sgrehan
134221828Sgrehan	int		vsc_tapfd;
135221828Sgrehan	int		vsc_rx_ready;
136253440Sgrehan	volatile int	resetting;	/* set and checked outside lock */
137221828Sgrehan
138221828Sgrehan	uint32_t	vsc_features;
139253440Sgrehan	struct virtio_net_config vsc_config;
140221828Sgrehan
141250083Sneel	pthread_mutex_t	rx_mtx;
142250083Sneel	int		rx_in_progress;
143250083Sneel
144249917Sgrehan	pthread_t 	tx_tid;
145249917Sgrehan	pthread_mutex_t	tx_mtx;
146249917Sgrehan	pthread_cond_t	tx_cond;
147250083Sneel	int		tx_in_progress;
148221828Sgrehan};
149221828Sgrehan
/* Forward declarations for the callbacks referenced by vtnet_vi_consts. */
static void pci_vtnet_reset(void *vsc);
static int pci_vtnet_cfgread(void *vsc, int offset, int size,
    uint32_t *retval);
static int pci_vtnet_cfgwrite(void *vsc, int offset, int size,
    uint32_t value);
154246109Sneel
155253440Sgrehanstatic struct virtio_consts vtnet_vi_consts = {
156253440Sgrehan	"vtnet",		/* our name */
157253440Sgrehan	VTNET_MAXQ - 1,		/* we currently support 2 virtqueues */
158253440Sgrehan	sizeof(struct virtio_net_config), /* config reg size */
159253440Sgrehan	pci_vtnet_reset,	/* reset */
160253440Sgrehan	NULL,			/* device-wide qnotify -- not used */
161253440Sgrehan	pci_vtnet_cfgread,	/* read PCI config */
162253440Sgrehan	pci_vtnet_cfgwrite,	/* write PCI config */
163253440Sgrehan	VTNET_S_HOSTCAPS,	/* our capabilities */
164253440Sgrehan};
165221828Sgrehan
166250083Sneel/*
167250083Sneel * If the transmit thread is active then stall until it is done.
168250083Sneel */
169244160Sgrehanstatic void
170250083Sneelpci_vtnet_txwait(struct pci_vtnet_softc *sc)
171250083Sneel{
172250083Sneel
173250083Sneel	pthread_mutex_lock(&sc->tx_mtx);
174250083Sneel	while (sc->tx_in_progress) {
175250083Sneel		pthread_mutex_unlock(&sc->tx_mtx);
176250083Sneel		usleep(10000);
177250083Sneel		pthread_mutex_lock(&sc->tx_mtx);
178250083Sneel	}
179250083Sneel	pthread_mutex_unlock(&sc->tx_mtx);
180250083Sneel}
181250083Sneel
182250083Sneel/*
183250083Sneel * If the receive thread is active then stall until it is done.
184250083Sneel */
185250083Sneelstatic void
186250083Sneelpci_vtnet_rxwait(struct pci_vtnet_softc *sc)
187250083Sneel{
188250083Sneel
189250083Sneel	pthread_mutex_lock(&sc->rx_mtx);
190250083Sneel	while (sc->rx_in_progress) {
191250083Sneel		pthread_mutex_unlock(&sc->rx_mtx);
192250083Sneel		usleep(10000);
193250083Sneel		pthread_mutex_lock(&sc->rx_mtx);
194250083Sneel	}
195250083Sneel	pthread_mutex_unlock(&sc->rx_mtx);
196250083Sneel}
197250083Sneel
198250083Sneelstatic void
199253440Sgrehanpci_vtnet_reset(void *vsc)
200221828Sgrehan{
201253440Sgrehan	struct pci_vtnet_softc *sc = vsc;
202244160Sgrehan
203253440Sgrehan	DPRINTF(("vtnet: device reset requested !\n"));
204249917Sgrehan
205253440Sgrehan	sc->resetting = 1;
206250083Sneel
207253440Sgrehan	/*
208253440Sgrehan	 * Wait for the transmit and receive threads to finish their
209253440Sgrehan	 * processing.
210253440Sgrehan	 */
211253440Sgrehan	pci_vtnet_txwait(sc);
212253440Sgrehan	pci_vtnet_rxwait(sc);
213250083Sneel
214253440Sgrehan	sc->vsc_rx_ready = 0;
215250086Sneel
216253440Sgrehan	/* now reset rings, MSI-X vectors, and negotiated capabilities */
217253440Sgrehan	vi_reset_dev(&sc->vsc_vs);
218250086Sneel
219253440Sgrehan	sc->resetting = 0;
220221828Sgrehan}
221221828Sgrehan
222221828Sgrehan/*
223221828Sgrehan * Called to send a buffer chain out to the tap device
224221828Sgrehan */
225221828Sgrehanstatic void
226221828Sgrehanpci_vtnet_tap_tx(struct pci_vtnet_softc *sc, struct iovec *iov, int iovcnt,
227221828Sgrehan		 int len)
228221828Sgrehan{
229253440Sgrehan	static char pad[60]; /* all zero bytes */
230221828Sgrehan
231221828Sgrehan	if (sc->vsc_tapfd == -1)
232221828Sgrehan		return;
233221828Sgrehan
234221828Sgrehan	/*
235221828Sgrehan	 * If the length is < 60, pad out to that and add the
236221828Sgrehan	 * extra zero'd segment to the iov. It is guaranteed that
237221828Sgrehan	 * there is always an extra iov available by the caller.
238221828Sgrehan	 */
239221828Sgrehan	if (len < 60) {
240221828Sgrehan		iov[iovcnt].iov_base = pad;
241221828Sgrehan		iov[iovcnt].iov_len = 60 - len;
242221828Sgrehan		iovcnt++;
243221828Sgrehan	}
244221828Sgrehan	(void) writev(sc->vsc_tapfd, iov, iovcnt);
245221828Sgrehan}
246221828Sgrehan
247221828Sgrehan/*
248221828Sgrehan *  Called when there is read activity on the tap file descriptor.
249221828Sgrehan * Each buffer posted by the guest is assumed to be able to contain
250221828Sgrehan * an entire ethernet frame + rx header.
251221828Sgrehan *  MP note: the dummybuf is only used for discarding frames, so there
252221828Sgrehan * is no need for it to be per-vtnet or locked.
253221828Sgrehan */
254221828Sgrehanstatic uint8_t dummybuf[2048];
255221828Sgrehan
256221828Sgrehanstatic void
257221828Sgrehanpci_vtnet_tap_rx(struct pci_vtnet_softc *sc)
258221828Sgrehan{
259253440Sgrehan	struct vqueue_info *vq;
260221828Sgrehan	struct virtio_net_rxhdr *vrx;
261221828Sgrehan	uint8_t *buf;
262221828Sgrehan	int len;
263253440Sgrehan	struct iovec iov;
264221828Sgrehan
265221828Sgrehan	/*
266221828Sgrehan	 * Should never be called without a valid tap fd
267221828Sgrehan	 */
268221828Sgrehan	assert(sc->vsc_tapfd != -1);
269221828Sgrehan
270221828Sgrehan	/*
271221828Sgrehan	 * But, will be called when the rx ring hasn't yet
272250083Sneel	 * been set up or the guest is resetting the device.
273221828Sgrehan	 */
274250083Sneel	if (!sc->vsc_rx_ready || sc->resetting) {
275221828Sgrehan		/*
276221828Sgrehan		 * Drop the packet and try later.
277221828Sgrehan		 */
278221828Sgrehan		(void) read(sc->vsc_tapfd, dummybuf, sizeof(dummybuf));
279221828Sgrehan		return;
280221828Sgrehan	}
281221828Sgrehan
282221828Sgrehan	/*
283253440Sgrehan	 * Check for available rx buffers
284221828Sgrehan	 */
285253440Sgrehan	vq = &sc->vsc_queues[VTNET_RXQ];
286253440Sgrehan	vq_startchains(vq);
287253440Sgrehan	if (!vq_has_descs(vq)) {
288221828Sgrehan		/*
289253440Sgrehan		 * Drop the packet and try later.  Interrupt on
290253440Sgrehan		 * empty, if that's negotiated.
291221828Sgrehan		 */
292221828Sgrehan		(void) read(sc->vsc_tapfd, dummybuf, sizeof(dummybuf));
293253440Sgrehan		vq_endchains(vq, 1);
294221828Sgrehan		return;
295221828Sgrehan	}
296221828Sgrehan
297253440Sgrehan	do {
298221828Sgrehan		/*
299253440Sgrehan		 * Get descriptor chain, which should have just
300253440Sgrehan		 * one descriptor in it.
301253440Sgrehan		 * ??? allow guests to use multiple descs?
302221828Sgrehan		 */
303253440Sgrehan		assert(vq_getchain(vq, &iov, 1, NULL) == 1);
304221828Sgrehan
305221828Sgrehan		/*
306221828Sgrehan		 * Get a pointer to the rx header, and use the
307221828Sgrehan		 * data immediately following it for the packet buffer.
308221828Sgrehan		 */
309253440Sgrehan		vrx = iov.iov_base;
310221828Sgrehan		buf = (uint8_t *)(vrx + 1);
311221828Sgrehan
312221828Sgrehan		len = read(sc->vsc_tapfd, buf,
313253440Sgrehan			   iov.iov_len - sizeof(struct virtio_net_rxhdr));
314221828Sgrehan
315221828Sgrehan		if (len < 0 && errno == EWOULDBLOCK) {
316253440Sgrehan			/*
317253440Sgrehan			 * No more packets, but still some avail ring
318253440Sgrehan			 * entries.  Interrupt if needed/appropriate.
319253440Sgrehan			 */
320253440Sgrehan			vq_endchains(vq, 0);
321253440Sgrehan			return;
322221828Sgrehan		}
323221828Sgrehan
324221828Sgrehan		/*
325221828Sgrehan		 * The only valid field in the rx packet header is the
326221828Sgrehan		 * number of buffers, which is always 1 without TSO
327221828Sgrehan		 * support.
328221828Sgrehan		 */
329221828Sgrehan		memset(vrx, 0, sizeof(struct virtio_net_rxhdr));
330221828Sgrehan		vrx->vrh_bufs = 1;
331221828Sgrehan
332221828Sgrehan		/*
333253440Sgrehan		 * Release this chain and handle more chains.
334221828Sgrehan		 */
335253440Sgrehan		vq_relchain(vq, len + sizeof(struct virtio_net_rxhdr));
336253440Sgrehan	} while (vq_has_descs(vq));
337221828Sgrehan
338253440Sgrehan	/* Interrupt if needed, including for NOTIFY_ON_EMPTY. */
339253440Sgrehan	vq_endchains(vq, 1);
340221828Sgrehan}
341221828Sgrehan
342221828Sgrehanstatic void
343221828Sgrehanpci_vtnet_tap_callback(int fd, enum ev_type type, void *param)
344221828Sgrehan{
345221828Sgrehan	struct pci_vtnet_softc *sc = param;
346221828Sgrehan
347250083Sneel	pthread_mutex_lock(&sc->rx_mtx);
348250083Sneel	sc->rx_in_progress = 1;
349221828Sgrehan	pci_vtnet_tap_rx(sc);
350250083Sneel	sc->rx_in_progress = 0;
351250083Sneel	pthread_mutex_unlock(&sc->rx_mtx);
352221828Sgrehan
353221828Sgrehan}
354221828Sgrehan
355221828Sgrehanstatic void
356253440Sgrehanpci_vtnet_ping_rxq(void *vsc, struct vqueue_info *vq)
357221828Sgrehan{
358253440Sgrehan	struct pci_vtnet_softc *sc = vsc;
359253440Sgrehan
360221828Sgrehan	/*
361221828Sgrehan	 * A qnotify means that the rx process can now begin
362221828Sgrehan	 */
363221828Sgrehan	if (sc->vsc_rx_ready == 0) {
364221828Sgrehan		sc->vsc_rx_ready = 1;
365221828Sgrehan	}
366221828Sgrehan}
367221828Sgrehan
368221828Sgrehanstatic void
369253440Sgrehanpci_vtnet_proctx(struct pci_vtnet_softc *sc, struct vqueue_info *vq)
370221828Sgrehan{
371221828Sgrehan	struct iovec iov[VTNET_MAXSEGS + 1];
372253440Sgrehan	int i, n;
373253440Sgrehan	int plen, tlen;
374221828Sgrehan
375221828Sgrehan	/*
376253440Sgrehan	 * Obtain chain of descriptors.  The first one is
377253440Sgrehan	 * really the header descriptor, so we need to sum
378253440Sgrehan	 * up two lengths: packet length and transfer length.
379221828Sgrehan	 */
380253440Sgrehan	n = vq_getchain(vq, iov, VTNET_MAXSEGS, NULL);
381253440Sgrehan	assert(n >= 1 && n <= VTNET_MAXSEGS);
382253440Sgrehan	plen = 0;
383253440Sgrehan	tlen = iov[0].iov_len;
384253440Sgrehan	for (i = 1; i < n; i++) {
385253440Sgrehan		plen += iov[i].iov_len;
386253440Sgrehan		tlen += iov[i].iov_len;
387221828Sgrehan	}
388221828Sgrehan
389253440Sgrehan	DPRINTF(("virtio: packet send, %d bytes, %d segs\n\r", plen, n));
390253440Sgrehan	pci_vtnet_tap_tx(sc, &iov[1], n - 1, plen);
391221828Sgrehan
392253440Sgrehan	/* chain is processed, release it and set tlen */
393253440Sgrehan	vq_relchain(vq, tlen);
394221828Sgrehan}
395221828Sgrehan
396221828Sgrehanstatic void
397253440Sgrehanpci_vtnet_ping_txq(void *vsc, struct vqueue_info *vq)
398221828Sgrehan{
399253440Sgrehan	struct pci_vtnet_softc *sc = vsc;
400221828Sgrehan
401221828Sgrehan	/*
402253440Sgrehan	 * Any ring entries to process?
403221828Sgrehan	 */
404253440Sgrehan	if (!vq_has_descs(vq))
405221828Sgrehan		return;
406221828Sgrehan
407249917Sgrehan	/* Signal the tx thread for processing */
408249917Sgrehan	pthread_mutex_lock(&sc->tx_mtx);
409249917Sgrehan	if (sc->tx_in_progress == 0)
410249917Sgrehan		pthread_cond_signal(&sc->tx_cond);
411249917Sgrehan	pthread_mutex_unlock(&sc->tx_mtx);
412221828Sgrehan}
413221828Sgrehan
414249917Sgrehan/*
415249917Sgrehan * Thread which will handle processing of TX desc
416249917Sgrehan */
417249917Sgrehanstatic void *
418249917Sgrehanpci_vtnet_tx_thread(void *param)
419249917Sgrehan{
420253440Sgrehan	struct pci_vtnet_softc *sc = param;
421253440Sgrehan	struct vqueue_info *vq;
422253440Sgrehan	int have_work, error;
423253440Sgrehan
424253440Sgrehan	vq = &sc->vsc_queues[VTNET_TXQ];
425253440Sgrehan
426253440Sgrehan	/*
427253440Sgrehan	 * Let us wait till the tx queue pointers get initialised &
428253440Sgrehan	 * first tx signaled
429249917Sgrehan	 */
430249917Sgrehan	pthread_mutex_lock(&sc->tx_mtx);
431249917Sgrehan	error = pthread_cond_wait(&sc->tx_cond, &sc->tx_mtx);
432249917Sgrehan	assert(error == 0);
433253440Sgrehan
434249917Sgrehan	for (;;) {
435253440Sgrehan		/* note - tx mutex is locked here */
436253440Sgrehan		do {
437250197Sneel			if (sc->resetting)
438253440Sgrehan				have_work = 0;
439250197Sneel			else
440253440Sgrehan				have_work = vq_has_descs(vq);
441250197Sneel
442253440Sgrehan			if (!have_work) {
443253440Sgrehan				sc->tx_in_progress = 0;
444253440Sgrehan				error = pthread_cond_wait(&sc->tx_cond,
445253440Sgrehan							  &sc->tx_mtx);
446253440Sgrehan				assert(error == 0);
447253440Sgrehan			}
448253440Sgrehan		} while (!have_work);
449249917Sgrehan		sc->tx_in_progress = 1;
450249917Sgrehan		pthread_mutex_unlock(&sc->tx_mtx);
451249917Sgrehan
452253440Sgrehan		vq_startchains(vq);
453253440Sgrehan		do {
454249917Sgrehan			/*
455253440Sgrehan			 * Run through entries, placing them into
456253440Sgrehan			 * iovecs and sending when an end-of-packet
457253440Sgrehan			 * is found
458249917Sgrehan			 */
459253440Sgrehan			pci_vtnet_proctx(sc, vq);
460253440Sgrehan		} while (vq_has_descs(vq));
461250197Sneel
462250197Sneel		/*
463250197Sneel		 * Generate an interrupt if needed.
464250197Sneel		 */
465253440Sgrehan		vq_endchains(vq, 1);
466253440Sgrehan
467253440Sgrehan		pthread_mutex_lock(&sc->tx_mtx);
468249917Sgrehan	}
469221828Sgrehan}
470221828Sgrehan
#ifdef notyet
/*
 * Notify handler for the control virtqueue.  Compiled out until the
 * control queue is supported; it only logs the notification.
 */
static void
pci_vtnet_ping_ctlq(void *vsc, struct vqueue_info *vq)
{

	DPRINTF(("vtnet: control qnotify!\n\r"));
}
#endif
479221828Sgrehan
480221828Sgrehanstatic int
481252682Sgrehanpci_vtnet_parsemac(char *mac_str, uint8_t *mac_addr)
482252682Sgrehan{
483252682Sgrehan        struct ether_addr *ea;
484252682Sgrehan        char *tmpstr;
485252682Sgrehan        char zero_addr[ETHER_ADDR_LEN] = { 0, 0, 0, 0, 0, 0 };
486252682Sgrehan
487252682Sgrehan        tmpstr = strsep(&mac_str,"=");
488252682Sgrehan
489252682Sgrehan        if ((mac_str != NULL) && (!strcmp(tmpstr,"mac"))) {
490252682Sgrehan                ea = ether_aton(mac_str);
491252682Sgrehan
492252682Sgrehan                if (ea == NULL || ETHER_IS_MULTICAST(ea->octet) ||
493252682Sgrehan                    memcmp(ea->octet, zero_addr, ETHER_ADDR_LEN) == 0) {
494252682Sgrehan			fprintf(stderr, "Invalid MAC %s\n", mac_str);
495252682Sgrehan                        return (EINVAL);
496252682Sgrehan                } else
497252682Sgrehan                        memcpy(mac_addr, ea->octet, ETHER_ADDR_LEN);
498252682Sgrehan        }
499252682Sgrehan
500252682Sgrehan        return (0);
501252682Sgrehan}
502252682Sgrehan
503252682Sgrehan
504252682Sgrehanstatic int
505221828Sgrehanpci_vtnet_init(struct vmctx *ctx, struct pci_devinst *pi, char *opts)
506221828Sgrehan{
507221828Sgrehan	MD5_CTX mdctx;
508221828Sgrehan	unsigned char digest[16];
509221828Sgrehan	char nstr[80];
510249917Sgrehan	char tname[MAXCOMLEN + 1];
511221828Sgrehan	struct pci_vtnet_softc *sc;
512252682Sgrehan	char *devname;
513252682Sgrehan	char *vtopts;
514252682Sgrehan	int mac_provided;
515221828Sgrehan
516221828Sgrehan	sc = malloc(sizeof(struct pci_vtnet_softc));
517221828Sgrehan	memset(sc, 0, sizeof(struct pci_vtnet_softc));
518221828Sgrehan
519253440Sgrehan	pthread_mutex_init(&sc->vsc_mtx, NULL);
520221828Sgrehan
521253440Sgrehan	vi_softc_linkup(&sc->vsc_vs, &vtnet_vi_consts, sc, pi, sc->vsc_queues);
522253440Sgrehan	sc->vsc_queues[VTNET_RXQ].vq_qsize = VTNET_RINGSZ;
523253440Sgrehan	sc->vsc_queues[VTNET_RXQ].vq_notify = pci_vtnet_ping_rxq;
524253440Sgrehan	sc->vsc_queues[VTNET_TXQ].vq_qsize = VTNET_RINGSZ;
525253440Sgrehan	sc->vsc_queues[VTNET_TXQ].vq_notify = pci_vtnet_ping_txq;
526253440Sgrehan#ifdef notyet
527253440Sgrehan	sc->vsc_queues[VTNET_CTLQ].vq_qsize = VTNET_RINGSZ;
528253440Sgrehan        sc->vsc_queues[VTNET_CTLQ].vq_notify = pci_vtnet_ping_ctlq;
529253440Sgrehan#endif
530246109Sneel
531246109Sneel	/*
532252682Sgrehan	 * Attempt to open the tap device and read the MAC address
533252682Sgrehan	 * if specified
534221828Sgrehan	 */
535252682Sgrehan	mac_provided = 0;
536221828Sgrehan	sc->vsc_tapfd = -1;
537221828Sgrehan	if (opts != NULL) {
538221828Sgrehan		char tbuf[80];
539252682Sgrehan		int err;
540221828Sgrehan
541252682Sgrehan		devname = vtopts = strdup(opts);
542252682Sgrehan		(void) strsep(&vtopts, ",");
543252682Sgrehan
544252682Sgrehan		if (vtopts != NULL) {
545253440Sgrehan			err = pci_vtnet_parsemac(vtopts, sc->vsc_config.mac);
546252682Sgrehan			if (err != 0) {
547252682Sgrehan				free(devname);
548252682Sgrehan				return (err);
549252682Sgrehan			}
550252682Sgrehan			mac_provided = 1;
551252682Sgrehan		}
552252682Sgrehan
553221828Sgrehan		strcpy(tbuf, "/dev/");
554252682Sgrehan		strlcat(tbuf, devname, sizeof(tbuf));
555221828Sgrehan
556252682Sgrehan		free(devname);
557252682Sgrehan
558221828Sgrehan		sc->vsc_tapfd = open(tbuf, O_RDWR);
559221828Sgrehan		if (sc->vsc_tapfd == -1) {
560221828Sgrehan			WPRINTF(("open of tap device %s failed\n", tbuf));
561221828Sgrehan		} else {
562221828Sgrehan			/*
563221828Sgrehan			 * Set non-blocking and register for read
564221828Sgrehan			 * notifications with the event loop
565221828Sgrehan			 */
566221828Sgrehan			int opt = 1;
567221828Sgrehan			if (ioctl(sc->vsc_tapfd, FIONBIO, &opt) < 0) {
568221828Sgrehan				WPRINTF(("tap device O_NONBLOCK failed\n"));
569221828Sgrehan				close(sc->vsc_tapfd);
570221828Sgrehan				sc->vsc_tapfd = -1;
571221828Sgrehan			}
572221828Sgrehan
573221828Sgrehan			sc->vsc_mevp = mevent_add(sc->vsc_tapfd,
574221828Sgrehan						  EVF_READ,
575221828Sgrehan						  pci_vtnet_tap_callback,
576221828Sgrehan						  sc);
577221828Sgrehan			if (sc->vsc_mevp == NULL) {
578221828Sgrehan				WPRINTF(("Could not register event\n"));
579221828Sgrehan				close(sc->vsc_tapfd);
580221828Sgrehan				sc->vsc_tapfd = -1;
581221828Sgrehan			}
582221828Sgrehan		}
583221828Sgrehan	}
584221828Sgrehan
585221828Sgrehan	/*
586252682Sgrehan	 * The default MAC address is the standard NetApp OUI of 00-a0-98,
587252682Sgrehan	 * followed by an MD5 of the PCI slot/func number and dev name
588221828Sgrehan	 */
589252682Sgrehan	if (!mac_provided) {
590244159Sgrehan		snprintf(nstr, sizeof(nstr), "%d-%d-%s", pi->pi_slot,
591259496Sgrehan		    pi->pi_func, vmname);
592221828Sgrehan
593252682Sgrehan		MD5Init(&mdctx);
594252682Sgrehan		MD5Update(&mdctx, nstr, strlen(nstr));
595252682Sgrehan		MD5Final(digest, &mdctx);
596221828Sgrehan
597253440Sgrehan		sc->vsc_config.mac[0] = 0x00;
598253440Sgrehan		sc->vsc_config.mac[1] = 0xa0;
599253440Sgrehan		sc->vsc_config.mac[2] = 0x98;
600253440Sgrehan		sc->vsc_config.mac[3] = digest[0];
601253440Sgrehan		sc->vsc_config.mac[4] = digest[1];
602253440Sgrehan		sc->vsc_config.mac[5] = digest[2];
603252682Sgrehan	}
604221828Sgrehan
605221828Sgrehan	/* initialize config space */
606221828Sgrehan	pci_set_cfgdata16(pi, PCIR_DEVICE, VIRTIO_DEV_NET);
607221828Sgrehan	pci_set_cfgdata16(pi, PCIR_VENDOR, VIRTIO_VENDOR);
608221828Sgrehan	pci_set_cfgdata8(pi, PCIR_CLASS, PCIC_NETWORK);
609221828Sgrehan	pci_set_cfgdata16(pi, PCIR_SUBDEV_0, VIRTIO_TYPE_NET);
610253440Sgrehan
611253440Sgrehan	/* link always up */
612253440Sgrehan	sc->vsc_config.status = 1;
613246109Sneel
614253440Sgrehan	/* use BAR 1 to map MSI-X table and PBA, if we're using MSI-X */
615256755Sgrehan	if (vi_intr_init(&sc->vsc_vs, 1, fbsdrun_virtio_msix()))
616253440Sgrehan		return (1);
617246109Sneel
618253440Sgrehan	/* use BAR 0 to map config regs in IO space */
619253440Sgrehan	vi_set_io_bar(&sc->vsc_vs, 0);
620246109Sneel
621250083Sneel	sc->resetting = 0;
622250083Sneel
623250083Sneel	sc->rx_in_progress = 0;
624250083Sneel	pthread_mutex_init(&sc->rx_mtx, NULL);
625250083Sneel
626249917Sgrehan	/*
627253440Sgrehan	 * Initialize tx semaphore & spawn TX processing thread.
628249917Sgrehan	 * As of now, only one thread for TX desc processing is
629249917Sgrehan	 * spawned.
630249917Sgrehan	 */
631249917Sgrehan	sc->tx_in_progress = 0;
632249917Sgrehan	pthread_mutex_init(&sc->tx_mtx, NULL);
633249917Sgrehan	pthread_cond_init(&sc->tx_cond, NULL);
634249917Sgrehan	pthread_create(&sc->tx_tid, NULL, pci_vtnet_tx_thread, (void *)sc);
635259496Sgrehan	snprintf(tname, sizeof(tname), "vtnet-%d:%d tx", pi->pi_slot,
636259496Sgrehan	    pi->pi_func);
637249917Sgrehan        pthread_set_name_np(sc->tx_tid, tname);
638221828Sgrehan
639221828Sgrehan	return (0);
640221828Sgrehan}
641221828Sgrehan
642253440Sgrehanstatic int
643253440Sgrehanpci_vtnet_cfgwrite(void *vsc, int offset, int size, uint32_t value)
644246109Sneel{
645253440Sgrehan	struct pci_vtnet_softc *sc = vsc;
646222830Sgrehan	void *ptr;
647222830Sgrehan
648253440Sgrehan	if (offset < 6) {
649253440Sgrehan		assert(offset + size <= 6);
650221828Sgrehan		/*
651221828Sgrehan		 * The driver is allowed to change the MAC address
652221828Sgrehan		 */
653253440Sgrehan		ptr = &sc->vsc_config.mac[offset];
654253440Sgrehan		memcpy(ptr, &value, size);
655253440Sgrehan	} else {
656253440Sgrehan		DPRINTF(("vtnet: write to readonly reg %d\n\r", offset));
657253440Sgrehan		return (1);
658221828Sgrehan	}
659253440Sgrehan	return (0);
660221828Sgrehan}
661221828Sgrehan
662253440Sgrehanstatic int
663253440Sgrehanpci_vtnet_cfgread(void *vsc, int offset, int size, uint32_t *retval)
664221828Sgrehan{
665253440Sgrehan	struct pci_vtnet_softc *sc = vsc;
666222830Sgrehan	void *ptr;
667221828Sgrehan
668253440Sgrehan	ptr = (uint8_t *)&sc->vsc_config + offset;
669253440Sgrehan	memcpy(retval, ptr, size);
670253440Sgrehan	return (0);
671221828Sgrehan}
672221828Sgrehan
673221828Sgrehanstruct pci_devemu pci_de_vnet = {
674241744Sgrehan	.pe_emu = 	"virtio-net",
675241744Sgrehan	.pe_init =	pci_vtnet_init,
676253440Sgrehan	.pe_barwrite =	vi_pci_write,
677253440Sgrehan	.pe_barread =	vi_pci_read
678221828Sgrehan};
679221828SgrehanPCI_EMUL_SET(pci_de_vnet);
680