pci_virtio_net.c revision 284900
/*-
 * Copyright (c) 2011 NetApp, Inc.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * $FreeBSD: stable/10/usr.sbin/bhyve/pci_virtio_net.c 284900 2015-06-28 03:22:26Z neel $
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD: stable/10/usr.sbin/bhyve/pci_virtio_net.c 284900 2015-06-28 03:22:26Z neel $");

#include <sys/param.h>
#include <sys/linker_set.h>
#include <sys/select.h>
#include <sys/uio.h>
#include <sys/ioctl.h>
#include <machine/atomic.h>
#include <net/ethernet.h>

#include <errno.h>
#include <fcntl.h>
#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>
#include <string.h>
#include <strings.h>
#include <unistd.h>
#include <assert.h>
#include <md5.h>
#include <pthread.h>
#include <pthread_np.h>

#include "bhyverun.h"
#include "pci_emul.h"
#include "mevent.h"
#include "virtio.h"

#define VTNET_RINGSZ	1024

#define VTNET_MAXSEGS	32

/*
 * Host capabilities.  Note that we only offer a few of these.
 */
#define	VIRTIO_NET_F_CSUM	(1 <<  0) /* host handles partial cksum */
#define	VIRTIO_NET_F_GUEST_CSUM	(1 <<  1) /* guest handles partial cksum */
#define	VIRTIO_NET_F_MAC	(1 <<  5) /* host supplies MAC */
#define	VIRTIO_NET_F_GSO_DEPREC	(1 <<  6) /* deprecated: host handles GSO */
#define	VIRTIO_NET_F_GUEST_TSO4	(1 <<  7) /* guest can rcv TSOv4 */
#define	VIRTIO_NET_F_GUEST_TSO6	(1 <<  8) /* guest can rcv TSOv6 */
#define	VIRTIO_NET_F_GUEST_ECN	(1 <<  9) /* guest can rcv TSO with ECN */
#define	VIRTIO_NET_F_GUEST_UFO	(1 << 10) /* guest can rcv UFO */
#define	VIRTIO_NET_F_HOST_TSO4	(1 << 11) /* host can rcv TSOv4 */
#define	VIRTIO_NET_F_HOST_TSO6	(1 << 12) /* host can rcv TSOv6 */
#define	VIRTIO_NET_F_HOST_ECN	(1 << 13) /* host can rcv TSO with ECN */
#define	VIRTIO_NET_F_HOST_UFO	(1 << 14) /* host can rcv UFO */
#define	VIRTIO_NET_F_MRG_RXBUF	(1 << 15) /* host can merge RX buffers */
#define	VIRTIO_NET_F_STATUS	(1 << 16) /* config status field available */
#define	VIRTIO_NET_F_CTRL_VQ	(1 << 17) /* control channel available */
#define	VIRTIO_NET_F_CTRL_RX	(1 << 18) /* control channel RX mode support */
#define	VIRTIO_NET_F_CTRL_VLAN	(1 << 19) /* control channel VLAN filtering */
#define	VIRTIO_NET_F_GUEST_ANNOUNCE \
				(1 << 21) /* guest can send gratuitous pkts */

#define VTNET_S_HOSTCAPS      \
  ( VIRTIO_NET_F_MAC | VIRTIO_NET_F_MRG_RXBUF | VIRTIO_NET_F_STATUS | \
    VIRTIO_F_NOTIFY_ON_EMPTY)
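/*
 * No checksum or TSO/UFO offload bits are advertised above: the tap
 * backend moves frames with plain readv(2)/writev(2), so any offload
 * offered to the guest would have to be emulated here in software.
 */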

/*
 * PCI config-space "registers"
 */
struct virtio_net_config {
	uint8_t  mac[6];
	uint16_t status;
} __packed;

/*
 * Queue definitions.
 */
#define VTNET_RXQ	0
#define VTNET_TXQ	1
#define VTNET_CTLQ	2	/* NB: not yet supported */

#define VTNET_MAXQ	3

/*
 * Fixed network header size
 */
struct virtio_net_rxhdr {
	uint8_t		vrh_flags;
	uint8_t		vrh_gso_type;
	uint16_t	vrh_hdr_len;
	uint16_t	vrh_gso_size;
	uint16_t	vrh_csum_start;
	uint16_t	vrh_csum_offset;
	uint16_t	vrh_bufs;
} __packed;
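/*
 * This layout matches the virtio spec's merged-rxbuf header
 * (virtio_net_hdr_mrg_rxbuf).  When VIRTIO_NET_F_MRG_RXBUF is not
 * negotiated, the trailing vrh_bufs field is absent and the header is
 * two bytes shorter; see pci_vtnet_neg_features().
 */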

/*
 * Debug printf
 */
static int pci_vtnet_debug;
#define DPRINTF(params) if (pci_vtnet_debug) printf params
#define WPRINTF(params) printf params

/*
 * Per-device softc
 */
struct pci_vtnet_softc {
	struct virtio_softc vsc_vs;
	struct vqueue_info vsc_queues[VTNET_MAXQ - 1];
	pthread_mutex_t vsc_mtx;
	struct mevent	*vsc_mevp;

	int		vsc_tapfd;
	int		vsc_rx_ready;
	volatile int	resetting;	/* set and checked outside lock */

	uint64_t	vsc_features;	/* negotiated features */

	struct virtio_net_config vsc_config;

	pthread_mutex_t	rx_mtx;
	int		rx_in_progress;
	int		rx_vhdrlen;
	int		rx_merge;	/* merged rx bufs in use */

	pthread_t 	tx_tid;
	pthread_mutex_t	tx_mtx;
	pthread_cond_t	tx_cond;
	int		tx_in_progress;
};
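/*
 * Locking model: vsc_mtx protects the generic virtio state, while rx_mtx
 * and tx_mtx serialize the tap read callback and the transmit thread
 * against a device reset (see pci_vtnet_rxwait()/pci_vtnet_txwait()).
 */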

static void pci_vtnet_reset(void *);
/* static void pci_vtnet_notify(void *, struct vqueue_info *); */
static int pci_vtnet_cfgread(void *, int, int, uint32_t *);
static int pci_vtnet_cfgwrite(void *, int, int, uint32_t);
static void pci_vtnet_neg_features(void *, uint64_t);

static struct virtio_consts vtnet_vi_consts = {
	"vtnet",		/* our name */
	VTNET_MAXQ - 1,		/* we currently support 2 virtqueues */
	sizeof(struct virtio_net_config), /* config reg size */
	pci_vtnet_reset,	/* reset */
	NULL,			/* device-wide qnotify -- not used */
	pci_vtnet_cfgread,	/* read PCI config */
	pci_vtnet_cfgwrite,	/* write PCI config */
	pci_vtnet_neg_features,	/* apply negotiated features */
	VTNET_S_HOSTCAPS,	/* our capabilities */
};

/*
 * If the transmit thread is active then stall until it is done.
 */
static void
pci_vtnet_txwait(struct pci_vtnet_softc *sc)
{

	pthread_mutex_lock(&sc->tx_mtx);
	while (sc->tx_in_progress) {
		pthread_mutex_unlock(&sc->tx_mtx);
		usleep(10000);
		pthread_mutex_lock(&sc->tx_mtx);
	}
	pthread_mutex_unlock(&sc->tx_mtx);
}

/*
 * If the receive thread is active then stall until it is done.
 */
static void
pci_vtnet_rxwait(struct pci_vtnet_softc *sc)
{

	pthread_mutex_lock(&sc->rx_mtx);
	while (sc->rx_in_progress) {
		pthread_mutex_unlock(&sc->rx_mtx);
		usleep(10000);
		pthread_mutex_lock(&sc->rx_mtx);
	}
	pthread_mutex_unlock(&sc->rx_mtx);
}
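/*
 * Both wait routines poll with a 10ms sleep rather than a condition
 * variable, on the assumption that resets are rare enough for this
 * simple approach to be acceptable.
 */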

static void
pci_vtnet_reset(void *vsc)
{
	struct pci_vtnet_softc *sc = vsc;

	DPRINTF(("vtnet: device reset requested!\n"));

	sc->resetting = 1;

	/*
	 * Wait for the transmit and receive threads to finish their
	 * processing.
	 */
	pci_vtnet_txwait(sc);
	pci_vtnet_rxwait(sc);

	sc->vsc_rx_ready = 0;
	sc->rx_merge = 1;
	sc->rx_vhdrlen = sizeof(struct virtio_net_rxhdr);

	/* now reset rings, MSI-X vectors, and negotiated capabilities */
	vi_reset_dev(&sc->vsc_vs);

	sc->resetting = 0;
}
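/*
 * The resetting flag is volatile because it is read outside the locks
 * by the rx and tx paths; the waits above ensure both paths have
 * drained before the rings are torn down.
 */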

/*
 * Called to send a buffer chain out to the tap device
 */
static void
pci_vtnet_tap_tx(struct pci_vtnet_softc *sc, struct iovec *iov, int iovcnt,
		 int len)
{
	static char pad[60]; /* all zero bytes */

	if (sc->vsc_tapfd == -1)
		return;

	/*
	 * If the length is < 60, pad out to that and add the
	 * extra zero'd segment to the iov.  The caller guarantees
	 * that an extra iov slot is always available.
	 */
	if (len < 60) {
		iov[iovcnt].iov_base = pad;
		iov[iovcnt].iov_len = 60 - len;
		iovcnt++;
	}
	(void) writev(sc->vsc_tapfd, iov, iovcnt);
}
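/*
 * The 60-byte minimum is ETHER_MIN_LEN (64) less the 4-byte frame check
 * sequence, which is not part of what we hand to the tap device.
 */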

/*
 *  Called when there is read activity on the tap file descriptor.
 * Each buffer posted by the guest is assumed to be able to contain
 * an entire ethernet frame + rx header.
 *  MP note: the dummybuf is only used for discarding frames, so there
 * is no need for it to be per-vtnet or locked.
 */
static uint8_t dummybuf[2048];

static __inline struct iovec *
rx_iov_trim(struct iovec *iov, int *niov, int tlen)
{
	struct iovec *riov;

	/* XXX short-cut: assume first segment is >= tlen */
	assert(iov[0].iov_len >= tlen);

	iov[0].iov_len -= tlen;
	if (iov[0].iov_len == 0) {
		assert(*niov > 1);
		*niov -= 1;
		riov = &iov[1];
	} else {
		iov[0].iov_base = (void *)((uintptr_t)iov[0].iov_base + tlen);
		riov = &iov[0];
	}

	return (riov);
}
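/*
 * The assert above encodes an assumption, not a guarantee: the guest's
 * first descriptor must be at least tlen (the rx header length) bytes.
 * Well-behaved drivers post much larger buffers, but a misbehaving
 * guest could trip the assertion.
 */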

static void
pci_vtnet_tap_rx(struct pci_vtnet_softc *sc)
{
	struct iovec iov[VTNET_MAXSEGS], *riov;
	struct vqueue_info *vq;
	void *vrx;
	int len, n;
	uint16_t idx;

	/*
	 * Should never be called without a valid tap fd
	 */
	assert(sc->vsc_tapfd != -1);

	/*
	 * But, will be called when the rx ring hasn't yet
	 * been set up or the guest is resetting the device.
	 */
	if (!sc->vsc_rx_ready || sc->resetting) {
		/*
		 * Drop the packet and try later.
		 */
		(void) read(sc->vsc_tapfd, dummybuf, sizeof(dummybuf));
		return;
	}

	/*
	 * Check for available rx buffers
	 */
	vq = &sc->vsc_queues[VTNET_RXQ];
	if (!vq_has_descs(vq)) {
		/*
		 * Drop the packet and try later.  Interrupt on
		 * empty, if that's negotiated.
		 */
		(void) read(sc->vsc_tapfd, dummybuf, sizeof(dummybuf));
		vq_endchains(vq, 1);
		return;
	}

	do {
		/*
		 * Get descriptor chain.
		 */
		n = vq_getchain(vq, &idx, iov, VTNET_MAXSEGS, NULL);
		assert(n >= 1 && n <= VTNET_MAXSEGS);

		/*
		 * Get a pointer to the rx header, and use the
		 * data immediately following it for the packet buffer.
		 */
		vrx = iov[0].iov_base;
		riov = rx_iov_trim(iov, &n, sc->rx_vhdrlen);

		len = readv(sc->vsc_tapfd, riov, n);

		if (len < 0 && errno == EWOULDBLOCK) {
			/*
			 * No more packets, but still some avail ring
			 * entries.  Interrupt if needed/appropriate.
			 */
			vq_retchain(vq);
			vq_endchains(vq, 0);
			return;
		}

		/*
		 * The only valid field in the rx packet header is the
		 * number of buffers if merged rx bufs were negotiated.
		 */
		memset(vrx, 0, sc->rx_vhdrlen);

		if (sc->rx_merge) {
			struct virtio_net_rxhdr *vrxh;

			vrxh = vrx;
			vrxh->vrh_bufs = 1;
		}

		/*
		 * Release this chain and handle more chains.
		 */
		vq_relchain(vq, idx, len + sc->rx_vhdrlen);
	} while (vq_has_descs(vq));

	/* Interrupt if needed, including for NOTIFY_ON_EMPTY. */
	vq_endchains(vq, 1);
}
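/*
 * Note that a readv() failure other than EWOULDBLOCK is not handled
 * specially above: len would be -1 and the chain would be released with
 * a bogus used length of rx_vhdrlen - 1.
 */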

static void
pci_vtnet_tap_callback(int fd, enum ev_type type, void *param)
{
	struct pci_vtnet_softc *sc = param;

	pthread_mutex_lock(&sc->rx_mtx);
	sc->rx_in_progress = 1;
	pci_vtnet_tap_rx(sc);
	sc->rx_in_progress = 0;
	pthread_mutex_unlock(&sc->rx_mtx);
}

static void
pci_vtnet_ping_rxq(void *vsc, struct vqueue_info *vq)
{
	struct pci_vtnet_softc *sc = vsc;

	/*
	 * A qnotify means that the rx process can now begin
	 */
	if (sc->vsc_rx_ready == 0) {
		sc->vsc_rx_ready = 1;
		vq->vq_used->vu_flags |= VRING_USED_F_NO_NOTIFY;
	}
}
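/*
 * Setting VRING_USED_F_NO_NOTIFY asks the guest to stop kicking us for
 * rx buffer refills: the tap read callback polls the avail ring itself,
 * so further notifications would only cost vmexits.
 */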

static void
pci_vtnet_proctx(struct pci_vtnet_softc *sc, struct vqueue_info *vq)
{
	struct iovec iov[VTNET_MAXSEGS + 1];
	int i, n;
	int plen, tlen;
	uint16_t idx;

	/*
	 * Obtain chain of descriptors.  The first one is
	 * really the header descriptor, so we need to sum
	 * up two lengths: packet length and transfer length.
	 */
	n = vq_getchain(vq, &idx, iov, VTNET_MAXSEGS, NULL);
	assert(n >= 1 && n <= VTNET_MAXSEGS);
	plen = 0;
	tlen = iov[0].iov_len;
	for (i = 1; i < n; i++) {
		plen += iov[i].iov_len;
		tlen += iov[i].iov_len;
	}

	DPRINTF(("virtio: packet send, %d bytes, %d segs\n\r", plen, n));
	pci_vtnet_tap_tx(sc, &iov[1], n - 1, plen);

	/* chain is processed, release it and set tlen */
	vq_relchain(vq, idx, tlen);
}
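/*
 * The virtio-net header in iov[0] is deliberately ignored here: none of
 * the offload features that would make its contents meaningful are
 * offered to the guest.
 */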

static void
pci_vtnet_ping_txq(void *vsc, struct vqueue_info *vq)
{
	struct pci_vtnet_softc *sc = vsc;

	/*
	 * Any ring entries to process?
	 */
	if (!vq_has_descs(vq))
		return;

	/* Signal the tx thread for processing */
	pthread_mutex_lock(&sc->tx_mtx);
	vq->vq_used->vu_flags |= VRING_USED_F_NO_NOTIFY;
	if (sc->tx_in_progress == 0)
		pthread_cond_signal(&sc->tx_cond);
	pthread_mutex_unlock(&sc->tx_mtx);
}

/*
 * Thread which will handle processing of TX desc
 */
static void *
pci_vtnet_tx_thread(void *param)
{
	struct pci_vtnet_softc *sc = param;
	struct vqueue_info *vq;
	int error;

	vq = &sc->vsc_queues[VTNET_TXQ];

	/*
	 * Wait until the tx queue pointers have been initialised and
	 * the first tx has been signalled.
	 */
	pthread_mutex_lock(&sc->tx_mtx);
	error = pthread_cond_wait(&sc->tx_cond, &sc->tx_mtx);
	assert(error == 0);
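	/*
	 * Note this initial wait assumes the first tx notification only
	 * arrives once this thread is blocked on tx_cond; a kick delivered
	 * before that point would go unnoticed until the next one.
	 */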

	for (;;) {
		/* note - tx mutex is locked here */
		while (sc->resetting || !vq_has_descs(vq)) {
			vq->vq_used->vu_flags &= ~VRING_USED_F_NO_NOTIFY;
			mb();
			if (!sc->resetting && vq_has_descs(vq))
				break;

			sc->tx_in_progress = 0;
			error = pthread_cond_wait(&sc->tx_cond, &sc->tx_mtx);
			assert(error == 0);
		}
		vq->vq_used->vu_flags |= VRING_USED_F_NO_NOTIFY;
		sc->tx_in_progress = 1;
		pthread_mutex_unlock(&sc->tx_mtx);

		do {
			/*
			 * Run through entries, placing them into
			 * iovecs and sending when an end-of-packet
			 * is found
			 */
			pci_vtnet_proctx(sc, vq);
		} while (vq_has_descs(vq));

		/*
		 * Generate an interrupt if needed.
		 */
		vq_endchains(vq, 1);

		pthread_mutex_lock(&sc->tx_mtx);
	}
}

#ifdef notyet
static void
pci_vtnet_ping_ctlq(void *vsc, struct vqueue_info *vq)
{

	DPRINTF(("vtnet: control qnotify!\n\r"));
}
#endif

static int
pci_vtnet_parsemac(char *mac_str, uint8_t *mac_addr)
{
	struct ether_addr *ea;
	char *tmpstr;
	char zero_addr[ETHER_ADDR_LEN] = { 0, 0, 0, 0, 0, 0 };

	tmpstr = strsep(&mac_str, "=");

	if ((mac_str != NULL) && (!strcmp(tmpstr, "mac"))) {
		ea = ether_aton(mac_str);

		if (ea == NULL || ETHER_IS_MULTICAST(ea->octet) ||
		    memcmp(ea->octet, zero_addr, ETHER_ADDR_LEN) == 0) {
			fprintf(stderr, "Invalid MAC %s\n", mac_str);
			return (EINVAL);
		} else
			memcpy(mac_addr, ea->octet, ETHER_ADDR_LEN);
	}

	return (0);
}
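/*
 * The accepted option syntax is "mac=aa:bb:cc:dd:ee:ff".  Multicast and
 * all-zero addresses are rejected since neither is a valid station
 * address.
 */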

static int
pci_vtnet_init(struct vmctx *ctx, struct pci_devinst *pi, char *opts)
{
	MD5_CTX mdctx;
	unsigned char digest[16];
	char nstr[80];
	char tname[MAXCOMLEN + 1];
	struct pci_vtnet_softc *sc;
	char *devname;
	char *vtopts;
	int mac_provided;

	sc = calloc(1, sizeof(struct pci_vtnet_softc));

	pthread_mutex_init(&sc->vsc_mtx, NULL);

	vi_softc_linkup(&sc->vsc_vs, &vtnet_vi_consts, sc, pi, sc->vsc_queues);
	sc->vsc_vs.vs_mtx = &sc->vsc_mtx;

	sc->vsc_queues[VTNET_RXQ].vq_qsize = VTNET_RINGSZ;
	sc->vsc_queues[VTNET_RXQ].vq_notify = pci_vtnet_ping_rxq;
	sc->vsc_queues[VTNET_TXQ].vq_qsize = VTNET_RINGSZ;
	sc->vsc_queues[VTNET_TXQ].vq_notify = pci_vtnet_ping_txq;
#ifdef notyet
	sc->vsc_queues[VTNET_CTLQ].vq_qsize = VTNET_RINGSZ;
	sc->vsc_queues[VTNET_CTLQ].vq_notify = pci_vtnet_ping_ctlq;
#endif

	/*
	 * Attempt to open the tap device and read the MAC address
	 * if specified
	 */
	mac_provided = 0;
	sc->vsc_tapfd = -1;
	if (opts != NULL) {
		char tbuf[80];
		int err;

		devname = vtopts = strdup(opts);
		(void) strsep(&vtopts, ",");

		if (vtopts != NULL) {
			err = pci_vtnet_parsemac(vtopts, sc->vsc_config.mac);
			if (err != 0) {
				free(devname);
				return (err);
			}
			mac_provided = 1;
		}

		strcpy(tbuf, "/dev/");
		strlcat(tbuf, devname, sizeof(tbuf));

		free(devname);

		sc->vsc_tapfd = open(tbuf, O_RDWR);
		if (sc->vsc_tapfd == -1) {
			WPRINTF(("open of tap device %s failed\n", tbuf));
		} else {
			/*
			 * Set non-blocking and register for read
			 * notifications with the event loop
			 */
			int opt = 1;
			if (ioctl(sc->vsc_tapfd, FIONBIO, &opt) < 0) {
				WPRINTF(("tap device O_NONBLOCK failed\n"));
				close(sc->vsc_tapfd);
				sc->vsc_tapfd = -1;
			}

			/*
			 * Register for read notifications only if the
			 * fd is still valid; the FIONBIO failure path
			 * above closes it.
			 */
			if (sc->vsc_tapfd != -1) {
				sc->vsc_mevp = mevent_add(sc->vsc_tapfd,
							  EVF_READ,
							  pci_vtnet_tap_callback,
							  sc);
				if (sc->vsc_mevp == NULL) {
					WPRINTF(("Could not register event\n"));
					close(sc->vsc_tapfd);
					sc->vsc_tapfd = -1;
				}
			}
		}
	}

	/*
	 * The default MAC address is the standard NetApp OUI of 00-a0-98,
	 * followed by an MD5 of the PCI slot/func number and the VM name
	 */
	if (!mac_provided) {
		snprintf(nstr, sizeof(nstr), "%d-%d-%s", pi->pi_slot,
		    pi->pi_func, vmname);

		MD5Init(&mdctx);
		MD5Update(&mdctx, nstr, strlen(nstr));
		MD5Final(digest, &mdctx);

		sc->vsc_config.mac[0] = 0x00;
		sc->vsc_config.mac[1] = 0xa0;
		sc->vsc_config.mac[2] = 0x98;
		sc->vsc_config.mac[3] = digest[0];
		sc->vsc_config.mac[4] = digest[1];
		sc->vsc_config.mac[5] = digest[2];
	}

	/* initialize config space */
	pci_set_cfgdata16(pi, PCIR_DEVICE, VIRTIO_DEV_NET);
	pci_set_cfgdata16(pi, PCIR_VENDOR, VIRTIO_VENDOR);
	pci_set_cfgdata8(pi, PCIR_CLASS, PCIC_NETWORK);
	pci_set_cfgdata16(pi, PCIR_SUBDEV_0, VIRTIO_TYPE_NET);
	pci_set_cfgdata16(pi, PCIR_SUBVEND_0, VIRTIO_VENDOR);

	/* Link is up if we managed to open tap device. */
	sc->vsc_config.status = (opts == NULL || sc->vsc_tapfd >= 0);

	/* use BAR 1 to map MSI-X table and PBA, if we're using MSI-X */
	if (vi_intr_init(&sc->vsc_vs, 1, fbsdrun_virtio_msix()))
		return (1);

	/* use BAR 0 to map config regs in IO space */
	vi_set_io_bar(&sc->vsc_vs, 0);

	sc->resetting = 0;

	sc->rx_merge = 1;
	sc->rx_vhdrlen = sizeof(struct virtio_net_rxhdr);
	sc->rx_in_progress = 0;
	pthread_mutex_init(&sc->rx_mtx, NULL);

	/*
	 * Initialize the tx mutex/condvar & spawn the TX processing
	 * thread.  As of now, only one thread for TX desc processing is
	 * spawned.
	 */
	sc->tx_in_progress = 0;
	pthread_mutex_init(&sc->tx_mtx, NULL);
	pthread_cond_init(&sc->tx_cond, NULL);
	pthread_create(&sc->tx_tid, NULL, pci_vtnet_tx_thread, (void *)sc);
	snprintf(tname, sizeof(tname), "vtnet-%d:%d tx", pi->pi_slot,
	    pi->pi_func);
	pthread_set_name_np(sc->tx_tid, tname);

	return (0);
}

static int
pci_vtnet_cfgwrite(void *vsc, int offset, int size, uint32_t value)
{
	struct pci_vtnet_softc *sc = vsc;
	void *ptr;

	if (offset < 6) {
		assert(offset + size <= 6);
		/*
		 * The driver is allowed to change the MAC address
		 */
		ptr = &sc->vsc_config.mac[offset];
		memcpy(ptr, &value, size);
	} else {
		/* silently ignore other writes */
		DPRINTF(("vtnet: write to readonly reg %d\n\r", offset));
	}

	return (0);
}

static int
pci_vtnet_cfgread(void *vsc, int offset, int size, uint32_t *retval)
{
	struct pci_vtnet_softc *sc = vsc;
	void *ptr;

	ptr = (uint8_t *)&sc->vsc_config + offset;
	memcpy(retval, ptr, size);
	return (0);
}

static void
pci_vtnet_neg_features(void *vsc, uint64_t negotiated_features)
{
	struct pci_vtnet_softc *sc = vsc;

	sc->vsc_features = negotiated_features;

	if (!(sc->vsc_features & VIRTIO_NET_F_MRG_RXBUF)) {
		sc->rx_merge = 0;
		/* non-merge rx header is 2 bytes shorter */
		sc->rx_vhdrlen -= 2;
	}
}
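/*
 * The two bytes dropped above are the vrh_bufs field of struct
 * virtio_net_rxhdr, which exists only when merged rx buffers are in use.
 */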

struct pci_devemu pci_de_vnet = {
	.pe_emu =	"virtio-net",
	.pe_init =	pci_vtnet_init,
	.pe_barwrite =	vi_pci_write,
	.pe_barread =	vi_pci_read
};
PCI_EMUL_SET(pci_de_vnet);