pci_virtio_net.c revision 222830
1221828Sgrehan/*-
2221828Sgrehan * Copyright (c) 2011 NetApp, Inc.
3221828Sgrehan * All rights reserved.
4221828Sgrehan *
5221828Sgrehan * Redistribution and use in source and binary forms, with or without
6221828Sgrehan * modification, are permitted provided that the following conditions
7221828Sgrehan * are met:
8221828Sgrehan * 1. Redistributions of source code must retain the above copyright
9221828Sgrehan *    notice, this list of conditions and the following disclaimer.
10221828Sgrehan * 2. Redistributions in binary form must reproduce the above copyright
11221828Sgrehan *    notice, this list of conditions and the following disclaimer in the
12221828Sgrehan *    documentation and/or other materials provided with the distribution.
13221828Sgrehan *
14221828Sgrehan * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
15221828Sgrehan * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16221828Sgrehan * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
17221828Sgrehan * ARE DISCLAIMED.  IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
18221828Sgrehan * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19221828Sgrehan * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
20221828Sgrehan * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21221828Sgrehan * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
22221828Sgrehan * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
23221828Sgrehan * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
24221828Sgrehan * SUCH DAMAGE.
25221828Sgrehan *
26221828Sgrehan * $FreeBSD$
27221828Sgrehan */
28221828Sgrehan
29221828Sgrehan#include <sys/cdefs.h>
30221828Sgrehan__FBSDID("$FreeBSD$");
31221828Sgrehan
32221828Sgrehan#include <sys/param.h>
33221828Sgrehan#include <sys/linker_set.h>
34221828Sgrehan#include <sys/select.h>
35221828Sgrehan#include <sys/uio.h>
36221828Sgrehan#include <sys/ioctl.h>
37221828Sgrehan
38221828Sgrehan#include <errno.h>
39221828Sgrehan#include <fcntl.h>
40221828Sgrehan#include <stdio.h>
41221828Sgrehan#include <stdlib.h>
42221828Sgrehan#include <stdint.h>
43221828Sgrehan#include <string.h>
44221828Sgrehan#include <strings.h>
45221828Sgrehan#include <unistd.h>
46221828Sgrehan#include <assert.h>
47221828Sgrehan#include <md5.h>
48221828Sgrehan#include <pthread.h>
49221828Sgrehan
50221828Sgrehan#include "fbsdrun.h"
51221828Sgrehan#include "pci_emul.h"
52221828Sgrehan#include "mevent.h"
53221828Sgrehan#include "virtio.h"
54221828Sgrehan
/* Number of entries in each rx/tx virtqueue */
#define VTNET_RINGSZ		256

/* Upper bound on data segments gathered for a single transmit chain */
#define VTNET_MAXSEGS		32
58221828Sgrehan
/*
 * Device-specific register offsets within the I/O BAR.
 * CFG0..CFG5 hold the MAC address; CFG6/CFG7 are read-only here.
 */
#define VTNET_R_CFG0	       20
#define VTNET_R_CFG1	       21
#define VTNET_R_CFG2	       22
#define VTNET_R_CFG3	       23
#define VTNET_R_CFG4	       24
#define VTNET_R_CFG5	       25
#define VTNET_R_CFG6	       26
#define VTNET_R_CFG7	       27
#define VTNET_R_MAX	       27

/*
 * Size of the register window. Parenthesized so the macro expands
 * correctly inside larger expressions (e.g. 'VTNET_REGSZ * 2').
 */
#define VTNET_REGSZ		(VTNET_R_MAX + 1)
73221828Sgrehan
/*
 * Feature bits advertised by the host:
 *   bit  5 (0x00000020) - host supplies MAC
 *   bit 15 (0x00008000) - host can merge Rx buffers
 *   bit 16 (0x00010000) - config status available
 */
#define VTNET_S_HOSTCAPS	((1 << 5) | (1 << 15) | (1 << 16))
81221828Sgrehan
/*
 * Virtqueue numbering: receive, transmit and control, in that order.
 */
#define VTNET_RXQ		0
#define VTNET_TXQ		1
#define VTNET_CTLQ		2

/* Total number of queues (size of the per-queue arrays) */
#define VTNET_MAXQ		3
90221828Sgrehan
/*
 * Host-side bookkeeping for one virtqueue. The pointer members are host
 * addresses computed by pci_vtnet_ring_init() from the guest-supplied
 * page frame number.
 */
struct vring_hqueue {
	/* Internal (host-only) state */
	uint16_t	hq_size;		/* number of ring entries */
	uint16_t	hq_cur_aidx;		/* trails behind 'avail_idx' */

	/* Descriptor table */
	struct virtio_desc *hq_dtable;

	/* Available ring */
	uint16_t	*hq_avail_flags;
	uint16_t	*hq_avail_idx;		/* monotonically increasing */
	uint16_t	*hq_avail_ring;

	/* Used ring */
	uint16_t	*hq_used_flags;
	uint16_t	*hq_used_idx;		/* monotonically increasing */
	struct virtio_used *hq_used_ring;
};
106221828Sgrehan
/*
 * Fixed-size header placed in front of every received frame.
 * The receive path zeroes the whole header and then sets only
 * 'vrh_bufs'.
 */
struct virtio_net_rxhdr {
	uint8_t		vrh_flags;
	uint8_t		vrh_gso_type;
	uint16_t	vrh_hdr_len;
	uint16_t	vrh_gso_size;
	uint16_t	vrh_csum_start;
	uint16_t	vrh_csum_offset;
	uint16_t	vrh_bufs;		/* number of buffers used */
} __packed;
119221828Sgrehan
/*
 * Debug printf.
 *
 * Both macros take a single, fully parenthesized printf argument list:
 *	DPRINTF(("fmt %d\n", x));
 * DPRINTF only prints when 'pci_vtnet_debug' is non-zero; WPRINTF
 * always prints. They are wrapped in do { } while (0) so that each
 * expands to exactly one statement and cannot capture a following
 * 'else' when used as the body of an unbraced 'if'.
 */
static int pci_vtnet_debug;
#define DPRINTF(params) do { if (pci_vtnet_debug) printf params; } while (0)
#define WPRINTF(params) do { printf params; } while (0)
126221828Sgrehan
127221828Sgrehan/*
128221828Sgrehan * Per-device softc
129221828Sgrehan */
130221828Sgrehanstruct pci_vtnet_softc {
131221828Sgrehan	struct pci_devinst *vsc_pi;
132221828Sgrehan	pthread_mutex_t vsc_mtx;
133221828Sgrehan	struct mevent	*vsc_mevp;
134221828Sgrehan
135221828Sgrehan	int		vsc_curq;
136221828Sgrehan	int		vsc_status;
137221828Sgrehan	int		vsc_isr;
138221828Sgrehan	int		vsc_tapfd;
139221828Sgrehan	int		vsc_rx_ready;
140221828Sgrehan	int		vsc_rxpend;
141221828Sgrehan
142221828Sgrehan	uint32_t	vsc_features;
143221828Sgrehan	uint8_t		vsc_macaddr[6];
144221828Sgrehan
145221828Sgrehan	uint64_t	vsc_pfn[VTNET_MAXQ];
146221828Sgrehan	struct	vring_hqueue vsc_hq[VTNET_MAXQ];
147221828Sgrehan};
148221828Sgrehan
149221828Sgrehan/*
150221828Sgrehan * Return the number of available descriptors in the vring taking care
151221828Sgrehan * of the 16-bit index wraparound.
152221828Sgrehan */
153221828Sgrehanstatic int
154221828Sgrehanhq_num_avail(struct vring_hqueue *hq)
155221828Sgrehan{
156221828Sgrehan	int ndesc;
157221828Sgrehan
158221828Sgrehan	if (*hq->hq_avail_idx >= hq->hq_cur_aidx)
159221828Sgrehan		ndesc = *hq->hq_avail_idx - hq->hq_cur_aidx;
160221828Sgrehan	else
161221828Sgrehan		ndesc = UINT16_MAX - hq->hq_cur_aidx + *hq->hq_avail_idx + 1;
162221828Sgrehan
163221828Sgrehan	assert(ndesc >= 0 && ndesc <= hq->hq_size);
164221828Sgrehan
165221828Sgrehan	return (ndesc);
166221828Sgrehan}
167221828Sgrehan
168221828Sgrehanstatic uint16_t
169221828Sgrehanpci_vtnet_qsize(int qnum)
170221828Sgrehan{
171221828Sgrehan	/* XXX no ctl queue currently */
172221828Sgrehan	if (qnum == VTNET_CTLQ) {
173221828Sgrehan		return (0);
174221828Sgrehan	}
175221828Sgrehan
176221828Sgrehan	/* XXX fixed currently. Maybe different for tx/rx/ctl */
177221828Sgrehan	return (VTNET_RINGSZ);
178221828Sgrehan}
179221828Sgrehan
180221828Sgrehanstatic void
181221828Sgrehanpci_vtnet_update_status(struct pci_vtnet_softc *sc, uint32_t value)
182221828Sgrehan{
183221828Sgrehan	if (value == 0) {
184221828Sgrehan		DPRINTF(("vtnet: device reset requested !\n"));
185221828Sgrehan	}
186221828Sgrehan
187221828Sgrehan	sc->vsc_status = value;
188221828Sgrehan}
189221828Sgrehan
190221828Sgrehan/*
191221828Sgrehan * Called to send a buffer chain out to the tap device
192221828Sgrehan */
193221828Sgrehanstatic void
194221828Sgrehanpci_vtnet_tap_tx(struct pci_vtnet_softc *sc, struct iovec *iov, int iovcnt,
195221828Sgrehan		 int len)
196221828Sgrehan{
197221828Sgrehan	char pad[60];
198221828Sgrehan
199221828Sgrehan	if (sc->vsc_tapfd == -1)
200221828Sgrehan		return;
201221828Sgrehan
202221828Sgrehan	/*
203221828Sgrehan	 * If the length is < 60, pad out to that and add the
204221828Sgrehan	 * extra zero'd segment to the iov. It is guaranteed that
205221828Sgrehan	 * there is always an extra iov available by the caller.
206221828Sgrehan	 */
207221828Sgrehan	if (len < 60) {
208221828Sgrehan		memset(pad, 0, 60 - len);
209221828Sgrehan		iov[iovcnt].iov_base = pad;
210221828Sgrehan		iov[iovcnt].iov_len = 60 - len;
211221828Sgrehan		iovcnt++;
212221828Sgrehan	}
213221828Sgrehan	(void) writev(sc->vsc_tapfd, iov, iovcnt);
214221828Sgrehan}
215221828Sgrehan
216221828Sgrehan/*
217221828Sgrehan *  Called when there is read activity on the tap file descriptor.
218221828Sgrehan * Each buffer posted by the guest is assumed to be able to contain
219221828Sgrehan * an entire ethernet frame + rx header.
220221828Sgrehan *  MP note: the dummybuf is only used for discarding frames, so there
221221828Sgrehan * is no need for it to be per-vtnet or locked.
222221828Sgrehan */
223221828Sgrehanstatic uint8_t dummybuf[2048];
224221828Sgrehan
225221828Sgrehanstatic void
226221828Sgrehanpci_vtnet_tap_rx(struct pci_vtnet_softc *sc)
227221828Sgrehan{
228221828Sgrehan	struct virtio_desc *vd;
229221828Sgrehan	struct virtio_used *vu;
230221828Sgrehan	struct vring_hqueue *hq;
231221828Sgrehan	struct virtio_net_rxhdr *vrx;
232221828Sgrehan	uint8_t *buf;
233221828Sgrehan	int i;
234221828Sgrehan	int len;
235221828Sgrehan	int ndescs;
236221828Sgrehan	int didx, uidx, aidx;	/* descriptor, avail and used index */
237221828Sgrehan
238221828Sgrehan	/*
239221828Sgrehan	 * Should never be called without a valid tap fd
240221828Sgrehan	 */
241221828Sgrehan	assert(sc->vsc_tapfd != -1);
242221828Sgrehan
243221828Sgrehan	/*
244221828Sgrehan	 * But, will be called when the rx ring hasn't yet
245221828Sgrehan	 * been set up.
246221828Sgrehan	 */
247221828Sgrehan	if (sc->vsc_rx_ready == 0) {
248221828Sgrehan		/*
249221828Sgrehan		 * Drop the packet and try later.
250221828Sgrehan		 */
251221828Sgrehan		(void) read(sc->vsc_tapfd, dummybuf, sizeof(dummybuf));
252221828Sgrehan		return;
253221828Sgrehan	}
254221828Sgrehan
255221828Sgrehan	/*
256221828Sgrehan	 * Calculate the number of available rx buffers
257221828Sgrehan	 */
258221828Sgrehan	hq = &sc->vsc_hq[VTNET_RXQ];
259221828Sgrehan
260221828Sgrehan	ndescs = hq_num_avail(hq);
261221828Sgrehan
262221828Sgrehan	if (ndescs == 0) {
263221828Sgrehan		/*
264221828Sgrehan		 * Need to wait for host notification to read
265221828Sgrehan		 */
266221828Sgrehan		if (sc->vsc_rxpend == 0) {
267221828Sgrehan			WPRINTF(("vtnet: no rx descriptors !\n"));
268221828Sgrehan			sc->vsc_rxpend = 1;
269221828Sgrehan		}
270221828Sgrehan
271221828Sgrehan		/*
272221828Sgrehan		 * Drop the packet and try later
273221828Sgrehan		 */
274221828Sgrehan		(void) read(sc->vsc_tapfd, dummybuf, sizeof(dummybuf));
275221828Sgrehan		return;
276221828Sgrehan	}
277221828Sgrehan
278221828Sgrehan	aidx = hq->hq_cur_aidx;
279221828Sgrehan	uidx = *hq->hq_used_idx;
280221828Sgrehan	for (i = 0; i < ndescs; i++) {
281221828Sgrehan		/*
282221828Sgrehan		 * 'aidx' indexes into the an array of descriptor indexes
283221828Sgrehan		 */
284221828Sgrehan		didx = hq->hq_avail_ring[aidx % hq->hq_size];
285221828Sgrehan		assert(didx >= 0 && didx < hq->hq_size);
286221828Sgrehan
287221828Sgrehan		vd = &hq->hq_dtable[didx];
288221828Sgrehan
289221828Sgrehan		/*
290221828Sgrehan		 * Get a pointer to the rx header, and use the
291221828Sgrehan		 * data immediately following it for the packet buffer.
292221828Sgrehan		 */
293221828Sgrehan		vrx = (struct virtio_net_rxhdr *)paddr_guest2host(vd->vd_addr);
294221828Sgrehan		buf = (uint8_t *)(vrx + 1);
295221828Sgrehan
296221828Sgrehan		len = read(sc->vsc_tapfd, buf,
297221828Sgrehan			   vd->vd_len - sizeof(struct virtio_net_rxhdr));
298221828Sgrehan
299221828Sgrehan		if (len < 0 && errno == EWOULDBLOCK) {
300221828Sgrehan			break;
301221828Sgrehan		}
302221828Sgrehan
303221828Sgrehan		/*
304221828Sgrehan		 * The only valid field in the rx packet header is the
305221828Sgrehan		 * number of buffers, which is always 1 without TSO
306221828Sgrehan		 * support.
307221828Sgrehan		 */
308221828Sgrehan		memset(vrx, 0, sizeof(struct virtio_net_rxhdr));
309221828Sgrehan		vrx->vrh_bufs = 1;
310221828Sgrehan
311221828Sgrehan		/*
312221828Sgrehan		 * Write this descriptor into the used ring
313221828Sgrehan		 */
314221828Sgrehan		vu = &hq->hq_used_ring[uidx % hq->hq_size];
315221828Sgrehan		vu->vu_idx = didx;
316221828Sgrehan		vu->vu_tlen = len + sizeof(struct virtio_net_rxhdr);
317221828Sgrehan		uidx++;
318221828Sgrehan		aidx++;
319221828Sgrehan	}
320221828Sgrehan
321221828Sgrehan	/*
322221828Sgrehan	 * Update the used pointer, and signal an interrupt if allowed
323221828Sgrehan	 */
324221828Sgrehan	*hq->hq_used_idx = uidx;
325221828Sgrehan	hq->hq_cur_aidx = aidx;
326221828Sgrehan
327221828Sgrehan	if ((*hq->hq_avail_flags & VRING_AVAIL_F_NO_INTERRUPT) == 0) {
328221828Sgrehan		sc->vsc_isr |= 1;
329221828Sgrehan		pci_generate_msi(sc->vsc_pi, 0);
330221828Sgrehan	}
331221828Sgrehan}
332221828Sgrehan
333221828Sgrehanstatic void
334221828Sgrehanpci_vtnet_tap_callback(int fd, enum ev_type type, void *param)
335221828Sgrehan{
336221828Sgrehan	struct pci_vtnet_softc *sc = param;
337221828Sgrehan
338221828Sgrehan	pthread_mutex_lock(&sc->vsc_mtx);
339221828Sgrehan	pci_vtnet_tap_rx(sc);
340221828Sgrehan	pthread_mutex_unlock(&sc->vsc_mtx);
341221828Sgrehan
342221828Sgrehan}
343221828Sgrehan
344221828Sgrehanstatic void
345221828Sgrehanpci_vtnet_ping_rxq(struct pci_vtnet_softc *sc)
346221828Sgrehan{
347221828Sgrehan	/*
348221828Sgrehan	 * A qnotify means that the rx process can now begin
349221828Sgrehan	 */
350221828Sgrehan	if (sc->vsc_rx_ready == 0) {
351221828Sgrehan		sc->vsc_rx_ready = 1;
352221828Sgrehan	}
353221828Sgrehan
354221828Sgrehan	/*
355221828Sgrehan	 * If the rx queue was empty, attempt to receive a
356221828Sgrehan	 * packet that was previously blocked due to no rx bufs
357221828Sgrehan	 * available
358221828Sgrehan	 */
359221828Sgrehan	if (sc->vsc_rxpend) {
360221828Sgrehan		WPRINTF(("vtnet: rx resumed\n\r"));
361221828Sgrehan		sc->vsc_rxpend = 0;
362221828Sgrehan		pci_vtnet_tap_rx(sc);
363221828Sgrehan	}
364221828Sgrehan}
365221828Sgrehan
366221828Sgrehanstatic void
367221828Sgrehanpci_vtnet_proctx(struct pci_vtnet_softc *sc, struct vring_hqueue *hq)
368221828Sgrehan{
369221828Sgrehan	struct iovec iov[VTNET_MAXSEGS + 1];
370221828Sgrehan	struct virtio_desc *vd;
371221828Sgrehan	struct virtio_used *vu;
372221828Sgrehan	int i;
373221828Sgrehan	int plen;
374221828Sgrehan	int tlen;
375221828Sgrehan	int uidx, aidx, didx;
376221828Sgrehan
377221828Sgrehan	uidx = *hq->hq_used_idx;
378221828Sgrehan	aidx = hq->hq_cur_aidx;
379221828Sgrehan	didx = hq->hq_avail_ring[aidx % hq->hq_size];
380221828Sgrehan	assert(didx >= 0 && didx < hq->hq_size);
381221828Sgrehan
382221828Sgrehan	vd = &hq->hq_dtable[didx];
383221828Sgrehan
384221828Sgrehan	/*
385221828Sgrehan	 * Run through the chain of descriptors, ignoring the
386221828Sgrehan	 * first header descriptor. However, include the header
387221828Sgrehan	 * length in the total length that will be put into the
388221828Sgrehan	 * used queue.
389221828Sgrehan	 */
390221828Sgrehan	tlen = vd->vd_len;
391221828Sgrehan	vd = &hq->hq_dtable[vd->vd_next];
392221828Sgrehan
393221828Sgrehan	for (i = 0, plen = 0;
394221828Sgrehan	     i < VTNET_MAXSEGS;
395221828Sgrehan	     i++, vd = &hq->hq_dtable[vd->vd_next]) {
396221828Sgrehan		iov[i].iov_base = paddr_guest2host(vd->vd_addr);
397221828Sgrehan		iov[i].iov_len = vd->vd_len;
398221828Sgrehan		plen += vd->vd_len;
399221828Sgrehan		tlen += vd->vd_len;
400221828Sgrehan
401221828Sgrehan		if ((vd->vd_flags & VRING_DESC_F_NEXT) == 0)
402221828Sgrehan			break;
403221828Sgrehan	}
404221828Sgrehan	assert(i < VTNET_MAXSEGS);
405221828Sgrehan
406221828Sgrehan	DPRINTF(("virtio: packet send, %d bytes, %d segs\n\r", plen, i + 1));
407221828Sgrehan	pci_vtnet_tap_tx(sc, iov, i + 1, plen);
408221828Sgrehan
409221828Sgrehan	/*
410221828Sgrehan	 * Return this chain back to the host
411221828Sgrehan	 */
412221828Sgrehan	vu = &hq->hq_used_ring[uidx % hq->hq_size];
413221828Sgrehan	vu->vu_idx = didx;
414221828Sgrehan	vu->vu_tlen = tlen;
415221828Sgrehan	hq->hq_cur_aidx = aidx + 1;
416221828Sgrehan	*hq->hq_used_idx = uidx + 1;
417221828Sgrehan
418221828Sgrehan	/*
419221828Sgrehan	 * Generate an interrupt if able
420221828Sgrehan	 */
421221828Sgrehan	if ((*hq->hq_avail_flags & VRING_AVAIL_F_NO_INTERRUPT) == 0) {
422221828Sgrehan		sc->vsc_isr |= 1;
423221828Sgrehan		pci_generate_msi(sc->vsc_pi, 0);
424221828Sgrehan	}
425221828Sgrehan}
426221828Sgrehan
427221828Sgrehanstatic void
428221828Sgrehanpci_vtnet_ping_txq(struct pci_vtnet_softc *sc)
429221828Sgrehan{
430221828Sgrehan	struct vring_hqueue *hq = &sc->vsc_hq[VTNET_TXQ];
431221828Sgrehan	int i;
432221828Sgrehan	int ndescs;
433221828Sgrehan
434221828Sgrehan	/*
435221828Sgrehan	 * Calculate number of ring entries to process
436221828Sgrehan	 */
437221828Sgrehan	ndescs = hq_num_avail(hq);
438221828Sgrehan
439221828Sgrehan	if (ndescs == 0)
440221828Sgrehan		return;
441221828Sgrehan
442221828Sgrehan	/*
443221828Sgrehan	 * Run through all the entries, placing them into iovecs and
444221828Sgrehan	 * sending when an end-of-packet is found
445221828Sgrehan	 */
446221828Sgrehan	for (i = 0; i < ndescs; i++)
447221828Sgrehan		pci_vtnet_proctx(sc, hq);
448221828Sgrehan}
449221828Sgrehan
/*
 * Handle a guest notification on the control queue. The control
 * queue is not implemented, so the notification is only traced.
 */
static void
pci_vtnet_ping_ctlq(struct pci_vtnet_softc *sc)
{
	DPRINTF(("vtnet: control qnotify!\n\r"));
}
456221828Sgrehan
457221828Sgrehanstatic void
458221828Sgrehanpci_vtnet_ring_init(struct pci_vtnet_softc *sc, uint64_t pfn)
459221828Sgrehan{
460221828Sgrehan	struct vring_hqueue *hq;
461221828Sgrehan	int qnum = sc->vsc_curq;
462221828Sgrehan
463221828Sgrehan	assert(qnum < VTNET_MAXQ);
464221828Sgrehan
465221828Sgrehan	sc->vsc_pfn[qnum] = pfn << VRING_PFN;
466221828Sgrehan
467221828Sgrehan	/*
468221828Sgrehan	 * Set up host pointers to the various parts of the
469221828Sgrehan	 * queue
470221828Sgrehan	 */
471221828Sgrehan	hq = &sc->vsc_hq[qnum];
472221828Sgrehan	hq->hq_size = pci_vtnet_qsize(qnum);
473221828Sgrehan
474221828Sgrehan	hq->hq_dtable = paddr_guest2host(pfn << VRING_PFN);
475221828Sgrehan	hq->hq_avail_flags =  (uint16_t *)(hq->hq_dtable + hq->hq_size);
476221828Sgrehan	hq->hq_avail_idx = hq->hq_avail_flags + 1;
477221828Sgrehan	hq->hq_avail_ring = hq->hq_avail_flags + 2;
478221828Sgrehan	hq->hq_used_flags = (uint16_t *)roundup2((uintptr_t)hq->hq_avail_ring,
479221828Sgrehan						 VRING_ALIGN);
480221828Sgrehan	hq->hq_used_idx = hq->hq_used_flags + 1;
481221828Sgrehan	hq->hq_used_ring = (struct virtio_used *)(hq->hq_used_flags + 2);
482221828Sgrehan
483221828Sgrehan	/*
484221828Sgrehan	 * Initialize queue indexes
485221828Sgrehan	 */
486221828Sgrehan	hq->hq_cur_aidx = 0;
487221828Sgrehan}
488221828Sgrehan
489221828Sgrehanstatic int
490221828Sgrehanpci_vtnet_init(struct vmctx *ctx, struct pci_devinst *pi, char *opts)
491221828Sgrehan{
492221828Sgrehan	MD5_CTX mdctx;
493221828Sgrehan	unsigned char digest[16];
494221828Sgrehan	char nstr[80];
495221828Sgrehan	struct pci_vtnet_softc *sc;
496221828Sgrehan
497221828Sgrehan	/*
498221828Sgrehan	 * Access to guest memory is required. Fail if
499221828Sgrehan	 * memory not mapped
500221828Sgrehan	 */
501221828Sgrehan	if (paddr_guest2host(0) == NULL)
502221828Sgrehan		return (1);
503221828Sgrehan
504221828Sgrehan	sc = malloc(sizeof(struct pci_vtnet_softc));
505221828Sgrehan	memset(sc, 0, sizeof(struct pci_vtnet_softc));
506221828Sgrehan
507221828Sgrehan	pi->pi_arg = sc;
508221828Sgrehan	sc->vsc_pi = pi;
509221828Sgrehan
510221828Sgrehan	pthread_mutex_init(&sc->vsc_mtx, NULL);
511221828Sgrehan
512221828Sgrehan	/*
513221828Sgrehan	 * Attempt to open the tap device
514221828Sgrehan	 */
515221828Sgrehan	sc->vsc_tapfd = -1;
516221828Sgrehan	if (opts != NULL) {
517221828Sgrehan		char tbuf[80];
518221828Sgrehan
519221828Sgrehan		strcpy(tbuf, "/dev/");
520221828Sgrehan		strncat(tbuf, opts, sizeof(tbuf) - strlen(tbuf));
521221828Sgrehan
522221828Sgrehan		sc->vsc_tapfd = open(tbuf, O_RDWR);
523221828Sgrehan		if (sc->vsc_tapfd == -1) {
524221828Sgrehan			WPRINTF(("open of tap device %s failed\n", tbuf));
525221828Sgrehan		} else {
526221828Sgrehan			/*
527221828Sgrehan			 * Set non-blocking and register for read
528221828Sgrehan			 * notifications with the event loop
529221828Sgrehan			 */
530221828Sgrehan			int opt = 1;
531221828Sgrehan			if (ioctl(sc->vsc_tapfd, FIONBIO, &opt) < 0) {
532221828Sgrehan				WPRINTF(("tap device O_NONBLOCK failed\n"));
533221828Sgrehan				close(sc->vsc_tapfd);
534221828Sgrehan				sc->vsc_tapfd = -1;
535221828Sgrehan			}
536221828Sgrehan
537221828Sgrehan			sc->vsc_mevp = mevent_add(sc->vsc_tapfd,
538221828Sgrehan						  EVF_READ,
539221828Sgrehan						  pci_vtnet_tap_callback,
540221828Sgrehan						  sc);
541221828Sgrehan			if (sc->vsc_mevp == NULL) {
542221828Sgrehan				WPRINTF(("Could not register event\n"));
543221828Sgrehan				close(sc->vsc_tapfd);
544221828Sgrehan				sc->vsc_tapfd = -1;
545221828Sgrehan			}
546221828Sgrehan		}
547221828Sgrehan	}
548221828Sgrehan
549221828Sgrehan	/*
550221828Sgrehan	 * The MAC address is the standard NetApp OUI of 00-a0-98,
551221828Sgrehan	 * followed by an MD5 of the vm name. The slot number is
552221828Sgrehan	 * prepended to this for slots other than 1, so that
553221828Sgrehan	 * CFE can netboot from the equivalent of slot 1.
554221828Sgrehan	 */
555221828Sgrehan	if (pi->pi_slot == 1) {
556221828Sgrehan		strncpy(nstr, vmname, sizeof(nstr));
557221828Sgrehan	} else {
558221828Sgrehan		snprintf(nstr, sizeof(nstr), "%d-%s", pi->pi_slot, vmname);
559221828Sgrehan	}
560221828Sgrehan
561221828Sgrehan	MD5Init(&mdctx);
562221828Sgrehan	MD5Update(&mdctx, nstr, strlen(nstr));
563221828Sgrehan	MD5Final(digest, &mdctx);
564221828Sgrehan
565221828Sgrehan	sc->vsc_macaddr[0] = 0x00;
566221828Sgrehan	sc->vsc_macaddr[1] = 0xa0;
567221828Sgrehan	sc->vsc_macaddr[2] = 0x98;
568221828Sgrehan	sc->vsc_macaddr[3] = digest[0];
569221828Sgrehan	sc->vsc_macaddr[4] = digest[1];
570221828Sgrehan	sc->vsc_macaddr[5] = digest[2];
571221828Sgrehan
572221828Sgrehan	/* initialize config space */
573221828Sgrehan	pci_set_cfgdata16(pi, PCIR_DEVICE, VIRTIO_DEV_NET);
574221828Sgrehan	pci_set_cfgdata16(pi, PCIR_VENDOR, VIRTIO_VENDOR);
575221828Sgrehan	pci_set_cfgdata8(pi, PCIR_CLASS, PCIC_NETWORK);
576221828Sgrehan	pci_set_cfgdata16(pi, PCIR_SUBDEV_0, VIRTIO_TYPE_NET);
577221828Sgrehan	pci_emul_alloc_bar(pi, 0, 0, PCIBAR_IO, VTNET_REGSZ);
578221828Sgrehan	pci_emul_add_msicap(pi, 1);
579221828Sgrehan
580221828Sgrehan	return (0);
581221828Sgrehan}
582221828Sgrehan
583221828Sgrehan/*
584221828Sgrehan * Function pointer array to handle queue notifications
585221828Sgrehan */
586221828Sgrehanstatic void (*pci_vtnet_qnotify[VTNET_MAXQ])(struct pci_vtnet_softc *) = {
587221828Sgrehan	pci_vtnet_ping_rxq,
588221828Sgrehan	pci_vtnet_ping_txq,
589221828Sgrehan	pci_vtnet_ping_ctlq
590221828Sgrehan};
591221828Sgrehan
592221828Sgrehanstatic void
593221828Sgrehanpci_vtnet_write(struct pci_devinst *pi, int baridx, int offset, int size,
594221828Sgrehan		uint32_t value)
595221828Sgrehan{
596221828Sgrehan	struct pci_vtnet_softc *sc = pi->pi_arg;
597222830Sgrehan	void *ptr;
598222830Sgrehan
599221828Sgrehan	if (offset + size > VTNET_REGSZ) {
600221828Sgrehan		DPRINTF(("vtnet_write: 2big, offset %d size %d\n",
601221828Sgrehan			 offset, size));
602221828Sgrehan		return;
603221828Sgrehan	}
604221828Sgrehan
605221828Sgrehan	pthread_mutex_lock(&sc->vsc_mtx);
606221828Sgrehan
607221828Sgrehan	switch (offset) {
608221828Sgrehan	case VTCFG_R_GUESTCAP:
609221828Sgrehan		assert(size == 4);
610221828Sgrehan		sc->vsc_features = value & VTNET_S_HOSTCAPS;
611221828Sgrehan		break;
612221828Sgrehan	case VTCFG_R_PFN:
613221828Sgrehan		assert(size == 4);
614221828Sgrehan		pci_vtnet_ring_init(sc, value);
615221828Sgrehan		break;
616221828Sgrehan	case VTCFG_R_QSEL:
617221828Sgrehan		assert(size == 2);
618221828Sgrehan		assert(value < VTNET_MAXQ);
619221828Sgrehan		sc->vsc_curq = value;
620221828Sgrehan		break;
621221828Sgrehan	case VTCFG_R_QNOTIFY:
622221828Sgrehan		assert(size == 2);
623221828Sgrehan		assert(value < VTNET_MAXQ);
624221828Sgrehan		(*pci_vtnet_qnotify[value])(sc);
625221828Sgrehan		break;
626221828Sgrehan	case VTCFG_R_STATUS:
627221828Sgrehan		assert(size == 1);
628221828Sgrehan		pci_vtnet_update_status(sc, value);
629221828Sgrehan		break;
630221828Sgrehan	case VTNET_R_CFG0:
631221828Sgrehan	case VTNET_R_CFG1:
632221828Sgrehan	case VTNET_R_CFG2:
633221828Sgrehan	case VTNET_R_CFG3:
634221828Sgrehan	case VTNET_R_CFG4:
635221828Sgrehan	case VTNET_R_CFG5:
636222830Sgrehan		assert((size + offset) <= (VTNET_R_CFG5 + 1));
637222830Sgrehan		ptr = &sc->vsc_macaddr[offset - VTNET_R_CFG0];
638221828Sgrehan		/*
639221828Sgrehan		 * The driver is allowed to change the MAC address
640221828Sgrehan		 */
641221828Sgrehan		sc->vsc_macaddr[offset - VTNET_R_CFG0] = value;
642222830Sgrehan		if (size == 1) {
643222830Sgrehan			*(uint8_t *) ptr = value;
644222830Sgrehan		} else if (size == 2) {
645222830Sgrehan			*(uint16_t *) ptr = value;
646222830Sgrehan		} else {
647222830Sgrehan			*(uint32_t *) ptr = value;
648222830Sgrehan		}
649221828Sgrehan		break;
650221828Sgrehan	case VTCFG_R_HOSTCAP:
651221828Sgrehan	case VTCFG_R_QNUM:
652221828Sgrehan	case VTCFG_R_ISR:
653221828Sgrehan	case VTNET_R_CFG6:
654221828Sgrehan	case VTNET_R_CFG7:
655221828Sgrehan		DPRINTF(("vtnet: write to readonly reg %d\n\r", offset));
656221828Sgrehan		break;
657221828Sgrehan	default:
658221828Sgrehan		DPRINTF(("vtnet: unknown i/o write offset %d\n\r", offset));
659221828Sgrehan		value = 0;
660221828Sgrehan		break;
661221828Sgrehan	}
662221828Sgrehan
663221828Sgrehan	pthread_mutex_unlock(&sc->vsc_mtx);
664221828Sgrehan}
665221828Sgrehan
666221828Sgrehanuint32_t
667221828Sgrehanpci_vtnet_read(struct pci_devinst *pi, int baridx, int offset, int size)
668221828Sgrehan{
669221828Sgrehan	struct pci_vtnet_softc *sc = pi->pi_arg;
670222830Sgrehan	void *ptr;
671221828Sgrehan	uint32_t value;
672221828Sgrehan
673221828Sgrehan	if (offset + size > VTNET_REGSZ) {
674221828Sgrehan		DPRINTF(("vtnet_read: 2big, offset %d size %d\n",
675221828Sgrehan			 offset, size));
676221828Sgrehan		return (0);
677221828Sgrehan	}
678221828Sgrehan
679221828Sgrehan	pthread_mutex_lock(&sc->vsc_mtx);
680221828Sgrehan
681221828Sgrehan	switch (offset) {
682221828Sgrehan	case VTCFG_R_HOSTCAP:
683221828Sgrehan		assert(size == 4);
684221828Sgrehan		value = VTNET_S_HOSTCAPS;
685221828Sgrehan		break;
686221828Sgrehan	case VTCFG_R_GUESTCAP:
687221828Sgrehan		assert(size == 4);
688221828Sgrehan		value = sc->vsc_features; /* XXX never read ? */
689221828Sgrehan		break;
690221828Sgrehan	case VTCFG_R_PFN:
691221828Sgrehan		assert(size == 4);
692221828Sgrehan		value = sc->vsc_pfn[sc->vsc_curq] >> VRING_PFN;
693221828Sgrehan		break;
694221828Sgrehan	case VTCFG_R_QNUM:
695221828Sgrehan		assert(size == 2);
696221828Sgrehan		value = pci_vtnet_qsize(sc->vsc_curq);
697221828Sgrehan		break;
698221828Sgrehan	case VTCFG_R_QSEL:
699221828Sgrehan		assert(size == 2);
700221828Sgrehan		value = sc->vsc_curq;  /* XXX never read ? */
701221828Sgrehan		break;
702221828Sgrehan	case VTCFG_R_QNOTIFY:
703221828Sgrehan		assert(size == 2);
704221828Sgrehan		value = sc->vsc_curq;  /* XXX never read ? */
705221828Sgrehan		break;
706221828Sgrehan	case VTCFG_R_STATUS:
707221828Sgrehan		assert(size == 1);
708221828Sgrehan		value = sc->vsc_status;
709221828Sgrehan		break;
710221828Sgrehan	case VTCFG_R_ISR:
711221828Sgrehan		assert(size == 1);
712221828Sgrehan		value = sc->vsc_isr;
713221828Sgrehan		sc->vsc_isr = 0;     /* a read clears this flag */
714221828Sgrehan		break;
715221828Sgrehan	case VTNET_R_CFG0:
716221828Sgrehan	case VTNET_R_CFG1:
717221828Sgrehan	case VTNET_R_CFG2:
718221828Sgrehan	case VTNET_R_CFG3:
719221828Sgrehan	case VTNET_R_CFG4:
720221828Sgrehan	case VTNET_R_CFG5:
721222830Sgrehan                assert((size + offset) <= (VTNET_R_CFG5 + 1));
722222830Sgrehan                ptr = &sc->vsc_macaddr[offset - VTNET_R_CFG0];
723222830Sgrehan                if (size == 1) {
724222830Sgrehan                        value = *(uint8_t *) ptr;
725222830Sgrehan                } else if (size == 2) {
726222830Sgrehan                        value = *(uint16_t *) ptr;
727222830Sgrehan                } else {
728222830Sgrehan                        value = *(uint32_t *) ptr;
729222830Sgrehan                }
730221828Sgrehan		break;
731221828Sgrehan	case VTNET_R_CFG6:
732222830Sgrehan		assert(size != 4);
733222830Sgrehan		value = 0x01; /* XXX link always up */
734221828Sgrehan		break;
735221828Sgrehan	case VTNET_R_CFG7:
736221828Sgrehan		assert(size == 1);
737222830Sgrehan		value = 0; /* XXX link status in LSB */
738221828Sgrehan		break;
739221828Sgrehan	default:
740221828Sgrehan		DPRINTF(("vtnet: unknown i/o read offset %d\n\r", offset));
741221828Sgrehan		value = 0;
742221828Sgrehan		break;
743221828Sgrehan	}
744221828Sgrehan
745221828Sgrehan	pthread_mutex_unlock(&sc->vsc_mtx);
746221828Sgrehan
747221828Sgrehan	return (value);
748221828Sgrehan}
749221828Sgrehan
750221828Sgrehanstruct pci_devemu pci_de_vnet = {
751221828Sgrehan	.pe_emu = "virtio-net",
752221828Sgrehan	.pe_init = pci_vtnet_init,
753221828Sgrehan	.pe_iow = pci_vtnet_write,
754221828Sgrehan	.pe_ior = pci_vtnet_read,
755221828Sgrehan};
756221828SgrehanPCI_EMUL_SET(pci_de_vnet);
757