t4_vf.c revision 318851
1/*-
2 * Copyright (c) 2016 Chelsio Communications, Inc.
3 * All rights reserved.
4 * Written by: John Baldwin <jhb@FreeBSD.org>
5 *
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions
8 * are met:
9 * 1. Redistributions of source code must retain the above copyright
10 *    notice, this list of conditions and the following disclaimer.
11 * 2. Redistributions in binary form must reproduce the above copyright
12 *    notice, this list of conditions and the following disclaimer in the
13 *    documentation and/or other materials provided with the distribution.
14 *
15 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
16 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
17 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
18 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
19 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
20 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
21 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
22 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
23 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
24 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
25 * SUCH DAMAGE.
26 */
27
28#include <sys/cdefs.h>
29__FBSDID("$FreeBSD: stable/10/sys/dev/cxgbe/t4_vf.c 318851 2017-05-25 01:43:28Z np $");
30
31#include "opt_inet.h"
32#include "opt_inet6.h"
33
34#include <sys/param.h>
35#include <sys/systm.h>
36#include <sys/bus.h>
37#include <sys/conf.h>
38#include <sys/counter.h>
39#include <sys/kernel.h>
40#include <sys/module.h>
41#include <sys/priv.h>
42#include <dev/pci/pcivar.h>
43#if defined(__i386__) || defined(__amd64__)
44#include <vm/vm.h>
45#include <vm/pmap.h>
46#endif
47
48#include "common/common.h"
49#include "common/t4_regs.h"
50#include "t4_ioctl.h"
51#include "t4_mp_ring.h"
52
/*
 * Some notes:
 *
 * The Virtual Interfaces are connected to an internal switch on the chip
 * which allows VIs attached to the same port to talk to each other even when
 * the port link is down.  As a result, we might want to always report a
 * VF's link as being "up".
 *
 * XXX: Add a TUNABLE and possibly a per-device sysctl for this?
 */
63
/*
 * Interrupt type, vector count and per-port NIC queue counts as worked
 * out by cfg_itype_and_nqueues().  The per-port values are kept separately
 * for the 10G and the 1G class of port.
 */
struct intrs_and_queues {
	/* Interrupt type in use: MSI, or MSI-X. */
	uint16_t intr_type;
	/* Total number of interrupt vectors. */
	uint16_t nirq;
	/* Interrupt flags applied to each 10G port. */
	uint16_t intr_flags_10g;
	/* Interrupt flags applied to each 1G port. */
	uint16_t intr_flags_1g;
	/* Number of NIC tx queues for each 10G port. */
	uint16_t ntxq10g;
	/* Number of NIC rx queues for each 10G port. */
	uint16_t nrxq10g;
	/* Number of NIC tx queues for each 1G port. */
	uint16_t ntxq1g;
	/* Number of NIC rx queues for each 1G port. */
	uint16_t nrxq1g;
};
74
/*
 * PCI device IDs recognised by the three probe routines, one table per
 * chip generation (T4, T5, T6), each paired with the description string
 * handed to device_set_desc() on a match.
 *
 * The descriptions are string literals, so the pointer is const-qualified.
 */
struct {
	uint16_t device;
	const char *desc;
} t4vf_pciids[] = {
	{0x4800, "Chelsio T440-dbg VF"},
	{0x4801, "Chelsio T420-CR VF"},
	{0x4802, "Chelsio T422-CR VF"},
	{0x4803, "Chelsio T440-CR VF"},
	{0x4804, "Chelsio T420-BCH VF"},
	{0x4805, "Chelsio T440-BCH VF"},
	{0x4806, "Chelsio T440-CH VF"},
	{0x4807, "Chelsio T420-SO VF"},
	{0x4808, "Chelsio T420-CX VF"},
	{0x4809, "Chelsio T420-BT VF"},
	{0x480a, "Chelsio T404-BT VF"},
	{0x480e, "Chelsio T440-LP-CR VF"},
}, t5vf_pciids[] = {
	{0x5800, "Chelsio T580-dbg VF"},
	{0x5801,  "Chelsio T520-CR VF"},	/* 2 x 10G */
	{0x5802,  "Chelsio T522-CR VF"},	/* 2 x 10G, 2 X 1G */
	{0x5803,  "Chelsio T540-CR VF"},	/* 4 x 10G */
	{0x5807,  "Chelsio T520-SO VF"},	/* 2 x 10G, nomem */
	{0x5809,  "Chelsio T520-BT VF"},	/* 2 x 10GBaseT */
	{0x580a,  "Chelsio T504-BT VF"},	/* 4 x 1G */
	{0x580d,  "Chelsio T580-CR VF"},	/* 2 x 40G */
	{0x580e,  "Chelsio T540-LP-CR VF"},	/* 4 x 10G */
	{0x5810,  "Chelsio T580-LP-CR VF"},	/* 2 x 40G */
	{0x5811,  "Chelsio T520-LL-CR VF"},	/* 2 x 10G */
	{0x5812,  "Chelsio T560-CR VF"},	/* 1 x 40G, 2 x 10G */
	{0x5814,  "Chelsio T580-LP-SO-CR VF"},	/* 2 x 40G, nomem */
	{0x5815,  "Chelsio T502-BT VF"},	/* 2 x 1G */
#ifdef notyet
	/* Compiled in only when "notyet" is defined. */
	{0x5804,  "Chelsio T520-BCH VF"},
	{0x5805,  "Chelsio T540-BCH VF"},
	{0x5806,  "Chelsio T540-CH VF"},
	{0x5808,  "Chelsio T520-CX VF"},
	{0x580b,  "Chelsio B520-SR VF"},
	{0x580c,  "Chelsio B504-BT VF"},
	{0x580f,  "Chelsio Amsterdam VF"},
	{0x5813,  "Chelsio T580-CHR VF"},
#endif
}, t6vf_pciids[] = {
	{0x6800, "Chelsio T6-DBG-25 VF"},	/* 2 x 10/25G, debug */
	{0x6801, "Chelsio T6225-CR VF"},	/* 2 x 10/25G */
	{0x6802, "Chelsio T6225-SO-CR VF"},	/* 2 x 10/25G, nomem */
	{0x6803, "Chelsio T6425-CR VF"},	/* 4 x 10/25G */
	{0x6804, "Chelsio T6425-SO-CR VF"},	/* 4 x 10/25G, nomem */
	{0x6805, "Chelsio T6225-OCP-SO VF"},	/* 2 x 10/25G, nomem */
	{0x6806, "Chelsio T62100-OCP-SO VF"},	/* 2 x 40/50/100G, nomem */
	{0x6807, "Chelsio T62100-LP-CR VF"},	/* 2 x 40/50/100G */
	{0x6808, "Chelsio T62100-SO-CR VF"},	/* 2 x 40/50/100G, nomem */
	{0x6809, "Chelsio T6210-BT VF"},	/* 2 x 10GBASE-T */
	{0x680d, "Chelsio T62100-CR VF"},	/* 2 x 40/50/100G */
	{0x6810, "Chelsio T6-DBG-100 VF"},	/* 2 x 40/50/100G, debug */
	{0x6811, "Chelsio T6225-LL-CR VF"},	/* 2 x 10/25G */
	{0x6814, "Chelsio T61100-OCP-SO VF"},	/* 1 x 40/50/100G, nomem */
	{0x6815, "Chelsio T6201-BT VF"},	/* 2 x 1000BASE-T */

	/* Custom */
	{0x6880, "Chelsio T6225 80 VF"},
	{0x6881, "Chelsio T62100 81 VF"},
};
137
138static d_ioctl_t t4vf_ioctl;
139
140static struct cdevsw t4vf_cdevsw = {
141       .d_version = D_VERSION,
142       .d_ioctl = t4vf_ioctl,
143       .d_name = "t4vf",
144};
145
146static int
147t4vf_probe(device_t dev)
148{
149	uint16_t d;
150	size_t i;
151
152	d = pci_get_device(dev);
153	for (i = 0; i < nitems(t4vf_pciids); i++) {
154		if (d == t4vf_pciids[i].device) {
155			device_set_desc(dev, t4vf_pciids[i].desc);
156			return (BUS_PROBE_DEFAULT);
157		}
158	}
159	return (ENXIO);
160}
161
162static int
163t5vf_probe(device_t dev)
164{
165	uint16_t d;
166	size_t i;
167
168	d = pci_get_device(dev);
169	for (i = 0; i < nitems(t5vf_pciids); i++) {
170		if (d == t5vf_pciids[i].device) {
171			device_set_desc(dev, t5vf_pciids[i].desc);
172			return (BUS_PROBE_DEFAULT);
173		}
174	}
175	return (ENXIO);
176}
177
178static int
179t6vf_probe(device_t dev)
180{
181	uint16_t d;
182	size_t i;
183
184	d = pci_get_device(dev);
185	for (i = 0; i < nitems(t6vf_pciids); i++) {
186		if (d == t6vf_pciids[i].device) {
187			device_set_desc(dev, t6vf_pciids[i].desc);
188			return (BUS_PROBE_DEFAULT);
189		}
190	}
191	return (ENXIO);
192}
193
/*
 * Shorthand for composing a firmware parameter identifier: the DEV or PFVF
 * mnemonic OR-ed with the parameter named by the argument.  Both macros are
 * #undef'd again once the parameter helpers below have been defined.
 */
#define FW_PARAM_DEV(param)						\
	(V_FW_PARAMS_MNEM(FW_PARAMS_MNEM_DEV) |				\
	    V_FW_PARAMS_PARAM_X(FW_PARAMS_PARAM_DEV_##param))
#define FW_PARAM_PFVF(param)						\
	(V_FW_PARAMS_MNEM(FW_PARAMS_MNEM_PFVF) |			\
	    V_FW_PARAMS_PARAM_X(FW_PARAMS_PARAM_PFVF_##param))
200
201static int
202get_params__pre_init(struct adapter *sc)
203{
204	int rc;
205	uint32_t param[3], val[3];
206
207	param[0] = FW_PARAM_DEV(FWREV);
208	param[1] = FW_PARAM_DEV(TPREV);
209	param[2] = FW_PARAM_DEV(CCLK);
210	rc = -t4vf_query_params(sc, nitems(param), param, val);
211	if (rc != 0) {
212		device_printf(sc->dev,
213		    "failed to query parameters (pre_init): %d.\n", rc);
214		return (rc);
215	}
216
217	sc->params.fw_vers = val[0];
218	sc->params.tp_vers = val[1];
219	sc->params.vpd.cclk = val[2];
220
221	snprintf(sc->fw_version, sizeof(sc->fw_version), "%u.%u.%u.%u",
222	    G_FW_HDR_FW_VER_MAJOR(sc->params.fw_vers),
223	    G_FW_HDR_FW_VER_MINOR(sc->params.fw_vers),
224	    G_FW_HDR_FW_VER_MICRO(sc->params.fw_vers),
225	    G_FW_HDR_FW_VER_BUILD(sc->params.fw_vers));
226
227	snprintf(sc->tp_version, sizeof(sc->tp_version), "%u.%u.%u.%u",
228	    G_FW_HDR_FW_VER_MAJOR(sc->params.tp_vers),
229	    G_FW_HDR_FW_VER_MINOR(sc->params.tp_vers),
230	    G_FW_HDR_FW_VER_MICRO(sc->params.tp_vers),
231	    G_FW_HDR_FW_VER_BUILD(sc->params.tp_vers));
232
233	return (0);
234}
235
236static int
237get_params__post_init(struct adapter *sc)
238{
239	int rc;
240
241	rc = -t4vf_get_sge_params(sc);
242	if (rc != 0) {
243		device_printf(sc->dev,
244		    "unable to retrieve adapter SGE parameters: %d\n", rc);
245		return (rc);
246	}
247
248	rc = -t4vf_get_rss_glb_config(sc);
249	if (rc != 0) {
250		device_printf(sc->dev,
251		    "unable to retrieve adapter RSS parameters: %d\n", rc);
252		return (rc);
253	}
254	if (sc->params.rss.mode != FW_RSS_GLB_CONFIG_CMD_MODE_BASICVIRTUAL) {
255		device_printf(sc->dev,
256		    "unable to operate with global RSS mode %d\n",
257		    sc->params.rss.mode);
258		return (EINVAL);
259	}
260
261	rc = t4_read_chip_settings(sc);
262	if (rc != 0)
263		return (rc);
264
265	/*
266	 * Grab our Virtual Interface resource allocation, extract the
267	 * features that we're interested in and do a bit of sanity testing on
268	 * what we discover.
269	 */
270	rc = -t4vf_get_vfres(sc);
271	if (rc != 0) {
272		device_printf(sc->dev,
273		    "unable to get virtual interface resources: %d\n", rc);
274		return (rc);
275	}
276
277	/*
278	 * Check for various parameter sanity issues.
279	 */
280	if (sc->params.vfres.pmask == 0) {
281		device_printf(sc->dev, "no port access configured/usable!\n");
282		return (EINVAL);
283	}
284	if (sc->params.vfres.nvi == 0) {
285		device_printf(sc->dev,
286		    "no virtual interfaces configured/usable!\n");
287		return (EINVAL);
288	}
289	sc->params.portvec = sc->params.vfres.pmask;
290
291	return (0);
292}
293
294static int
295set_params__post_init(struct adapter *sc)
296{
297	uint32_t param, val;
298
299	/* ask for encapsulated CPLs */
300	param = FW_PARAM_PFVF(CPLFW4MSG_ENCAP);
301	val = 1;
302	(void)t4vf_set_params(sc, 1, &param, &val);
303
304	return (0);
305}
306
/* The parameter-id shorthands are not needed past this point. */
#undef FW_PARAM_DEV
#undef FW_PARAM_PFVF
309
310static int
311cfg_itype_and_nqueues(struct adapter *sc, int n10g, int n1g,
312    struct intrs_and_queues *iaq)
313{
314	struct vf_resources *vfres;
315	int nrxq10g, nrxq1g, nrxq;
316	int ntxq10g, ntxq1g, ntxq;
317	int itype, iq_avail, navail, rc;
318
319	/*
320	 * Figure out the layout of queues across our VIs and ensure
321	 * we can allocate enough interrupts for our layout.
322	 */
323	vfres = &sc->params.vfres;
324	bzero(iaq, sizeof(*iaq));
325
326	for (itype = INTR_MSIX; itype != 0; itype >>= 1) {
327		if (itype == INTR_INTX)
328			continue;
329
330		if (itype == INTR_MSIX)
331			navail = pci_msix_count(sc->dev);
332		else
333			navail = pci_msi_count(sc->dev);
334
335		if (navail == 0)
336			continue;
337
338		iaq->intr_type = itype;
339		iaq->intr_flags_10g = 0;
340		iaq->intr_flags_1g = 0;
341
342		/*
343		 * XXX: The Linux driver reserves an Ingress Queue for
344		 * forwarded interrupts when using MSI (but not MSI-X).
345		 * It seems it just always asks for 2 interrupts and
346		 * forwards all rxqs to the forwarded interrupt.
347		 *
348		 * We must reserve one IRQ for the for the firmware
349		 * event queue.
350		 *
351		 * Every rxq requires an ingress queue with a free
352		 * list and interrupts and an egress queue.  Every txq
353		 * requires an ETH egress queue.
354		 */
355		iaq->nirq = T4VF_EXTRA_INTR;
356
357		/*
358		 * First, determine how many queues we can allocate.
359		 * Start by finding the upper bound on rxqs from the
360		 * limit on ingress queues.
361		 */
362		iq_avail = vfres->niqflint - iaq->nirq;
363		if (iq_avail < n10g + n1g) {
364			device_printf(sc->dev,
365			    "Not enough ingress queues (%d) for %d ports\n",
366			    vfres->niqflint, n10g + n1g);
367			return (ENXIO);
368		}
369
370		/*
371		 * Try to honor the cap on interrupts.  If there aren't
372		 * enough interrupts for at least one interrupt per
373		 * port, then don't bother, we will just forward all
374		 * interrupts to one interrupt in that case.
375		 */
376		if (iaq->nirq + n10g + n1g <= navail) {
377			if (iq_avail > navail - iaq->nirq)
378				iq_avail = navail - iaq->nirq;
379		}
380
381		nrxq10g = t4_nrxq10g;
382		nrxq1g = t4_nrxq1g;
383		nrxq = n10g * nrxq10g + n1g * nrxq1g;
384		if (nrxq > iq_avail && nrxq1g > 1) {
385			/* Too many ingress queues.  Try just 1 for 1G. */
386			nrxq1g = 1;
387			nrxq = n10g * nrxq10g + n1g * nrxq1g;
388		}
389		if (nrxq > iq_avail) {
390			/*
391			 * Still too many ingress queues.  Use what we
392			 * can for each 10G port.
393			 */
394			nrxq10g = (iq_avail - n1g) / n10g;
395			nrxq = n10g * nrxq10g + n1g * nrxq1g;
396		}
397		KASSERT(nrxq <= iq_avail, ("too many ingress queues"));
398
399		/*
400		 * Next, determine the upper bound on txqs from the limit
401		 * on ETH queues.
402		 */
403		if (vfres->nethctrl < n10g + n1g) {
404			device_printf(sc->dev,
405			    "Not enough ETH queues (%d) for %d ports\n",
406			    vfres->nethctrl, n10g + n1g);
407			return (ENXIO);
408		}
409
410		ntxq10g = t4_ntxq10g;
411		ntxq1g = t4_ntxq1g;
412		ntxq = n10g * ntxq10g + n1g * ntxq1g;
413		if (ntxq > vfres->nethctrl) {
414			/* Too many ETH queues.  Try just 1 for 1G. */
415			ntxq1g = 1;
416			ntxq = n10g * ntxq10g + n1g * ntxq1g;
417		}
418		if (ntxq > vfres->nethctrl) {
419			/*
420			 * Still too many ETH queues.  Use what we
421			 * can for each 10G port.
422			 */
423			ntxq10g = (vfres->nethctrl - n1g) / n10g;
424			ntxq = n10g * ntxq10g + n1g * ntxq1g;
425		}
426		KASSERT(ntxq <= vfres->nethctrl, ("too many ETH queues"));
427
428		/*
429		 * Finally, ensure we have enough egress queues.
430		 */
431		if (vfres->neq < (n10g + n1g) * 2) {
432			device_printf(sc->dev,
433			    "Not enough egress queues (%d) for %d ports\n",
434			    vfres->neq, n10g + n1g);
435			return (ENXIO);
436		}
437		if (nrxq + ntxq > vfres->neq) {
438			/* Just punt and use 1 for everything. */
439			nrxq1g = ntxq1g = nrxq10g = ntxq10g = 1;
440			nrxq = n10g * nrxq10g + n1g * nrxq1g;
441			ntxq = n10g * ntxq10g + n1g * ntxq1g;
442		}
443		KASSERT(nrxq <= iq_avail, ("too many ingress queues"));
444		KASSERT(ntxq <= vfres->nethctrl, ("too many ETH queues"));
445		KASSERT(nrxq + ntxq <= vfres->neq, ("too many egress queues"));
446
447		/*
448		 * Do we have enough interrupts?  For MSI the interrupts
449		 * have to be a power of 2 as well.
450		 */
451		iaq->nirq += nrxq;
452		iaq->ntxq10g = ntxq10g;
453		iaq->ntxq1g = ntxq1g;
454		iaq->nrxq10g = nrxq10g;
455		iaq->nrxq1g = nrxq1g;
456		if (iaq->nirq <= navail &&
457		    (itype != INTR_MSI || powerof2(iaq->nirq))) {
458			navail = iaq->nirq;
459			if (itype == INTR_MSIX)
460				rc = pci_alloc_msix(sc->dev, &navail);
461			else
462				rc = pci_alloc_msi(sc->dev, &navail);
463			if (rc != 0) {
464				device_printf(sc->dev,
465		    "failed to allocate vectors:%d, type=%d, req=%d, rcvd=%d\n",
466				    itype, rc, iaq->nirq, navail);
467				return (rc);
468			}
469			if (navail == iaq->nirq) {
470				iaq->intr_flags_10g = INTR_RXQ;
471				iaq->intr_flags_1g = INTR_RXQ;
472				return (0);
473			}
474			pci_release_msi(sc->dev);
475		}
476
477		/* Fall back to a single interrupt. */
478		iaq->nirq = 1;
479		navail = iaq->nirq;
480		if (itype == INTR_MSIX)
481			rc = pci_alloc_msix(sc->dev, &navail);
482		else
483			rc = pci_alloc_msi(sc->dev, &navail);
484		if (rc != 0)
485			device_printf(sc->dev,
486		    "failed to allocate vectors:%d, type=%d, req=%d, rcvd=%d\n",
487			    itype, rc, iaq->nirq, navail);
488		iaq->intr_flags_10g = 0;
489		iaq->intr_flags_1g = 0;
490		return (rc);
491	}
492
493	device_printf(sc->dev,
494	    "failed to find a usable interrupt type.  "
495	    "allowed=%d, msi-x=%d, msi=%d, intx=1", t4_intr_types,
496	    pci_msix_count(sc->dev), pci_msi_count(sc->dev));
497
498	return (ENXIO);
499}
500
501static int
502t4vf_attach(device_t dev)
503{
504	struct adapter *sc;
505	int rc = 0, i, j, n10g, n1g, rqidx, tqidx;
506	struct make_dev_args mda;
507	struct intrs_and_queues iaq;
508	struct sge *s;
509
510	sc = device_get_softc(dev);
511	sc->dev = dev;
512	pci_enable_busmaster(dev);
513	pci_set_max_read_req(dev, 4096);
514	sc->params.pci.mps = pci_get_max_payload(dev);
515
516	sc->flags |= IS_VF;
517
518	sc->sge_gts_reg = VF_SGE_REG(A_SGE_VF_GTS);
519	sc->sge_kdoorbell_reg = VF_SGE_REG(A_SGE_VF_KDOORBELL);
520	snprintf(sc->lockname, sizeof(sc->lockname), "%s",
521	    device_get_nameunit(dev));
522	mtx_init(&sc->sc_lock, sc->lockname, 0, MTX_DEF);
523	t4_add_adapter(sc);
524
525	mtx_init(&sc->sfl_lock, "starving freelists", 0, MTX_DEF);
526	TAILQ_INIT(&sc->sfl);
527	callout_init_mtx(&sc->sfl_callout, &sc->sfl_lock, 0);
528
529	mtx_init(&sc->reg_lock, "indirect register access", 0, MTX_DEF);
530
531	rc = t4_map_bars_0_and_4(sc);
532	if (rc != 0)
533		goto done; /* error message displayed already */
534
535	rc = -t4vf_prep_adapter(sc);
536	if (rc != 0)
537		goto done;
538
539	t4_init_devnames(sc);
540	if (sc->names == NULL) {
541		rc = ENOTSUP;
542		goto done; /* error message displayed already */
543	}
544
545	/*
546	 * Leave the 'pf' and 'mbox' values as zero.  This ensures
547	 * that various firmware messages do not set the fields which
548	 * is the correct thing to do for a VF.
549	 */
550
551	memset(sc->chan_map, 0xff, sizeof(sc->chan_map));
552
553	make_dev_args_init(&mda);
554	mda.mda_devsw = &t4vf_cdevsw;
555	mda.mda_uid = UID_ROOT;
556	mda.mda_gid = GID_WHEEL;
557	mda.mda_mode = 0600;
558	mda.mda_si_drv1 = sc;
559	rc = make_dev_s(&mda, &sc->cdev, "%s", device_get_nameunit(dev));
560	if (rc != 0)
561		device_printf(dev, "failed to create nexus char device: %d.\n",
562		    rc);
563
564#if defined(__i386__)
565	if ((cpu_feature & CPUID_CX8) == 0) {
566		device_printf(dev, "64 bit atomics not available.\n");
567		rc = ENOTSUP;
568		goto done;
569	}
570#endif
571
572	/*
573	 * Some environments do not properly handle PCIE FLRs -- e.g. in Linux
574	 * 2.6.31 and later we can't call pci_reset_function() in order to
575	 * issue an FLR because of a self- deadlock on the device semaphore.
576	 * Meanwhile, the OS infrastructure doesn't issue FLRs in all the
577	 * cases where they're needed -- for instance, some versions of KVM
578	 * fail to reset "Assigned Devices" when the VM reboots.  Therefore we
579	 * use the firmware based reset in order to reset any per function
580	 * state.
581	 */
582	rc = -t4vf_fw_reset(sc);
583	if (rc != 0) {
584		device_printf(dev, "FW reset failed: %d\n", rc);
585		goto done;
586	}
587	sc->flags |= FW_OK;
588
589	/*
590	 * Grab basic operational parameters.  These will predominantly have
591	 * been set up by the Physical Function Driver or will be hard coded
592	 * into the adapter.  We just have to live with them ...  Note that
593	 * we _must_ get our VPD parameters before our SGE parameters because
594	 * we need to know the adapter's core clock from the VPD in order to
595	 * properly decode the SGE Timer Values.
596	 */
597	rc = get_params__pre_init(sc);
598	if (rc != 0)
599		goto done; /* error message displayed already */
600	rc = get_params__post_init(sc);
601	if (rc != 0)
602		goto done; /* error message displayed already */
603
604	rc = set_params__post_init(sc);
605	if (rc != 0)
606		goto done; /* error message displayed already */
607
608	rc = t4_map_bar_2(sc);
609	if (rc != 0)
610		goto done; /* error message displayed already */
611
612	rc = t4_create_dma_tag(sc);
613	if (rc != 0)
614		goto done; /* error message displayed already */
615
616	/*
617	 * The number of "ports" which we support is equal to the number of
618	 * Virtual Interfaces with which we've been provisioned.
619	 */
620	sc->params.nports = imin(sc->params.vfres.nvi, MAX_NPORTS);
621
622	/*
623	 * We may have been provisioned with more VIs than the number of
624	 * ports we're allowed to access (our Port Access Rights Mask).
625	 * Just use a single VI for each port.
626	 */
627	sc->params.nports = imin(sc->params.nports,
628	    bitcount32(sc->params.vfres.pmask));
629
630#ifdef notyet
631	/*
632	 * XXX: The Linux VF driver will lower nports if it thinks there
633	 * are too few resources in vfres (niqflint, nethctrl, neq).
634	 */
635#endif
636
637	/*
638	 * First pass over all the ports - allocate VIs and initialize some
639	 * basic parameters like mac address, port type, etc.  We also figure
640	 * out whether a port is 10G or 1G and use that information when
641	 * calculating how many interrupts to attempt to allocate.
642	 */
643	n10g = n1g = 0;
644	for_each_port(sc, i) {
645		struct port_info *pi;
646
647		pi = malloc(sizeof(*pi), M_CXGBE, M_ZERO | M_WAITOK);
648		sc->port[i] = pi;
649
650		/* These must be set before t4_port_init */
651		pi->adapter = sc;
652		pi->port_id = i;
653		pi->nvi = 1;
654		pi->vi = malloc(sizeof(struct vi_info) * pi->nvi, M_CXGBE,
655		    M_ZERO | M_WAITOK);
656
657		/*
658		 * Allocate the "main" VI and initialize parameters
659		 * like mac addr.
660		 */
661		rc = -t4_port_init(sc, sc->mbox, sc->pf, 0, i);
662		if (rc != 0) {
663			device_printf(dev, "unable to initialize port %d: %d\n",
664			    i, rc);
665			free(pi->vi, M_CXGBE);
666			free(pi, M_CXGBE);
667			sc->port[i] = NULL;
668			goto done;
669		}
670
671		/* No t4_link_start. */
672
673		snprintf(pi->lockname, sizeof(pi->lockname), "%sp%d",
674		    device_get_nameunit(dev), i);
675		mtx_init(&pi->pi_lock, pi->lockname, 0, MTX_DEF);
676		sc->chan_map[pi->tx_chan] = i;
677
678		if (port_top_speed(pi) >= 10) {
679			n10g++;
680		} else {
681			n1g++;
682		}
683
684		pi->dev = device_add_child(dev, sc->names->vf_ifnet_name, -1);
685		if (pi->dev == NULL) {
686			device_printf(dev,
687			    "failed to add device for port %d.\n", i);
688			rc = ENXIO;
689			goto done;
690		}
691		pi->vi[0].dev = pi->dev;
692		device_set_softc(pi->dev, pi);
693	}
694
695	/*
696	 * Interrupt type, # of interrupts, # of rx/tx queues, etc.
697	 */
698	rc = cfg_itype_and_nqueues(sc, n10g, n1g, &iaq);
699	if (rc != 0)
700		goto done; /* error message displayed already */
701
702	sc->intr_type = iaq.intr_type;
703	sc->intr_count = iaq.nirq;
704
705	s = &sc->sge;
706	s->nrxq = n10g * iaq.nrxq10g + n1g * iaq.nrxq1g;
707	s->ntxq = n10g * iaq.ntxq10g + n1g * iaq.ntxq1g;
708	s->neq = s->ntxq + s->nrxq;	/* the free list in an rxq is an eq */
709	s->neq += sc->params.nports + 1;/* ctrl queues: 1 per port + 1 mgmt */
710	s->niq = s->nrxq + 1;		/* 1 extra for firmware event queue */
711
712	s->rxq = malloc(s->nrxq * sizeof(struct sge_rxq), M_CXGBE,
713	    M_ZERO | M_WAITOK);
714	s->txq = malloc(s->ntxq * sizeof(struct sge_txq), M_CXGBE,
715	    M_ZERO | M_WAITOK);
716	s->iqmap = malloc(s->niq * sizeof(struct sge_iq *), M_CXGBE,
717	    M_ZERO | M_WAITOK);
718	s->eqmap = malloc(s->neq * sizeof(struct sge_eq *), M_CXGBE,
719	    M_ZERO | M_WAITOK);
720
721	sc->irq = malloc(sc->intr_count * sizeof(struct irq), M_CXGBE,
722	    M_ZERO | M_WAITOK);
723
724	/*
725	 * Second pass over the ports.  This time we know the number of rx and
726	 * tx queues that each port should get.
727	 */
728	rqidx = tqidx = 0;
729	for_each_port(sc, i) {
730		struct port_info *pi = sc->port[i];
731		struct vi_info *vi;
732
733		if (pi == NULL)
734			continue;
735
736		for_each_vi(pi, j, vi) {
737			vi->pi = pi;
738			vi->qsize_rxq = t4_qsize_rxq;
739			vi->qsize_txq = t4_qsize_txq;
740
741			vi->first_rxq = rqidx;
742			vi->first_txq = tqidx;
743			if (port_top_speed(pi) >= 10) {
744				vi->tmr_idx = t4_tmr_idx_10g;
745				vi->pktc_idx = t4_pktc_idx_10g;
746				vi->flags |= iaq.intr_flags_10g & INTR_RXQ;
747				vi->nrxq = j == 0 ? iaq.nrxq10g : 1;
748				vi->ntxq = j == 0 ? iaq.ntxq10g : 1;
749			} else {
750				vi->tmr_idx = t4_tmr_idx_1g;
751				vi->pktc_idx = t4_pktc_idx_1g;
752				vi->flags |= iaq.intr_flags_1g & INTR_RXQ;
753				vi->nrxq = j == 0 ? iaq.nrxq1g : 1;
754				vi->ntxq = j == 0 ? iaq.ntxq1g : 1;
755			}
756			rqidx += vi->nrxq;
757			tqidx += vi->ntxq;
758
759			vi->rsrv_noflowq = 0;
760		}
761	}
762
763	rc = t4_setup_intr_handlers(sc);
764	if (rc != 0) {
765		device_printf(dev,
766		    "failed to setup interrupt handlers: %d\n", rc);
767		goto done;
768	}
769
770	rc = bus_generic_attach(dev);
771	if (rc != 0) {
772		device_printf(dev,
773		    "failed to attach all child ports: %d\n", rc);
774		goto done;
775	}
776
777	device_printf(dev,
778	    "%d ports, %d %s interrupt%s, %d eq, %d iq\n",
779	    sc->params.nports, sc->intr_count, sc->intr_type == INTR_MSIX ?
780	    "MSI-X" : "MSI", sc->intr_count > 1 ? "s" : "", sc->sge.neq,
781	    sc->sge.niq);
782
783done:
784	if (rc != 0)
785		t4_detach_common(dev);
786	else
787		t4_sysctls(sc);
788
789	return (rc);
790}
791
792static void
793get_regs(struct adapter *sc, struct t4_regdump *regs, uint8_t *buf)
794{
795
796	/* 0x3f is used as the revision for VFs. */
797	regs->version = chip_id(sc) | (0x3f << 10);
798	t4_get_regs(sc, buf, regs->len);
799}
800
801static void
802t4_clr_vi_stats(struct adapter *sc)
803{
804	int reg;
805
806	for (reg = A_MPS_VF_STAT_TX_VF_BCAST_BYTES_L;
807	     reg <= A_MPS_VF_STAT_RX_VF_ERR_FRAMES_H; reg += 4)
808		t4_write_reg(sc, VF_MPS_REG(reg), 0);
809}
810
811static int
812t4vf_ioctl(struct cdev *dev, unsigned long cmd, caddr_t data, int fflag,
813    struct thread *td)
814{
815	int rc;
816	struct adapter *sc = dev->si_drv1;
817
818	rc = priv_check(td, PRIV_DRIVER);
819	if (rc != 0)
820		return (rc);
821
822	switch (cmd) {
823	case CHELSIO_T4_GETREG: {
824		struct t4_reg *edata = (struct t4_reg *)data;
825
826		if ((edata->addr & 0x3) != 0 || edata->addr >= sc->mmio_len)
827			return (EFAULT);
828
829		if (edata->size == 4)
830			edata->val = t4_read_reg(sc, edata->addr);
831		else if (edata->size == 8)
832			edata->val = t4_read_reg64(sc, edata->addr);
833		else
834			return (EINVAL);
835
836		break;
837	}
838	case CHELSIO_T4_SETREG: {
839		struct t4_reg *edata = (struct t4_reg *)data;
840
841		if ((edata->addr & 0x3) != 0 || edata->addr >= sc->mmio_len)
842			return (EFAULT);
843
844		if (edata->size == 4) {
845			if (edata->val & 0xffffffff00000000)
846				return (EINVAL);
847			t4_write_reg(sc, edata->addr, (uint32_t) edata->val);
848		} else if (edata->size == 8)
849			t4_write_reg64(sc, edata->addr, edata->val);
850		else
851			return (EINVAL);
852		break;
853	}
854	case CHELSIO_T4_REGDUMP: {
855		struct t4_regdump *regs = (struct t4_regdump *)data;
856		int reglen = t4_get_regs_len(sc);
857		uint8_t *buf;
858
859		if (regs->len < reglen) {
860			regs->len = reglen; /* hint to the caller */
861			return (ENOBUFS);
862		}
863
864		regs->len = reglen;
865		buf = malloc(reglen, M_CXGBE, M_WAITOK | M_ZERO);
866		get_regs(sc, regs, buf);
867		rc = copyout(buf, regs->data, reglen);
868		free(buf, M_CXGBE);
869		break;
870	}
871	case CHELSIO_T4_CLEAR_STATS: {
872		int i, v;
873		u_int port_id = *(uint32_t *)data;
874		struct port_info *pi;
875		struct vi_info *vi;
876
877		if (port_id >= sc->params.nports)
878			return (EINVAL);
879		pi = sc->port[port_id];
880
881		/* MAC stats */
882		pi->tx_parse_error = 0;
883		t4_clr_vi_stats(sc);
884
885		/*
886		 * Since this command accepts a port, clear stats for
887		 * all VIs on this port.
888		 */
889		for_each_vi(pi, v, vi) {
890			if (vi->flags & VI_INIT_DONE) {
891				struct sge_rxq *rxq;
892				struct sge_txq *txq;
893
894				for_each_rxq(vi, i, rxq) {
895#if defined(INET) || defined(INET6)
896					rxq->lro.lro_queued = 0;
897					rxq->lro.lro_flushed = 0;
898#endif
899					rxq->rxcsum = 0;
900					rxq->vlan_extraction = 0;
901				}
902
903				for_each_txq(vi, i, txq) {
904					txq->txcsum = 0;
905					txq->tso_wrs = 0;
906					txq->vlan_insertion = 0;
907					txq->imm_wrs = 0;
908					txq->sgl_wrs = 0;
909					txq->txpkt_wrs = 0;
910					txq->txpkts0_wrs = 0;
911					txq->txpkts1_wrs = 0;
912					txq->txpkts0_pkts = 0;
913					txq->txpkts1_pkts = 0;
914					mp_ring_reset_stats(txq->r);
915				}
916			}
917		}
918		break;
919	}
920	case CHELSIO_T4_SCHED_CLASS:
921		rc = t4_set_sched_class(sc, (struct t4_sched_params *)data);
922		break;
923	case CHELSIO_T4_SCHED_QUEUE:
924		rc = t4_set_sched_queue(sc, (struct t4_sched_queue *)data);
925		break;
926	default:
927		rc = ENOTTY;
928	}
929
930	return (rc);
931}
932
933static device_method_t t4vf_methods[] = {
934	DEVMETHOD(device_probe,		t4vf_probe),
935	DEVMETHOD(device_attach,	t4vf_attach),
936	DEVMETHOD(device_detach,	t4_detach_common),
937
938	DEVMETHOD_END
939};
940
941static driver_t t4vf_driver = {
942	"t4vf",
943	t4vf_methods,
944	sizeof(struct adapter)
945};
946
947static device_method_t t5vf_methods[] = {
948	DEVMETHOD(device_probe,		t5vf_probe),
949	DEVMETHOD(device_attach,	t4vf_attach),
950	DEVMETHOD(device_detach,	t4_detach_common),
951
952	DEVMETHOD_END
953};
954
955static driver_t t5vf_driver = {
956	"t5vf",
957	t5vf_methods,
958	sizeof(struct adapter)
959};
960
961static device_method_t t6vf_methods[] = {
962	DEVMETHOD(device_probe,		t6vf_probe),
963	DEVMETHOD(device_attach,	t4vf_attach),
964	DEVMETHOD(device_detach,	t4_detach_common),
965
966	DEVMETHOD_END
967};
968
969static driver_t t6vf_driver = {
970	"t6vf",
971	t6vf_methods,
972	sizeof(struct adapter)
973};
974
975static driver_t cxgbev_driver = {
976	"cxgbev",
977	cxgbe_methods,
978	sizeof(struct port_info)
979};
980
981static driver_t cxlv_driver = {
982	"cxlv",
983	cxgbe_methods,
984	sizeof(struct port_info)
985};
986
987static driver_t ccv_driver = {
988	"ccv",
989	cxgbe_methods,
990	sizeof(struct port_info)
991};
992
993static devclass_t t4vf_devclass, t5vf_devclass, t6vf_devclass;
994static devclass_t cxgbev_devclass, cxlv_devclass, ccv_devclass;
995
996DRIVER_MODULE(t4vf, pci, t4vf_driver, t4vf_devclass, 0, 0);
997MODULE_VERSION(t4vf, 1);
998MODULE_DEPEND(t4vf, t4nex, 1, 1, 1);
999
1000DRIVER_MODULE(t5vf, pci, t5vf_driver, t5vf_devclass, 0, 0);
1001MODULE_VERSION(t5vf, 1);
1002MODULE_DEPEND(t5vf, t5nex, 1, 1, 1);
1003
1004DRIVER_MODULE(t6vf, pci, t6vf_driver, t6vf_devclass, 0, 0);
1005MODULE_VERSION(t6vf, 1);
1006MODULE_DEPEND(t6vf, t6nex, 1, 1, 1);
1007
1008DRIVER_MODULE(cxgbev, t4vf, cxgbev_driver, cxgbev_devclass, 0, 0);
1009MODULE_VERSION(cxgbev, 1);
1010
1011DRIVER_MODULE(cxlv, t5vf, cxlv_driver, cxlv_devclass, 0, 0);
1012MODULE_VERSION(cxlv, 1);
1013
1014DRIVER_MODULE(ccv, t6vf, ccv_driver, ccv_devclass, 0, 0);
1015MODULE_VERSION(ccv, 1);
1016