t4_main.c revision 308318
/*-
 * Copyright (c) 2011 Chelsio Communications, Inc.
 * All rights reserved.
 * Written by: Navdeep Parhar <np@FreeBSD.org>
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD: stable/10/sys/dev/cxgbe/t4_main.c 308318 2016-11-04 21:43:10Z jhb $");

#include "opt_ddb.h"
#include "opt_inet.h"
#include "opt_inet6.h"

#include <sys/param.h>
#include <sys/conf.h>
#include <sys/priv.h>
#include <sys/kernel.h>
#include <sys/bus.h>
#include <sys/systm.h>
#include <sys/counter.h>
#include <sys/module.h>
#include <sys/malloc.h>
#include <sys/queue.h>
#include <sys/taskqueue.h>
#include <sys/pciio.h>
#include <dev/pci/pcireg.h>
#include <dev/pci/pcivar.h>
#include <dev/pci/pci_private.h>
#include <sys/firmware.h>
#include <sys/sbuf.h>
#include <sys/smp.h>
#include <sys/socket.h>
#include <sys/sockio.h>
#include <sys/sysctl.h>
#include <net/ethernet.h>
#include <net/if.h>
#include <net/if_types.h>
#include <net/if_dl.h>
#include <net/if_vlan_var.h>
#ifdef RSS
#include <net/rss_config.h>
#endif
#if defined(__i386__) || defined(__amd64__)
#include <vm/vm.h>
#include <vm/pmap.h>
#endif
#ifdef DDB
#include <ddb/ddb.h>
#include <ddb/db_lex.h>
#endif

#include "common/common.h"
#include "common/t4_msg.h"
#include "common/t4_regs.h"
#include "common/t4_regs_values.h"
#include "t4_ioctl.h"
#include "t4_l2t.h"
#include "t4_mp_ring.h"

/* T4 bus driver interface */
static int t4_probe(device_t);
static int t4_attach(device_t);
static int t4_detach(device_t);
static device_method_t t4_methods[] = {
	DEVMETHOD(device_probe,		t4_probe),
	DEVMETHOD(device_attach,	t4_attach),
	DEVMETHOD(device_detach,	t4_detach),

	DEVMETHOD_END
};
static driver_t t4_driver = {
	"t4nex",
	t4_methods,
	sizeof(struct adapter)
};


/* T4 port (cxgbe) interface */
static int cxgbe_probe(device_t);
static int cxgbe_attach(device_t);
static int cxgbe_detach(device_t);
static device_method_t cxgbe_methods[] = {
	DEVMETHOD(device_probe,		cxgbe_probe),
	DEVMETHOD(device_attach,	cxgbe_attach),
	DEVMETHOD(device_detach,	cxgbe_detach),
	{ 0, 0 }
};
static driver_t cxgbe_driver = {
	"cxgbe",
	cxgbe_methods,
	sizeof(struct port_info)
};

/* T4 VI (vcxgbe) interface */
static int vcxgbe_probe(device_t);
static int vcxgbe_attach(device_t);
static int vcxgbe_detach(device_t);
static device_method_t vcxgbe_methods[] = {
	DEVMETHOD(device_probe,		vcxgbe_probe),
	DEVMETHOD(device_attach,	vcxgbe_attach),
	DEVMETHOD(device_detach,	vcxgbe_detach),
	{ 0, 0 }
};
static driver_t vcxgbe_driver = {
	"vcxgbe",
	vcxgbe_methods,
	sizeof(struct vi_info)
};

static d_ioctl_t t4_ioctl;
static d_open_t t4_open;
static d_close_t t4_close;

static struct cdevsw t4_cdevsw = {
       .d_version = D_VERSION,
       .d_flags = 0,
       .d_open = t4_open,
       .d_close = t4_close,
       .d_ioctl = t4_ioctl,
       .d_name = "t4nex",
};
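
/*
 * The nexus node appears in /dev (e.g. /dev/t4nex0) and is the control path
 * used by the userland tool (cxgbetool) for register i/o, memory reads,
 * firmware loads, etc. via the ioctls dispatched from t4_ioctl().
 */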

/* T5 bus driver interface */
static int t5_probe(device_t);
static device_method_t t5_methods[] = {
	DEVMETHOD(device_probe,		t5_probe),
	DEVMETHOD(device_attach,	t4_attach),
	DEVMETHOD(device_detach,	t4_detach),

	DEVMETHOD_END
};
static driver_t t5_driver = {
	"t5nex",
	t5_methods,
	sizeof(struct adapter)
};


/* T5 port (cxl) interface */
static driver_t cxl_driver = {
	"cxl",
	cxgbe_methods,
	sizeof(struct port_info)
};

/* T5 VI (vcxl) interface */
static driver_t vcxl_driver = {
	"vcxl",
	vcxgbe_methods,
	sizeof(struct vi_info)
};

static struct cdevsw t5_cdevsw = {
       .d_version = D_VERSION,
       .d_flags = 0,
       .d_open = t4_open,
       .d_close = t4_close,
       .d_ioctl = t4_ioctl,
       .d_name = "t5nex",
};

/* ifnet + media interface */
static void cxgbe_init(void *);
static int cxgbe_ioctl(struct ifnet *, unsigned long, caddr_t);
static int cxgbe_transmit(struct ifnet *, struct mbuf *);
static void cxgbe_qflush(struct ifnet *);
static int cxgbe_media_change(struct ifnet *);
static void cxgbe_media_status(struct ifnet *, struct ifmediareq *);

MALLOC_DEFINE(M_CXGBE, "cxgbe", "Chelsio T4/T5 Ethernet driver and services");

/*
 * Correct lock order when you need to acquire multiple locks is t4_list_lock,
 * then ADAPTER_LOCK, then t4_uld_list_lock.
 */
static struct sx t4_list_lock;
SLIST_HEAD(, adapter) t4_list;
#ifdef TCP_OFFLOAD
static struct sx t4_uld_list_lock;
SLIST_HEAD(, uld_info) t4_uld_list;
#endif

/*
 * Tunables.  See tweak_tunables() too.
 *
 * Each tunable is set to a default value here if it's known at compile-time.
 * Otherwise it is set to -1 as an indication to tweak_tunables() that it should
 * provide a reasonable default when the driver is loaded.
 *
 * Tunables applicable to both T4 and T5 are under hw.cxgbe.  Those specific to
 * T5 are under hw.cxl.
 */
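
/*
 * For example, a tunable can be set at boot time from /boot/loader.conf
 * (values here are illustrative only):
 *
 *	hw.cxgbe.ntxq10g="8"
 *	hw.cxgbe.fw_install="2"
 */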

/*
 * Number of queues for tx and rx, 10G and 1G, NIC and offload.
 */
#define NTXQ_10G 16
static int t4_ntxq10g = -1;
TUNABLE_INT("hw.cxgbe.ntxq10g", &t4_ntxq10g);

#define NRXQ_10G 8
static int t4_nrxq10g = -1;
TUNABLE_INT("hw.cxgbe.nrxq10g", &t4_nrxq10g);

#define NTXQ_1G 4
static int t4_ntxq1g = -1;
TUNABLE_INT("hw.cxgbe.ntxq1g", &t4_ntxq1g);

#define NRXQ_1G 2
static int t4_nrxq1g = -1;
TUNABLE_INT("hw.cxgbe.nrxq1g", &t4_nrxq1g);

#define NTXQ_VI 1
static int t4_ntxq_vi = -1;
TUNABLE_INT("hw.cxgbe.ntxq_vi", &t4_ntxq_vi);

#define NRXQ_VI 1
static int t4_nrxq_vi = -1;
TUNABLE_INT("hw.cxgbe.nrxq_vi", &t4_nrxq_vi);

static int t4_rsrv_noflowq = 0;
TUNABLE_INT("hw.cxgbe.rsrv_noflowq", &t4_rsrv_noflowq);

#ifdef TCP_OFFLOAD
#define NOFLDTXQ_10G 8
static int t4_nofldtxq10g = -1;
TUNABLE_INT("hw.cxgbe.nofldtxq10g", &t4_nofldtxq10g);

#define NOFLDRXQ_10G 2
static int t4_nofldrxq10g = -1;
TUNABLE_INT("hw.cxgbe.nofldrxq10g", &t4_nofldrxq10g);

#define NOFLDTXQ_1G 2
static int t4_nofldtxq1g = -1;
TUNABLE_INT("hw.cxgbe.nofldtxq1g", &t4_nofldtxq1g);

#define NOFLDRXQ_1G 1
static int t4_nofldrxq1g = -1;
TUNABLE_INT("hw.cxgbe.nofldrxq1g", &t4_nofldrxq1g);

#define NOFLDTXQ_VI 1
static int t4_nofldtxq_vi = -1;
TUNABLE_INT("hw.cxgbe.nofldtxq_vi", &t4_nofldtxq_vi);

#define NOFLDRXQ_VI 1
static int t4_nofldrxq_vi = -1;
TUNABLE_INT("hw.cxgbe.nofldrxq_vi", &t4_nofldrxq_vi);
#endif

#ifdef DEV_NETMAP
#define NNMTXQ_VI 2
static int t4_nnmtxq_vi = -1;
TUNABLE_INT("hw.cxgbe.nnmtxq_vi", &t4_nnmtxq_vi);

#define NNMRXQ_VI 2
static int t4_nnmrxq_vi = -1;
TUNABLE_INT("hw.cxgbe.nnmrxq_vi", &t4_nnmrxq_vi);
#endif

/*
 * Holdoff parameters for 10G and 1G ports.
 */
#define TMR_IDX_10G 1
static int t4_tmr_idx_10g = TMR_IDX_10G;
TUNABLE_INT("hw.cxgbe.holdoff_timer_idx_10G", &t4_tmr_idx_10g);

#define PKTC_IDX_10G (-1)
static int t4_pktc_idx_10g = PKTC_IDX_10G;
TUNABLE_INT("hw.cxgbe.holdoff_pktc_idx_10G", &t4_pktc_idx_10g);

#define TMR_IDX_1G 1
static int t4_tmr_idx_1g = TMR_IDX_1G;
TUNABLE_INT("hw.cxgbe.holdoff_timer_idx_1G", &t4_tmr_idx_1g);

#define PKTC_IDX_1G (-1)
static int t4_pktc_idx_1g = PKTC_IDX_1G;
TUNABLE_INT("hw.cxgbe.holdoff_pktc_idx_1G", &t4_pktc_idx_1g);
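
/*
 * The timer index picks one of the SGE holdoff timer values and the packet
 * count index picks one of the SGE counter thresholds; an interrupt is held
 * off until the timer expires or enough packets have arrived.  A pktc index
 * of -1 (the default here) leaves packet-count holdoff disabled.
 */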

/*
 * Size (# of entries) of each tx and rx queue.
 */
static unsigned int t4_qsize_txq = TX_EQ_QSIZE;
TUNABLE_INT("hw.cxgbe.qsize_txq", &t4_qsize_txq);

static unsigned int t4_qsize_rxq = RX_IQ_QSIZE;
TUNABLE_INT("hw.cxgbe.qsize_rxq", &t4_qsize_rxq);

/*
 * Interrupt types allowed (bits 0, 1, 2 = INTx, MSI, MSI-X respectively).
 */
static int t4_intr_types = INTR_MSIX | INTR_MSI | INTR_INTX;
TUNABLE_INT("hw.cxgbe.interrupt_types", &t4_intr_types);
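
/*
 * This is a bitmask: 1 = INTx, 2 = MSI, 4 = MSI-X.  For example, setting
 * hw.cxgbe.interrupt_types="2" restricts the driver to MSI, while the
 * default of 7 lets it use the best type it can allocate.
 */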

/*
 * Configuration file.
 */
#define DEFAULT_CF	"default"
#define FLASH_CF	"flash"
#define UWIRE_CF	"uwire"
#define FPGA_CF		"fpga"
static char t4_cfg_file[32] = DEFAULT_CF;
TUNABLE_STR("hw.cxgbe.config_file", t4_cfg_file, sizeof(t4_cfg_file));

/*
 * PAUSE settings (bit 0, 1 = rx_pause, tx_pause respectively).
 * rx_pause = 1 to heed incoming PAUSE frames, 0 to ignore them.
 * tx_pause = 1 to emit PAUSE frames when the rx FIFO reaches its high water
 *            mark or when signalled to do so, 0 to never emit PAUSE.
 */
static int t4_pause_settings = PAUSE_TX | PAUSE_RX;
TUNABLE_INT("hw.cxgbe.pause_settings", &t4_pause_settings);
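
/*
 * So a value of 0 disables flow control entirely, 1 heeds PAUSE but never
 * emits it, 2 emits PAUSE but ignores incoming frames, and the default of 3
 * enables both directions.
 */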

/*
 * Firmware auto-install by driver during attach (0, 1, 2 = prohibited, allowed,
 * encouraged respectively).
 */
static unsigned int t4_fw_install = 1;
TUNABLE_INT("hw.cxgbe.fw_install", &t4_fw_install);

/*
 * ASIC features that will be used.  Disable the ones you don't want so that the
 * chip resources aren't wasted on features that will not be used.
 */
static int t4_nbmcaps_allowed = 0;
TUNABLE_INT("hw.cxgbe.nbmcaps_allowed", &t4_nbmcaps_allowed);

static int t4_linkcaps_allowed = 0;	/* No DCBX, PPP, etc. by default */
TUNABLE_INT("hw.cxgbe.linkcaps_allowed", &t4_linkcaps_allowed);

static int t4_switchcaps_allowed = FW_CAPS_CONFIG_SWITCH_INGRESS |
    FW_CAPS_CONFIG_SWITCH_EGRESS;
TUNABLE_INT("hw.cxgbe.switchcaps_allowed", &t4_switchcaps_allowed);

static int t4_niccaps_allowed = FW_CAPS_CONFIG_NIC;
TUNABLE_INT("hw.cxgbe.niccaps_allowed", &t4_niccaps_allowed);

static int t4_toecaps_allowed = -1;
TUNABLE_INT("hw.cxgbe.toecaps_allowed", &t4_toecaps_allowed);

static int t4_rdmacaps_allowed = -1;
TUNABLE_INT("hw.cxgbe.rdmacaps_allowed", &t4_rdmacaps_allowed);

static int t4_tlscaps_allowed = 0;
TUNABLE_INT("hw.cxgbe.tlscaps_allowed", &t4_tlscaps_allowed);

static int t4_iscsicaps_allowed = -1;
TUNABLE_INT("hw.cxgbe.iscsicaps_allowed", &t4_iscsicaps_allowed);

static int t4_fcoecaps_allowed = 0;
TUNABLE_INT("hw.cxgbe.fcoecaps_allowed", &t4_fcoecaps_allowed);

static int t5_write_combine = 0;
TUNABLE_INT("hw.cxl.write_combine", &t5_write_combine);

static int t4_num_vis = 1;
TUNABLE_INT("hw.cxgbe.num_vis", &t4_num_vis);

/* Functions used by extra VIs to obtain unique MAC addresses for each VI. */
static int vi_mac_funcs[] = {
	FW_VI_FUNC_OFLD,
	FW_VI_FUNC_IWARP,
	FW_VI_FUNC_OPENISCSI,
	FW_VI_FUNC_OPENFCOE,
	FW_VI_FUNC_FOISCSI,
	FW_VI_FUNC_FOFCOE,
};

struct intrs_and_queues {
	uint16_t intr_type;	/* INTx, MSI, or MSI-X */
	uint16_t nirq;		/* Total # of vectors */
	uint16_t intr_flags_10g;/* Interrupt flags for each 10G port */
	uint16_t intr_flags_1g;	/* Interrupt flags for each 1G port */
	uint16_t ntxq10g;	/* # of NIC txq's for each 10G port */
	uint16_t nrxq10g;	/* # of NIC rxq's for each 10G port */
	uint16_t ntxq1g;	/* # of NIC txq's for each 1G port */
	uint16_t nrxq1g;	/* # of NIC rxq's for each 1G port */
	uint16_t rsrv_noflowq;	/* Flag whether to reserve queue 0 */
	uint16_t nofldtxq10g;	/* # of TOE txq's for each 10G port */
	uint16_t nofldrxq10g;	/* # of TOE rxq's for each 10G port */
	uint16_t nofldtxq1g;	/* # of TOE txq's for each 1G port */
	uint16_t nofldrxq1g;	/* # of TOE rxq's for each 1G port */

	/* The vcxgbe/vcxl interfaces use these and not the ones above. */
	uint16_t ntxq_vi;	/* # of NIC txq's */
	uint16_t nrxq_vi;	/* # of NIC rxq's */
	uint16_t nofldtxq_vi;	/* # of TOE txq's */
	uint16_t nofldrxq_vi;	/* # of TOE rxq's */
	uint16_t nnmtxq_vi;	/* # of netmap txq's */
	uint16_t nnmrxq_vi;	/* # of netmap rxq's */
};

struct filter_entry {
        uint32_t valid:1;	/* filter allocated and valid */
        uint32_t locked:1;	/* filter is administratively locked */
        uint32_t pending:1;	/* filter action is pending firmware reply */
	uint32_t smtidx:8;	/* Source MAC Table index for smac */
	struct l2t_entry *l2t;	/* Layer Two Table entry for dmac */

        struct t4_filter_specification fs;
};

static int map_bars_0_and_4(struct adapter *);
static int map_bar_2(struct adapter *);
static void setup_memwin(struct adapter *);
static void position_memwin(struct adapter *, int, uint32_t);
static int rw_via_memwin(struct adapter *, int, uint32_t, uint32_t *, int, int);
static inline int read_via_memwin(struct adapter *, int, uint32_t, uint32_t *,
    int);
static inline int write_via_memwin(struct adapter *, int, uint32_t,
    const uint32_t *, int);
static int validate_mem_range(struct adapter *, uint32_t, int);
static int fwmtype_to_hwmtype(int);
static int validate_mt_off_len(struct adapter *, int, uint32_t, int,
    uint32_t *);
static int fixup_devlog_params(struct adapter *);
static int cfg_itype_and_nqueues(struct adapter *, int, int, int,
    struct intrs_and_queues *);
static int prep_firmware(struct adapter *);
static int partition_resources(struct adapter *, const struct firmware *,
    const char *);
static int get_params__pre_init(struct adapter *);
static int get_params__post_init(struct adapter *);
static int set_params__post_init(struct adapter *);
static void t4_set_desc(struct adapter *);
static void build_medialist(struct port_info *, struct ifmedia *);
static int cxgbe_init_synchronized(struct vi_info *);
static int cxgbe_uninit_synchronized(struct vi_info *);
static int setup_intr_handlers(struct adapter *);
static void quiesce_txq(struct adapter *, struct sge_txq *);
static void quiesce_wrq(struct adapter *, struct sge_wrq *);
static void quiesce_iq(struct adapter *, struct sge_iq *);
static void quiesce_fl(struct adapter *, struct sge_fl *);
static int t4_alloc_irq(struct adapter *, struct irq *, int rid,
    driver_intr_t *, void *, char *);
static int t4_free_irq(struct adapter *, struct irq *);
static void get_regs(struct adapter *, struct t4_regdump *, uint8_t *);
static void vi_refresh_stats(struct adapter *, struct vi_info *);
static void cxgbe_refresh_stats(struct adapter *, struct port_info *);
static void cxgbe_tick(void *);
static void cxgbe_vlan_config(void *, struct ifnet *, uint16_t);
static int cpl_not_handled(struct sge_iq *, const struct rss_header *,
    struct mbuf *);
static int an_not_handled(struct sge_iq *, const struct rsp_ctrl *);
static int fw_msg_not_handled(struct adapter *, const __be64 *);
static void t4_sysctls(struct adapter *);
static void cxgbe_sysctls(struct port_info *);
static int sysctl_int_array(SYSCTL_HANDLER_ARGS);
static int sysctl_bitfield(SYSCTL_HANDLER_ARGS);
static int sysctl_btphy(SYSCTL_HANDLER_ARGS);
static int sysctl_noflowq(SYSCTL_HANDLER_ARGS);
static int sysctl_holdoff_tmr_idx(SYSCTL_HANDLER_ARGS);
static int sysctl_holdoff_pktc_idx(SYSCTL_HANDLER_ARGS);
static int sysctl_qsize_rxq(SYSCTL_HANDLER_ARGS);
static int sysctl_qsize_txq(SYSCTL_HANDLER_ARGS);
static int sysctl_pause_settings(SYSCTL_HANDLER_ARGS);
static int sysctl_handle_t4_reg64(SYSCTL_HANDLER_ARGS);
static int sysctl_temperature(SYSCTL_HANDLER_ARGS);
#ifdef SBUF_DRAIN
static int sysctl_cctrl(SYSCTL_HANDLER_ARGS);
static int sysctl_cim_ibq_obq(SYSCTL_HANDLER_ARGS);
static int sysctl_cim_la(SYSCTL_HANDLER_ARGS);
static int sysctl_cim_la_t6(SYSCTL_HANDLER_ARGS);
static int sysctl_cim_ma_la(SYSCTL_HANDLER_ARGS);
static int sysctl_cim_pif_la(SYSCTL_HANDLER_ARGS);
static int sysctl_cim_qcfg(SYSCTL_HANDLER_ARGS);
static int sysctl_cpl_stats(SYSCTL_HANDLER_ARGS);
static int sysctl_ddp_stats(SYSCTL_HANDLER_ARGS);
static int sysctl_devlog(SYSCTL_HANDLER_ARGS);
static int sysctl_fcoe_stats(SYSCTL_HANDLER_ARGS);
static int sysctl_hw_sched(SYSCTL_HANDLER_ARGS);
static int sysctl_lb_stats(SYSCTL_HANDLER_ARGS);
static int sysctl_linkdnrc(SYSCTL_HANDLER_ARGS);
static int sysctl_meminfo(SYSCTL_HANDLER_ARGS);
static int sysctl_mps_tcam(SYSCTL_HANDLER_ARGS);
static int sysctl_mps_tcam_t6(SYSCTL_HANDLER_ARGS);
static int sysctl_path_mtus(SYSCTL_HANDLER_ARGS);
static int sysctl_pm_stats(SYSCTL_HANDLER_ARGS);
static int sysctl_rdma_stats(SYSCTL_HANDLER_ARGS);
static int sysctl_tcp_stats(SYSCTL_HANDLER_ARGS);
static int sysctl_tids(SYSCTL_HANDLER_ARGS);
static int sysctl_tp_err_stats(SYSCTL_HANDLER_ARGS);
static int sysctl_tp_la_mask(SYSCTL_HANDLER_ARGS);
static int sysctl_tp_la(SYSCTL_HANDLER_ARGS);
static int sysctl_tx_rate(SYSCTL_HANDLER_ARGS);
static int sysctl_ulprx_la(SYSCTL_HANDLER_ARGS);
static int sysctl_wcwr_stats(SYSCTL_HANDLER_ARGS);
#endif
#ifdef TCP_OFFLOAD
static int sysctl_tp_tick(SYSCTL_HANDLER_ARGS);
static int sysctl_tp_dack_timer(SYSCTL_HANDLER_ARGS);
static int sysctl_tp_timer(SYSCTL_HANDLER_ARGS);
#endif
static uint32_t fconf_iconf_to_mode(uint32_t, uint32_t);
static uint32_t mode_to_fconf(uint32_t);
static uint32_t mode_to_iconf(uint32_t);
static int check_fspec_against_fconf_iconf(struct adapter *,
    struct t4_filter_specification *);
static int get_filter_mode(struct adapter *, uint32_t *);
static int set_filter_mode(struct adapter *, uint32_t);
static inline uint64_t get_filter_hits(struct adapter *, uint32_t);
static int get_filter(struct adapter *, struct t4_filter *);
static int set_filter(struct adapter *, struct t4_filter *);
static int del_filter(struct adapter *, struct t4_filter *);
static void clear_filter(struct filter_entry *);
static int set_filter_wr(struct adapter *, int);
static int del_filter_wr(struct adapter *, int);
static int get_sge_context(struct adapter *, struct t4_sge_context *);
static int load_fw(struct adapter *, struct t4_data *);
static int read_card_mem(struct adapter *, int, struct t4_mem_range *);
static int read_i2c(struct adapter *, struct t4_i2c_data *);
static int set_sched_class(struct adapter *, struct t4_sched_params *);
static int set_sched_queue(struct adapter *, struct t4_sched_queue *);
#ifdef TCP_OFFLOAD
static int toe_capability(struct vi_info *, int);
#endif
static int mod_event(module_t, int, void *);

struct {
	uint16_t device;
	char *desc;
} t4_pciids[] = {
	{0xa000, "Chelsio Terminator 4 FPGA"},
	{0x4400, "Chelsio T440-dbg"},
	{0x4401, "Chelsio T420-CR"},
	{0x4402, "Chelsio T422-CR"},
	{0x4403, "Chelsio T440-CR"},
	{0x4404, "Chelsio T420-BCH"},
	{0x4405, "Chelsio T440-BCH"},
	{0x4406, "Chelsio T440-CH"},
	{0x4407, "Chelsio T420-SO"},
	{0x4408, "Chelsio T420-CX"},
	{0x4409, "Chelsio T420-BT"},
	{0x440a, "Chelsio T404-BT"},
	{0x440e, "Chelsio T440-LP-CR"},
}, t5_pciids[] = {
	{0xb000, "Chelsio Terminator 5 FPGA"},
	{0x5400, "Chelsio T580-dbg"},
	{0x5401,  "Chelsio T520-CR"},		/* 2 x 10G */
	{0x5402,  "Chelsio T522-CR"},		/* 2 x 10G, 2 X 1G */
	{0x5403,  "Chelsio T540-CR"},		/* 4 x 10G */
	{0x5407,  "Chelsio T520-SO"},		/* 2 x 10G, nomem */
	{0x5409,  "Chelsio T520-BT"},		/* 2 x 10GBaseT */
	{0x540a,  "Chelsio T504-BT"},		/* 4 x 1G */
	{0x540d,  "Chelsio T580-CR"},		/* 2 x 40G */
	{0x540e,  "Chelsio T540-LP-CR"},	/* 4 x 10G */
	{0x5410,  "Chelsio T580-LP-CR"},	/* 2 x 40G */
	{0x5411,  "Chelsio T520-LL-CR"},	/* 2 x 10G */
	{0x5412,  "Chelsio T560-CR"},		/* 1 x 40G, 2 x 10G */
	{0x5414,  "Chelsio T580-LP-SO-CR"},	/* 2 x 40G, nomem */
	{0x5415,  "Chelsio T502-BT"},		/* 2 x 1G */
#ifdef notyet
	{0x5404,  "Chelsio T520-BCH"},
	{0x5405,  "Chelsio T540-BCH"},
	{0x5406,  "Chelsio T540-CH"},
	{0x5408,  "Chelsio T520-CX"},
	{0x540b,  "Chelsio B520-SR"},
	{0x540c,  "Chelsio B504-BT"},
	{0x540f,  "Chelsio Amsterdam"},
	{0x5413,  "Chelsio T580-CHR"},
#endif
};

#ifdef TCP_OFFLOAD
/*
 * service_iq() has an iq and needs the fl.  Offset of fl from the iq should be
 * exactly the same for both rxq and ofld_rxq.
 */
CTASSERT(offsetof(struct sge_ofld_rxq, iq) == offsetof(struct sge_rxq, iq));
CTASSERT(offsetof(struct sge_ofld_rxq, fl) == offsetof(struct sge_rxq, fl));
#endif

/* No easy way to include t4_msg.h before adapter.h so we check this way */
CTASSERT(nitems(((struct adapter *)0)->cpl_handler) == NUM_CPL_CMDS);
CTASSERT(nitems(((struct adapter *)0)->fw_msg_handler) == NUM_FW6_TYPES);

CTASSERT(sizeof(struct cluster_metadata) <= CL_METADATA_SIZE);

static int
t4_probe(device_t dev)
{
	int i;
	uint16_t v = pci_get_vendor(dev);
	uint16_t d = pci_get_device(dev);
	uint8_t f = pci_get_function(dev);

	if (v != PCI_VENDOR_ID_CHELSIO)
		return (ENXIO);

	/* Attach only to PF0 of the FPGA */
	if (d == 0xa000 && f != 0)
		return (ENXIO);

	for (i = 0; i < nitems(t4_pciids); i++) {
		if (d == t4_pciids[i].device) {
			device_set_desc(dev, t4_pciids[i].desc);
			return (BUS_PROBE_DEFAULT);
		}
	}

	return (ENXIO);
}

static int
t5_probe(device_t dev)
{
	int i;
	uint16_t v = pci_get_vendor(dev);
	uint16_t d = pci_get_device(dev);
	uint8_t f = pci_get_function(dev);

	if (v != PCI_VENDOR_ID_CHELSIO)
		return (ENXIO);

	/* Attach only to PF0 of the FPGA */
	if (d == 0xb000 && f != 0)
		return (ENXIO);

	for (i = 0; i < nitems(t5_pciids); i++) {
		if (d == t5_pciids[i].device) {
			device_set_desc(dev, t5_pciids[i].desc);
			return (BUS_PROBE_DEFAULT);
		}
	}

	return (ENXIO);
}

static void
t5_attribute_workaround(device_t dev)
{
	device_t root_port;
	uint32_t v;

	/*
	 * The T5 chips do not properly echo the No Snoop and Relaxed
	 * Ordering attributes when replying to a TLP from a Root
	 * Port.  As a workaround, find the parent Root Port and
	 * disable No Snoop and Relaxed Ordering.  Note that this
	 * affects all devices under this root port.
	 */
	root_port = pci_find_pcie_root_port(dev);
	if (root_port == NULL) {
		device_printf(dev, "Unable to find parent root port\n");
		return;
	}

	v = pcie_adjust_config(root_port, PCIER_DEVICE_CTL,
	    PCIEM_CTL_RELAXED_ORD_ENABLE | PCIEM_CTL_NOSNOOP_ENABLE, 0, 2);
	if ((v & (PCIEM_CTL_RELAXED_ORD_ENABLE | PCIEM_CTL_NOSNOOP_ENABLE)) !=
	    0)
		device_printf(dev, "Disabled No Snoop/Relaxed Ordering on %s\n",
		    device_get_nameunit(root_port));
}

static int
t4_attach(device_t dev)
{
	struct adapter *sc;
	int rc = 0, i, j, n10g, n1g, rqidx, tqidx;
	struct intrs_and_queues iaq;
	struct sge *s;
	uint8_t *buf;
#ifdef TCP_OFFLOAD
	int ofld_rqidx, ofld_tqidx;
#endif
#ifdef DEV_NETMAP
	int nm_rqidx, nm_tqidx;
#endif
	int num_vis;

	sc = device_get_softc(dev);
	sc->dev = dev;
	TUNABLE_INT_FETCH("hw.cxgbe.debug_flags", &sc->debug_flags);

	if ((pci_get_device(dev) & 0xff00) == 0x5400)
		t5_attribute_workaround(dev);
	pci_enable_busmaster(dev);
	if (pci_find_cap(dev, PCIY_EXPRESS, &i) == 0) {
		uint32_t v;

		pci_set_max_read_req(dev, 4096);
		v = pci_read_config(dev, i + PCIER_DEVICE_CTL, 2);
		v |= PCIEM_CTL_RELAXED_ORD_ENABLE;
		pci_write_config(dev, i + PCIER_DEVICE_CTL, v, 2);

		sc->params.pci.mps = 128 << ((v & PCIEM_CTL_MAX_PAYLOAD) >> 5);
	}

	sc->traceq = -1;
	snprintf(sc->ifp_lockname, sizeof(sc->ifp_lockname), "%s tracer",
	    device_get_nameunit(dev));
	mtx_init(&sc->ifp_lock, sc->ifp_lockname, 0, MTX_DEF);

	snprintf(sc->lockname, sizeof(sc->lockname), "%s",
	    device_get_nameunit(dev));
	mtx_init(&sc->sc_lock, sc->lockname, 0, MTX_DEF);
	sx_xlock(&t4_list_lock);
	SLIST_INSERT_HEAD(&t4_list, sc, link);
	sx_xunlock(&t4_list_lock);

	mtx_init(&sc->sfl_lock, "starving freelists", 0, MTX_DEF);
	TAILQ_INIT(&sc->sfl);
	callout_init_mtx(&sc->sfl_callout, &sc->sfl_lock, 0);

	mtx_init(&sc->reg_lock, "indirect register access", 0, MTX_DEF);

	rc = map_bars_0_and_4(sc);
	if (rc != 0)
		goto done; /* error message displayed already */

	/*
	 * This is the real PF# to which we're attaching.  Works from within PCI
	 * passthrough environments too, where pci_get_function() could return a
	 * different PF# depending on the passthrough configuration.  We need to
	 * use the real PF# in all our communication with the firmware.
	 */
	sc->pf = G_SOURCEPF(t4_read_reg(sc, A_PL_WHOAMI));
	sc->mbox = sc->pf;

	memset(sc->chan_map, 0xff, sizeof(sc->chan_map));
	sc->an_handler = an_not_handled;
	for (i = 0; i < nitems(sc->cpl_handler); i++)
		sc->cpl_handler[i] = cpl_not_handled;
	for (i = 0; i < nitems(sc->fw_msg_handler); i++)
		sc->fw_msg_handler[i] = fw_msg_not_handled;
	t4_register_cpl_handler(sc, CPL_SET_TCB_RPL, t4_filter_rpl);
	t4_register_cpl_handler(sc, CPL_TRACE_PKT, t4_trace_pkt);
	t4_register_cpl_handler(sc, CPL_T5_TRACE_PKT, t5_trace_pkt);
	t4_init_sge_cpl_handlers(sc);

	/* Prepare the adapter for operation. */
	buf = malloc(PAGE_SIZE, M_CXGBE, M_ZERO | M_WAITOK);
	rc = -t4_prep_adapter(sc, buf);
	free(buf, M_CXGBE);
	if (rc != 0) {
		device_printf(dev, "failed to prepare adapter: %d.\n", rc);
		goto done;
	}

	/*
	 * Do this really early, with the memory windows set up even before the
	 * character device.  The userland tool's register i/o and mem read
	 * will work even in "recovery mode".
	 */
	setup_memwin(sc);
	if (t4_init_devlog_params(sc, 0) == 0)
		fixup_devlog_params(sc);
	sc->cdev = make_dev(is_t4(sc) ? &t4_cdevsw : &t5_cdevsw,
	    device_get_unit(dev), UID_ROOT, GID_WHEEL, 0600, "%s",
	    device_get_nameunit(dev));
	if (sc->cdev == NULL)
		device_printf(dev, "failed to create nexus char device.\n");
	else
		sc->cdev->si_drv1 = sc;

	/* Go no further if recovery mode has been requested. */
	if (TUNABLE_INT_FETCH("hw.cxgbe.sos", &i) && i != 0) {
		device_printf(dev, "recovery mode.\n");
		goto done;
	}

#if defined(__i386__)
	if ((cpu_feature & CPUID_CX8) == 0) {
		device_printf(dev, "64 bit atomics not available.\n");
		rc = ENOTSUP;
		goto done;
	}
#endif

	/* Prepare the firmware for operation */
	rc = prep_firmware(sc);
	if (rc != 0)
		goto done; /* error message displayed already */

	rc = get_params__post_init(sc);
	if (rc != 0)
		goto done; /* error message displayed already */

	rc = set_params__post_init(sc);
	if (rc != 0)
		goto done; /* error message displayed already */

	rc = map_bar_2(sc);
	if (rc != 0)
		goto done; /* error message displayed already */

	rc = t4_create_dma_tag(sc);
	if (rc != 0)
		goto done; /* error message displayed already */

	/*
	 * Number of VIs to create per-port.  The first VI is the "main" regular
	 * VI for the port.  The rest are additional virtual interfaces on the
	 * same physical port.  Note that the main VI does not have native
	 * netmap support but the extra VIs do.
	 *
	 * Limit the number of VIs per port to the number of available
	 * MAC addresses per port.
	 */
	if (t4_num_vis >= 1)
		num_vis = t4_num_vis;
	else
		num_vis = 1;
	if (num_vis > nitems(vi_mac_funcs)) {
		num_vis = nitems(vi_mac_funcs);
		device_printf(dev, "Number of VIs limited to %d\n", num_vis);
	}

	/*
	 * First pass over all the ports - allocate VIs and initialize some
	 * basic parameters like mac address, port type, etc.  We also figure
	 * out whether a port is 10G or 1G and use that information when
	 * calculating how many interrupts to attempt to allocate.
	 */
	n10g = n1g = 0;
	for_each_port(sc, i) {
		struct port_info *pi;

		pi = malloc(sizeof(*pi), M_CXGBE, M_ZERO | M_WAITOK);
		sc->port[i] = pi;

		/* These must be set before t4_port_init */
		pi->adapter = sc;
		pi->port_id = i;
		/*
		 * XXX: vi[0] is special so we can't delay this allocation until
		 * pi->nvi's final value is known.
		 */
		pi->vi = malloc(sizeof(struct vi_info) * num_vis, M_CXGBE,
		    M_ZERO | M_WAITOK);

		/*
		 * Allocate the "main" VI and initialize parameters
		 * like mac addr.
		 */
		rc = -t4_port_init(sc, sc->mbox, sc->pf, 0, i);
		if (rc != 0) {
			device_printf(dev, "unable to initialize port %d: %d\n",
			    i, rc);
			free(pi->vi, M_CXGBE);
			free(pi, M_CXGBE);
			sc->port[i] = NULL;
			goto done;
		}

		pi->link_cfg.requested_fc &= ~(PAUSE_TX | PAUSE_RX);
		pi->link_cfg.requested_fc |= t4_pause_settings;
		pi->link_cfg.fc &= ~(PAUSE_TX | PAUSE_RX);
		pi->link_cfg.fc |= t4_pause_settings;

		rc = -t4_link_l1cfg(sc, sc->mbox, pi->tx_chan, &pi->link_cfg);
		if (rc != 0) {
			device_printf(dev, "port %d l1cfg failed: %d\n", i, rc);
			free(pi->vi, M_CXGBE);
			free(pi, M_CXGBE);
			sc->port[i] = NULL;
			goto done;
		}

		snprintf(pi->lockname, sizeof(pi->lockname), "%sp%d",
		    device_get_nameunit(dev), i);
		mtx_init(&pi->pi_lock, pi->lockname, 0, MTX_DEF);
		sc->chan_map[pi->tx_chan] = i;

		if (is_10G_port(pi) || is_40G_port(pi)) {
			n10g++;
		} else {
			n1g++;
		}

		pi->linkdnrc = -1;

		pi->dev = device_add_child(dev, is_t4(sc) ? "cxgbe" : "cxl", -1);
		if (pi->dev == NULL) {
			device_printf(dev,
			    "failed to add device for port %d.\n", i);
			rc = ENXIO;
			goto done;
		}
		pi->vi[0].dev = pi->dev;
		device_set_softc(pi->dev, pi);
	}

	/*
	 * Interrupt type, # of interrupts, # of rx/tx queues, etc.
	 */
	rc = cfg_itype_and_nqueues(sc, n10g, n1g, num_vis, &iaq);
	if (rc != 0)
		goto done; /* error message displayed already */
	if (iaq.nrxq_vi + iaq.nofldrxq_vi + iaq.nnmrxq_vi == 0)
		num_vis = 1;

	sc->intr_type = iaq.intr_type;
	sc->intr_count = iaq.nirq;

	s = &sc->sge;
	s->nrxq = n10g * iaq.nrxq10g + n1g * iaq.nrxq1g;
	s->ntxq = n10g * iaq.ntxq10g + n1g * iaq.ntxq1g;
	if (num_vis > 1) {
		s->nrxq += (n10g + n1g) * (num_vis - 1) * iaq.nrxq_vi;
		s->ntxq += (n10g + n1g) * (num_vis - 1) * iaq.ntxq_vi;
	}
	s->neq = s->ntxq + s->nrxq;	/* the free list in an rxq is an eq */
	s->neq += sc->params.nports + 1;/* ctrl queues: 1 per port + 1 mgmt */
	s->niq = s->nrxq + 1;		/* 1 extra for firmware event queue */
#ifdef TCP_OFFLOAD
	if (is_offload(sc)) {
		s->nofldrxq = n10g * iaq.nofldrxq10g + n1g * iaq.nofldrxq1g;
		s->nofldtxq = n10g * iaq.nofldtxq10g + n1g * iaq.nofldtxq1g;
		if (num_vis > 1) {
			s->nofldrxq += (n10g + n1g) * (num_vis - 1) *
			    iaq.nofldrxq_vi;
			s->nofldtxq += (n10g + n1g) * (num_vis - 1) *
			    iaq.nofldtxq_vi;
		}
		s->neq += s->nofldtxq + s->nofldrxq;
		s->niq += s->nofldrxq;

		s->ofld_rxq = malloc(s->nofldrxq * sizeof(struct sge_ofld_rxq),
		    M_CXGBE, M_ZERO | M_WAITOK);
		s->ofld_txq = malloc(s->nofldtxq * sizeof(struct sge_wrq),
		    M_CXGBE, M_ZERO | M_WAITOK);
	}
#endif
#ifdef DEV_NETMAP
	if (num_vis > 1) {
		s->nnmrxq = (n10g + n1g) * (num_vis - 1) * iaq.nnmrxq_vi;
		s->nnmtxq = (n10g + n1g) * (num_vis - 1) * iaq.nnmtxq_vi;
	}
	s->neq += s->nnmtxq + s->nnmrxq;
	s->niq += s->nnmrxq;

	s->nm_rxq = malloc(s->nnmrxq * sizeof(struct sge_nm_rxq),
	    M_CXGBE, M_ZERO | M_WAITOK);
	s->nm_txq = malloc(s->nnmtxq * sizeof(struct sge_nm_txq),
	    M_CXGBE, M_ZERO | M_WAITOK);
#endif

	s->ctrlq = malloc(sc->params.nports * sizeof(struct sge_wrq), M_CXGBE,
	    M_ZERO | M_WAITOK);
	s->rxq = malloc(s->nrxq * sizeof(struct sge_rxq), M_CXGBE,
	    M_ZERO | M_WAITOK);
	s->txq = malloc(s->ntxq * sizeof(struct sge_txq), M_CXGBE,
	    M_ZERO | M_WAITOK);
	s->iqmap = malloc(s->niq * sizeof(struct sge_iq *), M_CXGBE,
	    M_ZERO | M_WAITOK);
	s->eqmap = malloc(s->neq * sizeof(struct sge_eq *), M_CXGBE,
	    M_ZERO | M_WAITOK);

	sc->irq = malloc(sc->intr_count * sizeof(struct irq), M_CXGBE,
	    M_ZERO | M_WAITOK);

	t4_init_l2t(sc, M_WAITOK);

	/*
	 * Second pass over the ports.  This time we know the number of rx and
	 * tx queues that each port should get.
	 */
	rqidx = tqidx = 0;
#ifdef TCP_OFFLOAD
	ofld_rqidx = ofld_tqidx = 0;
#endif
#ifdef DEV_NETMAP
	nm_rqidx = nm_tqidx = 0;
#endif
	for_each_port(sc, i) {
		struct port_info *pi = sc->port[i];
		struct vi_info *vi;

		if (pi == NULL)
			continue;

		pi->nvi = num_vis;
		for_each_vi(pi, j, vi) {
			vi->pi = pi;
			vi->qsize_rxq = t4_qsize_rxq;
			vi->qsize_txq = t4_qsize_txq;

			vi->first_rxq = rqidx;
			vi->first_txq = tqidx;
			if (is_10G_port(pi) || is_40G_port(pi)) {
				vi->tmr_idx = t4_tmr_idx_10g;
				vi->pktc_idx = t4_pktc_idx_10g;
				vi->flags |= iaq.intr_flags_10g & INTR_RXQ;
				vi->nrxq = j == 0 ? iaq.nrxq10g : iaq.nrxq_vi;
				vi->ntxq = j == 0 ? iaq.ntxq10g : iaq.ntxq_vi;
			} else {
				vi->tmr_idx = t4_tmr_idx_1g;
				vi->pktc_idx = t4_pktc_idx_1g;
				vi->flags |= iaq.intr_flags_1g & INTR_RXQ;
				vi->nrxq = j == 0 ? iaq.nrxq1g : iaq.nrxq_vi;
				vi->ntxq = j == 0 ? iaq.ntxq1g : iaq.ntxq_vi;
			}
			rqidx += vi->nrxq;
			tqidx += vi->ntxq;

			if (j == 0 && vi->ntxq > 1)
				vi->rsrv_noflowq = iaq.rsrv_noflowq ? 1 : 0;
			else
				vi->rsrv_noflowq = 0;

#ifdef TCP_OFFLOAD
			vi->first_ofld_rxq = ofld_rqidx;
			vi->first_ofld_txq = ofld_tqidx;
			if (is_10G_port(pi) || is_40G_port(pi)) {
				vi->flags |= iaq.intr_flags_10g & INTR_OFLD_RXQ;
				vi->nofldrxq = j == 0 ? iaq.nofldrxq10g :
				    iaq.nofldrxq_vi;
				vi->nofldtxq = j == 0 ? iaq.nofldtxq10g :
				    iaq.nofldtxq_vi;
			} else {
				vi->flags |= iaq.intr_flags_1g & INTR_OFLD_RXQ;
				vi->nofldrxq = j == 0 ? iaq.nofldrxq1g :
				    iaq.nofldrxq_vi;
				vi->nofldtxq = j == 0 ? iaq.nofldtxq1g :
				    iaq.nofldtxq_vi;
			}
			ofld_rqidx += vi->nofldrxq;
			ofld_tqidx += vi->nofldtxq;
#endif
#ifdef DEV_NETMAP
			if (j > 0) {
				vi->first_nm_rxq = nm_rqidx;
				vi->first_nm_txq = nm_tqidx;
				vi->nnmrxq = iaq.nnmrxq_vi;
				vi->nnmtxq = iaq.nnmtxq_vi;
				nm_rqidx += vi->nnmrxq;
				nm_tqidx += vi->nnmtxq;
			}
#endif
		}
	}

	rc = setup_intr_handlers(sc);
	if (rc != 0) {
		device_printf(dev,
		    "failed to setup interrupt handlers: %d\n", rc);
		goto done;
	}

	rc = bus_generic_attach(dev);
	if (rc != 0) {
		device_printf(dev,
		    "failed to attach all child ports: %d\n", rc);
		goto done;
	}

	device_printf(dev,
	    "PCIe gen%d x%d, %d ports, %d %s interrupt%s, %d eq, %d iq\n",
	    sc->params.pci.speed, sc->params.pci.width, sc->params.nports,
	    sc->intr_count, sc->intr_type == INTR_MSIX ? "MSI-X" :
	    (sc->intr_type == INTR_MSI ? "MSI" : "INTx"),
	    sc->intr_count > 1 ? "s" : "", sc->sge.neq, sc->sge.niq);

	t4_set_desc(sc);

done:
	if (rc != 0 && sc->cdev) {
		/* cdev was created and so cxgbetool works; recover that way. */
		device_printf(dev,
		    "error during attach, adapter is now in recovery mode.\n");
		rc = 0;
	}

	if (rc != 0)
		t4_detach(dev);
	else
		t4_sysctls(sc);

	return (rc);
}

/*
 * Idempotent
 */
static int
t4_detach(device_t dev)
{
	struct adapter *sc;
	struct port_info *pi;
	int i, rc;

	sc = device_get_softc(dev);

	if (sc->flags & FULL_INIT_DONE)
		t4_intr_disable(sc);

	if (sc->cdev) {
		destroy_dev(sc->cdev);
		sc->cdev = NULL;
	}

	rc = bus_generic_detach(dev);
	if (rc) {
		device_printf(dev,
		    "failed to detach child devices: %d\n", rc);
		return (rc);
	}

	for (i = 0; i < sc->intr_count; i++)
		t4_free_irq(sc, &sc->irq[i]);

	for (i = 0; i < MAX_NPORTS; i++) {
		pi = sc->port[i];
		if (pi) {
			t4_free_vi(sc, sc->mbox, sc->pf, 0, pi->vi[0].viid);
			if (pi->dev)
				device_delete_child(dev, pi->dev);

			mtx_destroy(&pi->pi_lock);
			free(pi->vi, M_CXGBE);
			free(pi, M_CXGBE);
		}
	}

	if (sc->flags & FULL_INIT_DONE)
		adapter_full_uninit(sc);

	if (sc->flags & FW_OK)
		t4_fw_bye(sc, sc->mbox);

	if (sc->intr_type == INTR_MSI || sc->intr_type == INTR_MSIX)
		pci_release_msi(dev);

	if (sc->regs_res)
		bus_release_resource(dev, SYS_RES_MEMORY, sc->regs_rid,
		    sc->regs_res);

	if (sc->udbs_res)
		bus_release_resource(dev, SYS_RES_MEMORY, sc->udbs_rid,
		    sc->udbs_res);

	if (sc->msix_res)
		bus_release_resource(dev, SYS_RES_MEMORY, sc->msix_rid,
		    sc->msix_res);

	if (sc->l2t)
		t4_free_l2t(sc->l2t);

#ifdef TCP_OFFLOAD
	free(sc->sge.ofld_rxq, M_CXGBE);
	free(sc->sge.ofld_txq, M_CXGBE);
#endif
#ifdef DEV_NETMAP
	free(sc->sge.nm_rxq, M_CXGBE);
	free(sc->sge.nm_txq, M_CXGBE);
#endif
	free(sc->irq, M_CXGBE);
	free(sc->sge.rxq, M_CXGBE);
	free(sc->sge.txq, M_CXGBE);
	free(sc->sge.ctrlq, M_CXGBE);
	free(sc->sge.iqmap, M_CXGBE);
	free(sc->sge.eqmap, M_CXGBE);
	free(sc->tids.ftid_tab, M_CXGBE);
	t4_destroy_dma_tag(sc);
	if (mtx_initialized(&sc->sc_lock)) {
		sx_xlock(&t4_list_lock);
		SLIST_REMOVE(&t4_list, sc, adapter, link);
		sx_xunlock(&t4_list_lock);
		mtx_destroy(&sc->sc_lock);
	}

	callout_drain(&sc->sfl_callout);
	if (mtx_initialized(&sc->tids.ftid_lock))
		mtx_destroy(&sc->tids.ftid_lock);
	if (mtx_initialized(&sc->sfl_lock))
		mtx_destroy(&sc->sfl_lock);
	if (mtx_initialized(&sc->ifp_lock))
		mtx_destroy(&sc->ifp_lock);
	if (mtx_initialized(&sc->reg_lock))
		mtx_destroy(&sc->reg_lock);

	for (i = 0; i < NUM_MEMWIN; i++) {
		struct memwin *mw = &sc->memwin[i];

		if (rw_initialized(&mw->mw_lock))
			rw_destroy(&mw->mw_lock);
	}

	bzero(sc, sizeof(*sc));

	return (0);
}

static int
cxgbe_probe(device_t dev)
{
	char buf[128];
	struct port_info *pi = device_get_softc(dev);

	snprintf(buf, sizeof(buf), "port %d", pi->port_id);
	device_set_desc_copy(dev, buf);

	return (BUS_PROBE_DEFAULT);
}

#define T4_CAP (IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_MTU | IFCAP_HWCSUM | \
    IFCAP_VLAN_HWCSUM | IFCAP_TSO | IFCAP_JUMBO_MTU | IFCAP_LRO | \
    IFCAP_VLAN_HWTSO | IFCAP_LINKSTATE | IFCAP_HWCSUM_IPV6 | IFCAP_HWSTATS)
#define T4_CAP_ENABLE (T4_CAP)
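
/*
 * All of these are enabled by default.  Most can be toggled later with
 * ifconfig(8), e.g. "ifconfig cxgbe0 -txcsum"; the SIOCSIFCAP case in
 * cxgbe_ioctl() below applies such changes.
 */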

static int
cxgbe_vi_attach(device_t dev, struct vi_info *vi)
{
	struct ifnet *ifp;
	struct sbuf *sb;

	vi->xact_addr_filt = -1;
	callout_init(&vi->tick, 1);

	/* Allocate an ifnet and set it up */
	ifp = if_alloc(IFT_ETHER);
	if (ifp == NULL) {
		device_printf(dev, "Cannot allocate ifnet\n");
		return (ENOMEM);
	}
	vi->ifp = ifp;
	ifp->if_softc = vi;

	if_initname(ifp, device_get_name(dev), device_get_unit(dev));
	ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;

	ifp->if_init = cxgbe_init;
	ifp->if_ioctl = cxgbe_ioctl;
	ifp->if_transmit = cxgbe_transmit;
	ifp->if_qflush = cxgbe_qflush;

	ifp->if_capabilities = T4_CAP;
#ifdef TCP_OFFLOAD
	if (vi->nofldrxq != 0)
		ifp->if_capabilities |= IFCAP_TOE;
#endif
	ifp->if_capenable = T4_CAP_ENABLE;
	ifp->if_hwassist = CSUM_TCP | CSUM_UDP | CSUM_IP | CSUM_TSO |
	    CSUM_UDP_IPV6 | CSUM_TCP_IPV6;

	ifp->if_hw_tsomax = 65536 - (ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN);
	ifp->if_hw_tsomaxsegcount = TX_SGL_SEGS;
	ifp->if_hw_tsomaxsegsize = 65536;

	/* Initialize ifmedia for this VI */
	ifmedia_init(&vi->media, IFM_IMASK, cxgbe_media_change,
	    cxgbe_media_status);
	build_medialist(vi->pi, &vi->media);

	vi->vlan_c = EVENTHANDLER_REGISTER(vlan_config, cxgbe_vlan_config, ifp,
	    EVENTHANDLER_PRI_ANY);

	ether_ifattach(ifp, vi->hw_addr);
#ifdef DEV_NETMAP
	if (vi->nnmrxq != 0)
		cxgbe_nm_attach(vi);
#endif
	sb = sbuf_new_auto();
	sbuf_printf(sb, "%d txq, %d rxq (NIC)", vi->ntxq, vi->nrxq);
#ifdef TCP_OFFLOAD
	if (ifp->if_capabilities & IFCAP_TOE)
		sbuf_printf(sb, "; %d txq, %d rxq (TOE)",
		    vi->nofldtxq, vi->nofldrxq);
#endif
#ifdef DEV_NETMAP
	if (ifp->if_capabilities & IFCAP_NETMAP)
		sbuf_printf(sb, "; %d txq, %d rxq (netmap)",
		    vi->nnmtxq, vi->nnmrxq);
#endif
	sbuf_finish(sb);
	device_printf(dev, "%s\n", sbuf_data(sb));
	sbuf_delete(sb);

	vi_sysctls(vi);

	return (0);
}

static int
cxgbe_attach(device_t dev)
{
	struct port_info *pi = device_get_softc(dev);
	struct vi_info *vi;
	int i, rc;

	callout_init_mtx(&pi->tick, &pi->pi_lock, 0);

	rc = cxgbe_vi_attach(dev, &pi->vi[0]);
	if (rc)
		return (rc);

	for_each_vi(pi, i, vi) {
		if (i == 0)
			continue;
		vi->dev = device_add_child(dev, is_t4(pi->adapter) ?
		    "vcxgbe" : "vcxl", -1);
		if (vi->dev == NULL) {
			device_printf(dev, "failed to add VI %d\n", i);
			continue;
		}
		device_set_softc(vi->dev, vi);
	}

	cxgbe_sysctls(pi);

	bus_generic_attach(dev);

	return (0);
}

static void
cxgbe_vi_detach(struct vi_info *vi)
{
	struct ifnet *ifp = vi->ifp;

	ether_ifdetach(ifp);

	if (vi->vlan_c)
		EVENTHANDLER_DEREGISTER(vlan_config, vi->vlan_c);

	/* Let detach proceed even if these fail. */
#ifdef DEV_NETMAP
	if (ifp->if_capabilities & IFCAP_NETMAP)
		cxgbe_nm_detach(vi);
#endif
	cxgbe_uninit_synchronized(vi);
	callout_drain(&vi->tick);
	vi_full_uninit(vi);

	ifmedia_removeall(&vi->media);
	if_free(vi->ifp);
	vi->ifp = NULL;
}

static int
cxgbe_detach(device_t dev)
{
	struct port_info *pi = device_get_softc(dev);
	struct adapter *sc = pi->adapter;
	int rc;

	/* Detach the extra VIs first. */
	rc = bus_generic_detach(dev);
	if (rc)
		return (rc);
	device_delete_children(dev);

	doom_vi(sc, &pi->vi[0]);

	if (pi->flags & HAS_TRACEQ) {
		sc->traceq = -1;	/* cloner should not create ifnet */
		t4_tracer_port_detach(sc);
	}

	cxgbe_vi_detach(&pi->vi[0]);
	callout_drain(&pi->tick);

	end_synchronized_op(sc, 0);

	return (0);
}

static void
cxgbe_init(void *arg)
{
	struct vi_info *vi = arg;
	struct adapter *sc = vi->pi->adapter;

	if (begin_synchronized_op(sc, vi, SLEEP_OK | INTR_OK, "t4init") != 0)
		return;
	cxgbe_init_synchronized(vi);
	end_synchronized_op(sc, 0);
}

static int
cxgbe_ioctl(struct ifnet *ifp, unsigned long cmd, caddr_t data)
{
	int rc = 0, mtu, flags, can_sleep;
	struct vi_info *vi = ifp->if_softc;
	struct adapter *sc = vi->pi->adapter;
	struct ifreq *ifr = (struct ifreq *)data;
	uint32_t mask;

	switch (cmd) {
	case SIOCSIFMTU:
		mtu = ifr->ifr_mtu;
		if ((mtu < ETHERMIN) || (mtu > ETHERMTU_JUMBO))
			return (EINVAL);

		rc = begin_synchronized_op(sc, vi, SLEEP_OK | INTR_OK, "t4mtu");
		if (rc)
			return (rc);
		ifp->if_mtu = mtu;
		if (vi->flags & VI_INIT_DONE) {
			t4_update_fl_bufsize(ifp);
			if (ifp->if_drv_flags & IFF_DRV_RUNNING)
				rc = update_mac_settings(ifp, XGMAC_MTU);
		}
		end_synchronized_op(sc, 0);
		break;

	case SIOCSIFFLAGS:
		can_sleep = 0;
redo_sifflags:
		rc = begin_synchronized_op(sc, vi,
		    can_sleep ? (SLEEP_OK | INTR_OK) : HOLD_LOCK, "t4flg");
		if (rc)
			return (rc);

		if (ifp->if_flags & IFF_UP) {
			if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
				flags = vi->if_flags;
				if ((ifp->if_flags ^ flags) &
				    (IFF_PROMISC | IFF_ALLMULTI)) {
					if (can_sleep == 1) {
						end_synchronized_op(sc, 0);
						can_sleep = 0;
						goto redo_sifflags;
					}
					rc = update_mac_settings(ifp,
					    XGMAC_PROMISC | XGMAC_ALLMULTI);
				}
			} else {
				if (can_sleep == 0) {
					end_synchronized_op(sc, LOCK_HELD);
					can_sleep = 1;
					goto redo_sifflags;
				}
				rc = cxgbe_init_synchronized(vi);
			}
			vi->if_flags = ifp->if_flags;
		} else if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
			if (can_sleep == 0) {
				end_synchronized_op(sc, LOCK_HELD);
				can_sleep = 1;
				goto redo_sifflags;
			}
			rc = cxgbe_uninit_synchronized(vi);
		}
		end_synchronized_op(sc, can_sleep ? 0 : LOCK_HELD);
		break;

	case SIOCADDMULTI:
	case SIOCDELMULTI: /* these two are called with a mutex held :-( */
		rc = begin_synchronized_op(sc, vi, HOLD_LOCK, "t4multi");
		if (rc)
			return (rc);
		if (ifp->if_drv_flags & IFF_DRV_RUNNING)
			rc = update_mac_settings(ifp, XGMAC_MCADDRS);
		end_synchronized_op(sc, LOCK_HELD);
		break;

	case SIOCSIFCAP:
		rc = begin_synchronized_op(sc, vi, SLEEP_OK | INTR_OK, "t4cap");
		if (rc)
			return (rc);

		mask = ifr->ifr_reqcap ^ ifp->if_capenable;
		if (mask & IFCAP_TXCSUM) {
			ifp->if_capenable ^= IFCAP_TXCSUM;
			ifp->if_hwassist ^= (CSUM_TCP | CSUM_UDP | CSUM_IP);

			if (IFCAP_TSO4 & ifp->if_capenable &&
			    !(IFCAP_TXCSUM & ifp->if_capenable)) {
				ifp->if_capenable &= ~IFCAP_TSO4;
				if_printf(ifp,
				    "tso4 disabled due to -txcsum.\n");
			}
		}
		if (mask & IFCAP_TXCSUM_IPV6) {
			ifp->if_capenable ^= IFCAP_TXCSUM_IPV6;
			ifp->if_hwassist ^= (CSUM_UDP_IPV6 | CSUM_TCP_IPV6);

			if (IFCAP_TSO6 & ifp->if_capenable &&
			    !(IFCAP_TXCSUM_IPV6 & ifp->if_capenable)) {
				ifp->if_capenable &= ~IFCAP_TSO6;
				if_printf(ifp,
				    "tso6 disabled due to -txcsum6.\n");
			}
		}
		if (mask & IFCAP_RXCSUM)
			ifp->if_capenable ^= IFCAP_RXCSUM;
		if (mask & IFCAP_RXCSUM_IPV6)
			ifp->if_capenable ^= IFCAP_RXCSUM_IPV6;

		/*
		 * Note that we leave CSUM_TSO alone (it is always set).  The
		 * kernel takes both IFCAP_TSOx and CSUM_TSO into account before
		 * sending a TSO request our way, so it's sufficient to toggle
		 * IFCAP_TSOx only.
		 */
		if (mask & IFCAP_TSO4) {
			if (!(IFCAP_TSO4 & ifp->if_capenable) &&
			    !(IFCAP_TXCSUM & ifp->if_capenable)) {
				if_printf(ifp, "enable txcsum first.\n");
				rc = EAGAIN;
				goto fail;
			}
			ifp->if_capenable ^= IFCAP_TSO4;
		}
		if (mask & IFCAP_TSO6) {
			if (!(IFCAP_TSO6 & ifp->if_capenable) &&
			    !(IFCAP_TXCSUM_IPV6 & ifp->if_capenable)) {
				if_printf(ifp, "enable txcsum6 first.\n");
				rc = EAGAIN;
				goto fail;
			}
			ifp->if_capenable ^= IFCAP_TSO6;
		}
		if (mask & IFCAP_LRO) {
#if defined(INET) || defined(INET6)
			int i;
			struct sge_rxq *rxq;

			ifp->if_capenable ^= IFCAP_LRO;
			for_each_rxq(vi, i, rxq) {
				if (ifp->if_capenable & IFCAP_LRO)
					rxq->iq.flags |= IQ_LRO_ENABLED;
				else
					rxq->iq.flags &= ~IQ_LRO_ENABLED;
			}
#endif
		}
#ifdef TCP_OFFLOAD
		if (mask & IFCAP_TOE) {
			int enable = (ifp->if_capenable ^ mask) & IFCAP_TOE;

			rc = toe_capability(vi, enable);
			if (rc != 0)
				goto fail;

			ifp->if_capenable ^= mask;
		}
#endif
		if (mask & IFCAP_VLAN_HWTAGGING) {
			ifp->if_capenable ^= IFCAP_VLAN_HWTAGGING;
			if (ifp->if_drv_flags & IFF_DRV_RUNNING)
				rc = update_mac_settings(ifp, XGMAC_VLANEX);
		}
		if (mask & IFCAP_VLAN_MTU) {
			ifp->if_capenable ^= IFCAP_VLAN_MTU;

			/* Need to find out how to disable auto-mtu-inflation */
		}
		if (mask & IFCAP_VLAN_HWTSO)
			ifp->if_capenable ^= IFCAP_VLAN_HWTSO;
		if (mask & IFCAP_VLAN_HWCSUM)
			ifp->if_capenable ^= IFCAP_VLAN_HWCSUM;

#ifdef VLAN_CAPABILITIES
		VLAN_CAPABILITIES(ifp);
#endif
fail:
		end_synchronized_op(sc, 0);
		break;

	case SIOCSIFMEDIA:
	case SIOCGIFMEDIA:
		ifmedia_ioctl(ifp, ifr, &vi->media, cmd);
		break;

	case SIOCGI2C: {
		struct ifi2creq i2c;

		rc = copyin(ifr->ifr_data, &i2c, sizeof(i2c));
		if (rc != 0)
			break;
		if (i2c.dev_addr != 0xA0 && i2c.dev_addr != 0xA2) {
			rc = EPERM;
			break;
		}
		if (i2c.len > sizeof(i2c.data)) {
			rc = EINVAL;
			break;
		}
		rc = begin_synchronized_op(sc, vi, SLEEP_OK | INTR_OK, "t4i2c");
		if (rc)
			return (rc);
		rc = -t4_i2c_rd(sc, sc->mbox, vi->pi->port_id, i2c.dev_addr,
		    i2c.offset, i2c.len, &i2c.data[0]);
		end_synchronized_op(sc, 0);
		if (rc == 0)
			rc = copyout(&i2c, ifr->ifr_data, sizeof(i2c));
		break;
	}

	default:
		rc = ether_ioctl(ifp, cmd, data);
	}

	return (rc);
}

static int
cxgbe_transmit(struct ifnet *ifp, struct mbuf *m)
{
	struct vi_info *vi = ifp->if_softc;
	struct port_info *pi = vi->pi;
	struct adapter *sc = pi->adapter;
	struct sge_txq *txq;
	void *items[1];
	int rc;

	M_ASSERTPKTHDR(m);
	MPASS(m->m_nextpkt == NULL);	/* not quite ready for this yet */

	if (__predict_false(pi->link_cfg.link_ok == 0)) {
		m_freem(m);
		return (ENETDOWN);
	}

	rc = parse_pkt(&m);
	if (__predict_false(rc != 0)) {
		MPASS(m == NULL);			/* was freed already */
		atomic_add_int(&pi->tx_parse_error, 1);	/* rare, atomic is ok */
		return (rc);
	}

	/* Select a txq. */
	txq = &sc->sge.txq[vi->first_txq];
	if (M_HASHTYPE_GET(m) != M_HASHTYPE_NONE)
		txq += ((m->m_pkthdr.flowid % (vi->ntxq - vi->rsrv_noflowq)) +
		    vi->rsrv_noflowq);
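	/*
	 * For example, with ntxq = 4 and rsrv_noflowq = 1, hashed traffic is
	 * spread across txq 1-3 while txq 0 carries only packets that have no
	 * flowid.
	 */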
1641
1642	items[0] = m;
1643	rc = mp_ring_enqueue(txq->r, items, 1, 4096);
1644	if (__predict_false(rc != 0))
1645		m_freem(m);
1646
1647	return (rc);
1648}
1649
1650static void
1651cxgbe_qflush(struct ifnet *ifp)
1652{
1653	struct vi_info *vi = ifp->if_softc;
1654	struct sge_txq *txq;
1655	int i;
1656
1657	/* queues do not exist if !VI_INIT_DONE. */
1658	if (vi->flags & VI_INIT_DONE) {
1659		for_each_txq(vi, i, txq) {
1660			TXQ_LOCK(txq);
1661			txq->eq.flags &= ~EQ_ENABLED;
1662			TXQ_UNLOCK(txq);
1663			while (!mp_ring_is_idle(txq->r)) {
1664				mp_ring_check_drainage(txq->r, 0);
1665				pause("qflush", 1);
1666			}
1667		}
1668	}
1669	if_qflush(ifp);
1670}
1671
1672static int
1673cxgbe_media_change(struct ifnet *ifp)
1674{
1675	struct vi_info *vi = ifp->if_softc;
1676
1677	device_printf(vi->dev, "%s unimplemented.\n", __func__);
1678
1679	return (EOPNOTSUPP);
1680}
1681
1682static void
1683cxgbe_media_status(struct ifnet *ifp, struct ifmediareq *ifmr)
1684{
1685	struct vi_info *vi = ifp->if_softc;
1686	struct port_info *pi = vi->pi;
1687	struct ifmedia_entry *cur;
1688	int speed = pi->link_cfg.speed;
1689
1690	cur = vi->media.ifm_cur;
1691
1692	ifmr->ifm_status = IFM_AVALID;
1693	if (!pi->link_cfg.link_ok)
1694		return;
1695
1696	ifmr->ifm_status |= IFM_ACTIVE;
1697
1698	/* active and current will differ iff current media is autoselect. */
1699	if (IFM_SUBTYPE(cur->ifm_media) != IFM_AUTO)
1700		return;
1701
1702	ifmr->ifm_active = IFM_ETHER | IFM_FDX;
1703	if (speed == 10000)
1704		ifmr->ifm_active |= IFM_10G_T;
1705	else if (speed == 1000)
1706		ifmr->ifm_active |= IFM_1000_T;
1707	else if (speed == 100)
1708		ifmr->ifm_active |= IFM_100_TX;
1709	else if (speed == 10)
1710		ifmr->ifm_active |= IFM_10_T;
1711	else
1712		KASSERT(0, ("%s: link up but speed unknown (%u)", __func__,
1713			    speed));
1714}
1715
1716static int
1717vcxgbe_probe(device_t dev)
1718{
1719	char buf[128];
1720	struct vi_info *vi = device_get_softc(dev);
1721
1722	snprintf(buf, sizeof(buf), "port %d vi %td", vi->pi->port_id,
1723	    vi - vi->pi->vi);
1724	device_set_desc_copy(dev, buf);
1725
1726	return (BUS_PROBE_DEFAULT);
1727}
1728
1729static int
1730vcxgbe_attach(device_t dev)
1731{
1732	struct vi_info *vi;
1733	struct port_info *pi;
1734	struct adapter *sc;
1735	int func, index, rc;
1736	u32 param, val;
1737
1738	vi = device_get_softc(dev);
1739	pi = vi->pi;
1740	sc = pi->adapter;
1741
1742	index = vi - pi->vi;
1743	KASSERT(index < nitems(vi_mac_funcs),
1744	    ("%s: VI %s doesn't have a MAC func", __func__,
1745	    device_get_nameunit(dev)));
1746	func = vi_mac_funcs[index];
1747	rc = t4_alloc_vi_func(sc, sc->mbox, pi->tx_chan, sc->pf, 0, 1,
1748	    vi->hw_addr, &vi->rss_size, func, 0);
1749	if (rc < 0) {
1750		device_printf(dev, "Failed to allocate virtual interface "
1751		    "for port %d: %d\n", pi->port_id, -rc);
1752		return (-rc);
1753	}
1754	vi->viid = rc;
1755
1756	param = V_FW_PARAMS_MNEM(FW_PARAMS_MNEM_DEV) |
1757	    V_FW_PARAMS_PARAM_X(FW_PARAMS_PARAM_DEV_RSSINFO) |
1758	    V_FW_PARAMS_PARAM_YZ(vi->viid);
1759	rc = t4_query_params(sc, sc->mbox, sc->pf, 0, 1, &param, &val);
1760	if (rc)
1761		vi->rss_base = 0xffff;
1762	else {
1763		/* MPASS((val >> 16) == rss_size); */
1764		vi->rss_base = val & 0xffff;
1765	}
1766
1767	rc = cxgbe_vi_attach(dev, vi);
1768	if (rc) {
1769		t4_free_vi(sc, sc->mbox, sc->pf, 0, vi->viid);
1770		return (rc);
1771	}
1772	return (0);
1773}
1774
1775static int
1776vcxgbe_detach(device_t dev)
1777{
1778	struct vi_info *vi;
1779	struct adapter *sc;
1780
1781	vi = device_get_softc(dev);
1782	sc = vi->pi->adapter;
1783
1784	doom_vi(sc, vi);
1785
1786	cxgbe_vi_detach(vi);
1787	t4_free_vi(sc, sc->mbox, sc->pf, 0, vi->viid);
1788
1789	end_synchronized_op(sc, 0);
1790
1791	return (0);
1792}
1793
1794void
1795t4_fatal_err(struct adapter *sc)
1796{
1797	t4_set_reg_field(sc, A_SGE_CONTROL, F_GLOBALENABLE, 0);
1798	t4_intr_disable(sc);
1799	log(LOG_EMERG, "%s: encountered fatal error, adapter stopped.\n",
1800	    device_get_nameunit(sc->dev));
1801}
1802
1803static int
1804map_bars_0_and_4(struct adapter *sc)
1805{
1806	sc->regs_rid = PCIR_BAR(0);
1807	sc->regs_res = bus_alloc_resource_any(sc->dev, SYS_RES_MEMORY,
1808	    &sc->regs_rid, RF_ACTIVE);
1809	if (sc->regs_res == NULL) {
1810		device_printf(sc->dev, "cannot map registers.\n");
1811		return (ENXIO);
1812	}
1813	sc->bt = rman_get_bustag(sc->regs_res);
1814	sc->bh = rman_get_bushandle(sc->regs_res);
1815	sc->mmio_len = rman_get_size(sc->regs_res);
1816	setbit(&sc->doorbells, DOORBELL_KDB);
1817
1818	sc->msix_rid = PCIR_BAR(4);
1819	sc->msix_res = bus_alloc_resource_any(sc->dev, SYS_RES_MEMORY,
1820	    &sc->msix_rid, RF_ACTIVE);
1821	if (sc->msix_res == NULL) {
1822		device_printf(sc->dev, "cannot map MSI-X BAR.\n");
1823		return (ENXIO);
1824	}
1825
1826	return (0);
1827}
1828
1829static int
1830map_bar_2(struct adapter *sc)
1831{
1832
1833	/*
1834	 * T4: only the iWARP driver uses the userspace doorbells.  There is no
1835	 * need to map the doorbell BAR if RDMA is disabled.
1836	 */
1837	if (is_t4(sc) && sc->rdmacaps == 0)
1838		return (0);
1839
1840	sc->udbs_rid = PCIR_BAR(2);
1841	sc->udbs_res = bus_alloc_resource_any(sc->dev, SYS_RES_MEMORY,
1842	    &sc->udbs_rid, RF_ACTIVE);
1843	if (sc->udbs_res == NULL) {
1844		device_printf(sc->dev, "cannot map doorbell BAR.\n");
1845		return (ENXIO);
1846	}
1847	sc->udbs_base = rman_get_virtual(sc->udbs_res);
1848
1849	if (is_t5(sc)) {
1850		setbit(&sc->doorbells, DOORBELL_UDB);
1851#if defined(__i386__) || defined(__amd64__)
1852		if (t5_write_combine) {
1853			int rc;
1854
1855			/*
1856			 * Enable write combining on BAR2.  This is the
1857			 * userspace doorbell BAR and is split into 128B
1858			 * (UDBS_SEG_SIZE) doorbell regions, each associated
1859			 * with an egress queue.  The first 64B has the doorbell
1860			 * and the second 64B can be used to submit a tx work
1861			 * request with an implicit doorbell.
1862			 */
1863
1864			rc = pmap_change_attr((vm_offset_t)sc->udbs_base,
1865			    rman_get_size(sc->udbs_res), PAT_WRITE_COMBINING);
1866			if (rc == 0) {
1867				clrbit(&sc->doorbells, DOORBELL_UDB);
1868				setbit(&sc->doorbells, DOORBELL_WCWR);
1869				setbit(&sc->doorbells, DOORBELL_UDBWC);
1870			} else {
1871				device_printf(sc->dev,
1872				    "couldn't enable write combining: %d\n",
1873				    rc);
1874			}
1875
1876			t4_write_reg(sc, A_SGE_STAT_CFG,
1877			    V_STATSOURCE_T5(7) | V_STATMODE(0));
1878		}
1879#endif
1880	}
1881
1882	return (0);
1883}
1884
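/*
 * Initial base address and aperture (size) of each memory window.  Only the
 * third window differs between T4 and T5.
 */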
1885struct memwin_init {
1886	uint32_t base;
1887	uint32_t aperture;
1888};
1889
1890static const struct memwin_init t4_memwin[NUM_MEMWIN] = {
1891	{ MEMWIN0_BASE, MEMWIN0_APERTURE },
1892	{ MEMWIN1_BASE, MEMWIN1_APERTURE },
1893	{ MEMWIN2_BASE_T4, MEMWIN2_APERTURE_T4 }
1894};
1895
1896static const struct memwin_init t5_memwin[NUM_MEMWIN] = {
1897	{ MEMWIN0_BASE, MEMWIN0_APERTURE },
1898	{ MEMWIN1_BASE, MEMWIN1_APERTURE },
1899	{ MEMWIN2_BASE_T5, MEMWIN2_APERTURE_T5 },
1900};
1901
1902static void
1903setup_memwin(struct adapter *sc)
1904{
1905	const struct memwin_init *mw_init;
1906	struct memwin *mw;
1907	int i;
1908	uint32_t bar0;
1909
1910	if (is_t4(sc)) {
1911		/*
1912		 * Read low 32b of bar0 indirectly via the hardware backdoor
1913		 * mechanism.  Works from within PCI passthrough environments
1914		 * too, where rman_get_start() can return a different value.  We
1915		 * need to program the T4 memory window decoders with the actual
1916		 * addresses that will be coming across the PCIe link.
1917		 */
1918		bar0 = t4_hw_pci_read_cfg4(sc, PCIR_BAR(0));
1919		bar0 &= (uint32_t) PCIM_BAR_MEM_BASE;
1920
1921		mw_init = &t4_memwin[0];
1922	} else {
1923		/* T5+ use the relative offset inside the PCIe BAR */
1924		bar0 = 0;
1925
1926		mw_init = &t5_memwin[0];
1927	}
1928
1929	for (i = 0, mw = &sc->memwin[0]; i < NUM_MEMWIN; i++, mw_init++, mw++) {
1930		rw_init(&mw->mw_lock, "memory window access");
1931		mw->mw_base = mw_init->base;
1932		mw->mw_aperture = mw_init->aperture;
1933		mw->mw_curpos = 0;
1934		t4_write_reg(sc,
1935		    PCIE_MEM_ACCESS_REG(A_PCIE_MEM_ACCESS_BASE_WIN, i),
1936		    (mw->mw_base + bar0) | V_BIR(0) |
1937		    V_WINDOW(ilog2(mw->mw_aperture) - 10));
1938		rw_wlock(&mw->mw_lock);
1939		position_memwin(sc, i, 0);
1940		rw_wunlock(&mw->mw_lock);
1941	}
1942
1943	/* flush */
1944	t4_read_reg(sc, PCIE_MEM_ACCESS_REG(A_PCIE_MEM_ACCESS_BASE_WIN, 2));
1945}
1946
1947/*
1948 * Positions the memory window at the given address in the card's address space.
1949 * There are some alignment requirements and the actual position may be at an
1950 * address prior to the requested address.  mw->mw_curpos always has the actual
1951 * position of the window.
1952 */
1953static void
1954position_memwin(struct adapter *sc, int idx, uint32_t addr)
1955{
1956	struct memwin *mw;
1957	uint32_t pf;
1958	uint32_t reg;
1959
1960	MPASS(idx >= 0 && idx < NUM_MEMWIN);
1961	mw = &sc->memwin[idx];
1962	rw_assert(&mw->mw_lock, RA_WLOCKED);
1963
1964	if (is_t4(sc)) {
1965		pf = 0;
1966		mw->mw_curpos = addr & ~0xf;	/* start must be 16B aligned */
1967	} else {
1968		pf = V_PFNUM(sc->pf);
1969		mw->mw_curpos = addr & ~0x7f;	/* start must be 128B aligned */
1970	}
1971	reg = PCIE_MEM_ACCESS_REG(A_PCIE_MEM_ACCESS_OFFSET, idx);
1972	t4_write_reg(sc, reg, mw->mw_curpos | pf);
1973	t4_read_reg(sc, reg);	/* flush */
1974}
1975
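/*
 * Read (rw == 0) or write (rw == 1) a block of 4-byte words at card address
 * addr through memory window idx.  The window's lock is held shared during
 * the copy and is upgraded to exclusive only to reposition the window.
 */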
1976static int
1977rw_via_memwin(struct adapter *sc, int idx, uint32_t addr, uint32_t *val,
1978    int len, int rw)
1979{
1980	struct memwin *mw;
1981	uint32_t mw_end, v;
1982
1983	MPASS(idx >= 0 && idx < NUM_MEMWIN);
1984
1985	/* Memory can only be accessed in naturally aligned 4 byte units */
1986	if (addr & 3 || len & 3 || len <= 0)
1987		return (EINVAL);
1988
1989	mw = &sc->memwin[idx];
1990	while (len > 0) {
1991		rw_rlock(&mw->mw_lock);
1992		mw_end = mw->mw_curpos + mw->mw_aperture;
1993		if (addr >= mw_end || addr < mw->mw_curpos) {
1994			/* Will need to reposition the window */
1995			if (!rw_try_upgrade(&mw->mw_lock)) {
1996				rw_runlock(&mw->mw_lock);
1997				rw_wlock(&mw->mw_lock);
1998			}
1999			rw_assert(&mw->mw_lock, RA_WLOCKED);
2000			position_memwin(sc, idx, addr);
2001			rw_downgrade(&mw->mw_lock);
2002			mw_end = mw->mw_curpos + mw->mw_aperture;
2003		}
2004		rw_assert(&mw->mw_lock, RA_RLOCKED);
2005		while (addr < mw_end && len > 0) {
2006			if (rw == 0) {
2007				v = t4_read_reg(sc, mw->mw_base + addr -
2008				    mw->mw_curpos);
2009				*val++ = le32toh(v);
2010			} else {
2011				v = *val++;
2012				t4_write_reg(sc, mw->mw_base + addr -
2013				    mw->mw_curpos, htole32(v));
2014			}
2015			addr += 4;
2016			len -= 4;
2017		}
2018		rw_runlock(&mw->mw_lock);
2019	}
2020
2021	return (0);
2022}
2023
2024static inline int
2025read_via_memwin(struct adapter *sc, int idx, uint32_t addr, uint32_t *val,
2026    int len)
2027{
2028
2029	return (rw_via_memwin(sc, idx, addr, val, len, 0));
2030}
2031
2032static inline int
2033write_via_memwin(struct adapter *sc, int idx, uint32_t addr,
2034    const uint32_t *val, int len)
2035{
2036
2037	return (rw_via_memwin(sc, idx, addr, (void *)(uintptr_t)val, len, 1));
2038}
2039
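/* qsort comparator: orders memory ranges by ascending start address. */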
2040static int
2041t4_range_cmp(const void *a, const void *b)
2042{
2043	const struct t4_range *ra = a, *rb = b;

2044	return (ra->start < rb->start ? -1 : (ra->start > rb->start ? 1 : 0));
2045}
2046
2047/*
2048 * Verify that the memory range specified by the addr/len pair is valid within
2049 * the card's address space.
2050 */
2051static int
2052validate_mem_range(struct adapter *sc, uint32_t addr, int len)
2053{
2054	struct t4_range mem_ranges[4], *r, *next;
2055	uint32_t em, addr_len;
2056	int i, n, remaining;
2057
2058	/* Memory can only be accessed in naturally aligned 4 byte units */
2059	if (addr & 3 || len & 3 || len <= 0)
2060		return (EINVAL);
2061
2062	/* Enabled memories */
2063	em = t4_read_reg(sc, A_MA_TARGET_MEM_ENABLE);
2064
2065	r = &mem_ranges[0];
2066	n = 0;
2067	bzero(r, sizeof(mem_ranges));
2068	if (em & F_EDRAM0_ENABLE) {
2069		addr_len = t4_read_reg(sc, A_MA_EDRAM0_BAR);
2070		r->size = G_EDRAM0_SIZE(addr_len) << 20;
2071		if (r->size > 0) {
2072			r->start = G_EDRAM0_BASE(addr_len) << 20;
2073			if (addr >= r->start &&
2074			    addr + len <= r->start + r->size)
2075				return (0);
2076			r++;
2077			n++;
2078		}
2079	}
2080	if (em & F_EDRAM1_ENABLE) {
2081		addr_len = t4_read_reg(sc, A_MA_EDRAM1_BAR);
2082		r->size = G_EDRAM1_SIZE(addr_len) << 20;
2083		if (r->size > 0) {
2084			r->start = G_EDRAM1_BASE(addr_len) << 20;
2085			if (addr >= r->start &&
2086			    addr + len <= r->start + r->size)
2087				return (0);
2088			r++;
2089			n++;
2090		}
2091	}
2092	if (em & F_EXT_MEM_ENABLE) {
2093		addr_len = t4_read_reg(sc, A_MA_EXT_MEMORY_BAR);
2094		r->size = G_EXT_MEM_SIZE(addr_len) << 20;
2095		if (r->size > 0) {
2096			r->start = G_EXT_MEM_BASE(addr_len) << 20;
2097			if (addr >= r->start &&
2098			    addr + len <= r->start + r->size)
2099				return (0);
2100			r++;
2101			n++;
2102		}
2103	}
2104	if (is_t5(sc) && em & F_EXT_MEM1_ENABLE) {
2105		addr_len = t4_read_reg(sc, A_MA_EXT_MEMORY1_BAR);
2106		r->size = G_EXT_MEM1_SIZE(addr_len) << 20;
2107		if (r->size > 0) {
2108			r->start = G_EXT_MEM1_BASE(addr_len) << 20;
2109			if (addr >= r->start &&
2110			    addr + len <= r->start + r->size)
2111				return (0);
2112			r++;
2113			n++;
2114		}
2115	}
2116	MPASS(n <= nitems(mem_ranges));
2117
2118	if (n > 1) {
2119		/* Sort and merge the ranges. */
2120		qsort(mem_ranges, n, sizeof(struct t4_range), t4_range_cmp);
2121
2122		/* Start from index 0 and examine the next n - 1 entries. */
2123		r = &mem_ranges[0];
2124		for (remaining = n - 1; remaining > 0; remaining--, r++) {
2125
2126			MPASS(r->size > 0);	/* r is a valid entry. */
2127			next = r + 1;
2128			MPASS(next->size > 0);	/* and so is the next one. */
2129
2130			while (r->start + r->size >= next->start) {
2131				/* Merge the next one into the current entry. */
2132				r->size = max(r->start + r->size,
2133				    next->start + next->size) - r->start;
2134				n--;	/* One fewer entry in total. */
2135				if (--remaining == 0)
2136					goto done;	/* short circuit */
2137				next++;
2138			}
2139			if (next != r + 1) {
2140				/*
2141				 * Some entries were merged into r and next
2142				 * points to the first valid entry that couldn't
2143				 * be merged.
2144				 */
2145				MPASS(next->size > 0);	/* must be valid */
2146				memcpy(r + 1, next, remaining * sizeof(*r));
2147#ifdef INVARIANTS
2148				/*
2149				 * This is so that the ->size assertions in the
2150				 * next iteration of the loop do the right
2151				 * thing for entries that were pulled up and are
2152				 * no longer valid.
2153				 */
2154				MPASS(n < nitems(mem_ranges));
2155				bzero(&mem_ranges[n], (nitems(mem_ranges) - n) *
2156				    sizeof(struct t4_range));
2157#endif
2158			}
2159		}
2160done:
2161		/* Done merging the ranges. */
2162		MPASS(n > 0);
2163		r = &mem_ranges[0];
2164		for (i = 0; i < n; i++, r++) {
2165			if (addr >= r->start &&
2166			    addr + len <= r->start + r->size)
2167				return (0);
2168		}
2169	}
2170
2171	return (EFAULT);
2172}
2173
2174static int
2175fwmtype_to_hwmtype(int mtype)
2176{
2177
2178	switch (mtype) {
2179	case FW_MEMTYPE_EDC0:
2180		return (MEM_EDC0);
2181	case FW_MEMTYPE_EDC1:
2182		return (MEM_EDC1);
2183	case FW_MEMTYPE_EXTMEM:
2184		return (MEM_MC0);
2185	case FW_MEMTYPE_EXTMEM1:
2186		return (MEM_MC1);
2187	default:
2188		panic("%s: cannot translate fw mtype %d.", __func__, mtype);
2189	}
2190}
2191
2192/*
2193 * Verify that the memory range specified by the memtype/offset/len pair is
2194 * valid and lies entirely within the memtype specified.  The global address of
2195 * the start of the range is returned in addr.
2196 */
2197static int
2198validate_mt_off_len(struct adapter *sc, int mtype, uint32_t off, int len,
2199    uint32_t *addr)
2200{
2201	uint32_t em, addr_len, maddr;
2202
2203	/* Memory can only be accessed in naturally aligned 4 byte units */
2204	if (off & 3 || len & 3 || len <= 0)
2205		return (EINVAL);
2206
2207	em = t4_read_reg(sc, A_MA_TARGET_MEM_ENABLE);
2208	switch (fwmtype_to_hwmtype(mtype)) {
2209	case MEM_EDC0:
2210		if (!(em & F_EDRAM0_ENABLE))
2211			return (EINVAL);
2212		addr_len = t4_read_reg(sc, A_MA_EDRAM0_BAR);
2213		maddr = G_EDRAM0_BASE(addr_len) << 20;
2214		break;
2215	case MEM_EDC1:
2216		if (!(em & F_EDRAM1_ENABLE))
2217			return (EINVAL);
2218		addr_len = t4_read_reg(sc, A_MA_EDRAM1_BAR);
2219		maddr = G_EDRAM1_BASE(addr_len) << 20;
2220		break;
2221	case MEM_MC:
2222		if (!(em & F_EXT_MEM_ENABLE))
2223			return (EINVAL);
2224		addr_len = t4_read_reg(sc, A_MA_EXT_MEMORY_BAR);
2225		maddr = G_EXT_MEM_BASE(addr_len) << 20;
2226		break;
2227	case MEM_MC1:
2228		if (!is_t5(sc) || !(em & F_EXT_MEM1_ENABLE))
2229			return (EINVAL);
2230		addr_len = t4_read_reg(sc, A_MA_EXT_MEMORY1_BAR);
2231		maddr = G_EXT_MEM1_BASE(addr_len) << 20;
2232		break;
2233	default:
2234		return (EINVAL);
2235	}
2236
2237	*addr = maddr + off;	/* global address */
2238	return (validate_mem_range(sc, *addr, len));
2239}
2240
2241static int
2242fixup_devlog_params(struct adapter *sc)
2243{
2244	struct devlog_params *dparams = &sc->params.devlog;
2245	int rc;
2246
2247	rc = validate_mt_off_len(sc, dparams->memtype, dparams->start,
2248	    dparams->size, &dparams->addr);
2249
2250	return (rc);
2251}
2252
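/*
 * Decide on the interrupt type (MSI-X, MSI, or INTx) and how many vectors and
 * rx queues to use with it, trying progressively less demanding
 * configurations until one fits what the system can allocate.
 */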
2253static int
2254cfg_itype_and_nqueues(struct adapter *sc, int n10g, int n1g, int num_vis,
2255    struct intrs_and_queues *iaq)
2256{
2257	int rc, itype, navail, nrxq10g, nrxq1g, n;
2258	int nofldrxq10g = 0, nofldrxq1g = 0;
2259
2260	bzero(iaq, sizeof(*iaq));
2261
2262	iaq->ntxq10g = t4_ntxq10g;
2263	iaq->ntxq1g = t4_ntxq1g;
2264	iaq->ntxq_vi = t4_ntxq_vi;
2265	iaq->nrxq10g = nrxq10g = t4_nrxq10g;
2266	iaq->nrxq1g = nrxq1g = t4_nrxq1g;
2267	iaq->nrxq_vi = t4_nrxq_vi;
2268	iaq->rsrv_noflowq = t4_rsrv_noflowq;
2269#ifdef TCP_OFFLOAD
2270	if (is_offload(sc)) {
2271		iaq->nofldtxq10g = t4_nofldtxq10g;
2272		iaq->nofldtxq1g = t4_nofldtxq1g;
2273		iaq->nofldtxq_vi = t4_nofldtxq_vi;
2274		iaq->nofldrxq10g = nofldrxq10g = t4_nofldrxq10g;
2275		iaq->nofldrxq1g = nofldrxq1g = t4_nofldrxq1g;
2276		iaq->nofldrxq_vi = t4_nofldrxq_vi;
2277	}
2278#endif
2279#ifdef DEV_NETMAP
2280	iaq->nnmtxq_vi = t4_nnmtxq_vi;
2281	iaq->nnmrxq_vi = t4_nnmrxq_vi;
2282#endif
2283
2284	for (itype = INTR_MSIX; itype; itype >>= 1) {
2285
2286		if ((itype & t4_intr_types) == 0)
2287			continue;	/* not allowed */
2288
2289		if (itype == INTR_MSIX)
2290			navail = pci_msix_count(sc->dev);
2291		else if (itype == INTR_MSI)
2292			navail = pci_msi_count(sc->dev);
2293		else
2294			navail = 1;
2295restart:
2296		if (navail == 0)
2297			continue;
2298
2299		iaq->intr_type = itype;
2300		iaq->intr_flags_10g = 0;
2301		iaq->intr_flags_1g = 0;
2302
2303		/*
2304		 * Best option: an interrupt vector for errors, one for the
2305		 * firmware event queue, and one for every rxq (NIC and TOE) of
2306		 * every VI.  The VIs that support netmap use the same
2307		 * interrupts for the NIC rx queues and the netmap rx queues
2308		 * because only one set of queues is active at a time.
2309		 */
2310		iaq->nirq = T4_EXTRA_INTR;
2311		iaq->nirq += n10g * (nrxq10g + nofldrxq10g);
2312		iaq->nirq += n1g * (nrxq1g + nofldrxq1g);
2313		iaq->nirq += (n10g + n1g) * (num_vis - 1) *
2314		    max(iaq->nrxq_vi, iaq->nnmrxq_vi);	/* See comment above. */
2315		iaq->nirq += (n10g + n1g) * (num_vis - 1) * iaq->nofldrxq_vi;
2316		if (iaq->nirq <= navail &&
2317		    (itype != INTR_MSI || powerof2(iaq->nirq))) {
2318			iaq->intr_flags_10g = INTR_ALL;
2319			iaq->intr_flags_1g = INTR_ALL;
2320			goto allocate;
2321		}
2322
2323		/* Disable the VIs (and netmap) if there aren't enough intrs */
2324		if (num_vis > 1) {
2325			device_printf(sc->dev, "virtual interfaces disabled "
2326			    "because num_vis=%u with current settings "
2327			    "(nrxq10g=%u, nrxq1g=%u, nofldrxq10g=%u, "
2328			    "nofldrxq1g=%u, nrxq_vi=%u nofldrxq_vi=%u, "
2329			    "nnmrxq_vi=%u) would need %u interrupts but "
2330			    "only %u are available.\n", num_vis, nrxq10g,
2331			    nrxq1g, nofldrxq10g, nofldrxq1g, iaq->nrxq_vi,
2332			    iaq->nofldrxq_vi, iaq->nnmrxq_vi, iaq->nirq,
2333			    navail);
2334			num_vis = 1;
2335			iaq->ntxq_vi = iaq->nrxq_vi = 0;
2336			iaq->nofldtxq_vi = iaq->nofldrxq_vi = 0;
2337			iaq->nnmtxq_vi = iaq->nnmrxq_vi = 0;
2338			goto restart;
2339		}
2340
2341		/*
2342		 * Second best option: a vector for errors, one for the firmware
2343		 * event queue, and vectors for either all the NIC rx queues or
2344		 * all the TOE rx queues.  The queues that don't get vectors
2345		 * will forward their interrupts to those that do.
2346		 */
2347		iaq->nirq = T4_EXTRA_INTR;
2348		if (nrxq10g >= nofldrxq10g) {
2349			iaq->intr_flags_10g = INTR_RXQ;
2350			iaq->nirq += n10g * nrxq10g;
2351		} else {
2352			iaq->intr_flags_10g = INTR_OFLD_RXQ;
2353			iaq->nirq += n10g * nofldrxq10g;
2354		}
2355		if (nrxq1g >= nofldrxq1g) {
2356			iaq->intr_flags_1g = INTR_RXQ;
2357			iaq->nirq += n1g * nrxq1g;
2358		} else {
2359			iaq->intr_flags_1g = INTR_OFLD_RXQ;
2360			iaq->nirq += n1g * nofldrxq1g;
2361		}
2362		if (iaq->nirq <= navail &&
2363		    (itype != INTR_MSI || powerof2(iaq->nirq)))
2364			goto allocate;
2365
2366		/*
2367		 * Next best option: an interrupt vector for errors, one for the
2368		 * firmware event queue, and at least one per main-VI.  At this
2369		 * point we know we'll have to downsize nrxq and/or nofldrxq to
2370		 * fit what's available to us.
2371		 */
2372		iaq->nirq = T4_EXTRA_INTR;
2373		iaq->nirq += n10g + n1g;
2374		if (iaq->nirq <= navail) {
2375			int leftover = navail - iaq->nirq;
2376
2377			if (n10g > 0) {
2378				int target = max(nrxq10g, nofldrxq10g);
2379
2380				iaq->intr_flags_10g = nrxq10g >= nofldrxq10g ?
2381				    INTR_RXQ : INTR_OFLD_RXQ;
2382
2383				n = 1;
2384				while (n < target && leftover >= n10g) {
2385					leftover -= n10g;
2386					iaq->nirq += n10g;
2387					n++;
2388				}
2389				iaq->nrxq10g = min(n, nrxq10g);
2390#ifdef TCP_OFFLOAD
2391				iaq->nofldrxq10g = min(n, nofldrxq10g);
2392#endif
2393			}
2394
2395			if (n1g > 0) {
2396				int target = max(nrxq1g, nofldrxq1g);
2397
2398				iaq->intr_flags_1g = nrxq1g >= nofldrxq1g ?
2399				    INTR_RXQ : INTR_OFLD_RXQ;
2400
2401				n = 1;
2402				while (n < target && leftover >= n1g) {
2403					leftover -= n1g;
2404					iaq->nirq += n1g;
2405					n++;
2406				}
2407				iaq->nrxq1g = min(n, nrxq1g);
2408#ifdef TCP_OFFLOAD
2409				iaq->nofldrxq1g = min(n, nofldrxq1g);
2410#endif
2411			}
2412
2413			if (itype != INTR_MSI || powerof2(iaq->nirq))
2414				goto allocate;
2415		}
2416
2417		/*
2418		 * Least desirable option: one interrupt vector for everything.
2419		 */
2420		iaq->nirq = iaq->nrxq10g = iaq->nrxq1g = 1;
2421		iaq->intr_flags_10g = iaq->intr_flags_1g = 0;
2422#ifdef TCP_OFFLOAD
2423		if (is_offload(sc))
2424			iaq->nofldrxq10g = iaq->nofldrxq1g = 1;
2425#endif
2426allocate:
2427		navail = iaq->nirq;
2428		rc = 0;
2429		if (itype == INTR_MSIX)
2430			rc = pci_alloc_msix(sc->dev, &navail);
2431		else if (itype == INTR_MSI)
2432			rc = pci_alloc_msi(sc->dev, &navail);
2433
2434		if (rc == 0) {
2435			if (navail == iaq->nirq)
2436				return (0);
2437
2438			/*
2439			 * Didn't get the number requested.  Use whatever number
2440			 * the kernel is willing to allocate (it's in navail).
2441			 */
2442			device_printf(sc->dev, "fewer vectors than requested, "
2443			    "type=%d, req=%d, rcvd=%d; will downshift req.\n",
2444			    itype, iaq->nirq, navail);
2445			pci_release_msi(sc->dev);
2446			goto restart;
2447		}
2448
2449		device_printf(sc->dev,
2450		    "failed to allocate vectors:%d, type=%d, req=%d, rcvd=%d\n",
2451		    rc, itype, iaq->nirq, navail);
2452	}
2453
2454	device_printf(sc->dev,
2455	    "failed to find a usable interrupt type.  "
2456	    "allowed=%d, msi-x=%d, msi=%d, intx=1\n", t4_intr_types,
2457	    pci_msix_count(sc->dev), pci_msi_count(sc->dev));
2458
2459	return (ENXIO);
2460}
2461
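/* Pack a firmware version's major.minor.micro.build into one 32b word. */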
2462#define FW_VERSION(chip) ( \
2463    V_FW_HDR_FW_VER_MAJOR(chip##FW_VERSION_MAJOR) | \
2464    V_FW_HDR_FW_VER_MINOR(chip##FW_VERSION_MINOR) | \
2465    V_FW_HDR_FW_VER_MICRO(chip##FW_VERSION_MICRO) | \
2466    V_FW_HDR_FW_VER_BUILD(chip##FW_VERSION_BUILD))
2467#define FW_INTFVER(chip, intf) (chip##FW_HDR_INTFVER_##intf)
2468
2469struct fw_info {
2470	uint8_t chip;
2471	char *kld_name;
2472	char *fw_mod_name;
2473	struct fw_hdr fw_hdr;	/* XXX: waste of space, need a sparse struct */
2474} fw_info[] = {
2475	{
2476		.chip = CHELSIO_T4,
2477		.kld_name = "t4fw_cfg",
2478		.fw_mod_name = "t4fw",
2479		.fw_hdr = {
2480			.chip = FW_HDR_CHIP_T4,
2481			.fw_ver = htobe32_const(FW_VERSION(T4)),
2482			.intfver_nic = FW_INTFVER(T4, NIC),
2483			.intfver_vnic = FW_INTFVER(T4, VNIC),
2484			.intfver_ofld = FW_INTFVER(T4, OFLD),
2485			.intfver_ri = FW_INTFVER(T4, RI),
2486			.intfver_iscsipdu = FW_INTFVER(T4, ISCSIPDU),
2487			.intfver_iscsi = FW_INTFVER(T4, ISCSI),
2488			.intfver_fcoepdu = FW_INTFVER(T4, FCOEPDU),
2489			.intfver_fcoe = FW_INTFVER(T4, FCOE),
2490		},
2491	}, {
2492		.chip = CHELSIO_T5,
2493		.kld_name = "t5fw_cfg",
2494		.fw_mod_name = "t5fw",
2495		.fw_hdr = {
2496			.chip = FW_HDR_CHIP_T5,
2497			.fw_ver = htobe32_const(FW_VERSION(T5)),
2498			.intfver_nic = FW_INTFVER(T5, NIC),
2499			.intfver_vnic = FW_INTFVER(T5, VNIC),
2500			.intfver_ofld = FW_INTFVER(T5, OFLD),
2501			.intfver_ri = FW_INTFVER(T5, RI),
2502			.intfver_iscsipdu = FW_INTFVER(T5, ISCSIPDU),
2503			.intfver_iscsi = FW_INTFVER(T5, ISCSI),
2504			.intfver_fcoepdu = FW_INTFVER(T5, FCOEPDU),
2505			.intfver_fcoe = FW_INTFVER(T5, FCOE),
2506		},
2507	}
2508};
2509
2510static struct fw_info *
2511find_fw_info(int chip)
2512{
2513	int i;
2514
2515	for (i = 0; i < nitems(fw_info); i++) {
2516		if (fw_info[i].chip == chip)
2517			return (&fw_info[i]);
2518	}
2519	return (NULL);
2520}
2521
2522/*
2523 * Is the given firmware API compatible with the one the driver was compiled
2524 * with?
2525 */
2526static int
2527fw_compatible(const struct fw_hdr *hdr1, const struct fw_hdr *hdr2)
2528{
2529
2530	/* short circuit if it's the exact same firmware version */
2531	if (hdr1->chip == hdr2->chip && hdr1->fw_ver == hdr2->fw_ver)
2532		return (1);
2533
2534	/*
2535	 * XXX: Is this too conservative?  Perhaps I should limit this to the
2536	 * features that are supported in the driver.
2537	 */
2538#define SAME_INTF(x) (hdr1->intfver_##x == hdr2->intfver_##x)
2539	if (hdr1->chip == hdr2->chip && SAME_INTF(nic) && SAME_INTF(vnic) &&
2540	    SAME_INTF(ofld) && SAME_INTF(ri) && SAME_INTF(iscsipdu) &&
2541	    SAME_INTF(iscsi) && SAME_INTF(fcoepdu) && SAME_INTF(fcoe))
2542		return (1);
2543#undef SAME_INTF
2544
2545	return (0);
2546}
2547
2548/*
2549 * The firmware in the KLD is usable, but should it be installed?  This routine
2550 * explains itself in detail if it indicates the KLD firmware should be
2551 * installed.
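 * k and c are the 32b version words of the KLD and card firmware respectively.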
2552 */
2553static int
2554should_install_kld_fw(struct adapter *sc, int card_fw_usable, int k, int c)
2555{
2556	const char *reason;
2557
2558	if (!card_fw_usable) {
2559		reason = "incompatible or unusable";
2560		goto install;
2561	}
2562
2563	if (k > c) {
2564		reason = "older than the version bundled with this driver";
2565		goto install;
2566	}
2567
2568	if (t4_fw_install == 2 && k != c) {
2569		reason = "different than the version bundled with this driver";
2570		goto install;
2571	}
2572
2573	return (0);
2574
2575install:
2576	if (t4_fw_install == 0) {
2577		device_printf(sc->dev, "firmware on card (%u.%u.%u.%u) is %s, "
2578		    "but the driver is prohibited from installing a different "
2579		    "firmware on the card.\n",
2580		    G_FW_HDR_FW_VER_MAJOR(c), G_FW_HDR_FW_VER_MINOR(c),
2581		    G_FW_HDR_FW_VER_MICRO(c), G_FW_HDR_FW_VER_BUILD(c), reason);
2582
2583		return (0);
2584	}
2585
2586	device_printf(sc->dev, "firmware on card (%u.%u.%u.%u) is %s, "
2587	    "installing firmware %u.%u.%u.%u on card.\n",
2588	    G_FW_HDR_FW_VER_MAJOR(c), G_FW_HDR_FW_VER_MINOR(c),
2589	    G_FW_HDR_FW_VER_MICRO(c), G_FW_HDR_FW_VER_BUILD(c), reason,
2590	    G_FW_HDR_FW_VER_MAJOR(k), G_FW_HDR_FW_VER_MINOR(k),
2591	    G_FW_HDR_FW_VER_MICRO(k), G_FW_HDR_FW_VER_BUILD(k));
2592
2593	return (1);
2594}

2595/*
2596 * Establish contact with the firmware and determine if we are the master driver
2597 * or not, and whether we are responsible for chip initialization.
2598 */
2599static int
2600prep_firmware(struct adapter *sc)
2601{
2602	const struct firmware *fw = NULL, *default_cfg;
2603	int rc, pf, card_fw_usable, kld_fw_usable, need_fw_reset = 1;
2604	enum dev_state state;
2605	struct fw_info *fw_info;
2606	struct fw_hdr *card_fw;		/* fw on the card */
2607	const struct fw_hdr *kld_fw;	/* fw in the KLD */
2608	const struct fw_hdr *drv_fw;	/* fw header the driver was compiled
2609					   against */
2610
2611	/* Contact firmware. */
2612	rc = t4_fw_hello(sc, sc->mbox, sc->mbox, MASTER_MAY, &state);
2613	if (rc < 0 || state == DEV_STATE_ERR) {
2614		rc = -rc;
2615		device_printf(sc->dev,
2616		    "failed to connect to the firmware: %d, %d.\n", rc, state);
2617		return (rc);
2618	}
2619	pf = rc;
2620	if (pf == sc->mbox)
2621		sc->flags |= MASTER_PF;
2622	else if (state == DEV_STATE_UNINIT) {
2623		/*
2624		 * We didn't get to be the master so we definitely won't be
2625		 * configuring the chip.  It's a bug if someone else hasn't
2626		 * configured it already.
2627		 */
2628		device_printf(sc->dev, "couldn't be master (%d), "
2629		    "device not already initialized either (%d).\n", rc, state);
2630		return (EDOOFUS);
2631	}
2632
2633	/* This is the firmware whose headers the driver was compiled against */
2634	fw_info = find_fw_info(chip_id(sc));
2635	if (fw_info == NULL) {
2636		device_printf(sc->dev,
2637		    "unable to look up firmware information for chip %d.\n",
2638		    chip_id(sc));
2639		return (EINVAL);
2640	}
2641	drv_fw = &fw_info->fw_hdr;
2642
2643	/*
2644	 * The firmware KLD contains many modules.  The KLD name is also the
2645	 * name of the module that contains the default config file.
2646	 */
2647	default_cfg = firmware_get(fw_info->kld_name);
2648
2649	/* Read the header of the firmware on the card */
2650	card_fw = malloc(sizeof(*card_fw), M_CXGBE, M_ZERO | M_WAITOK);
2651	rc = -t4_read_flash(sc, FLASH_FW_START,
2652	    sizeof (*card_fw) / sizeof (uint32_t), (uint32_t *)card_fw, 1);
2653	if (rc == 0)
2654		card_fw_usable = fw_compatible(drv_fw, (const void*)card_fw);
2655	else {
2656		device_printf(sc->dev,
2657		    "Unable to read card's firmware header: %d\n", rc);
2658		card_fw_usable = 0;
2659	}
2660
2661	/* This is the firmware in the KLD */
2662	fw = firmware_get(fw_info->fw_mod_name);
2663	if (fw != NULL) {
2664		kld_fw = (const void *)fw->data;
2665		kld_fw_usable = fw_compatible(drv_fw, kld_fw);
2666	} else {
2667		kld_fw = NULL;
2668		kld_fw_usable = 0;
2669	}
2670
2671	if (card_fw_usable && card_fw->fw_ver == drv_fw->fw_ver &&
2672	    (!kld_fw_usable || kld_fw->fw_ver == drv_fw->fw_ver)) {
2673		/*
2674		 * Common case: the firmware on the card is an exact match and
2675		 * the KLD is an exact match too, or the KLD is
2676		 * absent/incompatible.  Note that t4_fw_install = 2 is ignored
2677		 * here -- use cxgbetool loadfw if you want to reinstall the
2678		 * same firmware as the one on the card.
2679		 */
2680	} else if (kld_fw_usable && state == DEV_STATE_UNINIT &&
2681	    should_install_kld_fw(sc, card_fw_usable, be32toh(kld_fw->fw_ver),
2682	    be32toh(card_fw->fw_ver))) {
2683
2684		rc = -t4_fw_upgrade(sc, sc->mbox, fw->data, fw->datasize, 0);
2685		if (rc != 0) {
2686			device_printf(sc->dev,
2687			    "failed to install firmware: %d\n", rc);
2688			goto done;
2689		}
2690
2691		/* Installed successfully, update the cached header too. */
2692		memcpy(card_fw, kld_fw, sizeof(*card_fw));
2693		card_fw_usable = 1;
2694		need_fw_reset = 0;	/* already reset as part of load_fw */
2695	}
2696
2697	if (!card_fw_usable) {
2698		uint32_t d, c, k;
2699
2700		d = ntohl(drv_fw->fw_ver);
2701		c = ntohl(card_fw->fw_ver);
2702		k = kld_fw ? ntohl(kld_fw->fw_ver) : 0;
2703
2704		device_printf(sc->dev, "Cannot find a usable firmware: "
2705		    "fw_install %d, chip state %d, "
2706		    "driver compiled with %d.%d.%d.%d, "
2707		    "card has %d.%d.%d.%d, KLD has %d.%d.%d.%d\n",
2708		    t4_fw_install, state,
2709		    G_FW_HDR_FW_VER_MAJOR(d), G_FW_HDR_FW_VER_MINOR(d),
2710		    G_FW_HDR_FW_VER_MICRO(d), G_FW_HDR_FW_VER_BUILD(d),
2711		    G_FW_HDR_FW_VER_MAJOR(c), G_FW_HDR_FW_VER_MINOR(c),
2712		    G_FW_HDR_FW_VER_MICRO(c), G_FW_HDR_FW_VER_BUILD(c),
2713		    G_FW_HDR_FW_VER_MAJOR(k), G_FW_HDR_FW_VER_MINOR(k),
2714		    G_FW_HDR_FW_VER_MICRO(k), G_FW_HDR_FW_VER_BUILD(k));
2715		rc = EINVAL;
2716		goto done;
2717	}
2718
2719	/* We're using whatever's on the card and it's known to be good. */
2720	sc->params.fw_vers = ntohl(card_fw->fw_ver);
2721	snprintf(sc->fw_version, sizeof(sc->fw_version), "%u.%u.%u.%u",
2722	    G_FW_HDR_FW_VER_MAJOR(sc->params.fw_vers),
2723	    G_FW_HDR_FW_VER_MINOR(sc->params.fw_vers),
2724	    G_FW_HDR_FW_VER_MICRO(sc->params.fw_vers),
2725	    G_FW_HDR_FW_VER_BUILD(sc->params.fw_vers));
2726
2727	t4_get_tp_version(sc, &sc->params.tp_vers);
2728	snprintf(sc->tp_version, sizeof(sc->tp_version), "%u.%u.%u.%u",
2729	    G_FW_HDR_FW_VER_MAJOR(sc->params.tp_vers),
2730	    G_FW_HDR_FW_VER_MINOR(sc->params.tp_vers),
2731	    G_FW_HDR_FW_VER_MICRO(sc->params.tp_vers),
2732	    G_FW_HDR_FW_VER_BUILD(sc->params.tp_vers));
2733
2734	if (t4_get_exprom_version(sc, &sc->params.exprom_vers) != 0)
2735		sc->params.exprom_vers = 0;
2736	else {
2737		snprintf(sc->exprom_version, sizeof(sc->exprom_version),
2738		    "%u.%u.%u.%u",
2739		    G_FW_HDR_FW_VER_MAJOR(sc->params.exprom_vers),
2740		    G_FW_HDR_FW_VER_MINOR(sc->params.exprom_vers),
2741		    G_FW_HDR_FW_VER_MICRO(sc->params.exprom_vers),
2742		    G_FW_HDR_FW_VER_BUILD(sc->params.exprom_vers));
2743	}
2744
2745	/* Reset device */
2746	if (need_fw_reset &&
2747	    (rc = -t4_fw_reset(sc, sc->mbox, F_PIORSTMODE | F_PIORST)) != 0) {
2748		device_printf(sc->dev, "firmware reset failed: %d.\n", rc);
2749		if (rc != ETIMEDOUT && rc != EIO)
2750			t4_fw_bye(sc, sc->mbox);
2751		goto done;
2752	}
2753	sc->flags |= FW_OK;
2754
2755	rc = get_params__pre_init(sc);
2756	if (rc != 0)
2757		goto done; /* error message displayed already */
2758
2759	/* Partition adapter resources as specified in the config file. */
2760	if (state == DEV_STATE_UNINIT) {
2761
2762		KASSERT(sc->flags & MASTER_PF,
2763		    ("%s: trying to change chip settings when not master.",
2764		    __func__));
2765
2766		rc = partition_resources(sc, default_cfg, fw_info->kld_name);
2767		if (rc != 0)
2768			goto done;	/* error message displayed already */
2769
2770		t4_tweak_chip_settings(sc);
2771
2772		/* get basic stuff going */
2773		rc = -t4_fw_initialize(sc, sc->mbox);
2774		if (rc != 0) {
2775			device_printf(sc->dev, "fw init failed: %d.\n", rc);
2776			goto done;
2777		}
2778	} else {
2779		snprintf(sc->cfg_file, sizeof(sc->cfg_file), "pf%d", pf);
2780		sc->cfcsum = 0;
2781	}
2782
2783done:
2784	free(card_fw, M_CXGBE);
2785	if (fw != NULL)
2786		firmware_put(fw, FIRMWARE_UNLOAD);
2787	if (default_cfg != NULL)
2788		firmware_put(default_cfg, FIRMWARE_UNLOAD);
2789
2790	return (rc);
2791}
2792
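/* Build the 32b key used to query or set a device or PF/VF fw parameter. */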
2793#define FW_PARAM_DEV(param) \
2794	(V_FW_PARAMS_MNEM(FW_PARAMS_MNEM_DEV) | \
2795	 V_FW_PARAMS_PARAM_X(FW_PARAMS_PARAM_DEV_##param))
2796#define FW_PARAM_PFVF(param) \
2797	(V_FW_PARAMS_MNEM(FW_PARAMS_MNEM_PFVF) | \
2798	 V_FW_PARAMS_PARAM_X(FW_PARAMS_PARAM_PFVF_##param))
2799
2800/*
2801 * Partition chip resources for use between various PFs, VFs, etc.
2802 */
2803static int
2804partition_resources(struct adapter *sc, const struct firmware *default_cfg,
2805    const char *name_prefix)
2806{
2807	const struct firmware *cfg = NULL;
2808	int rc = 0;
2809	struct fw_caps_config_cmd caps;
2810	uint32_t mtype, moff, finicsum, cfcsum;
2811
2812	/*
2813	 * Figure out what configuration file to use.  Pick the default config
2814	 * file for the card if the user hasn't specified one explicitly.
2815	 */
2816	snprintf(sc->cfg_file, sizeof(sc->cfg_file), "%s", t4_cfg_file);
2817	if (strncmp(t4_cfg_file, DEFAULT_CF, sizeof(t4_cfg_file)) == 0) {
2818		/* Card specific overrides go here. */
2819		if (pci_get_device(sc->dev) == 0x440a)
2820			snprintf(sc->cfg_file, sizeof(sc->cfg_file), UWIRE_CF);
2821		if (is_fpga(sc))
2822			snprintf(sc->cfg_file, sizeof(sc->cfg_file), FPGA_CF);
2823	}
2824
2825	/*
2826	 * We need to load another module if the profile is anything except
2827	 * "default" or "flash".
2828	 */
2829	if (strncmp(sc->cfg_file, DEFAULT_CF, sizeof(sc->cfg_file)) != 0 &&
2830	    strncmp(sc->cfg_file, FLASH_CF, sizeof(sc->cfg_file)) != 0) {
2831		char s[32];
2832
2833		snprintf(s, sizeof(s), "%s_%s", name_prefix, sc->cfg_file);
2834		cfg = firmware_get(s);
2835		if (cfg == NULL) {
2836			if (default_cfg != NULL) {
2837				device_printf(sc->dev,
2838				    "unable to load module \"%s\" for "
2839				    "configuration profile \"%s\", will use "
2840				    "the default config file instead.\n",
2841				    s, sc->cfg_file);
2842				snprintf(sc->cfg_file, sizeof(sc->cfg_file),
2843				    "%s", DEFAULT_CF);
2844			} else {
2845				device_printf(sc->dev,
2846				    "unable to load module \"%s\" for "
2847				    "configuration profile \"%s\", will use "
2848				    "the config file on the card's flash "
2849				    "instead.\n", s, sc->cfg_file);
2850				snprintf(sc->cfg_file, sizeof(sc->cfg_file),
2851				    "%s", FLASH_CF);
2852			}
2853		}
2854	}
2855
2856	if (strncmp(sc->cfg_file, DEFAULT_CF, sizeof(sc->cfg_file)) == 0 &&
2857	    default_cfg == NULL) {
2858		device_printf(sc->dev,
2859		    "default config file not available, will use the config "
2860		    "file on the card's flash instead.\n");
2861		snprintf(sc->cfg_file, sizeof(sc->cfg_file), "%s", FLASH_CF);
2862	}
2863
2864	if (strncmp(sc->cfg_file, FLASH_CF, sizeof(sc->cfg_file)) != 0) {
2865		u_int cflen;
2866		const uint32_t *cfdata;
2867		uint32_t param, val, addr;
2868
2869		KASSERT(cfg != NULL || default_cfg != NULL,
2870		    ("%s: no config to upload", __func__));
2871
2872		/*
2873		 * Ask the firmware where it wants us to upload the config file.
2874		 */
2875		param = FW_PARAM_DEV(CF);
2876		rc = -t4_query_params(sc, sc->mbox, sc->pf, 0, 1, &param, &val);
2877		if (rc != 0) {
2878			/* No support for config file?  Shouldn't happen. */
2879			device_printf(sc->dev,
2880			    "failed to query config file location: %d.\n", rc);
2881			goto done;
2882		}
2883		mtype = G_FW_PARAMS_PARAM_Y(val);
2884		moff = G_FW_PARAMS_PARAM_Z(val) << 16;
2885
2886		/*
2887		 * XXX: sheer laziness.  We deliberately added 4 bytes of
2888		 * useless stuffing/comments at the end of the config file so
2889		 * it's ok to simply throw away the last remaining bytes when
2890		 * the config file is not an exact multiple of 4.  This also
2891		 * helps with the validate_mt_off_len check.
2892		 */
2893		if (cfg != NULL) {
2894			cflen = cfg->datasize & ~3;
2895			cfdata = cfg->data;
2896		} else {
2897			cflen = default_cfg->datasize & ~3;
2898			cfdata = default_cfg->data;
2899		}
2900
2901		if (cflen > FLASH_CFG_MAX_SIZE) {
2902			device_printf(sc->dev,
2903			    "config file too long (%d, max allowed is %d).  "
2904			    "Will try to use the config on the card, if any.\n",
2905			    cflen, FLASH_CFG_MAX_SIZE);
2906			goto use_config_on_flash;
2907		}
2908
2909		rc = validate_mt_off_len(sc, mtype, moff, cflen, &addr);
2910		if (rc != 0) {
2911			device_printf(sc->dev,
2912			    "%s: addr (%d/0x%x) or len %d is not valid: %d.  "
2913			    "Will try to use the config on the card, if any.\n",
2914			    __func__, mtype, moff, cflen, rc);
2915			goto use_config_on_flash;
2916		}
2917		write_via_memwin(sc, 2, addr, cfdata, cflen);
2918	} else {
2919use_config_on_flash:
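		/* Use the config file already present in the card's flash. */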
2920		mtype = FW_MEMTYPE_FLASH;
2921		moff = t4_flash_cfg_addr(sc);
2922	}
2923
2924	bzero(&caps, sizeof(caps));
2925	caps.op_to_write = htobe32(V_FW_CMD_OP(FW_CAPS_CONFIG_CMD) |
2926	    F_FW_CMD_REQUEST | F_FW_CMD_READ);
2927	caps.cfvalid_to_len16 = htobe32(F_FW_CAPS_CONFIG_CMD_CFVALID |
2928	    V_FW_CAPS_CONFIG_CMD_MEMTYPE_CF(mtype) |
2929	    V_FW_CAPS_CONFIG_CMD_MEMADDR64K_CF(moff >> 16) | FW_LEN16(caps));
2930	rc = -t4_wr_mbox(sc, sc->mbox, &caps, sizeof(caps), &caps);
2931	if (rc != 0) {
2932		device_printf(sc->dev,
2933		    "failed to pre-process config file: %d "
2934		    "(mtype %d, moff 0x%x).\n", rc, mtype, moff);
2935		goto done;
2936	}
2937
2938	finicsum = be32toh(caps.finicsum);
2939	cfcsum = be32toh(caps.cfcsum);
2940	if (finicsum != cfcsum) {
2941		device_printf(sc->dev,
2942		    "WARNING: config file checksum mismatch: %08x %08x\n",
2943		    finicsum, cfcsum);
2944	}
2945	sc->cfcsum = cfcsum;
2946
2947#define LIMIT_CAPS(x) do { \
2948	caps.x &= htobe16(t4_##x##_allowed); \
2949} while (0)
2950
2951	/*
2952	 * Let the firmware know what features will (not) be used so it can tune
2953	 * things accordingly.
2954	 */
2955	LIMIT_CAPS(nbmcaps);
2956	LIMIT_CAPS(linkcaps);
2957	LIMIT_CAPS(switchcaps);
2958	LIMIT_CAPS(niccaps);
2959	LIMIT_CAPS(toecaps);
2960	LIMIT_CAPS(rdmacaps);
2961	LIMIT_CAPS(tlscaps);
2962	LIMIT_CAPS(iscsicaps);
2963	LIMIT_CAPS(fcoecaps);
2964#undef LIMIT_CAPS
2965
2966	caps.op_to_write = htobe32(V_FW_CMD_OP(FW_CAPS_CONFIG_CMD) |
2967	    F_FW_CMD_REQUEST | F_FW_CMD_WRITE);
2968	caps.cfvalid_to_len16 = htobe32(FW_LEN16(caps));
2969	rc = -t4_wr_mbox(sc, sc->mbox, &caps, sizeof(caps), NULL);
2970	if (rc != 0) {
2971		device_printf(sc->dev,
2972		    "failed to process config file: %d.\n", rc);
2973	}
2974done:
2975	if (cfg != NULL)
2976		firmware_put(cfg, FIRMWARE_UNLOAD);
2977	return (rc);
2978}
2979
2980/*
2981 * Retrieve parameters that are needed (or nice to have) very early.
2982 */
2983static int
2984get_params__pre_init(struct adapter *sc)
2985{
2986	int rc;
2987	uint32_t param[2], val[2];
2988
2989	param[0] = FW_PARAM_DEV(PORTVEC);
2990	param[1] = FW_PARAM_DEV(CCLK);
2991	rc = -t4_query_params(sc, sc->mbox, sc->pf, 0, 2, param, val);
2992	if (rc != 0) {
2993		device_printf(sc->dev,
2994		    "failed to query parameters (pre_init): %d.\n", rc);
2995		return (rc);
2996	}
2997
2998	sc->params.portvec = val[0];
2999	sc->params.nports = bitcount32(val[0]);
3000	sc->params.vpd.cclk = val[1];
3001
3002	/* Read device log parameters. */
3003	rc = -t4_init_devlog_params(sc, 1);
3004	if (rc == 0)
3005		fixup_devlog_params(sc);
3006	else {
3007		device_printf(sc->dev,
3008		    "failed to get devlog parameters: %d.\n", rc);
3009		rc = 0;	/* devlog isn't critical for device operation */
3010	}
3011
3012	return (rc);
3013}
3014
3015/*
3016 * Retrieve various parameters that are of interest to the driver.  The device
3017 * has been initialized by the firmware at this point.
3018 */
3019static int
3020get_params__post_init(struct adapter *sc)
3021{
3022	int rc;
3023	uint32_t param[7], val[7];
3024	struct fw_caps_config_cmd caps;
3025
3026	param[0] = FW_PARAM_PFVF(IQFLINT_START);
3027	param[1] = FW_PARAM_PFVF(EQ_START);
3028	param[2] = FW_PARAM_PFVF(FILTER_START);
3029	param[3] = FW_PARAM_PFVF(FILTER_END);
3030	param[4] = FW_PARAM_PFVF(L2T_START);
3031	param[5] = FW_PARAM_PFVF(L2T_END);
3032	rc = -t4_query_params(sc, sc->mbox, sc->pf, 0, 6, param, val);
3033	if (rc != 0) {
3034		device_printf(sc->dev,
3035		    "failed to query parameters (post_init): %d.\n", rc);
3036		return (rc);
3037	}
3038
3039	sc->sge.iq_start = val[0];
3040	sc->sge.eq_start = val[1];
3041	sc->tids.ftid_base = val[2];
3042	sc->tids.nftids = val[3] - val[2] + 1;
3043	sc->params.ftid_min = val[2];
3044	sc->params.ftid_max = val[3];
3045	sc->vres.l2t.start = val[4];
3046	sc->vres.l2t.size = val[5] - val[4] + 1;
3047	KASSERT(sc->vres.l2t.size <= L2T_SIZE,
3048	    ("%s: L2 table size (%u) larger than expected (%u)",
3049	    __func__, sc->vres.l2t.size, L2T_SIZE));
3050
3051	/* get capabilities */
3052	bzero(&caps, sizeof(caps));
3053	caps.op_to_write = htobe32(V_FW_CMD_OP(FW_CAPS_CONFIG_CMD) |
3054	    F_FW_CMD_REQUEST | F_FW_CMD_READ);
3055	caps.cfvalid_to_len16 = htobe32(FW_LEN16(caps));
3056	rc = -t4_wr_mbox(sc, sc->mbox, &caps, sizeof(caps), &caps);
3057	if (rc != 0) {
3058		device_printf(sc->dev,
3059		    "failed to get card capabilities: %d.\n", rc);
3060		return (rc);
3061	}
3062
3063#define READ_CAPS(x) do { \
3064	sc->x = be16toh(caps.x); \
3065} while (0)
3066	READ_CAPS(nbmcaps);
3067	READ_CAPS(linkcaps);
3068	READ_CAPS(switchcaps);
3069	READ_CAPS(niccaps);
3070	READ_CAPS(toecaps);
3071	READ_CAPS(rdmacaps);
3072	READ_CAPS(tlscaps);
3073	READ_CAPS(iscsicaps);
3074	READ_CAPS(fcoecaps);
3075
3076	if (sc->niccaps & FW_CAPS_CONFIG_NIC_ETHOFLD) {
3077		param[0] = FW_PARAM_PFVF(ETHOFLD_START);
3078		param[1] = FW_PARAM_PFVF(ETHOFLD_END);
3079		param[2] = FW_PARAM_DEV(FLOWC_BUFFIFO_SZ);
3080		rc = -t4_query_params(sc, sc->mbox, sc->pf, 0, 3, param, val);
3081		if (rc != 0) {
3082			device_printf(sc->dev,
3083			    "failed to query NIC parameters: %d.\n", rc);
3084			return (rc);
3085		}
3086		sc->tids.etid_base = val[0];
3087		sc->params.etid_min = val[0];
3088		sc->tids.netids = val[1] - val[0] + 1;
3089		sc->params.netids = sc->tids.netids;
3090		sc->params.eo_wr_cred = val[2];
3091		sc->params.ethoffload = 1;
3092	}
3093
3094	if (sc->toecaps) {
3095		/* query offload-related parameters */
3096		param[0] = FW_PARAM_DEV(NTID);
3097		param[1] = FW_PARAM_PFVF(SERVER_START);
3098		param[2] = FW_PARAM_PFVF(SERVER_END);
3099		param[3] = FW_PARAM_PFVF(TDDP_START);
3100		param[4] = FW_PARAM_PFVF(TDDP_END);
3101		param[5] = FW_PARAM_DEV(FLOWC_BUFFIFO_SZ);
3102		rc = -t4_query_params(sc, sc->mbox, sc->pf, 0, 6, param, val);
3103		if (rc != 0) {
3104			device_printf(sc->dev,
3105			    "failed to query TOE parameters: %d.\n", rc);
3106			return (rc);
3107		}
3108		sc->tids.ntids = val[0];
3109		sc->tids.natids = min(sc->tids.ntids / 2, MAX_ATIDS);
3110		sc->tids.stid_base = val[1];
3111		sc->tids.nstids = val[2] - val[1] + 1;
3112		sc->vres.ddp.start = val[3];
3113		sc->vres.ddp.size = val[4] - val[3] + 1;
3114		sc->params.ofldq_wr_cred = val[5];
3115		sc->params.offload = 1;
3116	}
3117	if (sc->rdmacaps) {
3118		param[0] = FW_PARAM_PFVF(STAG_START);
3119		param[1] = FW_PARAM_PFVF(STAG_END);
3120		param[2] = FW_PARAM_PFVF(RQ_START);
3121		param[3] = FW_PARAM_PFVF(RQ_END);
3122		param[4] = FW_PARAM_PFVF(PBL_START);
3123		param[5] = FW_PARAM_PFVF(PBL_END);
3124		rc = -t4_query_params(sc, sc->mbox, sc->pf, 0, 6, param, val);
3125		if (rc != 0) {
3126			device_printf(sc->dev,
3127			    "failed to query RDMA parameters(1): %d.\n", rc);
3128			return (rc);
3129		}
3130		sc->vres.stag.start = val[0];
3131		sc->vres.stag.size = val[1] - val[0] + 1;
3132		sc->vres.rq.start = val[2];
3133		sc->vres.rq.size = val[3] - val[2] + 1;
3134		sc->vres.pbl.start = val[4];
3135		sc->vres.pbl.size = val[5] - val[4] + 1;
3136
3137		param[0] = FW_PARAM_PFVF(SQRQ_START);
3138		param[1] = FW_PARAM_PFVF(SQRQ_END);
3139		param[2] = FW_PARAM_PFVF(CQ_START);
3140		param[3] = FW_PARAM_PFVF(CQ_END);
3141		param[4] = FW_PARAM_PFVF(OCQ_START);
3142		param[5] = FW_PARAM_PFVF(OCQ_END);
3143		rc = -t4_query_params(sc, sc->mbox, sc->pf, 0, 6, param, val);
3144		if (rc != 0) {
3145			device_printf(sc->dev,
3146			    "failed to query RDMA parameters(2): %d.\n", rc);
3147			return (rc);
3148		}
3149		sc->vres.qp.start = val[0];
3150		sc->vres.qp.size = val[1] - val[0] + 1;
3151		sc->vres.cq.start = val[2];
3152		sc->vres.cq.size = val[3] - val[2] + 1;
3153		sc->vres.ocq.start = val[4];
3154		sc->vres.ocq.size = val[5] - val[4] + 1;
3155	}
3156	if (sc->iscsicaps) {
3157		param[0] = FW_PARAM_PFVF(ISCSI_START);
3158		param[1] = FW_PARAM_PFVF(ISCSI_END);
3159		rc = -t4_query_params(sc, sc->mbox, sc->pf, 0, 2, param, val);
3160		if (rc != 0) {
3161			device_printf(sc->dev,
3162			    "failed to query iSCSI parameters: %d.\n", rc);
3163			return (rc);
3164		}
3165		sc->vres.iscsi.start = val[0];
3166		sc->vres.iscsi.size = val[1] - val[0] + 1;
3167	}
3168
3169	/*
3170	 * We've got the params we wanted to query via the firmware.  Now grab
3171	 * some others directly from the chip.
3172	 */
3173	rc = t4_read_chip_settings(sc);
3174
3175	return (rc);
3176}
3177
3178static int
3179set_params__post_init(struct adapter *sc)
3180{
3181	uint32_t param, val;
3182
3183	/* ask for encapsulated CPLs */
3184	param = FW_PARAM_PFVF(CPLFW4MSG_ENCAP);
3185	val = 1;
3186	(void)t4_set_params(sc, sc->mbox, sc->pf, 0, 1, &param, &val);
3187
3188	return (0);
3189}
3190
3191#undef FW_PARAM_PFVF
3192#undef FW_PARAM_DEV
3193
3194static void
3195t4_set_desc(struct adapter *sc)
3196{
3197	char buf[128];
3198	struct adapter_params *p = &sc->params;
3199
3200	snprintf(buf, sizeof(buf), "Chelsio %s %sNIC (rev %d), S/N:%s, "
3201	    "P/N:%s, E/C:%s", p->vpd.id, is_offload(sc) ? "R" : "",
3202	    chip_rev(sc), p->vpd.sn, p->vpd.pn, p->vpd.ec);
3203
3204	device_set_desc_copy(sc->dev, buf);
3205}
3206
3207static void
3208build_medialist(struct port_info *pi, struct ifmedia *media)
3209{
3210	int m;
3211
3212	PORT_LOCK(pi);
3213
3214	ifmedia_removeall(media);
3215
3216	m = IFM_ETHER | IFM_FDX;
3217
3218	switch (pi->port_type) {
3219	case FW_PORT_TYPE_BT_XFI:
3220	case FW_PORT_TYPE_BT_XAUI:
3221		ifmedia_add(media, m | IFM_10G_T, 0, NULL);
3222		/* fall through */
3223
3224	case FW_PORT_TYPE_BT_SGMII:
3225		ifmedia_add(media, m | IFM_1000_T, 0, NULL);
3226		ifmedia_add(media, m | IFM_100_TX, 0, NULL);
3227		ifmedia_add(media, IFM_ETHER | IFM_AUTO, 0, NULL);
3228		ifmedia_set(media, IFM_ETHER | IFM_AUTO);
3229		break;
3230
3231	case FW_PORT_TYPE_CX4:
3232		ifmedia_add(media, m | IFM_10G_CX4, 0, NULL);
3233		ifmedia_set(media, m | IFM_10G_CX4);
3234		break;
3235
3236	case FW_PORT_TYPE_QSFP_10G:
3237	case FW_PORT_TYPE_SFP:
3238	case FW_PORT_TYPE_FIBER_XFI:
3239	case FW_PORT_TYPE_FIBER_XAUI:
3240		switch (pi->mod_type) {
3241
3242		case FW_PORT_MOD_TYPE_LR:
3243			ifmedia_add(media, m | IFM_10G_LR, 0, NULL);
3244			ifmedia_set(media, m | IFM_10G_LR);
3245			break;
3246
3247		case FW_PORT_MOD_TYPE_SR:
3248			ifmedia_add(media, m | IFM_10G_SR, 0, NULL);
3249			ifmedia_set(media, m | IFM_10G_SR);
3250			break;
3251
3252		case FW_PORT_MOD_TYPE_LRM:
3253			ifmedia_add(media, m | IFM_10G_LRM, 0, NULL);
3254			ifmedia_set(media, m | IFM_10G_LRM);
3255			break;
3256
3257		case FW_PORT_MOD_TYPE_TWINAX_PASSIVE:
3258		case FW_PORT_MOD_TYPE_TWINAX_ACTIVE:
3259			ifmedia_add(media, m | IFM_10G_TWINAX, 0, NULL);
3260			ifmedia_set(media, m | IFM_10G_TWINAX);
3261			break;
3262
3263		case FW_PORT_MOD_TYPE_NONE:
3264			m &= ~IFM_FDX;
3265			ifmedia_add(media, m | IFM_NONE, 0, NULL);
3266			ifmedia_set(media, m | IFM_NONE);
3267			break;
3268
3269		case FW_PORT_MOD_TYPE_NA:
3270		case FW_PORT_MOD_TYPE_ER:
3271		default:
3272			device_printf(pi->dev,
3273			    "unknown port_type (%d), mod_type (%d)\n",
3274			    pi->port_type, pi->mod_type);
3275			ifmedia_add(media, m | IFM_UNKNOWN, 0, NULL);
3276			ifmedia_set(media, m | IFM_UNKNOWN);
3277			break;
3278		}
3279		break;
3280
3281	case FW_PORT_TYPE_QSFP:
3282		switch (pi->mod_type) {
3283
3284		case FW_PORT_MOD_TYPE_LR:
3285			ifmedia_add(media, m | IFM_40G_LR4, 0, NULL);
3286			ifmedia_set(media, m | IFM_40G_LR4);
3287			break;
3288
3289		case FW_PORT_MOD_TYPE_SR:
3290			ifmedia_add(media, m | IFM_40G_SR4, 0, NULL);
3291			ifmedia_set(media, m | IFM_40G_SR4);
3292			break;
3293
3294		case FW_PORT_MOD_TYPE_TWINAX_PASSIVE:
3295		case FW_PORT_MOD_TYPE_TWINAX_ACTIVE:
3296			ifmedia_add(media, m | IFM_40G_CR4, 0, NULL);
3297			ifmedia_set(media, m | IFM_40G_CR4);
3298			break;
3299
3300		case FW_PORT_MOD_TYPE_NONE:
3301			m &= ~IFM_FDX;
3302			ifmedia_add(media, m | IFM_NONE, 0, NULL);
3303			ifmedia_set(media, m | IFM_NONE);
3304			break;
3305
3306		default:
3307			device_printf(pi->dev,
3308			    "unknown port_type (%d), mod_type (%d)\n",
3309			    pi->port_type, pi->mod_type);
3310			ifmedia_add(media, m | IFM_UNKNOWN, 0, NULL);
3311			ifmedia_set(media, m | IFM_UNKNOWN);
3312			break;
3313		}
3314		break;
3315
3316	default:
3317		device_printf(pi->dev,
3318		    "unknown port_type (%d), mod_type (%d)\n", pi->port_type,
3319		    pi->mod_type);
3320		ifmedia_add(media, m | IFM_UNKNOWN, 0, NULL);
3321		ifmedia_set(media, m | IFM_UNKNOWN);
3322		break;
3323	}
3324
3325	PORT_UNLOCK(pi);
3326}
3327
3328#define FW_MAC_EXACT_CHUNK	7
3329
3330/*
3331 * Program the port's XGMAC based on parameters in ifnet.  The caller also
3332 * indicates which parameters should be programmed (the rest are left alone).
3333 */
3334int
3335update_mac_settings(struct ifnet *ifp, int flags)
3336{
3337	int rc = 0;
3338	struct vi_info *vi = ifp->if_softc;
3339	struct port_info *pi = vi->pi;
3340	struct adapter *sc = pi->adapter;
3341	int mtu = -1, promisc = -1, allmulti = -1, vlanex = -1;
3342
3343	ASSERT_SYNCHRONIZED_OP(sc);
3344	KASSERT(flags, ("%s: not told what to update.", __func__));
3345
3346	if (flags & XGMAC_MTU)
3347		mtu = ifp->if_mtu;
3348
3349	if (flags & XGMAC_PROMISC)
3350		promisc = ifp->if_flags & IFF_PROMISC ? 1 : 0;
3351
3352	if (flags & XGMAC_ALLMULTI)
3353		allmulti = ifp->if_flags & IFF_ALLMULTI ? 1 : 0;
3354
3355	if (flags & XGMAC_VLANEX)
3356		vlanex = ifp->if_capenable & IFCAP_VLAN_HWTAGGING ? 1 : 0;
3357
3358	if (flags & (XGMAC_MTU|XGMAC_PROMISC|XGMAC_ALLMULTI|XGMAC_VLANEX)) {
3359		rc = -t4_set_rxmode(sc, sc->mbox, vi->viid, mtu, promisc,
3360		    allmulti, 1, vlanex, false);
3361		if (rc) {
3362			if_printf(ifp, "set_rxmode (%x) failed: %d\n", flags,
3363			    rc);
3364			return (rc);
3365		}
3366	}
3367
3368	if (flags & XGMAC_UCADDR) {
3369		uint8_t ucaddr[ETHER_ADDR_LEN];
3370
3371		bcopy(IF_LLADDR(ifp), ucaddr, sizeof(ucaddr));
3372		rc = t4_change_mac(sc, sc->mbox, vi->viid, vi->xact_addr_filt,
3373		    ucaddr, true, true);
3374		if (rc < 0) {
3375			rc = -rc;
3376			if_printf(ifp, "change_mac failed: %d\n", rc);
3377			return (rc);
3378		} else {
3379			vi->xact_addr_filt = rc;
3380			rc = 0;
3381		}
3382	}
3383
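	/*
	 * Program exact-match multicast filters FW_MAC_EXACT_CHUNK addresses
	 * at a time.  del is set only for the first chunk so the firmware
	 * replaces the existing list instead of appending to it; addresses
	 * that don't fit in the exact-match table fall back to the 64b hash,
	 * which is programmed last.
	 */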
3384	if (flags & XGMAC_MCADDRS) {
3385		const uint8_t *mcaddr[FW_MAC_EXACT_CHUNK];
3386		int del = 1;
3387		uint64_t hash = 0;
3388		struct ifmultiaddr *ifma;
3389		int i = 0, j;
3390
3391		if_maddr_rlock(ifp);
3392		TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
3393			if (ifma->ifma_addr->sa_family != AF_LINK)
3394				continue;
3395			mcaddr[i] =
3396			    LLADDR((struct sockaddr_dl *)ifma->ifma_addr);
3397			MPASS(ETHER_IS_MULTICAST(mcaddr[i]));
3398			i++;
3399
3400			if (i == FW_MAC_EXACT_CHUNK) {
3401				rc = t4_alloc_mac_filt(sc, sc->mbox, vi->viid,
3402				    del, i, mcaddr, NULL, &hash, 0);
3403				if (rc < 0) {
3404					rc = -rc;
3405					for (j = 0; j < i; j++) {
3406						if_printf(ifp,
3407						    "failed to add mc address"
3408						    " %02x:%02x:%02x:"
3409						    "%02x:%02x:%02x rc=%d\n",
3410						    mcaddr[j][0], mcaddr[j][1],
3411						    mcaddr[j][2], mcaddr[j][3],
3412						    mcaddr[j][4], mcaddr[j][5],
3413						    rc);
3414					}
3415					goto mcfail;
3416				}
3417				del = 0;
3418				i = 0;
3419			}
3420		}
3421		if (i > 0) {
3422			rc = t4_alloc_mac_filt(sc, sc->mbox, vi->viid, del, i,
3423			    mcaddr, NULL, &hash, 0);
3424			if (rc < 0) {
3425				rc = -rc;
3426				for (j = 0; j < i; j++) {
3427					if_printf(ifp,
3428					    "failed to add mc address"
3429					    " %02x:%02x:%02x:"
3430					    "%02x:%02x:%02x rc=%d\n",
3431					    mcaddr[j][0], mcaddr[j][1],
3432					    mcaddr[j][2], mcaddr[j][3],
3433					    mcaddr[j][4], mcaddr[j][5],
3434					    rc);
3435				}
3436				goto mcfail;
3437			}
3438		}
3439
3440		rc = -t4_set_addr_hash(sc, sc->mbox, vi->viid, 0, hash, 0);
3441		if (rc != 0)
3442			if_printf(ifp, "failed to set mc address hash: %d", rc);
3443mcfail:
3444		if_maddr_runlock(ifp);
3445	}
3446
3447	return (rc);
3448}
3449
3450/*
3451 * {begin|end}_synchronized_op must be called from the same thread.
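 * flags: SLEEP_OK allows sleeping until the adapter is available, INTR_OK
 * makes that sleep interruptible, and HOLD_LOCK returns with the adapter
 * lock still held on success.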
3452 */
3453int
3454begin_synchronized_op(struct adapter *sc, struct vi_info *vi, int flags,
3455    char *wmesg)
3456{
3457	int rc, pri;
3458
3459#ifdef WITNESS
3460	/* the caller thinks it's ok to sleep, but is it really? */
3461	if (flags & SLEEP_OK)
3462		WITNESS_WARN(WARN_GIANTOK | WARN_SLEEPOK, NULL,
3463		    "begin_synchronized_op");
3464#endif
3465
3466	if (flags & INTR_OK)
3467		pri = PCATCH;
3468	else
3469		pri = 0;
3470
3471	ADAPTER_LOCK(sc);
3472	for (;;) {
3473
3474		if (vi && IS_DOOMED(vi)) {
3475			rc = ENXIO;
3476			goto done;
3477		}
3478
3479		if (!IS_BUSY(sc)) {
3480			rc = 0;
3481			break;
3482		}
3483
3484		if (!(flags & SLEEP_OK)) {
3485			rc = EBUSY;
3486			goto done;
3487		}
3488
3489		if (mtx_sleep(&sc->flags, &sc->sc_lock, pri, wmesg, 0)) {
3490			rc = EINTR;
3491			goto done;
3492		}
3493	}
3494
3495	KASSERT(!IS_BUSY(sc), ("%s: controller busy.", __func__));
3496	SET_BUSY(sc);
3497#ifdef INVARIANTS
3498	sc->last_op = wmesg;
3499	sc->last_op_thr = curthread;
3500	sc->last_op_flags = flags;
3501#endif
3502
3503done:
3504	if (!(flags & HOLD_LOCK) || rc)
3505		ADAPTER_UNLOCK(sc);
3506
3507	return (rc);
3508}
3509
3510/*
3511 * Tell if_ioctl and if_init that the VI is going away.  This is a
3512 * special variant of begin_synchronized_op and must be paired with a
3513 * call to end_synchronized_op.
3514 */
3515void
3516doom_vi(struct adapter *sc, struct vi_info *vi)
3517{
3518
3519	ADAPTER_LOCK(sc);
3520	SET_DOOMED(vi);
3521	wakeup(&sc->flags);
3522	while (IS_BUSY(sc))
3523		mtx_sleep(&sc->flags, &sc->sc_lock, 0, "t4detach", 0);
3524	SET_BUSY(sc);
3525#ifdef INVARIANTS
3526	sc->last_op = "t4detach";
3527	sc->last_op_thr = curthread;
3528	sc->last_op_flags = 0;
3529#endif
3530	ADAPTER_UNLOCK(sc);
3531}
3532
3533/*
3534 * {begin|end}_synchronized_op must be called from the same thread.
3535 */
3536void
3537end_synchronized_op(struct adapter *sc, int flags)
3538{
3539
3540	if (flags & LOCK_HELD)
3541		ADAPTER_LOCK_ASSERT_OWNED(sc);
3542	else
3543		ADAPTER_LOCK(sc);
3544
3545	KASSERT(IS_BUSY(sc), ("%s: controller not busy.", __func__));
3546	CLR_BUSY(sc);
3547	wakeup(&sc->flags);
3548	ADAPTER_UNLOCK(sc);
3549}
3550
3551static int
3552cxgbe_init_synchronized(struct vi_info *vi)
3553{
3554	struct port_info *pi = vi->pi;
3555	struct adapter *sc = pi->adapter;
3556	struct ifnet *ifp = vi->ifp;
3557	int rc = 0, i;
3558	struct sge_txq *txq;
3559
3560	ASSERT_SYNCHRONIZED_OP(sc);
3561
3562	if (ifp->if_drv_flags & IFF_DRV_RUNNING)
3563		return (0);	/* already running */
3564
3565	if (!(sc->flags & FULL_INIT_DONE) &&
3566	    ((rc = adapter_full_init(sc)) != 0))
3567		return (rc);	/* error message displayed already */
3568
3569	if (!(vi->flags & VI_INIT_DONE) &&
3570	    ((rc = vi_full_init(vi)) != 0))
3571		return (rc); /* error message displayed already */
3572
3573	rc = update_mac_settings(ifp, XGMAC_ALL);
3574	if (rc)
3575		goto done;	/* error message displayed already */
3576
3577	rc = -t4_enable_vi(sc, sc->mbox, vi->viid, true, true);
3578	if (rc != 0) {
3579		if_printf(ifp, "enable_vi failed: %d\n", rc);
3580		goto done;
3581	}
3582
3583	/*
3584	 * Can't fail from this point onwards.  Review cxgbe_uninit_synchronized
3585	 * if this changes.
3586	 */
3587
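	/* Let the tx path use the queues; transmit is gated on EQ_ENABLED. */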
3588	for_each_txq(vi, i, txq) {
3589		TXQ_LOCK(txq);
3590		txq->eq.flags |= EQ_ENABLED;
3591		TXQ_UNLOCK(txq);
3592	}
3593
3594	/*
3595	 * The first iq of the first port to come up is used for tracing.
3596	 */
3597	if (sc->traceq < 0 && IS_MAIN_VI(vi)) {
3598		sc->traceq = sc->sge.rxq[vi->first_rxq].iq.abs_id;
3599		t4_write_reg(sc, is_t4(sc) ?  A_MPS_TRC_RSS_CONTROL :
3600		    A_MPS_T5_TRC_RSS_CONTROL, V_RSSCONTROL(pi->tx_chan) |
3601		    V_QUEUENUMBER(sc->traceq));
3602		pi->flags |= HAS_TRACEQ;
3603	}
3604
3605	/* all ok */
3606	PORT_LOCK(pi);
3607	ifp->if_drv_flags |= IFF_DRV_RUNNING;
3608	pi->up_vis++;
3609
3610	if (pi->nvi > 1)
3611		callout_reset(&vi->tick, hz, vi_tick, vi);
3612	else
3613		callout_reset(&pi->tick, hz, cxgbe_tick, pi);
3614	PORT_UNLOCK(pi);
3615done:
3616	if (rc != 0)
3617		cxgbe_uninit_synchronized(vi);
3618
3619	return (rc);
3620}
3621
3622/*
3623 * Idempotent.
3624 */
3625static int
3626cxgbe_uninit_synchronized(struct vi_info *vi)
3627{
3628	struct port_info *pi = vi->pi;
3629	struct adapter *sc = pi->adapter;
3630	struct ifnet *ifp = vi->ifp;
3631	int rc, i;
3632	struct sge_txq *txq;
3633
3634	ASSERT_SYNCHRONIZED_OP(sc);
3635
3636	if (!(vi->flags & VI_INIT_DONE)) {
3637		KASSERT(!(ifp->if_drv_flags & IFF_DRV_RUNNING),
3638		    ("uninited VI is running"));
3639		return (0);
3640	}
3641
3642	/*
3643	 * Disable the VI so that all its data in either direction is discarded
3644	 * by the MPS.  Leave everything else (the queues, interrupts, and 1Hz
3645	 * tick) intact as the TP can deliver negative advice or data that it's
3646	 * holding in its RAM (for an offloaded connection) even after the VI is
3647	 * disabled.
3648	 */
3649	rc = -t4_enable_vi(sc, sc->mbox, vi->viid, false, false);
3650	if (rc) {
3651		if_printf(ifp, "disable_vi failed: %d\n", rc);
3652		return (rc);
3653	}
3654
3655	for_each_txq(vi, i, txq) {
3656		TXQ_LOCK(txq);
3657		txq->eq.flags &= ~EQ_ENABLED;
3658		TXQ_UNLOCK(txq);
3659	}
3660
3661	PORT_LOCK(pi);
3662	if (pi->nvi == 1)
3663		callout_stop(&pi->tick);
3664	else
3665		callout_stop(&vi->tick);
3666	if (!(ifp->if_drv_flags & IFF_DRV_RUNNING)) {
3667		PORT_UNLOCK(pi);
3668		return (0);
3669	}
3670	ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
3671	pi->up_vis--;
3672	if (pi->up_vis > 0) {
3673		PORT_UNLOCK(pi);
3674		return (0);
3675	}
3676	PORT_UNLOCK(pi);
3677
3678	pi->link_cfg.link_ok = 0;
3679	pi->link_cfg.speed = 0;
3680	pi->linkdnrc = -1;
3681	t4_os_link_changed(sc, pi->port_id, 0, -1);
3682
3683	return (0);
3684}
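/*
 * Note on the up_vis accounting above: port-level link state is torn down
 * only when the last running VI on the port goes down.  While any sibling
 * VI remains up, only this VI's traffic is discarded at the MPS and its tx
 * queues are marked !EQ_ENABLED; the queues, interrupts, and tick survive.
 */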
3685
3686/*
3687 * It is ok for this function to fail midway and return right away.  t4_detach
3688 * will walk the entire sc->irq list and clean up whatever is valid.
3689 */
3690static int
3691setup_intr_handlers(struct adapter *sc)
3692{
3693	int rc, rid, p, q, v;
3694	char s[8];
3695	struct irq *irq;
3696	struct port_info *pi;
3697	struct vi_info *vi;
3698	struct sge *sge = &sc->sge;
3699	struct sge_rxq *rxq;
3700#ifdef TCP_OFFLOAD
3701	struct sge_ofld_rxq *ofld_rxq;
3702#endif
3703#ifdef DEV_NETMAP
3704	struct sge_nm_rxq *nm_rxq;
3705#endif
3706
3707	/*
3708	 * Setup interrupts.
3709	 */
3710	irq = &sc->irq[0];
3711	rid = sc->intr_type == INTR_INTX ? 0 : 1;
3712	if (sc->intr_count == 1)
3713		return (t4_alloc_irq(sc, irq, rid, t4_intr_all, sc, "all"));
3714
3715	/* Multiple interrupts. */
3716	KASSERT(sc->intr_count >= T4_EXTRA_INTR + sc->params.nports,
3717	    ("%s: too few intr.", __func__));
3718
3719	/* The first one is always error intr */
3720	rc = t4_alloc_irq(sc, irq, rid, t4_intr_err, sc, "err");
3721	if (rc != 0)
3722		return (rc);
3723	irq++;
3724	rid++;
3725
3726	/* The second one is always the firmware event queue */
3727	rc = t4_alloc_irq(sc, irq, rid, t4_intr_evt, &sge->fwq, "evt");
3728	if (rc != 0)
3729		return (rc);
3730	irq++;
3731	rid++;
3732
3733	for_each_port(sc, p) {
3734		pi = sc->port[p];
3735		for_each_vi(pi, v, vi) {
3736			vi->first_intr = rid - 1;
3737
3738			if (vi->nnmrxq > 0) {
3739				int n = max(vi->nrxq, vi->nnmrxq);
3740
3741				MPASS(vi->flags & INTR_RXQ);
3742
3743				rxq = &sge->rxq[vi->first_rxq];
3744#ifdef DEV_NETMAP
3745				nm_rxq = &sge->nm_rxq[vi->first_nm_rxq];
3746#endif
3747				for (q = 0; q < n; q++) {
3748					snprintf(s, sizeof(s), "%x%c%x", p,
3749					    'a' + v, q);
3750					if (q < vi->nrxq)
3751						irq->rxq = rxq++;
3752#ifdef DEV_NETMAP
3753					if (q < vi->nnmrxq)
3754						irq->nm_rxq = nm_rxq++;
3755#endif
3756					rc = t4_alloc_irq(sc, irq, rid,
3757					    t4_vi_intr, irq, s);
3758					if (rc != 0)
3759						return (rc);
3760					irq++;
3761					rid++;
3762					vi->nintr++;
3763				}
3764			} else if (vi->flags & INTR_RXQ) {
3765				for_each_rxq(vi, q, rxq) {
3766					snprintf(s, sizeof(s), "%x%c%x", p,
3767					    'a' + v, q);
3768					rc = t4_alloc_irq(sc, irq, rid,
3769					    t4_intr, rxq, s);
3770					if (rc != 0)
3771						return (rc);
3772					irq++;
3773					rid++;
3774					vi->nintr++;
3775				}
3776			}
3777#ifdef TCP_OFFLOAD
3778			if (vi->flags & INTR_OFLD_RXQ) {
3779				for_each_ofld_rxq(vi, q, ofld_rxq) {
3780					snprintf(s, sizeof(s), "%x%c%x", p,
3781					    'A' + v, q);
3782					rc = t4_alloc_irq(sc, irq, rid,
3783					    t4_intr, ofld_rxq, s);
3784					if (rc != 0)
3785						return (rc);
3786					irq++;
3787					rid++;
3788					vi->nintr++;
3789				}
3790			}
3791#endif
3792		}
3793	}
3794	MPASS(irq == &sc->irq[sc->intr_count]);
3795
3796	return (0);
3797}
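/*
 * A sketch of the resulting vector layout when multiple vectors are in use:
 * vector 0 is "err", vector 1 is "evt" (the firmware event queue), and the
 * rest are per-queue vectors described as "%x%c%x": the port number in hex,
 * 'a' + the VI index (uppercase 'A' for TOE rx queues), and the queue number
 * in hex.  For example, "0a1" is rxq 1 of the first VI on port 0, and "1A0"
 * is ofld_rxq 0 of the first VI on port 1.
 */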
3798
3799int
3800adapter_full_init(struct adapter *sc)
3801{
3802	int rc, i;
3803
3804	ASSERT_SYNCHRONIZED_OP(sc);
3805	ADAPTER_LOCK_ASSERT_NOTOWNED(sc);
3806	KASSERT((sc->flags & FULL_INIT_DONE) == 0,
3807	    ("%s: FULL_INIT_DONE already", __func__));
3808
3809	/*
3810	 * queues that belong to the adapter (not any particular port).
3811	 */
3812	rc = t4_setup_adapter_queues(sc);
3813	if (rc != 0)
3814		goto done;
3815
3816	for (i = 0; i < nitems(sc->tq); i++) {
3817		sc->tq[i] = taskqueue_create("t4 taskq", M_NOWAIT,
3818		    taskqueue_thread_enqueue, &sc->tq[i]);
3819		if (sc->tq[i] == NULL) {
3820			device_printf(sc->dev,
3821			    "failed to allocate task queue %d\n", i);
3822			rc = ENOMEM;
3823			goto done;
3824		}
3825		taskqueue_start_threads(&sc->tq[i], 1, PI_NET, "%s tq%d",
3826		    device_get_nameunit(sc->dev), i);
3827	}
3828
3829	t4_intr_enable(sc);
3830	sc->flags |= FULL_INIT_DONE;
3831done:
3832	if (rc != 0)
3833		adapter_full_uninit(sc);
3834
3835	return (rc);
3836}
3837
3838int
3839adapter_full_uninit(struct adapter *sc)
3840{
3841	int i;
3842
3843	ADAPTER_LOCK_ASSERT_NOTOWNED(sc);
3844
3845	t4_teardown_adapter_queues(sc);
3846
3847	for (i = 0; i < nitems(sc->tq) && sc->tq[i]; i++) {
3848		taskqueue_free(sc->tq[i]);
3849		sc->tq[i] = NULL;
3850	}
3851
3852	sc->flags &= ~FULL_INIT_DONE;
3853
3854	return (0);
3855}
3856
3857#ifdef RSS
3858#define SUPPORTED_RSS_HASHTYPES (RSS_HASHTYPE_RSS_IPV4 | \
3859    RSS_HASHTYPE_RSS_TCP_IPV4 | RSS_HASHTYPE_RSS_IPV6 | \
3860    RSS_HASHTYPE_RSS_TCP_IPV6 | RSS_HASHTYPE_RSS_UDP_IPV4 | \
3861    RSS_HASHTYPE_RSS_UDP_IPV6)
3862
3863/* Translates kernel hash types to hardware. */
3864static int
3865hashconfig_to_hashen(int hashconfig)
3866{
3867	int hashen = 0;
3868
3869	if (hashconfig & RSS_HASHTYPE_RSS_IPV4)
3870		hashen |= F_FW_RSS_VI_CONFIG_CMD_IP4TWOTUPEN;
3871	if (hashconfig & RSS_HASHTYPE_RSS_IPV6)
3872		hashen |= F_FW_RSS_VI_CONFIG_CMD_IP6TWOTUPEN;
3873	if (hashconfig & RSS_HASHTYPE_RSS_UDP_IPV4) {
3874		hashen |= F_FW_RSS_VI_CONFIG_CMD_UDPEN |
3875		    F_FW_RSS_VI_CONFIG_CMD_IP4FOURTUPEN;
3876	}
3877	if (hashconfig & RSS_HASHTYPE_RSS_UDP_IPV6) {
3878		hashen |= F_FW_RSS_VI_CONFIG_CMD_UDPEN |
3879		    F_FW_RSS_VI_CONFIG_CMD_IP6FOURTUPEN;
3880	}
3881	if (hashconfig & RSS_HASHTYPE_RSS_TCP_IPV4)
3882		hashen |= F_FW_RSS_VI_CONFIG_CMD_IP4FOURTUPEN;
3883	if (hashconfig & RSS_HASHTYPE_RSS_TCP_IPV6)
3884		hashen |= F_FW_RSS_VI_CONFIG_CMD_IP6FOURTUPEN;
3885
3886	return (hashen);
3887}
3888
3889/* Translates hardware hash types to kernel. */
3890static int
3891hashen_to_hashconfig(int hashen)
3892{
3893	int hashconfig = 0;
3894
3895	if (hashen & F_FW_RSS_VI_CONFIG_CMD_UDPEN) {
3896		/*
3897		 * If UDP hashing was enabled it must have been enabled for
3898		 * either IPv4 or IPv6 (inclusive or).  Enabling UDP without
3899		 * enabling any 4-tuple hash is a nonsensical configuration.
3900		 */
3901		MPASS(hashen & (F_FW_RSS_VI_CONFIG_CMD_IP4FOURTUPEN |
3902		    F_FW_RSS_VI_CONFIG_CMD_IP6FOURTUPEN));
3903
3904		if (hashen & F_FW_RSS_VI_CONFIG_CMD_IP4FOURTUPEN)
3905			hashconfig |= RSS_HASHTYPE_RSS_UDP_IPV4;
3906		if (hashen & F_FW_RSS_VI_CONFIG_CMD_IP6FOURTUPEN)
3907			hashconfig |= RSS_HASHTYPE_RSS_UDP_IPV6;
3908	}
3909	if (hashen & F_FW_RSS_VI_CONFIG_CMD_IP4FOURTUPEN)
3910		hashconfig |= RSS_HASHTYPE_RSS_TCP_IPV4;
3911	if (hashen & F_FW_RSS_VI_CONFIG_CMD_IP6FOURTUPEN)
3912		hashconfig |= RSS_HASHTYPE_RSS_TCP_IPV6;
3913	if (hashen & F_FW_RSS_VI_CONFIG_CMD_IP4TWOTUPEN)
3914		hashconfig |= RSS_HASHTYPE_RSS_IPV4;
3915	if (hashen & F_FW_RSS_VI_CONFIG_CMD_IP6TWOTUPEN)
3916		hashconfig |= RSS_HASHTYPE_RSS_IPV6;
3917
3918	return (hashconfig);
3919}
3920#endif
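/*
 * Worked example of the asymmetry between these two translators: if the
 * kernel requests only RSS_HASHTYPE_RSS_UDP_IPV4, hashconfig_to_hashen()
 * returns UDPEN | IP4FOURTUPEN, and hashen_to_hashconfig() maps that back
 * to UDP_IPV4 | TCP_IPV4.  The XOR in vi_full_init() below therefore
 * leaves TCP_IPV4 in "extra", and the driver warns that TCP/IPv4 4-tuple
 * hashing was forced on.
 */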
3921
3922int
3923vi_full_init(struct vi_info *vi)
3924{
3925	struct adapter *sc = vi->pi->adapter;
3926	struct ifnet *ifp = vi->ifp;
3927	uint16_t *rss;
3928	struct sge_rxq *rxq;
3929	int rc, i, j, hashen;
3930#ifdef RSS
3931	int nbuckets = rss_getnumbuckets();
3932	int hashconfig = rss_gethashconfig();
3933	int extra;
3934	uint32_t raw_rss_key[RSS_KEYSIZE / sizeof(uint32_t)];
3935	uint32_t rss_key[RSS_KEYSIZE / sizeof(uint32_t)];
3936#endif
3937
3938	ASSERT_SYNCHRONIZED_OP(sc);
3939	KASSERT((vi->flags & VI_INIT_DONE) == 0,
3940	    ("%s: VI_INIT_DONE already", __func__));
3941
3942	sysctl_ctx_init(&vi->ctx);
3943	vi->flags |= VI_SYSCTL_CTX;
3944
3945	/*
3946	 * Allocate tx/rx/fl queues for this VI.
3947	 */
3948	rc = t4_setup_vi_queues(vi);
3949	if (rc != 0)
3950		goto done;	/* error message displayed already */
3951
3952	/*
3953	 * Setup RSS for this VI.  Save a copy of the RSS table for later use.
3954	 */
3955	if (vi->nrxq > vi->rss_size) {
3956		if_printf(ifp, "nrxq (%d) > hw RSS table size (%d); "
3957		    "some queues will never receive traffic.\n", vi->nrxq,
3958		    vi->rss_size);
3959	} else if (vi->rss_size % vi->nrxq) {
3960		if_printf(ifp, "nrxq (%d), hw RSS table size (%d); "
3961		    "expect uneven traffic distribution.\n", vi->nrxq,
3962		    vi->rss_size);
3963	}
3964#ifdef RSS
3965	MPASS(RSS_KEYSIZE == 40);
3966	if (vi->nrxq != nbuckets) {
3967		if_printf(ifp, "nrxq (%d) != kernel RSS buckets (%d);"
3968		    "performance will be impacted.\n", vi->nrxq, nbuckets);
3969	}
3970
3971	rss_getkey((void *)&raw_rss_key[0]);
3972	for (i = 0; i < nitems(rss_key); i++) {
3973		rss_key[i] = htobe32(raw_rss_key[nitems(rss_key) - 1 - i]);
3974	}
3975	t4_write_rss_key(sc, &rss_key[0], -1);
3976#endif
3977	rss = malloc(vi->rss_size * sizeof (*rss), M_CXGBE, M_ZERO | M_WAITOK);
3978	for (i = 0; i < vi->rss_size;) {
3979#ifdef RSS
3980		j = rss_get_indirection_to_bucket(i);
3981		j %= vi->nrxq;
3982		rxq = &sc->sge.rxq[vi->first_rxq + j];
3983		rss[i++] = rxq->iq.abs_id;
3984#else
3985		for_each_rxq(vi, j, rxq) {
3986			rss[i++] = rxq->iq.abs_id;
3987			if (i == vi->rss_size)
3988				break;
3989		}
3990#endif
3991	}
3992
3993	rc = -t4_config_rss_range(sc, sc->mbox, vi->viid, 0, vi->rss_size, rss,
3994	    vi->rss_size);
3995	if (rc != 0) {
3996		if_printf(ifp, "rss_config failed: %d\n", rc);
3997		goto done;
3998	}
3999
4000#ifdef RSS
4001	hashen = hashconfig_to_hashen(hashconfig);
4002
4003	/*
4004	 * We may have had to enable some hashes even though the global config
4005	 * wants them disabled.  This is a potential problem that must be
4006	 * reported to the user.
4007	 */
4008	extra = hashen_to_hashconfig(hashen) ^ hashconfig;
4009
4010	/*
4011	 * If we consider only the supported hash types, then the enabled hashes
4012	 * are a superset of the requested hashes.  In other words, there cannot
4013	 * be any supported hash that was requested but not enabled, but there
4014	 * can be hashes that were not requested but had to be enabled.
4015	 */
4016	extra &= SUPPORTED_RSS_HASHTYPES;
4017	MPASS((extra & hashconfig) == 0);
4018
4019	if (extra) {
4020		if_printf(ifp,
4021		    "global RSS config (0x%x) cannot be accomodated.\n",
4022		    hashconfig);
4023	}
4024	if (extra & RSS_HASHTYPE_RSS_IPV4)
4025		if_printf(ifp, "IPv4 2-tuple hashing forced on.\n");
4026	if (extra & RSS_HASHTYPE_RSS_TCP_IPV4)
4027		if_printf(ifp, "TCP/IPv4 4-tuple hashing forced on.\n");
4028	if (extra & RSS_HASHTYPE_RSS_IPV6)
4029		if_printf(ifp, "IPv6 2-tuple hashing forced on.\n");
4030	if (extra & RSS_HASHTYPE_RSS_TCP_IPV6)
4031		if_printf(ifp, "TCP/IPv6 4-tuple hashing forced on.\n");
4032	if (extra & RSS_HASHTYPE_RSS_UDP_IPV4)
4033		if_printf(ifp, "UDP/IPv4 4-tuple hashing forced on.\n");
4034	if (extra & RSS_HASHTYPE_RSS_UDP_IPV6)
4035		if_printf(ifp, "UDP/IPv6 4-tuple hashing forced on.\n");
4036#else
4037	hashen = F_FW_RSS_VI_CONFIG_CMD_IP6FOURTUPEN |
4038	    F_FW_RSS_VI_CONFIG_CMD_IP6TWOTUPEN |
4039	    F_FW_RSS_VI_CONFIG_CMD_IP4FOURTUPEN |
4040	    F_FW_RSS_VI_CONFIG_CMD_IP4TWOTUPEN | F_FW_RSS_VI_CONFIG_CMD_UDPEN;
4041#endif
4042	rc = -t4_config_vi_rss(sc, sc->mbox, vi->viid, hashen, rss[0]);
4043	if (rc != 0) {
4044		if_printf(ifp, "rss hash/defaultq config failed: %d\n", rc);
4045		goto done;
4046	}
4047
4048	vi->rss = rss;
4049	vi->flags |= VI_INIT_DONE;
4050done:
4051	if (rc != 0)
4052		vi_full_uninit(vi);
4053
4054	return (rc);
4055}
4056
4057/*
4058 * Idempotent.
4059 */
4060int
4061vi_full_uninit(struct vi_info *vi)
4062{
4063	struct port_info *pi = vi->pi;
4064	struct adapter *sc = pi->adapter;
4065	int i;
4066	struct sge_rxq *rxq;
4067	struct sge_txq *txq;
4068#ifdef TCP_OFFLOAD
4069	struct sge_ofld_rxq *ofld_rxq;
4070	struct sge_wrq *ofld_txq;
4071#endif
4072
4073	if (vi->flags & VI_INIT_DONE) {
4074
4075		/* Need to quiesce queues.  */
4076
4077		/* XXX: Only for the first VI? */
4078		if (IS_MAIN_VI(vi))
4079			quiesce_wrq(sc, &sc->sge.ctrlq[pi->port_id]);
4080
4081		for_each_txq(vi, i, txq) {
4082			quiesce_txq(sc, txq);
4083		}
4084
4085#ifdef TCP_OFFLOAD
4086		for_each_ofld_txq(vi, i, ofld_txq) {
4087			quiesce_wrq(sc, ofld_txq);
4088		}
4089#endif
4090
4091		for_each_rxq(vi, i, rxq) {
4092			quiesce_iq(sc, &rxq->iq);
4093			quiesce_fl(sc, &rxq->fl);
4094		}
4095
4096#ifdef TCP_OFFLOAD
4097		for_each_ofld_rxq(vi, i, ofld_rxq) {
4098			quiesce_iq(sc, &ofld_rxq->iq);
4099			quiesce_fl(sc, &ofld_rxq->fl);
4100		}
4101#endif
4102		free(vi->rss, M_CXGBE);
4103		free(vi->nm_rss, M_CXGBE);
4104	}
4105
4106	t4_teardown_vi_queues(vi);
4107	vi->flags &= ~VI_INIT_DONE;
4108
4109	return (0);
4110}
4111
4112static void
4113quiesce_txq(struct adapter *sc, struct sge_txq *txq)
4114{
4115	struct sge_eq *eq = &txq->eq;
4116	struct sge_qstat *spg = (void *)&eq->desc[eq->sidx];
4117
4118	(void) sc;	/* unused */
4119
4120#ifdef INVARIANTS
4121	TXQ_LOCK(txq);
4122	MPASS((eq->flags & EQ_ENABLED) == 0);
4123	TXQ_UNLOCK(txq);
4124#endif
4125
4126	/* Wait for the mp_ring to empty. */
4127	while (!mp_ring_is_idle(txq->r)) {
4128		mp_ring_check_drainage(txq->r, 0);
4129		pause("rquiesce", 1);
4130	}
4131
4132	/* Then wait for the hardware to finish. */
4133	while (spg->cidx != htobe16(eq->pidx))
4134		pause("equiesce", 1);
4135
4136	/* Finally, wait for the driver to reclaim all descriptors. */
4137	while (eq->cidx != eq->pidx)
4138		pause("dquiesce", 1);
4139}
4140
4141static void
4142quiesce_wrq(struct adapter *sc, struct sge_wrq *wrq)
4143{
4144
4145	/* XXXTX */
4146}
4147
4148static void
4149quiesce_iq(struct adapter *sc, struct sge_iq *iq)
4150{
4151	(void) sc;	/* unused */
4152
4153	/* Synchronize with the interrupt handler */
4154	while (!atomic_cmpset_int(&iq->state, IQS_IDLE, IQS_DISABLED))
4155		pause("iqfree", 1);
4156}
4157
4158static void
4159quiesce_fl(struct adapter *sc, struct sge_fl *fl)
4160{
4161	mtx_lock(&sc->sfl_lock);
4162	FL_LOCK(fl);
4163	fl->flags |= FL_DOOMED;
4164	FL_UNLOCK(fl);
4165	callout_stop(&sc->sfl_callout);
4166	mtx_unlock(&sc->sfl_lock);
4167
4168	KASSERT((fl->flags & FL_STARVING) == 0,
4169	    ("%s: still starving", __func__));
4170}
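/*
 * FL_DOOMED is presumably what keeps the starving-freelist machinery from
 * touching this fl again; stopping sc->sfl_callout alone would not help if
 * the freelist could be re-added to the starving list afterwards.  The
 * KASSERT confirms the fl is no longer marked starving once doomed.
 */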
4171
4172static int
4173t4_alloc_irq(struct adapter *sc, struct irq *irq, int rid,
4174    driver_intr_t *handler, void *arg, char *name)
4175{
4176	int rc;
4177
4178	irq->rid = rid;
4179	irq->res = bus_alloc_resource_any(sc->dev, SYS_RES_IRQ, &irq->rid,
4180	    RF_SHAREABLE | RF_ACTIVE);
4181	if (irq->res == NULL) {
4182		device_printf(sc->dev,
4183		    "failed to allocate IRQ for rid %d, name %s.\n", rid, name);
4184		return (ENOMEM);
4185	}
4186
4187	rc = bus_setup_intr(sc->dev, irq->res, INTR_MPSAFE | INTR_TYPE_NET,
4188	    NULL, handler, arg, &irq->tag);
4189	if (rc != 0) {
4190		device_printf(sc->dev,
4191		    "failed to setup interrupt for rid %d, name %s: %d\n",
4192		    rid, name, rc);
4193	} else if (name)
4194		bus_describe_intr(sc->dev, irq->res, irq->tag, name);
4195
4196	return (rc);
4197}
4198
4199static int
4200t4_free_irq(struct adapter *sc, struct irq *irq)
4201{
4202	if (irq->tag)
4203		bus_teardown_intr(sc->dev, irq->res, irq->tag);
4204	if (irq->res)
4205		bus_release_resource(sc->dev, SYS_RES_IRQ, irq->rid, irq->res);
4206
4207	bzero(irq, sizeof(*irq));
4208
4209	return (0);
4210}
4211
4212static void
4213get_regs(struct adapter *sc, struct t4_regdump *regs, uint8_t *buf)
4214{
4215
4216	regs->version = chip_id(sc) | chip_rev(sc) << 10;
4217	t4_get_regs(sc, buf, regs->len);
4218}
4219
4220#define	A_PL_INDIR_CMD	0x1f8
4221
4222#define	S_PL_AUTOINC	31
4223#define	M_PL_AUTOINC	0x1U
4224#define	V_PL_AUTOINC(x)	((x) << S_PL_AUTOINC)
4225#define	G_PL_AUTOINC(x)	(((x) >> S_PL_AUTOINC) & M_PL_AUTOINC)
4226
4227#define	S_PL_VFID	20
4228#define	M_PL_VFID	0xffU
4229#define	V_PL_VFID(x)	((x) << S_PL_VFID)
4230#define	G_PL_VFID(x)	(((x) >> S_PL_VFID) & M_PL_VFID)
4231
4232#define	S_PL_ADDR	0
4233#define	M_PL_ADDR	0xfffffU
4234#define	V_PL_ADDR(x)	((x) << S_PL_ADDR)
4235#define	G_PL_ADDR(x)	(((x) >> S_PL_ADDR) & M_PL_ADDR)
4236
4237#define	A_PL_INDIR_DATA	0x1fc
4238
4239static uint64_t
4240read_vf_stat(struct adapter *sc, unsigned int viid, int reg)
4241{
4242	u32 stats[2];
4243
4244	mtx_assert(&sc->reg_lock, MA_OWNED);
4245	t4_write_reg(sc, A_PL_INDIR_CMD, V_PL_AUTOINC(1) |
4246	    V_PL_VFID(G_FW_VIID_VIN(viid)) | V_PL_ADDR(VF_MPS_REG(reg)));
4247	stats[0] = t4_read_reg(sc, A_PL_INDIR_DATA);
4248	stats[1] = t4_read_reg(sc, A_PL_INDIR_DATA);
4249	return (((uint64_t)stats[1]) << 32 | stats[0]);
4250}
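/*
 * Note on the indirect access above: with AUTOINC set, every read of
 * A_PL_INDIR_DATA returns the addressed 32-bit word and advances the window
 * to the next one, so the two back-to-back reads fetch the _L and _H halves
 * of one 64-bit VF statistic after a single command write.  t4_clr_vi_stats()
 * below uses the same autoincrement window to zero the entire stats block
 * with repeated writes to the data register.
 */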
4251
4252static void
4253t4_get_vi_stats(struct adapter *sc, unsigned int viid,
4254    struct fw_vi_stats_vf *stats)
4255{
4256
4257#define GET_STAT(name) \
4258	read_vf_stat(sc, viid, A_MPS_VF_STAT_##name##_L)
4259
4260	stats->tx_bcast_bytes    = GET_STAT(TX_VF_BCAST_BYTES);
4261	stats->tx_bcast_frames   = GET_STAT(TX_VF_BCAST_FRAMES);
4262	stats->tx_mcast_bytes    = GET_STAT(TX_VF_MCAST_BYTES);
4263	stats->tx_mcast_frames   = GET_STAT(TX_VF_MCAST_FRAMES);
4264	stats->tx_ucast_bytes    = GET_STAT(TX_VF_UCAST_BYTES);
4265	stats->tx_ucast_frames   = GET_STAT(TX_VF_UCAST_FRAMES);
4266	stats->tx_drop_frames    = GET_STAT(TX_VF_DROP_FRAMES);
4267	stats->tx_offload_bytes  = GET_STAT(TX_VF_OFFLOAD_BYTES);
4268	stats->tx_offload_frames = GET_STAT(TX_VF_OFFLOAD_FRAMES);
4269	stats->rx_bcast_bytes    = GET_STAT(RX_VF_BCAST_BYTES);
4270	stats->rx_bcast_frames   = GET_STAT(RX_VF_BCAST_FRAMES);
4271	stats->rx_mcast_bytes    = GET_STAT(RX_VF_MCAST_BYTES);
4272	stats->rx_mcast_frames   = GET_STAT(RX_VF_MCAST_FRAMES);
4273	stats->rx_ucast_bytes    = GET_STAT(RX_VF_UCAST_BYTES);
4274	stats->rx_ucast_frames   = GET_STAT(RX_VF_UCAST_FRAMES);
4275	stats->rx_err_frames     = GET_STAT(RX_VF_ERR_FRAMES);
4276
4277#undef GET_STAT
4278}
4279
4280static void
4281t4_clr_vi_stats(struct adapter *sc, unsigned int viid)
4282{
4283	int reg;
4284
4285	t4_write_reg(sc, A_PL_INDIR_CMD, V_PL_AUTOINC(1) |
4286	    V_PL_VFID(G_FW_VIID_VIN(viid)) |
4287	    V_PL_ADDR(VF_MPS_REG(A_MPS_VF_STAT_TX_VF_BCAST_BYTES_L)));
4288	for (reg = A_MPS_VF_STAT_TX_VF_BCAST_BYTES_L;
4289	     reg <= A_MPS_VF_STAT_RX_VF_ERR_FRAMES_H; reg += 4)
4290		t4_write_reg(sc, A_PL_INDIR_DATA, 0);
4291}
4292
4293static void
4294vi_refresh_stats(struct adapter *sc, struct vi_info *vi)
4295{
4296	struct ifnet *ifp = vi->ifp;
4297	struct sge_txq *txq;
4298	int i, drops;
4299	struct fw_vi_stats_vf *s = &vi->stats;
4300	struct timeval tv;
4301	const struct timeval interval = {0, 250000};	/* 250ms */
4302
4303	if (!(vi->flags & VI_INIT_DONE))
4304		return;
4305
4306	getmicrotime(&tv);
4307	timevalsub(&tv, &interval);
4308	if (timevalcmp(&tv, &vi->last_refreshed, <))
4309		return;
4310
4311	mtx_lock(&sc->reg_lock);
4312	t4_get_vi_stats(sc, vi->viid, &vi->stats);
4313
4314	ifp->if_ipackets = s->rx_bcast_frames + s->rx_mcast_frames +
4315	    s->rx_ucast_frames;
4316	ifp->if_ierrors = s->rx_err_frames;
4317	ifp->if_opackets = s->tx_bcast_frames + s->tx_mcast_frames +
4318	    s->tx_ucast_frames + s->tx_offload_frames;
4319	ifp->if_oerrors = s->tx_drop_frames;
4320	ifp->if_ibytes = s->rx_bcast_bytes + s->rx_mcast_bytes +
4321	    s->rx_ucast_bytes;
4322	ifp->if_obytes = s->tx_bcast_bytes + s->tx_mcast_bytes +
4323	    s->tx_ucast_bytes + s->tx_offload_bytes;
4324	ifp->if_imcasts = s->rx_mcast_frames;
4325	ifp->if_omcasts = s->tx_mcast_frames;
4326
4327	drops = 0;
4328	for_each_txq(vi, i, txq)
4329		drops += counter_u64_fetch(txq->r->drops);
4330	ifp->if_snd.ifq_drops = drops;
4331
4332	getmicrotime(&vi->last_refreshed);
4333	mtx_unlock(&sc->reg_lock);
4334}
4335
4336static void
4337cxgbe_refresh_stats(struct adapter *sc, struct port_info *pi)
4338{
4339	struct vi_info *vi = &pi->vi[0];
4340	struct ifnet *ifp = vi->ifp;
4341	struct sge_txq *txq;
4342	int i, drops;
4343	struct port_stats *s = &pi->stats;
4344	struct timeval tv;
4345	const struct timeval interval = {0, 250000};	/* 250ms */
4346
4347	getmicrotime(&tv);
4348	timevalsub(&tv, &interval);
4349	if (timevalcmp(&tv, &pi->last_refreshed, <))
4350		return;
4351
4352	t4_get_port_stats(sc, pi->tx_chan, s);
4353
4354	ifp->if_opackets = s->tx_frames;
4355	ifp->if_ipackets = s->rx_frames;
4356	ifp->if_obytes = s->tx_octets;
4357	ifp->if_ibytes = s->rx_octets;
4358	ifp->if_omcasts = s->tx_mcast_frames;
4359	ifp->if_imcasts = s->rx_mcast_frames;
4360	ifp->if_iqdrops = s->rx_ovflow0 + s->rx_ovflow1 + s->rx_ovflow2 +
4361	    s->rx_ovflow3 + s->rx_trunc0 + s->rx_trunc1 + s->rx_trunc2 +
4362	    s->rx_trunc3;
4363	for (i = 0; i < sc->chip_params->nchan; i++) {
4364		if (pi->rx_chan_map & (1 << i)) {
4365			uint32_t v;
4366
4367			mtx_lock(&sc->reg_lock);
4368			t4_read_indirect(sc, A_TP_MIB_INDEX, A_TP_MIB_DATA, &v,
4369			    1, A_TP_MIB_TNL_CNG_DROP_0 + i);
4370			mtx_unlock(&sc->reg_lock);
4371			ifp->if_iqdrops += v;
4372		}
4373	}
4374
4375	drops = s->tx_drop;
4376	for_each_txq(vi, i, txq)
4377		drops += counter_u64_fetch(txq->r->drops);
4378	ifp->if_snd.ifq_drops = drops;
4379
4380	ifp->if_oerrors = s->tx_error_frames;
4381	ifp->if_ierrors = s->rx_jabber + s->rx_runt + s->rx_too_long +
4382	    s->rx_fcs_err + s->rx_len_err;
4383
4384	getmicrotime(&pi->last_refreshed);
4385}
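/*
 * Both refresh routines above rate-limit themselves: they subtract a 250ms
 * interval from the current time and return if the previous refresh is more
 * recent, so hardware statistics are read at most four times a second per
 * VI/port regardless of how often the callers run.
 */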
4386
4387static void
4388cxgbe_tick(void *arg)
4389{
4390	struct port_info *pi = arg;
4391	struct adapter *sc = pi->adapter;
4392
4393	PORT_LOCK_ASSERT_OWNED(pi);
4394	cxgbe_refresh_stats(sc, pi);
4395
4396	callout_schedule(&pi->tick, hz);
4397}
4398
4399void
4400vi_tick(void *arg)
4401{
4402	struct vi_info *vi = arg;
4403	struct adapter *sc = vi->pi->adapter;
4404
4405	vi_refresh_stats(sc, vi);
4406
4407	callout_schedule(&vi->tick, hz);
4408}
4409
4410static void
4411cxgbe_vlan_config(void *arg, struct ifnet *ifp, uint16_t vid)
4412{
4413	struct ifnet *vlan;
4414
4415	if (arg != ifp || ifp->if_type != IFT_ETHER)
4416		return;
4417
4418	vlan = VLAN_DEVAT(ifp, vid);
4419	VLAN_SETCOOKIE(vlan, ifp);
4420}
4421
4422static int
4423cpl_not_handled(struct sge_iq *iq, const struct rss_header *rss, struct mbuf *m)
4424{
4425
4426#ifdef INVARIANTS
4427	panic("%s: opcode 0x%02x on iq %p with payload %p",
4428	    __func__, rss->opcode, iq, m);
4429#else
4430	log(LOG_ERR, "%s: opcode 0x%02x on iq %p with payload %p\n",
4431	    __func__, rss->opcode, iq, m);
4432	m_freem(m);
4433#endif
4434	return (EDOOFUS);
4435}
4436
4437int
4438t4_register_cpl_handler(struct adapter *sc, int opcode, cpl_handler_t h)
4439{
4440	uintptr_t *loc, new;
4441
4442	if (opcode >= nitems(sc->cpl_handler))
4443		return (EINVAL);
4444
4445	new = h ? (uintptr_t)h : (uintptr_t)cpl_not_handled;
4446	loc = (uintptr_t *) &sc->cpl_handler[opcode];
4447	atomic_store_rel_ptr(loc, new);
4448
4449	return (0);
4450}
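/*
 * Handler registration is lockless: atomic_store_rel_ptr() publishes the
 * pointer with release semantics, so a concurrent rx thread dispatching via
 * sc->cpl_handler[opcode] observes either the old or the new handler, never
 * a torn value.  Passing h == NULL reinstalls the cpl_not_handled default.
 * The an_handler and fw_msg_handler registrations below follow the same
 * pattern.
 */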
4451
4452static int
4453an_not_handled(struct sge_iq *iq, const struct rsp_ctrl *ctrl)
4454{
4455
4456#ifdef INVARIANTS
4457	panic("%s: async notification on iq %p (ctrl %p)", __func__, iq, ctrl);
4458#else
4459	log(LOG_ERR, "%s: async notification on iq %p (ctrl %p)\n",
4460	    __func__, iq, ctrl);
4461#endif
4462	return (EDOOFUS);
4463}
4464
4465int
4466t4_register_an_handler(struct adapter *sc, an_handler_t h)
4467{
4468	uintptr_t *loc, new;
4469
4470	new = h ? (uintptr_t)h : (uintptr_t)an_not_handled;
4471	loc = (uintptr_t *) &sc->an_handler;
4472	atomic_store_rel_ptr(loc, new);
4473
4474	return (0);
4475}
4476
4477static int
4478fw_msg_not_handled(struct adapter *sc, const __be64 *rpl)
4479{
4480	const struct cpl_fw6_msg *cpl =
4481	    __containerof(rpl, struct cpl_fw6_msg, data[0]);
4482
4483#ifdef INVARIANTS
4484	panic("%s: fw_msg type %d", __func__, cpl->type);
4485#else
4486	log(LOG_ERR, "%s: fw_msg type %d\n", __func__, cpl->type);
4487#endif
4488	return (EDOOFUS);
4489}
4490
4491int
4492t4_register_fw_msg_handler(struct adapter *sc, int type, fw_msg_handler_t h)
4493{
4494	uintptr_t *loc, new;
4495
4496	if (type >= nitems(sc->fw_msg_handler))
4497		return (EINVAL);
4498
4499	/*
4500	 * These are dispatched by the handler for FW{4|6}_CPL_MSG using the CPL
4501	 * handler dispatch table.  Reject any attempt to install a handler for
4502	 * this subtype.
4503	 */
4504	if (type == FW_TYPE_RSSCPL || type == FW6_TYPE_RSSCPL)
4505		return (EINVAL);
4506
4507	new = h ? (uintptr_t)h : (uintptr_t)fw_msg_not_handled;
4508	loc = (uintptr_t *) &sc->fw_msg_handler[type];
4509	atomic_store_rel_ptr(loc, new);
4510
4511	return (0);
4512}
4513
4514/*
4515 * Should match fw_caps_config_<foo> enums in t4fw_interface.h
4516 */
4517static char *caps_decoder[] = {
4518	"\20\001IPMI\002NCSI",				/* 0: NBM */
4519	"\20\001PPP\002QFC\003DCBX",			/* 1: link */
4520	"\20\001INGRESS\002EGRESS",			/* 2: switch */
4521	"\20\001NIC\002VM\003IDS\004UM\005UM_ISGL"	/* 3: NIC */
4522	    "\006HASHFILTER\007ETHOFLD",
4523	"\20\001TOE",					/* 4: TOE */
4524	"\20\001RDDP\002RDMAC",				/* 5: RDMA */
4525	"\20\001INITIATOR_PDU\002TARGET_PDU"		/* 6: iSCSI */
4526	    "\003INITIATOR_CNXOFLD\004TARGET_CNXOFLD"
4527	    "\005INITIATOR_SSNOFLD\006TARGET_SSNOFLD"
4528	    "\007T10DIF"
4529	    "\010INITIATOR_CMDOFLD\011TARGET_CMDOFLD",
4530	"\20\00KEYS",					/* 7: TLS */
4531	"\20\001INITIATOR\002TARGET\003CTRL_OFLD"	/* 8: FCoE */
4532		    "\004PO_INITIATOR\005PO_TARGET",
4533};
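/*
 * These are kernel %b format strings: the leading \20 (decimal 16) selects
 * hex output for the raw value, and each subsequent octal escape gives the
 * 1-based bit position of the name that follows.  For example, with
 * sc->nbmcaps == 0x3 the sysctl renders "3<IPMI,NCSI>".
 */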
4534
4535static void
4536t4_sysctls(struct adapter *sc)
4537{
4538	struct sysctl_ctx_list *ctx;
4539	struct sysctl_oid *oid;
4540	struct sysctl_oid_list *children, *c0;
4541	static char *doorbells = {"\20\1UDB\2WCWR\3UDBWC\4KDB"};
4542
4543	ctx = device_get_sysctl_ctx(sc->dev);
4544
4545	/*
4546	 * dev.t4nex.X.
4547	 */
4548	oid = device_get_sysctl_tree(sc->dev);
4549	c0 = children = SYSCTL_CHILDREN(oid);
4550
4551	sc->sc_do_rxcopy = 1;
4552	SYSCTL_ADD_INT(ctx, children, OID_AUTO, "do_rx_copy", CTLFLAG_RW,
4553	    &sc->sc_do_rxcopy, 1, "Do RX copy of small frames");
4554
4555	SYSCTL_ADD_INT(ctx, children, OID_AUTO, "nports", CTLFLAG_RD, NULL,
4556	    sc->params.nports, "# of ports");
4557
4558	SYSCTL_ADD_INT(ctx, children, OID_AUTO, "hw_revision", CTLFLAG_RD,
4559	    NULL, chip_rev(sc), "chip hardware revision");
4560
4561	SYSCTL_ADD_STRING(ctx, children, OID_AUTO, "tp_version",
4562	    CTLFLAG_RD, sc->tp_version, 0, "TP microcode version");
4563
4564	if (sc->params.exprom_vers != 0) {
4565		SYSCTL_ADD_STRING(ctx, children, OID_AUTO, "exprom_version",
4566		    CTLFLAG_RD, sc->exprom_version, 0, "expansion ROM version");
4567	}
4568
4569	SYSCTL_ADD_STRING(ctx, children, OID_AUTO, "firmware_version",
4570	    CTLFLAG_RD, sc->fw_version, 0, "firmware version");
4571
4572	SYSCTL_ADD_STRING(ctx, children, OID_AUTO, "cf",
4573	    CTLFLAG_RD, sc->cfg_file, 0, "configuration file");
4574
4575	SYSCTL_ADD_UINT(ctx, children, OID_AUTO, "cfcsum", CTLFLAG_RD, NULL,
4576	    sc->cfcsum, "config file checksum");
4577
4578	SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "doorbells",
4579	    CTLTYPE_STRING | CTLFLAG_RD, doorbells, sc->doorbells,
4580	    sysctl_bitfield, "A", "available doorbells");
4581
4582#define SYSCTL_CAP(name, n, text) \
4583	SYSCTL_ADD_PROC(ctx, children, OID_AUTO, #name, \
4584	    CTLTYPE_STRING | CTLFLAG_RD, caps_decoder[n], sc->name, \
4585	    sysctl_bitfield, "A", "available " text " capabilities")
4586
4587	SYSCTL_CAP(nbmcaps, 0, "NBM");
4588	SYSCTL_CAP(linkcaps, 1, "link");
4589	SYSCTL_CAP(switchcaps, 2, "switch");
4590	SYSCTL_CAP(niccaps, 3, "NIC");
4591	SYSCTL_CAP(toecaps, 4, "TCP offload");
4592	SYSCTL_CAP(rdmacaps, 5, "RDMA");
4593	SYSCTL_CAP(iscsicaps, 6, "iSCSI");
4594	SYSCTL_CAP(tlscaps, 7, "TLS");
4595	SYSCTL_CAP(fcoecaps, 8, "FCoE");
4596#undef SYSCTL_CAP
4597
4598	SYSCTL_ADD_INT(ctx, children, OID_AUTO, "core_clock", CTLFLAG_RD, NULL,
4599	    sc->params.vpd.cclk, "core clock frequency (in kHz)");
4600
4601	SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "holdoff_timers",
4602	    CTLTYPE_STRING | CTLFLAG_RD, sc->params.sge.timer_val,
4603	    sizeof(sc->params.sge.timer_val), sysctl_int_array, "A",
4604	    "interrupt holdoff timer values (us)");
4605
4606	SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "holdoff_pkt_counts",
4607	    CTLTYPE_STRING | CTLFLAG_RD, sc->params.sge.counter_val,
4608	    sizeof(sc->params.sge.counter_val), sysctl_int_array, "A",
4609	    "interrupt holdoff packet counter values");
4610
4611	SYSCTL_ADD_INT(ctx, children, OID_AUTO, "nfilters", CTLFLAG_RD,
4612	    NULL, sc->tids.nftids, "number of filters");
4613
4614	SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "temperature", CTLTYPE_INT |
4615	    CTLFLAG_RD, sc, 0, sysctl_temperature, "I",
4616	    "chip temperature (in Celsius)");
4617
4618	t4_sge_sysctls(sc, ctx, children);
4619
4620	sc->lro_timeout = 100;
4621	SYSCTL_ADD_INT(ctx, children, OID_AUTO, "lro_timeout", CTLFLAG_RW,
4622	    &sc->lro_timeout, 0, "lro inactive-flush timeout (in us)");
4623
4624	SYSCTL_ADD_INT(ctx, children, OID_AUTO, "debug_flags", CTLFLAG_RW,
4625	    &sc->debug_flags, 0, "flags to enable runtime debugging");
4626
4627#ifdef SBUF_DRAIN
4628	/*
4629	 * dev.t4nex.X.misc.  Marked CTLFLAG_SKIP to avoid information overload.
4630	 */
4631	oid = SYSCTL_ADD_NODE(ctx, c0, OID_AUTO, "misc",
4632	    CTLFLAG_RD | CTLFLAG_SKIP, NULL,
4633	    "logs and miscellaneous information");
4634	children = SYSCTL_CHILDREN(oid);
4635
4636	SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "cctrl",
4637	    CTLTYPE_STRING | CTLFLAG_RD, sc, 0,
4638	    sysctl_cctrl, "A", "congestion control");
4639
4640	SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "cim_ibq_tp0",
4641	    CTLTYPE_STRING | CTLFLAG_RD, sc, 0,
4642	    sysctl_cim_ibq_obq, "A", "CIM IBQ 0 (TP0)");
4643
4644	SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "cim_ibq_tp1",
4645	    CTLTYPE_STRING | CTLFLAG_RD, sc, 1,
4646	    sysctl_cim_ibq_obq, "A", "CIM IBQ 1 (TP1)");
4647
4648	SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "cim_ibq_ulp",
4649	    CTLTYPE_STRING | CTLFLAG_RD, sc, 2,
4650	    sysctl_cim_ibq_obq, "A", "CIM IBQ 2 (ULP)");
4651
4652	SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "cim_ibq_sge0",
4653	    CTLTYPE_STRING | CTLFLAG_RD, sc, 3,
4654	    sysctl_cim_ibq_obq, "A", "CIM IBQ 3 (SGE0)");
4655
4656	SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "cim_ibq_sge1",
4657	    CTLTYPE_STRING | CTLFLAG_RD, sc, 4,
4658	    sysctl_cim_ibq_obq, "A", "CIM IBQ 4 (SGE1)");
4659
4660	SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "cim_ibq_ncsi",
4661	    CTLTYPE_STRING | CTLFLAG_RD, sc, 5,
4662	    sysctl_cim_ibq_obq, "A", "CIM IBQ 5 (NCSI)");
4663
4664	SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "cim_la",
4665	    CTLTYPE_STRING | CTLFLAG_RD, sc, 0,
4666	    chip_id(sc) <= CHELSIO_T5 ? sysctl_cim_la : sysctl_cim_la_t6,
4667	    "A", "CIM logic analyzer");
4668
4669	SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "cim_ma_la",
4670	    CTLTYPE_STRING | CTLFLAG_RD, sc, 0,
4671	    sysctl_cim_ma_la, "A", "CIM MA logic analyzer");
4672
4673	SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "cim_obq_ulp0",
4674	    CTLTYPE_STRING | CTLFLAG_RD, sc, 0 + CIM_NUM_IBQ,
4675	    sysctl_cim_ibq_obq, "A", "CIM OBQ 0 (ULP0)");
4676
4677	SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "cim_obq_ulp1",
4678	    CTLTYPE_STRING | CTLFLAG_RD, sc, 1 + CIM_NUM_IBQ,
4679	    sysctl_cim_ibq_obq, "A", "CIM OBQ 1 (ULP1)");
4680
4681	SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "cim_obq_ulp2",
4682	    CTLTYPE_STRING | CTLFLAG_RD, sc, 2 + CIM_NUM_IBQ,
4683	    sysctl_cim_ibq_obq, "A", "CIM OBQ 2 (ULP2)");
4684
4685	SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "cim_obq_ulp3",
4686	    CTLTYPE_STRING | CTLFLAG_RD, sc, 3 + CIM_NUM_IBQ,
4687	    sysctl_cim_ibq_obq, "A", "CIM OBQ 3 (ULP3)");
4688
4689	SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "cim_obq_sge",
4690	    CTLTYPE_STRING | CTLFLAG_RD, sc, 4 + CIM_NUM_IBQ,
4691	    sysctl_cim_ibq_obq, "A", "CIM OBQ 4 (SGE)");
4692
4693	SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "cim_obq_ncsi",
4694	    CTLTYPE_STRING | CTLFLAG_RD, sc, 5 + CIM_NUM_IBQ,
4695	    sysctl_cim_ibq_obq, "A", "CIM OBQ 5 (NCSI)");
4696
4697	if (chip_id(sc) > CHELSIO_T4) {
4698		SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "cim_obq_sge0_rx",
4699		    CTLTYPE_STRING | CTLFLAG_RD, sc, 6 + CIM_NUM_IBQ,
4700		    sysctl_cim_ibq_obq, "A", "CIM OBQ 6 (SGE0-RX)");
4701
4702		SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "cim_obq_sge1_rx",
4703		    CTLTYPE_STRING | CTLFLAG_RD, sc, 7 + CIM_NUM_IBQ,
4704		    sysctl_cim_ibq_obq, "A", "CIM OBQ 7 (SGE1-RX)");
4705	}
4706
4707	SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "cim_pif_la",
4708	    CTLTYPE_STRING | CTLFLAG_RD, sc, 0,
4709	    sysctl_cim_pif_la, "A", "CIM PIF logic analyzer");
4710
4711	SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "cim_qcfg",
4712	    CTLTYPE_STRING | CTLFLAG_RD, sc, 0,
4713	    sysctl_cim_qcfg, "A", "CIM queue configuration");
4714
4715	SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "cpl_stats",
4716	    CTLTYPE_STRING | CTLFLAG_RD, sc, 0,
4717	    sysctl_cpl_stats, "A", "CPL statistics");
4718
4719	SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "ddp_stats",
4720	    CTLTYPE_STRING | CTLFLAG_RD, sc, 0,
4721	    sysctl_ddp_stats, "A", "non-TCP DDP statistics");
4722
4723	SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "devlog",
4724	    CTLTYPE_STRING | CTLFLAG_RD, sc, 0,
4725	    sysctl_devlog, "A", "firmware's device log");
4726
4727	SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "fcoe_stats",
4728	    CTLTYPE_STRING | CTLFLAG_RD, sc, 0,
4729	    sysctl_fcoe_stats, "A", "FCoE statistics");
4730
4731	SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "hw_sched",
4732	    CTLTYPE_STRING | CTLFLAG_RD, sc, 0,
4733	    sysctl_hw_sched, "A", "hardware scheduler");
4734
4735	SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "l2t",
4736	    CTLTYPE_STRING | CTLFLAG_RD, sc, 0,
4737	    sysctl_l2t, "A", "hardware L2 table");
4738
4739	SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "lb_stats",
4740	    CTLTYPE_STRING | CTLFLAG_RD, sc, 0,
4741	    sysctl_lb_stats, "A", "loopback statistics");
4742
4743	SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "meminfo",
4744	    CTLTYPE_STRING | CTLFLAG_RD, sc, 0,
4745	    sysctl_meminfo, "A", "memory regions");
4746
4747	SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "mps_tcam",
4748	    CTLTYPE_STRING | CTLFLAG_RD, sc, 0,
4749	    chip_id(sc) <= CHELSIO_T5 ? sysctl_mps_tcam : sysctl_mps_tcam_t6,
4750	    "A", "MPS TCAM entries");
4751
4752	SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "path_mtus",
4753	    CTLTYPE_STRING | CTLFLAG_RD, sc, 0,
4754	    sysctl_path_mtus, "A", "path MTUs");
4755
4756	SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "pm_stats",
4757	    CTLTYPE_STRING | CTLFLAG_RD, sc, 0,
4758	    sysctl_pm_stats, "A", "PM statistics");
4759
4760	SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "rdma_stats",
4761	    CTLTYPE_STRING | CTLFLAG_RD, sc, 0,
4762	    sysctl_rdma_stats, "A", "RDMA statistics");
4763
4764	SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "tcp_stats",
4765	    CTLTYPE_STRING | CTLFLAG_RD, sc, 0,
4766	    sysctl_tcp_stats, "A", "TCP statistics");
4767
4768	SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "tids",
4769	    CTLTYPE_STRING | CTLFLAG_RD, sc, 0,
4770	    sysctl_tids, "A", "TID information");
4771
4772	SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "tp_err_stats",
4773	    CTLTYPE_STRING | CTLFLAG_RD, sc, 0,
4774	    sysctl_tp_err_stats, "A", "TP error statistics");
4775
4776	SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "tp_la_mask",
4777	    CTLTYPE_INT | CTLFLAG_RW, sc, 0, sysctl_tp_la_mask, "I",
4778	    "TP logic analyzer event capture mask");
4779
4780	SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "tp_la",
4781	    CTLTYPE_STRING | CTLFLAG_RD, sc, 0,
4782	    sysctl_tp_la, "A", "TP logic analyzer");
4783
4784	SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "tx_rate",
4785	    CTLTYPE_STRING | CTLFLAG_RD, sc, 0,
4786	    sysctl_tx_rate, "A", "Tx rate");
4787
4788	SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "ulprx_la",
4789	    CTLTYPE_STRING | CTLFLAG_RD, sc, 0,
4790	    sysctl_ulprx_la, "A", "ULPRX logic analyzer");
4791
4792	if (is_t5(sc)) {
4793		SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "wcwr_stats",
4794		    CTLTYPE_STRING | CTLFLAG_RD, sc, 0,
4795		    sysctl_wcwr_stats, "A", "write combined work requests");
4796	}
4797#endif
4798
4799#ifdef TCP_OFFLOAD
4800	if (is_offload(sc)) {
4801		/*
4802		 * dev.t4nex.X.toe.
4803		 */
4804		oid = SYSCTL_ADD_NODE(ctx, c0, OID_AUTO, "toe", CTLFLAG_RD,
4805		    NULL, "TOE parameters");
4806		children = SYSCTL_CHILDREN(oid);
4807
4808		sc->tt.sndbuf = 256 * 1024;
4809		SYSCTL_ADD_INT(ctx, children, OID_AUTO, "sndbuf", CTLFLAG_RW,
4810		    &sc->tt.sndbuf, 0, "max hardware send buffer size");
4811
4812		sc->tt.ddp = 0;
4813		SYSCTL_ADD_INT(ctx, children, OID_AUTO, "ddp", CTLFLAG_RW,
4814		    &sc->tt.ddp, 0, "DDP allowed");
4815
4816		sc->tt.indsz = G_INDICATESIZE(t4_read_reg(sc, A_TP_PARA_REG5));
4817		SYSCTL_ADD_INT(ctx, children, OID_AUTO, "indsz", CTLFLAG_RW,
4818		    &sc->tt.indsz, 0, "DDP max indicate size allowed");
4819
4820		sc->tt.ddp_thres =
4821		    G_RXCOALESCESIZE(t4_read_reg(sc, A_TP_PARA_REG2));
4822		SYSCTL_ADD_INT(ctx, children, OID_AUTO, "ddp_thres", CTLFLAG_RW,
4823		    &sc->tt.ddp_thres, 0, "DDP threshold");
4824
4825		sc->tt.rx_coalesce = 1;
4826		SYSCTL_ADD_INT(ctx, children, OID_AUTO, "rx_coalesce",
4827		    CTLFLAG_RW, &sc->tt.rx_coalesce, 0, "receive coalescing");
4828
4829		sc->tt.tx_align = 1;
4830		SYSCTL_ADD_INT(ctx, children, OID_AUTO, "tx_align",
4831		    CTLFLAG_RW, &sc->tt.tx_align, 0, "chop and align payload");
4832
4833		SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "timer_tick",
4834		    CTLTYPE_STRING | CTLFLAG_RD, sc, 0, sysctl_tp_tick, "A",
4835		    "TP timer tick (us)");
4836
4837		SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "timestamp_tick",
4838		    CTLTYPE_STRING | CTLFLAG_RD, sc, 1, sysctl_tp_tick, "A",
4839		    "TCP timestamp tick (us)");
4840
4841		SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "dack_tick",
4842		    CTLTYPE_STRING | CTLFLAG_RD, sc, 2, sysctl_tp_tick, "A",
4843		    "DACK tick (us)");
4844
4845		SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "dack_timer",
4846		    CTLTYPE_UINT | CTLFLAG_RD, sc, 0, sysctl_tp_dack_timer,
4847		    "IU", "DACK timer (us)");
4848
4849		SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "rexmt_min",
4850		    CTLTYPE_ULONG | CTLFLAG_RD, sc, A_TP_RXT_MIN,
4851		    sysctl_tp_timer, "LU", "Retransmit min (us)");
4852
4853		SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "rexmt_max",
4854		    CTLTYPE_ULONG | CTLFLAG_RD, sc, A_TP_RXT_MAX,
4855		    sysctl_tp_timer, "LU", "Retransmit max (us)");
4856
4857		SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "persist_min",
4858		    CTLTYPE_ULONG | CTLFLAG_RD, sc, A_TP_PERS_MIN,
4859		    sysctl_tp_timer, "LU", "Persist timer min (us)");
4860
4861		SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "persist_max",
4862		    CTLTYPE_ULONG | CTLFLAG_RD, sc, A_TP_PERS_MAX,
4863		    sysctl_tp_timer, "LU", "Persist timer max (us)");
4864
4865		SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "keepalive_idle",
4866		    CTLTYPE_ULONG | CTLFLAG_RD, sc, A_TP_KEEP_IDLE,
4867		    sysctl_tp_timer, "LU", "Keepidle idle timer (us)");
4868
4869		SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "keepalive_intvl",
4870		    CTLTYPE_ULONG | CTLFLAG_RD, sc, A_TP_KEEP_INTVL,
4871		    sysctl_tp_timer, "LU", "Keepidle interval (us)");
4872
4873		SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "initial_srtt",
4874		    CTLTYPE_ULONG | CTLFLAG_RD, sc, A_TP_INIT_SRTT,
4875		    sysctl_tp_timer, "LU", "Initial SRTT (us)");
4876
4877		SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "finwait2_timer",
4878		    CTLTYPE_ULONG | CTLFLAG_RD, sc, A_TP_FINWAIT2_TIMER,
4879		    sysctl_tp_timer, "LU", "FINWAIT2 timer (us)");
4880	}
4881#endif
4882}
4883
4884void
4885vi_sysctls(struct vi_info *vi)
4886{
4887	struct sysctl_ctx_list *ctx;
4888	struct sysctl_oid *oid;
4889	struct sysctl_oid_list *children;
4890
4891	ctx = device_get_sysctl_ctx(vi->dev);
4892
4893	/*
4894	 * dev.v?(cxgbe|cxl).X.
4895	 */
4896	oid = device_get_sysctl_tree(vi->dev);
4897	children = SYSCTL_CHILDREN(oid);
4898
4899	SYSCTL_ADD_UINT(ctx, children, OID_AUTO, "viid", CTLFLAG_RD, NULL,
4900	    vi->viid, "VI identifier");
4901	SYSCTL_ADD_INT(ctx, children, OID_AUTO, "nrxq", CTLFLAG_RD,
4902	    &vi->nrxq, 0, "# of rx queues");
4903	SYSCTL_ADD_INT(ctx, children, OID_AUTO, "ntxq", CTLFLAG_RD,
4904	    &vi->ntxq, 0, "# of tx queues");
4905	SYSCTL_ADD_INT(ctx, children, OID_AUTO, "first_rxq", CTLFLAG_RD,
4906	    &vi->first_rxq, 0, "index of first rx queue");
4907	SYSCTL_ADD_INT(ctx, children, OID_AUTO, "first_txq", CTLFLAG_RD,
4908	    &vi->first_txq, 0, "index of first tx queue");
4909
4910	if (IS_MAIN_VI(vi)) {
4911		SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "rsrv_noflowq",
4912		    CTLTYPE_INT | CTLFLAG_RW, vi, 0, sysctl_noflowq, "IU",
4913		    "Reserve queue 0 for non-flowid packets");
4914	}
4915
4916#ifdef TCP_OFFLOAD
4917	if (vi->nofldrxq != 0) {
4918		SYSCTL_ADD_INT(ctx, children, OID_AUTO, "nofldrxq", CTLFLAG_RD,
4919		    &vi->nofldrxq, 0,
4920		    "# of rx queues for offloaded TCP connections");
4921		SYSCTL_ADD_INT(ctx, children, OID_AUTO, "nofldtxq", CTLFLAG_RD,
4922		    &vi->nofldtxq, 0,
4923		    "# of tx queues for offloaded TCP connections");
4924		SYSCTL_ADD_INT(ctx, children, OID_AUTO, "first_ofld_rxq",
4925		    CTLFLAG_RD, &vi->first_ofld_rxq, 0,
4926		    "index of first TOE rx queue");
4927		SYSCTL_ADD_INT(ctx, children, OID_AUTO, "first_ofld_txq",
4928		    CTLFLAG_RD, &vi->first_ofld_txq, 0,
4929		    "index of first TOE tx queue");
4930	}
4931#endif
4932#ifdef DEV_NETMAP
4933	if (vi->nnmrxq != 0) {
4934		SYSCTL_ADD_INT(ctx, children, OID_AUTO, "nnmrxq", CTLFLAG_RD,
4935		    &vi->nnmrxq, 0, "# of netmap rx queues");
4936		SYSCTL_ADD_INT(ctx, children, OID_AUTO, "nnmtxq", CTLFLAG_RD,
4937		    &vi->nnmtxq, 0, "# of netmap tx queues");
4938		SYSCTL_ADD_INT(ctx, children, OID_AUTO, "first_nm_rxq",
4939		    CTLFLAG_RD, &vi->first_nm_rxq, 0,
4940		    "index of first netmap rx queue");
4941		SYSCTL_ADD_INT(ctx, children, OID_AUTO, "first_nm_txq",
4942		    CTLFLAG_RD, &vi->first_nm_txq, 0,
4943		    "index of first netmap tx queue");
4944	}
4945#endif
4946
4947	SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "holdoff_tmr_idx",
4948	    CTLTYPE_INT | CTLFLAG_RW, vi, 0, sysctl_holdoff_tmr_idx, "I",
4949	    "holdoff timer index");
4950	SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "holdoff_pktc_idx",
4951	    CTLTYPE_INT | CTLFLAG_RW, vi, 0, sysctl_holdoff_pktc_idx, "I",
4952	    "holdoff packet counter index");
4953
4954	SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "qsize_rxq",
4955	    CTLTYPE_INT | CTLFLAG_RW, vi, 0, sysctl_qsize_rxq, "I",
4956	    "rx queue size");
4957	SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "qsize_txq",
4958	    CTLTYPE_INT | CTLFLAG_RW, vi, 0, sysctl_qsize_txq, "I",
4959	    "tx queue size");
4960}
4961
4962static void
4963cxgbe_sysctls(struct port_info *pi)
4964{
4965	struct sysctl_ctx_list *ctx;
4966	struct sysctl_oid *oid;
4967	struct sysctl_oid_list *children;
4968	struct adapter *sc = pi->adapter;
4969
4970	ctx = device_get_sysctl_ctx(pi->dev);
4971
4972	/*
4973	 * dev.cxgbe.X.
4974	 */
4975	oid = device_get_sysctl_tree(pi->dev);
4976	children = SYSCTL_CHILDREN(oid);
4977
4978	SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "linkdnrc", CTLTYPE_STRING |
4979	   CTLFLAG_RD, pi, 0, sysctl_linkdnrc, "A", "reason why link is down");
4980	if (pi->port_type == FW_PORT_TYPE_BT_XAUI) {
4981		SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "temperature",
4982		    CTLTYPE_INT | CTLFLAG_RD, pi, 0, sysctl_btphy, "I",
4983		    "PHY temperature (in Celsius)");
4984		SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "fw_version",
4985		    CTLTYPE_INT | CTLFLAG_RD, pi, 1, sysctl_btphy, "I",
4986		    "PHY firmware version");
4987	}
4988
4989	SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "pause_settings",
4990	    CTLTYPE_STRING | CTLFLAG_RW, pi, PAUSE_TX, sysctl_pause_settings,
4991	    "A", "PAUSE settings (bit 0 = rx_pause, bit 1 = tx_pause)");
4992
4993	SYSCTL_ADD_INT(ctx, children, OID_AUTO, "max_speed", CTLFLAG_RD, NULL,
4994	    port_top_speed(pi), "max speed (in Gbps)");
4995
4996	/*
4997	 * dev.cxgbe.X.stats.
4998	 */
4999	oid = SYSCTL_ADD_NODE(ctx, children, OID_AUTO, "stats", CTLFLAG_RD,
5000	    NULL, "port statistics");
5001	children = SYSCTL_CHILDREN(oid);
5002	SYSCTL_ADD_UINT(ctx, children, OID_AUTO, "tx_parse_error", CTLFLAG_RD,
5003	    &pi->tx_parse_error, 0,
5004	    "# of tx packets with invalid length or # of segments");
5005
5006#define SYSCTL_ADD_T4_REG64(pi, name, desc, reg) \
5007	SYSCTL_ADD_OID(ctx, children, OID_AUTO, name, \
5008	    CTLTYPE_U64 | CTLFLAG_RD, sc, reg, \
5009	    sysctl_handle_t4_reg64, "QU", desc)
5010
5011	SYSCTL_ADD_T4_REG64(pi, "tx_octets", "# of octets in good frames",
5012	    PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_TX_PORT_BYTES_L));
5013	SYSCTL_ADD_T4_REG64(pi, "tx_frames", "total # of good frames",
5014	    PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_TX_PORT_FRAMES_L));
5015	SYSCTL_ADD_T4_REG64(pi, "tx_bcast_frames", "# of broadcast frames",
5016	    PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_TX_PORT_BCAST_L));
5017	SYSCTL_ADD_T4_REG64(pi, "tx_mcast_frames", "# of multicast frames",
5018	    PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_TX_PORT_MCAST_L));
5019	SYSCTL_ADD_T4_REG64(pi, "tx_ucast_frames", "# of unicast frames",
5020	    PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_TX_PORT_UCAST_L));
5021	SYSCTL_ADD_T4_REG64(pi, "tx_error_frames", "# of error frames",
5022	    PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_TX_PORT_ERROR_L));
5023	SYSCTL_ADD_T4_REG64(pi, "tx_frames_64",
5024	    "# of tx frames in this range",
5025	    PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_TX_PORT_64B_L));
5026	SYSCTL_ADD_T4_REG64(pi, "tx_frames_65_127",
5027	    "# of tx frames in this range",
5028	    PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_TX_PORT_65B_127B_L));
5029	SYSCTL_ADD_T4_REG64(pi, "tx_frames_128_255",
5030	    "# of tx frames in this range",
5031	    PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_TX_PORT_128B_255B_L));
5032	SYSCTL_ADD_T4_REG64(pi, "tx_frames_256_511",
5033	    "# of tx frames in this range",
5034	    PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_TX_PORT_256B_511B_L));
5035	SYSCTL_ADD_T4_REG64(pi, "tx_frames_512_1023",
5036	    "# of tx frames in this range",
5037	    PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_TX_PORT_512B_1023B_L));
5038	SYSCTL_ADD_T4_REG64(pi, "tx_frames_1024_1518",
5039	    "# of tx frames in this range",
5040	    PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_TX_PORT_1024B_1518B_L));
5041	SYSCTL_ADD_T4_REG64(pi, "tx_frames_1519_max",
5042	    "# of tx frames in this range",
5043	    PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_TX_PORT_1519B_MAX_L));
5044	SYSCTL_ADD_T4_REG64(pi, "tx_drop", "# of dropped tx frames",
5045	    PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_TX_PORT_DROP_L));
5046	SYSCTL_ADD_T4_REG64(pi, "tx_pause", "# of pause frames transmitted",
5047	    PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_TX_PORT_PAUSE_L));
5048	SYSCTL_ADD_T4_REG64(pi, "tx_ppp0", "# of PPP prio 0 frames transmitted",
5049	    PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_TX_PORT_PPP0_L));
5050	SYSCTL_ADD_T4_REG64(pi, "tx_ppp1", "# of PPP prio 1 frames transmitted",
5051	    PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_TX_PORT_PPP1_L));
5052	SYSCTL_ADD_T4_REG64(pi, "tx_ppp2", "# of PPP prio 2 frames transmitted",
5053	    PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_TX_PORT_PPP2_L));
5054	SYSCTL_ADD_T4_REG64(pi, "tx_ppp3", "# of PPP prio 3 frames transmitted",
5055	    PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_TX_PORT_PPP3_L));
5056	SYSCTL_ADD_T4_REG64(pi, "tx_ppp4", "# of PPP prio 4 frames transmitted",
5057	    PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_TX_PORT_PPP4_L));
5058	SYSCTL_ADD_T4_REG64(pi, "tx_ppp5", "# of PPP prio 5 frames transmitted",
5059	    PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_TX_PORT_PPP5_L));
5060	SYSCTL_ADD_T4_REG64(pi, "tx_ppp6", "# of PPP prio 6 frames transmitted",
5061	    PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_TX_PORT_PPP6_L));
5062	SYSCTL_ADD_T4_REG64(pi, "tx_ppp7", "# of PPP prio 7 frames transmitted",
5063	    PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_TX_PORT_PPP7_L));
5064
5065	SYSCTL_ADD_T4_REG64(pi, "rx_octets", "# of octets in good frames",
5066	    PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_RX_PORT_BYTES_L));
5067	SYSCTL_ADD_T4_REG64(pi, "rx_frames", "total # of good frames",
5068	    PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_RX_PORT_FRAMES_L));
5069	SYSCTL_ADD_T4_REG64(pi, "rx_bcast_frames", "# of broadcast frames",
5070	    PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_RX_PORT_BCAST_L));
5071	SYSCTL_ADD_T4_REG64(pi, "rx_mcast_frames", "# of multicast frames",
5072	    PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_RX_PORT_MCAST_L));
5073	SYSCTL_ADD_T4_REG64(pi, "rx_ucast_frames", "# of unicast frames",
5074	    PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_RX_PORT_UCAST_L));
5075	SYSCTL_ADD_T4_REG64(pi, "rx_too_long", "# of frames exceeding MTU",
5076	    PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_RX_PORT_MTU_ERROR_L));
5077	SYSCTL_ADD_T4_REG64(pi, "rx_jabber", "# of jabber frames",
5078	    PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_RX_PORT_MTU_CRC_ERROR_L));
5079	SYSCTL_ADD_T4_REG64(pi, "rx_fcs_err",
5080	    "# of frames received with bad FCS",
5081	    PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_RX_PORT_CRC_ERROR_L));
5082	SYSCTL_ADD_T4_REG64(pi, "rx_len_err",
5083	    "# of frames received with length error",
5084	    PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_RX_PORT_LEN_ERROR_L));
5085	SYSCTL_ADD_T4_REG64(pi, "rx_symbol_err", "symbol errors",
5086	    PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_RX_PORT_SYM_ERROR_L));
5087	SYSCTL_ADD_T4_REG64(pi, "rx_runt", "# of short frames received",
5088	    PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_RX_PORT_LESS_64B_L));
5089	SYSCTL_ADD_T4_REG64(pi, "rx_frames_64",
5090	    "# of rx frames in this range",
5091	    PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_RX_PORT_64B_L));
5092	SYSCTL_ADD_T4_REG64(pi, "rx_frames_65_127",
5093	    "# of rx frames in this range",
5094	    PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_RX_PORT_65B_127B_L));
5095	SYSCTL_ADD_T4_REG64(pi, "rx_frames_128_255",
5096	    "# of rx frames in this range",
5097	    PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_RX_PORT_128B_255B_L));
5098	SYSCTL_ADD_T4_REG64(pi, "rx_frames_256_511",
5099	    "# of rx frames in this range",
5100	    PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_RX_PORT_256B_511B_L));
5101	SYSCTL_ADD_T4_REG64(pi, "rx_frames_512_1023",
5102	    "# of rx frames in this range",
5103	    PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_RX_PORT_512B_1023B_L));
5104	SYSCTL_ADD_T4_REG64(pi, "rx_frames_1024_1518",
5105	    "# of rx frames in this range",
5106	    PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_RX_PORT_1024B_1518B_L));
5107	SYSCTL_ADD_T4_REG64(pi, "rx_frames_1519_max",
5108	    "# of rx frames in this range",
5109	    PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_RX_PORT_1519B_MAX_L));
5110	SYSCTL_ADD_T4_REG64(pi, "rx_pause", "# of pause frames received",
5111	    PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_RX_PORT_PAUSE_L));
5112	SYSCTL_ADD_T4_REG64(pi, "rx_ppp0", "# of PPP prio 0 frames received",
5113	    PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_RX_PORT_PPP0_L));
5114	SYSCTL_ADD_T4_REG64(pi, "rx_ppp1", "# of PPP prio 1 frames received",
5115	    PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_RX_PORT_PPP1_L));
5116	SYSCTL_ADD_T4_REG64(pi, "rx_ppp2", "# of PPP prio 2 frames received",
5117	    PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_RX_PORT_PPP2_L));
5118	SYSCTL_ADD_T4_REG64(pi, "rx_ppp3", "# of PPP prio 3 frames received",
5119	    PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_RX_PORT_PPP3_L));
5120	SYSCTL_ADD_T4_REG64(pi, "rx_ppp4", "# of PPP prio 4 frames received",
5121	    PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_RX_PORT_PPP4_L));
5122	SYSCTL_ADD_T4_REG64(pi, "rx_ppp5", "# of PPP prio 5 frames received",
5123	    PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_RX_PORT_PPP5_L));
5124	SYSCTL_ADD_T4_REG64(pi, "rx_ppp6", "# of PPP prio 6 frames received",
5125	    PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_RX_PORT_PPP6_L));
5126	SYSCTL_ADD_T4_REG64(pi, "rx_ppp7", "# of PPP prio 7 frames received",
5127	    PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_RX_PORT_PPP7_L));
5128
5129#undef SYSCTL_ADD_T4_REG64
5130
5131#define SYSCTL_ADD_T4_PORTSTAT(name, desc) \
5132	SYSCTL_ADD_UQUAD(ctx, children, OID_AUTO, #name, CTLFLAG_RD, \
5133	    &pi->stats.name, desc)
5134
5135	/* We get these from port_stats and they may be stale by up to 1s */
5136	SYSCTL_ADD_T4_PORTSTAT(rx_ovflow0,
5137	    "# drops due to buffer-group 0 overflows");
5138	SYSCTL_ADD_T4_PORTSTAT(rx_ovflow1,
5139	    "# drops due to buffer-group 1 overflows");
5140	SYSCTL_ADD_T4_PORTSTAT(rx_ovflow2,
5141	    "# drops due to buffer-group 2 overflows");
5142	SYSCTL_ADD_T4_PORTSTAT(rx_ovflow3,
5143	    "# drops due to buffer-group 3 overflows");
5144	SYSCTL_ADD_T4_PORTSTAT(rx_trunc0,
5145	    "# of buffer-group 0 truncated packets");
5146	SYSCTL_ADD_T4_PORTSTAT(rx_trunc1,
5147	    "# of buffer-group 1 truncated packets");
5148	SYSCTL_ADD_T4_PORTSTAT(rx_trunc2,
5149	    "# of buffer-group 2 truncated packets");
5150	SYSCTL_ADD_T4_PORTSTAT(rx_trunc3,
5151	    "# of buffer-group 3 truncated packets");
5152
5153#undef SYSCTL_ADD_T4_PORTSTAT
5154}
5155
5156static int
5157sysctl_int_array(SYSCTL_HANDLER_ARGS)
5158{
5159	int rc, *i, space = 0;
5160	struct sbuf sb;
5161
5162	sbuf_new(&sb, NULL, 32, SBUF_AUTOEXTEND);
5163	for (i = arg1; arg2; arg2 -= sizeof(int), i++) {
5164		if (space)
5165			sbuf_printf(&sb, " ");
5166		sbuf_printf(&sb, "%d", *i);
5167		space = 1;
5168	}
5169	sbuf_finish(&sb);
5170	rc = sysctl_handle_string(oidp, sbuf_data(&sb), sbuf_len(&sb), req);
5171	sbuf_delete(&sb);
5172	return (rc);
5173}
5174
5175static int
5176sysctl_bitfield(SYSCTL_HANDLER_ARGS)
5177{
5178	int rc;
5179	struct sbuf *sb;
5180
5181	rc = sysctl_wire_old_buffer(req, 0);
5182	if (rc != 0)
5183		return (rc);
5184
5185	sb = sbuf_new_for_sysctl(NULL, NULL, 128, req);
5186	if (sb == NULL)
5187		return (ENOMEM);
5188
5189	sbuf_printf(sb, "%b", (int)arg2, (char *)arg1);
5190	rc = sbuf_finish(sb);
5191	sbuf_delete(sb);
5192
5193	return (rc);
5194}
5195
5196static int
5197sysctl_btphy(SYSCTL_HANDLER_ARGS)
5198{
5199	struct port_info *pi = arg1;
5200	int op = arg2;
5201	struct adapter *sc = pi->adapter;
5202	u_int v;
5203	int rc;
5204
5205	rc = begin_synchronized_op(sc, &pi->vi[0], SLEEP_OK | INTR_OK, "t4btt");
5206	if (rc)
5207		return (rc);
5208	/* XXX: magic numbers */
5209	rc = -t4_mdio_rd(sc, sc->mbox, pi->mdio_addr, 0x1e, op ? 0x20 : 0xc820,
5210	    &v);
5211	end_synchronized_op(sc, 0);
5212	if (rc)
5213		return (rc);
5214	if (op == 0)
5215		v /= 256;
5216
5217	rc = sysctl_handle_int(oidp, &v, 0, req);
5218	return (rc);
5219}
5220
5221static int
5222sysctl_noflowq(SYSCTL_HANDLER_ARGS)
5223{
5224	struct vi_info *vi = arg1;
5225	int rc, val;
5226
5227	val = vi->rsrv_noflowq;
5228	rc = sysctl_handle_int(oidp, &val, 0, req);
5229	if (rc != 0 || req->newptr == NULL)
5230		return (rc);
5231
5232	if ((val >= 1) && (vi->ntxq > 1))
5233		vi->rsrv_noflowq = 1;
5234	else
5235		vi->rsrv_noflowq = 0;
5236
5237	return (rc);
5238}
5239
5240static int
5241sysctl_holdoff_tmr_idx(SYSCTL_HANDLER_ARGS)
5242{
5243	struct vi_info *vi = arg1;
5244	struct adapter *sc = vi->pi->adapter;
5245	int idx, rc, i;
5246	struct sge_rxq *rxq;
5247#ifdef TCP_OFFLOAD
5248	struct sge_ofld_rxq *ofld_rxq;
5249#endif
5250	uint8_t v;
5251
5252	idx = vi->tmr_idx;
5253
5254	rc = sysctl_handle_int(oidp, &idx, 0, req);
5255	if (rc != 0 || req->newptr == NULL)
5256		return (rc);
5257
5258	if (idx < 0 || idx >= SGE_NTIMERS)
5259		return (EINVAL);
5260
5261	rc = begin_synchronized_op(sc, vi, HOLD_LOCK | SLEEP_OK | INTR_OK,
5262	    "t4tmr");
5263	if (rc)
5264		return (rc);
5265
5266	v = V_QINTR_TIMER_IDX(idx) | V_QINTR_CNT_EN(vi->pktc_idx != -1);
5267	for_each_rxq(vi, i, rxq) {
5268#ifdef atomic_store_rel_8
5269		atomic_store_rel_8(&rxq->iq.intr_params, v);
5270#else
5271		rxq->iq.intr_params = v;
5272#endif
5273	}
5274#ifdef TCP_OFFLOAD
5275	for_each_ofld_rxq(vi, i, ofld_rxq) {
5276#ifdef atomic_store_rel_8
5277		atomic_store_rel_8(&ofld_rxq->iq.intr_params, v);
5278#else
5279		ofld_rxq->iq.intr_params = v;
5280#endif
5281	}
5282#endif
5283	vi->tmr_idx = idx;
5284
5285	end_synchronized_op(sc, LOCK_HELD);
5286	return (0);
5287}
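/*
 * The new timer index is pushed into each queue's intr_params byte.  On
 * platforms without atomic_store_rel_8() a plain store is used; that is
 * presumably acceptable because a racing interrupt handler merely applies
 * the stale holdoff setting for one more interrupt.
 */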
5288
5289static int
5290sysctl_holdoff_pktc_idx(SYSCTL_HANDLER_ARGS)
5291{
5292	struct vi_info *vi = arg1;
5293	struct adapter *sc = vi->pi->adapter;
5294	int idx, rc;
5295
5296	idx = vi->pktc_idx;
5297
5298	rc = sysctl_handle_int(oidp, &idx, 0, req);
5299	if (rc != 0 || req->newptr == NULL)
5300		return (rc);
5301
5302	if (idx < -1 || idx >= SGE_NCOUNTERS)
5303		return (EINVAL);
5304
5305	rc = begin_synchronized_op(sc, vi, HOLD_LOCK | SLEEP_OK | INTR_OK,
5306	    "t4pktc");
5307	if (rc)
5308		return (rc);
5309
5310	if (vi->flags & VI_INIT_DONE)
5311		rc = EBUSY; /* cannot be changed once the queues are created */
5312	else
5313		vi->pktc_idx = idx;
5314
5315	end_synchronized_op(sc, LOCK_HELD);
5316	return (rc);
5317}
5318
5319static int
5320sysctl_qsize_rxq(SYSCTL_HANDLER_ARGS)
5321{
5322	struct vi_info *vi = arg1;
5323	struct adapter *sc = vi->pi->adapter;
5324	int qsize, rc;
5325
5326	qsize = vi->qsize_rxq;
5327
5328	rc = sysctl_handle_int(oidp, &qsize, 0, req);
5329	if (rc != 0 || req->newptr == NULL)
5330		return (rc);
5331
5332	if (qsize < 128 || (qsize & 7))
5333		return (EINVAL);
5334
5335	rc = begin_synchronized_op(sc, vi, HOLD_LOCK | SLEEP_OK | INTR_OK,
5336	    "t4rxqs");
5337	if (rc)
5338		return (rc);
5339
5340	if (vi->flags & VI_INIT_DONE)
5341		rc = EBUSY; /* cannot be changed once the queues are created */
5342	else
5343		vi->qsize_rxq = qsize;
5344
5345	end_synchronized_op(sc, LOCK_HELD);
5346	return (rc);
5347}
5348
5349static int
5350sysctl_qsize_txq(SYSCTL_HANDLER_ARGS)
5351{
5352	struct vi_info *vi = arg1;
5353	struct adapter *sc = vi->pi->adapter;
5354	int qsize, rc;
5355
5356	qsize = vi->qsize_txq;
5357
5358	rc = sysctl_handle_int(oidp, &qsize, 0, req);
5359	if (rc != 0 || req->newptr == NULL)
5360		return (rc);
5361
5362	if (qsize < 128 || qsize > 65536)
5363		return (EINVAL);
5364
5365	rc = begin_synchronized_op(sc, vi, HOLD_LOCK | SLEEP_OK | INTR_OK,
5366	    "t4txqs");
5367	if (rc)
5368		return (rc);
5369
5370	if (vi->flags & VI_INIT_DONE)
5371		rc = EBUSY; /* cannot be changed once the queues are created */
5372	else
5373		vi->qsize_txq = qsize;
5374
5375	end_synchronized_op(sc, LOCK_HELD);
5376	return (rc);
5377}
5378
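/*
 * Link pause settings.  A read reports the current PAUSE_RX/PAUSE_TX flags
 * as a bit string; a write takes a single digit holding the requested flag
 * mask (0-3).  For example, assuming the usual port sysctl node:
 *	# sysctl dev.cxgbe.0.pause_settings=3	(enable rx and tx pause)
 */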
5379static int
5380sysctl_pause_settings(SYSCTL_HANDLER_ARGS)
5381{
5382	struct port_info *pi = arg1;
5383	struct adapter *sc = pi->adapter;
5384	struct link_config *lc = &pi->link_cfg;
5385	int rc;
5386
5387	if (req->newptr == NULL) {
5388		struct sbuf *sb;
5389		static char *bits = "\20\1PAUSE_RX\2PAUSE_TX";
5390
5391		rc = sysctl_wire_old_buffer(req, 0);
5392		if (rc != 0)
5393			return (rc);
5394
5395		sb = sbuf_new_for_sysctl(NULL, NULL, 128, req);
5396		if (sb == NULL)
5397			return (ENOMEM);
5398
5399		sbuf_printf(sb, "%b", lc->fc & (PAUSE_TX | PAUSE_RX), bits);
5400		rc = sbuf_finish(sb);
5401		sbuf_delete(sb);
5402	} else {
5403		char s[2];
5404		int n;
5405
5406		s[0] = '0' + (lc->requested_fc & (PAUSE_TX | PAUSE_RX));
5407		s[1] = 0;
5408
5409		rc = sysctl_handle_string(oidp, s, sizeof(s), req);
5410		if (rc != 0)
5411			return (rc);
5412
5413		if (s[1] != 0)
5414			return (EINVAL);
5415		if (s[0] < '0' || s[0] > '9')
5416			return (EINVAL);	/* not a number */
5417		n = s[0] - '0';
5418		if (n & ~(PAUSE_TX | PAUSE_RX))
5419			return (EINVAL);	/* some other bit is set too */
5420
5421		rc = begin_synchronized_op(sc, &pi->vi[0], SLEEP_OK | INTR_OK,
5422		    "t4PAUSE");
5423		if (rc)
5424			return (rc);
5425		if ((lc->requested_fc & (PAUSE_TX | PAUSE_RX)) != n) {
5426			int link_ok = lc->link_ok;
5427
5428			lc->requested_fc &= ~(PAUSE_TX | PAUSE_RX);
5429			lc->requested_fc |= n;
5430			rc = -t4_link_l1cfg(sc, sc->mbox, pi->tx_chan, lc);
5431			lc->link_ok = link_ok;	/* restore */
5432		}
5433		end_synchronized_op(sc, 0);
5434	}
5435
5436	return (rc);
5437}
5438
5439static int
5440sysctl_handle_t4_reg64(SYSCTL_HANDLER_ARGS)
5441{
5442	struct adapter *sc = arg1;
5443	int reg = arg2;
5444	uint64_t val;
5445
5446	val = t4_read_reg64(sc, reg);
5447
5448	return (sysctl_handle_64(oidp, &val, 0, req));
5449}
5450
5451static int
5452sysctl_temperature(SYSCTL_HANDLER_ARGS)
5453{
5454	struct adapter *sc = arg1;
5455	int rc, t;
5456	uint32_t param, val;
5457
5458	rc = begin_synchronized_op(sc, NULL, SLEEP_OK | INTR_OK, "t4temp");
5459	if (rc)
5460		return (rc);
5461	param = V_FW_PARAMS_MNEM(FW_PARAMS_MNEM_DEV) |
5462	    V_FW_PARAMS_PARAM_X(FW_PARAMS_PARAM_DEV_DIAG) |
5463	    V_FW_PARAMS_PARAM_Y(FW_PARAM_DEV_DIAG_TMP);
5464	rc = -t4_query_params(sc, sc->mbox, sc->pf, 0, 1, &param, &val);
5465	end_synchronized_op(sc, 0);
5466	if (rc)
5467		return (rc);
5468
5469	/* unknown is returned as 0 but we display -1 in that case */
5470	t = val == 0 ? -1 : val;
5471
5472	rc = sysctl_handle_int(oidp, &t, 0, req);
5473	return (rc);
5474}
5475
5476#ifdef SBUF_DRAIN
5477static int
5478sysctl_cctrl(SYSCTL_HANDLER_ARGS)
5479{
5480	struct adapter *sc = arg1;
5481	struct sbuf *sb;
5482	int rc, i;
5483	uint16_t incr[NMTUS][NCCTRL_WIN];
5484	static const char *dec_fac[] = {
5485		"0.5", "0.5625", "0.625", "0.6875", "0.75", "0.8125", "0.875",
5486		"0.9375"
5487	};
5488
5489	rc = sysctl_wire_old_buffer(req, 0);
5490	if (rc != 0)
5491		return (rc);
5492
5493	sb = sbuf_new_for_sysctl(NULL, NULL, 4096, req);
5494	if (sb == NULL)
5495		return (ENOMEM);
5496
5497	t4_read_cong_tbl(sc, incr);
5498
5499	for (i = 0; i < NCCTRL_WIN; ++i) {
5500		sbuf_printf(sb, "%2d: %4u %4u %4u %4u %4u %4u %4u %4u\n", i,
5501		    incr[0][i], incr[1][i], incr[2][i], incr[3][i], incr[4][i],
5502		    incr[5][i], incr[6][i], incr[7][i]);
5503		sbuf_printf(sb, "%8u %4u %4u %4u %4u %4u %4u %4u %5u %s\n",
5504		    incr[8][i], incr[9][i], incr[10][i], incr[11][i],
5505		    incr[12][i], incr[13][i], incr[14][i], incr[15][i],
5506		    sc->params.a_wnd[i], dec_fac[sc->params.b_wnd[i]]);
5507	}
5508
5509	rc = sbuf_finish(sb);
5510	sbuf_delete(sb);
5511
5512	return (rc);
5513}
5514
5515static const char *qname[CIM_NUM_IBQ + CIM_NUM_OBQ_T5] = {
5516	"TP0", "TP1", "ULP", "SGE0", "SGE1", "NC-SI",	/* ibq's */
5517	"ULP0", "ULP1", "ULP2", "ULP3", "SGE", "NC-SI",	/* obq's */
5518	"SGE0-RX", "SGE1-RX"	/* additional obq's (T5 onwards) */
5519};
5520
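/*
 * Dump a single CIM queue.  arg2 is a flat index into qname[] above:
 * values below CIM_NUM_IBQ select an inbound queue and the rest select an
 * outbound queue; the number of OBQs actually present comes from
 * chip_params->cim_num_obq (the SGE0-RX/SGE1-RX OBQs are T5 onwards).
 */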
5521static int
5522sysctl_cim_ibq_obq(SYSCTL_HANDLER_ARGS)
5523{
5524	struct adapter *sc = arg1;
5525	struct sbuf *sb;
5526	int rc, i, n, qid = arg2;
5527	uint32_t *buf, *p;
5528	char *qtype;
5529	u_int cim_num_obq = sc->chip_params->cim_num_obq;
5530
5531	KASSERT(qid >= 0 && qid < CIM_NUM_IBQ + cim_num_obq,
5532	    ("%s: bad qid %d\n", __func__, qid));
5533
5534	if (qid < CIM_NUM_IBQ) {
5535		/* inbound queue */
5536		qtype = "IBQ";
5537		n = 4 * CIM_IBQ_SIZE;
5538		buf = malloc(n * sizeof(uint32_t), M_CXGBE, M_ZERO | M_WAITOK);
5539		rc = t4_read_cim_ibq(sc, qid, buf, n);
5540	} else {
5541		/* outbound queue */
5542		qtype = "OBQ";
5543		qid -= CIM_NUM_IBQ;
5544		n = 4 * cim_num_obq * CIM_OBQ_SIZE;
5545		buf = malloc(n * sizeof(uint32_t), M_CXGBE, M_ZERO | M_WAITOK);
5546		rc = t4_read_cim_obq(sc, qid, buf, n);
5547	}
5548
5549	if (rc < 0) {
5550		rc = -rc;
5551		goto done;
5552	}
5553	n = rc * sizeof(uint32_t);	/* rc has # of words actually read */
5554
5555	rc = sysctl_wire_old_buffer(req, 0);
5556	if (rc != 0)
5557		goto done;
5558
5559	sb = sbuf_new_for_sysctl(NULL, NULL, PAGE_SIZE, req);
5560	if (sb == NULL) {
5561		rc = ENOMEM;
5562		goto done;
5563	}
5564
5565	sbuf_printf(sb, "%s%d %s", qtype, qid, qname[arg2]);
5566	for (i = 0, p = buf; i < n; i += 16, p += 4)
5567		sbuf_printf(sb, "\n%#06x: %08x %08x %08x %08x", i, p[0], p[1],
5568		    p[2], p[3]);
5569
5570	rc = sbuf_finish(sb);
5571	sbuf_delete(sb);
5572done:
5573	free(buf, M_CXGBE);
5574	return (rc);
5575}
5576
5577static int
5578sysctl_cim_la(SYSCTL_HANDLER_ARGS)
5579{
5580	struct adapter *sc = arg1;
5581	u_int cfg;
5582	struct sbuf *sb;
5583	uint32_t *buf, *p;
5584	int rc;
5585
5586	MPASS(chip_id(sc) <= CHELSIO_T5);
5587
5588	rc = -t4_cim_read(sc, A_UP_UP_DBG_LA_CFG, 1, &cfg);
5589	if (rc != 0)
5590		return (rc);
5591
5592	rc = sysctl_wire_old_buffer(req, 0);
5593	if (rc != 0)
5594		return (rc);
5595
5596	sb = sbuf_new_for_sysctl(NULL, NULL, 4096, req);
5597	if (sb == NULL)
5598		return (ENOMEM);
5599
5600	buf = malloc(sc->params.cim_la_size * sizeof(uint32_t), M_CXGBE,
5601	    M_ZERO | M_WAITOK);
5602
5603	rc = -t4_cim_read_la(sc, buf, NULL);
5604	if (rc != 0) {
		sbuf_delete(sb);	/* avoid leaking the sbuf on error */
5605		goto done;
	}
5606
5607	sbuf_printf(sb, "Status   Data      PC%s",
5608	    cfg & F_UPDBGLACAPTPCONLY ? "" :
5609	    "     LS0Stat  LS0Addr             LS0Data");
5610
5611	for (p = buf; p <= &buf[sc->params.cim_la_size - 8]; p += 8) {
5612		if (cfg & F_UPDBGLACAPTPCONLY) {
5613			sbuf_printf(sb, "\n  %02x   %08x %08x", p[5] & 0xff,
5614			    p[6], p[7]);
5615			sbuf_printf(sb, "\n  %02x   %02x%06x %02x%06x",
5616			    (p[3] >> 8) & 0xff, p[3] & 0xff, p[4] >> 8,
5617			    p[4] & 0xff, p[5] >> 8);
5618			sbuf_printf(sb, "\n  %02x   %x%07x %x%07x",
5619			    (p[0] >> 4) & 0xff, p[0] & 0xf, p[1] >> 4,
5620			    p[1] & 0xf, p[2] >> 4);
5621		} else {
5622			sbuf_printf(sb,
5623			    "\n  %02x   %x%07x %x%07x %08x %08x "
5624			    "%08x%08x%08x%08x",
5625			    (p[0] >> 4) & 0xff, p[0] & 0xf, p[1] >> 4,
5626			    p[1] & 0xf, p[2] >> 4, p[2] & 0xf, p[3], p[4], p[5],
5627			    p[6], p[7]);
5628		}
5629	}
5630
5631	rc = sbuf_finish(sb);
5632	sbuf_delete(sb);
5633done:
5634	free(buf, M_CXGBE);
5635	return (rc);
5636}
5637
5638static int
5639sysctl_cim_la_t6(SYSCTL_HANDLER_ARGS)
5640{
5641	struct adapter *sc = arg1;
5642	u_int cfg;
5643	struct sbuf *sb;
5644	uint32_t *buf, *p;
5645	int rc;
5646
5647	MPASS(chip_id(sc) > CHELSIO_T5);
5648
5649	rc = -t4_cim_read(sc, A_UP_UP_DBG_LA_CFG, 1, &cfg);
5650	if (rc != 0)
5651		return (rc);
5652
5653	rc = sysctl_wire_old_buffer(req, 0);
5654	if (rc != 0)
5655		return (rc);
5656
5657	sb = sbuf_new_for_sysctl(NULL, NULL, 4096, req);
5658	if (sb == NULL)
5659		return (ENOMEM);
5660
5661	buf = malloc(sc->params.cim_la_size * sizeof(uint32_t), M_CXGBE,
5662	    M_ZERO | M_WAITOK);
5663
5664	rc = -t4_cim_read_la(sc, buf, NULL);
5665	if (rc != 0) {
		sbuf_delete(sb);	/* avoid leaking the sbuf on error */
5666		goto done;
	}
5667
5668	sbuf_printf(sb, "Status   Inst    Data      PC%s",
5669	    cfg & F_UPDBGLACAPTPCONLY ? "" :
5670	    "     LS0Stat  LS0Addr  LS0Data  LS1Stat  LS1Addr  LS1Data");
5671
5672	for (p = buf; p <= &buf[sc->params.cim_la_size - 10]; p += 10) {
5673		if (cfg & F_UPDBGLACAPTPCONLY) {
5674			sbuf_printf(sb, "\n  %02x   %08x %08x %08x",
5675			    p[3] & 0xff, p[2], p[1], p[0]);
5676			sbuf_printf(sb, "\n  %02x   %02x%06x %02x%06x %02x%06x",
5677			    (p[6] >> 8) & 0xff, p[6] & 0xff, p[5] >> 8,
5678			    p[5] & 0xff, p[4] >> 8, p[4] & 0xff, p[3] >> 8);
5679			sbuf_printf(sb, "\n  %02x   %04x%04x %04x%04x %04x%04x",
5680			    (p[9] >> 16) & 0xff, p[9] & 0xffff, p[8] >> 16,
5681			    p[8] & 0xffff, p[7] >> 16, p[7] & 0xffff,
5682			    p[6] >> 16);
5683		} else {
5684			sbuf_printf(sb, "\n  %02x   %04x%04x %04x%04x %04x%04x "
5685			    "%08x %08x %08x %08x %08x %08x",
5686			    (p[9] >> 16) & 0xff,
5687			    p[9] & 0xffff, p[8] >> 16,
5688			    p[8] & 0xffff, p[7] >> 16,
5689			    p[7] & 0xffff, p[6] >> 16,
5690			    p[2], p[1], p[0], p[5], p[4], p[3]);
5691		}
5692	}
5693
5694	rc = sbuf_finish(sb);
5695	sbuf_delete(sb);
5696done:
5697	free(buf, M_CXGBE);
5698	return (rc);
5699}
5700
5701static int
5702sysctl_cim_ma_la(SYSCTL_HANDLER_ARGS)
5703{
5704	struct adapter *sc = arg1;
5705	u_int i;
5706	struct sbuf *sb;
5707	uint32_t *buf, *p;
5708	int rc;
5709
5710	rc = sysctl_wire_old_buffer(req, 0);
5711	if (rc != 0)
5712		return (rc);
5713
5714	sb = sbuf_new_for_sysctl(NULL, NULL, 4096, req);
5715	if (sb == NULL)
5716		return (ENOMEM);
5717
5718	buf = malloc(2 * CIM_MALA_SIZE * 5 * sizeof(uint32_t), M_CXGBE,
5719	    M_ZERO | M_WAITOK);
5720
5721	t4_cim_read_ma_la(sc, buf, buf + 5 * CIM_MALA_SIZE);
5722	p = buf;
5723
5724	for (i = 0; i < CIM_MALA_SIZE; i++, p += 5) {
5725		sbuf_printf(sb, "\n%02x%08x%08x%08x%08x", p[4], p[3], p[2],
5726		    p[1], p[0]);
5727	}
5728
5729	sbuf_printf(sb, "\n\nCnt ID Tag UE       Data       RDY VLD");
5730	for (i = 0; i < CIM_MALA_SIZE; i++, p += 5) {
5731		sbuf_printf(sb, "\n%3u %2u  %x   %u %08x%08x  %u   %u",
5732		    (p[2] >> 10) & 0xff, (p[2] >> 7) & 7,
5733		    (p[2] >> 3) & 0xf, (p[2] >> 2) & 1,
5734		    (p[1] >> 2) | ((p[2] & 3) << 30),
5735		    (p[0] >> 2) | ((p[1] & 3) << 30), (p[0] >> 1) & 1,
5736		    p[0] & 1);
5737	}
5738
5739	rc = sbuf_finish(sb);
5740	sbuf_delete(sb);
5741	free(buf, M_CXGBE);
5742	return (rc);
5743}
5744
5745static int
5746sysctl_cim_pif_la(SYSCTL_HANDLER_ARGS)
5747{
5748	struct adapter *sc = arg1;
5749	u_int i;
5750	struct sbuf *sb;
5751	uint32_t *buf, *p;
5752	int rc;
5753
5754	rc = sysctl_wire_old_buffer(req, 0);
5755	if (rc != 0)
5756		return (rc);
5757
5758	sb = sbuf_new_for_sysctl(NULL, NULL, 4096, req);
5759	if (sb == NULL)
5760		return (ENOMEM);
5761
5762	buf = malloc(2 * CIM_PIFLA_SIZE * 6 * sizeof(uint32_t), M_CXGBE,
5763	    M_ZERO | M_WAITOK);
5764
5765	t4_cim_read_pif_la(sc, buf, buf + 6 * CIM_PIFLA_SIZE, NULL, NULL);
5766	p = buf;
5767
5768	sbuf_printf(sb, "Cntl ID DataBE   Addr                 Data");
5769	for (i = 0; i < CIM_PIFLA_SIZE; i++, p += 6) {
5770		sbuf_printf(sb, "\n %02x  %02x  %04x  %08x %08x%08x%08x%08x",
5771		    (p[5] >> 22) & 0xff, (p[5] >> 16) & 0x3f, p[5] & 0xffff,
5772		    p[4], p[3], p[2], p[1], p[0]);
5773	}
5774
5775	sbuf_printf(sb, "\n\nCntl ID               Data");
5776	for (i = 0; i < CIM_PIFLA_SIZE; i++, p += 6) {
5777		sbuf_printf(sb, "\n %02x  %02x %08x%08x%08x%08x",
5778		    (p[4] >> 6) & 0xff, p[4] & 0x3f, p[3], p[2], p[1], p[0]);
5779	}
5780
5781	rc = sbuf_finish(sb);
5782	sbuf_delete(sb);
5783	free(buf, M_CXGBE);
5784	return (rc);
5785}
5786
5787static int
5788sysctl_cim_qcfg(SYSCTL_HANDLER_ARGS)
5789{
5790	struct adapter *sc = arg1;
5791	struct sbuf *sb;
5792	int rc, i;
5793	uint16_t base[CIM_NUM_IBQ + CIM_NUM_OBQ_T5];
5794	uint16_t size[CIM_NUM_IBQ + CIM_NUM_OBQ_T5];
5795	uint16_t thres[CIM_NUM_IBQ];
5796	uint32_t obq_wr[2 * CIM_NUM_OBQ_T5], *wr = obq_wr;
5797	uint32_t stat[4 * (CIM_NUM_IBQ + CIM_NUM_OBQ_T5)], *p = stat;
5798	u_int cim_num_obq, ibq_rdaddr, obq_rdaddr, nq;
5799
5800	cim_num_obq = sc->chip_params->cim_num_obq;
5801	if (is_t4(sc)) {
5802		ibq_rdaddr = A_UP_IBQ_0_RDADDR;
5803		obq_rdaddr = A_UP_OBQ_0_REALADDR;
5804	} else {
5805		ibq_rdaddr = A_UP_IBQ_0_SHADOW_RDADDR;
5806		obq_rdaddr = A_UP_OBQ_0_SHADOW_REALADDR;
5807	}
5808	nq = CIM_NUM_IBQ + cim_num_obq;
5809
5810	rc = -t4_cim_read(sc, ibq_rdaddr, 4 * nq, stat);
5811	if (rc == 0)
5812		rc = -t4_cim_read(sc, obq_rdaddr, 2 * cim_num_obq, obq_wr);
5813	if (rc != 0)
5814		return (rc);
5815
5816	t4_read_cimq_cfg(sc, base, size, thres);
5817
5818	rc = sysctl_wire_old_buffer(req, 0);
5819	if (rc != 0)
5820		return (rc);
5821
5822	sb = sbuf_new_for_sysctl(NULL, NULL, PAGE_SIZE, req);
5823	if (sb == NULL)
5824		return (ENOMEM);
5825
5826	sbuf_printf(sb, "Queue  Base  Size Thres RdPtr WrPtr  SOP  EOP Avail");
5827
5828	for (i = 0; i < CIM_NUM_IBQ; i++, p += 4)
5829		sbuf_printf(sb, "\n%7s %5x %5u %5u %6x  %4x %4u %4u %5u",
5830		    qname[i], base[i], size[i], thres[i], G_IBQRDADDR(p[0]),
5831		    G_IBQWRADDR(p[1]), G_QUESOPCNT(p[3]), G_QUEEOPCNT(p[3]),
5832		    G_QUEREMFLITS(p[2]) * 16);
5833	for ( ; i < nq; i++, p += 4, wr += 2)
5834		sbuf_printf(sb, "\n%7s %5x %5u %12x  %4x %4u %4u %5u", qname[i],
5835		    base[i], size[i], G_QUERDADDR(p[0]) & 0x3fff,
5836		    wr[0] - base[i], G_QUESOPCNT(p[3]), G_QUEEOPCNT(p[3]),
5837		    G_QUEREMFLITS(p[2]) * 16);
5838
5839	rc = sbuf_finish(sb);
5840	sbuf_delete(sb);
5841
5842	return (rc);
5843}
5844
5845static int
5846sysctl_cpl_stats(SYSCTL_HANDLER_ARGS)
5847{
5848	struct adapter *sc = arg1;
5849	struct sbuf *sb;
5850	int rc;
5851	struct tp_cpl_stats stats;
5852
5853	rc = sysctl_wire_old_buffer(req, 0);
5854	if (rc != 0)
5855		return (rc);
5856
5857	sb = sbuf_new_for_sysctl(NULL, NULL, 256, req);
5858	if (sb == NULL)
5859		return (ENOMEM);
5860
5861	mtx_lock(&sc->reg_lock);
5862	t4_tp_get_cpl_stats(sc, &stats);
5863	mtx_unlock(&sc->reg_lock);
5864
5865	if (sc->chip_params->nchan > 2) {
5866		sbuf_printf(sb, "                 channel 0  channel 1"
5867		    "  channel 2  channel 3");
5868		sbuf_printf(sb, "\nCPL requests:   %10u %10u %10u %10u",
5869		    stats.req[0], stats.req[1], stats.req[2], stats.req[3]);
5870		sbuf_printf(sb, "\nCPL responses:  %10u %10u %10u %10u",
5871		    stats.rsp[0], stats.rsp[1], stats.rsp[2], stats.rsp[3]);
5872	} else {
5873		sbuf_printf(sb, "                 channel 0  channel 1");
5874		sbuf_printf(sb, "\nCPL requests:   %10u %10u",
5875		    stats.req[0], stats.req[1]);
5876		sbuf_printf(sb, "\nCPL responses:  %10u %10u",
5877		    stats.rsp[0], stats.rsp[1]);
5878	}
5879
5880	rc = sbuf_finish(sb);
5881	sbuf_delete(sb);
5882
5883	return (rc);
5884}
5885
5886static int
5887sysctl_ddp_stats(SYSCTL_HANDLER_ARGS)
5888{
5889	struct adapter *sc = arg1;
5890	struct sbuf *sb;
5891	int rc;
5892	struct tp_usm_stats stats;
5893
5894	rc = sysctl_wire_old_buffer(req, 0);
5895	if (rc != 0)
5896		return (rc);
5897
5898	sb = sbuf_new_for_sysctl(NULL, NULL, 256, req);
5899	if (sb == NULL)
5900		return (ENOMEM);
5901
5902	t4_get_usm_stats(sc, &stats);
5903
5904	sbuf_printf(sb, "Frames: %u\n", stats.frames);
5905	sbuf_printf(sb, "Octets: %ju\n", stats.octets);
5906	sbuf_printf(sb, "Drops:  %u", stats.drops);
5907
5908	rc = sbuf_finish(sb);
5909	sbuf_delete(sb);
5910
5911	return (rc);
5912}
5913
5914static const char * const devlog_level_strings[] = {
5915	[FW_DEVLOG_LEVEL_EMERG]		= "EMERG",
5916	[FW_DEVLOG_LEVEL_CRIT]		= "CRIT",
5917	[FW_DEVLOG_LEVEL_ERR]		= "ERR",
5918	[FW_DEVLOG_LEVEL_NOTICE]	= "NOTICE",
5919	[FW_DEVLOG_LEVEL_INFO]		= "INFO",
5920	[FW_DEVLOG_LEVEL_DEBUG]		= "DEBUG"
5921};
5922
5923static const char * const devlog_facility_strings[] = {
5924	[FW_DEVLOG_FACILITY_CORE]	= "CORE",
5925	[FW_DEVLOG_FACILITY_CF]		= "CF",
5926	[FW_DEVLOG_FACILITY_SCHED]	= "SCHED",
5927	[FW_DEVLOG_FACILITY_TIMER]	= "TIMER",
5928	[FW_DEVLOG_FACILITY_RES]	= "RES",
5929	[FW_DEVLOG_FACILITY_HW]		= "HW",
5930	[FW_DEVLOG_FACILITY_FLR]	= "FLR",
5931	[FW_DEVLOG_FACILITY_DMAQ]	= "DMAQ",
5932	[FW_DEVLOG_FACILITY_PHY]	= "PHY",
5933	[FW_DEVLOG_FACILITY_MAC]	= "MAC",
5934	[FW_DEVLOG_FACILITY_PORT]	= "PORT",
5935	[FW_DEVLOG_FACILITY_VI]		= "VI",
5936	[FW_DEVLOG_FACILITY_FILTER]	= "FILTER",
5937	[FW_DEVLOG_FACILITY_ACL]	= "ACL",
5938	[FW_DEVLOG_FACILITY_TM]		= "TM",
5939	[FW_DEVLOG_FACILITY_QFC]	= "QFC",
5940	[FW_DEVLOG_FACILITY_DCB]	= "DCB",
5941	[FW_DEVLOG_FACILITY_ETH]	= "ETH",
5942	[FW_DEVLOG_FACILITY_OFLD]	= "OFLD",
5943	[FW_DEVLOG_FACILITY_RI]		= "RI",
5944	[FW_DEVLOG_FACILITY_ISCSI]	= "ISCSI",
5945	[FW_DEVLOG_FACILITY_FCOE]	= "FCOE",
5946	[FW_DEVLOG_FACILITY_FOISCSI]	= "FOISCSI",
5947	[FW_DEVLOG_FACILITY_FOFCOE]	= "FOFCOE",
5948	[FW_DEVLOG_FACILITY_CHNET]	= "CHNET",
5949};
5950
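/*
 * The firmware devlog is a circular buffer in adapter memory.  Read the
 * whole buffer, locate the oldest entry (the one with the smallest
 * timestamp), and display the log from there, wrapping around at the end
 * of the buffer.
 */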
5951static int
5952sysctl_devlog(SYSCTL_HANDLER_ARGS)
5953{
5954	struct adapter *sc = arg1;
5955	struct devlog_params *dparams = &sc->params.devlog;
5956	struct fw_devlog_e *buf, *e;
5957	int i, j, rc, nentries, first = 0;
5958	struct sbuf *sb;
5959	uint64_t ftstamp = UINT64_MAX;
5960
5961	if (dparams->addr == 0)
5962		return (ENXIO);
5963
5964	buf = malloc(dparams->size, M_CXGBE, M_NOWAIT);
5965	if (buf == NULL)
5966		return (ENOMEM);
5967
5968	rc = read_via_memwin(sc, 1, dparams->addr, (void *)buf, dparams->size);
5969	if (rc != 0)
5970		goto done;
5971
5972	nentries = dparams->size / sizeof(struct fw_devlog_e);
5973	for (i = 0; i < nentries; i++) {
5974		e = &buf[i];
5975
5976		if (e->timestamp == 0)
5977			break;	/* end */
5978
5979		e->timestamp = be64toh(e->timestamp);
5980		e->seqno = be32toh(e->seqno);
5981		for (j = 0; j < 8; j++)
5982			e->params[j] = be32toh(e->params[j]);
5983
5984		if (e->timestamp < ftstamp) {
5985			ftstamp = e->timestamp;
5986			first = i;
5987		}
5988	}
5989
5990	if (buf[first].timestamp == 0)
5991		goto done;	/* nothing in the log */
5992
5993	rc = sysctl_wire_old_buffer(req, 0);
5994	if (rc != 0)
5995		goto done;
5996
5997	sb = sbuf_new_for_sysctl(NULL, NULL, 4096, req);
5998	if (sb == NULL) {
5999		rc = ENOMEM;
6000		goto done;
6001	}
6002	sbuf_printf(sb, "%10s  %15s  %8s  %8s  %s\n",
6003	    "Seq#", "Tstamp", "Level", "Facility", "Message");
6004
6005	i = first;
6006	do {
6007		e = &buf[i];
6008		if (e->timestamp == 0)
6009			break;	/* end */
6010
6011		sbuf_printf(sb, "%10u  %15ju  %8s  %8s  ",
6012		    e->seqno, e->timestamp,
6013		    (e->level < nitems(devlog_level_strings) ?
6014			devlog_level_strings[e->level] : "UNKNOWN"),
6015		    (e->facility < nitems(devlog_facility_strings) ?
6016			devlog_facility_strings[e->facility] : "UNKNOWN"));
6017		sbuf_printf(sb, e->fmt, e->params[0], e->params[1],
6018		    e->params[2], e->params[3], e->params[4],
6019		    e->params[5], e->params[6], e->params[7]);
6020
6021		if (++i == nentries)
6022			i = 0;
6023	} while (i != first);
6024
6025	rc = sbuf_finish(sb);
6026	sbuf_delete(sb);
6027done:
6028	free(buf, M_CXGBE);
6029	return (rc);
6030}
6031
6032static int
6033sysctl_fcoe_stats(SYSCTL_HANDLER_ARGS)
6034{
6035	struct adapter *sc = arg1;
6036	struct sbuf *sb;
6037	int rc;
6038	struct tp_fcoe_stats stats[MAX_NCHAN];
6039	int i, nchan = sc->chip_params->nchan;
6040
6041	rc = sysctl_wire_old_buffer(req, 0);
6042	if (rc != 0)
6043		return (rc);
6044
6045	sb = sbuf_new_for_sysctl(NULL, NULL, 256, req);
6046	if (sb == NULL)
6047		return (ENOMEM);
6048
6049	for (i = 0; i < nchan; i++)
6050		t4_get_fcoe_stats(sc, i, &stats[i]);
6051
6052	if (nchan > 2) {
6053		sbuf_printf(sb, "                   channel 0        channel 1"
6054		    "        channel 2        channel 3");
6055		sbuf_printf(sb, "\noctetsDDP:  %16ju %16ju %16ju %16ju",
6056		    stats[0].octets_ddp, stats[1].octets_ddp,
6057		    stats[2].octets_ddp, stats[3].octets_ddp);
6058		sbuf_printf(sb, "\nframesDDP:  %16u %16u %16u %16u",
6059		    stats[0].frames_ddp, stats[1].frames_ddp,
6060		    stats[2].frames_ddp, stats[3].frames_ddp);
6061		sbuf_printf(sb, "\nframesDrop: %16u %16u %16u %16u",
6062		    stats[0].frames_drop, stats[1].frames_drop,
6063		    stats[2].frames_drop, stats[3].frames_drop);
6064	} else {
6065		sbuf_printf(sb, "                   channel 0        channel 1");
6066		sbuf_printf(sb, "\noctetsDDP:  %16ju %16ju",
6067		    stats[0].octets_ddp, stats[1].octets_ddp);
6068		sbuf_printf(sb, "\nframesDDP:  %16u %16u",
6069		    stats[0].frames_ddp, stats[1].frames_ddp);
6070		sbuf_printf(sb, "\nframesDrop: %16u %16u",
6071		    stats[0].frames_drop, stats[1].frames_drop);
6072	}
6073
6074	rc = sbuf_finish(sb);
6075	sbuf_delete(sb);
6076
6077	return (rc);
6078}
6079
6080static int
6081sysctl_hw_sched(SYSCTL_HANDLER_ARGS)
6082{
6083	struct adapter *sc = arg1;
6084	struct sbuf *sb;
6085	int rc, i;
6086	unsigned int map, kbps, ipg, mode;
6087	unsigned int pace_tab[NTX_SCHED];
6088
6089	rc = sysctl_wire_old_buffer(req, 0);
6090	if (rc != 0)
6091		return (rc);
6092
6093	sb = sbuf_new_for_sysctl(NULL, NULL, 256, req);
6094	if (sb == NULL)
6095		return (ENOMEM);
6096
6097	map = t4_read_reg(sc, A_TP_TX_MOD_QUEUE_REQ_MAP);
6098	mode = G_TIMERMODE(t4_read_reg(sc, A_TP_MOD_CONFIG));
6099	t4_read_pace_tbl(sc, pace_tab);
6100
6101	sbuf_printf(sb, "Scheduler  Mode   Channel  Rate (Kbps)   "
6102	    "Class IPG (0.1 ns)   Flow IPG (us)");
6103
6104	for (i = 0; i < NTX_SCHED; ++i, map >>= 2) {
6105		t4_get_tx_sched(sc, i, &kbps, &ipg);
6106		sbuf_printf(sb, "\n    %u      %-5s     %u     ", i,
6107		    (mode & (1 << i)) ? "flow" : "class", map & 3);
6108		if (kbps)
6109			sbuf_printf(sb, "%9u     ", kbps);
6110		else
6111			sbuf_printf(sb, " disabled     ");
6112
6113		if (ipg)
6114			sbuf_printf(sb, "%13u        ", ipg);
6115		else
6116			sbuf_printf(sb, "     disabled        ");
6117
6118		if (pace_tab[i])
6119			sbuf_printf(sb, "%10u", pace_tab[i]);
6120		else
6121			sbuf_printf(sb, "  disabled");
6122	}
6123
6124	rc = sbuf_finish(sb);
6125	sbuf_delete(sb);
6126
6127	return (rc);
6128}
6129
6130static int
6131sysctl_lb_stats(SYSCTL_HANDLER_ARGS)
6132{
6133	struct adapter *sc = arg1;
6134	struct sbuf *sb;
6135	int rc, i, j;
6136	uint64_t *p0, *p1;
6137	struct lb_port_stats s[2];
6138	static const char *stat_name[] = {
6139		"OctetsOK:", "FramesOK:", "BcastFrames:", "McastFrames:",
6140		"UcastFrames:", "ErrorFrames:", "Frames64:", "Frames65To127:",
6141		"Frames128To255:", "Frames256To511:", "Frames512To1023:",
6142		"Frames1024To1518:", "Frames1519ToMax:", "FramesDropped:",
6143		"BG0FramesDropped:", "BG1FramesDropped:", "BG2FramesDropped:",
6144		"BG3FramesDropped:", "BG0FramesTrunc:", "BG1FramesTrunc:",
6145		"BG2FramesTrunc:", "BG3FramesTrunc:"
6146	};
6147
6148	rc = sysctl_wire_old_buffer(req, 0);
6149	if (rc != 0)
6150		return (rc);
6151
6152	sb = sbuf_new_for_sysctl(NULL, NULL, 4096, req);
6153	if (sb == NULL)
6154		return (ENOMEM);
6155
6156	memset(s, 0, sizeof(s));
6157
6158	for (i = 0; i < sc->chip_params->nchan; i += 2) {
6159		t4_get_lb_stats(sc, i, &s[0]);
6160		t4_get_lb_stats(sc, i + 1, &s[1]);
6161
6162		p0 = &s[0].octets;
6163		p1 = &s[1].octets;
6164		sbuf_printf(sb, "%s                       Loopback %u"
6165		    "           Loopback %u", i == 0 ? "" : "\n", i, i + 1);
6166
6167		for (j = 0; j < nitems(stat_name); j++)
6168			sbuf_printf(sb, "\n%-17s %20ju %20ju", stat_name[j],
6169				   *p0++, *p1++);
6170	}
6171
6172	rc = sbuf_finish(sb);
6173	sbuf_delete(sb);
6174
6175	return (rc);
6176}
6177
6178static int
6179sysctl_linkdnrc(SYSCTL_HANDLER_ARGS)
6180{
6181	int rc = 0;
6182	struct port_info *pi = arg1;
6183	struct sbuf *sb;
6184
6185	rc = sysctl_wire_old_buffer(req, 0);
6186	if (rc != 0)
6187		return (rc);
6188	sb = sbuf_new_for_sysctl(NULL, NULL, 64, req);
6189	if (sb == NULL)
6190		return (ENOMEM);
6191
6192	if (pi->linkdnrc < 0)
6193		sbuf_printf(sb, "n/a");
6194	else
6195		sbuf_printf(sb, "%s", t4_link_down_rc_str(pi->linkdnrc));
6196
6197	rc = sbuf_finish(sb);
6198	sbuf_delete(sb);
6199
6200	return (rc);
6201}
6202
6203struct mem_desc {
6204	unsigned int base;
6205	unsigned int limit;
6206	unsigned int idx;
6207};
6208
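/* qsort comparator: order memory regions by ascending base address. */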
6209static int
6210mem_desc_cmp(const void *a, const void *b)
6211{
6212	return ((const struct mem_desc *)a)->base -
6213	       ((const struct mem_desc *)b)->base;
6214}
6215
6216static void
6217mem_region_show(struct sbuf *sb, const char *name, unsigned int from,
6218    unsigned int to)
6219{
6220	unsigned int size;
6221
6222	if (from == to)
6223		return;
6224
6225	size = to - from + 1;
6226	if (size == 0)
6227		return;
6228
6229	/* XXX: need humanize_number(3) in libkern for a more readable 'size' */
6230	sbuf_printf(sb, "%-15s %#x-%#x [%u]\n", name, from, to, size);
6231}
6232
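/*
 * Rough memory map of the adapter.  Collect the enabled EDC/MC address
 * ranges and the base addresses of the various internal memory regions,
 * sort both lists, and infer each region's limit from the next region's
 * base wherever the hardware does not provide an explicit upper bound.
 * Gaps between the available ranges are reported as holes.
 */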
6233static int
6234sysctl_meminfo(SYSCTL_HANDLER_ARGS)
6235{
6236	struct adapter *sc = arg1;
6237	struct sbuf *sb;
6238	int rc, i, n;
6239	uint32_t lo, hi, used, alloc;
6240	static const char *memory[] = {"EDC0:", "EDC1:", "MC:", "MC0:", "MC1:"};
6241	static const char *region[] = {
6242		"DBQ contexts:", "IMSG contexts:", "FLM cache:", "TCBs:",
6243		"Pstructs:", "Timers:", "Rx FL:", "Tx FL:", "Pstruct FL:",
6244		"Tx payload:", "Rx payload:", "LE hash:", "iSCSI region:",
6245		"TDDP region:", "TPT region:", "STAG region:", "RQ region:",
6246		"RQUDP region:", "PBL region:", "TXPBL region:",
6247		"DBVFIFO region:", "ULPRX state:", "ULPTX state:",
6248		"On-chip queues:"
6249	};
6250	struct mem_desc avail[4];
6251	struct mem_desc mem[nitems(region) + 3];	/* up to 3 holes */
6252	struct mem_desc *md = mem;
6253
6254	rc = sysctl_wire_old_buffer(req, 0);
6255	if (rc != 0)
6256		return (rc);
6257
6258	sb = sbuf_new_for_sysctl(NULL, NULL, 4096, req);
6259	if (sb == NULL)
6260		return (ENOMEM);
6261
6262	for (i = 0; i < nitems(mem); i++) {
6263		mem[i].limit = 0;
6264		mem[i].idx = i;
6265	}
6266
6267	/* Find and sort the populated memory ranges */
6268	i = 0;
6269	lo = t4_read_reg(sc, A_MA_TARGET_MEM_ENABLE);
6270	if (lo & F_EDRAM0_ENABLE) {
6271		hi = t4_read_reg(sc, A_MA_EDRAM0_BAR);
6272		avail[i].base = G_EDRAM0_BASE(hi) << 20;
6273		avail[i].limit = avail[i].base + (G_EDRAM0_SIZE(hi) << 20);
6274		avail[i].idx = 0;
6275		i++;
6276	}
6277	if (lo & F_EDRAM1_ENABLE) {
6278		hi = t4_read_reg(sc, A_MA_EDRAM1_BAR);
6279		avail[i].base = G_EDRAM1_BASE(hi) << 20;
6280		avail[i].limit = avail[i].base + (G_EDRAM1_SIZE(hi) << 20);
6281		avail[i].idx = 1;
6282		i++;
6283	}
6284	if (lo & F_EXT_MEM_ENABLE) {
6285		hi = t4_read_reg(sc, A_MA_EXT_MEMORY_BAR);
6286		avail[i].base = G_EXT_MEM_BASE(hi) << 20;
6287		avail[i].limit = avail[i].base +
6288		    (G_EXT_MEM_SIZE(hi) << 20);
6289		avail[i].idx = is_t5(sc) ? 3 : 2;	/* Call it MC0 for T5 */
6290		i++;
6291	}
6292	if (is_t5(sc) && lo & F_EXT_MEM1_ENABLE) {
6293		hi = t4_read_reg(sc, A_MA_EXT_MEMORY1_BAR);
6294		avail[i].base = G_EXT_MEM1_BASE(hi) << 20;
6295		avail[i].limit = avail[i].base +
6296		    (G_EXT_MEM1_SIZE(hi) << 20);
6297		avail[i].idx = 4;
6298		i++;
6299	}
6300	if (i == 0) {				/* no memory available */
		sbuf_delete(sb);		/* nothing to report; avoid leaking sb */
6301		return (0);
	}
6302	qsort(avail, i, sizeof(struct mem_desc), mem_desc_cmp);
6303
6304	(md++)->base = t4_read_reg(sc, A_SGE_DBQ_CTXT_BADDR);
6305	(md++)->base = t4_read_reg(sc, A_SGE_IMSG_CTXT_BADDR);
6306	(md++)->base = t4_read_reg(sc, A_SGE_FLM_CACHE_BADDR);
6307	(md++)->base = t4_read_reg(sc, A_TP_CMM_TCB_BASE);
6308	(md++)->base = t4_read_reg(sc, A_TP_CMM_MM_BASE);
6309	(md++)->base = t4_read_reg(sc, A_TP_CMM_TIMER_BASE);
6310	(md++)->base = t4_read_reg(sc, A_TP_CMM_MM_RX_FLST_BASE);
6311	(md++)->base = t4_read_reg(sc, A_TP_CMM_MM_TX_FLST_BASE);
6312	(md++)->base = t4_read_reg(sc, A_TP_CMM_MM_PS_FLST_BASE);
6313
6314	/* the next few have explicit upper bounds */
6315	md->base = t4_read_reg(sc, A_TP_PMM_TX_BASE);
6316	md->limit = md->base - 1 +
6317		    t4_read_reg(sc, A_TP_PMM_TX_PAGE_SIZE) *
6318		    G_PMTXMAXPAGE(t4_read_reg(sc, A_TP_PMM_TX_MAX_PAGE));
6319	md++;
6320
6321	md->base = t4_read_reg(sc, A_TP_PMM_RX_BASE);
6322	md->limit = md->base - 1 +
6323		    t4_read_reg(sc, A_TP_PMM_RX_PAGE_SIZE) *
6324		    G_PMRXMAXPAGE(t4_read_reg(sc, A_TP_PMM_RX_MAX_PAGE));
6325	md++;
6326
6327	if (t4_read_reg(sc, A_LE_DB_CONFIG) & F_HASHEN) {
6328		if (chip_id(sc) <= CHELSIO_T5)
6329			md->base = t4_read_reg(sc, A_LE_DB_HASH_TID_BASE);
6330		else
6331			md->base = t4_read_reg(sc, A_LE_DB_HASH_TBL_BASE_ADDR);
6332		md->limit = 0;
6333	} else {
6334		md->base = 0;
6335		md->idx = nitems(region);  /* hide it */
6336	}
6337	md++;
6338
6339#define ulp_region(reg) \
6340	md->base = t4_read_reg(sc, A_ULP_ ## reg ## _LLIMIT);\
6341	(md++)->limit = t4_read_reg(sc, A_ULP_ ## reg ## _ULIMIT)
6342
6343	ulp_region(RX_ISCSI);
6344	ulp_region(RX_TDDP);
6345	ulp_region(TX_TPT);
6346	ulp_region(RX_STAG);
6347	ulp_region(RX_RQ);
6348	ulp_region(RX_RQUDP);
6349	ulp_region(RX_PBL);
6350	ulp_region(TX_PBL);
6351#undef ulp_region
6352
6353	md->base = 0;
6354	md->idx = nitems(region);
6355	if (!is_t4(sc)) {
6356		uint32_t size = 0;
6357		uint32_t sge_ctrl = t4_read_reg(sc, A_SGE_CONTROL2);
6358		uint32_t fifo_size = t4_read_reg(sc, A_SGE_DBVFIFO_SIZE);
6359
6360		if (is_t5(sc)) {
6361			if (sge_ctrl & F_VFIFO_ENABLE)
6362				size = G_DBVFIFO_SIZE(fifo_size);
6363		} else
6364			size = G_T6_DBVFIFO_SIZE(fifo_size);
6365
6366		if (size) {
6367			md->base = G_BASEADDR(t4_read_reg(sc,
6368			    A_SGE_DBVFIFO_BADDR));
6369			md->limit = md->base + (size << 2) - 1;
6370		}
6371	}
6372	md++;
6373
6374	md->base = t4_read_reg(sc, A_ULP_RX_CTX_BASE);
6375	md->limit = 0;
6376	md++;
6377	md->base = t4_read_reg(sc, A_ULP_TX_ERR_TABLE_BASE);
6378	md->limit = 0;
6379	md++;
6380
6381	md->base = sc->vres.ocq.start;
6382	if (sc->vres.ocq.size)
6383		md->limit = md->base + sc->vres.ocq.size - 1;
6384	else
6385		md->idx = nitems(region);  /* hide it */
6386	md++;
6387
6388	/* add any address-space holes, there can be up to 3 */
6389	for (n = 0; n < i - 1; n++)
6390		if (avail[n].limit < avail[n + 1].base)
6391			(md++)->base = avail[n].limit;
6392	if (avail[n].limit)
6393		(md++)->base = avail[n].limit;
6394
6395	n = md - mem;
6396	qsort(mem, n, sizeof(struct mem_desc), mem_desc_cmp);
6397
6398	for (lo = 0; lo < i; lo++)
6399		mem_region_show(sb, memory[avail[lo].idx], avail[lo].base,
6400				avail[lo].limit - 1);
6401
6402	sbuf_printf(sb, "\n");
6403	for (i = 0; i < n; i++) {
6404		if (mem[i].idx >= nitems(region))
6405			continue;                        /* skip holes */
6406		if (!mem[i].limit)
6407			mem[i].limit = i < n - 1 ? mem[i + 1].base - 1 : ~0;
6408		mem_region_show(sb, region[mem[i].idx], mem[i].base,
6409				mem[i].limit);
6410	}
6411
6412	sbuf_printf(sb, "\n");
6413	lo = t4_read_reg(sc, A_CIM_SDRAM_BASE_ADDR);
6414	hi = t4_read_reg(sc, A_CIM_SDRAM_ADDR_SIZE) + lo - 1;
6415	mem_region_show(sb, "uP RAM:", lo, hi);
6416
6417	lo = t4_read_reg(sc, A_CIM_EXTMEM2_BASE_ADDR);
6418	hi = t4_read_reg(sc, A_CIM_EXTMEM2_ADDR_SIZE) + lo - 1;
6419	mem_region_show(sb, "uP Extmem2:", lo, hi);
6420
6421	lo = t4_read_reg(sc, A_TP_PMM_RX_MAX_PAGE);
6422	sbuf_printf(sb, "\n%u Rx pages of size %uKiB for %u channels\n",
6423		   G_PMRXMAXPAGE(lo),
6424		   t4_read_reg(sc, A_TP_PMM_RX_PAGE_SIZE) >> 10,
6425		   (lo & F_PMRXNUMCHN) ? 2 : 1);
6426
6427	lo = t4_read_reg(sc, A_TP_PMM_TX_MAX_PAGE);
6428	hi = t4_read_reg(sc, A_TP_PMM_TX_PAGE_SIZE);
6429	sbuf_printf(sb, "%u Tx pages of size %u%ciB for %u channels\n",
6430		   G_PMTXMAXPAGE(lo),
6431		   hi >= (1 << 20) ? (hi >> 20) : (hi >> 10),
6432		   hi >= (1 << 20) ? 'M' : 'K', 1 << G_PMTXNUMCHN(lo));
6433	sbuf_printf(sb, "%u p-structs\n",
6434		   t4_read_reg(sc, A_TP_CMM_MM_MAX_PSTRUCT));
6435
6436	for (i = 0; i < 4; i++) {
6437		if (chip_id(sc) > CHELSIO_T5)
6438			lo = t4_read_reg(sc, A_MPS_RX_MAC_BG_PG_CNT0 + i * 4);
6439		else
6440			lo = t4_read_reg(sc, A_MPS_RX_PG_RSV0 + i * 4);
6441		if (is_t5(sc)) {
6442			used = G_T5_USED(lo);
6443			alloc = G_T5_ALLOC(lo);
6444		} else {
6445			used = G_USED(lo);
6446			alloc = G_ALLOC(lo);
6447		}
6448		/* For T6 these are MAC buffer groups */
6449		sbuf_printf(sb, "\nPort %d using %u pages out of %u allocated",
6450		    i, used, alloc);
6451	}
6452	for (i = 0; i < sc->chip_params->nchan; i++) {
6453		if (chip_id(sc) > CHELSIO_T5)
6454			lo = t4_read_reg(sc, A_MPS_RX_LPBK_BG_PG_CNT0 + i * 4);
6455		else
6456			lo = t4_read_reg(sc, A_MPS_RX_PG_RSV4 + i * 4);
6457		if (is_t5(sc)) {
6458			used = G_T5_USED(lo);
6459			alloc = G_T5_ALLOC(lo);
6460		} else {
6461			used = G_USED(lo);
6462			alloc = G_ALLOC(lo);
6463		}
6464		/* For T6 these are MAC buffer groups */
6465		sbuf_printf(sb,
6466		    "\nLoopback %d using %u pages out of %u allocated",
6467		    i, used, alloc);
6468	}
6469
6470	rc = sbuf_finish(sb);
6471	sbuf_delete(sb);
6472
6473	return (rc);
6474}
6475
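/*
 * Convert a TCAM (x, y) pair to a value/mask.  This assumes the usual
 * Chelsio encoding: per bit, (0, 0) is "don't care", y carries the value
 * of the bits that are matched, and x & y != 0 marks an invalid entry
 * (callers skip those).  The Ethernet address is the low 48 bits of y.
 */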
6476static inline void
6477tcamxy2valmask(uint64_t x, uint64_t y, uint8_t *addr, uint64_t *mask)
6478{
6479	*mask = x | y;
6480	y = htobe64(y);
6481	memcpy(addr, (char *)&y + 2, ETHER_ADDR_LEN);
6482}
6483
6484static int
6485sysctl_mps_tcam(SYSCTL_HANDLER_ARGS)
6486{
6487	struct adapter *sc = arg1;
6488	struct sbuf *sb;
6489	int rc, i;
6490
6491	MPASS(chip_id(sc) <= CHELSIO_T5);
6492
6493	rc = sysctl_wire_old_buffer(req, 0);
6494	if (rc != 0)
6495		return (rc);
6496
6497	sb = sbuf_new_for_sysctl(NULL, NULL, 4096, req);
6498	if (sb == NULL)
6499		return (ENOMEM);
6500
6501	sbuf_printf(sb,
6502	    "Idx  Ethernet address     Mask     Vld Ports PF"
6503	    "  VF              Replication             P0 P1 P2 P3  ML");
6504	for (i = 0; i < sc->chip_params->mps_tcam_size; i++) {
6505		uint64_t tcamx, tcamy, mask;
6506		uint32_t cls_lo, cls_hi;
6507		uint8_t addr[ETHER_ADDR_LEN];
6508
6509		tcamy = t4_read_reg64(sc, MPS_CLS_TCAM_Y_L(i));
6510		tcamx = t4_read_reg64(sc, MPS_CLS_TCAM_X_L(i));
6511		if (tcamx & tcamy)
6512			continue;
6513		tcamxy2valmask(tcamx, tcamy, addr, &mask);
6514		cls_lo = t4_read_reg(sc, MPS_CLS_SRAM_L(i));
6515		cls_hi = t4_read_reg(sc, MPS_CLS_SRAM_H(i));
6516		sbuf_printf(sb, "\n%3u %02x:%02x:%02x:%02x:%02x:%02x %012jx"
6517			   "  %c   %#x%4u%4d", i, addr[0], addr[1], addr[2],
6518			   addr[3], addr[4], addr[5], (uintmax_t)mask,
6519			   (cls_lo & F_SRAM_VLD) ? 'Y' : 'N',
6520			   G_PORTMAP(cls_hi), G_PF(cls_lo),
6521			   (cls_lo & F_VF_VALID) ? G_VF(cls_lo) : -1);
6522
6523		if (cls_lo & F_REPLICATE) {
6524			struct fw_ldst_cmd ldst_cmd;
6525
6526			memset(&ldst_cmd, 0, sizeof(ldst_cmd));
6527			ldst_cmd.op_to_addrspace =
6528			    htobe32(V_FW_CMD_OP(FW_LDST_CMD) |
6529				F_FW_CMD_REQUEST | F_FW_CMD_READ |
6530				V_FW_LDST_CMD_ADDRSPACE(FW_LDST_ADDRSPC_MPS));
6531			ldst_cmd.cycles_to_len16 = htobe32(FW_LEN16(ldst_cmd));
6532			ldst_cmd.u.mps.rplc.fid_idx =
6533			    htobe16(V_FW_LDST_CMD_FID(FW_LDST_MPS_RPLC) |
6534				V_FW_LDST_CMD_IDX(i));
6535
6536			rc = begin_synchronized_op(sc, NULL, SLEEP_OK | INTR_OK,
6537			    "t4mps");
6538			if (rc)
6539				break;
6540			rc = -t4_wr_mbox(sc, sc->mbox, &ldst_cmd,
6541			    sizeof(ldst_cmd), &ldst_cmd);
6542			end_synchronized_op(sc, 0);
6543
6544			if (rc != 0) {
6545				sbuf_printf(sb, "%36d", rc);
6546				rc = 0;
6547			} else {
6548				sbuf_printf(sb, " %08x %08x %08x %08x",
6549				    be32toh(ldst_cmd.u.mps.rplc.rplc127_96),
6550				    be32toh(ldst_cmd.u.mps.rplc.rplc95_64),
6551				    be32toh(ldst_cmd.u.mps.rplc.rplc63_32),
6552				    be32toh(ldst_cmd.u.mps.rplc.rplc31_0));
6553			}
6554		} else
6555			sbuf_printf(sb, "%36s", "");
6556
6557		sbuf_printf(sb, "%4u%3u%3u%3u %#3x", G_SRAM_PRIO0(cls_lo),
6558		    G_SRAM_PRIO1(cls_lo), G_SRAM_PRIO2(cls_lo),
6559		    G_SRAM_PRIO3(cls_lo), (cls_lo >> S_MULTILISTEN0) & 0xf);
6560	}
6561
6562	if (rc)
6563		(void) sbuf_finish(sb);
6564	else
6565		rc = sbuf_finish(sb);
6566	sbuf_delete(sb);
6567
6568	return (rc);
6569}
6570
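/*
 * T6 version of the MPS TCAM dump.  The TCAM is read out indirectly via
 * A_MPS_CLS_TCAM_DATA2_CTL (two banks of 256 entries); each entry is read
 * twice, first for the y word and then, with CTLXYBITSEL set, for the x
 * word.
 */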
6571static int
6572sysctl_mps_tcam_t6(SYSCTL_HANDLER_ARGS)
6573{
6574	struct adapter *sc = arg1;
6575	struct sbuf *sb;
6576	int rc, i;
6577
6578	MPASS(chip_id(sc) > CHELSIO_T5);
6579
6580	rc = sysctl_wire_old_buffer(req, 0);
6581	if (rc != 0)
6582		return (rc);
6583
6584	sb = sbuf_new_for_sysctl(NULL, NULL, 4096, req);
6585	if (sb == NULL)
6586		return (ENOMEM);
6587
6588	sbuf_printf(sb, "Idx  Ethernet address     Mask       VNI   Mask"
6589	    "   IVLAN Vld DIP_Hit   Lookup  Port Vld Ports PF  VF"
6590	    "                           Replication"
6591	    "                                    P0 P1 P2 P3  ML\n");
6592
6593	for (i = 0; i < sc->chip_params->mps_tcam_size; i++) {
6594		uint8_t dip_hit, vlan_vld, lookup_type, port_num;
6595		uint16_t ivlan;
6596		uint64_t tcamx, tcamy, val, mask;
6597		uint32_t cls_lo, cls_hi, ctl, data2, vnix, vniy;
6598		uint8_t addr[ETHER_ADDR_LEN];
6599
6600		ctl = V_CTLREQID(1) | V_CTLCMDTYPE(0) | V_CTLXYBITSEL(0);
6601		if (i < 256)
6602			ctl |= V_CTLTCAMINDEX(i) | V_CTLTCAMSEL(0);
6603		else
6604			ctl |= V_CTLTCAMINDEX(i - 256) | V_CTLTCAMSEL(1);
6605		t4_write_reg(sc, A_MPS_CLS_TCAM_DATA2_CTL, ctl);
6606		val = t4_read_reg(sc, A_MPS_CLS_TCAM_RDATA1_REQ_ID1);
6607		tcamy = G_DMACH(val) << 32;
6608		tcamy |= t4_read_reg(sc, A_MPS_CLS_TCAM_RDATA0_REQ_ID1);
6609		data2 = t4_read_reg(sc, A_MPS_CLS_TCAM_RDATA2_REQ_ID1);
6610		lookup_type = G_DATALKPTYPE(data2);
6611		port_num = G_DATAPORTNUM(data2);
6612		if (lookup_type && lookup_type != M_DATALKPTYPE) {
6613			/* Inner header VNI */
6614			vniy = ((data2 & F_DATAVIDH2) << 23) |
6615				       (G_DATAVIDH1(data2) << 16) | G_VIDL(val);
6616			dip_hit = data2 & F_DATADIPHIT;
6617			vlan_vld = 0;
6618		} else {
6619			vniy = 0;
6620			dip_hit = 0;
6621			vlan_vld = data2 & F_DATAVIDH2;
6622			ivlan = G_VIDL(val);
6623		}
6624
6625		ctl |= V_CTLXYBITSEL(1);
6626		t4_write_reg(sc, A_MPS_CLS_TCAM_DATA2_CTL, ctl);
6627		val = t4_read_reg(sc, A_MPS_CLS_TCAM_RDATA1_REQ_ID1);
6628		tcamx = G_DMACH(val) << 32;
6629		tcamx |= t4_read_reg(sc, A_MPS_CLS_TCAM_RDATA0_REQ_ID1);
6630		data2 = t4_read_reg(sc, A_MPS_CLS_TCAM_RDATA2_REQ_ID1);
6631		if (lookup_type && lookup_type != M_DATALKPTYPE) {
6632			/* Inner header VNI mask */
6633			vnix = ((data2 & F_DATAVIDH2) << 23) |
6634			       (G_DATAVIDH1(data2) << 16) | G_VIDL(val);
6635		} else
6636			vnix = 0;
6637
6638		if (tcamx & tcamy)
6639			continue;
6640		tcamxy2valmask(tcamx, tcamy, addr, &mask);
6641
6642		cls_lo = t4_read_reg(sc, MPS_CLS_SRAM_L(i));
6643		cls_hi = t4_read_reg(sc, MPS_CLS_SRAM_H(i));
6644
6645		if (lookup_type && lookup_type != M_DATALKPTYPE) {
6646			sbuf_printf(sb, "\n%3u %02x:%02x:%02x:%02x:%02x:%02x "
6647			    "%012jx %06x %06x    -    -   %3c"
6648			    "      'I'  %4x   %3c   %#x%4u%4d", i, addr[0],
6649			    addr[1], addr[2], addr[3], addr[4], addr[5],
6650			    (uintmax_t)mask, vniy, vnix, dip_hit ? 'Y' : 'N',
6651			    port_num, cls_lo & F_T6_SRAM_VLD ? 'Y' : 'N',
6652			    G_PORTMAP(cls_hi), G_T6_PF(cls_lo),
6653			    cls_lo & F_T6_VF_VALID ? G_T6_VF(cls_lo) : -1);
6654		} else {
6655			sbuf_printf(sb, "\n%3u %02x:%02x:%02x:%02x:%02x:%02x "
6656			    "%012jx    -       -   ", i, addr[0], addr[1],
6657			    addr[2], addr[3], addr[4], addr[5],
6658			    (uintmax_t)mask);
6659
6660			if (vlan_vld)
6661				sbuf_printf(sb, "%4u   Y     ", ivlan);
6662			else
6663				sbuf_printf(sb, "  -    N     ");
6664
6665			sbuf_printf(sb, "-      %3c  %4x   %3c   %#x%4u%4d",
6666			    lookup_type ? 'I' : 'O', port_num,
6667			    cls_lo & F_T6_SRAM_VLD ? 'Y' : 'N',
6668			    G_PORTMAP(cls_hi), G_T6_PF(cls_lo),
6669			    cls_lo & F_T6_VF_VALID ? G_T6_VF(cls_lo) : -1);
6670		}
6671
6673		if (cls_lo & F_T6_REPLICATE) {
6674			struct fw_ldst_cmd ldst_cmd;
6675
6676			memset(&ldst_cmd, 0, sizeof(ldst_cmd));
6677			ldst_cmd.op_to_addrspace =
6678			    htobe32(V_FW_CMD_OP(FW_LDST_CMD) |
6679				F_FW_CMD_REQUEST | F_FW_CMD_READ |
6680				V_FW_LDST_CMD_ADDRSPACE(FW_LDST_ADDRSPC_MPS));
6681			ldst_cmd.cycles_to_len16 = htobe32(FW_LEN16(ldst_cmd));
6682			ldst_cmd.u.mps.rplc.fid_idx =
6683			    htobe16(V_FW_LDST_CMD_FID(FW_LDST_MPS_RPLC) |
6684				V_FW_LDST_CMD_IDX(i));
6685
6686			rc = begin_synchronized_op(sc, NULL, SLEEP_OK | INTR_OK,
6687			    "t6mps");
6688			if (rc)
6689				break;
6690			rc = -t4_wr_mbox(sc, sc->mbox, &ldst_cmd,
6691			    sizeof(ldst_cmd), &ldst_cmd);
6692			end_synchronized_op(sc, 0);
6693
6694			if (rc != 0) {
6695				sbuf_printf(sb, "%72d", rc);
6696				rc = 0;
6697			} else {
6698				sbuf_printf(sb, " %08x %08x %08x %08x"
6699				    " %08x %08x %08x %08x",
6700				    be32toh(ldst_cmd.u.mps.rplc.rplc255_224),
6701				    be32toh(ldst_cmd.u.mps.rplc.rplc223_192),
6702				    be32toh(ldst_cmd.u.mps.rplc.rplc191_160),
6703				    be32toh(ldst_cmd.u.mps.rplc.rplc159_128),
6704				    be32toh(ldst_cmd.u.mps.rplc.rplc127_96),
6705				    be32toh(ldst_cmd.u.mps.rplc.rplc95_64),
6706				    be32toh(ldst_cmd.u.mps.rplc.rplc63_32),
6707				    be32toh(ldst_cmd.u.mps.rplc.rplc31_0));
6708			}
6709		} else
6710			sbuf_printf(sb, "%72s", "");
6711
6712		sbuf_printf(sb, "%4u%3u%3u%3u %#x",
6713		    G_T6_SRAM_PRIO0(cls_lo), G_T6_SRAM_PRIO1(cls_lo),
6714		    G_T6_SRAM_PRIO2(cls_lo), G_T6_SRAM_PRIO3(cls_lo),
6715		    (cls_lo >> S_T6_MULTILISTEN0) & 0xf);
6716	}
6717
6718	if (rc)
6719		(void) sbuf_finish(sb);
6720	else
6721		rc = sbuf_finish(sb);
6722	sbuf_delete(sb);
6723
6724	return (rc);
6725}
6726
6727static int
6728sysctl_path_mtus(SYSCTL_HANDLER_ARGS)
6729{
6730	struct adapter *sc = arg1;
6731	struct sbuf *sb;
6732	int rc;
6733	uint16_t mtus[NMTUS];
6734
6735	rc = sysctl_wire_old_buffer(req, 0);
6736	if (rc != 0)
6737		return (rc);
6738
6739	sb = sbuf_new_for_sysctl(NULL, NULL, 256, req);
6740	if (sb == NULL)
6741		return (ENOMEM);
6742
6743	t4_read_mtu_tbl(sc, mtus, NULL);
6744
6745	sbuf_printf(sb, "%u %u %u %u %u %u %u %u %u %u %u %u %u %u %u %u",
6746	    mtus[0], mtus[1], mtus[2], mtus[3], mtus[4], mtus[5], mtus[6],
6747	    mtus[7], mtus[8], mtus[9], mtus[10], mtus[11], mtus[12], mtus[13],
6748	    mtus[14], mtus[15]);
6749
6750	rc = sbuf_finish(sb);
6751	sbuf_delete(sb);
6752
6753	return (rc);
6754}
6755
6756static int
6757sysctl_pm_stats(SYSCTL_HANDLER_ARGS)
6758{
6759	struct adapter *sc = arg1;
6760	struct sbuf *sb;
6761	int rc, i;
6762	uint32_t tx_cnt[MAX_PM_NSTATS], rx_cnt[MAX_PM_NSTATS];
6763	uint64_t tx_cyc[MAX_PM_NSTATS], rx_cyc[MAX_PM_NSTATS];
6764	static const char *tx_stats[MAX_PM_NSTATS] = {
6765		"Read:", "Write bypass:", "Write mem:", "Bypass + mem:",
6766		"Tx FIFO wait:", NULL, "Tx latency:"
6767	};
6768	static const char *rx_stats[MAX_PM_NSTATS] = {
6769		"Read:", "Write bypass:", "Write mem:", "Flush:",
6770		"Rx FIFO wait:", NULL, "Rx latency:"
6771	};
6772
6773	rc = sysctl_wire_old_buffer(req, 0);
6774	if (rc != 0)
6775		return (rc);
6776
6777	sb = sbuf_new_for_sysctl(NULL, NULL, 256, req);
6778	if (sb == NULL)
6779		return (ENOMEM);
6780
6781	t4_pmtx_get_stats(sc, tx_cnt, tx_cyc);
6782	t4_pmrx_get_stats(sc, rx_cnt, rx_cyc);
6783
6784	sbuf_printf(sb, "                Tx pcmds             Tx bytes");
6785	for (i = 0; i < 4; i++) {
6786		sbuf_printf(sb, "\n%-13s %10u %20ju", tx_stats[i], tx_cnt[i],
6787		    tx_cyc[i]);
6788	}
6789
6790	sbuf_printf(sb, "\n                Rx pcmds             Rx bytes");
6791	for (i = 0; i < 4; i++) {
6792		sbuf_printf(sb, "\n%-13s %10u %20ju", rx_stats[i], rx_cnt[i],
6793		    rx_cyc[i]);
6794	}
6795
6796	if (chip_id(sc) > CHELSIO_T5) {
6797		sbuf_printf(sb,
6798		    "\n              Total wait      Total occupancy");
6799		sbuf_printf(sb, "\n%-13s %10u %20ju", tx_stats[i], tx_cnt[i],
6800		    tx_cyc[i]);
6801		sbuf_printf(sb, "\n%-13s %10u %20ju", rx_stats[i], rx_cnt[i],
6802		    rx_cyc[i]);
6803
6804		i += 2;
6805		MPASS(i < nitems(tx_stats));
6806
6807		sbuf_printf(sb,
6808		    "\n                   Reads           Total wait");
6809		sbuf_printf(sb, "\n%-13s %10u %20ju", tx_stats[i], tx_cnt[i],
6810		    tx_cyc[i]);
6811		sbuf_printf(sb, "\n%-13s %10u %20ju", rx_stats[i], rx_cnt[i],
6812		    rx_cyc[i]);
6813	}
6814
6815	rc = sbuf_finish(sb);
6816	sbuf_delete(sb);
6817
6818	return (rc);
6819}
6820
6821static int
6822sysctl_rdma_stats(SYSCTL_HANDLER_ARGS)
6823{
6824	struct adapter *sc = arg1;
6825	struct sbuf *sb;
6826	int rc;
6827	struct tp_rdma_stats stats;
6828
6829	rc = sysctl_wire_old_buffer(req, 0);
6830	if (rc != 0)
6831		return (rc);
6832
6833	sb = sbuf_new_for_sysctl(NULL, NULL, 256, req);
6834	if (sb == NULL)
6835		return (ENOMEM);
6836
6837	mtx_lock(&sc->reg_lock);
6838	t4_tp_get_rdma_stats(sc, &stats);
6839	mtx_unlock(&sc->reg_lock);
6840
6841	sbuf_printf(sb, "NoRQEModDeferrals: %u\n", stats.rqe_dfr_mod);
6842	sbuf_printf(sb, "NoRQEPktDeferrals: %u", stats.rqe_dfr_pkt);
6843
6844	rc = sbuf_finish(sb);
6845	sbuf_delete(sb);
6846
6847	return (rc);
6848}
6849
6850static int
6851sysctl_tcp_stats(SYSCTL_HANDLER_ARGS)
6852{
6853	struct adapter *sc = arg1;
6854	struct sbuf *sb;
6855	int rc;
6856	struct tp_tcp_stats v4, v6;
6857
6858	rc = sysctl_wire_old_buffer(req, 0);
6859	if (rc != 0)
6860		return (rc);
6861
6862	sb = sbuf_new_for_sysctl(NULL, NULL, 256, req);
6863	if (sb == NULL)
6864		return (ENOMEM);
6865
6866	mtx_lock(&sc->reg_lock);
6867	t4_tp_get_tcp_stats(sc, &v4, &v6);
6868	mtx_unlock(&sc->reg_lock);
6869
6870	sbuf_printf(sb,
6871	    "                                IP                 IPv6\n");
6872	sbuf_printf(sb, "OutRsts:      %20u %20u\n",
6873	    v4.tcp_out_rsts, v6.tcp_out_rsts);
6874	sbuf_printf(sb, "InSegs:       %20ju %20ju\n",
6875	    v4.tcp_in_segs, v6.tcp_in_segs);
6876	sbuf_printf(sb, "OutSegs:      %20ju %20ju\n",
6877	    v4.tcp_out_segs, v6.tcp_out_segs);
6878	sbuf_printf(sb, "RetransSegs:  %20ju %20ju",
6879	    v4.tcp_retrans_segs, v6.tcp_retrans_segs);
6880
6881	rc = sbuf_finish(sb);
6882	sbuf_delete(sb);
6883
6884	return (rc);
6885}
6886
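/*
 * Summary of the TID namespace: active (ATID), hardware TID (split around
 * the hash region when the LE hash table is enabled), server (STID),
 * filter (FTID), and Ethernet (ETID) ranges, plus the live IPv4/IPv6
 * counts kept by the LE.
 */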
6887static int
6888sysctl_tids(SYSCTL_HANDLER_ARGS)
6889{
6890	struct adapter *sc = arg1;
6891	struct sbuf *sb;
6892	int rc;
6893	struct tid_info *t = &sc->tids;
6894
6895	rc = sysctl_wire_old_buffer(req, 0);
6896	if (rc != 0)
6897		return (rc);
6898
6899	sb = sbuf_new_for_sysctl(NULL, NULL, 256, req);
6900	if (sb == NULL)
6901		return (ENOMEM);
6902
6903	if (t->natids) {
6904		sbuf_printf(sb, "ATID range: 0-%u, in use: %u\n", t->natids - 1,
6905		    t->atids_in_use);
6906	}
6907
6908	if (t->ntids) {
6909		if (t4_read_reg(sc, A_LE_DB_CONFIG) & F_HASHEN) {
6910			uint32_t b = t4_read_reg(sc, A_LE_DB_SERVER_INDEX) / 4;
6911
6912			if (b) {
6913				sbuf_printf(sb, "TID range: 0-%u, %u-%u", b - 1,
6914				    t4_read_reg(sc, A_LE_DB_TID_HASHBASE) / 4,
6915				    t->ntids - 1);
6916			} else {
6917				sbuf_printf(sb, "TID range: %u-%u",
6918				    t4_read_reg(sc, A_LE_DB_TID_HASHBASE) / 4,
6919				    t->ntids - 1);
6920			}
6921		} else
6922			sbuf_printf(sb, "TID range: 0-%u", t->ntids - 1);
6923		sbuf_printf(sb, ", in use: %u\n",
6924		    atomic_load_acq_int(&t->tids_in_use));
6925	}
6926
6927	if (t->nstids) {
6928		sbuf_printf(sb, "STID range: %u-%u, in use: %u\n", t->stid_base,
6929		    t->stid_base + t->nstids - 1, t->stids_in_use);
6930	}
6931
6932	if (t->nftids) {
6933		sbuf_printf(sb, "FTID range: %u-%u\n", t->ftid_base,
6934		    t->ftid_base + t->nftids - 1);
6935	}
6936
6937	if (t->netids) {
6938		sbuf_printf(sb, "ETID range: %u-%u\n", t->etid_base,
6939		    t->etid_base + t->netids - 1);
6940	}
6941
6942	sbuf_printf(sb, "HW TID usage: %u IP users, %u IPv6 users",
6943	    t4_read_reg(sc, A_LE_DB_ACT_CNT_IPV4),
6944	    t4_read_reg(sc, A_LE_DB_ACT_CNT_IPV6));
6945
6946	rc = sbuf_finish(sb);
6947	sbuf_delete(sb);
6948
6949	return (rc);
6950}
6951
6952static int
6953sysctl_tp_err_stats(SYSCTL_HANDLER_ARGS)
6954{
6955	struct adapter *sc = arg1;
6956	struct sbuf *sb;
6957	int rc;
6958	struct tp_err_stats stats;
6959
6960	rc = sysctl_wire_old_buffer(req, 0);
6961	if (rc != 0)
6962		return (rc);
6963
6964	sb = sbuf_new_for_sysctl(NULL, NULL, 256, req);
6965	if (sb == NULL)
6966		return (ENOMEM);
6967
6968	mtx_lock(&sc->reg_lock);
6969	t4_tp_get_err_stats(sc, &stats);
6970	mtx_unlock(&sc->reg_lock);
6971
6972	if (sc->chip_params->nchan > 2) {
6973		sbuf_printf(sb, "                 channel 0  channel 1"
6974		    "  channel 2  channel 3\n");
6975		sbuf_printf(sb, "macInErrs:      %10u %10u %10u %10u\n",
6976		    stats.mac_in_errs[0], stats.mac_in_errs[1],
6977		    stats.mac_in_errs[2], stats.mac_in_errs[3]);
6978		sbuf_printf(sb, "hdrInErrs:      %10u %10u %10u %10u\n",
6979		    stats.hdr_in_errs[0], stats.hdr_in_errs[1],
6980		    stats.hdr_in_errs[2], stats.hdr_in_errs[3]);
6981		sbuf_printf(sb, "tcpInErrs:      %10u %10u %10u %10u\n",
6982		    stats.tcp_in_errs[0], stats.tcp_in_errs[1],
6983		    stats.tcp_in_errs[2], stats.tcp_in_errs[3]);
6984		sbuf_printf(sb, "tcp6InErrs:     %10u %10u %10u %10u\n",
6985		    stats.tcp6_in_errs[0], stats.tcp6_in_errs[1],
6986		    stats.tcp6_in_errs[2], stats.tcp6_in_errs[3]);
6987		sbuf_printf(sb, "tnlCongDrops:   %10u %10u %10u %10u\n",
6988		    stats.tnl_cong_drops[0], stats.tnl_cong_drops[1],
6989		    stats.tnl_cong_drops[2], stats.tnl_cong_drops[3]);
6990		sbuf_printf(sb, "tnlTxDrops:     %10u %10u %10u %10u\n",
6991		    stats.tnl_tx_drops[0], stats.tnl_tx_drops[1],
6992		    stats.tnl_tx_drops[2], stats.tnl_tx_drops[3]);
6993		sbuf_printf(sb, "ofldVlanDrops:  %10u %10u %10u %10u\n",
6994		    stats.ofld_vlan_drops[0], stats.ofld_vlan_drops[1],
6995		    stats.ofld_vlan_drops[2], stats.ofld_vlan_drops[3]);
6996		sbuf_printf(sb, "ofldChanDrops:  %10u %10u %10u %10u\n\n",
6997		    stats.ofld_chan_drops[0], stats.ofld_chan_drops[1],
6998		    stats.ofld_chan_drops[2], stats.ofld_chan_drops[3]);
6999	} else {
7000		sbuf_printf(sb, "                 channel 0  channel 1\n");
7001		sbuf_printf(sb, "macInErrs:      %10u %10u\n",
7002		    stats.mac_in_errs[0], stats.mac_in_errs[1]);
7003		sbuf_printf(sb, "hdrInErrs:      %10u %10u\n",
7004		    stats.hdr_in_errs[0], stats.hdr_in_errs[1]);
7005		sbuf_printf(sb, "tcpInErrs:      %10u %10u\n",
7006		    stats.tcp_in_errs[0], stats.tcp_in_errs[1]);
7007		sbuf_printf(sb, "tcp6InErrs:     %10u %10u\n",
7008		    stats.tcp6_in_errs[0], stats.tcp6_in_errs[1]);
7009		sbuf_printf(sb, "tnlCongDrops:   %10u %10u\n",
7010		    stats.tnl_cong_drops[0], stats.tnl_cong_drops[1]);
7011		sbuf_printf(sb, "tnlTxDrops:     %10u %10u\n",
7012		    stats.tnl_tx_drops[0], stats.tnl_tx_drops[1]);
7013		sbuf_printf(sb, "ofldVlanDrops:  %10u %10u\n",
7014		    stats.ofld_vlan_drops[0], stats.ofld_vlan_drops[1]);
7015		sbuf_printf(sb, "ofldChanDrops:  %10u %10u\n\n",
7016		    stats.ofld_chan_drops[0], stats.ofld_chan_drops[1]);
7017	}
7018
7019	sbuf_printf(sb, "ofldNoNeigh:    %u\nofldCongDefer:  %u",
7020	    stats.ofld_no_neigh, stats.ofld_cong_defer);
7021
7022	rc = sbuf_finish(sb);
7023	sbuf_delete(sb);
7024
7025	return (rc);
7026}
7027
7028static int
7029sysctl_tp_la_mask(SYSCTL_HANDLER_ARGS)
7030{
7031	struct adapter *sc = arg1;
7032	struct tp_params *tpp = &sc->params.tp;
7033	u_int mask;
7034	int rc;
7035
7036	mask = tpp->la_mask >> 16;
7037	rc = sysctl_handle_int(oidp, &mask, 0, req);
7038	if (rc != 0 || req->newptr == NULL)
7039		return (rc);
7040	if (mask > 0xffff)
7041		return (EINVAL);
7042	tpp->la_mask = mask << 16;
7043	t4_set_reg_field(sc, A_TP_DBG_LA_CONFIG, 0xffff0000U, tpp->la_mask);
7044
7045	return (0);
7046}
7047
7048struct field_desc {
7049	const char *name;
7050	u_int start;
7051	u_int width;
7052};
7053
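/*
 * Decode a 64-bit logic analyzer word: walk the NULL-terminated field
 * table, print each field as "name: value", and wrap the output before a
 * line reaches 79 columns.
 */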
7054static void
7055field_desc_show(struct sbuf *sb, uint64_t v, const struct field_desc *f)
7056{
7057	char buf[32];
7058	int line_size = 0;
7059
7060	while (f->name) {
7061		uint64_t mask = (1ULL << f->width) - 1;
7062		int len = snprintf(buf, sizeof(buf), "%s: %ju", f->name,
7063		    ((uintmax_t)v >> f->start) & mask);
7064
7065		if (line_size + len >= 79) {
7066			line_size = 8;
7067			sbuf_printf(sb, "\n        ");
7068		}
7069		sbuf_printf(sb, "%s ", buf);
7070		line_size += len + 1;
7071		f++;
7072	}
7073	sbuf_printf(sb, "\n");
7074}
7075
7076static const struct field_desc tp_la0[] = {
7077	{ "RcfOpCodeOut", 60, 4 },
7078	{ "State", 56, 4 },
7079	{ "WcfState", 52, 4 },
7080	{ "RcfOpcSrcOut", 50, 2 },
7081	{ "CRxError", 49, 1 },
7082	{ "ERxError", 48, 1 },
7083	{ "SanityFailed", 47, 1 },
7084	{ "SpuriousMsg", 46, 1 },
7085	{ "FlushInputMsg", 45, 1 },
7086	{ "FlushInputCpl", 44, 1 },
7087	{ "RssUpBit", 43, 1 },
7088	{ "RssFilterHit", 42, 1 },
7089	{ "Tid", 32, 10 },
7090	{ "InitTcb", 31, 1 },
7091	{ "LineNumber", 24, 7 },
7092	{ "Emsg", 23, 1 },
7093	{ "EdataOut", 22, 1 },
7094	{ "Cmsg", 21, 1 },
7095	{ "CdataOut", 20, 1 },
7096	{ "EreadPdu", 19, 1 },
7097	{ "CreadPdu", 18, 1 },
7098	{ "TunnelPkt", 17, 1 },
7099	{ "RcfPeerFin", 16, 1 },
7100	{ "RcfReasonOut", 12, 4 },
7101	{ "TxCchannel", 10, 2 },
7102	{ "RcfTxChannel", 8, 2 },
7103	{ "RxEchannel", 6, 2 },
7104	{ "RcfRxChannel", 5, 1 },
7105	{ "RcfDataOutSrdy", 4, 1 },
7106	{ "RxDvld", 3, 1 },
7107	{ "RxOoDvld", 2, 1 },
7108	{ "RxCongestion", 1, 1 },
7109	{ "TxCongestion", 0, 1 },
7110	{ NULL }
7111};
7112
7113static const struct field_desc tp_la1[] = {
7114	{ "CplCmdIn", 56, 8 },
7115	{ "CplCmdOut", 48, 8 },
7116	{ "ESynOut", 47, 1 },
7117	{ "EAckOut", 46, 1 },
7118	{ "EFinOut", 45, 1 },
7119	{ "ERstOut", 44, 1 },
7120	{ "SynIn", 43, 1 },
7121	{ "AckIn", 42, 1 },
7122	{ "FinIn", 41, 1 },
7123	{ "RstIn", 40, 1 },
7124	{ "DataIn", 39, 1 },
7125	{ "DataInVld", 38, 1 },
7126	{ "PadIn", 37, 1 },
7127	{ "RxBufEmpty", 36, 1 },
7128	{ "RxDdp", 35, 1 },
7129	{ "RxFbCongestion", 34, 1 },
7130	{ "TxFbCongestion", 33, 1 },
7131	{ "TxPktSumSrdy", 32, 1 },
7132	{ "RcfUlpType", 28, 4 },
7133	{ "Eread", 27, 1 },
7134	{ "Ebypass", 26, 1 },
7135	{ "Esave", 25, 1 },
7136	{ "Static0", 24, 1 },
7137	{ "Cread", 23, 1 },
7138	{ "Cbypass", 22, 1 },
7139	{ "Csave", 21, 1 },
7140	{ "CPktOut", 20, 1 },
7141	{ "RxPagePoolFull", 18, 2 },
7142	{ "RxLpbkPkt", 17, 1 },
7143	{ "TxLpbkPkt", 16, 1 },
7144	{ "RxVfValid", 15, 1 },
7145	{ "SynLearned", 14, 1 },
7146	{ "SetDelEntry", 13, 1 },
7147	{ "SetInvEntry", 12, 1 },
7148	{ "CpcmdDvld", 11, 1 },
7149	{ "CpcmdSave", 10, 1 },
7150	{ "RxPstructsFull", 8, 2 },
7151	{ "EpcmdDvld", 7, 1 },
7152	{ "EpcmdFlush", 6, 1 },
7153	{ "EpcmdTrimPrefix", 5, 1 },
7154	{ "EpcmdTrimPostfix", 4, 1 },
7155	{ "ERssIp4Pkt", 3, 1 },
7156	{ "ERssIp6Pkt", 2, 1 },
7157	{ "ERssTcpUdpPkt", 1, 1 },
7158	{ "ERssFceFipPkt", 0, 1 },
7159	{ NULL }
7160};
7161
7162static const struct field_desc tp_la2[] = {
7163	{ "CplCmdIn", 56, 8 },
7164	{ "MpsVfVld", 55, 1 },
7165	{ "MpsPf", 52, 3 },
7166	{ "MpsVf", 44, 8 },
7167	{ "SynIn", 43, 1 },
7168	{ "AckIn", 42, 1 },
7169	{ "FinIn", 41, 1 },
7170	{ "RstIn", 40, 1 },
7171	{ "DataIn", 39, 1 },
7172	{ "DataInVld", 38, 1 },
7173	{ "PadIn", 37, 1 },
7174	{ "RxBufEmpty", 36, 1 },
7175	{ "RxDdp", 35, 1 },
7176	{ "RxFbCongestion", 34, 1 },
7177	{ "TxFbCongestion", 33, 1 },
7178	{ "TxPktSumSrdy", 32, 1 },
7179	{ "RcfUlpType", 28, 4 },
7180	{ "Eread", 27, 1 },
7181	{ "Ebypass", 26, 1 },
7182	{ "Esave", 25, 1 },
7183	{ "Static0", 24, 1 },
7184	{ "Cread", 23, 1 },
7185	{ "Cbypass", 22, 1 },
7186	{ "Csave", 21, 1 },
7187	{ "CPktOut", 20, 1 },
7188	{ "RxPagePoolFull", 18, 2 },
7189	{ "RxLpbkPkt", 17, 1 },
7190	{ "TxLpbkPkt", 16, 1 },
7191	{ "RxVfValid", 15, 1 },
7192	{ "SynLearned", 14, 1 },
7193	{ "SetDelEntry", 13, 1 },
7194	{ "SetInvEntry", 12, 1 },
7195	{ "CpcmdDvld", 11, 1 },
7196	{ "CpcmdSave", 10, 1 },
7197	{ "RxPstructsFull", 8, 2 },
7198	{ "EpcmdDvld", 7, 1 },
7199	{ "EpcmdFlush", 6, 1 },
7200	{ "EpcmdTrimPrefix", 5, 1 },
7201	{ "EpcmdTrimPostfix", 4, 1 },
7202	{ "ERssIp4Pkt", 3, 1 },
7203	{ "ERssIp6Pkt", 2, 1 },
7204	{ "ERssTcpUdpPkt", 1, 1 },
7205	{ "ERssFceFipPkt", 0, 1 },
7206	{ NULL }
7207};
7208
7209static void
7210tp_la_show(struct sbuf *sb, uint64_t *p, int idx)
7211{
7212
7213	field_desc_show(sb, *p, tp_la0);
7214}
7215
7216static void
7217tp_la_show2(struct sbuf *sb, uint64_t *p, int idx)
7218{
7219
7220	if (idx)
7221		sbuf_printf(sb, "\n");
7222	field_desc_show(sb, p[0], tp_la0);
7223	if (idx < (TPLA_SIZE / 2 - 1) || p[1] != ~0ULL)
7224		field_desc_show(sb, p[1], tp_la0);
7225}
7226
7227static void
7228tp_la_show3(struct sbuf *sb, uint64_t *p, int idx)
7229{
7230
7231	if (idx)
7232		sbuf_printf(sb, "\n");
7233	field_desc_show(sb, p[0], tp_la0);
7234	if (idx < (TPLA_SIZE / 2 - 1) || p[1] != ~0ULL)
7235		field_desc_show(sb, p[1], (p[0] & (1 << 17)) ? tp_la2 : tp_la1);
7236}
7237
7238static int
7239sysctl_tp_la(SYSCTL_HANDLER_ARGS)
7240{
7241	struct adapter *sc = arg1;
7242	struct sbuf *sb;
7243	uint64_t *buf, *p;
7244	int rc;
7245	u_int i, inc;
7246	void (*show_func)(struct sbuf *, uint64_t *, int);
7247
7248	rc = sysctl_wire_old_buffer(req, 0);
7249	if (rc != 0)
7250		return (rc);
7251
7252	sb = sbuf_new_for_sysctl(NULL, NULL, 4096, req);
7253	if (sb == NULL)
7254		return (ENOMEM);
7255
7256	buf = malloc(TPLA_SIZE * sizeof(uint64_t), M_CXGBE, M_ZERO | M_WAITOK);
7257
7258	t4_tp_read_la(sc, buf, NULL);
7259	p = buf;
7260
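	/*
	 * DBGLAMODE values 2 and 3 record two 64-bit words per trace
	 * entry; the other modes record one, hence the stride (inc)
	 * and show function chosen below.
	 */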
7261	switch (G_DBGLAMODE(t4_read_reg(sc, A_TP_DBG_LA_CONFIG))) {
7262	case 2:
7263		inc = 2;
7264		show_func = tp_la_show2;
7265		break;
7266	case 3:
7267		inc = 2;
7268		show_func = tp_la_show3;
7269		break;
7270	default:
7271		inc = 1;
7272		show_func = tp_la_show;
7273	}
7274
7275	for (i = 0; i < TPLA_SIZE / inc; i++, p += inc)
7276		(*show_func)(sb, p, i);
7277
7278	rc = sbuf_finish(sb);
7279	sbuf_delete(sb);
7280	free(buf, M_CXGBE);
7281	return (rc);
7282}
7283
7284static int
7285sysctl_tx_rate(SYSCTL_HANDLER_ARGS)
7286{
7287	struct adapter *sc = arg1;
7288	struct sbuf *sb;
7289	int rc;
7290	u64 nrate[MAX_NCHAN], orate[MAX_NCHAN];
7291
7292	rc = sysctl_wire_old_buffer(req, 0);
7293	if (rc != 0)
7294		return (rc);
7295
7296	sb = sbuf_new_for_sysctl(NULL, NULL, 256, req);
7297	if (sb == NULL)
7298		return (ENOMEM);
7299
7300	t4_get_chan_txrate(sc, nrate, orate);
7301
7302	if (sc->chip_params->nchan > 2) {
7303		sbuf_printf(sb, "              channel 0   channel 1"
7304		    "   channel 2   channel 3\n");
7305		sbuf_printf(sb, "NIC B/s:     %10ju  %10ju  %10ju  %10ju\n",
7306		    nrate[0], nrate[1], nrate[2], nrate[3]);
7307		sbuf_printf(sb, "Offload B/s: %10ju  %10ju  %10ju  %10ju",
7308		    orate[0], orate[1], orate[2], orate[3]);
7309	} else {
7310		sbuf_printf(sb, "              channel 0   channel 1\n");
7311		sbuf_printf(sb, "NIC B/s:     %10ju  %10ju\n",
7312		    nrate[0], nrate[1]);
7313		sbuf_printf(sb, "Offload B/s: %10ju  %10ju",
7314		    orate[0], orate[1]);
7315	}
7316
7317	rc = sbuf_finish(sb);
7318	sbuf_delete(sb);
7319
7320	return (rc);
7321}
7322
7323static int
7324sysctl_ulprx_la(SYSCTL_HANDLER_ARGS)
7325{
7326	struct adapter *sc = arg1;
7327	struct sbuf *sb;
7328	uint32_t *buf, *p;
7329	int rc, i;
7330
7331	rc = sysctl_wire_old_buffer(req, 0);
7332	if (rc != 0)
7333		return (rc);
7334
7335	sb = sbuf_new_for_sysctl(NULL, NULL, 4096, req);
7336	if (sb == NULL)
7337		return (ENOMEM);
7338
7339	buf = malloc(ULPRX_LA_SIZE * 8 * sizeof(uint32_t), M_CXGBE,
7340	    M_ZERO | M_WAITOK);
7341
7342	t4_ulprx_read_la(sc, buf);
7343	p = buf;
7344
7345	sbuf_printf(sb, "      Pcmd        Type   Message"
7346	    "                Data");
7347	for (i = 0; i < ULPRX_LA_SIZE; i++, p += 8) {
7348		sbuf_printf(sb, "\n%08x%08x  %4x  %08x  %08x%08x%08x%08x",
7349		    p[1], p[0], p[2], p[3], p[7], p[6], p[5], p[4]);
7350	}
7351
7352	rc = sbuf_finish(sb);
7353	sbuf_delete(sb);
7354	free(buf, M_CXGBE);
7355	return (rc);
7356}
7357
7358static int
7359sysctl_wcwr_stats(SYSCTL_HANDLER_ARGS)
7360{
7361	struct adapter *sc = arg1;
7362	struct sbuf *sb;
7363	int rc, v;
7364
7365	rc = sysctl_wire_old_buffer(req, 0);
7366	if (rc != 0)
7367		return (rc);
7368
7369	sb = sbuf_new_for_sysctl(NULL, NULL, 4096, req);
7370	if (sb == NULL)
7371		return (ENOMEM);
7372
7373	v = t4_read_reg(sc, A_SGE_STAT_CFG);
7374	if (G_STATSOURCE_T5(v) == 7) {
7375		if (G_STATMODE(v) == 0) {
7376			sbuf_printf(sb, "total %d, incomplete %d",
7377			    t4_read_reg(sc, A_SGE_STAT_TOTAL),
7378			    t4_read_reg(sc, A_SGE_STAT_MATCH));
7379		} else if (G_STATMODE(v) == 1) {
7380			sbuf_printf(sb, "total %d, data overflow %d",
7381			    t4_read_reg(sc, A_SGE_STAT_TOTAL),
7382			    t4_read_reg(sc, A_SGE_STAT_MATCH));
7383		}
7384	}
7385	rc = sbuf_finish(sb);
7386	sbuf_delete(sb);
7387
7388	return (rc);
7389}
7390#endif
7391
7392#ifdef TCP_OFFLOAD
7393static void
7394unit_conv(char *buf, size_t len, u_int val, u_int factor)
7395{
7396	u_int rem = val % factor;
7397
7398	if (rem == 0)
7399		snprintf(buf, len, "%u", val / factor);
7400	else {
7401		while (rem % 10 == 0)
7402			rem /= 10;
7403		snprintf(buf, len, "%u.%u", val / factor, rem);
7404	}
7405}
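
/*
 * Worked example (a sketch, not driver code): unit_conv() renders
 * val/factor as a decimal string, stripping trailing zeroes from the
 * remainder.
 *
 *	unit_conv(buf, sizeof(buf), 2500, 1000);	-> "2.5"
 *	unit_conv(buf, sizeof(buf), 2000, 1000);	-> "2"
 *
 * Note the remainder is printed without its leading zeroes, so a value
 * like val = 2050, factor = 1000 renders as "2.5" rather than "2.05".
 */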
7406
7407static int
7408sysctl_tp_tick(SYSCTL_HANDLER_ARGS)
7409{
7410	struct adapter *sc = arg1;
7411	char buf[16];
7412	u_int res, re;
7413	u_int cclk_ps = 1000000000 / sc->params.vpd.cclk;
7414
7415	res = t4_read_reg(sc, A_TP_TIMER_RESOLUTION);
7416	switch (arg2) {
7417	case 0:
7418		/* timer_tick */
7419		re = G_TIMERRESOLUTION(res);
7420		break;
7421	case 1:
7422		/* TCP timestamp tick */
7423		re = G_TIMESTAMPRESOLUTION(res);
7424		break;
7425	case 2:
7426		/* DACK tick */
7427		re = G_DELAYEDACKRESOLUTION(res);
7428		break;
7429	default:
7430		return (EDOOFUS);
7431	}
7432
7433	unit_conv(buf, sizeof(buf), (cclk_ps << re), 1000000);
7434
7435	return (sysctl_handle_string(oidp, buf, sizeof(buf), req));
7436}
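
/*
 * Worked example (sketch): with a 200 MHz core clock the VPD reports
 * cclk = 200000 (kHz), so cclk_ps = 10^9 / 200000 = 5000 ps per core
 * clock tick.  A resolution field of re = 8 gives a timer tick of
 * (5000 << 8) = 1280000 ps, which unit_conv() renders as "1.28"
 * (microseconds, given the 10^6 factor).
 */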
7437
7438static int
7439sysctl_tp_dack_timer(SYSCTL_HANDLER_ARGS)
7440{
7441	struct adapter *sc = arg1;
7442	u_int res, dack_re, v;
7443	u_int cclk_ps = 1000000000 / sc->params.vpd.cclk;
7444
7445	res = t4_read_reg(sc, A_TP_TIMER_RESOLUTION);
7446	dack_re = G_DELAYEDACKRESOLUTION(res);
7447	v = ((cclk_ps << dack_re) / 1000000) * t4_read_reg(sc, A_TP_DACK_TIMER);
7448
7449	return (sysctl_handle_int(oidp, &v, 0, req));
7450}
7451
7452static int
7453sysctl_tp_timer(SYSCTL_HANDLER_ARGS)
7454{
7455	struct adapter *sc = arg1;
7456	int reg = arg2;
7457	u_int tre;
7458	u_long tp_tick_us, v;
7459	u_int cclk_ps = 1000000000 / sc->params.vpd.cclk;
7460
7461	MPASS(reg == A_TP_RXT_MIN || reg == A_TP_RXT_MAX ||
7462	    reg == A_TP_PERS_MIN || reg == A_TP_PERS_MAX ||
7463	    reg == A_TP_KEEP_IDLE || reg == A_TP_KEEP_INTVL ||
7464	    reg == A_TP_INIT_SRTT || reg == A_TP_FINWAIT2_TIMER);
7465
7466	tre = G_TIMERRESOLUTION(t4_read_reg(sc, A_TP_TIMER_RESOLUTION));
7467	tp_tick_us = (cclk_ps << tre) / 1000000;
7468
7469	if (reg == A_TP_INIT_SRTT)
7470		v = tp_tick_us * G_INITSRTT(t4_read_reg(sc, reg));
7471	else
7472		v = tp_tick_us * t4_read_reg(sc, reg);
7473
7474	return (sysctl_handle_long(oidp, &v, 0, req));
7475}
7476#endif
7477
7478static uint32_t
7479fconf_iconf_to_mode(uint32_t fconf, uint32_t iconf)
7480{
7481	uint32_t mode;
7482
7483	mode = T4_FILTER_IPv4 | T4_FILTER_IPv6 | T4_FILTER_IP_SADDR |
7484	    T4_FILTER_IP_DADDR | T4_FILTER_IP_SPORT | T4_FILTER_IP_DPORT;
7485
7486	if (fconf & F_FRAGMENTATION)
7487		mode |= T4_FILTER_IP_FRAGMENT;
7488
7489	if (fconf & F_MPSHITTYPE)
7490		mode |= T4_FILTER_MPS_HIT_TYPE;
7491
7492	if (fconf & F_MACMATCH)
7493		mode |= T4_FILTER_MAC_IDX;
7494
7495	if (fconf & F_ETHERTYPE)
7496		mode |= T4_FILTER_ETH_TYPE;
7497
7498	if (fconf & F_PROTOCOL)
7499		mode |= T4_FILTER_IP_PROTO;
7500
7501	if (fconf & F_TOS)
7502		mode |= T4_FILTER_IP_TOS;
7503
7504	if (fconf & F_VLAN)
7505		mode |= T4_FILTER_VLAN;
7506
7507	if (fconf & F_VNIC_ID) {
7508		mode |= T4_FILTER_VNIC;
7509		if (iconf & F_VNIC)
7510			mode |= T4_FILTER_IC_VNIC;
7511	}
7512
7513	if (fconf & F_PORT)
7514		mode |= T4_FILTER_PORT;
7515
7516	if (fconf & F_FCOE)
7517		mode |= T4_FILTER_FCoE;
7518
7519	return (mode);
7520}
7521
7522static uint32_t
7523mode_to_fconf(uint32_t mode)
7524{
7525	uint32_t fconf = 0;
7526
7527	if (mode & T4_FILTER_IP_FRAGMENT)
7528		fconf |= F_FRAGMENTATION;
7529
7530	if (mode & T4_FILTER_MPS_HIT_TYPE)
7531		fconf |= F_MPSHITTYPE;
7532
7533	if (mode & T4_FILTER_MAC_IDX)
7534		fconf |= F_MACMATCH;
7535
7536	if (mode & T4_FILTER_ETH_TYPE)
7537		fconf |= F_ETHERTYPE;
7538
7539	if (mode & T4_FILTER_IP_PROTO)
7540		fconf |= F_PROTOCOL;
7541
7542	if (mode & T4_FILTER_IP_TOS)
7543		fconf |= F_TOS;
7544
7545	if (mode & T4_FILTER_VLAN)
7546		fconf |= F_VLAN;
7547
7548	if (mode & T4_FILTER_VNIC)
7549		fconf |= F_VNIC_ID;
7550
7551	if (mode & T4_FILTER_PORT)
7552		fconf |= F_PORT;
7553
7554	if (mode & T4_FILTER_FCoE)
7555		fconf |= F_FCOE;
7556
7557	return (fconf);
7558}
7559
7560static uint32_t
7561mode_to_iconf(uint32_t mode)
7562{
7563
7564	if (mode & T4_FILTER_IC_VNIC)
7565		return (F_VNIC);
7566	return (0);
7567}
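
/*
 * Example (sketch): mode_to_fconf() and fconf_iconf_to_mode() are
 * inverses over the optional match fields, e.g.:
 *
 *	mode_to_fconf(T4_FILTER_VLAN | T4_FILTER_IP_PROTO)
 *		-> F_VLAN | F_PROTOCOL
 *	fconf_iconf_to_mode(F_VLAN | F_PROTOCOL, 0)
 *		-> T4_FILTER_VLAN | T4_FILTER_IP_PROTO | <base bits>
 *
 * where <base bits> are the IPv4/IPv6 address and port fields that
 * fconf_iconf_to_mode() always includes in the mode.
 */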
7568
7569static int check_fspec_against_fconf_iconf(struct adapter *sc,
7570    struct t4_filter_specification *fs)
7571{
7572	struct tp_params *tpp = &sc->params.tp;
7573	uint32_t fconf = 0;
7574
7575	if (fs->val.frag || fs->mask.frag)
7576		fconf |= F_FRAGMENTATION;
7577
7578	if (fs->val.matchtype || fs->mask.matchtype)
7579		fconf |= F_MPSHITTYPE;
7580
7581	if (fs->val.macidx || fs->mask.macidx)
7582		fconf |= F_MACMATCH;
7583
7584	if (fs->val.ethtype || fs->mask.ethtype)
7585		fconf |= F_ETHERTYPE;
7586
7587	if (fs->val.proto || fs->mask.proto)
7588		fconf |= F_PROTOCOL;
7589
7590	if (fs->val.tos || fs->mask.tos)
7591		fconf |= F_TOS;
7592
7593	if (fs->val.vlan_vld || fs->mask.vlan_vld)
7594		fconf |= F_VLAN;
7595
7596	if (fs->val.ovlan_vld || fs->mask.ovlan_vld) {
7597		fconf |= F_VNIC_ID;
7598		if (tpp->ingress_config & F_VNIC)
7599			return (EINVAL);
7600	}
7601
7602	if (fs->val.pfvf_vld || fs->mask.pfvf_vld) {
7603		fconf |= F_VNIC_ID;
7604		if ((tpp->ingress_config & F_VNIC) == 0)
7605			return (EINVAL);
7606	}
7607
7608	if (fs->val.iport || fs->mask.iport)
7609		fconf |= F_PORT;
7610
7611	if (fs->val.fcoe || fs->mask.fcoe)
7612		fconf |= F_FCOE;
7613
7614	if ((tpp->vlan_pri_map | fconf) != tpp->vlan_pri_map)
7615		return (E2BIG);
7616
7617	return (0);
7618}
7619
7620static int
7621get_filter_mode(struct adapter *sc, uint32_t *mode)
7622{
7623	struct tp_params *tpp = &sc->params.tp;
7624
7625	/*
7626	 * We trust the cached values of the relevant TP registers.  This means
7627	 * things work reliably only if writes to those registers are always via
7628	 * t4_set_filter_mode.
7629	 */
7630	*mode = fconf_iconf_to_mode(tpp->vlan_pri_map, tpp->ingress_config);
7631
7632	return (0);
7633}
7634
7635static int
7636set_filter_mode(struct adapter *sc, uint32_t mode)
7637{
7638	struct tp_params *tpp = &sc->params.tp;
7639	uint32_t fconf, iconf;
7640	int rc;
7641
7642	iconf = mode_to_iconf(mode);
7643	if ((iconf ^ tpp->ingress_config) & F_VNIC) {
7644		/*
7645		 * For now we just complain if A_TP_INGRESS_CONFIG is not
7646		 * already set to the correct value for the requested filter
7647		 * mode.  It's not clear if it's safe to write to this register
7648		 * on the fly.  (And we trust the cached value of the register).
7649		 */
7650		return (EBUSY);
7651	}
7652
7653	fconf = mode_to_fconf(mode);
7654
7655	rc = begin_synchronized_op(sc, NULL, HOLD_LOCK | SLEEP_OK | INTR_OK,
7656	    "t4setfm");
7657	if (rc)
7658		return (rc);
7659
7660	if (sc->tids.ftids_in_use > 0) {
7661		rc = EBUSY;
7662		goto done;
7663	}
7664
7665#ifdef TCP_OFFLOAD
7666	if (uld_active(sc, ULD_TOM)) {
7667		rc = EBUSY;
7668		goto done;
7669	}
7670#endif
7671
7672	rc = -t4_set_filter_mode(sc, fconf);
7673done:
7674	end_synchronized_op(sc, LOCK_HELD);
7675	return (rc);
7676}
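
/*
 * Userland sketch (assumptions: the control node is the adapter's
 * nexus device, e.g. /dev/t4nex0, and the caller holds PRIV_DRIVER;
 * cxgbetool issues the same ioctl):
 *
 *	uint32_t mode = T4_FILTER_IPv4 | T4_FILTER_IPv6 |
 *	    T4_FILTER_IP_SADDR | T4_FILTER_IP_DADDR |
 *	    T4_FILTER_IP_SPORT | T4_FILTER_IP_DPORT | T4_FILTER_VLAN;
 *	int fd = open("/dev/t4nex0", O_RDWR);
 *
 *	if (fd >= 0 && ioctl(fd, CHELSIO_T4_SET_FILTER_MODE, &mode) != 0)
 *		warn("CHELSIO_T4_SET_FILTER_MODE");
 */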
7677
7678static inline uint64_t
7679get_filter_hits(struct adapter *sc, uint32_t fid)
7680{
7681	uint32_t tcb_addr;
7682
7683	tcb_addr = t4_read_reg(sc, A_TP_CMM_TCB_BASE) +
7684	    (fid + sc->tids.ftid_base) * TCB_SIZE;
7685
7686	if (is_t4(sc)) {
7687		uint64_t hits;
7688
7689		read_via_memwin(sc, 0, tcb_addr + 16, (uint32_t *)&hits, 8);
7690		return (be64toh(hits));
7691	} else {
7692		uint32_t hits;
7693
7694		read_via_memwin(sc, 0, tcb_addr + 24, &hits, 4);
7695		return (be32toh(hits));
7696	}
7697}
7698
7699static int
7700get_filter(struct adapter *sc, struct t4_filter *t)
7701{
7702	int i, rc, nfilters = sc->tids.nftids;
7703	struct filter_entry *f;
7704
7705	rc = begin_synchronized_op(sc, NULL, HOLD_LOCK | SLEEP_OK | INTR_OK,
7706	    "t4getf");
7707	if (rc)
7708		return (rc);
7709
7710	if (sc->tids.ftids_in_use == 0 || sc->tids.ftid_tab == NULL ||
7711	    t->idx >= nfilters) {
7712		t->idx = 0xffffffff;
7713		goto done;
7714	}
7715
7716	f = &sc->tids.ftid_tab[t->idx];
7717	for (i = t->idx; i < nfilters; i++, f++) {
7718		if (f->valid) {
7719			t->idx = i;
7720			t->l2tidx = f->l2t ? f->l2t->idx : 0;
7721			t->smtidx = f->smtidx;
7722			if (f->fs.hitcnts)
7723				t->hits = get_filter_hits(sc, t->idx);
7724			else
7725				t->hits = UINT64_MAX;
7726			t->fs = f->fs;
7727
7728			goto done;
7729		}
7730	}
7731
7732	t->idx = 0xffffffff;
7733done:
7734	end_synchronized_op(sc, LOCK_HELD);
7735	return (0);
7736}
7737
7738static int
7739set_filter(struct adapter *sc, struct t4_filter *t)
7740{
7741	unsigned int nfilters, nports;
7742	struct filter_entry *f;
7743	int i, rc;
7744
7745	rc = begin_synchronized_op(sc, NULL, SLEEP_OK | INTR_OK, "t4setf");
7746	if (rc)
7747		return (rc);
7748
7749	nfilters = sc->tids.nftids;
7750	nports = sc->params.nports;
7751
7752	if (nfilters == 0) {
7753		rc = ENOTSUP;
7754		goto done;
7755	}
7756
7757	if (!(sc->flags & FULL_INIT_DONE)) {
7758		rc = EAGAIN;
7759		goto done;
7760	}
7761
7762	if (t->idx >= nfilters) {
7763		rc = EINVAL;
7764		goto done;
7765	}
7766
7767	/* Validate against the global filter mode and ingress config */
7768	rc = check_fspec_against_fconf_iconf(sc, &t->fs);
7769	if (rc != 0)
7770		goto done;
7771
7772	if (t->fs.action == FILTER_SWITCH && t->fs.eport >= nports) {
7773		rc = EINVAL;
7774		goto done;
7775	}
7776
7777	if (t->fs.val.iport >= nports) {
7778		rc = EINVAL;
7779		goto done;
7780	}
7781
7782	/* Can't specify an iq if not steering to it */
7783	if (!t->fs.dirsteer && t->fs.iq) {
7784		rc = EINVAL;
7785		goto done;
7786	}
7787
7788	/* IPv6 filter idx must be 4 aligned */
7789	if (t->fs.type == 1 &&
7790	    ((t->idx & 0x3) || t->idx + 4 >= nfilters)) {
7791		rc = EINVAL;
7792		goto done;
7793	}
7794
7795	if (sc->tids.ftid_tab == NULL) {
7796		KASSERT(sc->tids.ftids_in_use == 0,
7797		    ("%s: no memory allocated but ftids_in_use > 0",
7798		    __func__));
7799
7800		sc->tids.ftid_tab = malloc(sizeof (struct filter_entry) *
7801		    nfilters, M_CXGBE, M_NOWAIT | M_ZERO);
7802		if (sc->tids.ftid_tab == NULL) {
7803			rc = ENOMEM;
7804			goto done;
7805		}
7806		mtx_init(&sc->tids.ftid_lock, "T4 filters", 0, MTX_DEF);
7807	}
7808
7809	for (i = 0; i < 4; i++) {
7810		f = &sc->tids.ftid_tab[t->idx + i];
7811
7812		if (f->pending || f->valid) {
7813			rc = EBUSY;
7814			goto done;
7815		}
7816		if (f->locked) {
7817			rc = EPERM;
7818			goto done;
7819		}
7820
7821		if (t->fs.type == 0)
7822			break;
7823	}
7824
7825	f = &sc->tids.ftid_tab[t->idx];
7826	f->fs = t->fs;
7827
7828	rc = set_filter_wr(sc, t->idx);
7829done:
7830	end_synchronized_op(sc, 0);
7831
7832	if (rc == 0) {
7833		mtx_lock(&sc->tids.ftid_lock);
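		/*
		 * Wait for the firmware's reply.  t4_filter_rpl() clears
		 * f->pending and wakes up sc->tids.ftid_tab sleepers.
		 */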
7834		for (;;) {
7835			if (f->pending == 0) {
7836				rc = f->valid ? 0 : EIO;
7837				break;
7838			}
7839
7840			if (mtx_sleep(&sc->tids.ftid_tab, &sc->tids.ftid_lock,
7841			    PCATCH, "t4setfw", 0)) {
7842				rc = EINPROGRESS;
7843				break;
7844			}
7845		}
7846		mtx_unlock(&sc->tids.ftid_lock);
7847	}
7848	return (rc);
7849}
7850
7851static int
7852del_filter(struct adapter *sc, struct t4_filter *t)
7853{
7854	unsigned int nfilters;
7855	struct filter_entry *f;
7856	int rc;
7857
7858	rc = begin_synchronized_op(sc, NULL, SLEEP_OK | INTR_OK, "t4delf");
7859	if (rc)
7860		return (rc);
7861
7862	nfilters = sc->tids.nftids;
7863
7864	if (nfilters == 0) {
7865		rc = ENOTSUP;
7866		goto done;
7867	}
7868
7869	if (sc->tids.ftid_tab == NULL || sc->tids.ftids_in_use == 0 ||
7870	    t->idx >= nfilters) {
7871		rc = EINVAL;
7872		goto done;
7873	}
7874
7875	if (!(sc->flags & FULL_INIT_DONE)) {
7876		rc = EAGAIN;
7877		goto done;
7878	}
7879
7880	f = &sc->tids.ftid_tab[t->idx];
7881
7882	if (f->pending) {
7883		rc = EBUSY;
7884		goto done;
7885	}
7886	if (f->locked) {
7887		rc = EPERM;
7888		goto done;
7889	}
7890
7891	if (f->valid) {
7892		t->fs = f->fs;	/* extra info for the caller */
7893		rc = del_filter_wr(sc, t->idx);
7894	}
7895
7896done:
7897	end_synchronized_op(sc, 0);
7898
7899	if (rc == 0) {
7900		mtx_lock(&sc->tids.ftid_lock);
7901		for (;;) {
7902			if (f->pending == 0) {
7903				rc = f->valid ? EIO : 0;
7904				break;
7905			}
7906
7907			if (mtx_sleep(&sc->tids.ftid_tab, &sc->tids.ftid_lock,
7908			    PCATCH, "t4delfw", 0)) {
7909				rc = EINPROGRESS;
7910				break;
7911			}
7912		}
7913		mtx_unlock(&sc->tids.ftid_lock);
7914	}
7915
7916	return (rc);
7917}
7918
7919static void
7920clear_filter(struct filter_entry *f)
7921{
7922	if (f->l2t)
7923		t4_l2t_release(f->l2t);
7924
7925	bzero(f, sizeof (*f));
7926}
7927
7928static int
7929set_filter_wr(struct adapter *sc, int fidx)
7930{
7931	struct filter_entry *f = &sc->tids.ftid_tab[fidx];
7932	struct fw_filter_wr *fwr;
7933	unsigned int ftid, vnic_vld, vnic_vld_mask;
7934	struct wrq_cookie cookie;
7935
7936	ASSERT_SYNCHRONIZED_OP(sc);
7937
7938	if (f->fs.newdmac || f->fs.newvlan) {
7939		/* This filter needs an L2T entry; allocate one. */
7940		f->l2t = t4_l2t_alloc_switching(sc->l2t);
7941		if (f->l2t == NULL)
7942			return (EAGAIN);
7943		if (t4_l2t_set_switching(sc, f->l2t, f->fs.vlan, f->fs.eport,
7944		    f->fs.dmac)) {
7945			t4_l2t_release(f->l2t);
7946			f->l2t = NULL;
7947			return (ENOMEM);
7948		}
7949	}
7950
7951	/* Already validated against fconf, iconf */
7952	MPASS((f->fs.val.pfvf_vld & f->fs.val.ovlan_vld) == 0);
7953	MPASS((f->fs.mask.pfvf_vld & f->fs.mask.ovlan_vld) == 0);
7954	if (f->fs.val.pfvf_vld || f->fs.val.ovlan_vld)
7955		vnic_vld = 1;
7956	else
7957		vnic_vld = 0;
7958	if (f->fs.mask.pfvf_vld || f->fs.mask.ovlan_vld)
7959		vnic_vld_mask = 1;
7960	else
7961		vnic_vld_mask = 0;
7962
7963	ftid = sc->tids.ftid_base + fidx;
7964
7965	fwr = start_wrq_wr(&sc->sge.mgmtq, howmany(sizeof(*fwr), 16), &cookie);
7966	if (fwr == NULL)
7967		return (ENOMEM);
7968	bzero(fwr, sizeof(*fwr));
7969
7970	fwr->op_pkd = htobe32(V_FW_WR_OP(FW_FILTER_WR));
7971	fwr->len16_pkd = htobe32(FW_LEN16(*fwr));
7972	fwr->tid_to_iq =
7973	    htobe32(V_FW_FILTER_WR_TID(ftid) |
7974		V_FW_FILTER_WR_RQTYPE(f->fs.type) |
7975		V_FW_FILTER_WR_NOREPLY(0) |
7976		V_FW_FILTER_WR_IQ(f->fs.iq));
7977	fwr->del_filter_to_l2tix =
7978	    htobe32(V_FW_FILTER_WR_RPTTID(f->fs.rpttid) |
7979		V_FW_FILTER_WR_DROP(f->fs.action == FILTER_DROP) |
7980		V_FW_FILTER_WR_DIRSTEER(f->fs.dirsteer) |
7981		V_FW_FILTER_WR_MASKHASH(f->fs.maskhash) |
7982		V_FW_FILTER_WR_DIRSTEERHASH(f->fs.dirsteerhash) |
7983		V_FW_FILTER_WR_LPBK(f->fs.action == FILTER_SWITCH) |
7984		V_FW_FILTER_WR_DMAC(f->fs.newdmac) |
7985		V_FW_FILTER_WR_SMAC(f->fs.newsmac) |
7986		V_FW_FILTER_WR_INSVLAN(f->fs.newvlan == VLAN_INSERT ||
7987		    f->fs.newvlan == VLAN_REWRITE) |
7988		V_FW_FILTER_WR_RMVLAN(f->fs.newvlan == VLAN_REMOVE ||
7989		    f->fs.newvlan == VLAN_REWRITE) |
7990		V_FW_FILTER_WR_HITCNTS(f->fs.hitcnts) |
7991		V_FW_FILTER_WR_TXCHAN(f->fs.eport) |
7992		V_FW_FILTER_WR_PRIO(f->fs.prio) |
7993		V_FW_FILTER_WR_L2TIX(f->l2t ? f->l2t->idx : 0));
7994	fwr->ethtype = htobe16(f->fs.val.ethtype);
7995	fwr->ethtypem = htobe16(f->fs.mask.ethtype);
7996	fwr->frag_to_ovlan_vldm =
7997	    (V_FW_FILTER_WR_FRAG(f->fs.val.frag) |
7998		V_FW_FILTER_WR_FRAGM(f->fs.mask.frag) |
7999		V_FW_FILTER_WR_IVLAN_VLD(f->fs.val.vlan_vld) |
8000		V_FW_FILTER_WR_OVLAN_VLD(vnic_vld) |
8001		V_FW_FILTER_WR_IVLAN_VLDM(f->fs.mask.vlan_vld) |
8002		V_FW_FILTER_WR_OVLAN_VLDM(vnic_vld_mask));
8003	fwr->smac_sel = 0;
8004	fwr->rx_chan_rx_rpl_iq = htobe16(V_FW_FILTER_WR_RX_CHAN(0) |
8005	    V_FW_FILTER_WR_RX_RPL_IQ(sc->sge.fwq.abs_id));
8006	fwr->maci_to_matchtypem =
8007	    htobe32(V_FW_FILTER_WR_MACI(f->fs.val.macidx) |
8008		V_FW_FILTER_WR_MACIM(f->fs.mask.macidx) |
8009		V_FW_FILTER_WR_FCOE(f->fs.val.fcoe) |
8010		V_FW_FILTER_WR_FCOEM(f->fs.mask.fcoe) |
8011		V_FW_FILTER_WR_PORT(f->fs.val.iport) |
8012		V_FW_FILTER_WR_PORTM(f->fs.mask.iport) |
8013		V_FW_FILTER_WR_MATCHTYPE(f->fs.val.matchtype) |
8014		V_FW_FILTER_WR_MATCHTYPEM(f->fs.mask.matchtype));
8015	fwr->ptcl = f->fs.val.proto;
8016	fwr->ptclm = f->fs.mask.proto;
8017	fwr->ttyp = f->fs.val.tos;
8018	fwr->ttypm = f->fs.mask.tos;
8019	fwr->ivlan = htobe16(f->fs.val.vlan);
8020	fwr->ivlanm = htobe16(f->fs.mask.vlan);
8021	fwr->ovlan = htobe16(f->fs.val.vnic);
8022	fwr->ovlanm = htobe16(f->fs.mask.vnic);
8023	bcopy(f->fs.val.dip, fwr->lip, sizeof (fwr->lip));
8024	bcopy(f->fs.mask.dip, fwr->lipm, sizeof (fwr->lipm));
8025	bcopy(f->fs.val.sip, fwr->fip, sizeof (fwr->fip));
8026	bcopy(f->fs.mask.sip, fwr->fipm, sizeof (fwr->fipm));
8027	fwr->lp = htobe16(f->fs.val.dport);
8028	fwr->lpm = htobe16(f->fs.mask.dport);
8029	fwr->fp = htobe16(f->fs.val.sport);
8030	fwr->fpm = htobe16(f->fs.mask.sport);
8031	if (f->fs.newsmac)
8032		bcopy(f->fs.smac, fwr->sma, sizeof (fwr->sma));
8033
8034	f->pending = 1;
8035	sc->tids.ftids_in_use++;
8036
8037	commit_wrq_wr(&sc->sge.mgmtq, fwr, &cookie);
8038	return (0);
8039}
8040
8041static int
8042del_filter_wr(struct adapter *sc, int fidx)
8043{
8044	struct filter_entry *f = &sc->tids.ftid_tab[fidx];
8045	struct fw_filter_wr *fwr;
8046	unsigned int ftid;
8047	struct wrq_cookie cookie;
8048
8049	ftid = sc->tids.ftid_base + fidx;
8050
8051	fwr = start_wrq_wr(&sc->sge.mgmtq, howmany(sizeof(*fwr), 16), &cookie);
8052	if (fwr == NULL)
8053		return (ENOMEM);
8054	bzero(fwr, sizeof (*fwr));
8055
8056	t4_mk_filtdelwr(ftid, fwr, sc->sge.fwq.abs_id);
8057
8058	f->pending = 1;
8059	commit_wrq_wr(&sc->sge.mgmtq, fwr, &cookie);
8060	return (0);
8061}
8062
8063int
8064t4_filter_rpl(struct sge_iq *iq, const struct rss_header *rss, struct mbuf *m)
8065{
8066	struct adapter *sc = iq->adapter;
8067	const struct cpl_set_tcb_rpl *rpl = (const void *)(rss + 1);
8068	unsigned int idx = GET_TID(rpl);
8069	unsigned int rc;
8070	struct filter_entry *f;
8071
8072	KASSERT(m == NULL, ("%s: payload with opcode %02x", __func__,
8073	    rss->opcode));
8074
8075	if (is_ftid(sc, idx)) {
8076
8077		idx -= sc->tids.ftid_base;
8078		f = &sc->tids.ftid_tab[idx];
8079		rc = G_COOKIE(rpl->cookie);
8080
8081		mtx_lock(&sc->tids.ftid_lock);
8082		if (rc == FW_FILTER_WR_FLT_ADDED) {
8083			KASSERT(f->pending, ("%s: filter[%u] isn't pending.",
8084			    __func__, idx));
8085			f->smtidx = (be64toh(rpl->oldval) >> 24) & 0xff;
8086			f->pending = 0;  /* asynchronous setup completed */
8087			f->valid = 1;
8088		} else {
8089			if (rc != FW_FILTER_WR_FLT_DELETED) {
8090				/* Add or delete failed, display an error */
8091				log(LOG_ERR,
8092				    "filter %u setup failed with error %u\n",
8093				    idx, rc);
8094			}
8095
8096			clear_filter(f);
8097			sc->tids.ftids_in_use--;
8098		}
8099		wakeup(&sc->tids.ftid_tab);
8100		mtx_unlock(&sc->tids.ftid_lock);
8101	}
8102
8103	return (0);
8104}
8105
8106static int
8107get_sge_context(struct adapter *sc, struct t4_sge_context *cntxt)
8108{
8109	int rc;
8110
8111	if (cntxt->cid > M_CTXTQID)
8112		return (EINVAL);
8113
8114	if (cntxt->mem_id != CTXT_EGRESS && cntxt->mem_id != CTXT_INGRESS &&
8115	    cntxt->mem_id != CTXT_FLM && cntxt->mem_id != CTXT_CNM)
8116		return (EINVAL);
8117
8118	rc = begin_synchronized_op(sc, NULL, SLEEP_OK | INTR_OK, "t4ctxt");
8119	if (rc)
8120		return (rc);
8121
8122	if (sc->flags & FW_OK) {
8123		rc = -t4_sge_ctxt_rd(sc, sc->mbox, cntxt->cid, cntxt->mem_id,
8124		    &cntxt->data[0]);
8125		if (rc == 0)
8126			goto done;
8127	}
8128
8129	/*
8130	 * Read via firmware failed or wasn't even attempted.  Read directly via
8131	 * the backdoor.
8132	 */
8133	rc = -t4_sge_ctxt_rd_bd(sc, cntxt->cid, cntxt->mem_id, &cntxt->data[0]);
8134done:
8135	end_synchronized_op(sc, 0);
8136	return (rc);
8137}
8138
8139static int
8140load_fw(struct adapter *sc, struct t4_data *fw)
8141{
8142	int rc;
8143	uint8_t *fw_data;
8144
8145	rc = begin_synchronized_op(sc, NULL, SLEEP_OK | INTR_OK, "t4ldfw");
8146	if (rc)
8147		return (rc);
8148
8149	if (sc->flags & FULL_INIT_DONE) {
8150		rc = EBUSY;
8151		goto done;
8152	}
8153
8154	fw_data = malloc(fw->len, M_CXGBE, M_WAITOK);
8155	if (fw_data == NULL) {
8156		rc = ENOMEM;
8157		goto done;
8158	}
8159
8160	rc = copyin(fw->data, fw_data, fw->len);
8161	if (rc == 0)
8162		rc = -t4_load_fw(sc, fw_data, fw->len);
8163
8164	free(fw_data, M_CXGBE);
8165done:
8166	end_synchronized_op(sc, 0);
8167	return (rc);
8168}
8169
8170#define MAX_READ_BUF_SIZE (128 * 1024)
8171static int
8172read_card_mem(struct adapter *sc, int win, struct t4_mem_range *mr)
8173{
8174	uint32_t addr, remaining, n;
8175	uint32_t *buf;
8176	int rc;
8177	uint8_t *dst;
8178
8179	rc = validate_mem_range(sc, mr->addr, mr->len);
8180	if (rc != 0)
8181		return (rc);
8182
8183	buf = malloc(min(mr->len, MAX_READ_BUF_SIZE), M_CXGBE, M_WAITOK);
8184	addr = mr->addr;
8185	remaining = mr->len;
8186	dst = (void *)mr->data;
8187
8188	while (remaining) {
8189		n = min(remaining, MAX_READ_BUF_SIZE);
8190		read_via_memwin(sc, 2, addr, buf, n);
8191
8192		rc = copyout(buf, dst, n);
8193		if (rc != 0)
8194			break;
8195
8196		dst += n;
8197		remaining -= n;
8198		addr += n;
8199	}
8200
8201	free(buf, M_CXGBE);
8202	return (rc);
8203}
8204#undef MAX_READ_BUF_SIZE
8205
8206static int
8207read_i2c(struct adapter *sc, struct t4_i2c_data *i2cd)
8208{
8209	int rc;
8210
8211	if (i2cd->len == 0 || i2cd->port_id >= sc->params.nports)
8212		return (EINVAL);
8213
8214	if (i2cd->len > sizeof(i2cd->data))
8215		return (EFBIG);
8216
8217	rc = begin_synchronized_op(sc, NULL, SLEEP_OK | INTR_OK, "t4i2crd");
8218	if (rc)
8219		return (rc);
8220	rc = -t4_i2c_rd(sc, sc->mbox, i2cd->port_id, i2cd->dev_addr,
8221	    i2cd->offset, i2cd->len, &i2cd->data[0]);
8222	end_synchronized_op(sc, 0);
8223
8224	return (rc);
8225}
8226
8227static int
8228in_range(int val, int lo, int hi)
8229{
8230
8231	return (val < 0 || (val <= hi && val >= lo));
8232}
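
/*
 * Examples (sketch): a negative value means "parameter not supplied"
 * and is accepted; anything else must fall within [lo, hi].
 *
 *	in_range(-1, 0, 3)	-> 1	(unset, ok)
 *	in_range(2, 0, 3)	-> 1
 *	in_range(5, 0, 3)	-> 0
 */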
8233
8234static int
8235set_sched_class(struct adapter *sc, struct t4_sched_params *p)
8236{
8237	int fw_subcmd, fw_type, rc;
8238
8239	rc = begin_synchronized_op(sc, NULL, SLEEP_OK | INTR_OK, "t4setsc");
8240	if (rc)
8241		return (rc);
8242
8243	if (!(sc->flags & FULL_INIT_DONE)) {
8244		rc = EAGAIN;
8245		goto done;
8246	}
8247
8248	/*
8249	 * Translate the cxgbetool parameters into T4 firmware parameters.  (The
8250	 * sub-command and type are in common locations.)
8251	 */
8252	if (p->subcmd == SCHED_CLASS_SUBCMD_CONFIG)
8253		fw_subcmd = FW_SCHED_SC_CONFIG;
8254	else if (p->subcmd == SCHED_CLASS_SUBCMD_PARAMS)
8255		fw_subcmd = FW_SCHED_SC_PARAMS;
8256	else {
8257		rc = EINVAL;
8258		goto done;
8259	}
8260	if (p->type == SCHED_CLASS_TYPE_PACKET)
8261		fw_type = FW_SCHED_TYPE_PKTSCHED;
8262	else {
8263		rc = EINVAL;
8264		goto done;
8265	}
8266
8267	if (fw_subcmd == FW_SCHED_SC_CONFIG) {
8268		/* Vet our parameters ... */
8269		if (p->u.config.minmax < 0) {
8270			rc = EINVAL;
8271			goto done;
8272		}
8273
8274		/* And pass the request to the firmware ... */
8275		rc = -t4_sched_config(sc, fw_type, p->u.config.minmax, 1);
8276		goto done;
8277	}
8278
8279	if (fw_subcmd == FW_SCHED_SC_PARAMS) {
8280		int fw_level;
8281		int fw_mode;
8282		int fw_rateunit;
8283		int fw_ratemode;
8284
8285		if (p->u.params.level == SCHED_CLASS_LEVEL_CL_RL)
8286			fw_level = FW_SCHED_PARAMS_LEVEL_CL_RL;
8287		else if (p->u.params.level == SCHED_CLASS_LEVEL_CL_WRR)
8288			fw_level = FW_SCHED_PARAMS_LEVEL_CL_WRR;
8289		else if (p->u.params.level == SCHED_CLASS_LEVEL_CH_RL)
8290			fw_level = FW_SCHED_PARAMS_LEVEL_CH_RL;
8291		else {
8292			rc = EINVAL;
8293			goto done;
8294		}
8295
8296		if (p->u.params.mode == SCHED_CLASS_MODE_CLASS)
8297			fw_mode = FW_SCHED_PARAMS_MODE_CLASS;
8298		else if (p->u.params.mode == SCHED_CLASS_MODE_FLOW)
8299			fw_mode = FW_SCHED_PARAMS_MODE_FLOW;
8300		else {
8301			rc = EINVAL;
8302			goto done;
8303		}
8304
8305		if (p->u.params.rateunit == SCHED_CLASS_RATEUNIT_BITS)
8306			fw_rateunit = FW_SCHED_PARAMS_UNIT_BITRATE;
8307		else if (p->u.params.rateunit == SCHED_CLASS_RATEUNIT_PKTS)
8308			fw_rateunit = FW_SCHED_PARAMS_UNIT_PKTRATE;
8309		else {
8310			rc = EINVAL;
8311			goto done;
8312		}
8313
8314		if (p->u.params.ratemode == SCHED_CLASS_RATEMODE_REL)
8315			fw_ratemode = FW_SCHED_PARAMS_RATE_REL;
8316		else if (p->u.params.ratemode == SCHED_CLASS_RATEMODE_ABS)
8317			fw_ratemode = FW_SCHED_PARAMS_RATE_ABS;
8318		else {
8319			rc = EINVAL;
8320			goto done;
8321		}
8322
8323		/* Vet our parameters ... */
8324		if (!in_range(p->u.params.channel, 0, 3) ||
8325		    !in_range(p->u.params.cl, 0, sc->chip_params->nsched_cls - 1) ||
8326		    !in_range(p->u.params.minrate, 0, 10000000) ||
8327		    !in_range(p->u.params.maxrate, 0, 10000000) ||
8328		    !in_range(p->u.params.weight, 0, 100)) {
8329			rc = ERANGE;
8330			goto done;
8331		}
8332
8333		/*
8334		 * Translate any unset parameters into the firmware's
8335		 * nomenclature, or fail the call if a required parameter
8336		 * was left unset.
8337		 */
8338		if (p->u.params.rateunit < 0 || p->u.params.ratemode < 0 ||
8339		    p->u.params.channel < 0 || p->u.params.cl < 0) {
8340			rc = EINVAL;
8341			goto done;
8342		}
8343		if (p->u.params.minrate < 0)
8344			p->u.params.minrate = 0;
8345		if (p->u.params.maxrate < 0) {
8346			if (p->u.params.level == SCHED_CLASS_LEVEL_CL_RL ||
8347			    p->u.params.level == SCHED_CLASS_LEVEL_CH_RL) {
8348				rc = EINVAL;
8349				goto done;
8350			} else
8351				p->u.params.maxrate = 0;
8352		}
8353		if (p->u.params.weight < 0) {
8354			if (p->u.params.level == SCHED_CLASS_LEVEL_CL_WRR) {
8355				rc = EINVAL;
8356				goto done;
8357			} else
8358				p->u.params.weight = 0;
8359		}
8360		if (p->u.params.pktsize < 0) {
8361			if (p->u.params.level == SCHED_CLASS_LEVEL_CL_RL ||
8362			    p->u.params.level == SCHED_CLASS_LEVEL_CH_RL) {
8363				rc = EINVAL;
8364				goto done;
8365			} else
8366				p->u.params.pktsize = 0;
8367		}
8368
8369		/* See what the firmware thinks of the request ... */
8370		rc = -t4_sched_params(sc, fw_type, fw_level, fw_mode,
8371		    fw_rateunit, fw_ratemode, p->u.params.channel,
8372		    p->u.params.cl, p->u.params.minrate, p->u.params.maxrate,
8373		    p->u.params.weight, p->u.params.pktsize, 1);
8374		goto done;
8375	}
8376
8377	rc = EINVAL;
8378done:
8379	end_synchronized_op(sc, 0);
8380	return (rc);
8381}
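
/*
 * Userland sketch (assumes /dev/t4nex0 and cxgbetool-style rate units,
 * i.e. Kb/s when the rate unit is bits): rate-limit scheduling class 2
 * on channel 0 to 100 Mb/s per flow.
 *
 *	struct t4_sched_params sp = {
 *		.subcmd = SCHED_CLASS_SUBCMD_PARAMS,
 *		.type = SCHED_CLASS_TYPE_PACKET,
 *	};
 *	sp.u.params.level = SCHED_CLASS_LEVEL_CL_RL;
 *	sp.u.params.mode = SCHED_CLASS_MODE_FLOW;
 *	sp.u.params.rateunit = SCHED_CLASS_RATEUNIT_BITS;
 *	sp.u.params.ratemode = SCHED_CLASS_RATEMODE_ABS;
 *	sp.u.params.channel = 0;
 *	sp.u.params.cl = 2;
 *	sp.u.params.minrate = -1;		/* unset */
 *	sp.u.params.maxrate = 100000;
 *	sp.u.params.weight = -1;		/* unset (not CL_WRR) */
 *	sp.u.params.pktsize = 1500;
 *
 *	ioctl(fd, CHELSIO_T4_SCHED_CLASS, &sp);
 */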
8382
8383static int
8384set_sched_queue(struct adapter *sc, struct t4_sched_queue *p)
8385{
8386	struct port_info *pi = NULL;
8387	struct vi_info *vi;
8388	struct sge_txq *txq;
8389	uint32_t fw_mnem, fw_queue, fw_class;
8390	int i, rc;
8391
8392	rc = begin_synchronized_op(sc, NULL, SLEEP_OK | INTR_OK, "t4setsq");
8393	if (rc)
8394		return (rc);
8395
8396	if (!(sc->flags & FULL_INIT_DONE)) {
8397		rc = EAGAIN;
8398		goto done;
8399	}
8400
8401	if (p->port >= sc->params.nports) {
8402		rc = EINVAL;
8403		goto done;
8404	}
8405
8406	/* XXX: Only supported for the main VI. */
8407	pi = sc->port[p->port];
8408	vi = &pi->vi[0];
8409	if (!in_range(p->queue, 0, vi->ntxq - 1) || !in_range(p->cl, 0, 7)) {
8410		rc = EINVAL;
8411		goto done;
8412	}
8413
8414	/*
8415	 * Create a template for the FW_PARAMS_CMD mnemonic and value (TX
8416	 * Scheduling Class in this case).
8417	 */
8418	fw_mnem = (V_FW_PARAMS_MNEM(FW_PARAMS_MNEM_DMAQ) |
8419	    V_FW_PARAMS_PARAM_X(FW_PARAMS_PARAM_DMAQ_EQ_SCHEDCLASS_ETH));
8420	fw_class = p->cl < 0 ? 0xffffffff : p->cl;
8421
8422	/*
8423	 * If p->queue is non-negative, then we're only changing the scheduling
8424	 * on a single specified TX queue.
8425	 */
8426	if (p->queue >= 0) {
8427		txq = &sc->sge.txq[vi->first_txq + p->queue];
8428		fw_queue = (fw_mnem | V_FW_PARAMS_PARAM_YZ(txq->eq.cntxt_id));
8429		rc = -t4_set_params(sc, sc->mbox, sc->pf, 0, 1, &fw_queue,
8430		    &fw_class);
8431		goto done;
8432	}
8433
8434	/*
8435	 * Change the scheduling on all the TX queues for the
8436	 * interface.
8437	 */
8438	for_each_txq(vi, i, txq) {
8439		fw_queue = (fw_mnem | V_FW_PARAMS_PARAM_YZ(txq->eq.cntxt_id));
8440		rc = -t4_set_params(sc, sc->mbox, sc->pf, 0, 1, &fw_queue,
8441		    &fw_class);
8442		if (rc)
8443			goto done;
8444	}
8445
8446	rc = 0;
8447done:
8448	end_synchronized_op(sc, 0);
8449	return (rc);
8450}
8451
8452int
8453t4_os_find_pci_capability(struct adapter *sc, int cap)
8454{
8455	int i;
8456
8457	return (pci_find_cap(sc->dev, cap, &i) == 0 ? i : 0);
8458}
8459
8460int
8461t4_os_pci_save_state(struct adapter *sc)
8462{
8463	device_t dev;
8464	struct pci_devinfo *dinfo;
8465
8466	dev = sc->dev;
8467	dinfo = device_get_ivars(dev);
8468
8469	pci_cfg_save(dev, dinfo, 0);
8470	return (0);
8471}
8472
8473int
8474t4_os_pci_restore_state(struct adapter *sc)
8475{
8476	device_t dev;
8477	struct pci_devinfo *dinfo;
8478
8479	dev = sc->dev;
8480	dinfo = device_get_ivars(dev);
8481
8482	pci_cfg_restore(dev, dinfo);
8483	return (0);
8484}
8485
8486void
8487t4_os_portmod_changed(const struct adapter *sc, int idx)
8488{
8489	struct port_info *pi = sc->port[idx];
8490	struct vi_info *vi;
8491	struct ifnet *ifp;
8492	int v;
8493	static const char *mod_str[] = {
8494		NULL, "LR", "SR", "ER", "TWINAX", "active TWINAX", "LRM"
8495	};
8496
8497	for_each_vi(pi, v, vi) {
8498		build_medialist(pi, &vi->media);
8499	}
8500
8501	ifp = pi->vi[0].ifp;
8502	if (pi->mod_type == FW_PORT_MOD_TYPE_NONE)
8503		if_printf(ifp, "transceiver unplugged.\n");
8504	else if (pi->mod_type == FW_PORT_MOD_TYPE_UNKNOWN)
8505		if_printf(ifp, "unknown transceiver inserted.\n");
8506	else if (pi->mod_type == FW_PORT_MOD_TYPE_NOTSUPPORTED)
8507		if_printf(ifp, "unsupported transceiver inserted.\n");
8508	else if (pi->mod_type > 0 && pi->mod_type < nitems(mod_str)) {
8509		if_printf(ifp, "%s transceiver inserted.\n",
8510		    mod_str[pi->mod_type]);
8511	} else {
8512		if_printf(ifp, "transceiver (type %d) inserted.\n",
8513		    pi->mod_type);
8514	}
8515}
8516
8517void
8518t4_os_link_changed(struct adapter *sc, int idx, int link_stat, int reason)
8519{
8520	struct port_info *pi = sc->port[idx];
8521	struct vi_info *vi;
8522	struct ifnet *ifp;
8523	int v;
8524
8525	if (link_stat)
8526		pi->linkdnrc = -1;
8527	else {
8528		if (reason >= 0)
8529			pi->linkdnrc = reason;
8530	}
8531	for_each_vi(pi, v, vi) {
8532		ifp = vi->ifp;
8533		if (ifp == NULL)
8534			continue;
8535
8536		if (link_stat) {
8537			ifp->if_baudrate = IF_Mbps(pi->link_cfg.speed);
8538			if_link_state_change(ifp, LINK_STATE_UP);
8539		} else {
8540			if_link_state_change(ifp, LINK_STATE_DOWN);
8541		}
8542	}
8543}
8544
8545void
8546t4_iterate(void (*func)(struct adapter *, void *), void *arg)
8547{
8548	struct adapter *sc;
8549
8550	sx_slock(&t4_list_lock);
8551	SLIST_FOREACH(sc, &t4_list, link) {
8552		/*
8553		 * func should not make any assumptions about what state sc is
8554		 * in - the only guarantee is that sc->sc_lock is a valid lock.
8555		 */
8556		func(sc, arg);
8557	}
8558	sx_sunlock(&t4_list_lock);
8559}
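
/*
 * Usage sketch (check_one is a hypothetical callback; t4_tom uses this
 * pattern to visit every adapter at module load/unload):
 *
 *	static void
 *	check_one(struct adapter *sc, void *arg)
 *	{
 *		int *count = arg;
 *
 *		(*count)++;
 *	}
 *
 *	int n = 0;
 *	t4_iterate(check_one, &n);
 */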
8560
8561static int
8562t4_open(struct cdev *dev, int flags, int type, struct thread *td)
8563{
8564	return (0);
8565}
8566
8567static int
8568t4_close(struct cdev *dev, int flags, int type, struct thread *td)
8569{
8570	return (0);
8571}
8572
8573static int
8574t4_ioctl(struct cdev *dev, unsigned long cmd, caddr_t data, int fflag,
8575    struct thread *td)
8576{
8577	int rc;
8578	struct adapter *sc = dev->si_drv1;
8579
8580	rc = priv_check(td, PRIV_DRIVER);
8581	if (rc != 0)
8582		return (rc);
8583
8584	switch (cmd) {
8585	case CHELSIO_T4_GETREG: {
8586		struct t4_reg *edata = (struct t4_reg *)data;
8587
8588		if ((edata->addr & 0x3) != 0 || edata->addr >= sc->mmio_len)
8589			return (EFAULT);
8590
8591		if (edata->size == 4)
8592			edata->val = t4_read_reg(sc, edata->addr);
8593		else if (edata->size == 8)
8594			edata->val = t4_read_reg64(sc, edata->addr);
8595		else
8596			return (EINVAL);
8597
8598		break;
8599	}
8600	case CHELSIO_T4_SETREG: {
8601		struct t4_reg *edata = (struct t4_reg *)data;
8602
8603		if ((edata->addr & 0x3) != 0 || edata->addr >= sc->mmio_len)
8604			return (EFAULT);
8605
8606		if (edata->size == 4) {
8607			if (edata->val & 0xffffffff00000000)
8608				return (EINVAL);
8609			t4_write_reg(sc, edata->addr, (uint32_t) edata->val);
8610		} else if (edata->size == 8)
8611			t4_write_reg64(sc, edata->addr, edata->val);
8612		else
8613			return (EINVAL);
8614		break;
8615	}
8616	case CHELSIO_T4_REGDUMP: {
8617		struct t4_regdump *regs = (struct t4_regdump *)data;
8618		int reglen = is_t4(sc) ? T4_REGDUMP_SIZE : T5_REGDUMP_SIZE;
8619		uint8_t *buf;
8620
8621		if (regs->len < reglen) {
8622			regs->len = reglen; /* hint to the caller */
8623			return (ENOBUFS);
8624		}
8625
8626		regs->len = reglen;
8627		buf = malloc(reglen, M_CXGBE, M_WAITOK | M_ZERO);
8628		get_regs(sc, regs, buf);
8629		rc = copyout(buf, regs->data, reglen);
8630		free(buf, M_CXGBE);
8631		break;
8632	}
8633	case CHELSIO_T4_GET_FILTER_MODE:
8634		rc = get_filter_mode(sc, (uint32_t *)data);
8635		break;
8636	case CHELSIO_T4_SET_FILTER_MODE:
8637		rc = set_filter_mode(sc, *(uint32_t *)data);
8638		break;
8639	case CHELSIO_T4_GET_FILTER:
8640		rc = get_filter(sc, (struct t4_filter *)data);
8641		break;
8642	case CHELSIO_T4_SET_FILTER:
8643		rc = set_filter(sc, (struct t4_filter *)data);
8644		break;
8645	case CHELSIO_T4_DEL_FILTER:
8646		rc = del_filter(sc, (struct t4_filter *)data);
8647		break;
8648	case CHELSIO_T4_GET_SGE_CONTEXT:
8649		rc = get_sge_context(sc, (struct t4_sge_context *)data);
8650		break;
8651	case CHELSIO_T4_LOAD_FW:
8652		rc = load_fw(sc, (struct t4_data *)data);
8653		break;
8654	case CHELSIO_T4_GET_MEM:
8655		rc = read_card_mem(sc, 2, (struct t4_mem_range *)data);
8656		break;
8657	case CHELSIO_T4_GET_I2C:
8658		rc = read_i2c(sc, (struct t4_i2c_data *)data);
8659		break;
8660	case CHELSIO_T4_CLEAR_STATS: {
8661		int i, v;
8662		u_int port_id = *(uint32_t *)data;
8663		struct port_info *pi;
8664		struct vi_info *vi;
8665
8666		if (port_id >= sc->params.nports)
8667			return (EINVAL);
8668		pi = sc->port[port_id];
8669
8670		/* MAC stats */
8671		t4_clr_port_stats(sc, pi->tx_chan);
8672		pi->tx_parse_error = 0;
8673		mtx_lock(&sc->reg_lock);
8674		for_each_vi(pi, v, vi) {
8675			if (vi->flags & VI_INIT_DONE)
8676				t4_clr_vi_stats(sc, vi->viid);
8677		}
8678		mtx_unlock(&sc->reg_lock);
8679
8680		/*
8681		 * Since this command accepts a port, clear stats for
8682		 * all VIs on this port.
8683		 */
8684		for_each_vi(pi, v, vi) {
8685			if (vi->flags & VI_INIT_DONE) {
8686				struct sge_rxq *rxq;
8687				struct sge_txq *txq;
8688				struct sge_wrq *wrq;
8689
8690				for_each_rxq(vi, i, rxq) {
8691#if defined(INET) || defined(INET6)
8692					rxq->lro.lro_queued = 0;
8693					rxq->lro.lro_flushed = 0;
8694#endif
8695					rxq->rxcsum = 0;
8696					rxq->vlan_extraction = 0;
8697				}
8698
8699				for_each_txq(vi, i, txq) {
8700					txq->txcsum = 0;
8701					txq->tso_wrs = 0;
8702					txq->vlan_insertion = 0;
8703					txq->imm_wrs = 0;
8704					txq->sgl_wrs = 0;
8705					txq->txpkt_wrs = 0;
8706					txq->txpkts0_wrs = 0;
8707					txq->txpkts1_wrs = 0;
8708					txq->txpkts0_pkts = 0;
8709					txq->txpkts1_pkts = 0;
8710					mp_ring_reset_stats(txq->r);
8711				}
8712
8713#ifdef TCP_OFFLOAD
8714				/* nothing to clear for each ofld_rxq */
8715
8716				for_each_ofld_txq(vi, i, wrq) {
8717					wrq->tx_wrs_direct = 0;
8718					wrq->tx_wrs_copied = 0;
8719				}
8720#endif
8721
8722				if (IS_MAIN_VI(vi)) {
8723					wrq = &sc->sge.ctrlq[pi->port_id];
8724					wrq->tx_wrs_direct = 0;
8725					wrq->tx_wrs_copied = 0;
8726				}
8727			}
8728		}
8729		break;
8730	}
8731	case CHELSIO_T4_SCHED_CLASS:
8732		rc = set_sched_class(sc, (struct t4_sched_params *)data);
8733		break;
8734	case CHELSIO_T4_SCHED_QUEUE:
8735		rc = set_sched_queue(sc, (struct t4_sched_queue *)data);
8736		break;
8737	case CHELSIO_T4_GET_TRACER:
8738		rc = t4_get_tracer(sc, (struct t4_tracer *)data);
8739		break;
8740	case CHELSIO_T4_SET_TRACER:
8741		rc = t4_set_tracer(sc, (struct t4_tracer *)data);
8742		break;
8743	default:
8744		rc = EINVAL;
8745	}
8746
8747	return (rc);
8748}
8749
8750void
8751t4_db_full(struct adapter *sc)
8752{
8753
8754	CXGBE_UNIMPLEMENTED(__func__);
8755}
8756
8757void
8758t4_db_dropped(struct adapter *sc)
8759{
8760
8761	CXGBE_UNIMPLEMENTED(__func__);
8762}
8763
8764#ifdef TCP_OFFLOAD
8765void
8766t4_iscsi_init(struct ifnet *ifp, unsigned int tag_mask,
8767    const unsigned int *pgsz_order)
8768{
8769	struct vi_info *vi = ifp->if_softc;
8770	struct adapter *sc = vi->pi->adapter;
8771
8772	t4_write_reg(sc, A_ULP_RX_ISCSI_TAGMASK, tag_mask);
8773	t4_write_reg(sc, A_ULP_RX_ISCSI_PSZ, V_HPZ0(pgsz_order[0]) |
8774		V_HPZ1(pgsz_order[1]) | V_HPZ2(pgsz_order[2]) |
8775		V_HPZ3(pgsz_order[3]));
8776}
8777
8778static int
8779toe_capability(struct vi_info *vi, int enable)
8780{
8781	int rc;
8782	struct port_info *pi = vi->pi;
8783	struct adapter *sc = pi->adapter;
8784
8785	ASSERT_SYNCHRONIZED_OP(sc);
8786
8787	if (!is_offload(sc))
8788		return (ENODEV);
8789
8790	if (enable) {
8791		if ((vi->ifp->if_capenable & IFCAP_TOE) != 0) {
8792			/* TOE is already enabled. */
8793			return (0);
8794		}
8795
8796		/*
8797		 * We need the port's queues around so that we're able to send
8798		 * and receive CPLs to/from the TOE even if the ifnet for this
8799		 * port has never been UP'd administratively.
8800		 */
8801		if (!(vi->flags & VI_INIT_DONE)) {
8802			rc = vi_full_init(vi);
8803			if (rc)
8804				return (rc);
8805		}
8806		if (!(pi->vi[0].flags & VI_INIT_DONE)) {
8807			rc = vi_full_init(&pi->vi[0]);
8808			if (rc)
8809				return (rc);
8810		}
8811
8812		if (isset(&sc->offload_map, pi->port_id)) {
8813			/* TOE is enabled on another VI of this port. */
8814			pi->uld_vis++;
8815			return (0);
8816		}
8817
8818		if (!uld_active(sc, ULD_TOM)) {
8819			rc = t4_activate_uld(sc, ULD_TOM);
8820			if (rc == EAGAIN) {
8821				log(LOG_WARNING,
8822				    "You must kldload t4_tom.ko before trying "
8823				    "to enable TOE on a cxgbe interface.\n");
8824			}
8825			if (rc != 0)
8826				return (rc);
8827			KASSERT(sc->tom_softc != NULL,
8828			    ("%s: TOM activated but softc NULL", __func__));
8829			KASSERT(uld_active(sc, ULD_TOM),
8830			    ("%s: TOM activated but flag not set", __func__));
8831		}
8832
8833		/* Activate iWARP and iSCSI too, if the modules are loaded. */
8834		if (!uld_active(sc, ULD_IWARP))
8835			(void) t4_activate_uld(sc, ULD_IWARP);
8836		if (!uld_active(sc, ULD_ISCSI))
8837			(void) t4_activate_uld(sc, ULD_ISCSI);
8838
8839		pi->uld_vis++;
8840		setbit(&sc->offload_map, pi->port_id);
8841	} else {
8842		pi->uld_vis--;
8843
8844		if (!isset(&sc->offload_map, pi->port_id) || pi->uld_vis > 0)
8845			return (0);
8846
8847		KASSERT(uld_active(sc, ULD_TOM),
8848		    ("%s: TOM never initialized?", __func__));
8849		clrbit(&sc->offload_map, pi->port_id);
8850	}
8851
8852	return (0);
8853}
8854
8855/*
8856 * Add an upper layer driver to the global list.
8857 */
8858int
8859t4_register_uld(struct uld_info *ui)
8860{
8861	int rc = 0;
8862	struct uld_info *u;
8863
8864	sx_xlock(&t4_uld_list_lock);
8865	SLIST_FOREACH(u, &t4_uld_list, link) {
8866	    if (u->uld_id == ui->uld_id) {
8867		if (u->uld_id == ui->uld_id) {
8868			rc = EEXIST;
8869			goto done;
8870		}
8871
8872	SLIST_INSERT_HEAD(&t4_uld_list, ui, link);
8873	ui->refcount = 0;
8874done:
8875	sx_xunlock(&t4_uld_list_lock);
8876	return (rc);
8877}
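
/*
 * Registration sketch (mirrors what t4_tom does at MOD_LOAD; the
 * activate/deactivate function names are placeholders):
 *
 *	static struct uld_info tom_uld_info = {
 *		.uld_id = ULD_TOM,
 *		.activate = t4_tom_activate,
 *		.deactivate = t4_tom_deactivate,
 *	};
 *
 *	t4_register_uld(&tom_uld_info);
 */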
8878
8879int
8880t4_unregister_uld(struct uld_info *ui)
8881{
8882	int rc = EINVAL;
8883	struct uld_info *u;
8884
8885	sx_xlock(&t4_uld_list_lock);
8886
8887	SLIST_FOREACH(u, &t4_uld_list, link) {
8888		if (u == ui) {
8889			if (ui->refcount > 0) {
8890				rc = EBUSY;
8891				goto done;
8892			}
8893
8894			SLIST_REMOVE(&t4_uld_list, ui, uld_info, link);
8895			rc = 0;
8896			goto done;
8897		}
8898	}
8899done:
8900	sx_xunlock(&t4_uld_list_lock);
8901	return (rc);
8902}
8903
8904int
8905t4_activate_uld(struct adapter *sc, int id)
8906{
8907	int rc;
8908	struct uld_info *ui;
8909
8910	ASSERT_SYNCHRONIZED_OP(sc);
8911
8912	if (id < 0 || id > ULD_MAX)
8913		return (EINVAL);
8914	rc = EAGAIN;	/* kldload the module with this ULD and try again. */
8915
8916	sx_slock(&t4_uld_list_lock);
8917
8918	SLIST_FOREACH(ui, &t4_uld_list, link) {
8919		if (ui->uld_id == id) {
8920			if (!(sc->flags & FULL_INIT_DONE)) {
8921				rc = adapter_full_init(sc);
8922				if (rc != 0)
8923					break;
8924			}
8925
8926			rc = ui->activate(sc);
8927			if (rc == 0) {
8928				setbit(&sc->active_ulds, id);
8929				ui->refcount++;
8930			}
8931			break;
8932		}
8933	}
8934
8935	sx_sunlock(&t4_uld_list_lock);
8936
8937	return (rc);
8938}
8939
8940int
8941t4_deactivate_uld(struct adapter *sc, int id)
8942{
8943	int rc;
8944	struct uld_info *ui;
8945
8946	ASSERT_SYNCHRONIZED_OP(sc);
8947
8948	if (id < 0 || id > ULD_MAX)
8949		return (EINVAL);
8950	rc = ENXIO;
8951
8952	sx_slock(&t4_uld_list_lock);
8953
8954	SLIST_FOREACH(ui, &t4_uld_list, link) {
8955		if (ui->uld_id == id) {
8956			rc = ui->deactivate(sc);
8957			if (rc == 0) {
8958				clrbit(&sc->active_ulds, id);
8959				ui->refcount--;
8960			}
8961			break;
8962		}
8963	}
8964
8965	sx_sunlock(&t4_uld_list_lock);
8966
8967	return (rc);
8968}
8969
8970int
8971uld_active(struct adapter *sc, int uld_id)
8972{
8973
8974	MPASS(uld_id >= 0 && uld_id <= ULD_MAX);
8975
8976	return (isset(&sc->active_ulds, uld_id));
8977}
8978#endif
8979
8980/*
8981 * Come up with reasonable defaults for some of the tunables, provided they're
8982 * not set by the user (in which case we'll use the values as is).
8983 */
8984static void
8985tweak_tunables(void)
8986{
8987	int nc = mp_ncpus;	/* our snapshot of the number of CPUs */
8988
8989	if (t4_ntxq10g < 1) {
8990#ifdef RSS
8991		t4_ntxq10g = rss_getnumbuckets();
8992#else
8993		t4_ntxq10g = min(nc, NTXQ_10G);
8994#endif
8995	}
8996
8997	if (t4_ntxq1g < 1) {
8998#ifdef RSS
8999		/* XXX: way too many for 1GbE? */
9000		t4_ntxq1g = rss_getnumbuckets();
9001#else
9002		t4_ntxq1g = min(nc, NTXQ_1G);
9003#endif
9004	}
9005
9006	if (t4_ntxq_vi < 1)
9007		t4_ntxq_vi = min(nc, NTXQ_VI);
9008
9009	if (t4_nrxq10g < 1) {
9010#ifdef RSS
9011		t4_nrxq10g = rss_getnumbuckets();
9012#else
9013		t4_nrxq10g = min(nc, NRXQ_10G);
9014#endif
9015	}
9016
9017	if (t4_nrxq1g < 1) {
9018#ifdef RSS
9019		/* XXX: way too many for 1GbE? */
9020		t4_nrxq1g = rss_getnumbuckets();
9021#else
9022		t4_nrxq1g = min(nc, NRXQ_1G);
9023#endif
9024	}
9025
9026	if (t4_nrxq_vi < 1)
9027		t4_nrxq_vi = min(nc, NRXQ_VI);
9028
9029#ifdef TCP_OFFLOAD
9030	if (t4_nofldtxq10g < 1)
9031		t4_nofldtxq10g = min(nc, NOFLDTXQ_10G);
9032
9033	if (t4_nofldtxq1g < 1)
9034		t4_nofldtxq1g = min(nc, NOFLDTXQ_1G);
9035
9036	if (t4_nofldtxq_vi < 1)
9037		t4_nofldtxq_vi = min(nc, NOFLDTXQ_VI);
9038
9039	if (t4_nofldrxq10g < 1)
9040		t4_nofldrxq10g = min(nc, NOFLDRXQ_10G);
9041
9042	if (t4_nofldrxq1g < 1)
9043		t4_nofldrxq1g = min(nc, NOFLDRXQ_1G);
9044
9045	if (t4_nofldrxq_vi < 1)
9046		t4_nofldrxq_vi = min(nc, NOFLDRXQ_VI);
9047
9048	if (t4_toecaps_allowed == -1)
9049		t4_toecaps_allowed = FW_CAPS_CONFIG_TOE;
9050
9051	if (t4_rdmacaps_allowed == -1) {
9052		t4_rdmacaps_allowed = FW_CAPS_CONFIG_RDMA_RDDP |
9053		    FW_CAPS_CONFIG_RDMA_RDMAC;
9054	}
9055
9056	if (t4_iscsicaps_allowed == -1) {
9057		t4_iscsicaps_allowed = FW_CAPS_CONFIG_ISCSI_INITIATOR_PDU |
9058		    FW_CAPS_CONFIG_ISCSI_TARGET_PDU |
9059		    FW_CAPS_CONFIG_ISCSI_T10DIF;
9060	}
9061#else
9062	if (t4_toecaps_allowed == -1)
9063		t4_toecaps_allowed = 0;
9064
9065	if (t4_rdmacaps_allowed == -1)
9066		t4_rdmacaps_allowed = 0;
9067
9068	if (t4_iscsicaps_allowed == -1)
9069		t4_iscsicaps_allowed = 0;
9070#endif
9071
9072#ifdef DEV_NETMAP
9073	if (t4_nnmtxq_vi < 1)
9074		t4_nnmtxq_vi = min(nc, NNMTXQ_VI);
9075
9076	if (t4_nnmrxq_vi < 1)
9077		t4_nnmrxq_vi = min(nc, NNMRXQ_VI);
9078#endif
9079
9080	if (t4_tmr_idx_10g < 0 || t4_tmr_idx_10g >= SGE_NTIMERS)
9081		t4_tmr_idx_10g = TMR_IDX_10G;
9082
9083	if (t4_pktc_idx_10g < -1 || t4_pktc_idx_10g >= SGE_NCOUNTERS)
9084		t4_pktc_idx_10g = PKTC_IDX_10G;
9085
9086	if (t4_tmr_idx_1g < 0 || t4_tmr_idx_1g >= SGE_NTIMERS)
9087		t4_tmr_idx_1g = TMR_IDX_1G;
9088
9089	if (t4_pktc_idx_1g < -1 || t4_pktc_idx_1g >= SGE_NCOUNTERS)
9090		t4_pktc_idx_1g = PKTC_IDX_1G;
9091
9092	if (t4_qsize_txq < 128)
9093		t4_qsize_txq = 128;
9094
9095	if (t4_qsize_rxq < 128)
9096		t4_qsize_rxq = 128;
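	/* Round the rx queue size up to a multiple of 8. */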
9097	while (t4_qsize_rxq & 7)
9098		t4_qsize_rxq++;
9099
9100	t4_intr_types &= INTR_MSIX | INTR_MSI | INTR_INTX;
9101}
9102
9103#ifdef DDB
9104static void
9105t4_dump_tcb(struct adapter *sc, int tid)
9106{
9107	uint32_t base, i, j, off, pf, reg, save, tcb_addr, win_pos;
9108
9109	reg = PCIE_MEM_ACCESS_REG(A_PCIE_MEM_ACCESS_OFFSET, 2);
9110	save = t4_read_reg(sc, reg);
9111	base = sc->memwin[2].mw_base;
9112
9113	/* Dump TCB for the tid */
9114	tcb_addr = t4_read_reg(sc, A_TP_CMM_TCB_BASE);
9115	tcb_addr += tid * TCB_SIZE;
9116
9117	if (is_t4(sc)) {
9118		pf = 0;
9119		win_pos = tcb_addr & ~0xf;	/* start must be 16B aligned */
9120	} else {
9121		pf = V_PFNUM(sc->pf);
9122		win_pos = tcb_addr & ~0x7f;	/* start must be 128B aligned */
9123	}
9124	t4_write_reg(sc, reg, win_pos | pf);
9125	t4_read_reg(sc, reg);
9126
9127	off = tcb_addr - win_pos;
9128	for (i = 0; i < 4; i++) {
9129		uint32_t buf[8];
9130		for (j = 0; j < 8; j++, off += 4)
9131			buf[j] = htonl(t4_read_reg(sc, base + off));
9132
9133		db_printf("%08x %08x %08x %08x %08x %08x %08x %08x\n",
9134		    buf[0], buf[1], buf[2], buf[3], buf[4], buf[5], buf[6],
9135		    buf[7]);
9136	}
9137
9138	t4_write_reg(sc, reg, save);
9139	t4_read_reg(sc, reg);
9140}
9141
9142static void
9143t4_dump_devlog(struct adapter *sc)
9144{
9145	struct devlog_params *dparams = &sc->params.devlog;
9146	struct fw_devlog_e e;
9147	int i, first, j, m, nentries, rc;
9148	uint64_t ftstamp = UINT64_MAX;
9149
9150	if (dparams->start == 0) {
9151		db_printf("devlog params not valid\n");
9152		return;
9153	}
9154
9155	nentries = dparams->size / sizeof(struct fw_devlog_e);
9156	m = fwmtype_to_hwmtype(dparams->memtype);
9157
9158	/* Find the first entry. */
9159	first = -1;
9160	for (i = 0; i < nentries && !db_pager_quit; i++) {
9161		rc = -t4_mem_read(sc, m, dparams->start + i * sizeof(e),
9162		    sizeof(e), (void *)&e);
9163		if (rc != 0)
9164			break;
9165
9166		if (e.timestamp == 0)
9167			break;
9168
9169		e.timestamp = be64toh(e.timestamp);
9170		if (e.timestamp < ftstamp) {
9171			ftstamp = e.timestamp;
9172			first = i;
9173		}
9174	}
9175
9176	if (first == -1)
9177		return;
9178
9179	i = first;
9180	do {
9181		rc = -t4_mem_read(sc, m, dparams->start + i * sizeof(e),
9182		    sizeof(e), (void *)&e);
9183		if (rc != 0)
9184			return;
9185
9186		if (e.timestamp == 0)
9187			return;
9188
9189		e.timestamp = be64toh(e.timestamp);
9190		e.seqno = be32toh(e.seqno);
9191		for (j = 0; j < 8; j++)
9192			e.params[j] = be32toh(e.params[j]);
9193
9194		db_printf("%10d  %15ju  %8s  %8s  ",
9195		    e.seqno, e.timestamp,
9196		    (e.level < nitems(devlog_level_strings) ?
9197			devlog_level_strings[e.level] : "UNKNOWN"),
9198		    (e.facility < nitems(devlog_facility_strings) ?
9199			devlog_facility_strings[e.facility] : "UNKNOWN"));
9200		db_printf(e.fmt, e.params[0], e.params[1], e.params[2],
9201		    e.params[3], e.params[4], e.params[5], e.params[6],
9202		    e.params[7]);
9203
9204		if (++i == nentries)
9205			i = 0;
9206	} while (i != first && !db_pager_quit);
9207}
9208
9209static struct command_table db_t4_table = LIST_HEAD_INITIALIZER(db_t4_table);
9210_DB_SET(_show, t4, NULL, db_show_table, 0, &db_t4_table);
9211
9212DB_FUNC(devlog, db_show_devlog, db_t4_table, CS_OWN, NULL)
9213{
9214	device_t dev;
9215	int t;
9216	bool valid;
9217
9218	valid = false;
9219	t = db_read_token();
9220	if (t == tIDENT) {
9221		dev = device_lookup_by_name(db_tok_string);
9222		valid = true;
9223	}
9224	db_skip_to_eol();
9225	if (!valid) {
9226		db_printf("usage: show t4 devlog <nexus>\n");
9227		return;
9228	}
9229
9230	if (dev == NULL) {
9231		db_printf("device not found\n");
9232		return;
9233	}
9234
9235	t4_dump_devlog(device_get_softc(dev));
9236}
9237
9238DB_FUNC(tcb, db_show_t4tcb, db_t4_table, CS_OWN, NULL)
9239{
9240	device_t dev;
9241	int radix, tid, t;
9242	bool valid;
9243
9244	valid = false;
9245	radix = db_radix;
9246	db_radix = 10;
9247	t = db_read_token();
9248	if (t == tIDENT) {
9249		dev = device_lookup_by_name(db_tok_string);
9250		t = db_read_token();
9251		if (t == tNUMBER) {
9252			tid = db_tok_number;
9253			valid = true;
9254		}
9255	}
9256	db_radix = radix;
9257	db_skip_to_eol();
9258	if (!valid) {
9259		db_printf("usage: show t4 tcb <nexus> <tid>\n");
9260		return;
9261	}
9262
9263	if (dev == NULL) {
9264		db_printf("device not found\n");
9265		return;
9266	}
9267	if (tid < 0) {
9268		db_printf("invalid tid\n");
9269		return;
9270	}
9271
9272	t4_dump_tcb(device_get_softc(dev), tid);
9273}
9274#endif
9275
9276static struct sx mlu;	/* mod load unload */
9277SX_SYSINIT(cxgbe_mlu, &mlu, "cxgbe mod load/unload");
9278
9279static int
9280mod_event(module_t mod, int cmd, void *arg)
9281{
9282	int rc = 0;
9283	static int loaded = 0;
9284
9285	switch (cmd) {
9286	case MOD_LOAD:
9287		sx_xlock(&mlu);
9288		if (loaded++ == 0) {
9289			t4_sge_modload();
9290			sx_init(&t4_list_lock, "T4/T5 adapters");
9291			SLIST_INIT(&t4_list);
9292#ifdef TCP_OFFLOAD
9293			sx_init(&t4_uld_list_lock, "T4/T5 ULDs");
9294			SLIST_INIT(&t4_uld_list);
9295#endif
9296			t4_tracer_modload();
9297			tweak_tunables();
9298		}
9299		sx_xunlock(&mlu);
9300		break;
9301
9302	case MOD_UNLOAD:
9303		sx_xlock(&mlu);
9304		if (--loaded == 0) {
9305			int tries;
9306
9307			sx_slock(&t4_list_lock);
9308			if (!SLIST_EMPTY(&t4_list)) {
9309				rc = EBUSY;
9310				sx_sunlock(&t4_list_lock);
9311				goto done_unload;
9312			}
9313#ifdef TCP_OFFLOAD
9314			sx_slock(&t4_uld_list_lock);
9315			if (!SLIST_EMPTY(&t4_uld_list)) {
9316				rc = EBUSY;
9317				sx_sunlock(&t4_uld_list_lock);
9318				sx_sunlock(&t4_list_lock);
9319				goto done_unload;
9320			}
9321#endif
9322			tries = 0;
9323			while (tries++ < 5 && t4_sge_extfree_refs() != 0) {
9324				uprintf("%ju clusters with custom free routine "
9325				    "still in use.\n", t4_sge_extfree_refs());
9326				pause("t4unload", 2 * hz);
9327			}
9328#ifdef TCP_OFFLOAD
9329			sx_sunlock(&t4_uld_list_lock);
9330#endif
9331			sx_sunlock(&t4_list_lock);
9332
9333			if (t4_sge_extfree_refs() == 0) {
9334				t4_tracer_modunload();
9335#ifdef TCP_OFFLOAD
9336				sx_destroy(&t4_uld_list_lock);
9337#endif
9338				sx_destroy(&t4_list_lock);
9339				t4_sge_modunload();
9340				loaded = 0;
9341			} else {
9342				rc = EBUSY;
9343				loaded++;	/* undo earlier decrement */
9344			}
9345		}
9346done_unload:
9347		sx_xunlock(&mlu);
9348		break;
9349	}
9350
9351	return (rc);
9352}
9353
9354static devclass_t t4_devclass, t5_devclass;
9355static devclass_t cxgbe_devclass, cxl_devclass;
9356static devclass_t vcxgbe_devclass, vcxl_devclass;
9357
9358DRIVER_MODULE(t4nex, pci, t4_driver, t4_devclass, mod_event, 0);
9359MODULE_VERSION(t4nex, 1);
9360MODULE_DEPEND(t4nex, firmware, 1, 1, 1);
9361
9362DRIVER_MODULE(t5nex, pci, t5_driver, t5_devclass, mod_event, 0);
9363MODULE_VERSION(t5nex, 1);
9364MODULE_DEPEND(t5nex, firmware, 1, 1, 1);
9365
9366DRIVER_MODULE(cxgbe, t4nex, cxgbe_driver, cxgbe_devclass, 0, 0);
9367MODULE_VERSION(cxgbe, 1);
9368
9369DRIVER_MODULE(cxl, t5nex, cxl_driver, cxl_devclass, 0, 0);
9370MODULE_VERSION(cxl, 1);
9371
9372DRIVER_MODULE(vcxgbe, cxgbe, vcxgbe_driver, vcxgbe_devclass, 0, 0);
9373MODULE_VERSION(vcxgbe, 1);
9374
9375DRIVER_MODULE(vcxl, cxl, vcxl_driver, vcxl_devclass, 0, 0);
9376MODULE_VERSION(vcxl, 1);
9377