/*-
 * Copyright (c) 2014-2018, Matthew Macy <mmacy@mattmacy.io>
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 *
 *  1. Redistributions of source code must retain the above copyright notice,
 *     this list of conditions and the following disclaimer.
 *
 *  2. Neither the name of Matthew Macy nor the names of its
 *     contributors may be used to endorse or promote products derived from
 *     this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD: stable/11/sys/net/iflib.c 361063 2020-05-14 21:59:23Z erj $");

#include "opt_inet.h"
#include "opt_inet6.h"
#include "opt_acpi.h"
#include "opt_sched.h"

#include <sys/param.h>
#include <sys/types.h>
#include <sys/bus.h>
#include <sys/eventhandler.h>
#include <sys/sockio.h>
#include <sys/kernel.h>
#include <sys/lock.h>
#include <sys/mutex.h>
#include <sys/module.h>
#include <sys/kobj.h>
#include <sys/rman.h>
#include <sys/sbuf.h>
#include <sys/smp.h>
#include <sys/socket.h>
#include <sys/sysctl.h>
#include <sys/syslog.h>
#include <sys/taskqueue.h>
#include <sys/limits.h>

#include <net/if.h>
#include <net/if_var.h>
#include <net/if_types.h>
#include <net/if_media.h>
#include <net/bpf.h>
#include <net/ethernet.h>
#include <net/mp_ring.h>
#include <net/vnet.h>

#include <netinet/in.h>
#include <netinet/in_pcb.h>
#include <netinet/tcp_lro.h>
#include <netinet/in_systm.h>
#include <netinet/if_ether.h>
#include <netinet/ip.h>
#include <netinet/ip6.h>
#include <netinet/tcp.h>
#include <netinet/ip_var.h>
#include <netinet6/ip6_var.h>

#include <machine/bus.h>
#include <machine/in_cksum.h>

#include <vm/vm.h>
#include <vm/pmap.h>

#include <dev/led/led.h>
#include <dev/pci/pcireg.h>
#include <dev/pci/pcivar.h>
#include <dev/pci/pci_private.h>

#include <net/iflib.h>

#include "ifdi_if.h"

#if defined(__i386__) || defined(__amd64__)
#include <sys/memdesc.h>
#include <machine/bus.h>
#include <machine/md_var.h>
#include <machine/specialreg.h>
#include <x86/include/busdma_impl.h>
#include <x86/iommu/busdma_dmar.h>
#endif

#ifdef PCI_IOV
#include <dev/pci/pci_iov.h>
#endif

#include <sys/bitstring.h>
/*
 * Enable accounting of every mbuf as it comes into and goes out of
 * iflib's software descriptor references.
 */
#define MEMORY_LOGGING 0
/*
 * Enable mbuf vectors for compressing long mbuf chains.
 */

/*
 * NB:
 * - Prefetching in tx cleaning should perhaps be a tunable. The distance
 *   ahead that we prefetch needs to be determined by the time spent in
 *   m_free vis-à-vis the cost of a prefetch. This will of course vary
 *   based on the workload:
 *      - NFLX's m_free path is dominated by vm-based M_EXT manipulation,
 *        which is quite expensive, thus suggesting very little prefetch.
 *      - small packet forwarding, which just returns a single mbuf to
 *        UMA, will typically be very fast vis-à-vis the cost of a memory
 *        access.
 */

/*
 * File organization:
 *  - private structures
 *  - iflib private utility functions
 *  - ifnet functions
 *  - vlan registry and other exported functions
 *  - iflib public core functions
 */
static MALLOC_DEFINE(M_IFLIB, "iflib", "ifnet library");

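/*
 * Flag bits returned by the rx end-of-frame path to the rx task:
 * MORE means receive work may still be pending on the queue, EMPTY
 * means a free list ran out of buffers before the budget was spent.
 */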
#define	IFLIB_RXEOF_MORE (1U << 0)
#define	IFLIB_RXEOF_EMPTY (2U << 0)

struct iflib_txq;
typedef struct iflib_txq *iflib_txq_t;
struct iflib_rxq;
typedef struct iflib_rxq *iflib_rxq_t;
struct iflib_fl;
typedef struct iflib_fl *iflib_fl_t;

static void iru_init(if_rxd_update_t iru, iflib_rxq_t rxq, uint8_t flid);

typedef struct iflib_filter_info {
	driver_filter_t *ifi_filter;
	void *ifi_filter_arg;
	struct grouptask *ifi_task;
	void *ifi_ctx;
} *iflib_filter_info_t;

struct iflib_ctx {
	KOBJ_FIELDS;
	/*
	 * Pointer to hardware driver's softc
	 */
	void *ifc_softc;
	device_t ifc_dev;
	if_t ifc_ifp;

	cpuset_t ifc_cpus;
	if_shared_ctx_t ifc_sctx;
	struct if_softc_ctx ifc_softc_ctx;

	struct mtx ifc_ctx_mtx;
	struct mtx ifc_state_mtx;

	uint16_t ifc_nhwtxqs;

	iflib_txq_t ifc_txqs;
	iflib_rxq_t ifc_rxqs;
	uint32_t ifc_if_flags;
	uint32_t ifc_flags;
	uint32_t ifc_max_fl_buf_size;
	uint32_t ifc_rx_mbuf_sz;

	int ifc_link_state;
	int ifc_link_irq;
	int ifc_watchdog_events;
	struct cdev *ifc_led_dev;
	struct resource *ifc_msix_mem;

	struct if_irq ifc_legacy_irq;
	struct grouptask ifc_admin_task;
	struct grouptask ifc_vflr_task;
	struct iflib_filter_info ifc_filter_info;
	struct ifmedia	ifc_media;

	struct sysctl_oid *ifc_sysctl_node;
	uint16_t ifc_sysctl_ntxqs;
	uint16_t ifc_sysctl_nrxqs;
	uint16_t ifc_sysctl_qs_eq_override;
	uint16_t ifc_sysctl_rx_budget;

	qidx_t ifc_sysctl_ntxds[8];
	qidx_t ifc_sysctl_nrxds[8];
	struct if_txrx ifc_txrx;
#define isc_txd_encap  ifc_txrx.ift_txd_encap
#define isc_txd_flush  ifc_txrx.ift_txd_flush
#define isc_txd_credits_update  ifc_txrx.ift_txd_credits_update
#define isc_rxd_available ifc_txrx.ift_rxd_available
#define isc_rxd_pkt_get ifc_txrx.ift_rxd_pkt_get
#define isc_rxd_refill ifc_txrx.ift_rxd_refill
#define isc_rxd_flush ifc_txrx.ift_rxd_flush
#define isc_legacy_intr ifc_txrx.ift_legacy_intr
	eventhandler_tag ifc_vlan_attach_event;
	eventhandler_tag ifc_vlan_detach_event;
	uint8_t ifc_mac[ETHER_ADDR_LEN];
	char ifc_mtx_name[16];
};

void *
iflib_get_softc(if_ctx_t ctx)
{

	return (ctx->ifc_softc);
}

device_t
iflib_get_dev(if_ctx_t ctx)
{

	return (ctx->ifc_dev);
}

if_t
iflib_get_ifp(if_ctx_t ctx)
{

	return (ctx->ifc_ifp);
}

struct ifmedia *
iflib_get_media(if_ctx_t ctx)
{

	return (&ctx->ifc_media);
}

void
iflib_set_mac(if_ctx_t ctx, uint8_t mac[ETHER_ADDR_LEN])
{

	bcopy(mac, ctx->ifc_mac, ETHER_ADDR_LEN);
}

if_softc_ctx_t
iflib_get_softc_ctx(if_ctx_t ctx)
{

	return (&ctx->ifc_softc_ctx);
}

if_shared_ctx_t
iflib_get_sctx(if_ctx_t ctx)
{

	return (ctx->ifc_sctx);
}

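/*
 * IP_ALIGNED() is true when m_data sits two bytes past a four-byte
 * boundary, i.e. when a 14-byte Ethernet header leaves the IP header
 * that follows it four-byte aligned.
 */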
#define IP_ALIGNED(m) ((((uintptr_t)(m)->m_data) & 0x3) == 0x2)
#define CACHE_PTR_INCREMENT (CACHE_LINE_SIZE/sizeof(void*))
#define CACHE_PTR_NEXT(ptr) ((void *)(((uintptr_t)(ptr)+CACHE_LINE_SIZE-1) & ~((uintptr_t)CACHE_LINE_SIZE-1)))

#define LINK_ACTIVE(ctx) ((ctx)->ifc_link_state == LINK_STATE_UP)
#define CTX_IS_VF(ctx) ((ctx)->ifc_sctx->isc_flags & IFLIB_IS_VF)

#define RX_SW_DESC_MAP_CREATED	(1 << 0)
#define TX_SW_DESC_MAP_CREATED	(1 << 1)
#define RX_SW_DESC_INUSE        (1 << 3)
#define TX_SW_DESC_MAPPED       (1 << 4)

#define	M_TOOBIG		M_PROTO1

typedef struct iflib_sw_rx_desc_array {
	bus_dmamap_t	*ifsd_map;         /* bus_dma maps for packet */
	struct mbuf	**ifsd_m;           /* pkthdr mbufs */
	caddr_t		*ifsd_cl;          /* direct cluster pointer for rx */
	uint8_t		*ifsd_flags;
} iflib_rxsd_array_t;

typedef struct iflib_sw_tx_desc_array {
	bus_dmamap_t    *ifsd_map;         /* bus_dma maps for packet */
	struct mbuf    **ifsd_m;           /* pkthdr mbufs */
	uint8_t		*ifsd_flags;
} if_txsd_vec_t;

/* magic number that should be high enough for any hardware */
#define IFLIB_MAX_TX_SEGS		128
/* bnxt supports 64 with hardware LRO enabled */
#define IFLIB_MAX_RX_SEGS		64
#define IFLIB_RX_COPY_THRESH		128
#define IFLIB_MAX_RX_REFRESH		32
/* The minimum descriptors per second before we start coalescing */
#define IFLIB_MIN_DESC_SEC		16384
#define IFLIB_DEFAULT_TX_UPDATE_FREQ	16
#define IFLIB_QUEUE_IDLE		0
#define IFLIB_QUEUE_HUNG		1
#define IFLIB_QUEUE_WORKING		2
/* maximum number of txqs that can share an rx interrupt */
#define IFLIB_MAX_TX_SHARED_INTR	4

/* this should really scale with ring size - this is a fairly arbitrary value */
#define TX_BATCH_SIZE			32

#define IFLIB_RESTART_BUDGET		8

#define	IFC_LEGACY		0x001
#define	IFC_QFLUSH		0x002
#define	IFC_MULTISEG		0x004
#define	IFC_DMAR		0x008
#define	IFC_SC_ALLOCATED	0x010
#define	IFC_INIT_DONE		0x020
#define	IFC_PREFETCH		0x040
#define	IFC_DO_RESET		0x080
#define	IFC_DO_WATCHDOG		0x100
#define	IFC_CHECK_HUNG		0x200
#define	IFC_IN_DETACH		0x800

#define CSUM_OFFLOAD		(CSUM_IP_TSO|CSUM_IP6_TSO|CSUM_IP| \
				 CSUM_IP_UDP|CSUM_IP_TCP|CSUM_IP_SCTP| \
				 CSUM_IP6_UDP|CSUM_IP6_TCP|CSUM_IP6_SCTP)
struct iflib_txq {
	qidx_t		ift_in_use;
	qidx_t		ift_cidx;
	qidx_t		ift_cidx_processed;
	qidx_t		ift_pidx;
	uint8_t		ift_gen;
	uint8_t		ift_br_offset;
	uint16_t	ift_npending;
	uint16_t	ift_db_pending;
	uint16_t	ift_rs_pending;
	/* implicit pad */
	uint8_t		ift_txd_size[8];
	uint64_t	ift_processed;
	uint64_t	ift_cleaned;
	uint64_t	ift_cleaned_prev;
#if MEMORY_LOGGING
	uint64_t	ift_enqueued;
	uint64_t	ift_dequeued;
#endif
	uint64_t	ift_no_tx_dma_setup;
	uint64_t	ift_no_desc_avail;
	uint64_t	ift_mbuf_defrag_failed;
	uint64_t	ift_mbuf_defrag;
	uint64_t	ift_map_failed;
	uint64_t	ift_txd_encap_efbig;
	uint64_t	ift_pullups;

	struct mtx	ift_mtx;
	struct mtx	ift_db_mtx;

	/* constant values */
	if_ctx_t	ift_ctx;
	struct ifmp_ring        *ift_br;
	struct grouptask	ift_task;
	qidx_t		ift_size;
	uint16_t	ift_id;
	struct callout	ift_timer;

	if_txsd_vec_t	ift_sds;
	uint8_t		ift_qstatus;
	uint8_t		ift_closed;
	uint8_t		ift_update_freq;
	struct iflib_filter_info ift_filter_info;
	bus_dma_tag_t		ift_desc_tag;
	bus_dma_tag_t		ift_tso_desc_tag;
	iflib_dma_info_t	ift_ifdi;
#define MTX_NAME_LEN 16
	char                    ift_mtx_name[MTX_NAME_LEN];
	char                    ift_db_mtx_name[MTX_NAME_LEN];
	bus_dma_segment_t	ift_segs[IFLIB_MAX_TX_SEGS]  __aligned(CACHE_LINE_SIZE);
#ifdef IFLIB_DIAGNOSTICS
	uint64_t ift_cpu_exec_count[256];
#endif
} __aligned(CACHE_LINE_SIZE);

struct iflib_fl {
	qidx_t		ifl_cidx;
	qidx_t		ifl_pidx;
	qidx_t		ifl_credits;
	uint8_t		ifl_gen;
	uint8_t		ifl_rxd_size;
#if MEMORY_LOGGING
	uint64_t	ifl_m_enqueued;
	uint64_t	ifl_m_dequeued;
	uint64_t	ifl_cl_enqueued;
	uint64_t	ifl_cl_dequeued;
#endif
	/* implicit pad */

	bitstr_t 	*ifl_rx_bitmap;
	qidx_t		ifl_fragidx;
	/* constant */
	qidx_t		ifl_size;
	uint16_t	ifl_buf_size;
	uint16_t	ifl_cltype;
	uma_zone_t	ifl_zone;
	iflib_rxsd_array_t	ifl_sds;
	iflib_rxq_t	ifl_rxq;
	uint8_t		ifl_id;
	bus_dma_tag_t           ifl_desc_tag;
	iflib_dma_info_t	ifl_ifdi;
	uint64_t	ifl_bus_addrs[IFLIB_MAX_RX_REFRESH] __aligned(CACHE_LINE_SIZE);
	caddr_t		ifl_vm_addrs[IFLIB_MAX_RX_REFRESH];
	qidx_t	ifl_rxd_idxs[IFLIB_MAX_RX_REFRESH];
}  __aligned(CACHE_LINE_SIZE);

static inline qidx_t
get_inuse(int size, qidx_t cidx, qidx_t pidx, uint8_t gen)
{
	qidx_t used;

	if (pidx > cidx)
		used = pidx - cidx;
	else if (pidx < cidx)
		used = size - cidx + pidx;
	else if (gen == 0 && pidx == cidx)
		used = 0;
	else if (gen == 1 && pidx == cidx)
		used = size;
	else
		panic("bad state");

	return (used);
}

#define TXQ_AVAIL(txq) (txq->ift_size - get_inuse(txq->ift_size, txq->ift_cidx, txq->ift_pidx, txq->ift_gen))

#define IDXDIFF(head, tail, wrap) \
	((head) >= (tail) ? (head) - (tail) : (wrap) - (tail) + (head))
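
/*
 * A worked example of the ring arithmetic above: with ift_size 1024,
 * ift_cidx 1000 and ift_pidx 10 the producer has wrapped, so
 * get_inuse() returns 1024 - 1000 + 10 = 34 descriptors in use and
 * TXQ_AVAIL() reports 990 free; IDXDIFF(10, 1000, 1024) yields the
 * same 34.  The gen bit only matters when pidx == cidx, where it
 * distinguishes a completely empty ring (gen == 0, 0 in use) from a
 * completely full one (gen == 1, all ift_size in use).
 */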

struct iflib_rxq {
	/* If there is a separate completion queue -
	 * these are the cq cidx and pidx. Otherwise
	 * these are unused.
	 */
	qidx_t		ifr_size;
	qidx_t		ifr_cq_cidx;
	qidx_t		ifr_cq_pidx;
	uint8_t		ifr_cq_gen;
	uint8_t		ifr_fl_offset;

	if_ctx_t	ifr_ctx;
	iflib_fl_t	ifr_fl;
	uint64_t	ifr_rx_irq;
	uint16_t	ifr_id;
	uint8_t		ifr_lro_enabled;
	uint8_t		ifr_nfl;
	uint8_t		ifr_ntxqirq;
	uint8_t		ifr_txqid[IFLIB_MAX_TX_SHARED_INTR];
	struct lro_ctrl			ifr_lc;
	struct grouptask        ifr_task;
	struct callout		ifr_watchdog;
	struct iflib_filter_info ifr_filter_info;
	iflib_dma_info_t		ifr_ifdi;

	/* dynamically allocate if any drivers need a value substantially larger than this */
	struct if_rxd_frag	ifr_frags[IFLIB_MAX_RX_SEGS] __aligned(CACHE_LINE_SIZE);
#ifdef IFLIB_DIAGNOSTICS
	uint64_t ifr_cpu_exec_count[256];
#endif
}  __aligned(CACHE_LINE_SIZE);

typedef struct if_rxsd {
	caddr_t *ifsd_cl;
	struct mbuf **ifsd_m;
	iflib_fl_t ifsd_fl;
	qidx_t ifsd_cidx;
} *if_rxsd_t;

/* multiple of word size */
#ifdef __LP64__
#define PKT_INFO_SIZE	6
#define RXD_INFO_SIZE	5
#define PKT_TYPE uint64_t
#else
#define PKT_INFO_SIZE	11
#define RXD_INFO_SIZE	8
#define PKT_TYPE uint32_t
#endif
#define PKT_LOOP_BOUND  ((PKT_INFO_SIZE/3)*3)
#define RXD_LOOP_BOUND  ((RXD_INFO_SIZE/4)*4)

typedef struct if_pkt_info_pad {
	PKT_TYPE pkt_val[PKT_INFO_SIZE];
} *if_pkt_info_pad_t;
typedef struct if_rxd_info_pad {
	PKT_TYPE rxd_val[RXD_INFO_SIZE];
} *if_rxd_info_pad_t;

CTASSERT(sizeof(struct if_pkt_info_pad) == sizeof(struct if_pkt_info));
CTASSERT(sizeof(struct if_rxd_info_pad) == sizeof(struct if_rxd_info));

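/*
 * pkt_info_zero() and rxd_info_zero() clear their argument with a fixed
 * number of word-sized stores rather than a bzero() call: the *_pad
 * overlays above express each structure as PKT_INFO_SIZE/RXD_INFO_SIZE
 * machine words, and the CTASSERTs guarantee the overlays track the
 * real definitions, so the stores below cover every byte.
 */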
static inline void
pkt_info_zero(if_pkt_info_t pi)
{
	if_pkt_info_pad_t pi_pad;

	pi_pad = (if_pkt_info_pad_t)pi;
	pi_pad->pkt_val[0] = 0; pi_pad->pkt_val[1] = 0; pi_pad->pkt_val[2] = 0;
	pi_pad->pkt_val[3] = 0; pi_pad->pkt_val[4] = 0; pi_pad->pkt_val[5] = 0;
#ifndef __LP64__
	pi_pad->pkt_val[6] = 0; pi_pad->pkt_val[7] = 0; pi_pad->pkt_val[8] = 0;
	pi_pad->pkt_val[9] = 0; pi_pad->pkt_val[10] = 0;
#endif
}

static inline void
rxd_info_zero(if_rxd_info_t ri)
{
	if_rxd_info_pad_t ri_pad;
	int i;

	ri_pad = (if_rxd_info_pad_t)ri;
	for (i = 0; i < RXD_LOOP_BOUND; i += 4) {
		ri_pad->rxd_val[i] = 0;
		ri_pad->rxd_val[i+1] = 0;
		ri_pad->rxd_val[i+2] = 0;
		ri_pad->rxd_val[i+3] = 0;
	}
#ifdef __LP64__
	ri_pad->rxd_val[RXD_INFO_SIZE-1] = 0;
#endif
}

/*
 * Only allow a single packet to take up at most 1/nth of the tx ring
 */
#define MAX_SINGLE_PACKET_FRACTION 12
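/*
 * For example, with 1024 tx descriptors in a ring, a single packet may
 * consume at most 1024 / 12 = 85 (integer division) descriptors.
 */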
#define IF_BAD_DMA (bus_addr_t)-1

#define CTX_ACTIVE(ctx) ((if_getdrvflags((ctx)->ifc_ifp) & IFF_DRV_RUNNING))

#define CTX_LOCK_INIT(_sc, _name)  mtx_init(&(_sc)->ifc_ctx_mtx, _name, "iflib ctx lock", MTX_DEF)
#define CTX_LOCK(ctx) mtx_lock(&(ctx)->ifc_ctx_mtx)
#define CTX_UNLOCK(ctx) mtx_unlock(&(ctx)->ifc_ctx_mtx)
#define CTX_LOCK_DESTROY(ctx) mtx_destroy(&(ctx)->ifc_ctx_mtx)

#define STATE_LOCK_INIT(_sc, _name)  mtx_init(&(_sc)->ifc_state_mtx, _name, "iflib state lock", MTX_DEF)
#define STATE_LOCK(ctx) mtx_lock(&(ctx)->ifc_state_mtx)
#define STATE_UNLOCK(ctx) mtx_unlock(&(ctx)->ifc_state_mtx)
#define STATE_LOCK_DESTROY(ctx) mtx_destroy(&(ctx)->ifc_state_mtx)

#define CALLOUT_LOCK(txq)	mtx_lock(&txq->ift_mtx)
#define CALLOUT_UNLOCK(txq)	mtx_unlock(&txq->ift_mtx)

/* Our boot-time initialization hook */
static int	iflib_module_event_handler(module_t, int, void *);

static moduledata_t iflib_moduledata = {
	"iflib",
	iflib_module_event_handler,
	NULL
};

DECLARE_MODULE(iflib, iflib_moduledata, SI_SUB_INIT_IF, SI_ORDER_ANY);
MODULE_VERSION(iflib, 1);

MODULE_DEPEND(iflib, pci, 1, 1, 1);
MODULE_DEPEND(iflib, ether, 1, 1, 1);

TASKQGROUP_DEFINE(if_io_tqg, mp_ncpus, 1);
TASKQGROUP_DEFINE(if_config_tqg, 1, 1);

#ifndef IFLIB_DEBUG_COUNTERS
#ifdef INVARIANTS
#define IFLIB_DEBUG_COUNTERS 1
#else
#define IFLIB_DEBUG_COUNTERS 0
#endif /* !INVARIANTS */
#endif

static SYSCTL_NODE(_net, OID_AUTO, iflib, CTLFLAG_RD, 0,
                   "iflib driver parameters");

/*
 * XXX need to ensure that this can't accidentally cause the head to be moved backwards
 */
static int iflib_min_tx_latency = 0;
SYSCTL_INT(_net_iflib, OID_AUTO, min_tx_latency, CTLFLAG_RW,
		   &iflib_min_tx_latency, 0, "minimize transmit latency at the possible expense of throughput");
static int iflib_no_tx_batch = 0;
SYSCTL_INT(_net_iflib, OID_AUTO, no_tx_batch, CTLFLAG_RW,
		   &iflib_no_tx_batch, 0, "disable transmit batching, minimizing latency at the possible expense of throughput");

#if IFLIB_DEBUG_COUNTERS

static int iflib_tx_seen;
static int iflib_tx_sent;
static int iflib_tx_encap;
static int iflib_rx_allocs;
static int iflib_fl_refills;
static int iflib_fl_refills_large;
static int iflib_tx_frees;

SYSCTL_INT(_net_iflib, OID_AUTO, tx_seen, CTLFLAG_RD,
		   &iflib_tx_seen, 0, "# tx mbufs seen");
SYSCTL_INT(_net_iflib, OID_AUTO, tx_sent, CTLFLAG_RD,
		   &iflib_tx_sent, 0, "# tx mbufs sent");
SYSCTL_INT(_net_iflib, OID_AUTO, tx_encap, CTLFLAG_RD,
		   &iflib_tx_encap, 0, "# tx mbufs encapped");
SYSCTL_INT(_net_iflib, OID_AUTO, tx_frees, CTLFLAG_RD,
		   &iflib_tx_frees, 0, "# tx frees");
SYSCTL_INT(_net_iflib, OID_AUTO, rx_allocs, CTLFLAG_RD,
		   &iflib_rx_allocs, 0, "# rx allocations");
SYSCTL_INT(_net_iflib, OID_AUTO, fl_refills, CTLFLAG_RD,
		   &iflib_fl_refills, 0, "# refills");
SYSCTL_INT(_net_iflib, OID_AUTO, fl_refills_large, CTLFLAG_RD,
		   &iflib_fl_refills_large, 0, "# large refills");

static int iflib_txq_drain_flushing;
static int iflib_txq_drain_oactive;
static int iflib_txq_drain_notready;
static int iflib_txq_drain_encapfail;

SYSCTL_INT(_net_iflib, OID_AUTO, txq_drain_flushing, CTLFLAG_RD,
		   &iflib_txq_drain_flushing, 0, "# drain flushes");
SYSCTL_INT(_net_iflib, OID_AUTO, txq_drain_oactive, CTLFLAG_RD,
		   &iflib_txq_drain_oactive, 0, "# drain oactives");
SYSCTL_INT(_net_iflib, OID_AUTO, txq_drain_notready, CTLFLAG_RD,
		   &iflib_txq_drain_notready, 0, "# drain notready");
SYSCTL_INT(_net_iflib, OID_AUTO, txq_drain_encapfail, CTLFLAG_RD,
		   &iflib_txq_drain_encapfail, 0, "# drain encap fails");

static int iflib_encap_load_mbuf_fail;
static int iflib_encap_pad_mbuf_fail;
static int iflib_encap_txq_avail_fail;
static int iflib_encap_txd_encap_fail;

SYSCTL_INT(_net_iflib, OID_AUTO, encap_load_mbuf_fail, CTLFLAG_RD,
		   &iflib_encap_load_mbuf_fail, 0, "# busdma load failures");
SYSCTL_INT(_net_iflib, OID_AUTO, encap_pad_mbuf_fail, CTLFLAG_RD,
		   &iflib_encap_pad_mbuf_fail, 0, "# runt frame pad failures");
SYSCTL_INT(_net_iflib, OID_AUTO, encap_txq_avail_fail, CTLFLAG_RD,
		   &iflib_encap_txq_avail_fail, 0, "# txq avail failures");
SYSCTL_INT(_net_iflib, OID_AUTO, encap_txd_encap_fail, CTLFLAG_RD,
		   &iflib_encap_txd_encap_fail, 0, "# driver encap failures");

static int iflib_task_fn_rxs;
static int iflib_rx_intr_enables;
static int iflib_fast_intrs;
static int iflib_intr_link;
static int iflib_intr_msix;
static int iflib_rx_unavail;
static int iflib_rx_ctx_inactive;
static int iflib_rx_zero_len;
static int iflib_rx_if_input;
static int iflib_rx_mbuf_null;
static int iflib_rxd_flush;

static int iflib_verbose_debug;

SYSCTL_INT(_net_iflib, OID_AUTO, intr_link, CTLFLAG_RD,
		   &iflib_intr_link, 0, "# intr link calls");
SYSCTL_INT(_net_iflib, OID_AUTO, intr_msix, CTLFLAG_RD,
		   &iflib_intr_msix, 0, "# intr msix calls");
SYSCTL_INT(_net_iflib, OID_AUTO, task_fn_rx, CTLFLAG_RD,
		   &iflib_task_fn_rxs, 0, "# task_fn_rx calls");
SYSCTL_INT(_net_iflib, OID_AUTO, rx_intr_enables, CTLFLAG_RD,
		   &iflib_rx_intr_enables, 0, "# rx intr enables");
SYSCTL_INT(_net_iflib, OID_AUTO, fast_intrs, CTLFLAG_RD,
		   &iflib_fast_intrs, 0, "# fast_intr calls");
SYSCTL_INT(_net_iflib, OID_AUTO, rx_unavail, CTLFLAG_RD,
		   &iflib_rx_unavail, 0, "# times rxeof called with no available data");
SYSCTL_INT(_net_iflib, OID_AUTO, rx_ctx_inactive, CTLFLAG_RD,
		   &iflib_rx_ctx_inactive, 0, "# times rxeof called with inactive context");
SYSCTL_INT(_net_iflib, OID_AUTO, rx_zero_len, CTLFLAG_RD,
		   &iflib_rx_zero_len, 0, "# times rxeof saw zero len mbuf");
SYSCTL_INT(_net_iflib, OID_AUTO, rx_if_input, CTLFLAG_RD,
		   &iflib_rx_if_input, 0, "# times rxeof called if_input");
SYSCTL_INT(_net_iflib, OID_AUTO, rx_mbuf_null, CTLFLAG_RD,
		   &iflib_rx_mbuf_null, 0, "# times rxeof got null mbuf");
SYSCTL_INT(_net_iflib, OID_AUTO, rxd_flush, CTLFLAG_RD,
		   &iflib_rxd_flush, 0, "# times rxd_flush called");
SYSCTL_INT(_net_iflib, OID_AUTO, verbose_debug, CTLFLAG_RW,
		   &iflib_verbose_debug, 0, "enable verbose debugging");

#define DBG_COUNTER_INC(name) atomic_add_int(&(iflib_ ## name), 1)
static void
iflib_debug_reset(void)
{
	iflib_tx_seen = iflib_tx_sent = iflib_tx_encap = iflib_rx_allocs =
		iflib_fl_refills = iflib_fl_refills_large = iflib_tx_frees =
		iflib_txq_drain_flushing = iflib_txq_drain_oactive =
		iflib_txq_drain_notready = iflib_txq_drain_encapfail =
		iflib_encap_load_mbuf_fail = iflib_encap_pad_mbuf_fail =
		iflib_encap_txq_avail_fail = iflib_encap_txd_encap_fail =
		iflib_task_fn_rxs = iflib_rx_intr_enables = iflib_fast_intrs =
		iflib_intr_link = iflib_intr_msix = iflib_rx_unavail =
		iflib_rx_ctx_inactive = iflib_rx_zero_len = iflib_rx_if_input =
		iflib_rx_mbuf_null = iflib_rxd_flush = 0;
}

#else
#define DBG_COUNTER_INC(name)
static void iflib_debug_reset(void) {}
#endif

#define IFLIB_DEBUG 0

static void iflib_tx_structures_free(if_ctx_t ctx);
static void iflib_rx_structures_free(if_ctx_t ctx);
static int iflib_queues_alloc(if_ctx_t ctx);
static int iflib_tx_credits_update(if_ctx_t ctx, iflib_txq_t txq);
static int iflib_rxd_avail(if_ctx_t ctx, iflib_rxq_t rxq, qidx_t cidx, qidx_t budget);
static int iflib_qset_structures_setup(if_ctx_t ctx);
static int iflib_msix_init(if_ctx_t ctx);
static int iflib_legacy_setup(if_ctx_t ctx, driver_filter_t filter, void *filterarg, int *rid, char *str);
static void iflib_txq_check_drain(iflib_txq_t txq, int budget);
static uint32_t iflib_txq_can_drain(struct ifmp_ring *);
static int iflib_register(if_ctx_t);
static void iflib_unregister_vlan_handlers(if_ctx_t ctx);
static void iflib_init_locked(if_ctx_t ctx);
static void iflib_add_device_sysctl_pre(if_ctx_t ctx);
static void iflib_add_device_sysctl_post(if_ctx_t ctx);
static void iflib_ifmp_purge(iflib_txq_t txq);
static void _iflib_pre_assert(if_softc_ctx_t scctx);
static void iflib_stop(if_ctx_t ctx);
static void iflib_if_init_locked(if_ctx_t ctx);
static void iflib_free_intr_mem(if_ctx_t ctx);
#ifndef __NO_STRICT_ALIGNMENT
static struct mbuf * iflib_fixup_rx(struct mbuf *m);
#endif

#ifdef DEV_NETMAP
#include <sys/selinfo.h>
#include <net/netmap.h>
#include <dev/netmap/netmap_kern.h>

MODULE_DEPEND(iflib, netmap, 1, 1, 1);

static int netmap_fl_refill(iflib_rxq_t rxq, struct netmap_kring *kring, uint32_t nm_i, bool init);

/*
 * device-specific sysctl variables:
 *
 * iflib_crcstrip: 0: keep CRC in rx frames (default), 1: strip it.
 *	During regular operations the CRC is stripped, but on some
 *	hardware reception of frames not multiple of 64 is slower,
 *	so using crcstrip=0 helps in benchmarks.
 *
 * iflib_rx_miss, iflib_rx_miss_bufs:
 *	count packets that might be missed due to lost interrupts.
 */
SYSCTL_DECL(_dev_netmap);
/*
 * The xl driver by default strips CRCs and we do not override it.
 */

int iflib_crcstrip = 1;
SYSCTL_INT(_dev_netmap, OID_AUTO, iflib_crcstrip,
    CTLFLAG_RW, &iflib_crcstrip, 1, "strip CRC on rx frames");

int iflib_rx_miss, iflib_rx_miss_bufs;
SYSCTL_INT(_dev_netmap, OID_AUTO, iflib_rx_miss,
    CTLFLAG_RW, &iflib_rx_miss, 0, "potentially missed rx intr");
SYSCTL_INT(_dev_netmap, OID_AUTO, iflib_rx_miss_bufs,
    CTLFLAG_RW, &iflib_rx_miss_bufs, 0, "potentially missed rx intr bufs");

/*
 * Register/unregister. We are already under netmap lock.
 * Only called on the first register or the last unregister.
 */
static int
iflib_netmap_register(struct netmap_adapter *na, int onoff)
{
	struct ifnet *ifp = na->ifp;
	if_ctx_t ctx = ifp->if_softc;
	int status;

	CTX_LOCK(ctx);
	IFDI_INTR_DISABLE(ctx);

	/* Tell the stack that the interface is no longer active */
	ifp->if_drv_flags &= ~(IFF_DRV_RUNNING | IFF_DRV_OACTIVE);

	if (!CTX_IS_VF(ctx))
		IFDI_CRCSTRIP_SET(ctx, onoff, iflib_crcstrip);

	/* enable or disable flags and callbacks in na and ifp */
	if (onoff) {
		nm_set_native_flags(na);
	} else {
		nm_clear_native_flags(na);
	}
	iflib_stop(ctx);
	iflib_init_locked(ctx);
	IFDI_CRCSTRIP_SET(ctx, onoff, iflib_crcstrip); /* XXX why twice? */
	status = ifp->if_drv_flags & IFF_DRV_RUNNING ? 0 : 1;
	if (status)
		nm_clear_native_flags(na);
	CTX_UNLOCK(ctx);
	return (status);
}

static int
netmap_fl_refill(iflib_rxq_t rxq, struct netmap_kring *kring, uint32_t nm_i, bool init)
{
	struct netmap_adapter *na = kring->na;
	u_int const lim = kring->nkr_num_slots - 1;
	u_int head = kring->rhead;
	struct netmap_ring *ring = kring->ring;
	bus_dmamap_t *map;
	struct if_rxd_update iru;
	if_ctx_t ctx = rxq->ifr_ctx;
	iflib_fl_t fl = &rxq->ifr_fl[0];
	uint32_t refill_pidx, nic_i;

	if (nm_i == head && __predict_true(!init))
		return (0);
	iru_init(&iru, rxq, 0 /* flid */);
	map = fl->ifl_sds.ifsd_map;
	refill_pidx = netmap_idx_k2n(kring, nm_i);
	/*
	 * IMPORTANT: we must leave one free slot in the ring,
	 * so move head back by one unit
	 */
	head = nm_prev(head, lim);
	while (nm_i != head) {
		for (int tmp_pidx = 0; tmp_pidx < IFLIB_MAX_RX_REFRESH && nm_i != head; tmp_pidx++) {
			struct netmap_slot *slot = &ring->slot[nm_i];
			void *addr = PNMB(na, slot, &fl->ifl_bus_addrs[tmp_pidx]);
			uint32_t nic_i_dma = refill_pidx;
			nic_i = netmap_idx_k2n(kring, nm_i);

			MPASS(tmp_pidx < IFLIB_MAX_RX_REFRESH);

			if (addr == NETMAP_BUF_BASE(na)) /* bad buf */
				return (netmap_ring_reinit(kring));

			fl->ifl_vm_addrs[tmp_pidx] = addr;
			if (__predict_false(init) && map) {
				netmap_load_map(na, fl->ifl_ifdi->idi_tag, map[nic_i], addr);
			} else if (map && (slot->flags & NS_BUF_CHANGED)) {
				/* buffer has changed, reload map */
				netmap_reload_map(na, fl->ifl_ifdi->idi_tag, map[nic_i], addr);
			}
			slot->flags &= ~NS_BUF_CHANGED;

			nm_i = nm_next(nm_i, lim);
			fl->ifl_rxd_idxs[tmp_pidx] = nic_i = nm_next(nic_i, lim);
			if (nm_i != head && tmp_pidx < IFLIB_MAX_RX_REFRESH-1)
				continue;

			iru.iru_pidx = refill_pidx;
			iru.iru_count = tmp_pidx+1;
			ctx->isc_rxd_refill(ctx->ifc_softc, &iru);

			refill_pidx = nic_i;
			if (map == NULL)
				continue;

			for (int n = 0; n < iru.iru_count; n++) {
				bus_dmamap_sync(fl->ifl_ifdi->idi_tag, map[nic_i_dma],
						BUS_DMASYNC_PREREAD);
				/* XXX - change this to not use the netmap func */
				nic_i_dma = nm_next(nic_i_dma, lim);
			}
		}
	}
	kring->nr_hwcur = head;

	if (map)
		bus_dmamap_sync(fl->ifl_ifdi->idi_tag, fl->ifl_ifdi->idi_map,
				BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
	ctx->isc_rxd_flush(ctx->ifc_softc, rxq->ifr_id, fl->ifl_id, nic_i);
	return (0);
}

/*
 * Reconcile kernel and user view of the transmit ring.
 *
 * All information is in the kring.
 * Userspace wants to send packets up to the one before kring->rhead,
 * kernel knows kring->nr_hwcur is the first unsent packet.
 *
 * Here we push packets out (as many as possible), and possibly
 * reclaim buffers from previously completed transmission.
 *
 * The caller (netmap) guarantees that there is only one instance
 * running at any time. Any interference with other driver
 * methods should be handled by the individual drivers.
 */
static int
iflib_netmap_txsync(struct netmap_kring *kring, int flags)
{
	struct netmap_adapter *na = kring->na;
	struct ifnet *ifp = na->ifp;
	struct netmap_ring *ring = kring->ring;
	u_int nm_i;	/* index into the netmap ring */
	u_int nic_i;	/* index into the NIC ring */
	u_int n;
	u_int const lim = kring->nkr_num_slots - 1;
	u_int const head = kring->rhead;
	struct if_pkt_info pi;

	/*
	 * interrupts on every tx packet are expensive so request
	 * them every half ring, or where NS_REPORT is set
	 */
	u_int report_frequency = kring->nkr_num_slots >> 1;
	/* device-specific */
	if_ctx_t ctx = ifp->if_softc;
	iflib_txq_t txq = &ctx->ifc_txqs[kring->ring_id];

	if (txq->ift_sds.ifsd_map)
		bus_dmamap_sync(txq->ift_desc_tag, txq->ift_ifdi->idi_map,
				BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);

	/*
	 * First part: process new packets to send.
	 * nm_i is the current index in the netmap ring,
	 * nic_i is the corresponding index in the NIC ring.
	 *
	 * If we have packets to send (nm_i != head)
	 * iterate over the netmap ring, fetch length and update
	 * the corresponding slot in the NIC ring. Some drivers also
	 * need to update the buffer's physical address in the NIC slot
	 * even if NS_BUF_CHANGED is not set (PNMB computes the addresses).
	 *
	 * The netmap_reload_map() call is especially expensive,
	 * even when (as in this case) the tag is 0, so do it only
	 * when the buffer has actually changed.
	 *
	 * If possible do not set the report/intr bit on all slots,
	 * but only a few times per ring or when NS_REPORT is set.
	 *
	 * Finally, on 10G and faster drivers, it might be useful
	 * to prefetch the next slot and txr entry.
	 */

	nm_i = netmap_idx_n2k(kring, kring->nr_hwcur);
	pkt_info_zero(&pi);
	pi.ipi_segs = txq->ift_segs;
	pi.ipi_qsidx = kring->ring_id;
	if (nm_i != head) {	/* we have new packets to send */
		nic_i = netmap_idx_k2n(kring, nm_i);

		__builtin_prefetch(&ring->slot[nm_i]);
		__builtin_prefetch(&txq->ift_sds.ifsd_m[nic_i]);
		if (txq->ift_sds.ifsd_map)
			__builtin_prefetch(&txq->ift_sds.ifsd_map[nic_i]);

		for (n = 0; nm_i != head; n++) {
			struct netmap_slot *slot = &ring->slot[nm_i];
			u_int len = slot->len;
			uint64_t paddr;
			void *addr = PNMB(na, slot, &paddr);
			int flags = (slot->flags & NS_REPORT ||
				nic_i == 0 || nic_i == report_frequency) ?
				IPI_TX_INTR : 0;

			/* device-specific */
			pi.ipi_len = len;
			pi.ipi_segs[0].ds_addr = paddr;
			pi.ipi_segs[0].ds_len = len;
			pi.ipi_nsegs = 1;
			pi.ipi_ndescs = 0;
			pi.ipi_pidx = nic_i;
			pi.ipi_flags = flags;

			/* Fill the slot in the NIC ring. */
			ctx->isc_txd_encap(ctx->ifc_softc, &pi);

			/* prefetch for next round */
			__builtin_prefetch(&ring->slot[nm_i + 1]);
			__builtin_prefetch(&txq->ift_sds.ifsd_m[nic_i + 1]);
			if (txq->ift_sds.ifsd_map) {
				__builtin_prefetch(&txq->ift_sds.ifsd_map[nic_i + 1]);

				NM_CHECK_ADDR_LEN(na, addr, len);

				if (slot->flags & NS_BUF_CHANGED) {
					/* buffer has changed, reload map */
					netmap_reload_map(na, txq->ift_desc_tag, txq->ift_sds.ifsd_map[nic_i], addr);
				}
				/* make sure changes to the buffer are synced */
				bus_dmamap_sync(txq->ift_ifdi->idi_tag, txq->ift_sds.ifsd_map[nic_i],
						BUS_DMASYNC_PREWRITE);
			}
			slot->flags &= ~(NS_REPORT | NS_BUF_CHANGED);
			nm_i = nm_next(nm_i, lim);
			nic_i = nm_next(nic_i, lim);
		}
		kring->nr_hwcur = head;

		/* synchronize the NIC ring */
		if (txq->ift_sds.ifsd_map)
			bus_dmamap_sync(txq->ift_desc_tag, txq->ift_ifdi->idi_map,
					BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);

		/* (re)start the tx unit up to slot nic_i (excluded) */
		ctx->isc_txd_flush(ctx->ifc_softc, txq->ift_id, nic_i);
	}

	/*
	 * Second part: reclaim buffers for completed transmissions.
	 */
	if (iflib_tx_credits_update(ctx, txq)) {
		/* some tx completed, increment avail */
		nic_i = txq->ift_cidx_processed;
		kring->nr_hwtail = nm_prev(netmap_idx_n2k(kring, nic_i), lim);
	}
	return (0);
}

/*
 * Reconcile kernel and user view of the receive ring.
 * Same as for the txsync, this routine must be efficient.
 * The caller guarantees a single invocation, but races against
 * the rest of the driver should be handled here.
 *
 * On call, kring->rhead is the first packet that userspace wants
 * to keep, and kring->rcur is the wakeup point.
 * The kernel has previously reported packets up to kring->rtail.
 *
 * If (flags & NAF_FORCE_READ) also check for incoming packets irrespective
 * of whether or not we received an interrupt.
 */
static int
iflib_netmap_rxsync(struct netmap_kring *kring, int flags)
{
	struct netmap_adapter *na = kring->na;
	struct netmap_ring *ring = kring->ring;
	uint32_t nm_i;	/* index into the netmap ring */
	uint32_t nic_i;	/* index into the NIC ring */
	u_int i, n;
	u_int const lim = kring->nkr_num_slots - 1;
	u_int const head = netmap_idx_n2k(kring, kring->rhead);
	int force_update = (flags & NAF_FORCE_READ) || kring->nr_kflags & NKR_PENDINTR;
	struct if_rxd_info ri;

	struct ifnet *ifp = na->ifp;
	if_ctx_t ctx = ifp->if_softc;
	iflib_rxq_t rxq = &ctx->ifc_rxqs[kring->ring_id];
	iflib_fl_t fl = rxq->ifr_fl;

	if (head > lim)
		return (netmap_ring_reinit(kring));

	/* XXX check sync modes */
	for (i = 0, fl = rxq->ifr_fl; i < rxq->ifr_nfl; i++, fl++) {
		if (fl->ifl_sds.ifsd_map == NULL)
			continue;
		bus_dmamap_sync(rxq->ifr_fl[i].ifl_desc_tag, fl->ifl_ifdi->idi_map,
				BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
	}
	/*
	 * First part: import newly received packets.
	 *
	 * nm_i is the index of the next free slot in the netmap ring,
	 * nic_i is the index of the next received packet in the NIC ring,
	 * and they may differ in case if_init() has been called while
	 * in netmap mode. For the receive ring we have
	 *
	 *	nic_i = rxr->next_check;
	 *	nm_i = kring->nr_hwtail (previous)
	 * and
	 *	nm_i == (nic_i + kring->nkr_hwofs) % ring_size
	 *
	 * rxr->next_check is set to 0 on a ring reinit
	 */
	if (netmap_no_pendintr || force_update) {
		int crclen = iflib_crcstrip ? 0 : 4;
		int error, avail;

		for (i = 0; i < rxq->ifr_nfl; i++) {
			fl = &rxq->ifr_fl[i];
			nic_i = fl->ifl_cidx;
			nm_i = netmap_idx_n2k(kring, nic_i);
			avail = iflib_rxd_avail(ctx, rxq, nic_i, USHRT_MAX);
			for (n = 0; avail > 0; n++, avail--) {
				rxd_info_zero(&ri);
				ri.iri_frags = rxq->ifr_frags;
				ri.iri_qsidx = kring->ring_id;
				ri.iri_ifp = ctx->ifc_ifp;
				ri.iri_cidx = nic_i;

				error = ctx->isc_rxd_pkt_get(ctx->ifc_softc, &ri);
				ring->slot[nm_i].len = error ? 0 : ri.iri_len - crclen;
				ring->slot[nm_i].flags = 0;
				if (fl->ifl_sds.ifsd_map)
					bus_dmamap_sync(fl->ifl_ifdi->idi_tag,
							fl->ifl_sds.ifsd_map[nic_i], BUS_DMASYNC_POSTREAD);
				nm_i = nm_next(nm_i, lim);
				nic_i = nm_next(nic_i, lim);
			}
			if (n) { /* update the state variables */
				if (netmap_no_pendintr && !force_update) {
					/* diagnostics */
					iflib_rx_miss++;
					iflib_rx_miss_bufs += n;
				}
				fl->ifl_cidx = nic_i;
				kring->nr_hwtail = netmap_idx_k2n(kring, nm_i);
			}
			kring->nr_kflags &= ~NKR_PENDINTR;
		}
	}
	/*
	 * Second part: skip past packets that userspace has released.
	 * (kring->nr_hwcur to head excluded),
	 * and make the buffers available for reception.
	 * As usual nm_i is the index in the netmap ring,
	 * nic_i is the index in the NIC ring, and
	 * nm_i == (nic_i + kring->nkr_hwofs) % ring_size
	 */
	/* XXX not sure how this will work with multiple free lists */
	nm_i = netmap_idx_n2k(kring, kring->nr_hwcur);

	return (netmap_fl_refill(rxq, kring, nm_i, false));
}

static int
iflib_netmap_attach(if_ctx_t ctx)
{
	struct netmap_adapter na;
	if_softc_ctx_t scctx = &ctx->ifc_softc_ctx;

	bzero(&na, sizeof(na));

	na.ifp = ctx->ifc_ifp;
	na.na_flags = NAF_BDG_MAYSLEEP;
	MPASS(ctx->ifc_softc_ctx.isc_ntxqsets);
	MPASS(ctx->ifc_softc_ctx.isc_nrxqsets);

	na.num_tx_desc = scctx->isc_ntxd[0];
	na.num_rx_desc = scctx->isc_nrxd[0];
	na.nm_txsync = iflib_netmap_txsync;
	na.nm_rxsync = iflib_netmap_rxsync;
	na.nm_register = iflib_netmap_register;
	na.num_tx_rings = ctx->ifc_softc_ctx.isc_ntxqsets;
	na.num_rx_rings = ctx->ifc_softc_ctx.isc_nrxqsets;
	return (netmap_attach(&na));
}

static void
iflib_netmap_txq_init(if_ctx_t ctx, iflib_txq_t txq)
{
	struct netmap_adapter *na = NA(ctx->ifc_ifp);
	struct netmap_slot *slot;

	slot = netmap_reset(na, NR_TX, txq->ift_id, 0);
	if (slot == NULL)
		return;
	if (txq->ift_sds.ifsd_map == NULL)
		return;

	for (int i = 0; i < ctx->ifc_softc_ctx.isc_ntxd[0]; i++) {
		/*
		 * In netmap mode, set the map for the packet buffer.
		 * NOTE: Some drivers (not this one) also need to set
		 * the physical buffer address in the NIC ring.
		 * netmap_idx_n2k() maps a nic index, i, into the corresponding
		 * netmap slot index, si
		 */
		int si = netmap_idx_n2k(na->tx_rings[txq->ift_id], i);
		netmap_load_map(na, txq->ift_desc_tag, txq->ift_sds.ifsd_map[i], NMB(na, slot + si));
	}
}

static void
iflib_netmap_rxq_init(if_ctx_t ctx, iflib_rxq_t rxq)
{
	struct netmap_adapter *na = NA(ctx->ifc_ifp);
	struct netmap_kring *kring = na->rx_rings[rxq->ifr_id];
	struct netmap_slot *slot;
	uint32_t nm_i;

	slot = netmap_reset(na, NR_RX, rxq->ifr_id, 0);
	if (slot == NULL)
		return;
	nm_i = netmap_idx_n2k(kring, 0);
	netmap_fl_refill(rxq, kring, nm_i, true);
}

#define iflib_netmap_detach(ifp) netmap_detach(ifp)

#else
#define iflib_netmap_txq_init(ctx, txq)
#define iflib_netmap_rxq_init(ctx, rxq)
#define iflib_netmap_detach(ifp)

#define iflib_netmap_attach(ctx) (0)
#define netmap_rx_irq(ifp, qid, budget) (0)
#define netmap_tx_irq(ifp, qid) do {} while (0)

#endif

#if defined(__i386__) || defined(__amd64__)
static __inline void
prefetch(void *x)
{
	__asm volatile("prefetcht0 %0" :: "m" (*(unsigned long *)x));
}
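
/*
 * prefetch2cachelines() extends prefetch() above to objects that may
 * straddle a cache line boundary: the second prefetcht0 targets the
 * byte CACHE_LINE_SIZE past x, and is only needed on CPUs whose cache
 * lines are smaller than 128 bytes.
 */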
1219333338Sshurdstatic __inline void
1220333338Sshurdprefetch2cachelines(void *x)
1221333338Sshurd{
1222333338Sshurd	__asm volatile("prefetcht0 %0" :: "m" (*(unsigned long *)x));
1223333338Sshurd#if (CACHE_LINE_SIZE < 128)
1224333338Sshurd	__asm volatile("prefetcht0 %0" :: "m" (*(((unsigned long *)x)+CACHE_LINE_SIZE/(sizeof(unsigned long)))));
1225333338Sshurd#endif
1226333338Sshurd}
1227300113Sscottl#else
1228300113Sscottl#define prefetch(x)
1229333338Sshurd#define prefetch2cachelines(x)
1230300113Sscottl#endif
1231300113Sscottl
1232300113Sscottlstatic void
1233333338Sshurdiru_init(if_rxd_update_t iru, iflib_rxq_t rxq, uint8_t flid)
1234333338Sshurd{
1235333338Sshurd	iflib_fl_t fl;
1236333338Sshurd
1237333338Sshurd	fl = &rxq->ifr_fl[flid];
1238333338Sshurd	iru->iru_paddrs = fl->ifl_bus_addrs;
1239333338Sshurd	iru->iru_vaddrs = &fl->ifl_vm_addrs[0];
1240333338Sshurd	iru->iru_idxs = fl->ifl_rxd_idxs;
1241333338Sshurd	iru->iru_qsidx = rxq->ifr_id;
1242333338Sshurd	iru->iru_buf_size = fl->ifl_buf_size;
1243333338Sshurd	iru->iru_flidx = fl->ifl_id;
1244333338Sshurd}
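
/*
 * Example (illustrative sketch, not part of iflib): the if_rxd_update
 * initialized above is later handed to a driver's isc_rxd_refill callback,
 * which copies the new buffer addresses into its hardware descriptor ring.
 * All "hypo_" names below are invented for illustration:
 *
 *	static void
 *	hypo_rxd_refill(void *arg, if_rxd_update_t iru)
 *	{
 *		struct hypo_softc *sc = arg;
 *		struct hypo_rx_ring *ring = hypo_get_ring(sc,
 *		    iru->iru_qsidx, iru->iru_flidx);
 *		uint16_t i;
 *
 *		for (i = 0; i < iru->iru_count; i++)
 *			ring->desc[iru->iru_idxs[i]].addr =
 *			    htole64(iru->iru_paddrs[i]);
 *	}
 */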
1245333338Sshurd
1246333338Sshurdstatic void
1247300113Sscottl_iflib_dmamap_cb(void *arg, bus_dma_segment_t *segs, int nseg, int err)
1248300113Sscottl{
1249300113Sscottl	if (err)
1250300113Sscottl		return;
1251300113Sscottl	*(bus_addr_t *) arg = segs[0].ds_addr;
1252300113Sscottl}
1253300113Sscottl
1254300113Sscottlint
1255300113Sscottliflib_dma_alloc(if_ctx_t ctx, int size, iflib_dma_info_t dma, int mapflags)
1256300113Sscottl{
1257300113Sscottl	int err;
1258300113Sscottl	if_shared_ctx_t sctx = ctx->ifc_sctx;
1259300113Sscottl	device_t dev = ctx->ifc_dev;
1260300113Sscottl
1261300113Sscottl	KASSERT(sctx->isc_q_align != 0, ("alignment value not initialized"));
1262300113Sscottl
1263300113Sscottl	err = bus_dma_tag_create(bus_get_dma_tag(dev), /* parent */
1264300113Sscottl				sctx->isc_q_align, 0,	/* alignment, bounds */
1265300113Sscottl				BUS_SPACE_MAXADDR,	/* lowaddr */
1266300113Sscottl				BUS_SPACE_MAXADDR,	/* highaddr */
1267300113Sscottl				NULL, NULL,		/* filter, filterarg */
1268300113Sscottl				size,			/* maxsize */
1269300113Sscottl				1,			/* nsegments */
1270300113Sscottl				size,			/* maxsegsize */
1271300113Sscottl				BUS_DMA_ALLOCNOW,	/* flags */
1272300113Sscottl				NULL,			/* lockfunc */
1273300113Sscottl				NULL,			/* lockarg */
1274300113Sscottl				&dma->idi_tag);
1275300113Sscottl	if (err) {
1276300113Sscottl		device_printf(dev,
1277300113Sscottl		    "%s: bus_dma_tag_create failed: %d\n",
1278300113Sscottl		    __func__, err);
1279300113Sscottl		goto fail_0;
1280300113Sscottl	}
1281300113Sscottl
1282300113Sscottl	err = bus_dmamem_alloc(dma->idi_tag, (void**) &dma->idi_vaddr,
1283300113Sscottl	    BUS_DMA_NOWAIT | BUS_DMA_COHERENT | BUS_DMA_ZERO, &dma->idi_map);
1284300113Sscottl	if (err) {
1285300113Sscottl		device_printf(dev,
1286300113Sscottl		    "%s: bus_dmamem_alloc(%ju) failed: %d\n",
1287300113Sscottl		    __func__, (uintmax_t)size, err);
1288300113Sscottl		goto fail_1;
1289300113Sscottl	}
1290300113Sscottl
1291300113Sscottl	dma->idi_paddr = IF_BAD_DMA;
1292300113Sscottl	err = bus_dmamap_load(dma->idi_tag, dma->idi_map, dma->idi_vaddr,
1293300113Sscottl	    size, _iflib_dmamap_cb, &dma->idi_paddr, mapflags | BUS_DMA_NOWAIT);
1294300113Sscottl	if (err || dma->idi_paddr == IF_BAD_DMA) {
1295300113Sscottl		device_printf(dev,
1296300113Sscottl		    "%s: bus_dmamap_load failed: %d\n",
1297300113Sscottl		    __func__, err);
1298300113Sscottl		goto fail_2;
1299300113Sscottl	}
1300300113Sscottl
1301300113Sscottl	dma->idi_size = size;
1302300113Sscottl	return (0);
1303300113Sscottl
1304300113Sscottlfail_2:
1305300113Sscottl	bus_dmamem_free(dma->idi_tag, dma->idi_vaddr, dma->idi_map);
1306300113Sscottlfail_1:
1307300113Sscottl	bus_dma_tag_destroy(dma->idi_tag);
1308300113Sscottlfail_0:
1309300113Sscottl	dma->idi_tag = NULL;
1310300113Sscottl
1311300113Sscottl	return (err);
1312300113Sscottl}
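
/*
 * Example (sketch): a typical caller pairs iflib_dma_alloc() with
 * iflib_dma_free() around the lifetime of a descriptor ring.  The
 * descriptor type and count here are illustrative only:
 *
 *	struct iflib_dma_info ring;
 *
 *	if (iflib_dma_alloc(ctx, 1024 * sizeof(struct hypo_desc),
 *	    &ring, 0) == 0) {
 *		(program ring.idi_paddr into the device; the CPU
 *		 accesses the ring through ring.idi_vaddr)
 *		iflib_dma_free(&ring);
 *	}
 */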
1313300113Sscottl
1314300113Sscottlint
1315300113Sscottliflib_dma_alloc_multi(if_ctx_t ctx, int *sizes, iflib_dma_info_t *dmalist, int mapflags, int count)
1316300113Sscottl{
1317300113Sscottl	int i, err = 0;	/* don't read err uninitialized when count is 0 */
1318300113Sscottl	iflib_dma_info_t *dmaiter;
1319300113Sscottl
1320300113Sscottl	dmaiter = dmalist;
1321300113Sscottl	for (i = 0; i < count; i++, dmaiter++) {
1322300113Sscottl		if ((err = iflib_dma_alloc(ctx, sizes[i], *dmaiter, mapflags)) != 0)
1323300113Sscottl			break;
1324300113Sscottl	}
1325300113Sscottl	if (err)
1326300113Sscottl		iflib_dma_free_multi(dmalist, i);
1327300113Sscottl	return (err);
1328300113Sscottl}
1329300113Sscottl
1330300113Sscottlvoid
1331300113Sscottliflib_dma_free(iflib_dma_info_t dma)
1332300113Sscottl{
1333300113Sscottl	if (dma->idi_tag == NULL)
1334300113Sscottl		return;
1335300113Sscottl	if (dma->idi_paddr != IF_BAD_DMA) {
1336300113Sscottl		bus_dmamap_sync(dma->idi_tag, dma->idi_map,
1337300113Sscottl		    BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
1338300113Sscottl		bus_dmamap_unload(dma->idi_tag, dma->idi_map);
1339300113Sscottl		dma->idi_paddr = IF_BAD_DMA;
1340300113Sscottl	}
1341300113Sscottl	if (dma->idi_vaddr != NULL) {
1342300113Sscottl		bus_dmamem_free(dma->idi_tag, dma->idi_vaddr, dma->idi_map);
1343300113Sscottl		dma->idi_vaddr = NULL;
1344300113Sscottl	}
1345300113Sscottl	bus_dma_tag_destroy(dma->idi_tag);
1346300113Sscottl	dma->idi_tag = NULL;
1347300113Sscottl}
1348300113Sscottl
1349300113Sscottlvoid
1350300113Sscottliflib_dma_free_multi(iflib_dma_info_t *dmalist, int count)
1351300113Sscottl{
1352300113Sscottl	int i;
1353300113Sscottl	iflib_dma_info_t *dmaiter = dmalist;
1354300113Sscottl
1355300113Sscottl	for (i = 0; i < count; i++, dmaiter++)
1356300113Sscottl		iflib_dma_free(*dmaiter);
1357300113Sscottl}
1358300113Sscottl
1359333338Sshurd#ifdef EARLY_AP_STARTUP
1360333338Sshurdstatic const int iflib_started = 1;
1361333338Sshurd#else
1362333338Sshurd/*
1363333338Sshurd * We used to abuse the smp_started flag to decide if the queues have been
1364333338Sshurd * fully initialized (by late taskqgroup_adjust() calls in a SYSINIT()).
1365333338Sshurd * That gave bad races, since the SYSINIT() runs strictly after smp_started
1366333338Sshurd * is set.  Run a SYSINIT() strictly after that to just set a usable
1367333338Sshurd * completion flag.
1368333338Sshurd */
1369333338Sshurd
1370333338Sshurdstatic int iflib_started;
1371333338Sshurd
1372333338Sshurdstatic void
1373333338Sshurdiflib_record_started(void *arg)
1374333338Sshurd{
1375333338Sshurd	iflib_started = 1;
1376333338Sshurd}
1377333338Sshurd
1378333338SshurdSYSINIT(iflib_record_started, SI_SUB_SMP + 1, SI_ORDER_FIRST,
1379333338Sshurd	iflib_record_started, NULL);
1380333338Sshurd#endif
1381333338Sshurd
1382300113Sscottlstatic int
1383300113Sscottliflib_fast_intr(void *arg)
1384300113Sscottl{
1385300113Sscottl	iflib_filter_info_t info = arg;
1386300113Sscottl	struct grouptask *gtask = info->ifi_task;
1387333338Sshurd	if (!iflib_started)
1388333338Sshurd		return (FILTER_HANDLED);
1389300113Sscottl
1390300113Sscottl	DBG_COUNTER_INC(fast_intrs);
1391300113Sscottl	if (info->ifi_filter != NULL && info->ifi_filter(info->ifi_filter_arg) == FILTER_HANDLED)
1392300113Sscottl		return (FILTER_HANDLED);
1393300113Sscottl
1394300113Sscottl	GROUPTASK_ENQUEUE(gtask);
1395300113Sscottl	return (FILTER_HANDLED);
1396300113Sscottl}
1397300113Sscottl
1398300113Sscottlstatic int
1399333338Sshurdiflib_fast_intr_rxtx(void *arg)
1400333338Sshurd{
1401333338Sshurd	iflib_filter_info_t info = arg;
1402333338Sshurd	struct grouptask *gtask = info->ifi_task;
1403333338Sshurd	iflib_rxq_t rxq = (iflib_rxq_t)info->ifi_ctx;
1404333338Sshurd	if_ctx_t ctx;
1405333338Sshurd	int i, cidx;
1406333338Sshurd
1407333338Sshurd	if (!iflib_started)
1408333338Sshurd		return (FILTER_HANDLED);
1409333338Sshurd
1410333338Sshurd	DBG_COUNTER_INC(fast_intrs);
1411333338Sshurd	if (info->ifi_filter != NULL && info->ifi_filter(info->ifi_filter_arg) == FILTER_HANDLED)
1412333338Sshurd		return (FILTER_HANDLED);
1413333338Sshurd
1414333338Sshurd	/* ifr_ctx is also needed below even when there are no tx queues to scan */
1415333338Sshurd	ctx = rxq->ifr_ctx;
1416333338Sshurd	for (i = 0; i < rxq->ifr_ntxqirq; i++) {
1417333338Sshurd		qidx_t txqid = rxq->ifr_txqid[i];
1418333338Sshurd
1419333338Sshurd		if (!ctx->isc_txd_credits_update(ctx->ifc_softc, txqid, false)) {
1420333338Sshurd			IFDI_TX_QUEUE_INTR_ENABLE(ctx, txqid);
1421333338Sshurd			continue;
1422333338Sshurd		}
1423333338Sshurd		GROUPTASK_ENQUEUE(&ctx->ifc_txqs[txqid].ift_task);
1424333338Sshurd	}
1425333338Sshurd	if (ctx->ifc_sctx->isc_flags & IFLIB_HAS_RXCQ)
1426333338Sshurd		cidx = rxq->ifr_cq_cidx;
1427333338Sshurd	else
1428333338Sshurd		cidx = rxq->ifr_fl[0].ifl_cidx;
1429333338Sshurd	if (iflib_rxd_avail(ctx, rxq, cidx, 1))
1430333338Sshurd		GROUPTASK_ENQUEUE(gtask);
1431333338Sshurd	else
1432333338Sshurd		IFDI_RX_QUEUE_INTR_ENABLE(ctx, rxq->ifr_id);
1433333338Sshurd	return (FILTER_HANDLED);
1434333338Sshurd}
1435333338Sshurd
1437333338Sshurdstatic int
1438333338Sshurdiflib_fast_intr_ctx(void *arg)
1439333338Sshurd{
1440333338Sshurd	iflib_filter_info_t info = arg;
1441333338Sshurd	struct grouptask *gtask = info->ifi_task;
1442333338Sshurd
1443333338Sshurd	if (!iflib_started)
1444333338Sshurd		return (FILTER_HANDLED);
1445333338Sshurd
1446333338Sshurd	DBG_COUNTER_INC(fast_intrs);
1447333338Sshurd	if (info->ifi_filter != NULL && info->ifi_filter(info->ifi_filter_arg) == FILTER_HANDLED)
1448333338Sshurd		return (FILTER_HANDLED);
1449333338Sshurd
1450333338Sshurd	GROUPTASK_ENQUEUE(gtask);
1451333338Sshurd	return (FILTER_HANDLED);
1452333338Sshurd}
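
/*
 * Example (sketch): the ifi_filter hook consulted by the fast interrupt
 * handlers above follows the driver_filter_t protocol; returning
 * FILTER_HANDLED from it suppresses the grouptask enqueue.  A hypothetical
 * driver filter that defers all work to the taskqueue looks like:
 *
 *	static int
 *	hypo_queue_filter(void *arg)
 *	{
 *		struct hypo_softc *sc = arg;
 *
 *		hypo_disable_queue_intr(sc);
 *		return (FILTER_SCHEDULE_THREAD);
 *	}
 */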
1453333338Sshurd
1454333338Sshurdstatic int
1455300113Sscottl_iflib_irq_alloc(if_ctx_t ctx, if_irq_t irq, int rid,
1456300113Sscottl	driver_filter_t filter, driver_intr_t handler, void *arg,
1457300113Sscottl				 char *name)
1458300113Sscottl{
1459333338Sshurd	int rc, flags;
1460300113Sscottl	struct resource *res;
1461333338Sshurd	void *tag = NULL;
1462300113Sscottl	device_t dev = ctx->ifc_dev;
1463300113Sscottl
1464333338Sshurd	flags = RF_ACTIVE;
1465333338Sshurd	if (ctx->ifc_flags & IFC_LEGACY)
1466333338Sshurd		flags |= RF_SHAREABLE;
1467300113Sscottl	MPASS(rid < 512);
1468300113Sscottl	irq->ii_rid = rid;
1469333338Sshurd	res = bus_alloc_resource_any(dev, SYS_RES_IRQ, &irq->ii_rid, flags);
1470300113Sscottl	if (res == NULL) {
1471300113Sscottl		device_printf(dev,
1472300113Sscottl		    "failed to allocate IRQ for rid %d, name %s.\n", rid, name);
1473300113Sscottl		return (ENOMEM);
1474300113Sscottl	}
1475300113Sscottl	irq->ii_res = res;
1476300113Sscottl	KASSERT(filter == NULL || handler == NULL, ("filter and handler can't both be non-NULL"));
1477300113Sscottl	rc = bus_setup_intr(dev, res, INTR_MPSAFE | INTR_TYPE_NET,
1478300113Sscottl						filter, handler, arg, &tag);
1479300113Sscottl	if (rc != 0) {
1480300113Sscottl		device_printf(dev,
1481300113Sscottl		    "failed to setup interrupt for rid %d, name %s: %d\n",
1482300113Sscottl					  rid, name ? name : "unknown", rc);
1483300113Sscottl		return (rc);
1484300113Sscottl	} else if (name)
1485306770Sjhb		bus_describe_intr(dev, res, tag, "%s", name);
1486300113Sscottl
1487300113Sscottl	irq->ii_tag = tag;
1488300113Sscottl	return (0);
1489300113Sscottl}
1490300113Sscottl
1492300113Sscottl/*********************************************************************
1493300113Sscottl *
1494300113Sscottl *  Allocate memory for tx_buffer structures. The tx_buffer stores all
1495300113Sscottl *  the information needed to transmit a packet on the wire. This is
1496300113Sscottl *  called only once at attach, setup is done every reset.
1497300113Sscottl *
1498300113Sscottl **********************************************************************/
1499300113Sscottl
1500300113Sscottlstatic int
1501300113Sscottliflib_txsd_alloc(iflib_txq_t txq)
1502300113Sscottl{
1503300113Sscottl	if_ctx_t ctx = txq->ift_ctx;
1504300113Sscottl	if_shared_ctx_t sctx = ctx->ifc_sctx;
1505300113Sscottl	if_softc_ctx_t scctx = &ctx->ifc_softc_ctx;
1506300113Sscottl	device_t dev = ctx->ifc_dev;
1507300113Sscottl	int err, nsegments, ntsosegments;
1508300113Sscottl
1509300113Sscottl	nsegments = scctx->isc_tx_nsegments;
1510300113Sscottl	ntsosegments = scctx->isc_tx_tso_segments_max;
1511304704Sshurd	MPASS(scctx->isc_ntxd[0] > 0);
1512304704Sshurd	MPASS(scctx->isc_ntxd[txq->ift_br_offset] > 0);
1513300113Sscottl	MPASS(nsegments > 0);
1514300113Sscottl	MPASS(ntsosegments > 0);
1515300113Sscottl	/*
1516300113Sscottl	 * Setup DMA descriptor areas.
1517300113Sscottl	 */
1518300113Sscottl	if ((err = bus_dma_tag_create(bus_get_dma_tag(dev),
1519300113Sscottl			       1, 0,			/* alignment, bounds */
1520300113Sscottl			       BUS_SPACE_MAXADDR,	/* lowaddr */
1521300113Sscottl			       BUS_SPACE_MAXADDR,	/* highaddr */
1522300113Sscottl			       NULL, NULL,		/* filter, filterarg */
1523300113Sscottl			       sctx->isc_tx_maxsize,		/* maxsize */
1524300113Sscottl			       nsegments,	/* nsegments */
1525300113Sscottl			       sctx->isc_tx_maxsegsize,	/* maxsegsize */
1526300113Sscottl			       0,			/* flags */
1527300113Sscottl			       NULL,			/* lockfunc */
1528300113Sscottl			       NULL,			/* lockfuncarg */
1529300113Sscottl			       &txq->ift_desc_tag))) {
1530300113Sscottl		device_printf(dev,"Unable to allocate TX DMA tag: %d\n", err);
1531321662Sdim		device_printf(dev,"maxsize: %ju nsegments: %d maxsegsize: %ju\n",
1532321662Sdim		    (uintmax_t)sctx->isc_tx_maxsize, nsegments, (uintmax_t)sctx->isc_tx_maxsegsize);
1533300113Sscottl		goto fail;
1534300113Sscottl	}
1535300113Sscottl	if ((err = bus_dma_tag_create(bus_get_dma_tag(dev),
1536300113Sscottl			       1, 0,			/* alignment, bounds */
1537300113Sscottl			       BUS_SPACE_MAXADDR,	/* lowaddr */
1538300113Sscottl			       BUS_SPACE_MAXADDR,	/* highaddr */
1539300113Sscottl			       NULL, NULL,		/* filter, filterarg */
1540300113Sscottl			       scctx->isc_tx_tso_size_max,		/* maxsize */
1541300113Sscottl			       ntsosegments,	/* nsegments */
1542300113Sscottl			       scctx->isc_tx_tso_segsize_max,	/* maxsegsize */
1543300113Sscottl			       0,			/* flags */
1544300113Sscottl			       NULL,			/* lockfunc */
1545300113Sscottl			       NULL,			/* lockfuncarg */
1546300113Sscottl			       &txq->ift_tso_desc_tag))) {
1547300113Sscottl		device_printf(dev,"Unable to allocate TX TSO DMA tag: %d\n", err);
1548300113Sscottl
1549300113Sscottl		goto fail;
1550300113Sscottl	}
1551300113Sscottl	if (!(txq->ift_sds.ifsd_flags =
1552300113Sscottl	    (uint8_t *) malloc(sizeof(uint8_t) *
1553304704Sshurd	    scctx->isc_ntxd[txq->ift_br_offset], M_IFLIB, M_NOWAIT | M_ZERO))) {
1554300113Sscottl		device_printf(dev, "Unable to allocate tx_buffer memory\n");
1555300113Sscottl		err = ENOMEM;
1556300113Sscottl		goto fail;
1557300113Sscottl	}
1558300113Sscottl	if (!(txq->ift_sds.ifsd_m =
1559300113Sscottl	    (struct mbuf **) malloc(sizeof(struct mbuf *) *
1560304704Sshurd	    scctx->isc_ntxd[txq->ift_br_offset], M_IFLIB, M_NOWAIT | M_ZERO))) {
1561300113Sscottl		device_printf(dev, "Unable to allocate tx_buffer memory\n");
1562300113Sscottl		err = ENOMEM;
1563300113Sscottl		goto fail;
1564300113Sscottl	}
1565300113Sscottl
1566300113Sscottl        /* Create the descriptor buffer dma maps */
1567333338Sshurd#if defined(ACPI_DMAR) || (!(defined(__i386__) || defined(__amd64__)))
1568300113Sscottl	if ((ctx->ifc_flags & IFC_DMAR) == 0)
1569300113Sscottl		return (0);
1570300113Sscottl
1571300113Sscottl	if (!(txq->ift_sds.ifsd_map =
1572304704Sshurd	    (bus_dmamap_t *) malloc(sizeof(bus_dmamap_t) * scctx->isc_ntxd[txq->ift_br_offset], M_IFLIB, M_NOWAIT | M_ZERO))) {
1573300113Sscottl		device_printf(dev, "Unable to allocate tx_buffer map memory\n");
1574300113Sscottl		err = ENOMEM;
1575300113Sscottl		goto fail;
1576300113Sscottl	}
1577300113Sscottl
1578304704Sshurd	for (int i = 0; i < scctx->isc_ntxd[txq->ift_br_offset]; i++) {
1579300113Sscottl		err = bus_dmamap_create(txq->ift_desc_tag, 0, &txq->ift_sds.ifsd_map[i]);
1580300113Sscottl		if (err != 0) {
1581300113Sscottl			device_printf(dev, "Unable to create TX DMA map\n");
1582300113Sscottl			goto fail;
1583300113Sscottl		}
1584300113Sscottl	}
1585300113Sscottl#endif
1586300113Sscottl	return (0);
1587300113Sscottlfail:
1588300113Sscottl	/* We free all, it handles case where we are in the middle */
1589300113Sscottl	iflib_tx_structures_free(ctx);
1590300113Sscottl	return (err);
1591300113Sscottl}
1592300113Sscottl
1593300113Sscottlstatic void
1594300113Sscottliflib_txsd_destroy(if_ctx_t ctx, iflib_txq_t txq, int i)
1595300113Sscottl{
1596300113Sscottl	bus_dmamap_t map;
1597300113Sscottl
1598361063Serj	if (txq->ift_sds.ifsd_map != NULL) {
1599300113Sscottl		map = txq->ift_sds.ifsd_map[i];
1600300113Sscottl		bus_dmamap_unload(txq->ift_desc_tag, map);
1601300113Sscottl		bus_dmamap_destroy(txq->ift_desc_tag, map);
1602300113Sscottl		txq->ift_sds.ifsd_map[i] = NULL;
1603300113Sscottl	}
1604300113Sscottl}
1605300113Sscottl
1606300113Sscottlstatic void
1607300113Sscottliflib_txq_destroy(iflib_txq_t txq)
1608300113Sscottl{
1609300113Sscottl	if_ctx_t ctx = txq->ift_ctx;
1610300113Sscottl
1611304704Sshurd	for (int i = 0; i < txq->ift_size; i++)
1612300113Sscottl		iflib_txsd_destroy(ctx, txq, i);
1613361058Serj
1614361058Serj	if (txq->ift_br != NULL) {
1615361058Serj		ifmp_ring_free(txq->ift_br);
1616361058Serj		txq->ift_br = NULL;
1617361058Serj	}
1618361058Serj
1619361058Serj	mtx_destroy(&txq->ift_mtx);
1620361058Serj
1621300113Sscottl	if (txq->ift_sds.ifsd_map != NULL) {
1622300113Sscottl		free(txq->ift_sds.ifsd_map, M_IFLIB);
1623300113Sscottl		txq->ift_sds.ifsd_map = NULL;
1624300113Sscottl	}
1625300113Sscottl	if (txq->ift_sds.ifsd_m != NULL) {
1626300113Sscottl		free(txq->ift_sds.ifsd_m, M_IFLIB);
1627300113Sscottl		txq->ift_sds.ifsd_m = NULL;
1628300113Sscottl	}
1629300113Sscottl	if (txq->ift_sds.ifsd_flags != NULL) {
1630300113Sscottl		free(txq->ift_sds.ifsd_flags, M_IFLIB);
1631300113Sscottl		txq->ift_sds.ifsd_flags = NULL;
1632300113Sscottl	}
1633300113Sscottl	if (txq->ift_desc_tag != NULL) {
1634300113Sscottl		bus_dma_tag_destroy(txq->ift_desc_tag);
1635300113Sscottl		txq->ift_desc_tag = NULL;
1636300113Sscottl	}
1637300113Sscottl	if (txq->ift_tso_desc_tag != NULL) {
1638300113Sscottl		bus_dma_tag_destroy(txq->ift_tso_desc_tag);
1639300113Sscottl		txq->ift_tso_desc_tag = NULL;
1640300113Sscottl	}
1641361058Serj	if (txq->ift_ifdi != NULL) {
1642361058Serj		free(txq->ift_ifdi, M_IFLIB);
1643361058Serj	}
1644300113Sscottl}
1645300113Sscottl
1646300113Sscottlstatic void
1647300113Sscottliflib_txsd_free(if_ctx_t ctx, iflib_txq_t txq, int i)
1648300113Sscottl{
1649300113Sscottl	struct mbuf **mp;
1650300113Sscottl
1651300113Sscottl	mp = &txq->ift_sds.ifsd_m[i];
1652300113Sscottl	if (*mp == NULL)
1653300113Sscottl		return;
1654300113Sscottl
1655300113Sscottl	if (txq->ift_sds.ifsd_map != NULL) {
1656300113Sscottl		bus_dmamap_sync(txq->ift_desc_tag,
1657300113Sscottl				txq->ift_sds.ifsd_map[i],
1658300113Sscottl				BUS_DMASYNC_POSTWRITE);
1659300113Sscottl		bus_dmamap_unload(txq->ift_desc_tag,
1660300113Sscottl				  txq->ift_sds.ifsd_map[i]);
1661300113Sscottl	}
1662304704Sshurd	m_free(*mp);
1663300113Sscottl	DBG_COUNTER_INC(tx_frees);
1664300113Sscottl	*mp = NULL;
1665300113Sscottl}
1666300113Sscottl
1667300113Sscottlstatic int
1668300113Sscottliflib_txq_setup(iflib_txq_t txq)
1669300113Sscottl{
1670300113Sscottl	if_ctx_t ctx = txq->ift_ctx;
1671304704Sshurd	if_softc_ctx_t scctx = &ctx->ifc_softc_ctx;
1672300113Sscottl	iflib_dma_info_t di;
1673300113Sscottl	int i;
1674300113Sscottl
1675333338Sshurd	txq->ift_qstatus = IFLIB_QUEUE_IDLE;
1676300113Sscottl	/* XXX make configurable */
1677333338Sshurd	txq->ift_update_freq = IFLIB_DEFAULT_TX_UPDATE_FREQ;
1678333338Sshurd
1679300113Sscottl	/* Reset indices */
1680300113Sscottl	txq->ift_cidx_processed = 0;
1681333338Sshurd	txq->ift_pidx = txq->ift_cidx = txq->ift_npending = 0;
1682333338Sshurd	/* Set number of descriptors available */
1683304704Sshurd	txq->ift_size = scctx->isc_ntxd[txq->ift_br_offset];
1684300113Sscottl
1685300113Sscottl	for (i = 0, di = txq->ift_ifdi; i < ctx->ifc_nhwtxqs; i++, di++)
1686300113Sscottl		bzero((void *)di->idi_vaddr, di->idi_size);
1687300113Sscottl
1688300113Sscottl	IFDI_TXQ_SETUP(ctx, txq->ift_id);
1689300113Sscottl	for (i = 0, di = txq->ift_ifdi; i < ctx->ifc_nhwtxqs; i++, di++)
1690300113Sscottl		bus_dmamap_sync(di->idi_tag, di->idi_map,
1691300113Sscottl						BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
1692300113Sscottl	return (0);
1693300113Sscottl}
1694300113Sscottl
1695300113Sscottl/*********************************************************************
1696300113Sscottl *
1697300113Sscottl *  Allocate memory for rx_buffer structures. Since we use one
1698300113Sscottl *  rx_buffer per received packet, the maximum number of rx_buffer's
1699300113Sscottl *  that we'll need is equal to the number of receive descriptors
1700300113Sscottl *  that we've allocated.
1701300113Sscottl *
1702300113Sscottl **********************************************************************/
1703300113Sscottlstatic int
1704300113Sscottliflib_rxsd_alloc(iflib_rxq_t rxq)
1705300113Sscottl{
1706300113Sscottl	if_ctx_t ctx = rxq->ifr_ctx;
1707300113Sscottl	if_shared_ctx_t sctx = ctx->ifc_sctx;
1708304704Sshurd	if_softc_ctx_t scctx = &ctx->ifc_softc_ctx;
1709300113Sscottl	device_t dev = ctx->ifc_dev;
1710300113Sscottl	iflib_fl_t fl;
1711300113Sscottl	int			err;
1712300113Sscottl
1713304704Sshurd	MPASS(scctx->isc_nrxd[0] > 0);
1714304704Sshurd	MPASS(scctx->isc_nrxd[rxq->ifr_fl_offset] > 0);
1715300113Sscottl
1716300113Sscottl	fl = rxq->ifr_fl;
1717300113Sscottl	for (int i = 0; i <  rxq->ifr_nfl; i++, fl++) {
1718304704Sshurd		fl->ifl_size = scctx->isc_nrxd[rxq->ifr_fl_offset]; /* this isn't necessarily the same */
1719300113Sscottl		err = bus_dma_tag_create(bus_get_dma_tag(dev), /* parent */
1720300113Sscottl					 1, 0,			/* alignment, bounds */
1721300113Sscottl					 BUS_SPACE_MAXADDR,	/* lowaddr */
1722300113Sscottl					 BUS_SPACE_MAXADDR,	/* highaddr */
1723300113Sscottl					 NULL, NULL,		/* filter, filterarg */
1724300113Sscottl					 sctx->isc_rx_maxsize,	/* maxsize */
1725300113Sscottl					 sctx->isc_rx_nsegments,	/* nsegments */
1726300113Sscottl					 sctx->isc_rx_maxsegsize,	/* maxsegsize */
1727300113Sscottl					 0,			/* flags */
1728300113Sscottl					 NULL,			/* lockfunc */
1729300113Sscottl					 NULL,			/* lockarg */
1730300113Sscottl					 &fl->ifl_desc_tag);
1731300113Sscottl		if (err) {
1732300113Sscottl			device_printf(dev, "%s: bus_dma_tag_create failed %d\n",
1733300113Sscottl				__func__, err);
1734300113Sscottl			goto fail;
1735300113Sscottl		}
1736333338Sshurd		if (!(fl->ifl_sds.ifsd_flags =
1737333338Sshurd		      (uint8_t *) malloc(sizeof(uint8_t) *
1738333338Sshurd					 scctx->isc_nrxd[rxq->ifr_fl_offset], M_IFLIB, M_NOWAIT | M_ZERO))) {
1739333338Sshurd			device_printf(dev, "Unable to allocate rx_buffer memory\n");
1740333338Sshurd			err = ENOMEM;
1741333338Sshurd			goto fail;
1742333338Sshurd		}
1743333338Sshurd		if (!(fl->ifl_sds.ifsd_m =
1744333338Sshurd		      (struct mbuf **) malloc(sizeof(struct mbuf *) *
1745333338Sshurd					      scctx->isc_nrxd[rxq->ifr_fl_offset], M_IFLIB, M_NOWAIT | M_ZERO))) {
1746333338Sshurd			device_printf(dev, "Unable to allocate rx_buffer memory\n");
1747333338Sshurd			err = ENOMEM;
1748333338Sshurd			goto fail;
1749333338Sshurd		}
1750333338Sshurd		if (!(fl->ifl_sds.ifsd_cl =
1751333338Sshurd		      (caddr_t *) malloc(sizeof(caddr_t) *
1752333338Sshurd					      scctx->isc_nrxd[rxq->ifr_fl_offset], M_IFLIB, M_NOWAIT | M_ZERO))) {
1753333338Sshurd			device_printf(dev, "Unable to allocate rx_buffer memory\n");
1754333338Sshurd			err = ENOMEM;
1755333338Sshurd			goto fail;
1756333338Sshurd		}
1757300113Sscottl
1758333338Sshurd		/* Create the descriptor buffer dma maps */
1759333338Sshurd#if defined(ACPI_DMAR) || (!(defined(__i386__) || defined(__amd64__)))
1760333338Sshurd		if ((ctx->ifc_flags & IFC_DMAR) == 0)
1761333338Sshurd			continue;
1762333338Sshurd
1763333338Sshurd		if (!(fl->ifl_sds.ifsd_map =
1764333338Sshurd		      (bus_dmamap_t *) malloc(sizeof(bus_dmamap_t) * scctx->isc_nrxd[rxq->ifr_fl_offset], M_IFLIB, M_NOWAIT | M_ZERO))) {
1765333338Sshurd			device_printf(dev, "Unable to allocate rx_buffer map memory\n");
1766333338Sshurd			err = ENOMEM;
1767333338Sshurd			goto fail;
1768333338Sshurd		}
1769333338Sshurd
1770333338Sshurd		for (int i = 0; i < scctx->isc_nrxd[rxq->ifr_fl_offset]; i++) {
1771333338Sshurd			err = bus_dmamap_create(fl->ifl_desc_tag, 0, &fl->ifl_sds.ifsd_map[i]);
1772333338Sshurd			if (err != 0) {
1773333338Sshurd				device_printf(dev, "Unable to create RX buffer DMA map\n");
1774300113Sscottl				goto fail;
1775300113Sscottl			}
1776300113Sscottl		}
1777333338Sshurd#endif
1778300113Sscottl	}
1779300113Sscottl	return (0);
1780300113Sscottl
1781300113Sscottlfail:
1782300113Sscottl	iflib_rx_structures_free(ctx);
1783300113Sscottl	return (err);
1784300113Sscottl}
1785300113Sscottl
1786300113Sscottl
1787300113Sscottl/*
1788300113Sscottl * Internal service routines
1789300113Sscottl */
1790300113Sscottl
1791300113Sscottlstruct rxq_refill_cb_arg {
1792300113Sscottl	int               error;
1793300113Sscottl	bus_dma_segment_t seg;
1794300113Sscottl	int               nseg;
1795300113Sscottl};
1796300113Sscottl
1797300113Sscottlstatic void
1798300113Sscottl_rxq_refill_cb(void *arg, bus_dma_segment_t *segs, int nseg, int error)
1799300113Sscottl{
1800300113Sscottl	struct rxq_refill_cb_arg *cb_arg = arg;
1801300113Sscottl
1802300113Sscottl	cb_arg->error = error;
1803300113Sscottl	cb_arg->seg = segs[0];
1804300113Sscottl	cb_arg->nseg = nseg;
1805300113Sscottl}
1806300113Sscottl
1807300113Sscottl
1808300113Sscottl#ifdef ACPI_DMAR
1809300113Sscottl#define IS_DMAR(ctx) (ctx->ifc_flags & IFC_DMAR)
1810300113Sscottl#else
1811300113Sscottl#define IS_DMAR(ctx) (0)
1812300113Sscottl#endif
1813300113Sscottl
1814300113Sscottl/**
1815300113Sscottl *	_iflib_fl_refill - refill an rxq free-buffer list
1816300113Sscottl *	@ctx: the iflib context
1817300113Sscottl *	@fl: the free-buffer list to refill
1818300113Sscottl *	@count: the number of new buffers to allocate
1819300113Sscottl *
1820300113Sscottl *	(Re)populate an rxq free-buffer list with up to @count new packet buffers.
1821300113Sscottl *	The caller must ensure that @count does not exceed the list's capacity.
1822300113Sscottl */
1823358272Shselaskystatic uint8_t
1824300113Sscottl_iflib_fl_refill(if_ctx_t ctx, iflib_fl_t fl, int count)
1825300113Sscottl{
1826300113Sscottl	struct mbuf *m;
1827333338Sshurd	int idx, frag_idx = fl->ifl_fragidx;
1828333338Sshurd	int pidx = fl->ifl_pidx;
1829333338Sshurd	caddr_t cl, *sd_cl;
1830333338Sshurd	struct mbuf **sd_m;
1831333338Sshurd	uint8_t *sd_flags;
1832333338Sshurd	struct if_rxd_update iru;
1833333338Sshurd	bus_dmamap_t *sd_map;
1834300113Sscottl	int n, i = 0;
1835300113Sscottl	uint64_t bus_addr;
1836300113Sscottl	int err;
1837333338Sshurd	qidx_t credits;
1838300113Sscottl
1839333338Sshurd	sd_m = fl->ifl_sds.ifsd_m;
1840333338Sshurd	sd_map = fl->ifl_sds.ifsd_map;
1841333338Sshurd	sd_cl = fl->ifl_sds.ifsd_cl;
1842333338Sshurd	sd_flags = fl->ifl_sds.ifsd_flags;
1843333338Sshurd	idx = pidx;
1844333338Sshurd	credits = fl->ifl_credits;
1845333338Sshurd
1846300113Sscottl	n = count;
1847300113Sscottl	MPASS(n > 0);
1848333338Sshurd	MPASS(credits + n <= fl->ifl_size);
1849300113Sscottl
1850300113Sscottl	if (pidx < fl->ifl_cidx)
1851300113Sscottl		MPASS(pidx + n <= fl->ifl_cidx);
1852333338Sshurd	if (pidx == fl->ifl_cidx && (credits < fl->ifl_size))
1853300113Sscottl		MPASS(fl->ifl_gen == 0);
1854300113Sscottl	if (pidx > fl->ifl_cidx)
1855300113Sscottl		MPASS(n <= fl->ifl_size - pidx + fl->ifl_cidx);
1856300113Sscottl
1857300113Sscottl	DBG_COUNTER_INC(fl_refills);
1858300113Sscottl	if (n > 8)
1859300113Sscottl		DBG_COUNTER_INC(fl_refills_large);
1860333338Sshurd	iru_init(&iru, fl->ifl_rxq, fl->ifl_id);
1861300113Sscottl	while (n--) {
1862300113Sscottl		/*
1863300113Sscottl		 * We allocate an uninitialized mbuf + cluster, mbuf is
1864300113Sscottl		 * initialized after rx.
1865300113Sscottl		 *
1866300113Sscottl		 * If the cluster is still set then we know a minimum sized packet was received
1867300113Sscottl		 */
1868333338Sshurd		bit_ffc_at(fl->ifl_rx_bitmap, frag_idx, fl->ifl_size, &frag_idx);
1869333338Sshurd		if ((frag_idx < 0) || (frag_idx >= fl->ifl_size))
1870333338Sshurd			bit_ffc(fl->ifl_rx_bitmap, fl->ifl_size, &frag_idx);
1871333338Sshurd		if ((cl = sd_cl[frag_idx]) == NULL) {
1872333338Sshurd			if ((cl = sd_cl[frag_idx] = m_cljget(NULL, M_NOWAIT, fl->ifl_buf_size)) == NULL)
1873300113Sscottl				break;
1874300113Sscottl#if MEMORY_LOGGING
1875300113Sscottl			fl->ifl_cl_enqueued++;
1876300113Sscottl#endif
1877300113Sscottl		}
1878300113Sscottl		if ((m = m_gethdr(M_NOWAIT, MT_NOINIT)) == NULL) {
1879300113Sscottl			break;
1880300113Sscottl		}
1881300113Sscottl#if MEMORY_LOGGING
1882300113Sscottl		fl->ifl_m_enqueued++;
1883300113Sscottl#endif
1884300113Sscottl
1885300113Sscottl		DBG_COUNTER_INC(rx_allocs);
1886300113Sscottl#if defined(__i386__) || defined(__amd64__)
1887300113Sscottl		if (!IS_DMAR(ctx)) {
1888300113Sscottl			bus_addr = pmap_kextract((vm_offset_t)cl);
1889300113Sscottl		} else
1890300113Sscottl#endif
1891300113Sscottl		{
1892300113Sscottl			struct rxq_refill_cb_arg cb_arg;
1894300113Sscottl
1895300113Sscottl			cb_arg.error = 0;
1897333338Sshurd			MPASS(sd_map != NULL);
1898333338Sshurd			MPASS(sd_map[frag_idx] != NULL);
1899333338Sshurd			err = bus_dmamap_load(fl->ifl_desc_tag, sd_map[frag_idx],
1900300113Sscottl		         cl, fl->ifl_buf_size, _rxq_refill_cb, &cb_arg, 0);
1901333338Sshurd			bus_dmamap_sync(fl->ifl_desc_tag, sd_map[frag_idx],
1902333338Sshurd					BUS_DMASYNC_PREREAD);
1903300113Sscottl
1904300113Sscottl			if (err != 0 || cb_arg.error) {
1905300113Sscottl				/*
1906300113Sscottl				 * !zone_pack ?
1907300113Sscottl				 */
1908300113Sscottl				if (fl->ifl_zone == zone_pack)
1909300113Sscottl					uma_zfree(fl->ifl_zone, cl);
1910300113Sscottl				m_free(m);
1911300113Sscottl				n = 0;
1912300113Sscottl				goto done;
1913300113Sscottl			}
1914300113Sscottl			bus_addr = cb_arg.seg.ds_addr;
1915300113Sscottl		}
1916333338Sshurd		bit_set(fl->ifl_rx_bitmap, frag_idx);
1917333338Sshurd		sd_flags[frag_idx] |= RX_SW_DESC_INUSE;
1918300113Sscottl
1919333338Sshurd		MPASS(sd_m[frag_idx] == NULL);
1920333338Sshurd		sd_cl[frag_idx] = cl;
1921333338Sshurd		sd_m[frag_idx] = m;
1922333338Sshurd		fl->ifl_rxd_idxs[i] = frag_idx;
1923300113Sscottl		fl->ifl_bus_addrs[i] = bus_addr;
1924300113Sscottl		fl->ifl_vm_addrs[i] = cl;
1925333338Sshurd		credits++;
1926300113Sscottl		i++;
1927333338Sshurd		MPASS(credits <= fl->ifl_size);
1928333338Sshurd		if (++idx == fl->ifl_size) {
1929300113Sscottl			fl->ifl_gen = 1;
1930333338Sshurd			idx = 0;
1931300113Sscottl		}
1932300113Sscottl		if (n == 0 || i == IFLIB_MAX_RX_REFRESH) {
1933333338Sshurd			iru.iru_pidx = pidx;
1934333338Sshurd			iru.iru_count = i;
1935333338Sshurd			ctx->isc_rxd_refill(ctx->ifc_softc, &iru);
1936300113Sscottl			i = 0;
1937333338Sshurd			pidx = idx;
1938333338Sshurd			fl->ifl_pidx = idx;
1939333338Sshurd			fl->ifl_credits = credits;
1940300113Sscottl		}
1941333338Sshurd
1942300113Sscottl	}
1943300113Sscottldone:
1944333338Sshurd	if (i) {
1945333338Sshurd		iru.iru_pidx = pidx;
1946333338Sshurd		iru.iru_count = i;
1947333338Sshurd		ctx->isc_rxd_refill(ctx->ifc_softc, &iru);
1948333338Sshurd		fl->ifl_pidx = idx;
1949333338Sshurd		fl->ifl_credits = credits;
1950333338Sshurd	}
1951300113Sscottl	DBG_COUNTER_INC(rxd_flush);
1952300113Sscottl	if (fl->ifl_pidx == 0)
1953300113Sscottl		pidx = fl->ifl_size - 1;
1954300113Sscottl	else
1955300113Sscottl		pidx = fl->ifl_pidx - 1;
1956333338Sshurd
1957333338Sshurd	if (sd_map)
1958333338Sshurd		bus_dmamap_sync(fl->ifl_ifdi->idi_tag, fl->ifl_ifdi->idi_map,
1959333338Sshurd				BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
1960300113Sscottl	ctx->isc_rxd_flush(ctx->ifc_softc, fl->ifl_rxq->ifr_id, fl->ifl_id, pidx);
1961333338Sshurd	fl->ifl_fragidx = frag_idx;
1962358272Shselasky
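	/*
	 * A fully satisfied refill leaves the loop above with n == -1;
	 * an early break on allocation failure leaves n >= 0, in which
	 * case IFLIB_RXEOF_EMPTY tells the caller that the free list
	 * could not be replenished and the refill should be retried.
	 */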
1963358272Shselasky	return (n == -1 ? 0 : IFLIB_RXEOF_EMPTY);
1964300113Sscottl}
1965300113Sscottl
1966358272Shselaskystatic __inline uint8_t
1967300113Sscottl__iflib_fl_refill_lt(if_ctx_t ctx, iflib_fl_t fl, int max)
1968300113Sscottl{
1969300113Sscottl	/* we avoid allowing pidx to catch up with cidx as it confuses ixl */
1970300113Sscottl	int32_t reclaimable = fl->ifl_size - fl->ifl_credits - 1;
1971300113Sscottl#ifdef INVARIANTS
1972300113Sscottl	int32_t delta = fl->ifl_size - get_inuse(fl->ifl_size, fl->ifl_cidx, fl->ifl_pidx, fl->ifl_gen) - 1;
1973300113Sscottl#endif
1974300113Sscottl
1975300113Sscottl	MPASS(fl->ifl_credits <= fl->ifl_size);
1976300113Sscottl	MPASS(reclaimable == delta);
1977300113Sscottl
1978300113Sscottl	if (reclaimable > 0)
1979358272Shselasky		return (_iflib_fl_refill(ctx, fl, min(max, reclaimable)));
1980358272Shselasky	return (0);
1981300113Sscottl}
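
/*
 * Example: with ifl_size = 1024 and ifl_credits = 1000, reclaimable is
 * 1024 - 1000 - 1 = 23, so at most 23 buffers are refilled here even if
 * max is larger; one slot is always left unused so that pidx cannot
 * catch up with cidx.
 */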
1982300113Sscottl
1983347197Serjuint8_t
1984347197Serjiflib_in_detach(if_ctx_t ctx)
1985347197Serj{
1986347197Serj	bool in_detach;
1987347197Serj	STATE_LOCK(ctx);
1988347197Serj	in_detach = !!(ctx->ifc_flags & IFC_IN_DETACH);
1989347197Serj	STATE_UNLOCK(ctx);
1990347197Serj	return (in_detach);
1991347197Serj}
1992347197Serj
1993300113Sscottlstatic void
1994300113Sscottliflib_fl_bufs_free(iflib_fl_t fl)
1995300113Sscottl{
1996300113Sscottl	iflib_dma_info_t idi = fl->ifl_ifdi;
1997300113Sscottl	uint32_t i;
1998300113Sscottl
1999300113Sscottl	for (i = 0; i < fl->ifl_size; i++) {
2000333338Sshurd		struct mbuf **sd_m = &fl->ifl_sds.ifsd_m[i];
2001333338Sshurd		uint8_t *sd_flags = &fl->ifl_sds.ifsd_flags[i];
2002333338Sshurd		caddr_t *sd_cl = &fl->ifl_sds.ifsd_cl[i];
2003300113Sscottl
2004333338Sshurd		if (*sd_flags & RX_SW_DESC_INUSE) {
2005333338Sshurd			if (fl->ifl_sds.ifsd_map != NULL) {
2006333338Sshurd				bus_dmamap_t sd_map = fl->ifl_sds.ifsd_map[i];
2007333338Sshurd				bus_dmamap_unload(fl->ifl_desc_tag, sd_map);
2008300113Sscottl			}
2009333338Sshurd			if (*sd_m != NULL) {
2010333338Sshurd				m_init(*sd_m, M_NOWAIT, MT_DATA, 0);
2011333338Sshurd				uma_zfree(zone_mbuf, *sd_m);
2012333338Sshurd			}
2013333338Sshurd			if (*sd_cl != NULL)
2014333338Sshurd				uma_zfree(fl->ifl_zone, *sd_cl);
2015333338Sshurd			*sd_flags = 0;
2016300113Sscottl		} else {
2017333338Sshurd			MPASS(*sd_cl == NULL);
2018333338Sshurd			MPASS(*sd_m == NULL);
2019300113Sscottl		}
2020300113Sscottl#if MEMORY_LOGGING
2021300113Sscottl		fl->ifl_m_dequeued++;
2022300113Sscottl		fl->ifl_cl_dequeued++;
2023300113Sscottl#endif
2024333338Sshurd		*sd_cl = NULL;
2025333338Sshurd		*sd_m = NULL;
2026300113Sscottl	}
2027333338Sshurd#ifdef INVARIANTS
2028333338Sshurd	for (i = 0; i < fl->ifl_size; i++) {
2029333338Sshurd		MPASS(fl->ifl_sds.ifsd_flags[i] == 0);
2030333338Sshurd		MPASS(fl->ifl_sds.ifsd_cl[i] == NULL);
2031333338Sshurd		MPASS(fl->ifl_sds.ifsd_m[i] == NULL);
2032333338Sshurd	}
2033333338Sshurd#endif
2034300113Sscottl	/*
2035300113Sscottl	 * Reset free list values
2036300113Sscottl	 */
2037333338Sshurd	fl->ifl_credits = fl->ifl_cidx = fl->ifl_pidx = fl->ifl_gen = fl->ifl_fragidx = 0;
2038300113Sscottl	bzero(idi->idi_vaddr, idi->idi_size);
2039300113Sscottl}
2040300113Sscottl
2041300113Sscottl/*********************************************************************
2042300113Sscottl *
2043300113Sscottl *  Initialize a receive ring and its buffers.
2044300113Sscottl *
2045300113Sscottl **********************************************************************/
2046300113Sscottlstatic int
2047300113Sscottliflib_fl_setup(iflib_fl_t fl)
2048300113Sscottl{
2049300113Sscottl	iflib_rxq_t rxq = fl->ifl_rxq;
2050300113Sscottl	if_ctx_t ctx = rxq->ifr_ctx;
2051300113Sscottl
2052333338Sshurd	bit_nclear(fl->ifl_rx_bitmap, 0, fl->ifl_size - 1);
2053300113Sscottl	/*
2054300113Sscottl	** Free current RX buffer structs and their mbufs
2055300113Sscottl	*/
2056300113Sscottl	iflib_fl_bufs_free(fl);
2057300113Sscottl	/* Now replenish the mbufs */
2058300113Sscottl	MPASS(fl->ifl_credits == 0);
2059347212Serj	fl->ifl_buf_size = ctx->ifc_rx_mbuf_sz;
2060300113Sscottl	if (fl->ifl_buf_size > ctx->ifc_max_fl_buf_size)
2061300113Sscottl		ctx->ifc_max_fl_buf_size = fl->ifl_buf_size;
2062300113Sscottl	fl->ifl_cltype = m_gettype(fl->ifl_buf_size);
2063300113Sscottl	fl->ifl_zone = m_getzone(fl->ifl_buf_size);
2064300113Sscottl
2066300113Sscottl	/* avoid pre-allocating zillions of clusters to an idle card
2067300113Sscottl	 * potentially speeding up attach
2068300113Sscottl	 */
2069358272Shselasky	(void) _iflib_fl_refill(ctx, fl, min(128, fl->ifl_size));
2070300113Sscottl	MPASS(min(128, fl->ifl_size) == fl->ifl_credits);
2071300113Sscottl	/* handle allocation failure */
2072300113Sscottl	if (min(128, fl->ifl_size) != fl->ifl_credits)
2073300113Sscottl		return (ENOBUFS);
2076300113Sscottl	MPASS(rxq != NULL);
2077300113Sscottl	MPASS(fl->ifl_ifdi != NULL);
2078300113Sscottl	bus_dmamap_sync(fl->ifl_ifdi->idi_tag, fl->ifl_ifdi->idi_map,
2079300113Sscottl	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
2080300113Sscottl	return (0);
2081300113Sscottl}
2082300113Sscottl
2083300113Sscottl/*********************************************************************
2084300113Sscottl *
2085300113Sscottl *  Free receive ring data structures
2086300113Sscottl *
2087300113Sscottl **********************************************************************/
2088300113Sscottlstatic void
2089300113Sscottliflib_rx_sds_free(iflib_rxq_t rxq)
2090300113Sscottl{
2091300113Sscottl	iflib_fl_t fl;
2092300113Sscottl	int i;
2093300113Sscottl
2094300113Sscottl	if (rxq->ifr_fl != NULL) {
2095300113Sscottl		for (i = 0; i < rxq->ifr_nfl; i++) {
2096300113Sscottl			fl = &rxq->ifr_fl[i];
2097300113Sscottl			if (fl->ifl_desc_tag != NULL) {
2098300113Sscottl				bus_dma_tag_destroy(fl->ifl_desc_tag);
2099300113Sscottl				fl->ifl_desc_tag = NULL;
2100300113Sscottl			}
2101333338Sshurd			free(fl->ifl_sds.ifsd_m, M_IFLIB);
2102333338Sshurd			free(fl->ifl_sds.ifsd_cl, M_IFLIB);
2103333338Sshurd			/* XXX destroy maps first */
2104333338Sshurd			free(fl->ifl_sds.ifsd_map, M_IFLIB);
2105333338Sshurd			fl->ifl_sds.ifsd_m = NULL;
2106333338Sshurd			fl->ifl_sds.ifsd_cl = NULL;
2107333338Sshurd			fl->ifl_sds.ifsd_map = NULL;
2108300113Sscottl		}
2109300113Sscottl		free(rxq->ifr_fl, M_IFLIB);
2110300113Sscottl		rxq->ifr_fl = NULL;
2111300113Sscottl		rxq->ifr_cq_gen = rxq->ifr_cq_cidx = rxq->ifr_cq_pidx = 0;
2112361058Serj		free(rxq->ifr_ifdi, M_IFLIB);
2113361058Serj		rxq->ifr_ifdi = NULL;
2114300113Sscottl	}
2115300113Sscottl}
2116300113Sscottl
2117300113Sscottl/*
2118300113Sscottl * MI independent logic
2119300113Sscottl *
2120300113Sscottl */
2121300113Sscottlstatic void
2122300113Sscottliflib_timer(void *arg)
2123300113Sscottl{
2124300113Sscottl	iflib_txq_t txq = arg;
2125300113Sscottl	if_ctx_t ctx = txq->ift_ctx;
2126333338Sshurd	if_softc_ctx_t sctx = &ctx->ifc_softc_ctx;
2127300113Sscottl
2128300113Sscottl	if (!(if_getdrvflags(ctx->ifc_ifp) & IFF_DRV_RUNNING))
2129300113Sscottl		return;
2130300113Sscottl	/*
2131300113Sscottl	** Check on the state of the TX queue(s); this
2132300113Sscottl	** can be done without the lock because it's RO
2133300113Sscottl	** and the HUNG state will be static if set.
2134300113Sscottl	*/
2135300113Sscottl	IFDI_TIMER(ctx, txq->ift_id);
2136300113Sscottl	if ((txq->ift_qstatus == IFLIB_QUEUE_HUNG) &&
2137333338Sshurd	    ((txq->ift_cleaned_prev == txq->ift_cleaned) ||
2138333338Sshurd	     (sctx->isc_pause_frames == 0)))
2139300113Sscottl		goto hung;
2140300113Sscottl
2141361061Serj	if (txq->ift_qstatus != IFLIB_QUEUE_IDLE &&
2142361061Serj	    ifmp_ring_is_stalled(txq->ift_br)) {
2143361061Serj		KASSERT(ctx->ifc_link_state == LINK_STATE_UP, ("queue can't be marked as hung if interface is down"));
2144333338Sshurd		txq->ift_qstatus = IFLIB_QUEUE_HUNG;
2145361061Serj	}
2146333338Sshurd	txq->ift_cleaned_prev = txq->ift_cleaned;
2147333338Sshurd	/* handle any laggards */
2148333338Sshurd	if (txq->ift_db_pending)
2149300113Sscottl		GROUPTASK_ENQUEUE(&txq->ift_task);
2150300113Sscottl
2151333338Sshurd	sctx->isc_pause_frames = 0;
2152300113Sscottl	if (if_getdrvflags(ctx->ifc_ifp) & IFF_DRV_RUNNING)
2153300113Sscottl		callout_reset_on(&txq->ift_timer, hz/2, iflib_timer, txq, txq->ift_timer.c_cpu);
2154300113Sscottl	return;
2155347197Serj hung:
2156300113Sscottl	device_printf(ctx->ifc_dev,  "TX(%d) desc avail = %d, pidx = %d\n",
2157300113Sscottl				  txq->ift_id, TXQ_AVAIL(txq), txq->ift_pidx);
2158347197Serj	STATE_LOCK(ctx);
2159347197Serj	if_setdrvflagbits(ctx->ifc_ifp, IFF_DRV_OACTIVE, IFF_DRV_RUNNING);
2160347197Serj	ctx->ifc_flags |= (IFC_DO_WATCHDOG|IFC_DO_RESET);
2161333338Sshurd	iflib_admin_intr_deferred(ctx);
2162347197Serj	STATE_UNLOCK(ctx);
2163300113Sscottl}
2164300113Sscottl
2165300113Sscottlstatic void
2166347212Serjiflib_calc_rx_mbuf_sz(if_ctx_t ctx)
2167347212Serj{
2168347212Serj	if_softc_ctx_t sctx = &ctx->ifc_softc_ctx;
2169347212Serj
2170347212Serj	/*
2171347212Serj	 * XXX don't set the max_frame_size to larger
2172347212Serj	 * than the hardware can handle
2173347212Serj	 */
2174347212Serj	if (sctx->isc_max_frame_size <= MCLBYTES)
2175347212Serj		ctx->ifc_rx_mbuf_sz = MCLBYTES;
2176347212Serj	else
2177347212Serj		ctx->ifc_rx_mbuf_sz = MJUMPAGESIZE;
2178347212Serj}
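
/*
 * Example: a 1500-byte MTU typically yields isc_max_frame_size = 1518
 * (payload plus Ethernet header and CRC), which fits in an MCLBYTES
 * (2048-byte) cluster; a 9000-byte jumbo MTU does not, so page-sized
 * MJUMPAGESIZE clusters are used instead and a single frame may then
 * span several free-list buffers.
 */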
2179347212Serj
2180347212Serjuint32_t
2181347212Serjiflib_get_rx_mbuf_sz(if_ctx_t ctx)
2182347212Serj{
2183347212Serj	return (ctx->ifc_rx_mbuf_sz);
2184347212Serj}
2185347212Serj
2186347212Serjstatic void
2187300113Sscottliflib_init_locked(if_ctx_t ctx)
2188300113Sscottl{
2189300113Sscottl	if_softc_ctx_t sctx = &ctx->ifc_softc_ctx;
2191300113Sscottl	if_t ifp = ctx->ifc_ifp;
2192300113Sscottl	iflib_fl_t fl;
2193300113Sscottl	iflib_txq_t txq;
2194300113Sscottl	iflib_rxq_t rxq;
2195333338Sshurd	int i, j, tx_ip_csum_flags, tx_ip6_csum_flags;
2196300113Sscottl
2198300113Sscottl	if_setdrvflagbits(ifp, IFF_DRV_OACTIVE, IFF_DRV_RUNNING);
2199300113Sscottl	IFDI_INTR_DISABLE(ctx);
2200300113Sscottl
2201333338Sshurd	tx_ip_csum_flags = sctx->isc_tx_csum_flags & (CSUM_IP | CSUM_TCP | CSUM_UDP | CSUM_SCTP);
2202333338Sshurd	tx_ip6_csum_flags = sctx->isc_tx_csum_flags & (CSUM_IP6_TCP | CSUM_IP6_UDP | CSUM_IP6_SCTP);
2203300113Sscottl	/* Set hardware offload abilities */
2204300113Sscottl	if_clearhwassist(ifp);
2205300113Sscottl	if (if_getcapenable(ifp) & IFCAP_TXCSUM)
2206333338Sshurd		if_sethwassistbits(ifp, tx_ip_csum_flags, 0);
2207300113Sscottl	if (if_getcapenable(ifp) & IFCAP_TXCSUM_IPV6)
2208333338Sshurd		if_sethwassistbits(ifp, tx_ip6_csum_flags, 0);
2209300113Sscottl	if (if_getcapenable(ifp) & IFCAP_TSO4)
2210300113Sscottl		if_sethwassistbits(ifp, CSUM_IP_TSO, 0);
2211300113Sscottl	if (if_getcapenable(ifp) & IFCAP_TSO6)
2212300113Sscottl		if_sethwassistbits(ifp, CSUM_IP6_TSO, 0);
2213300113Sscottl
2214300113Sscottl	for (i = 0, txq = ctx->ifc_txqs; i < sctx->isc_ntxqsets; i++, txq++) {
2215300113Sscottl		CALLOUT_LOCK(txq);
2216300113Sscottl		callout_stop(&txq->ift_timer);
2217300113Sscottl		CALLOUT_UNLOCK(txq);
2218300113Sscottl		iflib_netmap_txq_init(ctx, txq);
2219300113Sscottl	}
2220347212Serj
2221347212Serj	/*
2222347212Serj	 * Calculate a suitable Rx mbuf size prior to calling IFDI_INIT, so
2223347212Serj	 * that drivers can use the value when setting up the hardware receive
2224347212Serj	 * buffers.
2225347212Serj	 */
2226347212Serj	iflib_calc_rx_mbuf_sz(ctx);
2227347212Serj
2228304704Sshurd#ifdef INVARIANTS
2229304704Sshurd	i = if_getdrvflags(ifp);
2230304704Sshurd#endif
2231300113Sscottl	IFDI_INIT(ctx);
2232304704Sshurd	MPASS(if_getdrvflags(ifp) == i);
2233300113Sscottl	for (i = 0, rxq = ctx->ifc_rxqs; i < sctx->isc_nrxqsets; i++, rxq++) {
2234333338Sshurd		/* XXX this should really be done on a per-queue basis */
2235333338Sshurd		if (if_getcapenable(ifp) & IFCAP_NETMAP) {
2236333338Sshurd			MPASS(rxq->ifr_id == i);
2237333338Sshurd			iflib_netmap_rxq_init(ctx, rxq);
2238333338Sshurd			continue;
2239333338Sshurd		}
2240300113Sscottl		for (j = 0, fl = rxq->ifr_fl; j < rxq->ifr_nfl; j++, fl++) {
2241300113Sscottl			if (iflib_fl_setup(fl)) {
2242300113Sscottl				device_printf(ctx->ifc_dev, "freelist setup failed - check cluster settings\n");
2243300113Sscottl				goto done;
2244300113Sscottl			}
2245300113Sscottl		}
2246300113Sscottl	}
2247338871Serjdone:
2248300113Sscottl	if_setdrvflagbits(ctx->ifc_ifp, IFF_DRV_RUNNING, IFF_DRV_OACTIVE);
2249300113Sscottl	IFDI_INTR_ENABLE(ctx);
2250300113Sscottl	txq = ctx->ifc_txqs;
2251300113Sscottl	for (i = 0; i < sctx->isc_ntxqsets; i++, txq++)
2252300113Sscottl		callout_reset_on(&txq->ift_timer, hz/2, iflib_timer, txq,
2253300113Sscottl			txq->ift_timer.c_cpu);
2254300113Sscottl}
2255300113Sscottl
2256300113Sscottlstatic int
2257300113Sscottliflib_media_change(if_t ifp)
2258300113Sscottl{
2259300113Sscottl	if_ctx_t ctx = if_getsoftc(ifp);
2260300113Sscottl	int err;
2261300113Sscottl
2262300113Sscottl	CTX_LOCK(ctx);
2263300113Sscottl	if ((err = IFDI_MEDIA_CHANGE(ctx)) == 0)
2264300113Sscottl		iflib_init_locked(ctx);
2265300113Sscottl	CTX_UNLOCK(ctx);
2266300113Sscottl	return (err);
2267300113Sscottl}
2268300113Sscottl
2269300113Sscottlstatic void
2270300113Sscottliflib_media_status(if_t ifp, struct ifmediareq *ifmr)
2271300113Sscottl{
2272300113Sscottl	if_ctx_t ctx = if_getsoftc(ifp);
2273300113Sscottl
2274300113Sscottl	CTX_LOCK(ctx);
2275300113Sscottl	IFDI_UPDATE_ADMIN_STATUS(ctx);
2276300113Sscottl	IFDI_MEDIA_STATUS(ctx, ifmr);
2277300113Sscottl	CTX_UNLOCK(ctx);
2278300113Sscottl}
2279300113Sscottl
2280300113Sscottlstatic void
2281300113Sscottliflib_stop(if_ctx_t ctx)
2282300113Sscottl{
2283300113Sscottl	iflib_txq_t txq = ctx->ifc_txqs;
2284300113Sscottl	iflib_rxq_t rxq = ctx->ifc_rxqs;
2285300113Sscottl	if_softc_ctx_t scctx = &ctx->ifc_softc_ctx;
2286300113Sscottl	iflib_dma_info_t di;
2287300113Sscottl	iflib_fl_t fl;
2288300113Sscottl	int i, j;
2289300113Sscottl
2290300113Sscottl	/* Tell the stack that the interface is no longer active */
2291300113Sscottl	if_setdrvflagbits(ctx->ifc_ifp, IFF_DRV_OACTIVE, IFF_DRV_RUNNING);
2292300113Sscottl
2293300113Sscottl	IFDI_INTR_DISABLE(ctx);
2294333338Sshurd	DELAY(1000);
2295333338Sshurd	IFDI_STOP(ctx);
2296333338Sshurd	DELAY(1000);
2297300113Sscottl
2298333338Sshurd	iflib_debug_reset();
2299300113Sscottl	/* Wait for current tx queue users to exit to disarm watchdog timer. */
2300300113Sscottl	for (i = 0; i < scctx->isc_ntxqsets; i++, txq++) {
2301300113Sscottl		/* make sure all transmitters have completed before proceeding XXX */
2302300113Sscottl
2303333338Sshurd		CALLOUT_LOCK(txq);
2304333338Sshurd		callout_stop(&txq->ift_timer);
2305333338Sshurd		CALLOUT_UNLOCK(txq);
2306333338Sshurd
2307300113Sscottl		/* clean any enqueued buffers */
2308333338Sshurd		iflib_ifmp_purge(txq);
2309300113Sscottl		/* Free any existing tx buffers. */
2310304704Sshurd		for (j = 0; j < txq->ift_size; j++) {
2311300113Sscottl			iflib_txsd_free(ctx, txq, j);
2312300113Sscottl		}
2313300113Sscottl		txq->ift_processed = txq->ift_cleaned = txq->ift_cidx_processed = 0;
2314333338Sshurd		txq->ift_in_use = txq->ift_gen = txq->ift_cidx = txq->ift_pidx = txq->ift_no_desc_avail = 0;
2315300113Sscottl		txq->ift_closed = txq->ift_mbuf_defrag = txq->ift_mbuf_defrag_failed = 0;
2316300113Sscottl		txq->ift_no_tx_dma_setup = txq->ift_txd_encap_efbig = txq->ift_map_failed = 0;
2317300113Sscottl		txq->ift_pullups = 0;
2318333338Sshurd		ifmp_ring_reset_stats(txq->ift_br);
2319300113Sscottl		for (j = 0, di = txq->ift_ifdi; j < ctx->ifc_nhwtxqs; j++, di++)
2320300113Sscottl			bzero((void *)di->idi_vaddr, di->idi_size);
2321300113Sscottl	}
2322300113Sscottl	for (i = 0; i < scctx->isc_nrxqsets; i++, rxq++) {
2323300113Sscottl		/* make sure all rx processing has completed before proceeding XXX */
2324300113Sscottl
2325333338Sshurd		for (j = 0, di = rxq->ifr_ifdi; j < rxq->ifr_nfl; j++, di++)
2326300113Sscottl			bzero((void *)di->idi_vaddr, di->idi_size);
2327300113Sscottl		/* also resets the free lists pidx/cidx */
2328300113Sscottl		for (j = 0, fl = rxq->ifr_fl; j < rxq->ifr_nfl; j++, fl++)
2329300113Sscottl			iflib_fl_bufs_free(fl);
2330300113Sscottl	}
2331300113Sscottl}
2332300113Sscottl
2333333338Sshurdstatic inline caddr_t
2334333338Sshurdcalc_next_rxd(iflib_fl_t fl, int cidx)
2335300113Sscottl{
2336333338Sshurd	qidx_t size;
2337333338Sshurd	int nrxd;
2338333338Sshurd	caddr_t start, end, cur, next;
2339333338Sshurd
2340333338Sshurd	nrxd = fl->ifl_size;
2341333338Sshurd	size = fl->ifl_rxd_size;
2342333338Sshurd	start = fl->ifl_ifdi->idi_vaddr;
2343333338Sshurd
2344333338Sshurd	if (__predict_false(size == 0))
2345333338Sshurd		return (start);
2346333338Sshurd	cur = start + size*cidx;
2347333338Sshurd	end = start + size*nrxd;
2348333338Sshurd	next = CACHE_PTR_NEXT(cur);
2349333338Sshurd	return (next < end ? next : start);
2350333338Sshurd}
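
/*
 * Example: with 32-byte descriptors and cidx = 5, cur points 160 bytes
 * into the ring and the returned pointer is the next cache-line-aligned
 * address past cur, wrapping back to the start of the ring once the end
 * is reached.  The result is only ever used as a prefetch hint, so
 * overshooting within the ring is harmless.
 */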
2351333338Sshurd
2352333338Sshurdstatic inline void
2353333338Sshurdprefetch_pkts(iflib_fl_t fl, int cidx)
2354333338Sshurd{
2355333338Sshurd	int nextptr;
2356333338Sshurd	int nrxd = fl->ifl_size;
2357333338Sshurd	caddr_t next_rxd;
2358333338Sshurd
2360333338Sshurd	nextptr = (cidx + CACHE_PTR_INCREMENT) & (nrxd-1);
2361333338Sshurd	prefetch(&fl->ifl_sds.ifsd_m[nextptr]);
2362333338Sshurd	prefetch(&fl->ifl_sds.ifsd_cl[nextptr]);
2363333338Sshurd	next_rxd = calc_next_rxd(fl, cidx);
2364333338Sshurd	prefetch(next_rxd);
2365333338Sshurd	prefetch(fl->ifl_sds.ifsd_m[(cidx + 1) & (nrxd-1)]);
2366333338Sshurd	prefetch(fl->ifl_sds.ifsd_m[(cidx + 2) & (nrxd-1)]);
2367333338Sshurd	prefetch(fl->ifl_sds.ifsd_m[(cidx + 3) & (nrxd-1)]);
2368333338Sshurd	prefetch(fl->ifl_sds.ifsd_m[(cidx + 4) & (nrxd-1)]);
2369333338Sshurd	prefetch(fl->ifl_sds.ifsd_cl[(cidx + 1) & (nrxd-1)]);
2370333338Sshurd	prefetch(fl->ifl_sds.ifsd_cl[(cidx + 2) & (nrxd-1)]);
2371333338Sshurd	prefetch(fl->ifl_sds.ifsd_cl[(cidx + 3) & (nrxd-1)]);
2372333338Sshurd	prefetch(fl->ifl_sds.ifsd_cl[(cidx + 4) & (nrxd-1)]);
2373333338Sshurd}
2374333338Sshurd
2375333338Sshurdstatic void
2376333338Sshurdrxd_frag_to_sd(iflib_rxq_t rxq, if_rxd_frag_t irf, int unload, if_rxsd_t sd)
2377333338Sshurd{
2378300113Sscottl	int flid, cidx;
2379333338Sshurd	bus_dmamap_t map;
2380300113Sscottl	iflib_fl_t fl;
2381300113Sscottl	iflib_dma_info_t di;
2382333338Sshurd	int next;
2383300113Sscottl
2384333338Sshurd	map = NULL;
2385300113Sscottl	flid = irf->irf_flid;
2386300113Sscottl	cidx = irf->irf_idx;
2387300113Sscottl	fl = &rxq->ifr_fl[flid];
2388333338Sshurd	sd->ifsd_fl = fl;
2389333338Sshurd	sd->ifsd_cidx = cidx;
2390333338Sshurd	sd->ifsd_m = &fl->ifl_sds.ifsd_m[cidx];
2391333338Sshurd	sd->ifsd_cl = &fl->ifl_sds.ifsd_cl[cidx];
2392300113Sscottl	fl->ifl_credits--;
2393300113Sscottl#if MEMORY_LOGGING
2394300113Sscottl	fl->ifl_m_dequeued++;
2395300113Sscottl#endif
2396333338Sshurd	if (rxq->ifr_ctx->ifc_flags & IFC_PREFETCH)
2397333338Sshurd		prefetch_pkts(fl, cidx);
2398333338Sshurd	if (fl->ifl_sds.ifsd_map != NULL) {
2399333338Sshurd		next = (cidx + CACHE_PTR_INCREMENT) & (fl->ifl_size-1);
2400333338Sshurd		prefetch(&fl->ifl_sds.ifsd_map[next]);
2401333338Sshurd		map = fl->ifl_sds.ifsd_map[cidx];
2402333338Sshurd		di = fl->ifl_ifdi;
2403333338Sshurd		next = (cidx + CACHE_LINE_SIZE) & (fl->ifl_size-1);
2404333338Sshurd		prefetch(&fl->ifl_sds.ifsd_flags[next]);
2405333338Sshurd		bus_dmamap_sync(di->idi_tag, di->idi_map,
2406333338Sshurd				BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
2407300113Sscottl
2408300113Sscottl		/* not valid assert if bxe really does SGE from non-contiguous elements */
2409333338Sshurd		MPASS(fl->ifl_cidx == cidx);
2410333338Sshurd		if (unload)
2411333338Sshurd			bus_dmamap_unload(fl->ifl_desc_tag, map);
2412333338Sshurd	}
2413333338Sshurd	fl->ifl_cidx = (fl->ifl_cidx + 1) & (fl->ifl_size-1);
2414333338Sshurd	if (__predict_false(fl->ifl_cidx == 0))
2415300113Sscottl		fl->ifl_gen = 0;
2416333338Sshurd	if (map != NULL)
2417333338Sshurd		bus_dmamap_sync(fl->ifl_ifdi->idi_tag, fl->ifl_ifdi->idi_map,
2418333338Sshurd			BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
2419333338Sshurd	bit_clear(fl->ifl_rx_bitmap, cidx);
2420300113Sscottl}
2421300113Sscottl
2422300113Sscottlstatic struct mbuf *
2423333338Sshurdassemble_segments(iflib_rxq_t rxq, if_rxd_info_t ri, if_rxsd_t sd)
2424300113Sscottl{
2425333338Sshurd	int i, padlen, flags;
2426300113Sscottl	struct mbuf *m, *mh, *mt;
2427300113Sscottl	caddr_t cl;
2428300113Sscottl
2429300113Sscottl	i = 0;
2430304704Sshurd	mh = NULL;
2431300113Sscottl	do {
2432333338Sshurd		rxd_frag_to_sd(rxq, &ri->iri_frags[i], TRUE, sd);
2433300113Sscottl
2434333338Sshurd		MPASS(*sd->ifsd_cl != NULL);
2435333338Sshurd		MPASS(*sd->ifsd_m != NULL);
2436304704Sshurd
2437304704Sshurd		/* Don't include zero-length frags */
2438304704Sshurd		if (ri->iri_frags[i].irf_len == 0) {
2439304704Sshurd			/* XXX we can save the cluster here, but not the mbuf */
2440333338Sshurd			m_init(*sd->ifsd_m, M_NOWAIT, MT_DATA, 0);
2441333338Sshurd			m_free(*sd->ifsd_m);
2442333338Sshurd			*sd->ifsd_m = NULL;
2443304704Sshurd			continue;
2444304704Sshurd		}
2445333338Sshurd		m = *sd->ifsd_m;
2446333338Sshurd		*sd->ifsd_m = NULL;
2447304704Sshurd		if (mh == NULL) {
2448300113Sscottl			flags = M_PKTHDR|M_EXT;
2449300113Sscottl			mh = mt = m;
2450300113Sscottl			padlen = ri->iri_pad;
2451300113Sscottl		} else {
2452300113Sscottl			flags = M_EXT;
2453300113Sscottl			mt->m_next = m;
2454300113Sscottl			mt = m;
2455300113Sscottl			/* assuming padding is only on the first fragment */
2456300113Sscottl			padlen = 0;
2457300113Sscottl		}
2458333338Sshurd		cl = *sd->ifsd_cl;
2459333338Sshurd		*sd->ifsd_cl = NULL;
2460300113Sscottl
2461300113Sscottl		/* Can these two be made one? */
2462300113Sscottl		m_init(m, M_NOWAIT, MT_DATA, flags);
2463333338Sshurd		m_cljset(m, cl, sd->ifsd_fl->ifl_cltype);
2464300113Sscottl		/*
2465300113Sscottl		 * These must follow m_init and m_cljset
2466300113Sscottl		 */
2467300113Sscottl		m->m_data += padlen;
2468300113Sscottl		ri->iri_len -= padlen;
2469304704Sshurd		m->m_len = ri->iri_frags[i].irf_len;
2470300113Sscottl	} while (++i < ri->iri_nfrags);
2471300113Sscottl
2472300113Sscottl	return (mh);
2473300113Sscottl}
2474300113Sscottl
2475300113Sscottl/*
2476300113Sscottl * Process one software descriptor
2477300113Sscottl */
2478300113Sscottlstatic struct mbuf *
2479300113Sscottliflib_rxd_pkt_get(iflib_rxq_t rxq, if_rxd_info_t ri)
2480300113Sscottl{
2481333338Sshurd	struct if_rxsd sd;
2482300113Sscottl	struct mbuf *m;
2483300113Sscottl
2484300113Sscottl	/* should I merge this back in now that the two paths are basically duplicated? */
2485304704Sshurd	if (ri->iri_nfrags == 1 &&
2486331673Smarkj	    ri->iri_frags[0].irf_len <= MIN(IFLIB_RX_COPY_THRESH, MHLEN)) {
2487333338Sshurd		rxd_frag_to_sd(rxq, &ri->iri_frags[0], FALSE, &sd);
2488333338Sshurd		m = *sd.ifsd_m;
2489333338Sshurd		*sd.ifsd_m = NULL;
2490300113Sscottl		m_init(m, M_NOWAIT, MT_DATA, M_PKTHDR);
2491333338Sshurd#ifndef __NO_STRICT_ALIGNMENT
2492333338Sshurd		if (!IP_ALIGNED(m))
2493333338Sshurd			m->m_data += 2;
2494333338Sshurd#endif
2495333338Sshurd		memcpy(m->m_data, *sd.ifsd_cl, ri->iri_len);
2496304704Sshurd		m->m_len = ri->iri_frags[0].irf_len;
2497300113Sscottl	} else {
2498333338Sshurd		m = assemble_segments(rxq, ri, &sd);
2499300113Sscottl	}
2500300113Sscottl	m->m_pkthdr.len = ri->iri_len;
2501300113Sscottl	m->m_pkthdr.rcvif = ri->iri_ifp;
2502300113Sscottl	m->m_flags |= ri->iri_flags;
2503300113Sscottl	m->m_pkthdr.ether_vtag = ri->iri_vtag;
2504300113Sscottl	m->m_pkthdr.flowid = ri->iri_flowid;
2505300113Sscottl	M_HASHTYPE_SET(m, ri->iri_rsstype);
2506300113Sscottl	m->m_pkthdr.csum_flags = ri->iri_csum_flags;
2507300113Sscottl	m->m_pkthdr.csum_data = ri->iri_csum_data;
2508300113Sscottl	return (m);
2509300113Sscottl}
2510300113Sscottl
2511333338Sshurd#if defined(INET6) || defined(INET)
2512333338Sshurdstatic void
2513333338Sshurdiflib_get_ip_forwarding(struct lro_ctrl *lc, bool *v4, bool *v6)
2514333338Sshurd{
2515333338Sshurd	CURVNET_SET(lc->ifp->if_vnet);
2516333338Sshurd#if defined(INET6)
2517333338Sshurd	*v6 = VNET(ip6_forwarding);
2518333338Sshurd#endif
2519333338Sshurd#if defined(INET)
2520333338Sshurd	*v4 = VNET(ipforwarding);
2521333338Sshurd#endif
2522333338Sshurd	CURVNET_RESTORE();
2523333338Sshurd}
2524333338Sshurd
2525333338Sshurd/*
2526333338Sshurd * Returns true if it's possible this packet could be LROed.
2527333338Sshurd * if it returns false, it is guaranteed that tcp_lro_rx()
2528333338Sshurd * would not return zero.
2529333338Sshurd */
2530300113Sscottlstatic bool
2531333338Sshurdiflib_check_lro_possible(struct mbuf *m, bool v4_forwarding, bool v6_forwarding)
2532300113Sscottl{
2533333338Sshurd	struct ether_header *eh;
2534333338Sshurd	uint16_t eh_type;
2535333338Sshurd
2536333338Sshurd	eh = mtod(m, struct ether_header *);
2537333338Sshurd	eh_type = ntohs(eh->ether_type);
2538333338Sshurd	switch (eh_type) {
2539333338Sshurd#if defined(INET6)
2540333338Sshurd	case ETHERTYPE_IPV6:
2541333338Sshurd		return (!v6_forwarding);
2542333338Sshurd#endif
2543333338Sshurd#if defined(INET)
2544333338Sshurd	case ETHERTYPE_IP:
2545333338Sshurd		return (!v4_forwarding);
2546333338Sshurd#endif
2547333338Sshurd	}
2548333338Sshurd
2549333338Sshurd	return (false);
2550333338Sshurd}
2551333338Sshurd#else
2552333338Sshurdstatic void
2553333338Sshurdiflib_get_ip_forwarding(struct lro_ctrl *lc __unused, bool *v4 __unused, bool *v6 __unused)
2554333338Sshurd{
2555333338Sshurd}
2556333338Sshurd#endif
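
/*
 * Example (sketch): the receive path below is expected to combine these
 * helpers roughly as follows before handing a chain to tcp_lro_rx();
 * hosts that forward packets must not coalesce, since LRO is not
 * transparent to the forwarding path:
 *
 *	if (lro_enabled)
 *		iflib_get_ip_forwarding(&rxq->ifr_lc, &v4_forwarding,
 *		    &v6_forwarding);
 *	...
 *	if (lro_enabled && iflib_check_lro_possible(m, v4_forwarding,
 *	    v6_forwarding))
 *		(try tcp_lro_rx(); fall back to if_input on failure)
 */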
2557333338Sshurd
2558358272Shselaskystatic void
2559358272Shselasky_task_fn_rx_watchdog(void *context)
2560358272Shselasky{
2561358272Shselasky	iflib_rxq_t rxq = context;
2562358272Shselasky
2563358272Shselasky	GROUPTASK_ENQUEUE(&rxq->ifr_task);
2564358272Shselasky}
2565358272Shselasky
2566358272Shselaskystatic uint8_t
2567333338Sshurdiflib_rxeof(iflib_rxq_t rxq, qidx_t budget)
2568333338Sshurd{
2569300113Sscottl	if_ctx_t ctx = rxq->ifr_ctx;
2570300113Sscottl	if_shared_ctx_t sctx = ctx->ifc_sctx;
2571304704Sshurd	if_softc_ctx_t scctx = &ctx->ifc_softc_ctx;
2572300113Sscottl	int avail, i;
2573333338Sshurd	qidx_t *cidxp;
2574300113Sscottl	struct if_rxd_info ri;
2575300113Sscottl	int err, budget_left, rx_bytes, rx_pkts;
2576300113Sscottl	iflib_fl_t fl;
2577300113Sscottl	struct ifnet *ifp;
2578300113Sscottl	int lro_enabled;
2579333338Sshurd	bool lro_possible = false;
2580333338Sshurd	bool v4_forwarding, v6_forwarding;
2581358272Shselasky	uint8_t retval = 0;
2582333338Sshurd
2583300113Sscottl	/*
2584300113Sscottl	 * XXX early demux data packets so that if_input processing only handles
2585300113Sscottl	 * acks in interrupt context
2586300113Sscottl	 */
2587333338Sshurd	struct mbuf *m, *mh, *mt, *mf;
2588300113Sscottl
2589333338Sshurd	ifp = ctx->ifc_ifp;
2590300113Sscottl	mh = mt = NULL;
2591300113Sscottl	MPASS(budget > 0);
	rx_pkts = rx_bytes = 0;
2593304704Sshurd	if (sctx->isc_flags & IFLIB_HAS_RXCQ)
2594300113Sscottl		cidxp = &rxq->ifr_cq_cidx;
2595300113Sscottl	else
2596300113Sscottl		cidxp = &rxq->ifr_fl[0].ifl_cidx;
2597304704Sshurd	if ((avail = iflib_rxd_avail(ctx, rxq, *cidxp, budget)) == 0) {
2598300113Sscottl		for (i = 0, fl = &rxq->ifr_fl[0]; i < sctx->isc_nfl; i++, fl++)
2599358272Shselasky			retval |= __iflib_fl_refill_lt(ctx, fl, budget + 8);
2600300113Sscottl		DBG_COUNTER_INC(rx_unavail);
2601358272Shselasky		return (retval);
2602300113Sscottl	}
2603300113Sscottl
2604300113Sscottl	for (budget_left = budget; (budget_left > 0) && (avail > 0); budget_left--, avail--) {
2605300113Sscottl		if (__predict_false(!CTX_ACTIVE(ctx))) {
2606300113Sscottl			DBG_COUNTER_INC(rx_ctx_inactive);
2607300113Sscottl			break;
2608300113Sscottl		}
2609300113Sscottl		/*
2610300113Sscottl		 * Reset client set fields to their default values
2611300113Sscottl		 */
2612333338Sshurd		rxd_info_zero(&ri);
2613300113Sscottl		ri.iri_qsidx = rxq->ifr_id;
2614300113Sscottl		ri.iri_cidx = *cidxp;
2615333338Sshurd		ri.iri_ifp = ifp;
2616300113Sscottl		ri.iri_frags = rxq->ifr_frags;
2617300113Sscottl		err = ctx->isc_rxd_pkt_get(ctx->ifc_softc, &ri);
2618300113Sscottl
2619333338Sshurd		if (err)
2620333338Sshurd			goto err;
2621304704Sshurd		if (sctx->isc_flags & IFLIB_HAS_RXCQ) {
2622304704Sshurd			*cidxp = ri.iri_cidx;
2623304704Sshurd			/* Update our consumer index */
2624333338Sshurd			/* XXX NB: shurd - check if this is still safe */
2625304704Sshurd			while (rxq->ifr_cq_cidx >= scctx->isc_nrxd[0]) {
2626304704Sshurd				rxq->ifr_cq_cidx -= scctx->isc_nrxd[0];
2627300113Sscottl				rxq->ifr_cq_gen = 0;
2628300113Sscottl			}
2629300113Sscottl			/* was this only a completion queue message? */
2630300113Sscottl			if (__predict_false(ri.iri_nfrags == 0))
2631300113Sscottl				continue;
2632300113Sscottl		}
2633300113Sscottl		MPASS(ri.iri_nfrags != 0);
2634300113Sscottl		MPASS(ri.iri_len != 0);
2635300113Sscottl
2636300113Sscottl		/* will advance the cidx on the corresponding free lists */
2637300113Sscottl		m = iflib_rxd_pkt_get(rxq, &ri);
2638300113Sscottl		if (avail == 0 && budget_left)
2639304704Sshurd			avail = iflib_rxd_avail(ctx, rxq, *cidxp, budget_left);
2640300113Sscottl
2641300113Sscottl		if (__predict_false(m == NULL)) {
2642300113Sscottl			DBG_COUNTER_INC(rx_mbuf_null);
2643300113Sscottl			continue;
2644300113Sscottl		}
2645300113Sscottl		/* imm_pkt: -- cxgb */
2646300113Sscottl		if (mh == NULL)
2647300113Sscottl			mh = mt = m;
2648300113Sscottl		else {
2649300113Sscottl			mt->m_nextpkt = m;
2650300113Sscottl			mt = m;
2651300113Sscottl		}
2652300113Sscottl	}
2653300113Sscottl	/* make sure that we can refill faster than drain */
2654300113Sscottl	for (i = 0, fl = &rxq->ifr_fl[0]; i < sctx->isc_nfl; i++, fl++)
2655358272Shselasky		retval |= __iflib_fl_refill_lt(ctx, fl, budget + 8);
2656300113Sscottl
2657300113Sscottl	lro_enabled = (if_getcapenable(ifp) & IFCAP_LRO);
2658333338Sshurd	if (lro_enabled)
2659333338Sshurd		iflib_get_ip_forwarding(&rxq->ifr_lc, &v4_forwarding, &v6_forwarding);
2660333338Sshurd	mt = mf = NULL;
2661300113Sscottl	while (mh != NULL) {
2662300113Sscottl		m = mh;
2663300113Sscottl		mh = mh->m_nextpkt;
2664300113Sscottl		m->m_nextpkt = NULL;
2665333338Sshurd#ifndef __NO_STRICT_ALIGNMENT
2666333338Sshurd		if (!IP_ALIGNED(m) && (m = iflib_fixup_rx(m)) == NULL)
2667333338Sshurd			continue;
2668333338Sshurd#endif
2669300113Sscottl		rx_bytes += m->m_pkthdr.len;
2670300113Sscottl		rx_pkts++;
2671300147Sbz#if defined(INET6) || defined(INET)
2672333338Sshurd		if (lro_enabled) {
2673333338Sshurd			if (!lro_possible) {
2674333338Sshurd				lro_possible = iflib_check_lro_possible(m, v4_forwarding, v6_forwarding);
2675333338Sshurd				if (lro_possible && mf != NULL) {
2676333338Sshurd					ifp->if_input(ifp, mf);
2677333338Sshurd					DBG_COUNTER_INC(rx_if_input);
2678333338Sshurd					mt = mf = NULL;
2679333338Sshurd				}
2680333338Sshurd			}
2681333338Sshurd			if ((m->m_pkthdr.csum_flags & (CSUM_L4_CALC|CSUM_L4_VALID)) ==
2682333338Sshurd			    (CSUM_L4_CALC|CSUM_L4_VALID)) {
2683333338Sshurd				if (lro_possible && tcp_lro_rx(&rxq->ifr_lc, m, 0) == 0)
2684333338Sshurd					continue;
2685333338Sshurd			}
2686333338Sshurd		}
2687333338Sshurd#endif
2688333338Sshurd		if (lro_possible) {
2689333338Sshurd			ifp->if_input(ifp, m);
2690333338Sshurd			DBG_COUNTER_INC(rx_if_input);
2691300113Sscottl			continue;
2692333338Sshurd		}
2693333338Sshurd
2694333338Sshurd		if (mf == NULL)
2695333338Sshurd			mf = m;
2696333338Sshurd		if (mt != NULL)
2697333338Sshurd			mt->m_nextpkt = m;
2698333338Sshurd		mt = m;
2699333338Sshurd	}
2700333338Sshurd	if (mf != NULL) {
2701333338Sshurd		ifp->if_input(ifp, mf);
2702300113Sscottl		DBG_COUNTER_INC(rx_if_input);
2703300113Sscottl	}
2704304704Sshurd
2705300113Sscottl	if_inc_counter(ifp, IFCOUNTER_IBYTES, rx_bytes);
2706300113Sscottl	if_inc_counter(ifp, IFCOUNTER_IPACKETS, rx_pkts);
2707300113Sscottl
2708300113Sscottl	/*
2709300113Sscottl	 * Flush any outstanding LRO work
2710300113Sscottl	 */
2711300147Sbz#if defined(INET6) || defined(INET)
2712304704Sshurd	tcp_lro_flush_all(&rxq->ifr_lc);
2713300147Sbz#endif
2714358272Shselasky	if (avail != 0 || iflib_rxd_avail(ctx, rxq, *cidxp, 1) != 0)
2715358272Shselasky		retval |= IFLIB_RXEOF_MORE;
2716358272Shselasky	return (retval);
2717333338Sshurderr:
2718347197Serj	STATE_LOCK(ctx);
2719333338Sshurd	ctx->ifc_flags |= IFC_DO_RESET;
2720333338Sshurd	iflib_admin_intr_deferred(ctx);
2721347197Serj	STATE_UNLOCK(ctx);
2722358272Shselasky	return (0);
2723300113Sscottl}
2724300113Sscottl
2725333338Sshurd#define TXD_NOTIFY_COUNT(txq) (((txq)->ift_size / (txq)->ift_update_freq)-1)
2726333338Sshurdstatic inline qidx_t
2727333338Sshurdtxq_max_db_deferred(iflib_txq_t txq, qidx_t in_use)
2728333338Sshurd{
2729333338Sshurd	qidx_t notify_count = TXD_NOTIFY_COUNT(txq);
2730333338Sshurd	qidx_t minthresh = txq->ift_size / 8;
2731333338Sshurd	if (in_use > 4*minthresh)
2732333338Sshurd		return (notify_count);
2733333338Sshurd	if (in_use > 2*minthresh)
2734333338Sshurd		return (notify_count >> 1);
2735333338Sshurd	if (in_use > minthresh)
2736333338Sshurd		return (notify_count >> 3);
2737333338Sshurd	return (0);
2738333338Sshurd}
2739333338Sshurd
2740333338Sshurdstatic inline qidx_t
2741333338Sshurdtxq_max_rs_deferred(iflib_txq_t txq)
2742333338Sshurd{
2743333338Sshurd	qidx_t notify_count = TXD_NOTIFY_COUNT(txq);
2744333338Sshurd	qidx_t minthresh = txq->ift_size / 8;
2745333338Sshurd	if (txq->ift_in_use > 4*minthresh)
2746333338Sshurd		return (notify_count);
2747333338Sshurd	if (txq->ift_in_use > 2*minthresh)
2748333338Sshurd		return (notify_count >> 1);
2749333338Sshurd	if (txq->ift_in_use > minthresh)
2750333338Sshurd		return (notify_count >> 2);
2751333338Sshurd	return (2);
2752333338Sshurd}
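
/*
 * Worked example (hypothetical values): with ift_size = 1024 and
 * ift_update_freq = 64, TXD_NOTIFY_COUNT is 1024/64 - 1 = 15 and
 * minthresh is 1024/8 = 128.  txq_max_db_deferred() then defers up to
 * 15 doorbells when more than 512 descriptors are in use, 7 above 256,
 * 1 above 128 and none below that, while txq_max_rs_deferred() defers
 * 15, 7, 3 and 2 report-status requests over the same ranges -- the
 * emptier the ring, the sooner the hardware is told about new work.
 */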
2753333338Sshurd
2754300113Sscottl#define M_CSUM_FLAGS(m) ((m)->m_pkthdr.csum_flags)
2755300113Sscottl#define M_HAS_VLANTAG(m) (m->m_flags & M_VLANTAG)
2756333338Sshurd
2757333338Sshurd#define TXQ_MAX_DB_DEFERRED(txq, in_use) txq_max_db_deferred((txq), (in_use))
2758333338Sshurd#define TXQ_MAX_RS_DEFERRED(txq) txq_max_rs_deferred(txq)
2759304704Sshurd#define TXQ_MAX_DB_CONSUMED(size) (size >> 4)
2760300113Sscottl
2761333338Sshurd/* forward compatibility for cxgb */
2762333338Sshurd#define FIRST_QSET(ctx) 0
2763333338Sshurd#define NTXQSETS(ctx) ((ctx)->ifc_softc_ctx.isc_ntxqsets)
2764333338Sshurd#define NRXQSETS(ctx) ((ctx)->ifc_softc_ctx.isc_nrxqsets)
2765333338Sshurd#define QIDX(ctx, m) ((((m)->m_pkthdr.flowid & ctx->ifc_softc_ctx.isc_rss_table_mask) % NTXQSETS(ctx)) + FIRST_QSET(ctx))
2766333338Sshurd#define DESC_RECLAIMABLE(q) ((int)((q)->ift_processed - (q)->ift_cleaned - (q)->ift_ctx->ifc_softc_ctx.isc_tx_nsegments))
2767333338Sshurd
2768333338Sshurd/* XXX we should be setting this to something other than zero */
2769333338Sshurd#define RECLAIM_THRESH(ctx) ((ctx)->ifc_sctx->isc_tx_reclaim_thresh)
2770333338Sshurd#define MAX_TX_DESC(ctx) ((ctx)->ifc_softc_ctx.isc_tx_tso_segments_max)
2771333338Sshurd
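/*
 * Ring the hardware doorbell if 'ring' forces it or once enough
 * descriptors are pending to exhaust the deferral budget computed
 * above; returns whether the doorbell was actually written.
 */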
2772333338Sshurdstatic inline bool
2773333338Sshurdiflib_txd_db_check(if_ctx_t ctx, iflib_txq_t txq, int ring, qidx_t in_use)
2774300113Sscottl{
2775333338Sshurd	qidx_t dbval, max;
2776333338Sshurd	bool rang;
2777300113Sscottl
2778333338Sshurd	rang = false;
2779333338Sshurd	max = TXQ_MAX_DB_DEFERRED(txq, in_use);
2780333338Sshurd	if (ring || txq->ift_db_pending >= max) {
2781300113Sscottl		dbval = txq->ift_npending ? txq->ift_npending : txq->ift_pidx;
2782300113Sscottl		ctx->isc_txd_flush(ctx->ifc_softc, txq->ift_id, dbval);
2783300113Sscottl		txq->ift_db_pending = txq->ift_npending = 0;
2784333338Sshurd		rang = true;
2785300113Sscottl	}
2786333338Sshurd	return (rang);
2787300113Sscottl}
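
/*
 * Hedged usage sketch (kept under #if 0, not compiled): how the drain
 * path drives the helper above.  'ctx', 'txq', 'rang' and 'reclaimed'
 * are the locals of iflib_txq_drain(); a nonzero 'reclaimed' forces
 * the flush, otherwise the doorbell write is deferred until the budget
 * from txq_max_db_deferred() is spent.
 */
#if 0
	rang = iflib_txd_db_check(ctx, txq, reclaimed, txq->ift_in_use);
#endif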
2788300113Sscottl
2789300113Sscottl#ifdef PKT_DEBUG
2790300113Sscottlstatic void
2791300113Sscottlprint_pkt(if_pkt_info_t pi)
2792300113Sscottl{
2793300113Sscottl	printf("pi len:  %d qsidx: %d nsegs: %d ndescs: %d flags: %x pidx: %d\n",
2794300113Sscottl	       pi->ipi_len, pi->ipi_qsidx, pi->ipi_nsegs, pi->ipi_ndescs, pi->ipi_flags, pi->ipi_pidx);
2795300113Sscottl	printf("pi new_pidx: %d csum_flags: %lx tso_segsz: %d mflags: %x vtag: %d\n",
2796300113Sscottl	       pi->ipi_new_pidx, pi->ipi_csum_flags, pi->ipi_tso_segsz, pi->ipi_mflags, pi->ipi_vtag);
2797300113Sscottl	printf("pi etype: %d ehdrlen: %d ip_hlen: %d ipproto: %d\n",
2798300113Sscottl	       pi->ipi_etype, pi->ipi_ehdrlen, pi->ipi_ip_hlen, pi->ipi_ipproto);
2799300113Sscottl}
2800300113Sscottl#endif
2801300113Sscottl
2802300113Sscottl#define IS_TSO4(pi) ((pi)->ipi_csum_flags & CSUM_IP_TSO)
2803338871Serj#define IS_TX_OFFLOAD4(pi) ((pi)->ipi_csum_flags & (CSUM_IP_TCP | CSUM_IP_TSO))
2804300113Sscottl#define IS_TSO6(pi) ((pi)->ipi_csum_flags & CSUM_IP6_TSO)
2805338871Serj#define IS_TX_OFFLOAD6(pi) ((pi)->ipi_csum_flags & (CSUM_IP6_TCP | CSUM_IP6_TSO))
2806300113Sscottl
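/*
 * Parse the Ethernet, IP and (when offloads need it) TCP headers of
 * *mp into 'pi' so the driver can program checksum/TSO offloads.  May
 * m_pullup() or m_dup() the chain, so the caller must reload *mp.
 */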
2807300113Sscottlstatic int
2808300113Sscottliflib_parse_header(iflib_txq_t txq, if_pkt_info_t pi, struct mbuf **mp)
2809300113Sscottl{
2810333338Sshurd	if_shared_ctx_t sctx = txq->ift_ctx->ifc_sctx;
2811300113Sscottl	struct ether_vlan_header *eh;
2812304704Sshurd	struct mbuf *m, *n;
2813300113Sscottl
2814304704Sshurd	n = m = *mp;
2815333338Sshurd	if ((sctx->isc_flags & IFLIB_NEED_SCRATCH) &&
2816333338Sshurd	    M_WRITABLE(m) == 0) {
2817333338Sshurd		if ((m = m_dup(m, M_NOWAIT)) == NULL) {
2818333338Sshurd			return (ENOMEM);
2819333338Sshurd		} else {
2820333338Sshurd			m_freem(*mp);
2821333338Sshurd			n = *mp = m;
2822333338Sshurd		}
2823333338Sshurd	}
2824333338Sshurd
2825300113Sscottl	/*
2826300113Sscottl	 * Determine where frame payload starts.
2827300113Sscottl	 * Jump over vlan headers if already present,
2828300113Sscottl	 * helpful for QinQ too.
2829300113Sscottl	 */
2830300113Sscottl	if (__predict_false(m->m_len < sizeof(*eh))) {
2831300113Sscottl		txq->ift_pullups++;
2832300113Sscottl		if (__predict_false((m = m_pullup(m, sizeof(*eh))) == NULL))
2833300113Sscottl			return (ENOMEM);
2834300113Sscottl	}
2835300113Sscottl	eh = mtod(m, struct ether_vlan_header *);
2836300113Sscottl	if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
2837300113Sscottl		pi->ipi_etype = ntohs(eh->evl_proto);
2838300113Sscottl		pi->ipi_ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
2839300113Sscottl	} else {
2840300113Sscottl		pi->ipi_etype = ntohs(eh->evl_encap_proto);
2841300113Sscottl		pi->ipi_ehdrlen = ETHER_HDR_LEN;
2842300113Sscottl	}
2843300113Sscottl
2844300113Sscottl	switch (pi->ipi_etype) {
2845300113Sscottl#ifdef INET
2846300113Sscottl	case ETHERTYPE_IP:
2847300113Sscottl	{
2848300113Sscottl		struct ip *ip = NULL;
2849300113Sscottl		struct tcphdr *th = NULL;
2850300113Sscottl		int minthlen;
2851300113Sscottl
2852300113Sscottl		minthlen = min(m->m_pkthdr.len, pi->ipi_ehdrlen + sizeof(*ip) + sizeof(*th));
2853300113Sscottl		if (__predict_false(m->m_len < minthlen)) {
2854300113Sscottl			/*
2855300113Sscottl			 * if this code bloat is causing too much of a hit
2856300113Sscottl			 * move it to a separate function and mark it noinline
2857300113Sscottl			 */
2858300113Sscottl			if (m->m_len == pi->ipi_ehdrlen) {
2859300113Sscottl				n = m->m_next;
2860300113Sscottl				MPASS(n);
2861300113Sscottl				if (n->m_len >= sizeof(*ip))  {
2862300113Sscottl					ip = (struct ip *)n->m_data;
2863300113Sscottl					if (n->m_len >= (ip->ip_hl << 2) + sizeof(*th))
2864300113Sscottl						th = (struct tcphdr *)((caddr_t)ip + (ip->ip_hl << 2));
2865300113Sscottl				} else {
2866300113Sscottl					txq->ift_pullups++;
2867300113Sscottl					if (__predict_false((m = m_pullup(m, minthlen)) == NULL))
2868300113Sscottl						return (ENOMEM);
2869300113Sscottl					ip = (struct ip *)(m->m_data + pi->ipi_ehdrlen);
2870300113Sscottl				}
2871300113Sscottl			} else {
2872300113Sscottl				txq->ift_pullups++;
2873300113Sscottl				if (__predict_false((m = m_pullup(m, minthlen)) == NULL))
2874300113Sscottl					return (ENOMEM);
2875300113Sscottl				ip = (struct ip *)(m->m_data + pi->ipi_ehdrlen);
2876300113Sscottl				if (m->m_len >= (ip->ip_hl << 2) + sizeof(*th))
2877300113Sscottl					th = (struct tcphdr *)((caddr_t)ip + (ip->ip_hl << 2));
2878300113Sscottl			}
2879300113Sscottl		} else {
2880300113Sscottl			ip = (struct ip *)(m->m_data + pi->ipi_ehdrlen);
2881300113Sscottl			if (m->m_len >= (ip->ip_hl << 2) + sizeof(*th))
2882300113Sscottl				th = (struct tcphdr *)((caddr_t)ip + (ip->ip_hl << 2));
2883300113Sscottl		}
2884300113Sscottl		pi->ipi_ip_hlen = ip->ip_hl << 2;
2885300113Sscottl		pi->ipi_ipproto = ip->ip_p;
2886300113Sscottl		pi->ipi_flags |= IPI_TX_IPV4;
2887300113Sscottl
2888338871Serj		/* TCP checksum offload may require TCP header length */
2889338871Serj		if (IS_TX_OFFLOAD4(pi)) {
2890338871Serj			if (__predict_true(pi->ipi_ipproto == IPPROTO_TCP)) {
2891333338Sshurd				if (__predict_false(th == NULL)) {
2892333338Sshurd					txq->ift_pullups++;
2893333338Sshurd					if (__predict_false((m = m_pullup(m, (ip->ip_hl << 2) + sizeof(*th))) == NULL))
2894333338Sshurd						return (ENOMEM);
2895333338Sshurd					th = (struct tcphdr *)((caddr_t)ip + pi->ipi_ip_hlen);
2896333338Sshurd				}
2897333338Sshurd				pi->ipi_tcp_hflags = th->th_flags;
2898333338Sshurd				pi->ipi_tcp_hlen = th->th_off << 2;
2899333338Sshurd				pi->ipi_tcp_seq = th->th_seq;
2900300113Sscottl			}
2901338871Serj			if (IS_TSO4(pi)) {
2902338871Serj				if (__predict_false(ip->ip_p != IPPROTO_TCP))
2903338871Serj					return (ENXIO);
2904347208Serj				/*
2905347208Serj				 * TSO always requires hardware checksum offload.
2906347208Serj				 */
2907347208Serj				pi->ipi_csum_flags |= (CSUM_IP_TCP | CSUM_IP);
2908338871Serj				th->th_sum = in_pseudo(ip->ip_src.s_addr,
2909338871Serj						       ip->ip_dst.s_addr, htons(IPPROTO_TCP));
2910338871Serj				pi->ipi_tso_segsz = m->m_pkthdr.tso_segsz;
2911338871Serj				if (sctx->isc_flags & IFLIB_TSO_INIT_IP) {
2912338871Serj					ip->ip_sum = 0;
2913338871Serj					ip->ip_len = htons(pi->ipi_ip_hlen + pi->ipi_tcp_hlen + pi->ipi_tso_segsz);
2914338871Serj				}
2915333338Sshurd			}
2916300113Sscottl		}
		if ((sctx->isc_flags & IFLIB_NEED_ZERO_CSUM) &&
		    (pi->ipi_csum_flags & CSUM_IP))
			ip->ip_sum = 0;
2919347208Serj
2920300113Sscottl		break;
2921300113Sscottl	}
2922300113Sscottl#endif
2923300113Sscottl#ifdef INET6
2924300113Sscottl	case ETHERTYPE_IPV6:
2925300113Sscottl	{
		struct ip6_hdr *ip6;
		struct tcphdr *th;

		pi->ipi_ip_hlen = sizeof(struct ip6_hdr);
		if (__predict_false(m->m_len < pi->ipi_ehdrlen + sizeof(struct ip6_hdr))) {
			txq->ift_pullups++;
			if (__predict_false((m = m_pullup(m, pi->ipi_ehdrlen + sizeof(struct ip6_hdr))) == NULL))
				return (ENOMEM);
		}
		/* compute header pointers only after any pullup, which can move m_data */
		ip6 = (struct ip6_hdr *)(m->m_data + pi->ipi_ehdrlen);
		th = (struct tcphdr *)((caddr_t)ip6 + pi->ipi_ip_hlen);
2935300113Sscottl
2936300113Sscottl		/* XXX-BZ this will go badly in case of ext hdrs. */
2937300113Sscottl		pi->ipi_ipproto = ip6->ip6_nxt;
2938300113Sscottl		pi->ipi_flags |= IPI_TX_IPV6;
2939300113Sscottl
2940338871Serj		/* TCP checksum offload may require TCP header length */
2941338871Serj		if (IS_TX_OFFLOAD6(pi)) {
2942333338Sshurd			if (pi->ipi_ipproto == IPPROTO_TCP) {
				if (__predict_false(m->m_len < pi->ipi_ehdrlen + sizeof(struct ip6_hdr) + sizeof(struct tcphdr))) {
					txq->ift_pullups++;
					if (__predict_false((m = m_pullup(m, pi->ipi_ehdrlen + sizeof(struct ip6_hdr) + sizeof(struct tcphdr))) == NULL))
						return (ENOMEM);
					/* refresh stale header pointers after the pullup */
					ip6 = (struct ip6_hdr *)(m->m_data + pi->ipi_ehdrlen);
					th = (struct tcphdr *)((caddr_t)ip6 + pi->ipi_ip_hlen);
				}
2948333338Sshurd				pi->ipi_tcp_hflags = th->th_flags;
2949333338Sshurd				pi->ipi_tcp_hlen = th->th_off << 2;
2950338871Serj				pi->ipi_tcp_seq = th->th_seq;
2951300113Sscottl			}
2952338871Serj			if (IS_TSO6(pi)) {
2953338871Serj				if (__predict_false(ip6->ip6_nxt != IPPROTO_TCP))
2954338871Serj					return (ENXIO);
2955338871Serj				/*
2956347208Serj				 * TSO always requires hardware checksum offload.
2957338871Serj				 */
2958338871Serj				pi->ipi_csum_flags |= CSUM_IP6_TCP;
2959338871Serj				th->th_sum = in6_cksum_pseudo(ip6, 0, IPPROTO_TCP, 0);
2960338871Serj				pi->ipi_tso_segsz = m->m_pkthdr.tso_segsz;
2961338871Serj			}
2962300113Sscottl		}
2963300113Sscottl		break;
2964300113Sscottl	}
2965300113Sscottl#endif
2966300113Sscottl	default:
2967300113Sscottl		pi->ipi_csum_flags &= ~CSUM_OFFLOAD;
2968300113Sscottl		pi->ipi_ip_hlen = 0;
2969300113Sscottl		break;
2970300113Sscottl	}
2971300113Sscottl	*mp = m;
2972333338Sshurd
2973300113Sscottl	return (0);
2974300113Sscottl}
2975300113Sscottl
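/*
 * Free leading zero-length mbufs and graft the packet header onto the
 * first mbuf that carries data, so the chain handed to busdma starts
 * with a non-empty mbuf.
 */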
2976300113Sscottlstatic  __noinline  struct mbuf *
2977300113Sscottlcollapse_pkthdr(struct mbuf *m0)
2978300113Sscottl{
2979300113Sscottl	struct mbuf *m, *m_next, *tmp;
2980300113Sscottl
2981300113Sscottl	m = m0;
2982300113Sscottl	m_next = m->m_next;
	/* m_free() frees a single mbuf and returns its successor */
	while (m_next != NULL && m_next->m_len == 0)
		m_next = m_free(m_next);
2989300113Sscottl	m = m0;
2990300113Sscottl	m->m_next = m_next;
2991300113Sscottl	if ((m_next->m_flags & M_EXT) == 0) {
2992300113Sscottl		m = m_defrag(m, M_NOWAIT);
2993300113Sscottl	} else {
2994300113Sscottl		tmp = m_next->m_next;
2995300113Sscottl		memcpy(m_next, m, MPKTHSIZE);
2996300113Sscottl		m = m_next;
2997300113Sscottl		m->m_next = tmp;
2998300113Sscottl	}
2999300113Sscottl	return (m);
3000300113Sscottl}
3001300113Sscottl
/*
 * If dodgy hardware rejects the scatter-gather chain we've handed it,
 * we'll need to remove the mbuf chain from ifsd_m[] before we can add
 * the m_defrag'd mbufs.
 */
3007300113Sscottlstatic __noinline struct mbuf *
3008304704Sshurdiflib_remove_mbuf(iflib_txq_t txq)
3009300113Sscottl{
3010304704Sshurd	int ntxd, i, pidx;
3011300113Sscottl	struct mbuf *m, *mh, **ifsd_m;
3012300113Sscottl
3013300113Sscottl	pidx = txq->ift_pidx;
3014300113Sscottl	ifsd_m = txq->ift_sds.ifsd_m;
3015304704Sshurd	ntxd = txq->ift_size;
3016300113Sscottl	mh = m = ifsd_m[pidx];
3017300113Sscottl	ifsd_m[pidx] = NULL;
3018300113Sscottl#if MEMORY_LOGGING
3019300113Sscottl	txq->ift_dequeued++;
3020300113Sscottl#endif
3021300113Sscottl	i = 1;
3022300113Sscottl
3023304704Sshurd	while (m) {
		ifsd_m[(pidx + i) & (ntxd - 1)] = NULL;
3025300113Sscottl#if MEMORY_LOGGING
3026300113Sscottl		txq->ift_dequeued++;
3027300113Sscottl#endif
3028300113Sscottl		m = m->m_next;
3029300113Sscottl		i++;
3030300113Sscottl	}
3031300113Sscottl	return (mh);
3032300113Sscottl}
3033300113Sscottl
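/*
 * Load the mbuf chain into 'segs'.  With a busdma map the work is
 * delegated to bus_dmamap_load_mbuf_sg() and each mbuf is recorded in
 * ifsd_m[] for later reclamation; without one the chain is walked
 * manually, translating each buffer page by page via pmap_kextract().
 */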
3034300113Sscottlstatic int
3035300113Sscottliflib_busdma_load_mbuf_sg(iflib_txq_t txq, bus_dma_tag_t tag, bus_dmamap_t map,
3036300113Sscottl			  struct mbuf **m0, bus_dma_segment_t *segs, int *nsegs,
3037300113Sscottl			  int max_segs, int flags)
3038300113Sscottl{
3039300113Sscottl	if_ctx_t ctx;
3040300113Sscottl	if_shared_ctx_t		sctx;
3041304704Sshurd	if_softc_ctx_t		scctx;
3042333338Sshurd	int i, next, pidx, err, ntxd, count;
3043333338Sshurd	struct mbuf *m, *tmp, **ifsd_m;
3044300113Sscottl
3045300113Sscottl	m = *m0;
3046300113Sscottl
3047300113Sscottl	/*
3048300113Sscottl	 * Please don't ever do this
3049300113Sscottl	 */
3050300113Sscottl	if (__predict_false(m->m_len == 0))
3051300113Sscottl		*m0 = m = collapse_pkthdr(m);
3052300113Sscottl
3053300113Sscottl	ctx = txq->ift_ctx;
3054300113Sscottl	sctx = ctx->ifc_sctx;
3055304704Sshurd	scctx = &ctx->ifc_softc_ctx;
3056300113Sscottl	ifsd_m = txq->ift_sds.ifsd_m;
3057304704Sshurd	ntxd = txq->ift_size;
3058300113Sscottl	pidx = txq->ift_pidx;
3059300113Sscottl	if (map != NULL) {
3060300113Sscottl		uint8_t *ifsd_flags = txq->ift_sds.ifsd_flags;
3061300113Sscottl
3062300113Sscottl		err = bus_dmamap_load_mbuf_sg(tag, map,
3063300113Sscottl					      *m0, segs, nsegs, BUS_DMA_NOWAIT);
3064300113Sscottl		if (err)
3065300113Sscottl			return (err);
3066300113Sscottl		ifsd_flags[pidx] |= TX_SW_DESC_MAPPED;
3067333338Sshurd		count = 0;
3068300113Sscottl		m = *m0;
3069300113Sscottl		do {
3070333338Sshurd			if (__predict_false(m->m_len <= 0)) {
3071333338Sshurd				tmp = m;
3072333338Sshurd				m = m->m_next;
3073333338Sshurd				tmp->m_next = NULL;
3074333338Sshurd				m_free(tmp);
3075333338Sshurd				continue;
3076333338Sshurd			}
3077300113Sscottl			m = m->m_next;
3078333338Sshurd			count++;
3079300113Sscottl		} while (m != NULL);
3080333338Sshurd		if (count > *nsegs) {
3081333338Sshurd			ifsd_m[pidx] = *m0;
3082333338Sshurd			ifsd_m[pidx]->m_flags |= M_TOOBIG;
3083333338Sshurd			return (0);
3084333338Sshurd		}
3085333338Sshurd		m = *m0;
3086333338Sshurd		count = 0;
3087333338Sshurd		do {
3088333338Sshurd			next = (pidx + count) & (ntxd-1);
3089333338Sshurd			MPASS(ifsd_m[next] == NULL);
3090333338Sshurd			ifsd_m[next] = m;
3091333338Sshurd			count++;
3092333338Sshurd			tmp = m;
3093333338Sshurd			m = m->m_next;
3094333338Sshurd		} while (m != NULL);
3095300113Sscottl	} else {
3096333338Sshurd		int buflen, sgsize, maxsegsz, max_sgsize;
3097300113Sscottl		vm_offset_t vaddr;
3098300113Sscottl		vm_paddr_t curaddr;
3099300113Sscottl
3100300113Sscottl		count = i = 0;
3101300113Sscottl		m = *m0;
3102333338Sshurd		if (m->m_pkthdr.csum_flags & CSUM_TSO)
3103333338Sshurd			maxsegsz = scctx->isc_tx_tso_segsize_max;
3104333338Sshurd		else
3105333338Sshurd			maxsegsz = sctx->isc_tx_maxsegsize;
3106333338Sshurd
3107300113Sscottl		do {
3108300113Sscottl			if (__predict_false(m->m_len <= 0)) {
3109300113Sscottl				tmp = m;
3110300113Sscottl				m = m->m_next;
3111300113Sscottl				tmp->m_next = NULL;
3112300113Sscottl				m_free(tmp);
3113300113Sscottl				continue;
3114300113Sscottl			}
3115300113Sscottl			buflen = m->m_len;
3116300113Sscottl			vaddr = (vm_offset_t)m->m_data;
3117300113Sscottl			/*
3118300113Sscottl			 * see if we can't be smarter about physically
3119300113Sscottl			 * contiguous mappings
3120300113Sscottl			 */
3121300113Sscottl			next = (pidx + count) & (ntxd-1);
3122300113Sscottl			MPASS(ifsd_m[next] == NULL);
3123300113Sscottl#if MEMORY_LOGGING
3124300113Sscottl			txq->ift_enqueued++;
3125300113Sscottl#endif
3126300113Sscottl			ifsd_m[next] = m;
3127300113Sscottl			while (buflen > 0) {
3128333338Sshurd				if (i >= max_segs)
3129333338Sshurd					goto err;
3130300113Sscottl				max_sgsize = MIN(buflen, maxsegsz);
3131300113Sscottl				curaddr = pmap_kextract(vaddr);
3132300113Sscottl				sgsize = PAGE_SIZE - (curaddr & PAGE_MASK);
3133300113Sscottl				sgsize = MIN(sgsize, max_sgsize);
3134300113Sscottl				segs[i].ds_addr = curaddr;
3135300113Sscottl				segs[i].ds_len = sgsize;
3136300113Sscottl				vaddr += sgsize;
3137300113Sscottl				buflen -= sgsize;
3138300113Sscottl				i++;
3139300113Sscottl			}
3140300113Sscottl			count++;
3141300113Sscottl			tmp = m;
3142300113Sscottl			m = m->m_next;
3143300113Sscottl		} while (m != NULL);
3144300113Sscottl		*nsegs = i;
3145300113Sscottl	}
3146300113Sscottl	return (0);
3147300113Sscottlerr:
3148304704Sshurd	*m0 = iflib_remove_mbuf(txq);
3149300113Sscottl	return (EFBIG);
3150300113Sscottl}
3151300113Sscottl
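/*
 * Return the address of the hardware descriptor one cache line beyond
 * cidx, wrapping at the end of the ring, so it can be prefetched ahead
 * of use.
 */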
3152333338Sshurdstatic inline caddr_t
3153333338Sshurdcalc_next_txd(iflib_txq_t txq, int cidx, uint8_t qid)
3154333338Sshurd{
3155333338Sshurd	qidx_t size;
3156333338Sshurd	int ntxd;
3157333338Sshurd	caddr_t start, end, cur, next;
3158333338Sshurd
3159333338Sshurd	ntxd = txq->ift_size;
3160333338Sshurd	size = txq->ift_txd_size[qid];
3161333338Sshurd	start = txq->ift_ifdi[qid].idi_vaddr;
3162333338Sshurd
3163333338Sshurd	if (__predict_false(size == 0))
3164333338Sshurd		return (start);
3165333338Sshurd	cur = start + size*cidx;
3166333338Sshurd	end = start + size*ntxd;
3167333338Sshurd	next = CACHE_PTR_NEXT(cur);
3168333338Sshurd	return (next < end ? next : start);
3169333338Sshurd}
3170333338Sshurd
3171333338Sshurd/*
3172333338Sshurd * Pad an mbuf to ensure a minimum ethernet frame size.
3173333338Sshurd * min_frame_size is the frame size (less CRC) to pad the mbuf to
3174333338Sshurd */
3175333338Sshurdstatic __noinline int
3176333338Sshurdiflib_ether_pad(device_t dev, struct mbuf **m_head, uint16_t min_frame_size)
3177333338Sshurd{
	/*
	 * 18 is enough bytes to pad an ARP packet to 46 bytes, and
	 * an ARP message is the smallest common payload I can think of.
	 */
	static char pad[18];	/* just zeros */
3183333338Sshurd	int n;
3184333338Sshurd	struct mbuf *new_head;
3185333338Sshurd
	if (!M_WRITABLE(*m_head)) {
		new_head = m_dup(*m_head, M_NOWAIT);
		if (new_head == NULL) {
			m_freem(*m_head);
			device_printf(dev, "cannot pad short frame, m_dup() failed\n");
			DBG_COUNTER_INC(encap_pad_mbuf_fail);
			return (ENOMEM);
		}
		m_freem(*m_head);
		*m_head = new_head;
	}
3197333338Sshurd
3198333338Sshurd	for (n = min_frame_size - (*m_head)->m_pkthdr.len;
3199333338Sshurd	     n > 0; n -= sizeof(pad))
3200333338Sshurd		if (!m_append(*m_head, min(n, sizeof(pad)), pad))
3201333338Sshurd			break;
3202333338Sshurd
3203333338Sshurd	if (n > 0) {
3204333338Sshurd		m_freem(*m_head);
3205333338Sshurd		device_printf(dev, "cannot pad short frame\n");
3206333338Sshurd		DBG_COUNTER_INC(encap_pad_mbuf_fail);
3207333338Sshurd		return (ENOBUFS);
3208333338Sshurd	}
3209333338Sshurd
	return (0);
3211333338Sshurd}
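
/*
 * Worked example: an ARP request is a 28-byte payload behind a 14-byte
 * Ethernet header, i.e. a 42-byte frame.  Padding it to the 60-byte
 * minimum (64 less 4 bytes of CRC) takes exactly 18 bytes -- hence the
 * size of the zero pad above.
 */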
3212333338Sshurd
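/*
 * Map a packet onto the TX ring: DMA-load the chain, fill in the
 * if_pkt_info and hand it to the driver's txd_encap.  On EFBIG the
 * chain is collapsed, then defragmented, before giving up.
 */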
3213300113Sscottlstatic int
3214300113Sscottliflib_encap(iflib_txq_t txq, struct mbuf **m_headp)
3215300113Sscottl{
3216300113Sscottl	if_ctx_t		ctx;
3217300113Sscottl	if_shared_ctx_t		sctx;
3218300113Sscottl	if_softc_ctx_t		scctx;
3219300113Sscottl	bus_dma_segment_t	*segs;
3220300113Sscottl	struct mbuf		*m_head;
3221333338Sshurd	void			*next_txd;
3222300113Sscottl	bus_dmamap_t		map;
3223300113Sscottl	struct if_pkt_info	pi;
3224300113Sscottl	int remap = 0;
3225300113Sscottl	int err, nsegs, ndesc, max_segs, pidx, cidx, next, ntxd;
3226300113Sscottl	bus_dma_tag_t desc_tag;
3227300113Sscottl
3228300113Sscottl	segs = txq->ift_segs;
3229300113Sscottl	ctx = txq->ift_ctx;
3230300113Sscottl	sctx = ctx->ifc_sctx;
3231300113Sscottl	scctx = &ctx->ifc_softc_ctx;
3233304704Sshurd	ntxd = txq->ift_size;
3234300113Sscottl	m_head = *m_headp;
3235300113Sscottl	map = NULL;
3236300113Sscottl
3237300113Sscottl	/*
3238300113Sscottl	 * If we're doing TSO the next descriptor to clean may be quite far ahead
3239300113Sscottl	 */
3240300113Sscottl	cidx = txq->ift_cidx;
3241300113Sscottl	pidx = txq->ift_pidx;
3242333338Sshurd	if (ctx->ifc_flags & IFC_PREFETCH) {
3243333338Sshurd		next = (cidx + CACHE_PTR_INCREMENT) & (ntxd-1);
		if (!(sctx->isc_flags & IFLIB_HAS_TXCQ)) {
3245333338Sshurd			next_txd = calc_next_txd(txq, cidx, 0);
3246333338Sshurd			prefetch(next_txd);
3247333338Sshurd		}
3248300113Sscottl
3249333338Sshurd		/* prefetch the next cache line of mbuf pointers and flags */
3250333338Sshurd		prefetch(&txq->ift_sds.ifsd_m[next]);
3251333338Sshurd		if (txq->ift_sds.ifsd_map != NULL) {
3252333338Sshurd			prefetch(&txq->ift_sds.ifsd_map[next]);
3253333338Sshurd			next = (cidx + CACHE_LINE_SIZE) & (ntxd-1);
3254333338Sshurd			prefetch(&txq->ift_sds.ifsd_flags[next]);
3255333338Sshurd		}
3256333338Sshurd	} else if (txq->ift_sds.ifsd_map != NULL)
3257300113Sscottl		map = txq->ift_sds.ifsd_map[pidx];
3258300113Sscottl
3259300113Sscottl	if (m_head->m_pkthdr.csum_flags & CSUM_TSO) {
3260300113Sscottl		desc_tag = txq->ift_tso_desc_tag;
3261300113Sscottl		max_segs = scctx->isc_tx_tso_segments_max;
3262300113Sscottl	} else {
3263300113Sscottl		desc_tag = txq->ift_desc_tag;
3264300113Sscottl		max_segs = scctx->isc_tx_nsegments;
3265300113Sscottl	}
3266333338Sshurd	if ((sctx->isc_flags & IFLIB_NEED_ETHER_PAD) &&
3267333338Sshurd	    __predict_false(m_head->m_pkthdr.len < scctx->isc_min_frame_size)) {
3268333338Sshurd		err = iflib_ether_pad(ctx->ifc_dev, m_headp, scctx->isc_min_frame_size);
3269333338Sshurd		if (err)
3270333338Sshurd			return err;
3271333338Sshurd	}
3272300113Sscottl	m_head = *m_headp;
3273333338Sshurd
3274333338Sshurd	pkt_info_zero(&pi);
3275333338Sshurd	pi.ipi_mflags = (m_head->m_flags & (M_VLANTAG|M_BCAST|M_MCAST));
3276333338Sshurd	pi.ipi_pidx = pidx;
3277333338Sshurd	pi.ipi_qsidx = txq->ift_id;
3278300113Sscottl	pi.ipi_len = m_head->m_pkthdr.len;
3279300113Sscottl	pi.ipi_csum_flags = m_head->m_pkthdr.csum_flags;
3280300113Sscottl	pi.ipi_vtag = (m_head->m_flags & M_VLANTAG) ? m_head->m_pkthdr.ether_vtag : 0;
3281300113Sscottl
3282300113Sscottl	/* deliberate bitwise OR to make one condition */
3283300113Sscottl	if (__predict_true((pi.ipi_csum_flags | pi.ipi_vtag))) {
3284300113Sscottl		if (__predict_false((err = iflib_parse_header(txq, &pi, m_headp)) != 0))
3285300113Sscottl			return (err);
3286300113Sscottl		m_head = *m_headp;
3287300113Sscottl	}
3288300113Sscottl
3289300113Sscottlretry:
3290300113Sscottl	err = iflib_busdma_load_mbuf_sg(txq, desc_tag, map, m_headp, segs, &nsegs, max_segs, BUS_DMA_NOWAIT);
3291300113Sscottldefrag:
3292300113Sscottl	if (__predict_false(err)) {
3293300113Sscottl		switch (err) {
3294300113Sscottl		case EFBIG:
3295300113Sscottl			/* try collapse once and defrag once */
3296344472Sshurd			if (remap == 0) {
3297300113Sscottl				m_head = m_collapse(*m_headp, M_NOWAIT, max_segs);
3298344472Sshurd				/* try defrag if collapsing fails */
3299344472Sshurd				if (m_head == NULL)
3300344472Sshurd					remap++;
3301344472Sshurd			}
3302300113Sscottl			if (remap == 1)
3303300113Sscottl				m_head = m_defrag(*m_headp, M_NOWAIT);
3304347212Serj			/*
3305347212Serj			 * remap should never be >1 unless bus_dmamap_load_mbuf_sg
3306347212Serj			 * failed to map an mbuf that was run through m_defrag
3307347212Serj			 */
3308347212Serj			MPASS(remap <= 1);
3309347212Serj			if (__predict_false(m_head == NULL || remap > 1))
3310347212Serj				goto defrag_failed;
3311300113Sscottl			remap++;
3312300113Sscottl			txq->ift_mbuf_defrag++;
3313300113Sscottl			*m_headp = m_head;
3314300113Sscottl			goto retry;
3316300113Sscottl		case ENOMEM:
3317300113Sscottl			txq->ift_no_tx_dma_setup++;
3318300113Sscottl			break;
3319300113Sscottl		default:
3320300113Sscottl			txq->ift_no_tx_dma_setup++;
3321300113Sscottl			m_freem(*m_headp);
3322300113Sscottl			DBG_COUNTER_INC(tx_frees);
3323300113Sscottl			*m_headp = NULL;
3324300113Sscottl			break;
3325300113Sscottl		}
3326300113Sscottl		txq->ift_map_failed++;
3327300113Sscottl		DBG_COUNTER_INC(encap_load_mbuf_fail);
3328300113Sscottl		return (err);
3329300113Sscottl	}
3330300113Sscottl
3331300113Sscottl	/*
3332300113Sscottl	 * XXX assumes a 1 to 1 relationship between segments and
3333300113Sscottl	 *        descriptors - this does not hold true on all drivers, e.g.
3334300113Sscottl	 *        cxgb
3335300113Sscottl	 */
3336300113Sscottl	if (__predict_false(nsegs + 2 > TXQ_AVAIL(txq))) {
3337300113Sscottl		txq->ift_no_desc_avail++;
3338300113Sscottl		if (map != NULL)
3339300113Sscottl			bus_dmamap_unload(desc_tag, map);
3340300113Sscottl		DBG_COUNTER_INC(encap_txq_avail_fail);
3341304704Sshurd		if ((txq->ift_task.gt_task.ta_flags & TASK_ENQUEUED) == 0)
3342300113Sscottl			GROUPTASK_ENQUEUE(&txq->ift_task);
3343300113Sscottl		return (ENOBUFS);
3344300113Sscottl	}
3345333338Sshurd	/*
3346333338Sshurd	 * On Intel cards we can greatly reduce the number of TX interrupts
3347333338Sshurd	 * we see by only setting report status on every Nth descriptor.
3348333338Sshurd	 * However, this also means that the driver will need to keep track
3349333338Sshurd	 * of the descriptors that RS was set on to check them for the DD bit.
3350333338Sshurd	 */
3351333338Sshurd	txq->ift_rs_pending += nsegs + 1;
3352333338Sshurd	if (txq->ift_rs_pending > TXQ_MAX_RS_DEFERRED(txq) ||
3353333503Sshurd	     iflib_no_tx_batch || (TXQ_AVAIL(txq) - nsegs) <= MAX_TX_DESC(ctx) + 2) {
3354333338Sshurd		pi.ipi_flags |= IPI_TX_INTR;
3355333338Sshurd		txq->ift_rs_pending = 0;
3356333338Sshurd	}
3357333338Sshurd
3358300113Sscottl	pi.ipi_segs = segs;
3359300113Sscottl	pi.ipi_nsegs = nsegs;
3360300113Sscottl
3361304704Sshurd	MPASS(pidx >= 0 && pidx < txq->ift_size);
3362300113Sscottl#ifdef PKT_DEBUG
3363300113Sscottl	print_pkt(&pi);
3364300113Sscottl#endif
3365333338Sshurd	if (map != NULL)
3366333338Sshurd		bus_dmamap_sync(desc_tag, map, BUS_DMASYNC_PREWRITE);
3367300113Sscottl	if ((err = ctx->isc_txd_encap(ctx->ifc_softc, &pi)) == 0) {
3368333338Sshurd		if (map != NULL)
3369333338Sshurd			bus_dmamap_sync(txq->ift_ifdi->idi_tag, txq->ift_ifdi->idi_map,
3370333338Sshurd					BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
3371300113Sscottl		DBG_COUNTER_INC(tx_encap);
3372333338Sshurd		MPASS(pi.ipi_new_pidx < txq->ift_size);
3373300113Sscottl
3374300113Sscottl		ndesc = pi.ipi_new_pidx - pi.ipi_pidx;
3375300113Sscottl		if (pi.ipi_new_pidx < pi.ipi_pidx) {
3376304704Sshurd			ndesc += txq->ift_size;
3377300113Sscottl			txq->ift_gen = 1;
3378300113Sscottl		}
3379333338Sshurd		/*
3380333338Sshurd		 * drivers can need as many as
3381333338Sshurd		 * two sentinels
3382333338Sshurd		 */
3383333338Sshurd		MPASS(ndesc <= pi.ipi_nsegs + 2);
3384300113Sscottl		MPASS(pi.ipi_new_pidx != pidx);
3385300113Sscottl		MPASS(ndesc > 0);
3386300113Sscottl		txq->ift_in_use += ndesc;
3387333338Sshurd
3388300113Sscottl		/*
3389300113Sscottl		 * We update the last software descriptor again here because there may
3390300113Sscottl		 * be a sentinel and/or there may be more mbufs than segments
3391300113Sscottl		 */
3392300113Sscottl		txq->ift_pidx = pi.ipi_new_pidx;
3393300113Sscottl		txq->ift_npending += pi.ipi_ndescs;
3394344472Sshurd	} else {
3395304704Sshurd		*m_headp = m_head = iflib_remove_mbuf(txq);
3396344472Sshurd		if (err == EFBIG) {
3397344472Sshurd			txq->ift_txd_encap_efbig++;
3398344472Sshurd			if (remap < 2) {
3399344472Sshurd				remap = 1;
3400344472Sshurd				goto defrag;
3401344472Sshurd			}
3402344472Sshurd		}
3403300113Sscottl		DBG_COUNTER_INC(encap_txd_encap_fail);
3404344472Sshurd		goto defrag_failed;
3405344472Sshurd	}
3406300113Sscottl	return (err);
3407300113Sscottl
3408300113Sscottldefrag_failed:
3409300113Sscottl	txq->ift_mbuf_defrag_failed++;
3410300113Sscottl	txq->ift_map_failed++;
3411300113Sscottl	m_freem(*m_headp);
3412300113Sscottl	DBG_COUNTER_INC(tx_frees);
3413300113Sscottl	*m_headp = NULL;
3414300113Sscottl	return (ENOMEM);
3415300113Sscottl}
3416300113Sscottl
3417300113Sscottlstatic void
3418300113Sscottliflib_tx_desc_free(iflib_txq_t txq, int n)
3419300113Sscottl{
3420300113Sscottl	int hasmap;
3421300113Sscottl	uint32_t qsize, cidx, mask, gen;
3422300113Sscottl	struct mbuf *m, **ifsd_m;
3423300113Sscottl	uint8_t *ifsd_flags;
3424300113Sscottl	bus_dmamap_t *ifsd_map;
3425333338Sshurd	bool do_prefetch;
3426300113Sscottl
3427300113Sscottl	cidx = txq->ift_cidx;
3428300113Sscottl	gen = txq->ift_gen;
3429304704Sshurd	qsize = txq->ift_size;
3430300113Sscottl	mask = qsize-1;
3431300113Sscottl	hasmap = txq->ift_sds.ifsd_map != NULL;
3432300113Sscottl	ifsd_flags = txq->ift_sds.ifsd_flags;
3433300113Sscottl	ifsd_m = txq->ift_sds.ifsd_m;
3434300113Sscottl	ifsd_map = txq->ift_sds.ifsd_map;
3435333338Sshurd	do_prefetch = (txq->ift_ctx->ifc_flags & IFC_PREFETCH);
3436300113Sscottl
3437300113Sscottl	while (n--) {
3438333338Sshurd		if (do_prefetch) {
3439333338Sshurd			prefetch(ifsd_m[(cidx + 3) & mask]);
3440333338Sshurd			prefetch(ifsd_m[(cidx + 4) & mask]);
3441333338Sshurd		}
3442300113Sscottl		if (ifsd_m[cidx] != NULL) {
3443300113Sscottl			prefetch(&ifsd_m[(cidx + CACHE_PTR_INCREMENT) & mask]);
3444300113Sscottl			prefetch(&ifsd_flags[(cidx + CACHE_PTR_INCREMENT) & mask]);
3445300113Sscottl			if (hasmap && (ifsd_flags[cidx] & TX_SW_DESC_MAPPED)) {
3446300113Sscottl				/*
3447300113Sscottl				 * does it matter if it's not the TSO tag? If so we'll
3448300113Sscottl				 * have to add the type to flags
3449300113Sscottl				 */
3450300113Sscottl				bus_dmamap_unload(txq->ift_desc_tag, ifsd_map[cidx]);
3451300113Sscottl				ifsd_flags[cidx] &= ~TX_SW_DESC_MAPPED;
3452300113Sscottl			}
3453300113Sscottl			if ((m = ifsd_m[cidx]) != NULL) {
3454300113Sscottl				/* XXX we don't support any drivers that batch packets yet */
3455300113Sscottl				MPASS(m->m_nextpkt == NULL);
				/*
				 * If the number of clusters exceeds the number of
				 * segments there won't be space on the ring to save
				 * a pointer to each cluster, so we simply free the
				 * list here.
				 */
3460333338Sshurd				if (m->m_flags & M_TOOBIG) {
3461333338Sshurd					m_freem(m);
3462333338Sshurd				} else {
3463333338Sshurd					m_free(m);
3464333338Sshurd				}
3465300113Sscottl				ifsd_m[cidx] = NULL;
3466300113Sscottl#if MEMORY_LOGGING
3467300113Sscottl				txq->ift_dequeued++;
3468300113Sscottl#endif
3469300113Sscottl				DBG_COUNTER_INC(tx_frees);
3470300113Sscottl			}
3471300113Sscottl		}
3472300113Sscottl		if (__predict_false(++cidx == qsize)) {
3473300113Sscottl			cidx = 0;
3474300113Sscottl			gen = 0;
3475300113Sscottl		}
3476300113Sscottl	}
3477300113Sscottl	txq->ift_cidx = cidx;
3478300113Sscottl	txq->ift_gen = gen;
3479300113Sscottl}
3480300113Sscottl
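/*
 * Harvest descriptors the hardware has finished with.  Returns 0 if
 * fewer than 'thresh' descriptors are reclaimable per
 * DESC_RECLAIMABLE() (processed less cleaned less a segment
 * allowance), otherwise the number reclaimed.
 */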
3481300113Sscottlstatic __inline int
3482300113Sscottliflib_completed_tx_reclaim(iflib_txq_t txq, int thresh)
3483300113Sscottl{
3484300113Sscottl	int reclaim;
3485300113Sscottl	if_ctx_t ctx = txq->ift_ctx;
3486300113Sscottl
3487300113Sscottl	KASSERT(thresh >= 0, ("invalid threshold to reclaim"));
3488300113Sscottl	MPASS(thresh /*+ MAX_TX_DESC(txq->ift_ctx) */ < txq->ift_size);
3489300113Sscottl
3490300113Sscottl	/*
3491300113Sscottl	 * Need a rate-limiting check so that this isn't called every time
3492300113Sscottl	 */
3493300113Sscottl	iflib_tx_credits_update(ctx, txq);
3494300113Sscottl	reclaim = DESC_RECLAIMABLE(txq);
3495300113Sscottl
3496300113Sscottl	if (reclaim <= thresh /* + MAX_TX_DESC(txq->ift_ctx) */) {
3497300113Sscottl#ifdef INVARIANTS
3498300113Sscottl		if (iflib_verbose_debug) {
3499300113Sscottl			printf("%s processed=%ju cleaned=%ju tx_nsegments=%d reclaim=%d thresh=%d\n", __FUNCTION__,
3500300113Sscottl			       txq->ift_processed, txq->ift_cleaned, txq->ift_ctx->ifc_softc_ctx.isc_tx_nsegments,
3501300113Sscottl			       reclaim, thresh);
3502300113Sscottl
3503300113Sscottl		}
3504300113Sscottl#endif
3505300113Sscottl		return (0);
3506300113Sscottl	}
3507300113Sscottl	iflib_tx_desc_free(txq, reclaim);
3508300113Sscottl	txq->ift_cleaned += reclaim;
3509300113Sscottl	txq->ift_in_use -= reclaim;
3510300113Sscottl
3511300113Sscottl	return (reclaim);
3512300113Sscottl}
3513300113Sscottl
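/*
 * Return a pointer to ring slot (cidx + offset), prefetching that item
 * and, when more work remains, the next few items and slots to hide
 * memory latency on the consumer path.
 */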
3514300113Sscottlstatic struct mbuf **
3515333338Sshurd_ring_peek_one(struct ifmp_ring *r, int cidx, int offset, int remaining)
3516300113Sscottl{
3517333338Sshurd	int next, size;
3518333338Sshurd	struct mbuf **items;
3519300113Sscottl
3520333338Sshurd	size = r->size;
3521333338Sshurd	next = (cidx + CACHE_PTR_INCREMENT) & (size-1);
3522333338Sshurd	items = __DEVOLATILE(struct mbuf **, &r->items[0]);
3523333338Sshurd
3524333338Sshurd	prefetch(items[(cidx + offset) & (size-1)]);
3525333338Sshurd	if (remaining > 1) {
3526333338Sshurd		prefetch2cachelines(&items[next]);
3527333338Sshurd		prefetch2cachelines(items[(cidx + offset + 1) & (size-1)]);
3528333338Sshurd		prefetch2cachelines(items[(cidx + offset + 2) & (size-1)]);
3529333338Sshurd		prefetch2cachelines(items[(cidx + offset + 3) & (size-1)]);
3530333338Sshurd	}
3531333338Sshurd	return (__DEVOLATILE(struct mbuf **, &r->items[(cidx + offset) & (size-1)]));
3532300113Sscottl}
3533300113Sscottl
3534300113Sscottlstatic void
3535300113Sscottliflib_txq_check_drain(iflib_txq_t txq, int budget)
3536300113Sscottl{
3537300113Sscottl
3538333338Sshurd	ifmp_ring_check_drainage(txq->ift_br, budget);
3539300113Sscottl}
3540300113Sscottl
3541300113Sscottlstatic uint32_t
3542300113Sscottliflib_txq_can_drain(struct ifmp_ring *r)
3543300113Sscottl{
3544300113Sscottl	iflib_txq_t txq = r->cookie;
3545300113Sscottl	if_ctx_t ctx = txq->ift_ctx;
3546300113Sscottl
3547333338Sshurd	return ((TXQ_AVAIL(txq) > MAX_TX_DESC(ctx) + 2) ||
3548333338Sshurd		ctx->isc_txd_credits_update(ctx->ifc_softc, txq->ift_id, false));
3549300113Sscottl}
3550300113Sscottl
3551300113Sscottlstatic uint32_t
3552300113Sscottliflib_txq_drain(struct ifmp_ring *r, uint32_t cidx, uint32_t pidx)
3553300113Sscottl{
3554300113Sscottl	iflib_txq_t txq = r->cookie;
3555300113Sscottl	if_ctx_t ctx = txq->ift_ctx;
3556333338Sshurd	struct ifnet *ifp = ctx->ifc_ifp;
3557300113Sscottl	struct mbuf **mp, *m;
3558333338Sshurd	int i, count, consumed, pkt_sent, bytes_sent, mcast_sent, avail;
3559333338Sshurd	int reclaimed, err, in_use_prev, desc_used;
3560333338Sshurd	bool do_prefetch, ring, rang;
3561300113Sscottl
3562300113Sscottl	if (__predict_false(!(if_getdrvflags(ifp) & IFF_DRV_RUNNING) ||
3563300113Sscottl			    !LINK_ACTIVE(ctx))) {
3564300113Sscottl		DBG_COUNTER_INC(txq_drain_notready);
3565300113Sscottl		return (0);
3566300113Sscottl	}
3567333338Sshurd	reclaimed = iflib_completed_tx_reclaim(txq, RECLAIM_THRESH(ctx));
3568333338Sshurd	rang = iflib_txd_db_check(ctx, txq, reclaimed, txq->ift_in_use);
3569300113Sscottl	avail = IDXDIFF(pidx, cidx, r->size);
3570300113Sscottl	if (__predict_false(ctx->ifc_flags & IFC_QFLUSH)) {
3571300113Sscottl		DBG_COUNTER_INC(txq_drain_flushing);
3572300113Sscottl		for (i = 0; i < avail; i++) {
3573304704Sshurd			m_free(r->items[(cidx + i) & (r->size-1)]);
3574300113Sscottl			r->items[(cidx + i) & (r->size-1)] = NULL;
3575300113Sscottl		}
3576300113Sscottl		return (avail);
3577300113Sscottl	}
3578333338Sshurd
3579300113Sscottl	if (__predict_false(if_getdrvflags(ctx->ifc_ifp) & IFF_DRV_OACTIVE)) {
3580300113Sscottl		txq->ift_qstatus = IFLIB_QUEUE_IDLE;
3581300113Sscottl		CALLOUT_LOCK(txq);
3582300113Sscottl		callout_stop(&txq->ift_timer);
3583300113Sscottl		CALLOUT_UNLOCK(txq);
3584300113Sscottl		DBG_COUNTER_INC(txq_drain_oactive);
3585300113Sscottl		return (0);
3586300113Sscottl	}
3587333338Sshurd	if (reclaimed)
3588333338Sshurd		txq->ift_qstatus = IFLIB_QUEUE_IDLE;
	/* err must be initialized: the loop below may not execute at all */
	err = consumed = mcast_sent = bytes_sent = pkt_sent = 0;
3590300113Sscottl	count = MIN(avail, TX_BATCH_SIZE);
3591333338Sshurd#ifdef INVARIANTS
3592333338Sshurd	if (iflib_verbose_debug)
3593333338Sshurd		printf("%s avail=%d ifc_flags=%x txq_avail=%d ", __FUNCTION__,
3594333338Sshurd		       avail, ctx->ifc_flags, TXQ_AVAIL(txq));
3595333338Sshurd#endif
3596333338Sshurd	do_prefetch = (ctx->ifc_flags & IFC_PREFETCH);
3597333338Sshurd	avail = TXQ_AVAIL(txq);
3598333338Sshurd	for (desc_used = i = 0; i < count && avail > MAX_TX_DESC(ctx) + 2; i++) {
		int rem = do_prefetch ? count - i : 0;
3600300113Sscottl
3601333338Sshurd		mp = _ring_peek_one(r, cidx, i, rem);
3602333338Sshurd		MPASS(mp != NULL && *mp != NULL);
3603333338Sshurd		if (__predict_false(*mp == (struct mbuf *)txq)) {
3604333338Sshurd			consumed++;
3605333338Sshurd			reclaimed++;
3606333338Sshurd			continue;
3607333338Sshurd		}
3608300113Sscottl		in_use_prev = txq->ift_in_use;
3610300113Sscottl		err = iflib_encap(txq, mp);
		if (__predict_false(err)) {
			DBG_COUNTER_INC(txq_drain_encapfail);
			/* no room - bail out */
			if (err == ENOBUFS)
				break;
			consumed++;
			/* we can't send this packet - skip it */
			continue;
		}
3620300113Sscottl		}
3621300113Sscottl		consumed++;
3622300113Sscottl		pkt_sent++;
3623300113Sscottl		m = *mp;
3624300113Sscottl		DBG_COUNTER_INC(tx_sent);
3625300113Sscottl		bytes_sent += m->m_pkthdr.len;
3626333338Sshurd		mcast_sent += !!(m->m_flags & M_MCAST);
3627333338Sshurd		avail = TXQ_AVAIL(txq);
3628300113Sscottl
3629300113Sscottl		txq->ift_db_pending += (txq->ift_in_use - in_use_prev);
3630300113Sscottl		desc_used += (txq->ift_in_use - in_use_prev);
3631300113Sscottl		ETHER_BPF_MTAP(ifp, m);
3632333338Sshurd		if (__predict_false(!(ifp->if_drv_flags & IFF_DRV_RUNNING)))
3633300113Sscottl			break;
3634333338Sshurd		rang = iflib_txd_db_check(ctx, txq, false, in_use_prev);
3635300113Sscottl	}
3636300113Sscottl
3637333338Sshurd	/* deliberate use of bitwise or to avoid gratuitous short-circuit */
	ring = rang ? false : (iflib_min_tx_latency | err) || (TXQ_AVAIL(txq) < MAX_TX_DESC(ctx));
3639333338Sshurd	iflib_txd_db_check(ctx, txq, ring, txq->ift_in_use);
3640300113Sscottl	if_inc_counter(ifp, IFCOUNTER_OBYTES, bytes_sent);
3641300113Sscottl	if_inc_counter(ifp, IFCOUNTER_OPACKETS, pkt_sent);
3642300113Sscottl	if (mcast_sent)
3643300113Sscottl		if_inc_counter(ifp, IFCOUNTER_OMCASTS, mcast_sent);
3644333338Sshurd#ifdef INVARIANTS
3645333338Sshurd	if (iflib_verbose_debug)
3646333338Sshurd		printf("consumed=%d\n", consumed);
3647333338Sshurd#endif
3648300113Sscottl	return (consumed);
3649300113Sscottl}
3650300113Sscottl
3651333338Sshurdstatic uint32_t
3652333338Sshurdiflib_txq_drain_always(struct ifmp_ring *r)
3653333338Sshurd{
3654333338Sshurd	return (1);
3655333338Sshurd}
3656333338Sshurd
3657333338Sshurdstatic uint32_t
3658333338Sshurdiflib_txq_drain_free(struct ifmp_ring *r, uint32_t cidx, uint32_t pidx)
3659333338Sshurd{
3660333338Sshurd	int i, avail;
3661333338Sshurd	struct mbuf **mp;
3662333338Sshurd	iflib_txq_t txq;
3663333338Sshurd
3664333338Sshurd	txq = r->cookie;
3665333338Sshurd
3666333338Sshurd	txq->ift_qstatus = IFLIB_QUEUE_IDLE;
3667333338Sshurd	CALLOUT_LOCK(txq);
3668333338Sshurd	callout_stop(&txq->ift_timer);
3669333338Sshurd	CALLOUT_UNLOCK(txq);
3670333338Sshurd
3671333338Sshurd	avail = IDXDIFF(pidx, cidx, r->size);
3672333338Sshurd	for (i = 0; i < avail; i++) {
3673333338Sshurd		mp = _ring_peek_one(r, cidx, i, avail - i);
3674333338Sshurd		if (__predict_false(*mp == (struct mbuf *)txq))
3675333338Sshurd			continue;
3676333338Sshurd		m_freem(*mp);
3677333338Sshurd	}
3678333338Sshurd	MPASS(ifmp_ring_is_stalled(r) == 0);
3679333338Sshurd	return (avail);
3680333338Sshurd}
3681333338Sshurd
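/*
 * Temporarily swap in drain callbacks that discard everything, flush
 * the ring, then restore the normal transmit callbacks.
 */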
3682300113Sscottlstatic void
3683333338Sshurdiflib_ifmp_purge(iflib_txq_t txq)
3684333338Sshurd{
3685333338Sshurd	struct ifmp_ring *r;
3686333338Sshurd
3687333338Sshurd	r = txq->ift_br;
3688333338Sshurd	r->drain = iflib_txq_drain_free;
3689333338Sshurd	r->can_drain = iflib_txq_drain_always;
3690333338Sshurd
3691333338Sshurd	ifmp_ring_check_drainage(r, r->size);
3692333338Sshurd
3693333338Sshurd	r->drain = iflib_txq_drain;
3694333338Sshurd	r->can_drain = iflib_txq_can_drain;
3695333338Sshurd}
3696333338Sshurd
3697333338Sshurdstatic void
3698304704Sshurd_task_fn_tx(void *context)
3699300113Sscottl{
3700300113Sscottl	iflib_txq_t txq = context;
3701300113Sscottl	if_ctx_t ctx = txq->ift_ctx;
3702333338Sshurd	struct ifnet *ifp = ctx->ifc_ifp;
3703333338Sshurd	int rc;
3704300113Sscottl
3705333338Sshurd#ifdef IFLIB_DIAGNOSTICS
3706333338Sshurd	txq->ift_cpu_exec_count[curcpu]++;
3707333338Sshurd#endif
3708300113Sscottl	if (!(if_getdrvflags(ctx->ifc_ifp) & IFF_DRV_RUNNING))
3709300113Sscottl		return;
3710333338Sshurd	if (if_getcapenable(ifp) & IFCAP_NETMAP) {
3711333338Sshurd		if (ctx->isc_txd_credits_update(ctx->ifc_softc, txq->ift_id, false))
3712333338Sshurd			netmap_tx_irq(ifp, txq->ift_id);
3713333338Sshurd		IFDI_TX_QUEUE_INTR_ENABLE(ctx, txq->ift_id);
3714333338Sshurd		return;
3715333338Sshurd	}
3716333338Sshurd	if (txq->ift_db_pending)
3717333338Sshurd		ifmp_ring_enqueue(txq->ift_br, (void **)&txq, 1, TX_BATCH_SIZE);
3718333338Sshurd	ifmp_ring_check_drainage(txq->ift_br, TX_BATCH_SIZE);
3719333338Sshurd	if (ctx->ifc_flags & IFC_LEGACY)
3720333338Sshurd		IFDI_INTR_ENABLE(ctx);
3721333338Sshurd	else {
3722333338Sshurd		rc = IFDI_TX_QUEUE_INTR_ENABLE(ctx, txq->ift_id);
3723333338Sshurd		KASSERT(rc != ENOTSUP, ("MSI-X support requires queue_intr_enable, but not implemented in driver"));
3724333338Sshurd	}
3725300113Sscottl}
3726300113Sscottl
3727300113Sscottlstatic void
3728304704Sshurd_task_fn_rx(void *context)
3729300113Sscottl{
3730300113Sscottl	iflib_rxq_t rxq = context;
3731300113Sscottl	if_ctx_t ctx = rxq->ifr_ctx;
3732358272Shselasky	uint8_t more;
3733304704Sshurd	int rc;
3734333338Sshurd	uint16_t budget;
3735300113Sscottl
3736333338Sshurd#ifdef IFLIB_DIAGNOSTICS
3737333338Sshurd	rxq->ifr_cpu_exec_count[curcpu]++;
3738333338Sshurd#endif
3739300113Sscottl	DBG_COUNTER_INC(task_fn_rxs);
3740300113Sscottl	if (__predict_false(!(if_getdrvflags(ctx->ifc_ifp) & IFF_DRV_RUNNING)))
3741300113Sscottl		return;
3742333338Sshurd#ifdef DEV_NETMAP
3743333338Sshurd	if (if_getcapenable(ctx->ifc_ifp) & IFCAP_NETMAP) {
3744333338Sshurd		u_int work = 0;
3745333338Sshurd		if (netmap_rx_irq(ctx->ifc_ifp, rxq->ifr_id, &work)) {
3746358272Shselasky			more = 0;
3747358272Shselasky			goto skip_rxeof;
3748333338Sshurd		}
3749333338Sshurd	}
3750333338Sshurd#endif
3751333338Sshurd	budget = ctx->ifc_sysctl_rx_budget;
3752333338Sshurd	if (budget == 0)
3753333338Sshurd		budget = 16;	/* XXX */
3754358272Shselasky	more = iflib_rxeof(rxq, budget);
3755358272Shselasky#ifdef DEV_NETMAP
3756358272Shselaskyskip_rxeof:
3757358272Shselasky#endif
3758358272Shselasky	if ((more & IFLIB_RXEOF_MORE) == 0) {
3759300113Sscottl		if (ctx->ifc_flags & IFC_LEGACY)
3760300113Sscottl			IFDI_INTR_ENABLE(ctx);
3761300113Sscottl		else {
3762300113Sscottl			DBG_COUNTER_INC(rx_intr_enables);
3763333338Sshurd			rc = IFDI_RX_QUEUE_INTR_ENABLE(ctx, rxq->ifr_id);
3764304704Sshurd			KASSERT(rc != ENOTSUP, ("MSI-X support requires queue_intr_enable, but not implemented in driver"));
3765300113Sscottl		}
3766300113Sscottl	}
3767300113Sscottl	if (__predict_false(!(if_getdrvflags(ctx->ifc_ifp) & IFF_DRV_RUNNING)))
3768300113Sscottl		return;
3769358272Shselasky
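	/*
	 * IFLIB_RXEOF_MORE: descriptors remain, so re-queue the task
	 * immediately.  IFLIB_RXEOF_EMPTY: the free list could not be
	 * refilled, so retry from the watchdog callout a tick from now
	 * instead of spinning.
	 */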
3770358272Shselasky	if (more & IFLIB_RXEOF_MORE)
3771300113Sscottl		GROUPTASK_ENQUEUE(&rxq->ifr_task);
3772358272Shselasky	else if (more & IFLIB_RXEOF_EMPTY)
3773358272Shselasky		callout_reset_curcpu(&rxq->ifr_watchdog, 1, &_task_fn_rx_watchdog, rxq);
3774300113Sscottl}
3775300113Sscottl
3776300113Sscottlstatic void
3777304704Sshurd_task_fn_admin(void *context)
3778300113Sscottl{
3779300113Sscottl	if_ctx_t ctx = context;
3780300113Sscottl	if_softc_ctx_t sctx = &ctx->ifc_softc_ctx;
3781300113Sscottl	iflib_txq_t txq;
3782300113Sscottl	int i;
3783347197Serj	bool oactive, running, do_reset, do_watchdog, in_detach;
3784300113Sscottl
3785347197Serj	STATE_LOCK(ctx);
3786347197Serj	running = (if_getdrvflags(ctx->ifc_ifp) & IFF_DRV_RUNNING);
3787347197Serj	oactive = (if_getdrvflags(ctx->ifc_ifp) & IFF_DRV_OACTIVE);
3788347197Serj	do_reset = (ctx->ifc_flags & IFC_DO_RESET);
3789347197Serj	do_watchdog = (ctx->ifc_flags & IFC_DO_WATCHDOG);
3790347197Serj	in_detach = (ctx->ifc_flags & IFC_IN_DETACH);
3791347197Serj	ctx->ifc_flags &= ~(IFC_DO_RESET|IFC_DO_WATCHDOG);
3792347197Serj	STATE_UNLOCK(ctx);
3793300113Sscottl
	if ((!running && !oactive) &&
3795347197Serj	    !(ctx->ifc_sctx->isc_flags & IFLIB_ADMIN_ALWAYS_RUN))
3796347373Serj		return;
3797347197Serj	if (in_detach)
3798347197Serj		return;
3799347197Serj
3800300113Sscottl	CTX_LOCK(ctx);
3801300113Sscottl	for (txq = ctx->ifc_txqs, i = 0; i < sctx->isc_ntxqsets; i++, txq++) {
3802300113Sscottl		CALLOUT_LOCK(txq);
3803300113Sscottl		callout_stop(&txq->ift_timer);
3804300113Sscottl		CALLOUT_UNLOCK(txq);
3805300113Sscottl	}
3806347197Serj	if (do_watchdog) {
3807347197Serj		ctx->ifc_watchdog_events++;
3808347197Serj		IFDI_WATCHDOG_RESET(ctx);
3809347197Serj	}
3810300113Sscottl	IFDI_UPDATE_ADMIN_STATUS(ctx);
3811300113Sscottl	for (txq = ctx->ifc_txqs, i = 0; i < sctx->isc_ntxqsets; i++, txq++)
3812300113Sscottl		callout_reset_on(&txq->ift_timer, hz/2, iflib_timer, txq, txq->ift_timer.c_cpu);
3813300113Sscottl	IFDI_LINK_INTR_ENABLE(ctx);
3814347197Serj	if (do_reset)
3815333338Sshurd		iflib_if_init_locked(ctx);
3816300113Sscottl	CTX_UNLOCK(ctx);
3817300113Sscottl
3818300113Sscottl	if (LINK_ACTIVE(ctx) == 0)
3819300113Sscottl		return;
3820300113Sscottl	for (txq = ctx->ifc_txqs, i = 0; i < sctx->isc_ntxqsets; i++, txq++)
3821300113Sscottl		iflib_txq_check_drain(txq, IFLIB_RESTART_BUDGET);
3822300113Sscottl}
3823300113Sscottl
3824300113Sscottl
3825300113Sscottlstatic void
3826304704Sshurd_task_fn_iov(void *context)
3827300113Sscottl{
3828300113Sscottl	if_ctx_t ctx = context;
3829300113Sscottl
3830347197Serj	if (!(if_getdrvflags(ctx->ifc_ifp) & IFF_DRV_RUNNING) &&
3831347197Serj	    !(ctx->ifc_sctx->isc_flags & IFLIB_ADMIN_ALWAYS_RUN))
3832300113Sscottl		return;
3833300113Sscottl
3834300113Sscottl	CTX_LOCK(ctx);
3835300113Sscottl	IFDI_VFLR_HANDLE(ctx);
3836300113Sscottl	CTX_UNLOCK(ctx);
3837300113Sscottl}
3838300113Sscottl
3839300113Sscottlstatic int
3840300113Sscottliflib_sysctl_int_delay(SYSCTL_HANDLER_ARGS)
3841300113Sscottl{
3842300113Sscottl	int err;
3843300113Sscottl	if_int_delay_info_t info;
3844300113Sscottl	if_ctx_t ctx;
3845300113Sscottl
3846300113Sscottl	info = (if_int_delay_info_t)arg1;
3847300113Sscottl	ctx = info->iidi_ctx;
3848300113Sscottl	info->iidi_req = req;
3849300113Sscottl	info->iidi_oidp = oidp;
3850300113Sscottl	CTX_LOCK(ctx);
3851300113Sscottl	err = IFDI_SYSCTL_INT_DELAY(ctx, info);
3852300113Sscottl	CTX_UNLOCK(ctx);
3853300113Sscottl	return (err);
3854300113Sscottl}
3855300113Sscottl
3856300113Sscottl/*********************************************************************
3857300113Sscottl *
3858300113Sscottl *  IFNET FUNCTIONS
3859300113Sscottl *
3860300113Sscottl **********************************************************************/
3861300113Sscottl
3862300113Sscottlstatic void
3863300113Sscottliflib_if_init_locked(if_ctx_t ctx)
3864300113Sscottl{
3865300113Sscottl	iflib_stop(ctx);
3866300113Sscottl	iflib_init_locked(ctx);
3867300113Sscottl}
3868300113Sscottl
3869300113Sscottl
3870300113Sscottlstatic void
3871300113Sscottliflib_if_init(void *arg)
3872300113Sscottl{
3873300113Sscottl	if_ctx_t ctx = arg;
3874300113Sscottl
3875300113Sscottl	CTX_LOCK(ctx);
3876300113Sscottl	iflib_if_init_locked(ctx);
3877300113Sscottl	CTX_UNLOCK(ctx);
3878300113Sscottl}
3879300113Sscottl
3880300113Sscottlstatic int
3881300113Sscottliflib_if_transmit(if_t ifp, struct mbuf *m)
3882300113Sscottl{
3883300113Sscottl	if_ctx_t	ctx = if_getsoftc(ifp);
3884300113Sscottl
3885300113Sscottl	iflib_txq_t txq;
3886304704Sshurd	int err, qidx;
3887300113Sscottl
3888300113Sscottl	if (__predict_false((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0 || !LINK_ACTIVE(ctx))) {
3889300113Sscottl		DBG_COUNTER_INC(tx_frees);
3890300113Sscottl		m_freem(m);
3891347212Serj		return (ENETDOWN);
3892300113Sscottl	}
3893300113Sscottl
3894304704Sshurd	MPASS(m->m_nextpkt == NULL);
3895300113Sscottl	qidx = 0;
3896300113Sscottl	if ((NTXQSETS(ctx) > 1) && M_HASHTYPE_GET(m))
3897300113Sscottl		qidx = QIDX(ctx, m);
3898300113Sscottl	/*
3899300113Sscottl	 * XXX calculate buf_ring based on flowid (divvy up bits?)
3900300113Sscottl	 */
3901300113Sscottl	txq = &ctx->ifc_txqs[qidx];
3902300113Sscottl
3903300113Sscottl#ifdef DRIVER_BACKPRESSURE
	if (txq->ift_closed) {
		struct mbuf *next;

		while (m != NULL) {
			next = m->m_nextpkt;
			m->m_nextpkt = NULL;
			m_freem(m);
			m = next;
		}
		return (ENOBUFS);
	}
3913300113Sscottl#endif
#ifdef notyet
	/*
	 * XXX experimental multi-packet enqueue path, not compiled by
	 * default.  Locals are declared here (with an arbitrary batch
	 * array size) so that the block compiles if it is ever enabled.
	 */
	struct mbuf **mp, *next, *marr[8];
	int count, i;

	qidx = count = 0;
	mp = marr;
	next = m;
	do {
		count++;
		next = next->m_nextpkt;
	} while (next != NULL);

	if (count > nitems(marr))
		if ((mp = malloc(count*sizeof(struct mbuf *), M_IFLIB, M_NOWAIT)) == NULL) {
			/* XXX check nextpkt */
			m_freem(m);
			/* XXX simplify for now */
			DBG_COUNTER_INC(tx_frees);
			return (ENOBUFS);
		}
	for (next = m, i = 0; next != NULL; i++) {
		mp[i] = next;
		next = next->m_nextpkt;
		mp[i]->m_nextpkt = NULL;
	}
#endif
3937300113Sscottl	DBG_COUNTER_INC(tx_seen);
3938333338Sshurd	err = ifmp_ring_enqueue(txq->ift_br, (void **)&m, 1, TX_BATCH_SIZE);
3939300113Sscottl
3940333338Sshurd	GROUPTASK_ENQUEUE(&txq->ift_task);
3941304704Sshurd	if (err) {
		/* backpressure support is forthcoming */
3943300113Sscottl#ifdef DRIVER_BACKPRESSURE
3944300113Sscottl		txq->ift_closed = TRUE;
3945300113Sscottl#endif
3946333338Sshurd		ifmp_ring_check_drainage(txq->ift_br, TX_BATCH_SIZE);
3947304704Sshurd		m_freem(m);
3948300113Sscottl	}
3949300113Sscottl
3950300113Sscottl	return (err);
3951300113Sscottl}
3952300113Sscottl
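/*
 * if_qflush method: flag IFC_QFLUSH so that the drain path discards
 * packets, then spin until every TX mp_ring is either idle or stalled
 * before handing off to the stock if_qflush().
 */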
3953300113Sscottlstatic void
3954300113Sscottliflib_if_qflush(if_t ifp)
3955300113Sscottl{
3956300113Sscottl	if_ctx_t ctx = if_getsoftc(ifp);
3957300113Sscottl	iflib_txq_t txq = ctx->ifc_txqs;
3958300113Sscottl	int i;
3959300113Sscottl
3960347197Serj	STATE_LOCK(ctx);
3961300113Sscottl	ctx->ifc_flags |= IFC_QFLUSH;
3962347197Serj	STATE_UNLOCK(ctx);
3963300113Sscottl	for (i = 0; i < NTXQSETS(ctx); i++, txq++)
3964333338Sshurd		while (!(ifmp_ring_is_idle(txq->ift_br) || ifmp_ring_is_stalled(txq->ift_br)))
3965300113Sscottl			iflib_txq_check_drain(txq, 0);
3966347197Serj	STATE_LOCK(ctx);
3967300113Sscottl	ctx->ifc_flags &= ~IFC_QFLUSH;
3968347197Serj	STATE_UNLOCK(ctx);
3969300113Sscottl
3970300113Sscottl	if_qflush(ifp);
3971300113Sscottl}
3972300113Sscottl
3973300113Sscottl
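/* Interface capabilities that iflib allows to be toggled via SIOCSIFCAP */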
3974304704Sshurd#define IFCAP_FLAGS (IFCAP_TXCSUM_IPV6 | IFCAP_RXCSUM_IPV6 | IFCAP_HWCSUM | IFCAP_LRO | \
3975333338Sshurd		     IFCAP_TSO4 | IFCAP_TSO6 | IFCAP_VLAN_HWTAGGING | IFCAP_HWSTATS | \
3976300113Sscottl		     IFCAP_VLAN_MTU | IFCAP_VLAN_HWFILTER | IFCAP_VLAN_HWTSO)
3977300113Sscottl
3978300113Sscottlstatic int
3979300113Sscottliflib_if_ioctl(if_t ifp, u_long command, caddr_t data)
3980300113Sscottl{
3981300113Sscottl	if_ctx_t ctx = if_getsoftc(ifp);
3982300113Sscottl	struct ifreq	*ifr = (struct ifreq *)data;
3983300113Sscottl#if defined(INET) || defined(INET6)
3984300113Sscottl	struct ifaddr	*ifa = (struct ifaddr *)data;
3985300113Sscottl#endif
3986300113Sscottl	bool		avoid_reset = FALSE;
3987300113Sscottl	int		err = 0, reinit = 0, bits;
3988300113Sscottl
3989300113Sscottl	switch (command) {
3990300113Sscottl	case SIOCSIFADDR:
3991300113Sscottl#ifdef INET
3992300113Sscottl		if (ifa->ifa_addr->sa_family == AF_INET)
3993300113Sscottl			avoid_reset = TRUE;
3994300113Sscottl#endif
3995300113Sscottl#ifdef INET6
3996300113Sscottl		if (ifa->ifa_addr->sa_family == AF_INET6)
3997300113Sscottl			avoid_reset = TRUE;
3998300113Sscottl#endif
3999300113Sscottl		/*
4000300113Sscottl		** Calling init results in link renegotiation,
4001300113Sscottl		** so we avoid doing it when possible.
4002300113Sscottl		*/
4003300113Sscottl		if (avoid_reset) {
4004300113Sscottl			if_setflagbits(ifp, IFF_UP,0);
4005338871Serj			if (!(if_getdrvflags(ifp) & IFF_DRV_RUNNING))
4006300113Sscottl				reinit = 1;
4007300113Sscottl#ifdef INET
4008300113Sscottl			if (!(if_getflags(ifp) & IFF_NOARP))
4009300113Sscottl				arp_ifinit(ifp, ifa);
4010300113Sscottl#endif
4011300113Sscottl		} else
4012300113Sscottl			err = ether_ioctl(ifp, command, data);
4013300113Sscottl		break;
4014300113Sscottl	case SIOCSIFMTU:
4015300113Sscottl		CTX_LOCK(ctx);
4016300113Sscottl		if (ifr->ifr_mtu == if_getmtu(ifp)) {
4017300113Sscottl			CTX_UNLOCK(ctx);
4018300113Sscottl			break;
4019300113Sscottl		}
4020300113Sscottl		bits = if_getdrvflags(ifp);
4021300113Sscottl		/* stop the driver and free any clusters before proceeding */
4022300113Sscottl		iflib_stop(ctx);
4023300113Sscottl
4024300113Sscottl		if ((err = IFDI_MTU_SET(ctx, ifr->ifr_mtu)) == 0) {
4025347197Serj			STATE_LOCK(ctx);
4026300113Sscottl			if (ifr->ifr_mtu > ctx->ifc_max_fl_buf_size)
4027300113Sscottl				ctx->ifc_flags |= IFC_MULTISEG;
4028300113Sscottl			else
4029300113Sscottl				ctx->ifc_flags &= ~IFC_MULTISEG;
4030347197Serj			STATE_UNLOCK(ctx);
4031300113Sscottl			err = if_setmtu(ifp, ifr->ifr_mtu);
4032300113Sscottl		}
4033300113Sscottl		iflib_init_locked(ctx);
4034347197Serj		STATE_LOCK(ctx);
4035300113Sscottl		if_setdrvflags(ifp, bits);
4036347197Serj		STATE_UNLOCK(ctx);
4037300113Sscottl		CTX_UNLOCK(ctx);
4038300113Sscottl		break;
4039300113Sscottl	case SIOCSIFFLAGS:
4040300113Sscottl		CTX_LOCK(ctx);
4041300113Sscottl		if (if_getflags(ifp) & IFF_UP) {
4042300113Sscottl			if (if_getdrvflags(ifp) & IFF_DRV_RUNNING) {
4043300113Sscottl				if ((if_getflags(ifp) ^ ctx->ifc_if_flags) &
4044300113Sscottl				    (IFF_PROMISC | IFF_ALLMULTI)) {
4045300113Sscottl					err = IFDI_PROMISC_SET(ctx, if_getflags(ifp));
4046300113Sscottl				}
4047300113Sscottl			} else
4048300113Sscottl				reinit = 1;
4049300113Sscottl		} else if (if_getdrvflags(ifp) & IFF_DRV_RUNNING) {
4050300113Sscottl			iflib_stop(ctx);
4051300113Sscottl		}
4052300113Sscottl		ctx->ifc_if_flags = if_getflags(ifp);
4053300113Sscottl		CTX_UNLOCK(ctx);
4054300113Sscottl		break;
4055300113Sscottl	case SIOCADDMULTI:
4056300113Sscottl	case SIOCDELMULTI:
4057300113Sscottl		if (if_getdrvflags(ifp) & IFF_DRV_RUNNING) {
4058300113Sscottl			CTX_LOCK(ctx);
4059300113Sscottl			IFDI_INTR_DISABLE(ctx);
4060300113Sscottl			IFDI_MULTI_SET(ctx);
4061300113Sscottl			IFDI_INTR_ENABLE(ctx);
4062300113Sscottl			CTX_UNLOCK(ctx);
4063300113Sscottl		}
4064300113Sscottl		break;
4065300113Sscottl	case SIOCSIFMEDIA:
4066300113Sscottl		CTX_LOCK(ctx);
4067300113Sscottl		IFDI_MEDIA_SET(ctx);
4068300113Sscottl		CTX_UNLOCK(ctx);
4069300113Sscottl		/* falls thru */
4070300113Sscottl	case SIOCGIFMEDIA:
4071333338Sshurd	case SIOCGIFXMEDIA:
4072300113Sscottl		err = ifmedia_ioctl(ifp, ifr, &ctx->ifc_media, command);
4073300113Sscottl		break;
4074300113Sscottl	case SIOCGI2C:
4075300113Sscottl	{
4076300113Sscottl		struct ifi2creq i2c;
4077300113Sscottl
4078332288Sbrooks		err = copyin(ifr_data_get_ptr(ifr), &i2c, sizeof(i2c));
4079300113Sscottl		if (err != 0)
4080300113Sscottl			break;
4081300113Sscottl		if (i2c.dev_addr != 0xA0 && i2c.dev_addr != 0xA2) {
4082300113Sscottl			err = EINVAL;
4083300113Sscottl			break;
4084300113Sscottl		}
4085300113Sscottl		if (i2c.len > sizeof(i2c.data)) {
4086300113Sscottl			err = EINVAL;
4087300113Sscottl			break;
4088300113Sscottl		}
4089300113Sscottl
4090300113Sscottl		if ((err = IFDI_I2C_REQ(ctx, &i2c)) == 0)
4091332288Sbrooks			err = copyout(&i2c, ifr_data_get_ptr(ifr),
4092332288Sbrooks			    sizeof(i2c));
4093300113Sscottl		break;
4094300113Sscottl	}
4095300113Sscottl	case SIOCSIFCAP:
4096300113Sscottl	{
4097300113Sscottl		int mask, setmask;
4098300113Sscottl
4099300113Sscottl		mask = ifr->ifr_reqcap ^ if_getcapenable(ifp);
4100300113Sscottl		setmask = 0;
4101300113Sscottl#ifdef TCP_OFFLOAD
4102300113Sscottl		setmask |= mask & (IFCAP_TOE4|IFCAP_TOE6);
4103300113Sscottl#endif
4104300113Sscottl		setmask |= (mask & IFCAP_FLAGS);
4105300113Sscottl
4106338871Serj		if (setmask & (IFCAP_RXCSUM | IFCAP_RXCSUM_IPV6))
4107333338Sshurd			setmask |= (IFCAP_RXCSUM | IFCAP_RXCSUM_IPV6);
4108300113Sscottl		if ((mask & IFCAP_WOL) &&
4109300113Sscottl		    (if_getcapabilities(ifp) & IFCAP_WOL) != 0)
4110300113Sscottl			setmask |= (mask & (IFCAP_WOL_MCAST|IFCAP_WOL_MAGIC));
4111300113Sscottl		if_vlancap(ifp);
		/*
		 * Ensure that traffic has stopped before we change any of
		 * the flags.
		 */
4115300113Sscottl		if (setmask) {
4116300113Sscottl			CTX_LOCK(ctx);
4117300113Sscottl			bits = if_getdrvflags(ifp);
4118333338Sshurd			if (bits & IFF_DRV_RUNNING)
4119300113Sscottl				iflib_stop(ctx);
4120347197Serj			STATE_LOCK(ctx);
4121300113Sscottl			if_togglecapenable(ifp, setmask);
4122347197Serj			STATE_UNLOCK(ctx);
4123333338Sshurd			if (bits & IFF_DRV_RUNNING)
4124300113Sscottl				iflib_init_locked(ctx);
4125347197Serj			STATE_LOCK(ctx);
4126300113Sscottl			if_setdrvflags(ifp, bits);
4127347197Serj			STATE_UNLOCK(ctx);
4128300113Sscottl			CTX_UNLOCK(ctx);
4129300113Sscottl		}
4130300113Sscottl		break;
4131338871Serj	}
4132300113Sscottl	case SIOCGPRIVATE_0:
4133300113Sscottl	case SIOCSDRVSPEC:
4134300113Sscottl	case SIOCGDRVSPEC:
4135300113Sscottl		CTX_LOCK(ctx);
4136300113Sscottl		err = IFDI_PRIV_IOCTL(ctx, command, data);
4137300113Sscottl		CTX_UNLOCK(ctx);
4138300113Sscottl		break;
4139300113Sscottl	default:
4140300113Sscottl		err = ether_ioctl(ifp, command, data);
4141300113Sscottl		break;
4142300113Sscottl	}
4143300113Sscottl	if (reinit)
4144300113Sscottl		iflib_if_init(ctx);
4145300113Sscottl	return (err);
4146300113Sscottl}
4147300113Sscottl
4148300113Sscottlstatic uint64_t
4149300113Sscottliflib_if_get_counter(if_t ifp, ift_counter cnt)
4150300113Sscottl{
4151300113Sscottl	if_ctx_t ctx = if_getsoftc(ifp);
4152300113Sscottl
4153300113Sscottl	return (IFDI_GET_COUNTER(ctx, cnt));
4154300113Sscottl}
4155300113Sscottl
4156300113Sscottl/*********************************************************************
4157300113Sscottl *
4158300113Sscottl *  OTHER FUNCTIONS EXPORTED TO THE STACK
4159300113Sscottl *
4160300113Sscottl **********************************************************************/
4161300113Sscottl
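/*
 * VLAN eventhandler: invoked when a VLAN is configured on top of this
 * interface.  If the driver reports that VLAN changes require a
 * restart, the interface is stopped and re-initialized around the
 * IFDI_VLAN_REGISTER() call.
 */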
4162300113Sscottlstatic void
4163300113Sscottliflib_vlan_register(void *arg, if_t ifp, uint16_t vtag)
4164300113Sscottl{
4165300113Sscottl	if_ctx_t ctx = if_getsoftc(ifp);
4166300113Sscottl
4167300113Sscottl	if ((void *)ctx != arg)
4168300113Sscottl		return;
4169300113Sscottl
4170300113Sscottl	if ((vtag == 0) || (vtag > 4095))
4171300113Sscottl		return;
4172300113Sscottl
4173300113Sscottl	CTX_LOCK(ctx);
4174361055Serj	/* Driver may need all untagged packets to be flushed */
4175361055Serj	if (IFDI_NEEDS_RESTART(ctx, IFLIB_RESTART_VLAN_CONFIG))
4176361055Serj		iflib_stop(ctx);
4177300113Sscottl	IFDI_VLAN_REGISTER(ctx, vtag);
4178361055Serj	/* Re-init to load the changes, if required */
4179361055Serj	if (IFDI_NEEDS_RESTART(ctx, IFLIB_RESTART_VLAN_CONFIG))
4180361055Serj		iflib_init_locked(ctx);
4181300113Sscottl	CTX_UNLOCK(ctx);
4182300113Sscottl}
4183300113Sscottl
4184300113Sscottlstatic void
4185300113Sscottliflib_vlan_unregister(void *arg, if_t ifp, uint16_t vtag)
4186300113Sscottl{
4187300113Sscottl	if_ctx_t ctx = if_getsoftc(ifp);
4188300113Sscottl
4189300113Sscottl	if ((void *)ctx != arg)
4190300113Sscottl		return;
4191300113Sscottl
4192300113Sscottl	if ((vtag == 0) || (vtag > 4095))
4193300113Sscottl		return;
4194300113Sscottl
4195300113Sscottl	CTX_LOCK(ctx);
4196361055Serj	/* Driver may need all tagged packets to be flushed */
4197361055Serj	if (IFDI_NEEDS_RESTART(ctx, IFLIB_RESTART_VLAN_CONFIG))
4198361055Serj		iflib_stop(ctx);
4199300113Sscottl	IFDI_VLAN_UNREGISTER(ctx, vtag);
4200361055Serj	/* Re-init to load the changes, if required */
4201361055Serj	if (IFDI_NEEDS_RESTART(ctx, IFLIB_RESTART_VLAN_CONFIG))
4202361055Serj		iflib_init_locked(ctx);
4203300113Sscottl	CTX_UNLOCK(ctx);
4204300113Sscottl}
4205300113Sscottl
4206300113Sscottlstatic void
4207300113Sscottliflib_led_func(void *arg, int onoff)
4208300113Sscottl{
4209300113Sscottl	if_ctx_t ctx = arg;
4210300113Sscottl
4211300113Sscottl	CTX_LOCK(ctx);
4212300113Sscottl	IFDI_LED_FUNC(ctx, onoff);
4213300113Sscottl	CTX_UNLOCK(ctx);
4214300113Sscottl}
4215300113Sscottl
4216300113Sscottl/*********************************************************************
4217300113Sscottl *
4218300113Sscottl *  BUS FUNCTION DEFINITIONS
4219300113Sscottl *
4220300113Sscottl **********************************************************************/
4221300113Sscottl
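/*
 * Match the device's PCI vendor/device/subvendor/subdevice/revision
 * IDs against the driver's pci_vendor_info table; on a hit, set the
 * device description and return BUS_PROBE_DEFAULT.
 */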
4222300113Sscottlint
4223300113Sscottliflib_device_probe(device_t dev)
4224300113Sscottl{
4225300113Sscottl	pci_vendor_info_t *ent;
4226300113Sscottl
4227300113Sscottl	uint16_t	pci_vendor_id, pci_device_id;
4228300113Sscottl	uint16_t	pci_subvendor_id, pci_subdevice_id;
4229300113Sscottl	uint16_t	pci_rev_id;
4230300113Sscottl	if_shared_ctx_t sctx;
4231300113Sscottl
4232300113Sscottl	if ((sctx = DEVICE_REGISTER(dev)) == NULL || sctx->isc_magic != IFLIB_MAGIC)
4233300113Sscottl		return (ENOTSUP);
4234300113Sscottl
4235300113Sscottl	pci_vendor_id = pci_get_vendor(dev);
4236300113Sscottl	pci_device_id = pci_get_device(dev);
4237300113Sscottl	pci_subvendor_id = pci_get_subvendor(dev);
4238300113Sscottl	pci_subdevice_id = pci_get_subdevice(dev);
4239300113Sscottl	pci_rev_id = pci_get_revid(dev);
4240300113Sscottl	if (sctx->isc_parse_devinfo != NULL)
4241300113Sscottl		sctx->isc_parse_devinfo(&pci_device_id, &pci_subvendor_id, &pci_subdevice_id, &pci_rev_id);
4242300113Sscottl
4243300113Sscottl	ent = sctx->isc_vendor_info;
4244300113Sscottl	while (ent->pvi_vendor_id != 0) {
4245300113Sscottl		if (pci_vendor_id != ent->pvi_vendor_id) {
4246300113Sscottl			ent++;
4247300113Sscottl			continue;
4248300113Sscottl		}
4249300113Sscottl		if ((pci_device_id == ent->pvi_device_id) &&
4250300113Sscottl		    ((pci_subvendor_id == ent->pvi_subvendor_id) ||
4251300113Sscottl		     (ent->pvi_subvendor_id == 0)) &&
4252300113Sscottl		    ((pci_subdevice_id == ent->pvi_subdevice_id) ||
4253300113Sscottl		     (ent->pvi_subdevice_id == 0)) &&
4254300113Sscottl		    ((pci_rev_id == ent->pvi_rev_id) ||
4255300113Sscottl		     (ent->pvi_rev_id == 0))) {
4256300113Sscottl
4257300113Sscottl			device_set_desc_copy(dev, ent->pvi_name);
			/*
			 * This needs to be changed to zero if the bus
			 * probing code ever stops re-probing on best match,
			 * because the sctx may have its values overwritten
			 * by register calls in subsequent probes.
			 */
4263300113Sscottl			return (BUS_PROBE_DEFAULT);
4264300113Sscottl		}
4265300113Sscottl		ent++;
4266300113Sscottl	}
4267300113Sscottl	return (ENXIO);
4268300113Sscottl}
4269300113Sscottl
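/*
 * Core attach path shared by all iflib drivers: allocate the context,
 * apply any sysctl overrides and clamp the descriptor counts, run the
 * driver's attach methods, configure interrupts, allocate the queue
 * state, and attach the ifnet.
 */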
4270300113Sscottlint
4271300113Sscottliflib_device_register(device_t dev, void *sc, if_shared_ctx_t sctx, if_ctx_t *ctxp)
4272300113Sscottl{
4273300113Sscottl	int err, rid, msix, msix_bar;
4274300113Sscottl	if_ctx_t ctx;
4275300113Sscottl	if_t ifp;
4276300113Sscottl	if_softc_ctx_t scctx;
4277304704Sshurd	int i;
4278304704Sshurd	uint16_t main_txq;
4279304704Sshurd	uint16_t main_rxq;
4280300113Sscottl
4281300113Sscottl
	ctx = malloc(sizeof(*ctx), M_IFLIB, M_WAITOK|M_ZERO);
4283300113Sscottl
4284300113Sscottl	if (sc == NULL) {
4285300113Sscottl		sc = malloc(sctx->isc_driver->size, M_IFLIB, M_WAITOK|M_ZERO);
4286300113Sscottl		device_set_softc(dev, ctx);
4287304704Sshurd		ctx->ifc_flags |= IFC_SC_ALLOCATED;
4288300113Sscottl	}
4289300113Sscottl
4290300113Sscottl	ctx->ifc_sctx = sctx;
4291300113Sscottl	ctx->ifc_dev = dev;
4292300113Sscottl	ctx->ifc_softc = sc;
4293300113Sscottl
4294300113Sscottl	if ((err = iflib_register(ctx)) != 0) {
4295300113Sscottl		device_printf(dev, "iflib_register failed %d\n", err);
4296300113Sscottl		return (err);
4297300113Sscottl	}
4298300113Sscottl	iflib_add_device_sysctl_pre(ctx);
4299304704Sshurd
4300304704Sshurd	scctx = &ctx->ifc_softc_ctx;
4301333338Sshurd	ifp = ctx->ifc_ifp;
4302333338Sshurd	ctx->ifc_nhwtxqs = sctx->isc_ntxqs;
4303333338Sshurd
4304304704Sshurd	/*
4305304704Sshurd	 * XXX sanity check that ntxd & nrxd are a power of 2
4306304704Sshurd	 */
4307304704Sshurd	if (ctx->ifc_sysctl_ntxqs != 0)
4308304704Sshurd		scctx->isc_ntxqsets = ctx->ifc_sysctl_ntxqs;
4309304704Sshurd	if (ctx->ifc_sysctl_nrxqs != 0)
4310304704Sshurd		scctx->isc_nrxqsets = ctx->ifc_sysctl_nrxqs;
4311304704Sshurd
4312304704Sshurd	for (i = 0; i < sctx->isc_ntxqs; i++) {
4313304704Sshurd		if (ctx->ifc_sysctl_ntxds[i] != 0)
4314304704Sshurd			scctx->isc_ntxd[i] = ctx->ifc_sysctl_ntxds[i];
4315304704Sshurd		else
4316304704Sshurd			scctx->isc_ntxd[i] = sctx->isc_ntxd_default[i];
4317304704Sshurd	}
4318304704Sshurd
4319304704Sshurd	for (i = 0; i < sctx->isc_nrxqs; i++) {
4320304704Sshurd		if (ctx->ifc_sysctl_nrxds[i] != 0)
4321304704Sshurd			scctx->isc_nrxd[i] = ctx->ifc_sysctl_nrxds[i];
4322304704Sshurd		else
4323304704Sshurd			scctx->isc_nrxd[i] = sctx->isc_nrxd_default[i];
4324304704Sshurd	}
4325304704Sshurd
4326304704Sshurd	for (i = 0; i < sctx->isc_nrxqs; i++) {
4327304704Sshurd		if (scctx->isc_nrxd[i] < sctx->isc_nrxd_min[i]) {
4328304704Sshurd			device_printf(dev, "nrxd%d: %d less than nrxd_min %d - resetting to min\n",
4329304704Sshurd				      i, scctx->isc_nrxd[i], sctx->isc_nrxd_min[i]);
4330304704Sshurd			scctx->isc_nrxd[i] = sctx->isc_nrxd_min[i];
4331304704Sshurd		}
4332304704Sshurd		if (scctx->isc_nrxd[i] > sctx->isc_nrxd_max[i]) {
4333304704Sshurd			device_printf(dev, "nrxd%d: %d greater than nrxd_max %d - resetting to max\n",
4334304704Sshurd				      i, scctx->isc_nrxd[i], sctx->isc_nrxd_max[i]);
4335304704Sshurd			scctx->isc_nrxd[i] = sctx->isc_nrxd_max[i];
4336304704Sshurd		}
4337304704Sshurd	}
4338304704Sshurd
4339304704Sshurd	for (i = 0; i < sctx->isc_ntxqs; i++) {
4340304704Sshurd		if (scctx->isc_ntxd[i] < sctx->isc_ntxd_min[i]) {
4341304704Sshurd			device_printf(dev, "ntxd%d: %d less than ntxd_min %d - resetting to min\n",
4342304704Sshurd				      i, scctx->isc_ntxd[i], sctx->isc_ntxd_min[i]);
4343304704Sshurd			scctx->isc_ntxd[i] = sctx->isc_ntxd_min[i];
4344304704Sshurd		}
4345304704Sshurd		if (scctx->isc_ntxd[i] > sctx->isc_ntxd_max[i]) {
4346304704Sshurd			device_printf(dev, "ntxd%d: %d greater than ntxd_max %d - resetting to max\n",
4347304704Sshurd				      i, scctx->isc_ntxd[i], sctx->isc_ntxd_max[i]);
4348304704Sshurd			scctx->isc_ntxd[i] = sctx->isc_ntxd_max[i];
4349304704Sshurd		}
4350304704Sshurd	}
4351304704Sshurd
4352300113Sscottl	if ((err = IFDI_ATTACH_PRE(ctx)) != 0) {
4353300113Sscottl		device_printf(dev, "IFDI_ATTACH_PRE failed %d\n", err);
4354300113Sscottl		return (err);
4355300113Sscottl	}
4356333338Sshurd	_iflib_pre_assert(scctx);
4357333338Sshurd	ctx->ifc_txrx = *scctx->isc_txrx;
4358304704Sshurd
4359333338Sshurd#ifdef INVARIANTS
4360333338Sshurd	MPASS(scctx->isc_capenable);
4361333338Sshurd	if (scctx->isc_capenable & IFCAP_TXCSUM)
4362333338Sshurd		MPASS(scctx->isc_tx_csum_flags);
4363333338Sshurd#endif
4364333338Sshurd
4365333338Sshurd	if_setcapabilities(ifp, scctx->isc_capenable | IFCAP_HWSTATS);
4366333338Sshurd	if_setcapenable(ifp, scctx->isc_capenable | IFCAP_HWSTATS);
4367333338Sshurd
4368333338Sshurd	if (scctx->isc_ntxqsets == 0 || (scctx->isc_ntxqsets_max && scctx->isc_ntxqsets_max < scctx->isc_ntxqsets))
4369333338Sshurd		scctx->isc_ntxqsets = scctx->isc_ntxqsets_max;
4370333338Sshurd	if (scctx->isc_nrxqsets == 0 || (scctx->isc_nrxqsets_max && scctx->isc_nrxqsets_max < scctx->isc_nrxqsets))
4371333338Sshurd		scctx->isc_nrxqsets = scctx->isc_nrxqsets_max;
4372333338Sshurd
4373300113Sscottl#ifdef ACPI_DMAR
4374300113Sscottl	if (dmar_get_dma_tag(device_get_parent(dev), dev) != NULL)
4375300113Sscottl		ctx->ifc_flags |= IFC_DMAR;
4376333338Sshurd#elif !(defined(__i386__) || defined(__amd64__))
4377333338Sshurd	/* set unconditionally for !x86 */
4378333338Sshurd	ctx->ifc_flags |= IFC_DMAR;
4379300113Sscottl#endif
4380300113Sscottl
4381300113Sscottl	msix_bar = scctx->isc_msix_bar;
4382333338Sshurd	main_txq = (sctx->isc_flags & IFLIB_HAS_TXCQ) ? 1 : 0;
4383333338Sshurd	main_rxq = (sctx->isc_flags & IFLIB_HAS_RXCQ) ? 1 : 0;
4384300113Sscottl
4385304704Sshurd	/* XXX change for per-queue sizes */
4386304704Sshurd	device_printf(dev, "using %d tx descriptors and %d rx descriptors\n",
4387304704Sshurd		      scctx->isc_ntxd[main_txq], scctx->isc_nrxd[main_rxq]);
4388304704Sshurd	for (i = 0; i < sctx->isc_nrxqs; i++) {
4389304704Sshurd		if (!powerof2(scctx->isc_nrxd[i])) {
4390304704Sshurd			/* round down instead? */
4391304704Sshurd			device_printf(dev, "# rx descriptors must be a power of 2\n");
4392304704Sshurd			err = EINVAL;
4393304704Sshurd			goto fail;
4394304704Sshurd		}
4395304704Sshurd	}
4396304704Sshurd	for (i = 0; i < sctx->isc_ntxqs; i++) {
4397304704Sshurd		if (!powerof2(scctx->isc_ntxd[i])) {
			device_printf(dev,
			    "# tx descriptors must be a power of 2\n");
4400304704Sshurd			err = EINVAL;
4401304704Sshurd			goto fail;
4402304704Sshurd		}
4403304704Sshurd	}
4404304704Sshurd
4405304704Sshurd	if (scctx->isc_tx_nsegments > scctx->isc_ntxd[main_txq] /
4406304704Sshurd	    MAX_SINGLE_PACKET_FRACTION)
4407304704Sshurd		scctx->isc_tx_nsegments = max(1, scctx->isc_ntxd[main_txq] /
4408304704Sshurd		    MAX_SINGLE_PACKET_FRACTION);
4409304704Sshurd	if (scctx->isc_tx_tso_segments_max > scctx->isc_ntxd[main_txq] /
4410304704Sshurd	    MAX_SINGLE_PACKET_FRACTION)
4411304704Sshurd		scctx->isc_tx_tso_segments_max = max(1,
4412304704Sshurd		    scctx->isc_ntxd[main_txq] / MAX_SINGLE_PACKET_FRACTION);
4413304704Sshurd
4414300113Sscottl	/*
4415300113Sscottl	 * Protect the stack against modern hardware
4416300113Sscottl	 */
4417300113Sscottl	if (scctx->isc_tx_tso_size_max > FREEBSD_TSO_SIZE_MAX)
4418300113Sscottl		scctx->isc_tx_tso_size_max = FREEBSD_TSO_SIZE_MAX;
4419300113Sscottl
	/* TSO parameters - dig these out of the data sheet; they simply correspond to tag setup */
4421300113Sscottl	ifp->if_hw_tsomaxsegcount = scctx->isc_tx_tso_segments_max;
4422300113Sscottl	ifp->if_hw_tsomax = scctx->isc_tx_tso_size_max;
4423300113Sscottl	ifp->if_hw_tsomaxsegsize = scctx->isc_tx_tso_segsize_max;
4424300113Sscottl	if (scctx->isc_rss_table_size == 0)
4425300113Sscottl		scctx->isc_rss_table_size = 64;
4426304704Sshurd	scctx->isc_rss_table_mask = scctx->isc_rss_table_size-1;
4427333338Sshurd
4428333338Sshurd	GROUPTASK_INIT(&ctx->ifc_admin_task, 0, _task_fn_admin, ctx);
4429333338Sshurd	/* XXX format name */
4430333338Sshurd	taskqgroup_attach(qgroup_if_config_tqg, &ctx->ifc_admin_task, ctx, -1, "admin");
4431333338Sshurd
4432333338Sshurd	/* Set up cpu set.  If it fails, use the set of all CPUs. */
4433333338Sshurd	if (bus_get_cpus(dev, INTR_CPUS, sizeof(ctx->ifc_cpus), &ctx->ifc_cpus) != 0) {
4434333338Sshurd		device_printf(dev, "Unable to fetch CPU list\n");
4435333338Sshurd		CPU_COPY(&all_cpus, &ctx->ifc_cpus);
4436333338Sshurd	}
4437333338Sshurd	MPASS(CPU_COUNT(&ctx->ifc_cpus) > 0);
4438333338Sshurd
	/*
	** Now set up MSI or MSI-X; this should return
	** the number of supported vectors (1 for MSI).
	*/
4444300113Sscottl	if (sctx->isc_flags & IFLIB_SKIP_MSIX) {
4445300113Sscottl		msix = scctx->isc_vectors;
4446300113Sscottl	} else if (scctx->isc_msix_bar != 0)
	       /*
		* The simple fact that isc_msix_bar is not 0 does not mean
		* we have a good value there that is known to work.
		*/
4451300113Sscottl		msix = iflib_msix_init(ctx);
4452300113Sscottl	else {
4453300113Sscottl		scctx->isc_vectors = 1;
4454300113Sscottl		scctx->isc_ntxqsets = 1;
4455300113Sscottl		scctx->isc_nrxqsets = 1;
4456300113Sscottl		scctx->isc_intr = IFLIB_INTR_LEGACY;
4457300113Sscottl		msix = 0;
4458300113Sscottl	}
4459300113Sscottl	/* Get memory for the station queues */
4460300113Sscottl	if ((err = iflib_queues_alloc(ctx))) {
4461300113Sscottl		device_printf(dev, "Unable to allocate queue memory\n");
4462300113Sscottl		goto fail;
4463300113Sscottl	}
4464300113Sscottl
4465333503Sshurd	if ((err = iflib_qset_structures_setup(ctx)))
4466300113Sscottl		goto fail_queues;
4467333338Sshurd	/*
4468333338Sshurd	 * Group taskqueues aren't properly set up until SMP is started,
4469333338Sshurd	 * so we disable interrupts until we can handle them post
4470333338Sshurd	 * SI_SUB_SMP.
4471333338Sshurd	 *
4472333338Sshurd	 * XXX: disabling interrupts doesn't actually work, at least for
4473333338Sshurd	 * the non-MSI case.  When they occur before SI_SUB_SMP completes,
4474333338Sshurd	 * we do null handling and depend on this not causing too large an
4475333338Sshurd	 * interrupt storm.
4476333338Sshurd	 */
4477333338Sshurd	IFDI_INTR_DISABLE(ctx);
4478300113Sscottl	if (msix > 1 && (err = IFDI_MSIX_INTR_ASSIGN(ctx, msix)) != 0) {
4479300113Sscottl		device_printf(dev, "IFDI_MSIX_INTR_ASSIGN failed %d\n", err);
4480300113Sscottl		goto fail_intr_free;
4481300113Sscottl	}
4482300113Sscottl	if (msix <= 1) {
4483300113Sscottl		rid = 0;
4484300113Sscottl		if (scctx->isc_intr == IFLIB_INTR_MSI) {
4485300113Sscottl			MPASS(msix == 1);
4486300113Sscottl			rid = 1;
4487300113Sscottl		}
4488304704Sshurd		if ((err = iflib_legacy_setup(ctx, ctx->isc_legacy_intr, ctx->ifc_softc, &rid, "irq0")) != 0) {
4489300113Sscottl			device_printf(dev, "iflib_legacy_setup failed %d\n", err);
4490300113Sscottl			goto fail_intr_free;
4491300113Sscottl		}
4492300113Sscottl	}
4493300113Sscottl	ether_ifattach(ctx->ifc_ifp, ctx->ifc_mac);
4494300113Sscottl	if ((err = IFDI_ATTACH_POST(ctx)) != 0) {
4495300113Sscottl		device_printf(dev, "IFDI_ATTACH_POST failed %d\n", err);
4496300113Sscottl		goto fail_detach;
4497300113Sscottl	}
4498300113Sscottl	if ((err = iflib_netmap_attach(ctx))) {
4499300113Sscottl		device_printf(ctx->ifc_dev, "netmap attach failed: %d\n", err);
4500300113Sscottl		goto fail_detach;
4501300113Sscottl	}
4502300113Sscottl	*ctxp = ctx;
4503300113Sscottl
4504304704Sshurd	if_setgetcounterfn(ctx->ifc_ifp, iflib_if_get_counter);
4505300113Sscottl	iflib_add_device_sysctl_post(ctx);
4506300113Sscottl	return (0);
4507300113Sscottlfail_detach:
4508300113Sscottl	ether_ifdetach(ctx->ifc_ifp);
4509300113Sscottlfail_intr_free:
4510300113Sscottl	if (scctx->isc_intr == IFLIB_INTR_MSIX || scctx->isc_intr == IFLIB_INTR_MSI)
4511300113Sscottl		pci_release_msi(ctx->ifc_dev);
4512300113Sscottlfail_queues:
4513333503Sshurd	iflib_tx_structures_free(ctx);
4514333503Sshurd	iflib_rx_structures_free(ctx);
4515300113Sscottlfail:
4516300113Sscottl	IFDI_DETACH(ctx);
4517300113Sscottl	return (err);
4518300113Sscottl}
4519300113Sscottl
4520300113Sscottlint
4521300113Sscottliflib_device_attach(device_t dev)
4522300113Sscottl{
4523300113Sscottl	if_ctx_t ctx;
4524300113Sscottl	if_shared_ctx_t sctx;
4525300113Sscottl
4526300113Sscottl	if ((sctx = DEVICE_REGISTER(dev)) == NULL || sctx->isc_magic != IFLIB_MAGIC)
4527300113Sscottl		return (ENOTSUP);
4528300113Sscottl
4529300113Sscottl	pci_enable_busmaster(dev);
4530300113Sscottl
4531300113Sscottl	return (iflib_device_register(dev, NULL, sctx, &ctx));
4532300113Sscottl}
4533300113Sscottl
4534300113Sscottlint
4535300113Sscottliflib_device_deregister(if_ctx_t ctx)
4536300113Sscottl{
4537300113Sscottl	if_t ifp = ctx->ifc_ifp;
4538300113Sscottl	iflib_txq_t txq;
4539300113Sscottl	iflib_rxq_t rxq;
4540300113Sscottl	device_t dev = ctx->ifc_dev;
4541333338Sshurd	int i, j;
4542300113Sscottl	struct taskqgroup *tqg;
4543333338Sshurd	iflib_fl_t fl;
4544300113Sscottl
4545300113Sscottl	/* Make sure VLANS are not using driver */
4546300113Sscottl	if (if_vlantrunkinuse(ifp)) {
		device_printf(dev, "VLAN in use, detach first\n");
4548300113Sscottl		return (EBUSY);
4549300113Sscottl	}
4550347197Serj#ifdef PCI_IOV
4551347197Serj	if (!CTX_IS_VF(ctx) && pci_iov_detach(dev) != 0) {
4552347197Serj		device_printf(dev, "SR-IOV in use; detach first.\n");
4553347197Serj		return (EBUSY);
4554347197Serj	}
4555347197Serj#endif
4556300113Sscottl
4557347197Serj	STATE_LOCK(ctx);
4558347197Serj	ctx->ifc_flags |= IFC_IN_DETACH;
4559347197Serj	STATE_UNLOCK(ctx);
4560347197Serj
4561361057Serj	/* Unregister VLAN handlers before calling iflib_stop() */
4562361057Serj	iflib_unregister_vlan_handlers(ctx);
4563361057Serj
4564361057Serj	iflib_netmap_detach(ifp);
4565361057Serj	ether_ifdetach(ifp);
4566361057Serj
4567300113Sscottl	CTX_LOCK(ctx);
4568300113Sscottl	iflib_stop(ctx);
4569300113Sscottl	CTX_UNLOCK(ctx);
4570300113Sscottl
4571300113Sscottl	if (ctx->ifc_led_dev != NULL)
4572300113Sscottl		led_destroy(ctx->ifc_led_dev);
4573300113Sscottl	/* XXX drain any dependent tasks */
4574333338Sshurd	tqg = qgroup_if_io_tqg;
4575304704Sshurd	for (txq = ctx->ifc_txqs, i = 0; i < NTXQSETS(ctx); i++, txq++) {
4576300113Sscottl		callout_drain(&txq->ift_timer);
4577300113Sscottl		if (txq->ift_task.gt_uniq != NULL)
4578300113Sscottl			taskqgroup_detach(tqg, &txq->ift_task);
4579300113Sscottl	}
4580300113Sscottl	for (i = 0, rxq = ctx->ifc_rxqs; i < NRXQSETS(ctx); i++, rxq++) {
4581358272Shselasky		callout_drain(&rxq->ifr_watchdog);
4582300113Sscottl		if (rxq->ifr_task.gt_uniq != NULL)
4583300113Sscottl			taskqgroup_detach(tqg, &rxq->ifr_task);
4584333338Sshurd
4585333338Sshurd		for (j = 0, fl = rxq->ifr_fl; j < rxq->ifr_nfl; j++, fl++)
4586333338Sshurd			free(fl->ifl_rx_bitmap, M_IFLIB);
4587333338Sshurd
4588300113Sscottl	}
4589300113Sscottl	tqg = qgroup_if_config_tqg;
4590300113Sscottl	if (ctx->ifc_admin_task.gt_uniq != NULL)
4591300113Sscottl		taskqgroup_detach(tqg, &ctx->ifc_admin_task);
4592300113Sscottl	if (ctx->ifc_vflr_task.gt_uniq != NULL)
4593300113Sscottl		taskqgroup_detach(tqg, &ctx->ifc_vflr_task);
4594347197Serj	CTX_LOCK(ctx);
4595347197Serj	IFDI_DETACH(ctx);
4596347197Serj	CTX_UNLOCK(ctx);
4597300113Sscottl
	/* ether_ifdetach calls if_qflush - the lock must be destroyed afterwards */
4599347197Serj	CTX_LOCK_DESTROY(ctx);
4600304704Sshurd	device_set_softc(ctx->ifc_dev, NULL);
4601347197Serj	iflib_free_intr_mem(ctx);
4602300113Sscottl
4603300113Sscottl	bus_generic_detach(dev);
4604300113Sscottl	if_free(ifp);
4605300113Sscottl
4606300113Sscottl	iflib_tx_structures_free(ctx);
4607300113Sscottl	iflib_rx_structures_free(ctx);
4608304704Sshurd	if (ctx->ifc_flags & IFC_SC_ALLOCATED)
4609304704Sshurd		free(ctx->ifc_softc, M_IFLIB);
4610347197Serj	STATE_LOCK_DESTROY(ctx);
4611304704Sshurd	free(ctx, M_IFLIB);
4612300113Sscottl	return (0);
4613300113Sscottl}
4614300113Sscottl
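/*
 * Release interrupt resources: the legacy IRQ when one was set up, any
 * MSI/MSI-X vectors, and the MSI-X table memory resource.
 */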
4615347197Serjstatic void
4616347197Serjiflib_free_intr_mem(if_ctx_t ctx)
4617347197Serj{
4618300113Sscottl
4619347197Serj	if (ctx->ifc_softc_ctx.isc_intr != IFLIB_INTR_MSIX) {
4620347197Serj		iflib_irq_free(ctx, &ctx->ifc_legacy_irq);
4621347197Serj	}
4622347197Serj	if (ctx->ifc_softc_ctx.isc_intr != IFLIB_INTR_LEGACY) {
4623347197Serj		pci_release_msi(ctx->ifc_dev);
4624347197Serj	}
4625347197Serj	if (ctx->ifc_msix_mem != NULL) {
4626347197Serj		bus_release_resource(ctx->ifc_dev, SYS_RES_MEMORY,
4627347197Serj		    rman_get_rid(ctx->ifc_msix_mem), ctx->ifc_msix_mem);
4628347197Serj		ctx->ifc_msix_mem = NULL;
4629347197Serj	}
4630347197Serj}
4631347197Serj
4632300113Sscottlint
4633300113Sscottliflib_device_detach(device_t dev)
4634300113Sscottl{
4635300113Sscottl	if_ctx_t ctx = device_get_softc(dev);
4636300113Sscottl
4637300113Sscottl	return (iflib_device_deregister(ctx));
4638300113Sscottl}
4639300113Sscottl
4640300113Sscottlint
4641300113Sscottliflib_device_suspend(device_t dev)
4642300113Sscottl{
4643300113Sscottl	if_ctx_t ctx = device_get_softc(dev);
4644300113Sscottl
4645300113Sscottl	CTX_LOCK(ctx);
4646300113Sscottl	IFDI_SUSPEND(ctx);
4647300113Sscottl	CTX_UNLOCK(ctx);
4648300113Sscottl
	return (bus_generic_suspend(dev));
}

int
4652300113Sscottliflib_device_shutdown(device_t dev)
4653300113Sscottl{
4654300113Sscottl	if_ctx_t ctx = device_get_softc(dev);
4655300113Sscottl
4656300113Sscottl	CTX_LOCK(ctx);
4657300113Sscottl	IFDI_SHUTDOWN(ctx);
4658300113Sscottl	CTX_UNLOCK(ctx);
4659300113Sscottl
	return (bus_generic_suspend(dev));
4661300113Sscottl}
4662300113Sscottl
4663300113Sscottl
4664300113Sscottlint
4665300113Sscottliflib_device_resume(device_t dev)
4666300113Sscottl{
4667300113Sscottl	if_ctx_t ctx = device_get_softc(dev);
4668300113Sscottl	iflib_txq_t txq = ctx->ifc_txqs;
4669300113Sscottl
4670300113Sscottl	CTX_LOCK(ctx);
4671300113Sscottl	IFDI_RESUME(ctx);
4672343099Sshurd	iflib_if_init_locked(ctx);
4673300113Sscottl	CTX_UNLOCK(ctx);
4674300113Sscottl	for (int i = 0; i < NTXQSETS(ctx); i++, txq++)
4675300113Sscottl		iflib_txq_check_drain(txq, IFLIB_RESTART_BUDGET);
4676300113Sscottl
4677300113Sscottl	return (bus_generic_resume(dev));
4678300113Sscottl}
4679300113Sscottl
4680300113Sscottlint
4681300113Sscottliflib_device_iov_init(device_t dev, uint16_t num_vfs, const nvlist_t *params)
4682300113Sscottl{
4683300113Sscottl	int error;
4684300113Sscottl	if_ctx_t ctx = device_get_softc(dev);
4685300113Sscottl
4686300113Sscottl	CTX_LOCK(ctx);
4687300113Sscottl	error = IFDI_IOV_INIT(ctx, num_vfs, params);
4688300113Sscottl	CTX_UNLOCK(ctx);
4689300113Sscottl
4690300113Sscottl	return (error);
4691300113Sscottl}
4692300113Sscottl
4693300113Sscottlvoid
4694300113Sscottliflib_device_iov_uninit(device_t dev)
4695300113Sscottl{
4696300113Sscottl	if_ctx_t ctx = device_get_softc(dev);
4697300113Sscottl
4698300113Sscottl	CTX_LOCK(ctx);
4699300113Sscottl	IFDI_IOV_UNINIT(ctx);
4700300113Sscottl	CTX_UNLOCK(ctx);
4701300113Sscottl}
4702300113Sscottl
4703300113Sscottlint
4704300113Sscottliflib_device_iov_add_vf(device_t dev, uint16_t vfnum, const nvlist_t *params)
4705300113Sscottl{
4706300113Sscottl	int error;
4707300113Sscottl	if_ctx_t ctx = device_get_softc(dev);
4708300113Sscottl
4709300113Sscottl	CTX_LOCK(ctx);
4710300113Sscottl	error = IFDI_IOV_VF_ADD(ctx, vfnum, params);
4711300113Sscottl	CTX_UNLOCK(ctx);
4712300113Sscottl
4713300113Sscottl	return (error);
4714300113Sscottl}
4715300113Sscottl
4716300113Sscottl/*********************************************************************
4717300113Sscottl *
4718300113Sscottl *  MODULE FUNCTION DEFINITIONS
4719300113Sscottl *
4720300113Sscottl **********************************************************************/
4721300113Sscottl
4722300113Sscottl/*
4723300113Sscottl * - Start a fast taskqueue thread for each core
4724300113Sscottl * - Start a taskqueue for control operations
4725300113Sscottl */
4726300113Sscottlstatic int
4727300113Sscottliflib_module_init(void)
4728300113Sscottl{
4729300113Sscottl	return (0);
4730300113Sscottl}
4731300113Sscottl
4732300113Sscottlstatic int
4733300113Sscottliflib_module_event_handler(module_t mod, int what, void *arg)
4734300113Sscottl{
4735300113Sscottl	int err;
4736300113Sscottl
4737300113Sscottl	switch (what) {
4738300113Sscottl	case MOD_LOAD:
4739300113Sscottl		if ((err = iflib_module_init()) != 0)
4740300113Sscottl			return (err);
4741300113Sscottl		break;
4742300113Sscottl	case MOD_UNLOAD:
4743300113Sscottl		return (EBUSY);
4744300113Sscottl	default:
4745300113Sscottl		return (EOPNOTSUPP);
4746300113Sscottl	}
4747300113Sscottl
4748300113Sscottl	return (0);
4749300113Sscottl}
4750300113Sscottl
4751300113Sscottl/*********************************************************************
4752300113Sscottl *
4753300113Sscottl *  PUBLIC FUNCTION DEFINITIONS
4754300113Sscottl *     ordered as in iflib.h
4755300113Sscottl *
4756300113Sscottl **********************************************************************/
4757300113Sscottl
4758300113Sscottl
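/* Assert that the driver filled in the mandatory shared context fields. */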
4759300113Sscottlstatic void
4760300113Sscottl_iflib_assert(if_shared_ctx_t sctx)
4761300113Sscottl{
4762300113Sscottl	MPASS(sctx->isc_tx_maxsize);
4763300113Sscottl	MPASS(sctx->isc_tx_maxsegsize);
4764300113Sscottl
4765300113Sscottl	MPASS(sctx->isc_rx_maxsize);
4766300113Sscottl	MPASS(sctx->isc_rx_nsegments);
4767300113Sscottl	MPASS(sctx->isc_rx_maxsegsize);
4768300113Sscottl
4769304704Sshurd	MPASS(sctx->isc_nrxd_min[0]);
4770304704Sshurd	MPASS(sctx->isc_nrxd_max[0]);
4771304704Sshurd	MPASS(sctx->isc_nrxd_default[0]);
4772304704Sshurd	MPASS(sctx->isc_ntxd_min[0]);
4773304704Sshurd	MPASS(sctx->isc_ntxd_max[0]);
4774304704Sshurd	MPASS(sctx->isc_ntxd_default[0]);
4775300113Sscottl}
4776300113Sscottl
4777333338Sshurdstatic void
4778333338Sshurd_iflib_pre_assert(if_softc_ctx_t scctx)
4779333338Sshurd{
4780333338Sshurd
4781333338Sshurd	MPASS(scctx->isc_txrx->ift_txd_encap);
4782333338Sshurd	MPASS(scctx->isc_txrx->ift_txd_flush);
4783333338Sshurd	MPASS(scctx->isc_txrx->ift_txd_credits_update);
4784333338Sshurd	MPASS(scctx->isc_txrx->ift_rxd_available);
4785333338Sshurd	MPASS(scctx->isc_txrx->ift_rxd_pkt_get);
4786333338Sshurd	MPASS(scctx->isc_txrx->ift_rxd_refill);
4787333338Sshurd	MPASS(scctx->isc_txrx->ift_rxd_flush);
4788333338Sshurd}
4789333338Sshurd
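/*
 * Allocate and initialize the ifnet, compile the driver's kobj method
 * table into the context, install the iflib ifnet methods, and
 * register the VLAN event handlers and ifmedia callbacks.
 */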
4790300113Sscottlstatic int
4791300113Sscottliflib_register(if_ctx_t ctx)
4792300113Sscottl{
4793300113Sscottl	if_shared_ctx_t sctx = ctx->ifc_sctx;
4794300113Sscottl	driver_t *driver = sctx->isc_driver;
4795300113Sscottl	device_t dev = ctx->ifc_dev;
4796300113Sscottl	if_t ifp;
4797300113Sscottl
4798300113Sscottl	_iflib_assert(sctx);
4799300113Sscottl
4800300113Sscottl	CTX_LOCK_INIT(ctx, device_get_nameunit(ctx->ifc_dev));
4801352091Serj	STATE_LOCK_INIT(ctx, device_get_nameunit(ctx->ifc_dev));
4802300113Sscottl
4803347197Serj	ifp = ctx->ifc_ifp = if_alloc(IFT_ETHER);
4804300113Sscottl	if (ifp == NULL) {
4805300113Sscottl		device_printf(dev, "can not allocate ifnet structure\n");
4806300113Sscottl		return (ENOMEM);
4807300113Sscottl	}
4808300113Sscottl
4809300113Sscottl	/*
4810300113Sscottl	 * Initialize our context's device specific methods
4811300113Sscottl	 */
4812300113Sscottl	kobj_init((kobj_t) ctx, (kobj_class_t) driver);
4813300113Sscottl	kobj_class_compile((kobj_class_t) driver);
4814300113Sscottl	driver->refs++;
4815300113Sscottl
4816300113Sscottl	if_initname(ifp, device_get_name(dev), device_get_unit(dev));
4817300113Sscottl	if_setsoftc(ifp, ctx);
4818300113Sscottl	if_setdev(ifp, dev);
4819300113Sscottl	if_setinitfn(ifp, iflib_if_init);
4820300113Sscottl	if_setioctlfn(ifp, iflib_if_ioctl);
4821300113Sscottl	if_settransmitfn(ifp, iflib_if_transmit);
4822300113Sscottl	if_setqflushfn(ifp, iflib_if_qflush);
4823300113Sscottl	if_setflags(ifp, IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST);
4824300113Sscottl
4825300113Sscottl	ctx->ifc_vlan_attach_event =
4826300113Sscottl		EVENTHANDLER_REGISTER(vlan_config, iflib_vlan_register, ctx,
4827300113Sscottl							  EVENTHANDLER_PRI_FIRST);
4828300113Sscottl	ctx->ifc_vlan_detach_event =
4829300113Sscottl		EVENTHANDLER_REGISTER(vlan_unconfig, iflib_vlan_unregister, ctx,
4830300113Sscottl							  EVENTHANDLER_PRI_FIRST);
4831300113Sscottl
4832300113Sscottl	ifmedia_init(&ctx->ifc_media, IFM_IMASK,
4833300113Sscottl					 iflib_media_change, iflib_media_status);
4834300113Sscottl
4835300113Sscottl	return (0);
4836300113Sscottl}
4837300113Sscottl
4838361057Serjstatic void
4839361057Serjiflib_unregister_vlan_handlers(if_ctx_t ctx)
4840361057Serj{
4841361057Serj	/* Unregister VLAN events */
4842361057Serj	if (ctx->ifc_vlan_attach_event != NULL) {
4843361057Serj		EVENTHANDLER_DEREGISTER(vlan_config, ctx->ifc_vlan_attach_event);
4844361057Serj		ctx->ifc_vlan_attach_event = NULL;
4845361057Serj	}
4846361057Serj	if (ctx->ifc_vlan_detach_event != NULL) {
4847361057Serj		EVENTHANDLER_DEREGISTER(vlan_unconfig, ctx->ifc_vlan_detach_event);
4848361057Serj		ctx->ifc_vlan_detach_event = NULL;
4849361057Serj	}
4850300113Sscottl
4851361057Serj}
4852361057Serj
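/*
 * Allocate the software state for every TX and RX queue set, the
 * DMA-able descriptor rings, the free lists, and the TX mp_rings, and
 * then pass the ring addresses down via IFDI_{TX,RX}_QUEUES_ALLOC().
 */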
4853300113Sscottlstatic int
4854300113Sscottliflib_queues_alloc(if_ctx_t ctx)
4855300113Sscottl{
4856300113Sscottl	if_shared_ctx_t sctx = ctx->ifc_sctx;
4857304704Sshurd	if_softc_ctx_t scctx = &ctx->ifc_softc_ctx;
4858300113Sscottl	device_t dev = ctx->ifc_dev;
4859304704Sshurd	int nrxqsets = scctx->isc_nrxqsets;
4860304704Sshurd	int ntxqsets = scctx->isc_ntxqsets;
4861300113Sscottl	iflib_txq_t txq;
4862300113Sscottl	iflib_rxq_t rxq;
4863300113Sscottl	iflib_fl_t fl = NULL;
4864304704Sshurd	int i, j, cpu, err, txconf, rxconf;
4865300113Sscottl	iflib_dma_info_t ifdip;
4866304704Sshurd	uint32_t *rxqsizes = scctx->isc_rxqsizes;
4867304704Sshurd	uint32_t *txqsizes = scctx->isc_txqsizes;
4868300113Sscottl	uint8_t nrxqs = sctx->isc_nrxqs;
4869300113Sscottl	uint8_t ntxqs = sctx->isc_ntxqs;
4870300113Sscottl	int nfree_lists = sctx->isc_nfl ? sctx->isc_nfl : 1;
4871300113Sscottl	caddr_t *vaddrs;
4872300113Sscottl	uint64_t *paddrs;
4873300113Sscottl
	KASSERT(ntxqs > 0, ("number of TX queues per qset must be at least 1"));
	KASSERT(nrxqs > 0, ("number of RX queues per qset must be at least 1"));
4876300113Sscottl
4877338871Serj	/* Allocate the TX ring struct memory */
4878333341Sshurd	if (!(ctx->ifc_txqs =
4879300113Sscottl	    (iflib_txq_t) malloc(sizeof(struct iflib_txq) *
4880300113Sscottl	    ntxqsets, M_IFLIB, M_NOWAIT | M_ZERO))) {
4881300113Sscottl		device_printf(dev, "Unable to allocate TX ring memory\n");
4882300113Sscottl		err = ENOMEM;
4883300113Sscottl		goto fail;
4884300113Sscottl	}
4885300113Sscottl
4886300113Sscottl	/* Now allocate the RX */
4887333341Sshurd	if (!(ctx->ifc_rxqs =
4888300113Sscottl	    (iflib_rxq_t) malloc(sizeof(struct iflib_rxq) *
4889300113Sscottl	    nrxqsets, M_IFLIB, M_NOWAIT | M_ZERO))) {
4890300113Sscottl		device_printf(dev, "Unable to allocate RX ring memory\n");
4891300113Sscottl		err = ENOMEM;
4892300113Sscottl		goto rx_fail;
4893300113Sscottl	}
4894300113Sscottl
4895333341Sshurd	txq = ctx->ifc_txqs;
4896333341Sshurd	rxq = ctx->ifc_rxqs;
4897300113Sscottl
4898300113Sscottl	/*
4899300113Sscottl	 * XXX handle allocation failure
4900300113Sscottl	 */
4901302372Snwhitehorn	for (txconf = i = 0, cpu = CPU_FIRST(); i < ntxqsets; i++, txconf++, txq++, cpu = CPU_NEXT(cpu)) {
4902300113Sscottl		/* Set up some basics */
4903300113Sscottl
4904300113Sscottl		if ((ifdip = malloc(sizeof(struct iflib_dma_info) * ntxqs, M_IFLIB, M_WAITOK|M_ZERO)) == NULL) {
4905300113Sscottl			device_printf(dev, "failed to allocate iflib_dma_info\n");
4906300113Sscottl			err = ENOMEM;
4907301567Scem			goto err_tx_desc;
4908300113Sscottl		}
4909300113Sscottl		txq->ift_ifdi = ifdip;
4910300113Sscottl		for (j = 0; j < ntxqs; j++, ifdip++) {
4911300113Sscottl			if (iflib_dma_alloc(ctx, txqsizes[j], ifdip, BUS_DMA_NOWAIT)) {
4912300113Sscottl				device_printf(dev, "Unable to allocate Descriptor memory\n");
4913300113Sscottl				err = ENOMEM;
4914300113Sscottl				goto err_tx_desc;
4915300113Sscottl			}
4916333338Sshurd			txq->ift_txd_size[j] = scctx->isc_txd_size[j];
4917300113Sscottl			bzero((void *)ifdip->idi_vaddr, txqsizes[j]);
4918300113Sscottl		}
4919300113Sscottl		txq->ift_ctx = ctx;
4920300113Sscottl		txq->ift_id = i;
4921304704Sshurd		if (sctx->isc_flags & IFLIB_HAS_TXCQ) {
4922304704Sshurd			txq->ift_br_offset = 1;
4923304704Sshurd		} else {
4924304704Sshurd			txq->ift_br_offset = 0;
4925304704Sshurd		}
4926300113Sscottl		/* XXX fix this */
4927302372Snwhitehorn		txq->ift_timer.c_cpu = cpu;
4928300113Sscottl
4929300113Sscottl		if (iflib_txsd_alloc(txq)) {
4930300113Sscottl			device_printf(dev, "Critical Failure setting up TX buffers\n");
4931300113Sscottl			err = ENOMEM;
4932300113Sscottl			goto err_tx_desc;
4933300113Sscottl		}
4934300113Sscottl
4935300113Sscottl		/* Initialize the TX lock */
4936300113Sscottl		snprintf(txq->ift_mtx_name, MTX_NAME_LEN, "%s:tx(%d):callout",
4937300113Sscottl		    device_get_nameunit(dev), txq->ift_id);
4938300113Sscottl		mtx_init(&txq->ift_mtx, txq->ift_mtx_name, NULL, MTX_DEF);
4939300113Sscottl		callout_init_mtx(&txq->ift_timer, &txq->ift_mtx, 0);
4940300113Sscottl
4941300113Sscottl		snprintf(txq->ift_db_mtx_name, MTX_NAME_LEN, "%s:tx(%d):db",
4942300113Sscottl			 device_get_nameunit(dev), txq->ift_id);
4943300113Sscottl
4944333338Sshurd		err = ifmp_ring_alloc(&txq->ift_br, 2048, txq, iflib_txq_drain,
4945333338Sshurd				      iflib_txq_can_drain, M_IFLIB, M_WAITOK);
4946333338Sshurd		if (err) {
4947333338Sshurd			/* XXX free any allocated rings */
4948333338Sshurd			device_printf(dev, "Unable to allocate buf_ring\n");
4949333338Sshurd			goto err_tx_desc;
4950300113Sscottl		}
4951300113Sscottl	}
4952300113Sscottl
4953300113Sscottl	for (rxconf = i = 0; i < nrxqsets; i++, rxconf++, rxq++) {
4954300113Sscottl		/* Set up some basics */
4955358272Shselasky		callout_init(&rxq->ifr_watchdog, 1);
4956300113Sscottl
4957300113Sscottl		if ((ifdip = malloc(sizeof(struct iflib_dma_info) * nrxqs, M_IFLIB, M_WAITOK|M_ZERO)) == NULL) {
4958300113Sscottl			device_printf(dev, "failed to allocate iflib_dma_info\n");
4959300113Sscottl			err = ENOMEM;
4960301567Scem			goto err_tx_desc;
4961300113Sscottl		}
4962300113Sscottl
4963300113Sscottl		rxq->ifr_ifdi = ifdip;
4964333338Sshurd		/* XXX this needs to be changed if #rx queues != #tx queues */
4965333338Sshurd		rxq->ifr_ntxqirq = 1;
4966333338Sshurd		rxq->ifr_txqid[0] = i;
4967300113Sscottl		for (j = 0; j < nrxqs; j++, ifdip++) {
4968300113Sscottl			if (iflib_dma_alloc(ctx, rxqsizes[j], ifdip, BUS_DMA_NOWAIT)) {
4969300113Sscottl				device_printf(dev, "Unable to allocate Descriptor memory\n");
4970300113Sscottl				err = ENOMEM;
4971300113Sscottl				goto err_tx_desc;
4972300113Sscottl			}
4973300113Sscottl			bzero((void *)ifdip->idi_vaddr, rxqsizes[j]);
4974300113Sscottl		}
4975300113Sscottl		rxq->ifr_ctx = ctx;
4976300113Sscottl		rxq->ifr_id = i;
4977304704Sshurd		if (sctx->isc_flags & IFLIB_HAS_RXCQ) {
4978304704Sshurd			rxq->ifr_fl_offset = 1;
4979300113Sscottl		} else {
4980304704Sshurd			rxq->ifr_fl_offset = 0;
4981300113Sscottl		}
4982300113Sscottl		rxq->ifr_nfl = nfree_lists;
4983300113Sscottl		if (!(fl =
4984300113Sscottl			  (iflib_fl_t) malloc(sizeof(struct iflib_fl) * nfree_lists, M_IFLIB, M_NOWAIT | M_ZERO))) {
4985300113Sscottl			device_printf(dev, "Unable to allocate free list memory\n");
4986300113Sscottl			err = ENOMEM;
4987301567Scem			goto err_tx_desc;
4988300113Sscottl		}
4989300113Sscottl		rxq->ifr_fl = fl;
4990300113Sscottl		for (j = 0; j < nfree_lists; j++) {
4991333338Sshurd			fl[j].ifl_rxq = rxq;
4992333338Sshurd			fl[j].ifl_id = j;
4993333338Sshurd			fl[j].ifl_ifdi = &rxq->ifr_ifdi[j + rxq->ifr_fl_offset];
4994333338Sshurd			fl[j].ifl_rxd_size = scctx->isc_rxd_size[j];
4995300113Sscottl		}
		/* Allocate receive buffers for the ring */
4997300113Sscottl		if (iflib_rxsd_alloc(rxq)) {
4998300113Sscottl			device_printf(dev,
4999300113Sscottl			    "Critical Failure setting up receive buffers\n");
5000300113Sscottl			err = ENOMEM;
5001300113Sscottl			goto err_rx_desc;
5002300113Sscottl		}
5003333338Sshurd
5004333338Sshurd		for (j = 0, fl = rxq->ifr_fl; j < rxq->ifr_nfl; j++, fl++)
5005333338Sshurd			fl->ifl_rx_bitmap = bit_alloc(fl->ifl_size, M_IFLIB, M_WAITOK|M_ZERO);
5006300113Sscottl	}
5007300113Sscottl
5008300113Sscottl	/* TXQs */
5009300113Sscottl	vaddrs = malloc(sizeof(caddr_t)*ntxqsets*ntxqs, M_IFLIB, M_WAITOK);
5010300113Sscottl	paddrs = malloc(sizeof(uint64_t)*ntxqsets*ntxqs, M_IFLIB, M_WAITOK);
5011300113Sscottl	for (i = 0; i < ntxqsets; i++) {
5012300113Sscottl		iflib_dma_info_t di = ctx->ifc_txqs[i].ift_ifdi;
5013300113Sscottl
5014300113Sscottl		for (j = 0; j < ntxqs; j++, di++) {
5015300113Sscottl			vaddrs[i*ntxqs + j] = di->idi_vaddr;
5016300113Sscottl			paddrs[i*ntxqs + j] = di->idi_paddr;
5017300113Sscottl		}
5018300113Sscottl	}
5019300113Sscottl	if ((err = IFDI_TX_QUEUES_ALLOC(ctx, vaddrs, paddrs, ntxqs, ntxqsets)) != 0) {
5020300113Sscottl		device_printf(ctx->ifc_dev, "device queue allocation failed\n");
5021300113Sscottl		iflib_tx_structures_free(ctx);
5022300113Sscottl		free(vaddrs, M_IFLIB);
5023300113Sscottl		free(paddrs, M_IFLIB);
5024300113Sscottl		goto err_rx_desc;
5025300113Sscottl	}
5026300113Sscottl	free(vaddrs, M_IFLIB);
5027300113Sscottl	free(paddrs, M_IFLIB);
5028300113Sscottl
5029300113Sscottl	/* RXQs */
5030300113Sscottl	vaddrs = malloc(sizeof(caddr_t)*nrxqsets*nrxqs, M_IFLIB, M_WAITOK);
5031300113Sscottl	paddrs = malloc(sizeof(uint64_t)*nrxqsets*nrxqs, M_IFLIB, M_WAITOK);
5032300113Sscottl	for (i = 0; i < nrxqsets; i++) {
5033300113Sscottl		iflib_dma_info_t di = ctx->ifc_rxqs[i].ifr_ifdi;
5034300113Sscottl
5035300113Sscottl		for (j = 0; j < nrxqs; j++, di++) {
5036300113Sscottl			vaddrs[i*nrxqs + j] = di->idi_vaddr;
5037300113Sscottl			paddrs[i*nrxqs + j] = di->idi_paddr;
5038300113Sscottl		}
5039300113Sscottl	}
5040300113Sscottl	if ((err = IFDI_RX_QUEUES_ALLOC(ctx, vaddrs, paddrs, nrxqs, nrxqsets)) != 0) {
5041300113Sscottl		device_printf(ctx->ifc_dev, "device queue allocation failed\n");
5042300113Sscottl		iflib_tx_structures_free(ctx);
5043300113Sscottl		free(vaddrs, M_IFLIB);
5044300113Sscottl		free(paddrs, M_IFLIB);
5045300113Sscottl		goto err_rx_desc;
5046300113Sscottl	}
5047300113Sscottl	free(vaddrs, M_IFLIB);
5048300113Sscottl	free(paddrs, M_IFLIB);
5049300113Sscottl
5050300113Sscottl	return (0);
5051300113Sscottl
5052300113Sscottl/* XXX handle allocation failure changes */
5053300113Sscottlerr_rx_desc:
5054300113Sscottlerr_tx_desc:
5055333341Sshurdrx_fail:
5056300113Sscottl	if (ctx->ifc_rxqs != NULL)
5057300113Sscottl		free(ctx->ifc_rxqs, M_IFLIB);
5058300113Sscottl	ctx->ifc_rxqs = NULL;
5059300113Sscottl	if (ctx->ifc_txqs != NULL)
5060300113Sscottl		free(ctx->ifc_txqs, M_IFLIB);
5061300113Sscottl	ctx->ifc_txqs = NULL;
5062300113Sscottlfail:
5063300113Sscottl	return (err);
5064300113Sscottl}
5065300113Sscottl
5066300113Sscottlstatic int
5067300113Sscottliflib_tx_structures_setup(if_ctx_t ctx)
5068300113Sscottl{
5069300113Sscottl	iflib_txq_t txq = ctx->ifc_txqs;
5070300113Sscottl	int i;
5071300113Sscottl
5072300113Sscottl	for (i = 0; i < NTXQSETS(ctx); i++, txq++)
5073300113Sscottl		iflib_txq_setup(txq);
5074300113Sscottl
5075300113Sscottl	return (0);
5076300113Sscottl}
5077300113Sscottl
5078300113Sscottlstatic void
5079300113Sscottliflib_tx_structures_free(if_ctx_t ctx)
5080300113Sscottl{
5081300113Sscottl	iflib_txq_t txq = ctx->ifc_txqs;
5082300113Sscottl	int i, j;
5083300113Sscottl
5084300113Sscottl	for (i = 0; i < NTXQSETS(ctx); i++, txq++) {
5085300113Sscottl		for (j = 0; j < ctx->ifc_nhwtxqs; j++)
5086300113Sscottl			iflib_dma_free(&txq->ift_ifdi[j]);
5087361058Serj		iflib_txq_destroy(txq);
5088300113Sscottl	}
5089300113Sscottl	free(ctx->ifc_txqs, M_IFLIB);
5090300113Sscottl	ctx->ifc_txqs = NULL;
5091300113Sscottl	IFDI_QUEUES_FREE(ctx);
5092300113Sscottl}
5093300113Sscottl
5094300113Sscottl/*********************************************************************
5095300113Sscottl *
5096300113Sscottl *  Initialize all receive rings.
5097300113Sscottl *
5098300113Sscottl **********************************************************************/
5099300113Sscottlstatic int
5100300113Sscottliflib_rx_structures_setup(if_ctx_t ctx)
5101300113Sscottl{
5102300113Sscottl	iflib_rxq_t rxq = ctx->ifc_rxqs;
5103300147Sbz	int q;
5104300147Sbz#if defined(INET6) || defined(INET)
5105300147Sbz	int i, err;
5106300147Sbz#endif
5107300113Sscottl
5108300113Sscottl	for (q = 0; q < ctx->ifc_softc_ctx.isc_nrxqsets; q++, rxq++) {
5109300147Sbz#if defined(INET6) || defined(INET)
5110300113Sscottl		tcp_lro_free(&rxq->ifr_lc);
5111304704Sshurd		if ((err = tcp_lro_init_args(&rxq->ifr_lc, ctx->ifc_ifp,
5112304704Sshurd		    TCP_LRO_ENTRIES, min(1024,
5113304704Sshurd		    ctx->ifc_softc_ctx.isc_nrxd[rxq->ifr_fl_offset]))) != 0) {
5114300113Sscottl			device_printf(ctx->ifc_dev, "LRO Initialization failed!\n");
5115300113Sscottl			goto fail;
5116300113Sscottl		}
5117300113Sscottl		rxq->ifr_lro_enabled = TRUE;
5118300147Sbz#endif
5119300113Sscottl		IFDI_RXQ_SETUP(ctx, rxq->ifr_id);
5120300113Sscottl	}
5121300113Sscottl	return (0);
5122300147Sbz#if defined(INET6) || defined(INET)
5123300113Sscottlfail:
	/*
	 * Free the RX software descriptors allocated so far.  We only
	 * handle the rings that completed; the failing case will have
	 * cleaned up after itself.  'q' failed, so it's the terminus.
	 */
5129300113Sscottl	rxq = ctx->ifc_rxqs;
5130300113Sscottl	for (i = 0; i < q; ++i, rxq++) {
5131300113Sscottl		iflib_rx_sds_free(rxq);
5132300113Sscottl		rxq->ifr_cq_gen = rxq->ifr_cq_cidx = rxq->ifr_cq_pidx = 0;
5133300113Sscottl	}
5134300113Sscottl	return (err);
5135300147Sbz#endif
5136300113Sscottl}
5137300113Sscottl
5138300113Sscottl/*********************************************************************
5139300113Sscottl *
5140300113Sscottl *  Free all receive rings.
5141300113Sscottl *
5142300113Sscottl **********************************************************************/
5143300113Sscottlstatic void
5144300113Sscottliflib_rx_structures_free(if_ctx_t ctx)
5145300113Sscottl{
5146300113Sscottl	iflib_rxq_t rxq = ctx->ifc_rxqs;
5147361063Serj	if_shared_ctx_t sctx = ctx->ifc_sctx;
5148361063Serj	int i, j;
5149300113Sscottl
5150361063Serj	for (i = 0; i < ctx->ifc_softc_ctx.isc_nrxqsets; i++, rxq++) {
5151361063Serj		for (j = 0; j < sctx->isc_nrxqs; j++)
5152361063Serj			iflib_dma_free(&rxq->ifr_ifdi[j]);
5153300113Sscottl		iflib_rx_sds_free(rxq);
5154300113Sscottl	}
5155347197Serj	free(ctx->ifc_rxqs, M_IFLIB);
5156347197Serj	ctx->ifc_rxqs = NULL;
5157300113Sscottl}
5158300113Sscottl
5159300113Sscottlstatic int
5160300113Sscottliflib_qset_structures_setup(if_ctx_t ctx)
5161300113Sscottl{
5162300113Sscottl	int err;
5163300113Sscottl
5164333503Sshurd	/*
5165333503Sshurd	 * It is expected that the caller takes care of freeing queues if this
5166333503Sshurd	 * fails.
5167333503Sshurd	 */
5168333503Sshurd	if ((err = iflib_tx_structures_setup(ctx)) != 0) {
5169333503Sshurd		device_printf(ctx->ifc_dev, "iflib_tx_structures_setup failed: %d\n", err);
5170300113Sscottl		return (err);
5171333503Sshurd	}
5172300113Sscottl
5173333503Sshurd	if ((err = iflib_rx_structures_setup(ctx)) != 0)
5174300113Sscottl		device_printf(ctx->ifc_dev, "iflib_rx_structures_setup failed: %d\n", err);
5175333503Sshurd
5176300113Sscottl	return (err);
5177300113Sscottl}
5178300113Sscottl
5179300113Sscottlint
5180300113Sscottliflib_irq_alloc(if_ctx_t ctx, if_irq_t irq, int rid,
5181300113Sscottl				driver_filter_t filter, void *filter_arg, driver_intr_t handler, void *arg, char *name)
5182300113Sscottl{
5183300113Sscottl
5184300113Sscottl	return (_iflib_irq_alloc(ctx, irq, rid, filter, handler, arg, name));
5185300113Sscottl}
5186300113Sscottl
5187333338Sshurd#ifdef SMP
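/* Return the CPU id of the (qid % ncpus)'th set bit in the context's CPU set. */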
5188333338Sshurdstatic int
5189333338Sshurdfind_nth(if_ctx_t ctx, int qid)
5190300113Sscottl{
5191333338Sshurd	cpuset_t cpus;
5192333338Sshurd	int i, cpuid, eqid, count;
5193300113Sscottl
5194333338Sshurd	CPU_COPY(&ctx->ifc_cpus, &cpus);
5195333338Sshurd	count = CPU_COUNT(&cpus);
5196333338Sshurd	eqid = qid % count;
	/* clear up to the eqid'th set bit */
5198333338Sshurd	for (i = 0; i < eqid; i++) {
5199333338Sshurd		cpuid = CPU_FFS(&cpus);
5200333338Sshurd		MPASS(cpuid != 0);
5201333338Sshurd		CPU_CLR(cpuid-1, &cpus);
5202300113Sscottl	}
5203333338Sshurd	cpuid = CPU_FFS(&cpus);
5204333338Sshurd	MPASS(cpuid != 0);
5205333338Sshurd	return (cpuid-1);
5206300113Sscottl}
5207300113Sscottl
5208333338Sshurd#ifdef SCHED_ULE
5209333338Sshurdextern struct cpu_group *cpu_top;              /* CPU topology */
5210333338Sshurd
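/* Return the index of the child group whose CPU mask contains 'cpu', or -1. */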
5211333338Sshurdstatic int
5212333338Sshurdfind_child_with_core(int cpu, struct cpu_group *grp)
5213333338Sshurd{
5214333338Sshurd	int i;
5215333338Sshurd
5216333338Sshurd	if (grp->cg_children == 0)
5217333338Sshurd		return -1;
5218333338Sshurd
5219333338Sshurd	MPASS(grp->cg_child);
5220333338Sshurd	for (i = 0; i < grp->cg_children; i++) {
5221333338Sshurd		if (CPU_ISSET(cpu, &grp->cg_child[i].cg_mask))
5222333338Sshurd			return i;
5223333338Sshurd	}
5224333338Sshurd
5225333338Sshurd	return -1;
5226333338Sshurd}
5227333338Sshurd
/*
 * Find the nth "close" core to the specified core.
 * "close" is defined as the deepest level that shares
 * at least an L2 cache.  With threads, this will be
 * threads on the same core.  If the shared cache is L3
 * or higher, simply return the same core.
 */
5235333338Sshurdstatic int
5236333338Sshurdfind_close_core(int cpu, int core_offset)
5237333338Sshurd{
5238333338Sshurd	struct cpu_group *grp;
5239333338Sshurd	int i;
5240333338Sshurd	int fcpu;
5241333338Sshurd	cpuset_t cs;
5242333338Sshurd
5243333338Sshurd	grp = cpu_top;
5244333338Sshurd	if (grp == NULL)
5245333338Sshurd		return (cpu);
5246333338Sshurd	i = 0;
5247333338Sshurd	while ((i = find_child_with_core(cpu, grp)) != -1) {
5248333338Sshurd		/* If the child only has one cpu, don't descend */
5249333338Sshurd		if (grp->cg_child[i].cg_count <= 1)
5250333338Sshurd			break;
5251333338Sshurd		grp = &grp->cg_child[i];
5252333338Sshurd	}
5253333338Sshurd
5254333338Sshurd	/* If they don't share at least an L2 cache, use the same CPU */
5255333338Sshurd	if (grp->cg_level > CG_SHARE_L2 || grp->cg_level == CG_SHARE_NONE)
5256333338Sshurd		return (cpu);
5257333338Sshurd
5258333338Sshurd	/* Now pick one */
5259333338Sshurd	CPU_COPY(&grp->cg_mask, &cs);
5260333338Sshurd
5261333338Sshurd	/* Add the selected CPU offset to core offset. */
5262333338Sshurd	for (i = 0; (fcpu = CPU_FFS(&cs)) != 0; i++) {
5263333338Sshurd		if (fcpu - 1 == cpu)
5264333338Sshurd			break;
5265333338Sshurd		CPU_CLR(fcpu - 1, &cs);
5266333338Sshurd	}
5267333338Sshurd	MPASS(fcpu);
5268333338Sshurd
5269333338Sshurd	core_offset += i;
5270333338Sshurd
5271333338Sshurd	CPU_COPY(&grp->cg_mask, &cs);
5272333338Sshurd	for (i = core_offset % grp->cg_count; i > 0; i--) {
5273333338Sshurd		MPASS(CPU_FFS(&cs));
5274333338Sshurd		CPU_CLR(CPU_FFS(&cs) - 1, &cs);
5275333338Sshurd	}
5276333338Sshurd	MPASS(CPU_FFS(&cs));
5277333338Sshurd	return (CPU_FFS(&cs) - 1);
5278333338Sshurd}
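
/*
 * Worked example (assuming 2-way SMT cores sharing an L2): for cpu 2 the
 * walk descends cpu_top to the group whose cg_mask is {2, 3}; a
 * core_offset of 1 then selects the sibling thread, CPU 3.  On a flat
 * topology, or one whose deepest shared level is L3, the CG_SHARE_L2
 * test bails out and CPU 2 is returned unchanged.
 */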
5279333338Sshurd#else
5280333338Sshurdstatic int
5281333338Sshurdfind_close_core(int cpu, int core_offset __unused)
5282333338Sshurd{
5283333338Sshurd	return (cpu);
5284333338Sshurd}
5285333338Sshurd#endif
5286333338Sshurd
5287333338Sshurdstatic int
5288333338Sshurdget_core_offset(if_ctx_t ctx, iflib_intr_type_t type, int qid)
5289333338Sshurd{
5290333338Sshurd	switch (type) {
5291333338Sshurd	case IFLIB_INTR_TX:
5292333338Sshurd		/* TX queues get cores which share at least an L2 cache with the corresponding RX queue */
5293333338Sshurd		/* XXX handle multiple RX threads per core and more than two cores per L2 group */
5294333338Sshurd		return (qid / CPU_COUNT(&ctx->ifc_cpus) + 1);
5295333338Sshurd	case IFLIB_INTR_RX:
5296333338Sshurd	case IFLIB_INTR_RXTX:
5297333338Sshurd		/* RX queues get the specified core */
5298333338Sshurd		return (qid / CPU_COUNT(&ctx->ifc_cpus));
5299333338Sshurd	default:
5300333338Sshurd		return (-1);
5301333338Sshurd	}
5302333338Sshurd}
5303333338Sshurd#else
5304333338Sshurd#define get_core_offset(ctx, type, qid)	CPU_FIRST()
5305333338Sshurd#define find_close_core(cpuid, tid)	CPU_FIRST()
5306333338Sshurd#define find_nth(ctx, gid)		CPU_FIRST()
5307333338Sshurd#endif
5308333338Sshurd
5309333338Sshurd/* Affinity-binding helper shared by the interrupt setup paths, to avoid copy/paste */
5310333338Sshurdstatic inline int
5311333338Sshurdiflib_irq_set_affinity(if_ctx_t ctx, int irq, iflib_intr_type_t type, int qid,
5312333338Sshurd    struct grouptask *gtask, struct taskqgroup *tqg, void *uniq, char *name)
5313333338Sshurd{
5314333338Sshurd	int cpuid;
5315333338Sshurd	int err, tid;
5316333338Sshurd
5317333338Sshurd	cpuid = find_nth(ctx, qid);
5318333338Sshurd	tid = get_core_offset(ctx, type, qid);
5319333338Sshurd	MPASS(tid >= 0);
5320333338Sshurd	cpuid = find_close_core(cpuid, tid);
5321333338Sshurd	err = taskqgroup_attach_cpu(tqg, gtask, uniq, cpuid, irq, name);
5322333338Sshurd	if (err) {
5323333338Sshurd		device_printf(ctx->ifc_dev, "taskqgroup_attach_cpu failed %d\n", err);
5324333338Sshurd		return (err);
5325333338Sshurd	}
5326333338Sshurd#ifdef notyet
5327333338Sshurd	if (cpuid > ctx->ifc_cpuid_highest)
5328333338Sshurd		ctx->ifc_cpuid_highest = cpuid;
5329333338Sshurd#endif
5330333338Sshurd	return (0);
5331333338Sshurd}
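
/*
 * The pipeline above, in brief: find_nth() spreads queue qid round-robin
 * across the set bits of ifc_cpus, get_core_offset() biases TX queues one
 * SMT sibling away from the matching RX queue, and find_close_core()
 * resolves that offset within the deepest L2-sharing group before the
 * grouptask is pinned via taskqgroup_attach_cpu().
 */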
5332333338Sshurd
5333300113Sscottlint
5334300113Sscottliflib_irq_alloc_generic(if_ctx_t ctx, if_irq_t irq, int rid,
5335300113Sscottl						iflib_intr_type_t type, driver_filter_t *filter,
5336300113Sscottl						void *filter_arg, int qid, char *name)
5337300113Sscottl{
5338300113Sscottl	struct grouptask *gtask;
5339300113Sscottl	struct taskqgroup *tqg;
5340300113Sscottl	iflib_filter_info_t info;
5341304704Sshurd	gtask_fn_t *fn;
5342300113Sscottl	int tqrid, err;
5343333338Sshurd	driver_filter_t *intr_fast;
5344300113Sscottl	void *q;
5345300113Sscottl
5346300113Sscottl	info = &ctx->ifc_filter_info;
5347333338Sshurd	tqrid = rid;
5348300113Sscottl
5349300113Sscottl	switch (type) {
5350300113Sscottl	/* XXX merge tx/rx for netmap? */
5351300113Sscottl	case IFLIB_INTR_TX:
5352300113Sscottl		q = &ctx->ifc_txqs[qid];
5353300113Sscottl		info = &ctx->ifc_txqs[qid].ift_filter_info;
5354300113Sscottl		gtask = &ctx->ifc_txqs[qid].ift_task;
5355333338Sshurd		tqg = qgroup_if_io_tqg;
5356300113Sscottl		fn = _task_fn_tx;
5357333338Sshurd		intr_fast = iflib_fast_intr;
5358333338Sshurd		GROUPTASK_INIT(gtask, 0, fn, q);
5359300113Sscottl		break;
5360300113Sscottl	case IFLIB_INTR_RX:
5361300113Sscottl		q = &ctx->ifc_rxqs[qid];
5362300113Sscottl		info = &ctx->ifc_rxqs[qid].ifr_filter_info;
5363300113Sscottl		gtask = &ctx->ifc_rxqs[qid].ifr_task;
5364333338Sshurd		tqg = qgroup_if_io_tqg;
5365300113Sscottl		fn = _task_fn_rx;
5366333338Sshurd		intr_fast = iflib_fast_intr;
5367333338Sshurd		GROUPTASK_INIT(gtask, 0, fn, q);
5368300113Sscottl		break;
5369333338Sshurd	case IFLIB_INTR_RXTX:
5370333338Sshurd		q = &ctx->ifc_rxqs[qid];
5371333338Sshurd		info = &ctx->ifc_rxqs[qid].ifr_filter_info;
5372333338Sshurd		gtask = &ctx->ifc_rxqs[qid].ifr_task;
5373333338Sshurd		tqg = qgroup_if_io_tqg;
5374333338Sshurd		fn = _task_fn_rx;
5375333338Sshurd		intr_fast = iflib_fast_intr_rxtx;
5376333338Sshurd		GROUPTASK_INIT(gtask, 0, fn, q);
5377333338Sshurd		break;
5378300113Sscottl	case IFLIB_INTR_ADMIN:
5379300113Sscottl		q = ctx;
5380333338Sshurd		tqrid = -1;
5381300113Sscottl		info = &ctx->ifc_filter_info;
5382300113Sscottl		gtask = &ctx->ifc_admin_task;
5383300113Sscottl		tqg = qgroup_if_config_tqg;
5384300113Sscottl		fn = _task_fn_admin;
5385333338Sshurd		intr_fast = iflib_fast_intr_ctx;
5386300113Sscottl		break;
5387300113Sscottl	default:
5388300113Sscottl		panic("unknown net intr type");
5389300113Sscottl	}
5390300113Sscottl
5391300113Sscottl	info->ifi_filter = filter;
5392300113Sscottl	info->ifi_filter_arg = filter_arg;
5393300113Sscottl	info->ifi_task = gtask;
5394333338Sshurd	info->ifi_ctx = q;
5395300113Sscottl
5396333338Sshurd	err = _iflib_irq_alloc(ctx, irq, rid, intr_fast, NULL, info, name);
5397333338Sshurd	if (err != 0) {
5398333338Sshurd		device_printf(ctx->ifc_dev, "_iflib_irq_alloc failed %d\n", err);
5399333338Sshurd		return (err);
5400333338Sshurd	}
5401333338Sshurd	if (type == IFLIB_INTR_ADMIN)
5402333338Sshurd		return (0);
5403300113Sscottl
5404300113Sscottl	if (tqrid != -1) {
5405333338Sshurd		err = iflib_irq_set_affinity(ctx, rman_get_start(irq->ii_res), type, qid, gtask, tqg, q, name);
5406333338Sshurd		if (err)
5407333338Sshurd			return (err);
5408333338Sshurd	} else {
5409333338Sshurd		taskqgroup_attach(tqg, gtask, q, rman_get_start(irq->ii_res), name);
5410333338Sshurd	}
5411300113Sscottl
5412300113Sscottl	return (0);
5413300113Sscottl}
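
/*
 * Usage sketch (hypothetical driver "foo"; sc, foo_msix_que and the queue
 * arrays are illustrative, not iflib API): one MSI-X vector per RX queue,
 * with TX completions serviced from the same vector via IFLIB_INTR_RXTX:
 *
 *	for (i = 0; i < scctx->isc_nrxqsets; i++) {
 *		snprintf(buf, sizeof(buf), "rxq%d", i);
 *		err = iflib_irq_alloc_generic(ctx, &sc->rx_irqs[i], i + 1,
 *		    IFLIB_INTR_RXTX, foo_msix_que, &sc->rx_queues[i], i, buf);
 *		if (err != 0)
 *			return (err);
 *	}
 */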
5414300113Sscottl
5415300113Sscottlvoid
5416347197Serjiflib_softirq_alloc_generic(if_ctx_t ctx, if_irq_t irq, iflib_intr_type_t type, void *arg, int qid, char *name)
5417300113Sscottl{
5418300113Sscottl	struct grouptask *gtask;
5419300113Sscottl	struct taskqgroup *tqg;
5420304704Sshurd	gtask_fn_t *fn;
5421300113Sscottl	void *q;
5422333338Sshurd	int irq_num = -1;
5423333338Sshurd	int err;
5424300113Sscottl
5425300113Sscottl	switch (type) {
5426300113Sscottl	case IFLIB_INTR_TX:
5427300113Sscottl		q = &ctx->ifc_txqs[qid];
5428300113Sscottl		gtask = &ctx->ifc_txqs[qid].ift_task;
5429333338Sshurd		tqg = qgroup_if_io_tqg;
5430300113Sscottl		fn = _task_fn_tx;
5431333338Sshurd		if (irq != NULL)
5432333338Sshurd			irq_num = rman_get_start(irq->ii_res);
5433300113Sscottl		break;
5434300113Sscottl	case IFLIB_INTR_RX:
5435300113Sscottl		q = &ctx->ifc_rxqs[qid];
5436300113Sscottl		gtask = &ctx->ifc_rxqs[qid].ifr_task;
5437333338Sshurd		tqg = qgroup_if_io_tqg;
5438300113Sscottl		fn = _task_fn_rx;
5439333338Sshurd		if (irq != NULL)
5440333338Sshurd			irq_num = rman_get_start(irq->ii_res);
5441300113Sscottl		break;
5442300113Sscottl	case IFLIB_INTR_IOV:
5443300113Sscottl		q = ctx;
5444300113Sscottl		gtask = &ctx->ifc_vflr_task;
5445300113Sscottl		tqg = qgroup_if_config_tqg;
5446300113Sscottl		fn = _task_fn_iov;
5447300113Sscottl		break;
5448300113Sscottl	default:
5449300113Sscottl		panic("unknown net intr type");
5450300113Sscottl	}
5451300113Sscottl	GROUPTASK_INIT(gtask, 0, fn, q);
5452333338Sshurd	if (irq_num != -1) {
5453333338Sshurd		err = iflib_irq_set_affinity(ctx, irq_num, type, qid, gtask, tqg, q, name);
5454333338Sshurd		if (err)
5455333338Sshurd			taskqgroup_attach(tqg, gtask, q, irq_num, name);
5456333338Sshurd	} else {
5458333338Sshurd		taskqgroup_attach(tqg, gtask, q, irq_num, name);
5459333338Sshurd	}
5460300113Sscottl}
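
/*
 * Usage sketch (hypothetical names): a driver that handles TX completions
 * on its RX vector still registers a deferred TX grouptask per queue;
 * passing the RX queue's irq keeps the task on the same CPU:
 *
 *	iflib_softirq_alloc_generic(ctx, &sc->rx_irqs[i], IFLIB_INTR_TX,
 *	    &sc->tx_queues[i], i, "tx");
 */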
5461300113Sscottl
5462300113Sscottlvoid
5463300113Sscottliflib_irq_free(if_ctx_t ctx, if_irq_t irq)
5464300113Sscottl{
5465300113Sscottl	if (irq->ii_tag)
5466300113Sscottl		bus_teardown_intr(ctx->ifc_dev, irq->ii_res, irq->ii_tag);
5467300113Sscottl
5468300113Sscottl	if (irq->ii_res)
5469300113Sscottl		bus_release_resource(ctx->ifc_dev, SYS_RES_IRQ, irq->ii_rid, irq->ii_res);
5470300113Sscottl}
5471300113Sscottl
5472300113Sscottlstatic int
5473300113Sscottliflib_legacy_setup(if_ctx_t ctx, driver_filter_t filter, void *filter_arg, int *rid, char *name)
5474300113Sscottl{
5475300113Sscottl	iflib_txq_t txq = ctx->ifc_txqs;
5476300113Sscottl	iflib_rxq_t rxq = ctx->ifc_rxqs;
5477300113Sscottl	if_irq_t irq = &ctx->ifc_legacy_irq;
5478300113Sscottl	iflib_filter_info_t info;
5479300113Sscottl	struct grouptask *gtask;
5480300113Sscottl	struct taskqgroup *tqg;
5481304704Sshurd	gtask_fn_t *fn;
5482300113Sscottl	int tqrid;
5483300113Sscottl	void *q;
5484300113Sscottl	int err;
5485300113Sscottl
5486300113Sscottl	q = &ctx->ifc_rxqs[0];
5487300113Sscottl	info = &rxq[0].ifr_filter_info;
5488300113Sscottl	gtask = &rxq[0].ifr_task;
5489333338Sshurd	tqg = qgroup_if_io_tqg;
5490300113Sscottl	tqrid = irq->ii_rid = *rid;
5491300113Sscottl	fn = _task_fn_rx;
5492300113Sscottl
5493300113Sscottl	ctx->ifc_flags |= IFC_LEGACY;
5494300113Sscottl	info->ifi_filter = filter;
5495300113Sscottl	info->ifi_filter_arg = filter_arg;
5496300113Sscottl	info->ifi_task = gtask;
5497300113Sscottl
5498300113Sscottl	/* We allocate a single interrupt resource */
5499333338Sshurd	if ((err = _iflib_irq_alloc(ctx, irq, tqrid, iflib_fast_intr_ctx, NULL, info, name)) != 0)
5500300113Sscottl		return (err);
5501300113Sscottl	GROUPTASK_INIT(gtask, 0, fn, q);
5502333338Sshurd	taskqgroup_attach(tqg, gtask, q, rman_get_start(irq->ii_res), name);
5503300113Sscottl
5504300113Sscottl	GROUPTASK_INIT(&txq->ift_task, 0, _task_fn_tx, txq);
5505333338Sshurd	taskqgroup_attach(qgroup_if_io_tqg, &txq->ift_task, txq, rman_get_start(irq->ii_res), "tx");
5506300113Sscottl	return (0);
5507300113Sscottl}
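
/*
 * Note on the INTx path above: a single interrupt resource serves both
 * directions.  The RX grouptask owns the filter, and the TX grouptask is
 * attached to the same vector, so one legacy interrupt fans out to both
 * _task_fn_rx and _task_fn_tx.
 */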
5508300113Sscottl
5509300113Sscottlvoid
5510300113Sscottliflib_led_create(if_ctx_t ctx)
5511300113Sscottl{
5512300113Sscottl
5513300113Sscottl	ctx->ifc_led_dev = led_create(iflib_led_func, ctx,
5514333338Sshurd	    device_get_nameunit(ctx->ifc_dev));
5515300113Sscottl}
5516300113Sscottl
5517300113Sscottlvoid
5518300113Sscottliflib_tx_intr_deferred(if_ctx_t ctx, int txqid)
5519300113Sscottl{
5520300113Sscottl
5521300113Sscottl	GROUPTASK_ENQUEUE(&ctx->ifc_txqs[txqid].ift_task);
5522300113Sscottl}
5523300113Sscottl
5524300113Sscottlvoid
5525300113Sscottliflib_rx_intr_deferred(if_ctx_t ctx, int rxqid)
5526300113Sscottl{
5527300113Sscottl
5528300113Sscottl	GROUPTASK_ENQUEUE(&ctx->ifc_rxqs[rxqid].ifr_task);
5529300113Sscottl}
5530300113Sscottl
5531300113Sscottlvoid
5532300113Sscottliflib_admin_intr_deferred(if_ctx_t ctx)
5533300113Sscottl{
5534333338Sshurd#ifdef INVARIANTS
5535333338Sshurd	struct grouptask *gtask;
5536300113Sscottl
5537333338Sshurd	gtask = &ctx->ifc_admin_task;
5538333338Sshurd	MPASS(gtask != NULL && gtask->gt_taskqueue != NULL);
5539333338Sshurd#endif
5540333338Sshurd
5541300113Sscottl	GROUPTASK_ENQUEUE(&ctx->ifc_admin_task);
5542300113Sscottl}
5543300113Sscottl
5544300113Sscottlvoid
5545300113Sscottliflib_iov_intr_deferred(if_ctx_t ctx)
5546300113Sscottl{
5547300113Sscottl
5548300113Sscottl	GROUPTASK_ENQUEUE(&ctx->ifc_vflr_task);
5549300113Sscottl}
5550300113Sscottl
5551300113Sscottlvoid
5552300113Sscottliflib_io_tqg_attach(struct grouptask *gt, void *uniq, int cpu, char *name)
5553300113Sscottl{
5554300113Sscottl
5555333338Sshurd	taskqgroup_attach_cpu(qgroup_if_io_tqg, gt, uniq, cpu, -1, name);
5556300113Sscottl}
5557300113Sscottl
5558300113Sscottlvoid
5559304704Sshurdiflib_config_gtask_init(if_ctx_t ctx, struct grouptask *gtask, gtask_fn_t *fn,
5560300113Sscottl	char *name)
5561300113Sscottl{
5562300113Sscottl
5563300113Sscottl	GROUPTASK_INIT(gtask, 0, fn, ctx);
5564300113Sscottl	taskqgroup_attach(qgroup_if_config_tqg, gtask, gtask, -1, name);
5565300113Sscottl}
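
/*
 * Usage sketch (hypothetical names): drivers use this for slow-path work
 * that must run in taskqueue context, e.g. SFP module insertion:
 *
 *	iflib_config_gtask_init(ctx, &sc->mod_task, foo_handle_mod, "mod_task");
 */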
5566300113Sscottl
5567300113Sscottlvoid
5568304704Sshurdiflib_config_gtask_deinit(struct grouptask *gtask)
5569300113Sscottl{
5570304704Sshurd
5571304704Sshurd	taskqgroup_detach(qgroup_if_config_tqg, gtask);
5572304704Sshurd}
5573304704Sshurd
5574304704Sshurdvoid
5575304704Sshurdiflib_link_state_change(if_ctx_t ctx, int link_state, uint64_t baudrate)
5576304704Sshurd{
5577300113Sscottl	if_t ifp = ctx->ifc_ifp;
5578300113Sscottl	iflib_txq_t txq = ctx->ifc_txqs;
5579300113Sscottl
5580300113Sscottl	if_setbaudrate(ifp, baudrate);
5581347197Serj	if (baudrate >= IF_Gbps(10)) {
5582347197Serj		STATE_LOCK(ctx);
5583333338Sshurd		ctx->ifc_flags |= IFC_PREFETCH;
5584347197Serj		STATE_UNLOCK(ctx);
5585347197Serj	}
5586300113Sscottl	/* If link down, disable watchdog */
5587300113Sscottl	if ((ctx->ifc_link_state == LINK_STATE_UP) && (link_state == LINK_STATE_DOWN)) {
5588300113Sscottl		for (int i = 0; i < ctx->ifc_softc_ctx.isc_ntxqsets; i++, txq++)
5589300113Sscottl			txq->ift_qstatus = IFLIB_QUEUE_IDLE;
5590300113Sscottl	}
5591300113Sscottl	ctx->ifc_link_state = link_state;
5592300113Sscottl	if_link_state_change(ifp, link_state);
5593300113Sscottl}
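
/*
 * Typical call site (illustrative): a driver's admin task reports link
 * transitions through this helper instead of calling
 * if_link_state_change() directly, letting iflib also manage the TX
 * watchdog and the prefetch hint above:
 *
 *	iflib_link_state_change(ctx, LINK_STATE_UP, IF_Gbps(10));
 */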
5594300113Sscottl
5595300113Sscottlstatic int
5596300113Sscottliflib_tx_credits_update(if_ctx_t ctx, iflib_txq_t txq)
5597300113Sscottl{
5598300113Sscottl	int credits;
5599333338Sshurd#ifdef INVARIANTS
5600333338Sshurd	int credits_pre = txq->ift_cidx_processed;
5601333338Sshurd#endif
5602300113Sscottl
5603300113Sscottl	if (ctx->isc_txd_credits_update == NULL)
5604300113Sscottl		return (0);
5605300113Sscottl
5606333338Sshurd	if ((credits = ctx->isc_txd_credits_update(ctx->ifc_softc, txq->ift_id, true)) == 0)
5607300113Sscottl		return (0);
5608300113Sscottl
5609300113Sscottl	txq->ift_processed += credits;
5610300113Sscottl	txq->ift_cidx_processed += credits;
5611300113Sscottl
5612333338Sshurd	MPASS(credits_pre + credits == txq->ift_cidx_processed);
5613300113Sscottl	if (txq->ift_cidx_processed >= txq->ift_size)
5614300113Sscottl		txq->ift_cidx_processed -= txq->ift_size;
5615300113Sscottl	return (credits);
5616300113Sscottl}
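
/*
 * Index arithmetic example: with ift_size 1024, ift_cidx_processed 1000
 * and a credit return of 100, the running index reaches 1100 and is
 * wrapped back into the ring as 1100 - 1024 = 76.
 */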
5617300113Sscottl
5618300113Sscottlstatic int
5619333338Sshurdiflib_rxd_avail(if_ctx_t ctx, iflib_rxq_t rxq, qidx_t cidx, qidx_t budget)
5620300113Sscottl{
5621300113Sscottl
5622304704Sshurd	return (ctx->isc_rxd_available(ctx->ifc_softc, rxq->ifr_id, cidx,
5623304704Sshurd	    budget));
5624300113Sscottl}
5625300113Sscottl
5626300113Sscottlvoid
5627300113Sscottliflib_add_int_delay_sysctl(if_ctx_t ctx, const char *name,
5628300113Sscottl	const char *description, if_int_delay_info_t info,
5629300113Sscottl	int offset, int value)
5630300113Sscottl{
5631300113Sscottl	info->iidi_ctx = ctx;
5632300113Sscottl	info->iidi_offset = offset;
5633300113Sscottl	info->iidi_value = value;
5634300113Sscottl	SYSCTL_ADD_PROC(device_get_sysctl_ctx(ctx->ifc_dev),
5635300113Sscottl	    SYSCTL_CHILDREN(device_get_sysctl_tree(ctx->ifc_dev)),
5636300113Sscottl	    OID_AUTO, name, CTLTYPE_INT|CTLFLAG_RW,
5637300113Sscottl	    info, 0, iflib_sysctl_int_delay, "I", description);
5638300113Sscottl}
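
/*
 * Usage sketch (e1000-style; the register offset and default are
 * illustrative, and sc->rx_int_delay is assumed to be a struct
 * if_int_delay_info): exposes a read/write interrupt-delay knob backed
 * by a device register:
 *
 *	iflib_add_int_delay_sysctl(ctx, "rx_int_delay",
 *	    "receive interrupt delay in usecs", &sc->rx_int_delay,
 *	    FOO_RDTR_OFFSET, foo_rx_int_delay_dflt);
 */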
5639300113Sscottl
5640300113Sscottlstruct mtx *
5641300113Sscottliflib_ctx_lock_get(if_ctx_t ctx)
5642300113Sscottl{
5643300113Sscottl
5644347197Serj	return (&ctx->ifc_ctx_mtx);
5645300113Sscottl}
5646300113Sscottl
5647300113Sscottlstatic int
5648300113Sscottliflib_msix_init(if_ctx_t ctx)
5649300113Sscottl{
5650300113Sscottl	device_t dev = ctx->ifc_dev;
5651300113Sscottl	if_shared_ctx_t sctx = ctx->ifc_sctx;
5652300113Sscottl	if_softc_ctx_t scctx = &ctx->ifc_softc_ctx;
5653300113Sscottl	int vectors, queues, rx_queues, tx_queues, queuemsgs, msgs;
5654300113Sscottl	int iflib_num_tx_queues, iflib_num_rx_queues;
5655300113Sscottl	int err, admincnt, bar;
5656300113Sscottl
5657333338Sshurd	iflib_num_tx_queues = ctx->ifc_sysctl_ntxqs;
5658333338Sshurd	iflib_num_rx_queues = ctx->ifc_sysctl_nrxqs;
5659304704Sshurd
5660333338Sshurd	device_printf(dev, "msix_init qsets capped at %d\n", imax(scctx->isc_ntxqsets, scctx->isc_nrxqsets));
5661333338Sshurd
5662300113Sscottl	bar = ctx->ifc_softc_ctx.isc_msix_bar;
5663300113Sscottl	admincnt = sctx->isc_admin_intrcnt;
5664333338Sshurd	/* Override by global tunable */
5665333338Sshurd	{
5666333338Sshurd		int i;
5667333338Sshurd		size_t len = sizeof(i);
5668333338Sshurd		err = kernel_sysctlbyname(curthread, "hw.pci.enable_msix", &i, &len, NULL, 0, NULL, 0);
5669333338Sshurd		if (err == 0) {
5670333338Sshurd			if (i == 0)
5671333338Sshurd				goto msi;
5672333338Sshurd		} else {
5674333338Sshurd			device_printf(dev, "unable to read hw.pci.enable_msix\n");
5675333338Sshurd		}
5676333338Sshurd	}
5677300113Sscottl	/* Override by tunable */
5678333338Sshurd	if (scctx->isc_disable_msix)
5679300113Sscottl		goto msi;
5680300113Sscottl
5681300113Sscottl	/*
5682300113Sscottl	** When used in a virtualized environment
5683300113Sscottl	** PCI BUSMASTER capability may not be set
5684300113Sscottl	** so explicitly set it here and rewrite
5685300113Sscottl	** the ENABLE in the MSIX control register
5686300113Sscottl	** at this point to cause the host to
5687300113Sscottl	** successfully initialize us.
5688300113Sscottl	*/
5689300113Sscottl	{
5690300113Sscottl		int msix_ctrl, rid;
5691300113Sscottl
5692333338Sshurd		pci_enable_busmaster(dev);
5693300113Sscottl		rid = 0;
5694333338Sshurd		if (pci_find_cap(dev, PCIY_MSIX, &rid) == 0 && rid != 0) {
5695333338Sshurd			rid += PCIR_MSIX_CTRL;
5696333338Sshurd			msix_ctrl = pci_read_config(dev, rid, 2);
5697333338Sshurd			msix_ctrl |= PCIM_MSIXCTRL_MSIX_ENABLE;
5698333338Sshurd			pci_write_config(dev, rid, msix_ctrl, 2);
5699333338Sshurd		} else {
5700333338Sshurd			device_printf(dev, "PCIY_MSIX capability not found "
5701333338Sshurd			                   "or rid %d == 0\n", rid);
5702333338Sshurd			goto msi;
5703333338Sshurd		}
5704300113Sscottl	}
5705300113Sscottl
5706300113Sscottl	/*
5707300113Sscottl	 * bar == -1 => "trust me I know what I'm doing"
5708300113Sscottl	 * Some drivers are for hardware that is so shoddily
5709300113Sscottl	 * documented that no one knows which bars are which
5710300113Sscottl	 * so the developer has to map all bars. This hack
5711300113Sscottl	 * allows shoddy garbage to use msix in this framework.
5712300113Sscottl	 */
5713300113Sscottl	if (bar != -1) {
5714300113Sscottl		ctx->ifc_msix_mem = bus_alloc_resource_any(dev,
5715300113Sscottl	            SYS_RES_MEMORY, &bar, RF_ACTIVE);
5716300113Sscottl		if (ctx->ifc_msix_mem == NULL) {
5717300113Sscottl			/* May not be enabled */
5718300113Sscottl			device_printf(dev, "Unable to map MSIX table\n");
5719300113Sscottl			goto msi;
5720300113Sscottl		}
5721300113Sscottl	}
5722300113Sscottl	/* First try MSI/X */
5723300113Sscottl	if ((msgs = pci_msix_count(dev)) == 0) { /* system has msix disabled */
5724300113Sscottl		device_printf(dev, "System has MSIX disabled\n");
5725300113Sscottl		bus_release_resource(dev, SYS_RES_MEMORY,
5726300113Sscottl		    bar, ctx->ifc_msix_mem);
5727300113Sscottl		ctx->ifc_msix_mem = NULL;
5728300113Sscottl		goto msi;
5729300113Sscottl	}
5730300113Sscottl#if IFLIB_DEBUG
5731300113Sscottl	/* use only 1 qset in debug mode */
5732300113Sscottl	queuemsgs = min(msgs - admincnt, 1);
5733300113Sscottl#else
5734300113Sscottl	queuemsgs = msgs - admincnt;
5735300113Sscottl#endif
5736300113Sscottl#ifdef RSS
5737333338Sshurd	queues = imin(queuemsgs, rss_getnumbuckets());
5738300113Sscottl#else
5739333338Sshurd	queues = queuemsgs;
5740300113Sscottl#endif
5741333338Sshurd	queues = imin(CPU_COUNT(&ctx->ifc_cpus), queues);
5742333338Sshurd	device_printf(dev, "pxm cpus: %d queue msgs: %d admincnt: %d\n",
5743333338Sshurd				  CPU_COUNT(&ctx->ifc_cpus), queuemsgs, admincnt);
5744300113Sscottl#ifdef  RSS
5745300113Sscottl	/* If we're doing RSS, clamp at the number of RSS buckets */
5746300113Sscottl	if (queues > rss_getnumbuckets())
5747300113Sscottl		queues = rss_getnumbuckets();
5748300113Sscottl#endif
5749304704Sshurd	if (iflib_num_rx_queues > 0 && iflib_num_rx_queues < queuemsgs)
5750304704Sshurd		rx_queues = iflib_num_rx_queues;
5751300113Sscottl	else
5752300113Sscottl		rx_queues = queues;
5753333338Sshurd
5754333338Sshurd	if (rx_queues > scctx->isc_nrxqsets)
5755333338Sshurd		rx_queues = scctx->isc_nrxqsets;
5756333338Sshurd
5757304704Sshurd	/*
5758304704Sshurd	 * We want this to be all logical CPUs by default
5759304704Sshurd	 */
5760300113Sscottl	if (iflib_num_tx_queues > 0 && iflib_num_tx_queues < queues)
5761300113Sscottl		tx_queues = iflib_num_tx_queues;
5762300113Sscottl	else
5763304704Sshurd		tx_queues = mp_ncpus;
5764300113Sscottl
5765333338Sshurd	if (tx_queues > scctx->isc_ntxqsets)
5766333338Sshurd		tx_queues = scctx->isc_ntxqsets;
5767333338Sshurd
5768304704Sshurd	if (ctx->ifc_sysctl_qs_eq_override == 0) {
5769304704Sshurd#ifdef INVARIANTS
5770304704Sshurd		if (tx_queues != rx_queues)
5771347197Serj			device_printf(dev,
5772347197Serj			    "queue equality override not set, capping rx_queues at %d and tx_queues at %d\n",
5773347197Serj			    min(rx_queues, tx_queues), min(rx_queues, tx_queues));
5774304704Sshurd#endif
5775304704Sshurd		tx_queues = min(rx_queues, tx_queues);
5776304704Sshurd		rx_queues = min(rx_queues, tx_queues);
5777304704Sshurd	}
5778304704Sshurd
5779300113Sscottl	device_printf(dev, "using %d rx queues %d tx queues\n", rx_queues, tx_queues);
5780300113Sscottl
5781304704Sshurd	vectors = rx_queues + admincnt;
5782300113Sscottl	if ((err = pci_alloc_msix(dev, &vectors)) == 0) {
5783347197Serj		device_printf(dev, "Using MSI-X interrupts with %d vectors\n",
5784347197Serj		    vectors);
5785300113Sscottl		scctx->isc_vectors = vectors;
5786300113Sscottl		scctx->isc_nrxqsets = rx_queues;
5787300113Sscottl		scctx->isc_ntxqsets = tx_queues;
5788300113Sscottl		scctx->isc_intr = IFLIB_INTR_MSIX;
5789304704Sshurd
5790300113Sscottl		return (vectors);
5791300113Sscottl	} else {
5792347197Serj		device_printf(dev,
5793347197Serj		    "failed to allocate %d msix vectors, err: %d - using MSI\n", vectors, err);
5794300113Sscottl	}
5795300113Sscottlmsi:
5796300113Sscottl	vectors = pci_msi_count(dev);
5797300113Sscottl	scctx->isc_nrxqsets = 1;
5798300113Sscottl	scctx->isc_ntxqsets = 1;
5799300113Sscottl	scctx->isc_vectors = vectors;
5800300113Sscottl	if (vectors == 1 && pci_alloc_msi(dev, &vectors) == 0) {
5801300113Sscottl		device_printf(dev, "Using an MSI interrupt\n");
5802300113Sscottl		scctx->isc_intr = IFLIB_INTR_MSI;
5803300113Sscottl	} else {
5804300113Sscottl		device_printf(dev, "Using a Legacy interrupt\n");
5805300113Sscottl		scctx->isc_intr = IFLIB_INTR_LEGACY;
5806300113Sscottl	}
5807300113Sscottl
5808300113Sscottl	return (vectors);
5809300113Sscottl}
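
/*
 * Vector accounting example: msgs = 16 MSI-X messages with admincnt = 1
 * leaves queuemsgs = 15; queues is then clamped to CPU_COUNT(&ifc_cpus)
 * (and to the RSS bucket count when RSS is compiled in), and the final
 * pci_alloc_msix() request is rx_queues + admincnt vectors.  If that
 * fails, the code falls back to a single MSI or legacy vector.
 */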
5810300113Sscottl
5811300113Sscottlchar *ring_states[] = { "IDLE", "BUSY", "STALLED", "ABDICATED" };
5812300113Sscottl
5813300113Sscottlstatic int
5814300113Sscottlmp_ring_state_handler(SYSCTL_HANDLER_ARGS)
5815300113Sscottl{
5816300113Sscottl	int rc;
5817300113Sscottl	uint16_t *state = ((uint16_t *)oidp->oid_arg1);
5818300113Sscottl	struct sbuf *sb;
5819300113Sscottl	char *ring_state = "UNKNOWN";
5820300113Sscottl
5821300113Sscottl	/* XXX needed ? */
5822300113Sscottl	rc = sysctl_wire_old_buffer(req, 0);
5823300113Sscottl	MPASS(rc == 0);
5824300113Sscottl	if (rc != 0)
5825300113Sscottl		return (rc);
5826300113Sscottl	sb = sbuf_new_for_sysctl(NULL, NULL, 80, req);
5827300113Sscottl	MPASS(sb != NULL);
5828300113Sscottl	if (sb == NULL)
5829300113Sscottl		return (ENOMEM);
5830300113Sscottl	if (state[3] <= 3)
5831300113Sscottl		ring_state = ring_states[state[3]];
5832300113Sscottl
5833300113Sscottl	sbuf_printf(sb, "pidx_head: %04hu pidx_tail: %04hu cidx: %04hu state: %s",
5834300113Sscottl		    state[0], state[1], state[2], ring_state);
5835300113Sscottl	rc = sbuf_finish(sb);
5836300113Sscottl	sbuf_delete(sb);
5837300113Sscottl	return (rc);
5838300113Sscottl}
5839300113Sscottl
5840304704Sshurdenum iflib_ndesc_handler {
5841304704Sshurd	IFLIB_NTXD_HANDLER,
5842304704Sshurd	IFLIB_NRXD_HANDLER,
5843304704Sshurd};
5844300113Sscottl
5845304704Sshurdstatic int
5846304704Sshurdmp_ndesc_handler(SYSCTL_HANDLER_ARGS)
5847304704Sshurd{
5848304704Sshurd	if_ctx_t ctx = (void *)arg1;
5849304704Sshurd	enum iflib_ndesc_handler type = arg2;
5850304704Sshurd	char buf[256] = {0};
5851333338Sshurd	qidx_t *ndesc;
5852304704Sshurd	char *p, *next;
5853304704Sshurd	int nqs, rc, i;
5854300113Sscottl
5855304704Sshurd	MPASS(type == IFLIB_NTXD_HANDLER || type == IFLIB_NRXD_HANDLER);
5856304704Sshurd
5857304704Sshurd	nqs = 8;
5858304704Sshurd	switch (type) {
5859304704Sshurd	case IFLIB_NTXD_HANDLER:
5860304704Sshurd		ndesc = ctx->ifc_sysctl_ntxds;
5861304704Sshurd		if (ctx->ifc_sctx)
5862304704Sshurd			nqs = ctx->ifc_sctx->isc_ntxqs;
5863304704Sshurd		break;
5864304704Sshurd	case IFLIB_NRXD_HANDLER:
5865304704Sshurd		ndesc = ctx->ifc_sysctl_nrxds;
5866304704Sshurd		if (ctx->ifc_sctx)
5867304704Sshurd			nqs = ctx->ifc_sctx->isc_nrxqs;
5868304704Sshurd		break;
5869304704Sshurd	}
5870304704Sshurd	if (nqs == 0)
5871304704Sshurd		nqs = 8;
5872304704Sshurd
5873304704Sshurd	for (i = 0; i < 8; i++) {
5874304704Sshurd		if (i >= nqs)
5875304704Sshurd			break;
5876304704Sshurd		if (i)
5877304704Sshurd			strcat(buf, ",");
5878304704Sshurd		sprintf(strchr(buf, 0), "%d", ndesc[i]);
5879304704Sshurd	}
5880304704Sshurd
5881304704Sshurd	rc = sysctl_handle_string(oidp, buf, sizeof(buf), req);
5882304704Sshurd	if (rc || req->newptr == NULL)
5883304704Sshurd		return (rc);
5884304704Sshurd
5885304704Sshurd	for (i = 0, next = buf, p = strsep(&next, " ,"); i < 8 && p;
5886304704Sshurd	    i++, p = strsep(&next, " ,")) {
5887304704Sshurd		ndesc[i] = strtoul(p, NULL, 10);
5888304704Sshurd	}
5889304704Sshurd
5890304704Sshurd	return (rc);
5891304704Sshurd}
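
/*
 * The handler above accepts a comma- or space-separated list with one
 * entry per queue set, e.g. (hypothetical device unit; typically set from
 * loader.conf since the values are consumed at attach time):
 *
 *	dev.foo.0.iflib.override_ntxds="512,512"
 *
 * A zero entry keeps the driver default for that queue.
 */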
5892304704Sshurd
5893300113Sscottl#define NAME_BUFLEN 32
5894300113Sscottlstatic void
5895300113Sscottliflib_add_device_sysctl_pre(if_ctx_t ctx)
5896300113Sscottl{
5897300113Sscottl	device_t dev = iflib_get_dev(ctx);
5898300113Sscottl	struct sysctl_oid_list *child, *oid_list;
5899300113Sscottl	struct sysctl_ctx_list *ctx_list;
5900300113Sscottl	struct sysctl_oid *node;
5901300113Sscottl
5902300113Sscottl	ctx_list = device_get_sysctl_ctx(dev);
5903300113Sscottl	child = SYSCTL_CHILDREN(device_get_sysctl_tree(dev));
5904300113Sscottl	ctx->ifc_sysctl_node = node = SYSCTL_ADD_NODE(ctx_list, child, OID_AUTO, "iflib",
5905300113Sscottl						      CTLFLAG_RD, NULL, "IFLIB fields");
5906300113Sscottl	oid_list = SYSCTL_CHILDREN(node);
5907300113Sscottl
5908347213Serj	SYSCTL_ADD_CONST_STRING(ctx_list, oid_list, OID_AUTO, "driver_version",
5909347213Serj		       CTLFLAG_RD, ctx->ifc_sctx->isc_driver_version,
5910304704Sshurd		       "driver version");
5911304704Sshurd
5912300113Sscottl	SYSCTL_ADD_U16(ctx_list, oid_list, OID_AUTO, "override_ntxqs",
5913300113Sscottl		       CTLFLAG_RWTUN, &ctx->ifc_sysctl_ntxqs, 0,
5914300113Sscottl			"# of txqs to use, 0 => use default #");
5915300113Sscottl	SYSCTL_ADD_U16(ctx_list, oid_list, OID_AUTO, "override_nrxqs",
5916304704Sshurd		       CTLFLAG_RWTUN, &ctx->ifc_sysctl_nrxqs, 0,
5917304704Sshurd			"# of rxqs to use, 0 => use default #");
5918304704Sshurd	SYSCTL_ADD_U16(ctx_list, oid_list, OID_AUTO, "override_qs_enable",
5919304704Sshurd		       CTLFLAG_RWTUN, &ctx->ifc_sysctl_qs_eq_override, 0,
5920304704Sshurd                       "permit #txq != #rxq");
5921333338Sshurd	SYSCTL_ADD_INT(ctx_list, oid_list, OID_AUTO, "disable_msix",
5922333338Sshurd                      CTLFLAG_RWTUN, &ctx->ifc_softc_ctx.isc_disable_msix, 0,
5923333338Sshurd                      "disable MSIX (default 0)");
5924333338Sshurd	SYSCTL_ADD_U16(ctx_list, oid_list, OID_AUTO, "rx_budget",
5925333338Sshurd		       CTLFLAG_RWTUN, &ctx->ifc_sysctl_rx_budget, 0,
5926333338Sshurd                       "set the rx budget");
5927300113Sscottl
5928304704Sshurd	/* XXX change for per-queue sizes */
5929304704Sshurd	SYSCTL_ADD_PROC(ctx_list, oid_list, OID_AUTO, "override_ntxds",
5930304704Sshurd		       CTLTYPE_STRING|CTLFLAG_RWTUN, ctx, IFLIB_NTXD_HANDLER,
5931304704Sshurd                       mp_ndesc_handler, "A",
5932304704Sshurd                       "list of # of tx descriptors to use, 0 = use default #");
5933304704Sshurd	SYSCTL_ADD_PROC(ctx_list, oid_list, OID_AUTO, "override_nrxds",
5934304704Sshurd		       CTLTYPE_STRING|CTLFLAG_RWTUN, ctx, IFLIB_NRXD_HANDLER,
5935304704Sshurd                       mp_ndesc_handler, "A",
5936304704Sshurd                       "list of # of rx descriptors to use, 0 = use default #");
5937300113Sscottl}
5938300113Sscottl
5939300113Sscottlstatic void
5940300113Sscottliflib_add_device_sysctl_post(if_ctx_t ctx)
5941300113Sscottl{
5942300113Sscottl	if_shared_ctx_t sctx = ctx->ifc_sctx;
5943300113Sscottl	if_softc_ctx_t scctx = &ctx->ifc_softc_ctx;
5944300113Sscottl	device_t dev = iflib_get_dev(ctx);
5945300113Sscottl	struct sysctl_oid_list *child;
5946300113Sscottl	struct sysctl_ctx_list *ctx_list;
5947300113Sscottl	iflib_fl_t fl;
5948300113Sscottl	iflib_txq_t txq;
5949300113Sscottl	iflib_rxq_t rxq;
5950300113Sscottl	int i, j;
5951300113Sscottl	char namebuf[NAME_BUFLEN];
5952300113Sscottl	char *qfmt;
5953300113Sscottl	struct sysctl_oid *queue_node, *fl_node, *node;
5954300113Sscottl	struct sysctl_oid_list *queue_list, *fl_list;
5955300113Sscottl	ctx_list = device_get_sysctl_ctx(dev);
5956300113Sscottl
5957300113Sscottl	node = ctx->ifc_sysctl_node;
5958300113Sscottl	child = SYSCTL_CHILDREN(node);
5959300113Sscottl
5960300113Sscottl	if (scctx->isc_ntxqsets > 100)
5961300113Sscottl		qfmt = "txq%03d";
5962300113Sscottl	else if (scctx->isc_ntxqsets > 10)
5963300113Sscottl		qfmt = "txq%02d";
5964300113Sscottl	else
5965300113Sscottl		qfmt = "txq%d";
5966300113Sscottl	for (i = 0, txq = ctx->ifc_txqs; i < scctx->isc_ntxqsets; i++, txq++) {
5967300113Sscottl		snprintf(namebuf, NAME_BUFLEN, qfmt, i);
5968300113Sscottl		queue_node = SYSCTL_ADD_NODE(ctx_list, child, OID_AUTO, namebuf,
5969300113Sscottl					     CTLFLAG_RD, NULL, "Queue Name");
5970300113Sscottl		queue_list = SYSCTL_CHILDREN(queue_node);
5971300113Sscottl#if MEMORY_LOGGING
5972300113Sscottl		SYSCTL_ADD_QUAD(ctx_list, queue_list, OID_AUTO, "txq_dequeued",
5973300113Sscottl				CTLFLAG_RD,
5974300113Sscottl				&txq->ift_dequeued, "total mbufs freed");
5975300113Sscottl		SYSCTL_ADD_QUAD(ctx_list, queue_list, OID_AUTO, "txq_enqueued",
5976300113Sscottl				CTLFLAG_RD,
5977300113Sscottl				&txq->ift_enqueued, "total mbufs enqueued");
5978300113Sscottl#endif
5979300113Sscottl		SYSCTL_ADD_QUAD(ctx_list, queue_list, OID_AUTO, "mbuf_defrag",
5980300113Sscottl				   CTLFLAG_RD,
5981300113Sscottl				   &txq->ift_mbuf_defrag, "# of times m_defrag was called");
5982300113Sscottl		SYSCTL_ADD_QUAD(ctx_list, queue_list, OID_AUTO, "m_pullups",
5983300113Sscottl				   CTLFLAG_RD,
5984300113Sscottl				   &txq->ift_pullups, "# of times m_pullup was called");
5985300113Sscottl		SYSCTL_ADD_QUAD(ctx_list, queue_list, OID_AUTO, "mbuf_defrag_failed",
5986300113Sscottl				   CTLFLAG_RD,
5987300113Sscottl				   &txq->ift_mbuf_defrag_failed, "# of times m_defrag failed");
5988300113Sscottl		SYSCTL_ADD_QUAD(ctx_list, queue_list, OID_AUTO, "no_desc_avail",
5989300113Sscottl				   CTLFLAG_RD,
5990304704Sshurd				   &txq->ift_no_desc_avail, "# of times no descriptors were available");
5991300113Sscottl		SYSCTL_ADD_QUAD(ctx_list, queue_list, OID_AUTO, "tx_map_failed",
5992300113Sscottl				   CTLFLAG_RD,
5993300113Sscottl				   &txq->ift_map_failed, "# of times dma map failed");
5994300113Sscottl		SYSCTL_ADD_QUAD(ctx_list, queue_list, OID_AUTO, "txd_encap_efbig",
5995300113Sscottl				   CTLFLAG_RD,
5996300113Sscottl				   &txq->ift_txd_encap_efbig, "# of times txd_encap returned EFBIG");
5997300113Sscottl		SYSCTL_ADD_QUAD(ctx_list, queue_list, OID_AUTO, "no_tx_dma_setup",
5998300113Sscottl				   CTLFLAG_RD,
5999300113Sscottl				   &txq->ift_no_tx_dma_setup, "# of times map failed for other than EFBIG");
6000300113Sscottl		SYSCTL_ADD_U16(ctx_list, queue_list, OID_AUTO, "txq_pidx",
6001300113Sscottl				   CTLFLAG_RD,
6002300113Sscottl				   &txq->ift_pidx, 1, "Producer Index");
6003300113Sscottl		SYSCTL_ADD_U16(ctx_list, queue_list, OID_AUTO, "txq_cidx",
6004300113Sscottl				   CTLFLAG_RD,
6005300113Sscottl				   &txq->ift_cidx, 1, "Consumer Index");
6006300113Sscottl		SYSCTL_ADD_U16(ctx_list, queue_list, OID_AUTO, "txq_cidx_processed",
6007300113Sscottl				   CTLFLAG_RD,
6008300113Sscottl				   &txq->ift_cidx_processed, 1, "Consumer Index seen by credit update");
6009300113Sscottl		SYSCTL_ADD_U16(ctx_list, queue_list, OID_AUTO, "txq_in_use",
6010300113Sscottl				   CTLFLAG_RD,
6011300113Sscottl				   &txq->ift_in_use, 1, "descriptors in use");
6012300113Sscottl		SYSCTL_ADD_QUAD(ctx_list, queue_list, OID_AUTO, "txq_processed",
6013300113Sscottl				   CTLFLAG_RD,
6014300113Sscottl				   &txq->ift_processed, "descriptors processed for clean");
6015300113Sscottl		SYSCTL_ADD_QUAD(ctx_list, queue_list, OID_AUTO, "txq_cleaned",
6016300113Sscottl				   CTLFLAG_RD,
6017300113Sscottl				   &txq->ift_cleaned, "total cleaned");
6018300113Sscottl		SYSCTL_ADD_PROC(ctx_list, queue_list, OID_AUTO, "ring_state",
6019333338Sshurd				CTLTYPE_STRING | CTLFLAG_RD, __DEVOLATILE(uint64_t *, &txq->ift_br->state),
6020300113Sscottl				0, mp_ring_state_handler, "A", "soft ring state");
6021300113Sscottl		SYSCTL_ADD_COUNTER_U64(ctx_list, queue_list, OID_AUTO, "r_enqueues",
6022333338Sshurd				       CTLFLAG_RD, &txq->ift_br->enqueues,
6023300113Sscottl				       "# of enqueues to the mp_ring for this queue");
6024300113Sscottl		SYSCTL_ADD_COUNTER_U64(ctx_list, queue_list, OID_AUTO, "r_drops",
6025333338Sshurd				       CTLFLAG_RD, &txq->ift_br->drops,
6026300113Sscottl				       "# of drops in the mp_ring for this queue");
6027300113Sscottl		SYSCTL_ADD_COUNTER_U64(ctx_list, queue_list, OID_AUTO, "r_starts",
6028333338Sshurd				       CTLFLAG_RD, &txq->ift_br->starts,
6029300113Sscottl				       "# of normal consumer starts in the mp_ring for this queue");
6030300113Sscottl		SYSCTL_ADD_COUNTER_U64(ctx_list, queue_list, OID_AUTO, "r_stalls",
6031333338Sshurd				       CTLFLAG_RD, &txq->ift_br->stalls,
6032300113Sscottl					       "# of consumer stalls in the mp_ring for this queue");
6033300113Sscottl		SYSCTL_ADD_COUNTER_U64(ctx_list, queue_list, OID_AUTO, "r_restarts",
6034333338Sshurd			       CTLFLAG_RD, &txq->ift_br->restarts,
6035300113Sscottl				       "# of consumer restarts in the mp_ring for this queue");
6036300113Sscottl		SYSCTL_ADD_COUNTER_U64(ctx_list, queue_list, OID_AUTO, "r_abdications",
6037333338Sshurd				       CTLFLAG_RD, &txq->ift_br->abdications,
6038300113Sscottl				       "# of consumer abdications in the mp_ring for this queue");
6039300113Sscottl	}
6040300113Sscottl
6041300113Sscottl	if (scctx->isc_nrxqsets > 100)
6042300113Sscottl		qfmt = "rxq%03d";
6043300113Sscottl	else if (scctx->isc_nrxqsets > 10)
6044300113Sscottl		qfmt = "rxq%02d";
6045300113Sscottl	else
6046300113Sscottl		qfmt = "rxq%d";
6047300113Sscottl	for (i = 0, rxq = ctx->ifc_rxqs; i < scctx->isc_nrxqsets; i++, rxq++) {
6048300113Sscottl		snprintf(namebuf, NAME_BUFLEN, qfmt, i);
6049300113Sscottl		queue_node = SYSCTL_ADD_NODE(ctx_list, child, OID_AUTO, namebuf,
6050300113Sscottl					     CTLFLAG_RD, NULL, "Queue Name");
6051300113Sscottl		queue_list = SYSCTL_CHILDREN(queue_node);
6052304704Sshurd		if (sctx->isc_flags & IFLIB_HAS_RXCQ) {
6053300113Sscottl			SYSCTL_ADD_U16(ctx_list, queue_list, OID_AUTO, "rxq_cq_pidx",
6054300113Sscottl				       CTLFLAG_RD,
6055300113Sscottl				       &rxq->ifr_cq_pidx, 1, "Producer Index");
6056300113Sscottl			SYSCTL_ADD_U16(ctx_list, queue_list, OID_AUTO, "rxq_cq_cidx",
6057300113Sscottl				       CTLFLAG_RD,
6058300113Sscottl				       &rxq->ifr_cq_cidx, 1, "Consumer Index");
6059300113Sscottl		}
6060333338Sshurd
6061300113Sscottl		for (j = 0, fl = rxq->ifr_fl; j < rxq->ifr_nfl; j++, fl++) {
6062300113Sscottl			snprintf(namebuf, NAME_BUFLEN, "rxq_fl%d", j);
6063300113Sscottl			fl_node = SYSCTL_ADD_NODE(ctx_list, queue_list, OID_AUTO, namebuf,
6064300113Sscottl						     CTLFLAG_RD, NULL, "freelist Name");
6065300113Sscottl			fl_list = SYSCTL_CHILDREN(fl_node);
6066300113Sscottl			SYSCTL_ADD_U16(ctx_list, fl_list, OID_AUTO, "pidx",
6067300113Sscottl				       CTLFLAG_RD,
6068300113Sscottl				       &fl->ifl_pidx, 1, "Producer Index");
6069300113Sscottl			SYSCTL_ADD_U16(ctx_list, fl_list, OID_AUTO, "cidx",
6070300113Sscottl				       CTLFLAG_RD,
6071300113Sscottl				       &fl->ifl_cidx, 1, "Consumer Index");
6072300113Sscottl			SYSCTL_ADD_U16(ctx_list, fl_list, OID_AUTO, "credits",
6073300113Sscottl				       CTLFLAG_RD,
6074300113Sscottl				       &fl->ifl_credits, 1, "credits available");
6075300113Sscottl#if MEMORY_LOGGING
6076300113Sscottl			SYSCTL_ADD_QUAD(ctx_list, fl_list, OID_AUTO, "fl_m_enqueued",
6077300113Sscottl					CTLFLAG_RD,
6078300113Sscottl					&fl->ifl_m_enqueued, "mbufs allocated");
6079300113Sscottl			SYSCTL_ADD_QUAD(ctx_list, fl_list, OID_AUTO, "fl_m_dequeued",
6080300113Sscottl					CTLFLAG_RD,
6081300113Sscottl					&fl->ifl_m_dequeued, "mbufs freed");
6082300113Sscottl			SYSCTL_ADD_QUAD(ctx_list, fl_list, OID_AUTO, "fl_cl_enqueued",
6083300113Sscottl					CTLFLAG_RD,
6084300113Sscottl					&fl->ifl_cl_enqueued, "clusters allocated");
6085300113Sscottl			SYSCTL_ADD_QUAD(ctx_list, fl_list, OID_AUTO, "fl_cl_dequeued",
6086300113Sscottl					CTLFLAG_RD,
6087300113Sscottl					&fl->ifl_cl_dequeued, "clusters freed");
6088300113Sscottl#endif
6089300113Sscottl
6090300113Sscottl		}
6091300113Sscottl	}
6092300113Sscottl
6093300113Sscottl}
6094333338Sshurd
6095347197Serjvoid
6096347197Serjiflib_request_reset(if_ctx_t ctx)
6097347197Serj{
6098347197Serj
6099347197Serj	STATE_LOCK(ctx);
6100347197Serj	ctx->ifc_flags |= IFC_DO_RESET;
6101347197Serj	STATE_UNLOCK(ctx);
6102347197Serj}
6103347197Serj
6104333338Sshurd#ifndef __NO_STRICT_ALIGNMENT
6105333338Sshurdstatic struct mbuf *
6106333338Sshurdiflib_fixup_rx(struct mbuf *m)
6107333338Sshurd{
6108333338Sshurd	struct mbuf *n;
6109333338Sshurd
6110333338Sshurd	if (m->m_len <= (MCLBYTES - ETHER_HDR_LEN)) {
6111333338Sshurd		bcopy(m->m_data, m->m_data + ETHER_HDR_LEN, m->m_len);
6112333338Sshurd		m->m_data += ETHER_HDR_LEN;
6113333338Sshurd		n = m;
6114333338Sshurd	} else {
6115333338Sshurd		MGETHDR(n, M_NOWAIT, MT_DATA);
6116333338Sshurd		if (n == NULL) {
6117333338Sshurd			m_freem(m);
6118333338Sshurd			return (NULL);
6119333338Sshurd		}
6120333338Sshurd		bcopy(m->m_data, n->m_data, ETHER_HDR_LEN);
6121333338Sshurd		m->m_data += ETHER_HDR_LEN;
6122333338Sshurd		m->m_len -= ETHER_HDR_LEN;
6123333338Sshurd		n->m_len = ETHER_HDR_LEN;
6124333338Sshurd		M_MOVE_PKTHDR(n, m);
6125333338Sshurd		n->m_next = m;
6126333338Sshurd	}
6127333338Sshurd	return (n);
6128333338Sshurd}
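
/*
 * Alignment note: on strict-alignment machines a 14-byte Ethernet header
 * leaves the IP header 2-byte aligned.  The fixup above shifts the frame
 * forward by ETHER_HDR_LEN within the first mbuf (or splits the header
 * into its own mbuf) so the payload lands on a 32-bit boundary before it
 * is handed to the stack.
 */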
6129333338Sshurd#endif
6130