sfxge_tx.c revision 284555
/*-
 * Copyright (c) 2010-2015 Solarflare Communications Inc.
 * All rights reserved.
 *
 * This software was developed in part by Philip Paeps under contract for
 * Solarflare Communications, Inc.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 *
 * 1. Redistributions of source code must retain the above copyright notice,
 *    this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright notice,
 *    this list of conditions and the following disclaimer in the documentation
 *    and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
 * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
 * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
 * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
 * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
 * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 * The views and conclusions contained in the software and documentation are
 * those of the authors and should not be interpreted as representing official
 * policies, either expressed or implied, of the FreeBSD Project.
 */

/* Theory of operation:
 *
 * Tx queues allocation and mapping
 *
 * One Tx queue with enabled checksum offload is allocated per Rx channel
 * (event queue).  Also 2 Tx queues (one without checksum offload and one
 * with IP checksum offload only) are allocated and bound to event queue 0.
 * sfxge_txq_type is used as Tx queue label.
 *
 * So, the mapping from (event queue, label) to Tx queue index is:
 *	if event queue index is 0, TxQ-index = TxQ-label (in [0..SFXGE_TXQ_NTYPES))
 *	else TxQ-index = SFXGE_TXQ_NTYPES + EvQ-index - 1
 * See sfxge_get_txq_by_label() in sfxge_ev.c
 */
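/*
 * Illustrative sketch only (kept out of the build; not part of the driver):
 * the mapping above written out as a hypothetical helper.  It assumes the
 * SFXGE_TXQ_NTYPES constant from sfxge_tx.h; the real lookup is
 * sfxge_get_txq_by_label() in sfxge_ev.c.
 */
#if 0
static unsigned int
sfxge_txq_index_sketch(unsigned int evq_index, unsigned int label)
{
	if (evq_index == 0)
		return (label);		/* label is in [0..SFXGE_TXQ_NTYPES) */
	return (SFXGE_TXQ_NTYPES + evq_index - 1);
}
#endif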

#include <sys/cdefs.h>
__FBSDID("$FreeBSD: stable/10/sys/dev/sfxge/sfxge_tx.c 284555 2015-06-18 15:46:39Z arybchik $");

#include <sys/types.h>
#include <sys/mbuf.h>
#include <sys/smp.h>
#include <sys/socket.h>
#include <sys/sysctl.h>
#include <sys/syslog.h>

#include <net/bpf.h>
#include <net/ethernet.h>
#include <net/if.h>
#include <net/if_vlan_var.h>

#include <netinet/in.h>
#include <netinet/ip.h>
#include <netinet/ip6.h>
#include <netinet/tcp.h>

#include "common/efx.h"

#include "sfxge.h"
#include "sfxge_tx.h"


#define	SFXGE_PARAM_TX_DPL_GET_MAX	SFXGE_PARAM(tx_dpl_get_max)
static int sfxge_tx_dpl_get_max = SFXGE_TX_DPL_GET_PKT_LIMIT_DEFAULT;
TUNABLE_INT(SFXGE_PARAM_TX_DPL_GET_MAX, &sfxge_tx_dpl_get_max);
SYSCTL_INT(_hw_sfxge, OID_AUTO, tx_dpl_get_max, CTLFLAG_RDTUN,
	   &sfxge_tx_dpl_get_max, 0,
	   "Maximum number of packets in the deferred packet get-list");

#define	SFXGE_PARAM_TX_DPL_GET_NON_TCP_MAX \
	SFXGE_PARAM(tx_dpl_get_non_tcp_max)
static int sfxge_tx_dpl_get_non_tcp_max =
	SFXGE_TX_DPL_GET_NON_TCP_PKT_LIMIT_DEFAULT;
TUNABLE_INT(SFXGE_PARAM_TX_DPL_GET_NON_TCP_MAX, &sfxge_tx_dpl_get_non_tcp_max);
SYSCTL_INT(_hw_sfxge, OID_AUTO, tx_dpl_get_non_tcp_max, CTLFLAG_RDTUN,
	   &sfxge_tx_dpl_get_non_tcp_max, 0,
	   "Maximum number of non-TCP packets in deferred packet get-list");

#define	SFXGE_PARAM_TX_DPL_PUT_MAX	SFXGE_PARAM(tx_dpl_put_max)
static int sfxge_tx_dpl_put_max = SFXGE_TX_DPL_PUT_PKT_LIMIT_DEFAULT;
TUNABLE_INT(SFXGE_PARAM_TX_DPL_PUT_MAX, &sfxge_tx_dpl_put_max);
SYSCTL_INT(_hw_sfxge, OID_AUTO, tx_dpl_put_max, CTLFLAG_RDTUN,
	   &sfxge_tx_dpl_put_max, 0,
	   "Maximum number of packets in the deferred packet put-list");

#define	SFXGE_PARAM_TSO_FW_ASSISTED	SFXGE_PARAM(tso_fw_assisted)
static int sfxge_tso_fw_assisted = 1;
TUNABLE_INT(SFXGE_PARAM_TSO_FW_ASSISTED, &sfxge_tso_fw_assisted);
SYSCTL_INT(_hw_sfxge, OID_AUTO, tso_fw_assisted, CTLFLAG_RDTUN,
	   &sfxge_tso_fw_assisted, 0,
	   "Use FW-assisted TSO if supported by NIC firmware");


static const struct {
	const char *name;
	size_t offset;
} sfxge_tx_stats[] = {
#define	SFXGE_TX_STAT(name, member) \
	{ #name, offsetof(struct sfxge_txq, member) }
	SFXGE_TX_STAT(tso_bursts, tso_bursts),
	SFXGE_TX_STAT(tso_packets, tso_packets),
	SFXGE_TX_STAT(tso_long_headers, tso_long_headers),
	SFXGE_TX_STAT(tso_pdrop_too_many, tso_pdrop_too_many),
	SFXGE_TX_STAT(tso_pdrop_no_rsrc, tso_pdrop_no_rsrc),
	SFXGE_TX_STAT(tx_collapses, collapses),
	SFXGE_TX_STAT(tx_drops, drops),
	SFXGE_TX_STAT(tx_get_overflow, get_overflow),
	SFXGE_TX_STAT(tx_get_non_tcp_overflow, get_non_tcp_overflow),
	SFXGE_TX_STAT(tx_put_overflow, put_overflow),
	SFXGE_TX_STAT(tx_netdown_drops, netdown_drops),
};


/* Forward declarations. */
static void sfxge_tx_qdpl_service(struct sfxge_txq *txq);
static void sfxge_tx_qlist_post(struct sfxge_txq *txq);
static void sfxge_tx_qunblock(struct sfxge_txq *txq);
static int sfxge_tx_queue_tso(struct sfxge_txq *txq, struct mbuf *mbuf,
			      const bus_dma_segment_t *dma_seg, int n_dma_seg,
			      int vlan_tagged);

static int
sfxge_tx_maybe_insert_tag(struct sfxge_txq *txq, struct mbuf *mbuf)
{
	uint16_t this_tag = ((mbuf->m_flags & M_VLANTAG) ?
			     mbuf->m_pkthdr.ether_vtag :
			     0);

	if (this_tag == txq->hw_vlan_tci)
		return (0);

	efx_tx_qdesc_vlantci_create(txq->common,
				    bswap16(this_tag),
				    &txq->pend_desc[0]);
	txq->n_pend_desc = 1;
	txq->hw_vlan_tci = this_tag;
	return (1);
}

static inline void
sfxge_next_stmp(struct sfxge_txq *txq, struct sfxge_tx_mapping **pstmp)
{
	KASSERT((*pstmp)->flags == 0, ("stmp flags are not 0"));
	if (__predict_false(*pstmp ==
			    &txq->stmp[txq->ptr_mask]))
		*pstmp = &txq->stmp[0];
	else
		(*pstmp)++;
}


void
sfxge_tx_qcomplete(struct sfxge_txq *txq, struct sfxge_evq *evq)
{
	unsigned int completed;

	SFXGE_EVQ_LOCK_ASSERT_OWNED(evq);

	completed = txq->completed;
	while (completed != txq->pending) {
		struct sfxge_tx_mapping *stmp;
		unsigned int id;

		id = completed++ & txq->ptr_mask;

		stmp = &txq->stmp[id];
		if (stmp->flags & TX_BUF_UNMAP) {
			bus_dmamap_unload(txq->packet_dma_tag, stmp->map);
			if (stmp->flags & TX_BUF_MBUF) {
				struct mbuf *m = stmp->u.mbuf;
				do
					m = m_free(m);
				while (m != NULL);
			} else {
				free(stmp->u.heap_buf, M_SFXGE);
			}
			stmp->flags = 0;
		}
	}
	txq->completed = completed;

	/* Check whether we need to unblock the queue. */
	mb();
	if (txq->blocked) {
		unsigned int level;

		level = txq->added - txq->completed;
		if (level <= SFXGE_TXQ_UNBLOCK_LEVEL(txq->entries))
			sfxge_tx_qunblock(txq);
	}
}

static unsigned int
sfxge_is_mbuf_non_tcp(struct mbuf *mbuf)
{
	/* Absence of the TCP checksum flags does not mean that the packet is
	 * non-TCP, but the flags should be set if the user wants to achieve
	 * high throughput.
	 */
	return (!(mbuf->m_pkthdr.csum_flags & (CSUM_IP_TCP | CSUM_IP6_TCP)));
}

/*
 * Reorder the put list and append it to the get list.
 */
static void
sfxge_tx_qdpl_swizzle(struct sfxge_txq *txq)
{
	struct sfxge_tx_dpl *stdp;
	struct mbuf *mbuf, *get_next, **get_tailp;
	volatile uintptr_t *putp;
	uintptr_t put;
	unsigned int count;
	unsigned int non_tcp_count;

	SFXGE_TXQ_LOCK_ASSERT_OWNED(txq);

	stdp = &txq->dpl;

	/* Acquire the put list. */
	putp = &stdp->std_put;
	put = atomic_readandclear_ptr(putp);
	mbuf = (void *)put;

	if (mbuf == NULL)
		return;

	/* Reverse the put list. */
	get_tailp = &mbuf->m_nextpkt;
	get_next = NULL;

	count = 0;
	non_tcp_count = 0;
	do {
		struct mbuf *put_next;

		non_tcp_count += sfxge_is_mbuf_non_tcp(mbuf);
		put_next = mbuf->m_nextpkt;
		mbuf->m_nextpkt = get_next;
		get_next = mbuf;
		mbuf = put_next;

		count++;
	} while (mbuf != NULL);

	if (count > stdp->std_put_hiwat)
		stdp->std_put_hiwat = count;

	/* Append the reversed put list to the get list. */
	KASSERT(*get_tailp == NULL, ("*get_tailp != NULL"));
	*stdp->std_getp = get_next;
	stdp->std_getp = get_tailp;
	stdp->std_get_count += count;
	stdp->std_get_non_tcp_count += non_tcp_count;
}

static void
sfxge_tx_qreap(struct sfxge_txq *txq)
{
	SFXGE_TXQ_LOCK_ASSERT_OWNED(txq);

	txq->reaped = txq->completed;
}

static void
sfxge_tx_qlist_post(struct sfxge_txq *txq)
{
	unsigned int old_added;
	unsigned int block_level;
	unsigned int level;
	int rc;

	SFXGE_TXQ_LOCK_ASSERT_OWNED(txq);

	KASSERT(txq->n_pend_desc != 0, ("txq->n_pend_desc == 0"));
	KASSERT(txq->n_pend_desc <= txq->max_pkt_desc,
		("txq->n_pend_desc too large"));
	KASSERT(!txq->blocked, ("txq->blocked"));

	old_added = txq->added;

	/* Post the fragment list. */
	rc = efx_tx_qdesc_post(txq->common, txq->pend_desc, txq->n_pend_desc,
			  txq->reaped, &txq->added);
	KASSERT(rc == 0, ("efx_tx_qdesc_post() failed"));

	/* If efx_tx_qdesc_post() had to refragment, our information about
	 * buffers to free may be associated with the wrong
	 * descriptors.
	 */
	KASSERT(txq->added - old_added == txq->n_pend_desc,
		("efx_tx_qdesc_post() refragmented descriptors"));

	level = txq->added - txq->reaped;
	KASSERT(level <= txq->entries, ("overfilled TX queue"));

	/* Clear the fragment list. */
	txq->n_pend_desc = 0;

	/*
	 * Set the block level to ensure there is space to generate a
	 * large number of descriptors for TSO.
	 */
	block_level = EFX_TXQ_LIMIT(txq->entries) - txq->max_pkt_desc;

	/* Have we reached the block level? */
	if (level < block_level)
		return;

	/* Reap, and check again */
	sfxge_tx_qreap(txq);
	level = txq->added - txq->reaped;
	if (level < block_level)
		return;

	txq->blocked = 1;

	/*
	 * Avoid a race with completion interrupt handling that could leave
	 * the queue blocked.
	 */
	mb();
	sfxge_tx_qreap(txq);
	level = txq->added - txq->reaped;
	if (level < block_level) {
		mb();
		txq->blocked = 0;
	}
}

static int sfxge_tx_queue_mbuf(struct sfxge_txq *txq, struct mbuf *mbuf)
{
	bus_dmamap_t *used_map;
	bus_dmamap_t map;
	bus_dma_segment_t dma_seg[SFXGE_TX_MAPPING_MAX_SEG];
	unsigned int id;
	struct sfxge_tx_mapping *stmp;
	efx_desc_t *desc;
	int n_dma_seg;
	int rc;
	int i;
	int eop;
	int vlan_tagged;

	KASSERT(!txq->blocked, ("txq->blocked"));

	if (mbuf->m_pkthdr.csum_flags & CSUM_TSO)
		prefetch_read_many(mbuf->m_data);

	if (__predict_false(txq->init_state != SFXGE_TXQ_STARTED)) {
		rc = EINTR;
		goto reject;
	}

	/* Load the packet for DMA. */
	id = txq->added & txq->ptr_mask;
	stmp = &txq->stmp[id];
	rc = bus_dmamap_load_mbuf_sg(txq->packet_dma_tag, stmp->map,
				     mbuf, dma_seg, &n_dma_seg, 0);
	if (rc == EFBIG) {
		/* Try again. */
		struct mbuf *new_mbuf = m_collapse(mbuf, M_NOWAIT,
						   SFXGE_TX_MAPPING_MAX_SEG);
		if (new_mbuf == NULL)
			goto reject;
		++txq->collapses;
		mbuf = new_mbuf;
		rc = bus_dmamap_load_mbuf_sg(txq->packet_dma_tag,
					     stmp->map, mbuf,
					     dma_seg, &n_dma_seg, 0);
	}
	if (rc != 0)
		goto reject;

	/* Make the packet visible to the hardware. */
	bus_dmamap_sync(txq->packet_dma_tag, stmp->map, BUS_DMASYNC_PREWRITE);

	used_map = &stmp->map;

	vlan_tagged = sfxge_tx_maybe_insert_tag(txq, mbuf);
	if (vlan_tagged) {
		sfxge_next_stmp(txq, &stmp);
	}
	if (mbuf->m_pkthdr.csum_flags & CSUM_TSO) {
		rc = sfxge_tx_queue_tso(txq, mbuf, dma_seg, n_dma_seg, vlan_tagged);
		if (rc < 0)
			goto reject_mapped;
		stmp = &txq->stmp[(rc - 1) & txq->ptr_mask];
	} else {
		/* Add the mapping to the fragment list, and set flags
		 * for the buffer.
		 */

		i = 0;
		for (;;) {
			desc = &txq->pend_desc[i + vlan_tagged];
			eop = (i == n_dma_seg - 1);
			efx_tx_qdesc_dma_create(txq->common,
						dma_seg[i].ds_addr,
						dma_seg[i].ds_len,
						eop,
						desc);
			if (eop)
				break;
			i++;
			sfxge_next_stmp(txq, &stmp);
		}
		txq->n_pend_desc = n_dma_seg + vlan_tagged;
	}

	/*
	 * If the mapping required more than one descriptor
	 * then we need to associate the DMA map with the last
	 * descriptor, not the first.
	 */
	if (used_map != &stmp->map) {
		map = stmp->map;
		stmp->map = *used_map;
		*used_map = map;
	}

	stmp->u.mbuf = mbuf;
	stmp->flags = TX_BUF_UNMAP | TX_BUF_MBUF;

	/* Post the fragment list. */
	sfxge_tx_qlist_post(txq);

	return (0);

reject_mapped:
	bus_dmamap_unload(txq->packet_dma_tag, *used_map);
reject:
	/* Drop the packet on the floor. */
	m_freem(mbuf);
	++txq->drops;

	return (rc);
}

/*
 * Drain the deferred packet list into the transmit queue.
 */
static void
sfxge_tx_qdpl_drain(struct sfxge_txq *txq)
{
	struct sfxge_softc *sc;
	struct sfxge_tx_dpl *stdp;
	struct mbuf *mbuf, *next;
	unsigned int count;
	unsigned int non_tcp_count;
	unsigned int pushed;
	int rc;

	SFXGE_TXQ_LOCK_ASSERT_OWNED(txq);

	sc = txq->sc;
	stdp = &txq->dpl;
	pushed = txq->added;

	if (__predict_true(txq->init_state == SFXGE_TXQ_STARTED)) {
		prefetch_read_many(sc->enp);
		prefetch_read_many(txq->common);
	}

	mbuf = stdp->std_get;
	count = stdp->std_get_count;
	non_tcp_count = stdp->std_get_non_tcp_count;

	if (count > stdp->std_get_hiwat)
		stdp->std_get_hiwat = count;

	while (count != 0) {
		KASSERT(mbuf != NULL, ("mbuf == NULL"));

		next = mbuf->m_nextpkt;
		mbuf->m_nextpkt = NULL;

		ETHER_BPF_MTAP(sc->ifnet, mbuf); /* packet capture */

		if (next != NULL)
			prefetch_read_many(next);

		rc = sfxge_tx_queue_mbuf(txq, mbuf);
		--count;
		non_tcp_count -= sfxge_is_mbuf_non_tcp(mbuf);
		mbuf = next;
		if (rc != 0)
			continue;

		if (txq->blocked)
			break;

		/* Push the fragments to the hardware in batches. */
		if (txq->added - pushed >= SFXGE_TX_BATCH) {
			efx_tx_qpush(txq->common, txq->added, pushed);
			pushed = txq->added;
		}
	}

	if (count == 0) {
		KASSERT(mbuf == NULL, ("mbuf != NULL"));
		KASSERT(non_tcp_count == 0,
			("inconsistent TCP/non-TCP detection"));
		stdp->std_get = NULL;
		stdp->std_get_count = 0;
		stdp->std_get_non_tcp_count = 0;
		stdp->std_getp = &stdp->std_get;
	} else {
		stdp->std_get = mbuf;
		stdp->std_get_count = count;
		stdp->std_get_non_tcp_count = non_tcp_count;
	}

	if (txq->added != pushed)
		efx_tx_qpush(txq->common, txq->added, pushed);

	KASSERT(txq->blocked || stdp->std_get_count == 0,
		("queue unblocked but count is non-zero"));
}

#define	SFXGE_TX_QDPL_PENDING(_txq)	((_txq)->dpl.std_put != 0)

/*
 * Service the deferred packet list.
 *
 * NOTE: drops the txq mutex!
 */
static void
sfxge_tx_qdpl_service(struct sfxge_txq *txq)
{
	SFXGE_TXQ_LOCK_ASSERT_OWNED(txq);

	do {
		if (SFXGE_TX_QDPL_PENDING(txq))
			sfxge_tx_qdpl_swizzle(txq);

		if (!txq->blocked)
			sfxge_tx_qdpl_drain(txq);

		SFXGE_TXQ_UNLOCK(txq);
	} while (SFXGE_TX_QDPL_PENDING(txq) &&
		 SFXGE_TXQ_TRYLOCK(txq));
}

/*
 * Put a packet on the deferred packet get-list.
 */
static int
sfxge_tx_qdpl_put_locked(struct sfxge_txq *txq, struct mbuf *mbuf)
{
	struct sfxge_tx_dpl *stdp;

	stdp = &txq->dpl;

	KASSERT(mbuf->m_nextpkt == NULL, ("mbuf->m_nextpkt != NULL"));

	SFXGE_TXQ_LOCK_ASSERT_OWNED(txq);

	if (stdp->std_get_count >= stdp->std_get_max) {
		txq->get_overflow++;
		return (ENOBUFS);
	}
	if (sfxge_is_mbuf_non_tcp(mbuf)) {
		if (stdp->std_get_non_tcp_count >=
		    stdp->std_get_non_tcp_max) {
			txq->get_non_tcp_overflow++;
			return (ENOBUFS);
		}
		stdp->std_get_non_tcp_count++;
	}

	*(stdp->std_getp) = mbuf;
	stdp->std_getp = &mbuf->m_nextpkt;
	stdp->std_get_count++;

	return (0);
}

/*
 * Put a packet on the deferred packet put-list.
 *
 * We overload the csum_data field in the mbuf to keep track of the
 * put-list length, because there is no cheap alternative that avoids races.
 */
static int
sfxge_tx_qdpl_put_unlocked(struct sfxge_txq *txq, struct mbuf *mbuf)
{
	struct sfxge_tx_dpl *stdp;
	volatile uintptr_t *putp;
	uintptr_t old;
	uintptr_t new;
	unsigned old_len;

	KASSERT(mbuf->m_nextpkt == NULL, ("mbuf->m_nextpkt != NULL"));

	SFXGE_TXQ_LOCK_ASSERT_NOTOWNED(txq);

	stdp = &txq->dpl;
	putp = &stdp->std_put;
	new = (uintptr_t)mbuf;

	do {
		old = *putp;
		if (old != 0) {
			struct mbuf *mp = (struct mbuf *)old;
			old_len = mp->m_pkthdr.csum_data;
		} else
			old_len = 0;
		if (old_len >= stdp->std_put_max) {
			atomic_add_long(&txq->put_overflow, 1);
			return (ENOBUFS);
		}
		mbuf->m_pkthdr.csum_data = old_len + 1;
		mbuf->m_nextpkt = (void *)old;
	} while (atomic_cmpset_ptr(putp, old, new) == 0);

	return (0);
}

/*
 * Called from if_transmit - will try to grab the txq lock.  If the lock is
 * acquired, the packet is appended to the get-list; otherwise it is pushed
 * onto the put-list if there is space.
 */
static int
sfxge_tx_packet_add(struct sfxge_txq *txq, struct mbuf *m)
{
	int rc;

	if (!SFXGE_LINK_UP(txq->sc)) {
		atomic_add_long(&txq->netdown_drops, 1);
		return (ENETDOWN);
	}

	/*
	 * Try to grab the txq lock.  If we are able to get the lock,
	 * the packet will be appended to the "get list" of the deferred
	 * packet list.  Otherwise, it will be pushed on the "put list".
	 */
	if (SFXGE_TXQ_TRYLOCK(txq)) {
		/* First swizzle put-list to get-list to keep order */
		sfxge_tx_qdpl_swizzle(txq);

		rc = sfxge_tx_qdpl_put_locked(txq, m);

		/* Try to service the list. */
		sfxge_tx_qdpl_service(txq);
		/* Lock has been dropped. */
	} else {
		rc = sfxge_tx_qdpl_put_unlocked(txq, m);

		/*
		 * Try to grab the lock again.
		 *
		 * If we are able to get the lock, we need to process
		 * the deferred packet list.  If we are not able to get
		 * the lock, another thread is processing the list.
		 */
		if ((rc == 0) && SFXGE_TXQ_TRYLOCK(txq)) {
			sfxge_tx_qdpl_service(txq);
			/* Lock has been dropped. */
		}
	}

	SFXGE_TXQ_LOCK_ASSERT_NOTOWNED(txq);

	return (rc);
}

static void
sfxge_tx_qdpl_flush(struct sfxge_txq *txq)
{
	struct sfxge_tx_dpl *stdp = &txq->dpl;
	struct mbuf *mbuf, *next;

	SFXGE_TXQ_LOCK(txq);

	sfxge_tx_qdpl_swizzle(txq);
	for (mbuf = stdp->std_get; mbuf != NULL; mbuf = next) {
		next = mbuf->m_nextpkt;
		m_freem(mbuf);
	}
	stdp->std_get = NULL;
	stdp->std_get_count = 0;
	stdp->std_get_non_tcp_count = 0;
	stdp->std_getp = &stdp->std_get;

	SFXGE_TXQ_UNLOCK(txq);
}

void
sfxge_if_qflush(struct ifnet *ifp)
{
	struct sfxge_softc *sc;
	unsigned int i;

	sc = ifp->if_softc;

	for (i = 0; i < sc->txq_count; i++)
		sfxge_tx_qdpl_flush(sc->txq[i]);
}

/*
 * TX start -- called by the stack.
 */
int
sfxge_if_transmit(struct ifnet *ifp, struct mbuf *m)
{
	struct sfxge_softc *sc;
	struct sfxge_txq *txq;
	int rc;

	sc = (struct sfxge_softc *)ifp->if_softc;

	/*
	 * Transmit may be called when the interface is up from the kernel
	 * point of view but not yet up (still coming up) from the driver
	 * point of view, e.g. during link aggregation bring-up.
	 * Transmit may also be called when the interface is up from the
	 * driver point of view but already down from the kernel point of
	 * view, e.g. while interface shutdown is in progress.
	 */
	KASSERT((ifp->if_flags & IFF_UP) || (sc->if_flags & IFF_UP),
		("interface not up"));

	/* Pick the desired transmit queue. */
	if (m->m_pkthdr.csum_flags &
	    (CSUM_DELAY_DATA | CSUM_TCP_IPV6 | CSUM_UDP_IPV6 | CSUM_TSO)) {
		int index = 0;

		/* check if flowid is set */
		if (M_HASHTYPE_GET(m) != M_HASHTYPE_NONE) {
			uint32_t hash = m->m_pkthdr.flowid;

			index = sc->rx_indir_table[hash % SFXGE_RX_SCALE_MAX];
		}
		txq = sc->txq[SFXGE_TXQ_IP_TCP_UDP_CKSUM + index];
	} else if (m->m_pkthdr.csum_flags & CSUM_DELAY_IP) {
		txq = sc->txq[SFXGE_TXQ_IP_CKSUM];
	} else {
		txq = sc->txq[SFXGE_TXQ_NON_CKSUM];
	}

	rc = sfxge_tx_packet_add(txq, m);
	if (rc != 0)
		m_freem(m);

	return (rc);
}

/*
 * Software "TSO".  Not quite as good as doing it in hardware, but
 * still faster than segmenting in the stack.
 */

struct sfxge_tso_state {
	/* Output position */
	unsigned out_len;	/* Remaining length in current segment */
	unsigned seqnum;	/* Current sequence number */
	unsigned packet_space;	/* Remaining space in current packet */

	/* Input position */
	uint64_t dma_addr;	/* DMA address of current position */
	unsigned in_len;	/* Remaining length in current mbuf */

	const struct mbuf *mbuf; /* Input mbuf (head of chain) */
	u_short protocol;	/* Network protocol (after VLAN decap) */
	ssize_t nh_off;		/* Offset of network header */
	ssize_t tcph_off;	/* Offset of TCP header */
	unsigned header_len;	/* Number of bytes of header */
	unsigned seg_size;	/* TCP segment size */
	int fw_assisted;	/* Use FW-assisted TSO */
	u_short packet_id;	/* IPv4 packet ID from the original packet */
	efx_desc_t header_desc; /* Precomputed header descriptor for
				 * FW-assisted TSO */
};

static const struct ip *tso_iph(const struct sfxge_tso_state *tso)
{
	KASSERT(tso->protocol == htons(ETHERTYPE_IP),
		("tso_iph() in non-IPv4 state"));
	return (const struct ip *)(tso->mbuf->m_data + tso->nh_off);
}
static __unused const struct ip6_hdr *tso_ip6h(const struct sfxge_tso_state *tso)
{
	KASSERT(tso->protocol == htons(ETHERTYPE_IPV6),
		("tso_ip6h() in non-IPv6 state"));
	return (const struct ip6_hdr *)(tso->mbuf->m_data + tso->nh_off);
}
static const struct tcphdr *tso_tcph(const struct sfxge_tso_state *tso)
{
	return (const struct tcphdr *)(tso->mbuf->m_data + tso->tcph_off);
}

/* Size of preallocated TSO header buffers.  Larger blocks must be
 * allocated from the heap.
 */
#define	TSOH_STD_SIZE	128

/* At most half the descriptors in the queue at any time will refer to
 * a TSO header buffer, since they must always be followed by a
 * payload descriptor referring to an mbuf.
 */
#define	TSOH_COUNT(_txq_entries)	((_txq_entries) / 2u)
#define	TSOH_PER_PAGE	(PAGE_SIZE / TSOH_STD_SIZE)
#define	TSOH_PAGE_COUNT(_txq_entries)	\
	((TSOH_COUNT(_txq_entries) + TSOH_PER_PAGE - 1) / TSOH_PER_PAGE)

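/*
 * Worked example (illustrative only; assumes 4 KB pages): TSOH_PER_PAGE =
 * 4096 / 128 = 32, so a 1024-entry Tx queue needs TSOH_COUNT(1024) = 512
 * standard header buffers and therefore TSOH_PAGE_COUNT(1024) = 16 pages.
 */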
static int tso_init(struct sfxge_txq *txq)
{
	struct sfxge_softc *sc = txq->sc;
	unsigned int tsoh_page_count = TSOH_PAGE_COUNT(sc->txq_entries);
	int i, rc;

	/* Allocate TSO header buffers */
	txq->tsoh_buffer = malloc(tsoh_page_count * sizeof(txq->tsoh_buffer[0]),
				  M_SFXGE, M_WAITOK);

	for (i = 0; i < tsoh_page_count; i++) {
		rc = sfxge_dma_alloc(sc, PAGE_SIZE, &txq->tsoh_buffer[i]);
		if (rc != 0)
			goto fail;
	}

	return (0);

fail:
	while (i-- > 0)
		sfxge_dma_free(&txq->tsoh_buffer[i]);
	free(txq->tsoh_buffer, M_SFXGE);
	txq->tsoh_buffer = NULL;
	return (rc);
}

static void tso_fini(struct sfxge_txq *txq)
{
	int i;

	if (txq->tsoh_buffer != NULL) {
		for (i = 0; i < TSOH_PAGE_COUNT(txq->sc->txq_entries); i++)
			sfxge_dma_free(&txq->tsoh_buffer[i]);
		free(txq->tsoh_buffer, M_SFXGE);
	}
}

static void tso_start(struct sfxge_txq *txq, struct sfxge_tso_state *tso,
		      const bus_dma_segment_t *hdr_dma_seg,
		      struct mbuf *mbuf)
{
	struct ether_header *eh = mtod(mbuf, struct ether_header *);
	const efx_nic_cfg_t *encp = efx_nic_cfg_get(txq->sc->enp);
	const struct tcphdr *th;
	struct tcphdr th_copy;

	tso->fw_assisted = txq->sc->tso_fw_assisted;
	tso->mbuf = mbuf;

	/* Find network protocol and header */
	tso->protocol = eh->ether_type;
	if (tso->protocol == htons(ETHERTYPE_VLAN)) {
		struct ether_vlan_header *veh =
			mtod(mbuf, struct ether_vlan_header *);
		tso->protocol = veh->evl_proto;
		tso->nh_off = sizeof(*veh);
	} else {
		tso->nh_off = sizeof(*eh);
	}

	/* Find TCP header */
	if (tso->protocol == htons(ETHERTYPE_IP)) {
		KASSERT(tso_iph(tso)->ip_p == IPPROTO_TCP,
			("TSO required on non-TCP packet"));
		tso->tcph_off = tso->nh_off + 4 * tso_iph(tso)->ip_hl;
		tso->packet_id = tso_iph(tso)->ip_id;
	} else {
		KASSERT(tso->protocol == htons(ETHERTYPE_IPV6),
			("TSO required on non-IP packet"));
		KASSERT(tso_ip6h(tso)->ip6_nxt == IPPROTO_TCP,
			("TSO required on non-TCP packet"));
		tso->tcph_off = tso->nh_off + sizeof(struct ip6_hdr);
		tso->packet_id = 0;
	}
	if (tso->fw_assisted &&
	    __predict_false(tso->tcph_off >
			    encp->enc_tx_tso_tcp_header_offset_limit)) {
		tso->fw_assisted = 0;
	}

	KASSERT(mbuf->m_len >= tso->tcph_off,
		("network header is fragmented in mbuf"));
	/* We need the TCP header including the flags (th_win is the next field) */
	if (mbuf->m_len < tso->tcph_off + offsetof(struct tcphdr, th_win)) {
		m_copydata(tso->mbuf, tso->tcph_off, sizeof(th_copy),
			   (caddr_t)&th_copy);
		th = &th_copy;
	} else {
		th = tso_tcph(tso);
	}

	tso->header_len = tso->tcph_off + 4 * th->th_off;
	tso->seg_size = mbuf->m_pkthdr.tso_segsz;

	tso->seqnum = ntohl(th->th_seq);

	/* These flags must not be duplicated */
	/*
	 * RST should not be duplicated either, but the FreeBSD kernel
	 * generates TSO packets with the RST flag set, so do not assert
	 * its absence.
	 */
	KASSERT(!(th->th_flags & (TH_URG | TH_SYN)),
		("incompatible TCP flag 0x%x on TSO packet",
		 th->th_flags & (TH_URG | TH_SYN)));

	tso->out_len = mbuf->m_pkthdr.len - tso->header_len;

	if (tso->fw_assisted) {
		if (hdr_dma_seg->ds_len >= tso->header_len)
			efx_tx_qdesc_dma_create(txq->common,
						hdr_dma_seg->ds_addr,
						tso->header_len,
						B_FALSE,
						&tso->header_desc);
		else
			tso->fw_assisted = 0;
	}
}

/*
 * tso_fill_packet_with_fragment - form descriptors for the current fragment
 *
 * Form descriptors for the current fragment, until we reach the end of
 * the fragment or the end of the packet.
 */
static void tso_fill_packet_with_fragment(struct sfxge_txq *txq,
					  struct sfxge_tso_state *tso)
{
	efx_desc_t *desc;
	int n;

	if (tso->in_len == 0 || tso->packet_space == 0)
		return;

	KASSERT(tso->in_len > 0, ("TSO input length went negative"));
	KASSERT(tso->packet_space > 0, ("TSO packet space went negative"));

	n = min(tso->in_len, tso->packet_space);

	tso->packet_space -= n;
	tso->out_len -= n;
	tso->in_len -= n;

	desc = &txq->pend_desc[txq->n_pend_desc++];
	efx_tx_qdesc_dma_create(txq->common,
				tso->dma_addr,
				n,
				tso->out_len == 0 || tso->packet_space == 0,
				desc);

	tso->dma_addr += n;
}

/* Callback from bus_dmamap_load() for long TSO headers. */
static void tso_map_long_header(void *dma_addr_ret,
				bus_dma_segment_t *segs, int nseg,
				int error)
{
	*(uint64_t *)dma_addr_ret = ((__predict_true(error == 0) &&
				      __predict_true(nseg == 1)) ?
				     segs->ds_addr : 0);
}

/*
 * tso_start_new_packet - generate a new header and prepare for the new packet
 *
 * Generate a new header and prepare for the new packet.  Return 0 on
 * success, or an error code if allocation of the header failed.
 */
static int tso_start_new_packet(struct sfxge_txq *txq,
				struct sfxge_tso_state *tso,
				unsigned int *idp)
{
	unsigned int id = *idp;
	struct tcphdr *tsoh_th;
	unsigned ip_length;
	caddr_t header;
	uint64_t dma_addr;
	bus_dmamap_t map;
	efx_desc_t *desc;
	int rc;

	if (tso->fw_assisted) {
		uint8_t tcp_flags = tso_tcph(tso)->th_flags;

		if (tso->out_len > tso->seg_size)
			tcp_flags &= ~(TH_FIN | TH_PUSH);

		/* TSO option descriptor */
		desc = &txq->pend_desc[txq->n_pend_desc++];
		efx_tx_qdesc_tso_create(txq->common,
					tso->packet_id,
					tso->seqnum,
					tcp_flags,
					desc++);
		KASSERT(txq->stmp[id].flags == 0, ("stmp flags are not 0"));
		id = (id + 1) & txq->ptr_mask;

		/* Header DMA descriptor */
		*desc = tso->header_desc;
		txq->n_pend_desc++;
		KASSERT(txq->stmp[id].flags == 0, ("stmp flags are not 0"));
		id = (id + 1) & txq->ptr_mask;

		tso->seqnum += tso->seg_size;
	} else {
		/* Allocate a DMA-mapped header buffer. */
		if (__predict_true(tso->header_len <= TSOH_STD_SIZE)) {
			unsigned int page_index = (id / 2) / TSOH_PER_PAGE;
			unsigned int buf_index = (id / 2) % TSOH_PER_PAGE;

			header = (txq->tsoh_buffer[page_index].esm_base +
				  buf_index * TSOH_STD_SIZE);
			dma_addr = (txq->tsoh_buffer[page_index].esm_addr +
				    buf_index * TSOH_STD_SIZE);
			map = txq->tsoh_buffer[page_index].esm_map;

			KASSERT(txq->stmp[id].flags == 0,
				("stmp flags are not 0"));
		} else {
			struct sfxge_tx_mapping *stmp = &txq->stmp[id];

			/* We cannot use bus_dmamem_alloc() as that may sleep */
			header = malloc(tso->header_len, M_SFXGE, M_NOWAIT);
			if (__predict_false(!header))
				return (ENOMEM);
			rc = bus_dmamap_load(txq->packet_dma_tag, stmp->map,
					     header, tso->header_len,
					     tso_map_long_header, &dma_addr,
					     BUS_DMA_NOWAIT);
			if (__predict_false(dma_addr == 0)) {
				if (rc == 0) {
					/* Succeeded but got >1 segment */
					bus_dmamap_unload(txq->packet_dma_tag,
							  stmp->map);
					rc = EINVAL;
				}
				free(header, M_SFXGE);
				return (rc);
			}
			map = stmp->map;

			txq->tso_long_headers++;
			stmp->u.heap_buf = header;
			stmp->flags = TX_BUF_UNMAP;
		}

		tsoh_th = (struct tcphdr *)(header + tso->tcph_off);

		/* Copy and update the headers. */
		m_copydata(tso->mbuf, 0, tso->header_len, header);

		tsoh_th->th_seq = htonl(tso->seqnum);
		tso->seqnum += tso->seg_size;
		if (tso->out_len > tso->seg_size) {
			/* This packet will not finish the TSO burst. */
			ip_length = tso->header_len - tso->nh_off + tso->seg_size;
			tsoh_th->th_flags &= ~(TH_FIN | TH_PUSH);
		} else {
			/* This packet will be the last in the TSO burst. */
			ip_length = tso->header_len - tso->nh_off + tso->out_len;
		}

		if (tso->protocol == htons(ETHERTYPE_IP)) {
			struct ip *tsoh_iph = (struct ip *)(header + tso->nh_off);
			tsoh_iph->ip_len = htons(ip_length);
			/* XXX We should increment ip_id, but FreeBSD doesn't
			 * currently allocate extra IDs for multiple segments.
			 */
		} else {
			struct ip6_hdr *tsoh_iph =
				(struct ip6_hdr *)(header + tso->nh_off);
			tsoh_iph->ip6_plen = htons(ip_length - sizeof(*tsoh_iph));
		}

		/* Make the header visible to the hardware. */
		bus_dmamap_sync(txq->packet_dma_tag, map, BUS_DMASYNC_PREWRITE);

		/* Form a descriptor for this header. */
		desc = &txq->pend_desc[txq->n_pend_desc++];
		efx_tx_qdesc_dma_create(txq->common,
					dma_addr,
					tso->header_len,
					0,
					desc);
		id = (id + 1) & txq->ptr_mask;
	}
	tso->packet_space = tso->seg_size;
	txq->tso_packets++;
	*idp = id;

	return (0);
}

static int
sfxge_tx_queue_tso(struct sfxge_txq *txq, struct mbuf *mbuf,
		   const bus_dma_segment_t *dma_seg, int n_dma_seg,
		   int vlan_tagged)
{
	struct sfxge_tso_state tso;
	unsigned int id;
	unsigned skipped = 0;

	tso_start(txq, &tso, dma_seg, mbuf);

	while (dma_seg->ds_len + skipped <= tso.header_len) {
		skipped += dma_seg->ds_len;
		--n_dma_seg;
		KASSERT(n_dma_seg, ("no payload found in TSO packet"));
		++dma_seg;
	}
	tso.in_len = dma_seg->ds_len - (tso.header_len - skipped);
	tso.dma_addr = dma_seg->ds_addr + (tso.header_len - skipped);

	id = (txq->added + vlan_tagged) & txq->ptr_mask;
	if (__predict_false(tso_start_new_packet(txq, &tso, &id)))
		return (-1);

	while (1) {
		tso_fill_packet_with_fragment(txq, &tso);
		/* Exactly one DMA descriptor is added */
		KASSERT(txq->stmp[id].flags == 0, ("stmp flags are not 0"));
		id = (id + 1) & txq->ptr_mask;

		/* Move onto the next fragment? */
		if (tso.in_len == 0) {
			--n_dma_seg;
			if (n_dma_seg == 0)
				break;
			++dma_seg;
			tso.in_len = dma_seg->ds_len;
			tso.dma_addr = dma_seg->ds_addr;
		}

		/* End of packet? */
		if (tso.packet_space == 0) {
			/* If the queue is now full due to tiny MSS,
			 * or we can't create another header, discard
			 * the remainder of the input mbuf but do not
			 * roll back the work we have done.
			 */
			if (txq->n_pend_desc + tso.fw_assisted +
			    1 /* header */ + n_dma_seg >
			    txq->max_pkt_desc) {
				txq->tso_pdrop_too_many++;
				break;
			}
			if (__predict_false(tso_start_new_packet(txq, &tso,
								 &id))) {
				txq->tso_pdrop_no_rsrc++;
				break;
			}
		}
	}

	txq->tso_bursts++;
	return (id);
}

static void
sfxge_tx_qunblock(struct sfxge_txq *txq)
{
	struct sfxge_softc *sc;
	struct sfxge_evq *evq;

	sc = txq->sc;
	evq = sc->evq[txq->evq_index];

	SFXGE_EVQ_LOCK_ASSERT_OWNED(evq);

	if (__predict_false(txq->init_state != SFXGE_TXQ_STARTED))
		return;

	SFXGE_TXQ_LOCK(txq);

	if (txq->blocked) {
		unsigned int level;

		level = txq->added - txq->completed;
		if (level <= SFXGE_TXQ_UNBLOCK_LEVEL(txq->entries)) {
			/* reaped must be in sync with blocked */
			sfxge_tx_qreap(txq);
			txq->blocked = 0;
		}
	}

	sfxge_tx_qdpl_service(txq);
	/* note: lock has been dropped */
}

void
sfxge_tx_qflush_done(struct sfxge_txq *txq)
{

	txq->flush_state = SFXGE_FLUSH_DONE;
}

static void
sfxge_tx_qstop(struct sfxge_softc *sc, unsigned int index)
{
	struct sfxge_txq *txq;
	struct sfxge_evq *evq;
	unsigned int count;

	SFXGE_ADAPTER_LOCK_ASSERT_OWNED(sc);

	txq = sc->txq[index];
	evq = sc->evq[txq->evq_index];

	SFXGE_EVQ_LOCK(evq);
	SFXGE_TXQ_LOCK(txq);

	KASSERT(txq->init_state == SFXGE_TXQ_STARTED,
	    ("txq->init_state != SFXGE_TXQ_STARTED"));

	txq->init_state = SFXGE_TXQ_INITIALIZED;

	if (txq->flush_state != SFXGE_FLUSH_DONE) {
		txq->flush_state = SFXGE_FLUSH_PENDING;

		SFXGE_EVQ_UNLOCK(evq);
		SFXGE_TXQ_UNLOCK(txq);

		/* Flush the transmit queue. */
		if (efx_tx_qflush(txq->common) != 0) {
			log(LOG_ERR, "%s: Flushing Tx queue %u failed\n",
			    device_get_nameunit(sc->dev), index);
			txq->flush_state = SFXGE_FLUSH_DONE;
		} else {
			count = 0;
			do {
				/* Spin for 100ms. */
				DELAY(100000);
				if (txq->flush_state != SFXGE_FLUSH_PENDING)
					break;
			} while (++count < 20);
		}
		SFXGE_EVQ_LOCK(evq);
		SFXGE_TXQ_LOCK(txq);

		KASSERT(txq->flush_state != SFXGE_FLUSH_FAILED,
		    ("txq->flush_state == SFXGE_FLUSH_FAILED"));

		if (txq->flush_state != SFXGE_FLUSH_DONE) {
			/* Flush timeout */
			log(LOG_ERR, "%s: Cannot flush Tx queue %u\n",
			    device_get_nameunit(sc->dev), index);
			txq->flush_state = SFXGE_FLUSH_DONE;
		}
	}

	txq->blocked = 0;
	txq->pending = txq->added;

	sfxge_tx_qcomplete(txq, evq);
	KASSERT(txq->completed == txq->added,
	    ("txq->completed != txq->added"));

	sfxge_tx_qreap(txq);
	KASSERT(txq->reaped == txq->completed,
	    ("txq->reaped != txq->completed"));

	txq->added = 0;
	txq->pending = 0;
	txq->completed = 0;
	txq->reaped = 0;

	/* Destroy the common code transmit queue. */
	efx_tx_qdestroy(txq->common);
	txq->common = NULL;

	efx_sram_buf_tbl_clear(sc->enp, txq->buf_base_id,
	    EFX_TXQ_NBUFS(sc->txq_entries));

	SFXGE_EVQ_UNLOCK(evq);
	SFXGE_TXQ_UNLOCK(txq);
}

static int
sfxge_tx_qstart(struct sfxge_softc *sc, unsigned int index)
{
	struct sfxge_txq *txq;
	efsys_mem_t *esmp;
	uint16_t flags;
	struct sfxge_evq *evq;
	unsigned int desc_index;
	int rc;

	SFXGE_ADAPTER_LOCK_ASSERT_OWNED(sc);

	txq = sc->txq[index];
	esmp = &txq->mem;
	evq = sc->evq[txq->evq_index];

	KASSERT(txq->init_state == SFXGE_TXQ_INITIALIZED,
	    ("txq->init_state != SFXGE_TXQ_INITIALIZED"));
	KASSERT(evq->init_state == SFXGE_EVQ_STARTED,
	    ("evq->init_state != SFXGE_EVQ_STARTED"));

	/* Program the buffer table. */
	if ((rc = efx_sram_buf_tbl_set(sc->enp, txq->buf_base_id, esmp,
	    EFX_TXQ_NBUFS(sc->txq_entries))) != 0)
		return (rc);

	/* Determine the kind of queue we are creating. */
	switch (txq->type) {
	case SFXGE_TXQ_NON_CKSUM:
		flags = 0;
		break;
	case SFXGE_TXQ_IP_CKSUM:
		flags = EFX_CKSUM_IPV4;
		break;
	case SFXGE_TXQ_IP_TCP_UDP_CKSUM:
		flags = EFX_CKSUM_IPV4 | EFX_CKSUM_TCPUDP;
		break;
	default:
		KASSERT(0, ("Impossible TX queue"));
		flags = 0;
		break;
	}

	/* Create the common code transmit queue. */
	if ((rc = efx_tx_qcreate(sc->enp, index, txq->type, esmp,
	    sc->txq_entries, txq->buf_base_id, flags, evq->common,
	    &txq->common, &desc_index)) != 0)
		goto fail;

	/* Initialise queue descriptor indexes */
	txq->added = txq->pending = txq->completed = txq->reaped = desc_index;

	SFXGE_TXQ_LOCK(txq);

	/* Enable the transmit queue. */
	efx_tx_qenable(txq->common);

	txq->init_state = SFXGE_TXQ_STARTED;
	txq->flush_state = SFXGE_FLUSH_REQUIRED;

	SFXGE_TXQ_UNLOCK(txq);

	return (0);

fail:
	efx_sram_buf_tbl_clear(sc->enp, txq->buf_base_id,
	    EFX_TXQ_NBUFS(sc->txq_entries));
	return (rc);
}

void
sfxge_tx_stop(struct sfxge_softc *sc)
{
	int index;

	index = sc->txq_count;
	while (--index >= 0)
		sfxge_tx_qstop(sc, index);

	/* Tear down the transmit module */
	efx_tx_fini(sc->enp);
}

int
sfxge_tx_start(struct sfxge_softc *sc)
{
	int index;
	int rc;

	/* Initialize the common code transmit module. */
	if ((rc = efx_tx_init(sc->enp)) != 0)
		return (rc);

	for (index = 0; index < sc->txq_count; index++) {
		if ((rc = sfxge_tx_qstart(sc, index)) != 0)
			goto fail;
	}

	return (0);

fail:
	while (--index >= 0)
		sfxge_tx_qstop(sc, index);

	efx_tx_fini(sc->enp);

	return (rc);
}

static int
sfxge_txq_stat_init(struct sfxge_txq *txq, struct sysctl_oid *txq_node)
{
	struct sysctl_ctx_list *ctx = device_get_sysctl_ctx(txq->sc->dev);
	struct sysctl_oid *stat_node;
	unsigned int id;

	stat_node = SYSCTL_ADD_NODE(ctx, SYSCTL_CHILDREN(txq_node), OID_AUTO,
				    "stats", CTLFLAG_RD, NULL,
				    "Tx queue statistics");
	if (stat_node == NULL)
		return (ENOMEM);

	for (id = 0; id < nitems(sfxge_tx_stats); id++) {
		SYSCTL_ADD_ULONG(
		    ctx, SYSCTL_CHILDREN(stat_node), OID_AUTO,
		    sfxge_tx_stats[id].name, CTLFLAG_RD | CTLFLAG_STATS,
		    (unsigned long *)((caddr_t)txq + sfxge_tx_stats[id].offset),
		    "");
	}

	return (0);
}

/**
 * Destroy a transmit queue.
 */
static void
sfxge_tx_qfini(struct sfxge_softc *sc, unsigned int index)
{
	struct sfxge_txq *txq;
	unsigned int nmaps;

	txq = sc->txq[index];

	KASSERT(txq->init_state == SFXGE_TXQ_INITIALIZED,
	    ("txq->init_state != SFXGE_TXQ_INITIALIZED"));

	if (txq->type == SFXGE_TXQ_IP_TCP_UDP_CKSUM)
		tso_fini(txq);

	/* Free the context arrays. */
	free(txq->pend_desc, M_SFXGE);
	nmaps = sc->txq_entries;
	while (nmaps-- != 0)
		bus_dmamap_destroy(txq->packet_dma_tag, txq->stmp[nmaps].map);
	free(txq->stmp, M_SFXGE);

	/* Release DMA memory mapping. */
	sfxge_dma_free(&txq->mem);

	sc->txq[index] = NULL;

	SFXGE_TXQ_LOCK_DESTROY(txq);

	free(txq, M_SFXGE);
}

/*
 * Estimate maximum number of Tx descriptors required for TSO packet.
 * With minimum MSS and maximum mbuf length we might need more (even
 * than a ring-ful of descriptors), but this should not happen in
 * practice except due to deliberate attack.  In that case we will
 * truncate the output at a packet boundary.
 */
static unsigned int
sfxge_tx_max_pkt_desc(const struct sfxge_softc *sc, enum sfxge_txq_type type)
{
	/* One descriptor for every input fragment */
	unsigned int max_descs = SFXGE_TX_MAPPING_MAX_SEG;

	/* VLAN tagging Tx option descriptor may be required */
	if (efx_nic_cfg_get(sc->enp)->enc_hw_tx_insert_vlan_enabled)
		max_descs++;

	if (type == SFXGE_TXQ_IP_TCP_UDP_CKSUM) {
		/*
		 * Plus header and payload descriptor for each output segment.
		 * Minus one since header fragment is already counted.
		 */
		max_descs += SFXGE_TSO_MAX_SEGS * 2 - 1;

		/* FW assisted TSO requires one more descriptor per segment */
		if (sc->tso_fw_assisted)
			max_descs += SFXGE_TSO_MAX_SEGS;
	}

	return (max_descs);
}
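/*
 * Illustrative arithmetic only (hypothetical constant values): if
 * SFXGE_TX_MAPPING_MAX_SEG were 256 and SFXGE_TSO_MAX_SEGS were 100, a
 * TSO-capable queue on a NIC with hardware VLAN insertion and FW-assisted
 * TSO would allow up to 256 + 1 + (100 * 2 - 1) + 100 = 556 descriptors
 * per packet.
 */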
static int
sfxge_tx_qinit(struct sfxge_softc *sc, unsigned int txq_index,
	       enum sfxge_txq_type type, unsigned int evq_index)
{
	char name[16];
	struct sysctl_ctx_list *ctx = device_get_sysctl_ctx(sc->dev);
	struct sysctl_oid *txq_node;
	struct sfxge_txq *txq;
	struct sfxge_evq *evq;
	struct sfxge_tx_dpl *stdp;
	struct sysctl_oid *dpl_node;
	efsys_mem_t *esmp;
	unsigned int nmaps;
	int rc;

	txq = malloc(sizeof(struct sfxge_txq), M_SFXGE, M_ZERO | M_WAITOK);
	txq->sc = sc;
	txq->entries = sc->txq_entries;
	txq->ptr_mask = txq->entries - 1;

	sc->txq[txq_index] = txq;
	esmp = &txq->mem;

	evq = sc->evq[evq_index];

	/* Allocate and zero DMA space for the descriptor ring. */
	if ((rc = sfxge_dma_alloc(sc, EFX_TXQ_SIZE(sc->txq_entries), esmp)) != 0)
		return (rc);

	/* Allocate buffer table entries. */
	sfxge_sram_buf_tbl_alloc(sc, EFX_TXQ_NBUFS(sc->txq_entries),
				 &txq->buf_base_id);

	/* Create a DMA tag for packet mappings. */
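	/*
	 * Informational reading of the parameters below, following the
	 * bus_dma_tag_create(9) argument order: no alignment constraint,
	 * segments capped at 0x1000 bytes and not allowed to cross a
	 * 0x1000-byte boundary, mappings of up to 0x11000 bytes in at most
	 * SFXGE_TX_MAPPING_MAX_SEG segments, and DMA addresses limited to
	 * the low 46-bit range (0x3FFFFFFFFFFF).
	 */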
	if (bus_dma_tag_create(sc->parent_dma_tag, 1, 0x1000,
	    MIN(0x3FFFFFFFFFFFUL, BUS_SPACE_MAXADDR), BUS_SPACE_MAXADDR, NULL,
	    NULL, 0x11000, SFXGE_TX_MAPPING_MAX_SEG, 0x1000, 0, NULL, NULL,
	    &txq->packet_dma_tag) != 0) {
		device_printf(sc->dev, "Couldn't allocate txq DMA tag\n");
		rc = ENOMEM;
		goto fail;
	}

	/* Allocate pending descriptor array for batching writes. */
	txq->pend_desc = malloc(sizeof(efx_desc_t) * sc->txq_entries,
				M_SFXGE, M_ZERO | M_WAITOK);

	/* Allocate and initialize mbuf DMA mapping array. */
	txq->stmp = malloc(sizeof(struct sfxge_tx_mapping) * sc->txq_entries,
	    M_SFXGE, M_ZERO | M_WAITOK);
	for (nmaps = 0; nmaps < sc->txq_entries; nmaps++) {
		rc = bus_dmamap_create(txq->packet_dma_tag, 0,
				       &txq->stmp[nmaps].map);
		if (rc != 0)
			goto fail2;
	}

	snprintf(name, sizeof(name), "%u", txq_index);
	txq_node = SYSCTL_ADD_NODE(ctx, SYSCTL_CHILDREN(sc->txqs_node),
				   OID_AUTO, name, CTLFLAG_RD, NULL, "");
	if (txq_node == NULL) {
		rc = ENOMEM;
		goto fail_txq_node;
	}

	if (type == SFXGE_TXQ_IP_TCP_UDP_CKSUM &&
	    (rc = tso_init(txq)) != 0)
		goto fail3;

	if (sfxge_tx_dpl_get_max <= 0) {
		log(LOG_ERR, "%s=%d must be greater than 0",
		    SFXGE_PARAM_TX_DPL_GET_MAX, sfxge_tx_dpl_get_max);
		rc = EINVAL;
		goto fail_tx_dpl_get_max;
	}
	if (sfxge_tx_dpl_get_non_tcp_max <= 0) {
		log(LOG_ERR, "%s=%d must be greater than 0",
		    SFXGE_PARAM_TX_DPL_GET_NON_TCP_MAX,
		    sfxge_tx_dpl_get_non_tcp_max);
		rc = EINVAL;
		goto fail_tx_dpl_get_max;
	}
	if (sfxge_tx_dpl_put_max < 0) {
		log(LOG_ERR, "%s=%d must be greater than or equal to 0",
		    SFXGE_PARAM_TX_DPL_PUT_MAX, sfxge_tx_dpl_put_max);
		rc = EINVAL;
		goto fail_tx_dpl_put_max;
	}

	/* Initialize the deferred packet list. */
	stdp = &txq->dpl;
	stdp->std_put_max = sfxge_tx_dpl_put_max;
	stdp->std_get_max = sfxge_tx_dpl_get_max;
	stdp->std_get_non_tcp_max = sfxge_tx_dpl_get_non_tcp_max;
	stdp->std_getp = &stdp->std_get;

	SFXGE_TXQ_LOCK_INIT(txq, device_get_nameunit(sc->dev), txq_index);

	dpl_node = SYSCTL_ADD_NODE(ctx, SYSCTL_CHILDREN(txq_node), OID_AUTO,
				   "dpl", CTLFLAG_RD, NULL,
				   "Deferred packet list statistics");
	if (dpl_node == NULL) {
		rc = ENOMEM;
		goto fail_dpl_node;
	}

	SYSCTL_ADD_UINT(ctx, SYSCTL_CHILDREN(dpl_node), OID_AUTO,
			"get_count", CTLFLAG_RD | CTLFLAG_STATS,
			&stdp->std_get_count, 0, "");
	SYSCTL_ADD_UINT(ctx, SYSCTL_CHILDREN(dpl_node), OID_AUTO,
			"get_non_tcp_count", CTLFLAG_RD | CTLFLAG_STATS,
			&stdp->std_get_non_tcp_count, 0, "");
	SYSCTL_ADD_UINT(ctx, SYSCTL_CHILDREN(dpl_node), OID_AUTO,
			"get_hiwat", CTLFLAG_RD | CTLFLAG_STATS,
			&stdp->std_get_hiwat, 0, "");
	SYSCTL_ADD_UINT(ctx, SYSCTL_CHILDREN(dpl_node), OID_AUTO,
			"put_hiwat", CTLFLAG_RD | CTLFLAG_STATS,
			&stdp->std_put_hiwat, 0, "");

	rc = sfxge_txq_stat_init(txq, txq_node);
	if (rc != 0)
		goto fail_txq_stat_init;

	txq->type = type;
	txq->evq_index = evq_index;
	txq->txq_index = txq_index;
	txq->init_state = SFXGE_TXQ_INITIALIZED;
	txq->hw_vlan_tci = 0;

	txq->max_pkt_desc = sfxge_tx_max_pkt_desc(sc, type);

	return (0);

fail_txq_stat_init:
fail_dpl_node:
fail_tx_dpl_put_max:
fail_tx_dpl_get_max:
fail3:
fail_txq_node:
	free(txq->pend_desc, M_SFXGE);
fail2:
	while (nmaps-- != 0)
		bus_dmamap_destroy(txq->packet_dma_tag, txq->stmp[nmaps].map);
	free(txq->stmp, M_SFXGE);
	bus_dma_tag_destroy(txq->packet_dma_tag);

fail:
	sfxge_dma_free(esmp);

	return (rc);
}

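/*
 * Sysctl handler for the aggregated Tx statistics: sums the counter
 * identified by arg2 across all Tx queues and returns the total.
 */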
static int
sfxge_tx_stat_handler(SYSCTL_HANDLER_ARGS)
{
	struct sfxge_softc *sc = arg1;
	unsigned int id = arg2;
	unsigned long sum;
	unsigned int index;

	/* Sum across all TX queues */
	sum = 0;
	for (index = 0; index < sc->txq_count; index++)
		sum += *(unsigned long *)((caddr_t)sc->txq[index] +
					  sfxge_tx_stats[id].offset);

	return (SYSCTL_OUT(req, &sum, sizeof(sum)));
}

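/*
 * Register one CTLTYPE_ULONG sysctl per Tx statistic under the adapter's
 * stats node; each entry reads through sfxge_tx_stat_handler.
 */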
static void
sfxge_tx_stat_init(struct sfxge_softc *sc)
{
	struct sysctl_ctx_list *ctx = device_get_sysctl_ctx(sc->dev);
	struct sysctl_oid_list *stat_list;
	unsigned int id;

	stat_list = SYSCTL_CHILDREN(sc->stats_node);

	for (id = 0; id < nitems(sfxge_tx_stats); id++) {
		SYSCTL_ADD_PROC(
			ctx, stat_list,
			OID_AUTO, sfxge_tx_stats[id].name,
			CTLTYPE_ULONG|CTLFLAG_RD,
			sc, id, sfxge_tx_stat_handler, "LU",
			"");
	}
}

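/*
 * Tear down all Tx queues in reverse order of creation.
 */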
void
sfxge_tx_fini(struct sfxge_softc *sc)
{
	int index;

	index = sc->txq_count;
	while (--index >= 0)
		sfxge_tx_qfini(sc, index);

	sc->txq_count = 0;
}

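/*
 * Allocate and initialize all Tx queues for the adapter and register the
 * "txq" sysctl tree and the aggregated Tx statistics.  Called once
 * interrupts have been initialized; sfxge_tx_start() later brings the
 * queues up.
 */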
int
sfxge_tx_init(struct sfxge_softc *sc)
{
	const efx_nic_cfg_t *encp = efx_nic_cfg_get(sc->enp);
	struct sfxge_intr *intr;
	int index;
	int rc;

	intr = &sc->intr;

	KASSERT(intr->state == SFXGE_INTR_INITIALIZED,
	    ("intr->state != SFXGE_INTR_INITIALIZED"));

	sc->txq_count = SFXGE_TXQ_NTYPES - 1 + sc->intr.n_alloc;

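	/*
	 * Use FW-assisted TSO only if it is both requested by the tunable
	 * and advertised as available and enabled by the firmware.
	 */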
	sc->tso_fw_assisted = sfxge_tso_fw_assisted;
	if (sc->tso_fw_assisted)
		sc->tso_fw_assisted =
		    (encp->enc_features & EFX_FEATURE_FW_ASSISTED_TSO) &&
		    (encp->enc_fw_assisted_tso_enabled);

	sc->txqs_node = SYSCTL_ADD_NODE(
		device_get_sysctl_ctx(sc->dev),
		SYSCTL_CHILDREN(device_get_sysctl_tree(sc->dev)),
		OID_AUTO, "txq", CTLFLAG_RD, NULL, "Tx queues");
	if (sc->txqs_node == NULL) {
		rc = ENOMEM;
		goto fail_txq_node;
	}

	/* Initialize the transmit queues */
	if ((rc = sfxge_tx_qinit(sc, SFXGE_TXQ_NON_CKSUM,
	    SFXGE_TXQ_NON_CKSUM, 0)) != 0)
		goto fail;

	if ((rc = sfxge_tx_qinit(sc, SFXGE_TXQ_IP_CKSUM,
	    SFXGE_TXQ_IP_CKSUM, 0)) != 0)
		goto fail2;

	for (index = 0;
	     index < sc->txq_count - SFXGE_TXQ_NTYPES + 1;
	     index++) {
		if ((rc = sfxge_tx_qinit(sc, SFXGE_TXQ_NTYPES - 1 + index,
		    SFXGE_TXQ_IP_TCP_UDP_CKSUM, index)) != 0)
			goto fail3;
	}

	sfxge_tx_stat_init(sc);

	return (0);

fail3:
	while (--index >= 0)
		sfxge_tx_qfini(sc, SFXGE_TXQ_IP_TCP_UDP_CKSUM + index);

	sfxge_tx_qfini(sc, SFXGE_TXQ_IP_CKSUM);

fail2:
	sfxge_tx_qfini(sc, SFXGE_TXQ_NON_CKSUM);

fail:
fail_txq_node:
	sc->txq_count = 0;
	return (rc);
}

1764