1/*-
2 * Copyright (c) 2002-2009 Sam Leffler, Errno Consulting
3 * Copyright (c) 2010-2012 Adrian Chadd, Xenion Pty Ltd
4 * All rights reserved.
5 *
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions
8 * are met:
9 * 1. Redistributions of source code must retain the above copyright
10 *    notice, this list of conditions and the following disclaimer,
11 *    without modification.
12 * 2. Redistributions in binary form must reproduce at minimum a disclaimer
13 *    similar to the "NO WARRANTY" disclaimer below ("Disclaimer") and any
14 *    redistribution must be conditioned upon including a substantially
15 *    similar Disclaimer requirement for further binary redistribution.
16 *
17 * NO WARRANTY
18 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
19 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
20 * LIMITED TO, THE IMPLIED WARRANTIES OF NONINFRINGEMENT, MERCHANTIBILITY
21 * AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL
22 * THE COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR SPECIAL, EXEMPLARY,
23 * OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
24 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
25 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER
26 * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
27 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
28 * THE POSSIBILITY OF SUCH DAMAGES.
29 */
30
31#include <sys/cdefs.h>
32__FBSDID("$FreeBSD$");
33
34/*
35 * Driver for the Atheros Wireless LAN controller.
36 *
37 * This software is derived from work of Atsushi Onoe; his contribution
38 * is greatly appreciated.
39 */
40
41#include "opt_inet.h"
42#include "opt_ath.h"
43#include "opt_wlan.h"
44
45#include <sys/param.h>
46#include <sys/systm.h>
47#include <sys/sysctl.h>
48#include <sys/mbuf.h>
49#include <sys/malloc.h>
50#include <sys/lock.h>
51#include <sys/mutex.h>
52#include <sys/kernel.h>
53#include <sys/socket.h>
54#include <sys/sockio.h>
55#include <sys/errno.h>
56#include <sys/callout.h>
57#include <sys/bus.h>
58#include <sys/endian.h>
59#include <sys/kthread.h>
60#include <sys/taskqueue.h>
61#include <sys/priv.h>
62
63#include <machine/bus.h>
64
65#include <net/if.h>
66#include <net/if_dl.h>
67#include <net/if_media.h>
68#include <net/if_types.h>
69#include <net/if_arp.h>
70#include <net/ethernet.h>
71#include <net/if_llc.h>
72
73#include <net80211/ieee80211_var.h>
74#include <net80211/ieee80211_regdomain.h>
75#ifdef IEEE80211_SUPPORT_SUPERG
76#include <net80211/ieee80211_superg.h>
77#endif
78#ifdef IEEE80211_SUPPORT_TDMA
79#include <net80211/ieee80211_tdma.h>
80#endif
81#include <net80211/ieee80211_ht.h>
82
83#include <net/bpf.h>
84
85#ifdef INET
86#include <netinet/in.h>
87#include <netinet/if_ether.h>
88#endif
89
90#include <dev/ath/if_athvar.h>
91#include <dev/ath/ath_hal/ah_devid.h>		/* XXX for softled */
92#include <dev/ath/ath_hal/ah_diagcodes.h>
93
94#include <dev/ath/if_ath_debug.h>
95
96#ifdef ATH_TX99_DIAG
97#include <dev/ath/ath_tx99/ath_tx99.h>
98#endif
99
100#include <dev/ath/if_ath_misc.h>
101#include <dev/ath/if_ath_tx.h>
102#include <dev/ath/if_ath_tx_ht.h>
103
104#ifdef	ATH_DEBUG_ALQ
105#include <dev/ath/if_ath_alq.h>
106#endif
107
108/*
109 * How many retries to perform in software
110 */
111#define	SWMAX_RETRIES		10
112
113/*
114 * What queue to throw the non-QoS TID traffic into
115 */
116#define	ATH_NONQOS_TID_AC	WME_AC_VO
117
118#if 0
119static int ath_tx_node_is_asleep(struct ath_softc *sc, struct ath_node *an);
120#endif
121static int ath_tx_ampdu_pending(struct ath_softc *sc, struct ath_node *an,
122    int tid);
123static int ath_tx_ampdu_running(struct ath_softc *sc, struct ath_node *an,
124    int tid);
125static ieee80211_seq ath_tx_tid_seqno_assign(struct ath_softc *sc,
126    struct ieee80211_node *ni, struct ath_buf *bf, struct mbuf *m0);
127static int ath_tx_action_frame_override_queue(struct ath_softc *sc,
128    struct ieee80211_node *ni, struct mbuf *m0, int *tid);
129static struct ath_buf *
130ath_tx_retry_clone(struct ath_softc *sc, struct ath_node *an,
131    struct ath_tid *tid, struct ath_buf *bf);
132
133#ifdef	ATH_DEBUG_ALQ
134void
135ath_tx_alq_post(struct ath_softc *sc, struct ath_buf *bf_first)
136{
137	struct ath_buf *bf;
138	int i, n;
139	const char *ds;
140
141	/* XXX we should skip out early if debugging isn't enabled! */
142	bf = bf_first;
143
144	while (bf != NULL) {
145		/* XXX should ensure bf_nseg > 0! */
146		if (bf->bf_nseg == 0)
147			break;
148		n = ((bf->bf_nseg - 1) / sc->sc_tx_nmaps) + 1;
149		for (i = 0, ds = (const char *) bf->bf_desc;
150		    i < n;
151		    i++, ds += sc->sc_tx_desclen) {
152			if_ath_alq_post(&sc->sc_alq,
153			    ATH_ALQ_EDMA_TXDESC,
154			    sc->sc_tx_desclen,
155			    ds);
156		}
157		bf = bf->bf_next;
158	}
159}
160#endif /* ATH_DEBUG_ALQ */
161
162/*
163 * Whether to use the 11n rate scenario functions or not
164 */
165static inline int
166ath_tx_is_11n(struct ath_softc *sc)
167{
168	return ((sc->sc_ah->ah_magic == 0x20065416) ||
169		    (sc->sc_ah->ah_magic == 0x19741014));
170}
171
172/*
173 * Obtain the current TID from the given frame.
174 *
175 * Non-QoS frames need to go into TID 16 (IEEE80211_NONQOS_TID.)
176 * This has implications for which AC/priority the packet is placed
177 * in.
178 */
179static int
180ath_tx_gettid(struct ath_softc *sc, const struct mbuf *m0)
181{
182	const struct ieee80211_frame *wh;
183	int pri = M_WME_GETAC(m0);
184
185	wh = mtod(m0, const struct ieee80211_frame *);
186	if (! IEEE80211_QOS_HAS_SEQ(wh))
187		return IEEE80211_NONQOS_TID;
188	else
189		return WME_AC_TO_TID(pri);
190}
191
192static void
193ath_tx_set_retry(struct ath_softc *sc, struct ath_buf *bf)
194{
195	struct ieee80211_frame *wh;
196
197	wh = mtod(bf->bf_m, struct ieee80211_frame *);
198	/* Only update/resync if needed */
199	if (bf->bf_state.bfs_isretried == 0) {
200		wh->i_fc[1] |= IEEE80211_FC1_RETRY;
201		bus_dmamap_sync(sc->sc_dmat, bf->bf_dmamap,
202		    BUS_DMASYNC_PREWRITE);
203	}
204	bf->bf_state.bfs_isretried = 1;
205	bf->bf_state.bfs_retries ++;
206}
207
208/*
209 * Determine what the correct AC queue for the given frame
210 * should be.
211 *
212 * This code assumes that the TIDs map consistently to
213 * the underlying hardware (or software) ath_txq.
214 * Since the sender may try to set an AC which is
215 * arbitrary, non-QoS TIDs may end up being put on
216 * completely different ACs. There's no way to put a
217 * TID into multiple ath_txq's for scheduling, so
218 * for now we override the AC/TXQ selection and set
219 * non-QOS TID frames into the BE queue.
220 *
221 * This may be completely incorrect - specifically,
222 * some management frames may end up out of order
223 * compared to the QoS traffic they're controlling.
224 * I'll look into this later.
225 */
226static int
227ath_tx_getac(struct ath_softc *sc, const struct mbuf *m0)
228{
229	const struct ieee80211_frame *wh;
230	int pri = M_WME_GETAC(m0);
231	wh = mtod(m0, const struct ieee80211_frame *);
232	if (IEEE80211_QOS_HAS_SEQ(wh))
233		return pri;
234
235	return ATH_NONQOS_TID_AC;
236}
237
238void
239ath_txfrag_cleanup(struct ath_softc *sc,
240	ath_bufhead *frags, struct ieee80211_node *ni)
241{
242	struct ath_buf *bf, *next;
243
244	ATH_TXBUF_LOCK_ASSERT(sc);
245
246	TAILQ_FOREACH_SAFE(bf, frags, bf_list, next) {
247		/* NB: bf assumed clean */
248		TAILQ_REMOVE(frags, bf, bf_list);
249		ath_returnbuf_head(sc, bf);
250		ieee80211_node_decref(ni);
251	}
252}
253
254/*
255 * Setup xmit of a fragmented frame.  Allocate a buffer
256 * for each frag and bump the node reference count to
257 * reflect the held reference to be setup by ath_tx_start.
258 */
259int
260ath_txfrag_setup(struct ath_softc *sc, ath_bufhead *frags,
261	struct mbuf *m0, struct ieee80211_node *ni)
262{
263	struct mbuf *m;
264	struct ath_buf *bf;
265
266	ATH_TXBUF_LOCK(sc);
267	for (m = m0->m_nextpkt; m != NULL; m = m->m_nextpkt) {
268		/* XXX non-management? */
269		bf = _ath_getbuf_locked(sc, ATH_BUFTYPE_NORMAL);
270		if (bf == NULL) {	/* out of buffers, cleanup */
271			device_printf(sc->sc_dev, "%s: no buffer?\n",
272			    __func__);
273			ath_txfrag_cleanup(sc, frags, ni);
274			break;
275		}
276		ieee80211_node_incref(ni);
277		TAILQ_INSERT_TAIL(frags, bf, bf_list);
278	}
279	ATH_TXBUF_UNLOCK(sc);
280
281	return !TAILQ_EMPTY(frags);
282}
283
284/*
285 * Reclaim mbuf resources.  For fragmented frames we
286 * need to claim each frag chained with m_nextpkt.
287 */
288void
289ath_freetx(struct mbuf *m)
290{
291	struct mbuf *next;
292
293	do {
294		next = m->m_nextpkt;
295		m->m_nextpkt = NULL;
296		m_freem(m);
297	} while ((m = next) != NULL);
298}
299
300static int
301ath_tx_dmasetup(struct ath_softc *sc, struct ath_buf *bf, struct mbuf *m0)
302{
303	struct mbuf *m;
304	int error;
305
306	/*
307	 * Load the DMA map so any coalescing is done.  This
308	 * also calculates the number of descriptors we need.
309	 */
310	error = bus_dmamap_load_mbuf_sg(sc->sc_dmat, bf->bf_dmamap, m0,
311				     bf->bf_segs, &bf->bf_nseg,
312				     BUS_DMA_NOWAIT);
313	if (error == EFBIG) {
314		/* XXX packet requires too many descriptors */
315		bf->bf_nseg = ATH_MAX_SCATTER + 1;
316	} else if (error != 0) {
317		sc->sc_stats.ast_tx_busdma++;
318		ath_freetx(m0);
319		return error;
320	}
321	/*
322	 * Discard null packets and check for packets that
323	 * require too many TX descriptors.  We try to convert
324	 * the latter to a cluster.
325	 */
326	if (bf->bf_nseg > ATH_MAX_SCATTER) {		/* too many desc's, linearize */
327		sc->sc_stats.ast_tx_linear++;
328		m = m_collapse(m0, M_NOWAIT, ATH_MAX_SCATTER);
329		if (m == NULL) {
330			ath_freetx(m0);
331			sc->sc_stats.ast_tx_nombuf++;
332			return ENOMEM;
333		}
334		m0 = m;
335		error = bus_dmamap_load_mbuf_sg(sc->sc_dmat, bf->bf_dmamap, m0,
336					     bf->bf_segs, &bf->bf_nseg,
337					     BUS_DMA_NOWAIT);
338		if (error != 0) {
339			sc->sc_stats.ast_tx_busdma++;
340			ath_freetx(m0);
341			return error;
342		}
343		KASSERT(bf->bf_nseg <= ATH_MAX_SCATTER,
344		    ("too many segments after defrag; nseg %u", bf->bf_nseg));
345	} else if (bf->bf_nseg == 0) {		/* null packet, discard */
346		sc->sc_stats.ast_tx_nodata++;
347		ath_freetx(m0);
348		return EIO;
349	}
350	DPRINTF(sc, ATH_DEBUG_XMIT, "%s: m %p len %u\n",
351		__func__, m0, m0->m_pkthdr.len);
352	bus_dmamap_sync(sc->sc_dmat, bf->bf_dmamap, BUS_DMASYNC_PREWRITE);
353	bf->bf_m = m0;
354
355	return 0;
356}
357
358/*
359 * Chain together segments+descriptors for a frame - 11n or otherwise.
360 *
361 * For aggregates, this is called on each frame in the aggregate.
362 */
363static void
364ath_tx_chaindesclist(struct ath_softc *sc, struct ath_desc *ds0,
365    struct ath_buf *bf, int is_aggr, int is_first_subframe,
366    int is_last_subframe)
367{
368	struct ath_hal *ah = sc->sc_ah;
369	char *ds;
370	int i, bp, dsp;
371	HAL_DMA_ADDR bufAddrList[4];
372	uint32_t segLenList[4];
373	int numTxMaps = 1;
374	int isFirstDesc = 1;
375
376	/*
377	 * XXX There's txdma and txdma_mgmt; the descriptor
378	 * sizes must match.
379	 */
380	struct ath_descdma *dd = &sc->sc_txdma;
381
382	/*
383	 * Fillin the remainder of the descriptor info.
384	 */
385
386	/*
387	 * We need the number of TX data pointers in each descriptor.
388	 * EDMA and later chips support 4 TX buffers per descriptor;
389	 * previous chips just support one.
390	 */
391	numTxMaps = sc->sc_tx_nmaps;
392
393	/*
394	 * For EDMA and later chips ensure the TX map is fully populated
395	 * before advancing to the next descriptor.
396	 */
397	ds = (char *) bf->bf_desc;
398	bp = dsp = 0;
399	bzero(bufAddrList, sizeof(bufAddrList));
400	bzero(segLenList, sizeof(segLenList));
401	for (i = 0; i < bf->bf_nseg; i++) {
402		bufAddrList[bp] = bf->bf_segs[i].ds_addr;
403		segLenList[bp] = bf->bf_segs[i].ds_len;
404		bp++;
405
406		/*
407		 * Go to the next segment if this isn't the last segment
408		 * and there's space in the current TX map.
409		 */
410		if ((i != bf->bf_nseg - 1) && (bp < numTxMaps))
411			continue;
412
413		/*
414		 * Last segment or we're out of buffer pointers.
415		 */
416		bp = 0;
417
418		if (i == bf->bf_nseg - 1)
419			ath_hal_settxdesclink(ah, (struct ath_desc *) ds, 0);
420		else
421			ath_hal_settxdesclink(ah, (struct ath_desc *) ds,
422			    bf->bf_daddr + dd->dd_descsize * (dsp + 1));
423
424		/*
425		 * XXX This assumes that bfs_txq is the actual destination
426		 * hardware queue at this point.  It may not have been
427		 * assigned, it may actually be pointing to the multicast
428		 * software TXQ id.  These must be fixed!
429		 */
430		ath_hal_filltxdesc(ah, (struct ath_desc *) ds
431			, bufAddrList
432			, segLenList
433			, bf->bf_descid		/* XXX desc id */
434			, bf->bf_state.bfs_tx_queue
435			, isFirstDesc		/* first segment */
436			, i == bf->bf_nseg - 1	/* last segment */
437			, (struct ath_desc *) ds0	/* first descriptor */
438		);
439
440		/*
441		 * Make sure the 11n aggregate fields are cleared.
442		 *
443		 * XXX TODO: this doesn't need to be called for
444		 * aggregate frames; as it'll be called on all
445		 * sub-frames.  Since the descriptors are in
446		 * non-cacheable memory, this leads to some
447		 * rather slow writes on MIPS/ARM platforms.
448		 */
449		if (ath_tx_is_11n(sc))
450			ath_hal_clr11n_aggr(sc->sc_ah, (struct ath_desc *) ds);
451
452		/*
453		 * If 11n is enabled, set it up as if it's an aggregate
454		 * frame.
455		 */
456		if (is_last_subframe) {
457			ath_hal_set11n_aggr_last(sc->sc_ah,
458			    (struct ath_desc *) ds);
459		} else if (is_aggr) {
460			/*
461			 * This clears the aggrlen field; so
462			 * the caller needs to call set_aggr_first()!
463			 *
464			 * XXX TODO: don't call this for the first
465			 * descriptor in the first frame in an
466			 * aggregate!
467			 */
468			ath_hal_set11n_aggr_middle(sc->sc_ah,
469			    (struct ath_desc *) ds,
470			    bf->bf_state.bfs_ndelim);
471		}
472		isFirstDesc = 0;
473		bf->bf_lastds = (struct ath_desc *) ds;
474
475		/*
476		 * Don't forget to skip to the next descriptor.
477		 */
478		ds += sc->sc_tx_desclen;
479		dsp++;
480
481		/*
482		 * .. and don't forget to blank these out!
483		 */
484		bzero(bufAddrList, sizeof(bufAddrList));
485		bzero(segLenList, sizeof(segLenList));
486	}
487	bus_dmamap_sync(sc->sc_dmat, bf->bf_dmamap, BUS_DMASYNC_PREWRITE);
488}
489
490/*
491 * Set the rate control fields in the given descriptor based on
492 * the bf_state fields and node state.
493 *
494 * The bfs fields should already be set with the relevant rate
495 * control information, including whether MRR is to be enabled.
496 *
497 * Since the FreeBSD HAL currently sets up the first TX rate
498 * in ath_hal_setuptxdesc(), this will setup the MRR
499 * conditionally for the pre-11n chips, and call ath_buf_set_rate
500 * unconditionally for 11n chips. These require the 11n rate
501 * scenario to be set if MCS rates are enabled, so it's easier
502 * to just always call it. The caller can then only set rates 2, 3
503 * and 4 if multi-rate retry is needed.
504 */
505static void
506ath_tx_set_ratectrl(struct ath_softc *sc, struct ieee80211_node *ni,
507    struct ath_buf *bf)
508{
509	struct ath_rc_series *rc = bf->bf_state.bfs_rc;
510
511	/* If mrr is disabled, blank tries 1, 2, 3 */
512	if (! bf->bf_state.bfs_ismrr)
513		rc[1].tries = rc[2].tries = rc[3].tries = 0;
514
515#if 0
516	/*
517	 * If NOACK is set, just set ntries=1.
518	 */
519	else if (bf->bf_state.bfs_txflags & HAL_TXDESC_NOACK) {
520		rc[1].tries = rc[2].tries = rc[3].tries = 0;
521		rc[0].tries = 1;
522	}
523#endif
524
525	/*
526	 * Always call - that way a retried descriptor will
527	 * have the MRR fields overwritten.
528	 *
529	 * XXX TODO: see if this is really needed - setting up
530	 * the first descriptor should set the MRR fields to 0
531	 * for us anyway.
532	 */
533	if (ath_tx_is_11n(sc)) {
534		ath_buf_set_rate(sc, ni, bf);
535	} else {
536		ath_hal_setupxtxdesc(sc->sc_ah, bf->bf_desc
537			, rc[1].ratecode, rc[1].tries
538			, rc[2].ratecode, rc[2].tries
539			, rc[3].ratecode, rc[3].tries
540		);
541	}
542}
543
544/*
545 * Setup segments+descriptors for an 11n aggregate.
546 * bf_first is the first buffer in the aggregate.
547 * The descriptor list must already been linked together using
548 * bf->bf_next.
549 */
550static void
551ath_tx_setds_11n(struct ath_softc *sc, struct ath_buf *bf_first)
552{
553	struct ath_buf *bf, *bf_prev = NULL;
554	struct ath_desc *ds0 = bf_first->bf_desc;
555
556	DPRINTF(sc, ATH_DEBUG_SW_TX_AGGR, "%s: nframes=%d, al=%d\n",
557	    __func__, bf_first->bf_state.bfs_nframes,
558	    bf_first->bf_state.bfs_al);
559
560	bf = bf_first;
561
562	if (bf->bf_state.bfs_txrate0 == 0)
563		device_printf(sc->sc_dev, "%s: bf=%p, txrate0=%d\n",
564		    __func__, bf, 0);
565	if (bf->bf_state.bfs_rc[0].ratecode == 0)
566		device_printf(sc->sc_dev, "%s: bf=%p, rix0=%d\n",
567		    __func__, bf, 0);
568
569	/*
570	 * Setup all descriptors of all subframes - this will
571	 * call ath_hal_set11naggrmiddle() on every frame.
572	 */
573	while (bf != NULL) {
574		DPRINTF(sc, ATH_DEBUG_SW_TX_AGGR,
575		    "%s: bf=%p, nseg=%d, pktlen=%d, seqno=%d\n",
576		    __func__, bf, bf->bf_nseg, bf->bf_state.bfs_pktlen,
577		    SEQNO(bf->bf_state.bfs_seqno));
578
579		/*
580		 * Setup the initial fields for the first descriptor - all
581		 * the non-11n specific stuff.
582		 */
583		ath_hal_setuptxdesc(sc->sc_ah, bf->bf_desc
584			, bf->bf_state.bfs_pktlen	/* packet length */
585			, bf->bf_state.bfs_hdrlen	/* header length */
586			, bf->bf_state.bfs_atype	/* Atheros packet type */
587			, bf->bf_state.bfs_txpower	/* txpower */
588			, bf->bf_state.bfs_txrate0
589			, bf->bf_state.bfs_try0		/* series 0 rate/tries */
590			, bf->bf_state.bfs_keyix	/* key cache index */
591			, bf->bf_state.bfs_txantenna	/* antenna mode */
592			, bf->bf_state.bfs_txflags | HAL_TXDESC_INTREQ	/* flags */
593			, bf->bf_state.bfs_ctsrate	/* rts/cts rate */
594			, bf->bf_state.bfs_ctsduration	/* rts/cts duration */
595		);
596
597		/*
598		 * First descriptor? Setup the rate control and initial
599		 * aggregate header information.
600		 */
601		if (bf == bf_first) {
602			/*
603			 * setup first desc with rate and aggr info
604			 */
605			ath_tx_set_ratectrl(sc, bf->bf_node, bf);
606		}
607
608		/*
609		 * Setup the descriptors for a multi-descriptor frame.
610		 * This is both aggregate and non-aggregate aware.
611		 */
612		ath_tx_chaindesclist(sc, ds0, bf,
613		    1, /* is_aggr */
614		    !! (bf == bf_first), /* is_first_subframe */
615		    !! (bf->bf_next == NULL) /* is_last_subframe */
616		    );
617
618		if (bf == bf_first) {
619			/*
620			 * Initialise the first 11n aggregate with the
621			 * aggregate length and aggregate enable bits.
622			 */
623			ath_hal_set11n_aggr_first(sc->sc_ah,
624			    ds0,
625			    bf->bf_state.bfs_al,
626			    bf->bf_state.bfs_ndelim);
627		}
628
629		/*
630		 * Link the last descriptor of the previous frame
631		 * to the beginning descriptor of this frame.
632		 */
633		if (bf_prev != NULL)
634			ath_hal_settxdesclink(sc->sc_ah, bf_prev->bf_lastds,
635			    bf->bf_daddr);
636
637		/* Save a copy so we can link the next descriptor in */
638		bf_prev = bf;
639		bf = bf->bf_next;
640	}
641
642	/*
643	 * Set the first descriptor bf_lastds field to point to
644	 * the last descriptor in the last subframe, that's where
645	 * the status update will occur.
646	 */
647	bf_first->bf_lastds = bf_prev->bf_lastds;
648
649	/*
650	 * And bf_last in the first descriptor points to the end of
651	 * the aggregate list.
652	 */
653	bf_first->bf_last = bf_prev;
654
655	/*
656	 * For non-AR9300 NICs, which require the rate control
657	 * in the final descriptor - let's set that up now.
658	 *
659	 * This is because the filltxdesc() HAL call doesn't
660	 * populate the last segment with rate control information
661	 * if firstSeg is also true.  For non-aggregate frames
662	 * that is fine, as the first frame already has rate control
663	 * info.  But if the last frame in an aggregate has one
664	 * descriptor, both firstseg and lastseg will be true and
665	 * the rate info isn't copied.
666	 *
667	 * This is inefficient on MIPS/ARM platforms that have
668	 * non-cachable memory for TX descriptors, but we'll just
669	 * make do for now.
670	 *
671	 * As to why the rate table is stashed in the last descriptor
672	 * rather than the first descriptor?  Because proctxdesc()
673	 * is called on the final descriptor in an MPDU or A-MPDU -
674	 * ie, the one that gets updated by the hardware upon
675	 * completion.  That way proctxdesc() doesn't need to know
676	 * about the first _and_ last TX descriptor.
677	 */
678	ath_hal_setuplasttxdesc(sc->sc_ah, bf_prev->bf_lastds, ds0);
679
680	DPRINTF(sc, ATH_DEBUG_SW_TX_AGGR, "%s: end\n", __func__);
681}
682
683/*
684 * Hand-off a frame to the multicast TX queue.
685 *
686 * This is a software TXQ which will be appended to the CAB queue
687 * during the beacon setup code.
688 *
689 * XXX TODO: since the AR9300 EDMA TX queue support wants the QCU ID
690 * as part of the TX descriptor, bf_state.bfs_tx_queue must be updated
691 * with the actual hardware txq, or all of this will fall apart.
692 *
693 * XXX It may not be a bad idea to just stuff the QCU ID into bf_state
694 * and retire bfs_tx_queue; then make sure the CABQ QCU ID is populated
695 * correctly.
696 */
697static void
698ath_tx_handoff_mcast(struct ath_softc *sc, struct ath_txq *txq,
699    struct ath_buf *bf)
700{
701	ATH_TX_LOCK_ASSERT(sc);
702
703	KASSERT((bf->bf_flags & ATH_BUF_BUSY) == 0,
704	     ("%s: busy status 0x%x", __func__, bf->bf_flags));
705
706	/*
707	 * Ensure that the tx queue is the cabq, so things get
708	 * mapped correctly.
709	 */
710	if (bf->bf_state.bfs_tx_queue != sc->sc_cabq->axq_qnum) {
711		device_printf(sc->sc_dev,
712		    "%s: bf=%p, bfs_tx_queue=%d, axq_qnum=%d\n",
713		    __func__,
714		    bf,
715		    bf->bf_state.bfs_tx_queue,
716		    txq->axq_qnum);
717	}
718
719	ATH_TXQ_LOCK(txq);
720	if (ATH_TXQ_LAST(txq, axq_q_s) != NULL) {
721		struct ath_buf *bf_last = ATH_TXQ_LAST(txq, axq_q_s);
722		struct ieee80211_frame *wh;
723
724		/* mark previous frame */
725		wh = mtod(bf_last->bf_m, struct ieee80211_frame *);
726		wh->i_fc[1] |= IEEE80211_FC1_MORE_DATA;
727		bus_dmamap_sync(sc->sc_dmat, bf_last->bf_dmamap,
728		    BUS_DMASYNC_PREWRITE);
729
730		/* link descriptor */
731		ath_hal_settxdesclink(sc->sc_ah,
732		    bf_last->bf_lastds,
733		    bf->bf_daddr);
734	}
735	ATH_TXQ_INSERT_TAIL(txq, bf, bf_list);
736	ATH_TXQ_UNLOCK(txq);
737}
738
739/*
740 * Hand-off packet to a hardware queue.
741 */
742static void
743ath_tx_handoff_hw(struct ath_softc *sc, struct ath_txq *txq,
744    struct ath_buf *bf)
745{
746	struct ath_hal *ah = sc->sc_ah;
747	struct ath_buf *bf_first;
748
749	/*
750	 * Insert the frame on the outbound list and pass it on
751	 * to the hardware.  Multicast frames buffered for power
752	 * save stations and transmit from the CAB queue are stored
753	 * on a s/w only queue and loaded on to the CAB queue in
754	 * the SWBA handler since frames only go out on DTIM and
755	 * to avoid possible races.
756	 */
757	ATH_TX_LOCK_ASSERT(sc);
758	KASSERT((bf->bf_flags & ATH_BUF_BUSY) == 0,
759	     ("%s: busy status 0x%x", __func__, bf->bf_flags));
760	KASSERT(txq->axq_qnum != ATH_TXQ_SWQ,
761	     ("ath_tx_handoff_hw called for mcast queue"));
762
763	/*
764	 * XXX racy, should hold the PCU lock when checking this,
765	 * and also should ensure that the TX counter is >0!
766	 */
767	KASSERT((sc->sc_inreset_cnt == 0),
768	    ("%s: TX during reset?\n", __func__));
769
770#if 0
771	/*
772	 * This causes a LOR. Find out where the PCU lock is being
773	 * held whilst the TXQ lock is grabbed - that shouldn't
774	 * be occuring.
775	 */
776	ATH_PCU_LOCK(sc);
777	if (sc->sc_inreset_cnt) {
778		ATH_PCU_UNLOCK(sc);
779		DPRINTF(sc, ATH_DEBUG_RESET,
780		    "%s: called with sc_in_reset != 0\n",
781		    __func__);
782		DPRINTF(sc, ATH_DEBUG_XMIT,
783		    "%s: queued: TXDP[%u] = %p (%p) depth %d\n",
784		    __func__, txq->axq_qnum,
785		    (caddr_t)bf->bf_daddr, bf->bf_desc,
786		    txq->axq_depth);
787		/* XXX axq_link needs to be set and updated! */
788		ATH_TXQ_INSERT_TAIL(txq, bf, bf_list);
789		if (bf->bf_state.bfs_aggr)
790			txq->axq_aggr_depth++;
791		return;
792		}
793	ATH_PCU_UNLOCK(sc);
794#endif
795
796	ATH_TXQ_LOCK(txq);
797
798	/*
799	 * XXX TODO: if there's a holdingbf, then
800	 * ATH_TXQ_PUTRUNNING should be clear.
801	 *
802	 * If there is a holdingbf and the list is empty,
803	 * then axq_link should be pointing to the holdingbf.
804	 *
805	 * Otherwise it should point to the last descriptor
806	 * in the last ath_buf.
807	 *
808	 * In any case, we should really ensure that we
809	 * update the previous descriptor link pointer to
810	 * this descriptor, regardless of all of the above state.
811	 *
812	 * For now this is captured by having axq_link point
813	 * to either the holdingbf (if the TXQ list is empty)
814	 * or the end of the list (if the TXQ list isn't empty.)
815	 * I'd rather just kill axq_link here and do it as above.
816	 */
817
818	/*
819	 * Append the frame to the TX queue.
820	 */
821	ATH_TXQ_INSERT_TAIL(txq, bf, bf_list);
822	ATH_KTR(sc, ATH_KTR_TX, 3,
823	    "ath_tx_handoff: non-tdma: txq=%u, add bf=%p "
824	    "depth=%d",
825	    txq->axq_qnum,
826	    bf,
827	    txq->axq_depth);
828
829	/*
830	 * If there's a link pointer, update it.
831	 *
832	 * XXX we should replace this with the above logic, just
833	 * to kill axq_link with fire.
834	 */
835	if (txq->axq_link != NULL) {
836		*txq->axq_link = bf->bf_daddr;
837		DPRINTF(sc, ATH_DEBUG_XMIT,
838		    "%s: link[%u](%p)=%p (%p) depth %d\n", __func__,
839		    txq->axq_qnum, txq->axq_link,
840		    (caddr_t)bf->bf_daddr, bf->bf_desc,
841		    txq->axq_depth);
842		ATH_KTR(sc, ATH_KTR_TX, 5,
843		    "ath_tx_handoff: non-tdma: link[%u](%p)=%p (%p) "
844		    "lastds=%d",
845		    txq->axq_qnum, txq->axq_link,
846		    (caddr_t)bf->bf_daddr, bf->bf_desc,
847		    bf->bf_lastds);
848	}
849
850	/*
851	 * If we've not pushed anything into the hardware yet,
852	 * push the head of the queue into the TxDP.
853	 *
854	 * Once we've started DMA, there's no guarantee that
855	 * updating the TxDP with a new value will actually work.
856	 * So we just don't do that - if we hit the end of the list,
857	 * we keep that buffer around (the "holding buffer") and
858	 * re-start DMA by updating the link pointer of _that_
859	 * descriptor and then restart DMA.
860	 */
861	if (! (txq->axq_flags & ATH_TXQ_PUTRUNNING)) {
862		bf_first = TAILQ_FIRST(&txq->axq_q);
863		txq->axq_flags |= ATH_TXQ_PUTRUNNING;
864		ath_hal_puttxbuf(ah, txq->axq_qnum, bf_first->bf_daddr);
865		DPRINTF(sc, ATH_DEBUG_XMIT,
866		    "%s: TXDP[%u] = %p (%p) depth %d\n",
867		    __func__, txq->axq_qnum,
868		    (caddr_t)bf_first->bf_daddr, bf_first->bf_desc,
869		    txq->axq_depth);
870		ATH_KTR(sc, ATH_KTR_TX, 5,
871		    "ath_tx_handoff: TXDP[%u] = %p (%p) "
872		    "lastds=%p depth %d",
873		    txq->axq_qnum,
874		    (caddr_t)bf_first->bf_daddr, bf_first->bf_desc,
875		    bf_first->bf_lastds,
876		    txq->axq_depth);
877	}
878
879	/*
880	 * Ensure that the bf TXQ matches this TXQ, so later
881	 * checking and holding buffer manipulation is sane.
882	 */
883	if (bf->bf_state.bfs_tx_queue != txq->axq_qnum) {
884		device_printf(sc->sc_dev,
885		    "%s: bf=%p, bfs_tx_queue=%d, axq_qnum=%d\n",
886		    __func__,
887		    bf,
888		    bf->bf_state.bfs_tx_queue,
889		    txq->axq_qnum);
890	}
891
892	/*
893	 * Track aggregate queue depth.
894	 */
895	if (bf->bf_state.bfs_aggr)
896		txq->axq_aggr_depth++;
897
898	/*
899	 * Update the link pointer.
900	 */
901	ath_hal_gettxdesclinkptr(ah, bf->bf_lastds, &txq->axq_link);
902
903	/*
904	 * Start DMA.
905	 *
906	 * If we wrote a TxDP above, DMA will start from here.
907	 *
908	 * If DMA is running, it'll do nothing.
909	 *
910	 * If the DMA engine hit the end of the QCU list (ie LINK=NULL,
911	 * or VEOL) then it stops at the last transmitted write.
912	 * We then append a new frame by updating the link pointer
913	 * in that descriptor and then kick TxE here; it will re-read
914	 * that last descriptor and find the new descriptor to transmit.
915	 *
916	 * This is why we keep the holding descriptor around.
917	 */
918	ath_hal_txstart(ah, txq->axq_qnum);
919	ATH_TXQ_UNLOCK(txq);
920	ATH_KTR(sc, ATH_KTR_TX, 1,
921	    "ath_tx_handoff: txq=%u, txstart", txq->axq_qnum);
922}
923
924/*
925 * Restart TX DMA for the given TXQ.
926 *
927 * This must be called whether the queue is empty or not.
928 */
929static void
930ath_legacy_tx_dma_restart(struct ath_softc *sc, struct ath_txq *txq)
931{
932	struct ath_buf *bf, *bf_last;
933
934	ATH_TXQ_LOCK_ASSERT(txq);
935
936	/* XXX make this ATH_TXQ_FIRST */
937	bf = TAILQ_FIRST(&txq->axq_q);
938	bf_last = ATH_TXQ_LAST(txq, axq_q_s);
939
940	if (bf == NULL)
941		return;
942
943	DPRINTF(sc, ATH_DEBUG_RESET,
944	    "%s: Q%d: bf=%p, bf_last=%p, daddr=0x%08x\n",
945	    __func__,
946	    txq->axq_qnum,
947	    bf,
948	    bf_last,
949	    (uint32_t) bf->bf_daddr);
950
951#ifdef	ATH_DEBUG
952	if (sc->sc_debug & ATH_DEBUG_RESET)
953		ath_tx_dump(sc, txq);
954#endif
955
956	/*
957	 * This is called from a restart, so DMA is known to be
958	 * completely stopped.
959	 */
960	KASSERT((!(txq->axq_flags & ATH_TXQ_PUTRUNNING)),
961	    ("%s: Q%d: called with PUTRUNNING=1\n",
962	    __func__,
963	    txq->axq_qnum));
964
965	ath_hal_puttxbuf(sc->sc_ah, txq->axq_qnum, bf->bf_daddr);
966	txq->axq_flags |= ATH_TXQ_PUTRUNNING;
967
968	ath_hal_gettxdesclinkptr(sc->sc_ah, bf_last->bf_lastds,
969	    &txq->axq_link);
970	ath_hal_txstart(sc->sc_ah, txq->axq_qnum);
971}
972
973/*
974 * Hand off a packet to the hardware (or mcast queue.)
975 *
976 * The relevant hardware txq should be locked.
977 */
978static void
979ath_legacy_xmit_handoff(struct ath_softc *sc, struct ath_txq *txq,
980    struct ath_buf *bf)
981{
982	ATH_TX_LOCK_ASSERT(sc);
983
984#ifdef	ATH_DEBUG_ALQ
985	if (if_ath_alq_checkdebug(&sc->sc_alq, ATH_ALQ_EDMA_TXDESC))
986		ath_tx_alq_post(sc, bf);
987#endif
988
989	if (txq->axq_qnum == ATH_TXQ_SWQ)
990		ath_tx_handoff_mcast(sc, txq, bf);
991	else
992		ath_tx_handoff_hw(sc, txq, bf);
993}
994
995static int
996ath_tx_tag_crypto(struct ath_softc *sc, struct ieee80211_node *ni,
997    struct mbuf *m0, int iswep, int isfrag, int *hdrlen, int *pktlen,
998    int *keyix)
999{
1000	DPRINTF(sc, ATH_DEBUG_XMIT,
1001	    "%s: hdrlen=%d, pktlen=%d, isfrag=%d, iswep=%d, m0=%p\n",
1002	    __func__,
1003	    *hdrlen,
1004	    *pktlen,
1005	    isfrag,
1006	    iswep,
1007	    m0);
1008
1009	if (iswep) {
1010		const struct ieee80211_cipher *cip;
1011		struct ieee80211_key *k;
1012
1013		/*
1014		 * Construct the 802.11 header+trailer for an encrypted
1015		 * frame. The only reason this can fail is because of an
1016		 * unknown or unsupported cipher/key type.
1017		 */
1018		k = ieee80211_crypto_encap(ni, m0);
1019		if (k == NULL) {
1020			/*
1021			 * This can happen when the key is yanked after the
1022			 * frame was queued.  Just discard the frame; the
1023			 * 802.11 layer counts failures and provides
1024			 * debugging/diagnostics.
1025			 */
1026			return (0);
1027		}
1028		/*
1029		 * Adjust the packet + header lengths for the crypto
1030		 * additions and calculate the h/w key index.  When
1031		 * a s/w mic is done the frame will have had any mic
1032		 * added to it prior to entry so m0->m_pkthdr.len will
1033		 * account for it. Otherwise we need to add it to the
1034		 * packet length.
1035		 */
1036		cip = k->wk_cipher;
1037		(*hdrlen) += cip->ic_header;
1038		(*pktlen) += cip->ic_header + cip->ic_trailer;
1039		/* NB: frags always have any TKIP MIC done in s/w */
1040		if ((k->wk_flags & IEEE80211_KEY_SWMIC) == 0 && !isfrag)
1041			(*pktlen) += cip->ic_miclen;
1042		(*keyix) = k->wk_keyix;
1043	} else if (ni->ni_ucastkey.wk_cipher == &ieee80211_cipher_none) {
1044		/*
1045		 * Use station key cache slot, if assigned.
1046		 */
1047		(*keyix) = ni->ni_ucastkey.wk_keyix;
1048		if ((*keyix) == IEEE80211_KEYIX_NONE)
1049			(*keyix) = HAL_TXKEYIX_INVALID;
1050	} else
1051		(*keyix) = HAL_TXKEYIX_INVALID;
1052
1053	return (1);
1054}
1055
1056/*
1057 * Calculate whether interoperability protection is required for
1058 * this frame.
1059 *
1060 * This requires the rate control information be filled in,
1061 * as the protection requirement depends upon the current
1062 * operating mode / PHY.
1063 */
1064static void
1065ath_tx_calc_protection(struct ath_softc *sc, struct ath_buf *bf)
1066{
1067	struct ieee80211_frame *wh;
1068	uint8_t rix;
1069	uint16_t flags;
1070	int shortPreamble;
1071	const HAL_RATE_TABLE *rt = sc->sc_currates;
1072	struct ifnet *ifp = sc->sc_ifp;
1073	struct ieee80211com *ic = ifp->if_l2com;
1074
1075	flags = bf->bf_state.bfs_txflags;
1076	rix = bf->bf_state.bfs_rc[0].rix;
1077	shortPreamble = bf->bf_state.bfs_shpream;
1078	wh = mtod(bf->bf_m, struct ieee80211_frame *);
1079
1080	/*
1081	 * If 802.11g protection is enabled, determine whether
1082	 * to use RTS/CTS or just CTS.  Note that this is only
1083	 * done for OFDM unicast frames.
1084	 */
1085	if ((ic->ic_flags & IEEE80211_F_USEPROT) &&
1086	    rt->info[rix].phy == IEEE80211_T_OFDM &&
1087	    (flags & HAL_TXDESC_NOACK) == 0) {
1088		bf->bf_state.bfs_doprot = 1;
1089		/* XXX fragments must use CCK rates w/ protection */
1090		if (ic->ic_protmode == IEEE80211_PROT_RTSCTS) {
1091			flags |= HAL_TXDESC_RTSENA;
1092		} else if (ic->ic_protmode == IEEE80211_PROT_CTSONLY) {
1093			flags |= HAL_TXDESC_CTSENA;
1094		}
1095		/*
1096		 * For frags it would be desirable to use the
1097		 * highest CCK rate for RTS/CTS.  But stations
1098		 * farther away may detect it at a lower CCK rate
1099		 * so use the configured protection rate instead
1100		 * (for now).
1101		 */
1102		sc->sc_stats.ast_tx_protect++;
1103	}
1104
1105	/*
1106	 * If 11n protection is enabled and it's a HT frame,
1107	 * enable RTS.
1108	 *
1109	 * XXX ic_htprotmode or ic_curhtprotmode?
1110	 * XXX should it_htprotmode only matter if ic_curhtprotmode
1111	 * XXX indicates it's not a HT pure environment?
1112	 */
1113	if ((ic->ic_htprotmode == IEEE80211_PROT_RTSCTS) &&
1114	    rt->info[rix].phy == IEEE80211_T_HT &&
1115	    (flags & HAL_TXDESC_NOACK) == 0) {
1116		flags |= HAL_TXDESC_RTSENA;
1117		sc->sc_stats.ast_tx_htprotect++;
1118	}
1119	bf->bf_state.bfs_txflags = flags;
1120}
1121
1122/*
1123 * Update the frame duration given the currently selected rate.
1124 *
1125 * This also updates the frame duration value, so it will require
1126 * a DMA flush.
1127 */
1128static void
1129ath_tx_calc_duration(struct ath_softc *sc, struct ath_buf *bf)
1130{
1131	struct ieee80211_frame *wh;
1132	uint8_t rix;
1133	uint16_t flags;
1134	int shortPreamble;
1135	struct ath_hal *ah = sc->sc_ah;
1136	const HAL_RATE_TABLE *rt = sc->sc_currates;
1137	int isfrag = bf->bf_m->m_flags & M_FRAG;
1138
1139	flags = bf->bf_state.bfs_txflags;
1140	rix = bf->bf_state.bfs_rc[0].rix;
1141	shortPreamble = bf->bf_state.bfs_shpream;
1142	wh = mtod(bf->bf_m, struct ieee80211_frame *);
1143
1144	/*
1145	 * Calculate duration.  This logically belongs in the 802.11
1146	 * layer but it lacks sufficient information to calculate it.
1147	 */
1148	if ((flags & HAL_TXDESC_NOACK) == 0 &&
1149	    (wh->i_fc[0] & IEEE80211_FC0_TYPE_MASK) != IEEE80211_FC0_TYPE_CTL) {
1150		u_int16_t dur;
1151		if (shortPreamble)
1152			dur = rt->info[rix].spAckDuration;
1153		else
1154			dur = rt->info[rix].lpAckDuration;
1155		if (wh->i_fc[1] & IEEE80211_FC1_MORE_FRAG) {
1156			dur += dur;		/* additional SIFS+ACK */
1157			/*
1158			 * Include the size of next fragment so NAV is
1159			 * updated properly.  The last fragment uses only
1160			 * the ACK duration
1161			 *
1162			 * XXX TODO: ensure that the rate lookup for each
1163			 * fragment is the same as the rate used by the
1164			 * first fragment!
1165			 */
1166			dur += ath_hal_computetxtime(ah,
1167			    rt,
1168			    bf->bf_nextfraglen,
1169			    rix, shortPreamble);
1170		}
1171		if (isfrag) {
1172			/*
1173			 * Force hardware to use computed duration for next
1174			 * fragment by disabling multi-rate retry which updates
1175			 * duration based on the multi-rate duration table.
1176			 */
1177			bf->bf_state.bfs_ismrr = 0;
1178			bf->bf_state.bfs_try0 = ATH_TXMGTTRY;
1179			/* XXX update bfs_rc[0].try? */
1180		}
1181
1182		/* Update the duration field itself */
1183		*(u_int16_t *)wh->i_dur = htole16(dur);
1184	}
1185}
1186
1187static uint8_t
1188ath_tx_get_rtscts_rate(struct ath_hal *ah, const HAL_RATE_TABLE *rt,
1189    int cix, int shortPreamble)
1190{
1191	uint8_t ctsrate;
1192
1193	/*
1194	 * CTS transmit rate is derived from the transmit rate
1195	 * by looking in the h/w rate table.  We must also factor
1196	 * in whether or not a short preamble is to be used.
1197	 */
1198	/* NB: cix is set above where RTS/CTS is enabled */
1199	KASSERT(cix != 0xff, ("cix not setup"));
1200	ctsrate = rt->info[cix].rateCode;
1201
1202	/* XXX this should only matter for legacy rates */
1203	if (shortPreamble)
1204		ctsrate |= rt->info[cix].shortPreamble;
1205
1206	return (ctsrate);
1207}
1208
1209/*
1210 * Calculate the RTS/CTS duration for legacy frames.
1211 */
1212static int
1213ath_tx_calc_ctsduration(struct ath_hal *ah, int rix, int cix,
1214    int shortPreamble, int pktlen, const HAL_RATE_TABLE *rt,
1215    int flags)
1216{
1217	int ctsduration = 0;
1218
1219	/* This mustn't be called for HT modes */
1220	if (rt->info[cix].phy == IEEE80211_T_HT) {
1221		printf("%s: HT rate where it shouldn't be (0x%x)\n",
1222		    __func__, rt->info[cix].rateCode);
1223		return (-1);
1224	}
1225
1226	/*
1227	 * Compute the transmit duration based on the frame
1228	 * size and the size of an ACK frame.  We call into the
1229	 * HAL to do the computation since it depends on the
1230	 * characteristics of the actual PHY being used.
1231	 *
1232	 * NB: CTS is assumed the same size as an ACK so we can
1233	 *     use the precalculated ACK durations.
1234	 */
1235	if (shortPreamble) {
1236		if (flags & HAL_TXDESC_RTSENA)		/* SIFS + CTS */
1237			ctsduration += rt->info[cix].spAckDuration;
1238		ctsduration += ath_hal_computetxtime(ah,
1239			rt, pktlen, rix, AH_TRUE);
1240		if ((flags & HAL_TXDESC_NOACK) == 0)	/* SIFS + ACK */
1241			ctsduration += rt->info[rix].spAckDuration;
1242	} else {
1243		if (flags & HAL_TXDESC_RTSENA)		/* SIFS + CTS */
1244			ctsduration += rt->info[cix].lpAckDuration;
1245		ctsduration += ath_hal_computetxtime(ah,
1246			rt, pktlen, rix, AH_FALSE);
1247		if ((flags & HAL_TXDESC_NOACK) == 0)	/* SIFS + ACK */
1248			ctsduration += rt->info[rix].lpAckDuration;
1249	}
1250
1251	return (ctsduration);
1252}
1253
1254/*
1255 * Update the given ath_buf with updated rts/cts setup and duration
1256 * values.
1257 *
1258 * To support rate lookups for each software retry, the rts/cts rate
1259 * and cts duration must be re-calculated.
1260 *
1261 * This function assumes the RTS/CTS flags have been set as needed;
1262 * mrr has been disabled; and the rate control lookup has been done.
1263 *
1264 * XXX TODO: MRR need only be disabled for the pre-11n NICs.
1265 * XXX The 11n NICs support per-rate RTS/CTS configuration.
1266 */
1267static void
1268ath_tx_set_rtscts(struct ath_softc *sc, struct ath_buf *bf)
1269{
1270	uint16_t ctsduration = 0;
1271	uint8_t ctsrate = 0;
1272	uint8_t rix = bf->bf_state.bfs_rc[0].rix;
1273	uint8_t cix = 0;
1274	const HAL_RATE_TABLE *rt = sc->sc_currates;
1275
1276	/*
1277	 * No RTS/CTS enabled? Don't bother.
1278	 */
1279	if ((bf->bf_state.bfs_txflags &
1280	    (HAL_TXDESC_RTSENA | HAL_TXDESC_CTSENA)) == 0) {
1281		/* XXX is this really needed? */
1282		bf->bf_state.bfs_ctsrate = 0;
1283		bf->bf_state.bfs_ctsduration = 0;
1284		return;
1285	}
1286
1287	/*
1288	 * If protection is enabled, use the protection rix control
1289	 * rate. Otherwise use the rate0 control rate.
1290	 */
1291	if (bf->bf_state.bfs_doprot)
1292		rix = sc->sc_protrix;
1293	else
1294		rix = bf->bf_state.bfs_rc[0].rix;
1295
1296	/*
1297	 * If the raw path has hard-coded ctsrate0 to something,
1298	 * use it.
1299	 */
1300	if (bf->bf_state.bfs_ctsrate0 != 0)
1301		cix = ath_tx_findrix(sc, bf->bf_state.bfs_ctsrate0);
1302	else
1303		/* Control rate from above */
1304		cix = rt->info[rix].controlRate;
1305
1306	/* Calculate the rtscts rate for the given cix */
1307	ctsrate = ath_tx_get_rtscts_rate(sc->sc_ah, rt, cix,
1308	    bf->bf_state.bfs_shpream);
1309
1310	/* The 11n chipsets do ctsduration calculations for you */
1311	if (! ath_tx_is_11n(sc))
1312		ctsduration = ath_tx_calc_ctsduration(sc->sc_ah, rix, cix,
1313		    bf->bf_state.bfs_shpream, bf->bf_state.bfs_pktlen,
1314		    rt, bf->bf_state.bfs_txflags);
1315
1316	/* Squirrel away in ath_buf */
1317	bf->bf_state.bfs_ctsrate = ctsrate;
1318	bf->bf_state.bfs_ctsduration = ctsduration;
1319
1320	/*
1321	 * Must disable multi-rate retry when using RTS/CTS.
1322	 */
1323	if (!sc->sc_mrrprot) {
1324		bf->bf_state.bfs_ismrr = 0;
1325		bf->bf_state.bfs_try0 =
1326		    bf->bf_state.bfs_rc[0].tries = ATH_TXMGTTRY; /* XXX ew */
1327	}
1328}
1329
1330/*
1331 * Setup the descriptor chain for a normal or fast-frame
1332 * frame.
1333 *
1334 * XXX TODO: extend to include the destination hardware QCU ID.
1335 * Make sure that is correct.  Make sure that when being added
1336 * to the mcastq, the CABQ QCUID is set or things will get a bit
1337 * odd.
1338 */
1339static void
1340ath_tx_setds(struct ath_softc *sc, struct ath_buf *bf)
1341{
1342	struct ath_desc *ds = bf->bf_desc;
1343	struct ath_hal *ah = sc->sc_ah;
1344
1345	if (bf->bf_state.bfs_txrate0 == 0)
1346		device_printf(sc->sc_dev, "%s: bf=%p, txrate0=%d\n",
1347		    __func__, bf, 0);
1348
1349	ath_hal_setuptxdesc(ah, ds
1350		, bf->bf_state.bfs_pktlen	/* packet length */
1351		, bf->bf_state.bfs_hdrlen	/* header length */
1352		, bf->bf_state.bfs_atype	/* Atheros packet type */
1353		, bf->bf_state.bfs_txpower	/* txpower */
1354		, bf->bf_state.bfs_txrate0
1355		, bf->bf_state.bfs_try0		/* series 0 rate/tries */
1356		, bf->bf_state.bfs_keyix	/* key cache index */
1357		, bf->bf_state.bfs_txantenna	/* antenna mode */
1358		, bf->bf_state.bfs_txflags	/* flags */
1359		, bf->bf_state.bfs_ctsrate	/* rts/cts rate */
1360		, bf->bf_state.bfs_ctsduration	/* rts/cts duration */
1361	);
1362
1363	/*
1364	 * This will be overriden when the descriptor chain is written.
1365	 */
1366	bf->bf_lastds = ds;
1367	bf->bf_last = bf;
1368
1369	/* Set rate control and descriptor chain for this frame */
1370	ath_tx_set_ratectrl(sc, bf->bf_node, bf);
1371	ath_tx_chaindesclist(sc, ds, bf, 0, 0, 0);
1372}
1373
1374/*
1375 * Do a rate lookup.
1376 *
1377 * This performs a rate lookup for the given ath_buf only if it's required.
1378 * Non-data frames and raw frames don't require it.
1379 *
1380 * This populates the primary and MRR entries; MRR values are
1381 * then disabled later on if something requires it (eg RTS/CTS on
1382 * pre-11n chipsets.
1383 *
1384 * This needs to be done before the RTS/CTS fields are calculated
1385 * as they may depend upon the rate chosen.
1386 */
1387static void
1388ath_tx_do_ratelookup(struct ath_softc *sc, struct ath_buf *bf)
1389{
1390	uint8_t rate, rix;
1391	int try0;
1392
1393	if (! bf->bf_state.bfs_doratelookup)
1394		return;
1395
1396	/* Get rid of any previous state */
1397	bzero(bf->bf_state.bfs_rc, sizeof(bf->bf_state.bfs_rc));
1398
1399	ATH_NODE_LOCK(ATH_NODE(bf->bf_node));
1400	ath_rate_findrate(sc, ATH_NODE(bf->bf_node), bf->bf_state.bfs_shpream,
1401	    bf->bf_state.bfs_pktlen, &rix, &try0, &rate);
1402
1403	/* In case MRR is disabled, make sure rc[0] is setup correctly */
1404	bf->bf_state.bfs_rc[0].rix = rix;
1405	bf->bf_state.bfs_rc[0].ratecode = rate;
1406	bf->bf_state.bfs_rc[0].tries = try0;
1407
1408	if (bf->bf_state.bfs_ismrr && try0 != ATH_TXMAXTRY)
1409		ath_rate_getxtxrates(sc, ATH_NODE(bf->bf_node), rix,
1410		    bf->bf_state.bfs_rc);
1411	ATH_NODE_UNLOCK(ATH_NODE(bf->bf_node));
1412
1413	sc->sc_txrix = rix;	/* for LED blinking */
1414	sc->sc_lastdatarix = rix;	/* for fast frames */
1415	bf->bf_state.bfs_try0 = try0;
1416	bf->bf_state.bfs_txrate0 = rate;
1417}
1418
1419/*
1420 * Update the CLRDMASK bit in the ath_buf if it needs to be set.
1421 */
1422static void
1423ath_tx_update_clrdmask(struct ath_softc *sc, struct ath_tid *tid,
1424    struct ath_buf *bf)
1425{
1426	struct ath_node *an = ATH_NODE(bf->bf_node);
1427
1428	ATH_TX_LOCK_ASSERT(sc);
1429
1430	if (an->clrdmask == 1) {
1431		bf->bf_state.bfs_txflags |= HAL_TXDESC_CLRDMASK;
1432		an->clrdmask = 0;
1433	}
1434}
1435
1436/*
1437 * Return whether this frame should be software queued or
1438 * direct dispatched.
1439 *
1440 * When doing powersave, BAR frames should be queued but other management
1441 * frames should be directly sent.
1442 *
1443 * When not doing powersave, stick BAR frames into the hardware queue
1444 * so it goes out even though the queue is paused.
1445 *
1446 * For now, management frames are also software queued by default.
1447 */
1448static int
1449ath_tx_should_swq_frame(struct ath_softc *sc, struct ath_node *an,
1450    struct mbuf *m0, int *queue_to_head)
1451{
1452	struct ieee80211_node *ni = &an->an_node;
1453	struct ieee80211_frame *wh;
1454	uint8_t type, subtype;
1455
1456	wh = mtod(m0, struct ieee80211_frame *);
1457	type = wh->i_fc[0] & IEEE80211_FC0_TYPE_MASK;
1458	subtype = wh->i_fc[0] & IEEE80211_FC0_SUBTYPE_MASK;
1459
1460	(*queue_to_head) = 0;
1461
1462	/* If it's not in powersave - direct-dispatch BAR */
1463	if ((ATH_NODE(ni)->an_is_powersave == 0)
1464	    && type == IEEE80211_FC0_TYPE_CTL &&
1465	    subtype == IEEE80211_FC0_SUBTYPE_BAR) {
1466		DPRINTF(sc, ATH_DEBUG_SW_TX,
1467		    "%s: BAR: TX'ing direct\n", __func__);
1468		return (0);
1469	} else if ((ATH_NODE(ni)->an_is_powersave == 1)
1470	    && type == IEEE80211_FC0_TYPE_CTL &&
1471	    subtype == IEEE80211_FC0_SUBTYPE_BAR) {
1472		/* BAR TX whilst asleep; queue */
1473		DPRINTF(sc, ATH_DEBUG_SW_TX,
1474		    "%s: swq: TX'ing\n", __func__);
1475		(*queue_to_head) = 1;
1476		return (1);
1477	} else if ((ATH_NODE(ni)->an_is_powersave == 1)
1478	    && (type == IEEE80211_FC0_TYPE_MGT ||
1479	        type == IEEE80211_FC0_TYPE_CTL)) {
1480		/*
1481		 * Other control/mgmt frame; bypass software queuing
1482		 * for now!
1483		 */
1484		device_printf(sc->sc_dev,
1485		    "%s: %6D: Node is asleep; sending mgmt "
1486		    "(type=%d, subtype=%d)\n",
1487		    __func__,
1488		    ni->ni_macaddr,
1489		    ":",
1490		    type,
1491		    subtype);
1492		return (0);
1493	} else {
1494		return (1);
1495	}
1496}
1497
1498
1499/*
1500 * Transmit the given frame to the hardware.
1501 *
1502 * The frame must already be setup; rate control must already have
1503 * been done.
1504 *
1505 * XXX since the TXQ lock is being held here (and I dislike holding
1506 * it for this long when not doing software aggregation), later on
1507 * break this function into "setup_normal" and "xmit_normal". The
1508 * lock only needs to be held for the ath_tx_handoff call.
1509 *
1510 * XXX we don't update the leak count here - if we're doing
1511 * direct frame dispatch, we need to be able to do it without
1512 * decrementing the leak count (eg multicast queue frames.)
1513 */
1514static void
1515ath_tx_xmit_normal(struct ath_softc *sc, struct ath_txq *txq,
1516    struct ath_buf *bf)
1517{
1518	struct ath_node *an = ATH_NODE(bf->bf_node);
1519	struct ath_tid *tid = &an->an_tid[bf->bf_state.bfs_tid];
1520
1521	ATH_TX_LOCK_ASSERT(sc);
1522
1523	/*
1524	 * For now, just enable CLRDMASK. ath_tx_xmit_normal() does
1525	 * set a completion handler however it doesn't (yet) properly
1526	 * handle the strict ordering requirements needed for normal,
1527	 * non-aggregate session frames.
1528	 *
1529	 * Once this is implemented, only set CLRDMASK like this for
1530	 * frames that must go out - eg management/raw frames.
1531	 */
1532	bf->bf_state.bfs_txflags |= HAL_TXDESC_CLRDMASK;
1533
1534	/* Setup the descriptor before handoff */
1535	ath_tx_do_ratelookup(sc, bf);
1536	ath_tx_calc_duration(sc, bf);
1537	ath_tx_calc_protection(sc, bf);
1538	ath_tx_set_rtscts(sc, bf);
1539	ath_tx_rate_fill_rcflags(sc, bf);
1540	ath_tx_setds(sc, bf);
1541
1542	/* Track per-TID hardware queue depth correctly */
1543	tid->hwq_depth++;
1544
1545	/* Assign the completion handler */
1546	bf->bf_comp = ath_tx_normal_comp;
1547
1548	/* Hand off to hardware */
1549	ath_tx_handoff(sc, txq, bf);
1550}
1551
1552/*
1553 * Do the basic frame setup stuff that's required before the frame
1554 * is added to a software queue.
1555 *
1556 * All frames get mostly the same treatment and it's done once.
1557 * Retransmits fiddle with things like the rate control setup,
1558 * setting the retransmit bit in the packet; doing relevant DMA/bus
1559 * syncing and relinking it (back) into the hardware TX queue.
1560 *
1561 * Note that this may cause the mbuf to be reallocated, so
1562 * m0 may not be valid.
1563 */
1564static int
1565ath_tx_normal_setup(struct ath_softc *sc, struct ieee80211_node *ni,
1566    struct ath_buf *bf, struct mbuf *m0, struct ath_txq *txq)
1567{
1568	struct ieee80211vap *vap = ni->ni_vap;
1569	struct ath_hal *ah = sc->sc_ah;
1570	struct ifnet *ifp = sc->sc_ifp;
1571	struct ieee80211com *ic = ifp->if_l2com;
1572	const struct chanAccParams *cap = &ic->ic_wme.wme_chanParams;
1573	int error, iswep, ismcast, isfrag, ismrr;
1574	int keyix, hdrlen, pktlen, try0 = 0;
1575	u_int8_t rix = 0, txrate = 0;
1576	struct ath_desc *ds;
1577	struct ieee80211_frame *wh;
1578	u_int subtype, flags;
1579	HAL_PKT_TYPE atype;
1580	const HAL_RATE_TABLE *rt;
1581	HAL_BOOL shortPreamble;
1582	struct ath_node *an;
1583	u_int pri;
1584
1585	/*
1586	 * To ensure that both sequence numbers and the CCMP PN handling
1587	 * is "correct", make sure that the relevant TID queue is locked.
1588	 * Otherwise the CCMP PN and seqno may appear out of order, causing
1589	 * re-ordered frames to have out of order CCMP PN's, resulting
1590	 * in many, many frame drops.
1591	 */
1592	ATH_TX_LOCK_ASSERT(sc);
1593
1594	wh = mtod(m0, struct ieee80211_frame *);
1595	iswep = wh->i_fc[1] & IEEE80211_FC1_WEP;
1596	ismcast = IEEE80211_IS_MULTICAST(wh->i_addr1);
1597	isfrag = m0->m_flags & M_FRAG;
1598	hdrlen = ieee80211_anyhdrsize(wh);
1599	/*
1600	 * Packet length must not include any
1601	 * pad bytes; deduct them here.
1602	 */
1603	pktlen = m0->m_pkthdr.len - (hdrlen & 3);
1604
1605	/* Handle encryption twiddling if needed */
1606	if (! ath_tx_tag_crypto(sc, ni, m0, iswep, isfrag, &hdrlen,
1607	    &pktlen, &keyix)) {
1608		ath_freetx(m0);
1609		return EIO;
1610	}
1611
1612	/* packet header may have moved, reset our local pointer */
1613	wh = mtod(m0, struct ieee80211_frame *);
1614
1615	pktlen += IEEE80211_CRC_LEN;
1616
1617	/*
1618	 * Load the DMA map so any coalescing is done.  This
1619	 * also calculates the number of descriptors we need.
1620	 */
1621	error = ath_tx_dmasetup(sc, bf, m0);
1622	if (error != 0)
1623		return error;
1624	bf->bf_node = ni;			/* NB: held reference */
1625	m0 = bf->bf_m;				/* NB: may have changed */
1626	wh = mtod(m0, struct ieee80211_frame *);
1627
1628	/* setup descriptors */
1629	ds = bf->bf_desc;
1630	rt = sc->sc_currates;
1631	KASSERT(rt != NULL, ("no rate table, mode %u", sc->sc_curmode));
1632
1633	/*
1634	 * NB: the 802.11 layer marks whether or not we should
1635	 * use short preamble based on the current mode and
1636	 * negotiated parameters.
1637	 */
1638	if ((ic->ic_flags & IEEE80211_F_SHPREAMBLE) &&
1639	    (ni->ni_capinfo & IEEE80211_CAPINFO_SHORT_PREAMBLE)) {
1640		shortPreamble = AH_TRUE;
1641		sc->sc_stats.ast_tx_shortpre++;
1642	} else {
1643		shortPreamble = AH_FALSE;
1644	}
1645
1646	an = ATH_NODE(ni);
1647	//flags = HAL_TXDESC_CLRDMASK;		/* XXX needed for crypto errs */
1648	flags = 0;
1649	ismrr = 0;				/* default no multi-rate retry*/
1650	pri = M_WME_GETAC(m0);			/* honor classification */
1651	/* XXX use txparams instead of fixed values */
1652	/*
1653	 * Calculate Atheros packet type from IEEE80211 packet header,
1654	 * setup for rate calculations, and select h/w transmit queue.
1655	 */
1656	switch (wh->i_fc[0] & IEEE80211_FC0_TYPE_MASK) {
1657	case IEEE80211_FC0_TYPE_MGT:
1658		subtype = wh->i_fc[0] & IEEE80211_FC0_SUBTYPE_MASK;
1659		if (subtype == IEEE80211_FC0_SUBTYPE_BEACON)
1660			atype = HAL_PKT_TYPE_BEACON;
1661		else if (subtype == IEEE80211_FC0_SUBTYPE_PROBE_RESP)
1662			atype = HAL_PKT_TYPE_PROBE_RESP;
1663		else if (subtype == IEEE80211_FC0_SUBTYPE_ATIM)
1664			atype = HAL_PKT_TYPE_ATIM;
1665		else
1666			atype = HAL_PKT_TYPE_NORMAL;	/* XXX */
1667		rix = an->an_mgmtrix;
1668		txrate = rt->info[rix].rateCode;
1669		if (shortPreamble)
1670			txrate |= rt->info[rix].shortPreamble;
1671		try0 = ATH_TXMGTTRY;
1672		flags |= HAL_TXDESC_INTREQ;	/* force interrupt */
1673		break;
1674	case IEEE80211_FC0_TYPE_CTL:
1675		atype = HAL_PKT_TYPE_PSPOLL;	/* stop setting of duration */
1676		rix = an->an_mgmtrix;
1677		txrate = rt->info[rix].rateCode;
1678		if (shortPreamble)
1679			txrate |= rt->info[rix].shortPreamble;
1680		try0 = ATH_TXMGTTRY;
1681		flags |= HAL_TXDESC_INTREQ;	/* force interrupt */
1682		break;
1683	case IEEE80211_FC0_TYPE_DATA:
1684		atype = HAL_PKT_TYPE_NORMAL;		/* default */
1685		/*
1686		 * Data frames: multicast frames go out at a fixed rate,
1687		 * EAPOL frames use the mgmt frame rate; otherwise consult
1688		 * the rate control module for the rate to use.
1689		 */
1690		if (ismcast) {
1691			rix = an->an_mcastrix;
1692			txrate = rt->info[rix].rateCode;
1693			if (shortPreamble)
1694				txrate |= rt->info[rix].shortPreamble;
1695			try0 = 1;
1696		} else if (m0->m_flags & M_EAPOL) {
1697			/* XXX? maybe always use long preamble? */
1698			rix = an->an_mgmtrix;
1699			txrate = rt->info[rix].rateCode;
1700			if (shortPreamble)
1701				txrate |= rt->info[rix].shortPreamble;
1702			try0 = ATH_TXMAXTRY;	/* XXX?too many? */
1703		} else {
1704			/*
1705			 * Do rate lookup on each TX, rather than using
1706			 * the hard-coded TX information decided here.
1707			 */
1708			ismrr = 1;
1709			bf->bf_state.bfs_doratelookup = 1;
1710		}
1711		if (cap->cap_wmeParams[pri].wmep_noackPolicy)
1712			flags |= HAL_TXDESC_NOACK;
1713		break;
1714	default:
1715		if_printf(ifp, "bogus frame type 0x%x (%s)\n",
1716			wh->i_fc[0] & IEEE80211_FC0_TYPE_MASK, __func__);
1717		/* XXX statistic */
1718		/* XXX free tx dmamap */
1719		ath_freetx(m0);
1720		return EIO;
1721	}
1722
1723	/*
1724	 * There are two known scenarios where the frame AC doesn't match
1725	 * what the destination TXQ is.
1726	 *
1727	 * + non-QoS frames (eg management?) that the net80211 stack has
1728	 *   assigned a higher AC to, but since it's a non-QoS TID, it's
1729	 *   being thrown into TID 16.  TID 16 gets the AC_BE queue.
1730	 *   It's quite possible that management frames should just be
1731	 *   direct dispatched to hardware rather than go via the software
1732	 *   queue; that should be investigated in the future.  There are
1733	 *   some specific scenarios where this doesn't make sense, mostly
1734	 *   surrounding ADDBA request/response - hence why that is special
1735	 *   cased.
1736	 *
1737	 * + Multicast frames going into the VAP mcast queue.  That shows up
1738	 *   as "TXQ 11".
1739	 *
1740	 * This driver should eventually support separate TID and TXQ locking,
1741	 * allowing for arbitrary AC frames to appear on arbitrary software
1742	 * queues, being queued to the "correct" hardware queue when needed.
1743	 */
1744#if 0
1745	if (txq != sc->sc_ac2q[pri]) {
1746		device_printf(sc->sc_dev,
1747		    "%s: txq=%p (%d), pri=%d, pri txq=%p (%d)\n",
1748		    __func__,
1749		    txq,
1750		    txq->axq_qnum,
1751		    pri,
1752		    sc->sc_ac2q[pri],
1753		    sc->sc_ac2q[pri]->axq_qnum);
1754	}
1755#endif
1756
1757	/*
1758	 * Calculate miscellaneous flags.
1759	 */
1760	if (ismcast) {
1761		flags |= HAL_TXDESC_NOACK;	/* no ack on broad/multicast */
1762	} else if (pktlen > vap->iv_rtsthreshold &&
1763	    (ni->ni_ath_flags & IEEE80211_NODE_FF) == 0) {
1764		flags |= HAL_TXDESC_RTSENA;	/* RTS based on frame length */
1765		sc->sc_stats.ast_tx_rts++;
1766	}
1767	if (flags & HAL_TXDESC_NOACK)		/* NB: avoid double counting */
1768		sc->sc_stats.ast_tx_noack++;
1769#ifdef IEEE80211_SUPPORT_TDMA
1770	if (sc->sc_tdma && (flags & HAL_TXDESC_NOACK) == 0) {
1771		DPRINTF(sc, ATH_DEBUG_TDMA,
1772		    "%s: discard frame, ACK required w/ TDMA\n", __func__);
1773		sc->sc_stats.ast_tdma_ack++;
1774		/* XXX free tx dmamap */
1775		ath_freetx(m0);
1776		return EIO;
1777	}
1778#endif
1779
1780	/*
1781	 * Determine if a tx interrupt should be generated for
1782	 * this descriptor.  We take a tx interrupt to reap
1783	 * descriptors when the h/w hits an EOL condition or
1784	 * when the descriptor is specifically marked to generate
1785	 * an interrupt.  We periodically mark descriptors in this
	 * way to ensure timely replenishing of the supply needed
	 * for sending frames.  Deferring interrupts reduces system
	 * load and potentially allows more concurrent work to be
	 * done, but if done too aggressively it can cause senders
	 * to back up.
1791	 *
1792	 * NB: use >= to deal with sc_txintrperiod changing
1793	 *     dynamically through sysctl.
1794	 */
1795	if (flags & HAL_TXDESC_INTREQ) {
1796		txq->axq_intrcnt = 0;
1797	} else if (++txq->axq_intrcnt >= sc->sc_txintrperiod) {
1798		flags |= HAL_TXDESC_INTREQ;
1799		txq->axq_intrcnt = 0;
1800	}
1801
1802	/* This point forward is actual TX bits */
1803
1804	/*
1805	 * At this point we are committed to sending the frame
1806	 * and we don't need to look at m_nextpkt; clear it in
1807	 * case this frame is part of frag chain.
1808	 */
1809	m0->m_nextpkt = NULL;
1810
1811	if (IFF_DUMPPKTS(sc, ATH_DEBUG_XMIT))
1812		ieee80211_dump_pkt(ic, mtod(m0, const uint8_t *), m0->m_len,
1813		    sc->sc_hwmap[rix].ieeerate, -1);
1814
1815	if (ieee80211_radiotap_active_vap(vap)) {
1816		u_int64_t tsf = ath_hal_gettsf64(ah);
1817
1818		sc->sc_tx_th.wt_tsf = htole64(tsf);
1819		sc->sc_tx_th.wt_flags = sc->sc_hwmap[rix].txflags;
1820		if (iswep)
1821			sc->sc_tx_th.wt_flags |= IEEE80211_RADIOTAP_F_WEP;
1822		if (isfrag)
1823			sc->sc_tx_th.wt_flags |= IEEE80211_RADIOTAP_F_FRAG;
1824		sc->sc_tx_th.wt_rate = sc->sc_hwmap[rix].ieeerate;
1825		sc->sc_tx_th.wt_txpower = ieee80211_get_node_txpower(ni);
1826		sc->sc_tx_th.wt_antenna = sc->sc_txantenna;
1827
1828		ieee80211_radiotap_tx(vap, m0);
1829	}
1830
1831	/* Blank the legacy rate array */
1832	bzero(&bf->bf_state.bfs_rc, sizeof(bf->bf_state.bfs_rc));
1833
1834	/*
1835	 * ath_buf_set_rate needs at least one rate/try to setup
1836	 * the rate scenario.
1837	 */
1838	bf->bf_state.bfs_rc[0].rix = rix;
1839	bf->bf_state.bfs_rc[0].tries = try0;
1840	bf->bf_state.bfs_rc[0].ratecode = txrate;
1841
1842	/* Store the decided rate index values away */
1843	bf->bf_state.bfs_pktlen = pktlen;
1844	bf->bf_state.bfs_hdrlen = hdrlen;
1845	bf->bf_state.bfs_atype = atype;
1846	bf->bf_state.bfs_txpower = ieee80211_get_node_txpower(ni);
1847	bf->bf_state.bfs_txrate0 = txrate;
1848	bf->bf_state.bfs_try0 = try0;
1849	bf->bf_state.bfs_keyix = keyix;
1850	bf->bf_state.bfs_txantenna = sc->sc_txantenna;
1851	bf->bf_state.bfs_txflags = flags;
1852	bf->bf_state.bfs_shpream = shortPreamble;
1853
1854	/* XXX this should be done in ath_tx_setrate() */
1855	bf->bf_state.bfs_ctsrate0 = 0;	/* ie, no hard-coded ctsrate */
1856	bf->bf_state.bfs_ctsrate = 0;	/* calculated later */
1857	bf->bf_state.bfs_ctsduration = 0;
1858	bf->bf_state.bfs_ismrr = ismrr;
1859
1860	return 0;
1861}
1862
1863/*
1864 * Queue a frame to the hardware or software queue.
1865 *
1866 * This can be called by the net80211 code.
1867 *
1868 * XXX what about locking? Or, push the seqno assign into the
1869 * XXX aggregate scheduler so its serialised?
1870 *
1871 * XXX When sending management frames via ath_raw_xmit(),
1872 *     should CLRDMASK be set unconditionally?
1873 */
1874int
1875ath_tx_start(struct ath_softc *sc, struct ieee80211_node *ni,
1876    struct ath_buf *bf, struct mbuf *m0)
1877{
1878	struct ieee80211vap *vap = ni->ni_vap;
1879	struct ath_vap *avp = ATH_VAP(vap);
1880	int r = 0;
1881	u_int pri;
1882	int tid;
1883	struct ath_txq *txq;
1884	int ismcast;
1885	const struct ieee80211_frame *wh;
1886	int is_ampdu, is_ampdu_tx, is_ampdu_pending;
1887	ieee80211_seq seqno;
1888	uint8_t type, subtype;
1889	int queue_to_head;
1890
1891	ATH_TX_LOCK_ASSERT(sc);
1892
1893	/*
1894	 * Determine the target hardware queue.
1895	 *
1896	 * For multicast frames, the txq gets overridden appropriately
1897	 * depending upon the state of PS.
1898	 *
1899	 * For any other frame, we do a TID/QoS lookup inside the frame
1900	 * to see what the TID should be. If it's a non-QoS frame, the
1901	 * AC and TID are overridden. The TID/TXQ code assumes the
1902	 * TID is on a predictable hardware TXQ, so we don't support
1903	 * having a node TID queued to multiple hardware TXQs.
1904	 * This may change in the future but would require some locking
1905	 * fudgery.
1906	 */
1907	pri = ath_tx_getac(sc, m0);
1908	tid = ath_tx_gettid(sc, m0);
1909
1910	txq = sc->sc_ac2q[pri];
1911	wh = mtod(m0, struct ieee80211_frame *);
1912	ismcast = IEEE80211_IS_MULTICAST(wh->i_addr1);
1913	type = wh->i_fc[0] & IEEE80211_FC0_TYPE_MASK;
1914	subtype = wh->i_fc[0] & IEEE80211_FC0_SUBTYPE_MASK;
1915
1916	/*
1917	 * Enforce how deep the multicast queue can grow.
1918	 *
1919	 * XXX duplicated in ath_raw_xmit().
1920	 */
1921	if (IEEE80211_IS_MULTICAST(wh->i_addr1)) {
1922		if (sc->sc_cabq->axq_depth + sc->sc_cabq->fifo.axq_depth
1923		    > sc->sc_txq_mcastq_maxdepth) {
1924			sc->sc_stats.ast_tx_mcastq_overflow++;
1925			m_freem(m0);
1926			return (ENOBUFS);
1927		}
1928	}
1929
1930	/*
1931	 * Enforce how deep the unicast queue can grow.
1932	 *
1933	 * If the node is in power save then we don't want
1934	 * the software queue to grow too deep, or a node may
1935	 * end up consuming all of the ath_buf entries.
1936	 *
1937	 * For now, only do this for DATA frames.
1938	 *
1939	 * We will want to cap how many management/control
1940	 * frames get punted to the software queue so it doesn't
1941	 * fill up.  But the correct solution isn't yet obvious.
1942	 * In any case, this check should at least let frames pass
1943	 * that we are direct-dispatching.
1944	 *
1945	 * XXX TODO: duplicate this to the raw xmit path!
1946	 */
1947	if (type == IEEE80211_FC0_TYPE_DATA &&
1948	    ATH_NODE(ni)->an_is_powersave &&
1949	    ATH_NODE(ni)->an_swq_depth >
1950	     sc->sc_txq_node_psq_maxdepth) {
1951		sc->sc_stats.ast_tx_node_psq_overflow++;
1952		m_freem(m0);
1953		return (ENOBUFS);
1954	}
1955
1956	/* A-MPDU TX */
1957	is_ampdu_tx = ath_tx_ampdu_running(sc, ATH_NODE(ni), tid);
1958	is_ampdu_pending = ath_tx_ampdu_pending(sc, ATH_NODE(ni), tid);
1959	is_ampdu = is_ampdu_tx | is_ampdu_pending;
1960
1961	DPRINTF(sc, ATH_DEBUG_SW_TX, "%s: tid=%d, ac=%d, is_ampdu=%d\n",
1962	    __func__, tid, pri, is_ampdu);
1963
1964	/* Set local packet state, used to queue packets to hardware */
1965	bf->bf_state.bfs_tid = tid;
1966	bf->bf_state.bfs_tx_queue = txq->axq_qnum;
1967	bf->bf_state.bfs_pri = pri;
1968
1969#if 1
1970	/*
1971	 * When servicing one or more stations in power-save mode
	 * (or if there is some mcast data waiting on the mcast
	 * queue, to prevent out of order delivery), multicast frames
	 * must be buffered until after the beacon.
1975	 *
1976	 * TODO: we should lock the mcastq before we check the length.
1977	 */
1978	if (sc->sc_cabq_enable && ismcast && (vap->iv_ps_sta || avp->av_mcastq.axq_depth)) {
1979		txq = &avp->av_mcastq;
1980		/*
1981		 * Mark the frame as eventually belonging on the CAB
1982		 * queue, so the descriptor setup functions will
1983		 * correctly initialise the descriptor 'qcuId' field.
1984		 */
1985		bf->bf_state.bfs_tx_queue = sc->sc_cabq->axq_qnum;
1986	}
1987#endif
1988
1989	/* Do the generic frame setup */
1990	/* XXX should just bzero the bf_state? */
1991	bf->bf_state.bfs_dobaw = 0;
1992
1993	/* A-MPDU TX? Manually set sequence number */
1994	/*
1995	 * Don't do it whilst pending; the net80211 layer still
1996	 * assigns them.
1997	 */
1998	if (is_ampdu_tx) {
1999		/*
2000		 * Always call; this function will
2001		 * handle making sure that null data frames
2002		 * don't get a sequence number from the current
2003		 * TID and thus mess with the BAW.
2004		 */
2005		seqno = ath_tx_tid_seqno_assign(sc, ni, bf, m0);
2006
2007		/*
2008		 * Don't add QoS NULL frames to the BAW.
2009		 */
2010		if (IEEE80211_QOS_HAS_SEQ(wh) &&
2011		    subtype != IEEE80211_FC0_SUBTYPE_QOS_NULL) {
2012			bf->bf_state.bfs_dobaw = 1;
2013		}
2014	}
2015
2016	/*
2017	 * If needed, the sequence number has been assigned.
2018	 * Squirrel it away somewhere easy to get to.
2019	 */
2020	bf->bf_state.bfs_seqno = M_SEQNO_GET(m0) << IEEE80211_SEQ_SEQ_SHIFT;
2021
	/* Is A-MPDU pending? Fetch the seqno and print it out */
2023	if (is_ampdu_pending)
2024		DPRINTF(sc, ATH_DEBUG_SW_TX,
2025		    "%s: tid %d: ampdu pending, seqno %d\n",
2026		    __func__, tid, M_SEQNO_GET(m0));
2027
2028	/* This also sets up the DMA map */
2029	r = ath_tx_normal_setup(sc, ni, bf, m0, txq);
2030
2031	if (r != 0)
2032		goto done;
2033
2034	/* At this point m0 could have changed! */
2035	m0 = bf->bf_m;
2036
2037#if 1
2038	/*
2039	 * If it's a multicast frame, do a direct-dispatch to the
2040	 * destination hardware queue. Don't bother software
2041	 * queuing it.
2042	 */
2043	/*
2044	 * If it's a BAR frame, do a direct dispatch to the
2045	 * destination hardware queue. Don't bother software
2046	 * queuing it, as the TID will now be paused.
2047	 * Sending a BAR frame can occur from the net80211 txa timer
2048	 * (ie, retries) or from the ath txtask (completion call.)
2049	 * It queues directly to hardware because the TID is paused
2050	 * at this point (and won't be unpaused until the BAR has
2051	 * either been TXed successfully or max retries has been
2052	 * reached.)
2053	 */
2054	/*
2055	 * Until things are better debugged - if this node is asleep
2056	 * and we're sending it a non-BAR frame, direct dispatch it.
2057	 * Why? Because we need to figure out what's actually being
2058	 * sent - eg, during reassociation/reauthentication after
	 * the node last disappeared whilst asleep, the driver should
	 * have unpaused/un-slept the node.  So until that is
2061	 * sorted out, use this workaround.
2062	 */
2063	if (txq == &avp->av_mcastq) {
2064		DPRINTF(sc, ATH_DEBUG_SW_TX,
2065		    "%s: bf=%p: mcastq: TX'ing\n", __func__, bf);
2066		bf->bf_state.bfs_txflags |= HAL_TXDESC_CLRDMASK;
2067		ath_tx_xmit_normal(sc, txq, bf);
2068	} else if (ath_tx_should_swq_frame(sc, ATH_NODE(ni), m0,
2069	    &queue_to_head)) {
2070		ath_tx_swq(sc, ni, txq, queue_to_head, bf);
2071	} else {
2072		bf->bf_state.bfs_txflags |= HAL_TXDESC_CLRDMASK;
2073		ath_tx_xmit_normal(sc, txq, bf);
2074	}
2075#else
2076	/*
2077	 * For now, since there's no software queue,
2078	 * direct-dispatch to the hardware.
2079	 */
2080	bf->bf_state.bfs_txflags |= HAL_TXDESC_CLRDMASK;
2081	/*
2082	 * Update the current leak count if
2083	 * we're leaking frames; and set the
2084	 * MORE flag as appropriate.
2085	 */
2086	ath_tx_leak_count_update(sc, tid, bf);
2087	ath_tx_xmit_normal(sc, txq, bf);
2088#endif
2089done:
2090	return 0;
2091}
2092
2093static int
2094ath_tx_raw_start(struct ath_softc *sc, struct ieee80211_node *ni,
2095	struct ath_buf *bf, struct mbuf *m0,
2096	const struct ieee80211_bpf_params *params)
2097{
2098	struct ifnet *ifp = sc->sc_ifp;
2099	struct ieee80211com *ic = ifp->if_l2com;
2100	struct ath_hal *ah = sc->sc_ah;
2101	struct ieee80211vap *vap = ni->ni_vap;
2102	int error, ismcast, ismrr;
2103	int keyix, hdrlen, pktlen, try0, txantenna;
2104	u_int8_t rix, txrate;
2105	struct ieee80211_frame *wh;
2106	u_int flags;
2107	HAL_PKT_TYPE atype;
2108	const HAL_RATE_TABLE *rt;
2109	struct ath_desc *ds;
2110	u_int pri;
2111	int o_tid = -1;
2112	int do_override;
2113	uint8_t type, subtype;
2114	int queue_to_head;
2115
2116	ATH_TX_LOCK_ASSERT(sc);
2117
2118	wh = mtod(m0, struct ieee80211_frame *);
2119	ismcast = IEEE80211_IS_MULTICAST(wh->i_addr1);
2120	hdrlen = ieee80211_anyhdrsize(wh);
2121	/*
2122	 * Packet length must not include any
2123	 * pad bytes; deduct them here.
2124	 */
2125	/* XXX honor IEEE80211_BPF_DATAPAD */
2126	pktlen = m0->m_pkthdr.len - (hdrlen & 3) + IEEE80211_CRC_LEN;
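	/*
	 * Illustrative example (figures are hypothetical, and assume the
	 * header was padded in the mbuf as on the normal TX path): a QoS
	 * data header is 26 bytes, padded to 28 in the mbuf, so
	 * (hdrlen & 3) == 2 removes that pad from the over-the-air
	 * length, while IEEE80211_CRC_LEN accounts for the 4-byte FCS
	 * the hardware appends.
	 */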
2127
2128	type = wh->i_fc[0] & IEEE80211_FC0_TYPE_MASK;
2129	subtype = wh->i_fc[0] & IEEE80211_FC0_SUBTYPE_MASK;
2130
2131	ATH_KTR(sc, ATH_KTR_TX, 2,
2132	     "ath_tx_raw_start: ni=%p, bf=%p, raw", ni, bf);
2133
2134	DPRINTF(sc, ATH_DEBUG_SW_TX, "%s: ismcast=%d\n",
2135	    __func__, ismcast);
2136
2137	pri = params->ibp_pri & 3;
2138	/* Override pri if the frame isn't a QoS one */
2139	if (! IEEE80211_QOS_HAS_SEQ(wh))
2140		pri = ath_tx_getac(sc, m0);
2141
	/* XXX If it's an ADDBA, override the destination queue */
2143	do_override = ath_tx_action_frame_override_queue(sc, ni, m0, &o_tid);
2144
2145	/* Map ADDBA to the correct priority */
2146	if (do_override) {
2147#if 0
2148		device_printf(sc->sc_dev,
2149		    "%s: overriding tid %d pri %d -> %d\n",
2150		    __func__, o_tid, pri, TID_TO_WME_AC(o_tid));
2151#endif
2152		pri = TID_TO_WME_AC(o_tid);
2153	}
2154
2155	/* Handle encryption twiddling if needed */
2156	if (! ath_tx_tag_crypto(sc, ni,
2157	    m0, params->ibp_flags & IEEE80211_BPF_CRYPTO, 0,
2158	    &hdrlen, &pktlen, &keyix)) {
2159		ath_freetx(m0);
2160		return EIO;
2161	}
2162	/* packet header may have moved, reset our local pointer */
2163	wh = mtod(m0, struct ieee80211_frame *);
2164
2165	/* Do the generic frame setup */
2166	/* XXX should just bzero the bf_state? */
2167	bf->bf_state.bfs_dobaw = 0;
2168
2169	error = ath_tx_dmasetup(sc, bf, m0);
2170	if (error != 0)
2171		return error;
2172	m0 = bf->bf_m;				/* NB: may have changed */
2173	wh = mtod(m0, struct ieee80211_frame *);
2174	bf->bf_node = ni;			/* NB: held reference */
2175
2176	/* Always enable CLRDMASK for raw frames for now.. */
2177	flags = HAL_TXDESC_CLRDMASK;		/* XXX needed for crypto errs */
2178	flags |= HAL_TXDESC_INTREQ;		/* force interrupt */
2179	if (params->ibp_flags & IEEE80211_BPF_RTS)
2180		flags |= HAL_TXDESC_RTSENA;
2181	else if (params->ibp_flags & IEEE80211_BPF_CTS) {
2182		/* XXX assume 11g/11n protection? */
2183		bf->bf_state.bfs_doprot = 1;
2184		flags |= HAL_TXDESC_CTSENA;
2185	}
2186	/* XXX leave ismcast to injector? */
2187	if ((params->ibp_flags & IEEE80211_BPF_NOACK) || ismcast)
2188		flags |= HAL_TXDESC_NOACK;
2189
2190	rt = sc->sc_currates;
2191	KASSERT(rt != NULL, ("no rate table, mode %u", sc->sc_curmode));
2192	rix = ath_tx_findrix(sc, params->ibp_rate0);
2193	txrate = rt->info[rix].rateCode;
2194	if (params->ibp_flags & IEEE80211_BPF_SHORTPRE)
2195		txrate |= rt->info[rix].shortPreamble;
2196	sc->sc_txrix = rix;
2197	try0 = params->ibp_try0;
2198	ismrr = (params->ibp_try1 != 0);
2199	txantenna = params->ibp_pri >> 2;
2200	if (txantenna == 0)			/* XXX? */
2201		txantenna = sc->sc_txantenna;
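	/*
	 * Note (as used just above): ibp_pri packs the WME priority in
	 * its low two bits and the requested TX antenna in the remaining
	 * bits; an antenna value of zero appears to mean "use the default
	 * configured antenna".
	 */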
2202
2203	/*
2204	 * Since ctsrate is fixed, store it away for later
2205	 * use when the descriptor fields are being set.
2206	 */
2207	if (flags & (HAL_TXDESC_RTSENA|HAL_TXDESC_CTSENA))
2208		bf->bf_state.bfs_ctsrate0 = params->ibp_ctsrate;
2209
2210	/*
2211	 * NB: we mark all packets as type PSPOLL so the h/w won't
2212	 * set the sequence number, duration, etc.
2213	 */
2214	atype = HAL_PKT_TYPE_PSPOLL;
2215
2216	if (IFF_DUMPPKTS(sc, ATH_DEBUG_XMIT))
2217		ieee80211_dump_pkt(ic, mtod(m0, caddr_t), m0->m_len,
2218		    sc->sc_hwmap[rix].ieeerate, -1);
2219
2220	if (ieee80211_radiotap_active_vap(vap)) {
2221		u_int64_t tsf = ath_hal_gettsf64(ah);
2222
2223		sc->sc_tx_th.wt_tsf = htole64(tsf);
2224		sc->sc_tx_th.wt_flags = sc->sc_hwmap[rix].txflags;
2225		if (wh->i_fc[1] & IEEE80211_FC1_WEP)
2226			sc->sc_tx_th.wt_flags |= IEEE80211_RADIOTAP_F_WEP;
2227		if (m0->m_flags & M_FRAG)
2228			sc->sc_tx_th.wt_flags |= IEEE80211_RADIOTAP_F_FRAG;
2229		sc->sc_tx_th.wt_rate = sc->sc_hwmap[rix].ieeerate;
2230		sc->sc_tx_th.wt_txpower = MIN(params->ibp_power,
2231		    ieee80211_get_node_txpower(ni));
2232		sc->sc_tx_th.wt_antenna = sc->sc_txantenna;
2233
2234		ieee80211_radiotap_tx(vap, m0);
2235	}
2236
2237	/*
2238	 * Formulate first tx descriptor with tx controls.
2239	 */
2240	ds = bf->bf_desc;
2241	/* XXX check return value? */
2242
2243	/* Store the decided rate index values away */
2244	bf->bf_state.bfs_pktlen = pktlen;
2245	bf->bf_state.bfs_hdrlen = hdrlen;
2246	bf->bf_state.bfs_atype = atype;
2247	bf->bf_state.bfs_txpower = MIN(params->ibp_power,
2248	    ieee80211_get_node_txpower(ni));
2249	bf->bf_state.bfs_txrate0 = txrate;
2250	bf->bf_state.bfs_try0 = try0;
2251	bf->bf_state.bfs_keyix = keyix;
2252	bf->bf_state.bfs_txantenna = txantenna;
2253	bf->bf_state.bfs_txflags = flags;
2254	bf->bf_state.bfs_shpream =
2255	    !! (params->ibp_flags & IEEE80211_BPF_SHORTPRE);
2256
2257	/* Set local packet state, used to queue packets to hardware */
2258	bf->bf_state.bfs_tid = WME_AC_TO_TID(pri);
2259	bf->bf_state.bfs_tx_queue = sc->sc_ac2q[pri]->axq_qnum;
2260	bf->bf_state.bfs_pri = pri;
2261
2262	/* XXX this should be done in ath_tx_setrate() */
2263	bf->bf_state.bfs_ctsrate = 0;
2264	bf->bf_state.bfs_ctsduration = 0;
2265	bf->bf_state.bfs_ismrr = ismrr;
2266
2267	/* Blank the legacy rate array */
2268	bzero(&bf->bf_state.bfs_rc, sizeof(bf->bf_state.bfs_rc));
2269
2270	bf->bf_state.bfs_rc[0].rix =
2271	    ath_tx_findrix(sc, params->ibp_rate0);
2272	bf->bf_state.bfs_rc[0].tries = try0;
2273	bf->bf_state.bfs_rc[0].ratecode = txrate;
2274
2275	if (ismrr) {
2276		int rix;
2277
2278		rix = ath_tx_findrix(sc, params->ibp_rate1);
2279		bf->bf_state.bfs_rc[1].rix = rix;
2280		bf->bf_state.bfs_rc[1].tries = params->ibp_try1;
2281
2282		rix = ath_tx_findrix(sc, params->ibp_rate2);
2283		bf->bf_state.bfs_rc[2].rix = rix;
2284		bf->bf_state.bfs_rc[2].tries = params->ibp_try2;
2285
2286		rix = ath_tx_findrix(sc, params->ibp_rate3);
2287		bf->bf_state.bfs_rc[3].rix = rix;
2288		bf->bf_state.bfs_rc[3].tries = params->ibp_try3;
2289	}
2290	/*
2291	 * All the required rate control decisions have been made;
2292	 * fill in the rc flags.
2293	 */
2294	ath_tx_rate_fill_rcflags(sc, bf);
2295
	/* NB: no support for buffered multicast in power save */
2297
2298	/*
	 * If we're overriding the ADDBA destination, dump directly
	 * into the hardware queue, right after any pending
	 * frames to that node.
2302	 */
2303	DPRINTF(sc, ATH_DEBUG_SW_TX, "%s: dooverride=%d\n",
2304	    __func__, do_override);
2305
2306#if 1
2307	/*
2308	 * Put addba frames in the right place in the right TID/HWQ.
2309	 */
2310	if (do_override) {
2311		bf->bf_state.bfs_txflags |= HAL_TXDESC_CLRDMASK;
2312		/*
2313		 * XXX if it's addba frames, should we be leaking
2314		 * them out via the frame leak method?
2315		 * XXX for now let's not risk it; but we may wish
2316		 * to investigate this later.
2317		 */
2318		ath_tx_xmit_normal(sc, sc->sc_ac2q[pri], bf);
2319	} else if (ath_tx_should_swq_frame(sc, ATH_NODE(ni), m0,
2320	    &queue_to_head)) {
2321		/* Queue to software queue */
2322		ath_tx_swq(sc, ni, sc->sc_ac2q[pri], queue_to_head, bf);
2323	} else {
2324		bf->bf_state.bfs_txflags |= HAL_TXDESC_CLRDMASK;
2325		ath_tx_xmit_normal(sc, sc->sc_ac2q[pri], bf);
2326	}
2327#else
2328	/* Direct-dispatch to the hardware */
2329	bf->bf_state.bfs_txflags |= HAL_TXDESC_CLRDMASK;
2330	/*
2331	 * Update the current leak count if
2332	 * we're leaking frames; and set the
2333	 * MORE flag as appropriate.
2334	 */
2335	ath_tx_leak_count_update(sc, tid, bf);
2336	ath_tx_xmit_normal(sc, sc->sc_ac2q[pri], bf);
2337#endif
2338	return 0;
2339}
2340
2341/*
2342 * Send a raw frame.
2343 *
2344 * This can be called by net80211.
2345 */
2346int
2347ath_raw_xmit(struct ieee80211_node *ni, struct mbuf *m,
2348	const struct ieee80211_bpf_params *params)
2349{
2350	struct ieee80211com *ic = ni->ni_ic;
2351	struct ifnet *ifp = ic->ic_ifp;
2352	struct ath_softc *sc = ifp->if_softc;
2353	struct ath_buf *bf;
2354	struct ieee80211_frame *wh = mtod(m, struct ieee80211_frame *);
2355	int error = 0;
2356
2357	ATH_PCU_LOCK(sc);
2358	if (sc->sc_inreset_cnt > 0) {
2359		device_printf(sc->sc_dev, "%s: sc_inreset_cnt > 0; bailing\n",
2360		    __func__);
2361		error = EIO;
2362		ATH_PCU_UNLOCK(sc);
2363		goto bad0;
2364	}
2365	sc->sc_txstart_cnt++;
2366	ATH_PCU_UNLOCK(sc);
2367
2368	ATH_TX_LOCK(sc);
2369
2370	if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0 || sc->sc_invalid) {
2371		DPRINTF(sc, ATH_DEBUG_XMIT, "%s: discard frame, %s", __func__,
2372		    (ifp->if_drv_flags & IFF_DRV_RUNNING) == 0 ?
2373			"!running" : "invalid");
2374		m_freem(m);
2375		error = ENETDOWN;
2376		goto bad;
2377	}
2378
2379	/*
2380	 * Enforce how deep the multicast queue can grow.
2381	 *
2382	 * XXX duplicated in ath_tx_start().
2383	 */
2384	if (IEEE80211_IS_MULTICAST(wh->i_addr1)) {
2385		if (sc->sc_cabq->axq_depth + sc->sc_cabq->fifo.axq_depth
2386		    > sc->sc_txq_mcastq_maxdepth) {
2387			sc->sc_stats.ast_tx_mcastq_overflow++;
2388			error = ENOBUFS;
2389		}
2390
2391		if (error != 0) {
2392			m_freem(m);
2393			goto bad;
2394		}
2395	}
2396
2397	/*
2398	 * Grab a TX buffer and associated resources.
2399	 */
2400	bf = ath_getbuf(sc, ATH_BUFTYPE_MGMT);
2401	if (bf == NULL) {
2402		sc->sc_stats.ast_tx_nobuf++;
2403		m_freem(m);
2404		error = ENOBUFS;
2405		goto bad;
2406	}
2407	ATH_KTR(sc, ATH_KTR_TX, 3, "ath_raw_xmit: m=%p, params=%p, bf=%p\n",
2408	    m, params,  bf);
2409
2410	if (params == NULL) {
2411		/*
2412		 * Legacy path; interpret frame contents to decide
2413		 * precisely how to send the frame.
2414		 */
2415		if (ath_tx_start(sc, ni, bf, m)) {
2416			error = EIO;		/* XXX */
2417			goto bad2;
2418		}
2419	} else {
2420		/*
2421		 * Caller supplied explicit parameters to use in
2422		 * sending the frame.
2423		 */
2424		if (ath_tx_raw_start(sc, ni, bf, m, params)) {
2425			error = EIO;		/* XXX */
2426			goto bad2;
2427		}
2428	}
2429	sc->sc_wd_timer = 5;
2430	ifp->if_opackets++;
2431	sc->sc_stats.ast_tx_raw++;
2432
2433	/*
2434	 * Update the TIM - if there's anything queued to the
2435	 * software queue and power save is enabled, we should
2436	 * set the TIM.
2437	 */
2438	ath_tx_update_tim(sc, ni, 1);
2439
2440	ATH_TX_UNLOCK(sc);
2441
2442	ATH_PCU_LOCK(sc);
2443	sc->sc_txstart_cnt--;
2444	ATH_PCU_UNLOCK(sc);
2445
2446	return 0;
2447bad2:
2448	ATH_KTR(sc, ATH_KTR_TX, 3, "ath_raw_xmit: bad2: m=%p, params=%p, "
2449	    "bf=%p",
2450	    m,
2451	    params,
2452	    bf);
2453	ATH_TXBUF_LOCK(sc);
2454	ath_returnbuf_head(sc, bf);
2455	ATH_TXBUF_UNLOCK(sc);
2456bad:
2457
2458	ATH_TX_UNLOCK(sc);
2459
2460	ATH_PCU_LOCK(sc);
2461	sc->sc_txstart_cnt--;
2462	ATH_PCU_UNLOCK(sc);
2463bad0:
2464	ATH_KTR(sc, ATH_KTR_TX, 2, "ath_raw_xmit: bad0: m=%p, params=%p",
2465	    m, params);
2466	ifp->if_oerrors++;
2467	sc->sc_stats.ast_tx_raw_fail++;
2468	ieee80211_free_node(ni);
2469
2470	return error;
2471}
2472
2473/* Some helper functions */
2474
2475/*
 * ADDBA (and potentially others) need to be placed in the same
 * hardware queue as the TID/node it relates to. This is so
 * it goes out after any pending non-aggregate frames to the
 * same node/TID.
 *
 * If this isn't done, the ADDBA can go out before the frames
 * queued in hardware. Even though these frames have sequence
 * numbers -earlier- than the ADDBA (and no frames whose sequence
 * numbers are after the ADDBA should be pending!) they'll arrive
 * after the ADDBA - and the receiving end will simply drop them
 * as being out of the BAW.
2487 *
 * The frames can't be appended to the TID software queue - they'll
 * never be sent out.
2490 * dispatched to the hardware, rather than queued in software.
2491 * So if this function returns true, the TXQ has to be
2492 * overridden and it has to be directly dispatched.
2493 *
2494 * It's a dirty hack, but someone's gotta do it.
2495 */
2496
2497/*
2498 * XXX doesn't belong here!
2499 */
2500static int
2501ieee80211_is_action(struct ieee80211_frame *wh)
2502{
2503	/* Type: Management frame? */
2504	if ((wh->i_fc[0] & IEEE80211_FC0_TYPE_MASK) !=
2505	    IEEE80211_FC0_TYPE_MGT)
2506		return 0;
2507
2508	/* Subtype: Action frame? */
2509	if ((wh->i_fc[0] & IEEE80211_FC0_SUBTYPE_MASK) !=
2510	    IEEE80211_FC0_SUBTYPE_ACTION)
2511		return 0;
2512
2513	return 1;
2514}
2515
2516#define	MS(_v, _f)	(((_v) & _f) >> _f##_S)
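/*
 * For example, MS(baparamset, IEEE80211_BAPS_TID) expands via token
 * pasting to ((baparamset & IEEE80211_BAPS_TID) >> IEEE80211_BAPS_TID_S),
 * ie mask out the TID field and shift it down to bit 0.
 */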
2517/*
2518 * Return an alternate TID for ADDBA request frames.
2519 *
2520 * Yes, this likely should be done in the net80211 layer.
2521 */
2522static int
2523ath_tx_action_frame_override_queue(struct ath_softc *sc,
2524    struct ieee80211_node *ni,
2525    struct mbuf *m0, int *tid)
2526{
2527	struct ieee80211_frame *wh = mtod(m0, struct ieee80211_frame *);
2528	struct ieee80211_action_ba_addbarequest *ia;
2529	uint8_t *frm;
2530	uint16_t baparamset;
2531
2532	/* Not action frame? Bail */
2533	if (! ieee80211_is_action(wh))
2534		return 0;
2535
2536	/* XXX Not needed for frames we send? */
2537#if 0
2538	/* Correct length? */
2539	if (! ieee80211_parse_action(ni, m))
2540		return 0;
2541#endif
2542
2543	/* Extract out action frame */
2544	frm = (u_int8_t *)&wh[1];
2545	ia = (struct ieee80211_action_ba_addbarequest *) frm;
2546
2547	/* Not ADDBA? Bail */
2548	if (ia->rq_header.ia_category != IEEE80211_ACTION_CAT_BA)
2549		return 0;
2550	if (ia->rq_header.ia_action != IEEE80211_ACTION_BA_ADDBA_REQUEST)
2551		return 0;
2552
2553	/* Extract TID, return it */
2554	baparamset = le16toh(ia->rq_baparamset);
2555	*tid = (int) MS(baparamset, IEEE80211_BAPS_TID);
2556
2557	return 1;
2558}
2559#undef	MS
2560
2561/* Per-node software queue operations */
2562
2563/*
2564 * Add the current packet to the given BAW.
2565 * It is assumed that the current packet
2566 *
2567 * + fits inside the BAW;
2568 * + already has had a sequence number allocated.
2569 *
2570 * Since the BAW status may be modified by both the ath task and
2571 * the net80211/ifnet contexts, the TID must be locked.
2572 */
2573void
2574ath_tx_addto_baw(struct ath_softc *sc, struct ath_node *an,
2575    struct ath_tid *tid, struct ath_buf *bf)
2576{
2577	int index, cindex;
2578	struct ieee80211_tx_ampdu *tap;
2579
2580	ATH_TX_LOCK_ASSERT(sc);
2581
2582	if (bf->bf_state.bfs_isretried)
2583		return;
2584
2585	tap = ath_tx_get_tx_tid(an, tid->tid);
2586
2587	if (! bf->bf_state.bfs_dobaw) {
2588		device_printf(sc->sc_dev,
2589		    "%s: dobaw=0, seqno=%d, window %d:%d\n",
2590		    __func__,
2591		    SEQNO(bf->bf_state.bfs_seqno),
2592		    tap->txa_start,
2593		    tap->txa_wnd);
2594	}
2595
2596	if (bf->bf_state.bfs_addedbaw)
2597		device_printf(sc->sc_dev,
2598		    "%s: re-added? tid=%d, seqno %d; window %d:%d; "
2599		    "baw head=%d tail=%d\n",
2600		    __func__, tid->tid, SEQNO(bf->bf_state.bfs_seqno),
2601		    tap->txa_start, tap->txa_wnd, tid->baw_head,
2602		    tid->baw_tail);
2603
2604	/*
2605	 * Verify that the given sequence number is not outside of the
2606	 * BAW.  Complain loudly if that's the case.
2607	 */
2608	if (! BAW_WITHIN(tap->txa_start, tap->txa_wnd,
2609	    SEQNO(bf->bf_state.bfs_seqno))) {
2610		device_printf(sc->sc_dev,
2611		    "%s: bf=%p: outside of BAW?? tid=%d, seqno %d; window %d:%d; "
2612		    "baw head=%d tail=%d\n",
2613		    __func__, bf, tid->tid, SEQNO(bf->bf_state.bfs_seqno),
2614		    tap->txa_start, tap->txa_wnd, tid->baw_head,
2615		    tid->baw_tail);
2616	}
2617
2618	/*
2619	 * ni->ni_txseqs[] is the currently allocated seqno.
	 * The txa state contains the current BAW start.
2621	 */
2622	index  = ATH_BA_INDEX(tap->txa_start, SEQNO(bf->bf_state.bfs_seqno));
2623	cindex = (tid->baw_head + index) & (ATH_TID_MAX_BUFS - 1);
2624	DPRINTF(sc, ATH_DEBUG_SW_TX_BAW,
2625	    "%s: tid=%d, seqno %d; window %d:%d; index=%d cindex=%d "
2626	    "baw head=%d tail=%d\n",
2627	    __func__, tid->tid, SEQNO(bf->bf_state.bfs_seqno),
2628	    tap->txa_start, tap->txa_wnd, index, cindex, tid->baw_head,
2629	    tid->baw_tail);
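	/*
	 * Illustrative example (hypothetical numbers): if txa_start is
	 * 100 and this frame's seqno is 103, index is 3; with baw_head
	 * at, say, 10, cindex becomes (10 + 3) masked by
	 * (ATH_TID_MAX_BUFS - 1), ie the slot wraps around the circular
	 * tx_buf[] array.
	 */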
2630
2631
2632#if 0
2633	assert(tid->tx_buf[cindex] == NULL);
2634#endif
2635	if (tid->tx_buf[cindex] != NULL) {
2636		device_printf(sc->sc_dev,
2637		    "%s: ba packet dup (index=%d, cindex=%d, "
2638		    "head=%d, tail=%d)\n",
2639		    __func__, index, cindex, tid->baw_head, tid->baw_tail);
2640		device_printf(sc->sc_dev,
2641		    "%s: BA bf: %p; seqno=%d ; new bf: %p; seqno=%d\n",
2642		    __func__,
2643		    tid->tx_buf[cindex],
2644		    SEQNO(tid->tx_buf[cindex]->bf_state.bfs_seqno),
2645		    bf,
2646		    SEQNO(bf->bf_state.bfs_seqno)
2647		);
2648	}
2649	tid->tx_buf[cindex] = bf;
2650
2651	if (index >= ((tid->baw_tail - tid->baw_head) &
2652	    (ATH_TID_MAX_BUFS - 1))) {
2653		tid->baw_tail = cindex;
2654		INCR(tid->baw_tail, ATH_TID_MAX_BUFS);
2655	}
2656}
2657
2658/*
2659 * Flip the BAW buffer entry over from the existing one to the new one.
2660 *
2661 * When software retransmitting a (sub-)frame, it is entirely possible that
2662 * the frame ath_buf is marked as BUSY and can't be immediately reused.
2663 * In that instance the buffer is cloned and the new buffer is used for
2664 * retransmit. We thus need to update the ath_buf slot in the BAW buf
2665 * tracking array to maintain consistency.
2666 */
2667static void
2668ath_tx_switch_baw_buf(struct ath_softc *sc, struct ath_node *an,
2669    struct ath_tid *tid, struct ath_buf *old_bf, struct ath_buf *new_bf)
2670{
2671	int index, cindex;
2672	struct ieee80211_tx_ampdu *tap;
2673	int seqno = SEQNO(old_bf->bf_state.bfs_seqno);
2674
2675	ATH_TX_LOCK_ASSERT(sc);
2676
2677	tap = ath_tx_get_tx_tid(an, tid->tid);
2678	index  = ATH_BA_INDEX(tap->txa_start, seqno);
2679	cindex = (tid->baw_head + index) & (ATH_TID_MAX_BUFS - 1);
2680
2681	/*
2682	 * Just warn for now; if it happens then we should find out
2683	 * about it. It's highly likely the aggregation session will
2684	 * soon hang.
2685	 */
2686	if (old_bf->bf_state.bfs_seqno != new_bf->bf_state.bfs_seqno) {
2687		device_printf(sc->sc_dev, "%s: retransmitted buffer"
2688		    " has mismatching seqno's, BA session may hang.\n",
2689		    __func__);
2690		device_printf(sc->sc_dev, "%s: old seqno=%d, new_seqno=%d\n",
2691		    __func__,
2692		    old_bf->bf_state.bfs_seqno,
2693		    new_bf->bf_state.bfs_seqno);
2694	}
2695
2696	if (tid->tx_buf[cindex] != old_bf) {
2697		device_printf(sc->sc_dev, "%s: ath_buf pointer incorrect; "
2698		    " has m BA session may hang.\n",
2699		    __func__);
2700		device_printf(sc->sc_dev, "%s: old bf=%p, new bf=%p\n",
2701		    __func__,
2702		    old_bf, new_bf);
2703	}
2704
2705	tid->tx_buf[cindex] = new_bf;
2706}
2707
2708/*
2709 * seq_start - left edge of BAW
2710 * seq_next - current/next sequence number to allocate
2711 *
2712 * Since the BAW status may be modified by both the ath task and
2713 * the net80211/ifnet contexts, the TID must be locked.
2714 */
2715static void
2716ath_tx_update_baw(struct ath_softc *sc, struct ath_node *an,
2717    struct ath_tid *tid, const struct ath_buf *bf)
2718{
2719	int index, cindex;
2720	struct ieee80211_tx_ampdu *tap;
2721	int seqno = SEQNO(bf->bf_state.bfs_seqno);
2722
2723	ATH_TX_LOCK_ASSERT(sc);
2724
2725	tap = ath_tx_get_tx_tid(an, tid->tid);
2726	index  = ATH_BA_INDEX(tap->txa_start, seqno);
2727	cindex = (tid->baw_head + index) & (ATH_TID_MAX_BUFS - 1);
2728
2729	DPRINTF(sc, ATH_DEBUG_SW_TX_BAW,
2730	    "%s: tid=%d, baw=%d:%d, seqno=%d, index=%d, cindex=%d, "
2731	    "baw head=%d, tail=%d\n",
2732	    __func__, tid->tid, tap->txa_start, tap->txa_wnd, seqno, index,
2733	    cindex, tid->baw_head, tid->baw_tail);
2734
2735	/*
2736	 * If this occurs then we have a big problem - something else
2737	 * has slid tap->txa_start along without updating the BAW
2738	 * tracking start/end pointers. Thus the TX BAW state is now
2739	 * completely busted.
2740	 *
2741	 * But for now, since I haven't yet fixed TDMA and buffer cloning,
2742	 * it's quite possible that a cloned buffer is making its way
2743	 * here and causing it to fire off. Disable TDMA for now.
2744	 */
2745	if (tid->tx_buf[cindex] != bf) {
2746		device_printf(sc->sc_dev,
2747		    "%s: comp bf=%p, seq=%d; slot bf=%p, seqno=%d\n",
2748		    __func__,
2749		    bf, SEQNO(bf->bf_state.bfs_seqno),
2750		    tid->tx_buf[cindex],
2751		    (tid->tx_buf[cindex] != NULL) ?
2752		      SEQNO(tid->tx_buf[cindex]->bf_state.bfs_seqno) : -1);
2753	}
2754
2755	tid->tx_buf[cindex] = NULL;
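	/*
	 * The loop below slides the BAW left edge forward: walk from
	 * baw_head over every contiguous slot that has already completed
	 * (tx_buf[] == NULL) and advance both txa_start and baw_head past
	 * it.  For example (hypothetical), if the first three slots are
	 * done, the window start moves up by three sequence numbers.
	 */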
2756
2757	while (tid->baw_head != tid->baw_tail &&
2758	    !tid->tx_buf[tid->baw_head]) {
2759		INCR(tap->txa_start, IEEE80211_SEQ_RANGE);
2760		INCR(tid->baw_head, ATH_TID_MAX_BUFS);
2761	}
2762	DPRINTF(sc, ATH_DEBUG_SW_TX_BAW,
2763	    "%s: baw is now %d:%d, baw head=%d\n",
2764	    __func__, tap->txa_start, tap->txa_wnd, tid->baw_head);
2765}
2766
2767static void
2768ath_tx_leak_count_update(struct ath_softc *sc, struct ath_tid *tid,
2769    struct ath_buf *bf)
2770{
2771	struct ieee80211_frame *wh;
2772
2773	ATH_TX_LOCK_ASSERT(sc);
2774
2775	if (tid->an->an_leak_count > 0) {
2776		wh = mtod(bf->bf_m, struct ieee80211_frame *);
2777
2778		/*
2779		 * Update MORE based on the software/net80211 queue states.
2780		 */
2781		if ((tid->an->an_stack_psq > 0)
2782		    || (tid->an->an_swq_depth > 0))
2783			wh->i_fc[1] |= IEEE80211_FC1_MORE_DATA;
2784		else
2785			wh->i_fc[1] &= ~IEEE80211_FC1_MORE_DATA;
2786
2787		DPRINTF(sc, ATH_DEBUG_NODE_PWRSAVE,
2788		    "%s: %6D: leak count = %d, psq=%d, swq=%d, MORE=%d\n",
2789		    __func__,
2790		    tid->an->an_node.ni_macaddr,
2791		    ":",
2792		    tid->an->an_leak_count,
2793		    tid->an->an_stack_psq,
2794		    tid->an->an_swq_depth,
2795		    !! (wh->i_fc[1] & IEEE80211_FC1_MORE_DATA));
2796
2797		/*
2798		 * Re-sync the underlying buffer.
2799		 */
2800		bus_dmamap_sync(sc->sc_dmat, bf->bf_dmamap,
2801		    BUS_DMASYNC_PREWRITE);
2802
		tid->an->an_leak_count--;
2804	}
2805}
2806
2807static int
2808ath_tx_tid_can_tx_or_sched(struct ath_softc *sc, struct ath_tid *tid)
2809{
2810
2811	ATH_TX_LOCK_ASSERT(sc);
2812
2813	if (tid->an->an_leak_count > 0) {
2814		return (1);
2815	}
2816	if (tid->paused)
2817		return (0);
2818	return (1);
2819}
2820
2821/*
2822 * Mark the current node/TID as ready to TX.
2823 *
2824 * This is done to make it easy for the software scheduler to
2825 * find which nodes have data to send.
2826 *
2827 * The TXQ lock must be held.
2828 */
2829void
2830ath_tx_tid_sched(struct ath_softc *sc, struct ath_tid *tid)
2831{
2832	struct ath_txq *txq = sc->sc_ac2q[tid->ac];
2833
2834	ATH_TX_LOCK_ASSERT(sc);
2835
2836	/*
2837	 * If we are leaking out a frame to this destination
2838	 * for PS-POLL, ensure that we allow scheduling to
2839	 * occur.
2840	 */
2841	if (! ath_tx_tid_can_tx_or_sched(sc, tid))
2842		return;		/* paused, can't schedule yet */
2843
2844	if (tid->sched)
2845		return;		/* already scheduled */
2846
2847	tid->sched = 1;
2848
2849#if 0
2850	/*
	 * If this is a sleeping node we're leaking to, give
	 * it a higher priority.  This is so bad for QoS it hurts.
2853	 */
2854	if (tid->an->an_leak_count) {
2855		TAILQ_INSERT_HEAD(&txq->axq_tidq, tid, axq_qelem);
2856	} else {
2857		TAILQ_INSERT_TAIL(&txq->axq_tidq, tid, axq_qelem);
2858	}
2859#endif
2860
2861	/*
2862	 * We can't do the above - it'll confuse the TXQ software
2863	 * scheduler which will keep checking the _head_ TID
2864	 * in the list to see if it has traffic.  If we queue
2865	 * a TID to the head of the list and it doesn't transmit,
2866	 * we'll check it again.
2867	 *
2868	 * So, get the rest of this leaking frames support working
2869	 * and reliable first and _then_ optimise it so they're
2870	 * pushed out in front of any other pending software
2871	 * queued nodes.
2872	 */
2873	TAILQ_INSERT_TAIL(&txq->axq_tidq, tid, axq_qelem);
2874}
2875
2876/*
2877 * Mark the current node as no longer needing to be polled for
2878 * TX packets.
2879 *
2880 * The TXQ lock must be held.
2881 */
2882static void
2883ath_tx_tid_unsched(struct ath_softc *sc, struct ath_tid *tid)
2884{
2885	struct ath_txq *txq = sc->sc_ac2q[tid->ac];
2886
2887	ATH_TX_LOCK_ASSERT(sc);
2888
2889	if (tid->sched == 0)
2890		return;
2891
2892	tid->sched = 0;
2893	TAILQ_REMOVE(&txq->axq_tidq, tid, axq_qelem);
2894}
2895
2896/*
2897 * Assign a sequence number manually to the given frame.
2898 *
2899 * This should only be called for A-MPDU TX frames.
2900 */
2901static ieee80211_seq
2902ath_tx_tid_seqno_assign(struct ath_softc *sc, struct ieee80211_node *ni,
2903    struct ath_buf *bf, struct mbuf *m0)
2904{
2905	struct ieee80211_frame *wh;
2906	int tid, pri;
2907	ieee80211_seq seqno;
2908	uint8_t subtype;
2909
2910	/* TID lookup */
2911	wh = mtod(m0, struct ieee80211_frame *);
2912	pri = M_WME_GETAC(m0);			/* honor classification */
2913	tid = WME_AC_TO_TID(pri);
2914	DPRINTF(sc, ATH_DEBUG_SW_TX, "%s: pri=%d, tid=%d, qos has seq=%d\n",
2915	    __func__, pri, tid, IEEE80211_QOS_HAS_SEQ(wh));
2916
2917	/* XXX Is it a control frame? Ignore */
2918
2919	/* Does the packet require a sequence number? */
2920	if (! IEEE80211_QOS_HAS_SEQ(wh))
2921		return -1;
2922
2923	ATH_TX_LOCK_ASSERT(sc);
2924
2925	/*
2926	 * Is it a QOS NULL Data frame? Give it a sequence number from
2927	 * the default TID (IEEE80211_NONQOS_TID.)
2928	 *
2929	 * The RX path of everything I've looked at doesn't include the NULL
2930	 * data frame sequence number in the aggregation state updates, so
2931	 * assigning it a sequence number there will cause a BAW hole on the
2932	 * RX side.
2933	 */
2934	subtype = wh->i_fc[0] & IEEE80211_FC0_SUBTYPE_MASK;
2935	if (subtype == IEEE80211_FC0_SUBTYPE_QOS_NULL) {
2936		/* XXX no locking for this TID? This is a bit of a problem. */
2937		seqno = ni->ni_txseqs[IEEE80211_NONQOS_TID];
2938		INCR(ni->ni_txseqs[IEEE80211_NONQOS_TID], IEEE80211_SEQ_RANGE);
2939	} else {
2940		/* Manually assign sequence number */
2941		seqno = ni->ni_txseqs[tid];
2942		INCR(ni->ni_txseqs[tid], IEEE80211_SEQ_RANGE);
2943	}
2944	*(uint16_t *)&wh->i_seq[0] = htole16(seqno << IEEE80211_SEQ_SEQ_SHIFT);
2945	M_SEQNO_SET(m0, seqno);
2946
2947	/* Return so caller can do something with it if needed */
2948	DPRINTF(sc, ATH_DEBUG_SW_TX, "%s:  -> seqno=%d\n", __func__, seqno);
2949	return seqno;
2950}
2951
2952/*
2953 * Attempt to direct dispatch an aggregate frame to hardware.
2954 * If the frame is out of BAW, queue.
2955 * Otherwise, schedule it as a single frame.
2956 */
2957static void
2958ath_tx_xmit_aggr(struct ath_softc *sc, struct ath_node *an,
2959    struct ath_txq *txq, struct ath_buf *bf)
2960{
2961	struct ath_tid *tid = &an->an_tid[bf->bf_state.bfs_tid];
2962	struct ieee80211_tx_ampdu *tap;
2963
2964	ATH_TX_LOCK_ASSERT(sc);
2965
2966	tap = ath_tx_get_tx_tid(an, tid->tid);
2967
2968	/* paused? queue */
2969	if (! ath_tx_tid_can_tx_or_sched(sc, tid)) {
2970		ATH_TID_INSERT_HEAD(tid, bf, bf_list);
2971		/* XXX don't sched - we're paused! */
2972		return;
2973	}
2974
2975	/* outside baw? queue */
2976	if (bf->bf_state.bfs_dobaw &&
2977	    (! BAW_WITHIN(tap->txa_start, tap->txa_wnd,
2978	    SEQNO(bf->bf_state.bfs_seqno)))) {
2979		ATH_TID_INSERT_HEAD(tid, bf, bf_list);
2980		ath_tx_tid_sched(sc, tid);
2981		return;
2982	}
2983
2984	/*
2985	 * This is a temporary check and should be removed once
2986	 * all the relevant code paths have been fixed.
2987	 *
2988	 * During aggregate retries, it's possible that the head
2989	 * frame will fail (which has the bfs_aggr and bfs_nframes
2990	 * fields set for said aggregate) and will be retried as
2991	 * a single frame.  In this instance, the values should
2992	 * be reset or the completion code will get upset with you.
2993	 */
2994	if (bf->bf_state.bfs_aggr != 0 || bf->bf_state.bfs_nframes > 1) {
2995		device_printf(sc->sc_dev, "%s: bfs_aggr=%d, bfs_nframes=%d\n",
2996		    __func__,
2997		    bf->bf_state.bfs_aggr,
2998		    bf->bf_state.bfs_nframes);
2999		bf->bf_state.bfs_aggr = 0;
3000		bf->bf_state.bfs_nframes = 1;
3001	}
3002
3003	/* Update CLRDMASK just before this frame is queued */
3004	ath_tx_update_clrdmask(sc, tid, bf);
3005
3006	/* Direct dispatch to hardware */
3007	ath_tx_do_ratelookup(sc, bf);
3008	ath_tx_calc_duration(sc, bf);
3009	ath_tx_calc_protection(sc, bf);
3010	ath_tx_set_rtscts(sc, bf);
3011	ath_tx_rate_fill_rcflags(sc, bf);
3012	ath_tx_setds(sc, bf);
3013
3014	/* Statistics */
3015	sc->sc_aggr_stats.aggr_low_hwq_single_pkt++;
3016
3017	/* Track per-TID hardware queue depth correctly */
3018	tid->hwq_depth++;
3019
3020	/* Add to BAW */
3021	if (bf->bf_state.bfs_dobaw) {
3022		ath_tx_addto_baw(sc, an, tid, bf);
3023		bf->bf_state.bfs_addedbaw = 1;
3024	}
3025
3026	/* Set completion handler, multi-frame aggregate or not */
3027	bf->bf_comp = ath_tx_aggr_comp;
3028
3029	/*
3030	 * Update the current leak count if
3031	 * we're leaking frames; and set the
3032	 * MORE flag as appropriate.
3033	 */
3034	ath_tx_leak_count_update(sc, tid, bf);
3035
3036	/* Hand off to hardware */
3037	ath_tx_handoff(sc, txq, bf);
3038}
3039
3040/*
3041 * Attempt to send the packet.
3042 * If the queue isn't busy, direct-dispatch.
3043 * If the queue is busy enough, queue the given packet on the
3044 *  relevant software queue.
3045 */
3046void
3047ath_tx_swq(struct ath_softc *sc, struct ieee80211_node *ni,
3048    struct ath_txq *txq, int queue_to_head, struct ath_buf *bf)
3049{
3050	struct ath_node *an = ATH_NODE(ni);
3051	struct ieee80211_frame *wh;
3052	struct ath_tid *atid;
3053	int pri, tid;
3054	struct mbuf *m0 = bf->bf_m;
3055
3056	ATH_TX_LOCK_ASSERT(sc);
3057
3058	/* Fetch the TID - non-QoS frames get assigned to TID 16 */
3059	wh = mtod(m0, struct ieee80211_frame *);
3060	pri = ath_tx_getac(sc, m0);
3061	tid = ath_tx_gettid(sc, m0);
3062	atid = &an->an_tid[tid];
3063
3064	DPRINTF(sc, ATH_DEBUG_SW_TX, "%s: bf=%p, pri=%d, tid=%d, qos=%d\n",
3065	    __func__, bf, pri, tid, IEEE80211_QOS_HAS_SEQ(wh));
3066
3067	/* Set local packet state, used to queue packets to hardware */
3068	/* XXX potentially duplicate info, re-check */
3069	bf->bf_state.bfs_tid = tid;
3070	bf->bf_state.bfs_tx_queue = txq->axq_qnum;
3071	bf->bf_state.bfs_pri = pri;
3072
3073	/*
	 * If the hardware queue isn't busy, direct-dispatch it.
	 * If the hardware queue is busy, software queue it.
	 * If the TID is paused or the traffic is outside the BAW,
	 * software queue it.
3078	 *
3079	 * If the node is in power-save and we're leaking a frame,
3080	 * leak a single frame.
3081	 */
3082	if (! ath_tx_tid_can_tx_or_sched(sc, atid)) {
3083		/* TID is paused, queue */
3084		DPRINTF(sc, ATH_DEBUG_SW_TX, "%s: paused\n", __func__);
3085		/*
3086		 * If the caller requested that it be sent at a high
3087		 * priority, queue it at the head of the list.
3088		 */
3089		if (queue_to_head)
3090			ATH_TID_INSERT_HEAD(atid, bf, bf_list);
3091		else
3092			ATH_TID_INSERT_TAIL(atid, bf, bf_list);
3093	} else if (ath_tx_ampdu_pending(sc, an, tid)) {
3094		/* AMPDU pending; queue */
3095		DPRINTF(sc, ATH_DEBUG_SW_TX, "%s: pending\n", __func__);
3096		ATH_TID_INSERT_TAIL(atid, bf, bf_list);
3097		/* XXX sched? */
3098	} else if (ath_tx_ampdu_running(sc, an, tid)) {
3099		/* AMPDU running, attempt direct dispatch if possible */
3100
3101		/*
3102		 * Always queue the frame to the tail of the list.
3103		 */
3104		ATH_TID_INSERT_TAIL(atid, bf, bf_list);
3105
3106		/*
3107		 * If the hardware queue isn't busy, direct dispatch
3108		 * the head frame in the list.  Don't schedule the
3109		 * TID - let it build some more frames first?
3110		 *
3111		 * When running A-MPDU, always just check the hardware
3112		 * queue depth against the aggregate frame limit.
3113		 * We don't want to burst a large number of single frames
3114		 * out to the hardware; we want to aggressively hold back.
3115		 *
3116		 * Otherwise, schedule the TID.
3117		 */
3118		/* XXX TXQ locking */
3119		if (txq->axq_depth + txq->fifo.axq_depth < sc->sc_hwq_limit_aggr) {
3120			bf = ATH_TID_FIRST(atid);
3121			ATH_TID_REMOVE(atid, bf, bf_list);
3122
3123			/*
3124			 * Ensure it's definitely treated as a non-AMPDU
3125			 * frame - this information may have been left
3126			 * over from a previous attempt.
3127			 */
3128			bf->bf_state.bfs_aggr = 0;
3129			bf->bf_state.bfs_nframes = 1;
3130
3131			/* Queue to the hardware */
3132			ath_tx_xmit_aggr(sc, an, txq, bf);
3133			DPRINTF(sc, ATH_DEBUG_SW_TX,
3134			    "%s: xmit_aggr\n",
3135			    __func__);
3136		} else {
3137			DPRINTF(sc, ATH_DEBUG_SW_TX,
3138			    "%s: ampdu; swq'ing\n",
3139			    __func__);
3140
3141			ath_tx_tid_sched(sc, atid);
3142		}
3143	/*
3144	 * If we're not doing A-MPDU, be prepared to direct dispatch
3145	 * up to both limits if possible.  This particular corner
3146	 * case may end up with packet starvation between aggregate
	 * traffic and non-aggregate traffic: we want to ensure
3148	 * that non-aggregate stations get a few frames queued to the
3149	 * hardware before the aggregate station(s) get their chance.
3150	 *
3151	 * So if you only ever see a couple of frames direct dispatched
3152	 * to the hardware from a non-AMPDU client, check both here
3153	 * and in the software queue dispatcher to ensure that those
3154	 * non-AMPDU stations get a fair chance to transmit.
3155	 */
3156	/* XXX TXQ locking */
3157	} else if ((txq->axq_depth + txq->fifo.axq_depth < sc->sc_hwq_limit_nonaggr) &&
3158		    (txq->axq_aggr_depth < sc->sc_hwq_limit_aggr)) {
3159		/* AMPDU not running, attempt direct dispatch */
3160		DPRINTF(sc, ATH_DEBUG_SW_TX, "%s: xmit_normal\n", __func__);
3161		/* See if clrdmask needs to be set */
3162		ath_tx_update_clrdmask(sc, atid, bf);
3163
3164		/*
3165		 * Update the current leak count if
3166		 * we're leaking frames; and set the
3167		 * MORE flag as appropriate.
3168		 */
3169		ath_tx_leak_count_update(sc, atid, bf);
3170
3171		/*
3172		 * Dispatch the frame.
3173		 */
3174		ath_tx_xmit_normal(sc, txq, bf);
3175	} else {
3176		/* Busy; queue */
3177		DPRINTF(sc, ATH_DEBUG_SW_TX, "%s: swq'ing\n", __func__);
3178		ATH_TID_INSERT_TAIL(atid, bf, bf_list);
3179		ath_tx_tid_sched(sc, atid);
3180	}
3181}
3182
3183/*
 * Only set the clrdmask bit if none of the TIDs on this node are
 * currently filtered.
3186 *
3187 * XXX TODO: go through all the callers and check to see
3188 * which are being called in the context of looping over all
3189 * TIDs (eg, if all tids are being paused, resumed, etc.)
3190 * That'll avoid O(n^2) complexity here.
3191 */
3192static void
3193ath_tx_set_clrdmask(struct ath_softc *sc, struct ath_node *an)
3194{
3195	int i;
3196
3197	ATH_TX_LOCK_ASSERT(sc);
3198
3199	for (i = 0; i < IEEE80211_TID_SIZE; i++) {
3200		if (an->an_tid[i].isfiltered == 1)
3201			return;
3202	}
3203	an->clrdmask = 1;
3204}
3205
3206/*
3207 * Configure the per-TID node state.
3208 *
3209 * This likely belongs in if_ath_node.c but I can't think of anywhere
3210 * else to put it just yet.
3211 *
 * This sets up the TAILQs and per-TID state as appropriate.
3213 */
3214void
3215ath_tx_tid_init(struct ath_softc *sc, struct ath_node *an)
3216{
3217	int i, j;
3218	struct ath_tid *atid;
3219
3220	for (i = 0; i < IEEE80211_TID_SIZE; i++) {
3221		atid = &an->an_tid[i];
3222
		/* XXX now with this bzero(), is the field 0'ing needed? */
3224		bzero(atid, sizeof(*atid));
3225
3226		TAILQ_INIT(&atid->tid_q);
3227		TAILQ_INIT(&atid->filtq.tid_q);
3228		atid->tid = i;
3229		atid->an = an;
3230		for (j = 0; j < ATH_TID_MAX_BUFS; j++)
3231			atid->tx_buf[j] = NULL;
3232		atid->baw_head = atid->baw_tail = 0;
3233		atid->paused = 0;
3234		atid->sched = 0;
3235		atid->hwq_depth = 0;
3236		atid->cleanup_inprogress = 0;
3237		if (i == IEEE80211_NONQOS_TID)
3238			atid->ac = ATH_NONQOS_TID_AC;
3239		else
3240			atid->ac = TID_TO_WME_AC(i);
3241	}
3242	an->clrdmask = 1;	/* Always start by setting this bit */
3243}
3244
3245/*
3246 * Pause the current TID. This stops packets from being transmitted
3247 * on it.
3248 *
 * Since this is called from upper layers as well as the driver,
 * the caller must hold the TX lock.
3251 */
3252static void
3253ath_tx_tid_pause(struct ath_softc *sc, struct ath_tid *tid)
3254{
3255
3256	ATH_TX_LOCK_ASSERT(sc);
3257	tid->paused++;
3258	DPRINTF(sc, ATH_DEBUG_SW_TX_CTRL, "%s: paused = %d\n",
3259	    __func__, tid->paused);
3260}
3261
3262/*
3263 * Unpause the current TID, and schedule it if needed.
3264 */
3265static void
3266ath_tx_tid_resume(struct ath_softc *sc, struct ath_tid *tid)
3267{
3268	ATH_TX_LOCK_ASSERT(sc);
3269
3270	/*
	 * There are some odd places where ath_tx_tid_resume() is called
3272	 * when it shouldn't be; this works around that particular issue
3273	 * until it's actually resolved.
3274	 */
3275	if (tid->paused == 0) {
3276		device_printf(sc->sc_dev, "%s: %6D: paused=0?\n",
3277		    __func__,
3278		    tid->an->an_node.ni_macaddr,
3279		    ":");
3280	} else {
3281		tid->paused--;
3282	}
3283
3284	DPRINTF(sc, ATH_DEBUG_SW_TX_CTRL, "%s: unpaused = %d\n",
3285	    __func__, tid->paused);
3286
3287	if (tid->paused)
3288		return;
3289
3290	/*
3291	 * Override the clrdmask configuration for the next frame
3292	 * from this TID, just to get the ball rolling.
3293	 */
3294	ath_tx_set_clrdmask(sc, tid->an);
3295
3296	if (tid->axq_depth == 0)
3297		return;
3298
	/* XXX isfiltered shouldn't ever be 1 at this point */
3300	if (tid->isfiltered == 1) {
3301		device_printf(sc->sc_dev, "%s: filtered?!\n", __func__);
3302		return;
3303	}
3304
3305	ath_tx_tid_sched(sc, tid);
3306
3307	/*
3308	 * Queue the software TX scheduler.
3309	 */
3310	ath_tx_swq_kick(sc);
3311}
3312
3313/*
3314 * Add the given ath_buf to the TID filtered frame list.
3315 * This requires the TID be filtered.
3316 */
3317static void
3318ath_tx_tid_filt_addbuf(struct ath_softc *sc, struct ath_tid *tid,
3319    struct ath_buf *bf)
3320{
3321
3322	ATH_TX_LOCK_ASSERT(sc);
3323
3324	if (! tid->isfiltered)
3325		device_printf(sc->sc_dev, "%s: not filtered?!\n", __func__);
3326
3327	DPRINTF(sc, ATH_DEBUG_SW_TX_FILT, "%s: bf=%p\n", __func__, bf);
3328
3329	/* Set the retry bit and bump the retry counter */
3330	ath_tx_set_retry(sc, bf);
3331	sc->sc_stats.ast_tx_swfiltered++;
3332
3333	ATH_TID_FILT_INSERT_TAIL(tid, bf, bf_list);
3334}
3335
3336/*
3337 * Handle a completed filtered frame from the given TID.
 * This just enables the filtered frame state (pausing the TID) if
 * required and appends the frame to the filtered queue.
3340 */
3341static void
3342ath_tx_tid_filt_comp_buf(struct ath_softc *sc, struct ath_tid *tid,
3343    struct ath_buf *bf)
3344{
3345
3346	ATH_TX_LOCK_ASSERT(sc);
3347
3348	if (! tid->isfiltered) {
3349		DPRINTF(sc, ATH_DEBUG_SW_TX_FILT, "%s: filter transition\n",
3350		    __func__);
3351		tid->isfiltered = 1;
3352		ath_tx_tid_pause(sc, tid);
3353	}
3354
3355	/* Add the frame to the filter queue */
3356	ath_tx_tid_filt_addbuf(sc, tid, bf);
3357}
3358
3359/*
3360 * Complete the filtered frame TX completion.
3361 *
3362 * If there are no more frames in the hardware queue, unpause/unfilter
3363 * the TID if applicable.  Otherwise we will wait for a node PS transition
3364 * to unfilter.
3365 */
3366static void
3367ath_tx_tid_filt_comp_complete(struct ath_softc *sc, struct ath_tid *tid)
3368{
3369	struct ath_buf *bf;
3370
3371	ATH_TX_LOCK_ASSERT(sc);
3372
3373	if (tid->hwq_depth != 0)
3374		return;
3375
3376	DPRINTF(sc, ATH_DEBUG_SW_TX_FILT, "%s: hwq=0, transition back\n",
3377	    __func__);
3378	tid->isfiltered = 0;
3379	/* XXX ath_tx_tid_resume() also calls ath_tx_set_clrdmask()! */
3380	ath_tx_set_clrdmask(sc, tid->an);
3381
3382	/* XXX this is really quite inefficient */
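	/*
	 * Note: pulling entries from the tail of the filtered list and
	 * inserting each at the head of the software queue preserves the
	 * original frame ordering once the loop completes.
	 */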
3383	while ((bf = ATH_TID_FILT_LAST(tid, ath_bufhead_s)) != NULL) {
3384		ATH_TID_FILT_REMOVE(tid, bf, bf_list);
3385		ATH_TID_INSERT_HEAD(tid, bf, bf_list);
3386	}
3387
3388	ath_tx_tid_resume(sc, tid);
3389}
3390
3391/*
3392 * Called when a single (aggregate or otherwise) frame is completed.
3393 *
3394 * Returns 1 if the buffer could be added to the filtered list
3395 * (cloned or otherwise), 0 if the buffer couldn't be added to the
3396 * filtered list (failed clone; expired retry) and the caller should
3397 * free it and handle it like a failure (eg by sending a BAR.)
3398 */
3399static int
3400ath_tx_tid_filt_comp_single(struct ath_softc *sc, struct ath_tid *tid,
3401    struct ath_buf *bf)
3402{
3403	struct ath_buf *nbf;
3404	int retval;
3405
3406	ATH_TX_LOCK_ASSERT(sc);
3407
3408	/*
3409	 * Don't allow a filtered frame to live forever.
3410	 */
3411	if (bf->bf_state.bfs_retries > SWMAX_RETRIES) {
3412		sc->sc_stats.ast_tx_swretrymax++;
3413		DPRINTF(sc, ATH_DEBUG_SW_TX_FILT,
3414		    "%s: bf=%p, seqno=%d, exceeded retries\n",
3415		    __func__,
3416		    bf,
3417		    bf->bf_state.bfs_seqno);
3418		return (0);
3419	}
3420
3421	/*
3422	 * A busy buffer can't be added to the retry list.
3423	 * It needs to be cloned.
3424	 */
3425	if (bf->bf_flags & ATH_BUF_BUSY) {
3426		nbf = ath_tx_retry_clone(sc, tid->an, tid, bf);
3427		DPRINTF(sc, ATH_DEBUG_SW_TX_FILT,
3428		    "%s: busy buffer clone: %p -> %p\n",
3429		    __func__, bf, nbf);
3430	} else {
3431		nbf = bf;
3432	}
3433
3434	if (nbf == NULL) {
3435		DPRINTF(sc, ATH_DEBUG_SW_TX_FILT,
3436		    "%s: busy buffer couldn't be cloned (%p)!\n",
3437		    __func__, bf);
3438		retval = 1;
3439	} else {
3440		ath_tx_tid_filt_comp_buf(sc, tid, nbf);
3441		retval = 0;
3442	}
3443	ath_tx_tid_filt_comp_complete(sc, tid);
3444
3445	return (retval);
3446}
3447
3448static void
3449ath_tx_tid_filt_comp_aggr(struct ath_softc *sc, struct ath_tid *tid,
3450    struct ath_buf *bf_first, ath_bufhead *bf_q)
3451{
3452	struct ath_buf *bf, *bf_next, *nbf;
3453
3454	ATH_TX_LOCK_ASSERT(sc);
3455
3456	bf = bf_first;
3457	while (bf) {
3458		bf_next = bf->bf_next;
3459		bf->bf_next = NULL;	/* Remove it from the aggr list */
3460
3461		/*
3462		 * Don't allow a filtered frame to live forever.
3463		 */
3464		if (bf->bf_state.bfs_retries > SWMAX_RETRIES) {
3465			sc->sc_stats.ast_tx_swretrymax++;
3466			DPRINTF(sc, ATH_DEBUG_SW_TX_FILT,
3467			    "%s: bf=%p, seqno=%d, exceeded retries\n",
3468			    __func__,
3469			    bf,
3470			    bf->bf_state.bfs_seqno);
3471			TAILQ_INSERT_TAIL(bf_q, bf, bf_list);
3472			goto next;
3473		}
3474
3475		if (bf->bf_flags & ATH_BUF_BUSY) {
3476			nbf = ath_tx_retry_clone(sc, tid->an, tid, bf);
3477			DPRINTF(sc, ATH_DEBUG_SW_TX_FILT,
3478			    "%s: busy buffer cloned: %p -> %p",
3479			    __func__, bf, nbf);
3480		} else {
3481			nbf = bf;
3482		}
3483
3484		/*
3485		 * If the buffer couldn't be cloned, add it to bf_q;
3486		 * the caller will free the buffer(s) as required.
3487		 */
3488		if (nbf == NULL) {
3489			DPRINTF(sc, ATH_DEBUG_SW_TX_FILT,
3490			    "%s: buffer couldn't be cloned! (%p)\n",
3491			    __func__, bf);
3492			TAILQ_INSERT_TAIL(bf_q, bf, bf_list);
3493		} else {
3494			ath_tx_tid_filt_comp_buf(sc, tid, nbf);
3495		}
3496next:
3497		bf = bf_next;
3498	}
3499
3500	ath_tx_tid_filt_comp_complete(sc, tid);
3501}
3502
3503/*
3504 * Suspend the queue because we need to TX a BAR.
3505 */
3506static void
3507ath_tx_tid_bar_suspend(struct ath_softc *sc, struct ath_tid *tid)
3508{
3509
3510	ATH_TX_LOCK_ASSERT(sc);
3511
3512	DPRINTF(sc, ATH_DEBUG_SW_TX_BAR,
3513	    "%s: tid=%d, bar_wait=%d, bar_tx=%d, called\n",
3514	    __func__,
3515	    tid->tid,
3516	    tid->bar_wait,
3517	    tid->bar_tx);
3518
3519	/* We shouldn't be called when bar_tx is 1 */
3520	if (tid->bar_tx) {
3521		device_printf(sc->sc_dev, "%s: bar_tx is 1?!\n",
3522		    __func__);
3523	}
3524
3525	/* If we've already been called, just be patient. */
3526	if (tid->bar_wait)
3527		return;
3528
3529	/* Wait! */
3530	tid->bar_wait = 1;
3531
3532	/* Only one pause, no matter how many frames fail */
3533	ath_tx_tid_pause(sc, tid);
3534}
3535
3536/*
3537 * We've finished with BAR handling - either we succeeded or
3538 * failed. Either way, unsuspend TX.
3539 */
3540static void
3541ath_tx_tid_bar_unsuspend(struct ath_softc *sc, struct ath_tid *tid)
3542{
3543
3544	ATH_TX_LOCK_ASSERT(sc);
3545
3546	DPRINTF(sc, ATH_DEBUG_SW_TX_BAR,
3547	    "%s: %6D: TID=%d, called\n",
3548	    __func__,
3549	    tid->an->an_node.ni_macaddr,
3550	    ":",
3551	    tid->tid);
3552
3553	if (tid->bar_tx == 0 || tid->bar_wait == 0) {
3554		device_printf(sc->sc_dev,
3555		    "%s: %6D: TID=%d, bar_tx=%d, bar_wait=%d: ?\n",
3556		    __func__,
3557		    tid->an->an_node.ni_macaddr,
3558		    ":",
3559		    tid->tid,
3560		    tid->bar_tx,
3561		    tid->bar_wait);
3562	}
3563
3564	tid->bar_tx = tid->bar_wait = 0;
3565	ath_tx_tid_resume(sc, tid);
3566}
3567
3568/*
3569 * Return whether we're ready to TX a BAR frame.
3570 *
3571 * Requires the TID lock be held.
3572 */
3573static int
3574ath_tx_tid_bar_tx_ready(struct ath_softc *sc, struct ath_tid *tid)
3575{
3576
3577	ATH_TX_LOCK_ASSERT(sc);
3578
3579	if (tid->bar_wait == 0 || tid->hwq_depth > 0)
3580		return (0);
3581
3582	DPRINTF(sc, ATH_DEBUG_SW_TX_BAR,
3583	    "%s: %6D: TID=%d, bar ready\n",
3584	    __func__,
3585	    tid->an->an_node.ni_macaddr,
3586	    ":",
3587	    tid->tid);
3588
3589	return (1);
3590}
3591
3592/*
3593 * Check whether the current TID is ready to have a BAR
3594 * TXed and if so, do the TX.
3595 *
3596 * Since the TID/TXQ lock can't be held during a call to
3597 * ieee80211_send_bar(), we have to do the dirty thing of unlocking it,
3598 * sending the BAR and locking it again.
3599 *
3600 * Eventually, the code to send the BAR should be broken out
3601 * from this routine so the lock doesn't have to be reacquired
3602 * just to be immediately dropped by the caller.
3603 */
3604static void
3605ath_tx_tid_bar_tx(struct ath_softc *sc, struct ath_tid *tid)
3606{
3607	struct ieee80211_tx_ampdu *tap;
3608
3609	ATH_TX_LOCK_ASSERT(sc);
3610
3611	DPRINTF(sc, ATH_DEBUG_SW_TX_BAR,
3612	    "%s: %6D: TID=%d, called\n",
3613	    __func__,
3614	    tid->an->an_node.ni_macaddr,
3615	    ":",
3616	    tid->tid);
3617
3618	tap = ath_tx_get_tx_tid(tid->an, tid->tid);
3619
3620	/*
3621	 * This is an error condition!
3622	 */
3623	if (tid->bar_wait == 0 || tid->bar_tx == 1) {
3624		device_printf(sc->sc_dev,
3625		    "%s: %6D: TID=%d, bar_tx=%d, bar_wait=%d: ?\n",
3626		    __func__,
3627		    tid->an->an_node.ni_macaddr,
3628		    ":",
3629		    tid->tid,
3630		    tid->bar_tx,
3631		    tid->bar_wait);
3632		return;
3633	}
3634
3635	/* Don't do anything if we still have pending frames */
3636	if (tid->hwq_depth > 0) {
3637		DPRINTF(sc, ATH_DEBUG_SW_TX_BAR,
3638		    "%s: %6D: TID=%d, hwq_depth=%d, waiting\n",
3639		    __func__,
3640		    tid->an->an_node.ni_macaddr,
3641		    ":",
3642		    tid->tid,
3643		    tid->hwq_depth);
3644		return;
3645	}
3646
3647	/* We're now about to TX */
3648	tid->bar_tx = 1;
3649
3650	/*
3651	 * Override the clrdmask configuration for the next frame,
3652	 * just to get the ball rolling.
3653	 */
3654	ath_tx_set_clrdmask(sc, tid->an);
3655
3656	/*
3657	 * Calculate new BAW left edge, now that all frames have either
3658	 * succeeded or failed.
3659	 *
3660	 * XXX verify this is _actually_ the valid value to begin at!
3661	 */
3662	DPRINTF(sc, ATH_DEBUG_SW_TX_BAR,
3663	    "%s: %6D: TID=%d, new BAW left edge=%d\n",
3664	    __func__,
3665	    tid->an->an_node.ni_macaddr,
3666	    ":",
3667	    tid->tid,
3668	    tap->txa_start);
3669
3670	/* Try sending the BAR frame */
3671	/* We can't hold the lock here! */
3672
3673	ATH_TX_UNLOCK(sc);
3674	if (ieee80211_send_bar(&tid->an->an_node, tap, tap->txa_start) == 0) {
3675		/* Success? Now we wait for notification that it's done */
3676		ATH_TX_LOCK(sc);
3677		return;
3678	}
3679
3680	/* Failure? For now, warn loudly and continue */
3681	ATH_TX_LOCK(sc);
3682	device_printf(sc->sc_dev,
3683	    "%s: %6D: TID=%d, failed to TX BAR, continue!\n",
3684	    __func__,
3685	    tid->an->an_node.ni_macaddr,
3686	    ":",
3687	    tid->tid);
3688	ath_tx_tid_bar_unsuspend(sc, tid);
3689}
3690
3691static void
3692ath_tx_tid_drain_pkt(struct ath_softc *sc, struct ath_node *an,
3693    struct ath_tid *tid, ath_bufhead *bf_cq, struct ath_buf *bf)
3694{
3695
3696	ATH_TX_LOCK_ASSERT(sc);
3697
3698	/*
3699	 * If the current TID is running AMPDU, update
3700	 * the BAW.
3701	 */
3702	if (ath_tx_ampdu_running(sc, an, tid->tid) &&
3703	    bf->bf_state.bfs_dobaw) {
3704		/*
3705		 * Only remove the frame from the BAW if it's
3706		 * been transmitted at least once; this means
3707		 * the frame was in the BAW to begin with.
3708		 */
3709		if (bf->bf_state.bfs_retries > 0) {
3710			ath_tx_update_baw(sc, an, tid, bf);
3711			bf->bf_state.bfs_dobaw = 0;
3712		}
3713#if 0
3714		/*
3715		 * This has become a non-fatal error now
3716		 */
3717		if (! bf->bf_state.bfs_addedbaw)
3718			device_printf(sc->sc_dev,
3719			    "%s: wasn't added: seqno %d\n",
3720			    __func__, SEQNO(bf->bf_state.bfs_seqno));
3721#endif
3722	}
3723
3724	/* Strip it out of an aggregate list if it was in one */
3725	bf->bf_next = NULL;
3726
3727	/* Insert on the free queue to be freed by the caller */
3728	TAILQ_INSERT_TAIL(bf_cq, bf, bf_list);
3729}
3730
3731static void
3732ath_tx_tid_drain_print(struct ath_softc *sc, struct ath_node *an,
3733    const char *pfx, struct ath_tid *tid, struct ath_buf *bf)
3734{
3735	struct ieee80211_node *ni = &an->an_node;
3736	struct ath_txq *txq = sc->sc_ac2q[tid->ac];
3737	struct ieee80211_tx_ampdu *tap;
3738
3739	tap = ath_tx_get_tx_tid(an, tid->tid);
3740
3741	device_printf(sc->sc_dev,
3742	    "%s: %s: %6D: bf=%p: addbaw=%d, dobaw=%d, "
3743	    "seqno=%d, retry=%d\n",
3744	    __func__,
3745	    pfx,
3746	    ni->ni_macaddr,
3747	    ":",
3748	    bf,
3749	    bf->bf_state.bfs_addedbaw,
3750	    bf->bf_state.bfs_dobaw,
3751	    SEQNO(bf->bf_state.bfs_seqno),
3752	    bf->bf_state.bfs_retries);
3753	device_printf(sc->sc_dev,
3754	    "%s: %s: %6D: bf=%p: txq[%d] axq_depth=%d, axq_aggr_depth=%d\n",
3755	    __func__,
3756	    pfx,
3757	    ni->ni_macaddr,
3758	    ":",
3759	    bf,
3760	    txq->axq_qnum,
3761	    txq->axq_depth,
3762	    txq->axq_aggr_depth);
3763
3764	device_printf(sc->sc_dev,
3765	    "%s: %s: %6D: bf=%p: tid txq_depth=%d hwq_depth=%d, bar_wait=%d, "
3766	      "isfiltered=%d\n",
3767	    __func__,
3768	    pfx,
3769	    ni->ni_macaddr,
3770	    ":",
3771	    bf,
3772	    tid->axq_depth,
3773	    tid->hwq_depth,
3774	    tid->bar_wait,
3775	    tid->isfiltered);
3776	device_printf(sc->sc_dev,
3777	    "%s: %s: %6D: tid %d: "
3778	    "sched=%d, paused=%d, "
3779	    "incomp=%d, baw_head=%d, "
3780	    "baw_tail=%d txa_start=%d, ni_txseqs=%d\n",
3781	     __func__,
3782	     pfx,
3783	     ni->ni_macaddr,
3784	     ":",
3785	     tid->tid,
3786	     tid->sched, tid->paused,
3787	     tid->incomp, tid->baw_head,
3788	     tid->baw_tail, tap == NULL ? -1 : tap->txa_start,
3789	     ni->ni_txseqs[tid->tid]);
3790
3791	/* XXX Dump the frame, see what it is? */
3792	ieee80211_dump_pkt(ni->ni_ic,
3793	    mtod(bf->bf_m, const uint8_t *),
3794	    bf->bf_m->m_len, 0, -1);
3795}
3796
3797/*
3798 * Free any packets currently pending in the software TX queue.
3799 *
3800 * This will be called when a node is being deleted.
3801 *
3802 * It can also be called on an active node during an interface
3803 * reset or state transition.
3804 *
3805 * (From Linux/reference):
3806 *
3807 * TODO: For frame(s) that are in the retry state, we will reuse the
3808 * sequence number(s) without setting the retry bit. The
3809 * alternative is to give up on these and BAR the receiver's window
3810 * forward.
3811 */
3812static void
3813ath_tx_tid_drain(struct ath_softc *sc, struct ath_node *an,
3814    struct ath_tid *tid, ath_bufhead *bf_cq)
3815{
3816	struct ath_buf *bf;
3817	struct ieee80211_tx_ampdu *tap;
3818	struct ieee80211_node *ni = &an->an_node;
3819	int t;
3820
3821	tap = ath_tx_get_tx_tid(an, tid->tid);
3822
3823	ATH_TX_LOCK_ASSERT(sc);
3824
3825	/* Walk the queue, free frames */
3826	t = 0;
3827	for (;;) {
3828		bf = ATH_TID_FIRST(tid);
3829		if (bf == NULL) {
3830			break;
3831		}
3832
3833		if (t == 0) {
3834			ath_tx_tid_drain_print(sc, an, "norm", tid, bf);
3835			t = 1;
3836		}
3837
3838		ATH_TID_REMOVE(tid, bf, bf_list);
3839		ath_tx_tid_drain_pkt(sc, an, tid, bf_cq, bf);
3840	}
3841
3842	/* And now, drain the filtered frame queue */
3843	t = 0;
3844	for (;;) {
3845		bf = ATH_TID_FILT_FIRST(tid);
3846		if (bf == NULL)
3847			break;
3848
3849		if (t == 0) {
3850			ath_tx_tid_drain_print(sc, an, "filt", tid, bf);
3851			t = 1;
3852		}
3853
3854		ATH_TID_FILT_REMOVE(tid, bf, bf_list);
3855		ath_tx_tid_drain_pkt(sc, an, tid, bf_cq, bf);
3856	}
3857
3858	/*
3859	 * Override the clrdmask configuration for the next frame
3860	 * in case there is some future transmission, just to get
3861	 * the ball rolling.
3862	 *
3863	 * This won't hurt things if the TID is about to be freed.
3864	 */
3865	ath_tx_set_clrdmask(sc, tid->an);
3866
3867	/*
3868	 * Now that the queues are drained, update the sequence
3869	 * number and BAW window.
3870	 * Because sequence numbers have been assigned to frames
3871	 * that haven't been sent yet, it's entirely possible
3872	 * we'll be called with some pending frames that have not
3873	 * been transmitted.
3874	 *
3875	 * The cleaner solution is to do the sequence number allocation
3876	 * when the packet is first transmitted - and thus the "retries"
3877	 * check above would be enough to update the BAW/seqno.
3878	 */
3879
3880	/* But don't do it for non-QoS TIDs */
3881	if (tap) {
3882#if 1
3883		DPRINTF(sc, ATH_DEBUG_SW_TX_CTRL,
3884		    "%s: %6D: node %p: TID %d: sliding BAW left edge to %d\n",
3885		    __func__,
3886		    ni->ni_macaddr,
3887		    ":",
3888		    an,
3889		    tid->tid,
3890		    tap->txa_start);
3891#endif
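		/*
		 * Sync the node's next TX sequence number to the BAW left
		 * edge and mark the software BAW as empty, so any future
		 * traffic on this TID starts inside the window.
		 */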
3892		ni->ni_txseqs[tid->tid] = tap->txa_start;
3893		tid->baw_tail = tid->baw_head;
3894	}
3895}
3896
3897/*
3898 * Reset the TID state.  This must only be called once the node has
3899 * had its frames flushed from this TID, to ensure that no other
3900 * pause / unpause logic can kick in.
3901 */
3902static void
3903ath_tx_tid_reset(struct ath_softc *sc, struct ath_tid *tid)
3904{
3905
3906#if 0
3907	tid->bar_wait = tid->bar_tx = tid->isfiltered = 0;
3908	tid->paused = tid->sched = tid->addba_tx_pending = 0;
3909	tid->incomp = tid->cleanup_inprogress = 0;
3910#endif
3911
3912	/*
3913	 * If we have a bar_wait set, we need to unpause the TID
3914	 * here.  Otherwise once cleanup has finished, the TID won't
3915	 * have the right paused counter.
3916	 *
3917	 * XXX I'm not going through resume here - I don't want the
3918	 * node to be rescheduled just yet.  This, however, should be
3919	 * methodized!
3920	 */
3921	if (tid->bar_wait) {
3922		if (tid->paused > 0) {
3923			tid->paused --;
3924		}
3925	}
3926
3927	/*
3928	 * XXX same with a currently filtered TID.
3929	 *
3930	 * Since this is being called during a flush, we assume that
3931	 * the filtered frame list is actually empty.
3932	 *
3933	 * XXX TODO: add in a check to ensure that the filtered queue
3934	 * depth is actually 0!
3935	 */
3936	if (tid->isfiltered) {
3937		if (tid->paused > 0) {
3938			tid->paused --;
3939		}
3940	}
3941
3942	/*
3943	 * Clear BAR, filtered frames, scheduled and ADDBA pending.
3944	 * The TID may be going through cleanup from the last association
3945	 * where things in the BAW are still in the hardware queue.
3946	 */
3947	tid->bar_wait = 0;
3948	tid->bar_tx = 0;
3949	tid->isfiltered = 0;
3950	tid->sched = 0;
3951	tid->addba_tx_pending = 0;
3952
3953	/*
3954	 * XXX TODO: it may just be enough to walk the HWQs and mark
3955	 * frames for that node as non-aggregate; or mark the ath_node
3956	 * with something that indicates that aggregation is no longer
3957	 * occurring.  Then we can just toss the BAW complaints and
3958	 * do a complete hard reset of state here - no pause, no
3959	 * complete counter, etc.
3960	 */
3961
3962}
3963
3964/*
3965 * Flush all software queued packets for the given node.
3966 *
3967 * This occurs when a completion handler frees the last buffer
3968 * for a node, and the node is thus freed. This causes the node
3969 * to be cleaned up, which ends up calling ath_tx_node_flush.
3970 */
3971void
3972ath_tx_node_flush(struct ath_softc *sc, struct ath_node *an)
3973{
3974	int tid;
3975	ath_bufhead bf_cq;
3976	struct ath_buf *bf;
3977
3978	TAILQ_INIT(&bf_cq);
3979
3980	ATH_KTR(sc, ATH_KTR_NODE, 1, "ath_tx_node_flush: flush node; ni=%p",
3981	    &an->an_node);
3982
3983	ATH_TX_LOCK(sc);
3984	DPRINTF(sc, ATH_DEBUG_NODE,
3985	    "%s: %6D: flush; is_powersave=%d, stack_psq=%d, tim=%d, "
3986	    "swq_depth=%d, clrdmask=%d, leak_count=%d\n",
3987	    __func__,
3988	    an->an_node.ni_macaddr,
3989	    ":",
3990	    an->an_is_powersave,
3991	    an->an_stack_psq,
3992	    an->an_tim_set,
3993	    an->an_swq_depth,
3994	    an->clrdmask,
3995	    an->an_leak_count);
3996
3997	for (tid = 0; tid < IEEE80211_TID_SIZE; tid++) {
3998		struct ath_tid *atid = &an->an_tid[tid];
3999
4000		/* Free packets */
4001		ath_tx_tid_drain(sc, an, atid, &bf_cq);
4002
4003		/* Remove this tid from the list of active tids */
4004		ath_tx_tid_unsched(sc, atid);
4005
4006		/* Reset the per-TID pause, BAR, etc state */
4007		ath_tx_tid_reset(sc, atid);
4008	}
4009
4010	/*
4011	 * Clear global leak count
4012	 */
4013	an->an_leak_count = 0;
4014	ATH_TX_UNLOCK(sc);
4015
4016	/* Handle completed frames */
4017	while ((bf = TAILQ_FIRST(&bf_cq)) != NULL) {
4018		TAILQ_REMOVE(&bf_cq, bf, bf_list);
4019		ath_tx_default_comp(sc, bf, 0);
4020	}
4021}
4022
4023/*
4024 * Drain all the software TXQs currently with traffic queued.
4025 */
4026void
4027ath_tx_txq_drain(struct ath_softc *sc, struct ath_txq *txq)
4028{
4029	struct ath_tid *tid;
4030	ath_bufhead bf_cq;
4031	struct ath_buf *bf;
4032
4033	TAILQ_INIT(&bf_cq);
4034	ATH_TX_LOCK(sc);
4035
4036	/*
4037	 * Iterate over all active tids for the given txq,
4038	 * flushing and unsched'ing them
4039	 */
4040	while (! TAILQ_EMPTY(&txq->axq_tidq)) {
4041		tid = TAILQ_FIRST(&txq->axq_tidq);
4042		ath_tx_tid_drain(sc, tid->an, tid, &bf_cq);
4043		ath_tx_tid_unsched(sc, tid);
4044	}
4045
4046	ATH_TX_UNLOCK(sc);
4047
4048	while ((bf = TAILQ_FIRST(&bf_cq)) != NULL) {
4049		TAILQ_REMOVE(&bf_cq, bf, bf_list);
4050		ath_tx_default_comp(sc, bf, 0);
4051	}
4052}
4053
4054/*
4055 * Handle completion of non-aggregate session frames.
4056 *
4057 * This (currently) doesn't implement software retransmission of
4058 * non-aggregate frames!
4059 *
4060 * Software retransmission of non-aggregate frames needs to obey
4061 * the strict sequence number ordering, and drop any frames that
4062 * will fail this.
4063 *
4064 * For now, filtered frames and frame transmission will cause
4065 * all kinds of issues.  So we don't support them.
4066 *
4067 * So anyone queuing frames via ath_tx_normal_xmit() or
4068 * ath_tx_hw_queue_norm() must override and set CLRDMASK.
4069 */
4070void
4071ath_tx_normal_comp(struct ath_softc *sc, struct ath_buf *bf, int fail)
4072{
4073	struct ieee80211_node *ni = bf->bf_node;
4074	struct ath_node *an = ATH_NODE(ni);
4075	int tid = bf->bf_state.bfs_tid;
4076	struct ath_tid *atid = &an->an_tid[tid];
4077	struct ath_tx_status *ts = &bf->bf_status.ds_txstat;
4078
4079	/* The TID state is protected behind the TXQ lock */
4080	ATH_TX_LOCK(sc);
4081
4082	DPRINTF(sc, ATH_DEBUG_SW_TX, "%s: bf=%p: fail=%d, hwq_depth now %d\n",
4083	    __func__, bf, fail, atid->hwq_depth - 1);
4084
4085	atid->hwq_depth--;
4086
4087#if 0
4088	/*
4089	 * If the frame was filtered, stick it on the filter frame
4090	 * queue and complain about it.  It shouldn't happen!
4091	 */
4092	if ((ts->ts_status & HAL_TXERR_FILT) ||
4093	    (ts->ts_status != 0 && atid->isfiltered)) {
4094		device_printf(sc->sc_dev,
4095		    "%s: isfiltered=%d, ts_status=%d: huh?\n",
4096		    __func__,
4097		    atid->isfiltered,
4098		    ts->ts_status);
4099		ath_tx_tid_filt_comp_buf(sc, atid, bf);
4100	}
4101#endif
4102	if (atid->isfiltered)
4103		device_printf(sc->sc_dev, "%s: filtered?!\n", __func__);
4104	if (atid->hwq_depth < 0)
4105		device_printf(sc->sc_dev, "%s: hwq_depth < 0: %d\n",
4106		    __func__, atid->hwq_depth);
4107
4108	/*
4109	 * If the queue is filtered, potentially mark it as complete
4110	 * and reschedule it as needed.
4111	 *
4112	 * This is required as there may be a subsequent TX descriptor
4113	 * for this end-node that has CLRDMASK set, so it's quite possible
4114	 * that a filtered frame will be followed by a non-filtered
4115	 * (complete or otherwise) frame.
4116	 *
4117	 * XXX should we do this before we complete the frame?
4118	 */
4119	if (atid->isfiltered)
4120		ath_tx_tid_filt_comp_complete(sc, atid);
4121	ATH_TX_UNLOCK(sc);
4122
4123	/*
4124	 * punt to rate control if we're not being cleaned up
4125	 * during a hw queue drain and the frame wanted an ACK.
4126	 */
4127	if (fail == 0 && ((bf->bf_state.bfs_txflags & HAL_TXDESC_NOACK) == 0))
4128		ath_tx_update_ratectrl(sc, ni, bf->bf_state.bfs_rc,
4129		    ts, bf->bf_state.bfs_pktlen,
4130		    1, (ts->ts_status == 0) ? 0 : 1);
4131
4132	ath_tx_default_comp(sc, bf, fail);
4133}
4134
4135/*
4136 * Handle cleanup of aggregate session packets that aren't
4137 * an A-MPDU.
4138 *
4139 * There's no need to update the BAW here - the session is being
4140 * torn down.
4141 */
4142static void
4143ath_tx_comp_cleanup_unaggr(struct ath_softc *sc, struct ath_buf *bf)
4144{
4145	struct ieee80211_node *ni = bf->bf_node;
4146	struct ath_node *an = ATH_NODE(ni);
4147	int tid = bf->bf_state.bfs_tid;
4148	struct ath_tid *atid = &an->an_tid[tid];
4149
4150	DPRINTF(sc, ATH_DEBUG_SW_TX_CTRL, "%s: TID %d: incomp=%d\n",
4151	    __func__, tid, atid->incomp);
4152
4153	ATH_TX_LOCK(sc);
4154	atid->incomp--;
4155	if (atid->incomp == 0) {
4156		DPRINTF(sc, ATH_DEBUG_SW_TX_CTRL,
4157		    "%s: TID %d: cleaned up! resume!\n",
4158		    __func__, tid);
4159		atid->cleanup_inprogress = 0;
4160		ath_tx_tid_resume(sc, atid);
4161	}
4162	ATH_TX_UNLOCK(sc);
4163
4164	ath_tx_default_comp(sc, bf, 0);
4165}
4166
4167/*
4168 * Performs transmit side cleanup when TID changes from aggregated to
4169 * unaggregated.
4170 *
4171 * - Discard all retry frames from the s/w queue.
4172 * - Fix the tx completion function for all buffers in s/w queue.
4173 * - Count the number of unacked frames, and let transmit completion
4174 *   handle it later.
4175 *
4176 * The caller is responsible for pausing the TID and unpausing the
4177 * TID if no cleanup was required. Otherwise the cleanup path will
4178 * unpause the TID once the last hardware queued frame is completed.
4179 */
4180static void
4181ath_tx_tid_cleanup(struct ath_softc *sc, struct ath_node *an, int tid,
4182    ath_bufhead *bf_cq)
4183{
4184	struct ath_tid *atid = &an->an_tid[tid];
4185	struct ieee80211_tx_ampdu *tap;
4186	struct ath_buf *bf, *bf_next;
4187
4188	ATH_TX_LOCK_ASSERT(sc);
4189
4190	DPRINTF(sc, ATH_DEBUG_SW_TX_BAW,
4191	    "%s: TID %d: called\n", __func__, tid);
4192
4193	/*
4194	 * Move the filtered frames to the TX queue, before
4195	 * we run off and discard/process things.
4196	 */
4197	/* XXX this is really quite inefficient */
4198	while ((bf = ATH_TID_FILT_LAST(atid, ath_bufhead_s)) != NULL) {
4199		ATH_TID_FILT_REMOVE(atid, bf, bf_list);
4200		ATH_TID_INSERT_HEAD(atid, bf, bf_list);
4201	}
4202
4203	/*
4204	 * Update the frames in the software TX queue:
4205	 *
4206	 * + Discard retry frames in the queue
4207	 * + Fix the completion function to be non-aggregate
4208	 */
4209	bf = ATH_TID_FIRST(atid);
4210	while (bf) {
4211		if (bf->bf_state.bfs_isretried) {
4212			bf_next = TAILQ_NEXT(bf, bf_list);
4213			ATH_TID_REMOVE(atid, bf, bf_list);
4214			if (bf->bf_state.bfs_dobaw) {
4215				ath_tx_update_baw(sc, an, atid, bf);
4216				if (! bf->bf_state.bfs_addedbaw)
4217					device_printf(sc->sc_dev,
4218					    "%s: wasn't added: seqno %d\n",
4219					    __func__,
4220					    SEQNO(bf->bf_state.bfs_seqno));
4221			}
4222			bf->bf_state.bfs_dobaw = 0;
4223			/*
4224			 * Call the default completion handler with "fail" just
4225			 * so upper levels are suitably notified about this.
4226			 */
4227			TAILQ_INSERT_TAIL(bf_cq, bf, bf_list);
4228			bf = bf_next;
4229			continue;
4230		}
4231		/* Give these the default completion handler */
4232		bf->bf_comp = ath_tx_normal_comp;
4233		bf = TAILQ_NEXT(bf, bf_list);
4234	}
4235
4236	/*
4237	 * Calculate what hardware-queued frames exist based
4238	 * on the current BAW size. Ie, what frames have been
4239	 * added to the TX hardware queue for this TID but
4240	 * not yet ACKed.
4241	 */
4242	tap = ath_tx_get_tx_tid(an, tid);
4243	/* Need the lock - fiddling with BAW */
4244	while (atid->baw_head != atid->baw_tail) {
4245		if (atid->tx_buf[atid->baw_head]) {
4246			atid->incomp++;
4247			atid->cleanup_inprogress = 1;
4248			atid->tx_buf[atid->baw_head] = NULL;
4249		}
4250		INCR(atid->baw_head, ATH_TID_MAX_BUFS);
4251		INCR(tap->txa_start, IEEE80211_SEQ_RANGE);
4252	}
4253
4254	if (atid->cleanup_inprogress)
4255		DPRINTF(sc, ATH_DEBUG_SW_TX_CTRL,
4256		    "%s: TID %d: cleanup needed: %d packets\n",
4257		    __func__, tid, atid->incomp);
4258
4259	/* Owner now must free completed frames */
4260}
4261
4262static struct ath_buf *
4263ath_tx_retry_clone(struct ath_softc *sc, struct ath_node *an,
4264    struct ath_tid *tid, struct ath_buf *bf)
4265{
4266	struct ath_buf *nbf;
4267	int error;
4268
4269	/*
4270	 * Clone the buffer.  This will handle the dma unmap and
4271	 * copy the node reference to the new buffer.  If this
4272	 * works out, 'bf' will have no DMA mapping, no mbuf
4273	 * pointer and no node reference.
4274	 */
4275	nbf = ath_buf_clone(sc, bf);
4276
4277#if 0
4278	device_printf(sc->sc_dev, "%s: ATH_BUF_BUSY; cloning\n",
4279	    __func__);
4280#endif
4281
4282	if (nbf == NULL) {
4283		/* Failed to clone */
4284		device_printf(sc->sc_dev,
4285		    "%s: failed to clone a busy buffer\n",
4286		    __func__);
4287		return NULL;
4288	}
4289
4290	/* Setup the dma for the new buffer */
4291	error = ath_tx_dmasetup(sc, nbf, nbf->bf_m);
4292	if (error != 0) {
4293		device_printf(sc->sc_dev,
4294		    "%s: failed to setup dma for clone\n",
4295		    __func__);
4296		/*
4297		 * Put this at the head of the list, not tail;
4298		 * that way it doesn't interfere with the
4299		 * busy buffer logic (which uses the tail of
4300		 * the list.)
4301		 */
4302		ATH_TXBUF_LOCK(sc);
4303		ath_returnbuf_head(sc, nbf);
4304		ATH_TXBUF_UNLOCK(sc);
4305		return NULL;
4306	}
4307
4308	/* Update BAW if required, before we free the original buf */
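	/*
	 * (ath_tx_switch_baw_buf() points the BAW slot for this
	 * sequence number at the cloned buffer instead.)
	 */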
4309	if (bf->bf_state.bfs_dobaw)
4310		ath_tx_switch_baw_buf(sc, an, tid, bf, nbf);
4311
4312	/* Free original buffer; return new buffer */
4313	ath_freebuf(sc, bf);
4314
4315	return nbf;
4316}
4317
4318/*
4319 * Handle retrying an unaggregate frame in an aggregate
4320 * session.
4321 *
4322 * If too many retries occur, pause the TID, wait for
4323 * any further retransmits (as there's no requirement that
4324 * non-aggregate frames in an aggregate session be
4325 * transmitted in order; they just have to be inside the BAW)
4326 * and then queue a BAR.
4327 */
4328static void
4329ath_tx_aggr_retry_unaggr(struct ath_softc *sc, struct ath_buf *bf)
4330{
4331	struct ieee80211_node *ni = bf->bf_node;
4332	struct ath_node *an = ATH_NODE(ni);
4333	int tid = bf->bf_state.bfs_tid;
4334	struct ath_tid *atid = &an->an_tid[tid];
4335	struct ieee80211_tx_ampdu *tap;
4336
4337	ATH_TX_LOCK(sc);
4338
4339	tap = ath_tx_get_tx_tid(an, tid);
4340
4341	/*
4342	 * If the buffer is marked as busy, we can't directly
4343	 * reuse it. Instead, try to clone the buffer.
4344	 * If the clone is successful, recycle the old buffer.
4345	 * If the clone is unsuccessful, set bfs_retries to max
4346	 * to force the next bit of code to free the buffer
4347	 * for us.
4348	 */
4349	if ((bf->bf_state.bfs_retries < SWMAX_RETRIES) &&
4350	    (bf->bf_flags & ATH_BUF_BUSY)) {
4351		struct ath_buf *nbf;
4352		nbf = ath_tx_retry_clone(sc, an, atid, bf);
4353		if (nbf)
4354			/* bf has been freed at this point */
4355			bf = nbf;
4356		else
4357			bf->bf_state.bfs_retries = SWMAX_RETRIES + 1;
4358	}
4359
4360	if (bf->bf_state.bfs_retries >= SWMAX_RETRIES) {
4361		DPRINTF(sc, ATH_DEBUG_SW_TX_RETRIES,
4362		    "%s: exceeded retries; seqno %d\n",
4363		    __func__, SEQNO(bf->bf_state.bfs_seqno));
4364		sc->sc_stats.ast_tx_swretrymax++;
4365
4366		/* Update BAW anyway */
4367		if (bf->bf_state.bfs_dobaw) {
4368			ath_tx_update_baw(sc, an, atid, bf);
4369			if (! bf->bf_state.bfs_addedbaw)
4370				device_printf(sc->sc_dev,
4371				    "%s: wasn't added: seqno %d\n",
4372				    __func__, SEQNO(bf->bf_state.bfs_seqno));
4373		}
4374		bf->bf_state.bfs_dobaw = 0;
4375
4376		/* Suspend the TX queue and get ready to send the BAR */
4377		ath_tx_tid_bar_suspend(sc, atid);
4378
4379		/* Send the BAR if there are no other frames waiting */
4380		if (ath_tx_tid_bar_tx_ready(sc, atid))
4381			ath_tx_tid_bar_tx(sc, atid);
4382
4383		ATH_TX_UNLOCK(sc);
4384
4385		/* Free buffer, bf is free after this call */
4386		ath_tx_default_comp(sc, bf, 0);
4387		return;
4388	}
4389
4390	/*
4391	 * This increments the retry counter as well as
4392	 * sets the retry flag in the ath_buf and packet
4393	 * body.
4394	 */
4395	ath_tx_set_retry(sc, bf);
4396	sc->sc_stats.ast_tx_swretries++;
4397
4398	/*
4399	 * Insert this at the head of the queue, so it's
4400	 * retried before any current/subsequent frames.
4401	 */
4402	ATH_TID_INSERT_HEAD(atid, bf, bf_list);
4403	ath_tx_tid_sched(sc, atid);
4404	/* Send the BAR if there are no other frames waiting */
4405	if (ath_tx_tid_bar_tx_ready(sc, atid))
4406		ath_tx_tid_bar_tx(sc, atid);
4407
4408	ATH_TX_UNLOCK(sc);
4409}
4410
4411/*
4412 * Common code for aggregate excessive retry/subframe retry.
4413 * If retrying, queues buffers to bf_q.  If not, the caller
4414 * is responsible for freeing the buffers.
4415 *
4416 * XXX should unify this with ath_tx_aggr_retry_unaggr()
4417 */
4418static int
4419ath_tx_retry_subframe(struct ath_softc *sc, struct ath_buf *bf,
4420    ath_bufhead *bf_q)
4421{
4422	struct ieee80211_node *ni = bf->bf_node;
4423	struct ath_node *an = ATH_NODE(ni);
4424	int tid = bf->bf_state.bfs_tid;
4425	struct ath_tid *atid = &an->an_tid[tid];
4426
4427	ATH_TX_LOCK_ASSERT(sc);
4428
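	/*
	 * Reset the 11n aggregate fields in the TX descriptor so this
	 * subframe can go out again as a standalone (non-aggregate)
	 * frame if need be.
	 */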
4429	/* XXX clr11naggr should be done for all subframes */
4430	ath_hal_clr11n_aggr(sc->sc_ah, bf->bf_desc);
4431	ath_hal_set11nburstduration(sc->sc_ah, bf->bf_desc, 0);
4432
4433	/* ath_hal_set11n_virtualmorefrag(sc->sc_ah, bf->bf_desc, 0); */
4434
4435	/*
4436	 * If the buffer is marked as busy, we can't directly
4437	 * reuse it. Instead, try to clone the buffer.
4438	 * If the clone is successful, recycle the old buffer.
4439	 * If the clone is unsuccessful, set bfs_retries to max
4440	 * to force the next bit of code to free the buffer
4441	 * for us.
4442	 */
4443	if ((bf->bf_state.bfs_retries < SWMAX_RETRIES) &&
4444	    (bf->bf_flags & ATH_BUF_BUSY)) {
4445		struct ath_buf *nbf;
4446		nbf = ath_tx_retry_clone(sc, an, atid, bf);
4447		if (nbf)
4448			/* bf has been freed at this point */
4449			bf = nbf;
4450		else
4451			bf->bf_state.bfs_retries = SWMAX_RETRIES + 1;
4452	}
4453
4454	if (bf->bf_state.bfs_retries >= SWMAX_RETRIES) {
4455		sc->sc_stats.ast_tx_swretrymax++;
4456		DPRINTF(sc, ATH_DEBUG_SW_TX_RETRIES,
4457		    "%s: max retries: seqno %d\n",
4458		    __func__, SEQNO(bf->bf_state.bfs_seqno));
4459		ath_tx_update_baw(sc, an, atid, bf);
4460		if (! bf->bf_state.bfs_addedbaw)
4461			device_printf(sc->sc_dev,
4462			    "%s: wasn't added: seqno %d\n",
4463			    __func__, SEQNO(bf->bf_state.bfs_seqno));
4464		bf->bf_state.bfs_dobaw = 0;
4465		return 1;
4466	}
4467
4468	ath_tx_set_retry(sc, bf);
4469	sc->sc_stats.ast_tx_swretries++;
4470	bf->bf_next = NULL;		/* Just to make sure */
4471
4472	/* Clear the aggregate state */
4473	bf->bf_state.bfs_aggr = 0;
4474	bf->bf_state.bfs_ndelim = 0;	/* ??? needed? */
4475	bf->bf_state.bfs_nframes = 1;
4476
4477	TAILQ_INSERT_TAIL(bf_q, bf, bf_list);
4478	return 0;
4479}
4480
4481/*
4482 * error pkt completion for an aggregate destination
4483 */
4484static void
4485ath_tx_comp_aggr_error(struct ath_softc *sc, struct ath_buf *bf_first,
4486    struct ath_tid *tid)
4487{
4488	struct ieee80211_node *ni = bf_first->bf_node;
4489	struct ath_node *an = ATH_NODE(ni);
4490	struct ath_buf *bf_next, *bf;
4491	ath_bufhead bf_q;
4492	int drops = 0;
4493	struct ieee80211_tx_ampdu *tap;
4494	ath_bufhead bf_cq;
4495
4496	TAILQ_INIT(&bf_q);
4497	TAILQ_INIT(&bf_cq);
4498
4499	/*
4500	 * Update rate control - all frames have failed.
4501	 *
4502	 * XXX use the length in the first frame in the series;
4503	 * XXX just so things are consistent for now.
4504	 */
4505	ath_tx_update_ratectrl(sc, ni, bf_first->bf_state.bfs_rc,
4506	    &bf_first->bf_status.ds_txstat,
4507	    bf_first->bf_state.bfs_pktlen,
4508	    bf_first->bf_state.bfs_nframes, bf_first->bf_state.bfs_nframes);
4509
4510	ATH_TX_LOCK(sc);
4511	tap = ath_tx_get_tx_tid(an, tid->tid);
4512	sc->sc_stats.ast_tx_aggr_failall++;
4513
4514	/* Retry all subframes */
4515	bf = bf_first;
4516	while (bf) {
4517		bf_next = bf->bf_next;
4518		bf->bf_next = NULL;	/* Remove it from the aggr list */
4519		sc->sc_stats.ast_tx_aggr_fail++;
4520		if (ath_tx_retry_subframe(sc, bf, &bf_q)) {
4521			drops++;
4522			bf->bf_next = NULL;
4523			TAILQ_INSERT_TAIL(&bf_cq, bf, bf_list);
4524		}
4525		bf = bf_next;
4526	}
4527
4528	/* Prepend all frames to the beginning of the queue */
4529	while ((bf = TAILQ_LAST(&bf_q, ath_bufhead_s)) != NULL) {
4530		TAILQ_REMOVE(&bf_q, bf, bf_list);
4531		ATH_TID_INSERT_HEAD(tid, bf, bf_list);
4532	}
4533
4534	/*
4535	 * Schedule the TID to be re-tried.
4536	 */
4537	ath_tx_tid_sched(sc, tid);
4538
4539	/*
4540	 * send bar if we dropped any frames
4541	 *
4542	 * Keep the txq lock held for now, as we need to ensure
4543	 * that ni_txseqs[] is consistent (as it's being updated
4544	 * in the ifnet TX context or raw TX context.)
4545	 */
4546	if (drops) {
4547		/* Suspend the TX queue and get ready to send the BAR */
4548		ath_tx_tid_bar_suspend(sc, tid);
4549	}
4550
4551	/*
4552	 * Send BAR if required
4553	 */
4554	if (ath_tx_tid_bar_tx_ready(sc, tid))
4555		ath_tx_tid_bar_tx(sc, tid);
4556
4557	ATH_TX_UNLOCK(sc);
4558
4559	/* Complete frames which errored out */
4560	while ((bf = TAILQ_FIRST(&bf_cq)) != NULL) {
4561		TAILQ_REMOVE(&bf_cq, bf, bf_list);
4562		ath_tx_default_comp(sc, bf, 0);
4563	}
4564}
4565
4566/*
4567 * Handle clean-up of packets from an aggregate list.
4568 *
4569 * There's no need to update the BAW here - the session is being
4570 * torn down.
4571 */
4572static void
4573ath_tx_comp_cleanup_aggr(struct ath_softc *sc, struct ath_buf *bf_first)
4574{
4575	struct ath_buf *bf, *bf_next;
4576	struct ieee80211_node *ni = bf_first->bf_node;
4577	struct ath_node *an = ATH_NODE(ni);
4578	int tid = bf_first->bf_state.bfs_tid;
4579	struct ath_tid *atid = &an->an_tid[tid];
4580
4581	ATH_TX_LOCK(sc);
4582
4583	/* update incomp */
4584	bf = bf_first;
4585	while (bf) {
4586		atid->incomp--;
4587		bf = bf->bf_next;
4588	}
4589
4590	if (atid->incomp == 0) {
4591		DPRINTF(sc, ATH_DEBUG_SW_TX_CTRL,
4592		    "%s: TID %d: cleaned up! resume!\n",
4593		    __func__, tid);
4594		atid->cleanup_inprogress = 0;
4595		ath_tx_tid_resume(sc, atid);
4596	}
4597
4598	/* Send BAR if required */
4599	/* XXX why would we send a BAR when transitioning to non-aggregation? */
4600	/*
4601	 * XXX TODO: we should likely just tear down the BAR state here,
4602	 * rather than sending a BAR.
4603	 */
4604	if (ath_tx_tid_bar_tx_ready(sc, atid))
4605		ath_tx_tid_bar_tx(sc, atid);
4606
4607	ATH_TX_UNLOCK(sc);
4608
4609	/* Handle frame completion */
4610	bf = bf_first;
4611	while (bf) {
4612		bf_next = bf->bf_next;
4613		ath_tx_default_comp(sc, bf, 1);
4614		bf = bf_next;
4615	}
4616}
4617
4618/*
4619 * Handle completion of a set of aggregate frames.
4620 *
4621 * Note: the completion handler is the last descriptor in the aggregate,
4622 * not the last descriptor in the first frame.
4623 */
4624static void
4625ath_tx_aggr_comp_aggr(struct ath_softc *sc, struct ath_buf *bf_first,
4626    int fail)
4627{
4628	//struct ath_desc *ds = bf->bf_lastds;
4629	struct ieee80211_node *ni = bf_first->bf_node;
4630	struct ath_node *an = ATH_NODE(ni);
4631	int tid = bf_first->bf_state.bfs_tid;
4632	struct ath_tid *atid = &an->an_tid[tid];
4633	struct ath_tx_status ts;
4634	struct ieee80211_tx_ampdu *tap;
4635	ath_bufhead bf_q;
4636	ath_bufhead bf_cq;
4637	int seq_st, tx_ok;
4638	int hasba, isaggr;
4639	uint32_t ba[2];
4640	struct ath_buf *bf, *bf_next;
4641	int ba_index;
4642	int drops = 0;
4643	int nframes = 0, nbad = 0, nf;
4644	int pktlen;
4645	/* XXX there's too much on the stack? */
4646	struct ath_rc_series rc[ATH_RC_NUM];
4647	int txseq;
4648
4649	DPRINTF(sc, ATH_DEBUG_SW_TX_AGGR, "%s: called; hwq_depth=%d\n",
4650	    __func__, atid->hwq_depth);
4651
4652	/*
4653	 * Take a copy; this may be needed -after- bf_first
4654	 * has been completed and freed.
4655	 */
4656	ts = bf_first->bf_status.ds_txstat;
4657
4658	TAILQ_INIT(&bf_q);
4659	TAILQ_INIT(&bf_cq);
4660
4661	/* The TID state is kept behind the TXQ lock */
4662	ATH_TX_LOCK(sc);
4663
4664	atid->hwq_depth--;
4665	if (atid->hwq_depth < 0)
4666		device_printf(sc->sc_dev, "%s: hwq_depth < 0: %d\n",
4667		    __func__, atid->hwq_depth);
4668
4669	/*
4670	 * If the TID is filtered, handle completing the filter
4671	 * transition before potentially kicking it to the cleanup
4672	 * function.
4673	 *
4674	 * XXX this is duplicate work, ew.
4675	 */
4676	if (atid->isfiltered)
4677		ath_tx_tid_filt_comp_complete(sc, atid);
4678
4679	/*
4680	 * Punt cleanup to the relevant function, not our problem now
4681	 */
4682	if (atid->cleanup_inprogress) {
4683		if (atid->isfiltered)
4684			device_printf(sc->sc_dev,
4685			    "%s: isfiltered=1, normal_comp?\n",
4686			    __func__);
4687		ATH_TX_UNLOCK(sc);
4688		ath_tx_comp_cleanup_aggr(sc, bf_first);
4689		return;
4690	}
4691
4692	/*
4693	 * If the frame is filtered, transition to filtered frame
4694	 * mode and add this to the filtered frame list.
4695	 *
4696	 * XXX TODO: figure out how this interoperates with
4697	 * BAR, pause and cleanup states.
4698	 */
4699	if ((ts.ts_status & HAL_TXERR_FILT) ||
4700	    (ts.ts_status != 0 && atid->isfiltered)) {
4701		if (fail != 0)
4702			device_printf(sc->sc_dev,
4703			    "%s: isfiltered=1, fail=%d\n", __func__, fail);
4704		ath_tx_tid_filt_comp_aggr(sc, atid, bf_first, &bf_cq);
4705
4706		/* Remove from BAW */
4707		TAILQ_FOREACH_SAFE(bf, &bf_cq, bf_list, bf_next) {
4708			if (bf->bf_state.bfs_addedbaw)
4709				drops++;
4710			if (bf->bf_state.bfs_dobaw) {
4711				ath_tx_update_baw(sc, an, atid, bf);
4712				if (! bf->bf_state.bfs_addedbaw)
4713					device_printf(sc->sc_dev,
4714					    "%s: wasn't added: seqno %d\n",
4715					    __func__,
4716					    SEQNO(bf->bf_state.bfs_seqno));
4717			}
4718			bf->bf_state.bfs_dobaw = 0;
4719		}
4720		/*
4721		 * If any intermediate frames in the BAW were dropped when
4722		 * handling filtering things, send a BAR.
4723		 */
4724		if (drops)
4725			ath_tx_tid_bar_suspend(sc, atid);
4726
4727		/*
4728		 * Finish up by sending a BAR if required and freeing
4729		 * the frames outside of the TX lock.
4730		 */
4731		goto finish_send_bar;
4732	}
4733
4734	/*
4735	 * XXX for now, use the first frame in the aggregate for
4736	 * XXX rate control completion; it's at least consistent.
4737	 */
4738	pktlen = bf_first->bf_state.bfs_pktlen;
4739
4740	/*
4741	 * Handle errors first!
4742	 *
4743	 * Here, handle _any_ error as an "exceeded retries" error.
4744	 * Later on (when filtered frames are to be specially handled)
4745	 * it'll have to be expanded.
4746	 */
4747#if 0
4748	if (ts.ts_status & HAL_TXERR_XRETRY) {
4749#endif
4750	if (ts.ts_status != 0) {
4751		ATH_TX_UNLOCK(sc);
4752		ath_tx_comp_aggr_error(sc, bf_first, atid);
4753		return;
4754	}
4755
4756	tap = ath_tx_get_tx_tid(an, tid);
4757
4758	/*
4759	 * extract starting sequence and block-ack bitmap
4760	 */
4761	/* XXX endian-ness of seq_st, ba? */
4762	seq_st = ts.ts_seqnum;
4763	hasba = !! (ts.ts_flags & HAL_TX_BA);
4764	tx_ok = (ts.ts_status == 0);
4765	isaggr = bf_first->bf_state.bfs_aggr;
4766	ba[0] = ts.ts_ba_low;
4767	ba[1] = ts.ts_ba_high;
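	/*
	 * ba[] is the 64-bit block-ack bitmap as two 32-bit words;
	 * ATH_BA_INDEX()/ATH_BA_ISSET() below use it together with
	 * seq_st to test whether each subframe was ACKed.
	 */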
4768
4769	/*
4770	 * Copy the TX completion status and the rate control
4771	 * series from the first descriptor, as it may be freed
4772	 * before the rate control code can get its grubby fingers
4773	 * into things.
4774	 */
4775	memcpy(rc, bf_first->bf_state.bfs_rc, sizeof(rc));
4776
4777	DPRINTF(sc, ATH_DEBUG_SW_TX_AGGR,
4778	    "%s: txa_start=%d, tx_ok=%d, status=%.8x, flags=%.8x, "
4779	    "isaggr=%d, seq_st=%d, hasba=%d, ba=%.8x, %.8x\n",
4780	    __func__, tap->txa_start, tx_ok, ts.ts_status, ts.ts_flags,
4781	    isaggr, seq_st, hasba, ba[0], ba[1]);
4782
4783	/*
4784	 * The reference driver doesn't do this; it simply ignores
4785	 * this check in its entirety.
4786	 *
4787	 * I've seen this occur when using iperf to send traffic
4788	 * out tid 1 - the aggregate frames are all marked as TID 1,
4789	 * but the TXSTATUS has TID=0.  So, let's just ignore this
4790	 * check.
4791	 */
4792#if 0
4793	/* Occasionally, the MAC sends a tx status for the wrong TID. */
4794	if (tid != ts.ts_tid) {
4795		device_printf(sc->sc_dev, "%s: tid %d != hw tid %d\n",
4796		    __func__, tid, ts.ts_tid);
4797		tx_ok = 0;
4798	}
4799#endif
4800
4801	/* AR5416 BA bug; this requires an interface reset */
4802	if (isaggr && tx_ok && (! hasba)) {
4803		device_printf(sc->sc_dev,
4804		    "%s: AR5416 bug: hasba=%d; txok=%d, isaggr=%d, "
4805		    "seq_st=%d\n",
4806		    __func__, hasba, tx_ok, isaggr, seq_st);
4807		/* XXX TODO: schedule an interface reset */
4808#ifdef ATH_DEBUG
4809		ath_printtxbuf(sc, bf_first,
4810		    sc->sc_ac2q[atid->ac]->axq_qnum, 0, 0);
4811#endif
4812	}
4813
4814	/*
4815	 * Walk the list of frames, figure out which ones were correctly
4816	 * sent and which weren't.
4817	 */
4818	bf = bf_first;
4819	nf = bf_first->bf_state.bfs_nframes;
4820
4821	/* bf_first is going to be invalid once this list is walked */
4822	bf_first = NULL;
4823
4824	/*
4825	 * Walk the list of completed frames and determine
4826	 * which need to be completed and which need to be
4827	 * retransmitted.
4828	 *
4829	 * For completed frames, the completion functions need
4830	 * to be called at the end of this function as the last
4831	 * node reference may free the node.
4832	 *
4833	 * Finally, since the TXQ lock can't be held during the
4834	 * completion callback (to avoid lock recursion),
4835	 * the completion calls have to be done outside of the
4836	 * lock.
4837	 */
4838	while (bf) {
4839		nframes++;
4840		ba_index = ATH_BA_INDEX(seq_st,
4841		    SEQNO(bf->bf_state.bfs_seqno));
4842		bf_next = bf->bf_next;
4843		bf->bf_next = NULL;	/* Remove it from the aggr list */
4844
4845		DPRINTF(sc, ATH_DEBUG_SW_TX_AGGR,
4846		    "%s: checking bf=%p seqno=%d; ack=%d\n",
4847		    __func__, bf, SEQNO(bf->bf_state.bfs_seqno),
4848		    ATH_BA_ISSET(ba, ba_index));
4849
4850		if (tx_ok && ATH_BA_ISSET(ba, ba_index)) {
4851			sc->sc_stats.ast_tx_aggr_ok++;
4852			ath_tx_update_baw(sc, an, atid, bf);
4853			bf->bf_state.bfs_dobaw = 0;
4854			if (! bf->bf_state.bfs_addedbaw)
4855				device_printf(sc->sc_dev,
4856				    "%s: wasn't added: seqno %d\n",
4857				    __func__, SEQNO(bf->bf_state.bfs_seqno));
4858			bf->bf_next = NULL;
4859			TAILQ_INSERT_TAIL(&bf_cq, bf, bf_list);
4860		} else {
4861			sc->sc_stats.ast_tx_aggr_fail++;
4862			if (ath_tx_retry_subframe(sc, bf, &bf_q)) {
4863				drops++;
4864				bf->bf_next = NULL;
4865				TAILQ_INSERT_TAIL(&bf_cq, bf, bf_list);
4866			}
4867			nbad++;
4868		}
4869		bf = bf_next;
4870	}
4871
4872	/*
4873	 * Now that the BAW updates have been done, unlock
4874	 *
4875	 * txseq is grabbed before the lock is released so we
4876	 * have a consistent view of what -was- in the BAW.
4877	 * Anything after this point will not yet have been
4878	 * TXed.
4879	 */
4880	txseq = tap->txa_start;
4881	ATH_TX_UNLOCK(sc);
4882
4883	if (nframes != nf)
4884		device_printf(sc->sc_dev,
4885		    "%s: num frames seen=%d; bf nframes=%d\n",
4886		    __func__, nframes, nf);
4887
4888	/*
4889	 * Now we know how many frames were bad, call the rate
4890	 * control code.
4891	 */
4892	if (fail == 0)
4893		ath_tx_update_ratectrl(sc, ni, rc, &ts, pktlen, nframes,
4894		    nbad);
4895
4896	/*
4897	 * send bar if we dropped any frames
4898	 */
4899	if (drops) {
4900		/* Suspend the TX queue and get ready to send the BAR */
4901		ATH_TX_LOCK(sc);
4902		ath_tx_tid_bar_suspend(sc, atid);
4903		ATH_TX_UNLOCK(sc);
4904	}
4905
4906	DPRINTF(sc, ATH_DEBUG_SW_TX_AGGR,
4907	    "%s: txa_start now %d\n", __func__, tap->txa_start);
4908
4909	ATH_TX_LOCK(sc);
4910
4911	/* Prepend all frames to the beginning of the queue */
4912	while ((bf = TAILQ_LAST(&bf_q, ath_bufhead_s)) != NULL) {
4913		TAILQ_REMOVE(&bf_q, bf, bf_list);
4914		ATH_TID_INSERT_HEAD(atid, bf, bf_list);
4915	}
4916
4917	/*
4918	 * Reschedule to grab some further frames.
4919	 */
4920	ath_tx_tid_sched(sc, atid);
4921
4922	/*
4923	 * If the queue is filtered, re-schedule as required.
4924	 *
4925	 * This is required as there may be a subsequent TX descriptor
4926	 * for this end-node that has CLRDMASK set, so it's quite possible
4927	 * that a filtered frame will be followed by a non-filtered
4928	 * (complete or otherwise) frame.
4929	 *
4930	 * XXX should we do this before we complete the frame?
4931	 */
4932	if (atid->isfiltered)
4933		ath_tx_tid_filt_comp_complete(sc, atid);
4934
4935finish_send_bar:
4936
4937	/*
4938	 * Send BAR if required
4939	 */
4940	if (ath_tx_tid_bar_tx_ready(sc, atid))
4941		ath_tx_tid_bar_tx(sc, atid);
4942
4943	ATH_TX_UNLOCK(sc);
4944
4945	/* Do deferred completion */
4946	while ((bf = TAILQ_FIRST(&bf_cq)) != NULL) {
4947		TAILQ_REMOVE(&bf_cq, bf, bf_list);
4948		ath_tx_default_comp(sc, bf, 0);
4949	}
4950}
4951
4952/*
4953 * Handle completion of unaggregated frames in an ADDBA
4954 * session.
4955 *
4956 * Fail is set to 1 if the entry is being freed via a call to
4957 * ath_tx_draintxq().
4958 */
4959static void
4960ath_tx_aggr_comp_unaggr(struct ath_softc *sc, struct ath_buf *bf, int fail)
4961{
4962	struct ieee80211_node *ni = bf->bf_node;
4963	struct ath_node *an = ATH_NODE(ni);
4964	int tid = bf->bf_state.bfs_tid;
4965	struct ath_tid *atid = &an->an_tid[tid];
4966	struct ath_tx_status ts;
4967	int drops = 0;
4968
4969	/*
4970	 * Take a copy of this; filtering/cloning the frame may free the
4971	 * bf pointer.
4972	 */
4973	ts = bf->bf_status.ds_txstat;
4974
4975	/*
4976	 * Update rate control status here, before we possibly
4977	 * punt to retry or cleanup.
4978	 *
4979	 * Do it outside of the TXQ lock.
4980	 */
4981	if (fail == 0 && ((bf->bf_state.bfs_txflags & HAL_TXDESC_NOACK) == 0))
4982		ath_tx_update_ratectrl(sc, ni, bf->bf_state.bfs_rc,
4983		    &bf->bf_status.ds_txstat,
4984		    bf->bf_state.bfs_pktlen,
4985		    1, (ts.ts_status == 0) ? 0 : 1);
4986
4987	/*
4988	 * This is called early so atid->hwq_depth can be tracked.
4989	 * This unfortunately means the lock is released and re-acquired
4990	 * during retry and cleanup.  That's rather inefficient.
4991	 */
4992	ATH_TX_LOCK(sc);
4993
4994	if (tid == IEEE80211_NONQOS_TID)
4995		device_printf(sc->sc_dev, "%s: TID=16!\n", __func__);
4996
4997	DPRINTF(sc, ATH_DEBUG_SW_TX,
4998	    "%s: bf=%p: tid=%d, hwq_depth=%d, seqno=%d\n",
4999	    __func__, bf, bf->bf_state.bfs_tid, atid->hwq_depth,
5000	    SEQNO(bf->bf_state.bfs_seqno));
5001
5002	atid->hwq_depth--;
5003	if (atid->hwq_depth < 0)
5004		device_printf(sc->sc_dev, "%s: hwq_depth < 0: %d\n",
5005		    __func__, atid->hwq_depth);
5006
5007	/*
5008	 * If the TID is filtered, handle completing the filter
5009	 * transition before potentially kicking it to the cleanup
5010	 * function.
5011	 */
5012	if (atid->isfiltered)
5013		ath_tx_tid_filt_comp_complete(sc, atid);
5014
5015	/*
5016	 * If a cleanup is in progress, punt to comp_cleanup
5017	 * rather than handling it here.  It's then that path's
5018	 * responsibility to clean up, call the completion
5019	 * function in net80211, etc.
5020	 */
5021	if (atid->cleanup_inprogress) {
5022		if (atid->isfiltered)
5023			device_printf(sc->sc_dev,
5024			    "%s: isfiltered=1, normal_comp?\n",
5025			    __func__);
5026		ATH_TX_UNLOCK(sc);
5027		DPRINTF(sc, ATH_DEBUG_SW_TX, "%s: cleanup_unaggr\n",
5028		    __func__);
5029		ath_tx_comp_cleanup_unaggr(sc, bf);
5030		return;
5031	}
5032
5033	/*
5034	 * XXX TODO: how does cleanup, BAR and filtered frame handling
5035	 * overlap?
5036	 *
5037	 * If the frame is filtered OR if it's any failure but
5038	 * the TID is filtered, the frame must be added to the
5039	 * filtered frame list.
5040	 *
5041	 * However - a busy buffer can't be added to the filtered
5042	 * list as it will end up being recycled without having
5043	 * been made available for the hardware.
5044	 */
5045	if ((ts.ts_status & HAL_TXERR_FILT) ||
5046	    (ts.ts_status != 0 && atid->isfiltered)) {
5047		int freeframe;
5048
5049		if (fail != 0)
5050			device_printf(sc->sc_dev,
5051			    "%s: isfiltered=1, fail=%d\n",
5052			    __func__,
5053			    fail);
5054		freeframe = ath_tx_tid_filt_comp_single(sc, atid, bf);
5055		if (freeframe) {
5056			/* Remove from BAW */
5057			if (bf->bf_state.bfs_addedbaw)
5058				drops++;
5059			if (bf->bf_state.bfs_dobaw) {
5060				ath_tx_update_baw(sc, an, atid, bf);
5061				if (! bf->bf_state.bfs_addedbaw)
5062					device_printf(sc->sc_dev,
5063					    "%s: wasn't added: seqno %d\n",
5064					    __func__, SEQNO(bf->bf_state.bfs_seqno));
5065			}
5066			bf->bf_state.bfs_dobaw = 0;
5067		}
5068
5069		/*
5070		 * If the frame couldn't be filtered, treat it as a drop and
5071		 * prepare to send a BAR.
5072		 */
5073		if (freeframe && drops)
5074			ath_tx_tid_bar_suspend(sc, atid);
5075
5076		/*
5077		 * Send BAR if required
5078		 */
5079		if (ath_tx_tid_bar_tx_ready(sc, atid))
5080			ath_tx_tid_bar_tx(sc, atid);
5081
5082		ATH_TX_UNLOCK(sc);
5083		/*
5084		 * If freeframe is set, then the frame couldn't be
5085		 * cloned and bf is still valid.  Just complete/free it.
5086		 */
5087		if (freeframe)
5088			ath_tx_default_comp(sc, bf, fail);
5089
5091		return;
5092	}
5093	/*
5094	 * Don't bother with the retry check if all frames
5095	 * are being failed (eg during queue deletion.)
5096	 */
5097#if 0
5098	if (fail == 0 && ts->ts_status & HAL_TXERR_XRETRY) {
5099#endif
5100	if (fail == 0 && ts.ts_status != 0) {
5101		ATH_TX_UNLOCK(sc);
5102		DPRINTF(sc, ATH_DEBUG_SW_TX, "%s: retry_unaggr\n",
5103		    __func__);
5104		ath_tx_aggr_retry_unaggr(sc, bf);
5105		return;
5106	}
5107
5108	/* Success? Complete */
5109	DPRINTF(sc, ATH_DEBUG_SW_TX, "%s: TID=%d, seqno %d\n",
5110	    __func__, tid, SEQNO(bf->bf_state.bfs_seqno));
5111	if (bf->bf_state.bfs_dobaw) {
5112		ath_tx_update_baw(sc, an, atid, bf);
5113		bf->bf_state.bfs_dobaw = 0;
5114		if (! bf->bf_state.bfs_addedbaw)
5115			device_printf(sc->sc_dev,
5116			    "%s: wasn't added: seqno %d\n",
5117			    __func__, SEQNO(bf->bf_state.bfs_seqno));
5118	}
5119
5120	/*
5121	 * If the queue is filtered, re-schedule as required.
5122	 *
5123	 * This is required as there may be a subsequent TX descriptor
5124	 * for this end-node that has CLRDMASK set, so it's quite possible
5125	 * that a filtered frame will be followed by a non-filtered
5126	 * (complete or otherwise) frame.
5127	 *
5128	 * XXX should we do this before we complete the frame?
5129	 */
5130	if (atid->isfiltered)
5131		ath_tx_tid_filt_comp_complete(sc, atid);
5132
5133	/*
5134	 * Send BAR if required
5135	 */
5136	if (ath_tx_tid_bar_tx_ready(sc, atid))
5137		ath_tx_tid_bar_tx(sc, atid);
5138
5139	ATH_TX_UNLOCK(sc);
5140
5141	ath_tx_default_comp(sc, bf, fail);
5142	/* bf is freed at this point */
5143}
5144
5145void
5146ath_tx_aggr_comp(struct ath_softc *sc, struct ath_buf *bf, int fail)
5147{
5148	if (bf->bf_state.bfs_aggr)
5149		ath_tx_aggr_comp_aggr(sc, bf, fail);
5150	else
5151		ath_tx_aggr_comp_unaggr(sc, bf, fail);
5152}
5153
5154/*
5155 * Schedule some packets from the given node/TID to the hardware.
5156 *
5157 * This is the aggregate version.
5158 */
5159void
5160ath_tx_tid_hw_queue_aggr(struct ath_softc *sc, struct ath_node *an,
5161    struct ath_tid *tid)
5162{
5163	struct ath_buf *bf;
5164	struct ath_txq *txq = sc->sc_ac2q[tid->ac];
5165	struct ieee80211_tx_ampdu *tap;
5166	ATH_AGGR_STATUS status;
5167	ath_bufhead bf_q;
5168
5169	DPRINTF(sc, ATH_DEBUG_SW_TX, "%s: tid=%d\n", __func__, tid->tid);
5170	ATH_TX_LOCK_ASSERT(sc);
5171
5172	/*
5173	 * XXX TODO: If we're called for a queue that we're leaking frames to,
5174	 * ensure we only leak one.
5175	 */
5176
5177	tap = ath_tx_get_tx_tid(an, tid->tid);
5178
5179	if (tid->tid == IEEE80211_NONQOS_TID)
5180		device_printf(sc->sc_dev, "%s: called for TID=NONQOS_TID?\n",
5181		    __func__);
5182
5183	for (;;) {
5184		status = ATH_AGGR_DONE;
5185
5186		/*
5187		 * If the upper layer has paused the TID, don't
5188		 * queue any further packets.
5189		 *
5190		 * This can also occur from the completion task because
5191		 * of packet loss; but as it's serialised with this code,
5192		 * it won't "appear" halfway through queuing packets.
5193		 */
5194		if (! ath_tx_tid_can_tx_or_sched(sc, tid))
5195			break;
5196
5197		bf = ATH_TID_FIRST(tid);
5198		if (bf == NULL) {
5199			break;
5200		}
5201
5202		/*
5203		 * If the packet doesn't fall within the BAW (eg a NULL
5204		 * data frame), schedule it directly; continue.
5205		 */
5206		if (! bf->bf_state.bfs_dobaw) {
5207			DPRINTF(sc, ATH_DEBUG_SW_TX_AGGR,
5208			    "%s: non-baw packet\n",
5209			    __func__);
5210			ATH_TID_REMOVE(tid, bf, bf_list);
5211
5212			if (bf->bf_state.bfs_nframes > 1)
5213				device_printf(sc->sc_dev,
5214				    "%s: aggr=%d, nframes=%d\n",
5215				    __func__,
5216				    bf->bf_state.bfs_aggr,
5217				    bf->bf_state.bfs_nframes);
5218
5219			/*
5220			 * This shouldn't happen - such frames shouldn't
5221			 * ever have been queued as an aggregate in the
5222			 * first place.  However, make sure the fields
5223			 * are set up correctly, just to be totally sure.
5224			 */
5225			bf->bf_state.bfs_aggr = 0;
5226			bf->bf_state.bfs_nframes = 1;
5227
5228			/* Update CLRDMASK just before this frame is queued */
5229			ath_tx_update_clrdmask(sc, tid, bf);
5230
5231			ath_tx_do_ratelookup(sc, bf);
5232			ath_tx_calc_duration(sc, bf);
5233			ath_tx_calc_protection(sc, bf);
5234			ath_tx_set_rtscts(sc, bf);
5235			ath_tx_rate_fill_rcflags(sc, bf);
5236			ath_tx_setds(sc, bf);
5237			ath_hal_clr11n_aggr(sc->sc_ah, bf->bf_desc);
5238
5239			sc->sc_aggr_stats.aggr_nonbaw_pkt++;
5240
5241			/* Queue the packet; continue */
5242			goto queuepkt;
5243		}
5244
5245		TAILQ_INIT(&bf_q);
5246
5247		/*
5248		 * Do a rate control lookup on the first frame in the
5249		 * list. The rate control code needs that to occur
5250		 * before it can determine whether to TX.
5251		 * It's inaccurate because the rate control code doesn't
5252		 * really "do" aggregate lookups, so it only considers
5253		 * the size of the first frame.
5254		 */
5255		ath_tx_do_ratelookup(sc, bf);
5256		bf->bf_state.bfs_rc[3].rix = 0;
5257		bf->bf_state.bfs_rc[3].tries = 0;
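		/*
		 * (The fourth rate series is explicitly zeroed above;
		 * presumably so aggregates are limited to three
		 * multi-rate retry series.)
		 */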
5258
5259		ath_tx_calc_duration(sc, bf);
5260		ath_tx_calc_protection(sc, bf);
5261
5262		ath_tx_set_rtscts(sc, bf);
5263		ath_tx_rate_fill_rcflags(sc, bf);
5264
5265		status = ath_tx_form_aggr(sc, an, tid, &bf_q);
5266
5267		DPRINTF(sc, ATH_DEBUG_SW_TX_AGGR,
5268		    "%s: ath_tx_form_aggr() status=%d\n", __func__, status);
5269
5270		/*
5271		 * No frames to be picked up - out of BAW
5272		 */
5273		if (TAILQ_EMPTY(&bf_q))
5274			break;
5275
5276		/*
5277		 * This assumes that the descriptors in the ath_bufhead
5278		 * are already linked together via bf_next pointers.
5279		 */
5280		bf = TAILQ_FIRST(&bf_q);
5281
5282		if (status == ATH_AGGR_8K_LIMITED)
5283			sc->sc_aggr_stats.aggr_rts_aggr_limited++;
5284
5285		/*
5286		 * If it's the only frame, send it as a non-aggregate;
5287		 * assume that ath_tx_form_aggr() has checked
5288		 * whether it's in the BAW and added it appropriately.
5289		 */
5290		if (bf->bf_state.bfs_nframes == 1) {
5291			DPRINTF(sc, ATH_DEBUG_SW_TX_AGGR,
5292			    "%s: single-frame aggregate\n", __func__);
5293
5294			/* Update CLRDMASK just before this frame is queued */
5295			ath_tx_update_clrdmask(sc, tid, bf);
5296
5297			bf->bf_state.bfs_aggr = 0;
5298			bf->bf_state.bfs_ndelim = 0;
5299			ath_tx_setds(sc, bf);
5300			ath_hal_clr11n_aggr(sc->sc_ah, bf->bf_desc);
5301			if (status == ATH_AGGR_BAW_CLOSED)
5302				sc->sc_aggr_stats.aggr_baw_closed_single_pkt++;
5303			else
5304				sc->sc_aggr_stats.aggr_single_pkt++;
5305		} else {
5306			DPRINTF(sc, ATH_DEBUG_SW_TX_AGGR,
5307			    "%s: multi-frame aggregate: %d frames, "
5308			    "length %d\n",
5309			     __func__, bf->bf_state.bfs_nframes,
5310			    bf->bf_state.bfs_al);
5311			bf->bf_state.bfs_aggr = 1;
5312			sc->sc_aggr_stats.aggr_pkts[bf->bf_state.bfs_nframes]++;
5313			sc->sc_aggr_stats.aggr_aggr_pkt++;
5314
5315			/* Update CLRDMASK just before this frame is queued */
5316			ath_tx_update_clrdmask(sc, tid, bf);
5317
5318			/*
5319			 * Calculate the duration/protection as required.
5320			 */
5321			ath_tx_calc_duration(sc, bf);
5322			ath_tx_calc_protection(sc, bf);
5323
5324			/*
5325			 * Update the rate and rtscts information based on the
5326			 * rate decision made by the rate control code;
5327			 * the first frame in the aggregate needs it.
5328			 */
5329			ath_tx_set_rtscts(sc, bf);
5330
5331			/*
5332			 * Setup the relevant descriptor fields
5333			 * for aggregation. The first descriptor
5334			 * already points to the rest in the chain.
5335			 */
5336			ath_tx_setds_11n(sc, bf);
5337
5338		}
5339	queuepkt:
5340		/* Set completion handler, multi-frame aggregate or not */
5341		bf->bf_comp = ath_tx_aggr_comp;
5342
5343		if (bf->bf_state.bfs_tid == IEEE80211_NONQOS_TID)
5344		    device_printf(sc->sc_dev, "%s: TID=16?\n", __func__);
5345
5346		/*
5347		 * Update leak count and frame config if we're leaking frames.
5348		 *
5349		 * XXX TODO: it should update all frames in an aggregate
5350		 * correctly!
5351		 */
5352		ath_tx_leak_count_update(sc, tid, bf);
5353
5354		/* Punt to txq */
5355		ath_tx_handoff(sc, txq, bf);
5356
5357		/* Track outstanding buffer count to hardware */
5358		/* aggregates are "one" buffer */
5359		tid->hwq_depth++;
5360
5361		/*
5362		 * Break out if ath_tx_form_aggr() indicated
5363		 * there can't be any further progress (eg BAW is full.)
5364		 * Checking for an empty txq is done above.
5365		 *
5366		 * XXX locking on txq here?
5367		 */
5368		/* XXX TXQ locking */
5369		if (txq->axq_aggr_depth >= sc->sc_hwq_limit_aggr ||
5370		    (status == ATH_AGGR_BAW_CLOSED ||
5371		     status == ATH_AGGR_LEAK_CLOSED))
5372			break;
5373	}
5374}
5375
5376/*
5377 * Schedule some packets from the given node/TID to the hardware.
5378 *
5379 * XXX TODO: this routine doesn't enforce the maximum TXQ depth.
5380 * It just dumps frames into the TXQ.  We should limit how deep
5381 * the transmit queue can grow for frames dispatched to the given
5382 * TXQ.
5383 *
5384 * To avoid locking issues, either we need to own the TXQ lock
5385 * at this point, or we need to pass in the maximum frame count
5386 * from the caller.
5387 */
5388void
5389ath_tx_tid_hw_queue_norm(struct ath_softc *sc, struct ath_node *an,
5390    struct ath_tid *tid)
5391{
5392	struct ath_buf *bf;
5393	struct ath_txq *txq = sc->sc_ac2q[tid->ac];
5394
5395	DPRINTF(sc, ATH_DEBUG_SW_TX, "%s: node %p: TID %d: called\n",
5396	    __func__, an, tid->tid);
5397
5398	ATH_TX_LOCK_ASSERT(sc);
5399
5400	/* Check - is AMPDU pending or running? then print out something */
5401	if (ath_tx_ampdu_pending(sc, an, tid->tid))
5402		device_printf(sc->sc_dev, "%s: tid=%d, ampdu pending?\n",
5403		    __func__, tid->tid);
5404	if (ath_tx_ampdu_running(sc, an, tid->tid))
5405		device_printf(sc->sc_dev, "%s: tid=%d, ampdu running?\n",
5406		    __func__, tid->tid);
5407
5408	for (;;) {
5410		/*
5411		 * If the upper layers have paused the TID, don't
5412		 * queue any further packets.
5413		 *
5414		 * XXX if we are leaking frames, make sure we decrement
5415		 * that counter _and_ we continue here.
5416		 */
5417		if (! ath_tx_tid_can_tx_or_sched(sc, tid))
5418			break;
5419
5420		bf = ATH_TID_FIRST(tid);
5421		if (bf == NULL) {
5422			break;
5423		}
5424
5425		ATH_TID_REMOVE(tid, bf, bf_list);
5426
5427		/* Sanity check! */
5428		if (tid->tid != bf->bf_state.bfs_tid) {
5429			device_printf(sc->sc_dev, "%s: bfs_tid %d !="
5430			    " tid %d\n",
5431			    __func__, bf->bf_state.bfs_tid, tid->tid);
5432		}
5433		/* Normal completion handler */
5434		bf->bf_comp = ath_tx_normal_comp;
5435
5436		/*
5437		 * Override this for now, until the non-aggregate
5438		 * completion handler correctly handles software retransmits.
5439		 */
5440		bf->bf_state.bfs_txflags |= HAL_TXDESC_CLRDMASK;
5441
5442		/* Update CLRDMASK just before this frame is queued */
5443		ath_tx_update_clrdmask(sc, tid, bf);
5444
5445		/* Program descriptors + rate control */
5446		ath_tx_do_ratelookup(sc, bf);
5447		ath_tx_calc_duration(sc, bf);
5448		ath_tx_calc_protection(sc, bf);
5449		ath_tx_set_rtscts(sc, bf);
5450		ath_tx_rate_fill_rcflags(sc, bf);
5451		ath_tx_setds(sc, bf);
5452
5453		/*
5454		 * Update the current leak count if
5455		 * we're leaking frames; and set the
5456		 * MORE flag as appropriate.
5457		 */
5458		ath_tx_leak_count_update(sc, tid, bf);
5459
5460		/* Track outstanding buffer count to hardware */
5461		/* aggregates are "one" buffer */
5462		tid->hwq_depth++;
5463
5464		/* Punt to hardware or software txq */
5465		ath_tx_handoff(sc, txq, bf);
5466	}
5467}
5468
5469/*
5470 * Schedule some packets to the given hardware queue.
5471 *
5472 * This function walks the list of TIDs (ie, ath_node TIDs
5473 * with queued traffic) and attempts to schedule traffic
5474 * from them.
5475 *
5476 * TID scheduling is implemented as a FIFO, with TIDs being
5477 * added to the end of the queue after some frames have been
5478 * scheduled.
5479 */
5480void
5481ath_txq_sched(struct ath_softc *sc, struct ath_txq *txq)
5482{
5483	struct ath_tid *tid, *next, *last;
5484
5485	ATH_TX_LOCK_ASSERT(sc);
5486
5487	/*
5488	 * Don't schedule if the hardware queue is busy.
5489	 * This (hopefully) gives some more time to aggregate
5490	 * some packets in the aggregation queue.
5491	 *
5492	 * XXX It doesn't stop a parallel sender from sneaking
5493	 * in and transmitting a frame!
5494	 */
5495	/* XXX TXQ locking */
5496	if (txq->axq_aggr_depth + txq->fifo.axq_depth >= sc->sc_hwq_limit_aggr) {
5497		sc->sc_aggr_stats.aggr_sched_nopkt++;
5498		return;
5499	}
5500	if (txq->axq_depth >= sc->sc_hwq_limit_nonaggr) {
5501		sc->sc_aggr_stats.aggr_sched_nopkt++;
5502		return;
5503	}
5504
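	/*
	 * Note the last TID currently on the list; the walk below stops
	 * once the original set of scheduled TIDs has been visited.
	 */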
5505	last = TAILQ_LAST(&txq->axq_tidq, axq_t_s);
5506
5507	TAILQ_FOREACH_SAFE(tid, &txq->axq_tidq, axq_qelem, next) {
5508		/*
5509		 * Suspend paused queues here; they'll be resumed
5510		 * once the addba completes or times out.
5511		 */
5512		DPRINTF(sc, ATH_DEBUG_SW_TX, "%s: tid=%d, paused=%d\n",
5513		    __func__, tid->tid, tid->paused);
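		/*
		 * Remove the TID from the schedule list for now; it's
		 * re-added below if it still has traffic queued.
		 */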
5514		ath_tx_tid_unsched(sc, tid);
5515		/*
5516		 * This node may be in power-save and we're leaking
5517		 * a frame; be careful.
5518		 */
5519		if (! ath_tx_tid_can_tx_or_sched(sc, tid)) {
5520			continue;
5521		}
5522		if (ath_tx_ampdu_running(sc, tid->an, tid->tid))
5523			ath_tx_tid_hw_queue_aggr(sc, tid->an, tid);
5524		else
5525			ath_tx_tid_hw_queue_norm(sc, tid->an, tid);
5526
5527		/* Not empty? Re-schedule */
5528		if (tid->axq_depth != 0)
5529			ath_tx_tid_sched(sc, tid);
5530
5531		/*
5532		 * Give the software queue time to aggregate more
5533		 * packets.  If we aren't running aggregation then
5534		 * we should still limit the hardware queue depth.
5535		 */
5536		/* XXX TXQ locking */
5537		if (txq->axq_aggr_depth + txq->fifo.axq_depth >= sc->sc_hwq_limit_aggr) {
5538			break;
5539		}
5540		if (txq->axq_depth >= sc->sc_hwq_limit_nonaggr) {
5541			break;
5542		}
5543
5544		/*
5545		 * If this was the last entry on the original list, stop.
5546		 * Otherwise nodes that have been rescheduled onto the end
5547		 * of the TID FIFO list will just keep being rescheduled.
5548		 *
5549		 * XXX What should we do about nodes that were paused
5550		 * but are pending a leaking frame in response to a ps-poll?
5551		 * They'll be put at the front of the list; so they'll
5552		 * prematurely trigger this condition! Ew.
5553		 */
5554		if (tid == last)
5555			break;
5556	}
5557}
5558
5559/*
5560 * TX addba handling
5561 */
5562
5563/*
5564 * Return net80211 TID struct pointer, or NULL for none
5565 */
5566struct ieee80211_tx_ampdu *
5567ath_tx_get_tx_tid(struct ath_node *an, int tid)
5568{
5569	struct ieee80211_node *ni = &an->an_node;
5570	struct ieee80211_tx_ampdu *tap;
5571
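	/* The non-QoS TID has no ieee80211_tx_ampdu state */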
5572	if (tid == IEEE80211_NONQOS_TID)
5573		return NULL;
5574
5575	tap = &ni->ni_tx_ampdu[tid];
5576	return tap;
5577}
5578
5579/*
5580 * Is AMPDU-TX running?
5581 */
5582static int
5583ath_tx_ampdu_running(struct ath_softc *sc, struct ath_node *an, int tid)
5584{
5585	struct ieee80211_tx_ampdu *tap;
5586
5587	if (tid == IEEE80211_NONQOS_TID)
5588		return 0;
5589
5590	tap = ath_tx_get_tx_tid(an, tid);
5591	if (tap == NULL)
5592		return 0;	/* Not valid; default to not running */
5593
5594	return !! (tap->txa_flags & IEEE80211_AGGR_RUNNING);
5595}
5596
5597/*
5598 * Is AMPDU-TX negotiation pending?
5599 */
5600static int
5601ath_tx_ampdu_pending(struct ath_softc *sc, struct ath_node *an, int tid)
5602{
5603	struct ieee80211_tx_ampdu *tap;
5604
5605	if (tid == IEEE80211_NONQOS_TID)
5606		return 0;
5607
5608	tap = ath_tx_get_tx_tid(an, tid);
5609	if (tap == NULL)
5610		return 0;	/* Not valid; default to not pending */
5611
5612	return !! (tap->txa_flags & IEEE80211_AGGR_XCHGPEND);
5613}
5614
5620/*
5621 * Method to handle sending an ADDBA request.
5622 *
5623 * We tap this so the relevant flags can be set to pause the TID
5624 * whilst waiting for the response.
5625 *
5626 * XXX there's no timeout handler we can override?
5627 */
5628int
5629ath_addba_request(struct ieee80211_node *ni, struct ieee80211_tx_ampdu *tap,
5630    int dialogtoken, int baparamset, int batimeout)
5631{
5632	struct ath_softc *sc = ni->ni_ic->ic_ifp->if_softc;
5633	int tid = tap->txa_tid;
5634	struct ath_node *an = ATH_NODE(ni);
5635	struct ath_tid *atid = &an->an_tid[tid];
5636
5637	/*
5638	 * XXX danger Will Robinson!
5639	 *
5640	 * Although the taskqueue may be running and scheduling some more
5641	 * packets, these should all be _before_ the addba sequence number.
5642	 * However, net80211 will keep self-assigning sequence numbers
5643	 * until addba has been negotiated.
5644	 *
5645	 * In the past, these packets would be "paused" (which still works
5646	 * fine, as they're being scheduled to the driver in the same
5647	 * serialised method which is calling the addba request routine)
5648	 * and when the aggregation session begins, they'll be dequeued
5649	 * as aggregate packets and added to the BAW. However, now there's
5650	 * a "bf->bf_state.bfs_dobaw" flag, and this isn't set for these
5651	 * packets. Thus they never get included in the BAW tracking and
5652	 * this can cause the initial burst of packets after the addba
5653	 * negotiation to "hang", as they quickly fall outside the BAW.
5654	 *
5655	 * The "eventual" solution should be to tag these packets with
5656	 * dobaw. Although net80211 has given us a sequence number,
5657	 * it'll be "after" the left edge of the BAW and thus it'll
5658	 * fall within it.
5659	 */
5660	ATH_TX_LOCK(sc);
5661	/*
5662	 * This is a bit annoying.  Until net80211 HT code inherits some
5663	 * (any) locking, we may have this called in parallel BUT only
5664	 * one response/timeout will be called.  Grr.
5665	 */
5666	if (atid->addba_tx_pending == 0) {
5667		ath_tx_tid_pause(sc, atid);
5668		atid->addba_tx_pending = 1;
5669	}
5670	ATH_TX_UNLOCK(sc);
5671
5672	DPRINTF(sc, ATH_DEBUG_SW_TX_CTRL,
5673	    "%s: %6D: called; dialogtoken=%d, baparamset=%d, batimeout=%d\n",
5674	    __func__,
5675	    ni->ni_macaddr,
5676	    ":",
5677	    dialogtoken, baparamset, batimeout);
5678	DPRINTF(sc, ATH_DEBUG_SW_TX_CTRL,
5679	    "%s: txa_start=%d, ni_txseqs=%d\n",
5680	    __func__, tap->txa_start, ni->ni_txseqs[tid]);
5681
5682	return sc->sc_addba_request(ni, tap, dialogtoken, baparamset,
5683	    batimeout);
5684}
5685
5686/*
5687 * Handle an ADDBA response.
5688 *
5689 * We unpause the queue so TX'ing can resume.
5690 *
5691 * Any packets TX'ed from this point should be tagged as "aggregate"
5692 * (whether they're actually aggregated or not) so the BAW is updated.
5693 *
5694 * Note! net80211 keeps self-assigning sequence numbers until
5695 * ampdu is negotiated. This means the initially-negotiated BAW left
5696 * edge won't match the ni->ni_txseq.
5697 *
5698 * So, being very dirty, the BAW left edge is "slid" here to match
5699 * ni->ni_txseq.
5700 *
5701 * What likely SHOULD happen is that all packets subsequent to the
5702 * addba request should be tagged as aggregate (even though they're
5703 * queued as non-aggregate frames), thus updating the BAW. For now
5704 * though, I'll just slide the window.
5705 */
5706int
5707ath_addba_response(struct ieee80211_node *ni, struct ieee80211_tx_ampdu *tap,
5708    int status, int code, int batimeout)
5709{
5710	struct ath_softc *sc = ni->ni_ic->ic_ifp->if_softc;
5711	int tid = tap->txa_tid;
5712	struct ath_node *an = ATH_NODE(ni);
5713	struct ath_tid *atid = &an->an_tid[tid];
5714	int r;
5715
5716	DPRINTF(sc, ATH_DEBUG_SW_TX_CTRL,
5717	    "%s: %6D: called; status=%d, code=%d, batimeout=%d\n", __func__,
5718	    ni->ni_macaddr,
5719	    ":",
5720	    status, code, batimeout);
5721
5722	DPRINTF(sc, ATH_DEBUG_SW_TX_CTRL,
5723	    "%s: txa_start=%d, ni_txseqs=%d\n",
5724	    __func__, tap->txa_start, ni->ni_txseqs[tid]);
5725
5726	/*
5727	 * Call this first, so the interface flags get updated
5728	 * before the TID is unpaused. Otherwise a race condition
5729	 * exists where the unpaused TID still doesn't yet have
5730	 * IEEE80211_AGGR_RUNNING set.
5731	 */
5732	r = sc->sc_addba_response(ni, tap, status, code, batimeout);
5733
5734	ATH_TX_LOCK(sc);
5735	atid->addba_tx_pending = 0;
5736	/*
5737	 * XXX dirty!
5738	 * Slide the BAW left edge to wherever net80211 left it for us.
5739	 * Read above for more information.
5740	 */
5741	tap->txa_start = ni->ni_txseqs[tid];
5742	ath_tx_tid_resume(sc, atid);
5743	ATH_TX_UNLOCK(sc);
5744	return r;
5745}
5746
5748/*
5749 * Stop ADDBA on a queue.
5750 *
5751 * This can be called whilst BAR TX is currently active on the queue,
5752 * so make sure this is unblocked before continuing.
5753 */
5754void
5755ath_addba_stop(struct ieee80211_node *ni, struct ieee80211_tx_ampdu *tap)
5756{
5757	struct ath_softc *sc = ni->ni_ic->ic_ifp->if_softc;
5758	int tid = tap->txa_tid;
5759	struct ath_node *an = ATH_NODE(ni);
5760	struct ath_tid *atid = &an->an_tid[tid];
5761	ath_bufhead bf_cq;
5762	struct ath_buf *bf;
5763
5764	DPRINTF(sc, ATH_DEBUG_SW_TX_CTRL, "%s: %6D: called\n",
5765	    __func__,
5766	    ni->ni_macaddr,
5767	    ":");
5768
5769	/*
5770	 * Pause TID traffic early, so there aren't any races.
5771	 * Unblock any pending BAR-held traffic if it's currently paused.
5772	 */
5773	ATH_TX_LOCK(sc);
5774	ath_tx_tid_pause(sc, atid);
5775	if (atid->bar_wait) {
5776		/*
5777		 * bar_unsuspend() expects bar_tx == 1, as it should be
5778		 * called from the TX completion path.  This quietens
5779		 * the warning.  It's cleared for us anyway.
5780		 */
5781		atid->bar_tx = 1;
5782		ath_tx_tid_bar_unsuspend(sc, atid);
5783	}
5784	ATH_TX_UNLOCK(sc);
5785
5786	/* There's no need to hold the TXQ lock here */
5787	sc->sc_addba_stop(ni, tap);
5788
5789	/*
5790	 * ath_tx_tid_cleanup will resume the TID if possible, otherwise
5791	 * it'll set the cleanup flag, and it'll be unpaused once
5792	 * things have been cleaned up.
5793	 */
5794	TAILQ_INIT(&bf_cq);
5795	ATH_TX_LOCK(sc);
5796	ath_tx_tid_cleanup(sc, an, tid, &bf_cq);
5797	/*
5798	 * Unpause the TID if no cleanup is required.
5799	 */
5800	if (! atid->cleanup_inprogress)
5801		ath_tx_tid_resume(sc, atid);
5802	ATH_TX_UNLOCK(sc);
5803
5804	/* Handle completing frames and fail them */
5805	while ((bf = TAILQ_FIRST(&bf_cq)) != NULL) {
5806		TAILQ_REMOVE(&bf_cq, bf, bf_list);
5807		ath_tx_default_comp(sc, bf, 1);
5808	}
5810}
5811
5812/*
5813 * Handle a node reassociation.
5814 *
5815 * We may have a bunch of frames queued to the hardware; those need
5816 * to be marked as cleanup.
5817 */
5818void
5819ath_tx_node_reassoc(struct ath_softc *sc, struct ath_node *an)
5820{
5821	struct ath_tid *tid;
5822	int i;
5823	ath_bufhead bf_cq;
5824	struct ath_buf *bf;
5825
5826	TAILQ_INIT(&bf_cq);
5827
5828	ATH_TX_UNLOCK_ASSERT(sc);
5829
5830	ATH_TX_LOCK(sc);
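	/*
	 * Pause and clean up any TID that still has frames queued
	 * to the hardware.
	 */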
5831	for (i = 0; i < IEEE80211_TID_SIZE; i++) {
5832		tid = &an->an_tid[i];
5833		if (tid->hwq_depth == 0)
5834			continue;
5835		ath_tx_tid_pause(sc, tid);
5836		DPRINTF(sc, ATH_DEBUG_NODE,
5837		    "%s: %6D: TID %d: cleaning up TID\n",
5838		    __func__,
5839		    an->an_node.ni_macaddr,
5840		    ":",
5841		    i);
5842		ath_tx_tid_cleanup(sc, an, i, &bf_cq);
5843		/*
5844		 * Unpause the TID if no cleanup is required.
5845		 */
5846		if (! tid->cleanup_inprogress)
5847			ath_tx_tid_resume(sc, tid);
5848	}
5849	ATH_TX_UNLOCK(sc);
5850
5851	/* Handle completing frames and fail them */
5852	while ((bf = TAILQ_FIRST(&bf_cq)) != NULL) {
5853		TAILQ_REMOVE(&bf_cq, bf, bf_list);
5854		ath_tx_default_comp(sc, bf, 1);
5855	}
5856}
5857
5858/*
5859 * Note: net80211 bar_timeout() doesn't call this function on BAR failure;
5860 * it simply tears down the aggregation session. Ew.
5861 *
5862 * It however will call ieee80211_ampdu_stop() which will call
5863 * ic->ic_addba_stop().
5864 *
5865 * XXX This uses a hard-coded max BAR count value; the whole
5866 * XXX BAR TX success or failure should be better handled!
5867 */
5868void
5869ath_bar_response(struct ieee80211_node *ni, struct ieee80211_tx_ampdu *tap,
5870    int status)
5871{
5872	struct ath_softc *sc = ni->ni_ic->ic_ifp->if_softc;
5873	int tid = tap->txa_tid;
5874	struct ath_node *an = ATH_NODE(ni);
5875	struct ath_tid *atid = &an->an_tid[tid];
5876	int attempts = tap->txa_attempts;
5877
5878	DPRINTF(sc, ATH_DEBUG_SW_TX_BAR,
5879	    "%s: %6D: called; txa_tid=%d, atid->tid=%d, status=%d, attempts=%d\n",
5880	    __func__,
5881	    ni->ni_macaddr,
5882	    ":",
5883	    tap->txa_tid,
5884	    atid->tid,
5885	    status,
5886	    attempts);
5887
5888	/* Note: This may update the BAW details */
5889	sc->sc_bar_response(ni, tap, status);
5890
5891	/* Unpause the TID */
5892	/*
5893	 * XXX if this is attempt 50 (the hard-coded maximum), the TID
5894	 * XXX will be downgraded to a non-aggregate session.  So we must
5895	 * XXX unpause the TID here or it'll never be done.
5896	 *
5897	 * Also, don't call it if bar_tx/bar_wait are 0; something
5898	 * has beaten us to the punch? (XXX figure out what?)
5899	 */
5900	if (status == 0 || attempts == 50) {
5901		ATH_TX_LOCK(sc);
5902		if (atid->bar_tx == 0 || atid->bar_wait == 0)
5903			device_printf(sc->sc_dev,
5904			    "%s: huh? bar_tx=%d, bar_wait=%d\n",
5905			    __func__,
5906			    atid->bar_tx, atid->bar_wait);
5907		else
5908			ath_tx_tid_bar_unsuspend(sc, atid);
5909		ATH_TX_UNLOCK(sc);
5910	}
5911}
5912
5913/*
5914 * This is called whenever the pending ADDBA request times out.
5915 * Unpause and reschedule the TID.
5916 */
5917void
5918ath_addba_response_timeout(struct ieee80211_node *ni,
5919    struct ieee80211_tx_ampdu *tap)
5920{
5921	struct ath_softc *sc = ni->ni_ic->ic_ifp->if_softc;
5922	int tid = tap->txa_tid;
5923	struct ath_node *an = ATH_NODE(ni);
5924	struct ath_tid *atid = &an->an_tid[tid];
5925
5926	DPRINTF(sc, ATH_DEBUG_SW_TX_CTRL,
5927	    "%s: %6D: TID=%d, called; resuming\n",
5928	    __func__,
5929	    ni->ni_macaddr,
5930	    ":",
5931	    tid);
5932
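	/*
	 * Clear the pending flag so a subsequent ADDBA request will
	 * pause the TID again.
	 */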
5933	ATH_TX_LOCK(sc);
5934	atid->addba_tx_pending = 0;
5935	ATH_TX_UNLOCK(sc);
5936
5937	/* Note: This updates the aggregate state to (again) pending */
5938	sc->sc_addba_response_timeout(ni, tap);
5939
5940	/* Unpause the TID; which reschedules it */
5941	ATH_TX_LOCK(sc);
5942	ath_tx_tid_resume(sc, atid);
5943	ATH_TX_UNLOCK(sc);
5944}
5945
5946/*
5947 * Check if a node is asleep or not.
5948 */
5949int
5950ath_tx_node_is_asleep(struct ath_softc *sc, struct ath_node *an)
5951{
5952
5953	ATH_TX_LOCK_ASSERT(sc);
5954
5955	return (an->an_is_powersave);
5956}
5957
5958/*
5959 * Mark a node as currently "in power save."
5960 * This suspends all traffic on the node.
5961 *
5962 * This must be called with the node/tx locks free.
5963 *
5964 * XXX TODO: the locking silliness below is due to how the node
5965 * locking currently works.  Right now, the node lock is grabbed
5966 * to do rate control lookups and these are done with the TX
5967 * queue lock held.  This means the node lock can't be grabbed
5968 * first here or a LOR will occur.
5969 *
5970 * Eventually (hopefully!) the TX path code will only grab
5971 * the TXQ lock when transmitting and the ath_node lock when
5972 * doing node/TID operations.  There are other complications -
5973 * the sched/unsched operations involve walking the per-txq
5974 * 'active tid' list and this requires both locks to be held.
5975 */
5976void
5977ath_tx_node_sleep(struct ath_softc *sc, struct ath_node *an)
5978{
5979	struct ath_tid *atid;
5980	struct ath_txq *txq;
5981	int tid;
5982
5983	ATH_TX_UNLOCK_ASSERT(sc);
5984
5985	/* Suspend all traffic on the node */
5986	ATH_TX_LOCK(sc);
5987
5988	if (an->an_is_powersave) {
5989		device_printf(sc->sc_dev,
5990		    "%s: %6D: node was already asleep!\n",
5991		    __func__,
5992		    an->an_node.ni_macaddr,
5993		    ":");
5994		ATH_TX_UNLOCK(sc);
5995		return;
5996	}
5997
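	/* Pause every TID; queued frames stay put until the node wakes up */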
5998	for (tid = 0; tid < IEEE80211_TID_SIZE; tid++) {
5999		atid = &an->an_tid[tid];
6000		txq = sc->sc_ac2q[atid->ac];
6001
6002		ath_tx_tid_pause(sc, atid);
6003	}
6004
6005	/* Mark node as in powersaving */
6006	an->an_is_powersave = 1;
6007
6008	ATH_TX_UNLOCK(sc);
6009}
6010
6011/*
6012 * Mark a node as currently "awake."
6013 * This resumes all traffic to the node.
6014 */
6015void
6016ath_tx_node_wakeup(struct ath_softc *sc, struct ath_node *an)
6017{
6018	struct ath_tid *atid;
6019	struct ath_txq *txq;
6020	int tid;
6021
6022	ATH_TX_UNLOCK_ASSERT(sc);
6023
6024	ATH_TX_LOCK(sc);
6025
6026	/* Sanity: the node shouldn't already be marked as awake */
6027	if (an->an_is_powersave == 0) {
6028		ATH_TX_UNLOCK(sc);
6029		device_printf(sc->sc_dev,
6030		    "%s: an=%p: node was already awake\n",
6031		    __func__, an);
6032		return;
6033	}
6034
6035	/* Mark node as awake */
6036	an->an_is_powersave = 0;
6037	/*
6038	 * Clear any pending leaked frame requests
6039	 */
6040	an->an_leak_count = 0;
6041
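	/* Resume each TID so any queued traffic gets rescheduled */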
6042	for (tid = 0; tid < IEEE80211_TID_SIZE; tid++) {
6043		atid = &an->an_tid[tid];
6044		txq = sc->sc_ac2q[atid->ac];
6045
6046		ath_tx_tid_resume(sc, atid);
6047	}
6048	ATH_TX_UNLOCK(sc);
6049}
6050
6051static int
6052ath_legacy_dma_txsetup(struct ath_softc *sc)
6053{
6054
6055	/* nothing new needed */
6056	return (0);
6057}
6058
6059static int
6060ath_legacy_dma_txteardown(struct ath_softc *sc)
6061{
6062
6063	/* nothing new needed */
6064	return (0);
6065}
6066
6067void
6068ath_xmit_setup_legacy(struct ath_softc *sc)
6069{
6070	/*
6071	 * For now, just set the descriptor length to sizeof(ath_desc);
6072	 * worry about extracting the real length out of the HAL later.
6073	 */
6074	sc->sc_tx_desclen = sizeof(struct ath_desc);
6075	sc->sc_tx_statuslen = sizeof(struct ath_desc);
6076	sc->sc_tx_nmaps = 1;	/* only one buffer per TX desc */
6077
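	/* Attach the legacy DMA TX methods */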
6078	sc->sc_tx.xmit_setup = ath_legacy_dma_txsetup;
6079	sc->sc_tx.xmit_teardown = ath_legacy_dma_txteardown;
6080	sc->sc_tx.xmit_attach_comp_func = ath_legacy_attach_comp_func;
6081
6082	sc->sc_tx.xmit_dma_restart = ath_legacy_tx_dma_restart;
6083	sc->sc_tx.xmit_handoff = ath_legacy_xmit_handoff;
6084
6085	sc->sc_tx.xmit_drain = ath_legacy_tx_drain;
6086}
6087