/*-
 * Copyright (c) 2012 Chelsio Communications, Inc.
 * All rights reserved.
 * Written by: Navdeep Parhar <np@FreeBSD.org>
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD: stable/10/sys/dev/cxgbe/tom/t4_cpl_io.c 309108 2016-11-24 14:48:46Z jch $");

#include "opt_inet.h"

#ifdef TCP_OFFLOAD
#include <sys/param.h>
#include <sys/types.h>
#include <sys/kernel.h>
#include <sys/ktr.h>
#include <sys/module.h>
#include <sys/protosw.h>
#include <sys/domain.h>
#include <sys/socket.h>
#include <sys/socketvar.h>
#include <sys/sglist.h>
#include <netinet/in.h>
#include <netinet/in_pcb.h>
#include <netinet/ip.h>
#include <netinet/ip6.h>
#include <netinet/tcp_var.h>
#define TCPSTATES
#include <netinet/tcp_fsm.h>
#include <netinet/tcp_seq.h>
#include <netinet/toecore.h>

#include "common/common.h"
#include "common/t4_msg.h"
#include "common/t4_regs.h"
#include "common/t4_tcb.h"
#include "tom/t4_tom_l2t.h"
#include "tom/t4_tom.h"

VNET_DECLARE(int, tcp_do_autosndbuf);
#define V_tcp_do_autosndbuf VNET(tcp_do_autosndbuf)
VNET_DECLARE(int, tcp_autosndbuf_inc);
#define V_tcp_autosndbuf_inc VNET(tcp_autosndbuf_inc)
VNET_DECLARE(int, tcp_autosndbuf_max);
#define V_tcp_autosndbuf_max VNET(tcp_autosndbuf_max)
VNET_DECLARE(int, tcp_do_autorcvbuf);
#define V_tcp_do_autorcvbuf VNET(tcp_do_autorcvbuf)
VNET_DECLARE(int, tcp_autorcvbuf_inc);
#define V_tcp_autorcvbuf_inc VNET(tcp_autorcvbuf_inc)
VNET_DECLARE(int, tcp_autorcvbuf_max);
#define V_tcp_autorcvbuf_max VNET(tcp_autorcvbuf_max)

/*
 * For ULP connections HW may add headers, e.g., for digests, that aren't part
 * of the messages sent by the host but that are part of the TCP payload and
 * therefore consume TCP sequence space.  Tx connection parameters that
 * operate in TCP sequence space are affected by the HW additions and need to
 * compensate for them to accurately track TCP sequence numbers. This array
 * contains the compensating extra lengths for ULP packets.  It is indexed by
 * a packet's ULP submode.
 */
const unsigned int t4_ulp_extra_len[] = {0, 4, 4, 8};
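
/*
 * For iSCSI the two submode bits presumably select the header and data
 * digests (a 4-byte CRC32c each), which matches the 0/4/4/8 lengths above.
 */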

/*
 * Return the length of any HW additions that will be made to a Tx packet.
 * Such additions can happen for some types of ULP packets.
 */
static inline unsigned int
ulp_extra_len(struct mbuf *m, int *ulp_mode)
{
	struct m_tag    *mtag;

	if ((mtag = m_tag_find(m, CXGBE_ISCSI_MBUF_TAG, NULL)) == NULL)
		return (0);
	*ulp_mode = *((int *)(mtag + 1));

	return (t4_ulp_extra_len[*ulp_mode & 3]);
}

void
send_flowc_wr(struct toepcb *toep, struct flowc_tx_params *ftxp)
{
	struct wrqe *wr;
	struct fw_flowc_wr *flowc;
	unsigned int nparams = ftxp ? 8 : 6, flowclen;
	struct vi_info *vi = toep->vi;
	struct port_info *pi = vi->pi;
	struct adapter *sc = pi->adapter;
	unsigned int pfvf = G_FW_VIID_PFN(vi->viid) << S_FW_VIID_PFN;
	struct ofld_tx_sdesc *txsd = &toep->txsd[toep->txsd_pidx];

	KASSERT(!(toep->flags & TPF_FLOWC_WR_SENT),
	    ("%s: flowc for tid %u sent already", __func__, toep->tid));

	flowclen = sizeof(*flowc) + nparams * sizeof(struct fw_flowc_mnemval);

	wr = alloc_wrqe(roundup2(flowclen, 16), toep->ofld_txq);
	if (wr == NULL) {
		/* XXX */
		panic("%s: allocation failure.", __func__);
	}
	flowc = wrtod(wr);
	memset(flowc, 0, wr->wr_len);

	flowc->op_to_nparams = htobe32(V_FW_WR_OP(FW_FLOWC_WR) |
	    V_FW_FLOWC_WR_NPARAMS(nparams));
	flowc->flowid_len16 = htonl(V_FW_WR_LEN16(howmany(flowclen, 16)) |
	    V_FW_WR_FLOWID(toep->tid));

	flowc->mnemval[0].mnemonic = FW_FLOWC_MNEM_PFNVFN;
	flowc->mnemval[0].val = htobe32(pfvf);
	flowc->mnemval[1].mnemonic = FW_FLOWC_MNEM_CH;
	flowc->mnemval[1].val = htobe32(pi->tx_chan);
	flowc->mnemval[2].mnemonic = FW_FLOWC_MNEM_PORT;
	flowc->mnemval[2].val = htobe32(pi->tx_chan);
	flowc->mnemval[3].mnemonic = FW_FLOWC_MNEM_IQID;
	flowc->mnemval[3].val = htobe32(toep->ofld_rxq->iq.abs_id);
	if (ftxp) {
		uint32_t sndbuf = min(ftxp->snd_space, sc->tt.sndbuf);

		flowc->mnemval[4].mnemonic = FW_FLOWC_MNEM_SNDNXT;
		flowc->mnemval[4].val = htobe32(ftxp->snd_nxt);
		flowc->mnemval[5].mnemonic = FW_FLOWC_MNEM_RCVNXT;
		flowc->mnemval[5].val = htobe32(ftxp->rcv_nxt);
		flowc->mnemval[6].mnemonic = FW_FLOWC_MNEM_SNDBUF;
		flowc->mnemval[6].val = htobe32(sndbuf);
		flowc->mnemval[7].mnemonic = FW_FLOWC_MNEM_MSS;
		flowc->mnemval[7].val = htobe32(ftxp->mss);

		CTR6(KTR_CXGBE,
		    "%s: tid %u, mss %u, sndbuf %u, snd_nxt 0x%x, rcv_nxt 0x%x",
		    __func__, toep->tid, ftxp->mss, sndbuf, ftxp->snd_nxt,
		    ftxp->rcv_nxt);
	} else {
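		/*
		 * No tx params are available (e.g., the connection never
		 * reached ESTABLISHED); these are presumably just safe
		 * minimal placeholder values.
		 */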
		flowc->mnemval[4].mnemonic = FW_FLOWC_MNEM_SNDBUF;
		flowc->mnemval[4].val = htobe32(512);
		flowc->mnemval[5].mnemonic = FW_FLOWC_MNEM_MSS;
		flowc->mnemval[5].val = htobe32(512);

		CTR2(KTR_CXGBE, "%s: tid %u", __func__, toep->tid);
	}

	txsd->tx_credits = howmany(flowclen, 16);
	txsd->plen = 0;
	KASSERT(toep->tx_credits >= txsd->tx_credits && toep->txsd_avail > 0,
	    ("%s: not enough credits (%d)", __func__, toep->tx_credits));
	toep->tx_credits -= txsd->tx_credits;
	if (__predict_false(++toep->txsd_pidx == toep->txsd_total))
		toep->txsd_pidx = 0;
	toep->txsd_avail--;

	toep->flags |= TPF_FLOWC_WR_SENT;
	t4_wrq_tx(sc, wr);
}

void
send_reset(struct adapter *sc, struct toepcb *toep, uint32_t snd_nxt)
{
	struct wrqe *wr;
	struct cpl_abort_req *req;
	int tid = toep->tid;
	struct inpcb *inp = toep->inp;
	struct tcpcb *tp = intotcpcb(inp);	/* don't use if INP_DROPPED */

	INP_WLOCK_ASSERT(inp);

	CTR6(KTR_CXGBE, "%s: tid %d (%s), toep_flags 0x%x, inp_flags 0x%x%s",
	    __func__, toep->tid,
	    inp->inp_flags & INP_DROPPED ? "inp dropped" :
	    tcpstates[tp->t_state],
	    toep->flags, inp->inp_flags,
	    toep->flags & TPF_ABORT_SHUTDOWN ?
	    " (abort already in progress)" : "");

	if (toep->flags & TPF_ABORT_SHUTDOWN)
		return;	/* abort already in progress */

	toep->flags |= TPF_ABORT_SHUTDOWN;

	KASSERT(toep->flags & TPF_FLOWC_WR_SENT,
	    ("%s: flowc_wr not sent for tid %d.", __func__, tid));

	wr = alloc_wrqe(sizeof(*req), toep->ofld_txq);
	if (wr == NULL) {
		/* XXX */
		panic("%s: allocation failure.", __func__);
	}
	req = wrtod(wr);

	INIT_TP_WR_MIT_CPL(req, CPL_ABORT_REQ, tid);
	if (inp->inp_flags & INP_DROPPED)
		req->rsvd0 = htobe32(snd_nxt);
	else
		req->rsvd0 = htobe32(tp->snd_nxt);
	req->rsvd1 = !(toep->flags & TPF_TX_DATA_SENT);
	req->cmd = CPL_ABORT_SEND_RST;

	/*
	 * XXX: What's the correct way to tell that the inp hasn't been detached
	 * from its socket?  Should I even be flushing the snd buffer here?
	 */
	if ((inp->inp_flags & (INP_DROPPED | INP_TIMEWAIT)) == 0) {
		struct socket *so = inp->inp_socket;

		if (so != NULL)	/* because I'm not sure.  See comment above */
			sbflush(&so->so_snd);
	}

	t4_l2t_send(sc, wr, toep->l2te);
}

/*
 * Called when a connection is established to translate the TCP options
 * reported by HW to FreeBSD's native format.
 */
static void
assign_rxopt(struct tcpcb *tp, unsigned int opt)
{
	struct toepcb *toep = tp->t_toe;
	struct inpcb *inp = tp->t_inpcb;
	struct adapter *sc = td_adapter(toep->td);
	int n;

	INP_LOCK_ASSERT(inp);

	if (inp->inp_inc.inc_flags & INC_ISIPV6)
		n = sizeof(struct ip6_hdr) + sizeof(struct tcphdr);
	else
		n = sizeof(struct ip) + sizeof(struct tcphdr);
	tp->t_maxseg = tp->t_maxopd = sc->params.mtus[G_TCPOPT_MSS(opt)] - n;

	CTR4(KTR_CXGBE, "%s: tid %d, mtu_idx %u (%u)", __func__, toep->tid,
	    G_TCPOPT_MSS(opt), sc->params.mtus[G_TCPOPT_MSS(opt)]);

	if (G_TCPOPT_TSTAMP(opt)) {
		tp->t_flags |= TF_RCVD_TSTMP;	/* timestamps ok */
		tp->ts_recent = 0;		/* hmmm */
		tp->ts_recent_age = tcp_ts_getticks();
		tp->t_maxseg -= TCPOLEN_TSTAMP_APPA;
	}

	if (G_TCPOPT_SACK(opt))
		tp->t_flags |= TF_SACK_PERMIT;	/* should already be set */
	else
		tp->t_flags &= ~TF_SACK_PERMIT;	/* sack disallowed by peer */

	if (G_TCPOPT_WSCALE_OK(opt))
		tp->t_flags |= TF_RCVD_SCALE;

	/* Doing window scaling? */
	if ((tp->t_flags & (TF_RCVD_SCALE | TF_REQ_SCALE)) ==
	    (TF_RCVD_SCALE | TF_REQ_SCALE)) {
		tp->rcv_scale = tp->request_r_scale;
		tp->snd_scale = G_TCPOPT_SND_WSCALE(opt);
	}
}

/*
 * Completes some final bits of initialization for just established connections
 * and changes their state to TCPS_ESTABLISHED.
 *
 * The ISNs are from after the exchange of SYNs, i.e., the true ISN + 1.
 */
void
make_established(struct toepcb *toep, uint32_t snd_isn, uint32_t rcv_isn,
    uint16_t opt)
{
	struct inpcb *inp = toep->inp;
	struct socket *so = inp->inp_socket;
	struct tcpcb *tp = intotcpcb(inp);
	long bufsize;
	uint32_t iss = be32toh(snd_isn) - 1;	/* true ISS */
	uint32_t irs = be32toh(rcv_isn) - 1;	/* true IRS */
	uint16_t tcpopt = be16toh(opt);
	struct flowc_tx_params ftxp;

	CURVNET_SET(so->so_vnet);
	INP_WLOCK_ASSERT(inp);
	KASSERT(tp->t_state == TCPS_SYN_SENT ||
	    tp->t_state == TCPS_SYN_RECEIVED,
	    ("%s: TCP state %s", __func__, tcpstates[tp->t_state]));

	CTR4(KTR_CXGBE, "%s: tid %d, toep %p, inp %p",
	    __func__, toep->tid, toep, inp);

	tp->t_state = TCPS_ESTABLISHED;
	tp->t_starttime = ticks;
	TCPSTAT_INC(tcps_connects);

	tp->irs = irs;
	tcp_rcvseqinit(tp);
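	/*
	 * The initial rcv_wnd mirrors the rx credits programmed via opt0's
	 * rcv_bufsize, which appears to be in 1KB units; hence the shift.
	 */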
	tp->rcv_wnd = toep->rx_credits << 10;
	tp->rcv_adv += tp->rcv_wnd;
	tp->last_ack_sent = tp->rcv_nxt;

	/*
	 * If we were unable to send all rx credits via opt0, save the remainder
	 * in rx_credits so that they can be handed over with the next credit
	 * update.
	 */
	SOCKBUF_LOCK(&so->so_rcv);
	bufsize = select_rcv_wnd(so);
	SOCKBUF_UNLOCK(&so->so_rcv);
	toep->rx_credits = bufsize - tp->rcv_wnd;

	tp->iss = iss;
	tcp_sendseqinit(tp);
	tp->snd_una = iss + 1;
	tp->snd_nxt = iss + 1;
	tp->snd_max = iss + 1;

	assign_rxopt(tp, tcpopt);

	SOCKBUF_LOCK(&so->so_snd);
	if (so->so_snd.sb_flags & SB_AUTOSIZE && V_tcp_do_autosndbuf)
		bufsize = V_tcp_autosndbuf_max;
	else
		bufsize = sbspace(&so->so_snd);
	SOCKBUF_UNLOCK(&so->so_snd);

	ftxp.snd_nxt = tp->snd_nxt;
	ftxp.rcv_nxt = tp->rcv_nxt;
	ftxp.snd_space = bufsize;
	ftxp.mss = tp->t_maxseg;
	send_flowc_wr(toep, &ftxp);

	soisconnected(so);
	CURVNET_RESTORE();
}

static int
send_rx_credits(struct adapter *sc, struct toepcb *toep, int credits)
{
	struct wrqe *wr;
	struct cpl_rx_data_ack *req;
	uint32_t dack = F_RX_DACK_CHANGE | V_RX_DACK_MODE(1);

	KASSERT(credits >= 0, ("%s: %d credits", __func__, credits));

	wr = alloc_wrqe(sizeof(*req), toep->ctrlq);
	if (wr == NULL)
		return (0);
	req = wrtod(wr);

	INIT_TP_WR_MIT_CPL(req, CPL_RX_DATA_ACK, toep->tid);
	req->credit_dack = htobe32(dack | V_RX_CREDITS(credits));

	t4_wrq_tx(sc, wr);
	return (credits);
}

void
t4_rcvd(struct toedev *tod, struct tcpcb *tp)
{
	struct adapter *sc = tod->tod_softc;
	struct inpcb *inp = tp->t_inpcb;
	struct socket *so = inp->inp_socket;
	struct sockbuf *sb = &so->so_rcv;
	struct toepcb *toep = tp->t_toe;
	int credits;

	INP_WLOCK_ASSERT(inp);

	SOCKBUF_LOCK(sb);
	KASSERT(toep->sb_cc >= sb->sb_cc,
	    ("%s: sb %p has more data (%d) than last time (%d).",
	    __func__, sb, sb->sb_cc, toep->sb_cc));
	if (toep->ulp_mode == ULP_MODE_ISCSI) {
		toep->rx_credits += toep->sb_cc;
		toep->sb_cc = 0;
	} else {
		toep->rx_credits += toep->sb_cc - sb->sb_cc;
		toep->sb_cc = sb->sb_cc;
	}
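	/*
	 * Return rx credits when the window has shrunk, a sizable batch has
	 * accumulated, or the remaining window would fall below the socket's
	 * low watermark and risk stalling the peer.
	 */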
	if (toep->rx_credits > 0 &&
	    (tp->rcv_wnd <= 32 * 1024 || toep->rx_credits >= 64 * 1024 ||
	    (toep->rx_credits >= 16 * 1024 && tp->rcv_wnd <= 128 * 1024) ||
	    toep->sb_cc + tp->rcv_wnd < sb->sb_lowat)) {

		credits = send_rx_credits(sc, toep, toep->rx_credits);
		toep->rx_credits -= credits;
		tp->rcv_wnd += credits;
		tp->rcv_adv += credits;
	}
	SOCKBUF_UNLOCK(sb);
}

/*
 * Close a connection by sending a CPL_CLOSE_CON_REQ message.
 */
static int
close_conn(struct adapter *sc, struct toepcb *toep)
{
	struct wrqe *wr;
	struct cpl_close_con_req *req;
	unsigned int tid = toep->tid;

	CTR3(KTR_CXGBE, "%s: tid %u%s", __func__, toep->tid,
	    toep->flags & TPF_FIN_SENT ? ", IGNORED" : "");

	if (toep->flags & TPF_FIN_SENT)
		return (0);

	KASSERT(toep->flags & TPF_FLOWC_WR_SENT,
	    ("%s: flowc_wr not sent for tid %u.", __func__, tid));

	wr = alloc_wrqe(sizeof(*req), toep->ofld_txq);
	if (wr == NULL) {
		/* XXX */
		panic("%s: allocation failure.", __func__);
	}
	req = wrtod(wr);

	req->wr.wr_hi = htonl(V_FW_WR_OP(FW_TP_WR) |
	    V_FW_WR_IMMDLEN(sizeof(*req) - sizeof(req->wr)));
	req->wr.wr_mid = htonl(V_FW_WR_LEN16(howmany(sizeof(*req), 16)) |
	    V_FW_WR_FLOWID(tid));
	req->wr.wr_lo = cpu_to_be64(0);
	OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_CLOSE_CON_REQ, tid));
	req->rsvd = 0;

	toep->flags |= TPF_FIN_SENT;
	toep->flags &= ~TPF_SEND_FIN;
	t4_l2t_send(sc, wr, toep->l2te);

	return (0);
}

#define MAX_OFLD_TX_CREDITS (SGE_MAX_WR_LEN / 16)
#define MIN_OFLD_TX_CREDITS (howmany(sizeof(struct fw_ofld_tx_data_wr) + 1, 16))
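
/*
 * Tx credits are 16B units of work request.  MAX_OFLD_TX_CREDITS is the
 * largest WR the SGE will take and MIN_OFLD_TX_CREDITS the smallest WR that
 * can still carry any payload (the WR header plus at least 1 byte).
 */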

/* Maximum amount of immediate data we could stuff in a WR */
static inline int
max_imm_payload(int tx_credits)
{
	const int n = 2;	/* Use only up to 2 desc for imm. data WR */

	KASSERT(tx_credits >= 0 &&
		tx_credits <= MAX_OFLD_TX_CREDITS,
		("%s: %d credits", __func__, tx_credits));

	if (tx_credits < MIN_OFLD_TX_CREDITS)
		return (0);

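	/*
	 * A WR spanning n EQ descriptors can carry n * EQ_ESIZE bytes in all,
	 * minus the fw_ofld_tx_data_wr header; with fewer credits the limit
	 * is the credits themselves (16B each) minus the header.
	 */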
	if (tx_credits >= (n * EQ_ESIZE) / 16)
		return ((n * EQ_ESIZE) - sizeof(struct fw_ofld_tx_data_wr));
	else
		return (tx_credits * 16 - sizeof(struct fw_ofld_tx_data_wr));
}

/* Maximum number of SGL entries we could stuff in a WR */
static inline int
max_dsgl_nsegs(int tx_credits)
{
	int nseg = 1;	/* ulptx_sgl has room for 1, rest ulp_tx_sge_pair */
	int sge_pair_credits = tx_credits - MIN_OFLD_TX_CREDITS;

	KASSERT(tx_credits >= 0 &&
		tx_credits <= MAX_OFLD_TX_CREDITS,
		("%s: %d credits", __func__, tx_credits));

	if (tx_credits < MIN_OFLD_TX_CREDITS)
		return (0);

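	/*
	 * Each 24B ulptx_sge_pair holds two segments; a 16B remainder still
	 * has room for one more len/addr pair.
	 */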
	nseg += 2 * (sge_pair_credits * 16 / 24);
	if ((sge_pair_credits * 16) % 24 == 16)
		nseg++;

	return (nseg);
}

static inline void
write_tx_wr(void *dst, struct toepcb *toep, unsigned int immdlen,
    unsigned int plen, uint8_t credits, int shove, int ulp_mode, int txalign)
{
	struct fw_ofld_tx_data_wr *txwr = dst;
	unsigned int wr_ulp_mode;

	txwr->op_to_immdlen = htobe32(V_WR_OP(FW_OFLD_TX_DATA_WR) |
	    V_FW_WR_IMMDLEN(immdlen));
	txwr->flowid_len16 = htobe32(V_FW_WR_FLOWID(toep->tid) |
	    V_FW_WR_LEN16(credits));

	/* for iscsi, the mode & submode setting is per-packet */
	if (toep->ulp_mode == ULP_MODE_ISCSI)
		wr_ulp_mode = V_TX_ULP_MODE(ulp_mode >> 4) |
		    V_TX_ULP_SUBMODE(ulp_mode & 3);
	else
		wr_ulp_mode = V_TX_ULP_MODE(toep->ulp_mode);

	txwr->lsodisable_to_flags = htobe32(wr_ulp_mode | V_TX_URG(0) | /*XXX*/
	    V_TX_SHOVE(shove));
	txwr->plen = htobe32(plen);

	if (txalign > 0) {
		struct tcpcb *tp = intotcpcb(toep->inp);

		if (plen < 2 * tp->t_maxseg || is_10G_port(toep->vi->pi))
			txwr->lsodisable_to_flags |=
			    htobe32(F_FW_OFLD_TX_DATA_WR_LSODISABLE);
		else
			txwr->lsodisable_to_flags |=
			    htobe32(F_FW_OFLD_TX_DATA_WR_ALIGNPLD |
				(tp->t_flags & TF_NODELAY ? 0 :
				F_FW_OFLD_TX_DATA_WR_ALIGNPLDSHOVE));
	}
}

/*
 * Generate a DSGL from a starting mbuf.  The total number of segments and the
 * maximum segments in any one mbuf are provided.
 */
static void
write_tx_sgl(void *dst, struct mbuf *start, struct mbuf *stop, int nsegs, int n)
{
	struct mbuf *m;
	struct ulptx_sgl *usgl = dst;
	int i, j, rc;
	struct sglist sg;
	struct sglist_seg segs[n];

	KASSERT(nsegs > 0, ("%s: nsegs 0", __func__));

	sglist_init(&sg, n, segs);
	usgl->cmd_nsge = htobe32(V_ULPTX_CMD(ULP_TX_SC_DSGL) |
	    V_ULPTX_NSGE(nsegs));

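	/*
	 * i == -1 selects the len0/addr0 slots in the ulptx_sgl header;
	 * every later segment fills one half of an sge[] pair.
	 */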
	i = -1;
	for (m = start; m != stop; m = m->m_next) {
		rc = sglist_append(&sg, mtod(m, void *), m->m_len);
		if (__predict_false(rc != 0))
			panic("%s: sglist_append %d", __func__, rc);

		for (j = 0; j < sg.sg_nseg; i++, j++) {
			if (i < 0) {
				usgl->len0 = htobe32(segs[j].ss_len);
				usgl->addr0 = htobe64(segs[j].ss_paddr);
			} else {
				usgl->sge[i / 2].len[i & 1] =
				    htobe32(segs[j].ss_len);
				usgl->sge[i / 2].addr[i & 1] =
				    htobe64(segs[j].ss_paddr);
			}
#ifdef INVARIANTS
			nsegs--;
#endif
		}
		sglist_reset(&sg);
	}
	if (i & 1)
		usgl->sge[i / 2].len[1] = htobe32(0);
	KASSERT(nsegs == 0, ("%s: nsegs %d, start %p, stop %p",
	    __func__, nsegs, start, stop));
}

/*
 * Max number of SGL entries an offload tx work request can have.  This is 41
 * (1 + 40) for a full 512B work request.
 * fw_ofld_tx_data_wr(16B) + ulptx_sgl(16B, 1) + ulptx_sge_pair(480B, 40)
 */
#define OFLD_SGL_LEN (41)

/*
 * Send data and/or a FIN to the peer.
 *
 * The socket's so_snd buffer consists of a stream of data starting with sb_mb
 * and linked together with m_next.  sb_sndptr, if set, is the last mbuf that
 * was transmitted.
 *
 * drop indicates the number of bytes that should be dropped from the head of
 * the send buffer.  It is an optimization that lets do_fw4_ack avoid creating
 * contention on the send buffer lock (before this change it used to do
 * sowwakeup and then t4_push_frames right after that when recovering from tx
 * stalls).  When drop is set this function MUST drop the bytes and wake up any
 * writers.
 */
void
t4_push_frames(struct adapter *sc, struct toepcb *toep, int drop)
{
	struct mbuf *sndptr, *m, *sb_sndptr;
	struct fw_ofld_tx_data_wr *txwr;
	struct wrqe *wr;
	u_int plen, nsegs, credits, max_imm, max_nsegs, max_nsegs_1mbuf;
	struct inpcb *inp = toep->inp;
	struct tcpcb *tp = intotcpcb(inp);
	struct socket *so = inp->inp_socket;
	struct sockbuf *sb = &so->so_snd;
	int tx_credits, shove, compl, sowwakeup;
	struct ofld_tx_sdesc *txsd = &toep->txsd[toep->txsd_pidx];

	INP_WLOCK_ASSERT(inp);
	KASSERT(toep->flags & TPF_FLOWC_WR_SENT,
	    ("%s: flowc_wr not sent for tid %u.", __func__, toep->tid));

	KASSERT(toep->ulp_mode == ULP_MODE_NONE ||
	    toep->ulp_mode == ULP_MODE_TCPDDP ||
	    toep->ulp_mode == ULP_MODE_RDMA,
	    ("%s: ulp_mode %u for toep %p", __func__, toep->ulp_mode, toep));

	/*
	 * This function doesn't resume by itself.  Someone else must clear the
	 * flag and call this function.
	 */
	if (__predict_false(toep->flags & TPF_TX_SUSPENDED)) {
		KASSERT(drop == 0,
		    ("%s: drop (%d) != 0 but tx is suspended", __func__, drop));
		return;
	}

	do {
		tx_credits = min(toep->tx_credits, MAX_OFLD_TX_CREDITS);
		max_imm = max_imm_payload(tx_credits);
		max_nsegs = max_dsgl_nsegs(tx_credits);

		SOCKBUF_LOCK(sb);
		sowwakeup = drop;
		if (drop) {
			sbdrop_locked(sb, drop);
			drop = 0;
		}
		sb_sndptr = sb->sb_sndptr;
		sndptr = sb_sndptr ? sb_sndptr->m_next : sb->sb_mb;
		plen = 0;
		nsegs = 0;
		max_nsegs_1mbuf = 0; /* max # of SGL segments in any one mbuf */
		for (m = sndptr; m != NULL; m = m->m_next) {
			int n = sglist_count(mtod(m, void *), m->m_len);

			nsegs += n;
			plen += m->m_len;

			/* This mbuf sent us _over_ the nsegs limit, back out */
			if (plen > max_imm && nsegs > max_nsegs) {
				nsegs -= n;
				plen -= m->m_len;
				if (plen == 0) {
					/* Too few credits */
					toep->flags |= TPF_TX_SUSPENDED;
					if (sowwakeup)
						sowwakeup_locked(so);
					else
						SOCKBUF_UNLOCK(sb);
					SOCKBUF_UNLOCK_ASSERT(sb);
					return;
				}
				break;
			}

			if (max_nsegs_1mbuf < n)
				max_nsegs_1mbuf = n;
			sb_sndptr = m;	/* new sb->sb_sndptr if all goes well */

			/* This mbuf put us right at the max_nsegs limit */
			if (plen > max_imm && nsegs == max_nsegs) {
				m = m->m_next;
				break;
			}
		}

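		/*
		 * Request a fw completion (CPL_FW4_ACK) when the send buffer
		 * is filling up and enough uncompleted data has built up; the
		 * ack is what lets do_fw4_ack drop acked bytes from the
		 * buffer and wake up writers.
		 */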
		if (sb->sb_cc > sb->sb_hiwat * 5 / 8 &&
		    toep->plen_nocompl + plen >= sb->sb_hiwat / 4)
			compl = 1;
		else
			compl = 0;

		if (sb->sb_flags & SB_AUTOSIZE &&
		    V_tcp_do_autosndbuf &&
		    sb->sb_hiwat < V_tcp_autosndbuf_max &&
		    sb->sb_cc >= sb->sb_hiwat * 7 / 8) {
			int newsize = min(sb->sb_hiwat + V_tcp_autosndbuf_inc,
			    V_tcp_autosndbuf_max);

			if (!sbreserve_locked(sb, newsize, so, NULL))
				sb->sb_flags &= ~SB_AUTOSIZE;
			else
				sowwakeup = 1;	/* room available */
		}
		if (sowwakeup)
			sowwakeup_locked(so);
		else
			SOCKBUF_UNLOCK(sb);
		SOCKBUF_UNLOCK_ASSERT(sb);

		/* nothing to send */
		if (plen == 0) {
			KASSERT(m == NULL,
			    ("%s: nothing to send, but m != NULL", __func__));
			break;
		}

		if (__predict_false(toep->flags & TPF_FIN_SENT))
			panic("%s: excess tx.", __func__);

		shove = m == NULL && !(tp->t_flags & TF_MORETOCOME);
		if (plen <= max_imm) {

			/* Immediate data tx */

			wr = alloc_wrqe(roundup2(sizeof(*txwr) + plen, 16),
					toep->ofld_txq);
			if (wr == NULL) {
				/* XXX: how will we recover from this? */
				toep->flags |= TPF_TX_SUSPENDED;
				return;
			}
			txwr = wrtod(wr);
			credits = howmany(wr->wr_len, 16);
			write_tx_wr(txwr, toep, plen, plen, credits, shove, 0,
			    sc->tt.tx_align);
			m_copydata(sndptr, 0, plen, (void *)(txwr + 1));
			nsegs = 0;
		} else {
			int wr_len;

			/* DSGL tx */

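			/*
			 * The first segment rides in the ulptx_sgl header
			 * itself; the remaining nsegs - 1 are packed two per
			 * 24B ulptx_sge_pair, with an odd leftover padded to
			 * 16B.
			 */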
			wr_len = sizeof(*txwr) + sizeof(struct ulptx_sgl) +
			    ((3 * (nsegs - 1)) / 2 + ((nsegs - 1) & 1)) * 8;
			wr = alloc_wrqe(roundup2(wr_len, 16), toep->ofld_txq);
			if (wr == NULL) {
				/* XXX: how will we recover from this? */
				toep->flags |= TPF_TX_SUSPENDED;
				return;
			}
			txwr = wrtod(wr);
			credits = howmany(wr_len, 16);
			write_tx_wr(txwr, toep, 0, plen, credits, shove, 0,
			    sc->tt.tx_align);
			write_tx_sgl(txwr + 1, sndptr, m, nsegs,
			    max_nsegs_1mbuf);
			if (wr_len & 0xf) {
				uint64_t *pad = (uint64_t *)
				    ((uintptr_t)txwr + wr_len);
				*pad = 0;
			}
		}

		KASSERT(toep->tx_credits >= credits,
			("%s: not enough credits", __func__));

		toep->tx_credits -= credits;
		toep->tx_nocompl += credits;
		toep->plen_nocompl += plen;
		if (toep->tx_credits <= toep->tx_total * 3 / 8 &&
		    toep->tx_nocompl >= toep->tx_total / 4)
			compl = 1;

		if (compl || toep->ulp_mode == ULP_MODE_RDMA) {
			txwr->op_to_immdlen |= htobe32(F_FW_WR_COMPL);
			toep->tx_nocompl = 0;
			toep->plen_nocompl = 0;
		}

		tp->snd_nxt += plen;
		tp->snd_max += plen;

		SOCKBUF_LOCK(sb);
		KASSERT(sb_sndptr, ("%s: sb_sndptr is NULL", __func__));
		sb->sb_sndptr = sb_sndptr;
		SOCKBUF_UNLOCK(sb);

		toep->flags |= TPF_TX_DATA_SENT;
		if (toep->tx_credits < MIN_OFLD_TX_CREDITS)
			toep->flags |= TPF_TX_SUSPENDED;

		KASSERT(toep->txsd_avail > 0, ("%s: no txsd", __func__));
		txsd->plen = plen;
		txsd->tx_credits = credits;
		txsd++;
		if (__predict_false(++toep->txsd_pidx == toep->txsd_total)) {
			toep->txsd_pidx = 0;
			txsd = &toep->txsd[0];
		}
		toep->txsd_avail--;

		t4_l2t_send(sc, wr, toep->l2te);
	} while (m != NULL);

	/* Send a FIN if requested, but only if there's no more data to send */
	if (m == NULL && toep->flags & TPF_SEND_FIN)
		close_conn(sc, toep);
}

/* Send ULP data over TOE using TX_DATA_WR.  We send the whole mbuf at once. */
void
t4_ulp_push_frames(struct adapter *sc, struct toepcb *toep, int drop)
{
	struct mbuf *sndptr, *m = NULL;
	struct fw_ofld_tx_data_wr *txwr;
	struct wrqe *wr;
	unsigned int plen, nsegs, credits, max_imm, max_nsegs, max_nsegs_1mbuf;
	struct inpcb *inp = toep->inp;
	struct tcpcb *tp;
	struct socket *so;
	struct sockbuf *sb;
	int tx_credits, ulp_len = 0, ulp_mode = 0, qlen = 0;
	int shove, compl;
	struct ofld_tx_sdesc *txsd;

	INP_WLOCK_ASSERT(inp);
	if (toep->flags & TPF_ABORT_SHUTDOWN)
		return;

	tp = intotcpcb(inp);
	so = inp->inp_socket;
	sb = &so->so_snd;
	txsd = &toep->txsd[toep->txsd_pidx];

	KASSERT(toep->flags & TPF_FLOWC_WR_SENT,
	    ("%s: flowc_wr not sent for tid %u.", __func__, toep->tid));

	/*
	 * This function doesn't resume by itself.  Someone else must clear the
	 * flag and call this function.
	 */
	if (__predict_false(toep->flags & TPF_TX_SUSPENDED))
		return;

	sndptr = t4_queue_iscsi_callback(so, toep, 1, &qlen);
	if (!qlen)
		return;

	do {
		tx_credits = min(toep->tx_credits, MAX_OFLD_TX_CREDITS);
		max_imm = max_imm_payload(tx_credits);
		max_nsegs = max_dsgl_nsegs(tx_credits);

		if (drop) {
			t4_cpl_iscsi_callback(toep->td, toep, &drop,
			    CPL_FW4_ACK);
			drop = 0;
		}

		plen = 0;
		nsegs = 0;
		max_nsegs_1mbuf = 0; /* max # of SGL segments in any one mbuf */
		for (m = sndptr; m != NULL; m = m->m_next) {
			int n = sglist_count(mtod(m, void *), m->m_len);

			nsegs += n;
			plen += m->m_len;

			/* This mbuf sent us _over_ the nsegs limit, return */
			if (plen > max_imm && nsegs > max_nsegs) {
				toep->flags |= TPF_TX_SUSPENDED;
				return;
			}

			if (max_nsegs_1mbuf < n)
				max_nsegs_1mbuf = n;

			/* This mbuf put us right at the max_nsegs limit */
			if (plen > max_imm && nsegs == max_nsegs) {
				toep->flags |= TPF_TX_SUSPENDED;
				return;
			}
		}

		shove = m == NULL && !(tp->t_flags & TF_MORETOCOME);
		/* nothing to send */
		if (plen == 0) {
			KASSERT(m == NULL,
			    ("%s: nothing to send, but m != NULL", __func__));
			break;
		}

		if (__predict_false(toep->flags & TPF_FIN_SENT))
			panic("%s: excess tx.", __func__);

		ulp_len = plen + ulp_extra_len(sndptr, &ulp_mode);
		if (plen <= max_imm) {

			/* Immediate data tx */
			wr = alloc_wrqe(roundup(sizeof(*txwr) + plen, 16),
					toep->ofld_txq);
			if (wr == NULL) {
				/* XXX: how will we recover from this? */
				toep->flags |= TPF_TX_SUSPENDED;
				return;
			}
			txwr = wrtod(wr);
			credits = howmany(wr->wr_len, 16);
			write_tx_wr(txwr, toep, plen, ulp_len, credits, shove,
								ulp_mode, 0);
			m_copydata(sndptr, 0, plen, (void *)(txwr + 1));
		} else {
			int wr_len;

			/* DSGL tx */
			wr_len = sizeof(*txwr) + sizeof(struct ulptx_sgl) +
			    ((3 * (nsegs - 1)) / 2 + ((nsegs - 1) & 1)) * 8;
			wr = alloc_wrqe(roundup(wr_len, 16), toep->ofld_txq);
			if (wr == NULL) {
				/* XXX: how will we recover from this? */
				toep->flags |= TPF_TX_SUSPENDED;
				return;
			}
			txwr = wrtod(wr);
			credits = howmany(wr_len, 16);
			write_tx_wr(txwr, toep, 0, ulp_len, credits, shove,
								ulp_mode, 0);
			write_tx_sgl(txwr + 1, sndptr, m, nsegs,
			    max_nsegs_1mbuf);
			if (wr_len & 0xf) {
				uint64_t *pad = (uint64_t *)
				    ((uintptr_t)txwr + wr_len);
				*pad = 0;
			}
		}

		KASSERT(toep->tx_credits >= credits,
			("%s: not enough credits", __func__));

		toep->tx_credits -= credits;
		toep->tx_nocompl += credits;
		toep->plen_nocompl += plen;
		if (toep->tx_credits <= toep->tx_total * 3 / 8 &&
		    toep->tx_nocompl >= toep->tx_total / 4)
			compl = 1;
		else
			compl = 0;

		if (compl) {
			txwr->op_to_immdlen |= htobe32(F_FW_WR_COMPL);
			toep->tx_nocompl = 0;
			toep->plen_nocompl = 0;
		}
		tp->snd_nxt += ulp_len;
		tp->snd_max += ulp_len;

		/* goto next mbuf */
		sndptr = m = t4_queue_iscsi_callback(so, toep, 2, &qlen);

		toep->flags |= TPF_TX_DATA_SENT;
		if (toep->tx_credits < MIN_OFLD_TX_CREDITS) {
			toep->flags |= TPF_TX_SUSPENDED;
		}

		KASSERT(toep->txsd_avail > 0, ("%s: no txsd", __func__));
		txsd->plen = plen;
		txsd->tx_credits = credits;
		txsd++;
		if (__predict_false(++toep->txsd_pidx == toep->txsd_total)) {
			toep->txsd_pidx = 0;
			txsd = &toep->txsd[0];
		}
		toep->txsd_avail--;

		t4_l2t_send(sc, wr, toep->l2te);
	} while (m != NULL);

	/* Send a FIN if requested, but only if there's no more data to send */
	if (m == NULL && toep->flags & TPF_SEND_FIN)
		close_conn(sc, toep);
}

int
t4_tod_output(struct toedev *tod, struct tcpcb *tp)
{
	struct adapter *sc = tod->tod_softc;
#ifdef INVARIANTS
	struct inpcb *inp = tp->t_inpcb;
#endif
	struct toepcb *toep = tp->t_toe;

	INP_WLOCK_ASSERT(inp);
	KASSERT((inp->inp_flags & INP_DROPPED) == 0,
	    ("%s: inp %p dropped.", __func__, inp));
	KASSERT(toep != NULL, ("%s: toep is NULL", __func__));

	t4_push_frames(sc, toep, 0);

	return (0);
}

int
t4_send_fin(struct toedev *tod, struct tcpcb *tp)
{
	struct adapter *sc = tod->tod_softc;
#ifdef INVARIANTS
	struct inpcb *inp = tp->t_inpcb;
#endif
	struct toepcb *toep = tp->t_toe;

	INP_WLOCK_ASSERT(inp);
	KASSERT((inp->inp_flags & INP_DROPPED) == 0,
	    ("%s: inp %p dropped.", __func__, inp));
	KASSERT(toep != NULL, ("%s: toep is NULL", __func__));

	toep->flags |= TPF_SEND_FIN;
	if (tp->t_state >= TCPS_ESTABLISHED) {
		if (toep->ulp_mode == ULP_MODE_ISCSI)
			t4_ulp_push_frames(sc, toep, 0);
		else
			t4_push_frames(sc, toep, 0);
	}

	return (0);
}

int
t4_send_rst(struct toedev *tod, struct tcpcb *tp)
{
	struct adapter *sc = tod->tod_softc;
#if defined(INVARIANTS)
	struct inpcb *inp = tp->t_inpcb;
#endif
	struct toepcb *toep = tp->t_toe;

	INP_WLOCK_ASSERT(inp);
	KASSERT((inp->inp_flags & INP_DROPPED) == 0,
	    ("%s: inp %p dropped.", __func__, inp));
	KASSERT(toep != NULL, ("%s: toep is NULL", __func__));

	/* hmmmm */
	KASSERT(toep->flags & TPF_FLOWC_WR_SENT,
	    ("%s: flowc for tid %u [%s] not sent already",
	    __func__, toep->tid, tcpstates[tp->t_state]));

	send_reset(sc, toep, 0);
	return (0);
}

/*
 * Peer has sent us a FIN.
 */
static int
do_peer_close(struct sge_iq *iq, const struct rss_header *rss, struct mbuf *m)
{
	struct adapter *sc = iq->adapter;
	const struct cpl_peer_close *cpl = (const void *)(rss + 1);
	unsigned int tid = GET_TID(cpl);
	struct toepcb *toep = lookup_tid(sc, tid);
	struct inpcb *inp = toep->inp;
	struct tcpcb *tp = NULL;
	struct socket *so;
	struct sockbuf *sb;
#ifdef INVARIANTS
	unsigned int opcode = G_CPL_OPCODE(be32toh(OPCODE_TID(cpl)));
#endif

	KASSERT(opcode == CPL_PEER_CLOSE,
	    ("%s: unexpected opcode 0x%x", __func__, opcode));
	KASSERT(m == NULL, ("%s: wasn't expecting payload", __func__));

	if (__predict_false(toep->flags & TPF_SYNQE)) {
#ifdef INVARIANTS
		struct synq_entry *synqe = (void *)toep;

		INP_WLOCK(synqe->lctx->inp);
		if (synqe->flags & TPF_SYNQE_HAS_L2TE) {
			KASSERT(synqe->flags & TPF_ABORT_SHUTDOWN,
			    ("%s: listen socket closed but tid %u not aborted.",
			    __func__, tid));
		} else {
			/*
			 * do_pass_accept_req is still running and will
			 * eventually take care of this tid.
			 */
		}
		INP_WUNLOCK(synqe->lctx->inp);
#endif
		CTR4(KTR_CXGBE, "%s: tid %u, synqe %p (0x%x)", __func__, tid,
		    toep, toep->flags);
		return (0);
	}

	KASSERT(toep->tid == tid, ("%s: toep tid mismatch", __func__));

	INP_INFO_RLOCK(&V_tcbinfo);
	INP_WLOCK(inp);
	tp = intotcpcb(inp);

	CTR5(KTR_CXGBE, "%s: tid %u (%s), toep_flags 0x%x, inp %p", __func__,
	    tid, tp ? tcpstates[tp->t_state] : "no tp", toep->flags, inp);

	if (toep->flags & TPF_ABORT_SHUTDOWN)
		goto done;

	tp->rcv_nxt++;	/* FIN */

	so = inp->inp_socket;
	sb = &so->so_rcv;
	SOCKBUF_LOCK(sb);
	if (__predict_false(toep->ddp_flags & (DDP_BUF0_ACTIVE | DDP_BUF1_ACTIVE))) {
		handle_ddp_close(toep, tp, sb, cpl->rcv_nxt);
	}
	socantrcvmore_locked(so);	/* unlocks the sockbuf */

	if (toep->ulp_mode != ULP_MODE_RDMA) {
		KASSERT(tp->rcv_nxt == be32toh(cpl->rcv_nxt),
		    ("%s: rcv_nxt mismatch: %u %u", __func__, tp->rcv_nxt,
		    be32toh(cpl->rcv_nxt)));
	}

	switch (tp->t_state) {
	case TCPS_SYN_RECEIVED:
		tp->t_starttime = ticks;
		/* FALLTHROUGH */

	case TCPS_ESTABLISHED:
		tp->t_state = TCPS_CLOSE_WAIT;
		break;

	case TCPS_FIN_WAIT_1:
		tp->t_state = TCPS_CLOSING;
		break;

	case TCPS_FIN_WAIT_2:
		tcp_twstart(tp);
		INP_UNLOCK_ASSERT(inp);	/* safe, we have a ref on the inp */
		INP_INFO_RUNLOCK(&V_tcbinfo);

		INP_WLOCK(inp);
		final_cpl_received(toep);
		return (0);

	default:
		log(LOG_ERR, "%s: TID %u received CPL_PEER_CLOSE in state %d\n",
		    __func__, tid, tp->t_state);
	}
done:
	INP_WUNLOCK(inp);
	INP_INFO_RUNLOCK(&V_tcbinfo);
	return (0);
}

/*
 * Peer has ACK'd our FIN.
 */
static int
do_close_con_rpl(struct sge_iq *iq, const struct rss_header *rss,
    struct mbuf *m)
{
	struct adapter *sc = iq->adapter;
	const struct cpl_close_con_rpl *cpl = (const void *)(rss + 1);
	unsigned int tid = GET_TID(cpl);
	struct toepcb *toep = lookup_tid(sc, tid);
	struct inpcb *inp = toep->inp;
	struct tcpcb *tp = NULL;
	struct socket *so = NULL;
#ifdef INVARIANTS
	unsigned int opcode = G_CPL_OPCODE(be32toh(OPCODE_TID(cpl)));
#endif

	KASSERT(opcode == CPL_CLOSE_CON_RPL,
	    ("%s: unexpected opcode 0x%x", __func__, opcode));
	KASSERT(m == NULL, ("%s: wasn't expecting payload", __func__));
	KASSERT(toep->tid == tid, ("%s: toep tid mismatch", __func__));

	INP_INFO_RLOCK(&V_tcbinfo);
	INP_WLOCK(inp);
	tp = intotcpcb(inp);

	CTR4(KTR_CXGBE, "%s: tid %u (%s), toep_flags 0x%x",
	    __func__, tid, tp ? tcpstates[tp->t_state] : "no tp", toep->flags);

	if (toep->flags & TPF_ABORT_SHUTDOWN)
		goto done;

	so = inp->inp_socket;
	tp->snd_una = be32toh(cpl->snd_nxt) - 1;	/* exclude FIN */

	switch (tp->t_state) {
	case TCPS_CLOSING:	/* see TCPS_FIN_WAIT_2 in do_peer_close too */
		tcp_twstart(tp);
release:
		INP_UNLOCK_ASSERT(inp);	/* safe, we have a ref on the inp */
		INP_INFO_RUNLOCK(&V_tcbinfo);

		INP_WLOCK(inp);
		final_cpl_received(toep);	/* no more CPLs expected */

		return (0);
	case TCPS_LAST_ACK:
		if (tcp_close(tp))
			INP_WUNLOCK(inp);
		goto release;

	case TCPS_FIN_WAIT_1:
		if (so->so_rcv.sb_state & SBS_CANTRCVMORE)
			soisdisconnected(so);
		tp->t_state = TCPS_FIN_WAIT_2;
		break;

	default:
		log(LOG_ERR,
		    "%s: TID %u received CPL_CLOSE_CON_RPL in state %s\n",
		    __func__, tid, tcpstates[tp->t_state]);
	}
done:
	INP_WUNLOCK(inp);
	INP_INFO_RUNLOCK(&V_tcbinfo);
	return (0);
}

void
send_abort_rpl(struct adapter *sc, struct sge_wrq *ofld_txq, int tid,
    int rst_status)
{
	struct wrqe *wr;
	struct cpl_abort_rpl *cpl;

	wr = alloc_wrqe(sizeof(*cpl), ofld_txq);
	if (wr == NULL) {
		/* XXX */
		panic("%s: allocation failure.", __func__);
	}
	cpl = wrtod(wr);

	INIT_TP_WR_MIT_CPL(cpl, CPL_ABORT_RPL, tid);
	cpl->cmd = rst_status;

	t4_wrq_tx(sc, wr);
}

static int
abort_status_to_errno(struct tcpcb *tp, unsigned int abort_reason)
{
	switch (abort_reason) {
	case CPL_ERR_BAD_SYN:
	case CPL_ERR_CONN_RESET:
		return (tp->t_state == TCPS_CLOSE_WAIT ? EPIPE : ECONNRESET);
	case CPL_ERR_XMIT_TIMEDOUT:
	case CPL_ERR_PERSIST_TIMEDOUT:
	case CPL_ERR_FINWAIT2_TIMEDOUT:
	case CPL_ERR_KEEPALIVE_TIMEDOUT:
		return (ETIMEDOUT);
	default:
		return (EIO);
	}
}

int
cpl_not_handled(struct sge_iq *, const struct rss_header *, struct mbuf *);
/*
 * tom_cpl_iscsi_callback -
 * iSCSI and TOM share the following CPL messages, so when any of these
 * messages is received, after TOM is done processing it, the message
 * needs to be forwarded to iSCSI for further processing:
 * - CPL_SET_TCB_RPL
 * - CPL_RX_DATA_DDP
 */
void (*tom_cpl_iscsi_callback)(struct tom_data *, struct socket *, void *,
    unsigned int);

struct mbuf *(*tom_queue_iscsi_callback)(struct socket *, unsigned int, int *);
/*
 * Check if a handler function is set for a given CPL.
 * Return 0 if the function is NULL or cpl_not_handled, 1 otherwise.
 */
int
t4tom_cpl_handler_registered(struct adapter *sc, unsigned int opcode)
{

	MPASS(opcode < nitems(sc->cpl_handler));

	return (sc->cpl_handler[opcode] &&
	    sc->cpl_handler[opcode] != cpl_not_handled);
}

/*
 * Set the tom_cpl_iscsi_callback function.  This function should be used
 * whenever both TOE and iSCSI need to process the same CPL message.
 */
void
t4tom_register_cpl_iscsi_callback(void (*fp)(struct tom_data *, struct socket *,
    void *, unsigned int))
{

	tom_cpl_iscsi_callback = fp;
}

void
t4tom_register_queue_iscsi_callback(struct mbuf *(*fp)(struct socket *,
    unsigned int, int *qlen))
{

	tom_queue_iscsi_callback = fp;
}

int
t4_cpl_iscsi_callback(struct tom_data *td, struct toepcb *toep, void *m,
    unsigned int opcode)
{
	struct socket *so;

	if (opcode == CPL_FW4_ACK)
		so = toep->inp->inp_socket;
	else {
		INP_WLOCK(toep->inp);
		so = toep->inp->inp_socket;
		INP_WUNLOCK(toep->inp);
	}

	if (tom_cpl_iscsi_callback && so) {
		if (toep->ulp_mode == ULP_MODE_ISCSI) {
			tom_cpl_iscsi_callback(td, so, m, opcode);
			return (0);
		}
	}

	return (1);
}

struct mbuf *
t4_queue_iscsi_callback(struct socket *so, struct toepcb *toep,
    unsigned int cmd, int *qlen)
{

	if (tom_queue_iscsi_callback && so) {
		if (toep->ulp_mode == ULP_MODE_ISCSI)
			return (tom_queue_iscsi_callback(so, cmd, qlen));
	}

	return (NULL);
}

/*
 * TCP RST from the peer, timeout, or some other such critical error.
 */
static int
do_abort_req(struct sge_iq *iq, const struct rss_header *rss, struct mbuf *m)
{
	struct adapter *sc = iq->adapter;
	const struct cpl_abort_req_rss *cpl = (const void *)(rss + 1);
	unsigned int tid = GET_TID(cpl);
	struct toepcb *toep = lookup_tid(sc, tid);
	struct sge_wrq *ofld_txq = toep->ofld_txq;
	struct inpcb *inp;
	struct tcpcb *tp;
#ifdef INVARIANTS
	unsigned int opcode = G_CPL_OPCODE(be32toh(OPCODE_TID(cpl)));
#endif

	KASSERT(opcode == CPL_ABORT_REQ_RSS,
	    ("%s: unexpected opcode 0x%x", __func__, opcode));
	KASSERT(m == NULL, ("%s: wasn't expecting payload", __func__));

	if (toep->flags & TPF_SYNQE)
		return (do_abort_req_synqe(iq, rss, m));

	KASSERT(toep->tid == tid, ("%s: toep tid mismatch", __func__));

	if (negative_advice(cpl->status)) {
		CTR4(KTR_CXGBE, "%s: negative advice %d for tid %d (0x%x)",
		    __func__, cpl->status, tid, toep->flags);
		return (0);	/* Ignore negative advice */
	}

	inp = toep->inp;
	INP_INFO_RLOCK(&V_tcbinfo);	/* for tcp_close */
	INP_WLOCK(inp);

	tp = intotcpcb(inp);

	CTR6(KTR_CXGBE,
	    "%s: tid %d (%s), toep_flags 0x%x, inp_flags 0x%x, status %d",
	    __func__, tid, tp ? tcpstates[tp->t_state] : "no tp", toep->flags,
	    inp->inp_flags, cpl->status);

	/*
	 * If we'd initiated an abort earlier the reply to it is responsible for
	 * cleaning up resources.  Otherwise we tear everything down right here
	 * right now.  We owe the T4 a CPL_ABORT_RPL no matter what.
	 */
	if (toep->flags & TPF_ABORT_SHUTDOWN) {
		INP_WUNLOCK(inp);
		goto done;
	}
	toep->flags |= TPF_ABORT_SHUTDOWN;

	if ((inp->inp_flags & (INP_DROPPED | INP_TIMEWAIT)) == 0) {
		struct socket *so = inp->inp_socket;

		if (so != NULL)
			so_error_set(so, abort_status_to_errno(tp,
			    cpl->status));
		tp = tcp_close(tp);
		if (tp == NULL)
			INP_WLOCK(inp);	/* re-acquire */
	}

	final_cpl_received(toep);
done:
	INP_INFO_RUNLOCK(&V_tcbinfo);
	send_abort_rpl(sc, ofld_txq, tid, CPL_ABORT_NO_RST);
	return (0);
}

/*
 * Reply to the CPL_ABORT_REQ (send_reset)
 */
static int
do_abort_rpl(struct sge_iq *iq, const struct rss_header *rss, struct mbuf *m)
{
	struct adapter *sc = iq->adapter;
	const struct cpl_abort_rpl_rss *cpl = (const void *)(rss + 1);
	unsigned int tid = GET_TID(cpl);
	struct toepcb *toep = lookup_tid(sc, tid);
	struct inpcb *inp = toep->inp;
#ifdef INVARIANTS
	unsigned int opcode = G_CPL_OPCODE(be32toh(OPCODE_TID(cpl)));
#endif

	KASSERT(opcode == CPL_ABORT_RPL_RSS,
	    ("%s: unexpected opcode 0x%x", __func__, opcode));
	KASSERT(m == NULL, ("%s: wasn't expecting payload", __func__));

	if (toep->flags & TPF_SYNQE)
		return (do_abort_rpl_synqe(iq, rss, m));

	KASSERT(toep->tid == tid, ("%s: toep tid mismatch", __func__));

	CTR5(KTR_CXGBE, "%s: tid %u, toep %p, inp %p, status %d",
	    __func__, tid, toep, inp, cpl->status);

	KASSERT(toep->flags & TPF_ABORT_SHUTDOWN,
	    ("%s: wasn't expecting abort reply", __func__));

	INP_WLOCK(inp);
	final_cpl_received(toep);

	return (0);
}

static int
do_rx_data(struct sge_iq *iq, const struct rss_header *rss, struct mbuf *m)
{
	struct adapter *sc = iq->adapter;
	const struct cpl_rx_data *cpl = mtod(m, const void *);
	unsigned int tid = GET_TID(cpl);
	struct toepcb *toep = lookup_tid(sc, tid);
	struct inpcb *inp = toep->inp;
	struct tcpcb *tp;
	struct socket *so;
	struct sockbuf *sb;
	int len;
	uint32_t ddp_placed = 0;

	if (__predict_false(toep->flags & TPF_SYNQE)) {
#ifdef INVARIANTS
		struct synq_entry *synqe = (void *)toep;

		INP_WLOCK(synqe->lctx->inp);
		if (synqe->flags & TPF_SYNQE_HAS_L2TE) {
			KASSERT(synqe->flags & TPF_ABORT_SHUTDOWN,
			    ("%s: listen socket closed but tid %u not aborted.",
			    __func__, tid));
		} else {
			/*
			 * do_pass_accept_req is still running and will
			 * eventually take care of this tid.
			 */
		}
		INP_WUNLOCK(synqe->lctx->inp);
#endif
		CTR4(KTR_CXGBE, "%s: tid %u, synqe %p (0x%x)", __func__, tid,
		    toep, toep->flags);
		m_freem(m);
		return (0);
	}

	KASSERT(toep->tid == tid, ("%s: toep tid mismatch", __func__));

	/* strip off CPL header */
	m_adj(m, sizeof(*cpl));
	len = m->m_pkthdr.len;

	INP_WLOCK(inp);
	if (inp->inp_flags & (INP_DROPPED | INP_TIMEWAIT)) {
		CTR4(KTR_CXGBE, "%s: tid %u, rx (%d bytes), inp_flags 0x%x",
		    __func__, tid, len, inp->inp_flags);
		INP_WUNLOCK(inp);
		m_freem(m);
		return (0);
	}

	tp = intotcpcb(inp);

	if (__predict_false(tp->rcv_nxt != be32toh(cpl->seq)))
		ddp_placed = be32toh(cpl->seq) - tp->rcv_nxt;

	tp->rcv_nxt += len;
	if (tp->rcv_wnd < len) {
		KASSERT(toep->ulp_mode == ULP_MODE_RDMA,
		    ("%s: negative window size", __func__));
	}

	tp->rcv_wnd -= len;
	tp->t_rcvtime = ticks;

	so = inp_inpcbtosocket(inp);
	sb = &so->so_rcv;
	SOCKBUF_LOCK(sb);

	if (__predict_false(sb->sb_state & SBS_CANTRCVMORE)) {
		CTR3(KTR_CXGBE, "%s: tid %u, excess rx (%d bytes)",
		    __func__, tid, len);
		m_freem(m);
		SOCKBUF_UNLOCK(sb);
		INP_WUNLOCK(inp);

		INP_INFO_RLOCK(&V_tcbinfo);
		INP_WLOCK(inp);
		tp = tcp_drop(tp, ECONNRESET);
		if (tp)
			INP_WUNLOCK(inp);
		INP_INFO_RUNLOCK(&V_tcbinfo);

		return (0);
	}

	/* receive buffer autosize */
	CURVNET_SET(so->so_vnet);
	if (sb->sb_flags & SB_AUTOSIZE &&
	    V_tcp_do_autorcvbuf &&
	    sb->sb_hiwat < V_tcp_autorcvbuf_max &&
	    len > (sbspace(sb) / 8 * 7)) {
		unsigned int hiwat = sb->sb_hiwat;
		unsigned int newsize = min(hiwat + V_tcp_autorcvbuf_inc,
		    V_tcp_autorcvbuf_max);

		if (!sbreserve_locked(sb, newsize, so, NULL))
			sb->sb_flags &= ~SB_AUTOSIZE;
		else
			toep->rx_credits += newsize - hiwat;
	}

	if (toep->ulp_mode == ULP_MODE_TCPDDP) {
		int changed = !(toep->ddp_flags & DDP_ON) ^ cpl->ddp_off;

		if (changed) {
			if (toep->ddp_flags & DDP_SC_REQ)
				toep->ddp_flags ^= DDP_ON | DDP_SC_REQ;
			else {
				KASSERT(cpl->ddp_off == 1,
				    ("%s: DDP switched on by itself.",
				    __func__));

				/* Fell out of DDP mode */
				toep->ddp_flags &= ~(DDP_ON | DDP_BUF0_ACTIVE |
				    DDP_BUF1_ACTIVE);

				if (ddp_placed)
					insert_ddp_data(toep, ddp_placed);
			}
		}

		if ((toep->ddp_flags & DDP_OK) == 0 &&
		    time_uptime >= toep->ddp_disabled + DDP_RETRY_WAIT) {
			toep->ddp_score = DDP_LOW_SCORE;
			toep->ddp_flags |= DDP_OK;
			CTR3(KTR_CXGBE, "%s: tid %u DDP_OK @ %u",
			    __func__, tid, time_uptime);
		}

		if (toep->ddp_flags & DDP_ON) {

			/*
			 * CPL_RX_DATA with DDP on can only be an indicate.  Ask
			 * soreceive to post a buffer or disable DDP.  The
			 * payload that arrived in this indicate is appended to
			 * the socket buffer as usual.
			 */

#if 0
			CTR5(KTR_CXGBE,
			    "%s: tid %u (0x%x) DDP indicate (seq 0x%x, len %d)",
			    __func__, tid, toep->flags, be32toh(cpl->seq), len);
#endif
			sb->sb_flags |= SB_DDP_INDICATE;
		} else if ((toep->ddp_flags & (DDP_OK|DDP_SC_REQ)) == DDP_OK &&
		    tp->rcv_wnd > DDP_RSVD_WIN && len >= sc->tt.ddp_thres) {

			/*
			 * DDP allowed but isn't on (and a request to switch it
			 * on isn't pending either), and conditions are ripe for
			 * it to work.  Switch it on.
			 */

			enable_ddp(sc, toep);
		}
	}

	KASSERT(toep->sb_cc >= sb->sb_cc,
	    ("%s: sb %p has more data (%d) than last time (%d).",
	    __func__, sb, sb->sb_cc, toep->sb_cc));
	toep->rx_credits += toep->sb_cc - sb->sb_cc;
	sbappendstream_locked(sb, m);
	toep->sb_cc = sb->sb_cc;
	if (toep->rx_credits > 0 && toep->sb_cc + tp->rcv_wnd < sb->sb_lowat) {
		int credits;

		credits = send_rx_credits(sc, toep, toep->rx_credits);
		toep->rx_credits -= credits;
		tp->rcv_wnd += credits;
		tp->rcv_adv += credits;
	}
	sorwakeup_locked(so);
	SOCKBUF_UNLOCK_ASSERT(sb);

	INP_WUNLOCK(inp);
	CURVNET_RESTORE();
	return (0);
}

#define S_CPL_FW4_ACK_OPCODE    24
#define M_CPL_FW4_ACK_OPCODE    0xff
#define V_CPL_FW4_ACK_OPCODE(x) ((x) << S_CPL_FW4_ACK_OPCODE)
#define G_CPL_FW4_ACK_OPCODE(x) \
    (((x) >> S_CPL_FW4_ACK_OPCODE) & M_CPL_FW4_ACK_OPCODE)

#define S_CPL_FW4_ACK_FLOWID    0
#define M_CPL_FW4_ACK_FLOWID    0xffffff
#define V_CPL_FW4_ACK_FLOWID(x) ((x) << S_CPL_FW4_ACK_FLOWID)
#define G_CPL_FW4_ACK_FLOWID(x) \
    (((x) >> S_CPL_FW4_ACK_FLOWID) & M_CPL_FW4_ACK_FLOWID)

#define S_CPL_FW4_ACK_CR        24
#define M_CPL_FW4_ACK_CR        0xff
#define V_CPL_FW4_ACK_CR(x)     ((x) << S_CPL_FW4_ACK_CR)
#define G_CPL_FW4_ACK_CR(x)     (((x) >> S_CPL_FW4_ACK_CR) & M_CPL_FW4_ACK_CR)

#define S_CPL_FW4_ACK_SEQVAL    0
#define M_CPL_FW4_ACK_SEQVAL    0x1
#define V_CPL_FW4_ACK_SEQVAL(x) ((x) << S_CPL_FW4_ACK_SEQVAL)
#define G_CPL_FW4_ACK_SEQVAL(x) \
    (((x) >> S_CPL_FW4_ACK_SEQVAL) & M_CPL_FW4_ACK_SEQVAL)
#define F_CPL_FW4_ACK_SEQVAL    V_CPL_FW4_ACK_SEQVAL(1U)

static int
do_fw4_ack(struct sge_iq *iq, const struct rss_header *rss, struct mbuf *m)
{
	struct adapter *sc = iq->adapter;
	const struct cpl_fw4_ack *cpl = (const void *)(rss + 1);
	unsigned int tid = G_CPL_FW4_ACK_FLOWID(be32toh(OPCODE_TID(cpl)));
	struct toepcb *toep = lookup_tid(sc, tid);
	struct inpcb *inp;
	struct tcpcb *tp;
	struct socket *so;
	uint8_t credits = cpl->credits;
	struct ofld_tx_sdesc *txsd;
	int plen;
#ifdef INVARIANTS
	unsigned int opcode = G_CPL_FW4_ACK_OPCODE(be32toh(OPCODE_TID(cpl)));
#endif

	/*
	 * Very unusual case: we'd sent a flowc + abort_req for a synq entry and
	 * now this comes back carrying the credits for the flowc.
	 */
	if (__predict_false(toep->flags & TPF_SYNQE)) {
		KASSERT(toep->flags & TPF_ABORT_SHUTDOWN,
		    ("%s: credits for a synq entry %p", __func__, toep));
		return (0);
	}

	inp = toep->inp;

	KASSERT(opcode == CPL_FW4_ACK,
	    ("%s: unexpected opcode 0x%x", __func__, opcode));
	KASSERT(m == NULL, ("%s: wasn't expecting payload", __func__));
	KASSERT(toep->tid == tid, ("%s: toep tid mismatch", __func__));

	INP_WLOCK(inp);

	if (__predict_false(toep->flags & TPF_ABORT_SHUTDOWN)) {
		INP_WUNLOCK(inp);
		return (0);
	}

	KASSERT((inp->inp_flags & (INP_TIMEWAIT | INP_DROPPED)) == 0,
	    ("%s: inp_flags 0x%x", __func__, inp->inp_flags));

	tp = intotcpcb(inp);

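	/*
	 * A set SEQVAL flag means the snd_una in this CPL is current; advance
	 * our notion of the unacked point much like ACK processing in
	 * tcp_input would.
	 */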
	if (cpl->flags & CPL_FW4_ACK_FLAGS_SEQVAL) {
		tcp_seq snd_una = be32toh(cpl->snd_una);

#ifdef INVARIANTS
		if (__predict_false(SEQ_LT(snd_una, tp->snd_una))) {
			log(LOG_ERR,
			    "%s: unexpected seq# %x for TID %u, snd_una %x\n",
			    __func__, snd_una, toep->tid, tp->snd_una);
		}
#endif

		if (tp->snd_una != snd_una) {
			tp->snd_una = snd_una;
			tp->ts_recent_age = tcp_ts_getticks();
		}
	}

	so = inp->inp_socket;
	txsd = &toep->txsd[toep->txsd_cidx];
	plen = 0;
	while (credits) {
		KASSERT(credits >= txsd->tx_credits,
		    ("%s: too many (or partial) credits", __func__));
		credits -= txsd->tx_credits;
		toep->tx_credits += txsd->tx_credits;
		plen += txsd->plen;
		txsd++;
		toep->txsd_avail++;
		KASSERT(toep->txsd_avail <= toep->txsd_total,
		    ("%s: txsd avail > total", __func__));
		if (__predict_false(++toep->txsd_cidx == toep->txsd_total)) {
			txsd = &toep->txsd[0];
			toep->txsd_cidx = 0;
		}
	}

	if (toep->tx_credits == toep->tx_total) {
		toep->tx_nocompl = 0;
		toep->plen_nocompl = 0;
	}

	if (toep->flags & TPF_TX_SUSPENDED &&
	    toep->tx_credits >= toep->tx_total / 4) {
		toep->flags &= ~TPF_TX_SUSPENDED;
		if (toep->ulp_mode == ULP_MODE_ISCSI)
			t4_ulp_push_frames(sc, toep, plen);
		else
			t4_push_frames(sc, toep, plen);
	} else if (plen > 0) {
		struct sockbuf *sb = &so->so_snd;

		if (toep->ulp_mode == ULP_MODE_ISCSI)
			t4_cpl_iscsi_callback(toep->td, toep, &plen,
			    CPL_FW4_ACK);
		else {
			SOCKBUF_LOCK(sb);
			sbdrop_locked(sb, plen);
			sowwakeup_locked(so);
			SOCKBUF_UNLOCK_ASSERT(sb);
		}
	}

	INP_WUNLOCK(inp);

	return (0);
}

static int
do_set_tcb_rpl(struct sge_iq *iq, const struct rss_header *rss, struct mbuf *m)
{
	struct adapter *sc = iq->adapter;
	const struct cpl_set_tcb_rpl *cpl = (const void *)(rss + 1);
	unsigned int tid = GET_TID(cpl);
#ifdef INVARIANTS
	unsigned int opcode = G_CPL_OPCODE(be32toh(OPCODE_TID(cpl)));
#endif

	KASSERT(opcode == CPL_SET_TCB_RPL,
	    ("%s: unexpected opcode 0x%x", __func__, opcode));
	KASSERT(m == NULL, ("%s: wasn't expecting payload", __func__));

	if (is_ftid(sc, tid))
		return (t4_filter_rpl(iq, rss, m)); /* TCB is a filter */
	else {
		struct toepcb *toep = lookup_tid(sc, tid);

		t4_cpl_iscsi_callback(toep->td, toep, m, CPL_SET_TCB_RPL);
		return (0);
	}

	CXGBE_UNIMPLEMENTED(__func__);
}

void
t4_set_tcb_field(struct adapter *sc, struct toepcb *toep, int ctrl,
    uint16_t word, uint64_t mask, uint64_t val)
{
	struct wrqe *wr;
	struct cpl_set_tcb_field *req;

	wr = alloc_wrqe(sizeof(*req), ctrl ? toep->ctrlq : toep->ofld_txq);
	if (wr == NULL) {
		/* XXX */
		panic("%s: allocation failure.", __func__);
	}
	req = wrtod(wr);

	INIT_TP_WR_MIT_CPL(req, CPL_SET_TCB_FIELD, toep->tid);
	req->reply_ctrl = htobe16(V_NO_REPLY(1) |
	    V_QUEUENO(toep->ofld_rxq->iq.abs_id));
	req->word_cookie = htobe16(V_WORD(word) | V_COOKIE(0));
	req->mask = htobe64(mask);
	req->val = htobe64(val);

	t4_wrq_tx(sc, wr);
}

void
t4_init_cpl_io_handlers(struct adapter *sc)
{

	t4_register_cpl_handler(sc, CPL_PEER_CLOSE, do_peer_close);
	t4_register_cpl_handler(sc, CPL_CLOSE_CON_RPL, do_close_con_rpl);
	t4_register_cpl_handler(sc, CPL_ABORT_REQ_RSS, do_abort_req);
	t4_register_cpl_handler(sc, CPL_ABORT_RPL_RSS, do_abort_rpl);
	t4_register_cpl_handler(sc, CPL_RX_DATA, do_rx_data);
	t4_register_cpl_handler(sc, CPL_FW4_ACK, do_fw4_ack);
	t4_register_cpl_handler(sc, CPL_SET_TCB_RPL, do_set_tcb_rpl);
}

void
t4_uninit_cpl_io_handlers(struct adapter *sc)
{

	t4_register_cpl_handler(sc, CPL_SET_TCB_RPL, t4_filter_rpl);
}
#endif