1237263Snp/*-
2237263Snp * Copyright (c) 2012 Chelsio Communications, Inc.
3237263Snp * All rights reserved.
4237263Snp *
5237263Snp * Redistribution and use in source and binary forms, with or without
6237263Snp * modification, are permitted provided that the following conditions
7237263Snp * are met:
8237263Snp * 1. Redistributions of source code must retain the above copyright
9237263Snp *    notice, this list of conditions and the following disclaimer.
10237263Snp * 2. Redistributions in binary form must reproduce the above copyright
11237263Snp *    notice, this list of conditions and the following disclaimer in the
12237263Snp *    documentation and/or other materials provided with the distribution.
13237263Snp *
14237263Snp * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
15237263Snp * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16237263Snp * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
17237263Snp * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
18237263Snp * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19237263Snp * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
20237263Snp * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21237263Snp * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
22237263Snp * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
23237263Snp * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
24237263Snp * SUCH DAMAGE.
25237263Snp */
26174641Skmacy
27174641Skmacy#include <sys/cdefs.h>
28174641Skmacy__FBSDID("$FreeBSD$");
29174641Skmacy
30237263Snp#include "opt_inet.h"
31237263Snp
32237263Snp#ifdef TCP_OFFLOAD
33174641Skmacy#include <sys/param.h>
34174641Skmacy#include <sys/systm.h>
35174641Skmacy#include <sys/fcntl.h>
36174641Skmacy#include <sys/kernel.h>
37174641Skmacy#include <sys/limits.h>
38176472Skmacy#include <sys/ktr.h>
39174641Skmacy#include <sys/lock.h>
40174641Skmacy#include <sys/mbuf.h>
41174641Skmacy#include <sys/mutex.h>
42181011Skmacy#include <sys/sockstate.h>
43181011Skmacy#include <sys/sockopt.h>
44174641Skmacy#include <sys/socket.h>
45237263Snp#include <sys/socketvar.h>
46181039Sps#include <sys/sockbuf.h>
47174641Skmacy#include <sys/sysctl.h>
48174641Skmacy#include <sys/syslog.h>
49174641Skmacy#include <sys/protosw.h>
50174641Skmacy#include <sys/priv.h>
51237263Snp#include <sys/sglist.h>
52237263Snp#include <sys/taskqueue.h>
53183289Skmacy
54174641Skmacy#include <net/if.h>
55237263Snp#include <net/ethernet.h>
56174641Skmacy#include <net/route.h>
57174641Skmacy
58174641Skmacy#include <netinet/in.h>
59174641Skmacy#include <netinet/in_pcb.h>
60174641Skmacy#include <netinet/in_systm.h>
61174641Skmacy#include <netinet/in_var.h>
62174641Skmacy
63174641Skmacy#include <netinet/ip.h>
64174641Skmacy#include <netinet/tcp_var.h>
65237263Snp#define TCPSTATES
66174641Skmacy#include <netinet/tcp_fsm.h>
67237263Snp#include <netinet/toecore.h>
68174641Skmacy#include <netinet/tcp_seq.h>
69176472Skmacy#include <netinet/tcp_timer.h>
70174641Skmacy#include <net/route.h>
71174641Skmacy
72237263Snp#include "cxgb_include.h"
73237263Snp#include "ulp/tom/cxgb_l2t.h"
74237263Snp#include "ulp/tom/cxgb_tom.h"
75237263Snp#include "ulp/tom/cxgb_toepcb.h"
76174641Skmacy
77237263SnpVNET_DECLARE(int, tcp_do_autosndbuf);
78237263Snp#define V_tcp_do_autosndbuf VNET(tcp_do_autosndbuf)
79237263SnpVNET_DECLARE(int, tcp_autosndbuf_inc);
80237263Snp#define V_tcp_autosndbuf_inc VNET(tcp_autosndbuf_inc)
81237263SnpVNET_DECLARE(int, tcp_autosndbuf_max);
82237263Snp#define V_tcp_autosndbuf_max VNET(tcp_autosndbuf_max)
83237263SnpVNET_DECLARE(int, tcp_do_autorcvbuf);
84237263Snp#define V_tcp_do_autorcvbuf VNET(tcp_do_autorcvbuf)
85237263SnpVNET_DECLARE(int, tcp_autorcvbuf_inc);
86237263Snp#define V_tcp_autorcvbuf_inc VNET(tcp_autorcvbuf_inc)
87237263SnpVNET_DECLARE(int, tcp_autorcvbuf_max);
88237263Snp#define V_tcp_autorcvbuf_max VNET(tcp_autorcvbuf_max)
89237263Snpextern int always_keepalive;
90237263Snp
/*
 * Extra TCP sequence space consumed by HW-inserted ULP headers.
 *
 * On ULP connections the HW may add headers (digests, for example) that the
 * host never put in its messages but which still travel as TCP payload.  Tx
 * connection parameters that live in TCP sequence space must compensate for
 * these additions in order to track sequence numbers accurately.  This table
 * holds the compensating extra length for a ULP packet and is indexed by the
 * packet's ULP submode.
 */
const unsigned int t3_ulp_extra_len[] = {
	[0] = 0,
	[1] = 4,
	[2] = 4,
	[3] = 8,
};
101174641Skmacy
/*
 * Largest receive window the HW supports, in bytes.  option0 can only carry a
 * small part of it; the remainder has to be set through RX_DATA_ACK.
 */
#define MAX_RCV_WND ((1U << 27) - 1U)

/*
 * Smallest receive window we use.  It should be big enough to accommodate
 * receive coalescing, handle jumbo frames, and avoid triggering sender SWS
 * avoidance.
 */
#define MIN_RCV_WND (24U * 1024U)

/* TOS value for an inp: inp_ip_tos_get() shifted right by 2, masked by M_TOS. */
#define INP_TOS(inp) (((inp_ip_tos_get(inp)) >> 2) & M_TOS)
114174641Skmacy
115237263Snpstatic void t3_release_offload_resources(struct toepcb *);
116237263Snpstatic void send_reset(struct toepcb *toep);
117174641Skmacy
118237263Snp/*
119237263Snp * Called after the last CPL for the toepcb has been received.
120237263Snp *
121237263Snp * The inp must be wlocked on entry and is unlocked (or maybe destroyed) by the
122237263Snp * time this function exits.
123237263Snp */
124237263Snpstatic int
125237263Snptoepcb_release(struct toepcb *toep)
126237263Snp{
127237263Snp	struct inpcb *inp = toep->tp_inp;
128237263Snp	struct toedev *tod = toep->tp_tod;
129237263Snp	struct tom_data *td = t3_tomdata(tod);
130237263Snp	int rc;
131178302Skmacy
132237263Snp	INP_WLOCK_ASSERT(inp);
133237263Snp	KASSERT(!(toep->tp_flags & TP_CPL_DONE),
134237263Snp	    ("%s: double release?", __func__));
135174641Skmacy
136237263Snp	CTR2(KTR_CXGB, "%s: tid %d", __func__, toep->tp_tid);
137174641Skmacy
138237263Snp	toep->tp_flags |= TP_CPL_DONE;
139237263Snp	toep->tp_inp = NULL;
140178302Skmacy
141237263Snp	mtx_lock(&td->toep_list_lock);
142237263Snp	TAILQ_REMOVE(&td->toep_list, toep, link);
143237263Snp	mtx_unlock(&td->toep_list_lock);
144176472Skmacy
145237263Snp	if (!(toep->tp_flags & TP_ATTACHED))
146237263Snp		t3_release_offload_resources(toep);
147237263Snp
148237263Snp	rc = in_pcbrele_wlocked(inp);
149237263Snp	if (!rc)
150237263Snp		INP_WUNLOCK(inp);
151237263Snp	return (rc);
152174641Skmacy}
153174641Skmacy
154237263Snp/*
155237263Snp * One sided detach.  The tcpcb is going away and we need to unhook the toepcb
156237263Snp * hanging off it.  If the TOE driver is also done with the toepcb we'll release
157237263Snp * all offload resources.
158237263Snp */
159174641Skmacystatic void
160237263Snptoepcb_detach(struct inpcb *inp)
161174641Skmacy{
162237263Snp	struct toepcb *toep;
163237263Snp	struct tcpcb *tp;
164174641Skmacy
165237263Snp	KASSERT(inp, ("%s: inp is NULL", __func__));
166237263Snp	INP_WLOCK_ASSERT(inp);
167174641Skmacy
168237263Snp	tp = intotcpcb(inp);
169237263Snp	toep = tp->t_toe;
170174641Skmacy
171237263Snp	KASSERT(toep != NULL, ("%s: toep is NULL", __func__));
172237263Snp	KASSERT(toep->tp_flags & TP_ATTACHED, ("%s: not attached", __func__));
173178302Skmacy
174237263Snp	CTR6(KTR_CXGB, "%s: %s %u, toep %p, inp %p, tp %p", __func__,
175237263Snp	    tp->t_state == TCPS_SYN_SENT ? "atid" : "tid", toep->tp_tid,
176237263Snp	    toep, inp, tp);
177174641Skmacy
178237263Snp	tp->t_toe = NULL;
179237263Snp	tp->t_flags &= ~TF_TOE;
180237263Snp	toep->tp_flags &= ~TP_ATTACHED;
181174641Skmacy
182237263Snp	if (toep->tp_flags & TP_CPL_DONE)
183237263Snp		t3_release_offload_resources(toep);
184174641Skmacy}
185174641Skmacy
186237263Snpvoid
187237263Snpt3_pcb_detach(struct toedev *tod __unused, struct tcpcb *tp)
188174641Skmacy{
189174641Skmacy
190237263Snp	toepcb_detach(tp->t_inpcb);
191174641Skmacy}
192174641Skmacy
193237263Snpstatic int
194237263Snpalloc_atid(struct tid_info *t, void *ctx)
195174641Skmacy{
196237263Snp	int atid = -1;
197174641Skmacy
198237263Snp	mtx_lock(&t->atid_lock);
199237263Snp	if (t->afree) {
200237263Snp		union active_open_entry *p = t->afree;
201176472Skmacy
202237263Snp		atid = (p - t->atid_tab) + t->atid_base;
203237263Snp		t->afree = p->next;
204237263Snp		p->ctx = ctx;
205237263Snp		t->atids_in_use++;
206174641Skmacy	}
207237263Snp	mtx_unlock(&t->atid_lock);
208174641Skmacy
209237263Snp	return (atid);
210237263Snp}
211178302Skmacy
212237263Snpstatic void
213237263Snpfree_atid(struct tid_info *t, int atid)
214237263Snp{
215237263Snp	union active_open_entry *p = atid2entry(t, atid);
216178302Skmacy
217237263Snp	mtx_lock(&t->atid_lock);
218237263Snp	p->next = t->afree;
219237263Snp	t->afree = p;
220237263Snp	t->atids_in_use--;
221237263Snp	mtx_unlock(&t->atid_lock);
222237263Snp}
223178302Skmacy
224237263Snpvoid
225237263Snpinsert_tid(struct tom_data *td, void *ctx, unsigned int tid)
226237263Snp{
227237263Snp	struct tid_info *t = &td->tid_maps;
228178302Skmacy
229237263Snp	t->tid_tab[tid] = ctx;
230237263Snp	atomic_add_int(&t->tids_in_use, 1);
231237263Snp}
232174641Skmacy
233237263Snpvoid
234237263Snpupdate_tid(struct tom_data *td, void *ctx, unsigned int tid)
235237263Snp{
236237263Snp	struct tid_info *t = &td->tid_maps;
237174641Skmacy
238237263Snp	t->tid_tab[tid] = ctx;
239237263Snp}
240178302Skmacy
241237263Snpvoid
242237263Snpremove_tid(struct tom_data *td, unsigned int tid)
243178302Skmacy{
244237263Snp	struct tid_info *t = &td->tid_maps;
245178302Skmacy
246237263Snp	t->tid_tab[tid] = NULL;
247237263Snp	atomic_add_int(&t->tids_in_use, -1);
248178302Skmacy}
249174641Skmacy
250237263Snp/* use ctx as a next pointer in the tid release list */
251237263Snpvoid
252237263Snpqueue_tid_release(struct toedev *tod, unsigned int tid)
253174641Skmacy{
254237263Snp	struct tom_data *td = t3_tomdata(tod);
255237263Snp	void **p = &td->tid_maps.tid_tab[tid];
256237263Snp	struct adapter *sc = tod->tod_softc;
257174641Skmacy
258237263Snp	mtx_lock(&td->tid_release_lock);
259237263Snp	*p = td->tid_release_list;
260237263Snp	td->tid_release_list = p;
261237263Snp	if (!*p)
262237263Snp		taskqueue_enqueue(sc->tq, &td->tid_release_task);
263237263Snp	mtx_unlock(&td->tid_release_lock);
264174641Skmacy}
265174641Skmacy
266174641Skmacy/*
267237263Snp * Populate a TID_RELEASE WR.
268174641Skmacy */
269237263Snpstatic inline void
270237263Snpmk_tid_release(struct cpl_tid_release *cpl, unsigned int tid)
271174641Skmacy{
272174641Skmacy
273237263Snp	cpl->wr.wrh_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD));
274237263Snp	OPCODE_TID(cpl) = htonl(MK_OPCODE_TID(CPL_TID_RELEASE, tid));
275174641Skmacy}
276174641Skmacy
277237263Snpvoid
278237263Snprelease_tid(struct toedev *tod, unsigned int tid, int qset)
279174641Skmacy{
280237263Snp	struct tom_data *td = t3_tomdata(tod);
281237263Snp	struct adapter *sc = tod->tod_softc;
282174641Skmacy	struct mbuf *m;
283237263Snp	struct cpl_tid_release *cpl;
284237263Snp#ifdef INVARIANTS
285237263Snp	struct tid_info *t = &td->tid_maps;
286237263Snp#endif
287174641Skmacy
288237263Snp	KASSERT(tid >= 0 && tid < t->ntids,
289237263Snp	    ("%s: tid=%d, ntids=%d", __func__, tid, t->ntids));
290237263Snp
291237263Snp	m = M_GETHDR_OFLD(qset, CPL_PRIORITY_CONTROL, cpl);
292237263Snp	if (m) {
293237263Snp		mk_tid_release(cpl, tid);
294237263Snp		t3_offload_tx(sc, m);
295237263Snp		remove_tid(td, tid);
296237263Snp	} else
297237263Snp		queue_tid_release(tod, tid);
298237263Snp
299174641Skmacy}
300174641Skmacy
301176472Skmacyvoid
302237263Snpt3_process_tid_release_list(void *data, int pending)
303176472Skmacy{
304176472Skmacy	struct mbuf *m;
305237263Snp	struct tom_data *td = data;
306237263Snp	struct adapter *sc = td->tod.tod_softc;
307174641Skmacy
308237263Snp	mtx_lock(&td->tid_release_lock);
309237263Snp	while (td->tid_release_list) {
310237263Snp		void **p = td->tid_release_list;
311237263Snp		unsigned int tid = p - td->tid_maps.tid_tab;
312237263Snp		struct cpl_tid_release *cpl;
313176472Skmacy
314237263Snp		td->tid_release_list = (void **)*p;
315237263Snp		m = M_GETHDR_OFLD(0, CPL_PRIORITY_CONTROL, cpl); /* qs 0 here */
316237263Snp		if (m == NULL)
317237263Snp			break;	/* XXX: who reschedules the release task? */
318237263Snp		mtx_unlock(&td->tid_release_lock);
319237263Snp		mk_tid_release(cpl, tid);
320237263Snp		t3_offload_tx(sc, m);
321237263Snp		remove_tid(td, tid);
322237263Snp		mtx_lock(&td->tid_release_lock);
323237263Snp	}
324237263Snp	mtx_unlock(&td->tid_release_lock);
325176472Skmacy}
326176472Skmacy
327176472Skmacystatic void
328237263Snpclose_conn(struct adapter *sc, struct toepcb *toep)
329176472Skmacy{
330237263Snp	struct mbuf *m;
331237263Snp	struct cpl_close_con_req *req;
332176472Skmacy
333237263Snp	if (toep->tp_flags & TP_FIN_SENT)
334237263Snp		return;
335176472Skmacy
336237263Snp	m = M_GETHDR_OFLD(toep->tp_qset, CPL_PRIORITY_DATA, req);
337237263Snp	if (m == NULL)
338237263Snp		CXGB_UNIMPLEMENTED();
339176472Skmacy
340237263Snp	req->wr.wrh_hi = htonl(V_WR_OP(FW_WROPCODE_OFLD_CLOSE_CON));
341237263Snp	req->wr.wrh_lo = htonl(V_WR_TID(toep->tp_tid));
342237263Snp	OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_CLOSE_CON_REQ, toep->tp_tid));
343237263Snp	req->rsvd = 0;
344176472Skmacy
345237263Snp	toep->tp_flags |= TP_FIN_SENT;
346237263Snp	t3_offload_tx(sc, m);
347176472Skmacy}
348176472Skmacy
349237263Snpstatic inline void
350237263Snpmake_tx_data_wr(struct socket *so, struct tx_data_wr *req, int len,
351237263Snp    struct mbuf *tail)
352176472Skmacy{
353237263Snp	struct tcpcb *tp = so_sototcpcb(so);
354237263Snp	struct toepcb *toep = tp->t_toe;
355237263Snp	struct sockbuf *snd;
356176472Skmacy
357237263Snp	inp_lock_assert(tp->t_inpcb);
358237263Snp	snd = so_sockbuf_snd(so);
359176472Skmacy
360237263Snp	req->wr.wrh_hi = htonl(V_WR_OP(FW_WROPCODE_OFLD_TX_DATA));
361237263Snp	req->wr.wrh_lo = htonl(V_WR_TID(toep->tp_tid));
362237263Snp	/* len includes the length of any HW ULP additions */
363237263Snp	req->len = htonl(len);
364237263Snp	req->param = htonl(V_TX_PORT(toep->tp_l2t->smt_idx));
365237263Snp	/* V_TX_ULP_SUBMODE sets both the mode and submode */
366237263Snp	req->flags = htonl(V_TX_ULP_SUBMODE(toep->tp_ulp_mode) | V_TX_URG(0) |
367237263Snp	    V_TX_SHOVE(!(tp->t_flags & TF_MORETOCOME) && (tail ? 0 : 1)));
368237263Snp	req->sndseq = htonl(tp->snd_nxt);
369237263Snp	if (__predict_false((toep->tp_flags & TP_DATASENT) == 0)) {
370237263Snp		struct adapter *sc = toep->tp_tod->tod_softc;
371237263Snp		int cpu_idx = sc->rrss_map[toep->tp_qset];
372176472Skmacy
373237263Snp		req->flags |= htonl(V_TX_ACK_PAGES(2) | F_TX_INIT |
374237263Snp		    V_TX_CPU_IDX(cpu_idx));
375176472Skmacy
376237263Snp		/* Sendbuffer is in units of 32KB. */
377237263Snp		if (V_tcp_do_autosndbuf && snd->sb_flags & SB_AUTOSIZE)
378237263Snp			req->param |= htonl(V_TX_SNDBUF(VNET(tcp_autosndbuf_max) >> 15));
379237263Snp		else
380237263Snp			req->param |= htonl(V_TX_SNDBUF(snd->sb_hiwat >> 15));
381176472Skmacy
382237263Snp		toep->tp_flags |= TP_DATASENT;
383237263Snp	}
384176472Skmacy}
385176472Skmacy
386176472Skmacy/*
387237263Snp * TOM_XXX_DUPLICATION sgl_len, calc_tx_descs, calc_tx_descs_ofld, mbuf_wrs, etc.
388237263Snp * TOM_XXX_MOVE to some common header file.
389174641Skmacy */
390174641Skmacy/*
391237263Snp * IMM_LEN: # of bytes that can be tx'd as immediate data.  There are 16 flits
392237263Snp * in a tx desc; subtract 3 for tx_data_wr (including the WR header), and 1 more
393237263Snp * for the second gen bit flit.  This leaves us with 12 flits.
394237263Snp *
395237263Snp * descs_to_sgllen: # of SGL entries that can fit into the given # of tx descs.
396237263Snp * The first desc has a tx_data_wr (which includes the WR header), the rest have
397237263Snp * the WR header only.  All descs have the second gen bit flit.
398237263Snp *
399237263Snp * sgllen_to_descs: # of tx descs used up by an sgl of given length.  The first
400237263Snp * desc has a tx_data_wr (which includes the WR header), the rest have the WR
401237263Snp * header only.  All descs have the second gen bit flit.
402237263Snp *
403237263Snp * flits_to_sgllen: # of SGL entries that can be fit in the given # of flits.
404237263Snp *
405174641Skmacy */
406237263Snp#define IMM_LEN 96
407237263Snpstatic int descs_to_sgllen[TX_MAX_DESC + 1] = {0, 8, 17, 26, 35};
408237263Snpstatic int sgllen_to_descs[TX_MAX_SEGS] = {
409237263Snp	0, 1, 1, 1, 1, 1, 1, 1, 1, 2,	/*  0 -  9 */
410237263Snp	2, 2, 2, 2, 2, 2, 2, 2, 3, 3,	/* 10 - 19 */
411237263Snp	3, 3, 3, 3, 3, 3, 3, 4, 4, 4,	/* 20 - 29 */
412237263Snp	4, 4, 4, 4, 4, 4		/* 30 - 35 */
413237263Snp};
414237263Snp#if 0
415237263Snpstatic int flits_to_sgllen[TX_DESC_FLITS + 1] = {
416237263Snp	0, 0, 1, 2, 2, 3, 4, 4, 5, 6, 6, 7, 8, 8, 9, 10, 10
417237263Snp};
418237263Snp#endif
419237263Snp#if SGE_NUM_GENBITS != 2
420237263Snp#error "SGE_NUM_GENBITS really must be 2"
421237263Snp#endif
422237263Snp
423237263Snpint
424237263Snpt3_push_frames(struct socket *so, int req_completion)
425174641Skmacy{
426237263Snp	struct tcpcb *tp = so_sototcpcb(so);
427174641Skmacy	struct toepcb *toep = tp->t_toe;
428237263Snp	struct mbuf *m0, *sndptr, *m;
429237263Snp	struct toedev *tod = toep->tp_tod;
430237263Snp	struct adapter *sc = tod->tod_softc;
431237263Snp	int bytes, ndesc, total_bytes = 0, mlen;
432237263Snp	struct sockbuf *snd;
433237263Snp	struct sglist *sgl;
434237263Snp	struct ofld_hdr *oh;
435237263Snp	caddr_t dst;
436237263Snp	struct tx_data_wr *wr;
437174641Skmacy
438237263Snp	inp_lock_assert(tp->t_inpcb);
439178302Skmacy
440237263Snp	snd = so_sockbuf_snd(so);
441237263Snp	SOCKBUF_LOCK(snd);
442174641Skmacy
443178302Skmacy	/*
444237263Snp	 * Autosize the send buffer.
445174641Skmacy	 */
446237263Snp	if (snd->sb_flags & SB_AUTOSIZE && VNET(tcp_do_autosndbuf)) {
447237263Snp		if (snd->sb_cc >= (snd->sb_hiwat / 8 * 7) &&
448237263Snp		    snd->sb_cc < VNET(tcp_autosndbuf_max)) {
449237263Snp			if (!sbreserve_locked(snd, min(snd->sb_hiwat +
450237263Snp			    VNET(tcp_autosndbuf_inc), VNET(tcp_autosndbuf_max)),
451237263Snp			    so, curthread))
452237263Snp				snd->sb_flags &= ~SB_AUTOSIZE;
453237263Snp		}
454237263Snp	}
455174641Skmacy
456237263Snp	if (toep->tp_m_last && toep->tp_m_last == snd->sb_sndptr)
457237263Snp		sndptr = toep->tp_m_last->m_next;
458237263Snp	else
459237263Snp		sndptr = snd->sb_sndptr ? snd->sb_sndptr : snd->sb_mb;
460174641Skmacy
461237263Snp	/* Nothing to send or no WRs available for sending data */
462237263Snp	if (toep->tp_wr_avail == 0 || sndptr == NULL)
463237263Snp		goto out;
464174641Skmacy
465237263Snp	/* Something to send and at least 1 WR available */
466237263Snp	while (toep->tp_wr_avail && sndptr != NULL) {
467174641Skmacy
468237263Snp		m0 = m_gethdr(M_NOWAIT, MT_DATA);
469237263Snp		if (m0 == NULL)
470237263Snp			break;
471237263Snp		oh = mtod(m0, struct ofld_hdr *);
472237263Snp		wr = (void *)(oh + 1);
473237263Snp		dst = (void *)(wr + 1);
474174641Skmacy
475237263Snp		m0->m_pkthdr.len = m0->m_len = sizeof(*oh) + sizeof(*wr);
476237263Snp		oh->flags = V_HDR_CTRL(CPL_PRIORITY_DATA) | F_HDR_DF |
477237263Snp		    V_HDR_QSET(toep->tp_qset);
478174641Skmacy
479237263Snp		/*
480237263Snp		 * Try to construct an immediate data WR if possible.  Stuff as
481237263Snp		 * much data into it as possible, one whole mbuf at a time.
482237263Snp		 */
483237263Snp		mlen = sndptr->m_len;
484237263Snp		ndesc = bytes = 0;
485237263Snp		while (mlen <= IMM_LEN - bytes) {
486237263Snp			bcopy(sndptr->m_data, dst, mlen);
487237263Snp			bytes += mlen;
488237263Snp			dst += mlen;
489174641Skmacy
490237263Snp			if (!(sndptr = sndptr->m_next))
491237263Snp				break;
492237263Snp			mlen = sndptr->m_len;
493237263Snp		}
494174641Skmacy
495237263Snp		if (bytes) {
496174641Skmacy
497237263Snp			/* Was able to fit 'bytes' bytes in an immediate WR */
498177530Skmacy
499237263Snp			ndesc = 1;
500237263Snp			make_tx_data_wr(so, wr, bytes, sndptr);
501174641Skmacy
502237263Snp			m0->m_len += bytes;
503237263Snp			m0->m_pkthdr.len = m0->m_len;
504174641Skmacy
505237263Snp		} else {
506237263Snp			int wr_avail = min(toep->tp_wr_avail, TX_MAX_DESC);
507174641Skmacy
508237263Snp			/* Need to make an SGL */
509174641Skmacy
510237263Snp			sgl = sglist_alloc(descs_to_sgllen[wr_avail], M_NOWAIT);
511237263Snp			if (sgl == NULL)
512237263Snp				break;
513177530Skmacy
514237263Snp			for (m = sndptr; m != NULL; m = m->m_next) {
515237263Snp				if ((mlen = m->m_len) > 0) {
516237263Snp					if (sglist_append(sgl, m->m_data, mlen))
517237263Snp					    break;
518237263Snp				}
519237263Snp				bytes += mlen;
520237263Snp			}
521237263Snp			sndptr = m;
522237263Snp			if (bytes == 0) {
523237263Snp				sglist_free(sgl);
524237263Snp				break;
525237263Snp			}
526237263Snp			ndesc = sgllen_to_descs[sgl->sg_nseg];
527237263Snp			oh->flags |= F_HDR_SGL;
528237263Snp			oh->sgl = sgl;
529237263Snp			make_tx_data_wr(so, wr, bytes, sndptr);
530237263Snp		}
531178302Skmacy
532237263Snp		oh->flags |= V_HDR_NDESC(ndesc);
533237263Snp		oh->plen = bytes;
534174641Skmacy
535237263Snp		snd->sb_sndptr = sndptr;
536237263Snp		snd->sb_sndptroff += bytes;
537237263Snp		if (sndptr == NULL) {
538237263Snp			snd->sb_sndptr = snd->sb_mbtail;
539237263Snp			snd->sb_sndptroff -= snd->sb_mbtail->m_len;
540237263Snp			toep->tp_m_last = snd->sb_mbtail;
541237263Snp		} else
542237263Snp			toep->tp_m_last = NULL;
543177530Skmacy
544237263Snp		total_bytes += bytes;
545174641Skmacy
546237263Snp		toep->tp_wr_avail -= ndesc;
547237263Snp		toep->tp_wr_unacked += ndesc;
548174641Skmacy
549237263Snp		if ((req_completion && toep->tp_wr_unacked == ndesc) ||
550237263Snp		    toep->tp_wr_unacked >= toep->tp_wr_max / 2) {
551237263Snp			wr->wr.wrh_hi |= htonl(F_WR_COMPL);
552237263Snp			toep->tp_wr_unacked = 0;
553237263Snp		}
554174641Skmacy
555237263Snp		enqueue_wr(toep, m0);
556237263Snp		l2t_send(sc, m0, toep->tp_l2t);
557237263Snp	}
558237263Snpout:
559237263Snp	SOCKBUF_UNLOCK(snd);
560174641Skmacy
561237263Snp	if (sndptr == NULL && (toep->tp_flags & TP_SEND_FIN))
562237263Snp		close_conn(sc, toep);
563174641Skmacy
564237263Snp	return (total_bytes);
565174641Skmacy}
566174641Skmacy
567237263Snpstatic int
568237263Snpsend_rx_credits(struct adapter *sc, struct toepcb *toep, int credits)
569174641Skmacy{
570174641Skmacy	struct mbuf *m;
571237263Snp	struct cpl_rx_data_ack *req;
572237263Snp	uint32_t dack = F_RX_DACK_CHANGE | V_RX_DACK_MODE(1);
573174641Skmacy
574237263Snp	m = M_GETHDR_OFLD(toep->tp_qset, CPL_PRIORITY_CONTROL, req);
575237263Snp	if (m == NULL)
576237263Snp		return (0);
577174641Skmacy
578237263Snp	req->wr.wrh_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD));
579237263Snp	req->wr.wrh_lo = 0;
580237263Snp	OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_RX_DATA_ACK, toep->tp_tid));
581237263Snp	req->credit_dack = htonl(dack | V_RX_CREDITS(credits));
582237263Snp	t3_offload_tx(sc, m);
583237263Snp	return (credits);
584174641Skmacy}
585174641Skmacy
586174641Skmacyvoid
587237263Snpt3_rcvd(struct toedev *tod, struct tcpcb *tp)
588174641Skmacy{
589237263Snp	struct adapter *sc = tod->tod_softc;
590237263Snp	struct inpcb *inp = tp->t_inpcb;
591237263Snp	struct socket *so = inp->inp_socket;
592237263Snp	struct sockbuf *so_rcv = &so->so_rcv;
593237263Snp	struct toepcb *toep = tp->t_toe;
594237263Snp	int must_send;
595178302Skmacy
596237263Snp	INP_WLOCK_ASSERT(inp);
597174641Skmacy
598237263Snp	SOCKBUF_LOCK(so_rcv);
599237263Snp	KASSERT(toep->tp_enqueued >= so_rcv->sb_cc,
600237263Snp	    ("%s: so_rcv->sb_cc > enqueued", __func__));
601237263Snp	toep->tp_rx_credits += toep->tp_enqueued - so_rcv->sb_cc;
602237263Snp	toep->tp_enqueued = so_rcv->sb_cc;
603237263Snp	SOCKBUF_UNLOCK(so_rcv);
604174641Skmacy
605237263Snp	must_send = toep->tp_rx_credits + 16384 >= tp->rcv_wnd;
606237263Snp	if (must_send || toep->tp_rx_credits >= 15 * 1024) {
607237263Snp		int credits;
608178302Skmacy
609237263Snp		credits = send_rx_credits(sc, toep, toep->tp_rx_credits);
610237263Snp		toep->tp_rx_credits -= credits;
611237263Snp		tp->rcv_wnd += credits;
612237263Snp		tp->rcv_adv += credits;
613237263Snp	}
614177340Skmacy}
615177340Skmacy
616237263Snpstatic int
617237263Snpdo_rx_urg_notify(struct sge_qset *qs, struct rsp_desc *r, struct mbuf *m)
618174641Skmacy{
619237263Snp	struct adapter *sc = qs->adap;
620237263Snp	struct tom_data *td = sc->tom_softc;
621237263Snp	struct cpl_rx_urg_notify *hdr = mtod(m, void *);
622237263Snp	unsigned int tid = GET_TID(hdr);
623237263Snp	struct toepcb *toep = lookup_tid(&td->tid_maps, tid);
624174641Skmacy
625237263Snp	log(LOG_ERR, "%s: tid %u inp %p", __func__, tid, toep->tp_inp);
626174641Skmacy
627237263Snp	m_freem(m);
628237263Snp	return (0);
629174641Skmacy}
630174641Skmacy
631237263Snpint
632237263Snpt3_send_fin(struct toedev *tod, struct tcpcb *tp)
633174641Skmacy{
634237263Snp	struct toepcb *toep = tp->t_toe;
635237263Snp	struct inpcb *inp = tp->t_inpcb;
636237263Snp	struct socket *so = inp_inpcbtosocket(inp);
637237263Snp#if defined(KTR)
638237263Snp	unsigned int tid = toep->tp_tid;
639237263Snp#endif
640174641Skmacy
641237263Snp	INP_INFO_WLOCK_ASSERT(&V_tcbinfo);
642237263Snp	INP_WLOCK_ASSERT(inp);
643174641Skmacy
644237263Snp	CTR4(KTR_CXGB, "%s: tid %d, toep %p, flags %x", __func__, tid, toep,
645237263Snp	    toep->tp_flags);
646174641Skmacy
647237263Snp	toep->tp_flags |= TP_SEND_FIN;
648237263Snp	t3_push_frames(so, 1);
649174641Skmacy
650237263Snp	return (0);
651174641Skmacy}
652174641Skmacy
653174641Skmacyint
654237263Snpt3_tod_output(struct toedev *tod, struct tcpcb *tp)
655174641Skmacy{
656237263Snp	struct inpcb *inp = tp->t_inpcb;
657237263Snp	struct socket *so = inp->inp_socket;
658174641Skmacy
659237263Snp	t3_push_frames(so, 1);
660237263Snp	return (0);
661174641Skmacy}
662174641Skmacy
663237263Snp/* What mtu_idx to use, given a 4-tuple and/or an MSS cap */
664237263Snpint
665237263Snpfind_best_mtu_idx(struct adapter *sc, struct in_conninfo *inc, int pmss)
666174641Skmacy{
667237263Snp	unsigned short *mtus = &sc->params.mtus[0];
668237263Snp	int i = 0, mss;
669178302Skmacy
670237263Snp	KASSERT(inc != NULL || pmss > 0,
671237263Snp	    ("%s: at least one of inc/pmss must be specified", __func__));
672174641Skmacy
673237263Snp	mss = inc ? tcp_mssopt(inc) : pmss;
674237263Snp	if (pmss > 0 && mss > pmss)
675237263Snp		mss = pmss;
676174641Skmacy
677237263Snp	while (i < NMTUS - 1 && mtus[i + 1] <= mss + 40)
678237263Snp		++i;
679174641Skmacy
680174641Skmacy	return (i);
681174641Skmacy}
682174641Skmacy
683174641Skmacystatic inline void
684237263Snppurge_wr_queue(struct toepcb *toep)
685174641Skmacy{
686237263Snp	struct mbuf *m;
687237263Snp	struct ofld_hdr *oh;
688174641Skmacy
689237263Snp	while ((m = mbufq_dequeue(&toep->wr_list)) != NULL) {
690237263Snp		oh = mtod(m, struct ofld_hdr *);
691237263Snp		if (oh->flags & F_HDR_SGL)
692237263Snp			sglist_free(oh->sgl);
693237263Snp		m_freem(m);
694237263Snp	}
695174641Skmacy}
696174641Skmacy
697174641Skmacy/*
698237263Snp * Release cxgb(4) and T3 resources held by an offload connection (TID, L2T
699237263Snp * entry, etc.)
700174641Skmacy */
701174641Skmacystatic void
702174641Skmacyt3_release_offload_resources(struct toepcb *toep)
703174641Skmacy{
704237263Snp	struct toedev *tod = toep->tp_tod;
705237263Snp	struct tom_data *td = t3_tomdata(tod);
706174641Skmacy
707237263Snp	/*
708237263Snp	 * The TOM explicitly detaches its toepcb from the system's inp before
709237263Snp	 * it releases the offload resources.
710237263Snp	 */
711237263Snp	if (toep->tp_inp) {
712237263Snp		panic("%s: inp %p still attached to toepcb %p",
713237263Snp		    __func__, toep->tp_inp, toep);
714237263Snp	}
715174641Skmacy
716237263Snp	if (toep->tp_wr_avail != toep->tp_wr_max)
717174641Skmacy		purge_wr_queue(toep);
718174641Skmacy
719174641Skmacy	if (toep->tp_l2t) {
720237263Snp		l2t_release(td->l2t, toep->tp_l2t);
721174641Skmacy		toep->tp_l2t = NULL;
722174641Skmacy	}
723174641Skmacy
724237263Snp	if (toep->tp_tid >= 0)
725237263Snp		release_tid(tod, toep->tp_tid, toep->tp_qset);
726174641Skmacy
727237263Snp	toepcb_free(toep);
728174641Skmacy}
729174641Skmacy
730174641Skmacy/*
731237263Snp * Determine the receive window size for a socket.
732174641Skmacy */
733237263Snpunsigned long
734237263Snpselect_rcv_wnd(struct socket *so)
735174641Skmacy{
736237263Snp	unsigned long wnd;
737237263Snp
738237263Snp	SOCKBUF_LOCK_ASSERT(&so->so_rcv);
739237263Snp
740237263Snp	wnd = sbspace(&so->so_rcv);
741237263Snp	if (wnd < MIN_RCV_WND)
742237263Snp		wnd = MIN_RCV_WND;
743237263Snp
744237263Snp	return min(wnd, MAX_RCV_WND);
745237263Snp}
746237263Snp
747237263Snpint
748237263Snpselect_rcv_wscale(void)
749237263Snp{
750174641Skmacy	int wscale = 0;
751237263Snp	unsigned long space = sb_max;
752174641Skmacy
753174641Skmacy	if (space > MAX_RCV_WND)
754174641Skmacy		space = MAX_RCV_WND;
755174641Skmacy
756237263Snp	while (wscale < TCP_MAX_WINSHIFT && (TCP_MAXWIN << wscale) < space)
757237263Snp		wscale++;
758176472Skmacy
759176472Skmacy	return (wscale);
760174641Skmacy}
761174641Skmacy
762237263Snp
763174641Skmacy/*
764237263Snp * Set up the socket for TCP offload.
765174641Skmacy */
766237263Snpvoid
767237263Snpoffload_socket(struct socket *so, struct toepcb *toep)
768174641Skmacy{
769237263Snp	struct toedev *tod = toep->tp_tod;
770237263Snp	struct tom_data *td = t3_tomdata(tod);
771237263Snp	struct inpcb *inp = sotoinpcb(so);
772237263Snp	struct tcpcb *tp = intotcpcb(inp);
773174641Skmacy
774237263Snp	INP_WLOCK_ASSERT(inp);
775176472Skmacy
776237263Snp	/* Update socket */
777237263Snp	SOCKBUF_LOCK(&so->so_snd);
778237263Snp	so_sockbuf_snd(so)->sb_flags |= SB_NOCOALESCE;
779237263Snp	SOCKBUF_UNLOCK(&so->so_snd);
780237263Snp	SOCKBUF_LOCK(&so->so_rcv);
781237263Snp	so_sockbuf_rcv(so)->sb_flags |= SB_NOCOALESCE;
782237263Snp	SOCKBUF_UNLOCK(&so->so_rcv);
783174641Skmacy
784237263Snp	/* Update TCP PCB */
785237263Snp	tp->tod = toep->tp_tod;
786174641Skmacy	tp->t_toe = toep;
787237263Snp	tp->t_flags |= TF_TOE;
788176472Skmacy
789237263Snp	/* Install an extra hold on inp */
790237263Snp	toep->tp_inp = inp;
791237263Snp	toep->tp_flags |= TP_ATTACHED;
792237263Snp	in_pcbref(inp);
793174641Skmacy
794237263Snp	/* Add the TOE PCB to the active list */
795237263Snp	mtx_lock(&td->toep_list_lock);
796237263Snp	TAILQ_INSERT_HEAD(&td->toep_list, toep, link);
797237263Snp	mtx_unlock(&td->toep_list_lock);
798174641Skmacy}
799174641Skmacy
800237263Snp/* This is _not_ the normal way to "unoffload" a socket. */
801237263Snpvoid
802237263Snpundo_offload_socket(struct socket *so)
803174641Skmacy{
804237263Snp	struct inpcb *inp = sotoinpcb(so);
805237263Snp	struct tcpcb *tp = intotcpcb(inp);
806237263Snp	struct toepcb *toep = tp->t_toe;
807237263Snp	struct toedev *tod = toep->tp_tod;
808237263Snp	struct tom_data *td = t3_tomdata(tod);
809174641Skmacy
810237263Snp	INP_WLOCK_ASSERT(inp);
811174641Skmacy
812237263Snp	so_sockbuf_snd(so)->sb_flags &= ~SB_NOCOALESCE;
813237263Snp	so_sockbuf_rcv(so)->sb_flags &= ~SB_NOCOALESCE;
814174641Skmacy
815237263Snp	tp->tod = NULL;
816237263Snp	tp->t_toe = NULL;
817237263Snp	tp->t_flags &= ~TF_TOE;
818174641Skmacy
819237263Snp	toep->tp_inp = NULL;
820237263Snp	toep->tp_flags &= ~TP_ATTACHED;
821237263Snp	if (in_pcbrele_wlocked(inp))
822237263Snp		panic("%s: inp freed.", __func__);
823237263Snp
824237263Snp	mtx_lock(&td->toep_list_lock);
825237263Snp	TAILQ_REMOVE(&td->toep_list, toep, link);
826237263Snp	mtx_unlock(&td->toep_list_lock);
827174641Skmacy}
828176472Skmacy
829237263Snp/*
830237263Snp * Socket could be a listening socket, and we may not have a toepcb at all at
831237263Snp * this time.
832237263Snp */
833237263Snpuint32_t
834237263Snpcalc_opt0h(struct socket *so, int mtu_idx, int rscale, struct l2t_entry *e)
835176472Skmacy{
836237263Snp	uint32_t opt0h = F_TCAM_BYPASS | V_WND_SCALE(rscale) |
837237263Snp	    V_MSS_IDX(mtu_idx);
838176472Skmacy
839237263Snp	if (so != NULL) {
840237263Snp		struct inpcb *inp = sotoinpcb(so);
841237263Snp		struct tcpcb *tp = intotcpcb(inp);
842237263Snp		int keepalive = always_keepalive ||
843237263Snp		    so_options_get(so) & SO_KEEPALIVE;
844237263Snp
845237263Snp		opt0h |= V_NAGLE((tp->t_flags & TF_NODELAY) == 0);
846237263Snp		opt0h |= V_KEEP_ALIVE(keepalive != 0);
847237263Snp	}
848237263Snp
849237263Snp	if (e != NULL)
850237263Snp		opt0h |= V_L2T_IDX(e->idx) | V_TX_CHANNEL(e->smt_idx);
851237263Snp
852237263Snp	return (htobe32(opt0h));
853176472Skmacy}
854176472Skmacy
855237263Snpuint32_t
856237263Snpcalc_opt0l(struct socket *so, int rcv_bufsize)
857174641Skmacy{
858237263Snp	uint32_t opt0l = V_ULP_MODE(ULP_MODE_NONE) | V_RCV_BUFSIZ(rcv_bufsize);
859176472Skmacy
860237263Snp	KASSERT(rcv_bufsize <= M_RCV_BUFSIZ,
861237263Snp	    ("%s: rcv_bufsize (%d) is too high", __func__, rcv_bufsize));
862237263Snp
863237263Snp	if (so != NULL)		/* optional because noone cares about IP TOS */
864237263Snp		opt0l |= V_TOS(INP_TOS(sotoinpcb(so)));
865237263Snp
866237263Snp	return (htobe32(opt0l));
867174641Skmacy}
868174641Skmacy
/*
 * Map the status field of an ACT_OPEN_RPL to an errno value.
 *
 * TCAM-full and 4-tuple-in-use both map to EAGAIN (the latter is also
 * logged); any status not listed here maps to EIO.
 */
static int
act_open_rpl_status_to_errno(int status)
{
	int error;

	switch (status) {
	case CPL_ERR_CONN_RESET:
		error = ECONNREFUSED;
		break;
	case CPL_ERR_ARP_MISS:
		error = EHOSTUNREACH;
		break;
	case CPL_ERR_CONN_TIMEDOUT:
		error = ETIMEDOUT;
		break;
	case CPL_ERR_CONN_EXIST:
		log(LOG_ERR, "ACTIVE_OPEN_RPL: 4-tuple in use\n");
		/* FALLTHROUGH */
	case CPL_ERR_TCAM_FULL:
		error = EAGAIN;
		break;
	default:
		error = EIO;
		break;
	}

	return (error);
}
891174641Skmacy
892174641Skmacy/*
893174641Skmacy * Return whether a failed active open has allocated a TID
894174641Skmacy */
895174641Skmacystatic inline int
896174641Skmacyact_open_has_tid(int status)
897174641Skmacy{
898174641Skmacy	return status != CPL_ERR_TCAM_FULL && status != CPL_ERR_CONN_EXIST &&
899174641Skmacy	       status != CPL_ERR_ARP_MISS;
900174641Skmacy}
901174641Skmacy
902174641Skmacy/*
903237263Snp * Active open failed.
904174641Skmacy */
905174641Skmacystatic int
906237263Snpdo_act_open_rpl(struct sge_qset *qs, struct rsp_desc *r, struct mbuf *m)
907174641Skmacy{
908237263Snp	struct adapter *sc = qs->adap;
909237263Snp	struct tom_data *td = sc->tom_softc;
910237263Snp	struct toedev *tod = &td->tod;
911237263Snp	struct cpl_act_open_rpl *rpl = mtod(m, void *);
912237263Snp	unsigned int atid = G_TID(ntohl(rpl->atid));
913237263Snp	struct toepcb *toep = lookup_atid(&td->tid_maps, atid);
914237263Snp	struct inpcb *inp = toep->tp_inp;
915239511Snp	int s = rpl->status, rc;
916237263Snp
917237263Snp	CTR3(KTR_CXGB, "%s: atid %u, status %u ", __func__, atid, s);
918237263Snp
919237263Snp	free_atid(&td->tid_maps, atid);
920237263Snp	toep->tp_tid = -1;
921237263Snp
922237263Snp	if (act_open_has_tid(s))
923237263Snp		queue_tid_release(tod, GET_TID(rpl));
924237263Snp
925239511Snp	rc = act_open_rpl_status_to_errno(s);
926239511Snp	if (rc != EAGAIN)
927237263Snp		INP_INFO_WLOCK(&V_tcbinfo);
928239511Snp	INP_WLOCK(inp);
929239511Snp	toe_connect_failed(tod, inp, rc);
930239511Snp	toepcb_release(toep);	/* unlocks inp */
931239511Snp	if (rc != EAGAIN)
932237263Snp		INP_INFO_WUNLOCK(&V_tcbinfo);
933237263Snp
934237263Snp	m_freem(m);
935174641Skmacy	return (0);
936174641Skmacy}
937174641Skmacy
/*
 * Send an active open request.
 *
 * State of affairs on entry:
 * soisconnecting (so_state |= SS_ISCONNECTING)
 * tcbinfo not locked (this has changed - used to be WLOCKed)
 * inp WLOCKed
 * tp->t_state = TCPS_SYN_SENT
 * rtalloc1, RT_UNLOCK on rt.
 *
 * Returns 0 once the CPL_ACT_OPEN_REQ has been accepted by l2t_send().  Every
 * failure, whatever its cause, is reported as ENOMEM after the atid, L2T
 * entry, toepcb and mbuf obtained here have been given back.
 */
int
t3_connect(struct toedev *tod, struct socket *so,
    struct rtentry *rt, struct sockaddr *nam)
{
	struct mbuf *m = NULL;
	struct l2t_entry *e = NULL;
	struct tom_data *td = t3_tomdata(tod);
	struct adapter *sc = tod->tod_softc;
	struct cpl_act_open_req *cpl;
	struct inpcb *inp = sotoinpcb(so);
	struct tcpcb *tp = intotcpcb(inp);
	struct toepcb *toep;
	int atid = -1, mtu_idx, rscale, cpu_idx, qset;
	struct sockaddr *gw;
	struct ifnet *ifp = rt->rt_ifp;
	struct port_info *pi = ifp->if_softc;	/* XXX wrong for VLAN etc. */

	INP_WLOCK_ASSERT(inp);

	toep = toepcb_alloc(tod);
	if (toep == NULL)
		goto failed;

	/* The atid identifies this open until the hardware assigns a TID. */
	atid = alloc_atid(&td->tid_maps, toep);
	if (atid < 0)
		goto failed;

	/* Pick one of the port's queue sets at random. */
	qset = pi->first_qset + (arc4random() % pi->nqsets);

	m = M_GETHDR_OFLD(qset, CPL_PRIORITY_CONTROL, cpl);
	if (m == NULL)
		goto failed;

	/* L2 next hop: the route's gateway if it has one, else nam itself. */
	gw = rt->rt_flags & RTF_GATEWAY ? rt->rt_gateway : nam;
	e = t3_l2t_get(pi, ifp, gw);
	if (e == NULL)
		goto failed;

	toep->tp_l2t = e;
	toep->tp_tid = atid;	/* used to double check response */
	toep->tp_qset = qset;

	SOCKBUF_LOCK(&so->so_rcv);
	/* opt0 rcv_bufsiz initially, assumes its normal meaning later */
	toep->tp_rx_credits = min(select_rcv_wnd(so) >> 10, M_RCV_BUFSIZ);
	SOCKBUF_UNLOCK(&so->so_rcv);

	/* From here on a failure must go through undo_offload_socket(). */
	offload_socket(so, toep);

	/*
	 * The kernel sets request_r_scale based on sb_max whereas we need to
	 * take hardware's MAX_RCV_WND into account too.  This is normally a
	 * no-op as MAX_RCV_WND is much larger than the default sb_max.
	 */
	if (tp->t_flags & TF_REQ_SCALE)
		rscale = tp->request_r_scale = select_rcv_wscale();
	else
		rscale = 0;
	mtu_idx = find_best_mtu_idx(sc, &inp->inp_inc, 0);
	cpu_idx = sc->rrss_map[qset];

	/* Fill in the CPL_ACT_OPEN_REQ. */
	cpl->wr.wrh_hi = htobe32(V_WR_OP(FW_WROPCODE_FORWARD));
	cpl->wr.wrh_lo = 0;
	OPCODE_TID(cpl) = htobe32(MK_OPCODE_TID(CPL_ACT_OPEN_REQ, atid));
	inp_4tuple_get(inp, &cpl->local_ip, &cpl->local_port, &cpl->peer_ip,
	    &cpl->peer_port);
	cpl->opt0h = calc_opt0h(so, mtu_idx, rscale, e);
	cpl->opt0l = calc_opt0l(so, toep->tp_rx_credits);
	cpl->params = 0;
	cpl->opt2 = calc_opt2(cpu_idx);

	CTR5(KTR_CXGB, "%s: atid %u (%s), toep %p, inp %p", __func__,
	    toep->tp_tid, tcpstates[tp->t_state], toep, inp);

	if (l2t_send(sc, m, e) == 0)
		return (0);

	/* Could not send: detach the socket again before the common cleanup. */
	undo_offload_socket(so);

failed:
	CTR5(KTR_CXGB, "%s: FAILED, atid %d, toep %p, l2te %p, mbuf %p",
	    __func__, atid, toep, e, m);

	/* Release only what was actually acquired before the failure. */
	if (atid >= 0)
		free_atid(&td->tid_maps, atid);

	if (e)
		l2t_release(td->l2t, e);

	if (toep)
		toepcb_free(toep);

	m_freem(m);

	return (ENOMEM);
}
1044174641Skmacy
1045174641Skmacy/*
1046237263Snp * Send an ABORT_REQ message.  Cannot fail.  This routine makes sure we do not
1047237263Snp * send multiple ABORT_REQs for the same connection and also that we do not try
1048237263Snp * to send a message after the connection has closed.
1049176472Skmacy */
1050176472Skmacystatic void
1051237263Snpsend_reset(struct toepcb *toep)
1052176472Skmacy{
1053176472Skmacy
1054237263Snp	struct cpl_abort_req *req;
1055237263Snp	unsigned int tid = toep->tp_tid;
1056237263Snp	struct inpcb *inp = toep->tp_inp;
1057237263Snp	struct socket *so = inp->inp_socket;
1058237263Snp	struct tcpcb *tp = intotcpcb(inp);
1059237263Snp	struct toedev *tod = toep->tp_tod;
1060237263Snp	struct adapter *sc = tod->tod_softc;
1061237263Snp	struct mbuf *m;
1062176472Skmacy
1063237263Snp	INP_INFO_WLOCK_ASSERT(&V_tcbinfo);
1064237263Snp	INP_WLOCK_ASSERT(inp);
1065176472Skmacy
1066237263Snp	CTR4(KTR_CXGB, "%s: tid %d, toep %p (%x)", __func__, tid, toep,
1067237263Snp	    toep->tp_flags);
1068176472Skmacy
1069237263Snp	if (toep->tp_flags & TP_ABORT_SHUTDOWN)
1070176472Skmacy		return;
1071176472Skmacy
1072237263Snp	toep->tp_flags |= (TP_ABORT_RPL_PENDING | TP_ABORT_SHUTDOWN);
1073176472Skmacy
1074237263Snp	/* Purge the send queue */
1075237263Snp	sbflush(so_sockbuf_snd(so));
1076237263Snp	purge_wr_queue(toep);
1077176472Skmacy
1078237263Snp	m = M_GETHDR_OFLD(toep->tp_qset, CPL_PRIORITY_DATA, req);
1079237263Snp	if (m == NULL)
1080237263Snp		CXGB_UNIMPLEMENTED();
1081176472Skmacy
1082237263Snp	req->wr.wrh_hi = htonl(V_WR_OP(FW_WROPCODE_OFLD_HOST_ABORT_CON_REQ));
1083237263Snp	req->wr.wrh_lo = htonl(V_WR_TID(tid));
1084237263Snp	OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_ABORT_REQ, tid));
1085237263Snp	req->rsvd0 = htonl(tp->snd_nxt);
1086237263Snp	req->rsvd1 = !(toep->tp_flags & TP_DATASENT);
1087237263Snp	req->cmd = CPL_ABORT_SEND_RST;
1088176472Skmacy
1089237263Snp	if (tp->t_state == TCPS_SYN_SENT)
1090237263Snp		mbufq_tail(&toep->out_of_order_queue, m); /* defer */
1091176472Skmacy	else
1092237263Snp		l2t_send(sc, m, toep->tp_l2t);
1093176472Skmacy}
1094176472Skmacy
1095237263Snpint
1096237263Snpt3_send_rst(struct toedev *tod __unused, struct tcpcb *tp)
1097176472Skmacy{
1098176472Skmacy
1099237263Snp	send_reset(tp->t_toe);
1100176472Skmacy	return (0);
1101176472Skmacy}
1102176472Skmacy
/*
 * Handler for RX_DATA CPL messages.
 *
 * Strips the CPL header, updates the receive-side TCP state for the payload
 * and appends the payload to the socket's receive buffer.  The mbuf is either
 * handed to the socket buffer or freed; the handler always returns 0.
 */
static int
do_rx_data(struct sge_qset *qs, struct rsp_desc *r, struct mbuf *m)
{
	struct adapter *sc = qs->adap;
	struct tom_data *td = sc->tom_softc;
	struct cpl_rx_data *hdr = mtod(m, void *);
	unsigned int tid = GET_TID(hdr);
	struct toepcb *toep = lookup_tid(&td->tid_maps, tid);
	struct inpcb *inp = toep->tp_inp;
	struct tcpcb *tp;
	struct socket *so;
	struct sockbuf *so_rcv;

	/* Advance over CPL */
	m_adj(m, sizeof(*hdr));

	/* XXX: revisit.  This comes from the T4 TOM */
	if (__predict_false(inp == NULL)) {
		/*
		 * do_pass_establish failed and must be attempting to abort the
		 * connection.  Meanwhile, the T4 has sent us data for such a
		 * connection.
		 */
#ifdef notyet
		KASSERT(toepcb_flag(toep, TPF_ABORT_SHUTDOWN),
		    ("%s: inp NULL and tid isn't being aborted", __func__));
#endif
		m_freem(m);
		return (0);
	}

	INP_WLOCK(inp);
	/* Data for a dropped or time-wait connection is silently discarded. */
	if (inp->inp_flags & (INP_DROPPED | INP_TIMEWAIT)) {
		CTR4(KTR_CXGB, "%s: tid %u, rx (%d bytes), inp_flags 0x%x",
		    __func__, tid, m->m_pkthdr.len, inp->inp_flags);
		INP_WUNLOCK(inp);
		m_freem(m);
		return (0);
	}

	/* Track the delayed-ACK mode reported with this message. */
	if (__predict_false(hdr->dack_mode != toep->tp_delack_mode))
		toep->tp_delack_mode = hdr->dack_mode;

	tp = intotcpcb(inp);

#ifdef INVARIANTS
	/* A sequence number mismatch is only logged, not treated as fatal. */
	if (__predict_false(tp->rcv_nxt != be32toh(hdr->seq))) {
		log(LOG_ERR,
		    "%s: unexpected seq# %x for TID %u, rcv_nxt %x\n",
		    __func__, be32toh(hdr->seq), toep->tp_tid, tp->rcv_nxt);
	}
#endif
	/* Account for the payload in the receive sequence space and window. */
	tp->rcv_nxt += m->m_pkthdr.len;
	KASSERT(tp->rcv_wnd >= m->m_pkthdr.len,
	    ("%s: negative window size", __func__));
	tp->rcv_wnd -= m->m_pkthdr.len;
	tp->t_rcvtime = ticks;

	so  = inp->inp_socket;
	so_rcv = &so->so_rcv;
	SOCKBUF_LOCK(so_rcv);

	if (__predict_false(so_rcv->sb_state & SBS_CANTRCVMORE)) {
		CTR3(KTR_CXGB, "%s: tid %u, excess rx (%d bytes)",
		    __func__, tid, m->m_pkthdr.len);
		SOCKBUF_UNLOCK(so_rcv);
		INP_WUNLOCK(inp);

		/*
		 * The inp lock is dropped and re-taken so that the tcbinfo
		 * lock can be acquired ahead of it for tcp_drop().
		 * NOTE(review): the inp state is not re-checked after the
		 * relock -- confirm nothing can change it in the window.
		 */
		INP_INFO_WLOCK(&V_tcbinfo);
		INP_WLOCK(inp);
		tp = tcp_drop(tp, ECONNRESET);
		/* Unlock only when tcp_drop() handed back a tcpcb. */
		if (tp)
			INP_WUNLOCK(inp);
		INP_INFO_WUNLOCK(&V_tcbinfo);

		m_freem(m);
		return (0);
	}

	/* receive buffer autosize */
	if (so_rcv->sb_flags & SB_AUTOSIZE &&
	    V_tcp_do_autorcvbuf &&
	    so_rcv->sb_hiwat < V_tcp_autorcvbuf_max &&
	    (m->m_pkthdr.len > (sbspace(so_rcv) / 8 * 7) || tp->rcv_wnd < 32768)) {
		unsigned int hiwat = so_rcv->sb_hiwat;
		unsigned int newsize = min(hiwat + V_tcp_autorcvbuf_inc,
		    V_tcp_autorcvbuf_max);

		/*
		 * If the buffer cannot be grown, stop autosizing; otherwise
		 * credit the added space to tp_rx_credits.
		 */
		if (!sbreserve_locked(so_rcv, newsize, so, NULL))
			so_rcv->sb_flags &= ~SB_AUTOSIZE;
		else
			toep->tp_rx_credits += newsize - hiwat;
	}

	/* Queue the payload on the socket and wake up any reader. */
	toep->tp_enqueued += m->m_pkthdr.len;
	sbappendstream_locked(so_rcv, m);
	sorwakeup_locked(so);
	/* sorwakeup_locked() is expected to have released the sockbuf lock. */
	SOCKBUF_UNLOCK_ASSERT(so_rcv);

	INP_WUNLOCK(inp);
	return (0);
}
1208174641Skmacy
/*
 * Handler for PEER_CLOSE CPL messages.
 *
 * Marks the socket as unable to receive more data and advances the TCP state
 * machine for the close initiated by the peer.  Nothing is done if an abort
 * reply is pending on the connection.  Frees the mbuf and always returns 0.
 */
static int
do_peer_close(struct sge_qset *qs, struct rsp_desc *r, struct mbuf *m)
{
	struct adapter *sc = qs->adap;
	struct tom_data *td = sc->tom_softc;
	const struct cpl_peer_close *hdr = mtod(m, void *);
	unsigned int tid = GET_TID(hdr);
	struct toepcb *toep = lookup_tid(&td->tid_maps, tid);
	struct inpcb *inp = toep->tp_inp;
	struct tcpcb *tp;
	struct socket *so;

	INP_INFO_WLOCK(&V_tcbinfo);
	INP_WLOCK(inp);
	tp = intotcpcb(inp);

	CTR5(KTR_CXGB, "%s: tid %u (%s), toep_flags 0x%x, inp %p", __func__,
	    tid, tp ? tcpstates[tp->t_state] : "no tp" , toep->tp_flags, inp);

	/* The connection is being aborted; the abort reply will clean up. */
	if (toep->tp_flags & TP_ABORT_RPL_PENDING)
		goto done;

	so = inp_inpcbtosocket(inp);

	socantrcvmore(so);
	/* presumably accounts for the sequence number used by the peer's FIN */
	tp->rcv_nxt++;

	switch (tp->t_state) {
	case TCPS_SYN_RECEIVED:
		tp->t_starttime = ticks;
		/* FALLTHROUGH */
	case TCPS_ESTABLISHED:
		tp->t_state = TCPS_CLOSE_WAIT;
		break;
	case TCPS_FIN_WAIT_1:
		tp->t_state = TCPS_CLOSING;
		break;
	case TCPS_FIN_WAIT_2:
		/*
		 * Both sides are done.  tcp_twstart() returns with the inp
		 * unlocked, so it is locked again for toepcb_release().
		 */
		tcp_twstart(tp);
		INP_UNLOCK_ASSERT(inp);	/* safe, we have a ref on the  inp */
		INP_INFO_WUNLOCK(&V_tcbinfo);

		INP_WLOCK(inp);
		toepcb_release(toep);	/* no more CPLs expected */

		m_freem(m);
		return (0);
	default:
		log(LOG_ERR, "%s: TID %u received PEER_CLOSE in bad state %d\n",
		    __func__, toep->tp_tid, tp->t_state);
	}

done:
	INP_WUNLOCK(inp);
	INP_INFO_WUNLOCK(&V_tcbinfo);

	m_freem(m);
	return (0);
}
1271174641Skmacy
/*
 * Handler for CLOSE_CON_RPL CPL messages.  peer ACK to our FIN received.
 *
 * Updates snd_una from the reply and advances the TCP state machine.  Nothing
 * is done if an abort reply is pending on the connection.  Frees the mbuf and
 * always returns 0.
 */
static int
do_close_con_rpl(struct sge_qset *qs, struct rsp_desc *r, struct mbuf *m)
{
	struct adapter *sc = qs->adap;
	struct tom_data *td = sc->tom_softc;
	const struct cpl_close_con_rpl *rpl = mtod(m, void *);
	unsigned int tid = GET_TID(rpl);
	struct toepcb *toep = lookup_tid(&td->tid_maps, tid);
	struct inpcb *inp = toep->tp_inp;
	struct tcpcb *tp;
	struct socket *so;

	INP_INFO_WLOCK(&V_tcbinfo);
	INP_WLOCK(inp);
	tp = intotcpcb(inp);

	CTR4(KTR_CXGB, "%s: tid %u (%s), toep_flags 0x%x", __func__, tid,
	    tp ? tcpstates[tp->t_state] : "no tp", toep->tp_flags);

	/* The connection is being aborted; the abort reply will clean up. */
	if ((toep->tp_flags & TP_ABORT_RPL_PENDING))
		goto done;

	so = inp_inpcbtosocket(inp);
	tp->snd_una = ntohl(rpl->snd_nxt) - 1;  /* exclude FIN */

	switch (tp->t_state) {
	case TCPS_CLOSING:
		tcp_twstart(tp);
		/*
		 * Shared exit for the two states in which the connection is
		 * finished.  The inp is unlocked on arrival here and is locked
		 * again for toepcb_release().
		 */
release:
		INP_UNLOCK_ASSERT(inp);	/* safe, we have a ref on the  inp */
		INP_INFO_WUNLOCK(&V_tcbinfo);

		INP_WLOCK(inp);
		toepcb_release(toep);	/* no more CPLs expected */

		m_freem(m);
		return (0);
	case TCPS_LAST_ACK:
		/* Unlock here only if tcp_close() handed back a tcpcb. */
		if (tcp_close(tp))
			INP_WUNLOCK(inp);
		goto release;

	case TCPS_FIN_WAIT_1:
		/* If the peer has closed its side too, the socket is done. */
		if (so->so_rcv.sb_state & SBS_CANTRCVMORE)
			soisdisconnected(so);
		tp->t_state = TCPS_FIN_WAIT_2;
		break;
	default:
		log(LOG_ERR,
		    "%s: TID %u received CLOSE_CON_RPL in bad state %d\n",
		    __func__, toep->tp_tid, tp->t_state);
	}

done:
	INP_WUNLOCK(inp);
	INP_INFO_WUNLOCK(&V_tcbinfo);

	m_freem(m);
	return (0);
}
1335174641Skmacy
1336174641Skmacystatic int
1337237263Snpdo_smt_write_rpl(struct sge_qset *qs, struct rsp_desc *r, struct mbuf *m)
1338174641Skmacy{
1339237263Snp	struct cpl_smt_write_rpl *rpl = mtod(m, void *);
1340174641Skmacy
1341237263Snp	if (rpl->status != CPL_ERR_NONE) {
1342237263Snp		log(LOG_ERR,
1343237263Snp		    "Unexpected SMT_WRITE_RPL status %u for entry %u\n",
1344237263Snp		    rpl->status, GET_TID(rpl));
1345237263Snp	}
1346237263Snp
1347237263Snp	m_freem(m);
1348174641Skmacy	return (0);
1349174641Skmacy}
1350174641Skmacy
1351237263Snpstatic int
1352237263Snpdo_set_tcb_rpl(struct sge_qset *qs, struct rsp_desc *r, struct mbuf *m)
1353174641Skmacy{
1354237263Snp	struct cpl_set_tcb_rpl *rpl = mtod(m, void *);
1355237263Snp
1356237263Snp	if (rpl->status != CPL_ERR_NONE) {
1357237263Snp		log(LOG_ERR, "Unexpected SET_TCB_RPL status %u for tid %u\n",
1358237263Snp		    rpl->status, GET_TID(rpl));
1359174641Skmacy	}
1360174641Skmacy
1361237263Snp	m_freem(m);
1362237263Snp	return (0);
1363174641Skmacy}
1364174641Skmacy
1365174641Skmacy/*
1366174641Skmacy * Handle an ABORT_RPL_RSS CPL message.
1367174641Skmacy */
1368174641Skmacystatic int
1369237263Snpdo_abort_rpl(struct sge_qset *qs, struct rsp_desc *r, struct mbuf *m)
1370174641Skmacy{
1371237263Snp	struct adapter *sc = qs->adap;
1372237263Snp	struct tom_data *td = sc->tom_softc;
1373237263Snp	const struct cpl_abort_rpl_rss *rpl = mtod(m, void *);
1374237263Snp	unsigned int tid = GET_TID(rpl);
1375237263Snp	struct toepcb *toep = lookup_tid(&td->tid_maps, tid);
1376237263Snp	struct inpcb *inp;
1377237263Snp
1378174641Skmacy	/*
1379174641Skmacy	 * Ignore replies to post-close aborts indicating that the abort was
1380174641Skmacy	 * requested too late.  These connections are terminated when we get
1381174641Skmacy	 * PEER_CLOSE or CLOSE_CON_RPL and by the time the abort_rpl_rss
1382174641Skmacy	 * arrives the TID is either no longer used or it has been recycled.
1383174641Skmacy	 */
1384174641Skmacy	if (rpl->status == CPL_ERR_ABORT_FAILED) {
1385237263Snp		m_freem(m);
1386174641Skmacy		return (0);
1387174641Skmacy	}
1388174641Skmacy
1389237263Snp	if (toep->tp_flags & TP_IS_A_SYNQ_ENTRY)
1390237263Snp		return (do_abort_rpl_synqe(qs, r, m));
1391174641Skmacy
1392237263Snp	CTR4(KTR_CXGB, "%s: tid %d, toep %p, status %d", __func__, tid, toep,
1393237263Snp	    rpl->status);
1394174641Skmacy
1395237263Snp	inp = toep->tp_inp;
1396237263Snp	INP_WLOCK(inp);
1397237263Snp
1398237263Snp	if (toep->tp_flags & TP_ABORT_RPL_PENDING) {
1399237263Snp		if (!(toep->tp_flags & TP_ABORT_RPL_RCVD)) {
1400237263Snp			toep->tp_flags |= TP_ABORT_RPL_RCVD;
1401237263Snp			INP_WUNLOCK(inp);
1402237263Snp		} else {
1403237263Snp			toep->tp_flags &= ~TP_ABORT_RPL_RCVD;
1404237263Snp			toep->tp_flags &= TP_ABORT_RPL_PENDING;
1405237263Snp			toepcb_release(toep);	/* no more CPLs expected */
1406237263Snp		}
1407174641Skmacy	}
1408174641Skmacy
1409237263Snp	m_freem(m);
1410174641Skmacy	return (0);
1411174641Skmacy}
1412174641Skmacy
1413174641Skmacy/*
1414237263Snp * Convert the status code of an ABORT_REQ into a FreeBSD error code.
1415174641Skmacy */
1416174641Skmacystatic int
1417237263Snpabort_status_to_errno(struct tcpcb *tp, int abort_reason)
1418174641Skmacy{
1419174641Skmacy	switch (abort_reason) {
1420174641Skmacy	case CPL_ERR_BAD_SYN:
1421174641Skmacy	case CPL_ERR_CONN_RESET:
1422174641Skmacy		return (tp->t_state == TCPS_CLOSE_WAIT ? EPIPE : ECONNRESET);
1423174641Skmacy	case CPL_ERR_XMIT_TIMEDOUT:
1424174641Skmacy	case CPL_ERR_PERSIST_TIMEDOUT:
1425174641Skmacy	case CPL_ERR_FINWAIT2_TIMEDOUT:
1426174641Skmacy	case CPL_ERR_KEEPALIVE_TIMEDOUT:
1427174641Skmacy		return (ETIMEDOUT);
1428174641Skmacy	default:
1429174641Skmacy		return (EIO);
1430174641Skmacy	}
1431174641Skmacy}
1432174641Skmacy
1433174641Skmacy/*
1434174641Skmacy * Returns whether an ABORT_REQ_RSS message is a negative advice.
1435174641Skmacy */
1436174641Skmacystatic inline int
1437174641Skmacyis_neg_adv_abort(unsigned int status)
1438174641Skmacy{
1439174641Skmacy	return status == CPL_ERR_RTX_NEG_ADVICE ||
1440174641Skmacy	    status == CPL_ERR_PERSIST_NEG_ADVICE;
1441174641Skmacy}
1442174641Skmacy
1443237263Snpvoid
1444237263Snpsend_abort_rpl(struct toedev *tod, int tid, int qset)
1445174641Skmacy{
1446237263Snp	struct mbuf *reply;
1447237263Snp	struct cpl_abort_rpl *rpl;
1448237263Snp	struct adapter *sc = tod->tod_softc;
1449174641Skmacy
1450237263Snp	reply = M_GETHDR_OFLD(qset, CPL_PRIORITY_DATA, rpl);
1451237263Snp	if (!reply)
1452237263Snp		CXGB_UNIMPLEMENTED();
1453174641Skmacy
1454237263Snp	rpl->wr.wrh_hi = htonl(V_WR_OP(FW_WROPCODE_OFLD_HOST_ABORT_CON_RPL));
1455237263Snp	rpl->wr.wrh_lo = htonl(V_WR_TID(tid));
1456237263Snp	OPCODE_TID(rpl) = htonl(MK_OPCODE_TID(CPL_ABORT_RPL, tid));
1457237263Snp	rpl->cmd = CPL_ABORT_NO_RST;
1458174641Skmacy
1459237263Snp	t3_offload_tx(sc, reply);
1460174641Skmacy}
1461174641Skmacy
/*
 * Handle an ABORT_REQ_RSS CPL message.  If we're waiting for an ABORT_RPL we
 * ignore this request except that we need to reply to it.
 *
 * Negative advice is dropped outright and requests for syn-queue entries are
 * handed to do_abort_req_synqe().  For a full connection the first request is
 * only recorded (TP_ABORT_REQ_RCVD); the work is done when a second request
 * for the same tid arrives.  Unless the message is passed on, the mbuf is
 * freed here.
 */
static int
do_abort_req(struct sge_qset *qs, struct rsp_desc *r, struct mbuf *m)
{
	struct adapter *sc = qs->adap;
	struct tom_data *td = sc->tom_softc;
	struct toedev *tod = &td->tod;
	const struct cpl_abort_req_rss *req = mtod(m, void *);
	unsigned int tid = GET_TID(req);
	struct toepcb *toep = lookup_tid(&td->tid_maps, tid);
	struct inpcb *inp;
	struct tcpcb *tp;
	struct socket *so;
	/*
	 * Read the qset up front; it is needed for the reply, which is sent
	 * after toepcb_release() has been called on toep.
	 */
	int qset = toep->tp_qset;

	if (is_neg_adv_abort(req->status)) {
		CTR4(KTR_CXGB, "%s: negative advice %d for tid %u (%x)",
		    __func__, req->status, tid, toep->tp_flags);
		m_freem(m);
		return (0);
	}

	if (toep->tp_flags & TP_IS_A_SYNQ_ENTRY)
		return (do_abort_req_synqe(qs, r, m));

	inp = toep->tp_inp;
	INP_INFO_WLOCK(&V_tcbinfo);	/* for tcp_close */
	INP_WLOCK(inp);

	tp = intotcpcb(inp);
	so = inp->inp_socket;

	CTR6(KTR_CXGB, "%s: tid %u (%s), toep %p (%x), status %d",
	    __func__, tid, tcpstates[tp->t_state], toep, toep->tp_flags,
	    req->status);

	/* First request: note it, block further sends, and wait for the next. */
	if (!(toep->tp_flags & TP_ABORT_REQ_RCVD)) {
		toep->tp_flags |= TP_ABORT_REQ_RCVD;
		toep->tp_flags |= TP_ABORT_SHUTDOWN;
		INP_WUNLOCK(inp);
		INP_INFO_WUNLOCK(&V_tcbinfo);
		m_freem(m);
		return (0);
	}
	toep->tp_flags &= ~TP_ABORT_REQ_RCVD;

	/*
	 * If we'd sent a reset on this toep, we'll ignore this and clean up in
	 * the T3's reply to our reset instead.
	 */
	if (toep->tp_flags & TP_ABORT_RPL_PENDING) {
		toep->tp_flags |= TP_ABORT_RPL_SENT;
		INP_WUNLOCK(inp);
	} else {
		/* Report the reason to the socket and tear the connection down. */
		so_error_set(so, abort_status_to_errno(tp, req->status));
		tp = tcp_close(tp);
		/*
		 * A NULL return means the inp came back unlocked, and
		 * toepcb_release() needs it locked.
		 */
		if (tp == NULL)
			INP_WLOCK(inp);	/* re-acquire */
		toepcb_release(toep);	/* no more CPLs expected */
	}
	INP_INFO_WUNLOCK(&V_tcbinfo);

	/* The hardware gets a reply in either case. */
	send_abort_rpl(tod, tid, qset);
	m_freem(m);
	return (0);
}
1531174641Skmacy
1532174641Skmacystatic void
1533237263Snpassign_rxopt(struct tcpcb *tp, uint16_t tcpopt)
1534174641Skmacy{
1535237263Snp	struct toepcb *toep = tp->t_toe;
1536237263Snp	struct adapter *sc = toep->tp_tod->tod_softc;
1537174641Skmacy
1538237263Snp	tp->t_maxseg = tp->t_maxopd = sc->params.mtus[G_TCPOPT_MSS(tcpopt)] - 40;
1539174641Skmacy
1540237263Snp	if (G_TCPOPT_TSTAMP(tcpopt)) {
1541237263Snp		tp->t_flags |= TF_RCVD_TSTMP;
1542237263Snp		tp->t_flags |= TF_REQ_TSTMP;	/* forcibly set */
1543237263Snp		tp->ts_recent = 0;		/* XXX */
1544237263Snp		tp->ts_recent_age = tcp_ts_getticks();
1545237263Snp		tp->t_maxseg -= TCPOLEN_TSTAMP_APPA;
1546174641Skmacy	}
1547174641Skmacy
1548237263Snp	if (G_TCPOPT_SACK(tcpopt))
1549237263Snp		tp->t_flags |= TF_SACK_PERMIT;
1550237263Snp	else
1551237263Snp		tp->t_flags &= ~TF_SACK_PERMIT;
1552174641Skmacy
1553237263Snp	if (G_TCPOPT_WSCALE_OK(tcpopt))
1554237263Snp		tp->t_flags |= TF_RCVD_SCALE;
1555176472Skmacy
1556237263Snp	if ((tp->t_flags & (TF_RCVD_SCALE | TF_REQ_SCALE)) ==
1557237263Snp	    (TF_RCVD_SCALE | TF_REQ_SCALE)) {
1558237263Snp		tp->rcv_scale = tp->request_r_scale;
1559237263Snp		tp->snd_scale = G_TCPOPT_SND_WSCALE(tcpopt);
1560174641Skmacy	}
1561174641Skmacy
1562174641Skmacy}
1563174641Skmacy
1564174641Skmacy/*
1565237263Snp * The ISS and IRS are from after the exchange of SYNs and are off by 1.
1566174641Skmacy */
1567237263Snpvoid
1568237263Snpmake_established(struct socket *so, uint32_t cpl_iss, uint32_t cpl_irs,
1569237263Snp    uint16_t cpl_tcpopt)
1570174641Skmacy{
1571237263Snp	struct inpcb *inp = sotoinpcb(so);
1572237263Snp	struct tcpcb *tp = intotcpcb(inp);
1573174641Skmacy	struct toepcb *toep = tp->t_toe;
1574237263Snp	long bufsize;
1575237263Snp	uint32_t iss = be32toh(cpl_iss) - 1;	/* true ISS */
1576237263Snp	uint32_t irs = be32toh(cpl_irs) - 1;	/* true IRS */
1577237263Snp	uint16_t tcpopt = be16toh(cpl_tcpopt);
1578174641Skmacy
1579237263Snp	INP_WLOCK_ASSERT(inp);
1580174641Skmacy
1581237263Snp	tp->t_state = TCPS_ESTABLISHED;
1582176472Skmacy	tp->t_starttime = ticks;
1583237263Snp	TCPSTAT_INC(tcps_connects);
1584174641Skmacy
1585237263Snp	CTR4(KTR_CXGB, "%s tid %u, toep %p, inp %p", tcpstates[tp->t_state],
1586237263Snp	    toep->tp_tid, toep, inp);
1587174641Skmacy
1588237263Snp	tp->irs = irs;
1589237263Snp	tcp_rcvseqinit(tp);
1590237263Snp	tp->rcv_wnd = toep->tp_rx_credits << 10;
1591237263Snp	tp->rcv_adv += tp->rcv_wnd;
1592237263Snp	tp->last_ack_sent = tp->rcv_nxt;
1593174641Skmacy
1594178302Skmacy	/*
1595237263Snp	 * If we were unable to send all rx credits via opt0, save the remainder
1596237263Snp	 * in rx_credits so that they can be handed over with the next credit
1597237263Snp	 * update.
1598178302Skmacy	 */
1599237263Snp	SOCKBUF_LOCK(&so->so_rcv);
1600237263Snp	bufsize = select_rcv_wnd(so);
1601237263Snp	SOCKBUF_UNLOCK(&so->so_rcv);
1602237263Snp	toep->tp_rx_credits = bufsize - tp->rcv_wnd;
1603178302Skmacy
1604237263Snp	tp->iss = iss;
1605237263Snp	tcp_sendseqinit(tp);
1606237263Snp	tp->snd_una = iss + 1;
1607237263Snp	tp->snd_nxt = iss + 1;
1608237263Snp	tp->snd_max = iss + 1;
1609178302Skmacy
1610237263Snp	assign_rxopt(tp, tcpopt);
1611237263Snp	soisconnected(so);
1612174641Skmacy}
1613174641Skmacy
1614174641Skmacy/*
1615174641Skmacy * Fill in the right TID for CPL messages waiting in the out-of-order queue
1616174641Skmacy * and send them to the TOE.
1617174641Skmacy */
1618174641Skmacystatic void
1619178302Skmacyfixup_and_send_ofo(struct toepcb *toep)
1620174641Skmacy{
1621174641Skmacy	struct mbuf *m;
1622237263Snp	struct toedev *tod = toep->tp_tod;
1623237263Snp	struct adapter *sc = tod->tod_softc;
1624237263Snp	struct inpcb *inp = toep->tp_inp;
1625174641Skmacy	unsigned int tid = toep->tp_tid;
1626174641Skmacy
1627237263Snp	inp_lock_assert(inp);
1628237263Snp
1629174641Skmacy	while ((m = mbufq_dequeue(&toep->out_of_order_queue)) != NULL) {
1630237263Snp		struct ofld_hdr *oh = mtod(m, void *);
1631174641Skmacy		/*
1632174641Skmacy		 * A variety of messages can be waiting but the fields we'll
1633174641Skmacy		 * be touching are common to all so any message type will do.
1634174641Skmacy		 */
1635237263Snp		struct cpl_close_con_req *p = (void *)(oh + 1);
1636174641Skmacy
1637237263Snp		p->wr.wrh_lo = htonl(V_WR_TID(tid));
1638174641Skmacy		OPCODE_TID(p) = htonl(MK_OPCODE_TID(p->ot.opcode, tid));
1639237263Snp		t3_offload_tx(sc, m);
1640174641Skmacy	}
1641174641Skmacy}
1642174641Skmacy
/*
 * Process a CPL_ACT_ESTABLISH message.
 *
 * The active open succeeded: swap the atid for the TID assigned by the
 * hardware, move the connection to ESTABLISHED and send any CPLs that were
 * deferred for lack of a TID.  If the kernel has already dropped the
 * connection, it is reset instead.  Frees the mbuf and always returns 0.
 */
static int
do_act_establish(struct sge_qset *qs, struct rsp_desc *r, struct mbuf *m)
{
	struct adapter *sc = qs->adap;
	struct tom_data *td = sc->tom_softc;
	struct cpl_act_establish *req = mtod(m, void *);
	unsigned int tid = GET_TID(req);
	unsigned int atid = G_PASS_OPEN_TID(ntohl(req->tos_tid));
	struct toepcb *toep = lookup_atid(&td->tid_maps, atid);
	struct inpcb *inp = toep->tp_inp;
	struct tcpcb *tp;
	struct socket *so;

	CTR3(KTR_CXGB, "%s: atid %u, tid %u", __func__, atid, tid);

	/* The atid has served its purpose now that a real TID exists. */
	free_atid(&td->tid_maps, atid);

	INP_WLOCK(inp);
	tp = intotcpcb(inp);

	KASSERT(toep->tp_qset == qs->idx,
	    ("%s qset mismatch %d %d", __func__, toep->tp_qset, qs->idx));
	/* t3_connect stored the atid in tp_tid to allow this check. */
	KASSERT(toep->tp_tid == atid,
	    ("%s atid mismatch %d %d", __func__, toep->tp_tid, atid));

	toep->tp_tid = tid;
	insert_tid(td, toep, tid);

	if (inp->inp_flags & INP_DROPPED) {
		/* socket closed by the kernel before hw told us it connected */
		/*
		 * NOTE(review): send_reset() asserts that the tcbinfo write
		 * lock is held, but only the inp lock is held on this path --
		 * confirm whether the assertion or this caller is wrong.
		 */
		send_reset(toep);
		goto done;
	}

	KASSERT(tp->t_state == TCPS_SYN_SENT,
	    ("TID %u expected TCPS_SYN_SENT, found %d.", tid, tp->t_state));

	so = inp->inp_socket;
	make_established(so, req->snd_isn, req->rcv_isn, req->tcp_opt);

	/*
	 * Now that we finally have a TID send any CPL messages that we had to
	 * defer for lack of a TID.
	 */
	if (mbufq_len(&toep->out_of_order_queue))
		fixup_and_send_ofo(toep);

done:
	INP_WUNLOCK(inp);
	m_freem(m);
	return (0);
}
1698174641Skmacy
1699174641Skmacy/*
1700174641Skmacy * Process an acknowledgment of WR completion.  Advance snd_una and send the
1701174641Skmacy * next batch of work requests from the write queue.
1702174641Skmacy */
1703174641Skmacystatic void
1704174641Skmacywr_ack(struct toepcb *toep, struct mbuf *m)
1705174641Skmacy{
1706237263Snp	struct inpcb *inp = toep->tp_inp;
1707237263Snp	struct tcpcb *tp;
1708237263Snp	struct cpl_wr_ack *hdr = mtod(m, void *);
1709178302Skmacy	struct socket *so;
1710174641Skmacy	unsigned int credits = ntohs(hdr->credits);
1711174641Skmacy	u32 snd_una = ntohl(hdr->snd_una);
1712174641Skmacy	int bytes = 0;
1713178302Skmacy	struct sockbuf *snd;
1714237263Snp	struct mbuf *p;
1715237263Snp	struct ofld_hdr *oh;
1716174641Skmacy
1717237263Snp	inp_wlock(inp);
1718237263Snp	tp = intotcpcb(inp);
1719237263Snp	so = inp->inp_socket;
1720174641Skmacy	toep->tp_wr_avail += credits;
1721174641Skmacy	if (toep->tp_wr_unacked > toep->tp_wr_max - toep->tp_wr_avail)
1722174641Skmacy		toep->tp_wr_unacked = toep->tp_wr_max - toep->tp_wr_avail;
1723174641Skmacy
1724174641Skmacy	while (credits) {
1725237263Snp		p = peek_wr(toep);
1726237263Snp
1727174641Skmacy		if (__predict_false(!p)) {
1728237263Snp			CTR5(KTR_CXGB, "%s: %u extra WR_ACK credits, "
1729237263Snp			    "tid %u, state %u, wr_avail %u", __func__, credits,
1730237263Snp			    toep->tp_tid, tp->t_state, toep->tp_wr_avail);
1731237263Snp
1732174641Skmacy			log(LOG_ERR, "%u WR_ACK credits for TID %u with "
1733176472Skmacy			    "nothing pending, state %u wr_avail=%u\n",
1734176472Skmacy			    credits, toep->tp_tid, tp->t_state, toep->tp_wr_avail);
1735174641Skmacy			break;
1736174641Skmacy		}
1737176472Skmacy
1738237263Snp		oh = mtod(p, struct ofld_hdr *);
1739176472Skmacy
1740237263Snp		KASSERT(credits >= G_HDR_NDESC(oh->flags),
1741237263Snp		    ("%s: partial credits?  %d %d", __func__, credits,
1742237263Snp		    G_HDR_NDESC(oh->flags)));
1743174641Skmacy
1744237263Snp		dequeue_wr(toep);
1745237263Snp		credits -= G_HDR_NDESC(oh->flags);
1746237263Snp		bytes += oh->plen;
1747174641Skmacy
1748237263Snp		if (oh->flags & F_HDR_SGL)
1749237263Snp			sglist_free(oh->sgl);
1750237263Snp		m_freem(p);
1751237263Snp	}
1752174641Skmacy
1753237263Snp	if (__predict_false(SEQ_LT(snd_una, tp->snd_una)))
1754174641Skmacy		goto out_free;
1755174641Skmacy
1756174641Skmacy	if (tp->snd_una != snd_una) {
1757174641Skmacy		tp->snd_una = snd_una;
1758237263Snp		tp->ts_recent_age = tcp_ts_getticks();
1759174641Skmacy		if (tp->snd_una == tp->snd_nxt)
1760174641Skmacy			toep->tp_flags &= ~TP_TX_WAIT_IDLE;
1761174641Skmacy	}
1762178302Skmacy
1763178302Skmacy	snd = so_sockbuf_snd(so);
1764174641Skmacy	if (bytes) {
1765237263Snp		SOCKBUF_LOCK(snd);
1766178302Skmacy		sbdrop_locked(snd, bytes);
1767178302Skmacy		so_sowwakeup_locked(so);
1768174641Skmacy	}
1769178302Skmacy
1770178302Skmacy	if (snd->sb_sndptroff < snd->sb_cc)
1771174641Skmacy		t3_push_frames(so, 0);
1772174641Skmacy
1773174641Skmacyout_free:
1774177530Skmacy	inp_wunlock(tp->t_inpcb);
1775237263Snp	m_freem(m);
1776174641Skmacy}
1777174641Skmacy
1778174641Skmacy/*
1779174641Skmacy * Handler for TX_DATA_ACK CPL messages.
1780174641Skmacy */
1781174641Skmacystatic int
1782237263Snpdo_wr_ack(struct sge_qset *qs, struct rsp_desc *r, struct mbuf *m)
1783174641Skmacy{
1784237263Snp	struct adapter *sc = qs->adap;
1785237263Snp	struct tom_data *td = sc->tom_softc;
1786237263Snp	struct cpl_wr_ack *hdr = mtod(m, void *);
1787237263Snp	unsigned int tid = GET_TID(hdr);
1788237263Snp	struct toepcb *toep = lookup_tid(&td->tid_maps, tid);
1789174641Skmacy
1790237263Snp	/* XXX bad race */
1791237263Snp	if (toep)
1792237263Snp		wr_ack(toep, m);
1793174641Skmacy
1794176472Skmacy	return (0);
1795176472Skmacy}
1796176472Skmacy
1797174641Skmacyvoid
1798237263Snpt3_init_cpl_io(struct adapter *sc)
1799176472Skmacy{
1800237263Snp	t3_register_cpl_handler(sc, CPL_ACT_ESTABLISH, do_act_establish);
1801237263Snp	t3_register_cpl_handler(sc, CPL_ACT_OPEN_RPL, do_act_open_rpl);
1802237263Snp	t3_register_cpl_handler(sc, CPL_RX_URG_NOTIFY, do_rx_urg_notify);
1803237263Snp	t3_register_cpl_handler(sc, CPL_RX_DATA, do_rx_data);
1804237263Snp	t3_register_cpl_handler(sc, CPL_TX_DMA_ACK, do_wr_ack);
1805237263Snp	t3_register_cpl_handler(sc, CPL_PEER_CLOSE, do_peer_close);
1806237263Snp	t3_register_cpl_handler(sc, CPL_ABORT_REQ_RSS, do_abort_req);
1807237263Snp	t3_register_cpl_handler(sc, CPL_ABORT_RPL_RSS, do_abort_rpl);
1808237263Snp	t3_register_cpl_handler(sc, CPL_CLOSE_CON_RPL, do_close_con_rpl);
1809237263Snp	t3_register_cpl_handler(sc, CPL_SMT_WRITE_RPL, do_smt_write_rpl);
1810237263Snp	t3_register_cpl_handler(sc, CPL_SET_TCB_RPL, do_set_tcb_rpl);
1811176472Skmacy}
1812176472Skmacy#endif
1813