1237263Snp/*-
2237263Snp * Copyright (c) 2012 Chelsio Communications, Inc.
3237263Snp * All rights reserved.
4237263Snp * Written by: Navdeep Parhar <np@FreeBSD.org>
5237263Snp *
6237263Snp * Redistribution and use in source and binary forms, with or without
7237263Snp * modification, are permitted provided that the following conditions
8237263Snp * are met:
9237263Snp * 1. Redistributions of source code must retain the above copyright
10237263Snp *    notice, this list of conditions and the following disclaimer.
11237263Snp * 2. Redistributions in binary form must reproduce the above copyright
12237263Snp *    notice, this list of conditions and the following disclaimer in the
13237263Snp *    documentation and/or other materials provided with the distribution.
14237263Snp *
15237263Snp * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
16237263Snp * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
17237263Snp * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
18237263Snp * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
19237263Snp * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
20237263Snp * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
21237263Snp * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
22237263Snp * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
23237263Snp * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
24237263Snp * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
25237263Snp * SUCH DAMAGE.
26237263Snp */
27237263Snp
28237263Snp#include <sys/cdefs.h>
29237263Snp__FBSDID("$FreeBSD$");
30237263Snp
31237263Snp#include "opt_inet.h"
32245468Snp#include "opt_inet6.h"
33237263Snp
34237263Snp#ifdef TCP_OFFLOAD
35237263Snp#include <sys/param.h>
36237263Snp#include <sys/types.h>
37237263Snp#include <sys/kernel.h>
38237263Snp#include <sys/ktr.h>
39237263Snp#include <sys/module.h>
40237263Snp#include <sys/protosw.h>
41237263Snp#include <sys/refcount.h>
42237263Snp#include <sys/domain.h>
43237263Snp#include <sys/fnv_hash.h>
44237263Snp#include <sys/socket.h>
45237263Snp#include <sys/socketvar.h>
46237263Snp#include <net/ethernet.h>
47237263Snp#include <net/if.h>
48237263Snp#include <net/if_types.h>
49237263Snp#include <net/if_vlan_var.h>
50237263Snp#include <net/route.h>
51237263Snp#include <netinet/in.h>
52237263Snp#include <netinet/in_pcb.h>
53237263Snp#include <netinet/ip.h>
54245468Snp#include <netinet/ip6.h>
55245468Snp#include <netinet6/scope6_var.h>
56239544Snp#include <netinet/tcp_timer.h>
57237263Snp#include <netinet/tcp_var.h>
58237263Snp#define TCPSTATES
59237263Snp#include <netinet/tcp_fsm.h>
60237263Snp#include <netinet/toecore.h>
61237263Snp
62237263Snp#include "common/common.h"
63237263Snp#include "common/t4_msg.h"
64237263Snp#include "common/t4_regs.h"
65237263Snp#include "tom/t4_tom_l2t.h"
66237263Snp#include "tom/t4_tom.h"
67237263Snp
68237263Snp/* stid services */
69245276Snpstatic int alloc_stid(struct adapter *, struct listen_ctx *, int);
70245276Snpstatic struct listen_ctx *lookup_stid(struct adapter *, int);
71245276Snpstatic void free_stid(struct adapter *, struct listen_ctx *);
72237263Snp
73237263Snp/* lctx services */
74237263Snpstatic struct listen_ctx *alloc_lctx(struct adapter *, struct inpcb *,
75237263Snp    struct port_info *);
76237263Snpstatic int free_lctx(struct adapter *, struct listen_ctx *);
77237263Snpstatic void hold_lctx(struct listen_ctx *);
78237263Snpstatic void listen_hash_add(struct adapter *, struct listen_ctx *);
79237263Snpstatic struct listen_ctx *listen_hash_find(struct adapter *, struct inpcb *);
80237263Snpstatic struct listen_ctx *listen_hash_del(struct adapter *, struct inpcb *);
81237263Snpstatic struct inpcb *release_lctx(struct adapter *, struct listen_ctx *);
82237263Snp
83237263Snpstatic inline void save_qids_in_mbuf(struct mbuf *, struct port_info *);
84237263Snpstatic inline void get_qids_from_mbuf(struct mbuf *m, int *, int *);
85237263Snpstatic void send_reset_synqe(struct toedev *, struct synq_entry *);
86237263Snp
87237263Snpstatic int
88245276Snpalloc_stid(struct adapter *sc, struct listen_ctx *lctx, int isipv6)
89237263Snp{
90237263Snp	struct tid_info *t = &sc->tids;
91245276Snp	u_int stid, n, f, mask;
92245276Snp	struct stid_region *sr = &lctx->stid_region;
93237263Snp
94245276Snp	/*
95245276Snp	 * An IPv6 server needs 2 naturally aligned stids (1 stid = 4 cells) in
96245276Snp	 * the TCAM.  The start of the stid region is properly aligned (the chip
97245276Snp	 * requires each region to be 128-cell aligned).
98245276Snp	 */
99245276Snp	n = isipv6 ? 2 : 1;
100245276Snp	mask = n - 1;
101245276Snp	KASSERT((t->stid_base & mask) == 0 && (t->nstids & mask) == 0,
102245276Snp	    ("%s: stid region (%u, %u) not properly aligned.  n = %u",
103245276Snp	    __func__, t->stid_base, t->nstids, n));
104245276Snp
105237263Snp	mtx_lock(&t->stid_lock);
106245276Snp	if (n > t->nstids - t->stids_in_use) {
107245276Snp		mtx_unlock(&t->stid_lock);
108245276Snp		return (-1);
109245276Snp	}
110237263Snp
111245276Snp	if (t->nstids_free_head >= n) {
112245276Snp		/*
113245276Snp		 * This allocation will definitely succeed because the region
114245276Snp		 * starts at a good alignment and we just checked we have enough
115245276Snp		 * stids free.
116245276Snp		 */
117245276Snp		f = t->nstids_free_head & mask;
118245276Snp		t->nstids_free_head -= n + f;
119245276Snp		stid = t->nstids_free_head;
120245276Snp		TAILQ_INSERT_HEAD(&t->stids, sr, link);
121245276Snp	} else {
122245276Snp		struct stid_region *s;
123245276Snp
124245276Snp		stid = t->nstids_free_head;
125245276Snp		TAILQ_FOREACH(s, &t->stids, link) {
126245276Snp			stid += s->used + s->free;
127245276Snp			f = stid & mask;
128251518Snp			if (s->free >= n + f) {
129245276Snp				stid -= n + f;
130245276Snp				s->free -= n + f;
131245276Snp				TAILQ_INSERT_AFTER(&t->stids, s, sr, link);
132245276Snp				goto allocated;
133245276Snp			}
134245276Snp		}
135245276Snp
136245276Snp		if (__predict_false(stid != t->nstids)) {
137245276Snp			panic("%s: stids TAILQ (%p) corrupt."
138245276Snp			    "  At %d instead of %d at the end of the queue.",
139245276Snp			    __func__, &t->stids, stid, t->nstids);
140245276Snp		}
141245276Snp
142245276Snp		mtx_unlock(&t->stid_lock);
143245276Snp		return (-1);
144237263Snp	}
145245276Snp
146245276Snpallocated:
147245276Snp	sr->used = n;
148245276Snp	sr->free = f;
149245276Snp	t->stids_in_use += n;
150245276Snp	t->stid_tab[stid] = lctx;
151237263Snp	mtx_unlock(&t->stid_lock);
152245276Snp
153245276Snp	KASSERT(((stid + t->stid_base) & mask) == 0,
154245276Snp	    ("%s: EDOOFUS.", __func__));
155245276Snp	return (stid + t->stid_base);
156237263Snp}
157237263Snp
158245276Snpstatic struct listen_ctx *
159237263Snplookup_stid(struct adapter *sc, int stid)
160237263Snp{
161237263Snp	struct tid_info *t = &sc->tids;
162237263Snp
163245276Snp	return (t->stid_tab[stid - t->stid_base]);
164237263Snp}
165237263Snp
166237263Snpstatic void
167245276Snpfree_stid(struct adapter *sc, struct listen_ctx *lctx)
168237263Snp{
169237263Snp	struct tid_info *t = &sc->tids;
170245276Snp	struct stid_region *sr = &lctx->stid_region;
171245276Snp	struct stid_region *s;
172237263Snp
173245276Snp	KASSERT(sr->used > 0, ("%s: nonsense free (%d)", __func__, sr->used));
174245276Snp
175237263Snp	mtx_lock(&t->stid_lock);
176245276Snp	s = TAILQ_PREV(sr, stid_head, link);
177245276Snp	if (s != NULL)
178245276Snp		s->free += sr->used + sr->free;
179245276Snp	else
180245276Snp		t->nstids_free_head += sr->used + sr->free;
181245276Snp	KASSERT(t->stids_in_use >= sr->used,
182245276Snp	    ("%s: stids_in_use (%u) < stids being freed (%u)", __func__,
183245276Snp	    t->stids_in_use, sr->used));
184245276Snp	t->stids_in_use -= sr->used;
185245276Snp	TAILQ_REMOVE(&t->stids, sr, link);
186237263Snp	mtx_unlock(&t->stid_lock);
187237263Snp}
188237263Snp
189237263Snpstatic struct listen_ctx *
190237263Snpalloc_lctx(struct adapter *sc, struct inpcb *inp, struct port_info *pi)
191237263Snp{
192237263Snp	struct listen_ctx *lctx;
193237263Snp
194237263Snp	INP_WLOCK_ASSERT(inp);
195237263Snp
196237263Snp	lctx = malloc(sizeof(struct listen_ctx), M_CXGBE, M_NOWAIT | M_ZERO);
197237263Snp	if (lctx == NULL)
198237263Snp		return (NULL);
199237263Snp
200245468Snp	lctx->stid = alloc_stid(sc, lctx, inp->inp_vflag & INP_IPV6);
201237263Snp	if (lctx->stid < 0) {
202237263Snp		free(lctx, M_CXGBE);
203237263Snp		return (NULL);
204237263Snp	}
205237263Snp
206259804Snp	if (inp->inp_vflag & INP_IPV6 &&
207259804Snp	    !IN6_ARE_ADDR_EQUAL(&in6addr_any, &inp->in6p_laddr)) {
208259804Snp		struct tom_data *td = sc->tom_softc;
209259804Snp
210259804Snp		lctx->ce = hold_lip(td, &inp->in6p_laddr);
211259804Snp		if (lctx->ce == NULL) {
212259804Snp			free(lctx, M_CXGBE);
213259804Snp			return (NULL);
214259804Snp		}
215259804Snp	}
216259804Snp
217237263Snp	lctx->ctrlq = &sc->sge.ctrlq[pi->port_id];
218237263Snp	lctx->ofld_rxq = &sc->sge.ofld_rxq[pi->first_ofld_rxq];
219237263Snp	refcount_init(&lctx->refcount, 1);
220237263Snp	TAILQ_INIT(&lctx->synq);
221237263Snp
222237263Snp	lctx->inp = inp;
223237263Snp	in_pcbref(inp);
224237263Snp
225237263Snp	return (lctx);
226237263Snp}
227237263Snp
228237263Snp/* Don't call this directly, use release_lctx instead */
229237263Snpstatic int
230237263Snpfree_lctx(struct adapter *sc, struct listen_ctx *lctx)
231237263Snp{
232237263Snp	struct inpcb *inp = lctx->inp;
233259804Snp	struct tom_data *td = sc->tom_softc;
234237263Snp
235237263Snp	INP_WLOCK_ASSERT(inp);
236237263Snp	KASSERT(lctx->refcount == 0,
237237263Snp	    ("%s: refcount %d", __func__, lctx->refcount));
238237263Snp	KASSERT(TAILQ_EMPTY(&lctx->synq),
239237263Snp	    ("%s: synq not empty.", __func__));
240237263Snp	KASSERT(lctx->stid >= 0, ("%s: bad stid %d.", __func__, lctx->stid));
241237263Snp
242237263Snp	CTR4(KTR_CXGBE, "%s: stid %u, lctx %p, inp %p",
243237263Snp	    __func__, lctx->stid, lctx, lctx->inp);
244237263Snp
245259804Snp	if (lctx->ce)
246259804Snp		release_lip(td, lctx->ce);
247245276Snp	free_stid(sc, lctx);
248237263Snp	free(lctx, M_CXGBE);
249237263Snp
250237263Snp	return (in_pcbrele_wlocked(inp));
251237263Snp}
252237263Snp
253237263Snpstatic void
254237263Snphold_lctx(struct listen_ctx *lctx)
255237263Snp{
256237263Snp
257237263Snp	refcount_acquire(&lctx->refcount);
258237263Snp}
259237263Snp
260237263Snpstatic inline uint32_t
261237263Snplisten_hashfn(void *key, u_long mask)
262237263Snp{
263237263Snp
264237263Snp	return (fnv_32_buf(&key, sizeof(key), FNV1_32_INIT) & mask);
265237263Snp}
266237263Snp
267237263Snp/*
268237263Snp * Add a listen_ctx entry to the listen hash table.
269237263Snp */
270237263Snpstatic void
271237263Snplisten_hash_add(struct adapter *sc, struct listen_ctx *lctx)
272237263Snp{
273237263Snp	struct tom_data *td = sc->tom_softc;
274237263Snp	int bucket = listen_hashfn(lctx->inp, td->listen_mask);
275237263Snp
276237263Snp	mtx_lock(&td->lctx_hash_lock);
277237263Snp	LIST_INSERT_HEAD(&td->listen_hash[bucket], lctx, link);
278237263Snp	td->lctx_count++;
279237263Snp	mtx_unlock(&td->lctx_hash_lock);
280237263Snp}
281237263Snp
282237263Snp/*
283237263Snp * Look for the listening socket's context entry in the hash and return it.
284237263Snp */
285237263Snpstatic struct listen_ctx *
286237263Snplisten_hash_find(struct adapter *sc, struct inpcb *inp)
287237263Snp{
288237263Snp	struct tom_data *td = sc->tom_softc;
289237263Snp	int bucket = listen_hashfn(inp, td->listen_mask);
290237263Snp	struct listen_ctx *lctx;
291237263Snp
292237263Snp	mtx_lock(&td->lctx_hash_lock);
293237263Snp	LIST_FOREACH(lctx, &td->listen_hash[bucket], link) {
294237263Snp		if (lctx->inp == inp)
295237263Snp			break;
296237263Snp	}
297237263Snp	mtx_unlock(&td->lctx_hash_lock);
298237263Snp
299237263Snp	return (lctx);
300237263Snp}
301237263Snp
302237263Snp/*
303237263Snp * Removes the listen_ctx structure for inp from the hash and returns it.
304237263Snp */
305237263Snpstatic struct listen_ctx *
306237263Snplisten_hash_del(struct adapter *sc, struct inpcb *inp)
307237263Snp{
308237263Snp	struct tom_data *td = sc->tom_softc;
309237263Snp	int bucket = listen_hashfn(inp, td->listen_mask);
310237263Snp	struct listen_ctx *lctx, *l;
311237263Snp
312237263Snp	mtx_lock(&td->lctx_hash_lock);
313237263Snp	LIST_FOREACH_SAFE(lctx, &td->listen_hash[bucket], link, l) {
314237263Snp		if (lctx->inp == inp) {
315237263Snp			LIST_REMOVE(lctx, link);
316237263Snp			td->lctx_count--;
317237263Snp			break;
318237263Snp		}
319237263Snp	}
320237263Snp	mtx_unlock(&td->lctx_hash_lock);
321237263Snp
322237263Snp	return (lctx);
323237263Snp}
324237263Snp
325237263Snp/*
326237263Snp * Releases a hold on the lctx.  Must be called with the listening socket's inp
327237263Snp * locked.  The inp may be freed by this function and it returns NULL to
328237263Snp * indicate this.
329237263Snp */
330237263Snpstatic struct inpcb *
331237263Snprelease_lctx(struct adapter *sc, struct listen_ctx *lctx)
332237263Snp{
333237263Snp	struct inpcb *inp = lctx->inp;
334237263Snp	int inp_freed = 0;
335237263Snp
336237263Snp	INP_WLOCK_ASSERT(inp);
337237263Snp	if (refcount_release(&lctx->refcount))
338237263Snp		inp_freed = free_lctx(sc, lctx);
339237263Snp
340237263Snp	return (inp_freed ? NULL : inp);
341237263Snp}
342237263Snp
343237263Snpstatic void
344237263Snpsend_reset_synqe(struct toedev *tod, struct synq_entry *synqe)
345237263Snp{
346237263Snp	struct adapter *sc = tod->tod_softc;
347237263Snp	struct mbuf *m = synqe->syn;
348237263Snp	struct ifnet *ifp = m->m_pkthdr.rcvif;
349237263Snp	struct port_info *pi = ifp->if_softc;
350237263Snp	struct l2t_entry *e = &sc->l2t->l2tab[synqe->l2e_idx];
351241626Snp	struct wrqe *wr;
352241626Snp	struct fw_flowc_wr *flowc;
353237263Snp	struct cpl_abort_req *req;
354237263Snp	int txqid, rxqid, flowclen;
355237263Snp	struct sge_wrq *ofld_txq;
356237263Snp	struct sge_ofld_rxq *ofld_rxq;
357241642Snp	const int nparams = 6;
358237263Snp	unsigned int pfvf = G_FW_VIID_PFN(pi->viid) << S_FW_VIID_PFN;
359237263Snp
360237263Snp	INP_WLOCK_ASSERT(synqe->lctx->inp);
361237263Snp
362243680Snp	CTR5(KTR_CXGBE, "%s: synqe %p (0x%x), tid %d%s",
363243680Snp	    __func__, synqe, synqe->flags, synqe->tid,
364239514Snp	    synqe->flags & TPF_ABORT_SHUTDOWN ?
365237263Snp	    " (abort already in progress)" : "");
366239514Snp	if (synqe->flags & TPF_ABORT_SHUTDOWN)
367237263Snp		return;	/* abort already in progress */
368239514Snp	synqe->flags |= TPF_ABORT_SHUTDOWN;
369237263Snp
370237263Snp	get_qids_from_mbuf(m, &txqid, &rxqid);
371237263Snp	ofld_txq = &sc->sge.ofld_txq[txqid];
372237263Snp	ofld_rxq = &sc->sge.ofld_rxq[rxqid];
373237263Snp
374237263Snp	/* The wrqe will have two WRs - a flowc followed by an abort_req */
375237263Snp	flowclen = sizeof(*flowc) + nparams * sizeof(struct fw_flowc_mnemval);
376237263Snp
377248925Snp	wr = alloc_wrqe(roundup2(flowclen, EQ_ESIZE) + sizeof(*req), ofld_txq);
378237263Snp	if (wr == NULL) {
379237263Snp		/* XXX */
380237263Snp		panic("%s: allocation failure.", __func__);
381237263Snp	}
382237263Snp	flowc = wrtod(wr);
383248925Snp	req = (void *)((caddr_t)flowc + roundup2(flowclen, EQ_ESIZE));
384237263Snp
385237263Snp	/* First the flowc ... */
386237263Snp	memset(flowc, 0, wr->wr_len);
387237263Snp	flowc->op_to_nparams = htobe32(V_FW_WR_OP(FW_FLOWC_WR) |
388237263Snp	    V_FW_FLOWC_WR_NPARAMS(nparams));
389237263Snp	flowc->flowid_len16 = htonl(V_FW_WR_LEN16(howmany(flowclen, 16)) |
390237263Snp	    V_FW_WR_FLOWID(synqe->tid));
391237263Snp	flowc->mnemval[0].mnemonic = FW_FLOWC_MNEM_PFNVFN;
392241626Snp	flowc->mnemval[0].val = htobe32(pfvf);
393241626Snp	flowc->mnemval[1].mnemonic = FW_FLOWC_MNEM_CH;
394241626Snp	flowc->mnemval[1].val = htobe32(pi->tx_chan);
395241626Snp	flowc->mnemval[2].mnemonic = FW_FLOWC_MNEM_PORT;
396241626Snp	flowc->mnemval[2].val = htobe32(pi->tx_chan);
397241626Snp	flowc->mnemval[3].mnemonic = FW_FLOWC_MNEM_IQID;
398241626Snp	flowc->mnemval[3].val = htobe32(ofld_rxq->iq.abs_id);
399241642Snp 	flowc->mnemval[4].mnemonic = FW_FLOWC_MNEM_SNDBUF;
400241642Snp 	flowc->mnemval[4].val = htobe32(512);
401241642Snp 	flowc->mnemval[5].mnemonic = FW_FLOWC_MNEM_MSS;
402241642Snp 	flowc->mnemval[5].val = htobe32(512);
403239514Snp	synqe->flags |= TPF_FLOWC_WR_SENT;
404237263Snp
405237263Snp	/* ... then ABORT request */
406237263Snp	INIT_TP_WR_MIT_CPL(req, CPL_ABORT_REQ, synqe->tid);
407237263Snp	req->rsvd0 = 0;	/* don't have a snd_nxt */
408237263Snp	req->rsvd1 = 1;	/* no data sent yet */
409237263Snp	req->cmd = CPL_ABORT_SEND_RST;
410237263Snp
411237263Snp	t4_l2t_send(sc, wr, e);
412237263Snp}
413237263Snp
414237263Snpstatic int
415237263Snpcreate_server(struct adapter *sc, struct listen_ctx *lctx)
416237263Snp{
417237263Snp	struct wrqe *wr;
418237263Snp	struct cpl_pass_open_req *req;
419245468Snp	struct inpcb *inp = lctx->inp;
420237263Snp
421237263Snp	wr = alloc_wrqe(sizeof(*req), lctx->ctrlq);
422237263Snp	if (wr == NULL) {
423237263Snp		log(LOG_ERR, "%s: allocation failure", __func__);
424237263Snp		return (ENOMEM);
425237263Snp	}
426237263Snp	req = wrtod(wr);
427237263Snp
428237263Snp	INIT_TP_WR(req, 0);
429237263Snp	OPCODE_TID(req) = htobe32(MK_OPCODE_TID(CPL_PASS_OPEN_REQ, lctx->stid));
430245468Snp	req->local_port = inp->inp_lport;
431237263Snp	req->peer_port = 0;
432245468Snp	req->local_ip = inp->inp_laddr.s_addr;
433237263Snp	req->peer_ip = 0;
434237263Snp	req->opt0 = htobe64(V_TX_CHAN(lctx->ctrlq->eq.tx_chan));
435237263Snp	req->opt1 = htobe64(V_CONN_POLICY(CPL_CONN_POLICY_ASK) |
436237263Snp	    F_SYN_RSS_ENABLE | V_SYN_RSS_QUEUE(lctx->ofld_rxq->iq.abs_id));
437237263Snp
438237263Snp	t4_wrq_tx(sc, wr);
439237263Snp	return (0);
440237263Snp}
441237263Snp
442237263Snpstatic int
443245468Snpcreate_server6(struct adapter *sc, struct listen_ctx *lctx)
444245468Snp{
445245468Snp	struct wrqe *wr;
446245468Snp	struct cpl_pass_open_req6 *req;
447245468Snp	struct inpcb *inp = lctx->inp;
448245468Snp
449245468Snp	wr = alloc_wrqe(sizeof(*req), lctx->ctrlq);
450245468Snp	if (wr == NULL) {
451245468Snp		log(LOG_ERR, "%s: allocation failure", __func__);
452245468Snp		return (ENOMEM);
453245468Snp	}
454245468Snp	req = wrtod(wr);
455245468Snp
456245468Snp	INIT_TP_WR(req, 0);
457245468Snp	OPCODE_TID(req) = htobe32(MK_OPCODE_TID(CPL_PASS_OPEN_REQ6, lctx->stid));
458245468Snp	req->local_port = inp->inp_lport;
459245468Snp	req->peer_port = 0;
460245468Snp	req->local_ip_hi = *(uint64_t *)&inp->in6p_laddr.s6_addr[0];
461245468Snp	req->local_ip_lo = *(uint64_t *)&inp->in6p_laddr.s6_addr[8];
462245468Snp	req->peer_ip_hi = 0;
463245468Snp	req->peer_ip_lo = 0;
464245468Snp	req->opt0 = htobe64(V_TX_CHAN(lctx->ctrlq->eq.tx_chan));
465245468Snp	req->opt1 = htobe64(V_CONN_POLICY(CPL_CONN_POLICY_ASK) |
466245468Snp	    F_SYN_RSS_ENABLE | V_SYN_RSS_QUEUE(lctx->ofld_rxq->iq.abs_id));
467245468Snp
468245468Snp	t4_wrq_tx(sc, wr);
469245468Snp	return (0);
470245468Snp}
471245468Snp
472245468Snpstatic int
473237263Snpdestroy_server(struct adapter *sc, struct listen_ctx *lctx)
474237263Snp{
475237263Snp	struct wrqe *wr;
476237263Snp	struct cpl_close_listsvr_req *req;
477237263Snp
478237263Snp	wr = alloc_wrqe(sizeof(*req), lctx->ctrlq);
479237263Snp	if (wr == NULL) {
480237263Snp		/* XXX */
481237263Snp		panic("%s: allocation failure.", __func__);
482237263Snp	}
483237263Snp	req = wrtod(wr);
484237263Snp
485237263Snp	INIT_TP_WR(req, 0);
486237263Snp	OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_CLOSE_LISTSRV_REQ,
487237263Snp	    lctx->stid));
488237263Snp	req->reply_ctrl = htobe16(lctx->ofld_rxq->iq.abs_id);
489237263Snp	req->rsvd = htobe16(0);
490237263Snp
491237263Snp	t4_wrq_tx(sc, wr);
492237263Snp	return (0);
493237263Snp}
494237263Snp
495237263Snp/*
496237263Snp * Start a listening server by sending a passive open request to HW.
497237263Snp *
498237263Snp * Can't take adapter lock here and access to sc->flags, sc->open_device_map,
499237263Snp * sc->offload_map, if_capenable are all race prone.
500237263Snp */
501237263Snpint
502237263Snpt4_listen_start(struct toedev *tod, struct tcpcb *tp)
503237263Snp{
504237263Snp	struct adapter *sc = tod->tod_softc;
505237263Snp	struct port_info *pi;
506237263Snp	struct inpcb *inp = tp->t_inpcb;
507237263Snp	struct listen_ctx *lctx;
508245468Snp	int i, rc;
509237263Snp
510237263Snp	INP_WLOCK_ASSERT(inp);
511237263Snp
512259804Snp	/* Don't start a hardware listener for any loopback address. */
513259804Snp	if (inp->inp_vflag & INP_IPV6 && IN6_IS_ADDR_LOOPBACK(&inp->in6p_laddr))
514259804Snp		return (0);
515259804Snp	if (!(inp->inp_vflag & INP_IPV6) &&
516259804Snp	    IN_LOOPBACK(ntohl(inp->inp_laddr.s_addr)))
517259804Snp		return (0);
518237263Snp#if 0
519237263Snp	ADAPTER_LOCK(sc);
520237263Snp	if (IS_BUSY(sc)) {
521237263Snp		log(LOG_ERR, "%s: listen request ignored, %s is busy",
522237263Snp		    __func__, device_get_nameunit(sc->dev));
523237263Snp		goto done;
524237263Snp	}
525237263Snp
526237263Snp	KASSERT(sc->flags & TOM_INIT_DONE,
527237263Snp	    ("%s: TOM not initialized", __func__));
528237263Snp#endif
529237263Snp
530237263Snp	if ((sc->open_device_map & sc->offload_map) == 0)
531237263Snp		goto done;	/* no port that's UP with IFCAP_TOE enabled */
532237263Snp
533237263Snp	/*
534245468Snp	 * Find a running port with IFCAP_TOE (4 or 6).  We'll use the first
535245468Snp	 * such port's queues to send the passive open and receive the reply to
536245468Snp	 * it.
537237263Snp	 *
538237263Snp	 * XXX: need a way to mark a port in use by offload.  if_cxgbe should
539237263Snp	 * then reject any attempt to bring down such a port (and maybe reject
540237263Snp	 * attempts to disable IFCAP_TOE on that port too?).
541237263Snp	 */
542237263Snp	for_each_port(sc, i) {
543237263Snp		if (isset(&sc->open_device_map, i) &&
544245468Snp		    sc->port[i]->ifp->if_capenable & IFCAP_TOE)
545237263Snp				break;
546237263Snp	}
547237263Snp	KASSERT(i < sc->params.nports,
548237263Snp	    ("%s: no running port with TOE capability enabled.", __func__));
549237263Snp	pi = sc->port[i];
550237263Snp
551237263Snp	if (listen_hash_find(sc, inp) != NULL)
552237263Snp		goto done;	/* already setup */
553237263Snp
554237263Snp	lctx = alloc_lctx(sc, inp, pi);
555237263Snp	if (lctx == NULL) {
556237263Snp		log(LOG_ERR,
557237263Snp		    "%s: listen request ignored, %s couldn't allocate lctx\n",
558237263Snp		    __func__, device_get_nameunit(sc->dev));
559237263Snp		goto done;
560237263Snp	}
561237263Snp	listen_hash_add(sc, lctx);
562237263Snp
563245468Snp	CTR6(KTR_CXGBE, "%s: stid %u (%s), lctx %p, inp %p vflag 0x%x",
564245468Snp	    __func__, lctx->stid, tcpstates[tp->t_state], lctx, inp,
565245468Snp	    inp->inp_vflag);
566237263Snp
567245468Snp	if (inp->inp_vflag & INP_IPV6)
568245468Snp		rc = create_server6(sc, lctx);
569245468Snp	else
570245468Snp		rc = create_server(sc, lctx);
571245468Snp	if (rc != 0) {
572245468Snp		log(LOG_ERR, "%s: %s failed to create hw listener: %d.\n",
573245468Snp		    __func__, device_get_nameunit(sc->dev), rc);
574237263Snp		(void) listen_hash_del(sc, inp);
575237263Snp		inp = release_lctx(sc, lctx);
576237263Snp		/* can't be freed, host stack has a reference */
577237263Snp		KASSERT(inp != NULL, ("%s: inp freed", __func__));
578237263Snp		goto done;
579237263Snp	}
580237263Snp	lctx->flags |= LCTX_RPL_PENDING;
581237263Snpdone:
582237263Snp#if 0
583237263Snp	ADAPTER_UNLOCK(sc);
584237263Snp#endif
585237263Snp	return (0);
586237263Snp}
587237263Snp
588237263Snpint
589237263Snpt4_listen_stop(struct toedev *tod, struct tcpcb *tp)
590237263Snp{
591237263Snp	struct listen_ctx *lctx;
592237263Snp	struct adapter *sc = tod->tod_softc;
593237263Snp	struct inpcb *inp = tp->t_inpcb;
594237263Snp	struct synq_entry *synqe;
595237263Snp
596237263Snp	INP_WLOCK_ASSERT(inp);
597237263Snp
598237263Snp	lctx = listen_hash_del(sc, inp);
599237263Snp	if (lctx == NULL)
600237263Snp		return (ENOENT);	/* no hardware listener for this inp */
601237263Snp
602237263Snp	CTR4(KTR_CXGBE, "%s: stid %u, lctx %p, flags %x", __func__, lctx->stid,
603237263Snp	    lctx, lctx->flags);
604237263Snp
605237263Snp	/*
606237263Snp	 * If the reply to the PASS_OPEN is still pending we'll wait for it to
607237263Snp	 * arrive and clean up when it does.
608237263Snp	 */
609237263Snp	if (lctx->flags & LCTX_RPL_PENDING) {
610237263Snp		KASSERT(TAILQ_EMPTY(&lctx->synq),
611237263Snp		    ("%s: synq not empty.", __func__));
612237263Snp		return (EINPROGRESS);
613237263Snp	}
614237263Snp
615237263Snp	/*
616237263Snp	 * The host stack will abort all the connections on the listening
617237263Snp	 * socket's so_comp.  It doesn't know about the connections on the synq
618237263Snp	 * so we need to take care of those.
619237263Snp	 */
620243680Snp	TAILQ_FOREACH(synqe, &lctx->synq, link) {
621243680Snp		if (synqe->flags & TPF_SYNQE_HAS_L2TE)
622243680Snp			send_reset_synqe(tod, synqe);
623243680Snp	}
624237263Snp
625237263Snp	destroy_server(sc, lctx);
626237263Snp	return (0);
627237263Snp}
628237263Snp
629237263Snpstatic inline void
630237263Snphold_synqe(struct synq_entry *synqe)
631237263Snp{
632237263Snp
633237263Snp	refcount_acquire(&synqe->refcnt);
634237263Snp}
635237263Snp
636237263Snpstatic inline void
637237263Snprelease_synqe(struct synq_entry *synqe)
638237263Snp{
639237263Snp
640237263Snp	if (refcount_release(&synqe->refcnt)) {
641239514Snp		int needfree = synqe->flags & TPF_SYNQE_NEEDFREE;
642237263Snp
643237263Snp		m_freem(synqe->syn);
644237263Snp		if (needfree)
645237263Snp			free(synqe, M_CXGBE);
646237263Snp	}
647237263Snp}
648237263Snp
649237263Snpvoid
650237263Snpt4_syncache_added(struct toedev *tod __unused, void *arg)
651237263Snp{
652237263Snp	struct synq_entry *synqe = arg;
653237263Snp
654237263Snp	hold_synqe(synqe);
655237263Snp}
656237263Snp
657237263Snpvoid
658237263Snpt4_syncache_removed(struct toedev *tod __unused, void *arg)
659237263Snp{
660237263Snp	struct synq_entry *synqe = arg;
661237263Snp
662237263Snp	release_synqe(synqe);
663237263Snp}
664237263Snp
665237263Snp/* XXX */
666237263Snpextern void tcp_dooptions(struct tcpopt *, u_char *, int, int);
667237263Snp
668237263Snpint
669237263Snpt4_syncache_respond(struct toedev *tod, void *arg, struct mbuf *m)
670237263Snp{
671237263Snp	struct adapter *sc = tod->tod_softc;
672237263Snp	struct synq_entry *synqe = arg;
673237263Snp	struct wrqe *wr;
674237263Snp	struct l2t_entry *e;
675237263Snp	struct tcpopt to;
676237263Snp	struct ip *ip = mtod(m, struct ip *);
677245468Snp	struct tcphdr *th;
678237263Snp
679237263Snp	wr = (struct wrqe *)atomic_readandclear_ptr(&synqe->wr);
680243110Snp	if (wr == NULL) {
681243110Snp		m_freem(m);
682237263Snp		return (EALREADY);
683243110Snp	}
684237263Snp
685245468Snp	if (ip->ip_v == IPVERSION)
686245468Snp		th = (void *)(ip + 1);
687245468Snp	else
688245468Snp		th = (void *)((struct ip6_hdr *)ip + 1);
689237263Snp	bzero(&to, sizeof(to));
690237263Snp	tcp_dooptions(&to, (void *)(th + 1), (th->th_off << 2) - sizeof(*th),
691237263Snp	    TO_SYN);
692237263Snp
693237263Snp	/* save these for later */
694237263Snp	synqe->iss = be32toh(th->th_seq);
695237263Snp	synqe->ts = to.to_tsval;
696237263Snp
697252711Snp	if (is_t5(sc)) {
698252711Snp		struct cpl_t5_pass_accept_rpl *rpl5 = wrtod(wr);
699252711Snp
700252711Snp		rpl5->iss = th->th_seq;
701252711Snp	}
702252711Snp
703237263Snp	e = &sc->l2t->l2tab[synqe->l2e_idx];
704237263Snp	t4_l2t_send(sc, wr, e);
705237263Snp
706237263Snp	m_freem(m);	/* don't need this any more */
707237263Snp	return (0);
708237263Snp}
709237263Snp
710237263Snpstatic int
711237263Snpdo_pass_open_rpl(struct sge_iq *iq, const struct rss_header *rss,
712237263Snp    struct mbuf *m)
713237263Snp{
714237263Snp	struct adapter *sc = iq->adapter;
715237263Snp	const struct cpl_pass_open_rpl *cpl = (const void *)(rss + 1);
716237263Snp	int stid = GET_TID(cpl);
717237263Snp	unsigned int status = cpl->status;
718237263Snp	struct listen_ctx *lctx = lookup_stid(sc, stid);
719237263Snp	struct inpcb *inp = lctx->inp;
720237263Snp#ifdef INVARIANTS
721237263Snp	unsigned int opcode = G_CPL_OPCODE(be32toh(OPCODE_TID(cpl)));
722237263Snp#endif
723237263Snp
724237263Snp	KASSERT(opcode == CPL_PASS_OPEN_RPL,
725237263Snp	    ("%s: unexpected opcode 0x%x", __func__, opcode));
726237263Snp	KASSERT(m == NULL, ("%s: wasn't expecting payload", __func__));
727237263Snp	KASSERT(lctx->stid == stid, ("%s: lctx stid mismatch", __func__));
728237263Snp
729237263Snp	INP_WLOCK(inp);
730237263Snp
731237263Snp	CTR4(KTR_CXGBE, "%s: stid %d, status %u, flags 0x%x",
732237263Snp	    __func__, stid, status, lctx->flags);
733237263Snp
734237263Snp	lctx->flags &= ~LCTX_RPL_PENDING;
735237263Snp
736237263Snp	if (status != CPL_ERR_NONE)
737245468Snp		log(LOG_ERR, "listener (stid %u) failed: %d\n", stid, status);
738237263Snp
739237263Snp#ifdef INVARIANTS
740237263Snp	/*
741237263Snp	 * If the inp has been dropped (listening socket closed) then
742237263Snp	 * listen_stop must have run and taken the inp out of the hash.
743237263Snp	 */
744237263Snp	if (inp->inp_flags & INP_DROPPED) {
745237263Snp		KASSERT(listen_hash_del(sc, inp) == NULL,
746237263Snp		    ("%s: inp %p still in listen hash", __func__, inp));
747237263Snp	}
748237263Snp#endif
749237263Snp
750237263Snp	if (inp->inp_flags & INP_DROPPED && status != CPL_ERR_NONE) {
751237263Snp		if (release_lctx(sc, lctx) != NULL)
752237263Snp			INP_WUNLOCK(inp);
753237263Snp		return (status);
754237263Snp	}
755237263Snp
756237263Snp	/*
757237263Snp	 * Listening socket stopped listening earlier and now the chip tells us
758237263Snp	 * it has started the hardware listener.  Stop it; the lctx will be
759237263Snp	 * released in do_close_server_rpl.
760237263Snp	 */
761237263Snp	if (inp->inp_flags & INP_DROPPED) {
762237263Snp		destroy_server(sc, lctx);
763237263Snp		INP_WUNLOCK(inp);
764237263Snp		return (status);
765237263Snp	}
766237263Snp
767237263Snp	/*
768237263Snp	 * Failed to start hardware listener.  Take inp out of the hash and
769237263Snp	 * release our reference on it.  An error message has been logged
770237263Snp	 * already.
771237263Snp	 */
772237263Snp	if (status != CPL_ERR_NONE) {
773237263Snp		listen_hash_del(sc, inp);
774237263Snp		if (release_lctx(sc, lctx) != NULL)
775237263Snp			INP_WUNLOCK(inp);
776237263Snp		return (status);
777237263Snp	}
778237263Snp
779237263Snp	/* hardware listener open for business */
780237263Snp
781237263Snp	INP_WUNLOCK(inp);
782237263Snp	return (status);
783237263Snp}
784237263Snp
785237263Snpstatic int
786237263Snpdo_close_server_rpl(struct sge_iq *iq, const struct rss_header *rss,
787237263Snp    struct mbuf *m)
788237263Snp{
789237263Snp	struct adapter *sc = iq->adapter;
790237263Snp	const struct cpl_close_listsvr_rpl *cpl = (const void *)(rss + 1);
791237263Snp	int stid = GET_TID(cpl);
792237263Snp	unsigned int status = cpl->status;
793237263Snp	struct listen_ctx *lctx = lookup_stid(sc, stid);
794237263Snp	struct inpcb *inp = lctx->inp;
795237263Snp#ifdef INVARIANTS
796237263Snp	unsigned int opcode = G_CPL_OPCODE(be32toh(OPCODE_TID(cpl)));
797237263Snp#endif
798237263Snp
799237263Snp	KASSERT(opcode == CPL_CLOSE_LISTSRV_RPL,
800237263Snp	    ("%s: unexpected opcode 0x%x", __func__, opcode));
801237263Snp	KASSERT(m == NULL, ("%s: wasn't expecting payload", __func__));
802237263Snp	KASSERT(lctx->stid == stid, ("%s: lctx stid mismatch", __func__));
803237263Snp
804237263Snp	CTR3(KTR_CXGBE, "%s: stid %u, status %u", __func__, stid, status);
805237263Snp
806237263Snp	if (status != CPL_ERR_NONE) {
807245468Snp		log(LOG_ERR, "%s: failed (%u) to close listener for stid %u\n",
808237263Snp		    __func__, status, stid);
809237263Snp		return (status);
810237263Snp	}
811237263Snp
812237263Snp	INP_WLOCK(inp);
813237263Snp	inp = release_lctx(sc, lctx);
814237263Snp	if (inp != NULL)
815237263Snp		INP_WUNLOCK(inp);
816237263Snp
817237263Snp	return (status);
818237263Snp}
819237263Snp
820237263Snpstatic void
821237263Snpdone_with_synqe(struct adapter *sc, struct synq_entry *synqe)
822237263Snp{
823237263Snp	struct listen_ctx *lctx = synqe->lctx;
824237263Snp	struct inpcb *inp = lctx->inp;
825237263Snp	struct port_info *pi = synqe->syn->m_pkthdr.rcvif->if_softc;
826237263Snp	struct l2t_entry *e = &sc->l2t->l2tab[synqe->l2e_idx];
827237263Snp
828237263Snp	INP_WLOCK_ASSERT(inp);
829237263Snp
830237263Snp	TAILQ_REMOVE(&lctx->synq, synqe, link);
831237263Snp	inp = release_lctx(sc, lctx);
832237263Snp	if (inp)
833237263Snp		INP_WUNLOCK(inp);
834237263Snp	remove_tid(sc, synqe->tid);
835237263Snp	release_tid(sc, synqe->tid, &sc->sge.ctrlq[pi->port_id]);
836237263Snp	t4_l2t_release(e);
837237263Snp	release_synqe(synqe);	/* removed from synq list */
838237263Snp}
839237263Snp
840237263Snpint
841237263Snpdo_abort_req_synqe(struct sge_iq *iq, const struct rss_header *rss,
842237263Snp    struct mbuf *m)
843237263Snp{
844237263Snp	struct adapter *sc = iq->adapter;
845237263Snp	const struct cpl_abort_req_rss *cpl = (const void *)(rss + 1);
846237263Snp	unsigned int tid = GET_TID(cpl);
847237263Snp	struct synq_entry *synqe = lookup_tid(sc, tid);
848237263Snp	struct listen_ctx *lctx = synqe->lctx;
849237263Snp	struct inpcb *inp = lctx->inp;
850237263Snp	int txqid;
851237263Snp	struct sge_wrq *ofld_txq;
852237263Snp#ifdef INVARIANTS
853237263Snp	unsigned int opcode = G_CPL_OPCODE(be32toh(OPCODE_TID(cpl)));
854237263Snp#endif
855237263Snp
856237263Snp	KASSERT(opcode == CPL_ABORT_REQ_RSS,
857237263Snp	    ("%s: unexpected opcode 0x%x", __func__, opcode));
858237263Snp	KASSERT(m == NULL, ("%s: wasn't expecting payload", __func__));
859237263Snp	KASSERT(synqe->tid == tid, ("%s: toep tid mismatch", __func__));
860237263Snp
861237263Snp	CTR6(KTR_CXGBE, "%s: tid %u, synqe %p (0x%x), lctx %p, status %d",
862237263Snp	    __func__, tid, synqe, synqe->flags, synqe->lctx, cpl->status);
863237263Snp
864245935Snp	if (negative_advice(cpl->status))
865237263Snp		return (0);	/* Ignore negative advice */
866237263Snp
867237263Snp	INP_WLOCK(inp);
868237263Snp
869237263Snp	get_qids_from_mbuf(synqe->syn, &txqid, NULL);
870237263Snp	ofld_txq = &sc->sge.ofld_txq[txqid];
871237263Snp
872237263Snp	/*
873237263Snp	 * If we'd initiated an abort earlier the reply to it is responsible for
874237263Snp	 * cleaning up resources.  Otherwise we tear everything down right here
875237263Snp	 * right now.  We owe the T4 a CPL_ABORT_RPL no matter what.
876237263Snp	 */
877239514Snp	if (synqe->flags & TPF_ABORT_SHUTDOWN) {
878237263Snp		INP_WUNLOCK(inp);
879237263Snp		goto done;
880237263Snp	}
881237263Snp
882237263Snp	done_with_synqe(sc, synqe);
883237263Snp	/* inp lock released by done_with_synqe */
884237263Snpdone:
885237263Snp	send_abort_rpl(sc, ofld_txq, tid, CPL_ABORT_NO_RST);
886237263Snp	return (0);
887237263Snp}
888237263Snp
889237263Snpint
890237263Snpdo_abort_rpl_synqe(struct sge_iq *iq, const struct rss_header *rss,
891237263Snp    struct mbuf *m)
892237263Snp{
893237263Snp	struct adapter *sc = iq->adapter;
894237263Snp	const struct cpl_abort_rpl_rss *cpl = (const void *)(rss + 1);
895237263Snp	unsigned int tid = GET_TID(cpl);
896237263Snp	struct synq_entry *synqe = lookup_tid(sc, tid);
897237263Snp	struct listen_ctx *lctx = synqe->lctx;
898237263Snp	struct inpcb *inp = lctx->inp;
899237263Snp#ifdef INVARIANTS
900237263Snp	unsigned int opcode = G_CPL_OPCODE(be32toh(OPCODE_TID(cpl)));
901237263Snp#endif
902237263Snp
903237263Snp	KASSERT(opcode == CPL_ABORT_RPL_RSS,
904237263Snp	    ("%s: unexpected opcode 0x%x", __func__, opcode));
905237263Snp	KASSERT(m == NULL, ("%s: wasn't expecting payload", __func__));
906237263Snp	KASSERT(synqe->tid == tid, ("%s: toep tid mismatch", __func__));
907237263Snp
908237263Snp	CTR6(KTR_CXGBE, "%s: tid %u, synqe %p (0x%x), lctx %p, status %d",
909237263Snp	    __func__, tid, synqe, synqe->flags, synqe->lctx, cpl->status);
910237263Snp
911237263Snp	INP_WLOCK(inp);
912239514Snp	KASSERT(synqe->flags & TPF_ABORT_SHUTDOWN,
913237263Snp	    ("%s: wasn't expecting abort reply for synqe %p (0x%x)",
914237263Snp	    __func__, synqe, synqe->flags));
915237263Snp
916237263Snp	done_with_synqe(sc, synqe);
917237263Snp	/* inp lock released by done_with_synqe */
918237263Snp
919237263Snp	return (0);
920237263Snp}
921237263Snp
922237263Snpvoid
923237263Snpt4_offload_socket(struct toedev *tod, void *arg, struct socket *so)
924237263Snp{
925237263Snp	struct adapter *sc = tod->tod_softc;
926237263Snp	struct synq_entry *synqe = arg;
927237263Snp#ifdef INVARIANTS
928237263Snp	struct inpcb *inp = sotoinpcb(so);
929237263Snp#endif
930237263Snp	struct cpl_pass_establish *cpl = mtod(synqe->syn, void *);
931237263Snp	struct toepcb *toep = *(struct toepcb **)(cpl + 1);
932237263Snp
933237263Snp	INP_INFO_LOCK_ASSERT(&V_tcbinfo); /* prevents bad race with accept() */
934237263Snp	INP_WLOCK_ASSERT(inp);
935239514Snp	KASSERT(synqe->flags & TPF_SYNQE,
936237263Snp	    ("%s: %p not a synq_entry?", __func__, arg));
937237263Snp
938237263Snp	offload_socket(so, toep);
939237263Snp	make_established(toep, cpl->snd_isn, cpl->rcv_isn, cpl->tcp_opt);
940239514Snp	toep->flags |= TPF_CPL_PENDING;
941237263Snp	update_tid(sc, synqe->tid, toep);
942239544Snp	synqe->flags |= TPF_SYNQE_EXPANDED;
943237263Snp}
944237263Snp
945237263Snpstatic inline void
946237263Snpsave_qids_in_mbuf(struct mbuf *m, struct port_info *pi)
947237263Snp{
948237263Snp	uint32_t txqid, rxqid;
949237263Snp
950237263Snp	txqid = (arc4random() % pi->nofldtxq) + pi->first_ofld_txq;
951237263Snp	rxqid = (arc4random() % pi->nofldrxq) + pi->first_ofld_rxq;
952237263Snp
953237263Snp	m->m_pkthdr.flowid = (txqid << 16) | (rxqid & 0xffff);
954237263Snp}
955237263Snp
956237263Snpstatic inline void
957237263Snpget_qids_from_mbuf(struct mbuf *m, int *txqid, int *rxqid)
958237263Snp{
959237263Snp
960237263Snp	if (txqid)
961237263Snp		*txqid = m->m_pkthdr.flowid >> 16;
962237263Snp	if (rxqid)
963237263Snp		*rxqid = m->m_pkthdr.flowid & 0xffff;
964237263Snp}
965237263Snp
966237263Snp/*
967237263Snp * Use the trailing space in the mbuf in which the PASS_ACCEPT_REQ arrived to
968237263Snp * store some state temporarily.
969237263Snp */
970237263Snpstatic struct synq_entry *
971237263Snpmbuf_to_synqe(struct mbuf *m)
972237263Snp{
973248925Snp	int len = roundup2(sizeof (struct synq_entry), 8);
974237263Snp	int tspace = M_TRAILINGSPACE(m);
975237263Snp	struct synq_entry *synqe = NULL;
976237263Snp
977237263Snp	if (tspace < len) {
978237263Snp		synqe = malloc(sizeof(*synqe), M_CXGBE, M_NOWAIT);
979237263Snp		if (synqe == NULL)
980237263Snp			return (NULL);
981239514Snp		synqe->flags = TPF_SYNQE | TPF_SYNQE_NEEDFREE;
982239514Snp	} else {
983245937Snp		synqe = (void *)(m->m_data + m->m_len + tspace - len);
984239514Snp		synqe->flags = TPF_SYNQE;
985239514Snp	}
986237263Snp
987237263Snp	return (synqe);
988237263Snp}
989237263Snp
990237263Snpstatic void
991237263Snpt4opt_to_tcpopt(const struct tcp_options *t4opt, struct tcpopt *to)
992237263Snp{
993237263Snp	bzero(to, sizeof(*to));
994237263Snp
995237263Snp	if (t4opt->mss) {
996237263Snp		to->to_flags |= TOF_MSS;
997237263Snp		to->to_mss = be16toh(t4opt->mss);
998237263Snp	}
999237263Snp
1000237263Snp	if (t4opt->wsf) {
1001237263Snp		to->to_flags |= TOF_SCALE;
1002237263Snp		to->to_wscale = t4opt->wsf;
1003237263Snp	}
1004237263Snp
1005237263Snp	if (t4opt->tstamp)
1006237263Snp		to->to_flags |= TOF_TS;
1007237263Snp
1008237263Snp	if (t4opt->sack)
1009237263Snp		to->to_flags |= TOF_SACKPERM;
1010237263Snp}
1011237263Snp
1012237263Snp/*
1013237263Snp * Options2 for passive open.
1014237263Snp */
1015237263Snpstatic uint32_t
1016237263Snpcalc_opt2p(struct adapter *sc, struct port_info *pi, int rxqid,
1017239344Snp    const struct tcp_options *tcpopt, struct tcphdr *th, int ulp_mode)
1018237263Snp{
1019237263Snp	struct sge_ofld_rxq *ofld_rxq = &sc->sge.ofld_rxq[rxqid];
1020249385Snp	uint32_t opt2;
1021237263Snp
1022249385Snp	opt2 = V_TX_QUEUE(sc->params.tp.tx_modq[pi->tx_chan]) |
1023249385Snp	    F_RSS_QUEUE_VALID | V_RSS_QUEUE(ofld_rxq->iq.abs_id);
1024249385Snp
1025237263Snp	if (V_tcp_do_rfc1323) {
1026237263Snp		if (tcpopt->tstamp)
1027237263Snp			opt2 |= F_TSTAMPS_EN;
1028237263Snp		if (tcpopt->sack)
1029237263Snp			opt2 |= F_SACK_EN;
1030255198Snp		if (tcpopt->wsf <= 14)
1031237263Snp			opt2 |= F_WND_SCALE_EN;
1032237263Snp	}
1033237263Snp
1034237263Snp	if (V_tcp_do_ecn && th->th_flags & (TH_ECE | TH_CWR))
1035237263Snp		opt2 |= F_CCTRL_ECN;
1036237263Snp
1037249385Snp	/* RX_COALESCE is always a valid value (0 or M_RX_COALESCE). */
1038248925Snp	if (is_t4(sc))
1039249385Snp		opt2 |= F_RX_COALESCE_VALID;
1040252711Snp	else {
1041249385Snp		opt2 |= F_T5_OPT_2_VALID;
1042252711Snp		opt2 |= F_CONG_CNTRL_VALID; /* OPT_2_ISS really, for T5 */
1043252711Snp	}
1044252728Snp	if (sc->tt.rx_coalesce)
1045252728Snp		opt2 |= V_RX_COALESCE(M_RX_COALESCE);
1046237263Snp
1047239344Snp#ifdef USE_DDP_RX_FLOW_CONTROL
1048239344Snp	if (ulp_mode == ULP_MODE_TCPDDP)
1049239344Snp		opt2 |= F_RX_FC_VALID | F_RX_FC_DDP;
1050239344Snp#endif
1051239344Snp
1052237263Snp	return htobe32(opt2);
1053237263Snp}
1054237263Snp
1055237263Snpstatic void
1056237263Snppass_accept_req_to_protohdrs(const struct mbuf *m, struct in_conninfo *inc,
1057237263Snp    struct tcphdr *th)
1058237263Snp{
1059237263Snp	const struct cpl_pass_accept_req *cpl = mtod(m, const void *);
1060237263Snp	const struct ether_header *eh;
1061237263Snp	unsigned int hlen = be32toh(cpl->hdr_len);
1062245468Snp	uintptr_t l3hdr;
1063237263Snp	const struct tcphdr *tcp;
1064237263Snp
1065237263Snp	eh = (const void *)(cpl + 1);
1066245468Snp	l3hdr = ((uintptr_t)eh + G_ETH_HDR_LEN(hlen));
1067245468Snp	tcp = (const void *)(l3hdr + G_IP_HDR_LEN(hlen));
1068237263Snp
1069237263Snp	if (inc) {
1070237263Snp		bzero(inc, sizeof(*inc));
1071237263Snp		inc->inc_fport = tcp->th_sport;
1072237263Snp		inc->inc_lport = tcp->th_dport;
1073245468Snp		if (((struct ip *)l3hdr)->ip_v == IPVERSION) {
1074245468Snp			const struct ip *ip = (const void *)l3hdr;
1075245468Snp
1076245468Snp			inc->inc_faddr = ip->ip_src;
1077245468Snp			inc->inc_laddr = ip->ip_dst;
1078245468Snp		} else {
1079245468Snp			const struct ip6_hdr *ip6 = (const void *)l3hdr;
1080245468Snp
1081237263Snp			inc->inc_flags |= INC_ISIPV6;
1082245468Snp			inc->inc6_faddr = ip6->ip6_src;
1083245468Snp			inc->inc6_laddr = ip6->ip6_dst;
1084245468Snp		}
1085237263Snp	}
1086237263Snp
1087237263Snp	if (th) {
1088237263Snp		bcopy(tcp, th, sizeof(*th));
1089237263Snp		tcp_fields_to_host(th);		/* just like tcp_input */
1090237263Snp	}
1091237263Snp}
1092237263Snp
1093245468Snpstatic int
1094245468Snpifnet_has_ip6(struct ifnet *ifp, struct in6_addr *ip6)
1095245468Snp{
1096245468Snp	struct ifaddr *ifa;
1097245468Snp	struct sockaddr_in6 *sin6;
1098245468Snp	int found = 0;
1099245468Snp	struct in6_addr in6 = *ip6;
1100245468Snp
1101245468Snp	/* Just as in ip6_input */
1102245468Snp	if (in6_clearscope(&in6) || in6_clearscope(&in6))
1103245468Snp		return (0);
1104245468Snp	in6_setscope(&in6, ifp, NULL);
1105245468Snp
1106245468Snp	if_addr_rlock(ifp);
1107245468Snp	TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
1108245468Snp		sin6 = (void *)ifa->ifa_addr;
1109245468Snp		if (sin6->sin6_family != AF_INET6)
1110245468Snp			continue;
1111245468Snp
1112245468Snp		if (IN6_ARE_ADDR_EQUAL(&sin6->sin6_addr, &in6)) {
1113245468Snp			found = 1;
1114245468Snp			break;
1115245468Snp		}
1116245468Snp	}
1117245468Snp	if_addr_runlock(ifp);
1118245468Snp
1119245468Snp	return (found);
1120245468Snp}
1121245468Snp
1122245468Snpstatic struct l2t_entry *
1123245468Snpget_l2te_for_nexthop(struct port_info *pi, struct ifnet *ifp,
1124245468Snp    struct in_conninfo *inc)
1125245468Snp{
1126245468Snp	struct rtentry *rt;
1127245468Snp	struct l2t_entry *e;
1128245468Snp	struct sockaddr_in6 sin6;
1129245468Snp	struct sockaddr *dst = (void *)&sin6;
1130245468Snp
1131245468Snp	if (inc->inc_flags & INC_ISIPV6) {
1132245468Snp		dst->sa_len = sizeof(struct sockaddr_in6);
1133245468Snp		dst->sa_family = AF_INET6;
1134245468Snp		((struct sockaddr_in6 *)dst)->sin6_addr = inc->inc6_faddr;
1135245468Snp
1136245468Snp		if (IN6_IS_ADDR_LINKLOCAL(&inc->inc6_laddr)) {
1137245468Snp			/* no need for route lookup */
1138245468Snp			e = t4_l2t_get(pi, ifp, dst);
1139245468Snp			return (e);
1140245468Snp		}
1141245468Snp	} else {
1142245468Snp		dst->sa_len = sizeof(struct sockaddr_in);
1143245468Snp		dst->sa_family = AF_INET;
1144245468Snp		((struct sockaddr_in *)dst)->sin_addr = inc->inc_faddr;
1145245468Snp	}
1146245468Snp
1147245468Snp	rt = rtalloc1(dst, 0, 0);
1148245468Snp	if (rt == NULL)
1149245468Snp		return (NULL);
1150245468Snp	else {
1151245468Snp		struct sockaddr *nexthop;
1152245468Snp
1153245468Snp		RT_UNLOCK(rt);
1154245468Snp		if (rt->rt_ifp != ifp)
1155245468Snp			e = NULL;
1156245468Snp		else {
1157245468Snp			if (rt->rt_flags & RTF_GATEWAY)
1158245468Snp				nexthop = rt->rt_gateway;
1159245468Snp			else
1160245468Snp				nexthop = dst;
1161245468Snp			e = t4_l2t_get(pi, ifp, nexthop);
1162245468Snp		}
1163245468Snp		RTFREE(rt);
1164245468Snp	}
1165245468Snp
1166245468Snp	return (e);
1167245468Snp}
1168245468Snp
1169245468Snpstatic int
1170245468Snpifnet_has_ip(struct ifnet *ifp, struct in_addr in)
1171245468Snp{
1172245468Snp	struct ifaddr *ifa;
1173245468Snp	struct sockaddr_in *sin;
1174245468Snp	int found = 0;
1175245468Snp
1176245468Snp	if_addr_rlock(ifp);
1177245468Snp	TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
1178245468Snp		sin = (void *)ifa->ifa_addr;
1179245468Snp		if (sin->sin_family != AF_INET)
1180245468Snp			continue;
1181245468Snp
1182245468Snp		if (sin->sin_addr.s_addr == in.s_addr) {
1183245468Snp			found = 1;
1184245468Snp			break;
1185245468Snp		}
1186245468Snp	}
1187245468Snp	if_addr_runlock(ifp);
1188245468Snp
1189245468Snp	return (found);
1190245468Snp}
1191245468Snp
1192237263Snp#define REJECT_PASS_ACCEPT()	do { \
1193237263Snp	reject_reason = __LINE__; \
1194237263Snp	goto reject; \
1195237263Snp} while (0)
1196237263Snp
1197237263Snp/*
1198237263Snp * The context associated with a tid entry via insert_tid could be a synq_entry
1199237263Snp * or a toepcb.  The only way CPL handlers can tell is via a bit in these flags.
1200237263Snp */
1201237263SnpCTASSERT(offsetof(struct toepcb, flags) == offsetof(struct synq_entry, flags));
1202237263Snp
1203237263Snp/*
1204237263Snp * Incoming SYN on a listening socket.
1205237263Snp *
1206237263Snp * XXX: Every use of ifp in this routine has a bad race with up/down, toe/-toe,
1207237263Snp * etc.
1208237263Snp */
1209237263Snpstatic int
1210237263Snpdo_pass_accept_req(struct sge_iq *iq, const struct rss_header *rss,
1211237263Snp    struct mbuf *m)
1212237263Snp{
1213237263Snp	struct adapter *sc = iq->adapter;
1214237263Snp	struct toedev *tod;
1215237263Snp	const struct cpl_pass_accept_req *cpl = mtod(m, const void *);
1216237263Snp	struct cpl_pass_accept_rpl *rpl;
1217237263Snp	struct wrqe *wr;
1218237263Snp	unsigned int stid = G_PASS_OPEN_TID(be32toh(cpl->tos_stid));
1219237263Snp	unsigned int tid = GET_TID(cpl);
1220237263Snp	struct listen_ctx *lctx = lookup_stid(sc, stid);
1221237263Snp	struct inpcb *inp;
1222237263Snp	struct socket *so;
1223237263Snp	struct in_conninfo inc;
1224237263Snp	struct tcphdr th;
1225237263Snp	struct tcpopt to;
1226237263Snp	struct port_info *pi;
1227245468Snp	struct ifnet *hw_ifp, *ifp;
1228237263Snp	struct l2t_entry *e = NULL;
1229239344Snp	int rscale, mtu_idx, rx_credits, rxqid, ulp_mode;
1230237263Snp	struct synq_entry *synqe = NULL;
1231237263Snp	int reject_reason;
1232237263Snp	uint16_t vid;
1233237263Snp#ifdef INVARIANTS
1234237263Snp	unsigned int opcode = G_CPL_OPCODE(be32toh(OPCODE_TID(cpl)));
1235237263Snp#endif
1236237263Snp
1237237263Snp	KASSERT(opcode == CPL_PASS_ACCEPT_REQ,
1238237263Snp	    ("%s: unexpected opcode 0x%x", __func__, opcode));
1239237263Snp	KASSERT(lctx->stid == stid, ("%s: lctx stid mismatch", __func__));
1240237263Snp
1241237263Snp	CTR4(KTR_CXGBE, "%s: stid %u, tid %u, lctx %p", __func__, stid, tid,
1242237263Snp	    lctx);
1243237263Snp
1244237263Snp	pass_accept_req_to_protohdrs(m, &inc, &th);
1245237263Snp	t4opt_to_tcpopt(&cpl->tcpopt, &to);
1246237263Snp
1247237263Snp	pi = sc->port[G_SYN_INTF(be16toh(cpl->l2info))];
1248245468Snp	hw_ifp = pi->ifp;	/* the cxgbeX ifnet */
1249245468Snp	m->m_pkthdr.rcvif = hw_ifp;
1250245468Snp	tod = TOEDEV(hw_ifp);
1251237263Snp
1252237263Snp	/*
1253245468Snp	 * Figure out if there is a pseudo interface (vlan, lagg, etc.)
1254245468Snp	 * involved.  Don't offload if the SYN had a VLAN tag and the vid
1255245468Snp	 * doesn't match anything on this interface.
1256245468Snp	 *
1257245468Snp	 * XXX: lagg support, lagg + vlan support.
1258237263Snp	 */
1259237263Snp	vid = EVL_VLANOFTAG(be16toh(cpl->vlan));
1260237263Snp	if (vid != 0xfff) {
1261245468Snp		ifp = VLAN_DEVAT(hw_ifp, vid);
1262245468Snp		if (ifp == NULL)
1263237263Snp			REJECT_PASS_ACCEPT();
1264245468Snp	} else
1265245468Snp		ifp = hw_ifp;
1266237263Snp
1267237263Snp	/*
1268237263Snp	 * Don't offload if the peer requested a TCP option that's not known to
1269237263Snp	 * the silicon.
1270237263Snp	 */
1271237263Snp	if (cpl->tcpopt.unknown)
1272237263Snp		REJECT_PASS_ACCEPT();
1273237263Snp
1274245468Snp	if (inc.inc_flags & INC_ISIPV6) {
1275237263Snp
1276245468Snp		/* Don't offload if the ifcap isn't enabled */
1277245468Snp		if ((ifp->if_capenable & IFCAP_TOE6) == 0)
1278245468Snp			REJECT_PASS_ACCEPT();
1279245468Snp
1280245468Snp		/*
1281245468Snp		 * SYN must be directed to an IP6 address on this ifnet.  This
1282245468Snp		 * is more restrictive than in6_localip.
1283245468Snp		 */
1284245468Snp		if (!ifnet_has_ip6(ifp, &inc.inc6_laddr))
1285245468Snp			REJECT_PASS_ACCEPT();
1286245468Snp	} else {
1287245468Snp
1288245468Snp		/* Don't offload if the ifcap isn't enabled */
1289245468Snp		if ((ifp->if_capenable & IFCAP_TOE4) == 0)
1290245468Snp			REJECT_PASS_ACCEPT();
1291245468Snp
1292245468Snp		/*
1293245468Snp		 * SYN must be directed to an IP address on this ifnet.  This
1294245468Snp		 * is more restrictive than in_localip.
1295245468Snp		 */
1296245468Snp		if (!ifnet_has_ip(ifp, inc.inc_laddr))
1297245468Snp			REJECT_PASS_ACCEPT();
1298237263Snp	}
1299237263Snp
1300245468Snp	e = get_l2te_for_nexthop(pi, ifp, &inc);
1301245468Snp	if (e == NULL)
1302245468Snp		REJECT_PASS_ACCEPT();
1303245468Snp
1304237263Snp	synqe = mbuf_to_synqe(m);
1305237263Snp	if (synqe == NULL)
1306237263Snp		REJECT_PASS_ACCEPT();
1307237263Snp
1308252711Snp	wr = alloc_wrqe(is_t4(sc) ? sizeof(struct cpl_pass_accept_rpl) :
1309252711Snp	    sizeof(struct cpl_t5_pass_accept_rpl), &sc->sge.ctrlq[pi->port_id]);
1310237263Snp	if (wr == NULL)
1311237263Snp		REJECT_PASS_ACCEPT();
1312237263Snp	rpl = wrtod(wr);
1313237263Snp
1314237263Snp	INP_INFO_WLOCK(&V_tcbinfo);	/* for 4-tuple check, syncache_add */
1315237263Snp
1316237263Snp	/* Don't offload if the 4-tuple is already in use */
1317237263Snp	if (toe_4tuple_check(&inc, &th, ifp) != 0) {
1318237263Snp		INP_INFO_WUNLOCK(&V_tcbinfo);
1319237263Snp		free(wr, M_CXGBE);
1320237263Snp		REJECT_PASS_ACCEPT();
1321237263Snp	}
1322237263Snp
1323237263Snp	inp = lctx->inp;		/* listening socket, not owned by TOE */
1324237263Snp	INP_WLOCK(inp);
1325237263Snp
1326237263Snp	/* Don't offload if the listening socket has closed */
1327237263Snp	if (__predict_false(inp->inp_flags & INP_DROPPED)) {
1328237263Snp		/*
1329237263Snp		 * The listening socket has closed.  The reply from the TOE to
1330237263Snp		 * our CPL_CLOSE_LISTSRV_REQ will ultimately release all
1331237263Snp		 * resources tied to this listen context.
1332237263Snp		 */
1333237263Snp		INP_WUNLOCK(inp);
1334237263Snp		INP_INFO_WUNLOCK(&V_tcbinfo);
1335237263Snp		free(wr, M_CXGBE);
1336237263Snp		REJECT_PASS_ACCEPT();
1337237263Snp	}
1338237263Snp	so = inp->inp_socket;
1339237263Snp
1340237263Snp	mtu_idx = find_best_mtu_idx(sc, &inc, be16toh(cpl->tcpopt.mss));
1341237263Snp	rscale = cpl->tcpopt.wsf && V_tcp_do_rfc1323 ? select_rcv_wscale() : 0;
1342237263Snp	SOCKBUF_LOCK(&so->so_rcv);
1343237263Snp	/* opt0 rcv_bufsiz initially, assumes its normal meaning later */
1344237263Snp	rx_credits = min(select_rcv_wnd(so) >> 10, M_RCV_BUFSIZ);
1345237263Snp	SOCKBUF_UNLOCK(&so->so_rcv);
1346237263Snp
1347237263Snp	save_qids_in_mbuf(m, pi);
1348237263Snp	get_qids_from_mbuf(m, NULL, &rxqid);
1349237263Snp
1350252711Snp	if (is_t4(sc))
1351252711Snp		INIT_TP_WR_MIT_CPL(rpl, CPL_PASS_ACCEPT_RPL, tid);
1352252711Snp	else {
1353252711Snp		struct cpl_t5_pass_accept_rpl *rpl5 = (void *)rpl;
1354252711Snp
1355252711Snp		INIT_TP_WR_MIT_CPL(rpl5, CPL_PASS_ACCEPT_RPL, tid);
1356252711Snp	}
1357239344Snp	if (sc->tt.ddp && (so->so_options & SO_NO_DDP) == 0) {
1358239344Snp		ulp_mode = ULP_MODE_TCPDDP;
1359239514Snp		synqe->flags |= TPF_SYNQE_TCPDDP;
1360239344Snp	} else
1361239344Snp		ulp_mode = ULP_MODE_NONE;
1362239344Snp	rpl->opt0 = calc_opt0(so, pi, e, mtu_idx, rscale, rx_credits, ulp_mode);
1363239344Snp	rpl->opt2 = calc_opt2p(sc, pi, rxqid, &cpl->tcpopt, &th, ulp_mode);
1364237263Snp
1365237263Snp	synqe->tid = tid;
1366237263Snp	synqe->lctx = lctx;
1367237263Snp	synqe->syn = m;
1368237263Snp	m = NULL;
1369245937Snp	refcount_init(&synqe->refcnt, 1);	/* 1 means extra hold */
1370237263Snp	synqe->l2e_idx = e->idx;
1371237263Snp	synqe->rcv_bufsize = rx_credits;
1372237263Snp	atomic_store_rel_ptr(&synqe->wr, (uintptr_t)wr);
1373237263Snp
1374237263Snp	insert_tid(sc, tid, synqe);
1375237263Snp	TAILQ_INSERT_TAIL(&lctx->synq, synqe, link);
1376237263Snp	hold_synqe(synqe);	/* hold for the duration it's in the synq */
1377237263Snp	hold_lctx(lctx);	/* A synqe on the list has a ref on its lctx */
1378237263Snp
1379237263Snp	/*
1380237263Snp	 * If all goes well t4_syncache_respond will get called during
1381237263Snp	 * syncache_add.  Also note that syncache_add releases both pcbinfo and
1382237263Snp	 * pcb locks.
1383237263Snp	 */
1384237263Snp	toe_syncache_add(&inc, &to, &th, inp, tod, synqe);
1385237263Snp	INP_UNLOCK_ASSERT(inp);	/* ok to assert, we have a ref on the inp */
1386237263Snp	INP_INFO_UNLOCK_ASSERT(&V_tcbinfo);
1387237263Snp
1388237263Snp	/*
1389237263Snp	 * If we replied during syncache_add (synqe->wr has been consumed),
1390237263Snp	 * good.  Otherwise, set it to 0 so that further syncache_respond
1391237263Snp	 * attempts by the kernel will be ignored.
1392237263Snp	 */
1393237263Snp	if (atomic_cmpset_ptr(&synqe->wr, (uintptr_t)wr, 0)) {
1394237263Snp
1395237263Snp		/*
1396237263Snp		 * syncache may or may not have a hold on the synqe, which may
1397237263Snp		 * or may not be stashed in the original SYN mbuf passed to us.
1398237263Snp		 * Just copy it over instead of dealing with all possibilities.
1399237263Snp		 */
1400243857Sglebius		m = m_dup(synqe->syn, M_NOWAIT);
1401237263Snp		if (m)
1402245468Snp			m->m_pkthdr.rcvif = hw_ifp;
1403237263Snp
1404242666Snp		remove_tid(sc, synqe->tid);
1405237263Snp		free(wr, M_CXGBE);
1406243680Snp
1407243680Snp		/* Yank the synqe out of the lctx synq. */
1408243680Snp		INP_WLOCK(inp);
1409243680Snp		TAILQ_REMOVE(&lctx->synq, synqe, link);
1410243680Snp		release_synqe(synqe);	/* removed from synq list */
1411243680Snp		inp = release_lctx(sc, lctx);
1412243680Snp		if (inp)
1413243680Snp			INP_WUNLOCK(inp);
1414243680Snp
1415245937Snp		release_synqe(synqe);	/* extra hold */
1416237263Snp		REJECT_PASS_ACCEPT();
1417237263Snp	}
1418243680Snp
1419237263Snp	CTR5(KTR_CXGBE, "%s: stid %u, tid %u, lctx %p, synqe %p, SYNACK",
1420237263Snp	    __func__, stid, tid, lctx, synqe);
1421243680Snp
1422243680Snp	INP_WLOCK(inp);
1423243680Snp	synqe->flags |= TPF_SYNQE_HAS_L2TE;
1424243680Snp	if (__predict_false(inp->inp_flags & INP_DROPPED)) {
1425243680Snp		/*
1426243680Snp		 * Listening socket closed but tod_listen_stop did not abort
1427243680Snp		 * this tid because there was no L2T entry for the tid at that
1428243680Snp		 * time.  Abort it now.  The reply to the abort will clean up.
1429243680Snp		 */
1430245937Snp		CTR6(KTR_CXGBE,
1431245937Snp		    "%s: stid %u, tid %u, lctx %p, synqe %p (0x%x), ABORT",
1432245937Snp		    __func__, stid, tid, lctx, synqe, synqe->flags);
1433245937Snp		if (!(synqe->flags & TPF_SYNQE_EXPANDED))
1434245937Snp			send_reset_synqe(tod, synqe);
1435243680Snp		INP_WUNLOCK(inp);
1436243680Snp
1437245937Snp		release_synqe(synqe);	/* extra hold */
1438243680Snp		return (__LINE__);
1439243680Snp	}
1440243680Snp	INP_WUNLOCK(inp);
1441243680Snp
1442245937Snp	release_synqe(synqe);	/* extra hold */
1443237263Snp	return (0);
1444237263Snpreject:
1445237263Snp	CTR4(KTR_CXGBE, "%s: stid %u, tid %u, REJECT (%d)", __func__, stid, tid,
1446237263Snp	    reject_reason);
1447237263Snp
1448237263Snp	if (e)
1449237263Snp		t4_l2t_release(e);
1450237263Snp	release_tid(sc, tid, lctx->ctrlq);
1451237263Snp
1452237263Snp	if (__predict_true(m != NULL)) {
1453237263Snp		m_adj(m, sizeof(*cpl));
1454237263Snp		m->m_pkthdr.csum_flags |= (CSUM_IP_CHECKED | CSUM_IP_VALID |
1455237263Snp		    CSUM_DATA_VALID | CSUM_PSEUDO_HDR);
1456237263Snp		m->m_pkthdr.csum_data = 0xffff;
1457245468Snp		hw_ifp->if_input(hw_ifp, m);
1458237263Snp	}
1459237263Snp
1460237263Snp	return (reject_reason);
1461237263Snp}
1462237263Snp
1463237263Snpstatic void
1464237263Snpsynqe_to_protohdrs(struct synq_entry *synqe,
1465237263Snp    const struct cpl_pass_establish *cpl, struct in_conninfo *inc,
1466237263Snp    struct tcphdr *th, struct tcpopt *to)
1467237263Snp{
1468237263Snp	uint16_t tcp_opt = be16toh(cpl->tcp_opt);
1469237263Snp
1470237263Snp	/* start off with the original SYN */
1471237263Snp	pass_accept_req_to_protohdrs(synqe->syn, inc, th);
1472237263Snp
1473237263Snp	/* modify parts to make it look like the ACK to our SYN|ACK */
1474237263Snp	th->th_flags = TH_ACK;
1475237263Snp	th->th_ack = synqe->iss + 1;
1476237263Snp	th->th_seq = be32toh(cpl->rcv_isn);
1477237263Snp	bzero(to, sizeof(*to));
1478237263Snp	if (G_TCPOPT_TSTAMP(tcp_opt)) {
1479237263Snp		to->to_flags |= TOF_TS;
1480237263Snp		to->to_tsecr = synqe->ts;
1481237263Snp	}
1482237263Snp}
1483237263Snp
1484237263Snpstatic int
1485237263Snpdo_pass_establish(struct sge_iq *iq, const struct rss_header *rss,
1486237263Snp    struct mbuf *m)
1487237263Snp{
1488237263Snp	struct adapter *sc = iq->adapter;
1489237263Snp	struct port_info *pi;
1490237263Snp	struct ifnet *ifp;
1491237263Snp	const struct cpl_pass_establish *cpl = (const void *)(rss + 1);
1492237263Snp#if defined(KTR) || defined(INVARIANTS)
1493237263Snp	unsigned int stid = G_PASS_OPEN_TID(be32toh(cpl->tos_stid));
1494237263Snp#endif
1495237263Snp	unsigned int tid = GET_TID(cpl);
1496237263Snp	struct synq_entry *synqe = lookup_tid(sc, tid);
1497237263Snp	struct listen_ctx *lctx = synqe->lctx;
1498237263Snp	struct inpcb *inp = lctx->inp;
1499237263Snp	struct socket *so;
1500237263Snp	struct tcphdr th;
1501237263Snp	struct tcpopt to;
1502237263Snp	struct in_conninfo inc;
1503237263Snp	struct toepcb *toep;
1504237263Snp	u_int txqid, rxqid;
1505237263Snp#ifdef INVARIANTS
1506237263Snp	unsigned int opcode = G_CPL_OPCODE(be32toh(OPCODE_TID(cpl)));
1507237263Snp#endif
1508237263Snp
1509237263Snp	KASSERT(opcode == CPL_PASS_ESTABLISH,
1510237263Snp	    ("%s: unexpected opcode 0x%x", __func__, opcode));
1511237263Snp	KASSERT(m == NULL, ("%s: wasn't expecting payload", __func__));
1512237263Snp	KASSERT(lctx->stid == stid, ("%s: lctx stid mismatch", __func__));
1513239514Snp	KASSERT(synqe->flags & TPF_SYNQE,
1514237263Snp	    ("%s: tid %u (ctx %p) not a synqe", __func__, tid, synqe));
1515237263Snp
1516237263Snp	INP_INFO_WLOCK(&V_tcbinfo);	/* for syncache_expand */
1517237263Snp	INP_WLOCK(inp);
1518237263Snp
1519237263Snp	CTR6(KTR_CXGBE,
1520237263Snp	    "%s: stid %u, tid %u, synqe %p (0x%x), inp_flags 0x%x",
1521237263Snp	    __func__, stid, tid, synqe, synqe->flags, inp->inp_flags);
1522237263Snp
1523237263Snp	if (__predict_false(inp->inp_flags & INP_DROPPED)) {
1524237263Snp
1525243680Snp		if (synqe->flags & TPF_SYNQE_HAS_L2TE) {
1526243680Snp			KASSERT(synqe->flags & TPF_ABORT_SHUTDOWN,
1527243680Snp			    ("%s: listen socket closed but tid %u not aborted.",
1528243680Snp			    __func__, tid));
1529243680Snp		}
1530243680Snp
1531237263Snp		INP_WUNLOCK(inp);
1532237263Snp		INP_INFO_WUNLOCK(&V_tcbinfo);
1533237263Snp		return (0);
1534237263Snp	}
1535237263Snp
1536237263Snp	ifp = synqe->syn->m_pkthdr.rcvif;
1537237263Snp	pi = ifp->if_softc;
1538237263Snp	KASSERT(pi->adapter == sc,
1539237263Snp	    ("%s: pi %p, sc %p mismatch", __func__, pi, sc));
1540237263Snp
1541237263Snp	get_qids_from_mbuf(synqe->syn, &txqid, &rxqid);
1542237263Snp	KASSERT(rxqid == iq_to_ofld_rxq(iq) - &sc->sge.ofld_rxq[0],
1543237263Snp	    ("%s: CPL arrived on unexpected rxq.  %d %d", __func__, rxqid,
1544237263Snp	    (int)(iq_to_ofld_rxq(iq) - &sc->sge.ofld_rxq[0])));
1545237263Snp
1546237263Snp	toep = alloc_toepcb(pi, txqid, rxqid, M_NOWAIT);
1547237263Snp	if (toep == NULL) {
1548237263Snpreset:
1549243680Snp		/*
1550243680Snp		 * The reply to this abort will perform final cleanup.  There is
1551243680Snp		 * no need to check for HAS_L2TE here.  We can be here only if
1552243680Snp		 * we responded to the PASS_ACCEPT_REQ, and our response had the
1553243680Snp		 * L2T idx.
1554243680Snp		 */
1555237263Snp		send_reset_synqe(TOEDEV(ifp), synqe);
1556237263Snp		INP_WUNLOCK(inp);
1557237263Snp		INP_INFO_WUNLOCK(&V_tcbinfo);
1558237263Snp		return (0);
1559237263Snp	}
1560237263Snp	toep->tid = tid;
1561237263Snp	toep->l2te = &sc->l2t->l2tab[synqe->l2e_idx];
1562239514Snp	if (synqe->flags & TPF_SYNQE_TCPDDP)
1563239344Snp		set_tcpddp_ulp_mode(toep);
1564239344Snp	else
1565239344Snp		toep->ulp_mode = ULP_MODE_NONE;
1566237263Snp	/* opt0 rcv_bufsiz initially, assumes its normal meaning later */
1567237263Snp	toep->rx_credits = synqe->rcv_bufsize;
1568237263Snp
1569237263Snp	so = inp->inp_socket;
1570237263Snp	KASSERT(so != NULL, ("%s: socket is NULL", __func__));
1571237263Snp
1572237263Snp	/* Come up with something that syncache_expand should be ok with. */
1573237263Snp	synqe_to_protohdrs(synqe, cpl, &inc, &th, &to);
1574237263Snp
1575237263Snp	/*
1576237263Snp	 * No more need for anything in the mbuf that carried the
1577237263Snp	 * CPL_PASS_ACCEPT_REQ.  Drop the CPL_PASS_ESTABLISH and toep pointer
1578237263Snp	 * there.  XXX: bad form but I don't want to increase the size of synqe.
1579237263Snp	 */
1580237263Snp	m = synqe->syn;
1581237263Snp	KASSERT(sizeof(*cpl) + sizeof(toep) <= m->m_len,
1582237263Snp	    ("%s: no room in mbuf %p (m_len %d)", __func__, m, m->m_len));
1583237263Snp	bcopy(cpl, mtod(m, void *), sizeof(*cpl));
1584237263Snp	*(struct toepcb **)(mtod(m, struct cpl_pass_establish *) + 1) = toep;
1585237263Snp
1586237263Snp	if (!toe_syncache_expand(&inc, &to, &th, &so) || so == NULL) {
1587237263Snp		free_toepcb(toep);
1588237263Snp		goto reset;
1589237263Snp	}
1590237263Snp
1591239544Snp	/*
1592239544Snp	 * This is for the unlikely case where the syncache entry that we added
1593239544Snp	 * has been evicted from the syncache, but the syncache_expand above
1594239544Snp	 * works because of syncookies.
1595239544Snp	 *
1596239544Snp	 * XXX: we've held the tcbinfo lock throughout so there's no risk of
1597239544Snp	 * anyone accept'ing a connection before we've installed our hooks, but
1598239544Snp	 * this somewhat defeats the purpose of having a tod_offload_socket :-(
1599239544Snp	 */
1600239544Snp	if (__predict_false(!(synqe->flags & TPF_SYNQE_EXPANDED))) {
1601239544Snp		struct inpcb *new_inp = sotoinpcb(so);
1602239544Snp
1603239544Snp		INP_WLOCK(new_inp);
1604239544Snp		tcp_timer_activate(intotcpcb(new_inp), TT_KEEP, 0);
1605239544Snp		t4_offload_socket(TOEDEV(ifp), synqe, so);
1606239544Snp		INP_WUNLOCK(new_inp);
1607239544Snp	}
1608239544Snp
1609237263Snp	/* Done with the synqe */
1610237263Snp	TAILQ_REMOVE(&lctx->synq, synqe, link);
1611237263Snp	inp = release_lctx(sc, lctx);
1612237263Snp	if (inp != NULL)
1613237263Snp		INP_WUNLOCK(inp);
1614237263Snp	INP_INFO_WUNLOCK(&V_tcbinfo);
1615237263Snp	release_synqe(synqe);
1616237263Snp
1617237263Snp	return (0);
1618237263Snp}
1619237263Snp
1620237263Snpvoid
1621237263Snpt4_init_listen_cpl_handlers(struct adapter *sc)
1622237263Snp{
1623237263Snp
1624237263Snp	t4_register_cpl_handler(sc, CPL_PASS_OPEN_RPL, do_pass_open_rpl);
1625237263Snp	t4_register_cpl_handler(sc, CPL_CLOSE_LISTSRV_RPL, do_close_server_rpl);
1626237263Snp	t4_register_cpl_handler(sc, CPL_PASS_ACCEPT_REQ, do_pass_accept_req);
1627237263Snp	t4_register_cpl_handler(sc, CPL_PASS_ESTABLISH, do_pass_establish);
1628237263Snp}
1629237263Snp#endif
1630