/**************************************************************************

Copyright (c) 2007, Chelsio Inc.
All rights reserved.

Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:

 1. Redistributions of source code must retain the above copyright notice,
    this list of conditions and the following disclaimer.

 2. Neither the name of the Chelsio Corporation nor the names of its
    contributors may be used to endorse or promote products derived from
    this software without specific prior written permission.

THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.

***************************************************************************/
#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include "opt_inet.h"

#ifdef TCP_OFFLOAD
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/bus.h>
#include <sys/pciio.h>
#include <sys/conf.h>
#include <machine/bus.h>
#include <machine/resource.h>
#include <sys/bus_dma.h>
#include <sys/rman.h>
#include <sys/ioccom.h>
#include <sys/mbuf.h>
#include <sys/rwlock.h>
#include <sys/linker.h>
#include <sys/firmware.h>
#include <sys/socket.h>
#include <sys/socketvar.h>
#include <sys/sockio.h>
#include <sys/smp.h>
#include <sys/sysctl.h>
#include <sys/syslog.h>
#include <sys/queue.h>
#include <sys/taskqueue.h>
#include <sys/proc.h>
#include <sys/uio.h>

#include <net/route.h>
#include <netinet/in_systm.h>
#include <netinet/in.h>
#include <netinet/in_pcb.h>
#include <netinet/ip.h>
#include <netinet/ip_var.h>
#include <netinet/tcp_var.h>
#include <netinet/tcp.h>
#include <netinet/tcpip.h>

#include <rdma/ib_verbs.h>
#include <linux/idr.h>
#include <ulp/iw_cxgb/iw_cxgb_ib_intfc.h>

#include <cxgb_include.h>
#include <ulp/tom/cxgb_tom.h>
#include <ulp/tom/cxgb_toepcb.h>

#include <ulp/iw_cxgb/iw_cxgb_wr.h>
#include <ulp/iw_cxgb/iw_cxgb_hal.h>
#include <ulp/iw_cxgb/iw_cxgb_provider.h>
#include <ulp/iw_cxgb/iw_cxgb_cm.h>
#include <ulp/iw_cxgb/iw_cxgb.h>

#ifdef KTR
static char *states[] = {
	"idle",
	"listen",
	"connecting",
	"mpa_wait_req",
	"mpa_req_sent",
	"mpa_req_rcvd",
	"mpa_rep_sent",
	"fpdu_mode",
	"aborting",
	"closing",
	"moribund",
	"dead",
	NULL,
};
#endif

SYSCTL_NODE(_hw, OID_AUTO, iw_cxgb, CTLFLAG_RD, 0, "iw_cxgb driver parameters");

static int ep_timeout_secs = 60;
TUNABLE_INT("hw.iw_cxgb.ep_timeout_secs", &ep_timeout_secs);
SYSCTL_INT(_hw_iw_cxgb, OID_AUTO, ep_timeout_secs, CTLFLAG_RW, &ep_timeout_secs, 0,
    "CM Endpoint operation timeout in seconds (default=60)");

static int mpa_rev = 1;
TUNABLE_INT("hw.iw_cxgb.mpa_rev", &mpa_rev);
SYSCTL_INT(_hw_iw_cxgb, OID_AUTO, mpa_rev, CTLFLAG_RW, &mpa_rev, 0,
    "MPA Revision: 0 supports amso1100, 1 is spec compliant (default=1)");

static int markers_enabled = 0;
TUNABLE_INT("hw.iw_cxgb.markers_enabled", &markers_enabled);
SYSCTL_INT(_hw_iw_cxgb, OID_AUTO, markers_enabled, CTLFLAG_RW, &markers_enabled, 0,
    "Enable MPA MARKERS (default=0, disabled)");

static int crc_enabled = 1;
TUNABLE_INT("hw.iw_cxgb.crc_enabled", &crc_enabled);
SYSCTL_INT(_hw_iw_cxgb, OID_AUTO, crc_enabled, CTLFLAG_RW, &crc_enabled, 0,
    "Enable MPA CRC (default=1, enabled)");

static int rcv_win = 256 * 1024;
TUNABLE_INT("hw.iw_cxgb.rcv_win", &rcv_win);
SYSCTL_INT(_hw_iw_cxgb, OID_AUTO, rcv_win, CTLFLAG_RW, &rcv_win, 0,
    "TCP receive window in bytes (default=256KB)");

static int snd_win = 32 * 1024;
TUNABLE_INT("hw.iw_cxgb.snd_win", &snd_win);
SYSCTL_INT(_hw_iw_cxgb, OID_AUTO, snd_win, CTLFLAG_RW, &snd_win, 0,
    "TCP send window in bytes (default=32KB)");

static unsigned int nocong = 0;
TUNABLE_INT("hw.iw_cxgb.nocong", &nocong);
SYSCTL_UINT(_hw_iw_cxgb, OID_AUTO, nocong, CTLFLAG_RW, &nocong, 0,
    "Turn off congestion control (default=0)");

static unsigned int cong_flavor = 1;
TUNABLE_INT("hw.iw_cxgb.cong_flavor", &cong_flavor);
SYSCTL_UINT(_hw_iw_cxgb, OID_AUTO, cong_flavor, CTLFLAG_RW, &cong_flavor, 0,
    "TCP Congestion control flavor (default=1)");
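
/*
 * Illustrative usage (values are examples, not recommendations): the
 * TUNABLE_INT() knobs above can be preset from /boot/loader.conf, e.g.
 *
 *	hw.iw_cxgb.ep_timeout_secs=120
 *	hw.iw_cxgb.crc_enabled=1
 *
 * and, being CTLFLAG_RW, changed at runtime with sysctl(8):
 *
 *	# sysctl hw.iw_cxgb.markers_enabled=1
 *
 * Most of these knobs are sampled at connection setup, so a change
 * generally affects only subsequently established connections.
 */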

static void ep_timeout(void *arg);
static void connect_reply_upcall(struct iwch_ep *ep, int status);
static int iwch_so_upcall(struct socket *so, void *arg, int waitflag);

/*
 * Cruft to offload socket upcalls onto thread.
 */
static struct mtx req_lock;
static TAILQ_HEAD(iwch_ep_list, iwch_ep_common) req_list;
static struct task iw_cxgb_task;
static struct taskqueue *iw_cxgb_taskq;
static void process_req(void *ctx, int pending);
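
/*
 * The flow, in brief: iwch_so_upcall() runs in socket-upcall context, where
 * sleeping and most socket calls are unsafe, so it only queues the endpoint
 * on req_list and pokes iw_cxgb_taskq.  process_req() then runs from the
 * taskqueue thread and dispatches each queued endpoint to
 * process_socket_event().
 */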

static void
start_ep_timer(struct iwch_ep *ep)
{
	CTR2(KTR_IW_CXGB, "%s ep %p", __FUNCTION__, ep);
	if (callout_pending(&ep->timer)) {
		CTR2(KTR_IW_CXGB, "%s stopped / restarted timer ep %p", __FUNCTION__, ep);
		callout_deactivate(&ep->timer);
		callout_drain(&ep->timer);
	} else {
		/*
		 * XXX this looks racy
		 */
		get_ep(&ep->com);
		callout_init(&ep->timer, TRUE);
	}
	callout_reset(&ep->timer, ep_timeout_secs * hz, ep_timeout, ep);
}

static void
stop_ep_timer(struct iwch_ep *ep)
{
	CTR2(KTR_IW_CXGB, "%s ep %p", __FUNCTION__, ep);
	if (!callout_pending(&ep->timer)) {
		CTR3(KTR_IW_CXGB, "%s timer stopped when it's not running!  ep %p state %u\n",
		    __func__, ep, ep->com.state);
		return;
	}
	callout_drain(&ep->timer);
	put_ep(&ep->com);
}

static int
set_tcpinfo(struct iwch_ep *ep)
{
	struct socket *so = ep->com.so;
	struct inpcb *inp = sotoinpcb(so);
	struct tcpcb *tp;
	struct toepcb *toep;
	int rc = 0;

	INP_WLOCK(inp);
	tp = intotcpcb(inp);

	if ((tp->t_flags & TF_TOE) == 0) {
		rc = EINVAL;
		printf("%s: connection NOT OFFLOADED!\n", __func__);
		goto done;
	}
	toep = tp->t_toe;

	ep->hwtid = toep->tp_tid;
	ep->snd_seq = tp->snd_nxt;
	ep->rcv_seq = tp->rcv_nxt;
	ep->emss = tp->t_maxseg;
	if (ep->emss < 128)
		ep->emss = 128;
done:
	INP_WUNLOCK(inp);
	return (rc);
}

static enum iwch_ep_state
state_read(struct iwch_ep_common *epc)
{
	enum iwch_ep_state state;

	mtx_lock(&epc->lock);
	state = epc->state;
	mtx_unlock(&epc->lock);
	return state;
}

static void
__state_set(struct iwch_ep_common *epc, enum iwch_ep_state new)
{
	epc->state = new;
}

static void
state_set(struct iwch_ep_common *epc, enum iwch_ep_state new)
{

	mtx_lock(&epc->lock);
	CTR3(KTR_IW_CXGB, "%s - %s -> %s", __FUNCTION__, states[epc->state], states[new]);
	__state_set(epc, new);
	mtx_unlock(&epc->lock);
	return;
}

static void *
alloc_ep(int size, int flags)
{
	struct iwch_ep_common *epc;

	epc = malloc(size, M_DEVBUF, flags);
	if (epc) {
		memset(epc, 0, size);
		refcount_init(&epc->refcount, 1);
		mtx_init(&epc->lock, "iwch_epc lock", NULL, MTX_DEF|MTX_DUPOK);
		cv_init(&epc->waitq, "iwch_epc cv");
	}
	CTR2(KTR_IW_CXGB, "%s alloc ep %p", __FUNCTION__, epc);
	return epc;
}

void
__free_ep(struct iwch_ep_common *epc)
{
	CTR3(KTR_IW_CXGB, "%s ep %p state %s", __FUNCTION__, epc, states[state_read(epc)]);
	KASSERT(!epc->so, ("%s warning ep->so %p \n", __FUNCTION__, epc->so));
	KASSERT(!epc->entry.tqe_prev, ("%s epc %p still on req list!\n", __FUNCTION__, epc));
	free(epc, M_DEVBUF);
}

static struct rtentry *
find_route(__be32 local_ip, __be32 peer_ip, __be16 local_port,
    __be16 peer_port, u8 tos)
{
	struct route iproute;
	struct sockaddr_in *dst = (struct sockaddr_in *)&iproute.ro_dst;

	bzero(&iproute, sizeof iproute);
	dst->sin_family = AF_INET;
	dst->sin_len = sizeof *dst;
	dst->sin_addr.s_addr = peer_ip;

	rtalloc(&iproute);
	return iproute.ro_rt;
}

static void
close_socket(struct iwch_ep_common *epc, int close)
{
	CTR4(KTR_IW_CXGB, "%s ep %p so %p state %s", __FUNCTION__, epc, epc->so, states[epc->state]);
	SOCK_LOCK(epc->so);
	soupcall_clear(epc->so, SO_RCV);
	SOCK_UNLOCK(epc->so);
	if (close)
		soclose(epc->so);
	else
		/*
		 * SHUT_RDWR, not SHUT_WR|SHUT_RD: the SHUT_* values are an
		 * enumeration (0, 1, 2), not a bit mask.
		 */
		soshutdown(epc->so, SHUT_RDWR);
	epc->so = NULL;
}

static void
shutdown_socket(struct iwch_ep_common *epc)
{
	CTR4(KTR_IW_CXGB, "%s ep %p so %p state %s", __FUNCTION__, epc, epc->so, states[epc->state]);
	soshutdown(epc->so, SHUT_WR);
}

static void
abort_socket(struct iwch_ep *ep)
{
	struct sockopt sopt;
	int err;
	struct linger l;

	CTR4(KTR_IW_CXGB, "%s ep %p so %p state %s", __FUNCTION__, ep, ep->com.so, states[ep->com.state]);
	l.l_onoff = 1;
	l.l_linger = 0;

	/* linger_time of 0 forces RST to be sent */
	sopt.sopt_dir = SOPT_SET;
	sopt.sopt_level = SOL_SOCKET;
	sopt.sopt_name = SO_LINGER;
	sopt.sopt_val = (caddr_t)&l;
	sopt.sopt_valsize = sizeof l;
	sopt.sopt_td = NULL;
	err = sosetopt(ep->com.so, &sopt);
	if (err)
		printf("%s can't set linger to 0, no RST! err %d\n", __FUNCTION__, err);
}
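
/*
 * For reference, the sosetopt() call above is the in-kernel analogue of the
 * familiar userland idiom (illustrative sketch only):
 *
 *	struct linger l = { .l_onoff = 1, .l_linger = 0 };
 *	setsockopt(s, SOL_SOCKET, SO_LINGER, &l, sizeof(l));
 *	close(s);	// connection is reset (RST) instead of FIN-closed
 */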
334
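/*
 * MPA start frames (built here and parsed in process_mpa_reply/request)
 * follow the MPA spec (RFC 5044): a 16-byte key ("MPA ID Req Frame" or
 * "MPA ID Rep Frame"), a flags byte carrying the marker (M), CRC (C) and
 * reject (R) bits, a revision byte, a 16-bit private data length in network
 * order, then the private data itself (at most 512 bytes per the spec;
 * this driver caps it at MPA_MAX_PRIVATE_DATA).
 */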
static void
send_mpa_req(struct iwch_ep *ep)
{
	int mpalen;
	struct mpa_message *mpa;
	struct mbuf *m;
	int err;

	CTR3(KTR_IW_CXGB, "%s ep %p pd_len %d", __FUNCTION__, ep, ep->plen);

	mpalen = sizeof(*mpa) + ep->plen;
	m = m_gethdr(M_NOWAIT, MT_DATA);
	if (m == NULL) {
		connect_reply_upcall(ep, -ENOMEM);
		return;
	}
	mpa = mtod(m, struct mpa_message *);
	m->m_len = mpalen;
	m->m_pkthdr.len = mpalen;
	memset(mpa, 0, sizeof(*mpa));
	memcpy(mpa->key, MPA_KEY_REQ, sizeof(mpa->key));
	mpa->flags = (crc_enabled ? MPA_CRC : 0) |
		     (markers_enabled ? MPA_MARKERS : 0);
	mpa->private_data_size = htons(ep->plen);
	mpa->revision = mpa_rev;
	if (ep->plen)
		memcpy(mpa->private_data, ep->mpa_pkt + sizeof(*mpa), ep->plen);

	err = sosend(ep->com.so, NULL, NULL, m, NULL, MSG_DONTWAIT, ep->com.thread);
	if (err) {
		/* sosend() consumes the mbuf chain, even on failure. */
		connect_reply_upcall(ep, -err);
		return;
	}

	start_ep_timer(ep);
	state_set(&ep->com, MPA_REQ_SENT);
	return;
}

static int
send_mpa_reject(struct iwch_ep *ep, const void *pdata, u8 plen)
{
	int mpalen;
	struct mpa_message *mpa;
	struct mbuf *m;
	int err;

	CTR3(KTR_IW_CXGB, "%s ep %p plen %d", __FUNCTION__, ep, plen);

	mpalen = sizeof(*mpa) + plen;

	m = m_gethdr(M_NOWAIT, MT_DATA);
	if (m == NULL) {
		printf("%s - cannot alloc mbuf!\n", __FUNCTION__);
		return (-ENOMEM);
	}
	mpa = mtod(m, struct mpa_message *);
	m->m_len = mpalen;
	m->m_pkthdr.len = mpalen;
	memset(mpa, 0, sizeof(*mpa));
	memcpy(mpa->key, MPA_KEY_REP, sizeof(mpa->key));
	mpa->flags = MPA_REJECT;
	mpa->revision = mpa_rev;
	mpa->private_data_size = htons(plen);
	if (plen)
		memcpy(mpa->private_data, pdata, plen);
	err = sosend(ep->com.so, NULL, NULL, m, NULL, MSG_DONTWAIT, ep->com.thread);
	PANIC_IF(err);
	return 0;
}

static int
send_mpa_reply(struct iwch_ep *ep, const void *pdata, u8 plen)
{
	int mpalen;
	struct mpa_message *mpa;
	struct mbuf *m;

	CTR4(KTR_IW_CXGB, "%s ep %p so %p plen %d", __FUNCTION__, ep, ep->com.so, plen);

	mpalen = sizeof(*mpa) + plen;

	m = m_gethdr(M_NOWAIT, MT_DATA);
	if (m == NULL) {
		printf("%s - cannot alloc mbuf!\n", __FUNCTION__);
		return (-ENOMEM);
	}
	mpa = mtod(m, struct mpa_message *);
	m->m_len = mpalen;
	m->m_pkthdr.len = mpalen;
	memset(mpa, 0, sizeof(*mpa));
	memcpy(mpa->key, MPA_KEY_REP, sizeof(mpa->key));
	mpa->flags = (ep->mpa_attr.crc_enabled ? MPA_CRC : 0) |
		     (markers_enabled ? MPA_MARKERS : 0);
	mpa->revision = mpa_rev;
	mpa->private_data_size = htons(plen);
	if (plen)
		memcpy(mpa->private_data, pdata, plen);

	state_set(&ep->com, MPA_REP_SENT);
	return sosend(ep->com.so, NULL, NULL, m, NULL, MSG_DONTWAIT,
		ep->com.thread);
}

static void
close_complete_upcall(struct iwch_ep *ep)
{
	struct iw_cm_event event;

	CTR4(KTR_IW_CXGB, "%s ep %p so %p state %s", __FUNCTION__, ep, ep->com.so, states[ep->com.state]);
	memset(&event, 0, sizeof(event));
	event.event = IW_CM_EVENT_CLOSE;
	if (ep->com.cm_id) {
		CTR3(KTR_IW_CXGB, "close complete delivered ep %p cm_id %p tid %d",
		     ep, ep->com.cm_id, ep->hwtid);
		ep->com.cm_id->event_handler(ep->com.cm_id, &event);
		ep->com.cm_id->rem_ref(ep->com.cm_id);
		ep->com.cm_id = NULL;
		ep->com.qp = NULL;
	}
}

static void
abort_connection(struct iwch_ep *ep)
{
	CTR4(KTR_IW_CXGB, "%s ep %p so %p state %s", __FUNCTION__, ep, ep->com.so, states[ep->com.state]);
	state_set(&ep->com, ABORTING);
	abort_socket(ep);
	close_socket(&ep->com, 0);
	close_complete_upcall(ep);
	state_set(&ep->com, DEAD);
	put_ep(&ep->com);
}

static void
peer_close_upcall(struct iwch_ep *ep)
{
	struct iw_cm_event event;

	CTR4(KTR_IW_CXGB, "%s ep %p so %p state %s", __FUNCTION__, ep, ep->com.so, states[ep->com.state]);
	memset(&event, 0, sizeof(event));
	event.event = IW_CM_EVENT_DISCONNECT;
	if (ep->com.cm_id) {
		CTR3(KTR_IW_CXGB, "peer close delivered ep %p cm_id %p tid %d",
		     ep, ep->com.cm_id, ep->hwtid);
		ep->com.cm_id->event_handler(ep->com.cm_id, &event);
	}
}

static void
peer_abort_upcall(struct iwch_ep *ep)
{
	struct iw_cm_event event;

	CTR4(KTR_IW_CXGB, "%s ep %p so %p state %s", __FUNCTION__, ep, ep->com.so, states[ep->com.state]);
	memset(&event, 0, sizeof(event));
	event.event = IW_CM_EVENT_CLOSE;
	event.status = ECONNRESET;
	if (ep->com.cm_id) {
		CTR3(KTR_IW_CXGB, "abort delivered ep %p cm_id %p tid %d", ep,
		     ep->com.cm_id, ep->hwtid);
		ep->com.cm_id->event_handler(ep->com.cm_id, &event);
		ep->com.cm_id->rem_ref(ep->com.cm_id);
		ep->com.cm_id = NULL;
		ep->com.qp = NULL;
	}
}

static void
connect_reply_upcall(struct iwch_ep *ep, int status)
{
	struct iw_cm_event event;

	CTR5(KTR_IW_CXGB, "%s ep %p so %p state %s status %d", __FUNCTION__, ep, ep->com.so, states[ep->com.state], status);
	memset(&event, 0, sizeof(event));
	event.event = IW_CM_EVENT_CONNECT_REPLY;
	event.status = status;
	event.local_addr = ep->com.local_addr;
	event.remote_addr = ep->com.remote_addr;

	if ((status == 0) || (status == ECONNREFUSED)) {
		event.private_data_len = ep->plen;
		event.private_data = ep->mpa_pkt + sizeof(struct mpa_message);
	}
	if (ep->com.cm_id) {
		CTR4(KTR_IW_CXGB, "%s ep %p tid %d status %d", __FUNCTION__, ep,
		     ep->hwtid, status);
		ep->com.cm_id->event_handler(ep->com.cm_id, &event);
	}
	if (status < 0) {
		ep->com.cm_id->rem_ref(ep->com.cm_id);
		ep->com.cm_id = NULL;
		ep->com.qp = NULL;
	}
}

static void
connect_request_upcall(struct iwch_ep *ep)
{
	struct iw_cm_event event;

	CTR4(KTR_IW_CXGB, "%s ep %p so %p state %s", __FUNCTION__, ep, ep->com.so, states[ep->com.state]);
	memset(&event, 0, sizeof(event));
	event.event = IW_CM_EVENT_CONNECT_REQUEST;
	event.local_addr = ep->com.local_addr;
	event.remote_addr = ep->com.remote_addr;
	event.private_data_len = ep->plen;
	event.private_data = ep->mpa_pkt + sizeof(struct mpa_message);
	event.provider_data = ep;
	event.so = ep->com.so;
	if (state_read(&ep->parent_ep->com) != DEAD) {
		get_ep(&ep->com);
		ep->parent_ep->com.cm_id->event_handler(
						ep->parent_ep->com.cm_id,
						&event);
	}
	put_ep(&ep->parent_ep->com);
}

static void
established_upcall(struct iwch_ep *ep)
{
	struct iw_cm_event event;

	CTR4(KTR_IW_CXGB, "%s ep %p so %p state %s", __FUNCTION__, ep, ep->com.so, states[ep->com.state]);
	memset(&event, 0, sizeof(event));
	event.event = IW_CM_EVENT_ESTABLISHED;
	if (ep->com.cm_id) {
		CTR3(KTR_IW_CXGB, "%s ep %p tid %d", __FUNCTION__, ep, ep->hwtid);
		ep->com.cm_id->event_handler(ep->com.cm_id, &event);
	}
}

static void
process_mpa_reply(struct iwch_ep *ep)
{
	struct mpa_message *mpa;
	u16 plen;
	struct iwch_qp_attributes attrs;
	enum iwch_qp_attr_mask mask;
	int err;
	struct mbuf *top, *m;
	int flags = MSG_DONTWAIT;
	struct uio uio;
	int len;

	CTR4(KTR_IW_CXGB, "%s ep %p so %p state %s", __FUNCTION__, ep, ep->com.so, states[ep->com.state]);

	/*
	 * Stop mpa timer.  If it expired, then the state has
	 * changed and we bail since ep_timeout already aborted
	 * the connection.
	 */
	stop_ep_timer(ep);
	if (state_read(&ep->com) != MPA_REQ_SENT)
		return;

	uio.uio_resid = len = 1000000;
	uio.uio_td = ep->com.thread;
	err = soreceive(ep->com.so, NULL, &uio, &top, NULL, &flags);
	if (err) {
		if (err == EWOULDBLOCK) {
			start_ep_timer(ep);
			return;
		}
		err = -err;
		goto err;
	}

	if (ep->com.so->so_rcv.sb_mb) {
		printf("%s data after soreceive called! so %p sb_mb %p top %p\n",
			__FUNCTION__, ep->com.so, ep->com.so->so_rcv.sb_mb, top);
	}

	m = top;
	do {
		/*
		 * If we get more than the supported amount of private data
		 * then we must fail this connection.
		 */
		if (ep->mpa_pkt_len + m->m_len > sizeof(ep->mpa_pkt)) {
			err = (-EINVAL);
			goto err;
		}

		/*
		 * Copy the new data into our accumulation buffer.
		 */
		m_copydata(m, 0, m->m_len, &(ep->mpa_pkt[ep->mpa_pkt_len]));
		ep->mpa_pkt_len += m->m_len;
		if (!m->m_next)
			m = m->m_nextpkt;
		else
			m = m->m_next;
	} while (m);

	m_freem(top);

	/*
	 * If we don't even have the mpa message, then bail.
	 */
	if (ep->mpa_pkt_len < sizeof(*mpa))
		return;
	mpa = (struct mpa_message *)ep->mpa_pkt;

	/* Validate MPA header. */
	if (mpa->revision != mpa_rev) {
		CTR2(KTR_IW_CXGB, "%s bad mpa rev %d", __FUNCTION__, mpa->revision);
		err = EPROTO;
		goto err;
	}
	if (memcmp(mpa->key, MPA_KEY_REP, sizeof(mpa->key))) {
		CTR2(KTR_IW_CXGB, "%s bad mpa key |%16s|", __FUNCTION__, mpa->key);
		err = EPROTO;
		goto err;
	}

	plen = ntohs(mpa->private_data_size);

	/*
	 * Fail if there's too much private data.
	 */
	if (plen > MPA_MAX_PRIVATE_DATA) {
		CTR2(KTR_IW_CXGB, "%s plen too big %d", __FUNCTION__, plen);
		err = EPROTO;
		goto err;
	}

	/*
	 * Fail if plen does not account for the packet size.
	 */
	if (ep->mpa_pkt_len > (sizeof(*mpa) + plen)) {
		CTR2(KTR_IW_CXGB, "%s pkt too big %d", __FUNCTION__, ep->mpa_pkt_len);
		err = EPROTO;
		goto err;
	}

	ep->plen = (u8) plen;

	/*
	 * If we don't have all the pdata yet, then bail.
	 * We'll continue processing when more data arrives.
	 */
	if (ep->mpa_pkt_len < (sizeof(*mpa) + plen))
		return;

	if (mpa->flags & MPA_REJECT) {
		err = ECONNREFUSED;
		goto err;
	}

	/*
	 * If we get here we have accumulated the entire mpa
	 * start reply message including private data, and
	 * the MPA header is valid.
	 */
	CTR1(KTR_IW_CXGB, "%s mpa rpl looks good!", __FUNCTION__);
	state_set(&ep->com, FPDU_MODE);
	ep->mpa_attr.initiator = 1;
	ep->mpa_attr.crc_enabled = ((mpa->flags & MPA_CRC) | crc_enabled) ? 1 : 0;
	ep->mpa_attr.recv_marker_enabled = markers_enabled;
	ep->mpa_attr.xmit_marker_enabled = (mpa->flags & MPA_MARKERS) ? 1 : 0;
	ep->mpa_attr.version = mpa_rev;
	if (set_tcpinfo(ep)) {
		printf("%s set_tcpinfo error\n", __FUNCTION__);
		err = -EINVAL;
		goto err;
	}
	CTR5(KTR_IW_CXGB, "%s - crc_enabled=%d, recv_marker_enabled=%d, "
	     "xmit_marker_enabled=%d, version=%d", __FUNCTION__,
	     ep->mpa_attr.crc_enabled, ep->mpa_attr.recv_marker_enabled,
	     ep->mpa_attr.xmit_marker_enabled, ep->mpa_attr.version);

	attrs.mpa_attr = ep->mpa_attr;
	attrs.max_ird = ep->ird;
	attrs.max_ord = ep->ord;
	attrs.llp_stream_handle = ep;
	attrs.next_state = IWCH_QP_STATE_RTS;

	mask = IWCH_QP_ATTR_NEXT_STATE |
	    IWCH_QP_ATTR_LLP_STREAM_HANDLE | IWCH_QP_ATTR_MPA_ATTR |
	    IWCH_QP_ATTR_MAX_IRD | IWCH_QP_ATTR_MAX_ORD;

	/* bind QP and TID with INIT_WR */
	err = iwch_modify_qp(ep->com.qp->rhp,
			     ep->com.qp, mask, &attrs, 1);
	if (!err)
		goto out;
err:
	abort_connection(ep);
out:
	connect_reply_upcall(ep, err);
	return;
}

static void
process_mpa_request(struct iwch_ep *ep)
{
	struct mpa_message *mpa;
	u16 plen;
	int flags = MSG_DONTWAIT;
	struct mbuf *top, *m;
	int err;
	struct uio uio;
	int len;

	CTR4(KTR_IW_CXGB, "%s ep %p so %p state %s", __FUNCTION__, ep, ep->com.so, states[ep->com.state]);

	/*
	 * Stop mpa timer.  If it expired, then the state has
	 * changed and we bail since ep_timeout already aborted
	 * the connection.
	 */
	stop_ep_timer(ep);
	if (state_read(&ep->com) != MPA_REQ_WAIT)
		return;

	uio.uio_resid = len = 1000000;
	uio.uio_td = ep->com.thread;
	err = soreceive(ep->com.so, NULL, &uio, &top, NULL, &flags);
	if (err) {
		if (err == EWOULDBLOCK) {
			start_ep_timer(ep);
			return;
		}
		err = -err;
		goto err;
	}

	m = top;
	do {
		/*
		 * If we get more than the supported amount of private data
		 * then we must fail this connection.
		 */
		if (ep->mpa_pkt_len + m->m_len > sizeof(ep->mpa_pkt)) {
			CTR2(KTR_IW_CXGB, "%s mpa message too big %d", __FUNCTION__,
				ep->mpa_pkt_len + m->m_len);
			goto err;
		}

		/*
		 * Copy the new data into our accumulation buffer.
		 */
		m_copydata(m, 0, m->m_len, &(ep->mpa_pkt[ep->mpa_pkt_len]));
		ep->mpa_pkt_len += m->m_len;

		if (!m->m_next)
			m = m->m_nextpkt;
		else
			m = m->m_next;
	} while (m);

	m_freem(top);

	/*
	 * If we don't even have the mpa message, then bail.
	 * We'll continue processing when more data arrives.
	 */
	if (ep->mpa_pkt_len < sizeof(*mpa)) {
		start_ep_timer(ep);
		CTR2(KTR_IW_CXGB, "%s not enough header %d...waiting...", __FUNCTION__,
			ep->mpa_pkt_len);
		return;
	}
	mpa = (struct mpa_message *) ep->mpa_pkt;

	/*
	 * Validate MPA Header.
	 */
	if (mpa->revision != mpa_rev) {
		CTR2(KTR_IW_CXGB, "%s bad mpa rev %d", __FUNCTION__, mpa->revision);
		goto err;
	}

	if (memcmp(mpa->key, MPA_KEY_REQ, sizeof(mpa->key))) {
		CTR2(KTR_IW_CXGB, "%s bad mpa key |%16s|", __FUNCTION__, mpa->key);
		goto err;
	}

	plen = ntohs(mpa->private_data_size);

	/*
	 * Fail if there's too much private data.
	 */
	if (plen > MPA_MAX_PRIVATE_DATA) {
		CTR2(KTR_IW_CXGB, "%s plen too big %d", __FUNCTION__, plen);
		goto err;
	}

	/*
	 * Fail if plen does not account for the packet size.
	 */
	if (ep->mpa_pkt_len > (sizeof(*mpa) + plen)) {
		CTR2(KTR_IW_CXGB, "%s more data after private data %d", __FUNCTION__,
			ep->mpa_pkt_len);
		goto err;
	}
	ep->plen = (u8) plen;

	/*
	 * If we don't have all the pdata yet, then bail.
	 */
	if (ep->mpa_pkt_len < (sizeof(*mpa) + plen)) {
		start_ep_timer(ep);
		CTR2(KTR_IW_CXGB, "%s more mpa msg to come %d", __FUNCTION__,
			ep->mpa_pkt_len);
		return;
	}

	/*
	 * If we get here we have accumulated the entire mpa
	 * start request message including private data.
	 */
	ep->mpa_attr.initiator = 0;
	ep->mpa_attr.crc_enabled = ((mpa->flags & MPA_CRC) | crc_enabled) ? 1 : 0;
	ep->mpa_attr.recv_marker_enabled = markers_enabled;
	ep->mpa_attr.xmit_marker_enabled = (mpa->flags & MPA_MARKERS) ? 1 : 0;
	ep->mpa_attr.version = mpa_rev;
	if (set_tcpinfo(ep)) {
		printf("%s set_tcpinfo error\n", __FUNCTION__);
		goto err;
	}
	CTR5(KTR_IW_CXGB, "%s - crc_enabled=%d, recv_marker_enabled=%d, "
	     "xmit_marker_enabled=%d, version=%d", __FUNCTION__,
	     ep->mpa_attr.crc_enabled, ep->mpa_attr.recv_marker_enabled,
	     ep->mpa_attr.xmit_marker_enabled, ep->mpa_attr.version);

	state_set(&ep->com, MPA_REQ_RCVD);

	/* drive upcall */
	connect_request_upcall(ep);
	return;
err:
	abort_connection(ep);
	return;
}

static void
process_peer_close(struct iwch_ep *ep)
{
	struct iwch_qp_attributes attrs;
	int disconnect = 1;
	int release = 0;

	CTR4(KTR_IW_CXGB, "%s ep %p so %p state %s", __FUNCTION__, ep, ep->com.so, states[ep->com.state]);

	mtx_lock(&ep->com.lock);
	switch (ep->com.state) {
	case MPA_REQ_WAIT:
		__state_set(&ep->com, CLOSING);
		break;
	case MPA_REQ_SENT:
		__state_set(&ep->com, CLOSING);
		connect_reply_upcall(ep, -ECONNRESET);
		break;
	case MPA_REQ_RCVD:

		/*
		 * We're gonna mark this puppy DEAD, but keep
		 * the reference on it until the ULP accepts or
		 * rejects the CR.
		 */
		__state_set(&ep->com, CLOSING);
		break;
	case MPA_REP_SENT:
		__state_set(&ep->com, CLOSING);
		break;
	case FPDU_MODE:
		start_ep_timer(ep);
		__state_set(&ep->com, CLOSING);
		attrs.next_state = IWCH_QP_STATE_CLOSING;
		iwch_modify_qp(ep->com.qp->rhp, ep->com.qp,
			       IWCH_QP_ATTR_NEXT_STATE, &attrs, 1);
		peer_close_upcall(ep);
		break;
	case ABORTING:
		disconnect = 0;
		break;
	case CLOSING:
		__state_set(&ep->com, MORIBUND);
		disconnect = 0;
		break;
	case MORIBUND:
		stop_ep_timer(ep);
		if (ep->com.cm_id && ep->com.qp) {
			attrs.next_state = IWCH_QP_STATE_IDLE;
			iwch_modify_qp(ep->com.qp->rhp, ep->com.qp,
				       IWCH_QP_ATTR_NEXT_STATE, &attrs, 1);
		}
		close_socket(&ep->com, 0);
		close_complete_upcall(ep);
		__state_set(&ep->com, DEAD);
		release = 1;
		disconnect = 0;
		break;
	case DEAD:
		disconnect = 0;
		break;
	default:
		PANIC_IF(1);
	}
	mtx_unlock(&ep->com.lock);
	if (disconnect)
		iwch_ep_disconnect(ep, 0, M_NOWAIT);
	if (release)
		put_ep(&ep->com);
	return;
}

static void
process_conn_error(struct iwch_ep *ep)
{
	struct iwch_qp_attributes attrs;
	int ret;

	mtx_lock(&ep->com.lock);
	CTR3(KTR_IW_CXGB, "%s ep %p state %u", __func__, ep, ep->com.state);
	switch (ep->com.state) {
	case MPA_REQ_WAIT:
		stop_ep_timer(ep);
		break;
	case MPA_REQ_SENT:
		stop_ep_timer(ep);
		connect_reply_upcall(ep, -ECONNRESET);
		break;
	case MPA_REP_SENT:
		ep->com.rpl_err = ECONNRESET;
		CTR1(KTR_IW_CXGB, "waking up ep %p", ep);
		break;
	case MPA_REQ_RCVD:

		/*
		 * We're gonna mark this puppy DEAD, but keep
		 * the reference on it until the ULP accepts or
		 * rejects the CR.
		 */
		break;
	case MORIBUND:
	case CLOSING:
		stop_ep_timer(ep);
		/*FALLTHROUGH*/
	case FPDU_MODE:
		if (ep->com.cm_id && ep->com.qp) {
			attrs.next_state = IWCH_QP_STATE_ERROR;
			ret = iwch_modify_qp(ep->com.qp->rhp,
				     ep->com.qp, IWCH_QP_ATTR_NEXT_STATE,
				     &attrs, 1);
			if (ret)
				log(LOG_ERR,
				       "%s - qp <- error failed!\n",
				       __FUNCTION__);
		}
		peer_abort_upcall(ep);
		break;
	case ABORTING:
		break;
	case DEAD:
		mtx_unlock(&ep->com.lock);
		CTR2(KTR_IW_CXGB, "%s so_error %d IN DEAD STATE!!!!", __FUNCTION__,
			ep->com.so->so_error);
		return;
	default:
		PANIC_IF(1);
		break;
	}

	if (ep->com.state != ABORTING) {
		close_socket(&ep->com, 0);
		__state_set(&ep->com, DEAD);
		put_ep(&ep->com);
	}
	mtx_unlock(&ep->com.lock);
	return;
}

static void
process_close_complete(struct iwch_ep *ep)
{
	struct iwch_qp_attributes attrs;
	int release = 0;

	CTR4(KTR_IW_CXGB, "%s ep %p so %p state %s", __FUNCTION__, ep, ep->com.so, states[ep->com.state]);
	PANIC_IF(!ep);

	/* The cm_id may be null if we failed to connect */
	mtx_lock(&ep->com.lock);
	switch (ep->com.state) {
	case CLOSING:
		__state_set(&ep->com, MORIBUND);
		break;
	case MORIBUND:
		stop_ep_timer(ep);
		if ((ep->com.cm_id) && (ep->com.qp)) {
			attrs.next_state = IWCH_QP_STATE_IDLE;
			iwch_modify_qp(ep->com.qp->rhp,
					     ep->com.qp,
					     IWCH_QP_ATTR_NEXT_STATE,
					     &attrs, 1);
		}
		if (ep->parent_ep)
			close_socket(&ep->com, 1);
		else
			close_socket(&ep->com, 0);
		close_complete_upcall(ep);
		__state_set(&ep->com, DEAD);
		release = 1;
		break;
	case ABORTING:
		break;
	case DEAD:
	default:
		PANIC_IF(1);
		break;
	}
	mtx_unlock(&ep->com.lock);
	if (release)
		put_ep(&ep->com);
	return;
}

/*
 * T3A does 3 things when a TERM is received:
 * 1) send up a CPL_RDMA_TERMINATE message with the TERM packet
 * 2) generate an async event on the QP with the TERMINATE opcode
 * 3) post a TERMINATE opcode cqe into the associated CQ.
 *
 * For (1), we save the message in the qp for later consumption by the consumer.
 * For (2), we move the QP into TERMINATE, post a QP event and disconnect.
 * For (3), we toss the CQE in cxio_poll_cq().
 *
 * terminate() handles case (1)...
 */
static int
terminate(struct sge_qset *qs, struct rsp_desc *r, struct mbuf *m)
{
	struct adapter *sc = qs->adap;
	struct tom_data *td = sc->tom_softc;
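	/*
	 * The hardware TID is recovered from the second 32-bit word of the
	 * response descriptor: after byte-swapping, bits 8..27 carry the
	 * 20-bit TID, which keys the lookup of the offloaded connection's
	 * toepcb (and from there the socket and endpoint).
	 */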
	uint32_t hash = *((uint32_t *)r + 1);
	unsigned int tid = (ntohl(hash) >> 8) & 0xfffff;
	struct toepcb *toep = lookup_tid(&td->tid_maps, tid);
	struct socket *so = toep->tp_inp->inp_socket;
	struct iwch_ep *ep = so->so_rcv.sb_upcallarg;

	if (state_read(&ep->com) != FPDU_MODE)
		goto done;

	m_adj(m, sizeof(struct cpl_rdma_terminate));

	CTR4(KTR_IW_CXGB, "%s: tid %u, ep %p, saved %d bytes",
	    __func__, tid, ep, m->m_len);

	m_copydata(m, 0, m->m_len, ep->com.qp->attr.terminate_buffer);
	ep->com.qp->attr.terminate_msg_len = m->m_len;
	ep->com.qp->attr.is_terminate_local = 0;

done:
	m_freem(m);
	return (0);
}

static int
ec_status(struct sge_qset *qs, struct rsp_desc *r, struct mbuf *m)
{
	struct adapter *sc = qs->adap;
	struct tom_data *td = sc->tom_softc;
	struct cpl_rdma_ec_status *rep = mtod(m, void *);
	unsigned int tid = GET_TID(rep);
	struct toepcb *toep = lookup_tid(&td->tid_maps, tid);
	struct socket *so = toep->tp_inp->inp_socket;
	struct iwch_ep *ep = so->so_rcv.sb_upcallarg;

	if (rep->status) {
		struct iwch_qp_attributes attrs;

		CTR1(KTR_IW_CXGB, "%s BAD CLOSE - Aborting", __FUNCTION__);
		stop_ep_timer(ep);
		attrs.next_state = IWCH_QP_STATE_ERROR;
		iwch_modify_qp(ep->com.qp->rhp,
			     ep->com.qp,
			     IWCH_QP_ATTR_NEXT_STATE,
			     &attrs, 1);
		abort_connection(ep);
	}

	m_freem(m);
	return (0);
}

static void
ep_timeout(void *arg)
{
	struct iwch_ep *ep = (struct iwch_ep *)arg;
	struct iwch_qp_attributes attrs;
	int err = 0;
	int abort = 1;

	mtx_lock(&ep->com.lock);
	CTR4(KTR_IW_CXGB, "%s ep %p so %p state %s", __FUNCTION__, ep, ep->com.so, states[ep->com.state]);
	switch (ep->com.state) {
	case MPA_REQ_SENT:
		__state_set(&ep->com, ABORTING);
		connect_reply_upcall(ep, -ETIMEDOUT);
		break;
	case MPA_REQ_WAIT:
		__state_set(&ep->com, ABORTING);
		break;
	case CLOSING:
	case MORIBUND:
		if (ep->com.cm_id && ep->com.qp)
			err = 1;
		__state_set(&ep->com, ABORTING);
		break;
	default:
		CTR3(KTR_IW_CXGB, "%s unexpected state ep %p state %u\n",
			__func__, ep, ep->com.state);
		abort = 0;
	}
	mtx_unlock(&ep->com.lock);
	if (err) {
		attrs.next_state = IWCH_QP_STATE_ERROR;
		iwch_modify_qp(ep->com.qp->rhp,
			     ep->com.qp, IWCH_QP_ATTR_NEXT_STATE,
			     &attrs, 1);
	}
	if (abort)
		abort_connection(ep);
	put_ep(&ep->com);
}

int
iwch_reject_cr(struct iw_cm_id *cm_id, const void *pdata, u8 pdata_len)
{
	int err;
	struct iwch_ep *ep = to_ep(cm_id);
	CTR4(KTR_IW_CXGB, "%s ep %p so %p state %s", __FUNCTION__, ep, ep->com.so, states[ep->com.state]);

	if (state_read(&ep->com) == DEAD) {
		put_ep(&ep->com);
		return (-ECONNRESET);
	}
	PANIC_IF(state_read(&ep->com) != MPA_REQ_RCVD);
	if (mpa_rev == 0) {
		abort_connection(ep);
	} else {
		err = send_mpa_reject(ep, pdata, pdata_len);
		/* SHUT_RDWR, not the magic value 3 (soshutdown rejects 3 as EINVAL). */
		err = soshutdown(ep->com.so, SHUT_RDWR);
	}
	put_ep(&ep->com);
	return 0;
}

int
iwch_accept_cr(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param)
{
	int err;
	struct iwch_qp_attributes attrs;
	enum iwch_qp_attr_mask mask;
	struct iwch_ep *ep = to_ep(cm_id);
	struct iwch_dev *h = to_iwch_dev(cm_id->device);
	struct iwch_qp *qp = get_qhp(h, conn_param->qpn);

	CTR4(KTR_IW_CXGB, "%s ep %p so %p state %s", __FUNCTION__, ep, ep->com.so, states[ep->com.state]);
	if (state_read(&ep->com) == DEAD) {
		err = -ECONNRESET;
		goto err;
	}

	PANIC_IF(state_read(&ep->com) != MPA_REQ_RCVD);
	PANIC_IF(!qp);

	if ((conn_param->ord > qp->rhp->attr.max_rdma_read_qp_depth) ||
	    (conn_param->ird > qp->rhp->attr.max_rdma_reads_per_qp)) {
		abort_connection(ep);
		err = -EINVAL;
		goto err;
	}

	cm_id->add_ref(cm_id);
	ep->com.cm_id = cm_id;
	ep->com.qp = qp;

	ep->com.rpl_err = 0;
	ep->com.rpl_done = 0;
	ep->ird = conn_param->ird;
	ep->ord = conn_param->ord;
	CTR3(KTR_IW_CXGB, "%s ird %d ord %d", __FUNCTION__, ep->ird, ep->ord);

	/* bind QP to EP and move to RTS */
	attrs.mpa_attr = ep->mpa_attr;
	attrs.max_ird = ep->ird;
	attrs.max_ord = ep->ord;
	attrs.llp_stream_handle = ep;
	attrs.next_state = IWCH_QP_STATE_RTS;

	/* bind QP and TID with INIT_WR */
	mask = IWCH_QP_ATTR_NEXT_STATE |
			     IWCH_QP_ATTR_LLP_STREAM_HANDLE |
			     IWCH_QP_ATTR_MPA_ATTR |
			     IWCH_QP_ATTR_MAX_IRD |
			     IWCH_QP_ATTR_MAX_ORD;

	err = iwch_modify_qp(ep->com.qp->rhp,
			     ep->com.qp, mask, &attrs, 1);

	if (err)
		goto err1;

	err = send_mpa_reply(ep, conn_param->private_data,
			     conn_param->private_data_len);
	if (err)
		goto err1;
	state_set(&ep->com, FPDU_MODE);
	established_upcall(ep);
	put_ep(&ep->com);
	return 0;
err1:
	ep->com.cm_id = NULL;
	ep->com.qp = NULL;
	cm_id->rem_ref(cm_id);
err:
	put_ep(&ep->com);
	return err;
}

static int
init_sock(struct iwch_ep_common *epc)
{
	int err;
	struct sockopt sopt;
	int on = 1;

	SOCK_LOCK(epc->so);
	soupcall_set(epc->so, SO_RCV, iwch_so_upcall, epc);
	epc->so->so_state |= SS_NBIO;
	SOCK_UNLOCK(epc->so);
	sopt.sopt_dir = SOPT_SET;
	sopt.sopt_level = IPPROTO_TCP;
	sopt.sopt_name = TCP_NODELAY;
	sopt.sopt_val = (caddr_t)&on;
	sopt.sopt_valsize = sizeof on;
	sopt.sopt_td = NULL;
	err = sosetopt(epc->so, &sopt);
	if (err)
		printf("%s can't set TCP_NODELAY err %d\n", __FUNCTION__, err);

	return 0;
}
1284
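/*
 * A destination is "loopback" if, with the port ignored, its address matches
 * one of our own interface addresses; sin_port is temporarily zeroed so that
 * ifa_ifwithaddr_check() compares on the address alone.  Such destinations
 * cannot be offloaded, so iwch_connect() rejects them up front.
 */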
static int
is_loopback_dst(struct iw_cm_id *cm_id)
{
	uint16_t port = cm_id->remote_addr.sin_port;
	int ifa_present;

	cm_id->remote_addr.sin_port = 0;
	ifa_present = ifa_ifwithaddr_check(
	    (struct sockaddr *)&cm_id->remote_addr);
	cm_id->remote_addr.sin_port = port;
	return (ifa_present);
}

int
iwch_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param)
{
	int err = 0;
	struct iwch_dev *h = to_iwch_dev(cm_id->device);
	struct iwch_ep *ep;
	struct rtentry *rt;
	struct toedev *tdev;

	if (is_loopback_dst(cm_id)) {
		err = -ENOSYS;
		goto out;
	}

	ep = alloc_ep(sizeof(*ep), M_NOWAIT);
	if (!ep) {
		printf("%s - cannot alloc ep.\n", __FUNCTION__);
		err = (-ENOMEM);
		goto out;
	}
	callout_init(&ep->timer, TRUE);
	ep->plen = conn_param->private_data_len;
	if (ep->plen)
		memcpy(ep->mpa_pkt + sizeof(struct mpa_message),
		       conn_param->private_data, ep->plen);
	ep->ird = conn_param->ird;
	ep->ord = conn_param->ord;

	cm_id->add_ref(cm_id);
	ep->com.cm_id = cm_id;
	ep->com.qp = get_qhp(h, conn_param->qpn);
	ep->com.thread = curthread;
	PANIC_IF(!ep->com.qp);
	CTR4(KTR_IW_CXGB, "%s qpn 0x%x qp %p cm_id %p", __FUNCTION__, conn_param->qpn,
	     ep->com.qp, cm_id);

	ep->com.so = cm_id->so;
	err = init_sock(&ep->com);
	if (err)
		goto fail2;

	/* find a route */
	rt = find_route(cm_id->local_addr.sin_addr.s_addr,
			cm_id->remote_addr.sin_addr.s_addr,
			cm_id->local_addr.sin_port,
			cm_id->remote_addr.sin_port, IPTOS_LOWDELAY);
	if (!rt) {
		printf("%s - cannot find route.\n", __FUNCTION__);
		err = EHOSTUNREACH;
		goto fail2;
	}

	/* TOE is a capability, so check if_capenable rather than if_flags. */
	if (!(rt->rt_ifp->if_capenable & IFCAP_TOE)) {
		printf("%s - interface not TOE capable.\n", __FUNCTION__);
		err = EHOSTUNREACH;
		RTFREE(rt);
		goto fail2;
	}
	tdev = TOEDEV(rt->rt_ifp);
	if (tdev == NULL) {
		printf("%s - No toedev for interface.\n", __FUNCTION__);
		err = EHOSTUNREACH;
		RTFREE(rt);
		goto fail2;
	}
	RTFREE(rt);

	state_set(&ep->com, CONNECTING);
	ep->com.local_addr = cm_id->local_addr;
	ep->com.remote_addr = cm_id->remote_addr;
	err = soconnect(ep->com.so, (struct sockaddr *)&ep->com.remote_addr,
		ep->com.thread);
	if (!err)
		goto out;
fail2:
	put_ep(&ep->com);
out:
	return err;
}

int
iwch_create_listen(struct iw_cm_id *cm_id, int backlog)
{
	int err = 0;
	struct iwch_listen_ep *ep;

	ep = alloc_ep(sizeof(*ep), M_NOWAIT);
	if (!ep) {
		printf("%s - cannot alloc ep.\n", __FUNCTION__);
		err = ENOMEM;
		goto out;
	}
	CTR2(KTR_IW_CXGB, "%s ep %p", __FUNCTION__, ep);
	cm_id->add_ref(cm_id);
	ep->com.cm_id = cm_id;
	ep->backlog = backlog;
	ep->com.local_addr = cm_id->local_addr;
	ep->com.thread = curthread;
	state_set(&ep->com, LISTEN);

	ep->com.so = cm_id->so;
	err = init_sock(&ep->com);
	if (err)
		goto fail;

	err = solisten(ep->com.so, ep->backlog, ep->com.thread);
	if (!err) {
		cm_id->provider_data = ep;
		goto out;
	}
	close_socket(&ep->com, 0);
fail:
	cm_id->rem_ref(cm_id);
	put_ep(&ep->com);
out:
	return err;
}

int
iwch_destroy_listen(struct iw_cm_id *cm_id)
{
	struct iwch_listen_ep *ep = to_listen_ep(cm_id);

	CTR2(KTR_IW_CXGB, "%s ep %p", __FUNCTION__, ep);

	state_set(&ep->com, DEAD);
	close_socket(&ep->com, 0);
	cm_id->rem_ref(cm_id);
	put_ep(&ep->com);
	return 0;
}

int
iwch_ep_disconnect(struct iwch_ep *ep, int abrupt, int flags)
{
	int close = 0;

	/* Assert before dereferencing ep in mtx_lock() below. */
	PANIC_IF(!ep);
	PANIC_IF(!ep->com.so);

	mtx_lock(&ep->com.lock);

	CTR5(KTR_IW_CXGB, "%s ep %p so %p state %s, abrupt %d", __FUNCTION__, ep,
	     ep->com.so, states[ep->com.state], abrupt);

	switch (ep->com.state) {
	case MPA_REQ_WAIT:
	case MPA_REQ_SENT:
	case MPA_REQ_RCVD:
	case MPA_REP_SENT:
	case FPDU_MODE:
		close = 1;
		if (abrupt)
			ep->com.state = ABORTING;
		else {
			ep->com.state = CLOSING;
			start_ep_timer(ep);
		}
		break;
	case CLOSING:
		close = 1;
		if (abrupt) {
			stop_ep_timer(ep);
			ep->com.state = ABORTING;
		} else
			ep->com.state = MORIBUND;
		break;
	case MORIBUND:
	case ABORTING:
	case DEAD:
		CTR3(KTR_IW_CXGB, "%s ignoring disconnect ep %p state %u\n",
			__func__, ep, ep->com.state);
		break;
	default:
		panic("unknown state: %d\n", ep->com.state);
		break;
	}

	mtx_unlock(&ep->com.lock);
	if (close) {
		if (abrupt)
			abort_connection(ep);
		else {
			if (!ep->parent_ep)
				__state_set(&ep->com, MORIBUND);
			shutdown_socket(&ep->com);
		}
	}
	return 0;
}

static void
process_data(struct iwch_ep *ep)
{
	struct sockaddr_in *local, *remote;

	CTR4(KTR_IW_CXGB, "%s ep %p so %p state %s", __FUNCTION__, ep, ep->com.so, states[ep->com.state]);

	switch (state_read(&ep->com)) {
	case MPA_REQ_SENT:
		process_mpa_reply(ep);
		break;
	case MPA_REQ_WAIT:

		/*
		 * XXX
		 * Set local and remote addrs here because when we
		 * dequeue the newly accepted socket, they aren't set
		 * yet in the pcb!
		 */
		in_getsockaddr(ep->com.so, (struct sockaddr **)&local);
		in_getpeeraddr(ep->com.so, (struct sockaddr **)&remote);
		CTR3(KTR_IW_CXGB, "%s local %s remote %s", __FUNCTION__,
			inet_ntoa(local->sin_addr),
			inet_ntoa(remote->sin_addr));
		ep->com.local_addr = *local;
		ep->com.remote_addr = *remote;
		free(local, M_SONAME);
		free(remote, M_SONAME);
		process_mpa_request(ep);
		break;
	default:
		if (ep->com.so->so_rcv.sb_cc)
			printf("%s Unexpected streaming data."
			       " ep %p state %d so %p so_state %x so_rcv.sb_cc %u so_rcv.sb_mb %p\n",
			       __FUNCTION__, ep, state_read(&ep->com), ep->com.so, ep->com.so->so_state,
			       ep->com.so->so_rcv.sb_cc, ep->com.so->so_rcv.sb_mb);
		break;
	}
	return;
}

static void
process_connected(struct iwch_ep *ep)
{
	CTR4(KTR_IW_CXGB, "%s ep %p so %p state %s", __FUNCTION__, ep, ep->com.so, states[ep->com.state]);
	if ((ep->com.so->so_state & SS_ISCONNECTED) && !ep->com.so->so_error) {
		send_mpa_req(ep);
	} else {
		connect_reply_upcall(ep, -ep->com.so->so_error);
		close_socket(&ep->com, 0);
		state_set(&ep->com, DEAD);
		put_ep(&ep->com);
	}
}
1541
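/*
 * Pull a completed connection off the listening socket's accept queue
 * without going through accept(2): this mirrors what kern_accept() does
 * internally (remove from so_comp, clear SQ_COMP, detach so_head, take a
 * reference), then installs our receive upcall before handing the socket
 * back.
 */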
static struct socket *
dequeue_socket(struct socket *head, struct sockaddr_in **remote, struct iwch_ep *child_ep)
{
	struct socket *so;

	ACCEPT_LOCK();
	so = TAILQ_FIRST(&head->so_comp);
	if (!so) {
		ACCEPT_UNLOCK();
		return NULL;
	}
	TAILQ_REMOVE(&head->so_comp, so, so_list);
	head->so_qlen--;
	SOCK_LOCK(so);
	so->so_qstate &= ~SQ_COMP;
	so->so_head = NULL;
	soref(so);
	soupcall_set(so, SO_RCV, iwch_so_upcall, child_ep);
	so->so_state |= SS_NBIO;
	PANIC_IF(!(so->so_state & SS_ISCONNECTED));
	PANIC_IF(so->so_error);
	SOCK_UNLOCK(so);
	ACCEPT_UNLOCK();
	soaccept(so, (struct sockaddr **)remote);
	return so;
}

static void
process_newconn(struct iwch_ep *parent_ep)
{
	struct socket *child_so;
	struct iwch_ep *child_ep;
	struct sockaddr_in *remote;

	CTR3(KTR_IW_CXGB, "%s parent ep %p so %p", __FUNCTION__, parent_ep, parent_ep->com.so);
	child_ep = alloc_ep(sizeof(*child_ep), M_NOWAIT);
	if (!child_ep) {
		log(LOG_ERR, "%s - failed to allocate ep entry!\n",
		       __FUNCTION__);
		return;
	}
	child_so = dequeue_socket(parent_ep->com.so, &remote, child_ep);
	if (!child_so) {
		log(LOG_ERR, "%s - failed to dequeue child socket!\n",
		       __FUNCTION__);
		__free_ep(&child_ep->com);
		return;
	}
	CTR3(KTR_IW_CXGB, "%s remote addr %s port %d", __FUNCTION__,
		inet_ntoa(remote->sin_addr), ntohs(remote->sin_port));
	child_ep->com.tdev = parent_ep->com.tdev;
	child_ep->com.local_addr.sin_family = parent_ep->com.local_addr.sin_family;
	child_ep->com.local_addr.sin_port = parent_ep->com.local_addr.sin_port;
	child_ep->com.local_addr.sin_addr.s_addr = parent_ep->com.local_addr.sin_addr.s_addr;
	child_ep->com.local_addr.sin_len = parent_ep->com.local_addr.sin_len;
	child_ep->com.remote_addr.sin_family = remote->sin_family;
	child_ep->com.remote_addr.sin_port = remote->sin_port;
	child_ep->com.remote_addr.sin_addr.s_addr = remote->sin_addr.s_addr;
	child_ep->com.remote_addr.sin_len = remote->sin_len;
	child_ep->com.so = child_so;
	child_ep->com.cm_id = NULL;
	child_ep->com.thread = parent_ep->com.thread;

	free(remote, M_SONAME);
	get_ep(&parent_ep->com);
	child_ep->parent_ep = parent_ep;
	callout_init(&child_ep->timer, TRUE);
	state_set(&child_ep->com, MPA_REQ_WAIT);
	start_ep_timer(child_ep);

	/* maybe the request has already been queued up on the socket... */
	process_mpa_request(child_ep);
}

static int
iwch_so_upcall(struct socket *so, void *arg, int waitflag)
{
	struct iwch_ep *ep = arg;

	CTR6(KTR_IW_CXGB, "%s so %p so state %x ep %p ep state(%d)=%s", __FUNCTION__, so, so->so_state, ep, ep->com.state, states[ep->com.state]);
	mtx_lock(&req_lock);
	if (ep && ep->com.so && !ep->com.entry.tqe_prev) {
		get_ep(&ep->com);
		TAILQ_INSERT_TAIL(&req_list, &ep->com, entry);
		taskqueue_enqueue(iw_cxgb_taskq, &iw_cxgb_task);
	}
	mtx_unlock(&req_lock);
	return (SU_OK);
}

static void
process_socket_event(struct iwch_ep *ep)
{
	int state = state_read(&ep->com);
	struct socket *so = ep->com.so;

	CTR6(KTR_IW_CXGB, "%s so %p so state %x ep %p ep state(%d)=%s", __FUNCTION__, so, so->so_state, ep, ep->com.state, states[ep->com.state]);
	if (state == CONNECTING) {
		process_connected(ep);
		return;
	}

	if (state == LISTEN) {
		process_newconn(ep);
		return;
	}

	/* connection error */
	if (so->so_error) {
		process_conn_error(ep);
		return;
	}

	/* peer close */
	if ((so->so_rcv.sb_state & SBS_CANTRCVMORE) && state < CLOSING) {
		process_peer_close(ep);
		return;
	}

	/* close complete */
	if (so->so_state & (SS_ISDISCONNECTED)) {
		process_close_complete(ep);
		return;
	}

	/* rx data */
	process_data(ep);
	return;
}

static void
process_req(void *ctx, int pending)
{
	struct iwch_ep_common *epc;

	CTR1(KTR_IW_CXGB, "%s enter", __FUNCTION__);
	mtx_lock(&req_lock);
	while (!TAILQ_EMPTY(&req_list)) {
		epc = TAILQ_FIRST(&req_list);
		TAILQ_REMOVE(&req_list, epc, entry);
		epc->entry.tqe_prev = NULL;
		mtx_unlock(&req_lock);
		if (epc->so)
			process_socket_event((struct iwch_ep *)epc);
		put_ep(epc);
		mtx_lock(&req_lock);
	}
	mtx_unlock(&req_lock);
}
1692
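/*
 * Note the standard taskqueue-thread idiom below: taskqueue_create() is
 * handed taskqueue_thread_enqueue plus a pointer to the queue variable
 * itself, so that enqueues wake the dedicated thread created by
 * taskqueue_start_threads().
 */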
int
iwch_cm_init(void)
{
	TAILQ_INIT(&req_list);
	mtx_init(&req_lock, "iw_cxgb req_list lock", NULL, MTX_DEF);
	iw_cxgb_taskq = taskqueue_create("iw_cxgb_taskq", M_NOWAIT,
		taskqueue_thread_enqueue, &iw_cxgb_taskq);
	if (iw_cxgb_taskq == NULL) {
		printf("failed to allocate iw_cxgb taskqueue\n");
		return (ENOMEM);
	}
	taskqueue_start_threads(&iw_cxgb_taskq, 1, PI_NET, "iw_cxgb taskq");
	TASK_INIT(&iw_cxgb_task, 0, process_req, NULL);
	return (0);
}

void
iwch_cm_term(void)
{

	taskqueue_drain(iw_cxgb_taskq, &iw_cxgb_task);
	taskqueue_free(iw_cxgb_taskq);
}

void
iwch_cm_init_cpl(struct adapter *sc)
{

	t3_register_cpl_handler(sc, CPL_RDMA_TERMINATE, terminate);
	t3_register_cpl_handler(sc, CPL_RDMA_EC_STATUS, ec_status);
}

void
iwch_cm_term_cpl(struct adapter *sc)
{

	t3_register_cpl_handler(sc, CPL_RDMA_TERMINATE, NULL);
	t3_register_cpl_handler(sc, CPL_RDMA_EC_STATUS, NULL);
}
#endif