1/*
2 * Copyright (c) 2009-2013, 2016 Chelsio, Inc. All rights reserved.
3 *
4 * This software is available to you under a choice of one of two
5 * licenses.  You may choose to be licensed under the terms of the GNU
6 * General Public License (GPL) Version 2, available from the file
7 * COPYING in the main directory of this source tree, or the
8 * OpenIB.org BSD license below:
9 *
10 *     Redistribution and use in source and binary forms, with or
11 *     without modification, are permitted provided that the following
12 *     conditions are met:
13 *
14 *      - Redistributions of source code must retain the above
15 *	  copyright notice, this list of conditions and the following
16 *	  disclaimer.
17 *
18 *      - Redistributions in binary form must reproduce the above
19 *	  copyright notice, this list of conditions and the following
20 *	  disclaimer in the documentation and/or other materials
21 *	  provided with the distribution.
22 *
23 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
24 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
25 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
26 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
27 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
28 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
29 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
30 * SOFTWARE.
31 */
32#include <sys/cdefs.h>
33__FBSDID("$FreeBSD: stable/10/sys/dev/cxgbe/iw_cxgbe/cm.c 309450 2016-12-03 00:18:38Z jhb $");
34
35#include "opt_inet.h"
36
37#ifdef TCP_OFFLOAD
38#include <sys/types.h>
39#include <sys/malloc.h>
40#include <sys/socket.h>
41#include <sys/socketvar.h>
42#include <sys/sockio.h>
43#include <sys/taskqueue.h>
44#include <netinet/in.h>
45#include <net/route.h>
46
47#include <netinet/in_systm.h>
48#include <netinet/in_pcb.h>
49#include <netinet/ip.h>
50#include <netinet/ip_var.h>
51#include <netinet/tcp_var.h>
52#include <netinet/tcp.h>
53#include <netinet/tcpip.h>
54
55#include <netinet/toecore.h>
56
57struct sge_iq;
58struct rss_header;
59#include <linux/types.h>
60#include "offload.h"
61#include "tom/t4_tom.h"
62
63#define TOEPCB(so)  ((struct toepcb *)(so_sototcpcb((so))->t_toe))
64
65#include "iw_cxgbe.h"
66#include <linux/module.h>
67#include <linux/workqueue.h>
68#include <linux/notifier.h>
69#include <linux/inetdevice.h>
70#include <linux/if_vlan.h>
71#include <net/netevent.h>
72
73static spinlock_t req_lock;
74static TAILQ_HEAD(c4iw_ep_list, c4iw_ep_common) req_list;
75static struct work_struct c4iw_task;
76static struct workqueue_struct *c4iw_taskq;
77static LIST_HEAD(timeout_list);
78static spinlock_t timeout_lock;
79
80static void process_req(struct work_struct *ctx);
81static void start_ep_timer(struct c4iw_ep *ep);
82static int stop_ep_timer(struct c4iw_ep *ep);
83static int set_tcpinfo(struct c4iw_ep *ep);
84static void process_timeout(struct c4iw_ep *ep);
85static void process_timedout_eps(void);
86static enum c4iw_ep_state state_read(struct c4iw_ep_common *epc);
87static void __state_set(struct c4iw_ep_common *epc, enum c4iw_ep_state tostate);
88static void state_set(struct c4iw_ep_common *epc, enum c4iw_ep_state tostate);
89static void *alloc_ep(int size, gfp_t flags);
90void __free_ep(struct c4iw_ep_common *epc);
91static struct rtentry * find_route(__be32 local_ip, __be32 peer_ip, __be16 local_port,
92		__be16 peer_port, u8 tos);
93static int close_socket(struct c4iw_ep_common *epc, int close);
94static int shutdown_socket(struct c4iw_ep_common *epc);
95static void abort_socket(struct c4iw_ep *ep);
96static int send_mpa_req(struct c4iw_ep *ep);
97static int send_mpa_reject(struct c4iw_ep *ep, const void *pdata, u8 plen);
98static int send_mpa_reply(struct c4iw_ep *ep, const void *pdata, u8 plen);
99static void close_complete_upcall(struct c4iw_ep *ep, int status);
100static int send_abort(struct c4iw_ep *ep);
101static void peer_close_upcall(struct c4iw_ep *ep);
102static void peer_abort_upcall(struct c4iw_ep *ep);
103static void connect_reply_upcall(struct c4iw_ep *ep, int status);
104static int connect_request_upcall(struct c4iw_ep *ep);
105static void established_upcall(struct c4iw_ep *ep);
106static int process_mpa_reply(struct c4iw_ep *ep);
107static int process_mpa_request(struct c4iw_ep *ep);
108static void process_peer_close(struct c4iw_ep *ep);
109static void process_conn_error(struct c4iw_ep *ep);
110static void process_close_complete(struct c4iw_ep *ep);
111static void ep_timeout(unsigned long arg);
112static void init_sock(struct c4iw_ep_common *epc);
113static void process_data(struct c4iw_ep *ep);
114static void process_connected(struct c4iw_ep *ep);
115static int c4iw_so_upcall(struct socket *so, void *arg, int waitflag);
116static void process_socket_event(struct c4iw_ep *ep);
117static void release_ep_resources(struct c4iw_ep *ep);
118
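/*
 * Wrappers around start_ep_timer()/stop_ep_timer() that also leave a KTR
 * trace recording the caller's function name and line number.
 */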
119#define START_EP_TIMER(ep) \
120    do { \
121	    CTR3(KTR_IW_CXGBE, "start_ep_timer (%s:%d) ep %p", \
122		__func__, __LINE__, (ep)); \
123	    start_ep_timer(ep); \
124    } while (0)
125
126#define STOP_EP_TIMER(ep) \
127    ({ \
128	    CTR3(KTR_IW_CXGBE, "stop_ep_timer (%s:%d) ep %p", \
129		__func__, __LINE__, (ep)); \
130	    stop_ep_timer(ep); \
131    })
132
133#ifdef KTR
134static char *states[] = {
135	"idle",
136	"listen",
137	"connecting",
138	"mpa_wait_req",
139	"mpa_req_sent",
140	"mpa_req_rcvd",
141	"mpa_rep_sent",
142	"fpdu_mode",
143	"aborting",
144	"closing",
145	"moribund",
146	"dead",
147	NULL,
148};
149#endif
150
151
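/*
 * Reference helpers for the iw_cm_id and QP attached to an endpoint.  Each
 * ref/deref is also recorded in the endpoint's history bitmap for debugging.
 */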
152static void deref_cm_id(struct c4iw_ep_common *epc)
153{
154      epc->cm_id->rem_ref(epc->cm_id);
155      epc->cm_id = NULL;
156      set_bit(CM_ID_DEREFED, &epc->history);
157}
158
159static void ref_cm_id(struct c4iw_ep_common *epc)
160{
161      set_bit(CM_ID_REFED, &epc->history);
162      epc->cm_id->add_ref(epc->cm_id);
163}
164
165static void deref_qp(struct c4iw_ep *ep)
166{
167	c4iw_qp_rem_ref(&ep->com.qp->ibqp);
168	clear_bit(QP_REFERENCED, &ep->com.flags);
169	set_bit(QP_DEREFED, &ep->com.history);
170}
171
172static void ref_qp(struct c4iw_ep *ep)
173{
174	set_bit(QP_REFERENCED, &ep->com.flags);
175	set_bit(QP_REFED, &ep->com.history);
176	c4iw_qp_add_ref(&ep->com.qp->ibqp);
177}
178
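/*
 * Handle an endpoint whose timer has expired: depending on the connection
 * state, notify the ULP and, for most active states, disconnect the
 * endpoint.  The reference taken by start_ep_timer() is dropped here.
 */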
179static void process_timeout(struct c4iw_ep *ep)
180{
181	struct c4iw_qp_attributes attrs;
182	int abort = 1;
183
184	mutex_lock(&ep->com.mutex);
185	CTR4(KTR_IW_CXGBE, "%s ep :%p, tid:%u, state %d", __func__,
186			ep, ep->hwtid, ep->com.state);
187	set_bit(TIMEDOUT, &ep->com.history);
188	switch (ep->com.state) {
189	case MPA_REQ_SENT:
190		connect_reply_upcall(ep, -ETIMEDOUT);
191		break;
192	case MPA_REQ_WAIT:
193	case MPA_REQ_RCVD:
194	case MPA_REP_SENT:
195	case FPDU_MODE:
196		break;
197	case CLOSING:
198	case MORIBUND:
199		if (ep->com.cm_id && ep->com.qp) {
200			attrs.next_state = C4IW_QP_STATE_ERROR;
201			c4iw_modify_qp(ep->com.dev, ep->com.qp,
202					C4IW_QP_ATTR_NEXT_STATE, &attrs, 1);
203		}
204		close_complete_upcall(ep, -ETIMEDOUT);
205		break;
206	case ABORTING:
207	case DEAD:
208		/*
209		 * These states are expected if the ep timed out at the same
210		 * time as another thread was calling stop_ep_timer().
211		 * So we silently do nothing for these states.
212		 */
213		abort = 0;
214		break;
215	default:
216		CTR4(KTR_IW_CXGBE, "%s unexpected state ep %p tid %u state %u\n"
217				, __func__, ep, ep->hwtid, ep->com.state);
218		abort = 0;
219	}
220	mutex_unlock(&ep->com.mutex);
221	if (abort)
222		c4iw_ep_disconnect(ep, 1, GFP_KERNEL);
223	c4iw_put_ep(&ep->com);
224	return;
225}
226
227static void process_timedout_eps(void)
228{
229	struct c4iw_ep *ep;
230
231	spin_lock(&timeout_lock);
232	while (!list_empty(&timeout_list)) {
233		struct list_head *tmp;
234		tmp = timeout_list.next;
235		list_del(tmp);
236		tmp->next = tmp->prev = NULL;
237		spin_unlock(&timeout_lock);
238		ep = list_entry(tmp, struct c4iw_ep, entry);
239		process_timeout(ep);
240		spin_lock(&timeout_lock);
241	}
242	spin_unlock(&timeout_lock);
243	return;
244}
245
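/*
 * Work handler for c4iw_task: process any timed-out endpoints, then drain
 * req_list and deliver a socket event for each queued endpoint.
 */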
246static void
247process_req(struct work_struct *ctx)
248{
249	struct c4iw_ep_common *epc;
250
251	process_timedout_eps();
252	spin_lock(&req_lock);
253	while (!TAILQ_EMPTY(&req_list)) {
254		epc = TAILQ_FIRST(&req_list);
255		TAILQ_REMOVE(&req_list, epc, entry);
256		epc->entry.tqe_prev = NULL;
257		spin_unlock(&req_lock);
258		CTR3(KTR_IW_CXGBE, "%s so :%p, ep:%p", __func__,
259				epc->so, epc);
260		if (epc->so)
261			process_socket_event((struct c4iw_ep *)epc);
262		c4iw_put_ep(epc);
263		process_timedout_eps();
264		spin_lock(&req_lock);
265	}
266	spin_unlock(&req_lock);
267}
268
269/*
270 * XXX: doesn't belong here in the iWARP driver.
271 * XXX: assumes that the connection was offloaded by cxgbe/t4_tom if TF_TOE is
272 *      set.  Is this a valid assumption for active open?
273 */
274static int
275set_tcpinfo(struct c4iw_ep *ep)
276{
277	struct socket *so = ep->com.so;
278	struct inpcb *inp = sotoinpcb(so);
279	struct tcpcb *tp;
280	struct toepcb *toep;
281	int rc = 0;
282
283	INP_WLOCK(inp);
284	tp = intotcpcb(inp);
285	if ((tp->t_flags & TF_TOE) == 0) {
286		rc = EINVAL;
287		log(LOG_ERR, "%s: connection not offloaded (so %p, ep %p)\n",
288		    __func__, so, ep);
289		goto done;
290	}
291	toep = TOEPCB(so);
292
293	ep->hwtid = toep->tid;
294	ep->snd_seq = tp->snd_nxt;
295	ep->rcv_seq = tp->rcv_nxt;
296	ep->emss = max(tp->t_maxseg, 128);
297done:
298	INP_WUNLOCK(inp);
299	return (rc);
300
301}
302
303static struct rtentry *
304find_route(__be32 local_ip, __be32 peer_ip, __be16 local_port,
305		__be16 peer_port, u8 tos)
306{
307	struct route iproute;
308	struct sockaddr_in *dst = (struct sockaddr_in *)&iproute.ro_dst;
309
310	CTR5(KTR_IW_CXGBE, "%s:frtB %x, %x, %d, %d", __func__, local_ip,
311	    peer_ip, ntohs(local_port), ntohs(peer_port));
312	bzero(&iproute, sizeof iproute);
313	dst->sin_family = AF_INET;
314	dst->sin_len = sizeof *dst;
315	dst->sin_addr.s_addr = peer_ip;
316
317	rtalloc(&iproute);
318	CTR2(KTR_IW_CXGBE, "%s:frtE %p", __func__, (uint64_t)iproute.ro_rt);
319	return iproute.ro_rt;
320}
321
322static int
323close_socket(struct c4iw_ep_common *epc, int close)
324{
325	struct socket *so = epc->so;
326	int rc;
327
328	CTR5(KTR_IW_CXGBE, "%s:csoB so %p, ep %p, state %s, tid %d", __func__,
329			so, epc, states[epc->state],
330			((struct c4iw_ep *)epc)->hwtid);
331	mutex_lock(&epc->so_mutex);
332	if ((so == NULL) || (so->so_count == 0)) {
333		mutex_unlock(&epc->so_mutex);
334		CTR5(KTR_IW_CXGBE, "%s:cso1 so %p, ep %p, state %s, tid %d",
335				__func__, so, epc, states[epc->state],
336				((struct c4iw_ep *)epc)->hwtid);
337		return -EINVAL;
338	}
339
340	SOCK_LOCK(so);
341	soupcall_clear(so, SO_RCV);
342	SOCK_UNLOCK(so);
343
344	if (close)
345                rc = soclose(so);
346        else
347                rc = soshutdown(so, SHUT_WR | SHUT_RD);
348	epc->so = NULL;
349
350	mutex_unlock(&epc->so_mutex);
351	return (rc);
352}
353
354static int
355shutdown_socket(struct c4iw_ep_common *epc)
356{
357
358	struct socket *so = epc->so;
359	int rc;
360
361	CTR5(KTR_IW_CXGBE, "%s:ssoB so %p, ep %p, state %s, tid %d", __func__,
362			epc->so, epc, states[epc->state],
363			((struct c4iw_ep *)epc)->hwtid);
364	mutex_lock(&epc->so_mutex);
365	if ((so == NULL) || (so->so_count == 0)) {
366		mutex_unlock(&epc->so_mutex);
367		CTR5(KTR_IW_CXGBE, "%s:sso1 so %p, ep %p, state %s, tid %d",
368			__func__, epc->so, epc, states[epc->state],
369			((struct c4iw_ep *)epc)->hwtid);
370		return -EINVAL;
371	}
372	rc = soshutdown(so, SHUT_WR);
373	mutex_unlock(&epc->so_mutex);
374	return rc;
375}
376
377static void
378abort_socket(struct c4iw_ep *ep)
379{
380	struct sockopt sopt;
381	int rc;
382	struct linger l;
383
384	CTR5(KTR_IW_CXGBE, "%s ep %p so %p state %s tid %d", __func__, ep,
385			ep->com.so, states[ep->com.state], ep->hwtid);
386	mutex_lock(&ep->com.so_mutex);
387	l.l_onoff = 1;
388	l.l_linger = 0;
389
390	/* linger_time of 0 forces RST to be sent */
391	sopt.sopt_dir = SOPT_SET;
392	sopt.sopt_level = SOL_SOCKET;
393	sopt.sopt_name = SO_LINGER;
394	sopt.sopt_val = (caddr_t)&l;
395	sopt.sopt_valsize = sizeof l;
396	sopt.sopt_td = NULL;
397	rc = sosetopt(ep->com.so, &sopt);
398	if (rc) {
399		log(LOG_ERR, "%s: can't set linger to 0, no RST! err %d\n",
400		    __func__, rc);
401	}
402	mutex_unlock(&ep->com.so_mutex);
403}
404
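/*
 * The peer has closed its half of the connection: advance the endpoint
 * state machine and, where appropriate, notify the ULP and close or
 * disconnect the socket.
 */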
405static void
406process_peer_close(struct c4iw_ep *ep)
407{
408	struct c4iw_qp_attributes attrs;
409	int disconnect = 1;
410	int release = 0;
411
412	CTR4(KTR_IW_CXGBE, "%s:ppcB ep %p so %p state %s", __func__, ep,
413	    ep->com.so, states[ep->com.state]);
414
415	mutex_lock(&ep->com.mutex);
416	switch (ep->com.state) {
417
418		case MPA_REQ_WAIT:
419			CTR2(KTR_IW_CXGBE, "%s:ppc1 %p MPA_REQ_WAIT CLOSING",
420			    __func__, ep);
421			__state_set(&ep->com, CLOSING);
422			break;
423
424		case MPA_REQ_SENT:
425			CTR2(KTR_IW_CXGBE, "%s:ppc2 %p MPA_REQ_SENT CLOSING",
426			    __func__, ep);
427			__state_set(&ep->com, DEAD);
428			connect_reply_upcall(ep, -ECONNABORTED);
429
430			disconnect = 0;
431			STOP_EP_TIMER(ep);
432			close_socket(&ep->com, 0);
433			deref_cm_id(&ep->com);
434			release = 1;
435			break;
436
437		case MPA_REQ_RCVD:
438
439			/*
440			 * We're gonna mark this puppy DEAD, but keep
441			 * the reference on it until the ULP accepts or
442			 * rejects the CR.
443			 */
444			CTR2(KTR_IW_CXGBE, "%s:ppc3 %p MPA_REQ_RCVD CLOSING",
445			    __func__, ep);
446			__state_set(&ep->com, CLOSING);
447			c4iw_get_ep(&ep->com);
448			break;
449
450		case MPA_REP_SENT:
451			CTR2(KTR_IW_CXGBE, "%s:ppc4 %p MPA_REP_SENT CLOSING",
452			    __func__, ep);
453			__state_set(&ep->com, CLOSING);
454			break;
455
456		case FPDU_MODE:
457			CTR2(KTR_IW_CXGBE, "%s:ppc5 %p FPDU_MODE CLOSING",
458			    __func__, ep);
459			START_EP_TIMER(ep);
460			__state_set(&ep->com, CLOSING);
461			attrs.next_state = C4IW_QP_STATE_CLOSING;
462			c4iw_modify_qp(ep->com.dev, ep->com.qp,
463					C4IW_QP_ATTR_NEXT_STATE, &attrs, 1);
464			peer_close_upcall(ep);
465			break;
466
467		case ABORTING:
468			CTR2(KTR_IW_CXGBE, "%s:ppc6 %p ABORTING (disconn)",
469			    __func__, ep);
470			disconnect = 0;
471			break;
472
473		case CLOSING:
474			CTR2(KTR_IW_CXGBE, "%s:ppc7 %p CLOSING MORIBUND",
475			    __func__, ep);
476			__state_set(&ep->com, MORIBUND);
477			disconnect = 0;
478			break;
479
480		case MORIBUND:
481			CTR2(KTR_IW_CXGBE, "%s:ppc8 %p MORIBUND DEAD", __func__,
482			    ep);
483			STOP_EP_TIMER(ep);
484			if (ep->com.cm_id && ep->com.qp) {
485				attrs.next_state = C4IW_QP_STATE_IDLE;
486				c4iw_modify_qp(ep->com.qp->rhp, ep->com.qp,
487						C4IW_QP_ATTR_NEXT_STATE, &attrs, 1);
488			}
489			close_socket(&ep->com, 0);
490			close_complete_upcall(ep, 0);
491			__state_set(&ep->com, DEAD);
492			release = 1;
493			disconnect = 0;
494			break;
495
496		case DEAD:
497			CTR2(KTR_IW_CXGBE, "%s:ppc9 %p DEAD (disconn)",
498			    __func__, ep);
499			disconnect = 0;
500			break;
501
502		default:
503			panic("%s: ep %p state %d", __func__, ep,
504			    ep->com.state);
505			break;
506	}
507
508	mutex_unlock(&ep->com.mutex);
509
510	if (disconnect) {
511
512		CTR2(KTR_IW_CXGBE, "%s:ppca %p", __func__, ep);
513		c4iw_ep_disconnect(ep, 0, M_NOWAIT);
514	}
515	if (release) {
516
517		CTR2(KTR_IW_CXGBE, "%s:ppcb %p", __func__, ep);
518		c4iw_put_ep(&ep->com);
519	}
520	CTR2(KTR_IW_CXGBE, "%s:ppcE %p", __func__, ep);
521	return;
522}
523
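/*
 * The socket reported an error (so_error is set): fail the connection,
 * moving the QP to ERROR where one is attached, and notify the ULP.
 */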
524static void
525process_conn_error(struct c4iw_ep *ep)
526{
527	struct c4iw_qp_attributes attrs;
528	int ret;
529	int state;
530
531	mutex_lock(&ep->com.mutex);
532	state = ep->com.state;
533	CTR5(KTR_IW_CXGBE, "%s:pceB ep %p so %p so->so_error %u state %s",
534	    __func__, ep, ep->com.so, ep->com.so->so_error,
535	    states[ep->com.state]);
536
537	switch (state) {
538
539		case MPA_REQ_WAIT:
540			STOP_EP_TIMER(ep);
541			break;
542
543		case MPA_REQ_SENT:
544			STOP_EP_TIMER(ep);
545			connect_reply_upcall(ep, -ECONNRESET);
546			break;
547
548		case MPA_REP_SENT:
549			ep->com.rpl_err = ECONNRESET;
550			CTR1(KTR_IW_CXGBE, "waking up ep %p", ep);
551			break;
552
553		case MPA_REQ_RCVD:
554
555			/*
556			 * We're gonna mark this puppy DEAD, but keep
557			 * the reference on it until the ULP accepts or
558			 * rejects the CR.
559			 */
560			c4iw_get_ep(&ep->com);
561			break;
562
563		case MORIBUND:
564		case CLOSING:
565			STOP_EP_TIMER(ep);
566			/*FALLTHROUGH*/
567		case FPDU_MODE:
568
569			if (ep->com.cm_id && ep->com.qp) {
570
571				attrs.next_state = C4IW_QP_STATE_ERROR;
572				ret = c4iw_modify_qp(ep->com.qp->rhp,
573					ep->com.qp, C4IW_QP_ATTR_NEXT_STATE,
574					&attrs, 1);
575				if (ret)
576					log(LOG_ERR,
577							"%s - qp <- error failed!\n",
578							__func__);
579			}
580			peer_abort_upcall(ep);
581			break;
582
583		case ABORTING:
584			break;
585
586		case DEAD:
587			CTR2(KTR_IW_CXGBE, "%s so_error %d IN DEAD STATE!!!!",
588			    __func__, ep->com.so->so_error);
589			mutex_unlock(&ep->com.mutex);
590			return;
591
592		default:
593			panic("%s: ep %p state %d", __func__, ep, state);
594			break;
595	}
596
597	if (state != ABORTING) {
598		if (ep->parent_ep) {
599			CTR2(KTR_IW_CXGBE, "%s:pce1 %p", __func__, ep);
600			close_socket(&ep->com, 1);
601		} else {
602			CTR2(KTR_IW_CXGBE, "%s:pce2 %p", __func__, ep);
603			close_socket(&ep->com, 0);
604		}
605
606		__state_set(&ep->com, DEAD);
607		c4iw_put_ep(&ep->com);
608	}
609	mutex_unlock(&ep->com.mutex);
610	CTR2(KTR_IW_CXGBE, "%s:pceE %p", __func__, ep);
611	return;
612}
613
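/*
 * The local close has completed (socket fully disconnected): finish the
 * shutdown by moving the QP to IDLE where appropriate, closing the socket,
 * and delivering the close-complete upcall.
 */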
614static void
615process_close_complete(struct c4iw_ep *ep)
616{
617	struct c4iw_qp_attributes attrs;
618	int release = 0;
619
620	CTR4(KTR_IW_CXGBE, "%s:pccB ep %p so %p state %s", __func__, ep,
621	    ep->com.so, states[ep->com.state]);
622
623	/* The cm_id may be null if we failed to connect */
624	mutex_lock(&ep->com.mutex);
625	set_bit(CLOSE_CON_RPL, &ep->com.history);
626
627	switch (ep->com.state) {
628
629		case CLOSING:
630			CTR2(KTR_IW_CXGBE, "%s:pcc1 %p CLOSING MORIBUND",
631			    __func__, ep);
632			__state_set(&ep->com, MORIBUND);
633			break;
634
635		case MORIBUND:
636			CTR2(KTR_IW_CXGBE, "%s:pcc1 %p MORIBUND DEAD", __func__,
637			    ep);
638			STOP_EP_TIMER(ep);
639
640			if ((ep->com.cm_id) && (ep->com.qp)) {
641
642				CTR2(KTR_IW_CXGBE, "%s:pcc2 %p QP_STATE_IDLE",
643				    __func__, ep);
644				attrs.next_state = C4IW_QP_STATE_IDLE;
645				c4iw_modify_qp(ep->com.dev,
646						ep->com.qp,
647						C4IW_QP_ATTR_NEXT_STATE,
648						&attrs, 1);
649			}
650
651			if (ep->parent_ep) {
652
653				CTR2(KTR_IW_CXGBE, "%s:pcc3 %p", __func__, ep);
654				close_socket(&ep->com, 1);
655			}
656			else {
657
658				CTR2(KTR_IW_CXGBE, "%s:pcc4 %p", __func__, ep);
659				close_socket(&ep->com, 0);
660			}
661			close_complete_upcall(ep, 0);
662			__state_set(&ep->com, DEAD);
663			release = 1;
664			break;
665
666		case ABORTING:
667			CTR2(KTR_IW_CXGBE, "%s:pcc5 %p ABORTING", __func__, ep);
668			break;
669
670		case DEAD:
671			CTR2(KTR_IW_CXGBE, "%s:pcc6 %p DEAD", __func__, ep);
672			break;
673		default:
674			CTR2(KTR_IW_CXGBE, "%s:pcc7 %p unknown ep state",
675					__func__, ep);
676			panic("%s:pcc6 %p unknown ep state", __func__, ep);
677			break;
678	}
679	mutex_unlock(&ep->com.mutex);
680
681	if (release) {
682
683		CTR2(KTR_IW_CXGBE, "%s:pcc8 %p", __func__, ep);
684		c4iw_put_ep(&ep->com);
685	}
686	CTR2(KTR_IW_CXGBE, "%s:pccE %p", __func__, ep);
687	return;
688}
689
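/*
 * Prepare a connection socket for use by the CM: install the receive
 * upcall, mark the socket non-blocking, and enable TCP_NODELAY.
 */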
690static void
691init_sock(struct c4iw_ep_common *epc)
692{
693	int rc;
694	struct sockopt sopt;
695	struct socket *so = epc->so;
696	int on = 1;
697
698	mutex_lock(&epc->so_mutex);
699	if ((so == NULL) || (so->so_count == 0)) {
700		mutex_unlock(&epc->so_mutex);
701		CTR5(KTR_IW_CXGBE, "%s:iso1 so %p, ep %p, state %s, tid %d",
702			__func__, so, epc, states[epc->state],
703			((struct c4iw_ep *)epc)->hwtid);
704		return;
705	}
706	SOCK_LOCK(so);
707	soupcall_set(so, SO_RCV, c4iw_so_upcall, epc);
708	so->so_state |= SS_NBIO;
709	SOCK_UNLOCK(so);
710	sopt.sopt_dir = SOPT_SET;
711	sopt.sopt_level = IPPROTO_TCP;
712	sopt.sopt_name = TCP_NODELAY;
713	sopt.sopt_val = (caddr_t)&on;
714	sopt.sopt_valsize = sizeof on;
715	sopt.sopt_td = NULL;
716	rc = sosetopt(so, &sopt);
717	if (rc) {
718		log(LOG_ERR, "%s: can't set TCP_NODELAY on so %p (%d)\n",
719		    __func__, so, rc);
720	}
721	mutex_unlock(&epc->so_mutex);
722}
723
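/*
 * Handle newly arrived data on the connection socket: an MPA reply in
 * MPA_REQ_SENT, an MPA request in MPA_REQ_WAIT; anything else is
 * unexpected streaming data and is logged.
 */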
724static void
725process_data(struct c4iw_ep *ep)
726{
727	struct sockaddr_in *local, *remote;
728	int disconnect = 0;
729
730	CTR5(KTR_IW_CXGBE, "%s: so %p, ep %p, state %s, sb_cc %d", __func__,
731	    ep->com.so, ep, states[ep->com.state], ep->com.so->so_rcv.sb_cc);
732
733	switch (state_read(&ep->com)) {
734	case MPA_REQ_SENT:
735		disconnect = process_mpa_reply(ep);
736		break;
737	case MPA_REQ_WAIT:
738		in_getsockaddr(ep->com.so, (struct sockaddr **)&local);
739		in_getpeeraddr(ep->com.so, (struct sockaddr **)&remote);
740		ep->com.local_addr = *local;
741		ep->com.remote_addr = *remote;
742		free(local, M_SONAME);
743		free(remote, M_SONAME);
744		disconnect = process_mpa_request(ep);
745		break;
746	default:
747		if (ep->com.so->so_rcv.sb_cc)
748			log(LOG_ERR, "%s: Unexpected streaming data.  "
749			    "ep %p, state %d, so %p, so_state 0x%x, sb_cc %u\n",
750			    __func__, ep, state_read(&ep->com), ep->com.so,
751			    ep->com.so->so_state, ep->com.so->so_rcv.sb_cc);
752		break;
753	}
754	if (disconnect)
755		c4iw_ep_disconnect(ep, disconnect == 2, GFP_KERNEL);
756
757}
758
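/*
 * An active-open socket has finished connecting: send the MPA request if
 * the connect succeeded, otherwise report the error to the ULP and tear
 * the endpoint down.
 */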
759static void
760process_connected(struct c4iw_ep *ep)
761{
762
763	if ((ep->com.so->so_state & SS_ISCONNECTED) && !ep->com.so->so_error) {
764		if (send_mpa_req(ep))
765			goto err;
766	}
767	else {
768		connect_reply_upcall(ep, -ep->com.so->so_error);
769		goto err;
770	}
771	return;
772err:
773	close_socket(&ep->com, 0);
774	state_set(&ep->com, DEAD);
775	c4iw_put_ep(&ep->com);
776	return;
777}
778
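/*
 * Handle a newly accepted child socket on a listening endpoint: allocate a
 * child ep, install the socket upcall, record the addresses, start the MPA
 * timer, and process any MPA request that may already be queued.
 */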
779void
780process_newconn(struct iw_cm_id *parent_cm_id, struct socket *child_so)
781{
782	struct c4iw_ep *child_ep;
783	struct sockaddr_in *local;
784	struct sockaddr_in *remote;
785	struct c4iw_ep *parent_ep = parent_cm_id->provider_data;
786	int ret = 0;
787
788	if (!child_so) {
789		CTR4(KTR_IW_CXGBE,
790		    "%s: parent so %p, parent ep %p, child so %p, invalid so",
791		    __func__, parent_ep->com.so, parent_ep, child_so);
792		log(LOG_ERR, "%s: invalid child socket\n", __func__);
793		return;
794	}
795	child_ep = alloc_ep(sizeof(*child_ep), M_NOWAIT);
796	if (!child_ep) {
797		CTR3(KTR_IW_CXGBE, "%s: parent so %p, parent ep %p, ENOMEM",
798		    __func__, parent_ep->com.so, parent_ep);
799		log(LOG_ERR, "%s: failed to allocate ep entry\n", __func__);
800		return;
801	}
802	SOCKBUF_LOCK(&child_so->so_rcv);
803	soupcall_set(child_so, SO_RCV, c4iw_so_upcall, child_ep);
804	SOCKBUF_UNLOCK(&child_so->so_rcv);
805
806	CTR5(KTR_IW_CXGBE,
807	    "%s: parent so %p, parent ep %p, child so %p, child ep %p",
808	     __func__, parent_ep->com.so, parent_ep, child_so, child_ep);
809
810	in_getsockaddr(child_so, (struct sockaddr **)&local);
811	in_getpeeraddr(child_so, (struct sockaddr **)&remote);
812
813	child_ep->com.local_addr = *local;
814	child_ep->com.remote_addr = *remote;
815	child_ep->com.dev = parent_ep->com.dev;
816	child_ep->com.so = child_so;
817	child_ep->com.cm_id = NULL;
818	child_ep->com.thread = parent_ep->com.thread;
819	child_ep->parent_ep = parent_ep;
820
821	free(local, M_SONAME);
822	free(remote, M_SONAME);
823
824	c4iw_get_ep(&parent_ep->com);
825	init_timer(&child_ep->timer);
826	state_set(&child_ep->com, MPA_REQ_WAIT);
827	START_EP_TIMER(child_ep);
828
829	/* maybe the request has already been queued up on the socket... */
830	ret = process_mpa_request(child_ep);
831	if (ret == 2)
832		/* ABORT */
833		c4iw_ep_disconnect(child_ep, 1, GFP_KERNEL);
834	else if (ret == 1)
835		/* CLOSE */
836		c4iw_ep_disconnect(child_ep, 0, GFP_KERNEL);
837
838	return;
839}
840
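/*
 * Socket receive upcall.  This runs in the socket upcall path, so it only
 * takes a reference, queues the endpoint on req_list, and schedules the
 * taskqueue; the real work happens later in process_req().
 */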
841static int
842c4iw_so_upcall(struct socket *so, void *arg, int waitflag)
843{
844	struct c4iw_ep *ep = arg;
845
846	spin_lock(&req_lock);
847
848	CTR6(KTR_IW_CXGBE,
849	    "%s: so %p, so_state 0x%x, ep %p, ep_state %s, tqe_prev %p",
850	    __func__, so, so->so_state, ep, states[ep->com.state],
851	    ep->com.entry.tqe_prev);
852
853	if (ep && ep->com.so && !ep->com.entry.tqe_prev) {
854		KASSERT(ep->com.so == so, ("%s: XXX review.", __func__));
855		c4iw_get_ep(&ep->com);
856		TAILQ_INSERT_TAIL(&req_list, &ep->com, entry);
857		queue_work(c4iw_taskq, &c4iw_task);
858	}
859
860	spin_unlock(&req_lock);
861	return (SU_OK);
862}
863
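/*
 * Dispatch a queued socket event based on endpoint and socket state:
 * connect completion, connection error, peer close, close complete, or
 * inbound data.  Listen sockets are handled by the IWCM, not here.
 */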
864static void
865process_socket_event(struct c4iw_ep *ep)
866{
867	int state = state_read(&ep->com);
868	struct socket *so = ep->com.so;
869
870	CTR6(KTR_IW_CXGBE, "process_socket_event: so %p, so_state 0x%x, "
871	    "so_err %d, sb_state 0x%x, ep %p, ep_state %s", so, so->so_state,
872	    so->so_error, so->so_rcv.sb_state, ep, states[state]);
873
874	if (state == CONNECTING) {
875		process_connected(ep);
876		return;
877	}
878
879	if (state == LISTEN) {
880		/* socket listening events are handled at IWCM */
881		CTR3(KTR_IW_CXGBE, "%s Invalid ep state:%u, ep:%p", __func__,
882			    ep->com.state, ep);
883		BUG();
884		return;
885	}
886
887	/* connection error */
888	if (so->so_error) {
889		process_conn_error(ep);
890		return;
891	}
892
893	/* peer close */
894	if ((so->so_rcv.sb_state & SBS_CANTRCVMORE) && state <= CLOSING) {
895		process_peer_close(ep);
896		/*
897		 * check whether socket disconnect event is pending before
898		 * returning. Fallthrough if yes.
899		 */
900		if (!(so->so_state & SS_ISDISCONNECTED))
901			return;
902	}
903
904	/* close complete */
905	if (so->so_state & SS_ISDISCONNECTED) {
906		process_close_complete(ep);
907		return;
908	}
909
910	/* rx data */
911	process_data(ep);
912}
913
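/* Module tunables, exposed under the hw.iw_cxgbe sysctl node. */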
914SYSCTL_NODE(_hw, OID_AUTO, iw_cxgbe, CTLFLAG_RD, 0, "iw_cxgbe driver parameters");
915
916static int dack_mode = 0;
917TUNABLE_INT("hw.iw_cxgbe.dack_mode", &dack_mode);
918SYSCTL_INT(_hw_iw_cxgbe, OID_AUTO, dack_mode, CTLFLAG_RW, &dack_mode, 0,
919		"Delayed ack mode (default = 0)");
920
921int c4iw_max_read_depth = 8;
922TUNABLE_INT("hw.iw_cxgbe.c4iw_max_read_depth", &c4iw_max_read_depth);
923SYSCTL_INT(_hw_iw_cxgbe, OID_AUTO, c4iw_max_read_depth, CTLFLAG_RW, &c4iw_max_read_depth, 0,
924		"Per-connection max ORD/IRD (default = 8)");
925
926static int enable_tcp_timestamps;
927TUNABLE_INT("hw.iw_cxgbe.enable_tcp_timestamps", &enable_tcp_timestamps);
928SYSCTL_INT(_hw_iw_cxgbe, OID_AUTO, enable_tcp_timestamps, CTLFLAG_RW, &enable_tcp_timestamps, 0,
929		"Enable tcp timestamps (default = 0)");
930
931static int enable_tcp_sack;
932TUNABLE_INT("hw.iw_cxgbe.enable_tcp_sack", &enable_tcp_sack);
933SYSCTL_INT(_hw_iw_cxgbe, OID_AUTO, enable_tcp_sack, CTLFLAG_RW, &enable_tcp_sack, 0,
934		"Enable tcp SACK (default = 0)");
935
936static int enable_tcp_window_scaling = 1;
937TUNABLE_INT("hw.iw_cxgbe.enable_tcp_window_scaling", &enable_tcp_window_scaling);
938SYSCTL_INT(_hw_iw_cxgbe, OID_AUTO, enable_tcp_window_scaling, CTLFLAG_RW, &enable_tcp_window_scaling, 0,
939		"Enable tcp window scaling (default = 1)");
940
941int c4iw_debug = 1;
942TUNABLE_INT("hw.iw_cxgbe.c4iw_debug", &c4iw_debug);
943SYSCTL_INT(_hw_iw_cxgbe, OID_AUTO, c4iw_debug, CTLFLAG_RW, &c4iw_debug, 0,
		"Enable debug logging (default = 1)");
945
946static int peer2peer = 1;
947TUNABLE_INT("hw.iw_cxgbe.peer2peer", &peer2peer);
948SYSCTL_INT(_hw_iw_cxgbe, OID_AUTO, peer2peer, CTLFLAG_RW, &peer2peer, 0,
949		"Support peer2peer ULPs (default = 1)");
950
951static int p2p_type = FW_RI_INIT_P2PTYPE_READ_REQ;
952TUNABLE_INT("hw.iw_cxgbe.p2p_type", &p2p_type);
953SYSCTL_INT(_hw_iw_cxgbe, OID_AUTO, p2p_type, CTLFLAG_RW, &p2p_type, 0,
954		"RDMAP opcode to use for the RTR message: 1 = RDMA_READ 0 = RDMA_WRITE (default 1)");
955
956static int ep_timeout_secs = 60;
957TUNABLE_INT("hw.iw_cxgbe.ep_timeout_secs", &ep_timeout_secs);
958SYSCTL_INT(_hw_iw_cxgbe, OID_AUTO, ep_timeout_secs, CTLFLAG_RW, &ep_timeout_secs, 0,
959		"CM Endpoint operation timeout in seconds (default = 60)");
960
961static int mpa_rev = 1;
962TUNABLE_INT("hw.iw_cxgbe.mpa_rev", &mpa_rev);
963SYSCTL_INT(_hw_iw_cxgbe, OID_AUTO, mpa_rev, CTLFLAG_RW, &mpa_rev, 0,
964		"MPA Revision, 0 supports amso1100, 1 is RFC5044 spec compliant, 2 is IETF MPA Peer Connect Draft compliant (default = 1)");
965
966static int markers_enabled;
967TUNABLE_INT("hw.iw_cxgbe.markers_enabled", &markers_enabled);
968SYSCTL_INT(_hw_iw_cxgbe, OID_AUTO, markers_enabled, CTLFLAG_RW, &markers_enabled, 0,
969		"Enable MPA MARKERS (default(0) = disabled)");
970
971static int crc_enabled = 1;
972TUNABLE_INT("hw.iw_cxgbe.crc_enabled", &crc_enabled);
973SYSCTL_INT(_hw_iw_cxgbe, OID_AUTO, crc_enabled, CTLFLAG_RW, &crc_enabled, 0,
974		"Enable MPA CRC (default(1) = enabled)");
975
976static int rcv_win = 256 * 1024;
977TUNABLE_INT("hw.iw_cxgbe.rcv_win", &rcv_win);
978SYSCTL_INT(_hw_iw_cxgbe, OID_AUTO, rcv_win, CTLFLAG_RW, &rcv_win, 0,
979		"TCP receive window in bytes (default = 256KB)");
980
981static int snd_win = 128 * 1024;
982TUNABLE_INT("hw.iw_cxgbe.snd_win", &snd_win);
983SYSCTL_INT(_hw_iw_cxgbe, OID_AUTO, snd_win, CTLFLAG_RW, &snd_win, 0,
984		"TCP send window in bytes (default = 128KB)");
985
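/*
 * Arm the per-endpoint timer for ep_timeout_secs and take a reference on
 * the endpoint; stop_ep_timer(), or process_timeout() if the timer fires,
 * drops that reference.
 */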
986static void
987start_ep_timer(struct c4iw_ep *ep)
988{
989
990	if (timer_pending(&ep->timer)) {
991		CTR2(KTR_IW_CXGBE, "%s: ep %p, already started", __func__, ep);
992		printk(KERN_ERR "%s timer already started! ep %p\n", __func__,
993		    ep);
994		return;
995	}
996	clear_bit(TIMEOUT, &ep->com.flags);
997	c4iw_get_ep(&ep->com);
998	ep->timer.expires = jiffies + ep_timeout_secs * HZ;
999	ep->timer.data = (unsigned long)ep;
1000	ep->timer.function = ep_timeout;
1001	add_timer(&ep->timer);
1002}
1003
1004static int
1005stop_ep_timer(struct c4iw_ep *ep)
1006{
1007
1008	del_timer_sync(&ep->timer);
1009	if (!test_and_set_bit(TIMEOUT, &ep->com.flags)) {
1010		c4iw_put_ep(&ep->com);
1011		return 0;
1012	}
1013	return 1;
1014}
1015
static enum c4iw_ep_state
state_read(struct c4iw_ep_common *epc)
1018{
1019	enum c4iw_ep_state state;
1020
1021	mutex_lock(&epc->mutex);
1022	state = epc->state;
1023	mutex_unlock(&epc->mutex);
1024
1025	return (state);
1026}
1027
1028static void
1029__state_set(struct c4iw_ep_common *epc, enum c4iw_ep_state new)
1030{
1031
1032	epc->state = new;
1033}
1034
1035static void
1036state_set(struct c4iw_ep_common *epc, enum c4iw_ep_state new)
1037{
1038
1039	mutex_lock(&epc->mutex);
1040	__state_set(epc, new);
1041	mutex_unlock(&epc->mutex);
1042}
1043
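/*
 * Allocate a zeroed endpoint of 'size' bytes and initialize its refcount,
 * mutexes, and wait object.
 */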
1044static void *
1045alloc_ep(int size, gfp_t gfp)
1046{
1047	struct c4iw_ep_common *epc;
1048
1049	epc = kzalloc(size, gfp);
1050	if (epc == NULL)
1051		return (NULL);
1052
1053	kref_init(&epc->kref);
1054	mutex_init(&epc->mutex);
1055	mutex_init(&epc->so_mutex);
1056	c4iw_init_wr_wait(&epc->wr_wait);
1057
1058	return (epc);
1059}
1060
1061void
1062__free_ep(struct c4iw_ep_common *epc)
1063{
1064	CTR2(KTR_IW_CXGBE, "%s:feB %p", __func__, epc);
1065	KASSERT(!epc->so, ("%s warning ep->so %p \n", __func__, epc->so));
1066	KASSERT(!epc->entry.tqe_prev, ("%s epc %p still on req list!\n", __func__, epc));
1067	free(epc, M_DEVBUF);
1068	CTR2(KTR_IW_CXGBE, "%s:feE %p", __func__, epc);
1069}
1070
1071void _c4iw_free_ep(struct kref *kref)
1072{
1073	struct c4iw_ep *ep;
1074	struct c4iw_ep_common *epc;
1075
1076	ep = container_of(kref, struct c4iw_ep, com.kref);
1077	epc = &ep->com;
1078	KASSERT(!epc->entry.tqe_prev, ("%s epc %p still on req list",
1079	    __func__, epc));
1080	if (test_bit(QP_REFERENCED, &ep->com.flags))
1081		deref_qp(ep);
1082	kfree(ep);
1083}
1084
1085static void release_ep_resources(struct c4iw_ep *ep)
1086{
1087	CTR2(KTR_IW_CXGBE, "%s:rerB %p", __func__, ep);
1088	set_bit(RELEASE_RESOURCES, &ep->com.flags);
1089	c4iw_put_ep(&ep->com);
1090	CTR2(KTR_IW_CXGBE, "%s:rerE %p", __func__, ep);
1091}
1092
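/*
 * Build the MPA start request (MPA v1, or v2 with enhanced connection
 * parameters) in an mbuf and send it on the connection socket.  On success
 * the endpoint timer is started and the state moves to MPA_REQ_SENT; on
 * failure the error is reported to the ULP via connect_reply_upcall().
 */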
1093static int
1094send_mpa_req(struct c4iw_ep *ep)
1095{
1096	int mpalen;
1097	struct mpa_message *mpa;
1098	struct mpa_v2_conn_params mpa_v2_params;
1099	struct mbuf *m;
1100	char mpa_rev_to_use = mpa_rev;
1101	int err = 0;
1102
1103	if (ep->retry_with_mpa_v1)
1104		mpa_rev_to_use = 1;
1105	mpalen = sizeof(*mpa) + ep->plen;
1106	if (mpa_rev_to_use == 2)
1107		mpalen += sizeof(struct mpa_v2_conn_params);
1108
1109	mpa = malloc(mpalen, M_CXGBE, M_NOWAIT);
1110	if (mpa == NULL) {
1111		err = -ENOMEM;
1112		CTR3(KTR_IW_CXGBE, "%s:smr1 ep: %p , error: %d",
1113				__func__, ep, err);
1114		goto err;
1115	}
1116
1117	memset(mpa, 0, mpalen);
1118	memcpy(mpa->key, MPA_KEY_REQ, sizeof(mpa->key));
1119	mpa->flags = (crc_enabled ? MPA_CRC : 0) |
1120		(markers_enabled ? MPA_MARKERS : 0) |
1121		(mpa_rev_to_use == 2 ? MPA_ENHANCED_RDMA_CONN : 0);
1122	mpa->private_data_size = htons(ep->plen);
1123	mpa->revision = mpa_rev_to_use;
1124
1125	if (mpa_rev_to_use == 1) {
1126		ep->tried_with_mpa_v1 = 1;
1127		ep->retry_with_mpa_v1 = 0;
1128	}
1129
1130	if (mpa_rev_to_use == 2) {
1131		mpa->private_data_size +=
1132			htons(sizeof(struct mpa_v2_conn_params));
1133		mpa_v2_params.ird = htons((u16)ep->ird);
1134		mpa_v2_params.ord = htons((u16)ep->ord);
1135
1136		if (peer2peer) {
1137			mpa_v2_params.ird |= htons(MPA_V2_PEER2PEER_MODEL);
1138
1139			if (p2p_type == FW_RI_INIT_P2PTYPE_RDMA_WRITE) {
1140				mpa_v2_params.ord |=
1141				    htons(MPA_V2_RDMA_WRITE_RTR);
1142			} else if (p2p_type == FW_RI_INIT_P2PTYPE_READ_REQ) {
1143				mpa_v2_params.ord |=
1144					htons(MPA_V2_RDMA_READ_RTR);
1145			}
1146		}
1147		memcpy(mpa->private_data, &mpa_v2_params,
1148			sizeof(struct mpa_v2_conn_params));
1149
1150		if (ep->plen) {
1151
1152			memcpy(mpa->private_data +
1153				sizeof(struct mpa_v2_conn_params),
1154				ep->mpa_pkt + sizeof(*mpa), ep->plen);
1155		}
1156	} else {
1157
1158		if (ep->plen)
1159			memcpy(mpa->private_data,
1160					ep->mpa_pkt + sizeof(*mpa), ep->plen);
1161		CTR2(KTR_IW_CXGBE, "%s:smr7 %p", __func__, ep);
1162	}
1163
1164	m = m_getm(NULL, mpalen, M_NOWAIT, MT_DATA);
1165	if (m == NULL) {
1166		err = -ENOMEM;
1167		CTR3(KTR_IW_CXGBE, "%s:smr2 ep: %p , error: %d",
1168				__func__, ep, err);
1169		free(mpa, M_CXGBE);
1170		goto err;
1171	}
1172	m_copyback(m, 0, mpalen, (void *)mpa);
1173	free(mpa, M_CXGBE);
1174
1175	err = -sosend(ep->com.so, NULL, NULL, m, NULL, MSG_DONTWAIT,
1176			ep->com.thread);
1177	if (err) {
1178		CTR3(KTR_IW_CXGBE, "%s:smr3 ep: %p , error: %d",
1179				__func__, ep, err);
1180		goto err;
1181	}
1182
1183	START_EP_TIMER(ep);
1184	state_set(&ep->com, MPA_REQ_SENT);
1185	ep->mpa_attr.initiator = 1;
1186	CTR3(KTR_IW_CXGBE, "%s:smrE %p, error: %d", __func__, ep, err);
1187	return 0;
1188err:
1189	connect_reply_upcall(ep, err);
1190	CTR3(KTR_IW_CXGBE, "%s:smrE %p, error: %d", __func__, ep, err);
1191	return err;
1192}
1193
1194static int send_mpa_reject(struct c4iw_ep *ep, const void *pdata, u8 plen)
1195{
1196	int mpalen ;
1197	struct mpa_message *mpa;
1198	struct mpa_v2_conn_params mpa_v2_params;
1199	struct mbuf *m;
1200	int err;
1201
1202	CTR4(KTR_IW_CXGBE, "%s:smrejB %p %u %d", __func__, ep, ep->hwtid,
1203	    ep->plen);
1204
1205	mpalen = sizeof(*mpa) + plen;
1206
1207	if (ep->mpa_attr.version == 2 && ep->mpa_attr.enhanced_rdma_conn) {
1208
1209		mpalen += sizeof(struct mpa_v2_conn_params);
1210		CTR4(KTR_IW_CXGBE, "%s:smrej1 %p %u %d", __func__, ep,
1211		    ep->mpa_attr.version, mpalen);
1212	}
1213
1214	mpa = malloc(mpalen, M_CXGBE, M_NOWAIT);
1215	if (mpa == NULL)
1216		return (-ENOMEM);
1217
1218	memset(mpa, 0, mpalen);
1219	memcpy(mpa->key, MPA_KEY_REP, sizeof(mpa->key));
1220	mpa->flags = MPA_REJECT;
1221	mpa->revision = mpa_rev;
1222	mpa->private_data_size = htons(plen);
1223
1224	if (ep->mpa_attr.version == 2 && ep->mpa_attr.enhanced_rdma_conn) {
1225
1226		mpa->flags |= MPA_ENHANCED_RDMA_CONN;
1227		mpa->private_data_size +=
1228			htons(sizeof(struct mpa_v2_conn_params));
1229		mpa_v2_params.ird = htons(((u16)ep->ird) |
1230				(peer2peer ? MPA_V2_PEER2PEER_MODEL :
1231				 0));
1232		mpa_v2_params.ord = htons(((u16)ep->ord) | (peer2peer ?
1233					(p2p_type ==
1234					 FW_RI_INIT_P2PTYPE_RDMA_WRITE ?
1235					 MPA_V2_RDMA_WRITE_RTR : p2p_type ==
1236					 FW_RI_INIT_P2PTYPE_READ_REQ ?
1237					 MPA_V2_RDMA_READ_RTR : 0) : 0));
1238		memcpy(mpa->private_data, &mpa_v2_params,
1239				sizeof(struct mpa_v2_conn_params));
1240
1241		if (ep->plen)
1242			memcpy(mpa->private_data +
1243					sizeof(struct mpa_v2_conn_params), pdata, plen);
1244		CTR5(KTR_IW_CXGBE, "%s:smrej3 %p %d %d %d", __func__, ep,
1245		    mpa_v2_params.ird, mpa_v2_params.ord, ep->plen);
1246	} else
1247		if (plen)
1248			memcpy(mpa->private_data, pdata, plen);
1249
1250	m = m_getm(NULL, mpalen, M_NOWAIT, MT_DATA);
1251	if (m == NULL) {
1252		free(mpa, M_CXGBE);
1253		return (-ENOMEM);
1254	}
1255	m_copyback(m, 0, mpalen, (void *)mpa);
1256	free(mpa, M_CXGBE);
1257
1258	err = -sosend(ep->com.so, NULL, NULL, m, NULL, MSG_DONTWAIT, ep->com.thread);
1259	if (!err)
1260		ep->snd_seq += mpalen;
1261	CTR4(KTR_IW_CXGBE, "%s:smrejE %p %u %d", __func__, ep, ep->hwtid, err);
1262	return err;
1263}
1264
1265static int send_mpa_reply(struct c4iw_ep *ep, const void *pdata, u8 plen)
1266{
1267	int mpalen;
1268	struct mpa_message *mpa;
1269	struct mbuf *m;
1270	struct mpa_v2_conn_params mpa_v2_params;
1271	int err;
1272
1273	CTR2(KTR_IW_CXGBE, "%s:smrepB %p", __func__, ep);
1274
1275	mpalen = sizeof(*mpa) + plen;
1276
1277	if (ep->mpa_attr.version == 2 && ep->mpa_attr.enhanced_rdma_conn) {
1278
1279		CTR3(KTR_IW_CXGBE, "%s:smrep1 %p %d", __func__, ep,
1280		    ep->mpa_attr.version);
1281		mpalen += sizeof(struct mpa_v2_conn_params);
1282	}
1283
1284	mpa = malloc(mpalen, M_CXGBE, M_NOWAIT);
1285	if (mpa == NULL)
1286		return (-ENOMEM);
1287
1288	memset(mpa, 0, sizeof(*mpa));
1289	memcpy(mpa->key, MPA_KEY_REP, sizeof(mpa->key));
1290	mpa->flags = (ep->mpa_attr.crc_enabled ? MPA_CRC : 0) |
1291		(markers_enabled ? MPA_MARKERS : 0);
1292	mpa->revision = ep->mpa_attr.version;
1293	mpa->private_data_size = htons(plen);
1294
1295	if (ep->mpa_attr.version == 2 && ep->mpa_attr.enhanced_rdma_conn) {
1296
1297		mpa->flags |= MPA_ENHANCED_RDMA_CONN;
1298		mpa->private_data_size +=
1299			htons(sizeof(struct mpa_v2_conn_params));
1300		mpa_v2_params.ird = htons((u16)ep->ird);
1301		mpa_v2_params.ord = htons((u16)ep->ord);
1302		CTR5(KTR_IW_CXGBE, "%s:smrep3 %p %d %d %d", __func__, ep,
1303		    ep->mpa_attr.version, mpa_v2_params.ird, mpa_v2_params.ord);
1304
1305		if (peer2peer && (ep->mpa_attr.p2p_type !=
1306			FW_RI_INIT_P2PTYPE_DISABLED)) {
1307
1308			mpa_v2_params.ird |= htons(MPA_V2_PEER2PEER_MODEL);
1309
1310			if (p2p_type == FW_RI_INIT_P2PTYPE_RDMA_WRITE) {
1311
1312				mpa_v2_params.ord |=
1313					htons(MPA_V2_RDMA_WRITE_RTR);
1314				CTR5(KTR_IW_CXGBE, "%s:smrep4 %p %d %d %d",
1315				    __func__, ep, p2p_type, mpa_v2_params.ird,
1316				    mpa_v2_params.ord);
1317			}
1318			else if (p2p_type == FW_RI_INIT_P2PTYPE_READ_REQ) {
1319
1320				mpa_v2_params.ord |=
1321					htons(MPA_V2_RDMA_READ_RTR);
1322				CTR5(KTR_IW_CXGBE, "%s:smrep5 %p %d %d %d",
1323				    __func__, ep, p2p_type, mpa_v2_params.ird,
1324				    mpa_v2_params.ord);
1325			}
1326		}
1327
1328		memcpy(mpa->private_data, &mpa_v2_params,
1329			sizeof(struct mpa_v2_conn_params));
1330
1331		if (ep->plen)
1332			memcpy(mpa->private_data +
1333				sizeof(struct mpa_v2_conn_params), pdata, plen);
1334	} else
1335		if (plen)
1336			memcpy(mpa->private_data, pdata, plen);
1337
1338	m = m_getm(NULL, mpalen, M_NOWAIT, MT_DATA);
1339	if (m == NULL) {
1340		free(mpa, M_CXGBE);
1341		return (-ENOMEM);
1342	}
1343	m_copyback(m, 0, mpalen, (void *)mpa);
1344	free(mpa, M_CXGBE);
1345
1346
1347	state_set(&ep->com, MPA_REP_SENT);
1348	ep->snd_seq += mpalen;
1349	err = -sosend(ep->com.so, NULL, NULL, m, NULL, MSG_DONTWAIT,
1350			ep->com.thread);
1351	CTR3(KTR_IW_CXGBE, "%s:smrepE %p %d", __func__, ep, err);
1352	return err;
1353}
1354
1355
1356
1357static void close_complete_upcall(struct c4iw_ep *ep, int status)
1358{
1359	struct iw_cm_event event;
1360
1361	CTR2(KTR_IW_CXGBE, "%s:ccuB %p", __func__, ep);
1362	memset(&event, 0, sizeof(event));
1363	event.event = IW_CM_EVENT_CLOSE;
1364	event.status = status;
1365
1366	if (ep->com.cm_id) {
1367
		CTR2(KTR_IW_CXGBE, "%s:ccu1 %p", __func__, ep);
1369		ep->com.cm_id->event_handler(ep->com.cm_id, &event);
1370		deref_cm_id(&ep->com);
1371		set_bit(CLOSE_UPCALL, &ep->com.history);
1372	}
1373	CTR2(KTR_IW_CXGBE, "%s:ccuE %p", __func__, ep);
1374}
1375
1376static int send_abort(struct c4iw_ep *ep)
1377{
1378	int err;
1379
1380	CTR2(KTR_IW_CXGBE, "%s:abB %p", __func__, ep);
1381	abort_socket(ep);
1382
1383	/*
	 * Since socket options were set as l_onoff=1 and l_linger=0 in
1385	 * abort_socket, invoking soclose here sends a RST (reset) to the peer.
1386	 */
1387	err = close_socket(&ep->com, 1);
1388	set_bit(ABORT_CONN, &ep->com.history);
1389	CTR2(KTR_IW_CXGBE, "%s:abE %p", __func__, ep);
1390
1391	/*
	 * TBD: the iw_cxgbe driver should receive an ABORT reply for every
	 * ABORT request it has sent.  But the current TOE driver does not
	 * propagate this ABORT reply event (via do_abort_rpl) to iw_cxgbe.
	 * As a workaround, drop the reference on 'ep' (taken before sending
	 * the ABORT request) here instead of in an abort_rpl() handler.
1397	 */
1398	c4iw_put_ep(&ep->com);
1399	return err;
1400}
1401
1402static void peer_close_upcall(struct c4iw_ep *ep)
1403{
1404	struct iw_cm_event event;
1405
1406	CTR2(KTR_IW_CXGBE, "%s:pcuB %p", __func__, ep);
1407	memset(&event, 0, sizeof(event));
1408	event.event = IW_CM_EVENT_DISCONNECT;
1409
1410	if (ep->com.cm_id) {
1411
1412		CTR2(KTR_IW_CXGBE, "%s:pcu1 %p", __func__, ep);
1413		ep->com.cm_id->event_handler(ep->com.cm_id, &event);
1414		set_bit(DISCONN_UPCALL, &ep->com.history);
1415	}
1416	CTR2(KTR_IW_CXGBE, "%s:pcuE %p", __func__, ep);
1417}
1418
1419static void peer_abort_upcall(struct c4iw_ep *ep)
1420{
1421	struct iw_cm_event event;
1422
1423	CTR2(KTR_IW_CXGBE, "%s:pauB %p", __func__, ep);
1424	memset(&event, 0, sizeof(event));
1425	event.event = IW_CM_EVENT_CLOSE;
1426	event.status = -ECONNRESET;
1427
1428	if (ep->com.cm_id) {
1429
1430		CTR2(KTR_IW_CXGBE, "%s:pau1 %p", __func__, ep);
1431		ep->com.cm_id->event_handler(ep->com.cm_id, &event);
1432		deref_cm_id(&ep->com);
1433		set_bit(ABORT_UPCALL, &ep->com.history);
1434	}
1435	CTR2(KTR_IW_CXGBE, "%s:pauE %p", __func__, ep);
1436}
1437
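/*
 * Deliver IW_CM_EVENT_CONNECT_REPLY to the ULP, including any private data
 * from the peer's MPA reply.  The cm_id reference is dropped for failure
 * statuses other than -ECONNABORTED.
 */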
1438static void connect_reply_upcall(struct c4iw_ep *ep, int status)
1439{
1440	struct iw_cm_event event;
1441
1442	CTR3(KTR_IW_CXGBE, "%s:cruB %p, status: %d", __func__, ep, status);
1443	memset(&event, 0, sizeof(event));
1444	event.event = IW_CM_EVENT_CONNECT_REPLY;
1445	event.status = ((status == -ECONNABORTED) || (status == -EPIPE)) ?
1446					-ECONNRESET : status;
1447	event.local_addr = ep->com.local_addr;
1448	event.remote_addr = ep->com.remote_addr;
1449
1450	if ((status == 0) || (status == -ECONNREFUSED)) {
1451
1452		if (!ep->tried_with_mpa_v1) {
1453
1454			CTR2(KTR_IW_CXGBE, "%s:cru1 %p", __func__, ep);
1455			/* this means MPA_v2 is used */
1456			event.private_data_len = ep->plen -
1457				sizeof(struct mpa_v2_conn_params);
1458			event.private_data = ep->mpa_pkt +
1459				sizeof(struct mpa_message) +
1460				sizeof(struct mpa_v2_conn_params);
1461		} else {
1462
1463			CTR2(KTR_IW_CXGBE, "%s:cru2 %p", __func__, ep);
1464			/* this means MPA_v1 is used */
1465			event.private_data_len = ep->plen;
1466			event.private_data = ep->mpa_pkt +
1467				sizeof(struct mpa_message);
1468		}
1469	}
1470
1471	if (ep->com.cm_id) {
1472
1473		CTR2(KTR_IW_CXGBE, "%s:cru3 %p", __func__, ep);
1474		set_bit(CONN_RPL_UPCALL, &ep->com.history);
1475		ep->com.cm_id->event_handler(ep->com.cm_id, &event);
1476	}
1477
1478	if(status == -ECONNABORTED) {
1479
1480		CTR3(KTR_IW_CXGBE, "%s:cruE %p %d", __func__, ep, status);
1481		return;
1482	}
1483
1484	if (status < 0) {
1485
1486		CTR3(KTR_IW_CXGBE, "%s:cru4 %p %d", __func__, ep, status);
1487		deref_cm_id(&ep->com);
1488	}
1489
1490	CTR2(KTR_IW_CXGBE, "%s:cruE %p", __func__, ep);
1491}
1492
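/*
 * Deliver IW_CM_EVENT_CONNECT_REQUEST for a new child endpoint to the
 * listening endpoint's ULP handler, advertising the negotiated IRD/ORD
 * (or the maximum supported values when MPA v1 is in use).
 */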
1493static int connect_request_upcall(struct c4iw_ep *ep)
1494{
1495	struct iw_cm_event event;
1496	int ret;
1497
1498	CTR3(KTR_IW_CXGBE, "%s: ep %p, mpa_v1 %d", __func__, ep,
1499	    ep->tried_with_mpa_v1);
1500
1501	memset(&event, 0, sizeof(event));
1502	event.event = IW_CM_EVENT_CONNECT_REQUEST;
1503	event.local_addr = ep->com.local_addr;
1504	event.remote_addr = ep->com.remote_addr;
1505	event.provider_data = ep;
1506	event.so = ep->com.so;
1507
1508	if (!ep->tried_with_mpa_v1) {
1509		/* this means MPA_v2 is used */
1510		event.ord = ep->ord;
1511		event.ird = ep->ird;
1512		event.private_data_len = ep->plen -
1513			sizeof(struct mpa_v2_conn_params);
1514		event.private_data = ep->mpa_pkt + sizeof(struct mpa_message) +
1515			sizeof(struct mpa_v2_conn_params);
1516	} else {
1517
1518		/* this means MPA_v1 is used. Send max supported */
1519		event.ord = c4iw_max_read_depth;
1520		event.ird = c4iw_max_read_depth;
1521		event.private_data_len = ep->plen;
1522		event.private_data = ep->mpa_pkt + sizeof(struct mpa_message);
1523	}
1524
1525	c4iw_get_ep(&ep->com);
1526	ret = ep->parent_ep->com.cm_id->event_handler(ep->parent_ep->com.cm_id,
1527	    &event);
1528	if(ret)
1529		c4iw_put_ep(&ep->com);
1530
1531	set_bit(CONNREQ_UPCALL, &ep->com.history);
1532	c4iw_put_ep(&ep->parent_ep->com);
1533	return ret;
1534}
1535
1536static void established_upcall(struct c4iw_ep *ep)
1537{
1538	struct iw_cm_event event;
1539
1540	CTR2(KTR_IW_CXGBE, "%s:euB %p", __func__, ep);
1541	memset(&event, 0, sizeof(event));
1542	event.event = IW_CM_EVENT_ESTABLISHED;
1543	event.ird = ep->ird;
1544	event.ord = ep->ord;
1545
1546	if (ep->com.cm_id) {
1547
1548		CTR2(KTR_IW_CXGBE, "%s:eu1 %p", __func__, ep);
1549		ep->com.cm_id->event_handler(ep->com.cm_id, &event);
1550		set_bit(ESTAB_UPCALL, &ep->com.history);
1551	}
1552	CTR2(KTR_IW_CXGBE, "%s:euE %p", __func__, ep);
1553}
1554
1555
1556/*
1557 * process_mpa_reply - process streaming mode MPA reply
1558 *
1559 * Returns:
1560 *
 * 0 upon success, indicating the MPA reply was processed or is incomplete
 * but valid so far.
1563 *
1564 * 1 if a failure requires the caller to close the connection.
1565 *
1566 * 2 if a failure requires the caller to abort the connection.
1567 */
1568static int process_mpa_reply(struct c4iw_ep *ep)
1569{
1570	struct mpa_message *mpa;
1571	struct mpa_v2_conn_params *mpa_v2_params;
1572	u16 plen;
1573	u16 resp_ird, resp_ord;
1574	u8 rtr_mismatch = 0, insuff_ird = 0;
1575	struct c4iw_qp_attributes attrs;
1576	enum c4iw_qp_attr_mask mask;
1577	int err;
1578	struct mbuf *top, *m;
1579	int flags = MSG_DONTWAIT;
1580	struct uio uio;
1581	int disconnect = 0;
1582
1583	CTR2(KTR_IW_CXGBE, "%s:pmrB %p", __func__, ep);
1584
1585	/*
1586	 * Stop mpa timer.  If it expired, then
1587	 * we ignore the MPA reply.  process_timeout()
1588	 * will abort the connection.
1589	 */
1590	if (STOP_EP_TIMER(ep))
1591		return 0;
1592
1593	uio.uio_resid = 1000000;
1594	uio.uio_td = ep->com.thread;
1595	err = soreceive(ep->com.so, NULL, &uio, &top, NULL, &flags);
1596
1597	if (err) {
1598
1599		if (err == EWOULDBLOCK) {
1600
1601			CTR2(KTR_IW_CXGBE, "%s:pmr1 %p", __func__, ep);
1602			START_EP_TIMER(ep);
1603			return 0;
1604		}
1605		err = -err;
1606		CTR2(KTR_IW_CXGBE, "%s:pmr2 %p", __func__, ep);
1607		goto err;
1608	}
1609
1610	if (ep->com.so->so_rcv.sb_mb) {
1611
1612		CTR2(KTR_IW_CXGBE, "%s:pmr3 %p", __func__, ep);
1613		printf("%s data after soreceive called! so %p sb_mb %p top %p\n",
1614		       __func__, ep->com.so, ep->com.so->so_rcv.sb_mb, top);
1615	}
1616
1617	m = top;
1618
1619	do {
1620
1621		CTR2(KTR_IW_CXGBE, "%s:pmr4 %p", __func__, ep);
1622		/*
1623		 * If we get more than the supported amount of private data
1624		 * then we must fail this connection.
1625		 */
1626		if (ep->mpa_pkt_len + m->m_len > sizeof(ep->mpa_pkt)) {
1627
1628			CTR3(KTR_IW_CXGBE, "%s:pmr5 %p %d", __func__, ep,
1629			    ep->mpa_pkt_len + m->m_len);
1630			err = (-EINVAL);
1631			goto err_stop_timer;
1632		}
1633
1634		/*
1635		 * copy the new data into our accumulation buffer.
1636		 */
1637		m_copydata(m, 0, m->m_len, &(ep->mpa_pkt[ep->mpa_pkt_len]));
1638		ep->mpa_pkt_len += m->m_len;
1639		if (!m->m_next)
1640			m = m->m_nextpkt;
1641		else
1642			m = m->m_next;
1643	} while (m);
1644
1645	m_freem(top);
1646	/*
1647	 * if we don't even have the mpa message, then bail.
1648	 */
1649	if (ep->mpa_pkt_len < sizeof(*mpa)) {
1650		return 0;
1651	}
1652	mpa = (struct mpa_message *) ep->mpa_pkt;
1653
1654	/* Validate MPA header. */
1655	if (mpa->revision > mpa_rev) {
1656
1657		CTR4(KTR_IW_CXGBE, "%s:pmr6 %p %d %d", __func__, ep,
1658		    mpa->revision, mpa_rev);
1659		printk(KERN_ERR MOD "%s MPA version mismatch. Local = %d, "
1660				" Received = %d\n", __func__, mpa_rev, mpa->revision);
1661		err = -EPROTO;
1662		goto err_stop_timer;
1663	}
1664
1665	if (memcmp(mpa->key, MPA_KEY_REP, sizeof(mpa->key))) {
1666
1667		CTR2(KTR_IW_CXGBE, "%s:pmr7 %p", __func__, ep);
1668		err = -EPROTO;
1669		goto err_stop_timer;
1670	}
1671
1672	plen = ntohs(mpa->private_data_size);
1673
1674	/*
1675	 * Fail if there's too much private data.
1676	 */
1677	if (plen > MPA_MAX_PRIVATE_DATA) {
1678
1679		CTR2(KTR_IW_CXGBE, "%s:pmr8 %p", __func__, ep);
1680		err = -EPROTO;
1681		goto err_stop_timer;
1682	}
1683
1684	/*
1685	 * If plen does not account for pkt size
1686	 */
1687	if (ep->mpa_pkt_len > (sizeof(*mpa) + plen)) {
1688
1689		CTR2(KTR_IW_CXGBE, "%s:pmr9 %p", __func__, ep);
1690		STOP_EP_TIMER(ep);
1691		err = -EPROTO;
1692		goto err_stop_timer;
1693	}
1694
1695	ep->plen = (u8) plen;
1696
1697	/*
1698	 * If we don't have all the pdata yet, then bail.
1699	 * We'll continue process when more data arrives.
1700	 */
1701	if (ep->mpa_pkt_len < (sizeof(*mpa) + plen)) {
1702
1703		CTR2(KTR_IW_CXGBE, "%s:pmra %p", __func__, ep);
1704		return 0;
1705	}
1706
1707	if (mpa->flags & MPA_REJECT) {
1708
1709		CTR2(KTR_IW_CXGBE, "%s:pmrb %p", __func__, ep);
1710		err = -ECONNREFUSED;
1711		goto err_stop_timer;
1712	}
1713
1714	/*
1715	 * If we get here we have accumulated the entire mpa
1716	 * start reply message including private data. And
1717	 * the MPA header is valid.
1718	 */
1719	state_set(&ep->com, FPDU_MODE);
1720	ep->mpa_attr.crc_enabled = (mpa->flags & MPA_CRC) | crc_enabled ? 1 : 0;
1721	ep->mpa_attr.recv_marker_enabled = markers_enabled;
1722	ep->mpa_attr.xmit_marker_enabled = mpa->flags & MPA_MARKERS ? 1 : 0;
1723	ep->mpa_attr.version = mpa->revision;
1724	ep->mpa_attr.p2p_type = FW_RI_INIT_P2PTYPE_DISABLED;
1725
1726	if (mpa->revision == 2) {
1727
1728		CTR2(KTR_IW_CXGBE, "%s:pmrc %p", __func__, ep);
1729		ep->mpa_attr.enhanced_rdma_conn =
1730			mpa->flags & MPA_ENHANCED_RDMA_CONN ? 1 : 0;
1731
1732		if (ep->mpa_attr.enhanced_rdma_conn) {
1733
1734			CTR2(KTR_IW_CXGBE, "%s:pmrd %p", __func__, ep);
1735			mpa_v2_params = (struct mpa_v2_conn_params *)
1736				(ep->mpa_pkt + sizeof(*mpa));
1737			resp_ird = ntohs(mpa_v2_params->ird) &
1738				MPA_V2_IRD_ORD_MASK;
1739			resp_ord = ntohs(mpa_v2_params->ord) &
1740				MPA_V2_IRD_ORD_MASK;
1741
1742			/*
1743			 * This is a double-check. Ideally, below checks are
1744			 * not required since ird/ord stuff has been taken
1745			 * care of in c4iw_accept_cr
1746			 */
1747			if ((ep->ird < resp_ord) || (ep->ord > resp_ird)) {
1748
1749				CTR2(KTR_IW_CXGBE, "%s:pmre %p", __func__, ep);
1750				err = -ENOMEM;
1751				ep->ird = resp_ord;
1752				ep->ord = resp_ird;
1753				insuff_ird = 1;
1754			}
1755
1756			if (ntohs(mpa_v2_params->ird) &
1757				MPA_V2_PEER2PEER_MODEL) {
1758
1759				CTR2(KTR_IW_CXGBE, "%s:pmrf %p", __func__, ep);
1760				if (ntohs(mpa_v2_params->ord) &
1761					MPA_V2_RDMA_WRITE_RTR) {
1762
1763					CTR2(KTR_IW_CXGBE, "%s:pmrg %p", __func__, ep);
1764					ep->mpa_attr.p2p_type =
1765						FW_RI_INIT_P2PTYPE_RDMA_WRITE;
1766				}
1767				else if (ntohs(mpa_v2_params->ord) &
1768					MPA_V2_RDMA_READ_RTR) {
1769
1770					CTR2(KTR_IW_CXGBE, "%s:pmrh %p", __func__, ep);
1771					ep->mpa_attr.p2p_type =
1772						FW_RI_INIT_P2PTYPE_READ_REQ;
1773				}
1774			}
1775		}
1776	} else {
1777
1778		CTR2(KTR_IW_CXGBE, "%s:pmri %p", __func__, ep);
1779
1780		if (mpa->revision == 1) {
1781
1782			CTR2(KTR_IW_CXGBE, "%s:pmrj %p", __func__, ep);
1783
1784			if (peer2peer) {
1785
1786				CTR2(KTR_IW_CXGBE, "%s:pmrk %p", __func__, ep);
1787				ep->mpa_attr.p2p_type = p2p_type;
1788			}
1789		}
1790	}
1791
1792	if (set_tcpinfo(ep)) {
1793
1794		CTR2(KTR_IW_CXGBE, "%s:pmrl %p", __func__, ep);
1795		printf("%s set_tcpinfo error\n", __func__);
1796		err = -ECONNRESET;
1797		goto err;
1798	}
1799
1800	CTR6(KTR_IW_CXGBE, "%s - crc_enabled = %d, recv_marker_enabled = %d, "
1801	    "xmit_marker_enabled = %d, version = %d p2p_type = %d", __func__,
1802	    ep->mpa_attr.crc_enabled, ep->mpa_attr.recv_marker_enabled,
1803	    ep->mpa_attr.xmit_marker_enabled, ep->mpa_attr.version,
1804	    ep->mpa_attr.p2p_type);
1805
1806	/*
1807	 * If responder's RTR does not match with that of initiator, assign
1808	 * FW_RI_INIT_P2PTYPE_DISABLED in mpa attributes so that RTR is not
1809	 * generated when moving QP to RTS state.
1810	 * A TERM message will be sent after QP has moved to RTS state
1811	 */
1812	if ((ep->mpa_attr.version == 2) && peer2peer &&
1813		(ep->mpa_attr.p2p_type != p2p_type)) {
1814
1815		CTR2(KTR_IW_CXGBE, "%s:pmrm %p", __func__, ep);
1816		ep->mpa_attr.p2p_type = FW_RI_INIT_P2PTYPE_DISABLED;
1817		rtr_mismatch = 1;
1818	}
1819
1820
1821	//ep->ofld_txq = TOEPCB(ep->com.so)->ofld_txq;
1822	attrs.mpa_attr = ep->mpa_attr;
1823	attrs.max_ird = ep->ird;
1824	attrs.max_ord = ep->ord;
1825	attrs.llp_stream_handle = ep;
1826	attrs.next_state = C4IW_QP_STATE_RTS;
1827
1828	mask = C4IW_QP_ATTR_NEXT_STATE |
1829		C4IW_QP_ATTR_LLP_STREAM_HANDLE | C4IW_QP_ATTR_MPA_ATTR |
1830		C4IW_QP_ATTR_MAX_IRD | C4IW_QP_ATTR_MAX_ORD;
1831
1832	/* bind QP and TID with INIT_WR */
1833	err = c4iw_modify_qp(ep->com.qp->rhp, ep->com.qp, mask, &attrs, 1);
1834
1835	if (err) {
1836
1837		CTR2(KTR_IW_CXGBE, "%s:pmrn %p", __func__, ep);
1838		goto err;
1839	}
1840
1841	/*
1842	 * If responder's RTR requirement did not match with what initiator
1843	 * supports, generate TERM message
1844	 */
1845	if (rtr_mismatch) {
1846
1847		CTR2(KTR_IW_CXGBE, "%s:pmro %p", __func__, ep);
1848		printk(KERN_ERR "%s: RTR mismatch, sending TERM\n", __func__);
1849		attrs.layer_etype = LAYER_MPA | DDP_LLP;
1850		attrs.ecode = MPA_NOMATCH_RTR;
1851		attrs.next_state = C4IW_QP_STATE_TERMINATE;
1852		err = c4iw_modify_qp(ep->com.qp->rhp, ep->com.qp,
1853			C4IW_QP_ATTR_NEXT_STATE, &attrs, 0);
1854		err = -ENOMEM;
1855		disconnect = 1;
1856		goto out;
1857	}
1858
1859	/*
1860	 * Generate TERM if initiator IRD is not sufficient for responder
1861	 * provided ORD. Currently, we do the same behaviour even when
1862	 * responder provided IRD is also not sufficient as regards to
1863	 * initiator ORD.
1864	 */
1865	if (insuff_ird) {
1866
1867		CTR2(KTR_IW_CXGBE, "%s:pmrp %p", __func__, ep);
1868		printk(KERN_ERR "%s: Insufficient IRD, sending TERM\n",
1869				__func__);
1870		attrs.layer_etype = LAYER_MPA | DDP_LLP;
1871		attrs.ecode = MPA_INSUFF_IRD;
1872		attrs.next_state = C4IW_QP_STATE_TERMINATE;
1873		err = c4iw_modify_qp(ep->com.qp->rhp, ep->com.qp,
1874			C4IW_QP_ATTR_NEXT_STATE, &attrs, 0);
1875		err = -ENOMEM;
1876		disconnect = 1;
1877		goto out;
1878	}
1879	goto out;
1880err_stop_timer:
1881	STOP_EP_TIMER(ep);
1882err:
1883	disconnect = 2;
1884out:
1885	connect_reply_upcall(ep, err);
1886	CTR2(KTR_IW_CXGBE, "%s:pmrE %p", __func__, ep);
1887	return disconnect;
1888}
1889
1890/*
1891 * process_mpa_request - process streaming mode MPA request
1892 *
1893 * Returns:
1894 *
1895 * 0 upon success indicating a connect request was delivered to the ULP
1896 * or the mpa request is incomplete but valid so far.
1897 *
1898 * 1 if a failure requires the caller to close the connection.
1899 *
1900 * 2 if a failure requires the caller to abort the connection.
1901 */
1902static int
1903process_mpa_request(struct c4iw_ep *ep)
1904{
1905	struct mpa_message *mpa;
1906	u16 plen;
1907	int flags = MSG_DONTWAIT;
1908	int rc;
1909	struct iovec iov;
1910	struct uio uio;
1911	enum c4iw_ep_state state = state_read(&ep->com);
1912
1913	CTR3(KTR_IW_CXGBE, "%s: ep %p, state %s", __func__, ep, states[state]);
1914
1915	if (state != MPA_REQ_WAIT)
1916		return 0;
1917
1918	iov.iov_base = &ep->mpa_pkt[ep->mpa_pkt_len];
1919	iov.iov_len = sizeof(ep->mpa_pkt) - ep->mpa_pkt_len;
1920	uio.uio_iov = &iov;
1921	uio.uio_iovcnt = 1;
1922	uio.uio_offset = 0;
1923	uio.uio_resid = sizeof(ep->mpa_pkt) - ep->mpa_pkt_len;
1924	uio.uio_segflg = UIO_SYSSPACE;
1925	uio.uio_rw = UIO_READ;
1926	uio.uio_td = NULL; /* uio.uio_td = ep->com.thread; */
1927
1928	rc = soreceive(ep->com.so, NULL, &uio, NULL, NULL, &flags);
1929	if (rc == EAGAIN)
1930		return 0;
1931	else if (rc)
1932		goto err_stop_timer;
1933
1934	KASSERT(uio.uio_offset > 0, ("%s: soreceive on so %p read no data",
1935	    __func__, ep->com.so));
1936	ep->mpa_pkt_len += uio.uio_offset;
1937
1938	/*
1939	 * If we get more than the supported amount of private data then we must
1940	 * fail this connection.  XXX: check so_rcv->sb_cc, or peek with another
1941	 * soreceive, or increase the size of mpa_pkt by 1 and abort if the last
1942	 * byte is filled by the soreceive above.
1943	 */
1944
1945	/* Don't even have the MPA message.  Wait for more data to arrive. */
1946	if (ep->mpa_pkt_len < sizeof(*mpa))
1947		return 0;
1948	mpa = (struct mpa_message *) ep->mpa_pkt;
1949
1950	/*
1951	 * Validate MPA Header.
1952	 */
1953	if (mpa->revision > mpa_rev) {
1954		log(LOG_ERR, "%s: MPA version mismatch. Local = %d,"
1955		    " Received = %d\n", __func__, mpa_rev, mpa->revision);
1956		goto err_stop_timer;
1957	}
1958
1959	if (memcmp(mpa->key, MPA_KEY_REQ, sizeof(mpa->key)))
1960		goto err_stop_timer;
1961
1962	/*
1963	 * Fail if there's too much private data.
1964	 */
1965	plen = ntohs(mpa->private_data_size);
1966	if (plen > MPA_MAX_PRIVATE_DATA)
1967		goto err_stop_timer;
1968
1969	/*
1970	 * Fail if we received more data than the MPA header plus plen accounts for.
1971	 */
1972	if (ep->mpa_pkt_len > (sizeof(*mpa) + plen))
1973		goto err_stop_timer;
1974
1975	ep->plen = (u8) plen;
1976
1977	/*
1978	 * If we don't have all the pdata yet, then bail.
1979	 */
1980	if (ep->mpa_pkt_len < (sizeof(*mpa) + plen))
1981		return 0;
1982
1983	/*
1984	 * If we get here we have accumulated the entire MPA
1985	 * request message including any private data.
1986	 */
1987	ep->mpa_attr.initiator = 0;
1988	ep->mpa_attr.crc_enabled = ((mpa->flags & MPA_CRC) | crc_enabled) ? 1 : 0;
1989	ep->mpa_attr.recv_marker_enabled = markers_enabled;
1990	ep->mpa_attr.xmit_marker_enabled = mpa->flags & MPA_MARKERS ? 1 : 0;
1991	ep->mpa_attr.version = mpa->revision;
1992	if (mpa->revision == 1)
1993		ep->tried_with_mpa_v1 = 1;
1994	ep->mpa_attr.p2p_type = FW_RI_INIT_P2PTYPE_DISABLED;
1995
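	/*
	 * For an enhanced MPA v2 request, pick up the peer's IRD/ORD and the
	 * requested RTR (peer-to-peer) mode from the v2 connection parameters
	 * that follow the MPA header.
	 */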
1996	if (mpa->revision == 2) {
1997		ep->mpa_attr.enhanced_rdma_conn =
1998		    mpa->flags & MPA_ENHANCED_RDMA_CONN ? 1 : 0;
1999		if (ep->mpa_attr.enhanced_rdma_conn) {
2000			struct mpa_v2_conn_params *mpa_v2_params;
2001			u16 ird, ord;
2002
2003			mpa_v2_params = (void *)&ep->mpa_pkt[sizeof(*mpa)];
2004			ird = ntohs(mpa_v2_params->ird);
2005			ord = ntohs(mpa_v2_params->ord);
2006
2007			ep->ird = ird & MPA_V2_IRD_ORD_MASK;
2008			ep->ord = ord & MPA_V2_IRD_ORD_MASK;
2009			if (ird & MPA_V2_PEER2PEER_MODEL && peer2peer) {
2010				if (ord & MPA_V2_RDMA_WRITE_RTR) {
2011					ep->mpa_attr.p2p_type =
2012					    FW_RI_INIT_P2PTYPE_RDMA_WRITE;
2013				} else if (ord & MPA_V2_RDMA_READ_RTR) {
2014					ep->mpa_attr.p2p_type =
2015					    FW_RI_INIT_P2PTYPE_READ_REQ;
2016				}
2017			}
2018		}
2019	} else if (mpa->revision == 1 && peer2peer)
2020		ep->mpa_attr.p2p_type = p2p_type;
2021
2022	if (set_tcpinfo(ep))
2023		goto err_stop_timer;
2024
2025	CTR5(KTR_IW_CXGBE, "%s: crc_enabled = %d, recv_marker_enabled = %d, "
2026	    "xmit_marker_enabled = %d, version = %d", __func__,
2027	    ep->mpa_attr.crc_enabled, ep->mpa_attr.recv_marker_enabled,
2028	    ep->mpa_attr.xmit_marker_enabled, ep->mpa_attr.version);
2029
2030	state_set(&ep->com, MPA_REQ_RCVD);
2031	STOP_EP_TIMER(ep);
2032
2033	/* drive upcall */
2034	mutex_lock(&ep->parent_ep->com.mutex);
2035	if (ep->parent_ep->com.state != DEAD) {
2036		if (connect_request_upcall(ep))
2037			goto err_unlock_parent;
2038	} else
2039		goto err_unlock_parent;
2040	mutex_unlock(&ep->parent_ep->com.mutex);
2041	return 0;
2042
2043err_unlock_parent:
2044	mutex_unlock(&ep->parent_ep->com.mutex);
2045	goto err_out;
2046err_stop_timer:
2047	STOP_EP_TIMER(ep);
2048err_out:
2049	return 2;
2050}
2051
2052/*
2053 * c4iw_reject_cr - reject an incoming MPA connection request on behalf of
2054 * the ULP.  Sends an MPA reject (or aborts the connection for mpa_rev 0)
2055 * and tears down the endpoint.
2056 */
2057int c4iw_reject_cr(struct iw_cm_id *cm_id, const void *pdata, u8 pdata_len)
2058{
2059	int err;
2060	struct c4iw_ep *ep = to_ep(cm_id);
2061	int abort = 0;
2062	CTR2(KTR_IW_CXGBE, "%s:crcB %p", __func__, ep);
2063
2064	if ((state_read(&ep->com) == DEAD) ||
2065			(state_read(&ep->com) != MPA_REQ_RCVD)) {
2066
2067		CTR2(KTR_IW_CXGBE, "%s:crc1 %p", __func__, ep);
2068		c4iw_put_ep(&ep->com);
2069		return -ECONNRESET;
2070	}
2071	set_bit(ULP_REJECT, &ep->com.history);
2072
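	/*
	 * MPA revision 0 has no way to convey a reject to the peer, so abort
	 * the connection; otherwise send an MPA reply with the reject bit set.
	 */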
2073	if (mpa_rev == 0) {
2074
2075		CTR2(KTR_IW_CXGBE, "%s:crc2 %p", __func__, ep);
2076		abort = 1;
2077	}
2078	else {
2079
2080		CTR2(KTR_IW_CXGBE, "%s:crc3 %p", __func__, ep);
2081		abort = send_mpa_reject(ep, pdata, pdata_len);
2082	}
2083	stop_ep_timer(ep);
2084	err = c4iw_ep_disconnect(ep, abort != 0, GFP_KERNEL);
2085	c4iw_put_ep(&ep->com);
2086	CTR3(KTR_IW_CXGBE, "%s:crc4 %p, err: %d", __func__, ep, err);
2087	return 0;
2088}
2089
2090int c4iw_accept_cr(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param)
2091{
2092	int err;
2093	struct c4iw_qp_attributes attrs;
2094	enum c4iw_qp_attr_mask mask;
2095	struct c4iw_ep *ep = to_ep(cm_id);
2096	struct c4iw_dev *h = to_c4iw_dev(cm_id->device);
2097	struct c4iw_qp *qp = get_qhp(h, conn_param->qpn);
2098	int abort = 0;
2099
2100	CTR2(KTR_IW_CXGBE, "%s:cacB %p", __func__, ep);
2101
2102	if (state_read(&ep->com) == DEAD) {
2103
2104		CTR2(KTR_IW_CXGBE, "%s:cac1 %p", __func__, ep);
2105		err = -ECONNRESET;
2106		goto err_out;
2107	}
2108
2109	BUG_ON(state_read(&ep->com) != MPA_REQ_RCVD);
2110	BUG_ON(!qp);
2111
2112	set_bit(ULP_ACCEPT, &ep->com.history);
2113
2114	if ((conn_param->ord > c4iw_max_read_depth) ||
2115		(conn_param->ird > c4iw_max_read_depth)) {
2116
2117		CTR2(KTR_IW_CXGBE, "%s:cac2 %p", __func__, ep);
2118		err = -EINVAL;
2119		goto err_abort;
2120	}
2121
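	/*
	 * For an enhanced MPA v2 connection, the IRD/ORD requested by the ULP
	 * must fit within what the peer advertised; otherwise the request is
	 * rejected (or, when the peer advertised a zero ORD, the requested IRD
	 * is reduced to 1).
	 */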
2122	if (ep->mpa_attr.version == 2 && ep->mpa_attr.enhanced_rdma_conn) {
2123
2124		CTR2(KTR_IW_CXGBE, "%s:cac3 %p", __func__, ep);
2125
2126		if (conn_param->ord > ep->ird) {
2127
2128			CTR2(KTR_IW_CXGBE, "%s:cac4 %p", __func__, ep);
2129			ep->ird = conn_param->ird;
2130			ep->ord = conn_param->ord;
2131			send_mpa_reject(ep, conn_param->private_data,
2132					conn_param->private_data_len);
2133			err = -ENOMEM;
2134			goto err_abort;
2135		}
2136
2137		if (conn_param->ird > ep->ord) {
2138
2139			CTR2(KTR_IW_CXGBE, "%s:cac5 %p", __func__, ep);
2140
2141			if (!ep->ord) {
2142
2143				CTR2(KTR_IW_CXGBE, "%s:cac6 %p", __func__, ep);
2144				conn_param->ird = 1;
2145			}
2146			else {
2147				CTR2(KTR_IW_CXGBE, "%s:cac7 %p", __func__, ep);
2148				err = -ENOMEM;
2149				goto err_abort;
2150			}
2151		}
2152
2153	}
2154	ep->ird = conn_param->ird;
2155	ep->ord = conn_param->ord;
2156
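	/*
	 * For MPA v1 in peer-to-peer mode make sure IRD is at least 1 so the
	 * initiator's RTR read request can be accommodated.
	 */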
2157	if (ep->mpa_attr.version != 2) {
2158
2159		CTR2(KTR_IW_CXGBE, "%s:cac8 %p", __func__, ep);
2160
2161		if (peer2peer && ep->ird == 0) {
2162
2163			CTR2(KTR_IW_CXGBE, "%s:cac9 %p", __func__, ep);
2164			ep->ird = 1;
2165		}
2166	}
2167
2168
2169	ep->com.cm_id = cm_id;
2170	ref_cm_id(&ep->com);
2171	ep->com.qp = qp;
2172	ref_qp(ep);
2173	//ep->ofld_txq = TOEPCB(ep->com.so)->ofld_txq;
2174
2175	/* bind QP to EP and move to RTS */
2176	attrs.mpa_attr = ep->mpa_attr;
2177	attrs.max_ird = ep->ird;
2178	attrs.max_ord = ep->ord;
2179	attrs.llp_stream_handle = ep;
2180	attrs.next_state = C4IW_QP_STATE_RTS;
2181
2182	/* bind QP and TID with INIT_WR */
2183	mask = C4IW_QP_ATTR_NEXT_STATE |
2184		C4IW_QP_ATTR_LLP_STREAM_HANDLE |
2185		C4IW_QP_ATTR_MPA_ATTR |
2186		C4IW_QP_ATTR_MAX_IRD |
2187		C4IW_QP_ATTR_MAX_ORD;
2188
2189	err = c4iw_modify_qp(ep->com.qp->rhp, ep->com.qp, mask, &attrs, 1);
2190
2191	if (err) {
2192
2193		CTR2(KTR_IW_CXGBE, "%s:caca %p", __func__, ep);
2194		goto err_deref_cm_id;
2195	}
2196	err = send_mpa_reply(ep, conn_param->private_data,
2197			conn_param->private_data_len);
2198
2199	if (err) {
2200
2201		CTR2(KTR_IW_CXGBE, "%s:cacb %p", __func__, ep);
2202		goto err_deref_cm_id;
2203	}
2204
2205	state_set(&ep->com, FPDU_MODE);
2206	established_upcall(ep);
2207	c4iw_put_ep(&ep->com);
2208	CTR2(KTR_IW_CXGBE, "%s:cacE %p", __func__, ep);
2209	return 0;
2210err_deref_cm_id:
2211	deref_cm_id(&ep->com);
2212err_abort:
2213	abort = 1;
2214err_out:
2215	if (abort)
2216		c4iw_ep_disconnect(ep, 1, GFP_KERNEL);
2217	c4iw_put_ep(&ep->com);
2218	CTR2(KTR_IW_CXGBE, "%s:cacE err %p", __func__, ep);
2219	return err;
2220}
2221
2222
2223
2224int c4iw_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param)
2225{
2226	int err = 0;
2227	struct c4iw_dev *dev = to_c4iw_dev(cm_id->device);
2228	struct c4iw_ep *ep = NULL;
2229	struct rtentry *rt;
2230	struct toedev *tdev;
2231
2232	CTR2(KTR_IW_CXGBE, "%s:ccB %p", __func__, cm_id);
2233
2234	if ((conn_param->ord > c4iw_max_read_depth) ||
2235		(conn_param->ird > c4iw_max_read_depth)) {
2236
2237		CTR2(KTR_IW_CXGBE, "%s:cc1 %p", __func__, cm_id);
2238		err = -EINVAL;
2239		goto out;
2240	}
2241	ep = alloc_ep(sizeof(*ep), M_NOWAIT);
2242
2243	if (!ep) {
2244
2245		CTR2(KTR_IW_CXGBE, "%s:cc2 %p", __func__, cm_id);
2246		printk(KERN_ERR MOD "%s - cannot alloc ep.\n", __func__);
2247		err = -ENOMEM;
2248		goto out;
2249	}
2250	init_timer(&ep->timer);
2251	ep->plen = conn_param->private_data_len;
2252
2253	if (ep->plen) {
2254
2255		CTR2(KTR_IW_CXGBE, "%s:cc3 %p", __func__, ep);
2256		memcpy(ep->mpa_pkt + sizeof(struct mpa_message),
2257				conn_param->private_data, ep->plen);
2258	}
2259	ep->ird = conn_param->ird;
2260	ep->ord = conn_param->ord;
2261
2262	if (peer2peer && ep->ord == 0) {
2263
2264		CTR2(KTR_IW_CXGBE, "%s:cc4 %p", __func__, ep);
2265		ep->ord = 1;
2266	}
2267
2268	ep->com.dev = dev;
2269	ep->com.cm_id = cm_id;
2270	ref_cm_id(&ep->com);
2271	ep->com.qp = get_qhp(dev, conn_param->qpn);
2272
2273	if (!ep->com.qp) {
2274
2275		CTR2(KTR_IW_CXGBE, "%s:cc5 %p", __func__, ep);
2276		err = -EINVAL;
2277		goto fail2;
2278	}
2279	ref_qp(ep);
2280	ep->com.thread = curthread;
2281	ep->com.so = cm_id->so;
2282
2283	init_sock(&ep->com);
2284
2285	/* find a route */
2286	rt = find_route(
2287		cm_id->local_addr.sin_addr.s_addr,
2288		cm_id->remote_addr.sin_addr.s_addr,
2289		cm_id->local_addr.sin_port,
2290		cm_id->remote_addr.sin_port, 0);
2291
2292	if (!rt) {
2293
2294		CTR2(KTR_IW_CXGBE, "%s:cc7 %p", __func__, ep);
2295		printk(KERN_ERR MOD "%s - cannot find route.\n", __func__);
2296		err = -EHOSTUNREACH;
2297		goto fail2;
2298	}
2299
2300	if (!(rt->rt_ifp->if_capenable & IFCAP_TOE)) {
2301
2302		CTR2(KTR_IW_CXGBE, "%s:cc8 %p", __func__, ep);
2303		printf("%s - interface not TOE capable.\n", __func__);
2304		close_socket(&ep->com, 0);
2305		err = -ENOPROTOOPT;
2306		goto fail3;
2307	}
2308	tdev = TOEDEV(rt->rt_ifp);
2309
2310	if (tdev == NULL) {
2311
2312		CTR2(KTR_IW_CXGBE, "%s:cc9 %p", __func__, ep);
2313		printf("%s - No toedev for interface.\n", __func__);
2314		err = -ENOPROTOOPT;
		goto fail3;
2315	}
2316	RTFREE(rt);
2317
2318	state_set(&ep->com, CONNECTING);
2319	ep->tos = 0;
2320	ep->com.local_addr = cm_id->local_addr;
2321	ep->com.remote_addr = cm_id->remote_addr;
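	/*
	 * Start the TCP connection; further progress is reported through the
	 * socket upcall once the connection is established or fails.
	 */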
2322	err = soconnect(ep->com.so, (struct sockaddr *)&ep->com.remote_addr,
2323		ep->com.thread);
2324
2325	if (!err) {
2326		CTR2(KTR_IW_CXGBE, "%s:cca %p", __func__, ep);
2327		goto out;
2328	} else {
2329		close_socket(&ep->com, 0);
2330		goto fail2;
2331	}
2332
2333fail3:
2334	CTR2(KTR_IW_CXGBE, "%s:ccb %p", __func__, ep);
2335	RTFREE(rt);
2336fail2:
2337	deref_cm_id(&ep->com);
2338	c4iw_put_ep(&ep->com);
2339out:
2340	CTR2(KTR_IW_CXGBE, "%s:ccE %p", __func__, ep);
2341	return err;
2342}
2343
2344/*
2345 * iwcm->create_listen_ep.  Returns -errno on failure.
2346 */
2347int
2348c4iw_create_listen_ep(struct iw_cm_id *cm_id, int backlog)
2349{
2350	int rc;
2351	struct c4iw_dev *dev = to_c4iw_dev(cm_id->device);
2352	struct c4iw_listen_ep *ep;
2353	struct socket *so = cm_id->so;
2354
2355	ep = alloc_ep(sizeof(*ep), GFP_KERNEL);
2356	CTR5(KTR_IW_CXGBE, "%s: cm_id %p, lso %p, ep %p, inp %p", __func__,
2357	    cm_id, so, ep, so->so_pcb);
2358	if (ep == NULL) {
2359		log(LOG_ERR, "%s: failed to alloc memory for endpoint\n",
2360		    __func__);
2361		rc = ENOMEM;
2362		goto failed;
2363	}
2364
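	/*
	 * Note: no solisten() here -- the listening socket is assumed to be
	 * set up by the caller (iw_cm layer); this endpoint only records the
	 * listener state.
	 */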
2365	ep->com.cm_id = cm_id;
2366	ref_cm_id(&ep->com);
2367	ep->com.dev = dev;
2368	ep->backlog = backlog;
2369	ep->com.local_addr = cm_id->local_addr;
2370	ep->com.thread = curthread;
2371	state_set(&ep->com, LISTEN);
2372	ep->com.so = so;
2373
2374	cm_id->provider_data = ep;
2375	return (0);
2376
2377failed:
2378	CTR3(KTR_IW_CXGBE, "%s: cm_id %p, FAILED (%d)", __func__, cm_id, rc);
2379	return (-rc);
2380}
2381
2382void
2383c4iw_destroy_listen_ep(struct iw_cm_id *cm_id)
2384{
2385	struct c4iw_listen_ep *ep = to_listen_ep(cm_id);
2386
2387	CTR4(KTR_IW_CXGBE, "%s: cm_id %p, so %p, state %s", __func__, cm_id,
2388	    cm_id->so, states[ep->com.state]);
2389
2390	state_set(&ep->com, DEAD);
2391	deref_cm_id(&ep->com);
2392	c4iw_put_ep(&ep->com);
2393
2394	return;
2395}
2396
2397int c4iw_ep_disconnect(struct c4iw_ep *ep, int abrupt, gfp_t gfp)
2398{
2399	int ret = 0;
2400	int close = 0;
2401	int fatal = 0;
2402	struct c4iw_rdev *rdev;
2403
2404	mutex_lock(&ep->com.mutex);
2405
2406	CTR2(KTR_IW_CXGBE, "%s:cedB %p", __func__, ep);
2407
2408	rdev = &ep->com.dev->rdev;
2409
2410	if (c4iw_fatal_error(rdev)) {
2411
2412		CTR2(KTR_IW_CXGBE, "%s:ced1 %p", __func__, ep);
2413		fatal = 1;
2414		close_complete_upcall(ep, -ECONNRESET);
2415		ep->com.state = DEAD;
2416	}
2417	CTR3(KTR_IW_CXGBE, "%s:ced2 %p %s", __func__, ep,
2418	    states[ep->com.state]);
2419
2420	switch (ep->com.state) {
2421
2422		case MPA_REQ_WAIT:
2423		case MPA_REQ_SENT:
2424		case MPA_REQ_RCVD:
2425		case MPA_REP_SENT:
2426		case FPDU_MODE:
2427			close = 1;
2428			if (abrupt)
2429				ep->com.state = ABORTING;
2430			else {
2431				ep->com.state = CLOSING;
2432				START_EP_TIMER(ep);
2433			}
2434			set_bit(CLOSE_SENT, &ep->com.flags);
2435			break;
2436
2437		case CLOSING:
2438
2439			if (!test_and_set_bit(CLOSE_SENT, &ep->com.flags)) {
2440
2441				close = 1;
2442				if (abrupt) {
2443					STOP_EP_TIMER(ep);
2444					ep->com.state = ABORTING;
2445				} else
2446					ep->com.state = MORIBUND;
2447			}
2448			break;
2449
2450		case MORIBUND:
2451		case ABORTING:
2452		case DEAD:
2453			CTR3(KTR_IW_CXGBE,
2454			    "%s ignoring disconnect ep %p state %u", __func__,
2455			    ep, ep->com.state);
2456			break;
2457
2458		default:
2459			BUG();
2460			break;
2461	}
2462
2463	mutex_unlock(&ep->com.mutex);
2464
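	/*
	 * Issue the abort or graceful shutdown outside the endpoint mutex.
	 */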
2465	if (close) {
2466
2467		CTR2(KTR_IW_CXGBE, "%s:ced3 %p", __func__, ep);
2468
2469		if (abrupt) {
2470
2471			CTR2(KTR_IW_CXGBE, "%s:ced4 %p", __func__, ep);
2472			set_bit(EP_DISC_ABORT, &ep->com.history);
2473			close_complete_upcall(ep, -ECONNRESET);
2474			ret = send_abort(ep);
2475		} else {
2476
2477			CTR2(KTR_IW_CXGBE, "%s:ced5 %p", __func__, ep);
2478			set_bit(EP_DISC_CLOSE, &ep->com.history);
2479
2480			if (!ep->parent_ep)
2481				__state_set(&ep->com, MORIBUND);
2482			ret = shutdown_socket(&ep->com);
2483		}
2484
2485		if (ret) {
2486
2487			fatal = 1;
2488		}
2489	}
2490
2491	if (fatal) {
2492		set_bit(EP_DISC_FAIL, &ep->com.history);
2493		if (!abrupt) {
2494			STOP_EP_TIMER(ep);
2495			close_complete_upcall(ep, -EIO);
2496		}
2497		if (ep->com.qp) {
2498			struct c4iw_qp_attributes attrs;
2499
2500			attrs.next_state = C4IW_QP_STATE_ERROR;
2501			ret = c4iw_modify_qp(ep->com.dev, ep->com.qp,
2502						C4IW_QP_ATTR_NEXT_STATE,
2503						&attrs, 1);
2504			if (ret) {
2505				CTR2(KTR_IW_CXGBE, "%s:ced7 %p", __func__, ep);
2506				printf("%s - qp <- error failed!\n", __func__);
2507			}
2508		}
2509		release_ep_resources(ep);
2510		ep->com.state = DEAD;
2511		CTR2(KTR_IW_CXGBE, "%s:ced6 %p", __func__, ep);
2512	}
2513	CTR2(KTR_IW_CXGBE, "%s:cedE %p", __func__, ep);
2514	return ret;
2515}
2516
2517#ifdef C4IW_EP_REDIRECT
2518int c4iw_ep_redirect(void *ctx, struct dst_entry *old, struct dst_entry *new,
2519		struct l2t_entry *l2t)
2520{
2521	struct c4iw_ep *ep = ctx;
2522
2523	if (ep->dst != old)
2524		return 0;
2525
2526	PDBG("%s ep %p redirect to dst %p l2t %p\n", __func__, ep, new,
2527			l2t);
2528	dst_hold(new);
2529	cxgb4_l2t_release(ep->l2t);
2530	ep->l2t = l2t;
2531	dst_release(old);
2532	ep->dst = new;
2533	return 1;
2534}
2535#endif
2536
2537
2538
2539static void ep_timeout(unsigned long arg)
2540{
2541	struct c4iw_ep *ep = (struct c4iw_ep *)arg;
2542	int kickit = 0;
2543
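	/*
	 * Timer handler: just flag the endpoint, put it on the timeout list,
	 * and let the taskqueue do the real work.
	 */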
2544	CTR2(KTR_IW_CXGBE, "%s:etB %p", __func__, ep);
2545	spin_lock(&timeout_lock);
2546
2547	if (!test_and_set_bit(TIMEOUT, &ep->com.flags)) {
2548
2549		/*
2550		 * Only insert if it is not already on the list.
2551		 */
2552		if (!ep->entry.next) {
2553			list_add_tail(&ep->entry, &timeout_list);
2554			kickit = 1;
2555		}
2556	}
2557	spin_unlock(&timeout_lock);
2558
2559	if (kickit) {
2560
2561		CTR2(KTR_IW_CXGBE, "%s:et1 %p", __func__, ep);
2562		queue_work(c4iw_taskq, &c4iw_task);
2563	}
2564	CTR2(KTR_IW_CXGBE, "%s:etE %p", __func__, ep);
2565}
2566
2567static int fw6_wr_rpl(struct adapter *sc, const __be64 *rpl)
2568{
2569	uint64_t val = be64toh(*rpl);
2570	int ret;
2571	struct c4iw_wr_wait *wr_waitp;
2572
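	/*
	 * rpl[0] carries the completion status in bits 15:8; rpl[1] carries
	 * the c4iw_wr_wait cookie for the request.
	 */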
2573	ret = (int)((val >> 8) & 0xff);
2574	wr_waitp = (struct c4iw_wr_wait *)rpl[1];
2575	CTR3(KTR_IW_CXGBE, "%s wr_waitp %p ret %u", __func__, wr_waitp, ret);
2576	if (wr_waitp)
2577		c4iw_wake_up(wr_waitp, ret ? -ret : 0);
2578
2579	return (0);
2580}
2581
2582static int fw6_cqe_handler(struct adapter *sc, const __be64 *rpl)
2583{
2584	struct t4_cqe cqe = *(const struct t4_cqe *)(&rpl[0]);
2585
2586	CTR2(KTR_IW_CXGBE, "%s rpl %p", __func__, rpl);
2587	c4iw_ev_dispatch(sc->iwarp_softc, &cqe);
2588
2589	return (0);
2590}
2591
2592static int terminate(struct sge_iq *iq, const struct rss_header *rss, struct mbuf *m)
2593{
2594	struct adapter *sc = iq->adapter;
2595	const struct cpl_rdma_terminate *cpl = mtod(m, const void *);
2596	unsigned int tid = GET_TID(cpl);
2597	struct c4iw_qp_attributes attrs;
2598	struct toepcb *toep = lookup_tid(sc, tid);
2599	struct socket *so;
2600	struct c4iw_ep *ep;
2601
2602	INP_WLOCK(toep->inp);
2603	so = inp_inpcbtosocket(toep->inp);
2604	ep = so->so_rcv.sb_upcallarg;
2605	INP_WUNLOCK(toep->inp);
2606
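	/*
	 * If the TID still has an iWARP endpoint with a QP attached, move the
	 * QP to TERMINATE so the peer's TERM message is acted on.
	 */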
2607	CTR2(KTR_IW_CXGBE, "%s:tB %p", __func__, ep);
2608
2609	if (ep && ep->com.qp) {
2610
2611		printk(KERN_WARNING MOD "TERM received tid %u qpid %u\n", tid,
2612				ep->com.qp->wq.sq.qid);
2613		attrs.next_state = C4IW_QP_STATE_TERMINATE;
2614		c4iw_modify_qp(ep->com.dev, ep->com.qp, C4IW_QP_ATTR_NEXT_STATE, &attrs,
2615				1);
2616	} else
2617		printk(KERN_WARNING MOD "TERM received tid %u no ep/qp\n", tid);
2618	CTR2(KTR_IW_CXGBE, "%s:tE %p", __func__, ep);
2619
2620	return 0;
2621}
2622
2623int __init c4iw_cm_init(void)
2624{
2625
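	/*
	 * Register the CPL, firmware message, and async notification handlers
	 * with the base cxgbe(4) driver.
	 */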
2626	t4_register_cpl_handler(CPL_RDMA_TERMINATE, terminate);
2627	t4_register_fw_msg_handler(FW6_TYPE_WR_RPL, fw6_wr_rpl);
2628	t4_register_fw_msg_handler(FW6_TYPE_CQE, fw6_cqe_handler);
2629	t4_register_an_handler(c4iw_ev_handler);
2630
2631	TAILQ_INIT(&req_list);
2632	spin_lock_init(&req_lock);
2633	INIT_LIST_HEAD(&timeout_list);
2634	spin_lock_init(&timeout_lock);
2635
2636	INIT_WORK(&c4iw_task, process_req);
2637
2638	c4iw_taskq = create_singlethread_workqueue("iw_cxgbe");
2639	if (!c4iw_taskq)
2640		return -ENOMEM;
2641
2642	return 0;
2643}
2644
2645void __exit c4iw_cm_term(void)
2646{
2647	WARN_ON(!TAILQ_EMPTY(&req_list));
2648	WARN_ON(!list_empty(&timeout_list));
2649	flush_workqueue(c4iw_taskq);
2650	destroy_workqueue(c4iw_taskq);
2651
2652	t4_register_cpl_handler(CPL_RDMA_TERMINATE, NULL);
2653	t4_register_fw_msg_handler(FW6_TYPE_WR_RPL, NULL);
2654	t4_register_fw_msg_handler(FW6_TYPE_CQE, NULL);
2655	t4_register_an_handler(NULL);
2656}
2657#endif
2658