/*
 * Copyright (c) 2006 Mellanox Technologies Ltd.  All rights reserved.
 *
 * This software is available to you under a choice of one of two
 * licenses.  You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the
 * OpenIB.org BSD license below:
 *
 *     Redistribution and use in source and binary forms, with or
 *     without modification, are permitted provided that the following
 *     conditions are met:
 *
 *      - Redistributions of source code must retain the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer.
 *
 *      - Redistributions in binary form must reproduce the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer in the documentation and/or other materials
 *        provided with the distribution.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 *
 * $Id$
 */
34#include "sdp.h"
35
36#define SDP_MAJV_MINV 0x22
37
38SDP_MODPARAM_SINT(sdp_link_layer_ib_only, 1, "Support only link layer of "
39		"type Infiniband");
40
enum {
	/*
	 * On-the-wire sizes of the SDP Hello and HelloAck messages,
	 * presumably as mandated by the SDP annex — TODO confirm against
	 * the spec; neither constant is referenced in this file.
	 */
	SDP_HH_SIZE = 76,
	SDP_HAH_SIZE = 180,
};
45
/*
 * Intentionally-empty QP asynchronous event handler; a callback is
 * required when filling in struct ib_qp_init_attr (see sdp_init_qp()),
 * but SDP takes no action on QP events.
 */
static void
sdp_qp_event_handler(struct ib_event *event, void *data)
{
}
50
51static int
52sdp_get_max_dev_sge(struct ib_device *dev)
53{
54	struct ib_device_attr attr;
55	static int max_sges = -1;
56
57	if (max_sges > 0)
58		goto out;
59
60	ib_query_device(dev, &attr);
61
62	max_sges = attr.max_sge;
63
64out:
65	return max_sges;
66}
67
68static int
69sdp_init_qp(struct socket *sk, struct rdma_cm_id *id)
70{
71	struct ib_qp_init_attr qp_init_attr = {
72		.event_handler = sdp_qp_event_handler,
73		.cap.max_send_wr = SDP_TX_SIZE,
74		.cap.max_recv_wr = SDP_RX_SIZE,
75        	.sq_sig_type = IB_SIGNAL_REQ_WR,
76        	.qp_type = IB_QPT_RC,
77	};
78	struct ib_device *device = id->device;
79	struct sdp_sock *ssk;
80	int rc;
81
82	sdp_dbg(sk, "%s\n", __func__);
83
84	ssk = sdp_sk(sk);
85	ssk->max_sge = sdp_get_max_dev_sge(device);
86	sdp_dbg(sk, "Max sges: %d\n", ssk->max_sge);
87
88	qp_init_attr.cap.max_send_sge = MIN(ssk->max_sge, SDP_MAX_SEND_SGES);
89	sdp_dbg(sk, "Setting max send sge to: %d\n",
90	    qp_init_attr.cap.max_send_sge);
91
92	qp_init_attr.cap.max_recv_sge = MIN(ssk->max_sge, SDP_MAX_RECV_SGES);
93	sdp_dbg(sk, "Setting max recv sge to: %d\n",
94	    qp_init_attr.cap.max_recv_sge);
95
96	ssk->sdp_dev = ib_get_client_data(device, &sdp_client);
97	if (!ssk->sdp_dev) {
98		sdp_warn(sk, "SDP not available on device %s\n", device->name);
99		rc = -ENODEV;
100		goto err_rx;
101	}
102
103	rc = sdp_rx_ring_create(ssk, device);
104	if (rc)
105		goto err_rx;
106
107	rc = sdp_tx_ring_create(ssk, device);
108	if (rc)
109		goto err_tx;
110
111	qp_init_attr.recv_cq = ssk->rx_ring.cq;
112	qp_init_attr.send_cq = ssk->tx_ring.cq;
113
114	rc = rdma_create_qp(id, ssk->sdp_dev->pd, &qp_init_attr);
115	if (rc) {
116		sdp_warn(sk, "Unable to create QP: %d.\n", rc);
117		goto err_qp;
118	}
119	ssk->qp = id->qp;
120	ssk->ib_device = device;
121	ssk->qp_active = 1;
122	ssk->context.device = device;
123
124	sdp_dbg(sk, "%s done\n", __func__);
125	return 0;
126
127err_qp:
128	sdp_tx_ring_destroy(ssk);
129err_tx:
130	sdp_rx_ring_destroy(ssk);
131err_rx:
132	return rc;
133}
134
/*
 * Passive side: handle an incoming connection request carrying the
 * peer's SDP Hello message.  Spawn a child socket off the listening
 * socket, create its QP, and initialize its addressing and flow-control
 * state from the Hello header.
 *
 * Returns 0 on success or a negative errno, which makes the caller
 * reject the connection.
 */
static int
sdp_connect_handler(struct socket *sk, struct rdma_cm_id *id,
    struct rdma_cm_event *event)
{
	struct sockaddr_in *src_addr;
	struct sockaddr_in *dst_addr;
	struct socket *child;
	const struct sdp_hh *h;
	struct sdp_sock *ssk;
	int rc;

	sdp_dbg(sk, "%s %p -> %p\n", __func__, sdp_sk(sk)->id, id);

	/* The peer's Hello header arrives as CM private data. */
	h = event->param.conn.private_data;
	SDP_DUMP_PACKET(sk, "RX", NULL, &h->bsdh);

	/* A Hello advertising zero max_adverts is malformed; reject it. */
	if (!h->max_adverts)
		return -EINVAL;

	child = sonewconn(sk, SS_ISCONNECTED);
	if (!child)
		return -ENOMEM;

	ssk = sdp_sk(child);
	rc = sdp_init_qp(child, id);
	/*
	 * NOTE(review): on QP setup failure the child from sonewconn()
	 * does not appear to be torn down here — confirm that cleanup
	 * happens in the caller or the socket layer.
	 */
	if (rc)
		return rc;
	SDP_WLOCK(ssk);
	/* Cross-link the CM id and the new SDP socket. */
	id->context = ssk;
	ssk->id = id;
	ssk->socket = child;
	ssk->cred = crhold(child->so_cred);
	dst_addr = (struct sockaddr_in *)&id->route.addr.dst_addr;
	src_addr = (struct sockaddr_in *)&id->route.addr.src_addr;
	ssk->fport = dst_addr->sin_port;
	ssk->faddr = dst_addr->sin_addr.s_addr;
	ssk->lport = src_addr->sin_port;
	/* Initial send credits come from the peer's advertised buffers. */
	ssk->max_bufs = ntohs(h->bsdh.bufs);
	atomic_set(&ssk->tx_ring.credits, ssk->max_bufs);
	ssk->min_bufs = tx_credits(ssk) / 4;
	/* Largest message we send = peer's receive size minus BSDH header. */
	ssk->xmit_size_goal = ntohl(h->localrcvsz) - sizeof(struct sdp_bsdh);
	sdp_init_buffers(ssk, rcvbuf_initial_size);
	ssk->state = TCPS_SYN_RECEIVED;
	SDP_WUNLOCK(ssk);

	return 0;
}
182
/*
 * Active side: handle the peer's HelloAck delivered with
 * RDMA_CM_EVENT_CONNECT_RESPONSE.  Record the negotiated credits and
 * message sizes and mark the socket connected.
 *
 * Always returns 0.
 */
static int
sdp_response_handler(struct socket *sk, struct rdma_cm_id *id,
    struct rdma_cm_event *event)
{
	const struct sdp_hah *h;
	struct sockaddr_in *dst_addr;
	struct sdp_sock *ssk;
	sdp_dbg(sk, "%s\n", __func__);

	ssk = sdp_sk(sk);
	SDP_WLOCK(ssk);
	/*
	 * NOTE(review): the state is set to ESTABLISHED even when the
	 * socket turns out to have been dropped below — confirm this is
	 * intentional.
	 */
	ssk->state = TCPS_ESTABLISHED;
	sdp_set_default_moderation(ssk);
	/* The socket was dropped while connecting; nothing more to do. */
	if (ssk->flags & SDP_DROPPED) {
		SDP_WUNLOCK(ssk);
		return 0;
	}
	if (sk->so_options & SO_KEEPALIVE)
		sdp_start_keepalive_timer(sk);
	/* The peer's HelloAck header arrives as CM private data. */
	h = event->param.conn.private_data;
	SDP_DUMP_PACKET(sk, "RX", NULL, &h->bsdh);
	/* Initial send credits come from the peer's advertised buffers. */
	ssk->max_bufs = ntohs(h->bsdh.bufs);
	atomic_set(&ssk->tx_ring.credits, ssk->max_bufs);
	ssk->min_bufs = tx_credits(ssk) / 4;
	/* Largest message we send = peer's actual receive size minus header. */
	ssk->xmit_size_goal =
		ntohl(h->actrcvsz) - sizeof(struct sdp_bsdh);
	ssk->poll_cq = 1;

	dst_addr = (struct sockaddr_in *)&id->route.addr.dst_addr;
	ssk->fport = dst_addr->sin_port;
	ssk->faddr = dst_addr->sin_addr.s_addr;
	soisconnected(sk);
	SDP_WUNLOCK(ssk);

	return 0;
}
219
220static int
221sdp_connected_handler(struct socket *sk, struct rdma_cm_event *event)
222{
223	struct sdp_sock *ssk;
224
225	sdp_dbg(sk, "%s\n", __func__);
226
227	ssk = sdp_sk(sk);
228	SDP_WLOCK(ssk);
229	ssk->state = TCPS_ESTABLISHED;
230
231	sdp_set_default_moderation(ssk);
232
233	if (sk->so_options & SO_KEEPALIVE)
234		sdp_start_keepalive_timer(sk);
235
236	if ((ssk->flags & SDP_DROPPED) == 0)
237		soisconnected(sk);
238	SDP_WUNLOCK(ssk);
239	return 0;
240}
241
242static int
243sdp_disconnected_handler(struct socket *sk)
244{
245	struct sdp_sock *ssk;
246
247	ssk = sdp_sk(sk);
248	sdp_dbg(sk, "%s\n", __func__);
249
250	SDP_WLOCK_ASSERT(ssk);
251	if (sdp_sk(sk)->state == TCPS_SYN_RECEIVED) {
252		sdp_connected_handler(sk, NULL);
253
254		if (rcv_nxt(ssk))
255			return 0;
256	}
257
258	return -ECONNRESET;
259}
260
/*
 * Central RDMA CM event dispatcher for SDP sockets — the connection
 * state machine.  Resolves addresses and routes, sends the SDP Hello as
 * CM private data on the active side, answers connect requests with a
 * HelloAck on the passive side, and handles disconnect/teardown events.
 *
 * Returns 0 to let CM processing continue, or a negative errno; on any
 * error the socket is unlinked from the CM id and notified.
 */
int
sdp_cma_handler(struct rdma_cm_id *id, struct rdma_cm_event *event)
{
	struct rdma_conn_param conn_param;
	struct socket *sk;
	struct sdp_sock *ssk;
	struct sdp_hah hah;
	struct sdp_hh hh;

	int rc = 0;

	ssk = id->context;
	sk = NULL;
	if (ssk)
		sk = ssk->socket;
	/* Ignore events on ids that are already being torn down. */
	if (!ssk || !sk || !ssk->id) {
		sdp_dbg(sk,
		    "cm_id is being torn down, event %d, ssk %p, sk %p, id %p\n",
		       	event->event, ssk, sk, id);
		return event->event == RDMA_CM_EVENT_CONNECT_REQUEST ?
			-EINVAL : 0;
	}

	sdp_dbg(sk, "%s event %d id %p\n", __func__, event->event, id);
	switch (event->event) {
	case RDMA_CM_EVENT_ADDR_RESOLVED:
		sdp_dbg(sk, "RDMA_CM_EVENT_ADDR_RESOLVED\n");

		/* Optionally refuse non-InfiniBand link layers (RoCE etc.). */
		if (sdp_link_layer_ib_only &&
			rdma_node_get_transport(id->device->node_type) ==
				RDMA_TRANSPORT_IB &&
			rdma_port_get_link_layer(id->device, id->port_num) !=
				IB_LINK_LAYER_INFINIBAND) {
			sdp_dbg(sk, "Link layer is: %d. Only IB link layer "
				"is allowed\n",
				rdma_port_get_link_layer(id->device, id->port_num));
			rc = -ENETUNREACH;
			break;
		}

		rc = rdma_resolve_route(id, SDP_ROUTE_TIMEOUT);
		break;
	case RDMA_CM_EVENT_ADDR_ERROR:
		sdp_dbg(sk, "RDMA_CM_EVENT_ADDR_ERROR\n");
		rc = -ENETUNREACH;
		break;
	case RDMA_CM_EVENT_ROUTE_RESOLVED:
		/*
		 * Active side: route known, so create the QP and initiate
		 * the connection, carrying the SDP Hello as private data.
		 */
		sdp_dbg(sk, "RDMA_CM_EVENT_ROUTE_RESOLVED : %p\n", id);
		rc = sdp_init_qp(sk, id);
		if (rc)
			break;
		/* Credits we grant the peer = receive buffers we posted. */
		atomic_set(&sdp_sk(sk)->remote_credits,
				rx_ring_posted(sdp_sk(sk)));
		memset(&hh, 0, sizeof hh);
		hh.bsdh.mid = SDP_MID_HELLO;
		hh.bsdh.len = htonl(sizeof(struct sdp_hh));
		hh.max_adverts = 1;
		/* 0x40: presumably the IPv4 capability bit — TODO confirm
		 * against the SDP spec. */
		hh.ipv_cap = 0x40;
		hh.majv_minv = SDP_MAJV_MINV;
		sdp_init_buffers(sdp_sk(sk), rcvbuf_initial_size);
		hh.bsdh.bufs = htons(rx_ring_posted(sdp_sk(sk)));
		hh.localrcvsz = hh.desremrcvsz = htonl(sdp_sk(sk)->recv_bytes);
		/* NOTE(review): max_adverts was already set to 1 above;
		 * this assignment is redundant. */
		hh.max_adverts = 0x1;
		sdp_sk(sk)->laddr =
			((struct sockaddr_in *)&id->route.addr.src_addr)->sin_addr.s_addr;
		memset(&conn_param, 0, sizeof conn_param);
		conn_param.private_data_len = sizeof hh;
		conn_param.private_data = &hh;
		conn_param.responder_resources = 4 /* TODO */;
		conn_param.initiator_depth = 4 /* TODO */;
		conn_param.retry_count = SDP_RETRY_COUNT;
		SDP_DUMP_PACKET(NULL, "TX", NULL, &hh.bsdh);
		rc = rdma_connect(id, &conn_param);
		break;
	case RDMA_CM_EVENT_ROUTE_ERROR:
		sdp_dbg(sk, "RDMA_CM_EVENT_ROUTE_ERROR : %p\n", id);
		rc = -ETIMEDOUT;
		break;
	case RDMA_CM_EVENT_CONNECT_REQUEST:
		/*
		 * Passive side: set up a child socket for the new
		 * connection and accept it with a HelloAck as private data.
		 */
		sdp_dbg(sk, "RDMA_CM_EVENT_CONNECT_REQUEST\n");
		rc = sdp_connect_handler(sk, id, event);
		if (rc) {
			sdp_dbg(sk, "Destroying qp\n");
			rdma_reject(id, NULL, 0);
			break;
		}
		/* sdp_connect_handler() re-pointed id->context at the child. */
		ssk = id->context;
		atomic_set(&ssk->remote_credits, rx_ring_posted(ssk));
		memset(&hah, 0, sizeof hah);
		hah.bsdh.mid = SDP_MID_HELLO_ACK;
		hah.bsdh.bufs = htons(rx_ring_posted(ssk));
		hah.bsdh.len = htonl(sizeof(struct sdp_hah));
		hah.majv_minv = SDP_MAJV_MINV;
		hah.ext_max_adverts = 1; /* Doesn't seem to be mandated by spec,
					    but just in case */
		hah.actrcvsz = htonl(ssk->recv_bytes);
		memset(&conn_param, 0, sizeof conn_param);
		conn_param.private_data_len = sizeof hah;
		conn_param.private_data = &hah;
		conn_param.responder_resources = 4 /* TODO */;
		conn_param.initiator_depth = 4 /* TODO */;
		conn_param.retry_count = SDP_RETRY_COUNT;
		SDP_DUMP_PACKET(sk, "TX", NULL, &hah.bsdh);
		rc = rdma_accept(id, &conn_param);
		if (rc) {
			/* Accept failed: unlink socket and id from each other. */
			ssk->id = NULL;
			id->qp = NULL;
			id->context = NULL;
		}
		break;
	case RDMA_CM_EVENT_CONNECT_RESPONSE:
		sdp_dbg(sk, "RDMA_CM_EVENT_CONNECT_RESPONSE\n");
		rc = sdp_response_handler(sk, id, event);
		if (rc) {
			sdp_dbg(sk, "Destroying qp\n");
			rdma_reject(id, NULL, 0);
		} else
			rc = rdma_accept(id, NULL);
		break;
	case RDMA_CM_EVENT_CONNECT_ERROR:
		sdp_dbg(sk, "RDMA_CM_EVENT_CONNECT_ERROR\n");
		rc = -ETIMEDOUT;
		break;
	case RDMA_CM_EVENT_UNREACHABLE:
		sdp_dbg(sk, "RDMA_CM_EVENT_UNREACHABLE\n");
		rc = -ENETUNREACH;
		break;
	case RDMA_CM_EVENT_REJECTED:
		sdp_dbg(sk, "RDMA_CM_EVENT_REJECTED\n");
		rc = -ECONNREFUSED;
		break;
	case RDMA_CM_EVENT_ESTABLISHED:
		sdp_dbg(sk, "RDMA_CM_EVENT_ESTABLISHED\n");
		sdp_sk(sk)->laddr =
			((struct sockaddr_in *)&id->route.addr.src_addr)->sin_addr.s_addr;
		rc = sdp_connected_handler(sk, event);
		break;
	case RDMA_CM_EVENT_DISCONNECTED: /* This means DREQ/DREP received */
		sdp_dbg(sk, "RDMA_CM_EVENT_DISCONNECTED\n");

		SDP_WLOCK(ssk);
		if (ssk->state == TCPS_LAST_ACK) {
			sdp_cancel_dreq_wait_timeout(ssk);

			sdp_dbg(sk, "%s: waiting for Infiniband tear down\n",
				__func__);
		}
		ssk->qp_active = 0;
		/* Drop the lock around rdma_disconnect(); it may sleep. */
		SDP_WUNLOCK(ssk);
		rdma_disconnect(id);
		SDP_WLOCK(ssk);
		if (ssk->state != TCPS_TIME_WAIT) {
			if (ssk->state == TCPS_CLOSE_WAIT) {
				sdp_dbg(sk, "IB teardown while in "
					"TCPS_CLOSE_WAIT taking reference to "
					"let close() finish the work\n");
			}
			rc = sdp_disconnected_handler(sk);
			if (rc)
				rc = -EPIPE;
		}
		SDP_WUNLOCK(ssk);
		break;
	case RDMA_CM_EVENT_TIMEWAIT_EXIT:
		sdp_dbg(sk, "RDMA_CM_EVENT_TIMEWAIT_EXIT\n");
		SDP_WLOCK(ssk);
		rc = sdp_disconnected_handler(sk);
		SDP_WUNLOCK(ssk);
		break;
	case RDMA_CM_EVENT_DEVICE_REMOVAL:
		sdp_dbg(sk, "RDMA_CM_EVENT_DEVICE_REMOVAL\n");
		rc = -ENETRESET;
		break;
	default:
		printk(KERN_ERR "SDP: Unexpected CMA event: %d\n",
		       event->event);
		rc = -ECONNABORTED;
		break;
	}

	sdp_dbg(sk, "event %d done. status %d\n", event->event, rc);

	/* On any error, detach the socket from the id and notify it. */
	if (rc) {
		SDP_WLOCK(ssk);
		if (ssk->id == id) {
			ssk->id = NULL;
			id->qp = NULL;
			id->context = NULL;
			/* sdp_notify() may consume the lock on teardown. */
			if (sdp_notify(ssk, -rc))
				SDP_WUNLOCK(ssk);
		} else
			SDP_WUNLOCK(ssk);
	}

	return rc;
}
457