1/*
2 * Copyright (c) 2004-2007 Intel Corporation.  All rights reserved.
3 * Copyright (c) 2004 Topspin Corporation.  All rights reserved.
4 * Copyright (c) 2004, 2005 Voltaire Corporation.  All rights reserved.
5 * Copyright (c) 2005 Sun Microsystems, Inc. All rights reserved.
6 *
7 * This software is available to you under a choice of one of two
8 * licenses.  You may choose to be licensed under the terms of the GNU
9 * General Public License (GPL) Version 2, available from the file
10 * COPYING in the main directory of this source tree, or the
11 * OpenIB.org BSD license below:
12 *
13 *     Redistribution and use in source and binary forms, with or
14 *     without modification, are permitted provided that the following
15 *     conditions are met:
16 *
17 *      - Redistributions of source code must retain the above
18 *        copyright notice, this list of conditions and the following
19 *        disclaimer.
20 *
21 *      - Redistributions in binary form must reproduce the above
22 *        copyright notice, this list of conditions and the following
23 *        disclaimer in the documentation and/or other materials
24 *        provided with the distribution.
25 *
26 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
27 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
28 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
29 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
30 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
31 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
32 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
33 * SOFTWARE.
34 */
35
36#include <linux/completion.h>
37#include <linux/dma-mapping.h>
38#include <linux/device.h>
39#include <linux/err.h>
40#include <linux/idr.h>
41#include <linux/interrupt.h>
42#include <linux/random.h>
43#include <linux/rbtree.h>
44#include <linux/spinlock.h>
45#include <linux/sysfs.h>
46#include <linux/workqueue.h>
47#include <linux/kdev_t.h>
48
49#include <rdma/ib_cache.h>
50#include <rdma/ib_cm.h>
51#include "cm_msgs.h"
52
53MODULE_AUTHOR("Sean Hefty");
54MODULE_DESCRIPTION("InfiniBand CM");
55MODULE_LICENSE("Dual BSD/GPL");
56
57#define PFX    "ib_cm: "
58
59/*
60 * Limit CM message timeouts to something reasonable:
61 * 8 seconds per message, with up to 15 retries
62 */
63static int max_timeout = 21;
64module_param(max_timeout, int, 0644);
65MODULE_PARM_DESC(max_timeout, "Maximum IB CM per message timeout "
66                             "(default=21, or ~8 seconds)");
67
68static void cm_add_one(struct ib_device *device);
69static void cm_remove_one(struct ib_device *device);
70
71static struct ib_client cm_client = {
72	.name   = "cm",
73	.add    = cm_add_one,
74	.remove = cm_remove_one
75};
76
77static struct ib_cm {
78	spinlock_t lock;
79	struct list_head device_list;
80	rwlock_t device_lock;
81	struct rb_root listen_service_table;
82	u64 listen_service_id;
83	/* struct rb_root peer_service_table; todo: fix peer to peer */
84	struct rb_root remote_qp_table;
85	struct rb_root remote_id_table;
86	struct rb_root remote_sidr_table;
87	struct idr local_id_table;
88	__be32 random_id_operand;
89	struct list_head timewait_list;
90	struct workqueue_struct *wq;
91} cm;
92
93/* Counter indexes ordered by attribute ID */
94enum {
95	CM_REQ_COUNTER,
96	CM_MRA_COUNTER,
97	CM_REJ_COUNTER,
98	CM_REP_COUNTER,
99	CM_RTU_COUNTER,
100	CM_DREQ_COUNTER,
101	CM_DREP_COUNTER,
102	CM_SIDR_REQ_COUNTER,
103	CM_SIDR_REP_COUNTER,
104	CM_LAP_COUNTER,
105	CM_APR_COUNTER,
106	CM_ATTR_COUNT,
107	CM_ATTR_ID_OFFSET = 0x0010,
108};
109
110enum {
111	CM_XMIT,
112	CM_XMIT_RETRIES,
113	CM_RECV,
114	CM_RECV_DUPLICATES,
115	CM_COUNTER_GROUPS
116};
117
118static char const counter_group_names[CM_COUNTER_GROUPS]
119				     [sizeof("cm_rx_duplicates")] = {
120	"cm_tx_msgs", "cm_tx_retries",
121	"cm_rx_msgs", "cm_rx_duplicates"
122};
123
124struct cm_counter_group {
125	struct kobject obj;
126	atomic_long_t counter[CM_ATTR_COUNT];
127};
128
129struct cm_counter_attribute {
130	struct attribute attr;
131	int index;
132};
133
134#define CM_COUNTER_ATTR(_name, _index) \
135struct cm_counter_attribute cm_##_name##_counter_attr = { \
136	.attr = { .name = __stringify(_name), .mode = 0444 }, \
137	.index = _index \
138}
139
140static CM_COUNTER_ATTR(req, CM_REQ_COUNTER);
141static CM_COUNTER_ATTR(mra, CM_MRA_COUNTER);
142static CM_COUNTER_ATTR(rej, CM_REJ_COUNTER);
143static CM_COUNTER_ATTR(rep, CM_REP_COUNTER);
144static CM_COUNTER_ATTR(rtu, CM_RTU_COUNTER);
145static CM_COUNTER_ATTR(dreq, CM_DREQ_COUNTER);
146static CM_COUNTER_ATTR(drep, CM_DREP_COUNTER);
147static CM_COUNTER_ATTR(sidr_req, CM_SIDR_REQ_COUNTER);
148static CM_COUNTER_ATTR(sidr_rep, CM_SIDR_REP_COUNTER);
149static CM_COUNTER_ATTR(lap, CM_LAP_COUNTER);
150static CM_COUNTER_ATTR(apr, CM_APR_COUNTER);
151
152static struct attribute *cm_counter_default_attrs[] = {
153	&cm_req_counter_attr.attr,
154	&cm_mra_counter_attr.attr,
155	&cm_rej_counter_attr.attr,
156	&cm_rep_counter_attr.attr,
157	&cm_rtu_counter_attr.attr,
158	&cm_dreq_counter_attr.attr,
159	&cm_drep_counter_attr.attr,
160	&cm_sidr_req_counter_attr.attr,
161	&cm_sidr_rep_counter_attr.attr,
162	&cm_lap_counter_attr.attr,
163	&cm_apr_counter_attr.attr,
164	NULL
165};
166
167struct cm_port {
168	struct cm_device *cm_dev;
169	struct ib_mad_agent *mad_agent;
170	struct kobject port_obj;
171	u8 port_num;
172	struct cm_counter_group counter_group[CM_COUNTER_GROUPS];
173};
174
175struct cm_device {
176	struct list_head list;
177	struct ib_device *ib_device;
178	struct device *device;
179	u8 ack_delay;
180	struct cm_port *port[0];
181};
182
183struct cm_av {
184	struct cm_port *port;
185	union ib_gid dgid;
186	struct ib_ah_attr ah_attr;
187	u16 pkey_index;
188	u8 timeout;
189};
190
191struct cm_work {
192	struct delayed_work work;
193	struct list_head list;
194	struct cm_port *port;
195	struct ib_mad_recv_wc *mad_recv_wc;	/* Received MADs */
196	__be32 local_id;			/* Established / timewait */
197	__be32 remote_id;
198	struct ib_cm_event cm_event;
199	struct ib_sa_path_rec path[0];
200};
201
202struct cm_timewait_info {
203	struct cm_work work;			/* Must be first. */
204	struct list_head list;
205	struct rb_node remote_qp_node;
206	struct rb_node remote_id_node;
207	__be64 remote_ca_guid;
208	__be32 remote_qpn;
209	u8 inserted_remote_qp;
210	u8 inserted_remote_id;
211};
212
213struct cm_id_private {
214	struct ib_cm_id	id;
215
216	struct rb_node service_node;
217	struct rb_node sidr_id_node;
218	spinlock_t lock;	/* Do not acquire inside cm.lock */
219	struct completion comp;
220	atomic_t refcount;
221
222	struct ib_mad_send_buf *msg;
223	struct cm_timewait_info *timewait_info;
224	/* todo: use alternate port on send failure */
225	struct cm_av av;
226	struct cm_av alt_av;
227	struct ib_cm_compare_data *compare_data;
228
229	void *private_data;
230	__be64 tid;
231	__be32 local_qpn;
232	__be32 remote_qpn;
233	enum ib_qp_type qp_type;
234	__be32 sq_psn;
235	__be32 rq_psn;
236	int timeout_ms;
237	enum ib_mtu path_mtu;
238	__be16 pkey;
239	u8 private_data_len;
240	u8 max_cm_retries;
241	u8 peer_to_peer;
242	u8 responder_resources;
243	u8 initiator_depth;
244	u8 retry_count;
245	u8 rnr_retry_count;
246	u8 service_timeout;
247	u8 target_ack_delay;
248
249	struct list_head work_list;
250	atomic_t work_count;
251};
252
253static void cm_work_handler(struct work_struct *work);
254
255static inline void cm_deref_id(struct cm_id_private *cm_id_priv)
256{
257	if (atomic_dec_and_test(&cm_id_priv->refcount))
258		complete(&cm_id_priv->comp);
259}
260
261static int cm_alloc_msg(struct cm_id_private *cm_id_priv,
262			struct ib_mad_send_buf **msg)
263{
264	struct ib_mad_agent *mad_agent;
265	struct ib_mad_send_buf *m;
266	struct ib_ah *ah;
267
268	mad_agent = cm_id_priv->av.port->mad_agent;
269	ah = ib_create_ah(mad_agent->qp->pd, &cm_id_priv->av.ah_attr);
270	if (IS_ERR(ah))
271		return PTR_ERR(ah);
272
273	m = ib_create_send_mad(mad_agent, cm_id_priv->id.remote_cm_qpn,
274			       cm_id_priv->av.pkey_index,
275			       0, IB_MGMT_MAD_HDR, IB_MGMT_MAD_DATA,
276			       GFP_ATOMIC);
277	if (IS_ERR(m)) {
278		ib_destroy_ah(ah);
279		return PTR_ERR(m);
280	}
281
282	/* Timeout set by caller if response is expected. */
283	m->ah = ah;
284	m->retries = cm_id_priv->max_cm_retries;
285
286	atomic_inc(&cm_id_priv->refcount);
287	m->context[0] = cm_id_priv;
288	*msg = m;
289	return 0;
290}
291
292static int cm_alloc_response_msg(struct cm_port *port,
293				 struct ib_mad_recv_wc *mad_recv_wc,
294				 struct ib_mad_send_buf **msg)
295{
296	struct ib_mad_send_buf *m;
297	struct ib_ah *ah;
298
299	ah = ib_create_ah_from_wc(port->mad_agent->qp->pd, mad_recv_wc->wc,
300				  mad_recv_wc->recv_buf.grh, port->port_num);
301	if (IS_ERR(ah))
302		return PTR_ERR(ah);
303
304	m = ib_create_send_mad(port->mad_agent, 1, mad_recv_wc->wc->pkey_index,
305			       0, IB_MGMT_MAD_HDR, IB_MGMT_MAD_DATA,
306			       GFP_ATOMIC);
307	if (IS_ERR(m)) {
308		ib_destroy_ah(ah);
309		return PTR_ERR(m);
310	}
311	m->ah = ah;
312	*msg = m;
313	return 0;
314}
315
316static void cm_free_msg(struct ib_mad_send_buf *msg)
317{
318	ib_destroy_ah(msg->ah);
319	if (msg->context[0])
320		cm_deref_id(msg->context[0]);
321	ib_free_send_mad(msg);
322}
323
324static void * cm_copy_private_data(const void *private_data,
325				   u8 private_data_len)
326{
327	void *data;
328
329	if (!private_data || !private_data_len)
330		return NULL;
331
332	data = kmemdup(private_data, private_data_len, GFP_KERNEL);
333	if (!data)
334		return ERR_PTR(-ENOMEM);
335
336	return data;
337}
338
339static void cm_set_private_data(struct cm_id_private *cm_id_priv,
340				 void *private_data, u8 private_data_len)
341{
342	if (cm_id_priv->private_data && cm_id_priv->private_data_len)
343		kfree(cm_id_priv->private_data);
344
345	cm_id_priv->private_data = private_data;
346	cm_id_priv->private_data_len = private_data_len;
347}
348
349static void cm_init_av_for_response(struct cm_port *port, struct ib_wc *wc,
350				    struct ib_grh *grh, struct cm_av *av)
351{
352	av->port = port;
353	av->pkey_index = wc->pkey_index;
354	ib_init_ah_from_wc(port->cm_dev->ib_device, port->port_num, wc,
355			   grh, &av->ah_attr);
356}
357
358static int cm_init_av_by_path(struct ib_sa_path_rec *path, struct cm_av *av)
359{
360	struct cm_device *cm_dev;
361	struct cm_port *port = NULL;
362	unsigned long flags;
363	int ret;
364	u8 p;
365
366	read_lock_irqsave(&cm.device_lock, flags);
367	list_for_each_entry(cm_dev, &cm.device_list, list) {
368		if (!ib_find_cached_gid(cm_dev->ib_device, &path->sgid,
369					&p, NULL)) {
370			port = cm_dev->port[p-1];
371			break;
372		}
373	}
374	read_unlock_irqrestore(&cm.device_lock, flags);
375
376	if (!port)
377		return -EINVAL;
378
379	ret = ib_find_cached_pkey(cm_dev->ib_device, port->port_num,
380				  be16_to_cpu(path->pkey), &av->pkey_index);
381	if (ret)
382		return ret;
383
384	av->port = port;
385	ib_init_ah_from_path(cm_dev->ib_device, port->port_num, path,
386			     &av->ah_attr);
387	av->timeout = path->packet_life_time + 1;
388	return 0;
389}
390
391static int cm_alloc_id(struct cm_id_private *cm_id_priv)
392{
393	unsigned long flags;
394	int ret, id;
395	static int next_id;
396
397	do {
398		spin_lock_irqsave(&cm.lock, flags);
399		ret = idr_get_new_above(&cm.local_id_table, cm_id_priv,
400					next_id, &id);
401		if (!ret)
402			next_id = ((unsigned) id + 1) & MAX_ID_MASK;
403		spin_unlock_irqrestore(&cm.lock, flags);
404	} while( (ret == -EAGAIN) && idr_pre_get(&cm.local_id_table, GFP_KERNEL) );
405
406	cm_id_priv->id.local_id = (__force __be32)id ^ cm.random_id_operand;
407	return ret;
408}
409
410static void cm_free_id(__be32 local_id)
411{
412	spin_lock_irq(&cm.lock);
413	idr_remove(&cm.local_id_table,
414		   (__force int) (local_id ^ cm.random_id_operand));
415	spin_unlock_irq(&cm.lock);
416}
417
418static struct cm_id_private * cm_get_id(__be32 local_id, __be32 remote_id)
419{
420	struct cm_id_private *cm_id_priv;
421
422	cm_id_priv = idr_find(&cm.local_id_table,
423			      (__force int) (local_id ^ cm.random_id_operand));
424	if (cm_id_priv) {
425		if (cm_id_priv->id.remote_id == remote_id)
426			atomic_inc(&cm_id_priv->refcount);
427		else
428			cm_id_priv = NULL;
429	}
430
431	return cm_id_priv;
432}
433
434static struct cm_id_private * cm_acquire_id(__be32 local_id, __be32 remote_id)
435{
436	struct cm_id_private *cm_id_priv;
437
438	spin_lock_irq(&cm.lock);
439	cm_id_priv = cm_get_id(local_id, remote_id);
440	spin_unlock_irq(&cm.lock);
441
442	return cm_id_priv;
443}
444
445static void cm_mask_copy(u8 *dst, u8 *src, u8 *mask)
446{
447	int i;
448
449	for (i = 0; i < IB_CM_COMPARE_SIZE / sizeof(unsigned long); i++)
450		((unsigned long *) dst)[i] = ((unsigned long *) src)[i] &
451					     ((unsigned long *) mask)[i];
452}
453
454static int cm_compare_data(struct ib_cm_compare_data *src_data,
455			   struct ib_cm_compare_data *dst_data)
456{
457	u8 src[IB_CM_COMPARE_SIZE];
458	u8 dst[IB_CM_COMPARE_SIZE];
459
460	if (!src_data || !dst_data)
461		return 0;
462
463	cm_mask_copy(src, src_data->data, dst_data->mask);
464	cm_mask_copy(dst, dst_data->data, src_data->mask);
465	return memcmp(src, dst, IB_CM_COMPARE_SIZE);
466}
467
468static int cm_compare_private_data(u8 *private_data,
469				   struct ib_cm_compare_data *dst_data)
470{
471	u8 src[IB_CM_COMPARE_SIZE];
472
473	if (!dst_data)
474		return 0;
475
476	cm_mask_copy(src, private_data, dst_data->mask);
477	return memcmp(src, dst_data->data, IB_CM_COMPARE_SIZE);
478}
479
480/*
481 * Trivial helpers to strip endian annotation and compare; the
482 * endianness doesn't actually matter since we just need a stable
483 * order for the RB tree.
484 */
485static int be32_lt(__be32 a, __be32 b)
486{
487	return (__force u32) a < (__force u32) b;
488}
489
490static int be32_gt(__be32 a, __be32 b)
491{
492	return (__force u32) a > (__force u32) b;
493}
494
495static int be64_lt(__be64 a, __be64 b)
496{
497	return (__force u64) a < (__force u64) b;
498}
499
500static int be64_gt(__be64 a, __be64 b)
501{
502	return (__force u64) a > (__force u64) b;
503}
504
505static struct cm_id_private * cm_insert_listen(struct cm_id_private *cm_id_priv)
506{
507	struct rb_node **link = &cm.listen_service_table.rb_node;
508	struct rb_node *parent = NULL;
509	struct cm_id_private *cur_cm_id_priv;
510	__be64 service_id = cm_id_priv->id.service_id;
511	__be64 service_mask = cm_id_priv->id.service_mask;
512	int data_cmp;
513
514	while (*link) {
515		parent = *link;
516		cur_cm_id_priv = rb_entry(parent, struct cm_id_private,
517					  service_node);
518		data_cmp = cm_compare_data(cm_id_priv->compare_data,
519					   cur_cm_id_priv->compare_data);
520		if ((cur_cm_id_priv->id.service_mask & service_id) ==
521		    (service_mask & cur_cm_id_priv->id.service_id) &&
522		    (cm_id_priv->id.device == cur_cm_id_priv->id.device) &&
523		    !data_cmp)
524			return cur_cm_id_priv;
525
526		if (cm_id_priv->id.device < cur_cm_id_priv->id.device)
527			link = &(*link)->rb_left;
528		else if (cm_id_priv->id.device > cur_cm_id_priv->id.device)
529			link = &(*link)->rb_right;
530		else if (be64_lt(service_id, cur_cm_id_priv->id.service_id))
531			link = &(*link)->rb_left;
532		else if (be64_gt(service_id, cur_cm_id_priv->id.service_id))
533			link = &(*link)->rb_right;
534		else if (data_cmp < 0)
535			link = &(*link)->rb_left;
536		else
537			link = &(*link)->rb_right;
538	}
539	rb_link_node(&cm_id_priv->service_node, parent, link);
540	rb_insert_color(&cm_id_priv->service_node, &cm.listen_service_table);
541	return NULL;
542}
543
544static struct cm_id_private * cm_find_listen(struct ib_device *device,
545					     __be64 service_id,
546					     u8 *private_data)
547{
548	struct rb_node *node = cm.listen_service_table.rb_node;
549	struct cm_id_private *cm_id_priv;
550	int data_cmp;
551
552	while (node) {
553		cm_id_priv = rb_entry(node, struct cm_id_private, service_node);
554		data_cmp = cm_compare_private_data(private_data,
555						   cm_id_priv->compare_data);
556		if ((cm_id_priv->id.service_mask & service_id) ==
557		     cm_id_priv->id.service_id &&
558		    (cm_id_priv->id.device == device) && !data_cmp)
559			return cm_id_priv;
560
561		if (device < cm_id_priv->id.device)
562			node = node->rb_left;
563		else if (device > cm_id_priv->id.device)
564			node = node->rb_right;
565		else if (be64_lt(service_id, cm_id_priv->id.service_id))
566			node = node->rb_left;
567		else if (be64_gt(service_id, cm_id_priv->id.service_id))
568			node = node->rb_right;
569		else if (data_cmp < 0)
570			node = node->rb_left;
571		else
572			node = node->rb_right;
573	}
574	return NULL;
575}
576
577static struct cm_timewait_info * cm_insert_remote_id(struct cm_timewait_info
578						     *timewait_info)
579{
580	struct rb_node **link = &cm.remote_id_table.rb_node;
581	struct rb_node *parent = NULL;
582	struct cm_timewait_info *cur_timewait_info;
583	__be64 remote_ca_guid = timewait_info->remote_ca_guid;
584	__be32 remote_id = timewait_info->work.remote_id;
585
586	while (*link) {
587		parent = *link;
588		cur_timewait_info = rb_entry(parent, struct cm_timewait_info,
589					     remote_id_node);
590		if (be32_lt(remote_id, cur_timewait_info->work.remote_id))
591			link = &(*link)->rb_left;
592		else if (be32_gt(remote_id, cur_timewait_info->work.remote_id))
593			link = &(*link)->rb_right;
594		else if (be64_lt(remote_ca_guid, cur_timewait_info->remote_ca_guid))
595			link = &(*link)->rb_left;
596		else if (be64_gt(remote_ca_guid, cur_timewait_info->remote_ca_guid))
597			link = &(*link)->rb_right;
598		else
599			return cur_timewait_info;
600	}
601	timewait_info->inserted_remote_id = 1;
602	rb_link_node(&timewait_info->remote_id_node, parent, link);
603	rb_insert_color(&timewait_info->remote_id_node, &cm.remote_id_table);
604	return NULL;
605}
606
607static struct cm_timewait_info * cm_find_remote_id(__be64 remote_ca_guid,
608						   __be32 remote_id)
609{
610	struct rb_node *node = cm.remote_id_table.rb_node;
611	struct cm_timewait_info *timewait_info;
612
613	while (node) {
614		timewait_info = rb_entry(node, struct cm_timewait_info,
615					 remote_id_node);
616		if (be32_lt(remote_id, timewait_info->work.remote_id))
617			node = node->rb_left;
618		else if (be32_gt(remote_id, timewait_info->work.remote_id))
619			node = node->rb_right;
620		else if (be64_lt(remote_ca_guid, timewait_info->remote_ca_guid))
621			node = node->rb_left;
622		else if (be64_gt(remote_ca_guid, timewait_info->remote_ca_guid))
623			node = node->rb_right;
624		else
625			return timewait_info;
626	}
627	return NULL;
628}
629
630static struct cm_timewait_info * cm_insert_remote_qpn(struct cm_timewait_info
631						      *timewait_info)
632{
633	struct rb_node **link = &cm.remote_qp_table.rb_node;
634	struct rb_node *parent = NULL;
635	struct cm_timewait_info *cur_timewait_info;
636	__be64 remote_ca_guid = timewait_info->remote_ca_guid;
637	__be32 remote_qpn = timewait_info->remote_qpn;
638
639	while (*link) {
640		parent = *link;
641		cur_timewait_info = rb_entry(parent, struct cm_timewait_info,
642					     remote_qp_node);
643		if (be32_lt(remote_qpn, cur_timewait_info->remote_qpn))
644			link = &(*link)->rb_left;
645		else if (be32_gt(remote_qpn, cur_timewait_info->remote_qpn))
646			link = &(*link)->rb_right;
647		else if (be64_lt(remote_ca_guid, cur_timewait_info->remote_ca_guid))
648			link = &(*link)->rb_left;
649		else if (be64_gt(remote_ca_guid, cur_timewait_info->remote_ca_guid))
650			link = &(*link)->rb_right;
651		else
652			return cur_timewait_info;
653	}
654	timewait_info->inserted_remote_qp = 1;
655	rb_link_node(&timewait_info->remote_qp_node, parent, link);
656	rb_insert_color(&timewait_info->remote_qp_node, &cm.remote_qp_table);
657	return NULL;
658}
659
660static struct cm_id_private * cm_insert_remote_sidr(struct cm_id_private
661						    *cm_id_priv)
662{
663	struct rb_node **link = &cm.remote_sidr_table.rb_node;
664	struct rb_node *parent = NULL;
665	struct cm_id_private *cur_cm_id_priv;
666	union ib_gid *port_gid = &cm_id_priv->av.dgid;
667	__be32 remote_id = cm_id_priv->id.remote_id;
668
669	while (*link) {
670		parent = *link;
671		cur_cm_id_priv = rb_entry(parent, struct cm_id_private,
672					  sidr_id_node);
673		if (be32_lt(remote_id, cur_cm_id_priv->id.remote_id))
674			link = &(*link)->rb_left;
675		else if (be32_gt(remote_id, cur_cm_id_priv->id.remote_id))
676			link = &(*link)->rb_right;
677		else {
678			int cmp;
679			cmp = memcmp(port_gid, &cur_cm_id_priv->av.dgid,
680				     sizeof *port_gid);
681			if (cmp < 0)
682				link = &(*link)->rb_left;
683			else if (cmp > 0)
684				link = &(*link)->rb_right;
685			else
686				return cur_cm_id_priv;
687		}
688	}
689	rb_link_node(&cm_id_priv->sidr_id_node, parent, link);
690	rb_insert_color(&cm_id_priv->sidr_id_node, &cm.remote_sidr_table);
691	return NULL;
692}
693
694static void cm_reject_sidr_req(struct cm_id_private *cm_id_priv,
695			       enum ib_cm_sidr_status status)
696{
697	struct ib_cm_sidr_rep_param param;
698
699	memset(&param, 0, sizeof param);
700	param.status = status;
701	ib_send_cm_sidr_rep(&cm_id_priv->id, &param);
702}
703
704struct ib_cm_id *ib_create_cm_id(struct ib_device *device,
705				 ib_cm_handler cm_handler,
706				 void *context)
707{
708	struct cm_id_private *cm_id_priv;
709	int ret;
710
711	cm_id_priv = kzalloc(sizeof *cm_id_priv, GFP_KERNEL);
712	if (!cm_id_priv)
713		return ERR_PTR(-ENOMEM);
714
715	cm_id_priv->id.state = IB_CM_IDLE;
716	cm_id_priv->id.device = device;
717	cm_id_priv->id.cm_handler = cm_handler;
718	cm_id_priv->id.context = context;
719	cm_id_priv->id.remote_cm_qpn = 1;
720	ret = cm_alloc_id(cm_id_priv);
721	if (ret)
722		goto error;
723
724	spin_lock_init(&cm_id_priv->lock);
725	init_completion(&cm_id_priv->comp);
726	INIT_LIST_HEAD(&cm_id_priv->work_list);
727	atomic_set(&cm_id_priv->work_count, -1);
728	atomic_set(&cm_id_priv->refcount, 1);
729	return &cm_id_priv->id;
730
731error:
732	kfree(cm_id_priv);
733	return ERR_PTR(-ENOMEM);
734}
735EXPORT_SYMBOL(ib_create_cm_id);
736
737static struct cm_work * cm_dequeue_work(struct cm_id_private *cm_id_priv)
738{
739	struct cm_work *work;
740
741	if (list_empty(&cm_id_priv->work_list))
742		return NULL;
743
744	work = list_entry(cm_id_priv->work_list.next, struct cm_work, list);
745	list_del(&work->list);
746	return work;
747}
748
749static void cm_free_work(struct cm_work *work)
750{
751	if (work->mad_recv_wc)
752		ib_free_recv_mad(work->mad_recv_wc);
753	kfree(work);
754}
755
756static inline int cm_convert_to_ms(int iba_time)
757{
758	/* approximate conversion to ms from 4.096us x 2^iba_time */
759	return 1 << max(iba_time - 8, 0);
760}
761
762/*
763 * calculate: 4.096x2^ack_timeout = 4.096x2^ack_delay + 2x4.096x2^life_time
764 * Because of how ack_timeout is stored, adding one doubles the timeout.
765 * To avoid large timeouts, select the max(ack_delay, life_time + 1), and
766 * increment it (round up) only if the other is within 50%.
767 */
768static u8 cm_ack_timeout(u8 ca_ack_delay, u8 packet_life_time)
769{
770	int ack_timeout = packet_life_time + 1;
771
772	if (ack_timeout >= ca_ack_delay)
773		ack_timeout += (ca_ack_delay >= (ack_timeout - 1));
774	else
775		ack_timeout = ca_ack_delay +
776			      (ack_timeout >= (ca_ack_delay - 1));
777
778	return min(31, ack_timeout);
779}
780
781static void cm_cleanup_timewait(struct cm_timewait_info *timewait_info)
782{
783	if (timewait_info->inserted_remote_id) {
784		rb_erase(&timewait_info->remote_id_node, &cm.remote_id_table);
785		timewait_info->inserted_remote_id = 0;
786	}
787
788	if (timewait_info->inserted_remote_qp) {
789		rb_erase(&timewait_info->remote_qp_node, &cm.remote_qp_table);
790		timewait_info->inserted_remote_qp = 0;
791	}
792}
793
794static struct cm_timewait_info * cm_create_timewait_info(__be32 local_id)
795{
796	struct cm_timewait_info *timewait_info;
797
798	timewait_info = kzalloc(sizeof *timewait_info, GFP_KERNEL);
799	if (!timewait_info)
800		return ERR_PTR(-ENOMEM);
801
802	timewait_info->work.local_id = local_id;
803	INIT_DELAYED_WORK(&timewait_info->work.work, cm_work_handler);
804	timewait_info->work.cm_event.event = IB_CM_TIMEWAIT_EXIT;
805	return timewait_info;
806}
807
808static void cm_enter_timewait(struct cm_id_private *cm_id_priv)
809{
810	int wait_time;
811	unsigned long flags;
812
813	spin_lock_irqsave(&cm.lock, flags);
814	cm_cleanup_timewait(cm_id_priv->timewait_info);
815	list_add_tail(&cm_id_priv->timewait_info->list, &cm.timewait_list);
816	spin_unlock_irqrestore(&cm.lock, flags);
817
818	/*
819	 * The cm_id could be destroyed by the user before we exit timewait.
820	 * To protect against this, we search for the cm_id after exiting
821	 * timewait before notifying the user that we've exited timewait.
822	 */
823	cm_id_priv->id.state = IB_CM_TIMEWAIT;
824	wait_time = cm_convert_to_ms(cm_id_priv->av.timeout);
825	queue_delayed_work(cm.wq, &cm_id_priv->timewait_info->work.work,
826			   msecs_to_jiffies(wait_time));
827	cm_id_priv->timewait_info = NULL;
828}
829
830static void cm_reset_to_idle(struct cm_id_private *cm_id_priv)
831{
832	unsigned long flags;
833
834	cm_id_priv->id.state = IB_CM_IDLE;
835	if (cm_id_priv->timewait_info) {
836		spin_lock_irqsave(&cm.lock, flags);
837		cm_cleanup_timewait(cm_id_priv->timewait_info);
838		spin_unlock_irqrestore(&cm.lock, flags);
839		kfree(cm_id_priv->timewait_info);
840		cm_id_priv->timewait_info = NULL;
841	}
842}
843
844static void cm_destroy_id(struct ib_cm_id *cm_id, int err)
845{
846	struct cm_id_private *cm_id_priv;
847	struct cm_work *work;
848
849	cm_id_priv = container_of(cm_id, struct cm_id_private, id);
850retest:
851	spin_lock_irq(&cm_id_priv->lock);
852	switch (cm_id->state) {
853	case IB_CM_LISTEN:
854		cm_id->state = IB_CM_IDLE;
855		spin_unlock_irq(&cm_id_priv->lock);
856		spin_lock_irq(&cm.lock);
857		rb_erase(&cm_id_priv->service_node, &cm.listen_service_table);
858		spin_unlock_irq(&cm.lock);
859		break;
860	case IB_CM_SIDR_REQ_SENT:
861		cm_id->state = IB_CM_IDLE;
862		ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg);
863		spin_unlock_irq(&cm_id_priv->lock);
864		break;
865	case IB_CM_SIDR_REQ_RCVD:
866		spin_unlock_irq(&cm_id_priv->lock);
867		cm_reject_sidr_req(cm_id_priv, IB_SIDR_REJECT);
868		break;
869	case IB_CM_REQ_SENT:
870		ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg);
871		spin_unlock_irq(&cm_id_priv->lock);
872		ib_send_cm_rej(cm_id, IB_CM_REJ_TIMEOUT,
873			       &cm_id_priv->id.device->node_guid,
874			       sizeof cm_id_priv->id.device->node_guid,
875			       NULL, 0);
876		break;
877	case IB_CM_REQ_RCVD:
878		if (err == -ENOMEM) {
879			/* Do not reject to allow future retries. */
880			cm_reset_to_idle(cm_id_priv);
881			spin_unlock_irq(&cm_id_priv->lock);
882		} else {
883			spin_unlock_irq(&cm_id_priv->lock);
884			ib_send_cm_rej(cm_id, IB_CM_REJ_CONSUMER_DEFINED,
885				       NULL, 0, NULL, 0);
886		}
887		break;
888	case IB_CM_MRA_REQ_RCVD:
889	case IB_CM_REP_SENT:
890	case IB_CM_MRA_REP_RCVD:
891		ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg);
892		/* Fall through */
893	case IB_CM_MRA_REQ_SENT:
894	case IB_CM_REP_RCVD:
895	case IB_CM_MRA_REP_SENT:
896		spin_unlock_irq(&cm_id_priv->lock);
897		ib_send_cm_rej(cm_id, IB_CM_REJ_CONSUMER_DEFINED,
898			       NULL, 0, NULL, 0);
899		break;
900	case IB_CM_ESTABLISHED:
901		spin_unlock_irq(&cm_id_priv->lock);
902		ib_send_cm_dreq(cm_id, NULL, 0);
903		goto retest;
904	case IB_CM_DREQ_SENT:
905		ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg);
906		cm_enter_timewait(cm_id_priv);
907		spin_unlock_irq(&cm_id_priv->lock);
908		break;
909	case IB_CM_DREQ_RCVD:
910		spin_unlock_irq(&cm_id_priv->lock);
911		ib_send_cm_drep(cm_id, NULL, 0);
912		break;
913	default:
914		spin_unlock_irq(&cm_id_priv->lock);
915		break;
916	}
917
918	cm_free_id(cm_id->local_id);
919	cm_deref_id(cm_id_priv);
920	wait_for_completion(&cm_id_priv->comp);
921	while ((work = cm_dequeue_work(cm_id_priv)) != NULL)
922		cm_free_work(work);
923	kfree(cm_id_priv->compare_data);
924	kfree(cm_id_priv->private_data);
925	kfree(cm_id_priv);
926}
927
928void ib_destroy_cm_id(struct ib_cm_id *cm_id)
929{
930	cm_destroy_id(cm_id, 0);
931}
932EXPORT_SYMBOL(ib_destroy_cm_id);
933
934int ib_cm_listen(struct ib_cm_id *cm_id, __be64 service_id, __be64 service_mask,
935		 struct ib_cm_compare_data *compare_data)
936{
937	struct cm_id_private *cm_id_priv, *cur_cm_id_priv;
938	unsigned long flags;
939	int ret = 0;
940
941	service_mask = service_mask ? service_mask : ~cpu_to_be64(0);
942	service_id &= service_mask;
943	if ((service_id & IB_SERVICE_ID_AGN_MASK) == IB_CM_ASSIGN_SERVICE_ID &&
944	    (service_id != IB_CM_ASSIGN_SERVICE_ID))
945		return -EINVAL;
946
947	cm_id_priv = container_of(cm_id, struct cm_id_private, id);
948	if (cm_id->state != IB_CM_IDLE)
949		return -EINVAL;
950
951	if (compare_data) {
952		cm_id_priv->compare_data = kzalloc(sizeof *compare_data,
953						   GFP_KERNEL);
954		if (!cm_id_priv->compare_data)
955			return -ENOMEM;
956		cm_mask_copy(cm_id_priv->compare_data->data,
957			     compare_data->data, compare_data->mask);
958		memcpy(cm_id_priv->compare_data->mask, compare_data->mask,
959		       IB_CM_COMPARE_SIZE);
960	}
961
962	cm_id->state = IB_CM_LISTEN;
963
964	spin_lock_irqsave(&cm.lock, flags);
965	if (service_id == IB_CM_ASSIGN_SERVICE_ID) {
966		cm_id->service_id = cpu_to_be64(cm.listen_service_id++);
967		cm_id->service_mask = ~cpu_to_be64(0);
968	} else {
969		cm_id->service_id = service_id;
970		cm_id->service_mask = service_mask;
971	}
972	cur_cm_id_priv = cm_insert_listen(cm_id_priv);
973	spin_unlock_irqrestore(&cm.lock, flags);
974
975	if (cur_cm_id_priv) {
976		cm_id->state = IB_CM_IDLE;
977		kfree(cm_id_priv->compare_data);
978		cm_id_priv->compare_data = NULL;
979		ret = -EBUSY;
980	}
981	return ret;
982}
983EXPORT_SYMBOL(ib_cm_listen);
984
985static __be64 cm_form_tid(struct cm_id_private *cm_id_priv,
986			  enum cm_msg_sequence msg_seq)
987{
988	u64 hi_tid, low_tid;
989
990	hi_tid   = ((u64) cm_id_priv->av.port->mad_agent->hi_tid) << 32;
991	low_tid  = (u64) ((__force u32)cm_id_priv->id.local_id |
992			  (msg_seq << 30));
993	return cpu_to_be64(hi_tid | low_tid);
994}
995
996static void cm_format_mad_hdr(struct ib_mad_hdr *hdr,
997			      __be16 attr_id, __be64 tid)
998{
999	hdr->base_version  = IB_MGMT_BASE_VERSION;
1000	hdr->mgmt_class	   = IB_MGMT_CLASS_CM;
1001	hdr->class_version = IB_CM_CLASS_VERSION;
1002	hdr->method	   = IB_MGMT_METHOD_SEND;
1003	hdr->attr_id	   = attr_id;
1004	hdr->tid	   = tid;
1005}
1006
1007static void cm_format_req(struct cm_req_msg *req_msg,
1008			  struct cm_id_private *cm_id_priv,
1009			  struct ib_cm_req_param *param)
1010{
1011	struct ib_sa_path_rec *pri_path = param->primary_path;
1012	struct ib_sa_path_rec *alt_path = param->alternate_path;
1013
1014	cm_format_mad_hdr(&req_msg->hdr, CM_REQ_ATTR_ID,
1015			  cm_form_tid(cm_id_priv, CM_MSG_SEQUENCE_REQ));
1016
1017	req_msg->local_comm_id = cm_id_priv->id.local_id;
1018	req_msg->service_id = param->service_id;
1019	req_msg->local_ca_guid = cm_id_priv->id.device->node_guid;
1020	cm_req_set_local_qpn(req_msg, cpu_to_be32(param->qp_num));
1021	cm_req_set_resp_res(req_msg, param->responder_resources);
1022	cm_req_set_init_depth(req_msg, param->initiator_depth);
1023	cm_req_set_remote_resp_timeout(req_msg,
1024				       param->remote_cm_response_timeout);
1025       if (param->remote_cm_response_timeout > (u8) max_timeout) {
1026               printk(KERN_WARNING PFX "req remote_cm_response_timeout %d > "
1027                      "%d, decreasing\n", param->remote_cm_response_timeout,
1028                      max_timeout);
1029               cm_req_set_remote_resp_timeout(req_msg, (u8) max_timeout);
1030       }
1031	cm_req_set_qp_type(req_msg, param->qp_type);
1032	cm_req_set_flow_ctrl(req_msg, param->flow_control);
1033	cm_req_set_starting_psn(req_msg, cpu_to_be32(param->starting_psn));
1034	cm_req_set_local_resp_timeout(req_msg,
1035				      param->local_cm_response_timeout);
1036       if (param->local_cm_response_timeout > (u8) max_timeout) {
1037               printk(KERN_WARNING PFX "req local_cm_response_timeout %d > "
1038                      "%d, decreasing\n", param->local_cm_response_timeout,
1039                      max_timeout);
1040               cm_req_set_local_resp_timeout(req_msg, (u8) max_timeout);
1041       }
1042	cm_req_set_retry_count(req_msg, param->retry_count);
1043	req_msg->pkey = param->primary_path->pkey;
1044	cm_req_set_path_mtu(req_msg, param->primary_path->mtu);
1045	cm_req_set_rnr_retry_count(req_msg, param->rnr_retry_count);
1046	cm_req_set_max_cm_retries(req_msg, param->max_cm_retries);
1047	cm_req_set_srq(req_msg, param->srq);
1048
1049	if (pri_path->hop_limit <= 1) {
1050		req_msg->primary_local_lid = pri_path->slid;
1051		req_msg->primary_remote_lid = pri_path->dlid;
1052	} else {
1053		/* Work-around until there's a way to obtain remote LID info */
1054		req_msg->primary_local_lid = IB_LID_PERMISSIVE;
1055		req_msg->primary_remote_lid = IB_LID_PERMISSIVE;
1056	}
1057	req_msg->primary_local_gid = pri_path->sgid;
1058	req_msg->primary_remote_gid = pri_path->dgid;
1059	cm_req_set_primary_flow_label(req_msg, pri_path->flow_label);
1060	cm_req_set_primary_packet_rate(req_msg, pri_path->rate);
1061	req_msg->primary_traffic_class = pri_path->traffic_class;
1062	req_msg->primary_hop_limit = pri_path->hop_limit;
1063	cm_req_set_primary_sl(req_msg, pri_path->sl);
1064	cm_req_set_primary_subnet_local(req_msg, (pri_path->hop_limit <= 1));
1065	cm_req_set_primary_local_ack_timeout(req_msg,
1066		cm_ack_timeout(cm_id_priv->av.port->cm_dev->ack_delay,
1067			       pri_path->packet_life_time));
1068
1069	if (alt_path) {
1070		if (alt_path->hop_limit <= 1) {
1071			req_msg->alt_local_lid = alt_path->slid;
1072			req_msg->alt_remote_lid = alt_path->dlid;
1073		} else {
1074			req_msg->alt_local_lid = IB_LID_PERMISSIVE;
1075			req_msg->alt_remote_lid = IB_LID_PERMISSIVE;
1076		}
1077		req_msg->alt_local_gid = alt_path->sgid;
1078		req_msg->alt_remote_gid = alt_path->dgid;
1079		cm_req_set_alt_flow_label(req_msg,
1080					  alt_path->flow_label);
1081		cm_req_set_alt_packet_rate(req_msg, alt_path->rate);
1082		req_msg->alt_traffic_class = alt_path->traffic_class;
1083		req_msg->alt_hop_limit = alt_path->hop_limit;
1084		cm_req_set_alt_sl(req_msg, alt_path->sl);
1085		cm_req_set_alt_subnet_local(req_msg, (alt_path->hop_limit <= 1));
1086		cm_req_set_alt_local_ack_timeout(req_msg,
1087			cm_ack_timeout(cm_id_priv->av.port->cm_dev->ack_delay,
1088				       alt_path->packet_life_time));
1089	}
1090
1091	if (param->private_data && param->private_data_len)
1092		memcpy(req_msg->private_data, param->private_data,
1093		       param->private_data_len);
1094}
1095
1096static int cm_validate_req_param(struct ib_cm_req_param *param)
1097{
1098	/* peer-to-peer not supported */
1099	if (param->peer_to_peer)
1100		return -EINVAL;
1101
1102	if (!param->primary_path)
1103		return -EINVAL;
1104
1105	if (param->qp_type != IB_QPT_RC && param->qp_type != IB_QPT_UC)
1106		return -EINVAL;
1107
1108	if (param->private_data &&
1109	    param->private_data_len > IB_CM_REQ_PRIVATE_DATA_SIZE)
1110		return -EINVAL;
1111
1112	if (param->alternate_path &&
1113	    (param->alternate_path->pkey != param->primary_path->pkey ||
1114	     param->alternate_path->mtu != param->primary_path->mtu))
1115		return -EINVAL;
1116
1117	return 0;
1118}
1119
1120int ib_send_cm_req(struct ib_cm_id *cm_id,
1121		   struct ib_cm_req_param *param)
1122{
1123	struct cm_id_private *cm_id_priv;
1124	struct cm_req_msg *req_msg;
1125	unsigned long flags;
1126	int ret;
1127
1128	ret = cm_validate_req_param(param);
1129	if (ret)
1130		return ret;
1131
1132	/* Verify that we're not in timewait. */
1133	cm_id_priv = container_of(cm_id, struct cm_id_private, id);
1134	spin_lock_irqsave(&cm_id_priv->lock, flags);
1135	if (cm_id->state != IB_CM_IDLE) {
1136		spin_unlock_irqrestore(&cm_id_priv->lock, flags);
1137		ret = -EINVAL;
1138		goto out;
1139	}
1140	spin_unlock_irqrestore(&cm_id_priv->lock, flags);
1141
1142	cm_id_priv->timewait_info = cm_create_timewait_info(cm_id_priv->
1143							    id.local_id);
1144	if (IS_ERR(cm_id_priv->timewait_info)) {
1145		ret = PTR_ERR(cm_id_priv->timewait_info);
1146		goto out;
1147	}
1148
1149	ret = cm_init_av_by_path(param->primary_path, &cm_id_priv->av);
1150	if (ret)
1151		goto error1;
1152	if (param->alternate_path) {
1153		ret = cm_init_av_by_path(param->alternate_path,
1154					 &cm_id_priv->alt_av);
1155		if (ret)
1156			goto error1;
1157	}
1158	cm_id->service_id = param->service_id;
1159	cm_id->service_mask = ~cpu_to_be64(0);
1160	cm_id_priv->timeout_ms = cm_convert_to_ms(
1161				    param->primary_path->packet_life_time) * 2 +
1162				 cm_convert_to_ms(
1163				    param->remote_cm_response_timeout);
1164       if (cm_id_priv->timeout_ms > cm_convert_to_ms(max_timeout)) {
1165               printk(KERN_WARNING PFX "req timeout_ms %d > %d, decreasing\n",
1166                      cm_id_priv->timeout_ms, cm_convert_to_ms(max_timeout));
1167               cm_id_priv->timeout_ms = cm_convert_to_ms(max_timeout);
1168       }
1169	cm_id_priv->max_cm_retries = param->max_cm_retries;
1170	cm_id_priv->initiator_depth = param->initiator_depth;
1171	cm_id_priv->responder_resources = param->responder_resources;
1172	cm_id_priv->retry_count = param->retry_count;
1173	cm_id_priv->path_mtu = param->primary_path->mtu;
1174	cm_id_priv->pkey = param->primary_path->pkey;
1175	cm_id_priv->qp_type = param->qp_type;
1176
1177	ret = cm_alloc_msg(cm_id_priv, &cm_id_priv->msg);
1178	if (ret)
1179		goto error1;
1180
1181	req_msg = (struct cm_req_msg *) cm_id_priv->msg->mad;
1182	cm_format_req(req_msg, cm_id_priv, param);
1183	cm_id_priv->tid = req_msg->hdr.tid;
1184	cm_id_priv->msg->timeout_ms = cm_id_priv->timeout_ms;
1185	cm_id_priv->msg->context[1] = (void *) (unsigned long) IB_CM_REQ_SENT;
1186
1187	cm_id_priv->local_qpn = cm_req_get_local_qpn(req_msg);
1188	cm_id_priv->rq_psn = cm_req_get_starting_psn(req_msg);
1189
1190	spin_lock_irqsave(&cm_id_priv->lock, flags);
1191	ret = ib_post_send_mad(cm_id_priv->msg, NULL);
1192	if (ret) {
1193		spin_unlock_irqrestore(&cm_id_priv->lock, flags);
1194		goto error2;
1195	}
1196	BUG_ON(cm_id->state != IB_CM_IDLE);
1197	cm_id->state = IB_CM_REQ_SENT;
1198	spin_unlock_irqrestore(&cm_id_priv->lock, flags);
1199	return 0;
1200
1201error2:	cm_free_msg(cm_id_priv->msg);
1202error1:	kfree(cm_id_priv->timewait_info);
1203out:	return ret;
1204}
1205EXPORT_SYMBOL(ib_send_cm_req);
1206
1207static int cm_issue_rej(struct cm_port *port,
1208			struct ib_mad_recv_wc *mad_recv_wc,
1209			enum ib_cm_rej_reason reason,
1210			enum cm_msg_response msg_rejected,
1211			void *ari, u8 ari_length)
1212{
1213	struct ib_mad_send_buf *msg = NULL;
1214	struct cm_rej_msg *rej_msg, *rcv_msg;
1215	int ret;
1216
1217	ret = cm_alloc_response_msg(port, mad_recv_wc, &msg);
1218	if (ret)
1219		return ret;
1220
1221	/* We just need common CM header information.  Cast to any message. */
1222	rcv_msg = (struct cm_rej_msg *) mad_recv_wc->recv_buf.mad;
1223	rej_msg = (struct cm_rej_msg *) msg->mad;
1224
1225	cm_format_mad_hdr(&rej_msg->hdr, CM_REJ_ATTR_ID, rcv_msg->hdr.tid);
1226	rej_msg->remote_comm_id = rcv_msg->local_comm_id;
1227	rej_msg->local_comm_id = rcv_msg->remote_comm_id;
1228	cm_rej_set_msg_rejected(rej_msg, msg_rejected);
1229	rej_msg->reason = cpu_to_be16(reason);
1230
1231	if (ari && ari_length) {
1232		cm_rej_set_reject_info_len(rej_msg, ari_length);
1233		memcpy(rej_msg->ari, ari, ari_length);
1234	}
1235
1236	ret = ib_post_send_mad(msg, NULL);
1237	if (ret)
1238		cm_free_msg(msg);
1239
1240	return ret;
1241}
1242
1243static inline int cm_is_active_peer(__be64 local_ca_guid, __be64 remote_ca_guid,
1244				    __be32 local_qpn, __be32 remote_qpn)
1245{
1246	return (be64_to_cpu(local_ca_guid) > be64_to_cpu(remote_ca_guid) ||
1247		((local_ca_guid == remote_ca_guid) &&
1248		 (be32_to_cpu(local_qpn) > be32_to_cpu(remote_qpn))));
1249}
1250
1251static void cm_format_paths_from_req(struct cm_req_msg *req_msg,
1252					    struct ib_sa_path_rec *primary_path,
1253					    struct ib_sa_path_rec *alt_path)
1254{
1255	memset(primary_path, 0, sizeof *primary_path);
1256	primary_path->dgid = req_msg->primary_local_gid;
1257	primary_path->sgid = req_msg->primary_remote_gid;
1258	primary_path->dlid = req_msg->primary_local_lid;
1259	primary_path->slid = req_msg->primary_remote_lid;
1260	primary_path->flow_label = cm_req_get_primary_flow_label(req_msg);
1261	primary_path->hop_limit = req_msg->primary_hop_limit;
1262	primary_path->traffic_class = req_msg->primary_traffic_class;
1263	primary_path->reversible = 1;
1264	primary_path->pkey = req_msg->pkey;
1265	primary_path->sl = cm_req_get_primary_sl(req_msg);
1266	primary_path->mtu_selector = IB_SA_EQ;
1267	primary_path->mtu = cm_req_get_path_mtu(req_msg);
1268	primary_path->rate_selector = IB_SA_EQ;
1269	primary_path->rate = cm_req_get_primary_packet_rate(req_msg);
1270	primary_path->packet_life_time_selector = IB_SA_EQ;
1271	primary_path->packet_life_time =
1272		cm_req_get_primary_local_ack_timeout(req_msg);
1273	primary_path->packet_life_time -= (primary_path->packet_life_time > 0);
1274
1275	if (req_msg->alt_local_lid) {
1276		memset(alt_path, 0, sizeof *alt_path);
1277		alt_path->dgid = req_msg->alt_local_gid;
1278		alt_path->sgid = req_msg->alt_remote_gid;
1279		alt_path->dlid = req_msg->alt_local_lid;
1280		alt_path->slid = req_msg->alt_remote_lid;
1281		alt_path->flow_label = cm_req_get_alt_flow_label(req_msg);
1282		alt_path->hop_limit = req_msg->alt_hop_limit;
1283		alt_path->traffic_class = req_msg->alt_traffic_class;
1284		alt_path->reversible = 1;
1285		alt_path->pkey = req_msg->pkey;
1286		alt_path->sl = cm_req_get_alt_sl(req_msg);
1287		alt_path->mtu_selector = IB_SA_EQ;
1288		alt_path->mtu = cm_req_get_path_mtu(req_msg);
1289		alt_path->rate_selector = IB_SA_EQ;
1290		alt_path->rate = cm_req_get_alt_packet_rate(req_msg);
1291		alt_path->packet_life_time_selector = IB_SA_EQ;
1292		alt_path->packet_life_time =
1293			cm_req_get_alt_local_ack_timeout(req_msg);
1294		alt_path->packet_life_time -= (alt_path->packet_life_time > 0);
1295	}
1296}
1297
1298static void cm_format_req_event(struct cm_work *work,
1299				struct cm_id_private *cm_id_priv,
1300				struct ib_cm_id *listen_id)
1301{
1302	struct cm_req_msg *req_msg;
1303	struct ib_cm_req_event_param *param;
1304
1305	req_msg = (struct cm_req_msg *)work->mad_recv_wc->recv_buf.mad;
1306	param = &work->cm_event.param.req_rcvd;
1307	param->listen_id = listen_id;
1308	param->port = cm_id_priv->av.port->port_num;
1309	param->primary_path = &work->path[0];
1310	if (req_msg->alt_local_lid)
1311		param->alternate_path = &work->path[1];
1312	else
1313		param->alternate_path = NULL;
1314	param->remote_ca_guid = req_msg->local_ca_guid;
1315	param->remote_qkey = be32_to_cpu(req_msg->local_qkey);
1316	param->remote_qpn = be32_to_cpu(cm_req_get_local_qpn(req_msg));
1317	param->qp_type = cm_req_get_qp_type(req_msg);
1318	param->starting_psn = be32_to_cpu(cm_req_get_starting_psn(req_msg));
1319	param->responder_resources = cm_req_get_init_depth(req_msg);
1320	param->initiator_depth = cm_req_get_resp_res(req_msg);
1321	param->local_cm_response_timeout =
1322					cm_req_get_remote_resp_timeout(req_msg);
1323	param->flow_control = cm_req_get_flow_ctrl(req_msg);
1324	param->remote_cm_response_timeout =
1325					cm_req_get_local_resp_timeout(req_msg);
1326	param->retry_count = cm_req_get_retry_count(req_msg);
1327	param->rnr_retry_count = cm_req_get_rnr_retry_count(req_msg);
1328	param->srq = cm_req_get_srq(req_msg);
1329	work->cm_event.private_data = &req_msg->private_data;
1330}
1331
1332static void cm_process_work(struct cm_id_private *cm_id_priv,
1333			    struct cm_work *work)
1334{
1335	int ret;
1336
1337	/* We will typically only have the current event to report. */
1338	ret = cm_id_priv->id.cm_handler(&cm_id_priv->id, &work->cm_event);
1339	cm_free_work(work);
1340
1341	while (!ret && !atomic_add_negative(-1, &cm_id_priv->work_count)) {
1342		spin_lock_irq(&cm_id_priv->lock);
1343		work = cm_dequeue_work(cm_id_priv);
1344		spin_unlock_irq(&cm_id_priv->lock);
1345		BUG_ON(!work);
1346		ret = cm_id_priv->id.cm_handler(&cm_id_priv->id,
1347						&work->cm_event);
1348		cm_free_work(work);
1349	}
1350	cm_deref_id(cm_id_priv);
1351	if (ret)
1352		cm_destroy_id(&cm_id_priv->id, ret);
1353}
1354
1355static void cm_format_mra(struct cm_mra_msg *mra_msg,
1356			  struct cm_id_private *cm_id_priv,
1357			  enum cm_msg_response msg_mraed, u8 service_timeout,
1358			  const void *private_data, u8 private_data_len)
1359{
1360	cm_format_mad_hdr(&mra_msg->hdr, CM_MRA_ATTR_ID, cm_id_priv->tid);
1361	cm_mra_set_msg_mraed(mra_msg, msg_mraed);
1362	mra_msg->local_comm_id = cm_id_priv->id.local_id;
1363	mra_msg->remote_comm_id = cm_id_priv->id.remote_id;
1364	cm_mra_set_service_timeout(mra_msg, service_timeout);
1365
1366	if (private_data && private_data_len)
1367		memcpy(mra_msg->private_data, private_data, private_data_len);
1368}
1369
1370static void cm_format_rej(struct cm_rej_msg *rej_msg,
1371			  struct cm_id_private *cm_id_priv,
1372			  enum ib_cm_rej_reason reason,
1373			  void *ari,
1374			  u8 ari_length,
1375			  const void *private_data,
1376			  u8 private_data_len)
1377{
1378	cm_format_mad_hdr(&rej_msg->hdr, CM_REJ_ATTR_ID, cm_id_priv->tid);
1379	rej_msg->remote_comm_id = cm_id_priv->id.remote_id;
1380
1381	switch(cm_id_priv->id.state) {
1382	case IB_CM_REQ_RCVD:
1383		rej_msg->local_comm_id = 0;
1384		cm_rej_set_msg_rejected(rej_msg, CM_MSG_RESPONSE_REQ);
1385		break;
1386	case IB_CM_MRA_REQ_SENT:
1387		rej_msg->local_comm_id = cm_id_priv->id.local_id;
1388		cm_rej_set_msg_rejected(rej_msg, CM_MSG_RESPONSE_REQ);
1389		break;
1390	case IB_CM_REP_RCVD:
1391	case IB_CM_MRA_REP_SENT:
1392		rej_msg->local_comm_id = cm_id_priv->id.local_id;
1393		cm_rej_set_msg_rejected(rej_msg, CM_MSG_RESPONSE_REP);
1394		break;
1395	default:
1396		rej_msg->local_comm_id = cm_id_priv->id.local_id;
1397		cm_rej_set_msg_rejected(rej_msg, CM_MSG_RESPONSE_OTHER);
1398		break;
1399	}
1400
1401	rej_msg->reason = cpu_to_be16(reason);
1402	if (ari && ari_length) {
1403		cm_rej_set_reject_info_len(rej_msg, ari_length);
1404		memcpy(rej_msg->ari, ari, ari_length);
1405	}
1406
1407	if (private_data && private_data_len)
1408		memcpy(rej_msg->private_data, private_data, private_data_len);
1409}
1410
1411static void cm_dup_req_handler(struct cm_work *work,
1412			       struct cm_id_private *cm_id_priv)
1413{
1414	struct ib_mad_send_buf *msg = NULL;
1415	int ret;
1416
1417	atomic_long_inc(&work->port->counter_group[CM_RECV_DUPLICATES].
1418			counter[CM_REQ_COUNTER]);
1419
1420	/* Quick state check to discard duplicate REQs. */
1421	if (cm_id_priv->id.state == IB_CM_REQ_RCVD)
1422		return;
1423
1424	ret = cm_alloc_response_msg(work->port, work->mad_recv_wc, &msg);
1425	if (ret)
1426		return;
1427
1428	spin_lock_irq(&cm_id_priv->lock);
1429	switch (cm_id_priv->id.state) {
1430	case IB_CM_MRA_REQ_SENT:
1431		cm_format_mra((struct cm_mra_msg *) msg->mad, cm_id_priv,
1432			      CM_MSG_RESPONSE_REQ, cm_id_priv->service_timeout,
1433			      cm_id_priv->private_data,
1434			      cm_id_priv->private_data_len);
1435		break;
1436	case IB_CM_TIMEWAIT:
1437		cm_format_rej((struct cm_rej_msg *) msg->mad, cm_id_priv,
1438			      IB_CM_REJ_STALE_CONN, NULL, 0, NULL, 0);
1439		break;
1440	default:
1441		goto unlock;
1442	}
1443	spin_unlock_irq(&cm_id_priv->lock);
1444
1445	ret = ib_post_send_mad(msg, NULL);
1446	if (ret)
1447		goto free;
1448	return;
1449
1450unlock:	spin_unlock_irq(&cm_id_priv->lock);
1451free:	cm_free_msg(msg);
1452}
1453
1454static struct cm_id_private * cm_match_req(struct cm_work *work,
1455					   struct cm_id_private *cm_id_priv)
1456{
1457	struct cm_id_private *listen_cm_id_priv, *cur_cm_id_priv;
1458	struct cm_timewait_info *timewait_info;
1459	struct cm_req_msg *req_msg;
1460
1461	req_msg = (struct cm_req_msg *)work->mad_recv_wc->recv_buf.mad;
1462
1463	/* Check for possible duplicate REQ. */
1464	spin_lock_irq(&cm.lock);
1465	timewait_info = cm_insert_remote_id(cm_id_priv->timewait_info);
1466	if (timewait_info) {
1467		cur_cm_id_priv = cm_get_id(timewait_info->work.local_id,
1468					   timewait_info->work.remote_id);
1469		spin_unlock_irq(&cm.lock);
1470		if (cur_cm_id_priv) {
1471			cm_dup_req_handler(work, cur_cm_id_priv);
1472			cm_deref_id(cur_cm_id_priv);
1473		}
1474		return NULL;
1475	}
1476
1477	/* Check for stale connections. */
1478	timewait_info = cm_insert_remote_qpn(cm_id_priv->timewait_info);
1479	if (timewait_info) {
1480		cm_cleanup_timewait(cm_id_priv->timewait_info);
1481		spin_unlock_irq(&cm.lock);
1482		cm_issue_rej(work->port, work->mad_recv_wc,
1483			     IB_CM_REJ_STALE_CONN, CM_MSG_RESPONSE_REQ,
1484			     NULL, 0);
1485		return NULL;
1486	}
1487
1488	/* Find matching listen request. */
1489	listen_cm_id_priv = cm_find_listen(cm_id_priv->id.device,
1490					   req_msg->service_id,
1491					   req_msg->private_data);
1492	if (!listen_cm_id_priv) {
1493		cm_cleanup_timewait(cm_id_priv->timewait_info);
1494		spin_unlock_irq(&cm.lock);
1495		cm_issue_rej(work->port, work->mad_recv_wc,
1496			     IB_CM_REJ_INVALID_SERVICE_ID, CM_MSG_RESPONSE_REQ,
1497			     NULL, 0);
1498		goto out;
1499	}
1500	atomic_inc(&listen_cm_id_priv->refcount);
1501	atomic_inc(&cm_id_priv->refcount);
1502	cm_id_priv->id.state = IB_CM_REQ_RCVD;
1503	atomic_inc(&cm_id_priv->work_count);
1504	spin_unlock_irq(&cm.lock);
1505out:
1506	return listen_cm_id_priv;
1507}
1508
1509/*
1510 * Work-around for inter-subnet connections.  If the LIDs are permissive,
1511 * we need to override the LID/SL data in the REQ with the LID information
1512 * in the work completion.
1513 */
1514static void cm_process_routed_req(struct cm_req_msg *req_msg, struct ib_wc *wc)
1515{
1516	if (!cm_req_get_primary_subnet_local(req_msg)) {
1517		if (req_msg->primary_local_lid == IB_LID_PERMISSIVE) {
1518			req_msg->primary_local_lid = cpu_to_be16(wc->slid);
1519			cm_req_set_primary_sl(req_msg, wc->sl);
1520		}
1521
1522		if (req_msg->primary_remote_lid == IB_LID_PERMISSIVE)
1523			req_msg->primary_remote_lid = cpu_to_be16(wc->dlid_path_bits);
1524	}
1525
1526	if (!cm_req_get_alt_subnet_local(req_msg)) {
1527		if (req_msg->alt_local_lid == IB_LID_PERMISSIVE) {
1528			req_msg->alt_local_lid = cpu_to_be16(wc->slid);
1529			cm_req_set_alt_sl(req_msg, wc->sl);
1530		}
1531
1532		if (req_msg->alt_remote_lid == IB_LID_PERMISSIVE)
1533			req_msg->alt_remote_lid = cpu_to_be16(wc->dlid_path_bits);
1534	}
1535}
1536
1537static int cm_req_handler(struct cm_work *work)
1538{
1539	struct ib_cm_id *cm_id;
1540	struct cm_id_private *cm_id_priv, *listen_cm_id_priv;
1541	struct cm_req_msg *req_msg;
1542	int ret;
1543
1544	req_msg = (struct cm_req_msg *)work->mad_recv_wc->recv_buf.mad;
1545
1546	cm_id = ib_create_cm_id(work->port->cm_dev->ib_device, NULL, NULL);
1547	if (IS_ERR(cm_id))
1548		return PTR_ERR(cm_id);
1549
1550	cm_id_priv = container_of(cm_id, struct cm_id_private, id);
1551	cm_id_priv->id.remote_id = req_msg->local_comm_id;
1552	cm_init_av_for_response(work->port, work->mad_recv_wc->wc,
1553				work->mad_recv_wc->recv_buf.grh,
1554				&cm_id_priv->av);
1555	cm_id_priv->timewait_info = cm_create_timewait_info(cm_id_priv->
1556							    id.local_id);
1557	if (IS_ERR(cm_id_priv->timewait_info)) {
1558		ret = PTR_ERR(cm_id_priv->timewait_info);
1559		goto destroy;
1560	}
1561	cm_id_priv->timewait_info->work.remote_id = req_msg->local_comm_id;
1562	cm_id_priv->timewait_info->remote_ca_guid = req_msg->local_ca_guid;
1563	cm_id_priv->timewait_info->remote_qpn = cm_req_get_local_qpn(req_msg);
1564
1565	listen_cm_id_priv = cm_match_req(work, cm_id_priv);
1566	if (!listen_cm_id_priv) {
1567		ret = -EINVAL;
1568		kfree(cm_id_priv->timewait_info);
1569		goto destroy;
1570	}
1571
1572	cm_id_priv->id.cm_handler = listen_cm_id_priv->id.cm_handler;
1573	cm_id_priv->id.context = listen_cm_id_priv->id.context;
1574	cm_id_priv->id.service_id = req_msg->service_id;
1575	cm_id_priv->id.service_mask = ~cpu_to_be64(0);
1576
1577	cm_process_routed_req(req_msg, work->mad_recv_wc->wc);
1578	cm_format_paths_from_req(req_msg, &work->path[0], &work->path[1]);
1579	ret = cm_init_av_by_path(&work->path[0], &cm_id_priv->av);
1580	if (ret) {
1581		ib_get_cached_gid(work->port->cm_dev->ib_device,
1582				  work->port->port_num, 0, &work->path[0].sgid);
1583		ib_send_cm_rej(cm_id, IB_CM_REJ_INVALID_GID,
1584			       &work->path[0].sgid, sizeof work->path[0].sgid,
1585			       NULL, 0);
1586		goto rejected;
1587	}
1588	if (req_msg->alt_local_lid) {
1589		ret = cm_init_av_by_path(&work->path[1], &cm_id_priv->alt_av);
1590		if (ret) {
1591			ib_send_cm_rej(cm_id, IB_CM_REJ_INVALID_ALT_GID,
1592				       &work->path[0].sgid,
1593				       sizeof work->path[0].sgid, NULL, 0);
1594			goto rejected;
1595		}
1596	}
1597	cm_id_priv->tid = req_msg->hdr.tid;
1598	cm_id_priv->timeout_ms = cm_convert_to_ms(
1599					cm_req_get_local_resp_timeout(req_msg));
1600       if (cm_req_get_local_resp_timeout(req_msg) > (u8) max_timeout) {
1601               printk(KERN_WARNING PFX "rcvd cm_local_resp_timeout %d > %d, "
1602                      "decreasing used timeout_ms\n",
1603                      cm_req_get_local_resp_timeout(req_msg), max_timeout);
1604               cm_id_priv->timeout_ms = cm_convert_to_ms(max_timeout);
1605       }
1606
1607	cm_id_priv->max_cm_retries = cm_req_get_max_cm_retries(req_msg);
1608	cm_id_priv->remote_qpn = cm_req_get_local_qpn(req_msg);
1609	cm_id_priv->initiator_depth = cm_req_get_resp_res(req_msg);
1610	cm_id_priv->responder_resources = cm_req_get_init_depth(req_msg);
1611	cm_id_priv->path_mtu = cm_req_get_path_mtu(req_msg);
1612	cm_id_priv->pkey = req_msg->pkey;
1613	cm_id_priv->sq_psn = cm_req_get_starting_psn(req_msg);
1614	cm_id_priv->retry_count = cm_req_get_retry_count(req_msg);
1615	cm_id_priv->rnr_retry_count = cm_req_get_rnr_retry_count(req_msg);
1616	cm_id_priv->qp_type = cm_req_get_qp_type(req_msg);
1617
1618	cm_format_req_event(work, cm_id_priv, &listen_cm_id_priv->id);
1619	cm_process_work(cm_id_priv, work);
1620	cm_deref_id(listen_cm_id_priv);
1621	return 0;
1622
1623rejected:
1624	atomic_dec(&cm_id_priv->refcount);
1625	cm_deref_id(listen_cm_id_priv);
1626destroy:
1627	ib_destroy_cm_id(cm_id);
1628	return ret;
1629}
1630
1631static void cm_format_rep(struct cm_rep_msg *rep_msg,
1632			  struct cm_id_private *cm_id_priv,
1633			  struct ib_cm_rep_param *param)
1634{
1635	cm_format_mad_hdr(&rep_msg->hdr, CM_REP_ATTR_ID, cm_id_priv->tid);
1636	rep_msg->local_comm_id = cm_id_priv->id.local_id;
1637	rep_msg->remote_comm_id = cm_id_priv->id.remote_id;
1638	cm_rep_set_local_qpn(rep_msg, cpu_to_be32(param->qp_num));
1639	cm_rep_set_starting_psn(rep_msg, cpu_to_be32(param->starting_psn));
1640	rep_msg->resp_resources = param->responder_resources;
1641	rep_msg->initiator_depth = param->initiator_depth;
1642	cm_rep_set_target_ack_delay(rep_msg,
1643				    cm_id_priv->av.port->cm_dev->ack_delay);
1644	cm_rep_set_failover(rep_msg, param->failover_accepted);
1645	cm_rep_set_flow_ctrl(rep_msg, param->flow_control);
1646	cm_rep_set_rnr_retry_count(rep_msg, param->rnr_retry_count);
1647	cm_rep_set_srq(rep_msg, param->srq);
1648	rep_msg->local_ca_guid = cm_id_priv->id.device->node_guid;
1649
1650	if (param->private_data && param->private_data_len)
1651		memcpy(rep_msg->private_data, param->private_data,
1652		       param->private_data_len);
1653}
1654
1655int ib_send_cm_rep(struct ib_cm_id *cm_id,
1656		   struct ib_cm_rep_param *param)
1657{
1658	struct cm_id_private *cm_id_priv;
1659	struct ib_mad_send_buf *msg;
1660	struct cm_rep_msg *rep_msg;
1661	unsigned long flags;
1662	int ret;
1663
1664	if (param->private_data &&
1665	    param->private_data_len > IB_CM_REP_PRIVATE_DATA_SIZE)
1666		return -EINVAL;
1667
1668	cm_id_priv = container_of(cm_id, struct cm_id_private, id);
1669	spin_lock_irqsave(&cm_id_priv->lock, flags);
1670	if (cm_id->state != IB_CM_REQ_RCVD &&
1671	    cm_id->state != IB_CM_MRA_REQ_SENT) {
1672		ret = -EINVAL;
1673		goto out;
1674	}
1675
1676	ret = cm_alloc_msg(cm_id_priv, &msg);
1677	if (ret)
1678		goto out;
1679
1680	rep_msg = (struct cm_rep_msg *) msg->mad;
1681	cm_format_rep(rep_msg, cm_id_priv, param);
1682	msg->timeout_ms = cm_id_priv->timeout_ms;
1683	msg->context[1] = (void *) (unsigned long) IB_CM_REP_SENT;
1684
1685	ret = ib_post_send_mad(msg, NULL);
1686	if (ret) {
1687		spin_unlock_irqrestore(&cm_id_priv->lock, flags);
1688		cm_free_msg(msg);
1689		return ret;
1690	}
1691
1692	cm_id->state = IB_CM_REP_SENT;
1693	cm_id_priv->msg = msg;
1694	cm_id_priv->initiator_depth = param->initiator_depth;
1695	cm_id_priv->responder_resources = param->responder_resources;
1696	cm_id_priv->rq_psn = cm_rep_get_starting_psn(rep_msg);
1697	cm_id_priv->local_qpn = cm_rep_get_local_qpn(rep_msg);
1698
1699out:	spin_unlock_irqrestore(&cm_id_priv->lock, flags);
1700	return ret;
1701}
1702EXPORT_SYMBOL(ib_send_cm_rep);
1703
1704static void cm_format_rtu(struct cm_rtu_msg *rtu_msg,
1705			  struct cm_id_private *cm_id_priv,
1706			  const void *private_data,
1707			  u8 private_data_len)
1708{
1709	cm_format_mad_hdr(&rtu_msg->hdr, CM_RTU_ATTR_ID, cm_id_priv->tid);
1710	rtu_msg->local_comm_id = cm_id_priv->id.local_id;
1711	rtu_msg->remote_comm_id = cm_id_priv->id.remote_id;
1712
1713	if (private_data && private_data_len)
1714		memcpy(rtu_msg->private_data, private_data, private_data_len);
1715}
1716
1717int ib_send_cm_rtu(struct ib_cm_id *cm_id,
1718		   const void *private_data,
1719		   u8 private_data_len)
1720{
1721	struct cm_id_private *cm_id_priv;
1722	struct ib_mad_send_buf *msg;
1723	unsigned long flags;
1724	void *data;
1725	int ret;
1726
1727	if (private_data && private_data_len > IB_CM_RTU_PRIVATE_DATA_SIZE)
1728		return -EINVAL;
1729
1730	data = cm_copy_private_data(private_data, private_data_len);
1731	if (IS_ERR(data))
1732		return PTR_ERR(data);
1733
1734	cm_id_priv = container_of(cm_id, struct cm_id_private, id);
1735	spin_lock_irqsave(&cm_id_priv->lock, flags);
1736	if (cm_id->state != IB_CM_REP_RCVD &&
1737	    cm_id->state != IB_CM_MRA_REP_SENT) {
1738		ret = -EINVAL;
1739		goto error;
1740	}
1741
1742	ret = cm_alloc_msg(cm_id_priv, &msg);
1743	if (ret)
1744		goto error;
1745
1746	cm_format_rtu((struct cm_rtu_msg *) msg->mad, cm_id_priv,
1747		      private_data, private_data_len);
1748
1749	ret = ib_post_send_mad(msg, NULL);
1750	if (ret) {
1751		spin_unlock_irqrestore(&cm_id_priv->lock, flags);
1752		cm_free_msg(msg);
1753		kfree(data);
1754		return ret;
1755	}
1756
1757	cm_id->state = IB_CM_ESTABLISHED;
1758	cm_set_private_data(cm_id_priv, data, private_data_len);
1759	spin_unlock_irqrestore(&cm_id_priv->lock, flags);
1760	return 0;
1761
1762error:	spin_unlock_irqrestore(&cm_id_priv->lock, flags);
1763	kfree(data);
1764	return ret;
1765}
1766EXPORT_SYMBOL(ib_send_cm_rtu);
1767
1768static void cm_format_rep_event(struct cm_work *work)
1769{
1770	struct cm_rep_msg *rep_msg;
1771	struct ib_cm_rep_event_param *param;
1772
1773	rep_msg = (struct cm_rep_msg *)work->mad_recv_wc->recv_buf.mad;
1774	param = &work->cm_event.param.rep_rcvd;
1775	param->remote_ca_guid = rep_msg->local_ca_guid;
1776	param->remote_qkey = be32_to_cpu(rep_msg->local_qkey);
1777	param->remote_qpn = be32_to_cpu(cm_rep_get_local_qpn(rep_msg));
1778	param->starting_psn = be32_to_cpu(cm_rep_get_starting_psn(rep_msg));
1779	param->responder_resources = rep_msg->initiator_depth;
1780	param->initiator_depth = rep_msg->resp_resources;
1781	param->target_ack_delay = cm_rep_get_target_ack_delay(rep_msg);
1782	param->failover_accepted = cm_rep_get_failover(rep_msg);
1783	param->flow_control = cm_rep_get_flow_ctrl(rep_msg);
1784	param->rnr_retry_count = cm_rep_get_rnr_retry_count(rep_msg);
1785	param->srq = cm_rep_get_srq(rep_msg);
1786	work->cm_event.private_data = &rep_msg->private_data;
1787}
1788
1789static void cm_dup_rep_handler(struct cm_work *work)
1790{
1791	struct cm_id_private *cm_id_priv;
1792	struct cm_rep_msg *rep_msg;
1793	struct ib_mad_send_buf *msg = NULL;
1794	int ret;
1795
1796	rep_msg = (struct cm_rep_msg *) work->mad_recv_wc->recv_buf.mad;
1797	cm_id_priv = cm_acquire_id(rep_msg->remote_comm_id,
1798				   rep_msg->local_comm_id);
1799	if (!cm_id_priv)
1800		return;
1801
1802	atomic_long_inc(&work->port->counter_group[CM_RECV_DUPLICATES].
1803			counter[CM_REP_COUNTER]);
1804	ret = cm_alloc_response_msg(work->port, work->mad_recv_wc, &msg);
1805	if (ret)
1806		goto deref;
1807
1808	spin_lock_irq(&cm_id_priv->lock);
1809	if (cm_id_priv->id.state == IB_CM_ESTABLISHED)
1810		cm_format_rtu((struct cm_rtu_msg *) msg->mad, cm_id_priv,
1811			      cm_id_priv->private_data,
1812			      cm_id_priv->private_data_len);
1813	else if (cm_id_priv->id.state == IB_CM_MRA_REP_SENT)
1814		cm_format_mra((struct cm_mra_msg *) msg->mad, cm_id_priv,
1815			      CM_MSG_RESPONSE_REP, cm_id_priv->service_timeout,
1816			      cm_id_priv->private_data,
1817			      cm_id_priv->private_data_len);
1818	else
1819		goto unlock;
1820	spin_unlock_irq(&cm_id_priv->lock);
1821
1822	ret = ib_post_send_mad(msg, NULL);
1823	if (ret)
1824		goto free;
1825	goto deref;
1826
1827unlock:	spin_unlock_irq(&cm_id_priv->lock);
1828free:	cm_free_msg(msg);
1829deref:	cm_deref_id(cm_id_priv);
1830}
1831
1832static int cm_rep_handler(struct cm_work *work)
1833{
1834	struct cm_id_private *cm_id_priv;
1835	struct cm_rep_msg *rep_msg;
1836	int ret;
1837
1838	rep_msg = (struct cm_rep_msg *)work->mad_recv_wc->recv_buf.mad;
1839	cm_id_priv = cm_acquire_id(rep_msg->remote_comm_id, 0);
1840	if (!cm_id_priv) {
1841		cm_dup_rep_handler(work);
1842		return -EINVAL;
1843	}
1844
1845	cm_format_rep_event(work);
1846
1847	spin_lock_irq(&cm_id_priv->lock);
1848	switch (cm_id_priv->id.state) {
1849	case IB_CM_REQ_SENT:
1850	case IB_CM_MRA_REQ_RCVD:
1851		break;
1852	default:
1853		spin_unlock_irq(&cm_id_priv->lock);
1854		ret = -EINVAL;
1855		goto error;
1856	}
1857
1858	cm_id_priv->timewait_info->work.remote_id = rep_msg->local_comm_id;
1859	cm_id_priv->timewait_info->remote_ca_guid = rep_msg->local_ca_guid;
1860	cm_id_priv->timewait_info->remote_qpn = cm_rep_get_local_qpn(rep_msg);
1861
1862	spin_lock(&cm.lock);
1863	/* Check for duplicate REP. */
1864	if (cm_insert_remote_id(cm_id_priv->timewait_info)) {
1865		spin_unlock(&cm.lock);
1866		spin_unlock_irq(&cm_id_priv->lock);
1867		ret = -EINVAL;
1868		goto error;
1869	}
1870	/* Check for a stale connection. */
1871	if (cm_insert_remote_qpn(cm_id_priv->timewait_info)) {
1872		rb_erase(&cm_id_priv->timewait_info->remote_id_node,
1873			 &cm.remote_id_table);
1874		cm_id_priv->timewait_info->inserted_remote_id = 0;
1875		spin_unlock(&cm.lock);
1876		spin_unlock_irq(&cm_id_priv->lock);
1877		cm_issue_rej(work->port, work->mad_recv_wc,
1878			     IB_CM_REJ_STALE_CONN, CM_MSG_RESPONSE_REP,
1879			     NULL, 0);
1880		ret = -EINVAL;
1881		goto error;
1882	}
1883	spin_unlock(&cm.lock);
1884
1885	cm_id_priv->id.state = IB_CM_REP_RCVD;
1886	cm_id_priv->id.remote_id = rep_msg->local_comm_id;
1887	cm_id_priv->remote_qpn = cm_rep_get_local_qpn(rep_msg);
1888	cm_id_priv->initiator_depth = rep_msg->resp_resources;
1889	cm_id_priv->responder_resources = rep_msg->initiator_depth;
1890	cm_id_priv->sq_psn = cm_rep_get_starting_psn(rep_msg);
1891	cm_id_priv->rnr_retry_count = cm_rep_get_rnr_retry_count(rep_msg);
1892	cm_id_priv->target_ack_delay = cm_rep_get_target_ack_delay(rep_msg);
1893	cm_id_priv->av.timeout =
1894			cm_ack_timeout(cm_id_priv->target_ack_delay,
1895				       cm_id_priv->av.timeout - 1);
1896	cm_id_priv->alt_av.timeout =
1897			cm_ack_timeout(cm_id_priv->target_ack_delay,
1898				       cm_id_priv->alt_av.timeout - 1);
1899
1900	/* todo: handle peer_to_peer */
1901
1902	ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg);
1903	ret = atomic_inc_and_test(&cm_id_priv->work_count);
1904	if (!ret)
1905		list_add_tail(&work->list, &cm_id_priv->work_list);
1906	spin_unlock_irq(&cm_id_priv->lock);
1907
1908	if (ret)
1909		cm_process_work(cm_id_priv, work);
1910	else
1911		cm_deref_id(cm_id_priv);
1912	return 0;
1913
1914error:
1915	cm_deref_id(cm_id_priv);
1916	return ret;
1917}
1918
1919static int cm_establish_handler(struct cm_work *work)
1920{
1921	struct cm_id_private *cm_id_priv;
1922	int ret;
1923
1924	/* See comment in cm_establish about lookup. */
1925	cm_id_priv = cm_acquire_id(work->local_id, work->remote_id);
1926	if (!cm_id_priv)
1927		return -EINVAL;
1928
1929	spin_lock_irq(&cm_id_priv->lock);
1930	if (cm_id_priv->id.state != IB_CM_ESTABLISHED) {
1931		spin_unlock_irq(&cm_id_priv->lock);
1932		goto out;
1933	}
1934
1935	ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg);
1936	ret = atomic_inc_and_test(&cm_id_priv->work_count);
1937	if (!ret)
1938		list_add_tail(&work->list, &cm_id_priv->work_list);
1939	spin_unlock_irq(&cm_id_priv->lock);
1940
1941	if (ret)
1942		cm_process_work(cm_id_priv, work);
1943	else
1944		cm_deref_id(cm_id_priv);
1945	return 0;
1946out:
1947	cm_deref_id(cm_id_priv);
1948	return -EINVAL;
1949}
1950
1951static int cm_rtu_handler(struct cm_work *work)
1952{
1953	struct cm_id_private *cm_id_priv;
1954	struct cm_rtu_msg *rtu_msg;
1955	int ret;
1956
1957	rtu_msg = (struct cm_rtu_msg *)work->mad_recv_wc->recv_buf.mad;
1958	cm_id_priv = cm_acquire_id(rtu_msg->remote_comm_id,
1959				   rtu_msg->local_comm_id);
1960	if (!cm_id_priv)
1961		return -EINVAL;
1962
1963	work->cm_event.private_data = &rtu_msg->private_data;
1964
1965	spin_lock_irq(&cm_id_priv->lock);
1966	if (cm_id_priv->id.state != IB_CM_REP_SENT &&
1967	    cm_id_priv->id.state != IB_CM_MRA_REP_RCVD) {
1968		spin_unlock_irq(&cm_id_priv->lock);
1969		atomic_long_inc(&work->port->counter_group[CM_RECV_DUPLICATES].
1970				counter[CM_RTU_COUNTER]);
1971		goto out;
1972	}
1973	cm_id_priv->id.state = IB_CM_ESTABLISHED;
1974
1975	ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg);
1976	ret = atomic_inc_and_test(&cm_id_priv->work_count);
1977	if (!ret)
1978		list_add_tail(&work->list, &cm_id_priv->work_list);
1979	spin_unlock_irq(&cm_id_priv->lock);
1980
1981	if (ret)
1982		cm_process_work(cm_id_priv, work);
1983	else
1984		cm_deref_id(cm_id_priv);
1985	return 0;
1986out:
1987	cm_deref_id(cm_id_priv);
1988	return -EINVAL;
1989}
1990
1991static void cm_format_dreq(struct cm_dreq_msg *dreq_msg,
1992			  struct cm_id_private *cm_id_priv,
1993			  const void *private_data,
1994			  u8 private_data_len)
1995{
1996	cm_format_mad_hdr(&dreq_msg->hdr, CM_DREQ_ATTR_ID,
1997			  cm_form_tid(cm_id_priv, CM_MSG_SEQUENCE_DREQ));
1998	dreq_msg->local_comm_id = cm_id_priv->id.local_id;
1999	dreq_msg->remote_comm_id = cm_id_priv->id.remote_id;
2000	cm_dreq_set_remote_qpn(dreq_msg, cm_id_priv->remote_qpn);
2001
2002	if (private_data && private_data_len)
2003		memcpy(dreq_msg->private_data, private_data, private_data_len);
2004}
2005
2006int ib_send_cm_dreq(struct ib_cm_id *cm_id,
2007		    const void *private_data,
2008		    u8 private_data_len)
2009{
2010	struct cm_id_private *cm_id_priv;
2011	struct ib_mad_send_buf *msg;
2012	unsigned long flags;
2013	int ret;
2014
2015	if (private_data && private_data_len > IB_CM_DREQ_PRIVATE_DATA_SIZE)
2016		return -EINVAL;
2017
2018	cm_id_priv = container_of(cm_id, struct cm_id_private, id);
2019	spin_lock_irqsave(&cm_id_priv->lock, flags);
2020	if (cm_id->state != IB_CM_ESTABLISHED) {
2021		ret = -EINVAL;
2022		goto out;
2023	}
2024
2025	ret = cm_alloc_msg(cm_id_priv, &msg);
2026	if (ret) {
2027		cm_enter_timewait(cm_id_priv);
2028		goto out;
2029	}
2030
2031	cm_format_dreq((struct cm_dreq_msg *) msg->mad, cm_id_priv,
2032		       private_data, private_data_len);
2033	msg->timeout_ms = cm_id_priv->timeout_ms;
2034	msg->context[1] = (void *) (unsigned long) IB_CM_DREQ_SENT;
2035
2036	ret = ib_post_send_mad(msg, NULL);
2037	if (ret) {
2038		cm_enter_timewait(cm_id_priv);
2039		spin_unlock_irqrestore(&cm_id_priv->lock, flags);
2040		cm_free_msg(msg);
2041		return ret;
2042	}
2043
2044	cm_id->state = IB_CM_DREQ_SENT;
2045	cm_id_priv->msg = msg;
2046out:	spin_unlock_irqrestore(&cm_id_priv->lock, flags);
2047	return ret;
2048}
2049EXPORT_SYMBOL(ib_send_cm_dreq);
2050
2051static void cm_format_drep(struct cm_drep_msg *drep_msg,
2052			  struct cm_id_private *cm_id_priv,
2053			  const void *private_data,
2054			  u8 private_data_len)
2055{
2056	cm_format_mad_hdr(&drep_msg->hdr, CM_DREP_ATTR_ID, cm_id_priv->tid);
2057	drep_msg->local_comm_id = cm_id_priv->id.local_id;
2058	drep_msg->remote_comm_id = cm_id_priv->id.remote_id;
2059
2060	if (private_data && private_data_len)
2061		memcpy(drep_msg->private_data, private_data, private_data_len);
2062}
2063
2064int ib_send_cm_drep(struct ib_cm_id *cm_id,
2065		    const void *private_data,
2066		    u8 private_data_len)
2067{
2068	struct cm_id_private *cm_id_priv;
2069	struct ib_mad_send_buf *msg;
2070	unsigned long flags;
2071	void *data;
2072	int ret;
2073
2074	if (private_data && private_data_len > IB_CM_DREP_PRIVATE_DATA_SIZE)
2075		return -EINVAL;
2076
2077	data = cm_copy_private_data(private_data, private_data_len);
2078	if (IS_ERR(data))
2079		return PTR_ERR(data);
2080
2081	cm_id_priv = container_of(cm_id, struct cm_id_private, id);
2082	spin_lock_irqsave(&cm_id_priv->lock, flags);
2083	if (cm_id->state != IB_CM_DREQ_RCVD) {
2084		spin_unlock_irqrestore(&cm_id_priv->lock, flags);
2085		kfree(data);
2086		return -EINVAL;
2087	}
2088
2089	cm_set_private_data(cm_id_priv, data, private_data_len);
2090	cm_enter_timewait(cm_id_priv);
2091
2092	ret = cm_alloc_msg(cm_id_priv, &msg);
2093	if (ret)
2094		goto out;
2095
2096	cm_format_drep((struct cm_drep_msg *) msg->mad, cm_id_priv,
2097		       private_data, private_data_len);
2098
2099	ret = ib_post_send_mad(msg, NULL);
2100	if (ret) {
2101		spin_unlock_irqrestore(&cm_id_priv->lock, flags);
2102		cm_free_msg(msg);
2103		return ret;
2104	}
2105
2106out:	spin_unlock_irqrestore(&cm_id_priv->lock, flags);
2107	return ret;
2108}
2109EXPORT_SYMBOL(ib_send_cm_drep);
2110
2111static int cm_issue_drep(struct cm_port *port,
2112			 struct ib_mad_recv_wc *mad_recv_wc)
2113{
2114	struct ib_mad_send_buf *msg = NULL;
2115	struct cm_dreq_msg *dreq_msg;
2116	struct cm_drep_msg *drep_msg;
2117	int ret;
2118
2119	ret = cm_alloc_response_msg(port, mad_recv_wc, &msg);
2120	if (ret)
2121		return ret;
2122
2123	dreq_msg = (struct cm_dreq_msg *) mad_recv_wc->recv_buf.mad;
2124	drep_msg = (struct cm_drep_msg *) msg->mad;
2125
2126	cm_format_mad_hdr(&drep_msg->hdr, CM_DREP_ATTR_ID, dreq_msg->hdr.tid);
2127	drep_msg->remote_comm_id = dreq_msg->local_comm_id;
2128	drep_msg->local_comm_id = dreq_msg->remote_comm_id;
2129
2130	ret = ib_post_send_mad(msg, NULL);
2131	if (ret)
2132		cm_free_msg(msg);
2133
2134	return ret;
2135}
2136
2137static int cm_dreq_handler(struct cm_work *work)
2138{
2139	struct cm_id_private *cm_id_priv;
2140	struct cm_dreq_msg *dreq_msg;
2141	struct ib_mad_send_buf *msg = NULL;
2142	int ret;
2143
2144	dreq_msg = (struct cm_dreq_msg *)work->mad_recv_wc->recv_buf.mad;
2145	cm_id_priv = cm_acquire_id(dreq_msg->remote_comm_id,
2146				   dreq_msg->local_comm_id);
2147	if (!cm_id_priv) {
2148		atomic_long_inc(&work->port->counter_group[CM_RECV_DUPLICATES].
2149				counter[CM_DREQ_COUNTER]);
2150		cm_issue_drep(work->port, work->mad_recv_wc);
2151		return -EINVAL;
2152	}
2153
2154	work->cm_event.private_data = &dreq_msg->private_data;
2155
2156	spin_lock_irq(&cm_id_priv->lock);
2157	if (cm_id_priv->local_qpn != cm_dreq_get_remote_qpn(dreq_msg))
2158		goto unlock;
2159
2160	switch (cm_id_priv->id.state) {
2161	case IB_CM_REP_SENT:
2162	case IB_CM_DREQ_SENT:
2163		ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg);
2164		break;
2165	case IB_CM_ESTABLISHED:
2166	case IB_CM_MRA_REP_RCVD:
2167		break;
2168	case IB_CM_TIMEWAIT:
2169		atomic_long_inc(&work->port->counter_group[CM_RECV_DUPLICATES].
2170				counter[CM_DREQ_COUNTER]);
2171		if (cm_alloc_response_msg(work->port, work->mad_recv_wc, &msg))
2172			goto unlock;
2173
2174		cm_format_drep((struct cm_drep_msg *) msg->mad, cm_id_priv,
2175			       cm_id_priv->private_data,
2176			       cm_id_priv->private_data_len);
2177		spin_unlock_irq(&cm_id_priv->lock);
2178
2179		if (ib_post_send_mad(msg, NULL))
2180			cm_free_msg(msg);
2181		goto deref;
2182	case IB_CM_DREQ_RCVD:
2183		atomic_long_inc(&work->port->counter_group[CM_RECV_DUPLICATES].
2184				counter[CM_DREQ_COUNTER]);
2185		goto unlock;
2186	default:
2187		goto unlock;
2188	}
2189	cm_id_priv->id.state = IB_CM_DREQ_RCVD;
2190	cm_id_priv->tid = dreq_msg->hdr.tid;
2191	ret = atomic_inc_and_test(&cm_id_priv->work_count);
2192	if (!ret)
2193		list_add_tail(&work->list, &cm_id_priv->work_list);
2194	spin_unlock_irq(&cm_id_priv->lock);
2195
2196	if (ret)
2197		cm_process_work(cm_id_priv, work);
2198	else
2199		cm_deref_id(cm_id_priv);
2200	return 0;
2201
2202unlock:	spin_unlock_irq(&cm_id_priv->lock);
2203deref:	cm_deref_id(cm_id_priv);
2204	return -EINVAL;
2205}
2206
2207static int cm_drep_handler(struct cm_work *work)
2208{
2209	struct cm_id_private *cm_id_priv;
2210	struct cm_drep_msg *drep_msg;
2211	int ret;
2212
2213	drep_msg = (struct cm_drep_msg *)work->mad_recv_wc->recv_buf.mad;
2214	cm_id_priv = cm_acquire_id(drep_msg->remote_comm_id,
2215				   drep_msg->local_comm_id);
2216	if (!cm_id_priv)
2217		return -EINVAL;
2218
2219	work->cm_event.private_data = &drep_msg->private_data;
2220
2221	spin_lock_irq(&cm_id_priv->lock);
2222	if (cm_id_priv->id.state != IB_CM_DREQ_SENT &&
2223	    cm_id_priv->id.state != IB_CM_DREQ_RCVD) {
2224		spin_unlock_irq(&cm_id_priv->lock);
2225		goto out;
2226	}
2227	cm_enter_timewait(cm_id_priv);
2228
2229	ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg);
2230	ret = atomic_inc_and_test(&cm_id_priv->work_count);
2231	if (!ret)
2232		list_add_tail(&work->list, &cm_id_priv->work_list);
2233	spin_unlock_irq(&cm_id_priv->lock);
2234
2235	if (ret)
2236		cm_process_work(cm_id_priv, work);
2237	else
2238		cm_deref_id(cm_id_priv);
2239	return 0;
2240out:
2241	cm_deref_id(cm_id_priv);
2242	return -EINVAL;
2243}
2244
2245int ib_send_cm_rej(struct ib_cm_id *cm_id,
2246		   enum ib_cm_rej_reason reason,
2247		   void *ari,
2248		   u8 ari_length,
2249		   const void *private_data,
2250		   u8 private_data_len)
2251{
2252	struct cm_id_private *cm_id_priv;
2253	struct ib_mad_send_buf *msg;
2254	unsigned long flags;
2255	int ret;
2256
2257	if ((private_data && private_data_len > IB_CM_REJ_PRIVATE_DATA_SIZE) ||
2258	    (ari && ari_length > IB_CM_REJ_ARI_LENGTH))
2259		return -EINVAL;
2260
2261	cm_id_priv = container_of(cm_id, struct cm_id_private, id);
2262
2263	spin_lock_irqsave(&cm_id_priv->lock, flags);
2264	switch (cm_id->state) {
2265	case IB_CM_REQ_SENT:
2266	case IB_CM_MRA_REQ_RCVD:
2267	case IB_CM_REQ_RCVD:
2268	case IB_CM_MRA_REQ_SENT:
2269	case IB_CM_REP_RCVD:
2270	case IB_CM_MRA_REP_SENT:
2271		ret = cm_alloc_msg(cm_id_priv, &msg);
2272		if (!ret)
2273			cm_format_rej((struct cm_rej_msg *) msg->mad,
2274				      cm_id_priv, reason, ari, ari_length,
2275				      private_data, private_data_len);
2276
2277		cm_reset_to_idle(cm_id_priv);
2278		break;
2279	case IB_CM_REP_SENT:
2280	case IB_CM_MRA_REP_RCVD:
2281		ret = cm_alloc_msg(cm_id_priv, &msg);
2282		if (!ret)
2283			cm_format_rej((struct cm_rej_msg *) msg->mad,
2284				      cm_id_priv, reason, ari, ari_length,
2285				      private_data, private_data_len);
2286
2287		cm_enter_timewait(cm_id_priv);
2288		break;
2289	default:
2290		ret = -EINVAL;
2291		goto out;
2292	}
2293
2294	if (ret)
2295		goto out;
2296
2297	ret = ib_post_send_mad(msg, NULL);
2298	if (ret)
2299		cm_free_msg(msg);
2300
2301out:	spin_unlock_irqrestore(&cm_id_priv->lock, flags);
2302	return ret;
2303}
2304EXPORT_SYMBOL(ib_send_cm_rej);
2305
2306static void cm_format_rej_event(struct cm_work *work)
2307{
2308	struct cm_rej_msg *rej_msg;
2309	struct ib_cm_rej_event_param *param;
2310
2311	rej_msg = (struct cm_rej_msg *)work->mad_recv_wc->recv_buf.mad;
2312	param = &work->cm_event.param.rej_rcvd;
2313	param->ari = rej_msg->ari;
2314	param->ari_length = cm_rej_get_reject_info_len(rej_msg);
2315	param->reason = __be16_to_cpu(rej_msg->reason);
2316	work->cm_event.private_data = &rej_msg->private_data;
2317}
2318
2319static struct cm_id_private * cm_acquire_rejected_id(struct cm_rej_msg *rej_msg)
2320{
2321	struct cm_timewait_info *timewait_info;
2322	struct cm_id_private *cm_id_priv;
2323	__be32 remote_id;
2324
2325	remote_id = rej_msg->local_comm_id;
2326
2327	if (__be16_to_cpu(rej_msg->reason) == IB_CM_REJ_TIMEOUT) {
2328		spin_lock_irq(&cm.lock);
2329		timewait_info = cm_find_remote_id( *((__be64 *) rej_msg->ari),
2330						  remote_id);
2331		if (!timewait_info) {
2332			spin_unlock_irq(&cm.lock);
2333			return NULL;
2334		}
2335		cm_id_priv = idr_find(&cm.local_id_table, (__force int)
2336				      (timewait_info->work.local_id ^
2337				       cm.random_id_operand));
2338		if (cm_id_priv) {
2339			if (cm_id_priv->id.remote_id == remote_id)
2340				atomic_inc(&cm_id_priv->refcount);
2341			else
2342				cm_id_priv = NULL;
2343		}
2344		spin_unlock_irq(&cm.lock);
2345	} else if (cm_rej_get_msg_rejected(rej_msg) == CM_MSG_RESPONSE_REQ)
2346		cm_id_priv = cm_acquire_id(rej_msg->remote_comm_id, 0);
2347	else
2348		cm_id_priv = cm_acquire_id(rej_msg->remote_comm_id, remote_id);
2349
2350	return cm_id_priv;
2351}
2352
2353static int cm_rej_handler(struct cm_work *work)
2354{
2355	struct cm_id_private *cm_id_priv;
2356	struct cm_rej_msg *rej_msg;
2357	int ret;
2358
2359	rej_msg = (struct cm_rej_msg *)work->mad_recv_wc->recv_buf.mad;
2360	cm_id_priv = cm_acquire_rejected_id(rej_msg);
2361	if (!cm_id_priv)
2362		return -EINVAL;
2363
2364	cm_format_rej_event(work);
2365
2366	spin_lock_irq(&cm_id_priv->lock);
2367	switch (cm_id_priv->id.state) {
2368	case IB_CM_REQ_SENT:
2369	case IB_CM_MRA_REQ_RCVD:
2370	case IB_CM_REP_SENT:
2371	case IB_CM_MRA_REP_RCVD:
2372		ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg);
2373		/* fall through */
2374	case IB_CM_REQ_RCVD:
2375	case IB_CM_MRA_REQ_SENT:
2376		if (__be16_to_cpu(rej_msg->reason) == IB_CM_REJ_STALE_CONN)
2377			cm_enter_timewait(cm_id_priv);
2378		else
2379			cm_reset_to_idle(cm_id_priv);
2380		break;
2381	case IB_CM_DREQ_SENT:
2382		ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg);
2383		/* fall through */
2384	case IB_CM_REP_RCVD:
2385	case IB_CM_MRA_REP_SENT:
2386	case IB_CM_ESTABLISHED:
2387		cm_enter_timewait(cm_id_priv);
2388		break;
2389	default:
2390		spin_unlock_irq(&cm_id_priv->lock);
2391		ret = -EINVAL;
2392		goto out;
2393	}
2394
2395	ret = atomic_inc_and_test(&cm_id_priv->work_count);
2396	if (!ret)
2397		list_add_tail(&work->list, &cm_id_priv->work_list);
2398	spin_unlock_irq(&cm_id_priv->lock);
2399
2400	if (ret)
2401		cm_process_work(cm_id_priv, work);
2402	else
2403		cm_deref_id(cm_id_priv);
2404	return 0;
2405out:
2406	cm_deref_id(cm_id_priv);
2407	return -EINVAL;
2408}
2409
2410int ib_send_cm_mra(struct ib_cm_id *cm_id,
2411		   u8 service_timeout,
2412		   const void *private_data,
2413		   u8 private_data_len)
2414{
2415	struct cm_id_private *cm_id_priv;
2416	struct ib_mad_send_buf *msg;
2417	enum ib_cm_state cm_state;
2418	enum ib_cm_lap_state lap_state;
2419	enum cm_msg_response msg_response;
2420	void *data;
2421	unsigned long flags;
2422	int ret;
2423
2424	if (private_data && private_data_len > IB_CM_MRA_PRIVATE_DATA_SIZE)
2425		return -EINVAL;
2426
2427	data = cm_copy_private_data(private_data, private_data_len);
2428	if (IS_ERR(data))
2429		return PTR_ERR(data);
2430
2431	cm_id_priv = container_of(cm_id, struct cm_id_private, id);
2432
2433	spin_lock_irqsave(&cm_id_priv->lock, flags);
2434	switch(cm_id_priv->id.state) {
2435	case IB_CM_REQ_RCVD:
2436		cm_state = IB_CM_MRA_REQ_SENT;
2437		lap_state = cm_id->lap_state;
2438		msg_response = CM_MSG_RESPONSE_REQ;
2439		break;
2440	case IB_CM_REP_RCVD:
2441		cm_state = IB_CM_MRA_REP_SENT;
2442		lap_state = cm_id->lap_state;
2443		msg_response = CM_MSG_RESPONSE_REP;
2444		break;
2445	case IB_CM_ESTABLISHED:
2446		if (cm_id->lap_state == IB_CM_LAP_RCVD) {
2447			cm_state = cm_id->state;
2448			lap_state = IB_CM_MRA_LAP_SENT;
2449			msg_response = CM_MSG_RESPONSE_OTHER;
2450			break;
2451		}
2452	default:
2453		ret = -EINVAL;
2454		goto error1;
2455	}
2456
2457	if (!(service_timeout & IB_CM_MRA_FLAG_DELAY)) {
2458		ret = cm_alloc_msg(cm_id_priv, &msg);
2459		if (ret)
2460			goto error1;
2461
2462		cm_format_mra((struct cm_mra_msg *) msg->mad, cm_id_priv,
2463			      msg_response, service_timeout,
2464			      private_data, private_data_len);
2465		ret = ib_post_send_mad(msg, NULL);
2466		if (ret)
2467			goto error2;
2468	}
2469
2470	cm_id->state = cm_state;
2471	cm_id->lap_state = lap_state;
2472	cm_id_priv->service_timeout = service_timeout;
2473	cm_set_private_data(cm_id_priv, data, private_data_len);
2474	spin_unlock_irqrestore(&cm_id_priv->lock, flags);
2475	return 0;
2476
2477error1:	spin_unlock_irqrestore(&cm_id_priv->lock, flags);
2478	kfree(data);
2479	return ret;
2480
2481error2:	spin_unlock_irqrestore(&cm_id_priv->lock, flags);
2482	kfree(data);
2483	cm_free_msg(msg);
2484	return ret;
2485}
2486EXPORT_SYMBOL(ib_send_cm_mra);
2487
2488static struct cm_id_private * cm_acquire_mraed_id(struct cm_mra_msg *mra_msg)
2489{
2490	switch (cm_mra_get_msg_mraed(mra_msg)) {
2491	case CM_MSG_RESPONSE_REQ:
2492		return cm_acquire_id(mra_msg->remote_comm_id, 0);
2493	case CM_MSG_RESPONSE_REP:
2494	case CM_MSG_RESPONSE_OTHER:
2495		return cm_acquire_id(mra_msg->remote_comm_id,
2496				     mra_msg->local_comm_id);
2497	default:
2498		return NULL;
2499	}
2500}
2501
2502static int cm_mra_handler(struct cm_work *work)
2503{
2504	struct cm_id_private *cm_id_priv;
2505	struct cm_mra_msg *mra_msg;
2506	int timeout, ret;
2507
2508	mra_msg = (struct cm_mra_msg *)work->mad_recv_wc->recv_buf.mad;
2509	cm_id_priv = cm_acquire_mraed_id(mra_msg);
2510	if (!cm_id_priv)
2511		return -EINVAL;
2512
2513	work->cm_event.private_data = &mra_msg->private_data;
2514	work->cm_event.param.mra_rcvd.service_timeout =
2515					cm_mra_get_service_timeout(mra_msg);
2516	timeout = cm_convert_to_ms(cm_mra_get_service_timeout(mra_msg)) +
2517		  cm_convert_to_ms(cm_id_priv->av.timeout);
2518       if (timeout > cm_convert_to_ms(max_timeout)) {
2519               printk(KERN_WARNING PFX "calculated mra timeout %d > %d, "
2520                      "decreasing used timeout_ms\n", timeout,
2521                      cm_convert_to_ms(max_timeout));
2522               timeout = cm_convert_to_ms(max_timeout);
2523       }
2524
2525	spin_lock_irq(&cm_id_priv->lock);
2526	switch (cm_id_priv->id.state) {
2527	case IB_CM_REQ_SENT:
2528		if (cm_mra_get_msg_mraed(mra_msg) != CM_MSG_RESPONSE_REQ ||
2529		    ib_modify_mad(cm_id_priv->av.port->mad_agent,
2530				  cm_id_priv->msg, timeout))
2531			goto out;
2532		cm_id_priv->id.state = IB_CM_MRA_REQ_RCVD;
2533		break;
2534	case IB_CM_REP_SENT:
2535		if (cm_mra_get_msg_mraed(mra_msg) != CM_MSG_RESPONSE_REP ||
2536		    ib_modify_mad(cm_id_priv->av.port->mad_agent,
2537				  cm_id_priv->msg, timeout))
2538			goto out;
2539		cm_id_priv->id.state = IB_CM_MRA_REP_RCVD;
2540		break;
2541	case IB_CM_ESTABLISHED:
2542		if (cm_mra_get_msg_mraed(mra_msg) != CM_MSG_RESPONSE_OTHER ||
2543		    cm_id_priv->id.lap_state != IB_CM_LAP_SENT ||
2544		    ib_modify_mad(cm_id_priv->av.port->mad_agent,
2545				  cm_id_priv->msg, timeout)) {
2546			if (cm_id_priv->id.lap_state == IB_CM_MRA_LAP_RCVD)
2547				atomic_long_inc(&work->port->
2548						counter_group[CM_RECV_DUPLICATES].
2549						counter[CM_MRA_COUNTER]);
2550			goto out;
2551		}
2552		cm_id_priv->id.lap_state = IB_CM_MRA_LAP_RCVD;
2553		break;
2554	case IB_CM_MRA_REQ_RCVD:
2555	case IB_CM_MRA_REP_RCVD:
2556		atomic_long_inc(&work->port->counter_group[CM_RECV_DUPLICATES].
2557				counter[CM_MRA_COUNTER]);
2558		/* fall through */
2559	default:
2560		goto out;
2561	}
2562
2563	cm_id_priv->msg->context[1] = (void *) (unsigned long)
2564				      cm_id_priv->id.state;
2565	ret = atomic_inc_and_test(&cm_id_priv->work_count);
2566	if (!ret)
2567		list_add_tail(&work->list, &cm_id_priv->work_list);
2568	spin_unlock_irq(&cm_id_priv->lock);
2569
2570	if (ret)
2571		cm_process_work(cm_id_priv, work);
2572	else
2573		cm_deref_id(cm_id_priv);
2574	return 0;
2575out:
2576	spin_unlock_irq(&cm_id_priv->lock);
2577	cm_deref_id(cm_id_priv);
2578	return -EINVAL;
2579}
2580
2581static void cm_format_lap(struct cm_lap_msg *lap_msg,
2582			  struct cm_id_private *cm_id_priv,
2583			  struct ib_sa_path_rec *alternate_path,
2584			  const void *private_data,
2585			  u8 private_data_len)
2586{
2587	cm_format_mad_hdr(&lap_msg->hdr, CM_LAP_ATTR_ID,
2588			  cm_form_tid(cm_id_priv, CM_MSG_SEQUENCE_LAP));
2589	lap_msg->local_comm_id = cm_id_priv->id.local_id;
2590	lap_msg->remote_comm_id = cm_id_priv->id.remote_id;
2591	cm_lap_set_remote_qpn(lap_msg, cm_id_priv->remote_qpn);
2592	/* todo: need remote CM response timeout */
2593	cm_lap_set_remote_resp_timeout(lap_msg, 0x1F);
2594	lap_msg->alt_local_lid = alternate_path->slid;
2595	lap_msg->alt_remote_lid = alternate_path->dlid;
2596	lap_msg->alt_local_gid = alternate_path->sgid;
2597	lap_msg->alt_remote_gid = alternate_path->dgid;
2598	cm_lap_set_flow_label(lap_msg, alternate_path->flow_label);
2599	cm_lap_set_traffic_class(lap_msg, alternate_path->traffic_class);
2600	lap_msg->alt_hop_limit = alternate_path->hop_limit;
2601	cm_lap_set_packet_rate(lap_msg, alternate_path->rate);
2602	cm_lap_set_sl(lap_msg, alternate_path->sl);
2603	cm_lap_set_subnet_local(lap_msg, 1); /* local only... */
2604	cm_lap_set_local_ack_timeout(lap_msg,
2605		cm_ack_timeout(cm_id_priv->av.port->cm_dev->ack_delay,
2606			       alternate_path->packet_life_time));
2607
2608	if (private_data && private_data_len)
2609		memcpy(lap_msg->private_data, private_data, private_data_len);
2610}
2611
2612int ib_send_cm_lap(struct ib_cm_id *cm_id,
2613		   struct ib_sa_path_rec *alternate_path,
2614		   const void *private_data,
2615		   u8 private_data_len)
2616{
2617	struct cm_id_private *cm_id_priv;
2618	struct ib_mad_send_buf *msg;
2619	unsigned long flags;
2620	int ret;
2621
2622	if (private_data && private_data_len > IB_CM_LAP_PRIVATE_DATA_SIZE)
2623		return -EINVAL;
2624
2625	cm_id_priv = container_of(cm_id, struct cm_id_private, id);
2626	spin_lock_irqsave(&cm_id_priv->lock, flags);
2627	if (cm_id->state != IB_CM_ESTABLISHED ||
2628	    (cm_id->lap_state != IB_CM_LAP_UNINIT &&
2629	     cm_id->lap_state != IB_CM_LAP_IDLE)) {
2630		ret = -EINVAL;
2631		goto out;
2632	}
2633
2634	ret = cm_init_av_by_path(alternate_path, &cm_id_priv->alt_av);
2635	if (ret)
2636		goto out;
2637	cm_id_priv->alt_av.timeout =
2638			cm_ack_timeout(cm_id_priv->target_ack_delay,
2639				       cm_id_priv->alt_av.timeout - 1);
2640
2641	ret = cm_alloc_msg(cm_id_priv, &msg);
2642	if (ret)
2643		goto out;
2644
2645	cm_format_lap((struct cm_lap_msg *) msg->mad, cm_id_priv,
2646		      alternate_path, private_data, private_data_len);
2647	msg->timeout_ms = cm_id_priv->timeout_ms;
2648	msg->context[1] = (void *) (unsigned long) IB_CM_ESTABLISHED;
2649
2650	ret = ib_post_send_mad(msg, NULL);
2651	if (ret) {
2652		spin_unlock_irqrestore(&cm_id_priv->lock, flags);
2653		cm_free_msg(msg);
2654		return ret;
2655	}
2656
2657	cm_id->lap_state = IB_CM_LAP_SENT;
2658	cm_id_priv->msg = msg;
2659
2660out:	spin_unlock_irqrestore(&cm_id_priv->lock, flags);
2661	return ret;
2662}
2663EXPORT_SYMBOL(ib_send_cm_lap);
2664
2665static void cm_format_path_from_lap(struct cm_id_private *cm_id_priv,
2666				    struct ib_sa_path_rec *path,
2667				    struct cm_lap_msg *lap_msg)
2668{
2669	memset(path, 0, sizeof *path);
2670	path->dgid = lap_msg->alt_local_gid;
2671	path->sgid = lap_msg->alt_remote_gid;
2672	path->dlid = lap_msg->alt_local_lid;
2673	path->slid = lap_msg->alt_remote_lid;
2674	path->flow_label = cm_lap_get_flow_label(lap_msg);
2675	path->hop_limit = lap_msg->alt_hop_limit;
2676	path->traffic_class = cm_lap_get_traffic_class(lap_msg);
2677	path->reversible = 1;
2678	path->pkey = cm_id_priv->pkey;
2679	path->sl = cm_lap_get_sl(lap_msg);
2680	path->mtu_selector = IB_SA_EQ;
2681	path->mtu = cm_id_priv->path_mtu;
2682	path->rate_selector = IB_SA_EQ;
2683	path->rate = cm_lap_get_packet_rate(lap_msg);
2684	path->packet_life_time_selector = IB_SA_EQ;
2685	path->packet_life_time = cm_lap_get_local_ack_timeout(lap_msg);
2686	path->packet_life_time -= (path->packet_life_time > 0);
2687}
2688
2689static int cm_lap_handler(struct cm_work *work)
2690{
2691	struct cm_id_private *cm_id_priv;
2692	struct cm_lap_msg *lap_msg;
2693	struct ib_cm_lap_event_param *param;
2694	struct ib_mad_send_buf *msg = NULL;
2695	int ret;
2696
2697	/* todo: verify LAP request and send reject APR if invalid. */
2698	lap_msg = (struct cm_lap_msg *)work->mad_recv_wc->recv_buf.mad;
2699	cm_id_priv = cm_acquire_id(lap_msg->remote_comm_id,
2700				   lap_msg->local_comm_id);
2701	if (!cm_id_priv)
2702		return -EINVAL;
2703
2704	param = &work->cm_event.param.lap_rcvd;
2705	param->alternate_path = &work->path[0];
2706	cm_format_path_from_lap(cm_id_priv, param->alternate_path, lap_msg);
2707	work->cm_event.private_data = &lap_msg->private_data;
2708
2709	spin_lock_irq(&cm_id_priv->lock);
2710	if (cm_id_priv->id.state != IB_CM_ESTABLISHED)
2711		goto unlock;
2712
2713	switch (cm_id_priv->id.lap_state) {
2714	case IB_CM_LAP_UNINIT:
2715	case IB_CM_LAP_IDLE:
2716		break;
2717	case IB_CM_MRA_LAP_SENT:
2718		atomic_long_inc(&work->port->counter_group[CM_RECV_DUPLICATES].
2719				counter[CM_LAP_COUNTER]);
2720		if (cm_alloc_response_msg(work->port, work->mad_recv_wc, &msg))
2721			goto unlock;
2722
2723		cm_format_mra((struct cm_mra_msg *) msg->mad, cm_id_priv,
2724			      CM_MSG_RESPONSE_OTHER,
2725			      cm_id_priv->service_timeout,
2726			      cm_id_priv->private_data,
2727			      cm_id_priv->private_data_len);
2728		spin_unlock_irq(&cm_id_priv->lock);
2729
2730		if (ib_post_send_mad(msg, NULL))
2731			cm_free_msg(msg);
2732		goto deref;
2733	case IB_CM_LAP_RCVD:
2734		atomic_long_inc(&work->port->counter_group[CM_RECV_DUPLICATES].
2735				counter[CM_LAP_COUNTER]);
2736		goto unlock;
2737	default:
2738		goto unlock;
2739	}
2740
2741	cm_id_priv->id.lap_state = IB_CM_LAP_RCVD;
2742	cm_id_priv->tid = lap_msg->hdr.tid;
2743	cm_init_av_for_response(work->port, work->mad_recv_wc->wc,
2744				work->mad_recv_wc->recv_buf.grh,
2745				&cm_id_priv->av);
2746	cm_init_av_by_path(param->alternate_path, &cm_id_priv->alt_av);
2747	ret = atomic_inc_and_test(&cm_id_priv->work_count);
2748	if (!ret)
2749		list_add_tail(&work->list, &cm_id_priv->work_list);
2750	spin_unlock_irq(&cm_id_priv->lock);
2751
2752	if (ret)
2753		cm_process_work(cm_id_priv, work);
2754	else
2755		cm_deref_id(cm_id_priv);
2756	return 0;
2757
2758unlock:	spin_unlock_irq(&cm_id_priv->lock);
2759deref:	cm_deref_id(cm_id_priv);
2760	return -EINVAL;
2761}
2762
2763static void cm_format_apr(struct cm_apr_msg *apr_msg,
2764			  struct cm_id_private *cm_id_priv,
2765			  enum ib_cm_apr_status status,
2766			  void *info,
2767			  u8 info_length,
2768			  const void *private_data,
2769			  u8 private_data_len)
2770{
2771	cm_format_mad_hdr(&apr_msg->hdr, CM_APR_ATTR_ID, cm_id_priv->tid);
2772	apr_msg->local_comm_id = cm_id_priv->id.local_id;
2773	apr_msg->remote_comm_id = cm_id_priv->id.remote_id;
2774	apr_msg->ap_status = (u8) status;
2775
2776	if (info && info_length) {
2777		apr_msg->info_length = info_length;
2778		memcpy(apr_msg->info, info, info_length);
2779	}
2780
2781	if (private_data && private_data_len)
2782		memcpy(apr_msg->private_data, private_data, private_data_len);
2783}
2784
2785int ib_send_cm_apr(struct ib_cm_id *cm_id,
2786		   enum ib_cm_apr_status status,
2787		   void *info,
2788		   u8 info_length,
2789		   const void *private_data,
2790		   u8 private_data_len)
2791{
2792	struct cm_id_private *cm_id_priv;
2793	struct ib_mad_send_buf *msg;
2794	unsigned long flags;
2795	int ret;
2796
2797	if ((private_data && private_data_len > IB_CM_APR_PRIVATE_DATA_SIZE) ||
2798	    (info && info_length > IB_CM_APR_INFO_LENGTH))
2799		return -EINVAL;
2800
2801	cm_id_priv = container_of(cm_id, struct cm_id_private, id);
2802	spin_lock_irqsave(&cm_id_priv->lock, flags);
2803	if (cm_id->state != IB_CM_ESTABLISHED ||
2804	    (cm_id->lap_state != IB_CM_LAP_RCVD &&
2805	     cm_id->lap_state != IB_CM_MRA_LAP_SENT)) {
2806		ret = -EINVAL;
2807		goto out;
2808	}
2809
2810	ret = cm_alloc_msg(cm_id_priv, &msg);
2811	if (ret)
2812		goto out;
2813
2814	cm_format_apr((struct cm_apr_msg *) msg->mad, cm_id_priv, status,
2815		      info, info_length, private_data, private_data_len);
2816	ret = ib_post_send_mad(msg, NULL);
2817	if (ret) {
2818		spin_unlock_irqrestore(&cm_id_priv->lock, flags);
2819		cm_free_msg(msg);
2820		return ret;
2821	}
2822
2823	cm_id->lap_state = IB_CM_LAP_IDLE;
2824out:	spin_unlock_irqrestore(&cm_id_priv->lock, flags);
2825	return ret;
2826}
2827EXPORT_SYMBOL(ib_send_cm_apr);
2828
2829static int cm_apr_handler(struct cm_work *work)
2830{
2831	struct cm_id_private *cm_id_priv;
2832	struct cm_apr_msg *apr_msg;
2833	int ret;
2834
2835	apr_msg = (struct cm_apr_msg *)work->mad_recv_wc->recv_buf.mad;
2836	cm_id_priv = cm_acquire_id(apr_msg->remote_comm_id,
2837				   apr_msg->local_comm_id);
2838	if (!cm_id_priv)
2839		return -EINVAL; /* Unmatched reply. */
2840
2841	work->cm_event.param.apr_rcvd.ap_status = apr_msg->ap_status;
2842	work->cm_event.param.apr_rcvd.apr_info = &apr_msg->info;
2843	work->cm_event.param.apr_rcvd.info_len = apr_msg->info_length;
2844	work->cm_event.private_data = &apr_msg->private_data;
2845
2846	spin_lock_irq(&cm_id_priv->lock);
2847	if (cm_id_priv->id.state != IB_CM_ESTABLISHED ||
2848	    (cm_id_priv->id.lap_state != IB_CM_LAP_SENT &&
2849	     cm_id_priv->id.lap_state != IB_CM_MRA_LAP_RCVD)) {
2850		spin_unlock_irq(&cm_id_priv->lock);
2851		goto out;
2852	}
2853	cm_id_priv->id.lap_state = IB_CM_LAP_IDLE;
2854	ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg);
2855	cm_id_priv->msg = NULL;
2856
2857	ret = atomic_inc_and_test(&cm_id_priv->work_count);
2858	if (!ret)
2859		list_add_tail(&work->list, &cm_id_priv->work_list);
2860	spin_unlock_irq(&cm_id_priv->lock);
2861
2862	if (ret)
2863		cm_process_work(cm_id_priv, work);
2864	else
2865		cm_deref_id(cm_id_priv);
2866	return 0;
2867out:
2868	cm_deref_id(cm_id_priv);
2869	return -EINVAL;
2870}
2871
2872static int cm_timewait_handler(struct cm_work *work)
2873{
2874	struct cm_timewait_info *timewait_info;
2875	struct cm_id_private *cm_id_priv;
2876	int ret;
2877
2878	timewait_info = (struct cm_timewait_info *)work;
2879	spin_lock_irq(&cm.lock);
2880	list_del(&timewait_info->list);
2881	spin_unlock_irq(&cm.lock);
2882
2883	cm_id_priv = cm_acquire_id(timewait_info->work.local_id,
2884				   timewait_info->work.remote_id);
2885	if (!cm_id_priv)
2886		return -EINVAL;
2887
2888	spin_lock_irq(&cm_id_priv->lock);
2889	if (cm_id_priv->id.state != IB_CM_TIMEWAIT ||
2890	    cm_id_priv->remote_qpn != timewait_info->remote_qpn) {
2891		spin_unlock_irq(&cm_id_priv->lock);
2892		goto out;
2893	}
2894	cm_id_priv->id.state = IB_CM_IDLE;
2895	ret = atomic_inc_and_test(&cm_id_priv->work_count);
2896	if (!ret)
2897		list_add_tail(&work->list, &cm_id_priv->work_list);
2898	spin_unlock_irq(&cm_id_priv->lock);
2899
2900	if (ret)
2901		cm_process_work(cm_id_priv, work);
2902	else
2903		cm_deref_id(cm_id_priv);
2904	return 0;
2905out:
2906	cm_deref_id(cm_id_priv);
2907	return -EINVAL;
2908}
2909
2910static void cm_format_sidr_req(struct cm_sidr_req_msg *sidr_req_msg,
2911			       struct cm_id_private *cm_id_priv,
2912			       struct ib_cm_sidr_req_param *param)
2913{
2914	cm_format_mad_hdr(&sidr_req_msg->hdr, CM_SIDR_REQ_ATTR_ID,
2915			  cm_form_tid(cm_id_priv, CM_MSG_SEQUENCE_SIDR));
2916	sidr_req_msg->request_id = cm_id_priv->id.local_id;
2917	sidr_req_msg->pkey = param->path->pkey;
2918	sidr_req_msg->service_id = param->service_id;
2919
2920	if (param->private_data && param->private_data_len)
2921		memcpy(sidr_req_msg->private_data, param->private_data,
2922		       param->private_data_len);
2923}
2924
2925int ib_send_cm_sidr_req(struct ib_cm_id *cm_id,
2926			struct ib_cm_sidr_req_param *param)
2927{
2928	struct cm_id_private *cm_id_priv;
2929	struct ib_mad_send_buf *msg;
2930	unsigned long flags;
2931	int ret;
2932
2933	if (!param->path || (param->private_data &&
2934	     param->private_data_len > IB_CM_SIDR_REQ_PRIVATE_DATA_SIZE))
2935		return -EINVAL;
2936
2937	cm_id_priv = container_of(cm_id, struct cm_id_private, id);
2938	ret = cm_init_av_by_path(param->path, &cm_id_priv->av);
2939	if (ret)
2940		goto out;
2941
2942	cm_id->service_id = param->service_id;
2943	cm_id->service_mask = ~cpu_to_be64(0);
2944	cm_id_priv->timeout_ms = param->timeout_ms;
2945       if (cm_id_priv->timeout_ms > cm_convert_to_ms(max_timeout)) {
2946               printk(KERN_WARNING PFX "sidr req timeout_ms %d > %d, "
2947                      "decreasing used timeout_ms\n", param->timeout_ms,
2948                      cm_convert_to_ms(max_timeout));
2949               cm_id_priv->timeout_ms = cm_convert_to_ms(max_timeout);
2950       }
2951	cm_id_priv->max_cm_retries = param->max_cm_retries;
2952	ret = cm_alloc_msg(cm_id_priv, &msg);
2953	if (ret)
2954		goto out;
2955
2956	cm_format_sidr_req((struct cm_sidr_req_msg *) msg->mad, cm_id_priv,
2957			   param);
2958	msg->timeout_ms = cm_id_priv->timeout_ms;
2959	msg->context[1] = (void *) (unsigned long) IB_CM_SIDR_REQ_SENT;
2960
2961	spin_lock_irqsave(&cm_id_priv->lock, flags);
2962	if (cm_id->state == IB_CM_IDLE)
2963		ret = ib_post_send_mad(msg, NULL);
2964	else
2965		ret = -EINVAL;
2966
2967	if (ret) {
2968		spin_unlock_irqrestore(&cm_id_priv->lock, flags);
2969		cm_free_msg(msg);
2970		goto out;
2971	}
2972	cm_id->state = IB_CM_SIDR_REQ_SENT;
2973	cm_id_priv->msg = msg;
2974	spin_unlock_irqrestore(&cm_id_priv->lock, flags);
2975out:
2976	return ret;
2977}
2978EXPORT_SYMBOL(ib_send_cm_sidr_req);
2979
2980static void cm_format_sidr_req_event(struct cm_work *work,
2981				     struct ib_cm_id *listen_id)
2982{
2983	struct cm_sidr_req_msg *sidr_req_msg;
2984	struct ib_cm_sidr_req_event_param *param;
2985
2986	sidr_req_msg = (struct cm_sidr_req_msg *)
2987				work->mad_recv_wc->recv_buf.mad;
2988	param = &work->cm_event.param.sidr_req_rcvd;
2989	param->pkey = __be16_to_cpu(sidr_req_msg->pkey);
2990	param->listen_id = listen_id;
2991	param->port = work->port->port_num;
2992	work->cm_event.private_data = &sidr_req_msg->private_data;
2993}
2994
2995static int cm_sidr_req_handler(struct cm_work *work)
2996{
2997	struct ib_cm_id *cm_id;
2998	struct cm_id_private *cm_id_priv, *cur_cm_id_priv;
2999	struct cm_sidr_req_msg *sidr_req_msg;
3000	struct ib_wc *wc;
3001
3002	cm_id = ib_create_cm_id(work->port->cm_dev->ib_device, NULL, NULL);
3003	if (IS_ERR(cm_id))
3004		return PTR_ERR(cm_id);
3005	cm_id_priv = container_of(cm_id, struct cm_id_private, id);
3006
3007	/* Record SGID/SLID and request ID for lookup. */
3008	sidr_req_msg = (struct cm_sidr_req_msg *)
3009				work->mad_recv_wc->recv_buf.mad;
3010	wc = work->mad_recv_wc->wc;
3011	cm_id_priv->av.dgid.global.subnet_prefix = cpu_to_be64(wc->slid);
3012	cm_id_priv->av.dgid.global.interface_id = 0;
3013	cm_init_av_for_response(work->port, work->mad_recv_wc->wc,
3014				work->mad_recv_wc->recv_buf.grh,
3015				&cm_id_priv->av);
3016	cm_id_priv->id.remote_id = sidr_req_msg->request_id;
3017	cm_id_priv->tid = sidr_req_msg->hdr.tid;
3018	atomic_inc(&cm_id_priv->work_count);
3019
3020	spin_lock_irq(&cm.lock);
3021	cur_cm_id_priv = cm_insert_remote_sidr(cm_id_priv);
3022	if (cur_cm_id_priv) {
3023		spin_unlock_irq(&cm.lock);
3024		atomic_long_inc(&work->port->counter_group[CM_RECV_DUPLICATES].
3025				counter[CM_SIDR_REQ_COUNTER]);
3026		goto out; /* Duplicate message. */
3027	}
3028	cm_id_priv->id.state = IB_CM_SIDR_REQ_RCVD;
3029	cur_cm_id_priv = cm_find_listen(cm_id->device,
3030					sidr_req_msg->service_id,
3031					sidr_req_msg->private_data);
3032	if (!cur_cm_id_priv) {
3033		spin_unlock_irq(&cm.lock);
3034		cm_reject_sidr_req(cm_id_priv, IB_SIDR_UNSUPPORTED);
3035		goto out; /* No match. */
3036	}
3037	atomic_inc(&cur_cm_id_priv->refcount);
3038	spin_unlock_irq(&cm.lock);
3039
3040	cm_id_priv->id.cm_handler = cur_cm_id_priv->id.cm_handler;
3041	cm_id_priv->id.context = cur_cm_id_priv->id.context;
3042	cm_id_priv->id.service_id = sidr_req_msg->service_id;
3043	cm_id_priv->id.service_mask = ~cpu_to_be64(0);
3044
3045	cm_format_sidr_req_event(work, &cur_cm_id_priv->id);
3046	cm_process_work(cm_id_priv, work);
3047	cm_deref_id(cur_cm_id_priv);
3048	return 0;
3049out:
3050	ib_destroy_cm_id(&cm_id_priv->id);
3051	return -EINVAL;
3052}
3053
3054static void cm_format_sidr_rep(struct cm_sidr_rep_msg *sidr_rep_msg,
3055			       struct cm_id_private *cm_id_priv,
3056			       struct ib_cm_sidr_rep_param *param)
3057{
3058	cm_format_mad_hdr(&sidr_rep_msg->hdr, CM_SIDR_REP_ATTR_ID,
3059			  cm_id_priv->tid);
3060	sidr_rep_msg->request_id = cm_id_priv->id.remote_id;
3061	sidr_rep_msg->status = param->status;
3062	cm_sidr_rep_set_qpn(sidr_rep_msg, cpu_to_be32(param->qp_num));
3063	sidr_rep_msg->service_id = cm_id_priv->id.service_id;
3064	sidr_rep_msg->qkey = cpu_to_be32(param->qkey);
3065
3066	if (param->info && param->info_length)
3067		memcpy(sidr_rep_msg->info, param->info, param->info_length);
3068
3069	if (param->private_data && param->private_data_len)
3070		memcpy(sidr_rep_msg->private_data, param->private_data,
3071		       param->private_data_len);
3072}
3073
3074int ib_send_cm_sidr_rep(struct ib_cm_id *cm_id,
3075			struct ib_cm_sidr_rep_param *param)
3076{
3077	struct cm_id_private *cm_id_priv;
3078	struct ib_mad_send_buf *msg;
3079	unsigned long flags;
3080	int ret;
3081
3082	if ((param->info && param->info_length > IB_CM_SIDR_REP_INFO_LENGTH) ||
3083	    (param->private_data &&
3084	     param->private_data_len > IB_CM_SIDR_REP_PRIVATE_DATA_SIZE))
3085		return -EINVAL;
3086
3087	cm_id_priv = container_of(cm_id, struct cm_id_private, id);
3088	spin_lock_irqsave(&cm_id_priv->lock, flags);
3089	if (cm_id->state != IB_CM_SIDR_REQ_RCVD) {
3090		ret = -EINVAL;
3091		goto error;
3092	}
3093
3094	ret = cm_alloc_msg(cm_id_priv, &msg);
3095	if (ret)
3096		goto error;
3097
3098	cm_format_sidr_rep((struct cm_sidr_rep_msg *) msg->mad, cm_id_priv,
3099			   param);
3100	ret = ib_post_send_mad(msg, NULL);
3101	if (ret) {
3102		spin_unlock_irqrestore(&cm_id_priv->lock, flags);
3103		cm_free_msg(msg);
3104		return ret;
3105	}
3106	cm_id->state = IB_CM_IDLE;
3107	spin_unlock_irqrestore(&cm_id_priv->lock, flags);
3108
3109	spin_lock_irqsave(&cm.lock, flags);
3110	rb_erase(&cm_id_priv->sidr_id_node, &cm.remote_sidr_table);
3111	spin_unlock_irqrestore(&cm.lock, flags);
3112	return 0;
3113
3114error:	spin_unlock_irqrestore(&cm_id_priv->lock, flags);
3115	return ret;
3116}
3117EXPORT_SYMBOL(ib_send_cm_sidr_rep);
3118
3119static void cm_format_sidr_rep_event(struct cm_work *work)
3120{
3121	struct cm_sidr_rep_msg *sidr_rep_msg;
3122	struct ib_cm_sidr_rep_event_param *param;
3123
3124	sidr_rep_msg = (struct cm_sidr_rep_msg *)
3125				work->mad_recv_wc->recv_buf.mad;
3126	param = &work->cm_event.param.sidr_rep_rcvd;
3127	param->status = sidr_rep_msg->status;
3128	param->qkey = be32_to_cpu(sidr_rep_msg->qkey);
3129	param->qpn = be32_to_cpu(cm_sidr_rep_get_qpn(sidr_rep_msg));
3130	param->info = &sidr_rep_msg->info;
3131	param->info_len = sidr_rep_msg->info_length;
3132	work->cm_event.private_data = &sidr_rep_msg->private_data;
3133}
3134
3135static int cm_sidr_rep_handler(struct cm_work *work)
3136{
3137	struct cm_sidr_rep_msg *sidr_rep_msg;
3138	struct cm_id_private *cm_id_priv;
3139
3140	sidr_rep_msg = (struct cm_sidr_rep_msg *)
3141				work->mad_recv_wc->recv_buf.mad;
3142	cm_id_priv = cm_acquire_id(sidr_rep_msg->request_id, 0);
3143	if (!cm_id_priv)
3144		return -EINVAL; /* Unmatched reply. */
3145
3146	spin_lock_irq(&cm_id_priv->lock);
3147	if (cm_id_priv->id.state != IB_CM_SIDR_REQ_SENT) {
3148		spin_unlock_irq(&cm_id_priv->lock);
3149		goto out;
3150	}
3151	cm_id_priv->id.state = IB_CM_IDLE;
3152	ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg);
3153	spin_unlock_irq(&cm_id_priv->lock);
3154
3155	cm_format_sidr_rep_event(work);
3156	cm_process_work(cm_id_priv, work);
3157	return 0;
3158out:
3159	cm_deref_id(cm_id_priv);
3160	return -EINVAL;
3161}
3162
3163static void cm_process_send_error(struct ib_mad_send_buf *msg,
3164				  enum ib_wc_status wc_status)
3165{
3166	struct cm_id_private *cm_id_priv;
3167	struct ib_cm_event cm_event;
3168	enum ib_cm_state state;
3169	int ret;
3170
3171	memset(&cm_event, 0, sizeof cm_event);
3172	cm_id_priv = msg->context[0];
3173
3174	/* Discard old sends or ones without a response. */
3175	spin_lock_irq(&cm_id_priv->lock);
3176	state = (enum ib_cm_state) (unsigned long) msg->context[1];
3177	if (msg != cm_id_priv->msg || state != cm_id_priv->id.state)
3178		goto discard;
3179
3180	switch (state) {
3181	case IB_CM_REQ_SENT:
3182	case IB_CM_MRA_REQ_RCVD:
3183		cm_reset_to_idle(cm_id_priv);
3184		cm_event.event = IB_CM_REQ_ERROR;
3185		break;
3186	case IB_CM_REP_SENT:
3187	case IB_CM_MRA_REP_RCVD:
3188		cm_reset_to_idle(cm_id_priv);
3189		cm_event.event = IB_CM_REP_ERROR;
3190		break;
3191	case IB_CM_DREQ_SENT:
3192		cm_enter_timewait(cm_id_priv);
3193		cm_event.event = IB_CM_DREQ_ERROR;
3194		break;
3195	case IB_CM_SIDR_REQ_SENT:
3196		cm_id_priv->id.state = IB_CM_IDLE;
3197		cm_event.event = IB_CM_SIDR_REQ_ERROR;
3198		break;
3199	default:
3200		goto discard;
3201	}
3202	spin_unlock_irq(&cm_id_priv->lock);
3203	cm_event.param.send_status = wc_status;
3204
3205	/* No other events can occur on the cm_id at this point. */
3206	ret = cm_id_priv->id.cm_handler(&cm_id_priv->id, &cm_event);
3207	cm_free_msg(msg);
3208	if (ret)
3209		ib_destroy_cm_id(&cm_id_priv->id);
3210	return;
3211discard:
3212	spin_unlock_irq(&cm_id_priv->lock);
3213	cm_free_msg(msg);
3214}
3215
3216static void cm_send_handler(struct ib_mad_agent *mad_agent,
3217			    struct ib_mad_send_wc *mad_send_wc)
3218{
3219	struct ib_mad_send_buf *msg = mad_send_wc->send_buf;
3220	struct cm_port *port;
3221	u16 attr_index;
3222
3223	port = mad_agent->context;
3224	attr_index = be16_to_cpu(((struct ib_mad_hdr *)
3225				  msg->mad)->attr_id) - CM_ATTR_ID_OFFSET;
3226
3227	/*
3228	 * If the send was in response to a received message (context[0] is not
3229	 * set to a cm_id), and is not a REJ, then it is a send that was
3230	 * manually retried.
3231	 */
3232	if (!msg->context[0] && (attr_index != CM_REJ_COUNTER))
3233		msg->retries = 1;
3234
3235	atomic_long_add(1 + msg->retries,
3236			&port->counter_group[CM_XMIT].counter[attr_index]);
3237	if (msg->retries)
3238		atomic_long_add(msg->retries,
3239				&port->counter_group[CM_XMIT_RETRIES].
3240				counter[attr_index]);
3241
3242	switch (mad_send_wc->status) {
3243	case IB_WC_SUCCESS:
3244	case IB_WC_WR_FLUSH_ERR:
3245		cm_free_msg(msg);
3246		break;
3247	default:
3248		if (msg->context[0] && msg->context[1])
3249			cm_process_send_error(msg, mad_send_wc->status);
3250		else
3251			cm_free_msg(msg);
3252		break;
3253	}
3254}
3255
3256static void cm_work_handler(struct work_struct *_work)
3257{
3258	struct cm_work *work = container_of(_work, struct cm_work, work.work);
3259	int ret;
3260
3261	switch (work->cm_event.event) {
3262	case IB_CM_REQ_RECEIVED:
3263		ret = cm_req_handler(work);
3264		break;
3265	case IB_CM_MRA_RECEIVED:
3266		ret = cm_mra_handler(work);
3267		break;
3268	case IB_CM_REJ_RECEIVED:
3269		ret = cm_rej_handler(work);
3270		break;
3271	case IB_CM_REP_RECEIVED:
3272		ret = cm_rep_handler(work);
3273		break;
3274	case IB_CM_RTU_RECEIVED:
3275		ret = cm_rtu_handler(work);
3276		break;
3277	case IB_CM_USER_ESTABLISHED:
3278		ret = cm_establish_handler(work);
3279		break;
3280	case IB_CM_DREQ_RECEIVED:
3281		ret = cm_dreq_handler(work);
3282		break;
3283	case IB_CM_DREP_RECEIVED:
3284		ret = cm_drep_handler(work);
3285		break;
3286	case IB_CM_SIDR_REQ_RECEIVED:
3287		ret = cm_sidr_req_handler(work);
3288		break;
3289	case IB_CM_SIDR_REP_RECEIVED:
3290		ret = cm_sidr_rep_handler(work);
3291		break;
3292	case IB_CM_LAP_RECEIVED:
3293		ret = cm_lap_handler(work);
3294		break;
3295	case IB_CM_APR_RECEIVED:
3296		ret = cm_apr_handler(work);
3297		break;
3298	case IB_CM_TIMEWAIT_EXIT:
3299		ret = cm_timewait_handler(work);
3300		break;
3301	default:
3302		ret = -EINVAL;
3303		break;
3304	}
3305	if (ret)
3306		cm_free_work(work);
3307}
3308
3309static int cm_establish(struct ib_cm_id *cm_id)
3310{
3311	struct cm_id_private *cm_id_priv;
3312	struct cm_work *work;
3313	unsigned long flags;
3314	int ret = 0;
3315
3316	work = kmalloc(sizeof *work, GFP_ATOMIC);
3317	if (!work)
3318		return -ENOMEM;
3319
3320	cm_id_priv = container_of(cm_id, struct cm_id_private, id);
3321	spin_lock_irqsave(&cm_id_priv->lock, flags);
3322	switch (cm_id->state)
3323	{
3324	case IB_CM_REP_SENT:
3325	case IB_CM_MRA_REP_RCVD:
3326		cm_id->state = IB_CM_ESTABLISHED;
3327		break;
3328	case IB_CM_ESTABLISHED:
3329		ret = -EISCONN;
3330		break;
3331	default:
3332		ret = -EINVAL;
3333		break;
3334	}
3335	spin_unlock_irqrestore(&cm_id_priv->lock, flags);
3336
3337	if (ret) {
3338		kfree(work);
3339		goto out;
3340	}
3341
3342	/*
3343	 * The CM worker thread may try to destroy the cm_id before it
3344	 * can execute this work item.  To prevent potential deadlock,
3345	 * we need to find the cm_id once we're in the context of the
3346	 * worker thread, rather than holding a reference on it.
3347	 */
3348	INIT_DELAYED_WORK(&work->work, cm_work_handler);
3349	work->local_id = cm_id->local_id;
3350	work->remote_id = cm_id->remote_id;
3351	work->mad_recv_wc = NULL;
3352	work->cm_event.event = IB_CM_USER_ESTABLISHED;
3353	queue_delayed_work(cm.wq, &work->work, 0);
3354out:
3355	return ret;
3356}
3357
3358static int cm_migrate(struct ib_cm_id *cm_id)
3359{
3360	struct cm_id_private *cm_id_priv;
3361	unsigned long flags;
3362	int ret = 0;
3363
3364	cm_id_priv = container_of(cm_id, struct cm_id_private, id);
3365	spin_lock_irqsave(&cm_id_priv->lock, flags);
3366	if (cm_id->state == IB_CM_ESTABLISHED &&
3367	    (cm_id->lap_state == IB_CM_LAP_UNINIT ||
3368	     cm_id->lap_state == IB_CM_LAP_IDLE)) {
3369		cm_id->lap_state = IB_CM_LAP_IDLE;
3370		cm_id_priv->av = cm_id_priv->alt_av;
3371	} else
3372		ret = -EINVAL;
3373	spin_unlock_irqrestore(&cm_id_priv->lock, flags);
3374
3375	return ret;
3376}
3377
3378int ib_cm_notify(struct ib_cm_id *cm_id, enum ib_event_type event)
3379{
3380	int ret;
3381
3382	switch (event) {
3383	case IB_EVENT_COMM_EST:
3384		ret = cm_establish(cm_id);
3385		break;
3386	case IB_EVENT_PATH_MIG:
3387		ret = cm_migrate(cm_id);
3388		break;
3389	default:
3390		ret = -EINVAL;
3391	}
3392	return ret;
3393}
3394EXPORT_SYMBOL(ib_cm_notify);
3395
3396static void cm_recv_handler(struct ib_mad_agent *mad_agent,
3397			    struct ib_mad_recv_wc *mad_recv_wc)
3398{
3399	struct cm_port *port = mad_agent->context;
3400	struct cm_work *work;
3401	enum ib_cm_event_type event;
3402	u16 attr_id;
3403	int paths = 0;
3404
3405	switch (mad_recv_wc->recv_buf.mad->mad_hdr.attr_id) {
3406	case CM_REQ_ATTR_ID:
3407		paths = 1 + (((struct cm_req_msg *) mad_recv_wc->recv_buf.mad)->
3408						    alt_local_lid != 0);
3409		event = IB_CM_REQ_RECEIVED;
3410		break;
3411	case CM_MRA_ATTR_ID:
3412		event = IB_CM_MRA_RECEIVED;
3413		break;
3414	case CM_REJ_ATTR_ID:
3415		event = IB_CM_REJ_RECEIVED;
3416		break;
3417	case CM_REP_ATTR_ID:
3418		event = IB_CM_REP_RECEIVED;
3419		break;
3420	case CM_RTU_ATTR_ID:
3421		event = IB_CM_RTU_RECEIVED;
3422		break;
3423	case CM_DREQ_ATTR_ID:
3424		event = IB_CM_DREQ_RECEIVED;
3425		break;
3426	case CM_DREP_ATTR_ID:
3427		event = IB_CM_DREP_RECEIVED;
3428		break;
3429	case CM_SIDR_REQ_ATTR_ID:
3430		event = IB_CM_SIDR_REQ_RECEIVED;
3431		break;
3432	case CM_SIDR_REP_ATTR_ID:
3433		event = IB_CM_SIDR_REP_RECEIVED;
3434		break;
3435	case CM_LAP_ATTR_ID:
3436		paths = 1;
3437		event = IB_CM_LAP_RECEIVED;
3438		break;
3439	case CM_APR_ATTR_ID:
3440		event = IB_CM_APR_RECEIVED;
3441		break;
3442	default:
3443		ib_free_recv_mad(mad_recv_wc);
3444		return;
3445	}
3446
3447	attr_id = be16_to_cpu(mad_recv_wc->recv_buf.mad->mad_hdr.attr_id);
3448	atomic_long_inc(&port->counter_group[CM_RECV].
3449			counter[attr_id - CM_ATTR_ID_OFFSET]);
3450
3451	work = kmalloc(sizeof *work + sizeof(struct ib_sa_path_rec) * paths,
3452		       GFP_KERNEL);
3453	if (!work) {
3454		ib_free_recv_mad(mad_recv_wc);
3455		return;
3456	}
3457
3458	INIT_DELAYED_WORK(&work->work, cm_work_handler);
3459	work->cm_event.event = event;
3460	work->mad_recv_wc = mad_recv_wc;
3461	work->port = port;
3462	queue_delayed_work(cm.wq, &work->work, 0);
3463}
3464
3465static int cm_init_qp_init_attr(struct cm_id_private *cm_id_priv,
3466				struct ib_qp_attr *qp_attr,
3467				int *qp_attr_mask)
3468{
3469	unsigned long flags;
3470	int ret;
3471
3472	spin_lock_irqsave(&cm_id_priv->lock, flags);
3473	switch (cm_id_priv->id.state) {
3474	case IB_CM_REQ_SENT:
3475	case IB_CM_MRA_REQ_RCVD:
3476	case IB_CM_REQ_RCVD:
3477	case IB_CM_MRA_REQ_SENT:
3478	case IB_CM_REP_RCVD:
3479	case IB_CM_MRA_REP_SENT:
3480	case IB_CM_REP_SENT:
3481	case IB_CM_MRA_REP_RCVD:
3482	case IB_CM_ESTABLISHED:
3483		*qp_attr_mask = IB_QP_STATE | IB_QP_ACCESS_FLAGS |
3484				IB_QP_PKEY_INDEX | IB_QP_PORT;
3485		qp_attr->qp_access_flags = IB_ACCESS_REMOTE_WRITE;
3486		if (cm_id_priv->responder_resources)
3487			qp_attr->qp_access_flags |= IB_ACCESS_REMOTE_READ |
3488						    IB_ACCESS_REMOTE_ATOMIC;
3489		qp_attr->pkey_index = cm_id_priv->av.pkey_index;
3490		qp_attr->port_num = cm_id_priv->av.port->port_num;
3491		ret = 0;
3492		break;
3493	default:
3494		ret = -EINVAL;
3495		break;
3496	}
3497	spin_unlock_irqrestore(&cm_id_priv->lock, flags);
3498	return ret;
3499}
3500
3501static int cm_init_qp_rtr_attr(struct cm_id_private *cm_id_priv,
3502			       struct ib_qp_attr *qp_attr,
3503			       int *qp_attr_mask)
3504{
3505	unsigned long flags;
3506	int ret;
3507
3508	spin_lock_irqsave(&cm_id_priv->lock, flags);
3509	switch (cm_id_priv->id.state) {
3510	case IB_CM_REQ_RCVD:
3511	case IB_CM_MRA_REQ_SENT:
3512	case IB_CM_REP_RCVD:
3513	case IB_CM_MRA_REP_SENT:
3514	case IB_CM_REP_SENT:
3515	case IB_CM_MRA_REP_RCVD:
3516	case IB_CM_ESTABLISHED:
3517		*qp_attr_mask = IB_QP_STATE | IB_QP_AV | IB_QP_PATH_MTU |
3518				IB_QP_DEST_QPN | IB_QP_RQ_PSN;
3519		qp_attr->ah_attr = cm_id_priv->av.ah_attr;
3520		qp_attr->path_mtu = cm_id_priv->path_mtu;
3521		qp_attr->dest_qp_num = be32_to_cpu(cm_id_priv->remote_qpn);
3522		qp_attr->rq_psn = be32_to_cpu(cm_id_priv->rq_psn);
3523		if (cm_id_priv->qp_type == IB_QPT_RC) {
3524			*qp_attr_mask |= IB_QP_MAX_DEST_RD_ATOMIC |
3525					 IB_QP_MIN_RNR_TIMER;
3526			qp_attr->max_dest_rd_atomic =
3527					cm_id_priv->responder_resources;
3528			qp_attr->min_rnr_timer = 0;
3529		}
3530		if (cm_id_priv->alt_av.ah_attr.dlid) {
3531			*qp_attr_mask |= IB_QP_ALT_PATH;
3532			qp_attr->alt_port_num = cm_id_priv->alt_av.port->port_num;
3533			qp_attr->alt_pkey_index = cm_id_priv->alt_av.pkey_index;
3534			qp_attr->alt_timeout = cm_id_priv->alt_av.timeout;
3535			qp_attr->alt_ah_attr = cm_id_priv->alt_av.ah_attr;
3536		}
3537		ret = 0;
3538		break;
3539	default:
3540		ret = -EINVAL;
3541		break;
3542	}
3543	spin_unlock_irqrestore(&cm_id_priv->lock, flags);
3544	return ret;
3545}
3546
3547static int cm_init_qp_rts_attr(struct cm_id_private *cm_id_priv,
3548			       struct ib_qp_attr *qp_attr,
3549			       int *qp_attr_mask)
3550{
3551	unsigned long flags;
3552	int ret;
3553
3554	spin_lock_irqsave(&cm_id_priv->lock, flags);
3555	switch (cm_id_priv->id.state) {
3556	/* Allow transition to RTS before sending REP */
3557	case IB_CM_REQ_RCVD:
3558	case IB_CM_MRA_REQ_SENT:
3559
3560	case IB_CM_REP_RCVD:
3561	case IB_CM_MRA_REP_SENT:
3562	case IB_CM_REP_SENT:
3563	case IB_CM_MRA_REP_RCVD:
3564	case IB_CM_ESTABLISHED:
3565		if (cm_id_priv->id.lap_state == IB_CM_LAP_UNINIT) {
3566			*qp_attr_mask = IB_QP_STATE | IB_QP_SQ_PSN;
3567			qp_attr->sq_psn = be32_to_cpu(cm_id_priv->sq_psn);
3568			if (cm_id_priv->qp_type == IB_QPT_RC) {
3569				*qp_attr_mask |= IB_QP_TIMEOUT | IB_QP_RETRY_CNT |
3570						 IB_QP_RNR_RETRY |
3571						 IB_QP_MAX_QP_RD_ATOMIC;
3572				qp_attr->timeout = cm_id_priv->av.timeout;
3573				qp_attr->retry_cnt = cm_id_priv->retry_count;
3574				qp_attr->rnr_retry = cm_id_priv->rnr_retry_count;
3575				qp_attr->max_rd_atomic =
3576					cm_id_priv->initiator_depth;
3577			}
3578			if (cm_id_priv->alt_av.ah_attr.dlid) {
3579				*qp_attr_mask |= IB_QP_PATH_MIG_STATE;
3580				qp_attr->path_mig_state = IB_MIG_REARM;
3581			}
3582		} else {
3583			*qp_attr_mask = IB_QP_ALT_PATH | IB_QP_PATH_MIG_STATE;
3584			qp_attr->alt_port_num = cm_id_priv->alt_av.port->port_num;
3585			qp_attr->alt_pkey_index = cm_id_priv->alt_av.pkey_index;
3586			qp_attr->alt_timeout = cm_id_priv->alt_av.timeout;
3587			qp_attr->alt_ah_attr = cm_id_priv->alt_av.ah_attr;
3588			qp_attr->path_mig_state = IB_MIG_REARM;
3589		}
3590		ret = 0;
3591		break;
3592	default:
3593		ret = -EINVAL;
3594		break;
3595	}
3596	spin_unlock_irqrestore(&cm_id_priv->lock, flags);
3597	return ret;
3598}
3599
3600int ib_cm_init_qp_attr(struct ib_cm_id *cm_id,
3601		       struct ib_qp_attr *qp_attr,
3602		       int *qp_attr_mask)
3603{
3604	struct cm_id_private *cm_id_priv;
3605	int ret;
3606
3607	cm_id_priv = container_of(cm_id, struct cm_id_private, id);
3608	switch (qp_attr->qp_state) {
3609	case IB_QPS_INIT:
3610		ret = cm_init_qp_init_attr(cm_id_priv, qp_attr, qp_attr_mask);
3611		break;
3612	case IB_QPS_RTR:
3613		ret = cm_init_qp_rtr_attr(cm_id_priv, qp_attr, qp_attr_mask);
3614		break;
3615	case IB_QPS_RTS:
3616		ret = cm_init_qp_rts_attr(cm_id_priv, qp_attr, qp_attr_mask);
3617		break;
3618	default:
3619		ret = -EINVAL;
3620		break;
3621	}
3622	return ret;
3623}
3624EXPORT_SYMBOL(ib_cm_init_qp_attr);
3625
3626static void cm_get_ack_delay(struct cm_device *cm_dev)
3627{
3628	struct ib_device_attr attr;
3629
3630	if (ib_query_device(cm_dev->ib_device, &attr))
3631		cm_dev->ack_delay = 0; /* acks will rely on packet life time */
3632	else
3633		cm_dev->ack_delay = attr.local_ca_ack_delay;
3634}
3635
3636static ssize_t cm_show_counter(struct kobject *obj, struct attribute *attr,
3637			       char *buf)
3638{
3639	struct cm_counter_group *group;
3640	struct cm_counter_attribute *cm_attr;
3641
3642	group = container_of(obj, struct cm_counter_group, obj);
3643	cm_attr = container_of(attr, struct cm_counter_attribute, attr);
3644
3645	return sprintf(buf, "%ld\n",
3646		       atomic_long_read(&group->counter[cm_attr->index]));
3647}
3648
3649static struct sysfs_ops cm_counter_ops = {
3650	.show = cm_show_counter
3651};
3652
3653static struct kobj_type cm_counter_obj_type = {
3654	.sysfs_ops = &cm_counter_ops,
3655	.default_attrs = cm_counter_default_attrs
3656};
3657
3658static void cm_release_port_obj(struct kobject *obj)
3659{
3660	struct cm_port *cm_port;
3661
3662	cm_port = container_of(obj, struct cm_port, port_obj);
3663	kfree(cm_port);
3664}
3665
3666static struct kobj_type cm_port_obj_type = {
3667	.release = cm_release_port_obj
3668};
3669
3670struct class cm_class = {
3671	.name    = "infiniband_cm",
3672};
3673EXPORT_SYMBOL(cm_class);
3674
3675static int cm_create_port_fs(struct cm_port *port)
3676{
3677	int i, ret;
3678
3679	ret = kobject_init_and_add(&port->port_obj, &cm_port_obj_type,
3680				   &port->cm_dev->device->kobj,
3681				   "%d", port->port_num);
3682	if (ret) {
3683		kfree(port);
3684		return ret;
3685	}
3686
3687	for (i = 0; i < CM_COUNTER_GROUPS; i++) {
3688		ret = kobject_init_and_add(&port->counter_group[i].obj,
3689					   &cm_counter_obj_type,
3690					   &port->port_obj,
3691					   "%s", counter_group_names[i]);
3692		if (ret)
3693			goto error;
3694	}
3695
3696	return 0;
3697
3698error:
3699	while (i--)
3700		kobject_put(&port->counter_group[i].obj);
3701	kobject_put(&port->port_obj);
3702	return ret;
3703
3704}
3705
3706static void cm_remove_port_fs(struct cm_port *port)
3707{
3708	int i;
3709
3710	for (i = 0; i < CM_COUNTER_GROUPS; i++)
3711		kobject_put(&port->counter_group[i].obj);
3712
3713	kobject_put(&port->port_obj);
3714}
3715
3716static void cm_add_one(struct ib_device *ib_device)
3717{
3718	struct cm_device *cm_dev;
3719	struct cm_port *port;
3720	struct ib_mad_reg_req reg_req = {
3721		.mgmt_class = IB_MGMT_CLASS_CM,
3722		.mgmt_class_version = IB_CM_CLASS_VERSION
3723	};
3724	struct ib_port_modify port_modify = {
3725		.set_port_cap_mask = IB_PORT_CM_SUP
3726	};
3727	unsigned long flags;
3728	int ret;
3729	u8 i;
3730
3731	if (rdma_node_get_transport(ib_device->node_type) != RDMA_TRANSPORT_IB)
3732		return;
3733
3734	cm_dev = kzalloc(sizeof(*cm_dev) + sizeof(*port) *
3735			 ib_device->phys_port_cnt, GFP_KERNEL);
3736	if (!cm_dev)
3737		return;
3738
3739	cm_dev->ib_device = ib_device;
3740	cm_get_ack_delay(cm_dev);
3741
3742	cm_dev->device = device_create(&cm_class, &ib_device->dev,
3743				       MKDEV(0, 0), NULL,
3744				       "%s", ib_device->name);
3745	if (!cm_dev->device) {
3746		kfree(cm_dev);
3747		return;
3748	}
3749
3750	set_bit(IB_MGMT_METHOD_SEND, reg_req.method_mask);
3751	for (i = 1; i <= ib_device->phys_port_cnt; i++) {
3752		port = kzalloc(sizeof *port, GFP_KERNEL);
3753		if (!port)
3754			goto error1;
3755
3756		cm_dev->port[i-1] = port;
3757		port->cm_dev = cm_dev;
3758		port->port_num = i;
3759
3760		ret = cm_create_port_fs(port);
3761		if (ret)
3762			goto error1;
3763
3764		port->mad_agent = ib_register_mad_agent(ib_device, i,
3765							IB_QPT_GSI,
3766							&reg_req,
3767							0,
3768							cm_send_handler,
3769							cm_recv_handler,
3770							port);
3771		if (IS_ERR(port->mad_agent))
3772			goto error2;
3773
3774		ret = ib_modify_port(ib_device, i, 0, &port_modify);
3775		if (ret)
3776			goto error3;
3777	}
3778	ib_set_client_data(ib_device, &cm_client, cm_dev);
3779
3780	write_lock_irqsave(&cm.device_lock, flags);
3781	list_add_tail(&cm_dev->list, &cm.device_list);
3782	write_unlock_irqrestore(&cm.device_lock, flags);
3783	return;
3784
3785error3:
3786	ib_unregister_mad_agent(port->mad_agent);
3787error2:
3788	cm_remove_port_fs(port);
3789error1:
3790	port_modify.set_port_cap_mask = 0;
3791	port_modify.clr_port_cap_mask = IB_PORT_CM_SUP;
3792	while (--i) {
3793		port = cm_dev->port[i-1];
3794		ib_modify_port(ib_device, port->port_num, 0, &port_modify);
3795		ib_unregister_mad_agent(port->mad_agent);
3796		cm_remove_port_fs(port);
3797	}
3798	device_unregister(cm_dev->device);
3799	kfree(cm_dev);
3800}
3801
3802static void cm_remove_one(struct ib_device *ib_device)
3803{
3804	struct cm_device *cm_dev;
3805	struct cm_port *port;
3806	struct ib_port_modify port_modify = {
3807		.clr_port_cap_mask = IB_PORT_CM_SUP
3808	};
3809	unsigned long flags;
3810	int i;
3811
3812	cm_dev = ib_get_client_data(ib_device, &cm_client);
3813	if (!cm_dev)
3814		return;
3815
3816	write_lock_irqsave(&cm.device_lock, flags);
3817	list_del(&cm_dev->list);
3818	write_unlock_irqrestore(&cm.device_lock, flags);
3819
3820	for (i = 1; i <= ib_device->phys_port_cnt; i++) {
3821		port = cm_dev->port[i-1];
3822		ib_modify_port(ib_device, port->port_num, 0, &port_modify);
3823		ib_unregister_mad_agent(port->mad_agent);
3824		flush_workqueue(cm.wq);
3825		cm_remove_port_fs(port);
3826	}
3827	device_unregister(cm_dev->device);
3828	kfree(cm_dev);
3829}
3830
3831static int __init ib_cm_init(void)
3832{
3833	int ret;
3834
3835	memset(&cm, 0, sizeof cm);
3836	INIT_LIST_HEAD(&cm.device_list);
3837	rwlock_init(&cm.device_lock);
3838	spin_lock_init(&cm.lock);
3839	cm.listen_service_table = RB_ROOT;
3840	cm.listen_service_id = be64_to_cpu(IB_CM_ASSIGN_SERVICE_ID);
3841	cm.remote_id_table = RB_ROOT;
3842	cm.remote_qp_table = RB_ROOT;
3843	cm.remote_sidr_table = RB_ROOT;
3844	idr_init(&cm.local_id_table);
3845	get_random_bytes(&cm.random_id_operand, sizeof cm.random_id_operand);
3846	idr_pre_get(&cm.local_id_table, GFP_KERNEL);
3847	INIT_LIST_HEAD(&cm.timewait_list);
3848
3849	ret = class_register(&cm_class);
3850	if (ret)
3851		return -ENOMEM;
3852
3853	cm.wq = create_workqueue("ib_cm");
3854	if (!cm.wq) {
3855		ret = -ENOMEM;
3856		goto error1;
3857	}
3858
3859	ret = ib_register_client(&cm_client);
3860	if (ret)
3861		goto error2;
3862
3863	return 0;
3864error2:
3865	destroy_workqueue(cm.wq);
3866error1:
3867	class_unregister(&cm_class);
3868	return ret;
3869}
3870
3871static void __exit ib_cm_cleanup(void)
3872{
3873	struct cm_timewait_info *timewait_info, *tmp;
3874
3875	spin_lock_irq(&cm.lock);
3876	list_for_each_entry(timewait_info, &cm.timewait_list, list)
3877		cancel_delayed_work(&timewait_info->work.work);
3878	spin_unlock_irq(&cm.lock);
3879
3880	ib_unregister_client(&cm_client);
3881	destroy_workqueue(cm.wq);
3882
3883	list_for_each_entry_safe(timewait_info, tmp, &cm.timewait_list, list) {
3884		list_del(&timewait_info->list);
3885		kfree(timewait_info);
3886	}
3887
3888	class_unregister(&cm_class);
3889	idr_destroy(&cm.local_id_table);
3890}
3891
3892module_init_order(ib_cm_init, SI_ORDER_SECOND);
3893module_exit(ib_cm_cleanup);
3894
3895