cma.c revision 309378
/*
 * Copyright (c) 2005 Voltaire Inc.  All rights reserved.
 * Copyright (c) 2002-2005, Network Appliance, Inc. All rights reserved.
 * Copyright (c) 1999-2005, Mellanox Technologies, Inc. All rights reserved.
 * Copyright (c) 2005-2006 Intel Corporation.  All rights reserved.
 * Copyright (c) 2016 Chelsio Communications.  All rights reserved.
 *
 * This software is available to you under a choice of one of two
 * licenses.  You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the
 * OpenIB.org BSD license below:
 *
 *     Redistribution and use in source and binary forms, with or
 *     without modification, are permitted provided that the following
 *     conditions are met:
 *
 *      - Redistributions of source code must retain the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer.
 *
 *      - Redistributions in binary form must reproduce the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer in the documentation and/or other materials
 *        provided with the distribution.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */

#include <linux/completion.h>
#include <linux/in.h>
#include <linux/in6.h>
#include <linux/mutex.h>
#include <linux/random.h>
#include <linux/idr.h>
#include <linux/inetdevice.h>

#include <net/tcp.h>
#include <net/ipv6.h>

#include <rdma/rdma_cm.h>
#include <rdma/rdma_cm_ib.h>
#include <rdma/ib_cache.h>
#include <rdma/ib_cm.h>
#include <rdma/ib_sa.h>
#include <rdma/iw_cm.h>

MODULE_AUTHOR("Sean Hefty");
MODULE_DESCRIPTION("Generic RDMA CM Agent");
MODULE_LICENSE("Dual BSD/GPL");

static int tavor_quirk = 0;
module_param_named(tavor_quirk, tavor_quirk, int, 0644);
MODULE_PARM_DESC(tavor_quirk, "Tavor performance quirk: limit MTU to 1K if > 0");

int unify_tcp_port_space = 1;
module_param(unify_tcp_port_space, int, 0644);
MODULE_PARM_DESC(unify_tcp_port_space, "Unify the host TCP and RDMA port "
		 "space allocation (default=1)");

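/*
 * Tuning constants for the IB CM exchange: CMA_CM_RESPONSE_TIMEOUT is the
 * CM response timeout exponent (the IB CM expresses timeouts as
 * 4.096 us * 2^value), CMA_MAX_CM_RETRIES bounds CM message retries, and
 * CMA_CM_MRA_SETTING is the service timeout advertised in MRA replies.
 */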
#define CMA_CM_RESPONSE_TIMEOUT 20
#define CMA_MAX_CM_RETRIES 15
#define CMA_CM_MRA_SETTING (IB_CM_MRA_FLAG_DELAY | 24)
#define IBOE_PACKET_LIFETIME 18

static int cma_response_timeout = CMA_CM_RESPONSE_TIMEOUT;
module_param_named(cma_response_timeout, cma_response_timeout, int, 0644);
MODULE_PARM_DESC(cma_response_timeout, "CMA_CM_RESPONSE_TIMEOUT default=20");

static int def_prec2sl = 3;
module_param_named(def_prec2sl, def_prec2sl, int, 0644);
MODULE_PARM_DESC(def_prec2sl, "Default value for SL priority with RoCE. Valid values 0 - 7");

static void cma_add_one(struct ib_device *device);
static void cma_remove_one(struct ib_device *device);

static struct ib_client cma_client = {
	.name   = "cma",
	.add    = cma_add_one,
	.remove = cma_remove_one
};

static struct ib_sa_client sa_client;
static struct rdma_addr_client addr_client;
static LIST_HEAD(dev_list);
static LIST_HEAD(listen_any_list);
static DEFINE_MUTEX(lock);
static struct workqueue_struct *cma_wq;
static DEFINE_IDR(sdp_ps);
static DEFINE_IDR(tcp_ps);
static DEFINE_IDR(udp_ps);
static DEFINE_IDR(ipoib_ps);
#if defined(INET)
static int next_port;
#endif

struct cma_device {
	struct list_head	list;
	struct ib_device	*device;
	struct completion	comp;
	atomic_t		refcount;
	struct list_head	id_list;
};

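/*
 * Lifecycle states of an rdma_id_private.  An id normally moves through
 * ADDR_QUERY/ADDR_RESOLVED and ROUTE_QUERY/ROUTE_RESOLVED (or ADDR_BOUND
 * and LISTEN on the passive side) toward CONNECT, and ends in DESTROYING.
 */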
enum cma_state {
	CMA_IDLE,
	CMA_ADDR_QUERY,
	CMA_ADDR_RESOLVED,
	CMA_ROUTE_QUERY,
	CMA_ROUTE_RESOLVED,
	CMA_CONNECT,
	CMA_DISCONNECT,
	CMA_ADDR_BOUND,
	CMA_LISTEN,
	CMA_DEVICE_REMOVAL,
	CMA_DESTROYING
};

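/*
 * A bind list ties every rdma_id_private sharing one port within a port
 * space (the ->owners hlist) to the idr entry that reserves that port.
 */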
struct rdma_bind_list {
	struct idr		*ps;
	struct hlist_head	owners;
	unsigned short		port;
};

/*
 * Device removal can occur at any time, so we need extra handling to
 * serialize notifying the user of device removal with other callbacks.
 * We do this by disabling removal notification while a callback is in
 * progress, and reporting it after the callback completes.
 */
struct rdma_id_private {
	struct rdma_cm_id	id;

	struct rdma_bind_list	*bind_list;
	struct socket		*sock;
	struct hlist_node	node;
	struct list_head	list; /* listen_any_list or cma_device.list */
	struct list_head	listen_list; /* per device listens */
	struct cma_device	*cma_dev;
	struct list_head	mc_list;

	int			internal_id;
	enum cma_state		state;
	spinlock_t		lock;
	struct mutex		qp_mutex;

	struct completion	comp;
	atomic_t		refcount;
	struct mutex		handler_mutex;

	int			backlog;
	int			timeout_ms;
	struct ib_sa_query	*query;
	int			query_id;
	union {
		struct ib_cm_id	*ib;
		struct iw_cm_id	*iw;
	} cm_id;

	u32			seq_num;
	u32			qkey;
	u32			qp_num;
	u8			srq;
	u8			tos;
};

struct cma_multicast {
	struct rdma_id_private *id_priv;
	union {
		struct ib_sa_multicast *ib;
	} multicast;
	struct list_head	list;
	void			*context;
	struct sockaddr_storage	addr;
	struct kref		mcref;
};

struct cma_work {
	struct work_struct	work;
	struct rdma_id_private	*id;
	enum cma_state		old_state;
	enum cma_state		new_state;
	struct rdma_cm_event	event;
};

struct cma_ndev_work {
	struct work_struct	work;
	struct rdma_id_private	*id;
	struct rdma_cm_event	event;
};

struct iboe_mcast_work {
	struct work_struct	 work;
	struct rdma_id_private	*id;
	struct cma_multicast	*mc;
};

union cma_ip_addr {
	struct in6_addr ip6;
	struct {
		__be32 pad[3];
		__be32 addr;
	} ip4;
};

struct cma_hdr {
	u8 cma_version;
	u8 ip_version;	/* IP version: 7:4 */
	__be16 port;
	union cma_ip_addr src_addr;
	union cma_ip_addr dst_addr;
};

struct sdp_hh {
	u8 bsdh[16];
	u8 sdp_version; /* Major version: 7:4 */
	u8 ip_version;	/* IP version: 7:4 */
	u8 sdp_specific1[10];
	__be16 port;
	__be16 sdp_specific2;
	union cma_ip_addr src_addr;
	union cma_ip_addr dst_addr;
};

struct sdp_hah {
	u8 bsdh[16];
	u8 sdp_version;
};

#define CMA_VERSION 0x00
#define SDP_MAJ_VERSION 0x2

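/*
 * Atomic helpers for the id state machine: cma_comp() tests the current
 * state, cma_comp_exch() transitions only if the current state matches
 * 'comp', and cma_exch() transitions unconditionally and returns the
 * previous state.  All three take id_priv->lock.
 */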
static int cma_comp(struct rdma_id_private *id_priv, enum cma_state comp)
{
	unsigned long flags;
	int ret;

	spin_lock_irqsave(&id_priv->lock, flags);
	ret = (id_priv->state == comp);
	spin_unlock_irqrestore(&id_priv->lock, flags);
	return ret;
}

static int cma_comp_exch(struct rdma_id_private *id_priv,
			 enum cma_state comp, enum cma_state exch)
{
	unsigned long flags;
	int ret;

	spin_lock_irqsave(&id_priv->lock, flags);
	if ((ret = (id_priv->state == comp)))
		id_priv->state = exch;
	spin_unlock_irqrestore(&id_priv->lock, flags);
	return ret;
}

static enum cma_state cma_exch(struct rdma_id_private *id_priv,
			       enum cma_state exch)
{
	unsigned long flags;
	enum cma_state old;

	spin_lock_irqsave(&id_priv->lock, flags);
	old = id_priv->state;
	id_priv->state = exch;
	spin_unlock_irqrestore(&id_priv->lock, flags);
	return old;
}

static inline u8 cma_get_ip_ver(struct cma_hdr *hdr)
{
	return hdr->ip_version >> 4;
}

static inline void cma_set_ip_ver(struct cma_hdr *hdr, u8 ip_ver)
{
	hdr->ip_version = (ip_ver << 4) | (hdr->ip_version & 0xF);
}

static inline u8 sdp_get_majv(u8 sdp_version)
{
	return sdp_version >> 4;
}

static inline u8 sdp_get_ip_ver(struct sdp_hh *hh)
{
	return hh->ip_version >> 4;
}

static inline void sdp_set_ip_ver(struct sdp_hh *hh, u8 ip_ver)
{
	hh->ip_version = (ip_ver << 4) | (hh->ip_version & 0xF);
}

static inline int cma_is_ud_ps(enum rdma_port_space ps)
{
	return (ps == RDMA_PS_UDP || ps == RDMA_PS_IPOIB);
}

static void cma_attach_to_dev(struct rdma_id_private *id_priv,
			      struct cma_device *cma_dev)
{
	atomic_inc(&cma_dev->refcount);
	id_priv->cma_dev = cma_dev;
	id_priv->id.device = cma_dev->device;
	id_priv->id.route.addr.dev_addr.transport =
		rdma_node_get_transport(cma_dev->device->node_type);
	list_add_tail(&id_priv->list, &cma_dev->id_list);
}

static inline void cma_deref_dev(struct cma_device *cma_dev)
{
	if (atomic_dec_and_test(&cma_dev->refcount))
		complete(&cma_dev->comp);
}

static inline void release_mc(struct kref *kref)
{
	struct cma_multicast *mc = container_of(kref, struct cma_multicast, mcref);

	kfree(mc->multicast.ib);
	kfree(mc);
}

static void cma_detach_from_dev(struct rdma_id_private *id_priv)
{
	list_del(&id_priv->list);
	cma_deref_dev(id_priv->cma_dev);
	id_priv->cma_dev = NULL;
}

static int cma_set_qkey(struct rdma_id_private *id_priv)
{
	struct ib_sa_mcmember_rec rec;
	int ret = 0;

	if (id_priv->qkey)
		return 0;

	switch (id_priv->id.ps) {
	case RDMA_PS_UDP:
		id_priv->qkey = RDMA_UDP_QKEY;
		break;
	case RDMA_PS_IPOIB:
		ib_addr_get_mgid(&id_priv->id.route.addr.dev_addr, &rec.mgid);
		ret = ib_sa_get_mcmember_rec(id_priv->id.device,
					     id_priv->id.port_num, &rec.mgid,
					     &rec);
		if (!ret)
			id_priv->qkey = be32_to_cpu(rec.qkey);
		break;
	default:
		break;
	}
	return ret;
}

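/*
 * Scan the GID table of 'port_num' on 'device' for 'gid'.  Returns 0 if the
 * GID is present, -EAGAIN if it is not, and 1 if the port or GID table
 * could not be queried.
 */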
static int find_gid_port(struct ib_device *device, union ib_gid *gid, u8 port_num)
{
	int i;
	int err;
	struct ib_port_attr props;
	union ib_gid tmp;

	err = ib_query_port(device, port_num, &props);
	if (err)
		return 1;

	for (i = 0; i < props.gid_tbl_len; ++i) {
		err = ib_query_gid(device, port_num, i, &tmp);
		if (err)
			return 1;
		if (!memcmp(&tmp, gid, sizeof tmp))
			return 0;
	}

	return -EAGAIN;
}

int
rdma_find_cmid_laddr(struct sockaddr_in *local_addr, unsigned short dev_type,
							void **cm_id)
{
	int ret;
	u8 port;
	int found_dev = 0, found_cmid = 0;
	struct rdma_id_private  *id_priv;
	struct rdma_id_private  *dev_id_priv;
	struct cma_device	*cma_dev;
	struct rdma_dev_addr	dev_addr;
	union ib_gid		gid;
	enum rdma_link_layer dev_ll = dev_type == ARPHRD_INFINIBAND ?
		IB_LINK_LAYER_INFINIBAND : IB_LINK_LAYER_ETHERNET;

	memset(&dev_addr, 0, sizeof(dev_addr));

	ret = rdma_translate_ip((struct sockaddr *)local_addr,
							&dev_addr);
	if (ret)
		goto err;

	/* find rdma device based on MAC address/gid */
	mutex_lock(&lock);

	memcpy(&gid, dev_addr.src_dev_addr +
	       rdma_addr_gid_offset(&dev_addr), sizeof(gid));

	list_for_each_entry(cma_dev, &dev_list, list)
		for (port = 1; port <= cma_dev->device->phys_port_cnt; ++port)
			if ((rdma_port_get_link_layer(cma_dev->device, port) ==
								 dev_ll) &&
			 (rdma_node_get_transport(cma_dev->device->node_type) ==
							RDMA_TRANSPORT_IWARP)) {
					ret = find_gid_port(cma_dev->device,
								&gid, port);
					if (!ret) {
						found_dev = 1;
						goto out;
					} else if (ret == 1) {
						mutex_unlock(&lock);
						goto err;
					}
			}
out:
	mutex_unlock(&lock);

	if (!found_dev)
		goto err;

	/* Walk the list of listening cm_ids to find the desired cm_id
	 * based on the rdma device & port number.
	 */
	list_for_each_entry(id_priv, &listen_any_list, list)
		list_for_each_entry(dev_id_priv, &id_priv->listen_list,
						 listen_list)
			if (dev_id_priv->cma_dev == cma_dev)
				if (dev_id_priv->cm_id.iw->local_addr.sin_port
						== local_addr->sin_port) {
					*cm_id = (void *)dev_id_priv->cm_id.iw;
					found_cmid = 1;
				}
	return found_cmid ? 0 : -ENODEV;

err:
	return -ENODEV;
}
EXPORT_SYMBOL(rdma_find_cmid_laddr);

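/*
 * Bind the id to an RDMA device by matching its source GID (derived from
 * the resolved hardware address) against the GIDs cached for each
 * registered device.  Callers hold the global 'lock' mutex to serialize
 * against device addition and removal.
 */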
static int cma_acquire_dev(struct rdma_id_private *id_priv)
{
	struct rdma_dev_addr *dev_addr = &id_priv->id.route.addr.dev_addr;
	struct cma_device *cma_dev;
	union ib_gid gid;
	int ret = -ENODEV;

	if (dev_addr->dev_type != ARPHRD_INFINIBAND) {
		iboe_addr_get_sgid(dev_addr, &gid);
		list_for_each_entry(cma_dev, &dev_list, list) {
			ret = ib_find_cached_gid(cma_dev->device, &gid,
						 &id_priv->id.port_num, NULL);
			if (!ret)
				goto out;
		}
	}

	memcpy(&gid, dev_addr->src_dev_addr +
	       rdma_addr_gid_offset(dev_addr), sizeof gid);
	list_for_each_entry(cma_dev, &dev_list, list) {
		ret = ib_find_cached_gid(cma_dev->device, &gid,
					 &id_priv->id.port_num, NULL);
		if (!ret)
			break;
	}

out:
	if (!ret)
		cma_attach_to_dev(id_priv, cma_dev);

	return ret;
}

static void cma_deref_id(struct rdma_id_private *id_priv)
{
	if (atomic_dec_and_test(&id_priv->refcount))
		complete(&id_priv->comp);
}

static int cma_disable_callback(struct rdma_id_private *id_priv,
			      enum cma_state state)
{
	mutex_lock(&id_priv->handler_mutex);
	if (id_priv->state != state) {
		mutex_unlock(&id_priv->handler_mutex);
		return -EINVAL;
	}
	return 0;
}

static int cma_has_cm_dev(struct rdma_id_private *id_priv)
{
	return (id_priv->id.device && id_priv->cm_id.ib);
}

struct rdma_cm_id *rdma_create_id(rdma_cm_event_handler event_handler,
				  void *context, enum rdma_port_space ps)
{
	struct rdma_id_private *id_priv;

	id_priv = kzalloc(sizeof *id_priv, GFP_KERNEL);
	if (!id_priv)
		return ERR_PTR(-ENOMEM);

	id_priv->state = CMA_IDLE;
	id_priv->id.context = context;
	id_priv->id.event_handler = event_handler;
	id_priv->id.ps = ps;
	spin_lock_init(&id_priv->lock);
	mutex_init(&id_priv->qp_mutex);
	init_completion(&id_priv->comp);
	atomic_set(&id_priv->refcount, 1);
	mutex_init(&id_priv->handler_mutex);
	INIT_LIST_HEAD(&id_priv->listen_list);
	INIT_LIST_HEAD(&id_priv->mc_list);
	get_random_bytes(&id_priv->seq_num, sizeof id_priv->seq_num);

	return &id_priv->id;
}
EXPORT_SYMBOL(rdma_create_id);

static int cma_init_ud_qp(struct rdma_id_private *id_priv, struct ib_qp *qp)
{
	struct ib_qp_attr qp_attr;
	int qp_attr_mask, ret;

	qp_attr.qp_state = IB_QPS_INIT;
	ret = rdma_init_qp_attr(&id_priv->id, &qp_attr, &qp_attr_mask);
	if (ret)
		return ret;

	ret = ib_modify_qp(qp, &qp_attr, qp_attr_mask);
	if (ret)
		return ret;

	qp_attr.qp_state = IB_QPS_RTR;
	ret = ib_modify_qp(qp, &qp_attr, IB_QP_STATE);
	if (ret)
		return ret;

	qp_attr.qp_state = IB_QPS_RTS;
	qp_attr.sq_psn = 0;
	ret = ib_modify_qp(qp, &qp_attr, IB_QP_STATE | IB_QP_SQ_PSN);

	return ret;
}

static int cma_init_conn_qp(struct rdma_id_private *id_priv, struct ib_qp *qp)
{
	struct ib_qp_attr qp_attr;
	int qp_attr_mask, ret;

	qp_attr.qp_state = IB_QPS_INIT;
	ret = rdma_init_qp_attr(&id_priv->id, &qp_attr, &qp_attr_mask);
	if (ret)
		return ret;

	return ib_modify_qp(qp, &qp_attr, qp_attr_mask);
}

int rdma_create_qp(struct rdma_cm_id *id, struct ib_pd *pd,
		   struct ib_qp_init_attr *qp_init_attr)
{
	struct rdma_id_private *id_priv;
	struct ib_qp *qp;
	int ret;

	id_priv = container_of(id, struct rdma_id_private, id);
	if (id->device != pd->device)
		return -EINVAL;

	qp = ib_create_qp(pd, qp_init_attr);
	if (IS_ERR(qp))
		return PTR_ERR(qp);

	if (cma_is_ud_ps(id_priv->id.ps))
		ret = cma_init_ud_qp(id_priv, qp);
	else
		ret = cma_init_conn_qp(id_priv, qp);
	if (ret)
		goto err;

	id->qp = qp;
	id_priv->qp_num = qp->qp_num;
	id_priv->srq = (qp->srq != NULL);
	return 0;
err:
	ib_destroy_qp(qp);
	return ret;
}
EXPORT_SYMBOL(rdma_create_qp);

void rdma_destroy_qp(struct rdma_cm_id *id)
{
	struct rdma_id_private *id_priv;

	id_priv = container_of(id, struct rdma_id_private, id);
	mutex_lock(&id_priv->qp_mutex);
	ib_destroy_qp(id_priv->id.qp);
	id_priv->id.qp = NULL;
	mutex_unlock(&id_priv->qp_mutex);
}
EXPORT_SYMBOL(rdma_destroy_qp);

static int cma_modify_qp_rtr(struct rdma_id_private *id_priv,
			     struct rdma_conn_param *conn_param)
{
	struct ib_qp_attr qp_attr;
	int qp_attr_mask, ret;

	mutex_lock(&id_priv->qp_mutex);
	if (!id_priv->id.qp) {
		ret = 0;
		goto out;
	}

	/* Need to update QP attributes from default values. */
	qp_attr.qp_state = IB_QPS_INIT;
	ret = rdma_init_qp_attr(&id_priv->id, &qp_attr, &qp_attr_mask);
	if (ret)
		goto out;

	ret = ib_modify_qp(id_priv->id.qp, &qp_attr, qp_attr_mask);
	if (ret)
		goto out;

	qp_attr.qp_state = IB_QPS_RTR;
	ret = rdma_init_qp_attr(&id_priv->id, &qp_attr, &qp_attr_mask);
	if (ret)
		goto out;

	if (conn_param)
		qp_attr.max_dest_rd_atomic = conn_param->responder_resources;
	ret = ib_modify_qp(id_priv->id.qp, &qp_attr, qp_attr_mask);
out:
	mutex_unlock(&id_priv->qp_mutex);
	return ret;
}

static int cma_modify_qp_rts(struct rdma_id_private *id_priv,
			     struct rdma_conn_param *conn_param)
{
	struct ib_qp_attr qp_attr;
	int qp_attr_mask, ret;

	mutex_lock(&id_priv->qp_mutex);
	if (!id_priv->id.qp) {
		ret = 0;
		goto out;
	}

	qp_attr.qp_state = IB_QPS_RTS;
	ret = rdma_init_qp_attr(&id_priv->id, &qp_attr, &qp_attr_mask);
	if (ret)
		goto out;

	if (conn_param)
		qp_attr.max_rd_atomic = conn_param->initiator_depth;
	ret = ib_modify_qp(id_priv->id.qp, &qp_attr, qp_attr_mask);
out:
	mutex_unlock(&id_priv->qp_mutex);
	return ret;
}

static int cma_modify_qp_err(struct rdma_id_private *id_priv)
{
	struct ib_qp_attr qp_attr;
	int ret;

	mutex_lock(&id_priv->qp_mutex);
	if (!id_priv->id.qp) {
		ret = 0;
		goto out;
	}

	qp_attr.qp_state = IB_QPS_ERR;
	ret = ib_modify_qp(id_priv->id.qp, &qp_attr, IB_QP_STATE);
out:
	mutex_unlock(&id_priv->qp_mutex);
	return ret;
}

static int cma_ib_init_qp_attr(struct rdma_id_private *id_priv,
			       struct ib_qp_attr *qp_attr, int *qp_attr_mask)
{
	struct rdma_dev_addr *dev_addr = &id_priv->id.route.addr.dev_addr;
	int ret;
	u16 pkey;

	if (rdma_port_get_link_layer(id_priv->id.device, id_priv->id.port_num) ==
	    IB_LINK_LAYER_INFINIBAND)
		pkey = ib_addr_get_pkey(dev_addr);
	else
		pkey = 0xffff;

	ret = ib_find_cached_pkey(id_priv->id.device, id_priv->id.port_num,
				  pkey, &qp_attr->pkey_index);
	if (ret)
		return ret;

	qp_attr->port_num = id_priv->id.port_num;
	*qp_attr_mask = IB_QP_STATE | IB_QP_PKEY_INDEX | IB_QP_PORT;

	if (cma_is_ud_ps(id_priv->id.ps)) {
		ret = cma_set_qkey(id_priv);
		if (ret)
			return ret;

		qp_attr->qkey = id_priv->qkey;
		*qp_attr_mask |= IB_QP_QKEY;
	} else {
		qp_attr->qp_access_flags = 0;
		*qp_attr_mask |= IB_QP_ACCESS_FLAGS;
	}
	return 0;
}

int rdma_init_qp_attr(struct rdma_cm_id *id, struct ib_qp_attr *qp_attr,
		       int *qp_attr_mask)
{
	struct rdma_id_private *id_priv;
	int ret = 0;

	id_priv = container_of(id, struct rdma_id_private, id);
	switch (rdma_node_get_transport(id_priv->id.device->node_type)) {
	case RDMA_TRANSPORT_IB:
		if (!id_priv->cm_id.ib || cma_is_ud_ps(id_priv->id.ps))
			ret = cma_ib_init_qp_attr(id_priv, qp_attr, qp_attr_mask);
		else
			ret = ib_cm_init_qp_attr(id_priv->cm_id.ib, qp_attr,
						 qp_attr_mask);
		if (qp_attr->qp_state == IB_QPS_RTR)
			qp_attr->rq_psn = id_priv->seq_num;
		break;
	case RDMA_TRANSPORT_IWARP:
		if (!id_priv->cm_id.iw) {
			qp_attr->qp_access_flags = 0;
			*qp_attr_mask = IB_QP_STATE | IB_QP_ACCESS_FLAGS;
		} else
			ret = iw_cm_init_qp_attr(id_priv->cm_id.iw, qp_attr,
						 qp_attr_mask);
		break;
	default:
		ret = -ENOSYS;
		break;
	}

	return ret;
}
EXPORT_SYMBOL(rdma_init_qp_attr);

static inline int cma_zero_addr(struct sockaddr *addr)
{
	struct in6_addr *ip6;

	if (addr->sa_family == AF_INET)
		return ipv4_is_zeronet(
			((struct sockaddr_in *)addr)->sin_addr.s_addr);
	else {
		ip6 = &((struct sockaddr_in6 *) addr)->sin6_addr;
		return (ip6->s6_addr32[0] | ip6->s6_addr32[1] |
			ip6->s6_addr32[2] | ip6->s6_addr32[3]) == 0;
	}
}

static inline int cma_loopback_addr(struct sockaddr *addr)
{
	if (addr->sa_family == AF_INET)
		return ipv4_is_loopback(
			((struct sockaddr_in *) addr)->sin_addr.s_addr);
	else
		return ipv6_addr_loopback(
			&((struct sockaddr_in6 *) addr)->sin6_addr);
}

static inline int cma_any_addr(struct sockaddr *addr)
{
	return cma_zero_addr(addr) || cma_loopback_addr(addr);
}

int
rdma_cma_any_addr(struct sockaddr *addr)
{
	return cma_any_addr(addr);
}
EXPORT_SYMBOL(rdma_cma_any_addr);

static inline __be16 cma_port(struct sockaddr *addr)
{
	if (addr->sa_family == AF_INET)
		return ((struct sockaddr_in *) addr)->sin_port;
	else
		return ((struct sockaddr_in6 *) addr)->sin6_port;
}

static inline int cma_any_port(struct sockaddr *addr)
{
	return !cma_port(addr);
}

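/*
 * Connection requests carry a private-data header (struct cma_hdr, or
 * struct sdp_hh for the SDP port space) holding the IP version, port, and
 * source/destination addresses.  cma_get_net_info() validates and extracts
 * those fields; cma_save_net_info() copies them into the new id's
 * rdma_addr.
 */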
static int cma_get_net_info(void *hdr, enum rdma_port_space ps,
			    u8 *ip_ver, __be16 *port,
			    union cma_ip_addr **src, union cma_ip_addr **dst)
{
	switch (ps) {
	case RDMA_PS_SDP:
		if (sdp_get_majv(((struct sdp_hh *) hdr)->sdp_version) !=
		    SDP_MAJ_VERSION)
			return -EINVAL;

		*ip_ver	= sdp_get_ip_ver(hdr);
		*port	= ((struct sdp_hh *) hdr)->port;
		*src	= &((struct sdp_hh *) hdr)->src_addr;
		*dst	= &((struct sdp_hh *) hdr)->dst_addr;
		break;
	default:
		if (((struct cma_hdr *) hdr)->cma_version != CMA_VERSION)
			return -EINVAL;

		*ip_ver	= cma_get_ip_ver(hdr);
		*port	= ((struct cma_hdr *) hdr)->port;
		*src	= &((struct cma_hdr *) hdr)->src_addr;
		*dst	= &((struct cma_hdr *) hdr)->dst_addr;
		break;
	}

	if (*ip_ver != 4 && *ip_ver != 6)
		return -EINVAL;
	return 0;
}

static void cma_save_net_info(struct rdma_addr *addr,
			      struct rdma_addr *listen_addr,
			      u8 ip_ver, __be16 port,
			      union cma_ip_addr *src, union cma_ip_addr *dst)
{
	struct sockaddr_in *listen4, *ip4;
	struct sockaddr_in6 *listen6, *ip6;

	switch (ip_ver) {
	case 4:
		listen4 = (struct sockaddr_in *) &listen_addr->src_addr;
		ip4 = (struct sockaddr_in *) &addr->src_addr;
		ip4->sin_family = listen4->sin_family;
		ip4->sin_addr.s_addr = dst->ip4.addr;
		ip4->sin_port = listen4->sin_port;

		ip4 = (struct sockaddr_in *) &addr->dst_addr;
		ip4->sin_family = listen4->sin_family;
		ip4->sin_addr.s_addr = src->ip4.addr;
		ip4->sin_port = port;
		break;
	case 6:
		listen6 = (struct sockaddr_in6 *) &listen_addr->src_addr;
		ip6 = (struct sockaddr_in6 *) &addr->src_addr;
		ip6->sin6_family = listen6->sin6_family;
		ip6->sin6_addr = dst->ip6;
		ip6->sin6_port = listen6->sin6_port;

		ip6 = (struct sockaddr_in6 *) &addr->dst_addr;
		ip6->sin6_family = listen6->sin6_family;
		ip6->sin6_addr = src->ip6;
		ip6->sin6_port = port;
		break;
	default:
		break;
	}
}

static inline int cma_user_data_offset(enum rdma_port_space ps)
{
	switch (ps) {
	case RDMA_PS_SDP:
		return 0;
	default:
		return sizeof(struct cma_hdr);
	}
}

static void cma_cancel_route(struct rdma_id_private *id_priv)
{
	switch (rdma_port_get_link_layer(id_priv->id.device, id_priv->id.port_num)) {
	case IB_LINK_LAYER_INFINIBAND:
		if (id_priv->query)
			ib_sa_cancel_query(id_priv->query_id, id_priv->query);
		break;
	default:
		break;
	}
}

static void cma_cancel_listens(struct rdma_id_private *id_priv)
{
	struct rdma_id_private *dev_id_priv;

	/*
	 * Remove from listen_any_list to prevent added devices from spawning
	 * additional listen requests.
	 */
	mutex_lock(&lock);
	list_del(&id_priv->list);

	while (!list_empty(&id_priv->listen_list)) {
		dev_id_priv = list_entry(id_priv->listen_list.next,
					 struct rdma_id_private, listen_list);
		/* sync with device removal to avoid duplicate destruction */
		list_del_init(&dev_id_priv->list);
		list_del(&dev_id_priv->listen_list);
		mutex_unlock(&lock);

		rdma_destroy_id(&dev_id_priv->id);
		mutex_lock(&lock);
	}
	mutex_unlock(&lock);
}

static void cma_cancel_operation(struct rdma_id_private *id_priv,
				 enum cma_state state)
{
	switch (state) {
	case CMA_ADDR_QUERY:
		rdma_addr_cancel(&id_priv->id.route.addr.dev_addr);
		break;
	case CMA_ROUTE_QUERY:
		cma_cancel_route(id_priv);
		break;
	case CMA_LISTEN:
		if (cma_any_addr((struct sockaddr *) &id_priv->id.route.addr.src_addr)
				&& !id_priv->cma_dev)
			cma_cancel_listens(id_priv);
		break;
	default:
		break;
	}
}

static void cma_release_port(struct rdma_id_private *id_priv)
{
	struct rdma_bind_list *bind_list = id_priv->bind_list;

	if (!bind_list)
		return;

	mutex_lock(&lock);
	hlist_del(&id_priv->node);
	if (hlist_empty(&bind_list->owners)) {
		idr_remove(bind_list->ps, bind_list->port);
		kfree(bind_list);
	}
	mutex_unlock(&lock);
	if (id_priv->sock)
		sock_release(id_priv->sock);
}

static void cma_leave_mc_groups(struct rdma_id_private *id_priv)
{
	struct cma_multicast *mc;

	while (!list_empty(&id_priv->mc_list)) {
		mc = container_of(id_priv->mc_list.next,
				  struct cma_multicast, list);
		list_del(&mc->list);
		switch (rdma_port_get_link_layer(id_priv->cma_dev->device, id_priv->id.port_num)) {
		case IB_LINK_LAYER_INFINIBAND:
			ib_sa_free_multicast(mc->multicast.ib);
			kfree(mc);
			break;
		case IB_LINK_LAYER_ETHERNET:
			kref_put(&mc->mcref, release_mc);
			break;
		default:
			break;
		}
	}
}

void rdma_destroy_id(struct rdma_cm_id *id)
{
	struct rdma_id_private *id_priv;
	enum cma_state state;

	id_priv = container_of(id, struct rdma_id_private, id);
	state = cma_exch(id_priv, CMA_DESTROYING);
	cma_cancel_operation(id_priv, state);

	mutex_lock(&lock);
	if (id_priv->cma_dev) {
		mutex_unlock(&lock);
		switch (rdma_node_get_transport(id_priv->id.device->node_type)) {
		case RDMA_TRANSPORT_IB:
			if (id_priv->cm_id.ib && !IS_ERR(id_priv->cm_id.ib))
				ib_destroy_cm_id(id_priv->cm_id.ib);
			break;
		case RDMA_TRANSPORT_IWARP:
			if (id_priv->cm_id.iw && !IS_ERR(id_priv->cm_id.iw))
				iw_destroy_cm_id(id_priv->cm_id.iw);
			break;
		default:
			break;
		}
		cma_leave_mc_groups(id_priv);
		mutex_lock(&lock);
		cma_detach_from_dev(id_priv);
	}
	mutex_unlock(&lock);

	cma_release_port(id_priv);
	cma_deref_id(id_priv);
	wait_for_completion(&id_priv->comp);

	if (id_priv->internal_id)
		cma_deref_id(id_priv->id.context);

	kfree(id_priv->id.route.path_rec);
	kfree(id_priv);
}
EXPORT_SYMBOL(rdma_destroy_id);

static int cma_rep_recv(struct rdma_id_private *id_priv)
{
	int ret;

	ret = cma_modify_qp_rtr(id_priv, NULL);
	if (ret)
		goto reject;

	ret = cma_modify_qp_rts(id_priv, NULL);
	if (ret)
		goto reject;

	ret = ib_send_cm_rtu(id_priv->cm_id.ib, NULL, 0);
	if (ret)
		goto reject;

	return 0;
reject:
	cma_modify_qp_err(id_priv);
	ib_send_cm_rej(id_priv->cm_id.ib, IB_CM_REJ_CONSUMER_DEFINED,
		       NULL, 0, NULL, 0);
	return ret;
}

static int cma_verify_rep(struct rdma_id_private *id_priv, void *data)
{
	if (id_priv->id.ps == RDMA_PS_SDP &&
	    sdp_get_majv(((struct sdp_hah *) data)->sdp_version) !=
	    SDP_MAJ_VERSION)
		return -EINVAL;

	return 0;
}

static void cma_set_rep_event_data(struct rdma_cm_event *event,
				   struct ib_cm_rep_event_param *rep_data,
				   void *private_data)
{
	event->param.conn.private_data = private_data;
	event->param.conn.private_data_len = IB_CM_REP_PRIVATE_DATA_SIZE;
	event->param.conn.responder_resources = rep_data->responder_resources;
	event->param.conn.initiator_depth = rep_data->initiator_depth;
	event->param.conn.flow_control = rep_data->flow_control;
	event->param.conn.rnr_retry_count = rep_data->rnr_retry_count;
	event->param.conn.srq = rep_data->srq;
	event->param.conn.qp_num = rep_data->remote_qpn;
}

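/*
 * IB CM callback for the active side of a connection: translate IB CM
 * events into RDMA CM events and deliver them to the consumer's event
 * handler.  A non-zero return from the handler destroys the id.
 */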
static int cma_ib_handler(struct ib_cm_id *cm_id, struct ib_cm_event *ib_event)
{
	struct rdma_id_private *id_priv = cm_id->context;
	struct rdma_cm_event event;
	int ret = 0;

	if ((ib_event->event != IB_CM_TIMEWAIT_EXIT &&
		cma_disable_callback(id_priv, CMA_CONNECT)) ||
	    (ib_event->event == IB_CM_TIMEWAIT_EXIT &&
		cma_disable_callback(id_priv, CMA_DISCONNECT)))
		return 0;

	memset(&event, 0, sizeof event);
	switch (ib_event->event) {
	case IB_CM_REQ_ERROR:
	case IB_CM_REP_ERROR:
		event.event = RDMA_CM_EVENT_UNREACHABLE;
		event.status = -ETIMEDOUT;
		break;
	case IB_CM_REP_RECEIVED:
		event.status = cma_verify_rep(id_priv, ib_event->private_data);
		if (event.status)
			event.event = RDMA_CM_EVENT_CONNECT_ERROR;
		else if (id_priv->id.qp && id_priv->id.ps != RDMA_PS_SDP) {
			event.status = cma_rep_recv(id_priv);
			event.event = event.status ? RDMA_CM_EVENT_CONNECT_ERROR :
						     RDMA_CM_EVENT_ESTABLISHED;
		} else
			event.event = RDMA_CM_EVENT_CONNECT_RESPONSE;
		cma_set_rep_event_data(&event, &ib_event->param.rep_rcvd,
				       ib_event->private_data);
		break;
	case IB_CM_RTU_RECEIVED:
	case IB_CM_USER_ESTABLISHED:
		event.event = RDMA_CM_EVENT_ESTABLISHED;
		break;
	case IB_CM_DREQ_ERROR:
		event.status = -ETIMEDOUT; /* fall through */
	case IB_CM_DREQ_RECEIVED:
	case IB_CM_DREP_RECEIVED:
		if (!cma_comp_exch(id_priv, CMA_CONNECT, CMA_DISCONNECT))
			goto out;
		event.event = RDMA_CM_EVENT_DISCONNECTED;
		break;
	case IB_CM_TIMEWAIT_EXIT:
		event.event = RDMA_CM_EVENT_TIMEWAIT_EXIT;
		break;
	case IB_CM_MRA_RECEIVED:
		/* ignore event */
		goto out;
	case IB_CM_REJ_RECEIVED:
		cma_modify_qp_err(id_priv);
		event.status = ib_event->param.rej_rcvd.reason;
		event.event = RDMA_CM_EVENT_REJECTED;
		event.param.conn.private_data = ib_event->private_data;
		event.param.conn.private_data_len = IB_CM_REJ_PRIVATE_DATA_SIZE;
		break;
	default:
		printk(KERN_ERR "RDMA CMA: unexpected IB CM event: %d\n",
		       ib_event->event);
		goto out;
	}

	ret = id_priv->id.event_handler(&id_priv->id, &event);
	if (ret) {
		/* Destroy the CM ID by returning a non-zero value. */
		id_priv->cm_id.ib = NULL;
		cma_exch(id_priv, CMA_DESTROYING);
		mutex_unlock(&id_priv->handler_mutex);
		rdma_destroy_id(&id_priv->id);
		return ret;
	}
out:
	mutex_unlock(&id_priv->handler_mutex);
	return ret;
}

static struct rdma_id_private *cma_new_conn_id(struct rdma_cm_id *listen_id,
					       struct ib_cm_event *ib_event)
{
	struct rdma_id_private *id_priv;
	struct rdma_cm_id *id;
	struct rdma_route *rt;
	union cma_ip_addr *src, *dst;
	__be16 port;
	u8 ip_ver;
	int ret;

	if (cma_get_net_info(ib_event->private_data, listen_id->ps,
			     &ip_ver, &port, &src, &dst))
		goto err;

	id = rdma_create_id(listen_id->event_handler, listen_id->context,
			    listen_id->ps);
	if (IS_ERR(id))
		goto err;

	cma_save_net_info(&id->route.addr, &listen_id->route.addr,
			  ip_ver, port, src, dst);

	rt = &id->route;
	rt->num_paths = ib_event->param.req_rcvd.alternate_path ? 2 : 1;
	rt->path_rec = kmalloc(sizeof *rt->path_rec * rt->num_paths,
			       GFP_KERNEL);
	if (!rt->path_rec)
		goto destroy_id;

	rt->path_rec[0] = *ib_event->param.req_rcvd.primary_path;
	if (rt->num_paths == 2)
		rt->path_rec[1] = *ib_event->param.req_rcvd.alternate_path;

	if (cma_any_addr((struct sockaddr *) &rt->addr.src_addr)) {
		rt->addr.dev_addr.dev_type = ARPHRD_INFINIBAND;
		rdma_addr_set_sgid(&rt->addr.dev_addr, &rt->path_rec[0].sgid);
		ib_addr_set_pkey(&rt->addr.dev_addr, rt->path_rec[0].pkey);
	} else {
		ret = rdma_translate_ip((struct sockaddr *) &rt->addr.src_addr,
					&rt->addr.dev_addr);
		if (ret)
			goto destroy_id;
	}
	rdma_addr_set_dgid(&rt->addr.dev_addr, &rt->path_rec[0].dgid);

	id_priv = container_of(id, struct rdma_id_private, id);
	id_priv->state = CMA_CONNECT;
	return id_priv;

destroy_id:
	rdma_destroy_id(id);
err:
	return NULL;
}

static struct rdma_id_private *cma_new_udp_id(struct rdma_cm_id *listen_id,
					      struct ib_cm_event *ib_event)
{
	struct rdma_id_private *id_priv;
	struct rdma_cm_id *id;
	union cma_ip_addr *src, *dst;
	__be16 port;
	u8 ip_ver;
	int ret;

	id = rdma_create_id(listen_id->event_handler, listen_id->context,
			    listen_id->ps);
	if (IS_ERR(id))
		return NULL;

	if (cma_get_net_info(ib_event->private_data, listen_id->ps,
			     &ip_ver, &port, &src, &dst))
		goto err;

	cma_save_net_info(&id->route.addr, &listen_id->route.addr,
			  ip_ver, port, src, dst);

	if (!cma_any_addr((struct sockaddr *) &id->route.addr.src_addr)) {
		ret = rdma_translate_ip((struct sockaddr *) &id->route.addr.src_addr,
					&id->route.addr.dev_addr);
		if (ret)
			goto err;
	}

	id_priv = container_of(id, struct rdma_id_private, id);
	id_priv->state = CMA_CONNECT;
	return id_priv;
err:
	rdma_destroy_id(id);
	return NULL;
}

static void cma_set_req_event_data(struct rdma_cm_event *event,
				   struct ib_cm_req_event_param *req_data,
				   void *private_data, int offset)
{
	event->param.conn.private_data = private_data + offset;
	event->param.conn.private_data_len = IB_CM_REQ_PRIVATE_DATA_SIZE - offset;
	event->param.conn.responder_resources = req_data->responder_resources;
	event->param.conn.initiator_depth = req_data->initiator_depth;
	event->param.conn.flow_control = req_data->flow_control;
	event->param.conn.retry_count = req_data->retry_count;
	event->param.conn.rnr_retry_count = req_data->rnr_retry_count;
	event->param.conn.srq = req_data->srq;
	event->param.conn.qp_num = req_data->remote_qpn;
}

static int cma_req_handler(struct ib_cm_id *cm_id, struct ib_cm_event *ib_event)
{
	struct rdma_id_private *listen_id, *conn_id;
	struct rdma_cm_event event;
	int offset, ret;

	listen_id = cm_id->context;
	if (cma_disable_callback(listen_id, CMA_LISTEN))
		return -ECONNABORTED;

	memset(&event, 0, sizeof event);
	offset = cma_user_data_offset(listen_id->id.ps);
	event.event = RDMA_CM_EVENT_CONNECT_REQUEST;
	if (cma_is_ud_ps(listen_id->id.ps)) {
		conn_id = cma_new_udp_id(&listen_id->id, ib_event);
		event.param.ud.private_data = ib_event->private_data + offset;
		event.param.ud.private_data_len =
				IB_CM_SIDR_REQ_PRIVATE_DATA_SIZE - offset;
	} else {
		conn_id = cma_new_conn_id(&listen_id->id, ib_event);
		cma_set_req_event_data(&event, &ib_event->param.req_rcvd,
				       ib_event->private_data, offset);
	}
	if (!conn_id) {
		ret = -ENOMEM;
		goto out;
	}

	mutex_lock_nested(&conn_id->handler_mutex, SINGLE_DEPTH_NESTING);
	mutex_lock(&lock);
	ret = cma_acquire_dev(conn_id);
	mutex_unlock(&lock);
	if (ret)
		goto release_conn_id;

	conn_id->cm_id.ib = cm_id;
	cm_id->context = conn_id;
	cm_id->cm_handler = cma_ib_handler;

	ret = conn_id->id.event_handler(&conn_id->id, &event);
	if (!ret) {
		/*
		 * Acquire mutex to prevent the user from executing
		 * rdma_destroy_id() while we're accessing the cm_id.
		 */
		mutex_lock(&lock);
		if (cma_comp(conn_id, CMA_CONNECT) &&
		    !cma_is_ud_ps(conn_id->id.ps))
			ib_send_cm_mra(cm_id, CMA_CM_MRA_SETTING, NULL, 0);
		mutex_unlock(&lock);
		mutex_unlock(&conn_id->handler_mutex);
		goto out;
	}

	/* Destroy the CM ID by returning a non-zero value. */
	conn_id->cm_id.ib = NULL;

release_conn_id:
	cma_exch(conn_id, CMA_DESTROYING);
	mutex_unlock(&conn_id->handler_mutex);
	rdma_destroy_id(&conn_id->id);

out:
	mutex_unlock(&listen_id->handler_mutex);
	return ret;
}

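/*
 * The IB service ID used for listens and connection requests encodes the
 * RDMA port space in the upper bits and the port number in the low 16 bits.
 */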
static __be64 cma_get_service_id(enum rdma_port_space ps, struct sockaddr *addr)
{
	return cpu_to_be64(((u64)ps << 16) + be16_to_cpu(cma_port(addr)));
}

static void cma_set_compare_data(enum rdma_port_space ps, struct sockaddr *addr,
				 struct ib_cm_compare_data *compare)
{
	struct cma_hdr *cma_data, *cma_mask;
	struct sdp_hh *sdp_data, *sdp_mask;
	__be32 ip4_addr;
#ifdef INET6
	struct in6_addr ip6_addr;
#endif

	memset(compare, 0, sizeof *compare);
	cma_data = (void *) compare->data;
	cma_mask = (void *) compare->mask;
	sdp_data = (void *) compare->data;
	sdp_mask = (void *) compare->mask;

	switch (addr->sa_family) {
	case AF_INET:
		ip4_addr = ((struct sockaddr_in *) addr)->sin_addr.s_addr;
		if (ps == RDMA_PS_SDP) {
			sdp_set_ip_ver(sdp_data, 4);
			sdp_set_ip_ver(sdp_mask, 0xF);
			sdp_data->dst_addr.ip4.addr = ip4_addr;
			sdp_mask->dst_addr.ip4.addr = htonl(~0);
		} else {
			cma_set_ip_ver(cma_data, 4);
			cma_set_ip_ver(cma_mask, 0xF);
			cma_data->dst_addr.ip4.addr = ip4_addr;
			cma_mask->dst_addr.ip4.addr = htonl(~0);
		}
		break;
#ifdef INET6
	case AF_INET6:
		ip6_addr = ((struct sockaddr_in6 *) addr)->sin6_addr;
		if (ps == RDMA_PS_SDP) {
			sdp_set_ip_ver(sdp_data, 6);
			sdp_set_ip_ver(sdp_mask, 0xF);
			sdp_data->dst_addr.ip6 = ip6_addr;
			memset(&sdp_mask->dst_addr.ip6, 0xFF,
			       sizeof sdp_mask->dst_addr.ip6);
		} else {
			cma_set_ip_ver(cma_data, 6);
			cma_set_ip_ver(cma_mask, 0xF);
			cma_data->dst_addr.ip6 = ip6_addr;
			memset(&cma_mask->dst_addr.ip6, 0xFF,
			       sizeof cma_mask->dst_addr.ip6);
		}
		break;
#endif
	default:
		break;
	}
}

static int cma_iw_handler(struct iw_cm_id *iw_id, struct iw_cm_event *iw_event)
{
	struct rdma_id_private *id_priv = iw_id->context;
	struct rdma_cm_event event;
	struct sockaddr_in *sin;
	int ret = 0;

	if (cma_disable_callback(id_priv, CMA_CONNECT))
		return 0;

	memset(&event, 0, sizeof event);
	switch (iw_event->event) {
	case IW_CM_EVENT_CLOSE:
		event.event = RDMA_CM_EVENT_DISCONNECTED;
		break;
	case IW_CM_EVENT_CONNECT_REPLY:
		sin = (struct sockaddr_in *) &id_priv->id.route.addr.src_addr;
		*sin = iw_event->local_addr;
		sin = (struct sockaddr_in *) &id_priv->id.route.addr.dst_addr;
		*sin = iw_event->remote_addr;
		switch ((int)iw_event->status) {
		case 0:
			event.event = RDMA_CM_EVENT_ESTABLISHED;
			break;
		case -ECONNRESET:
		case -ECONNREFUSED:
			event.event = RDMA_CM_EVENT_REJECTED;
			break;
		case -ETIMEDOUT:
			event.event = RDMA_CM_EVENT_UNREACHABLE;
			break;
		default:
			event.event = RDMA_CM_EVENT_CONNECT_ERROR;
			break;
		}
		break;
	case IW_CM_EVENT_ESTABLISHED:
		event.event = RDMA_CM_EVENT_ESTABLISHED;
		break;
	default:
		BUG_ON(1);
	}

	event.status = iw_event->status;
	event.param.conn.private_data = iw_event->private_data;
	event.param.conn.private_data_len = iw_event->private_data_len;
	ret = id_priv->id.event_handler(&id_priv->id, &event);
	if (ret) {
		/* Destroy the CM ID by returning a non-zero value. */
		id_priv->cm_id.iw = NULL;
		cma_exch(id_priv, CMA_DESTROYING);
		mutex_unlock(&id_priv->handler_mutex);
		rdma_destroy_id(&id_priv->id);
		return ret;
	}

	mutex_unlock(&id_priv->handler_mutex);
	return ret;
}

static int iw_conn_req_handler(struct iw_cm_id *cm_id,
			       struct iw_cm_event *iw_event)
{
	struct rdma_cm_id *new_cm_id;
	struct rdma_id_private *listen_id, *conn_id;
	struct sockaddr_in *sin;
	struct net_device *dev = NULL;
	struct rdma_cm_event event;
	int ret;
	struct ib_device_attr attr;

	listen_id = cm_id->context;
	if (cma_disable_callback(listen_id, CMA_LISTEN))
		return -ECONNABORTED;

	/* Create a new RDMA id for the new IW CM ID */
	new_cm_id = rdma_create_id(listen_id->id.event_handler,
				   listen_id->id.context,
				   RDMA_PS_TCP);
	if (IS_ERR(new_cm_id)) {
		ret = -ENOMEM;
		goto out;
	}
	conn_id = container_of(new_cm_id, struct rdma_id_private, id);
	mutex_lock_nested(&conn_id->handler_mutex, SINGLE_DEPTH_NESTING);
	conn_id->state = CMA_CONNECT;

	dev = ip_dev_find(NULL, iw_event->local_addr.sin_addr.s_addr);
	if (!dev) {
		ret = -EADDRNOTAVAIL;
		mutex_unlock(&conn_id->handler_mutex);
		rdma_destroy_id(new_cm_id);
		goto out;
	}
	ret = rdma_copy_addr(&conn_id->id.route.addr.dev_addr, dev, NULL);
	if (ret) {
		mutex_unlock(&conn_id->handler_mutex);
		rdma_destroy_id(new_cm_id);
		goto out;
	}

	mutex_lock(&lock);
	ret = cma_acquire_dev(conn_id);
	mutex_unlock(&lock);
	if (ret) {
		mutex_unlock(&conn_id->handler_mutex);
		rdma_destroy_id(new_cm_id);
		goto out;
	}

	conn_id->cm_id.iw = cm_id;
	cm_id->context = conn_id;
	cm_id->cm_handler = cma_iw_handler;

	sin = (struct sockaddr_in *) &new_cm_id->route.addr.src_addr;
	*sin = iw_event->local_addr;
	sin = (struct sockaddr_in *) &new_cm_id->route.addr.dst_addr;
	*sin = iw_event->remote_addr;

	ret = ib_query_device(conn_id->id.device, &attr);
	if (ret) {
		mutex_unlock(&conn_id->handler_mutex);
		rdma_destroy_id(new_cm_id);
		goto out;
	}

	memset(&event, 0, sizeof event);
	event.event = RDMA_CM_EVENT_CONNECT_REQUEST;
	event.param.conn.private_data = iw_event->private_data;
	event.param.conn.private_data_len = iw_event->private_data_len;
	event.param.conn.initiator_depth = attr.max_qp_init_rd_atom;
	event.param.conn.responder_resources = attr.max_qp_rd_atom;
	ret = conn_id->id.event_handler(&conn_id->id, &event);
	if (ret) {
		/* User wants to destroy the CM ID */
		conn_id->cm_id.iw = NULL;
		cma_exch(conn_id, CMA_DESTROYING);
		mutex_unlock(&conn_id->handler_mutex);
		rdma_destroy_id(&conn_id->id);
		goto out;
	}

	mutex_unlock(&conn_id->handler_mutex);

out:
	if (dev)
		dev_put(dev);
	mutex_unlock(&listen_id->handler_mutex);
	return ret;
}

static int cma_ib_listen(struct rdma_id_private *id_priv)
{
	struct ib_cm_compare_data compare_data;
	struct sockaddr *addr;
	__be64 svc_id;
	int ret;

	id_priv->cm_id.ib = ib_create_cm_id(id_priv->id.device, cma_req_handler,
					    id_priv);
	if (IS_ERR(id_priv->cm_id.ib))
		return PTR_ERR(id_priv->cm_id.ib);

	addr = (struct sockaddr *) &id_priv->id.route.addr.src_addr;
	svc_id = cma_get_service_id(id_priv->id.ps, addr);
	if (cma_any_addr(addr))
		ret = ib_cm_listen(id_priv->cm_id.ib, svc_id, 0, NULL);
	else {
		cma_set_compare_data(id_priv->id.ps, addr, &compare_data);
		ret = ib_cm_listen(id_priv->cm_id.ib, svc_id, 0, &compare_data);
	}

	if (ret) {
		ib_destroy_cm_id(id_priv->cm_id.ib);
		id_priv->cm_id.ib = NULL;
	}

	return ret;
}

static int cma_iw_listen(struct rdma_id_private *id_priv, int backlog)
{
	int ret;
	struct sockaddr_in *sin;

	id_priv->cm_id.iw = iw_create_cm_id(id_priv->id.device,
					    id_priv->sock,
					    iw_conn_req_handler,
					    id_priv);
	if (IS_ERR(id_priv->cm_id.iw))
		return PTR_ERR(id_priv->cm_id.iw);

	sin = (struct sockaddr_in *) &id_priv->id.route.addr.src_addr;
	id_priv->cm_id.iw->local_addr = *sin;

	ret = iw_cm_listen(id_priv->cm_id.iw, backlog);

	if (ret) {
		iw_destroy_cm_id(id_priv->cm_id.iw);
		id_priv->cm_id.iw = NULL;
	}

	return ret;
}

static int cma_listen_handler(struct rdma_cm_id *id,
			      struct rdma_cm_event *event)
{
	struct rdma_id_private *id_priv = id->context;

	id->context = id_priv->id.context;
	id->event_handler = id_priv->id.event_handler;
	return id_priv->id.event_handler(id, event);
}

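/*
 * A listen on the wildcard address is mirrored onto every RDMA device:
 * cma_listen_on_all() creates an internal child id per device, and
 * cma_listen_handler() forwards each child's events to the original id's
 * handler.
 */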
static void cma_listen_on_dev(struct rdma_id_private *id_priv,
			      struct cma_device *cma_dev)
{
	struct rdma_id_private *dev_id_priv;
	struct rdma_cm_id *id;
	int ret;

	id = rdma_create_id(cma_listen_handler, id_priv, id_priv->id.ps);
	if (IS_ERR(id))
		return;

	dev_id_priv = container_of(id, struct rdma_id_private, id);

	dev_id_priv->state = CMA_ADDR_BOUND;
	dev_id_priv->sock = id_priv->sock;
	memcpy(&id->route.addr.src_addr, &id_priv->id.route.addr.src_addr,
	       ip_addr_size((struct sockaddr *) &id_priv->id.route.addr.src_addr));

	cma_attach_to_dev(dev_id_priv, cma_dev);
	list_add_tail(&dev_id_priv->listen_list, &id_priv->listen_list);
	atomic_inc(&id_priv->refcount);
	dev_id_priv->internal_id = 1;

	ret = rdma_listen(id, id_priv->backlog);
	if (ret)
		printk(KERN_WARNING "RDMA CMA: cma_listen_on_dev, error %d, "
		       "listening on device %s\n", ret, cma_dev->device->name);
}

static void cma_listen_on_all(struct rdma_id_private *id_priv)
{
	struct cma_device *cma_dev;

	mutex_lock(&lock);
	list_add_tail(&id_priv->list, &listen_any_list);
	list_for_each_entry(cma_dev, &dev_list, list)
		cma_listen_on_dev(id_priv, cma_dev);
	mutex_unlock(&lock);
}

int rdma_listen(struct rdma_cm_id *id, int backlog)
{
	struct rdma_id_private *id_priv;
	int ret;

	id_priv = container_of(id, struct rdma_id_private, id);
	if (id_priv->state == CMA_IDLE) {
		((struct sockaddr *) &id->route.addr.src_addr)->sa_family = AF_INET;
		ret = rdma_bind_addr(id, (struct sockaddr *) &id->route.addr.src_addr);
		if (ret)
			return ret;
	}

	if (!cma_comp_exch(id_priv, CMA_ADDR_BOUND, CMA_LISTEN))
		return -EINVAL;

	id_priv->backlog = backlog;
	if (id->device) {
		switch (rdma_node_get_transport(id->device->node_type)) {
		case RDMA_TRANSPORT_IB:
			ret = cma_ib_listen(id_priv);
			if (ret)
				goto err;
			break;
		case RDMA_TRANSPORT_IWARP:
			ret = cma_iw_listen(id_priv, backlog);
			if (ret)
				goto err;
			break;
		default:
			ret = -ENOSYS;
			goto err;
		}
	} else
		cma_listen_on_all(id_priv);

	return 0;
err:
	id_priv->backlog = 0;
	cma_comp_exch(id_priv, CMA_LISTEN, CMA_ADDR_BOUND);
	return ret;
}
EXPORT_SYMBOL(rdma_listen);

void rdma_set_service_type(struct rdma_cm_id *id, int tos)
{
	struct rdma_id_private *id_priv;

	id_priv = container_of(id, struct rdma_id_private, id);
	id_priv->tos = (u8) tos;
}
EXPORT_SYMBOL(rdma_set_service_type);

static void cma_query_handler(int status, struct ib_sa_path_rec *path_rec,
			      void *context)
{
	struct cma_work *work = context;
	struct rdma_route *route;

	route = &work->id->id.route;

	if (!status) {
		route->num_paths = 1;
		*route->path_rec = *path_rec;
	} else {
		work->old_state = CMA_ROUTE_QUERY;
		work->new_state = CMA_ADDR_RESOLVED;
		work->event.event = RDMA_CM_EVENT_ROUTE_ERROR;
		work->event.status = status;
	}

	queue_work(cma_wq, &work->work);
}

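/*
 * Resolve an IB route by asking the SA for a path record matching the
 * source/destination GIDs and service ID.  The result is delivered to
 * cma_query_handler(), which queues a cma_work item on cma_wq to complete
 * the state transition.
 */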
static int cma_query_ib_route(struct rdma_id_private *id_priv, int timeout_ms,
			      struct cma_work *work)
{
	struct rdma_addr *addr = &id_priv->id.route.addr;
	struct ib_sa_path_rec path_rec;
	ib_sa_comp_mask comp_mask;
	struct sockaddr_in6 *sin6;

	memset(&path_rec, 0, sizeof path_rec);
	rdma_addr_get_sgid(&addr->dev_addr, &path_rec.sgid);
	rdma_addr_get_dgid(&addr->dev_addr, &path_rec.dgid);
	path_rec.pkey = cpu_to_be16(ib_addr_get_pkey(&addr->dev_addr));
	path_rec.numb_path = 1;
	path_rec.reversible = 1;
	path_rec.service_id = cma_get_service_id(id_priv->id.ps,
							(struct sockaddr *) &addr->dst_addr);

	comp_mask = IB_SA_PATH_REC_DGID | IB_SA_PATH_REC_SGID |
		    IB_SA_PATH_REC_PKEY | IB_SA_PATH_REC_NUMB_PATH |
		    IB_SA_PATH_REC_REVERSIBLE | IB_SA_PATH_REC_SERVICE_ID;

	if (addr->src_addr.ss_family == AF_INET) {
		path_rec.qos_class = cpu_to_be16((u16) id_priv->tos);
		comp_mask |= IB_SA_PATH_REC_QOS_CLASS;
	} else {
		sin6 = (struct sockaddr_in6 *) &addr->src_addr;
		path_rec.traffic_class = (u8) (be32_to_cpu(sin6->sin6_flowinfo) >> 20);
		comp_mask |= IB_SA_PATH_REC_TRAFFIC_CLASS;
	}

	if (tavor_quirk) {
		path_rec.mtu_selector = IB_SA_LT;
		path_rec.mtu = IB_MTU_2048;
	}

	id_priv->query_id = ib_sa_path_rec_get(&sa_client, id_priv->id.device,
					       id_priv->id.port_num, &path_rec,
					       comp_mask, timeout_ms,
					       GFP_KERNEL, cma_query_handler,
					       work, &id_priv->query);

	return (id_priv->query_id < 0) ? id_priv->query_id : 0;
}

static void cma_work_handler(struct work_struct *_work)
{
	struct cma_work *work = container_of(_work, struct cma_work, work);
	struct rdma_id_private *id_priv = work->id;
	int destroy = 0;

	mutex_lock(&id_priv->handler_mutex);
	if (!cma_comp_exch(id_priv, work->old_state, work->new_state))
		goto out;

	if (id_priv->id.event_handler(&id_priv->id, &work->event)) {
		cma_exch(id_priv, CMA_DESTROYING);
		destroy = 1;
	}
out:
	mutex_unlock(&id_priv->handler_mutex);
	cma_deref_id(id_priv);
	if (destroy)
		rdma_destroy_id(&id_priv->id);
	kfree(work);
}

static void cma_ndev_work_handler(struct work_struct *_work)
{
	struct cma_ndev_work *work = container_of(_work, struct cma_ndev_work, work);
	struct rdma_id_private *id_priv = work->id;
	int destroy = 0;

	mutex_lock(&id_priv->handler_mutex);
	if (id_priv->state == CMA_DESTROYING ||
	    id_priv->state == CMA_DEVICE_REMOVAL)
		goto out;

	if (id_priv->id.event_handler(&id_priv->id, &work->event)) {
		cma_exch(id_priv, CMA_DESTROYING);
		destroy = 1;
	}

out:
	mutex_unlock(&id_priv->handler_mutex);
	cma_deref_id(id_priv);
	if (destroy)
		rdma_destroy_id(&id_priv->id);
	kfree(work);
}

static int cma_resolve_ib_route(struct rdma_id_private *id_priv, int timeout_ms)
{
	struct rdma_route *route = &id_priv->id.route;
	struct cma_work *work;
	int ret;

	work = kzalloc(sizeof *work, GFP_KERNEL);
	if (!work)
		return -ENOMEM;

	work->id = id_priv;
	INIT_WORK(&work->work, cma_work_handler);
	work->old_state = CMA_ROUTE_QUERY;
	work->new_state = CMA_ROUTE_RESOLVED;
	work->event.event = RDMA_CM_EVENT_ROUTE_RESOLVED;

	route->path_rec = kmalloc(sizeof *route->path_rec, GFP_KERNEL);
	if (!route->path_rec) {
		ret = -ENOMEM;
		goto err1;
	}

	ret = cma_query_ib_route(id_priv, timeout_ms, work);
	if (ret)
		goto err2;

	return 0;
err2:
	kfree(route->path_rec);
	route->path_rec = NULL;
err1:
	kfree(work);
	return ret;
}

int rdma_set_ib_paths(struct rdma_cm_id *id,
		      struct ib_sa_path_rec *path_rec, int num_paths)
{
	struct rdma_id_private *id_priv;
	int ret;

	id_priv = container_of(id, struct rdma_id_private, id);
	if (!cma_comp_exch(id_priv, CMA_ADDR_RESOLVED, CMA_ROUTE_RESOLVED))
		return -EINVAL;

	id->route.path_rec = kmalloc(sizeof *path_rec * num_paths, GFP_KERNEL);
	if (!id->route.path_rec) {
		ret = -ENOMEM;
		goto err;
	}

	memcpy(id->route.path_rec, path_rec, sizeof *path_rec * num_paths);
	return 0;
err:
	cma_comp_exch(id_priv, CMA_ROUTE_RESOLVED, CMA_ADDR_RESOLVED);
	return ret;
}
EXPORT_SYMBOL(rdma_set_ib_paths);

static int cma_resolve_iw_route(struct rdma_id_private *id_priv, int timeout_ms)
{
	struct cma_work *work;

	work = kzalloc(sizeof *work, GFP_KERNEL);
	if (!work)
		return -ENOMEM;

	work->id = id_priv;
	INIT_WORK(&work->work, cma_work_handler);
	work->old_state = CMA_ROUTE_QUERY;
	work->new_state = CMA_ROUTE_RESOLVED;
	work->event.event = RDMA_CM_EVENT_ROUTE_RESOLVED;
	queue_work(cma_wq, &work->work);
	return 0;
}

1888static u8 tos_to_sl(u8 tos)
1889{
1890	return def_prec2sl & 7;
1891}
1892
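/*
 * RoCE (IBoE) route resolution.  No SA query is involved: the path is
 * synthesized locally from the bound net device.  The GIDs are derived
 * from the source and destination MAC addresses plus the VLAN id, the
 * pkey is the default 0xffff, and the MTU and rate are taken from the
 * interface.  Note that tos_to_sl() currently ignores its argument and
 * returns the def_prec2sl module default.  The ROUTE_RESOLVED event is
 * still delivered asynchronously through cma_wq, so callers see the same
 * behavior as on the IB path.
 */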
1893static int cma_resolve_iboe_route(struct rdma_id_private *id_priv)
1894{
1895	struct rdma_route *route = &id_priv->id.route;
1896	struct rdma_addr *addr = &route->addr;
1897	struct cma_work *work;
1898	int ret;
1899	struct sockaddr_in *src_addr = (struct sockaddr_in *)&route->addr.src_addr;
1900	struct sockaddr_in *dst_addr = (struct sockaddr_in *)&route->addr.dst_addr;
1901	struct net_device *ndev = NULL;
1902	u16 vid;
1903
1904	if (src_addr->sin_family != dst_addr->sin_family)
1905		return -EINVAL;
1906
1907	work = kzalloc(sizeof *work, GFP_KERNEL);
1908	if (!work)
1909		return -ENOMEM;
1910
1911	work->id = id_priv;
1912	INIT_WORK(&work->work, cma_work_handler);
1913
1914	route->path_rec = kzalloc(sizeof *route->path_rec, GFP_KERNEL);
1915	if (!route->path_rec) {
1916		ret = -ENOMEM;
1917		goto err1;
1918	}
1919
1920	route->num_paths = 1;
1921
1922	if (addr->dev_addr.bound_dev_if)
1923		ndev = dev_get_by_index(&init_net, addr->dev_addr.bound_dev_if);
1924	if (!ndev) {
1925		ret = -ENODEV;
1926		goto err2;
1927	}
1928
1929	vid = rdma_vlan_dev_vlan_id(ndev);
1930
1931	iboe_mac_vlan_to_ll(&route->path_rec->sgid, addr->dev_addr.src_dev_addr, vid);
1932	iboe_mac_vlan_to_ll(&route->path_rec->dgid, addr->dev_addr.dst_dev_addr, vid);
1933
1934	route->path_rec->hop_limit = 1;
1935	route->path_rec->reversible = 1;
1936	route->path_rec->pkey = cpu_to_be16(0xffff);
1937	route->path_rec->mtu_selector = IB_SA_EQ;
1938	route->path_rec->sl = tos_to_sl(id_priv->tos);
1939
1940#ifdef __linux__
1941	route->path_rec->mtu = iboe_get_mtu(ndev->mtu);
1942#else
1943	route->path_rec->mtu = iboe_get_mtu(ndev->if_mtu);
1944#endif
1945	route->path_rec->rate_selector = IB_SA_EQ;
1946	route->path_rec->rate = iboe_get_rate(ndev);
1947	dev_put(ndev);
1948	route->path_rec->packet_life_time_selector = IB_SA_EQ;
1949	route->path_rec->packet_life_time = IBOE_PACKET_LIFETIME;
1950	if (!route->path_rec->mtu) {
1951		ret = -EINVAL;
1952		goto err2;
1953	}
1954
1955	work->old_state = CMA_ROUTE_QUERY;
1956	work->new_state = CMA_ROUTE_RESOLVED;
1957	work->event.event = RDMA_CM_EVENT_ROUTE_RESOLVED;
1958	work->event.status = 0;
1959
1960	queue_work(cma_wq, &work->work);
1961
1962	return 0;
1963
1964err2:
1965	kfree(route->path_rec);
1966	route->path_rec = NULL;
1967err1:
1968	kfree(work);
1969	return ret;
1970}
1971
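/*
 * rdma_resolve_route() moves the id from CMA_ADDR_RESOLVED to
 * CMA_ROUTE_QUERY and dispatches on the transport: IB ports use an SA
 * path record query, Ethernet (RoCE) ports build the route locally, and
 * iWARP simply completes through the workqueue.  The result arrives as
 * an RDMA_CM_EVENT_ROUTE_RESOLVED or _ROUTE_ERROR event.
 *
 * Typical active-side sequence (sketch only, error handling and event
 * synchronization omitted):
 *
 *	rdma_resolve_addr(id, NULL, dst_addr, timeout_ms);
 *	... wait for RDMA_CM_EVENT_ADDR_RESOLVED ...
 *	rdma_resolve_route(id, timeout_ms);
 *	... wait for RDMA_CM_EVENT_ROUTE_RESOLVED ...
 *	rdma_connect(id, &conn_param);
 */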
1972int rdma_resolve_route(struct rdma_cm_id *id, int timeout_ms)
1973{
1974	struct rdma_id_private *id_priv;
1975	int ret;
1976
1977	id_priv = container_of(id, struct rdma_id_private, id);
1978	if (!cma_comp_exch(id_priv, CMA_ADDR_RESOLVED, CMA_ROUTE_QUERY))
1979		return -EINVAL;
1980
1981	atomic_inc(&id_priv->refcount);
1982	switch (rdma_node_get_transport(id->device->node_type)) {
1983	case RDMA_TRANSPORT_IB:
1984		switch (rdma_port_get_link_layer(id->device, id->port_num)) {
1985		case IB_LINK_LAYER_INFINIBAND:
1986			ret = cma_resolve_ib_route(id_priv, timeout_ms);
1987			break;
1988		case IB_LINK_LAYER_ETHERNET:
1989			ret = cma_resolve_iboe_route(id_priv);
1990			break;
1991		default:
1992			ret = -ENOSYS;
1993		}
1994		break;
1995	case RDMA_TRANSPORT_IWARP:
1996		ret = cma_resolve_iw_route(id_priv, timeout_ms);
1997		break;
1998	default:
1999		ret = -ENOSYS;
2000		break;
2001	}
2002	if (ret)
2003		goto err;
2004
2005	return 0;
2006err:
2007	cma_comp_exch(id_priv, CMA_ROUTE_QUERY, CMA_ADDR_RESOLVED);
2008	cma_deref_id(id_priv);
2009	return ret;
2010}
2011EXPORT_SYMBOL(rdma_resolve_route);
2012
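/*
 * Bind an id that has no device yet to a local device for loopback or
 * wildcard resolution: pick the first port found in the ACTIVE state
 * (falling back to port 1 of the first registered device), take the GID
 * and pkey at index 0, and attach the id to that device under the global
 * lock.
 */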
2013static int cma_bind_loopback(struct rdma_id_private *id_priv)
2014{
2015	struct cma_device *cma_dev;
2016	struct ib_port_attr port_attr;
2017	union ib_gid gid;
2018	u16 pkey;
2019	int ret;
2020	u8 p;
2021
2022	mutex_lock(&lock);
2023	if (list_empty(&dev_list)) {
2024		ret = -ENODEV;
2025		goto out;
2026	}
2027	list_for_each_entry(cma_dev, &dev_list, list)
2028		for (p = 1; p <= cma_dev->device->phys_port_cnt; ++p)
2029			if (!ib_query_port(cma_dev->device, p, &port_attr) &&
2030			    port_attr.state == IB_PORT_ACTIVE)
2031				goto port_found;
2032
2033	p = 1;
2034	cma_dev = list_entry(dev_list.next, struct cma_device, list);
2035
2036port_found:
2037	ret = ib_get_cached_gid(cma_dev->device, p, 0, &gid);
2038	if (ret)
2039		goto out;
2040
2041	ret = ib_get_cached_pkey(cma_dev->device, p, 0, &pkey);
2042	if (ret)
2043		goto out;
2044
2045	id_priv->id.route.addr.dev_addr.dev_type =
2046		(rdma_port_get_link_layer(cma_dev->device, p) == IB_LINK_LAYER_INFINIBAND) ?
2047		ARPHRD_INFINIBAND : ARPHRD_ETHER;
2048
2049	rdma_addr_set_sgid(&id_priv->id.route.addr.dev_addr, &gid);
2050	ib_addr_set_pkey(&id_priv->id.route.addr.dev_addr, pkey);
2051	id_priv->id.port_num = p;
2052	cma_attach_to_dev(id_priv, cma_dev);
2053out:
2054	mutex_unlock(&lock);
2055	return ret;
2056}
2057
2058static void addr_handler(int status, struct sockaddr *src_addr,
2059			 struct rdma_dev_addr *dev_addr, void *context)
2060{
2061	struct rdma_id_private *id_priv = context;
2062	struct rdma_cm_event event;
2063
2064	memset(&event, 0, sizeof event);
2065	mutex_lock(&id_priv->handler_mutex);
2066
2067	/*
2068	 * Grab mutex to block rdma_destroy_id() from removing the device while
2069	 * we're trying to acquire it.
2070	 */
2071	mutex_lock(&lock);
2072	if (!cma_comp_exch(id_priv, CMA_ADDR_QUERY, CMA_ADDR_RESOLVED)) {
2073		mutex_unlock(&lock);
2074		goto out;
2075	}
2076
2077	if (!status && !id_priv->cma_dev)
2078		status = cma_acquire_dev(id_priv);
2079	mutex_unlock(&lock);
2080
2081	if (status) {
2082		if (!cma_comp_exch(id_priv, CMA_ADDR_RESOLVED, CMA_ADDR_BOUND))
2083			goto out;
2084		event.event = RDMA_CM_EVENT_ADDR_ERROR;
2085		event.status = status;
2086	} else {
2087		memcpy(&id_priv->id.route.addr.src_addr, src_addr,
2088		       ip_addr_size(src_addr));
2089		event.event = RDMA_CM_EVENT_ADDR_RESOLVED;
2090	}
2091
2092	if (id_priv->id.event_handler(&id_priv->id, &event)) {
2093		cma_exch(id_priv, CMA_DESTROYING);
2094		mutex_unlock(&id_priv->handler_mutex);
2095		cma_deref_id(id_priv);
2096		rdma_destroy_id(&id_priv->id);
2097		return;
2098	}
2099out:
2100	mutex_unlock(&id_priv->handler_mutex);
2101	cma_deref_id(id_priv);
2102}
2103
2104static int cma_resolve_loopback(struct rdma_id_private *id_priv)
2105{
2106	struct cma_work *work;
2107	struct sockaddr *src, *dst;
2108	union ib_gid gid;
2109	int ret;
2110
2111	work = kzalloc(sizeof *work, GFP_KERNEL);
2112	if (!work)
2113		return -ENOMEM;
2114
2115	if (!id_priv->cma_dev) {
2116		ret = cma_bind_loopback(id_priv);
2117		if (ret)
2118			goto err;
2119	}
2120
2121	rdma_addr_get_sgid(&id_priv->id.route.addr.dev_addr, &gid);
2122	rdma_addr_set_dgid(&id_priv->id.route.addr.dev_addr, &gid);
2123
2124	src = (struct sockaddr *) &id_priv->id.route.addr.src_addr;
2125	if (cma_zero_addr(src)) {
2126		dst = (struct sockaddr *) &id_priv->id.route.addr.dst_addr;
2127		if ((src->sa_family = dst->sa_family) == AF_INET) {
2128			((struct sockaddr_in *) src)->sin_addr.s_addr =
2129				((struct sockaddr_in *) dst)->sin_addr.s_addr;
2130		} else {
2131			ipv6_addr_copy(&((struct sockaddr_in6 *) src)->sin6_addr,
2132				       &((struct sockaddr_in6 *) dst)->sin6_addr);
2133		}
2134	}
2135
2136	work->id = id_priv;
2137	INIT_WORK(&work->work, cma_work_handler);
2138	work->old_state = CMA_ADDR_QUERY;
2139	work->new_state = CMA_ADDR_RESOLVED;
2140	work->event.event = RDMA_CM_EVENT_ADDR_RESOLVED;
2141	queue_work(cma_wq, &work->work);
2142	return 0;
2143err:
2144	kfree(work);
2145	return ret;
2146}
2147
2148static int cma_bind_addr(struct rdma_cm_id *id, struct sockaddr *src_addr,
2149			 struct sockaddr *dst_addr)
2150{
2151	if (!src_addr || !src_addr->sa_family) {
2152		src_addr = (struct sockaddr *) &id->route.addr.src_addr;
2153		if ((src_addr->sa_family = dst_addr->sa_family) == AF_INET6) {
2154			((struct sockaddr_in6 *) src_addr)->sin6_scope_id =
2155				((struct sockaddr_in6 *) dst_addr)->sin6_scope_id;
2156		}
2157	}
2158	if (!cma_any_addr(src_addr))
2159		return rdma_bind_addr(id, src_addr);
2160	else {
2161		struct sockaddr_in addr_in;
2162
2163		memset(&addr_in, 0, sizeof addr_in);
2164		addr_in.sin_family = dst_addr->sa_family;
2165		addr_in.sin_len = sizeof addr_in;
2166		return rdma_bind_addr(id, (struct sockaddr *) &addr_in);
2167	}
2168}
2169
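/*
 * Resolve a destination IP address to RDMA addressing information.  An
 * idle id is first bound via cma_bind_addr() (inheriting the destination
 * address family, and the IPv6 scope id when needed); the id then moves
 * to CMA_ADDR_QUERY and the work is handed to rdma_resolve_ip(), or to
 * cma_resolve_loopback() when the destination is a wildcard address.
 * Completion is reported by addr_handler() as an ADDR_RESOLVED or
 * ADDR_ERROR event.
 */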
2170int rdma_resolve_addr(struct rdma_cm_id *id, struct sockaddr *src_addr,
2171		      struct sockaddr *dst_addr, int timeout_ms)
2172{
2173	struct rdma_id_private *id_priv;
2174	int ret;
2175
2176	id_priv = container_of(id, struct rdma_id_private, id);
2177	if (id_priv->state == CMA_IDLE) {
2178		ret = cma_bind_addr(id, src_addr, dst_addr);
2179		if (ret)
2180			return ret;
2181	}
2182
2183	if (!cma_comp_exch(id_priv, CMA_ADDR_BOUND, CMA_ADDR_QUERY))
2184		return -EINVAL;
2185
2186	atomic_inc(&id_priv->refcount);
2187	memcpy(&id->route.addr.dst_addr, dst_addr, ip_addr_size(dst_addr));
2188	if (cma_any_addr(dst_addr))
2189		ret = cma_resolve_loopback(id_priv);
2190	else
2191		ret = rdma_resolve_ip(&addr_client, (struct sockaddr *) &id->route.addr.src_addr,
2192				      dst_addr, &id->route.addr.dev_addr,
2193				      timeout_ms, addr_handler, id_priv);
2194	if (ret)
2195		goto err;
2196
2197	return 0;
2198err:
2199	cma_comp_exch(id_priv, CMA_ADDR_QUERY, CMA_ADDR_BOUND);
2200	cma_deref_id(id_priv);
2201	return ret;
2202}
2203EXPORT_SYMBOL(rdma_resolve_addr);
2204
2205static void cma_bind_port(struct rdma_bind_list *bind_list,
2206			  struct rdma_id_private *id_priv)
2207{
2208	struct sockaddr_in *sin;
2209
2210	sin = (struct sockaddr_in *) &id_priv->id.route.addr.src_addr;
2211	sin->sin_port = htons(bind_list->port);
2212	id_priv->bind_list = bind_list;
2213	hlist_add_head(&id_priv->node, &bind_list->owners);
2214}
2215
2216static int cma_alloc_port(struct idr *ps, struct rdma_id_private *id_priv,
2217			  unsigned short snum)
2218{
2219	struct rdma_bind_list *bind_list;
2220	int port, ret;
2221
2222	bind_list = kzalloc(sizeof *bind_list, GFP_KERNEL);
2223	if (!bind_list)
2224		return -ENOMEM;
2225
2226	do {
2227		ret = idr_get_new_above(ps, bind_list, snum, &port);
2228	} while ((ret == -EAGAIN) && idr_pre_get(ps, GFP_KERNEL));
2229
2230	if (ret)
2231		goto err1;
2232
2233	if (port != snum) {
2234		ret = -EADDRNOTAVAIL;
2235		goto err2;
2236	}
2237
2238	bind_list->ps = ps;
2239	bind_list->port = (unsigned short) port;
2240	cma_bind_port(bind_list, id_priv);
2241	return 0;
2242err2:
2243	idr_remove(ps, port);
2244err1:
2245	kfree(bind_list);
2246	return ret;
2247}
2248
2249static int cma_alloc_any_port(struct idr *ps, struct rdma_id_private *id_priv)
2250{
2251#if defined(INET)
2252	struct rdma_bind_list *bind_list;
2253	int port, ret, low, high;
2254
2255	bind_list = kzalloc(sizeof *bind_list, GFP_KERNEL);
2256	if (!bind_list)
2257		return -ENOMEM;
2258
2259retry:
2260	/* FIXME: add proper port randomization, as done in inet_csk_get_port() */
2261	do {
2262		ret = idr_get_new_above(ps, bind_list, next_port, &port);
2263	} while ((ret == -EAGAIN) && idr_pre_get(ps, GFP_KERNEL));
2264
2265	if (ret)
2266		goto err1;
2267
2268	inet_get_local_port_range(&low, &high);
2269	if (port > high) {
2270		if (next_port != low) {
2271			idr_remove(ps, port);
2272			next_port = low;
2273			goto retry;
2274		}
2275		ret = -EADDRNOTAVAIL;
2276		goto err2;
2277	}
2278
2279	if (port == high)
2280		next_port = low;
2281	else
2282		next_port = port + 1;
2283
2284	bind_list->ps = ps;
2285	bind_list->port = (unsigned short) port;
2286	cma_bind_port(bind_list, id_priv);
2287	return 0;
2288err2:
2289	idr_remove(ps, port);
2290err1:
2291	kfree(bind_list);
2292	return ret;
2293#else
2294	return -ENOSPC;
2295#endif
2296}
2297
2298static int cma_use_port(struct idr *ps, struct rdma_id_private *id_priv)
2299{
2300	struct rdma_id_private *cur_id;
2301	struct sockaddr_in *sin, *cur_sin;
2302	struct rdma_bind_list *bind_list;
2303	struct hlist_node *node;
2304	unsigned short snum;
2305
2306	sin = (struct sockaddr_in *) &id_priv->id.route.addr.src_addr;
2307	snum = ntohs(sin->sin_port);
2308#ifdef __linux__
2309	if (snum < PROT_SOCK && !capable(CAP_NET_BIND_SERVICE))
2310		return -EACCES;
2311#endif
2312
2313	bind_list = idr_find(ps, snum);
2314	if (!bind_list)
2315		return cma_alloc_port(ps, id_priv, snum);
2316
2317	/*
2318	 * We don't support binding to any address if anyone is bound to
2319	 * a specific address on the same port.
2320	 */
2321	if (cma_any_addr((struct sockaddr *) &id_priv->id.route.addr.src_addr))
2322		return -EADDRNOTAVAIL;
2323
2324	hlist_for_each_entry(cur_id, node, &bind_list->owners, node) {
2325		if (cma_any_addr((struct sockaddr *) &cur_id->id.route.addr.src_addr))
2326			return -EADDRNOTAVAIL;
2327
2328		cur_sin = (struct sockaddr_in *) &cur_id->id.route.addr.src_addr;
2329		if (sin->sin_addr.s_addr == cur_sin->sin_addr.s_addr)
2330			return -EADDRINUSE;
2331	}
2332
2333	cma_bind_port(bind_list, id_priv);
2334	return 0;
2335}
2336
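/*
 * When unify_tcp_port_space is enabled, reserve the RDMA_PS_TCP port in
 * the host TCP port space as well: create a kernel TCP socket, bind it
 * to the id's source address (sobind() on FreeBSD), read the resulting
 * address back with sock_getname(), and keep the socket in
 * id_priv->sock so the reservation persists.
 */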
2337static int cma_get_tcp_port(struct rdma_id_private *id_priv)
2338{
2339	int ret;
2340	int size;
2341	struct socket *sock;
2342
2343	ret = sock_create_kern(AF_INET, SOCK_STREAM, IPPROTO_TCP, &sock);
2344	if (ret)
2345		return ret;
2346#ifdef __linux__
2347	ret = sock->ops->bind(sock,
2348			(struct sockaddr *) &id_priv->id.route.addr.src_addr,
2349			ip_addr_size((struct sockaddr *) &id_priv->id.route.addr.src_addr));
2350#else
2351	ret = -sobind(sock,
2352			(struct sockaddr *)&id_priv->id.route.addr.src_addr,
2353			curthread);
2354#endif
2355	if (ret) {
2356		sock_release(sock);
2357		return ret;
2358	}
2359
2360	size = ip_addr_size((struct sockaddr *) &id_priv->id.route.addr.src_addr);
2361	ret = sock_getname(sock,
2362			(struct sockaddr *) &id_priv->id.route.addr.src_addr,
2363			&size, 0);
2364	if (ret) {
2365		sock_release(sock);
2366		return ret;
2367	}
2368
2369	id_priv->sock = sock;
2370	return 0;
2371}
2372
2373static int cma_get_port(struct rdma_id_private *id_priv)
2374{
2375	struct idr *ps;
2376	int ret;
2377
2378	switch (id_priv->id.ps) {
2379	case RDMA_PS_SDP:
2380		ps = &sdp_ps;
2381		break;
2382	case RDMA_PS_TCP:
2383		ps = &tcp_ps;
2384		if (unify_tcp_port_space) {
2385			ret = cma_get_tcp_port(id_priv);
2386			if (ret)
2387				goto out;
2388		}
2389		break;
2390	case RDMA_PS_UDP:
2391		ps = &udp_ps;
2392		break;
2393	case RDMA_PS_IPOIB:
2394		ps = &ipoib_ps;
2395		break;
2396	default:
2397		return -EPROTONOSUPPORT;
2398	}
2399
2400	mutex_lock(&lock);
2401	if (cma_any_port((struct sockaddr *) &id_priv->id.route.addr.src_addr))
2402		ret = cma_alloc_any_port(ps, id_priv);
2403	else
2404		ret = cma_use_port(ps, id_priv);
2405	mutex_unlock(&lock);
2406out:
2407	return ret;
2408}
2409
2410static int cma_check_linklocal(struct rdma_dev_addr *dev_addr,
2411			       struct sockaddr *addr)
2412{
2413#if defined(INET6)
2414	struct sockaddr_in6 *sin6;
2415
2416	if (addr->sa_family != AF_INET6)
2417		return 0;
2418
2419	sin6 = (struct sockaddr_in6 *) addr;
2420#ifdef __linux__
2421	if ((ipv6_addr_type(&sin6->sin6_addr) & IPV6_ADDR_LINKLOCAL) &&
2422#else
2423	if (IN6_IS_SCOPE_LINKLOCAL(&sin6->sin6_addr) &&
2424#endif
2425	    !sin6->sin6_scope_id)
2426			return -EINVAL;
2427
2428	dev_addr->bound_dev_if = sin6->sin6_scope_id;
2429#endif
2430	return 0;
2431}
2432
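/*
 * Bind an id to a local address.  The address family is validated, a
 * link-local IPv6 address must carry a scope id, and a non-wildcard
 * address is translated to a device address so a matching RDMA device
 * can be acquired.  Finally cma_get_port() allocates an ephemeral port
 * or claims the requested one; on failure the device attachment and the
 * CMA_ADDR_BOUND state are rolled back.
 */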
2433int rdma_bind_addr(struct rdma_cm_id *id, struct sockaddr *addr)
2434{
2435	struct rdma_id_private *id_priv;
2436	int ret;
2437
2438	if (addr->sa_family != AF_INET && addr->sa_family != AF_INET6)
2439		return -EAFNOSUPPORT;
2440
2441	id_priv = container_of(id, struct rdma_id_private, id);
2442	if (!cma_comp_exch(id_priv, CMA_IDLE, CMA_ADDR_BOUND))
2443		return -EINVAL;
2444
2445	ret = cma_check_linklocal(&id->route.addr.dev_addr, addr);
2446	if (ret)
2447		goto err1;
2448
2449	if (!cma_any_addr(addr)) {
2450		ret = rdma_translate_ip(addr, &id->route.addr.dev_addr);
2451		if (ret)
2452			goto err1;
2453
2454		mutex_lock(&lock);
2455		ret = cma_acquire_dev(id_priv);
2456		mutex_unlock(&lock);
2457		if (ret)
2458			goto err1;
2459	}
2460
2461	memcpy(&id->route.addr.src_addr, addr, ip_addr_size(addr));
2462	ret = cma_get_port(id_priv);
2463	if (ret)
2464		goto err2;
2465
2466	return 0;
2467err2:
2468	if (id_priv->cma_dev) {
2469		mutex_lock(&lock);
2470		cma_detach_from_dev(id_priv);
2471		mutex_unlock(&lock);
2472	}
2473err1:
2474	cma_comp_exch(id_priv, CMA_ADDR_BOUND, CMA_IDLE);
2475	return ret;
2476}
2477EXPORT_SYMBOL(rdma_bind_addr);
2478
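/*
 * Build the private-data header that precedes user data in a connection
 * request: for RDMA_PS_SDP the SDP hello header is filled in (after
 * checking its major version), otherwise the generic CMA header is used.
 * Both carry the IP version, source/destination addresses and the source
 * port.
 */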
2479static int cma_format_hdr(void *hdr, enum rdma_port_space ps,
2480			  struct rdma_route *route)
2481{
2482	struct cma_hdr *cma_hdr;
2483	struct sdp_hh *sdp_hdr;
2484
2485	if (route->addr.src_addr.ss_family == AF_INET) {
2486		struct sockaddr_in *src4, *dst4;
2487
2488		src4 = (struct sockaddr_in *) &route->addr.src_addr;
2489		dst4 = (struct sockaddr_in *) &route->addr.dst_addr;
2490
2491		switch (ps) {
2492		case RDMA_PS_SDP:
2493			sdp_hdr = hdr;
2494			if (sdp_get_majv(sdp_hdr->sdp_version) != SDP_MAJ_VERSION)
2495				return -EINVAL;
2496			sdp_set_ip_ver(sdp_hdr, 4);
2497			sdp_hdr->src_addr.ip4.addr = src4->sin_addr.s_addr;
2498			sdp_hdr->dst_addr.ip4.addr = dst4->sin_addr.s_addr;
2499			sdp_hdr->port = src4->sin_port;
2500			break;
2501		default:
2502			cma_hdr = hdr;
2503			cma_hdr->cma_version = CMA_VERSION;
2504			cma_set_ip_ver(cma_hdr, 4);
2505			cma_hdr->src_addr.ip4.addr = src4->sin_addr.s_addr;
2506			cma_hdr->dst_addr.ip4.addr = dst4->sin_addr.s_addr;
2507			cma_hdr->port = src4->sin_port;
2508			break;
2509		}
2510	} else {
2511		struct sockaddr_in6 *src6, *dst6;
2512
2513		src6 = (struct sockaddr_in6 *) &route->addr.src_addr;
2514		dst6 = (struct sockaddr_in6 *) &route->addr.dst_addr;
2515
2516		switch (ps) {
2517		case RDMA_PS_SDP:
2518			sdp_hdr = hdr;
2519			if (sdp_get_majv(sdp_hdr->sdp_version) != SDP_MAJ_VERSION)
2520				return -EINVAL;
2521			sdp_set_ip_ver(sdp_hdr, 6);
2522			sdp_hdr->src_addr.ip6 = src6->sin6_addr;
2523			sdp_hdr->dst_addr.ip6 = dst6->sin6_addr;
2524			sdp_hdr->port = src6->sin6_port;
2525			break;
2526		default:
2527			cma_hdr = hdr;
2528			cma_hdr->cma_version = CMA_VERSION;
2529			cma_set_ip_ver(cma_hdr, 6);
2530			cma_hdr->src_addr.ip6 = src6->sin6_addr;
2531			cma_hdr->dst_addr.ip6 = dst6->sin6_addr;
2532			cma_hdr->port = src6->sin6_port;
2533			break;
2534		}
2535	}
2536	return 0;
2537}
2538
2539static int cma_sidr_rep_handler(struct ib_cm_id *cm_id,
2540				struct ib_cm_event *ib_event)
2541{
2542	struct rdma_id_private *id_priv = cm_id->context;
2543	struct rdma_cm_event event;
2544	struct ib_cm_sidr_rep_event_param *rep = &ib_event->param.sidr_rep_rcvd;
2545	int ret = 0;
2546
2547	if (cma_disable_callback(id_priv, CMA_CONNECT))
2548		return 0;
2549
2550	memset(&event, 0, sizeof event);
2551	switch (ib_event->event) {
2552	case IB_CM_SIDR_REQ_ERROR:
2553		event.event = RDMA_CM_EVENT_UNREACHABLE;
2554		event.status = -ETIMEDOUT;
2555		break;
2556	case IB_CM_SIDR_REP_RECEIVED:
2557		event.param.ud.private_data = ib_event->private_data;
2558		event.param.ud.private_data_len = IB_CM_SIDR_REP_PRIVATE_DATA_SIZE;
2559		if (rep->status != IB_SIDR_SUCCESS) {
2560			event.event = RDMA_CM_EVENT_UNREACHABLE;
2561			event.status = ib_event->param.sidr_rep_rcvd.status;
2562			break;
2563		}
2564		ret = cma_set_qkey(id_priv);
2565		if (ret) {
2566			event.event = RDMA_CM_EVENT_ADDR_ERROR;
2567			event.status = -EINVAL;
2568			break;
2569		}
2570		if (id_priv->qkey != rep->qkey) {
2571			event.event = RDMA_CM_EVENT_UNREACHABLE;
2572			event.status = -EINVAL;
2573			break;
2574		}
2575		ib_init_ah_from_path(id_priv->id.device, id_priv->id.port_num,
2576				     id_priv->id.route.path_rec,
2577				     &event.param.ud.ah_attr);
2578		event.param.ud.qp_num = rep->qpn;
2579		event.param.ud.qkey = rep->qkey;
2580		event.event = RDMA_CM_EVENT_ESTABLISHED;
2581		event.status = 0;
2582		break;
2583	default:
2584		printk(KERN_ERR "RDMA CMA: unexpected IB CM event: %d\n",
2585		       ib_event->event);
2586		goto out;
2587	}
2588
2589	ret = id_priv->id.event_handler(&id_priv->id, &event);
2590	if (ret) {
2591		/* Destroy the CM ID by returning a non-zero value. */
2592		id_priv->cm_id.ib = NULL;
2593		cma_exch(id_priv, CMA_DESTROYING);
2594		mutex_unlock(&id_priv->handler_mutex);
2595		rdma_destroy_id(&id_priv->id);
2596		return ret;
2597	}
2598out:
2599	mutex_unlock(&id_priv->handler_mutex);
2600	return ret;
2601}
2602
2603static int cma_resolve_ib_udp(struct rdma_id_private *id_priv,
2604			      struct rdma_conn_param *conn_param)
2605{
2606	struct ib_cm_sidr_req_param req;
2607	struct rdma_route *route;
2608	int ret;
2609
2610	req.private_data_len = sizeof(struct cma_hdr) +
2611			       conn_param->private_data_len;
2612	req.private_data = kzalloc(req.private_data_len, GFP_ATOMIC);
2613	if (!req.private_data)
2614		return -ENOMEM;
2615
2616	if (conn_param->private_data && conn_param->private_data_len)
2617		memcpy((void *) req.private_data + sizeof(struct cma_hdr),
2618		       conn_param->private_data, conn_param->private_data_len);
2619
2620	route = &id_priv->id.route;
2621	ret = cma_format_hdr((void *) req.private_data, id_priv->id.ps, route);
2622	if (ret)
2623		goto out;
2624
2625	id_priv->cm_id.ib = ib_create_cm_id(id_priv->id.device,
2626					    cma_sidr_rep_handler, id_priv);
2627	if (IS_ERR(id_priv->cm_id.ib)) {
2628		ret = PTR_ERR(id_priv->cm_id.ib);
2629		goto out;
2630	}
2631
2632	req.path = route->path_rec;
2633	req.service_id = cma_get_service_id(id_priv->id.ps,
2634					    (struct sockaddr *) &route->addr.dst_addr);
2635	req.timeout_ms = 1 << (cma_response_timeout - 8);
2636	req.max_cm_retries = CMA_MAX_CM_RETRIES;
2637
2638	ret = ib_send_cm_sidr_req(id_priv->cm_id.ib, &req);
2639	if (ret) {
2640		ib_destroy_cm_id(id_priv->cm_id.ib);
2641		id_priv->cm_id.ib = NULL;
2642	}
2643out:
2644	kfree(req.private_data);
2645	return ret;
2646}
2647
2648static int cma_connect_ib(struct rdma_id_private *id_priv,
2649			  struct rdma_conn_param *conn_param)
2650{
2651	struct ib_cm_req_param req;
2652	struct rdma_route *route;
2653	void *private_data;
2654	int offset, ret;
2655
2656	memset(&req, 0, sizeof req);
2657	offset = cma_user_data_offset(id_priv->id.ps);
2658	req.private_data_len = offset + conn_param->private_data_len;
2659	private_data = kzalloc(req.private_data_len, GFP_ATOMIC);
2660	if (!private_data)
2661		return -ENOMEM;
2662
2663	if (conn_param->private_data && conn_param->private_data_len)
2664		memcpy(private_data + offset, conn_param->private_data,
2665		       conn_param->private_data_len);
2666
2667	id_priv->cm_id.ib = ib_create_cm_id(id_priv->id.device, cma_ib_handler,
2668					    id_priv);
2669	if (IS_ERR(id_priv->cm_id.ib)) {
2670		ret = PTR_ERR(id_priv->cm_id.ib);
2671		goto out;
2672	}
2673
2674	route = &id_priv->id.route;
2675	ret = cma_format_hdr(private_data, id_priv->id.ps, route);
2676	if (ret)
2677		goto out;
2678	req.private_data = private_data;
2679
2680	req.primary_path = &route->path_rec[0];
2681	if (route->num_paths == 2)
2682		req.alternate_path = &route->path_rec[1];
2683
2684	req.service_id = cma_get_service_id(id_priv->id.ps,
2685					    (struct sockaddr *) &route->addr.dst_addr);
2686	req.qp_num = id_priv->qp_num;
2687	req.qp_type = IB_QPT_RC;
2688	req.starting_psn = id_priv->seq_num;
2689	req.responder_resources = conn_param->responder_resources;
2690	req.initiator_depth = conn_param->initiator_depth;
2691	req.flow_control = conn_param->flow_control;
2692	req.retry_count = conn_param->retry_count;
2693	req.rnr_retry_count = conn_param->rnr_retry_count;
2694	req.remote_cm_response_timeout = cma_response_timeout;
2695	req.local_cm_response_timeout = cma_response_timeout;
2696	req.max_cm_retries = CMA_MAX_CM_RETRIES;
2697	req.srq = id_priv->srq ? 1 : 0;
2698
2699	ret = ib_send_cm_req(id_priv->cm_id.ib, &req);
2700out:
2701	if (ret && !IS_ERR(id_priv->cm_id.ib)) {
2702		ib_destroy_cm_id(id_priv->cm_id.ib);
2703		id_priv->cm_id.ib = NULL;
2704	}
2705
2706	kfree(private_data);
2707	return ret;
2708}
2709
2710static int cma_connect_iw(struct rdma_id_private *id_priv,
2711			  struct rdma_conn_param *conn_param)
2712{
2713	struct iw_cm_id *cm_id;
2714	struct sockaddr_in* sin;
2715	int ret;
2716	struct iw_cm_conn_param iw_param;
2717
2718	cm_id = iw_create_cm_id(id_priv->id.device, id_priv->sock,
2719				cma_iw_handler, id_priv);
2720	if (IS_ERR(cm_id)) {
2721		ret = PTR_ERR(cm_id);
2722		goto out;
2723	}
2724
2725	id_priv->cm_id.iw = cm_id;
2726
2727	sin = (struct sockaddr_in*) &id_priv->id.route.addr.src_addr;
2728	cm_id->local_addr = *sin;
2729
2730	sin = (struct sockaddr_in*) &id_priv->id.route.addr.dst_addr;
2731	cm_id->remote_addr = *sin;
2732
2733	ret = cma_modify_qp_rtr(id_priv, conn_param);
2734	if (ret)
2735		goto out;
2736
2737	iw_param.ord = conn_param->initiator_depth;
2738	iw_param.ird = conn_param->responder_resources;
2739	iw_param.private_data = conn_param->private_data;
2740	iw_param.private_data_len = conn_param->private_data_len;
2741	if (id_priv->id.qp)
2742		iw_param.qpn = id_priv->qp_num;
2743	else
2744		iw_param.qpn = conn_param->qp_num;
2745	ret = iw_cm_connect(cm_id, &iw_param);
2746out:
2747	if (ret && !IS_ERR(cm_id)) {
2748		iw_destroy_cm_id(cm_id);
2749		id_priv->cm_id.iw = NULL;
2750	}
2751	return ret;
2752}
2753
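/*
 * Initiate an active connection on an id whose route is resolved.  The
 * id moves from CMA_ROUTE_RESOLVED to CMA_CONNECT and, depending on the
 * transport and port space, either a SIDR request (UD port spaces), an
 * IB CM REQ, or an iWARP connect is issued.  Failures roll the state
 * back to CMA_ROUTE_RESOLVED.
 */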
2754int rdma_connect(struct rdma_cm_id *id, struct rdma_conn_param *conn_param)
2755{
2756	struct rdma_id_private *id_priv;
2757	int ret;
2758
2759	id_priv = container_of(id, struct rdma_id_private, id);
2760	if (!cma_comp_exch(id_priv, CMA_ROUTE_RESOLVED, CMA_CONNECT))
2761		return -EINVAL;
2762
2763	if (!id->qp) {
2764		id_priv->qp_num = conn_param->qp_num;
2765		id_priv->srq = conn_param->srq;
2766	}
2767
2768	switch (rdma_node_get_transport(id->device->node_type)) {
2769	case RDMA_TRANSPORT_IB:
2770		if (cma_is_ud_ps(id->ps))
2771			ret = cma_resolve_ib_udp(id_priv, conn_param);
2772		else
2773			ret = cma_connect_ib(id_priv, conn_param);
2774		break;
2775	case RDMA_TRANSPORT_IWARP:
2776		ret = cma_connect_iw(id_priv, conn_param);
2777		break;
2778	default:
2779		ret = -ENOSYS;
2780		break;
2781	}
2782	if (ret)
2783		goto err;
2784
2785	return 0;
2786err:
2787	cma_comp_exch(id_priv, CMA_CONNECT, CMA_ROUTE_RESOLVED);
2788	return ret;
2789}
2790EXPORT_SYMBOL(rdma_connect);
2791
2792static int cma_accept_ib(struct rdma_id_private *id_priv,
2793			 struct rdma_conn_param *conn_param)
2794{
2795	struct ib_cm_rep_param rep;
2796	int ret;
2797
2798	ret = cma_modify_qp_rtr(id_priv, conn_param);
2799	if (ret)
2800		goto out;
2801
2802	ret = cma_modify_qp_rts(id_priv, conn_param);
2803	if (ret)
2804		goto out;
2805
2806	memset(&rep, 0, sizeof rep);
2807	rep.qp_num = id_priv->qp_num;
2808	rep.starting_psn = id_priv->seq_num;
2809	rep.private_data = conn_param->private_data;
2810	rep.private_data_len = conn_param->private_data_len;
2811	rep.responder_resources = conn_param->responder_resources;
2812	rep.initiator_depth = conn_param->initiator_depth;
2813	rep.failover_accepted = 0;
2814	rep.flow_control = conn_param->flow_control;
2815	rep.rnr_retry_count = conn_param->rnr_retry_count;
2816	rep.srq = id_priv->srq ? 1 : 0;
2817
2818	ret = ib_send_cm_rep(id_priv->cm_id.ib, &rep);
2819out:
2820	return ret;
2821}
2822
2823static int cma_accept_iw(struct rdma_id_private *id_priv,
2824		  struct rdma_conn_param *conn_param)
2825{
2826	struct iw_cm_conn_param iw_param;
2827	int ret;
2828
2829	ret = cma_modify_qp_rtr(id_priv, conn_param);
2830	if (ret)
2831		return ret;
2832
2833	iw_param.ord = conn_param->initiator_depth;
2834	iw_param.ird = conn_param->responder_resources;
2835	iw_param.private_data = conn_param->private_data;
2836	iw_param.private_data_len = conn_param->private_data_len;
2837	if (id_priv->id.qp) {
2838		iw_param.qpn = id_priv->qp_num;
2839	} else
2840		iw_param.qpn = conn_param->qp_num;
2841
2842	return iw_cm_accept(id_priv->cm_id.iw, &iw_param);
2843}
2844
2845static int cma_send_sidr_rep(struct rdma_id_private *id_priv,
2846			     enum ib_cm_sidr_status status,
2847			     const void *private_data, int private_data_len)
2848{
2849	struct ib_cm_sidr_rep_param rep;
2850	int ret;
2851
2852	memset(&rep, 0, sizeof rep);
2853	rep.status = status;
2854	if (status == IB_SIDR_SUCCESS) {
2855		ret = cma_set_qkey(id_priv);
2856		if (ret)
2857			return ret;
2858		rep.qp_num = id_priv->qp_num;
2859		rep.qkey = id_priv->qkey;
2860	}
2861	rep.private_data = private_data;
2862	rep.private_data_len = private_data_len;
2863
2864	return ib_send_cm_sidr_rep(id_priv->cm_id.ib, &rep);
2865}
2866
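/*
 * Accept a connection request on an id in the CMA_CONNECT state.  For
 * IB, UD port spaces answer with a SIDR reply while connected port
 * spaces send a CM REP, falling back to cma_rep_recv() when no
 * conn_param is supplied; iWARP goes through iw_cm_accept().  Note that
 * the UD path dereferences conn_param, so callers on a UD port space are
 * expected to pass one.  Any failure is turned into a reject and the QP
 * is moved to the error state.
 */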
2867int rdma_accept(struct rdma_cm_id *id, struct rdma_conn_param *conn_param)
2868{
2869	struct rdma_id_private *id_priv;
2870	int ret;
2871
2872	id_priv = container_of(id, struct rdma_id_private, id);
2873	if (!cma_comp(id_priv, CMA_CONNECT))
2874		return -EINVAL;
2875
2876	if (!id->qp && conn_param) {
2877		id_priv->qp_num = conn_param->qp_num;
2878		id_priv->srq = conn_param->srq;
2879	}
2880
2881	switch (rdma_node_get_transport(id->device->node_type)) {
2882	case RDMA_TRANSPORT_IB:
2883		if (cma_is_ud_ps(id->ps))
2884			ret = cma_send_sidr_rep(id_priv, IB_SIDR_SUCCESS,
2885						conn_param->private_data,
2886						conn_param->private_data_len);
2887		else if (conn_param)
2888			ret = cma_accept_ib(id_priv, conn_param);
2889		else
2890			ret = cma_rep_recv(id_priv);
2891		break;
2892	case RDMA_TRANSPORT_IWARP:
2893		ret = cma_accept_iw(id_priv, conn_param);
2894		break;
2895	default:
2896		ret = -ENOSYS;
2897		break;
2898	}
2899
2900	if (ret)
2901		goto reject;
2902
2903	return 0;
2904reject:
2905	cma_modify_qp_err(id_priv);
2906	rdma_reject(id, NULL, 0);
2907	return ret;
2908}
2909EXPORT_SYMBOL(rdma_accept);
2910
2911int rdma_notify(struct rdma_cm_id *id, enum ib_event_type event)
2912{
2913	struct rdma_id_private *id_priv;
2914	int ret;
2915
2916	id_priv = container_of(id, struct rdma_id_private, id);
2917	if (!cma_has_cm_dev(id_priv))
2918		return -EINVAL;
2919
2920	switch (id->device->node_type) {
2921	case RDMA_NODE_IB_CA:
2922		ret = ib_cm_notify(id_priv->cm_id.ib, event);
2923		break;
2924	default:
2925		ret = 0;
2926		break;
2927	}
2928	return ret;
2929}
2930EXPORT_SYMBOL(rdma_notify);
2931
2932int rdma_reject(struct rdma_cm_id *id, const void *private_data,
2933		u8 private_data_len)
2934{
2935	struct rdma_id_private *id_priv;
2936	int ret;
2937
2938	id_priv = container_of(id, struct rdma_id_private, id);
2939	if (!cma_has_cm_dev(id_priv))
2940		return -EINVAL;
2941
2942	switch (rdma_node_get_transport(id->device->node_type)) {
2943	case RDMA_TRANSPORT_IB:
2944		if (cma_is_ud_ps(id->ps))
2945			ret = cma_send_sidr_rep(id_priv, IB_SIDR_REJECT,
2946						private_data, private_data_len);
2947		else
2948			ret = ib_send_cm_rej(id_priv->cm_id.ib,
2949					     IB_CM_REJ_CONSUMER_DEFINED, NULL,
2950					     0, private_data, private_data_len);
2951		break;
2952	case RDMA_TRANSPORT_IWARP:
2953		ret = iw_cm_reject(id_priv->cm_id.iw,
2954				   private_data, private_data_len);
2955		break;
2956	default:
2957		ret = -ENOSYS;
2958		break;
2959	}
2960	return ret;
2961}
2962EXPORT_SYMBOL(rdma_reject);
2963
2964int rdma_disconnect(struct rdma_cm_id *id)
2965{
2966	struct rdma_id_private *id_priv;
2967	int ret;
2968
2969	id_priv = container_of(id, struct rdma_id_private, id);
2970	if (!cma_has_cm_dev(id_priv))
2971		return -EINVAL;
2972
2973	switch (rdma_node_get_transport(id->device->node_type)) {
2974	case RDMA_TRANSPORT_IB:
2975		ret = cma_modify_qp_err(id_priv);
2976		if (ret)
2977			goto out;
2978		/* Initiate or respond to a disconnect. */
2979		if (ib_send_cm_dreq(id_priv->cm_id.ib, NULL, 0))
2980			ib_send_cm_drep(id_priv->cm_id.ib, NULL, 0);
2981		break;
2982	case RDMA_TRANSPORT_IWARP:
2983		ret = iw_cm_disconnect(id_priv->cm_id.iw, 0);
2984		break;
2985	default:
2986		ret = -EINVAL;
2987		break;
2988	}
2989out:
2990	return ret;
2991}
2992EXPORT_SYMBOL(rdma_disconnect);
2993
2994static int cma_ib_mc_handler(int status, struct ib_sa_multicast *multicast)
2995{
2996	struct rdma_id_private *id_priv;
2997	struct cma_multicast *mc = multicast->context;
2998	struct rdma_cm_event event;
2999	int ret;
3000
3001	id_priv = mc->id_priv;
3002	if (cma_disable_callback(id_priv, CMA_ADDR_BOUND) &&
3003	    cma_disable_callback(id_priv, CMA_ADDR_RESOLVED))
3004		return 0;
3005
3006	mutex_lock(&id_priv->qp_mutex);
3007	if (!status && id_priv->id.qp)
3008		status = ib_attach_mcast(id_priv->id.qp, &multicast->rec.mgid,
3009					 multicast->rec.mlid);
3010	mutex_unlock(&id_priv->qp_mutex);
3011
3012	memset(&event, 0, sizeof event);
3013	event.status = status;
3014	event.param.ud.private_data = mc->context;
3015	if (!status) {
3016		event.event = RDMA_CM_EVENT_MULTICAST_JOIN;
3017		ib_init_ah_from_mcmember(id_priv->id.device,
3018					 id_priv->id.port_num, &multicast->rec,
3019					 &event.param.ud.ah_attr);
3020		event.param.ud.qp_num = 0xFFFFFF;
3021		event.param.ud.qkey = be32_to_cpu(multicast->rec.qkey);
3022	} else
3023		event.event = RDMA_CM_EVENT_MULTICAST_ERROR;
3024
3025	ret = id_priv->id.event_handler(&id_priv->id, &event);
3026	if (ret) {
3027		cma_exch(id_priv, CMA_DESTROYING);
3028		mutex_unlock(&id_priv->handler_mutex);
3029		rdma_destroy_id(&id_priv->id);
3030		return 0;
3031	}
3032
3033	mutex_unlock(&id_priv->handler_mutex);
3034	return 0;
3035}
3036
3037static void cma_set_mgid(struct rdma_id_private *id_priv,
3038			 struct sockaddr *addr, union ib_gid *mgid)
3039{
3040#if defined(INET) || defined(INET6)
3041	unsigned char mc_map[MAX_ADDR_LEN];
3042	struct rdma_dev_addr *dev_addr = &id_priv->id.route.addr.dev_addr;
3043#endif
3044#ifdef INET
3045	struct sockaddr_in *sin = (struct sockaddr_in *) addr;
3046#endif
3047#ifdef INET6
3048	struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *) addr;
3049#endif
3050
3051	if (cma_any_addr(addr)) {
3052		memset(mgid, 0, sizeof *mgid);
3053#ifdef INET6
3054	} else if ((addr->sa_family == AF_INET6) &&
3055		   ((be32_to_cpu(sin6->sin6_addr.s6_addr32[0]) & 0xFFF0FFFF) ==
3056								 0xFF10A01B)) {
3057		/* IPv6 address is an SA assigned MGID. */
3058		/* The IPv6 address is an SA-assigned MGID; use it as-is. */
3059	} else if (addr->sa_family == AF_INET6) {
3060		ipv6_ib_mc_map(&sin6->sin6_addr, dev_addr->broadcast, mc_map);
3061		if (id_priv->id.ps == RDMA_PS_UDP)
3062			mc_map[7] = 0x01;	/* Use RDMA CM signature */
3063		*mgid = *(union ib_gid *) (mc_map + 4);
3064#endif
3065#ifdef INET
3066	} else {
3067		ip_ib_mc_map(sin->sin_addr.s_addr, dev_addr->broadcast, mc_map);
3068		if (id_priv->id.ps == RDMA_PS_UDP)
3069			mc_map[7] = 0x01;	/* Use RDMA CM signature */
3070		*mgid = *(union ib_gid *) (mc_map + 4);
3071#endif
3072	}
3073}
3074
3075static int cma_join_ib_multicast(struct rdma_id_private *id_priv,
3076				 struct cma_multicast *mc)
3077{
3078	struct ib_sa_mcmember_rec rec;
3079	struct rdma_dev_addr *dev_addr = &id_priv->id.route.addr.dev_addr;
3080	ib_sa_comp_mask comp_mask;
3081	int ret;
3082
3083	ib_addr_get_mgid(dev_addr, &rec.mgid);
3084	ret = ib_sa_get_mcmember_rec(id_priv->id.device, id_priv->id.port_num,
3085				     &rec.mgid, &rec);
3086	if (ret)
3087		return ret;
3088
3089	cma_set_mgid(id_priv, (struct sockaddr *) &mc->addr, &rec.mgid);
3090	if (id_priv->id.ps == RDMA_PS_UDP)
3091		rec.qkey = cpu_to_be32(RDMA_UDP_QKEY);
3092	rdma_addr_get_sgid(dev_addr, &rec.port_gid);
3093	rec.pkey = cpu_to_be16(ib_addr_get_pkey(dev_addr));
3094	rec.join_state = 1;
3095
3096	comp_mask = IB_SA_MCMEMBER_REC_MGID | IB_SA_MCMEMBER_REC_PORT_GID |
3097		    IB_SA_MCMEMBER_REC_PKEY | IB_SA_MCMEMBER_REC_JOIN_STATE |
3098		    IB_SA_MCMEMBER_REC_QKEY | IB_SA_MCMEMBER_REC_SL |
3099		    IB_SA_MCMEMBER_REC_FLOW_LABEL |
3100		    IB_SA_MCMEMBER_REC_TRAFFIC_CLASS;
3101
3102	if (id_priv->id.ps == RDMA_PS_IPOIB)
3103		comp_mask |= IB_SA_MCMEMBER_REC_RATE |
3104			     IB_SA_MCMEMBER_REC_RATE_SELECTOR;
3105
3106	mc->multicast.ib = ib_sa_join_multicast(&sa_client, id_priv->id.device,
3107						id_priv->id.port_num, &rec,
3108						comp_mask, GFP_KERNEL,
3109						cma_ib_mc_handler, mc);
3110	if (IS_ERR(mc->multicast.ib))
3111		return PTR_ERR(mc->multicast.ib);
3112
3113	return 0;
3114}
3115
3116
3117static void iboe_mcast_work_handler(struct work_struct *work)
3118{
3119	struct iboe_mcast_work *mw = container_of(work, struct iboe_mcast_work, work);
3120	struct cma_multicast *mc = mw->mc;
3121	struct ib_sa_multicast *m = mc->multicast.ib;
3122
3123	mc->multicast.ib->context = mc;
3124	cma_ib_mc_handler(0, m);
3125	kref_put(&mc->mcref, release_mc);
3126	kfree(mw);
3127}
3128
3129static void cma_iboe_set_mgid(struct sockaddr *addr, union ib_gid *mgid)
3130{
3131	struct sockaddr_in *sin = (struct sockaddr_in *)addr;
3132	struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)addr;
3133
3134	if (cma_any_addr(addr)) {
3135		memset(mgid, 0, sizeof *mgid);
3136	} else if (addr->sa_family == AF_INET6)
3137		memcpy(mgid, &sin6->sin6_addr, sizeof *mgid);
3138	else {
3139		mgid->raw[0] = 0xff;
3140		mgid->raw[1] = 0x0e;
3141		mgid->raw[2] = 0;
3142		mgid->raw[3] = 0;
3143		mgid->raw[4] = 0;
3144		mgid->raw[5] = 0;
3145		mgid->raw[6] = 0;
3146		mgid->raw[7] = 0;
3147		mgid->raw[8] = 0;
3148		mgid->raw[9] = 0;
3149		mgid->raw[10] = 0xff;
3150		mgid->raw[11] = 0xff;
3151		*(__be32 *)(&mgid->raw[12]) = sin->sin_addr.s_addr;
3152	}
3153}
3154
3155static int cma_iboe_join_multicast(struct rdma_id_private *id_priv,
3156				   struct cma_multicast *mc)
3157{
3158	struct iboe_mcast_work *work;
3159	struct rdma_dev_addr *dev_addr = &id_priv->id.route.addr.dev_addr;
3160	int err;
3161	struct sockaddr *addr = (struct sockaddr *)&mc->addr;
3162	struct net_device *ndev = NULL;
3163
3164	if (cma_zero_addr((struct sockaddr *)&mc->addr))
3165		return -EINVAL;
3166
3167	work = kzalloc(sizeof *work, GFP_KERNEL);
3168	if (!work)
3169		return -ENOMEM;
3170
3171	mc->multicast.ib = kzalloc(sizeof(struct ib_sa_multicast), GFP_KERNEL);
3172	if (!mc->multicast.ib) {
3173		err = -ENOMEM;
3174		goto out1;
3175	}
3176
3177	cma_iboe_set_mgid(addr, &mc->multicast.ib->rec.mgid);
3178
3179	mc->multicast.ib->rec.pkey = cpu_to_be16(0xffff);
3180	if (id_priv->id.ps == RDMA_PS_UDP)
3181		mc->multicast.ib->rec.qkey = cpu_to_be32(RDMA_UDP_QKEY);
3182
3183	if (dev_addr->bound_dev_if)
3184		ndev = dev_get_by_index(&init_net, dev_addr->bound_dev_if);
3185	if (!ndev) {
3186		err = -ENODEV;
3187		goto out2;
3188	}
3189
3190	mc->multicast.ib->rec.rate = iboe_get_rate(ndev);
3191	mc->multicast.ib->rec.hop_limit = 1;
3192#ifdef __linux__
3193	mc->multicast.ib->rec.mtu = iboe_get_mtu(ndev->mtu);
3194#else
3195	mc->multicast.ib->rec.mtu = iboe_get_mtu(ndev->if_mtu);
3196#endif
3197	dev_put(ndev);
3198	if (!mc->multicast.ib->rec.mtu) {
3199		err = -EINVAL;
3200		goto out2;
3201	}
3202	iboe_addr_get_sgid(dev_addr, &mc->multicast.ib->rec.port_gid);
3203	work->id = id_priv;
3204	work->mc = mc;
3205	INIT_WORK(&work->work, iboe_mcast_work_handler);
3206	kref_get(&mc->mcref);
3207	queue_work(cma_wq, &work->work);
3208
3209	return 0;
3210
3211out2:
3212	kfree(mc->multicast.ib);
3213out1:
3214	kfree(work);
3215	return err;
3216}
3217
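/*
 * Join a multicast group.  The id must be in CMA_ADDR_BOUND or
 * CMA_ADDR_RESOLVED; the address is recorded on the id's mc_list and the
 * join is performed through the SA on InfiniBand link layers, or built
 * locally and completed via the workqueue for Ethernet (RoCE).  Other
 * transports return -ENOSYS, and any failure unlinks and frees the
 * multicast entry.
 */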
3218int rdma_join_multicast(struct rdma_cm_id *id, struct sockaddr *addr,
3219			void *context)
3220{
3221	struct rdma_id_private *id_priv;
3222	struct cma_multicast *mc;
3223	int ret;
3224
3225	id_priv = container_of(id, struct rdma_id_private, id);
3226	if (!cma_comp(id_priv, CMA_ADDR_BOUND) &&
3227	    !cma_comp(id_priv, CMA_ADDR_RESOLVED))
3228		return -EINVAL;
3229
3230	mc = kmalloc(sizeof *mc, GFP_KERNEL);
3231	if (!mc)
3232		return -ENOMEM;
3233
3234	memcpy(&mc->addr, addr, ip_addr_size(addr));
3235	mc->context = context;
3236	mc->id_priv = id_priv;
3237
3238	spin_lock(&id_priv->lock);
3239	list_add(&mc->list, &id_priv->mc_list);
3240	spin_unlock(&id_priv->lock);
3241
3242	switch (rdma_node_get_transport(id->device->node_type)) {
3243	case RDMA_TRANSPORT_IB:
3244		switch (rdma_port_get_link_layer(id->device, id->port_num)) {
3245		case IB_LINK_LAYER_INFINIBAND:
3246			ret = cma_join_ib_multicast(id_priv, mc);
3247			break;
3248		case IB_LINK_LAYER_ETHERNET:
3249			kref_init(&mc->mcref);
3250			ret = cma_iboe_join_multicast(id_priv, mc);
3251			break;
3252		default:
3253			ret = -EINVAL;
3254		}
3255		break;
3256	default:
3257		ret = -ENOSYS;
3258		break;
3259	}
3260
3261	if (ret) {
3262		spin_lock_irq(&id_priv->lock);
3263		list_del(&mc->list);
3264		spin_unlock_irq(&id_priv->lock);
3265		kfree(mc);
3266	}
3267
3268	return ret;
3269}
3270EXPORT_SYMBOL(rdma_join_multicast);
3271
3272void rdma_leave_multicast(struct rdma_cm_id *id, struct sockaddr *addr)
3273{
3274	struct rdma_id_private *id_priv;
3275	struct cma_multicast *mc;
3276
3277	id_priv = container_of(id, struct rdma_id_private, id);
3278	spin_lock_irq(&id_priv->lock);
3279	list_for_each_entry(mc, &id_priv->mc_list, list) {
3280		if (!memcmp(&mc->addr, addr, ip_addr_size(addr))) {
3281			list_del(&mc->list);
3282			spin_unlock_irq(&id_priv->lock);
3283
3284			if (id->qp)
3285				ib_detach_mcast(id->qp,
3286						&mc->multicast.ib->rec.mgid,
3287						mc->multicast.ib->rec.mlid);
3288			if (rdma_node_get_transport(id_priv->cma_dev->device->node_type) == RDMA_TRANSPORT_IB) {
3289				switch (rdma_port_get_link_layer(id->device, id->port_num)) {
3290				case IB_LINK_LAYER_INFINIBAND:
3291					ib_sa_free_multicast(mc->multicast.ib);
3292					kfree(mc);
3293					break;
3294				case IB_LINK_LAYER_ETHERNET:
3295					kref_put(&mc->mcref, release_mc);
3296					break;
3297				default:
3298					break;
3299				}
3300			}
3301			return;
3302		}
3303	}
3304	spin_unlock_irq(&id_priv->lock);
3305}
3306EXPORT_SYMBOL(rdma_leave_multicast);
3307
3308static int cma_netdev_change(struct net_device *ndev, struct rdma_id_private *id_priv)
3309{
3310	struct rdma_dev_addr *dev_addr;
3311	struct cma_ndev_work *work;
3312
3313	dev_addr = &id_priv->id.route.addr.dev_addr;
3314
3315#ifdef __linux__
3316	if ((dev_addr->bound_dev_if == ndev->ifindex) &&
3317	    memcmp(dev_addr->src_dev_addr, ndev->dev_addr, ndev->addr_len)) {
3318		printk(KERN_INFO "RDMA CM addr change for ndev %s used by id %p\n",
3319		       ndev->name, &id_priv->id);
3320#else
3321	if ((dev_addr->bound_dev_if == ndev->if_index) &&
3322	    memcmp(dev_addr->src_dev_addr, IF_LLADDR(ndev), ndev->if_addrlen)) {
3323		printk(KERN_INFO "RDMA CM addr change for ndev %s used by id %p\n",
3324		       ndev->if_xname, &id_priv->id);
3325#endif
3326		work = kzalloc(sizeof *work, GFP_KERNEL);
3327		if (!work)
3328			return -ENOMEM;
3329
3330		INIT_WORK(&work->work, cma_ndev_work_handler);
3331		work->id = id_priv;
3332		work->event.event = RDMA_CM_EVENT_ADDR_CHANGE;
3333		atomic_inc(&id_priv->refcount);
3334		queue_work(cma_wq, &work->work);
3335	}
3336
3337	return 0;
3338}
3339
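/*
 * Net device notifier.  On Linux it reacts to a bonding failover on a
 * bond master; on FreeBSD to an interface going down or being
 * unregistered.  Every id bound to the affected interface whose cached
 * source MAC no longer matches the device is queued an
 * RDMA_CM_EVENT_ADDR_CHANGE event via cma_netdev_change().
 */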
3340static int cma_netdev_callback(struct notifier_block *self, unsigned long event,
3341			       void *ctx)
3342{
3343	struct net_device *ndev = (struct net_device *)ctx;
3344	struct cma_device *cma_dev;
3345	struct rdma_id_private *id_priv;
3346	int ret = NOTIFY_DONE;
3347
3348#ifdef __linux__
3349	if (dev_net(ndev) != &init_net)
3350		return NOTIFY_DONE;
3351
3352	if (event != NETDEV_BONDING_FAILOVER)
3353		return NOTIFY_DONE;
3354
3355	if (!(ndev->flags & IFF_MASTER) || !(ndev->priv_flags & IFF_BONDING))
3356		return NOTIFY_DONE;
3357#else
3358	if (event != NETDEV_DOWN && event != NETDEV_UNREGISTER)
3359		return NOTIFY_DONE;
3360#endif
3361
3362	mutex_lock(&lock);
3363	list_for_each_entry(cma_dev, &dev_list, list)
3364		list_for_each_entry(id_priv, &cma_dev->id_list, list) {
3365			ret = cma_netdev_change(ndev, id_priv);
3366			if (ret)
3367				goto out;
3368		}
3369
3370out:
3371	mutex_unlock(&lock);
3372	return ret;
3373}
3374
3375static struct notifier_block cma_nb = {
3376	.notifier_call = cma_netdev_callback
3377};
3378
3379static void cma_add_one(struct ib_device *device)
3380{
3381	struct cma_device *cma_dev;
3382	struct rdma_id_private *id_priv;
3383
3384	cma_dev = kmalloc(sizeof *cma_dev, GFP_KERNEL);
3385	if (!cma_dev)
3386		return;
3387
3388	cma_dev->device = device;
3389
3390	init_completion(&cma_dev->comp);
3391	atomic_set(&cma_dev->refcount, 1);
3392	INIT_LIST_HEAD(&cma_dev->id_list);
3393	ib_set_client_data(device, &cma_client, cma_dev);
3394
3395	mutex_lock(&lock);
3396	list_add_tail(&cma_dev->list, &dev_list);
3397	list_for_each_entry(id_priv, &listen_any_list, list)
3398		cma_listen_on_dev(id_priv, cma_dev);
3399	mutex_unlock(&lock);
3400}
3401
3402static int cma_remove_id_dev(struct rdma_id_private *id_priv)
3403{
3404	struct rdma_cm_event event;
3405	enum cma_state state;
3406	int ret = 0;
3407
3408	/* Record that we want to remove the device */
3409	state = cma_exch(id_priv, CMA_DEVICE_REMOVAL);
3410	if (state == CMA_DESTROYING)
3411		return 0;
3412
3413	cma_cancel_operation(id_priv, state);
3414	mutex_lock(&id_priv->handler_mutex);
3415
3416	/* Check for destruction from another callback. */
3417	if (!cma_comp(id_priv, CMA_DEVICE_REMOVAL))
3418		goto out;
3419
3420	memset(&event, 0, sizeof event);
3421	event.event = RDMA_CM_EVENT_DEVICE_REMOVAL;
3422	ret = id_priv->id.event_handler(&id_priv->id, &event);
3423out:
3424	mutex_unlock(&id_priv->handler_mutex);
3425	return ret;
3426}
3427
3428static void cma_process_remove(struct cma_device *cma_dev)
3429{
3430	struct rdma_id_private *id_priv;
3431	int ret;
3432
3433	mutex_lock(&lock);
3434	while (!list_empty(&cma_dev->id_list)) {
3435		id_priv = list_entry(cma_dev->id_list.next,
3436				     struct rdma_id_private, list);
3437
3438		list_del(&id_priv->listen_list);
3439		list_del_init(&id_priv->list);
3440		atomic_inc(&id_priv->refcount);
3441		mutex_unlock(&lock);
3442
3443		ret = id_priv->internal_id ? 1 : cma_remove_id_dev(id_priv);
3444		cma_deref_id(id_priv);
3445		if (ret)
3446			rdma_destroy_id(&id_priv->id);
3447
3448		mutex_lock(&lock);
3449	}
3450	mutex_unlock(&lock);
3451
3452	cma_deref_dev(cma_dev);
3453	wait_for_completion(&cma_dev->comp);
3454}
3455
3456static void cma_remove_one(struct ib_device *device)
3457{
3458	struct cma_device *cma_dev;
3459
3460	cma_dev = ib_get_client_data(device, &cma_client);
3461	if (!cma_dev)
3462		return;
3463
3464	mutex_lock(&lock);
3465	list_del(&cma_dev->list);
3466	mutex_unlock(&lock);
3467
3468	cma_process_remove(cma_dev);
3469	kfree(cma_dev);
3470}
3471
3472static int cma_init(void)
3473{
3474	int ret;
3475#if defined(INET)
3476	int low, high, remaining;
3477
3478	get_random_bytes(&next_port, sizeof next_port);
3479	inet_get_local_port_range(&low, &high);
3480	remaining = (high - low) + 1;
3481	next_port = ((unsigned int) next_port % remaining) + low;
3482#endif
3483
3484	cma_wq = create_singlethread_workqueue("rdma_cm");
3485	if (!cma_wq)
3486		return -ENOMEM;
3487
3488	ib_sa_register_client(&sa_client);
3489	rdma_addr_register_client(&addr_client);
3490	register_netdevice_notifier(&cma_nb);
3491
3492	ret = ib_register_client(&cma_client);
3493	if (ret)
3494		goto err;
3495	return 0;
3496
3497err:
3498	unregister_netdevice_notifier(&cma_nb);
3499	rdma_addr_unregister_client(&addr_client);
3500	ib_sa_unregister_client(&sa_client);
3501	destroy_workqueue(cma_wq);
3502	return ret;
3503}
3504
3505static void cma_cleanup(void)
3506{
3507	ib_unregister_client(&cma_client);
3508	unregister_netdevice_notifier(&cma_nb);
3509	rdma_addr_unregister_client(&addr_client);
3510	ib_sa_unregister_client(&sa_client);
3511	destroy_workqueue(cma_wq);
3512	idr_destroy(&sdp_ps);
3513	idr_destroy(&tcp_ps);
3514	idr_destroy(&udp_ps);
3515	idr_destroy(&ipoib_ps);
3516}
3517
3518module_init(cma_init);
3519module_exit(cma_cleanup);
3520