1/*
2 * Copyright (c) 2004-2007 Voltaire, Inc. All rights reserved.
3 * Copyright (c) 2005 Intel Corporation.  All rights reserved.
4 * Copyright (c) 2005 Mellanox Technologies Ltd.  All rights reserved.
5 * Copyright (c) 2009 HNR Consulting. All rights reserved.
6 *
7 * This software is available to you under a choice of one of two
8 * licenses.  You may choose to be licensed under the terms of the GNU
9 * General Public License (GPL) Version 2, available from the file
10 * COPYING in the main directory of this source tree, or the
11 * OpenIB.org BSD license below:
12 *
13 *     Redistribution and use in source and binary forms, with or
14 *     without modification, are permitted provided that the following
15 *     conditions are met:
16 *
17 *      - Redistributions of source code must retain the above
18 *        copyright notice, this list of conditions and the following
19 *        disclaimer.
20 *
21 *      - Redistributions in binary form must reproduce the above
22 *        copyright notice, this list of conditions and the following
23 *        disclaimer in the documentation and/or other materials
24 *        provided with the distribution.
25 *
26 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
27 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
28 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
29 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
30 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
31 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
32 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
33 * SOFTWARE.
34 *
35 */
36
37#define	LINUXKPI_PARAM_PREFIX ibcore_
38
39#include <linux/dma-mapping.h>
40#include <rdma/ib_cache.h>
41
42#include "mad_priv.h"
43#include "mad_rmpp.h"
44#include "smi.h"
45#include "agent.h"
46
47MODULE_LICENSE("Dual BSD/GPL");
48MODULE_DESCRIPTION("kernel IB MAD API");
49MODULE_AUTHOR("Hal Rosenstock");
50MODULE_AUTHOR("Sean Hefty");
51
52int mad_sendq_size = IB_MAD_QP_SEND_SIZE;
53int mad_recvq_size = IB_MAD_QP_RECV_SIZE;
54
55module_param_named(send_queue_size, mad_sendq_size, int, 0444);
56MODULE_PARM_DESC(send_queue_size, "Size of send queue in number of work requests");
57module_param_named(recv_queue_size, mad_recvq_size, int, 0444);
58MODULE_PARM_DESC(recv_queue_size, "Size of receive queue in number of work requests");
59
60static struct kmem_cache *ib_mad_cache;
61
62static struct list_head ib_mad_port_list;
63static u32 ib_mad_client_id = 0;
64
65/* Port list lock */
66static spinlock_t ib_mad_port_list_lock;
67
68
69/* Forward declarations */
70static int method_in_use(struct ib_mad_mgmt_method_table **method,
71			 struct ib_mad_reg_req *mad_reg_req);
72static void remove_mad_reg_req(struct ib_mad_agent_private *priv);
73static struct ib_mad_agent_private *find_mad_agent(
74					struct ib_mad_port_private *port_priv,
75					struct ib_mad *mad);
76static int ib_mad_post_receive_mads(struct ib_mad_qp_info *qp_info,
77				    struct ib_mad_private *mad);
78static void cancel_mads(struct ib_mad_agent_private *mad_agent_priv);
79static void timeout_sends(struct work_struct *work);
80static void local_completions(struct work_struct *work);
81static int add_nonoui_reg_req(struct ib_mad_reg_req *mad_reg_req,
82			      struct ib_mad_agent_private *agent_priv,
83			      u8 mgmt_class);
84static int add_oui_reg_req(struct ib_mad_reg_req *mad_reg_req,
85			   struct ib_mad_agent_private *agent_priv);
86
87/*
88 * Returns a ib_mad_port_private structure or NULL for a device/port
89 * Assumes ib_mad_port_list_lock is being held
90 */
91static inline struct ib_mad_port_private *
92__ib_get_mad_port(struct ib_device *device, int port_num)
93{
94	struct ib_mad_port_private *entry;
95
96	list_for_each_entry(entry, &ib_mad_port_list, port_list) {
97		if (entry->device == device && entry->port_num == port_num)
98			return entry;
99	}
100	return NULL;
101}
102
103/*
104 * Wrapper function to return a ib_mad_port_private structure or NULL
105 * for a device/port
106 */
107static inline struct ib_mad_port_private *
108ib_get_mad_port(struct ib_device *device, int port_num)
109{
110	struct ib_mad_port_private *entry;
111	unsigned long flags;
112
113	spin_lock_irqsave(&ib_mad_port_list_lock, flags);
114	entry = __ib_get_mad_port(device, port_num);
115	spin_unlock_irqrestore(&ib_mad_port_list_lock, flags);
116
117	return entry;
118}
119
120static inline u8 convert_mgmt_class(u8 mgmt_class)
121{
122	/* Alias IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE to 0 */
123	return mgmt_class == IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE ?
124		0 : mgmt_class;
125}
126
127static int get_spl_qp_index(enum ib_qp_type qp_type)
128{
129	switch (qp_type)
130	{
131	case IB_QPT_SMI:
132		return 0;
133	case IB_QPT_GSI:
134		return 1;
135	default:
136		return -1;
137	}
138}
139
140static int vendor_class_index(u8 mgmt_class)
141{
142	return mgmt_class - IB_MGMT_CLASS_VENDOR_RANGE2_START;
143}
144
145static int is_vendor_class(u8 mgmt_class)
146{
147	if ((mgmt_class < IB_MGMT_CLASS_VENDOR_RANGE2_START) ||
148	    (mgmt_class > IB_MGMT_CLASS_VENDOR_RANGE2_END))
149		return 0;
150	return 1;
151}
152
153static int is_vendor_oui(char *oui)
154{
155	if (oui[0] || oui[1] || oui[2])
156		return 1;
157	return 0;
158}
159
160static int is_vendor_method_in_use(
161		struct ib_mad_mgmt_vendor_class *vendor_class,
162		struct ib_mad_reg_req *mad_reg_req)
163{
164	struct ib_mad_mgmt_method_table *method;
165	int i;
166
167	for (i = 0; i < MAX_MGMT_OUI; i++) {
168		if (!memcmp(vendor_class->oui[i], mad_reg_req->oui, 3)) {
169			method = vendor_class->method_table[i];
170			if (method) {
171				if (method_in_use(&method, mad_reg_req))
172					return 1;
173				else
174					break;
175			}
176		}
177	}
178	return 0;
179}
180
181int ib_response_mad(struct ib_mad *mad)
182{
183	return ((mad->mad_hdr.method & IB_MGMT_METHOD_RESP) ||
184		(mad->mad_hdr.method == IB_MGMT_METHOD_TRAP_REPRESS) ||
185		((mad->mad_hdr.mgmt_class == IB_MGMT_CLASS_BM) &&
186		 (mad->mad_hdr.attr_mod & IB_BM_ATTR_MOD_RESP)));
187}
188EXPORT_SYMBOL(ib_response_mad);
189
190static void timeout_callback(unsigned long data)
191{
192	struct ib_mad_agent_private *mad_agent_priv =
193		(struct ib_mad_agent_private *) data;
194
195	queue_work(mad_agent_priv->qp_info->port_priv->wq,
196		   &mad_agent_priv->timeout_work);
197}
198
199/*
200 * ib_register_mad_agent - Register to send/receive MADs
201 */
202struct ib_mad_agent *ib_register_mad_agent(struct ib_device *device,
203					   u8 port_num,
204					   enum ib_qp_type qp_type,
205					   struct ib_mad_reg_req *mad_reg_req,
206					   u8 rmpp_version,
207					   ib_mad_send_handler send_handler,
208					   ib_mad_recv_handler recv_handler,
209					   void *context)
210{
211	struct ib_mad_port_private *port_priv;
212	struct ib_mad_agent *ret = ERR_PTR(-EINVAL);
213	struct ib_mad_agent_private *mad_agent_priv;
214	struct ib_mad_reg_req *reg_req = NULL;
215	struct ib_mad_mgmt_class_table *class;
216	struct ib_mad_mgmt_vendor_class_table *vendor;
217	struct ib_mad_mgmt_vendor_class *vendor_class;
218	struct ib_mad_mgmt_method_table *method;
219	int ret2, qpn;
220	unsigned long flags;
221	u8 mgmt_class, vclass;
222
223	/* Validate parameters */
224	qpn = get_spl_qp_index(qp_type);
225	if (qpn == -1)
226		goto error1;
227
228	if (rmpp_version && rmpp_version != IB_MGMT_RMPP_VERSION)
229		goto error1;
230
231	/* Validate MAD registration request if supplied */
232	if (mad_reg_req) {
233		if (mad_reg_req->mgmt_class_version >= MAX_MGMT_VERSION)
234			goto error1;
235		if (!recv_handler)
236			goto error1;
237		if (mad_reg_req->mgmt_class >= MAX_MGMT_CLASS) {
238			/*
239			 * IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE is the only
240			 * one in this range currently allowed
241			 */
242			if (mad_reg_req->mgmt_class !=
243			    IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE)
244				goto error1;
245		} else if (mad_reg_req->mgmt_class == 0) {
246			/*
247			 * Class 0 is reserved in IBA and is used for
248			 * aliasing of IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE
249			 */
250			goto error1;
251		} else if (is_vendor_class(mad_reg_req->mgmt_class)) {
252			/*
253			 * If class is in "new" vendor range,
254			 * ensure supplied OUI is not zero
255			 */
256			if (!is_vendor_oui(mad_reg_req->oui))
257				goto error1;
258		}
259		/* Make sure class supplied is consistent with RMPP */
260		if (!ib_is_mad_class_rmpp(mad_reg_req->mgmt_class)) {
261			if (rmpp_version)
262				goto error1;
263		}
264		/* Make sure class supplied is consistent with QP type */
265		if (qp_type == IB_QPT_SMI) {
266			if ((mad_reg_req->mgmt_class !=
267					IB_MGMT_CLASS_SUBN_LID_ROUTED) &&
268			    (mad_reg_req->mgmt_class !=
269					IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE))
270				goto error1;
271		} else {
272			if ((mad_reg_req->mgmt_class ==
273					IB_MGMT_CLASS_SUBN_LID_ROUTED) ||
274			    (mad_reg_req->mgmt_class ==
275					IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE))
276				goto error1;
277		}
278	} else {
279		/* No registration request supplied */
280		if (!send_handler)
281			goto error1;
282	}
283
284	/* Validate device and port */
285	port_priv = ib_get_mad_port(device, port_num);
286	if (!port_priv) {
287		ret = ERR_PTR(-ENODEV);
288		goto error1;
289	}
290
291	/* Allocate structures */
292	mad_agent_priv = kzalloc(sizeof *mad_agent_priv, GFP_KERNEL);
293	if (!mad_agent_priv) {
294		ret = ERR_PTR(-ENOMEM);
295		goto error1;
296	}
297
298	mad_agent_priv->agent.mr = ib_get_dma_mr(port_priv->qp_info[qpn].qp->pd,
299						 IB_ACCESS_LOCAL_WRITE);
300	if (IS_ERR(mad_agent_priv->agent.mr)) {
301		ret = ERR_PTR(-ENOMEM);
302		goto error2;
303	}
304
305	if (mad_reg_req) {
306		reg_req = kmalloc(sizeof *reg_req, GFP_KERNEL);
307		if (!reg_req) {
308			ret = ERR_PTR(-ENOMEM);
309			goto error3;
310		}
311		/* Make a copy of the MAD registration request */
312		memcpy(reg_req, mad_reg_req, sizeof *reg_req);
313	}
314
315	/* Now, fill in the various structures */
316	mad_agent_priv->qp_info = &port_priv->qp_info[qpn];
317	mad_agent_priv->reg_req = reg_req;
318	mad_agent_priv->agent.rmpp_version = rmpp_version;
319	mad_agent_priv->agent.device = device;
320	mad_agent_priv->agent.recv_handler = recv_handler;
321	mad_agent_priv->agent.send_handler = send_handler;
322	mad_agent_priv->agent.context = context;
323	mad_agent_priv->agent.qp = port_priv->qp_info[qpn].qp;
324	mad_agent_priv->agent.port_num = port_num;
325	spin_lock_init(&mad_agent_priv->lock);
326	INIT_LIST_HEAD(&mad_agent_priv->send_list);
327	INIT_LIST_HEAD(&mad_agent_priv->wait_list);
328	INIT_LIST_HEAD(&mad_agent_priv->done_list);
329	INIT_LIST_HEAD(&mad_agent_priv->rmpp_list);
330	INIT_WORK(&mad_agent_priv->timeout_work, timeout_sends);
331	setup_timer(&mad_agent_priv->timeout_timer, timeout_callback,
332		    (unsigned long) mad_agent_priv);
333	INIT_LIST_HEAD(&mad_agent_priv->local_list);
334	INIT_WORK(&mad_agent_priv->local_work, local_completions);
335	atomic_set(&mad_agent_priv->refcount, 1);
336	init_completion(&mad_agent_priv->comp);
337
338	spin_lock_irqsave(&port_priv->reg_lock, flags);
339	mad_agent_priv->agent.hi_tid = ++ib_mad_client_id;
340
341	/*
342	 * Make sure MAD registration (if supplied)
343	 * is non overlapping with any existing ones
344	 */
345	if (mad_reg_req) {
346		mgmt_class = convert_mgmt_class(mad_reg_req->mgmt_class);
347		if (!is_vendor_class(mgmt_class)) {
348			class = port_priv->version[mad_reg_req->
349						   mgmt_class_version].class;
350			if (class) {
351				method = class->method_table[mgmt_class];
352				if (method) {
353					if (method_in_use(&method,
354							   mad_reg_req))
355						goto error4;
356				}
357			}
358			ret2 = add_nonoui_reg_req(mad_reg_req, mad_agent_priv,
359						  mgmt_class);
360		} else {
361			/* "New" vendor class range */
362			vendor = port_priv->version[mad_reg_req->
363						    mgmt_class_version].vendor;
364			if (vendor) {
365				vclass = vendor_class_index(mgmt_class);
366				vendor_class = vendor->vendor_class[vclass];
367				if (vendor_class) {
368					if (is_vendor_method_in_use(
369							vendor_class,
370							mad_reg_req))
371						goto error4;
372				}
373			}
374			ret2 = add_oui_reg_req(mad_reg_req, mad_agent_priv);
375		}
376		if (ret2) {
377			ret = ERR_PTR(ret2);
378			goto error4;
379		}
380	}
381
382	/* Add mad agent into port's agent list */
383	list_add_tail(&mad_agent_priv->agent_list, &port_priv->agent_list);
384	spin_unlock_irqrestore(&port_priv->reg_lock, flags);
385
386	return &mad_agent_priv->agent;
387
388error4:
389	spin_unlock_irqrestore(&port_priv->reg_lock, flags);
390	kfree(reg_req);
391error3:
392	ib_dereg_mr(mad_agent_priv->agent.mr);
393error2:
394	kfree(mad_agent_priv);
395error1:
396	return ret;
397}
398EXPORT_SYMBOL(ib_register_mad_agent);
399
400static inline int is_snooping_sends(int mad_snoop_flags)
401{
402	return (mad_snoop_flags &
403		(/*IB_MAD_SNOOP_POSTED_SENDS |
404		 IB_MAD_SNOOP_RMPP_SENDS |*/
405		 IB_MAD_SNOOP_SEND_COMPLETIONS /*|
406		 IB_MAD_SNOOP_RMPP_SEND_COMPLETIONS*/));
407}
408
409static inline int is_snooping_recvs(int mad_snoop_flags)
410{
411	return (mad_snoop_flags &
412		(IB_MAD_SNOOP_RECVS /*|
413		 IB_MAD_SNOOP_RMPP_RECVS*/));
414}
415
416static int register_snoop_agent(struct ib_mad_qp_info *qp_info,
417				struct ib_mad_snoop_private *mad_snoop_priv)
418{
419	struct ib_mad_snoop_private **new_snoop_table;
420	unsigned long flags;
421	int i;
422
423	spin_lock_irqsave(&qp_info->snoop_lock, flags);
424	/* Check for empty slot in array. */
425	for (i = 0; i < qp_info->snoop_table_size; i++)
426		if (!qp_info->snoop_table[i])
427			break;
428
429	if (i == qp_info->snoop_table_size) {
430		/* Grow table. */
431		new_snoop_table = krealloc(qp_info->snoop_table,
432					   sizeof mad_snoop_priv *
433					   (qp_info->snoop_table_size + 1),
434					   GFP_ATOMIC);
435		if (!new_snoop_table) {
436			i = -ENOMEM;
437			goto out;
438		}
439
440		qp_info->snoop_table = new_snoop_table;
441		qp_info->snoop_table_size++;
442	}
443	qp_info->snoop_table[i] = mad_snoop_priv;
444	atomic_inc(&qp_info->snoop_count);
445out:
446	spin_unlock_irqrestore(&qp_info->snoop_lock, flags);
447	return i;
448}
449
450struct ib_mad_agent *ib_register_mad_snoop(struct ib_device *device,
451					   u8 port_num,
452					   enum ib_qp_type qp_type,
453					   int mad_snoop_flags,
454					   ib_mad_snoop_handler snoop_handler,
455					   ib_mad_recv_handler recv_handler,
456					   void *context)
457{
458	struct ib_mad_port_private *port_priv;
459	struct ib_mad_agent *ret;
460	struct ib_mad_snoop_private *mad_snoop_priv;
461	int qpn;
462
463	/* Validate parameters */
464	if ((is_snooping_sends(mad_snoop_flags) && !snoop_handler) ||
465	    (is_snooping_recvs(mad_snoop_flags) && !recv_handler)) {
466		ret = ERR_PTR(-EINVAL);
467		goto error1;
468	}
469	qpn = get_spl_qp_index(qp_type);
470	if (qpn == -1) {
471		ret = ERR_PTR(-EINVAL);
472		goto error1;
473	}
474	port_priv = ib_get_mad_port(device, port_num);
475	if (!port_priv) {
476		ret = ERR_PTR(-ENODEV);
477		goto error1;
478	}
479	/* Allocate structures */
480	mad_snoop_priv = kzalloc(sizeof *mad_snoop_priv, GFP_KERNEL);
481	if (!mad_snoop_priv) {
482		ret = ERR_PTR(-ENOMEM);
483		goto error1;
484	}
485
486	/* Now, fill in the various structures */
487	mad_snoop_priv->qp_info = &port_priv->qp_info[qpn];
488	mad_snoop_priv->agent.device = device;
489	mad_snoop_priv->agent.recv_handler = recv_handler;
490	mad_snoop_priv->agent.snoop_handler = snoop_handler;
491	mad_snoop_priv->agent.context = context;
492	mad_snoop_priv->agent.qp = port_priv->qp_info[qpn].qp;
493	mad_snoop_priv->agent.port_num = port_num;
494	mad_snoop_priv->mad_snoop_flags = mad_snoop_flags;
495	init_completion(&mad_snoop_priv->comp);
496	mad_snoop_priv->snoop_index = register_snoop_agent(
497						&port_priv->qp_info[qpn],
498						mad_snoop_priv);
499	if (mad_snoop_priv->snoop_index < 0) {
500		ret = ERR_PTR(mad_snoop_priv->snoop_index);
501		goto error2;
502	}
503
504	atomic_set(&mad_snoop_priv->refcount, 1);
505	return &mad_snoop_priv->agent;
506
507error2:
508	kfree(mad_snoop_priv);
509error1:
510	return ret;
511}
512EXPORT_SYMBOL(ib_register_mad_snoop);
513
514static inline void deref_mad_agent(struct ib_mad_agent_private *mad_agent_priv)
515{
516	if (atomic_dec_and_test(&mad_agent_priv->refcount))
517		complete(&mad_agent_priv->comp);
518}
519
520static inline void deref_snoop_agent(struct ib_mad_snoop_private *mad_snoop_priv)
521{
522	if (atomic_dec_and_test(&mad_snoop_priv->refcount))
523		complete(&mad_snoop_priv->comp);
524}
525
526static void unregister_mad_agent(struct ib_mad_agent_private *mad_agent_priv)
527{
528	struct ib_mad_port_private *port_priv;
529	unsigned long flags;
530
531	/* Note that we could still be handling received MADs */
532
533	/*
534	 * Canceling all sends results in dropping received response
535	 * MADs, preventing us from queuing additional work
536	 */
537	cancel_mads(mad_agent_priv);
538	port_priv = mad_agent_priv->qp_info->port_priv;
539	del_timer_sync(&mad_agent_priv->timeout_timer);
540	cancel_work_sync(&mad_agent_priv->timeout_work);
541
542	spin_lock_irqsave(&port_priv->reg_lock, flags);
543	remove_mad_reg_req(mad_agent_priv);
544	list_del(&mad_agent_priv->agent_list);
545	spin_unlock_irqrestore(&port_priv->reg_lock, flags);
546
547	flush_workqueue(port_priv->wq);
548	ib_cancel_rmpp_recvs(mad_agent_priv);
549
550	deref_mad_agent(mad_agent_priv);
551	wait_for_completion(&mad_agent_priv->comp);
552
553	kfree(mad_agent_priv->reg_req);
554	ib_dereg_mr(mad_agent_priv->agent.mr);
555	kfree(mad_agent_priv);
556}
557
558static void unregister_mad_snoop(struct ib_mad_snoop_private *mad_snoop_priv)
559{
560	struct ib_mad_qp_info *qp_info;
561	unsigned long flags;
562
563	qp_info = mad_snoop_priv->qp_info;
564	spin_lock_irqsave(&qp_info->snoop_lock, flags);
565	qp_info->snoop_table[mad_snoop_priv->snoop_index] = NULL;
566	atomic_dec(&qp_info->snoop_count);
567	spin_unlock_irqrestore(&qp_info->snoop_lock, flags);
568
569	deref_snoop_agent(mad_snoop_priv);
570	wait_for_completion(&mad_snoop_priv->comp);
571
572	kfree(mad_snoop_priv);
573}
574
575/*
576 * ib_unregister_mad_agent - Unregisters a client from using MAD services
577 */
578int ib_unregister_mad_agent(struct ib_mad_agent *mad_agent)
579{
580	struct ib_mad_agent_private *mad_agent_priv;
581	struct ib_mad_snoop_private *mad_snoop_priv;
582
583	/* If the TID is zero, the agent can only snoop. */
584	if (mad_agent->hi_tid) {
585		mad_agent_priv = container_of(mad_agent,
586					      struct ib_mad_agent_private,
587					      agent);
588		unregister_mad_agent(mad_agent_priv);
589	} else {
590		mad_snoop_priv = container_of(mad_agent,
591					      struct ib_mad_snoop_private,
592					      agent);
593		unregister_mad_snoop(mad_snoop_priv);
594	}
595	return 0;
596}
597EXPORT_SYMBOL(ib_unregister_mad_agent);
598
599static void dequeue_mad(struct ib_mad_list_head *mad_list)
600{
601	struct ib_mad_queue *mad_queue;
602	unsigned long flags;
603
604	BUG_ON(!mad_list->mad_queue);
605	mad_queue = mad_list->mad_queue;
606	spin_lock_irqsave(&mad_queue->lock, flags);
607	list_del(&mad_list->list);
608	mad_queue->count--;
609	spin_unlock_irqrestore(&mad_queue->lock, flags);
610}
611
612static void snoop_send(struct ib_mad_qp_info *qp_info,
613		       struct ib_mad_send_buf *send_buf,
614		       struct ib_mad_send_wc *mad_send_wc,
615		       int mad_snoop_flags)
616{
617	struct ib_mad_snoop_private *mad_snoop_priv;
618	unsigned long flags;
619	int i;
620
621	spin_lock_irqsave(&qp_info->snoop_lock, flags);
622	for (i = 0; i < qp_info->snoop_table_size; i++) {
623		mad_snoop_priv = qp_info->snoop_table[i];
624		if (!mad_snoop_priv ||
625		    !(mad_snoop_priv->mad_snoop_flags & mad_snoop_flags))
626			continue;
627
628		atomic_inc(&mad_snoop_priv->refcount);
629		spin_unlock_irqrestore(&qp_info->snoop_lock, flags);
630		mad_snoop_priv->agent.snoop_handler(&mad_snoop_priv->agent,
631						    send_buf, mad_send_wc);
632		deref_snoop_agent(mad_snoop_priv);
633		spin_lock_irqsave(&qp_info->snoop_lock, flags);
634	}
635	spin_unlock_irqrestore(&qp_info->snoop_lock, flags);
636}
637
638static void snoop_recv(struct ib_mad_qp_info *qp_info,
639		       struct ib_mad_recv_wc *mad_recv_wc,
640		       int mad_snoop_flags)
641{
642	struct ib_mad_snoop_private *mad_snoop_priv;
643	unsigned long flags;
644	int i;
645
646	spin_lock_irqsave(&qp_info->snoop_lock, flags);
647	for (i = 0; i < qp_info->snoop_table_size; i++) {
648		mad_snoop_priv = qp_info->snoop_table[i];
649		if (!mad_snoop_priv ||
650		    !(mad_snoop_priv->mad_snoop_flags & mad_snoop_flags))
651			continue;
652
653		atomic_inc(&mad_snoop_priv->refcount);
654		spin_unlock_irqrestore(&qp_info->snoop_lock, flags);
655		mad_snoop_priv->agent.recv_handler(&mad_snoop_priv->agent,
656						   mad_recv_wc);
657		deref_snoop_agent(mad_snoop_priv);
658		spin_lock_irqsave(&qp_info->snoop_lock, flags);
659	}
660	spin_unlock_irqrestore(&qp_info->snoop_lock, flags);
661}
662
663static void build_smp_wc(struct ib_qp *qp,
664			 u64 wr_id, u16 slid, u16 pkey_index, u8 port_num,
665			 struct ib_wc *wc)
666{
667	memset(wc, 0, sizeof *wc);
668	wc->wr_id = wr_id;
669	wc->status = IB_WC_SUCCESS;
670	wc->opcode = IB_WC_RECV;
671	wc->pkey_index = pkey_index;
672	wc->byte_len = sizeof(struct ib_mad) + sizeof(struct ib_grh);
673	wc->src_qp = IB_QP0;
674	wc->qp = qp;
675	wc->slid = slid;
676	wc->sl = 0;
677	wc->dlid_path_bits = 0;
678	wc->port_num = port_num;
679}
680
681/*
682 * Return 0 if SMP is to be sent
683 * Return 1 if SMP was consumed locally (whether or not solicited)
684 * Return < 0 if error
685 */
686static int handle_outgoing_dr_smp(struct ib_mad_agent_private *mad_agent_priv,
687				  struct ib_mad_send_wr_private *mad_send_wr)
688{
689	int ret = 0;
690	struct ib_smp *smp = mad_send_wr->send_buf.mad;
691	unsigned long flags;
692	struct ib_mad_local_private *local;
693	struct ib_mad_private *mad_priv;
694	struct ib_mad_port_private *port_priv;
695	struct ib_mad_agent_private *recv_mad_agent = NULL;
696	struct ib_device *device = mad_agent_priv->agent.device;
697	u8 port_num;
698	struct ib_wc mad_wc;
699	struct ib_send_wr *send_wr = &mad_send_wr->send_wr;
700
701	if (device->node_type == RDMA_NODE_IB_SWITCH)
702		port_num = send_wr->wr.ud.port_num;
703	else
704		port_num = mad_agent_priv->agent.port_num;
705
706	/*
707	 * Directed route handling starts if the initial LID routed part of
708	 * a request or the ending LID routed part of a response is empty.
709	 * If we are at the start of the LID routed part, don't update the
710	 * hop_ptr or hop_cnt.  See section 14.2.2, Vol 1 IB spec.
711	 */
712	if ((ib_get_smp_direction(smp) ? smp->dr_dlid : smp->dr_slid) !=
713	     IB_LID_PERMISSIVE)
714		goto out;
715	if (smi_handle_dr_smp_send(smp, device->node_type, port_num) ==
716	     IB_SMI_DISCARD) {
717		ret = -EINVAL;
718		printk(KERN_ERR PFX "Invalid directed route\n");
719		goto out;
720	}
721
722	/* Check to post send on QP or process locally */
723	if (smi_check_local_smp(smp, device) == IB_SMI_DISCARD &&
724	    smi_check_local_returning_smp(smp, device) == IB_SMI_DISCARD)
725		goto out;
726
727	local = kmalloc(sizeof *local, GFP_ATOMIC);
728	if (!local) {
729		ret = -ENOMEM;
730		printk(KERN_ERR PFX "No memory for ib_mad_local_private\n");
731		goto out;
732	}
733	local->mad_priv = NULL;
734	local->recv_mad_agent = NULL;
735	mad_priv = kmem_cache_alloc(ib_mad_cache, GFP_ATOMIC);
736	if (!mad_priv) {
737		ret = -ENOMEM;
738		printk(KERN_ERR PFX "No memory for local response MAD\n");
739		kfree(local);
740		goto out;
741	}
742
743	build_smp_wc(mad_agent_priv->agent.qp,
744		     send_wr->wr_id, be16_to_cpu(smp->dr_slid),
745		     send_wr->wr.ud.pkey_index,
746		     send_wr->wr.ud.port_num, &mad_wc);
747
748	/* No GRH for DR SMP */
749	ret = device->process_mad(device, 0, port_num, &mad_wc, NULL,
750				  (struct ib_mad *)smp,
751				  (struct ib_mad *)&mad_priv->mad);
752	switch (ret)
753	{
754	case IB_MAD_RESULT_SUCCESS | IB_MAD_RESULT_REPLY:
755		if (ib_response_mad(&mad_priv->mad.mad) &&
756		    mad_agent_priv->agent.recv_handler) {
757			local->mad_priv = mad_priv;
758			local->recv_mad_agent = mad_agent_priv;
759			/*
760			 * Reference MAD agent until receive
761			 * side of local completion handled
762			 */
763			atomic_inc(&mad_agent_priv->refcount);
764		} else
765			kmem_cache_free(ib_mad_cache, mad_priv);
766		break;
767	case IB_MAD_RESULT_SUCCESS | IB_MAD_RESULT_CONSUMED:
768		kmem_cache_free(ib_mad_cache, mad_priv);
769		break;
770	case IB_MAD_RESULT_SUCCESS:
771		/* Treat like an incoming receive MAD */
772		port_priv = ib_get_mad_port(mad_agent_priv->agent.device,
773					    mad_agent_priv->agent.port_num);
774		if (port_priv) {
775			memcpy(&mad_priv->mad.mad, smp, sizeof(struct ib_mad));
776			recv_mad_agent = find_mad_agent(port_priv,
777						        &mad_priv->mad.mad);
778		}
779		if (!port_priv || !recv_mad_agent) {
780			/*
781			 * No receiving agent so drop packet and
782			 * generate send completion.
783			 */
784			kmem_cache_free(ib_mad_cache, mad_priv);
785			break;
786		}
787		local->mad_priv = mad_priv;
788		local->recv_mad_agent = recv_mad_agent;
789		break;
790	default:
791		kmem_cache_free(ib_mad_cache, mad_priv);
792		kfree(local);
793		ret = -EINVAL;
794		goto out;
795	}
796
797	local->mad_send_wr = mad_send_wr;
798	/* Reference MAD agent until send side of local completion handled */
799	atomic_inc(&mad_agent_priv->refcount);
800	/* Queue local completion to local list */
801	spin_lock_irqsave(&mad_agent_priv->lock, flags);
802	list_add_tail(&local->completion_list, &mad_agent_priv->local_list);
803	spin_unlock_irqrestore(&mad_agent_priv->lock, flags);
804	queue_work(mad_agent_priv->qp_info->port_priv->wq,
805		   &mad_agent_priv->local_work);
806	ret = 1;
807out:
808	return ret;
809}
810
811static int get_pad_size(int hdr_len, int data_len)
812{
813	int seg_size, pad;
814
815	seg_size = sizeof(struct ib_mad) - hdr_len;
816	if (data_len && seg_size) {
817		pad = seg_size - data_len % seg_size;
818		return pad == seg_size ? 0 : pad;
819	} else
820		return seg_size;
821}
822
823static void free_send_rmpp_list(struct ib_mad_send_wr_private *mad_send_wr)
824{
825	struct ib_rmpp_segment *s, *t;
826
827	list_for_each_entry_safe(s, t, &mad_send_wr->rmpp_list, list) {
828		list_del(&s->list);
829		kfree(s);
830	}
831}
832
833static int alloc_send_rmpp_list(struct ib_mad_send_wr_private *send_wr,
834				gfp_t gfp_mask)
835{
836	struct ib_mad_send_buf *send_buf = &send_wr->send_buf;
837	struct ib_rmpp_mad *rmpp_mad = send_buf->mad;
838	struct ib_rmpp_segment *seg = NULL;
839	int left, seg_size, pad;
840
841	send_buf->seg_size = sizeof (struct ib_mad) - send_buf->hdr_len;
842	seg_size = send_buf->seg_size;
843	pad = send_wr->pad;
844
845	/* Allocate data segments. */
846	for (left = send_buf->data_len + pad; left > 0; left -= seg_size) {
847		seg = kmalloc(sizeof (*seg) + seg_size, gfp_mask);
848		if (!seg) {
849			printk(KERN_ERR "alloc_send_rmpp_segs: RMPP mem "
850			       "alloc failed for len %zd, gfp %#x\n",
851			       sizeof (*seg) + seg_size, gfp_mask);
852			free_send_rmpp_list(send_wr);
853			return -ENOMEM;
854		}
855		seg->num = ++send_buf->seg_count;
856		list_add_tail(&seg->list, &send_wr->rmpp_list);
857	}
858
859	/* Zero any padding */
860	if (pad)
861		memset(seg->data + seg_size - pad, 0, pad);
862
863	rmpp_mad->rmpp_hdr.rmpp_version = send_wr->mad_agent_priv->
864					  agent.rmpp_version;
865	rmpp_mad->rmpp_hdr.rmpp_type = IB_MGMT_RMPP_TYPE_DATA;
866	ib_set_rmpp_flags(&rmpp_mad->rmpp_hdr, IB_MGMT_RMPP_FLAG_ACTIVE);
867
868	send_wr->cur_seg = container_of(send_wr->rmpp_list.next,
869					struct ib_rmpp_segment, list);
870	send_wr->last_ack_seg = send_wr->cur_seg;
871	return 0;
872}
873
874struct ib_mad_send_buf * ib_create_send_mad(struct ib_mad_agent *mad_agent,
875					    u32 remote_qpn, u16 pkey_index,
876					    int rmpp_active,
877					    int hdr_len, int data_len,
878					    gfp_t gfp_mask)
879{
880	struct ib_mad_agent_private *mad_agent_priv;
881	struct ib_mad_send_wr_private *mad_send_wr;
882	int pad, message_size, ret, size;
883	void *buf;
884
885	mad_agent_priv = container_of(mad_agent, struct ib_mad_agent_private,
886				      agent);
887	pad = get_pad_size(hdr_len, data_len);
888	message_size = hdr_len + data_len + pad;
889
890	if ((!mad_agent->rmpp_version &&
891	     (rmpp_active || message_size > sizeof(struct ib_mad))) ||
892	    (!rmpp_active && message_size > sizeof(struct ib_mad)))
893		return ERR_PTR(-EINVAL);
894
895	size = rmpp_active ? hdr_len : sizeof(struct ib_mad);
896	buf = kzalloc(sizeof *mad_send_wr + size, gfp_mask);
897	if (!buf)
898		return ERR_PTR(-ENOMEM);
899
900	mad_send_wr = buf + size;
901	INIT_LIST_HEAD(&mad_send_wr->rmpp_list);
902	mad_send_wr->send_buf.mad = buf;
903	mad_send_wr->send_buf.hdr_len = hdr_len;
904	mad_send_wr->send_buf.data_len = data_len;
905	mad_send_wr->pad = pad;
906
907	mad_send_wr->mad_agent_priv = mad_agent_priv;
908	mad_send_wr->sg_list[0].length = hdr_len;
909	mad_send_wr->sg_list[0].lkey = mad_agent->mr->lkey;
910	mad_send_wr->sg_list[1].length = sizeof(struct ib_mad) - hdr_len;
911	mad_send_wr->sg_list[1].lkey = mad_agent->mr->lkey;
912
913	mad_send_wr->send_wr.wr_id = (unsigned long) mad_send_wr;
914	mad_send_wr->send_wr.sg_list = mad_send_wr->sg_list;
915	mad_send_wr->send_wr.num_sge = 2;
916	mad_send_wr->send_wr.opcode = IB_WR_SEND;
917	mad_send_wr->send_wr.send_flags = IB_SEND_SIGNALED;
918	mad_send_wr->send_wr.wr.ud.remote_qpn = remote_qpn;
919	mad_send_wr->send_wr.wr.ud.remote_qkey = IB_QP_SET_QKEY;
920	mad_send_wr->send_wr.wr.ud.pkey_index = pkey_index;
921
922	if (rmpp_active) {
923		ret = alloc_send_rmpp_list(mad_send_wr, gfp_mask);
924		if (ret) {
925			kfree(buf);
926			return ERR_PTR(ret);
927		}
928	}
929
930	mad_send_wr->send_buf.mad_agent = mad_agent;
931	atomic_inc(&mad_agent_priv->refcount);
932	return &mad_send_wr->send_buf;
933}
934EXPORT_SYMBOL(ib_create_send_mad);
935
936int ib_get_mad_data_offset(u8 mgmt_class)
937{
938	if (mgmt_class == IB_MGMT_CLASS_SUBN_ADM)
939		return IB_MGMT_SA_HDR;
940	else if ((mgmt_class == IB_MGMT_CLASS_DEVICE_MGMT) ||
941		 (mgmt_class == IB_MGMT_CLASS_DEVICE_ADM) ||
942		 (mgmt_class == IB_MGMT_CLASS_BIS))
943		return IB_MGMT_DEVICE_HDR;
944	else if ((mgmt_class >= IB_MGMT_CLASS_VENDOR_RANGE2_START) &&
945		 (mgmt_class <= IB_MGMT_CLASS_VENDOR_RANGE2_END))
946		return IB_MGMT_VENDOR_HDR;
947	else
948		return IB_MGMT_MAD_HDR;
949}
950EXPORT_SYMBOL(ib_get_mad_data_offset);
951
952int ib_is_mad_class_rmpp(u8 mgmt_class)
953{
954	if ((mgmt_class == IB_MGMT_CLASS_SUBN_ADM) ||
955	    (mgmt_class == IB_MGMT_CLASS_DEVICE_MGMT) ||
956	    (mgmt_class == IB_MGMT_CLASS_DEVICE_ADM) ||
957	    (mgmt_class == IB_MGMT_CLASS_BIS) ||
958	    ((mgmt_class >= IB_MGMT_CLASS_VENDOR_RANGE2_START) &&
959	     (mgmt_class <= IB_MGMT_CLASS_VENDOR_RANGE2_END)))
960		return 1;
961	return 0;
962}
963EXPORT_SYMBOL(ib_is_mad_class_rmpp);
964
965void *ib_get_rmpp_segment(struct ib_mad_send_buf *send_buf, int seg_num)
966{
967	struct ib_mad_send_wr_private *mad_send_wr;
968	struct list_head *list;
969
970	mad_send_wr = container_of(send_buf, struct ib_mad_send_wr_private,
971				   send_buf);
972	list = &mad_send_wr->cur_seg->list;
973
974	if (mad_send_wr->cur_seg->num < seg_num) {
975		list_for_each_entry(mad_send_wr->cur_seg, list, list)
976			if (mad_send_wr->cur_seg->num == seg_num)
977				break;
978	} else if (mad_send_wr->cur_seg->num > seg_num) {
979		list_for_each_entry_reverse(mad_send_wr->cur_seg, list, list)
980			if (mad_send_wr->cur_seg->num == seg_num)
981				break;
982	}
983	return mad_send_wr->cur_seg->data;
984}
985EXPORT_SYMBOL(ib_get_rmpp_segment);
986
987static inline void *ib_get_payload(struct ib_mad_send_wr_private *mad_send_wr)
988{
989	if (mad_send_wr->send_buf.seg_count)
990		return ib_get_rmpp_segment(&mad_send_wr->send_buf,
991					   mad_send_wr->seg_num);
992	else
993		return mad_send_wr->send_buf.mad +
994		       mad_send_wr->send_buf.hdr_len;
995}
996
997void ib_free_send_mad(struct ib_mad_send_buf *send_buf)
998{
999	struct ib_mad_agent_private *mad_agent_priv;
1000	struct ib_mad_send_wr_private *mad_send_wr;
1001
1002	mad_agent_priv = container_of(send_buf->mad_agent,
1003				      struct ib_mad_agent_private, agent);
1004	mad_send_wr = container_of(send_buf, struct ib_mad_send_wr_private,
1005				   send_buf);
1006
1007	free_send_rmpp_list(mad_send_wr);
1008	kfree(send_buf->mad);
1009	deref_mad_agent(mad_agent_priv);
1010}
1011EXPORT_SYMBOL(ib_free_send_mad);
1012
1013int ib_send_mad(struct ib_mad_send_wr_private *mad_send_wr)
1014{
1015	struct ib_mad_qp_info *qp_info;
1016	struct list_head *list;
1017	struct ib_send_wr *bad_send_wr;
1018	struct ib_mad_agent *mad_agent;
1019	struct ib_sge *sge;
1020	unsigned long flags;
1021	int ret;
1022
1023	/* Set WR ID to find mad_send_wr upon completion */
1024	qp_info = mad_send_wr->mad_agent_priv->qp_info;
1025	mad_send_wr->send_wr.wr_id = (unsigned long)&mad_send_wr->mad_list;
1026	mad_send_wr->mad_list.mad_queue = &qp_info->send_queue;
1027
1028	mad_agent = mad_send_wr->send_buf.mad_agent;
1029	sge = mad_send_wr->sg_list;
1030	sge[0].addr = ib_dma_map_single(mad_agent->device,
1031					mad_send_wr->send_buf.mad,
1032					sge[0].length,
1033					DMA_TO_DEVICE);
1034	mad_send_wr->header_mapping = sge[0].addr;
1035
1036	sge[1].addr = ib_dma_map_single(mad_agent->device,
1037					ib_get_payload(mad_send_wr),
1038					sge[1].length,
1039					DMA_TO_DEVICE);
1040	mad_send_wr->payload_mapping = sge[1].addr;
1041
1042	spin_lock_irqsave(&qp_info->send_queue.lock, flags);
1043	if (qp_info->send_queue.count < qp_info->send_queue.max_active) {
1044		ret = ib_post_send(mad_agent->qp, &mad_send_wr->send_wr,
1045				   &bad_send_wr);
1046		list = &qp_info->send_queue.list;
1047	} else {
1048		ret = 0;
1049		list = &qp_info->overflow_list;
1050	}
1051
1052	if (!ret) {
1053		qp_info->send_queue.count++;
1054		list_add_tail(&mad_send_wr->mad_list.list, list);
1055	}
1056	spin_unlock_irqrestore(&qp_info->send_queue.lock, flags);
1057	if (ret) {
1058		ib_dma_unmap_single(mad_agent->device,
1059				    mad_send_wr->header_mapping,
1060				    sge[0].length, DMA_TO_DEVICE);
1061		ib_dma_unmap_single(mad_agent->device,
1062				    mad_send_wr->payload_mapping,
1063				    sge[1].length, DMA_TO_DEVICE);
1064	}
1065	return ret;
1066}
1067
1068/*
1069 * ib_post_send_mad - Posts MAD(s) to the send queue of the QP associated
1070 *  with the registered client
1071 */
1072int ib_post_send_mad(struct ib_mad_send_buf *send_buf,
1073		     struct ib_mad_send_buf **bad_send_buf)
1074{
1075	struct ib_mad_agent_private *mad_agent_priv;
1076	struct ib_mad_send_buf *next_send_buf;
1077	struct ib_mad_send_wr_private *mad_send_wr;
1078	unsigned long flags;
1079	int ret = -EINVAL;
1080
1081	/* Walk list of send WRs and post each on send list */
1082	for (; send_buf; send_buf = next_send_buf) {
1083
1084		mad_send_wr = container_of(send_buf,
1085					   struct ib_mad_send_wr_private,
1086					   send_buf);
1087		mad_agent_priv = mad_send_wr->mad_agent_priv;
1088
1089		if (!send_buf->mad_agent->send_handler ||
1090		    (send_buf->timeout_ms &&
1091		     !send_buf->mad_agent->recv_handler)) {
1092			ret = -EINVAL;
1093			goto error;
1094		}
1095
1096		if (!ib_is_mad_class_rmpp(((struct ib_mad_hdr *) send_buf->mad)->mgmt_class)) {
1097			if (mad_agent_priv->agent.rmpp_version) {
1098				ret = -EINVAL;
1099				goto error;
1100			}
1101		}
1102
1103		/*
1104		 * Save pointer to next work request to post in case the
1105		 * current one completes, and the user modifies the work
1106		 * request associated with the completion
1107		 */
1108		next_send_buf = send_buf->next;
1109		mad_send_wr->send_wr.wr.ud.ah = send_buf->ah;
1110
1111		if (((struct ib_mad_hdr *) send_buf->mad)->mgmt_class ==
1112		    IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE) {
1113			ret = handle_outgoing_dr_smp(mad_agent_priv,
1114						     mad_send_wr);
1115			if (ret < 0)		/* error */
1116				goto error;
1117			else if (ret == 1)	/* locally consumed */
1118				continue;
1119		}
1120
1121		mad_send_wr->tid = ((struct ib_mad_hdr *) send_buf->mad)->tid;
1122		/* Timeout will be updated after send completes */
1123		mad_send_wr->timeout = msecs_to_jiffies(send_buf->timeout_ms);
1124		mad_send_wr->max_retries = send_buf->retries;
1125		mad_send_wr->retries_left = send_buf->retries;
1126		send_buf->retries = 0;
1127		/* Reference for work request to QP + response */
1128		mad_send_wr->refcount = 1 + (mad_send_wr->timeout > 0);
1129		mad_send_wr->status = IB_WC_SUCCESS;
1130
1131		/* Reference MAD agent until send completes */
1132		atomic_inc(&mad_agent_priv->refcount);
1133		spin_lock_irqsave(&mad_agent_priv->lock, flags);
1134		list_add_tail(&mad_send_wr->agent_list,
1135			      &mad_agent_priv->send_list);
1136		spin_unlock_irqrestore(&mad_agent_priv->lock, flags);
1137
1138		if (mad_agent_priv->agent.rmpp_version) {
1139			ret = ib_send_rmpp_mad(mad_send_wr);
1140			if (ret >= 0 && ret != IB_RMPP_RESULT_CONSUMED)
1141				ret = ib_send_mad(mad_send_wr);
1142		} else
1143			ret = ib_send_mad(mad_send_wr);
1144		if (ret < 0) {
1145			/* Fail send request */
1146			spin_lock_irqsave(&mad_agent_priv->lock, flags);
1147			list_del(&mad_send_wr->agent_list);
1148			spin_unlock_irqrestore(&mad_agent_priv->lock, flags);
1149			atomic_dec(&mad_agent_priv->refcount);
1150			goto error;
1151		}
1152	}
1153	return 0;
1154error:
1155	if (bad_send_buf)
1156		*bad_send_buf = send_buf;
1157	return ret;
1158}
1159EXPORT_SYMBOL(ib_post_send_mad);
1160
1161/*
1162 * ib_free_recv_mad - Returns data buffers used to receive
1163 *  a MAD to the access layer
1164 */
1165void ib_free_recv_mad(struct ib_mad_recv_wc *mad_recv_wc)
1166{
1167	struct ib_mad_recv_buf *mad_recv_buf, *temp_recv_buf;
1168	struct ib_mad_private_header *mad_priv_hdr;
1169	struct ib_mad_private *priv;
1170	struct list_head free_list;
1171
1172	INIT_LIST_HEAD(&free_list);
1173	list_splice_init(&mad_recv_wc->rmpp_list, &free_list);
1174
1175	list_for_each_entry_safe(mad_recv_buf, temp_recv_buf,
1176					&free_list, list) {
1177		mad_recv_wc = container_of(mad_recv_buf, struct ib_mad_recv_wc,
1178					   recv_buf);
1179		mad_priv_hdr = container_of(mad_recv_wc,
1180					    struct ib_mad_private_header,
1181					    recv_wc);
1182		priv = container_of(mad_priv_hdr, struct ib_mad_private,
1183				    header);
1184		kmem_cache_free(ib_mad_cache, priv);
1185	}
1186}
1187EXPORT_SYMBOL(ib_free_recv_mad);
1188
1189struct ib_mad_agent *ib_redirect_mad_qp(struct ib_qp *qp,
1190					u8 rmpp_version,
1191					ib_mad_send_handler send_handler,
1192					ib_mad_recv_handler recv_handler,
1193					void *context)
1194{
1195	return ERR_PTR(-EINVAL);	/* XXX: for now */
1196}
1197EXPORT_SYMBOL(ib_redirect_mad_qp);
1198
1199int ib_process_mad_wc(struct ib_mad_agent *mad_agent,
1200		      struct ib_wc *wc)
1201{
1202	printk(KERN_ERR PFX "ib_process_mad_wc() not implemented yet\n");
1203	return 0;
1204}
1205EXPORT_SYMBOL(ib_process_mad_wc);
1206
1207static int method_in_use(struct ib_mad_mgmt_method_table **method,
1208			 struct ib_mad_reg_req *mad_reg_req)
1209{
1210	int i;
1211
1212	for (i = find_first_bit(mad_reg_req->method_mask, IB_MGMT_MAX_METHODS);
1213	     i < IB_MGMT_MAX_METHODS;
1214	     i = find_next_bit(mad_reg_req->method_mask, IB_MGMT_MAX_METHODS,
1215			       1+i)) {
1216		if ((*method)->agent[i]) {
1217			printk(KERN_ERR PFX "Method %d already in use\n", i);
1218			return -EINVAL;
1219		}
1220	}
1221	return 0;
1222}
1223
1224static int allocate_method_table(struct ib_mad_mgmt_method_table **method)
1225{
1226	/* Allocate management method table */
1227	*method = kzalloc(sizeof **method, GFP_ATOMIC);
1228	if (!*method) {
1229		printk(KERN_ERR PFX "No memory for "
1230		       "ib_mad_mgmt_method_table\n");
1231		return -ENOMEM;
1232	}
1233
1234	return 0;
1235}
1236
1237/*
1238 * Check to see if there are any methods still in use
1239 */
1240static int check_method_table(struct ib_mad_mgmt_method_table *method)
1241{
1242	int i;
1243
1244	for (i = 0; i < IB_MGMT_MAX_METHODS; i++)
1245		if (method->agent[i])
1246			return 1;
1247	return 0;
1248}
1249
1250/*
1251 * Check to see if there are any method tables for this class still in use
1252 */
1253static int check_class_table(struct ib_mad_mgmt_class_table *class)
1254{
1255	int i;
1256
1257	for (i = 0; i < MAX_MGMT_CLASS; i++)
1258		if (class->method_table[i])
1259			return 1;
1260	return 0;
1261}
1262
1263static int check_vendor_class(struct ib_mad_mgmt_vendor_class *vendor_class)
1264{
1265	int i;
1266
1267	for (i = 0; i < MAX_MGMT_OUI; i++)
1268		if (vendor_class->method_table[i])
1269			return 1;
1270	return 0;
1271}
1272
1273static int find_vendor_oui(struct ib_mad_mgmt_vendor_class *vendor_class,
1274			   char *oui)
1275{
1276	int i;
1277
1278	for (i = 0; i < MAX_MGMT_OUI; i++)
1279		/* Is there matching OUI for this vendor class ? */
1280		if (!memcmp(vendor_class->oui[i], oui, 3))
1281			return i;
1282
1283	return -1;
1284}
1285
1286static int check_vendor_table(struct ib_mad_mgmt_vendor_class_table *vendor)
1287{
1288	int i;
1289
1290	for (i = 0; i < MAX_MGMT_VENDOR_RANGE2; i++)
1291		if (vendor->vendor_class[i])
1292			return 1;
1293
1294	return 0;
1295}
1296
1297static void remove_methods_mad_agent(struct ib_mad_mgmt_method_table *method,
1298				     struct ib_mad_agent_private *agent)
1299{
1300	int i;
1301
1302	/* Remove any methods for this mad agent */
1303	for (i = 0; i < IB_MGMT_MAX_METHODS; i++) {
1304		if (method->agent[i] == agent) {
1305			method->agent[i] = NULL;
1306		}
1307	}
1308}
1309
1310static int add_nonoui_reg_req(struct ib_mad_reg_req *mad_reg_req,
1311			      struct ib_mad_agent_private *agent_priv,
1312			      u8 mgmt_class)
1313{
1314	struct ib_mad_port_private *port_priv;
1315	struct ib_mad_mgmt_class_table **class;
1316	struct ib_mad_mgmt_method_table **method;
1317	int i, ret;
1318
1319	port_priv = agent_priv->qp_info->port_priv;
1320	class = &port_priv->version[mad_reg_req->mgmt_class_version].class;
1321	if (!*class) {
1322		/* Allocate management class table for "new" class version */
1323		*class = kzalloc(sizeof **class, GFP_ATOMIC);
1324		if (!*class) {
1325			printk(KERN_ERR PFX "No memory for "
1326			       "ib_mad_mgmt_class_table\n");
1327			ret = -ENOMEM;
1328			goto error1;
1329		}
1330
1331		/* Allocate method table for this management class */
1332		method = &(*class)->method_table[mgmt_class];
1333		if ((ret = allocate_method_table(method)))
1334			goto error2;
1335	} else {
1336		method = &(*class)->method_table[mgmt_class];
1337		if (!*method) {
1338			/* Allocate method table for this management class */
1339			if ((ret = allocate_method_table(method)))
1340				goto error1;
1341		}
1342	}
1343
1344	/* Now, make sure methods are not already in use */
1345	if (method_in_use(method, mad_reg_req))
1346		goto error3;
1347
1348	/* Finally, add in methods being registered */
1349	for (i = find_first_bit(mad_reg_req->method_mask,
1350				IB_MGMT_MAX_METHODS);
1351	     i < IB_MGMT_MAX_METHODS;
1352	     i = find_next_bit(mad_reg_req->method_mask, IB_MGMT_MAX_METHODS,
1353			       1+i)) {
1354		(*method)->agent[i] = agent_priv;
1355	}
1356	return 0;
1357
1358error3:
1359	/* Remove any methods for this mad agent */
1360	remove_methods_mad_agent(*method, agent_priv);
1361	/* Now, check to see if there are any methods in use */
1362	if (!check_method_table(*method)) {
1363		/* If not, release management method table */
1364		kfree(*method);
1365		*method = NULL;
1366	}
1367	ret = -EINVAL;
1368	goto error1;
1369error2:
1370	kfree(*class);
1371	*class = NULL;
1372error1:
1373	return ret;
1374}
1375
1376static int add_oui_reg_req(struct ib_mad_reg_req *mad_reg_req,
1377			   struct ib_mad_agent_private *agent_priv)
1378{
1379	struct ib_mad_port_private *port_priv;
1380	struct ib_mad_mgmt_vendor_class_table **vendor_table;
1381	struct ib_mad_mgmt_vendor_class_table *vendor = NULL;
1382	struct ib_mad_mgmt_vendor_class *vendor_class = NULL;
1383	struct ib_mad_mgmt_method_table **method;
1384	int i, ret = -ENOMEM;
1385	u8 vclass;
1386
1387	/* "New" vendor (with OUI) class */
1388	vclass = vendor_class_index(mad_reg_req->mgmt_class);
1389	port_priv = agent_priv->qp_info->port_priv;
1390	vendor_table = &port_priv->version[
1391				mad_reg_req->mgmt_class_version].vendor;
1392	if (!*vendor_table) {
1393		/* Allocate mgmt vendor class table for "new" class version */
1394		vendor = kzalloc(sizeof *vendor, GFP_ATOMIC);
1395		if (!vendor) {
1396			printk(KERN_ERR PFX "No memory for "
1397			       "ib_mad_mgmt_vendor_class_table\n");
1398			goto error1;
1399		}
1400
1401		*vendor_table = vendor;
1402	}
1403	if (!(*vendor_table)->vendor_class[vclass]) {
1404		/* Allocate table for this management vendor class */
1405		vendor_class = kzalloc(sizeof *vendor_class, GFP_ATOMIC);
1406		if (!vendor_class) {
1407			printk(KERN_ERR PFX "No memory for "
1408			       "ib_mad_mgmt_vendor_class\n");
1409			goto error2;
1410		}
1411
1412		(*vendor_table)->vendor_class[vclass] = vendor_class;
1413	}
1414	for (i = 0; i < MAX_MGMT_OUI; i++) {
1415		/* Is there matching OUI for this vendor class ? */
1416		if (!memcmp((*vendor_table)->vendor_class[vclass]->oui[i],
1417			    mad_reg_req->oui, 3)) {
1418			method = &(*vendor_table)->vendor_class[
1419						vclass]->method_table[i];
1420			BUG_ON(!*method);
1421			goto check_in_use;
1422		}
1423	}
1424	for (i = 0; i < MAX_MGMT_OUI; i++) {
1425		/* OUI slot available ? */
1426		if (!is_vendor_oui((*vendor_table)->vendor_class[
1427				vclass]->oui[i])) {
1428			method = &(*vendor_table)->vendor_class[
1429				vclass]->method_table[i];
1430			BUG_ON(*method);
1431			/* Allocate method table for this OUI */
1432			if ((ret = allocate_method_table(method)))
1433				goto error3;
1434			memcpy((*vendor_table)->vendor_class[vclass]->oui[i],
1435			       mad_reg_req->oui, 3);
1436			goto check_in_use;
1437		}
1438	}
1439	printk(KERN_ERR PFX "All OUI slots in use\n");
1440	goto error3;
1441
1442check_in_use:
1443	/* Now, make sure methods are not already in use */
1444	if (method_in_use(method, mad_reg_req))
1445		goto error4;
1446
1447	/* Finally, add in methods being registered */
1448	for (i = find_first_bit(mad_reg_req->method_mask,
1449				IB_MGMT_MAX_METHODS);
1450	     i < IB_MGMT_MAX_METHODS;
1451	     i = find_next_bit(mad_reg_req->method_mask, IB_MGMT_MAX_METHODS,
1452			       1+i)) {
1453		(*method)->agent[i] = agent_priv;
1454	}
1455	return 0;
1456
1457error4:
1458	/* Remove any methods for this mad agent */
1459	remove_methods_mad_agent(*method, agent_priv);
1460	/* Now, check to see if there are any methods in use */
1461	if (!check_method_table(*method)) {
1462		/* If not, release management method table */
1463		kfree(*method);
1464		*method = NULL;
1465	}
1466	ret = -EINVAL;
1467error3:
1468	if (vendor_class) {
1469		(*vendor_table)->vendor_class[vclass] = NULL;
1470		kfree(vendor_class);
1471	}
1472error2:
1473	if (vendor) {
1474		*vendor_table = NULL;
1475		kfree(vendor);
1476	}
1477error1:
1478	return ret;
1479}
1480
1481static void remove_mad_reg_req(struct ib_mad_agent_private *agent_priv)
1482{
1483	struct ib_mad_port_private *port_priv;
1484	struct ib_mad_mgmt_class_table *class;
1485	struct ib_mad_mgmt_method_table *method;
1486	struct ib_mad_mgmt_vendor_class_table *vendor;
1487	struct ib_mad_mgmt_vendor_class *vendor_class;
1488	int index;
1489	u8 mgmt_class;
1490
1491	/*
1492	 * Was MAD registration request supplied
1493	 * with original registration ?
1494	 */
1495	if (!agent_priv->reg_req) {
1496		goto out;
1497	}
1498
1499	port_priv = agent_priv->qp_info->port_priv;
1500	mgmt_class = convert_mgmt_class(agent_priv->reg_req->mgmt_class);
1501	class = port_priv->version[
1502			agent_priv->reg_req->mgmt_class_version].class;
1503	if (!class)
1504		goto vendor_check;
1505
1506	method = class->method_table[mgmt_class];
1507	if (method) {
1508		/* Remove any methods for this mad agent */
1509		remove_methods_mad_agent(method, agent_priv);
1510		/* Now, check to see if there are any methods still in use */
1511		if (!check_method_table(method)) {
1512			/* If not, release management method table */
1513			 kfree(method);
1514			 class->method_table[mgmt_class] = NULL;
1515			 /* Any management classes left ? */
1516			if (!check_class_table(class)) {
1517				/* If not, release management class table */
1518				kfree(class);
1519				port_priv->version[
1520					agent_priv->reg_req->
1521					mgmt_class_version].class = NULL;
1522			}
1523		}
1524	}
1525
1526vendor_check:
1527	if (!is_vendor_class(mgmt_class))
1528		goto out;
1529
1530	/* normalize mgmt_class to vendor range 2 */
1531	mgmt_class = vendor_class_index(agent_priv->reg_req->mgmt_class);
1532	vendor = port_priv->version[
1533			agent_priv->reg_req->mgmt_class_version].vendor;
1534
1535	if (!vendor)
1536		goto out;
1537
1538	vendor_class = vendor->vendor_class[mgmt_class];
1539	if (vendor_class) {
1540		index = find_vendor_oui(vendor_class, agent_priv->reg_req->oui);
1541		if (index < 0)
1542			goto out;
1543		method = vendor_class->method_table[index];
1544		if (method) {
1545			/* Remove any methods for this mad agent */
1546			remove_methods_mad_agent(method, agent_priv);
1547			/*
1548			 * Now, check to see if there are
1549			 * any methods still in use
1550			 */
1551			if (!check_method_table(method)) {
1552				/* If not, release management method table */
1553				kfree(method);
1554				vendor_class->method_table[index] = NULL;
1555				memset(vendor_class->oui[index], 0, 3);
1556				/* Any OUIs left ? */
1557				if (!check_vendor_class(vendor_class)) {
1558					/* If not, release vendor class table */
1559					kfree(vendor_class);
1560					vendor->vendor_class[mgmt_class] = NULL;
1561					/* Any other vendor classes left ? */
1562					if (!check_vendor_table(vendor)) {
1563						kfree(vendor);
1564						port_priv->version[
1565							agent_priv->reg_req->
1566							mgmt_class_version].
1567							vendor = NULL;
1568					}
1569				}
1570			}
1571		}
1572	}
1573
1574out:
1575	return;
1576}
1577
1578static struct ib_mad_agent_private *
1579find_mad_agent(struct ib_mad_port_private *port_priv,
1580	       struct ib_mad *mad)
1581{
1582	struct ib_mad_agent_private *mad_agent = NULL;
1583	unsigned long flags;
1584
1585	spin_lock_irqsave(&port_priv->reg_lock, flags);
1586	if (ib_response_mad(mad)) {
1587		u32 hi_tid;
1588		struct ib_mad_agent_private *entry;
1589
1590		/*
1591		 * Routing is based on high 32 bits of transaction ID
1592		 * of MAD.
1593		 */
1594		hi_tid = be64_to_cpu(mad->mad_hdr.tid) >> 32;
1595		list_for_each_entry(entry, &port_priv->agent_list, agent_list) {
1596			if (entry->agent.hi_tid == hi_tid) {
1597				mad_agent = entry;
1598				break;
1599			}
1600		}
1601	} else {
1602		struct ib_mad_mgmt_class_table *class;
1603		struct ib_mad_mgmt_method_table *method;
1604		struct ib_mad_mgmt_vendor_class_table *vendor;
1605		struct ib_mad_mgmt_vendor_class *vendor_class;
1606		struct ib_vendor_mad *vendor_mad;
1607		int index;
1608
1609		/*
1610		 * Routing is based on version, class, and method
1611		 * For "newer" vendor MADs, also based on OUI
1612		 */
1613		if (mad->mad_hdr.class_version >= MAX_MGMT_VERSION)
1614			goto out;
1615		if (!is_vendor_class(mad->mad_hdr.mgmt_class)) {
1616			class = port_priv->version[
1617					mad->mad_hdr.class_version].class;
1618			if (!class)
1619				goto out;
1620			method = class->method_table[convert_mgmt_class(
1621							mad->mad_hdr.mgmt_class)];
1622			if (method)
1623				mad_agent = method->agent[mad->mad_hdr.method &
1624							  ~IB_MGMT_METHOD_RESP];
1625		} else {
1626			vendor = port_priv->version[
1627					mad->mad_hdr.class_version].vendor;
1628			if (!vendor)
1629				goto out;
1630			vendor_class = vendor->vendor_class[vendor_class_index(
1631						mad->mad_hdr.mgmt_class)];
1632			if (!vendor_class)
1633				goto out;
1634			/* Find matching OUI */
1635			vendor_mad = (struct ib_vendor_mad *)mad;
1636			index = find_vendor_oui(vendor_class, vendor_mad->oui);
1637			if (index == -1)
1638				goto out;
1639			method = vendor_class->method_table[index];
1640			if (method) {
1641				mad_agent = method->agent[mad->mad_hdr.method &
1642							  ~IB_MGMT_METHOD_RESP];
1643			}
1644		}
1645	}
1646
1647	if (mad_agent) {
1648		if (mad_agent->agent.recv_handler)
1649			atomic_inc(&mad_agent->refcount);
1650		else {
1651			printk(KERN_NOTICE PFX "No receive handler for client "
1652			       "%p on port %d\n",
1653			       &mad_agent->agent, port_priv->port_num);
1654			mad_agent = NULL;
1655		}
1656	}
1657out:
1658	spin_unlock_irqrestore(&port_priv->reg_lock, flags);
1659
1660	return mad_agent;
1661}
1662
1663static int validate_mad(struct ib_mad *mad, u32 qp_num)
1664{
1665	int valid = 0;
1666
1667	/* Make sure MAD base version is understood */
1668	if (mad->mad_hdr.base_version != IB_MGMT_BASE_VERSION) {
1669		printk(KERN_ERR PFX "MAD received with unsupported base "
1670		       "version %d\n", mad->mad_hdr.base_version);
1671		goto out;
1672	}
1673
1674	/* Filter SMI packets sent to other than QP0 */
1675	if ((mad->mad_hdr.mgmt_class == IB_MGMT_CLASS_SUBN_LID_ROUTED) ||
1676	    (mad->mad_hdr.mgmt_class == IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE)) {
1677		if (qp_num == 0)
1678			valid = 1;
1679	} else {
1680		/* Filter GSI packets sent to QP0 */
1681		if (qp_num != 0)
1682			valid = 1;
1683	}
1684
1685out:
1686	return valid;
1687}
1688
1689static int is_data_mad(struct ib_mad_agent_private *mad_agent_priv,
1690		       struct ib_mad_hdr *mad_hdr)
1691{
1692	struct ib_rmpp_mad *rmpp_mad;
1693
1694	rmpp_mad = (struct ib_rmpp_mad *)mad_hdr;
1695	return !mad_agent_priv->agent.rmpp_version ||
1696		!(ib_get_rmpp_flags(&rmpp_mad->rmpp_hdr) &
1697				    IB_MGMT_RMPP_FLAG_ACTIVE) ||
1698		(rmpp_mad->rmpp_hdr.rmpp_type == IB_MGMT_RMPP_TYPE_DATA);
1699}
1700
1701static inline int rcv_has_same_class(struct ib_mad_send_wr_private *wr,
1702				     struct ib_mad_recv_wc *rwc)
1703{
1704	return ((struct ib_mad *)(wr->send_buf.mad))->mad_hdr.mgmt_class ==
1705		rwc->recv_buf.mad->mad_hdr.mgmt_class;
1706}
1707
1708static inline int rcv_has_same_gid(struct ib_mad_agent_private *mad_agent_priv,
1709				   struct ib_mad_send_wr_private *wr,
1710				   struct ib_mad_recv_wc *rwc )
1711{
1712	struct ib_ah_attr attr;
1713	u8 send_resp, rcv_resp;
1714	union ib_gid sgid;
1715	struct ib_device *device = mad_agent_priv->agent.device;
1716	u8 port_num = mad_agent_priv->agent.port_num;
1717	u8 lmc;
1718
1719	send_resp = ib_response_mad((struct ib_mad *)wr->send_buf.mad);
1720	rcv_resp = ib_response_mad(rwc->recv_buf.mad);
1721
1722	if (send_resp == rcv_resp)
1723		/* both requests, or both responses. GIDs different */
1724		return 0;
1725
1726	if (ib_query_ah(wr->send_buf.ah, &attr))
1727		/* Assume not equal, to avoid false positives. */
1728		return 0;
1729
1730	if (!!(attr.ah_flags & IB_AH_GRH) !=
1731	    !!(rwc->wc->wc_flags & IB_WC_GRH))
1732		/* one has GID, other does not.  Assume different */
1733		return 0;
1734
1735	if (!send_resp && rcv_resp) {
1736		/* is request/response. */
1737		if (!(attr.ah_flags & IB_AH_GRH)) {
1738			if (ib_get_cached_lmc(device, port_num, &lmc))
1739				return 0;
1740			return (!lmc || !((attr.src_path_bits ^
1741					   rwc->wc->dlid_path_bits) &
1742					  ((1 << lmc) - 1)));
1743		} else {
1744			if (ib_get_cached_gid(device, port_num,
1745					      attr.grh.sgid_index, &sgid))
1746				return 0;
1747			return !memcmp(sgid.raw, rwc->recv_buf.grh->dgid.raw,
1748				       16);
1749		}
1750	}
1751
1752	if (!(attr.ah_flags & IB_AH_GRH))
1753		return attr.dlid == rwc->wc->slid;
1754	else
1755		return !memcmp(attr.grh.dgid.raw, rwc->recv_buf.grh->sgid.raw,
1756			       16);
1757}
1758
1759static inline int is_direct(u8 class)
1760{
1761	return (class == IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE);
1762}
1763
1764struct ib_mad_send_wr_private*
1765ib_find_send_mad(struct ib_mad_agent_private *mad_agent_priv,
1766		 struct ib_mad_recv_wc *wc)
1767{
1768	struct ib_mad_send_wr_private *wr;
1769	struct ib_mad *mad;
1770
1771	mad = (struct ib_mad *)wc->recv_buf.mad;
1772
1773	list_for_each_entry(wr, &mad_agent_priv->wait_list, agent_list) {
1774		if ((wr->tid == mad->mad_hdr.tid) &&
1775		    rcv_has_same_class(wr, wc) &&
1776		    /*
1777		     * Don't check GID for direct routed MADs.
1778		     * These might have permissive LIDs.
1779		     */
1780		    (is_direct(wc->recv_buf.mad->mad_hdr.mgmt_class) ||
1781		     rcv_has_same_gid(mad_agent_priv, wr, wc)))
1782			return (wr->status == IB_WC_SUCCESS) ? wr : NULL;
1783	}
1784
1785	/*
1786	 * It's possible to receive the response before we've
1787	 * been notified that the send has completed
1788	 */
1789	list_for_each_entry(wr, &mad_agent_priv->send_list, agent_list) {
1790		if (is_data_mad(mad_agent_priv, wr->send_buf.mad) &&
1791		    wr->tid == mad->mad_hdr.tid &&
1792		    wr->timeout &&
1793		    rcv_has_same_class(wr, wc) &&
1794		    /*
1795		     * Don't check GID for direct routed MADs.
1796		     * These might have permissive LIDs.
1797		     */
1798		    (is_direct(wc->recv_buf.mad->mad_hdr.mgmt_class) ||
1799		     rcv_has_same_gid(mad_agent_priv, wr, wc)))
1800			/* Verify request has not been canceled */
1801			return (wr->status == IB_WC_SUCCESS) ? wr : NULL;
1802	}
1803	return NULL;
1804}
1805
1806void ib_mark_mad_done(struct ib_mad_send_wr_private *mad_send_wr)
1807{
1808	mad_send_wr->timeout = 0;
1809	if (mad_send_wr->refcount == 1)
1810		list_move_tail(&mad_send_wr->agent_list,
1811			      &mad_send_wr->mad_agent_priv->done_list);
1812}
1813
1814static void ib_mad_complete_recv(struct ib_mad_agent_private *mad_agent_priv,
1815				 struct ib_mad_recv_wc *mad_recv_wc)
1816{
1817	struct ib_mad_send_wr_private *mad_send_wr;
1818	struct ib_mad_send_wc mad_send_wc;
1819	unsigned long flags;
1820
1821	INIT_LIST_HEAD(&mad_recv_wc->rmpp_list);
1822	list_add(&mad_recv_wc->recv_buf.list, &mad_recv_wc->rmpp_list);
1823	if (mad_agent_priv->agent.rmpp_version) {
1824		mad_recv_wc = ib_process_rmpp_recv_wc(mad_agent_priv,
1825						      mad_recv_wc);
1826		if (!mad_recv_wc) {
1827			deref_mad_agent(mad_agent_priv);
1828			return;
1829		}
1830	}
1831
1832	/* Complete corresponding request */
1833	if (ib_response_mad(mad_recv_wc->recv_buf.mad)) {
1834		spin_lock_irqsave(&mad_agent_priv->lock, flags);
1835		mad_send_wr = ib_find_send_mad(mad_agent_priv, mad_recv_wc);
1836		if (!mad_send_wr) {
1837			spin_unlock_irqrestore(&mad_agent_priv->lock, flags);
1838			ib_free_recv_mad(mad_recv_wc);
1839			deref_mad_agent(mad_agent_priv);
1840			return;
1841		}
1842		ib_mark_mad_done(mad_send_wr);
1843		spin_unlock_irqrestore(&mad_agent_priv->lock, flags);
1844
1845		/* Defined behavior is to complete response before request */
1846		mad_recv_wc->wc->wr_id = (unsigned long) &mad_send_wr->send_buf;
1847		mad_agent_priv->agent.recv_handler(&mad_agent_priv->agent,
1848						   mad_recv_wc);
1849		atomic_dec(&mad_agent_priv->refcount);
1850
1851		mad_send_wc.status = IB_WC_SUCCESS;
1852		mad_send_wc.vendor_err = 0;
1853		mad_send_wc.send_buf = &mad_send_wr->send_buf;
1854		ib_mad_complete_send_wr(mad_send_wr, &mad_send_wc);
1855	} else {
1856		mad_agent_priv->agent.recv_handler(&mad_agent_priv->agent,
1857						   mad_recv_wc);
1858		deref_mad_agent(mad_agent_priv);
1859	}
1860}
1861
1862static void ib_mad_recv_done_handler(struct ib_mad_port_private *port_priv,
1863				     struct ib_wc *wc)
1864{
1865	struct ib_mad_qp_info *qp_info;
1866	struct ib_mad_private_header *mad_priv_hdr;
1867	struct ib_mad_private *recv, *response = NULL;
1868	struct ib_mad_list_head *mad_list;
1869	struct ib_mad_agent_private *mad_agent;
1870	int port_num;
1871
1872	mad_list = (struct ib_mad_list_head *)(unsigned long)wc->wr_id;
1873	qp_info = mad_list->mad_queue->qp_info;
1874	dequeue_mad(mad_list);
1875
1876	mad_priv_hdr = container_of(mad_list, struct ib_mad_private_header,
1877				    mad_list);
1878	recv = container_of(mad_priv_hdr, struct ib_mad_private, header);
1879	ib_dma_unmap_single(port_priv->device,
1880			    recv->header.mapping,
1881			    sizeof(struct ib_mad_private) -
1882			      sizeof(struct ib_mad_private_header),
1883			    DMA_FROM_DEVICE);
1884
1885	/* Set up the MAD receive work completion from the "normal" work completion */
1886	recv->header.wc = *wc;
1887	recv->header.recv_wc.wc = &recv->header.wc;
1888	recv->header.recv_wc.mad_len = sizeof(struct ib_mad);
1889	recv->header.recv_wc.recv_buf.mad = &recv->mad.mad;
1890	recv->header.recv_wc.recv_buf.grh = &recv->grh;
1891
1892	if (atomic_read(&qp_info->snoop_count))
1893		snoop_recv(qp_info, &recv->header.recv_wc, IB_MAD_SNOOP_RECVS);
1894
1895	/* Validate MAD */
1896	if (!validate_mad(&recv->mad.mad, qp_info->qp->qp_num))
1897		goto out;
1898
1899	response = kmem_cache_alloc(ib_mad_cache, GFP_KERNEL);
1900	if (!response) {
1901		printk(KERN_ERR PFX "ib_mad_recv_done_handler no memory "
1902		       "for response buffer\n");
1903		goto out;
1904	}
1905
1906	if (port_priv->device->node_type == RDMA_NODE_IB_SWITCH)
1907		port_num = wc->port_num;
1908	else
1909		port_num = port_priv->port_num;
1910
1911	if (recv->mad.mad.mad_hdr.mgmt_class ==
1912	    IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE) {
1913		enum smi_forward_action retsmi;
1914
1915		if (smi_handle_dr_smp_recv(&recv->mad.smp,
1916					   port_priv->device->node_type,
1917					   port_num,
1918					   port_priv->device->phys_port_cnt) ==
1919					   IB_SMI_DISCARD)
1920			goto out;
1921
1922		retsmi = smi_check_forward_dr_smp(&recv->mad.smp);
1923		if (retsmi == IB_SMI_LOCAL)
1924			goto local;
1925
1926		if (retsmi == IB_SMI_SEND) { /* don't forward */
1927			if (smi_handle_dr_smp_send(&recv->mad.smp,
1928						   port_priv->device->node_type,
1929						   port_num) == IB_SMI_DISCARD)
1930				goto out;
1931
1932			if (smi_check_local_smp(&recv->mad.smp, port_priv->device) == IB_SMI_DISCARD)
1933				goto out;
1934		} else if (port_priv->device->node_type == RDMA_NODE_IB_SWITCH) {
1935			/* forward case for switches */
1936			memcpy(response, recv, sizeof(*response));
1937			response->header.recv_wc.wc = &response->header.wc;
1938			response->header.recv_wc.recv_buf.mad = &response->mad.mad;
1939			response->header.recv_wc.recv_buf.grh = &response->grh;
1940
1941			agent_send_response(&response->mad.mad,
1942					    &response->grh, wc,
1943					    port_priv->device,
1944					    smi_get_fwd_port(&recv->mad.smp),
1945					    qp_info->qp->qp_num);
1946
1947			goto out;
1948		}
1949	}
1950
1951local:
1952	/* Give driver "right of first refusal" on incoming MAD */
1953	if (port_priv->device->process_mad) {
1954		int ret;
1955
1956		ret = port_priv->device->process_mad(port_priv->device, 0,
1957						     port_priv->port_num,
1958						     wc, &recv->grh,
1959						     &recv->mad.mad,
1960						     &response->mad.mad);
1961		if (ret & IB_MAD_RESULT_SUCCESS) {
1962			if (ret & IB_MAD_RESULT_CONSUMED)
1963				goto out;
1964			if (ret & IB_MAD_RESULT_REPLY) {
1965				agent_send_response(&response->mad.mad,
1966						    &recv->grh, wc,
1967						    port_priv->device,
1968						    port_num,
1969						    qp_info->qp->qp_num);
1970				goto out;
1971			}
1972		}
1973	}
1974
1975	mad_agent = find_mad_agent(port_priv, &recv->mad.mad);
1976	if (mad_agent) {
1977		ib_mad_complete_recv(mad_agent, &recv->header.recv_wc);
1978		/*
1979		 * Ownership of recv has passed to ib_mad_complete_recv(); it
1980		 * is freed there on error or via the client's recv_handler.
1981		 */
1982		recv = NULL;
1983	}
1984
1985out:
1986	/* Post another receive request for this QP */
1987	if (response) {
1988		ib_mad_post_receive_mads(qp_info, response);
1989		if (recv)
1990			kmem_cache_free(ib_mad_cache, recv);
1991	} else
1992		ib_mad_post_receive_mads(qp_info, recv);
1993}
1994
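/*
 * Re-arm or cancel the agent's timeout timer based on the earliest
 * entry remaining on the (timeout-ordered) wait list.
 */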
1995static void adjust_timeout(struct ib_mad_agent_private *mad_agent_priv)
1996{
1997	struct ib_mad_send_wr_private *mad_send_wr;
1998
1999	if (list_empty(&mad_agent_priv->wait_list)) {
2000		del_timer(&mad_agent_priv->timeout_timer);
2001	} else {
2002		mad_send_wr = list_entry(mad_agent_priv->wait_list.next,
2003					 struct ib_mad_send_wr_private,
2004					 agent_list);
2005
2006		if (time_after(mad_agent_priv->timeout,
2007			       mad_send_wr->timeout)) {
2008			mad_agent_priv->timeout = mad_send_wr->timeout;
2009			mod_timer(&mad_agent_priv->timeout_timer,
2010				  mad_send_wr->timeout);
2011		}
2012	}
2013}
2014
2015static void wait_for_response(struct ib_mad_send_wr_private *mad_send_wr)
2016{
2017	struct ib_mad_agent_private *mad_agent_priv;
2018	struct ib_mad_send_wr_private *temp_mad_send_wr;
2019	struct list_head *list_item;
2020	unsigned long delay;
2021
2022	mad_agent_priv = mad_send_wr->mad_agent_priv;
2023	list_del(&mad_send_wr->agent_list);
2024
2025	delay = mad_send_wr->timeout;
2026	mad_send_wr->timeout += jiffies;
2027
2028	if (delay) {
2029		list_for_each_prev(list_item, &mad_agent_priv->wait_list) {
2030			temp_mad_send_wr = list_entry(list_item,
2031						struct ib_mad_send_wr_private,
2032						agent_list);
2033			if (time_after(mad_send_wr->timeout,
2034				       temp_mad_send_wr->timeout))
2035				break;
2036		}
2037	} else
2038		list_item = &mad_agent_priv->wait_list;
2039	list_add(&mad_send_wr->agent_list, list_item);
2040
2041	/* Re-arm the timeout timer if this send now expires first */
2042	if (mad_agent_priv->wait_list.next == &mad_send_wr->agent_list)
2043		mod_timer(&mad_agent_priv->timeout_timer,
2044			  mad_send_wr->timeout);
2045}
2046
2047void ib_reset_mad_timeout(struct ib_mad_send_wr_private *mad_send_wr,
2048			  int timeout_ms)
2049{
2050	mad_send_wr->timeout = msecs_to_jiffies(timeout_ms);
2051	wait_for_response(mad_send_wr);
2052}
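
/*
 * Illustrative sketch of the timeout bookkeeping (assumed example, not
 * from the original source): the timeout field holds a relative jiffies
 * interval until wait_for_response() converts it to an absolute expiry,
 * roughly:
 *
 *	unsigned long delta  = msecs_to_jiffies(2000);	(relative)
 *	unsigned long expiry = jiffies + delta;		(absolute)
 *	int expired = time_after(jiffies, expiry);
 *
 * timeout_sends() below uses the complementary time_after(expiry,
 * jiffies) test to decide whether the head of the wait list is still
 * pending.
 */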
2053
2054/*
2055 * Process a send work completion
2056 */
2057void ib_mad_complete_send_wr(struct ib_mad_send_wr_private *mad_send_wr,
2058			     struct ib_mad_send_wc *mad_send_wc)
2059{
2060	struct ib_mad_agent_private	*mad_agent_priv;
2061	unsigned long			flags;
2062	int				ret;
2063
2064	mad_agent_priv = mad_send_wr->mad_agent_priv;
2065	spin_lock_irqsave(&mad_agent_priv->lock, flags);
2066	if (mad_agent_priv->agent.rmpp_version) {
2067		ret = ib_process_rmpp_send_wc(mad_send_wr, mad_send_wc);
2068		if (ret == IB_RMPP_RESULT_CONSUMED)
2069			goto done;
2070	} else
2071		ret = IB_RMPP_RESULT_UNHANDLED;
2072
2073	if (mad_send_wc->status != IB_WC_SUCCESS &&
2074	    mad_send_wr->status == IB_WC_SUCCESS) {
2075		mad_send_wr->status = mad_send_wc->status;
2076		mad_send_wr->refcount -= (mad_send_wr->timeout > 0);
2077	}
2078
2079	if (--mad_send_wr->refcount > 0) {
2080		if (mad_send_wr->refcount == 1 && mad_send_wr->timeout &&
2081		    mad_send_wr->status == IB_WC_SUCCESS) {
2082			wait_for_response(mad_send_wr);
2083		}
2084		goto done;
2085	}
2086
2087	/* Remove send from MAD agent and notify client of completion */
2088	list_del(&mad_send_wr->agent_list);
2089	adjust_timeout(mad_agent_priv);
2090	spin_unlock_irqrestore(&mad_agent_priv->lock, flags);
2091
2092	if (mad_send_wr->status != IB_WC_SUCCESS)
2093		mad_send_wc->status = mad_send_wr->status;
2094	if (ret == IB_RMPP_RESULT_INTERNAL)
2095		ib_rmpp_send_handler(mad_send_wc);
2096	else
2097		mad_agent_priv->agent.send_handler(&mad_agent_priv->agent,
2098						   mad_send_wc);
2099
2100	/* Release reference on agent taken when sending */
2101	deref_mad_agent(mad_agent_priv);
2102	return;
2103done:
2104	spin_unlock_irqrestore(&mad_agent_priv->lock, flags);
2105}
2106
2107static void ib_mad_send_done_handler(struct ib_mad_port_private *port_priv,
2108				     struct ib_wc *wc)
2109{
2110	struct ib_mad_send_wr_private	*mad_send_wr, *queued_send_wr;
2111	struct ib_mad_list_head		*mad_list;
2112	struct ib_mad_qp_info		*qp_info;
2113	struct ib_mad_queue		*send_queue;
2114	struct ib_send_wr		*bad_send_wr;
2115	struct ib_mad_send_wc		mad_send_wc;
2116	unsigned long flags;
2117	int ret;
2118
2119	mad_list = (struct ib_mad_list_head *)(unsigned long)wc->wr_id;
2120	mad_send_wr = container_of(mad_list, struct ib_mad_send_wr_private,
2121				   mad_list);
2122	send_queue = mad_list->mad_queue;
2123	qp_info = send_queue->qp_info;
2124
2125retry:
2126	ib_dma_unmap_single(mad_send_wr->send_buf.mad_agent->device,
2127			    mad_send_wr->header_mapping,
2128			    mad_send_wr->sg_list[0].length, DMA_TO_DEVICE);
2129	ib_dma_unmap_single(mad_send_wr->send_buf.mad_agent->device,
2130			    mad_send_wr->payload_mapping,
2131			    mad_send_wr->sg_list[1].length, DMA_TO_DEVICE);
2132	queued_send_wr = NULL;
2133	spin_lock_irqsave(&send_queue->lock, flags);
2134	list_del(&mad_list->list);
2135
2136	/* Move a queued send from the overflow list to the send queue */
2137	if (send_queue->count-- > send_queue->max_active) {
2138		mad_list = container_of(qp_info->overflow_list.next,
2139					struct ib_mad_list_head, list);
2140		queued_send_wr = container_of(mad_list,
2141					struct ib_mad_send_wr_private,
2142					mad_list);
2143		list_move_tail(&mad_list->list, &send_queue->list);
2144	}
2145	spin_unlock_irqrestore(&send_queue->lock, flags);
2146
2147	mad_send_wc.send_buf = &mad_send_wr->send_buf;
2148	mad_send_wc.status = wc->status;
2149	mad_send_wc.vendor_err = wc->vendor_err;
2150	if (atomic_read(&qp_info->snoop_count))
2151		snoop_send(qp_info, &mad_send_wr->send_buf, &mad_send_wc,
2152			   IB_MAD_SNOOP_SEND_COMPLETIONS);
2153	ib_mad_complete_send_wr(mad_send_wr, &mad_send_wc);
2154
2155	if (queued_send_wr) {
2156		ret = ib_post_send(qp_info->qp, &queued_send_wr->send_wr,
2157				   &bad_send_wr);
2158		if (ret) {
2159			printk(KERN_ERR PFX "ib_post_send failed: %d\n", ret);
2160			mad_send_wr = queued_send_wr;
2161			wc->status = IB_WC_LOC_QP_OP_ERR;
2162			goto retry;
2163		}
2164	}
2165}
2166
2167static void mark_sends_for_retry(struct ib_mad_qp_info *qp_info)
2168{
2169	struct ib_mad_send_wr_private *mad_send_wr;
2170	struct ib_mad_list_head *mad_list;
2171	unsigned long flags;
2172
2173	spin_lock_irqsave(&qp_info->send_queue.lock, flags);
2174	list_for_each_entry(mad_list, &qp_info->send_queue.list, list) {
2175		mad_send_wr = container_of(mad_list,
2176					   struct ib_mad_send_wr_private,
2177					   mad_list);
2178		mad_send_wr->retry = 1;
2179	}
2180	spin_unlock_irqrestore(&qp_info->send_queue.lock, flags);
2181}
2182
2183static void mad_error_handler(struct ib_mad_port_private *port_priv,
2184			      struct ib_wc *wc)
2185{
2186	struct ib_mad_list_head *mad_list;
2187	struct ib_mad_qp_info *qp_info;
2188	struct ib_mad_send_wr_private *mad_send_wr;
2189	int ret;
2190
2191	/* Determine if failure was a send or receive */
2192	mad_list = (struct ib_mad_list_head *)(unsigned long)wc->wr_id;
2193	qp_info = mad_list->mad_queue->qp_info;
2194	if (mad_list->mad_queue == &qp_info->recv_queue)
2195		/*
2196		 * Receive errors indicate that the QP has entered the error
2197		 * state - error handling/shutdown code will clean up
2198		 */
2199		return;
2200
2201	/*
2202	 * Send errors will have transitioned the QP to SQE - move the
2203	 * QP back to RTS and repost any flushed work requests
2204	 */
2205	mad_send_wr = container_of(mad_list, struct ib_mad_send_wr_private,
2206				   mad_list);
2207	if (wc->status == IB_WC_WR_FLUSH_ERR) {
2208		if (mad_send_wr->retry) {
2209			/* Repost send */
2210			struct ib_send_wr *bad_send_wr;
2211
2212			mad_send_wr->retry = 0;
2213			ret = ib_post_send(qp_info->qp, &mad_send_wr->send_wr,
2214					&bad_send_wr);
2215			if (ret)
2216				ib_mad_send_done_handler(port_priv, wc);
2217		} else
2218			ib_mad_send_done_handler(port_priv, wc);
2219	} else {
2220		struct ib_qp_attr *attr;
2221
2222		/* Transition QP to RTS and fail offending send */
2223		attr = kmalloc(sizeof *attr, GFP_KERNEL);
2224		if (attr) {
2225			attr->qp_state = IB_QPS_RTS;
2226			attr->cur_qp_state = IB_QPS_SQE;
2227			ret = ib_modify_qp(qp_info->qp, attr,
2228					   IB_QP_STATE | IB_QP_CUR_STATE);
2229			kfree(attr);
2230			if (ret)
2231				printk(KERN_ERR PFX "mad_error_handler - "
2232				       "ib_modify_qp to RTS : %d\n", ret);
2233			else
2234				mark_sends_for_retry(qp_info);
2235		}
2236		ib_mad_send_done_handler(port_priv, wc);
2237	}
2238}
2239
2240/*
2241 * IB MAD completion callback
2242 */
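/*
 * Runs from the port work queue.  The CQ is re-armed before draining so
 * that a completion arriving after the final ib_poll_cq() still fires
 * ib_mad_thread_completion_handler(), which queues this work item again.
 */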
2243static void ib_mad_completion_handler(struct work_struct *work)
2244{
2245	struct ib_mad_port_private *port_priv;
2246	struct ib_wc wc;
2247
2248	port_priv = container_of(work, struct ib_mad_port_private, work);
2249	ib_req_notify_cq(port_priv->cq, IB_CQ_NEXT_COMP);
2250
2251	while (ib_poll_cq(port_priv->cq, 1, &wc) == 1) {
2252		if (wc.status == IB_WC_SUCCESS) {
2253			switch (wc.opcode) {
2254			case IB_WC_SEND:
2255				ib_mad_send_done_handler(port_priv, &wc);
2256				break;
2257			case IB_WC_RECV:
2258				ib_mad_recv_done_handler(port_priv, &wc);
2259				break;
2260			default:
2261				BUG_ON(1);
2262				break;
2263			}
2264		} else
2265			mad_error_handler(port_priv, &wc);
2266	}
2267}
2268
2269static void cancel_mads(struct ib_mad_agent_private *mad_agent_priv)
2270{
2271	unsigned long flags;
2272	struct ib_mad_send_wr_private *mad_send_wr, *temp_mad_send_wr;
2273	struct ib_mad_send_wc mad_send_wc;
2274	struct list_head cancel_list;
2275
2276	INIT_LIST_HEAD(&cancel_list);
2277
2278	spin_lock_irqsave(&mad_agent_priv->lock, flags);
2279	list_for_each_entry_safe(mad_send_wr, temp_mad_send_wr,
2280				 &mad_agent_priv->send_list, agent_list) {
2281		if (mad_send_wr->status == IB_WC_SUCCESS) {
2282			mad_send_wr->status = IB_WC_WR_FLUSH_ERR;
2283			mad_send_wr->refcount -= (mad_send_wr->timeout > 0);
2284		}
2285	}
2286
2287	/* Empty wait list to prevent receives from finding a request */
2288	list_splice_init(&mad_agent_priv->wait_list, &cancel_list);
2289	spin_unlock_irqrestore(&mad_agent_priv->lock, flags);
2290
2291	/* Report all cancelled requests */
2292	mad_send_wc.status = IB_WC_WR_FLUSH_ERR;
2293	mad_send_wc.vendor_err = 0;
2294
2295	list_for_each_entry_safe(mad_send_wr, temp_mad_send_wr,
2296				 &cancel_list, agent_list) {
2297		mad_send_wc.send_buf = &mad_send_wr->send_buf;
2298		list_del(&mad_send_wr->agent_list);
2299		mad_agent_priv->agent.send_handler(&mad_agent_priv->agent,
2300						   &mad_send_wc);
2301		atomic_dec(&mad_agent_priv->refcount);
2302	}
2303}
2304
2305static struct ib_mad_send_wr_private*
2306find_send_wr(struct ib_mad_agent_private *mad_agent_priv,
2307	     struct ib_mad_send_buf *send_buf)
2308{
2309	struct ib_mad_send_wr_private *mad_send_wr;
2310
2311	list_for_each_entry(mad_send_wr, &mad_agent_priv->wait_list,
2312			    agent_list) {
2313		if (&mad_send_wr->send_buf == send_buf)
2314			return mad_send_wr;
2315	}
2316
2317	list_for_each_entry(mad_send_wr, &mad_agent_priv->send_list,
2318			    agent_list) {
2319		if (is_data_mad(mad_agent_priv, mad_send_wr->send_buf.mad) &&
2320		    &mad_send_wr->send_buf == send_buf)
2321			return mad_send_wr;
2322	}
2323	return NULL;
2324}
2325
2326int ib_modify_mad(struct ib_mad_agent *mad_agent,
2327		  struct ib_mad_send_buf *send_buf, u32 timeout_ms)
2328{
2329	struct ib_mad_agent_private *mad_agent_priv;
2330	struct ib_mad_send_wr_private *mad_send_wr;
2331	unsigned long flags;
2332	int active;
2333
2334	mad_agent_priv = container_of(mad_agent, struct ib_mad_agent_private,
2335				      agent);
2336	spin_lock_irqsave(&mad_agent_priv->lock, flags);
2337	mad_send_wr = find_send_wr(mad_agent_priv, send_buf);
2338	if (!mad_send_wr || mad_send_wr->status != IB_WC_SUCCESS) {
2339		spin_unlock_irqrestore(&mad_agent_priv->lock, flags);
2340		return -EINVAL;
2341	}
2342
2343	active = (!mad_send_wr->timeout || mad_send_wr->refcount > 1);
2344	if (!timeout_ms) {
2345		mad_send_wr->status = IB_WC_WR_FLUSH_ERR;
2346		mad_send_wr->refcount -= (mad_send_wr->timeout > 0);
2347	}
2348
2349	mad_send_wr->send_buf.timeout_ms = timeout_ms;
2350	if (active)
2351		mad_send_wr->timeout = msecs_to_jiffies(timeout_ms);
2352	else
2353		ib_reset_mad_timeout(mad_send_wr, timeout_ms);
2354
2355	spin_unlock_irqrestore(&mad_agent_priv->lock, flags);
2356	return 0;
2357}
2358EXPORT_SYMBOL(ib_modify_mad);
2359
2360void ib_cancel_mad(struct ib_mad_agent *mad_agent,
2361		   struct ib_mad_send_buf *send_buf)
2362{
2363	ib_modify_mad(mad_agent, send_buf, 0);
2364}
2365EXPORT_SYMBOL(ib_cancel_mad);
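
/*
 * Usage sketch (hypothetical client code, assuming the standard ib_mad
 * client API; "agent" and "send_buf" are placeholders): a client that
 * posted a MAD with ib_post_send_mad() may later adjust or abandon the
 * wait for its response:
 *
 *	int rc = ib_modify_mad(agent, send_buf, 5000);
 *		(returns -EINVAL if the send already completed)
 *
 *	ib_cancel_mad(agent, send_buf);
 *
 * A cancelled send is still reported through the agent's send_handler,
 * with status IB_WC_WR_FLUSH_ERR, just as a flushed send would be.
 */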
2366
2367static void local_completions(struct work_struct *work)
2368{
2369	struct ib_mad_agent_private *mad_agent_priv;
2370	struct ib_mad_local_private *local;
2371	struct ib_mad_agent_private *recv_mad_agent;
2372	unsigned long flags;
2373	int free_mad;
2374	struct ib_wc wc;
2375	struct ib_mad_send_wc mad_send_wc;
2376
2377	mad_agent_priv =
2378		container_of(work, struct ib_mad_agent_private, local_work);
2379
2380	spin_lock_irqsave(&mad_agent_priv->lock, flags);
2381	while (!list_empty(&mad_agent_priv->local_list)) {
2382		local = list_entry(mad_agent_priv->local_list.next,
2383				   struct ib_mad_local_private,
2384				   completion_list);
2385		list_del(&local->completion_list);
2386		spin_unlock_irqrestore(&mad_agent_priv->lock, flags);
2387		free_mad = 0;
2388		if (local->mad_priv) {
2389			recv_mad_agent = local->recv_mad_agent;
2390			if (!recv_mad_agent) {
2391				printk(KERN_ERR PFX "No receive MAD agent for local completion\n");
2392				free_mad = 1;
2393				goto local_send_completion;
2394			}
2395
2396			/*
2397			 * Defined behavior is to complete response
2398			 * before request
2399			 */
2400			build_smp_wc(recv_mad_agent->agent.qp,
2401				     (unsigned long) local->mad_send_wr,
2402				     be16_to_cpu(IB_LID_PERMISSIVE),
2403				     0, recv_mad_agent->agent.port_num, &wc);
2404
2405			local->mad_priv->header.recv_wc.wc = &wc;
2406			local->mad_priv->header.recv_wc.mad_len =
2407						sizeof(struct ib_mad);
2408			INIT_LIST_HEAD(&local->mad_priv->header.recv_wc.rmpp_list);
2409			list_add(&local->mad_priv->header.recv_wc.recv_buf.list,
2410				 &local->mad_priv->header.recv_wc.rmpp_list);
2411			local->mad_priv->header.recv_wc.recv_buf.grh = NULL;
2412			local->mad_priv->header.recv_wc.recv_buf.mad =
2413						&local->mad_priv->mad.mad;
2414			if (atomic_read(&recv_mad_agent->qp_info->snoop_count))
2415				snoop_recv(recv_mad_agent->qp_info,
2416					  &local->mad_priv->header.recv_wc,
2417					   IB_MAD_SNOOP_RECVS);
2418			recv_mad_agent->agent.recv_handler(
2419						&recv_mad_agent->agent,
2420						&local->mad_priv->header.recv_wc);
2421			spin_lock_irqsave(&recv_mad_agent->lock, flags);
2422			atomic_dec(&recv_mad_agent->refcount);
2423			spin_unlock_irqrestore(&recv_mad_agent->lock, flags);
2424		}
2425
2426local_send_completion:
2427		/* Complete send */
2428		mad_send_wc.status = IB_WC_SUCCESS;
2429		mad_send_wc.vendor_err = 0;
2430		mad_send_wc.send_buf = &local->mad_send_wr->send_buf;
2431		if (atomic_read(&mad_agent_priv->qp_info->snoop_count))
2432			snoop_send(mad_agent_priv->qp_info,
2433				   &local->mad_send_wr->send_buf,
2434				   &mad_send_wc, IB_MAD_SNOOP_SEND_COMPLETIONS);
2435		mad_agent_priv->agent.send_handler(&mad_agent_priv->agent,
2436						   &mad_send_wc);
2437
2438		spin_lock_irqsave(&mad_agent_priv->lock, flags);
2439		atomic_dec(&mad_agent_priv->refcount);
2440		if (free_mad)
2441			kmem_cache_free(ib_mad_cache, local->mad_priv);
2442		kfree(local);
2443	}
2444	spin_unlock_irqrestore(&mad_agent_priv->lock, flags);
2445}
2446
2447static int retry_send(struct ib_mad_send_wr_private *mad_send_wr)
2448{
2449	int ret;
2450
2451	if (!mad_send_wr->retries_left)
2452		return -ETIMEDOUT;
2453
2454	mad_send_wr->retries_left--;
2455	mad_send_wr->send_buf.retries++;
2456
2457	mad_send_wr->timeout = msecs_to_jiffies(mad_send_wr->send_buf.timeout_ms);
2458
2459	if (mad_send_wr->mad_agent_priv->agent.rmpp_version) {
2460		ret = ib_retry_rmpp(mad_send_wr);
2461		switch (ret) {
2462		case IB_RMPP_RESULT_UNHANDLED:
2463			ret = ib_send_mad(mad_send_wr);
2464			break;
2465		case IB_RMPP_RESULT_CONSUMED:
2466			ret = 0;
2467			break;
2468		default:
2469			ret = -ECOMM;
2470			break;
2471		}
2472	} else
2473		ret = ib_send_mad(mad_send_wr);
2474
2475	if (!ret) {
2476		mad_send_wr->refcount++;
2477		list_add_tail(&mad_send_wr->agent_list,
2478			      &mad_send_wr->mad_agent_priv->send_list);
2479	}
2480	return ret;
2481}
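
/*
 * Sketch of how the retry budget consumed above is set up by a client
 * (hypothetical example, assuming the standard ib_mad client API;
 * variable names are placeholders):
 *
 *	send_buf = ib_create_send_mad(agent, remote_qpn, pkey_index, 0,
 *				      IB_MGMT_MAD_HDR, IB_MGMT_MAD_DATA,
 *				      GFP_KERNEL);
 *	send_buf->timeout_ms = 1000;	(wait 1 s for a response)
 *	send_buf->retries = 3;		(then retransmit up to 3 times)
 *	ret = ib_post_send_mad(send_buf, NULL);
 *
 * send_buf->retries is incremented above for each retransmission that
 * is actually performed.
 */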
2482
2483static void timeout_sends(struct work_struct *work)
2484{
2485	struct ib_mad_agent_private *mad_agent_priv;
2486	struct ib_mad_send_wr_private *mad_send_wr;
2487	struct ib_mad_send_wc mad_send_wc;
2488	unsigned long flags;
2489
2490	mad_agent_priv = container_of(work, struct ib_mad_agent_private,
2491				      timeout_work);
2492	mad_send_wc.vendor_err = 0;
2493
2494	spin_lock_irqsave(&mad_agent_priv->lock, flags);
2495	while (!list_empty(&mad_agent_priv->wait_list)) {
2496		mad_send_wr = list_entry(mad_agent_priv->wait_list.next,
2497					 struct ib_mad_send_wr_private,
2498					 agent_list);
2499
2500		if (time_after(mad_send_wr->timeout, jiffies)) {
2501			mod_timer(&mad_agent_priv->timeout_timer,
2502				  mad_send_wr->timeout);
2503			break;
2504		}
2505
2506		list_del(&mad_send_wr->agent_list);
2507		if (mad_send_wr->status == IB_WC_SUCCESS &&
2508		    !retry_send(mad_send_wr))
2509			continue;
2510
2511		spin_unlock_irqrestore(&mad_agent_priv->lock, flags);
2512
2513		if (mad_send_wr->status == IB_WC_SUCCESS)
2514			mad_send_wc.status = IB_WC_RESP_TIMEOUT_ERR;
2515		else
2516			mad_send_wc.status = mad_send_wr->status;
2517		mad_send_wc.send_buf = &mad_send_wr->send_buf;
2518		mad_agent_priv->agent.send_handler(&mad_agent_priv->agent,
2519						   &mad_send_wc);
2520
2521		atomic_dec(&mad_agent_priv->refcount);
2522		spin_lock_irqsave(&mad_agent_priv->lock, flags);
2523	}
2524	spin_unlock_irqrestore(&mad_agent_priv->lock, flags);
2525}
2526
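/*
 * CQ event callback: defer processing to the port work queue.  The
 * port_list check prevents queueing new work once the port has been
 * removed from ib_mad_port_list during shutdown.
 */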
2527static void ib_mad_thread_completion_handler(struct ib_cq *cq, void *arg)
2528{
2529	struct ib_mad_port_private *port_priv = cq->cq_context;
2530	unsigned long flags;
2531
2532	spin_lock_irqsave(&ib_mad_port_list_lock, flags);
2533	if (!list_empty(&port_priv->port_list))
2534		queue_work(port_priv->wq, &port_priv->work);
2535	spin_unlock_irqrestore(&ib_mad_port_list_lock, flags);
2536}
2537
2538/*
2539 * Allocate receive MADs and post receive WRs for them
2540 */
2541static int ib_mad_post_receive_mads(struct ib_mad_qp_info *qp_info,
2542				    struct ib_mad_private *mad)
2543{
2544	unsigned long flags;
2545	int post, ret;
2546	struct ib_mad_private *mad_priv;
2547	struct ib_sge sg_list;
2548	struct ib_recv_wr recv_wr, *bad_recv_wr;
2549	struct ib_mad_queue *recv_queue = &qp_info->recv_queue;
2550
2551	/* Initialize common scatter list fields */
2552	sg_list.length = sizeof *mad_priv - sizeof mad_priv->header;
2553	sg_list.lkey = (*qp_info->port_priv->mr).lkey;
2554
2555	/* Initialize common receive WR fields */
2556	recv_wr.next = NULL;
2557	recv_wr.sg_list = &sg_list;
2558	recv_wr.num_sge = 1;
2559
2560	do {
2561		/* Allocate and map receive buffer */
2562		if (mad) {
2563			mad_priv = mad;
2564			mad = NULL;
2565		} else {
2566			mad_priv = kmem_cache_alloc(ib_mad_cache, GFP_KERNEL);
2567			if (!mad_priv) {
2568				printk(KERN_ERR PFX "No memory for receive buffer\n");
2569				ret = -ENOMEM;
2570				break;
2571			}
2572		}
2573		sg_list.addr = ib_dma_map_single(qp_info->port_priv->device,
2574						 &mad_priv->grh,
2575						 sizeof *mad_priv -
2576						   sizeof mad_priv->header,
2577						 DMA_FROM_DEVICE);
2578		mad_priv->header.mapping = sg_list.addr;
2579		recv_wr.wr_id = (unsigned long)&mad_priv->header.mad_list;
2580		mad_priv->header.mad_list.mad_queue = recv_queue;
2581
2582		/* Post receive WR */
2583		spin_lock_irqsave(&recv_queue->lock, flags);
2584		post = (++recv_queue->count < recv_queue->max_active);
2585		list_add_tail(&mad_priv->header.mad_list.list, &recv_queue->list);
2586		spin_unlock_irqrestore(&recv_queue->lock, flags);
2587		ret = ib_post_recv(qp_info->qp, &recv_wr, &bad_recv_wr);
2588		if (ret) {
2589			spin_lock_irqsave(&recv_queue->lock, flags);
2590			list_del(&mad_priv->header.mad_list.list);
2591			recv_queue->count--;
2592			spin_unlock_irqrestore(&recv_queue->lock, flags);
2593			ib_dma_unmap_single(qp_info->port_priv->device,
2594					    mad_priv->header.mapping,
2595					    sizeof *mad_priv -
2596					      sizeof mad_priv->header,
2597					    DMA_FROM_DEVICE);
2598			kmem_cache_free(ib_mad_cache, mad_priv);
2599			printk(KERN_ERR PFX "ib_post_recv failed: %d\n", ret);
2600			break;
2601		}
2602	} while (post);
2603
2604	return ret;
2605}
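
/*
 * Note on the mapped length (arithmetic illustration, assuming the
 * usual InfiniBand sizes): sizeof *mad_priv - sizeof mad_priv->header
 * covers the GRH followed by the MAD itself, i.e. 40 + 256 = 296 bytes;
 * the private header that precedes them is driver bookkeeping and is
 * never DMA-mapped.
 */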
2606
2607/*
2608 * Return all the posted receive MADs
2609 */
2610static void cleanup_recv_queue(struct ib_mad_qp_info *qp_info)
2611{
2612	struct ib_mad_private_header *mad_priv_hdr;
2613	struct ib_mad_private *recv;
2614	struct ib_mad_list_head *mad_list;
2615
2616	if (!qp_info->qp)
2617		return;
2618
2619	while (!list_empty(&qp_info->recv_queue.list)) {
2620
2621		mad_list = list_entry(qp_info->recv_queue.list.next,
2622				      struct ib_mad_list_head, list);
2623		mad_priv_hdr = container_of(mad_list,
2624					    struct ib_mad_private_header,
2625					    mad_list);
2626		recv = container_of(mad_priv_hdr, struct ib_mad_private,
2627				    header);
2628
2629		/* Remove from posted receive MAD list */
2630		list_del(&mad_list->list);
2631
2632		ib_dma_unmap_single(qp_info->port_priv->device,
2633				    recv->header.mapping,
2634				    sizeof(struct ib_mad_private) -
2635				      sizeof(struct ib_mad_private_header),
2636				    DMA_FROM_DEVICE);
2637		kmem_cache_free(ib_mad_cache, recv);
2638	}
2639
2640	qp_info->recv_queue.count = 0;
2641}
2642
2643/*
2644 * Start the port
2645 */
2646static int ib_mad_port_start(struct ib_mad_port_private *port_priv)
2647{
2648	int ret, i;
2649	struct ib_qp_attr *attr;
2650	struct ib_qp *qp;
2651
2652	attr = kmalloc(sizeof *attr, GFP_KERNEL);
2653	if (!attr) {
2654		printk(KERN_ERR PFX "Couldn't kmalloc ib_qp_attr\n");
2655		return -ENOMEM;
2656	}
2657
2658	for (i = 0; i < IB_MAD_QPS_CORE; i++) {
2659		qp = port_priv->qp_info[i].qp;
2660		if (!qp)
2661			continue;
2662
2663		/*
2664		 * PKey index for QP1 is irrelevant but
2665		 * one is needed for the Reset to Init transition
2666		 */
2667		attr->qp_state = IB_QPS_INIT;
2668		attr->pkey_index = 0;
2669		attr->qkey = (qp->qp_num == 0) ? 0 : IB_QP1_QKEY;
2670		ret = ib_modify_qp(qp, attr, IB_QP_STATE |
2671					     IB_QP_PKEY_INDEX | IB_QP_QKEY);
2672		if (ret) {
2673			printk(KERN_ERR PFX "Couldn't change QP%d state to "
2674			       "INIT: %d\n", i, ret);
2675			goto out;
2676		}
2677
2678		attr->qp_state = IB_QPS_RTR;
2679		ret = ib_modify_qp(qp, attr, IB_QP_STATE);
2680		if (ret) {
2681			printk(KERN_ERR PFX "Couldn't change QP%d state to "
2682			       "RTR: %d\n", i, ret);
2683			goto out;
2684		}
2685
2686		attr->qp_state = IB_QPS_RTS;
2687		attr->sq_psn = IB_MAD_SEND_Q_PSN;
2688		ret = ib_modify_qp(qp, attr, IB_QP_STATE | IB_QP_SQ_PSN);
2689		if (ret) {
2690			printk(KERN_ERR PFX "Couldn't change QP%d state to "
2691			       "RTS: %d\n", i, ret);
2692			goto out;
2693		}
2694	}
2695
2696	ret = ib_req_notify_cq(port_priv->cq, IB_CQ_NEXT_COMP);
2697	if (ret) {
2698		printk(KERN_ERR PFX "Failed to request completion "
2699		       "notification: %d\n", ret);
2700		goto out;
2701	}
2702
2703	for (i = 0; i < IB_MAD_QPS_CORE; i++) {
2704		if (!port_priv->qp_info[i].qp)
2705			continue;
2706
2707		ret = ib_mad_post_receive_mads(&port_priv->qp_info[i], NULL);
2708		if (ret) {
2709			printk(KERN_ERR PFX "Couldn't post receive WRs\n");
2710			goto out;
2711		}
2712	}
2713out:
2714	kfree(attr);
2715	return ret;
2716}
2717
2718static void qp_event_handler(struct ib_event *event, void *qp_context)
2719{
2720	struct ib_mad_qp_info	*qp_info = qp_context;
2721
2722	/* It's worse than that! He's dead, Jim! */
2723	printk(KERN_ERR PFX "Fatal error (%d) on MAD QP (%d)\n",
2724		event->event, qp_info->qp->qp_num);
2725}
2726
2727static void init_mad_queue(struct ib_mad_qp_info *qp_info,
2728			   struct ib_mad_queue *mad_queue)
2729{
2730	mad_queue->qp_info = qp_info;
2731	mad_queue->count = 0;
2732	spin_lock_init(&mad_queue->lock);
2733	INIT_LIST_HEAD(&mad_queue->list);
2734}
2735
2736static void init_mad_qp(struct ib_mad_port_private *port_priv,
2737			struct ib_mad_qp_info *qp_info)
2738{
2739	qp_info->port_priv = port_priv;
2740	init_mad_queue(qp_info, &qp_info->send_queue);
2741	init_mad_queue(qp_info, &qp_info->recv_queue);
2742	INIT_LIST_HEAD(&qp_info->overflow_list);
2743	spin_lock_init(&qp_info->snoop_lock);
2744	qp_info->snoop_table = NULL;
2745	qp_info->snoop_table_size = 0;
2746	atomic_set(&qp_info->snoop_count, 0);
2747}
2748
2749static int create_mad_qp(struct ib_mad_qp_info *qp_info,
2750			 enum ib_qp_type qp_type)
2751{
2752	struct ib_qp_init_attr	qp_init_attr;
2753	int ret;
2754
2755	memset(&qp_init_attr, 0, sizeof qp_init_attr);
2756	qp_init_attr.send_cq = qp_info->port_priv->cq;
2757	qp_init_attr.recv_cq = qp_info->port_priv->cq;
2758	qp_init_attr.sq_sig_type = IB_SIGNAL_ALL_WR;
2759	qp_init_attr.cap.max_send_wr = mad_sendq_size;
2760	qp_init_attr.cap.max_recv_wr = mad_recvq_size;
2761	qp_init_attr.cap.max_send_sge = IB_MAD_SEND_REQ_MAX_SG;
2762	qp_init_attr.cap.max_recv_sge = IB_MAD_RECV_REQ_MAX_SG;
2763	qp_init_attr.qp_type = qp_type;
2764	qp_init_attr.port_num = qp_info->port_priv->port_num;
2765	qp_init_attr.qp_context = qp_info;
2766	qp_init_attr.event_handler = qp_event_handler;
2767	qp_info->qp = ib_create_qp(qp_info->port_priv->pd, &qp_init_attr);
2768	if (IS_ERR(qp_info->qp)) {
2769		printk(KERN_ERR PFX "Couldn't create ib_mad QP%d\n",
2770		       get_spl_qp_index(qp_type));
2771		ret = PTR_ERR(qp_info->qp);
2772		goto error;
2773	}
2774	/* Use minimum queue sizes unless the CQ is resized */
2775	qp_info->send_queue.max_active = mad_sendq_size;
2776	qp_info->recv_queue.max_active = mad_recvq_size;
2777	return 0;
2778
2779error:
2780	return ret;
2781}
2782
2783static void destroy_mad_qp(struct ib_mad_qp_info *qp_info)
2784{
2785	if (!qp_info->qp)
2786		return;
2787
2788	ib_destroy_qp(qp_info->qp);
2789	kfree(qp_info->snoop_table);
2790}
2791
2792/*
2793 * Open the port
2794 * Create the QP, PD, MR, and CQ if needed
2795 */
2796static int ib_mad_port_open(struct ib_device *device,
2797			    int port_num)
2798{
2799	int ret, cq_size;
2800	struct ib_mad_port_private *port_priv;
2801	unsigned long flags;
2802	char name[sizeof "ib_mad123"];
2803	int has_smi;
2804
2805	/* Create new device info */
2806	port_priv = kzalloc(sizeof *port_priv, GFP_KERNEL);
2807	if (!port_priv) {
2808		printk(KERN_ERR PFX "No memory for ib_mad_port_private\n");
2809		return -ENOMEM;
2810	}
2811
2812	port_priv->device = device;
2813	port_priv->port_num = port_num;
2814	spin_lock_init(&port_priv->reg_lock);
2815	INIT_LIST_HEAD(&port_priv->agent_list);
2816	init_mad_qp(port_priv, &port_priv->qp_info[0]);
2817	init_mad_qp(port_priv, &port_priv->qp_info[1]);
2818
2819	cq_size = mad_sendq_size + mad_recvq_size;
2820	has_smi = rdma_port_get_link_layer(device, port_num) == IB_LINK_LAYER_INFINIBAND;
2821	if (has_smi)
2822		cq_size *= 2;
2823
2824	port_priv->cq = ib_create_cq(port_priv->device,
2825				     ib_mad_thread_completion_handler,
2826				     NULL, port_priv, cq_size, 0);
2827	if (IS_ERR(port_priv->cq)) {
2828		printk(KERN_ERR PFX "Couldn't create ib_mad CQ\n");
2829		ret = PTR_ERR(port_priv->cq);
2830		goto error3;
2831	}
2832
2833	port_priv->pd = ib_alloc_pd(device);
2834	if (IS_ERR(port_priv->pd)) {
2835		printk(KERN_ERR PFX "Couldn't create ib_mad PD\n");
2836		ret = PTR_ERR(port_priv->pd);
2837		goto error4;
2838	}
2839
2840	port_priv->mr = ib_get_dma_mr(port_priv->pd, IB_ACCESS_LOCAL_WRITE);
2841	if (IS_ERR(port_priv->mr)) {
2842		printk(KERN_ERR PFX "Couldn't get ib_mad DMA MR\n");
2843		ret = PTR_ERR(port_priv->mr);
2844		goto error5;
2845	}
2846
2847	if (has_smi) {
2848		ret = create_mad_qp(&port_priv->qp_info[0], IB_QPT_SMI);
2849		if (ret)
2850			goto error6;
2851	}
2852	ret = create_mad_qp(&port_priv->qp_info[1], IB_QPT_GSI);
2853	if (ret)
2854		goto error7;
2855
2856	snprintf(name, sizeof name, "ib_mad%d", port_num);
2857	port_priv->wq = create_singlethread_workqueue(name);
2858	if (!port_priv->wq) {
2859		ret = -ENOMEM;
2860		goto error8;
2861	}
2862	INIT_WORK(&port_priv->work, ib_mad_completion_handler);
2863
2864	spin_lock_irqsave(&ib_mad_port_list_lock, flags);
2865	list_add_tail(&port_priv->port_list, &ib_mad_port_list);
2866	spin_unlock_irqrestore(&ib_mad_port_list_lock, flags);
2867
2868	ret = ib_mad_port_start(port_priv);
2869	if (ret) {
2870		printk(KERN_ERR PFX "Couldn't start port\n");
2871		goto error9;
2872	}
2873
2874	return 0;
2875
2876error9:
2877	spin_lock_irqsave(&ib_mad_port_list_lock, flags);
2878	list_del_init(&port_priv->port_list);
2879	spin_unlock_irqrestore(&ib_mad_port_list_lock, flags);
2880
2881	destroy_workqueue(port_priv->wq);
2882error8:
2883	destroy_mad_qp(&port_priv->qp_info[1]);
2884error7:
2885	destroy_mad_qp(&port_priv->qp_info[0]);
2886error6:
2887	ib_dereg_mr(port_priv->mr);
2888error5:
2889	ib_dealloc_pd(port_priv->pd);
2890error4:
2891	ib_destroy_cq(port_priv->cq);
2892	cleanup_recv_queue(&port_priv->qp_info[1]);
2893	cleanup_recv_queue(&port_priv->qp_info[0]);
2894error3:
2895	kfree(port_priv);
2896
2897	return ret;
2898}
2899
2900/*
2901 * Close the port
2902 * If there are no classes using the port, free the port
2903 * resources (CQ, MR, PD, QP) and remove the port's info structure
2904 */
2905static int ib_mad_port_close(struct ib_device *device, int port_num)
2906{
2907	struct ib_mad_port_private *port_priv;
2908	unsigned long flags;
2909
2910	spin_lock_irqsave(&ib_mad_port_list_lock, flags);
2911	port_priv = __ib_get_mad_port(device, port_num);
2912	if (port_priv == NULL) {
2913		spin_unlock_irqrestore(&ib_mad_port_list_lock, flags);
2914		printk(KERN_ERR PFX "Port %d not found\n", port_num);
2915		return -ENODEV;
2916	}
2917	list_del_init(&port_priv->port_list);
2918	spin_unlock_irqrestore(&ib_mad_port_list_lock, flags);
2919
2920	destroy_workqueue(port_priv->wq);
2921	destroy_mad_qp(&port_priv->qp_info[1]);
2922	destroy_mad_qp(&port_priv->qp_info[0]);
2923	ib_dereg_mr(port_priv->mr);
2924	ib_dealloc_pd(port_priv->pd);
2925	ib_destroy_cq(port_priv->cq);
2926	cleanup_recv_queue(&port_priv->qp_info[1]);
2927	cleanup_recv_queue(&port_priv->qp_info[0]);
2928	/* XXX: Handle deallocation of MAD registration tables */
2929
2930	kfree(port_priv);
2931
2932	return 0;
2933}
2934
2935static void ib_mad_init_device(struct ib_device *device)
2936{
2937	int start, end, i;
2938
2939	if (rdma_node_get_transport(device->node_type) != RDMA_TRANSPORT_IB)
2940		return;
2941
2942	if (device->node_type == RDMA_NODE_IB_SWITCH) {
2943		start = 0;
2944		end   = 0;
2945	} else {
2946		start = 1;
2947		end   = device->phys_port_cnt;
2948	}
2949
2950	for (i = start; i <= end; i++) {
2951		if (ib_mad_port_open(device, i)) {
2952			printk(KERN_ERR PFX "Couldn't open %s port %d\n",
2953			       device->name, i);
2954			goto error;
2955		}
2956		if (ib_agent_port_open(device, i)) {
2957			printk(KERN_ERR PFX "Couldn't open %s port %d "
2958			       "for agents\n",
2959			       device->name, i);
2960			goto error_agent;
2961		}
2962	}
2963	return;
2964
2965error_agent:
2966	if (ib_mad_port_close(device, i))
2967		printk(KERN_ERR PFX "Couldn't close %s port %d\n",
2968		       device->name, i);
2969
2970error:
2971	i--;
2972
2973	while (i >= start) {
2974		if (ib_agent_port_close(device, i))
2975			printk(KERN_ERR PFX "Couldn't close %s port %d "
2976			       "for agents\n",
2977			       device->name, i);
2978		if (ib_mad_port_close(device, i))
2979			printk(KERN_ERR PFX "Couldn't close %s port %d\n",
2980			       device->name, i);
2981		i--;
2982	}
2983}
2984
2985static void ib_mad_remove_device(struct ib_device *device)
2986{
2987	int i, num_ports, cur_port;
2988
2989	if (device->node_type == RDMA_NODE_IB_SWITCH) {
2990		num_ports = 1;
2991		cur_port = 0;
2992	} else {
2993		num_ports = device->phys_port_cnt;
2994		cur_port = 1;
2995	}
2996	for (i = 0; i < num_ports; i++, cur_port++) {
2997		if (ib_agent_port_close(device, cur_port))
2998			printk(KERN_ERR PFX "Couldn't close %s port %d "
2999			       "for agents\n",
3000			       device->name, cur_port);
3001		if (ib_mad_port_close(device, cur_port))
3002			printk(KERN_ERR PFX "Couldn't close %s port %d\n",
3003			       device->name, cur_port);
3004	}
3005}
3006
3007static struct ib_client mad_client = {
3008	.name   = "mad",
3009	.add = ib_mad_init_device,
3010	.remove = ib_mad_remove_device
3011};
3012
3013static int __init ib_mad_init_module(void)
3014{
3015	int ret;
3016
3017	mad_recvq_size = min(mad_recvq_size, IB_MAD_QP_MAX_SIZE);
3018	mad_recvq_size = max(mad_recvq_size, IB_MAD_QP_MIN_SIZE);
3019
3020	mad_sendq_size = min(mad_sendq_size, IB_MAD_QP_MAX_SIZE);
3021	mad_sendq_size = max(mad_sendq_size, IB_MAD_QP_MIN_SIZE);
3022
3023	spin_lock_init(&ib_mad_port_list_lock);
3024
3025	ib_mad_cache = kmem_cache_create("ib_mad",
3026					 sizeof(struct ib_mad_private),
3027					 0,
3028					 SLAB_HWCACHE_ALIGN,
3029					 NULL);
3030	if (!ib_mad_cache) {
3031		printk(KERN_ERR PFX "Couldn't create ib_mad cache\n");
3032		ret = -ENOMEM;
3033		goto error1;
3034	}
3035
3036	INIT_LIST_HEAD(&ib_mad_port_list);
3037
3038	if (ib_register_client(&mad_client)) {
3039		printk(KERN_ERR PFX "Couldn't register ib_mad client\n");
3040		ret = -EINVAL;
3041		goto error2;
3042	}
3043
3044	return 0;
3045
3046error2:
3047	kmem_cache_destroy(ib_mad_cache);
3048error1:
3049	return ret;
3050}
3051
3052static void __exit ib_mad_cleanup_module(void)
3053{
3054	ib_unregister_client(&mad_client);
3055	kmem_cache_destroy(ib_mad_cache);
3056}
3057
3058module_init(ib_mad_init_module);
3059module_exit(ib_mad_cleanup_module);
3060
3061