1219820Sjeff/*
2219820Sjeff * Copyright (c) 2004-2008 Voltaire, Inc. All rights reserved.
3219820Sjeff * Copyright (c) 2002-2008 Mellanox Technologies LTD. All rights reserved.
4219820Sjeff * Copyright (c) 1996-2003 Intel Corporation. All rights reserved.
5219820Sjeff *
6219820Sjeff * This software is available to you under a choice of one of two
7219820Sjeff * licenses.  You may choose to be licensed under the terms of the GNU
8219820Sjeff * General Public License (GPL) Version 2, available from the file
9219820Sjeff * COPYING in the main directory of this source tree, or the
10219820Sjeff * OpenIB.org BSD license below:
11219820Sjeff *
12219820Sjeff *     Redistribution and use in source and binary forms, with or
13219820Sjeff *     without modification, are permitted provided that the following
14219820Sjeff *     conditions are met:
15219820Sjeff *
16219820Sjeff *      - Redistributions of source code must retain the above
17219820Sjeff *        copyright notice, this list of conditions and the following
18219820Sjeff *        disclaimer.
19219820Sjeff *
20219820Sjeff *      - Redistributions in binary form must reproduce the above
21219820Sjeff *        copyright notice, this list of conditions and the following
22219820Sjeff *        disclaimer in the documentation and/or other materials
23219820Sjeff *        provided with the distribution.
24219820Sjeff *
25219820Sjeff * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
26219820Sjeff * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
27219820Sjeff * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
28219820Sjeff * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
29219820Sjeff * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
30219820Sjeff * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
31219820Sjeff * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
32219820Sjeff * SOFTWARE.
33219820Sjeff *
34219820Sjeff */
35219820Sjeff
36219820Sjeff/*
37219820Sjeff * Abstract:
38219820Sjeff *    Implementation of osm_ni_rcv_t.
39219820Sjeff * This object represents the NodeInfo Receiver object.
40219820Sjeff * This object is part of the opensm family of objects.
41219820Sjeff */
42219820Sjeff
43219820Sjeff#if HAVE_CONFIG_H
44219820Sjeff#  include <config.h>
45219820Sjeff#endif				/* HAVE_CONFIG_H */
46219820Sjeff
47219820Sjeff#include <stdlib.h>
48219820Sjeff#include <string.h>
49219820Sjeff#include <iba/ib_types.h>
50219820Sjeff#include <complib/cl_qmap.h>
51219820Sjeff#include <complib/cl_passivelock.h>
52219820Sjeff#include <complib/cl_debug.h>
53219820Sjeff#include <opensm/osm_madw.h>
54219820Sjeff#include <opensm/osm_log.h>
55219820Sjeff#include <opensm/osm_node.h>
56219820Sjeff#include <opensm/osm_subnet.h>
57219820Sjeff#include <opensm/osm_router.h>
58219820Sjeff#include <opensm/osm_mad_pool.h>
59219820Sjeff#include <opensm/osm_helper.h>
60219820Sjeff#include <opensm/osm_msgdef.h>
61219820Sjeff#include <opensm/osm_opensm.h>
62219820Sjeff#include <opensm/osm_ucast_mgr.h>
63219820Sjeff
64219820Sjeffstatic void
65219820Sjeffreport_duplicated_guid(IN osm_sm_t * sm,
66219820Sjeff		       osm_physp_t * p_physp,
67219820Sjeff		       osm_node_t * p_neighbor_node, const uint8_t port_num)
68219820Sjeff{
69219820Sjeff	osm_physp_t *p_old, *p_new;
70219820Sjeff	osm_dr_path_t path;
71219820Sjeff
72219820Sjeff	p_old = p_physp->p_remote_physp;
73219820Sjeff	p_new = osm_node_get_physp_ptr(p_neighbor_node, port_num);
74219820Sjeff
75219820Sjeff	OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 0D01: "
76219820Sjeff		"Found duplicated node.\n"
77219820Sjeff		"Node 0x%" PRIx64 " port %u is reachable from remote node "
78219820Sjeff		"0x%" PRIx64 " port %u and remote node 0x%" PRIx64 " port %u.\n"
79219820Sjeff		"Paths are:\n",
80219820Sjeff		cl_ntoh64(p_physp->p_node->node_info.node_guid),
81219820Sjeff		p_physp->port_num,
82219820Sjeff		cl_ntoh64(p_old->p_node->node_info.node_guid), p_old->port_num,
83219820Sjeff		cl_ntoh64(p_new->p_node->node_info.node_guid), p_new->port_num);
84219820Sjeff
85219820Sjeff	osm_dump_dr_path(sm->p_log, osm_physp_get_dr_path_ptr(p_physp),
86219820Sjeff			 OSM_LOG_ERROR);
87219820Sjeff
88219820Sjeff	path = *osm_physp_get_dr_path_ptr(p_new);
89219820Sjeff	osm_dr_path_extend(&path, port_num);
90219820Sjeff	osm_dump_dr_path(sm->p_log, &path, OSM_LOG_ERROR);
91219820Sjeff
92219820Sjeff	osm_log(sm->p_log, OSM_LOG_SYS,
93219820Sjeff		"FATAL: duplicated guids or 12x lane reversal\n");
94219820Sjeff}
95219820Sjeff
96219820Sjeffstatic void requery_dup_node_info(IN osm_sm_t * sm,
97219820Sjeff				  osm_physp_t * p_physp, unsigned count)
98219820Sjeff{
99219820Sjeff	osm_madw_context_t context;
100219820Sjeff	osm_dr_path_t path;
101219820Sjeff	cl_status_t status;
102219820Sjeff
103219820Sjeff	path = *osm_physp_get_dr_path_ptr(p_physp->p_remote_physp);
104219820Sjeff	osm_dr_path_extend(&path, p_physp->p_remote_physp->port_num);
105219820Sjeff
106219820Sjeff	context.ni_context.node_guid =
107219820Sjeff	    p_physp->p_remote_physp->p_node->node_info.port_guid;
108219820Sjeff	context.ni_context.port_num = p_physp->p_remote_physp->port_num;
109219820Sjeff	context.ni_context.dup_node_guid = p_physp->p_node->node_info.node_guid;
110219820Sjeff	context.ni_context.dup_port_num = p_physp->port_num;
111219820Sjeff	context.ni_context.dup_count = count;
112219820Sjeff
113219820Sjeff	status = osm_req_get(sm, &path, IB_MAD_ATTR_NODE_INFO,
114219820Sjeff			     0, CL_DISP_MSGID_NONE, &context);
115219820Sjeff
116219820Sjeff	if (status != IB_SUCCESS)
117219820Sjeff		OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 0D02: "
118219820Sjeff			"Failure initiating NodeInfo request (%s)\n",
119219820Sjeff			ib_get_err_str(status));
120219820Sjeff}
121219820Sjeff
122219820Sjeff/**********************************************************************
123219820Sjeff The plock must be held before calling this function.
124219820Sjeff**********************************************************************/
125219820Sjeffstatic void
126219820Sjeff__osm_ni_rcv_set_links(IN osm_sm_t * sm,
127219820Sjeff		       osm_node_t * p_node,
128219820Sjeff		       const uint8_t port_num,
129219820Sjeff		       const osm_ni_context_t * const p_ni_context)
130219820Sjeff{
131219820Sjeff	osm_node_t *p_neighbor_node;
132219820Sjeff	osm_physp_t *p_physp;
133219820Sjeff
134219820Sjeff	OSM_LOG_ENTER(sm->p_log);
135219820Sjeff
136219820Sjeff	/*
137219820Sjeff	   A special case exists in which the node we're trying to
138219820Sjeff	   link is our own node.  In this case, the guid value in
139219820Sjeff	   the ni_context will be zero.
140219820Sjeff	 */
141219820Sjeff	if (p_ni_context->node_guid == 0) {
142219820Sjeff		OSM_LOG(sm->p_log, OSM_LOG_DEBUG,
143219820Sjeff			"Nothing to link for our own node 0x%" PRIx64 "\n",
144219820Sjeff			cl_ntoh64(osm_node_get_node_guid(p_node)));
145219820Sjeff		goto _exit;
146219820Sjeff	}
147219820Sjeff
148219820Sjeff	p_neighbor_node = osm_get_node_by_guid(sm->p_subn,
149219820Sjeff					       p_ni_context->node_guid);
150219820Sjeff	if (!p_neighbor_node) {
151219820Sjeff		OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 0D10: "
152219820Sjeff			"Unexpected removal of neighbor node "
153219820Sjeff			"0x%" PRIx64 "\n", cl_ntoh64(p_ni_context->node_guid));
154219820Sjeff		goto _exit;
155219820Sjeff	}
156219820Sjeff
157219820Sjeff	/*
158219820Sjeff	   We have seen this neighbor node before, but we might
159219820Sjeff	   not have seen this port on the neighbor node before.
160219820Sjeff	   We should not set links to an uninitialized port on the
161219820Sjeff	   neighbor, so check validity up front.  If it's not
162219820Sjeff	   valid, do nothing, since we'll see this link again
163219820Sjeff	   when we probe the neighbor.
164219820Sjeff	 */
165219820Sjeff	if (!osm_node_link_has_valid_ports(p_node, port_num,
166219820Sjeff					   p_neighbor_node,
167219820Sjeff					   p_ni_context->port_num))
168219820Sjeff		goto _exit;
169219820Sjeff
170219820Sjeff	if (osm_node_link_exists(p_node, port_num,
171219820Sjeff				 p_neighbor_node, p_ni_context->port_num)) {
172219820Sjeff		OSM_LOG(sm->p_log, OSM_LOG_DEBUG, "Link already exists\n");
173219820Sjeff		goto _exit;
174219820Sjeff	}
175219820Sjeff
176219820Sjeff	if (osm_node_has_any_link(p_node, port_num) &&
177219820Sjeff	    sm->p_subn->force_heavy_sweep == FALSE &&
178219820Sjeff	    (!p_ni_context->dup_count ||
179219820Sjeff	     (p_ni_context->dup_node_guid == osm_node_get_node_guid(p_node) &&
180219820Sjeff	      p_ni_context->dup_port_num == port_num))) {
181219820Sjeff		/*
182219820Sjeff		   Uh oh...
183219820Sjeff		   This could be reconnected ports, but also duplicated GUID
184219820Sjeff		   (2 nodes have the same guid) or a 12x link with lane reversal
185219820Sjeff		   that is not configured correctly.
186219820Sjeff		   We will try to recover by querying NodeInfo again.
187219820Sjeff		   In order to catch even fast port moving to new location(s) and
188219820Sjeff		   back we will count up to 5.
189219820Sjeff		   Some crazy reconnections (newly created switch loop right before
190219820Sjeff		   targeted CA) will not be catched this way. So in worst case -
191219820Sjeff		   report GUID duplication and request new discovery.
192219820Sjeff		   When switch node is targeted NodeInfo querying will be done in
193219820Sjeff		   opposite order, this is much stronger check, unfortunately it is
194219820Sjeff		   impossible with CAs.
195219820Sjeff		 */
196219820Sjeff		p_physp = osm_node_get_physp_ptr(p_node, port_num);
197219820Sjeff		if (p_ni_context->dup_count > 5) {
198219820Sjeff			report_duplicated_guid(sm, p_physp,
199219820Sjeff					       p_neighbor_node,
200219820Sjeff					       p_ni_context->port_num);
201219820Sjeff			sm->p_subn->force_heavy_sweep = TRUE;
202219820Sjeff		} else if (p_node->sw)
203219820Sjeff			requery_dup_node_info(sm, p_physp->p_remote_physp,
204219820Sjeff					      p_ni_context->dup_count + 1);
205219820Sjeff		else
206219820Sjeff			requery_dup_node_info(sm, p_physp,
207219820Sjeff					      p_ni_context->dup_count + 1);
208219820Sjeff	}
209219820Sjeff
210219820Sjeff	/*
211219820Sjeff	   When there are only two nodes with exact same guids (connected back
212219820Sjeff	   to back) - the previous check for duplicated guid will not catch
213219820Sjeff	   them. But the link will be from the port to itself...
214219820Sjeff	   Enhanced Port 0 is an exception to this
215219820Sjeff	 */
216219820Sjeff	if ((osm_node_get_node_guid(p_node) == p_ni_context->node_guid) &&
217219820Sjeff	    (port_num == p_ni_context->port_num) &&
218219820Sjeff	    port_num != 0 && cl_qmap_count(&sm->p_subn->sw_guid_tbl) == 0) {
219219820Sjeff		OSM_LOG(sm->p_log, OSM_LOG_VERBOSE,
220219820Sjeff			"Duplicate GUID found by link from a port to itself:"
221219820Sjeff			"node 0x%" PRIx64 ", port number %u\n",
222219820Sjeff			cl_ntoh64(osm_node_get_node_guid(p_node)), port_num);
223219820Sjeff		p_physp = osm_node_get_physp_ptr(p_node, port_num);
224219820Sjeff		osm_dump_dr_path(sm->p_log,
225219820Sjeff				 osm_physp_get_dr_path_ptr(p_physp),
226219820Sjeff				 OSM_LOG_VERBOSE);
227219820Sjeff
228219820Sjeff		if (sm->p_subn->opt.exit_on_fatal == TRUE) {
229219820Sjeff			osm_log(sm->p_log, OSM_LOG_SYS,
230219820Sjeff				"Errors on subnet. Duplicate GUID found "
231219820Sjeff				"by link from a port to itself. "
232219820Sjeff				"See verbose opensm.log for more details\n");
233219820Sjeff			exit(1);
234219820Sjeff		}
235219820Sjeff	}
236219820Sjeff
237219820Sjeff	OSM_LOG(sm->p_log, OSM_LOG_DEBUG,
238219820Sjeff		"Creating new link between:\n\t\t\t\tnode 0x%" PRIx64
239219820Sjeff		", port number %u and\n\t\t\t\tnode 0x%" PRIx64
240219820Sjeff		", port number %u\n",
241219820Sjeff		cl_ntoh64(osm_node_get_node_guid(p_node)), port_num,
242219820Sjeff		cl_ntoh64(p_ni_context->node_guid), p_ni_context->port_num);
243219820Sjeff
244219820Sjeff	if (sm->ucast_mgr.cache_valid)
245219820Sjeff		osm_ucast_cache_check_new_link(&sm->ucast_mgr,
246219820Sjeff					       p_node, port_num,
247219820Sjeff					       p_neighbor_node,
248219820Sjeff					       p_ni_context->port_num);
249219820Sjeff
250219820Sjeff	osm_node_link(p_node, port_num, p_neighbor_node,
251219820Sjeff		      p_ni_context->port_num);
252219820Sjeff
253219820Sjeff_exit:
254219820Sjeff	OSM_LOG_EXIT(sm->p_log);
255219820Sjeff}
256219820Sjeff
257219820Sjeff/**********************************************************************
258219820Sjeff The plock must be held before calling this function.
259219820Sjeff**********************************************************************/
260219820Sjeffstatic void
261219820Sjeff__osm_ni_rcv_process_new_node(IN osm_sm_t * sm,
262219820Sjeff			      IN osm_node_t * const p_node,
263219820Sjeff			      IN const osm_madw_t * const p_madw)
264219820Sjeff{
265219820Sjeff	ib_api_status_t status = IB_SUCCESS;
266219820Sjeff	osm_madw_context_t context;
267219820Sjeff	osm_physp_t *p_physp;
268219820Sjeff	ib_node_info_t *p_ni;
269219820Sjeff	ib_smp_t *p_smp;
270219820Sjeff	uint8_t port_num;
271219820Sjeff
272219820Sjeff	OSM_LOG_ENTER(sm->p_log);
273219820Sjeff
274219820Sjeff	p_smp = osm_madw_get_smp_ptr(p_madw);
275219820Sjeff	p_ni = (ib_node_info_t *) ib_smp_get_payload_ptr(p_smp);
276219820Sjeff	port_num = ib_node_info_get_local_port_num(p_ni);
277219820Sjeff
278219820Sjeff	/*
279219820Sjeff	   Request PortInfo & NodeDescription attributes for the port
280219820Sjeff	   that responded to the NodeInfo attribute.
281219820Sjeff	   Because this is a channel adapter or router, we are
282219820Sjeff	   not allowed to request PortInfo for the other ports.
283219820Sjeff	   Set the context union properly, so the recipient
284219820Sjeff	   knows which node & port are relevant.
285219820Sjeff	 */
286219820Sjeff	p_physp = osm_node_get_physp_ptr(p_node, port_num);
287219820Sjeff
288219820Sjeff	context.pi_context.node_guid = p_ni->node_guid;
289219820Sjeff	context.pi_context.port_guid = p_ni->port_guid;
290219820Sjeff	context.pi_context.set_method = FALSE;
291219820Sjeff	context.pi_context.light_sweep = FALSE;
292219820Sjeff	context.pi_context.active_transition = FALSE;
293219820Sjeff
294219820Sjeff	status = osm_req_get(sm, osm_physp_get_dr_path_ptr(p_physp),
295219820Sjeff			     IB_MAD_ATTR_PORT_INFO,
296219820Sjeff			     cl_hton32(port_num), CL_DISP_MSGID_NONE, &context);
297219820Sjeff	if (status != IB_SUCCESS)
298219820Sjeff		OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 0D02: "
299219820Sjeff			"Failure initiating PortInfo request (%s)\n",
300219820Sjeff			ib_get_err_str(status));
301219820Sjeff
302219820Sjeff	OSM_LOG_EXIT(sm->p_log);
303219820Sjeff}
304219820Sjeff
305219820Sjeff/**********************************************************************
306219820Sjeff The plock must be held before calling this function.
307219820Sjeff**********************************************************************/
308219820Sjeffvoid
309219820Sjeffosm_req_get_node_desc(IN osm_sm_t * sm,
310219820Sjeff			osm_physp_t *p_physp)
311219820Sjeff{
312219820Sjeff	ib_api_status_t status = IB_SUCCESS;
313219820Sjeff	osm_madw_context_t context;
314219820Sjeff
315219820Sjeff	OSM_LOG_ENTER(sm->p_log);
316219820Sjeff
317219820Sjeff	context.nd_context.node_guid =
318219820Sjeff		osm_node_get_node_guid(osm_physp_get_node_ptr(p_physp));
319219820Sjeff
320219820Sjeff	status = osm_req_get(sm, osm_physp_get_dr_path_ptr(p_physp),
321219820Sjeff			     IB_MAD_ATTR_NODE_DESC,
322219820Sjeff			     0, CL_DISP_MSGID_NONE, &context);
323219820Sjeff	if (status != IB_SUCCESS)
324219820Sjeff		OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 0D03: "
325219820Sjeff			"Failure initiating NodeDescription request (%s)\n",
326219820Sjeff			ib_get_err_str(status));
327219820Sjeff
328219820Sjeff	OSM_LOG_EXIT(sm->p_log);
329219820Sjeff}
330219820Sjeff
331219820Sjeff/**********************************************************************
332219820Sjeff The plock must be held before calling this function.
333219820Sjeff**********************************************************************/
334219820Sjeffstatic void
335219820Sjeff__osm_ni_rcv_get_node_desc(IN osm_sm_t * sm,
336219820Sjeff			   IN osm_node_t * const p_node,
337219820Sjeff			   IN const osm_madw_t * const p_madw)
338219820Sjeff{
339219820Sjeff	ib_node_info_t *p_ni;
340219820Sjeff	ib_smp_t *p_smp;
341219820Sjeff	uint8_t port_num;
342219820Sjeff	osm_physp_t *p_physp = NULL;
343219820Sjeff
344219820Sjeff	OSM_LOG_ENTER(sm->p_log);
345219820Sjeff
346219820Sjeff	p_smp = osm_madw_get_smp_ptr(p_madw);
347219820Sjeff	p_ni = (ib_node_info_t *) ib_smp_get_payload_ptr(p_smp);
348219820Sjeff	port_num = ib_node_info_get_local_port_num(p_ni);
349219820Sjeff
350219820Sjeff	/*
351219820Sjeff	   Request PortInfo & NodeDescription attributes for the port
352219820Sjeff	   that responded to the NodeInfo attribute.
353219820Sjeff	   Because this is a channel adapter or router, we are
354219820Sjeff	   not allowed to request PortInfo for the other ports.
355219820Sjeff	   Set the context union properly, so the recipient
356219820Sjeff	   knows which node & port are relevant.
357219820Sjeff	 */
358219820Sjeff	p_physp = osm_node_get_physp_ptr(p_node, port_num);
359219820Sjeff
360219820Sjeff	osm_req_get_node_desc(sm, p_physp);
361219820Sjeff
362219820Sjeff	OSM_LOG_EXIT(sm->p_log);
363219820Sjeff}
364219820Sjeff
365219820Sjeff/**********************************************************************
366219820Sjeff The plock must be held before calling this function.
367219820Sjeff**********************************************************************/
368219820Sjeffstatic void
369219820Sjeff__osm_ni_rcv_process_new_ca_or_router(IN osm_sm_t * sm,
370219820Sjeff				      IN osm_node_t * const p_node,
371219820Sjeff				      IN const osm_madw_t * const p_madw)
372219820Sjeff{
373219820Sjeff	OSM_LOG_ENTER(sm->p_log);
374219820Sjeff
375219820Sjeff	__osm_ni_rcv_process_new_node(sm, p_node, p_madw);
376219820Sjeff
377219820Sjeff	/*
378219820Sjeff	   A node guid of 0 is the corner case that indicates
379219820Sjeff	   we discovered our own node.  Initialize the subnet
380219820Sjeff	   object with the SM's own port guid.
381219820Sjeff	 */
382219820Sjeff	if (osm_madw_get_ni_context_ptr(p_madw)->node_guid == 0)
383219820Sjeff		sm->p_subn->sm_port_guid = p_node->node_info.port_guid;
384219820Sjeff
385219820Sjeff	OSM_LOG_EXIT(sm->p_log);
386219820Sjeff}
387219820Sjeff
388219820Sjeff/**********************************************************************
389219820Sjeff The plock must be held before calling this function.
390219820Sjeff**********************************************************************/
391219820Sjeffstatic void
392219820Sjeff__osm_ni_rcv_process_existing_ca_or_router(IN osm_sm_t * sm,
393219820Sjeff					   IN osm_node_t * const p_node,
394219820Sjeff					   IN const osm_madw_t * const p_madw)
395219820Sjeff{
396219820Sjeff	ib_node_info_t *p_ni;
397219820Sjeff	ib_smp_t *p_smp;
398219820Sjeff	osm_port_t *p_port;
399219820Sjeff	osm_port_t *p_port_check;
400219820Sjeff	osm_madw_context_t context;
401219820Sjeff	uint8_t port_num;
402219820Sjeff	osm_physp_t *p_physp;
403219820Sjeff	ib_api_status_t status;
404219820Sjeff	osm_dr_path_t *p_dr_path;
405219820Sjeff	osm_bind_handle_t h_bind;
406219820Sjeff
407219820Sjeff	OSM_LOG_ENTER(sm->p_log);
408219820Sjeff
409219820Sjeff	p_smp = osm_madw_get_smp_ptr(p_madw);
410219820Sjeff	p_ni = (ib_node_info_t *) ib_smp_get_payload_ptr(p_smp);
411219820Sjeff	port_num = ib_node_info_get_local_port_num(p_ni);
412219820Sjeff	h_bind = osm_madw_get_bind_handle(p_madw);
413219820Sjeff
414219820Sjeff	/*
415219820Sjeff	   Determine if we have encountered this node through a
416219820Sjeff	   previously undiscovered port.  If so, build the new
417219820Sjeff	   port object.
418219820Sjeff	 */
419219820Sjeff	p_port = osm_get_port_by_guid(sm->p_subn, p_ni->port_guid);
420219820Sjeff	if (!p_port) {
421219820Sjeff		OSM_LOG(sm->p_log, OSM_LOG_VERBOSE,
422219820Sjeff			"Creating new port object with GUID 0x%" PRIx64 "\n",
423219820Sjeff			cl_ntoh64(p_ni->port_guid));
424219820Sjeff
425219820Sjeff		osm_node_init_physp(p_node, p_madw);
426219820Sjeff
427219820Sjeff		p_port = osm_port_new(p_ni, p_node);
428219820Sjeff		if (p_port == NULL) {
429219820Sjeff			OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 0D04: "
430219820Sjeff				"Unable to create new port object\n");
431219820Sjeff			goto Exit;
432219820Sjeff		}
433219820Sjeff
434219820Sjeff		/*
435219820Sjeff		   Add the new port object to the database.
436219820Sjeff		 */
437219820Sjeff		p_port_check =
438219820Sjeff		    (osm_port_t *) cl_qmap_insert(&sm->p_subn->port_guid_tbl,
439219820Sjeff						  p_ni->port_guid,
440219820Sjeff						  &p_port->map_item);
441219820Sjeff		if (p_port_check != p_port) {
442219820Sjeff			/*
443219820Sjeff			   We should never be here!
444219820Sjeff			   Somehow, this port GUID already exists in the table.
445219820Sjeff			 */
446219820Sjeff			OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 0D12: "
447219820Sjeff				"Port 0x%" PRIx64 " already in the database!\n",
448219820Sjeff				cl_ntoh64(p_ni->port_guid));
449219820Sjeff
450219820Sjeff			osm_port_delete(&p_port);
451219820Sjeff			goto Exit;
452219820Sjeff		}
453219820Sjeff
454219820Sjeff		/* If we are a master, then this means the port is new on the subnet.
455219820Sjeff		   Mark it as new - need to send trap 64 on these ports.
456219820Sjeff		   The condition that we are master is true, since if we are in discovering
457219820Sjeff		   state (meaning we woke up from standby or we are just initializing),
458219820Sjeff		   then these ports may be new to us, but are not new on the subnet.
459219820Sjeff		   If we are master, then the subnet as we know it is the updated one,
460219820Sjeff		   and any new ports we encounter should cause trap 64. C14-72.1.1 */
461219820Sjeff		if (sm->p_subn->sm_state == IB_SMINFO_STATE_MASTER)
462219820Sjeff			p_port->is_new = 1;
463219820Sjeff
464219820Sjeff		p_physp = osm_node_get_physp_ptr(p_node, port_num);
465219820Sjeff	} else {
466219820Sjeff		p_physp = osm_node_get_physp_ptr(p_node, port_num);
467219820Sjeff		/*
468219820Sjeff		   Update the DR Path to the port,
469219820Sjeff		   in case the old one is no longer available.
470219820Sjeff		 */
471219820Sjeff		p_dr_path = osm_physp_get_dr_path_ptr(p_physp);
472219820Sjeff
473219820Sjeff		osm_dr_path_init(p_dr_path, h_bind, p_smp->hop_count,
474219820Sjeff				 p_smp->initial_path);
475219820Sjeff	}
476219820Sjeff
477219820Sjeff	context.pi_context.node_guid = p_ni->node_guid;
478219820Sjeff	context.pi_context.port_guid = p_ni->port_guid;
479219820Sjeff	context.pi_context.set_method = FALSE;
480219820Sjeff	context.pi_context.light_sweep = FALSE;
481219820Sjeff
482219820Sjeff	status = osm_req_get(sm, osm_physp_get_dr_path_ptr(p_physp),
483219820Sjeff			     IB_MAD_ATTR_PORT_INFO,
484219820Sjeff			     cl_hton32(port_num), CL_DISP_MSGID_NONE, &context);
485219820Sjeff
486219820Sjeff	if (status != IB_SUCCESS)
487219820Sjeff		OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 0D13: "
488219820Sjeff			"Failure initiating PortInfo request (%s)\n",
489219820Sjeff			ib_get_err_str(status));
490219820Sjeff
491219820SjeffExit:
492219820Sjeff	OSM_LOG_EXIT(sm->p_log);
493219820Sjeff}
494219820Sjeff
495219820Sjeff/**********************************************************************
496219820Sjeff **********************************************************************/
497219820Sjeffstatic void
498219820Sjeff__osm_ni_rcv_process_switch(IN osm_sm_t * sm,
499219820Sjeff			    IN osm_node_t * const p_node,
500219820Sjeff			    IN const osm_madw_t * const p_madw)
501219820Sjeff{
502219820Sjeff	ib_api_status_t status = IB_SUCCESS;
503219820Sjeff	osm_madw_context_t context;
504219820Sjeff	osm_dr_path_t *path;
505219820Sjeff	ib_smp_t *p_smp;
506219820Sjeff
507219820Sjeff	OSM_LOG_ENTER(sm->p_log);
508219820Sjeff
509219820Sjeff	p_smp = osm_madw_get_smp_ptr(p_madw);
510219820Sjeff
511219820Sjeff	/* update DR path of already initialized switch port 0 */
512219820Sjeff	path = osm_physp_get_dr_path_ptr(osm_node_get_physp_ptr(p_node, 0));
513219820Sjeff	osm_dr_path_init(path, osm_madw_get_bind_handle(p_madw),
514219820Sjeff			 p_smp->hop_count, p_smp->initial_path);
515219820Sjeff
516219820Sjeff	context.si_context.node_guid = osm_node_get_node_guid(p_node);
517219820Sjeff	context.si_context.set_method = FALSE;
518219820Sjeff	context.si_context.light_sweep = FALSE;
519219820Sjeff
520219820Sjeff	/* Request a SwitchInfo attribute */
521219820Sjeff	status = osm_req_get(sm, path, IB_MAD_ATTR_SWITCH_INFO,
522219820Sjeff			     0, CL_DISP_MSGID_NONE, &context);
523219820Sjeff	if (status != IB_SUCCESS)
524219820Sjeff		/* continue despite error */
525219820Sjeff		OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 0D06: "
526219820Sjeff			"Failure initiating SwitchInfo request (%s)\n",
527219820Sjeff			ib_get_err_str(status));
528219820Sjeff
529219820Sjeff	OSM_LOG_EXIT(sm->p_log);
530219820Sjeff}
531219820Sjeff
532219820Sjeff/**********************************************************************
533219820Sjeff The plock must be held before calling this function.
534219820Sjeff**********************************************************************/
535219820Sjeffstatic void
536219820Sjeff__osm_ni_rcv_process_existing_switch(IN osm_sm_t * sm,
537219820Sjeff				     IN osm_node_t * const p_node,
538219820Sjeff				     IN const osm_madw_t * const p_madw)
539219820Sjeff{
540219820Sjeff	OSM_LOG_ENTER(sm->p_log);
541219820Sjeff
542219820Sjeff	/*
543219820Sjeff	   If this switch has already been probed during this sweep,
544219820Sjeff	   then don't bother reprobing it.
545219820Sjeff	   There is one exception - if the node has been visited, but
546219820Sjeff	   for some reason we don't have the switch object (this can happen
547219820Sjeff	   if the SwitchInfo mad didn't reach the SM) then we want
548219820Sjeff	   to retry to probe the switch.
549219820Sjeff	 */
550219820Sjeff	if (p_node->discovery_count == 1)
551219820Sjeff		__osm_ni_rcv_process_switch(sm, p_node, p_madw);
552219820Sjeff	else if (!p_node->sw || p_node->sw->discovery_count == 0) {
553219820Sjeff		/* we don't have the SwitchInfo - retry to get it */
554219820Sjeff		OSM_LOG(sm->p_log, OSM_LOG_DEBUG,
555219820Sjeff			"Retry to get SwitchInfo on node GUID:0x%"
556219820Sjeff			PRIx64 "\n", cl_ntoh64(osm_node_get_node_guid(p_node)));
557219820Sjeff		__osm_ni_rcv_process_switch(sm, p_node, p_madw);
558219820Sjeff	}
559219820Sjeff
560219820Sjeff	OSM_LOG_EXIT(sm->p_log);
561219820Sjeff}
562219820Sjeff
563219820Sjeff/**********************************************************************
564219820Sjeff The plock must be held before calling this function.
565219820Sjeff**********************************************************************/
566219820Sjeffstatic void
567219820Sjeff__osm_ni_rcv_process_new_switch(IN osm_sm_t * sm,
568219820Sjeff				IN osm_node_t * const p_node,
569219820Sjeff				IN const osm_madw_t * const p_madw)
570219820Sjeff{
571219820Sjeff	OSM_LOG_ENTER(sm->p_log);
572219820Sjeff
573219820Sjeff	__osm_ni_rcv_process_switch(sm, p_node, p_madw);
574219820Sjeff
575219820Sjeff	/*
576219820Sjeff	   A node guid of 0 is the corner case that indicates
577219820Sjeff	   we discovered our own node.  Initialize the subnet
578219820Sjeff	   object with the SM's own port guid.
579219820Sjeff	 */
580219820Sjeff	if (osm_madw_get_ni_context_ptr(p_madw)->node_guid == 0)
581219820Sjeff		sm->p_subn->sm_port_guid = p_node->node_info.port_guid;
582219820Sjeff
583219820Sjeff	OSM_LOG_EXIT(sm->p_log);
584219820Sjeff}
585219820Sjeff
586219820Sjeff/**********************************************************************
587219820Sjeff The plock must NOT be held before calling this function.
588219820Sjeff**********************************************************************/
589219820Sjeffstatic void
590219820Sjeff__osm_ni_rcv_process_new(IN osm_sm_t * sm,
591219820Sjeff			 IN const osm_madw_t * const p_madw)
592219820Sjeff{
593219820Sjeff	osm_node_t *p_node;
594219820Sjeff	osm_node_t *p_node_check;
595219820Sjeff	osm_port_t *p_port;
596219820Sjeff	osm_port_t *p_port_check;
597219820Sjeff	osm_router_t *p_rtr = NULL;
598219820Sjeff	osm_router_t *p_rtr_check;
599219820Sjeff	cl_qmap_t *p_rtr_guid_tbl;
600219820Sjeff	ib_node_info_t *p_ni;
601219820Sjeff	ib_smp_t *p_smp;
602219820Sjeff	osm_ni_context_t *p_ni_context;
603219820Sjeff	uint8_t port_num;
604219820Sjeff
605219820Sjeff	OSM_LOG_ENTER(sm->p_log);
606219820Sjeff
607219820Sjeff	p_smp = osm_madw_get_smp_ptr(p_madw);
608219820Sjeff	p_ni = (ib_node_info_t *) ib_smp_get_payload_ptr(p_smp);
609219820Sjeff	p_ni_context = osm_madw_get_ni_context_ptr(p_madw);
610219820Sjeff	port_num = ib_node_info_get_local_port_num(p_ni);
611219820Sjeff
612219820Sjeff	osm_dump_smp_dr_path(sm->p_log, p_smp, OSM_LOG_VERBOSE);
613219820Sjeff
614219820Sjeff	OSM_LOG(sm->p_log, OSM_LOG_VERBOSE,
615219820Sjeff		"Discovered new %s node,"
616219820Sjeff		"\n\t\t\t\tGUID 0x%" PRIx64 ", TID 0x%" PRIx64 "\n",
617219820Sjeff		ib_get_node_type_str(p_ni->node_type),
618219820Sjeff		cl_ntoh64(p_ni->node_guid), cl_ntoh64(p_smp->trans_id));
619219820Sjeff
620219820Sjeff	p_node = osm_node_new(p_madw);
621219820Sjeff	if (p_node == NULL) {
622219820Sjeff		OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 0D07: "
623219820Sjeff			"Unable to create new node object\n");
624219820Sjeff		goto Exit;
625219820Sjeff	}
626219820Sjeff
627219820Sjeff	/*
628219820Sjeff	   Create a new port object to represent this node's physical
629219820Sjeff	   ports in the port table.
630219820Sjeff	 */
631219820Sjeff	p_port = osm_port_new(p_ni, p_node);
632219820Sjeff	if (p_port == NULL) {
633219820Sjeff		OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 0D14: "
634219820Sjeff			"Unable to create new port object\n");
635219820Sjeff		osm_node_delete(&p_node);
636219820Sjeff		goto Exit;
637219820Sjeff	}
638219820Sjeff
639219820Sjeff	/*
640219820Sjeff	   Add the new port object to the database.
641219820Sjeff	 */
642219820Sjeff	p_port_check =
643219820Sjeff	    (osm_port_t *) cl_qmap_insert(&sm->p_subn->port_guid_tbl,
644219820Sjeff					  p_ni->port_guid, &p_port->map_item);
645219820Sjeff	if (p_port_check != p_port) {
646219820Sjeff		/*
647219820Sjeff		   We should never be here!
648219820Sjeff		   Somehow, this port GUID already exists in the table.
649219820Sjeff		 */
650219820Sjeff		OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 0D15: "
651219820Sjeff			"Duplicate Port GUID 0x%" PRIx64
652219820Sjeff			"! Found by the two directed routes:\n",
653219820Sjeff			cl_ntoh64(p_ni->port_guid));
654219820Sjeff		osm_dump_dr_path(sm->p_log,
655219820Sjeff				 osm_physp_get_dr_path_ptr(p_port->p_physp),
656219820Sjeff				 OSM_LOG_ERROR);
657219820Sjeff		osm_dump_dr_path(sm->p_log,
658219820Sjeff				 osm_physp_get_dr_path_ptr(p_port_check->
659219820Sjeff							   p_physp),
660219820Sjeff				 OSM_LOG_ERROR);
661219820Sjeff		osm_port_delete(&p_port);
662219820Sjeff		osm_node_delete(&p_node);
663219820Sjeff		goto Exit;
664219820Sjeff	}
665219820Sjeff
666219820Sjeff	/* If we are a master, then this means the port is new on the subnet.
667219820Sjeff	   Mark it as new - need to send trap 64 on these ports.
668219820Sjeff	   The condition that we are master is true, since if we are in discovering
669219820Sjeff	   state (meaning we woke up from standby or we are just initializing),
670219820Sjeff	   then these ports may be new to us, but are not new on the subnet.
671219820Sjeff	   If we are master, then the subnet as we know it is the updated one,
672219820Sjeff	   and any new ports we encounter should cause trap 64. C14-72.1.1 */
673219820Sjeff	if (sm->p_subn->sm_state == IB_SMINFO_STATE_MASTER)
674219820Sjeff		p_port->is_new = 1;
675219820Sjeff
676219820Sjeff	/* If there were RouterInfo or other router attribute,
677219820Sjeff	   this would be elsewhere */
678219820Sjeff	if (p_ni->node_type == IB_NODE_TYPE_ROUTER) {
679219820Sjeff		if ((p_rtr = osm_router_new(p_port)) == NULL)
680219820Sjeff			OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 0D1A: "
681219820Sjeff				"Unable to create new router object\n");
682219820Sjeff		else {
683219820Sjeff			p_rtr_guid_tbl = &sm->p_subn->rtr_guid_tbl;
684219820Sjeff			p_rtr_check =
685219820Sjeff			    (osm_router_t *) cl_qmap_insert(p_rtr_guid_tbl,
686219820Sjeff							    p_ni->port_guid,
687219820Sjeff							    &p_rtr->map_item);
688219820Sjeff			if (p_rtr_check != p_rtr)
689219820Sjeff				OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 0D1B: "
690219820Sjeff					"Unable to add port GUID:0x%016" PRIx64
691219820Sjeff					" to router table\n",
692219820Sjeff					cl_ntoh64(p_ni->port_guid));
693219820Sjeff		}
694219820Sjeff	}
695219820Sjeff
696219820Sjeff	p_node_check =
697219820Sjeff	    (osm_node_t *) cl_qmap_insert(&sm->p_subn->node_guid_tbl,
698219820Sjeff					  p_ni->node_guid, &p_node->map_item);
699219820Sjeff	if (p_node_check != p_node) {
700219820Sjeff		/*
701219820Sjeff		   This node must have been inserted by another thread.
702219820Sjeff		   This is unexpected, but is not an error.
703219820Sjeff		   We can simply clean-up, since the other thread will
704219820Sjeff		   see this processing through to completion.
705219820Sjeff		 */
706219820Sjeff		OSM_LOG(sm->p_log, OSM_LOG_VERBOSE,
707219820Sjeff			"Discovery race detected at node 0x%" PRIx64 "\n",
708219820Sjeff			cl_ntoh64(p_ni->node_guid));
709219820Sjeff		osm_node_delete(&p_node);
710219820Sjeff		p_node = p_node_check;
711219820Sjeff		__osm_ni_rcv_set_links(sm, p_node, port_num, p_ni_context);
712219820Sjeff		goto Exit;
713219820Sjeff	} else
714219820Sjeff		__osm_ni_rcv_set_links(sm, p_node, port_num, p_ni_context);
715219820Sjeff
716219820Sjeff	p_node->discovery_count++;
717219820Sjeff	__osm_ni_rcv_get_node_desc(sm, p_node, p_madw);
718219820Sjeff
719219820Sjeff	switch (p_ni->node_type) {
720219820Sjeff	case IB_NODE_TYPE_CA:
721219820Sjeff	case IB_NODE_TYPE_ROUTER:
722219820Sjeff		__osm_ni_rcv_process_new_ca_or_router(sm, p_node, p_madw);
723219820Sjeff		break;
724219820Sjeff	case IB_NODE_TYPE_SWITCH:
725219820Sjeff		__osm_ni_rcv_process_new_switch(sm, p_node, p_madw);
726219820Sjeff		break;
727219820Sjeff	default:
728219820Sjeff		OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 0D16: "
729219820Sjeff			"Unknown node type %u with GUID 0x%" PRIx64 "\n",
730219820Sjeff			p_ni->node_type, cl_ntoh64(p_ni->node_guid));
731219820Sjeff		break;
732219820Sjeff	}
733219820Sjeff
734219820SjeffExit:
735219820Sjeff	OSM_LOG_EXIT(sm->p_log);
736219820Sjeff}
737219820Sjeff
738219820Sjeff/**********************************************************************
739219820Sjeff The plock must be held before calling this function.
740219820Sjeff**********************************************************************/
741219820Sjeffstatic void
742219820Sjeff__osm_ni_rcv_process_existing(IN osm_sm_t * sm,
743219820Sjeff			      IN osm_node_t * const p_node,
744219820Sjeff			      IN const osm_madw_t * const p_madw)
745219820Sjeff{
746219820Sjeff	ib_node_info_t *p_ni;
747219820Sjeff	ib_smp_t *p_smp;
748219820Sjeff	osm_ni_context_t *p_ni_context;
749219820Sjeff	uint8_t port_num;
750219820Sjeff
751219820Sjeff	OSM_LOG_ENTER(sm->p_log);
752219820Sjeff
753219820Sjeff	p_smp = osm_madw_get_smp_ptr(p_madw);
754219820Sjeff	p_ni = (ib_node_info_t *) ib_smp_get_payload_ptr(p_smp);
755219820Sjeff	p_ni_context = osm_madw_get_ni_context_ptr(p_madw);
756219820Sjeff	port_num = ib_node_info_get_local_port_num(p_ni);
757219820Sjeff
758219820Sjeff	OSM_LOG(sm->p_log, OSM_LOG_VERBOSE,
759219820Sjeff		"Rediscovered %s node 0x%" PRIx64 " TID 0x%" PRIx64
760219820Sjeff		", discovered %u times already\n",
761219820Sjeff		ib_get_node_type_str(p_ni->node_type),
762219820Sjeff		cl_ntoh64(p_ni->node_guid),
763219820Sjeff		cl_ntoh64(p_smp->trans_id), p_node->discovery_count);
764219820Sjeff
765219820Sjeff	/*
766219820Sjeff	   If we haven't already encountered this existing node
767219820Sjeff	   on this particular sweep, then process further.
768219820Sjeff	 */
769219820Sjeff	p_node->discovery_count++;
770219820Sjeff
771219820Sjeff	switch (p_ni->node_type) {
772219820Sjeff	case IB_NODE_TYPE_CA:
773219820Sjeff	case IB_NODE_TYPE_ROUTER:
774219820Sjeff		__osm_ni_rcv_process_existing_ca_or_router(sm, p_node,
775219820Sjeff							   p_madw);
776219820Sjeff		break;
777219820Sjeff
778219820Sjeff	case IB_NODE_TYPE_SWITCH:
779219820Sjeff		__osm_ni_rcv_process_existing_switch(sm, p_node, p_madw);
780219820Sjeff		break;
781219820Sjeff
782219820Sjeff	default:
783219820Sjeff		OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 0D09: "
784219820Sjeff			"Unknown node type %u with GUID 0x%" PRIx64 "\n",
785219820Sjeff			p_ni->node_type, cl_ntoh64(p_ni->node_guid));
786219820Sjeff		break;
787219820Sjeff	}
788219820Sjeff
789219820Sjeff	__osm_ni_rcv_set_links(sm, p_node, port_num, p_ni_context);
790219820Sjeff
791219820Sjeff	OSM_LOG_EXIT(sm->p_log);
792219820Sjeff}
793219820Sjeff
794219820Sjeff/**********************************************************************
795219820Sjeff **********************************************************************/
796219820Sjeffvoid osm_ni_rcv_process(IN void *context, IN void *data)
797219820Sjeff{
798219820Sjeff	osm_sm_t *sm = context;
799219820Sjeff	osm_madw_t *p_madw = data;
800219820Sjeff	ib_node_info_t *p_ni;
801219820Sjeff	ib_smp_t *p_smp;
802219820Sjeff	osm_node_t *p_node;
803219820Sjeff
804219820Sjeff	CL_ASSERT(sm);
805219820Sjeff
806219820Sjeff	OSM_LOG_ENTER(sm->p_log);
807219820Sjeff
808219820Sjeff	CL_ASSERT(p_madw);
809219820Sjeff
810219820Sjeff	p_smp = osm_madw_get_smp_ptr(p_madw);
811219820Sjeff	p_ni = (ib_node_info_t *) ib_smp_get_payload_ptr(p_smp);
812219820Sjeff
813219820Sjeff	CL_ASSERT(p_smp->attr_id == IB_MAD_ATTR_NODE_INFO);
814219820Sjeff
815219820Sjeff	if (p_ni->node_guid == 0) {
816219820Sjeff		OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 0D16: "
817219820Sjeff			"Got Zero Node GUID! Found on the directed route:\n");
818219820Sjeff		osm_dump_smp_dr_path(sm->p_log, p_smp, OSM_LOG_ERROR);
819219820Sjeff		goto Exit;
820219820Sjeff	}
821219820Sjeff
822219820Sjeff	if (p_ni->port_guid == 0) {
823219820Sjeff		OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 0D17: "
824219820Sjeff			"Got Zero Port GUID! Found on the directed route:\n");
825219820Sjeff		osm_dump_smp_dr_path(sm->p_log, p_smp, OSM_LOG_ERROR);
826219820Sjeff		goto Exit;
827219820Sjeff	}
828219820Sjeff
829219820Sjeff	/*
830219820Sjeff	   Determine if this node has already been discovered,
831219820Sjeff	   and process accordingly.
832219820Sjeff	   During processing of this node, hold the shared lock.
833219820Sjeff	 */
834219820Sjeff
835219820Sjeff	CL_PLOCK_EXCL_ACQUIRE(sm->p_lock);
836219820Sjeff	p_node = osm_get_node_by_guid(sm->p_subn, p_ni->node_guid);
837219820Sjeff
838219820Sjeff	osm_dump_node_info(sm->p_log, p_ni, OSM_LOG_DEBUG);
839219820Sjeff
840219820Sjeff	if (!p_node)
841219820Sjeff		__osm_ni_rcv_process_new(sm, p_madw);
842219820Sjeff	else
843219820Sjeff		__osm_ni_rcv_process_existing(sm, p_node, p_madw);
844219820Sjeff
845219820Sjeff	CL_PLOCK_RELEASE(sm->p_lock);
846219820Sjeff
847219820SjeffExit:
848219820Sjeff	OSM_LOG_EXIT(sm->p_log);
849219820Sjeff}
850