1/*
2 * Copyright (c) 2004-2009 Voltaire Inc.  All rights reserved.
3 * Copyright (c) 2007 Xsigo Systems Inc.  All rights reserved.
4 * Copyright (c) 2008 Lawrence Livermore National Lab.  All rights reserved.
5 * Copyright (c) 2009 HNR Consulting.  All rights reserved.
6 * Copyright (c) 2010,2011 Mellanox Technologies LTD.  All rights reserved.
7 *
8 * This software is available to you under a choice of one of two
9 * licenses.  You may choose to be licensed under the terms of the GNU
10 * General Public License (GPL) Version 2, available from the file
11 * COPYING in the main directory of this source tree, or the
12 * OpenIB.org BSD license below:
13 *
14 *     Redistribution and use in source and binary forms, with or
15 *     without modification, are permitted provided that the following
16 *     conditions are met:
17 *
18 *      - Redistributions of source code must retain the above
19 *        copyright notice, this list of conditions and the following
20 *        disclaimer.
21 *
22 *      - Redistributions in binary form must reproduce the above
23 *        copyright notice, this list of conditions and the following
24 *        disclaimer in the documentation and/or other materials
25 *        provided with the distribution.
26 *
27 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
28 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
29 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
30 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
31 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
32 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
33 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
34 * SOFTWARE.
35 *
36 */
37
38#if HAVE_CONFIG_H
39#  include <config.h>
40#endif				/* HAVE_CONFIG_H */
41
42#define _GNU_SOURCE
43#include <stdio.h>
44#include <stdlib.h>
45#include <unistd.h>
46#include <stdarg.h>
47#include <time.h>
48#include <string.h>
49#include <getopt.h>
50#include <errno.h>
51#include <inttypes.h>
52
53#include <complib/cl_nodenamemap.h>
54#include <infiniband/ibnetdisc.h>
55#include <infiniband/mad.h>
56
57#include "ibdiag_common.h"
58#include "ibdiag_sa.h"
59
/* MAD RPC port used by every query helper in this file; main() opens it,
 * closes it around fabric discovery and then reopens it. */
struct ibmad_port *ibmad_port;
static char *node_name_map_file = NULL;	/* set by --node-name-map */
static nn_map_t *node_name_map = NULL;	/* opened from node_name_map_file in main() */
static char *load_cache_file = NULL;	/* set by --load-cache */
/* SL to use towards a destination, indexed by destination LID
 * (48 * 1024 entries); filled from path records and read before PMA queries. */
static uint16_t lid2sl_table[sizeof(uint8_t) * 1024 * 48] = { 0 };
static int obtain_sl = 1;		/* cleared by --skip-sl */

int data_counters = 0;			/* --data: add data counters to error output */
int data_counters_only = 0;		/* --counters: print data counters only */
int port_config = 0;			/* -r: also print port link information */
uint64_t port_guid = 0;			/* numeric form of -G/-S, or decoded from NodeInfo for -D */
char *port_guid_str = NULL;		/* -G/-S argument as given on the command line */
#define SUP_MAX 64
int sup_total = 0;			/* number of valid entries in suppressed_fields */
enum MAD_FIELDS suppressed_fields[SUP_MAX];	/* counters excluded from error reporting */
char *dr_path = NULL;			/* -D argument */
uint8_t node_type_to_print = 0;		/* OR of PRINT_* flags; 0 is replaced by PRINT_ALL in main() */
unsigned clear_errors = 0, clear_counts = 0, details = 0;	/* -k, -K, --details */

#define PRINT_SWITCH 0x1
#define PRINT_CA     0x2
#define PRINT_ROUTER 0x4
#define PRINT_ALL 0xFF		/* all nodes default flag */

/* timeout assigned to ibd_timeout before the path record query */
#define DEFAULT_HALF_WORLD_PR_TIMEOUT (3000)

/* Running totals printed by print_summary(). */
struct {
	int nodes_checked;
	int bad_nodes;
	int ports_checked;
	int bad_ports;
	int pma_query_failures;
} summary = { 0 };

#define DEF_THRES_FILE IBDIAG_CONFIG_PATH"/error_thresholds"
static char *threshold_file = DEF_THRES_FILE;	/* replaced by --threshold-file */

/* define a "packet" with threshold values in it */
uint8_t thresholds[1204] = { 0 };
/* Human-readable list of the thresholds in effect, printed in the summary.
 * Starts as a string literal and is replaced by heap memory in
 * set_thresholds(). */
char * threshold_str = "";
100
101static unsigned valid_gid(ib_gid_t * gid)
102{
103	ib_gid_t zero_gid;
104	memset(&zero_gid, 0, sizeof zero_gid);
105	return memcmp(&zero_gid, gid, sizeof(*gid));
106}
107
108static void set_thres(char *name, uint32_t val)
109{
110	int f;
111	int n;
112	char tmp[256];
113	for (f = IB_PC_FIRST_F; f <= IB_PC_LAST_F; f++) {
114		if (strcmp(name, mad_field_name(f)) == 0) {
115			mad_encode_field(thresholds, f, &val);
116			snprintf(tmp, 255, "[%s = %u]", name, val);
117			threshold_str = realloc(threshold_str,
118					strlen(threshold_str)+strlen(tmp)+1);
119			if (!threshold_str) {
120				fprintf(stderr, "Failed to allocate memory: "
121					"%s\n", strerror(errno));
122				exit(1);
123			}
124			n = strlen(threshold_str);
125			strcpy(threshold_str+n, tmp);
126		}
127	}
128}
129
130static void set_thresholds(char *threshold_file)
131{
132	char buf[1024];
133	char orig_buf[1024];
134	int val = 0;
135	FILE *thresf = fopen(threshold_file, "r");
136	char *p_prefix, *p_last;
137	char *name;
138	char *val_str;
139	char str[64];
140
141	if (!thresf)
142		return;
143
144	snprintf(str, 63, "Thresholds: ");
145	threshold_str = malloc(strlen(str)+1);
146	if (!threshold_str) {
147		fprintf(stderr, "Failed to allocate memory: %s\n",
148			strerror(errno));
149		exit(1);
150	}
151	strcpy(threshold_str, str);
152	while (fgets(buf, sizeof buf, thresf) != NULL) {
153		p_prefix = strtok_r(buf, "\n", &p_last);
154		if (!p_prefix)
155			continue; /* ignore blank lines */
156
157		if (*p_prefix == '#')
158			continue; /* ignore comment lines */
159
160		strlcpy(orig_buf, buf, sizeof(orig_buf));
161		name = strtok_r(p_prefix, "=", &p_last);
162		val_str = strtok_r(NULL, "\n", &p_last);
163		if (!name || !val_str) {
164			fprintf(stderr, "malformed line in \"%s\":\n%s\n",
165			    threshold_file, orig_buf);
166			continue;
167		}
168
169		val = strtoul(val_str, NULL, 0);
170		set_thres(name, val);
171	}
172
173	fclose(thresf);
174}
175
176static int exceeds_threshold(int field, unsigned val)
177{
178	uint32_t thres = 0;
179	mad_decode_field(thresholds, field, &thres);
180	return (val > thres);
181}
182
183static void print_port_config(ibnd_node_t * node, int portnum)
184{
185	char width[64], speed[64], state[64], physstate[64];
186	char remote_str[256];
187	char link_str[256];
188	char width_msg[256];
189	char speed_msg[256];
190	char ext_port_str[256];
191	int iwidth, ispeed, fdr10, espeed, istate, iphystate, cap_mask;
192	uint8_t *info;
193
194	ibnd_port_t *port = node->ports[portnum];
195
196	if (!port)
197		return;
198
199	iwidth = mad_get_field(port->info, 0, IB_PORT_LINK_WIDTH_ACTIVE_F);
200	ispeed = mad_get_field(port->info, 0, IB_PORT_LINK_SPEED_ACTIVE_F);
201	fdr10 = mad_get_field(port->ext_info, 0,
202			      IB_MLNX_EXT_PORT_LINK_SPEED_ACTIVE_F) & FDR10;
203
204	if (port->node->type == IB_NODE_SWITCH)
205		info = (uint8_t *)&port->node->ports[0]->info;
206	else
207		info = (uint8_t *)&port->info;
208	cap_mask = mad_get_field(info, 0, IB_PORT_CAPMASK_F);
209	if (cap_mask & CL_NTOH32(IB_PORT_CAP_HAS_EXT_SPEEDS))
210		espeed = mad_get_field(port->info, 0,
211				       IB_PORT_LINK_SPEED_EXT_ACTIVE_F);
212	else
213		espeed = 0;
214	istate = mad_get_field(port->info, 0, IB_PORT_STATE_F);
215	iphystate = mad_get_field(port->info, 0, IB_PORT_PHYS_STATE_F);
216
217	remote_str[0] = '\0';
218	link_str[0] = '\0';
219	width_msg[0] = '\0';
220	speed_msg[0] = '\0';
221
222	/* C14-24.2.1 states that a down port allows for invalid data to be
223	 * returned for all PortInfo components except PortState and
224	 * PortPhysicalState */
225	if (istate != IB_LINK_DOWN) {
226		if (!espeed) {
227			if (fdr10)
228				sprintf(speed, "10.0 Gbps (FDR10)");
229			else
230				mad_dump_val(IB_PORT_LINK_SPEED_ACTIVE_F, speed,
231					     64, &ispeed);
232		} else
233			mad_dump_val(IB_PORT_LINK_SPEED_EXT_ACTIVE_F, speed,
234			     64, &espeed);
235
236		snprintf(link_str, 256, "(%3s %18s %6s/%8s)",
237			 mad_dump_val(IB_PORT_LINK_WIDTH_ACTIVE_F, width, 64, &iwidth),
238			 speed,
239			 mad_dump_val(IB_PORT_STATE_F, state, 64, &istate),
240			 mad_dump_val(IB_PORT_PHYS_STATE_F, physstate, 64, &iphystate));
241	} else {
242		snprintf(link_str, 256, "(              %6s/%8s)",
243			 mad_dump_val(IB_PORT_STATE_F, state, 64, &istate),
244			 mad_dump_val(IB_PORT_PHYS_STATE_F, physstate, 64, &iphystate));
245	}
246
247	if (port->remoteport) {
248		char *rem_node_name = NULL;
249
250		if (port->remoteport->ext_portnum)
251			snprintf(ext_port_str, 256, "%d",
252				 port->remoteport->ext_portnum);
253		else
254			ext_port_str[0] = '\0';
255
256		get_max_msg(width_msg, speed_msg, 256, port);
257
258		rem_node_name = remap_node_name(node_name_map,
259						port->remoteport->node->guid,
260						port->remoteport->node->
261						nodedesc);
262
263		snprintf(remote_str, 256,
264			 "0x%016" PRIx64 " %6d %4d[%2s] \"%s\" (%s %s)\n",
265			 port->remoteport->guid,
266			 port->remoteport->base_lid ? port->remoteport->
267			 base_lid : port->remoteport->node->smalid,
268			 port->remoteport->portnum, ext_port_str, rem_node_name,
269			 width_msg, speed_msg);
270
271		free(rem_node_name);
272	} else
273		snprintf(remote_str, 256, "           [  ] \"\" ( )\n");
274
275	if (port->ext_portnum)
276		snprintf(ext_port_str, 256, "%d", port->ext_portnum);
277	else
278		ext_port_str[0] = '\0';
279
280	if (node->type == IB_NODE_SWITCH)
281		printf("       Link info: %6d", node->smalid);
282	else
283		printf("       Link info: %6d", port->base_lid);
284
285	printf("%4d[%2s] ==%s==>  %s",
286	       port->portnum, ext_port_str, link_str, remote_str);
287}
288
289static int suppress(enum MAD_FIELDS field)
290{
291	int i = 0;
292	for (i = 0; i < sup_total; i++)
293		if (field == suppressed_fields[i])
294			return 1;
295	return 0;
296}
297
298static void report_suppressed(void)
299{
300	int i = 0;
301	printf("## Suppressed:");
302	for (i = 0; i < sup_total; i++)
303		printf(" %s", mad_field_name(suppressed_fields[i]));
304	printf("\n");
305}
306
307static int print_summary(void)
308{
309	printf("\n## Summary: %d nodes checked, %d bad nodes found\n",
310		summary.nodes_checked, summary.bad_nodes);
311	printf("##          %d ports checked, %d ports have errors beyond threshold\n",
312		summary.ports_checked, summary.bad_ports);
313	printf("## %s\n", threshold_str);
314	if (summary.pma_query_failures)
315		printf("##          %d PMA query failures\n", summary.pma_query_failures);
316	report_suppressed();
317	return (summary.bad_ports);
318}
319
320static void insert_lid2sl_table(struct sa_query_result *r)
321{
322    unsigned int i;
323    for (i = 0; i < r->result_cnt; i++) {
324	    ib_path_rec_t *p_pr = (ib_path_rec_t *)sa_get_query_rec(r->p_result_madw, i);
325	    lid2sl_table[cl_ntoh16(p_pr->dlid)] = ib_path_rec_sl(p_pr);
326    }
327}
328
329static int path_record_query(ib_gid_t sgid,uint64_t dguid)
330{
331     ib_path_rec_t pr;
332     ib_net64_t comp_mask = 0;
333     uint8_t reversible = 0;
334     struct sa_handle * h;
335
336     if (!(h = sa_get_handle()))
337	return -1;
338
339     ibd_timeout = DEFAULT_HALF_WORLD_PR_TIMEOUT;
340     memset(&pr, 0, sizeof(pr));
341
342     CHECK_AND_SET_GID(sgid, pr.sgid, PR, SGID);
343     if(dguid) {
344	     mad_encode_field(sgid.raw, IB_GID_GUID_F, &dguid);
345	     CHECK_AND_SET_GID(sgid, pr.dgid, PR, DGID);
346     }
347
348     CHECK_AND_SET_VAL(1, 8, -1, pr.num_path, PR, NUMBPATH);/*to get only one PathRecord for each source and destination pair*/
349     CHECK_AND_SET_VAL(1, 8, -1, reversible, PR, REVERSIBLE);/*for a reversible path*/
350     pr.num_path |= reversible << 7;
351     struct sa_query_result result;
352     int ret = sa_query(h, IB_MAD_METHOD_GET_TABLE,
353                        (uint16_t)IB_SA_ATTR_PATHRECORD,0,cl_ntoh64(comp_mask),ibd_sakey,
354                        &pr, sizeof(pr), &result);
355     if (ret) {
356             sa_free_handle(h);
357             fprintf(stderr, "Query SA failed: %s; sa call path_query failed\n", strerror(ret));
358             return ret;
359     }
360     if (result.status != IB_SA_MAD_STATUS_SUCCESS) {
361             sa_report_err(result.status);
362             ret = EIO;
363             goto Exit;
364     }
365
366     insert_lid2sl_table(&result);
367Exit:
368     sa_free_handle(h);
369     sa_free_result_mad(&result);
370     return ret;
371}
372
373static int query_and_dump(char *buf, size_t size, ib_portid_t * portid,
374			  char *node_name, int portnum,
375			  const char *attr_name, uint16_t attr_id,
376			  int start_field, int end_field)
377{
378	uint8_t pc[1024];
379	uint32_t val = 0;
380	int i, n;
381
382	memset(pc, 0, sizeof(pc));
383
384	if (!pma_query_via(pc, portid, portnum, ibd_timeout, attr_id,
385			   ibmad_port)) {
386		IBWARN("%s query failed on %s, %s port %d", attr_name,
387		       node_name, portid2str(portid), portnum);
388		summary.pma_query_failures++;
389		return 0;
390	}
391
392	for (n = 0, i = start_field; i < end_field; i++) {
393		mad_decode_field(pc, i, (void *)&val);
394		if (val)
395			n += snprintf(buf + n, size - n, " [%s == %u]",
396				      mad_field_name(i), val);
397	}
398
399	return n;
400}
401
402
403static int print_results(ib_portid_t * portid, char *node_name,
404			 ibnd_node_t * node, uint8_t * pc, int portnum,
405			 int *header_printed, uint8_t *pce, uint16_t cap_mask)
406{
407	char buf[1024];
408	char *str = buf;
409	uint32_t val = 0;
410	int i, n;
411
412	for (n = 0, i = IB_PC_ERR_SYM_F; i <= IB_PC_VL15_DROPPED_F; i++) {
413		if (suppress(i))
414			continue;
415
416		/* this is not a counter, skip it */
417		if (i == IB_PC_COUNTER_SELECT2_F)
418			continue;
419
420		mad_decode_field(pc, i, (void *)&val);
421		if (exceeds_threshold(i, val)) {
422			n += snprintf(str + n, 1024 - n, " [%s == %u]",
423				      mad_field_name(i), val);
424
425			/* If there are PortXmitDiscards, get details (if supported) */
426			if (i == IB_PC_XMT_DISCARDS_F && details) {
427				n += query_and_dump(str + n, sizeof(buf) - n, portid,
428						    node_name, portnum,
429						    "PortXmitDiscardDetails",
430						    IB_GSI_PORT_XMIT_DISCARD_DETAILS,
431						    IB_PC_RCV_LOCAL_PHY_ERR_F,
432						    IB_PC_RCV_ERR_LAST_F);
433				/* If there are PortRcvErrors, get details (if supported) */
434			} else if (i == IB_PC_ERR_RCV_F && details) {
435				n += query_and_dump(str + n, sizeof(buf) - n, portid,
436						    node_name, portnum,
437						    "PortRcvErrorDetails",
438						    IB_GSI_PORT_RCV_ERROR_DETAILS,
439						    IB_PC_XMT_INACT_DISC_F,
440						    IB_PC_XMT_DISC_LAST_F);
441			}
442		}
443	}
444
445	if (!suppress(IB_PC_XMT_WAIT_F)) {
446		mad_decode_field(pc, IB_PC_XMT_WAIT_F, (void *)&val);
447		if (exceeds_threshold(IB_PC_XMT_WAIT_F, val))
448			n += snprintf(str + n, 1024 - n, " [%s == %u]",
449				      mad_field_name(IB_PC_XMT_WAIT_F), val);
450	}
451
452	/* if we found errors. */
453	if (n != 0) {
454		if (data_counters) {
455			uint8_t *pkt = pc;
456			int start_field = IB_PC_XMT_BYTES_F;
457			int end_field = IB_PC_RCV_PKTS_F;
458
459			if (pce) {
460				pkt = pce;
461				start_field = IB_PC_EXT_XMT_BYTES_F;
462				if (cap_mask & IB_PM_EXT_WIDTH_SUPPORTED)
463					end_field = IB_PC_EXT_RCV_MPKTS_F;
464				else
465					end_field = IB_PC_EXT_RCV_PKTS_F;
466			}
467
468			for (i = start_field; i <= end_field; i++) {
469				uint64_t val64 = 0;
470				float val = 0;
471				char *unit = "";
472				mad_decode_field(pkt, i, (void *)&val64);
473				if (val64) {
474					int data = 0;
475					if (i == IB_PC_EXT_XMT_BYTES_F ||
476					    i == IB_PC_EXT_RCV_BYTES_F ||
477					    i == IB_PC_XMT_BYTES_F ||
478					    i == IB_PC_RCV_BYTES_F)
479						data = 1;
480					unit = conv_cnt_human_readable(val64,
481								&val, data);
482					n += snprintf(str + n, 1024 - n,
483						" [%s == %" PRIu64
484						" (%5.3f%s)]",
485						mad_field_name(i), val64, val,
486						unit);
487				}
488			}
489		}
490
491		if (!*header_printed) {
492			if (node->type == IB_NODE_SWITCH)
493				printf("Errors for 0x%" PRIx64 " \"%s\"\n",
494					node->ports[0]->guid, node_name);
495			else
496				printf("Errors for \"%s\"\n", node_name);
497			*header_printed = 1;
498			summary.bad_nodes++;
499		}
500
501		if (portnum == 0xFF) {
502			if (node->type == IB_NODE_SWITCH)
503				printf("   GUID 0x%" PRIx64 " port ALL:%s\n",
504				       node->ports[0]->guid, str);
505		} else {
506			printf("   GUID 0x%" PRIx64 " port %d:%s\n",
507			       node->ports[portnum]->guid, portnum, str);
508			if (port_config)
509				print_port_config(node, portnum);
510			summary.bad_ports++;
511		}
512	}
513	return (n);
514}
515
516static int query_cap_mask(ib_portid_t * portid, char *node_name, int portnum,
517			  uint16_t * cap_mask)
518{
519	uint8_t pc[1024] = { 0 };
520	uint16_t rc_cap_mask;
521
522	portid->sl = lid2sl_table[portid->lid];
523
524	/* PerfMgt ClassPortInfo is a required attribute */
525	if (!pma_query_via(pc, portid, portnum, ibd_timeout, CLASS_PORT_INFO,
526			   ibmad_port)) {
527		IBWARN("classportinfo query failed on %s, %s port %d",
528		       node_name, portid2str(portid), portnum);
529		summary.pma_query_failures++;
530		return -1;
531	}
532
533	/* ClassPortInfo should be supported as part of libibmad */
534	memcpy(&rc_cap_mask, pc + 2, sizeof(rc_cap_mask));	/* CapabilityMask */
535
536	*cap_mask = rc_cap_mask;
537	return 0;
538}
539
540static int print_data_cnts(ib_portid_t * portid, uint16_t cap_mask,
541			   char *node_name, ibnd_node_t * node, int portnum,
542			   int *header_printed)
543{
544	uint8_t pc[1024];
545	int i;
546	int start_field = IB_PC_XMT_BYTES_F;
547	int end_field = IB_PC_RCV_PKTS_F;
548
549	memset(pc, 0, 1024);
550
551	portid->sl = lid2sl_table[portid->lid];
552
553	if (cap_mask & (IB_PM_EXT_WIDTH_SUPPORTED | IB_PM_EXT_WIDTH_NOIETF_SUP)) {
554		if (!pma_query_via(pc, portid, portnum, ibd_timeout,
555				   IB_GSI_PORT_COUNTERS_EXT, ibmad_port)) {
556			IBWARN("IB_GSI_PORT_COUNTERS_EXT query failed on %s, %s port %d",
557			       node_name, portid2str(portid), portnum);
558			summary.pma_query_failures++;
559			return (1);
560		}
561		start_field = IB_PC_EXT_XMT_BYTES_F;
562		if (cap_mask & IB_PM_EXT_WIDTH_SUPPORTED)
563			end_field = IB_PC_EXT_RCV_MPKTS_F;
564		else
565			end_field = IB_PC_EXT_RCV_PKTS_F;
566	} else {
567		if (!pma_query_via(pc, portid, portnum, ibd_timeout,
568				   IB_GSI_PORT_COUNTERS, ibmad_port)) {
569			IBWARN("IB_GSI_PORT_COUNTERS query failed on %s, %s port %d",
570			       node_name, portid2str(portid), portnum);
571			summary.pma_query_failures++;
572			return (1);
573		}
574		start_field = IB_PC_XMT_BYTES_F;
575		end_field = IB_PC_RCV_PKTS_F;
576	}
577
578	if (!*header_printed) {
579		printf("Data Counters for 0x%" PRIx64 " \"%s\"\n", node->guid,
580		       node_name);
581		*header_printed = 1;
582	}
583
584	if (portnum == 0xFF)
585		printf("   GUID 0x%" PRIx64 " port ALL:", node->guid);
586	else
587		printf("   GUID 0x%" PRIx64 " port %d:",
588		       node->guid, portnum);
589
590	for (i = start_field; i <= end_field; i++) {
591		uint64_t val64 = 0;
592		float val = 0;
593		char *unit = "";
594		int data = 0;
595		mad_decode_field(pc, i, (void *)&val64);
596		if (i == IB_PC_EXT_XMT_BYTES_F || i == IB_PC_EXT_RCV_BYTES_F ||
597		    i == IB_PC_XMT_BYTES_F || i == IB_PC_RCV_BYTES_F)
598			data = 1;
599		unit = conv_cnt_human_readable(val64, &val, data);
600		printf(" [%s == %" PRIu64 " (%5.3f%s)]", mad_field_name(i),
601			val64, val, unit);
602	}
603	printf("\n");
604
605	if (portnum != 0xFF && port_config)
606		print_port_config(node, portnum);
607
608	return (0);
609}
610
611static int print_errors(ib_portid_t * portid, uint16_t cap_mask,
612			char *node_name, ibnd_node_t * node, int portnum,
613			int *header_printed)
614{
615	uint8_t pc[1024];
616	uint8_t pce[1024];
617	uint8_t *pc_ext = NULL;
618
619	memset(pc, 0, 1024);
620	memset(pce, 0, 1024);
621
622	portid->sl = lid2sl_table[portid->lid];
623
624	if (!pma_query_via(pc, portid, portnum, ibd_timeout,
625			   IB_GSI_PORT_COUNTERS, ibmad_port)) {
626		IBWARN("IB_GSI_PORT_COUNTERS query failed on %s, %s port %d",
627		       node_name, portid2str(portid), portnum);
628		summary.pma_query_failures++;
629		return (0);
630	}
631
632	if (cap_mask & (IB_PM_EXT_WIDTH_SUPPORTED | IB_PM_EXT_WIDTH_NOIETF_SUP)) {
633		if (!pma_query_via(pce, portid, portnum, ibd_timeout,
634		    IB_GSI_PORT_COUNTERS_EXT, ibmad_port)) {
635			IBWARN("IB_GSI_PORT_COUNTERS_EXT query failed on %s, %s port %d",
636			       node_name, portid2str(portid), portnum);
637			summary.pma_query_failures++;
638			return (0);
639		}
640		pc_ext = pce;
641	}
642
643	if (!(cap_mask & IB_PM_PC_XMIT_WAIT_SUP)) {
644		/* if PortCounters:PortXmitWait not supported clear this counter */
645		uint32_t foo = 0;
646		mad_encode_field(pc, IB_PC_XMT_WAIT_F, &foo);
647	}
648	return (print_results(portid, node_name, node, pc, portnum,
649			      header_printed, pc_ext, cap_mask));
650}
651
652uint8_t *reset_pc_ext(void *rcvbuf, ib_portid_t * dest,
653		      int port, unsigned mask, unsigned timeout,
654		      const struct ibmad_port * srcport)
655{
656	ib_rpc_t rpc = { 0 };
657	int lid = dest->lid;
658
659	DEBUG("lid %u port %d mask 0x%x", lid, port, mask);
660
661	if (lid == -1) {
662		IBWARN("only lid routed is supported");
663		return NULL;
664	}
665
666	if (!mask)
667		mask = ~0;
668
669	rpc.mgtclass = IB_PERFORMANCE_CLASS;
670	rpc.method = IB_MAD_METHOD_SET;
671	rpc.attr.id = IB_GSI_PORT_COUNTERS_EXT;
672
673	memset(rcvbuf, 0, IB_MAD_SIZE);
674
675	/* Same for attribute IDs */
676	mad_set_field(rcvbuf, 0, IB_PC_EXT_PORT_SELECT_F, port);
677	mad_set_field(rcvbuf, 0, IB_PC_EXT_COUNTER_SELECT_F, mask);
678	rpc.attr.mod = 0;
679	rpc.timeout = timeout;
680	rpc.datasz = IB_PC_DATA_SZ;
681	rpc.dataoffs = IB_PC_DATA_OFFS;
682	if (!dest->qp)
683		dest->qp = 1;
684	if (!dest->qkey)
685		dest->qkey = IB_DEFAULT_QP1_QKEY;
686
687	return mad_rpc(srcport, &rpc, dest, rcvbuf, rcvbuf);
688}
689
690static void clear_port(ib_portid_t * portid, uint16_t cap_mask,
691		       char *node_name, int port)
692{
693	uint8_t pc[1024] = { 0 };
694	/* bits defined in Table 228 PortCounters CounterSelect and
695	 * CounterSelect2
696	 */
697	uint32_t mask = 0;
698
699	if (clear_errors) {
700		mask |= 0xFFF;
701		if (cap_mask & IB_PM_PC_XMIT_WAIT_SUP)
702			mask |= 0x10000;
703	}
704	if (clear_counts)
705		mask |= 0xF000;
706
707	if (mask)
708		if (!performance_reset_via(pc, portid, port, mask, ibd_timeout,
709					   IB_GSI_PORT_COUNTERS, ibmad_port))
710			fprintf(stderr, "Failed to reset errors %s port %d\n", node_name,
711				port);
712
713	if (clear_errors && details) {
714		memset(pc, 0, 1024);
715		performance_reset_via(pc, portid, port, 0xf, ibd_timeout,
716				      IB_GSI_PORT_XMIT_DISCARD_DETAILS,
717				      ibmad_port);
718		memset(pc, 0, 1024);
719		performance_reset_via(pc, portid, port, 0x3f, ibd_timeout,
720				      IB_GSI_PORT_RCV_ERROR_DETAILS,
721				      ibmad_port);
722	}
723
724	if (clear_counts &&
725	    (cap_mask &
726	     (IB_PM_EXT_WIDTH_SUPPORTED | IB_PM_EXT_WIDTH_NOIETF_SUP))) {
727		if (cap_mask & IB_PM_EXT_WIDTH_SUPPORTED)
728			mask = 0xFF;
729		else
730			mask = 0x0F;
731
732		if (!reset_pc_ext(pc, portid, port, mask, ibd_timeout,
733		    ibmad_port))
734			fprintf(stderr, "Failed to reset extended data counters %s, "
735				"%s port %d\n", node_name, portid2str(portid),
736				port);
737	}
738}
739
740void print_node(ibnd_node_t * node, void *user_data)
741{
742	int header_printed = 0;
743	int p = 0;
744	int startport = 1;
745	int type = 0;
746	int all_port_sup = 0;
747	ib_portid_t portid = { 0 };
748	uint16_t cap_mask = 0;
749	char *node_name = NULL;
750
751	switch (node->type) {
752	case IB_NODE_SWITCH:
753		type = PRINT_SWITCH;
754		break;
755	case IB_NODE_CA:
756		type = PRINT_CA;
757		break;
758	case IB_NODE_ROUTER:
759		type = PRINT_ROUTER;
760		break;
761	}
762
763	if ((type & node_type_to_print) == 0)
764		return;
765
766	if (node->type == IB_NODE_SWITCH && node->smaenhsp0)
767		startport = 0;
768
769	node_name = remap_node_name(node_name_map, node->guid, node->nodedesc);
770
771	if (node->type == IB_NODE_SWITCH) {
772		ib_portid_set(&portid, node->smalid, 0, 0);
773		p = 0;
774	} else {
775		for (p = 1; p <= node->numports; p++) {
776			if (node->ports[p]) {
777				ib_portid_set(&portid,
778					      node->ports[p]->base_lid,
779					      0, 0);
780				break;
781			}
782		}
783	}
784
785	if ((query_cap_mask(&portid, node_name, p, &cap_mask) == 0) &&
786	    (cap_mask & IB_PM_ALL_PORT_SELECT))
787		all_port_sup = 1;
788
789	if (data_counters_only) {
790		for (p = startport; p <= node->numports; p++) {
791			if (node->ports[p]) {
792				if (node->type == IB_NODE_SWITCH)
793					ib_portid_set(&portid, node->smalid, 0, 0);
794				else
795					ib_portid_set(&portid, node->ports[p]->base_lid,
796						      0, 0);
797
798				print_data_cnts(&portid, cap_mask, node_name, node, p,
799						&header_printed);
800				summary.ports_checked++;
801				if (!all_port_sup)
802					clear_port(&portid, cap_mask, node_name, p);
803			}
804		}
805	} else {
806		if (all_port_sup)
807			if (!print_errors(&portid, cap_mask, node_name, node,
808					  0xFF, &header_printed)) {
809				summary.ports_checked += node->numports;
810				goto clear;
811			}
812
813		for (p = startport; p <= node->numports; p++) {
814			if (node->ports[p]) {
815				if (node->type == IB_NODE_SWITCH)
816					ib_portid_set(&portid, node->smalid, 0, 0);
817				else
818					ib_portid_set(&portid, node->ports[p]->base_lid,
819						      0, 0);
820
821				print_errors(&portid, cap_mask, node_name, node, p,
822					     &header_printed);
823				summary.ports_checked++;
824				if (!all_port_sup)
825					clear_port(&portid, cap_mask, node_name, p);
826			}
827		}
828	}
829
830clear:
831	summary.nodes_checked++;
832	if (all_port_sup)
833		clear_port(&portid, cap_mask, node_name, 0xFF);
834
835	free(node_name);
836}
837
838static void add_suppressed(enum MAD_FIELDS field)
839{
840	if (sup_total >= SUP_MAX) {
841		IBWARN("Maximum (%d) fields have been suppressed; skipping %s",
842		       sup_total, mad_field_name(field));
843		return;
844	}
845	suppressed_fields[sup_total++] = field;
846}
847
848static void calculate_suppressed_fields(char *str)
849{
850	enum MAD_FIELDS f;
851	char *val, *lasts = NULL;
852	char *tmp = strdup(str);
853
854	val = strtok_r(tmp, ",", &lasts);
855	while (val) {
856		for (f = IB_PC_FIRST_F; f <= IB_PC_LAST_F; f++)
857			if (strcmp(val, mad_field_name(f)) == 0)
858				add_suppressed(f);
859		val = strtok_r(NULL, ",", &lasts);
860	}
861
862	free(tmp);
863}
864
865static int process_opt(void *context, int ch, char *optarg)
866{
867	struct ibnd_config *cfg = context;
868	switch (ch) {
869	case 's':
870		calculate_suppressed_fields(optarg);
871		break;
872	case 'c':
873		/* Right now this is the only "common" error */
874		add_suppressed(IB_PC_ERR_SWITCH_REL_F);
875		break;
876	case 1:
877		node_name_map_file = strdup(optarg);
878		break;
879	case 2:
880		data_counters++;
881		break;
882	case 3:
883		node_type_to_print |= PRINT_SWITCH;
884		break;
885	case 4:
886		node_type_to_print |= PRINT_CA;
887		break;
888	case 5:
889		node_type_to_print |= PRINT_ROUTER;
890		break;
891	case 6:
892		details = 1;
893		break;
894	case 7:
895		load_cache_file = strdup(optarg);
896		break;
897	case 8:
898		threshold_file = strdup(optarg);
899		break;
900	case 9:
901		data_counters_only = 1;
902		break;
903	case 10:
904		obtain_sl = 0;
905		break;
906	case 'G':
907	case 'S':
908		port_guid_str = optarg;
909		port_guid = strtoull(optarg, 0, 0);
910		break;
911	case 'D':
912		dr_path = strdup(optarg);
913		break;
914	case 'r':
915		port_config++;
916		break;
917	case 'R':		/* nop */
918		break;
919	case 'k':
920		clear_errors = 1;
921		break;
922	case 'K':
923		clear_counts = 1;
924		break;
925	case 'o':
926		cfg->max_smps = strtoul(optarg, NULL, 0);
927		break;
928	default:
929		return -1;
930	}
931
932	return 0;
933}
934
935int main(int argc, char **argv)
936{
937	struct ibnd_config config = { 0 };
938	int resolved = -1;
939	ib_portid_t portid = { 0 };
940	ib_portid_t self_portid = { 0 };
941	int rc = 0;
942	ibnd_fabric_t *fabric = NULL;
943	ib_gid_t self_gid;
944	int port = 0;
945
946	int mgmt_classes[4] = { IB_SMI_CLASS, IB_SMI_DIRECT_CLASS, IB_SA_CLASS,
947		IB_PERFORMANCE_CLASS
948	};
949
950	const struct ibdiag_opt opts[] = {
951		{"suppress", 's', 1, "<err1,err2,...>",
952		 "suppress errors listed"},
953		{"suppress-common", 'c', 0, NULL,
954		 "suppress some of the common counters"},
955		{"node-name-map", 1, 1, "<file>", "node name map file"},
956		{"port-guid", 'G', 1, "<port_guid>",
957		 "report the node containing the port specified by <port_guid>"},
958		{"", 'S', 1, "<port_guid>",
959		 "Same as \"-G\" for backward compatibility"},
960		{"Direct", 'D', 1, "<dr_path>",
961		 "report the node containing the port specified by <dr_path>"},
962		{"skip-sl", 10, 0, NULL,"don't obtain SL to all destinations"},
963		{"report-port", 'r', 0, NULL,
964		 "report port link information"},
965		{"threshold-file", 8, 1, NULL,
966		 "specify an alternate threshold file, default: " DEF_THRES_FILE},
967		{"GNDN", 'R', 0, NULL,
968		 "(This option is obsolete and does nothing)"},
969		{"data", 2, 0, NULL, "include data counters for ports with errors"},
970		{"switch", 3, 0, NULL, "print data for switches only"},
971		{"ca", 4, 0, NULL, "print data for CA's only"},
972		{"router", 5, 0, NULL, "print data for routers only"},
973		{"details", 6, 0, NULL, "include transmit discard details"},
974		{"counters", 9, 0, NULL, "print data counters only"},
975		{"clear-errors", 'k', 0, NULL,
976		 "Clear error counters after read"},
977		{"clear-counts", 'K', 0, NULL,
978		 "Clear data counters after read"},
979		{"load-cache", 7, 1, "<file>",
980		 "filename of ibnetdiscover cache to load"},
981		{"outstanding_smps", 'o', 1, NULL,
982		 "specify the number of outstanding SMP's which should be "
983		 "issued during the scan"},
984		{0}
985	};
986	char usage_args[] = "";
987
988	memset(suppressed_fields, 0, sizeof suppressed_fields);
989	ibdiag_process_opts(argc, argv, &config, "cDGKLnRrSs", opts, process_opt,
990			    usage_args, NULL);
991
992	argc -= optind;
993	argv += optind;
994
995	if (!node_type_to_print)
996		node_type_to_print = PRINT_ALL;
997
998	ibmad_port = mad_rpc_open_port(ibd_ca, ibd_ca_port, mgmt_classes, 4);
999	if (!ibmad_port)
1000		IBEXIT("Failed to open port; %s:%d\n", ibd_ca, ibd_ca_port);
1001
1002	smp_mkey_set(ibmad_port, ibd_mkey);
1003
1004	if (ibd_timeout) {
1005		mad_rpc_set_timeout(ibmad_port, ibd_timeout);
1006		config.timeout_ms = ibd_timeout;
1007	}
1008
1009	config.flags = ibd_ibnetdisc_flags;
1010	config.mkey = ibd_mkey;
1011
1012	if (dr_path && load_cache_file) {
1013		mad_rpc_close_port(ibmad_port);
1014		fprintf(stderr, "Cannot specify cache and direct route path\n");
1015		exit(-1);
1016	}
1017
1018	if (resolve_self(ibd_ca, ibd_ca_port, &self_portid, &port, &self_gid.raw) < 0) {
1019		mad_rpc_close_port(ibmad_port);
1020		IBEXIT("can't resolve self port %s", argv[0]);
1021	}
1022
1023	node_name_map = open_node_name_map(node_name_map_file);
1024
1025	/* limit the scan the fabric around the target */
1026	if (dr_path) {
1027		if ((resolved =
1028		     resolve_portid_str(ibd_ca, ibd_ca_port, &portid, dr_path,
1029					IB_DEST_DRPATH, NULL, ibmad_port)) < 0)
1030			IBWARN("Failed to resolve %s; attempting full scan",
1031			       dr_path);
1032	} else if (port_guid_str) {
1033		if ((resolved =
1034		     resolve_portid_str(ibd_ca, ibd_ca_port, &portid,
1035					port_guid_str, IB_DEST_GUID, ibd_sm_id,
1036					       ibmad_port)) < 0)
1037			IBWARN("Failed to resolve %s; attempting full scan",
1038			       port_guid_str);
1039		if(obtain_sl)
1040			lid2sl_table[portid.lid] = portid.sl;
1041	}
1042
1043	mad_rpc_close_port(ibmad_port);
1044
1045	if (load_cache_file) {
1046		if ((fabric = ibnd_load_fabric(load_cache_file, 0)) == NULL) {
1047			fprintf(stderr, "loading cached fabric failed\n");
1048			rc = -1;
1049			goto close_port;
1050		}
1051	} else {
1052		if (resolved >= 0) {
1053			if (!config.max_hops)
1054				config.max_hops = 1;
1055			if (!(fabric = ibnd_discover_fabric(ibd_ca, ibd_ca_port,
1056						    &portid, &config)))
1057				IBWARN("Single node discover failed;"
1058				       " attempting full scan");
1059		}
1060
1061		if (!fabric && !(fabric = ibnd_discover_fabric(ibd_ca,
1062							       ibd_ca_port,
1063							       NULL,
1064							       &config))) {
1065			fprintf(stderr, "discover failed\n");
1066			rc = -1;
1067			goto close_port;
1068		}
1069	}
1070
1071	set_thresholds(threshold_file);
1072
1073	/* reopen the global ibmad_port */
1074	ibmad_port = mad_rpc_open_port(ibd_ca, ibd_ca_port,
1075				       mgmt_classes, 4);
1076	if (!ibmad_port) {
1077		ibnd_destroy_fabric(fabric);
1078		close_node_name_map(node_name_map);
1079		IBEXIT("Failed to reopen port: %s:%d\n",
1080			ibd_ca, ibd_ca_port);
1081	}
1082
1083	smp_mkey_set(ibmad_port, ibd_mkey);
1084
1085	if (ibd_timeout)
1086		mad_rpc_set_timeout(ibmad_port, ibd_timeout);
1087
1088	if (port_guid_str) {
1089		ibnd_port_t *port = ibnd_find_port_guid(fabric, port_guid);
1090		if (port)
1091			print_node(port->node, NULL);
1092		else
1093			fprintf(stderr, "Failed to find node: %s\n",
1094				port_guid_str);
1095	} else if (dr_path) {
1096		ibnd_port_t *port;
1097		uint8_t ni[IB_SMP_DATA_SIZE] = { 0 };
1098		if (!smp_query_via(ni, &portid, IB_ATTR_NODE_INFO, 0,
1099			   ibd_timeout, ibmad_port)) {
1100				fprintf(stderr, "Failed to query local Node Info\n");
1101				goto destroy_fabric;
1102		}
1103
1104		mad_decode_field(ni, IB_NODE_PORT_GUID_F, &(port_guid));
1105
1106		port = ibnd_find_port_guid(fabric, port_guid);
1107		if (port) {
1108			if(obtain_sl)
1109				if(path_record_query(self_gid,port->guid))
1110					goto destroy_fabric;
1111			print_node(port->node, NULL);
1112		} else
1113			fprintf(stderr, "Failed to find node: %s\n", dr_path);
1114	} else {
1115		if(obtain_sl)
1116			if(path_record_query(self_gid,0))
1117				goto destroy_fabric;
1118
1119		ibnd_iter_nodes(fabric, print_node, NULL);
1120	}
1121
1122	rc = print_summary();
1123	if (rc)
1124		rc = 1;
1125
1126destroy_fabric:
1127	mad_rpc_close_port(ibmad_port);
1128	ibnd_destroy_fabric(fabric);
1129
1130close_port:
1131	close_node_name_map(node_name_map);
1132	exit(rc);
1133}
1134