pmcpl_calltree.c revision 206994
1/*-
2 * Copyright (c) 2009, Fabien Thomas
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 *    notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 *    notice, this list of conditions and the following disclaimer in the
12 *    documentation and/or other materials provided with the distribution.
13 *
14 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
15 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
17 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
18 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
20 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
22 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
23 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
24 * SUCH DAMAGE.
25 */
26
27/*
28 * Process hwpmc(4) samples as calltree.
29 *
30 * Output file format compatible with Kcachegrind (kdesdk).
31 * Handle top mode with a sorted tree display.
32 */
33
34#include <sys/cdefs.h>
35__FBSDID("$FreeBSD: head/usr.sbin/pmcstat/pmcpl_calltree.c 206994 2010-04-21 11:50:13Z fabient $");
36
37#include <sys/param.h>
38#include <sys/endian.h>
39#include <sys/queue.h>
40
41#include <assert.h>
42#include <curses.h>
43#include <ctype.h>
44#include <err.h>
45#include <errno.h>
46#include <fcntl.h>
47#include <pmc.h>
48#include <pmclog.h>
49#include <sysexits.h>
50#include <stdint.h>
51#include <stdio.h>
52#include <stdlib.h>
53#include <string.h>
54#include <unistd.h>
55#include <sysexits.h>
56
57#include "pmcstat.h"
58#include "pmcstat_log.h"
59#include "pmcstat_top.h"
60#include "pmcpl_calltree.h"
61
62#define PMCPL_CT_GROWSIZE	4
63
64static pmcstat_interned_string pmcpl_ct_prevfn;
65
66static int pmcstat_skiplink = 0;
67
68struct pmcpl_ct_node;
69
70/* Get the sample value for PMC a. */
71#define PMCPL_CT_SAMPLE(a, b) \
72	((a) < (b)->npmcs ? (b)->sb[a] : 0)
73
74/* Get the sample value in percent related to rsamples. */
75#define PMCPL_CT_SAMPLEP(a, b) \
76	(PMCPL_CT_SAMPLE(a, b) * 100.0 / rsamples->sb[a])
77
78struct pmcpl_ct_sample {
79	int		npmcs;		/* Max pmc index available. */
80	unsigned	*sb;		/* Sample buffer for 0..npmcs. */
81};
82
83struct pmcpl_ct_arc {
84	struct pmcpl_ct_sample	pcta_samples;
85	struct pmcpl_ct_sample	pcta_callid;
86	unsigned		pcta_call;
87	struct pmcpl_ct_node	*pcta_child;
88};
89
90struct pmcpl_ct_instr {
91	uintfptr_t		pctf_func;
92	struct pmcpl_ct_sample	pctf_samples;
93};
94
95/*
96 * Each calltree node is tracked by a pmcpl_ct_node struct.
97 */
98struct pmcpl_ct_node {
99#define PMCPL_PCT_TAG	0x00000001	/* Loop detection. */
100	uint32_t		pct_flags;
101	struct pmcstat_image	*pct_image;
102	uintfptr_t		pct_func;
103	struct pmcpl_ct_sample	pct_samples;
104
105	int			pct_narc;
106	int			pct_arc_c;
107	struct pmcpl_ct_arc 	*pct_arc;
108
109	/* TODO: optimize for large number of items. */
110	int			pct_ninstr;
111	int			pct_instr_c;
112	struct pmcpl_ct_instr	*pct_instr;
113};
114
115struct pmcpl_ct_node_hash {
116	struct pmcpl_ct_node  *pch_ctnode;
117	LIST_ENTRY(pmcpl_ct_node_hash) pch_next;
118};
119
120struct pmcpl_ct_sample pmcpl_ct_callid;
121
122#define PMCPL_CT_MAXCOL		PMC_CALLCHAIN_DEPTH_MAX
123#define PMCPL_CT_MAXLINE	256
124struct pmcpl_ct_node  *pmcpl_ct_topscreen[PMCPL_CT_MAXCOL][PMCPL_CT_MAXLINE];
125
126/*
127 * All nodes indexed by function/image name are placed in a hash table.
128 */
129static LIST_HEAD(,pmcpl_ct_node_hash) pmcpl_ct_node_hash[PMCSTAT_NHASH];
130
131/*
132 * Root node for the graph.
133 */
134static struct pmcpl_ct_node *pmcpl_ct_root;
135
136/*
137 * Prototypes
138 */
139
140/*
141 * Initialize a samples.
142 */
143
144static void
145pmcpl_ct_samples_init(struct pmcpl_ct_sample *samples)
146{
147
148	samples->npmcs = 0;
149	samples->sb = NULL;
150}
151
152/*
153 * Free a samples.
154 */
155
156static void
157pmcpl_ct_samples_free(struct pmcpl_ct_sample *samples)
158{
159
160	samples->npmcs = 0;
161	free(samples->sb);
162	samples->sb = NULL;
163}
164
165/*
166 * Grow a sample block to store pmcstat_npmcs PMCs.
167 */
168
169static void
170pmcpl_ct_samples_grow(struct pmcpl_ct_sample *samples)
171{
172	int npmcs;
173
174	/* Enough storage. */
175	if (pmcstat_npmcs <= samples->npmcs)
176                return;
177
178	npmcs = samples->npmcs +
179	    max(pmcstat_npmcs - samples->npmcs, PMCPL_CT_GROWSIZE);
180	samples->sb = realloc(samples->sb, npmcs * sizeof(unsigned));
181	if (samples->sb == NULL)
182		errx(EX_SOFTWARE, "ERROR: out of memory");
183	bzero((char *)samples->sb + samples->npmcs * sizeof(unsigned),
184	    (npmcs - samples->npmcs) * sizeof(unsigned));
185	samples->npmcs = npmcs;
186}
187
188/*
189 * Compute the sum of all root arcs.
190 */
191
192static void
193pmcpl_ct_samples_root(struct pmcpl_ct_sample *samples)
194{
195	int i, pmcin;
196
197	pmcpl_ct_samples_init(samples);
198	pmcpl_ct_samples_grow(samples);
199
200	for (i = 0; i < pmcpl_ct_root->pct_narc; i++)
201		for (pmcin = 0; pmcin < pmcstat_npmcs; pmcin++)
202			samples->sb[pmcin] += PMCPL_CT_SAMPLE(pmcin,
203			    &pmcpl_ct_root->pct_arc[i].pcta_samples);
204}
205
206/*
207 * Grow the arc table.
208 */
209
210static void
211pmcpl_ct_arc_grow(int cursize, int *maxsize, struct pmcpl_ct_arc **items)
212{
213	int nmaxsize;
214
215	if (cursize < *maxsize)
216		return;
217
218	nmaxsize = *maxsize + max(cursize + 1 - *maxsize, PMCPL_CT_GROWSIZE);
219	*items = realloc(*items, nmaxsize * sizeof(struct pmcpl_ct_arc));
220	if (*items == NULL)
221		errx(EX_SOFTWARE, "ERROR: out of memory");
222	bzero((char *)*items + *maxsize * sizeof(struct pmcpl_ct_arc),
223	    (nmaxsize - *maxsize) * sizeof(struct pmcpl_ct_arc));
224	*maxsize = nmaxsize;
225}
226
227/*
228 * Compare two arc by samples value.
229 */
230static int
231pmcpl_ct_arc_compare(void *thunk, const void *a, const void *b)
232{
233	const struct pmcpl_ct_arc *ct1, *ct2;
234	int pmcin = *(int *)thunk;
235
236	ct1 = (const struct pmcpl_ct_arc *) a;
237	ct2 = (const struct pmcpl_ct_arc *) b;
238
239	/* Sort in reverse order */
240	if (PMCPL_CT_SAMPLE(pmcin, &ct1->pcta_samples) <
241	    PMCPL_CT_SAMPLE(pmcin, &ct2->pcta_samples))
242		return (1);
243	if (PMCPL_CT_SAMPLE(pmcin, &ct1->pcta_samples) >
244	    PMCPL_CT_SAMPLE(pmcin, &ct2->pcta_samples))
245		return (-1);
246	return (0);
247}
248
249/*
250 * Grow the instr table.
251 */
252
253static void
254pmcpl_ct_instr_grow(int cursize, int *maxsize, struct pmcpl_ct_instr **items)
255{
256	int nmaxsize;
257
258	if (cursize < *maxsize)
259		return;
260
261	nmaxsize = *maxsize + max(cursize + 1 - *maxsize, PMCPL_CT_GROWSIZE);
262	*items = realloc(*items, nmaxsize * sizeof(struct pmcpl_ct_instr));
263	if (*items == NULL)
264		errx(EX_SOFTWARE, "ERROR: out of memory");
265	bzero((char *)*items + *maxsize * sizeof(struct pmcpl_ct_instr),
266	    (nmaxsize - *maxsize) * sizeof(struct pmcpl_ct_instr));
267	*maxsize = nmaxsize;
268}
269
270/*
271 * Add a new instruction sample to given node.
272 */
273
274static void
275pmcpl_ct_instr_add(struct pmcpl_ct_node *ct, int pmcin, uintfptr_t pc)
276{
277	int i;
278	struct pmcpl_ct_instr *in;
279
280	for (i = 0; i<ct->pct_ninstr; i++) {
281		if (ct->pct_instr[i].pctf_func == pc) {
282			in = &ct->pct_instr[i];
283			pmcpl_ct_samples_grow(&in->pctf_samples);
284			in->pctf_samples.sb[pmcin]++;
285			return;
286		}
287	}
288
289	pmcpl_ct_instr_grow(ct->pct_ninstr, &ct->pct_instr_c, &ct->pct_instr);
290	in = &ct->pct_instr[ct->pct_ninstr];
291	in->pctf_func = pc;
292	pmcpl_ct_samples_init(&in->pctf_samples);
293	pmcpl_ct_samples_grow(&in->pctf_samples);
294	in->pctf_samples.sb[pmcin] = 1;
295	ct->pct_ninstr++;
296}
297
298/*
299 * Allocate a new node.
300 */
301
302static struct pmcpl_ct_node *
303pmcpl_ct_node_allocate(struct pmcstat_image *image, uintfptr_t pc)
304{
305	struct pmcpl_ct_node *ct;
306
307	if ((ct = malloc(sizeof(*ct))) == NULL)
308		err(EX_OSERR, "ERROR: Cannot allocate callgraph node");
309
310	ct->pct_flags	= 0;
311	ct->pct_image 	= image;
312	ct->pct_func	= pc;
313
314	pmcpl_ct_samples_init(&ct->pct_samples);
315
316	ct->pct_narc	= 0;
317	ct->pct_arc_c	= 0;
318	ct->pct_arc	= NULL;
319
320	ct->pct_ninstr	= 0;
321	ct->pct_instr_c	= 0;
322	ct->pct_instr	= NULL;
323
324	return (ct);
325}
326
327/*
328 * Free a node.
329 */
330
331static void
332pmcpl_ct_node_free(struct pmcpl_ct_node *ct)
333{
334	int i;
335
336	for (i = 0; i < ct->pct_narc; i++) {
337		pmcpl_ct_samples_free(&ct->pct_arc[i].pcta_samples);
338		pmcpl_ct_samples_free(&ct->pct_arc[i].pcta_callid);
339	}
340
341	pmcpl_ct_samples_free(&ct->pct_samples);
342	free(ct->pct_arc);
343	free(ct->pct_instr);
344	free(ct);
345}
346
347/*
348 * Clear the graph tag on each node.
349 */
350static void
351pmcpl_ct_node_cleartag(void)
352{
353	int i;
354	struct pmcpl_ct_node_hash *pch;
355
356	for (i = 0; i < PMCSTAT_NHASH; i++)
357		LIST_FOREACH(pch, &pmcpl_ct_node_hash[i], pch_next)
358			pch->pch_ctnode->pct_flags &= ~PMCPL_PCT_TAG;
359
360	pmcpl_ct_root->pct_flags &= ~PMCPL_PCT_TAG;
361}
362
363/*
364 * Print the callchain line by line with maximum cost at top.
365 */
366
367static int
368pmcpl_ct_node_dumptop(int pmcin, struct pmcpl_ct_node *ct,
369    struct pmcpl_ct_sample *rsamples, int x, int *y, int maxy)
370{
371	int i;
372
373	if (ct->pct_flags & PMCPL_PCT_TAG)
374		return 0;
375
376	ct->pct_flags |= PMCPL_PCT_TAG;
377
378	if (x >= PMCPL_CT_MAXCOL) {
379		pmcpl_ct_topscreen[x][*y] = NULL;
380		return 1;
381	}
382	pmcpl_ct_topscreen[x][*y] = ct;
383
384	/*
385	 * This is a terminal node
386	 */
387	if (ct->pct_narc == 0) {
388		pmcpl_ct_topscreen[x+1][*y] = NULL;
389		if (*y >= PMCPL_CT_MAXLINE ||
390		    *y >= maxy)
391			return 1;
392		*y = *y + 1;
393		for (i=0; i < x; i++)
394			pmcpl_ct_topscreen[i][*y] =
395			    pmcpl_ct_topscreen[i][*y - 1];
396		return 0;
397	}
398
399	/*
400	 * Quicksort the arcs.
401	 */
402	qsort_r(ct->pct_arc, ct->pct_narc, sizeof(struct pmcpl_ct_arc),
403	    &pmcin, pmcpl_ct_arc_compare);
404
405	for (i = 0; i < ct->pct_narc; i++) {
406		/* Skip this arc if there is no sample at all. */
407		if (PMCPL_CT_SAMPLE(pmcin,
408		    &ct->pct_arc[i].pcta_samples) == 0)
409			continue;
410		if (PMCPL_CT_SAMPLEP(pmcin,
411		    &ct->pct_arc[i].pcta_samples) > pmcstat_threshold) {
412			if (pmcpl_ct_node_dumptop(pmcin,
413			        ct->pct_arc[i].pcta_child,
414			        rsamples, x+1, y, maxy))
415				return 1;
416		}
417	}
418
419	return 0;
420}
421
422/*
423 * Format and display given PMC index.
424 */
425
426static void
427pmcpl_ct_node_printtop(struct pmcpl_ct_sample *rsamples, int pmcin, int maxy)
428{
429	int v_attrs, ns_len, vs_len, is_len, width, indentwidth, x, y;
430	float v;
431	char ns[30], vs[10], is[20];
432	struct pmcpl_ct_node *ct;
433	struct pmcstat_symbol *sym;
434	const char *space = " ";
435
436	for (y = 0; y < maxy; y++) {
437		/* Output image. */
438		ct = pmcpl_ct_topscreen[0][y];
439		snprintf(is, sizeof(is), "%-10.10s",
440		    pmcstat_string_unintern(ct->pct_image->pi_name));
441		PMCSTAT_PRINTW("%s ", is);
442		width = indentwidth = 11;
443
444		for (x = 0; pmcpl_ct_topscreen[x][y] !=NULL; x++) {
445
446			ct = pmcpl_ct_topscreen[x][y];
447
448			ns[0] = '\0'; ns_len = 0;
449			vs[0] = '\0'; vs_len = 0;
450			is[0] = '\0'; is_len = 0;
451
452			/* Format value. */
453			v = PMCPL_CT_SAMPLEP(pmcin, &ct->pct_samples);
454			if (v > pmcstat_threshold)
455				vs_len  = snprintf(vs, sizeof(vs), "(%.1f%%)", v);
456			v_attrs = PMCSTAT_ATTRPERCENT(v);
457
458			if (pmcstat_skiplink && v <= pmcstat_threshold) {
459				PMCSTAT_PRINTW(". ");
460				width += 2;
461				continue;
462			}
463			sym = pmcstat_symbol_search(ct->pct_image, ct->pct_func);
464			if (sym != NULL) {
465				ns_len = snprintf(ns, sizeof(ns), "%s",
466				    pmcstat_string_unintern(sym->ps_name));
467			} else
468				ns_len = snprintf(ns, sizeof(ns), "%p",
469				    (void *)ct->pct_func);
470
471			/* Format image. */
472			if (x > 0 && pmcpl_ct_topscreen[x-1][y]->pct_image != ct->pct_image)
473				is_len = snprintf(is, sizeof(is), "@%s",
474				    pmcstat_string_unintern(ct->pct_image->pi_name));
475
476			/* Check for line wrap. */
477			width += ns_len + is_len + vs_len + 1;
478			if (width >= pmcstat_displaywidth) {
479				maxy--;
480				if (y >= maxy)
481					break;
482				PMCSTAT_PRINTW("\n%*s", indentwidth, space);
483				width = indentwidth + ns_len + is_len + vs_len;
484			}
485
486			PMCSTAT_ATTRON(v_attrs);
487			PMCSTAT_PRINTW("%s%s%s ", ns, is, vs);
488			PMCSTAT_ATTROFF(v_attrs);
489		}
490		PMCSTAT_PRINTW("\n");
491	}
492}
493
494/*
495 * Output top mode snapshot.
496 */
497
498void
499pmcpl_ct_topdisplay(void)
500{
501	int i, x, y, pmcin;
502	struct pmcpl_ct_sample r, *rsamples;
503
504	rsamples = &r;
505	pmcpl_ct_samples_root(rsamples);
506
507	PMCSTAT_PRINTW("%-10.10s %s\n", "IMAGE", "CALLTREE");
508
509	for (pmcin = 0; pmcin < pmcstat_npmcs; pmcin++) {
510		/* Filter PMCs. */
511		if (pmcstat_pmcinfilter != pmcin)
512			continue;
513
514		pmcpl_ct_node_cleartag();
515
516		/* Quicksort the arcs. */
517		qsort_r(pmcpl_ct_root->pct_arc,
518		    pmcpl_ct_root->pct_narc,
519		    sizeof(struct pmcpl_ct_arc),
520		    &pmcin, pmcpl_ct_arc_compare);
521
522		x = y = 0;
523		for (i = 0; i < pmcpl_ct_root->pct_narc; i++) {
524			/* Skip this arc if there is no sample at all. */
525			if (PMCPL_CT_SAMPLE(pmcin,
526			    &pmcpl_ct_root->pct_arc[i].pcta_samples) == 0)
527				continue;
528			if (PMCPL_CT_SAMPLEP(pmcin,
529			    &pmcpl_ct_root->pct_arc[i].pcta_samples) <=
530			    pmcstat_threshold)
531				continue;
532			if (pmcpl_ct_node_dumptop(pmcin,
533			        pmcpl_ct_root->pct_arc[i].pcta_child,
534			        rsamples, x, &y, pmcstat_displayheight - 2)) {
535				break;
536			}
537		}
538
539		pmcpl_ct_node_printtop(rsamples, pmcin, y);
540	}
541	pmcpl_ct_samples_free(rsamples);
542}
543
544/*
545 * Handle top mode keypress.
546 */
547
548int
549pmcpl_ct_topkeypress(int c, WINDOW *w)
550{
551
552	switch (c) {
553	case 'f':
554		pmcstat_skiplink = !pmcstat_skiplink;
555		wprintw(w, "skip empty link %s", pmcstat_skiplink ? "on" : "off");
556		break;
557	}
558
559	return 0;
560}
561
562/*
563 * Look for a callgraph node associated with pmc `pmcid' in the global
564 * hash table that corresponds to the given `pc' value in the process map
565 * `ppm'.
566 */
567
568static struct pmcpl_ct_node *
569pmcpl_ct_node_hash_lookup_pc(struct pmcpl_ct_node *parent,
570    struct pmcstat_pcmap *ppm, uintfptr_t pc, int pmcin)
571{
572	struct pmcstat_symbol *sym;
573	struct pmcstat_image *image;
574	struct pmcpl_ct_node *ct;
575	struct pmcpl_ct_node_hash *h;
576	struct pmcpl_ct_arc *arc;
577	uintfptr_t loadaddress;
578	int i;
579	unsigned int hash;
580
581	assert(parent != NULL);
582
583	image = ppm->ppm_image;
584
585	loadaddress = ppm->ppm_lowpc + image->pi_vaddr - image->pi_start;
586	pc -= loadaddress;	/* Convert to an offset in the image. */
587
588	/*
589	 * Try determine the function at this offset.  If we can't
590	 * find a function round leave the `pc' value alone.
591	 */
592	if ((sym = pmcstat_symbol_search(image, pc)) != NULL)
593		pc = sym->ps_start;
594
595	for (hash = i = 0; i < (int)sizeof(uintfptr_t); i++)
596		hash += (pc >> i) & 0xFF;
597
598	hash &= PMCSTAT_HASH_MASK;
599
600	ct = NULL;
601	LIST_FOREACH(h, &pmcpl_ct_node_hash[hash], pch_next) {
602		ct = h->pch_ctnode;
603
604		assert(ct != NULL);
605
606		if (ct->pct_image == image && ct->pct_func == pc) {
607			/*
608			 * Find related arc in parent node and
609			 * increment the sample count.
610			 */
611			for (i = 0; i < parent->pct_narc; i++) {
612				if (parent->pct_arc[i].pcta_child == ct) {
613					arc = &parent->pct_arc[i];
614					pmcpl_ct_samples_grow(&arc->pcta_samples);
615					arc->pcta_samples.sb[pmcin]++;
616					/* Estimate call count. */
617					pmcpl_ct_samples_grow(&arc->pcta_callid);
618					if (pmcpl_ct_callid.sb[pmcin] -
619					    arc->pcta_callid.sb[pmcin] > 1)
620						arc->pcta_call++;
621					arc->pcta_callid.sb[pmcin] =
622					    pmcpl_ct_callid.sb[pmcin];
623					return (ct);
624				}
625			}
626
627			/*
628			 * No arc found for us, add ourself to the parent.
629			 */
630			pmcpl_ct_arc_grow(parent->pct_narc,
631			    &parent->pct_arc_c, &parent->pct_arc);
632			arc = &parent->pct_arc[parent->pct_narc];
633			pmcpl_ct_samples_grow(&arc->pcta_samples);
634			arc->pcta_samples.sb[pmcin] = 1;
635			arc->pcta_call = 1;
636			pmcpl_ct_samples_grow(&arc->pcta_callid);
637			arc->pcta_callid.sb[pmcin] = pmcpl_ct_callid.sb[pmcin];
638			arc->pcta_child = ct;
639			parent->pct_narc++;
640			return (ct);
641		}
642	}
643
644	/*
645	 * We haven't seen this (pmcid, pc) tuple yet, so allocate a
646	 * new callgraph node and a new hash table entry for it.
647	 */
648	ct = pmcpl_ct_node_allocate(image, pc);
649	if ((h = malloc(sizeof(*h))) == NULL)
650		err(EX_OSERR, "ERROR: Could not allocate callgraph node");
651
652	h->pch_ctnode = ct;
653	LIST_INSERT_HEAD(&pmcpl_ct_node_hash[hash], h, pch_next);
654
655	pmcpl_ct_arc_grow(parent->pct_narc,
656	    &parent->pct_arc_c, &parent->pct_arc);
657	arc = &parent->pct_arc[parent->pct_narc];
658	pmcpl_ct_samples_grow(&arc->pcta_samples);
659	arc->pcta_samples.sb[pmcin] = 1;
660	arc->pcta_call = 1;
661	pmcpl_ct_samples_grow(&arc->pcta_callid);
662	arc->pcta_callid.sb[pmcin] = pmcpl_ct_callid.sb[pmcin];
663	arc->pcta_child = ct;
664	parent->pct_narc++;
665	return (ct);
666}
667
668/*
669 * Record a callchain.
670 */
671
672void
673pmcpl_ct_process(struct pmcstat_process *pp, struct pmcstat_pmcrecord *pmcr,
674    uint32_t nsamples, uintfptr_t *cc, int usermode, uint32_t cpu)
675{
676	int n, pmcin;
677	struct pmcstat_pcmap *ppm[PMC_CALLCHAIN_DEPTH_MAX];
678	struct pmcstat_process *km;
679	struct pmcpl_ct_node *parent, *child;
680
681	(void) cpu;
682
683	assert(nsamples>0 && nsamples<=PMC_CALLCHAIN_DEPTH_MAX);
684
685	/* Get the PMC index. */
686	pmcin = pmcr->pr_pmcin;
687
688	/*
689	 * Validate mapping for the callchain.
690	 * Go from bottom to first invalid entry.
691	 */
692	km = pmcstat_kernproc;
693	for (n = 0; n < (int)nsamples; n++) {
694		ppm[n] = pmcstat_process_find_map(usermode ?
695		    pp : km, cc[n]);
696		if (ppm[n] == NULL) {
697			/* Detect full frame capture (kernel + user). */
698			if (!usermode) {
699				ppm[n] = pmcstat_process_find_map(pp, cc[n]);
700				if (ppm[n] != NULL)
701					km = pp;
702			}
703		}
704		if (ppm[n] == NULL)
705			break;
706	}
707	if (n-- == 0) {
708		pmcstat_stats.ps_callchain_dubious_frames++;
709		pmcr->pr_dubious_frames++;
710		return;
711	}
712
713	/* Increase the call generation counter. */
714	pmcpl_ct_samples_grow(&pmcpl_ct_callid);
715	pmcpl_ct_callid.sb[pmcin]++;
716
717	/*
718	 * Iterate remaining addresses.
719	 */
720	for (parent = pmcpl_ct_root, child = NULL; n >= 0; n--) {
721		child = pmcpl_ct_node_hash_lookup_pc(parent, ppm[n], cc[n],
722		    pmcin);
723		if (child == NULL) {
724			pmcstat_stats.ps_callchain_dubious_frames++;
725			continue;
726		}
727		parent = child;
728	}
729
730	/*
731	 * Increment the sample count for this PMC.
732	 */
733	if (child != NULL) {
734		pmcpl_ct_samples_grow(&child->pct_samples);
735		child->pct_samples.sb[pmcin]++;
736
737		/* Update per instruction sample if required. */
738		if (args.pa_ctdumpinstr)
739			pmcpl_ct_instr_add(child, pmcin, cc[0] -
740			    (ppm[0]->ppm_lowpc + ppm[0]->ppm_image->pi_vaddr -
741			     ppm[0]->ppm_image->pi_start));
742	}
743}
744
745/*
746 * Print node self cost.
747 */
748
749static void
750pmcpl_ct_node_printself(struct pmcpl_ct_node *ct)
751{
752	int i, j, line;
753	uintptr_t addr;
754	struct pmcstat_symbol *sym;
755	char sourcefile[PATH_MAX];
756	char funcname[PATH_MAX];
757
758	/*
759	 * Object binary.
760	 */
761#ifdef PMCPL_CT_OPTIMIZEFN
762	if (pmcpl_ct_prevfn != ct->pct_image->pi_fullpath) {
763#endif
764		pmcpl_ct_prevfn = ct->pct_image->pi_fullpath;
765		fprintf(args.pa_graphfile, "ob=%s\n",
766		    pmcstat_string_unintern(pmcpl_ct_prevfn));
767#ifdef PMCPL_CT_OPTIMIZEFN
768	}
769#endif
770
771	/*
772	 * Function name.
773	 */
774	if (pmcstat_image_addr2line(ct->pct_image, ct->pct_func,
775	    sourcefile, sizeof(sourcefile), &line,
776	    funcname, sizeof(funcname))) {
777		fprintf(args.pa_graphfile, "fn=%s\n",
778		    funcname);
779	} else {
780		sym = pmcstat_symbol_search(ct->pct_image, ct->pct_func);
781		if (sym != NULL)
782			fprintf(args.pa_graphfile, "fn=%s\n",
783			    pmcstat_string_unintern(sym->ps_name));
784		else
785			fprintf(args.pa_graphfile, "fn=%p\n",
786			    (void *)(ct->pct_image->pi_vaddr + ct->pct_func));
787	}
788
789	/*
790	 * Self cost.
791	 */
792	if (ct->pct_ninstr > 0) {
793		for (i = 0; i < ct->pct_ninstr; i++) {
794			addr = ct->pct_image->pi_vaddr +
795			    ct->pct_instr[i].pctf_func;
796			line = 0;
797			if (pmcstat_image_addr2line(ct->pct_image, addr,
798			    sourcefile, sizeof(sourcefile), &line,
799			    funcname, sizeof(funcname)))
800				fprintf(args.pa_graphfile, "fl=%s\n", sourcefile);
801			fprintf(args.pa_graphfile, "%p %u", (void *)addr, line);
802			for (j = 0; j<pmcstat_npmcs; j++)
803				fprintf(args.pa_graphfile, " %u",
804				    PMCPL_CT_SAMPLE(j,
805				    &ct->pct_instr[i].pctf_samples));
806			fprintf(args.pa_graphfile, "\n");
807		}
808	} else {
809		addr = ct->pct_image->pi_vaddr + ct->pct_func;
810		line = 0;
811		if (pmcstat_image_addr2line(ct->pct_image, addr,
812		    sourcefile, sizeof(sourcefile), &line,
813		    funcname, sizeof(funcname)))
814			fprintf(args.pa_graphfile, "fl=%s\n", sourcefile);
815		fprintf(args.pa_graphfile, "* *");
816		for (i = 0; i<pmcstat_npmcs ; i++)
817			fprintf(args.pa_graphfile, " %u",
818			    PMCPL_CT_SAMPLE(i, &ct->pct_samples));
819		fprintf(args.pa_graphfile, "\n");
820	}
821}
822
823/*
824 * Print node child cost.
825 */
826
827static void
828pmcpl_ct_node_printchild(struct pmcpl_ct_node *ct)
829{
830	int i, j, line;
831	uintptr_t addr;
832	struct pmcstat_symbol *sym;
833	struct pmcpl_ct_node *child;
834	char sourcefile[PATH_MAX];
835	char funcname[PATH_MAX];
836
837	/*
838	 * Child cost.
839	 * TODO: attach child cost to the real position in the funtion.
840	 * TODO: cfn=<fn> / call <ncall> addr(<fn>) / addr(call <fn>) <arccost>
841	 */
842	for (i=0 ; i<ct->pct_narc; i++) {
843		child = ct->pct_arc[i].pcta_child;
844
845		/* Object binary. */
846#ifdef PMCPL_CT_OPTIMIZEFN
847		if (pmcpl_ct_prevfn != child->pct_image->pi_fullpath) {
848#endif
849			pmcpl_ct_prevfn = child->pct_image->pi_fullpath;
850			fprintf(args.pa_graphfile, "cob=%s\n",
851			    pmcstat_string_unintern(pmcpl_ct_prevfn));
852#if PMCPL_CT_OPTIMIZEFN
853		}
854#endif
855		/* Child function name. */
856		addr = child->pct_image->pi_vaddr + child->pct_func;
857		/* Child function source file. */
858		if (pmcstat_image_addr2line(child->pct_image, addr,
859		    sourcefile, sizeof(sourcefile), &line,
860		    funcname, sizeof(funcname))) {
861			fprintf(args.pa_graphfile, "cfn=%s\n", funcname);
862			fprintf(args.pa_graphfile, "cfl=%s\n", sourcefile);
863		} else {
864			sym = pmcstat_symbol_search(child->pct_image,
865			    child->pct_func);
866			if (sym != NULL)
867				fprintf(args.pa_graphfile, "cfn=%s\n",
868				    pmcstat_string_unintern(sym->ps_name));
869			else
870				fprintf(args.pa_graphfile, "cfn=%p\n", (void *)addr);
871		}
872
873		/* Child function address, line and call count. */
874		fprintf(args.pa_graphfile, "calls=%u %p %u\n",
875		    ct->pct_arc[i].pcta_call, (void *)addr, line);
876
877		if (ct->pct_image != NULL) {
878			/* Call address, line, sample. */
879			addr = ct->pct_image->pi_vaddr + ct->pct_func;
880			line = 0;
881			pmcstat_image_addr2line(ct->pct_image, addr, sourcefile,
882			    sizeof(sourcefile), &line,
883			    funcname, sizeof(funcname));
884			fprintf(args.pa_graphfile, "%p %u", (void *)addr, line);
885		}
886		else
887			fprintf(args.pa_graphfile, "* *");
888		for (j = 0; j<pmcstat_npmcs; j++)
889			fprintf(args.pa_graphfile, " %u",
890			    PMCPL_CT_SAMPLE(j, &ct->pct_arc[i].pcta_samples));
891		fprintf(args.pa_graphfile, "\n");
892	}
893}
894
895/*
896 * Clean the PMC name for Kcachegrind formula
897 */
898
899static void
900pmcpl_ct_fixup_pmcname(char *s)
901{
902	char *p;
903
904	for (p = s; *p; p++)
905		if (!isalnum(*p))
906			*p = '_';
907}
908
909/*
910 * Print a calltree (KCachegrind) for all PMCs.
911 */
912
913static void
914pmcpl_ct_print(void)
915{
916	int n, i;
917	struct pmcpl_ct_node_hash *pch;
918	struct pmcpl_ct_sample rsamples;
919	char name[40];
920
921	pmcpl_ct_samples_root(&rsamples);
922	pmcpl_ct_prevfn = NULL;
923
924	fprintf(args.pa_graphfile,
925		"version: 1\n"
926		"creator: pmcstat\n"
927		"positions: instr line\n"
928		"events:");
929	for (i=0; i<pmcstat_npmcs; i++) {
930		snprintf(name, sizeof(name), "%s_%d",
931		    pmcstat_pmcindex_to_name(i), i);
932		pmcpl_ct_fixup_pmcname(name);
933		fprintf(args.pa_graphfile, " %s", name);
934	}
935	fprintf(args.pa_graphfile, "\nsummary:");
936	for (i=0; i<pmcstat_npmcs ; i++)
937		fprintf(args.pa_graphfile, " %u",
938		    PMCPL_CT_SAMPLE(i, &rsamples));
939	fprintf(args.pa_graphfile, "\n\n");
940
941	/*
942	 * Fake root node
943	 */
944	fprintf(args.pa_graphfile, "ob=FreeBSD\n");
945	fprintf(args.pa_graphfile, "fn=ROOT\n");
946	fprintf(args.pa_graphfile, "* *");
947	for (i = 0; i<pmcstat_npmcs ; i++)
948		fprintf(args.pa_graphfile, " 0");
949	fprintf(args.pa_graphfile, "\n");
950	pmcpl_ct_node_printchild(pmcpl_ct_root);
951
952	for (n = 0; n < PMCSTAT_NHASH; n++)
953		LIST_FOREACH(pch, &pmcpl_ct_node_hash[n], pch_next) {
954			pmcpl_ct_node_printself(pch->pch_ctnode);
955			pmcpl_ct_node_printchild(pch->pch_ctnode);
956	}
957
958	pmcpl_ct_samples_free(&rsamples);
959}
960
961int
962pmcpl_ct_configure(char *opt)
963{
964
965	if (strncmp(opt, "skiplink=", 9) == 0) {
966		pmcstat_skiplink = atoi(opt+9);
967	} else
968		return (0);
969
970	return (1);
971}
972
973int
974pmcpl_ct_init(void)
975{
976	int i;
977
978	pmcpl_ct_prevfn = NULL;
979	pmcpl_ct_root = pmcpl_ct_node_allocate(NULL, 0);
980
981	for (i = 0; i < PMCSTAT_NHASH; i++)
982		LIST_INIT(&pmcpl_ct_node_hash[i]);
983
984	pmcpl_ct_samples_init(&pmcpl_ct_callid);
985
986	return (0);
987}
988
989void
990pmcpl_ct_shutdown(FILE *mf)
991{
992	int i;
993	struct pmcpl_ct_node_hash *pch, *pchtmp;
994
995	(void) mf;
996
997	if (args.pa_flags & FLAG_DO_CALLGRAPHS)
998		pmcpl_ct_print();
999
1000	/*
1001	 * Free memory.
1002	 */
1003
1004	for (i = 0; i < PMCSTAT_NHASH; i++) {
1005		LIST_FOREACH_SAFE(pch, &pmcpl_ct_node_hash[i], pch_next,
1006		    pchtmp) {
1007			pmcpl_ct_node_free(pch->pch_ctnode);
1008			free(pch);
1009		}
1010	}
1011
1012	pmcpl_ct_node_free(pmcpl_ct_root);
1013	pmcpl_ct_root = NULL;
1014
1015	pmcpl_ct_samples_free(&pmcpl_ct_callid);
1016}
1017
1018