fbt_powerpc.c revision 260670
1/*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 *
21 * Portions Copyright 2006-2008 John Birrell jb@freebsd.org
22 * Portions Copyright 2013 Justin Hibbits jhibbits@freebsd.org
23 *
24 * $FreeBSD: stable/10/sys/cddl/dev/fbt/fbt_powerpc.c 260670 2014-01-15 05:19:37Z jhibbits $
25 *
26 */
27
28/*
29 * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
30 * Use is subject to license terms.
31 */
32
33#include <sys/cdefs.h>
34#include <sys/param.h>
35#include <sys/systm.h>
36#include <sys/conf.h>
37#include <sys/cpuvar.h>
38#include <sys/fcntl.h>
39#include <sys/filio.h>
40#include <sys/kdb.h>
41#include <sys/kernel.h>
42#include <sys/kmem.h>
43#include <sys/kthread.h>
44#include <sys/limits.h>
45#include <sys/linker.h>
46#include <sys/lock.h>
47#include <sys/malloc.h>
48#include <sys/module.h>
49#include <sys/mutex.h>
50#include <sys/pcpu.h>
51#include <sys/poll.h>
52#include <sys/proc.h>
53#include <sys/selinfo.h>
54#include <sys/smp.h>
55#include <sys/syscall.h>
56#include <sys/sysent.h>
57#include <sys/sysproto.h>
58#include <sys/uio.h>
59#include <sys/unistd.h>
60#include <machine/md_var.h>
61#include <machine/stdarg.h>
62
63#include <sys/dtrace.h>
64#include <sys/dtrace_bsd.h>
65
/*
 * Malloc type used by this file for probe records and for the temporary
 * CTF declaration nodes (see the malloc()/free() calls tagged M_FBT).
 */
static MALLOC_DEFINE(M_FBT, "fbt", "Function Boundary Tracing");
67
/*
 * 32-bit instruction words used when scanning for and patching probe sites.
 *
 * FBT_PATCHVAL is the word written over a probe site while it is enabled.
 * The MFLR/MTLR/BLR/BCTR values are the exact instruction words the scanner
 * compares against.  FBT_BRANCH with FBT_BR_MASK describes a branch whose
 * displacement lives in the masked bits; every bit outside the mask must
 * match FBT_BRANCH exactly for FBT_IS_JUMP() to be true.
 */
#define FBT_PATCHVAL		0x7c810808
#define FBT_MFLR_R0		0x7c0802a6
#define FBT_MTLR_R0		0x7c0803a6
#define FBT_BLR			0x4e800020
#define FBT_BCTR		0x4e800030
#define FBT_BRANCH		0x48000000
#define FBT_BR_MASK		0x03fffffc
/* The argument is parenthesized so that compound expressions are masked whole. */
#define FBT_IS_JUMP(instr)	(((instr) & ~FBT_BR_MASK) == FBT_BRANCH)
76
/*
 * Forward declarations of file-local functions.  fbt_open is referenced by
 * fbt_cdevsw; the provide/enable/disable/suspend/resume/getargdesc/destroy
 * routines are referenced by the fbt_pops table.
 */
static d_open_t	fbt_open;
static int	fbt_unload(void);
static void	fbt_getargdesc(void *, dtrace_id_t, void *, dtrace_argdesc_t *);
static void	fbt_provide_module(void *, modctl_t *);
static void	fbt_destroy(void *, dtrace_id_t, void *);
static void	fbt_enable(void *, dtrace_id_t, void *);
static void	fbt_disable(void *, dtrace_id_t, void *);
static void	fbt_load(void *);
static void	fbt_suspend(void *, dtrace_id_t, void *);
static void	fbt_resume(void *, dtrace_id_t, void *);
87
/* Probe names passed to dtrace_probe_create() for the two probe flavours. */
#define	FBT_ENTRY	"entry"
#define	FBT_RETURN	"return"
/*
 * Hash a patch-point address into an fbt_probetab index: drop the low four
 * address bits, then mask with fbt_probetab_mask.
 */
#define	FBT_ADDR2NDX(addr)	((((uintptr_t)(addr)) >> 4) & fbt_probetab_mask)
/*
 * NOTE(review): "128K total" holds for 4-byte pointers; with 8-byte
 * pointers the table would be 256K -- confirm which is intended.
 */
#define	FBT_PROBETAB_SIZE	0x8000		/* 32k entries -- 128K total */
92
/* Character-device switch for the "fbt" device; only an open routine is set. */
static struct cdevsw fbt_cdevsw = {
	.d_version	= D_VERSION,
	.d_open		= fbt_open,
	.d_name		= "fbt",
};
98
/*
 * Stability attributes for the fbt provider.  Each row holds two stability
 * levels and a dependency class.
 * NOTE(review): which row applies to which part of the probe description is
 * fixed by the dtrace_pattr_t layout, which is not visible here -- confirm
 * against sys/dtrace.h before reordering.
 */
static dtrace_pattr_t fbt_attr = {
{ DTRACE_STABILITY_EVOLVING, DTRACE_STABILITY_EVOLVING, DTRACE_CLASS_COMMON },
{ DTRACE_STABILITY_PRIVATE, DTRACE_STABILITY_PRIVATE, DTRACE_CLASS_UNKNOWN },
{ DTRACE_STABILITY_PRIVATE, DTRACE_STABILITY_PRIVATE, DTRACE_CLASS_ISA },
{ DTRACE_STABILITY_EVOLVING, DTRACE_STABILITY_EVOLVING, DTRACE_CLASS_COMMON },
{ DTRACE_STABILITY_PRIVATE, DTRACE_STABILITY_PRIVATE, DTRACE_CLASS_ISA },
};
106
/*
 * Provider operations table for the fbt provider.  The initializer is
 * positional, so the order of entries must match dtrace_pops_t.
 * NOTE(review): the roles of the NULL slots are not visible in this file;
 * confirm against the dtrace_pops_t definition before editing.
 */
static dtrace_pops_t fbt_pops = {
	NULL,
	fbt_provide_module,	/* create probes for one linker file */
	fbt_enable,		/* write the patch value at each site */
	fbt_disable,		/* restore the saved instruction */
	fbt_suspend,		/* restore the saved instruction, keep counts */
	fbt_resume,		/* re-write the patch value */
	fbt_getargdesc,		/* describe a probe argument from CTF data */
	NULL,
	NULL,
	fbt_destroy		/* unhash and free every site of a probe */
};
119
/*
 * One instrumented instruction.  An entry probe has a single record; a return
 * probe may have several records (one per return site) chained via fbtp_next
 * and sharing one fbtp_id.
 */
typedef struct fbt_probe {
	struct fbt_probe *fbtp_hashnext;	/* next record in the same fbt_probetab bucket */
	uint32_t	*fbtp_patchpoint;	/* address of the instruction that gets patched */
	int8_t		fbtp_rval;		/* DTRACE_INVOP_* value returned by fbt_invop() */
	uint32_t	fbtp_patchval;		/* word written when the probe is enabled/resumed */
	uint32_t	fbtp_savedval;		/* original word, restored on disable/suspend */
	uintptr_t	fbtp_roffset;		/* 0 means entry probe; non-zero is passed as arg0 of a return probe */
	dtrace_id_t	fbtp_id;		/* id returned by dtrace_probe_create() */
	const char	*fbtp_name;		/* function name (points into the symbol data) */
	modctl_t	*fbtp_ctl;		/* linker file containing the function */
	int		fbtp_loadcnt;		/* ctl->loadcnt at creation; mismatch means module was reloaded */
	int		fbtp_primary;		/* not referenced in the visible part of this file */
	int		fbtp_invop_cnt;		/* incremented each time fbt_invop() matches this record */
	int		fbtp_symindx;		/* symbol index, used to find the function's CTF data */
	struct fbt_probe *fbtp_next;		/* next return site of the same probe */
} fbt_probe_t;
136
/* File-scope provider state. */
static struct cdev		*fbt_cdev;
static dtrace_provider_id_t	fbt_id;			/* provider id passed to dtrace_probe_create() */
static fbt_probe_t		**fbt_probetab;		/* hash table of probe records, keyed by patch point */
static int			fbt_probetab_size;
static int			fbt_probetab_mask;	/* index mask applied by FBT_ADDR2NDX() */
static int			fbt_verbose = 0;	/* non-zero enables the diagnostic printf in fbt_enable() */
143
144static int
145fbt_invop(uintptr_t addr, uintptr_t *stack, uintptr_t rval)
146{
147	struct trapframe *frame = (struct trapframe *)stack;
148	solaris_cpu_t *cpu = &solaris_cpu[curcpu];
149	fbt_probe_t *fbt = fbt_probetab[FBT_ADDR2NDX(addr)];
150	uintptr_t tmp;
151
152	for (; fbt != NULL; fbt = fbt->fbtp_hashnext) {
153		if ((uintptr_t)fbt->fbtp_patchpoint == addr) {
154			fbt->fbtp_invop_cnt++;
155			if (fbt->fbtp_roffset == 0) {
156				cpu->cpu_dtrace_caller = addr;
157
158				dtrace_probe(fbt->fbtp_id, frame->fixreg[3],
159				    frame->fixreg[4], frame->fixreg[5],
160				    frame->fixreg[6], frame->fixreg[7]);
161
162				cpu->cpu_dtrace_caller = 0;
163			} else {
164
165				dtrace_probe(fbt->fbtp_id, fbt->fbtp_roffset,
166				    rval, 0, 0, 0);
167				/*
168				 * The caller doesn't have the fbt item, so
169				 * fixup tail calls here.
170				 */
171				if (fbt->fbtp_rval == DTRACE_INVOP_JUMP) {
172					frame->srr0 = (uintptr_t)fbt->fbtp_patchpoint;
173					tmp = fbt->fbtp_savedval & FBT_BR_MASK;
174					/* Sign extend. */
175					if (tmp & 0x02000000)
176#ifdef __powerpc64__
177						tmp |= 0xfffffffffc000000ULL;
178#else
179						tmp |= 0xfc000000UL;
180#endif
181					frame->srr0 += tmp;
182				}
183				cpu->cpu_dtrace_caller = 0;
184			}
185
186			return (fbt->fbtp_rval);
187		}
188	}
189
190	return (0);
191}
192
193static int
194fbt_provide_module_function(linker_file_t lf, int symindx,
195    linker_symval_t *symval, void *opaque)
196{
197	char *modname = opaque;
198	const char *name = symval->name;
199	fbt_probe_t *fbt, *retfbt;
200	int j;
201	u_int32_t *instr, *limit;
202
203	/* PowerPC64 uses '.' prefixes on symbol names, ignore it. */
204	if (name[0] == '.')
205		name++;
206
207	if (strncmp(name, "dtrace_", 7) == 0 &&
208	    strncmp(name, "dtrace_safe_", 12) != 0) {
209		/*
210		 * Anything beginning with "dtrace_" may be called
211		 * from probe context unless it explicitly indicates
212		 * that it won't be called from probe context by
213		 * using the prefix "dtrace_safe_".
214		 */
215		return (0);
216	}
217
218	if (name[0] == '_' && name[1] == '_')
219		return (0);
220
221	instr = (u_int32_t *) symval->value;
222	limit = (u_int32_t *) (symval->value + symval->size);
223
224	for (; instr < limit; instr++)
225		if (*instr == FBT_MFLR_R0)
226			break;
227
228	if (*instr != FBT_MFLR_R0)
229		return (0);
230
231	fbt = malloc(sizeof (fbt_probe_t), M_FBT, M_WAITOK | M_ZERO);
232	fbt->fbtp_name = name;
233	fbt->fbtp_id = dtrace_probe_create(fbt_id, modname,
234	    name, FBT_ENTRY, 3, fbt);
235	fbt->fbtp_patchpoint = instr;
236	fbt->fbtp_ctl = lf;
237	fbt->fbtp_loadcnt = lf->loadcnt;
238	fbt->fbtp_savedval = *instr;
239	fbt->fbtp_patchval = FBT_PATCHVAL;
240	fbt->fbtp_rval = DTRACE_INVOP_MFLR_R0;
241	fbt->fbtp_symindx = symindx;
242
243	fbt->fbtp_hashnext = fbt_probetab[FBT_ADDR2NDX(instr)];
244	fbt_probetab[FBT_ADDR2NDX(instr)] = fbt;
245
246	lf->fbt_nentries++;
247
248	retfbt = NULL;
249again:
250	if (instr >= limit)
251		return (0);
252
253	/*
254	 * We (desperately) want to avoid erroneously instrumenting a
255	 * jump table To determine if we're looking at a true instruction
256	 * sequence or an inline jump table that happens to contain the same
257	 * byte sequences, we resort to some heuristic sleeze:  we treat this
258	 * instruction as being contained within a pointer, and see if that
259	 * pointer points to within the body of the function.  If it does, we
260	 * refuse to instrument it.
261	 */
262	{
263		uint32_t *ptr;
264
265		ptr = *(uint32_t **)instr;
266
267		if (ptr >= (uint32_t *) symval->value && ptr < limit) {
268			instr++;
269			goto again;
270		}
271	}
272
273	if (*instr != FBT_MTLR_R0) {
274		instr++;
275		goto again;
276	}
277
278	instr++;
279
280	for (j = 0; j < 12 && instr < limit; j++, instr++) {
281		if ((*instr == FBT_BCTR) || (*instr == FBT_BLR) ||
282		    FBT_IS_JUMP(*instr))
283			break;
284	}
285
286	if (!(*instr == FBT_BCTR || *instr == FBT_BLR || FBT_IS_JUMP(*instr)))
287		goto again;
288
289	/*
290	 * We have a winner!
291	 */
292	fbt = malloc(sizeof (fbt_probe_t), M_FBT, M_WAITOK | M_ZERO);
293	fbt->fbtp_name = name;
294
295	if (retfbt == NULL) {
296		fbt->fbtp_id = dtrace_probe_create(fbt_id, modname,
297		    name, FBT_RETURN, 5, fbt);
298	} else {
299		retfbt->fbtp_next = fbt;
300		fbt->fbtp_id = retfbt->fbtp_id;
301	}
302
303	retfbt = fbt;
304	fbt->fbtp_patchpoint = instr;
305	fbt->fbtp_ctl = lf;
306	fbt->fbtp_loadcnt = lf->loadcnt;
307	fbt->fbtp_symindx = symindx;
308
309	if (*instr == FBT_BCTR)
310		fbt->fbtp_rval = DTRACE_INVOP_BCTR;
311	else if (*instr == FBT_BLR)
312		fbt->fbtp_rval = DTRACE_INVOP_RET;
313	else
314		fbt->fbtp_rval = DTRACE_INVOP_JUMP;
315
316	fbt->fbtp_savedval = *instr;
317	fbt->fbtp_patchval = FBT_PATCHVAL;
318	fbt->fbtp_hashnext = fbt_probetab[FBT_ADDR2NDX(instr)];
319	fbt_probetab[FBT_ADDR2NDX(instr)] = fbt;
320
321	lf->fbt_nentries++;
322
323	instr += 4;
324	goto again;
325}
326
327static void
328fbt_provide_module(void *arg, modctl_t *lf)
329{
330	char modname[MAXPATHLEN];
331	int i;
332	size_t len;
333
334	strlcpy(modname, lf->filename, sizeof(modname));
335	len = strlen(modname);
336	if (len > 3 && strcmp(modname + len - 3, ".ko") == 0)
337		modname[len - 3] = '\0';
338
339	/*
340	 * Employees of dtrace and their families are ineligible.  Void
341	 * where prohibited.
342	 */
343	if (strcmp(modname, "dtrace") == 0)
344		return;
345
346	/*
347	 * The cyclic timer subsystem can be built as a module and DTrace
348	 * depends on that, so it is ineligible too.
349	 */
350	if (strcmp(modname, "cyclic") == 0)
351		return;
352
353	/*
354	 * To register with DTrace, a module must list 'dtrace' as a
355	 * dependency in order for the kernel linker to resolve
356	 * symbols like dtrace_register(). All modules with such a
357	 * dependency are ineligible for FBT tracing.
358	 */
359	for (i = 0; i < lf->ndeps; i++)
360		if (strncmp(lf->deps[i]->filename, "dtrace", 6) == 0)
361			return;
362
363	if (lf->fbt_nentries) {
364		/*
365		 * This module has some FBT entries allocated; we're afraid
366		 * to screw with it.
367		 */
368		return;
369	}
370
371	/*
372	 * List the functions in the module and the symbol values.
373	 */
374	(void) linker_file_function_listall(lf, fbt_provide_module_function, modname);
375}
376
377static void
378fbt_destroy(void *arg, dtrace_id_t id, void *parg)
379{
380	fbt_probe_t *fbt = parg, *next, *hash, *last;
381	modctl_t *ctl;
382	int ndx;
383
384	do {
385		ctl = fbt->fbtp_ctl;
386
387		ctl->fbt_nentries--;
388
389		/*
390		 * Now we need to remove this probe from the fbt_probetab.
391		 */
392		ndx = FBT_ADDR2NDX(fbt->fbtp_patchpoint);
393		last = NULL;
394		hash = fbt_probetab[ndx];
395
396		while (hash != fbt) {
397			ASSERT(hash != NULL);
398			last = hash;
399			hash = hash->fbtp_hashnext;
400		}
401
402		if (last != NULL) {
403			last->fbtp_hashnext = fbt->fbtp_hashnext;
404		} else {
405			fbt_probetab[ndx] = fbt->fbtp_hashnext;
406		}
407
408		next = fbt->fbtp_next;
409		free(fbt, M_FBT);
410
411		fbt = next;
412	} while (fbt != NULL);
413}
414
415static void
416fbt_enable(void *arg, dtrace_id_t id, void *parg)
417{
418	fbt_probe_t *fbt = parg;
419	modctl_t *ctl = fbt->fbtp_ctl;
420
421	ctl->nenabled++;
422
423	/*
424	 * Now check that our modctl has the expected load count.  If it
425	 * doesn't, this module must have been unloaded and reloaded -- and
426	 * we're not going to touch it.
427	 */
428	if (ctl->loadcnt != fbt->fbtp_loadcnt) {
429		if (fbt_verbose) {
430			printf("fbt is failing for probe %s "
431			    "(module %s reloaded)",
432			    fbt->fbtp_name, ctl->filename);
433		}
434
435		return;
436	}
437
438	for (; fbt != NULL; fbt = fbt->fbtp_next) {
439		*fbt->fbtp_patchpoint = fbt->fbtp_patchval;
440		__syncicache(fbt->fbtp_patchpoint, 4);
441	}
442}
443
444static void
445fbt_disable(void *arg, dtrace_id_t id, void *parg)
446{
447	fbt_probe_t *fbt = parg;
448	modctl_t *ctl = fbt->fbtp_ctl;
449
450	ASSERT(ctl->nenabled > 0);
451	ctl->nenabled--;
452
453	if ((ctl->loadcnt != fbt->fbtp_loadcnt))
454		return;
455
456	for (; fbt != NULL; fbt = fbt->fbtp_next) {
457		*fbt->fbtp_patchpoint = fbt->fbtp_savedval;
458		__syncicache(fbt->fbtp_patchpoint, 4);
459	}
460}
461
462static void
463fbt_suspend(void *arg, dtrace_id_t id, void *parg)
464{
465	fbt_probe_t *fbt = parg;
466	modctl_t *ctl = fbt->fbtp_ctl;
467
468	ASSERT(ctl->nenabled > 0);
469
470	if ((ctl->loadcnt != fbt->fbtp_loadcnt))
471		return;
472
473	for (; fbt != NULL; fbt = fbt->fbtp_next) {
474		*fbt->fbtp_patchpoint = fbt->fbtp_savedval;
475		__syncicache(fbt->fbtp_patchpoint, 4);
476	}
477}
478
479static void
480fbt_resume(void *arg, dtrace_id_t id, void *parg)
481{
482	fbt_probe_t *fbt = parg;
483	modctl_t *ctl = fbt->fbtp_ctl;
484
485	ASSERT(ctl->nenabled > 0);
486
487	if ((ctl->loadcnt != fbt->fbtp_loadcnt))
488		return;
489
490	for (; fbt != NULL; fbt = fbt->fbtp_next) {
491		*fbt->fbtp_patchpoint = fbt->fbtp_patchval;
492		__syncicache(fbt->fbtp_patchpoint, 4);
493	}
494}
495
496static int
497fbt_ctfoff_init(modctl_t *lf, linker_ctf_t *lc)
498{
499	const Elf_Sym *symp = lc->symtab;;
500	const ctf_header_t *hp = (const ctf_header_t *) lc->ctftab;
501	const uint8_t *ctfdata = lc->ctftab + sizeof(ctf_header_t);
502	int i;
503	uint32_t *ctfoff;
504	uint32_t objtoff = hp->cth_objtoff;
505	uint32_t funcoff = hp->cth_funcoff;
506	ushort_t info;
507	ushort_t vlen;
508
509	/* Sanity check. */
510	if (hp->cth_magic != CTF_MAGIC) {
511		printf("Bad magic value in CTF data of '%s'\n",lf->pathname);
512		return (EINVAL);
513	}
514
515	if (lc->symtab == NULL) {
516		printf("No symbol table in '%s'\n",lf->pathname);
517		return (EINVAL);
518	}
519
520	if ((ctfoff = malloc(sizeof(uint32_t) * lc->nsym, M_LINKER, M_WAITOK)) == NULL)
521		return (ENOMEM);
522
523	*lc->ctfoffp = ctfoff;
524
525	for (i = 0; i < lc->nsym; i++, ctfoff++, symp++) {
526		if (symp->st_name == 0 || symp->st_shndx == SHN_UNDEF) {
527			*ctfoff = 0xffffffff;
528			continue;
529		}
530
531		switch (ELF_ST_TYPE(symp->st_info)) {
532		case STT_OBJECT:
533			if (objtoff >= hp->cth_funcoff ||
534                            (symp->st_shndx == SHN_ABS && symp->st_value == 0)) {
535				*ctfoff = 0xffffffff;
536                                break;
537                        }
538
539                        *ctfoff = objtoff;
540                        objtoff += sizeof (ushort_t);
541			break;
542
543		case STT_FUNC:
544			if (funcoff >= hp->cth_typeoff) {
545				*ctfoff = 0xffffffff;
546				break;
547			}
548
549			*ctfoff = funcoff;
550
551			info = *((const ushort_t *)(ctfdata + funcoff));
552			vlen = CTF_INFO_VLEN(info);
553
554			/*
555			 * If we encounter a zero pad at the end, just skip it.
556			 * Otherwise skip over the function and its return type
557			 * (+2) and the argument list (vlen).
558			 */
559			if (CTF_INFO_KIND(info) == CTF_K_UNKNOWN && vlen == 0)
560				funcoff += sizeof (ushort_t); /* skip pad */
561			else
562				funcoff += sizeof (ushort_t) * (vlen + 2);
563			break;
564
565		default:
566			*ctfoff = 0xffffffff;
567			break;
568		}
569	}
570
571	return (0);
572}
573
574static ssize_t
575fbt_get_ctt_size(uint8_t version, const ctf_type_t *tp, ssize_t *sizep,
576    ssize_t *incrementp)
577{
578	ssize_t size, increment;
579
580	if (version > CTF_VERSION_1 &&
581	    tp->ctt_size == CTF_LSIZE_SENT) {
582		size = CTF_TYPE_LSIZE(tp);
583		increment = sizeof (ctf_type_t);
584	} else {
585		size = tp->ctt_size;
586		increment = sizeof (ctf_stype_t);
587	}
588
589	if (sizep)
590		*sizep = size;
591	if (incrementp)
592		*incrementp = increment;
593
594	return (size);
595}
596
597static int
598fbt_typoff_init(linker_ctf_t *lc)
599{
600	const ctf_header_t *hp = (const ctf_header_t *) lc->ctftab;
601	const ctf_type_t *tbuf;
602	const ctf_type_t *tend;
603	const ctf_type_t *tp;
604	const uint8_t *ctfdata = lc->ctftab + sizeof(ctf_header_t);
605	int ctf_typemax = 0;
606	uint32_t *xp;
607	ulong_t pop[CTF_K_MAX + 1] = { 0 };
608
609
610	/* Sanity check. */
611	if (hp->cth_magic != CTF_MAGIC)
612		return (EINVAL);
613
614	tbuf = (const ctf_type_t *) (ctfdata + hp->cth_typeoff);
615	tend = (const ctf_type_t *) (ctfdata + hp->cth_stroff);
616
617	int child = hp->cth_parname != 0;
618
619	/*
620	 * We make two passes through the entire type section.  In this first
621	 * pass, we count the number of each type and the total number of types.
622	 */
623	for (tp = tbuf; tp < tend; ctf_typemax++) {
624		ushort_t kind = CTF_INFO_KIND(tp->ctt_info);
625		ulong_t vlen = CTF_INFO_VLEN(tp->ctt_info);
626		ssize_t size, increment;
627
628		size_t vbytes;
629		uint_t n;
630
631		(void) fbt_get_ctt_size(hp->cth_version, tp, &size, &increment);
632
633		switch (kind) {
634		case CTF_K_INTEGER:
635		case CTF_K_FLOAT:
636			vbytes = sizeof (uint_t);
637			break;
638		case CTF_K_ARRAY:
639			vbytes = sizeof (ctf_array_t);
640			break;
641		case CTF_K_FUNCTION:
642			vbytes = sizeof (ushort_t) * (vlen + (vlen & 1));
643			break;
644		case CTF_K_STRUCT:
645		case CTF_K_UNION:
646			if (size < CTF_LSTRUCT_THRESH) {
647				ctf_member_t *mp = (ctf_member_t *)
648				    ((uintptr_t)tp + increment);
649
650				vbytes = sizeof (ctf_member_t) * vlen;
651				for (n = vlen; n != 0; n--, mp++)
652					child |= CTF_TYPE_ISCHILD(mp->ctm_type);
653			} else {
654				ctf_lmember_t *lmp = (ctf_lmember_t *)
655				    ((uintptr_t)tp + increment);
656
657				vbytes = sizeof (ctf_lmember_t) * vlen;
658				for (n = vlen; n != 0; n--, lmp++)
659					child |=
660					    CTF_TYPE_ISCHILD(lmp->ctlm_type);
661			}
662			break;
663		case CTF_K_ENUM:
664			vbytes = sizeof (ctf_enum_t) * vlen;
665			break;
666		case CTF_K_FORWARD:
667			/*
668			 * For forward declarations, ctt_type is the CTF_K_*
669			 * kind for the tag, so bump that population count too.
670			 * If ctt_type is unknown, treat the tag as a struct.
671			 */
672			if (tp->ctt_type == CTF_K_UNKNOWN ||
673			    tp->ctt_type >= CTF_K_MAX)
674				pop[CTF_K_STRUCT]++;
675			else
676				pop[tp->ctt_type]++;
677			/*FALLTHRU*/
678		case CTF_K_UNKNOWN:
679			vbytes = 0;
680			break;
681		case CTF_K_POINTER:
682		case CTF_K_TYPEDEF:
683		case CTF_K_VOLATILE:
684		case CTF_K_CONST:
685		case CTF_K_RESTRICT:
686			child |= CTF_TYPE_ISCHILD(tp->ctt_type);
687			vbytes = 0;
688			break;
689		default:
690			printf("%s(%d): detected invalid CTF kind -- %u\n", __func__, __LINE__, kind);
691			return (EIO);
692		}
693		tp = (ctf_type_t *)((uintptr_t)tp + increment + vbytes);
694		pop[kind]++;
695	}
696
697	/* account for a sentinel value below */
698	ctf_typemax++;
699	*lc->typlenp = ctf_typemax;
700
701	if ((xp = malloc(sizeof(uint32_t) * ctf_typemax, M_LINKER, M_ZERO | M_WAITOK)) == NULL)
702		return (ENOMEM);
703
704	*lc->typoffp = xp;
705
706	/* type id 0 is used as a sentinel value */
707	*xp++ = 0;
708
709	/*
710	 * In the second pass, fill in the type offset.
711	 */
712	for (tp = tbuf; tp < tend; xp++) {
713		ushort_t kind = CTF_INFO_KIND(tp->ctt_info);
714		ulong_t vlen = CTF_INFO_VLEN(tp->ctt_info);
715		ssize_t size, increment;
716
717		size_t vbytes;
718		uint_t n;
719
720		(void) fbt_get_ctt_size(hp->cth_version, tp, &size, &increment);
721
722		switch (kind) {
723		case CTF_K_INTEGER:
724		case CTF_K_FLOAT:
725			vbytes = sizeof (uint_t);
726			break;
727		case CTF_K_ARRAY:
728			vbytes = sizeof (ctf_array_t);
729			break;
730		case CTF_K_FUNCTION:
731			vbytes = sizeof (ushort_t) * (vlen + (vlen & 1));
732			break;
733		case CTF_K_STRUCT:
734		case CTF_K_UNION:
735			if (size < CTF_LSTRUCT_THRESH) {
736				ctf_member_t *mp = (ctf_member_t *)
737				    ((uintptr_t)tp + increment);
738
739				vbytes = sizeof (ctf_member_t) * vlen;
740				for (n = vlen; n != 0; n--, mp++)
741					child |= CTF_TYPE_ISCHILD(mp->ctm_type);
742			} else {
743				ctf_lmember_t *lmp = (ctf_lmember_t *)
744				    ((uintptr_t)tp + increment);
745
746				vbytes = sizeof (ctf_lmember_t) * vlen;
747				for (n = vlen; n != 0; n--, lmp++)
748					child |=
749					    CTF_TYPE_ISCHILD(lmp->ctlm_type);
750			}
751			break;
752		case CTF_K_ENUM:
753			vbytes = sizeof (ctf_enum_t) * vlen;
754			break;
755		case CTF_K_FORWARD:
756		case CTF_K_UNKNOWN:
757			vbytes = 0;
758			break;
759		case CTF_K_POINTER:
760		case CTF_K_TYPEDEF:
761		case CTF_K_VOLATILE:
762		case CTF_K_CONST:
763		case CTF_K_RESTRICT:
764			vbytes = 0;
765			break;
766		default:
767			printf("%s(%d): detected invalid CTF kind -- %u\n", __func__, __LINE__, kind);
768			return (EIO);
769		}
770		*xp = (uint32_t)((uintptr_t) tp - (uintptr_t) ctfdata);
771		tp = (ctf_type_t *)((uintptr_t)tp + increment + vbytes);
772	}
773
774	return (0);
775}
776
777/*
778 * CTF Declaration Stack
779 *
780 * In order to implement ctf_type_name(), we must convert a type graph back
781 * into a C type declaration.  Unfortunately, a type graph represents a storage
782 * class ordering of the type whereas a type declaration must obey the C rules
783 * for operator precedence, and the two orderings are frequently in conflict.
784 * For example, consider these CTF type graphs and their C declarations:
785 *
786 * CTF_K_POINTER -> CTF_K_FUNCTION -> CTF_K_INTEGER  : int (*)()
787 * CTF_K_POINTER -> CTF_K_ARRAY -> CTF_K_INTEGER     : int (*)[]
788 *
789 * In each case, parentheses are used to raise operator * to higher lexical
790 * precedence, so the string form of the C declaration cannot be constructed by
791 * walking the type graph links and forming the string from left to right.
792 *
793 * The functions in this file build a set of stacks from the type graph nodes
794 * corresponding to the C operator precedence levels in the appropriate order.
795 * The code in ctf_type_name() can then iterate over the levels and nodes in
796 * lexical precedence order and construct the final C declaration string.
797 */
/*
 * Link pair used both as the list head and as the first member of every list
 * element.  In a head, l_next is the first element and l_prev the last; in an
 * element they are the neighbouring elements (NULL at either end).
 */
typedef struct ctf_list {
	struct ctf_list *l_prev; /* previous pointer or tail pointer */
	struct ctf_list *l_next; /* next pointer or head pointer */
} ctf_list_t;

/* Accessors that treat 'elem' as a ctf_list_t and return the link as void *. */
#define	ctf_list_prev(elem)	((void *)(((ctf_list_t *)(elem))->l_prev))
#define	ctf_list_next(elem)	((void *)(((ctf_list_t *)(elem))->l_next))
805
/*
 * Precedence levels used to index the per-level node stacks of a ctf_decl_t.
 * CTF_PREC_MAX is the number of levels, not a level itself.
 */
typedef enum {
	CTF_PREC_BASE,		/* base types, typedefs and any kind not listed below */
	CTF_PREC_POINTER,	/* pointers */
	CTF_PREC_ARRAY,		/* arrays */
	CTF_PREC_FUNCTION,	/* functions */
	CTF_PREC_MAX
} ctf_decl_prec_t;
813
/*
 * One type-graph node queued on a ctf_decl_t precedence stack.  cd_list must
 * stay the first member so the node can be linked through ctf_list_t.
 */
typedef struct ctf_decl_node {
	ctf_list_t cd_list;			/* linked list pointers */
	ctf_id_t cd_type;			/* type identifier */
	uint_t cd_kind;				/* type kind */
	uint_t cd_n;				/* type dimension if array */
} ctf_decl_node_t;
820
/*
 * Working state for turning a CTF type graph into a C declaration string:
 * one node list per precedence level plus the output buffer bookkeeping.
 */
typedef struct ctf_decl {
	ctf_list_t cd_nodes[CTF_PREC_MAX];	/* declaration node stacks */
	int cd_order[CTF_PREC_MAX];		/* storage order of decls */
	ctf_decl_prec_t cd_qualp;		/* qualifier precedence */
	ctf_decl_prec_t cd_ordp;		/* ordered precedence */
	char *cd_buf;				/* buffer for output */
	char *cd_ptr;				/* buffer location */
	char *cd_end;				/* buffer limit */
	size_t cd_len;				/* buffer space required */
	int cd_err;				/* saved error value */
} ctf_decl_t;
832
833/*
834 * Simple doubly-linked list append routine.  This implementation assumes that
835 * each list element contains an embedded ctf_list_t as the first member.
836 * An additional ctf_list_t is used to store the head (l_next) and tail
837 * (l_prev) pointers.  The current head and tail list elements have their
838 * previous and next pointers set to NULL, respectively.
839 */
840static void
841ctf_list_append(ctf_list_t *lp, void *new)
842{
843	ctf_list_t *p = lp->l_prev;	/* p = tail list element */
844	ctf_list_t *q = new;		/* q = new list element */
845
846	lp->l_prev = q;
847	q->l_prev = p;
848	q->l_next = NULL;
849
850	if (p != NULL)
851		p->l_next = q;
852	else
853		lp->l_next = q;
854}
855
856/*
857 * Prepend the specified existing element to the given ctf_list_t.  The
858 * existing pointer should be pointing at a struct with embedded ctf_list_t.
859 */
860static void
861ctf_list_prepend(ctf_list_t *lp, void *new)
862{
863	ctf_list_t *p = new;		/* p = new list element */
864	ctf_list_t *q = lp->l_next;	/* q = head list element */
865
866	lp->l_next = p;
867	p->l_prev = NULL;
868	p->l_next = q;
869
870	if (q != NULL)
871		q->l_prev = p;
872	else
873		lp->l_prev = p;
874}
875
876static void
877ctf_decl_init(ctf_decl_t *cd, char *buf, size_t len)
878{
879	int i;
880
881	bzero(cd, sizeof (ctf_decl_t));
882
883	for (i = CTF_PREC_BASE; i < CTF_PREC_MAX; i++)
884		cd->cd_order[i] = CTF_PREC_BASE - 1;
885
886	cd->cd_qualp = CTF_PREC_BASE;
887	cd->cd_ordp = CTF_PREC_BASE;
888
889	cd->cd_buf = buf;
890	cd->cd_ptr = buf;
891	cd->cd_end = buf + len;
892}
893
894static void
895ctf_decl_fini(ctf_decl_t *cd)
896{
897	ctf_decl_node_t *cdp, *ndp;
898	int i;
899
900	for (i = CTF_PREC_BASE; i < CTF_PREC_MAX; i++) {
901		for (cdp = ctf_list_next(&cd->cd_nodes[i]);
902		    cdp != NULL; cdp = ndp) {
903			ndp = ctf_list_next(cdp);
904			free(cdp, M_FBT);
905		}
906	}
907}
908
909static const ctf_type_t *
910ctf_lookup_by_id(linker_ctf_t *lc, ctf_id_t type)
911{
912	const ctf_type_t *tp;
913	uint32_t offset;
914	uint32_t *typoff = *lc->typoffp;
915
916	if (type >= *lc->typlenp) {
917		printf("%s(%d): type %d exceeds max %ld\n",__func__,__LINE__,(int) type,*lc->typlenp);
918		return(NULL);
919	}
920
921	/* Check if the type isn't cross-referenced. */
922	if ((offset = typoff[type]) == 0) {
923		printf("%s(%d): type %d isn't cross referenced\n",__func__,__LINE__, (int) type);
924		return(NULL);
925	}
926
927	tp = (const ctf_type_t *)(lc->ctftab + offset + sizeof(ctf_header_t));
928
929	return (tp);
930}
931
932static void
933fbt_array_info(linker_ctf_t *lc, ctf_id_t type, ctf_arinfo_t *arp)
934{
935	const ctf_header_t *hp = (const ctf_header_t *) lc->ctftab;
936	const ctf_type_t *tp;
937	const ctf_array_t *ap;
938	ssize_t increment;
939
940	bzero(arp, sizeof(*arp));
941
942	if ((tp = ctf_lookup_by_id(lc, type)) == NULL)
943		return;
944
945	if (CTF_INFO_KIND(tp->ctt_info) != CTF_K_ARRAY)
946		return;
947
948	(void) fbt_get_ctt_size(hp->cth_version, tp, NULL, &increment);
949
950	ap = (const ctf_array_t *)((uintptr_t)tp + increment);
951	arp->ctr_contents = ap->cta_contents;
952	arp->ctr_index = ap->cta_index;
953	arp->ctr_nelems = ap->cta_nelems;
954}
955
956static const char *
957ctf_strptr(linker_ctf_t *lc, int name)
958{
959	const ctf_header_t *hp = (const ctf_header_t *) lc->ctftab;;
960	const char *strp = "";
961
962	if (name < 0 || name >= hp->cth_strlen)
963		return(strp);
964
965	strp = (const char *)(lc->ctftab + hp->cth_stroff + name + sizeof(ctf_header_t));
966
967	return (strp);
968}
969
970static void
971ctf_decl_push(ctf_decl_t *cd, linker_ctf_t *lc, ctf_id_t type)
972{
973	ctf_decl_node_t *cdp;
974	ctf_decl_prec_t prec;
975	uint_t kind, n = 1;
976	int is_qual = 0;
977
978	const ctf_type_t *tp;
979	ctf_arinfo_t ar;
980
981	if ((tp = ctf_lookup_by_id(lc, type)) == NULL) {
982		cd->cd_err = ENOENT;
983		return;
984	}
985
986	switch (kind = CTF_INFO_KIND(tp->ctt_info)) {
987	case CTF_K_ARRAY:
988		fbt_array_info(lc, type, &ar);
989		ctf_decl_push(cd, lc, ar.ctr_contents);
990		n = ar.ctr_nelems;
991		prec = CTF_PREC_ARRAY;
992		break;
993
994	case CTF_K_TYPEDEF:
995		if (ctf_strptr(lc, tp->ctt_name)[0] == '\0') {
996			ctf_decl_push(cd, lc, tp->ctt_type);
997			return;
998		}
999		prec = CTF_PREC_BASE;
1000		break;
1001
1002	case CTF_K_FUNCTION:
1003		ctf_decl_push(cd, lc, tp->ctt_type);
1004		prec = CTF_PREC_FUNCTION;
1005		break;
1006
1007	case CTF_K_POINTER:
1008		ctf_decl_push(cd, lc, tp->ctt_type);
1009		prec = CTF_PREC_POINTER;
1010		break;
1011
1012	case CTF_K_VOLATILE:
1013	case CTF_K_CONST:
1014	case CTF_K_RESTRICT:
1015		ctf_decl_push(cd, lc, tp->ctt_type);
1016		prec = cd->cd_qualp;
1017		is_qual++;
1018		break;
1019
1020	default:
1021		prec = CTF_PREC_BASE;
1022	}
1023
1024	if ((cdp = malloc(sizeof (ctf_decl_node_t), M_FBT, M_WAITOK)) == NULL) {
1025		cd->cd_err = EAGAIN;
1026		return;
1027	}
1028
1029	cdp->cd_type = type;
1030	cdp->cd_kind = kind;
1031	cdp->cd_n = n;
1032
1033	if (ctf_list_next(&cd->cd_nodes[prec]) == NULL)
1034		cd->cd_order[prec] = cd->cd_ordp++;
1035
1036	/*
1037	 * Reset cd_qualp to the highest precedence level that we've seen so
1038	 * far that can be qualified (CTF_PREC_BASE or CTF_PREC_POINTER).
1039	 */
1040	if (prec > cd->cd_qualp && prec < CTF_PREC_ARRAY)
1041		cd->cd_qualp = prec;
1042
1043	/*
1044	 * C array declarators are ordered inside out so prepend them.  Also by
1045	 * convention qualifiers of base types precede the type specifier (e.g.
1046	 * const int vs. int const) even though the two forms are equivalent.
1047	 */
1048	if (kind == CTF_K_ARRAY || (is_qual && prec == CTF_PREC_BASE))
1049		ctf_list_prepend(&cd->cd_nodes[prec], cdp);
1050	else
1051		ctf_list_append(&cd->cd_nodes[prec], cdp);
1052}
1053
1054static void
1055ctf_decl_sprintf(ctf_decl_t *cd, const char *format, ...)
1056{
1057	size_t len = (size_t)(cd->cd_end - cd->cd_ptr);
1058	va_list ap;
1059	size_t n;
1060
1061	va_start(ap, format);
1062	n = vsnprintf(cd->cd_ptr, len, format, ap);
1063	va_end(ap);
1064
1065	cd->cd_ptr += MIN(n, len);
1066	cd->cd_len += n;
1067}
1068
1069static ssize_t
1070fbt_type_name(linker_ctf_t *lc, ctf_id_t type, char *buf, size_t len)
1071{
1072	ctf_decl_t cd;
1073	ctf_decl_node_t *cdp;
1074	ctf_decl_prec_t prec, lp, rp;
1075	int ptr, arr;
1076	uint_t k;
1077
1078	if (lc == NULL && type == CTF_ERR)
1079		return (-1); /* simplify caller code by permitting CTF_ERR */
1080
1081	ctf_decl_init(&cd, buf, len);
1082	ctf_decl_push(&cd, lc, type);
1083
1084	if (cd.cd_err != 0) {
1085		ctf_decl_fini(&cd);
1086		return (-1);
1087	}
1088
1089	/*
1090	 * If the type graph's order conflicts with lexical precedence order
1091	 * for pointers or arrays, then we need to surround the declarations at
1092	 * the corresponding lexical precedence with parentheses.  This can
1093	 * result in either a parenthesized pointer (*) as in int (*)() or
1094	 * int (*)[], or in a parenthesized pointer and array as in int (*[])().
1095	 */
1096	ptr = cd.cd_order[CTF_PREC_POINTER] > CTF_PREC_POINTER;
1097	arr = cd.cd_order[CTF_PREC_ARRAY] > CTF_PREC_ARRAY;
1098
1099	rp = arr ? CTF_PREC_ARRAY : ptr ? CTF_PREC_POINTER : -1;
1100	lp = ptr ? CTF_PREC_POINTER : arr ? CTF_PREC_ARRAY : -1;
1101
1102	k = CTF_K_POINTER; /* avoid leading whitespace (see below) */
1103
1104	for (prec = CTF_PREC_BASE; prec < CTF_PREC_MAX; prec++) {
1105		for (cdp = ctf_list_next(&cd.cd_nodes[prec]);
1106		    cdp != NULL; cdp = ctf_list_next(cdp)) {
1107
1108			const ctf_type_t *tp =
1109			    ctf_lookup_by_id(lc, cdp->cd_type);
1110			const char *name = ctf_strptr(lc, tp->ctt_name);
1111
1112			if (k != CTF_K_POINTER && k != CTF_K_ARRAY)
1113				ctf_decl_sprintf(&cd, " ");
1114
1115			if (lp == prec) {
1116				ctf_decl_sprintf(&cd, "(");
1117				lp = -1;
1118			}
1119
1120			switch (cdp->cd_kind) {
1121			case CTF_K_INTEGER:
1122			case CTF_K_FLOAT:
1123			case CTF_K_TYPEDEF:
1124				ctf_decl_sprintf(&cd, "%s", name);
1125				break;
1126			case CTF_K_POINTER:
1127				ctf_decl_sprintf(&cd, "*");
1128				break;
1129			case CTF_K_ARRAY:
1130				ctf_decl_sprintf(&cd, "[%u]", cdp->cd_n);
1131				break;
1132			case CTF_K_FUNCTION:
1133				ctf_decl_sprintf(&cd, "()");
1134				break;
1135			case CTF_K_STRUCT:
1136			case CTF_K_FORWARD:
1137				ctf_decl_sprintf(&cd, "struct %s", name);
1138				break;
1139			case CTF_K_UNION:
1140				ctf_decl_sprintf(&cd, "union %s", name);
1141				break;
1142			case CTF_K_ENUM:
1143				ctf_decl_sprintf(&cd, "enum %s", name);
1144				break;
1145			case CTF_K_VOLATILE:
1146				ctf_decl_sprintf(&cd, "volatile");
1147				break;
1148			case CTF_K_CONST:
1149				ctf_decl_sprintf(&cd, "const");
1150				break;
1151			case CTF_K_RESTRICT:
1152				ctf_decl_sprintf(&cd, "restrict");
1153				break;
1154			}
1155
1156			k = cdp->cd_kind;
1157		}
1158
1159		if (rp == prec)
1160			ctf_decl_sprintf(&cd, ")");
1161	}
1162
1163	ctf_decl_fini(&cd);
1164	return (cd.cd_len);
1165}
1166
1167static void
1168fbt_getargdesc(void *arg __unused, dtrace_id_t id __unused, void *parg, dtrace_argdesc_t *desc)
1169{
1170	const ushort_t *dp;
1171	fbt_probe_t *fbt = parg;
1172	linker_ctf_t lc;
1173	modctl_t *ctl = fbt->fbtp_ctl;
1174	int ndx = desc->dtargd_ndx;
1175	int symindx = fbt->fbtp_symindx;
1176	uint32_t *ctfoff;
1177	uint32_t offset;
1178	ushort_t info, kind, n;
1179
1180	if (fbt->fbtp_roffset != 0 && desc->dtargd_ndx == 0) {
1181		(void) strcpy(desc->dtargd_native, "int");
1182		return;
1183	}
1184
1185	desc->dtargd_ndx = DTRACE_ARGNONE;
1186
1187	/* Get a pointer to the CTF data and it's length. */
1188	if (linker_ctf_get(ctl, &lc) != 0)
1189		/* No CTF data? Something wrong? *shrug* */
1190		return;
1191
1192	/* Check if this module hasn't been initialised yet. */
1193	if (*lc.ctfoffp == NULL) {
1194		/*
1195		 * Initialise the CTF object and function symindx to
1196		 * byte offset array.
1197		 */
1198		if (fbt_ctfoff_init(ctl, &lc) != 0)
1199			return;
1200
1201		/* Initialise the CTF type to byte offset array. */
1202		if (fbt_typoff_init(&lc) != 0)
1203			return;
1204	}
1205
1206	ctfoff = *lc.ctfoffp;
1207
1208	if (ctfoff == NULL || *lc.typoffp == NULL)
1209		return;
1210
1211	/* Check if the symbol index is out of range. */
1212	if (symindx >= lc.nsym)
1213		return;
1214
1215	/* Check if the symbol isn't cross-referenced. */
1216	if ((offset = ctfoff[symindx]) == 0xffffffff)
1217		return;
1218
1219	dp = (const ushort_t *)(lc.ctftab + offset + sizeof(ctf_header_t));
1220
1221	info = *dp++;
1222	kind = CTF_INFO_KIND(info);
1223	n = CTF_INFO_VLEN(info);
1224
1225	if (kind == CTF_K_UNKNOWN && n == 0) {
1226		printf("%s(%d): Unknown function!\n",__func__,__LINE__);
1227		return;
1228	}
1229
1230	if (kind != CTF_K_FUNCTION) {
1231		printf("%s(%d): Expected a function!\n",__func__,__LINE__);
1232		return;
1233	}
1234
1235	if (fbt->fbtp_roffset != 0) {
1236		/* Only return type is available for args[1] in return probe. */
1237		if (ndx > 1)
1238			return;
1239		ASSERT(ndx == 1);
1240	} else {
1241		/* Check if the requested argument doesn't exist. */
1242		if (ndx >= n)
1243			return;
1244
1245		/* Skip the return type and arguments up to the one requested. */
1246		dp += ndx + 1;
1247	}
1248
1249	if (fbt_type_name(&lc, *dp, desc->dtargd_native, sizeof(desc->dtargd_native)) > 0)
1250		desc->dtargd_ndx = ndx;
1251
1252	return;
1253}
1254
1255static int
1256fbt_linker_file_cb(linker_file_t lf, void *arg)
1257{
1258
1259	fbt_provide_module(arg, lf);
1260
1261	return (0);
1262}
1263
1264static void
1265fbt_load(void *dummy)
1266{
1267	/* Create the /dev/dtrace/fbt entry. */
1268	fbt_cdev = make_dev(&fbt_cdevsw, 0, UID_ROOT, GID_WHEEL, 0600,
1269	    "dtrace/fbt");
1270
1271	/* Default the probe table size if not specified. */
1272	if (fbt_probetab_size == 0)
1273		fbt_probetab_size = FBT_PROBETAB_SIZE;
1274
1275	/* Choose the hash mask for the probe table. */
1276	fbt_probetab_mask = fbt_probetab_size - 1;
1277
1278	/* Allocate memory for the probe table. */
1279	fbt_probetab =
1280	    malloc(fbt_probetab_size * sizeof (fbt_probe_t *), M_FBT, M_WAITOK | M_ZERO);
1281
1282	dtrace_invop_add(fbt_invop);
1283
1284	if (dtrace_register("fbt", &fbt_attr, DTRACE_PRIV_USER,
1285	    NULL, &fbt_pops, NULL, &fbt_id) != 0)
1286		return;
1287
1288	/* Create probes for the kernel and already-loaded modules. */
1289	linker_file_foreach(fbt_linker_file_cb, NULL);
1290}
1291
1292
1293static int
1294fbt_unload()
1295{
1296	int error = 0;
1297
1298	/* De-register the invalid opcode handler. */
1299	dtrace_invop_remove(fbt_invop);
1300
1301	/* De-register this DTrace provider. */
1302	if ((error = dtrace_unregister(fbt_id)) != 0)
1303		return (error);
1304
1305	/* Free the probe table. */
1306	free(fbt_probetab, M_FBT);
1307	fbt_probetab = NULL;
1308	fbt_probetab_mask = 0;
1309
1310	destroy_dev(fbt_cdev);
1311
1312	return (error);
1313}
1314
1315static int
1316fbt_modevent(module_t mod __unused, int type, void *data __unused)
1317{
1318	int error = 0;
1319
1320	switch (type) {
1321	case MOD_LOAD:
1322		break;
1323
1324	case MOD_UNLOAD:
1325		break;
1326
1327	case MOD_SHUTDOWN:
1328		break;
1329
1330	default:
1331		error = EOPNOTSUPP;
1332		break;
1333
1334	}
1335
1336	return (error);
1337}
1338
1339static int
1340fbt_open(struct cdev *dev __unused, int oflags __unused, int devtype __unused, struct thread *td __unused)
1341{
1342	return (0);
1343}
1344
1345SYSINIT(fbt_load, SI_SUB_DTRACE_PROVIDER, SI_ORDER_ANY, fbt_load, NULL);
1346SYSUNINIT(fbt_unload, SI_SUB_DTRACE_PROVIDER, SI_ORDER_ANY, fbt_unload, NULL);
1347
1348DEV_MODULE(fbt, fbt_modevent, NULL);
1349MODULE_VERSION(fbt, 1);
1350MODULE_DEPEND(fbt, dtrace, 1, 1, 1);
1351MODULE_DEPEND(fbt, opensolaris, 1, 1, 1);
1352