dtrace_isa.c revision 289786
1/*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License, Version 1.0 only
6 * (the "License").  You may not use this file except in compliance
7 * with the License.
8 *
9 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
10 * or http://www.opensolaris.org/os/licensing.
11 * See the License for the specific language governing permissions
12 * and limitations under the License.
13 *
14 * When distributing Covered Code, include this CDDL HEADER in each
15 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
16 * If applicable, add the following below this CDDL HEADER, with the
17 * fields enclosed by brackets "[]" replaced with your own identifying
18 * information: Portions Copyright [yyyy] [name of copyright owner]
19 *
20 * CDDL HEADER END
21 *
22 * $FreeBSD: stable/10/sys/cddl/dev/dtrace/amd64/dtrace_isa.c 289786 2015-10-23 07:31:04Z avg $
23 */
24/*
25 * Copyright 2005 Sun Microsystems, Inc.  All rights reserved.
26 * Use is subject to license terms.
27 */
28#include <sys/cdefs.h>
29
30#include <sys/param.h>
31#include <sys/systm.h>
32#include <sys/kernel.h>
33#include <sys/stack.h>
34#include <sys/pcpu.h>
35
36#include <machine/frame.h>
37#include <machine/md_var.h>
38#include <machine/reg.h>
39#include <machine/stack.h>
40
41#include <vm/vm.h>
42#include <vm/vm_param.h>
43#include <vm/pmap.h>
44
45#include "regset.h"
46
47uint8_t dtrace_fuword8_nocheck(void *);
48uint16_t dtrace_fuword16_nocheck(void *);
49uint32_t dtrace_fuword32_nocheck(void *);
50uint64_t dtrace_fuword64_nocheck(void *);
51
52int	dtrace_ustackdepth_max = 2048;
53
/*
 * Capture a kernel stack trace into pcstack[], up to pcstack_limit entries.
 * The walk follows the saved-%rbp frame chain starting from this function's
 * own frame.  'aframes' counts artificial (DTrace-internal) frames to skip;
 * when the last artificial frame is consumed, the recorded probe caller (if
 * any) is substituted in its place.  'intrpc', when non-NULL, is the PC of
 * an interrupted context and becomes the topmost entry.  Unused slots are
 * zero-filled.
 */
void
dtrace_getpcstack(pc_t *pcstack, int pcstack_limit, int aframes,
    uint32_t *intrpc)
{
	int depth = 0;
	register_t rbp;
	struct amd64_frame *frame;
	vm_offset_t callpc;
	pc_t caller = (pc_t) solaris_cpu[curcpu].cpu_dtrace_caller;

	/* The interrupted PC, if supplied, is the top of the trace. */
	if (intrpc != 0)
		pcstack[depth++] = (pc_t) intrpc;

	/* Count this function's own frame as artificial. */
	aframes++;

	/* Seed the walk from our current frame pointer. */
	__asm __volatile("movq %%rbp,%0" : "=r" (rbp));

	frame = (struct amd64_frame *)rbp;
	while (depth < pcstack_limit) {
		/* Stop as soon as the chain leaves kernel address space. */
		if (!INKERNEL((long) frame))
			break;

		callpc = frame->f_retaddr;

		if (!INKERNEL(callpc))
			break;

		/*
		 * Skip artificial frames; on consuming the last one, record
		 * the probe caller instead of the raw return address.
		 */
		if (aframes > 0) {
			aframes--;
			if ((aframes == 0) && (caller != 0)) {
				pcstack[depth++] = caller;
			}
		}
		else {
			pcstack[depth++] = callpc;
		}

		/*
		 * Sanity-check the next link: it must advance upward and
		 * stay within this thread's kernel stack.
		 */
		if (frame->f_frame <= frame ||
		    (vm_offset_t)frame->f_frame >= curthread->td_kstack +
		    curthread->td_kstack_pages * PAGE_SIZE)
			break;
		frame = frame->f_frame;
	}

	/* Zero-fill whatever remains of the caller's buffer. */
	for (; depth < pcstack_limit; depth++) {
		pcstack[depth] = 0;
	}
}
102
/*
 * Walk a user-mode frame-pointer chain starting at (pc, sp), storing each
 * PC into pcstack[] when pcstack is non-NULL (bounded by pcstack_limit),
 * and return the number of frames visited.  The walk is bounded by
 * dtrace_ustackdepth_max to defend against circular frame chains; a
 * non-advancing stack pointer is flagged as CPU_DTRACE_BADSTACK.  User
 * memory is read with dtrace_fuword64(), and any fault it raises is
 * cleared and terminates the walk.
 */
static int
dtrace_getustack_common(uint64_t *pcstack, int pcstack_limit, uintptr_t pc,
    uintptr_t sp)
{
	uintptr_t oldsp;
	volatile uint16_t *flags =
	    (volatile uint16_t *)&cpu_core[curcpu].cpuc_dtrace_flags;
	int ret = 0;

	ASSERT(pcstack == NULL || pcstack_limit > 0);
	ASSERT(dtrace_ustackdepth_max > 0);

	while (pc != 0) {
		/*
		 * We limit the number of times we can go around this
		 * loop to account for a circular stack.
		 */
		if (ret++ >= dtrace_ustackdepth_max) {
			*flags |= CPU_DTRACE_BADSTACK;
			cpu_core[curcpu].cpuc_dtrace_illval = sp;
			break;
		}

		/* Record the PC only when the caller wants the trace. */
		if (pcstack != NULL) {
			*pcstack++ = (uint64_t)pc;
			pcstack_limit--;
			if (pcstack_limit <= 0)
				break;
		}

		if (sp == 0)
			break;

		oldsp = sp;

		/* Return address sits just above the saved frame pointer. */
		pc = dtrace_fuword64((void *)(sp +
			offsetof(struct amd64_frame, f_retaddr)));
		sp = dtrace_fuword64((void *)sp);

		/* A frame that links to itself would loop forever. */
		if (sp == oldsp) {
			*flags |= CPU_DTRACE_BADSTACK;
			cpu_core[curcpu].cpuc_dtrace_illval = sp;
			break;
		}

		/*
		 * This is totally bogus:  if we faulted, we're going to clear
		 * the fault and break.  This is to deal with the apparently
		 * broken Java stacks on x86.
		 */
		if (*flags & CPU_DTRACE_FAULT) {
			*flags &= ~CPU_DTRACE_FAULT;
			break;
		}
	}

	return (ret);
}
161
/*
 * Capture the current user-mode stack trace into pcstack[].  The first
 * slot receives the current pid; subsequent slots receive return PCs
 * gathered by dtrace_getustack_common() from the trapframe's rip/rbp.
 * Any slots left over are zero-filled.  Does nothing further if a DTrace
 * fault is already pending on this CPU.
 */
void
dtrace_getupcstack(uint64_t *pcstack, int pcstack_limit)
{
	proc_t *p = curproc;
	struct trapframe *tf;
	uintptr_t pc, sp, fp;
	volatile uint16_t *flags =
	    (volatile uint16_t *)&cpu_core[curcpu].cpuc_dtrace_flags;
	int n;

	if (*flags & CPU_DTRACE_FAULT)
		return;

	if (pcstack_limit <= 0)
		return;

	/*
	 * If there's no user context we still need to zero the stack.
	 */
	if (p == NULL || (tf = curthread->td_frame) == NULL)
		goto zero;

	*pcstack++ = (uint64_t)p->p_pid;
	pcstack_limit--;

	if (pcstack_limit <= 0)
		return;

	pc = tf->tf_rip;
	fp = tf->tf_rbp;
	sp = tf->tf_rsp;

	if (DTRACE_CPUFLAG_ISSET(CPU_DTRACE_ENTRY)) {
		/*
		 * In an entry probe.  The frame pointer has not yet been
		 * pushed (that happens in the function prologue).  The
		 * best approach is to add the current pc as a missing top
		 * of stack and back the pc up to the caller, which is stored
		 * at the current stack pointer address since the call
		 * instruction puts it there right before the branch.
		 */

		*pcstack++ = (uint64_t)pc;
		pcstack_limit--;
		if (pcstack_limit <= 0)
			return;

		pc = dtrace_fuword64((void *) sp);
	}

	n = dtrace_getustack_common(pcstack, pcstack_limit, pc, fp);
	ASSERT(n >= 0);
	ASSERT(n <= pcstack_limit);

	pcstack += n;
	pcstack_limit -= n;

zero:
	/* Zero-fill any slots the walk did not consume. */
	while (pcstack_limit-- > 0)
		*pcstack++ = 0;
}
223
/*
 * Return the depth of the current user-mode stack, counted the same way
 * dtrace_getupcstack() would record it (including the synthetic top-of-
 * stack frame for an entry probe).  Returns 0 with no user context and
 * -1 if a DTrace fault is already pending.
 */
int
dtrace_getustackdepth(void)
{
	proc_t *p = curproc;
	struct trapframe *tf;
	uintptr_t pc, fp, sp;
	int n = 0;

	if (p == NULL || (tf = curthread->td_frame) == NULL)
		return (0);

	if (DTRACE_CPUFLAG_ISSET(CPU_DTRACE_FAULT))
		return (-1);

	pc = tf->tf_rip;
	fp = tf->tf_rbp;
	sp = tf->tf_rsp;

	if (DTRACE_CPUFLAG_ISSET(CPU_DTRACE_ENTRY)) {
		/*
		 * In an entry probe.  The frame pointer has not yet been
		 * pushed (that happens in the function prologue).  The
		 * best approach is to add the current pc as a missing top
		 * of stack and back the pc up to the caller, which is stored
		 * at the current stack pointer address since the call
		 * instruction puts it there right before the branch.
		 */

		pc = dtrace_fuword64((void *) sp);
		n++;
	}

	/* Count the remaining frames without recording them. */
	n += dtrace_getustack_common(NULL, 0, pc, fp);

	return (n);
}
260
/*
 * Capture the current user-mode stack as parallel (pc, fp) arrays:
 * pcstack[] receives the pid followed by return PCs, fpstack[] the
 * corresponding frame pointers.  The walk is performed inline here
 * (rather than via dtrace_getustack_common()) so each frame pointer
 * can be recorded.  Faults from dtrace_fuword64() are cleared and end
 * the walk; remaining pcstack slots are zero-filled.  The signal-stack
 * handling is not yet ported (see "notyet" sections).
 */
void
dtrace_getufpstack(uint64_t *pcstack, uint64_t *fpstack, int pcstack_limit)
{
	proc_t *p = curproc;
	struct trapframe *tf;
	uintptr_t pc, sp, fp;
	volatile uint16_t *flags =
	    (volatile uint16_t *)&cpu_core[curcpu].cpuc_dtrace_flags;
#ifdef notyet	/* XXX signal stack */
	uintptr_t oldcontext;
	size_t s1, s2;
#endif

	if (*flags & CPU_DTRACE_FAULT)
		return;

	if (pcstack_limit <= 0)
		return;

	/*
	 * If there's no user context we still need to zero the stack.
	 */
	if (p == NULL || (tf = curthread->td_frame) == NULL)
		goto zero;

	*pcstack++ = (uint64_t)p->p_pid;
	pcstack_limit--;

	if (pcstack_limit <= 0)
		return;

	pc = tf->tf_rip;
	sp = tf->tf_rsp;
	fp = tf->tf_rbp;

#ifdef notyet /* XXX signal stack */
	oldcontext = lwp->lwp_oldcontext;
	s1 = sizeof (struct xframe) + 2 * sizeof (long);
	s2 = s1 + sizeof (siginfo_t);
#endif

	if (DTRACE_CPUFLAG_ISSET(CPU_DTRACE_ENTRY)) {
		/*
		 * Entry probe: the prologue has not pushed %rbp yet, so
		 * record the current pc with no frame pointer and back pc
		 * up to the caller's return address at *sp.
		 */
		*pcstack++ = (uint64_t)pc;
		*fpstack++ = 0;
		pcstack_limit--;
		if (pcstack_limit <= 0)
			return;

		pc = dtrace_fuword64((void *)sp);
	}

	while (pc != 0) {
		*pcstack++ = (uint64_t)pc;
		*fpstack++ = fp;
		pcstack_limit--;
		if (pcstack_limit <= 0)
			break;

		if (fp == 0)
			break;

#ifdef notyet /* XXX signal stack */
		if (oldcontext == sp + s1 || oldcontext == sp + s2) {
			ucontext_t *ucp = (ucontext_t *)oldcontext;
			greg_t *gregs = ucp->uc_mcontext.gregs;

			sp = dtrace_fulword(&gregs[REG_FP]);
			pc = dtrace_fulword(&gregs[REG_PC]);

			oldcontext = dtrace_fulword(&ucp->uc_link);
		} else
#endif /* XXX */
		{
			/* Follow the frame chain: retaddr above saved rbp. */
			pc = dtrace_fuword64((void *)(fp +
				offsetof(struct amd64_frame, f_retaddr)));
			fp = dtrace_fuword64((void *)fp);
		}

		/*
		 * This is totally bogus:  if we faulted, we're going to clear
		 * the fault and break.  This is to deal with the apparently
		 * broken Java stacks on x86.
		 */
		if (*flags & CPU_DTRACE_FAULT) {
			*flags &= ~CPU_DTRACE_FAULT;
			break;
		}
	}

zero:
	/* Zero-fill the unused portion of pcstack. */
	while (pcstack_limit-- > 0)
		*pcstack++ = 0;
}
354
/*ARGSUSED*/
/*
 * Fetch probe argument 'arg' (0-based), skipping 'aframes' artificial
 * frames.  Two cases: if the walk reaches the dtrace_invop() callsite,
 * the probe fired via a trap and the arguments are recovered from the
 * saved trapframe (registers for args 0-5 per the amd64 calling
 * convention, stack beyond that); otherwise dtrace_probe() was called
 * directly and the argument is read from the caller's stack, shifted
 * by one to account for the probe ID being dtrace_probe()'s first
 * argument.
 */
uint64_t
dtrace_getarg(int arg, int aframes)
{
	uintptr_t val;
	struct amd64_frame *fp = (struct amd64_frame *)dtrace_getfp();
	uintptr_t *stack;
	int i;

	/*
	 * A total of 6 arguments are passed via registers; any argument with
	 * index of 5 or lower is therefore in a register.
	 */
	int inreg = 5;

	for (i = 1; i <= aframes; i++) {
		fp = fp->f_frame;

		if (P2ROUNDUP(fp->f_retaddr, 16) ==
		    (long)dtrace_invop_callsite) {
			/*
			 * In the case of amd64, we will use the pointer to the
			 * regs structure that was pushed when we took the
			 * trap.  To get this structure, we must increment
			 * beyond the frame structure, and then again beyond
			 * the calling RIP stored in dtrace_invop().  If the
			 * argument that we're seeking is passed on the stack,
			 * we'll pull the true stack pointer out of the saved
			 * registers and decrement our argument by the number
			 * of arguments passed in registers; if the argument
			 * we're seeking is passed in registers, we can just
			 * load it directly.
			 */
			struct trapframe *tf =
			    (struct trapframe *)((uintptr_t)&fp[1]);

			if (arg <= inreg) {
				/* SysV amd64 ABI integer argument registers. */
				switch (arg) {
				case 0:
					stack = (uintptr_t *)&tf->tf_rdi;
					break;
				case 1:
					stack = (uintptr_t *)&tf->tf_rsi;
					break;
				case 2:
					stack = (uintptr_t *)&tf->tf_rdx;
					break;
				case 3:
					stack = (uintptr_t *)&tf->tf_rcx;
					break;
				case 4:
					stack = (uintptr_t *)&tf->tf_r8;
					break;
				case 5:
					stack = (uintptr_t *)&tf->tf_r9;
					break;
				}
				arg = 0;
			} else {
				/* Stack-passed args start at the trapped rsp. */
				stack = (uintptr_t *)(tf->tf_rsp);
				arg -= inreg;
			}
			goto load;
		}

	}

	/*
	 * We know that we did not come through a trap to get into
	 * dtrace_probe() -- the provider simply called dtrace_probe()
	 * directly.  As this is the case, we need to shift the argument
	 * that we're looking for:  the probe ID is the first argument to
	 * dtrace_probe(), so the argument n will actually be found where
	 * one would expect to find argument (n + 1).
	 */
	arg++;

	if (arg <= inreg) {
		/*
		 * This shouldn't happen.  If the argument is passed in a
		 * register then it should have been, well, passed in a
		 * register...
		 */
		DTRACE_CPUFLAG_SET(CPU_DTRACE_ILLOP);
		return (0);
	}

	arg -= (inreg + 1);
	/* Skip the saved frame pointer and return address. */
	stack = (uintptr_t *)fp + 2;

load:
	/* Tolerate a fault while dereferencing the computed slot. */
	DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT);
	val = stack[arg];
	DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT);

	return (val);
}
452
453int
454dtrace_getstackdepth(int aframes)
455{
456	int depth = 0;
457	struct amd64_frame *frame;
458	vm_offset_t rbp;
459
460	aframes++;
461	rbp = dtrace_getfp();
462	frame = (struct amd64_frame *)rbp;
463	depth++;
464	for(;;) {
465		if (!INKERNEL((long) frame))
466			break;
467		if (!INKERNEL((long) frame->f_frame))
468			break;
469		depth++;
470		if (frame->f_frame <= frame ||
471		    (vm_offset_t)frame->f_frame >= curthread->td_kstack +
472		    curthread->td_kstack_pages * PAGE_SIZE)
473			break;
474		frame = frame->f_frame;
475	}
476	if (depth < aframes)
477		return 0;
478	else
479		return depth - aframes;
480}
481
/*
 * Return the value of register 'reg' from trapframe 'rp'.  Register
 * indices at or below SS use the i386-ordered mapping from reg.d
 * (regmap[] below) to translate into REG_* constants; indices above SS
 * are amd64-native and are rebased past the i386 range.  Unknown
 * registers set CPU_DTRACE_ILLOP and return 0.
 */
ulong_t
dtrace_getreg(struct trapframe *rp, uint_t reg)
{
	/* This table is dependent on reg.d. */
	int regmap[] = {
		REG_GS,		/* 0  GS */
		REG_FS,		/* 1  FS */
		REG_ES,		/* 2  ES */
		REG_DS,		/* 3  DS */
		REG_RDI,	/* 4  EDI */
		REG_RSI,	/* 5  ESI */
		REG_RBP,	/* 6  EBP, REG_FP */
		REG_RSP,	/* 7  ESP */
		REG_RBX,	/* 8  EBX, REG_R1 */
		REG_RDX,	/* 9  EDX */
		REG_RCX,	/* 10 ECX */
		REG_RAX,	/* 11 EAX, REG_R0 */
		REG_TRAPNO,	/* 12 TRAPNO */
		REG_ERR,	/* 13 ERR */
		REG_RIP,	/* 14 EIP, REG_PC */
		REG_CS,		/* 15 CS */
		REG_RFL,	/* 16 EFL, REG_PS */
		REG_RSP,	/* 17 UESP, REG_SP */
		REG_SS		/* 18 SS */
	};

	if (reg <= SS) {
		/* Guard against indices inside the i386 range but off-table. */
		if (reg >= sizeof (regmap) / sizeof (int)) {
			DTRACE_CPUFLAG_SET(CPU_DTRACE_ILLOP);
			return (0);
		}

		reg = regmap[reg];
	} else {
		/* This is dependent on reg.d. */
		reg -= SS + 1;
	}

	/* Pull the requested slot out of the trapframe. */
	switch (reg) {
	case REG_RDI:
		return (rp->tf_rdi);
	case REG_RSI:
		return (rp->tf_rsi);
	case REG_RDX:
		return (rp->tf_rdx);
	case REG_RCX:
		return (rp->tf_rcx);
	case REG_R8:
		return (rp->tf_r8);
	case REG_R9:
		return (rp->tf_r9);
	case REG_RAX:
		return (rp->tf_rax);
	case REG_RBX:
		return (rp->tf_rbx);
	case REG_RBP:
		return (rp->tf_rbp);
	case REG_R10:
		return (rp->tf_r10);
	case REG_R11:
		return (rp->tf_r11);
	case REG_R12:
		return (rp->tf_r12);
	case REG_R13:
		return (rp->tf_r13);
	case REG_R14:
		return (rp->tf_r14);
	case REG_R15:
		return (rp->tf_r15);
	case REG_DS:
		return (rp->tf_ds);
	case REG_ES:
		return (rp->tf_es);
	case REG_FS:
		return (rp->tf_fs);
	case REG_GS:
		return (rp->tf_gs);
	case REG_TRAPNO:
		return (rp->tf_trapno);
	case REG_ERR:
		return (rp->tf_err);
	case REG_RIP:
		return (rp->tf_rip);
	case REG_CS:
		return (rp->tf_cs);
	case REG_SS:
		return (rp->tf_ss);
	case REG_RFL:
		return (rp->tf_rflags);
	case REG_RSP:
		return (rp->tf_rsp);
	default:
		DTRACE_CPUFLAG_SET(CPU_DTRACE_ILLOP);
		return (0);
	}
}
578
579static int
580dtrace_copycheck(uintptr_t uaddr, uintptr_t kaddr, size_t size)
581{
582	ASSERT(INKERNEL(kaddr) && kaddr + size >= kaddr);
583
584	if (uaddr + size > VM_MAXUSER_ADDRESS || uaddr + size < uaddr) {
585		DTRACE_CPUFLAG_SET(CPU_DTRACE_BADADDR);
586		cpu_core[curcpu].cpuc_dtrace_illval = uaddr;
587		return (0);
588	}
589
590	return (1);
591}
592
593void
594dtrace_copyin(uintptr_t uaddr, uintptr_t kaddr, size_t size,
595    volatile uint16_t *flags)
596{
597	if (dtrace_copycheck(uaddr, kaddr, size))
598		dtrace_copy(uaddr, kaddr, size);
599}
600
601void
602dtrace_copyout(uintptr_t kaddr, uintptr_t uaddr, size_t size,
603    volatile uint16_t *flags)
604{
605	if (dtrace_copycheck(uaddr, kaddr, size))
606		dtrace_copy(kaddr, uaddr, size);
607}
608
609void
610dtrace_copyinstr(uintptr_t uaddr, uintptr_t kaddr, size_t size,
611    volatile uint16_t *flags)
612{
613	if (dtrace_copycheck(uaddr, kaddr, size))
614		dtrace_copystr(uaddr, kaddr, size, flags);
615}
616
617void
618dtrace_copyoutstr(uintptr_t kaddr, uintptr_t uaddr, size_t size,
619    volatile uint16_t *flags)
620{
621	if (dtrace_copycheck(uaddr, kaddr, size))
622		dtrace_copystr(kaddr, uaddr, size, flags);
623}
624
625uint8_t
626dtrace_fuword8(void *uaddr)
627{
628	if ((uintptr_t)uaddr > VM_MAXUSER_ADDRESS) {
629		DTRACE_CPUFLAG_SET(CPU_DTRACE_BADADDR);
630		cpu_core[curcpu].cpuc_dtrace_illval = (uintptr_t)uaddr;
631		return (0);
632	}
633	return (dtrace_fuword8_nocheck(uaddr));
634}
635
636uint16_t
637dtrace_fuword16(void *uaddr)
638{
639	if ((uintptr_t)uaddr > VM_MAXUSER_ADDRESS) {
640		DTRACE_CPUFLAG_SET(CPU_DTRACE_BADADDR);
641		cpu_core[curcpu].cpuc_dtrace_illval = (uintptr_t)uaddr;
642		return (0);
643	}
644	return (dtrace_fuword16_nocheck(uaddr));
645}
646
647uint32_t
648dtrace_fuword32(void *uaddr)
649{
650	if ((uintptr_t)uaddr > VM_MAXUSER_ADDRESS) {
651		DTRACE_CPUFLAG_SET(CPU_DTRACE_BADADDR);
652		cpu_core[curcpu].cpuc_dtrace_illval = (uintptr_t)uaddr;
653		return (0);
654	}
655	return (dtrace_fuword32_nocheck(uaddr));
656}
657
658uint64_t
659dtrace_fuword64(void *uaddr)
660{
661	if ((uintptr_t)uaddr > VM_MAXUSER_ADDRESS) {
662		DTRACE_CPUFLAG_SET(CPU_DTRACE_BADADDR);
663		cpu_core[curcpu].cpuc_dtrace_illval = (uintptr_t)uaddr;
664		return (0);
665	}
666	return (dtrace_fuword64_nocheck(uaddr));
667}
668