fasttrap_isa.c revision 299003
1/* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 * 21 * Portions Copyright 2010 The FreeBSD Foundation 22 * 23 * $FreeBSD$ 24 */ 25 26/* 27 * Copyright 2008 Sun Microsystems, Inc. All rights reserved. 28 * Use is subject to license terms. 
29 */ 30 31#ifdef illumos 32#pragma ident "%Z%%M% %I% %E% SMI" 33#endif 34 35#include <sys/fasttrap_isa.h> 36#include <sys/fasttrap_impl.h> 37#include <sys/dtrace.h> 38#include <sys/dtrace_impl.h> 39#include <sys/cmn_err.h> 40#ifdef illumos 41#include <sys/regset.h> 42#include <sys/privregs.h> 43#include <sys/segments.h> 44#include <sys/x86_archext.h> 45#else 46#include <cddl/dev/dtrace/dtrace_cddl.h> 47#include <sys/types.h> 48#include <sys/proc.h> 49#include <sys/rmlock.h> 50#include <sys/dtrace_bsd.h> 51#include <cddl/dev/dtrace/x86/regset.h> 52#include <machine/segments.h> 53#include <machine/reg.h> 54#include <machine/pcb.h> 55#endif 56#include <sys/sysmacros.h> 57#ifdef illumos 58#include <sys/trap.h> 59#include <sys/archsystm.h> 60#else 61#include <sys/ptrace.h> 62 63static int 64proc_ops(int op, proc_t *p, void *kaddr, off_t uaddr, size_t len) 65{ 66 struct iovec iov; 67 struct uio uio; 68 69 iov.iov_base = kaddr; 70 iov.iov_len = len; 71 uio.uio_offset = uaddr; 72 uio.uio_iov = &iov; 73 uio.uio_resid = len; 74 uio.uio_iovcnt = 1; 75 uio.uio_segflg = UIO_SYSSPACE; 76 uio.uio_td = curthread; 77 uio.uio_rw = op; 78 PHOLD(p); 79 if (proc_rwmem(p, &uio) != 0) { 80 PRELE(p); 81 return (-1); 82 } 83 PRELE(p); 84 85 return (0); 86} 87 88static int 89uread(proc_t *p, void *kaddr, size_t len, uintptr_t uaddr) 90{ 91 92 return (proc_ops(UIO_READ, p, kaddr, uaddr, len)); 93} 94 95static int 96uwrite(proc_t *p, void *kaddr, size_t len, uintptr_t uaddr) 97{ 98 99 return (proc_ops(UIO_WRITE, p, kaddr, uaddr, len)); 100} 101#endif /* illumos */ 102#ifdef __i386__ 103#define r_rax r_eax 104#define r_rbx r_ebx 105#define r_rip r_eip 106#define r_rflags r_eflags 107#define r_rsp r_esp 108#define r_rbp r_ebp 109#endif 110 111/* 112 * Lossless User-Land Tracing on x86 113 * --------------------------------- 114 * 115 * The execution of most instructions is not dependent on the address; for 116 * these instructions it is sufficient to copy them into the user process's 117 * 
address space and execute them. To effectively single-step an instruction 118 * in user-land, we copy out the following sequence of instructions to scratch 119 * space in the user thread's ulwp_t structure. 120 * 121 * We then set the program counter (%eip or %rip) to point to this scratch 122 * space. Once execution resumes, the original instruction is executed and 123 * then control flow is redirected to what was originally the subsequent 124 * instruction. If the kernel attempts to deliver a signal while single- 125 * stepping, the signal is deferred and the program counter is moved into the 126 * second sequence of instructions. The second sequence ends in a trap into 127 * the kernel where the deferred signal is then properly handled and delivered. 128 * 129 * For instructions whose execution is position dependent, we perform simple 130 * emulation. These instructions are limited to control transfer 131 * instructions in 32-bit mode, but in 64-bit mode there's the added wrinkle 132 * of %rip-relative addressing that means that almost any instruction can be 133 * position dependent. For all the details on how we emulate generic 134 * instructions including %rip-relative instructions, see the code in 135 * fasttrap_pid_probe() below where we handle instructions of type 136 * FASTTRAP_T_COMMON (under the header: Generic Instruction Tracing). 
137 */ 138 139#define FASTTRAP_MODRM_MOD(modrm) (((modrm) >> 6) & 0x3) 140#define FASTTRAP_MODRM_REG(modrm) (((modrm) >> 3) & 0x7) 141#define FASTTRAP_MODRM_RM(modrm) ((modrm) & 0x7) 142#define FASTTRAP_MODRM(mod, reg, rm) (((mod) << 6) | ((reg) << 3) | (rm)) 143 144#define FASTTRAP_SIB_SCALE(sib) (((sib) >> 6) & 0x3) 145#define FASTTRAP_SIB_INDEX(sib) (((sib) >> 3) & 0x7) 146#define FASTTRAP_SIB_BASE(sib) ((sib) & 0x7) 147 148#define FASTTRAP_REX_W(rex) (((rex) >> 3) & 1) 149#define FASTTRAP_REX_R(rex) (((rex) >> 2) & 1) 150#define FASTTRAP_REX_X(rex) (((rex) >> 1) & 1) 151#define FASTTRAP_REX_B(rex) ((rex) & 1) 152#define FASTTRAP_REX(w, r, x, b) \ 153 (0x40 | ((w) << 3) | ((r) << 2) | ((x) << 1) | (b)) 154 155/* 156 * Single-byte op-codes. 157 */ 158#define FASTTRAP_PUSHL_EBP 0x55 159 160#define FASTTRAP_JO 0x70 161#define FASTTRAP_JNO 0x71 162#define FASTTRAP_JB 0x72 163#define FASTTRAP_JAE 0x73 164#define FASTTRAP_JE 0x74 165#define FASTTRAP_JNE 0x75 166#define FASTTRAP_JBE 0x76 167#define FASTTRAP_JA 0x77 168#define FASTTRAP_JS 0x78 169#define FASTTRAP_JNS 0x79 170#define FASTTRAP_JP 0x7a 171#define FASTTRAP_JNP 0x7b 172#define FASTTRAP_JL 0x7c 173#define FASTTRAP_JGE 0x7d 174#define FASTTRAP_JLE 0x7e 175#define FASTTRAP_JG 0x7f 176 177#define FASTTRAP_NOP 0x90 178 179#define FASTTRAP_MOV_EAX 0xb8 180#define FASTTRAP_MOV_ECX 0xb9 181 182#define FASTTRAP_RET16 0xc2 183#define FASTTRAP_RET 0xc3 184 185#define FASTTRAP_LOOPNZ 0xe0 186#define FASTTRAP_LOOPZ 0xe1 187#define FASTTRAP_LOOP 0xe2 188#define FASTTRAP_JCXZ 0xe3 189 190#define FASTTRAP_CALL 0xe8 191#define FASTTRAP_JMP32 0xe9 192#define FASTTRAP_JMP8 0xeb 193 194#define FASTTRAP_INT3 0xcc 195#define FASTTRAP_INT 0xcd 196 197#define FASTTRAP_2_BYTE_OP 0x0f 198#define FASTTRAP_GROUP5_OP 0xff 199 200/* 201 * Two-byte op-codes (second byte only). 
202 */ 203#define FASTTRAP_0F_JO 0x80 204#define FASTTRAP_0F_JNO 0x81 205#define FASTTRAP_0F_JB 0x82 206#define FASTTRAP_0F_JAE 0x83 207#define FASTTRAP_0F_JE 0x84 208#define FASTTRAP_0F_JNE 0x85 209#define FASTTRAP_0F_JBE 0x86 210#define FASTTRAP_0F_JA 0x87 211#define FASTTRAP_0F_JS 0x88 212#define FASTTRAP_0F_JNS 0x89 213#define FASTTRAP_0F_JP 0x8a 214#define FASTTRAP_0F_JNP 0x8b 215#define FASTTRAP_0F_JL 0x8c 216#define FASTTRAP_0F_JGE 0x8d 217#define FASTTRAP_0F_JLE 0x8e 218#define FASTTRAP_0F_JG 0x8f 219 220#define FASTTRAP_EFLAGS_OF 0x800 221#define FASTTRAP_EFLAGS_DF 0x400 222#define FASTTRAP_EFLAGS_SF 0x080 223#define FASTTRAP_EFLAGS_ZF 0x040 224#define FASTTRAP_EFLAGS_AF 0x010 225#define FASTTRAP_EFLAGS_PF 0x004 226#define FASTTRAP_EFLAGS_CF 0x001 227 228/* 229 * Instruction prefixes. 230 */ 231#define FASTTRAP_PREFIX_OPERAND 0x66 232#define FASTTRAP_PREFIX_ADDRESS 0x67 233#define FASTTRAP_PREFIX_CS 0x2E 234#define FASTTRAP_PREFIX_DS 0x3E 235#define FASTTRAP_PREFIX_ES 0x26 236#define FASTTRAP_PREFIX_FS 0x64 237#define FASTTRAP_PREFIX_GS 0x65 238#define FASTTRAP_PREFIX_SS 0x36 239#define FASTTRAP_PREFIX_LOCK 0xF0 240#define FASTTRAP_PREFIX_REP 0xF3 241#define FASTTRAP_PREFIX_REPNE 0xF2 242 243#define FASTTRAP_NOREG 0xff 244 245/* 246 * Map between instruction register encodings and the kernel constants which 247 * correspond to indicies into struct regs. 248 */ 249#ifdef __amd64 250static const uint8_t regmap[16] = { 251 REG_RAX, REG_RCX, REG_RDX, REG_RBX, REG_RSP, REG_RBP, REG_RSI, REG_RDI, 252 REG_R8, REG_R9, REG_R10, REG_R11, REG_R12, REG_R13, REG_R14, REG_R15, 253}; 254#else 255static const uint8_t regmap[8] = { 256 EAX, ECX, EDX, EBX, UESP, EBP, ESI, EDI 257}; 258#endif 259 260static ulong_t fasttrap_getreg(struct reg *, uint_t); 261 262static uint64_t 263fasttrap_anarg(struct reg *rp, int function_entry, int argno) 264{ 265 uint64_t value = 0; 266 int shift = function_entry ? 
1 : 0; 267 268#ifdef __amd64 269 if (curproc->p_model == DATAMODEL_LP64) { 270 uintptr_t *stack; 271 272 /* 273 * In 64-bit mode, the first six arguments are stored in 274 * registers. 275 */ 276 if (argno < 6) 277 switch (argno) { 278 case 0: 279 return (rp->r_rdi); 280 case 1: 281 return (rp->r_rsi); 282 case 2: 283 return (rp->r_rdx); 284 case 3: 285 return (rp->r_rcx); 286 case 4: 287 return (rp->r_r8); 288 case 5: 289 return (rp->r_r9); 290 } 291 292 stack = (uintptr_t *)rp->r_rsp; 293 DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT); 294 value = dtrace_fulword(&stack[argno - 6 + shift]); 295 DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT | CPU_DTRACE_BADADDR); 296 } else { 297#endif 298#ifdef __i386 299 uint32_t *stack = (uint32_t *)rp->r_esp; 300 DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT); 301 value = dtrace_fuword32(&stack[argno + shift]); 302 DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT | CPU_DTRACE_BADADDR); 303#endif 304#ifdef __amd64 305 } 306#endif 307 308 return (value); 309} 310 311/*ARGSUSED*/ 312int 313fasttrap_tracepoint_init(proc_t *p, fasttrap_tracepoint_t *tp, uintptr_t pc, 314 fasttrap_probe_type_t type) 315{ 316 uint8_t instr[FASTTRAP_MAX_INSTR_SIZE + 10]; 317 size_t len = FASTTRAP_MAX_INSTR_SIZE; 318 size_t first = MIN(len, PAGESIZE - (pc & PAGEOFFSET)); 319 uint_t start = 0; 320 int rmindex, size; 321 uint8_t seg, rex = 0; 322 323 /* 324 * Read the instruction at the given address out of the process's 325 * address space. We don't have to worry about a debugger 326 * changing this instruction before we overwrite it with our trap 327 * instruction since P_PR_LOCK is set. Since instructions can span 328 * pages, we potentially read the instruction in two parts. If the 329 * second part fails, we just zero out that part of the instruction. 
330 */ 331 if (uread(p, &instr[0], first, pc) != 0) 332 return (-1); 333 if (len > first && 334 uread(p, &instr[first], len - first, pc + first) != 0) { 335 bzero(&instr[first], len - first); 336 len = first; 337 } 338 339 /* 340 * If the disassembly fails, then we have a malformed instruction. 341 */ 342 if ((size = dtrace_instr_size_isa(instr, p->p_model, &rmindex)) <= 0) 343 return (-1); 344 345 /* 346 * Make sure the disassembler isn't completely broken. 347 */ 348 ASSERT(-1 <= rmindex && rmindex < size); 349 350 /* 351 * If the computed size is greater than the number of bytes read, 352 * then it was a malformed instruction possibly because it fell on a 353 * page boundary and the subsequent page was missing or because of 354 * some malicious user. 355 */ 356 if (size > len) 357 return (-1); 358 359 tp->ftt_size = (uint8_t)size; 360 tp->ftt_segment = FASTTRAP_SEG_NONE; 361 362 /* 363 * Find the start of the instruction's opcode by processing any 364 * legacy prefixes. 365 */ 366 for (;;) { 367 seg = 0; 368 switch (instr[start]) { 369 case FASTTRAP_PREFIX_SS: 370 seg++; 371 /*FALLTHRU*/ 372 case FASTTRAP_PREFIX_GS: 373 seg++; 374 /*FALLTHRU*/ 375 case FASTTRAP_PREFIX_FS: 376 seg++; 377 /*FALLTHRU*/ 378 case FASTTRAP_PREFIX_ES: 379 seg++; 380 /*FALLTHRU*/ 381 case FASTTRAP_PREFIX_DS: 382 seg++; 383 /*FALLTHRU*/ 384 case FASTTRAP_PREFIX_CS: 385 seg++; 386 /*FALLTHRU*/ 387 case FASTTRAP_PREFIX_OPERAND: 388 case FASTTRAP_PREFIX_ADDRESS: 389 case FASTTRAP_PREFIX_LOCK: 390 case FASTTRAP_PREFIX_REP: 391 case FASTTRAP_PREFIX_REPNE: 392 if (seg != 0) { 393 /* 394 * It's illegal for an instruction to specify 395 * two segment prefixes -- give up on this 396 * illegal instruction. 397 */ 398 if (tp->ftt_segment != FASTTRAP_SEG_NONE) 399 return (-1); 400 401 tp->ftt_segment = seg; 402 } 403 start++; 404 continue; 405 } 406 break; 407 } 408 409#ifdef __amd64 410 /* 411 * Identify the REX prefix on 64-bit processes. 
412 */ 413 if (p->p_model == DATAMODEL_LP64 && (instr[start] & 0xf0) == 0x40) 414 rex = instr[start++]; 415#endif 416 417 /* 418 * Now that we're pretty sure that the instruction is okay, copy the 419 * valid part to the tracepoint. 420 */ 421 bcopy(instr, tp->ftt_instr, FASTTRAP_MAX_INSTR_SIZE); 422 423 tp->ftt_type = FASTTRAP_T_COMMON; 424 if (instr[start] == FASTTRAP_2_BYTE_OP) { 425 switch (instr[start + 1]) { 426 case FASTTRAP_0F_JO: 427 case FASTTRAP_0F_JNO: 428 case FASTTRAP_0F_JB: 429 case FASTTRAP_0F_JAE: 430 case FASTTRAP_0F_JE: 431 case FASTTRAP_0F_JNE: 432 case FASTTRAP_0F_JBE: 433 case FASTTRAP_0F_JA: 434 case FASTTRAP_0F_JS: 435 case FASTTRAP_0F_JNS: 436 case FASTTRAP_0F_JP: 437 case FASTTRAP_0F_JNP: 438 case FASTTRAP_0F_JL: 439 case FASTTRAP_0F_JGE: 440 case FASTTRAP_0F_JLE: 441 case FASTTRAP_0F_JG: 442 tp->ftt_type = FASTTRAP_T_JCC; 443 tp->ftt_code = (instr[start + 1] & 0x0f) | FASTTRAP_JO; 444 tp->ftt_dest = pc + tp->ftt_size + 445 /* LINTED - alignment */ 446 *(int32_t *)&instr[start + 2]; 447 break; 448 } 449 } else if (instr[start] == FASTTRAP_GROUP5_OP) { 450 uint_t mod = FASTTRAP_MODRM_MOD(instr[start + 1]); 451 uint_t reg = FASTTRAP_MODRM_REG(instr[start + 1]); 452 uint_t rm = FASTTRAP_MODRM_RM(instr[start + 1]); 453 454 if (reg == 2 || reg == 4) { 455 uint_t i, sz; 456 457 if (reg == 2) 458 tp->ftt_type = FASTTRAP_T_CALL; 459 else 460 tp->ftt_type = FASTTRAP_T_JMP; 461 462 if (mod == 3) 463 tp->ftt_code = 2; 464 else 465 tp->ftt_code = 1; 466 467 ASSERT(p->p_model == DATAMODEL_LP64 || rex == 0); 468 469 /* 470 * See AMD x86-64 Architecture Programmer's Manual 471 * Volume 3, Section 1.2.7, Table 1-12, and 472 * Appendix A.3.1, Table A-15. 473 */ 474 if (mod != 3 && rm == 4) { 475 uint8_t sib = instr[start + 2]; 476 uint_t index = FASTTRAP_SIB_INDEX(sib); 477 uint_t base = FASTTRAP_SIB_BASE(sib); 478 479 tp->ftt_scale = FASTTRAP_SIB_SCALE(sib); 480 481 tp->ftt_index = (index == 4) ? 
482 FASTTRAP_NOREG : 483 regmap[index | (FASTTRAP_REX_X(rex) << 3)]; 484 tp->ftt_base = (mod == 0 && base == 5) ? 485 FASTTRAP_NOREG : 486 regmap[base | (FASTTRAP_REX_B(rex) << 3)]; 487 488 i = 3; 489 sz = mod == 1 ? 1 : 4; 490 } else { 491 /* 492 * In 64-bit mode, mod == 0 and r/m == 5 493 * denotes %rip-relative addressing; in 32-bit 494 * mode, the base register isn't used. In both 495 * modes, there is a 32-bit operand. 496 */ 497 if (mod == 0 && rm == 5) { 498#ifdef __amd64 499 if (p->p_model == DATAMODEL_LP64) 500 tp->ftt_base = REG_RIP; 501 else 502#endif 503 tp->ftt_base = FASTTRAP_NOREG; 504 sz = 4; 505 } else { 506 uint8_t base = rm | 507 (FASTTRAP_REX_B(rex) << 3); 508 509 tp->ftt_base = regmap[base]; 510 sz = mod == 1 ? 1 : mod == 2 ? 4 : 0; 511 } 512 tp->ftt_index = FASTTRAP_NOREG; 513 i = 2; 514 } 515 516 if (sz == 1) { 517 tp->ftt_dest = *(int8_t *)&instr[start + i]; 518 } else if (sz == 4) { 519 /* LINTED - alignment */ 520 tp->ftt_dest = *(int32_t *)&instr[start + i]; 521 } else { 522 tp->ftt_dest = 0; 523 } 524 } 525 } else { 526 switch (instr[start]) { 527 case FASTTRAP_RET: 528 tp->ftt_type = FASTTRAP_T_RET; 529 break; 530 531 case FASTTRAP_RET16: 532 tp->ftt_type = FASTTRAP_T_RET16; 533 /* LINTED - alignment */ 534 tp->ftt_dest = *(uint16_t *)&instr[start + 1]; 535 break; 536 537 case FASTTRAP_JO: 538 case FASTTRAP_JNO: 539 case FASTTRAP_JB: 540 case FASTTRAP_JAE: 541 case FASTTRAP_JE: 542 case FASTTRAP_JNE: 543 case FASTTRAP_JBE: 544 case FASTTRAP_JA: 545 case FASTTRAP_JS: 546 case FASTTRAP_JNS: 547 case FASTTRAP_JP: 548 case FASTTRAP_JNP: 549 case FASTTRAP_JL: 550 case FASTTRAP_JGE: 551 case FASTTRAP_JLE: 552 case FASTTRAP_JG: 553 tp->ftt_type = FASTTRAP_T_JCC; 554 tp->ftt_code = instr[start]; 555 tp->ftt_dest = pc + tp->ftt_size + 556 (int8_t)instr[start + 1]; 557 break; 558 559 case FASTTRAP_LOOPNZ: 560 case FASTTRAP_LOOPZ: 561 case FASTTRAP_LOOP: 562 tp->ftt_type = FASTTRAP_T_LOOP; 563 tp->ftt_code = instr[start]; 564 tp->ftt_dest = pc + 
tp->ftt_size + 565 (int8_t)instr[start + 1]; 566 break; 567 568 case FASTTRAP_JCXZ: 569 tp->ftt_type = FASTTRAP_T_JCXZ; 570 tp->ftt_dest = pc + tp->ftt_size + 571 (int8_t)instr[start + 1]; 572 break; 573 574 case FASTTRAP_CALL: 575 tp->ftt_type = FASTTRAP_T_CALL; 576 tp->ftt_dest = pc + tp->ftt_size + 577 /* LINTED - alignment */ 578 *(int32_t *)&instr[start + 1]; 579 tp->ftt_code = 0; 580 break; 581 582 case FASTTRAP_JMP32: 583 tp->ftt_type = FASTTRAP_T_JMP; 584 tp->ftt_dest = pc + tp->ftt_size + 585 /* LINTED - alignment */ 586 *(int32_t *)&instr[start + 1]; 587 break; 588 case FASTTRAP_JMP8: 589 tp->ftt_type = FASTTRAP_T_JMP; 590 tp->ftt_dest = pc + tp->ftt_size + 591 (int8_t)instr[start + 1]; 592 break; 593 594 case FASTTRAP_PUSHL_EBP: 595 if (start == 0) 596 tp->ftt_type = FASTTRAP_T_PUSHL_EBP; 597 break; 598 599 case FASTTRAP_NOP: 600#ifdef __amd64 601 ASSERT(p->p_model == DATAMODEL_LP64 || rex == 0); 602 603 /* 604 * On amd64 we have to be careful not to confuse a nop 605 * (actually xchgl %eax, %eax) with an instruction using 606 * the same opcode, but that does something different 607 * (e.g. xchgl %r8d, %eax or xcghq %r8, %rax). 608 */ 609 if (FASTTRAP_REX_B(rex) == 0) 610#endif 611 tp->ftt_type = FASTTRAP_T_NOP; 612 break; 613 614 case FASTTRAP_INT3: 615 /* 616 * The pid provider shares the int3 trap with debugger 617 * breakpoints so we can't instrument them. 618 */ 619 ASSERT(instr[start] == FASTTRAP_INSTR); 620 return (-1); 621 622 case FASTTRAP_INT: 623 /* 624 * Interrupts seem like they could be traced with 625 * no negative implications, but it's possible that 626 * a thread could be redirected by the trap handling 627 * code which would eventually return to the 628 * instruction after the interrupt. If the interrupt 629 * were in our scratch space, the subsequent 630 * instruction might be overwritten before we return. 631 * Accordingly we refuse to instrument any interrupt. 
632 */ 633 return (-1); 634 } 635 } 636 637#ifdef __amd64 638 if (p->p_model == DATAMODEL_LP64 && tp->ftt_type == FASTTRAP_T_COMMON) { 639 /* 640 * If the process is 64-bit and the instruction type is still 641 * FASTTRAP_T_COMMON -- meaning we're going to copy it out an 642 * execute it -- we need to watch for %rip-relative 643 * addressing mode. See the portion of fasttrap_pid_probe() 644 * below where we handle tracepoints with type 645 * FASTTRAP_T_COMMON for how we emulate instructions that 646 * employ %rip-relative addressing. 647 */ 648 if (rmindex != -1) { 649 uint_t mod = FASTTRAP_MODRM_MOD(instr[rmindex]); 650 uint_t reg = FASTTRAP_MODRM_REG(instr[rmindex]); 651 uint_t rm = FASTTRAP_MODRM_RM(instr[rmindex]); 652 653 ASSERT(rmindex > start); 654 655 if (mod == 0 && rm == 5) { 656 /* 657 * We need to be sure to avoid other 658 * registers used by this instruction. While 659 * the reg field may determine the op code 660 * rather than denoting a register, assuming 661 * that it denotes a register is always safe. 662 * We leave the REX field intact and use 663 * whatever value's there for simplicity. 664 */ 665 if (reg != 0) { 666 tp->ftt_ripmode = FASTTRAP_RIP_1 | 667 (FASTTRAP_RIP_X * 668 FASTTRAP_REX_B(rex)); 669 rm = 0; 670 } else { 671 tp->ftt_ripmode = FASTTRAP_RIP_2 | 672 (FASTTRAP_RIP_X * 673 FASTTRAP_REX_B(rex)); 674 rm = 1; 675 } 676 677 tp->ftt_modrm = tp->ftt_instr[rmindex]; 678 tp->ftt_instr[rmindex] = 679 FASTTRAP_MODRM(2, reg, rm); 680 } 681 } 682 } 683#endif 684 685 return (0); 686} 687 688int 689fasttrap_tracepoint_install(proc_t *p, fasttrap_tracepoint_t *tp) 690{ 691 fasttrap_instr_t instr = FASTTRAP_INSTR; 692 693 if (uwrite(p, &instr, 1, tp->ftt_pc) != 0) 694 return (-1); 695 696 return (0); 697} 698 699int 700fasttrap_tracepoint_remove(proc_t *p, fasttrap_tracepoint_t *tp) 701{ 702 uint8_t instr; 703 704 /* 705 * Distinguish between read or write failures and a changed 706 * instruction. 
	 */
	if (uread(p, &instr, 1, tp->ftt_pc) != 0)
		return (0);
	if (instr != FASTTRAP_INSTR)
		return (0);
	if (uwrite(p, &tp->ftt_instr[0], 1, tp->ftt_pc) != 0)
		return (-1);

	return (0);
}

#ifdef __amd64
/*
 * Fetch a long word from user space, mapping faults to 0 instead of an
 * error indication.  fasttrap_fulword() returning -1 is treated as a fault
 * here; a legitimate all-ones value is therefore indistinguishable from a
 * fault and also reads as 0.
 */
static uintptr_t
fasttrap_fulword_noerr(const void *uaddr)
{
	uintptr_t ret;

	if ((ret = fasttrap_fulword(uaddr)) != -1)
		return (ret);

	return (0);
}
#endif

#ifdef __i386__
/* 32-bit analogue of fasttrap_fulword_noerr(): faults read as 0. */
static uint32_t
fasttrap_fuword32_noerr(const void *uaddr)
{
	uint32_t ret;

	if ((ret = fasttrap_fuword32(uaddr)) != -1)
		return (ret);

	return (0);
}
#endif

/*
 * Fire the return probes attached to the tracepoint at pc in process pid.
 * new_pc is where control is about to transfer; for tracepoints that are
 * not plain ret/ret16 instructions, a probe fires only when new_pc lies
 * outside the traced function's [ftp_faddr, ftp_faddr + ftp_fsize) range.
 * Failing to find the tracepoint again is tolerated (see comment below).
 */
static void
fasttrap_return_common(struct reg *rp, uintptr_t pc, pid_t pid,
    uintptr_t new_pc)
{
	fasttrap_tracepoint_t *tp;
	fasttrap_bucket_t *bucket;
	fasttrap_id_t *id;
#ifdef illumos
	kmutex_t *pid_mtx;

	pid_mtx = &cpu_core[CPU->cpu_id].cpuc_pid_lock;
	mutex_enter(pid_mtx);
#else
	struct rm_priotracker tracker;

	rm_rlock(&fasttrap_tp_lock, &tracker);
#endif
	bucket = &fasttrap_tpoints.fth_table[FASTTRAP_TPOINTS_INDEX(pid, pc)];

	/* Look the tracepoint up again; only live (acount != 0) ones count. */
	for (tp = bucket->ftb_data; tp != NULL; tp = tp->ftt_next) {
		if (pid == tp->ftt_pid && pc == tp->ftt_pc &&
		    tp->ftt_proc->ftpc_acount != 0)
			break;
	}

	/*
	 * Don't sweat it if we can't find the tracepoint again; unlike
	 * when we're in fasttrap_pid_probe(), finding the tracepoint here
	 * is not essential to the correct execution of the process.
	 */
	if (tp == NULL) {
#ifdef illumos
		mutex_exit(pid_mtx);
#else
		rm_runlock(&fasttrap_tp_lock, &tracker);
#endif
		return;
	}

	for (id = tp->ftt_retids; id != NULL; id = id->fti_next) {
		/*
		 * If there's a branch that could act as a return site, we
		 * need to trace it, and check here if the program counter is
		 * external to the function.
		 */
		if (tp->ftt_type != FASTTRAP_T_RET &&
		    tp->ftt_type != FASTTRAP_T_RET16 &&
		    new_pc - id->fti_probe->ftp_faddr <
		    id->fti_probe->ftp_fsize)
			continue;

		/* arg0 is the offset of pc within the traced function. */
		dtrace_probe(id->fti_probe->ftp_id,
		    pc - id->fti_probe->ftp_faddr,
		    rp->r_rax, rp->r_rbx, 0, 0);
	}

#ifdef illumos
	mutex_exit(pid_mtx);
#else
	rm_runlock(&fasttrap_tp_lock, &tracker);
#endif
}

/*
 * Queue a SIGSEGV (SEGV_MAPERR at addr) for thread t in process p; used
 * when emulation of a traced instruction faults on a user address.
 */
static void
fasttrap_sigsegv(proc_t *p, kthread_t *t, uintptr_t addr)
{
#ifdef illumos
	sigqueue_t *sqp = kmem_zalloc(sizeof (sigqueue_t), KM_SLEEP);

	sqp->sq_info.si_signo = SIGSEGV;
	sqp->sq_info.si_code = SEGV_MAPERR;
	sqp->sq_info.si_addr = (caddr_t)addr;

	mutex_enter(&p->p_lock);
	sigaddqa(p, t, sqp);
	mutex_exit(&p->p_lock);

	if (t != NULL)
		aston(t);
#else
	ksiginfo_t *ksi = kmem_zalloc(sizeof (ksiginfo_t), KM_SLEEP);

	ksiginfo_init(ksi);
	ksi->ksi_signo = SIGSEGV;
	ksi->ksi_code = SEGV_MAPERR;
	ksi->ksi_addr = (caddr_t)addr;
	(void) tdksignal(t, SIGSEGV, ksi);
#endif
}

#ifdef __amd64
/*
 * Collect up to argc USDT probe arguments for a 64-bit process, applying
 * the probe's argument remapping (ftp_argmap).  A remapped index below 6
 * is taken from the entry registers via (&rp->r_rdi)[x] -- this assumes
 * %rdi, %rsi, %rdx, %rcx, %r8, %r9 are laid out contiguously in struct
 * reg starting at r_rdi (TODO confirm against struct reg).  Higher
 * indices are read from the user stack; unfilled slots are zeroed.
 */
static void
fasttrap_usdt_args64(fasttrap_probe_t *probe, struct reg *rp, int argc,
    uintptr_t *argv)
{
	int i, x, cap = MIN(argc, probe->ftp_nargs);
	uintptr_t *stack = (uintptr_t *)rp->r_rsp;

	for (i = 0; i < cap; i++) {
		x = probe->ftp_argmap[i];

		if (x < 6)
			argv[i] = (&rp->r_rdi)[x];
		else
			argv[i] = fasttrap_fulword_noerr(&stack[x]);
	}

	for (; i < argc; i++) {
		argv[i] = 0;
	}
}
#endif

#ifdef __i386__
/*
 * Collect up to argc USDT probe arguments for a 32-bit process; every
 * remapped argument is read from the user stack.  Unfilled slots are
 * zeroed, and faulting stack reads yield 0.
 */
static void
fasttrap_usdt_args32(fasttrap_probe_t *probe, struct reg *rp, int argc,
    uint32_t *argv)
{
	int i, x, cap = MIN(argc, probe->ftp_nargs);
	uint32_t *stack = (uint32_t *)rp->r_rsp;

	for (i = 0; i < cap; i++) {
		x = probe->ftp_argmap[i];

		argv[i] = fasttrap_fuword32_noerr(&stack[x]);
	}

	for (; i < argc; i++) {
		argv[i] = 0;
	}
}
#endif

/*
 * Validate the segment override recorded in tp->ftt_segment for the
 * effective address *addr: fetch the selected segment register, look up
 * its descriptor in the LDT or GDT, check privilege/present/type bits,
 * and verify *addr against the segment limit (the base is added after
 * these checks).  Returns 0 on success, -1 on any validation failure.
 */
static int
fasttrap_do_seg(fasttrap_tracepoint_t *tp, struct reg *rp, uintptr_t *addr)
{
	proc_t *p = curproc;
#ifdef __i386__
	struct segment_descriptor *desc;
#else
	struct user_segment_descriptor *desc;
#endif
	uint16_t sel = 0, ndx, type;
	uintptr_t limit;

	/* Pick up the selector named by the tracepoint's segment prefix. */
	switch (tp->ftt_segment) {
	case FASTTRAP_SEG_CS:
		sel = rp->r_cs;
		break;
	case FASTTRAP_SEG_DS:
		sel = rp->r_ds;
		break;
	case FASTTRAP_SEG_ES:
		sel = rp->r_es;
		break;
	case FASTTRAP_SEG_FS:
		sel = rp->r_fs;
		break;
	case FASTTRAP_SEG_GS:
		sel = rp->r_gs;
		break;
	case FASTTRAP_SEG_SS:
		sel = rp->r_ss;
		break;
	}

	/*
	 * Make sure the given segment register specifies a user priority
	 * selector rather than a kernel selector.
	 */
	if (ISPL(sel) != SEL_UPL)
		return (-1);

	ndx = IDXSEL(sel);

	/*
	 * Check the bounds and grab the descriptor out of the specified
	 * descriptor table.
	 */
	if (ISLDT(sel)) {
#ifdef __i386__
		/*
		 * NOTE(review): bounds test uses '>' while the GDT branch
		 * below uses '>='; confirm '>' is intended here.
		 */
		if (ndx > p->p_md.md_ldt->ldt_len)
			return (-1);

		desc = (struct segment_descriptor *)
		    p->p_md.md_ldt[ndx].ldt_base;
#else
		if (ndx > max_ldt_segment)
			return (-1);

		desc = (struct user_segment_descriptor *)
		    p->p_md.md_ldt[ndx].ldt_base;
#endif

	} else {
		if (ndx >= NGDT)
			return (-1);

#ifdef __i386__
		desc = &gdt[ndx].sd;
#else
		desc = &gdt[ndx];
#endif
	}

	/*
	 * The descriptor must have user privilege level and it must be
	 * present in memory.
	 */
	if (desc->sd_dpl != SEL_UPL || desc->sd_p != 1)
		return (-1);

	type = desc->sd_type;

	/*
	 * If the S bit in the type field is not set, this descriptor can
	 * only be used in system context.
	 */
	if ((type & 0x10) != 0x10)
		return (-1);

	/* With the granularity bit set the limit is in pages, not bytes. */
	limit = USD_GETLIMIT(desc) * (desc->sd_gran ? 
PAGESIZE : 1); 966 967 if (tp->ftt_segment == FASTTRAP_SEG_CS) { 968 /* 969 * The code/data bit and readable bit must both be set. 970 */ 971 if ((type & 0xa) != 0xa) 972 return (-1); 973 974 if (*addr > limit) 975 return (-1); 976 } else { 977 /* 978 * The code/data bit must be clear. 979 */ 980 if ((type & 0x8) != 0) 981 return (-1); 982 983 /* 984 * If the expand-down bit is clear, we just check the limit as 985 * it would naturally be applied. Otherwise, we need to check 986 * that the address is the range [limit + 1 .. 0xffff] or 987 * [limit + 1 ... 0xffffffff] depending on if the default 988 * operand size bit is set. 989 */ 990 if ((type & 0x4) == 0) { 991 if (*addr > limit) 992 return (-1); 993 } else if (desc->sd_def32) { 994 if (*addr < limit + 1 || 0xffff < *addr) 995 return (-1); 996 } else { 997 if (*addr < limit + 1 || 0xffffffff < *addr) 998 return (-1); 999 } 1000 } 1001 1002 *addr += USD_GETBASE(desc); 1003 1004 return (0); 1005} 1006 1007int 1008fasttrap_pid_probe(struct reg *rp) 1009{ 1010 proc_t *p = curproc; 1011#ifndef illumos 1012 struct rm_priotracker tracker; 1013 proc_t *pp; 1014#endif 1015 uintptr_t pc = rp->r_rip - 1; 1016 uintptr_t new_pc = 0; 1017 fasttrap_bucket_t *bucket; 1018#ifdef illumos 1019 kmutex_t *pid_mtx; 1020#endif 1021 fasttrap_tracepoint_t *tp, tp_local; 1022 pid_t pid; 1023 dtrace_icookie_t cookie; 1024 uint_t is_enabled = 0; 1025 1026 /* 1027 * It's possible that a user (in a veritable orgy of bad planning) 1028 * could redirect this thread's flow of control before it reached the 1029 * return probe fasttrap. In this case we need to kill the process 1030 * since it's in a unrecoverable state. 1031 */ 1032 if (curthread->t_dtrace_step) { 1033 ASSERT(curthread->t_dtrace_on); 1034 fasttrap_sigtrap(p, curthread, pc); 1035 return (0); 1036 } 1037 1038 /* 1039 * Clear all user tracing flags. 
1040 */ 1041 curthread->t_dtrace_ft = 0; 1042 curthread->t_dtrace_pc = 0; 1043 curthread->t_dtrace_npc = 0; 1044 curthread->t_dtrace_scrpc = 0; 1045 curthread->t_dtrace_astpc = 0; 1046#ifdef __amd64 1047 curthread->t_dtrace_regv = 0; 1048#endif 1049 1050 /* 1051 * Treat a child created by a call to vfork(2) as if it were its 1052 * parent. We know that there's only one thread of control in such a 1053 * process: this one. 1054 */ 1055#ifdef illumos 1056 while (p->p_flag & SVFORK) { 1057 p = p->p_parent; 1058 } 1059 1060 pid = p->p_pid; 1061 pid_mtx = &cpu_core[CPU->cpu_id].cpuc_pid_lock; 1062 mutex_enter(pid_mtx); 1063#else 1064 pp = p; 1065 sx_slock(&proctree_lock); 1066 while (pp->p_vmspace == pp->p_pptr->p_vmspace) 1067 pp = pp->p_pptr; 1068 pid = pp->p_pid; 1069 sx_sunlock(&proctree_lock); 1070 pp = NULL; 1071 1072 rm_rlock(&fasttrap_tp_lock, &tracker); 1073#endif 1074 1075 bucket = &fasttrap_tpoints.fth_table[FASTTRAP_TPOINTS_INDEX(pid, pc)]; 1076 1077 /* 1078 * Lookup the tracepoint that the process just hit. 1079 */ 1080 for (tp = bucket->ftb_data; tp != NULL; tp = tp->ftt_next) { 1081 if (pid == tp->ftt_pid && pc == tp->ftt_pc && 1082 tp->ftt_proc->ftpc_acount != 0) 1083 break; 1084 } 1085 1086 /* 1087 * If we couldn't find a matching tracepoint, either a tracepoint has 1088 * been inserted without using the pid<pid> ioctl interface (see 1089 * fasttrap_ioctl), or somehow we have mislaid this tracepoint. 1090 */ 1091 if (tp == NULL) { 1092#ifdef illumos 1093 mutex_exit(pid_mtx); 1094#else 1095 rm_runlock(&fasttrap_tp_lock, &tracker); 1096#endif 1097 return (-1); 1098 } 1099 1100 /* 1101 * Set the program counter to the address of the traced instruction 1102 * so that it looks right in ustack() output. 
1103 */ 1104 rp->r_rip = pc; 1105 1106 if (tp->ftt_ids != NULL) { 1107 fasttrap_id_t *id; 1108 1109#ifdef __amd64 1110 if (p->p_model == DATAMODEL_LP64) { 1111 for (id = tp->ftt_ids; id != NULL; id = id->fti_next) { 1112 fasttrap_probe_t *probe = id->fti_probe; 1113 1114 if (id->fti_ptype == DTFTP_ENTRY) { 1115 /* 1116 * We note that this was an entry 1117 * probe to help ustack() find the 1118 * first caller. 1119 */ 1120 cookie = dtrace_interrupt_disable(); 1121 DTRACE_CPUFLAG_SET(CPU_DTRACE_ENTRY); 1122 dtrace_probe(probe->ftp_id, rp->r_rdi, 1123 rp->r_rsi, rp->r_rdx, rp->r_rcx, 1124 rp->r_r8); 1125 DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_ENTRY); 1126 dtrace_interrupt_enable(cookie); 1127 } else if (id->fti_ptype == DTFTP_IS_ENABLED) { 1128 /* 1129 * Note that in this case, we don't 1130 * call dtrace_probe() since it's only 1131 * an artificial probe meant to change 1132 * the flow of control so that it 1133 * encounters the true probe. 1134 */ 1135 is_enabled = 1; 1136 } else if (probe->ftp_argmap == NULL) { 1137 dtrace_probe(probe->ftp_id, rp->r_rdi, 1138 rp->r_rsi, rp->r_rdx, rp->r_rcx, 1139 rp->r_r8); 1140 } else { 1141 uintptr_t t[5]; 1142 1143 fasttrap_usdt_args64(probe, rp, 1144 sizeof (t) / sizeof (t[0]), t); 1145 1146 dtrace_probe(probe->ftp_id, t[0], t[1], 1147 t[2], t[3], t[4]); 1148 } 1149 } 1150 } else { 1151#else /* __amd64 */ 1152 uintptr_t s0, s1, s2, s3, s4, s5; 1153 uint32_t *stack = (uint32_t *)rp->r_esp; 1154 1155 /* 1156 * In 32-bit mode, all arguments are passed on the 1157 * stack. If this is a function entry probe, we need 1158 * to skip the first entry on the stack as it 1159 * represents the return address rather than a 1160 * parameter to the function. 
1161 */ 1162 s0 = fasttrap_fuword32_noerr(&stack[0]); 1163 s1 = fasttrap_fuword32_noerr(&stack[1]); 1164 s2 = fasttrap_fuword32_noerr(&stack[2]); 1165 s3 = fasttrap_fuword32_noerr(&stack[3]); 1166 s4 = fasttrap_fuword32_noerr(&stack[4]); 1167 s5 = fasttrap_fuword32_noerr(&stack[5]); 1168 1169 for (id = tp->ftt_ids; id != NULL; id = id->fti_next) { 1170 fasttrap_probe_t *probe = id->fti_probe; 1171 1172 if (id->fti_ptype == DTFTP_ENTRY) { 1173 /* 1174 * We note that this was an entry 1175 * probe to help ustack() find the 1176 * first caller. 1177 */ 1178 cookie = dtrace_interrupt_disable(); 1179 DTRACE_CPUFLAG_SET(CPU_DTRACE_ENTRY); 1180 dtrace_probe(probe->ftp_id, s1, s2, 1181 s3, s4, s5); 1182 DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_ENTRY); 1183 dtrace_interrupt_enable(cookie); 1184 } else if (id->fti_ptype == DTFTP_IS_ENABLED) { 1185 /* 1186 * Note that in this case, we don't 1187 * call dtrace_probe() since it's only 1188 * an artificial probe meant to change 1189 * the flow of control so that it 1190 * encounters the true probe. 1191 */ 1192 is_enabled = 1; 1193 } else if (probe->ftp_argmap == NULL) { 1194 dtrace_probe(probe->ftp_id, s0, s1, 1195 s2, s3, s4); 1196 } else { 1197 uint32_t t[5]; 1198 1199 fasttrap_usdt_args32(probe, rp, 1200 sizeof (t) / sizeof (t[0]), t); 1201 1202 dtrace_probe(probe->ftp_id, t[0], t[1], 1203 t[2], t[3], t[4]); 1204 } 1205 } 1206#endif /* __amd64 */ 1207#ifdef __amd64 1208 } 1209#endif 1210 } 1211 1212 /* 1213 * We're about to do a bunch of work so we cache a local copy of 1214 * the tracepoint to emulate the instruction, and then find the 1215 * tracepoint again later if we need to light up any return probes. 1216 */ 1217 tp_local = *tp; 1218#ifdef illumos 1219 mutex_exit(pid_mtx); 1220#else 1221 rm_runlock(&fasttrap_tp_lock, &tracker); 1222#endif 1223 tp = &tp_local; 1224 1225 /* 1226 * Set the program counter to appear as though the traced instruction 1227 * had completely executed. 
This ensures that fasttrap_getreg() will 1228 * report the expected value for REG_RIP. 1229 */ 1230 rp->r_rip = pc + tp->ftt_size; 1231 1232 /* 1233 * If there's an is-enabled probe connected to this tracepoint it 1234 * means that there was a 'xorl %eax, %eax' or 'xorq %rax, %rax' 1235 * instruction that was placed there by DTrace when the binary was 1236 * linked. As this probe is, in fact, enabled, we need to stuff 1 1237 * into %eax or %rax. Accordingly, we can bypass all the instruction 1238 * emulation logic since we know the inevitable result. It's possible 1239 * that a user could construct a scenario where the 'is-enabled' 1240 * probe was on some other instruction, but that would be a rather 1241 * exotic way to shoot oneself in the foot. 1242 */ 1243 if (is_enabled) { 1244 rp->r_rax = 1; 1245 new_pc = rp->r_rip; 1246 goto done; 1247 } 1248 1249 /* 1250 * We emulate certain types of instructions to ensure correctness 1251 * (in the case of position dependent instructions) or optimize 1252 * common cases. The rest we have the thread execute back in user- 1253 * land. 1254 */ 1255 switch (tp->ftt_type) { 1256 case FASTTRAP_T_RET: 1257 case FASTTRAP_T_RET16: 1258 { 1259 uintptr_t dst = 0; 1260 uintptr_t addr = 0; 1261 int ret = 0; 1262 1263 /* 1264 * We have to emulate _every_ facet of the behavior of a ret 1265 * instruction including what happens if the load from %esp 1266 * fails; in that case, we send a SIGSEGV. 
1267 */ 1268#ifdef __amd64 1269 if (p->p_model == DATAMODEL_NATIVE) { 1270 ret = dst = fasttrap_fulword((void *)rp->r_rsp); 1271 addr = rp->r_rsp + sizeof (uintptr_t); 1272 } else { 1273#endif 1274#ifdef __i386__ 1275 uint32_t dst32; 1276 ret = dst32 = fasttrap_fuword32((void *)rp->r_esp); 1277 dst = dst32; 1278 addr = rp->r_esp + sizeof (uint32_t); 1279#endif 1280#ifdef __amd64 1281 } 1282#endif 1283 1284 if (ret == -1) { 1285 fasttrap_sigsegv(p, curthread, rp->r_rsp); 1286 new_pc = pc; 1287 break; 1288 } 1289 1290 if (tp->ftt_type == FASTTRAP_T_RET16) 1291 addr += tp->ftt_dest; 1292 1293 rp->r_rsp = addr; 1294 new_pc = dst; 1295 break; 1296 } 1297 1298 case FASTTRAP_T_JCC: 1299 { 1300 uint_t taken = 0; 1301 1302 switch (tp->ftt_code) { 1303 case FASTTRAP_JO: 1304 taken = (rp->r_rflags & FASTTRAP_EFLAGS_OF) != 0; 1305 break; 1306 case FASTTRAP_JNO: 1307 taken = (rp->r_rflags & FASTTRAP_EFLAGS_OF) == 0; 1308 break; 1309 case FASTTRAP_JB: 1310 taken = (rp->r_rflags & FASTTRAP_EFLAGS_CF) != 0; 1311 break; 1312 case FASTTRAP_JAE: 1313 taken = (rp->r_rflags & FASTTRAP_EFLAGS_CF) == 0; 1314 break; 1315 case FASTTRAP_JE: 1316 taken = (rp->r_rflags & FASTTRAP_EFLAGS_ZF) != 0; 1317 break; 1318 case FASTTRAP_JNE: 1319 taken = (rp->r_rflags & FASTTRAP_EFLAGS_ZF) == 0; 1320 break; 1321 case FASTTRAP_JBE: 1322 taken = (rp->r_rflags & FASTTRAP_EFLAGS_CF) != 0 || 1323 (rp->r_rflags & FASTTRAP_EFLAGS_ZF) != 0; 1324 break; 1325 case FASTTRAP_JA: 1326 taken = (rp->r_rflags & FASTTRAP_EFLAGS_CF) == 0 && 1327 (rp->r_rflags & FASTTRAP_EFLAGS_ZF) == 0; 1328 break; 1329 case FASTTRAP_JS: 1330 taken = (rp->r_rflags & FASTTRAP_EFLAGS_SF) != 0; 1331 break; 1332 case FASTTRAP_JNS: 1333 taken = (rp->r_rflags & FASTTRAP_EFLAGS_SF) == 0; 1334 break; 1335 case FASTTRAP_JP: 1336 taken = (rp->r_rflags & FASTTRAP_EFLAGS_PF) != 0; 1337 break; 1338 case FASTTRAP_JNP: 1339 taken = (rp->r_rflags & FASTTRAP_EFLAGS_PF) == 0; 1340 break; 1341 case FASTTRAP_JL: 1342 taken = ((rp->r_rflags & 
FASTTRAP_EFLAGS_SF) == 0) != 1343 ((rp->r_rflags & FASTTRAP_EFLAGS_OF) == 0); 1344 break; 1345 case FASTTRAP_JGE: 1346 taken = ((rp->r_rflags & FASTTRAP_EFLAGS_SF) == 0) == 1347 ((rp->r_rflags & FASTTRAP_EFLAGS_OF) == 0); 1348 break; 1349 case FASTTRAP_JLE: 1350 taken = (rp->r_rflags & FASTTRAP_EFLAGS_ZF) != 0 || 1351 ((rp->r_rflags & FASTTRAP_EFLAGS_SF) == 0) != 1352 ((rp->r_rflags & FASTTRAP_EFLAGS_OF) == 0); 1353 break; 1354 case FASTTRAP_JG: 1355 taken = (rp->r_rflags & FASTTRAP_EFLAGS_ZF) == 0 && 1356 ((rp->r_rflags & FASTTRAP_EFLAGS_SF) == 0) == 1357 ((rp->r_rflags & FASTTRAP_EFLAGS_OF) == 0); 1358 break; 1359 1360 } 1361 1362 if (taken) 1363 new_pc = tp->ftt_dest; 1364 else 1365 new_pc = pc + tp->ftt_size; 1366 break; 1367 } 1368 1369 case FASTTRAP_T_LOOP: 1370 { 1371 uint_t taken = 0; 1372#ifdef __amd64 1373 greg_t cx = rp->r_rcx--; 1374#else 1375 greg_t cx = rp->r_ecx--; 1376#endif 1377 1378 switch (tp->ftt_code) { 1379 case FASTTRAP_LOOPNZ: 1380 taken = (rp->r_rflags & FASTTRAP_EFLAGS_ZF) == 0 && 1381 cx != 0; 1382 break; 1383 case FASTTRAP_LOOPZ: 1384 taken = (rp->r_rflags & FASTTRAP_EFLAGS_ZF) != 0 && 1385 cx != 0; 1386 break; 1387 case FASTTRAP_LOOP: 1388 taken = (cx != 0); 1389 break; 1390 } 1391 1392 if (taken) 1393 new_pc = tp->ftt_dest; 1394 else 1395 new_pc = pc + tp->ftt_size; 1396 break; 1397 } 1398 1399 case FASTTRAP_T_JCXZ: 1400 { 1401#ifdef __amd64 1402 greg_t cx = rp->r_rcx; 1403#else 1404 greg_t cx = rp->r_ecx; 1405#endif 1406 1407 if (cx == 0) 1408 new_pc = tp->ftt_dest; 1409 else 1410 new_pc = pc + tp->ftt_size; 1411 break; 1412 } 1413 1414 case FASTTRAP_T_PUSHL_EBP: 1415 { 1416 int ret = 0; 1417 1418#ifdef __amd64 1419 if (p->p_model == DATAMODEL_NATIVE) { 1420 rp->r_rsp -= sizeof (uintptr_t); 1421 ret = fasttrap_sulword((void *)rp->r_rsp, rp->r_rbp); 1422 } else { 1423#endif 1424#ifdef __i386__ 1425 rp->r_rsp -= sizeof (uint32_t); 1426 ret = fasttrap_suword32((void *)rp->r_rsp, rp->r_rbp); 1427#endif 1428#ifdef __amd64 1429 } 1430#endif 
1431 1432 if (ret == -1) { 1433 fasttrap_sigsegv(p, curthread, rp->r_rsp); 1434 new_pc = pc; 1435 break; 1436 } 1437 1438 new_pc = pc + tp->ftt_size; 1439 break; 1440 } 1441 1442 case FASTTRAP_T_NOP: 1443 new_pc = pc + tp->ftt_size; 1444 break; 1445 1446 case FASTTRAP_T_JMP: 1447 case FASTTRAP_T_CALL: 1448 if (tp->ftt_code == 0) { 1449 new_pc = tp->ftt_dest; 1450 } else { 1451 uintptr_t value, addr = tp->ftt_dest; 1452 1453 if (tp->ftt_base != FASTTRAP_NOREG) 1454 addr += fasttrap_getreg(rp, tp->ftt_base); 1455 if (tp->ftt_index != FASTTRAP_NOREG) 1456 addr += fasttrap_getreg(rp, tp->ftt_index) << 1457 tp->ftt_scale; 1458 1459 if (tp->ftt_code == 1) { 1460 /* 1461 * If there's a segment prefix for this 1462 * instruction, we'll need to check permissions 1463 * and bounds on the given selector, and adjust 1464 * the address accordingly. 1465 */ 1466 if (tp->ftt_segment != FASTTRAP_SEG_NONE && 1467 fasttrap_do_seg(tp, rp, &addr) != 0) { 1468 fasttrap_sigsegv(p, curthread, addr); 1469 new_pc = pc; 1470 break; 1471 } 1472 1473#ifdef __amd64 1474 if (p->p_model == DATAMODEL_NATIVE) { 1475#endif 1476 if ((value = fasttrap_fulword((void *)addr)) 1477 == -1) { 1478 fasttrap_sigsegv(p, curthread, 1479 addr); 1480 new_pc = pc; 1481 break; 1482 } 1483 new_pc = value; 1484#ifdef __amd64 1485 } else { 1486 uint32_t value32; 1487 addr = (uintptr_t)(uint32_t)addr; 1488 if ((value32 = fasttrap_fuword32((void *)addr)) 1489 == -1) { 1490 fasttrap_sigsegv(p, curthread, 1491 addr); 1492 new_pc = pc; 1493 break; 1494 } 1495 new_pc = value32; 1496 } 1497#endif 1498 } else { 1499 new_pc = addr; 1500 } 1501 } 1502 1503 /* 1504 * If this is a call instruction, we need to push the return 1505 * address onto the stack. If this fails, we send the process 1506 * a SIGSEGV and reset the pc to emulate what would happen if 1507 * this instruction weren't traced. 
1508 */ 1509 if (tp->ftt_type == FASTTRAP_T_CALL) { 1510 int ret = 0; 1511 uintptr_t addr = 0, pcps; 1512#ifdef __amd64 1513 if (p->p_model == DATAMODEL_NATIVE) { 1514 addr = rp->r_rsp - sizeof (uintptr_t); 1515 pcps = pc + tp->ftt_size; 1516 ret = fasttrap_sulword((void *)addr, pcps); 1517 } else { 1518#endif 1519 addr = rp->r_rsp - sizeof (uint32_t); 1520 pcps = (uint32_t)(pc + tp->ftt_size); 1521 ret = fasttrap_suword32((void *)addr, pcps); 1522#ifdef __amd64 1523 } 1524#endif 1525 1526 if (ret == -1) { 1527 fasttrap_sigsegv(p, curthread, addr); 1528 new_pc = pc; 1529 break; 1530 } 1531 1532 rp->r_rsp = addr; 1533 } 1534 1535 break; 1536 1537 case FASTTRAP_T_COMMON: 1538 { 1539 uintptr_t addr; 1540#if defined(__amd64) 1541 uint8_t scratch[2 * FASTTRAP_MAX_INSTR_SIZE + 22]; 1542#else 1543 uint8_t scratch[2 * FASTTRAP_MAX_INSTR_SIZE + 7]; 1544#endif 1545 uint_t i = 0; 1546#ifdef illumos 1547 klwp_t *lwp = ttolwp(curthread); 1548 1549 /* 1550 * Compute the address of the ulwp_t and step over the 1551 * ul_self pointer. The method used to store the user-land 1552 * thread pointer is very different on 32- and 64-bit 1553 * kernels. 1554 */ 1555#if defined(__amd64) 1556 if (p->p_model == DATAMODEL_LP64) { 1557 addr = lwp->lwp_pcb.pcb_fsbase; 1558 addr += sizeof (void *); 1559 } else { 1560 addr = lwp->lwp_pcb.pcb_gsbase; 1561 addr += sizeof (caddr32_t); 1562 } 1563#else 1564 addr = USD_GETBASE(&lwp->lwp_pcb.pcb_gsdesc); 1565 addr += sizeof (void *); 1566#endif 1567#else /* !illumos */ 1568 fasttrap_scrspace_t *scrspace; 1569 scrspace = fasttrap_scraddr(curthread, tp->ftt_proc); 1570 if (scrspace == NULL) { 1571 /* 1572 * We failed to allocate scratch space for this thread. 1573 * Try to write the original instruction back out and 1574 * reset the pc. 
1575 */ 1576 if (fasttrap_copyout(tp->ftt_instr, (void *)pc, 1577 tp->ftt_size)) 1578 fasttrap_sigtrap(p, curthread, pc); 1579 new_pc = pc; 1580 break; 1581 } 1582 addr = scrspace->ftss_addr; 1583#endif /* illumos */ 1584 1585 /* 1586 * Generic Instruction Tracing 1587 * --------------------------- 1588 * 1589 * This is the layout of the scratch space in the user-land 1590 * thread structure for our generated instructions. 1591 * 1592 * 32-bit mode bytes 1593 * ------------------------ ----- 1594 * a: <original instruction> <= 15 1595 * jmp <pc + tp->ftt_size> 5 1596 * b: <original instruction> <= 15 1597 * int T_DTRACE_RET 2 1598 * ----- 1599 * <= 37 1600 * 1601 * 64-bit mode bytes 1602 * ------------------------ ----- 1603 * a: <original instruction> <= 15 1604 * jmp 0(%rip) 6 1605 * <pc + tp->ftt_size> 8 1606 * b: <original instruction> <= 15 1607 * int T_DTRACE_RET 2 1608 * ----- 1609 * <= 46 1610 * 1611 * The %pc is set to a, and curthread->t_dtrace_astpc is set 1612 * to b. If we encounter a signal on the way out of the 1613 * kernel, trap() will set %pc to curthread->t_dtrace_astpc 1614 * so that we execute the original instruction and re-enter 1615 * the kernel rather than redirecting to the next instruction. 1616 * 1617 * If there are return probes (so we know that we're going to 1618 * need to reenter the kernel after executing the original 1619 * instruction), the scratch space will just contain the 1620 * original instruction followed by an interrupt -- the same 1621 * data as at b. 1622 * 1623 * %rip-relative Addressing 1624 * ------------------------ 1625 * 1626 * There's a further complication in 64-bit mode due to %rip- 1627 * relative addressing. 
While this is clearly a beneficial 1628 * architectural decision for position independent code, it's 1629 * hard not to see it as a personal attack against the pid 1630 * provider since before there was a relatively small set of 1631 * instructions to emulate; with %rip-relative addressing, 1632 * almost every instruction can potentially depend on the 1633 * address at which it's executed. Rather than emulating 1634 * the broad spectrum of instructions that can now be 1635 * position dependent, we emulate jumps and others as in 1636 * 32-bit mode, and take a different tack for instructions 1637 * using %rip-relative addressing. 1638 * 1639 * For every instruction that uses the ModRM byte, the 1640 * in-kernel disassembler reports its location. We use the 1641 * ModRM byte to identify that an instruction uses 1642 * %rip-relative addressing and to see what other registers 1643 * the instruction uses. To emulate those instructions, 1644 * we modify the instruction to be %rax-relative rather than 1645 * %rip-relative (or %rcx-relative if the instruction uses 1646 * %rax; or %r8- or %r9-relative if the REX.B is present so 1647 * we don't have to rewrite the REX prefix). We then load 1648 * the value that %rip would have been into the scratch 1649 * register and generate an instruction to reset the scratch 1650 * register back to its original value. The instruction 1651 * sequence looks like this: 1652 * 1653 * 64-mode %rip-relative bytes 1654 * ------------------------ ----- 1655 * a: <modified instruction> <= 15 1656 * movq $<value>, %<scratch> 6 1657 * jmp 0(%rip) 6 1658 * <pc + tp->ftt_size> 8 1659 * b: <modified instruction> <= 15 1660 * int T_DTRACE_RET 2 1661 * ----- 1662 * 52 1663 * 1664 * We set curthread->t_dtrace_regv so that upon receiving 1665 * a signal we can reset the value of the scratch register. 
1666 */ 1667 1668 ASSERT(tp->ftt_size < FASTTRAP_MAX_INSTR_SIZE); 1669 1670 curthread->t_dtrace_scrpc = addr; 1671 bcopy(tp->ftt_instr, &scratch[i], tp->ftt_size); 1672 i += tp->ftt_size; 1673 1674#ifdef __amd64 1675 if (tp->ftt_ripmode != 0) { 1676 greg_t *reg = NULL; 1677 1678 ASSERT(p->p_model == DATAMODEL_LP64); 1679 ASSERT(tp->ftt_ripmode & 1680 (FASTTRAP_RIP_1 | FASTTRAP_RIP_2)); 1681 1682 /* 1683 * If this was a %rip-relative instruction, we change 1684 * it to be either a %rax- or %rcx-relative 1685 * instruction (depending on whether those registers 1686 * are used as another operand; or %r8- or %r9- 1687 * relative depending on the value of REX.B). We then 1688 * set that register and generate a movq instruction 1689 * to reset the value. 1690 */ 1691 if (tp->ftt_ripmode & FASTTRAP_RIP_X) 1692 scratch[i++] = FASTTRAP_REX(1, 0, 0, 1); 1693 else 1694 scratch[i++] = FASTTRAP_REX(1, 0, 0, 0); 1695 1696 if (tp->ftt_ripmode & FASTTRAP_RIP_1) 1697 scratch[i++] = FASTTRAP_MOV_EAX; 1698 else 1699 scratch[i++] = FASTTRAP_MOV_ECX; 1700 1701 switch (tp->ftt_ripmode) { 1702 case FASTTRAP_RIP_1: 1703 reg = &rp->r_rax; 1704 curthread->t_dtrace_reg = REG_RAX; 1705 break; 1706 case FASTTRAP_RIP_2: 1707 reg = &rp->r_rcx; 1708 curthread->t_dtrace_reg = REG_RCX; 1709 break; 1710 case FASTTRAP_RIP_1 | FASTTRAP_RIP_X: 1711 reg = &rp->r_r8; 1712 curthread->t_dtrace_reg = REG_R8; 1713 break; 1714 case FASTTRAP_RIP_2 | FASTTRAP_RIP_X: 1715 reg = &rp->r_r9; 1716 curthread->t_dtrace_reg = REG_R9; 1717 break; 1718 } 1719 1720 /* LINTED - alignment */ 1721 *(uint64_t *)&scratch[i] = *reg; 1722 curthread->t_dtrace_regv = *reg; 1723 *reg = pc + tp->ftt_size; 1724 i += sizeof (uint64_t); 1725 } 1726#endif 1727 1728 /* 1729 * Generate the branch instruction to what would have 1730 * normally been the subsequent instruction. 
In 32-bit mode, 1731 * this is just a relative branch; in 64-bit mode this is a 1732 * %rip-relative branch that loads the 64-bit pc value 1733 * immediately after the jmp instruction. 1734 */ 1735#ifdef __amd64 1736 if (p->p_model == DATAMODEL_LP64) { 1737 scratch[i++] = FASTTRAP_GROUP5_OP; 1738 scratch[i++] = FASTTRAP_MODRM(0, 4, 5); 1739 /* LINTED - alignment */ 1740 *(uint32_t *)&scratch[i] = 0; 1741 i += sizeof (uint32_t); 1742 /* LINTED - alignment */ 1743 *(uint64_t *)&scratch[i] = pc + tp->ftt_size; 1744 i += sizeof (uint64_t); 1745 } else { 1746#endif 1747#ifdef __i386__ 1748 /* 1749 * Set up the jmp to the next instruction; note that 1750 * the size of the traced instruction cancels out. 1751 */ 1752 scratch[i++] = FASTTRAP_JMP32; 1753 /* LINTED - alignment */ 1754 *(uint32_t *)&scratch[i] = pc - addr - 5; 1755 i += sizeof (uint32_t); 1756#endif 1757#ifdef __amd64 1758 } 1759#endif 1760 1761 curthread->t_dtrace_astpc = addr + i; 1762 bcopy(tp->ftt_instr, &scratch[i], tp->ftt_size); 1763 i += tp->ftt_size; 1764 scratch[i++] = FASTTRAP_INT; 1765 scratch[i++] = T_DTRACE_RET; 1766 1767 ASSERT(i <= sizeof (scratch)); 1768 1769#ifdef illumos 1770 if (fasttrap_copyout(scratch, (char *)addr, i)) { 1771#else 1772 if (uwrite(p, scratch, i, addr)) { 1773#endif 1774 fasttrap_sigtrap(p, curthread, pc); 1775 new_pc = pc; 1776 break; 1777 } 1778 if (tp->ftt_retids != NULL) { 1779 curthread->t_dtrace_step = 1; 1780 curthread->t_dtrace_ret = 1; 1781 new_pc = curthread->t_dtrace_astpc; 1782 } else { 1783 new_pc = curthread->t_dtrace_scrpc; 1784 } 1785 1786 curthread->t_dtrace_pc = pc; 1787 curthread->t_dtrace_npc = pc + tp->ftt_size; 1788 curthread->t_dtrace_on = 1; 1789 break; 1790 } 1791 1792 default: 1793 panic("fasttrap: mishandled an instruction"); 1794 } 1795 1796done: 1797 /* 1798 * If there were no return probes when we first found the tracepoint, 1799 * we should feel no obligation to honor any return probes that were 1800 * subsequently enabled -- they'll just 
have to wait until the next
	 * time around.
	 */
	if (tp->ftt_retids != NULL) {
		/*
		 * We need to wait until the results of the instruction are
		 * apparent before invoking any return probes. If this
		 * instruction was emulated we can just call
		 * fasttrap_return_common(); if it needs to be executed, we
		 * need to wait until the user thread returns to the kernel.
		 */
		if (tp->ftt_type != FASTTRAP_T_COMMON) {
			/*
			 * Set the program counter to the address of the traced
			 * instruction so that it looks right in ustack()
			 * output. We had previously set it to the end of the
			 * instruction to simplify %rip-relative addressing.
			 */
			rp->r_rip = pc;

			fasttrap_return_common(rp, pc, pid, new_pc);
		} else {
			/*
			 * FASTTRAP_T_COMMON: the thread will execute the
			 * original instruction out of the scratch space and
			 * re-enter the kernel (see the scratch-space setup
			 * above); the return probe fires from
			 * fasttrap_return_probe() at that point.
			 */
			ASSERT(curthread->t_dtrace_ret != 0);
			ASSERT(curthread->t_dtrace_pc == pc);
			ASSERT(curthread->t_dtrace_scrpc != 0);
			ASSERT(new_pc == curthread->t_dtrace_astpc);
		}
	}

	/* Resume the thread at the (possibly emulated) next instruction. */
	rp->r_rip = new_pc;

#ifndef illumos
	/*
	 * On FreeBSD the modified register set must be explicitly written
	 * back to the thread's trap frame.
	 */
	PROC_LOCK(p);
	proc_write_regs(curthread, rp);
	PROC_UNLOCK(p);
#endif

	return (0);
}

/*
 * Entered (via dtrace_return_probe()) after the thread has executed the
 * original instruction out of the per-thread scratch space set up by the
 * FASTTRAP_T_COMMON path in fasttrap_pid_probe(). Consumes the per-thread
 * bookkeeping (t_dtrace_pc/t_dtrace_npc) recorded there and fires any
 * return probes via fasttrap_return_common().
 */
int
fasttrap_return_probe(struct reg *rp)
{
	proc_t *p = curproc;
	uintptr_t pc = curthread->t_dtrace_pc;
	uintptr_t npc = curthread->t_dtrace_npc;

	/* Clear the single-step state so a stray trap isn't misattributed. */
	curthread->t_dtrace_pc = 0;
	curthread->t_dtrace_npc = 0;
	curthread->t_dtrace_scrpc = 0;
	curthread->t_dtrace_astpc = 0;

#ifdef illumos
	/*
	 * Treat a child created by a call to vfork(2) as if it were its
	 * parent. We know that there's only one thread of control in such a
	 * process: this one.
	 */
	while (p->p_flag & SVFORK) {
		p = p->p_parent;
	}
#endif

	/*
	 * We set rp->r_rip to the address of the traced instruction so
	 * that it appears to dtrace_probe() that we're on the original
	 * instruction, and so that the user can't easily detect our
	 * complex web of lies. dtrace_return_probe() (our caller)
	 * will correctly set %pc after we return.
	 */
	rp->r_rip = pc;

	fasttrap_return_common(rp, pc, p->p_pid, npc);

	return (0);
}

/*
 * pid-provider getarg entry point: snapshot the current thread's register
 * state and extract probe argument 'argno' via fasttrap_anarg(). The
 * second argument (1) distinguishes this from the USDT variant below;
 * fasttrap_anarg() is defined elsewhere in this file -- presumably it
 * selects function-entry argument lookup. Unused parameters are kept for
 * the dtrace getargval interface.
 */
/*ARGSUSED*/
uint64_t
fasttrap_pid_getarg(void *arg, dtrace_id_t id, void *parg, int argno,
    int aframes)
{
	struct reg r;

	fill_regs(curthread, &r);

	return (fasttrap_anarg(&r, 1, argno));
}

/*
 * USDT-provider getarg entry point: identical to fasttrap_pid_getarg()
 * except that fasttrap_anarg() is called with 0 rather than 1 (non-entry
 * argument lookup -- see fasttrap_anarg()).
 */
/*ARGSUSED*/
uint64_t
fasttrap_usdt_getarg(void *arg, dtrace_id_t id, void *parg, int argno,
    int aframes)
{
	struct reg r;

	fill_regs(curthread, &r);

	return (fasttrap_anarg(&r, 0, argno));
}

/*
 * Return the saved value of the general-purpose register identified by the
 * REG_* constant 'reg' from the register set 'rp'. Panics on a register
 * constant it doesn't know about.
 */
static ulong_t
fasttrap_getreg(struct reg *rp, uint_t reg)
{
#ifdef __amd64
	switch (reg) {
	case REG_R15:		return (rp->r_r15);
	case REG_R14:		return (rp->r_r14);
	case REG_R13:		return (rp->r_r13);
	case REG_R12:		return (rp->r_r12);
	case REG_R11:		return (rp->r_r11);
	case REG_R10:		return (rp->r_r10);
	case REG_R9:		return (rp->r_r9);
	case REG_R8:		return (rp->r_r8);
	case REG_RDI:		return (rp->r_rdi);
	case REG_RSI:		return (rp->r_rsi);
	case REG_RBP:		return (rp->r_rbp);
	case REG_RBX:		return (rp->r_rbx);
	case REG_RDX:		return (rp->r_rdx);
	case REG_RCX:		return (rp->r_rcx);
	case REG_RAX:		return (rp->r_rax);
	case REG_TRAPNO:	return (rp->r_trapno);
	case REG_ERR:		return (rp->r_err);
	case REG_RIP:		return (rp->r_rip);
	case REG_CS:		return (rp->r_cs);
#ifdef illumos
	case REG_RFL:		return (rp->r_rfl);
#endif
	case REG_RSP:		return (rp->r_rsp);
	case REG_SS:		return (rp->r_ss);
	case REG_FS:		return (rp->r_fs);
	case REG_GS:		return (rp->r_gs);
	case REG_DS:		return (rp->r_ds);
	case REG_ES:		return (rp->r_es);
	/* The fs/gs base values live in MSRs, not in the trap frame. */
	case REG_FSBASE:	return (rdmsr(MSR_FSBASE));
	case REG_GSBASE:	return (rdmsr(MSR_GSBASE));
	}

	panic("dtrace: illegal register constant");
	/*NOTREACHED*/
#else
/* Number of greg_t slots in the 32-bit register set. */
#define _NGREG 19
	if (reg >= _NGREG)
		panic("dtrace: illegal register constant");

	/*
	 * NOTE(review): this assumes the 19 greg_t entries are laid out
	 * contiguously in struct reg starting at r_gs, in regset index
	 * order -- confirm against <machine/reg.h> and the regset.h
	 * REG_* definitions.
	 */
	return (((greg_t *)&rp->r_gs)[reg]);
#endif
}