trap.c revision 321343
1/*- 2 * Copyright (C) 1994, David Greenman 3 * Copyright (c) 1990, 1993 4 * The Regents of the University of California. All rights reserved. 5 * 6 * This code is derived from software contributed to Berkeley by 7 * the University of Utah, and William Jolitz. 8 * 9 * Redistribution and use in source and binary forms, with or without 10 * modification, are permitted provided that the following conditions 11 * are met: 12 * 1. Redistributions of source code must retain the above copyright 13 * notice, this list of conditions and the following disclaimer. 14 * 2. Redistributions in binary form must reproduce the above copyright 15 * notice, this list of conditions and the following disclaimer in the 16 * documentation and/or other materials provided with the distribution. 17 * 3. All advertising materials mentioning features or use of this software 18 * must display the following acknowledgement: 19 * This product includes software developed by the University of 20 * California, Berkeley and its contributors. 21 * 4. Neither the name of the University nor the names of its contributors 22 * may be used to endorse or promote products derived from this software 23 * without specific prior written permission. 24 * 25 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 26 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 27 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 28 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 29 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 30 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 31 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 32 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 33 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 34 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 35 * SUCH DAMAGE. 36 * 37 * from: @(#)trap.c 7.4 (Berkeley) 5/13/91 38 */ 39 40#include <sys/cdefs.h> 41__FBSDID("$FreeBSD: stable/11/sys/amd64/amd64/trap.c 321343 2017-07-21 18:06:57Z kib $"); 42 43/* 44 * AMD64 Trap and System call handling 45 */ 46 47#include "opt_clock.h" 48#include "opt_cpu.h" 49#include "opt_hwpmc_hooks.h" 50#include "opt_isa.h" 51#include "opt_kdb.h" 52#include "opt_stack.h" 53 54#include <sys/param.h> 55#include <sys/bus.h> 56#include <sys/systm.h> 57#include <sys/proc.h> 58#include <sys/pioctl.h> 59#include <sys/ptrace.h> 60#include <sys/kdb.h> 61#include <sys/kernel.h> 62#include <sys/ktr.h> 63#include <sys/lock.h> 64#include <sys/mutex.h> 65#include <sys/resourcevar.h> 66#include <sys/signalvar.h> 67#include <sys/syscall.h> 68#include <sys/sysctl.h> 69#include <sys/sysent.h> 70#include <sys/uio.h> 71#include <sys/vmmeter.h> 72#ifdef HWPMC_HOOKS 73#include <sys/pmckern.h> 74PMC_SOFT_DEFINE( , , page_fault, all); 75PMC_SOFT_DEFINE( , , page_fault, read); 76PMC_SOFT_DEFINE( , , page_fault, write); 77#endif 78 79#include <vm/vm.h> 80#include <vm/vm_param.h> 81#include <vm/pmap.h> 82#include <vm/vm_kern.h> 83#include <vm/vm_map.h> 84#include <vm/vm_page.h> 85#include <vm/vm_extern.h> 86 87#include <machine/cpu.h> 88#include <machine/intr_machdep.h> 89#include <x86/mca.h> 90#include <machine/md_var.h> 91#include <machine/pcb.h> 92#ifdef SMP 93#include <machine/smp.h> 94#endif 95#include <machine/stack.h> 96#include <machine/tss.h> 97 98#ifdef KDTRACE_HOOKS 99#include <sys/dtrace_bsd.h> 100#endif 101 102extern void __noinline trap(struct trapframe *frame); 103extern void trap_check(struct trapframe *frame); 104extern void syscall(struct trapframe *frame); 105void dblfault_handler(struct trapframe *frame); 106 107static int trap_pfault(struct trapframe *, int); 108static void trap_fatal(struct trapframe *, vm_offset_t); 109 110#define MAX_TRAP_MSG 32 111static char *trap_msg[] = { 112 "", /* 0 unused */ 113 "privileged instruction fault", /* 1 T_PRIVINFLT */ 114 "", /* 2 unused */ 115 "breakpoint instruction fault", /* 3 T_BPTFLT */ 116 "", /* 4 unused */ 117 "", /* 5 unused */ 118 "arithmetic trap", /* 6 T_ARITHTRAP */ 119 "", /* 7 unused */ 120 "", /* 8 unused */ 121 "general protection fault", /* 9 T_PROTFLT */ 122 "trace trap", /* 10 T_TRCTRAP */ 123 "", /* 11 unused */ 124 "page fault", /* 12 T_PAGEFLT */ 125 "", /* 13 unused */ 126 "alignment fault", /* 14 T_ALIGNFLT */ 127 "", /* 15 unused */ 128 "", /* 16 unused */ 129 "", /* 17 unused */ 130 "integer divide fault", /* 18 T_DIVIDE */ 131 "non-maskable interrupt trap", /* 19 T_NMI */ 132 "overflow trap", /* 20 T_OFLOW */ 133 "FPU bounds check fault", /* 21 T_BOUND */ 134 "FPU device not available", /* 22 T_DNA */ 135 "double fault", /* 23 T_DOUBLEFLT */ 136 "FPU operand fetch fault", /* 24 T_FPOPFLT */ 137 "invalid TSS fault", /* 25 T_TSSFLT */ 138 "segment not present fault", /* 26 T_SEGNPFLT */ 139 "stack fault", /* 27 T_STKFLT */ 140 "machine check trap", /* 28 T_MCHK */ 141 "SIMD floating-point exception", /* 29 T_XMMFLT */ 142 "reserved (unknown) fault", /* 30 T_RESERVED */ 143 "", /* 31 unused (reserved) */ 144 "DTrace pid return trap", /* 32 T_DTRACE_RET */ 145}; 146 147static int prot_fault_translation; 148SYSCTL_INT(_machdep, OID_AUTO, prot_fault_translation, CTLFLAG_RWTUN, 149 &prot_fault_translation, 0, 150 "Select signal to deliver on protection fault"); 151static int uprintf_signal; 152SYSCTL_INT(_machdep, OID_AUTO, uprintf_signal, CTLFLAG_RWTUN, 153 &uprintf_signal, 0, 154 "Print debugging information on trap signal to ctty"); 155 156/* 157 * Exception, fault, and trap interface to the FreeBSD kernel. 158 * This common code is called from assembly language IDT gate entry 159 * routines that prepare a suitable stack frame, and restore this 160 * frame after the exception has been processed. 161 */ 162 163void 164trap(struct trapframe *frame) 165{ 166#ifdef KDTRACE_HOOKS 167 struct reg regs; 168#endif 169 struct thread *td = curthread; 170 struct proc *p = td->td_proc; 171#ifdef KDB 172 register_t dr6; 173#endif 174 int i = 0, ucode = 0; 175 u_int type; 176 register_t addr = 0; 177 ksiginfo_t ksi; 178 179 PCPU_INC(cnt.v_trap); 180 type = frame->tf_trapno; 181 182#ifdef SMP 183 /* Handler for NMI IPIs used for stopping CPUs. */ 184 if (type == T_NMI) { 185 if (ipi_nmi_handler() == 0) 186 goto out; 187 } 188#endif /* SMP */ 189 190#ifdef KDB 191 if (kdb_active) { 192 kdb_reenter(); 193 goto out; 194 } 195#endif 196 197 if (type == T_RESERVED) { 198 trap_fatal(frame, 0); 199 goto out; 200 } 201 202 if (type == T_NMI) { 203#ifdef HWPMC_HOOKS 204 /* 205 * CPU PMCs interrupt using an NMI. If the PMC module is 206 * active, pass the 'rip' value to the PMC module's interrupt 207 * handler. A non-zero return value from the handler means that 208 * the NMI was consumed by it and we can return immediately. 209 */ 210 if (pmc_intr != NULL && 211 (*pmc_intr)(PCPU_GET(cpuid), frame) != 0) 212 goto out; 213#endif 214 215#ifdef STACK 216 if (stack_nmi_handler(frame) != 0) 217 goto out; 218#endif 219 } 220 221 if (type == T_MCHK) { 222 mca_intr(); 223 goto out; 224 } 225 226 if ((frame->tf_rflags & PSL_I) == 0) { 227 /* 228 * Buggy application or kernel code has disabled 229 * interrupts and then trapped. Enabling interrupts 230 * now is wrong, but it is better than running with 231 * interrupts disabled until they are accidentally 232 * enabled later. 233 */ 234 if (TRAPF_USERMODE(frame)) 235 uprintf( 236 "pid %ld (%s): trap %d with interrupts disabled\n", 237 (long)curproc->p_pid, curthread->td_name, type); 238 else if (type != T_NMI && type != T_BPTFLT && 239 type != T_TRCTRAP) { 240 /* 241 * XXX not quite right, since this may be for a 242 * multiple fault in user mode. 243 */ 244 printf("kernel trap %d with interrupts disabled\n", 245 type); 246 247 /* 248 * We shouldn't enable interrupts while holding a 249 * spin lock. 250 */ 251 if (td->td_md.md_spinlock_count == 0) 252 enable_intr(); 253 } 254 } 255 256 if (TRAPF_USERMODE(frame)) { 257 /* user trap */ 258 259 td->td_pticks = 0; 260 td->td_frame = frame; 261 addr = frame->tf_rip; 262 if (td->td_cowgen != p->p_cowgen) 263 thread_cow_update(td); 264 265 switch (type) { 266 case T_PRIVINFLT: /* privileged instruction fault */ 267 i = SIGILL; 268 ucode = ILL_PRVOPC; 269 break; 270 271 case T_BPTFLT: /* bpt instruction fault */ 272 case T_TRCTRAP: /* trace trap */ 273 enable_intr(); 274#ifdef KDTRACE_HOOKS 275 if (type == T_BPTFLT) { 276 fill_frame_regs(frame, ®s); 277 if (dtrace_pid_probe_ptr != NULL && 278 dtrace_pid_probe_ptr(®s) == 0) 279 goto out; 280 } 281#endif 282 frame->tf_rflags &= ~PSL_T; 283 i = SIGTRAP; 284 ucode = (type == T_TRCTRAP ? TRAP_TRACE : TRAP_BRKPT); 285 break; 286 287 case T_ARITHTRAP: /* arithmetic trap */ 288 ucode = fputrap_x87(); 289 if (ucode == -1) 290 goto userout; 291 i = SIGFPE; 292 break; 293 294 case T_PROTFLT: /* general protection fault */ 295 i = SIGBUS; 296 ucode = BUS_OBJERR; 297 break; 298 case T_STKFLT: /* stack fault */ 299 case T_SEGNPFLT: /* segment not present fault */ 300 i = SIGBUS; 301 ucode = BUS_ADRERR; 302 break; 303 case T_TSSFLT: /* invalid TSS fault */ 304 i = SIGBUS; 305 ucode = BUS_OBJERR; 306 break; 307 case T_ALIGNFLT: 308 i = SIGBUS; 309 ucode = BUS_ADRALN; 310 break; 311 case T_DOUBLEFLT: /* double fault */ 312 default: 313 i = SIGBUS; 314 ucode = BUS_OBJERR; 315 break; 316 317 case T_PAGEFLT: /* page fault */ 318 /* 319 * Emulator can take care about this trap? 320 */ 321 if (*p->p_sysent->sv_trap != NULL && 322 (*p->p_sysent->sv_trap)(td) == 0) 323 goto userout; 324 325 addr = frame->tf_addr; 326 i = trap_pfault(frame, TRUE); 327 if (i == -1) 328 goto userout; 329 if (i == 0) 330 goto user; 331 332 if (i == SIGSEGV) 333 ucode = SEGV_MAPERR; 334 else { 335 if (prot_fault_translation == 0) { 336 /* 337 * Autodetect. 338 * This check also covers the images 339 * without the ABI-tag ELF note. 340 */ 341 if (SV_CURPROC_ABI() == SV_ABI_FREEBSD 342 && p->p_osrel >= P_OSREL_SIGSEGV) { 343 i = SIGSEGV; 344 ucode = SEGV_ACCERR; 345 } else { 346 i = SIGBUS; 347 ucode = BUS_PAGE_FAULT; 348 } 349 } else if (prot_fault_translation == 1) { 350 /* 351 * Always compat mode. 352 */ 353 i = SIGBUS; 354 ucode = BUS_PAGE_FAULT; 355 } else { 356 /* 357 * Always SIGSEGV mode. 358 */ 359 i = SIGSEGV; 360 ucode = SEGV_ACCERR; 361 } 362 } 363 break; 364 365 case T_DIVIDE: /* integer divide fault */ 366 ucode = FPE_INTDIV; 367 i = SIGFPE; 368 break; 369 370#ifdef DEV_ISA 371 case T_NMI: 372 nmi_handle_intr(type, frame); 373 break; 374#endif /* DEV_ISA */ 375 376 case T_OFLOW: /* integer overflow fault */ 377 ucode = FPE_INTOVF; 378 i = SIGFPE; 379 break; 380 381 case T_BOUND: /* bounds check fault */ 382 ucode = FPE_FLTSUB; 383 i = SIGFPE; 384 break; 385 386 case T_DNA: 387 /* transparent fault (due to context switch "late") */ 388 KASSERT(PCB_USER_FPU(td->td_pcb), 389 ("kernel FPU ctx has leaked")); 390 fpudna(); 391 goto userout; 392 393 case T_FPOPFLT: /* FPU operand fetch fault */ 394 ucode = ILL_COPROC; 395 i = SIGILL; 396 break; 397 398 case T_XMMFLT: /* SIMD floating-point exception */ 399 ucode = fputrap_sse(); 400 if (ucode == -1) 401 goto userout; 402 i = SIGFPE; 403 break; 404#ifdef KDTRACE_HOOKS 405 case T_DTRACE_RET: 406 enable_intr(); 407 fill_frame_regs(frame, ®s); 408 if (dtrace_return_probe_ptr != NULL && 409 dtrace_return_probe_ptr(®s) == 0) 410 goto out; 411 break; 412#endif 413 } 414 } else { 415 /* kernel trap */ 416 417 KASSERT(cold || td->td_ucred != NULL, 418 ("kernel trap doesn't have ucred")); 419 switch (type) { 420 case T_PAGEFLT: /* page fault */ 421 (void) trap_pfault(frame, FALSE); 422 goto out; 423 424 case T_DNA: 425 if (PCB_USER_FPU(td->td_pcb)) 426 panic("Unregistered use of FPU in kernel"); 427 fpudna(); 428 goto out; 429 430 case T_ARITHTRAP: /* arithmetic trap */ 431 case T_XMMFLT: /* SIMD floating-point exception */ 432 case T_FPOPFLT: /* FPU operand fetch fault */ 433 /* 434 * For now, supporting kernel handler 435 * registration for FPU traps is overkill. 436 */ 437 trap_fatal(frame, 0); 438 goto out; 439 440 case T_STKFLT: /* stack fault */ 441 case T_PROTFLT: /* general protection fault */ 442 case T_SEGNPFLT: /* segment not present fault */ 443 if (td->td_intr_nesting_level != 0) 444 break; 445 446 /* 447 * Invalid segment selectors and out of bounds 448 * %rip's and %rsp's can be set up in user mode. 449 * This causes a fault in kernel mode when the 450 * kernel tries to return to user mode. We want 451 * to get this fault so that we can fix the 452 * problem here and not have to check all the 453 * selectors and pointers when the user changes 454 * them. 455 */ 456 if (frame->tf_rip == (long)doreti_iret) { 457 frame->tf_rip = (long)doreti_iret_fault; 458 goto out; 459 } 460 if (frame->tf_rip == (long)ld_ds) { 461 frame->tf_rip = (long)ds_load_fault; 462 goto out; 463 } 464 if (frame->tf_rip == (long)ld_es) { 465 frame->tf_rip = (long)es_load_fault; 466 goto out; 467 } 468 if (frame->tf_rip == (long)ld_fs) { 469 frame->tf_rip = (long)fs_load_fault; 470 goto out; 471 } 472 if (frame->tf_rip == (long)ld_gs) { 473 frame->tf_rip = (long)gs_load_fault; 474 goto out; 475 } 476 if (frame->tf_rip == (long)ld_gsbase) { 477 frame->tf_rip = (long)gsbase_load_fault; 478 goto out; 479 } 480 if (frame->tf_rip == (long)ld_fsbase) { 481 frame->tf_rip = (long)fsbase_load_fault; 482 goto out; 483 } 484 if (curpcb->pcb_onfault != NULL) { 485 frame->tf_rip = (long)curpcb->pcb_onfault; 486 goto out; 487 } 488 break; 489 490 case T_TSSFLT: 491 /* 492 * PSL_NT can be set in user mode and isn't cleared 493 * automatically when the kernel is entered. This 494 * causes a TSS fault when the kernel attempts to 495 * `iret' because the TSS link is uninitialized. We 496 * want to get this fault so that we can fix the 497 * problem here and not every time the kernel is 498 * entered. 499 */ 500 if (frame->tf_rflags & PSL_NT) { 501 frame->tf_rflags &= ~PSL_NT; 502 goto out; 503 } 504 break; 505 506 case T_TRCTRAP: /* trace trap */ 507 /* 508 * Ignore debug register trace traps due to 509 * accesses in the user's address space, which 510 * can happen under several conditions such as 511 * if a user sets a watchpoint on a buffer and 512 * then passes that buffer to a system call. 513 * We still want to get TRCTRAPS for addresses 514 * in kernel space because that is useful when 515 * debugging the kernel. 516 */ 517 if (user_dbreg_trap()) { 518 /* 519 * Reset breakpoint bits because the 520 * processor doesn't 521 */ 522 load_dr6(rdr6() & ~0xf); 523 goto out; 524 } 525 /* 526 * FALLTHROUGH (TRCTRAP kernel mode, kernel address) 527 */ 528 case T_BPTFLT: 529 /* 530 * If KDB is enabled, let it handle the debugger trap. 531 * Otherwise, debugger traps "can't happen". 532 */ 533#ifdef KDB 534 /* XXX %dr6 is not quite reentrant. */ 535 dr6 = rdr6(); 536 load_dr6(dr6 & ~0x4000); 537 if (kdb_trap(type, dr6, frame)) 538 goto out; 539#endif 540 break; 541 542#ifdef DEV_ISA 543 case T_NMI: 544 nmi_handle_intr(type, frame); 545 goto out; 546#endif /* DEV_ISA */ 547 } 548 549 trap_fatal(frame, 0); 550 goto out; 551 } 552 553 /* Translate fault for emulators (e.g. Linux) */ 554 if (*p->p_sysent->sv_transtrap) 555 i = (*p->p_sysent->sv_transtrap)(i, type); 556 557 ksiginfo_init_trap(&ksi); 558 ksi.ksi_signo = i; 559 ksi.ksi_code = ucode; 560 ksi.ksi_trapno = type; 561 ksi.ksi_addr = (void *)addr; 562 if (uprintf_signal) { 563 uprintf("pid %d comm %s: signal %d err %lx code %d type %d " 564 "addr 0x%lx rsp 0x%lx rip 0x%lx " 565 "<%02x %02x %02x %02x %02x %02x %02x %02x>\n", 566 p->p_pid, p->p_comm, i, frame->tf_err, ucode, type, addr, 567 frame->tf_rsp, frame->tf_rip, 568 fubyte((void *)(frame->tf_rip + 0)), 569 fubyte((void *)(frame->tf_rip + 1)), 570 fubyte((void *)(frame->tf_rip + 2)), 571 fubyte((void *)(frame->tf_rip + 3)), 572 fubyte((void *)(frame->tf_rip + 4)), 573 fubyte((void *)(frame->tf_rip + 5)), 574 fubyte((void *)(frame->tf_rip + 6)), 575 fubyte((void *)(frame->tf_rip + 7))); 576 } 577 KASSERT((read_rflags() & PSL_I) != 0, ("interrupts disabled")); 578 trapsignal(td, &ksi); 579 580user: 581 userret(td, frame); 582 KASSERT(PCB_USER_FPU(td->td_pcb), 583 ("Return from trap with kernel FPU ctx leaked")); 584userout: 585out: 586 return; 587} 588 589/* 590 * Ensure that we ignore any DTrace-induced faults. This function cannot 591 * be instrumented, so it cannot generate such faults itself. 592 */ 593void 594trap_check(struct trapframe *frame) 595{ 596 597#ifdef KDTRACE_HOOKS 598 if (dtrace_trap_func != NULL && 599 (*dtrace_trap_func)(frame, frame->tf_trapno) != 0) 600 return; 601#endif 602 trap(frame); 603} 604 605static int 606trap_pfault(frame, usermode) 607 struct trapframe *frame; 608 int usermode; 609{ 610 vm_offset_t va; 611 vm_map_t map; 612 int rv = 0; 613 vm_prot_t ftype; 614 struct thread *td = curthread; 615 struct proc *p = td->td_proc; 616 vm_offset_t eva = frame->tf_addr; 617 618 if (__predict_false((td->td_pflags & TDP_NOFAULTING) != 0)) { 619 /* 620 * Due to both processor errata and lazy TLB invalidation when 621 * access restrictions are removed from virtual pages, memory 622 * accesses that are allowed by the physical mapping layer may 623 * nonetheless cause one spurious page fault per virtual page. 624 * When the thread is executing a "no faulting" section that 625 * is bracketed by vm_fault_{disable,enable}_pagefaults(), 626 * every page fault is treated as a spurious page fault, 627 * unless it accesses the same virtual address as the most 628 * recent page fault within the same "no faulting" section. 629 */ 630 if (td->td_md.md_spurflt_addr != eva || 631 (td->td_pflags & TDP_RESETSPUR) != 0) { 632 /* 633 * Do nothing to the TLB. A stale TLB entry is 634 * flushed automatically by a page fault. 635 */ 636 td->td_md.md_spurflt_addr = eva; 637 td->td_pflags &= ~TDP_RESETSPUR; 638 return (0); 639 } 640 } else { 641 /* 642 * If we get a page fault while in a critical section, then 643 * it is most likely a fatal kernel page fault. The kernel 644 * is already going to panic trying to get a sleep lock to 645 * do the VM lookup, so just consider it a fatal trap so the 646 * kernel can print out a useful trap message and even get 647 * to the debugger. 648 * 649 * If we get a page fault while holding a non-sleepable 650 * lock, then it is most likely a fatal kernel page fault. 651 * If WITNESS is enabled, then it's going to whine about 652 * bogus LORs with various VM locks, so just skip to the 653 * fatal trap handling directly. 654 */ 655 if (td->td_critnest != 0 || 656 WITNESS_CHECK(WARN_SLEEPOK | WARN_GIANTOK, NULL, 657 "Kernel page fault") != 0) { 658 trap_fatal(frame, eva); 659 return (-1); 660 } 661 } 662 va = trunc_page(eva); 663 if (va >= VM_MIN_KERNEL_ADDRESS) { 664 /* 665 * Don't allow user-mode faults in kernel address space. 666 */ 667 if (usermode) 668 goto nogo; 669 670 map = kernel_map; 671 } else { 672 map = &p->p_vmspace->vm_map; 673 674 /* 675 * When accessing a usermode address, kernel must be 676 * ready to accept the page fault, and provide a 677 * handling routine. Since accessing the address 678 * without the handler is a bug, do not try to handle 679 * it normally, and panic immediately. 680 */ 681 if (!usermode && (td->td_intr_nesting_level != 0 || 682 curpcb->pcb_onfault == NULL)) { 683 trap_fatal(frame, eva); 684 return (-1); 685 } 686 } 687 688 /* 689 * If the trap was caused by errant bits in the PTE then panic. 690 */ 691 if (frame->tf_err & PGEX_RSV) { 692 trap_fatal(frame, eva); 693 return (-1); 694 } 695 696 /* 697 * PGEX_I is defined only if the execute disable bit capability is 698 * supported and enabled. 699 */ 700 if (frame->tf_err & PGEX_W) 701 ftype = VM_PROT_WRITE; 702 else if ((frame->tf_err & PGEX_I) && pg_nx != 0) 703 ftype = VM_PROT_EXECUTE; 704 else 705 ftype = VM_PROT_READ; 706 707 /* Fault in the page. */ 708 rv = vm_fault(map, va, ftype, VM_FAULT_NORMAL); 709 if (rv == KERN_SUCCESS) { 710#ifdef HWPMC_HOOKS 711 if (ftype == VM_PROT_READ || ftype == VM_PROT_WRITE) { 712 PMC_SOFT_CALL_TF( , , page_fault, all, frame); 713 if (ftype == VM_PROT_READ) 714 PMC_SOFT_CALL_TF( , , page_fault, read, 715 frame); 716 else 717 PMC_SOFT_CALL_TF( , , page_fault, write, 718 frame); 719 } 720#endif 721 return (0); 722 } 723nogo: 724 if (!usermode) { 725 if (td->td_intr_nesting_level == 0 && 726 curpcb->pcb_onfault != NULL) { 727 frame->tf_rip = (long)curpcb->pcb_onfault; 728 return (0); 729 } 730 trap_fatal(frame, eva); 731 return (-1); 732 } 733 return ((rv == KERN_PROTECTION_FAILURE) ? SIGBUS : SIGSEGV); 734} 735 736static void 737trap_fatal(frame, eva) 738 struct trapframe *frame; 739 vm_offset_t eva; 740{ 741 int code, ss; 742 u_int type; 743 struct soft_segment_descriptor softseg; 744 char *msg; 745 746 code = frame->tf_err; 747 type = frame->tf_trapno; 748 sdtossd(&gdt[NGDT * PCPU_GET(cpuid) + IDXSEL(frame->tf_cs & 0xffff)], 749 &softseg); 750 751 if (type <= MAX_TRAP_MSG) 752 msg = trap_msg[type]; 753 else 754 msg = "UNKNOWN"; 755 printf("\n\nFatal trap %d: %s while in %s mode\n", type, msg, 756 TRAPF_USERMODE(frame) ? "user" : "kernel"); 757#ifdef SMP 758 /* two separate prints in case of a trap on an unmapped page */ 759 printf("cpuid = %d; ", PCPU_GET(cpuid)); 760 printf("apic id = %02x\n", PCPU_GET(apic_id)); 761#endif 762 if (type == T_PAGEFLT) { 763 printf("fault virtual address = 0x%lx\n", eva); 764 printf("fault code = %s %s %s, %s\n", 765 code & PGEX_U ? "user" : "supervisor", 766 code & PGEX_W ? "write" : "read", 767 code & PGEX_I ? "instruction" : "data", 768 code & PGEX_RSV ? "reserved bits in PTE" : 769 code & PGEX_P ? "protection violation" : "page not present"); 770 } 771 printf("instruction pointer = 0x%lx:0x%lx\n", 772 frame->tf_cs & 0xffff, frame->tf_rip); 773 ss = frame->tf_ss & 0xffff; 774 printf("stack pointer = 0x%x:0x%lx\n", ss, frame->tf_rsp); 775 printf("frame pointer = 0x%x:0x%lx\n", ss, frame->tf_rbp); 776 printf("code segment = base 0x%lx, limit 0x%lx, type 0x%x\n", 777 softseg.ssd_base, softseg.ssd_limit, softseg.ssd_type); 778 printf(" = DPL %d, pres %d, long %d, def32 %d, gran %d\n", 779 softseg.ssd_dpl, softseg.ssd_p, softseg.ssd_long, softseg.ssd_def32, 780 softseg.ssd_gran); 781 printf("processor eflags = "); 782 if (frame->tf_rflags & PSL_T) 783 printf("trace trap, "); 784 if (frame->tf_rflags & PSL_I) 785 printf("interrupt enabled, "); 786 if (frame->tf_rflags & PSL_NT) 787 printf("nested task, "); 788 if (frame->tf_rflags & PSL_RF) 789 printf("resume, "); 790 printf("IOPL = %ld\n", (frame->tf_rflags & PSL_IOPL) >> 12); 791 printf("current process = %d (%s)\n", 792 curproc->p_pid, curthread->td_name); 793 794#ifdef KDB 795 if (debugger_on_panic || kdb_active) 796 if (kdb_trap(type, 0, frame)) 797 return; 798#endif 799 printf("trap number = %d\n", type); 800 if (type <= MAX_TRAP_MSG) 801 panic("%s", trap_msg[type]); 802 else 803 panic("unknown/reserved trap"); 804} 805 806/* 807 * Double fault handler. Called when a fault occurs while writing 808 * a frame for a trap/exception onto the stack. This usually occurs 809 * when the stack overflows (such is the case with infinite recursion, 810 * for example). 811 */ 812void 813dblfault_handler(struct trapframe *frame) 814{ 815#ifdef KDTRACE_HOOKS 816 if (dtrace_doubletrap_func != NULL) 817 (*dtrace_doubletrap_func)(); 818#endif 819 printf("\nFatal double fault\n"); 820 printf("rip = 0x%lx\n", frame->tf_rip); 821 printf("rsp = 0x%lx\n", frame->tf_rsp); 822 printf("rbp = 0x%lx\n", frame->tf_rbp); 823#ifdef SMP 824 /* two separate prints in case of a trap on an unmapped page */ 825 printf("cpuid = %d; ", PCPU_GET(cpuid)); 826 printf("apic id = %02x\n", PCPU_GET(apic_id)); 827#endif 828 panic("double fault"); 829} 830 831int 832cpu_fetch_syscall_args(struct thread *td) 833{ 834 struct proc *p; 835 struct trapframe *frame; 836 register_t *argp; 837 struct syscall_args *sa; 838 caddr_t params; 839 int reg, regcnt, error; 840 841 p = td->td_proc; 842 frame = td->td_frame; 843 sa = &td->td_sa; 844 reg = 0; 845 regcnt = 6; 846 847 params = (caddr_t)frame->tf_rsp + sizeof(register_t); 848 sa->code = frame->tf_rax; 849 850 if (sa->code == SYS_syscall || sa->code == SYS___syscall) { 851 sa->code = frame->tf_rdi; 852 reg++; 853 regcnt--; 854 } 855 if (p->p_sysent->sv_mask) 856 sa->code &= p->p_sysent->sv_mask; 857 858 if (sa->code >= p->p_sysent->sv_size) 859 sa->callp = &p->p_sysent->sv_table[0]; 860 else 861 sa->callp = &p->p_sysent->sv_table[sa->code]; 862 863 sa->narg = sa->callp->sy_narg; 864 KASSERT(sa->narg <= sizeof(sa->args) / sizeof(sa->args[0]), 865 ("Too many syscall arguments!")); 866 error = 0; 867 argp = &frame->tf_rdi; 868 argp += reg; 869 bcopy(argp, sa->args, sizeof(sa->args[0]) * regcnt); 870 if (sa->narg > regcnt) { 871 KASSERT(params != NULL, ("copyin args with no params!")); 872 error = copyin(params, &sa->args[regcnt], 873 (sa->narg - regcnt) * sizeof(sa->args[0])); 874 } 875 876 if (error == 0) { 877 td->td_retval[0] = 0; 878 td->td_retval[1] = frame->tf_rdx; 879 } 880 881 return (error); 882} 883 884#include "../../kern/subr_syscall.c" 885 886/* 887 * System call handler for native binaries. The trap frame is already 888 * set up by the assembler trampoline and a pointer to it is saved in 889 * td_frame. 890 */ 891void 892amd64_syscall(struct thread *td, int traced) 893{ 894 int error; 895 ksiginfo_t ksi; 896 897#ifdef DIAGNOSTIC 898 if (!TRAPF_USERMODE(td->td_frame)) { 899 panic("syscall"); 900 /* NOT REACHED */ 901 } 902#endif 903 error = syscallenter(td); 904 905 /* 906 * Traced syscall. 907 */ 908 if (__predict_false(traced)) { 909 td->td_frame->tf_rflags &= ~PSL_T; 910 ksiginfo_init_trap(&ksi); 911 ksi.ksi_signo = SIGTRAP; 912 ksi.ksi_code = TRAP_TRACE; 913 ksi.ksi_addr = (void *)td->td_frame->tf_rip; 914 trapsignal(td, &ksi); 915 } 916 917 KASSERT(PCB_USER_FPU(td->td_pcb), 918 ("System call %s returning with kernel FPU ctx leaked", 919 syscallname(td->td_proc, td->td_sa.code))); 920 KASSERT(td->td_pcb->pcb_save == get_pcb_user_save_td(td), 921 ("System call %s returning with mangled pcb_save", 922 syscallname(td->td_proc, td->td_sa.code))); 923 KASSERT(td->td_md.md_invl_gen.gen == 0, 924 ("System call %s returning with leaked invl_gen %lu", 925 syscallname(td->td_proc, td->td_sa.code), 926 td->td_md.md_invl_gen.gen)); 927 928 syscallret(td, error); 929 930 /* 931 * If the user-supplied value of %rip is not a canonical 932 * address, then some CPUs will trigger a ring 0 #GP during 933 * the sysret instruction. However, the fault handler would 934 * execute in ring 0 with the user's %gs and %rsp which would 935 * not be safe. Instead, use the full return path which 936 * catches the problem safely. 937 */ 938 if (td->td_frame->tf_rip >= VM_MAXUSER_ADDRESS) 939 set_pcb_flags(td->td_pcb, PCB_FULL_IRET); 940} 941