1/*- 2 * Copyright (c) 1986, 1988, 1991, 1993 3 * The Regents of the University of California. All rights reserved. 4 * (c) UNIX System Laboratories, Inc. 5 * All or some portions of this file are derived from material licensed 6 * to the University of California by American Telephone and Telegraph 7 * Co. or Unix System Laboratories, Inc. and are reproduced herein with 8 * the permission of UNIX System Laboratories, Inc. 9 * 10 * Redistribution and use in source and binary forms, with or without 11 * modification, are permitted provided that the following conditions 12 * are met: 13 * 1. Redistributions of source code must retain the above copyright 14 * notice, this list of conditions and the following disclaimer. 15 * 2. Redistributions in binary form must reproduce the above copyright 16 * notice, this list of conditions and the following disclaimer in the 17 * documentation and/or other materials provided with the distribution. 18 * 4. Neither the name of the University nor the names of its contributors 19 * may be used to endorse or promote products derived from this software 20 * without specific prior written permission. 21 * 22 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 23 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 24 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 25 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 26 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 27 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 28 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 29 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 30 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 31 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 32 * SUCH DAMAGE. 33 * 34 * @(#)kern_shutdown.c 8.3 (Berkeley) 1/21/94 35 */ 36 37#include <sys/cdefs.h> 38__FBSDID("$FreeBSD$"); 39 40#include "opt_ddb.h" 41#include "opt_kdb.h" 42#include "opt_panic.h" 43#include "opt_sched.h" 44#include "opt_watchdog.h" 45 46#include <sys/param.h> 47#include <sys/systm.h> 48#include <sys/bio.h> 49#include <sys/buf.h> 50#include <sys/conf.h> 51#include <sys/cons.h> 52#include <sys/eventhandler.h> 53#include <sys/filedesc.h> 54#include <sys/jail.h> 55#include <sys/kdb.h> 56#include <sys/kernel.h> 57#include <sys/kerneldump.h> 58#include <sys/kthread.h> 59#include <sys/ktr.h> 60#include <sys/malloc.h> 61#include <sys/mount.h> 62#include <sys/priv.h> 63#include <sys/proc.h> 64#include <sys/reboot.h> 65#include <sys/resourcevar.h> 66#include <sys/rwlock.h> 67#include <sys/sched.h> 68#include <sys/smp.h> 69#include <sys/sysctl.h> 70#include <sys/sysproto.h> 71#include <sys/vnode.h> 72#include <sys/watchdog.h> 73 74#include <ddb/ddb.h> 75 76#include <machine/cpu.h> 77#include <machine/pcb.h> 78#include <machine/smp.h> 79 80#include <security/mac/mac_framework.h> 81 82#include <vm/vm.h> 83#include <vm/vm_object.h> 84#include <vm/vm_page.h> 85#include <vm/vm_pager.h> 86#include <vm/swap_pager.h> 87 88#include <sys/signalvar.h> 89 90#ifndef PANIC_REBOOT_WAIT_TIME 91#define PANIC_REBOOT_WAIT_TIME 15 /* default to 15 seconds */ 92#endif 93static int panic_reboot_wait_time = PANIC_REBOOT_WAIT_TIME; 94SYSCTL_INT(_kern, OID_AUTO, panic_reboot_wait_time, CTLFLAG_RW | CTLFLAG_TUN, 95 &panic_reboot_wait_time, 0, 96 "Seconds to wait before rebooting after a panic"); 97TUNABLE_INT("kern.panic_reboot_wait_time", &panic_reboot_wait_time); 98 99/* 100 * Note that stdarg.h and the ANSI style va_start macro is used for both 101 * ANSI and traditional C compilers. 102 */ 103#include <machine/stdarg.h> 104 105#ifdef KDB 106#ifdef KDB_UNATTENDED 107int debugger_on_panic = 0; 108#else 109int debugger_on_panic = 1; 110#endif 111SYSCTL_INT(_debug, OID_AUTO, debugger_on_panic, 112 CTLFLAG_RW | CTLFLAG_SECURE | CTLFLAG_TUN, 113 &debugger_on_panic, 0, "Run debugger on kernel panic"); 114TUNABLE_INT("debug.debugger_on_panic", &debugger_on_panic); 115 116#ifdef KDB_TRACE 117static int trace_on_panic = 1; 118#else 119static int trace_on_panic = 0; 120#endif 121SYSCTL_INT(_debug, OID_AUTO, trace_on_panic, 122 CTLFLAG_RW | CTLFLAG_SECURE | CTLFLAG_TUN, 123 &trace_on_panic, 0, "Print stack trace on kernel panic"); 124TUNABLE_INT("debug.trace_on_panic", &trace_on_panic); 125#endif /* KDB */ 126 127static int sync_on_panic = 0; 128SYSCTL_INT(_kern, OID_AUTO, sync_on_panic, CTLFLAG_RW | CTLFLAG_TUN, 129 &sync_on_panic, 0, "Do a sync before rebooting from a panic"); 130TUNABLE_INT("kern.sync_on_panic", &sync_on_panic); 131 132static SYSCTL_NODE(_kern, OID_AUTO, shutdown, CTLFLAG_RW, 0, 133 "Shutdown environment"); 134 135#ifndef DIAGNOSTIC 136static int show_busybufs; 137#else 138static int show_busybufs = 1; 139#endif 140SYSCTL_INT(_kern_shutdown, OID_AUTO, show_busybufs, CTLFLAG_RW, 141 &show_busybufs, 0, ""); 142 143int suspend_blocked = 0; 144SYSCTL_INT(_kern, OID_AUTO, suspend_blocked, CTLFLAG_RW, 145 &suspend_blocked, 0, "Block suspend due to a pending shutdown"); 146 147/* 148 * Variable panicstr contains argument to first call to panic; used as flag 149 * to indicate that the kernel has already called panic. 150 */ 151const char *panicstr; 152 153int dumping; /* system is dumping */ 154int rebooting; /* system is rebooting */ 155static struct dumperinfo dumper; /* our selected dumper */ 156 157/* Context information for dump-debuggers. */ 158static struct pcb dumppcb; /* Registers. */ 159lwpid_t dumptid; /* Thread ID. */ 160 161static struct cdevsw reroot_cdevsw = { 162 .d_version = D_VERSION, 163 .d_name = "reroot", 164}; 165 166static void poweroff_wait(void *, int); 167static void shutdown_halt(void *junk, int howto); 168static void shutdown_panic(void *junk, int howto); 169static void shutdown_reset(void *junk, int howto); 170static int kern_reroot(void); 171 172/* register various local shutdown events */ 173static void 174shutdown_conf(void *unused) 175{ 176 177 EVENTHANDLER_REGISTER(shutdown_final, poweroff_wait, NULL, 178 SHUTDOWN_PRI_FIRST); 179 EVENTHANDLER_REGISTER(shutdown_final, shutdown_halt, NULL, 180 SHUTDOWN_PRI_LAST + 100); 181 EVENTHANDLER_REGISTER(shutdown_final, shutdown_panic, NULL, 182 SHUTDOWN_PRI_LAST + 100); 183 EVENTHANDLER_REGISTER(shutdown_final, shutdown_reset, NULL, 184 SHUTDOWN_PRI_LAST + 200); 185} 186 187SYSINIT(shutdown_conf, SI_SUB_INTRINSIC, SI_ORDER_ANY, shutdown_conf, NULL); 188 189/* 190 * The only reason this exists is to create the /dev/reroot/ directory, 191 * used by reroot code in init(8) as a mountpoint for tmpfs. 192 */ 193static void 194reroot_conf(void *unused) 195{ 196 int error; 197 struct cdev *cdev; 198 199 error = make_dev_p(MAKEDEV_CHECKNAME | MAKEDEV_WAITOK, &cdev, 200 &reroot_cdevsw, NULL, UID_ROOT, GID_WHEEL, 0600, "reroot/reroot"); 201 if (error != 0) { 202 printf("%s: failed to create device node, error %d", 203 __func__, error); 204 } 205} 206 207SYSINIT(reroot_conf, SI_SUB_DEVFS, SI_ORDER_ANY, reroot_conf, NULL); 208 209/* 210 * The system call that results in a reboot. 211 */ 212/* ARGSUSED */ 213int 214sys_reboot(struct thread *td, struct reboot_args *uap) 215{ 216 int error; 217 218 error = 0; 219#ifdef MAC 220 error = mac_system_check_reboot(td->td_ucred, uap->opt); 221#endif 222 if (error == 0) 223 error = priv_check(td, PRIV_REBOOT); 224 if (error == 0) { 225 if (uap->opt & RB_REROOT) { 226 error = kern_reroot(); 227 } else { 228 mtx_lock(&Giant); 229 kern_reboot(uap->opt); 230 mtx_unlock(&Giant); 231 } 232 } 233 return (error); 234} 235 236/* 237 * Called by events that want to shut down.. e.g <CTL><ALT><DEL> on a PC 238 */ 239static int shutdown_howto = 0; 240 241void 242shutdown_nice(int howto) 243{ 244 245 shutdown_howto = howto; 246 247 /* Send a signal to init(8) and have it shutdown the world */ 248 if (initproc != NULL) { 249 PROC_LOCK(initproc); 250 kern_psignal(initproc, SIGINT); 251 PROC_UNLOCK(initproc); 252 } else { 253 /* No init(8) running, so simply reboot */ 254 kern_reboot(RB_NOSYNC); 255 } 256 return; 257} 258static int waittime = -1; 259 260static void 261print_uptime(void) 262{ 263 int f; 264 struct timespec ts; 265 266 getnanouptime(&ts); 267 printf("Uptime: "); 268 f = 0; 269 if (ts.tv_sec >= 86400) { 270 printf("%ldd", (long)ts.tv_sec / 86400); 271 ts.tv_sec %= 86400; 272 f = 1; 273 } 274 if (f || ts.tv_sec >= 3600) { 275 printf("%ldh", (long)ts.tv_sec / 3600); 276 ts.tv_sec %= 3600; 277 f = 1; 278 } 279 if (f || ts.tv_sec >= 60) { 280 printf("%ldm", (long)ts.tv_sec / 60); 281 ts.tv_sec %= 60; 282 f = 1; 283 } 284 printf("%lds\n", (long)ts.tv_sec); 285} 286 287int 288doadump(boolean_t textdump) 289{ 290 boolean_t coredump; 291 292 if (dumping) 293 return (EBUSY); 294 if (dumper.dumper == NULL) 295 return (ENXIO); 296 297 savectx(&dumppcb); 298 dumptid = curthread->td_tid; 299 dumping++; 300 301 coredump = TRUE; 302#ifdef DDB 303 if (textdump && textdump_pending) { 304 coredump = FALSE; 305 textdump_dumpsys(&dumper); 306 } 307#endif 308 if (coredump) 309 dumpsys(&dumper); 310 311 dumping--; 312 return (0); 313} 314 315static int 316isbufbusy(struct buf *bp) 317{ 318 if (((bp->b_flags & (B_INVAL | B_PERSISTENT)) == 0 && 319 BUF_ISLOCKED(bp)) || 320 ((bp->b_flags & (B_DELWRI | B_INVAL)) == B_DELWRI)) 321 return (1); 322 return (0); 323} 324 325/* 326 * Shutdown the system cleanly to prepare for reboot, halt, or power off. 327 */ 328void 329kern_reboot(int howto) 330{ 331 static int first_buf_printf = 1; 332 333#if defined(SMP) 334 /* 335 * Bind us to CPU 0 so that all shutdown code runs there. Some 336 * systems don't shutdown properly (i.e., ACPI power off) if we 337 * run on another processor. 338 */ 339 if (!SCHEDULER_STOPPED()) { 340 thread_lock(curthread); 341 sched_bind(curthread, 0); 342 thread_unlock(curthread); 343 KASSERT(PCPU_GET(cpuid) == 0, ("boot: not running on cpu 0")); 344 } 345#endif 346 /* We're in the process of rebooting. */ 347 rebooting = 1; 348 349 /* collect extra flags that shutdown_nice might have set */ 350 howto |= shutdown_howto; 351 352 /* We are out of the debugger now. */ 353 kdb_active = 0; 354 355 /* 356 * Do any callouts that should be done BEFORE syncing the filesystems. 357 */ 358 EVENTHANDLER_INVOKE(shutdown_pre_sync, howto); 359 360 /* 361 * Now sync filesystems 362 */ 363 if (!cold && (howto & RB_NOSYNC) == 0 && waittime < 0) { 364 register struct buf *bp; 365 int iter, nbusy, pbusy; 366#ifndef PREEMPTION 367 int subiter; 368#endif 369 370 waittime = 0; 371 372 wdog_kern_pat(WD_LASTVAL); 373 sys_sync(curthread, NULL); 374 375 /* 376 * With soft updates, some buffers that are 377 * written will be remarked as dirty until other 378 * buffers are written. 379 */ 380 for (iter = pbusy = 0; iter < 20; iter++) { 381 nbusy = 0; 382 for (bp = &buf[nbuf]; --bp >= buf; ) 383 if (isbufbusy(bp)) 384 nbusy++; 385 if (nbusy == 0) { 386 if (first_buf_printf) 387 printf("All buffers synced."); 388 break; 389 } 390 if (first_buf_printf) { 391 printf("Syncing disks, buffers remaining... "); 392 first_buf_printf = 0; 393 } 394 printf("%d ", nbusy); 395 if (nbusy < pbusy) 396 iter = 0; 397 pbusy = nbusy; 398 399 wdog_kern_pat(WD_LASTVAL); 400 sys_sync(curthread, NULL); 401 402#ifdef PREEMPTION 403 /* 404 * Drop Giant and spin for a while to allow 405 * interrupt threads to run. 406 */ 407 DROP_GIANT(); 408 DELAY(50000 * iter); 409 PICKUP_GIANT(); 410#else 411 /* 412 * Drop Giant and context switch several times to 413 * allow interrupt threads to run. 414 */ 415 DROP_GIANT(); 416 for (subiter = 0; subiter < 50 * iter; subiter++) { 417 thread_lock(curthread); 418 mi_switch(SW_VOL, NULL); 419 thread_unlock(curthread); 420 DELAY(1000); 421 } 422 PICKUP_GIANT(); 423#endif 424 } 425 printf("\n"); 426 /* 427 * Count only busy local buffers to prevent forcing 428 * a fsck if we're just a client of a wedged NFS server 429 */ 430 nbusy = 0; 431 for (bp = &buf[nbuf]; --bp >= buf; ) { 432 if (isbufbusy(bp)) { 433#if 0 434/* XXX: This is bogus. We should probably have a BO_REMOTE flag instead */ 435 if (bp->b_dev == NULL) { 436 TAILQ_REMOVE(&mountlist, 437 bp->b_vp->v_mount, mnt_list); 438 continue; 439 } 440#endif 441 nbusy++; 442 if (show_busybufs > 0) { 443 printf( 444 "%d: buf:%p, vnode:%p, flags:%0x, blkno:%jd, lblkno:%jd, buflock:", 445 nbusy, bp, bp->b_vp, bp->b_flags, 446 (intmax_t)bp->b_blkno, 447 (intmax_t)bp->b_lblkno); 448 BUF_LOCKPRINTINFO(bp); 449 if (show_busybufs > 1) 450 vn_printf(bp->b_vp, 451 "vnode content: "); 452 } 453 } 454 } 455 if (nbusy) { 456 /* 457 * Failed to sync all blocks. Indicate this and don't 458 * unmount filesystems (thus forcing an fsck on reboot). 459 */ 460 printf("Giving up on %d buffers\n", nbusy); 461 DELAY(5000000); /* 5 seconds */ 462 } else { 463 if (!first_buf_printf) 464 printf("Final sync complete\n"); 465 /* 466 * Unmount filesystems 467 */ 468 if (panicstr == 0) 469 vfs_unmountall(); 470 } 471 swapoff_all(); 472 DELAY(100000); /* wait for console output to finish */ 473 } 474 475 print_uptime(); 476 477 cngrab(); 478 479 /* 480 * Ok, now do things that assume all filesystem activity has 481 * been completed. 482 */ 483 EVENTHANDLER_INVOKE(shutdown_post_sync, howto); 484 485 if ((howto & (RB_HALT|RB_DUMP)) == RB_DUMP && !cold && !dumping) 486 doadump(TRUE); 487 488 /* Now that we're going to really halt the system... */ 489 EVENTHANDLER_INVOKE(shutdown_final, howto); 490 491 for(;;) ; /* safety against shutdown_reset not working */ 492 /* NOTREACHED */ 493} 494 495/* 496 * The system call that results in changing the rootfs. 497 */ 498static int 499kern_reroot(void) 500{ 501 struct vnode *oldrootvnode, *vp; 502 struct mount *mp, *devmp; 503 int error; 504 505 if (curproc != initproc) 506 return (EPERM); 507 508 /* 509 * Mark the filesystem containing currently-running executable 510 * (the temporary copy of init(8)) busy. 511 */ 512 vp = curproc->p_textvp; 513 error = vn_lock(vp, LK_SHARED); 514 if (error != 0) 515 return (error); 516 mp = vp->v_mount; 517 error = vfs_busy(mp, MBF_NOWAIT); 518 if (error != 0) { 519 vfs_ref(mp); 520 VOP_UNLOCK(vp, 0); 521 error = vfs_busy(mp, 0); 522 vn_lock(vp, LK_SHARED | LK_RETRY); 523 vfs_rel(mp); 524 if (error != 0) { 525 VOP_UNLOCK(vp, 0); 526 return (ENOENT); 527 } 528 if (vp->v_iflag & VI_DOOMED) { 529 VOP_UNLOCK(vp, 0); 530 vfs_unbusy(mp); 531 return (ENOENT); 532 } 533 } 534 VOP_UNLOCK(vp, 0); 535 536 /* 537 * Remove the filesystem containing currently-running executable 538 * from the mount list, to prevent it from being unmounted 539 * by vfs_unmountall(), and to avoid confusing vfs_mountroot(). 540 * 541 * Also preserve /dev - forcibly unmounting it could cause driver 542 * reinitialization. 543 */ 544 545 vfs_ref(rootdevmp); 546 devmp = rootdevmp; 547 rootdevmp = NULL; 548 549 mtx_lock(&mountlist_mtx); 550 TAILQ_REMOVE(&mountlist, mp, mnt_list); 551 TAILQ_REMOVE(&mountlist, devmp, mnt_list); 552 mtx_unlock(&mountlist_mtx); 553 554 oldrootvnode = rootvnode; 555 556 /* 557 * Unmount everything except for the two filesystems preserved above. 558 */ 559 vfs_unmountall(); 560 561 /* 562 * Add /dev back; vfs_mountroot() will move it into its new place. 563 */ 564 mtx_lock(&mountlist_mtx); 565 TAILQ_INSERT_HEAD(&mountlist, devmp, mnt_list); 566 mtx_unlock(&mountlist_mtx); 567 rootdevmp = devmp; 568 vfs_rel(rootdevmp); 569 570 /* 571 * Mount the new rootfs. 572 */ 573 vfs_mountroot(); 574 575 /* 576 * Update all references to the old rootvnode. 577 */ 578 mountcheckdirs(oldrootvnode, rootvnode); 579 580 /* 581 * Add the temporary filesystem back and unbusy it. 582 */ 583 mtx_lock(&mountlist_mtx); 584 TAILQ_INSERT_TAIL(&mountlist, mp, mnt_list); 585 mtx_unlock(&mountlist_mtx); 586 vfs_unbusy(mp); 587 588 return (0); 589} 590 591/* 592 * If the shutdown was a clean halt, behave accordingly. 593 */ 594static void 595shutdown_halt(void *junk, int howto) 596{ 597 598 if (howto & RB_HALT) { 599 printf("\n"); 600 printf("The operating system has halted.\n"); 601 printf("Please press any key to reboot.\n\n"); 602 switch (cngetc()) { 603 case -1: /* No console, just die */ 604 cpu_halt(); 605 /* NOTREACHED */ 606 default: 607 howto &= ~RB_HALT; 608 break; 609 } 610 } 611} 612 613/* 614 * Check to see if the system paniced, pause and then reboot 615 * according to the specified delay. 616 */ 617static void 618shutdown_panic(void *junk, int howto) 619{ 620 int loop; 621 622 if (howto & RB_DUMP) { 623 if (panic_reboot_wait_time != 0) { 624 if (panic_reboot_wait_time != -1) { 625 printf("Automatic reboot in %d seconds - " 626 "press a key on the console to abort\n", 627 panic_reboot_wait_time); 628 for (loop = panic_reboot_wait_time * 10; 629 loop > 0; --loop) { 630 DELAY(1000 * 100); /* 1/10th second */ 631 /* Did user type a key? */ 632 if (cncheckc() != -1) 633 break; 634 } 635 if (!loop) 636 return; 637 } 638 } else { /* zero time specified - reboot NOW */ 639 return; 640 } 641 printf("--> Press a key on the console to reboot,\n"); 642 printf("--> or switch off the system now.\n"); 643 cngetc(); 644 } 645} 646 647/* 648 * Everything done, now reset 649 */ 650static void 651shutdown_reset(void *junk, int howto) 652{ 653 654 printf("Rebooting...\n"); 655 DELAY(1000000); /* wait 1 sec for printf's to complete and be read */ 656 657 /* 658 * Acquiring smp_ipi_mtx here has a double effect: 659 * - it disables interrupts avoiding CPU0 preemption 660 * by fast handlers (thus deadlocking against other CPUs) 661 * - it avoids deadlocks against smp_rendezvous() or, more 662 * generally, threads busy-waiting, with this spinlock held, 663 * and waiting for responses by threads on other CPUs 664 * (ie. smp_tlb_shootdown()). 665 * 666 * For the !SMP case it just needs to handle the former problem. 667 */ 668#ifdef SMP 669 mtx_lock_spin(&smp_ipi_mtx); 670#else 671 spinlock_enter(); 672#endif 673 674 /* cpu_boot(howto); */ /* doesn't do anything at the moment */ 675 cpu_reset(); 676 /* NOTREACHED */ /* assuming reset worked */ 677} 678 679#if defined(WITNESS) || defined(INVARIANTS) 680static int kassert_warn_only = 0; 681#ifdef KDB 682static int kassert_do_kdb = 0; 683#endif 684#ifdef KTR 685static int kassert_do_ktr = 0; 686#endif 687static int kassert_do_log = 1; 688static int kassert_log_pps_limit = 4; 689static int kassert_log_mute_at = 0; 690static int kassert_log_panic_at = 0; 691static int kassert_warnings = 0; 692 693SYSCTL_NODE(_debug, OID_AUTO, kassert, CTLFLAG_RW, NULL, "kassert options"); 694 695SYSCTL_INT(_debug_kassert, OID_AUTO, warn_only, CTLFLAG_RW | CTLFLAG_TUN, 696 &kassert_warn_only, 0, 697 "KASSERT triggers a panic (1) or just a warning (0)"); 698TUNABLE_INT("debug.kassert.warn_only", &kassert_warn_only); 699 700#ifdef KDB 701SYSCTL_INT(_debug_kassert, OID_AUTO, do_kdb, CTLFLAG_RW | CTLFLAG_TUN, 702 &kassert_do_kdb, 0, "KASSERT will enter the debugger"); 703TUNABLE_INT("debug.kassert.do_kdb", &kassert_do_kdb); 704#endif 705 706#ifdef KTR 707SYSCTL_UINT(_debug_kassert, OID_AUTO, do_ktr, CTLFLAG_RW | CTLFLAG_TUN, 708 &kassert_do_ktr, 0, 709 "KASSERT does a KTR, set this to the KTRMASK you want"); 710TUNABLE_INT("debug.kassert.do_ktr", &kassert_do_ktr); 711#endif 712 713SYSCTL_INT(_debug_kassert, OID_AUTO, do_log, CTLFLAG_RW | CTLFLAG_TUN, 714 &kassert_do_log, 0, "KASSERT triggers a panic (1) or just a warning (0)"); 715TUNABLE_INT("debug.kassert.do_log", &kassert_do_log); 716 717SYSCTL_INT(_debug_kassert, OID_AUTO, warnings, CTLFLAG_RW | CTLFLAG_TUN, 718 &kassert_warnings, 0, "number of KASSERTs that have been triggered"); 719TUNABLE_INT("debug.kassert.warnings", &kassert_warnings); 720 721SYSCTL_INT(_debug_kassert, OID_AUTO, log_panic_at, CTLFLAG_RW | CTLFLAG_TUN, 722 &kassert_log_panic_at, 0, "max number of KASSERTS before we will panic"); 723TUNABLE_INT("debug.kassert.log_panic_at", &kassert_log_panic_at); 724 725SYSCTL_INT(_debug_kassert, OID_AUTO, log_pps_limit, CTLFLAG_RW | CTLFLAG_TUN, 726 &kassert_log_pps_limit, 0, "limit number of log messages per second"); 727TUNABLE_INT("debug.kassert.log_pps_limit", &kassert_log_pps_limit); 728 729SYSCTL_INT(_debug_kassert, OID_AUTO, log_mute_at, CTLFLAG_RW | CTLFLAG_TUN, 730 &kassert_log_mute_at, 0, "max number of KASSERTS to log"); 731TUNABLE_INT("debug.kassert.log_mute_at", &kassert_log_mute_at); 732 733static int kassert_sysctl_kassert(SYSCTL_HANDLER_ARGS); 734 735SYSCTL_PROC(_debug_kassert, OID_AUTO, kassert, 736 CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_SECURE, NULL, 0, 737 kassert_sysctl_kassert, "I", "set to trigger a test kassert"); 738 739static int 740kassert_sysctl_kassert(SYSCTL_HANDLER_ARGS) 741{ 742 int error, i; 743 744 error = sysctl_wire_old_buffer(req, sizeof(int)); 745 if (error == 0) { 746 i = 0; 747 error = sysctl_handle_int(oidp, &i, 0, req); 748 } 749 if (error != 0 || req->newptr == NULL) 750 return (error); 751 KASSERT(0, ("kassert_sysctl_kassert triggered kassert %d", i)); 752 return (0); 753} 754 755/* 756 * Called by KASSERT, this decides if we will panic 757 * or if we will log via printf and/or ktr. 758 */ 759void 760kassert_panic(const char *fmt, ...) 761{ 762 static char buf[256]; 763 va_list ap; 764 765 va_start(ap, fmt); 766 (void)vsnprintf(buf, sizeof(buf), fmt, ap); 767 va_end(ap); 768 769 /* 770 * panic if we're not just warning, or if we've exceeded 771 * kassert_log_panic_at warnings. 772 */ 773 if (!kassert_warn_only || 774 (kassert_log_panic_at > 0 && 775 kassert_warnings >= kassert_log_panic_at)) { 776 va_start(ap, fmt); 777 vpanic(fmt, ap); 778 /* NORETURN */ 779 } 780#ifdef KTR 781 if (kassert_do_ktr) 782 CTR0(ktr_mask, buf); 783#endif /* KTR */ 784 /* 785 * log if we've not yet met the mute limit. 786 */ 787 if (kassert_do_log && 788 (kassert_log_mute_at == 0 || 789 kassert_warnings < kassert_log_mute_at)) { 790 static struct timeval lasterr; 791 static int curerr; 792 793 if (ppsratecheck(&lasterr, &curerr, kassert_log_pps_limit)) { 794 printf("KASSERT failed: %s\n", buf); 795 kdb_backtrace(); 796 } 797 } 798#ifdef KDB 799 if (kassert_do_kdb) { 800 kdb_enter(KDB_WHY_KASSERT, buf); 801 } 802#endif 803 atomic_add_int(&kassert_warnings, 1); 804} 805#endif 806 807/* 808 * Panic is called on unresolvable fatal errors. It prints "panic: mesg", 809 * and then reboots. If we are called twice, then we avoid trying to sync 810 * the disks as this often leads to recursive panics. 811 */ 812void 813panic(const char *fmt, ...) 814{ 815 va_list ap; 816 817 va_start(ap, fmt); 818 vpanic(fmt, ap); 819} 820 821void 822vpanic(const char *fmt, va_list ap) 823{ 824#ifdef SMP 825 cpuset_t other_cpus; 826#endif 827 struct thread *td = curthread; 828 int bootopt, newpanic; 829 static char buf[256]; 830 831 spinlock_enter(); 832 833#ifdef SMP 834 /* 835 * stop_cpus_hard(other_cpus) should prevent multiple CPUs from 836 * concurrently entering panic. Only the winner will proceed 837 * further. 838 */ 839 if (panicstr == NULL && !kdb_active) { 840 other_cpus = all_cpus; 841 CPU_CLR(PCPU_GET(cpuid), &other_cpus); 842 stop_cpus_hard(other_cpus); 843 } 844 845 /* 846 * We set stop_scheduler here and not in the block above, 847 * because we want to ensure that if panic has been called and 848 * stop_scheduler_on_panic is true, then stop_scheduler will 849 * always be set. Even if panic has been entered from kdb. 850 */ 851 td->td_stopsched = 1; 852#endif 853 854 bootopt = RB_AUTOBOOT; 855 newpanic = 0; 856 if (panicstr) 857 bootopt |= RB_NOSYNC; 858 else { 859 bootopt |= RB_DUMP; 860 panicstr = fmt; 861 newpanic = 1; 862 } 863 864 if (newpanic) { 865 (void)vsnprintf(buf, sizeof(buf), fmt, ap); 866 panicstr = buf; 867 cngrab(); 868 printf("panic: %s\n", buf); 869 } else { 870 printf("panic: "); 871 vprintf(fmt, ap); 872 printf("\n"); 873 } 874#ifdef SMP 875 printf("cpuid = %d\n", PCPU_GET(cpuid)); 876#endif 877 878#ifdef KDB 879 if (newpanic && trace_on_panic) 880 kdb_backtrace(); 881 if (debugger_on_panic) 882 kdb_enter(KDB_WHY_PANIC, "panic"); 883#endif 884 /*thread_lock(td); */ 885 td->td_flags |= TDF_INPANIC; 886 /* thread_unlock(td); */ 887 if (!sync_on_panic) 888 bootopt |= RB_NOSYNC; 889 kern_reboot(bootopt); 890} 891 892/* 893 * Support for poweroff delay. 894 * 895 * Please note that setting this delay too short might power off your machine 896 * before the write cache on your hard disk has been flushed, leading to 897 * soft-updates inconsistencies. 898 */ 899#ifndef POWEROFF_DELAY 900# define POWEROFF_DELAY 5000 901#endif 902static int poweroff_delay = POWEROFF_DELAY; 903 904SYSCTL_INT(_kern_shutdown, OID_AUTO, poweroff_delay, CTLFLAG_RW, 905 &poweroff_delay, 0, "Delay before poweroff to write disk caches (msec)"); 906 907static void 908poweroff_wait(void *junk, int howto) 909{ 910 911 if (!(howto & RB_POWEROFF) || poweroff_delay <= 0) 912 return; 913 DELAY(poweroff_delay * 1000); 914} 915 916/* 917 * Some system processes (e.g. syncer) need to be stopped at appropriate 918 * points in their main loops prior to a system shutdown, so that they 919 * won't interfere with the shutdown process (e.g. by holding a disk buf 920 * to cause sync to fail). For each of these system processes, register 921 * shutdown_kproc() as a handler for one of shutdown events. 922 */ 923static int kproc_shutdown_wait = 60; 924SYSCTL_INT(_kern_shutdown, OID_AUTO, kproc_shutdown_wait, CTLFLAG_RW, 925 &kproc_shutdown_wait, 0, "Max wait time (sec) to stop for each process"); 926 927void 928kproc_shutdown(void *arg, int howto) 929{ 930 struct proc *p; 931 int error; 932 933 if (panicstr) 934 return; 935 936 p = (struct proc *)arg; 937 printf("Waiting (max %d seconds) for system process `%s' to stop...", 938 kproc_shutdown_wait, p->p_comm); 939 error = kproc_suspend(p, kproc_shutdown_wait * hz); 940 941 if (error == EWOULDBLOCK) 942 printf("timed out\n"); 943 else 944 printf("done\n"); 945} 946 947void 948kthread_shutdown(void *arg, int howto) 949{ 950 struct thread *td; 951 int error; 952 953 if (panicstr) 954 return; 955 956 td = (struct thread *)arg; 957 printf("Waiting (max %d seconds) for system thread `%s' to stop...", 958 kproc_shutdown_wait, td->td_name); 959 error = kthread_suspend(td, kproc_shutdown_wait * hz); 960 961 if (error == EWOULDBLOCK) 962 printf("timed out\n"); 963 else 964 printf("done\n"); 965} 966 967static char dumpdevname[sizeof(((struct cdev*)NULL)->si_name)]; 968SYSCTL_STRING(_kern_shutdown, OID_AUTO, dumpdevname, CTLFLAG_RD, 969 dumpdevname, 0, "Device for kernel dumps"); 970 971/* Registration of dumpers */ 972int 973set_dumper(struct dumperinfo *di, const char *devname, struct thread *td) 974{ 975 size_t wantcopy; 976 int error; 977 978 error = priv_check(td, PRIV_SETDUMPER); 979 if (error != 0) 980 return (error); 981 982 if (di == NULL) { 983 bzero(&dumper, sizeof dumper); 984 dumpdevname[0] = '\0'; 985 return (0); 986 } 987 if (dumper.dumper != NULL) 988 return (EBUSY); 989 dumper = *di; 990 wantcopy = strlcpy(dumpdevname, devname, sizeof(dumpdevname)); 991 if (wantcopy >= sizeof(dumpdevname)) { 992 printf("set_dumper: device name truncated from '%s' -> '%s'\n", 993 devname, dumpdevname); 994 } 995 return (0); 996} 997 998/* Call dumper with bounds checking. */ 999int 1000dump_write(struct dumperinfo *di, void *virtual, vm_offset_t physical, 1001 off_t offset, size_t length) 1002{ 1003 1004 if (length != 0 && (offset < di->mediaoffset || 1005 offset - di->mediaoffset + length > di->mediasize)) { 1006 printf("Attempt to write outside dump device boundaries.\n" 1007 "offset(%jd), mediaoffset(%jd), length(%ju), mediasize(%jd).\n", 1008 (intmax_t)offset, (intmax_t)di->mediaoffset, 1009 (uintmax_t)length, (intmax_t)di->mediasize); 1010 return (ENOSPC); 1011 } 1012 return (di->dumper(di->priv, virtual, physical, offset, length)); 1013} 1014 1015void 1016mkdumpheader(struct kerneldumpheader *kdh, char *magic, uint32_t archver, 1017 uint64_t dumplen, uint32_t blksz) 1018{ 1019 1020 bzero(kdh, sizeof(*kdh)); 1021 strlcpy(kdh->magic, magic, sizeof(kdh->magic)); 1022 strlcpy(kdh->architecture, MACHINE_ARCH, sizeof(kdh->architecture)); 1023 kdh->version = htod32(KERNELDUMPVERSION); 1024 kdh->architectureversion = htod32(archver); 1025 kdh->dumplength = htod64(dumplen); 1026 kdh->dumptime = htod64(time_second); 1027 kdh->blocksize = htod32(blksz); 1028 strlcpy(kdh->hostname, prison0.pr_hostname, sizeof(kdh->hostname)); 1029 strlcpy(kdh->versionstring, version, sizeof(kdh->versionstring)); 1030 if (panicstr != NULL) 1031 strlcpy(kdh->panicstring, panicstr, sizeof(kdh->panicstring)); 1032 kdh->parity = kerneldump_parity(kdh); 1033} 1034 1035#ifdef DDB 1036DB_SHOW_COMMAND(panic, db_show_panic) 1037{ 1038 1039 if (panicstr == NULL) 1040 db_printf("panicstr not set\n"); 1041 else 1042 db_printf("panic: %s\n", panicstr); 1043} 1044#endif 1045