1/*- 2 * Copyright (c) 2002 Marcel Moolenaar 3 * All rights reserved. 4 * 5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted provided that the following conditions 7 * are met: 8 * 9 * 1. Redistributions of source code must retain the above copyright 10 * notice, this list of conditions and the following disclaimer. 11 * 2. Redistributions in binary form must reproduce the above copyright 12 * notice, this list of conditions and the following disclaimer in the 13 * documentation and/or other materials provided with the distribution. 14 * 15 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR 16 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 17 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 18 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, 19 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 20 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 21 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 22 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 23 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF 24 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 25 */ 26 27#include <sys/param.h> 28#include <sys/systm.h> 29#include <sys/conf.h> 30#include <sys/cons.h> 31#include <sys/kdb.h> 32#include <sys/kernel.h> 33#include <sys/kerneldump.h> 34#include <sys/malloc.h> 35#include <sys/msgbuf.h> 36#include <sys/proc.h> 37#include <sys/watchdog.h> 38 39#include <vm/vm.h> 40#include <vm/vm_param.h> 41#include <vm/vm_page.h> 42#include <vm/vm_phys.h> 43#include <vm/vm_dumpset.h> 44#include <vm/pmap.h> 45 46#include <machine/dump.h> 47#include <machine/elf.h> 48#include <machine/md_var.h> 49#include <machine/pcb.h> 50 51CTASSERT(sizeof(struct kerneldumpheader) == 512); 52 53#define MD_ALIGN(x) roundup2((off_t)(x), PAGE_SIZE) 54 55/* Handle buffered writes. */ 56static size_t fragsz; 57 58struct dump_pa dump_map[DUMPSYS_MD_PA_NPAIRS]; 59 60#if !defined(__powerpc__) 61void 62dumpsys_gen_pa_init(void) 63{ 64 int n, idx; 65 66 bzero(dump_map, sizeof(dump_map)); 67 for (n = 0; n < nitems(dump_map); n++) { 68 idx = n * 2; 69 if (dump_avail[idx] == 0 && dump_avail[idx + 1] == 0) 70 break; 71 dump_map[n].pa_start = dump_avail[idx]; 72 dump_map[n].pa_size = dump_avail[idx + 1] - dump_avail[idx]; 73 } 74} 75#endif 76 77struct dump_pa * 78dumpsys_gen_pa_next(struct dump_pa *mdp) 79{ 80 81 if (mdp == NULL) 82 return (&dump_map[0]); 83 84 mdp++; 85 if (mdp->pa_size == 0) 86 mdp = NULL; 87 return (mdp); 88} 89 90void 91dumpsys_gen_wbinv_all(void) 92{ 93 94} 95 96void 97dumpsys_gen_unmap_chunk(vm_paddr_t pa __unused, size_t chunk __unused, 98 void *va __unused) 99{ 100 101} 102 103int 104dumpsys_gen_write_aux_headers(struct dumperinfo *di) 105{ 106 107 return (0); 108} 109 110int 111dumpsys_buf_seek(struct dumperinfo *di, size_t sz) 112{ 113 static uint8_t buf[DEV_BSIZE]; 114 size_t nbytes; 115 int error; 116 117 bzero(buf, sizeof(buf)); 118 119 while (sz > 0) { 120 nbytes = MIN(sz, sizeof(buf)); 121 122 error = dump_append(di, buf, nbytes); 123 if (error) 124 return (error); 125 sz -= nbytes; 126 } 127 128 return (0); 129} 130 131int 132dumpsys_buf_write(struct dumperinfo *di, char *ptr, size_t sz) 133{ 134 size_t len; 135 int error; 136 137 while (sz) { 138 len = di->blocksize - fragsz; 139 if (len > sz) 140 len = sz; 141 memcpy((char *)di->blockbuf + fragsz, ptr, len); 142 fragsz += len; 143 ptr += len; 144 sz -= len; 145 if (fragsz == di->blocksize) { 146 error = dump_append(di, di->blockbuf, di->blocksize); 147 if (error) 148 return (error); 149 fragsz = 0; 150 } 151 } 152 return (0); 153} 154 155int 156dumpsys_buf_flush(struct dumperinfo *di) 157{ 158 int error; 159 160 if (fragsz == 0) 161 return (0); 162 163 error = dump_append(di, di->blockbuf, di->blocksize); 164 fragsz = 0; 165 return (error); 166} 167 168CTASSERT(PAGE_SHIFT < 20); 169#define PG2MB(pgs) ((pgs + (1 << (20 - PAGE_SHIFT)) - 1) >> (20 - PAGE_SHIFT)) 170 171int 172dumpsys_cb_dumpdata(struct dump_pa *mdp, int seqnr, void *arg) 173{ 174 struct dumperinfo *di = (struct dumperinfo*)arg; 175 vm_paddr_t pa; 176 void *va; 177 uint64_t pgs; 178 size_t counter, sz, chunk; 179 int c, error; 180 u_int maxdumppgs; 181 182 error = 0; /* catch case in which chunk size is 0 */ 183 counter = 0; /* Update twiddle every 16MB */ 184 va = NULL; 185 pgs = mdp->pa_size / PAGE_SIZE; 186 pa = mdp->pa_start; 187 maxdumppgs = min(di->maxiosize / PAGE_SIZE, MAXDUMPPGS); 188 if (maxdumppgs == 0) /* seatbelt */ 189 maxdumppgs = 1; 190 191 printf(" chunk %d: %juMB (%ju pages)", seqnr, (uintmax_t)PG2MB(pgs), 192 (uintmax_t)pgs); 193 194 dumpsys_wbinv_all(); 195 while (pgs) { 196 chunk = pgs; 197 if (chunk > maxdumppgs) 198 chunk = maxdumppgs; 199 sz = chunk << PAGE_SHIFT; 200 counter += sz; 201 if (counter >> 24) { 202 printf(" %ju", (uintmax_t)PG2MB(pgs)); 203 counter &= (1 << 24) - 1; 204 } 205 206 dumpsys_map_chunk(pa, chunk, &va); 207 wdog_kern_pat(WD_LASTVAL); 208 209 error = dump_append(di, va, sz); 210 dumpsys_unmap_chunk(pa, chunk, va); 211 if (error) 212 break; 213 pgs -= chunk; 214 pa += sz; 215 216 /* Check for user abort. */ 217 c = cncheckc(); 218 if (c == 0x03) 219 return (ECANCELED); 220 if (c != -1) 221 printf(" (CTRL-C to abort) "); 222 } 223 printf(" ... %s\n", (error) ? "fail" : "ok"); 224 return (error); 225} 226 227int 228dumpsys_foreach_chunk(dumpsys_callback_t cb, void *arg) 229{ 230 struct dump_pa *mdp; 231 int error, seqnr; 232 233 seqnr = 0; 234 mdp = dumpsys_pa_next(NULL); 235 while (mdp != NULL) { 236 error = (*cb)(mdp, seqnr++, arg); 237 if (error) 238 return (-error); 239 mdp = dumpsys_pa_next(mdp); 240 } 241 return (seqnr); 242} 243 244static off_t fileofs; 245 246static int 247cb_dumphdr(struct dump_pa *mdp, int seqnr, void *arg) 248{ 249 struct dumperinfo *di = (struct dumperinfo*)arg; 250 Elf_Phdr phdr; 251 uint64_t size; 252 int error; 253 254 size = mdp->pa_size; 255 bzero(&phdr, sizeof(phdr)); 256 phdr.p_type = PT_LOAD; 257 phdr.p_flags = PF_R; /* XXX */ 258 phdr.p_offset = fileofs; 259#ifdef __powerpc__ 260 phdr.p_vaddr = (do_minidump? mdp->pa_start : ~0L); 261 phdr.p_paddr = (do_minidump? ~0L : mdp->pa_start); 262#else 263 phdr.p_vaddr = mdp->pa_start; 264 phdr.p_paddr = mdp->pa_start; 265#endif 266 phdr.p_filesz = size; 267 phdr.p_memsz = size; 268 phdr.p_align = PAGE_SIZE; 269 270 error = dumpsys_buf_write(di, (char*)&phdr, sizeof(phdr)); 271 fileofs += phdr.p_filesz; 272 return (error); 273} 274 275static int 276cb_size(struct dump_pa *mdp, int seqnr, void *arg) 277{ 278 uint64_t *sz; 279 280 sz = (uint64_t *)arg; 281 *sz += (uint64_t)mdp->pa_size; 282 return (0); 283} 284 285int 286dumpsys_generic(struct dumperinfo *di) 287{ 288 static struct kerneldumpheader kdh; 289 Elf_Ehdr ehdr; 290 uint64_t dumpsize; 291 off_t hdrgap; 292 size_t hdrsz; 293 int error; 294 295#if MINIDUMP_PAGE_TRACKING == 1 296 if (do_minidump) 297 return (minidumpsys(di, false)); 298#endif 299 300 bzero(&ehdr, sizeof(ehdr)); 301 ehdr.e_ident[EI_MAG0] = ELFMAG0; 302 ehdr.e_ident[EI_MAG1] = ELFMAG1; 303 ehdr.e_ident[EI_MAG2] = ELFMAG2; 304 ehdr.e_ident[EI_MAG3] = ELFMAG3; 305 ehdr.e_ident[EI_CLASS] = ELF_CLASS; 306#if BYTE_ORDER == LITTLE_ENDIAN 307 ehdr.e_ident[EI_DATA] = ELFDATA2LSB; 308#else 309 ehdr.e_ident[EI_DATA] = ELFDATA2MSB; 310#endif 311 ehdr.e_ident[EI_VERSION] = EV_CURRENT; 312 ehdr.e_ident[EI_OSABI] = ELFOSABI_STANDALONE; /* XXX big picture? */ 313 ehdr.e_type = ET_CORE; 314 ehdr.e_machine = EM_VALUE; 315 ehdr.e_phoff = sizeof(ehdr); 316 ehdr.e_flags = 0; 317 ehdr.e_ehsize = sizeof(ehdr); 318 ehdr.e_phentsize = sizeof(Elf_Phdr); 319 ehdr.e_shentsize = sizeof(Elf_Shdr); 320 321 dumpsys_pa_init(); 322 323 /* Calculate dump size. */ 324 dumpsize = 0L; 325 ehdr.e_phnum = dumpsys_foreach_chunk(cb_size, &dumpsize) + 326 DUMPSYS_NUM_AUX_HDRS; 327 hdrsz = ehdr.e_phoff + ehdr.e_phnum * ehdr.e_phentsize; 328 fileofs = MD_ALIGN(hdrsz); 329 dumpsize += fileofs; 330 hdrgap = fileofs - roundup2((off_t)hdrsz, di->blocksize); 331 332 dump_init_header(di, &kdh, KERNELDUMPMAGIC, KERNELDUMP_ARCH_VERSION, 333 dumpsize); 334 335 error = dump_start(di, &kdh); 336 if (error != 0) 337 goto fail; 338 339 printf("Dumping %ju MB (%d chunks)\n", (uintmax_t)dumpsize >> 20, 340 ehdr.e_phnum - DUMPSYS_NUM_AUX_HDRS); 341 342 /* Dump ELF header */ 343 error = dumpsys_buf_write(di, (char*)&ehdr, sizeof(ehdr)); 344 if (error) 345 goto fail; 346 347 /* Dump program headers */ 348 error = dumpsys_foreach_chunk(cb_dumphdr, di); 349 if (error < 0) 350 goto fail; 351 error = dumpsys_write_aux_headers(di); 352 if (error < 0) 353 goto fail; 354 dumpsys_buf_flush(di); 355 356 /* 357 * All headers are written using blocked I/O, so we know the 358 * current offset is (still) block aligned. Skip the alignement 359 * in the file to have the segment contents aligned at page 360 * boundary. 361 */ 362 error = dumpsys_buf_seek(di, (size_t)hdrgap); 363 if (error) 364 goto fail; 365 366 /* Dump memory chunks. */ 367 error = dumpsys_foreach_chunk(dumpsys_cb_dumpdata, di); 368 if (error < 0) 369 goto fail; 370 371 error = dump_finish(di, &kdh); 372 if (error != 0) 373 goto fail; 374 375 printf("\nDump complete\n"); 376 return (0); 377 378 fail: 379 if (error < 0) 380 error = -error; 381 382 if (error == ECANCELED) 383 printf("\nDump aborted\n"); 384 else if (error == E2BIG || error == ENOSPC) 385 printf("\nDump failed. Partition too small.\n"); 386 else 387 printf("\n** DUMP FAILED (ERROR %d) **\n", error); 388 return (error); 389} 390 391#if MINIDUMP_PAGE_TRACKING == 1 392 393/* Minidump progress bar */ 394static struct { 395 const int min_per; 396 const int max_per; 397 bool visited; 398} progress_track[10] = { 399 { 0, 10, false}, 400 { 10, 20, false}, 401 { 20, 30, false}, 402 { 30, 40, false}, 403 { 40, 50, false}, 404 { 50, 60, false}, 405 { 60, 70, false}, 406 { 70, 80, false}, 407 { 80, 90, false}, 408 { 90, 100, false} 409}; 410 411static uint64_t dumpsys_pb_size; 412static uint64_t dumpsys_pb_remaining; 413static uint64_t dumpsys_pb_check; 414 415/* Reset the progress bar for a dump of dumpsize. */ 416void 417dumpsys_pb_init(uint64_t dumpsize) 418{ 419 int i; 420 421 dumpsys_pb_size = dumpsys_pb_remaining = dumpsize; 422 dumpsys_pb_check = 0; 423 424 for (i = 0; i < nitems(progress_track); i++) 425 progress_track[i].visited = false; 426} 427 428/* 429 * Update the progress according to the delta bytes that were written out. 430 * Check and print the progress percentage. 431 */ 432void 433dumpsys_pb_progress(size_t delta) 434{ 435 int sofar, i; 436 437 dumpsys_pb_remaining -= delta; 438 dumpsys_pb_check += delta; 439 440 /* 441 * To save time while dumping, only loop through progress_track 442 * occasionally. 443 */ 444 if ((dumpsys_pb_check >> DUMPSYS_PB_CHECK_BITS) == 0) 445 return; 446 else 447 dumpsys_pb_check &= (1 << DUMPSYS_PB_CHECK_BITS) - 1; 448 449 sofar = 100 - ((dumpsys_pb_remaining * 100) / dumpsys_pb_size); 450 for (i = 0; i < nitems(progress_track); i++) { 451 if (sofar < progress_track[i].min_per || 452 sofar > progress_track[i].max_per) 453 continue; 454 if (!progress_track[i].visited) { 455 progress_track[i].visited = true; 456 printf("..%d%%", sofar); 457 } 458 break; 459 } 460} 461 462int 463minidumpsys(struct dumperinfo *di, bool livedump) 464{ 465 struct minidumpstate state; 466 struct msgbuf mb_copy; 467 char *msg_ptr; 468 size_t sz; 469 int error; 470 471 if (livedump) { 472 KASSERT(!dumping, ("live dump invoked from incorrect context")); 473 474 /* 475 * Before invoking cpu_minidumpsys() on the live system, we 476 * must snapshot some required global state: the message 477 * buffer, and the page dump bitset. They may be modified at 478 * any moment, so for the sake of the live dump it is best to 479 * have an unchanging snapshot to work with. Both are included 480 * as part of the dump and consumed by userspace tools. 481 * 482 * Other global state important to the minidump code is the 483 * dump_avail array and the kernel's page tables, but snapshots 484 * are not taken of these. For one, dump_avail[] is expected 485 * not to change after boot. Snapshotting the kernel page 486 * tables would involve an additional walk, so this is avoided 487 * too. 488 * 489 * This means live dumps are best effort, and the result may or 490 * may not be usable; there are no guarantees about the 491 * consistency of the dump's contents. Any of the following 492 * (and likely more) may affect the live dump: 493 * 494 * - Data may be modified, freed, or remapped during the 495 * course of the dump, such that the contents written out 496 * are partially or entirely unrecognizable. This means 497 * valid references may point to destroyed/mangled objects, 498 * and vice versa. 499 * 500 * - The dumped context of any threads that ran during the 501 * dump process may be unreliable. 502 * 503 * - The set of kernel page tables included in the dump likely 504 * won't correspond exactly to the copy of the dump bitset. 505 * This means some pages will be dumped without any way to 506 * locate them, and some pages may not have been dumped 507 * despite appearing as if they should. 508 */ 509 msg_ptr = malloc(msgbufsize, M_TEMP, M_WAITOK); 510 msgbuf_duplicate(msgbufp, &mb_copy, msg_ptr); 511 state.msgbufp = &mb_copy; 512 513 sz = BITSET_SIZE(vm_page_dump_pages); 514 state.dump_bitset = malloc(sz, M_TEMP, M_WAITOK); 515 BIT_COPY_STORE_REL(sz, vm_page_dump, state.dump_bitset); 516 } else { 517 KASSERT(dumping, ("minidump invoked outside of doadump()")); 518 519 /* Use the globals. */ 520 state.msgbufp = msgbufp; 521 state.dump_bitset = vm_page_dump; 522 } 523 524 error = cpu_minidumpsys(di, &state); 525 if (livedump) { 526 free(msg_ptr, M_TEMP); 527 free(state.dump_bitset, M_TEMP); 528 } 529 530 return (error); 531} 532#endif /* MINIDUMP_PAGE_TRACKING == 1 */ 533