1/*- 2 * Copyright (c) 2008 Semihalf, Grzegorz Bernacki 3 * All rights reserved. 4 * 5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted provided that the following conditions 7 * are met: 8 * 9 * 1. Redistributions of source code must retain the above copyright 10 * notice, this list of conditions and the following disclaimer. 11 * 2. Redistributions in binary form must reproduce the above copyright 12 * notice, this list of conditions and the following disclaimer in the 13 * documentation and/or other materials provided with the distribution. 14 * 15 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR 16 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 17 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 18 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, 19 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 20 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 21 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 22 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 23 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF 24 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 25 * 26 * from: FreeBSD: src/sys/i386/i386/minidump_machdep.c,v 1.6 2008/08/17 23:27:27 27 */ 28 29#include <sys/cdefs.h> 30__FBSDID("$FreeBSD$"); 31 32#include "opt_watchdog.h" 33 34#include <sys/param.h> 35#include <sys/systm.h> 36#include <sys/conf.h> 37#include <sys/cons.h> 38#include <sys/kernel.h> 39#include <sys/kerneldump.h> 40#include <sys/msgbuf.h> 41#ifdef SW_WATCHDOG 42#include <sys/watchdog.h> 43#endif 44#include <vm/vm.h> 45#include <vm/pmap.h> 46#include <machine/atomic.h> 47#include <machine/elf.h> 48#include <machine/md_var.h> 49#include <machine/vmparam.h> 50#include <machine/minidump.h> 51#include <machine/cpufunc.h> 52 53CTASSERT(sizeof(struct kerneldumpheader) == 512); 54 55/* 56 * Don't touch the first SIZEOF_METADATA bytes on the dump device. This 57 * is to protect us from metadata and to protect metadata from us. 58 */ 59#define SIZEOF_METADATA (64*1024) 60 61uint32_t *vm_page_dump; 62int vm_page_dump_size; 63 64static struct kerneldumpheader kdh; 65static off_t dumplo; 66 67/* Handle chunked writes. */ 68static size_t fragsz, offset; 69static void *dump_va; 70static uint64_t counter, progress; 71 72CTASSERT(sizeof(*vm_page_dump) == 4); 73 74static int 75is_dumpable(vm_paddr_t pa) 76{ 77 int i; 78 79 for (i = 0; dump_avail[i] != 0 || dump_avail[i + 1] != 0; i += 2) { 80 if (pa >= dump_avail[i] && pa < dump_avail[i + 1]) 81 return (1); 82 } 83 return (0); 84} 85 86#define PG2MB(pgs) (((pgs) + (1 << 8) - 1) >> 8) 87 88static int 89blk_flush(struct dumperinfo *di) 90{ 91 int error; 92 93 if (fragsz == 0) 94 return (0); 95 96 error = dump_write(di, (char*)dump_va + offset, 0, dumplo, fragsz - offset); 97 dumplo += (fragsz - offset); 98 fragsz = 0; 99 offset = 0; 100 return (error); 101} 102 103static int 104blk_write(struct dumperinfo *di, char *ptr, vm_paddr_t pa, size_t sz) 105{ 106 size_t len; 107 int error, i, c; 108 u_int maxdumpsz; 109 110 maxdumpsz = di->maxiosize; 111 112 if (maxdumpsz == 0) /* seatbelt */ 113 maxdumpsz = PAGE_SIZE; 114 115 error = 0; 116 117 if (ptr != NULL && pa != 0) { 118 printf("cant have both va and pa!\n"); 119 return (EINVAL); 120 } 121 122 if (ptr != NULL) { 123 /* If we're doing a virtual dump, flush any pre-existing pa pages */ 124 error = blk_flush(di); 125 if (error) 126 return (error); 127 } 128 129 while (sz) { 130 if (fragsz == 0) { 131 offset = pa & PAGE_MASK; 132 fragsz += offset; 133 } 134 len = maxdumpsz - fragsz; 135 if (len > sz) 136 len = sz; 137 counter += len; 138 progress -= len; 139 140 if (counter >> 22) { 141 printf(" %lld", PG2MB(progress >> PAGE_SHIFT)); 142 counter &= (1<<22) - 1; 143 } 144 145#ifdef SW_WATCHDOG 146 wdog_kern_pat(WD_LASTVAL); 147#endif 148 if (ptr) { 149 error = dump_write(di, ptr, 0, dumplo, len); 150 if (error) 151 return (error); 152 dumplo += len; 153 ptr += len; 154 sz -= len; 155 } else { 156 for (i = 0; i < len; i += PAGE_SIZE) 157 dump_va = pmap_kenter_temp(pa + i, 158 (i + fragsz) >> PAGE_SHIFT); 159 fragsz += len; 160 pa += len; 161 sz -= len; 162 if (fragsz == maxdumpsz) { 163 error = blk_flush(di); 164 if (error) 165 return (error); 166 } 167 } 168 169 /* Check for user abort. */ 170 c = cncheckc(); 171 if (c == 0x03) 172 return (ECANCELED); 173 if (c != -1) 174 printf(" (CTRL-C to abort) "); 175 } 176 177 return (0); 178} 179 180static int 181blk_write_cont(struct dumperinfo *di, vm_paddr_t pa, size_t sz) 182{ 183 int error; 184 185 error = blk_write(di, 0, pa, sz); 186 if (error) 187 return (error); 188 189 error = blk_flush(di); 190 if (error) 191 return (error); 192 193 return (0); 194} 195 196/* A fake page table page, to avoid having to handle both 4K and 2M pages */ 197static pt_entry_t fakept[NPTEPG]; 198 199void 200minidumpsys(struct dumperinfo *di) 201{ 202 struct minidumphdr mdhdr; 203 uint64_t dumpsize; 204 uint32_t ptesize; 205 uint32_t bits; 206 uint32_t pa, prev_pa = 0, count = 0; 207 vm_offset_t va; 208 pd_entry_t *pdp; 209 pt_entry_t *pt, *ptp; 210 int i, k, bit, error; 211 char *addr; 212 213 /* 214 * Flush caches. Note that in the SMP case this operates only on the 215 * current CPU's L1 cache. Before we reach this point, code in either 216 * the system shutdown or kernel debugger has called stop_cpus() to stop 217 * all cores other than this one. Part of the ARM handling of 218 * stop_cpus() is to call wbinv_all() on that core's local L1 cache. So 219 * by time we get to here, all that remains is to flush the L1 for the 220 * current CPU, then the L2. 221 */ 222 cpu_idcache_wbinv_all(); 223 cpu_l2cache_wbinv_all(); 224 225 counter = 0; 226 /* Walk page table pages, set bits in vm_page_dump */ 227 ptesize = 0; 228 for (va = KERNBASE; va < kernel_vm_end; va += NBPDR) { 229 /* 230 * We always write a page, even if it is zero. Each 231 * page written corresponds to 2MB of space 232 */ 233 ptesize += L2_TABLE_SIZE_REAL; 234 pmap_get_pde_pte(pmap_kernel(), va, &pdp, &ptp); 235 if (pmap_pde_v(pdp) && pmap_pde_section(pdp)) { 236 /* This is a section mapping 1M page. */ 237 pa = (*pdp & L1_S_ADDR_MASK) | (va & ~L1_S_ADDR_MASK); 238 for (k = 0; k < (L1_S_SIZE / PAGE_SIZE); k++) { 239 if (is_dumpable(pa)) 240 dump_add_page(pa); 241 pa += PAGE_SIZE; 242 } 243 continue; 244 } 245 if (pmap_pde_v(pdp) && pmap_pde_page(pdp)) { 246 /* Set bit for each valid page in this 1MB block */ 247 addr = pmap_kenter_temp(*pdp & L1_C_ADDR_MASK, 0); 248 pt = (pt_entry_t*)(addr + 249 (((uint32_t)*pdp & L1_C_ADDR_MASK) & PAGE_MASK)); 250 for (k = 0; k < 256; k++) { 251 if ((pt[k] & L2_TYPE_MASK) == L2_TYPE_L) { 252 pa = (pt[k] & L2_L_FRAME) | 253 (va & L2_L_OFFSET); 254 for (i = 0; i < 16; i++) { 255 if (is_dumpable(pa)) 256 dump_add_page(pa); 257 k++; 258 pa += PAGE_SIZE; 259 } 260 } else if ((pt[k] & L2_TYPE_MASK) == L2_TYPE_S) { 261 pa = (pt[k] & L2_S_FRAME) | 262 (va & L2_S_OFFSET); 263 if (is_dumpable(pa)) 264 dump_add_page(pa); 265 } 266 } 267 } else { 268 /* Nothing, we're going to dump a null page */ 269 } 270 } 271 272 /* Calculate dump size. */ 273 dumpsize = ptesize; 274 dumpsize += round_page(msgbufp->msg_size); 275 dumpsize += round_page(vm_page_dump_size); 276 277 for (i = 0; i < vm_page_dump_size / sizeof(*vm_page_dump); i++) { 278 bits = vm_page_dump[i]; 279 while (bits) { 280 bit = ffs(bits) - 1; 281 pa = (((uint64_t)i * sizeof(*vm_page_dump) * NBBY) + 282 bit) * PAGE_SIZE; 283 /* Clear out undumpable pages now if needed */ 284 if (is_dumpable(pa)) 285 dumpsize += PAGE_SIZE; 286 else 287 dump_drop_page(pa); 288 bits &= ~(1ul << bit); 289 } 290 } 291 292 dumpsize += PAGE_SIZE; 293 294 /* Determine dump offset on device. */ 295 if (di->mediasize < SIZEOF_METADATA + dumpsize + sizeof(kdh) * 2) { 296 error = ENOSPC; 297 goto fail; 298 } 299 300 dumplo = di->mediaoffset + di->mediasize - dumpsize; 301 dumplo -= sizeof(kdh) * 2; 302 progress = dumpsize; 303 304 /* Initialize mdhdr */ 305 bzero(&mdhdr, sizeof(mdhdr)); 306 strcpy(mdhdr.magic, MINIDUMP_MAGIC); 307 mdhdr.version = MINIDUMP_VERSION; 308 mdhdr.msgbufsize = msgbufp->msg_size; 309 mdhdr.bitmapsize = vm_page_dump_size; 310 mdhdr.ptesize = ptesize; 311 mdhdr.kernbase = KERNBASE; 312 313 mkdumpheader(&kdh, KERNELDUMPMAGIC, KERNELDUMP_ARM_VERSION, dumpsize, 314 di->blocksize); 315 316 printf("Physical memory: %u MB\n", ptoa((uintmax_t)physmem) / 1048576); 317 printf("Dumping %llu MB:", (long long)dumpsize >> 20); 318 319 /* Dump leader */ 320 error = dump_write(di, &kdh, 0, dumplo, sizeof(kdh)); 321 if (error) 322 goto fail; 323 dumplo += sizeof(kdh); 324 325 /* Dump my header */ 326 bzero(&fakept, sizeof(fakept)); 327 bcopy(&mdhdr, &fakept, sizeof(mdhdr)); 328 error = blk_write(di, (char *)&fakept, 0, PAGE_SIZE); 329 if (error) 330 goto fail; 331 332 /* Dump msgbuf up front */ 333 error = blk_write(di, (char *)msgbufp->msg_ptr, 0, round_page(msgbufp->msg_size)); 334 if (error) 335 goto fail; 336 337 /* Dump bitmap */ 338 error = blk_write(di, (char *)vm_page_dump, 0, 339 round_page(vm_page_dump_size)); 340 if (error) 341 goto fail; 342 343 /* Dump kernel page table pages */ 344 for (va = KERNBASE; va < kernel_vm_end; va += NBPDR) { 345 /* We always write a page, even if it is zero */ 346 pmap_get_pde_pte(pmap_kernel(), va, &pdp, &ptp); 347 348 if (pmap_pde_v(pdp) && pmap_pde_section(pdp)) { 349 if (count) { 350 error = blk_write_cont(di, prev_pa, 351 count * L2_TABLE_SIZE_REAL); 352 if (error) 353 goto fail; 354 count = 0; 355 prev_pa = 0; 356 } 357 /* This is a single 2M block. Generate a fake PTP */ 358 pa = (*pdp & L1_S_ADDR_MASK) | (va & ~L1_S_ADDR_MASK); 359 for (k = 0; k < (L1_S_SIZE / PAGE_SIZE); k++) { 360 fakept[k] = L2_S_PROTO | (pa + (k * PAGE_SIZE)) | 361 L2_S_PROT(PTE_KERNEL, 362 VM_PROT_READ | VM_PROT_WRITE); 363 } 364 error = blk_write(di, (char *)&fakept, 0, 365 L2_TABLE_SIZE_REAL); 366 if (error) 367 goto fail; 368 /* Flush, in case we reuse fakept in the same block */ 369 error = blk_flush(di); 370 if (error) 371 goto fail; 372 continue; 373 } 374 if (pmap_pde_v(pdp) && pmap_pde_page(pdp)) { 375 pa = *pdp & L1_C_ADDR_MASK; 376 if (!count) { 377 prev_pa = pa; 378 count++; 379 } 380 else { 381 if (pa == (prev_pa + count * L2_TABLE_SIZE_REAL)) 382 count++; 383 else { 384 error = blk_write_cont(di, prev_pa, 385 count * L2_TABLE_SIZE_REAL); 386 if (error) 387 goto fail; 388 count = 1; 389 prev_pa = pa; 390 } 391 } 392 } else { 393 if (count) { 394 error = blk_write_cont(di, prev_pa, 395 count * L2_TABLE_SIZE_REAL); 396 if (error) 397 goto fail; 398 count = 0; 399 prev_pa = 0; 400 } 401 bzero(fakept, sizeof(fakept)); 402 error = blk_write(di, (char *)&fakept, 0, 403 L2_TABLE_SIZE_REAL); 404 if (error) 405 goto fail; 406 /* Flush, in case we reuse fakept in the same block */ 407 error = blk_flush(di); 408 if (error) 409 goto fail; 410 } 411 } 412 413 if (count) { 414 error = blk_write_cont(di, prev_pa, count * L2_TABLE_SIZE_REAL); 415 if (error) 416 goto fail; 417 count = 0; 418 prev_pa = 0; 419 } 420 421 /* Dump memory chunks */ 422 for (i = 0; i < vm_page_dump_size / sizeof(*vm_page_dump); i++) { 423 bits = vm_page_dump[i]; 424 while (bits) { 425 bit = ffs(bits) - 1; 426 pa = (((uint64_t)i * sizeof(*vm_page_dump) * NBBY) + 427 bit) * PAGE_SIZE; 428 if (!count) { 429 prev_pa = pa; 430 count++; 431 } else { 432 if (pa == (prev_pa + count * PAGE_SIZE)) 433 count++; 434 else { 435 error = blk_write_cont(di, prev_pa, 436 count * PAGE_SIZE); 437 if (error) 438 goto fail; 439 count = 1; 440 prev_pa = pa; 441 } 442 } 443 bits &= ~(1ul << bit); 444 } 445 } 446 if (count) { 447 error = blk_write_cont(di, prev_pa, count * PAGE_SIZE); 448 if (error) 449 goto fail; 450 count = 0; 451 prev_pa = 0; 452 } 453 454 /* Dump trailer */ 455 error = dump_write(di, &kdh, 0, dumplo, sizeof(kdh)); 456 if (error) 457 goto fail; 458 dumplo += sizeof(kdh); 459 460 /* Signal completion, signoff and exit stage left. */ 461 dump_write(di, NULL, 0, 0, 0); 462 printf("\nDump complete\n"); 463 return; 464 465fail: 466 if (error < 0) 467 error = -error; 468 469 if (error == ECANCELED) 470 printf("\nDump aborted\n"); 471 else if (error == ENOSPC) 472 printf("\nDump failed. Partition too small.\n"); 473 else 474 printf("\n** DUMP FAILED (ERROR %d) **\n", error); 475} 476 477void 478dump_add_page(vm_paddr_t pa) 479{ 480 int idx, bit; 481 482 pa >>= PAGE_SHIFT; 483 idx = pa >> 5; /* 2^5 = 32 */ 484 bit = pa & 31; 485 atomic_set_int(&vm_page_dump[idx], 1ul << bit); 486} 487 488void 489dump_drop_page(vm_paddr_t pa) 490{ 491 int idx, bit; 492 493 pa >>= PAGE_SHIFT; 494 idx = pa >> 5; /* 2^5 = 32 */ 495 bit = pa & 31; 496 atomic_clear_int(&vm_page_dump[idx], 1ul << bit); 497} 498