1184728Sraj/*- 2184728Sraj * Copyright (c) 2008 Semihalf, Grzegorz Bernacki 3184728Sraj * All rights reserved. 4184728Sraj * 5184728Sraj * Redistribution and use in source and binary forms, with or without 6184728Sraj * modification, are permitted provided that the following conditions 7184728Sraj * are met: 8184728Sraj * 9184728Sraj * 1. Redistributions of source code must retain the above copyright 10184728Sraj * notice, this list of conditions and the following disclaimer. 11184728Sraj * 2. Redistributions in binary form must reproduce the above copyright 12184728Sraj * notice, this list of conditions and the following disclaimer in the 13184728Sraj * documentation and/or other materials provided with the distribution. 14184728Sraj * 15184728Sraj * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR 16184728Sraj * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 17184728Sraj * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 18184728Sraj * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, 19184728Sraj * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 20184728Sraj * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 21184728Sraj * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 22184728Sraj * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 23184728Sraj * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF 24184728Sraj * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 25184728Sraj * 26184728Sraj * from: FreeBSD: src/sys/i386/i386/minidump_machdep.c,v 1.6 2008/08/17 23:27:27 27184728Sraj */ 28184728Sraj 29184728Sraj#include <sys/cdefs.h> 30184728Sraj__FBSDID("$FreeBSD$"); 31184728Sraj 32221173Sattilio#include "opt_watchdog.h" 33221173Sattilio 34184728Sraj#include <sys/param.h> 35184728Sraj#include <sys/systm.h> 36184728Sraj#include <sys/conf.h> 37184728Sraj#include <sys/cons.h> 38184728Sraj#include <sys/kernel.h> 39184728Sraj#include <sys/kerneldump.h> 40184728Sraj#include <sys/msgbuf.h> 41221173Sattilio#ifdef SW_WATCHDOG 42221173Sattilio#include <sys/watchdog.h> 43221173Sattilio#endif 44184728Sraj#include <vm/vm.h> 45184728Sraj#include <vm/pmap.h> 46184728Sraj#include <machine/pmap.h> 47184728Sraj#include <machine/atomic.h> 48184728Sraj#include <machine/elf.h> 49184728Sraj#include <machine/md_var.h> 50184728Sraj#include <machine/vmparam.h> 51184728Sraj#include <machine/minidump.h> 52184728Sraj#include <machine/cpufunc.h> 53184728Sraj 54184728SrajCTASSERT(sizeof(struct kerneldumpheader) == 512); 55184728Sraj 56184728Sraj/* 57184728Sraj * Don't touch the first SIZEOF_METADATA bytes on the dump device. This 58184728Sraj * is to protect us from metadata and to protect metadata from us. 59184728Sraj */ 60184728Sraj#define SIZEOF_METADATA (64*1024) 61184728Sraj 62184728Srajuint32_t *vm_page_dump; 63184728Srajint vm_page_dump_size; 64184728Sraj 65184728Srajstatic struct kerneldumpheader kdh; 66184728Srajstatic off_t dumplo; 67184728Sraj 68184728Sraj/* Handle chunked writes. */ 69184728Srajstatic size_t fragsz, offset; 70184728Srajstatic void *dump_va; 71184728Srajstatic uint64_t counter, progress; 72184728Sraj 73184728SrajCTASSERT(sizeof(*vm_page_dump) == 4); 74184728Sraj 75184728Srajstatic int 76184728Srajis_dumpable(vm_paddr_t pa) 77184728Sraj{ 78184728Sraj int i; 79184728Sraj 80184728Sraj for (i = 0; dump_avail[i] != 0 || dump_avail[i + 1] != 0; i += 2) { 81184728Sraj if (pa >= dump_avail[i] && pa < dump_avail[i + 1]) 82184728Sraj return (1); 83184728Sraj } 84184728Sraj return (0); 85184728Sraj} 86184728Sraj 87184728Sraj#define PG2MB(pgs) (((pgs) + (1 << 8) - 1) >> 8) 88184728Sraj 89184728Srajstatic int 90184728Srajblk_flush(struct dumperinfo *di) 91184728Sraj{ 92184728Sraj int error; 93184728Sraj 94184728Sraj if (fragsz == 0) 95184728Sraj return (0); 96184728Sraj 97184728Sraj error = dump_write(di, (char*)dump_va + offset, 0, dumplo, fragsz - offset); 98184728Sraj dumplo += (fragsz - offset); 99184728Sraj fragsz = 0; 100184728Sraj offset = 0; 101184728Sraj return (error); 102184728Sraj} 103184728Sraj 104184728Srajstatic int 105184728Srajblk_write(struct dumperinfo *di, char *ptr, vm_paddr_t pa, size_t sz) 106184728Sraj{ 107184728Sraj size_t len; 108184728Sraj int error, i, c; 109184728Sraj u_int maxdumpsz; 110184728Sraj 111184728Sraj maxdumpsz = di->maxiosize; 112184728Sraj 113184728Sraj if (maxdumpsz == 0) /* seatbelt */ 114184728Sraj maxdumpsz = PAGE_SIZE; 115184728Sraj 116184728Sraj error = 0; 117184728Sraj 118184728Sraj if (ptr != NULL && pa != 0) { 119184728Sraj printf("cant have both va and pa!\n"); 120184728Sraj return (EINVAL); 121184728Sraj } 122184728Sraj 123184728Sraj if (ptr != NULL) { 124184728Sraj /* If we're doing a virtual dump, flush any pre-existing pa pages */ 125184728Sraj error = blk_flush(di); 126184728Sraj if (error) 127184728Sraj return (error); 128184728Sraj } 129184728Sraj 130184728Sraj while (sz) { 131184728Sraj if (fragsz == 0) { 132184728Sraj offset = pa & PAGE_MASK; 133184728Sraj fragsz += offset; 134184728Sraj } 135184728Sraj len = maxdumpsz - fragsz; 136184728Sraj if (len > sz) 137184728Sraj len = sz; 138184728Sraj counter += len; 139184728Sraj progress -= len; 140184728Sraj 141184728Sraj if (counter >> 22) { 142184728Sraj printf(" %lld", PG2MB(progress >> PAGE_SHIFT)); 143184728Sraj counter &= (1<<22) - 1; 144184728Sraj } 145184728Sraj 146221173Sattilio#ifdef SW_WATCHDOG 147221173Sattilio wdog_kern_pat(WD_LASTVAL); 148221173Sattilio#endif 149184728Sraj if (ptr) { 150184728Sraj error = dump_write(di, ptr, 0, dumplo, len); 151184728Sraj if (error) 152184728Sraj return (error); 153184728Sraj dumplo += len; 154184728Sraj ptr += len; 155184728Sraj sz -= len; 156184728Sraj } else { 157184728Sraj for (i = 0; i < len; i += PAGE_SIZE) 158184728Sraj dump_va = pmap_kenter_temp(pa + i, 159184728Sraj (i + fragsz) >> PAGE_SHIFT); 160184728Sraj fragsz += len; 161184728Sraj pa += len; 162184728Sraj sz -= len; 163184728Sraj if (fragsz == maxdumpsz) { 164184728Sraj error = blk_flush(di); 165184728Sraj if (error) 166184728Sraj return (error); 167184728Sraj } 168184728Sraj } 169184728Sraj 170184728Sraj /* Check for user abort. */ 171184728Sraj c = cncheckc(); 172184728Sraj if (c == 0x03) 173184728Sraj return (ECANCELED); 174184728Sraj if (c != -1) 175184728Sraj printf(" (CTRL-C to abort) "); 176184728Sraj } 177184728Sraj 178184728Sraj return (0); 179184728Sraj} 180184728Sraj 181184728Srajstatic int 182184728Srajblk_write_cont(struct dumperinfo *di, vm_paddr_t pa, size_t sz) 183184728Sraj{ 184184728Sraj int error; 185184728Sraj 186184728Sraj error = blk_write(di, 0, pa, sz); 187184728Sraj if (error) 188184728Sraj return (error); 189184728Sraj 190184728Sraj error = blk_flush(di); 191184728Sraj if (error) 192184728Sraj return (error); 193184728Sraj 194184728Sraj return (0); 195184728Sraj} 196184728Sraj 197184728Sraj/* A fake page table page, to avoid having to handle both 4K and 2M pages */ 198184728Srajstatic pt_entry_t fakept[NPTEPG]; 199184728Sraj 200184728Srajvoid 201184728Srajminidumpsys(struct dumperinfo *di) 202184728Sraj{ 203184728Sraj struct minidumphdr mdhdr; 204184728Sraj uint64_t dumpsize; 205184728Sraj uint32_t ptesize; 206184728Sraj uint32_t bits; 207184728Sraj uint32_t pa, prev_pa = 0, count = 0; 208184728Sraj vm_offset_t va; 209184728Sraj pd_entry_t *pdp; 210184728Sraj pt_entry_t *pt, *ptp; 211184728Sraj int i, k, bit, error; 212184728Sraj char *addr; 213184728Sraj 214184728Sraj /* Flush cache */ 215184728Sraj cpu_idcache_wbinv_all(); 216184728Sraj cpu_l2cache_wbinv_all(); 217184728Sraj 218184728Sraj counter = 0; 219184728Sraj /* Walk page table pages, set bits in vm_page_dump */ 220184728Sraj ptesize = 0; 221184728Sraj for (va = KERNBASE; va < kernel_vm_end; va += NBPDR) { 222184728Sraj /* 223184728Sraj * We always write a page, even if it is zero. Each 224184728Sraj * page written corresponds to 2MB of space 225184728Sraj */ 226184728Sraj ptesize += L2_TABLE_SIZE_REAL; 227184728Sraj pmap_get_pde_pte(pmap_kernel(), va, &pdp, &ptp); 228184728Sraj if (pmap_pde_v(pdp) && pmap_pde_section(pdp)) { 229184728Sraj /* This is a section mapping 1M page. */ 230184728Sraj pa = (*pdp & L1_S_ADDR_MASK) | (va & ~L1_S_ADDR_MASK); 231184728Sraj for (k = 0; k < (L1_S_SIZE / PAGE_SIZE); k++) { 232184728Sraj if (is_dumpable(pa)) 233184728Sraj dump_add_page(pa); 234184728Sraj pa += PAGE_SIZE; 235184728Sraj } 236184728Sraj continue; 237184728Sraj } 238184728Sraj if (pmap_pde_v(pdp) && pmap_pde_page(pdp)) { 239184728Sraj /* Set bit for each valid page in this 1MB block */ 240184728Sraj addr = pmap_kenter_temp(*pdp & L1_C_ADDR_MASK, 0); 241184728Sraj pt = (pt_entry_t*)(addr + 242184728Sraj (((uint32_t)*pdp & L1_C_ADDR_MASK) & PAGE_MASK)); 243184728Sraj for (k = 0; k < 256; k++) { 244184728Sraj if ((pt[k] & L2_TYPE_MASK) == L2_TYPE_L) { 245184728Sraj pa = (pt[k] & L2_L_FRAME) | 246184728Sraj (va & L2_L_OFFSET); 247184728Sraj for (i = 0; i < 16; i++) { 248184728Sraj if (is_dumpable(pa)) 249184728Sraj dump_add_page(pa); 250184728Sraj k++; 251184728Sraj pa += PAGE_SIZE; 252184728Sraj } 253184728Sraj } else if ((pt[k] & L2_TYPE_MASK) == L2_TYPE_S) { 254184728Sraj pa = (pt[k] & L2_S_FRAME) | 255184728Sraj (va & L2_S_OFFSET); 256184728Sraj if (is_dumpable(pa)) 257184728Sraj dump_add_page(pa); 258184728Sraj } 259184728Sraj } 260184728Sraj } else { 261184728Sraj /* Nothing, we're going to dump a null page */ 262184728Sraj } 263184728Sraj } 264184728Sraj 265184728Sraj /* Calculate dump size. */ 266184728Sraj dumpsize = ptesize; 267184728Sraj dumpsize += round_page(msgbufp->msg_size); 268184728Sraj dumpsize += round_page(vm_page_dump_size); 269184728Sraj 270184728Sraj for (i = 0; i < vm_page_dump_size / sizeof(*vm_page_dump); i++) { 271184728Sraj bits = vm_page_dump[i]; 272184728Sraj while (bits) { 273184728Sraj bit = ffs(bits) - 1; 274184728Sraj pa = (((uint64_t)i * sizeof(*vm_page_dump) * NBBY) + 275184728Sraj bit) * PAGE_SIZE; 276184728Sraj /* Clear out undumpable pages now if needed */ 277184728Sraj if (is_dumpable(pa)) 278184728Sraj dumpsize += PAGE_SIZE; 279184728Sraj else 280184728Sraj dump_drop_page(pa); 281184728Sraj bits &= ~(1ul << bit); 282184728Sraj } 283184728Sraj } 284184728Sraj 285184728Sraj dumpsize += PAGE_SIZE; 286184728Sraj 287184728Sraj /* Determine dump offset on device. */ 288184728Sraj if (di->mediasize < SIZEOF_METADATA + dumpsize + sizeof(kdh) * 2) { 289184728Sraj error = ENOSPC; 290184728Sraj goto fail; 291184728Sraj } 292184728Sraj 293184728Sraj dumplo = di->mediaoffset + di->mediasize - dumpsize; 294184728Sraj dumplo -= sizeof(kdh) * 2; 295184728Sraj progress = dumpsize; 296184728Sraj 297184728Sraj /* Initialize mdhdr */ 298184728Sraj bzero(&mdhdr, sizeof(mdhdr)); 299184728Sraj strcpy(mdhdr.magic, MINIDUMP_MAGIC); 300184728Sraj mdhdr.version = MINIDUMP_VERSION; 301184728Sraj mdhdr.msgbufsize = msgbufp->msg_size; 302184728Sraj mdhdr.bitmapsize = vm_page_dump_size; 303184728Sraj mdhdr.ptesize = ptesize; 304184728Sraj mdhdr.kernbase = KERNBASE; 305184728Sraj 306184728Sraj mkdumpheader(&kdh, KERNELDUMPMAGIC, KERNELDUMP_ARM_VERSION, dumpsize, 307184728Sraj di->blocksize); 308184728Sraj 309184728Sraj printf("Physical memory: %u MB\n", ptoa((uintmax_t)physmem) / 1048576); 310184728Sraj printf("Dumping %llu MB:", (long long)dumpsize >> 20); 311184728Sraj 312184728Sraj /* Dump leader */ 313184728Sraj error = dump_write(di, &kdh, 0, dumplo, sizeof(kdh)); 314184728Sraj if (error) 315184728Sraj goto fail; 316184728Sraj dumplo += sizeof(kdh); 317184728Sraj 318184728Sraj /* Dump my header */ 319184728Sraj bzero(&fakept, sizeof(fakept)); 320184728Sraj bcopy(&mdhdr, &fakept, sizeof(mdhdr)); 321184728Sraj error = blk_write(di, (char *)&fakept, 0, PAGE_SIZE); 322184728Sraj if (error) 323184728Sraj goto fail; 324184728Sraj 325184728Sraj /* Dump msgbuf up front */ 326184728Sraj error = blk_write(di, (char *)msgbufp->msg_ptr, 0, round_page(msgbufp->msg_size)); 327184728Sraj if (error) 328184728Sraj goto fail; 329184728Sraj 330184728Sraj /* Dump bitmap */ 331184728Sraj error = blk_write(di, (char *)vm_page_dump, 0, 332184728Sraj round_page(vm_page_dump_size)); 333184728Sraj if (error) 334184728Sraj goto fail; 335184728Sraj 336184728Sraj /* Dump kernel page table pages */ 337184728Sraj for (va = KERNBASE; va < kernel_vm_end; va += NBPDR) { 338184728Sraj /* We always write a page, even if it is zero */ 339184728Sraj pmap_get_pde_pte(pmap_kernel(), va, &pdp, &ptp); 340184728Sraj 341184728Sraj if (pmap_pde_v(pdp) && pmap_pde_section(pdp)) { 342184728Sraj if (count) { 343184728Sraj error = blk_write_cont(di, prev_pa, 344184728Sraj count * L2_TABLE_SIZE_REAL); 345184728Sraj if (error) 346184728Sraj goto fail; 347184728Sraj count = 0; 348184728Sraj prev_pa = 0; 349184728Sraj } 350184728Sraj /* This is a single 2M block. Generate a fake PTP */ 351184728Sraj pa = (*pdp & L1_S_ADDR_MASK) | (va & ~L1_S_ADDR_MASK); 352184728Sraj for (k = 0; k < (L1_S_SIZE / PAGE_SIZE); k++) { 353184728Sraj fakept[k] = L2_S_PROTO | (pa + (k * PAGE_SIZE)) | 354184728Sraj L2_S_PROT(PTE_KERNEL, 355184728Sraj VM_PROT_READ | VM_PROT_WRITE); 356184728Sraj } 357184728Sraj error = blk_write(di, (char *)&fakept, 0, 358184728Sraj L2_TABLE_SIZE_REAL); 359184728Sraj if (error) 360184728Sraj goto fail; 361184728Sraj /* Flush, in case we reuse fakept in the same block */ 362184728Sraj error = blk_flush(di); 363184728Sraj if (error) 364184728Sraj goto fail; 365184728Sraj continue; 366184728Sraj } 367184728Sraj if (pmap_pde_v(pdp) && pmap_pde_page(pdp)) { 368184728Sraj pa = *pdp & L1_C_ADDR_MASK; 369184728Sraj if (!count) { 370184728Sraj prev_pa = pa; 371184728Sraj count++; 372184728Sraj } 373184728Sraj else { 374184728Sraj if (pa == (prev_pa + count * L2_TABLE_SIZE_REAL)) 375184728Sraj count++; 376184728Sraj else { 377184728Sraj error = blk_write_cont(di, prev_pa, 378184728Sraj count * L2_TABLE_SIZE_REAL); 379184728Sraj if (error) 380184728Sraj goto fail; 381184728Sraj count = 1; 382184728Sraj prev_pa = pa; 383184728Sraj } 384184728Sraj } 385184728Sraj } else { 386184728Sraj if (count) { 387184728Sraj error = blk_write_cont(di, prev_pa, 388184728Sraj count * L2_TABLE_SIZE_REAL); 389184728Sraj if (error) 390184728Sraj goto fail; 391184728Sraj count = 0; 392184728Sraj prev_pa = 0; 393184728Sraj } 394184728Sraj bzero(fakept, sizeof(fakept)); 395184728Sraj error = blk_write(di, (char *)&fakept, 0, 396184728Sraj L2_TABLE_SIZE_REAL); 397184728Sraj if (error) 398184728Sraj goto fail; 399184728Sraj /* Flush, in case we reuse fakept in the same block */ 400184728Sraj error = blk_flush(di); 401184728Sraj if (error) 402184728Sraj goto fail; 403184728Sraj } 404184728Sraj } 405184728Sraj 406184728Sraj if (count) { 407184728Sraj error = blk_write_cont(di, prev_pa, count * L2_TABLE_SIZE_REAL); 408184728Sraj if (error) 409184728Sraj goto fail; 410184728Sraj count = 0; 411184728Sraj prev_pa = 0; 412184728Sraj } 413184728Sraj 414184728Sraj /* Dump memory chunks */ 415184728Sraj for (i = 0; i < vm_page_dump_size / sizeof(*vm_page_dump); i++) { 416184728Sraj bits = vm_page_dump[i]; 417184728Sraj while (bits) { 418184728Sraj bit = ffs(bits) - 1; 419184728Sraj pa = (((uint64_t)i * sizeof(*vm_page_dump) * NBBY) + 420184728Sraj bit) * PAGE_SIZE; 421184728Sraj if (!count) { 422184728Sraj prev_pa = pa; 423184728Sraj count++; 424184728Sraj } else { 425184728Sraj if (pa == (prev_pa + count * PAGE_SIZE)) 426184728Sraj count++; 427184728Sraj else { 428184728Sraj error = blk_write_cont(di, prev_pa, 429184728Sraj count * PAGE_SIZE); 430184728Sraj if (error) 431184728Sraj goto fail; 432184728Sraj count = 1; 433184728Sraj prev_pa = pa; 434184728Sraj } 435184728Sraj } 436184728Sraj bits &= ~(1ul << bit); 437184728Sraj } 438184728Sraj } 439184728Sraj if (count) { 440184728Sraj error = blk_write_cont(di, prev_pa, count * PAGE_SIZE); 441184728Sraj if (error) 442184728Sraj goto fail; 443184728Sraj count = 0; 444184728Sraj prev_pa = 0; 445184728Sraj } 446184728Sraj 447184728Sraj /* Dump trailer */ 448184728Sraj error = dump_write(di, &kdh, 0, dumplo, sizeof(kdh)); 449184728Sraj if (error) 450184728Sraj goto fail; 451184728Sraj dumplo += sizeof(kdh); 452184728Sraj 453184728Sraj /* Signal completion, signoff and exit stage left. */ 454184728Sraj dump_write(di, NULL, 0, 0, 0); 455184728Sraj printf("\nDump complete\n"); 456184728Sraj return; 457184728Sraj 458184728Srajfail: 459184728Sraj if (error < 0) 460184728Sraj error = -error; 461184728Sraj 462184728Sraj if (error == ECANCELED) 463184728Sraj printf("\nDump aborted\n"); 464184728Sraj else if (error == ENOSPC) 465184728Sraj printf("\nDump failed. Partition too small.\n"); 466184728Sraj else 467184728Sraj printf("\n** DUMP FAILED (ERROR %d) **\n", error); 468184728Sraj} 469184728Sraj 470184728Srajvoid 471184728Srajdump_add_page(vm_paddr_t pa) 472184728Sraj{ 473184728Sraj int idx, bit; 474184728Sraj 475184728Sraj pa >>= PAGE_SHIFT; 476184728Sraj idx = pa >> 5; /* 2^5 = 32 */ 477184728Sraj bit = pa & 31; 478184728Sraj atomic_set_int(&vm_page_dump[idx], 1ul << bit); 479184728Sraj} 480184728Sraj 481184728Srajvoid 482184728Srajdump_drop_page(vm_paddr_t pa) 483184728Sraj{ 484184728Sraj int idx, bit; 485184728Sraj 486184728Sraj pa >>= PAGE_SHIFT; 487184728Sraj idx = pa >> 5; /* 2^5 = 32 */ 488184728Sraj bit = pa & 31; 489184728Sraj atomic_clear_int(&vm_page_dump[idx], 1ul << bit); 490184728Sraj} 491