1184728Sraj/*-
2184728Sraj * Copyright (c) 2008 Semihalf, Grzegorz Bernacki
3184728Sraj * All rights reserved.
4184728Sraj *
5184728Sraj * Redistribution and use in source and binary forms, with or without
6184728Sraj * modification, are permitted provided that the following conditions
7184728Sraj * are met:
8184728Sraj *
9184728Sraj * 1. Redistributions of source code must retain the above copyright
10184728Sraj *    notice, this list of conditions and the following disclaimer.
11184728Sraj * 2. Redistributions in binary form must reproduce the above copyright
12184728Sraj *    notice, this list of conditions and the following disclaimer in the
13184728Sraj *    documentation and/or other materials provided with the distribution.
14184728Sraj *
15184728Sraj * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
16184728Sraj * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
17184728Sraj * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
18184728Sraj * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
19184728Sraj * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
20184728Sraj * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
21184728Sraj * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
22184728Sraj * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
23184728Sraj * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
24184728Sraj * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25184728Sraj *
26184728Sraj * from: FreeBSD: src/sys/i386/i386/minidump_machdep.c,v 1.6 2008/08/17 23:27:27
27184728Sraj */
28184728Sraj
29184728Sraj#include <sys/cdefs.h>
30184728Sraj__FBSDID("$FreeBSD$");
31184728Sraj
32221173Sattilio#include "opt_watchdog.h"
33221173Sattilio
34184728Sraj#include <sys/param.h>
35184728Sraj#include <sys/systm.h>
36184728Sraj#include <sys/conf.h>
37184728Sraj#include <sys/cons.h>
38184728Sraj#include <sys/kernel.h>
39184728Sraj#include <sys/kerneldump.h>
40184728Sraj#include <sys/msgbuf.h>
41221173Sattilio#ifdef SW_WATCHDOG
42221173Sattilio#include <sys/watchdog.h>
43221173Sattilio#endif
44184728Sraj#include <vm/vm.h>
45184728Sraj#include <vm/pmap.h>
46184728Sraj#include <machine/pmap.h>
47184728Sraj#include <machine/atomic.h>
48184728Sraj#include <machine/elf.h>
49184728Sraj#include <machine/md_var.h>
50184728Sraj#include <machine/vmparam.h>
51184728Sraj#include <machine/minidump.h>
52184728Sraj#include <machine/cpufunc.h>
53184728Sraj
54184728SrajCTASSERT(sizeof(struct kerneldumpheader) == 512);
55184728Sraj
56184728Sraj/*
57184728Sraj * Don't touch the first SIZEOF_METADATA bytes on the dump device. This
58184728Sraj * is to protect us from metadata and to protect metadata from us.
59184728Sraj */
60184728Sraj#define	SIZEOF_METADATA		(64*1024)
61184728Sraj
62184728Srajuint32_t *vm_page_dump;
63184728Srajint vm_page_dump_size;
64184728Sraj
65184728Srajstatic struct kerneldumpheader kdh;
66184728Srajstatic off_t dumplo;
67184728Sraj
68184728Sraj/* Handle chunked writes. */
69184728Srajstatic size_t fragsz, offset;
70184728Srajstatic void *dump_va;
71184728Srajstatic uint64_t counter, progress;
72184728Sraj
73184728SrajCTASSERT(sizeof(*vm_page_dump) == 4);
74184728Sraj
75184728Srajstatic int
76184728Srajis_dumpable(vm_paddr_t pa)
77184728Sraj{
78184728Sraj	int i;
79184728Sraj
80184728Sraj	for (i = 0; dump_avail[i] != 0 || dump_avail[i + 1] != 0; i += 2) {
81184728Sraj		if (pa >= dump_avail[i] && pa < dump_avail[i + 1])
82184728Sraj			return (1);
83184728Sraj	}
84184728Sraj	return (0);
85184728Sraj}
86184728Sraj
/*
 * Convert a page count to megabytes, rounding up.
 * Uses 256 pages per megabyte, i.e. assumes 4KB pages -- TODO confirm
 * against PAGE_SIZE for this platform.
 */
#define	PG2MB(pgs)	(((pgs) + ((1 << 8) - 1)) >> 8)
88184728Sraj
89184728Srajstatic int
90184728Srajblk_flush(struct dumperinfo *di)
91184728Sraj{
92184728Sraj	int error;
93184728Sraj
94184728Sraj	if (fragsz == 0)
95184728Sraj		return (0);
96184728Sraj
97184728Sraj	error = dump_write(di, (char*)dump_va + offset, 0, dumplo, fragsz - offset);
98184728Sraj	dumplo += (fragsz - offset);
99184728Sraj	fragsz = 0;
100184728Sraj	offset = 0;
101184728Sraj	return (error);
102184728Sraj}
103184728Sraj
104184728Srajstatic int
105184728Srajblk_write(struct dumperinfo *di, char *ptr, vm_paddr_t pa, size_t sz)
106184728Sraj{
107184728Sraj	size_t len;
108184728Sraj	int error, i, c;
109184728Sraj	u_int maxdumpsz;
110184728Sraj
111184728Sraj	maxdumpsz = di->maxiosize;
112184728Sraj
113184728Sraj	if (maxdumpsz == 0)	/* seatbelt */
114184728Sraj		maxdumpsz = PAGE_SIZE;
115184728Sraj
116184728Sraj	error = 0;
117184728Sraj
118184728Sraj	if (ptr != NULL && pa != 0) {
119184728Sraj		printf("cant have both va and pa!\n");
120184728Sraj		return (EINVAL);
121184728Sraj	}
122184728Sraj
123184728Sraj	if (ptr != NULL) {
124184728Sraj		/* If we're doing a virtual dump, flush any pre-existing pa pages */
125184728Sraj		error = blk_flush(di);
126184728Sraj		if (error)
127184728Sraj			return (error);
128184728Sraj	}
129184728Sraj
130184728Sraj	while (sz) {
131184728Sraj		if (fragsz == 0) {
132184728Sraj			offset = pa & PAGE_MASK;
133184728Sraj			fragsz += offset;
134184728Sraj		}
135184728Sraj		len = maxdumpsz - fragsz;
136184728Sraj		if (len > sz)
137184728Sraj			len = sz;
138184728Sraj		counter += len;
139184728Sraj		progress -= len;
140184728Sraj
141184728Sraj		if (counter >> 22) {
142184728Sraj			printf(" %lld", PG2MB(progress >> PAGE_SHIFT));
143184728Sraj			counter &= (1<<22) - 1;
144184728Sraj		}
145184728Sraj
146221173Sattilio#ifdef SW_WATCHDOG
147221173Sattilio		wdog_kern_pat(WD_LASTVAL);
148221173Sattilio#endif
149184728Sraj		if (ptr) {
150184728Sraj			error = dump_write(di, ptr, 0, dumplo, len);
151184728Sraj			if (error)
152184728Sraj				return (error);
153184728Sraj			dumplo += len;
154184728Sraj			ptr += len;
155184728Sraj			sz -= len;
156184728Sraj		} else {
157184728Sraj			for (i = 0; i < len; i += PAGE_SIZE)
158184728Sraj				dump_va = pmap_kenter_temp(pa + i,
159184728Sraj				    (i + fragsz) >> PAGE_SHIFT);
160184728Sraj			fragsz += len;
161184728Sraj			pa += len;
162184728Sraj			sz -= len;
163184728Sraj			if (fragsz == maxdumpsz) {
164184728Sraj				error = blk_flush(di);
165184728Sraj				if (error)
166184728Sraj					return (error);
167184728Sraj			}
168184728Sraj		}
169184728Sraj
170184728Sraj		/* Check for user abort. */
171184728Sraj		c = cncheckc();
172184728Sraj		if (c == 0x03)
173184728Sraj			return (ECANCELED);
174184728Sraj		if (c != -1)
175184728Sraj			printf(" (CTRL-C to abort) ");
176184728Sraj	}
177184728Sraj
178184728Sraj	return (0);
179184728Sraj}
180184728Sraj
181184728Srajstatic int
182184728Srajblk_write_cont(struct dumperinfo *di, vm_paddr_t pa, size_t sz)
183184728Sraj{
184184728Sraj	int error;
185184728Sraj
186184728Sraj	error = blk_write(di, 0, pa, sz);
187184728Sraj	if (error)
188184728Sraj		return (error);
189184728Sraj
190184728Sraj	error = blk_flush(di);
191184728Sraj	if (error)
192184728Sraj		return (error);
193184728Sraj
194184728Sraj	return (0);
195184728Sraj}
196184728Sraj
197184728Sraj/* A fake page table page, to avoid having to handle both 4K and 2M pages */
198184728Srajstatic pt_entry_t fakept[NPTEPG];
199184728Sraj
200184728Srajvoid
201184728Srajminidumpsys(struct dumperinfo *di)
202184728Sraj{
203184728Sraj	struct minidumphdr mdhdr;
204184728Sraj	uint64_t dumpsize;
205184728Sraj	uint32_t ptesize;
206184728Sraj	uint32_t bits;
207184728Sraj	uint32_t pa, prev_pa = 0, count = 0;
208184728Sraj	vm_offset_t va;
209184728Sraj	pd_entry_t *pdp;
210184728Sraj	pt_entry_t *pt, *ptp;
211184728Sraj	int i, k, bit, error;
212184728Sraj	char *addr;
213184728Sraj
214184728Sraj	/* Flush cache */
215184728Sraj	cpu_idcache_wbinv_all();
216184728Sraj	cpu_l2cache_wbinv_all();
217184728Sraj
218184728Sraj	counter = 0;
219184728Sraj	/* Walk page table pages, set bits in vm_page_dump */
220184728Sraj	ptesize = 0;
221184728Sraj	for (va = KERNBASE; va < kernel_vm_end; va += NBPDR) {
222184728Sraj		/*
223184728Sraj		 * We always write a page, even if it is zero. Each
224184728Sraj		 * page written corresponds to 2MB of space
225184728Sraj		 */
226184728Sraj		ptesize += L2_TABLE_SIZE_REAL;
227184728Sraj		pmap_get_pde_pte(pmap_kernel(), va, &pdp, &ptp);
228184728Sraj		if (pmap_pde_v(pdp) && pmap_pde_section(pdp)) {
229184728Sraj			/* This is a section mapping 1M page. */
230184728Sraj			pa = (*pdp & L1_S_ADDR_MASK) | (va & ~L1_S_ADDR_MASK);
231184728Sraj			for (k = 0; k < (L1_S_SIZE / PAGE_SIZE); k++) {
232184728Sraj				if (is_dumpable(pa))
233184728Sraj					dump_add_page(pa);
234184728Sraj				pa += PAGE_SIZE;
235184728Sraj			}
236184728Sraj			continue;
237184728Sraj		}
238184728Sraj		if (pmap_pde_v(pdp) && pmap_pde_page(pdp)) {
239184728Sraj			/* Set bit for each valid page in this 1MB block */
240184728Sraj			addr = pmap_kenter_temp(*pdp & L1_C_ADDR_MASK, 0);
241184728Sraj			pt = (pt_entry_t*)(addr +
242184728Sraj			    (((uint32_t)*pdp  & L1_C_ADDR_MASK) & PAGE_MASK));
243184728Sraj			for (k = 0; k < 256; k++) {
244184728Sraj				if ((pt[k] & L2_TYPE_MASK) == L2_TYPE_L) {
245184728Sraj					pa = (pt[k] & L2_L_FRAME) |
246184728Sraj					    (va & L2_L_OFFSET);
247184728Sraj					for (i = 0; i < 16; i++) {
248184728Sraj						if (is_dumpable(pa))
249184728Sraj							dump_add_page(pa);
250184728Sraj						k++;
251184728Sraj						pa += PAGE_SIZE;
252184728Sraj					}
253184728Sraj				} else if ((pt[k] & L2_TYPE_MASK) == L2_TYPE_S) {
254184728Sraj					pa = (pt[k] & L2_S_FRAME) |
255184728Sraj					    (va & L2_S_OFFSET);
256184728Sraj					if (is_dumpable(pa))
257184728Sraj						dump_add_page(pa);
258184728Sraj				}
259184728Sraj			}
260184728Sraj		} else {
261184728Sraj			/* Nothing, we're going to dump a null page */
262184728Sraj		}
263184728Sraj	}
264184728Sraj
265184728Sraj	/* Calculate dump size. */
266184728Sraj	dumpsize = ptesize;
267184728Sraj	dumpsize += round_page(msgbufp->msg_size);
268184728Sraj	dumpsize += round_page(vm_page_dump_size);
269184728Sraj
270184728Sraj	for (i = 0; i < vm_page_dump_size / sizeof(*vm_page_dump); i++) {
271184728Sraj		bits = vm_page_dump[i];
272184728Sraj		while (bits) {
273184728Sraj			bit = ffs(bits) - 1;
274184728Sraj			pa = (((uint64_t)i * sizeof(*vm_page_dump) * NBBY) +
275184728Sraj			    bit) * PAGE_SIZE;
276184728Sraj			/* Clear out undumpable pages now if needed */
277184728Sraj			if (is_dumpable(pa))
278184728Sraj				dumpsize += PAGE_SIZE;
279184728Sraj			else
280184728Sraj				dump_drop_page(pa);
281184728Sraj			bits &= ~(1ul << bit);
282184728Sraj		}
283184728Sraj	}
284184728Sraj
285184728Sraj	dumpsize += PAGE_SIZE;
286184728Sraj
287184728Sraj	/* Determine dump offset on device. */
288184728Sraj	if (di->mediasize < SIZEOF_METADATA + dumpsize + sizeof(kdh) * 2) {
289184728Sraj		error = ENOSPC;
290184728Sraj		goto fail;
291184728Sraj	}
292184728Sraj
293184728Sraj	dumplo = di->mediaoffset + di->mediasize - dumpsize;
294184728Sraj	dumplo -= sizeof(kdh) * 2;
295184728Sraj	progress = dumpsize;
296184728Sraj
297184728Sraj	/* Initialize mdhdr */
298184728Sraj	bzero(&mdhdr, sizeof(mdhdr));
299184728Sraj	strcpy(mdhdr.magic, MINIDUMP_MAGIC);
300184728Sraj	mdhdr.version = MINIDUMP_VERSION;
301184728Sraj	mdhdr.msgbufsize = msgbufp->msg_size;
302184728Sraj	mdhdr.bitmapsize = vm_page_dump_size;
303184728Sraj	mdhdr.ptesize = ptesize;
304184728Sraj	mdhdr.kernbase = KERNBASE;
305184728Sraj
306184728Sraj	mkdumpheader(&kdh, KERNELDUMPMAGIC, KERNELDUMP_ARM_VERSION, dumpsize,
307184728Sraj	    di->blocksize);
308184728Sraj
309184728Sraj	printf("Physical memory: %u MB\n", ptoa((uintmax_t)physmem) / 1048576);
310184728Sraj	printf("Dumping %llu MB:", (long long)dumpsize >> 20);
311184728Sraj
312184728Sraj	/* Dump leader */
313184728Sraj	error = dump_write(di, &kdh, 0, dumplo, sizeof(kdh));
314184728Sraj	if (error)
315184728Sraj		goto fail;
316184728Sraj	dumplo += sizeof(kdh);
317184728Sraj
318184728Sraj	/* Dump my header */
319184728Sraj	bzero(&fakept, sizeof(fakept));
320184728Sraj	bcopy(&mdhdr, &fakept, sizeof(mdhdr));
321184728Sraj	error = blk_write(di, (char *)&fakept, 0, PAGE_SIZE);
322184728Sraj	if (error)
323184728Sraj		goto fail;
324184728Sraj
325184728Sraj	/* Dump msgbuf up front */
326184728Sraj	error = blk_write(di, (char *)msgbufp->msg_ptr, 0, round_page(msgbufp->msg_size));
327184728Sraj	if (error)
328184728Sraj		goto fail;
329184728Sraj
330184728Sraj	/* Dump bitmap */
331184728Sraj	error = blk_write(di, (char *)vm_page_dump, 0,
332184728Sraj	    round_page(vm_page_dump_size));
333184728Sraj	if (error)
334184728Sraj		goto fail;
335184728Sraj
336184728Sraj	/* Dump kernel page table pages */
337184728Sraj	for (va = KERNBASE; va < kernel_vm_end; va += NBPDR) {
338184728Sraj		/* We always write a page, even if it is zero */
339184728Sraj		pmap_get_pde_pte(pmap_kernel(), va, &pdp, &ptp);
340184728Sraj
341184728Sraj		if (pmap_pde_v(pdp) && pmap_pde_section(pdp))  {
342184728Sraj			if (count) {
343184728Sraj				error = blk_write_cont(di, prev_pa,
344184728Sraj				    count * L2_TABLE_SIZE_REAL);
345184728Sraj				if (error)
346184728Sraj					goto fail;
347184728Sraj				count = 0;
348184728Sraj				prev_pa = 0;
349184728Sraj			}
350184728Sraj			/* This is a single 2M block. Generate a fake PTP */
351184728Sraj			pa = (*pdp & L1_S_ADDR_MASK) | (va & ~L1_S_ADDR_MASK);
352184728Sraj			for (k = 0; k < (L1_S_SIZE / PAGE_SIZE); k++) {
353184728Sraj				fakept[k] = L2_S_PROTO | (pa + (k * PAGE_SIZE)) |
354184728Sraj				    L2_S_PROT(PTE_KERNEL,
355184728Sraj				    VM_PROT_READ | VM_PROT_WRITE);
356184728Sraj			}
357184728Sraj			error = blk_write(di, (char *)&fakept, 0,
358184728Sraj			    L2_TABLE_SIZE_REAL);
359184728Sraj			if (error)
360184728Sraj				goto fail;
361184728Sraj			/* Flush, in case we reuse fakept in the same block */
362184728Sraj			error = blk_flush(di);
363184728Sraj			if (error)
364184728Sraj				goto fail;
365184728Sraj			continue;
366184728Sraj		}
367184728Sraj		if (pmap_pde_v(pdp) && pmap_pde_page(pdp)) {
368184728Sraj			pa = *pdp & L1_C_ADDR_MASK;
369184728Sraj			if (!count) {
370184728Sraj				prev_pa = pa;
371184728Sraj				count++;
372184728Sraj			}
373184728Sraj			else {
374184728Sraj				if (pa == (prev_pa + count * L2_TABLE_SIZE_REAL))
375184728Sraj					count++;
376184728Sraj				else {
377184728Sraj					error = blk_write_cont(di, prev_pa,
378184728Sraj					    count * L2_TABLE_SIZE_REAL);
379184728Sraj					if (error)
380184728Sraj						goto fail;
381184728Sraj					count = 1;
382184728Sraj					prev_pa = pa;
383184728Sraj				}
384184728Sraj			}
385184728Sraj		} else {
386184728Sraj			if (count) {
387184728Sraj				error = blk_write_cont(di, prev_pa,
388184728Sraj				    count * L2_TABLE_SIZE_REAL);
389184728Sraj				if (error)
390184728Sraj					goto fail;
391184728Sraj				count = 0;
392184728Sraj				prev_pa = 0;
393184728Sraj			}
394184728Sraj			bzero(fakept, sizeof(fakept));
395184728Sraj			error = blk_write(di, (char *)&fakept, 0,
396184728Sraj			    L2_TABLE_SIZE_REAL);
397184728Sraj			if (error)
398184728Sraj				goto fail;
399184728Sraj			/* Flush, in case we reuse fakept in the same block */
400184728Sraj			error = blk_flush(di);
401184728Sraj			if (error)
402184728Sraj				goto fail;
403184728Sraj		}
404184728Sraj	}
405184728Sraj
406184728Sraj	if (count) {
407184728Sraj		error = blk_write_cont(di, prev_pa, count * L2_TABLE_SIZE_REAL);
408184728Sraj		if (error)
409184728Sraj			goto fail;
410184728Sraj		count = 0;
411184728Sraj		prev_pa = 0;
412184728Sraj	}
413184728Sraj
414184728Sraj	/* Dump memory chunks */
415184728Sraj	for (i = 0; i < vm_page_dump_size / sizeof(*vm_page_dump); i++) {
416184728Sraj		bits = vm_page_dump[i];
417184728Sraj		while (bits) {
418184728Sraj			bit = ffs(bits) - 1;
419184728Sraj			pa = (((uint64_t)i * sizeof(*vm_page_dump) * NBBY) +
420184728Sraj			    bit) * PAGE_SIZE;
421184728Sraj			if (!count) {
422184728Sraj				prev_pa = pa;
423184728Sraj				count++;
424184728Sraj			} else {
425184728Sraj				if (pa == (prev_pa + count * PAGE_SIZE))
426184728Sraj					count++;
427184728Sraj				else {
428184728Sraj					error = blk_write_cont(di, prev_pa,
429184728Sraj					    count * PAGE_SIZE);
430184728Sraj					if (error)
431184728Sraj						goto fail;
432184728Sraj					count = 1;
433184728Sraj					prev_pa = pa;
434184728Sraj				}
435184728Sraj			}
436184728Sraj			bits &= ~(1ul << bit);
437184728Sraj		}
438184728Sraj	}
439184728Sraj	if (count) {
440184728Sraj		error = blk_write_cont(di, prev_pa, count * PAGE_SIZE);
441184728Sraj		if (error)
442184728Sraj			goto fail;
443184728Sraj		count = 0;
444184728Sraj		prev_pa = 0;
445184728Sraj	}
446184728Sraj
447184728Sraj	/* Dump trailer */
448184728Sraj	error = dump_write(di, &kdh, 0, dumplo, sizeof(kdh));
449184728Sraj	if (error)
450184728Sraj		goto fail;
451184728Sraj	dumplo += sizeof(kdh);
452184728Sraj
453184728Sraj	/* Signal completion, signoff and exit stage left. */
454184728Sraj	dump_write(di, NULL, 0, 0, 0);
455184728Sraj	printf("\nDump complete\n");
456184728Sraj	return;
457184728Sraj
458184728Srajfail:
459184728Sraj	if (error < 0)
460184728Sraj		error = -error;
461184728Sraj
462184728Sraj	if (error == ECANCELED)
463184728Sraj		printf("\nDump aborted\n");
464184728Sraj	else if (error == ENOSPC)
465184728Sraj		printf("\nDump failed. Partition too small.\n");
466184728Sraj	else
467184728Sraj		printf("\n** DUMP FAILED (ERROR %d) **\n", error);
468184728Sraj}
469184728Sraj
470184728Srajvoid
471184728Srajdump_add_page(vm_paddr_t pa)
472184728Sraj{
473184728Sraj	int idx, bit;
474184728Sraj
475184728Sraj	pa >>= PAGE_SHIFT;
476184728Sraj	idx = pa >> 5;		/* 2^5 = 32 */
477184728Sraj	bit = pa & 31;
478184728Sraj	atomic_set_int(&vm_page_dump[idx], 1ul << bit);
479184728Sraj}
480184728Sraj
481184728Srajvoid
482184728Srajdump_drop_page(vm_paddr_t pa)
483184728Sraj{
484184728Sraj	int idx, bit;
485184728Sraj
486184728Sraj	pa >>= PAGE_SHIFT;
487184728Sraj	idx = pa >> 5;		/* 2^5 = 32 */
488184728Sraj	bit = pa & 31;
489184728Sraj	atomic_clear_int(&vm_page_dump[idx], 1ul << bit);
490184728Sraj}
491