minidump_machdep.c revision 282065
1/*-
2 * Copyright (c) 2006 Peter Wemm
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 *
9 * 1. Redistributions of source code must retain the above copyright
10 *    notice, this list of conditions and the following disclaimer.
11 * 2. Redistributions in binary form must reproduce the above copyright
12 *    notice, this list of conditions and the following disclaimer in the
13 *    documentation and/or other materials provided with the distribution.
14 *
15 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
16 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
17 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
18 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
19 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
20 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
21 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
22 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
23 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
24 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25 */
26
27#include <sys/cdefs.h>
28__FBSDID("$FreeBSD: stable/10/sys/i386/i386/minidump_machdep.c 282065 2015-04-27 08:02:12Z kib $");
29
30#include "opt_watchdog.h"
31
32#include <sys/param.h>
33#include <sys/systm.h>
34#include <sys/conf.h>
35#include <sys/cons.h>
36#include <sys/kernel.h>
37#include <sys/kerneldump.h>
38#include <sys/msgbuf.h>
39#include <sys/watchdog.h>
40#include <vm/vm.h>
41#include <vm/pmap.h>
42#include <machine/atomic.h>
43#include <machine/elf.h>
44#include <machine/md_var.h>
45#include <machine/vmparam.h>
46#include <machine/minidump.h>
47
/* The leader/trailer dump header is written as a single 512-byte record. */
CTASSERT(sizeof(struct kerneldumpheader) == 512);

/*
 * Don't touch the first SIZEOF_METADATA bytes on the dump device. This
 * is to protect us from metadata and to protect metadata from us.
 */
#define	SIZEOF_METADATA		(64*1024)

/* Round x up to the next page boundary. */
#define	MD_ALIGN(x)	(((off_t)(x) + PAGE_MASK) & ~PAGE_MASK)
/* Round x up to the next multiple of DEV_BSIZE. */
#define	DEV_ALIGN(x)	(((off_t)(x) + (DEV_BSIZE-1)) & ~(DEV_BSIZE-1))

/*
 * Bitmap of physical pages to include in the dump: one bit per page,
 * 32 pages per word (see dump_add_page() and dump_drop_page()).
 */
uint32_t *vm_page_dump;
/* Size of the vm_page_dump bitmap, in bytes. */
int vm_page_dump_size;

/* Dump header, written both before and after the dump body. */
static struct kerneldumpheader kdh;
/* Offset on the dump device at which the next write will land. */
static off_t dumplo;

/* Handle chunked writes. */
/* Bytes of physical pages mapped at dump_va and not yet written out. */
static size_t fragsz;
/* Address returned by pmap_kenter_temporary() for the pending pages. */
static void *dump_va;
/*
 * counter: bytes written since the last progress message.
 * progress: bytes of the dump still to be written.
 */
static uint64_t counter, progress;

/* The bitmap index arithmetic (pa >> 5, pa & 31) relies on 32-bit words. */
CTASSERT(sizeof(*vm_page_dump) == 4);
#ifndef XEN
/* Without XEN the machine/physical address conversions are the identity. */
#define xpmap_mtop(x) (x)
#define xpmap_ptom(x) (x)
#endif
75
76
77static int
78is_dumpable(vm_paddr_t pa)
79{
80	int i;
81
82	for (i = 0; dump_avail[i] != 0 || dump_avail[i + 1] != 0; i += 2) {
83		if (pa >= dump_avail[i] && pa < dump_avail[i + 1])
84			return (1);
85	}
86	return (0);
87}
88
/*
 * Convert a page count to megabytes, rounding up.  The shift by 8 treats
 * 256 pages as one megabyte.
 */
#define	PG2MB(pgs)	(((pgs) + ((1 << 8) - 1)) >> 8)
90
91static int
92blk_flush(struct dumperinfo *di)
93{
94	int error;
95
96	if (fragsz == 0)
97		return (0);
98
99	error = dump_write(di, dump_va, 0, dumplo, fragsz);
100	dumplo += fragsz;
101	fragsz = 0;
102	return (error);
103}
104
105static int
106blk_write(struct dumperinfo *di, char *ptr, vm_paddr_t pa, size_t sz)
107{
108	size_t len;
109	int error, i, c;
110	u_int maxdumpsz;
111
112	maxdumpsz = min(di->maxiosize, MAXDUMPPGS * PAGE_SIZE);
113	if (maxdumpsz == 0)	/* seatbelt */
114		maxdumpsz = PAGE_SIZE;
115	error = 0;
116	if ((sz % PAGE_SIZE) != 0) {
117		printf("size not page aligned\n");
118		return (EINVAL);
119	}
120	if (ptr != NULL && pa != 0) {
121		printf("cant have both va and pa!\n");
122		return (EINVAL);
123	}
124	if (pa != 0 && (((uintptr_t)ptr) % PAGE_SIZE) != 0) {
125		printf("address not page aligned\n");
126		return (EINVAL);
127	}
128	if (ptr != NULL) {
129		/* If we're doing a virtual dump, flush any pre-existing pa pages */
130		error = blk_flush(di);
131		if (error)
132			return (error);
133	}
134	while (sz) {
135		len = maxdumpsz - fragsz;
136		if (len > sz)
137			len = sz;
138		counter += len;
139		progress -= len;
140		if (counter >> 24) {
141			printf(" %lld", PG2MB(progress >> PAGE_SHIFT));
142			counter &= (1<<24) - 1;
143		}
144
145		wdog_kern_pat(WD_LASTVAL);
146
147		if (ptr) {
148			error = dump_write(di, ptr, 0, dumplo, len);
149			if (error)
150				return (error);
151			dumplo += len;
152			ptr += len;
153			sz -= len;
154		} else {
155			for (i = 0; i < len; i += PAGE_SIZE)
156				dump_va = pmap_kenter_temporary(pa + i, (i + fragsz) >> PAGE_SHIFT);
157			fragsz += len;
158			pa += len;
159			sz -= len;
160			if (fragsz == maxdumpsz) {
161				error = blk_flush(di);
162				if (error)
163					return (error);
164			}
165		}
166
167		/* Check for user abort. */
168		c = cncheckc();
169		if (c == 0x03)
170			return (ECANCELED);
171		if (c != -1)
172			printf(" (CTRL-C to abort) ");
173	}
174
175	return (0);
176}
177
178/* A fake page table page, to avoid having to handle both 4K and 2M pages */
179static pt_entry_t fakept[NPTEPG];
180
181void
182minidumpsys(struct dumperinfo *di)
183{
184	uint64_t dumpsize;
185	uint32_t ptesize;
186	vm_offset_t va;
187	int error;
188	uint32_t bits;
189	uint64_t pa;
190	pd_entry_t *pd;
191	pt_entry_t *pt;
192	int i, j, k, bit;
193	struct minidumphdr mdhdr;
194
195	counter = 0;
196	/* Walk page table pages, set bits in vm_page_dump */
197	ptesize = 0;
198	for (va = KERNBASE; va < kernel_vm_end; va += NBPDR) {
199		/*
200		 * We always write a page, even if it is zero. Each
201		 * page written corresponds to 2MB of space
202		 */
203		ptesize += PAGE_SIZE;
204		pd = (pd_entry_t *)((uintptr_t)IdlePTD + KERNBASE);	/* always mapped! */
205		j = va >> PDRSHIFT;
206		if ((pd[j] & (PG_PS | PG_V)) == (PG_PS | PG_V))  {
207			/* This is an entire 2M page. */
208			pa = xpmap_mtop(pd[j] & PG_PS_FRAME);
209			for (k = 0; k < NPTEPG; k++) {
210				if (is_dumpable(pa))
211					dump_add_page(pa);
212				pa += PAGE_SIZE;
213			}
214			continue;
215		}
216		if ((pd[j] & PG_V) == PG_V) {
217			/* set bit for each valid page in this 2MB block */
218			pt = pmap_kenter_temporary(xpmap_mtop(pd[j] & PG_FRAME), 0);
219			for (k = 0; k < NPTEPG; k++) {
220				if ((pt[k] & PG_V) == PG_V) {
221					pa = xpmap_mtop(pt[k] & PG_FRAME);
222					if (is_dumpable(pa))
223						dump_add_page(pa);
224				}
225			}
226		} else {
227			/* nothing, we're going to dump a null page */
228		}
229	}
230
231	/* Calculate dump size. */
232	dumpsize = ptesize;
233	dumpsize += round_page(msgbufp->msg_size);
234	dumpsize += round_page(vm_page_dump_size);
235	for (i = 0; i < vm_page_dump_size / sizeof(*vm_page_dump); i++) {
236		bits = vm_page_dump[i];
237		while (bits) {
238			bit = bsfl(bits);
239			pa = (((uint64_t)i * sizeof(*vm_page_dump) * NBBY) + bit) * PAGE_SIZE;
240			/* Clear out undumpable pages now if needed */
241			if (is_dumpable(pa)) {
242				dumpsize += PAGE_SIZE;
243			} else {
244				dump_drop_page(pa);
245			}
246			bits &= ~(1ul << bit);
247		}
248	}
249	dumpsize += PAGE_SIZE;
250
251	/* Determine dump offset on device. */
252	if (di->mediasize < SIZEOF_METADATA + dumpsize + sizeof(kdh) * 2) {
253		error = ENOSPC;
254		goto fail;
255	}
256	dumplo = di->mediaoffset + di->mediasize - dumpsize;
257	dumplo -= sizeof(kdh) * 2;
258	progress = dumpsize;
259
260	/* Initialize mdhdr */
261	bzero(&mdhdr, sizeof(mdhdr));
262	strcpy(mdhdr.magic, MINIDUMP_MAGIC);
263	mdhdr.version = MINIDUMP_VERSION;
264	mdhdr.msgbufsize = msgbufp->msg_size;
265	mdhdr.bitmapsize = vm_page_dump_size;
266	mdhdr.ptesize = ptesize;
267	mdhdr.kernbase = KERNBASE;
268#if defined(PAE) || defined(PAE_TABLES)
269	mdhdr.paemode = 1;
270#endif
271
272	mkdumpheader(&kdh, KERNELDUMPMAGIC, KERNELDUMP_I386_VERSION, dumpsize, di->blocksize);
273
274	printf("Physical memory: %ju MB\n", ptoa((uintmax_t)physmem) / 1048576);
275	printf("Dumping %llu MB:", (long long)dumpsize >> 20);
276
277	/* Dump leader */
278	error = dump_write(di, &kdh, 0, dumplo, sizeof(kdh));
279	if (error)
280		goto fail;
281	dumplo += sizeof(kdh);
282
283	/* Dump my header */
284	bzero(&fakept, sizeof(fakept));
285	bcopy(&mdhdr, &fakept, sizeof(mdhdr));
286	error = blk_write(di, (char *)&fakept, 0, PAGE_SIZE);
287	if (error)
288		goto fail;
289
290	/* Dump msgbuf up front */
291	error = blk_write(di, (char *)msgbufp->msg_ptr, 0, round_page(msgbufp->msg_size));
292	if (error)
293		goto fail;
294
295	/* Dump bitmap */
296	error = blk_write(di, (char *)vm_page_dump, 0, round_page(vm_page_dump_size));
297	if (error)
298		goto fail;
299
300	/* Dump kernel page table pages */
301	for (va = KERNBASE; va < kernel_vm_end; va += NBPDR) {
302		/* We always write a page, even if it is zero */
303		pd = (pd_entry_t *)((uintptr_t)IdlePTD + KERNBASE);	/* always mapped! */
304		j = va >> PDRSHIFT;
305		if ((pd[j] & (PG_PS | PG_V)) == (PG_PS | PG_V))  {
306			/* This is a single 2M block. Generate a fake PTP */
307			pa = pd[j] & PG_PS_FRAME;
308			for (k = 0; k < NPTEPG; k++) {
309				fakept[k] = (pa + (k * PAGE_SIZE)) | PG_V | PG_RW | PG_A | PG_M;
310			}
311			error = blk_write(di, (char *)&fakept, 0, PAGE_SIZE);
312			if (error)
313				goto fail;
314			/* flush, in case we reuse fakept in the same block */
315			error = blk_flush(di);
316			if (error)
317				goto fail;
318			continue;
319		}
320		if ((pd[j] & PG_V) == PG_V) {
321			pa = xpmap_mtop(pd[j] & PG_FRAME);
322#ifndef XEN
323			error = blk_write(di, 0, pa, PAGE_SIZE);
324#else
325			pt = pmap_kenter_temporary(pa, 0);
326			memcpy(fakept, pt, PAGE_SIZE);
327			for (i = 0; i < NPTEPG; i++)
328				fakept[i] = xpmap_mtop(fakept[i]);
329			error = blk_write(di, (char *)&fakept, 0, PAGE_SIZE);
330			if (error)
331				goto fail;
332			/* flush, in case we reuse fakept in the same block */
333			error = blk_flush(di);
334			if (error)
335				goto fail;
336			bzero(fakept, sizeof(fakept));
337#endif
338
339			if (error)
340				goto fail;
341		} else {
342			bzero(fakept, sizeof(fakept));
343			error = blk_write(di, (char *)&fakept, 0, PAGE_SIZE);
344			if (error)
345				goto fail;
346			/* flush, in case we reuse fakept in the same block */
347			error = blk_flush(di);
348			if (error)
349				goto fail;
350		}
351	}
352
353	/* Dump memory chunks */
354	/* XXX cluster it up and use blk_dump() */
355	for (i = 0; i < vm_page_dump_size / sizeof(*vm_page_dump); i++) {
356		bits = vm_page_dump[i];
357		while (bits) {
358			bit = bsfl(bits);
359			pa = (((uint64_t)i * sizeof(*vm_page_dump) * NBBY) + bit) * PAGE_SIZE;
360			error = blk_write(di, 0, pa, PAGE_SIZE);
361			if (error)
362				goto fail;
363			bits &= ~(1ul << bit);
364		}
365	}
366
367	error = blk_flush(di);
368	if (error)
369		goto fail;
370
371	/* Dump trailer */
372	error = dump_write(di, &kdh, 0, dumplo, sizeof(kdh));
373	if (error)
374		goto fail;
375	dumplo += sizeof(kdh);
376
377	/* Signal completion, signoff and exit stage left. */
378	dump_write(di, NULL, 0, 0, 0);
379	printf("\nDump complete\n");
380	return;
381
382 fail:
383	if (error < 0)
384		error = -error;
385
386	if (error == ECANCELED)
387		printf("\nDump aborted\n");
388	else if (error == ENOSPC)
389		printf("\nDump failed. Partition too small.\n");
390	else
391		printf("\n** DUMP FAILED (ERROR %d) **\n", error);
392}
393
394void
395dump_add_page(vm_paddr_t pa)
396{
397	int idx, bit;
398
399	pa >>= PAGE_SHIFT;
400	idx = pa >> 5;		/* 2^5 = 32 */
401	bit = pa & 31;
402	atomic_set_int(&vm_page_dump[idx], 1ul << bit);
403}
404
405void
406dump_drop_page(vm_paddr_t pa)
407{
408	int idx, bit;
409
410	pa >>= PAGE_SHIFT;
411	idx = pa >> 5;		/* 2^5 = 32 */
412	bit = pa & 31;
413	atomic_clear_int(&vm_page_dump[idx], 1ul << bit);
414}
415
416