1210284Sjmallett/*-
2215990Sjmallett * SPDX-License-Identifier: BSD-2-Clause
3215990Sjmallett *
4210284Sjmallett * Copyright (c) 2006 Peter Wemm
5210284Sjmallett *
6215990Sjmallett * Redistribution and use in source and binary forms, with or without
7215990Sjmallett * modification, are permitted provided that the following conditions
8215990Sjmallett * are met:
9210284Sjmallett * 1. Redistributions of source code must retain the above copyright
10215990Sjmallett *    notice, this list of conditions and the following disclaimer.
11215990Sjmallett * 2. Redistributions in binary form must reproduce the above copyright
12210284Sjmallett *    notice, this list of conditions and the following disclaimer in the
13215990Sjmallett *    documentation and/or other materials provided with the distribution.
14215990Sjmallett *
15215990Sjmallett * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
16215990Sjmallett * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
17215990Sjmallett * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
18215990Sjmallett * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
19215990Sjmallett * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
20215990Sjmallett * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
21215990Sjmallett * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
22215990Sjmallett * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
23215990Sjmallett * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
24215990Sjmallett * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
25215990Sjmallett * SUCH DAMAGE.
26215990Sjmallett */
27215990Sjmallett
28215990Sjmallett#include <sys/cdefs.h>
29215990Sjmallett/*
30215990Sjmallett * AMD64 machine dependent routines for kvm and minidumps.
31215990Sjmallett */
32215990Sjmallett
33215990Sjmallett#include <sys/param.h>
34215990Sjmallett#include <sys/endian.h>
35215990Sjmallett#include <stdint.h>
36215990Sjmallett#include <stdlib.h>
37215990Sjmallett#include <string.h>
38210284Sjmallett#include <unistd.h>
39210284Sjmallett#include <vm/vm.h>
40210284Sjmallett#include <kvm.h>
41210284Sjmallett
42210284Sjmallett#include "../../sys/amd64/include/minidump.h"
43210284Sjmallett
44210284Sjmallett#include <limits.h>
45215990Sjmallett
46210284Sjmallett#include "kvm_private.h"
47210284Sjmallett#include "kvm_amd64.h"
48210284Sjmallett
/* Round x up to a multiple of AMD64_PAGE_SIZE (via roundup2()). */
#define	amd64_round_page(x)	roundup2((kvaddr_t)(x), AMD64_PAGE_SIZE)
/*
 * True when the minidump header reachable through vm reports version 1.
 * The argument is parenthesized so that expressions such as `p + 1' or
 * `&state' expand correctly.
 */
#define	VM_IS_V1(vm)		((vm)->hdr.version == 1)
/*
 * Mask va with (AMD64_PAGE_SIZE - 1) for version 1 dumps and with
 * AMD64_PAGE_MASK otherwise.
 */
#define	VA_OFF(vm, va)		\
	(VM_IS_V1(vm) ? ((va) & (AMD64_PAGE_SIZE - 1)) : ((va) & AMD64_PAGE_MASK))
53210284Sjmallett
54215990Sjmallettstruct vmstate {
55210284Sjmallett	struct minidumphdr hdr;
56215990Sjmallett};
57215990Sjmallett
58215990Sjmallettstatic vm_prot_t
59215990Sjmallett_amd64_entry_to_prot(uint64_t entry)
60215990Sjmallett{
61215990Sjmallett	vm_prot_t prot = VM_PROT_READ;
62215990Sjmallett
63215990Sjmallett	if ((entry & AMD64_PG_RW) != 0)
64215990Sjmallett		prot |= VM_PROT_WRITE;
65215990Sjmallett	if ((entry & AMD64_PG_NX) == 0)
66215990Sjmallett		prot |= VM_PROT_EXECUTE;
67215990Sjmallett	return prot;
68215990Sjmallett}
69210284Sjmallett
/*
 * Version 2 minidumps use page directory entries, while version 1 minidumps
 * use page table entries.
 */
74210284Sjmallett
75210284Sjmallettstatic amd64_pde_t
76210284Sjmallett_amd64_pde_get(kvm_t *kd, u_long pdeindex)
77210284Sjmallett{
78210284Sjmallett	amd64_pde_t *pde = _kvm_pmap_get(kd, pdeindex, sizeof(*pde));
79210284Sjmallett
80215990Sjmallett	return le64toh(*pde);
81215990Sjmallett}
82210284Sjmallett
83215990Sjmallettstatic amd64_pte_t
84210284Sjmallett_amd64_pte_get(kvm_t *kd, u_long pteindex)
85210284Sjmallett{
86210284Sjmallett	amd64_pte_t *pte = _kvm_pmap_get(kd, pteindex, sizeof(*pte));
87210284Sjmallett
88210284Sjmallett	return le64toh(*pte);
89210284Sjmallett}
90210284Sjmallett
91210284Sjmallett/* Get the first page table entry for a given page directory index. */
92210284Sjmallettstatic amd64_pte_t *
93210284Sjmallett_amd64_pde_first_pte(kvm_t *kd, u_long pdeindex)
94210284Sjmallett{
95210284Sjmallett	u_long *pa;
96210284Sjmallett
97210284Sjmallett	pa = _kvm_pmap_get(kd, pdeindex, sizeof(amd64_pde_t));
98210284Sjmallett	if (pa == NULL)
99210284Sjmallett		return NULL;
100210284Sjmallett	return _kvm_map_get(kd, *pa & AMD64_PG_FRAME, AMD64_PAGE_SIZE);
101210284Sjmallett}
102210284Sjmallett
103210284Sjmallettstatic int
104210284Sjmallett_amd64_minidump_probe(kvm_t *kd)
105210284Sjmallett{
106210284Sjmallett
107210284Sjmallett	return (_kvm_probe_elf_kernel(kd, ELFCLASS64, EM_X86_64) &&
108210284Sjmallett	    _kvm_is_minidump(kd));
109210284Sjmallett}
110210284Sjmallett
111210284Sjmallettstatic void
112210284Sjmallett_amd64_minidump_freevtop(kvm_t *kd)
113210284Sjmallett{
114210284Sjmallett	struct vmstate *vm = kd->vmst;
115210284Sjmallett
116210284Sjmallett	free(vm);
117210284Sjmallett	kd->vmst = NULL;
118210284Sjmallett}
119210284Sjmallett
120210284Sjmallettstatic int
121210284Sjmallett_amd64_minidump_initvtop(kvm_t *kd)
122210284Sjmallett{
123210284Sjmallett	struct vmstate *vmst;
124210284Sjmallett	off_t off, dump_avail_off, sparse_off;
125210284Sjmallett
126210284Sjmallett	vmst = _kvm_malloc(kd, sizeof(*vmst));
127210284Sjmallett	if (vmst == NULL) {
128210284Sjmallett		_kvm_err(kd, kd->program, "cannot allocate vm");
129210284Sjmallett		return (-1);
130210284Sjmallett	}
131210284Sjmallett	kd->vmst = vmst;
132210284Sjmallett	if (pread(kd->pmfd, &vmst->hdr, sizeof(vmst->hdr), 0) !=
133210284Sjmallett	    sizeof(vmst->hdr)) {
134210284Sjmallett		_kvm_err(kd, kd->program, "cannot read dump header");
135210284Sjmallett		return (-1);
136210284Sjmallett	}
137210284Sjmallett	if (strncmp(MINIDUMP_MAGIC, vmst->hdr.magic, sizeof(vmst->hdr.magic)) != 0) {
138210284Sjmallett		_kvm_err(kd, kd->program, "not a minidump for this platform");
139210284Sjmallett		return (-1);
140210284Sjmallett	}
141210284Sjmallett
142210284Sjmallett	/*
143210284Sjmallett	 * NB: amd64 minidump header is binary compatible between version 1
144210284Sjmallett	 * and version 2; version 3 adds the dumpavailsize field
145210284Sjmallett	 */
146210284Sjmallett	vmst->hdr.version = le32toh(vmst->hdr.version);
147210284Sjmallett	if (vmst->hdr.version > MINIDUMP_VERSION || vmst->hdr.version < 1) {
148210284Sjmallett		_kvm_err(kd, kd->program, "wrong minidump version. expected %d got %d",
149210284Sjmallett		    MINIDUMP_VERSION, vmst->hdr.version);
150210284Sjmallett		return (-1);
151210284Sjmallett	}
152210284Sjmallett	vmst->hdr.msgbufsize = le32toh(vmst->hdr.msgbufsize);
153210284Sjmallett	vmst->hdr.bitmapsize = le32toh(vmst->hdr.bitmapsize);
154210284Sjmallett	vmst->hdr.pmapsize = le32toh(vmst->hdr.pmapsize);
155210284Sjmallett	vmst->hdr.kernbase = le64toh(vmst->hdr.kernbase);
156210284Sjmallett	vmst->hdr.dmapbase = le64toh(vmst->hdr.dmapbase);
157210284Sjmallett	vmst->hdr.dmapend = le64toh(vmst->hdr.dmapend);
158210284Sjmallett	vmst->hdr.dumpavailsize = vmst->hdr.version == MINIDUMP_VERSION ?
159210284Sjmallett	    le32toh(vmst->hdr.dumpavailsize) : 0;
160210284Sjmallett
161210284Sjmallett	/* Skip header and msgbuf */
162210284Sjmallett	dump_avail_off = AMD64_PAGE_SIZE + amd64_round_page(vmst->hdr.msgbufsize);
163210284Sjmallett
164210284Sjmallett	/* Skip dump_avail */
165210284Sjmallett	off = dump_avail_off + amd64_round_page(vmst->hdr.dumpavailsize);
166210284Sjmallett
167210284Sjmallett	sparse_off = off + amd64_round_page(vmst->hdr.bitmapsize) +
168210284Sjmallett	    amd64_round_page(vmst->hdr.pmapsize);
169210284Sjmallett	if (_kvm_pt_init(kd, vmst->hdr.dumpavailsize, dump_avail_off,
170210284Sjmallett	    vmst->hdr.bitmapsize, off, sparse_off, AMD64_PAGE_SIZE) == -1) {
171210284Sjmallett		return (-1);
172210284Sjmallett	}
173210284Sjmallett	off += amd64_round_page(vmst->hdr.bitmapsize);
174210284Sjmallett
175210284Sjmallett	if (_kvm_pmap_init(kd, vmst->hdr.pmapsize, off) == -1) {
176210284Sjmallett		return (-1);
177210284Sjmallett	}
178210284Sjmallett	off += amd64_round_page(vmst->hdr.pmapsize);
179210284Sjmallett
180210284Sjmallett	return (0);
181210284Sjmallett}
182210284Sjmallett
183210284Sjmallettstatic int
184210284Sjmallett_amd64_minidump_vatop_v1(kvm_t *kd, kvaddr_t va, off_t *pa)
185210284Sjmallett{
186210284Sjmallett	struct vmstate *vm;
187210284Sjmallett	amd64_physaddr_t offset;
188210284Sjmallett	amd64_pte_t pte;
189210284Sjmallett	kvaddr_t pteindex;
190210284Sjmallett	amd64_physaddr_t a;
191210284Sjmallett	off_t ofs;
192210284Sjmallett
193210284Sjmallett	vm = kd->vmst;
194210284Sjmallett	offset = va & AMD64_PAGE_MASK;
195210284Sjmallett
196210284Sjmallett	if (va >= vm->hdr.kernbase) {
197210284Sjmallett		pteindex = (va - vm->hdr.kernbase) >> AMD64_PAGE_SHIFT;
198210284Sjmallett		if (pteindex >= vm->hdr.pmapsize / sizeof(pte))
199210284Sjmallett			goto invalid;
200210284Sjmallett		pte = _amd64_pte_get(kd, pteindex);
201210284Sjmallett		if ((pte & AMD64_PG_V) == 0) {
202210284Sjmallett			_kvm_err(kd, kd->program,
203210284Sjmallett			    "_amd64_minidump_vatop_v1: pte not valid");
204210284Sjmallett			goto invalid;
205210284Sjmallett		}
206210284Sjmallett		a = pte & AMD64_PG_FRAME;
207210284Sjmallett		ofs = _kvm_pt_find(kd, a, AMD64_PAGE_SIZE);
208210284Sjmallett		if (ofs == -1) {
209210284Sjmallett			_kvm_err(kd, kd->program,
210210284Sjmallett	    "_amd64_minidump_vatop_v1: physical address 0x%jx not in minidump",
211210284Sjmallett			    (uintmax_t)a);
212210284Sjmallett			goto invalid;
213215990Sjmallett		}
214210284Sjmallett		*pa = ofs + offset;
215210284Sjmallett		return (AMD64_PAGE_SIZE - offset);
216210284Sjmallett	} else if (va >= vm->hdr.dmapbase && va < vm->hdr.dmapend) {
217210284Sjmallett		a = (va - vm->hdr.dmapbase) & ~AMD64_PAGE_MASK;
218210284Sjmallett		ofs = _kvm_pt_find(kd, a, AMD64_PAGE_SIZE);
219210284Sjmallett		if (ofs == -1) {
220210284Sjmallett			_kvm_err(kd, kd->program,
221210284Sjmallett    "_amd64_minidump_vatop_v1: direct map address 0x%jx not in minidump",
222210284Sjmallett			    (uintmax_t)va);
223210284Sjmallett			goto invalid;
224210284Sjmallett		}
225210284Sjmallett		*pa = ofs + offset;
226210284Sjmallett		return (AMD64_PAGE_SIZE - offset);
227210284Sjmallett	} else {
228210284Sjmallett		_kvm_err(kd, kd->program,
229210284Sjmallett	    "_amd64_minidump_vatop_v1: virtual address 0x%jx not minidumped",
230210284Sjmallett		    (uintmax_t)va);
231210284Sjmallett		goto invalid;
232210284Sjmallett	}
233210284Sjmallett
234210284Sjmallettinvalid:
235210284Sjmallett	_kvm_err(kd, 0, "invalid address (0x%jx)", (uintmax_t)va);
236210284Sjmallett	return (0);
237210284Sjmallett}
238210284Sjmallett
239210284Sjmallettstatic int
240210284Sjmallett_amd64_minidump_vatop(kvm_t *kd, kvaddr_t va, off_t *pa)
241210284Sjmallett{
242210284Sjmallett	amd64_pte_t pt[AMD64_NPTEPG];
243210284Sjmallett	struct vmstate *vm;
244210284Sjmallett	amd64_physaddr_t offset;
245210284Sjmallett	amd64_pde_t pde;
246210284Sjmallett	amd64_pte_t pte;
247210284Sjmallett	kvaddr_t pteindex;
248210284Sjmallett	kvaddr_t pdeindex;
249210284Sjmallett	amd64_physaddr_t a;
250210284Sjmallett	off_t ofs;
251210284Sjmallett
252210284Sjmallett	vm = kd->vmst;
253210284Sjmallett	offset = va & AMD64_PAGE_MASK;
254210284Sjmallett
255210284Sjmallett	if (va >= vm->hdr.kernbase) {
256210284Sjmallett		pdeindex = (va - vm->hdr.kernbase) >> AMD64_PDRSHIFT;
257210284Sjmallett		if (pdeindex >= vm->hdr.pmapsize / sizeof(pde))
258210284Sjmallett			goto invalid;
259210284Sjmallett		pde = _amd64_pde_get(kd, pdeindex);
260210284Sjmallett		if ((pde & AMD64_PG_V) == 0) {
261210284Sjmallett			_kvm_err(kd, kd->program,
262210284Sjmallett			    "_amd64_minidump_vatop: pde not valid");
263210284Sjmallett			goto invalid;
264210284Sjmallett		}
265210284Sjmallett		if ((pde & AMD64_PG_PS) == 0) {
266210284Sjmallett			a = pde & AMD64_PG_FRAME;
267210284Sjmallett			/* TODO: Just read the single PTE */
268210284Sjmallett			ofs = _kvm_pt_find(kd, a, AMD64_PAGE_SIZE);
269210284Sjmallett			if (ofs == -1) {
270210284Sjmallett				_kvm_err(kd, kd->program,
271210284Sjmallett				    "cannot find page table entry for %ju",
272210284Sjmallett				    (uintmax_t)a);
273210284Sjmallett				goto invalid;
274210284Sjmallett			}
275210284Sjmallett			if (pread(kd->pmfd, &pt, AMD64_PAGE_SIZE, ofs) !=
276210284Sjmallett			    AMD64_PAGE_SIZE) {
277210284Sjmallett				_kvm_err(kd, kd->program,
278210284Sjmallett				    "cannot read page table entry for %ju",
279210284Sjmallett				    (uintmax_t)a);
280210284Sjmallett				goto invalid;
281210284Sjmallett			}
282210284Sjmallett			pteindex = (va >> AMD64_PAGE_SHIFT) &
283210284Sjmallett			    (AMD64_NPTEPG - 1);
284210284Sjmallett			pte = le64toh(pt[pteindex]);
285210284Sjmallett			if ((pte & AMD64_PG_V) == 0) {
286210284Sjmallett				_kvm_err(kd, kd->program,
287210284Sjmallett				    "_amd64_minidump_vatop: pte not valid");
288210284Sjmallett				goto invalid;
289210284Sjmallett			}
290210284Sjmallett			a = pte & AMD64_PG_FRAME;
291210284Sjmallett		} else {
292210284Sjmallett			a = pde & AMD64_PG_PS_FRAME;
293210284Sjmallett			a += (va & AMD64_PDRMASK) ^ offset;
294210284Sjmallett		}
295210284Sjmallett		ofs = _kvm_pt_find(kd, a, AMD64_PAGE_SIZE);
296210284Sjmallett		if (ofs == -1) {
297210284Sjmallett			_kvm_err(kd, kd->program,
298210284Sjmallett	    "_amd64_minidump_vatop: physical address 0x%jx not in minidump",
299210284Sjmallett			    (uintmax_t)a);
300210284Sjmallett			goto invalid;
301210284Sjmallett		}
302210284Sjmallett		*pa = ofs + offset;
303210284Sjmallett		return (AMD64_PAGE_SIZE - offset);
304210284Sjmallett	} else if (va >= vm->hdr.dmapbase && va < vm->hdr.dmapend) {
305210284Sjmallett		a = (va - vm->hdr.dmapbase) & ~AMD64_PAGE_MASK;
306210284Sjmallett		ofs = _kvm_pt_find(kd, a, AMD64_PAGE_SIZE);
307210284Sjmallett		if (ofs == -1) {
308210284Sjmallett			_kvm_err(kd, kd->program,
309210284Sjmallett	    "_amd64_minidump_vatop: direct map address 0x%jx not in minidump",
310210284Sjmallett			    (uintmax_t)va);
311210284Sjmallett			goto invalid;
312210284Sjmallett		}
313210284Sjmallett		*pa = ofs + offset;
314210284Sjmallett		return (AMD64_PAGE_SIZE - offset);
315210284Sjmallett	} else {
316210284Sjmallett		_kvm_err(kd, kd->program,
317210284Sjmallett	    "_amd64_minidump_vatop: virtual address 0x%jx not minidumped",
318210284Sjmallett		    (uintmax_t)va);
319210284Sjmallett		goto invalid;
320210284Sjmallett	}
321210284Sjmallett
322210284Sjmallettinvalid:
323210284Sjmallett	_kvm_err(kd, 0, "invalid address (0x%jx)", (uintmax_t)va);
324210284Sjmallett	return (0);
325210284Sjmallett}
326
327static int
328_amd64_minidump_kvatop(kvm_t *kd, kvaddr_t va, off_t *pa)
329{
330
331	if (ISALIVE(kd)) {
332		_kvm_err(kd, 0,
333		    "_amd64_minidump_kvatop called in live kernel!");
334		return (0);
335	}
336	if (((struct vmstate *)kd->vmst)->hdr.version == 1)
337		return (_amd64_minidump_vatop_v1(kd, va, pa));
338	else
339		return (_amd64_minidump_vatop(kd, va, pa));
340}
341
342static int
343_amd64_minidump_walk_pages(kvm_t *kd, kvm_walk_pages_cb_t *cb, void *arg)
344{
345	struct vmstate *vm = kd->vmst;
346	u_long npdes = vm->hdr.pmapsize / sizeof(amd64_pde_t);
347	u_long bmindex, dva, pa, pdeindex, va;
348	struct kvm_bitmap bm;
349	int ret = 0;
350	vm_prot_t prot;
351	unsigned int pgsz = AMD64_PAGE_SIZE;
352
353	if (vm->hdr.version < 2)
354		return (0);
355
356	if (!_kvm_bitmap_init(&bm, vm->hdr.bitmapsize, &bmindex))
357		return (0);
358
359	for (pdeindex = 0; pdeindex < npdes; pdeindex++) {
360		amd64_pde_t pde = _amd64_pde_get(kd, pdeindex);
361		amd64_pte_t *ptes;
362		u_long i;
363
364		va = vm->hdr.kernbase + (pdeindex << AMD64_PDRSHIFT);
365		if ((pde & AMD64_PG_V) == 0)
366			continue;
367
368		if ((pde & AMD64_PG_PS) != 0) {
369			/*
370			 * Large page.  Iterate on each 4K page section
371			 * within this page.  This differs from 4K pages in
372			 * that every page here uses the same PDE to
373			 * generate permissions.
374			 */
375			pa = (pde & AMD64_PG_PS_FRAME) +
376			    ((va & AMD64_PDRMASK) ^ VA_OFF(vm, va));
377			dva = vm->hdr.dmapbase + pa;
378			_kvm_bitmap_set(&bm, _kvm_pa_bit_id(kd, pa, AMD64_PAGE_SIZE));
379			if (!_kvm_visit_cb(kd, cb, arg, pa, va, dva,
380			    _amd64_entry_to_prot(pde), AMD64_NBPDR, pgsz)) {
381				goto out;
382			}
383			continue;
384		}
385
386		/* 4K pages: pde references another page of entries. */
387		ptes = _amd64_pde_first_pte(kd, pdeindex);
388		/* Ignore page directory pages that were not dumped. */
389		if (ptes == NULL)
390			continue;
391
392		for (i = 0; i < AMD64_NPTEPG; i++) {
393			amd64_pte_t pte = (u_long)ptes[i];
394
395			pa = pte & AMD64_PG_FRAME;
396			dva = vm->hdr.dmapbase + pa;
397			if ((pte & AMD64_PG_V) != 0) {
398				_kvm_bitmap_set(&bm,
399				    _kvm_pa_bit_id(kd, pa, AMD64_PAGE_SIZE));
400				if (!_kvm_visit_cb(kd, cb, arg, pa, va, dva,
401				    _amd64_entry_to_prot(pte), pgsz, 0)) {
402					goto out;
403				}
404			}
405			va += AMD64_PAGE_SIZE;
406		}
407	}
408
409	while (_kvm_bitmap_next(&bm, &bmindex)) {
410		pa = _kvm_bit_id_pa(kd, bmindex, AMD64_PAGE_SIZE);
411		if (pa == _KVM_PA_INVALID)
412			break;
413		dva = vm->hdr.dmapbase + pa;
414		if (vm->hdr.dmapend < (dva + pgsz))
415			break;
416		va = 0;
417		/* amd64/pmap.c: create_pagetables(): dmap always R|W. */
418		prot = VM_PROT_READ | VM_PROT_WRITE;
419		if (!_kvm_visit_cb(kd, cb, arg, pa, va, dva, prot, pgsz, 0)) {
420			goto out;
421		}
422	}
423
424	ret = 1;
425
426out:
427	_kvm_bitmap_deinit(&bm);
428	return (ret);
429}
430
431static struct kvm_arch kvm_amd64_minidump = {
432	.ka_probe = _amd64_minidump_probe,
433	.ka_initvtop = _amd64_minidump_initvtop,
434	.ka_freevtop = _amd64_minidump_freevtop,
435	.ka_kvatop = _amd64_minidump_kvatop,
436	.ka_native = _amd64_native,
437	.ka_walk_pages = _amd64_minidump_walk_pages,
438};
439
440KVM_ARCH(kvm_amd64_minidump);
441