1210284Sjmallett/*- 2215990Sjmallett * SPDX-License-Identifier: BSD-2-Clause 3215990Sjmallett * 4210284Sjmallett * Copyright (c) 2006 Peter Wemm 5210284Sjmallett * 6215990Sjmallett * Redistribution and use in source and binary forms, with or without 7215990Sjmallett * modification, are permitted provided that the following conditions 8215990Sjmallett * are met: 9210284Sjmallett * 1. Redistributions of source code must retain the above copyright 10215990Sjmallett * notice, this list of conditions and the following disclaimer. 11215990Sjmallett * 2. Redistributions in binary form must reproduce the above copyright 12210284Sjmallett * notice, this list of conditions and the following disclaimer in the 13215990Sjmallett * documentation and/or other materials provided with the distribution. 14215990Sjmallett * 15215990Sjmallett * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 16215990Sjmallett * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 17215990Sjmallett * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 18215990Sjmallett * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 19215990Sjmallett * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 20215990Sjmallett * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 21215990Sjmallett * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 22215990Sjmallett * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 23215990Sjmallett * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 24215990Sjmallett * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 25215990Sjmallett * SUCH DAMAGE. 26215990Sjmallett */ 27215990Sjmallett 28215990Sjmallett#include <sys/cdefs.h> 29215990Sjmallett/* 30215990Sjmallett * AMD64 machine dependent routines for kvm and minidumps. 31215990Sjmallett */ 32215990Sjmallett 33215990Sjmallett#include <sys/param.h> 34215990Sjmallett#include <sys/endian.h> 35215990Sjmallett#include <stdint.h> 36215990Sjmallett#include <stdlib.h> 37215990Sjmallett#include <string.h> 38210284Sjmallett#include <unistd.h> 39210284Sjmallett#include <vm/vm.h> 40210284Sjmallett#include <kvm.h> 41210284Sjmallett 42210284Sjmallett#include "../../sys/amd64/include/minidump.h" 43210284Sjmallett 44210284Sjmallett#include <limits.h> 45215990Sjmallett 46210284Sjmallett#include "kvm_private.h" 47210284Sjmallett#include "kvm_amd64.h" 48210284Sjmallett 49210284Sjmallett#define amd64_round_page(x) roundup2((kvaddr_t)(x), AMD64_PAGE_SIZE) 50210284Sjmallett#define VM_IS_V1(vm) (vm->hdr.version == 1) 51210284Sjmallett#define VA_OFF(vm, va) \ 52210284Sjmallett (VM_IS_V1(vm) ? ((va) & (AMD64_PAGE_SIZE - 1)) : ((va) & AMD64_PAGE_MASK)) 53210284Sjmallett 54215990Sjmallettstruct vmstate { 55210284Sjmallett struct minidumphdr hdr; 56215990Sjmallett}; 57215990Sjmallett 58215990Sjmallettstatic vm_prot_t 59215990Sjmallett_amd64_entry_to_prot(uint64_t entry) 60215990Sjmallett{ 61215990Sjmallett vm_prot_t prot = VM_PROT_READ; 62215990Sjmallett 63215990Sjmallett if ((entry & AMD64_PG_RW) != 0) 64215990Sjmallett prot |= VM_PROT_WRITE; 65215990Sjmallett if ((entry & AMD64_PG_NX) == 0) 66215990Sjmallett prot |= VM_PROT_EXECUTE; 67215990Sjmallett return prot; 68215990Sjmallett} 69210284Sjmallett 70215990Sjmallett/* 71210284Sjmallett * Version 2 minidumps use page directory entries, while version 1 use page 72210284Sjmallett * table entries. 73210284Sjmallett */ 74210284Sjmallett 75210284Sjmallettstatic amd64_pde_t 76210284Sjmallett_amd64_pde_get(kvm_t *kd, u_long pdeindex) 77210284Sjmallett{ 78210284Sjmallett amd64_pde_t *pde = _kvm_pmap_get(kd, pdeindex, sizeof(*pde)); 79210284Sjmallett 80215990Sjmallett return le64toh(*pde); 81215990Sjmallett} 82210284Sjmallett 83215990Sjmallettstatic amd64_pte_t 84210284Sjmallett_amd64_pte_get(kvm_t *kd, u_long pteindex) 85210284Sjmallett{ 86210284Sjmallett amd64_pte_t *pte = _kvm_pmap_get(kd, pteindex, sizeof(*pte)); 87210284Sjmallett 88210284Sjmallett return le64toh(*pte); 89210284Sjmallett} 90210284Sjmallett 91210284Sjmallett/* Get the first page table entry for a given page directory index. */ 92210284Sjmallettstatic amd64_pte_t * 93210284Sjmallett_amd64_pde_first_pte(kvm_t *kd, u_long pdeindex) 94210284Sjmallett{ 95210284Sjmallett u_long *pa; 96210284Sjmallett 97210284Sjmallett pa = _kvm_pmap_get(kd, pdeindex, sizeof(amd64_pde_t)); 98210284Sjmallett if (pa == NULL) 99210284Sjmallett return NULL; 100210284Sjmallett return _kvm_map_get(kd, *pa & AMD64_PG_FRAME, AMD64_PAGE_SIZE); 101210284Sjmallett} 102210284Sjmallett 103210284Sjmallettstatic int 104210284Sjmallett_amd64_minidump_probe(kvm_t *kd) 105210284Sjmallett{ 106210284Sjmallett 107210284Sjmallett return (_kvm_probe_elf_kernel(kd, ELFCLASS64, EM_X86_64) && 108210284Sjmallett _kvm_is_minidump(kd)); 109210284Sjmallett} 110210284Sjmallett 111210284Sjmallettstatic void 112210284Sjmallett_amd64_minidump_freevtop(kvm_t *kd) 113210284Sjmallett{ 114210284Sjmallett struct vmstate *vm = kd->vmst; 115210284Sjmallett 116210284Sjmallett free(vm); 117210284Sjmallett kd->vmst = NULL; 118210284Sjmallett} 119210284Sjmallett 120210284Sjmallettstatic int 121210284Sjmallett_amd64_minidump_initvtop(kvm_t *kd) 122210284Sjmallett{ 123210284Sjmallett struct vmstate *vmst; 124210284Sjmallett off_t off, dump_avail_off, sparse_off; 125210284Sjmallett 126210284Sjmallett vmst = _kvm_malloc(kd, sizeof(*vmst)); 127210284Sjmallett if (vmst == NULL) { 128210284Sjmallett _kvm_err(kd, kd->program, "cannot allocate vm"); 129210284Sjmallett return (-1); 130210284Sjmallett } 131210284Sjmallett kd->vmst = vmst; 132210284Sjmallett if (pread(kd->pmfd, &vmst->hdr, sizeof(vmst->hdr), 0) != 133210284Sjmallett sizeof(vmst->hdr)) { 134210284Sjmallett _kvm_err(kd, kd->program, "cannot read dump header"); 135210284Sjmallett return (-1); 136210284Sjmallett } 137210284Sjmallett if (strncmp(MINIDUMP_MAGIC, vmst->hdr.magic, sizeof(vmst->hdr.magic)) != 0) { 138210284Sjmallett _kvm_err(kd, kd->program, "not a minidump for this platform"); 139210284Sjmallett return (-1); 140210284Sjmallett } 141210284Sjmallett 142210284Sjmallett /* 143210284Sjmallett * NB: amd64 minidump header is binary compatible between version 1 144210284Sjmallett * and version 2; version 3 adds the dumpavailsize field 145210284Sjmallett */ 146210284Sjmallett vmst->hdr.version = le32toh(vmst->hdr.version); 147210284Sjmallett if (vmst->hdr.version > MINIDUMP_VERSION || vmst->hdr.version < 1) { 148210284Sjmallett _kvm_err(kd, kd->program, "wrong minidump version. expected %d got %d", 149210284Sjmallett MINIDUMP_VERSION, vmst->hdr.version); 150210284Sjmallett return (-1); 151210284Sjmallett } 152210284Sjmallett vmst->hdr.msgbufsize = le32toh(vmst->hdr.msgbufsize); 153210284Sjmallett vmst->hdr.bitmapsize = le32toh(vmst->hdr.bitmapsize); 154210284Sjmallett vmst->hdr.pmapsize = le32toh(vmst->hdr.pmapsize); 155210284Sjmallett vmst->hdr.kernbase = le64toh(vmst->hdr.kernbase); 156210284Sjmallett vmst->hdr.dmapbase = le64toh(vmst->hdr.dmapbase); 157210284Sjmallett vmst->hdr.dmapend = le64toh(vmst->hdr.dmapend); 158210284Sjmallett vmst->hdr.dumpavailsize = vmst->hdr.version == MINIDUMP_VERSION ? 159210284Sjmallett le32toh(vmst->hdr.dumpavailsize) : 0; 160210284Sjmallett 161210284Sjmallett /* Skip header and msgbuf */ 162210284Sjmallett dump_avail_off = AMD64_PAGE_SIZE + amd64_round_page(vmst->hdr.msgbufsize); 163210284Sjmallett 164210284Sjmallett /* Skip dump_avail */ 165210284Sjmallett off = dump_avail_off + amd64_round_page(vmst->hdr.dumpavailsize); 166210284Sjmallett 167210284Sjmallett sparse_off = off + amd64_round_page(vmst->hdr.bitmapsize) + 168210284Sjmallett amd64_round_page(vmst->hdr.pmapsize); 169210284Sjmallett if (_kvm_pt_init(kd, vmst->hdr.dumpavailsize, dump_avail_off, 170210284Sjmallett vmst->hdr.bitmapsize, off, sparse_off, AMD64_PAGE_SIZE) == -1) { 171210284Sjmallett return (-1); 172210284Sjmallett } 173210284Sjmallett off += amd64_round_page(vmst->hdr.bitmapsize); 174210284Sjmallett 175210284Sjmallett if (_kvm_pmap_init(kd, vmst->hdr.pmapsize, off) == -1) { 176210284Sjmallett return (-1); 177210284Sjmallett } 178210284Sjmallett off += amd64_round_page(vmst->hdr.pmapsize); 179210284Sjmallett 180210284Sjmallett return (0); 181210284Sjmallett} 182210284Sjmallett 183210284Sjmallettstatic int 184210284Sjmallett_amd64_minidump_vatop_v1(kvm_t *kd, kvaddr_t va, off_t *pa) 185210284Sjmallett{ 186210284Sjmallett struct vmstate *vm; 187210284Sjmallett amd64_physaddr_t offset; 188210284Sjmallett amd64_pte_t pte; 189210284Sjmallett kvaddr_t pteindex; 190210284Sjmallett amd64_physaddr_t a; 191210284Sjmallett off_t ofs; 192210284Sjmallett 193210284Sjmallett vm = kd->vmst; 194210284Sjmallett offset = va & AMD64_PAGE_MASK; 195210284Sjmallett 196210284Sjmallett if (va >= vm->hdr.kernbase) { 197210284Sjmallett pteindex = (va - vm->hdr.kernbase) >> AMD64_PAGE_SHIFT; 198210284Sjmallett if (pteindex >= vm->hdr.pmapsize / sizeof(pte)) 199210284Sjmallett goto invalid; 200210284Sjmallett pte = _amd64_pte_get(kd, pteindex); 201210284Sjmallett if ((pte & AMD64_PG_V) == 0) { 202210284Sjmallett _kvm_err(kd, kd->program, 203210284Sjmallett "_amd64_minidump_vatop_v1: pte not valid"); 204210284Sjmallett goto invalid; 205210284Sjmallett } 206210284Sjmallett a = pte & AMD64_PG_FRAME; 207210284Sjmallett ofs = _kvm_pt_find(kd, a, AMD64_PAGE_SIZE); 208210284Sjmallett if (ofs == -1) { 209210284Sjmallett _kvm_err(kd, kd->program, 210210284Sjmallett "_amd64_minidump_vatop_v1: physical address 0x%jx not in minidump", 211210284Sjmallett (uintmax_t)a); 212210284Sjmallett goto invalid; 213215990Sjmallett } 214210284Sjmallett *pa = ofs + offset; 215210284Sjmallett return (AMD64_PAGE_SIZE - offset); 216210284Sjmallett } else if (va >= vm->hdr.dmapbase && va < vm->hdr.dmapend) { 217210284Sjmallett a = (va - vm->hdr.dmapbase) & ~AMD64_PAGE_MASK; 218210284Sjmallett ofs = _kvm_pt_find(kd, a, AMD64_PAGE_SIZE); 219210284Sjmallett if (ofs == -1) { 220210284Sjmallett _kvm_err(kd, kd->program, 221210284Sjmallett "_amd64_minidump_vatop_v1: direct map address 0x%jx not in minidump", 222210284Sjmallett (uintmax_t)va); 223210284Sjmallett goto invalid; 224210284Sjmallett } 225210284Sjmallett *pa = ofs + offset; 226210284Sjmallett return (AMD64_PAGE_SIZE - offset); 227210284Sjmallett } else { 228210284Sjmallett _kvm_err(kd, kd->program, 229210284Sjmallett "_amd64_minidump_vatop_v1: virtual address 0x%jx not minidumped", 230210284Sjmallett (uintmax_t)va); 231210284Sjmallett goto invalid; 232210284Sjmallett } 233210284Sjmallett 234210284Sjmallettinvalid: 235210284Sjmallett _kvm_err(kd, 0, "invalid address (0x%jx)", (uintmax_t)va); 236210284Sjmallett return (0); 237210284Sjmallett} 238210284Sjmallett 239210284Sjmallettstatic int 240210284Sjmallett_amd64_minidump_vatop(kvm_t *kd, kvaddr_t va, off_t *pa) 241210284Sjmallett{ 242210284Sjmallett amd64_pte_t pt[AMD64_NPTEPG]; 243210284Sjmallett struct vmstate *vm; 244210284Sjmallett amd64_physaddr_t offset; 245210284Sjmallett amd64_pde_t pde; 246210284Sjmallett amd64_pte_t pte; 247210284Sjmallett kvaddr_t pteindex; 248210284Sjmallett kvaddr_t pdeindex; 249210284Sjmallett amd64_physaddr_t a; 250210284Sjmallett off_t ofs; 251210284Sjmallett 252210284Sjmallett vm = kd->vmst; 253210284Sjmallett offset = va & AMD64_PAGE_MASK; 254210284Sjmallett 255210284Sjmallett if (va >= vm->hdr.kernbase) { 256210284Sjmallett pdeindex = (va - vm->hdr.kernbase) >> AMD64_PDRSHIFT; 257210284Sjmallett if (pdeindex >= vm->hdr.pmapsize / sizeof(pde)) 258210284Sjmallett goto invalid; 259210284Sjmallett pde = _amd64_pde_get(kd, pdeindex); 260210284Sjmallett if ((pde & AMD64_PG_V) == 0) { 261210284Sjmallett _kvm_err(kd, kd->program, 262210284Sjmallett "_amd64_minidump_vatop: pde not valid"); 263210284Sjmallett goto invalid; 264210284Sjmallett } 265210284Sjmallett if ((pde & AMD64_PG_PS) == 0) { 266210284Sjmallett a = pde & AMD64_PG_FRAME; 267210284Sjmallett /* TODO: Just read the single PTE */ 268210284Sjmallett ofs = _kvm_pt_find(kd, a, AMD64_PAGE_SIZE); 269210284Sjmallett if (ofs == -1) { 270210284Sjmallett _kvm_err(kd, kd->program, 271210284Sjmallett "cannot find page table entry for %ju", 272210284Sjmallett (uintmax_t)a); 273210284Sjmallett goto invalid; 274210284Sjmallett } 275210284Sjmallett if (pread(kd->pmfd, &pt, AMD64_PAGE_SIZE, ofs) != 276210284Sjmallett AMD64_PAGE_SIZE) { 277210284Sjmallett _kvm_err(kd, kd->program, 278210284Sjmallett "cannot read page table entry for %ju", 279210284Sjmallett (uintmax_t)a); 280210284Sjmallett goto invalid; 281210284Sjmallett } 282210284Sjmallett pteindex = (va >> AMD64_PAGE_SHIFT) & 283210284Sjmallett (AMD64_NPTEPG - 1); 284210284Sjmallett pte = le64toh(pt[pteindex]); 285210284Sjmallett if ((pte & AMD64_PG_V) == 0) { 286210284Sjmallett _kvm_err(kd, kd->program, 287210284Sjmallett "_amd64_minidump_vatop: pte not valid"); 288210284Sjmallett goto invalid; 289210284Sjmallett } 290210284Sjmallett a = pte & AMD64_PG_FRAME; 291210284Sjmallett } else { 292210284Sjmallett a = pde & AMD64_PG_PS_FRAME; 293210284Sjmallett a += (va & AMD64_PDRMASK) ^ offset; 294210284Sjmallett } 295210284Sjmallett ofs = _kvm_pt_find(kd, a, AMD64_PAGE_SIZE); 296210284Sjmallett if (ofs == -1) { 297210284Sjmallett _kvm_err(kd, kd->program, 298210284Sjmallett "_amd64_minidump_vatop: physical address 0x%jx not in minidump", 299210284Sjmallett (uintmax_t)a); 300210284Sjmallett goto invalid; 301210284Sjmallett } 302210284Sjmallett *pa = ofs + offset; 303210284Sjmallett return (AMD64_PAGE_SIZE - offset); 304210284Sjmallett } else if (va >= vm->hdr.dmapbase && va < vm->hdr.dmapend) { 305210284Sjmallett a = (va - vm->hdr.dmapbase) & ~AMD64_PAGE_MASK; 306210284Sjmallett ofs = _kvm_pt_find(kd, a, AMD64_PAGE_SIZE); 307210284Sjmallett if (ofs == -1) { 308210284Sjmallett _kvm_err(kd, kd->program, 309210284Sjmallett "_amd64_minidump_vatop: direct map address 0x%jx not in minidump", 310210284Sjmallett (uintmax_t)va); 311210284Sjmallett goto invalid; 312210284Sjmallett } 313210284Sjmallett *pa = ofs + offset; 314210284Sjmallett return (AMD64_PAGE_SIZE - offset); 315210284Sjmallett } else { 316210284Sjmallett _kvm_err(kd, kd->program, 317210284Sjmallett "_amd64_minidump_vatop: virtual address 0x%jx not minidumped", 318210284Sjmallett (uintmax_t)va); 319210284Sjmallett goto invalid; 320210284Sjmallett } 321210284Sjmallett 322210284Sjmallettinvalid: 323210284Sjmallett _kvm_err(kd, 0, "invalid address (0x%jx)", (uintmax_t)va); 324210284Sjmallett return (0); 325210284Sjmallett} 326 327static int 328_amd64_minidump_kvatop(kvm_t *kd, kvaddr_t va, off_t *pa) 329{ 330 331 if (ISALIVE(kd)) { 332 _kvm_err(kd, 0, 333 "_amd64_minidump_kvatop called in live kernel!"); 334 return (0); 335 } 336 if (((struct vmstate *)kd->vmst)->hdr.version == 1) 337 return (_amd64_minidump_vatop_v1(kd, va, pa)); 338 else 339 return (_amd64_minidump_vatop(kd, va, pa)); 340} 341 342static int 343_amd64_minidump_walk_pages(kvm_t *kd, kvm_walk_pages_cb_t *cb, void *arg) 344{ 345 struct vmstate *vm = kd->vmst; 346 u_long npdes = vm->hdr.pmapsize / sizeof(amd64_pde_t); 347 u_long bmindex, dva, pa, pdeindex, va; 348 struct kvm_bitmap bm; 349 int ret = 0; 350 vm_prot_t prot; 351 unsigned int pgsz = AMD64_PAGE_SIZE; 352 353 if (vm->hdr.version < 2) 354 return (0); 355 356 if (!_kvm_bitmap_init(&bm, vm->hdr.bitmapsize, &bmindex)) 357 return (0); 358 359 for (pdeindex = 0; pdeindex < npdes; pdeindex++) { 360 amd64_pde_t pde = _amd64_pde_get(kd, pdeindex); 361 amd64_pte_t *ptes; 362 u_long i; 363 364 va = vm->hdr.kernbase + (pdeindex << AMD64_PDRSHIFT); 365 if ((pde & AMD64_PG_V) == 0) 366 continue; 367 368 if ((pde & AMD64_PG_PS) != 0) { 369 /* 370 * Large page. Iterate on each 4K page section 371 * within this page. This differs from 4K pages in 372 * that every page here uses the same PDE to 373 * generate permissions. 374 */ 375 pa = (pde & AMD64_PG_PS_FRAME) + 376 ((va & AMD64_PDRMASK) ^ VA_OFF(vm, va)); 377 dva = vm->hdr.dmapbase + pa; 378 _kvm_bitmap_set(&bm, _kvm_pa_bit_id(kd, pa, AMD64_PAGE_SIZE)); 379 if (!_kvm_visit_cb(kd, cb, arg, pa, va, dva, 380 _amd64_entry_to_prot(pde), AMD64_NBPDR, pgsz)) { 381 goto out; 382 } 383 continue; 384 } 385 386 /* 4K pages: pde references another page of entries. */ 387 ptes = _amd64_pde_first_pte(kd, pdeindex); 388 /* Ignore page directory pages that were not dumped. */ 389 if (ptes == NULL) 390 continue; 391 392 for (i = 0; i < AMD64_NPTEPG; i++) { 393 amd64_pte_t pte = (u_long)ptes[i]; 394 395 pa = pte & AMD64_PG_FRAME; 396 dva = vm->hdr.dmapbase + pa; 397 if ((pte & AMD64_PG_V) != 0) { 398 _kvm_bitmap_set(&bm, 399 _kvm_pa_bit_id(kd, pa, AMD64_PAGE_SIZE)); 400 if (!_kvm_visit_cb(kd, cb, arg, pa, va, dva, 401 _amd64_entry_to_prot(pte), pgsz, 0)) { 402 goto out; 403 } 404 } 405 va += AMD64_PAGE_SIZE; 406 } 407 } 408 409 while (_kvm_bitmap_next(&bm, &bmindex)) { 410 pa = _kvm_bit_id_pa(kd, bmindex, AMD64_PAGE_SIZE); 411 if (pa == _KVM_PA_INVALID) 412 break; 413 dva = vm->hdr.dmapbase + pa; 414 if (vm->hdr.dmapend < (dva + pgsz)) 415 break; 416 va = 0; 417 /* amd64/pmap.c: create_pagetables(): dmap always R|W. */ 418 prot = VM_PROT_READ | VM_PROT_WRITE; 419 if (!_kvm_visit_cb(kd, cb, arg, pa, va, dva, prot, pgsz, 0)) { 420 goto out; 421 } 422 } 423 424 ret = 1; 425 426out: 427 _kvm_bitmap_deinit(&bm); 428 return (ret); 429} 430 431static struct kvm_arch kvm_amd64_minidump = { 432 .ka_probe = _amd64_minidump_probe, 433 .ka_initvtop = _amd64_minidump_initvtop, 434 .ka_freevtop = _amd64_minidump_freevtop, 435 .ka_kvatop = _amd64_minidump_kvatop, 436 .ka_native = _amd64_native, 437 .ka_walk_pages = _amd64_minidump_walk_pages, 438}; 439 440KVM_ARCH(kvm_amd64_minidump); 441