/* vmm_instruction_emul.c revision 267399 */
1/*- 2 * Copyright (c) 2012 Sandvine, Inc. 3 * Copyright (c) 2012 NetApp, Inc. 4 * All rights reserved. 5 * 6 * Redistribution and use in source and binary forms, with or without 7 * modification, are permitted provided that the following conditions 8 * are met: 9 * 1. Redistributions of source code must retain the above copyright 10 * notice, this list of conditions and the following disclaimer. 11 * 2. Redistributions in binary form must reproduce the above copyright 12 * notice, this list of conditions and the following disclaimer in the 13 * documentation and/or other materials provided with the distribution. 14 * 15 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 16 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 17 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 18 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 19 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 20 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 21 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 22 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 23 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 24 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 25 * SUCH DAMAGE. 
26 * 27 * $FreeBSD: stable/10/sys/amd64/vmm/vmm_instruction_emul.c 267399 2014-06-12 15:20:59Z jhb $ 28 */ 29 30#include <sys/cdefs.h> 31__FBSDID("$FreeBSD: stable/10/sys/amd64/vmm/vmm_instruction_emul.c 267399 2014-06-12 15:20:59Z jhb $"); 32 33#ifdef _KERNEL 34#include <sys/param.h> 35#include <sys/pcpu.h> 36#include <sys/systm.h> 37 38#include <vm/vm.h> 39#include <vm/pmap.h> 40 41#include <machine/vmparam.h> 42#include <machine/vmm.h> 43#else /* !_KERNEL */ 44#include <sys/types.h> 45#include <sys/errno.h> 46 47#include <machine/vmm.h> 48 49#include <vmmapi.h> 50#endif /* _KERNEL */ 51 52/* struct vie_op.op_type */ 53enum { 54 VIE_OP_TYPE_NONE = 0, 55 VIE_OP_TYPE_MOV, 56 VIE_OP_TYPE_MOVSX, 57 VIE_OP_TYPE_MOVZX, 58 VIE_OP_TYPE_AND, 59 VIE_OP_TYPE_OR, 60 VIE_OP_TYPE_TWO_BYTE, 61 VIE_OP_TYPE_LAST 62}; 63 64/* struct vie_op.op_flags */ 65#define VIE_OP_F_IMM (1 << 0) /* immediate operand present */ 66#define VIE_OP_F_IMM8 (1 << 1) /* 8-bit immediate operand */ 67 68static const struct vie_op two_byte_opcodes[256] = { 69 [0xB6] = { 70 .op_byte = 0xB6, 71 .op_type = VIE_OP_TYPE_MOVZX, 72 }, 73 [0xBE] = { 74 .op_byte = 0xBE, 75 .op_type = VIE_OP_TYPE_MOVSX, 76 }, 77}; 78 79static const struct vie_op one_byte_opcodes[256] = { 80 [0x0F] = { 81 .op_byte = 0x0F, 82 .op_type = VIE_OP_TYPE_TWO_BYTE 83 }, 84 [0x88] = { 85 .op_byte = 0x88, 86 .op_type = VIE_OP_TYPE_MOV, 87 }, 88 [0x89] = { 89 .op_byte = 0x89, 90 .op_type = VIE_OP_TYPE_MOV, 91 }, 92 [0x8A] = { 93 .op_byte = 0x8A, 94 .op_type = VIE_OP_TYPE_MOV, 95 }, 96 [0x8B] = { 97 .op_byte = 0x8B, 98 .op_type = VIE_OP_TYPE_MOV, 99 }, 100 [0xC7] = { 101 .op_byte = 0xC7, 102 .op_type = VIE_OP_TYPE_MOV, 103 .op_flags = VIE_OP_F_IMM, 104 }, 105 [0x23] = { 106 .op_byte = 0x23, 107 .op_type = VIE_OP_TYPE_AND, 108 }, 109 [0x81] = { 110 /* XXX Group 1 extended opcode - not just AND */ 111 .op_byte = 0x81, 112 .op_type = VIE_OP_TYPE_AND, 113 .op_flags = VIE_OP_F_IMM, 114 }, 115 [0x83] = { 116 /* XXX Group 1 extended opcode - not just 
OR */ 117 .op_byte = 0x83, 118 .op_type = VIE_OP_TYPE_OR, 119 .op_flags = VIE_OP_F_IMM8, 120 }, 121}; 122 123/* struct vie.mod */ 124#define VIE_MOD_INDIRECT 0 125#define VIE_MOD_INDIRECT_DISP8 1 126#define VIE_MOD_INDIRECT_DISP32 2 127#define VIE_MOD_DIRECT 3 128 129/* struct vie.rm */ 130#define VIE_RM_SIB 4 131#define VIE_RM_DISP32 5 132 133#define GB (1024 * 1024 * 1024) 134 135static enum vm_reg_name gpr_map[16] = { 136 VM_REG_GUEST_RAX, 137 VM_REG_GUEST_RCX, 138 VM_REG_GUEST_RDX, 139 VM_REG_GUEST_RBX, 140 VM_REG_GUEST_RSP, 141 VM_REG_GUEST_RBP, 142 VM_REG_GUEST_RSI, 143 VM_REG_GUEST_RDI, 144 VM_REG_GUEST_R8, 145 VM_REG_GUEST_R9, 146 VM_REG_GUEST_R10, 147 VM_REG_GUEST_R11, 148 VM_REG_GUEST_R12, 149 VM_REG_GUEST_R13, 150 VM_REG_GUEST_R14, 151 VM_REG_GUEST_R15 152}; 153 154static uint64_t size2mask[] = { 155 [1] = 0xff, 156 [2] = 0xffff, 157 [4] = 0xffffffff, 158 [8] = 0xffffffffffffffff, 159}; 160 161static int 162vie_read_register(void *vm, int vcpuid, enum vm_reg_name reg, uint64_t *rval) 163{ 164 int error; 165 166 error = vm_get_register(vm, vcpuid, reg, rval); 167 168 return (error); 169} 170 171static int 172vie_read_bytereg(void *vm, int vcpuid, struct vie *vie, uint8_t *rval) 173{ 174 uint64_t val; 175 int error, rshift; 176 enum vm_reg_name reg; 177 178 rshift = 0; 179 reg = gpr_map[vie->reg]; 180 181 /* 182 * 64-bit mode imposes limitations on accessing legacy byte registers. 183 * 184 * The legacy high-byte registers cannot be addressed if the REX 185 * prefix is present. In this case the values 4, 5, 6 and 7 of the 186 * 'ModRM:reg' field address %spl, %bpl, %sil and %dil respectively. 187 * 188 * If the REX prefix is not present then the values 4, 5, 6 and 7 189 * of the 'ModRM:reg' field address the legacy high-byte registers, 190 * %ah, %ch, %dh and %bh respectively. 191 */ 192 if (!vie->rex_present) { 193 if (vie->reg & 0x4) { 194 /* 195 * Obtain the value of %ah by reading %rax and shifting 196 * right by 8 bits (same for %bh, %ch and %dh). 
197 */ 198 rshift = 8; 199 reg = gpr_map[vie->reg & 0x3]; 200 } 201 } 202 203 error = vm_get_register(vm, vcpuid, reg, &val); 204 *rval = val >> rshift; 205 return (error); 206} 207 208static int 209vie_update_register(void *vm, int vcpuid, enum vm_reg_name reg, 210 uint64_t val, int size) 211{ 212 int error; 213 uint64_t origval; 214 215 switch (size) { 216 case 1: 217 case 2: 218 error = vie_read_register(vm, vcpuid, reg, &origval); 219 if (error) 220 return (error); 221 val &= size2mask[size]; 222 val |= origval & ~size2mask[size]; 223 break; 224 case 4: 225 val &= 0xffffffffUL; 226 break; 227 case 8: 228 break; 229 default: 230 return (EINVAL); 231 } 232 233 error = vm_set_register(vm, vcpuid, reg, val); 234 return (error); 235} 236 237/* 238 * The following simplifying assumptions are made during emulation: 239 * 240 * - guest is in 64-bit mode 241 * - default address size is 64-bits 242 * - default operand size is 32-bits 243 * 244 * - operand size override is not supported 245 * 246 * - address size override is not supported 247 */ 248static int 249emulate_mov(void *vm, int vcpuid, uint64_t gpa, struct vie *vie, 250 mem_region_read_t memread, mem_region_write_t memwrite, void *arg) 251{ 252 int error, size; 253 enum vm_reg_name reg; 254 uint8_t byte; 255 uint64_t val; 256 257 size = 4; 258 error = EINVAL; 259 260 switch (vie->op.op_byte) { 261 case 0x88: 262 /* 263 * MOV byte from reg (ModRM:reg) to mem (ModRM:r/m) 264 * 88/r: mov r/m8, r8 265 * REX + 88/r: mov r/m8, r8 (%ah, %ch, %dh, %bh not available) 266 */ 267 size = 1; 268 error = vie_read_bytereg(vm, vcpuid, vie, &byte); 269 if (error == 0) 270 error = memwrite(vm, vcpuid, gpa, byte, size, arg); 271 break; 272 case 0x89: 273 /* 274 * MOV from reg (ModRM:reg) to mem (ModRM:r/m) 275 * 89/r: mov r/m32, r32 276 * REX.W + 89/r mov r/m64, r64 277 */ 278 if (vie->rex_w) 279 size = 8; 280 reg = gpr_map[vie->reg]; 281 error = vie_read_register(vm, vcpuid, reg, &val); 282 if (error == 0) { 283 val &= 
size2mask[size]; 284 error = memwrite(vm, vcpuid, gpa, val, size, arg); 285 } 286 break; 287 case 0x8A: 288 case 0x8B: 289 /* 290 * MOV from mem (ModRM:r/m) to reg (ModRM:reg) 291 * 8A/r: mov r/m8, r8 292 * REX + 8A/r: mov r/m8, r8 293 * 8B/r: mov r32, r/m32 294 * REX.W 8B/r: mov r64, r/m64 295 */ 296 if (vie->op.op_byte == 0x8A) 297 size = 1; 298 else if (vie->rex_w) 299 size = 8; 300 error = memread(vm, vcpuid, gpa, &val, size, arg); 301 if (error == 0) { 302 reg = gpr_map[vie->reg]; 303 error = vie_update_register(vm, vcpuid, reg, val, size); 304 } 305 break; 306 case 0xC7: 307 /* 308 * MOV from imm32 to mem (ModRM:r/m) 309 * C7/0 mov r/m32, imm32 310 * REX.W + C7/0 mov r/m64, imm32 (sign-extended to 64-bits) 311 */ 312 val = vie->immediate; /* already sign-extended */ 313 314 if (vie->rex_w) 315 size = 8; 316 317 if (size != 8) 318 val &= size2mask[size]; 319 320 error = memwrite(vm, vcpuid, gpa, val, size, arg); 321 break; 322 default: 323 break; 324 } 325 326 return (error); 327} 328 329/* 330 * The following simplifying assumptions are made during emulation: 331 * 332 * - guest is in 64-bit mode 333 * - default address size is 64-bits 334 * - default operand size is 32-bits 335 * 336 * - operand size override is not supported 337 * 338 * - address size override is not supported 339 */ 340static int 341emulate_movx(void *vm, int vcpuid, uint64_t gpa, struct vie *vie, 342 mem_region_read_t memread, mem_region_write_t memwrite, 343 void *arg) 344{ 345 int error, size; 346 enum vm_reg_name reg; 347 uint64_t val; 348 349 size = 4; 350 error = EINVAL; 351 352 switch (vie->op.op_byte) { 353 case 0xB6: 354 /* 355 * MOV and zero extend byte from mem (ModRM:r/m) to 356 * reg (ModRM:reg). 
357 * 358 * 0F B6/r movzx r/m8, r32 359 * REX.W + 0F B6/r movzx r/m8, r64 360 */ 361 362 /* get the first operand */ 363 error = memread(vm, vcpuid, gpa, &val, 1, arg); 364 if (error) 365 break; 366 367 /* get the second operand */ 368 reg = gpr_map[vie->reg]; 369 370 if (vie->rex_w) 371 size = 8; 372 373 /* write the result */ 374 error = vie_update_register(vm, vcpuid, reg, val, size); 375 break; 376 case 0xBE: 377 /* 378 * MOV and sign extend byte from mem (ModRM:r/m) to 379 * reg (ModRM:reg). 380 * 381 * 0F BE/r movsx r/m8, r32 382 * REX.W + 0F BE/r movsx r/m8, r64 383 */ 384 385 /* get the first operand */ 386 error = memread(vm, vcpuid, gpa, &val, 1, arg); 387 if (error) 388 break; 389 390 /* get the second operand */ 391 reg = gpr_map[vie->reg]; 392 393 if (vie->rex_w) 394 size = 8; 395 396 /* sign extend byte */ 397 val = (int8_t)val; 398 399 /* write the result */ 400 error = vie_update_register(vm, vcpuid, reg, val, size); 401 break; 402 default: 403 break; 404 } 405 return (error); 406} 407 408static int 409emulate_and(void *vm, int vcpuid, uint64_t gpa, struct vie *vie, 410 mem_region_read_t memread, mem_region_write_t memwrite, void *arg) 411{ 412 int error, size; 413 enum vm_reg_name reg; 414 uint64_t val1, val2; 415 416 size = 4; 417 error = EINVAL; 418 419 switch (vie->op.op_byte) { 420 case 0x23: 421 /* 422 * AND reg (ModRM:reg) and mem (ModRM:r/m) and store the 423 * result in reg. 
424 * 425 * 23/r and r32, r/m32 426 * REX.W + 23/r and r64, r/m64 427 */ 428 if (vie->rex_w) 429 size = 8; 430 431 /* get the first operand */ 432 reg = gpr_map[vie->reg]; 433 error = vie_read_register(vm, vcpuid, reg, &val1); 434 if (error) 435 break; 436 437 /* get the second operand */ 438 error = memread(vm, vcpuid, gpa, &val2, size, arg); 439 if (error) 440 break; 441 442 /* perform the operation and write the result */ 443 val1 &= val2; 444 error = vie_update_register(vm, vcpuid, reg, val1, size); 445 break; 446 case 0x81: 447 /* 448 * AND mem (ModRM:r/m) with immediate and store the 449 * result in mem. 450 * 451 * 81/ and r/m32, imm32 452 * REX.W + 81/ and r/m64, imm32 sign-extended to 64 453 * 454 * Currently, only the AND operation of the 0x81 opcode 455 * is implemented (ModRM:reg = b100). 456 */ 457 if ((vie->reg & 7) != 4) 458 break; 459 460 if (vie->rex_w) 461 size = 8; 462 463 /* get the first operand */ 464 error = memread(vm, vcpuid, gpa, &val1, size, arg); 465 if (error) 466 break; 467 468 /* 469 * perform the operation with the pre-fetched immediate 470 * operand and write the result 471 */ 472 val1 &= vie->immediate; 473 error = memwrite(vm, vcpuid, gpa, val1, size, arg); 474 break; 475 default: 476 break; 477 } 478 return (error); 479} 480 481static int 482emulate_or(void *vm, int vcpuid, uint64_t gpa, struct vie *vie, 483 mem_region_read_t memread, mem_region_write_t memwrite, void *arg) 484{ 485 int error, size; 486 uint64_t val1; 487 488 size = 4; 489 error = EINVAL; 490 491 switch (vie->op.op_byte) { 492 case 0x83: 493 /* 494 * OR mem (ModRM:r/m) with immediate and store the 495 * result in mem. 496 * 497 * 83/ OR r/m32, imm8 sign-extended to 32 498 * REX.W + 83/ OR r/m64, imm8 sign-extended to 64 499 * 500 * Currently, only the OR operation of the 0x83 opcode 501 * is implemented (ModRM:reg = b001). 
502 */ 503 if ((vie->reg & 7) != 1) 504 break; 505 506 if (vie->rex_w) 507 size = 8; 508 509 /* get the first operand */ 510 error = memread(vm, vcpuid, gpa, &val1, size, arg); 511 if (error) 512 break; 513 514 /* 515 * perform the operation with the pre-fetched immediate 516 * operand and write the result 517 */ 518 val1 |= vie->immediate; 519 error = memwrite(vm, vcpuid, gpa, val1, size, arg); 520 break; 521 default: 522 break; 523 } 524 return (error); 525} 526 527int 528vmm_emulate_instruction(void *vm, int vcpuid, uint64_t gpa, struct vie *vie, 529 mem_region_read_t memread, mem_region_write_t memwrite, 530 void *memarg) 531{ 532 int error; 533 534 if (!vie->decoded) 535 return (EINVAL); 536 537 switch (vie->op.op_type) { 538 case VIE_OP_TYPE_MOV: 539 error = emulate_mov(vm, vcpuid, gpa, vie, 540 memread, memwrite, memarg); 541 break; 542 case VIE_OP_TYPE_MOVSX: 543 case VIE_OP_TYPE_MOVZX: 544 error = emulate_movx(vm, vcpuid, gpa, vie, 545 memread, memwrite, memarg); 546 break; 547 case VIE_OP_TYPE_AND: 548 error = emulate_and(vm, vcpuid, gpa, vie, 549 memread, memwrite, memarg); 550 break; 551 case VIE_OP_TYPE_OR: 552 error = emulate_or(vm, vcpuid, gpa, vie, 553 memread, memwrite, memarg); 554 break; 555 default: 556 error = EINVAL; 557 break; 558 } 559 560 return (error); 561} 562 563#ifdef _KERNEL 564void 565vie_init(struct vie *vie) 566{ 567 568 bzero(vie, sizeof(struct vie)); 569 570 vie->base_register = VM_REG_LAST; 571 vie->index_register = VM_REG_LAST; 572} 573 574static int 575gla2gpa(struct vm *vm, uint64_t gla, uint64_t ptpphys, 576 uint64_t *gpa, enum vie_paging_mode paging_mode) 577{ 578 int nlevels, ptpshift, ptpindex; 579 uint64_t *ptpbase, pte, pgsize; 580 uint32_t *ptpbase32, pte32; 581 void *cookie; 582 583 if (paging_mode == PAGING_MODE_FLAT) { 584 *gpa = gla; 585 return (0); 586 } 587 588 if (paging_mode == PAGING_MODE_32) { 589 nlevels = 2; 590 while (--nlevels >= 0) { 591 /* Zero out the lower 12 bits. 
*/ 592 ptpphys &= ~0xfff; 593 594 ptpbase32 = vm_gpa_hold(vm, ptpphys, PAGE_SIZE, 595 VM_PROT_READ, &cookie); 596 597 if (ptpbase32 == NULL) 598 goto error; 599 600 ptpshift = PAGE_SHIFT + nlevels * 10; 601 ptpindex = (gla >> ptpshift) & 0x3FF; 602 pgsize = 1UL << ptpshift; 603 604 pte32 = ptpbase32[ptpindex]; 605 606 vm_gpa_release(cookie); 607 608 if ((pte32 & PG_V) == 0) 609 goto error; 610 611 if (pte32 & PG_PS) 612 break; 613 614 ptpphys = pte32; 615 } 616 617 /* Zero out the lower 'ptpshift' bits */ 618 pte32 >>= ptpshift; pte32 <<= ptpshift; 619 *gpa = pte32 | (gla & (pgsize - 1)); 620 return (0); 621 } 622 623 if (paging_mode == PAGING_MODE_PAE) { 624 /* Zero out the lower 5 bits and the upper 12 bits */ 625 ptpphys >>= 5; ptpphys <<= 17; ptpphys >>= 12; 626 627 ptpbase = vm_gpa_hold(vm, ptpphys, sizeof(*ptpbase) * 4, 628 VM_PROT_READ, &cookie); 629 if (ptpbase == NULL) 630 goto error; 631 632 ptpindex = (gla >> 30) & 0x3; 633 634 pte = ptpbase[ptpindex]; 635 636 vm_gpa_release(cookie); 637 638 if ((pte & PG_V) == 0) 639 goto error; 640 641 ptpphys = pte; 642 643 nlevels = 2; 644 } else 645 nlevels = 4; 646 while (--nlevels >= 0) { 647 /* Zero out the lower 12 bits and the upper 12 bits */ 648 ptpphys >>= 12; ptpphys <<= 24; ptpphys >>= 12; 649 650 ptpbase = vm_gpa_hold(vm, ptpphys, PAGE_SIZE, VM_PROT_READ, 651 &cookie); 652 if (ptpbase == NULL) 653 goto error; 654 655 ptpshift = PAGE_SHIFT + nlevels * 9; 656 ptpindex = (gla >> ptpshift) & 0x1FF; 657 pgsize = 1UL << ptpshift; 658 659 pte = ptpbase[ptpindex]; 660 661 vm_gpa_release(cookie); 662 663 if ((pte & PG_V) == 0) 664 goto error; 665 666 if (pte & PG_PS) { 667 if (pgsize > 1 * GB) 668 goto error; 669 else 670 break; 671 } 672 673 ptpphys = pte; 674 } 675 676 /* Zero out the lower 'ptpshift' bits and the upper 12 bits */ 677 pte >>= ptpshift; pte <<= (ptpshift + 12); pte >>= 12; 678 *gpa = pte | (gla & (pgsize - 1)); 679 return (0); 680 681error: 682 return (-1); 683} 684 685int 
686vmm_fetch_instruction(struct vm *vm, int cpuid, uint64_t rip, int inst_length, 687 uint64_t cr3, enum vie_paging_mode paging_mode, 688 struct vie *vie) 689{ 690 int n, err, prot; 691 uint64_t gpa, off; 692 void *hpa, *cookie; 693 694 /* 695 * XXX cache previously fetched instructions using 'rip' as the tag 696 */ 697 698 prot = VM_PROT_READ | VM_PROT_EXECUTE; 699 if (inst_length > VIE_INST_SIZE) 700 panic("vmm_fetch_instruction: invalid length %d", inst_length); 701 702 /* Copy the instruction into 'vie' */ 703 while (vie->num_valid < inst_length) { 704 err = gla2gpa(vm, rip, cr3, &gpa, paging_mode); 705 if (err) 706 break; 707 708 off = gpa & PAGE_MASK; 709 n = min(inst_length - vie->num_valid, PAGE_SIZE - off); 710 711 if ((hpa = vm_gpa_hold(vm, gpa, n, prot, &cookie)) == NULL) 712 break; 713 714 bcopy(hpa, &vie->inst[vie->num_valid], n); 715 716 vm_gpa_release(cookie); 717 718 rip += n; 719 vie->num_valid += n; 720 } 721 722 if (vie->num_valid == inst_length) 723 return (0); 724 else 725 return (-1); 726} 727 728static int 729vie_peek(struct vie *vie, uint8_t *x) 730{ 731 732 if (vie->num_processed < vie->num_valid) { 733 *x = vie->inst[vie->num_processed]; 734 return (0); 735 } else 736 return (-1); 737} 738 739static void 740vie_advance(struct vie *vie) 741{ 742 743 vie->num_processed++; 744} 745 746static int 747decode_rex(struct vie *vie) 748{ 749 uint8_t x; 750 751 if (vie_peek(vie, &x)) 752 return (-1); 753 754 if (x >= 0x40 && x <= 0x4F) { 755 vie->rex_present = 1; 756 757 vie->rex_w = x & 0x8 ? 1 : 0; 758 vie->rex_r = x & 0x4 ? 1 : 0; 759 vie->rex_x = x & 0x2 ? 1 : 0; 760 vie->rex_b = x & 0x1 ? 
1 : 0; 761 762 vie_advance(vie); 763 } 764 765 return (0); 766} 767 768static int 769decode_two_byte_opcode(struct vie *vie) 770{ 771 uint8_t x; 772 773 if (vie_peek(vie, &x)) 774 return (-1); 775 776 vie->op = two_byte_opcodes[x]; 777 778 if (vie->op.op_type == VIE_OP_TYPE_NONE) 779 return (-1); 780 781 vie_advance(vie); 782 return (0); 783} 784 785static int 786decode_opcode(struct vie *vie) 787{ 788 uint8_t x; 789 790 if (vie_peek(vie, &x)) 791 return (-1); 792 793 vie->op = one_byte_opcodes[x]; 794 795 if (vie->op.op_type == VIE_OP_TYPE_NONE) 796 return (-1); 797 798 vie_advance(vie); 799 800 if (vie->op.op_type == VIE_OP_TYPE_TWO_BYTE) 801 return (decode_two_byte_opcode(vie)); 802 803 return (0); 804} 805 806static int 807decode_modrm(struct vie *vie, enum vie_cpu_mode cpu_mode) 808{ 809 uint8_t x; 810 811 if (vie_peek(vie, &x)) 812 return (-1); 813 814 vie->mod = (x >> 6) & 0x3; 815 vie->rm = (x >> 0) & 0x7; 816 vie->reg = (x >> 3) & 0x7; 817 818 /* 819 * A direct addressing mode makes no sense in the context of an EPT 820 * fault. There has to be a memory access involved to cause the 821 * EPT fault. 822 */ 823 if (vie->mod == VIE_MOD_DIRECT) 824 return (-1); 825 826 if ((vie->mod == VIE_MOD_INDIRECT && vie->rm == VIE_RM_DISP32) || 827 (vie->mod != VIE_MOD_DIRECT && vie->rm == VIE_RM_SIB)) { 828 /* 829 * Table 2-5: Special Cases of REX Encodings 830 * 831 * mod=0, r/m=5 is used in the compatibility mode to 832 * indicate a disp32 without a base register. 833 * 834 * mod!=3, r/m=4 is used in the compatibility mode to 835 * indicate that the SIB byte is present. 836 * 837 * The 'b' bit in the REX prefix is don't care in 838 * this case. 
839 */ 840 } else { 841 vie->rm |= (vie->rex_b << 3); 842 } 843 844 vie->reg |= (vie->rex_r << 3); 845 846 /* SIB */ 847 if (vie->mod != VIE_MOD_DIRECT && vie->rm == VIE_RM_SIB) 848 goto done; 849 850 vie->base_register = gpr_map[vie->rm]; 851 852 switch (vie->mod) { 853 case VIE_MOD_INDIRECT_DISP8: 854 vie->disp_bytes = 1; 855 break; 856 case VIE_MOD_INDIRECT_DISP32: 857 vie->disp_bytes = 4; 858 break; 859 case VIE_MOD_INDIRECT: 860 if (vie->rm == VIE_RM_DISP32) { 861 vie->disp_bytes = 4; 862 /* 863 * Table 2-7. RIP-Relative Addressing 864 * 865 * In 64-bit mode mod=00 r/m=101 implies [rip] + disp32 866 * whereas in compatibility mode it just implies disp32. 867 */ 868 869 if (cpu_mode == CPU_MODE_64BIT) 870 vie->base_register = VM_REG_GUEST_RIP; 871 else 872 vie->base_register = VM_REG_LAST; 873 } 874 break; 875 } 876 877done: 878 vie_advance(vie); 879 880 return (0); 881} 882 883static int 884decode_sib(struct vie *vie) 885{ 886 uint8_t x; 887 888 /* Proceed only if SIB byte is present */ 889 if (vie->mod == VIE_MOD_DIRECT || vie->rm != VIE_RM_SIB) 890 return (0); 891 892 if (vie_peek(vie, &x)) 893 return (-1); 894 895 /* De-construct the SIB byte */ 896 vie->ss = (x >> 6) & 0x3; 897 vie->index = (x >> 3) & 0x7; 898 vie->base = (x >> 0) & 0x7; 899 900 /* Apply the REX prefix modifiers */ 901 vie->index |= vie->rex_x << 3; 902 vie->base |= vie->rex_b << 3; 903 904 switch (vie->mod) { 905 case VIE_MOD_INDIRECT_DISP8: 906 vie->disp_bytes = 1; 907 break; 908 case VIE_MOD_INDIRECT_DISP32: 909 vie->disp_bytes = 4; 910 break; 911 } 912 913 if (vie->mod == VIE_MOD_INDIRECT && 914 (vie->base == 5 || vie->base == 13)) { 915 /* 916 * Special case when base register is unused if mod = 0 917 * and base = %rbp or %r13. 
918 * 919 * Documented in: 920 * Table 2-3: 32-bit Addressing Forms with the SIB Byte 921 * Table 2-5: Special Cases of REX Encodings 922 */ 923 vie->disp_bytes = 4; 924 } else { 925 vie->base_register = gpr_map[vie->base]; 926 } 927 928 /* 929 * All encodings of 'index' are valid except for %rsp (4). 930 * 931 * Documented in: 932 * Table 2-3: 32-bit Addressing Forms with the SIB Byte 933 * Table 2-5: Special Cases of REX Encodings 934 */ 935 if (vie->index != 4) 936 vie->index_register = gpr_map[vie->index]; 937 938 /* 'scale' makes sense only in the context of an index register */ 939 if (vie->index_register < VM_REG_LAST) 940 vie->scale = 1 << vie->ss; 941 942 vie_advance(vie); 943 944 return (0); 945} 946 947static int 948decode_displacement(struct vie *vie) 949{ 950 int n, i; 951 uint8_t x; 952 953 union { 954 char buf[4]; 955 int8_t signed8; 956 int32_t signed32; 957 } u; 958 959 if ((n = vie->disp_bytes) == 0) 960 return (0); 961 962 if (n != 1 && n != 4) 963 panic("decode_displacement: invalid disp_bytes %d", n); 964 965 for (i = 0; i < n; i++) { 966 if (vie_peek(vie, &x)) 967 return (-1); 968 969 u.buf[i] = x; 970 vie_advance(vie); 971 } 972 973 if (n == 1) 974 vie->displacement = u.signed8; /* sign-extended */ 975 else 976 vie->displacement = u.signed32; /* sign-extended */ 977 978 return (0); 979} 980 981static int 982decode_immediate(struct vie *vie) 983{ 984 int i, n; 985 uint8_t x; 986 union { 987 char buf[4]; 988 int8_t signed8; 989 int32_t signed32; 990 } u; 991 992 /* Figure out immediate operand size (if any) */ 993 if (vie->op.op_flags & VIE_OP_F_IMM) 994 vie->imm_bytes = 4; 995 else if (vie->op.op_flags & VIE_OP_F_IMM8) 996 vie->imm_bytes = 1; 997 998 if ((n = vie->imm_bytes) == 0) 999 return (0); 1000 1001 if (n != 1 && n != 4) 1002 panic("decode_immediate: invalid imm_bytes %d", n); 1003 1004 for (i = 0; i < n; i++) { 1005 if (vie_peek(vie, &x)) 1006 return (-1); 1007 1008 u.buf[i] = x; 1009 vie_advance(vie); 1010 } 1011 1012 if (n == 1) 1013 
vie->immediate = u.signed8; /* sign-extended */ 1014 else 1015 vie->immediate = u.signed32; /* sign-extended */ 1016 1017 return (0); 1018} 1019 1020/* 1021 * Verify that all the bytes in the instruction buffer were consumed. 1022 */ 1023static int 1024verify_inst_length(struct vie *vie) 1025{ 1026 1027 if (vie->num_processed == vie->num_valid) 1028 return (0); 1029 else 1030 return (-1); 1031} 1032 1033/* 1034 * Verify that the 'guest linear address' provided as collateral of the nested 1035 * page table fault matches with our instruction decoding. 1036 */ 1037static int 1038verify_gla(struct vm *vm, int cpuid, uint64_t gla, struct vie *vie) 1039{ 1040 int error; 1041 uint64_t base, idx; 1042 1043 /* Skip 'gla' verification */ 1044 if (gla == VIE_INVALID_GLA) 1045 return (0); 1046 1047 base = 0; 1048 if (vie->base_register != VM_REG_LAST) { 1049 error = vm_get_register(vm, cpuid, vie->base_register, &base); 1050 if (error) { 1051 printf("verify_gla: error %d getting base reg %d\n", 1052 error, vie->base_register); 1053 return (-1); 1054 } 1055 1056 /* 1057 * RIP-relative addressing starts from the following 1058 * instruction 1059 */ 1060 if (vie->base_register == VM_REG_GUEST_RIP) 1061 base += vie->num_valid; 1062 } 1063 1064 idx = 0; 1065 if (vie->index_register != VM_REG_LAST) { 1066 error = vm_get_register(vm, cpuid, vie->index_register, &idx); 1067 if (error) { 1068 printf("verify_gla: error %d getting index reg %d\n", 1069 error, vie->index_register); 1070 return (-1); 1071 } 1072 } 1073 1074 if (base + vie->scale * idx + vie->displacement != gla) { 1075 printf("verify_gla mismatch: " 1076 "base(0x%0lx), scale(%d), index(0x%0lx), " 1077 "disp(0x%0lx), gla(0x%0lx)\n", 1078 base, vie->scale, idx, vie->displacement, gla); 1079 return (-1); 1080 } 1081 1082 return (0); 1083} 1084 1085int 1086vmm_decode_instruction(struct vm *vm, int cpuid, uint64_t gla, 1087 enum vie_cpu_mode cpu_mode, struct vie *vie) 1088{ 1089 1090 if (cpu_mode == CPU_MODE_64BIT) { 1091 if 
(decode_rex(vie)) 1092 return (-1); 1093 } 1094 1095 if (decode_opcode(vie)) 1096 return (-1); 1097 1098 if (decode_modrm(vie, cpu_mode)) 1099 return (-1); 1100 1101 if (decode_sib(vie)) 1102 return (-1); 1103 1104 if (decode_displacement(vie)) 1105 return (-1); 1106 1107 if (decode_immediate(vie)) 1108 return (-1); 1109 1110 if (verify_inst_length(vie)) 1111 return (-1); 1112 1113 if (verify_gla(vm, cpuid, gla, vie)) 1114 return (-1); 1115 1116 vie->decoded = 1; /* success */ 1117 1118 return (0); 1119} 1120#endif /* _KERNEL */ 1121