/*
 * vmm_instruction_emul.c (stable/10, r276349) -- bhyve x86 instruction emulation.
 */
/*-
 * Copyright (c) 2012 Sandvine, Inc.
 * Copyright (c) 2012 NetApp, Inc.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
26285612Sdelphij * 27285612Sdelphij * $FreeBSD: stable/10/sys/amd64/vmm/vmm_instruction_emul.c 276349 2014-12-28 21:27:13Z neel $ 28285612Sdelphij */ 29285612Sdelphij 30285612Sdelphij#include <sys/cdefs.h> 31285612Sdelphij__FBSDID("$FreeBSD: stable/10/sys/amd64/vmm/vmm_instruction_emul.c 276349 2014-12-28 21:27:13Z neel $"); 32285612Sdelphij 33285612Sdelphij#ifdef _KERNEL 34285612Sdelphij#include <sys/param.h> 35285612Sdelphij#include <sys/pcpu.h> 36285612Sdelphij#include <sys/systm.h> 37285612Sdelphij#include <sys/proc.h> 38285612Sdelphij 39285612Sdelphij#include <vm/vm.h> 40285612Sdelphij#include <vm/pmap.h> 41285612Sdelphij 42285612Sdelphij#include <machine/vmparam.h> 43285612Sdelphij#include <machine/vmm.h> 44285612Sdelphij#else /* !_KERNEL */ 45285612Sdelphij#include <sys/types.h> 46285612Sdelphij#include <sys/errno.h> 47285612Sdelphij#include <sys/_iovec.h> 48285612Sdelphij 49285612Sdelphij#include <machine/vmm.h> 50285612Sdelphij 51285612Sdelphij#include <assert.h> 52285612Sdelphij#include <vmmapi.h> 53285612Sdelphij#define KASSERT(exp,msg) assert((exp)) 54285612Sdelphij#endif /* _KERNEL */ 55285612Sdelphij 56285612Sdelphij#include <machine/vmm_instruction_emul.h> 57285612Sdelphij#include <x86/psl.h> 58285612Sdelphij#include <x86/specialreg.h> 59285612Sdelphij 60285612Sdelphij/* struct vie_op.op_type */ 61285612Sdelphijenum { 62285612Sdelphij VIE_OP_TYPE_NONE = 0, 63285612Sdelphij VIE_OP_TYPE_MOV, 64285612Sdelphij VIE_OP_TYPE_MOVSX, 65285612Sdelphij VIE_OP_TYPE_MOVZX, 66285612Sdelphij VIE_OP_TYPE_AND, 67285612Sdelphij VIE_OP_TYPE_OR, 68285612Sdelphij VIE_OP_TYPE_SUB, 69285612Sdelphij VIE_OP_TYPE_TWO_BYTE, 70285612Sdelphij VIE_OP_TYPE_PUSH, 71285612Sdelphij VIE_OP_TYPE_CMP, 72285612Sdelphij VIE_OP_TYPE_POP, 73285612Sdelphij VIE_OP_TYPE_LAST 74285612Sdelphij}; 75275970Scy 76275970Scy/* struct vie_op.op_flags */ 77285612Sdelphij#define VIE_OP_F_IMM (1 << 0) /* 16/32-bit immediate operand */ 78275970Scy#define VIE_OP_F_IMM8 (1 << 1) /* 8-bit immediate operand */ 
79275970Scy#define VIE_OP_F_MOFFSET (1 << 2) /* 16/32/64-bit immediate moffset */ 80275970Scy#define VIE_OP_F_NO_MODRM (1 << 3) 81275970Scy 82275970Scystatic const struct vie_op two_byte_opcodes[256] = { 83275970Scy [0xB6] = { 84285612Sdelphij .op_byte = 0xB6, 85275970Scy .op_type = VIE_OP_TYPE_MOVZX, 86275970Scy }, 87285612Sdelphij [0xB7] = { 88285612Sdelphij .op_byte = 0xB7, 89275970Scy .op_type = VIE_OP_TYPE_MOVZX, 90285612Sdelphij }, 91275970Scy [0xBE] = { 92275970Scy .op_byte = 0xBE, 93275970Scy .op_type = VIE_OP_TYPE_MOVSX, 94285612Sdelphij }, 95285612Sdelphij}; 96285612Sdelphij 97285612Sdelphijstatic const struct vie_op one_byte_opcodes[256] = { 98285612Sdelphij [0x0F] = { 99285612Sdelphij .op_byte = 0x0F, 100285612Sdelphij .op_type = VIE_OP_TYPE_TWO_BYTE 101285612Sdelphij }, 102285612Sdelphij [0x2B] = { 103285612Sdelphij .op_byte = 0x2B, 104285612Sdelphij .op_type = VIE_OP_TYPE_SUB, 105285612Sdelphij }, 106285612Sdelphij [0x3B] = { 107285612Sdelphij .op_byte = 0x3B, 108285612Sdelphij .op_type = VIE_OP_TYPE_CMP, 109285612Sdelphij }, 110285612Sdelphij [0x88] = { 111285612Sdelphij .op_byte = 0x88, 112285612Sdelphij .op_type = VIE_OP_TYPE_MOV, 113285612Sdelphij }, 114285612Sdelphij [0x89] = { 115285612Sdelphij .op_byte = 0x89, 116285612Sdelphij .op_type = VIE_OP_TYPE_MOV, 117285612Sdelphij }, 118285612Sdelphij [0x8A] = { 119285612Sdelphij .op_byte = 0x8A, 120285612Sdelphij .op_type = VIE_OP_TYPE_MOV, 121285612Sdelphij }, 122285612Sdelphij [0x8B] = { 123285612Sdelphij .op_byte = 0x8B, 124275970Scy .op_type = VIE_OP_TYPE_MOV, 125275970Scy }, 126275970Scy [0xA1] = { 127275970Scy .op_byte = 0xA1, 128275970Scy .op_type = VIE_OP_TYPE_MOV, 129275970Scy .op_flags = VIE_OP_F_MOFFSET | VIE_OP_F_NO_MODRM, 130275970Scy }, 131275970Scy [0xA3] = { 132285612Sdelphij .op_byte = 0xA3, 133275970Scy .op_type = VIE_OP_TYPE_MOV, 134275970Scy .op_flags = VIE_OP_F_MOFFSET | VIE_OP_F_NO_MODRM, 135275970Scy }, 136275970Scy [0xC6] = { 137275970Scy /* XXX Group 11 extended opcode - not 
just MOV */ 138275970Scy .op_byte = 0xC6, 139275970Scy .op_type = VIE_OP_TYPE_MOV, 140275970Scy .op_flags = VIE_OP_F_IMM8, 141280849Scy }, 142275970Scy [0xC7] = { 143275970Scy .op_byte = 0xC7, 144275970Scy .op_type = VIE_OP_TYPE_MOV, 145275970Scy .op_flags = VIE_OP_F_IMM, 146275970Scy }, 147275970Scy [0x23] = { 148275970Scy .op_byte = 0x23, 149275970Scy .op_type = VIE_OP_TYPE_AND, 150275970Scy }, 151275970Scy [0x81] = { 152275970Scy /* XXX Group 1 extended opcode - not just AND */ 153275970Scy .op_byte = 0x81, 154275970Scy .op_type = VIE_OP_TYPE_AND, 155285612Sdelphij .op_flags = VIE_OP_F_IMM, 156285612Sdelphij }, 157285612Sdelphij [0x83] = { 158285612Sdelphij /* XXX Group 1 extended opcode - not just OR */ 159285612Sdelphij .op_byte = 0x83, 160285612Sdelphij .op_type = VIE_OP_TYPE_OR, 161285612Sdelphij .op_flags = VIE_OP_F_IMM8, 162285612Sdelphij }, 163285612Sdelphij [0x8F] = { 164285612Sdelphij /* XXX Group 1A extended opcode - not just POP */ 165285612Sdelphij .op_byte = 0x8F, 166285612Sdelphij .op_type = VIE_OP_TYPE_POP, 167285612Sdelphij }, 168285612Sdelphij [0xFF] = { 169285612Sdelphij /* XXX Group 5 extended opcode - not just PUSH */ 170285612Sdelphij .op_byte = 0xFF, 171285612Sdelphij .op_type = VIE_OP_TYPE_PUSH, 172285612Sdelphij } 173285612Sdelphij}; 174285612Sdelphij 175285612Sdelphij/* struct vie.mod */ 176285612Sdelphij#define VIE_MOD_INDIRECT 0 177285612Sdelphij#define VIE_MOD_INDIRECT_DISP8 1 178285612Sdelphij#define VIE_MOD_INDIRECT_DISP32 2 179285612Sdelphij#define VIE_MOD_DIRECT 3 180285612Sdelphij 181285612Sdelphij/* struct vie.rm */ 182285612Sdelphij#define VIE_RM_SIB 4 183285612Sdelphij#define VIE_RM_DISP32 5 184285612Sdelphij 185285612Sdelphij#define GB (1024 * 1024 * 1024) 186285612Sdelphij 187285612Sdelphijstatic enum vm_reg_name gpr_map[16] = { 188285612Sdelphij VM_REG_GUEST_RAX, 189285612Sdelphij VM_REG_GUEST_RCX, 190285612Sdelphij VM_REG_GUEST_RDX, 191275970Scy VM_REG_GUEST_RBX, 192275970Scy VM_REG_GUEST_RSP, 193275970Scy 
VM_REG_GUEST_RBP, 194275970Scy VM_REG_GUEST_RSI, 195275970Scy VM_REG_GUEST_RDI, 196275970Scy VM_REG_GUEST_R8, 197275970Scy VM_REG_GUEST_R9, 198275970Scy VM_REG_GUEST_R10, 199275970Scy VM_REG_GUEST_R11, 200275970Scy VM_REG_GUEST_R12, 201285612Sdelphij VM_REG_GUEST_R13, 202285612Sdelphij VM_REG_GUEST_R14, 203285612Sdelphij VM_REG_GUEST_R15 204285612Sdelphij}; 205285612Sdelphij 206285612Sdelphijstatic uint64_t size2mask[] = { 207285612Sdelphij [1] = 0xff, 208285612Sdelphij [2] = 0xffff, 209285612Sdelphij [4] = 0xffffffff, 210285612Sdelphij [8] = 0xffffffffffffffff, 211285612Sdelphij}; 212285612Sdelphij 213285612Sdelphijstatic int 214275970Scyvie_read_register(void *vm, int vcpuid, enum vm_reg_name reg, uint64_t *rval) 215285612Sdelphij{ 216285612Sdelphij int error; 217285612Sdelphij 218285612Sdelphij error = vm_get_register(vm, vcpuid, reg, rval); 219285612Sdelphij 220275970Scy return (error); 221275970Scy} 222275970Scy 223275970Scystatic void 224275970Scyvie_calc_bytereg(struct vie *vie, enum vm_reg_name *reg, int *lhbr) 225275970Scy{ 226275970Scy *lhbr = 0; 227275970Scy *reg = gpr_map[vie->reg]; 228275970Scy 229275970Scy /* 230285612Sdelphij * 64-bit mode imposes limitations on accessing legacy high byte 231285612Sdelphij * registers (lhbr). 232285612Sdelphij * 233285612Sdelphij * The legacy high-byte registers cannot be addressed if the REX 234285612Sdelphij * prefix is present. In this case the values 4, 5, 6 and 7 of the 235275970Scy * 'ModRM:reg' field address %spl, %bpl, %sil and %dil respectively. 236275970Scy * 237275970Scy * If the REX prefix is not present then the values 4, 5, 6 and 7 238275970Scy * of the 'ModRM:reg' field address the legacy high-byte registers, 239275970Scy * %ah, %ch, %dh and %bh respectively. 
240275970Scy */ 241275970Scy if (!vie->rex_present) { 242275970Scy if (vie->reg & 0x4) { 243275970Scy *lhbr = 1; 244275970Scy *reg = gpr_map[vie->reg & 0x3]; 245275970Scy } 246275970Scy } 247275970Scy} 248275970Scy 249275970Scystatic int 250275970Scyvie_read_bytereg(void *vm, int vcpuid, struct vie *vie, uint8_t *rval) 251275970Scy{ 252275970Scy uint64_t val; 253275970Scy int error, lhbr; 254275970Scy enum vm_reg_name reg; 255275970Scy 256275970Scy vie_calc_bytereg(vie, ®, &lhbr); 257275970Scy error = vm_get_register(vm, vcpuid, reg, &val); 258275970Scy 259275970Scy /* 260275970Scy * To obtain the value of a legacy high byte register shift the 261285612Sdelphij * base register right by 8 bits (%ah = %rax >> 8). 262275970Scy */ 263285612Sdelphij if (lhbr) 264285612Sdelphij *rval = val >> 8; 265285612Sdelphij else 266275970Scy *rval = val; 267275970Scy return (error); 268275970Scy} 269275970Scy 270275970Scystatic int 271275970Scyvie_write_bytereg(void *vm, int vcpuid, struct vie *vie, uint8_t byte) 272275970Scy{ 273275970Scy uint64_t origval, val, mask; 274275970Scy int error, lhbr; 275275970Scy enum vm_reg_name reg; 276275970Scy 277275970Scy vie_calc_bytereg(vie, ®, &lhbr); 278275970Scy error = vm_get_register(vm, vcpuid, reg, &origval); 279285612Sdelphij if (error == 0) { 280285612Sdelphij val = byte; 281285612Sdelphij mask = 0xff; 282285612Sdelphij if (lhbr) { 283285612Sdelphij /* 284285612Sdelphij * Shift left by 8 to store 'byte' in a legacy high 285285612Sdelphij * byte register. 
286285612Sdelphij */ 287285612Sdelphij val <<= 8; 288285612Sdelphij mask <<= 8; 289285612Sdelphij } 290285612Sdelphij val |= origval & ~mask; 291285612Sdelphij error = vm_set_register(vm, vcpuid, reg, val); 292285612Sdelphij } 293285612Sdelphij return (error); 294285612Sdelphij} 295285612Sdelphij 296275970Scyint 297285612Sdelphijvie_update_register(void *vm, int vcpuid, enum vm_reg_name reg, 298275970Scy uint64_t val, int size) 299285612Sdelphij{ 300275970Scy int error; 301275970Scy uint64_t origval; 302275970Scy 303285612Sdelphij switch (size) { 304285612Sdelphij case 1: 305285612Sdelphij case 2: 306285612Sdelphij error = vie_read_register(vm, vcpuid, reg, &origval); 307275970Scy if (error) 308285612Sdelphij return (error); 309285612Sdelphij val &= size2mask[size]; 310285612Sdelphij val |= origval & ~size2mask[size]; 311285612Sdelphij break; 312275970Scy case 4: 313285612Sdelphij val &= 0xffffffffUL; 314285612Sdelphij break; 315285612Sdelphij case 8: 316285612Sdelphij break; 317275970Scy default: 318285612Sdelphij return (EINVAL); 319285612Sdelphij } 320285612Sdelphij 321275970Scy error = vm_set_register(vm, vcpuid, reg, val); 322275970Scy return (error); 323285612Sdelphij} 324285612Sdelphij 325275970Scy#define RFLAGS_STATUS_BITS (PSL_C | PSL_PF | PSL_AF | PSL_Z | PSL_N | PSL_V) 326285612Sdelphij 327285612Sdelphij/* 328275970Scy * Return the status flags that would result from doing (x - y). 
329275970Scy */ 330285612Sdelphij#define GETCC(sz) \ 331285612Sdelphijstatic u_long \ 332285612Sdelphijgetcc##sz(uint##sz##_t x, uint##sz##_t y) \ 333275970Scy{ \ 334275970Scy u_long rflags; \ 335275970Scy \ 336275970Scy __asm __volatile("sub %2,%1; pushfq; popq %0" : \ 337275970Scy "=r" (rflags), "+r" (x) : "m" (y)); \ 338275970Scy return (rflags); \ 339275970Scy} struct __hack 340275970Scy 341275970ScyGETCC(8); 342285612SdelphijGETCC(16); 343285612SdelphijGETCC(32); 344285612SdelphijGETCC(64); 345285612Sdelphij 346285612Sdelphijstatic u_long 347285612Sdelphijgetcc(int opsize, uint64_t x, uint64_t y) 348275970Scy{ 349275970Scy KASSERT(opsize == 1 || opsize == 2 || opsize == 4 || opsize == 8, 350275970Scy ("getcc: invalid operand size %d", opsize)); 351275970Scy 352285612Sdelphij if (opsize == 1) 353285612Sdelphij return (getcc8(x, y)); 354285612Sdelphij else if (opsize == 2) 355285612Sdelphij return (getcc16(x, y)); 356275970Scy else if (opsize == 4) 357275970Scy return (getcc32(x, y)); 358285612Sdelphij else 359275970Scy return (getcc64(x, y)); 360275970Scy} 361275970Scy 362275970Scystatic int 363275970Scyemulate_mov(void *vm, int vcpuid, uint64_t gpa, struct vie *vie, 364275970Scy mem_region_read_t memread, mem_region_write_t memwrite, void *arg) 365275970Scy{ 366275970Scy int error, size; 367275970Scy enum vm_reg_name reg; 368275970Scy uint8_t byte; 369275970Scy uint64_t val; 370275970Scy 371275970Scy size = vie->opsize; 372285612Sdelphij error = EINVAL; 373285612Sdelphij 374285612Sdelphij switch (vie->op.op_byte) { 375275970Scy case 0x88: 376275970Scy /* 377275970Scy * MOV byte from reg (ModRM:reg) to mem (ModRM:r/m) 378275970Scy * 88/r: mov r/m8, r8 379275970Scy * REX + 88/r: mov r/m8, r8 (%ah, %ch, %dh, %bh not available) 380285612Sdelphij */ 381285612Sdelphij size = 1; /* override for byte operation */ 382285612Sdelphij error = vie_read_bytereg(vm, vcpuid, vie, &byte); 383285612Sdelphij if (error == 0) 384285612Sdelphij error = memwrite(vm, vcpuid, gpa, 
byte, size, arg); 385285612Sdelphij break; 386285612Sdelphij case 0x89: 387285612Sdelphij /* 388285612Sdelphij * MOV from reg (ModRM:reg) to mem (ModRM:r/m) 389285612Sdelphij * 89/r: mov r/m16, r16 390285612Sdelphij * 89/r: mov r/m32, r32 391285612Sdelphij * REX.W + 89/r mov r/m64, r64 392285612Sdelphij */ 393275970Scy reg = gpr_map[vie->reg]; 394285612Sdelphij error = vie_read_register(vm, vcpuid, reg, &val); 395285612Sdelphij if (error == 0) { 396275970Scy val &= size2mask[size]; 397285612Sdelphij error = memwrite(vm, vcpuid, gpa, val, size, arg); 398285612Sdelphij } 399285612Sdelphij break; 400285612Sdelphij case 0x8A: 401285612Sdelphij /* 402285612Sdelphij * MOV byte from mem (ModRM:r/m) to reg (ModRM:reg) 403285612Sdelphij * 8A/r: mov r8, r/m8 404285612Sdelphij * REX + 8A/r: mov r8, r/m8 405285612Sdelphij */ 406285612Sdelphij size = 1; /* override for byte operation */ 407285612Sdelphij error = memread(vm, vcpuid, gpa, &val, size, arg); 408285612Sdelphij if (error == 0) 409285612Sdelphij error = vie_write_bytereg(vm, vcpuid, vie, val); 410285612Sdelphij break; 411275970Scy case 0x8B: 412275970Scy /* 413275970Scy * MOV from mem (ModRM:r/m) to reg (ModRM:reg) 414275970Scy * 8B/r: mov r16, r/m16 415275970Scy * 8B/r: mov r32, r/m32 416275970Scy * REX.W 8B/r: mov r64, r/m64 417275970Scy */ 418275970Scy error = memread(vm, vcpuid, gpa, &val, size, arg); 419275970Scy if (error == 0) { 420275970Scy reg = gpr_map[vie->reg]; 421275970Scy error = vie_update_register(vm, vcpuid, reg, val, size); 422275970Scy } 423275970Scy break; 424275970Scy case 0xA1: 425275970Scy /* 426275970Scy * MOV from seg:moffset to AX/EAX/RAX 427275970Scy * A1: mov AX, moffs16 428275970Scy * A1: mov EAX, moffs32 429275970Scy * REX.W + A1: mov RAX, moffs64 430275970Scy */ 431275970Scy error = memread(vm, vcpuid, gpa, &val, size, arg); 432275970Scy if (error == 0) { 433275970Scy reg = VM_REG_GUEST_RAX; 434275970Scy error = vie_update_register(vm, vcpuid, reg, val, size); 435275970Scy } 436275970Scy 
break; 437285612Sdelphij case 0xA3: 438285612Sdelphij /* 439285612Sdelphij * MOV from AX/EAX/RAX to seg:moffset 440285612Sdelphij * A3: mov moffs16, AX 441275970Scy * A3: mov moffs32, EAX 442275970Scy * REX.W + A3: mov moffs64, RAX 443275970Scy */ 444275970Scy error = vie_read_register(vm, vcpuid, VM_REG_GUEST_RAX, &val); 445275970Scy if (error == 0) { 446285612Sdelphij val &= size2mask[size]; 447285612Sdelphij error = memwrite(vm, vcpuid, gpa, val, size, arg); 448285612Sdelphij } 449285612Sdelphij break; 450285612Sdelphij case 0xC6: 451285612Sdelphij /* 452275970Scy * MOV from imm8 to mem (ModRM:r/m) 453285612Sdelphij * C6/0 mov r/m8, imm8 454285612Sdelphij * REX + C6/0 mov r/m8, imm8 455275970Scy */ 456275970Scy size = 1; /* override for byte operation */ 457275970Scy error = memwrite(vm, vcpuid, gpa, vie->immediate, size, arg); 458285612Sdelphij break; 459285612Sdelphij case 0xC7: 460285612Sdelphij /* 461285612Sdelphij * MOV from imm16/imm32 to mem (ModRM:r/m) 462285612Sdelphij * C7/0 mov r/m16, imm16 463285612Sdelphij * C7/0 mov r/m32, imm32 464285612Sdelphij * REX.W + C7/0 mov r/m64, imm32 (sign-extended to 64-bits) 465285612Sdelphij */ 466285612Sdelphij val = vie->immediate & size2mask[size]; 467285612Sdelphij error = memwrite(vm, vcpuid, gpa, val, size, arg); 468285612Sdelphij break; 469285612Sdelphij default: 470285612Sdelphij break; 471285612Sdelphij } 472285612Sdelphij 473285612Sdelphij return (error); 474285612Sdelphij} 475285612Sdelphij 476285612Sdelphijstatic int 477285612Sdelphijemulate_movx(void *vm, int vcpuid, uint64_t gpa, struct vie *vie, 478285612Sdelphij mem_region_read_t memread, mem_region_write_t memwrite, 479285612Sdelphij void *arg) 480285612Sdelphij{ 481285612Sdelphij int error, size; 482285612Sdelphij enum vm_reg_name reg; 483285612Sdelphij uint64_t val; 484285612Sdelphij 485285612Sdelphij size = vie->opsize; 486285612Sdelphij error = EINVAL; 487285612Sdelphij 488285612Sdelphij switch (vie->op.op_byte) { 489285612Sdelphij case 0xB6: 
490285612Sdelphij /* 491285612Sdelphij * MOV and zero extend byte from mem (ModRM:r/m) to 492285612Sdelphij * reg (ModRM:reg). 493285612Sdelphij * 494285612Sdelphij * 0F B6/r movzx r16, r/m8 495275970Scy * 0F B6/r movzx r32, r/m8 496275970Scy * REX.W + 0F B6/r movzx r64, r/m8 497275970Scy */ 498275970Scy 499275970Scy /* get the first operand */ 500275970Scy error = memread(vm, vcpuid, gpa, &val, 1, arg); 501275970Scy if (error) 502285612Sdelphij break; 503285612Sdelphij 504285612Sdelphij /* get the second operand */ 505275970Scy reg = gpr_map[vie->reg]; 506275970Scy 507275970Scy /* zero-extend byte */ 508285612Sdelphij val = (uint8_t)val; 509285612Sdelphij 510285612Sdelphij /* write the result */ 511275970Scy error = vie_update_register(vm, vcpuid, reg, val, size); 512285612Sdelphij break; 513285612Sdelphij case 0xB7: 514285612Sdelphij /* 515285612Sdelphij * MOV and zero extend word from mem (ModRM:r/m) to 516285612Sdelphij * reg (ModRM:reg). 517285612Sdelphij * 518285612Sdelphij * 0F B7/r movzx r32, r/m16 519285612Sdelphij * REX.W + 0F B7/r movzx r64, r/m16 520285612Sdelphij */ 521285612Sdelphij error = memread(vm, vcpuid, gpa, &val, 2, arg); 522285612Sdelphij if (error) 523285612Sdelphij return (error); 524285612Sdelphij 525285612Sdelphij reg = gpr_map[vie->reg]; 526285612Sdelphij 527285612Sdelphij /* zero-extend word */ 528285612Sdelphij val = (uint16_t)val; 529285612Sdelphij 530285612Sdelphij error = vie_update_register(vm, vcpuid, reg, val, size); 531285612Sdelphij break; 532285612Sdelphij case 0xBE: 533285612Sdelphij /* 534285612Sdelphij * MOV and sign extend byte from mem (ModRM:r/m) to 535285612Sdelphij * reg (ModRM:reg). 
536285612Sdelphij * 537285612Sdelphij * 0F BE/r movsx r16, r/m8 538285612Sdelphij * 0F BE/r movsx r32, r/m8 539285612Sdelphij * REX.W + 0F BE/r movsx r64, r/m8 540285612Sdelphij */ 541285612Sdelphij 542285612Sdelphij /* get the first operand */ 543285612Sdelphij error = memread(vm, vcpuid, gpa, &val, 1, arg); 544285612Sdelphij if (error) 545285612Sdelphij break; 546285612Sdelphij 547285612Sdelphij /* get the second operand */ 548285612Sdelphij reg = gpr_map[vie->reg]; 549285612Sdelphij 550275970Scy /* sign extend byte */ 551275970Scy val = (int8_t)val; 552285612Sdelphij 553275970Scy /* write the result */ 554275970Scy error = vie_update_register(vm, vcpuid, reg, val, size); 555275970Scy break; 556275970Scy default: 557275970Scy break; 558275970Scy } 559275970Scy return (error); 560275970Scy} 561275970Scy 562285612Sdelphijstatic int 563285612Sdelphijemulate_and(void *vm, int vcpuid, uint64_t gpa, struct vie *vie, 564285612Sdelphij mem_region_read_t memread, mem_region_write_t memwrite, void *arg) 565285612Sdelphij{ 566275970Scy int error, size; 567285612Sdelphij enum vm_reg_name reg; 568285612Sdelphij uint64_t result, rflags, rflags2, val1, val2; 569285612Sdelphij 570285612Sdelphij size = vie->opsize; 571285612Sdelphij error = EINVAL; 572275970Scy 573285612Sdelphij switch (vie->op.op_byte) { 574275970Scy case 0x23: 575275970Scy /* 576275970Scy * AND reg (ModRM:reg) and mem (ModRM:r/m) and store the 577285612Sdelphij * result in reg. 
578285612Sdelphij * 579285612Sdelphij * 23/r and r16, r/m16 580285612Sdelphij * 23/r and r32, r/m32 581285612Sdelphij * REX.W + 23/r and r64, r/m64 582285612Sdelphij */ 583285612Sdelphij 584275970Scy /* get the first operand */ 585275970Scy reg = gpr_map[vie->reg]; 586275970Scy error = vie_read_register(vm, vcpuid, reg, &val1); 587275970Scy if (error) 588275970Scy break; 589275970Scy 590285612Sdelphij /* get the second operand */ 591285612Sdelphij error = memread(vm, vcpuid, gpa, &val2, size, arg); 592285612Sdelphij if (error) 593285612Sdelphij break; 594285612Sdelphij 595275970Scy /* perform the operation and write the result */ 596275970Scy result = val1 & val2; 597275970Scy error = vie_update_register(vm, vcpuid, reg, result, size); 598275970Scy break; 599275970Scy case 0x81: 600275970Scy /* 601275970Scy * AND/OR mem (ModRM:r/m) with immediate and store the 602275970Scy * result in mem. 603275970Scy * 604275970Scy * AND: i = 4 605275970Scy * OR: i = 1 606275970Scy * 81 /i op r/m16, imm16 607275970Scy * 81 /i op r/m32, imm32 608275970Scy * REX.W + 81 /i op r/m64, imm32 sign-extended to 64 609285612Sdelphij * 610275970Scy */ 611275970Scy 612275970Scy /* get the first operand */ 613285612Sdelphij error = memread(vm, vcpuid, gpa, &val1, size, arg); 614285612Sdelphij if (error) 615285612Sdelphij break; 616285612Sdelphij 617285612Sdelphij /* 618285612Sdelphij * perform the operation with the pre-fetched immediate 619285612Sdelphij * operand and write the result 620285612Sdelphij */ 621285612Sdelphij switch (vie->reg & 7) { 622285612Sdelphij case 0x4: 623285612Sdelphij /* modrm:reg == b100, AND */ 624285612Sdelphij result = val1 & vie->immediate; 625285612Sdelphij break; 626285612Sdelphij case 0x1: 627275970Scy /* modrm:reg == b001, OR */ 628285612Sdelphij result = val1 | vie->immediate; 629285612Sdelphij break; 630285612Sdelphij default: 631285612Sdelphij error = EINVAL; 632285612Sdelphij break; 633285612Sdelphij } 634285612Sdelphij if (error) 635285612Sdelphij break; 
636285612Sdelphij 637285612Sdelphij error = memwrite(vm, vcpuid, gpa, result, size, arg); 638285612Sdelphij break; 639285612Sdelphij default: 640275970Scy break; 641275970Scy } 642275970Scy if (error) 643275970Scy return (error); 644275970Scy 645275970Scy error = vie_read_register(vm, vcpuid, VM_REG_GUEST_RFLAGS, &rflags); 646275970Scy if (error) 647275970Scy return (error); 648275970Scy 649275970Scy /* 650275970Scy * OF and CF are cleared; the SF, ZF and PF flags are set according 651275970Scy * to the result; AF is undefined. 652275970Scy * 653275970Scy * The updated status flags are obtained by subtracting 0 from 'result'. 654285612Sdelphij */ 655275970Scy rflags2 = getcc(size, result, 0); 656275970Scy rflags &= ~RFLAGS_STATUS_BITS; 657275970Scy rflags |= rflags2 & (PSL_PF | PSL_Z | PSL_N); 658275970Scy 659285612Sdelphij error = vie_update_register(vm, vcpuid, VM_REG_GUEST_RFLAGS, rflags, 8); 660285612Sdelphij return (error); 661285612Sdelphij} 662285612Sdelphij 663285612Sdelphijstatic int 664275970Scyemulate_or(void *vm, int vcpuid, uint64_t gpa, struct vie *vie, 665275970Scy mem_region_read_t memread, mem_region_write_t memwrite, void *arg) 666275970Scy{ 667275970Scy int error, size; 668275970Scy uint64_t val1, result, rflags, rflags2; 669275970Scy 670275970Scy size = vie->opsize; 671275970Scy error = EINVAL; 672275970Scy 673275970Scy switch (vie->op.op_byte) { 674275970Scy case 0x83: 675275970Scy /* 676275970Scy * OR mem (ModRM:r/m) with immediate and store the 677275970Scy * result in mem. 678275970Scy * 679275970Scy * 83 /1 OR r/m16, imm8 sign-extended to 16 680275970Scy * 83 /1 OR r/m32, imm8 sign-extended to 32 681275970Scy * REX.W + 83/1 OR r/m64, imm8 sign-extended to 64 682275970Scy * 683275970Scy * Currently, only the OR operation of the 0x83 opcode 684275970Scy * is implemented (ModRM:reg = b001). 
685285612Sdelphij */ 686275970Scy if ((vie->reg & 7) != 1) 687275970Scy break; 688275970Scy 689275970Scy /* get the first operand */ 690275970Scy error = memread(vm, vcpuid, gpa, &val1, size, arg); 691275970Scy if (error) 692275970Scy break; 693275970Scy 694275970Scy /* 695275970Scy * perform the operation with the pre-fetched immediate 696275970Scy * operand and write the result 697275970Scy */ 698275970Scy result = val1 | vie->immediate; 699275970Scy error = memwrite(vm, vcpuid, gpa, result, size, arg); 700275970Scy break; 701285612Sdelphij default: 702285612Sdelphij break; 703285612Sdelphij } 704285612Sdelphij if (error) 705275970Scy return (error); 706275970Scy 707275970Scy error = vie_read_register(vm, vcpuid, VM_REG_GUEST_RFLAGS, &rflags); 708275970Scy if (error) 709285612Sdelphij return (error); 710275970Scy 711275970Scy /* 712285612Sdelphij * OF and CF are cleared; the SF, ZF and PF flags are set according 713285612Sdelphij * to the result; AF is undefined. 714285612Sdelphij * 715285612Sdelphij * The updated status flags are obtained by subtracting 0 from 'result'. 
716285612Sdelphij */ 717275970Scy rflags2 = getcc(size, result, 0); 718285612Sdelphij rflags &= ~RFLAGS_STATUS_BITS; 719275970Scy rflags |= rflags2 & (PSL_PF | PSL_Z | PSL_N); 720285612Sdelphij 721275970Scy error = vie_update_register(vm, vcpuid, VM_REG_GUEST_RFLAGS, rflags, 8); 722285612Sdelphij return (error); 723275970Scy} 724275970Scy 725275970Scystatic int 726275970Scyemulate_cmp(void *vm, int vcpuid, uint64_t gpa, struct vie *vie, 727285612Sdelphij mem_region_read_t memread, mem_region_write_t memwrite, void *arg) 728285612Sdelphij{ 729285612Sdelphij int error, size; 730285612Sdelphij uint64_t op1, op2, rflags, rflags2; 731285612Sdelphij enum vm_reg_name reg; 732285612Sdelphij 733275970Scy size = vie->opsize; 734275970Scy switch (vie->op.op_byte) { 735285612Sdelphij case 0x3B: 736285612Sdelphij /* 737285612Sdelphij * 3B/r CMP r16, r/m16 738285612Sdelphij * 3B/r CMP r32, r/m32 739285612Sdelphij * REX.W + 3B/r CMP r64, r/m64 740285612Sdelphij * 741285612Sdelphij * Compare first operand (reg) with second operand (r/m) and 742275970Scy * set status flags in EFLAGS register. The comparison is 743275970Scy * performed by subtracting the second operand from the first 744285612Sdelphij * operand and then setting the status flags. 
745285612Sdelphij */ 746285612Sdelphij 747285612Sdelphij /* Get the first operand */ 748285612Sdelphij reg = gpr_map[vie->reg]; 749285612Sdelphij error = vie_read_register(vm, vcpuid, reg, &op1); 750285612Sdelphij if (error) 751285612Sdelphij return (error); 752285612Sdelphij 753285612Sdelphij /* Get the second operand */ 754285612Sdelphij error = memread(vm, vcpuid, gpa, &op2, size, arg); 755285612Sdelphij if (error) 756285612Sdelphij return (error); 757285612Sdelphij 758285612Sdelphij break; 759285612Sdelphij default: 760285612Sdelphij return (EINVAL); 761285612Sdelphij } 762285612Sdelphij rflags2 = getcc(size, op1, op2); 763285612Sdelphij error = vie_read_register(vm, vcpuid, VM_REG_GUEST_RFLAGS, &rflags); 764285612Sdelphij if (error) 765285612Sdelphij return (error); 766285612Sdelphij rflags &= ~RFLAGS_STATUS_BITS; 767285612Sdelphij rflags |= rflags2 & RFLAGS_STATUS_BITS; 768285612Sdelphij 769285612Sdelphij error = vie_update_register(vm, vcpuid, VM_REG_GUEST_RFLAGS, rflags, 8); 770285612Sdelphij return (error); 771285612Sdelphij} 772285612Sdelphij 773285612Sdelphijstatic int 774285612Sdelphijemulate_sub(void *vm, int vcpuid, uint64_t gpa, struct vie *vie, 775285612Sdelphij mem_region_read_t memread, mem_region_write_t memwrite, void *arg) 776285612Sdelphij{ 777275970Scy int error, size; 778275970Scy uint64_t nval, rflags, rflags2, val1, val2; 779275970Scy enum vm_reg_name reg; 780275970Scy 781275970Scy size = vie->opsize; 782275970Scy error = EINVAL; 783275970Scy 784275970Scy switch (vie->op.op_byte) { 785275970Scy case 0x2B: 786275970Scy /* 787275970Scy * SUB r/m from r and store the result in r 788275970Scy * 789285612Sdelphij * 2B/r SUB r16, r/m16 790285612Sdelphij * 2B/r SUB r32, r/m32 791285612Sdelphij * REX.W + 2B/r SUB r64, r/m64 792285612Sdelphij */ 793285612Sdelphij 794285612Sdelphij /* get the first operand */ 795285612Sdelphij reg = gpr_map[vie->reg]; 796285612Sdelphij error = vie_read_register(vm, vcpuid, reg, &val1); 797275970Scy if (error) 
798285612Sdelphij break; 799285612Sdelphij 800285612Sdelphij /* get the second operand */ 801285612Sdelphij error = memread(vm, vcpuid, gpa, &val2, size, arg); 802285612Sdelphij if (error) 803285612Sdelphij break; 804285612Sdelphij 805285612Sdelphij /* perform the operation and write the result */ 806275970Scy nval = val1 - val2; 807275970Scy error = vie_update_register(vm, vcpuid, reg, nval, size); 808275970Scy break; 809285612Sdelphij default: 810285612Sdelphij break; 811285612Sdelphij } 812285612Sdelphij 813275970Scy if (!error) { 814285612Sdelphij rflags2 = getcc(size, val1, val2); 815275970Scy error = vie_read_register(vm, vcpuid, VM_REG_GUEST_RFLAGS, 816275970Scy &rflags); 817275970Scy if (error) 818275970Scy return (error); 819275970Scy 820275970Scy rflags &= ~RFLAGS_STATUS_BITS; 821275970Scy rflags |= rflags2 & RFLAGS_STATUS_BITS; 822275970Scy error = vie_update_register(vm, vcpuid, VM_REG_GUEST_RFLAGS, 823275970Scy rflags, 8); 824275970Scy } 825275970Scy 826275970Scy return (error); 827275970Scy} 828275970Scy 829275970Scystatic int 830285612Sdelphijemulate_stack_op(void *vm, int vcpuid, uint64_t mmio_gpa, struct vie *vie, 831285612Sdelphij struct vm_guest_paging *paging, mem_region_read_t memread, 832285612Sdelphij mem_region_write_t memwrite, void *arg) 833275970Scy{ 834275970Scy#ifdef _KERNEL 835285612Sdelphij struct vm_copyinfo copyinfo[2]; 836285612Sdelphij#else 837285612Sdelphij struct iovec copyinfo[2]; 838285612Sdelphij#endif 839285612Sdelphij struct seg_desc ss_desc; 840285612Sdelphij uint64_t cr0, rflags, rsp, stack_gla, val; 841275970Scy int error, size, stackaddrsize, pushop; 842275970Scy 843275970Scy val = 0; 844275970Scy size = vie->opsize; 845275970Scy pushop = (vie->op.op_type == VIE_OP_TYPE_PUSH) ? 
1 : 0; 846275970Scy 847275970Scy /* 848275970Scy * From "Address-Size Attributes for Stack Accesses", Intel SDL, Vol 1 849275970Scy */ 850275970Scy if (paging->cpu_mode == CPU_MODE_REAL) { 851275970Scy stackaddrsize = 2; 852275970Scy } else if (paging->cpu_mode == CPU_MODE_64BIT) { 853275970Scy /* 854275970Scy * "Stack Manipulation Instructions in 64-bit Mode", SDM, Vol 3 855275970Scy * - Stack pointer size is always 64-bits. 856275970Scy * - PUSH/POP of 32-bit values is not possible in 64-bit mode. 857275970Scy * - 16-bit PUSH/POP is supported by using the operand size 858275970Scy * override prefix (66H). 859285612Sdelphij */ 860275970Scy stackaddrsize = 8; 861285612Sdelphij size = vie->opsize_override ? 2 : 8; 862285612Sdelphij } else { 863285612Sdelphij /* 864285612Sdelphij * In protected or compability mode the 'B' flag in the 865285612Sdelphij * stack-segment descriptor determines the size of the 866285612Sdelphij * stack pointer. 867285612Sdelphij */ 868285612Sdelphij error = vm_get_seg_desc(vm, vcpuid, VM_REG_GUEST_SS, &ss_desc); 869285612Sdelphij KASSERT(error == 0, ("%s: error %d getting SS descriptor", 870285612Sdelphij __func__, error)); 871285612Sdelphij if (SEG_DESC_DEF32(ss_desc.access)) 872285612Sdelphij stackaddrsize = 4; 873285612Sdelphij else 874285612Sdelphij stackaddrsize = 2; 875285612Sdelphij } 876275970Scy 877285612Sdelphij error = vie_read_register(vm, vcpuid, VM_REG_GUEST_CR0, &cr0); 878275970Scy KASSERT(error == 0, ("%s: error %d getting cr0", __func__, error)); 879275970Scy 880285612Sdelphij error = vie_read_register(vm, vcpuid, VM_REG_GUEST_RFLAGS, &rflags); 881285612Sdelphij KASSERT(error == 0, ("%s: error %d getting rflags", __func__, error)); 882285612Sdelphij 883285612Sdelphij error = vie_read_register(vm, vcpuid, VM_REG_GUEST_RSP, &rsp); 884285612Sdelphij KASSERT(error == 0, ("%s: error %d getting rsp", __func__, error)); 885285612Sdelphij if (pushop) { 886285612Sdelphij rsp -= size; 887285612Sdelphij } 888285612Sdelphij 
889285612Sdelphij if (vie_calculate_gla(paging->cpu_mode, VM_REG_GUEST_SS, &ss_desc, 890285612Sdelphij rsp, size, stackaddrsize, pushop ? PROT_WRITE : PROT_READ, 891285612Sdelphij &stack_gla)) { 892285612Sdelphij vm_inject_ss(vm, vcpuid, 0); 893285612Sdelphij return (0); 894275970Scy } 895285612Sdelphij 896285612Sdelphij if (vie_canonical_check(paging->cpu_mode, stack_gla)) { 897285612Sdelphij vm_inject_ss(vm, vcpuid, 0); 898285612Sdelphij return (0); 899285612Sdelphij } 900285612Sdelphij 901285612Sdelphij if (vie_alignment_check(paging->cpl, size, cr0, rflags, stack_gla)) { 902285612Sdelphij vm_inject_ac(vm, vcpuid, 0); 903285612Sdelphij return (0); 904285612Sdelphij } 905285612Sdelphij 906285612Sdelphij error = vm_copy_setup(vm, vcpuid, paging, stack_gla, size, 907285612Sdelphij pushop ? PROT_WRITE : PROT_READ, copyinfo, nitems(copyinfo)); 908285612Sdelphij if (error == -1) { 909285612Sdelphij /* 910285612Sdelphij * XXX cannot return a negative error value here because it 911285612Sdelphij * ends up being the return value of the VM_RUN() ioctl and 912285612Sdelphij * is interpreted as a pseudo-error (for e.g. ERESTART). 
913285612Sdelphij */ 914285612Sdelphij return (EFAULT); 915285612Sdelphij } else if (error == 1) { 916285612Sdelphij /* Resume guest execution to handle page fault */ 917285612Sdelphij return (0); 918285612Sdelphij } 919285612Sdelphij 920285612Sdelphij if (pushop) { 921285612Sdelphij error = memread(vm, vcpuid, mmio_gpa, &val, size, arg); 922285612Sdelphij if (error == 0) 923285612Sdelphij vm_copyout(vm, vcpuid, &val, copyinfo, size); 924285612Sdelphij } else { 925285612Sdelphij vm_copyin(vm, vcpuid, copyinfo, &val, size); 926285612Sdelphij error = memwrite(vm, vcpuid, mmio_gpa, val, size, arg); 927285612Sdelphij rsp += size; 928285612Sdelphij } 929285612Sdelphij#ifdef _KERNEL 930285612Sdelphij vm_copy_teardown(vm, vcpuid, copyinfo, nitems(copyinfo)); 931285612Sdelphij#endif 932285612Sdelphij 933285612Sdelphij if (error == 0) { 934285612Sdelphij error = vie_update_register(vm, vcpuid, VM_REG_GUEST_RSP, rsp, 935285612Sdelphij stackaddrsize); 936285612Sdelphij KASSERT(error == 0, ("error %d updating rsp", error)); 937285612Sdelphij } 938285612Sdelphij return (error); 939285612Sdelphij} 940285612Sdelphij 941285612Sdelphijstatic int 942285612Sdelphijemulate_push(void *vm, int vcpuid, uint64_t mmio_gpa, struct vie *vie, 943285612Sdelphij struct vm_guest_paging *paging, mem_region_read_t memread, 944285612Sdelphij mem_region_write_t memwrite, void *arg) 945285612Sdelphij{ 946285612Sdelphij int error; 947285612Sdelphij 948285612Sdelphij /* 949285612Sdelphij * Table A-6, "Opcode Extensions", Intel SDM, Vol 2. 950285612Sdelphij * 951285612Sdelphij * PUSH is part of the group 5 extended opcodes and is identified 952310419Sdelphij * by ModRM:reg = b110. 
953285612Sdelphij */ 954285612Sdelphij if ((vie->reg & 7) != 6) 955285612Sdelphij return (EINVAL); 956285612Sdelphij 957285612Sdelphij error = emulate_stack_op(vm, vcpuid, mmio_gpa, vie, paging, memread, 958285612Sdelphij memwrite, arg); 959285612Sdelphij return (error); 960285612Sdelphij} 961285612Sdelphij 962285612Sdelphijstatic int 963285612Sdelphijemulate_pop(void *vm, int vcpuid, uint64_t mmio_gpa, struct vie *vie, 964285612Sdelphij struct vm_guest_paging *paging, mem_region_read_t memread, 965285612Sdelphij mem_region_write_t memwrite, void *arg) 966285612Sdelphij{ 967285612Sdelphij int error; 968285612Sdelphij 969285612Sdelphij /* 970285612Sdelphij * Table A-6, "Opcode Extensions", Intel SDM, Vol 2. 971285612Sdelphij * 972285612Sdelphij * POP is part of the group 1A extended opcodes and is identified 973285612Sdelphij * by ModRM:reg = b000. 974285612Sdelphij */ 975285612Sdelphij if ((vie->reg & 7) != 0) 976285612Sdelphij return (EINVAL); 977285612Sdelphij 978285612Sdelphij error = emulate_stack_op(vm, vcpuid, mmio_gpa, vie, paging, memread, 979285612Sdelphij memwrite, arg); 980285612Sdelphij return (error); 981285612Sdelphij} 982285612Sdelphij 983285612Sdelphijint 984285612Sdelphijvmm_emulate_instruction(void *vm, int vcpuid, uint64_t gpa, struct vie *vie, 985285612Sdelphij struct vm_guest_paging *paging, mem_region_read_t memread, 986285612Sdelphij mem_region_write_t memwrite, void *memarg) 987285612Sdelphij{ 988285612Sdelphij int error; 989285612Sdelphij 990285612Sdelphij if (!vie->decoded) 991285612Sdelphij return (EINVAL); 992285612Sdelphij 993285612Sdelphij switch (vie->op.op_type) { 994285612Sdelphij case VIE_OP_TYPE_POP: 995285612Sdelphij error = emulate_pop(vm, vcpuid, gpa, vie, paging, memread, 996285612Sdelphij memwrite, memarg); 997285612Sdelphij break; 998285612Sdelphij case VIE_OP_TYPE_PUSH: 999285612Sdelphij error = emulate_push(vm, vcpuid, gpa, vie, paging, memread, 1000285612Sdelphij memwrite, memarg); 1001285612Sdelphij break; 
1002285612Sdelphij case VIE_OP_TYPE_CMP: 1003285612Sdelphij error = emulate_cmp(vm, vcpuid, gpa, vie, 1004285612Sdelphij memread, memwrite, memarg); 1005285612Sdelphij break; 1006285612Sdelphij case VIE_OP_TYPE_MOV: 1007285612Sdelphij error = emulate_mov(vm, vcpuid, gpa, vie, 1008285612Sdelphij memread, memwrite, memarg); 1009285612Sdelphij break; 1010285612Sdelphij case VIE_OP_TYPE_MOVSX: 1011285612Sdelphij case VIE_OP_TYPE_MOVZX: 1012285612Sdelphij error = emulate_movx(vm, vcpuid, gpa, vie, 1013285612Sdelphij memread, memwrite, memarg); 1014285612Sdelphij break; 1015285612Sdelphij case VIE_OP_TYPE_AND: 1016285612Sdelphij error = emulate_and(vm, vcpuid, gpa, vie, 1017285612Sdelphij memread, memwrite, memarg); 1018285612Sdelphij break; 1019285612Sdelphij case VIE_OP_TYPE_OR: 1020285612Sdelphij error = emulate_or(vm, vcpuid, gpa, vie, 1021285612Sdelphij memread, memwrite, memarg); 1022285612Sdelphij break; 1023285612Sdelphij case VIE_OP_TYPE_SUB: 1024285612Sdelphij error = emulate_sub(vm, vcpuid, gpa, vie, 1025285612Sdelphij memread, memwrite, memarg); 1026285612Sdelphij break; 1027285612Sdelphij default: 1028285612Sdelphij error = EINVAL; 1029285612Sdelphij break; 1030285612Sdelphij } 1031285612Sdelphij 1032285612Sdelphij return (error); 1033285612Sdelphij} 1034285612Sdelphij 1035285612Sdelphijint 1036285612Sdelphijvie_alignment_check(int cpl, int size, uint64_t cr0, uint64_t rf, uint64_t gla) 1037285612Sdelphij{ 1038285612Sdelphij KASSERT(size == 1 || size == 2 || size == 4 || size == 8, 1039285612Sdelphij ("%s: invalid size %d", __func__, size)); 1040285612Sdelphij KASSERT(cpl >= 0 && cpl <= 3, ("%s: invalid cpl %d", __func__, cpl)); 1041285612Sdelphij 1042285612Sdelphij if (cpl != 3 || (cr0 & CR0_AM) == 0 || (rf & PSL_AC) == 0) 1043285612Sdelphij return (0); 1044285612Sdelphij 1045285612Sdelphij return ((gla & (size - 1)) ? 
1 : 0); 1046285612Sdelphij} 1047285612Sdelphij 1048285612Sdelphijint 1049285612Sdelphijvie_canonical_check(enum vm_cpu_mode cpu_mode, uint64_t gla) 1050285612Sdelphij{ 1051285612Sdelphij uint64_t mask; 1052285612Sdelphij 1053285612Sdelphij if (cpu_mode != CPU_MODE_64BIT) 1054285612Sdelphij return (0); 1055285612Sdelphij 1056285612Sdelphij /* 1057285612Sdelphij * The value of the bit 47 in the 'gla' should be replicated in the 1058285612Sdelphij * most significant 16 bits. 1059285612Sdelphij */ 1060285612Sdelphij mask = ~((1UL << 48) - 1); 1061285612Sdelphij if (gla & (1UL << 47)) 1062285612Sdelphij return ((gla & mask) != mask); 1063275970Scy else 1064275970Scy return ((gla & mask) != 0); 1065275970Scy} 1066285612Sdelphij 1067285612Sdelphijuint64_t 1068285612Sdelphijvie_size2mask(int size) 1069285612Sdelphij{ 1070285612Sdelphij KASSERT(size == 1 || size == 2 || size == 4 || size == 8, 1071285612Sdelphij ("vie_size2mask: invalid size %d", size)); 1072285612Sdelphij return (size2mask[size]); 1073285612Sdelphij} 1074285612Sdelphij 1075285612Sdelphijint 1076285612Sdelphijvie_calculate_gla(enum vm_cpu_mode cpu_mode, enum vm_reg_name seg, 1077285612Sdelphij struct seg_desc *desc, uint64_t offset, int length, int addrsize, 1078285612Sdelphij int prot, uint64_t *gla) 1079275970Scy{ 1080285612Sdelphij uint64_t firstoff, low_limit, high_limit, segbase; 1081285612Sdelphij int glasize, type; 1082275970Scy 1083285612Sdelphij KASSERT(seg >= VM_REG_GUEST_ES && seg <= VM_REG_GUEST_GS, 1084285612Sdelphij ("%s: invalid segment %d", __func__, seg)); 1085275970Scy KASSERT(length == 1 || length == 2 || length == 4 || length == 8, 1086285612Sdelphij ("%s: invalid operand size %d", __func__, length)); 1087285612Sdelphij KASSERT((prot & ~(PROT_READ | PROT_WRITE)) == 0, 1088285612Sdelphij ("%s: invalid prot %#x", __func__, prot)); 1089285612Sdelphij 1090285612Sdelphij firstoff = offset; 1091285612Sdelphij if (cpu_mode == CPU_MODE_64BIT) { 1092285612Sdelphij KASSERT(addrsize == 4 || 
addrsize == 8, ("%s: invalid address " 1093285612Sdelphij "size %d for cpu_mode %d", __func__, addrsize, cpu_mode)); 1094285612Sdelphij glasize = 8; 1095285612Sdelphij } else { 1096285612Sdelphij KASSERT(addrsize == 2 || addrsize == 4, ("%s: invalid address " 1097285612Sdelphij "size %d for cpu mode %d", __func__, addrsize, cpu_mode)); 1098285612Sdelphij glasize = 4; 1099285612Sdelphij /* 1100285612Sdelphij * If the segment selector is loaded with a NULL selector 1101275970Scy * then the descriptor is unusable and attempting to use 1102285612Sdelphij * it results in a #GP(0). 1103285612Sdelphij */ 1104285612Sdelphij if (SEG_DESC_UNUSABLE(desc->access)) 1105289999Sglebius return (-1); 1106285612Sdelphij 1107289999Sglebius /* 1108285612Sdelphij * The processor generates a #NP exception when a segment 1109285612Sdelphij * register is loaded with a selector that points to a 1110285612Sdelphij * descriptor that is not present. If this was the case then 1111285612Sdelphij * it would have been checked before the VM-exit. 1112285612Sdelphij */ 1113285612Sdelphij KASSERT(SEG_DESC_PRESENT(desc->access), 1114285612Sdelphij ("segment %d not present: %#x", seg, desc->access)); 1115285612Sdelphij 1116285612Sdelphij /* 1117285612Sdelphij * The descriptor type must indicate a code/data segment. 1118285612Sdelphij */ 1119285612Sdelphij type = SEG_DESC_TYPE(desc->access); 1120285612Sdelphij KASSERT(type >= 16 && type <= 31, ("segment %d has invalid " 1121285612Sdelphij "descriptor type %#x", seg, type)); 1122285612Sdelphij 1123285612Sdelphij if (prot & PROT_READ) { 1124285612Sdelphij /* #GP on a read access to a exec-only code segment */ 1125285612Sdelphij if ((type & 0xA) == 0x8) 1126285612Sdelphij return (-1); 1127285612Sdelphij } 1128285612Sdelphij 1129285612Sdelphij if (prot & PROT_WRITE) { 1130285612Sdelphij /* 1131285612Sdelphij * #GP on a write access to a code segment or a 1132275970Scy * read-only data segment. 
1133275970Scy */ 1134275970Scy if (type & 0x8) /* code segment */ 1135275970Scy return (-1); 1136275970Scy 1137275970Scy if ((type & 0xA) == 0) /* read-only data seg */ 1138275970Scy return (-1); 1139294904Sdelphij } 1140285612Sdelphij 1141285612Sdelphij /* 1142285612Sdelphij * 'desc->limit' is fully expanded taking granularity into 1143285612Sdelphij * account. 1144285612Sdelphij */ 1145285612Sdelphij if ((type & 0xC) == 0x4) { 1146285612Sdelphij /* expand-down data segment */ 1147285612Sdelphij low_limit = desc->limit + 1; 1148285612Sdelphij high_limit = SEG_DESC_DEF32(desc->access) ? 1149285612Sdelphij 0xffffffff : 0xffff; 1150285612Sdelphij } else { 1151285612Sdelphij /* code segment or expand-up data segment */ 1152285612Sdelphij low_limit = 0; 1153285612Sdelphij high_limit = desc->limit; 1154285612Sdelphij } 1155285612Sdelphij 1156285612Sdelphij while (length > 0) { 1157285612Sdelphij offset &= vie_size2mask(addrsize); 1158275970Scy if (offset < low_limit || offset > high_limit) 1159285612Sdelphij return (-1); 1160285612Sdelphij offset++; 1161285612Sdelphij length--; 1162285612Sdelphij } 1163285612Sdelphij } 1164294904Sdelphij 1165294904Sdelphij /* 1166294904Sdelphij * In 64-bit mode all segments except %fs and %gs have a segment 1167294904Sdelphij * base address of 0. 1168285612Sdelphij */ 1169285612Sdelphij if (cpu_mode == CPU_MODE_64BIT && seg != VM_REG_GUEST_FS && 1170275970Scy seg != VM_REG_GUEST_GS) { 1171275970Scy segbase = 0; 1172285612Sdelphij } else { 1173275970Scy segbase = desc->base; 1174275970Scy } 1175275970Scy 1176275970Scy /* 1177285612Sdelphij * Truncate 'firstoff' to the effective address size before adding 1178285612Sdelphij * it to the segment base. 
1179285612Sdelphij */ 1180285612Sdelphij firstoff &= vie_size2mask(addrsize); 1181275970Scy *gla = (segbase + firstoff) & vie_size2mask(glasize); 1182275970Scy return (0); 1183275970Scy} 1184285612Sdelphij 1185285612Sdelphij#ifdef _KERNEL 1186275970Scyvoid 1187285612Sdelphijvie_init(struct vie *vie) 1188285612Sdelphij{ 1189285612Sdelphij 1190285612Sdelphij bzero(vie, sizeof(struct vie)); 1191285612Sdelphij 1192285612Sdelphij vie->base_register = VM_REG_LAST; 1193285612Sdelphij vie->index_register = VM_REG_LAST; 1194285612Sdelphij} 1195294904Sdelphij 1196275970Scystatic int 1197285612Sdelphijpf_error_code(int usermode, int prot, int rsvd, uint64_t pte) 1198285612Sdelphij{ 1199285612Sdelphij int error_code = 0; 1200285612Sdelphij 1201285612Sdelphij if (pte & PG_V) 1202285612Sdelphij error_code |= PGEX_P; 1203275970Scy if (prot & VM_PROT_WRITE) 1204275970Scy error_code |= PGEX_W; 1205275970Scy if (usermode) 1206275970Scy error_code |= PGEX_U; 1207275970Scy if (rsvd) 1208275970Scy error_code |= PGEX_RSV; 1209275970Scy if (prot & VM_PROT_EXECUTE) 1210285612Sdelphij error_code |= PGEX_I; 1211285612Sdelphij 1212285612Sdelphij return (error_code); 1213285612Sdelphij} 1214285612Sdelphij 1215285612Sdelphijstatic void 1216285612Sdelphijptp_release(void **cookie) 1217285612Sdelphij{ 1218285612Sdelphij if (*cookie != NULL) { 1219285612Sdelphij vm_gpa_release(*cookie); 1220285612Sdelphij *cookie = NULL; 1221285612Sdelphij } 1222285612Sdelphij} 1223285612Sdelphij 1224285612Sdelphijstatic void * 1225285612Sdelphijptp_hold(struct vm *vm, vm_paddr_t ptpphys, size_t len, void **cookie) 1226285612Sdelphij{ 1227285612Sdelphij void *ptr; 1228285612Sdelphij 1229285612Sdelphij ptp_release(cookie); 1230285612Sdelphij ptr = vm_gpa_hold(vm, ptpphys, len, VM_PROT_RW, cookie); 1231285612Sdelphij return (ptr); 1232285612Sdelphij} 1233285612Sdelphij 1234285612Sdelphijint 1235285612Sdelphijvmm_gla2gpa(struct vm *vm, int vcpuid, struct vm_guest_paging *paging, 1236285612Sdelphij uint64_t gla, int 
prot, uint64_t *gpa) 1237285612Sdelphij{ 1238285612Sdelphij int nlevels, pfcode, ptpshift, ptpindex, retval, usermode, writable; 1239285612Sdelphij u_int retries; 1240285612Sdelphij uint64_t *ptpbase, ptpphys, pte, pgsize; 1241285612Sdelphij uint32_t *ptpbase32, pte32; 1242285612Sdelphij void *cookie; 1243275970Scy 1244275970Scy usermode = (paging->cpl == 3 ? 1 : 0); 1245275970Scy writable = prot & VM_PROT_WRITE; 1246275970Scy cookie = NULL; 1247275970Scy retval = 0; 1248285612Sdelphij retries = 0; 1249285612Sdelphijrestart: 1250285612Sdelphij ptpphys = paging->cr3; /* root of the page tables */ 1251275970Scy ptp_release(&cookie); 1252275970Scy if (retries++ > 0) 1253275970Scy maybe_yield(); 1254275970Scy 1255275970Scy if (vie_canonical_check(paging->cpu_mode, gla)) { 1256275970Scy /* 1257275970Scy * XXX assuming a non-stack reference otherwise a stack fault 1258275970Scy * should be generated. 1259275970Scy */ 1260275970Scy vm_inject_gp(vm, vcpuid); 1261285612Sdelphij goto fault; 1262285612Sdelphij } 1263285612Sdelphij 1264285612Sdelphij if (paging->paging_mode == PAGING_MODE_FLAT) { 1265275970Scy *gpa = gla; 1266275970Scy goto done; 1267275970Scy } 1268275970Scy 1269275970Scy if (paging->paging_mode == PAGING_MODE_32) { 1270275970Scy nlevels = 2; 1271275970Scy while (--nlevels >= 0) { 1272275970Scy /* Zero out the lower 12 bits. 
*/ 1273275970Scy ptpphys &= ~0xfff; 1274275970Scy 1275285612Sdelphij ptpbase32 = ptp_hold(vm, ptpphys, PAGE_SIZE, &cookie); 1276285612Sdelphij 1277285612Sdelphij if (ptpbase32 == NULL) 1278275970Scy goto error; 1279285612Sdelphij 1280285612Sdelphij ptpshift = PAGE_SHIFT + nlevels * 10; 1281285612Sdelphij ptpindex = (gla >> ptpshift) & 0x3FF; 1282285612Sdelphij pgsize = 1UL << ptpshift; 1283285612Sdelphij 1284285612Sdelphij pte32 = ptpbase32[ptpindex]; 1285285612Sdelphij 1286285612Sdelphij if ((pte32 & PG_V) == 0 || 1287275970Scy (usermode && (pte32 & PG_U) == 0) || 1288275970Scy (writable && (pte32 & PG_RW) == 0)) { 1289275970Scy pfcode = pf_error_code(usermode, prot, 0, 1290275970Scy pte32); 1291275970Scy vm_inject_pf(vm, vcpuid, pfcode, gla); 1292275970Scy goto fault; 1293275970Scy } 1294275970Scy 1295275970Scy /* 1296275970Scy * Emulate the x86 MMU's management of the accessed 1297285612Sdelphij * and dirty flags. While the accessed flag is set 1298285612Sdelphij * at every level of the page table, the dirty flag 1299285612Sdelphij * is only set at the last level providing the guest 1300285612Sdelphij * physical address. 
1301285612Sdelphij */ 1302285612Sdelphij if ((pte32 & PG_A) == 0) { 1303285612Sdelphij if (atomic_cmpset_32(&ptpbase32[ptpindex], 1304285612Sdelphij pte32, pte32 | PG_A) == 0) { 1305285612Sdelphij goto restart; 1306285612Sdelphij } 1307285612Sdelphij } 1308275970Scy 1309275970Scy /* XXX must be ignored if CR4.PSE=0 */ 1310275970Scy if (nlevels > 0 && (pte32 & PG_PS) != 0) 1311285612Sdelphij break; 1312275970Scy 1313275970Scy ptpphys = pte32; 1314275970Scy } 1315275970Scy 1316275970Scy /* Set the dirty bit in the page table entry if necessary */ 1317275970Scy if (writable && (pte32 & PG_M) == 0) { 1318285612Sdelphij if (atomic_cmpset_32(&ptpbase32[ptpindex], 1319285612Sdelphij pte32, pte32 | PG_M) == 0) { 1320285612Sdelphij goto restart; 1321275970Scy } 1322285612Sdelphij } 1323285612Sdelphij 1324285612Sdelphij /* Zero out the lower 'ptpshift' bits */ 1325285612Sdelphij pte32 >>= ptpshift; pte32 <<= ptpshift; 1326285612Sdelphij *gpa = pte32 | (gla & (pgsize - 1)); 1327285612Sdelphij goto done; 1328285612Sdelphij } 1329285612Sdelphij 1330275970Scy if (paging->paging_mode == PAGING_MODE_PAE) { 1331275970Scy /* Zero out the lower 5 bits and the upper 32 bits */ 1332285612Sdelphij ptpphys &= 0xffffffe0UL; 1333275970Scy 1334275970Scy ptpbase = ptp_hold(vm, ptpphys, sizeof(*ptpbase) * 4, &cookie); 1335275970Scy if (ptpbase == NULL) 1336275970Scy goto error; 1337275970Scy 1338275970Scy ptpindex = (gla >> 30) & 0x3; 1339275970Scy 1340275970Scy pte = ptpbase[ptpindex]; 1341285612Sdelphij 1342285612Sdelphij if ((pte & PG_V) == 0) { 1343285612Sdelphij pfcode = pf_error_code(usermode, prot, 0, pte); 1344285612Sdelphij vm_inject_pf(vm, vcpuid, pfcode, gla); 1345285612Sdelphij goto fault; 1346285612Sdelphij } 1347285612Sdelphij 1348285612Sdelphij ptpphys = pte; 1349285612Sdelphij 1350285612Sdelphij nlevels = 2; 1351285612Sdelphij } else 1352275970Scy nlevels = 4; 1353275970Scy while (--nlevels >= 0) { 1354275970Scy /* Zero out the lower 12 bits and the upper 12 bits */ 
1355275970Scy ptpphys >>= 12; ptpphys <<= 24; ptpphys >>= 12; 1356275970Scy 1357275970Scy ptpbase = ptp_hold(vm, ptpphys, PAGE_SIZE, &cookie); 1358275970Scy if (ptpbase == NULL) 1359285612Sdelphij goto error; 1360285612Sdelphij 1361275970Scy ptpshift = PAGE_SHIFT + nlevels * 9; 1362275970Scy ptpindex = (gla >> ptpshift) & 0x1FF; 1363275970Scy pgsize = 1UL << ptpshift; 1364275970Scy 1365275970Scy pte = ptpbase[ptpindex]; 1366285612Sdelphij 1367285612Sdelphij if ((pte & PG_V) == 0 || 1368285612Sdelphij (usermode && (pte & PG_U) == 0) || 1369275970Scy (writable && (pte & PG_RW) == 0)) { 1370285612Sdelphij pfcode = pf_error_code(usermode, prot, 0, pte); 1371275970Scy vm_inject_pf(vm, vcpuid, pfcode, gla); 1372285612Sdelphij goto fault; 1373285612Sdelphij } 1374285612Sdelphij 1375275970Scy /* Set the accessed bit in the page table entry */ 1376285612Sdelphij if ((pte & PG_A) == 0) { 1377285612Sdelphij if (atomic_cmpset_64(&ptpbase[ptpindex], 1378275970Scy pte, pte | PG_A) == 0) { 1379285612Sdelphij goto restart; 1380275970Scy } 1381275970Scy } 1382275970Scy 1383275970Scy if (nlevels > 0 && (pte & PG_PS) != 0) { 1384275970Scy if (pgsize > 1 * GB) { 1385275970Scy pfcode = pf_error_code(usermode, prot, 1, pte); 1386275970Scy vm_inject_pf(vm, vcpuid, pfcode, gla); 1387275970Scy goto fault; 1388275970Scy } 1389285612Sdelphij break; 1390285612Sdelphij } 1391285612Sdelphij 1392285612Sdelphij ptpphys = pte; 1393285612Sdelphij } 1394285612Sdelphij 1395285612Sdelphij /* Set the dirty bit in the page table entry if necessary */ 1396285612Sdelphij if (writable && (pte & PG_M) == 0) { 1397285612Sdelphij if (atomic_cmpset_64(&ptpbase[ptpindex], pte, pte | PG_M) == 0) 1398285612Sdelphij goto restart; 1399275970Scy } 1400285612Sdelphij 1401285612Sdelphij /* Zero out the lower 'ptpshift' bits and the upper 12 bits */ 1402285612Sdelphij pte >>= ptpshift; pte <<= (ptpshift + 12); pte >>= 12; 1403285612Sdelphij *gpa = pte | (gla & (pgsize - 1)); 1404285612Sdelphijdone: 1405285612Sdelphij 
ptp_release(&cookie); 1406285612Sdelphij return (retval); 1407285612Sdelphijerror: 1408285612Sdelphij retval = -1; 1409285612Sdelphij goto done; 1410285612Sdelphijfault: 1411275970Scy retval = 1; 1412275970Scy goto done; 1413275970Scy} 1414275970Scy 1415275970Scyint 1416275970Scyvmm_fetch_instruction(struct vm *vm, int vcpuid, struct vm_guest_paging *paging, 1417275970Scy uint64_t rip, int inst_length, struct vie *vie) 1418275970Scy{ 1419275970Scy struct vm_copyinfo copyinfo[2]; 1420275970Scy int error, prot; 1421275970Scy 1422275970Scy if (inst_length > VIE_INST_SIZE) 1423275970Scy panic("vmm_fetch_instruction: invalid length %d", inst_length); 1424275970Scy 1425275970Scy prot = PROT_READ | PROT_EXEC; 1426285612Sdelphij error = vm_copy_setup(vm, vcpuid, paging, rip, inst_length, prot, 1427285612Sdelphij copyinfo, nitems(copyinfo)); 1428285612Sdelphij if (error == 0) { 1429285612Sdelphij vm_copyin(vm, vcpuid, copyinfo, vie->inst, inst_length); 1430285612Sdelphij vm_copy_teardown(vm, vcpuid, copyinfo, nitems(copyinfo)); 1431285612Sdelphij vie->num_valid = inst_length; 1432285612Sdelphij } 1433285612Sdelphij return (error); 1434285612Sdelphij} 1435285612Sdelphij 1436285612Sdelphijstatic int 1437285612Sdelphijvie_peek(struct vie *vie, uint8_t *x) 1438285612Sdelphij{ 1439275970Scy 1440275970Scy if (vie->num_processed < vie->num_valid) { 1441275970Scy *x = vie->inst[vie->num_processed]; 1442275970Scy return (0); 1443275970Scy } else 1444275970Scy return (-1); 1445275970Scy} 1446275970Scy 1447275970Scystatic void 1448275970Scyvie_advance(struct vie *vie) 1449275970Scy{ 1450275970Scy 1451275970Scy vie->num_processed++; 1452275970Scy} 1453275970Scy 1454275970Scystatic int 1455275970Scydecode_prefixes(struct vie *vie, enum vm_cpu_mode cpu_mode, int cs_d) 1456285612Sdelphij{ 1457275970Scy uint8_t x; 1458275970Scy 1459275970Scy while (1) { 1460285612Sdelphij if (vie_peek(vie, &x)) 1461275970Scy return (-1); 1462285612Sdelphij 1463275970Scy if (x == 0x66) 1464285612Sdelphij 
vie->opsize_override = 1; 1465285612Sdelphij else if (x == 0x67) 1466285612Sdelphij vie->addrsize_override = 1; 1467275970Scy else 1468285612Sdelphij break; 1469285612Sdelphij 1470285612Sdelphij vie_advance(vie); 1471285612Sdelphij } 1472285612Sdelphij 1473285612Sdelphij /* 1474275970Scy * From section 2.2.1, "REX Prefixes", Intel SDM Vol 2: 1475285612Sdelphij * - Only one REX prefix is allowed per instruction. 1476275970Scy * - The REX prefix must immediately precede the opcode byte or the 1477275970Scy * escape opcode byte. 1478285612Sdelphij * - If an instruction has a mandatory prefix (0x66, 0xF2 or 0xF3) 1479285612Sdelphij * the mandatory prefix must come before the REX prefix. 1480285612Sdelphij */ 1481275970Scy if (cpu_mode == CPU_MODE_64BIT && x >= 0x40 && x <= 0x4F) { 1482285612Sdelphij vie->rex_present = 1; 1483285612Sdelphij vie->rex_w = x & 0x8 ? 1 : 0; 1484285612Sdelphij vie->rex_r = x & 0x4 ? 1 : 0; 1485285612Sdelphij vie->rex_x = x & 0x2 ? 1 : 0; 1486275970Scy vie->rex_b = x & 0x1 ? 1 : 0; 1487285612Sdelphij vie_advance(vie); 1488285612Sdelphij } 1489275970Scy 1490285612Sdelphij /* 1491275970Scy * Section "Operand-Size And Address-Size Attributes", Intel SDM, Vol 1 1492285612Sdelphij */ 1493285612Sdelphij if (cpu_mode == CPU_MODE_64BIT) { 1494285612Sdelphij /* 1495285612Sdelphij * Default address size is 64-bits and default operand size 1496285612Sdelphij * is 32-bits. 1497285612Sdelphij */ 1498275970Scy vie->addrsize = vie->addrsize_override ? 4 : 8; 1499275970Scy if (vie->rex_w) 1500275970Scy vie->opsize = 8; 1501275970Scy else if (vie->opsize_override) 1502275970Scy vie->opsize = 2; 1503275970Scy else 1504275970Scy vie->opsize = 4; 1505285612Sdelphij } else if (cs_d) { 1506285612Sdelphij /* Default address and operand sizes are 32-bits */ 1507285612Sdelphij vie->addrsize = vie->addrsize_override ? 2 : 4; 1508294904Sdelphij vie->opsize = vie->opsize_override ? 
2 : 4; 1509294904Sdelphij } else { 1510294904Sdelphij /* Default address and operand sizes are 16-bits */ 1511294904Sdelphij vie->addrsize = vie->addrsize_override ? 4 : 2; 1512294904Sdelphij vie->opsize = vie->opsize_override ? 4 : 2; 1513275970Scy } 1514294904Sdelphij return (0); 1515294904Sdelphij} 1516275970Scy 1517275970Scystatic int 1518285612Sdelphijdecode_two_byte_opcode(struct vie *vie) 1519285612Sdelphij{ 1520285612Sdelphij uint8_t x; 1521275970Scy 1522275970Scy if (vie_peek(vie, &x)) 1523275970Scy return (-1); 1524285612Sdelphij 1525285612Sdelphij vie->op = two_byte_opcodes[x]; 1526275970Scy 1527275970Scy if (vie->op.op_type == VIE_OP_TYPE_NONE) 1528275970Scy return (-1); 1529275970Scy 1530275970Scy vie_advance(vie); 1531275970Scy return (0); 1532275970Scy} 1533275970Scy 1534275970Scystatic int 1535275970Scydecode_opcode(struct vie *vie) 1536275970Scy{ 1537275970Scy uint8_t x; 1538275970Scy 1539275970Scy if (vie_peek(vie, &x)) 1540275970Scy return (-1); 1541275970Scy 1542285612Sdelphij vie->op = one_byte_opcodes[x]; 1543280849Scy 1544275970Scy if (vie->op.op_type == VIE_OP_TYPE_NONE) 1545275970Scy return (-1); 1546275970Scy 1547275970Scy vie_advance(vie); 1548285612Sdelphij 1549285612Sdelphij if (vie->op.op_type == VIE_OP_TYPE_TWO_BYTE) 1550275970Scy return (decode_two_byte_opcode(vie)); 1551285612Sdelphij 1552285612Sdelphij return (0); 1553275970Scy} 1554285612Sdelphij 1555285612Sdelphijstatic int 1556285612Sdelphijdecode_modrm(struct vie *vie, enum vm_cpu_mode cpu_mode) 1557285612Sdelphij{ 1558285612Sdelphij uint8_t x; 1559285612Sdelphij 1560275970Scy if (cpu_mode == CPU_MODE_REAL) 1561275970Scy return (-1); 1562285612Sdelphij 1563275970Scy if (vie->op.op_flags & VIE_OP_F_NO_MODRM) 1564285612Sdelphij return (0); 1565285612Sdelphij 1566285612Sdelphij if (vie_peek(vie, &x)) 1567285612Sdelphij return (-1); 1568285612Sdelphij 1569285612Sdelphij vie->mod = (x >> 6) & 0x3; 1570285612Sdelphij vie->rm = (x >> 0) & 0x7; 1571285612Sdelphij vie->reg = (x >> 3) & 
0x7; 1572285612Sdelphij 1573285612Sdelphij /* 1574285612Sdelphij * A direct addressing mode makes no sense in the context of an EPT 1575285612Sdelphij * fault. There has to be a memory access involved to cause the 1576285612Sdelphij * EPT fault. 1577285612Sdelphij */ 1578285612Sdelphij if (vie->mod == VIE_MOD_DIRECT) 1579275970Scy return (-1); 1580285612Sdelphij 1581285612Sdelphij if ((vie->mod == VIE_MOD_INDIRECT && vie->rm == VIE_RM_DISP32) || 1582285612Sdelphij (vie->mod != VIE_MOD_DIRECT && vie->rm == VIE_RM_SIB)) { 1583285612Sdelphij /* 1584285612Sdelphij * Table 2-5: Special Cases of REX Encodings 1585285612Sdelphij * 1586285612Sdelphij * mod=0, r/m=5 is used in the compatibility mode to 1587285612Sdelphij * indicate a disp32 without a base register. 1588285612Sdelphij * 1589285612Sdelphij * mod!=3, r/m=4 is used in the compatibility mode to 1590285612Sdelphij * indicate that the SIB byte is present. 1591285612Sdelphij * 1592275970Scy * The 'b' bit in the REX prefix is don't care in 1593285612Sdelphij * this case. 1594275970Scy */ 1595285612Sdelphij } else { 1596285612Sdelphij vie->rm |= (vie->rex_b << 3); 1597285612Sdelphij } 1598285612Sdelphij 1599285612Sdelphij vie->reg |= (vie->rex_r << 3); 1600275970Scy 1601285612Sdelphij /* SIB */ 1602285612Sdelphij if (vie->mod != VIE_MOD_DIRECT && vie->rm == VIE_RM_SIB) 1603285612Sdelphij goto done; 1604285612Sdelphij 1605285612Sdelphij vie->base_register = gpr_map[vie->rm]; 1606285612Sdelphij 1607285612Sdelphij switch (vie->mod) { 1608275970Scy case VIE_MOD_INDIRECT_DISP8: 1609275970Scy vie->disp_bytes = 1; 1610275970Scy break; 1611275970Scy case VIE_MOD_INDIRECT_DISP32: 1612275970Scy vie->disp_bytes = 4; 1613275970Scy break; 1614275970Scy case VIE_MOD_INDIRECT: 1615275970Scy if (vie->rm == VIE_RM_DISP32) { 1616275970Scy vie->disp_bytes = 4; 1617275970Scy /* 1618275970Scy * Table 2-7. 
RIP-Relative Addressing 1619275970Scy * 1620275970Scy * In 64-bit mode mod=00 r/m=101 implies [rip] + disp32 1621285612Sdelphij * whereas in compatibility mode it just implies disp32. 1622275970Scy */ 1623285612Sdelphij 1624275970Scy if (cpu_mode == CPU_MODE_64BIT) 1625285612Sdelphij vie->base_register = VM_REG_GUEST_RIP; 1626285612Sdelphij else 1627285612Sdelphij vie->base_register = VM_REG_LAST; 1628285612Sdelphij } 1629285612Sdelphij break; 1630285612Sdelphij } 1631275970Scy 1632285612Sdelphijdone: 1633285612Sdelphij vie_advance(vie); 1634285612Sdelphij 1635285612Sdelphij return (0); 1636285612Sdelphij} 1637285612Sdelphij 1638285612Sdelphijstatic int 1639285612Sdelphijdecode_sib(struct vie *vie) 1640285612Sdelphij{ 1641285612Sdelphij uint8_t x; 1642285612Sdelphij 1643285612Sdelphij /* Proceed only if SIB byte is present */ 1644285612Sdelphij if (vie->mod == VIE_MOD_DIRECT || vie->rm != VIE_RM_SIB) 1645285612Sdelphij return (0); 1646285612Sdelphij 1647285612Sdelphij if (vie_peek(vie, &x)) 1648285612Sdelphij return (-1); 1649285612Sdelphij 1650285612Sdelphij /* De-construct the SIB byte */ 1651275970Scy vie->ss = (x >> 6) & 0x3; 1652285612Sdelphij vie->index = (x >> 3) & 0x7; 1653285612Sdelphij vie->base = (x >> 0) & 0x7; 1654285612Sdelphij 1655285612Sdelphij /* Apply the REX prefix modifiers */ 1656285612Sdelphij vie->index |= vie->rex_x << 3; 1657285612Sdelphij vie->base |= vie->rex_b << 3; 1658285612Sdelphij 1659285612Sdelphij switch (vie->mod) { 1660285612Sdelphij case VIE_MOD_INDIRECT_DISP8: 1661285612Sdelphij vie->disp_bytes = 1; 1662285612Sdelphij break; 1663285612Sdelphij case VIE_MOD_INDIRECT_DISP32: 1664285612Sdelphij vie->disp_bytes = 4; 1665285612Sdelphij break; 1666285612Sdelphij } 1667285612Sdelphij 1668275970Scy if (vie->mod == VIE_MOD_INDIRECT && 1669275970Scy (vie->base == 5 || vie->base == 13)) { 1670275970Scy /* 1671275970Scy * Special case when base register is unused if mod = 0 1672285612Sdelphij * and base = %rbp or %r13. 
1673285612Sdelphij * 1674285612Sdelphij * Documented in: 1675285612Sdelphij * Table 2-3: 32-bit Addressing Forms with the SIB Byte 1676285612Sdelphij * Table 2-5: Special Cases of REX Encodings 1677285612Sdelphij */ 1678285612Sdelphij vie->disp_bytes = 4; 1679285612Sdelphij } else { 1680275970Scy vie->base_register = gpr_map[vie->base]; 1681285612Sdelphij } 1682285612Sdelphij 1683275970Scy /* 1684275970Scy * All encodings of 'index' are valid except for %rsp (4). 1685275970Scy * 1686285612Sdelphij * Documented in: 1687285612Sdelphij * Table 2-3: 32-bit Addressing Forms with the SIB Byte 1688285612Sdelphij * Table 2-5: Special Cases of REX Encodings 1689275970Scy */ 1690275970Scy if (vie->index != 4) 1691275970Scy vie->index_register = gpr_map[vie->index]; 1692275970Scy 1693275970Scy /* 'scale' makes sense only in the context of an index register */ 1694285612Sdelphij if (vie->index_register < VM_REG_LAST) 1695285612Sdelphij vie->scale = 1 << vie->ss; 1696285612Sdelphij 1697285612Sdelphij vie_advance(vie); 1698285612Sdelphij 1699285612Sdelphij return (0); 1700285612Sdelphij} 1701285612Sdelphij 1702285612Sdelphijstatic int 1703285612Sdelphijdecode_displacement(struct vie *vie) 1704285612Sdelphij{ 1705285612Sdelphij int n, i; 1706285612Sdelphij uint8_t x; 1707285612Sdelphij 1708285612Sdelphij union { 1709285612Sdelphij char buf[4]; 1710285612Sdelphij int8_t signed8; 1711285612Sdelphij int32_t signed32; 1712285612Sdelphij } u; 1713285612Sdelphij 1714285612Sdelphij if ((n = vie->disp_bytes) == 0) 1715285612Sdelphij return (0); 1716285612Sdelphij 1717285612Sdelphij if (n != 1 && n != 4) 1718285612Sdelphij panic("decode_displacement: invalid disp_bytes %d", n); 1719285612Sdelphij 1720285612Sdelphij for (i = 0; i < n; i++) { 1721285612Sdelphij if (vie_peek(vie, &x)) 1722285612Sdelphij return (-1); 1723285612Sdelphij 1724285612Sdelphij u.buf[i] = x; 1725285612Sdelphij vie_advance(vie); 1726285612Sdelphij } 1727285612Sdelphij 1728285612Sdelphij if (n == 1) 1729285612Sdelphij 
vie->displacement = u.signed8; /* sign-extended */ 1730285612Sdelphij else 1731285612Sdelphij vie->displacement = u.signed32; /* sign-extended */ 1732285612Sdelphij 1733285612Sdelphij return (0); 1734285612Sdelphij} 1735285612Sdelphij 1736285612Sdelphijstatic int 1737285612Sdelphijdecode_immediate(struct vie *vie) 1738275970Scy{ 1739275970Scy int i, n; 1740275970Scy uint8_t x; 1741275970Scy union { 1742275970Scy char buf[4]; 1743275970Scy int8_t signed8; 1744275970Scy int16_t signed16; 1745275970Scy int32_t signed32; 1746275970Scy } u; 1747285612Sdelphij 1748285612Sdelphij /* Figure out immediate operand size (if any) */ 1749285612Sdelphij if (vie->op.op_flags & VIE_OP_F_IMM) { 1750275970Scy /* 1751285612Sdelphij * Section 2.2.1.5 "Immediates", Intel SDM: 1752285612Sdelphij * In 64-bit mode the typical size of immediate operands 1753285612Sdelphij * remains 32-bits. When the operand size if 64-bits, the 1754285612Sdelphij * processor sign-extends all immediates to 64-bits prior 1755285612Sdelphij * to their use. 
1756275970Scy */ 1757285612Sdelphij if (vie->opsize == 4 || vie->opsize == 8) 1758285612Sdelphij vie->imm_bytes = 4; 1759275970Scy else 1760285612Sdelphij vie->imm_bytes = 2; 1761285612Sdelphij } else if (vie->op.op_flags & VIE_OP_F_IMM8) { 1762285612Sdelphij vie->imm_bytes = 1; 1763285612Sdelphij } 1764285612Sdelphij 1765275970Scy if ((n = vie->imm_bytes) == 0) 1766285612Sdelphij return (0); 1767285612Sdelphij 1768275970Scy KASSERT(n == 1 || n == 2 || n == 4, 1769285612Sdelphij ("%s: invalid number of immediate bytes: %d", __func__, n)); 1770285612Sdelphij 1771285612Sdelphij for (i = 0; i < n; i++) { 1772285612Sdelphij if (vie_peek(vie, &x)) 1773285612Sdelphij return (-1); 1774275970Scy 1775285612Sdelphij u.buf[i] = x; 1776275970Scy vie_advance(vie); 1777275970Scy } 1778285612Sdelphij 1779275970Scy /* sign-extend the immediate value before use */ 1780285612Sdelphij if (n == 1) 1781285612Sdelphij vie->immediate = u.signed8; 1782285612Sdelphij else if (n == 2) 1783285612Sdelphij vie->immediate = u.signed16; 1784275970Scy else 1785285612Sdelphij vie->immediate = u.signed32; 1786285612Sdelphij 1787285612Sdelphij return (0); 1788285612Sdelphij} 1789285612Sdelphij 1790285612Sdelphijstatic int 1791285612Sdelphijdecode_moffset(struct vie *vie) 1792285612Sdelphij{ 1793285612Sdelphij int i, n; 1794285612Sdelphij uint8_t x; 1795285612Sdelphij union { 1796285612Sdelphij char buf[8]; 1797285612Sdelphij uint64_t u64; 1798275970Scy } u; 1799285612Sdelphij 1800285612Sdelphij if ((vie->op.op_flags & VIE_OP_F_MOFFSET) == 0) 1801285612Sdelphij return (0); 1802285612Sdelphij 1803285612Sdelphij /* 1804285612Sdelphij * Section 2.2.1.4, "Direct Memory-Offset MOVs", Intel SDM: 1805285612Sdelphij * The memory offset size follows the address-size of the instruction. 
1806285612Sdelphij */ 1807285612Sdelphij n = vie->addrsize; 1808285612Sdelphij KASSERT(n == 2 || n == 4 || n == 8, ("invalid moffset bytes: %d", n)); 1809285612Sdelphij 1810285612Sdelphij u.u64 = 0; 1811285612Sdelphij for (i = 0; i < n; i++) { 1812285612Sdelphij if (vie_peek(vie, &x)) 1813285612Sdelphij return (-1); 1814285612Sdelphij 1815275970Scy u.buf[i] = x; 1816275970Scy vie_advance(vie); 1817275970Scy } 1818275970Scy vie->displacement = u.u64; 1819275970Scy return (0); 1820275970Scy} 1821275970Scy 1822275970Scy/* 1823275970Scy * Verify that all the bytes in the instruction buffer were consumed. 1824275970Scy */ 1825275970Scystatic int 1826285612Sdelphijverify_inst_length(struct vie *vie) 1827285612Sdelphij{ 1828285612Sdelphij 1829285612Sdelphij if (vie->num_processed == vie->num_valid) 1830285612Sdelphij return (0); 1831285612Sdelphij else 1832285612Sdelphij return (-1); 1833285612Sdelphij} 1834275970Scy 1835285612Sdelphij/* 1836285612Sdelphij * Verify that the 'guest linear address' provided as collateral of the nested 1837275970Scy * page table fault matches with our instruction decoding. 
1838275970Scy */ 1839275970Scystatic int 1840285612Sdelphijverify_gla(struct vm *vm, int cpuid, uint64_t gla, struct vie *vie) 1841275970Scy{ 1842275970Scy int error; 1843275970Scy uint64_t base, idx, gla2; 1844275970Scy 1845275970Scy /* Skip 'gla' verification */ 1846275970Scy if (gla == VIE_INVALID_GLA) 1847275970Scy return (0); 1848275970Scy 1849275970Scy base = 0; 1850275970Scy if (vie->base_register != VM_REG_LAST) { 1851275970Scy error = vm_get_register(vm, cpuid, vie->base_register, &base); 1852275970Scy if (error) { 1853275970Scy printf("verify_gla: error %d getting base reg %d\n", 1854275970Scy error, vie->base_register); 1855275970Scy return (-1); 1856275970Scy } 1857275970Scy 1858275970Scy /* 1859275970Scy * RIP-relative addressing starts from the following 1860275970Scy * instruction 1861275970Scy */ 1862275970Scy if (vie->base_register == VM_REG_GUEST_RIP) 1863275970Scy base += vie->num_valid; 1864275970Scy } 1865275970Scy 1866275970Scy idx = 0; 1867275970Scy if (vie->index_register != VM_REG_LAST) { 1868275970Scy error = vm_get_register(vm, cpuid, vie->index_register, &idx); 1869275970Scy if (error) { 1870285612Sdelphij printf("verify_gla: error %d getting index reg %d\n", 1871275970Scy error, vie->index_register); 1872275970Scy return (-1); 1873285612Sdelphij } 1874285612Sdelphij } 1875285612Sdelphij 1876285612Sdelphij /* XXX assuming that the base address of the segment is 0 */ 1877285612Sdelphij gla2 = base + vie->scale * idx + vie->displacement; 1878275970Scy gla2 &= size2mask[vie->addrsize]; 1879275970Scy if (gla != gla2) { 1880275970Scy printf("verify_gla mismatch: " 1881275970Scy "base(0x%0lx), scale(%d), index(0x%0lx), " 1882275970Scy "disp(0x%0lx), gla(0x%0lx), gla2(0x%0lx)\n", 1883285612Sdelphij base, vie->scale, idx, vie->displacement, gla, gla2); 1884275970Scy return (-1); 1885275970Scy } 1886285612Sdelphij 1887285612Sdelphij return (0); 1888285612Sdelphij} 1889285612Sdelphij 1890285612Sdelphijint 
1891285612Sdelphijvmm_decode_instruction(struct vm *vm, int cpuid, uint64_t gla, 1892275970Scy enum vm_cpu_mode cpu_mode, int cs_d, struct vie *vie) 1893275970Scy{ 1894275970Scy 1895275970Scy if (decode_prefixes(vie, cpu_mode, cs_d)) 1896275970Scy return (-1); 1897275970Scy 1898275970Scy if (decode_opcode(vie)) 1899285612Sdelphij return (-1); 1900275970Scy 1901275970Scy if (decode_modrm(vie, cpu_mode)) 1902285612Sdelphij return (-1); 1903285612Sdelphij 1904285612Sdelphij if (decode_sib(vie)) 1905275970Scy return (-1); 1906285612Sdelphij 1907285612Sdelphij if (decode_displacement(vie)) 1908285612Sdelphij return (-1); 1909285612Sdelphij 1910285612Sdelphij if (decode_immediate(vie)) 1911285612Sdelphij return (-1); 1912285612Sdelphij 1913275970Scy if (decode_moffset(vie)) 1914275970Scy return (-1); 1915275970Scy 1916285612Sdelphij if (verify_inst_length(vie)) 1917275970Scy return (-1); 1918275970Scy 1919275970Scy if (verify_gla(vm, cpuid, gla, vie)) 1920285612Sdelphij return (-1); 1921285612Sdelphij 1922285612Sdelphij vie->decoded = 1; /* success */ 1923285612Sdelphij 1924285612Sdelphij return (0); 1925285612Sdelphij} 1926285612Sdelphij#endif /* _KERNEL */ 1927285612Sdelphij