/*-
 * Copyright (c) 2012 Sandvine, Inc.
 * Copyright (c) 2012 NetApp, Inc.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * $FreeBSD$
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#ifdef _KERNEL
#include <sys/param.h>
#include <sys/pcpu.h>
#include <sys/systm.h>
#include <sys/proc.h>

#include <vm/vm.h>
#include <vm/pmap.h>

#include <machine/vmparam.h>
#include <machine/vmm.h>
#else /* !_KERNEL */
#include <sys/types.h>
#include <sys/errno.h>
#include <sys/_iovec.h>

#include <machine/vmm.h>

#include <assert.h>
#include <vmmapi.h>
/* Userland build: map the kernel KASSERT onto assert(3). */
#define KASSERT(exp,msg) assert((exp))
#endif /* _KERNEL */

#include <machine/vmm_instruction_emul.h>
#include <x86/psl.h>
#include <x86/specialreg.h>

/* struct vie_op.op_type */
enum {
	VIE_OP_TYPE_NONE = 0,
	VIE_OP_TYPE_MOV,
	VIE_OP_TYPE_MOVSX,
	VIE_OP_TYPE_MOVZX,
	VIE_OP_TYPE_AND,
	VIE_OP_TYPE_OR,
	VIE_OP_TYPE_SUB,
	VIE_OP_TYPE_TWO_BYTE,
	VIE_OP_TYPE_PUSH,
	VIE_OP_TYPE_CMP,
	VIE_OP_TYPE_LAST
};

/* struct vie_op.op_flags */
#define	VIE_OP_F_IMM		(1 << 0)  /* 16/32-bit immediate operand */
#define	VIE_OP_F_IMM8		(1 << 1)  /* 8-bit immediate operand */
#define	VIE_OP_F_MOFFSET	(1 << 2)  /* 16/32/64-bit immediate moffset */
#define	VIE_OP_F_NO_MODRM	(1 << 3)

/*
 * Decode table for instructions that follow the two-byte (0x0F) escape,
 * indexed by the second opcode byte.  Entries not listed decode to
 * VIE_OP_TYPE_NONE (zero-initialized) and are rejected by the emulator.
 */
static const struct vie_op two_byte_opcodes[256] = {
	[0xB6] = {
		.op_byte = 0xB6,
		.op_type = VIE_OP_TYPE_MOVZX,
	},
	[0xB7] = {
		.op_byte = 0xB7,
		.op_type = VIE_OP_TYPE_MOVZX,
	},
	[0xBE] = {
		.op_byte = 0xBE,
		.op_type = VIE_OP_TYPE_MOVSX,
	},
};

/* Decode table for single-byte opcodes, indexed by the opcode byte. */
static const struct vie_op one_byte_opcodes[256] = {
	[0x0F] = {
		.op_byte = 0x0F,
		.op_type = VIE_OP_TYPE_TWO_BYTE
	},
	[0x2B] = {
		.op_byte = 0x2B,
		.op_type = VIE_OP_TYPE_SUB,
	},
	[0x3B] = {
		.op_byte = 0x3B,
		.op_type = VIE_OP_TYPE_CMP,
	},
	[0x88] = {
		.op_byte = 0x88,
		.op_type = VIE_OP_TYPE_MOV,
	},
	[0x89] = {
		.op_byte = 0x89,
		.op_type = VIE_OP_TYPE_MOV,
	},
	[0x8A] = {
		.op_byte = 0x8A,
		.op_type = VIE_OP_TYPE_MOV,
	},
	[0x8B] = {
		.op_byte = 0x8B,
		.op_type = VIE_OP_TYPE_MOV,
	},
	[0xA1] = {
		.op_byte = 0xA1,
		.op_type = VIE_OP_TYPE_MOV,
		.op_flags = VIE_OP_F_MOFFSET | VIE_OP_F_NO_MODRM,
	},
	[0xA3] = {
		.op_byte = 0xA3,
		.op_type = VIE_OP_TYPE_MOV,
		.op_flags = VIE_OP_F_MOFFSET | VIE_OP_F_NO_MODRM,
	},
	[0xC6] = {
		/* XXX Group 11 extended opcode - not just MOV */
		.op_byte = 0xC6,
		.op_type = VIE_OP_TYPE_MOV,
		.op_flags = VIE_OP_F_IMM8,
	},
	[0xC7] = {
		.op_byte = 0xC7,
		.op_type = VIE_OP_TYPE_MOV,
		.op_flags = VIE_OP_F_IMM,
	},
	[0x23] = {
		.op_byte = 0x23,
		.op_type = VIE_OP_TYPE_AND,
	},
	[0x81] = {
		/* XXX Group 1 extended opcode - not just AND */
		.op_byte = 0x81,
		.op_type = VIE_OP_TYPE_AND,
		.op_flags = VIE_OP_F_IMM,
	},
	[0x83] = {
		/* XXX Group 1 extended opcode - not just OR */
		.op_byte = 0x83,
		.op_type = VIE_OP_TYPE_OR,
		.op_flags = VIE_OP_F_IMM8,
	},
	[0xFF] = {
		/* XXX Group 5 extended opcode - not just PUSH */
		.op_byte = 0xFF,
		.op_type = VIE_OP_TYPE_PUSH,
	}
};

/* struct vie.mod */
#define	VIE_MOD_INDIRECT	0
#define	VIE_MOD_INDIRECT_DISP8	1
#define	VIE_MOD_INDIRECT_DISP32	2
#define	VIE_MOD_DIRECT		3

/* struct vie.rm */
#define	VIE_RM_SIB	4
#define	VIE_RM_DISP32	5

#define	GB	(1024 * 1024 * 1024)

/*
 * ModRM register encodings 0-7 (extended to 8-15 by the REX prefix) in
 * hardware order, mapped into the VMM register name space.
 */
static enum vm_reg_name gpr_map[16] = {
	VM_REG_GUEST_RAX,
	VM_REG_GUEST_RCX,
	VM_REG_GUEST_RDX,
	VM_REG_GUEST_RBX,
	VM_REG_GUEST_RSP,
	VM_REG_GUEST_RBP,
	VM_REG_GUEST_RSI,
	VM_REG_GUEST_RDI,
	VM_REG_GUEST_R8,
	VM_REG_GUEST_R9,
	VM_REG_GUEST_R10,
	VM_REG_GUEST_R11,
	VM_REG_GUEST_R12,
	VM_REG_GUEST_R13,
	VM_REG_GUEST_R14,
	VM_REG_GUEST_R15
};

/* Value masks indexed by operand size in bytes. */
static uint64_t size2mask[] = {
	[1] = 0xff,
	[2] = 0xffff,
	[4] = 0xffffffff,
	[8] = 0xffffffffffffffff,
};
206243640Sneel 207243640Sneelstatic int 208243640Sneelvie_read_register(void *vm, int vcpuid, enum vm_reg_name reg, uint64_t *rval) 209243640Sneel{ 210243640Sneel int error; 211243640Sneel 212243640Sneel error = vm_get_register(vm, vcpuid, reg, rval); 213243640Sneel 214243640Sneel return (error); 215243640Sneel} 216243640Sneel 217270159Sgrehanstatic void 218270159Sgrehanvie_calc_bytereg(struct vie *vie, enum vm_reg_name *reg, int *lhbr) 219246108Sneel{ 220270159Sgrehan *lhbr = 0; 221270159Sgrehan *reg = gpr_map[vie->reg]; 222246108Sneel 223246108Sneel /* 224270159Sgrehan * 64-bit mode imposes limitations on accessing legacy high byte 225270159Sgrehan * registers (lhbr). 226246108Sneel * 227246108Sneel * The legacy high-byte registers cannot be addressed if the REX 228246108Sneel * prefix is present. In this case the values 4, 5, 6 and 7 of the 229246108Sneel * 'ModRM:reg' field address %spl, %bpl, %sil and %dil respectively. 230246108Sneel * 231246108Sneel * If the REX prefix is not present then the values 4, 5, 6 and 7 232246108Sneel * of the 'ModRM:reg' field address the legacy high-byte registers, 233246108Sneel * %ah, %ch, %dh and %bh respectively. 234246108Sneel */ 235246108Sneel if (!vie->rex_present) { 236246108Sneel if (vie->reg & 0x4) { 237270159Sgrehan *lhbr = 1; 238270159Sgrehan *reg = gpr_map[vie->reg & 0x3]; 239246108Sneel } 240246108Sneel } 241270159Sgrehan} 242246108Sneel 243270159Sgrehanstatic int 244270159Sgrehanvie_read_bytereg(void *vm, int vcpuid, struct vie *vie, uint8_t *rval) 245270159Sgrehan{ 246270159Sgrehan uint64_t val; 247270159Sgrehan int error, lhbr; 248270159Sgrehan enum vm_reg_name reg; 249270159Sgrehan 250270159Sgrehan vie_calc_bytereg(vie, ®, &lhbr); 251246108Sneel error = vm_get_register(vm, vcpuid, reg, &val); 252270159Sgrehan 253270159Sgrehan /* 254270159Sgrehan * To obtain the value of a legacy high byte register shift the 255270159Sgrehan * base register right by 8 bits (%ah = %rax >> 8). 
256270159Sgrehan */ 257270159Sgrehan if (lhbr) 258270159Sgrehan *rval = val >> 8; 259270159Sgrehan else 260270159Sgrehan *rval = val; 261246108Sneel return (error); 262246108Sneel} 263246108Sneel 264270159Sgrehanstatic int 265270159Sgrehanvie_write_bytereg(void *vm, int vcpuid, struct vie *vie, uint8_t byte) 266270159Sgrehan{ 267270159Sgrehan uint64_t origval, val, mask; 268270159Sgrehan int error, lhbr; 269270159Sgrehan enum vm_reg_name reg; 270270159Sgrehan 271270159Sgrehan vie_calc_bytereg(vie, ®, &lhbr); 272270159Sgrehan error = vm_get_register(vm, vcpuid, reg, &origval); 273270159Sgrehan if (error == 0) { 274270159Sgrehan val = byte; 275270159Sgrehan mask = 0xff; 276270159Sgrehan if (lhbr) { 277270159Sgrehan /* 278270159Sgrehan * Shift left by 8 to store 'byte' in a legacy high 279270159Sgrehan * byte register. 280270159Sgrehan */ 281270159Sgrehan val <<= 8; 282270159Sgrehan mask <<= 8; 283270159Sgrehan } 284270159Sgrehan val |= origval & ~mask; 285270159Sgrehan error = vm_set_register(vm, vcpuid, reg, val); 286270159Sgrehan } 287270159Sgrehan return (error); 288270159Sgrehan} 289270159Sgrehan 290268976Sjhbint 291243640Sneelvie_update_register(void *vm, int vcpuid, enum vm_reg_name reg, 292243640Sneel uint64_t val, int size) 293243640Sneel{ 294243640Sneel int error; 295243640Sneel uint64_t origval; 296243640Sneel 297243640Sneel switch (size) { 298243640Sneel case 1: 299243640Sneel case 2: 300243640Sneel error = vie_read_register(vm, vcpuid, reg, &origval); 301243640Sneel if (error) 302243640Sneel return (error); 303243640Sneel val &= size2mask[size]; 304243640Sneel val |= origval & ~size2mask[size]; 305243640Sneel break; 306243640Sneel case 4: 307243640Sneel val &= 0xffffffffUL; 308243640Sneel break; 309243640Sneel case 8: 310243640Sneel break; 311243640Sneel default: 312243640Sneel return (EINVAL); 313243640Sneel } 314243640Sneel 315243640Sneel error = vm_set_register(vm, vcpuid, reg, val); 316243640Sneel return (error); 317243640Sneel} 318243640Sneel 
319243640Sneel/* 320270159Sgrehan * Return the status flags that would result from doing (x - y). 321243640Sneel */ 322270159Sgrehanstatic u_long 323270159Sgrehangetcc16(uint16_t x, uint16_t y) 324270159Sgrehan{ 325270159Sgrehan u_long rflags; 326270159Sgrehan 327270159Sgrehan __asm __volatile("sub %1,%2; pushfq; popq %0" : 328270159Sgrehan "=r" (rflags) : "m" (y), "r" (x)); 329270159Sgrehan return (rflags); 330270159Sgrehan} 331270159Sgrehan 332270159Sgrehanstatic u_long 333270159Sgrehangetcc32(uint32_t x, uint32_t y) 334270159Sgrehan{ 335270159Sgrehan u_long rflags; 336270159Sgrehan 337270159Sgrehan __asm __volatile("sub %1,%2; pushfq; popq %0" : 338270159Sgrehan "=r" (rflags) : "m" (y), "r" (x)); 339270159Sgrehan return (rflags); 340270159Sgrehan} 341270159Sgrehan 342270159Sgrehanstatic u_long 343270159Sgrehangetcc64(uint64_t x, uint64_t y) 344270159Sgrehan{ 345270159Sgrehan u_long rflags; 346270159Sgrehan 347270159Sgrehan __asm __volatile("sub %1,%2; pushfq; popq %0" : 348270159Sgrehan "=r" (rflags) : "m" (y), "r" (x)); 349270159Sgrehan return (rflags); 350270159Sgrehan} 351270159Sgrehan 352270159Sgrehanstatic u_long 353270159Sgrehangetcc(int opsize, uint64_t x, uint64_t y) 354270159Sgrehan{ 355270159Sgrehan KASSERT(opsize == 2 || opsize == 4 || opsize == 8, 356270159Sgrehan ("getcc: invalid operand size %d", opsize)); 357270159Sgrehan 358270159Sgrehan if (opsize == 2) 359270159Sgrehan return (getcc16(x, y)); 360270159Sgrehan else if (opsize == 4) 361270159Sgrehan return (getcc32(x, y)); 362270159Sgrehan else 363270159Sgrehan return (getcc64(x, y)); 364270159Sgrehan} 365270159Sgrehan 366243640Sneelstatic int 367243640Sneelemulate_mov(void *vm, int vcpuid, uint64_t gpa, struct vie *vie, 368243640Sneel mem_region_read_t memread, mem_region_write_t memwrite, void *arg) 369243640Sneel{ 370243640Sneel int error, size; 371243640Sneel enum vm_reg_name reg; 372246108Sneel uint8_t byte; 373243640Sneel uint64_t val; 374243640Sneel 375270159Sgrehan size = vie->opsize; 
376243640Sneel error = EINVAL; 377243640Sneel 378243640Sneel switch (vie->op.op_byte) { 379246108Sneel case 0x88: 380246108Sneel /* 381246108Sneel * MOV byte from reg (ModRM:reg) to mem (ModRM:r/m) 382246108Sneel * 88/r: mov r/m8, r8 383246108Sneel * REX + 88/r: mov r/m8, r8 (%ah, %ch, %dh, %bh not available) 384246108Sneel */ 385270159Sgrehan size = 1; /* override for byte operation */ 386246108Sneel error = vie_read_bytereg(vm, vcpuid, vie, &byte); 387246108Sneel if (error == 0) 388246108Sneel error = memwrite(vm, vcpuid, gpa, byte, size, arg); 389246108Sneel break; 390243640Sneel case 0x89: 391243640Sneel /* 392243640Sneel * MOV from reg (ModRM:reg) to mem (ModRM:r/m) 393270159Sgrehan * 89/r: mov r/m16, r16 394243640Sneel * 89/r: mov r/m32, r32 395243640Sneel * REX.W + 89/r mov r/m64, r64 396243640Sneel */ 397243640Sneel reg = gpr_map[vie->reg]; 398243640Sneel error = vie_read_register(vm, vcpuid, reg, &val); 399243640Sneel if (error == 0) { 400243640Sneel val &= size2mask[size]; 401243640Sneel error = memwrite(vm, vcpuid, gpa, val, size, arg); 402243640Sneel } 403243640Sneel break; 404254964Sneel case 0x8A: 405270159Sgrehan /* 406270159Sgrehan * MOV byte from mem (ModRM:r/m) to reg (ModRM:reg) 407270159Sgrehan * 8A/r: mov r8, r/m8 408270159Sgrehan * REX + 8A/r: mov r8, r/m8 409270159Sgrehan */ 410270159Sgrehan size = 1; /* override for byte operation */ 411270159Sgrehan error = memread(vm, vcpuid, gpa, &val, size, arg); 412270159Sgrehan if (error == 0) 413270159Sgrehan error = vie_write_bytereg(vm, vcpuid, vie, val); 414270159Sgrehan break; 415243640Sneel case 0x8B: 416243640Sneel /* 417243640Sneel * MOV from mem (ModRM:r/m) to reg (ModRM:reg) 418270159Sgrehan * 8B/r: mov r16, r/m16 419243640Sneel * 8B/r: mov r32, r/m32 420243640Sneel * REX.W 8B/r: mov r64, r/m64 421243640Sneel */ 422243640Sneel error = memread(vm, vcpuid, gpa, &val, size, arg); 423243640Sneel if (error == 0) { 424243640Sneel reg = gpr_map[vie->reg]; 425243640Sneel error = 
vie_update_register(vm, vcpuid, reg, val, size); 426243640Sneel } 427243640Sneel break; 428270159Sgrehan case 0xA1: 429270159Sgrehan /* 430270159Sgrehan * MOV from seg:moffset to AX/EAX/RAX 431270159Sgrehan * A1: mov AX, moffs16 432270159Sgrehan * A1: mov EAX, moffs32 433270159Sgrehan * REX.W + A1: mov RAX, moffs64 434270159Sgrehan */ 435270159Sgrehan error = memread(vm, vcpuid, gpa, &val, size, arg); 436270159Sgrehan if (error == 0) { 437270159Sgrehan reg = VM_REG_GUEST_RAX; 438270159Sgrehan error = vie_update_register(vm, vcpuid, reg, val, size); 439270159Sgrehan } 440270159Sgrehan break; 441270159Sgrehan case 0xA3: 442270159Sgrehan /* 443270159Sgrehan * MOV from AX/EAX/RAX to seg:moffset 444270159Sgrehan * A3: mov moffs16, AX 445270159Sgrehan * A3: mov moffs32, EAX 446270159Sgrehan * REX.W + A3: mov moffs64, RAX 447270159Sgrehan */ 448270159Sgrehan error = vie_read_register(vm, vcpuid, VM_REG_GUEST_RAX, &val); 449270159Sgrehan if (error == 0) { 450270159Sgrehan val &= size2mask[size]; 451270159Sgrehan error = memwrite(vm, vcpuid, gpa, val, size, arg); 452270159Sgrehan } 453270159Sgrehan break; 454270159Sgrehan case 0xC6: 455270159Sgrehan /* 456270159Sgrehan * MOV from imm8 to mem (ModRM:r/m) 457270159Sgrehan * C6/0 mov r/m8, imm8 458270159Sgrehan * REX + C6/0 mov r/m8, imm8 459270159Sgrehan */ 460270159Sgrehan size = 1; /* override for byte operation */ 461270159Sgrehan error = memwrite(vm, vcpuid, gpa, vie->immediate, size, arg); 462270159Sgrehan break; 463243640Sneel case 0xC7: 464243640Sneel /* 465270159Sgrehan * MOV from imm16/imm32 to mem (ModRM:r/m) 466270159Sgrehan * C7/0 mov r/m16, imm16 467243640Sneel * C7/0 mov r/m32, imm32 468243640Sneel * REX.W + C7/0 mov r/m64, imm32 (sign-extended to 64-bits) 469243640Sneel */ 470270159Sgrehan val = vie->immediate & size2mask[size]; 471243640Sneel error = memwrite(vm, vcpuid, gpa, val, size, arg); 472243640Sneel break; 473243640Sneel default: 474243640Sneel break; 475243640Sneel } 476243640Sneel 477243640Sneel 
return (error); 478243640Sneel} 479243640Sneel 480243640Sneelstatic int 481267396Sjhbemulate_movx(void *vm, int vcpuid, uint64_t gpa, struct vie *vie, 482267396Sjhb mem_region_read_t memread, mem_region_write_t memwrite, 483267396Sjhb void *arg) 484267396Sjhb{ 485267396Sjhb int error, size; 486267396Sjhb enum vm_reg_name reg; 487267396Sjhb uint64_t val; 488267396Sjhb 489270159Sgrehan size = vie->opsize; 490267396Sjhb error = EINVAL; 491267396Sjhb 492267396Sjhb switch (vie->op.op_byte) { 493267396Sjhb case 0xB6: 494267396Sjhb /* 495267396Sjhb * MOV and zero extend byte from mem (ModRM:r/m) to 496267396Sjhb * reg (ModRM:reg). 497267396Sjhb * 498270159Sgrehan * 0F B6/r movzx r16, r/m8 499270159Sgrehan * 0F B6/r movzx r32, r/m8 500270159Sgrehan * REX.W + 0F B6/r movzx r64, r/m8 501267396Sjhb */ 502267396Sjhb 503267396Sjhb /* get the first operand */ 504267396Sjhb error = memread(vm, vcpuid, gpa, &val, 1, arg); 505267396Sjhb if (error) 506267396Sjhb break; 507267396Sjhb 508267396Sjhb /* get the second operand */ 509267396Sjhb reg = gpr_map[vie->reg]; 510267396Sjhb 511270159Sgrehan /* zero-extend byte */ 512270159Sgrehan val = (uint8_t)val; 513267396Sjhb 514267396Sjhb /* write the result */ 515267396Sjhb error = vie_update_register(vm, vcpuid, reg, val, size); 516267396Sjhb break; 517270159Sgrehan case 0xB7: 518270159Sgrehan /* 519270159Sgrehan * MOV and zero extend word from mem (ModRM:r/m) to 520270159Sgrehan * reg (ModRM:reg). 
521270159Sgrehan * 522270159Sgrehan * 0F B7/r movzx r32, r/m16 523270159Sgrehan * REX.W + 0F B7/r movzx r64, r/m16 524270159Sgrehan */ 525270159Sgrehan error = memread(vm, vcpuid, gpa, &val, 2, arg); 526270159Sgrehan if (error) 527270159Sgrehan return (error); 528270159Sgrehan 529270159Sgrehan reg = gpr_map[vie->reg]; 530270159Sgrehan 531270159Sgrehan /* zero-extend word */ 532270159Sgrehan val = (uint16_t)val; 533270159Sgrehan 534270159Sgrehan error = vie_update_register(vm, vcpuid, reg, val, size); 535270159Sgrehan break; 536267396Sjhb case 0xBE: 537267396Sjhb /* 538267396Sjhb * MOV and sign extend byte from mem (ModRM:r/m) to 539267396Sjhb * reg (ModRM:reg). 540267396Sjhb * 541270159Sgrehan * 0F BE/r movsx r16, r/m8 542270159Sgrehan * 0F BE/r movsx r32, r/m8 543270159Sgrehan * REX.W + 0F BE/r movsx r64, r/m8 544267396Sjhb */ 545267396Sjhb 546267396Sjhb /* get the first operand */ 547267396Sjhb error = memread(vm, vcpuid, gpa, &val, 1, arg); 548267396Sjhb if (error) 549267396Sjhb break; 550267396Sjhb 551267396Sjhb /* get the second operand */ 552267396Sjhb reg = gpr_map[vie->reg]; 553267396Sjhb 554267396Sjhb /* sign extend byte */ 555267396Sjhb val = (int8_t)val; 556267396Sjhb 557267396Sjhb /* write the result */ 558267396Sjhb error = vie_update_register(vm, vcpuid, reg, val, size); 559267396Sjhb break; 560267396Sjhb default: 561267396Sjhb break; 562267396Sjhb } 563267396Sjhb return (error); 564267396Sjhb} 565267396Sjhb 566267396Sjhbstatic int 567243640Sneelemulate_and(void *vm, int vcpuid, uint64_t gpa, struct vie *vie, 568243640Sneel mem_region_read_t memread, mem_region_write_t memwrite, void *arg) 569243640Sneel{ 570243640Sneel int error, size; 571243640Sneel enum vm_reg_name reg; 572243640Sneel uint64_t val1, val2; 573243640Sneel 574270159Sgrehan size = vie->opsize; 575243640Sneel error = EINVAL; 576243640Sneel 577243640Sneel switch (vie->op.op_byte) { 578243640Sneel case 0x23: 579243640Sneel /* 580243640Sneel * AND reg (ModRM:reg) and mem (ModRM:r/m) and 
store the 581243640Sneel * result in reg. 582243640Sneel * 583270159Sgrehan * 23/r and r16, r/m16 584243640Sneel * 23/r and r32, r/m32 585243640Sneel * REX.W + 23/r and r64, r/m64 586243640Sneel */ 587243640Sneel 588243640Sneel /* get the first operand */ 589243640Sneel reg = gpr_map[vie->reg]; 590243640Sneel error = vie_read_register(vm, vcpuid, reg, &val1); 591243640Sneel if (error) 592243640Sneel break; 593243640Sneel 594243640Sneel /* get the second operand */ 595243640Sneel error = memread(vm, vcpuid, gpa, &val2, size, arg); 596243640Sneel if (error) 597243640Sneel break; 598243640Sneel 599243640Sneel /* perform the operation and write the result */ 600243640Sneel val1 &= val2; 601243640Sneel error = vie_update_register(vm, vcpuid, reg, val1, size); 602243640Sneel break; 603243667Sgrehan case 0x81: 604243667Sgrehan /* 605271659Sgrehan * AND/OR mem (ModRM:r/m) with immediate and store the 606253585Sneel * result in mem. 607243667Sgrehan * 608271659Sgrehan * AND: i = 4 609271659Sgrehan * OR: i = 1 610271659Sgrehan * 81 /i op r/m16, imm16 611271659Sgrehan * 81 /i op r/m32, imm32 612271659Sgrehan * REX.W + 81 /i op r/m64, imm32 sign-extended to 64 613243703Sgrehan * 614243667Sgrehan */ 615243703Sgrehan 616243667Sgrehan /* get the first operand */ 617243667Sgrehan error = memread(vm, vcpuid, gpa, &val1, size, arg); 618243667Sgrehan if (error) 619243667Sgrehan break; 620243667Sgrehan 621243667Sgrehan /* 622271659Sgrehan * perform the operation with the pre-fetched immediate 623271659Sgrehan * operand and write the result 624271659Sgrehan */ 625271659Sgrehan switch (vie->reg & 7) { 626271659Sgrehan case 0x4: 627271659Sgrehan /* modrm:reg == b100, AND */ 628271659Sgrehan val1 &= vie->immediate; 629271659Sgrehan break; 630271659Sgrehan case 0x1: 631271659Sgrehan /* modrm:reg == b001, OR */ 632271659Sgrehan val1 |= vie->immediate; 633271659Sgrehan break; 634271659Sgrehan default: 635271659Sgrehan error = EINVAL; 636271659Sgrehan break; 637271659Sgrehan } 
638271659Sgrehan if (error) 639271659Sgrehan break; 640271659Sgrehan 641271659Sgrehan error = memwrite(vm, vcpuid, gpa, val1, size, arg); 642243667Sgrehan break; 643243640Sneel default: 644243640Sneel break; 645243640Sneel } 646243640Sneel return (error); 647243640Sneel} 648243640Sneel 649253585Sneelstatic int 650253585Sneelemulate_or(void *vm, int vcpuid, uint64_t gpa, struct vie *vie, 651253585Sneel mem_region_read_t memread, mem_region_write_t memwrite, void *arg) 652253585Sneel{ 653253585Sneel int error, size; 654253585Sneel uint64_t val1; 655253585Sneel 656270159Sgrehan size = vie->opsize; 657253585Sneel error = EINVAL; 658253585Sneel 659253585Sneel switch (vie->op.op_byte) { 660253585Sneel case 0x83: 661253585Sneel /* 662253585Sneel * OR mem (ModRM:r/m) with immediate and store the 663253585Sneel * result in mem. 664253585Sneel * 665270159Sgrehan * 83 /1 OR r/m16, imm8 sign-extended to 16 666270159Sgrehan * 83 /1 OR r/m32, imm8 sign-extended to 32 667270159Sgrehan * REX.W + 83/1 OR r/m64, imm8 sign-extended to 64 668253585Sneel * 669253585Sneel * Currently, only the OR operation of the 0x83 opcode 670253585Sneel * is implemented (ModRM:reg = b001). 
671253585Sneel */ 672253585Sneel if ((vie->reg & 7) != 1) 673253585Sneel break; 674253585Sneel 675253585Sneel /* get the first operand */ 676253585Sneel error = memread(vm, vcpuid, gpa, &val1, size, arg); 677253585Sneel if (error) 678253585Sneel break; 679253585Sneel 680253585Sneel /* 681253585Sneel * perform the operation with the pre-fetched immediate 682253585Sneel * operand and write the result 683253585Sneel */ 684253585Sneel val1 |= vie->immediate; 685253585Sneel error = memwrite(vm, vcpuid, gpa, val1, size, arg); 686253585Sneel break; 687253585Sneel default: 688253585Sneel break; 689253585Sneel } 690253585Sneel return (error); 691253585Sneel} 692253585Sneel 693270159Sgrehan#define RFLAGS_STATUS_BITS (PSL_C | PSL_PF | PSL_AF | PSL_Z | PSL_N | PSL_V) 694270159Sgrehan 695270159Sgrehanstatic int 696270159Sgrehanemulate_cmp(void *vm, int vcpuid, uint64_t gpa, struct vie *vie, 697270159Sgrehan mem_region_read_t memread, mem_region_write_t memwrite, void *arg) 698270159Sgrehan{ 699270159Sgrehan int error, size; 700270159Sgrehan uint64_t op1, op2, rflags, rflags2; 701270159Sgrehan enum vm_reg_name reg; 702270159Sgrehan 703270159Sgrehan size = vie->opsize; 704270159Sgrehan switch (vie->op.op_byte) { 705270159Sgrehan case 0x3B: 706270159Sgrehan /* 707270159Sgrehan * 3B/r CMP r16, r/m16 708270159Sgrehan * 3B/r CMP r32, r/m32 709270159Sgrehan * REX.W + 3B/r CMP r64, r/m64 710270159Sgrehan * 711270159Sgrehan * Compare first operand (reg) with second operand (r/m) and 712270159Sgrehan * set status flags in EFLAGS register. The comparison is 713270159Sgrehan * performed by subtracting the second operand from the first 714270159Sgrehan * operand and then setting the status flags. 
715270159Sgrehan */ 716270159Sgrehan 717270159Sgrehan /* Get the first operand */ 718270159Sgrehan reg = gpr_map[vie->reg]; 719270159Sgrehan error = vie_read_register(vm, vcpuid, reg, &op1); 720270159Sgrehan if (error) 721270159Sgrehan return (error); 722270159Sgrehan 723270159Sgrehan /* Get the second operand */ 724270159Sgrehan error = memread(vm, vcpuid, gpa, &op2, size, arg); 725270159Sgrehan if (error) 726270159Sgrehan return (error); 727270159Sgrehan 728270159Sgrehan break; 729270159Sgrehan default: 730270159Sgrehan return (EINVAL); 731270159Sgrehan } 732270159Sgrehan rflags2 = getcc(size, op1, op2); 733270159Sgrehan error = vie_read_register(vm, vcpuid, VM_REG_GUEST_RFLAGS, &rflags); 734270159Sgrehan if (error) 735270159Sgrehan return (error); 736270159Sgrehan rflags &= ~RFLAGS_STATUS_BITS; 737270159Sgrehan rflags |= rflags2 & RFLAGS_STATUS_BITS; 738270159Sgrehan 739270159Sgrehan error = vie_update_register(vm, vcpuid, VM_REG_GUEST_RFLAGS, rflags, 8); 740270159Sgrehan return (error); 741270159Sgrehan} 742270159Sgrehan 743270159Sgrehanstatic int 744271659Sgrehanemulate_sub(void *vm, int vcpuid, uint64_t gpa, struct vie *vie, 745271659Sgrehan mem_region_read_t memread, mem_region_write_t memwrite, void *arg) 746271659Sgrehan{ 747271659Sgrehan int error, size; 748271659Sgrehan uint64_t nval, rflags, rflags2, val1, val2; 749271659Sgrehan enum vm_reg_name reg; 750271659Sgrehan 751271659Sgrehan size = vie->opsize; 752271659Sgrehan error = EINVAL; 753271659Sgrehan 754271659Sgrehan switch (vie->op.op_byte) { 755271659Sgrehan case 0x2B: 756271659Sgrehan /* 757271659Sgrehan * SUB r/m from r and store the result in r 758271659Sgrehan * 759271659Sgrehan * 2B/r SUB r16, r/m16 760271659Sgrehan * 2B/r SUB r32, r/m32 761271659Sgrehan * REX.W + 2B/r SUB r64, r/m64 762271659Sgrehan */ 763271659Sgrehan 764271659Sgrehan /* get the first operand */ 765271659Sgrehan reg = gpr_map[vie->reg]; 766271659Sgrehan error = vie_read_register(vm, vcpuid, reg, &val1); 767271659Sgrehan if 
(error) 768271659Sgrehan break; 769271659Sgrehan 770271659Sgrehan /* get the second operand */ 771271659Sgrehan error = memread(vm, vcpuid, gpa, &val2, size, arg); 772271659Sgrehan if (error) 773271659Sgrehan break; 774271659Sgrehan 775271659Sgrehan /* perform the operation and write the result */ 776271659Sgrehan nval = val1 - val2; 777271659Sgrehan error = vie_update_register(vm, vcpuid, reg, nval, size); 778271659Sgrehan break; 779271659Sgrehan default: 780271659Sgrehan break; 781271659Sgrehan } 782271659Sgrehan 783271659Sgrehan if (!error) { 784271659Sgrehan rflags2 = getcc(size, val1, val2); 785271659Sgrehan error = vie_read_register(vm, vcpuid, VM_REG_GUEST_RFLAGS, 786271659Sgrehan &rflags); 787271659Sgrehan if (error) 788271659Sgrehan return (error); 789271659Sgrehan 790271659Sgrehan rflags &= ~RFLAGS_STATUS_BITS; 791271659Sgrehan rflags |= rflags2 & RFLAGS_STATUS_BITS; 792271659Sgrehan error = vie_update_register(vm, vcpuid, VM_REG_GUEST_RFLAGS, 793271659Sgrehan rflags, 8); 794271659Sgrehan } 795271659Sgrehan 796271659Sgrehan return (error); 797271659Sgrehan} 798271659Sgrehan 799271659Sgrehanstatic int 800270159Sgrehanemulate_push(void *vm, int vcpuid, uint64_t mmio_gpa, struct vie *vie, 801270159Sgrehan struct vm_guest_paging *paging, mem_region_read_t memread, 802270159Sgrehan mem_region_write_t memwrite, void *arg) 803270159Sgrehan{ 804270159Sgrehan#ifdef _KERNEL 805270159Sgrehan struct vm_copyinfo copyinfo[2]; 806270159Sgrehan#else 807270159Sgrehan struct iovec copyinfo[2]; 808270159Sgrehan#endif 809270159Sgrehan struct seg_desc ss_desc; 810270159Sgrehan uint64_t cr0, rflags, rsp, stack_gla, val; 811270159Sgrehan int error, size, stackaddrsize; 812270159Sgrehan 813270159Sgrehan /* 814270159Sgrehan * Table A-6, "Opcode Extensions", Intel SDM, Vol 2. 815270159Sgrehan * 816270159Sgrehan * PUSH is part of the group 5 extended opcodes and is identified 817270159Sgrehan * by ModRM:reg = b110. 
 */
	/* PUSH: only the /6 form of the group-5 opcode is emulated here. */
	if ((vie->reg & 7) != 6)
		return (EINVAL);

	size = vie->opsize;
	/*
	 * From "Address-Size Attributes for Stack Accesses", Intel SDM, Vol 1
	 */
	if (paging->cpu_mode == CPU_MODE_REAL) {
		stackaddrsize = 2;
	} else if (paging->cpu_mode == CPU_MODE_64BIT) {
		/*
		 * "Stack Manipulation Instructions in 64-bit Mode", SDM, Vol 3
		 * - Stack pointer size is always 64-bits.
		 * - PUSH/POP of 32-bit values is not possible in 64-bit mode.
		 * - 16-bit PUSH/POP is supported by using the operand size
		 *   override prefix (66H).
		 */
		stackaddrsize = 8;
		size = vie->opsize_override ? 2 : 8;
	} else {
		/*
		 * In protected or compatibility mode the 'B' flag in the
		 * stack-segment descriptor determines the size of the
		 * stack pointer.
		 */
		error = vm_get_seg_desc(vm, vcpuid, VM_REG_GUEST_SS, &ss_desc);
		KASSERT(error == 0, ("%s: error %d getting SS descriptor",
		    __func__, error));
		if (SEG_DESC_DEF32(ss_desc.access))
			stackaddrsize = 4;
		else
			stackaddrsize = 2;
	}

	error = vie_read_register(vm, vcpuid, VM_REG_GUEST_CR0, &cr0);
	KASSERT(error == 0, ("%s: error %d getting cr0", __func__, error));

	error = vie_read_register(vm, vcpuid, VM_REG_GUEST_RFLAGS, &rflags);
	KASSERT(error == 0, ("%s: error %d getting rflags", __func__, error));

	error = vie_read_register(vm, vcpuid, VM_REG_GUEST_RSP, &rsp);
	KASSERT(error == 0, ("%s: error %d getting rsp", __func__, error));

	/* PUSH pre-decrements the stack pointer by the operand size. */
	rsp -= size;
	if (vie_calculate_gla(paging->cpu_mode, VM_REG_GUEST_SS, &ss_desc,
	    rsp, size, stackaddrsize, PROT_WRITE, &stack_gla)) {
		vm_inject_ss(vm, vcpuid, 0);
		return (0);
	}

	if (vie_canonical_check(paging->cpu_mode, stack_gla)) {
		vm_inject_ss(vm, vcpuid, 0);
		return (0);
	}

	if (vie_alignment_check(paging->cpl, size, cr0, rflags, stack_gla)) {
		vm_inject_ac(vm, vcpuid, 0);
		return (0);
	}

	error = vm_copy_setup(vm, vcpuid, paging, stack_gla, size, PROT_WRITE,
	    copyinfo, nitems(copyinfo));
	if (error == -1) {
		/*
		 * XXX cannot return a negative error value here because it
		 * ends up being the return value of the VM_RUN() ioctl and
		 * is interpreted as a pseudo-error (for e.g. ERESTART).
		 */
		return (EFAULT);
	} else if (error == 1) {
		/* Resume guest execution to handle page fault */
		return (0);
	}

	/*
	 * Read the value being pushed from the MMIO region and write it
	 * onto the guest stack.
	 */
	error = memread(vm, vcpuid, mmio_gpa, &val, size, arg);
	if (error == 0) {
		vm_copyout(vm, vcpuid, &val, copyinfo, size);
		error = vie_update_register(vm, vcpuid, VM_REG_GUEST_RSP, rsp,
		    stackaddrsize);
		KASSERT(error == 0, ("error %d updating rsp", error));
	}
#ifdef _KERNEL
	vm_copy_teardown(vm, vcpuid, copyinfo, nitems(copyinfo));
#endif
	return (error);
}

/*
 * Emulate a previously decoded instruction that faulted on the MMIO
 * region at 'gpa'. Dispatches on the decoded op type to the per-opcode
 * emulation handlers. Returns EINVAL if the instruction has not been
 * decoded or its op type has no handler.
 */
int
vmm_emulate_instruction(void *vm, int vcpuid, uint64_t gpa, struct vie *vie,
    struct vm_guest_paging *paging, mem_region_read_t memread,
    mem_region_write_t memwrite, void *memarg)
{
	int error;

	if (!vie->decoded)
		return (EINVAL);

	switch (vie->op.op_type) {
	case VIE_OP_TYPE_PUSH:
		error = emulate_push(vm, vcpuid, gpa, vie, paging, memread,
		    memwrite, memarg);
		break;
	case VIE_OP_TYPE_CMP:
		error = emulate_cmp(vm, vcpuid, gpa, vie,
		    memread, memwrite, memarg);
		break;
	case VIE_OP_TYPE_MOV:
		error = emulate_mov(vm, vcpuid, gpa, vie,
		    memread, memwrite, memarg);
		break;
	case VIE_OP_TYPE_MOVSX:
	case VIE_OP_TYPE_MOVZX:
		error = emulate_movx(vm, vcpuid, gpa, vie,
		    memread, memwrite, memarg);
		break;
	case VIE_OP_TYPE_AND:
		error = emulate_and(vm, vcpuid, gpa, vie,
		    memread, memwrite, memarg);
		break;
	case VIE_OP_TYPE_OR:
		error = emulate_or(vm, vcpuid, gpa, vie,
		    memread, memwrite, memarg);
		break;
	case VIE_OP_TYPE_SUB:
		error = emulate_sub(vm, vcpuid, gpa, vie,
		    memread, memwrite, memarg);
		break;
	default:
		error = EINVAL;
		break;
	}

	return (error);
}

/*
 * Return 1 if the access to 'gla' of width 'size' should raise an
 * alignment-check exception (#AC): only at CPL 3 with CR0.AM and
 * RFLAGS.AC both set, and only if 'gla' is misaligned for 'size'.
 */
int
vie_alignment_check(int cpl, int size, uint64_t cr0, uint64_t rf, uint64_t gla)
{
	KASSERT(size == 1 || size == 2 || size == 4 || size == 8,
	    ("%s: invalid size %d", __func__, size));
	KASSERT(cpl >= 0 && cpl <= 3, ("%s: invalid cpl %d", __func__, cpl));

	if (cpl != 3 || (cr0 & CR0_AM) == 0 || (rf & PSL_AC) == 0)
		return (0);

	return ((gla & (size - 1)) ? 1 : 0);
}

/*
 * Return non-zero if 'gla' is not in canonical form. Only meaningful in
 * 64-bit mode; all addresses are canonical in other modes.
 */
int
vie_canonical_check(enum vm_cpu_mode cpu_mode, uint64_t gla)
{
	uint64_t mask;

	if (cpu_mode != CPU_MODE_64BIT)
		return (0);

	/*
	 * The value of the bit 47 in the 'gla' should be replicated in the
	 * most significant 16 bits.
	 */
	mask = ~((1UL << 48) - 1);
	if (gla & (1UL << 47))
		return ((gla & mask) != mask);
	else
		return ((gla & mask) != 0);
}

/* Return a mask covering the low 'size' bytes (size in {1,2,4,8}). */
uint64_t
vie_size2mask(int size)
{
	KASSERT(size == 1 || size == 2 || size == 4 || size == 8,
	    ("vie_size2mask: invalid size %d", size));
	return (size2mask[size]);
}

/*
 * Compute the guest linear address for an access of 'length' bytes at
 * 'offset' within segment 'seg', applying the segmentation checks for
 * the given CPU mode. Returns 0 on success with the address in '*gla',
 * or -1 if the access violates segmentation rules (callers inject the
 * appropriate fault, e.g. emulate_push injects #SS).
 */
int
vie_calculate_gla(enum vm_cpu_mode cpu_mode, enum vm_reg_name seg,
    struct seg_desc *desc, uint64_t offset, int length, int addrsize,
    int prot, uint64_t *gla)
{
	uint64_t firstoff, low_limit, high_limit, segbase;
	int glasize, type;

	KASSERT(seg >= VM_REG_GUEST_ES && seg <= VM_REG_GUEST_GS,
	    ("%s: invalid segment %d", __func__, seg));
	KASSERT(length == 1 || length == 2 || length == 4 || length == 8,
	    ("%s: invalid operand size %d", __func__, length));
	KASSERT((prot & ~(PROT_READ | PROT_WRITE)) == 0,
	    ("%s: invalid prot %#x", __func__, prot));

	firstoff = offset;
	if (cpu_mode == CPU_MODE_64BIT) {
		KASSERT(addrsize == 4 || addrsize == 8, ("%s: invalid address "
		    "size %d for cpu_mode %d", __func__, addrsize, cpu_mode));
		glasize = 8;
	} else {
		KASSERT(addrsize == 2 || addrsize == 4, ("%s: invalid address "
		    "size %d for cpu mode %d", __func__, addrsize, cpu_mode));
		glasize = 4;
		/*
		 * If the segment selector is loaded with a NULL selector
		 * then the descriptor is unusable and attempting to use
		 * it results in a #GP(0).
		 */
		if (SEG_DESC_UNUSABLE(desc->access))
			return (-1);

		/*
		 * The processor generates a #NP exception when a segment
		 * register is loaded with a selector that points to a
		 * descriptor that is not present. If this was the case then
		 * it would have been checked before the VM-exit.
		 */
		KASSERT(SEG_DESC_PRESENT(desc->access),
		    ("segment %d not present: %#x", seg, desc->access));

		/*
		 * The descriptor type must indicate a code/data segment.
		 */
		type = SEG_DESC_TYPE(desc->access);
		KASSERT(type >= 16 && type <= 31, ("segment %d has invalid "
		    "descriptor type %#x", seg, type));

		if (prot & PROT_READ) {
			/* #GP on a read access to an exec-only code segment */
			if ((type & 0xA) == 0x8)
				return (-1);
		}

		if (prot & PROT_WRITE) {
			/*
			 * #GP on a write access to a code segment or a
			 * read-only data segment.
			 */
			if (type & 0x8)			/* code segment */
				return (-1);

			if ((type & 0xA) == 0)		/* read-only data seg */
				return (-1);
		}

		/*
		 * 'desc->limit' is fully expanded taking granularity into
		 * account.
		 */
		if ((type & 0xC) == 0x4) {
			/* expand-down data segment */
			low_limit = desc->limit + 1;
			high_limit = SEG_DESC_DEF32(desc->access) ?
			    0xffffffff : 0xffff;
		} else {
			/* code segment or expand-up data segment */
			low_limit = 0;
			high_limit = desc->limit;
		}

		/* Limit-check every byte of the access. */
		while (length > 0) {
			offset &= vie_size2mask(addrsize);
			if (offset < low_limit || offset > high_limit)
				return (-1);
			offset++;
			length--;
		}
	}

	/*
	 * In 64-bit mode all segments except %fs and %gs have a segment
	 * base address of 0.
	 */
	if (cpu_mode == CPU_MODE_64BIT && seg != VM_REG_GUEST_FS &&
	    seg != VM_REG_GUEST_GS) {
		segbase = 0;
	} else {
		segbase = desc->base;
	}

	/*
	 * Truncate 'firstoff' to the effective address size before adding
	 * it to the segment base.
	 */
	firstoff &= vie_size2mask(addrsize);
	*gla = (segbase + firstoff) & vie_size2mask(glasize);
	return (0);
}

#ifdef _KERNEL
/* Reset decoder state; base/index registers start out invalid. */
void
vie_init(struct vie *vie)
{

	bzero(vie, sizeof(struct vie));

	vie->base_register = VM_REG_LAST;
	vie->index_register = VM_REG_LAST;
}

/* Build the PGEX_* error code pushed for an injected page fault. */
static int
pf_error_code(int usermode, int prot, int rsvd, uint64_t pte)
{
	int error_code = 0;

	if (pte & PG_V)
		error_code |= PGEX_P;
	if (prot & VM_PROT_WRITE)
		error_code |= PGEX_W;
	if (usermode)
		error_code |= PGEX_U;
	if (rsvd)
		error_code |= PGEX_RSV;
	if (prot & VM_PROT_EXECUTE)
		error_code |= PGEX_I;

	return (error_code);
}

/* Release a page-table page previously wired by ptp_hold(). */
static void
ptp_release(void **cookie)
{
	if (*cookie != NULL) {
		vm_gpa_release(*cookie);
		*cookie = NULL;
	}
}

/*
 * Wire the page-table page at guest physical address 'ptpphys', releasing
 * any page previously held via the same cookie.
 */
static void *
ptp_hold(struct vm *vm, vm_paddr_t ptpphys, size_t len, void **cookie)
{
	void *ptr;

	ptp_release(cookie);
	ptr = vm_gpa_hold(vm, ptpphys, len, VM_PROT_RW, cookie);
	return (ptr);
}

/*
 * Translate guest linear address 'gla' to a guest physical address by
 * walking the guest page tables. Returns 0 on success, 1 if a fault was
 * injected into the guest, or -1 on an internal error.
 */
int
vmm_gla2gpa(struct vm *vm, int vcpuid, struct vm_guest_paging *paging,
    uint64_t gla, int prot, uint64_t *gpa)
{
	int nlevels, pfcode, ptpshift, ptpindex, retval, usermode, writable;
	u_int retries;
	uint64_t *ptpbase, ptpphys, pte, pgsize;
	uint32_t *ptpbase32, pte32;
	void *cookie;

	usermode = (paging->cpl == 3 ? 1 : 0);
	writable = prot & VM_PROT_WRITE;
	cookie = NULL;
	retval = 0;
	retries = 0;
restart:
	ptpphys = paging->cr3;		/* root of the page tables */
	ptp_release(&cookie);
	if (retries++ > 0)
		maybe_yield();

	if (vie_canonical_check(paging->cpu_mode, gla)) {
		/*
		 * XXX assuming a non-stack reference otherwise a stack fault
		 * should be generated.
		 */
		vm_inject_gp(vm, vcpuid);
		goto fault;
	}

	if (paging->paging_mode == PAGING_MODE_FLAT) {
		*gpa = gla;
		goto done;
	}

	if (paging->paging_mode == PAGING_MODE_32) {
		/* Two-level 32-bit page walk (non-PAE). */
		nlevels = 2;
		while (--nlevels >= 0) {
			/* Zero out the lower 12 bits. */
			ptpphys &= ~0xfff;

			ptpbase32 = ptp_hold(vm, ptpphys, PAGE_SIZE, &cookie);

			if (ptpbase32 == NULL)
				goto error;

			ptpshift = PAGE_SHIFT + nlevels * 10;
			ptpindex = (gla >> ptpshift) & 0x3FF;
			pgsize = 1UL << ptpshift;

			pte32 = ptpbase32[ptpindex];

			if ((pte32 & PG_V) == 0 ||
			    (usermode && (pte32 & PG_U) == 0) ||
			    (writable && (pte32 & PG_RW) == 0)) {
				pfcode = pf_error_code(usermode, prot, 0,
				    pte32);
				vm_inject_pf(vm, vcpuid, pfcode, gla);
				goto fault;
			}

			/*
			 * Emulate the x86 MMU's management of the accessed
			 * and dirty flags. While the accessed flag is set
			 * at every level of the page table, the dirty flag
			 * is only set at the last level providing the guest
			 * physical address.
			 */
			if ((pte32 & PG_A) == 0) {
				/* Restart the walk if the PTE changed. */
				if (atomic_cmpset_32(&ptpbase32[ptpindex],
				    pte32, pte32 | PG_A) == 0) {
					goto restart;
				}
			}

			/* XXX must be ignored if CR4.PSE=0 */
			if (nlevels > 0 && (pte32 & PG_PS) != 0)
				break;

			ptpphys = pte32;
		}

		/* Set the dirty bit in the page table entry if necessary */
		if (writable && (pte32 & PG_M) == 0) {
			if (atomic_cmpset_32(&ptpbase32[ptpindex],
			    pte32, pte32 | PG_M) == 0) {
				goto restart;
			}
		}

		/* Zero out the lower 'ptpshift' bits */
		pte32 >>= ptpshift; pte32 <<= ptpshift;
		*gpa = pte32 | (gla & (pgsize - 1));
		goto done;
	}

	if (paging->paging_mode == PAGING_MODE_PAE) {
		/* Zero out the lower 5 bits and the upper 32 bits */
		ptpphys &= 0xffffffe0UL;

		/* PAE: the PDPT has only 4 entries. */
		ptpbase = ptp_hold(vm, ptpphys, sizeof(*ptpbase) * 4, &cookie);
		if (ptpbase == NULL)
			goto error;

		ptpindex = (gla >> 30) & 0x3;

		pte = ptpbase[ptpindex];

		if ((pte & PG_V) == 0) {
			pfcode = pf_error_code(usermode, prot, 0, pte);
			vm_inject_pf(vm, vcpuid, pfcode, gla);
			goto fault;
		}

		ptpphys = pte;

		/* Continue with the remaining two levels of the PAE walk. */
		nlevels = 2;
	} else
		nlevels = 4;	/* 4-level long-mode walk */
	while (--nlevels >= 0) {
		/* Zero out the lower 12 bits and the upper 12 bits */
		ptpphys >>= 12; ptpphys <<= 24; ptpphys >>= 12;

		ptpbase = ptp_hold(vm, ptpphys, PAGE_SIZE, &cookie);
		if (ptpbase == NULL)
			goto error;

		ptpshift = PAGE_SHIFT + nlevels * 9;
		ptpindex = (gla >> ptpshift) & 0x1FF;
		pgsize = 1UL << ptpshift;

		pte = ptpbase[ptpindex];

		if ((pte & PG_V) == 0 ||
		    (usermode && (pte & PG_U) == 0) ||
		    (writable && (pte & PG_RW) == 0)) {
			pfcode = pf_error_code(usermode, prot, 0, pte);
			vm_inject_pf(vm, vcpuid, pfcode, gla);
			goto fault;
		}

		/* Set the accessed bit in the page table entry */
		if ((pte & PG_A) == 0) {
			if (atomic_cmpset_64(&ptpbase[ptpindex],
			    pte, pte | PG_A) == 0) {
				goto restart;
			}
		}

		if (nlevels > 0 && (pte & PG_PS) != 0) {
			/* Large pages above 1GB have reserved bits set. */
			if (pgsize > 1 * GB) {
				pfcode = pf_error_code(usermode, prot, 1, pte);
				vm_inject_pf(vm, vcpuid, pfcode, gla);
				goto fault;
			}
			break;
		}

		ptpphys = pte;
	}

	/* Set the dirty bit in the page table entry if necessary */
	if (writable && (pte & PG_M) == 0) {
		if (atomic_cmpset_64(&ptpbase[ptpindex], pte, pte | PG_M) == 0)
			goto restart;
	}

	/* Zero out the lower 'ptpshift' bits and the upper 12 bits */
	pte >>= ptpshift; pte <<= (ptpshift + 12); pte >>= 12;
	*gpa = pte | (gla & (pgsize - 1));
done:
	ptp_release(&cookie);
	return (retval);
error:
	retval = -1;
	goto done;
fault:
	retval = 1;
	goto done;
}

/*
 * Fetch up to 'inst_length' instruction bytes at guest linear address
 * 'rip' into vie->inst. On success vie->num_valid records how many bytes
 * were fetched. Returns the vm_copy_setup() result (0 ok, 1 fault
 * injected, -1 error).
 */
int
vmm_fetch_instruction(struct vm *vm, int vcpuid, struct vm_guest_paging *paging,
    uint64_t rip, int inst_length, struct vie *vie)
{
	struct vm_copyinfo copyinfo[2];
	int error, prot;

	if (inst_length > VIE_INST_SIZE)
		panic("vmm_fetch_instruction: invalid length %d", inst_length);

	prot = PROT_READ | PROT_EXEC;
	error = vm_copy_setup(vm, vcpuid, paging, rip, inst_length, prot,
	    copyinfo, nitems(copyinfo));
	if (error == 0) {
		vm_copyin(vm, vcpuid, copyinfo, vie->inst, inst_length);
		vm_copy_teardown(vm, vcpuid, copyinfo, nitems(copyinfo));
		vie->num_valid = inst_length;
	}
	return (error);
}

/* Look at the next unconsumed instruction byte without consuming it. */
static int
vie_peek(struct vie *vie, uint8_t *x)
{

	if (vie->num_processed < vie->num_valid) {
		*x = vie->inst[vie->num_processed];
		return (0);
	} else
		return (-1);
}

/* Consume the byte most recently returned by vie_peek(). */
static void
vie_advance(struct vie *vie)
{

	vie->num_processed++;
}

/*
 * Consume legacy prefixes (0x66/0x67) and an optional REX prefix, then
 * derive the effective address and operand sizes for the current mode.
 */
static int
decode_prefixes(struct vie *vie, enum vm_cpu_mode cpu_mode, int cs_d)
{
	uint8_t x;

	while (1) {
		if (vie_peek(vie, &x))
			return (-1);

		if (x == 0x66)
			vie->opsize_override = 1;
		else if (x == 0x67)
			vie->addrsize_override = 1;
		else
			break;

		vie_advance(vie);
	}

	/*
	 * From section 2.2.1, "REX Prefixes", Intel SDM Vol 2:
	 * - Only one REX prefix is allowed per instruction.
	 * - The REX prefix must immediately precede the opcode byte or the
	 *   escape opcode byte.
	 * - If an instruction has a mandatory prefix (0x66, 0xF2 or 0xF3)
	 *   the mandatory prefix must come before the REX prefix.
	 */
	if (cpu_mode == CPU_MODE_64BIT && x >= 0x40 && x <= 0x4F) {
		vie->rex_present = 1;
		vie->rex_w = x & 0x8 ? 1 : 0;
		vie->rex_r = x & 0x4 ? 1 : 0;
		vie->rex_x = x & 0x2 ? 1 : 0;
		vie->rex_b = x & 0x1 ? 1 : 0;
		vie_advance(vie);
	}

	/*
	 * Section "Operand-Size And Address-Size Attributes", Intel SDM, Vol 1
	 */
	if (cpu_mode == CPU_MODE_64BIT) {
		/*
		 * Default address size is 64-bits and default operand size
		 * is 32-bits.
		 */
		vie->addrsize = vie->addrsize_override ? 4 : 8;
		if (vie->rex_w)
			vie->opsize = 8;
		else if (vie->opsize_override)
			vie->opsize = 2;
		else
			vie->opsize = 4;
	} else if (cs_d) {
		/* Default address and operand sizes are 32-bits */
		vie->addrsize = vie->addrsize_override ? 2 : 4;
		vie->opsize = vie->opsize_override ? 2 : 4;
	} else {
		/* Default address and operand sizes are 16-bits */
		vie->addrsize = vie->addrsize_override ? 4 : 2;
		vie->opsize = vie->opsize_override ? 4 : 2;
	}
	return (0);
}

/* Decode the opcode byte following a 0x0F escape byte. */
static int
decode_two_byte_opcode(struct vie *vie)
{
	uint8_t x;

	if (vie_peek(vie, &x))
		return (-1);

	vie->op = two_byte_opcodes[x];

	if (vie->op.op_type == VIE_OP_TYPE_NONE)
		return (-1);

	vie_advance(vie);
	return (0);
}

/* Decode the primary opcode byte; fails on unrecognized opcodes. */
static int
decode_opcode(struct vie *vie)
{
	uint8_t x;

	if (vie_peek(vie, &x))
		return (-1);

	vie->op = one_byte_opcodes[x];

	if (vie->op.op_type == VIE_OP_TYPE_NONE)
		return (-1);

	vie_advance(vie);

	if (vie->op.op_type == VIE_OP_TYPE_TWO_BYTE)
		return (decode_two_byte_opcode(vie));

	return (0);
}

/*
 * Decode the ModR/M byte: mod/reg/rm fields, REX extensions, the base
 * register and how many displacement bytes follow.
 */
static int
decode_modrm(struct vie *vie, enum vm_cpu_mode cpu_mode)
{
	uint8_t x;

	if (cpu_mode == CPU_MODE_REAL)
		return (-1);

	if (vie->op.op_flags & VIE_OP_F_NO_MODRM)
		return (0);

	if (vie_peek(vie, &x))
		return (-1);

	vie->mod = (x >> 6) & 0x3;
	vie->rm =  (x >> 0) & 0x7;
	vie->reg = (x >> 3) & 0x7;

	/*
	 * A direct addressing mode makes no sense in the context of an EPT
	 * fault. There has to be a memory access involved to cause the
	 * EPT fault.
	 */
	if (vie->mod == VIE_MOD_DIRECT)
		return (-1);

	if ((vie->mod == VIE_MOD_INDIRECT && vie->rm == VIE_RM_DISP32) ||
	    (vie->mod != VIE_MOD_DIRECT && vie->rm == VIE_RM_SIB)) {
		/*
		 * Table 2-5: Special Cases of REX Encodings
		 *
		 * mod=0, r/m=5 is used in the compatibility mode to
		 * indicate a disp32 without a base register.
		 *
		 * mod!=3, r/m=4 is used in the compatibility mode to
		 * indicate that the SIB byte is present.
		 *
		 * The 'b' bit in the REX prefix is don't care in
		 * this case.
		 */
	} else {
		vie->rm |= (vie->rex_b << 3);
	}

	vie->reg |= (vie->rex_r << 3);

	/* SIB */
	if (vie->mod != VIE_MOD_DIRECT && vie->rm == VIE_RM_SIB)
		goto done;

	vie->base_register = gpr_map[vie->rm];

	switch (vie->mod) {
	case VIE_MOD_INDIRECT_DISP8:
		vie->disp_bytes = 1;
		break;
	case VIE_MOD_INDIRECT_DISP32:
		vie->disp_bytes = 4;
		break;
	case VIE_MOD_INDIRECT:
		if (vie->rm == VIE_RM_DISP32) {
			vie->disp_bytes = 4;
			/*
			 * Table 2-7. RIP-Relative Addressing
			 *
			 * In 64-bit mode mod=00 r/m=101 implies [rip] + disp32
			 * whereas in compatibility mode it just implies disp32.
			 */

			if (cpu_mode == CPU_MODE_64BIT)
				vie->base_register = VM_REG_GUEST_RIP;
			else
				vie->base_register = VM_REG_LAST;
		}
		break;
	}

done:
	vie_advance(vie);

	return (0);
}

/*
 * Decode the SIB byte (scale/index/base), applying REX.X/REX.B and the
 * special cases where the base register or index is unused.
 */
static int
decode_sib(struct vie *vie)
{
	uint8_t x;

	/* Proceed only if SIB byte is present */
	if (vie->mod == VIE_MOD_DIRECT || vie->rm != VIE_RM_SIB)
		return (0);

	if (vie_peek(vie, &x))
		return (-1);

	/* De-construct the SIB byte */
	vie->ss = (x >> 6) & 0x3;
	vie->index = (x >> 3) & 0x7;
	vie->base = (x >> 0) & 0x7;

	/* Apply the REX prefix modifiers */
	vie->index |= vie->rex_x << 3;
	vie->base |= vie->rex_b << 3;

	switch (vie->mod) {
	case VIE_MOD_INDIRECT_DISP8:
		vie->disp_bytes = 1;
		break;
	case VIE_MOD_INDIRECT_DISP32:
		vie->disp_bytes = 4;
		break;
	}

	if (vie->mod == VIE_MOD_INDIRECT &&
	    (vie->base == 5 || vie->base == 13)) {
		/*
		 * Special case when base register is unused if mod = 0
		 * and base = %rbp or %r13.
		 *
		 * Documented in:
		 * Table 2-3: 32-bit Addressing Forms with the SIB Byte
		 * Table 2-5: Special Cases of REX Encodings
		 */
		vie->disp_bytes = 4;
	} else {
		vie->base_register = gpr_map[vie->base];
	}

	/*
	 * All encodings of 'index' are valid except for %rsp (4).
	 *
	 * Documented in:
	 * Table 2-3: 32-bit Addressing Forms with the SIB Byte
	 * Table 2-5: Special Cases of REX Encodings
	 */
	if (vie->index != 4)
		vie->index_register = gpr_map[vie->index];

	/* 'scale' makes sense only in the context of an index register */
	if (vie->index_register < VM_REG_LAST)
		vie->scale = 1 << vie->ss;

	vie_advance(vie);

	return (0);
}

/* Consume the 1 or 4 displacement bytes and sign-extend them. */
static int
decode_displacement(struct vie *vie)
{
	int n, i;
	uint8_t x;

	union {
		char	buf[4];
		int8_t	signed8;
		int32_t	signed32;
	} u;

	if ((n = vie->disp_bytes) == 0)
		return (0);

	if (n != 1 && n != 4)
		panic("decode_displacement: invalid disp_bytes %d", n);

	for (i = 0; i < n; i++) {
		if (vie_peek(vie, &x))
			return (-1);

		u.buf[i] = x;
		vie_advance(vie);
	}

	if (n == 1)
		vie->displacement = u.signed8;		/* sign-extended */
	else
		vie->displacement = u.signed32;		/* sign-extended */

	return (0);
}

/*
 * Determine the immediate operand size from the op flags and operand
 * size, then consume and sign-extend the immediate bytes.
 */
static int
decode_immediate(struct vie *vie)
{
	int i, n;
	uint8_t x;
	union {
		char	buf[4];
		int8_t	signed8;
		int16_t	signed16;
		int32_t	signed32;
	} u;

	/* Figure out immediate operand size (if any) */
	if (vie->op.op_flags & VIE_OP_F_IMM) {
		/*
		 * Section 2.2.1.5 "Immediates", Intel SDM:
		 * In 64-bit mode the typical size of immediate operands
		 * remains 32-bits. When the operand size is 64-bits, the
		 * processor sign-extends all immediates to 64-bits prior
		 * to their use.
		 */
		if (vie->opsize == 4 || vie->opsize == 8)
			vie->imm_bytes = 4;
		else
			vie->imm_bytes = 2;
	} else if (vie->op.op_flags & VIE_OP_F_IMM8) {
		vie->imm_bytes = 1;
	}

	if ((n = vie->imm_bytes) == 0)
		return (0);

	KASSERT(n == 1 || n == 2 || n == 4,
	    ("%s: invalid number of immediate bytes: %d", __func__, n));

	for (i = 0; i < n; i++) {
		if (vie_peek(vie, &x))
			return (-1);

		u.buf[i] = x;
		vie_advance(vie);
	}

	/* sign-extend the immediate value before use */
	if (n == 1)
		vie->immediate = u.signed8;
	else if (n == 2)
		vie->immediate = u.signed16;
	else
		vie->immediate = u.signed32;

	return (0);
}

/*
 * Consume a direct memory-offset operand (moffset), whose size follows
 * the instruction's address size. The zero-extended offset is stored in
 * vie->displacement.
 */
static int
decode_moffset(struct vie *vie)
{
	int i, n;
	uint8_t x;
	union {
		char	buf[8];
		uint64_t u64;
	} u;

	if ((vie->op.op_flags & VIE_OP_F_MOFFSET) == 0)
		return (0);

	/*
	 * Section 2.2.1.4, "Direct Memory-Offset MOVs", Intel SDM:
	 * The memory offset size follows the address-size of the instruction.
	 */
	n = vie->addrsize;
	KASSERT(n == 2 || n == 4 || n == 8, ("invalid moffset bytes: %d", n));

	u.u64 = 0;
	for (i = 0; i < n; i++) {
		if (vie_peek(vie, &x))
			return (-1);

		u.buf[i] = x;
		vie_advance(vie);
	}
	vie->displacement = u.u64;
	return (0);
}

/*
 * Verify that all the bytes in the instruction buffer were consumed.
 */
static int
verify_inst_length(struct vie *vie)
{

	if (vie->num_processed == vie->num_valid)
		return (0);
	else
		return (-1);
}

/*
 * Verify that the 'guest linear address' provided as collateral of the nested
 * page table fault matches with our instruction decoding.
 */
static int
verify_gla(struct vm *vm, int cpuid, uint64_t gla, struct vie *vie)
{
	int error;
	uint64_t base, idx, gla2;

	/* Skip 'gla' verification */
	if (gla == VIE_INVALID_GLA)
		return (0);

	base = 0;
	if (vie->base_register != VM_REG_LAST) {
		error = vm_get_register(vm, cpuid, vie->base_register, &base);
		if (error) {
			printf("verify_gla: error %d getting base reg %d\n",
			    error, vie->base_register);
			return (-1);
		}

		/*
		 * RIP-relative addressing starts from the following
		 * instruction
		 */
		if (vie->base_register == VM_REG_GUEST_RIP)
			base += vie->num_valid;
	}

	idx = 0;
	if (vie->index_register != VM_REG_LAST) {
		error = vm_get_register(vm, cpuid, vie->index_register, &idx);
		if (error) {
			printf("verify_gla: error %d getting index reg %d\n",
			    error, vie->index_register);
			return (-1);
		}
	}

	/* XXX assuming that the base address of the segment is 0 */
	gla2 = base + vie->scale * idx + vie->displacement;
	gla2 &= size2mask[vie->addrsize];
	if (gla != gla2) {
		printf("verify_gla mismatch: "
		    "base(0x%0lx), scale(%d), index(0x%0lx), "
		    "disp(0x%0lx), gla(0x%0lx), gla2(0x%0lx)\n",
		    base, vie->scale, idx, vie->displacement, gla, gla2);
		return (-1);
	}

	return (0);
}
1808243640Sneel 1809240941Sneelint 1810267399Sjhbvmm_decode_instruction(struct vm *vm, int cpuid, uint64_t gla, 1811270159Sgrehan enum vm_cpu_mode cpu_mode, int cs_d, struct vie *vie) 1812240941Sneel{ 1813243640Sneel 1814270159Sgrehan if (decode_prefixes(vie, cpu_mode, cs_d)) 1815270159Sgrehan return (-1); 1816240941Sneel 1817240941Sneel if (decode_opcode(vie)) 1818240941Sneel return (-1); 1819240941Sneel 1820267399Sjhb if (decode_modrm(vie, cpu_mode)) 1821240941Sneel return (-1); 1822240941Sneel 1823243640Sneel if (decode_sib(vie)) 1824243640Sneel return (-1); 1825243640Sneel 1826240941Sneel if (decode_displacement(vie)) 1827240941Sneel return (-1); 1828270159Sgrehan 1829240941Sneel if (decode_immediate(vie)) 1830240941Sneel return (-1); 1831240941Sneel 1832270159Sgrehan if (decode_moffset(vie)) 1833270159Sgrehan return (-1); 1834270159Sgrehan 1835252641Sneel if (verify_inst_length(vie)) 1836252641Sneel return (-1); 1837252641Sneel 1838243640Sneel if (verify_gla(vm, cpuid, gla, vie)) 1839243640Sneel return (-1); 1840243640Sneel 1841243640Sneel vie->decoded = 1; /* success */ 1842243640Sneel 1843240941Sneel return (0); 1844240941Sneel} 1845243640Sneel#endif /* _KERNEL */ 1846