1240941Sneel/*- 2240941Sneel * Copyright (c) 2012 Sandvine, Inc. 3240941Sneel * Copyright (c) 2012 NetApp, Inc. 4240941Sneel * All rights reserved. 5240941Sneel * 6240941Sneel * Redistribution and use in source and binary forms, with or without 7240941Sneel * modification, are permitted provided that the following conditions 8240941Sneel * are met: 9240941Sneel * 1. Redistributions of source code must retain the above copyright 10240941Sneel * notice, this list of conditions and the following disclaimer. 11240941Sneel * 2. Redistributions in binary form must reproduce the above copyright 12240941Sneel * notice, this list of conditions and the following disclaimer in the 13240941Sneel * documentation and/or other materials provided with the distribution. 14240941Sneel * 15250175Semaste * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 16240941Sneel * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 17240941Sneel * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 18250175Semaste * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 19240941Sneel * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 20240941Sneel * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 21240941Sneel * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 22240941Sneel * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 23240941Sneel * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 24240941Sneel * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 25240941Sneel * SUCH DAMAGE. 
26240941Sneel * 27240941Sneel * $FreeBSD$ 28240941Sneel */ 29240941Sneel 30240941Sneel#include <sys/cdefs.h> 31240941Sneel__FBSDID("$FreeBSD$"); 32240941Sneel 33243640Sneel#ifdef _KERNEL 34240941Sneel#include <sys/param.h> 35240941Sneel#include <sys/pcpu.h> 36240941Sneel#include <sys/systm.h> 37268976Sjhb#include <sys/proc.h> 38240941Sneel 39240941Sneel#include <vm/vm.h> 40240941Sneel#include <vm/pmap.h> 41240941Sneel 42240941Sneel#include <machine/vmparam.h> 43240941Sneel#include <machine/vmm.h> 44243640Sneel#else /* !_KERNEL */ 45243640Sneel#include <sys/types.h> 46243640Sneel#include <sys/errno.h> 47270159Sgrehan#include <sys/_iovec.h> 48240941Sneel 49243640Sneel#include <machine/vmm.h> 50240941Sneel 51268976Sjhb#include <assert.h> 52243640Sneel#include <vmmapi.h> 53268976Sjhb#define KASSERT(exp,msg) assert((exp)) 54243640Sneel#endif /* _KERNEL */ 55240941Sneel 56268976Sjhb#include <machine/vmm_instruction_emul.h> 57268976Sjhb#include <x86/psl.h> 58268976Sjhb#include <x86/specialreg.h> 59268976Sjhb 60243640Sneel/* struct vie_op.op_type */ 61243640Sneelenum { 62243640Sneel VIE_OP_TYPE_NONE = 0, 63243640Sneel VIE_OP_TYPE_MOV, 64267396Sjhb VIE_OP_TYPE_MOVSX, 65267396Sjhb VIE_OP_TYPE_MOVZX, 66243640Sneel VIE_OP_TYPE_AND, 67253585Sneel VIE_OP_TYPE_OR, 68271659Sgrehan VIE_OP_TYPE_SUB, 69267396Sjhb VIE_OP_TYPE_TWO_BYTE, 70270159Sgrehan VIE_OP_TYPE_PUSH, 71270159Sgrehan VIE_OP_TYPE_CMP, 72276349Sneel VIE_OP_TYPE_POP, 73284894Sneel VIE_OP_TYPE_MOVS, 74284899Sneel VIE_OP_TYPE_GROUP1, 75284899Sneel VIE_OP_TYPE_STOS, 76284900Sneel VIE_OP_TYPE_BITTEST, 77243640Sneel VIE_OP_TYPE_LAST 78243640Sneel}; 79243640Sneel 80243640Sneel/* struct vie_op.op_flags */ 81270159Sgrehan#define VIE_OP_F_IMM (1 << 0) /* 16/32-bit immediate operand */ 82270159Sgrehan#define VIE_OP_F_IMM8 (1 << 1) /* 8-bit immediate operand */ 83270159Sgrehan#define VIE_OP_F_MOFFSET (1 << 2) /* 16/32/64-bit immediate moffset */ 84270159Sgrehan#define VIE_OP_F_NO_MODRM (1 << 3) 85284894Sneel#define 
VIE_OP_F_NO_GLA_VERIFICATION (1 << 4) 86243640Sneel 87267396Sjhbstatic const struct vie_op two_byte_opcodes[256] = { 88267396Sjhb [0xB6] = { 89267396Sjhb .op_byte = 0xB6, 90267396Sjhb .op_type = VIE_OP_TYPE_MOVZX, 91267396Sjhb }, 92270159Sgrehan [0xB7] = { 93270159Sgrehan .op_byte = 0xB7, 94270159Sgrehan .op_type = VIE_OP_TYPE_MOVZX, 95270159Sgrehan }, 96284900Sneel [0xBA] = { 97284900Sneel .op_byte = 0xBA, 98284900Sneel .op_type = VIE_OP_TYPE_BITTEST, 99284900Sneel .op_flags = VIE_OP_F_IMM8, 100284900Sneel }, 101267396Sjhb [0xBE] = { 102267396Sjhb .op_byte = 0xBE, 103267396Sjhb .op_type = VIE_OP_TYPE_MOVSX, 104267396Sjhb }, 105267396Sjhb}; 106267396Sjhb 107243640Sneelstatic const struct vie_op one_byte_opcodes[256] = { 108267396Sjhb [0x0F] = { 109267396Sjhb .op_byte = 0x0F, 110267396Sjhb .op_type = VIE_OP_TYPE_TWO_BYTE 111267396Sjhb }, 112271659Sgrehan [0x2B] = { 113271659Sgrehan .op_byte = 0x2B, 114271659Sgrehan .op_type = VIE_OP_TYPE_SUB, 115271659Sgrehan }, 116284900Sneel [0x39] = { 117284900Sneel .op_byte = 0x39, 118284900Sneel .op_type = VIE_OP_TYPE_CMP, 119284900Sneel }, 120270159Sgrehan [0x3B] = { 121270159Sgrehan .op_byte = 0x3B, 122270159Sgrehan .op_type = VIE_OP_TYPE_CMP, 123270159Sgrehan }, 124246108Sneel [0x88] = { 125246108Sneel .op_byte = 0x88, 126246108Sneel .op_type = VIE_OP_TYPE_MOV, 127246108Sneel }, 128243640Sneel [0x89] = { 129243640Sneel .op_byte = 0x89, 130243640Sneel .op_type = VIE_OP_TYPE_MOV, 131243640Sneel }, 132254964Sneel [0x8A] = { 133254964Sneel .op_byte = 0x8A, 134254964Sneel .op_type = VIE_OP_TYPE_MOV, 135254964Sneel }, 136243640Sneel [0x8B] = { 137243640Sneel .op_byte = 0x8B, 138243640Sneel .op_type = VIE_OP_TYPE_MOV, 139243640Sneel }, 140270159Sgrehan [0xA1] = { 141270159Sgrehan .op_byte = 0xA1, 142270159Sgrehan .op_type = VIE_OP_TYPE_MOV, 143270159Sgrehan .op_flags = VIE_OP_F_MOFFSET | VIE_OP_F_NO_MODRM, 144270159Sgrehan }, 145270159Sgrehan [0xA3] = { 146270159Sgrehan .op_byte = 0xA3, 147270159Sgrehan .op_type = VIE_OP_TYPE_MOV, 
148270159Sgrehan .op_flags = VIE_OP_F_MOFFSET | VIE_OP_F_NO_MODRM, 149270159Sgrehan }, 150284894Sneel [0xA4] = { 151284894Sneel .op_byte = 0xA4, 152284894Sneel .op_type = VIE_OP_TYPE_MOVS, 153284894Sneel .op_flags = VIE_OP_F_NO_MODRM | VIE_OP_F_NO_GLA_VERIFICATION 154284894Sneel }, 155284894Sneel [0xA5] = { 156284894Sneel .op_byte = 0xA5, 157284894Sneel .op_type = VIE_OP_TYPE_MOVS, 158284894Sneel .op_flags = VIE_OP_F_NO_MODRM | VIE_OP_F_NO_GLA_VERIFICATION 159284894Sneel }, 160284899Sneel [0xAA] = { 161284899Sneel .op_byte = 0xAA, 162284899Sneel .op_type = VIE_OP_TYPE_STOS, 163284899Sneel .op_flags = VIE_OP_F_NO_MODRM | VIE_OP_F_NO_GLA_VERIFICATION 164284899Sneel }, 165284899Sneel [0xAB] = { 166284899Sneel .op_byte = 0xAB, 167284899Sneel .op_type = VIE_OP_TYPE_STOS, 168284899Sneel .op_flags = VIE_OP_F_NO_MODRM | VIE_OP_F_NO_GLA_VERIFICATION 169284899Sneel }, 170270159Sgrehan [0xC6] = { 171270159Sgrehan /* XXX Group 11 extended opcode - not just MOV */ 172270159Sgrehan .op_byte = 0xC6, 173270159Sgrehan .op_type = VIE_OP_TYPE_MOV, 174270159Sgrehan .op_flags = VIE_OP_F_IMM8, 175270159Sgrehan }, 176243640Sneel [0xC7] = { 177243640Sneel .op_byte = 0xC7, 178243640Sneel .op_type = VIE_OP_TYPE_MOV, 179243640Sneel .op_flags = VIE_OP_F_IMM, 180243640Sneel }, 181243640Sneel [0x23] = { 182243640Sneel .op_byte = 0x23, 183243640Sneel .op_type = VIE_OP_TYPE_AND, 184243667Sgrehan }, 185284900Sneel [0x80] = { 186284900Sneel /* Group 1 extended opcode */ 187284900Sneel .op_byte = 0x80, 188284900Sneel .op_type = VIE_OP_TYPE_GROUP1, 189284900Sneel .op_flags = VIE_OP_F_IMM8, 190284900Sneel }, 191243667Sgrehan [0x81] = { 192284900Sneel /* Group 1 extended opcode */ 193243667Sgrehan .op_byte = 0x81, 194284899Sneel .op_type = VIE_OP_TYPE_GROUP1, 195243667Sgrehan .op_flags = VIE_OP_F_IMM, 196253585Sneel }, 197253585Sneel [0x83] = { 198284900Sneel /* Group 1 extended opcode */ 199253585Sneel .op_byte = 0x83, 200284899Sneel .op_type = VIE_OP_TYPE_GROUP1, 201253585Sneel .op_flags = 
VIE_OP_F_IMM8, 202253585Sneel }, 203276349Sneel [0x8F] = { 204276349Sneel /* XXX Group 1A extended opcode - not just POP */ 205276349Sneel .op_byte = 0x8F, 206276349Sneel .op_type = VIE_OP_TYPE_POP, 207276349Sneel }, 208270159Sgrehan [0xFF] = { 209270159Sgrehan /* XXX Group 5 extended opcode - not just PUSH */ 210270159Sgrehan .op_byte = 0xFF, 211270159Sgrehan .op_type = VIE_OP_TYPE_PUSH, 212270159Sgrehan } 213243640Sneel}; 214243640Sneel 215243640Sneel/* struct vie.mod */ 216243640Sneel#define VIE_MOD_INDIRECT 0 217243640Sneel#define VIE_MOD_INDIRECT_DISP8 1 218243640Sneel#define VIE_MOD_INDIRECT_DISP32 2 219243640Sneel#define VIE_MOD_DIRECT 3 220243640Sneel 221243640Sneel/* struct vie.rm */ 222243640Sneel#define VIE_RM_SIB 4 223243640Sneel#define VIE_RM_DISP32 5 224243640Sneel 225243640Sneel#define GB (1024 * 1024 * 1024) 226243640Sneel 227240941Sneelstatic enum vm_reg_name gpr_map[16] = { 228240941Sneel VM_REG_GUEST_RAX, 229240941Sneel VM_REG_GUEST_RCX, 230240941Sneel VM_REG_GUEST_RDX, 231240941Sneel VM_REG_GUEST_RBX, 232240941Sneel VM_REG_GUEST_RSP, 233240941Sneel VM_REG_GUEST_RBP, 234240941Sneel VM_REG_GUEST_RSI, 235240941Sneel VM_REG_GUEST_RDI, 236240941Sneel VM_REG_GUEST_R8, 237240941Sneel VM_REG_GUEST_R9, 238240941Sneel VM_REG_GUEST_R10, 239240941Sneel VM_REG_GUEST_R11, 240240941Sneel VM_REG_GUEST_R12, 241240941Sneel VM_REG_GUEST_R13, 242240941Sneel VM_REG_GUEST_R14, 243240941Sneel VM_REG_GUEST_R15 244240941Sneel}; 245240941Sneel 246243640Sneelstatic uint64_t size2mask[] = { 247243640Sneel [1] = 0xff, 248243640Sneel [2] = 0xffff, 249243640Sneel [4] = 0xffffffff, 250243640Sneel [8] = 0xffffffffffffffff, 251243640Sneel}; 252243640Sneel 253243640Sneelstatic int 254243640Sneelvie_read_register(void *vm, int vcpuid, enum vm_reg_name reg, uint64_t *rval) 255243640Sneel{ 256243640Sneel int error; 257243640Sneel 258243640Sneel error = vm_get_register(vm, vcpuid, reg, rval); 259243640Sneel 260243640Sneel return (error); 261243640Sneel} 262243640Sneel 
263270159Sgrehanstatic void 264270159Sgrehanvie_calc_bytereg(struct vie *vie, enum vm_reg_name *reg, int *lhbr) 265246108Sneel{ 266270159Sgrehan *lhbr = 0; 267270159Sgrehan *reg = gpr_map[vie->reg]; 268246108Sneel 269246108Sneel /* 270270159Sgrehan * 64-bit mode imposes limitations on accessing legacy high byte 271270159Sgrehan * registers (lhbr). 272246108Sneel * 273246108Sneel * The legacy high-byte registers cannot be addressed if the REX 274246108Sneel * prefix is present. In this case the values 4, 5, 6 and 7 of the 275246108Sneel * 'ModRM:reg' field address %spl, %bpl, %sil and %dil respectively. 276246108Sneel * 277246108Sneel * If the REX prefix is not present then the values 4, 5, 6 and 7 278246108Sneel * of the 'ModRM:reg' field address the legacy high-byte registers, 279246108Sneel * %ah, %ch, %dh and %bh respectively. 280246108Sneel */ 281246108Sneel if (!vie->rex_present) { 282246108Sneel if (vie->reg & 0x4) { 283270159Sgrehan *lhbr = 1; 284270159Sgrehan *reg = gpr_map[vie->reg & 0x3]; 285246108Sneel } 286246108Sneel } 287270159Sgrehan} 288246108Sneel 289270159Sgrehanstatic int 290270159Sgrehanvie_read_bytereg(void *vm, int vcpuid, struct vie *vie, uint8_t *rval) 291270159Sgrehan{ 292270159Sgrehan uint64_t val; 293270159Sgrehan int error, lhbr; 294270159Sgrehan enum vm_reg_name reg; 295270159Sgrehan 296270159Sgrehan vie_calc_bytereg(vie, ®, &lhbr); 297246108Sneel error = vm_get_register(vm, vcpuid, reg, &val); 298270159Sgrehan 299270159Sgrehan /* 300270159Sgrehan * To obtain the value of a legacy high byte register shift the 301270159Sgrehan * base register right by 8 bits (%ah = %rax >> 8). 
302270159Sgrehan */ 303270159Sgrehan if (lhbr) 304270159Sgrehan *rval = val >> 8; 305270159Sgrehan else 306270159Sgrehan *rval = val; 307246108Sneel return (error); 308246108Sneel} 309246108Sneel 310270159Sgrehanstatic int 311270159Sgrehanvie_write_bytereg(void *vm, int vcpuid, struct vie *vie, uint8_t byte) 312270159Sgrehan{ 313270159Sgrehan uint64_t origval, val, mask; 314270159Sgrehan int error, lhbr; 315270159Sgrehan enum vm_reg_name reg; 316270159Sgrehan 317270159Sgrehan vie_calc_bytereg(vie, ®, &lhbr); 318270159Sgrehan error = vm_get_register(vm, vcpuid, reg, &origval); 319270159Sgrehan if (error == 0) { 320270159Sgrehan val = byte; 321270159Sgrehan mask = 0xff; 322270159Sgrehan if (lhbr) { 323270159Sgrehan /* 324270159Sgrehan * Shift left by 8 to store 'byte' in a legacy high 325270159Sgrehan * byte register. 326270159Sgrehan */ 327270159Sgrehan val <<= 8; 328270159Sgrehan mask <<= 8; 329270159Sgrehan } 330270159Sgrehan val |= origval & ~mask; 331270159Sgrehan error = vm_set_register(vm, vcpuid, reg, val); 332270159Sgrehan } 333270159Sgrehan return (error); 334270159Sgrehan} 335270159Sgrehan 336268976Sjhbint 337243640Sneelvie_update_register(void *vm, int vcpuid, enum vm_reg_name reg, 338243640Sneel uint64_t val, int size) 339243640Sneel{ 340243640Sneel int error; 341243640Sneel uint64_t origval; 342243640Sneel 343243640Sneel switch (size) { 344243640Sneel case 1: 345243640Sneel case 2: 346243640Sneel error = vie_read_register(vm, vcpuid, reg, &origval); 347243640Sneel if (error) 348243640Sneel return (error); 349243640Sneel val &= size2mask[size]; 350243640Sneel val |= origval & ~size2mask[size]; 351243640Sneel break; 352243640Sneel case 4: 353243640Sneel val &= 0xffffffffUL; 354243640Sneel break; 355243640Sneel case 8: 356243640Sneel break; 357243640Sneel default: 358243640Sneel return (EINVAL); 359243640Sneel } 360243640Sneel 361243640Sneel error = vm_set_register(vm, vcpuid, reg, val); 362243640Sneel return (error); 363243640Sneel} 364243640Sneel 
365276349Sneel#define RFLAGS_STATUS_BITS (PSL_C | PSL_PF | PSL_AF | PSL_Z | PSL_N | PSL_V) 366276349Sneel 367243640Sneel/* 368270159Sgrehan * Return the status flags that would result from doing (x - y). 369243640Sneel */ 370276349Sneel#define GETCC(sz) \ 371276349Sneelstatic u_long \ 372276349Sneelgetcc##sz(uint##sz##_t x, uint##sz##_t y) \ 373276349Sneel{ \ 374276349Sneel u_long rflags; \ 375276349Sneel \ 376276349Sneel __asm __volatile("sub %2,%1; pushfq; popq %0" : \ 377276349Sneel "=r" (rflags), "+r" (x) : "m" (y)); \ 378276349Sneel return (rflags); \ 379276349Sneel} struct __hack 380270159Sgrehan 381276349SneelGETCC(8); 382276349SneelGETCC(16); 383276349SneelGETCC(32); 384276349SneelGETCC(64); 385270159Sgrehan 386270159Sgrehanstatic u_long 387270159Sgrehangetcc(int opsize, uint64_t x, uint64_t y) 388270159Sgrehan{ 389276349Sneel KASSERT(opsize == 1 || opsize == 2 || opsize == 4 || opsize == 8, 390270159Sgrehan ("getcc: invalid operand size %d", opsize)); 391270159Sgrehan 392276349Sneel if (opsize == 1) 393276349Sneel return (getcc8(x, y)); 394276349Sneel else if (opsize == 2) 395270159Sgrehan return (getcc16(x, y)); 396270159Sgrehan else if (opsize == 4) 397270159Sgrehan return (getcc32(x, y)); 398270159Sgrehan else 399270159Sgrehan return (getcc64(x, y)); 400270159Sgrehan} 401270159Sgrehan 402243640Sneelstatic int 403243640Sneelemulate_mov(void *vm, int vcpuid, uint64_t gpa, struct vie *vie, 404243640Sneel mem_region_read_t memread, mem_region_write_t memwrite, void *arg) 405243640Sneel{ 406243640Sneel int error, size; 407243640Sneel enum vm_reg_name reg; 408246108Sneel uint8_t byte; 409243640Sneel uint64_t val; 410243640Sneel 411270159Sgrehan size = vie->opsize; 412243640Sneel error = EINVAL; 413243640Sneel 414243640Sneel switch (vie->op.op_byte) { 415246108Sneel case 0x88: 416246108Sneel /* 417246108Sneel * MOV byte from reg (ModRM:reg) to mem (ModRM:r/m) 418246108Sneel * 88/r: mov r/m8, r8 419246108Sneel * REX + 88/r: mov r/m8, r8 (%ah, %ch, %dh, %bh not 
available) 420246108Sneel */ 421270159Sgrehan size = 1; /* override for byte operation */ 422246108Sneel error = vie_read_bytereg(vm, vcpuid, vie, &byte); 423246108Sneel if (error == 0) 424246108Sneel error = memwrite(vm, vcpuid, gpa, byte, size, arg); 425246108Sneel break; 426243640Sneel case 0x89: 427243640Sneel /* 428243640Sneel * MOV from reg (ModRM:reg) to mem (ModRM:r/m) 429270159Sgrehan * 89/r: mov r/m16, r16 430243640Sneel * 89/r: mov r/m32, r32 431243640Sneel * REX.W + 89/r mov r/m64, r64 432243640Sneel */ 433243640Sneel reg = gpr_map[vie->reg]; 434243640Sneel error = vie_read_register(vm, vcpuid, reg, &val); 435243640Sneel if (error == 0) { 436243640Sneel val &= size2mask[size]; 437243640Sneel error = memwrite(vm, vcpuid, gpa, val, size, arg); 438243640Sneel } 439243640Sneel break; 440254964Sneel case 0x8A: 441270159Sgrehan /* 442270159Sgrehan * MOV byte from mem (ModRM:r/m) to reg (ModRM:reg) 443270159Sgrehan * 8A/r: mov r8, r/m8 444270159Sgrehan * REX + 8A/r: mov r8, r/m8 445270159Sgrehan */ 446270159Sgrehan size = 1; /* override for byte operation */ 447270159Sgrehan error = memread(vm, vcpuid, gpa, &val, size, arg); 448270159Sgrehan if (error == 0) 449270159Sgrehan error = vie_write_bytereg(vm, vcpuid, vie, val); 450270159Sgrehan break; 451243640Sneel case 0x8B: 452243640Sneel /* 453243640Sneel * MOV from mem (ModRM:r/m) to reg (ModRM:reg) 454270159Sgrehan * 8B/r: mov r16, r/m16 455243640Sneel * 8B/r: mov r32, r/m32 456243640Sneel * REX.W 8B/r: mov r64, r/m64 457243640Sneel */ 458243640Sneel error = memread(vm, vcpuid, gpa, &val, size, arg); 459243640Sneel if (error == 0) { 460243640Sneel reg = gpr_map[vie->reg]; 461243640Sneel error = vie_update_register(vm, vcpuid, reg, val, size); 462243640Sneel } 463243640Sneel break; 464270159Sgrehan case 0xA1: 465270159Sgrehan /* 466270159Sgrehan * MOV from seg:moffset to AX/EAX/RAX 467270159Sgrehan * A1: mov AX, moffs16 468270159Sgrehan * A1: mov EAX, moffs32 469270159Sgrehan * REX.W + A1: mov RAX, moffs64 
470270159Sgrehan */ 471270159Sgrehan error = memread(vm, vcpuid, gpa, &val, size, arg); 472270159Sgrehan if (error == 0) { 473270159Sgrehan reg = VM_REG_GUEST_RAX; 474270159Sgrehan error = vie_update_register(vm, vcpuid, reg, val, size); 475270159Sgrehan } 476270159Sgrehan break; 477270159Sgrehan case 0xA3: 478270159Sgrehan /* 479270159Sgrehan * MOV from AX/EAX/RAX to seg:moffset 480270159Sgrehan * A3: mov moffs16, AX 481270159Sgrehan * A3: mov moffs32, EAX 482270159Sgrehan * REX.W + A3: mov moffs64, RAX 483270159Sgrehan */ 484270159Sgrehan error = vie_read_register(vm, vcpuid, VM_REG_GUEST_RAX, &val); 485270159Sgrehan if (error == 0) { 486270159Sgrehan val &= size2mask[size]; 487270159Sgrehan error = memwrite(vm, vcpuid, gpa, val, size, arg); 488270159Sgrehan } 489270159Sgrehan break; 490270159Sgrehan case 0xC6: 491270159Sgrehan /* 492270159Sgrehan * MOV from imm8 to mem (ModRM:r/m) 493270159Sgrehan * C6/0 mov r/m8, imm8 494270159Sgrehan * REX + C6/0 mov r/m8, imm8 495270159Sgrehan */ 496270159Sgrehan size = 1; /* override for byte operation */ 497270159Sgrehan error = memwrite(vm, vcpuid, gpa, vie->immediate, size, arg); 498270159Sgrehan break; 499243640Sneel case 0xC7: 500243640Sneel /* 501270159Sgrehan * MOV from imm16/imm32 to mem (ModRM:r/m) 502270159Sgrehan * C7/0 mov r/m16, imm16 503243640Sneel * C7/0 mov r/m32, imm32 504243640Sneel * REX.W + C7/0 mov r/m64, imm32 (sign-extended to 64-bits) 505243640Sneel */ 506270159Sgrehan val = vie->immediate & size2mask[size]; 507243640Sneel error = memwrite(vm, vcpuid, gpa, val, size, arg); 508243640Sneel break; 509243640Sneel default: 510243640Sneel break; 511243640Sneel } 512243640Sneel 513243640Sneel return (error); 514243640Sneel} 515243640Sneel 516243640Sneelstatic int 517267396Sjhbemulate_movx(void *vm, int vcpuid, uint64_t gpa, struct vie *vie, 518267396Sjhb mem_region_read_t memread, mem_region_write_t memwrite, 519267396Sjhb void *arg) 520267396Sjhb{ 521267396Sjhb int error, size; 522267396Sjhb enum 
vm_reg_name reg; 523267396Sjhb uint64_t val; 524267396Sjhb 525270159Sgrehan size = vie->opsize; 526267396Sjhb error = EINVAL; 527267396Sjhb 528267396Sjhb switch (vie->op.op_byte) { 529267396Sjhb case 0xB6: 530267396Sjhb /* 531267396Sjhb * MOV and zero extend byte from mem (ModRM:r/m) to 532267396Sjhb * reg (ModRM:reg). 533267396Sjhb * 534270159Sgrehan * 0F B6/r movzx r16, r/m8 535270159Sgrehan * 0F B6/r movzx r32, r/m8 536270159Sgrehan * REX.W + 0F B6/r movzx r64, r/m8 537267396Sjhb */ 538267396Sjhb 539267396Sjhb /* get the first operand */ 540267396Sjhb error = memread(vm, vcpuid, gpa, &val, 1, arg); 541267396Sjhb if (error) 542267396Sjhb break; 543267396Sjhb 544267396Sjhb /* get the second operand */ 545267396Sjhb reg = gpr_map[vie->reg]; 546267396Sjhb 547270159Sgrehan /* zero-extend byte */ 548270159Sgrehan val = (uint8_t)val; 549267396Sjhb 550267396Sjhb /* write the result */ 551267396Sjhb error = vie_update_register(vm, vcpuid, reg, val, size); 552267396Sjhb break; 553270159Sgrehan case 0xB7: 554270159Sgrehan /* 555270159Sgrehan * MOV and zero extend word from mem (ModRM:r/m) to 556270159Sgrehan * reg (ModRM:reg). 557270159Sgrehan * 558270159Sgrehan * 0F B7/r movzx r32, r/m16 559270159Sgrehan * REX.W + 0F B7/r movzx r64, r/m16 560270159Sgrehan */ 561270159Sgrehan error = memread(vm, vcpuid, gpa, &val, 2, arg); 562270159Sgrehan if (error) 563270159Sgrehan return (error); 564270159Sgrehan 565270159Sgrehan reg = gpr_map[vie->reg]; 566270159Sgrehan 567270159Sgrehan /* zero-extend word */ 568270159Sgrehan val = (uint16_t)val; 569270159Sgrehan 570270159Sgrehan error = vie_update_register(vm, vcpuid, reg, val, size); 571270159Sgrehan break; 572267396Sjhb case 0xBE: 573267396Sjhb /* 574267396Sjhb * MOV and sign extend byte from mem (ModRM:r/m) to 575267396Sjhb * reg (ModRM:reg). 
576267396Sjhb * 577270159Sgrehan * 0F BE/r movsx r16, r/m8 578270159Sgrehan * 0F BE/r movsx r32, r/m8 579270159Sgrehan * REX.W + 0F BE/r movsx r64, r/m8 580267396Sjhb */ 581267396Sjhb 582267396Sjhb /* get the first operand */ 583267396Sjhb error = memread(vm, vcpuid, gpa, &val, 1, arg); 584267396Sjhb if (error) 585267396Sjhb break; 586267396Sjhb 587267396Sjhb /* get the second operand */ 588267396Sjhb reg = gpr_map[vie->reg]; 589267396Sjhb 590267396Sjhb /* sign extend byte */ 591267396Sjhb val = (int8_t)val; 592267396Sjhb 593267396Sjhb /* write the result */ 594267396Sjhb error = vie_update_register(vm, vcpuid, reg, val, size); 595267396Sjhb break; 596267396Sjhb default: 597267396Sjhb break; 598267396Sjhb } 599267396Sjhb return (error); 600267396Sjhb} 601267396Sjhb 602284894Sneel/* 603284894Sneel * Helper function to calculate and validate a linear address. 604284894Sneel */ 605267396Sjhbstatic int 606284894Sneelget_gla(void *vm, int vcpuid, struct vie *vie, struct vm_guest_paging *paging, 607284894Sneel int opsize, int addrsize, int prot, enum vm_reg_name seg, 608284900Sneel enum vm_reg_name gpr, uint64_t *gla, int *fault) 609284894Sneel{ 610284894Sneel struct seg_desc desc; 611284894Sneel uint64_t cr0, val, rflags; 612284894Sneel int error; 613284894Sneel 614284894Sneel error = vie_read_register(vm, vcpuid, VM_REG_GUEST_CR0, &cr0); 615284894Sneel KASSERT(error == 0, ("%s: error %d getting cr0", __func__, error)); 616284894Sneel 617284894Sneel error = vie_read_register(vm, vcpuid, VM_REG_GUEST_RFLAGS, &rflags); 618284894Sneel KASSERT(error == 0, ("%s: error %d getting rflags", __func__, error)); 619284894Sneel 620284894Sneel error = vm_get_seg_desc(vm, vcpuid, seg, &desc); 621284894Sneel KASSERT(error == 0, ("%s: error %d getting segment descriptor %d", 622284894Sneel __func__, error, seg)); 623284894Sneel 624284894Sneel error = vie_read_register(vm, vcpuid, gpr, &val); 625284894Sneel KASSERT(error == 0, ("%s: error %d getting register %d", __func__, 
626284894Sneel error, gpr)); 627284894Sneel 628284894Sneel if (vie_calculate_gla(paging->cpu_mode, seg, &desc, val, opsize, 629284894Sneel addrsize, prot, gla)) { 630284894Sneel if (seg == VM_REG_GUEST_SS) 631284894Sneel vm_inject_ss(vm, vcpuid, 0); 632284894Sneel else 633284894Sneel vm_inject_gp(vm, vcpuid); 634284900Sneel goto guest_fault; 635284894Sneel } 636284894Sneel 637284894Sneel if (vie_canonical_check(paging->cpu_mode, *gla)) { 638284894Sneel if (seg == VM_REG_GUEST_SS) 639284894Sneel vm_inject_ss(vm, vcpuid, 0); 640284894Sneel else 641284894Sneel vm_inject_gp(vm, vcpuid); 642284900Sneel goto guest_fault; 643284894Sneel } 644284894Sneel 645284894Sneel if (vie_alignment_check(paging->cpl, opsize, cr0, rflags, *gla)) { 646284894Sneel vm_inject_ac(vm, vcpuid, 0); 647284900Sneel goto guest_fault; 648284894Sneel } 649284894Sneel 650284900Sneel *fault = 0; 651284894Sneel return (0); 652284900Sneel 653284900Sneelguest_fault: 654284900Sneel *fault = 1; 655284900Sneel return (0); 656284894Sneel} 657284894Sneel 658284894Sneelstatic int 659284894Sneelemulate_movs(void *vm, int vcpuid, uint64_t gpa, struct vie *vie, 660284894Sneel struct vm_guest_paging *paging, mem_region_read_t memread, 661284894Sneel mem_region_write_t memwrite, void *arg) 662284894Sneel{ 663284894Sneel#ifdef _KERNEL 664284894Sneel struct vm_copyinfo copyinfo[2]; 665284894Sneel#else 666284894Sneel struct iovec copyinfo[2]; 667284894Sneel#endif 668284899Sneel uint64_t dstaddr, srcaddr, dstgpa, srcgpa, val; 669284894Sneel uint64_t rcx, rdi, rsi, rflags; 670284900Sneel int error, fault, opsize, seg, repeat; 671284894Sneel 672284894Sneel opsize = (vie->op.op_byte == 0xA4) ? 1 : vie->opsize; 673284894Sneel val = 0; 674284894Sneel error = 0; 675284894Sneel 676284894Sneel /* 677284894Sneel * XXX although the MOVS instruction is only supposed to be used with 678284894Sneel * the "rep" prefix some guests like FreeBSD will use "repnz" instead. 
679284894Sneel * 680284894Sneel * Empirically the "repnz" prefix has identical behavior to "rep" 681284894Sneel * and the zero flag does not make a difference. 682284894Sneel */ 683284894Sneel repeat = vie->repz_present | vie->repnz_present; 684284894Sneel 685284894Sneel if (repeat) { 686284894Sneel error = vie_read_register(vm, vcpuid, VM_REG_GUEST_RCX, &rcx); 687284894Sneel KASSERT(!error, ("%s: error %d getting rcx", __func__, error)); 688284894Sneel 689284894Sneel /* 690284894Sneel * The count register is %rcx, %ecx or %cx depending on the 691284894Sneel * address size of the instruction. 692284894Sneel */ 693284900Sneel if ((rcx & vie_size2mask(vie->addrsize)) == 0) { 694284900Sneel error = 0; 695284900Sneel goto done; 696284900Sneel } 697284894Sneel } 698284894Sneel 699284894Sneel /* 700284894Sneel * Source Destination Comments 701284894Sneel * -------------------------------------------- 702284894Sneel * (1) memory memory n/a 703284894Sneel * (2) memory mmio emulated 704284894Sneel * (3) mmio memory emulated 705284899Sneel * (4) mmio mmio emulated 706284894Sneel * 707284894Sneel * At this point we don't have sufficient information to distinguish 708284894Sneel * between (2), (3) and (4). We use 'vm_copy_setup()' to tease this 709284894Sneel * out because it will succeed only when operating on regular memory. 710284894Sneel * 711284894Sneel * XXX the emulation doesn't properly handle the case where 'gpa' 712284894Sneel * is straddling the boundary between the normal memory and MMIO. 713284894Sneel */ 714284894Sneel 715284894Sneel seg = vie->segment_override ? 
vie->segment_register : VM_REG_GUEST_DS; 716284894Sneel error = get_gla(vm, vcpuid, vie, paging, opsize, vie->addrsize, 717284900Sneel PROT_READ, seg, VM_REG_GUEST_RSI, &srcaddr, &fault); 718284900Sneel if (error || fault) 719284894Sneel goto done; 720284894Sneel 721284894Sneel error = vm_copy_setup(vm, vcpuid, paging, srcaddr, opsize, PROT_READ, 722284900Sneel copyinfo, nitems(copyinfo), &fault); 723284894Sneel if (error == 0) { 724284900Sneel if (fault) 725284900Sneel goto done; /* Resume guest to handle fault */ 726284900Sneel 727284894Sneel /* 728284894Sneel * case (2): read from system memory and write to mmio. 729284894Sneel */ 730284894Sneel vm_copyin(vm, vcpuid, copyinfo, &val, opsize); 731284894Sneel vm_copy_teardown(vm, vcpuid, copyinfo, nitems(copyinfo)); 732284894Sneel error = memwrite(vm, vcpuid, gpa, val, opsize, arg); 733284899Sneel if (error) 734284899Sneel goto done; 735284894Sneel } else { 736284894Sneel /* 737284894Sneel * 'vm_copy_setup()' is expected to fail for cases (3) and (4) 738284894Sneel * if 'srcaddr' is in the mmio space. 739284894Sneel */ 740284894Sneel 741284899Sneel error = get_gla(vm, vcpuid, vie, paging, opsize, vie->addrsize, 742284900Sneel PROT_WRITE, VM_REG_GUEST_ES, VM_REG_GUEST_RDI, &dstaddr, 743284900Sneel &fault); 744284900Sneel if (error || fault) 745284894Sneel goto done; 746284894Sneel 747284899Sneel error = vm_copy_setup(vm, vcpuid, paging, dstaddr, opsize, 748284900Sneel PROT_WRITE, copyinfo, nitems(copyinfo), &fault); 749284899Sneel if (error == 0) { 750284900Sneel if (fault) 751284900Sneel goto done; /* Resume guest to handle fault */ 752284900Sneel 753284899Sneel /* 754284899Sneel * case (3): read from MMIO and write to system memory. 755284899Sneel * 756284899Sneel * A MMIO read can have side-effects so we 757284899Sneel * commit to it only after vm_copy_setup() is 758284899Sneel * successful. 
If a page-fault needs to be 759284899Sneel * injected into the guest then it will happen 760284899Sneel * before the MMIO read is attempted. 761284899Sneel */ 762284899Sneel error = memread(vm, vcpuid, gpa, &val, opsize, arg); 763284899Sneel if (error) 764284899Sneel goto done; 765284899Sneel 766284899Sneel vm_copyout(vm, vcpuid, &val, copyinfo, opsize); 767284899Sneel vm_copy_teardown(vm, vcpuid, copyinfo, nitems(copyinfo)); 768284899Sneel } else { 769284899Sneel /* 770284899Sneel * Case (4): read from and write to mmio. 771284900Sneel * 772284900Sneel * Commit to the MMIO read/write (with potential 773284900Sneel * side-effects) only after we are sure that the 774284900Sneel * instruction is not going to be restarted due 775284900Sneel * to address translation faults. 776284899Sneel */ 777284899Sneel error = vm_gla2gpa(vm, vcpuid, paging, srcaddr, 778284900Sneel PROT_READ, &srcgpa, &fault); 779284900Sneel if (error || fault) 780284899Sneel goto done; 781284900Sneel 782284900Sneel error = vm_gla2gpa(vm, vcpuid, paging, dstaddr, 783284900Sneel PROT_WRITE, &dstgpa, &fault); 784284900Sneel if (error || fault) 785284900Sneel goto done; 786284900Sneel 787284899Sneel error = memread(vm, vcpuid, srcgpa, &val, opsize, arg); 788284899Sneel if (error) 789284899Sneel goto done; 790284899Sneel 791284899Sneel error = memwrite(vm, vcpuid, dstgpa, val, opsize, arg); 792284899Sneel if (error) 793284899Sneel goto done; 794284899Sneel } 795284894Sneel } 796284894Sneel 797284894Sneel error = vie_read_register(vm, vcpuid, VM_REG_GUEST_RSI, &rsi); 798284894Sneel KASSERT(error == 0, ("%s: error %d getting rsi", __func__, error)); 799284894Sneel 800284894Sneel error = vie_read_register(vm, vcpuid, VM_REG_GUEST_RDI, &rdi); 801284894Sneel KASSERT(error == 0, ("%s: error %d getting rdi", __func__, error)); 802284894Sneel 803284894Sneel error = vie_read_register(vm, vcpuid, VM_REG_GUEST_RFLAGS, &rflags); 804284894Sneel KASSERT(error == 0, ("%s: error %d getting rflags", __func__, error)); 
805284894Sneel 806284894Sneel if (rflags & PSL_D) { 807284894Sneel rsi -= opsize; 808284894Sneel rdi -= opsize; 809284894Sneel } else { 810284894Sneel rsi += opsize; 811284894Sneel rdi += opsize; 812284894Sneel } 813284894Sneel 814284894Sneel error = vie_update_register(vm, vcpuid, VM_REG_GUEST_RSI, rsi, 815284894Sneel vie->addrsize); 816284894Sneel KASSERT(error == 0, ("%s: error %d updating rsi", __func__, error)); 817284894Sneel 818284894Sneel error = vie_update_register(vm, vcpuid, VM_REG_GUEST_RDI, rdi, 819284894Sneel vie->addrsize); 820284894Sneel KASSERT(error == 0, ("%s: error %d updating rdi", __func__, error)); 821284894Sneel 822284894Sneel if (repeat) { 823284894Sneel rcx = rcx - 1; 824284894Sneel error = vie_update_register(vm, vcpuid, VM_REG_GUEST_RCX, 825284894Sneel rcx, vie->addrsize); 826284894Sneel KASSERT(!error, ("%s: error %d updating rcx", __func__, error)); 827284894Sneel 828284894Sneel /* 829284894Sneel * Repeat the instruction if the count register is not zero. 830284894Sneel */ 831284894Sneel if ((rcx & vie_size2mask(vie->addrsize)) != 0) 832284894Sneel vm_restart_instruction(vm, vcpuid); 833284894Sneel } 834284894Sneeldone: 835284900Sneel KASSERT(error == 0 || error == EFAULT, ("%s: unexpected error %d", 836284900Sneel __func__, error)); 837284900Sneel return (error); 838284894Sneel} 839284894Sneel 840284894Sneelstatic int 841284899Sneelemulate_stos(void *vm, int vcpuid, uint64_t gpa, struct vie *vie, 842284899Sneel struct vm_guest_paging *paging, mem_region_read_t memread, 843284899Sneel mem_region_write_t memwrite, void *arg) 844284899Sneel{ 845284899Sneel int error, opsize, repeat; 846284899Sneel uint64_t val; 847284899Sneel uint64_t rcx, rdi, rflags; 848284899Sneel 849284899Sneel opsize = (vie->op.op_byte == 0xAA) ? 
1 : vie->opsize; 850284899Sneel repeat = vie->repz_present | vie->repnz_present; 851284899Sneel 852284899Sneel if (repeat) { 853284899Sneel error = vie_read_register(vm, vcpuid, VM_REG_GUEST_RCX, &rcx); 854284899Sneel KASSERT(!error, ("%s: error %d getting rcx", __func__, error)); 855284899Sneel 856284899Sneel /* 857284899Sneel * The count register is %rcx, %ecx or %cx depending on the 858284899Sneel * address size of the instruction. 859284899Sneel */ 860284899Sneel if ((rcx & vie_size2mask(vie->addrsize)) == 0) 861284899Sneel return (0); 862284899Sneel } 863284899Sneel 864284899Sneel error = vie_read_register(vm, vcpuid, VM_REG_GUEST_RAX, &val); 865284899Sneel KASSERT(!error, ("%s: error %d getting rax", __func__, error)); 866284899Sneel 867284899Sneel error = memwrite(vm, vcpuid, gpa, val, opsize, arg); 868284899Sneel if (error) 869284899Sneel return (error); 870284899Sneel 871284899Sneel error = vie_read_register(vm, vcpuid, VM_REG_GUEST_RDI, &rdi); 872284899Sneel KASSERT(error == 0, ("%s: error %d getting rdi", __func__, error)); 873284899Sneel 874284899Sneel error = vie_read_register(vm, vcpuid, VM_REG_GUEST_RFLAGS, &rflags); 875284899Sneel KASSERT(error == 0, ("%s: error %d getting rflags", __func__, error)); 876284899Sneel 877284899Sneel if (rflags & PSL_D) 878284899Sneel rdi -= opsize; 879284899Sneel else 880284899Sneel rdi += opsize; 881284899Sneel 882284899Sneel error = vie_update_register(vm, vcpuid, VM_REG_GUEST_RDI, rdi, 883284899Sneel vie->addrsize); 884284899Sneel KASSERT(error == 0, ("%s: error %d updating rdi", __func__, error)); 885284899Sneel 886284899Sneel if (repeat) { 887284899Sneel rcx = rcx - 1; 888284899Sneel error = vie_update_register(vm, vcpuid, VM_REG_GUEST_RCX, 889284899Sneel rcx, vie->addrsize); 890284899Sneel KASSERT(!error, ("%s: error %d updating rcx", __func__, error)); 891284899Sneel 892284899Sneel /* 893284899Sneel * Repeat the instruction if the count register is not zero. 
894284899Sneel */ 895284899Sneel if ((rcx & vie_size2mask(vie->addrsize)) != 0) 896284899Sneel vm_restart_instruction(vm, vcpuid); 897284899Sneel } 898284899Sneel 899284899Sneel return (0); 900284899Sneel} 901284899Sneel 902284899Sneelstatic int 903243640Sneelemulate_and(void *vm, int vcpuid, uint64_t gpa, struct vie *vie, 904243640Sneel mem_region_read_t memread, mem_region_write_t memwrite, void *arg) 905243640Sneel{ 906243640Sneel int error, size; 907243640Sneel enum vm_reg_name reg; 908276349Sneel uint64_t result, rflags, rflags2, val1, val2; 909243640Sneel 910270159Sgrehan size = vie->opsize; 911243640Sneel error = EINVAL; 912243640Sneel 913243640Sneel switch (vie->op.op_byte) { 914243640Sneel case 0x23: 915243640Sneel /* 916243640Sneel * AND reg (ModRM:reg) and mem (ModRM:r/m) and store the 917243640Sneel * result in reg. 918243640Sneel * 919270159Sgrehan * 23/r and r16, r/m16 920243640Sneel * 23/r and r32, r/m32 921243640Sneel * REX.W + 23/r and r64, r/m64 922243640Sneel */ 923243640Sneel 924243640Sneel /* get the first operand */ 925243640Sneel reg = gpr_map[vie->reg]; 926243640Sneel error = vie_read_register(vm, vcpuid, reg, &val1); 927243640Sneel if (error) 928243640Sneel break; 929243640Sneel 930243640Sneel /* get the second operand */ 931243640Sneel error = memread(vm, vcpuid, gpa, &val2, size, arg); 932243640Sneel if (error) 933243640Sneel break; 934243640Sneel 935243640Sneel /* perform the operation and write the result */ 936276349Sneel result = val1 & val2; 937276349Sneel error = vie_update_register(vm, vcpuid, reg, result, size); 938243640Sneel break; 939243667Sgrehan case 0x81: 940284899Sneel case 0x83: 941243667Sgrehan /* 942284899Sneel * AND mem (ModRM:r/m) with immediate and store the 943253585Sneel * result in mem. 
944243667Sgrehan * 945284899Sneel * 81 /4 and r/m16, imm16 946284899Sneel * 81 /4 and r/m32, imm32 947284899Sneel * REX.W + 81 /4 and r/m64, imm32 sign-extended to 64 948243703Sgrehan * 949284899Sneel * 83 /4 and r/m16, imm8 sign-extended to 16 950284899Sneel * 83 /4 and r/m32, imm8 sign-extended to 32 951284899Sneel * REX.W + 83/4 and r/m64, imm8 sign-extended to 64 952243667Sgrehan */ 953243703Sgrehan 954243667Sgrehan /* get the first operand */ 955243667Sgrehan error = memread(vm, vcpuid, gpa, &val1, size, arg); 956243667Sgrehan if (error) 957243667Sgrehan break; 958243667Sgrehan 959243667Sgrehan /* 960284899Sneel * perform the operation with the pre-fetched immediate 961284899Sneel * operand and write the result 962284899Sneel */ 963284899Sneel result = val1 & vie->immediate; 964284899Sneel error = memwrite(vm, vcpuid, gpa, result, size, arg); 965243667Sgrehan break; 966243640Sneel default: 967243640Sneel break; 968243640Sneel } 969276349Sneel if (error) 970276349Sneel return (error); 971276349Sneel 972276349Sneel error = vie_read_register(vm, vcpuid, VM_REG_GUEST_RFLAGS, &rflags); 973276349Sneel if (error) 974276349Sneel return (error); 975276349Sneel 976276349Sneel /* 977276349Sneel * OF and CF are cleared; the SF, ZF and PF flags are set according 978276349Sneel * to the result; AF is undefined. 979276349Sneel * 980276349Sneel * The updated status flags are obtained by subtracting 0 from 'result'. 
981276349Sneel */ 982276349Sneel rflags2 = getcc(size, result, 0); 983276349Sneel rflags &= ~RFLAGS_STATUS_BITS; 984276349Sneel rflags |= rflags2 & (PSL_PF | PSL_Z | PSL_N); 985276349Sneel 986276349Sneel error = vie_update_register(vm, vcpuid, VM_REG_GUEST_RFLAGS, rflags, 8); 987243640Sneel return (error); 988243640Sneel} 989243640Sneel 990253585Sneelstatic int 991253585Sneelemulate_or(void *vm, int vcpuid, uint64_t gpa, struct vie *vie, 992253585Sneel mem_region_read_t memread, mem_region_write_t memwrite, void *arg) 993253585Sneel{ 994253585Sneel int error, size; 995276349Sneel uint64_t val1, result, rflags, rflags2; 996253585Sneel 997270159Sgrehan size = vie->opsize; 998253585Sneel error = EINVAL; 999253585Sneel 1000253585Sneel switch (vie->op.op_byte) { 1001284899Sneel case 0x81: 1002253585Sneel case 0x83: 1003253585Sneel /* 1004253585Sneel * OR mem (ModRM:r/m) with immediate and store the 1005253585Sneel * result in mem. 1006253585Sneel * 1007284899Sneel * 81 /1 or r/m16, imm16 1008284899Sneel * 81 /1 or r/m32, imm32 1009284899Sneel * REX.W + 81 /1 or r/m64, imm32 sign-extended to 64 1010253585Sneel * 1011284899Sneel * 83 /1 or r/m16, imm8 sign-extended to 16 1012284899Sneel * 83 /1 or r/m32, imm8 sign-extended to 32 1013284899Sneel * REX.W + 83/1 or r/m64, imm8 sign-extended to 64 1014253585Sneel */ 1015253585Sneel 1016253585Sneel /* get the first operand */ 1017253585Sneel error = memread(vm, vcpuid, gpa, &val1, size, arg); 1018253585Sneel if (error) 1019253585Sneel break; 1020253585Sneel 1021253585Sneel /* 1022253585Sneel * perform the operation with the pre-fetched immediate 1023253585Sneel * operand and write the result 1024253585Sneel */ 1025276349Sneel result = val1 | vie->immediate; 1026276349Sneel error = memwrite(vm, vcpuid, gpa, result, size, arg); 1027253585Sneel break; 1028253585Sneel default: 1029253585Sneel break; 1030253585Sneel } 1031276349Sneel if (error) 1032276349Sneel return (error); 1033276349Sneel 1034276349Sneel error = 
vie_read_register(vm, vcpuid, VM_REG_GUEST_RFLAGS, &rflags); 1035276349Sneel if (error) 1036276349Sneel return (error); 1037276349Sneel 1038276349Sneel /* 1039276349Sneel * OF and CF are cleared; the SF, ZF and PF flags are set according 1040276349Sneel * to the result; AF is undefined. 1041276349Sneel * 1042276349Sneel * The updated status flags are obtained by subtracting 0 from 'result'. 1043276349Sneel */ 1044276349Sneel rflags2 = getcc(size, result, 0); 1045276349Sneel rflags &= ~RFLAGS_STATUS_BITS; 1046276349Sneel rflags |= rflags2 & (PSL_PF | PSL_Z | PSL_N); 1047276349Sneel 1048276349Sneel error = vie_update_register(vm, vcpuid, VM_REG_GUEST_RFLAGS, rflags, 8); 1049253585Sneel return (error); 1050253585Sneel} 1051253585Sneel 1052270159Sgrehanstatic int 1053270159Sgrehanemulate_cmp(void *vm, int vcpuid, uint64_t gpa, struct vie *vie, 1054270159Sgrehan mem_region_read_t memread, mem_region_write_t memwrite, void *arg) 1055270159Sgrehan{ 1056270159Sgrehan int error, size; 1057284900Sneel uint64_t regop, memop, op1, op2, rflags, rflags2; 1058270159Sgrehan enum vm_reg_name reg; 1059270159Sgrehan 1060270159Sgrehan size = vie->opsize; 1061270159Sgrehan switch (vie->op.op_byte) { 1062284900Sneel case 0x39: 1063270159Sgrehan case 0x3B: 1064270159Sgrehan /* 1065284900Sneel * 39/r CMP r/m16, r16 1066284900Sneel * 39/r CMP r/m32, r32 1067284900Sneel * REX.W 39/r CMP r/m64, r64 1068284900Sneel * 1069270159Sgrehan * 3B/r CMP r16, r/m16 1070270159Sgrehan * 3B/r CMP r32, r/m32 1071270159Sgrehan * REX.W + 3B/r CMP r64, r/m64 1072270159Sgrehan * 1073284900Sneel * Compare the first operand with the second operand and 1074270159Sgrehan * set status flags in EFLAGS register. The comparison is 1075270159Sgrehan * performed by subtracting the second operand from the first 1076270159Sgrehan * operand and then setting the status flags. 
1077270159Sgrehan */ 1078270159Sgrehan 1079284900Sneel /* Get the register operand */ 1080270159Sgrehan reg = gpr_map[vie->reg]; 1081284900Sneel error = vie_read_register(vm, vcpuid, reg, ®op); 1082270159Sgrehan if (error) 1083270159Sgrehan return (error); 1084270159Sgrehan 1085284900Sneel /* Get the memory operand */ 1086284900Sneel error = memread(vm, vcpuid, gpa, &memop, size, arg); 1087270159Sgrehan if (error) 1088270159Sgrehan return (error); 1089270159Sgrehan 1090284900Sneel if (vie->op.op_byte == 0x3B) { 1091284900Sneel op1 = regop; 1092284900Sneel op2 = memop; 1093284900Sneel } else { 1094284900Sneel op1 = memop; 1095284900Sneel op2 = regop; 1096284900Sneel } 1097284899Sneel rflags2 = getcc(size, op1, op2); 1098270159Sgrehan break; 1099284900Sneel case 0x80: 1100284899Sneel case 0x81: 1101284899Sneel case 0x83: 1102284899Sneel /* 1103284900Sneel * 80 /7 cmp r/m8, imm8 1104284900Sneel * REX + 80 /7 cmp r/m8, imm8 1105284900Sneel * 1106284899Sneel * 81 /7 cmp r/m16, imm16 1107284899Sneel * 81 /7 cmp r/m32, imm32 1108284899Sneel * REX.W + 81 /7 cmp r/m64, imm32 sign-extended to 64 1109284899Sneel * 1110284899Sneel * 83 /7 cmp r/m16, imm8 sign-extended to 16 1111284899Sneel * 83 /7 cmp r/m32, imm8 sign-extended to 32 1112284899Sneel * REX.W + 83 /7 cmp r/m64, imm8 sign-extended to 64 1113284899Sneel * 1114284899Sneel * Compare mem (ModRM:r/m) with immediate and set 1115284899Sneel * status flags according to the results. The 1116284899Sneel * comparison is performed by subtracting the 1117284899Sneel * immediate from the first operand and then setting 1118284899Sneel * the status flags. 
1119284899Sneel * 1120284899Sneel */ 1121284900Sneel if (vie->op.op_byte == 0x80) 1122284900Sneel size = 1; 1123284899Sneel 1124284899Sneel /* get the first operand */ 1125284899Sneel error = memread(vm, vcpuid, gpa, &op1, size, arg); 1126284899Sneel if (error) 1127284899Sneel return (error); 1128284899Sneel 1129284899Sneel rflags2 = getcc(size, op1, vie->immediate); 1130284899Sneel break; 1131270159Sgrehan default: 1132270159Sgrehan return (EINVAL); 1133270159Sgrehan } 1134270159Sgrehan error = vie_read_register(vm, vcpuid, VM_REG_GUEST_RFLAGS, &rflags); 1135270159Sgrehan if (error) 1136270159Sgrehan return (error); 1137270159Sgrehan rflags &= ~RFLAGS_STATUS_BITS; 1138270159Sgrehan rflags |= rflags2 & RFLAGS_STATUS_BITS; 1139270159Sgrehan 1140270159Sgrehan error = vie_update_register(vm, vcpuid, VM_REG_GUEST_RFLAGS, rflags, 8); 1141270159Sgrehan return (error); 1142270159Sgrehan} 1143270159Sgrehan 1144270159Sgrehanstatic int 1145271659Sgrehanemulate_sub(void *vm, int vcpuid, uint64_t gpa, struct vie *vie, 1146271659Sgrehan mem_region_read_t memread, mem_region_write_t memwrite, void *arg) 1147271659Sgrehan{ 1148271659Sgrehan int error, size; 1149271659Sgrehan uint64_t nval, rflags, rflags2, val1, val2; 1150271659Sgrehan enum vm_reg_name reg; 1151271659Sgrehan 1152271659Sgrehan size = vie->opsize; 1153271659Sgrehan error = EINVAL; 1154271659Sgrehan 1155271659Sgrehan switch (vie->op.op_byte) { 1156271659Sgrehan case 0x2B: 1157271659Sgrehan /* 1158271659Sgrehan * SUB r/m from r and store the result in r 1159271659Sgrehan * 1160271659Sgrehan * 2B/r SUB r16, r/m16 1161271659Sgrehan * 2B/r SUB r32, r/m32 1162271659Sgrehan * REX.W + 2B/r SUB r64, r/m64 1163271659Sgrehan */ 1164271659Sgrehan 1165271659Sgrehan /* get the first operand */ 1166271659Sgrehan reg = gpr_map[vie->reg]; 1167271659Sgrehan error = vie_read_register(vm, vcpuid, reg, &val1); 1168271659Sgrehan if (error) 1169271659Sgrehan break; 1170271659Sgrehan 1171271659Sgrehan /* get the second operand */ 
1172271659Sgrehan error = memread(vm, vcpuid, gpa, &val2, size, arg); 1173271659Sgrehan if (error) 1174271659Sgrehan break; 1175271659Sgrehan 1176271659Sgrehan /* perform the operation and write the result */ 1177271659Sgrehan nval = val1 - val2; 1178271659Sgrehan error = vie_update_register(vm, vcpuid, reg, nval, size); 1179271659Sgrehan break; 1180271659Sgrehan default: 1181271659Sgrehan break; 1182271659Sgrehan } 1183271659Sgrehan 1184271659Sgrehan if (!error) { 1185271659Sgrehan rflags2 = getcc(size, val1, val2); 1186271659Sgrehan error = vie_read_register(vm, vcpuid, VM_REG_GUEST_RFLAGS, 1187271659Sgrehan &rflags); 1188271659Sgrehan if (error) 1189271659Sgrehan return (error); 1190271659Sgrehan 1191271659Sgrehan rflags &= ~RFLAGS_STATUS_BITS; 1192271659Sgrehan rflags |= rflags2 & RFLAGS_STATUS_BITS; 1193271659Sgrehan error = vie_update_register(vm, vcpuid, VM_REG_GUEST_RFLAGS, 1194271659Sgrehan rflags, 8); 1195271659Sgrehan } 1196271659Sgrehan 1197271659Sgrehan return (error); 1198271659Sgrehan} 1199271659Sgrehan 1200271659Sgrehanstatic int 1201276349Sneelemulate_stack_op(void *vm, int vcpuid, uint64_t mmio_gpa, struct vie *vie, 1202270159Sgrehan struct vm_guest_paging *paging, mem_region_read_t memread, 1203270159Sgrehan mem_region_write_t memwrite, void *arg) 1204270159Sgrehan{ 1205270159Sgrehan#ifdef _KERNEL 1206270159Sgrehan struct vm_copyinfo copyinfo[2]; 1207270159Sgrehan#else 1208270159Sgrehan struct iovec copyinfo[2]; 1209270159Sgrehan#endif 1210270159Sgrehan struct seg_desc ss_desc; 1211270159Sgrehan uint64_t cr0, rflags, rsp, stack_gla, val; 1212284900Sneel int error, fault, size, stackaddrsize, pushop; 1213270159Sgrehan 1214276349Sneel val = 0; 1215276349Sneel size = vie->opsize; 1216276349Sneel pushop = (vie->op.op_type == VIE_OP_TYPE_PUSH) ? 
1 : 0; 1217270159Sgrehan 1218270159Sgrehan /* 1219270159Sgrehan * From "Address-Size Attributes for Stack Accesses", Intel SDL, Vol 1 1220270159Sgrehan */ 1221270159Sgrehan if (paging->cpu_mode == CPU_MODE_REAL) { 1222270159Sgrehan stackaddrsize = 2; 1223270159Sgrehan } else if (paging->cpu_mode == CPU_MODE_64BIT) { 1224270159Sgrehan /* 1225270159Sgrehan * "Stack Manipulation Instructions in 64-bit Mode", SDM, Vol 3 1226270159Sgrehan * - Stack pointer size is always 64-bits. 1227270159Sgrehan * - PUSH/POP of 32-bit values is not possible in 64-bit mode. 1228270159Sgrehan * - 16-bit PUSH/POP is supported by using the operand size 1229270159Sgrehan * override prefix (66H). 1230270159Sgrehan */ 1231270159Sgrehan stackaddrsize = 8; 1232270159Sgrehan size = vie->opsize_override ? 2 : 8; 1233270159Sgrehan } else { 1234270159Sgrehan /* 1235270159Sgrehan * In protected or compability mode the 'B' flag in the 1236270159Sgrehan * stack-segment descriptor determines the size of the 1237270159Sgrehan * stack pointer. 
1238270159Sgrehan */ 1239270159Sgrehan error = vm_get_seg_desc(vm, vcpuid, VM_REG_GUEST_SS, &ss_desc); 1240270159Sgrehan KASSERT(error == 0, ("%s: error %d getting SS descriptor", 1241270159Sgrehan __func__, error)); 1242270159Sgrehan if (SEG_DESC_DEF32(ss_desc.access)) 1243270159Sgrehan stackaddrsize = 4; 1244270159Sgrehan else 1245270159Sgrehan stackaddrsize = 2; 1246270159Sgrehan } 1247270159Sgrehan 1248270159Sgrehan error = vie_read_register(vm, vcpuid, VM_REG_GUEST_CR0, &cr0); 1249270159Sgrehan KASSERT(error == 0, ("%s: error %d getting cr0", __func__, error)); 1250270159Sgrehan 1251270159Sgrehan error = vie_read_register(vm, vcpuid, VM_REG_GUEST_RFLAGS, &rflags); 1252270159Sgrehan KASSERT(error == 0, ("%s: error %d getting rflags", __func__, error)); 1253270159Sgrehan 1254270159Sgrehan error = vie_read_register(vm, vcpuid, VM_REG_GUEST_RSP, &rsp); 1255270159Sgrehan KASSERT(error == 0, ("%s: error %d getting rsp", __func__, error)); 1256276349Sneel if (pushop) { 1257276349Sneel rsp -= size; 1258276349Sneel } 1259270159Sgrehan 1260270159Sgrehan if (vie_calculate_gla(paging->cpu_mode, VM_REG_GUEST_SS, &ss_desc, 1261276349Sneel rsp, size, stackaddrsize, pushop ? PROT_WRITE : PROT_READ, 1262276349Sneel &stack_gla)) { 1263270159Sgrehan vm_inject_ss(vm, vcpuid, 0); 1264270159Sgrehan return (0); 1265270159Sgrehan } 1266270159Sgrehan 1267270159Sgrehan if (vie_canonical_check(paging->cpu_mode, stack_gla)) { 1268270159Sgrehan vm_inject_ss(vm, vcpuid, 0); 1269270159Sgrehan return (0); 1270270159Sgrehan } 1271270159Sgrehan 1272270159Sgrehan if (vie_alignment_check(paging->cpl, size, cr0, rflags, stack_gla)) { 1273270159Sgrehan vm_inject_ac(vm, vcpuid, 0); 1274270159Sgrehan return (0); 1275270159Sgrehan } 1276270159Sgrehan 1277276349Sneel error = vm_copy_setup(vm, vcpuid, paging, stack_gla, size, 1278284900Sneel pushop ? 
PROT_WRITE : PROT_READ, copyinfo, nitems(copyinfo), 1279284900Sneel &fault); 1280284900Sneel if (error || fault) 1281284900Sneel return (error); 1282270159Sgrehan 1283276349Sneel if (pushop) { 1284276349Sneel error = memread(vm, vcpuid, mmio_gpa, &val, size, arg); 1285276349Sneel if (error == 0) 1286276349Sneel vm_copyout(vm, vcpuid, &val, copyinfo, size); 1287276349Sneel } else { 1288276349Sneel vm_copyin(vm, vcpuid, copyinfo, &val, size); 1289276349Sneel error = memwrite(vm, vcpuid, mmio_gpa, val, size, arg); 1290276349Sneel rsp += size; 1291276349Sneel } 1292276349Sneel vm_copy_teardown(vm, vcpuid, copyinfo, nitems(copyinfo)); 1293276349Sneel 1294270159Sgrehan if (error == 0) { 1295270159Sgrehan error = vie_update_register(vm, vcpuid, VM_REG_GUEST_RSP, rsp, 1296270159Sgrehan stackaddrsize); 1297270159Sgrehan KASSERT(error == 0, ("error %d updating rsp", error)); 1298270159Sgrehan } 1299270159Sgrehan return (error); 1300270159Sgrehan} 1301270159Sgrehan 1302276349Sneelstatic int 1303276349Sneelemulate_push(void *vm, int vcpuid, uint64_t mmio_gpa, struct vie *vie, 1304276349Sneel struct vm_guest_paging *paging, mem_region_read_t memread, 1305276349Sneel mem_region_write_t memwrite, void *arg) 1306276349Sneel{ 1307276349Sneel int error; 1308276349Sneel 1309276349Sneel /* 1310276349Sneel * Table A-6, "Opcode Extensions", Intel SDM, Vol 2. 1311276349Sneel * 1312276349Sneel * PUSH is part of the group 5 extended opcodes and is identified 1313276349Sneel * by ModRM:reg = b110. 
1314276349Sneel */ 1315276349Sneel if ((vie->reg & 7) != 6) 1316276349Sneel return (EINVAL); 1317276349Sneel 1318276349Sneel error = emulate_stack_op(vm, vcpuid, mmio_gpa, vie, paging, memread, 1319276349Sneel memwrite, arg); 1320276349Sneel return (error); 1321276349Sneel} 1322276349Sneel 1323276349Sneelstatic int 1324276349Sneelemulate_pop(void *vm, int vcpuid, uint64_t mmio_gpa, struct vie *vie, 1325276349Sneel struct vm_guest_paging *paging, mem_region_read_t memread, 1326276349Sneel mem_region_write_t memwrite, void *arg) 1327276349Sneel{ 1328276349Sneel int error; 1329276349Sneel 1330276349Sneel /* 1331276349Sneel * Table A-6, "Opcode Extensions", Intel SDM, Vol 2. 1332276349Sneel * 1333276349Sneel * POP is part of the group 1A extended opcodes and is identified 1334276349Sneel * by ModRM:reg = b000. 1335276349Sneel */ 1336276349Sneel if ((vie->reg & 7) != 0) 1337276349Sneel return (EINVAL); 1338276349Sneel 1339276349Sneel error = emulate_stack_op(vm, vcpuid, mmio_gpa, vie, paging, memread, 1340276349Sneel memwrite, arg); 1341276349Sneel return (error); 1342276349Sneel} 1343276349Sneel 1344284899Sneelstatic int 1345284899Sneelemulate_group1(void *vm, int vcpuid, uint64_t gpa, struct vie *vie, 1346284899Sneel struct vm_guest_paging *paging, mem_region_read_t memread, 1347284899Sneel mem_region_write_t memwrite, void *memarg) 1348284899Sneel{ 1349284899Sneel int error; 1350284899Sneel 1351284899Sneel switch (vie->reg & 7) { 1352284899Sneel case 0x1: /* OR */ 1353284899Sneel error = emulate_or(vm, vcpuid, gpa, vie, 1354284899Sneel memread, memwrite, memarg); 1355284899Sneel break; 1356284899Sneel case 0x4: /* AND */ 1357284899Sneel error = emulate_and(vm, vcpuid, gpa, vie, 1358284899Sneel memread, memwrite, memarg); 1359284899Sneel break; 1360284899Sneel case 0x7: /* CMP */ 1361284899Sneel error = emulate_cmp(vm, vcpuid, gpa, vie, 1362284899Sneel memread, memwrite, memarg); 1363284899Sneel break; 1364284899Sneel default: 1365284899Sneel error = EINVAL; 
1366284899Sneel break; 1367284899Sneel } 1368284899Sneel 1369284899Sneel return (error); 1370284899Sneel} 1371284899Sneel 1372284900Sneelstatic int 1373284900Sneelemulate_bittest(void *vm, int vcpuid, uint64_t gpa, struct vie *vie, 1374284900Sneel mem_region_read_t memread, mem_region_write_t memwrite, void *memarg) 1375284900Sneel{ 1376284900Sneel uint64_t val, rflags; 1377284900Sneel int error, bitmask, bitoff; 1378284900Sneel 1379284900Sneel /* 1380284900Sneel * 0F BA is a Group 8 extended opcode. 1381284900Sneel * 1382284900Sneel * Currently we only emulate the 'Bit Test' instruction which is 1383284900Sneel * identified by a ModR/M:reg encoding of 100b. 1384284900Sneel */ 1385284900Sneel if ((vie->reg & 7) != 4) 1386284900Sneel return (EINVAL); 1387284900Sneel 1388284900Sneel error = vie_read_register(vm, vcpuid, VM_REG_GUEST_RFLAGS, &rflags); 1389284900Sneel KASSERT(error == 0, ("%s: error %d getting rflags", __func__, error)); 1390284900Sneel 1391284900Sneel error = memread(vm, vcpuid, gpa, &val, vie->opsize, memarg); 1392284900Sneel if (error) 1393284900Sneel return (error); 1394284900Sneel 1395284900Sneel /* 1396284900Sneel * Intel SDM, Vol 2, Table 3-2: 1397284900Sneel * "Range of Bit Positions Specified by Bit Offset Operands" 1398284900Sneel */ 1399284900Sneel bitmask = vie->opsize * 8 - 1; 1400284900Sneel bitoff = vie->immediate & bitmask; 1401284900Sneel 1402284900Sneel /* Copy the bit into the Carry flag in %rflags */ 1403284900Sneel if (val & (1UL << bitoff)) 1404284900Sneel rflags |= PSL_C; 1405284900Sneel else 1406284900Sneel rflags &= ~PSL_C; 1407284900Sneel 1408284900Sneel error = vie_update_register(vm, vcpuid, VM_REG_GUEST_RFLAGS, rflags, 8); 1409284900Sneel KASSERT(error == 0, ("%s: error %d updating rflags", __func__, error)); 1410284900Sneel 1411284900Sneel return (0); 1412284900Sneel} 1413284900Sneel 1414243640Sneelint 1415243640Sneelvmm_emulate_instruction(void *vm, int vcpuid, uint64_t gpa, struct vie *vie, 1416270159Sgrehan struct 
vm_guest_paging *paging, mem_region_read_t memread, 1417270159Sgrehan mem_region_write_t memwrite, void *memarg) 1418243640Sneel{ 1419243640Sneel int error; 1420243640Sneel 1421243640Sneel if (!vie->decoded) 1422243640Sneel return (EINVAL); 1423243640Sneel 1424243640Sneel switch (vie->op.op_type) { 1425284899Sneel case VIE_OP_TYPE_GROUP1: 1426284899Sneel error = emulate_group1(vm, vcpuid, gpa, vie, paging, memread, 1427284899Sneel memwrite, memarg); 1428284899Sneel break; 1429276349Sneel case VIE_OP_TYPE_POP: 1430276349Sneel error = emulate_pop(vm, vcpuid, gpa, vie, paging, memread, 1431276349Sneel memwrite, memarg); 1432276349Sneel break; 1433270159Sgrehan case VIE_OP_TYPE_PUSH: 1434270159Sgrehan error = emulate_push(vm, vcpuid, gpa, vie, paging, memread, 1435270159Sgrehan memwrite, memarg); 1436270159Sgrehan break; 1437270159Sgrehan case VIE_OP_TYPE_CMP: 1438270159Sgrehan error = emulate_cmp(vm, vcpuid, gpa, vie, 1439270159Sgrehan memread, memwrite, memarg); 1440270159Sgrehan break; 1441243640Sneel case VIE_OP_TYPE_MOV: 1442243640Sneel error = emulate_mov(vm, vcpuid, gpa, vie, 1443243640Sneel memread, memwrite, memarg); 1444243640Sneel break; 1445267396Sjhb case VIE_OP_TYPE_MOVSX: 1446267396Sjhb case VIE_OP_TYPE_MOVZX: 1447267396Sjhb error = emulate_movx(vm, vcpuid, gpa, vie, 1448267396Sjhb memread, memwrite, memarg); 1449267396Sjhb break; 1450284894Sneel case VIE_OP_TYPE_MOVS: 1451284894Sneel error = emulate_movs(vm, vcpuid, gpa, vie, paging, memread, 1452284894Sneel memwrite, memarg); 1453284894Sneel break; 1454284899Sneel case VIE_OP_TYPE_STOS: 1455284899Sneel error = emulate_stos(vm, vcpuid, gpa, vie, paging, memread, 1456284899Sneel memwrite, memarg); 1457284899Sneel break; 1458243640Sneel case VIE_OP_TYPE_AND: 1459243640Sneel error = emulate_and(vm, vcpuid, gpa, vie, 1460243640Sneel memread, memwrite, memarg); 1461243640Sneel break; 1462253585Sneel case VIE_OP_TYPE_OR: 1463253585Sneel error = emulate_or(vm, vcpuid, gpa, vie, 1464253585Sneel memread, 
memwrite, memarg); 1465253585Sneel break; 1466271659Sgrehan case VIE_OP_TYPE_SUB: 1467271659Sgrehan error = emulate_sub(vm, vcpuid, gpa, vie, 1468271659Sgrehan memread, memwrite, memarg); 1469271659Sgrehan break; 1470284900Sneel case VIE_OP_TYPE_BITTEST: 1471284900Sneel error = emulate_bittest(vm, vcpuid, gpa, vie, 1472284900Sneel memread, memwrite, memarg); 1473284900Sneel break; 1474243640Sneel default: 1475243640Sneel error = EINVAL; 1476243640Sneel break; 1477243640Sneel } 1478243640Sneel 1479243640Sneel return (error); 1480243640Sneel} 1481243640Sneel 1482268976Sjhbint 1483268976Sjhbvie_alignment_check(int cpl, int size, uint64_t cr0, uint64_t rf, uint64_t gla) 1484268976Sjhb{ 1485268976Sjhb KASSERT(size == 1 || size == 2 || size == 4 || size == 8, 1486268976Sjhb ("%s: invalid size %d", __func__, size)); 1487268976Sjhb KASSERT(cpl >= 0 && cpl <= 3, ("%s: invalid cpl %d", __func__, cpl)); 1488268976Sjhb 1489268976Sjhb if (cpl != 3 || (cr0 & CR0_AM) == 0 || (rf & PSL_AC) == 0) 1490268976Sjhb return (0); 1491268976Sjhb 1492268976Sjhb return ((gla & (size - 1)) ? 1 : 0); 1493268976Sjhb} 1494268976Sjhb 1495268976Sjhbint 1496268976Sjhbvie_canonical_check(enum vm_cpu_mode cpu_mode, uint64_t gla) 1497268976Sjhb{ 1498268976Sjhb uint64_t mask; 1499268976Sjhb 1500268976Sjhb if (cpu_mode != CPU_MODE_64BIT) 1501268976Sjhb return (0); 1502268976Sjhb 1503268976Sjhb /* 1504268976Sjhb * The value of the bit 47 in the 'gla' should be replicated in the 1505268976Sjhb * most significant 16 bits. 
1506268976Sjhb */ 1507268976Sjhb mask = ~((1UL << 48) - 1); 1508268976Sjhb if (gla & (1UL << 47)) 1509268976Sjhb return ((gla & mask) != mask); 1510268976Sjhb else 1511268976Sjhb return ((gla & mask) != 0); 1512268976Sjhb} 1513268976Sjhb 1514268976Sjhbuint64_t 1515268976Sjhbvie_size2mask(int size) 1516268976Sjhb{ 1517268976Sjhb KASSERT(size == 1 || size == 2 || size == 4 || size == 8, 1518268976Sjhb ("vie_size2mask: invalid size %d", size)); 1519268976Sjhb return (size2mask[size]); 1520268976Sjhb} 1521268976Sjhb 1522268976Sjhbint 1523268976Sjhbvie_calculate_gla(enum vm_cpu_mode cpu_mode, enum vm_reg_name seg, 1524268976Sjhb struct seg_desc *desc, uint64_t offset, int length, int addrsize, 1525268976Sjhb int prot, uint64_t *gla) 1526268976Sjhb{ 1527268976Sjhb uint64_t firstoff, low_limit, high_limit, segbase; 1528268976Sjhb int glasize, type; 1529268976Sjhb 1530268976Sjhb KASSERT(seg >= VM_REG_GUEST_ES && seg <= VM_REG_GUEST_GS, 1531268976Sjhb ("%s: invalid segment %d", __func__, seg)); 1532268976Sjhb KASSERT(length == 1 || length == 2 || length == 4 || length == 8, 1533268976Sjhb ("%s: invalid operand size %d", __func__, length)); 1534268976Sjhb KASSERT((prot & ~(PROT_READ | PROT_WRITE)) == 0, 1535268976Sjhb ("%s: invalid prot %#x", __func__, prot)); 1536268976Sjhb 1537268976Sjhb firstoff = offset; 1538268976Sjhb if (cpu_mode == CPU_MODE_64BIT) { 1539268976Sjhb KASSERT(addrsize == 4 || addrsize == 8, ("%s: invalid address " 1540268976Sjhb "size %d for cpu_mode %d", __func__, addrsize, cpu_mode)); 1541268976Sjhb glasize = 8; 1542268976Sjhb } else { 1543268976Sjhb KASSERT(addrsize == 2 || addrsize == 4, ("%s: invalid address " 1544268976Sjhb "size %d for cpu mode %d", __func__, addrsize, cpu_mode)); 1545268976Sjhb glasize = 4; 1546268976Sjhb /* 1547268976Sjhb * If the segment selector is loaded with a NULL selector 1548268976Sjhb * then the descriptor is unusable and attempting to use 1549268976Sjhb * it results in a #GP(0). 
1550268976Sjhb */ 1551270159Sgrehan if (SEG_DESC_UNUSABLE(desc->access)) 1552268976Sjhb return (-1); 1553268976Sjhb 1554268976Sjhb /* 1555268976Sjhb * The processor generates a #NP exception when a segment 1556268976Sjhb * register is loaded with a selector that points to a 1557268976Sjhb * descriptor that is not present. If this was the case then 1558268976Sjhb * it would have been checked before the VM-exit. 1559268976Sjhb */ 1560270159Sgrehan KASSERT(SEG_DESC_PRESENT(desc->access), 1561270159Sgrehan ("segment %d not present: %#x", seg, desc->access)); 1562268976Sjhb 1563268976Sjhb /* 1564268976Sjhb * The descriptor type must indicate a code/data segment. 1565268976Sjhb */ 1566270159Sgrehan type = SEG_DESC_TYPE(desc->access); 1567268976Sjhb KASSERT(type >= 16 && type <= 31, ("segment %d has invalid " 1568268976Sjhb "descriptor type %#x", seg, type)); 1569268976Sjhb 1570268976Sjhb if (prot & PROT_READ) { 1571268976Sjhb /* #GP on a read access to a exec-only code segment */ 1572268976Sjhb if ((type & 0xA) == 0x8) 1573268976Sjhb return (-1); 1574268976Sjhb } 1575268976Sjhb 1576268976Sjhb if (prot & PROT_WRITE) { 1577268976Sjhb /* 1578268976Sjhb * #GP on a write access to a code segment or a 1579268976Sjhb * read-only data segment. 1580268976Sjhb */ 1581268976Sjhb if (type & 0x8) /* code segment */ 1582268976Sjhb return (-1); 1583268976Sjhb 1584268976Sjhb if ((type & 0xA) == 0) /* read-only data seg */ 1585268976Sjhb return (-1); 1586268976Sjhb } 1587268976Sjhb 1588268976Sjhb /* 1589268976Sjhb * 'desc->limit' is fully expanded taking granularity into 1590268976Sjhb * account. 1591268976Sjhb */ 1592268976Sjhb if ((type & 0xC) == 0x4) { 1593268976Sjhb /* expand-down data segment */ 1594268976Sjhb low_limit = desc->limit + 1; 1595270159Sgrehan high_limit = SEG_DESC_DEF32(desc->access) ? 
1596270159Sgrehan 0xffffffff : 0xffff; 1597268976Sjhb } else { 1598268976Sjhb /* code segment or expand-up data segment */ 1599268976Sjhb low_limit = 0; 1600268976Sjhb high_limit = desc->limit; 1601268976Sjhb } 1602268976Sjhb 1603268976Sjhb while (length > 0) { 1604268976Sjhb offset &= vie_size2mask(addrsize); 1605268976Sjhb if (offset < low_limit || offset > high_limit) 1606268976Sjhb return (-1); 1607268976Sjhb offset++; 1608268976Sjhb length--; 1609268976Sjhb } 1610268976Sjhb } 1611268976Sjhb 1612268976Sjhb /* 1613268976Sjhb * In 64-bit mode all segments except %fs and %gs have a segment 1614268976Sjhb * base address of 0. 1615268976Sjhb */ 1616268976Sjhb if (cpu_mode == CPU_MODE_64BIT && seg != VM_REG_GUEST_FS && 1617268976Sjhb seg != VM_REG_GUEST_GS) { 1618268976Sjhb segbase = 0; 1619268976Sjhb } else { 1620268976Sjhb segbase = desc->base; 1621268976Sjhb } 1622268976Sjhb 1623268976Sjhb /* 1624268976Sjhb * Truncate 'firstoff' to the effective address size before adding 1625268976Sjhb * it to the segment base. 
1626268976Sjhb */ 1627268976Sjhb firstoff &= vie_size2mask(addrsize); 1628268976Sjhb *gla = (segbase + firstoff) & vie_size2mask(glasize); 1629268976Sjhb return (0); 1630268976Sjhb} 1631268976Sjhb 1632243640Sneel#ifdef _KERNEL 1633256072Sneelvoid 1634276403Sneelvie_init(struct vie *vie, const char *inst_bytes, int inst_length) 1635240941Sneel{ 1636276403Sneel KASSERT(inst_length >= 0 && inst_length <= VIE_INST_SIZE, 1637276403Sneel ("%s: invalid instruction length (%d)", __func__, inst_length)); 1638240941Sneel 1639240941Sneel bzero(vie, sizeof(struct vie)); 1640240941Sneel 1641240941Sneel vie->base_register = VM_REG_LAST; 1642240941Sneel vie->index_register = VM_REG_LAST; 1643284894Sneel vie->segment_register = VM_REG_LAST; 1644276403Sneel 1645276403Sneel if (inst_length) { 1646276403Sneel bcopy(inst_bytes, vie->inst, inst_length); 1647276403Sneel vie->num_valid = inst_length; 1648276403Sneel } 1649240941Sneel} 1650240941Sneel 1651240941Sneelstatic int 1652268976Sjhbpf_error_code(int usermode, int prot, int rsvd, uint64_t pte) 1653240941Sneel{ 1654268976Sjhb int error_code = 0; 1655268976Sjhb 1656268976Sjhb if (pte & PG_V) 1657268976Sjhb error_code |= PGEX_P; 1658268976Sjhb if (prot & VM_PROT_WRITE) 1659268976Sjhb error_code |= PGEX_W; 1660268976Sjhb if (usermode) 1661268976Sjhb error_code |= PGEX_U; 1662268976Sjhb if (rsvd) 1663268976Sjhb error_code |= PGEX_RSV; 1664268976Sjhb if (prot & VM_PROT_EXECUTE) 1665268976Sjhb error_code |= PGEX_I; 1666268976Sjhb 1667268976Sjhb return (error_code); 1668268976Sjhb} 1669268976Sjhb 1670268976Sjhbstatic void 1671268976Sjhbptp_release(void **cookie) 1672268976Sjhb{ 1673268976Sjhb if (*cookie != NULL) { 1674268976Sjhb vm_gpa_release(*cookie); 1675268976Sjhb *cookie = NULL; 1676268976Sjhb } 1677268976Sjhb} 1678268976Sjhb 1679268976Sjhbstatic void * 1680295124Sgrehanptp_hold(struct vm *vm, int vcpu, vm_paddr_t ptpphys, size_t len, void **cookie) 1681268976Sjhb{ 1682268976Sjhb void *ptr; 1683268976Sjhb 1684268976Sjhb 
	ptp_release(cookie);
	ptr = vm_gpa_hold(vm, vcpu, ptpphys, len, VM_PROT_RW, cookie);
	return (ptr);
}

/*
 * Translate the guest linear address 'gla' to a guest physical address by
 * walking the guest's page tables, honoring the paging mode in 'paging'
 * (flat, 32-bit, PAE or long mode) and emulating the MMU's accessed/dirty
 * bit updates with atomic compare-and-set.
 *
 * Returns 0 on success (with *gpa filled in) or EFAULT if a page-table
 * page could not be mapped.  If the walk itself faults (non-canonical
 * address, not-present/protection violation) the appropriate exception is
 * injected into the vcpu, *guest_fault is set and 0 is returned.
 */
int
vm_gla2gpa(struct vm *vm, int vcpuid, struct vm_guest_paging *paging,
    uint64_t gla, int prot, uint64_t *gpa, int *guest_fault)
{
	int nlevels, pfcode, ptpshift, ptpindex, retval, usermode, writable;
	u_int retries;
	uint64_t *ptpbase, ptpphys, pte, pgsize;
	uint32_t *ptpbase32, pte32;
	void *cookie;

	*guest_fault = 0;

	usermode = (paging->cpl == 3 ? 1 : 0);
	writable = prot & VM_PROT_WRITE;
	cookie = NULL;
	retval = 0;
	retries = 0;
restart:
	ptpphys = paging->cr3;		/* root of the page tables */
	ptp_release(&cookie);
	/*
	 * The walk restarts whenever an A/D-bit cmpset races with the guest;
	 * yield occasionally so a spinning guest cannot monopolize the cpu.
	 */
	if (retries++ > 0)
		maybe_yield();

	if (vie_canonical_check(paging->cpu_mode, gla)) {
		/*
		 * XXX assuming a non-stack reference otherwise a stack fault
		 * should be generated.
		 */
		vm_inject_gp(vm, vcpuid);
		goto fault;
	}

	if (paging->paging_mode == PAGING_MODE_FLAT) {
		*gpa = gla;
		goto done;
	}

	if (paging->paging_mode == PAGING_MODE_32) {
		/* Two-level 32-bit walk: page directory then page table */
		nlevels = 2;
		while (--nlevels >= 0) {
			/* Zero out the lower 12 bits. */
			ptpphys &= ~0xfff;

			ptpbase32 = ptp_hold(vm, vcpuid, ptpphys, PAGE_SIZE,
			    &cookie);

			if (ptpbase32 == NULL)
				goto error;

			ptpshift = PAGE_SHIFT + nlevels * 10;
			ptpindex = (gla >> ptpshift) & 0x3FF;
			pgsize = 1UL << ptpshift;

			pte32 = ptpbase32[ptpindex];

			if ((pte32 & PG_V) == 0 ||
			    (usermode && (pte32 & PG_U) == 0) ||
			    (writable && (pte32 & PG_RW) == 0)) {
				pfcode = pf_error_code(usermode, prot, 0,
				    pte32);
				vm_inject_pf(vm, vcpuid, pfcode, gla);
				goto fault;
			}

			/*
			 * Emulate the x86 MMU's management of the accessed
			 * and dirty flags. While the accessed flag is set
			 * at every level of the page table, the dirty flag
			 * is only set at the last level providing the guest
			 * physical address.
			 */
			if ((pte32 & PG_A) == 0) {
				if (atomic_cmpset_32(&ptpbase32[ptpindex],
				    pte32, pte32 | PG_A) == 0) {
					goto restart;
				}
			}

			/* XXX must be ignored if CR4.PSE=0 */
			if (nlevels > 0 && (pte32 & PG_PS) != 0)
				break;

			ptpphys = pte32;
		}

		/* Set the dirty bit in the page table entry if necessary */
		if (writable && (pte32 & PG_M) == 0) {
			if (atomic_cmpset_32(&ptpbase32[ptpindex],
			    pte32, pte32 | PG_M) == 0) {
				goto restart;
			}
		}

		/* Zero out the lower 'ptpshift' bits */
		pte32 >>= ptpshift; pte32 <<= ptpshift;
		*gpa = pte32 | (gla & (pgsize - 1));
		goto done;
	}

	if (paging->paging_mode == PAGING_MODE_PAE) {
		/* Zero out the lower 5 bits and the upper 32 bits */
		ptpphys &= 0xffffffe0UL;

		/* PAE: the PDPT has only 4 entries, indexed by gla[31:30] */
		ptpbase = ptp_hold(vm, vcpuid, ptpphys, sizeof(*ptpbase) * 4,
		    &cookie);
		if (ptpbase == NULL)
			goto error;

		ptpindex = (gla >> 30) & 0x3;

		pte = ptpbase[ptpindex];

		if ((pte & PG_V) == 0) {
			pfcode = pf_error_code(usermode, prot, 0, pte);
			vm_inject_pf(vm, vcpuid, pfcode, gla);
			goto fault;
		}

		ptpphys = pte;

		nlevels = 2;
	} else
		nlevels = 4;
	while (--nlevels >= 0) {
		/* Zero out the lower 12 bits and the upper 12 bits */
		ptpphys >>= 12; ptpphys <<= 24; ptpphys >>= 12;

		ptpbase = ptp_hold(vm, vcpuid, ptpphys, PAGE_SIZE, &cookie);
		if (ptpbase == NULL)
			goto error;

		ptpshift = PAGE_SHIFT + nlevels * 9;
		ptpindex = (gla >> ptpshift) & 0x1FF;
		pgsize = 1UL << ptpshift;

		pte = ptpbase[ptpindex];

		if ((pte & PG_V) == 0 ||
		    (usermode && (pte & PG_U) == 0) ||
		    (writable && (pte & PG_RW) == 0)) {
			pfcode = pf_error_code(usermode, prot, 0, pte);
			vm_inject_pf(vm, vcpuid, pfcode, gla);
			goto fault;
		}

		/* Set the accessed bit in the page table entry */
		if ((pte & PG_A) == 0) {
			if (atomic_cmpset_64(&ptpbase[ptpindex],
			    pte, pte | PG_A) == 0) {
				goto restart;
			}
		}

		if (nlevels > 0 && (pte & PG_PS) != 0) {
			/* Large pages bigger than 1GB are malformed */
			if (pgsize > 1 * GB) {
				pfcode = pf_error_code(usermode, prot, 1, pte);
				vm_inject_pf(vm, vcpuid, pfcode, gla);
				goto fault;
			}
			break;
		}

		ptpphys = pte;
	}

	/* Set the dirty bit in the page table entry if necessary */
	if (writable && (pte & PG_M) == 0) {
		if (atomic_cmpset_64(&ptpbase[ptpindex], pte, pte | PG_M) == 0)
			goto restart;
	}

	/* Zero out the lower 'ptpshift' bits and the upper 12 bits */
	pte >>= ptpshift; pte <<= (ptpshift + 12); pte >>= 12;
	*gpa = pte | (gla & (pgsize - 1));
done:
	ptp_release(&cookie);
	KASSERT(retval == 0 || retval == EFAULT, ("%s: unexpected retval %d",
	    __func__, retval));
	return (retval);
error:
	retval = EFAULT;
	goto done;
fault:
	*guest_fault = 1;
	goto done;
}

/*
 * Copy 'inst_length' instruction bytes starting at guest linear address
 * 'rip' into 'vie'.  Returns a non-zero errno on translation setup failure;
 * *faultptr is set if an exception was injected into the guest instead.
 */
int
vmm_fetch_instruction(struct vm *vm, int vcpuid, struct vm_guest_paging *paging,
    uint64_t rip, int inst_length, struct vie *vie, int *faultptr)
{
	struct vm_copyinfo copyinfo[2];
	int error, prot;

	if (inst_length > VIE_INST_SIZE)
		panic("vmm_fetch_instruction: invalid length %d", inst_length);

	prot = PROT_READ | PROT_EXEC;
	error = vm_copy_setup(vm, vcpuid, paging, rip, inst_length, prot,
	    copyinfo, nitems(copyinfo), faultptr);
	if (error || *faultptr)
		return (error);

	vm_copyin(vm, vcpuid, copyinfo, vie->inst, inst_length);
	vm_copy_teardown(vm, vcpuid, copyinfo, nitems(copyinfo));
	vie->num_valid = inst_length;
	return (0);
}

/*
 * Return the next undecoded instruction byte in '*x' without consuming it,
 * or -1 if all fetched bytes have been processed.
 */
static int
vie_peek(struct vie *vie, uint8_t *x)
{

	if (vie->num_processed < vie->num_valid) {
		*x = vie->inst[vie->num_processed];
		return (0);
	} else
		return (-1);
}

/* Consume the instruction byte most recently returned by vie_peek(). */
static void
vie_advance(struct vie *vie)
{

	vie->num_processed++;
}

/*
 * If 'x' is a segment-override prefix byte, store the corresponding segment
 * register in '*seg' and return true; otherwise return false.
 */
static bool
segment_override(uint8_t x, int *seg)
{

	switch (x) {
	case 0x2E:
		*seg = VM_REG_GUEST_CS;
		break;
	case 0x36:
		*seg = VM_REG_GUEST_SS;
		break;
	case 0x3E:
		*seg = VM_REG_GUEST_DS;
		break;
	case 0x26:
		*seg = VM_REG_GUEST_ES;
		break;
	case 0x64:
		*seg = VM_REG_GUEST_FS;
		break;
	case 0x65:
		*seg = VM_REG_GUEST_GS;
		break;
	default:
		return (false);
	}
	return (true);
}

/*
 * Decode legacy prefixes (operand/address-size overrides, REP/REPNE,
 * segment overrides) and an optional REX prefix, then derive the effective
 * operand and address sizes for the instruction.
 */
static int
decode_prefixes(struct vie *vie, enum vm_cpu_mode cpu_mode, int cs_d)
{
	uint8_t x;

	while (1) {
		if (vie_peek(vie, &x))
			return (-1);

		if (x == 0x66)
			vie->opsize_override = 1;
		else if (x == 0x67)
			vie->addrsize_override = 1;
		else if (x == 0xF3)
			vie->repz_present = 1;
		else if (x == 0xF2)
			vie->repnz_present = 1;
		else if (segment_override(x, &vie->segment_register))
			vie->segment_override = 1;
		else
			break;

		vie_advance(vie);
	}

	/*
	 * From section 2.2.1, "REX Prefixes", Intel SDM Vol 2:
	 * - Only one REX prefix is allowed per instruction.
	 * - The REX prefix must immediately precede the opcode byte or the
	 *   escape opcode byte.
	 * - If an instruction has a mandatory prefix (0x66, 0xF2 or 0xF3)
	 *   the mandatory prefix must come before the REX prefix.
	 */
	if (cpu_mode == CPU_MODE_64BIT && x >= 0x40 && x <= 0x4F) {
		vie->rex_present = 1;
		vie->rex_w = x & 0x8 ? 1 : 0;
		vie->rex_r = x & 0x4 ? 1 : 0;
		vie->rex_x = x & 0x2 ? 1 : 0;
		vie->rex_b = x & 0x1 ? 1 : 0;
		vie_advance(vie);
	}

	/*
	 * Section "Operand-Size And Address-Size Attributes", Intel SDM, Vol 1
	 */
	if (cpu_mode == CPU_MODE_64BIT) {
		/*
		 * Default address size is 64-bits and default operand size
		 * is 32-bits.
		 */
		vie->addrsize = vie->addrsize_override ? 4 : 8;
		if (vie->rex_w)
			vie->opsize = 8;
		else if (vie->opsize_override)
			vie->opsize = 2;
		else
			vie->opsize = 4;
	} else if (cs_d) {
		/* Default address and operand sizes are 32-bits */
		vie->addrsize = vie->addrsize_override ? 2 : 4;
		vie->opsize = vie->opsize_override ? 2 : 4;
	} else {
		/* Default address and operand sizes are 16-bits */
		vie->addrsize = vie->addrsize_override ? 4 : 2;
		vie->opsize = vie->opsize_override ? 4 : 2;
	}
	return (0);
}

/*
 * Decode the second byte of a 0x0F two-byte opcode via the
 * two_byte_opcodes[] table.
 */
static int
decode_two_byte_opcode(struct vie *vie)
{
	uint8_t x;

	if (vie_peek(vie, &x))
		return (-1);

	vie->op = two_byte_opcodes[x];

	if (vie->op.op_type == VIE_OP_TYPE_NONE)
		return (-1);

	vie_advance(vie);
	return (0);
}

/*
 * Decode the opcode byte via the one_byte_opcodes[] table, chaining to
 * decode_two_byte_opcode() for the 0x0F escape.  Fails for opcodes the
 * emulator does not handle (VIE_OP_TYPE_NONE).
 */
static int
decode_opcode(struct vie *vie)
{
	uint8_t x;

	if (vie_peek(vie, &x))
		return (-1);

	vie->op = one_byte_opcodes[x];

	if (vie->op.op_type == VIE_OP_TYPE_NONE)
		return (-1);

	vie_advance(vie);

	if (vie->op.op_type == VIE_OP_TYPE_TWO_BYTE)
		return (decode_two_byte_opcode(vie));

	return (0);
}

/*
 * Decode the ModRM byte: mod/reg/rm fields, REX extensions, and the
 * base register and displacement size implied by the addressing mode.
 */
static int
decode_modrm(struct vie *vie, enum vm_cpu_mode cpu_mode)
{
	uint8_t x;

	if (vie->op.op_flags & VIE_OP_F_NO_MODRM)
		return (0);

	/* NOTE(review): real-mode ModRM decoding is simply not supported */
	if (cpu_mode == CPU_MODE_REAL)
		return (-1);

	if (vie_peek(vie, &x))
		return (-1);

	vie->mod = (x >> 6) & 0x3;
	vie->rm = (x >> 0) & 0x7;
	vie->reg = (x >> 3) & 0x7;

	/*
	 * A direct addressing mode makes no sense in the context of an EPT
	 * fault. There has to be a memory access involved to cause the
	 * EPT fault.
	 */
	if (vie->mod == VIE_MOD_DIRECT)
		return (-1);

	if ((vie->mod == VIE_MOD_INDIRECT && vie->rm == VIE_RM_DISP32) ||
	    (vie->mod != VIE_MOD_DIRECT && vie->rm == VIE_RM_SIB)) {
		/*
		 * Table 2-5: Special Cases of REX Encodings
		 *
		 * mod=0, r/m=5 is used in the compatibility mode to
		 * indicate a disp32 without a base register.
		 *
		 * mod!=3, r/m=4 is used in the compatibility mode to
		 * indicate that the SIB byte is present.
		 *
		 * The 'b' bit in the REX prefix is don't care in
		 * this case.
		 */
	} else {
		vie->rm |= (vie->rex_b << 3);
	}

	vie->reg |= (vie->rex_r << 3);

	/* SIB */
	if (vie->mod != VIE_MOD_DIRECT && vie->rm == VIE_RM_SIB)
		goto done;

	vie->base_register = gpr_map[vie->rm];

	switch (vie->mod) {
	case VIE_MOD_INDIRECT_DISP8:
		vie->disp_bytes = 1;
		break;
	case VIE_MOD_INDIRECT_DISP32:
		vie->disp_bytes = 4;
		break;
	case VIE_MOD_INDIRECT:
		if (vie->rm == VIE_RM_DISP32) {
			vie->disp_bytes = 4;
			/*
			 * Table 2-7. RIP-Relative Addressing
			 *
			 * In 64-bit mode mod=00 r/m=101 implies [rip] + disp32
			 * whereas in compatibility mode it just implies disp32.
			 */

			if (cpu_mode == CPU_MODE_64BIT)
				vie->base_register = VM_REG_GUEST_RIP;
			else
				vie->base_register = VM_REG_LAST;
		}
		break;
	}

done:
	vie_advance(vie);

	return (0);
}

/*
 * Decode the SIB (scale-index-base) byte if the ModRM byte indicated its
 * presence, establishing the base register, index register, scale factor
 * and displacement size.
 */
static int
decode_sib(struct vie *vie)
{
	uint8_t x;

	/* Proceed only if SIB byte is present */
	if (vie->mod == VIE_MOD_DIRECT || vie->rm != VIE_RM_SIB)
		return (0);

	if (vie_peek(vie, &x))
		return (-1);

	/* De-construct the SIB byte */
	vie->ss = (x >> 6) & 0x3;
	vie->index = (x >> 3) & 0x7;
	vie->base = (x >> 0) & 0x7;

	/* Apply the REX prefix modifiers */
	vie->index |= vie->rex_x << 3;
	vie->base |= vie->rex_b << 3;

	switch (vie->mod) {
	case VIE_MOD_INDIRECT_DISP8:
		vie->disp_bytes = 1;
		break;
	case VIE_MOD_INDIRECT_DISP32:
		vie->disp_bytes = 4;
		break;
	}

	if (vie->mod == VIE_MOD_INDIRECT &&
	    (vie->base == 5 || vie->base == 13)) {
		/*
		 * Special case when base register is unused if mod = 0
		 * and base = %rbp or %r13.
		 *
		 * Documented in:
		 * Table 2-3: 32-bit Addressing Forms with the SIB Byte
		 * Table 2-5: Special Cases of REX Encodings
		 */
		vie->disp_bytes = 4;
	} else {
		vie->base_register = gpr_map[vie->base];
	}

	/*
	 * All encodings of 'index' are valid except for %rsp (4).
	 *
	 * Documented in:
	 * Table 2-3: 32-bit Addressing Forms with the SIB Byte
	 * Table 2-5: Special Cases of REX Encodings
	 */
	if (vie->index != 4)
		vie->index_register = gpr_map[vie->index];

	/* 'scale' makes sense only in the context of an index register */
	if (vie->index_register < VM_REG_LAST)
		vie->scale = 1 << vie->ss;

	vie_advance(vie);

	return (0);
}

/*
 * Consume the displacement bytes (1 or 4, as established by the ModRM/SIB
 * decode) and store the sign-extended value in vie->displacement.
 */
static int
decode_displacement(struct vie *vie)
{
	int n, i;
	uint8_t x;

	union {
		char	buf[4];
		int8_t	signed8;
		int32_t	signed32;
	} u;

	if ((n = vie->disp_bytes) == 0)
		return (0);

	if (n != 1 && n != 4)
		panic("decode_displacement: invalid disp_bytes %d", n);

	for (i = 0; i < n; i++) {
		if (vie_peek(vie, &x))
			return (-1);

		u.buf[i] = x;
		vie_advance(vie);
	}

	if (n == 1)
		vie->displacement = u.signed8;		/* sign-extended */
	else
		vie->displacement = u.signed32;		/* sign-extended */

	return (0);
}

/*
 * Consume the immediate operand (1, 2 or 4 bytes depending on the opcode
 * flags and effective operand size) and store the sign-extended value in
 * vie->immediate.
 */
static int
decode_immediate(struct vie *vie)
{
	int i, n;
	uint8_t x;
	union {
		char	buf[4];
		int8_t	signed8;
		int16_t	signed16;
		int32_t	signed32;
	} u;

	/* Figure out immediate operand size (if any) */
	if (vie->op.op_flags & VIE_OP_F_IMM) {
		/*
		 * Section 2.2.1.5 "Immediates", Intel SDM:
		 * In 64-bit mode the typical size of immediate operands
		 * remains 32-bits. When the operand size is 64-bits, the
		 * processor sign-extends all immediates to 64-bits prior
		 * to their use.
		 */
		if (vie->opsize == 4 || vie->opsize == 8)
			vie->imm_bytes = 4;
		else
			vie->imm_bytes = 2;
	} else if (vie->op.op_flags & VIE_OP_F_IMM8) {
		vie->imm_bytes = 1;
	}

	if ((n = vie->imm_bytes) == 0)
		return (0);

	KASSERT(n == 1 || n == 2 || n == 4,
	    ("%s: invalid number of immediate bytes: %d", __func__, n));

	for (i = 0; i < n; i++) {
		if (vie_peek(vie, &x))
			return (-1);

		u.buf[i] = x;
		vie_advance(vie);
	}

	/* sign-extend the immediate value before use */
	if (n == 1)
		vie->immediate = u.signed8;
	else if (n == 2)
		vie->immediate = u.signed16;
	else
		vie->immediate = u.signed32;

return (0); 2285240941Sneel} 2286240941Sneel 2287270159Sgrehanstatic int 2288270159Sgrehandecode_moffset(struct vie *vie) 2289270159Sgrehan{ 2290270159Sgrehan int i, n; 2291270159Sgrehan uint8_t x; 2292270159Sgrehan union { 2293270159Sgrehan char buf[8]; 2294270159Sgrehan uint64_t u64; 2295270159Sgrehan } u; 2296270159Sgrehan 2297270159Sgrehan if ((vie->op.op_flags & VIE_OP_F_MOFFSET) == 0) 2298270159Sgrehan return (0); 2299270159Sgrehan 2300270159Sgrehan /* 2301270159Sgrehan * Section 2.2.1.4, "Direct Memory-Offset MOVs", Intel SDM: 2302270159Sgrehan * The memory offset size follows the address-size of the instruction. 2303270159Sgrehan */ 2304270159Sgrehan n = vie->addrsize; 2305270159Sgrehan KASSERT(n == 2 || n == 4 || n == 8, ("invalid moffset bytes: %d", n)); 2306270159Sgrehan 2307270159Sgrehan u.u64 = 0; 2308270159Sgrehan for (i = 0; i < n; i++) { 2309270159Sgrehan if (vie_peek(vie, &x)) 2310270159Sgrehan return (-1); 2311270159Sgrehan 2312270159Sgrehan u.buf[i] = x; 2313270159Sgrehan vie_advance(vie); 2314270159Sgrehan } 2315270159Sgrehan vie->displacement = u.u64; 2316270159Sgrehan return (0); 2317270159Sgrehan} 2318270159Sgrehan 2319243640Sneel/* 2320243640Sneel * Verify that the 'guest linear address' provided as collateral of the nested 2321243640Sneel * page table fault matches with our instruction decoding. 
2322243640Sneel */ 2323243640Sneelstatic int 2324295124Sgrehanverify_gla(struct vm *vm, int cpuid, uint64_t gla, struct vie *vie, 2325295124Sgrehan enum vm_cpu_mode cpu_mode) 2326243640Sneel{ 2327243640Sneel int error; 2328295124Sgrehan uint64_t base, segbase, idx, gla2; 2329295124Sgrehan enum vm_reg_name seg; 2330295124Sgrehan struct seg_desc desc; 2331243640Sneel 2332248855Sneel /* Skip 'gla' verification */ 2333248855Sneel if (gla == VIE_INVALID_GLA) 2334248855Sneel return (0); 2335248855Sneel 2336243640Sneel base = 0; 2337243640Sneel if (vie->base_register != VM_REG_LAST) { 2338243640Sneel error = vm_get_register(vm, cpuid, vie->base_register, &base); 2339243640Sneel if (error) { 2340243640Sneel printf("verify_gla: error %d getting base reg %d\n", 2341243640Sneel error, vie->base_register); 2342243640Sneel return (-1); 2343243640Sneel } 2344249879Sgrehan 2345249879Sgrehan /* 2346249879Sgrehan * RIP-relative addressing starts from the following 2347249879Sgrehan * instruction 2348249879Sgrehan */ 2349249879Sgrehan if (vie->base_register == VM_REG_GUEST_RIP) 2350284900Sneel base += vie->num_processed; 2351243640Sneel } 2352243640Sneel 2353243640Sneel idx = 0; 2354243640Sneel if (vie->index_register != VM_REG_LAST) { 2355243640Sneel error = vm_get_register(vm, cpuid, vie->index_register, &idx); 2356243640Sneel if (error) { 2357243640Sneel printf("verify_gla: error %d getting index reg %d\n", 2358243640Sneel error, vie->index_register); 2359243640Sneel return (-1); 2360243640Sneel } 2361243640Sneel } 2362243640Sneel 2363295124Sgrehan /* 2364295124Sgrehan * From "Specifying a Segment Selector", Intel SDM, Vol 1 2365295124Sgrehan * 2366295124Sgrehan * In 64-bit mode, segmentation is generally (but not 2367295124Sgrehan * completely) disabled. The exceptions are the FS and GS 2368295124Sgrehan * segments. 
2369295124Sgrehan * 2370295124Sgrehan * In legacy IA-32 mode, when the ESP or EBP register is used 2371295124Sgrehan * as the base, the SS segment is the default segment. For 2372295124Sgrehan * other data references, except when relative to stack or 2373295124Sgrehan * string destination the DS segment is the default. These 2374295124Sgrehan * can be overridden to allow other segments to be accessed. 2375295124Sgrehan */ 2376295124Sgrehan if (vie->segment_override) 2377295124Sgrehan seg = vie->segment_register; 2378295124Sgrehan else if (vie->base_register == VM_REG_GUEST_RSP || 2379295124Sgrehan vie->base_register == VM_REG_GUEST_RBP) 2380295124Sgrehan seg = VM_REG_GUEST_SS; 2381295124Sgrehan else 2382295124Sgrehan seg = VM_REG_GUEST_DS; 2383295124Sgrehan if (cpu_mode == CPU_MODE_64BIT && seg != VM_REG_GUEST_FS && 2384295124Sgrehan seg != VM_REG_GUEST_GS) { 2385295124Sgrehan segbase = 0; 2386295124Sgrehan } else { 2387295124Sgrehan error = vm_get_seg_desc(vm, cpuid, seg, &desc); 2388295124Sgrehan if (error) { 2389295124Sgrehan printf("verify_gla: error %d getting segment" 2390295124Sgrehan " descriptor %d", error, 2391295124Sgrehan vie->segment_register); 2392295124Sgrehan return (-1); 2393295124Sgrehan } 2394295124Sgrehan segbase = desc.base; 2395295124Sgrehan } 2396295124Sgrehan 2397295124Sgrehan gla2 = segbase + base + vie->scale * idx + vie->displacement; 2398270159Sgrehan gla2 &= size2mask[vie->addrsize]; 2399270159Sgrehan if (gla != gla2) { 2400295124Sgrehan printf("verify_gla mismatch: segbase(0x%0lx)" 2401243640Sneel "base(0x%0lx), scale(%d), index(0x%0lx), " 2402270159Sgrehan "disp(0x%0lx), gla(0x%0lx), gla2(0x%0lx)\n", 2403295124Sgrehan segbase, base, vie->scale, idx, vie->displacement, 2404295124Sgrehan gla, gla2); 2405243640Sneel return (-1); 2406243640Sneel } 2407243640Sneel 2408243640Sneel return (0); 2409243640Sneel} 2410243640Sneel 2411240941Sneelint 2412267399Sjhbvmm_decode_instruction(struct vm *vm, int cpuid, uint64_t gla, 2413270159Sgrehan 
enum vm_cpu_mode cpu_mode, int cs_d, struct vie *vie) 2414240941Sneel{ 2415243640Sneel 2416270159Sgrehan if (decode_prefixes(vie, cpu_mode, cs_d)) 2417270159Sgrehan return (-1); 2418240941Sneel 2419240941Sneel if (decode_opcode(vie)) 2420240941Sneel return (-1); 2421240941Sneel 2422267399Sjhb if (decode_modrm(vie, cpu_mode)) 2423240941Sneel return (-1); 2424240941Sneel 2425243640Sneel if (decode_sib(vie)) 2426243640Sneel return (-1); 2427243640Sneel 2428240941Sneel if (decode_displacement(vie)) 2429240941Sneel return (-1); 2430270159Sgrehan 2431240941Sneel if (decode_immediate(vie)) 2432240941Sneel return (-1); 2433240941Sneel 2434270159Sgrehan if (decode_moffset(vie)) 2435270159Sgrehan return (-1); 2436270159Sgrehan 2437284894Sneel if ((vie->op.op_flags & VIE_OP_F_NO_GLA_VERIFICATION) == 0) { 2438295124Sgrehan if (verify_gla(vm, cpuid, gla, vie, cpu_mode)) 2439284894Sneel return (-1); 2440284894Sneel } 2441243640Sneel 2442243640Sneel vie->decoded = 1; /* success */ 2443243640Sneel 2444240941Sneel return (0); 2445240941Sneel} 2446243640Sneel#endif /* _KERNEL */ 2447