1234353Sdim/*===-- X86DisassemblerDecoder.c - Disassembler decoder ------------*- C -*-===* 2201360Srdivacky * 3201360Srdivacky * The LLVM Compiler Infrastructure 4201360Srdivacky * 5201360Srdivacky * This file is distributed under the University of Illinois Open Source 6201360Srdivacky * License. See LICENSE.TXT for details. 7201360Srdivacky * 8201360Srdivacky *===----------------------------------------------------------------------===* 9201360Srdivacky * 10201360Srdivacky * This file is part of the X86 Disassembler. 11201360Srdivacky * It contains the implementation of the instruction decoder. 12201360Srdivacky * Documentation for the disassembler can be found in X86Disassembler.h. 13201360Srdivacky * 14201360Srdivacky *===----------------------------------------------------------------------===*/ 15201360Srdivacky 16201360Srdivacky#include <stdarg.h> /* for va_*() */ 17201360Srdivacky#include <stdio.h> /* for vsnprintf() */ 18201360Srdivacky#include <stdlib.h> /* for exit() */ 19201360Srdivacky#include <string.h> /* for memset() */ 20201360Srdivacky 21201360Srdivacky#include "X86DisassemblerDecoder.h" 22201360Srdivacky 23201360Srdivacky#include "X86GenDisassemblerTables.inc" 24201360Srdivacky 25201360Srdivacky#define TRUE 1 26201360Srdivacky#define FALSE 0 27201360Srdivacky 28206124Srdivackytypedef int8_t bool; 29206124Srdivacky 30206124Srdivacky#ifndef NDEBUG 31206124Srdivacky#define debug(s) do { x86DisassemblerDebug(__FILE__, __LINE__, s); } while (0) 32206124Srdivacky#else 33206124Srdivacky#define debug(s) do { } while (0) 34206124Srdivacky#endif 35201360Srdivacky 36206124Srdivacky 37201360Srdivacky/* 38201360Srdivacky * contextForAttrs - Client for the instruction context table. Takes a set of 39201360Srdivacky * attributes and returns the appropriate decode context. 40201360Srdivacky * 41201360Srdivacky * @param attrMask - Attributes, from the enumeration attributeBits. 42201360Srdivacky * @return - The InstructionContext to use when looking up an 43201360Srdivacky * an instruction with these attributes. 44201360Srdivacky */ 45201360Srdivackystatic InstructionContext contextForAttrs(uint8_t attrMask) { 46201360Srdivacky return CONTEXTS_SYM[attrMask]; 47201360Srdivacky} 48201360Srdivacky 49201360Srdivacky/* 50201360Srdivacky * modRMRequired - Reads the appropriate instruction table to determine whether 51201360Srdivacky * the ModR/M byte is required to decode a particular instruction. 52201360Srdivacky * 53201360Srdivacky * @param type - The opcode type (i.e., how many bytes it has). 54201360Srdivacky * @param insnContext - The context for the instruction, as returned by 55201360Srdivacky * contextForAttrs. 56201360Srdivacky * @param opcode - The last byte of the instruction's opcode, not counting 57201360Srdivacky * ModR/M extensions and escapes. 58201360Srdivacky * @return - TRUE if the ModR/M byte is required, FALSE otherwise. 59201360Srdivacky */ 60201360Srdivackystatic int modRMRequired(OpcodeType type, 61226633Sdim InstructionContext insnContext, 62226633Sdim uint8_t opcode) { 63201360Srdivacky const struct ContextDecision* decision = 0; 64249423Sdim 65201360Srdivacky switch (type) { 66201360Srdivacky case ONEBYTE: 67201360Srdivacky decision = &ONEBYTE_SYM; 68201360Srdivacky break; 69201360Srdivacky case TWOBYTE: 70201360Srdivacky decision = &TWOBYTE_SYM; 71201360Srdivacky break; 72201360Srdivacky case THREEBYTE_38: 73201360Srdivacky decision = &THREEBYTE38_SYM; 74201360Srdivacky break; 75201360Srdivacky case THREEBYTE_3A: 76201360Srdivacky decision = &THREEBYTE3A_SYM; 77201360Srdivacky break; 78221345Sdim case THREEBYTE_A6: 79221345Sdim decision = &THREEBYTEA6_SYM; 80221345Sdim break; 81221345Sdim case THREEBYTE_A7: 82221345Sdim decision = &THREEBYTEA7_SYM; 83221345Sdim break; 84201360Srdivacky } 85234353Sdim 86201360Srdivacky return decision->opcodeDecisions[insnContext].modRMDecisions[opcode]. 87201360Srdivacky modrm_type != MODRM_ONEENTRY; 88201360Srdivacky} 89201360Srdivacky 90201360Srdivacky/* 91201360Srdivacky * decode - Reads the appropriate instruction table to obtain the unique ID of 92201360Srdivacky * an instruction. 93201360Srdivacky * 94201360Srdivacky * @param type - See modRMRequired(). 95201360Srdivacky * @param insnContext - See modRMRequired(). 96201360Srdivacky * @param opcode - See modRMRequired(). 97201360Srdivacky * @param modRM - The ModR/M byte if required, or any value if not. 98206124Srdivacky * @return - The UID of the instruction, or 0 on failure. 99201360Srdivacky */ 100201360Srdivackystatic InstrUID decode(OpcodeType type, 101206124Srdivacky InstructionContext insnContext, 102206124Srdivacky uint8_t opcode, 103206124Srdivacky uint8_t modRM) { 104234353Sdim const struct ModRMDecision* dec = 0; 105249423Sdim 106201360Srdivacky switch (type) { 107201360Srdivacky case ONEBYTE: 108201360Srdivacky dec = &ONEBYTE_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode]; 109201360Srdivacky break; 110201360Srdivacky case TWOBYTE: 111201360Srdivacky dec = &TWOBYTE_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode]; 112201360Srdivacky break; 113201360Srdivacky case THREEBYTE_38: 114201360Srdivacky dec = &THREEBYTE38_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode]; 115201360Srdivacky break; 116201360Srdivacky case THREEBYTE_3A: 117201360Srdivacky dec = &THREEBYTE3A_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode]; 118201360Srdivacky break; 119221345Sdim case THREEBYTE_A6: 120221345Sdim dec = &THREEBYTEA6_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode]; 121221345Sdim break; 122221345Sdim case THREEBYTE_A7: 123221345Sdim dec = &THREEBYTEA7_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode]; 124221345Sdim break; 125201360Srdivacky } 126249423Sdim 127201360Srdivacky switch (dec->modrm_type) { 128201360Srdivacky default: 129206124Srdivacky debug("Corrupt table! Unknown modrm_type"); 130206124Srdivacky return 0; 131201360Srdivacky case MODRM_ONEENTRY: 132234353Sdim return modRMTable[dec->instructionIDs]; 133201360Srdivacky case MODRM_SPLITRM: 134201360Srdivacky if (modFromModRM(modRM) == 0x3) 135234353Sdim return modRMTable[dec->instructionIDs+1]; 136234353Sdim return modRMTable[dec->instructionIDs]; 137234353Sdim case MODRM_SPLITREG: 138234353Sdim if (modFromModRM(modRM) == 0x3) 139234353Sdim return modRMTable[dec->instructionIDs+((modRM & 0x38) >> 3)+8]; 140234353Sdim return modRMTable[dec->instructionIDs+((modRM & 0x38) >> 3)]; 141243830Sdim case MODRM_SPLITMISC: 142243830Sdim if (modFromModRM(modRM) == 0x3) 143243830Sdim return modRMTable[dec->instructionIDs+(modRM & 0x3f)+8]; 144243830Sdim return modRMTable[dec->instructionIDs+((modRM & 0x38) >> 3)]; 145201360Srdivacky case MODRM_FULL: 146234353Sdim return modRMTable[dec->instructionIDs+modRM]; 147201360Srdivacky } 148201360Srdivacky} 149201360Srdivacky 150201360Srdivacky/* 151201360Srdivacky * specifierForUID - Given a UID, returns the name and operand specification for 152201360Srdivacky * that instruction. 153201360Srdivacky * 154201360Srdivacky * @param uid - The unique ID for the instruction. This should be returned by 155201360Srdivacky * decode(); specifierForUID will not check bounds. 156201360Srdivacky * @return - A pointer to the specification for that instruction. 157201360Srdivacky */ 158218893Sdimstatic const struct InstructionSpecifier *specifierForUID(InstrUID uid) { 159201360Srdivacky return &INSTRUCTIONS_SYM[uid]; 160201360Srdivacky} 161201360Srdivacky 162201360Srdivacky/* 163201360Srdivacky * consumeByte - Uses the reader function provided by the user to consume one 164201360Srdivacky * byte from the instruction's memory and advance the cursor. 165201360Srdivacky * 166201360Srdivacky * @param insn - The instruction with the reader function to use. The cursor 167201360Srdivacky * for this instruction is advanced. 168201360Srdivacky * @param byte - A pointer to a pre-allocated memory buffer to be populated 169201360Srdivacky * with the data read. 170201360Srdivacky * @return - 0 if the read was successful; nonzero otherwise. 171201360Srdivacky */ 172201360Srdivackystatic int consumeByte(struct InternalInstruction* insn, uint8_t* byte) { 173201360Srdivacky int ret = insn->reader(insn->readerArg, byte, insn->readerCursor); 174249423Sdim 175201360Srdivacky if (!ret) 176201360Srdivacky ++(insn->readerCursor); 177249423Sdim 178201360Srdivacky return ret; 179201360Srdivacky} 180201360Srdivacky 181201360Srdivacky/* 182201360Srdivacky * lookAtByte - Like consumeByte, but does not advance the cursor. 183201360Srdivacky * 184201360Srdivacky * @param insn - See consumeByte(). 185201360Srdivacky * @param byte - See consumeByte(). 186201360Srdivacky * @return - See consumeByte(). 187201360Srdivacky */ 188201360Srdivackystatic int lookAtByte(struct InternalInstruction* insn, uint8_t* byte) { 189201360Srdivacky return insn->reader(insn->readerArg, byte, insn->readerCursor); 190201360Srdivacky} 191201360Srdivacky 192201360Srdivackystatic void unconsumeByte(struct InternalInstruction* insn) { 193201360Srdivacky insn->readerCursor--; 194201360Srdivacky} 195201360Srdivacky 196201360Srdivacky#define CONSUME_FUNC(name, type) \ 197201360Srdivacky static int name(struct InternalInstruction* insn, type* ptr) { \ 198201360Srdivacky type combined = 0; \ 199201360Srdivacky unsigned offset; \ 200201360Srdivacky for (offset = 0; offset < sizeof(type); ++offset) { \ 201201360Srdivacky uint8_t byte; \ 202201360Srdivacky int ret = insn->reader(insn->readerArg, \ 203201360Srdivacky &byte, \ 204201360Srdivacky insn->readerCursor + offset); \ 205201360Srdivacky if (ret) \ 206201360Srdivacky return ret; \ 207243830Sdim combined = combined | ((uint64_t)byte << (offset * 8)); \ 208201360Srdivacky } \ 209201360Srdivacky *ptr = combined; \ 210201360Srdivacky insn->readerCursor += sizeof(type); \ 211201360Srdivacky return 0; \ 212201360Srdivacky } 213201360Srdivacky 214201360Srdivacky/* 215201360Srdivacky * consume* - Use the reader function provided by the user to consume data 216201360Srdivacky * values of various sizes from the instruction's memory and advance the 217201360Srdivacky * cursor appropriately. These readers perform endian conversion. 218201360Srdivacky * 219201360Srdivacky * @param insn - See consumeByte(). 220201360Srdivacky * @param ptr - A pointer to a pre-allocated memory of appropriate size to 221201360Srdivacky * be populated with the data read. 222201360Srdivacky * @return - See consumeByte(). 223201360Srdivacky */ 224201360SrdivackyCONSUME_FUNC(consumeInt8, int8_t) 225201360SrdivackyCONSUME_FUNC(consumeInt16, int16_t) 226201360SrdivackyCONSUME_FUNC(consumeInt32, int32_t) 227201360SrdivackyCONSUME_FUNC(consumeUInt16, uint16_t) 228201360SrdivackyCONSUME_FUNC(consumeUInt32, uint32_t) 229201360SrdivackyCONSUME_FUNC(consumeUInt64, uint64_t) 230201360Srdivacky 231201360Srdivacky/* 232201360Srdivacky * dbgprintf - Uses the logging function provided by the user to log a single 233201360Srdivacky * message, typically without a carriage-return. 234201360Srdivacky * 235201360Srdivacky * @param insn - The instruction containing the logging function. 236201360Srdivacky * @param format - See printf(). 237201360Srdivacky * @param ... - See printf(). 238201360Srdivacky */ 239201360Srdivackystatic void dbgprintf(struct InternalInstruction* insn, 240201360Srdivacky const char* format, 241249423Sdim ...) { 242201360Srdivacky char buffer[256]; 243201360Srdivacky va_list ap; 244249423Sdim 245201360Srdivacky if (!insn->dlog) 246201360Srdivacky return; 247249423Sdim 248201360Srdivacky va_start(ap, format); 249201360Srdivacky (void)vsnprintf(buffer, sizeof(buffer), format, ap); 250201360Srdivacky va_end(ap); 251249423Sdim 252201360Srdivacky insn->dlog(insn->dlogArg, buffer); 253249423Sdim 254201360Srdivacky return; 255201360Srdivacky} 256201360Srdivacky 257201360Srdivacky/* 258201360Srdivacky * setPrefixPresent - Marks that a particular prefix is present at a particular 259201360Srdivacky * location. 260201360Srdivacky * 261201360Srdivacky * @param insn - The instruction to be marked as having the prefix. 262201360Srdivacky * @param prefix - The prefix that is present. 263201360Srdivacky * @param location - The location where the prefix is located (in the address 264201360Srdivacky * space of the instruction's reader). 265201360Srdivacky */ 266201360Srdivackystatic void setPrefixPresent(struct InternalInstruction* insn, 267201360Srdivacky uint8_t prefix, 268201360Srdivacky uint64_t location) 269201360Srdivacky{ 270201360Srdivacky insn->prefixPresent[prefix] = 1; 271201360Srdivacky insn->prefixLocations[prefix] = location; 272201360Srdivacky} 273201360Srdivacky 274201360Srdivacky/* 275201360Srdivacky * isPrefixAtLocation - Queries an instruction to determine whether a prefix is 276201360Srdivacky * present at a given location. 277201360Srdivacky * 278201360Srdivacky * @param insn - The instruction to be queried. 279201360Srdivacky * @param prefix - The prefix. 280201360Srdivacky * @param location - The location to query. 281201360Srdivacky * @return - Whether the prefix is at that location. 282201360Srdivacky */ 283201360Srdivackystatic BOOL isPrefixAtLocation(struct InternalInstruction* insn, 284201360Srdivacky uint8_t prefix, 285201360Srdivacky uint64_t location) 286201360Srdivacky{ 287201360Srdivacky if (insn->prefixPresent[prefix] == 1 && 288201360Srdivacky insn->prefixLocations[prefix] == location) 289201360Srdivacky return TRUE; 290201360Srdivacky else 291201360Srdivacky return FALSE; 292201360Srdivacky} 293201360Srdivacky 294201360Srdivacky/* 295201360Srdivacky * readPrefixes - Consumes all of an instruction's prefix bytes, and marks the 296201360Srdivacky * instruction as having them. Also sets the instruction's default operand, 297201360Srdivacky * address, and other relevant data sizes to report operands correctly. 298201360Srdivacky * 299201360Srdivacky * @param insn - The instruction whose prefixes are to be read. 300201360Srdivacky * @return - 0 if the instruction could be read until the end of the prefix 301201360Srdivacky * bytes, and no prefixes conflicted; nonzero otherwise. 302201360Srdivacky */ 303201360Srdivackystatic int readPrefixes(struct InternalInstruction* insn) { 304201360Srdivacky BOOL isPrefix = TRUE; 305201360Srdivacky BOOL prefixGroups[4] = { FALSE }; 306201360Srdivacky uint64_t prefixLocation; 307218893Sdim uint8_t byte = 0; 308249423Sdim 309201360Srdivacky BOOL hasAdSize = FALSE; 310201360Srdivacky BOOL hasOpSize = FALSE; 311249423Sdim 312201360Srdivacky dbgprintf(insn, "readPrefixes()"); 313249423Sdim 314201360Srdivacky while (isPrefix) { 315201360Srdivacky prefixLocation = insn->readerCursor; 316249423Sdim 317201360Srdivacky if (consumeByte(insn, &byte)) 318201360Srdivacky return -1; 319234353Sdim 320234353Sdim /* 321249423Sdim * If the byte is a LOCK/REP/REPNE prefix and not a part of the opcode, then 322249423Sdim * break and let it be disassembled as a normal "instruction". 323234353Sdim */ 324249423Sdim if (insn->readerCursor - 1 == insn->startLocation 325249423Sdim && (byte == 0xf0 || byte == 0xf2 || byte == 0xf3)) { 326249423Sdim uint8_t nextByte; 327249423Sdim if (byte == 0xf0) 328249423Sdim break; 329249423Sdim if (lookAtByte(insn, &nextByte)) 330249423Sdim return -1; 331249423Sdim if (insn->mode == MODE_64BIT && (nextByte & 0xf0) == 0x40) { 332249423Sdim if (consumeByte(insn, &nextByte)) 333249423Sdim return -1; 334249423Sdim if (lookAtByte(insn, &nextByte)) 335249423Sdim return -1; 336249423Sdim unconsumeByte(insn); 337249423Sdim } 338249423Sdim if (nextByte != 0x0f && nextByte != 0x90) 339249423Sdim break; 340249423Sdim } 341249423Sdim 342201360Srdivacky switch (byte) { 343201360Srdivacky case 0xf0: /* LOCK */ 344201360Srdivacky case 0xf2: /* REPNE/REPNZ */ 345201360Srdivacky case 0xf3: /* REP or REPE/REPZ */ 346201360Srdivacky if (prefixGroups[0]) 347201360Srdivacky dbgprintf(insn, "Redundant Group 1 prefix"); 348201360Srdivacky prefixGroups[0] = TRUE; 349201360Srdivacky setPrefixPresent(insn, byte, prefixLocation); 350201360Srdivacky break; 351201360Srdivacky case 0x2e: /* CS segment override -OR- Branch not taken */ 352201360Srdivacky case 0x36: /* SS segment override -OR- Branch taken */ 353201360Srdivacky case 0x3e: /* DS segment override */ 354201360Srdivacky case 0x26: /* ES segment override */ 355201360Srdivacky case 0x64: /* FS segment override */ 356201360Srdivacky case 0x65: /* GS segment override */ 357201360Srdivacky switch (byte) { 358201360Srdivacky case 0x2e: 359201360Srdivacky insn->segmentOverride = SEG_OVERRIDE_CS; 360201360Srdivacky break; 361201360Srdivacky case 0x36: 362201360Srdivacky insn->segmentOverride = SEG_OVERRIDE_SS; 363201360Srdivacky break; 364201360Srdivacky case 0x3e: 365201360Srdivacky insn->segmentOverride = SEG_OVERRIDE_DS; 366201360Srdivacky break; 367201360Srdivacky case 0x26: 368201360Srdivacky insn->segmentOverride = SEG_OVERRIDE_ES; 369201360Srdivacky break; 370201360Srdivacky case 0x64: 371201360Srdivacky insn->segmentOverride = SEG_OVERRIDE_FS; 372201360Srdivacky break; 373201360Srdivacky case 0x65: 374201360Srdivacky insn->segmentOverride = SEG_OVERRIDE_GS; 375201360Srdivacky break; 376201360Srdivacky default: 377206124Srdivacky debug("Unhandled override"); 378206124Srdivacky return -1; 379201360Srdivacky } 380201360Srdivacky if (prefixGroups[1]) 381201360Srdivacky dbgprintf(insn, "Redundant Group 2 prefix"); 382201360Srdivacky prefixGroups[1] = TRUE; 383201360Srdivacky setPrefixPresent(insn, byte, prefixLocation); 384201360Srdivacky break; 385201360Srdivacky case 0x66: /* Operand-size override */ 386201360Srdivacky if (prefixGroups[2]) 387201360Srdivacky dbgprintf(insn, "Redundant Group 3 prefix"); 388201360Srdivacky prefixGroups[2] = TRUE; 389201360Srdivacky hasOpSize = TRUE; 390201360Srdivacky setPrefixPresent(insn, byte, prefixLocation); 391201360Srdivacky break; 392201360Srdivacky case 0x67: /* Address-size override */ 393201360Srdivacky if (prefixGroups[3]) 394201360Srdivacky dbgprintf(insn, "Redundant Group 4 prefix"); 395201360Srdivacky prefixGroups[3] = TRUE; 396201360Srdivacky hasAdSize = TRUE; 397201360Srdivacky setPrefixPresent(insn, byte, prefixLocation); 398201360Srdivacky break; 399201360Srdivacky default: /* Not a prefix byte */ 400201360Srdivacky isPrefix = FALSE; 401201360Srdivacky break; 402201360Srdivacky } 403249423Sdim 404201360Srdivacky if (isPrefix) 405201360Srdivacky dbgprintf(insn, "Found prefix 0x%hhx", byte); 406201360Srdivacky } 407249423Sdim 408221345Sdim insn->vexSize = 0; 409249423Sdim 410221345Sdim if (byte == 0xc4) { 411221345Sdim uint8_t byte1; 412249423Sdim 413221345Sdim if (lookAtByte(insn, &byte1)) { 414221345Sdim dbgprintf(insn, "Couldn't read second byte of VEX"); 415221345Sdim return -1; 416221345Sdim } 417249423Sdim 418226633Sdim if (insn->mode == MODE_64BIT || (byte1 & 0xc0) == 0xc0) { 419221345Sdim insn->vexSize = 3; 420221345Sdim insn->necessaryPrefixLocation = insn->readerCursor - 1; 421221345Sdim } 422221345Sdim else { 423221345Sdim unconsumeByte(insn); 424221345Sdim insn->necessaryPrefixLocation = insn->readerCursor - 1; 425221345Sdim } 426249423Sdim 427221345Sdim if (insn->vexSize == 3) { 428221345Sdim insn->vexPrefix[0] = byte; 429221345Sdim consumeByte(insn, &insn->vexPrefix[1]); 430221345Sdim consumeByte(insn, &insn->vexPrefix[2]); 431221345Sdim 432221345Sdim /* We simulate the REX prefix for simplicity's sake */ 433249423Sdim 434226633Sdim if (insn->mode == MODE_64BIT) { 435249423Sdim insn->rexPrefix = 0x40 436226633Sdim | (wFromVEX3of3(insn->vexPrefix[2]) << 3) 437226633Sdim | (rFromVEX2of3(insn->vexPrefix[1]) << 2) 438226633Sdim | (xFromVEX2of3(insn->vexPrefix[1]) << 1) 439226633Sdim | (bFromVEX2of3(insn->vexPrefix[1]) << 0); 440226633Sdim } 441249423Sdim 442221345Sdim switch (ppFromVEX3of3(insn->vexPrefix[2])) 443221345Sdim { 444221345Sdim default: 445221345Sdim break; 446221345Sdim case VEX_PREFIX_66: 447249423Sdim hasOpSize = TRUE; 448221345Sdim break; 449201360Srdivacky } 450249423Sdim 451221345Sdim dbgprintf(insn, "Found VEX prefix 0x%hhx 0x%hhx 0x%hhx", insn->vexPrefix[0], insn->vexPrefix[1], insn->vexPrefix[2]); 452221345Sdim } 453221345Sdim } 454221345Sdim else if (byte == 0xc5) { 455221345Sdim uint8_t byte1; 456249423Sdim 457221345Sdim if (lookAtByte(insn, &byte1)) { 458221345Sdim dbgprintf(insn, "Couldn't read second byte of VEX"); 459221345Sdim return -1; 460221345Sdim } 461249423Sdim 462226633Sdim if (insn->mode == MODE_64BIT || (byte1 & 0xc0) == 0xc0) { 463221345Sdim insn->vexSize = 2; 464221345Sdim } 465221345Sdim else { 466201360Srdivacky unconsumeByte(insn); 467221345Sdim } 468249423Sdim 469221345Sdim if (insn->vexSize == 2) { 470221345Sdim insn->vexPrefix[0] = byte; 471221345Sdim consumeByte(insn, &insn->vexPrefix[1]); 472249423Sdim 473226633Sdim if (insn->mode == MODE_64BIT) { 474249423Sdim insn->rexPrefix = 0x40 475226633Sdim | (rFromVEX2of2(insn->vexPrefix[1]) << 2); 476226633Sdim } 477249423Sdim 478221345Sdim switch (ppFromVEX2of2(insn->vexPrefix[1])) 479221345Sdim { 480221345Sdim default: 481221345Sdim break; 482221345Sdim case VEX_PREFIX_66: 483249423Sdim hasOpSize = TRUE; 484221345Sdim break; 485221345Sdim } 486249423Sdim 487221345Sdim dbgprintf(insn, "Found VEX prefix 0x%hhx 0x%hhx", insn->vexPrefix[0], insn->vexPrefix[1]); 488221345Sdim } 489221345Sdim } 490221345Sdim else { 491221345Sdim if (insn->mode == MODE_64BIT) { 492221345Sdim if ((byte & 0xf0) == 0x40) { 493221345Sdim uint8_t opcodeByte; 494249423Sdim 495221345Sdim if (lookAtByte(insn, &opcodeByte) || ((opcodeByte & 0xf0) == 0x40)) { 496221345Sdim dbgprintf(insn, "Redundant REX prefix"); 497221345Sdim return -1; 498221345Sdim } 499249423Sdim 500221345Sdim insn->rexPrefix = byte; 501221345Sdim insn->necessaryPrefixLocation = insn->readerCursor - 2; 502249423Sdim 503221345Sdim dbgprintf(insn, "Found REX prefix 0x%hhx", byte); 504249423Sdim } else { 505221345Sdim unconsumeByte(insn); 506221345Sdim insn->necessaryPrefixLocation = insn->readerCursor - 1; 507221345Sdim } 508221345Sdim } else { 509221345Sdim unconsumeByte(insn); 510201360Srdivacky insn->necessaryPrefixLocation = insn->readerCursor - 1; 511201360Srdivacky } 512201360Srdivacky } 513221345Sdim 514201360Srdivacky if (insn->mode == MODE_16BIT) { 515201360Srdivacky insn->registerSize = (hasOpSize ? 4 : 2); 516201360Srdivacky insn->addressSize = (hasAdSize ? 4 : 2); 517201360Srdivacky insn->displacementSize = (hasAdSize ? 4 : 2); 518201360Srdivacky insn->immediateSize = (hasOpSize ? 4 : 2); 519201360Srdivacky } else if (insn->mode == MODE_32BIT) { 520201360Srdivacky insn->registerSize = (hasOpSize ? 2 : 4); 521201360Srdivacky insn->addressSize = (hasAdSize ? 2 : 4); 522201360Srdivacky insn->displacementSize = (hasAdSize ? 2 : 4); 523218893Sdim insn->immediateSize = (hasOpSize ? 2 : 4); 524201360Srdivacky } else if (insn->mode == MODE_64BIT) { 525201360Srdivacky if (insn->rexPrefix && wFromREX(insn->rexPrefix)) { 526201360Srdivacky insn->registerSize = 8; 527201360Srdivacky insn->addressSize = (hasAdSize ? 4 : 8); 528201360Srdivacky insn->displacementSize = 4; 529201360Srdivacky insn->immediateSize = 4; 530201360Srdivacky } else if (insn->rexPrefix) { 531201360Srdivacky insn->registerSize = (hasOpSize ? 2 : 4); 532201360Srdivacky insn->addressSize = (hasAdSize ? 4 : 8); 533201360Srdivacky insn->displacementSize = (hasOpSize ? 2 : 4); 534201360Srdivacky insn->immediateSize = (hasOpSize ? 2 : 4); 535201360Srdivacky } else { 536201360Srdivacky insn->registerSize = (hasOpSize ? 2 : 4); 537201360Srdivacky insn->addressSize = (hasAdSize ? 4 : 8); 538201360Srdivacky insn->displacementSize = (hasOpSize ? 2 : 4); 539201360Srdivacky insn->immediateSize = (hasOpSize ? 2 : 4); 540201360Srdivacky } 541201360Srdivacky } 542249423Sdim 543201360Srdivacky return 0; 544201360Srdivacky} 545201360Srdivacky 546201360Srdivacky/* 547201360Srdivacky * readOpcode - Reads the opcode (excepting the ModR/M byte in the case of 548201360Srdivacky * extended or escape opcodes). 549201360Srdivacky * 550201360Srdivacky * @param insn - The instruction whose opcode is to be read. 551201360Srdivacky * @return - 0 if the opcode could be read successfully; nonzero otherwise. 552201360Srdivacky */ 553249423Sdimstatic int readOpcode(struct InternalInstruction* insn) { 554201360Srdivacky /* Determine the length of the primary opcode */ 555249423Sdim 556201360Srdivacky uint8_t current; 557249423Sdim 558201360Srdivacky dbgprintf(insn, "readOpcode()"); 559249423Sdim 560201360Srdivacky insn->opcodeType = ONEBYTE; 561249423Sdim 562221345Sdim if (insn->vexSize == 3) 563221345Sdim { 564221345Sdim switch (mmmmmFromVEX2of3(insn->vexPrefix[1])) 565221345Sdim { 566221345Sdim default: 567221345Sdim dbgprintf(insn, "Unhandled m-mmmm field for instruction (0x%hhx)", mmmmmFromVEX2of3(insn->vexPrefix[1])); 568249423Sdim return -1; 569221345Sdim case 0: 570221345Sdim break; 571221345Sdim case VEX_LOB_0F: 572221345Sdim insn->twoByteEscape = 0x0f; 573221345Sdim insn->opcodeType = TWOBYTE; 574221345Sdim return consumeByte(insn, &insn->opcode); 575221345Sdim case VEX_LOB_0F38: 576221345Sdim insn->twoByteEscape = 0x0f; 577221345Sdim insn->threeByteEscape = 0x38; 578221345Sdim insn->opcodeType = THREEBYTE_38; 579221345Sdim return consumeByte(insn, &insn->opcode); 580249423Sdim case VEX_LOB_0F3A: 581221345Sdim insn->twoByteEscape = 0x0f; 582221345Sdim insn->threeByteEscape = 0x3a; 583221345Sdim insn->opcodeType = THREEBYTE_3A; 584221345Sdim return consumeByte(insn, &insn->opcode); 585221345Sdim } 586221345Sdim } 587221345Sdim else if (insn->vexSize == 2) 588221345Sdim { 589221345Sdim insn->twoByteEscape = 0x0f; 590221345Sdim insn->opcodeType = TWOBYTE; 591221345Sdim return consumeByte(insn, &insn->opcode); 592221345Sdim } 593249423Sdim 594201360Srdivacky if (consumeByte(insn, ¤t)) 595201360Srdivacky return -1; 596249423Sdim 597201360Srdivacky if (current == 0x0f) { 598201360Srdivacky dbgprintf(insn, "Found a two-byte escape prefix (0x%hhx)", current); 599249423Sdim 600201360Srdivacky insn->twoByteEscape = current; 601249423Sdim 602201360Srdivacky if (consumeByte(insn, ¤t)) 603201360Srdivacky return -1; 604249423Sdim 605201360Srdivacky if (current == 0x38) { 606201360Srdivacky dbgprintf(insn, "Found a three-byte escape prefix (0x%hhx)", current); 607249423Sdim 608201360Srdivacky insn->threeByteEscape = current; 609249423Sdim 610201360Srdivacky if (consumeByte(insn, ¤t)) 611201360Srdivacky return -1; 612249423Sdim 613201360Srdivacky insn->opcodeType = THREEBYTE_38; 614201360Srdivacky } else if (current == 0x3a) { 615201360Srdivacky dbgprintf(insn, "Found a three-byte escape prefix (0x%hhx)", current); 616249423Sdim 617201360Srdivacky insn->threeByteEscape = current; 618249423Sdim 619201360Srdivacky if (consumeByte(insn, ¤t)) 620201360Srdivacky return -1; 621249423Sdim 622201360Srdivacky insn->opcodeType = THREEBYTE_3A; 623221345Sdim } else if (current == 0xa6) { 624221345Sdim dbgprintf(insn, "Found a three-byte escape prefix (0x%hhx)", current); 625249423Sdim 626221345Sdim insn->threeByteEscape = current; 627249423Sdim 628221345Sdim if (consumeByte(insn, ¤t)) 629221345Sdim return -1; 630249423Sdim 631221345Sdim insn->opcodeType = THREEBYTE_A6; 632221345Sdim } else if (current == 0xa7) { 633221345Sdim dbgprintf(insn, "Found a three-byte escape prefix (0x%hhx)", current); 634249423Sdim 635221345Sdim insn->threeByteEscape = current; 636249423Sdim 637221345Sdim if (consumeByte(insn, ¤t)) 638221345Sdim return -1; 639249423Sdim 640221345Sdim insn->opcodeType = THREEBYTE_A7; 641201360Srdivacky } else { 642201360Srdivacky dbgprintf(insn, "Didn't find a three-byte escape prefix"); 643249423Sdim 644201360Srdivacky insn->opcodeType = TWOBYTE; 645201360Srdivacky } 646201360Srdivacky } 647249423Sdim 648201360Srdivacky /* 649201360Srdivacky * At this point we have consumed the full opcode. 650201360Srdivacky * Anything we consume from here on must be unconsumed. 651201360Srdivacky */ 652249423Sdim 653201360Srdivacky insn->opcode = current; 654249423Sdim 655201360Srdivacky return 0; 656201360Srdivacky} 657201360Srdivacky 658201360Srdivackystatic int readModRM(struct InternalInstruction* insn); 659201360Srdivacky 660201360Srdivacky/* 661201360Srdivacky * getIDWithAttrMask - Determines the ID of an instruction, consuming 662201360Srdivacky * the ModR/M byte as appropriate for extended and escape opcodes, 663201360Srdivacky * and using a supplied attribute mask. 664201360Srdivacky * 665201360Srdivacky * @param instructionID - A pointer whose target is filled in with the ID of the 666201360Srdivacky * instruction. 667201360Srdivacky * @param insn - The instruction whose ID is to be determined. 668201360Srdivacky * @param attrMask - The attribute mask to search. 669201360Srdivacky * @return - 0 if the ModR/M could be read when needed or was not 670201360Srdivacky * needed; nonzero otherwise. 671201360Srdivacky */ 672201360Srdivackystatic int getIDWithAttrMask(uint16_t* instructionID, 673201360Srdivacky struct InternalInstruction* insn, 674201360Srdivacky uint8_t attrMask) { 675201360Srdivacky BOOL hasModRMExtension; 676249423Sdim 677201360Srdivacky uint8_t instructionClass; 678201360Srdivacky 679201360Srdivacky instructionClass = contextForAttrs(attrMask); 680249423Sdim 681201360Srdivacky hasModRMExtension = modRMRequired(insn->opcodeType, 682201360Srdivacky instructionClass, 683201360Srdivacky insn->opcode); 684249423Sdim 685201360Srdivacky if (hasModRMExtension) { 686218893Sdim if (readModRM(insn)) 687218893Sdim return -1; 688249423Sdim 689201360Srdivacky *instructionID = decode(insn->opcodeType, 690201360Srdivacky instructionClass, 691201360Srdivacky insn->opcode, 692201360Srdivacky insn->modRM); 693201360Srdivacky } else { 694201360Srdivacky *instructionID = decode(insn->opcodeType, 695201360Srdivacky instructionClass, 696201360Srdivacky insn->opcode, 697201360Srdivacky 0); 698201360Srdivacky } 699249423Sdim 700201360Srdivacky return 0; 701201360Srdivacky} 702201360Srdivacky 703201360Srdivacky/* 704201360Srdivacky * is16BitEquivalent - Determines whether two instruction names refer to 705201360Srdivacky * equivalent instructions but one is 16-bit whereas the other is not. 706201360Srdivacky * 707201360Srdivacky * @param orig - The instruction that is not 16-bit 708201360Srdivacky * @param equiv - The instruction that is 16-bit 709201360Srdivacky */ 710243830Sdimstatic BOOL is16BitEquivalent(const char* orig, const char* equiv) { 711201360Srdivacky off_t i; 712249423Sdim 713206124Srdivacky for (i = 0;; i++) { 714206124Srdivacky if (orig[i] == '\0' && equiv[i] == '\0') 715201360Srdivacky return TRUE; 716206124Srdivacky if (orig[i] == '\0' || equiv[i] == '\0') 717201360Srdivacky return FALSE; 718206124Srdivacky if (orig[i] != equiv[i]) { 719206124Srdivacky if ((orig[i] == 'Q' || orig[i] == 'L') && equiv[i] == 'W') 720201360Srdivacky continue; 721206124Srdivacky if ((orig[i] == '6' || orig[i] == '3') && equiv[i] == '1') 722201360Srdivacky continue; 723206124Srdivacky if ((orig[i] == '4' || orig[i] == '2') && equiv[i] == '6') 724201360Srdivacky continue; 725201360Srdivacky return FALSE; 726201360Srdivacky } 727201360Srdivacky } 728201360Srdivacky} 729201360Srdivacky 730201360Srdivacky/* 731249423Sdim * getID - Determines the ID of an instruction, consuming the ModR/M byte as 732249423Sdim * appropriate for extended and escape opcodes. Determines the attributes and 733201360Srdivacky * context for the instruction before doing so. 734201360Srdivacky * 735201360Srdivacky * @param insn - The instruction whose ID is to be determined. 736201360Srdivacky * @return - 0 if the ModR/M could be read when needed or was not needed; 737201360Srdivacky * nonzero otherwise. 738201360Srdivacky */ 739243830Sdimstatic int getID(struct InternalInstruction* insn, const void *miiArg) { 740201360Srdivacky uint8_t attrMask; 741201360Srdivacky uint16_t instructionID; 742249423Sdim 743201360Srdivacky dbgprintf(insn, "getID()"); 744249423Sdim 745201360Srdivacky attrMask = ATTR_NONE; 746221345Sdim 747201360Srdivacky if (insn->mode == MODE_64BIT) 748201360Srdivacky attrMask |= ATTR_64BIT; 749249423Sdim 750221345Sdim if (insn->vexSize) { 751221345Sdim attrMask |= ATTR_VEX; 752221345Sdim 753221345Sdim if (insn->vexSize == 3) { 754221345Sdim switch (ppFromVEX3of3(insn->vexPrefix[2])) { 755221345Sdim case VEX_PREFIX_66: 756249423Sdim attrMask |= ATTR_OPSIZE; 757221345Sdim break; 758221345Sdim case VEX_PREFIX_F3: 759221345Sdim attrMask |= ATTR_XS; 760221345Sdim break; 761221345Sdim case VEX_PREFIX_F2: 762221345Sdim attrMask |= ATTR_XD; 763221345Sdim break; 764221345Sdim } 765249423Sdim 766221345Sdim if (lFromVEX3of3(insn->vexPrefix[2])) 767221345Sdim attrMask |= ATTR_VEXL; 768221345Sdim } 769221345Sdim else if (insn->vexSize == 2) { 770221345Sdim switch (ppFromVEX2of2(insn->vexPrefix[1])) { 771221345Sdim case VEX_PREFIX_66: 772249423Sdim attrMask |= ATTR_OPSIZE; 773221345Sdim break; 774221345Sdim case VEX_PREFIX_F3: 775221345Sdim attrMask |= ATTR_XS; 776221345Sdim break; 777221345Sdim case VEX_PREFIX_F2: 778221345Sdim attrMask |= ATTR_XD; 779221345Sdim break; 780221345Sdim } 781249423Sdim 782221345Sdim if (lFromVEX2of2(insn->vexPrefix[1])) 783221345Sdim attrMask |= ATTR_VEXL; 784221345Sdim } 785221345Sdim else { 786221345Sdim return -1; 787221345Sdim } 788221345Sdim } 789221345Sdim else { 790221345Sdim if (isPrefixAtLocation(insn, 0x66, insn->necessaryPrefixLocation)) 791221345Sdim attrMask |= ATTR_OPSIZE; 792234353Sdim else if (isPrefixAtLocation(insn, 0x67, insn->necessaryPrefixLocation)) 793234353Sdim attrMask |= ATTR_ADSIZE; 794221345Sdim else if (isPrefixAtLocation(insn, 0xf3, insn->necessaryPrefixLocation)) 795221345Sdim attrMask |= ATTR_XS; 796221345Sdim else if (isPrefixAtLocation(insn, 0xf2, insn->necessaryPrefixLocation)) 797221345Sdim attrMask |= ATTR_XD; 798221345Sdim } 799221345Sdim 800226633Sdim if (insn->rexPrefix & 0x08) 801226633Sdim attrMask |= ATTR_REXW; 802234353Sdim 803206124Srdivacky if (getIDWithAttrMask(&instructionID, insn, attrMask)) 804201360Srdivacky return -1; 805234353Sdim 806201360Srdivacky /* The following clauses compensate for limitations of the tables. */ 807234353Sdim 808234353Sdim if ((attrMask & ATTR_VEXL) && (attrMask & ATTR_REXW) && 809234353Sdim !(attrMask & ATTR_OPSIZE)) { 810201360Srdivacky /* 811226633Sdim * Some VEX instructions ignore the L-bit, but use the W-bit. Normally L-bit 812226633Sdim * has precedence since there are no L-bit with W-bit entries in the tables. 813226633Sdim * So if the L-bit isn't significant we should use the W-bit instead. 814234353Sdim * We only need to do this if the instruction doesn't specify OpSize since 815234353Sdim * there is a VEX_L_W_OPSIZE table. 816201360Srdivacky */ 817226633Sdim 818218893Sdim const struct InstructionSpecifier *spec; 819226633Sdim uint16_t instructionIDWithWBit; 820226633Sdim const struct InstructionSpecifier *specWithWBit; 821226633Sdim 822201360Srdivacky spec = specifierForUID(instructionID); 823226633Sdim 824226633Sdim if (getIDWithAttrMask(&instructionIDWithWBit, 825201360Srdivacky insn, 826226633Sdim (attrMask & (~ATTR_VEXL)) | ATTR_REXW)) { 827201360Srdivacky insn->instructionID = instructionID; 828201360Srdivacky insn->spec = spec; 829201360Srdivacky return 0; 830201360Srdivacky } 831226633Sdim 832226633Sdim specWithWBit = specifierForUID(instructionIDWithWBit); 833226633Sdim 834226633Sdim if (instructionID != instructionIDWithWBit) { 835226633Sdim insn->instructionID = instructionIDWithWBit; 836226633Sdim insn->spec = specWithWBit; 837201360Srdivacky } else { 838201360Srdivacky insn->instructionID = instructionID; 839201360Srdivacky insn->spec = spec; 840201360Srdivacky } 841201360Srdivacky return 0; 842201360Srdivacky } 843226633Sdim 844201360Srdivacky if (insn->prefixPresent[0x66] && !(attrMask & ATTR_OPSIZE)) { 845201360Srdivacky /* 846201360Srdivacky * The instruction tables make no distinction between instructions that 847201360Srdivacky * allow OpSize anywhere (i.e., 16-bit operations) and that need it in a 848201360Srdivacky * particular spot (i.e., many MMX operations). In general we're 849201360Srdivacky * conservative, but in the specific case where OpSize is present but not 850201360Srdivacky * in the right place we check if there's a 16-bit operation. 851201360Srdivacky */ 852249423Sdim 853218893Sdim const struct InstructionSpecifier *spec; 854201360Srdivacky uint16_t instructionIDWithOpsize; 855234353Sdim const char *specName, *specWithOpSizeName; 856249423Sdim 857201360Srdivacky spec = specifierForUID(instructionID); 858249423Sdim 859201360Srdivacky if (getIDWithAttrMask(&instructionIDWithOpsize, 860201360Srdivacky insn, 861201360Srdivacky attrMask | ATTR_OPSIZE)) { 862249423Sdim /* 863201360Srdivacky * ModRM required with OpSize but not present; give up and return version 864201360Srdivacky * without OpSize set 865201360Srdivacky */ 866249423Sdim 867201360Srdivacky insn->instructionID = instructionID; 868201360Srdivacky insn->spec = spec; 869201360Srdivacky return 0; 870201360Srdivacky } 871249423Sdim 872234353Sdim specName = x86DisassemblerGetInstrName(instructionID, miiArg); 873234353Sdim specWithOpSizeName = 874234353Sdim x86DisassemblerGetInstrName(instructionIDWithOpsize, miiArg); 875234353Sdim 876243830Sdim if (is16BitEquivalent(specName, specWithOpSizeName)) { 877201360Srdivacky insn->instructionID = instructionIDWithOpsize; 878234353Sdim insn->spec = specifierForUID(instructionIDWithOpsize); 879201360Srdivacky } else { 880201360Srdivacky insn->instructionID = instructionID; 881201360Srdivacky insn->spec = spec; 882201360Srdivacky } 883201360Srdivacky return 0; 884201360Srdivacky } 885226633Sdim 886226633Sdim if (insn->opcodeType == ONEBYTE && insn->opcode == 0x90 && 887226633Sdim insn->rexPrefix & 0x01) { 888226633Sdim /* 889226633Sdim * NOOP shouldn't decode as NOOP if REX.b is set. Instead 890226633Sdim * it should decode as XCHG %r8, %eax. 891226633Sdim */ 892226633Sdim 893226633Sdim const struct InstructionSpecifier *spec; 894226633Sdim uint16_t instructionIDWithNewOpcode; 895226633Sdim const struct InstructionSpecifier *specWithNewOpcode; 896226633Sdim 897226633Sdim spec = specifierForUID(instructionID); 898249423Sdim 899226633Sdim /* Borrow opcode from one of the other XCHGar opcodes */ 900226633Sdim insn->opcode = 0x91; 901249423Sdim 902226633Sdim if (getIDWithAttrMask(&instructionIDWithNewOpcode, 903226633Sdim insn, 904226633Sdim attrMask)) { 905226633Sdim insn->opcode = 0x90; 906226633Sdim 907226633Sdim insn->instructionID = instructionID; 908226633Sdim insn->spec = spec; 909226633Sdim return 0; 910226633Sdim } 911226633Sdim 912226633Sdim specWithNewOpcode = specifierForUID(instructionIDWithNewOpcode); 913226633Sdim 914226633Sdim /* Change back */ 915226633Sdim insn->opcode = 0x90; 916226633Sdim 917226633Sdim insn->instructionID = instructionIDWithNewOpcode; 918226633Sdim insn->spec = specWithNewOpcode; 919226633Sdim 920226633Sdim return 0; 921226633Sdim } 922249423Sdim 923201360Srdivacky insn->instructionID = instructionID; 924201360Srdivacky insn->spec = specifierForUID(insn->instructionID); 925249423Sdim 926201360Srdivacky return 0; 927201360Srdivacky} 928201360Srdivacky 929201360Srdivacky/* 930201360Srdivacky * readSIB - Consumes the SIB byte to determine addressing information for an 931201360Srdivacky * instruction. 932201360Srdivacky * 933201360Srdivacky * @param insn - The instruction whose SIB byte is to be read. 934201360Srdivacky * @return - 0 if the SIB byte was successfully read; nonzero otherwise. 935201360Srdivacky */ 936201360Srdivackystatic int readSIB(struct InternalInstruction* insn) { 937201360Srdivacky SIBIndex sibIndexBase = 0; 938201360Srdivacky SIBBase sibBaseBase = 0; 939201360Srdivacky uint8_t index, base; 940249423Sdim 941201360Srdivacky dbgprintf(insn, "readSIB()"); 942249423Sdim 943201360Srdivacky if (insn->consumedSIB) 944201360Srdivacky return 0; 945249423Sdim 946201360Srdivacky insn->consumedSIB = TRUE; 947249423Sdim 948201360Srdivacky switch (insn->addressSize) { 949201360Srdivacky case 2: 950201360Srdivacky dbgprintf(insn, "SIB-based addressing doesn't work in 16-bit mode"); 951201360Srdivacky return -1; 952201360Srdivacky break; 953201360Srdivacky case 4: 954201360Srdivacky sibIndexBase = SIB_INDEX_EAX; 955201360Srdivacky sibBaseBase = SIB_BASE_EAX; 956201360Srdivacky break; 957201360Srdivacky case 8: 958201360Srdivacky sibIndexBase = SIB_INDEX_RAX; 959201360Srdivacky sibBaseBase = SIB_BASE_RAX; 960201360Srdivacky break; 961201360Srdivacky } 962201360Srdivacky 963201360Srdivacky if (consumeByte(insn, &insn->sib)) 964201360Srdivacky return -1; 965249423Sdim 966201360Srdivacky index = indexFromSIB(insn->sib) | (xFromREX(insn->rexPrefix) << 3); 967249423Sdim 968201360Srdivacky switch (index) { 969201360Srdivacky case 0x4: 970201360Srdivacky insn->sibIndex = SIB_INDEX_NONE; 971201360Srdivacky break; 972201360Srdivacky default: 973221345Sdim insn->sibIndex = (SIBIndex)(sibIndexBase + index); 974201360Srdivacky if (insn->sibIndex == SIB_INDEX_sib || 975201360Srdivacky insn->sibIndex == SIB_INDEX_sib64) 976201360Srdivacky insn->sibIndex = SIB_INDEX_NONE; 977201360Srdivacky break; 978201360Srdivacky } 979249423Sdim 980201360Srdivacky switch (scaleFromSIB(insn->sib)) { 981201360Srdivacky case 0: 982201360Srdivacky insn->sibScale = 1; 983201360Srdivacky break; 984201360Srdivacky case 1: 985201360Srdivacky insn->sibScale = 2; 986201360Srdivacky break; 987201360Srdivacky case 2: 988201360Srdivacky insn->sibScale = 4; 989201360Srdivacky break; 990201360Srdivacky case 3: 991201360Srdivacky insn->sibScale = 8; 992201360Srdivacky break; 993201360Srdivacky } 994249423Sdim 995201360Srdivacky base = baseFromSIB(insn->sib) | (bFromREX(insn->rexPrefix) << 3); 996249423Sdim 997201360Srdivacky switch (base) { 998201360Srdivacky case 0x5: 999201360Srdivacky switch (modFromModRM(insn->modRM)) { 1000201360Srdivacky case 0x0: 1001201360Srdivacky insn->eaDisplacement = EA_DISP_32; 1002201360Srdivacky insn->sibBase = SIB_BASE_NONE; 1003201360Srdivacky break; 1004201360Srdivacky case 0x1: 1005201360Srdivacky insn->eaDisplacement = EA_DISP_8; 1006249423Sdim insn->sibBase = (insn->addressSize == 4 ? 1007201360Srdivacky SIB_BASE_EBP : SIB_BASE_RBP); 1008201360Srdivacky break; 1009201360Srdivacky case 0x2: 1010201360Srdivacky insn->eaDisplacement = EA_DISP_32; 1011249423Sdim insn->sibBase = (insn->addressSize == 4 ? 1012201360Srdivacky SIB_BASE_EBP : SIB_BASE_RBP); 1013201360Srdivacky break; 1014201360Srdivacky case 0x3: 1015206124Srdivacky debug("Cannot have Mod = 0b11 and a SIB byte"); 1016206124Srdivacky return -1; 1017201360Srdivacky } 1018201360Srdivacky break; 1019201360Srdivacky default: 1020221345Sdim insn->sibBase = (SIBBase)(sibBaseBase + base); 1021201360Srdivacky break; 1022201360Srdivacky } 1023249423Sdim 1024201360Srdivacky return 0; 1025201360Srdivacky} 1026201360Srdivacky 1027201360Srdivacky/* 1028201360Srdivacky * readDisplacement - Consumes the displacement of an instruction. 1029201360Srdivacky * 1030201360Srdivacky * @param insn - The instruction whose displacement is to be read. 1031249423Sdim * @return - 0 if the displacement byte was successfully read; nonzero 1032201360Srdivacky * otherwise. 1033201360Srdivacky */ 1034249423Sdimstatic int readDisplacement(struct InternalInstruction* insn) { 1035201360Srdivacky int8_t d8; 1036201360Srdivacky int16_t d16; 1037201360Srdivacky int32_t d32; 1038249423Sdim 1039201360Srdivacky dbgprintf(insn, "readDisplacement()"); 1040249423Sdim 1041201360Srdivacky if (insn->consumedDisplacement) 1042201360Srdivacky return 0; 1043249423Sdim 1044201360Srdivacky insn->consumedDisplacement = TRUE; 1045234353Sdim insn->displacementOffset = insn->readerCursor - insn->startLocation; 1046249423Sdim 1047201360Srdivacky switch (insn->eaDisplacement) { 1048201360Srdivacky case EA_DISP_NONE: 1049201360Srdivacky insn->consumedDisplacement = FALSE; 1050201360Srdivacky break; 1051201360Srdivacky case EA_DISP_8: 1052201360Srdivacky if (consumeInt8(insn, &d8)) 1053201360Srdivacky return -1; 1054201360Srdivacky insn->displacement = d8; 1055201360Srdivacky break; 1056201360Srdivacky case EA_DISP_16: 1057201360Srdivacky if (consumeInt16(insn, &d16)) 1058201360Srdivacky return -1; 1059201360Srdivacky insn->displacement = d16; 1060201360Srdivacky break; 1061201360Srdivacky case EA_DISP_32: 1062201360Srdivacky if (consumeInt32(insn, &d32)) 1063201360Srdivacky return -1; 1064201360Srdivacky insn->displacement = d32; 1065201360Srdivacky break; 1066201360Srdivacky } 1067249423Sdim 1068201360Srdivacky insn->consumedDisplacement = TRUE; 1069201360Srdivacky return 0; 1070201360Srdivacky} 1071201360Srdivacky 1072201360Srdivacky/* 1073201360Srdivacky * readModRM - Consumes all addressing information (ModR/M byte, SIB byte, and 1074201360Srdivacky * displacement) for an instruction and interprets it. 1075201360Srdivacky * 1076201360Srdivacky * @param insn - The instruction whose addressing information is to be read. 1077201360Srdivacky * @return - 0 if the information was successfully read; nonzero otherwise. 1078201360Srdivacky */ 1079249423Sdimstatic int readModRM(struct InternalInstruction* insn) { 1080201360Srdivacky uint8_t mod, rm, reg; 1081249423Sdim 1082201360Srdivacky dbgprintf(insn, "readModRM()"); 1083249423Sdim 1084201360Srdivacky if (insn->consumedModRM) 1085201360Srdivacky return 0; 1086249423Sdim 1087218893Sdim if (consumeByte(insn, &insn->modRM)) 1088218893Sdim return -1; 1089201360Srdivacky insn->consumedModRM = TRUE; 1090249423Sdim 1091201360Srdivacky mod = modFromModRM(insn->modRM); 1092201360Srdivacky rm = rmFromModRM(insn->modRM); 1093201360Srdivacky reg = regFromModRM(insn->modRM); 1094249423Sdim 1095201360Srdivacky /* 1096201360Srdivacky * This goes by insn->registerSize to pick the correct register, which messes 1097201360Srdivacky * up if we're using (say) XMM or 8-bit register operands. That gets fixed in 1098201360Srdivacky * fixupReg(). 1099201360Srdivacky */ 1100201360Srdivacky switch (insn->registerSize) { 1101201360Srdivacky case 2: 1102201360Srdivacky insn->regBase = MODRM_REG_AX; 1103201360Srdivacky insn->eaRegBase = EA_REG_AX; 1104201360Srdivacky break; 1105201360Srdivacky case 4: 1106201360Srdivacky insn->regBase = MODRM_REG_EAX; 1107201360Srdivacky insn->eaRegBase = EA_REG_EAX; 1108201360Srdivacky break; 1109201360Srdivacky case 8: 1110201360Srdivacky insn->regBase = MODRM_REG_RAX; 1111201360Srdivacky insn->eaRegBase = EA_REG_RAX; 1112201360Srdivacky break; 1113201360Srdivacky } 1114249423Sdim 1115201360Srdivacky reg |= rFromREX(insn->rexPrefix) << 3; 1116201360Srdivacky rm |= bFromREX(insn->rexPrefix) << 3; 1117249423Sdim 1118201360Srdivacky insn->reg = (Reg)(insn->regBase + reg); 1119249423Sdim 1120201360Srdivacky switch (insn->addressSize) { 1121201360Srdivacky case 2: 1122201360Srdivacky insn->eaBaseBase = EA_BASE_BX_SI; 1123249423Sdim 1124201360Srdivacky switch (mod) { 1125201360Srdivacky case 0x0: 1126201360Srdivacky if (rm == 0x6) { 1127201360Srdivacky insn->eaBase = EA_BASE_NONE; 1128201360Srdivacky insn->eaDisplacement = EA_DISP_16; 1129206124Srdivacky if (readDisplacement(insn)) 1130201360Srdivacky return -1; 1131201360Srdivacky } else { 1132201360Srdivacky insn->eaBase = (EABase)(insn->eaBaseBase + rm); 1133201360Srdivacky insn->eaDisplacement = EA_DISP_NONE; 1134201360Srdivacky } 1135201360Srdivacky break; 1136201360Srdivacky case 0x1: 1137201360Srdivacky insn->eaBase = (EABase)(insn->eaBaseBase + rm); 1138201360Srdivacky insn->eaDisplacement = EA_DISP_8; 1139206124Srdivacky if (readDisplacement(insn)) 1140201360Srdivacky return -1; 1141201360Srdivacky break; 1142201360Srdivacky case 0x2: 1143201360Srdivacky insn->eaBase = (EABase)(insn->eaBaseBase + rm); 1144201360Srdivacky insn->eaDisplacement = EA_DISP_16; 1145206124Srdivacky if (readDisplacement(insn)) 1146201360Srdivacky return -1; 1147201360Srdivacky break; 1148201360Srdivacky case 0x3: 1149201360Srdivacky insn->eaBase = (EABase)(insn->eaRegBase + rm); 1150206124Srdivacky if (readDisplacement(insn)) 1151201360Srdivacky return -1; 1152201360Srdivacky break; 1153201360Srdivacky } 1154201360Srdivacky break; 1155201360Srdivacky case 4: 1156201360Srdivacky case 8: 1157201360Srdivacky insn->eaBaseBase = (insn->addressSize == 4 ? EA_BASE_EAX : EA_BASE_RAX); 1158249423Sdim 1159201360Srdivacky switch (mod) { 1160201360Srdivacky case 0x0: 1161201360Srdivacky insn->eaDisplacement = EA_DISP_NONE; /* readSIB may override this */ 1162201360Srdivacky switch (rm) { 1163201360Srdivacky case 0x4: 1164201360Srdivacky case 0xc: /* in case REXW.b is set */ 1165249423Sdim insn->eaBase = (insn->addressSize == 4 ? 1166201360Srdivacky EA_BASE_sib : EA_BASE_sib64); 1167201360Srdivacky readSIB(insn); 1168206124Srdivacky if (readDisplacement(insn)) 1169201360Srdivacky return -1; 1170201360Srdivacky break; 1171201360Srdivacky case 0x5: 1172201360Srdivacky insn->eaBase = EA_BASE_NONE; 1173201360Srdivacky insn->eaDisplacement = EA_DISP_32; 1174206124Srdivacky if (readDisplacement(insn)) 1175201360Srdivacky return -1; 1176201360Srdivacky break; 1177201360Srdivacky default: 1178201360Srdivacky insn->eaBase = (EABase)(insn->eaBaseBase + rm); 1179201360Srdivacky break; 1180201360Srdivacky } 1181201360Srdivacky break; 1182201360Srdivacky case 0x1: 1183201360Srdivacky case 0x2: 1184201360Srdivacky insn->eaDisplacement = (mod == 0x1 ? EA_DISP_8 : EA_DISP_32); 1185201360Srdivacky switch (rm) { 1186201360Srdivacky case 0x4: 1187201360Srdivacky case 0xc: /* in case REXW.b is set */ 1188201360Srdivacky insn->eaBase = EA_BASE_sib; 1189201360Srdivacky readSIB(insn); 1190206124Srdivacky if (readDisplacement(insn)) 1191201360Srdivacky return -1; 1192201360Srdivacky break; 1193201360Srdivacky default: 1194201360Srdivacky insn->eaBase = (EABase)(insn->eaBaseBase + rm); 1195206124Srdivacky if (readDisplacement(insn)) 1196201360Srdivacky return -1; 1197201360Srdivacky break; 1198201360Srdivacky } 1199201360Srdivacky break; 1200201360Srdivacky case 0x3: 1201201360Srdivacky insn->eaDisplacement = EA_DISP_NONE; 1202201360Srdivacky insn->eaBase = (EABase)(insn->eaRegBase + rm); 1203201360Srdivacky break; 1204201360Srdivacky } 1205201360Srdivacky break; 1206201360Srdivacky } /* switch (insn->addressSize) */ 1207249423Sdim 1208201360Srdivacky return 0; 1209201360Srdivacky} 1210201360Srdivacky 1211201360Srdivacky#define GENERIC_FIXUP_FUNC(name, base, prefix) \ 1212201360Srdivacky static uint8_t name(struct InternalInstruction *insn, \ 1213201360Srdivacky OperandType type, \ 1214201360Srdivacky uint8_t index, \ 1215201360Srdivacky uint8_t *valid) { \ 1216201360Srdivacky *valid = 1; \ 1217201360Srdivacky switch (type) { \ 1218201360Srdivacky default: \ 1219206124Srdivacky debug("Unhandled register type"); \ 1220206124Srdivacky *valid = 0; \ 1221206124Srdivacky return 0; \ 1222201360Srdivacky case TYPE_Rv: \ 1223201360Srdivacky return base + index; \ 1224201360Srdivacky case TYPE_R8: \ 1225206124Srdivacky if (insn->rexPrefix && \ 1226201360Srdivacky index >= 4 && index <= 7) { \ 1227201360Srdivacky return prefix##_SPL + (index - 4); \ 1228201360Srdivacky } else { \ 1229201360Srdivacky return prefix##_AL + index; \ 1230201360Srdivacky } \ 1231201360Srdivacky case TYPE_R16: \ 1232201360Srdivacky return prefix##_AX + index; \ 1233201360Srdivacky case TYPE_R32: \ 1234201360Srdivacky return prefix##_EAX + index; \ 1235201360Srdivacky case TYPE_R64: \ 1236201360Srdivacky return prefix##_RAX + index; \ 1237221345Sdim case TYPE_XMM256: \ 1238221345Sdim return prefix##_YMM0 + index; \ 1239201360Srdivacky case TYPE_XMM128: \ 1240201360Srdivacky case TYPE_XMM64: \ 1241201360Srdivacky case TYPE_XMM32: \ 1242201360Srdivacky case TYPE_XMM: \ 1243201360Srdivacky return prefix##_XMM0 + index; \ 1244201360Srdivacky case TYPE_MM64: \ 1245201360Srdivacky case TYPE_MM32: \ 1246201360Srdivacky case TYPE_MM: \ 1247206124Srdivacky if (index > 7) \ 1248201360Srdivacky *valid = 0; \ 1249201360Srdivacky return prefix##_MM0 + index; \ 1250201360Srdivacky case TYPE_SEGMENTREG: \ 1251206124Srdivacky if (index > 5) \ 1252201360Srdivacky *valid = 0; \ 1253201360Srdivacky return prefix##_ES + index; \ 1254201360Srdivacky case TYPE_DEBUGREG: \ 1255206124Srdivacky if (index > 7) \ 1256201360Srdivacky *valid = 0; \ 1257201360Srdivacky return prefix##_DR0 + index; \ 1258208599Srdivacky case TYPE_CONTROLREG: \ 1259206124Srdivacky if (index > 8) \ 1260201360Srdivacky *valid = 0; \ 1261208599Srdivacky return prefix##_CR0 + index; \ 1262201360Srdivacky } \ 1263201360Srdivacky } 1264201360Srdivacky 1265201360Srdivacky/* 1266201360Srdivacky * fixup*Value - Consults an operand type to determine the meaning of the 1267201360Srdivacky * reg or R/M field. If the operand is an XMM operand, for example, an 1268201360Srdivacky * operand would be XMM0 instead of AX, which readModRM() would otherwise 1269201360Srdivacky * misinterpret it as. 1270201360Srdivacky * 1271201360Srdivacky * @param insn - The instruction containing the operand. 1272201360Srdivacky * @param type - The operand type. 1273201360Srdivacky * @param index - The existing value of the field as reported by readModRM(). 1274201360Srdivacky * @param valid - The address of a uint8_t. The target is set to 1 if the 1275201360Srdivacky * field is valid for the register class; 0 if not. 1276206124Srdivacky * @return - The proper value. 1277201360Srdivacky */ 1278201360SrdivackyGENERIC_FIXUP_FUNC(fixupRegValue, insn->regBase, MODRM_REG) 1279201360SrdivackyGENERIC_FIXUP_FUNC(fixupRMValue, insn->eaRegBase, EA_REG) 1280201360Srdivacky 1281201360Srdivacky/* 1282201360Srdivacky * fixupReg - Consults an operand specifier to determine which of the 1283201360Srdivacky * fixup*Value functions to use in correcting readModRM()'ss interpretation. 1284201360Srdivacky * 1285201360Srdivacky * @param insn - See fixup*Value(). 1286201360Srdivacky * @param op - The operand specifier. 1287201360Srdivacky * @return - 0 if fixup was successful; -1 if the register returned was 1288201360Srdivacky * invalid for its class. 1289201360Srdivacky */ 1290249423Sdimstatic int fixupReg(struct InternalInstruction *insn, 1291218893Sdim const struct OperandSpecifier *op) { 1292201360Srdivacky uint8_t valid; 1293249423Sdim 1294201360Srdivacky dbgprintf(insn, "fixupReg()"); 1295249423Sdim 1296201360Srdivacky switch ((OperandEncoding)op->encoding) { 1297201360Srdivacky default: 1298206124Srdivacky debug("Expected a REG or R/M encoding in fixupReg"); 1299206124Srdivacky return -1; 1300221345Sdim case ENCODING_VVVV: 1301221345Sdim insn->vvvv = (Reg)fixupRegValue(insn, 1302221345Sdim (OperandType)op->type, 1303221345Sdim insn->vvvv, 1304221345Sdim &valid); 1305221345Sdim if (!valid) 1306221345Sdim return -1; 1307221345Sdim break; 1308201360Srdivacky case ENCODING_REG: 1309201360Srdivacky insn->reg = (Reg)fixupRegValue(insn, 1310201360Srdivacky (OperandType)op->type, 1311201360Srdivacky insn->reg - insn->regBase, 1312201360Srdivacky &valid); 1313201360Srdivacky if (!valid) 1314201360Srdivacky return -1; 1315201360Srdivacky break; 1316201360Srdivacky case ENCODING_RM: 1317201360Srdivacky if (insn->eaBase >= insn->eaRegBase) { 1318201360Srdivacky insn->eaBase = (EABase)fixupRMValue(insn, 1319201360Srdivacky (OperandType)op->type, 1320201360Srdivacky insn->eaBase - insn->eaRegBase, 1321201360Srdivacky &valid); 1322201360Srdivacky if (!valid) 1323201360Srdivacky return -1; 1324201360Srdivacky } 1325201360Srdivacky break; 1326201360Srdivacky } 1327249423Sdim 1328201360Srdivacky return 0; 1329201360Srdivacky} 1330201360Srdivacky 1331201360Srdivacky/* 1332249423Sdim * readOpcodeModifier - Reads an operand from the opcode field of an 1333201360Srdivacky * instruction. Handles AddRegFrm instructions. 1334201360Srdivacky * 1335201360Srdivacky * @param insn - The instruction whose opcode field is to be read. 1336201360Srdivacky * @param inModRM - Indicates that the opcode field is to be read from the 1337201360Srdivacky * ModR/M extension; useful for escape opcodes 1338206124Srdivacky * @return - 0 on success; nonzero otherwise. 1339201360Srdivacky */ 1340206124Srdivackystatic int readOpcodeModifier(struct InternalInstruction* insn) { 1341201360Srdivacky dbgprintf(insn, "readOpcodeModifier()"); 1342249423Sdim 1343201360Srdivacky if (insn->consumedOpcodeModifier) 1344206124Srdivacky return 0; 1345249423Sdim 1346201360Srdivacky insn->consumedOpcodeModifier = TRUE; 1347249423Sdim 1348206124Srdivacky switch (insn->spec->modifierType) { 1349201360Srdivacky default: 1350206124Srdivacky debug("Unknown modifier type."); 1351206124Srdivacky return -1; 1352201360Srdivacky case MODIFIER_NONE: 1353206124Srdivacky debug("No modifier but an operand expects one."); 1354206124Srdivacky return -1; 1355201360Srdivacky case MODIFIER_OPCODE: 1356201360Srdivacky insn->opcodeModifier = insn->opcode - insn->spec->modifierBase; 1357206124Srdivacky return 0; 1358201360Srdivacky case MODIFIER_MODRM: 1359201360Srdivacky insn->opcodeModifier = insn->modRM - insn->spec->modifierBase; 1360206124Srdivacky return 0; 1361249423Sdim } 1362201360Srdivacky} 1363201360Srdivacky 1364201360Srdivacky/* 1365249423Sdim * readOpcodeRegister - Reads an operand from the opcode field of an 1366201360Srdivacky * instruction and interprets it appropriately given the operand width. 1367201360Srdivacky * Handles AddRegFrm instructions. 1368201360Srdivacky * 1369201360Srdivacky * @param insn - See readOpcodeModifier(). 1370201360Srdivacky * @param size - The width (in bytes) of the register being specified. 1371201360Srdivacky * 1 means AL and friends, 2 means AX, 4 means EAX, and 8 means 1372201360Srdivacky * RAX. 1373206124Srdivacky * @return - 0 on success; nonzero otherwise. 1374201360Srdivacky */ 1375206124Srdivackystatic int readOpcodeRegister(struct InternalInstruction* insn, uint8_t size) { 1376201360Srdivacky dbgprintf(insn, "readOpcodeRegister()"); 1377201360Srdivacky 1378206124Srdivacky if (readOpcodeModifier(insn)) 1379206124Srdivacky return -1; 1380249423Sdim 1381201360Srdivacky if (size == 0) 1382201360Srdivacky size = insn->registerSize; 1383249423Sdim 1384201360Srdivacky switch (size) { 1385201360Srdivacky case 1: 1386249423Sdim insn->opcodeRegister = (Reg)(MODRM_REG_AL + ((bFromREX(insn->rexPrefix) << 3) 1387201360Srdivacky | insn->opcodeModifier)); 1388249423Sdim if (insn->rexPrefix && 1389206124Srdivacky insn->opcodeRegister >= MODRM_REG_AL + 0x4 && 1390206124Srdivacky insn->opcodeRegister < MODRM_REG_AL + 0x8) { 1391201360Srdivacky insn->opcodeRegister = (Reg)(MODRM_REG_SPL 1392201360Srdivacky + (insn->opcodeRegister - MODRM_REG_AL - 4)); 1393201360Srdivacky } 1394249423Sdim 1395201360Srdivacky break; 1396201360Srdivacky case 2: 1397201360Srdivacky insn->opcodeRegister = (Reg)(MODRM_REG_AX 1398249423Sdim + ((bFromREX(insn->rexPrefix) << 3) 1399201360Srdivacky | insn->opcodeModifier)); 1400201360Srdivacky break; 1401201360Srdivacky case 4: 1402206124Srdivacky insn->opcodeRegister = (Reg)(MODRM_REG_EAX 1403249423Sdim + ((bFromREX(insn->rexPrefix) << 3) 1404201360Srdivacky | insn->opcodeModifier)); 1405201360Srdivacky break; 1406201360Srdivacky case 8: 1407249423Sdim insn->opcodeRegister = (Reg)(MODRM_REG_RAX 1408249423Sdim + ((bFromREX(insn->rexPrefix) << 3) 1409201360Srdivacky | insn->opcodeModifier)); 1410201360Srdivacky break; 1411201360Srdivacky } 1412249423Sdim 1413206124Srdivacky return 0; 1414201360Srdivacky} 1415201360Srdivacky 1416201360Srdivacky/* 1417201360Srdivacky * readImmediate - Consumes an immediate operand from an instruction, given the 1418201360Srdivacky * desired operand size. 1419201360Srdivacky * 1420201360Srdivacky * @param insn - The instruction whose operand is to be read. 1421201360Srdivacky * @param size - The width (in bytes) of the operand. 1422201360Srdivacky * @return - 0 if the immediate was successfully consumed; nonzero 1423201360Srdivacky * otherwise. 1424201360Srdivacky */ 1425201360Srdivackystatic int readImmediate(struct InternalInstruction* insn, uint8_t size) { 1426201360Srdivacky uint8_t imm8; 1427201360Srdivacky uint16_t imm16; 1428201360Srdivacky uint32_t imm32; 1429201360Srdivacky uint64_t imm64; 1430249423Sdim 1431201360Srdivacky dbgprintf(insn, "readImmediate()"); 1432249423Sdim 1433206124Srdivacky if (insn->numImmediatesConsumed == 2) { 1434206124Srdivacky debug("Already consumed two immediates"); 1435206124Srdivacky return -1; 1436206124Srdivacky } 1437249423Sdim 1438201360Srdivacky if (size == 0) 1439201360Srdivacky size = insn->immediateSize; 1440201360Srdivacky else 1441201360Srdivacky insn->immediateSize = size; 1442234353Sdim insn->immediateOffset = insn->readerCursor - insn->startLocation; 1443249423Sdim 1444201360Srdivacky switch (size) { 1445201360Srdivacky case 1: 1446201360Srdivacky if (consumeByte(insn, &imm8)) 1447201360Srdivacky return -1; 1448201360Srdivacky insn->immediates[insn->numImmediatesConsumed] = imm8; 1449201360Srdivacky break; 1450201360Srdivacky case 2: 1451201360Srdivacky if (consumeUInt16(insn, &imm16)) 1452201360Srdivacky return -1; 1453201360Srdivacky insn->immediates[insn->numImmediatesConsumed] = imm16; 1454201360Srdivacky break; 1455201360Srdivacky case 4: 1456201360Srdivacky if (consumeUInt32(insn, &imm32)) 1457201360Srdivacky return -1; 1458201360Srdivacky insn->immediates[insn->numImmediatesConsumed] = imm32; 1459201360Srdivacky break; 1460201360Srdivacky case 8: 1461201360Srdivacky if (consumeUInt64(insn, &imm64)) 1462201360Srdivacky return -1; 1463201360Srdivacky insn->immediates[insn->numImmediatesConsumed] = imm64; 1464201360Srdivacky break; 1465201360Srdivacky } 1466249423Sdim 1467201360Srdivacky insn->numImmediatesConsumed++; 1468249423Sdim 1469201360Srdivacky return 0; 1470201360Srdivacky} 1471201360Srdivacky 1472201360Srdivacky/* 1473226633Sdim * readVVVV - Consumes vvvv from an instruction if it has a VEX prefix. 1474221345Sdim * 1475221345Sdim * @param insn - The instruction whose operand is to be read. 1476226633Sdim * @return - 0 if the vvvv was successfully consumed; nonzero 1477221345Sdim * otherwise. 1478221345Sdim */ 1479221345Sdimstatic int readVVVV(struct InternalInstruction* insn) { 1480221345Sdim dbgprintf(insn, "readVVVV()"); 1481249423Sdim 1482221345Sdim if (insn->vexSize == 3) 1483221345Sdim insn->vvvv = vvvvFromVEX3of3(insn->vexPrefix[2]); 1484221345Sdim else if (insn->vexSize == 2) 1485221345Sdim insn->vvvv = vvvvFromVEX2of2(insn->vexPrefix[1]); 1486221345Sdim else 1487221345Sdim return -1; 1488221345Sdim 1489226633Sdim if (insn->mode != MODE_64BIT) 1490226633Sdim insn->vvvv &= 0x7; 1491226633Sdim 1492221345Sdim return 0; 1493221345Sdim} 1494221345Sdim 1495221345Sdim/* 1496201360Srdivacky * readOperands - Consults the specifier for an instruction and consumes all 1497201360Srdivacky * operands for that instruction, interpreting them as it goes. 1498201360Srdivacky * 1499201360Srdivacky * @param insn - The instruction whose operands are to be read and interpreted. 1500201360Srdivacky * @return - 0 if all operands could be read; nonzero otherwise. 1501201360Srdivacky */ 1502201360Srdivackystatic int readOperands(struct InternalInstruction* insn) { 1503201360Srdivacky int index; 1504226633Sdim int hasVVVV, needVVVV; 1505234353Sdim int sawRegImm = 0; 1506249423Sdim 1507201360Srdivacky dbgprintf(insn, "readOperands()"); 1508226633Sdim 1509226633Sdim /* If non-zero vvvv specified, need to make sure one of the operands 1510226633Sdim uses it. */ 1511226633Sdim hasVVVV = !readVVVV(insn); 1512226633Sdim needVVVV = hasVVVV && (insn->vvvv != 0); 1513249423Sdim 1514201360Srdivacky for (index = 0; index < X86_MAX_OPERANDS; ++index) { 1515239462Sdim switch (x86OperandSets[insn->spec->operands][index].encoding) { 1516201360Srdivacky case ENCODING_NONE: 1517201360Srdivacky break; 1518201360Srdivacky case ENCODING_REG: 1519201360Srdivacky case ENCODING_RM: 1520201360Srdivacky if (readModRM(insn)) 1521201360Srdivacky return -1; 1522239462Sdim if (fixupReg(insn, &x86OperandSets[insn->spec->operands][index])) 1523201360Srdivacky return -1; 1524201360Srdivacky break; 1525201360Srdivacky case ENCODING_CB: 1526201360Srdivacky case ENCODING_CW: 1527201360Srdivacky case ENCODING_CD: 1528201360Srdivacky case ENCODING_CP: 1529201360Srdivacky case ENCODING_CO: 1530201360Srdivacky case ENCODING_CT: 1531201360Srdivacky dbgprintf(insn, "We currently don't hande code-offset encodings"); 1532201360Srdivacky return -1; 1533201360Srdivacky case ENCODING_IB: 1534234353Sdim if (sawRegImm) { 1535234353Sdim /* Saw a register immediate so don't read again and instead split the 1536234353Sdim previous immediate. FIXME: This is a hack. */ 1537234353Sdim insn->immediates[insn->numImmediatesConsumed] = 1538234353Sdim insn->immediates[insn->numImmediatesConsumed - 1] & 0xf; 1539234353Sdim ++insn->numImmediatesConsumed; 1540234353Sdim break; 1541234353Sdim } 1542201360Srdivacky if (readImmediate(insn, 1)) 1543201360Srdivacky return -1; 1544239462Sdim if (x86OperandSets[insn->spec->operands][index].type == TYPE_IMM3 && 1545207618Srdivacky insn->immediates[insn->numImmediatesConsumed - 1] > 7) 1546207618Srdivacky return -1; 1547239462Sdim if (x86OperandSets[insn->spec->operands][index].type == TYPE_IMM5 && 1548234353Sdim insn->immediates[insn->numImmediatesConsumed - 1] > 31) 1549234353Sdim return -1; 1550239462Sdim if (x86OperandSets[insn->spec->operands][index].type == TYPE_XMM128 || 1551239462Sdim x86OperandSets[insn->spec->operands][index].type == TYPE_XMM256) 1552234353Sdim sawRegImm = 1; 1553201360Srdivacky break; 1554201360Srdivacky case ENCODING_IW: 1555201360Srdivacky if (readImmediate(insn, 2)) 1556201360Srdivacky return -1; 1557201360Srdivacky break; 1558201360Srdivacky case ENCODING_ID: 1559201360Srdivacky if (readImmediate(insn, 4)) 1560201360Srdivacky return -1; 1561201360Srdivacky break; 1562201360Srdivacky case ENCODING_IO: 1563201360Srdivacky if (readImmediate(insn, 8)) 1564201360Srdivacky return -1; 1565201360Srdivacky break; 1566201360Srdivacky case ENCODING_Iv: 1567206124Srdivacky if (readImmediate(insn, insn->immediateSize)) 1568206124Srdivacky return -1; 1569207618Srdivacky break; 1570201360Srdivacky case ENCODING_Ia: 1571206124Srdivacky if (readImmediate(insn, insn->addressSize)) 1572206124Srdivacky return -1; 1573201360Srdivacky break; 1574201360Srdivacky case ENCODING_RB: 1575206124Srdivacky if (readOpcodeRegister(insn, 1)) 1576206124Srdivacky return -1; 1577201360Srdivacky break; 1578201360Srdivacky case ENCODING_RW: 1579206124Srdivacky if (readOpcodeRegister(insn, 2)) 1580206124Srdivacky return -1; 1581201360Srdivacky break; 1582201360Srdivacky case ENCODING_RD: 1583206124Srdivacky if (readOpcodeRegister(insn, 4)) 1584206124Srdivacky return -1; 1585201360Srdivacky break; 1586201360Srdivacky case ENCODING_RO: 1587206124Srdivacky if (readOpcodeRegister(insn, 8)) 1588206124Srdivacky return -1; 1589201360Srdivacky break; 1590201360Srdivacky case ENCODING_Rv: 1591206124Srdivacky if (readOpcodeRegister(insn, 0)) 1592206124Srdivacky return -1; 1593201360Srdivacky break; 1594201360Srdivacky case ENCODING_I: 1595206124Srdivacky if (readOpcodeModifier(insn)) 1596206124Srdivacky return -1; 1597221345Sdim break; 1598221345Sdim case ENCODING_VVVV: 1599226633Sdim needVVVV = 0; /* Mark that we have found a VVVV operand. */ 1600226633Sdim if (!hasVVVV) 1601221345Sdim return -1; 1602239462Sdim if (fixupReg(insn, &x86OperandSets[insn->spec->operands][index])) 1603221345Sdim return -1; 1604221345Sdim break; 1605201360Srdivacky case ENCODING_DUP: 1606201360Srdivacky break; 1607201360Srdivacky default: 1608201360Srdivacky dbgprintf(insn, "Encountered an operand with an unknown encoding."); 1609201360Srdivacky return -1; 1610201360Srdivacky } 1611201360Srdivacky } 1612226633Sdim 1613226633Sdim /* If we didn't find ENCODING_VVVV operand, but non-zero vvvv present, fail */ 1614226633Sdim if (needVVVV) return -1; 1615249423Sdim 1616201360Srdivacky return 0; 1617201360Srdivacky} 1618201360Srdivacky 1619201360Srdivacky/* 1620201360Srdivacky * decodeInstruction - Reads and interprets a full instruction provided by the 1621201360Srdivacky * user. 1622201360Srdivacky * 1623249423Sdim * @param insn - A pointer to the instruction to be populated. Must be 1624201360Srdivacky * pre-allocated. 1625201360Srdivacky * @param reader - The function to be used to read the instruction's bytes. 1626201360Srdivacky * @param readerArg - A generic argument to be passed to the reader to store 1627201360Srdivacky * any internal state. 1628201360Srdivacky * @param logger - If non-NULL, the function to be used to write log messages 1629201360Srdivacky * and warnings. 1630201360Srdivacky * @param loggerArg - A generic argument to be passed to the logger to store 1631201360Srdivacky * any internal state. 1632201360Srdivacky * @param startLoc - The address (in the reader's address space) of the first 1633201360Srdivacky * byte in the instruction. 1634201360Srdivacky * @param mode - The mode (real mode, IA-32e, or IA-32e in 64-bit mode) to 1635201360Srdivacky * decode the instruction in. 1636201360Srdivacky * @return - 0 if the instruction's memory could be read; nonzero if 1637201360Srdivacky * not. 1638201360Srdivacky */ 1639201360Srdivackyint decodeInstruction(struct InternalInstruction* insn, 1640201360Srdivacky byteReader_t reader, 1641243830Sdim const void* readerArg, 1642201360Srdivacky dlog_t logger, 1643201360Srdivacky void* loggerArg, 1644243830Sdim const void* miiArg, 1645201360Srdivacky uint64_t startLoc, 1646201360Srdivacky DisassemblerMode mode) { 1647201360Srdivacky memset(insn, 0, sizeof(struct InternalInstruction)); 1648249423Sdim 1649201360Srdivacky insn->reader = reader; 1650201360Srdivacky insn->readerArg = readerArg; 1651201360Srdivacky insn->dlog = logger; 1652201360Srdivacky insn->dlogArg = loggerArg; 1653201360Srdivacky insn->startLocation = startLoc; 1654201360Srdivacky insn->readerCursor = startLoc; 1655201360Srdivacky insn->mode = mode; 1656201360Srdivacky insn->numImmediatesConsumed = 0; 1657249423Sdim 1658201360Srdivacky if (readPrefixes(insn) || 1659201360Srdivacky readOpcode(insn) || 1660234353Sdim getID(insn, miiArg) || 1661201360Srdivacky insn->instructionID == 0 || 1662201360Srdivacky readOperands(insn)) 1663201360Srdivacky return -1; 1664239462Sdim 1665239462Sdim insn->operands = &x86OperandSets[insn->spec->operands][0]; 1666249423Sdim 1667201360Srdivacky insn->length = insn->readerCursor - insn->startLocation; 1668249423Sdim 1669205407Srdivacky dbgprintf(insn, "Read from 0x%llx to 0x%llx: length %zu", 1670205407Srdivacky startLoc, insn->readerCursor, insn->length); 1671249423Sdim 1672201360Srdivacky if (insn->length > 15) 1673201360Srdivacky dbgprintf(insn, "Instruction exceeds 15-byte limit"); 1674249423Sdim 1675201360Srdivacky return 0; 1676201360Srdivacky} 1677