1234353Sdim/*===-- X86DisassemblerDecoder.c - Disassembler decoder ------------*- C -*-===*
2201360Srdivacky *
3201360Srdivacky *                     The LLVM Compiler Infrastructure
4201360Srdivacky *
5201360Srdivacky * This file is distributed under the University of Illinois Open Source
6201360Srdivacky * License. See LICENSE.TXT for details.
7201360Srdivacky *
8201360Srdivacky *===----------------------------------------------------------------------===*
9201360Srdivacky *
10201360Srdivacky * This file is part of the X86 Disassembler.
11201360Srdivacky * It contains the implementation of the instruction decoder.
12201360Srdivacky * Documentation for the disassembler can be found in X86Disassembler.h.
13201360Srdivacky *
14201360Srdivacky *===----------------------------------------------------------------------===*/
15201360Srdivacky
16201360Srdivacky#include <stdarg.h>   /* for va_*()       */
17201360Srdivacky#include <stdio.h>    /* for vsnprintf()  */
18201360Srdivacky#include <stdlib.h>   /* for exit()       */
19201360Srdivacky#include <string.h>   /* for memset()     */
20201360Srdivacky
21201360Srdivacky#include "X86DisassemblerDecoder.h"
22201360Srdivacky
23201360Srdivacky#include "X86GenDisassemblerTables.inc"
24201360Srdivacky
/* Boolean constants used by the decoder routines in this file. */
#define TRUE  1
#define FALSE 0

/* File-local one-byte boolean type. */
typedef int8_t bool;

/*
 * debug - Passes a message to x86DisassemblerDebug() together with the current
 *   file and line.  Expands to an empty statement when NDEBUG is defined.
 */
#ifdef NDEBUG
#define debug(s) do { } while (0)
#else
#define debug(s) do { x86DisassemblerDebug(__FILE__, __LINE__, s); } while (0)
#endif
35201360Srdivacky
36206124Srdivacky
37201360Srdivacky/*
38201360Srdivacky * contextForAttrs - Client for the instruction context table.  Takes a set of
39201360Srdivacky *   attributes and returns the appropriate decode context.
40201360Srdivacky *
41201360Srdivacky * @param attrMask  - Attributes, from the enumeration attributeBits.
42201360Srdivacky * @return          - The InstructionContext to use when looking up an
43201360Srdivacky *                    an instruction with these attributes.
44201360Srdivacky */
45201360Srdivackystatic InstructionContext contextForAttrs(uint8_t attrMask) {
46201360Srdivacky  return CONTEXTS_SYM[attrMask];
47201360Srdivacky}
48201360Srdivacky
49201360Srdivacky/*
50201360Srdivacky * modRMRequired - Reads the appropriate instruction table to determine whether
51201360Srdivacky *   the ModR/M byte is required to decode a particular instruction.
52201360Srdivacky *
53201360Srdivacky * @param type        - The opcode type (i.e., how many bytes it has).
54201360Srdivacky * @param insnContext - The context for the instruction, as returned by
55201360Srdivacky *                      contextForAttrs.
56201360Srdivacky * @param opcode      - The last byte of the instruction's opcode, not counting
57201360Srdivacky *                      ModR/M extensions and escapes.
58201360Srdivacky * @return            - TRUE if the ModR/M byte is required, FALSE otherwise.
59201360Srdivacky */
60201360Srdivackystatic int modRMRequired(OpcodeType type,
61226633Sdim                         InstructionContext insnContext,
62226633Sdim                         uint8_t opcode) {
63201360Srdivacky  const struct ContextDecision* decision = 0;
64249423Sdim
65201360Srdivacky  switch (type) {
66201360Srdivacky  case ONEBYTE:
67201360Srdivacky    decision = &ONEBYTE_SYM;
68201360Srdivacky    break;
69201360Srdivacky  case TWOBYTE:
70201360Srdivacky    decision = &TWOBYTE_SYM;
71201360Srdivacky    break;
72201360Srdivacky  case THREEBYTE_38:
73201360Srdivacky    decision = &THREEBYTE38_SYM;
74201360Srdivacky    break;
75201360Srdivacky  case THREEBYTE_3A:
76201360Srdivacky    decision = &THREEBYTE3A_SYM;
77201360Srdivacky    break;
78221345Sdim  case THREEBYTE_A6:
79221345Sdim    decision = &THREEBYTEA6_SYM;
80221345Sdim    break;
81221345Sdim  case THREEBYTE_A7:
82221345Sdim    decision = &THREEBYTEA7_SYM;
83221345Sdim    break;
84201360Srdivacky  }
85234353Sdim
86201360Srdivacky  return decision->opcodeDecisions[insnContext].modRMDecisions[opcode].
87201360Srdivacky    modrm_type != MODRM_ONEENTRY;
88201360Srdivacky}
89201360Srdivacky
90201360Srdivacky/*
91201360Srdivacky * decode - Reads the appropriate instruction table to obtain the unique ID of
92201360Srdivacky *   an instruction.
93201360Srdivacky *
94201360Srdivacky * @param type        - See modRMRequired().
95201360Srdivacky * @param insnContext - See modRMRequired().
96201360Srdivacky * @param opcode      - See modRMRequired().
97201360Srdivacky * @param modRM       - The ModR/M byte if required, or any value if not.
98206124Srdivacky * @return            - The UID of the instruction, or 0 on failure.
99201360Srdivacky */
100201360Srdivackystatic InstrUID decode(OpcodeType type,
101206124Srdivacky                       InstructionContext insnContext,
102206124Srdivacky                       uint8_t opcode,
103206124Srdivacky                       uint8_t modRM) {
104234353Sdim  const struct ModRMDecision* dec = 0;
105249423Sdim
106201360Srdivacky  switch (type) {
107201360Srdivacky  case ONEBYTE:
108201360Srdivacky    dec = &ONEBYTE_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode];
109201360Srdivacky    break;
110201360Srdivacky  case TWOBYTE:
111201360Srdivacky    dec = &TWOBYTE_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode];
112201360Srdivacky    break;
113201360Srdivacky  case THREEBYTE_38:
114201360Srdivacky    dec = &THREEBYTE38_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode];
115201360Srdivacky    break;
116201360Srdivacky  case THREEBYTE_3A:
117201360Srdivacky    dec = &THREEBYTE3A_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode];
118201360Srdivacky    break;
119221345Sdim  case THREEBYTE_A6:
120221345Sdim    dec = &THREEBYTEA6_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode];
121221345Sdim    break;
122221345Sdim  case THREEBYTE_A7:
123221345Sdim    dec = &THREEBYTEA7_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode];
124221345Sdim    break;
125201360Srdivacky  }
126249423Sdim
127201360Srdivacky  switch (dec->modrm_type) {
128201360Srdivacky  default:
129206124Srdivacky    debug("Corrupt table!  Unknown modrm_type");
130206124Srdivacky    return 0;
131201360Srdivacky  case MODRM_ONEENTRY:
132234353Sdim    return modRMTable[dec->instructionIDs];
133201360Srdivacky  case MODRM_SPLITRM:
134201360Srdivacky    if (modFromModRM(modRM) == 0x3)
135234353Sdim      return modRMTable[dec->instructionIDs+1];
136234353Sdim    return modRMTable[dec->instructionIDs];
137234353Sdim  case MODRM_SPLITREG:
138234353Sdim    if (modFromModRM(modRM) == 0x3)
139234353Sdim      return modRMTable[dec->instructionIDs+((modRM & 0x38) >> 3)+8];
140234353Sdim    return modRMTable[dec->instructionIDs+((modRM & 0x38) >> 3)];
141243830Sdim  case MODRM_SPLITMISC:
142243830Sdim    if (modFromModRM(modRM) == 0x3)
143243830Sdim      return modRMTable[dec->instructionIDs+(modRM & 0x3f)+8];
144243830Sdim    return modRMTable[dec->instructionIDs+((modRM & 0x38) >> 3)];
145201360Srdivacky  case MODRM_FULL:
146234353Sdim    return modRMTable[dec->instructionIDs+modRM];
147201360Srdivacky  }
148201360Srdivacky}
149201360Srdivacky
150201360Srdivacky/*
151201360Srdivacky * specifierForUID - Given a UID, returns the name and operand specification for
152201360Srdivacky *   that instruction.
153201360Srdivacky *
154201360Srdivacky * @param uid - The unique ID for the instruction.  This should be returned by
155201360Srdivacky *              decode(); specifierForUID will not check bounds.
156201360Srdivacky * @return    - A pointer to the specification for that instruction.
157201360Srdivacky */
158218893Sdimstatic const struct InstructionSpecifier *specifierForUID(InstrUID uid) {
159201360Srdivacky  return &INSTRUCTIONS_SYM[uid];
160201360Srdivacky}
161201360Srdivacky
162201360Srdivacky/*
163201360Srdivacky * consumeByte - Uses the reader function provided by the user to consume one
164201360Srdivacky *   byte from the instruction's memory and advance the cursor.
165201360Srdivacky *
166201360Srdivacky * @param insn  - The instruction with the reader function to use.  The cursor
167201360Srdivacky *                for this instruction is advanced.
168201360Srdivacky * @param byte  - A pointer to a pre-allocated memory buffer to be populated
169201360Srdivacky *                with the data read.
170201360Srdivacky * @return      - 0 if the read was successful; nonzero otherwise.
171201360Srdivacky */
172201360Srdivackystatic int consumeByte(struct InternalInstruction* insn, uint8_t* byte) {
173201360Srdivacky  int ret = insn->reader(insn->readerArg, byte, insn->readerCursor);
174249423Sdim
175201360Srdivacky  if (!ret)
176201360Srdivacky    ++(insn->readerCursor);
177249423Sdim
178201360Srdivacky  return ret;
179201360Srdivacky}
180201360Srdivacky
181201360Srdivacky/*
182201360Srdivacky * lookAtByte - Like consumeByte, but does not advance the cursor.
183201360Srdivacky *
184201360Srdivacky * @param insn  - See consumeByte().
185201360Srdivacky * @param byte  - See consumeByte().
186201360Srdivacky * @return      - See consumeByte().
187201360Srdivacky */
188201360Srdivackystatic int lookAtByte(struct InternalInstruction* insn, uint8_t* byte) {
189201360Srdivacky  return insn->reader(insn->readerArg, byte, insn->readerCursor);
190201360Srdivacky}
191201360Srdivacky
192201360Srdivackystatic void unconsumeByte(struct InternalInstruction* insn) {
193201360Srdivacky  insn->readerCursor--;
194201360Srdivacky}
195201360Srdivacky
196201360Srdivacky#define CONSUME_FUNC(name, type)                                  \
197201360Srdivacky  static int name(struct InternalInstruction* insn, type* ptr) {  \
198201360Srdivacky    type combined = 0;                                            \
199201360Srdivacky    unsigned offset;                                              \
200201360Srdivacky    for (offset = 0; offset < sizeof(type); ++offset) {           \
201201360Srdivacky      uint8_t byte;                                               \
202201360Srdivacky      int ret = insn->reader(insn->readerArg,                     \
203201360Srdivacky                             &byte,                               \
204201360Srdivacky                             insn->readerCursor + offset);        \
205201360Srdivacky      if (ret)                                                    \
206201360Srdivacky        return ret;                                               \
207243830Sdim      combined = combined | ((uint64_t)byte << (offset * 8));     \
208201360Srdivacky    }                                                             \
209201360Srdivacky    *ptr = combined;                                              \
210201360Srdivacky    insn->readerCursor += sizeof(type);                           \
211201360Srdivacky    return 0;                                                     \
212201360Srdivacky  }
213201360Srdivacky
214201360Srdivacky/*
215201360Srdivacky * consume* - Use the reader function provided by the user to consume data
216201360Srdivacky *   values of various sizes from the instruction's memory and advance the
217201360Srdivacky *   cursor appropriately.  These readers perform endian conversion.
218201360Srdivacky *
219201360Srdivacky * @param insn    - See consumeByte().
220201360Srdivacky * @param ptr     - A pointer to a pre-allocated memory of appropriate size to
221201360Srdivacky *                  be populated with the data read.
222201360Srdivacky * @return        - See consumeByte().
223201360Srdivacky */
224201360SrdivackyCONSUME_FUNC(consumeInt8, int8_t)
225201360SrdivackyCONSUME_FUNC(consumeInt16, int16_t)
226201360SrdivackyCONSUME_FUNC(consumeInt32, int32_t)
227201360SrdivackyCONSUME_FUNC(consumeUInt16, uint16_t)
228201360SrdivackyCONSUME_FUNC(consumeUInt32, uint32_t)
229201360SrdivackyCONSUME_FUNC(consumeUInt64, uint64_t)
230201360Srdivacky
231201360Srdivacky/*
232201360Srdivacky * dbgprintf - Uses the logging function provided by the user to log a single
233201360Srdivacky *   message, typically without a carriage-return.
234201360Srdivacky *
235201360Srdivacky * @param insn    - The instruction containing the logging function.
236201360Srdivacky * @param format  - See printf().
237201360Srdivacky * @param ...     - See printf().
238201360Srdivacky */
239201360Srdivackystatic void dbgprintf(struct InternalInstruction* insn,
240201360Srdivacky                      const char* format,
241249423Sdim                      ...) {
242201360Srdivacky  char buffer[256];
243201360Srdivacky  va_list ap;
244249423Sdim
245201360Srdivacky  if (!insn->dlog)
246201360Srdivacky    return;
247249423Sdim
248201360Srdivacky  va_start(ap, format);
249201360Srdivacky  (void)vsnprintf(buffer, sizeof(buffer), format, ap);
250201360Srdivacky  va_end(ap);
251249423Sdim
252201360Srdivacky  insn->dlog(insn->dlogArg, buffer);
253249423Sdim
254201360Srdivacky  return;
255201360Srdivacky}
256201360Srdivacky
257201360Srdivacky/*
258201360Srdivacky * setPrefixPresent - Marks that a particular prefix is present at a particular
259201360Srdivacky *   location.
260201360Srdivacky *
261201360Srdivacky * @param insn      - The instruction to be marked as having the prefix.
262201360Srdivacky * @param prefix    - The prefix that is present.
263201360Srdivacky * @param location  - The location where the prefix is located (in the address
264201360Srdivacky *                    space of the instruction's reader).
265201360Srdivacky */
266201360Srdivackystatic void setPrefixPresent(struct InternalInstruction* insn,
267201360Srdivacky                                    uint8_t prefix,
268201360Srdivacky                                    uint64_t location)
269201360Srdivacky{
270201360Srdivacky  insn->prefixPresent[prefix] = 1;
271201360Srdivacky  insn->prefixLocations[prefix] = location;
272201360Srdivacky}
273201360Srdivacky
274201360Srdivacky/*
275201360Srdivacky * isPrefixAtLocation - Queries an instruction to determine whether a prefix is
276201360Srdivacky *   present at a given location.
277201360Srdivacky *
278201360Srdivacky * @param insn      - The instruction to be queried.
279201360Srdivacky * @param prefix    - The prefix.
280201360Srdivacky * @param location  - The location to query.
281201360Srdivacky * @return          - Whether the prefix is at that location.
282201360Srdivacky */
283201360Srdivackystatic BOOL isPrefixAtLocation(struct InternalInstruction* insn,
284201360Srdivacky                               uint8_t prefix,
285201360Srdivacky                               uint64_t location)
286201360Srdivacky{
287201360Srdivacky  if (insn->prefixPresent[prefix] == 1 &&
288201360Srdivacky     insn->prefixLocations[prefix] == location)
289201360Srdivacky    return TRUE;
290201360Srdivacky  else
291201360Srdivacky    return FALSE;
292201360Srdivacky}
293201360Srdivacky
294201360Srdivacky/*
295201360Srdivacky * readPrefixes - Consumes all of an instruction's prefix bytes, and marks the
296201360Srdivacky *   instruction as having them.  Also sets the instruction's default operand,
297201360Srdivacky *   address, and other relevant data sizes to report operands correctly.
298201360Srdivacky *
299201360Srdivacky * @param insn  - The instruction whose prefixes are to be read.
300201360Srdivacky * @return      - 0 if the instruction could be read until the end of the prefix
301201360Srdivacky *                bytes, and no prefixes conflicted; nonzero otherwise.
302201360Srdivacky */
303201360Srdivackystatic int readPrefixes(struct InternalInstruction* insn) {
304201360Srdivacky  BOOL isPrefix = TRUE;
305201360Srdivacky  BOOL prefixGroups[4] = { FALSE };
306201360Srdivacky  uint64_t prefixLocation;
307218893Sdim  uint8_t byte = 0;
308249423Sdim
309201360Srdivacky  BOOL hasAdSize = FALSE;
310201360Srdivacky  BOOL hasOpSize = FALSE;
311249423Sdim
312201360Srdivacky  dbgprintf(insn, "readPrefixes()");
313249423Sdim
314201360Srdivacky  while (isPrefix) {
315201360Srdivacky    prefixLocation = insn->readerCursor;
316249423Sdim
317201360Srdivacky    if (consumeByte(insn, &byte))
318201360Srdivacky      return -1;
319234353Sdim
320234353Sdim    /*
321249423Sdim     * If the byte is a LOCK/REP/REPNE prefix and not a part of the opcode, then
322249423Sdim     * break and let it be disassembled as a normal "instruction".
323234353Sdim     */
324249423Sdim    if (insn->readerCursor - 1 == insn->startLocation
325249423Sdim        && (byte == 0xf0 || byte == 0xf2 || byte == 0xf3)) {
326249423Sdim      uint8_t nextByte;
327249423Sdim      if (byte == 0xf0)
328249423Sdim        break;
329249423Sdim      if (lookAtByte(insn, &nextByte))
330249423Sdim        return -1;
331249423Sdim      if (insn->mode == MODE_64BIT && (nextByte & 0xf0) == 0x40) {
332249423Sdim        if (consumeByte(insn, &nextByte))
333249423Sdim          return -1;
334249423Sdim        if (lookAtByte(insn, &nextByte))
335249423Sdim          return -1;
336249423Sdim        unconsumeByte(insn);
337249423Sdim      }
338249423Sdim      if (nextByte != 0x0f && nextByte != 0x90)
339249423Sdim        break;
340249423Sdim    }
341249423Sdim
342201360Srdivacky    switch (byte) {
343201360Srdivacky    case 0xf0:  /* LOCK */
344201360Srdivacky    case 0xf2:  /* REPNE/REPNZ */
345201360Srdivacky    case 0xf3:  /* REP or REPE/REPZ */
346201360Srdivacky      if (prefixGroups[0])
347201360Srdivacky        dbgprintf(insn, "Redundant Group 1 prefix");
348201360Srdivacky      prefixGroups[0] = TRUE;
349201360Srdivacky      setPrefixPresent(insn, byte, prefixLocation);
350201360Srdivacky      break;
351201360Srdivacky    case 0x2e:  /* CS segment override -OR- Branch not taken */
352201360Srdivacky    case 0x36:  /* SS segment override -OR- Branch taken */
353201360Srdivacky    case 0x3e:  /* DS segment override */
354201360Srdivacky    case 0x26:  /* ES segment override */
355201360Srdivacky    case 0x64:  /* FS segment override */
356201360Srdivacky    case 0x65:  /* GS segment override */
357201360Srdivacky      switch (byte) {
358201360Srdivacky      case 0x2e:
359201360Srdivacky        insn->segmentOverride = SEG_OVERRIDE_CS;
360201360Srdivacky        break;
361201360Srdivacky      case 0x36:
362201360Srdivacky        insn->segmentOverride = SEG_OVERRIDE_SS;
363201360Srdivacky        break;
364201360Srdivacky      case 0x3e:
365201360Srdivacky        insn->segmentOverride = SEG_OVERRIDE_DS;
366201360Srdivacky        break;
367201360Srdivacky      case 0x26:
368201360Srdivacky        insn->segmentOverride = SEG_OVERRIDE_ES;
369201360Srdivacky        break;
370201360Srdivacky      case 0x64:
371201360Srdivacky        insn->segmentOverride = SEG_OVERRIDE_FS;
372201360Srdivacky        break;
373201360Srdivacky      case 0x65:
374201360Srdivacky        insn->segmentOverride = SEG_OVERRIDE_GS;
375201360Srdivacky        break;
376201360Srdivacky      default:
377206124Srdivacky        debug("Unhandled override");
378206124Srdivacky        return -1;
379201360Srdivacky      }
380201360Srdivacky      if (prefixGroups[1])
381201360Srdivacky        dbgprintf(insn, "Redundant Group 2 prefix");
382201360Srdivacky      prefixGroups[1] = TRUE;
383201360Srdivacky      setPrefixPresent(insn, byte, prefixLocation);
384201360Srdivacky      break;
385201360Srdivacky    case 0x66:  /* Operand-size override */
386201360Srdivacky      if (prefixGroups[2])
387201360Srdivacky        dbgprintf(insn, "Redundant Group 3 prefix");
388201360Srdivacky      prefixGroups[2] = TRUE;
389201360Srdivacky      hasOpSize = TRUE;
390201360Srdivacky      setPrefixPresent(insn, byte, prefixLocation);
391201360Srdivacky      break;
392201360Srdivacky    case 0x67:  /* Address-size override */
393201360Srdivacky      if (prefixGroups[3])
394201360Srdivacky        dbgprintf(insn, "Redundant Group 4 prefix");
395201360Srdivacky      prefixGroups[3] = TRUE;
396201360Srdivacky      hasAdSize = TRUE;
397201360Srdivacky      setPrefixPresent(insn, byte, prefixLocation);
398201360Srdivacky      break;
399201360Srdivacky    default:    /* Not a prefix byte */
400201360Srdivacky      isPrefix = FALSE;
401201360Srdivacky      break;
402201360Srdivacky    }
403249423Sdim
404201360Srdivacky    if (isPrefix)
405201360Srdivacky      dbgprintf(insn, "Found prefix 0x%hhx", byte);
406201360Srdivacky  }
407249423Sdim
408221345Sdim  insn->vexSize = 0;
409249423Sdim
410221345Sdim  if (byte == 0xc4) {
411221345Sdim    uint8_t byte1;
412249423Sdim
413221345Sdim    if (lookAtByte(insn, &byte1)) {
414221345Sdim      dbgprintf(insn, "Couldn't read second byte of VEX");
415221345Sdim      return -1;
416221345Sdim    }
417249423Sdim
418226633Sdim    if (insn->mode == MODE_64BIT || (byte1 & 0xc0) == 0xc0) {
419221345Sdim      insn->vexSize = 3;
420221345Sdim      insn->necessaryPrefixLocation = insn->readerCursor - 1;
421221345Sdim    }
422221345Sdim    else {
423221345Sdim      unconsumeByte(insn);
424221345Sdim      insn->necessaryPrefixLocation = insn->readerCursor - 1;
425221345Sdim    }
426249423Sdim
427221345Sdim    if (insn->vexSize == 3) {
428221345Sdim      insn->vexPrefix[0] = byte;
429221345Sdim      consumeByte(insn, &insn->vexPrefix[1]);
430221345Sdim      consumeByte(insn, &insn->vexPrefix[2]);
431221345Sdim
432221345Sdim      /* We simulate the REX prefix for simplicity's sake */
433249423Sdim
434226633Sdim      if (insn->mode == MODE_64BIT) {
435249423Sdim        insn->rexPrefix = 0x40
436226633Sdim                        | (wFromVEX3of3(insn->vexPrefix[2]) << 3)
437226633Sdim                        | (rFromVEX2of3(insn->vexPrefix[1]) << 2)
438226633Sdim                        | (xFromVEX2of3(insn->vexPrefix[1]) << 1)
439226633Sdim                        | (bFromVEX2of3(insn->vexPrefix[1]) << 0);
440226633Sdim      }
441249423Sdim
442221345Sdim      switch (ppFromVEX3of3(insn->vexPrefix[2]))
443221345Sdim      {
444221345Sdim      default:
445221345Sdim        break;
446221345Sdim      case VEX_PREFIX_66:
447249423Sdim        hasOpSize = TRUE;
448221345Sdim        break;
449201360Srdivacky      }
450249423Sdim
451221345Sdim      dbgprintf(insn, "Found VEX prefix 0x%hhx 0x%hhx 0x%hhx", insn->vexPrefix[0], insn->vexPrefix[1], insn->vexPrefix[2]);
452221345Sdim    }
453221345Sdim  }
454221345Sdim  else if (byte == 0xc5) {
455221345Sdim    uint8_t byte1;
456249423Sdim
457221345Sdim    if (lookAtByte(insn, &byte1)) {
458221345Sdim      dbgprintf(insn, "Couldn't read second byte of VEX");
459221345Sdim      return -1;
460221345Sdim    }
461249423Sdim
462226633Sdim    if (insn->mode == MODE_64BIT || (byte1 & 0xc0) == 0xc0) {
463221345Sdim      insn->vexSize = 2;
464221345Sdim    }
465221345Sdim    else {
466201360Srdivacky      unconsumeByte(insn);
467221345Sdim    }
468249423Sdim
469221345Sdim    if (insn->vexSize == 2) {
470221345Sdim      insn->vexPrefix[0] = byte;
471221345Sdim      consumeByte(insn, &insn->vexPrefix[1]);
472249423Sdim
473226633Sdim      if (insn->mode == MODE_64BIT) {
474249423Sdim        insn->rexPrefix = 0x40
475226633Sdim                        | (rFromVEX2of2(insn->vexPrefix[1]) << 2);
476226633Sdim      }
477249423Sdim
478221345Sdim      switch (ppFromVEX2of2(insn->vexPrefix[1]))
479221345Sdim      {
480221345Sdim      default:
481221345Sdim        break;
482221345Sdim      case VEX_PREFIX_66:
483249423Sdim        hasOpSize = TRUE;
484221345Sdim        break;
485221345Sdim      }
486249423Sdim
487221345Sdim      dbgprintf(insn, "Found VEX prefix 0x%hhx 0x%hhx", insn->vexPrefix[0], insn->vexPrefix[1]);
488221345Sdim    }
489221345Sdim  }
490221345Sdim  else {
491221345Sdim    if (insn->mode == MODE_64BIT) {
492221345Sdim      if ((byte & 0xf0) == 0x40) {
493221345Sdim        uint8_t opcodeByte;
494249423Sdim
495221345Sdim        if (lookAtByte(insn, &opcodeByte) || ((opcodeByte & 0xf0) == 0x40)) {
496221345Sdim          dbgprintf(insn, "Redundant REX prefix");
497221345Sdim          return -1;
498221345Sdim        }
499249423Sdim
500221345Sdim        insn->rexPrefix = byte;
501221345Sdim        insn->necessaryPrefixLocation = insn->readerCursor - 2;
502249423Sdim
503221345Sdim        dbgprintf(insn, "Found REX prefix 0x%hhx", byte);
504249423Sdim      } else {
505221345Sdim        unconsumeByte(insn);
506221345Sdim        insn->necessaryPrefixLocation = insn->readerCursor - 1;
507221345Sdim      }
508221345Sdim    } else {
509221345Sdim      unconsumeByte(insn);
510201360Srdivacky      insn->necessaryPrefixLocation = insn->readerCursor - 1;
511201360Srdivacky    }
512201360Srdivacky  }
513221345Sdim
514201360Srdivacky  if (insn->mode == MODE_16BIT) {
515201360Srdivacky    insn->registerSize       = (hasOpSize ? 4 : 2);
516201360Srdivacky    insn->addressSize        = (hasAdSize ? 4 : 2);
517201360Srdivacky    insn->displacementSize   = (hasAdSize ? 4 : 2);
518201360Srdivacky    insn->immediateSize      = (hasOpSize ? 4 : 2);
519201360Srdivacky  } else if (insn->mode == MODE_32BIT) {
520201360Srdivacky    insn->registerSize       = (hasOpSize ? 2 : 4);
521201360Srdivacky    insn->addressSize        = (hasAdSize ? 2 : 4);
522201360Srdivacky    insn->displacementSize   = (hasAdSize ? 2 : 4);
523218893Sdim    insn->immediateSize      = (hasOpSize ? 2 : 4);
524201360Srdivacky  } else if (insn->mode == MODE_64BIT) {
525201360Srdivacky    if (insn->rexPrefix && wFromREX(insn->rexPrefix)) {
526201360Srdivacky      insn->registerSize       = 8;
527201360Srdivacky      insn->addressSize        = (hasAdSize ? 4 : 8);
528201360Srdivacky      insn->displacementSize   = 4;
529201360Srdivacky      insn->immediateSize      = 4;
530201360Srdivacky    } else if (insn->rexPrefix) {
531201360Srdivacky      insn->registerSize       = (hasOpSize ? 2 : 4);
532201360Srdivacky      insn->addressSize        = (hasAdSize ? 4 : 8);
533201360Srdivacky      insn->displacementSize   = (hasOpSize ? 2 : 4);
534201360Srdivacky      insn->immediateSize      = (hasOpSize ? 2 : 4);
535201360Srdivacky    } else {
536201360Srdivacky      insn->registerSize       = (hasOpSize ? 2 : 4);
537201360Srdivacky      insn->addressSize        = (hasAdSize ? 4 : 8);
538201360Srdivacky      insn->displacementSize   = (hasOpSize ? 2 : 4);
539201360Srdivacky      insn->immediateSize      = (hasOpSize ? 2 : 4);
540201360Srdivacky    }
541201360Srdivacky  }
542249423Sdim
543201360Srdivacky  return 0;
544201360Srdivacky}
545201360Srdivacky
546201360Srdivacky/*
547201360Srdivacky * readOpcode - Reads the opcode (excepting the ModR/M byte in the case of
548201360Srdivacky *   extended or escape opcodes).
549201360Srdivacky *
550201360Srdivacky * @param insn  - The instruction whose opcode is to be read.
551201360Srdivacky * @return      - 0 if the opcode could be read successfully; nonzero otherwise.
552201360Srdivacky */
553249423Sdimstatic int readOpcode(struct InternalInstruction* insn) {
554201360Srdivacky  /* Determine the length of the primary opcode */
555249423Sdim
556201360Srdivacky  uint8_t current;
557249423Sdim
558201360Srdivacky  dbgprintf(insn, "readOpcode()");
559249423Sdim
560201360Srdivacky  insn->opcodeType = ONEBYTE;
561249423Sdim
562221345Sdim  if (insn->vexSize == 3)
563221345Sdim  {
564221345Sdim    switch (mmmmmFromVEX2of3(insn->vexPrefix[1]))
565221345Sdim    {
566221345Sdim    default:
567221345Sdim      dbgprintf(insn, "Unhandled m-mmmm field for instruction (0x%hhx)", mmmmmFromVEX2of3(insn->vexPrefix[1]));
568249423Sdim      return -1;
569221345Sdim    case 0:
570221345Sdim      break;
571221345Sdim    case VEX_LOB_0F:
572221345Sdim      insn->twoByteEscape = 0x0f;
573221345Sdim      insn->opcodeType = TWOBYTE;
574221345Sdim      return consumeByte(insn, &insn->opcode);
575221345Sdim    case VEX_LOB_0F38:
576221345Sdim      insn->twoByteEscape = 0x0f;
577221345Sdim      insn->threeByteEscape = 0x38;
578221345Sdim      insn->opcodeType = THREEBYTE_38;
579221345Sdim      return consumeByte(insn, &insn->opcode);
580249423Sdim    case VEX_LOB_0F3A:
581221345Sdim      insn->twoByteEscape = 0x0f;
582221345Sdim      insn->threeByteEscape = 0x3a;
583221345Sdim      insn->opcodeType = THREEBYTE_3A;
584221345Sdim      return consumeByte(insn, &insn->opcode);
585221345Sdim    }
586221345Sdim  }
587221345Sdim  else if (insn->vexSize == 2)
588221345Sdim  {
589221345Sdim    insn->twoByteEscape = 0x0f;
590221345Sdim    insn->opcodeType = TWOBYTE;
591221345Sdim    return consumeByte(insn, &insn->opcode);
592221345Sdim  }
593249423Sdim
594201360Srdivacky  if (consumeByte(insn, &current))
595201360Srdivacky    return -1;
596249423Sdim
597201360Srdivacky  if (current == 0x0f) {
598201360Srdivacky    dbgprintf(insn, "Found a two-byte escape prefix (0x%hhx)", current);
599249423Sdim
600201360Srdivacky    insn->twoByteEscape = current;
601249423Sdim
602201360Srdivacky    if (consumeByte(insn, &current))
603201360Srdivacky      return -1;
604249423Sdim
605201360Srdivacky    if (current == 0x38) {
606201360Srdivacky      dbgprintf(insn, "Found a three-byte escape prefix (0x%hhx)", current);
607249423Sdim
608201360Srdivacky      insn->threeByteEscape = current;
609249423Sdim
610201360Srdivacky      if (consumeByte(insn, &current))
611201360Srdivacky        return -1;
612249423Sdim
613201360Srdivacky      insn->opcodeType = THREEBYTE_38;
614201360Srdivacky    } else if (current == 0x3a) {
615201360Srdivacky      dbgprintf(insn, "Found a three-byte escape prefix (0x%hhx)", current);
616249423Sdim
617201360Srdivacky      insn->threeByteEscape = current;
618249423Sdim
619201360Srdivacky      if (consumeByte(insn, &current))
620201360Srdivacky        return -1;
621249423Sdim
622201360Srdivacky      insn->opcodeType = THREEBYTE_3A;
623221345Sdim    } else if (current == 0xa6) {
624221345Sdim      dbgprintf(insn, "Found a three-byte escape prefix (0x%hhx)", current);
625249423Sdim
626221345Sdim      insn->threeByteEscape = current;
627249423Sdim
628221345Sdim      if (consumeByte(insn, &current))
629221345Sdim        return -1;
630249423Sdim
631221345Sdim      insn->opcodeType = THREEBYTE_A6;
632221345Sdim    } else if (current == 0xa7) {
633221345Sdim      dbgprintf(insn, "Found a three-byte escape prefix (0x%hhx)", current);
634249423Sdim
635221345Sdim      insn->threeByteEscape = current;
636249423Sdim
637221345Sdim      if (consumeByte(insn, &current))
638221345Sdim        return -1;
639249423Sdim
640221345Sdim      insn->opcodeType = THREEBYTE_A7;
641201360Srdivacky    } else {
642201360Srdivacky      dbgprintf(insn, "Didn't find a three-byte escape prefix");
643249423Sdim
644201360Srdivacky      insn->opcodeType = TWOBYTE;
645201360Srdivacky    }
646201360Srdivacky  }
647249423Sdim
648201360Srdivacky  /*
649201360Srdivacky   * At this point we have consumed the full opcode.
650201360Srdivacky   * Anything we consume from here on must be unconsumed.
651201360Srdivacky   */
652249423Sdim
653201360Srdivacky  insn->opcode = current;
654249423Sdim
655201360Srdivacky  return 0;
656201360Srdivacky}
657201360Srdivacky
658201360Srdivackystatic int readModRM(struct InternalInstruction* insn);
659201360Srdivacky
660201360Srdivacky/*
661201360Srdivacky * getIDWithAttrMask - Determines the ID of an instruction, consuming
662201360Srdivacky *   the ModR/M byte as appropriate for extended and escape opcodes,
663201360Srdivacky *   and using a supplied attribute mask.
664201360Srdivacky *
665201360Srdivacky * @param instructionID - A pointer whose target is filled in with the ID of the
666201360Srdivacky *                        instruction.
667201360Srdivacky * @param insn          - The instruction whose ID is to be determined.
668201360Srdivacky * @param attrMask      - The attribute mask to search.
669201360Srdivacky * @return              - 0 if the ModR/M could be read when needed or was not
670201360Srdivacky *                        needed; nonzero otherwise.
671201360Srdivacky */
672201360Srdivackystatic int getIDWithAttrMask(uint16_t* instructionID,
673201360Srdivacky                             struct InternalInstruction* insn,
674201360Srdivacky                             uint8_t attrMask) {
675201360Srdivacky  BOOL hasModRMExtension;
676249423Sdim
677201360Srdivacky  uint8_t instructionClass;
678201360Srdivacky
679201360Srdivacky  instructionClass = contextForAttrs(attrMask);
680249423Sdim
681201360Srdivacky  hasModRMExtension = modRMRequired(insn->opcodeType,
682201360Srdivacky                                    instructionClass,
683201360Srdivacky                                    insn->opcode);
684249423Sdim
685201360Srdivacky  if (hasModRMExtension) {
686218893Sdim    if (readModRM(insn))
687218893Sdim      return -1;
688249423Sdim
689201360Srdivacky    *instructionID = decode(insn->opcodeType,
690201360Srdivacky                            instructionClass,
691201360Srdivacky                            insn->opcode,
692201360Srdivacky                            insn->modRM);
693201360Srdivacky  } else {
694201360Srdivacky    *instructionID = decode(insn->opcodeType,
695201360Srdivacky                            instructionClass,
696201360Srdivacky                            insn->opcode,
697201360Srdivacky                            0);
698201360Srdivacky  }
699249423Sdim
700201360Srdivacky  return 0;
701201360Srdivacky}
702201360Srdivacky
703201360Srdivacky/*
704201360Srdivacky * is16BitEquivalent - Determines whether two instruction names refer to
705201360Srdivacky * equivalent instructions but one is 16-bit whereas the other is not.
706201360Srdivacky *
707201360Srdivacky * @param orig  - The instruction that is not 16-bit
708201360Srdivacky * @param equiv - The instruction that is 16-bit
709201360Srdivacky */
710243830Sdimstatic BOOL is16BitEquivalent(const char* orig, const char* equiv) {
711201360Srdivacky  off_t i;
712249423Sdim
713206124Srdivacky  for (i = 0;; i++) {
714206124Srdivacky    if (orig[i] == '\0' && equiv[i] == '\0')
715201360Srdivacky      return TRUE;
716206124Srdivacky    if (orig[i] == '\0' || equiv[i] == '\0')
717201360Srdivacky      return FALSE;
718206124Srdivacky    if (orig[i] != equiv[i]) {
719206124Srdivacky      if ((orig[i] == 'Q' || orig[i] == 'L') && equiv[i] == 'W')
720201360Srdivacky        continue;
721206124Srdivacky      if ((orig[i] == '6' || orig[i] == '3') && equiv[i] == '1')
722201360Srdivacky        continue;
723206124Srdivacky      if ((orig[i] == '4' || orig[i] == '2') && equiv[i] == '6')
724201360Srdivacky        continue;
725201360Srdivacky      return FALSE;
726201360Srdivacky    }
727201360Srdivacky  }
728201360Srdivacky}
729201360Srdivacky
730201360Srdivacky/*
731249423Sdim * getID - Determines the ID of an instruction, consuming the ModR/M byte as
732249423Sdim *   appropriate for extended and escape opcodes.  Determines the attributes and
733201360Srdivacky *   context for the instruction before doing so.
734201360Srdivacky *
735201360Srdivacky * @param insn  - The instruction whose ID is to be determined.
736201360Srdivacky * @return      - 0 if the ModR/M could be read when needed or was not needed;
737201360Srdivacky *                nonzero otherwise.
738201360Srdivacky */
739243830Sdimstatic int getID(struct InternalInstruction* insn, const void *miiArg) {
740201360Srdivacky  uint8_t attrMask;
741201360Srdivacky  uint16_t instructionID;
742249423Sdim
743201360Srdivacky  dbgprintf(insn, "getID()");
744249423Sdim
745201360Srdivacky  attrMask = ATTR_NONE;
746221345Sdim
747201360Srdivacky  if (insn->mode == MODE_64BIT)
748201360Srdivacky    attrMask |= ATTR_64BIT;
749249423Sdim
750221345Sdim  if (insn->vexSize) {
751221345Sdim    attrMask |= ATTR_VEX;
752221345Sdim
753221345Sdim    if (insn->vexSize == 3) {
754221345Sdim      switch (ppFromVEX3of3(insn->vexPrefix[2])) {
755221345Sdim      case VEX_PREFIX_66:
756249423Sdim        attrMask |= ATTR_OPSIZE;
757221345Sdim        break;
758221345Sdim      case VEX_PREFIX_F3:
759221345Sdim        attrMask |= ATTR_XS;
760221345Sdim        break;
761221345Sdim      case VEX_PREFIX_F2:
762221345Sdim        attrMask |= ATTR_XD;
763221345Sdim        break;
764221345Sdim      }
765249423Sdim
766221345Sdim      if (lFromVEX3of3(insn->vexPrefix[2]))
767221345Sdim        attrMask |= ATTR_VEXL;
768221345Sdim    }
769221345Sdim    else if (insn->vexSize == 2) {
770221345Sdim      switch (ppFromVEX2of2(insn->vexPrefix[1])) {
771221345Sdim      case VEX_PREFIX_66:
772249423Sdim        attrMask |= ATTR_OPSIZE;
773221345Sdim        break;
774221345Sdim      case VEX_PREFIX_F3:
775221345Sdim        attrMask |= ATTR_XS;
776221345Sdim        break;
777221345Sdim      case VEX_PREFIX_F2:
778221345Sdim        attrMask |= ATTR_XD;
779221345Sdim        break;
780221345Sdim      }
781249423Sdim
782221345Sdim      if (lFromVEX2of2(insn->vexPrefix[1]))
783221345Sdim        attrMask |= ATTR_VEXL;
784221345Sdim    }
785221345Sdim    else {
786221345Sdim      return -1;
787221345Sdim    }
788221345Sdim  }
789221345Sdim  else {
790221345Sdim    if (isPrefixAtLocation(insn, 0x66, insn->necessaryPrefixLocation))
791221345Sdim      attrMask |= ATTR_OPSIZE;
792234353Sdim    else if (isPrefixAtLocation(insn, 0x67, insn->necessaryPrefixLocation))
793234353Sdim      attrMask |= ATTR_ADSIZE;
794221345Sdim    else if (isPrefixAtLocation(insn, 0xf3, insn->necessaryPrefixLocation))
795221345Sdim      attrMask |= ATTR_XS;
796221345Sdim    else if (isPrefixAtLocation(insn, 0xf2, insn->necessaryPrefixLocation))
797221345Sdim      attrMask |= ATTR_XD;
798221345Sdim  }
799221345Sdim
800226633Sdim  if (insn->rexPrefix & 0x08)
801226633Sdim    attrMask |= ATTR_REXW;
802234353Sdim
803206124Srdivacky  if (getIDWithAttrMask(&instructionID, insn, attrMask))
804201360Srdivacky    return -1;
805234353Sdim
806201360Srdivacky  /* The following clauses compensate for limitations of the tables. */
807234353Sdim
808234353Sdim  if ((attrMask & ATTR_VEXL) && (attrMask & ATTR_REXW) &&
809234353Sdim      !(attrMask & ATTR_OPSIZE)) {
810201360Srdivacky    /*
811226633Sdim     * Some VEX instructions ignore the L-bit, but use the W-bit. Normally L-bit
812226633Sdim     * has precedence since there are no L-bit with W-bit entries in the tables.
813226633Sdim     * So if the L-bit isn't significant we should use the W-bit instead.
814234353Sdim     * We only need to do this if the instruction doesn't specify OpSize since
815234353Sdim     * there is a VEX_L_W_OPSIZE table.
816201360Srdivacky     */
817226633Sdim
818218893Sdim    const struct InstructionSpecifier *spec;
819226633Sdim    uint16_t instructionIDWithWBit;
820226633Sdim    const struct InstructionSpecifier *specWithWBit;
821226633Sdim
822201360Srdivacky    spec = specifierForUID(instructionID);
823226633Sdim
824226633Sdim    if (getIDWithAttrMask(&instructionIDWithWBit,
825201360Srdivacky                          insn,
826226633Sdim                          (attrMask & (~ATTR_VEXL)) | ATTR_REXW)) {
827201360Srdivacky      insn->instructionID = instructionID;
828201360Srdivacky      insn->spec = spec;
829201360Srdivacky      return 0;
830201360Srdivacky    }
831226633Sdim
832226633Sdim    specWithWBit = specifierForUID(instructionIDWithWBit);
833226633Sdim
834226633Sdim    if (instructionID != instructionIDWithWBit) {
835226633Sdim      insn->instructionID = instructionIDWithWBit;
836226633Sdim      insn->spec = specWithWBit;
837201360Srdivacky    } else {
838201360Srdivacky      insn->instructionID = instructionID;
839201360Srdivacky      insn->spec = spec;
840201360Srdivacky    }
841201360Srdivacky    return 0;
842201360Srdivacky  }
843226633Sdim
844201360Srdivacky  if (insn->prefixPresent[0x66] && !(attrMask & ATTR_OPSIZE)) {
845201360Srdivacky    /*
846201360Srdivacky     * The instruction tables make no distinction between instructions that
847201360Srdivacky     * allow OpSize anywhere (i.e., 16-bit operations) and that need it in a
848201360Srdivacky     * particular spot (i.e., many MMX operations).  In general we're
849201360Srdivacky     * conservative, but in the specific case where OpSize is present but not
850201360Srdivacky     * in the right place we check if there's a 16-bit operation.
851201360Srdivacky     */
852249423Sdim
853218893Sdim    const struct InstructionSpecifier *spec;
854201360Srdivacky    uint16_t instructionIDWithOpsize;
855234353Sdim    const char *specName, *specWithOpSizeName;
856249423Sdim
857201360Srdivacky    spec = specifierForUID(instructionID);
858249423Sdim
859201360Srdivacky    if (getIDWithAttrMask(&instructionIDWithOpsize,
860201360Srdivacky                          insn,
861201360Srdivacky                          attrMask | ATTR_OPSIZE)) {
862249423Sdim      /*
863201360Srdivacky       * ModRM required with OpSize but not present; give up and return version
864201360Srdivacky       * without OpSize set
865201360Srdivacky       */
866249423Sdim
867201360Srdivacky      insn->instructionID = instructionID;
868201360Srdivacky      insn->spec = spec;
869201360Srdivacky      return 0;
870201360Srdivacky    }
871249423Sdim
872234353Sdim    specName = x86DisassemblerGetInstrName(instructionID, miiArg);
873234353Sdim    specWithOpSizeName =
874234353Sdim      x86DisassemblerGetInstrName(instructionIDWithOpsize, miiArg);
875234353Sdim
876243830Sdim    if (is16BitEquivalent(specName, specWithOpSizeName)) {
877201360Srdivacky      insn->instructionID = instructionIDWithOpsize;
878234353Sdim      insn->spec = specifierForUID(instructionIDWithOpsize);
879201360Srdivacky    } else {
880201360Srdivacky      insn->instructionID = instructionID;
881201360Srdivacky      insn->spec = spec;
882201360Srdivacky    }
883201360Srdivacky    return 0;
884201360Srdivacky  }
885226633Sdim
886226633Sdim  if (insn->opcodeType == ONEBYTE && insn->opcode == 0x90 &&
887226633Sdim      insn->rexPrefix & 0x01) {
888226633Sdim    /*
889226633Sdim     * NOOP shouldn't decode as NOOP if REX.b is set. Instead
890226633Sdim     * it should decode as XCHG %r8, %eax.
891226633Sdim     */
892226633Sdim
893226633Sdim    const struct InstructionSpecifier *spec;
894226633Sdim    uint16_t instructionIDWithNewOpcode;
895226633Sdim    const struct InstructionSpecifier *specWithNewOpcode;
896226633Sdim
897226633Sdim    spec = specifierForUID(instructionID);
898249423Sdim
899226633Sdim    /* Borrow opcode from one of the other XCHGar opcodes */
900226633Sdim    insn->opcode = 0x91;
901249423Sdim
902226633Sdim    if (getIDWithAttrMask(&instructionIDWithNewOpcode,
903226633Sdim                          insn,
904226633Sdim                          attrMask)) {
905226633Sdim      insn->opcode = 0x90;
906226633Sdim
907226633Sdim      insn->instructionID = instructionID;
908226633Sdim      insn->spec = spec;
909226633Sdim      return 0;
910226633Sdim    }
911226633Sdim
912226633Sdim    specWithNewOpcode = specifierForUID(instructionIDWithNewOpcode);
913226633Sdim
914226633Sdim    /* Change back */
915226633Sdim    insn->opcode = 0x90;
916226633Sdim
917226633Sdim    insn->instructionID = instructionIDWithNewOpcode;
918226633Sdim    insn->spec = specWithNewOpcode;
919226633Sdim
920226633Sdim    return 0;
921226633Sdim  }
922249423Sdim
923201360Srdivacky  insn->instructionID = instructionID;
924201360Srdivacky  insn->spec = specifierForUID(insn->instructionID);
925249423Sdim
926201360Srdivacky  return 0;
927201360Srdivacky}
928201360Srdivacky
929201360Srdivacky/*
930201360Srdivacky * readSIB - Consumes the SIB byte to determine addressing information for an
931201360Srdivacky *   instruction.
932201360Srdivacky *
933201360Srdivacky * @param insn  - The instruction whose SIB byte is to be read.
934201360Srdivacky * @return      - 0 if the SIB byte was successfully read; nonzero otherwise.
935201360Srdivacky */
936201360Srdivackystatic int readSIB(struct InternalInstruction* insn) {
937201360Srdivacky  SIBIndex sibIndexBase = 0;
938201360Srdivacky  SIBBase sibBaseBase = 0;
939201360Srdivacky  uint8_t index, base;
940249423Sdim
941201360Srdivacky  dbgprintf(insn, "readSIB()");
942249423Sdim
943201360Srdivacky  if (insn->consumedSIB)
944201360Srdivacky    return 0;
945249423Sdim
946201360Srdivacky  insn->consumedSIB = TRUE;
947249423Sdim
948201360Srdivacky  switch (insn->addressSize) {
949201360Srdivacky  case 2:
950201360Srdivacky    dbgprintf(insn, "SIB-based addressing doesn't work in 16-bit mode");
951201360Srdivacky    return -1;
952201360Srdivacky    break;
953201360Srdivacky  case 4:
954201360Srdivacky    sibIndexBase = SIB_INDEX_EAX;
955201360Srdivacky    sibBaseBase = SIB_BASE_EAX;
956201360Srdivacky    break;
957201360Srdivacky  case 8:
958201360Srdivacky    sibIndexBase = SIB_INDEX_RAX;
959201360Srdivacky    sibBaseBase = SIB_BASE_RAX;
960201360Srdivacky    break;
961201360Srdivacky  }
962201360Srdivacky
963201360Srdivacky  if (consumeByte(insn, &insn->sib))
964201360Srdivacky    return -1;
965249423Sdim
966201360Srdivacky  index = indexFromSIB(insn->sib) | (xFromREX(insn->rexPrefix) << 3);
967249423Sdim
968201360Srdivacky  switch (index) {
969201360Srdivacky  case 0x4:
970201360Srdivacky    insn->sibIndex = SIB_INDEX_NONE;
971201360Srdivacky    break;
972201360Srdivacky  default:
973221345Sdim    insn->sibIndex = (SIBIndex)(sibIndexBase + index);
974201360Srdivacky    if (insn->sibIndex == SIB_INDEX_sib ||
975201360Srdivacky        insn->sibIndex == SIB_INDEX_sib64)
976201360Srdivacky      insn->sibIndex = SIB_INDEX_NONE;
977201360Srdivacky    break;
978201360Srdivacky  }
979249423Sdim
980201360Srdivacky  switch (scaleFromSIB(insn->sib)) {
981201360Srdivacky  case 0:
982201360Srdivacky    insn->sibScale = 1;
983201360Srdivacky    break;
984201360Srdivacky  case 1:
985201360Srdivacky    insn->sibScale = 2;
986201360Srdivacky    break;
987201360Srdivacky  case 2:
988201360Srdivacky    insn->sibScale = 4;
989201360Srdivacky    break;
990201360Srdivacky  case 3:
991201360Srdivacky    insn->sibScale = 8;
992201360Srdivacky    break;
993201360Srdivacky  }
994249423Sdim
995201360Srdivacky  base = baseFromSIB(insn->sib) | (bFromREX(insn->rexPrefix) << 3);
996249423Sdim
997201360Srdivacky  switch (base) {
998201360Srdivacky  case 0x5:
999201360Srdivacky    switch (modFromModRM(insn->modRM)) {
1000201360Srdivacky    case 0x0:
1001201360Srdivacky      insn->eaDisplacement = EA_DISP_32;
1002201360Srdivacky      insn->sibBase = SIB_BASE_NONE;
1003201360Srdivacky      break;
1004201360Srdivacky    case 0x1:
1005201360Srdivacky      insn->eaDisplacement = EA_DISP_8;
1006249423Sdim      insn->sibBase = (insn->addressSize == 4 ?
1007201360Srdivacky                       SIB_BASE_EBP : SIB_BASE_RBP);
1008201360Srdivacky      break;
1009201360Srdivacky    case 0x2:
1010201360Srdivacky      insn->eaDisplacement = EA_DISP_32;
1011249423Sdim      insn->sibBase = (insn->addressSize == 4 ?
1012201360Srdivacky                       SIB_BASE_EBP : SIB_BASE_RBP);
1013201360Srdivacky      break;
1014201360Srdivacky    case 0x3:
1015206124Srdivacky      debug("Cannot have Mod = 0b11 and a SIB byte");
1016206124Srdivacky      return -1;
1017201360Srdivacky    }
1018201360Srdivacky    break;
1019201360Srdivacky  default:
1020221345Sdim    insn->sibBase = (SIBBase)(sibBaseBase + base);
1021201360Srdivacky    break;
1022201360Srdivacky  }
1023249423Sdim
1024201360Srdivacky  return 0;
1025201360Srdivacky}
1026201360Srdivacky
1027201360Srdivacky/*
1028201360Srdivacky * readDisplacement - Consumes the displacement of an instruction.
1029201360Srdivacky *
1030201360Srdivacky * @param insn  - The instruction whose displacement is to be read.
1031249423Sdim * @return      - 0 if the displacement byte was successfully read; nonzero
1032201360Srdivacky *                otherwise.
1033201360Srdivacky */
1034249423Sdimstatic int readDisplacement(struct InternalInstruction* insn) {
1035201360Srdivacky  int8_t d8;
1036201360Srdivacky  int16_t d16;
1037201360Srdivacky  int32_t d32;
1038249423Sdim
1039201360Srdivacky  dbgprintf(insn, "readDisplacement()");
1040249423Sdim
1041201360Srdivacky  if (insn->consumedDisplacement)
1042201360Srdivacky    return 0;
1043249423Sdim
1044201360Srdivacky  insn->consumedDisplacement = TRUE;
1045234353Sdim  insn->displacementOffset = insn->readerCursor - insn->startLocation;
1046249423Sdim
1047201360Srdivacky  switch (insn->eaDisplacement) {
1048201360Srdivacky  case EA_DISP_NONE:
1049201360Srdivacky    insn->consumedDisplacement = FALSE;
1050201360Srdivacky    break;
1051201360Srdivacky  case EA_DISP_8:
1052201360Srdivacky    if (consumeInt8(insn, &d8))
1053201360Srdivacky      return -1;
1054201360Srdivacky    insn->displacement = d8;
1055201360Srdivacky    break;
1056201360Srdivacky  case EA_DISP_16:
1057201360Srdivacky    if (consumeInt16(insn, &d16))
1058201360Srdivacky      return -1;
1059201360Srdivacky    insn->displacement = d16;
1060201360Srdivacky    break;
1061201360Srdivacky  case EA_DISP_32:
1062201360Srdivacky    if (consumeInt32(insn, &d32))
1063201360Srdivacky      return -1;
1064201360Srdivacky    insn->displacement = d32;
1065201360Srdivacky    break;
1066201360Srdivacky  }
1067249423Sdim
1068201360Srdivacky  insn->consumedDisplacement = TRUE;
1069201360Srdivacky  return 0;
1070201360Srdivacky}
1071201360Srdivacky
1072201360Srdivacky/*
1073201360Srdivacky * readModRM - Consumes all addressing information (ModR/M byte, SIB byte, and
1074201360Srdivacky *   displacement) for an instruction and interprets it.
1075201360Srdivacky *
1076201360Srdivacky * @param insn  - The instruction whose addressing information is to be read.
1077201360Srdivacky * @return      - 0 if the information was successfully read; nonzero otherwise.
1078201360Srdivacky */
1079249423Sdimstatic int readModRM(struct InternalInstruction* insn) {
1080201360Srdivacky  uint8_t mod, rm, reg;
1081249423Sdim
1082201360Srdivacky  dbgprintf(insn, "readModRM()");
1083249423Sdim
1084201360Srdivacky  if (insn->consumedModRM)
1085201360Srdivacky    return 0;
1086249423Sdim
1087218893Sdim  if (consumeByte(insn, &insn->modRM))
1088218893Sdim    return -1;
1089201360Srdivacky  insn->consumedModRM = TRUE;
1090249423Sdim
1091201360Srdivacky  mod     = modFromModRM(insn->modRM);
1092201360Srdivacky  rm      = rmFromModRM(insn->modRM);
1093201360Srdivacky  reg     = regFromModRM(insn->modRM);
1094249423Sdim
1095201360Srdivacky  /*
1096201360Srdivacky   * This goes by insn->registerSize to pick the correct register, which messes
1097201360Srdivacky   * up if we're using (say) XMM or 8-bit register operands.  That gets fixed in
1098201360Srdivacky   * fixupReg().
1099201360Srdivacky   */
1100201360Srdivacky  switch (insn->registerSize) {
1101201360Srdivacky  case 2:
1102201360Srdivacky    insn->regBase = MODRM_REG_AX;
1103201360Srdivacky    insn->eaRegBase = EA_REG_AX;
1104201360Srdivacky    break;
1105201360Srdivacky  case 4:
1106201360Srdivacky    insn->regBase = MODRM_REG_EAX;
1107201360Srdivacky    insn->eaRegBase = EA_REG_EAX;
1108201360Srdivacky    break;
1109201360Srdivacky  case 8:
1110201360Srdivacky    insn->regBase = MODRM_REG_RAX;
1111201360Srdivacky    insn->eaRegBase = EA_REG_RAX;
1112201360Srdivacky    break;
1113201360Srdivacky  }
1114249423Sdim
1115201360Srdivacky  reg |= rFromREX(insn->rexPrefix) << 3;
1116201360Srdivacky  rm  |= bFromREX(insn->rexPrefix) << 3;
1117249423Sdim
1118201360Srdivacky  insn->reg = (Reg)(insn->regBase + reg);
1119249423Sdim
1120201360Srdivacky  switch (insn->addressSize) {
1121201360Srdivacky  case 2:
1122201360Srdivacky    insn->eaBaseBase = EA_BASE_BX_SI;
1123249423Sdim
1124201360Srdivacky    switch (mod) {
1125201360Srdivacky    case 0x0:
1126201360Srdivacky      if (rm == 0x6) {
1127201360Srdivacky        insn->eaBase = EA_BASE_NONE;
1128201360Srdivacky        insn->eaDisplacement = EA_DISP_16;
1129206124Srdivacky        if (readDisplacement(insn))
1130201360Srdivacky          return -1;
1131201360Srdivacky      } else {
1132201360Srdivacky        insn->eaBase = (EABase)(insn->eaBaseBase + rm);
1133201360Srdivacky        insn->eaDisplacement = EA_DISP_NONE;
1134201360Srdivacky      }
1135201360Srdivacky      break;
1136201360Srdivacky    case 0x1:
1137201360Srdivacky      insn->eaBase = (EABase)(insn->eaBaseBase + rm);
1138201360Srdivacky      insn->eaDisplacement = EA_DISP_8;
1139206124Srdivacky      if (readDisplacement(insn))
1140201360Srdivacky        return -1;
1141201360Srdivacky      break;
1142201360Srdivacky    case 0x2:
1143201360Srdivacky      insn->eaBase = (EABase)(insn->eaBaseBase + rm);
1144201360Srdivacky      insn->eaDisplacement = EA_DISP_16;
1145206124Srdivacky      if (readDisplacement(insn))
1146201360Srdivacky        return -1;
1147201360Srdivacky      break;
1148201360Srdivacky    case 0x3:
1149201360Srdivacky      insn->eaBase = (EABase)(insn->eaRegBase + rm);
1150206124Srdivacky      if (readDisplacement(insn))
1151201360Srdivacky        return -1;
1152201360Srdivacky      break;
1153201360Srdivacky    }
1154201360Srdivacky    break;
1155201360Srdivacky  case 4:
1156201360Srdivacky  case 8:
1157201360Srdivacky    insn->eaBaseBase = (insn->addressSize == 4 ? EA_BASE_EAX : EA_BASE_RAX);
1158249423Sdim
1159201360Srdivacky    switch (mod) {
1160201360Srdivacky    case 0x0:
1161201360Srdivacky      insn->eaDisplacement = EA_DISP_NONE; /* readSIB may override this */
1162201360Srdivacky      switch (rm) {
1163201360Srdivacky      case 0x4:
1164201360Srdivacky      case 0xc:   /* in case REXW.b is set */
1165249423Sdim        insn->eaBase = (insn->addressSize == 4 ?
1166201360Srdivacky                        EA_BASE_sib : EA_BASE_sib64);
1167201360Srdivacky        readSIB(insn);
1168206124Srdivacky        if (readDisplacement(insn))
1169201360Srdivacky          return -1;
1170201360Srdivacky        break;
1171201360Srdivacky      case 0x5:
1172201360Srdivacky        insn->eaBase = EA_BASE_NONE;
1173201360Srdivacky        insn->eaDisplacement = EA_DISP_32;
1174206124Srdivacky        if (readDisplacement(insn))
1175201360Srdivacky          return -1;
1176201360Srdivacky        break;
1177201360Srdivacky      default:
1178201360Srdivacky        insn->eaBase = (EABase)(insn->eaBaseBase + rm);
1179201360Srdivacky        break;
1180201360Srdivacky      }
1181201360Srdivacky      break;
1182201360Srdivacky    case 0x1:
1183201360Srdivacky    case 0x2:
1184201360Srdivacky      insn->eaDisplacement = (mod == 0x1 ? EA_DISP_8 : EA_DISP_32);
1185201360Srdivacky      switch (rm) {
1186201360Srdivacky      case 0x4:
1187201360Srdivacky      case 0xc:   /* in case REXW.b is set */
1188201360Srdivacky        insn->eaBase = EA_BASE_sib;
1189201360Srdivacky        readSIB(insn);
1190206124Srdivacky        if (readDisplacement(insn))
1191201360Srdivacky          return -1;
1192201360Srdivacky        break;
1193201360Srdivacky      default:
1194201360Srdivacky        insn->eaBase = (EABase)(insn->eaBaseBase + rm);
1195206124Srdivacky        if (readDisplacement(insn))
1196201360Srdivacky          return -1;
1197201360Srdivacky        break;
1198201360Srdivacky      }
1199201360Srdivacky      break;
1200201360Srdivacky    case 0x3:
1201201360Srdivacky      insn->eaDisplacement = EA_DISP_NONE;
1202201360Srdivacky      insn->eaBase = (EABase)(insn->eaRegBase + rm);
1203201360Srdivacky      break;
1204201360Srdivacky    }
1205201360Srdivacky    break;
1206201360Srdivacky  } /* switch (insn->addressSize) */
1207249423Sdim
1208201360Srdivacky  return 0;
1209201360Srdivacky}
1210201360Srdivacky
1211201360Srdivacky#define GENERIC_FIXUP_FUNC(name, base, prefix)            \
1212201360Srdivacky  static uint8_t name(struct InternalInstruction *insn,   \
1213201360Srdivacky                      OperandType type,                   \
1214201360Srdivacky                      uint8_t index,                      \
1215201360Srdivacky                      uint8_t *valid) {                   \
1216201360Srdivacky    *valid = 1;                                           \
1217201360Srdivacky    switch (type) {                                       \
1218201360Srdivacky    default:                                              \
1219206124Srdivacky      debug("Unhandled register type");                   \
1220206124Srdivacky      *valid = 0;                                         \
1221206124Srdivacky      return 0;                                           \
1222201360Srdivacky    case TYPE_Rv:                                         \
1223201360Srdivacky      return base + index;                                \
1224201360Srdivacky    case TYPE_R8:                                         \
1225206124Srdivacky      if (insn->rexPrefix &&                              \
1226201360Srdivacky         index >= 4 && index <= 7) {                      \
1227201360Srdivacky        return prefix##_SPL + (index - 4);                \
1228201360Srdivacky      } else {                                            \
1229201360Srdivacky        return prefix##_AL + index;                       \
1230201360Srdivacky      }                                                   \
1231201360Srdivacky    case TYPE_R16:                                        \
1232201360Srdivacky      return prefix##_AX + index;                         \
1233201360Srdivacky    case TYPE_R32:                                        \
1234201360Srdivacky      return prefix##_EAX + index;                        \
1235201360Srdivacky    case TYPE_R64:                                        \
1236201360Srdivacky      return prefix##_RAX + index;                        \
1237221345Sdim    case TYPE_XMM256:                                     \
1238221345Sdim      return prefix##_YMM0 + index;                       \
1239201360Srdivacky    case TYPE_XMM128:                                     \
1240201360Srdivacky    case TYPE_XMM64:                                      \
1241201360Srdivacky    case TYPE_XMM32:                                      \
1242201360Srdivacky    case TYPE_XMM:                                        \
1243201360Srdivacky      return prefix##_XMM0 + index;                       \
1244201360Srdivacky    case TYPE_MM64:                                       \
1245201360Srdivacky    case TYPE_MM32:                                       \
1246201360Srdivacky    case TYPE_MM:                                         \
1247206124Srdivacky      if (index > 7)                                      \
1248201360Srdivacky        *valid = 0;                                       \
1249201360Srdivacky      return prefix##_MM0 + index;                        \
1250201360Srdivacky    case TYPE_SEGMENTREG:                                 \
1251206124Srdivacky      if (index > 5)                                      \
1252201360Srdivacky        *valid = 0;                                       \
1253201360Srdivacky      return prefix##_ES + index;                         \
1254201360Srdivacky    case TYPE_DEBUGREG:                                   \
1255206124Srdivacky      if (index > 7)                                      \
1256201360Srdivacky        *valid = 0;                                       \
1257201360Srdivacky      return prefix##_DR0 + index;                        \
1258208599Srdivacky    case TYPE_CONTROLREG:                                 \
1259206124Srdivacky      if (index > 8)                                      \
1260201360Srdivacky        *valid = 0;                                       \
1261208599Srdivacky      return prefix##_CR0 + index;                        \
1262201360Srdivacky    }                                                     \
1263201360Srdivacky  }
1264201360Srdivacky
1265201360Srdivacky/*
1266201360Srdivacky * fixup*Value - Consults an operand type to determine the meaning of the
1267201360Srdivacky *   reg or R/M field.  If the operand is an XMM operand, for example, an
1268201360Srdivacky *   operand would be XMM0 instead of AX, which readModRM() would otherwise
1269201360Srdivacky *   misinterpret it as.
1270201360Srdivacky *
1271201360Srdivacky * @param insn  - The instruction containing the operand.
1272201360Srdivacky * @param type  - The operand type.
1273201360Srdivacky * @param index - The existing value of the field as reported by readModRM().
1274201360Srdivacky * @param valid - The address of a uint8_t.  The target is set to 1 if the
1275201360Srdivacky *                field is valid for the register class; 0 if not.
1276206124Srdivacky * @return      - The proper value.
1277201360Srdivacky */
1278201360SrdivackyGENERIC_FIXUP_FUNC(fixupRegValue, insn->regBase,    MODRM_REG)
1279201360SrdivackyGENERIC_FIXUP_FUNC(fixupRMValue,  insn->eaRegBase,  EA_REG)
1280201360Srdivacky
1281201360Srdivacky/*
1282201360Srdivacky * fixupReg - Consults an operand specifier to determine which of the
1283201360Srdivacky *   fixup*Value functions to use in correcting readModRM()'ss interpretation.
1284201360Srdivacky *
1285201360Srdivacky * @param insn  - See fixup*Value().
1286201360Srdivacky * @param op    - The operand specifier.
1287201360Srdivacky * @return      - 0 if fixup was successful; -1 if the register returned was
1288201360Srdivacky *                invalid for its class.
1289201360Srdivacky */
1290249423Sdimstatic int fixupReg(struct InternalInstruction *insn,
1291218893Sdim                    const struct OperandSpecifier *op) {
1292201360Srdivacky  uint8_t valid;
1293249423Sdim
1294201360Srdivacky  dbgprintf(insn, "fixupReg()");
1295249423Sdim
1296201360Srdivacky  switch ((OperandEncoding)op->encoding) {
1297201360Srdivacky  default:
1298206124Srdivacky    debug("Expected a REG or R/M encoding in fixupReg");
1299206124Srdivacky    return -1;
1300221345Sdim  case ENCODING_VVVV:
1301221345Sdim    insn->vvvv = (Reg)fixupRegValue(insn,
1302221345Sdim                                    (OperandType)op->type,
1303221345Sdim                                    insn->vvvv,
1304221345Sdim                                    &valid);
1305221345Sdim    if (!valid)
1306221345Sdim      return -1;
1307221345Sdim    break;
1308201360Srdivacky  case ENCODING_REG:
1309201360Srdivacky    insn->reg = (Reg)fixupRegValue(insn,
1310201360Srdivacky                                   (OperandType)op->type,
1311201360Srdivacky                                   insn->reg - insn->regBase,
1312201360Srdivacky                                   &valid);
1313201360Srdivacky    if (!valid)
1314201360Srdivacky      return -1;
1315201360Srdivacky    break;
1316201360Srdivacky  case ENCODING_RM:
1317201360Srdivacky    if (insn->eaBase >= insn->eaRegBase) {
1318201360Srdivacky      insn->eaBase = (EABase)fixupRMValue(insn,
1319201360Srdivacky                                          (OperandType)op->type,
1320201360Srdivacky                                          insn->eaBase - insn->eaRegBase,
1321201360Srdivacky                                          &valid);
1322201360Srdivacky      if (!valid)
1323201360Srdivacky        return -1;
1324201360Srdivacky    }
1325201360Srdivacky    break;
1326201360Srdivacky  }
1327249423Sdim
1328201360Srdivacky  return 0;
1329201360Srdivacky}
1330201360Srdivacky
1331201360Srdivacky/*
1332249423Sdim * readOpcodeModifier - Reads an operand from the opcode field of an
1333201360Srdivacky *   instruction.  Handles AddRegFrm instructions.
1334201360Srdivacky *
1335201360Srdivacky * @param insn    - The instruction whose opcode field is to be read.
1336201360Srdivacky * @param inModRM - Indicates that the opcode field is to be read from the
1337201360Srdivacky *                  ModR/M extension; useful for escape opcodes
1338206124Srdivacky * @return        - 0 on success; nonzero otherwise.
1339201360Srdivacky */
1340206124Srdivackystatic int readOpcodeModifier(struct InternalInstruction* insn) {
1341201360Srdivacky  dbgprintf(insn, "readOpcodeModifier()");
1342249423Sdim
1343201360Srdivacky  if (insn->consumedOpcodeModifier)
1344206124Srdivacky    return 0;
1345249423Sdim
1346201360Srdivacky  insn->consumedOpcodeModifier = TRUE;
1347249423Sdim
1348206124Srdivacky  switch (insn->spec->modifierType) {
1349201360Srdivacky  default:
1350206124Srdivacky    debug("Unknown modifier type.");
1351206124Srdivacky    return -1;
1352201360Srdivacky  case MODIFIER_NONE:
1353206124Srdivacky    debug("No modifier but an operand expects one.");
1354206124Srdivacky    return -1;
1355201360Srdivacky  case MODIFIER_OPCODE:
1356201360Srdivacky    insn->opcodeModifier = insn->opcode - insn->spec->modifierBase;
1357206124Srdivacky    return 0;
1358201360Srdivacky  case MODIFIER_MODRM:
1359201360Srdivacky    insn->opcodeModifier = insn->modRM - insn->spec->modifierBase;
1360206124Srdivacky    return 0;
1361249423Sdim  }
1362201360Srdivacky}
1363201360Srdivacky
1364201360Srdivacky/*
1365249423Sdim * readOpcodeRegister - Reads an operand from the opcode field of an
1366201360Srdivacky *   instruction and interprets it appropriately given the operand width.
1367201360Srdivacky *   Handles AddRegFrm instructions.
1368201360Srdivacky *
1369201360Srdivacky * @param insn  - See readOpcodeModifier().
1370201360Srdivacky * @param size  - The width (in bytes) of the register being specified.
1371201360Srdivacky *                1 means AL and friends, 2 means AX, 4 means EAX, and 8 means
1372201360Srdivacky *                RAX.
1373206124Srdivacky * @return      - 0 on success; nonzero otherwise.
1374201360Srdivacky */
1375206124Srdivackystatic int readOpcodeRegister(struct InternalInstruction* insn, uint8_t size) {
1376201360Srdivacky  dbgprintf(insn, "readOpcodeRegister()");
1377201360Srdivacky
1378206124Srdivacky  if (readOpcodeModifier(insn))
1379206124Srdivacky    return -1;
1380249423Sdim
1381201360Srdivacky  if (size == 0)
1382201360Srdivacky    size = insn->registerSize;
1383249423Sdim
1384201360Srdivacky  switch (size) {
1385201360Srdivacky  case 1:
1386249423Sdim    insn->opcodeRegister = (Reg)(MODRM_REG_AL + ((bFromREX(insn->rexPrefix) << 3)
1387201360Srdivacky                                                  | insn->opcodeModifier));
1388249423Sdim    if (insn->rexPrefix &&
1389206124Srdivacky        insn->opcodeRegister >= MODRM_REG_AL + 0x4 &&
1390206124Srdivacky        insn->opcodeRegister < MODRM_REG_AL + 0x8) {
1391201360Srdivacky      insn->opcodeRegister = (Reg)(MODRM_REG_SPL
1392201360Srdivacky                                   + (insn->opcodeRegister - MODRM_REG_AL - 4));
1393201360Srdivacky    }
1394249423Sdim
1395201360Srdivacky    break;
1396201360Srdivacky  case 2:
1397201360Srdivacky    insn->opcodeRegister = (Reg)(MODRM_REG_AX
1398249423Sdim                                 + ((bFromREX(insn->rexPrefix) << 3)
1399201360Srdivacky                                    | insn->opcodeModifier));
1400201360Srdivacky    break;
1401201360Srdivacky  case 4:
1402206124Srdivacky    insn->opcodeRegister = (Reg)(MODRM_REG_EAX
1403249423Sdim                                 + ((bFromREX(insn->rexPrefix) << 3)
1404201360Srdivacky                                    | insn->opcodeModifier));
1405201360Srdivacky    break;
1406201360Srdivacky  case 8:
1407249423Sdim    insn->opcodeRegister = (Reg)(MODRM_REG_RAX
1408249423Sdim                                 + ((bFromREX(insn->rexPrefix) << 3)
1409201360Srdivacky                                    | insn->opcodeModifier));
1410201360Srdivacky    break;
1411201360Srdivacky  }
1412249423Sdim
1413206124Srdivacky  return 0;
1414201360Srdivacky}
1415201360Srdivacky
1416201360Srdivacky/*
1417201360Srdivacky * readImmediate - Consumes an immediate operand from an instruction, given the
1418201360Srdivacky *   desired operand size.
1419201360Srdivacky *
1420201360Srdivacky * @param insn  - The instruction whose operand is to be read.
1421201360Srdivacky * @param size  - The width (in bytes) of the operand.
1422201360Srdivacky * @return      - 0 if the immediate was successfully consumed; nonzero
1423201360Srdivacky *                otherwise.
1424201360Srdivacky */
1425201360Srdivackystatic int readImmediate(struct InternalInstruction* insn, uint8_t size) {
1426201360Srdivacky  uint8_t imm8;
1427201360Srdivacky  uint16_t imm16;
1428201360Srdivacky  uint32_t imm32;
1429201360Srdivacky  uint64_t imm64;
1430249423Sdim
1431201360Srdivacky  dbgprintf(insn, "readImmediate()");
1432249423Sdim
1433206124Srdivacky  if (insn->numImmediatesConsumed == 2) {
1434206124Srdivacky    debug("Already consumed two immediates");
1435206124Srdivacky    return -1;
1436206124Srdivacky  }
1437249423Sdim
1438201360Srdivacky  if (size == 0)
1439201360Srdivacky    size = insn->immediateSize;
1440201360Srdivacky  else
1441201360Srdivacky    insn->immediateSize = size;
1442234353Sdim  insn->immediateOffset = insn->readerCursor - insn->startLocation;
1443249423Sdim
1444201360Srdivacky  switch (size) {
1445201360Srdivacky  case 1:
1446201360Srdivacky    if (consumeByte(insn, &imm8))
1447201360Srdivacky      return -1;
1448201360Srdivacky    insn->immediates[insn->numImmediatesConsumed] = imm8;
1449201360Srdivacky    break;
1450201360Srdivacky  case 2:
1451201360Srdivacky    if (consumeUInt16(insn, &imm16))
1452201360Srdivacky      return -1;
1453201360Srdivacky    insn->immediates[insn->numImmediatesConsumed] = imm16;
1454201360Srdivacky    break;
1455201360Srdivacky  case 4:
1456201360Srdivacky    if (consumeUInt32(insn, &imm32))
1457201360Srdivacky      return -1;
1458201360Srdivacky    insn->immediates[insn->numImmediatesConsumed] = imm32;
1459201360Srdivacky    break;
1460201360Srdivacky  case 8:
1461201360Srdivacky    if (consumeUInt64(insn, &imm64))
1462201360Srdivacky      return -1;
1463201360Srdivacky    insn->immediates[insn->numImmediatesConsumed] = imm64;
1464201360Srdivacky    break;
1465201360Srdivacky  }
1466249423Sdim
1467201360Srdivacky  insn->numImmediatesConsumed++;
1468249423Sdim
1469201360Srdivacky  return 0;
1470201360Srdivacky}
1471201360Srdivacky
1472201360Srdivacky/*
1473226633Sdim * readVVVV - Consumes vvvv from an instruction if it has a VEX prefix.
1474221345Sdim *
1475221345Sdim * @param insn  - The instruction whose operand is to be read.
1476226633Sdim * @return      - 0 if the vvvv was successfully consumed; nonzero
1477221345Sdim *                otherwise.
1478221345Sdim */
1479221345Sdimstatic int readVVVV(struct InternalInstruction* insn) {
1480221345Sdim  dbgprintf(insn, "readVVVV()");
1481249423Sdim
1482221345Sdim  if (insn->vexSize == 3)
1483221345Sdim    insn->vvvv = vvvvFromVEX3of3(insn->vexPrefix[2]);
1484221345Sdim  else if (insn->vexSize == 2)
1485221345Sdim    insn->vvvv = vvvvFromVEX2of2(insn->vexPrefix[1]);
1486221345Sdim  else
1487221345Sdim    return -1;
1488221345Sdim
1489226633Sdim  if (insn->mode != MODE_64BIT)
1490226633Sdim    insn->vvvv &= 0x7;
1491226633Sdim
1492221345Sdim  return 0;
1493221345Sdim}
1494221345Sdim
1495221345Sdim/*
1496201360Srdivacky * readOperands - Consults the specifier for an instruction and consumes all
1497201360Srdivacky *   operands for that instruction, interpreting them as it goes.
1498201360Srdivacky *
1499201360Srdivacky * @param insn  - The instruction whose operands are to be read and interpreted.
1500201360Srdivacky * @return      - 0 if all operands could be read; nonzero otherwise.
1501201360Srdivacky */
1502201360Srdivackystatic int readOperands(struct InternalInstruction* insn) {
1503201360Srdivacky  int index;
1504226633Sdim  int hasVVVV, needVVVV;
1505234353Sdim  int sawRegImm = 0;
1506249423Sdim
1507201360Srdivacky  dbgprintf(insn, "readOperands()");
1508226633Sdim
1509226633Sdim  /* If non-zero vvvv specified, need to make sure one of the operands
1510226633Sdim     uses it. */
1511226633Sdim  hasVVVV = !readVVVV(insn);
1512226633Sdim  needVVVV = hasVVVV && (insn->vvvv != 0);
1513249423Sdim
1514201360Srdivacky  for (index = 0; index < X86_MAX_OPERANDS; ++index) {
1515239462Sdim    switch (x86OperandSets[insn->spec->operands][index].encoding) {
1516201360Srdivacky    case ENCODING_NONE:
1517201360Srdivacky      break;
1518201360Srdivacky    case ENCODING_REG:
1519201360Srdivacky    case ENCODING_RM:
1520201360Srdivacky      if (readModRM(insn))
1521201360Srdivacky        return -1;
1522239462Sdim      if (fixupReg(insn, &x86OperandSets[insn->spec->operands][index]))
1523201360Srdivacky        return -1;
1524201360Srdivacky      break;
1525201360Srdivacky    case ENCODING_CB:
1526201360Srdivacky    case ENCODING_CW:
1527201360Srdivacky    case ENCODING_CD:
1528201360Srdivacky    case ENCODING_CP:
1529201360Srdivacky    case ENCODING_CO:
1530201360Srdivacky    case ENCODING_CT:
1531201360Srdivacky      dbgprintf(insn, "We currently don't hande code-offset encodings");
1532201360Srdivacky      return -1;
1533201360Srdivacky    case ENCODING_IB:
1534234353Sdim      if (sawRegImm) {
1535234353Sdim        /* Saw a register immediate so don't read again and instead split the
1536234353Sdim           previous immediate.  FIXME: This is a hack. */
1537234353Sdim        insn->immediates[insn->numImmediatesConsumed] =
1538234353Sdim          insn->immediates[insn->numImmediatesConsumed - 1] & 0xf;
1539234353Sdim        ++insn->numImmediatesConsumed;
1540234353Sdim        break;
1541234353Sdim      }
1542201360Srdivacky      if (readImmediate(insn, 1))
1543201360Srdivacky        return -1;
1544239462Sdim      if (x86OperandSets[insn->spec->operands][index].type == TYPE_IMM3 &&
1545207618Srdivacky          insn->immediates[insn->numImmediatesConsumed - 1] > 7)
1546207618Srdivacky        return -1;
1547239462Sdim      if (x86OperandSets[insn->spec->operands][index].type == TYPE_IMM5 &&
1548234353Sdim          insn->immediates[insn->numImmediatesConsumed - 1] > 31)
1549234353Sdim        return -1;
1550239462Sdim      if (x86OperandSets[insn->spec->operands][index].type == TYPE_XMM128 ||
1551239462Sdim          x86OperandSets[insn->spec->operands][index].type == TYPE_XMM256)
1552234353Sdim        sawRegImm = 1;
1553201360Srdivacky      break;
1554201360Srdivacky    case ENCODING_IW:
1555201360Srdivacky      if (readImmediate(insn, 2))
1556201360Srdivacky        return -1;
1557201360Srdivacky      break;
1558201360Srdivacky    case ENCODING_ID:
1559201360Srdivacky      if (readImmediate(insn, 4))
1560201360Srdivacky        return -1;
1561201360Srdivacky      break;
1562201360Srdivacky    case ENCODING_IO:
1563201360Srdivacky      if (readImmediate(insn, 8))
1564201360Srdivacky        return -1;
1565201360Srdivacky      break;
1566201360Srdivacky    case ENCODING_Iv:
1567206124Srdivacky      if (readImmediate(insn, insn->immediateSize))
1568206124Srdivacky        return -1;
1569207618Srdivacky      break;
1570201360Srdivacky    case ENCODING_Ia:
1571206124Srdivacky      if (readImmediate(insn, insn->addressSize))
1572206124Srdivacky        return -1;
1573201360Srdivacky      break;
1574201360Srdivacky    case ENCODING_RB:
1575206124Srdivacky      if (readOpcodeRegister(insn, 1))
1576206124Srdivacky        return -1;
1577201360Srdivacky      break;
1578201360Srdivacky    case ENCODING_RW:
1579206124Srdivacky      if (readOpcodeRegister(insn, 2))
1580206124Srdivacky        return -1;
1581201360Srdivacky      break;
1582201360Srdivacky    case ENCODING_RD:
1583206124Srdivacky      if (readOpcodeRegister(insn, 4))
1584206124Srdivacky        return -1;
1585201360Srdivacky      break;
1586201360Srdivacky    case ENCODING_RO:
1587206124Srdivacky      if (readOpcodeRegister(insn, 8))
1588206124Srdivacky        return -1;
1589201360Srdivacky      break;
1590201360Srdivacky    case ENCODING_Rv:
1591206124Srdivacky      if (readOpcodeRegister(insn, 0))
1592206124Srdivacky        return -1;
1593201360Srdivacky      break;
1594201360Srdivacky    case ENCODING_I:
1595206124Srdivacky      if (readOpcodeModifier(insn))
1596206124Srdivacky        return -1;
1597221345Sdim      break;
1598221345Sdim    case ENCODING_VVVV:
1599226633Sdim      needVVVV = 0; /* Mark that we have found a VVVV operand. */
1600226633Sdim      if (!hasVVVV)
1601221345Sdim        return -1;
1602239462Sdim      if (fixupReg(insn, &x86OperandSets[insn->spec->operands][index]))
1603221345Sdim        return -1;
1604221345Sdim      break;
1605201360Srdivacky    case ENCODING_DUP:
1606201360Srdivacky      break;
1607201360Srdivacky    default:
1608201360Srdivacky      dbgprintf(insn, "Encountered an operand with an unknown encoding.");
1609201360Srdivacky      return -1;
1610201360Srdivacky    }
1611201360Srdivacky  }
1612226633Sdim
1613226633Sdim  /* If we didn't find ENCODING_VVVV operand, but non-zero vvvv present, fail */
1614226633Sdim  if (needVVVV) return -1;
1615249423Sdim
1616201360Srdivacky  return 0;
1617201360Srdivacky}
1618201360Srdivacky
1619201360Srdivacky/*
1620201360Srdivacky * decodeInstruction - Reads and interprets a full instruction provided by the
1621201360Srdivacky *   user.
1622201360Srdivacky *
1623249423Sdim * @param insn      - A pointer to the instruction to be populated.  Must be
1624201360Srdivacky *                    pre-allocated.
1625201360Srdivacky * @param reader    - The function to be used to read the instruction's bytes.
1626201360Srdivacky * @param readerArg - A generic argument to be passed to the reader to store
1627201360Srdivacky *                    any internal state.
1628201360Srdivacky * @param logger    - If non-NULL, the function to be used to write log messages
1629201360Srdivacky *                    and warnings.
1630201360Srdivacky * @param loggerArg - A generic argument to be passed to the logger to store
1631201360Srdivacky *                    any internal state.
1632201360Srdivacky * @param startLoc  - The address (in the reader's address space) of the first
1633201360Srdivacky *                    byte in the instruction.
1634201360Srdivacky * @param mode      - The mode (real mode, IA-32e, or IA-32e in 64-bit mode) to
1635201360Srdivacky *                    decode the instruction in.
1636201360Srdivacky * @return          - 0 if the instruction's memory could be read; nonzero if
1637201360Srdivacky *                    not.
1638201360Srdivacky */
1639201360Srdivackyint decodeInstruction(struct InternalInstruction* insn,
1640201360Srdivacky                      byteReader_t reader,
1641243830Sdim                      const void* readerArg,
1642201360Srdivacky                      dlog_t logger,
1643201360Srdivacky                      void* loggerArg,
1644243830Sdim                      const void* miiArg,
1645201360Srdivacky                      uint64_t startLoc,
1646201360Srdivacky                      DisassemblerMode mode) {
1647201360Srdivacky  memset(insn, 0, sizeof(struct InternalInstruction));
1648249423Sdim
1649201360Srdivacky  insn->reader = reader;
1650201360Srdivacky  insn->readerArg = readerArg;
1651201360Srdivacky  insn->dlog = logger;
1652201360Srdivacky  insn->dlogArg = loggerArg;
1653201360Srdivacky  insn->startLocation = startLoc;
1654201360Srdivacky  insn->readerCursor = startLoc;
1655201360Srdivacky  insn->mode = mode;
1656201360Srdivacky  insn->numImmediatesConsumed = 0;
1657249423Sdim
1658201360Srdivacky  if (readPrefixes(insn)       ||
1659201360Srdivacky      readOpcode(insn)         ||
1660234353Sdim      getID(insn, miiArg)      ||
1661201360Srdivacky      insn->instructionID == 0 ||
1662201360Srdivacky      readOperands(insn))
1663201360Srdivacky    return -1;
1664239462Sdim
1665239462Sdim  insn->operands = &x86OperandSets[insn->spec->operands][0];
1666249423Sdim
1667201360Srdivacky  insn->length = insn->readerCursor - insn->startLocation;
1668249423Sdim
1669205407Srdivacky  dbgprintf(insn, "Read from 0x%llx to 0x%llx: length %zu",
1670205407Srdivacky            startLoc, insn->readerCursor, insn->length);
1671249423Sdim
1672201360Srdivacky  if (insn->length > 15)
1673201360Srdivacky    dbgprintf(insn, "Instruction exceeds 15-byte limit");
1674249423Sdim
1675201360Srdivacky  return 0;
1676201360Srdivacky}
1677