/* X86DisassemblerDecoder.h revision 263508 */
/*===-- X86DisassemblerDecoder.h - Disassembler decoder ------------*- C -*-===*
 *
 *                     The LLVM Compiler Infrastructure
 *
 * This file is distributed under the University of Illinois Open Source
 * License. See LICENSE.TXT for details.
 *
 *===----------------------------------------------------------------------===*
 *
 * This file is part of the X86 Disassembler.
 * It contains the public interface of the instruction decoder.
 * Documentation for the disassembler can be found in X86Disassembler.h.
 *
 *===----------------------------------------------------------------------===*/

16#ifndef X86DISASSEMBLERDECODER_H
17#define X86DISASSEMBLERDECODER_H
18
19#ifdef __cplusplus
20extern "C" {
21#endif
22
23#define INSTRUCTION_SPECIFIER_FIELDS \
24  uint16_t operands;
25
26#define INSTRUCTION_IDS     \
27  uint16_t instructionIDs;
28
29#include "X86DisassemblerDecoderCommon.h"
30
31#undef INSTRUCTION_SPECIFIER_FIELDS
32#undef INSTRUCTION_IDS
33
/*
 * Accessor macros for the bit fields of the ModR/M, SIB and REX bytes of an
 * Intel instruction.  Each macro takes the raw byte and yields the selected
 * field shifted down to bit 0.
 *
 *   ModR/M:  mod   = bits 7-6,  reg   = bits 5-3,  r/m  = bits 2-0
 *   SIB:     scale = bits 7-6,  index = bits 5-3,  base = bits 2-0
 *   REX:     W = bit 3,  R = bit 2,  X = bit 1,  B = bit 0
 */
#define modFromModRM(modRM)  (((modRM) & 0xc0) >> 6)
#define regFromModRM(modRM)  (((modRM) & 0x38) >> 3)
#define rmFromModRM(modRM)   ((modRM) & 0x7)
#define scaleFromSIB(sib)    (((sib) & 0xc0) >> 6)
#define indexFromSIB(sib)    (((sib) & 0x38) >> 3)
#define baseFromSIB(sib)     ((sib) & 0x7)
#define wFromREX(rex)        (((rex) & 0x8) >> 3)
#define rFromREX(rex)        (((rex) & 0x4) >> 2)
#define xFromREX(rex)        (((rex) & 0x2) >> 1)
#define bFromREX(rex)        ((rex) & 0x1)

/*
 * Field accessors for the three-byte VEX prefix.  Macro names read as
 * <field>FromVEX<n>of3, where <n> is the prefix byte the field is taken from.
 * The R, X, B and vvvv accessors complement the byte before masking, i.e.
 * those fields are treated as stored inverted in the prefix.
 *
 *   byte 2:  R = bit 7,  X = bit 6,  B = bit 5,  m-mmmm = bits 4-0
 *   byte 3:  W = bit 7,  vvvv = bits 6-3,  L = bit 2,  pp = bits 1-0
 */
#define rFromVEX2of3(vex)       (((~(vex)) & 0x80) >> 7)
#define xFromVEX2of3(vex)       (((~(vex)) & 0x40) >> 6)
#define bFromVEX2of3(vex)       (((~(vex)) & 0x20) >> 5)
#define mmmmmFromVEX2of3(vex)   ((vex) & 0x1f)
#define wFromVEX3of3(vex)       (((vex) & 0x80) >> 7)
#define vvvvFromVEX3of3(vex)    (((~(vex)) & 0x78) >> 3)
#define lFromVEX3of3(vex)       (((vex) & 0x4) >> 2)
#define ppFromVEX3of3(vex)      ((vex) & 0x3)

/*
 * Field accessors for the second byte of the two-byte VEX prefix.  The R and
 * vvvv accessors complement the byte before masking, i.e. those fields are
 * treated as stored inverted in the prefix.
 *
 *   byte 2:  R = bit 7,  vvvv = bits 6-3,  L = bit 2,  pp = bits 1-0
 */
#define rFromVEX2of2(vex)       (((~(vex)) & 0x80) >> 7)
#define vvvvFromVEX2of2(vex)    (((~(vex)) & 0x78) >> 3)
#define lFromVEX2of2(vex)       (((vex) & 0x4) >> 2)
#define ppFromVEX2of2(vex)      ((vex) & 0x3)

/*
 * Field accessors for the three-byte XOP prefix.  Macro names read as
 * <field>FromXOP<n>of3, where <n> is the prefix byte the field is taken from.
 * The R, X, B and vvvv accessors complement the byte before masking, i.e.
 * those fields are treated as stored inverted in the prefix.
 *
 *   byte 2:  R = bit 7,  X = bit 6,  B = bit 5,  m-mmmm = bits 4-0
 *   byte 3:  W = bit 7,  vvvv = bits 6-3,  L = bit 2,  pp = bits 1-0
 */
#define rFromXOP2of3(xop)       (((~(xop)) & 0x80) >> 7)
#define xFromXOP2of3(xop)       (((~(xop)) & 0x40) >> 6)
#define bFromXOP2of3(xop)       (((~(xop)) & 0x20) >> 5)
#define mmmmmFromXOP2of3(xop)   ((xop) & 0x1f)
#define wFromXOP3of3(xop)       (((xop) & 0x80) >> 7)
#define vvvvFromXOP3of3(xop)    (((~(xop)) & 0x78) >> 3)
#define lFromXOP3of3(xop)       (((xop) & 0x4) >> 2)
#define ppFromXOP3of3(xop)      ((xop) & 0x3)

/*
 * X-macro lists of Intel registers for use by the decoder.
 *
 * Each list is a sequence of ENTRY(name) invocations.  A user defines ENTRY,
 * expands one or more lists, and undefines ENTRY again; the enums in this
 * header are built that way.  The order of the entries is significant: an
 * enum built from a list numbers its members in list order.
 */

/* 8-bit general-purpose registers. */
#define REGS_8BIT     \
  ENTRY(AL)           \
  ENTRY(CL)           \
  ENTRY(DL)           \
  ENTRY(BL)           \
  ENTRY(AH)           \
  ENTRY(CH)           \
  ENTRY(DH)           \
  ENTRY(BH)           \
  ENTRY(R8B)          \
  ENTRY(R9B)          \
  ENTRY(R10B)         \
  ENTRY(R11B)         \
  ENTRY(R12B)         \
  ENTRY(R13B)         \
  ENTRY(R14B)         \
  ENTRY(R15B)         \
  ENTRY(SPL)          \
  ENTRY(BPL)          \
  ENTRY(SIL)          \
  ENTRY(DIL)

/*
 * Effective-address bases for 16-bit addressing, as ENTRY(name) invocations.
 * The first four entries name register pairs (e.g. BX_SI).  List order
 * determines the numbering of any enum built from this list.
 */
#define EA_BASES_16BIT  \
  ENTRY(BX_SI)          \
  ENTRY(BX_DI)          \
  ENTRY(BP_SI)          \
  ENTRY(BP_DI)          \
  ENTRY(SI)             \
  ENTRY(DI)             \
  ENTRY(BP)             \
  ENTRY(BX)             \
  ENTRY(R8W)            \
  ENTRY(R9W)            \
  ENTRY(R10W)           \
  ENTRY(R11W)           \
  ENTRY(R12W)           \
  ENTRY(R13W)           \
  ENTRY(R14W)           \
  ENTRY(R15W)

/*
 * 16-bit general-purpose registers, as ENTRY(name) invocations.  List order
 * determines the numbering of any enum built from this list.
 */
#define REGS_16BIT    \
  ENTRY(AX)           \
  ENTRY(CX)           \
  ENTRY(DX)           \
  ENTRY(BX)           \
  ENTRY(SP)           \
  ENTRY(BP)           \
  ENTRY(SI)           \
  ENTRY(DI)           \
  ENTRY(R8W)          \
  ENTRY(R9W)          \
  ENTRY(R10W)         \
  ENTRY(R11W)         \
  ENTRY(R12W)         \
  ENTRY(R13W)         \
  ENTRY(R14W)         \
  ENTRY(R15W)

/*
 * Effective-address bases for 32-bit addressing, as ENTRY(name) invocations.
 * The entry "sib" sits at the position REGS_32BIT gives to ESP; the SIBIndex
 * documentation in this header describes sib as synonymous with NONE there.
 * List order determines the numbering of any enum built from this list.
 */
#define EA_BASES_32BIT  \
  ENTRY(EAX)            \
  ENTRY(ECX)            \
  ENTRY(EDX)            \
  ENTRY(EBX)            \
  ENTRY(sib)            \
  ENTRY(EBP)            \
  ENTRY(ESI)            \
  ENTRY(EDI)            \
  ENTRY(R8D)            \
  ENTRY(R9D)            \
  ENTRY(R10D)           \
  ENTRY(R11D)           \
  ENTRY(R12D)           \
  ENTRY(R13D)           \
  ENTRY(R14D)           \
  ENTRY(R15D)

/*
 * 32-bit general-purpose registers, as ENTRY(name) invocations.  List order
 * determines the numbering of any enum built from this list.
 */
#define REGS_32BIT  \
  ENTRY(EAX)        \
  ENTRY(ECX)        \
  ENTRY(EDX)        \
  ENTRY(EBX)        \
  ENTRY(ESP)        \
  ENTRY(EBP)        \
  ENTRY(ESI)        \
  ENTRY(EDI)        \
  ENTRY(R8D)        \
  ENTRY(R9D)        \
  ENTRY(R10D)       \
  ENTRY(R11D)       \
  ENTRY(R12D)       \
  ENTRY(R13D)       \
  ENTRY(R14D)       \
  ENTRY(R15D)

/*
 * Effective-address bases for 64-bit addressing, as ENTRY(name) invocations.
 * The entry "sib64" sits at the position REGS_64BIT gives to RSP.  List order
 * determines the numbering of any enum built from this list.
 */
#define EA_BASES_64BIT  \
  ENTRY(RAX)            \
  ENTRY(RCX)            \
  ENTRY(RDX)            \
  ENTRY(RBX)            \
  ENTRY(sib64)          \
  ENTRY(RBP)            \
  ENTRY(RSI)            \
  ENTRY(RDI)            \
  ENTRY(R8)             \
  ENTRY(R9)             \
  ENTRY(R10)            \
  ENTRY(R11)            \
  ENTRY(R12)            \
  ENTRY(R13)            \
  ENTRY(R14)            \
  ENTRY(R15)

/*
 * 64-bit general-purpose registers, as ENTRY(name) invocations.  List order
 * determines the numbering of any enum built from this list.
 */
#define REGS_64BIT  \
  ENTRY(RAX)        \
  ENTRY(RCX)        \
  ENTRY(RDX)        \
  ENTRY(RBX)        \
  ENTRY(RSP)        \
  ENTRY(RBP)        \
  ENTRY(RSI)        \
  ENTRY(RDI)        \
  ENTRY(R8)         \
  ENTRY(R9)         \
  ENTRY(R10)        \
  ENTRY(R11)        \
  ENTRY(R12)        \
  ENTRY(R13)        \
  ENTRY(R14)        \
  ENTRY(R15)

/*
 * MMX registers MM0-MM7, as ENTRY(name) invocations in ascending order.
 */
#define REGS_MMX  \
  ENTRY(MM0)      \
  ENTRY(MM1)      \
  ENTRY(MM2)      \
  ENTRY(MM3)      \
  ENTRY(MM4)      \
  ENTRY(MM5)      \
  ENTRY(MM6)      \
  ENTRY(MM7)

/*
 * XMM registers XMM0-XMM31, as ENTRY(name) invocations in ascending order.
 */
#define REGS_XMM  \
  ENTRY(XMM0)     \
  ENTRY(XMM1)     \
  ENTRY(XMM2)     \
  ENTRY(XMM3)     \
  ENTRY(XMM4)     \
  ENTRY(XMM5)     \
  ENTRY(XMM6)     \
  ENTRY(XMM7)     \
  ENTRY(XMM8)     \
  ENTRY(XMM9)     \
  ENTRY(XMM10)    \
  ENTRY(XMM11)    \
  ENTRY(XMM12)    \
  ENTRY(XMM13)    \
  ENTRY(XMM14)    \
  ENTRY(XMM15)    \
  ENTRY(XMM16)    \
  ENTRY(XMM17)    \
  ENTRY(XMM18)    \
  ENTRY(XMM19)    \
  ENTRY(XMM20)    \
  ENTRY(XMM21)    \
  ENTRY(XMM22)    \
  ENTRY(XMM23)    \
  ENTRY(XMM24)    \
  ENTRY(XMM25)    \
  ENTRY(XMM26)    \
  ENTRY(XMM27)    \
  ENTRY(XMM28)    \
  ENTRY(XMM29)    \
  ENTRY(XMM30)    \
  ENTRY(XMM31)

/*
 * YMM registers YMM0-YMM31, as ENTRY(name) invocations in ascending order.
 */
#define REGS_YMM  \
  ENTRY(YMM0)     \
  ENTRY(YMM1)     \
  ENTRY(YMM2)     \
  ENTRY(YMM3)     \
  ENTRY(YMM4)     \
  ENTRY(YMM5)     \
  ENTRY(YMM6)     \
  ENTRY(YMM7)     \
  ENTRY(YMM8)     \
  ENTRY(YMM9)     \
  ENTRY(YMM10)    \
  ENTRY(YMM11)    \
  ENTRY(YMM12)    \
  ENTRY(YMM13)    \
  ENTRY(YMM14)    \
  ENTRY(YMM15)    \
  ENTRY(YMM16)    \
  ENTRY(YMM17)    \
  ENTRY(YMM18)    \
  ENTRY(YMM19)    \
  ENTRY(YMM20)    \
  ENTRY(YMM21)    \
  ENTRY(YMM22)    \
  ENTRY(YMM23)    \
  ENTRY(YMM24)    \
  ENTRY(YMM25)    \
  ENTRY(YMM26)    \
  ENTRY(YMM27)    \
  ENTRY(YMM28)    \
  ENTRY(YMM29)    \
  ENTRY(YMM30)    \
  ENTRY(YMM31)

/*
 * ZMM registers ZMM0-ZMM31, as ENTRY(name) invocations in ascending order.
 */
#define REGS_ZMM  \
  ENTRY(ZMM0)     \
  ENTRY(ZMM1)     \
  ENTRY(ZMM2)     \
  ENTRY(ZMM3)     \
  ENTRY(ZMM4)     \
  ENTRY(ZMM5)     \
  ENTRY(ZMM6)     \
  ENTRY(ZMM7)     \
  ENTRY(ZMM8)     \
  ENTRY(ZMM9)     \
  ENTRY(ZMM10)    \
  ENTRY(ZMM11)    \
  ENTRY(ZMM12)    \
  ENTRY(ZMM13)    \
  ENTRY(ZMM14)    \
  ENTRY(ZMM15)    \
  ENTRY(ZMM16)    \
  ENTRY(ZMM17)    \
  ENTRY(ZMM18)    \
  ENTRY(ZMM19)    \
  ENTRY(ZMM20)    \
  ENTRY(ZMM21)    \
  ENTRY(ZMM22)    \
  ENTRY(ZMM23)    \
  ENTRY(ZMM24)    \
  ENTRY(ZMM25)    \
  ENTRY(ZMM26)    \
  ENTRY(ZMM27)    \
  ENTRY(ZMM28)    \
  ENTRY(ZMM29)    \
  ENTRY(ZMM30)    \
  ENTRY(ZMM31)

/*
 * Segment registers, as ENTRY(name) invocations.  List order (ES, CS, SS, DS,
 * FS, GS) determines the numbering of any enum built from this list; note
 * that it differs from the order of the SegmentOverride enum in this header.
 */
#define REGS_SEGMENT \
  ENTRY(ES)          \
  ENTRY(CS)          \
  ENTRY(SS)          \
  ENTRY(DS)          \
  ENTRY(FS)          \
  ENTRY(GS)

/*
 * Debug registers DR0-DR7, as ENTRY(name) invocations in ascending order.
 */
#define REGS_DEBUG  \
  ENTRY(DR0)        \
  ENTRY(DR1)        \
  ENTRY(DR2)        \
  ENTRY(DR3)        \
  ENTRY(DR4)        \
  ENTRY(DR5)        \
  ENTRY(DR6)        \
  ENTRY(DR7)

/*
 * Control registers CR0-CR8, as ENTRY(name) invocations in ascending order.
 */
#define REGS_CONTROL  \
  ENTRY(CR0)          \
  ENTRY(CR1)          \
  ENTRY(CR2)          \
  ENTRY(CR3)          \
  ENTRY(CR4)          \
  ENTRY(CR5)          \
  ENTRY(CR6)          \
  ENTRY(CR7)          \
  ENTRY(CR8)

/*
 * Every effective-address base: the 16-bit list, then the 32-bit list, then
 * the 64-bit list.
 */
#define ALL_EA_BASES  \
  EA_BASES_16BIT      \
  EA_BASES_32BIT      \
  EA_BASES_64BIT

/*
 * Every register usable as a SIB base: the 32-bit general-purpose registers
 * followed by the 64-bit ones.
 */
#define ALL_SIB_BASES \
  REGS_32BIT          \
  REGS_64BIT

/*
 * Every register known to the decoder: all register lists in the order given
 * below, followed by a single trailing entry for RIP.
 */
#define ALL_REGS      \
  REGS_8BIT           \
  REGS_16BIT          \
  REGS_32BIT          \
  REGS_64BIT          \
  REGS_MMX            \
  REGS_XMM            \
  REGS_YMM            \
  REGS_ZMM            \
  REGS_SEGMENT        \
  REGS_DEBUG          \
  REGS_CONTROL        \
  ENTRY(RIP)

369/*
370 * EABase - All possible values of the base field for effective-address
371 *   computations, a.k.a. the Mod and R/M fields of the ModR/M byte.  We
372 *   distinguish between bases (EA_BASE_*) and registers that just happen to be
373 *   referred to when Mod == 0b11 (EA_REG_*).
374 */
375typedef enum {
376  EA_BASE_NONE,
377#define ENTRY(x) EA_BASE_##x,
378  ALL_EA_BASES
379#undef ENTRY
380#define ENTRY(x) EA_REG_##x,
381  ALL_REGS
382#undef ENTRY
383  EA_max
384} EABase;
385
386/*
387 * SIBIndex - All possible values of the SIB index field.
388 *   Borrows entries from ALL_EA_BASES with the special case that
389 *   sib is synonymous with NONE.
390 * Vector SIB: index can be XMM or YMM.
391 */
392typedef enum {
393  SIB_INDEX_NONE,
394#define ENTRY(x) SIB_INDEX_##x,
395  ALL_EA_BASES
396  REGS_XMM
397  REGS_YMM
398  REGS_ZMM
399#undef ENTRY
400  SIB_INDEX_max
401} SIBIndex;
402
403/*
404 * SIBBase - All possible values of the SIB base field.
405 */
406typedef enum {
407  SIB_BASE_NONE,
408#define ENTRY(x) SIB_BASE_##x,
409  ALL_SIB_BASES
410#undef ENTRY
411  SIB_BASE_max
412} SIBBase;
413
/*
 * EADisplacement - Possible displacement types for effective-address
 *   computations: no displacement, or a displacement of 8, 16 or 32 bits.
 */
typedef enum {
  EA_DISP_NONE,
  EA_DISP_8,
  EA_DISP_16,
  EA_DISP_32
} EADisplacement;

425/*
426 * Reg - All possible values of the reg field in the ModR/M byte.
427 */
428typedef enum {
429#define ENTRY(x) MODRM_REG_##x,
430  ALL_REGS
431#undef ENTRY
432  MODRM_REG_max
433} Reg;
434
/*
 * SegmentOverride - All possible segment overrides.
 *   SEG_OVERRIDE_NONE is 0; SEG_OVERRIDE_max is a sentinel that follows the
 *   last real override.
 */
typedef enum {
  SEG_OVERRIDE_NONE,
  SEG_OVERRIDE_CS,
  SEG_OVERRIDE_SS,
  SEG_OVERRIDE_DS,
  SEG_OVERRIDE_ES,
  SEG_OVERRIDE_FS,
  SEG_OVERRIDE_GS,
  SEG_OVERRIDE_max
} SegmentOverride;

/*
 * VEXLeadingOpcodeByte - Possible values for the VEX.m-mmmm field.  Each
 *   member is named after the leading opcode byte(s) it stands for.
 */
typedef enum {
  VEX_LOB_0F = 0x1,
  VEX_LOB_0F38 = 0x2,
  VEX_LOB_0F3A = 0x3
} VEXLeadingOpcodeByte;

/*
 * XOPMapSelect - Opcode-map selector values for XOP-prefixed instructions.
 *   NOTE(review): presumably these are compared against the value extracted
 *   by mmmmmFromXOP2of3 -- confirm against the decoder.
 */
typedef enum {
  XOP_MAP_SELECT_8 = 0x8,
  XOP_MAP_SELECT_9 = 0x9,
  XOP_MAP_SELECT_A = 0xA
} XOPMapSelect;

/*
 * VEXPrefixCode - Possible values for the VEX.pp field.  Each member is named
 *   after the legacy prefix byte it stands for (none, 0x66, 0xf3, 0xf2).
 */
typedef enum {
  VEX_PREFIX_NONE = 0x0,
  VEX_PREFIX_66 = 0x1,
  VEX_PREFIX_F3 = 0x2,
  VEX_PREFIX_F2 = 0x3
} VEXPrefixCode;

/*
 * VEXXOPType - Which kind of VEX/XOP prefix an instruction carries: none, a
 *   two-byte VEX prefix, a three-byte VEX prefix, or an XOP prefix.
 */
typedef enum {
  TYPE_NO_VEX_XOP = 0x0,
  TYPE_VEX_2B = 0x1,
  TYPE_VEX_3B = 0x2,
  TYPE_XOP = 0x3
} VEXXOPType;

/* BOOL - One-byte flag type used for the flag fields of InternalInstruction. */
typedef uint8_t BOOL;

/*
 * byteReader_t - Type for the byte reader that the consumer must provide to
 *   the decoder.  Reads a single byte from the instruction's address space.
 * @param arg     - A baton that the consumer can associate with any internal
 *                  state that it needs.
 * @param byte    - A pointer to a single byte in memory that should be set to
 *                  contain the value at address.
 * @param address - The address in the instruction's address space that should
 *                  be read from.
 * @return        - -1 if the byte cannot be read for any reason; 0 otherwise.
 */
typedef int (*byteReader_t)(const void* arg, uint8_t* byte, uint64_t address);

/*
 * dlog_t - Type for the logging function that the consumer can provide to
 *   get debugging output from the decoder.
 * @param arg     - A baton that the consumer can associate with any internal
 *                  state that it needs.
 * @param log     - A string that contains the message.  The buffer will be
 *                  reused after the logger returns, so a logger that needs the
 *                  text later must copy it.
 */
typedef void (*dlog_t)(void* arg, const char *log);

508/*
509 * The x86 internal instruction, which is produced by the decoder.
510 */
511struct InternalInstruction {
512  /* Reader interface (C) */
513  byteReader_t reader;
514  /* Opaque value passed to the reader */
515  const void* readerArg;
516  /* The address of the next byte to read via the reader */
517  uint64_t readerCursor;
518
519  /* Logger interface (C) */
520  dlog_t dlog;
521  /* Opaque value passed to the logger */
522  void* dlogArg;
523
524  /* General instruction information */
525
526  /* The mode to disassemble for (64-bit, protected, real) */
527  DisassemblerMode mode;
528  /* The start of the instruction, usable with the reader */
529  uint64_t startLocation;
530  /* The length of the instruction, in bytes */
531  size_t length;
532
533  /* Prefix state */
534
535  /* 1 if the prefix byte corresponding to the entry is present; 0 if not */
536  uint8_t prefixPresent[0x100];
537  /* contains the location (for use with the reader) of the prefix byte */
538  uint64_t prefixLocations[0x100];
539  /* The value of the VEX/XOP prefix, if present */
540  uint8_t vexXopPrefix[3];
541  /* The length of the VEX prefix (0 if not present) */
542  VEXXOPType vexXopType;
543  /* The value of the REX prefix, if present */
544  uint8_t rexPrefix;
545  /* The location where a mandatory prefix would have to be (i.e., right before
546     the opcode, or right before the REX prefix if one is present) */
547  uint64_t necessaryPrefixLocation;
548  /* The segment override type */
549  SegmentOverride segmentOverride;
550  /* 1 if the prefix byte, 0xf2 or 0xf3 is xacquire or xrelease */
551  BOOL xAcquireRelease;
552
553  /* Sizes of various critical pieces of data, in bytes */
554  uint8_t registerSize;
555  uint8_t addressSize;
556  uint8_t displacementSize;
557  uint8_t immediateSize;
558
559  /* Offsets from the start of the instruction to the pieces of data, which is
560     needed to find relocation entries for adding symbolic operands */
561  uint8_t displacementOffset;
562  uint8_t immediateOffset;
563
564  /* opcode state */
565
566  /* The last byte of the opcode, not counting any ModR/M extension */
567  uint8_t opcode;
568  /* The ModR/M byte of the instruction, if it is an opcode extension */
569  uint8_t modRMExtension;
570
571  /* decode state */
572
573  /* The type of opcode, used for indexing into the array of decode tables */
574  OpcodeType opcodeType;
575  /* The instruction ID, extracted from the decode table */
576  uint16_t instructionID;
577  /* The specifier for the instruction, from the instruction info table */
578  const struct InstructionSpecifier *spec;
579
580  /* state for additional bytes, consumed during operand decode.  Pattern:
581     consumed___ indicates that the byte was already consumed and does not
582     need to be consumed again */
583
584  /* The VEX.vvvv field, which contains a third register operand for some AVX
585     instructions */
586  Reg                           vvvv;
587
588  /* The ModR/M byte, which contains most register operands and some portion of
589     all memory operands */
590  BOOL                          consumedModRM;
591  uint8_t                       modRM;
592
593  /* The SIB byte, used for more complex 32- or 64-bit memory operands */
594  BOOL                          consumedSIB;
595  uint8_t                       sib;
596
597  /* The displacement, used for memory operands */
598  BOOL                          consumedDisplacement;
599  int32_t                       displacement;
600
601  /* Immediates.  There can be two in some cases */
602  uint8_t                       numImmediatesConsumed;
603  uint8_t                       numImmediatesTranslated;
604  uint64_t                      immediates[2];
605
606  /* A register or immediate operand encoded into the opcode */
607  BOOL                          consumedOpcodeModifier;
608  uint8_t                       opcodeModifier;
609  Reg                           opcodeRegister;
610
611  /* Portions of the ModR/M byte */
612
613  /* These fields determine the allowable values for the ModR/M fields, which
614     depend on operand and address widths */
615  EABase                        eaBaseBase;
616  EABase                        eaRegBase;
617  Reg                           regBase;
618
619  /* The Mod and R/M fields can encode a base for an effective address, or a
620     register.  These are separated into two fields here */
621  EABase                        eaBase;
622  EADisplacement                eaDisplacement;
623  /* The reg field always encodes a register */
624  Reg                           reg;
625
626  /* SIB state */
627  SIBIndex                      sibIndex;
628  uint8_t                       sibScale;
629  SIBBase                       sibBase;
630
631  const struct OperandSpecifier *operands;
632};
633
634/* decodeInstruction - Decode one instruction and store the decoding results in
635 *   a buffer provided by the consumer.
636 * @param insn      - The buffer to store the instruction in.  Allocated by the
637 *                    consumer.
638 * @param reader    - The byteReader_t for the bytes to be read.
639 * @param readerArg - An argument to pass to the reader for storing context
640 *                    specific to the consumer.  May be NULL.
641 * @param logger    - The dlog_t to be used in printing status messages from the
642 *                    disassembler.  May be NULL.
643 * @param loggerArg - An argument to pass to the logger for storing context
644 *                    specific to the logger.  May be NULL.
645 * @param startLoc  - The address (in the reader's address space) of the first
646 *                    byte in the instruction.
647 * @param mode      - The mode (16-bit, 32-bit, 64-bit) to decode in.
648 * @return          - Nonzero if there was an error during decode, 0 otherwise.
649 */
650int decodeInstruction(struct InternalInstruction* insn,
651                      byteReader_t reader,
652                      const void* readerArg,
653                      dlog_t logger,
654                      void* loggerArg,
655                      const void* miiArg,
656                      uint64_t startLoc,
657                      DisassemblerMode mode);
658
/* x86DisassemblerDebug - C-accessible function for printing a message to
 *   debugs()
 * @param file  - The name of the file printing the debug message.
 * @param line  - The line number that printed the debug message.
 * @param s     - The message to print.
 */
void x86DisassemblerDebug(const char *file,
                          unsigned line,
                          const char *s);

/* x86DisassemblerGetInstrName - C-accessible lookup that yields a string for
 *   an opcode.
 * @param Opcode - The opcode to look up.
 * @param mii    - An opaque pointer supplied by the consumer.
 *                 NOTE(review): presumably the instruction-info object that
 *                 owns the name table -- confirm against the implementation.
 * @return       - A string for Opcode.  NOTE(review): presumably the
 *                 instruction's name, going by the function name; ownership
 *                 and lifetime are not visible from this header.
 */
const char *x86DisassemblerGetInstrName(unsigned Opcode, const void *mii);

672#ifdef __cplusplus
673}
674#endif
675
676#endif
677