/* X86DisassemblerDecoder.h revision 263508 */
1/*===-- X86DisassemblerDecoderInternal.h - Disassembler decoder ---*- C -*-===* 2 * 3 * The LLVM Compiler Infrastructure 4 * 5 * This file is distributed under the University of Illinois Open Source 6 * License. See LICENSE.TXT for details. 7 * 8 *===----------------------------------------------------------------------===* 9 * 10 * This file is part of the X86 Disassembler. 11 * It contains the public interface of the instruction decoder. 12 * Documentation for the disassembler can be found in X86Disassembler.h. 13 * 14 *===----------------------------------------------------------------------===*/ 15 16#ifndef X86DISASSEMBLERDECODER_H 17#define X86DISASSEMBLERDECODER_H 18 19#ifdef __cplusplus 20extern "C" { 21#endif 22 23#define INSTRUCTION_SPECIFIER_FIELDS \ 24 uint16_t operands; 25 26#define INSTRUCTION_IDS \ 27 uint16_t instructionIDs; 28 29#include "X86DisassemblerDecoderCommon.h" 30 31#undef INSTRUCTION_SPECIFIER_FIELDS 32#undef INSTRUCTION_IDS 33 34/* 35 * Accessor functions for various fields of an Intel instruction 36 */ 37#define modFromModRM(modRM) (((modRM) & 0xc0) >> 6) 38#define regFromModRM(modRM) (((modRM) & 0x38) >> 3) 39#define rmFromModRM(modRM) ((modRM) & 0x7) 40#define scaleFromSIB(sib) (((sib) & 0xc0) >> 6) 41#define indexFromSIB(sib) (((sib) & 0x38) >> 3) 42#define baseFromSIB(sib) ((sib) & 0x7) 43#define wFromREX(rex) (((rex) & 0x8) >> 3) 44#define rFromREX(rex) (((rex) & 0x4) >> 2) 45#define xFromREX(rex) (((rex) & 0x2) >> 1) 46#define bFromREX(rex) ((rex) & 0x1) 47 48#define rFromVEX2of3(vex) (((~(vex)) & 0x80) >> 7) 49#define xFromVEX2of3(vex) (((~(vex)) & 0x40) >> 6) 50#define bFromVEX2of3(vex) (((~(vex)) & 0x20) >> 5) 51#define mmmmmFromVEX2of3(vex) ((vex) & 0x1f) 52#define wFromVEX3of3(vex) (((vex) & 0x80) >> 7) 53#define vvvvFromVEX3of3(vex) (((~(vex)) & 0x78) >> 3) 54#define lFromVEX3of3(vex) (((vex) & 0x4) >> 2) 55#define ppFromVEX3of3(vex) ((vex) & 0x3) 56 57#define rFromVEX2of2(vex) (((~(vex)) & 0x80) >> 7) 58#define 
vvvvFromVEX2of2(vex) (((~(vex)) & 0x78) >> 3) 59#define lFromVEX2of2(vex) (((vex) & 0x4) >> 2) 60#define ppFromVEX2of2(vex) ((vex) & 0x3) 61 62#define rFromXOP2of3(xop) (((~(xop)) & 0x80) >> 7) 63#define xFromXOP2of3(xop) (((~(xop)) & 0x40) >> 6) 64#define bFromXOP2of3(xop) (((~(xop)) & 0x20) >> 5) 65#define mmmmmFromXOP2of3(xop) ((xop) & 0x1f) 66#define wFromXOP3of3(xop) (((xop) & 0x80) >> 7) 67#define vvvvFromXOP3of3(vex) (((~(vex)) & 0x78) >> 3) 68#define lFromXOP3of3(xop) (((xop) & 0x4) >> 2) 69#define ppFromXOP3of3(xop) ((xop) & 0x3) 70 71/* 72 * These enums represent Intel registers for use by the decoder. 73 */ 74 75#define REGS_8BIT \ 76 ENTRY(AL) \ 77 ENTRY(CL) \ 78 ENTRY(DL) \ 79 ENTRY(BL) \ 80 ENTRY(AH) \ 81 ENTRY(CH) \ 82 ENTRY(DH) \ 83 ENTRY(BH) \ 84 ENTRY(R8B) \ 85 ENTRY(R9B) \ 86 ENTRY(R10B) \ 87 ENTRY(R11B) \ 88 ENTRY(R12B) \ 89 ENTRY(R13B) \ 90 ENTRY(R14B) \ 91 ENTRY(R15B) \ 92 ENTRY(SPL) \ 93 ENTRY(BPL) \ 94 ENTRY(SIL) \ 95 ENTRY(DIL) 96 97#define EA_BASES_16BIT \ 98 ENTRY(BX_SI) \ 99 ENTRY(BX_DI) \ 100 ENTRY(BP_SI) \ 101 ENTRY(BP_DI) \ 102 ENTRY(SI) \ 103 ENTRY(DI) \ 104 ENTRY(BP) \ 105 ENTRY(BX) \ 106 ENTRY(R8W) \ 107 ENTRY(R9W) \ 108 ENTRY(R10W) \ 109 ENTRY(R11W) \ 110 ENTRY(R12W) \ 111 ENTRY(R13W) \ 112 ENTRY(R14W) \ 113 ENTRY(R15W) 114 115#define REGS_16BIT \ 116 ENTRY(AX) \ 117 ENTRY(CX) \ 118 ENTRY(DX) \ 119 ENTRY(BX) \ 120 ENTRY(SP) \ 121 ENTRY(BP) \ 122 ENTRY(SI) \ 123 ENTRY(DI) \ 124 ENTRY(R8W) \ 125 ENTRY(R9W) \ 126 ENTRY(R10W) \ 127 ENTRY(R11W) \ 128 ENTRY(R12W) \ 129 ENTRY(R13W) \ 130 ENTRY(R14W) \ 131 ENTRY(R15W) 132 133#define EA_BASES_32BIT \ 134 ENTRY(EAX) \ 135 ENTRY(ECX) \ 136 ENTRY(EDX) \ 137 ENTRY(EBX) \ 138 ENTRY(sib) \ 139 ENTRY(EBP) \ 140 ENTRY(ESI) \ 141 ENTRY(EDI) \ 142 ENTRY(R8D) \ 143 ENTRY(R9D) \ 144 ENTRY(R10D) \ 145 ENTRY(R11D) \ 146 ENTRY(R12D) \ 147 ENTRY(R13D) \ 148 ENTRY(R14D) \ 149 ENTRY(R15D) 150 151#define REGS_32BIT \ 152 ENTRY(EAX) \ 153 ENTRY(ECX) \ 154 ENTRY(EDX) \ 155 ENTRY(EBX) \ 156 ENTRY(ESP) \ 157 
ENTRY(EBP) \ 158 ENTRY(ESI) \ 159 ENTRY(EDI) \ 160 ENTRY(R8D) \ 161 ENTRY(R9D) \ 162 ENTRY(R10D) \ 163 ENTRY(R11D) \ 164 ENTRY(R12D) \ 165 ENTRY(R13D) \ 166 ENTRY(R14D) \ 167 ENTRY(R15D) 168 169#define EA_BASES_64BIT \ 170 ENTRY(RAX) \ 171 ENTRY(RCX) \ 172 ENTRY(RDX) \ 173 ENTRY(RBX) \ 174 ENTRY(sib64) \ 175 ENTRY(RBP) \ 176 ENTRY(RSI) \ 177 ENTRY(RDI) \ 178 ENTRY(R8) \ 179 ENTRY(R9) \ 180 ENTRY(R10) \ 181 ENTRY(R11) \ 182 ENTRY(R12) \ 183 ENTRY(R13) \ 184 ENTRY(R14) \ 185 ENTRY(R15) 186 187#define REGS_64BIT \ 188 ENTRY(RAX) \ 189 ENTRY(RCX) \ 190 ENTRY(RDX) \ 191 ENTRY(RBX) \ 192 ENTRY(RSP) \ 193 ENTRY(RBP) \ 194 ENTRY(RSI) \ 195 ENTRY(RDI) \ 196 ENTRY(R8) \ 197 ENTRY(R9) \ 198 ENTRY(R10) \ 199 ENTRY(R11) \ 200 ENTRY(R12) \ 201 ENTRY(R13) \ 202 ENTRY(R14) \ 203 ENTRY(R15) 204 205#define REGS_MMX \ 206 ENTRY(MM0) \ 207 ENTRY(MM1) \ 208 ENTRY(MM2) \ 209 ENTRY(MM3) \ 210 ENTRY(MM4) \ 211 ENTRY(MM5) \ 212 ENTRY(MM6) \ 213 ENTRY(MM7) 214 215#define REGS_XMM \ 216 ENTRY(XMM0) \ 217 ENTRY(XMM1) \ 218 ENTRY(XMM2) \ 219 ENTRY(XMM3) \ 220 ENTRY(XMM4) \ 221 ENTRY(XMM5) \ 222 ENTRY(XMM6) \ 223 ENTRY(XMM7) \ 224 ENTRY(XMM8) \ 225 ENTRY(XMM9) \ 226 ENTRY(XMM10) \ 227 ENTRY(XMM11) \ 228 ENTRY(XMM12) \ 229 ENTRY(XMM13) \ 230 ENTRY(XMM14) \ 231 ENTRY(XMM15) \ 232 ENTRY(XMM16) \ 233 ENTRY(XMM17) \ 234 ENTRY(XMM18) \ 235 ENTRY(XMM19) \ 236 ENTRY(XMM20) \ 237 ENTRY(XMM21) \ 238 ENTRY(XMM22) \ 239 ENTRY(XMM23) \ 240 ENTRY(XMM24) \ 241 ENTRY(XMM25) \ 242 ENTRY(XMM26) \ 243 ENTRY(XMM27) \ 244 ENTRY(XMM28) \ 245 ENTRY(XMM29) \ 246 ENTRY(XMM30) \ 247 ENTRY(XMM31) 248 249#define REGS_YMM \ 250 ENTRY(YMM0) \ 251 ENTRY(YMM1) \ 252 ENTRY(YMM2) \ 253 ENTRY(YMM3) \ 254 ENTRY(YMM4) \ 255 ENTRY(YMM5) \ 256 ENTRY(YMM6) \ 257 ENTRY(YMM7) \ 258 ENTRY(YMM8) \ 259 ENTRY(YMM9) \ 260 ENTRY(YMM10) \ 261 ENTRY(YMM11) \ 262 ENTRY(YMM12) \ 263 ENTRY(YMM13) \ 264 ENTRY(YMM14) \ 265 ENTRY(YMM15) \ 266 ENTRY(YMM16) \ 267 ENTRY(YMM17) \ 268 ENTRY(YMM18) \ 269 ENTRY(YMM19) \ 270 ENTRY(YMM20) \ 271 ENTRY(YMM21) 
\ 272 ENTRY(YMM22) \ 273 ENTRY(YMM23) \ 274 ENTRY(YMM24) \ 275 ENTRY(YMM25) \ 276 ENTRY(YMM26) \ 277 ENTRY(YMM27) \ 278 ENTRY(YMM28) \ 279 ENTRY(YMM29) \ 280 ENTRY(YMM30) \ 281 ENTRY(YMM31) 282 283#define REGS_ZMM \ 284 ENTRY(ZMM0) \ 285 ENTRY(ZMM1) \ 286 ENTRY(ZMM2) \ 287 ENTRY(ZMM3) \ 288 ENTRY(ZMM4) \ 289 ENTRY(ZMM5) \ 290 ENTRY(ZMM6) \ 291 ENTRY(ZMM7) \ 292 ENTRY(ZMM8) \ 293 ENTRY(ZMM9) \ 294 ENTRY(ZMM10) \ 295 ENTRY(ZMM11) \ 296 ENTRY(ZMM12) \ 297 ENTRY(ZMM13) \ 298 ENTRY(ZMM14) \ 299 ENTRY(ZMM15) \ 300 ENTRY(ZMM16) \ 301 ENTRY(ZMM17) \ 302 ENTRY(ZMM18) \ 303 ENTRY(ZMM19) \ 304 ENTRY(ZMM20) \ 305 ENTRY(ZMM21) \ 306 ENTRY(ZMM22) \ 307 ENTRY(ZMM23) \ 308 ENTRY(ZMM24) \ 309 ENTRY(ZMM25) \ 310 ENTRY(ZMM26) \ 311 ENTRY(ZMM27) \ 312 ENTRY(ZMM28) \ 313 ENTRY(ZMM29) \ 314 ENTRY(ZMM30) \ 315 ENTRY(ZMM31) 316 317#define REGS_SEGMENT \ 318 ENTRY(ES) \ 319 ENTRY(CS) \ 320 ENTRY(SS) \ 321 ENTRY(DS) \ 322 ENTRY(FS) \ 323 ENTRY(GS) 324 325#define REGS_DEBUG \ 326 ENTRY(DR0) \ 327 ENTRY(DR1) \ 328 ENTRY(DR2) \ 329 ENTRY(DR3) \ 330 ENTRY(DR4) \ 331 ENTRY(DR5) \ 332 ENTRY(DR6) \ 333 ENTRY(DR7) 334 335#define REGS_CONTROL \ 336 ENTRY(CR0) \ 337 ENTRY(CR1) \ 338 ENTRY(CR2) \ 339 ENTRY(CR3) \ 340 ENTRY(CR4) \ 341 ENTRY(CR5) \ 342 ENTRY(CR6) \ 343 ENTRY(CR7) \ 344 ENTRY(CR8) 345 346#define ALL_EA_BASES \ 347 EA_BASES_16BIT \ 348 EA_BASES_32BIT \ 349 EA_BASES_64BIT 350 351#define ALL_SIB_BASES \ 352 REGS_32BIT \ 353 REGS_64BIT 354 355#define ALL_REGS \ 356 REGS_8BIT \ 357 REGS_16BIT \ 358 REGS_32BIT \ 359 REGS_64BIT \ 360 REGS_MMX \ 361 REGS_XMM \ 362 REGS_YMM \ 363 REGS_ZMM \ 364 REGS_SEGMENT \ 365 REGS_DEBUG \ 366 REGS_CONTROL \ 367 ENTRY(RIP) 368 369/* 370 * EABase - All possible values of the base field for effective-address 371 * computations, a.k.a. the Mod and R/M fields of the ModR/M byte. We 372 * distinguish between bases (EA_BASE_*) and registers that just happen to be 373 * referred to when Mod == 0b11 (EA_REG_*). 
374 */ 375typedef enum { 376 EA_BASE_NONE, 377#define ENTRY(x) EA_BASE_##x, 378 ALL_EA_BASES 379#undef ENTRY 380#define ENTRY(x) EA_REG_##x, 381 ALL_REGS 382#undef ENTRY 383 EA_max 384} EABase; 385 386/* 387 * SIBIndex - All possible values of the SIB index field. 388 * Borrows entries from ALL_EA_BASES with the special case that 389 * sib is synonymous with NONE. 390 * Vector SIB: index can be XMM or YMM. 391 */ 392typedef enum { 393 SIB_INDEX_NONE, 394#define ENTRY(x) SIB_INDEX_##x, 395 ALL_EA_BASES 396 REGS_XMM 397 REGS_YMM 398 REGS_ZMM 399#undef ENTRY 400 SIB_INDEX_max 401} SIBIndex; 402 403/* 404 * SIBBase - All possible values of the SIB base field. 405 */ 406typedef enum { 407 SIB_BASE_NONE, 408#define ENTRY(x) SIB_BASE_##x, 409 ALL_SIB_BASES 410#undef ENTRY 411 SIB_BASE_max 412} SIBBase; 413 414/* 415 * EADisplacement - Possible displacement types for effective-address 416 * computations. 417 */ 418typedef enum { 419 EA_DISP_NONE, 420 EA_DISP_8, 421 EA_DISP_16, 422 EA_DISP_32 423} EADisplacement; 424 425/* 426 * Reg - All possible values of the reg field in the ModR/M byte. 427 */ 428typedef enum { 429#define ENTRY(x) MODRM_REG_##x, 430 ALL_REGS 431#undef ENTRY 432 MODRM_REG_max 433} Reg; 434 435/* 436 * SegmentOverride - All possible segment overrides. 
437 */ 438typedef enum { 439 SEG_OVERRIDE_NONE, 440 SEG_OVERRIDE_CS, 441 SEG_OVERRIDE_SS, 442 SEG_OVERRIDE_DS, 443 SEG_OVERRIDE_ES, 444 SEG_OVERRIDE_FS, 445 SEG_OVERRIDE_GS, 446 SEG_OVERRIDE_max 447} SegmentOverride; 448 449/* 450 * VEXLeadingOpcodeByte - Possible values for the VEX.m-mmmm field 451 */ 452 453typedef enum { 454 VEX_LOB_0F = 0x1, 455 VEX_LOB_0F38 = 0x2, 456 VEX_LOB_0F3A = 0x3 457} VEXLeadingOpcodeByte; 458 459typedef enum { 460 XOP_MAP_SELECT_8 = 0x8, 461 XOP_MAP_SELECT_9 = 0x9, 462 XOP_MAP_SELECT_A = 0xA 463} XOPMapSelect; 464 465/* 466 * VEXPrefixCode - Possible values for the VEX.pp field 467 */ 468 469typedef enum { 470 VEX_PREFIX_NONE = 0x0, 471 VEX_PREFIX_66 = 0x1, 472 VEX_PREFIX_F3 = 0x2, 473 VEX_PREFIX_F2 = 0x3 474} VEXPrefixCode; 475 476typedef enum { 477 TYPE_NO_VEX_XOP = 0x0, 478 TYPE_VEX_2B = 0x1, 479 TYPE_VEX_3B = 0x2, 480 TYPE_XOP = 0x3 481} VEXXOPType; 482 483typedef uint8_t BOOL; 484 485/* 486 * byteReader_t - Type for the byte reader that the consumer must provide to 487 * the decoder. Reads a single byte from the instruction's address space. 488 * @param arg - A baton that the consumer can associate with any internal 489 * state that it needs. 490 * @param byte - A pointer to a single byte in memory that should be set to 491 * contain the value at address. 492 * @param address - The address in the instruction's address space that should 493 * be read from. 494 * @return - -1 if the byte cannot be read for any reason; 0 otherwise. 495 */ 496typedef int (*byteReader_t)(const void* arg, uint8_t* byte, uint64_t address); 497 498/* 499 * dlog_t - Type for the logging function that the consumer can provide to 500 * get debugging output from the decoder. 501 * @param arg - A baton that the consumer can associate with any internal 502 * state that it needs. 503 * @param log - A string that contains the message. Will be reused after 504 * the logger returns. 
505 */ 506typedef void (*dlog_t)(void* arg, const char *log); 507 508/* 509 * The x86 internal instruction, which is produced by the decoder. 510 */ 511struct InternalInstruction { 512 /* Reader interface (C) */ 513 byteReader_t reader; 514 /* Opaque value passed to the reader */ 515 const void* readerArg; 516 /* The address of the next byte to read via the reader */ 517 uint64_t readerCursor; 518 519 /* Logger interface (C) */ 520 dlog_t dlog; 521 /* Opaque value passed to the logger */ 522 void* dlogArg; 523 524 /* General instruction information */ 525 526 /* The mode to disassemble for (64-bit, protected, real) */ 527 DisassemblerMode mode; 528 /* The start of the instruction, usable with the reader */ 529 uint64_t startLocation; 530 /* The length of the instruction, in bytes */ 531 size_t length; 532 533 /* Prefix state */ 534 535 /* 1 if the prefix byte corresponding to the entry is present; 0 if not */ 536 uint8_t prefixPresent[0x100]; 537 /* contains the location (for use with the reader) of the prefix byte */ 538 uint64_t prefixLocations[0x100]; 539 /* The value of the VEX/XOP prefix, if present */ 540 uint8_t vexXopPrefix[3]; 541 /* The length of the VEX prefix (0 if not present) */ 542 VEXXOPType vexXopType; 543 /* The value of the REX prefix, if present */ 544 uint8_t rexPrefix; 545 /* The location where a mandatory prefix would have to be (i.e., right before 546 the opcode, or right before the REX prefix if one is present) */ 547 uint64_t necessaryPrefixLocation; 548 /* The segment override type */ 549 SegmentOverride segmentOverride; 550 /* 1 if the prefix byte, 0xf2 or 0xf3 is xacquire or xrelease */ 551 BOOL xAcquireRelease; 552 553 /* Sizes of various critical pieces of data, in bytes */ 554 uint8_t registerSize; 555 uint8_t addressSize; 556 uint8_t displacementSize; 557 uint8_t immediateSize; 558 559 /* Offsets from the start of the instruction to the pieces of data, which is 560 needed to find relocation entries for adding symbolic operands */ 
561 uint8_t displacementOffset; 562 uint8_t immediateOffset; 563 564 /* opcode state */ 565 566 /* The last byte of the opcode, not counting any ModR/M extension */ 567 uint8_t opcode; 568 /* The ModR/M byte of the instruction, if it is an opcode extension */ 569 uint8_t modRMExtension; 570 571 /* decode state */ 572 573 /* The type of opcode, used for indexing into the array of decode tables */ 574 OpcodeType opcodeType; 575 /* The instruction ID, extracted from the decode table */ 576 uint16_t instructionID; 577 /* The specifier for the instruction, from the instruction info table */ 578 const struct InstructionSpecifier *spec; 579 580 /* state for additional bytes, consumed during operand decode. Pattern: 581 consumed___ indicates that the byte was already consumed and does not 582 need to be consumed again */ 583 584 /* The VEX.vvvv field, which contains a third register operand for some AVX 585 instructions */ 586 Reg vvvv; 587 588 /* The ModR/M byte, which contains most register operands and some portion of 589 all memory operands */ 590 BOOL consumedModRM; 591 uint8_t modRM; 592 593 /* The SIB byte, used for more complex 32- or 64-bit memory operands */ 594 BOOL consumedSIB; 595 uint8_t sib; 596 597 /* The displacement, used for memory operands */ 598 BOOL consumedDisplacement; 599 int32_t displacement; 600 601 /* Immediates. There can be two in some cases */ 602 uint8_t numImmediatesConsumed; 603 uint8_t numImmediatesTranslated; 604 uint64_t immediates[2]; 605 606 /* A register or immediate operand encoded into the opcode */ 607 BOOL consumedOpcodeModifier; 608 uint8_t opcodeModifier; 609 Reg opcodeRegister; 610 611 /* Portions of the ModR/M byte */ 612 613 /* These fields determine the allowable values for the ModR/M fields, which 614 depend on operand and address widths */ 615 EABase eaBaseBase; 616 EABase eaRegBase; 617 Reg regBase; 618 619 /* The Mod and R/M fields can encode a base for an effective address, or a 620 register. 
These are separated into two fields here */ 621 EABase eaBase; 622 EADisplacement eaDisplacement; 623 /* The reg field always encodes a register */ 624 Reg reg; 625 626 /* SIB state */ 627 SIBIndex sibIndex; 628 uint8_t sibScale; 629 SIBBase sibBase; 630 631 const struct OperandSpecifier *operands; 632}; 633 634/* decodeInstruction - Decode one instruction and store the decoding results in 635 * a buffer provided by the consumer. 636 * @param insn - The buffer to store the instruction in. Allocated by the 637 * consumer. 638 * @param reader - The byteReader_t for the bytes to be read. 639 * @param readerArg - An argument to pass to the reader for storing context 640 * specific to the consumer. May be NULL. 641 * @param logger - The dlog_t to be used in printing status messages from the 642 * disassembler. May be NULL. 643 * @param loggerArg - An argument to pass to the logger for storing context 644 * specific to the logger. May be NULL. 645 * @param startLoc - The address (in the reader's address space) of the first 646 * byte in the instruction. 647 * @param mode - The mode (16-bit, 32-bit, 64-bit) to decode in. 648 * @return - Nonzero if there was an error during decode, 0 otherwise. 649 */ 650int decodeInstruction(struct InternalInstruction* insn, 651 byteReader_t reader, 652 const void* readerArg, 653 dlog_t logger, 654 void* loggerArg, 655 const void* miiArg, 656 uint64_t startLoc, 657 DisassemblerMode mode); 658 659/* x86DisassemblerDebug - C-accessible function for printing a message to 660 * debugs() 661 * @param file - The name of the file printing the debug message. 662 * @param line - The line number that printed the debug message. 663 * @param s - The message to print. 664 */ 665 666void x86DisassemblerDebug(const char *file, 667 unsigned line, 668 const char *s); 669 670const char *x86DisassemblerGetInstrName(unsigned Opcode, const void *mii); 671 672#ifdef __cplusplus 673} 674#endif 675 676#endif 677