1//===-- X86Disassembler.cpp - Disassembler for x86 and x86_64 -------------===//
2//
3//                     The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10// This file is part of the X86 Disassembler.
11// It contains code to translate the data produced by the decoder into
12//  MCInsts.
13// Documentation for the disassembler can be found in X86Disassembler.h.
14//
15//===----------------------------------------------------------------------===//
16
17#include "X86Disassembler.h"
18#include "X86DisassemblerDecoder.h"
19#include "llvm/MC/MCContext.h"
20#include "llvm/MC/MCDisassembler.h"
21#include "llvm/MC/MCExpr.h"
22#include "llvm/MC/MCInst.h"
23#include "llvm/MC/MCInstrInfo.h"
24#include "llvm/MC/MCSubtargetInfo.h"
25#include "llvm/Support/Debug.h"
26#include "llvm/Support/MemoryObject.h"
27#include "llvm/Support/TargetRegistry.h"
28#include "llvm/Support/raw_ostream.h"
29
30#define GET_REGINFO_ENUM
31#include "X86GenRegisterInfo.inc"
32#define GET_INSTRINFO_ENUM
33#include "X86GenInstrInfo.inc"
34
35using namespace llvm;
36using namespace llvm::X86Disassembler;
37
38void x86DisassemblerDebug(const char *file,
39                          unsigned line,
40                          const char *s) {
41  dbgs() << file << ":" << line << ": " << s;
42}
43
44const char *x86DisassemblerGetInstrName(unsigned Opcode, const void *mii) {
45  const MCInstrInfo *MII = static_cast<const MCInstrInfo *>(mii);
46  return MII->getName(Opcode);
47}
48
// debug - Forwards a message, tagged with the current __FILE__ and __LINE__,
//   to x86DisassemblerDebug() through the DEBUG() macro.  Note that the
//   expansion already ends in a semicolon.
#define debug(s) DEBUG(x86DisassemblerDebug(__FILE__, __LINE__, s));
50
namespace llvm {

// Fill-ins to make the compiler happy.  These constants are never actually
//   assigned; they are just filler to make an automatically-generated switch
//   statement work.  (The switches in question are the ones in this file that
//   expand ALL_EA_BASES into X86::x references.)
namespace X86 {
  enum {
    BX_SI = 500,
    BX_DI = 501,
    BP_SI = 502,
    BP_DI = 503,
    sib   = 504,
    sib64 = 505
  };
}

// Declared here, defined elsewhere; LLVMInitializeX86Disassembler() registers
// a disassembler factory for each of them.
extern Target TheX86_32Target, TheX86_64Target;

}
70
// Forward declaration; the definition appears later in this file, and
// getInstruction() calls it before that point.  Returns false on success and
// true on failure.
static bool translateInstruction(MCInst &target,
                                InternalInstruction &source,
                                const MCDisassembler *Dis);
74
/// Constructs a disassembler.
///
/// @param STI  - Subtarget info; forwarded to the MCDisassembler base class.
/// @param mode - Stored in fMode and later handed to decodeInstruction().
/// @param MII  - Instruction info; stored in the MII member, which the
///               destructor deletes.
X86GenericDisassembler::X86GenericDisassembler(const MCSubtargetInfo &STI,
                                               DisassemblerMode mode,
                                               const MCInstrInfo *MII)
  : MCDisassembler(STI), MII(MII), fMode(mode) {}
79
/// Destroys the disassembler and deletes the MCInstrInfo that was passed to
/// the constructor.
X86GenericDisassembler::~X86GenericDisassembler() {
  delete MII;
}
83
84/// regionReader - a callback function that wraps the readByte method from
85///   MemoryObject.
86///
87/// @param arg      - The generic callback parameter.  In this case, this should
88///                   be a pointer to a MemoryObject.
89/// @param byte     - A pointer to the byte to be read.
90/// @param address  - The address to be read.
91static int regionReader(const void* arg, uint8_t* byte, uint64_t address) {
92  const MemoryObject* region = static_cast<const MemoryObject*>(arg);
93  return region->readByte(address, byte);
94}
95
96/// logger - a callback function that wraps the operator<< method from
97///   raw_ostream.
98///
99/// @param arg      - The generic callback parameter.  This should be a pointe
100///                   to a raw_ostream.
101/// @param log      - A string to be logged.  logger() adds a newline.
102static void logger(void* arg, const char* log) {
103  if (!arg)
104    return;
105
106  raw_ostream &vStream = *(static_cast<raw_ostream*>(arg));
107  vStream << log << "\n";
108}
109
110//
111// Public interface for the disassembler
112//
113
114MCDisassembler::DecodeStatus
115X86GenericDisassembler::getInstruction(MCInst &instr,
116                                       uint64_t &size,
117                                       const MemoryObject &region,
118                                       uint64_t address,
119                                       raw_ostream &vStream,
120                                       raw_ostream &cStream) const {
121  CommentStream = &cStream;
122
123  InternalInstruction internalInstr;
124
125  dlog_t loggerFn = logger;
126  if (&vStream == &nulls())
127    loggerFn = 0; // Disable logging completely if it's going to nulls().
128
129  int ret = decodeInstruction(&internalInstr,
130                              regionReader,
131                              (const void*)&region,
132                              loggerFn,
133                              (void*)&vStream,
134                              (const void*)MII,
135                              address,
136                              fMode);
137
138  if (ret) {
139    size = internalInstr.readerCursor - address;
140    return Fail;
141  }
142  else {
143    size = internalInstr.length;
144    return (!translateInstruction(instr, internalInstr, this)) ?
145            Success : Fail;
146  }
147}
148
149//
150// Private code that translates from struct InternalInstructions to MCInsts.
151//
152
153/// translateRegister - Translates an internal register to the appropriate LLVM
154///   register, and appends it as an operand to an MCInst.
155///
156/// @param mcInst     - The MCInst to append to.
157/// @param reg        - The Reg to append.
158static void translateRegister(MCInst &mcInst, Reg reg) {
159#define ENTRY(x) X86::x,
160  uint8_t llvmRegnums[] = {
161    ALL_REGS
162    0
163  };
164#undef ENTRY
165
166  uint8_t llvmRegnum = llvmRegnums[reg];
167  mcInst.addOperand(MCOperand::CreateReg(llvmRegnum));
168}
169
170/// tryAddingSymbolicOperand - trys to add a symbolic operand in place of the
171/// immediate Value in the MCInst.
172///
173/// @param Value      - The immediate Value, has had any PC adjustment made by
174///                     the caller.
175/// @param isBranch   - If the instruction is a branch instruction
176/// @param Address    - The starting address of the instruction
177/// @param Offset     - The byte offset to this immediate in the instruction
178/// @param Width      - The byte width of this immediate in the instruction
179///
180/// If the getOpInfo() function was set when setupForSymbolicDisassembly() was
181/// called then that function is called to get any symbolic information for the
182/// immediate in the instruction using the Address, Offset and Width.  If that
183/// returns non-zero then the symbolic information it returns is used to create
184/// an MCExpr and that is added as an operand to the MCInst.  If getOpInfo()
185/// returns zero and isBranch is true then a symbol look up for immediate Value
186/// is done and if a symbol is found an MCExpr is created with that, else
187/// an MCExpr with the immediate Value is created.  This function returns true
188/// if it adds an operand to the MCInst and false otherwise.
189static bool tryAddingSymbolicOperand(int64_t Value, bool isBranch,
190                                     uint64_t Address, uint64_t Offset,
191                                     uint64_t Width, MCInst &MI,
192                                     const MCDisassembler *Dis) {
193  LLVMOpInfoCallback getOpInfo = Dis->getLLVMOpInfoCallback();
194  struct LLVMOpInfo1 SymbolicOp;
195  memset(&SymbolicOp, '\0', sizeof(struct LLVMOpInfo1));
196  SymbolicOp.Value = Value;
197  void *DisInfo = Dis->getDisInfoBlock();
198
199  if (!getOpInfo ||
200      !getOpInfo(DisInfo, Address, Offset, Width, 1, &SymbolicOp)) {
201    // Clear SymbolicOp.Value from above and also all other fields.
202    memset(&SymbolicOp, '\0', sizeof(struct LLVMOpInfo1));
203    LLVMSymbolLookupCallback SymbolLookUp = Dis->getLLVMSymbolLookupCallback();
204    if (!SymbolLookUp)
205      return false;
206    uint64_t ReferenceType;
207    if (isBranch)
208       ReferenceType = LLVMDisassembler_ReferenceType_In_Branch;
209    else
210       ReferenceType = LLVMDisassembler_ReferenceType_InOut_None;
211    const char *ReferenceName;
212    const char *Name = SymbolLookUp(DisInfo, Value, &ReferenceType, Address,
213                                    &ReferenceName);
214    if (Name) {
215      SymbolicOp.AddSymbol.Name = Name;
216      SymbolicOp.AddSymbol.Present = true;
217    }
218    // For branches always create an MCExpr so it gets printed as hex address.
219    else if (isBranch) {
220      SymbolicOp.Value = Value;
221    }
222    if(ReferenceType == LLVMDisassembler_ReferenceType_Out_SymbolStub)
223      (*Dis->CommentStream) << "symbol stub for: " << ReferenceName;
224    if (!Name && !isBranch)
225      return false;
226  }
227
228  MCContext *Ctx = Dis->getMCContext();
229  const MCExpr *Add = NULL;
230  if (SymbolicOp.AddSymbol.Present) {
231    if (SymbolicOp.AddSymbol.Name) {
232      StringRef Name(SymbolicOp.AddSymbol.Name);
233      MCSymbol *Sym = Ctx->GetOrCreateSymbol(Name);
234      Add = MCSymbolRefExpr::Create(Sym, *Ctx);
235    } else {
236      Add = MCConstantExpr::Create((int)SymbolicOp.AddSymbol.Value, *Ctx);
237    }
238  }
239
240  const MCExpr *Sub = NULL;
241  if (SymbolicOp.SubtractSymbol.Present) {
242      if (SymbolicOp.SubtractSymbol.Name) {
243      StringRef Name(SymbolicOp.SubtractSymbol.Name);
244      MCSymbol *Sym = Ctx->GetOrCreateSymbol(Name);
245      Sub = MCSymbolRefExpr::Create(Sym, *Ctx);
246    } else {
247      Sub = MCConstantExpr::Create((int)SymbolicOp.SubtractSymbol.Value, *Ctx);
248    }
249  }
250
251  const MCExpr *Off = NULL;
252  if (SymbolicOp.Value != 0)
253    Off = MCConstantExpr::Create(SymbolicOp.Value, *Ctx);
254
255  const MCExpr *Expr;
256  if (Sub) {
257    const MCExpr *LHS;
258    if (Add)
259      LHS = MCBinaryExpr::CreateSub(Add, Sub, *Ctx);
260    else
261      LHS = MCUnaryExpr::CreateMinus(Sub, *Ctx);
262    if (Off != 0)
263      Expr = MCBinaryExpr::CreateAdd(LHS, Off, *Ctx);
264    else
265      Expr = LHS;
266  } else if (Add) {
267    if (Off != 0)
268      Expr = MCBinaryExpr::CreateAdd(Add, Off, *Ctx);
269    else
270      Expr = Add;
271  } else {
272    if (Off != 0)
273      Expr = Off;
274    else
275      Expr = MCConstantExpr::Create(0, *Ctx);
276  }
277
278  MI.addOperand(MCOperand::CreateExpr(Expr));
279
280  return true;
281}
282
283/// tryAddingPcLoadReferenceComment - trys to add a comment as to what is being
284/// referenced by a load instruction with the base register that is the rip.
285/// These can often be addresses in a literal pool.  The Address of the
286/// instruction and its immediate Value are used to determine the address
287/// being referenced in the literal pool entry.  The SymbolLookUp call back will
288/// return a pointer to a literal 'C' string if the referenced address is an
289/// address into a section with 'C' string literals.
290static void tryAddingPcLoadReferenceComment(uint64_t Address, uint64_t Value,
291                                            const void *Decoder) {
292  const MCDisassembler *Dis = static_cast<const MCDisassembler*>(Decoder);
293  LLVMSymbolLookupCallback SymbolLookUp = Dis->getLLVMSymbolLookupCallback();
294  if (SymbolLookUp) {
295    void *DisInfo = Dis->getDisInfoBlock();
296    uint64_t ReferenceType = LLVMDisassembler_ReferenceType_In_PCrel_Load;
297    const char *ReferenceName;
298    (void)SymbolLookUp(DisInfo, Value, &ReferenceType, Address, &ReferenceName);
299    if(ReferenceType == LLVMDisassembler_ReferenceType_Out_LitPool_CstrAddr)
300      (*Dis->CommentStream) << "literal pool for: " << ReferenceName;
301  }
302}
303
304/// translateImmediate  - Appends an immediate operand to an MCInst.
305///
306/// @param mcInst       - The MCInst to append to.
307/// @param immediate    - The immediate value to append.
308/// @param operand      - The operand, as stored in the descriptor table.
309/// @param insn         - The internal instruction.
310static void translateImmediate(MCInst &mcInst, uint64_t immediate,
311                               const OperandSpecifier &operand,
312                               InternalInstruction &insn,
313                               const MCDisassembler *Dis) {
314  // Sign-extend the immediate if necessary.
315
316  OperandType type = (OperandType)operand.type;
317
318  bool isBranch = false;
319  uint64_t pcrel = 0;
320  if (type == TYPE_RELv) {
321    isBranch = true;
322    pcrel = insn.startLocation +
323            insn.immediateOffset + insn.immediateSize;
324    switch (insn.displacementSize) {
325    default:
326      break;
327    case 1:
328      type = TYPE_MOFFS8;
329      break;
330    case 2:
331      type = TYPE_MOFFS16;
332      break;
333    case 4:
334      type = TYPE_MOFFS32;
335      break;
336    case 8:
337      type = TYPE_MOFFS64;
338      break;
339    }
340  }
341  // By default sign-extend all X86 immediates based on their encoding.
342  else if (type == TYPE_IMM8 || type == TYPE_IMM16 || type == TYPE_IMM32 ||
343           type == TYPE_IMM64) {
344    uint32_t Opcode = mcInst.getOpcode();
345    switch (operand.encoding) {
346    default:
347      break;
348    case ENCODING_IB:
349      // Special case those X86 instructions that use the imm8 as a set of
350      // bits, bit count, etc. and are not sign-extend.
351      if (Opcode != X86::BLENDPSrri && Opcode != X86::BLENDPDrri &&
352          Opcode != X86::PBLENDWrri && Opcode != X86::MPSADBWrri &&
353          Opcode != X86::DPPSrri && Opcode != X86::DPPDrri &&
354          Opcode != X86::INSERTPSrr && Opcode != X86::VBLENDPSYrri &&
355          Opcode != X86::VBLENDPSYrmi && Opcode != X86::VBLENDPDYrri &&
356          Opcode != X86::VBLENDPDYrmi && Opcode != X86::VPBLENDWrri &&
357          Opcode != X86::VMPSADBWrri && Opcode != X86::VDPPSYrri &&
358          Opcode != X86::VDPPSYrmi && Opcode != X86::VDPPDrri &&
359          Opcode != X86::VINSERTPSrr)
360        type = TYPE_MOFFS8;
361      break;
362    case ENCODING_IW:
363      type = TYPE_MOFFS16;
364      break;
365    case ENCODING_ID:
366      type = TYPE_MOFFS32;
367      break;
368    case ENCODING_IO:
369      type = TYPE_MOFFS64;
370      break;
371    }
372  }
373
374  switch (type) {
375  case TYPE_XMM32:
376  case TYPE_XMM64:
377  case TYPE_XMM128:
378    mcInst.addOperand(MCOperand::CreateReg(X86::XMM0 + (immediate >> 4)));
379    return;
380  case TYPE_XMM256:
381    mcInst.addOperand(MCOperand::CreateReg(X86::YMM0 + (immediate >> 4)));
382    return;
383  case TYPE_REL8:
384    isBranch = true;
385    pcrel = insn.startLocation + insn.immediateOffset + insn.immediateSize;
386    // fall through to sign extend the immediate if needed.
387  case TYPE_MOFFS8:
388    if(immediate & 0x80)
389      immediate |= ~(0xffull);
390    break;
391  case TYPE_MOFFS16:
392    if(immediate & 0x8000)
393      immediate |= ~(0xffffull);
394    break;
395  case TYPE_REL32:
396  case TYPE_REL64:
397    isBranch = true;
398    pcrel = insn.startLocation + insn.immediateOffset + insn.immediateSize;
399    // fall through to sign extend the immediate if needed.
400  case TYPE_MOFFS32:
401    if(immediate & 0x80000000)
402      immediate |= ~(0xffffffffull);
403    break;
404  case TYPE_MOFFS64:
405  default:
406    // operand is 64 bits wide.  Do nothing.
407    break;
408  }
409
410  if(!tryAddingSymbolicOperand(immediate + pcrel, isBranch, insn.startLocation,
411                               insn.immediateOffset, insn.immediateSize,
412                               mcInst, Dis))
413    mcInst.addOperand(MCOperand::CreateImm(immediate));
414}
415
/// translateRMRegister - Translates a register stored in the R/M field of the
///   ModR/M byte to its LLVM equivalent and appends it to an MCInst.
/// @param mcInst       - The MCInst to append to.
/// @param insn         - The internal instruction to extract the R/M field
///                       from.
/// @return             - false on success; true otherwise
static bool translateRMRegister(MCInst &mcInst,
                                InternalInstruction &insn) {
  // A SIB marker means the operand is a memory reference, not a register.
  if (insn.eaBase == EA_BASE_sib || insn.eaBase == EA_BASE_sib64) {
    debug("A R/M register operand may not have a SIB byte");
    return true;
  }

  switch (insn.eaBase) {
  default:
    debug("Unexpected EA base register");
    return true;
  case EA_BASE_NONE:
    debug("EA_BASE_NONE for ModR/M base");
    return true;
  // Every EA_BASE_* value generated below shares the one error path.
#define ENTRY(x) case EA_BASE_##x:
  ALL_EA_BASES
#undef ENTRY
    debug("A R/M register operand may not have a base; "
          "the operand must be a register.");
    return true;
  // One case per register: append the matching X86 register and leave the
  // switch.
#define ENTRY(x)                                                      \
  case EA_REG_##x:                                                    \
    mcInst.addOperand(MCOperand::CreateReg(X86::x)); break;
  ALL_REGS
#undef ENTRY
  }

  return false;
}
451
/// translateRMMemory - Translates a memory operand stored in the Mod and R/M
///   fields of an internal instruction (and possibly its SIB byte) to a memory
///   operand in LLVM's format, and appends it to an MCInst.
///
/// @param mcInst       - The MCInst to append to.
/// @param insn         - The instruction to extract Mod, R/M, and SIB fields
///                       from.  Its sibIndex field may be rewritten for the
///                       gather opcodes (see below).
/// @param Dis          - The disassembler, used for symbolic operand lookup
///                       and for reference comments.
/// @return             - false on success; true otherwise
static bool translateRMMemory(MCInst &mcInst, InternalInstruction &insn,
                              const MCDisassembler *Dis) {
  // Addresses in an MCInst are represented as five operands:
  //   1. basereg       (register)  The R/M base, or (if there is a SIB) the
  //                                SIB base
  //   2. scaleamount   (immediate) 1, or (if there is a SIB) the specified
  //                                scale amount
  //   3. indexreg      (register)  x86_registerNONE, or (if there is a SIB)
  //                                the index (which is multiplied by the
  //                                scale amount)
  //   4. displacement  (immediate) 0, or the displacement if there is one
  //   5. segmentreg    (register)  x86_registerNONE for now, but could be set
  //                                if we have segment overrides

  MCOperand baseReg;
  MCOperand scaleAmount;
  MCOperand indexReg;
  MCOperand displacement;
  MCOperand segmentReg;
  // Stays 0 unless the operand turns out to be RIP-relative (see below).
  uint64_t pcrel = 0;

  // SIB form: base, index and scale are all taken from the SIB fields.
  if (insn.eaBase == EA_BASE_sib || insn.eaBase == EA_BASE_sib64) {
    if (insn.sibBase != SIB_BASE_NONE) {
      switch (insn.sibBase) {
      default:
        debug("Unexpected sibBase");
        return true;
#define ENTRY(x)                                          \
      case SIB_BASE_##x:                                  \
        baseReg = MCOperand::CreateReg(X86::x); break;
      ALL_SIB_BASES
#undef ENTRY
      }
    } else {
      baseReg = MCOperand::CreateReg(0);
    }

    // Check whether we are handling VSIB addressing mode for GATHER.
    // If sibIndex was set to SIB_INDEX_NONE, index offset is 4 and
    // we should use SIB_INDEX_XMM4|YMM4 for VSIB.
    // I don't see a way to get the correct IndexReg in readSIB:
    //   We can tell whether it is VSIB or SIB after instruction ID is decoded,
    //   but instruction ID may not be decoded yet when calling readSIB.
    uint32_t Opcode = mcInst.getOpcode();
    bool IndexIs128 = (Opcode == X86::VGATHERDPDrm ||
                       Opcode == X86::VGATHERDPDYrm ||
                       Opcode == X86::VGATHERQPDrm ||
                       Opcode == X86::VGATHERDPSrm ||
                       Opcode == X86::VGATHERQPSrm ||
                       Opcode == X86::VPGATHERDQrm ||
                       Opcode == X86::VPGATHERDQYrm ||
                       Opcode == X86::VPGATHERQQrm ||
                       Opcode == X86::VPGATHERDDrm ||
                       Opcode == X86::VPGATHERQDrm);
    bool IndexIs256 = (Opcode == X86::VGATHERQPDYrm ||
                       Opcode == X86::VGATHERDPSYrm ||
                       Opcode == X86::VGATHERQPSYrm ||
                       Opcode == X86::VPGATHERQQYrm ||
                       Opcode == X86::VPGATHERDDYrm ||
                       Opcode == X86::VPGATHERQDYrm);
    if (IndexIs128 || IndexIs256) {
      // Distance of the decoded index from the first general-purpose index of
      // the current address size.  The value is not used when sibIndex is
      // SIB_INDEX_NONE; 4 is substituted below in that case.
      unsigned IndexOffset = insn.sibIndex -
                         (insn.addressSize == 8 ? SIB_INDEX_RAX:SIB_INDEX_EAX);
      SIBIndex IndexBase = IndexIs256 ? SIB_INDEX_YMM0 : SIB_INDEX_XMM0;
      // Rebase the index onto the XMM/YMM range; this overwrites the field in
      // insn.
      insn.sibIndex = (SIBIndex)(IndexBase +
                           (insn.sibIndex == SIB_INDEX_NONE ? 4 : IndexOffset));
    }

    if (insn.sibIndex != SIB_INDEX_NONE) {
      switch (insn.sibIndex) {
      default:
        debug("Unexpected sibIndex");
        return true;
#define ENTRY(x)                                          \
      case SIB_INDEX_##x:                                 \
        indexReg = MCOperand::CreateReg(X86::x); break;
      EA_BASES_32BIT
      EA_BASES_64BIT
      REGS_XMM
      REGS_YMM
#undef ENTRY
      }
    } else {
      indexReg = MCOperand::CreateReg(0);
    }

    scaleAmount = MCOperand::CreateImm(insn.sibScale);
  } else {
    // No SIB: base and index follow from eaBase alone, and the scale is 1.
    switch (insn.eaBase) {
    case EA_BASE_NONE:
      // No base register, so a displacement is required.
      if (insn.eaDisplacement == EA_DISP_NONE) {
        debug("EA_BASE_NONE and EA_DISP_NONE for ModR/M base");
        return true;
      }
      if (insn.mode == MODE_64BIT){
        // RIP is used as the base in 64-bit mode.  pcrel is the address just
        // past the displacement field; it is added to the displacement for
        // the reference comment here and for the symbolic lookup further down.
        pcrel = insn.startLocation +
                insn.displacementOffset + insn.displacementSize;
        tryAddingPcLoadReferenceComment(insn.startLocation +
                                        insn.displacementOffset,
                                        insn.displacement + pcrel, Dis);
        baseReg = MCOperand::CreateReg(X86::RIP); // Section 2.2.1.6
      }
      else
        baseReg = MCOperand::CreateReg(0);

      indexReg = MCOperand::CreateReg(0);
      break;
    // Register-pair bases: the first register is the base, the second the
    // index.
    case EA_BASE_BX_SI:
      baseReg = MCOperand::CreateReg(X86::BX);
      indexReg = MCOperand::CreateReg(X86::SI);
      break;
    case EA_BASE_BX_DI:
      baseReg = MCOperand::CreateReg(X86::BX);
      indexReg = MCOperand::CreateReg(X86::DI);
      break;
    case EA_BASE_BP_SI:
      baseReg = MCOperand::CreateReg(X86::BP);
      indexReg = MCOperand::CreateReg(X86::SI);
      break;
    case EA_BASE_BP_DI:
      baseReg = MCOperand::CreateReg(X86::BP);
      indexReg = MCOperand::CreateReg(X86::DI);
      break;
    default:
      // Any remaining base is a single register with no index.
      indexReg = MCOperand::CreateReg(0);
      switch (insn.eaBase) {
      default:
        debug("Unexpected eaBase");
        return true;
        // Here, we will use the fill-ins defined above.  However,
        //   BX_SI, BX_DI, BP_SI, and BP_DI are all handled above and
        //   sib and sib64 were handled in the top-level if, so they're only
        //   placeholders to keep the compiler happy.
#define ENTRY(x)                                        \
      case EA_BASE_##x:                                 \
        baseReg = MCOperand::CreateReg(X86::x); break;
      ALL_EA_BASES
#undef ENTRY
#define ENTRY(x) case EA_REG_##x:
      ALL_REGS
#undef ENTRY
        debug("A R/M memory operand may not be a register; "
              "the base field must be a base.");
        return true;
      }
    }

    scaleAmount = MCOperand::CreateImm(1);
  }

  displacement = MCOperand::CreateImm(insn.displacement);

  // Indexed directly by insn.segmentOverride; entry 0 means no override.
  static const uint8_t segmentRegnums[SEG_OVERRIDE_max] = {
    0,        // SEG_OVERRIDE_NONE
    X86::CS,
    X86::SS,
    X86::DS,
    X86::ES,
    X86::FS,
    X86::GS
  };

  segmentReg = MCOperand::CreateReg(segmentRegnums[insn.segmentOverride]);

  // Append the five operands in the order listed at the top.  The displacement
  // is replaced by a symbolic expression when one can be created.
  mcInst.addOperand(baseReg);
  mcInst.addOperand(scaleAmount);
  mcInst.addOperand(indexReg);
  if(!tryAddingSymbolicOperand(insn.displacement + pcrel, false,
                               insn.startLocation, insn.displacementOffset,
                               insn.displacementSize, mcInst, Dis))
    mcInst.addOperand(displacement);
  mcInst.addOperand(segmentReg);
  return false;
}
634
635/// translateRM - Translates an operand stored in the R/M (and possibly SIB)
636///   byte of an instruction to LLVM form, and appends it to an MCInst.
637///
638/// @param mcInst       - The MCInst to append to.
639/// @param operand      - The operand, as stored in the descriptor table.
640/// @param insn         - The instruction to extract Mod, R/M, and SIB fields
641///                       from.
642/// @return             - 0 on success; nonzero otherwise
643static bool translateRM(MCInst &mcInst, const OperandSpecifier &operand,
644                        InternalInstruction &insn, const MCDisassembler *Dis) {
645  switch (operand.type) {
646  default:
647    debug("Unexpected type for a R/M operand");
648    return true;
649  case TYPE_R8:
650  case TYPE_R16:
651  case TYPE_R32:
652  case TYPE_R64:
653  case TYPE_Rv:
654  case TYPE_MM:
655  case TYPE_MM32:
656  case TYPE_MM64:
657  case TYPE_XMM:
658  case TYPE_XMM32:
659  case TYPE_XMM64:
660  case TYPE_XMM128:
661  case TYPE_XMM256:
662  case TYPE_DEBUGREG:
663  case TYPE_CONTROLREG:
664    return translateRMRegister(mcInst, insn);
665  case TYPE_M:
666  case TYPE_M8:
667  case TYPE_M16:
668  case TYPE_M32:
669  case TYPE_M64:
670  case TYPE_M128:
671  case TYPE_M256:
672  case TYPE_M512:
673  case TYPE_Mv:
674  case TYPE_M32FP:
675  case TYPE_M64FP:
676  case TYPE_M80FP:
677  case TYPE_M16INT:
678  case TYPE_M32INT:
679  case TYPE_M64INT:
680  case TYPE_M1616:
681  case TYPE_M1632:
682  case TYPE_M1664:
683  case TYPE_LEA:
684    return translateRMMemory(mcInst, insn, Dis);
685  }
686}
687
688/// translateFPRegister - Translates a stack position on the FPU stack to its
689///   LLVM form, and appends it to an MCInst.
690///
691/// @param mcInst       - The MCInst to append to.
692/// @param stackPos     - The stack position to translate.
693/// @return             - 0 on success; nonzero otherwise.
694static bool translateFPRegister(MCInst &mcInst,
695                               uint8_t stackPos) {
696  if (stackPos >= 8) {
697    debug("Invalid FP stack position");
698    return true;
699  }
700
701  mcInst.addOperand(MCOperand::CreateReg(X86::ST0 + stackPos));
702
703  return false;
704}
705
706/// translateOperand - Translates an operand stored in an internal instruction
707///   to LLVM's format and appends it to an MCInst.
708///
709/// @param mcInst       - The MCInst to append to.
710/// @param operand      - The operand, as stored in the descriptor table.
711/// @param insn         - The internal instruction.
712/// @return             - false on success; true otherwise.
713static bool translateOperand(MCInst &mcInst, const OperandSpecifier &operand,
714                             InternalInstruction &insn,
715                             const MCDisassembler *Dis) {
716  switch (operand.encoding) {
717  default:
718    debug("Unhandled operand encoding during translation");
719    return true;
720  case ENCODING_REG:
721    translateRegister(mcInst, insn.reg);
722    return false;
723  case ENCODING_RM:
724    return translateRM(mcInst, operand, insn, Dis);
725  case ENCODING_CB:
726  case ENCODING_CW:
727  case ENCODING_CD:
728  case ENCODING_CP:
729  case ENCODING_CO:
730  case ENCODING_CT:
731    debug("Translation of code offsets isn't supported.");
732    return true;
733  case ENCODING_IB:
734  case ENCODING_IW:
735  case ENCODING_ID:
736  case ENCODING_IO:
737  case ENCODING_Iv:
738  case ENCODING_Ia:
739    translateImmediate(mcInst,
740                       insn.immediates[insn.numImmediatesTranslated++],
741                       operand,
742                       insn,
743                       Dis);
744    return false;
745  case ENCODING_RB:
746  case ENCODING_RW:
747  case ENCODING_RD:
748  case ENCODING_RO:
749    translateRegister(mcInst, insn.opcodeRegister);
750    return false;
751  case ENCODING_I:
752    return translateFPRegister(mcInst, insn.opcodeModifier);
753  case ENCODING_Rv:
754    translateRegister(mcInst, insn.opcodeRegister);
755    return false;
756  case ENCODING_VVVV:
757    translateRegister(mcInst, insn.vvvv);
758    return false;
759  case ENCODING_DUP:
760    return translateOperand(mcInst, insn.operands[operand.type - TYPE_DUP0],
761                            insn, Dis);
762  }
763}
764
765/// translateInstruction - Translates an internal instruction and all its
766///   operands to an MCInst.
767///
768/// @param mcInst       - The MCInst to populate with the instruction's data.
769/// @param insn         - The internal instruction.
770/// @return             - false on success; true otherwise.
771static bool translateInstruction(MCInst &mcInst,
772                                InternalInstruction &insn,
773                                const MCDisassembler *Dis) {
774  if (!insn.spec) {
775    debug("Instruction has no specification");
776    return true;
777  }
778
779  mcInst.setOpcode(insn.instructionID);
780
781  int index;
782
783  insn.numImmediatesTranslated = 0;
784
785  for (index = 0; index < X86_MAX_OPERANDS; ++index) {
786    if (insn.operands[index].encoding != ENCODING_NONE) {
787      if (translateOperand(mcInst, insn.operands[index], insn, Dis)) {
788        return true;
789      }
790    }
791  }
792
793  return false;
794}
795
796static MCDisassembler *createX86_32Disassembler(const Target &T,
797                                                const MCSubtargetInfo &STI) {
798  return new X86Disassembler::X86GenericDisassembler(STI, MODE_32BIT,
799                                                     T.createMCInstrInfo());
800}
801
802static MCDisassembler *createX86_64Disassembler(const Target &T,
803                                                const MCSubtargetInfo &STI) {
804  return new X86Disassembler::X86GenericDisassembler(STI, MODE_64BIT,
805                                                     T.createMCInstrInfo());
806}
807
/// LLVMInitializeX86Disassembler - Registers the disassembler factories for
///   the 32-bit and 64-bit X86 targets with the TargetRegistry.  Declared
///   extern "C", so the symbol has C linkage.
extern "C" void LLVMInitializeX86Disassembler() {
  // Register the disassembler.
  TargetRegistry::RegisterMCDisassembler(TheX86_32Target,
                                         createX86_32Disassembler);
  TargetRegistry::RegisterMCDisassembler(TheX86_64Target,
                                         createX86_64Disassembler);
}
815