// MCObjectDisassembler.h revision 263508
1//===-- llvm/MC/MCObjectDisassembler.h --------------------------*- C++ -*-===// 2// 3// The LLVM Compiler Infrastructure 4// 5// This file is distributed under the University of Illinois Open Source 6// License. See LICENSE.TXT for details. 7// 8//===----------------------------------------------------------------------===// 9// 10// This file contains the declaration of the MCObjectDisassembler class, which 11// can be used to construct an MCModule and an MC CFG from an ObjectFile. 12// 13//===----------------------------------------------------------------------===// 14 15#ifndef LLVM_MC_MCOBJECTDISASSEMBLER_H 16#define LLVM_MC_MCOBJECTDISASSEMBLER_H 17 18#include "llvm/ADT/ArrayRef.h" 19#include "llvm/ADT/OwningPtr.h" 20#include "llvm/ADT/StringRef.h" 21#include "llvm/Support/DataTypes.h" 22#include "llvm/Support/MemoryObject.h" 23#include <vector> 24 25namespace llvm { 26 27namespace object { 28 class ObjectFile; 29 class MachOObjectFile; 30} 31 32class MCBasicBlock; 33class MCDisassembler; 34class MCFunction; 35class MCInstrAnalysis; 36class MCModule; 37class MCObjectSymbolizer; 38 39/// \brief Disassemble an ObjectFile to an MCModule and MCFunctions. 40/// This class builds on MCDisassembler to disassemble whole sections, creating 41/// MCAtom (MCTextAtom for disassembled sections and MCDataAtom for raw data). 42/// It can also be used to create a control flow graph consisting of MCFunctions 43/// and MCBasicBlocks. 44class MCObjectDisassembler { 45public: 46 MCObjectDisassembler(const object::ObjectFile &Obj, 47 const MCDisassembler &Dis, 48 const MCInstrAnalysis &MIA); 49 virtual ~MCObjectDisassembler() {} 50 51 /// \brief Build an MCModule, creating atoms and optionally functions. 52 /// \param withCFG Also build a CFG by adding MCFunctions to the Module. 53 /// If withCFG is false, the MCModule built only contains atoms, representing 54 /// what was found in the object file. If withCFG is true, MCFunctions are 55 /// created, containing MCBasicBlocks. 
All text atoms are split to form basic 56 /// block atoms, which then each back an MCBasicBlock. 57 MCModule *buildModule(bool withCFG = false); 58 59 MCModule *buildEmptyModule(); 60 61 typedef std::vector<uint64_t> AddressSetTy; 62 /// \name Create a new MCFunction. 63 MCFunction *createFunction(MCModule *Module, uint64_t BeginAddr, 64 AddressSetTy &CallTargets, 65 AddressSetTy &TailCallTargets); 66 67 /// \brief Set the region on which to fallback if disassembly was requested 68 /// somewhere not accessible in the object file. 69 /// This is used for dynamic disassembly (see RawMemoryObject). 70 void setFallbackRegion(OwningPtr<MemoryObject> &Region) { 71 FallbackRegion.reset(Region.take()); 72 } 73 74 /// \brief Set the symbolizer to use to get information on external functions. 75 /// Note that this isn't used to do instruction-level symbolization (that is, 76 /// plugged into MCDisassembler), but to symbolize function call targets. 77 void setSymbolizer(MCObjectSymbolizer *ObjectSymbolizer) { 78 MOS = ObjectSymbolizer; 79 } 80 81 /// \brief Get the effective address of the entrypoint, or 0 if there is none. 82 virtual uint64_t getEntrypoint(); 83 84 /// \name Get the addresses of static constructors/destructors in the object. 85 /// The caller is expected to know how to interpret the addresses; 86 /// for example, Mach-O init functions expect 5 arguments, not for ELF. 87 /// The addresses are original object file load addresses, not effective. 88 /// @{ 89 virtual ArrayRef<uint64_t> getStaticInitFunctions(); 90 virtual ArrayRef<uint64_t> getStaticExitFunctions(); 91 /// @} 92 93 /// \name Translation between effective and objectfile load address. 94 /// @{ 95 /// \brief Compute the effective load address, from an objectfile virtual 96 /// address. This is implemented in a format-specific way, to take into 97 /// account things like PIE/ASLR when doing dynamic disassembly. 
98 /// For example, on Mach-O this would be done by adding the VM addr slide, 99 /// on glibc ELF by keeping a map between segment load addresses, filled 100 /// using dl_iterate_phdr, etc.. 101 /// In most static situations and in the default impl., this returns \p Addr. 102 virtual uint64_t getEffectiveLoadAddr(uint64_t Addr); 103 104 /// \brief Compute the original load address, as specified in the objectfile. 105 /// This is the inverse of getEffectiveLoadAddr. 106 virtual uint64_t getOriginalLoadAddr(uint64_t EffectiveAddr); 107 /// @} 108 109protected: 110 const object::ObjectFile &Obj; 111 const MCDisassembler &Dis; 112 const MCInstrAnalysis &MIA; 113 MCObjectSymbolizer *MOS; 114 115 /// \brief The fallback memory region, outside the object file. 116 OwningPtr<MemoryObject> FallbackRegion; 117 118 /// \brief Return a memory region suitable for reading starting at \p Addr. 119 /// In most cases, this returns a StringRefMemoryObject backed by the 120 /// containing section. When no section was found, this returns the 121 /// FallbackRegion, if it is suitable. 122 /// If it is not, or if there is no fallback region, this returns 0. 123 MemoryObject *getRegionFor(uint64_t Addr); 124 125private: 126 /// \brief Fill \p Module by creating an atom for each section. 127 /// This could be made much smarter, using information like symbols, but also 128 /// format-specific features, like mach-o function_start or data_in_code LCs. 129 void buildSectionAtoms(MCModule *Module); 130 131 /// \brief Enrich \p Module with a CFG consisting of MCFunctions. 132 /// \param Module An MCModule returned by buildModule, with no CFG. 133 /// NOTE: Each MCBasicBlock in a MCFunction is backed by a single MCTextAtom. 134 /// When the CFG is built, contiguous instructions that were previously in a 135 /// single MCTextAtom will be split in multiple basic block atoms. 
136 void buildCFG(MCModule *Module); 137 138 MCBasicBlock *getBBAt(MCModule *Module, MCFunction *MCFN, uint64_t BeginAddr, 139 AddressSetTy &CallTargets, 140 AddressSetTy &TailCallTargets); 141}; 142 143class MCMachOObjectDisassembler : public MCObjectDisassembler { 144 const object::MachOObjectFile &MOOF; 145 146 uint64_t VMAddrSlide; 147 uint64_t HeaderLoadAddress; 148 149 // __DATA;__mod_init_func support. 150 llvm::StringRef ModInitContents; 151 // __DATA;__mod_exit_func support. 152 llvm::StringRef ModExitContents; 153 154public: 155 /// \brief Construct a Mach-O specific object disassembler. 156 /// \param VMAddrSlide The virtual address slide applied by dyld. 157 /// \param HeaderLoadAddress The load address of the mach_header for this 158 /// object. 159 MCMachOObjectDisassembler(const object::MachOObjectFile &MOOF, 160 const MCDisassembler &Dis, 161 const MCInstrAnalysis &MIA, uint64_t VMAddrSlide, 162 uint64_t HeaderLoadAddress); 163 164protected: 165 uint64_t getEffectiveLoadAddr(uint64_t Addr) LLVM_OVERRIDE; 166 uint64_t getOriginalLoadAddr(uint64_t EffectiveAddr) LLVM_OVERRIDE; 167 uint64_t getEntrypoint() LLVM_OVERRIDE; 168 169 ArrayRef<uint64_t> getStaticInitFunctions() LLVM_OVERRIDE; 170 ArrayRef<uint64_t> getStaticExitFunctions() LLVM_OVERRIDE; 171}; 172 173} 174 175#endif 176