DisassemblerLLVMC.cpp revision 360784
1//===-- DisassemblerLLVMC.cpp -----------------------------------*- C++ -*-===// 2// 3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4// See https://llvm.org/LICENSE.txt for license information. 5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6// 7//===----------------------------------------------------------------------===// 8 9#include "DisassemblerLLVMC.h" 10 11#include "llvm-c/Disassembler.h" 12#include "llvm/ADT/SmallString.h" 13#include "llvm/MC/MCAsmInfo.h" 14#include "llvm/MC/MCContext.h" 15#include "llvm/MC/MCDisassembler/MCDisassembler.h" 16#include "llvm/MC/MCDisassembler/MCExternalSymbolizer.h" 17#include "llvm/MC/MCDisassembler/MCRelocationInfo.h" 18#include "llvm/MC/MCInst.h" 19#include "llvm/MC/MCInstPrinter.h" 20#include "llvm/MC/MCInstrInfo.h" 21#include "llvm/MC/MCRegisterInfo.h" 22#include "llvm/MC/MCSubtargetInfo.h" 23#include "llvm/MC/MCTargetOptions.h" 24#include "llvm/Support/ErrorHandling.h" 25#include "llvm/Support/ScopedPrinter.h" 26#include "llvm/Support/TargetRegistry.h" 27#include "llvm/Support/TargetSelect.h" 28 29#include "lldb/Core/Address.h" 30#include "lldb/Core/Module.h" 31#include "lldb/Symbol/SymbolContext.h" 32#include "lldb/Target/ExecutionContext.h" 33#include "lldb/Target/Process.h" 34#include "lldb/Target/RegisterContext.h" 35#include "lldb/Target/SectionLoadList.h" 36#include "lldb/Target/StackFrame.h" 37#include "lldb/Target/Target.h" 38#include "lldb/Utility/DataExtractor.h" 39#include "lldb/Utility/Log.h" 40#include "lldb/Utility/RegularExpression.h" 41#include "lldb/Utility/Stream.h" 42 43using namespace lldb; 44using namespace lldb_private; 45 46class DisassemblerLLVMC::MCDisasmInstance { 47public: 48 static std::unique_ptr<MCDisasmInstance> 49 Create(const char *triple, const char *cpu, const char *features_str, 50 unsigned flavor, DisassemblerLLVMC &owner); 51 52 ~MCDisasmInstance() = default; 53 54 uint64_t GetMCInst(const uint8_t *opcode_data, size_t opcode_data_len, 55 lldb::addr_t pc, llvm::MCInst &mc_inst) const; 56 void PrintMCInst(llvm::MCInst &mc_inst, std::string &inst_string, 57 std::string &comments_string); 58 void SetStyle(bool use_hex_immed, HexImmediateStyle hex_style); 59 bool CanBranch(llvm::MCInst &mc_inst) const; 60 bool HasDelaySlot(llvm::MCInst &mc_inst) const; 61 bool IsCall(llvm::MCInst &mc_inst) const; 62 63private: 64 MCDisasmInstance(std::unique_ptr<llvm::MCInstrInfo> &&instr_info_up, 65 std::unique_ptr<llvm::MCRegisterInfo> &®_info_up, 66 std::unique_ptr<llvm::MCSubtargetInfo> &&subtarget_info_up, 67 std::unique_ptr<llvm::MCAsmInfo> &&asm_info_up, 68 std::unique_ptr<llvm::MCContext> &&context_up, 69 std::unique_ptr<llvm::MCDisassembler> &&disasm_up, 70 std::unique_ptr<llvm::MCInstPrinter> &&instr_printer_up); 71 72 std::unique_ptr<llvm::MCInstrInfo> m_instr_info_up; 73 std::unique_ptr<llvm::MCRegisterInfo> m_reg_info_up; 74 std::unique_ptr<llvm::MCSubtargetInfo> m_subtarget_info_up; 75 std::unique_ptr<llvm::MCAsmInfo> m_asm_info_up; 76 std::unique_ptr<llvm::MCContext> m_context_up; 77 std::unique_ptr<llvm::MCDisassembler> m_disasm_up; 78 std::unique_ptr<llvm::MCInstPrinter> m_instr_printer_up; 79}; 80 81class InstructionLLVMC : public lldb_private::Instruction { 82public: 83 InstructionLLVMC(DisassemblerLLVMC &disasm, 84 const lldb_private::Address &address, 85 AddressClass addr_class) 86 : Instruction(address, addr_class), 87 m_disasm_wp(std::static_pointer_cast<DisassemblerLLVMC>( 88 disasm.shared_from_this())), 89 m_does_branch(eLazyBoolCalculate), m_has_delay_slot(eLazyBoolCalculate), 90 m_is_call(eLazyBoolCalculate), m_is_valid(false), 91 m_using_file_addr(false) {} 92 93 ~InstructionLLVMC() override = default; 94 95 bool DoesBranch() override { 96 if (m_does_branch == eLazyBoolCalculate) { 97 DisassemblerScope disasm(*this); 98 if (disasm) { 99 DataExtractor data; 100 if (m_opcode.GetData(data)) { 101 bool is_alternate_isa; 102 lldb::addr_t pc = m_address.GetFileAddress(); 103 104 DisassemblerLLVMC::MCDisasmInstance *mc_disasm_ptr = 105 GetDisasmToUse(is_alternate_isa, disasm); 106 const uint8_t *opcode_data = data.GetDataStart(); 107 const size_t opcode_data_len = data.GetByteSize(); 108 llvm::MCInst inst; 109 const size_t inst_size = 110 mc_disasm_ptr->GetMCInst(opcode_data, opcode_data_len, pc, inst); 111 // Be conservative, if we didn't understand the instruction, say it 112 // might branch... 113 if (inst_size == 0) 114 m_does_branch = eLazyBoolYes; 115 else { 116 const bool can_branch = mc_disasm_ptr->CanBranch(inst); 117 if (can_branch) 118 m_does_branch = eLazyBoolYes; 119 else 120 m_does_branch = eLazyBoolNo; 121 } 122 } 123 } 124 } 125 return m_does_branch == eLazyBoolYes; 126 } 127 128 bool HasDelaySlot() override { 129 if (m_has_delay_slot == eLazyBoolCalculate) { 130 DisassemblerScope disasm(*this); 131 if (disasm) { 132 DataExtractor data; 133 if (m_opcode.GetData(data)) { 134 bool is_alternate_isa; 135 lldb::addr_t pc = m_address.GetFileAddress(); 136 137 DisassemblerLLVMC::MCDisasmInstance *mc_disasm_ptr = 138 GetDisasmToUse(is_alternate_isa, disasm); 139 const uint8_t *opcode_data = data.GetDataStart(); 140 const size_t opcode_data_len = data.GetByteSize(); 141 llvm::MCInst inst; 142 const size_t inst_size = 143 mc_disasm_ptr->GetMCInst(opcode_data, opcode_data_len, pc, inst); 144 // if we didn't understand the instruction, say it doesn't have a 145 // delay slot... 146 if (inst_size == 0) 147 m_has_delay_slot = eLazyBoolNo; 148 else { 149 const bool has_delay_slot = mc_disasm_ptr->HasDelaySlot(inst); 150 if (has_delay_slot) 151 m_has_delay_slot = eLazyBoolYes; 152 else 153 m_has_delay_slot = eLazyBoolNo; 154 } 155 } 156 } 157 } 158 return m_has_delay_slot == eLazyBoolYes; 159 } 160 161 DisassemblerLLVMC::MCDisasmInstance *GetDisasmToUse(bool &is_alternate_isa) { 162 DisassemblerScope disasm(*this); 163 return GetDisasmToUse(is_alternate_isa, disasm); 164 } 165 166 size_t Decode(const lldb_private::Disassembler &disassembler, 167 const lldb_private::DataExtractor &data, 168 lldb::offset_t data_offset) override { 169 // All we have to do is read the opcode which can be easy for some 170 // architectures 171 bool got_op = false; 172 DisassemblerScope disasm(*this); 173 if (disasm) { 174 const ArchSpec &arch = disasm->GetArchitecture(); 175 const lldb::ByteOrder byte_order = data.GetByteOrder(); 176 177 const uint32_t min_op_byte_size = arch.GetMinimumOpcodeByteSize(); 178 const uint32_t max_op_byte_size = arch.GetMaximumOpcodeByteSize(); 179 if (min_op_byte_size == max_op_byte_size) { 180 // Fixed size instructions, just read that amount of data. 181 if (!data.ValidOffsetForDataOfSize(data_offset, min_op_byte_size)) 182 return false; 183 184 switch (min_op_byte_size) { 185 case 1: 186 m_opcode.SetOpcode8(data.GetU8(&data_offset), byte_order); 187 got_op = true; 188 break; 189 190 case 2: 191 m_opcode.SetOpcode16(data.GetU16(&data_offset), byte_order); 192 got_op = true; 193 break; 194 195 case 4: 196 m_opcode.SetOpcode32(data.GetU32(&data_offset), byte_order); 197 got_op = true; 198 break; 199 200 case 8: 201 m_opcode.SetOpcode64(data.GetU64(&data_offset), byte_order); 202 got_op = true; 203 break; 204 205 default: 206 m_opcode.SetOpcodeBytes(data.PeekData(data_offset, min_op_byte_size), 207 min_op_byte_size); 208 got_op = true; 209 break; 210 } 211 } 212 if (!got_op) { 213 bool is_alternate_isa = false; 214 DisassemblerLLVMC::MCDisasmInstance *mc_disasm_ptr = 215 GetDisasmToUse(is_alternate_isa, disasm); 216 217 const llvm::Triple::ArchType machine = arch.GetMachine(); 218 if (machine == llvm::Triple::arm || machine == llvm::Triple::thumb) { 219 if (machine == llvm::Triple::thumb || is_alternate_isa) { 220 uint32_t thumb_opcode = data.GetU16(&data_offset); 221 if ((thumb_opcode & 0xe000) != 0xe000 || 222 ((thumb_opcode & 0x1800u) == 0)) { 223 m_opcode.SetOpcode16(thumb_opcode, byte_order); 224 m_is_valid = true; 225 } else { 226 thumb_opcode <<= 16; 227 thumb_opcode |= data.GetU16(&data_offset); 228 m_opcode.SetOpcode16_2(thumb_opcode, byte_order); 229 m_is_valid = true; 230 } 231 } else { 232 m_opcode.SetOpcode32(data.GetU32(&data_offset), byte_order); 233 m_is_valid = true; 234 } 235 } else { 236 // The opcode isn't evenly sized, so we need to actually use the llvm 237 // disassembler to parse it and get the size. 238 uint8_t *opcode_data = 239 const_cast<uint8_t *>(data.PeekData(data_offset, 1)); 240 const size_t opcode_data_len = data.BytesLeft(data_offset); 241 const addr_t pc = m_address.GetFileAddress(); 242 llvm::MCInst inst; 243 244 const size_t inst_size = 245 mc_disasm_ptr->GetMCInst(opcode_data, opcode_data_len, pc, inst); 246 if (inst_size == 0) 247 m_opcode.Clear(); 248 else { 249 m_opcode.SetOpcodeBytes(opcode_data, inst_size); 250 m_is_valid = true; 251 } 252 } 253 } 254 return m_opcode.GetByteSize(); 255 } 256 return 0; 257 } 258 259 void AppendComment(std::string &description) { 260 if (m_comment.empty()) 261 m_comment.swap(description); 262 else { 263 m_comment.append(", "); 264 m_comment.append(description); 265 } 266 } 267 268 void CalculateMnemonicOperandsAndComment( 269 const lldb_private::ExecutionContext *exe_ctx) override { 270 DataExtractor data; 271 const AddressClass address_class = GetAddressClass(); 272 273 if (m_opcode.GetData(data)) { 274 std::string out_string; 275 std::string comment_string; 276 277 DisassemblerScope disasm(*this, exe_ctx); 278 if (disasm) { 279 DisassemblerLLVMC::MCDisasmInstance *mc_disasm_ptr; 280 281 if (address_class == AddressClass::eCodeAlternateISA) 282 mc_disasm_ptr = disasm->m_alternate_disasm_up.get(); 283 else 284 mc_disasm_ptr = disasm->m_disasm_up.get(); 285 286 lldb::addr_t pc = m_address.GetFileAddress(); 287 m_using_file_addr = true; 288 289 const bool data_from_file = disasm->m_data_from_file; 290 bool use_hex_immediates = true; 291 Disassembler::HexImmediateStyle hex_style = Disassembler::eHexStyleC; 292 293 if (exe_ctx) { 294 Target *target = exe_ctx->GetTargetPtr(); 295 if (target) { 296 use_hex_immediates = target->GetUseHexImmediates(); 297 hex_style = target->GetHexImmediateStyle(); 298 299 if (!data_from_file) { 300 const lldb::addr_t load_addr = m_address.GetLoadAddress(target); 301 if (load_addr != LLDB_INVALID_ADDRESS) { 302 pc = load_addr; 303 m_using_file_addr = false; 304 } 305 } 306 } 307 } 308 309 const uint8_t *opcode_data = data.GetDataStart(); 310 const size_t opcode_data_len = data.GetByteSize(); 311 llvm::MCInst inst; 312 size_t inst_size = 313 mc_disasm_ptr->GetMCInst(opcode_data, opcode_data_len, pc, inst); 314 315 if (inst_size > 0) { 316 mc_disasm_ptr->SetStyle(use_hex_immediates, hex_style); 317 mc_disasm_ptr->PrintMCInst(inst, out_string, comment_string); 318 319 if (!comment_string.empty()) { 320 AppendComment(comment_string); 321 } 322 } 323 324 if (inst_size == 0) { 325 m_comment.assign("unknown opcode"); 326 inst_size = m_opcode.GetByteSize(); 327 StreamString mnemonic_strm; 328 lldb::offset_t offset = 0; 329 lldb::ByteOrder byte_order = data.GetByteOrder(); 330 switch (inst_size) { 331 case 1: { 332 const uint8_t uval8 = data.GetU8(&offset); 333 m_opcode.SetOpcode8(uval8, byte_order); 334 m_opcode_name.assign(".byte"); 335 mnemonic_strm.Printf("0x%2.2x", uval8); 336 } break; 337 case 2: { 338 const uint16_t uval16 = data.GetU16(&offset); 339 m_opcode.SetOpcode16(uval16, byte_order); 340 m_opcode_name.assign(".short"); 341 mnemonic_strm.Printf("0x%4.4x", uval16); 342 } break; 343 case 4: { 344 const uint32_t uval32 = data.GetU32(&offset); 345 m_opcode.SetOpcode32(uval32, byte_order); 346 m_opcode_name.assign(".long"); 347 mnemonic_strm.Printf("0x%8.8x", uval32); 348 } break; 349 case 8: { 350 const uint64_t uval64 = data.GetU64(&offset); 351 m_opcode.SetOpcode64(uval64, byte_order); 352 m_opcode_name.assign(".quad"); 353 mnemonic_strm.Printf("0x%16.16" PRIx64, uval64); 354 } break; 355 default: 356 if (inst_size == 0) 357 return; 358 else { 359 const uint8_t *bytes = data.PeekData(offset, inst_size); 360 if (bytes == nullptr) 361 return; 362 m_opcode_name.assign(".byte"); 363 m_opcode.SetOpcodeBytes(bytes, inst_size); 364 mnemonic_strm.Printf("0x%2.2x", bytes[0]); 365 for (uint32_t i = 1; i < inst_size; ++i) 366 mnemonic_strm.Printf(" 0x%2.2x", bytes[i]); 367 } 368 break; 369 } 370 m_mnemonics = mnemonic_strm.GetString(); 371 return; 372 } else { 373 if (m_does_branch == eLazyBoolCalculate) { 374 const bool can_branch = mc_disasm_ptr->CanBranch(inst); 375 if (can_branch) 376 m_does_branch = eLazyBoolYes; 377 else 378 m_does_branch = eLazyBoolNo; 379 } 380 } 381 382 static RegularExpression s_regex( 383 llvm::StringRef("[ \t]*([^ ^\t]+)[ \t]*([^ ^\t].*)?")); 384 385 llvm::SmallVector<llvm::StringRef, 4> matches; 386 if (s_regex.Execute(out_string, &matches)) { 387 m_opcode_name = matches[1].str(); 388 m_mnemonics = matches[2].str(); 389 } 390 } 391 } 392 } 393 394 bool IsValid() const { return m_is_valid; } 395 396 bool UsingFileAddress() const { return m_using_file_addr; } 397 size_t GetByteSize() const { return m_opcode.GetByteSize(); } 398 399 /// Grants exclusive access to the disassembler and initializes it with the 400 /// given InstructionLLVMC and an optional ExecutionContext. 401 class DisassemblerScope { 402 std::shared_ptr<DisassemblerLLVMC> m_disasm; 403 404 public: 405 explicit DisassemblerScope( 406 InstructionLLVMC &i, 407 const lldb_private::ExecutionContext *exe_ctx = nullptr) 408 : m_disasm(i.m_disasm_wp.lock()) { 409 m_disasm->m_mutex.lock(); 410 m_disasm->m_inst = &i; 411 m_disasm->m_exe_ctx = exe_ctx; 412 } 413 ~DisassemblerScope() { m_disasm->m_mutex.unlock(); } 414 415 /// Evaluates to true if this scope contains a valid disassembler. 416 operator bool() const { return static_cast<bool>(m_disasm); } 417 418 std::shared_ptr<DisassemblerLLVMC> operator->() { return m_disasm; } 419 }; 420 421 static llvm::StringRef::const_iterator 422 ConsumeWhitespace(llvm::StringRef::const_iterator osi, 423 llvm::StringRef::const_iterator ose) { 424 while (osi != ose) { 425 switch (*osi) { 426 default: 427 return osi; 428 case ' ': 429 case '\t': 430 break; 431 } 432 ++osi; 433 } 434 435 return osi; 436 } 437 438 static std::pair<bool, llvm::StringRef::const_iterator> 439 ConsumeChar(llvm::StringRef::const_iterator osi, const char c, 440 llvm::StringRef::const_iterator ose) { 441 bool found = false; 442 443 osi = ConsumeWhitespace(osi, ose); 444 if (osi != ose && *osi == c) { 445 found = true; 446 ++osi; 447 } 448 449 return std::make_pair(found, osi); 450 } 451 452 static std::pair<Operand, llvm::StringRef::const_iterator> 453 ParseRegisterName(llvm::StringRef::const_iterator osi, 454 llvm::StringRef::const_iterator ose) { 455 Operand ret; 456 ret.m_type = Operand::Type::Register; 457 std::string str; 458 459 osi = ConsumeWhitespace(osi, ose); 460 461 while (osi != ose) { 462 if (*osi >= '0' && *osi <= '9') { 463 if (str.empty()) { 464 return std::make_pair(Operand(), osi); 465 } else { 466 str.push_back(*osi); 467 } 468 } else if (*osi >= 'a' && *osi <= 'z') { 469 str.push_back(*osi); 470 } else { 471 switch (*osi) { 472 default: 473 if (str.empty()) { 474 return std::make_pair(Operand(), osi); 475 } else { 476 ret.m_register = ConstString(str); 477 return std::make_pair(ret, osi); 478 } 479 case '%': 480 if (!str.empty()) { 481 return std::make_pair(Operand(), osi); 482 } 483 break; 484 } 485 } 486 ++osi; 487 } 488 489 ret.m_register = ConstString(str); 490 return std::make_pair(ret, osi); 491 } 492 493 static std::pair<Operand, llvm::StringRef::const_iterator> 494 ParseImmediate(llvm::StringRef::const_iterator osi, 495 llvm::StringRef::const_iterator ose) { 496 Operand ret; 497 ret.m_type = Operand::Type::Immediate; 498 std::string str; 499 bool is_hex = false; 500 501 osi = ConsumeWhitespace(osi, ose); 502 503 while (osi != ose) { 504 if (*osi >= '0' && *osi <= '9') { 505 str.push_back(*osi); 506 } else if (*osi >= 'a' && *osi <= 'f') { 507 if (is_hex) { 508 str.push_back(*osi); 509 } else { 510 return std::make_pair(Operand(), osi); 511 } 512 } else { 513 switch (*osi) { 514 default: 515 if (str.empty()) { 516 return std::make_pair(Operand(), osi); 517 } else { 518 ret.m_immediate = strtoull(str.c_str(), nullptr, 0); 519 return std::make_pair(ret, osi); 520 } 521 case 'x': 522 if (!str.compare("0")) { 523 is_hex = true; 524 str.push_back(*osi); 525 } else { 526 return std::make_pair(Operand(), osi); 527 } 528 break; 529 case '#': 530 case '$': 531 if (!str.empty()) { 532 return std::make_pair(Operand(), osi); 533 } 534 break; 535 case '-': 536 if (str.empty()) { 537 ret.m_negative = true; 538 } else { 539 return std::make_pair(Operand(), osi); 540 } 541 } 542 } 543 ++osi; 544 } 545 546 ret.m_immediate = strtoull(str.c_str(), nullptr, 0); 547 return std::make_pair(ret, osi); 548 } 549 550 // -0x5(%rax,%rax,2) 551 static std::pair<Operand, llvm::StringRef::const_iterator> 552 ParseIntelIndexedAccess(llvm::StringRef::const_iterator osi, 553 llvm::StringRef::const_iterator ose) { 554 std::pair<Operand, llvm::StringRef::const_iterator> offset_and_iterator = 555 ParseImmediate(osi, ose); 556 if (offset_and_iterator.first.IsValid()) { 557 osi = offset_and_iterator.second; 558 } 559 560 bool found = false; 561 std::tie(found, osi) = ConsumeChar(osi, '(', ose); 562 if (!found) { 563 return std::make_pair(Operand(), osi); 564 } 565 566 std::pair<Operand, llvm::StringRef::const_iterator> base_and_iterator = 567 ParseRegisterName(osi, ose); 568 if (base_and_iterator.first.IsValid()) { 569 osi = base_and_iterator.second; 570 } else { 571 return std::make_pair(Operand(), osi); 572 } 573 574 std::tie(found, osi) = ConsumeChar(osi, ',', ose); 575 if (!found) { 576 return std::make_pair(Operand(), osi); 577 } 578 579 std::pair<Operand, llvm::StringRef::const_iterator> index_and_iterator = 580 ParseRegisterName(osi, ose); 581 if (index_and_iterator.first.IsValid()) { 582 osi = index_and_iterator.second; 583 } else { 584 return std::make_pair(Operand(), osi); 585 } 586 587 std::tie(found, osi) = ConsumeChar(osi, ',', ose); 588 if (!found) { 589 return std::make_pair(Operand(), osi); 590 } 591 592 std::pair<Operand, llvm::StringRef::const_iterator> 593 multiplier_and_iterator = ParseImmediate(osi, ose); 594 if (index_and_iterator.first.IsValid()) { 595 osi = index_and_iterator.second; 596 } else { 597 return std::make_pair(Operand(), osi); 598 } 599 600 std::tie(found, osi) = ConsumeChar(osi, ')', ose); 601 if (!found) { 602 return std::make_pair(Operand(), osi); 603 } 604 605 Operand product; 606 product.m_type = Operand::Type::Product; 607 product.m_children.push_back(index_and_iterator.first); 608 product.m_children.push_back(multiplier_and_iterator.first); 609 610 Operand index; 611 index.m_type = Operand::Type::Sum; 612 index.m_children.push_back(base_and_iterator.first); 613 index.m_children.push_back(product); 614 615 if (offset_and_iterator.first.IsValid()) { 616 Operand offset; 617 offset.m_type = Operand::Type::Sum; 618 offset.m_children.push_back(offset_and_iterator.first); 619 offset.m_children.push_back(index); 620 621 Operand deref; 622 deref.m_type = Operand::Type::Dereference; 623 deref.m_children.push_back(offset); 624 return std::make_pair(deref, osi); 625 } else { 626 Operand deref; 627 deref.m_type = Operand::Type::Dereference; 628 deref.m_children.push_back(index); 629 return std::make_pair(deref, osi); 630 } 631 } 632 633 // -0x10(%rbp) 634 static std::pair<Operand, llvm::StringRef::const_iterator> 635 ParseIntelDerefAccess(llvm::StringRef::const_iterator osi, 636 llvm::StringRef::const_iterator ose) { 637 std::pair<Operand, llvm::StringRef::const_iterator> offset_and_iterator = 638 ParseImmediate(osi, ose); 639 if (offset_and_iterator.first.IsValid()) { 640 osi = offset_and_iterator.second; 641 } 642 643 bool found = false; 644 std::tie(found, osi) = ConsumeChar(osi, '(', ose); 645 if (!found) { 646 return std::make_pair(Operand(), osi); 647 } 648 649 std::pair<Operand, llvm::StringRef::const_iterator> base_and_iterator = 650 ParseRegisterName(osi, ose); 651 if (base_and_iterator.first.IsValid()) { 652 osi = base_and_iterator.second; 653 } else { 654 return std::make_pair(Operand(), osi); 655 } 656 657 std::tie(found, osi) = ConsumeChar(osi, ')', ose); 658 if (!found) { 659 return std::make_pair(Operand(), osi); 660 } 661 662 if (offset_and_iterator.first.IsValid()) { 663 Operand offset; 664 offset.m_type = Operand::Type::Sum; 665 offset.m_children.push_back(offset_and_iterator.first); 666 offset.m_children.push_back(base_and_iterator.first); 667 668 Operand deref; 669 deref.m_type = Operand::Type::Dereference; 670 deref.m_children.push_back(offset); 671 return std::make_pair(deref, osi); 672 } else { 673 Operand deref; 674 deref.m_type = Operand::Type::Dereference; 675 deref.m_children.push_back(base_and_iterator.first); 676 return std::make_pair(deref, osi); 677 } 678 } 679 680 // [sp, #8]! 681 static std::pair<Operand, llvm::StringRef::const_iterator> 682 ParseARMOffsetAccess(llvm::StringRef::const_iterator osi, 683 llvm::StringRef::const_iterator ose) { 684 bool found = false; 685 std::tie(found, osi) = ConsumeChar(osi, '[', ose); 686 if (!found) { 687 return std::make_pair(Operand(), osi); 688 } 689 690 std::pair<Operand, llvm::StringRef::const_iterator> base_and_iterator = 691 ParseRegisterName(osi, ose); 692 if (base_and_iterator.first.IsValid()) { 693 osi = base_and_iterator.second; 694 } else { 695 return std::make_pair(Operand(), osi); 696 } 697 698 std::tie(found, osi) = ConsumeChar(osi, ',', ose); 699 if (!found) { 700 return std::make_pair(Operand(), osi); 701 } 702 703 std::pair<Operand, llvm::StringRef::const_iterator> offset_and_iterator = 704 ParseImmediate(osi, ose); 705 if (offset_and_iterator.first.IsValid()) { 706 osi = offset_and_iterator.second; 707 } 708 709 std::tie(found, osi) = ConsumeChar(osi, ']', ose); 710 if (!found) { 711 return std::make_pair(Operand(), osi); 712 } 713 714 Operand offset; 715 offset.m_type = Operand::Type::Sum; 716 offset.m_children.push_back(offset_and_iterator.first); 717 offset.m_children.push_back(base_and_iterator.first); 718 719 Operand deref; 720 deref.m_type = Operand::Type::Dereference; 721 deref.m_children.push_back(offset); 722 return std::make_pair(deref, osi); 723 } 724 725 // [sp] 726 static std::pair<Operand, llvm::StringRef::const_iterator> 727 ParseARMDerefAccess(llvm::StringRef::const_iterator osi, 728 llvm::StringRef::const_iterator ose) { 729 bool found = false; 730 std::tie(found, osi) = ConsumeChar(osi, '[', ose); 731 if (!found) { 732 return std::make_pair(Operand(), osi); 733 } 734 735 std::pair<Operand, llvm::StringRef::const_iterator> base_and_iterator = 736 ParseRegisterName(osi, ose); 737 if (base_and_iterator.first.IsValid()) { 738 osi = base_and_iterator.second; 739 } else { 740 return std::make_pair(Operand(), osi); 741 } 742 743 std::tie(found, osi) = ConsumeChar(osi, ']', ose); 744 if (!found) { 745 return std::make_pair(Operand(), osi); 746 } 747 748 Operand deref; 749 deref.m_type = Operand::Type::Dereference; 750 deref.m_children.push_back(base_and_iterator.first); 751 return std::make_pair(deref, osi); 752 } 753 754 static void DumpOperand(const Operand &op, Stream &s) { 755 switch (op.m_type) { 756 case Operand::Type::Dereference: 757 s.PutCString("*"); 758 DumpOperand(op.m_children[0], s); 759 break; 760 case Operand::Type::Immediate: 761 if (op.m_negative) { 762 s.PutCString("-"); 763 } 764 s.PutCString(llvm::to_string(op.m_immediate)); 765 break; 766 case Operand::Type::Invalid: 767 s.PutCString("Invalid"); 768 break; 769 case Operand::Type::Product: 770 s.PutCString("("); 771 DumpOperand(op.m_children[0], s); 772 s.PutCString("*"); 773 DumpOperand(op.m_children[1], s); 774 s.PutCString(")"); 775 break; 776 case Operand::Type::Register: 777 s.PutCString(op.m_register.AsCString()); 778 break; 779 case Operand::Type::Sum: 780 s.PutCString("("); 781 DumpOperand(op.m_children[0], s); 782 s.PutCString("+"); 783 DumpOperand(op.m_children[1], s); 784 s.PutCString(")"); 785 break; 786 } 787 } 788 789 bool ParseOperands( 790 llvm::SmallVectorImpl<Instruction::Operand> &operands) override { 791 const char *operands_string = GetOperands(nullptr); 792 793 if (!operands_string) { 794 return false; 795 } 796 797 llvm::StringRef operands_ref(operands_string); 798 799 llvm::StringRef::const_iterator osi = operands_ref.begin(); 800 llvm::StringRef::const_iterator ose = operands_ref.end(); 801 802 while (osi != ose) { 803 Operand operand; 804 llvm::StringRef::const_iterator iter; 805 806 if ((std::tie(operand, iter) = ParseIntelIndexedAccess(osi, ose), 807 operand.IsValid()) || 808 (std::tie(operand, iter) = ParseIntelDerefAccess(osi, ose), 809 operand.IsValid()) || 810 (std::tie(operand, iter) = ParseARMOffsetAccess(osi, ose), 811 operand.IsValid()) || 812 (std::tie(operand, iter) = ParseARMDerefAccess(osi, ose), 813 operand.IsValid()) || 814 (std::tie(operand, iter) = ParseRegisterName(osi, ose), 815 operand.IsValid()) || 816 (std::tie(operand, iter) = ParseImmediate(osi, ose), 817 operand.IsValid())) { 818 osi = iter; 819 operands.push_back(operand); 820 } else { 821 return false; 822 } 823 824 std::pair<bool, llvm::StringRef::const_iterator> found_and_iter = 825 ConsumeChar(osi, ',', ose); 826 if (found_and_iter.first) { 827 osi = found_and_iter.second; 828 } 829 830 osi = ConsumeWhitespace(osi, ose); 831 } 832 833 DisassemblerSP disasm_sp = m_disasm_wp.lock(); 834 835 if (disasm_sp && operands.size() > 1) { 836 // TODO tie this into the MC Disassembler's notion of clobbers. 837 switch (disasm_sp->GetArchitecture().GetMachine()) { 838 default: 839 break; 840 case llvm::Triple::x86: 841 case llvm::Triple::x86_64: 842 operands[operands.size() - 1].m_clobbered = true; 843 break; 844 case llvm::Triple::arm: 845 operands[0].m_clobbered = true; 846 break; 847 } 848 } 849 850 if (Log *log = 851 lldb_private::GetLogIfAllCategoriesSet(LIBLLDB_LOG_PROCESS)) { 852 StreamString ss; 853 854 ss.Printf("[%s] expands to %zu operands:\n", operands_string, 855 operands.size()); 856 for (const Operand &operand : operands) { 857 ss.PutCString(" "); 858 DumpOperand(operand, ss); 859 ss.PutCString("\n"); 860 } 861 862 log->PutString(ss.GetString()); 863 } 864 865 return true; 866 } 867 868 bool IsCall() override { 869 if (m_is_call == eLazyBoolCalculate) { 870 DisassemblerScope disasm(*this); 871 if (disasm) { 872 DataExtractor data; 873 if (m_opcode.GetData(data)) { 874 bool is_alternate_isa; 875 lldb::addr_t pc = m_address.GetFileAddress(); 876 877 DisassemblerLLVMC::MCDisasmInstance *mc_disasm_ptr = 878 GetDisasmToUse(is_alternate_isa, disasm); 879 const uint8_t *opcode_data = data.GetDataStart(); 880 const size_t opcode_data_len = data.GetByteSize(); 881 llvm::MCInst inst; 882 const size_t inst_size = 883 mc_disasm_ptr->GetMCInst(opcode_data, opcode_data_len, pc, inst); 884 if (inst_size == 0) { 885 m_is_call = eLazyBoolNo; 886 } else { 887 if (mc_disasm_ptr->IsCall(inst)) 888 m_is_call = eLazyBoolYes; 889 else 890 m_is_call = eLazyBoolNo; 891 } 892 } 893 } 894 } 895 return m_is_call == eLazyBoolYes; 896 } 897 898protected: 899 std::weak_ptr<DisassemblerLLVMC> m_disasm_wp; 900 LazyBool m_does_branch; 901 LazyBool m_has_delay_slot; 902 LazyBool m_is_call; 903 bool m_is_valid; 904 bool m_using_file_addr; 905 906private: 907 DisassemblerLLVMC::MCDisasmInstance * 908 GetDisasmToUse(bool &is_alternate_isa, DisassemblerScope &disasm) { 909 is_alternate_isa = false; 910 if (disasm) { 911 if (disasm->m_alternate_disasm_up) { 912 const AddressClass address_class = GetAddressClass(); 913 914 if (address_class == AddressClass::eCodeAlternateISA) { 915 is_alternate_isa = true; 916 return disasm->m_alternate_disasm_up.get(); 917 } 918 } 919 return disasm->m_disasm_up.get(); 920 } 921 return nullptr; 922 } 923}; 924 925std::unique_ptr<DisassemblerLLVMC::MCDisasmInstance> 926DisassemblerLLVMC::MCDisasmInstance::Create(const char *triple, const char *cpu, 927 const char *features_str, 928 unsigned flavor, 929 DisassemblerLLVMC &owner) { 930 using Instance = std::unique_ptr<DisassemblerLLVMC::MCDisasmInstance>; 931 932 std::string Status; 933 const llvm::Target *curr_target = 934 llvm::TargetRegistry::lookupTarget(triple, Status); 935 if (!curr_target) 936 return Instance(); 937 938 std::unique_ptr<llvm::MCInstrInfo> instr_info_up( 939 curr_target->createMCInstrInfo()); 940 if (!instr_info_up) 941 return Instance(); 942 943 std::unique_ptr<llvm::MCRegisterInfo> reg_info_up( 944 curr_target->createMCRegInfo(triple)); 945 if (!reg_info_up) 946 return Instance(); 947 948 std::unique_ptr<llvm::MCSubtargetInfo> subtarget_info_up( 949 curr_target->createMCSubtargetInfo(triple, cpu, features_str)); 950 if (!subtarget_info_up) 951 return Instance(); 952 953 llvm::MCTargetOptions MCOptions; 954 std::unique_ptr<llvm::MCAsmInfo> asm_info_up( 955 curr_target->createMCAsmInfo(*reg_info_up, triple, MCOptions)); 956 if (!asm_info_up) 957 return Instance(); 958 959 std::unique_ptr<llvm::MCContext> context_up( 960 new llvm::MCContext(asm_info_up.get(), reg_info_up.get(), nullptr)); 961 if (!context_up) 962 return Instance(); 963 964 std::unique_ptr<llvm::MCDisassembler> disasm_up( 965 curr_target->createMCDisassembler(*subtarget_info_up, *context_up)); 966 if (!disasm_up) 967 return Instance(); 968 969 std::unique_ptr<llvm::MCRelocationInfo> rel_info_up( 970 curr_target->createMCRelocationInfo(triple, *context_up)); 971 if (!rel_info_up) 972 return Instance(); 973 974 std::unique_ptr<llvm::MCSymbolizer> symbolizer_up( 975 curr_target->createMCSymbolizer( 976 triple, nullptr, DisassemblerLLVMC::SymbolLookupCallback, &owner, 977 context_up.get(), std::move(rel_info_up))); 978 disasm_up->setSymbolizer(std::move(symbolizer_up)); 979 980 unsigned asm_printer_variant = 981 flavor == ~0U ? asm_info_up->getAssemblerDialect() : flavor; 982 983 std::unique_ptr<llvm::MCInstPrinter> instr_printer_up( 984 curr_target->createMCInstPrinter(llvm::Triple{triple}, 985 asm_printer_variant, *asm_info_up, 986 *instr_info_up, *reg_info_up)); 987 if (!instr_printer_up) 988 return Instance(); 989 990 return Instance( 991 new MCDisasmInstance(std::move(instr_info_up), std::move(reg_info_up), 992 std::move(subtarget_info_up), std::move(asm_info_up), 993 std::move(context_up), std::move(disasm_up), 994 std::move(instr_printer_up))); 995} 996 997DisassemblerLLVMC::MCDisasmInstance::MCDisasmInstance( 998 std::unique_ptr<llvm::MCInstrInfo> &&instr_info_up, 999 std::unique_ptr<llvm::MCRegisterInfo> &®_info_up, 1000 std::unique_ptr<llvm::MCSubtargetInfo> &&subtarget_info_up, 1001 std::unique_ptr<llvm::MCAsmInfo> &&asm_info_up, 1002 std::unique_ptr<llvm::MCContext> &&context_up, 1003 std::unique_ptr<llvm::MCDisassembler> &&disasm_up, 1004 std::unique_ptr<llvm::MCInstPrinter> &&instr_printer_up) 1005 : m_instr_info_up(std::move(instr_info_up)), 1006 m_reg_info_up(std::move(reg_info_up)), 1007 m_subtarget_info_up(std::move(subtarget_info_up)), 1008 m_asm_info_up(std::move(asm_info_up)), 1009 m_context_up(std::move(context_up)), m_disasm_up(std::move(disasm_up)), 1010 m_instr_printer_up(std::move(instr_printer_up)) { 1011 assert(m_instr_info_up && m_reg_info_up && m_subtarget_info_up && 1012 m_asm_info_up && m_context_up && m_disasm_up && m_instr_printer_up); 1013} 1014 1015uint64_t DisassemblerLLVMC::MCDisasmInstance::GetMCInst( 1016 const uint8_t *opcode_data, size_t opcode_data_len, lldb::addr_t pc, 1017 llvm::MCInst &mc_inst) const { 1018 llvm::ArrayRef<uint8_t> data(opcode_data, opcode_data_len); 1019 llvm::MCDisassembler::DecodeStatus status; 1020 1021 uint64_t new_inst_size; 1022 status = m_disasm_up->getInstruction(mc_inst, new_inst_size, data, pc, 1023 llvm::nulls()); 1024 if (status == llvm::MCDisassembler::Success) 1025 return new_inst_size; 1026 else 1027 return 0; 1028} 1029 1030void DisassemblerLLVMC::MCDisasmInstance::PrintMCInst( 1031 llvm::MCInst &mc_inst, std::string &inst_string, 1032 std::string &comments_string) { 1033 llvm::raw_string_ostream inst_stream(inst_string); 1034 llvm::raw_string_ostream comments_stream(comments_string); 1035 1036 m_instr_printer_up->setCommentStream(comments_stream); 1037 m_instr_printer_up->printInst(&mc_inst, 0, llvm::StringRef(), 1038 *m_subtarget_info_up, inst_stream); 1039 m_instr_printer_up->setCommentStream(llvm::nulls()); 1040 comments_stream.flush(); 1041 1042 static std::string g_newlines("\r\n"); 1043 1044 for (size_t newline_pos = 0; 1045 (newline_pos = comments_string.find_first_of(g_newlines, newline_pos)) != 1046 comments_string.npos; 1047 /**/) { 1048 comments_string.replace(comments_string.begin() + newline_pos, 1049 comments_string.begin() + newline_pos + 1, 1, ' '); 1050 } 1051} 1052 1053void DisassemblerLLVMC::MCDisasmInstance::SetStyle( 1054 bool use_hex_immed, HexImmediateStyle hex_style) { 1055 m_instr_printer_up->setPrintImmHex(use_hex_immed); 1056 switch (hex_style) { 1057 case eHexStyleC: 1058 m_instr_printer_up->setPrintHexStyle(llvm::HexStyle::C); 1059 break; 1060 case eHexStyleAsm: 1061 m_instr_printer_up->setPrintHexStyle(llvm::HexStyle::Asm); 1062 break; 1063 } 1064} 1065 1066bool DisassemblerLLVMC::MCDisasmInstance::CanBranch( 1067 llvm::MCInst &mc_inst) const { 1068 return m_instr_info_up->get(mc_inst.getOpcode()) 1069 .mayAffectControlFlow(mc_inst, *m_reg_info_up); 1070} 1071 1072bool DisassemblerLLVMC::MCDisasmInstance::HasDelaySlot( 1073 llvm::MCInst &mc_inst) const { 1074 return m_instr_info_up->get(mc_inst.getOpcode()).hasDelaySlot(); 1075} 1076 1077bool DisassemblerLLVMC::MCDisasmInstance::IsCall(llvm::MCInst &mc_inst) const { 1078 return m_instr_info_up->get(mc_inst.getOpcode()).isCall(); 1079} 1080 1081DisassemblerLLVMC::DisassemblerLLVMC(const ArchSpec &arch, 1082 const char *flavor_string) 1083 : Disassembler(arch, flavor_string), m_exe_ctx(nullptr), m_inst(nullptr), 1084 m_data_from_file(false) { 1085 if (!FlavorValidForArchSpec(arch, m_flavor.c_str())) { 1086 m_flavor.assign("default"); 1087 } 1088 1089 unsigned flavor = ~0U; 1090 llvm::Triple triple = arch.GetTriple(); 1091 1092 // So far the only supported flavor is "intel" on x86. The base class will 1093 // set this correctly coming in. 1094 if (triple.getArch() == llvm::Triple::x86 || 1095 triple.getArch() == llvm::Triple::x86_64) { 1096 if (m_flavor == "intel") { 1097 flavor = 1; 1098 } else if (m_flavor == "att") { 1099 flavor = 0; 1100 } 1101 } 1102 1103 ArchSpec thumb_arch(arch); 1104 if (triple.getArch() == llvm::Triple::arm) { 1105 std::string thumb_arch_name(thumb_arch.GetTriple().getArchName().str()); 1106 // Replace "arm" with "thumb" so we get all thumb variants correct 1107 if (thumb_arch_name.size() > 3) { 1108 thumb_arch_name.erase(0, 3); 1109 thumb_arch_name.insert(0, "thumb"); 1110 } else { 1111 thumb_arch_name = "thumbv8.2a"; 1112 } 1113 thumb_arch.GetTriple().setArchName(llvm::StringRef(thumb_arch_name)); 1114 } 1115 1116 // If no sub architecture specified then use the most recent arm architecture 1117 // so the disassembler will return all instruction. Without it we will see a 1118 // lot of unknow opcode in case the code uses instructions which are not 1119 // available in the oldest arm version (used when no sub architecture is 1120 // specified) 1121 if (triple.getArch() == llvm::Triple::arm && 1122 triple.getSubArch() == llvm::Triple::NoSubArch) 1123 triple.setArchName("armv8.2a"); 1124 1125 std::string features_str = ""; 1126 const char *triple_str = triple.getTriple().c_str(); 1127 1128 // ARM Cortex M0-M7 devices only execute thumb instructions 1129 if (arch.IsAlwaysThumbInstructions()) { 1130 triple_str = thumb_arch.GetTriple().getTriple().c_str(); 1131 features_str += "+fp-armv8,"; 1132 } 1133 1134 const char *cpu = ""; 1135 1136 switch (arch.GetCore()) { 1137 case ArchSpec::eCore_mips32: 1138 case ArchSpec::eCore_mips32el: 1139 cpu = "mips32"; 1140 break; 1141 case ArchSpec::eCore_mips32r2: 1142 case ArchSpec::eCore_mips32r2el: 1143 cpu = "mips32r2"; 1144 break; 1145 case ArchSpec::eCore_mips32r3: 1146 case ArchSpec::eCore_mips32r3el: 1147 cpu = "mips32r3"; 1148 break; 1149 case ArchSpec::eCore_mips32r5: 1150 case ArchSpec::eCore_mips32r5el: 1151 cpu = "mips32r5"; 1152 break; 1153 case ArchSpec::eCore_mips32r6: 1154 case ArchSpec::eCore_mips32r6el: 1155 cpu = "mips32r6"; 1156 break; 1157 case ArchSpec::eCore_mips64: 1158 case ArchSpec::eCore_mips64el: 1159 cpu = "mips64"; 1160 break; 1161 case ArchSpec::eCore_mips64r2: 1162 case ArchSpec::eCore_mips64r2el: 1163 cpu = "mips64r2"; 1164 break; 1165 case ArchSpec::eCore_mips64r3: 1166 case ArchSpec::eCore_mips64r3el: 1167 cpu = "mips64r3"; 1168 break; 1169 case ArchSpec::eCore_mips64r5: 1170 case ArchSpec::eCore_mips64r5el: 1171 cpu = "mips64r5"; 1172 break; 1173 case ArchSpec::eCore_mips64r6: 1174 case ArchSpec::eCore_mips64r6el: 1175 cpu = "mips64r6"; 1176 break; 1177 default: 1178 cpu = ""; 1179 break; 1180 } 1181 1182 if (arch.IsMIPS()) { 1183 uint32_t arch_flags = arch.GetFlags(); 1184 if (arch_flags & ArchSpec::eMIPSAse_msa) 1185 features_str += "+msa,"; 1186 if (arch_flags & ArchSpec::eMIPSAse_dsp) 1187 features_str += "+dsp,"; 1188 if (arch_flags & ArchSpec::eMIPSAse_dspr2) 1189 features_str += "+dspr2,"; 1190 } 1191 1192 // If any AArch64 variant, enable the ARMv8.5 ISA with SVE extensions so we 1193 // can disassemble newer instructions. 1194 if (triple.getArch() == llvm::Triple::aarch64 || 1195 triple.getArch() == llvm::Triple::aarch64_32) 1196 features_str += "+v8.5a,+sve2"; 1197 1198 if ((triple.getArch() == llvm::Triple::aarch64 || 1199 triple.getArch() == llvm::Triple::aarch64_32) 1200 && triple.getVendor() == llvm::Triple::Apple) { 1201 cpu = "apple-latest"; 1202 } 1203 1204 // We use m_disasm_up.get() to tell whether we are valid or not, so if this 1205 // isn't good for some reason, we won't be valid and FindPlugin will fail and 1206 // we won't get used. 1207 m_disasm_up = MCDisasmInstance::Create(triple_str, cpu, features_str.c_str(), 1208 flavor, *this); 1209 1210 llvm::Triple::ArchType llvm_arch = triple.getArch(); 1211 1212 // For arm CPUs that can execute arm or thumb instructions, also create a 1213 // thumb instruction disassembler. 1214 if (llvm_arch == llvm::Triple::arm) { 1215 std::string thumb_triple(thumb_arch.GetTriple().getTriple()); 1216 m_alternate_disasm_up = 1217 MCDisasmInstance::Create(thumb_triple.c_str(), "", features_str.c_str(), 1218 flavor, *this); 1219 if (!m_alternate_disasm_up) 1220 m_disasm_up.reset(); 1221 1222 } else if (arch.IsMIPS()) { 1223 /* Create alternate disassembler for MIPS16 and microMIPS */ 1224 uint32_t arch_flags = arch.GetFlags(); 1225 if (arch_flags & ArchSpec::eMIPSAse_mips16) 1226 features_str += "+mips16,"; 1227 else if (arch_flags & ArchSpec::eMIPSAse_micromips) 1228 features_str += "+micromips,"; 1229 1230 m_alternate_disasm_up = MCDisasmInstance::Create( 1231 triple_str, cpu, features_str.c_str(), flavor, *this); 1232 if (!m_alternate_disasm_up) 1233 m_disasm_up.reset(); 1234 } 1235} 1236 1237DisassemblerLLVMC::~DisassemblerLLVMC() = default; 1238 1239Disassembler *DisassemblerLLVMC::CreateInstance(const ArchSpec &arch, 1240 const char *flavor) { 1241 if (arch.GetTriple().getArch() != llvm::Triple::UnknownArch) { 1242 std::unique_ptr<DisassemblerLLVMC> disasm_up( 1243 new DisassemblerLLVMC(arch, flavor)); 1244 1245 if (disasm_up.get() && disasm_up->IsValid()) 1246 return disasm_up.release(); 1247 } 1248 return nullptr; 1249} 1250 1251size_t DisassemblerLLVMC::DecodeInstructions(const Address &base_addr, 1252 const DataExtractor &data, 1253 lldb::offset_t data_offset, 1254 size_t num_instructions, 1255 bool append, bool data_from_file) { 1256 if (!append) 1257 m_instruction_list.Clear(); 1258 1259 if (!IsValid()) 1260 return 0; 1261 1262 m_data_from_file = data_from_file; 1263 uint32_t data_cursor = data_offset; 1264 const size_t data_byte_size = data.GetByteSize(); 1265 uint32_t instructions_parsed = 0; 1266 Address inst_addr(base_addr); 1267 1268 while (data_cursor < data_byte_size && 1269 instructions_parsed < num_instructions) { 1270 1271 AddressClass address_class = AddressClass::eCode; 1272 1273 if (m_alternate_disasm_up) 1274 address_class = inst_addr.GetAddressClass(); 1275 1276 InstructionSP inst_sp( 1277 new InstructionLLVMC(*this, inst_addr, address_class)); 1278 1279 if (!inst_sp) 1280 break; 1281 1282 uint32_t inst_size = inst_sp->Decode(*this, data, data_cursor); 1283 1284 if (inst_size == 0) 1285 break; 1286 1287 m_instruction_list.Append(inst_sp); 1288 data_cursor += inst_size; 1289 inst_addr.Slide(inst_size); 1290 instructions_parsed++; 1291 } 1292 1293 return data_cursor - data_offset; 1294} 1295 1296void DisassemblerLLVMC::Initialize() { 1297 PluginManager::RegisterPlugin(GetPluginNameStatic(), 1298 "Disassembler that uses LLVM MC to disassemble " 1299 "i386, x86_64, ARM, and ARM64.", 1300 CreateInstance); 1301 1302 llvm::InitializeAllTargetInfos(); 1303 llvm::InitializeAllTargetMCs(); 1304 llvm::InitializeAllAsmParsers(); 1305 llvm::InitializeAllDisassemblers(); 1306} 1307 1308void DisassemblerLLVMC::Terminate() { 1309 PluginManager::UnregisterPlugin(CreateInstance); 1310} 1311 1312ConstString DisassemblerLLVMC::GetPluginNameStatic() { 1313 static ConstString g_name("llvm-mc"); 1314 return g_name; 1315} 1316 1317int DisassemblerLLVMC::OpInfoCallback(void *disassembler, uint64_t pc, 1318 uint64_t offset, uint64_t size, 1319 int tag_type, void *tag_bug) { 1320 return static_cast<DisassemblerLLVMC *>(disassembler) 1321 ->OpInfo(pc, offset, size, tag_type, tag_bug); 1322} 1323 1324const char *DisassemblerLLVMC::SymbolLookupCallback(void *disassembler, 1325 uint64_t value, 1326 uint64_t *type, uint64_t pc, 1327 const char **name) { 1328 return static_cast<DisassemblerLLVMC *>(disassembler) 1329 ->SymbolLookup(value, type, pc, name); 1330} 1331 1332bool DisassemblerLLVMC::FlavorValidForArchSpec( 1333 const lldb_private::ArchSpec &arch, const char *flavor) { 1334 llvm::Triple triple = arch.GetTriple(); 1335 if (flavor == nullptr || strcmp(flavor, "default") == 0) 1336 return true; 1337 1338 if (triple.getArch() == llvm::Triple::x86 || 1339 triple.getArch() == llvm::Triple::x86_64) { 1340 return strcmp(flavor, "intel") == 0 || strcmp(flavor, "att") == 0; 1341 } else 1342 return false; 1343} 1344 1345bool DisassemblerLLVMC::IsValid() const { return m_disasm_up.operator bool(); } 1346 1347int DisassemblerLLVMC::OpInfo(uint64_t PC, uint64_t Offset, uint64_t Size, 1348 int tag_type, void *tag_bug) { 1349 switch (tag_type) { 1350 default: 1351 break; 1352 case 1: 1353 memset(tag_bug, 0, sizeof(::LLVMOpInfo1)); 1354 break; 1355 } 1356 return 0; 1357} 1358 1359const char *DisassemblerLLVMC::SymbolLookup(uint64_t value, uint64_t *type_ptr, 1360 uint64_t pc, const char **name) { 1361 if (*type_ptr) { 1362 if (m_exe_ctx && m_inst) { 1363 // std::string remove_this_prior_to_checkin; 1364 Target *target = m_exe_ctx ? m_exe_ctx->GetTargetPtr() : nullptr; 1365 Address value_so_addr; 1366 Address pc_so_addr; 1367 if (m_inst->UsingFileAddress()) { 1368 ModuleSP module_sp(m_inst->GetAddress().GetModule()); 1369 if (module_sp) { 1370 module_sp->ResolveFileAddress(value, value_so_addr); 1371 module_sp->ResolveFileAddress(pc, pc_so_addr); 1372 } 1373 } else if (target && !target->GetSectionLoadList().IsEmpty()) { 1374 target->GetSectionLoadList().ResolveLoadAddress(value, value_so_addr); 1375 target->GetSectionLoadList().ResolveLoadAddress(pc, pc_so_addr); 1376 } 1377 1378 SymbolContext sym_ctx; 1379 const SymbolContextItem resolve_scope = 1380 eSymbolContextFunction | eSymbolContextSymbol; 1381 if (pc_so_addr.IsValid() && pc_so_addr.GetModule()) { 1382 pc_so_addr.GetModule()->ResolveSymbolContextForAddress( 1383 pc_so_addr, resolve_scope, sym_ctx); 1384 } 1385 1386 if (value_so_addr.IsValid() && value_so_addr.GetSection()) { 1387 StreamString ss; 1388 1389 bool format_omitting_current_func_name = false; 1390 if (sym_ctx.symbol || sym_ctx.function) { 1391 AddressRange range; 1392 if (sym_ctx.GetAddressRange(resolve_scope, 0, false, range) && 1393 range.GetBaseAddress().IsValid() && 1394 range.ContainsLoadAddress(value_so_addr, target)) { 1395 format_omitting_current_func_name = true; 1396 } 1397 } 1398 1399 // If the "value" address (the target address we're symbolicating) is 1400 // inside the same SymbolContext as the current instruction pc 1401 // (pc_so_addr), don't print the full function name - just print it 1402 // with DumpStyleNoFunctionName style, e.g. "<+36>". 1403 if (format_omitting_current_func_name) { 1404 value_so_addr.Dump(&ss, target, Address::DumpStyleNoFunctionName, 1405 Address::DumpStyleSectionNameOffset); 1406 } else { 1407 value_so_addr.Dump( 1408 &ss, target, 1409 Address::DumpStyleResolvedDescriptionNoFunctionArguments, 1410 Address::DumpStyleSectionNameOffset); 1411 } 1412 1413 if (!ss.GetString().empty()) { 1414 // If Address::Dump returned a multi-line description, most commonly 1415 // seen when we have multiple levels of inlined functions at an 1416 // address, only show the first line. 1417 std::string str = ss.GetString(); 1418 size_t first_eol_char = str.find_first_of("\r\n"); 1419 if (first_eol_char != std::string::npos) { 1420 str.erase(first_eol_char); 1421 } 1422 m_inst->AppendComment(str); 1423 } 1424 } 1425 } 1426 } 1427 1428 *type_ptr = LLVMDisassembler_ReferenceType_InOut_None; 1429 *name = nullptr; 1430 return nullptr; 1431} 1432 1433// PluginInterface protocol 1434ConstString DisassemblerLLVMC::GetPluginName() { return GetPluginNameStatic(); } 1435 1436uint32_t DisassemblerLLVMC::GetPluginVersion() { return 1; } 1437