1//===-- sanitizer_symbolizer_libcdep.cc -----------------------------------===// 2// 3// The LLVM Compiler Infrastructure 4// 5// This file is distributed under the University of Illinois Open Source 6// License. See LICENSE.TXT for details. 7// 8//===----------------------------------------------------------------------===// 9// 10// This file is shared between AddressSanitizer and ThreadSanitizer 11// run-time libraries. 12//===----------------------------------------------------------------------===// 13 14#include "sanitizer_allocator_internal.h" 15#include "sanitizer_internal_defs.h" 16#include "sanitizer_symbolizer_internal.h" 17 18namespace __sanitizer { 19 20Symbolizer *Symbolizer::GetOrInit() { 21 SpinMutexLock l(&init_mu_); 22 if (symbolizer_) 23 return symbolizer_; 24 symbolizer_ = PlatformInit(); 25 CHECK(symbolizer_); 26 return symbolizer_; 27} 28 29// See sanitizer_symbolizer_markup.cc. 30#if !SANITIZER_SYMBOLIZER_MARKUP 31 32const char *ExtractToken(const char *str, const char *delims, char **result) { 33 uptr prefix_len = internal_strcspn(str, delims); 34 *result = (char*)InternalAlloc(prefix_len + 1); 35 internal_memcpy(*result, str, prefix_len); 36 (*result)[prefix_len] = '\0'; 37 const char *prefix_end = str + prefix_len; 38 if (*prefix_end != '\0') prefix_end++; 39 return prefix_end; 40} 41 42const char *ExtractInt(const char *str, const char *delims, int *result) { 43 char *buff; 44 const char *ret = ExtractToken(str, delims, &buff); 45 if (buff != 0) { 46 *result = (int)internal_atoll(buff); 47 } 48 InternalFree(buff); 49 return ret; 50} 51 52const char *ExtractUptr(const char *str, const char *delims, uptr *result) { 53 char *buff; 54 const char *ret = ExtractToken(str, delims, &buff); 55 if (buff != 0) { 56 *result = (uptr)internal_atoll(buff); 57 } 58 InternalFree(buff); 59 return ret; 60} 61 62const char *ExtractTokenUpToDelimiter(const char *str, const char *delimiter, 63 char **result) { 64 const char *found_delimiter = internal_strstr(str, delimiter); 65 uptr prefix_len = 66 found_delimiter ? found_delimiter - str : internal_strlen(str); 67 *result = (char *)InternalAlloc(prefix_len + 1); 68 internal_memcpy(*result, str, prefix_len); 69 (*result)[prefix_len] = '\0'; 70 const char *prefix_end = str + prefix_len; 71 if (*prefix_end != '\0') prefix_end += internal_strlen(delimiter); 72 return prefix_end; 73} 74 75SymbolizedStack *Symbolizer::SymbolizePC(uptr addr) { 76 BlockingMutexLock l(&mu_); 77 const char *module_name; 78 uptr module_offset; 79 ModuleArch arch; 80 SymbolizedStack *res = SymbolizedStack::New(addr); 81 if (!FindModuleNameAndOffsetForAddress(addr, &module_name, &module_offset, 82 &arch)) 83 return res; 84 // Always fill data about module name and offset. 85 res->info.FillModuleInfo(module_name, module_offset, arch); 86 for (auto &tool : tools_) { 87 SymbolizerScope sym_scope(this); 88 if (tool.SymbolizePC(addr, res)) { 89 return res; 90 } 91 } 92 return res; 93} 94 95bool Symbolizer::SymbolizeData(uptr addr, DataInfo *info) { 96 BlockingMutexLock l(&mu_); 97 const char *module_name; 98 uptr module_offset; 99 ModuleArch arch; 100 if (!FindModuleNameAndOffsetForAddress(addr, &module_name, &module_offset, 101 &arch)) 102 return false; 103 info->Clear(); 104 info->module = internal_strdup(module_name); 105 info->module_offset = module_offset; 106 info->module_arch = arch; 107 for (auto &tool : tools_) { 108 SymbolizerScope sym_scope(this); 109 if (tool.SymbolizeData(addr, info)) { 110 return true; 111 } 112 } 113 return true; 114} 115 116bool Symbolizer::GetModuleNameAndOffsetForPC(uptr pc, const char **module_name, 117 uptr *module_address) { 118 BlockingMutexLock l(&mu_); 119 const char *internal_module_name = nullptr; 120 ModuleArch arch; 121 if (!FindModuleNameAndOffsetForAddress(pc, &internal_module_name, 122 module_address, &arch)) 123 return false; 124 125 if (module_name) 126 *module_name = module_names_.GetOwnedCopy(internal_module_name); 127 return true; 128} 129 130void Symbolizer::Flush() { 131 BlockingMutexLock l(&mu_); 132 for (auto &tool : tools_) { 133 SymbolizerScope sym_scope(this); 134 tool.Flush(); 135 } 136} 137 138const char *Symbolizer::Demangle(const char *name) { 139 BlockingMutexLock l(&mu_); 140 for (auto &tool : tools_) { 141 SymbolizerScope sym_scope(this); 142 if (const char *demangled = tool.Demangle(name)) 143 return demangled; 144 } 145 return PlatformDemangle(name); 146} 147 148bool Symbolizer::FindModuleNameAndOffsetForAddress(uptr address, 149 const char **module_name, 150 uptr *module_offset, 151 ModuleArch *module_arch) { 152 const LoadedModule *module = FindModuleForAddress(address); 153 if (module == nullptr) 154 return false; 155 *module_name = module->full_name(); 156 *module_offset = address - module->base_address(); 157 *module_arch = module->arch(); 158 return true; 159} 160 161void Symbolizer::RefreshModules() { 162 modules_.init(); 163 fallback_modules_.fallbackInit(); 164 RAW_CHECK(modules_.size() > 0); 165 modules_fresh_ = true; 166} 167 168static const LoadedModule *SearchForModule(const ListOfModules &modules, 169 uptr address) { 170 for (uptr i = 0; i < modules.size(); i++) { 171 if (modules[i].containsAddress(address)) { 172 return &modules[i]; 173 } 174 } 175 return nullptr; 176} 177 178const LoadedModule *Symbolizer::FindModuleForAddress(uptr address) { 179 bool modules_were_reloaded = false; 180 if (!modules_fresh_) { 181 RefreshModules(); 182 modules_were_reloaded = true; 183 } 184 const LoadedModule *module = SearchForModule(modules_, address); 185 if (module) return module; 186 187 // dlopen/dlclose interceptors invalidate the module list, but when 188 // interception is disabled, we need to retry if the lookup fails in 189 // case the module list changed. 190#if !SANITIZER_INTERCEPT_DLOPEN_DLCLOSE 191 if (!modules_were_reloaded) { 192 RefreshModules(); 193 module = SearchForModule(modules_, address); 194 if (module) return module; 195 } 196#endif 197 198 if (fallback_modules_.size()) { 199 module = SearchForModule(fallback_modules_, address); 200 } 201 return module; 202} 203 204// For now we assume the following protocol: 205// For each request of the form 206// <module_name> <module_offset> 207// passed to STDIN, external symbolizer prints to STDOUT response: 208// <function_name> 209// <file_name>:<line_number>:<column_number> 210// <function_name> 211// <file_name>:<line_number>:<column_number> 212// ... 213// <empty line> 214class LLVMSymbolizerProcess : public SymbolizerProcess { 215 public: 216 explicit LLVMSymbolizerProcess(const char *path) : SymbolizerProcess(path) {} 217 218 private: 219 bool ReachedEndOfOutput(const char *buffer, uptr length) const override { 220 // Empty line marks the end of llvm-symbolizer output. 221 return length >= 2 && buffer[length - 1] == '\n' && 222 buffer[length - 2] == '\n'; 223 } 224 225 // When adding a new architecture, don't forget to also update 226 // script/asan_symbolize.py and sanitizer_common.h. 227 void GetArgV(const char *path_to_binary, 228 const char *(&argv)[kArgVMax]) const override { 229#if defined(__x86_64h__) 230 const char* const kSymbolizerArch = "--default-arch=x86_64h"; 231#elif defined(__x86_64__) 232 const char* const kSymbolizerArch = "--default-arch=x86_64"; 233#elif defined(__i386__) 234 const char* const kSymbolizerArch = "--default-arch=i386"; 235#elif defined(__aarch64__) 236 const char* const kSymbolizerArch = "--default-arch=arm64"; 237#elif defined(__arm__) 238 const char* const kSymbolizerArch = "--default-arch=arm"; 239#elif defined(__powerpc64__) && __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ 240 const char* const kSymbolizerArch = "--default-arch=powerpc64"; 241#elif defined(__powerpc64__) && __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ 242 const char* const kSymbolizerArch = "--default-arch=powerpc64le"; 243#elif defined(__s390x__) 244 const char* const kSymbolizerArch = "--default-arch=s390x"; 245#elif defined(__s390__) 246 const char* const kSymbolizerArch = "--default-arch=s390"; 247#else 248 const char* const kSymbolizerArch = "--default-arch=unknown"; 249#endif 250 251 const char *const inline_flag = common_flags()->symbolize_inline_frames 252 ? "--inlining=true" 253 : "--inlining=false"; 254 int i = 0; 255 argv[i++] = path_to_binary; 256 argv[i++] = inline_flag; 257 argv[i++] = kSymbolizerArch; 258 argv[i++] = nullptr; 259 } 260}; 261 262LLVMSymbolizer::LLVMSymbolizer(const char *path, LowLevelAllocator *allocator) 263 : symbolizer_process_(new(*allocator) LLVMSymbolizerProcess(path)) {} 264 265// Parse a <file>:<line>[:<column>] buffer. The file path may contain colons on 266// Windows, so extract tokens from the right hand side first. The column info is 267// also optional. 268static const char *ParseFileLineInfo(AddressInfo *info, const char *str) { 269 char *file_line_info = 0; 270 str = ExtractToken(str, "\n", &file_line_info); 271 CHECK(file_line_info); 272 273 if (uptr size = internal_strlen(file_line_info)) { 274 char *back = file_line_info + size - 1; 275 for (int i = 0; i < 2; ++i) { 276 while (back > file_line_info && IsDigit(*back)) --back; 277 if (*back != ':' || !IsDigit(back[1])) break; 278 info->column = info->line; 279 info->line = internal_atoll(back + 1); 280 // Truncate the string at the colon to keep only filename. 281 *back = '\0'; 282 --back; 283 } 284 ExtractToken(file_line_info, "", &info->file); 285 } 286 287 InternalFree(file_line_info); 288 return str; 289} 290 291// Parses one or more two-line strings in the following format: 292// <function_name> 293// <file_name>:<line_number>[:<column_number>] 294// Used by LLVMSymbolizer, Addr2LinePool and InternalSymbolizer, since all of 295// them use the same output format. 296void ParseSymbolizePCOutput(const char *str, SymbolizedStack *res) { 297 bool top_frame = true; 298 SymbolizedStack *last = res; 299 while (true) { 300 char *function_name = 0; 301 str = ExtractToken(str, "\n", &function_name); 302 CHECK(function_name); 303 if (function_name[0] == '\0') { 304 // There are no more frames. 305 InternalFree(function_name); 306 break; 307 } 308 SymbolizedStack *cur; 309 if (top_frame) { 310 cur = res; 311 top_frame = false; 312 } else { 313 cur = SymbolizedStack::New(res->info.address); 314 cur->info.FillModuleInfo(res->info.module, res->info.module_offset, 315 res->info.module_arch); 316 last->next = cur; 317 last = cur; 318 } 319 320 AddressInfo *info = &cur->info; 321 info->function = function_name; 322 str = ParseFileLineInfo(info, str); 323 324 // Functions and filenames can be "??", in which case we write 0 325 // to address info to mark that names are unknown. 326 if (0 == internal_strcmp(info->function, "??")) { 327 InternalFree(info->function); 328 info->function = 0; 329 } 330 if (0 == internal_strcmp(info->file, "??")) { 331 InternalFree(info->file); 332 info->file = 0; 333 } 334 } 335} 336 337// Parses a two-line string in the following format: 338// <symbol_name> 339// <start_address> <size> 340// Used by LLVMSymbolizer and InternalSymbolizer. 341void ParseSymbolizeDataOutput(const char *str, DataInfo *info) { 342 str = ExtractToken(str, "\n", &info->name); 343 str = ExtractUptr(str, " ", &info->start); 344 str = ExtractUptr(str, "\n", &info->size); 345} 346 347bool LLVMSymbolizer::SymbolizePC(uptr addr, SymbolizedStack *stack) { 348 AddressInfo *info = &stack->info; 349 const char *buf = FormatAndSendCommand( 350 /*is_data*/ false, info->module, info->module_offset, info->module_arch); 351 if (buf) { 352 ParseSymbolizePCOutput(buf, stack); 353 return true; 354 } 355 return false; 356} 357 358bool LLVMSymbolizer::SymbolizeData(uptr addr, DataInfo *info) { 359 const char *buf = FormatAndSendCommand( 360 /*is_data*/ true, info->module, info->module_offset, info->module_arch); 361 if (buf) { 362 ParseSymbolizeDataOutput(buf, info); 363 info->start += (addr - info->module_offset); // Add the base address. 364 return true; 365 } 366 return false; 367} 368 369const char *LLVMSymbolizer::FormatAndSendCommand(bool is_data, 370 const char *module_name, 371 uptr module_offset, 372 ModuleArch arch) { 373 CHECK(module_name); 374 const char *is_data_str = is_data ? "DATA " : ""; 375 if (arch == kModuleArchUnknown) { 376 if (internal_snprintf(buffer_, kBufferSize, "%s\"%s\" 0x%zx\n", is_data_str, 377 module_name, 378 module_offset) >= static_cast<int>(kBufferSize)) { 379 Report("WARNING: Command buffer too small"); 380 return nullptr; 381 } 382 } else { 383 if (internal_snprintf(buffer_, kBufferSize, "%s\"%s:%s\" 0x%zx\n", 384 is_data_str, module_name, ModuleArchToString(arch), 385 module_offset) >= static_cast<int>(kBufferSize)) { 386 Report("WARNING: Command buffer too small"); 387 return nullptr; 388 } 389 } 390 return symbolizer_process_->SendCommand(buffer_); 391} 392 393SymbolizerProcess::SymbolizerProcess(const char *path, bool use_forkpty) 394 : path_(path), 395 input_fd_(kInvalidFd), 396 output_fd_(kInvalidFd), 397 times_restarted_(0), 398 failed_to_start_(false), 399 reported_invalid_path_(false), 400 use_forkpty_(use_forkpty) { 401 CHECK(path_); 402 CHECK_NE(path_[0], '\0'); 403} 404 405static bool IsSameModule(const char* path) { 406 if (const char* ProcessName = GetProcessName()) { 407 if (const char* SymbolizerName = StripModuleName(path)) { 408 return !internal_strcmp(ProcessName, SymbolizerName); 409 } 410 } 411 return false; 412} 413 414const char *SymbolizerProcess::SendCommand(const char *command) { 415 if (failed_to_start_) 416 return nullptr; 417 if (IsSameModule(path_)) { 418 Report("WARNING: Symbolizer was blocked from starting itself!\n"); 419 failed_to_start_ = true; 420 return nullptr; 421 } 422 for (; times_restarted_ < kMaxTimesRestarted; times_restarted_++) { 423 // Start or restart symbolizer if we failed to send command to it. 424 if (const char *res = SendCommandImpl(command)) 425 return res; 426 Restart(); 427 } 428 if (!failed_to_start_) { 429 Report("WARNING: Failed to use and restart external symbolizer!\n"); 430 failed_to_start_ = true; 431 } 432 return 0; 433} 434 435const char *SymbolizerProcess::SendCommandImpl(const char *command) { 436 if (input_fd_ == kInvalidFd || output_fd_ == kInvalidFd) 437 return 0; 438 if (!WriteToSymbolizer(command, internal_strlen(command))) 439 return 0; 440 if (!ReadFromSymbolizer(buffer_, kBufferSize)) 441 return 0; 442 return buffer_; 443} 444 445bool SymbolizerProcess::Restart() { 446 if (input_fd_ != kInvalidFd) 447 CloseFile(input_fd_); 448 if (output_fd_ != kInvalidFd) 449 CloseFile(output_fd_); 450 return StartSymbolizerSubprocess(); 451} 452 453bool SymbolizerProcess::ReadFromSymbolizer(char *buffer, uptr max_length) { 454 if (max_length == 0) 455 return true; 456 uptr read_len = 0; 457 while (true) { 458 uptr just_read = 0; 459 bool success = ReadFromFile(input_fd_, buffer + read_len, 460 max_length - read_len - 1, &just_read); 461 // We can't read 0 bytes, as we don't expect external symbolizer to close 462 // its stdout. 463 if (!success || just_read == 0) { 464 Report("WARNING: Can't read from symbolizer at fd %d\n", input_fd_); 465 return false; 466 } 467 read_len += just_read; 468 if (ReachedEndOfOutput(buffer, read_len)) 469 break; 470 if (read_len + 1 == max_length) { 471 Report("WARNING: Symbolizer buffer too small\n"); 472 read_len = 0; 473 break; 474 } 475 } 476 buffer[read_len] = '\0'; 477 return true; 478} 479 480bool SymbolizerProcess::WriteToSymbolizer(const char *buffer, uptr length) { 481 if (length == 0) 482 return true; 483 uptr write_len = 0; 484 bool success = WriteToFile(output_fd_, buffer, length, &write_len); 485 if (!success || write_len != length) { 486 Report("WARNING: Can't write to symbolizer at fd %d\n", output_fd_); 487 return false; 488 } 489 return true; 490} 491 492#endif // !SANITIZER_SYMBOLIZER_MARKUP 493 494} // namespace __sanitizer 495