1//===-- sanitizer_symbolizer.h ----------------------------------*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// Symbolizer is used by sanitizers to map instruction address to a location in
10// source code at run-time. Symbolizer either uses __sanitizer_symbolize_*
11// defined in the program, or (if they are missing) tries to find and
12// launch "llvm-symbolizer" commandline tool in a separate process and
13// communicate with it.
14//
15// Generally we should try to avoid calling system library functions during
16// symbolization (and use their replacements from sanitizer_libc.h instead).
17//===----------------------------------------------------------------------===//
18#ifndef SANITIZER_SYMBOLIZER_H
19#define SANITIZER_SYMBOLIZER_H
20
21#include "sanitizer_common.h"
22#include "sanitizer_mutex.h"
23#include "sanitizer_vector.h"
24
25namespace __sanitizer {
26
// Describes a single address: the address itself plus module, function and
// source-position fields.
struct AddressInfo {
  // Owns all the string members. Storage for them is
  // (de)allocated using sanitizer internal allocator.
  uptr address;

  // Module-related fields.
  // NOTE(review): presumably populated via the FillModuleInfo() overloads
  // declared below -- confirm against the definitions.
  char *module;
  uptr module_offset;
  ModuleArch module_arch;
  u8 uuid[kModuleUUIDSize];
  // NOTE(review): presumably the number of valid bytes in |uuid| -- confirm.
  uptr uuid_size;

  // All-bits-set sentinel value.
  // NOTE(review): presumably marks an unknown |function_offset| -- confirm
  // against the users of this constant.
  static const uptr kUnknown = ~(uptr)0;
  char *function;
  uptr function_offset;

  // Source position fields.
  char *file;
  int line;
  int column;

  AddressInfo();
  // Deletes all strings and resets all fields.
  void Clear();
  // Two overloads: one takes the module name, offset and arch explicitly, the
  // other takes a LoadedModule.
  void FillModuleInfo(const char *mod_name, uptr mod_offset, ModuleArch arch);
  void FillModuleInfo(const LoadedModule &mod);
  // Returns |address| minus |module_offset|.
  uptr module_base() const { return address - module_offset; }
};
53
54// Linked list of symbolized frames (each frame is described by AddressInfo).
struct SymbolizedStack {
  // Next frame in the list.
  SymbolizedStack *next;
  // Description of this frame.
  AddressInfo info;
  // Creation entry point taking an address. The constructor is private, so
  // code outside this struct cannot construct a frame directly.
  static SymbolizedStack *New(uptr addr);
  // Deletes current, and all subsequent frames in the linked list.
  // The object cannot be accessed after the call to this function.
  void ClearAll();

 private:
  // Private: see New().
  SymbolizedStack();
};
66
67class SymbolizedStackHolder {
68  SymbolizedStack *Stack;
69
70  void clear() {
71    if (Stack)
72      Stack->ClearAll();
73  }
74
75 public:
76  explicit SymbolizedStackHolder(SymbolizedStack *Stack = nullptr)
77      : Stack(Stack) {}
78  ~SymbolizedStackHolder() { clear(); }
79  void reset(SymbolizedStack *S = nullptr) {
80    if (Stack != S)
81      clear();
82    Stack = S;
83  }
84  const SymbolizedStack *get() const { return Stack; }
85};
86
// For now, DataInfo is used to describe a global variable.
struct DataInfo {
  // Owns all the string members. Storage for them is
  // (de)allocated using sanitizer internal allocator.
  char *module;
  uptr module_offset;
  ModuleArch module_arch;

  // NOTE(review): presumably the source file and line of the described
  // object's declaration -- confirm against the code that fills this struct.
  char *file;
  uptr line;
  // NOTE(review): presumably the object's name, start address and size --
  // confirm against the code that fills this struct.
  char *name;
  uptr start;
  uptr size;

  DataInfo();
  // Counterpart of the owning-strings note above; defined outside this header.
  void Clear();
};
104
105struct LocalInfo {
106  char *function_name = nullptr;
107  char *name = nullptr;
108  char *decl_file = nullptr;
109  unsigned decl_line = 0;
110
111  bool has_frame_offset = false;
112  bool has_size = false;
113  bool has_tag_offset = false;
114
115  sptr frame_offset;
116  uptr size;
117  uptr tag_offset;
118
119  void Clear();
120};
121
122struct FrameInfo {
123  char *module;
124  uptr module_offset;
125  ModuleArch module_arch;
126
127  InternalMmapVector<LocalInfo> locals;
128  void Clear();
129};
130
// Forward declaration only: this header names SymbolizerTool as the element
// type of Symbolizer's tool list and does not need its definition.
class SymbolizerTool;
132
133class Symbolizer final {
134 public:
135  /// Initialize and return platform-specific implementation of symbolizer
136  /// (if it wasn't already initialized).
137  static Symbolizer *GetOrInit();
138  static void LateInitialize();
139  // Returns a list of symbolized frames for a given address (containing
140  // all inlined functions, if necessary).
141  SymbolizedStack *SymbolizePC(uptr address);
142  bool SymbolizeData(uptr address, DataInfo *info);
143  bool SymbolizeFrame(uptr address, FrameInfo *info);
144
145  // The module names Symbolizer returns are stable and unique for every given
146  // module.  It is safe to store and compare them as pointers.
147  bool GetModuleNameAndOffsetForPC(uptr pc, const char **module_name,
148                                   uptr *module_address);
149  const char *GetModuleNameForPc(uptr pc) {
150    const char *module_name = nullptr;
151    uptr unused;
152    if (GetModuleNameAndOffsetForPC(pc, &module_name, &unused))
153      return module_name;
154    return nullptr;
155  }
156
157  // Release internal caches (if any).
158  void Flush();
159  // Attempts to demangle the provided C++ mangled name. Never returns nullptr.
160  const char *Demangle(const char *name);
161
162  // Allow user to install hooks that would be called before/after Symbolizer
163  // does the actual file/line info fetching. Specific sanitizers may need this
164  // to distinguish system library calls made in user code from calls made
165  // during in-process symbolization.
166  typedef void (*StartSymbolizationHook)();
167  typedef void (*EndSymbolizationHook)();
168  // May be called at most once.
169  void AddHooks(StartSymbolizationHook start_hook,
170                EndSymbolizationHook end_hook);
171
172  void RefreshModules();
173  const LoadedModule *FindModuleForAddress(uptr address);
174
175  void InvalidateModuleList();
176
177  const ListOfModules &GetRefreshedListOfModules();
178
179 private:
180  // GetModuleNameAndOffsetForPC has to return a string to the caller.
181  // Since the corresponding module might get unloaded later, we should create
182  // our owned copies of the strings that we can safely return.
183  // ModuleNameOwner does not provide any synchronization, thus calls to
184  // its method should be protected by |mu_|.
185  class ModuleNameOwner {
186   public:
187    explicit ModuleNameOwner(Mutex *synchronized_by)
188        : last_match_(nullptr), mu_(synchronized_by) {
189      storage_.reserve(kInitialCapacity);
190    }
191    const char *GetOwnedCopy(const char *str);
192
193   private:
194    static const uptr kInitialCapacity = 1000;
195    InternalMmapVector<const char*> storage_;
196    const char *last_match_;
197
198    Mutex *mu_;
199  } module_names_;
200
201  /// Platform-specific function for creating a Symbolizer object.
202  static Symbolizer *PlatformInit();
203
204  bool FindModuleNameAndOffsetForAddress(uptr address, const char **module_name,
205                                         uptr *module_offset,
206                                         ModuleArch *module_arch);
207  ListOfModules modules_;
208  ListOfModules fallback_modules_;
209  // If stale, need to reload the modules before looking up addresses.
210  bool modules_fresh_;
211
212  // Platform-specific default demangler, returns nullptr on failure.
213  const char *PlatformDemangle(const char *name);
214
215  static Symbolizer *symbolizer_;
216  static StaticSpinMutex init_mu_;
217
218  // Mutex locked from public methods of |Symbolizer|, so that the internals
219  // (including individual symbolizer tools and platform-specific methods) are
220  // always synchronized.
221  Mutex mu_;
222
223  IntrusiveList<SymbolizerTool> tools_;
224
225  explicit Symbolizer(IntrusiveList<SymbolizerTool> tools);
226
227  static LowLevelAllocator symbolizer_allocator_;
228
229  StartSymbolizationHook start_hook_;
230  EndSymbolizationHook end_hook_;
231  class SymbolizerScope {
232   public:
233    explicit SymbolizerScope(const Symbolizer *sym);
234    ~SymbolizerScope();
235   private:
236    const Symbolizer *sym_;
237    int errno_;  // Backup errno in case symbolizer change the value.
238  };
239};
240
// Windows-only declaration. The guard tests the macro's value rather than
// whether it is defined: the sanitizer platform macros are defined to 0 or 1
// on every target (see sanitizer_platform.h), so an #ifdef here would be true
// on all platforms.
#if SANITIZER_WINDOWS
void InitializeDbgHelpIfNeeded();
#endif
244
245}  // namespace __sanitizer
246
247#endif  // SANITIZER_SYMBOLIZER_H
248