1//===- MCSymbol.h - Machine Code Symbols ------------------------*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file contains the declaration of the MCSymbol class.
10//
11//===----------------------------------------------------------------------===//
12
13#ifndef LLVM_MC_MCSYMBOL_H
14#define LLVM_MC_MCSYMBOL_H
15
16#include "llvm/ADT/StringMapEntry.h"
17#include "llvm/ADT/StringRef.h"
18#include "llvm/MC/MCExpr.h"
19#include "llvm/MC/MCFragment.h"
20#include "llvm/Support/ErrorHandling.h"
21#include "llvm/Support/MathExtras.h"
22#include <cassert>
23#include <cstddef>
24#include <cstdint>
25
26namespace llvm {
27
28class MCAsmInfo;
29class MCContext;
30class MCSection;
31class raw_ostream;
32
33/// MCSymbol - Instances of this class represent a symbol name in the MC file,
34/// and MCSymbols are created and uniqued by the MCContext class.  MCSymbols
35/// should only be constructed with valid names for the object file.
36///
/// If the symbol is defined/emitted into the current translation unit, the
/// Fragment member records the fragment (and thereby the section) it lives in.
/// Otherwise, if it is a reference to an external entity, it has a null
/// fragment.
40class MCSymbol {
41protected:
42  /// The kind of the symbol.  If it is any value other than unset then this
43  /// class is actually one of the appropriate subclasses of MCSymbol.
44  enum SymbolKind {
45    SymbolKindUnset,
46    SymbolKindCOFF,
47    SymbolKindELF,
48    SymbolKindGOFF,
49    SymbolKindMachO,
50    SymbolKindWasm,
51    SymbolKindXCOFF,
52  };
53
54  /// A symbol can contain an Offset, or Value, or be Common, but never more
55  /// than one of these.
56  enum Contents : uint8_t {
57    SymContentsUnset,
58    SymContentsOffset,
59    SymContentsVariable,
60    SymContentsCommon,
61    SymContentsTargetCommon, // Index stores the section index
62  };
63
64  // Special sentinel value for the absolute pseudo fragment.
65  static MCFragment *AbsolutePseudoFragment;
66
67  /// If a symbol has a Fragment, the section is implied, so we only need
68  /// one pointer.
69  /// The special AbsolutePseudoFragment value is for absolute symbols.
70  /// If this is a variable symbol, this caches the variable value's fragment.
71  /// FIXME: We might be able to simplify this by having the asm streamer create
72  /// dummy fragments.
73  /// If this is a section, then it gives the symbol is defined in. This is null
74  /// for undefined symbols.
75  ///
76  /// If this is a fragment, then it gives the fragment this symbol's value is
77  /// relative to, if any.
78  mutable MCFragment *Fragment = nullptr;
79
80  /// True if this symbol is named.  A named symbol will have a pointer to the
81  /// name allocated in the bytes immediately prior to the MCSymbol.
82  unsigned HasName : 1;
83
84  /// IsTemporary - True if this is an assembler temporary label, which
85  /// typically does not survive in the .o file's symbol table.  Usually
86  /// "Lfoo" or ".foo".
87  unsigned IsTemporary : 1;
88
89  /// True if this symbol can be redefined.
90  unsigned IsRedefinable : 1;
91
92  /// IsUsed - True if this symbol has been used.
93  mutable unsigned IsUsed : 1;
94
95  mutable unsigned IsRegistered : 1;
96
97  /// True if this symbol is visible outside this translation unit. Note: ELF
98  /// uses binding instead of this bit.
99  mutable unsigned IsExternal : 1;
100
101  /// This symbol is private extern.
102  mutable unsigned IsPrivateExtern : 1;
103
104  /// This symbol is weak external.
105  mutable unsigned IsWeakExternal : 1;
106
107  /// LLVM RTTI discriminator. This is actually a SymbolKind enumerator, but is
108  /// unsigned to avoid sign extension and achieve better bitpacking with MSVC.
109  unsigned Kind : 3;
110
111  /// True if we have created a relocation that uses this symbol.
112  mutable unsigned IsUsedInReloc : 1;
113
114  /// This is actually a Contents enumerator, but is unsigned to avoid sign
115  /// extension and achieve better bitpacking with MSVC.
116  unsigned SymbolContents : 3;
117
118  /// The alignment of the symbol if it is 'common'.
119  ///
120  /// Internally, this is stored as log2(align) + 1.
121  /// We reserve 5 bits to encode this value which allows the following values
122  /// 0b00000 -> unset
123  /// 0b00001 -> 1ULL <<  0 = 1
124  /// 0b00010 -> 1ULL <<  1 = 2
125  /// 0b00011 -> 1ULL <<  2 = 4
126  /// ...
127  /// 0b11111 -> 1ULL << 30 = 1 GiB
128  enum : unsigned { NumCommonAlignmentBits = 5 };
129  unsigned CommonAlignLog2 : NumCommonAlignmentBits;
130
131  /// The Flags field is used by object file implementations to store
132  /// additional per symbol information which is not easily classified.
133  enum : unsigned { NumFlagsBits = 16 };
134  mutable uint32_t Flags : NumFlagsBits;
135
136  /// Index field, for use by the object file implementation.
137  mutable uint32_t Index = 0;
138
139  union {
140    /// The offset to apply to the fragment address to form this symbol's value.
141    uint64_t Offset;
142
143    /// The size of the symbol, if it is 'common'.
144    uint64_t CommonSize;
145
146    /// If non-null, the value for a variable symbol.
147    const MCExpr *Value;
148  };
149
150  // MCContext creates and uniques these.
151  friend class MCExpr;
152  friend class MCContext;
153
154  /// The name for a symbol.
155  /// MCSymbol contains a uint64_t so is probably aligned to 8.  On a 32-bit
156  /// system, the name is a pointer so isn't going to satisfy the 8 byte
157  /// alignment of uint64_t.  Account for that here.
158  using NameEntryStorageTy = union {
159    const StringMapEntry<bool> *NameEntry;
160    uint64_t AlignmentPadding;
161  };
162
163  MCSymbol(SymbolKind Kind, const StringMapEntry<bool> *Name, bool isTemporary)
164      : IsTemporary(isTemporary), IsRedefinable(false), IsUsed(false),
165        IsRegistered(false), IsExternal(false), IsPrivateExtern(false),
166        IsWeakExternal(false), Kind(Kind), IsUsedInReloc(false),
167        SymbolContents(SymContentsUnset), CommonAlignLog2(0), Flags(0) {
168    Offset = 0;
169    HasName = !!Name;
170    if (Name)
171      getNameEntryPtr() = Name;
172  }
173
174  // Provide custom new/delete as we will only allocate space for a name
175  // if we need one.
176  void *operator new(size_t s, const StringMapEntry<bool> *Name,
177                     MCContext &Ctx);
178
179private:
180  void operator delete(void *);
181  /// Placement delete - required by std, but never called.
182  void operator delete(void*, unsigned) {
183    llvm_unreachable("Constructor throws?");
184  }
185  /// Placement delete - required by std, but never called.
186  void operator delete(void*, unsigned, bool) {
187    llvm_unreachable("Constructor throws?");
188  }
189
190  /// Get a reference to the name field.  Requires that we have a name
191  const StringMapEntry<bool> *&getNameEntryPtr() {
192    assert(HasName && "Name is required");
193    NameEntryStorageTy *Name = reinterpret_cast<NameEntryStorageTy *>(this);
194    return (*(Name - 1)).NameEntry;
195  }
196  const StringMapEntry<bool> *&getNameEntryPtr() const {
197    return const_cast<MCSymbol*>(this)->getNameEntryPtr();
198  }
199
200public:
201  MCSymbol(const MCSymbol &) = delete;
202  MCSymbol &operator=(const MCSymbol &) = delete;
203
204  /// getName - Get the symbol name.
205  StringRef getName() const {
206    if (!HasName)
207      return StringRef();
208
209    return getNameEntryPtr()->first();
210  }
211
212  bool isRegistered() const { return IsRegistered; }
213  void setIsRegistered(bool Value) const { IsRegistered = Value; }
214
215  void setUsedInReloc() const { IsUsedInReloc = true; }
216  bool isUsedInReloc() const { return IsUsedInReloc; }
217
218  /// \name Accessors
219  /// @{
220
221  /// isTemporary - Check if this is an assembler temporary symbol.
222  bool isTemporary() const { return IsTemporary; }
223
224  /// isUsed - Check if this is used.
225  bool isUsed() const { return IsUsed; }
226
227  /// Check if this symbol is redefinable.
228  bool isRedefinable() const { return IsRedefinable; }
229  /// Mark this symbol as redefinable.
230  void setRedefinable(bool Value) { IsRedefinable = Value; }
231  /// Prepare this symbol to be redefined.
232  void redefineIfPossible() {
233    if (IsRedefinable) {
234      if (SymbolContents == SymContentsVariable) {
235        Value = nullptr;
236        SymbolContents = SymContentsUnset;
237      }
238      setUndefined();
239      IsRedefinable = false;
240    }
241  }
242
243  /// @}
244  /// \name Associated Sections
245  /// @{
246
247  /// isDefined - Check if this symbol is defined (i.e., it has an address).
248  ///
249  /// Defined symbols are either absolute or in some section.
250  bool isDefined() const { return !isUndefined(); }
251
252  /// isInSection - Check if this symbol is defined in some section (i.e., it
253  /// is defined but not absolute).
254  bool isInSection() const {
255    return isDefined() && !isAbsolute();
256  }
257
258  /// isUndefined - Check if this symbol undefined (i.e., implicitly defined).
259  bool isUndefined(bool SetUsed = true) const {
260    return getFragment(SetUsed) == nullptr;
261  }
262
263  /// isAbsolute - Check if this is an absolute symbol.
264  bool isAbsolute() const {
265    return getFragment() == AbsolutePseudoFragment;
266  }
267
268  /// Get the section associated with a defined, non-absolute symbol.
269  MCSection &getSection() const {
270    assert(isInSection() && "Invalid accessor!");
271    return *getFragment()->getParent();
272  }
273
274  /// Mark the symbol as defined in the fragment \p F.
275  void setFragment(MCFragment *F) const {
276    assert(!isVariable() && "Cannot set fragment of variable");
277    Fragment = F;
278  }
279
280  /// Mark the symbol as undefined.
281  void setUndefined() { Fragment = nullptr; }
282
283  bool isELF() const { return Kind == SymbolKindELF; }
284
285  bool isCOFF() const { return Kind == SymbolKindCOFF; }
286
287  bool isGOFF() const { return Kind == SymbolKindGOFF; }
288
289  bool isMachO() const { return Kind == SymbolKindMachO; }
290
291  bool isWasm() const { return Kind == SymbolKindWasm; }
292
293  bool isXCOFF() const { return Kind == SymbolKindXCOFF; }
294
295  /// @}
296  /// \name Variable Symbols
297  /// @{
298
299  /// isVariable - Check if this is a variable symbol.
300  bool isVariable() const {
301    return SymbolContents == SymContentsVariable;
302  }
303
304  /// getVariableValue - Get the value for variable symbols.
305  const MCExpr *getVariableValue(bool SetUsed = true) const {
306    assert(isVariable() && "Invalid accessor!");
307    IsUsed |= SetUsed;
308    return Value;
309  }
310
311  void setVariableValue(const MCExpr *Value);
312
313  /// @}
314
315  /// Get the (implementation defined) index.
316  uint32_t getIndex() const {
317    return Index;
318  }
319
320  /// Set the (implementation defined) index.
321  void setIndex(uint32_t Value) const {
322    Index = Value;
323  }
324
325  bool isUnset() const { return SymbolContents == SymContentsUnset; }
326
327  uint64_t getOffset() const {
328    assert((SymbolContents == SymContentsUnset ||
329            SymbolContents == SymContentsOffset) &&
330           "Cannot get offset for a common/variable symbol");
331    return Offset;
332  }
333  void setOffset(uint64_t Value) {
334    assert((SymbolContents == SymContentsUnset ||
335            SymbolContents == SymContentsOffset) &&
336           "Cannot set offset for a common/variable symbol");
337    Offset = Value;
338    SymbolContents = SymContentsOffset;
339  }
340
341  /// Return the size of a 'common' symbol.
342  uint64_t getCommonSize() const {
343    assert(isCommon() && "Not a 'common' symbol!");
344    return CommonSize;
345  }
346
347  /// Mark this symbol as being 'common'.
348  ///
349  /// \param Size - The size of the symbol.
350  /// \param Alignment - The alignment of the symbol.
351  /// \param Target - Is the symbol a target-specific common-like symbol.
352  void setCommon(uint64_t Size, Align Alignment, bool Target = false) {
353    assert(getOffset() == 0);
354    CommonSize = Size;
355    SymbolContents = Target ? SymContentsTargetCommon : SymContentsCommon;
356
357    unsigned Log2Align = encode(Alignment);
358    assert(Log2Align < (1U << NumCommonAlignmentBits) &&
359           "Out of range alignment");
360    CommonAlignLog2 = Log2Align;
361  }
362
363  ///  Return the alignment of a 'common' symbol.
364  MaybeAlign getCommonAlignment() const {
365    assert(isCommon() && "Not a 'common' symbol!");
366    return decodeMaybeAlign(CommonAlignLog2);
367  }
368
369  /// Declare this symbol as being 'common'.
370  ///
371  /// \param Size - The size of the symbol.
372  /// \param Alignment - The alignment of the symbol.
373  /// \param Target - Is the symbol a target-specific common-like symbol.
374  /// \return True if symbol was already declared as a different type
375  bool declareCommon(uint64_t Size, Align Alignment, bool Target = false) {
376    assert(isCommon() || getOffset() == 0);
377    if(isCommon()) {
378      if (CommonSize != Size || getCommonAlignment() != Alignment ||
379          isTargetCommon() != Target)
380        return true;
381    } else
382      setCommon(Size, Alignment, Target);
383    return false;
384  }
385
386  /// Is this a 'common' symbol.
387  bool isCommon() const {
388    return SymbolContents == SymContentsCommon ||
389           SymbolContents == SymContentsTargetCommon;
390  }
391
392  /// Is this a target-specific common-like symbol.
393  bool isTargetCommon() const {
394    return SymbolContents == SymContentsTargetCommon;
395  }
396
397  MCFragment *getFragment(bool SetUsed = true) const {
398    if (Fragment || !isVariable() || isWeakExternal())
399      return Fragment;
400    // If the symbol is a non-weak alias, get information about
401    // the aliasee. (Don't try to resolve weak aliases.)
402    Fragment = getVariableValue(SetUsed)->findAssociatedFragment();
403    return Fragment;
404  }
405
406  bool isExternal() const { return IsExternal; }
407  void setExternal(bool Value) const { IsExternal = Value; }
408
409  bool isPrivateExtern() const { return IsPrivateExtern; }
410  void setPrivateExtern(bool Value) { IsPrivateExtern = Value; }
411
412  bool isWeakExternal() const { return IsWeakExternal; }
413
414  /// print - Print the value to the stream \p OS.
415  void print(raw_ostream &OS, const MCAsmInfo *MAI) const;
416
417  /// dump - Print the value to stderr.
418  void dump() const;
419
420protected:
421  /// Get the (implementation defined) symbol flags.
422  uint32_t getFlags() const { return Flags; }
423
424  /// Set the (implementation defined) symbol flags.
425  void setFlags(uint32_t Value) const {
426    assert(Value < (1U << NumFlagsBits) && "Out of range flags");
427    Flags = Value;
428  }
429
430  /// Modify the flags via a mask
431  void modifyFlags(uint32_t Value, uint32_t Mask) const {
432    assert(Value < (1U << NumFlagsBits) && "Out of range flags");
433    Flags = (Flags & ~Mask) | Value;
434  }
435};
436
437inline raw_ostream &operator<<(raw_ostream &OS, const MCSymbol &Sym) {
438  Sym.print(OS, nullptr);
439  return OS;
440}
441
442} // end namespace llvm
443
444#endif // LLVM_MC_MCSYMBOL_H
445