BitCodes.h revision 360784
1//===- BitCodes.h - Enum values for the bitstream format --------*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This header defines bitstream enum values.
10//
11// The enum values defined in this file should be considered permanent.  If
12// new features are added, they should have values added at the end of the
13// respective lists.
14//
15//===----------------------------------------------------------------------===//
16
17#ifndef LLVM_BITSTREAM_BITCODES_H
18#define LLVM_BITSTREAM_BITCODES_H
19
20#include "llvm/ADT/SmallVector.h"
21#include "llvm/Support/DataTypes.h"
22#include "llvm/Support/ErrorHandling.h"
23#include <cassert>
24
25namespace llvm {
/// Byte offsets of the fields that make up the bitstream wrapper header.
/// Every field is 32 bits (4 bytes) wide, so field number N starts at N * 4.
enum BitstreamWrapperHeader : unsigned {
  BWH_MagicField   = 0,   ///< Field #0.
  BWH_VersionField = 4,   ///< Field #1.
  BWH_OffsetField  = 8,   ///< Field #2.
  BWH_SizeField    = 12,  ///< Field #3.
  BWH_CPUTypeField = 16,  ///< Field #4.
  BWH_HeaderSize   = 20   ///< Offset just past field #4: five 32-bit fields.
};
35
namespace bitc {
  /// Bit widths used by the fixed parts of the bitstream container.
  enum StandardWidths {
    /// Block IDs are emitted as VBR-8.
    BlockIDWidth   = 8,
    /// Code lengths are emitted as VBR-4.
    CodeLenWidth   = 4,
    /// A block size can be up to 2^32 32-bit words, i.e. 16GB per block.
    BlockSizeWidth = 32
  };

  /// Abbreviation IDs that exist in every abbrev namespace.  Together they
  /// provide a way to exit a block, enter a nested block, define abbrevs, and
  /// emit an unabbreviated record.
  enum FixedAbbrevIDs {
    /// Leaves the current block.  This has to be zero so that broken bitcode
    /// is guaranteed to terminate.
    END_BLOCK = 0,

    /// Enters a nested block.
    ENTER_SUBBLOCK = 1,

    /// DEFINE_ABBREV - Defines an abbrev for the current block.  It consists
    /// of a vbr5 for # operand infos.  Each operand info is emitted with a
    /// single bit to indicate if it is a literal encoding.  If so, the value is
    /// emitted with a vbr8.  If not, the encoding is emitted as 3 bits followed
    /// by the info value as a vbr5 if needed.
    DEFINE_ABBREV = 2,

    /// UNABBREV_RECORD - Emitted as a vbr6 holding the record code, then a
    /// vbr6 holding the # operands, then one vbr6 per operand.
    UNABBREV_RECORD = 3,

    /// Not a code in its own right: marks the first ID that is available for
    /// abbrev assignment.
    FIRST_APPLICATION_ABBREV = 4
  };

  /// StandardBlockIDs - All bitcode files can optionally include a BLOCKINFO
  /// block, which contains metadata about other blocks in the file.
  enum StandardBlockIDs {
    /// BLOCKINFO_BLOCK defines metadata about blocks, for example standard
    /// abbrevs that should be available to all blocks of a specified ID.
    BLOCKINFO_BLOCK_ID = 0,

    /// Block IDs 1-7 are reserved for future expansion; this is the first ID
    /// after that reserved range.
    FIRST_APPLICATION_BLOCKID = 8
  };

  /// BlockInfoCodes - Record codes of the blockinfo block, which contains
  /// metadata about user-defined blocks.
  ///
  /// Note that DEFINE_ABBREV has magic semantics inside the blockinfo block:
  /// it applies to the current SETBID'd block instead of the BlockInfo block.
  enum BlockInfoCodes {
    /// SETBID: [blockid#]
    BLOCKINFO_CODE_SETBID        = 1,
    /// BLOCKNAME: [name]
    BLOCKINFO_CODE_BLOCKNAME     = 2,
    /// BLOCKINFO_CODE_SETRECORDNAME: [id, name]
    BLOCKINFO_CODE_SETRECORDNAME = 3
  };

} // End bitc namespace
89
90/// BitCodeAbbrevOp - This describes one or more operands in an abbreviation.
91/// This is actually a union of two different things:
92///   1. It could be a literal integer value ("the operand is always 17").
93///   2. It could be an encoding specification ("this operand encoded like so").
94///
95class BitCodeAbbrevOp {
96  uint64_t Val;           // A literal value or data for an encoding.
97  bool IsLiteral : 1;     // Indicate whether this is a literal value or not.
98  unsigned Enc   : 3;     // The encoding to use.
99public:
100  enum Encoding {
101    Fixed = 1,  // A fixed width field, Val specifies number of bits.
102    VBR   = 2,  // A VBR field where Val specifies the width of each chunk.
103    Array = 3,  // A sequence of fields, next field species elt encoding.
104    Char6 = 4,  // A 6-bit fixed field which maps to [a-zA-Z0-9._].
105    Blob  = 5   // 32-bit aligned array of 8-bit characters.
106  };
107
108  explicit BitCodeAbbrevOp(uint64_t V) :  Val(V), IsLiteral(true) {}
109  explicit BitCodeAbbrevOp(Encoding E, uint64_t Data = 0)
110    : Val(Data), IsLiteral(false), Enc(E) {}
111
112  bool isLiteral() const  { return IsLiteral; }
113  bool isEncoding() const { return !IsLiteral; }
114
115  // Accessors for literals.
116  uint64_t getLiteralValue() const { assert(isLiteral()); return Val; }
117
118  // Accessors for encoding info.
119  Encoding getEncoding() const { assert(isEncoding()); return (Encoding)Enc; }
120  uint64_t getEncodingData() const {
121    assert(isEncoding() && hasEncodingData());
122    return Val;
123  }
124
125  bool hasEncodingData() const { return hasEncodingData(getEncoding()); }
126  static bool hasEncodingData(Encoding E) {
127    switch (E) {
128    case Fixed:
129    case VBR:
130      return true;
131    case Array:
132    case Char6:
133    case Blob:
134      return false;
135    }
136    report_fatal_error("Invalid encoding");
137  }
138
139  /// isChar6 - Return true if this character is legal in the Char6 encoding.
140  static bool isChar6(char C) {
141    if (C >= 'a' && C <= 'z') return true;
142    if (C >= 'A' && C <= 'Z') return true;
143    if (C >= '0' && C <= '9') return true;
144    if (C == '.' || C == '_') return true;
145    return false;
146  }
147  static unsigned EncodeChar6(char C) {
148    if (C >= 'a' && C <= 'z') return C-'a';
149    if (C >= 'A' && C <= 'Z') return C-'A'+26;
150    if (C >= '0' && C <= '9') return C-'0'+26+26;
151    if (C == '.')             return 62;
152    if (C == '_')             return 63;
153    llvm_unreachable("Not a value Char6 character!");
154  }
155
156  static char DecodeChar6(unsigned V) {
157    assert((V & ~63) == 0 && "Not a Char6 encoded character!");
158    return "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789._"
159        [V];
160  }
161
162};
163
164/// BitCodeAbbrev - This class represents an abbreviation record.  An
165/// abbreviation allows a complex record that has redundancy to be stored in a
166/// specialized format instead of the fully-general, fully-vbr, format.
167class BitCodeAbbrev {
168  SmallVector<BitCodeAbbrevOp, 32> OperandList;
169
170public:
171  BitCodeAbbrev() = default;
172
173  explicit BitCodeAbbrev(std::initializer_list<BitCodeAbbrevOp> OperandList)
174      : OperandList(OperandList) {}
175
176  unsigned getNumOperandInfos() const {
177    return static_cast<unsigned>(OperandList.size());
178  }
179  const BitCodeAbbrevOp &getOperandInfo(unsigned N) const {
180    return OperandList[N];
181  }
182
183  void Add(const BitCodeAbbrevOp &OpInfo) {
184    OperandList.push_back(OpInfo);
185  }
186};
187} // End llvm namespace
188
189#endif
190