MILexer.h revision 360784
1//===- MILexer.h - Lexer for machine instructions ---------------*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file declares the function that lexes the machine instruction source
10// string.
11//
12//===----------------------------------------------------------------------===//
13
14#ifndef LLVM_LIB_CODEGEN_MIRPARSER_MILEXER_H
15#define LLVM_LIB_CODEGEN_MIRPARSER_MILEXER_H
16
17#include "llvm/ADT/APSInt.h"
18#include "llvm/ADT/STLExtras.h"
19#include "llvm/ADT/StringRef.h"
20#include <string>
21
22namespace llvm {
23
24class Twine;
25
26/// A token produced by the machine instruction lexer.
27struct MIToken {
28  enum TokenKind {
29    // Markers
30    Eof,
31    Error,
32    Newline,
33
34    // Tokens with no info.
35    comma,
36    equal,
37    underscore,
38    colon,
39    coloncolon,
40    dot,
41    exclaim,
42    lparen,
43    rparen,
44    lbrace,
45    rbrace,
46    plus,
47    minus,
48    less,
49    greater,
50
51    // Keywords
52    kw_implicit,
53    kw_implicit_define,
54    kw_def,
55    kw_dead,
56    kw_dereferenceable,
57    kw_killed,
58    kw_undef,
59    kw_internal,
60    kw_early_clobber,
61    kw_debug_use,
62    kw_renamable,
63    kw_tied_def,
64    kw_frame_setup,
65    kw_frame_destroy,
66    kw_nnan,
67    kw_ninf,
68    kw_nsz,
69    kw_arcp,
70    kw_contract,
71    kw_afn,
72    kw_reassoc,
73    kw_nuw,
74    kw_nsw,
75    kw_exact,
76    kw_nofpexcept,
77    kw_debug_location,
78    kw_cfi_same_value,
79    kw_cfi_offset,
80    kw_cfi_rel_offset,
81    kw_cfi_def_cfa_register,
82    kw_cfi_def_cfa_offset,
83    kw_cfi_adjust_cfa_offset,
84    kw_cfi_escape,
85    kw_cfi_def_cfa,
86    kw_cfi_register,
87    kw_cfi_remember_state,
88    kw_cfi_restore,
89    kw_cfi_restore_state,
90    kw_cfi_undefined,
91    kw_cfi_window_save,
92    kw_cfi_aarch64_negate_ra_sign_state,
93    kw_blockaddress,
94    kw_intrinsic,
95    kw_target_index,
96    kw_half,
97    kw_float,
98    kw_double,
99    kw_x86_fp80,
100    kw_fp128,
101    kw_ppc_fp128,
102    kw_target_flags,
103    kw_volatile,
104    kw_non_temporal,
105    kw_invariant,
106    kw_align,
107    kw_addrspace,
108    kw_stack,
109    kw_got,
110    kw_jump_table,
111    kw_constant_pool,
112    kw_call_entry,
113    kw_custom,
114    kw_liveout,
115    kw_address_taken,
116    kw_landing_pad,
117    kw_liveins,
118    kw_successors,
119    kw_floatpred,
120    kw_intpred,
121    kw_shufflemask,
122    kw_pre_instr_symbol,
123    kw_post_instr_symbol,
124    kw_heap_alloc_marker,
125    kw_unknown_size,
126
127    // Named metadata keywords
128    md_tbaa,
129    md_alias_scope,
130    md_noalias,
131    md_range,
132    md_diexpr,
133    md_dilocation,
134
135    // Identifier tokens
136    Identifier,
137    NamedRegister,
138    NamedVirtualRegister,
139    MachineBasicBlockLabel,
140    MachineBasicBlock,
141    StackObject,
142    FixedStackObject,
143    NamedGlobalValue,
144    GlobalValue,
145    ExternalSymbol,
146    MCSymbol,
147
148    // Other tokens
149    IntegerLiteral,
150    FloatingPointLiteral,
151    HexLiteral,
152    VectorLiteral,
153    VirtualRegister,
154    ConstantPoolItem,
155    JumpTableIndex,
156    NamedIRBlock,
157    IRBlock,
158    NamedIRValue,
159    IRValue,
160    QuotedIRValue, // `<constant value>`
161    SubRegisterIndex,
162    StringConstant
163  };
164
165private:
166  TokenKind Kind = Error;
167  StringRef Range;
168  StringRef StringValue;
169  std::string StringValueStorage;
170  APSInt IntVal;
171
172public:
173  MIToken() = default;
174
175  MIToken &reset(TokenKind Kind, StringRef Range);
176
177  MIToken &setStringValue(StringRef StrVal);
178  MIToken &setOwnedStringValue(std::string StrVal);
179  MIToken &setIntegerValue(APSInt IntVal);
180
181  TokenKind kind() const { return Kind; }
182
183  bool isError() const { return Kind == Error; }
184
185  bool isNewlineOrEOF() const { return Kind == Newline || Kind == Eof; }
186
187  bool isErrorOrEOF() const { return Kind == Error || Kind == Eof; }
188
189  bool isRegister() const {
190    return Kind == NamedRegister || Kind == underscore ||
191           Kind == NamedVirtualRegister || Kind == VirtualRegister;
192  }
193
194  bool isRegisterFlag() const {
195    return Kind == kw_implicit || Kind == kw_implicit_define ||
196           Kind == kw_def || Kind == kw_dead || Kind == kw_killed ||
197           Kind == kw_undef || Kind == kw_internal ||
198           Kind == kw_early_clobber || Kind == kw_debug_use ||
199           Kind == kw_renamable;
200  }
201
202  bool isMemoryOperandFlag() const {
203    return Kind == kw_volatile || Kind == kw_non_temporal ||
204           Kind == kw_dereferenceable || Kind == kw_invariant ||
205           Kind == StringConstant;
206  }
207
208  bool is(TokenKind K) const { return Kind == K; }
209
210  bool isNot(TokenKind K) const { return Kind != K; }
211
212  StringRef::iterator location() const { return Range.begin(); }
213
214  StringRef range() const { return Range; }
215
216  /// Return the token's string value.
217  StringRef stringValue() const { return StringValue; }
218
219  const APSInt &integerValue() const { return IntVal; }
220
221  bool hasIntegerValue() const {
222    return Kind == IntegerLiteral || Kind == MachineBasicBlock ||
223           Kind == MachineBasicBlockLabel || Kind == StackObject ||
224           Kind == FixedStackObject || Kind == GlobalValue ||
225           Kind == VirtualRegister || Kind == ConstantPoolItem ||
226           Kind == JumpTableIndex || Kind == IRBlock || Kind == IRValue;
227  }
228};
229
230/// Consume a single machine instruction token in the given source and return
231/// the remaining source string.
232StringRef lexMIToken(
233    StringRef Source, MIToken &Token,
234    function_ref<void(StringRef::iterator, const Twine &)> ErrorCallback);
235
236} // end namespace llvm
237
238#endif // LLVM_LIB_CODEGEN_MIRPARSER_MILEXER_H
239