1//===--- LiteralSupport.h ---------------------------------------*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file defines the NumericLiteralParser, CharLiteralParser, and
10// StringLiteralParser interfaces.
11//
12//===----------------------------------------------------------------------===//
13
14#ifndef LLVM_CLANG_LEX_LITERALSUPPORT_H
15#define LLVM_CLANG_LEX_LITERALSUPPORT_H
16
17#include "clang/Basic/CharInfo.h"
18#include "clang/Basic/LLVM.h"
19#include "clang/Basic/TokenKinds.h"
20#include "llvm/ADT/APFloat.h"
21#include "llvm/ADT/ArrayRef.h"
22#include "llvm/ADT/SmallString.h"
23#include "llvm/ADT/StringRef.h"
24#include "llvm/Support/DataTypes.h"
25
26namespace clang {
27
28class DiagnosticsEngine;
29class Preprocessor;
30class Token;
31class SourceLocation;
32class TargetInfo;
33class SourceManager;
34class LangOptions;
35
/// Copy characters from Input to Buf, expanding any UCNs (universal character
/// names) along the way.
///
/// \param Buf   Buffer that receives the copied, expanded characters.
/// \param Input Source text to copy; it may contain UCNs.
///
/// NOTE(review): this declaration does not show whether existing contents of
/// Buf are kept or replaced -- check the definition before relying on either.
void expandUCNs(SmallVectorImpl<char> &Buf, StringRef Input);
38
/// Return true if the token kind \p K corresponds to a function-local
/// predefined macro that expands to a string literal and can therefore be
/// concatenated with other string literals (only in Microsoft mode).
///
/// \param K  Kind of the token being classified.
/// \param LO Language options consulted for the check (presumably this is
///           where the Microsoft-mode condition is read -- confirm against
///           the definition).
bool isFunctionLocalStringLiteralMacro(tok::TokenKind K, const LangOptions &LO);
43
/// Return true if the token is a string literal, or a function-local
/// predefined macro that expands to a string literal.
///
/// \param Tok Token being classified.
/// \param LO  Language options consulted for the check.
bool tokenIsLikeStringLiteral(const Token &Tok, const LangOptions &LO);
47
48/// NumericLiteralParser - This performs strict semantic analysis of the content
49/// of a ppnumber, classifying it as either integer, floating, or erroneous,
50/// determines the radix of the value and can convert it to a useful value.
51class NumericLiteralParser {
52  const SourceManager &SM;
53  const LangOptions &LangOpts;
54  DiagnosticsEngine &Diags;
55
56  const char *const ThisTokBegin;
57  const char *const ThisTokEnd;
58  const char *DigitsBegin, *SuffixBegin; // markers
59  const char *s; // cursor
60
61  unsigned radix;
62
63  bool saw_exponent, saw_period, saw_ud_suffix, saw_fixed_point_suffix;
64
65  SmallString<32> UDSuffixBuf;
66
67public:
68  NumericLiteralParser(StringRef TokSpelling, SourceLocation TokLoc,
69                       const SourceManager &SM, const LangOptions &LangOpts,
70                       const TargetInfo &Target, DiagnosticsEngine &Diags);
71  bool hadError : 1;
72  bool isUnsigned : 1;
73  bool isLong : 1;          // This is *not* set for long long.
74  bool isLongLong : 1;
75  bool isSizeT : 1;         // 1z, 1uz (C++23)
76  bool isHalf : 1;          // 1.0h
77  bool isFloat : 1;         // 1.0f
78  bool isImaginary : 1;     // 1.0i
79  bool isFloat16 : 1;       // 1.0f16
80  bool isFloat128 : 1;      // 1.0q
81  bool isFract : 1;         // 1.0hr/r/lr/uhr/ur/ulr
82  bool isAccum : 1;         // 1.0hk/k/lk/uhk/uk/ulk
83  bool isBitInt : 1;        // 1wb, 1uwb (C23)
84  uint8_t MicrosoftInteger; // Microsoft suffix extension i8, i16, i32, or i64.
85
86
87  bool isFixedPointLiteral() const {
88    return (saw_period || saw_exponent) && saw_fixed_point_suffix;
89  }
90
91  bool isIntegerLiteral() const {
92    return !saw_period && !saw_exponent && !isFixedPointLiteral();
93  }
94  bool isFloatingLiteral() const {
95    return (saw_period || saw_exponent) && !isFixedPointLiteral();
96  }
97
98  bool hasUDSuffix() const {
99    return saw_ud_suffix;
100  }
101  StringRef getUDSuffix() const {
102    assert(saw_ud_suffix);
103    return UDSuffixBuf;
104  }
105  unsigned getUDSuffixOffset() const {
106    assert(saw_ud_suffix);
107    return SuffixBegin - ThisTokBegin;
108  }
109
110  static bool isValidUDSuffix(const LangOptions &LangOpts, StringRef Suffix);
111
112  unsigned getRadix() const { return radix; }
113
114  /// GetIntegerValue - Convert this numeric literal value to an APInt that
115  /// matches Val's input width.  If there is an overflow (i.e., if the unsigned
116  /// value read is larger than the APInt's bits will hold), set Val to the low
117  /// bits of the result and return true.  Otherwise, return false.
118  bool GetIntegerValue(llvm::APInt &Val);
119
120  /// GetFloatValue - Convert this numeric literal to a floating value, using
121  /// the specified APFloat fltSemantics (specifying float, double, etc).
122  /// The optional bool isExact (passed-by-reference) has its value
123  /// set to true if the returned APFloat can represent the number in the
124  /// literal exactly, and false otherwise.
125  llvm::APFloat::opStatus GetFloatValue(llvm::APFloat &Result);
126
127  /// GetFixedPointValue - Convert this numeric literal value into a
128  /// scaled integer that represents this value. Returns true if an overflow
129  /// occurred when calculating the integral part of the scaled integer or
130  /// calculating the digit sequence of the exponent.
131  bool GetFixedPointValue(llvm::APInt &StoreVal, unsigned Scale);
132
133  /// Get the digits that comprise the literal. This excludes any prefix or
134  /// suffix associated with the literal.
135  StringRef getLiteralDigits() const {
136    assert(!hadError && "cannot reliably get the literal digits with an error");
137    return StringRef(DigitsBegin, SuffixBegin - DigitsBegin);
138  }
139
140private:
141
142  void ParseNumberStartingWithZero(SourceLocation TokLoc);
143  void ParseDecimalOrOctalCommon(SourceLocation TokLoc);
144
145  static bool isDigitSeparator(char C) { return C == '\''; }
146
147  /// Determine whether the sequence of characters [Start, End) contains
148  /// any real digits (not digit separators).
149  bool containsDigits(const char *Start, const char *End) {
150    return Start != End && (Start + 1 != End || !isDigitSeparator(Start[0]));
151  }
152
153  enum CheckSeparatorKind { CSK_BeforeDigits, CSK_AfterDigits };
154
155  /// Ensure that we don't have a digit separator here.
156  void checkSeparator(SourceLocation TokLoc, const char *Pos,
157                      CheckSeparatorKind IsAfterDigits);
158
159  /// SkipHexDigits - Read and skip over any hex digits, up to End.
160  /// Return a pointer to the first non-hex digit or End.
161  const char *SkipHexDigits(const char *ptr) {
162    while (ptr != ThisTokEnd && (isHexDigit(*ptr) || isDigitSeparator(*ptr)))
163      ptr++;
164    return ptr;
165  }
166
167  /// SkipOctalDigits - Read and skip over any octal digits, up to End.
168  /// Return a pointer to the first non-hex digit or End.
169  const char *SkipOctalDigits(const char *ptr) {
170    while (ptr != ThisTokEnd &&
171           ((*ptr >= '0' && *ptr <= '7') || isDigitSeparator(*ptr)))
172      ptr++;
173    return ptr;
174  }
175
176  /// SkipDigits - Read and skip over any digits, up to End.
177  /// Return a pointer to the first non-hex digit or End.
178  const char *SkipDigits(const char *ptr) {
179    while (ptr != ThisTokEnd && (isDigit(*ptr) || isDigitSeparator(*ptr)))
180      ptr++;
181    return ptr;
182  }
183
184  /// SkipBinaryDigits - Read and skip over any binary digits, up to End.
185  /// Return a pointer to the first non-binary digit or End.
186  const char *SkipBinaryDigits(const char *ptr) {
187    while (ptr != ThisTokEnd &&
188           (*ptr == '0' || *ptr == '1' || isDigitSeparator(*ptr)))
189      ptr++;
190    return ptr;
191  }
192
193};
194
195/// CharLiteralParser - Perform interpretation and semantic analysis of a
196/// character literal.
197class CharLiteralParser {
198  uint64_t Value;
199  tok::TokenKind Kind;
200  bool IsMultiChar;
201  bool HadError;
202  SmallString<32> UDSuffixBuf;
203  unsigned UDSuffixOffset;
204public:
205  CharLiteralParser(const char *begin, const char *end,
206                    SourceLocation Loc, Preprocessor &PP,
207                    tok::TokenKind kind);
208
209  bool hadError() const { return HadError; }
210  bool isOrdinary() const { return Kind == tok::char_constant; }
211  bool isWide() const { return Kind == tok::wide_char_constant; }
212  bool isUTF8() const { return Kind == tok::utf8_char_constant; }
213  bool isUTF16() const { return Kind == tok::utf16_char_constant; }
214  bool isUTF32() const { return Kind == tok::utf32_char_constant; }
215  bool isMultiChar() const { return IsMultiChar; }
216  uint64_t getValue() const { return Value; }
217  StringRef getUDSuffix() const { return UDSuffixBuf; }
218  unsigned getUDSuffixOffset() const {
219    assert(!UDSuffixBuf.empty() && "no ud-suffix");
220    return UDSuffixOffset;
221  }
222};
223
/// Selects the mode a StringLiteralParser operates in; the parser reports the
/// choice through StringLiteralParser::isUnevaluated().
enum class StringLiteralEvalMethod {
  /// The mode StringLiteralParser uses unless a caller asks otherwise.
  Evaluated = 0,
  /// The mode for which StringLiteralParser::isUnevaluated() returns true.
  Unevaluated = 1
};
228
229/// StringLiteralParser - This decodes string escape characters and performs
230/// wide string analysis and Translation Phase #6 (concatenation of string
231/// literals) (C99 5.1.1.2p1).
232class StringLiteralParser {
233  const SourceManager &SM;
234  const LangOptions &Features;
235  const TargetInfo &Target;
236  DiagnosticsEngine *Diags;
237
238  unsigned MaxTokenLength;
239  unsigned SizeBound;
240  unsigned CharByteWidth;
241  tok::TokenKind Kind;
242  SmallString<512> ResultBuf;
243  char *ResultPtr; // cursor
244  SmallString<32> UDSuffixBuf;
245  unsigned UDSuffixToken;
246  unsigned UDSuffixOffset;
247  StringLiteralEvalMethod EvalMethod;
248
249public:
250  StringLiteralParser(ArrayRef<Token> StringToks, Preprocessor &PP,
251                      StringLiteralEvalMethod StringMethod =
252                          StringLiteralEvalMethod::Evaluated);
253  StringLiteralParser(ArrayRef<Token> StringToks, const SourceManager &sm,
254                      const LangOptions &features, const TargetInfo &target,
255                      DiagnosticsEngine *diags = nullptr)
256      : SM(sm), Features(features), Target(target), Diags(diags),
257        MaxTokenLength(0), SizeBound(0), CharByteWidth(0), Kind(tok::unknown),
258        ResultPtr(ResultBuf.data()),
259        EvalMethod(StringLiteralEvalMethod::Evaluated), hadError(false),
260        Pascal(false) {
261    init(StringToks);
262  }
263
264  bool hadError;
265  bool Pascal;
266
267  StringRef GetString() const {
268    return StringRef(ResultBuf.data(), GetStringLength());
269  }
270  unsigned GetStringLength() const { return ResultPtr-ResultBuf.data(); }
271
272  unsigned GetNumStringChars() const {
273    return GetStringLength() / CharByteWidth;
274  }
275  /// getOffsetOfStringByte - This function returns the offset of the
276  /// specified byte of the string data represented by Token.  This handles
277  /// advancing over escape sequences in the string.
278  ///
279  /// If the Diagnostics pointer is non-null, then this will do semantic
280  /// checking of the string literal and emit errors and warnings.
281  unsigned getOffsetOfStringByte(const Token &TheTok, unsigned ByteNo) const;
282
283  bool isOrdinary() const { return Kind == tok::string_literal; }
284  bool isWide() const { return Kind == tok::wide_string_literal; }
285  bool isUTF8() const { return Kind == tok::utf8_string_literal; }
286  bool isUTF16() const { return Kind == tok::utf16_string_literal; }
287  bool isUTF32() const { return Kind == tok::utf32_string_literal; }
288  bool isPascal() const { return Pascal; }
289  bool isUnevaluated() const {
290    return EvalMethod == StringLiteralEvalMethod::Unevaluated;
291  }
292
293  StringRef getUDSuffix() const { return UDSuffixBuf; }
294
295  /// Get the index of a token containing a ud-suffix.
296  unsigned getUDSuffixToken() const {
297    assert(!UDSuffixBuf.empty() && "no ud-suffix");
298    return UDSuffixToken;
299  }
300  /// Get the spelling offset of the first byte of the ud-suffix.
301  unsigned getUDSuffixOffset() const {
302    assert(!UDSuffixBuf.empty() && "no ud-suffix");
303    return UDSuffixOffset;
304  }
305
306  static bool isValidUDSuffix(const LangOptions &LangOpts, StringRef Suffix);
307
308private:
309  void init(ArrayRef<Token> StringToks);
310  bool CopyStringFragment(const Token &Tok, const char *TokBegin,
311                          StringRef Fragment);
312  void DiagnoseLexingError(SourceLocation Loc);
313};
314
315}  // end namespace clang
316
317#endif
318