// MacroArgs.cpp revision 360784
1238106Sdes//===--- MacroArgs.cpp - Formal argument info for Macros ------------------===// 2238106Sdes// 3238106Sdes// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4238106Sdes// See https://llvm.org/LICENSE.txt for license information. 5238106Sdes// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6238106Sdes// 7238106Sdes//===----------------------------------------------------------------------===// 8238106Sdes// 9238106Sdes// This file implements the MacroArgs interface. 10238106Sdes// 11238106Sdes//===----------------------------------------------------------------------===// 12238106Sdes 13238106Sdes#include "clang/Lex/MacroArgs.h" 14238106Sdes#include "clang/Lex/LexDiagnostic.h" 15238106Sdes#include "clang/Lex/MacroInfo.h" 16238106Sdes#include "clang/Lex/Preprocessor.h" 17238106Sdes#include "llvm/ADT/SmallString.h" 18238106Sdes#include "llvm/Support/SaveAndRestore.h" 19238106Sdes#include <algorithm> 20238106Sdes 21238106Sdesusing namespace clang; 22238106Sdes 23238106Sdes/// MacroArgs ctor function - This destroys the vector passed in. 24238106SdesMacroArgs *MacroArgs::create(const MacroInfo *MI, 25238106Sdes ArrayRef<Token> UnexpArgTokens, 26238106Sdes bool VarargsElided, Preprocessor &PP) { 27238106Sdes assert(MI->isFunctionLike() && 28238106Sdes "Can't have args for an object-like macro!"); 29238106Sdes MacroArgs **ResultEnt = nullptr; 30238106Sdes unsigned ClosestMatch = ~0U; 31238106Sdes 32238106Sdes // See if we have an entry with a big enough argument list to reuse on the 33238106Sdes // free list. If so, reuse it. 34238106Sdes for (MacroArgs **Entry = &PP.MacroArgCache; *Entry; 35238106Sdes Entry = &(*Entry)->ArgCache) { 36238106Sdes if ((*Entry)->NumUnexpArgTokens >= UnexpArgTokens.size() && 37238106Sdes (*Entry)->NumUnexpArgTokens < ClosestMatch) { 38238106Sdes ResultEnt = Entry; 39238106Sdes 40238106Sdes // If we have an exact match, use it. 
41238106Sdes if ((*Entry)->NumUnexpArgTokens == UnexpArgTokens.size()) 42238106Sdes break; 43238106Sdes // Otherwise, use the best fit. 44238106Sdes ClosestMatch = (*Entry)->NumUnexpArgTokens; 45238106Sdes } 46238106Sdes } 47238106Sdes MacroArgs *Result; 48238106Sdes if (!ResultEnt) { 49238106Sdes // Allocate memory for a MacroArgs object with the lexer tokens at the end, 50238106Sdes // and construct the MacroArgs object. 51238106Sdes Result = new ( 52238106Sdes llvm::safe_malloc(totalSizeToAlloc<Token>(UnexpArgTokens.size()))) 53238106Sdes MacroArgs(UnexpArgTokens.size(), VarargsElided, MI->getNumParams()); 54238106Sdes } else { 55238106Sdes Result = *ResultEnt; 56238106Sdes // Unlink this node from the preprocessors singly linked list. 57238106Sdes *ResultEnt = Result->ArgCache; 58238106Sdes Result->NumUnexpArgTokens = UnexpArgTokens.size(); 59238106Sdes Result->VarargsElided = VarargsElided; 60238106Sdes Result->NumMacroArgs = MI->getNumParams(); 61238106Sdes } 62238106Sdes 63238106Sdes // Copy the actual unexpanded tokens to immediately after the result ptr. 64238106Sdes if (!UnexpArgTokens.empty()) { 65238106Sdes static_assert(std::is_trivial<Token>::value, 66238106Sdes "assume trivial copyability if copying into the " 67238106Sdes "uninitialized array (as opposed to reusing a cached " 68238106Sdes "MacroArgs)"); 69238106Sdes std::copy(UnexpArgTokens.begin(), UnexpArgTokens.end(), 70238106Sdes Result->getTrailingObjects<Token>()); 71238106Sdes } 72238106Sdes 73238106Sdes return Result; 74238106Sdes} 75238106Sdes 76238106Sdes/// destroy - Destroy and deallocate the memory for this object. 77238106Sdes/// 78238106Sdesvoid MacroArgs::destroy(Preprocessor &PP) { 79238106Sdes // Don't clear PreExpArgTokens, just clear the entries. Clearing the entries 80238106Sdes // would deallocate the element vectors. 
81238106Sdes for (unsigned i = 0, e = PreExpArgTokens.size(); i != e; ++i) 82238106Sdes PreExpArgTokens[i].clear(); 83238106Sdes 84238106Sdes // Add this to the preprocessor's free list. 85238106Sdes ArgCache = PP.MacroArgCache; 86238106Sdes PP.MacroArgCache = this; 87238106Sdes} 88238106Sdes 89238106Sdes/// deallocate - This should only be called by the Preprocessor when managing 90238106Sdes/// its freelist. 91238106SdesMacroArgs *MacroArgs::deallocate() { 92238106Sdes MacroArgs *Next = ArgCache; 93238106Sdes 94238106Sdes // Run the dtor to deallocate the vectors. 95238106Sdes this->~MacroArgs(); 96238106Sdes // Release the memory for the object. 97238106Sdes static_assert(std::is_trivially_destructible<Token>::value, 98238106Sdes "assume trivially destructible and forego destructors"); 99238106Sdes free(this); 100238106Sdes 101238106Sdes return Next; 102238106Sdes} 103238106Sdes 104238106Sdes 105238106Sdes/// getArgLength - Given a pointer to an expanded or unexpanded argument, 106238106Sdes/// return the number of tokens, not counting the EOF, that make up the 107238106Sdes/// argument. 108238106Sdesunsigned MacroArgs::getArgLength(const Token *ArgPtr) { 109238106Sdes unsigned NumArgTokens = 0; 110238106Sdes for (; ArgPtr->isNot(tok::eof); ++ArgPtr) 111238106Sdes ++NumArgTokens; 112238106Sdes return NumArgTokens; 113238106Sdes} 114238106Sdes 115238106Sdes 116238106Sdes/// getUnexpArgument - Return the unexpanded tokens for the specified formal. 117238106Sdes/// 118238106Sdesconst Token *MacroArgs::getUnexpArgument(unsigned Arg) const { 119238106Sdes 120238106Sdes assert(Arg < getNumMacroArguments() && "Invalid arg #"); 121238106Sdes // The unexpanded argument tokens start immediately after the MacroArgs object 122238106Sdes // in memory. 123238106Sdes const Token *Start = getTrailingObjects<Token>(); 124238106Sdes const Token *Result = Start; 125238106Sdes 126238106Sdes // Scan to find Arg. 
127238106Sdes for (; Arg; ++Result) { 128238106Sdes assert(Result < Start+NumUnexpArgTokens && "Invalid arg #"); 129238106Sdes if (Result->is(tok::eof)) 130238106Sdes --Arg; 131238106Sdes } 132238106Sdes assert(Result < Start+NumUnexpArgTokens && "Invalid arg #"); 133238106Sdes return Result; 134238106Sdes} 135238106Sdes 136238106Sdesbool MacroArgs::invokedWithVariadicArgument(const MacroInfo *const MI, 137238106Sdes Preprocessor &PP) { 138238106Sdes if (!MI->isVariadic()) 139238106Sdes return false; 140238106Sdes const int VariadicArgIndex = getNumMacroArguments() - 1; 141238106Sdes return getPreExpArgument(VariadicArgIndex, PP).front().isNot(tok::eof); 142238106Sdes} 143238106Sdes 144238106Sdes/// ArgNeedsPreexpansion - If we can prove that the argument won't be affected 145238106Sdes/// by pre-expansion, return false. Otherwise, conservatively return true. 146238106Sdesbool MacroArgs::ArgNeedsPreexpansion(const Token *ArgTok, 147238106Sdes Preprocessor &PP) const { 148238106Sdes // If there are no identifiers in the argument list, or if the identifiers are 149238106Sdes // known to not be macros, pre-expansion won't modify it. 150238106Sdes for (; ArgTok->isNot(tok::eof); ++ArgTok) 151238106Sdes if (IdentifierInfo *II = ArgTok->getIdentifierInfo()) 152238106Sdes if (II->hasMacroDefinition()) 153238106Sdes // Return true even though the macro could be a function-like macro 154238106Sdes // without a following '(' token, or could be disabled, or not visible. 155238106Sdes return true; 156238106Sdes return false; 157238106Sdes} 158238106Sdes 159238106Sdes/// getPreExpArgument - Return the pre-expanded form of the specified 160238106Sdes/// argument. 161238106Sdesconst std::vector<Token> &MacroArgs::getPreExpArgument(unsigned Arg, 162238106Sdes Preprocessor &PP) { 163238106Sdes assert(Arg < getNumMacroArguments() && "Invalid argument number!"); 164238106Sdes 165238106Sdes // If we have already computed this, return it. 
166238106Sdes if (PreExpArgTokens.size() < getNumMacroArguments()) 167238106Sdes PreExpArgTokens.resize(getNumMacroArguments()); 168238106Sdes 169238106Sdes std::vector<Token> &Result = PreExpArgTokens[Arg]; 170238106Sdes if (!Result.empty()) return Result; 171238106Sdes 172238106Sdes SaveAndRestore<bool> PreExpandingMacroArgs(PP.InMacroArgPreExpansion, true); 173238106Sdes 174238106Sdes const Token *AT = getUnexpArgument(Arg); 175238106Sdes unsigned NumToks = getArgLength(AT)+1; // Include the EOF. 176238106Sdes 177238106Sdes // Otherwise, we have to pre-expand this argument, populating Result. To do 178238106Sdes // this, we set up a fake TokenLexer to lex from the unexpanded argument 179238106Sdes // list. With this installed, we lex expanded tokens until we hit the EOF 180238106Sdes // token at the end of the unexp list. 181238106Sdes PP.EnterTokenStream(AT, NumToks, false /*disable expand*/, 182238106Sdes false /*owns tokens*/, false /*is reinject*/); 183238106Sdes 184238106Sdes // Lex all of the macro-expanded tokens into Result. 185238106Sdes do { 186238106Sdes Result.push_back(Token()); 187238106Sdes Token &Tok = Result.back(); 188238106Sdes PP.Lex(Tok); 189238106Sdes } while (Result.back().isNot(tok::eof)); 190238106Sdes 191238106Sdes // Pop the token stream off the top of the stack. We know that the internal 192238106Sdes // pointer inside of it is to the "end" of the token stream, but the stack 193238106Sdes // will not otherwise be popped until the next token is lexed. The problem is 194238106Sdes // that the token may be lexed sometime after the vector of tokens itself is 195238106Sdes // destroyed, which would be badness. 
196238106Sdes if (PP.InCachingLexMode()) 197238106Sdes PP.ExitCachingLexMode(); 198238106Sdes PP.RemoveTopOfLexerStack(); 199238106Sdes return Result; 200238106Sdes} 201238106Sdes 202238106Sdes 203238106Sdes/// StringifyArgument - Implement C99 6.10.3.2p2, converting a sequence of 204238106Sdes/// tokens into the literal string token that should be produced by the C # 205238106Sdes/// preprocessor operator. If Charify is true, then it should be turned into 206238106Sdes/// a character literal for the Microsoft charize (#@) extension. 207238106Sdes/// 208238106SdesToken MacroArgs::StringifyArgument(const Token *ArgToks, 209238106Sdes Preprocessor &PP, bool Charify, 210238106Sdes SourceLocation ExpansionLocStart, 211238106Sdes SourceLocation ExpansionLocEnd) { 212238106Sdes Token Tok; 213238106Sdes Tok.startToken(); 214238106Sdes Tok.setKind(Charify ? tok::char_constant : tok::string_literal); 215238106Sdes 216238106Sdes const Token *ArgTokStart = ArgToks; 217238106Sdes 218238106Sdes // Stringify all the tokens. 219238106Sdes SmallString<128> Result; 220238106Sdes Result += "\""; 221238106Sdes 222238106Sdes bool isFirst = true; 223238106Sdes for (; ArgToks->isNot(tok::eof); ++ArgToks) { 224238106Sdes const Token &Tok = *ArgToks; 225238106Sdes if (!isFirst && (Tok.hasLeadingSpace() || Tok.isAtStartOfLine())) 226238106Sdes Result += ' '; 227238106Sdes isFirst = false; 228238106Sdes 229238106Sdes // If this is a string or character constant, escape the token as specified 230238106Sdes // by 6.10.3.2p2. 231238106Sdes if (tok::isStringLiteral(Tok.getKind()) || // "foo", u8R"x(foo)x"_bar, etc. 232 Tok.is(tok::char_constant) || // 'x' 233 Tok.is(tok::wide_char_constant) || // L'x'. 234 Tok.is(tok::utf8_char_constant) || // u8'x'. 235 Tok.is(tok::utf16_char_constant) || // u'x'. 236 Tok.is(tok::utf32_char_constant)) { // U'x'. 
237 bool Invalid = false; 238 std::string TokStr = PP.getSpelling(Tok, &Invalid); 239 if (!Invalid) { 240 std::string Str = Lexer::Stringify(TokStr); 241 Result.append(Str.begin(), Str.end()); 242 } 243 } else if (Tok.is(tok::code_completion)) { 244 PP.CodeCompleteNaturalLanguage(); 245 } else { 246 // Otherwise, just append the token. Do some gymnastics to get the token 247 // in place and avoid copies where possible. 248 unsigned CurStrLen = Result.size(); 249 Result.resize(CurStrLen+Tok.getLength()); 250 const char *BufPtr = Result.data() + CurStrLen; 251 bool Invalid = false; 252 unsigned ActualTokLen = PP.getSpelling(Tok, BufPtr, &Invalid); 253 254 if (!Invalid) { 255 // If getSpelling returned a pointer to an already uniqued version of 256 // the string instead of filling in BufPtr, memcpy it onto our string. 257 if (ActualTokLen && BufPtr != &Result[CurStrLen]) 258 memcpy(&Result[CurStrLen], BufPtr, ActualTokLen); 259 260 // If the token was dirty, the spelling may be shorter than the token. 261 if (ActualTokLen != Tok.getLength()) 262 Result.resize(CurStrLen+ActualTokLen); 263 } 264 } 265 } 266 267 // If the last character of the string is a \, and if it isn't escaped, this 268 // is an invalid string literal, diagnose it as specified in C99. 269 if (Result.back() == '\\') { 270 // Count the number of consecutive \ characters. If even, then they are 271 // just escaped backslashes, otherwise it's an error. 272 unsigned FirstNonSlash = Result.size()-2; 273 // Guaranteed to find the starting " if nothing else. 274 while (Result[FirstNonSlash] == '\\') 275 --FirstNonSlash; 276 if ((Result.size()-1-FirstNonSlash) & 1) { 277 // Diagnose errors for things like: #define F(X) #X / F(\) 278 PP.Diag(ArgToks[-1], diag::pp_invalid_string_literal); 279 Result.pop_back(); // remove one of the \'s. 280 } 281 } 282 Result += '"'; 283 284 // If this is the charify operation and the result is not a legal character 285 // constant, diagnose it. 
286 if (Charify) { 287 // First step, turn double quotes into single quotes: 288 Result[0] = '\''; 289 Result[Result.size()-1] = '\''; 290 291 // Check for bogus character. 292 bool isBad = false; 293 if (Result.size() == 3) 294 isBad = Result[1] == '\''; // ''' is not legal. '\' already fixed above. 295 else 296 isBad = (Result.size() != 4 || Result[1] != '\\'); // Not '\x' 297 298 if (isBad) { 299 PP.Diag(ArgTokStart[0], diag::err_invalid_character_to_charify); 300 Result = "' '"; // Use something arbitrary, but legal. 301 } 302 } 303 304 PP.CreateString(Result, Tok, 305 ExpansionLocStart, ExpansionLocEnd); 306 return Tok; 307} 308