1//===--- TokenAnnotator.cpp - Format C++ code -----------------------------===// 2// 3// The LLVM Compiler Infrastructure 4// 5// This file is distributed under the University of Illinois Open Source 6// License. See LICENSE.TXT for details. 7// 8//===----------------------------------------------------------------------===// 9/// 10/// \file 11/// \brief This file implements a token annotator, i.e. creates 12/// \c AnnotatedTokens out of \c FormatTokens with required extra information. 13/// 14//===----------------------------------------------------------------------===// 15 16#include "TokenAnnotator.h" 17#include "clang/Basic/SourceManager.h" 18#include "clang/Lex/Lexer.h" 19#include "llvm/Support/Debug.h" 20 21namespace clang { 22namespace format { 23 24bool AnnotatedToken::isUnaryOperator() const { 25 switch (FormatTok.Tok.getKind()) { 26 case tok::plus: 27 case tok::plusplus: 28 case tok::minus: 29 case tok::minusminus: 30 case tok::exclaim: 31 case tok::tilde: 32 case tok::kw_sizeof: 33 case tok::kw_alignof: 34 return true; 35 default: 36 return false; 37 } 38} 39 40bool AnnotatedToken::isBinaryOperator() const { 41 // Comma is a binary operator, but does not behave as such wrt. formatting. 42 return getPrecedence(*this) > prec::Comma; 43} 44 45bool AnnotatedToken::isTrailingComment() const { 46 return is(tok::comment) && 47 (Children.empty() || Children[0].FormatTok.NewlinesBefore > 0); 48} 49 50AnnotatedToken *AnnotatedToken::getPreviousNoneComment() const { 51 AnnotatedToken *Tok = Parent; 52 while (Tok != NULL && Tok->is(tok::comment)) 53 Tok = Tok->Parent; 54 return Tok; 55} 56 57const AnnotatedToken *AnnotatedToken::getNextNoneComment() const { 58 const AnnotatedToken *Tok = Children.empty() ? NULL : &Children[0]; 59 while (Tok != NULL && Tok->is(tok::comment)) 60 Tok = Tok->Children.empty() ? NULL : &Tok->Children[0]; 61 return Tok; 62} 63 64bool AnnotatedToken::closesScope() const { 65 return isOneOf(tok::r_paren, tok::r_brace, tok::r_square) || 66 Type == TT_TemplateCloser; 67} 68 69bool AnnotatedToken::opensScope() const { 70 return isOneOf(tok::l_paren, tok::l_brace, tok::l_square) || 71 Type == TT_TemplateOpener; 72} 73 74/// \brief A parser that gathers additional information about tokens. 75/// 76/// The \c TokenAnnotator tries to match parenthesis and square brakets and 77/// store a parenthesis levels. It also tries to resolve matching "<" and ">" 78/// into template parameter lists. 79class AnnotatingParser { 80public: 81 AnnotatingParser(SourceManager &SourceMgr, Lexer &Lex, AnnotatedLine &Line, 82 IdentifierInfo &Ident_in) 83 : SourceMgr(SourceMgr), Lex(Lex), Line(Line), CurrentToken(&Line.First), 84 KeywordVirtualFound(false), NameFound(false), Ident_in(Ident_in) { 85 Contexts.push_back(Context(tok::unknown, 1, /*IsExpression=*/ false)); 86 } 87 88private: 89 bool parseAngle() { 90 if (CurrentToken == NULL) 91 return false; 92 ScopedContextCreator ContextCreator(*this, tok::less, 10); 93 AnnotatedToken *Left = CurrentToken->Parent; 94 Contexts.back().IsExpression = false; 95 while (CurrentToken != NULL) { 96 if (CurrentToken->is(tok::greater)) { 97 Left->MatchingParen = CurrentToken; 98 CurrentToken->MatchingParen = Left; 99 CurrentToken->Type = TT_TemplateCloser; 100 next(); 101 return true; 102 } 103 if (CurrentToken->isOneOf(tok::r_paren, tok::r_square, tok::r_brace, 104 tok::pipepipe, tok::ampamp, tok::question, 105 tok::colon)) 106 return false; 107 updateParameterCount(Left, CurrentToken); 108 if (!consumeToken()) 109 return false; 110 } 111 return false; 112 } 113 114 bool parseParens(bool LookForDecls = false) { 115 if (CurrentToken == NULL) 116 return false; 117 ScopedContextCreator ContextCreator(*this, tok::l_paren, 1); 118 119 // FIXME: This is a bit of a hack. Do better. 120 Contexts.back().ColonIsForRangeExpr = 121 Contexts.size() == 2 && Contexts[0].ColonIsForRangeExpr; 122 123 bool StartsObjCMethodExpr = false; 124 AnnotatedToken *Left = CurrentToken->Parent; 125 if (CurrentToken->is(tok::caret)) { 126 // ^( starts a block. 127 Left->Type = TT_ObjCBlockLParen; 128 } else if (AnnotatedToken *MaybeSel = Left->Parent) { 129 // @selector( starts a selector. 130 if (MaybeSel->isObjCAtKeyword(tok::objc_selector) && MaybeSel->Parent && 131 MaybeSel->Parent->is(tok::at)) { 132 StartsObjCMethodExpr = true; 133 } 134 } 135 136 if (StartsObjCMethodExpr) { 137 Contexts.back().ColonIsObjCMethodExpr = true; 138 Left->Type = TT_ObjCMethodExpr; 139 } 140 141 while (CurrentToken != NULL) { 142 // LookForDecls is set when "if (" has been seen. Check for 143 // 'identifier' '*' 'identifier' followed by not '=' -- this 144 // '*' has to be a binary operator but determineStarAmpUsage() will 145 // categorize it as an unary operator, so set the right type here. 146 if (LookForDecls && !CurrentToken->Children.empty()) { 147 AnnotatedToken &Prev = *CurrentToken->Parent; 148 AnnotatedToken &Next = CurrentToken->Children[0]; 149 if (Prev.Parent->is(tok::identifier) && 150 Prev.isOneOf(tok::star, tok::amp, tok::ampamp) && 151 CurrentToken->is(tok::identifier) && Next.isNot(tok::equal)) { 152 Prev.Type = TT_BinaryOperator; 153 LookForDecls = false; 154 } 155 } 156 157 if (CurrentToken->is(tok::r_paren)) { 158 if (CurrentToken->Parent->closesScope()) 159 CurrentToken->Parent->MatchingParen->NoMoreTokensOnLevel = true; 160 Left->MatchingParen = CurrentToken; 161 CurrentToken->MatchingParen = Left; 162 163 if (StartsObjCMethodExpr) { 164 CurrentToken->Type = TT_ObjCMethodExpr; 165 if (Contexts.back().FirstObjCSelectorName != NULL) { 166 Contexts.back().FirstObjCSelectorName->LongestObjCSelectorName = 167 Contexts.back().LongestObjCSelectorName; 168 } 169 } 170 171 next(); 172 return true; 173 } 174 if (CurrentToken->isOneOf(tok::r_square, tok::r_brace)) 175 return false; 176 updateParameterCount(Left, CurrentToken); 177 if (!consumeToken()) 178 return false; 179 } 180 return false; 181 } 182 183 bool parseSquare() { 184 if (!CurrentToken) 185 return false; 186 187 // A '[' could be an index subscript (after an indentifier or after 188 // ')' or ']'), it could be the start of an Objective-C method 189 // expression, or it could the the start of an Objective-C array literal. 190 AnnotatedToken *Left = CurrentToken->Parent; 191 AnnotatedToken *Parent = Left->getPreviousNoneComment(); 192 bool StartsObjCMethodExpr = 193 Contexts.back().CanBeExpression && 194 (!Parent || Parent->isOneOf(tok::colon, tok::l_square, tok::l_paren, 195 tok::kw_return, tok::kw_throw) || 196 Parent->isUnaryOperator() || Parent->Type == TT_ObjCForIn || 197 Parent->Type == TT_CastRParen || 198 getBinOpPrecedence(Parent->FormatTok.Tok.getKind(), true, true) > 199 prec::Unknown); 200 ScopedContextCreator ContextCreator(*this, tok::l_square, 10); 201 Contexts.back().IsExpression = true; 202 bool StartsObjCArrayLiteral = Parent && Parent->is(tok::at); 203 204 if (StartsObjCMethodExpr) { 205 Contexts.back().ColonIsObjCMethodExpr = true; 206 Left->Type = TT_ObjCMethodExpr; 207 } else if (StartsObjCArrayLiteral) { 208 Left->Type = TT_ObjCArrayLiteral; 209 } 210 211 while (CurrentToken != NULL) { 212 if (CurrentToken->is(tok::r_square)) { 213 if (!CurrentToken->Children.empty() && 214 CurrentToken->Children[0].is(tok::l_paren)) { 215 // An ObjC method call is rarely followed by an open parenthesis. 216 // FIXME: Do we incorrectly label ":" with this? 217 StartsObjCMethodExpr = false; 218 Left->Type = TT_Unknown; 219 } 220 if (StartsObjCMethodExpr) { 221 CurrentToken->Type = TT_ObjCMethodExpr; 222 // determineStarAmpUsage() thinks that '*' '[' is allocating an 223 // array of pointers, but if '[' starts a selector then '*' is a 224 // binary operator. 225 if (Parent != NULL && Parent->Type == TT_PointerOrReference) 226 Parent->Type = TT_BinaryOperator; 227 } else if (StartsObjCArrayLiteral) { 228 CurrentToken->Type = TT_ObjCArrayLiteral; 229 } 230 Left->MatchingParen = CurrentToken; 231 CurrentToken->MatchingParen = Left; 232 if (Contexts.back().FirstObjCSelectorName != NULL) 233 Contexts.back().FirstObjCSelectorName->LongestObjCSelectorName = 234 Contexts.back().LongestObjCSelectorName; 235 next(); 236 return true; 237 } 238 if (CurrentToken->isOneOf(tok::r_paren, tok::r_brace)) 239 return false; 240 updateParameterCount(Left, CurrentToken); 241 if (!consumeToken()) 242 return false; 243 } 244 return false; 245 } 246 247 bool parseBrace() { 248 if (CurrentToken != NULL) { 249 ScopedContextCreator ContextCreator(*this, tok::l_brace, 1); 250 AnnotatedToken *Left = CurrentToken->Parent; 251 while (CurrentToken != NULL) { 252 if (CurrentToken->is(tok::r_brace)) { 253 Left->MatchingParen = CurrentToken; 254 CurrentToken->MatchingParen = Left; 255 next(); 256 return true; 257 } 258 if (CurrentToken->isOneOf(tok::r_paren, tok::r_square)) 259 return false; 260 updateParameterCount(Left, CurrentToken); 261 if (!consumeToken()) 262 return false; 263 } 264 } 265 // No closing "}" found, this probably starts a definition. 266 Line.StartsDefinition = true; 267 return true; 268 } 269 270 void updateParameterCount(AnnotatedToken *Left, AnnotatedToken *Current) { 271 if (Current->is(tok::comma)) 272 ++Left->ParameterCount; 273 else if (Left->ParameterCount == 0 && Current->isNot(tok::comment)) 274 Left->ParameterCount = 1; 275 } 276 277 bool parseConditional() { 278 while (CurrentToken != NULL) { 279 if (CurrentToken->is(tok::colon)) { 280 CurrentToken->Type = TT_ConditionalExpr; 281 next(); 282 return true; 283 } 284 if (!consumeToken()) 285 return false; 286 } 287 return false; 288 } 289 290 bool parseTemplateDeclaration() { 291 if (CurrentToken != NULL && CurrentToken->is(tok::less)) { 292 CurrentToken->Type = TT_TemplateOpener; 293 next(); 294 if (!parseAngle()) 295 return false; 296 if (CurrentToken != NULL) 297 CurrentToken->Parent->ClosesTemplateDeclaration = true; 298 return true; 299 } 300 return false; 301 } 302 303 bool consumeToken() { 304 AnnotatedToken *Tok = CurrentToken; 305 next(); 306 switch (Tok->FormatTok.Tok.getKind()) { 307 case tok::plus: 308 case tok::minus: 309 if (Tok->Parent == NULL && Line.MustBeDeclaration) 310 Tok->Type = TT_ObjCMethodSpecifier; 311 break; 312 case tok::colon: 313 if (Tok->Parent == NULL) 314 return false; 315 // Colons from ?: are handled in parseConditional(). 316 if (Tok->Parent->is(tok::r_paren) && Contexts.size() == 1) { 317 Tok->Type = TT_CtorInitializerColon; 318 } else if (Contexts.back().ColonIsObjCMethodExpr || 319 Line.First.Type == TT_ObjCMethodSpecifier) { 320 Tok->Type = TT_ObjCMethodExpr; 321 Tok->Parent->Type = TT_ObjCSelectorName; 322 if (Tok->Parent->FormatTok.TokenLength > 323 Contexts.back().LongestObjCSelectorName) 324 Contexts.back().LongestObjCSelectorName = 325 Tok->Parent->FormatTok.TokenLength; 326 if (Contexts.back().FirstObjCSelectorName == NULL) 327 Contexts.back().FirstObjCSelectorName = Tok->Parent; 328 } else if (Contexts.back().ColonIsForRangeExpr) { 329 Tok->Type = TT_RangeBasedForLoopColon; 330 } else if (Contexts.size() == 1) { 331 Tok->Type = TT_InheritanceColon; 332 } else if (Contexts.back().ContextKind == tok::l_paren) { 333 Tok->Type = TT_InlineASMColon; 334 } 335 break; 336 case tok::kw_if: 337 case tok::kw_while: 338 if (CurrentToken != NULL && CurrentToken->is(tok::l_paren)) { 339 next(); 340 if (!parseParens(/*LookForDecls=*/ true)) 341 return false; 342 } 343 break; 344 case tok::kw_for: 345 Contexts.back().ColonIsForRangeExpr = true; 346 next(); 347 if (!parseParens()) 348 return false; 349 break; 350 case tok::l_paren: 351 if (!parseParens()) 352 return false; 353 if (Line.MustBeDeclaration && NameFound && !Contexts.back().IsExpression) 354 Line.MightBeFunctionDecl = true; 355 break; 356 case tok::l_square: 357 if (!parseSquare()) 358 return false; 359 break; 360 case tok::l_brace: 361 if (!parseBrace()) 362 return false; 363 break; 364 case tok::less: 365 if (parseAngle()) 366 Tok->Type = TT_TemplateOpener; 367 else { 368 Tok->Type = TT_BinaryOperator; 369 CurrentToken = Tok; 370 next(); 371 } 372 break; 373 case tok::r_paren: 374 case tok::r_square: 375 return false; 376 case tok::r_brace: 377 // Lines can start with '}'. 378 if (Tok->Parent != NULL) 379 return false; 380 break; 381 case tok::greater: 382 Tok->Type = TT_BinaryOperator; 383 break; 384 case tok::kw_operator: 385 while (CurrentToken && CurrentToken->isNot(tok::l_paren)) { 386 if (CurrentToken->isOneOf(tok::star, tok::amp)) 387 CurrentToken->Type = TT_PointerOrReference; 388 consumeToken(); 389 } 390 if (CurrentToken) 391 CurrentToken->Type = TT_OverloadedOperatorLParen; 392 break; 393 case tok::question: 394 parseConditional(); 395 break; 396 case tok::kw_template: 397 parseTemplateDeclaration(); 398 break; 399 case tok::identifier: 400 if (Line.First.is(tok::kw_for) && 401 Tok->FormatTok.Tok.getIdentifierInfo() == &Ident_in) 402 Tok->Type = TT_ObjCForIn; 403 break; 404 case tok::comma: 405 if (Contexts.back().FirstStartOfName) 406 Contexts.back().FirstStartOfName->PartOfMultiVariableDeclStmt = true; 407 break; 408 default: 409 break; 410 } 411 return true; 412 } 413 414 void parseIncludeDirective() { 415 next(); 416 if (CurrentToken != NULL && CurrentToken->is(tok::less)) { 417 next(); 418 while (CurrentToken != NULL) { 419 if (CurrentToken->isNot(tok::comment) || 420 !CurrentToken->Children.empty()) 421 CurrentToken->Type = TT_ImplicitStringLiteral; 422 next(); 423 } 424 } else { 425 while (CurrentToken != NULL) { 426 if (CurrentToken->is(tok::string_literal)) 427 // Mark these string literals as "implicit" literals, too, so that 428 // they are not split or line-wrapped. 429 CurrentToken->Type = TT_ImplicitStringLiteral; 430 next(); 431 } 432 } 433 } 434 435 void parseWarningOrError() { 436 next(); 437 // We still want to format the whitespace left of the first token of the 438 // warning or error. 439 next(); 440 while (CurrentToken != NULL) { 441 CurrentToken->Type = TT_ImplicitStringLiteral; 442 next(); 443 } 444 } 445 446 void parsePreprocessorDirective() { 447 next(); 448 if (CurrentToken == NULL) 449 return; 450 // Hashes in the middle of a line can lead to any strange token 451 // sequence. 452 if (CurrentToken->FormatTok.Tok.getIdentifierInfo() == NULL) 453 return; 454 switch (CurrentToken->FormatTok.Tok.getIdentifierInfo()->getPPKeywordID()) { 455 case tok::pp_include: 456 case tok::pp_import: 457 parseIncludeDirective(); 458 break; 459 case tok::pp_error: 460 case tok::pp_warning: 461 parseWarningOrError(); 462 break; 463 case tok::pp_if: 464 case tok::pp_elif: 465 parseLine(); 466 break; 467 default: 468 break; 469 } 470 while (CurrentToken != NULL) 471 next(); 472 } 473 474public: 475 LineType parseLine() { 476 int PeriodsAndArrows = 0; 477 AnnotatedToken *LastPeriodOrArrow = NULL; 478 bool CanBeBuilderTypeStmt = true; 479 if (CurrentToken->is(tok::hash)) { 480 parsePreprocessorDirective(); 481 return LT_PreprocessorDirective; 482 } 483 while (CurrentToken != NULL) { 484 if (CurrentToken->is(tok::kw_virtual)) 485 KeywordVirtualFound = true; 486 if (CurrentToken->isOneOf(tok::period, tok::arrow)) { 487 ++PeriodsAndArrows; 488 LastPeriodOrArrow = CurrentToken; 489 } 490 AnnotatedToken *TheToken = CurrentToken; 491 if (!consumeToken()) 492 return LT_Invalid; 493 if (getPrecedence(*TheToken) > prec::Assignment && 494 TheToken->Type == TT_BinaryOperator) 495 CanBeBuilderTypeStmt = false; 496 } 497 if (KeywordVirtualFound) 498 return LT_VirtualFunctionDecl; 499 500 // Assume a builder-type call if there are 2 or more "." and "->". 501 if (PeriodsAndArrows >= 2 && CanBeBuilderTypeStmt) { 502 LastPeriodOrArrow->LastInChainOfCalls = true; 503 return LT_BuilderTypeCall; 504 } 505 506 if (Line.First.Type == TT_ObjCMethodSpecifier) { 507 if (Contexts.back().FirstObjCSelectorName != NULL) 508 Contexts.back().FirstObjCSelectorName->LongestObjCSelectorName = 509 Contexts.back().LongestObjCSelectorName; 510 return LT_ObjCMethodDecl; 511 } 512 513 return LT_Other; 514 } 515 516private: 517 void next() { 518 if (CurrentToken != NULL) { 519 determineTokenType(*CurrentToken); 520 CurrentToken->BindingStrength = Contexts.back().BindingStrength; 521 } 522 523 if (CurrentToken != NULL && !CurrentToken->Children.empty()) 524 CurrentToken = &CurrentToken->Children[0]; 525 else 526 CurrentToken = NULL; 527 528 // Reset token type in case we have already looked at it and then recovered 529 // from an error (e.g. failure to find the matching >). 530 if (CurrentToken != NULL) 531 CurrentToken->Type = TT_Unknown; 532 } 533 534 /// \brief A struct to hold information valid in a specific context, e.g. 535 /// a pair of parenthesis. 536 struct Context { 537 Context(tok::TokenKind ContextKind, unsigned BindingStrength, 538 bool IsExpression) 539 : ContextKind(ContextKind), BindingStrength(BindingStrength), 540 LongestObjCSelectorName(0), ColonIsForRangeExpr(false), 541 ColonIsObjCMethodExpr(false), FirstObjCSelectorName(NULL), 542 FirstStartOfName(NULL), IsExpression(IsExpression), 543 CanBeExpression(true) {} 544 545 tok::TokenKind ContextKind; 546 unsigned BindingStrength; 547 unsigned LongestObjCSelectorName; 548 bool ColonIsForRangeExpr; 549 bool ColonIsObjCMethodExpr; 550 AnnotatedToken *FirstObjCSelectorName; 551 AnnotatedToken *FirstStartOfName; 552 bool IsExpression; 553 bool CanBeExpression; 554 }; 555 556 /// \brief Puts a new \c Context onto the stack \c Contexts for the lifetime 557 /// of each instance. 558 struct ScopedContextCreator { 559 AnnotatingParser &P; 560 561 ScopedContextCreator(AnnotatingParser &P, tok::TokenKind ContextKind, 562 unsigned Increase) 563 : P(P) { 564 P.Contexts.push_back( 565 Context(ContextKind, P.Contexts.back().BindingStrength + Increase, 566 P.Contexts.back().IsExpression)); 567 } 568 569 ~ScopedContextCreator() { P.Contexts.pop_back(); } 570 }; 571 572 void determineTokenType(AnnotatedToken &Current) { 573 if (getPrecedence(Current) == prec::Assignment && 574 (!Current.Parent || Current.Parent->isNot(tok::kw_operator))) { 575 Contexts.back().IsExpression = true; 576 for (AnnotatedToken *Previous = Current.Parent; 577 Previous && Previous->isNot(tok::comma); 578 Previous = Previous->Parent) { 579 if (Previous->is(tok::r_square)) 580 Previous = Previous->MatchingParen; 581 if (Previous->Type == TT_BinaryOperator && 582 Previous->isOneOf(tok::star, tok::amp)) { 583 Previous->Type = TT_PointerOrReference; 584 } 585 } 586 } else if (Current.isOneOf(tok::kw_return, tok::kw_throw) || 587 (Current.is(tok::l_paren) && !Line.MustBeDeclaration && 588 (!Current.Parent || Current.Parent->isNot(tok::kw_for)))) { 589 Contexts.back().IsExpression = true; 590 } else if (Current.isOneOf(tok::r_paren, tok::greater, tok::comma)) { 591 for (AnnotatedToken *Previous = Current.Parent; 592 Previous && Previous->isOneOf(tok::star, tok::amp); 593 Previous = Previous->Parent) 594 Previous->Type = TT_PointerOrReference; 595 } else if (Current.Parent && 596 Current.Parent->Type == TT_CtorInitializerColon) { 597 Contexts.back().IsExpression = true; 598 } else if (Current.is(tok::kw_new)) { 599 Contexts.back().CanBeExpression = false; 600 } else if (Current.is(tok::semi)) { 601 // This should be the condition or increment in a for-loop. 602 Contexts.back().IsExpression = true; 603 } 604 605 if (Current.Type == TT_Unknown) { 606 if (Current.Parent && Current.is(tok::identifier) && 607 ((Current.Parent->is(tok::identifier) && 608 Current.Parent->FormatTok.Tok.getIdentifierInfo() 609 ->getPPKeywordID() == tok::pp_not_keyword) || 610 isSimpleTypeSpecifier(*Current.Parent) || 611 Current.Parent->Type == TT_PointerOrReference || 612 Current.Parent->Type == TT_TemplateCloser)) { 613 Contexts.back().FirstStartOfName = &Current; 614 Current.Type = TT_StartOfName; 615 NameFound = true; 616 } else if (Current.isOneOf(tok::star, tok::amp, tok::ampamp)) { 617 Current.Type = 618 determineStarAmpUsage(Current, Contexts.back().IsExpression); 619 } else if (Current.isOneOf(tok::minus, tok::plus, tok::caret)) { 620 Current.Type = determinePlusMinusCaretUsage(Current); 621 } else if (Current.isOneOf(tok::minusminus, tok::plusplus)) { 622 Current.Type = determineIncrementUsage(Current); 623 } else if (Current.is(tok::exclaim)) { 624 Current.Type = TT_UnaryOperator; 625 } else if (Current.isBinaryOperator()) { 626 Current.Type = TT_BinaryOperator; 627 } else if (Current.is(tok::comment)) { 628 std::string Data(Lexer::getSpelling(Current.FormatTok.Tok, SourceMgr, 629 Lex.getLangOpts())); 630 if (StringRef(Data).startswith("//")) 631 Current.Type = TT_LineComment; 632 else 633 Current.Type = TT_BlockComment; 634 } else if (Current.is(tok::r_paren)) { 635 bool ParensNotExpr = !Current.Parent || 636 Current.Parent->Type == TT_PointerOrReference || 637 Current.Parent->Type == TT_TemplateCloser; 638 bool ParensCouldEndDecl = 639 !Current.Children.empty() && 640 Current.Children[0].isOneOf(tok::equal, tok::semi, tok::l_brace); 641 bool IsSizeOfOrAlignOf = 642 Current.MatchingParen && Current.MatchingParen->Parent && 643 Current.MatchingParen->Parent->isOneOf(tok::kw_sizeof, 644 tok::kw_alignof); 645 if (ParensNotExpr && !ParensCouldEndDecl && !IsSizeOfOrAlignOf && 646 Contexts.back().IsExpression) 647 // FIXME: We need to get smarter and understand more cases of casts. 648 Current.Type = TT_CastRParen; 649 } else if (Current.is(tok::at) && Current.Children.size()) { 650 switch (Current.Children[0].FormatTok.Tok.getObjCKeywordID()) { 651 case tok::objc_interface: 652 case tok::objc_implementation: 653 case tok::objc_protocol: 654 Current.Type = TT_ObjCDecl; 655 break; 656 case tok::objc_property: 657 Current.Type = TT_ObjCProperty; 658 break; 659 default: 660 break; 661 } 662 } 663 } 664 } 665 666 /// \brief Return the type of the given token assuming it is * or &. 667 TokenType 668 determineStarAmpUsage(const AnnotatedToken &Tok, bool IsExpression) { 669 const AnnotatedToken *PrevToken = Tok.getPreviousNoneComment(); 670 if (PrevToken == NULL) 671 return TT_UnaryOperator; 672 673 const AnnotatedToken *NextToken = Tok.getNextNoneComment(); 674 if (NextToken == NULL) 675 return TT_Unknown; 676 677 if (PrevToken->is(tok::l_paren) && !IsExpression) 678 return TT_PointerOrReference; 679 680 if (PrevToken->isOneOf(tok::l_paren, tok::l_square, tok::l_brace, 681 tok::comma, tok::semi, tok::kw_return, tok::colon, 682 tok::equal) || 683 PrevToken->Type == TT_BinaryOperator || 684 PrevToken->Type == TT_UnaryOperator || PrevToken->Type == TT_CastRParen) 685 return TT_UnaryOperator; 686 687 if (NextToken->is(tok::l_square)) 688 return TT_PointerOrReference; 689 690 if (PrevToken->FormatTok.Tok.isLiteral() || 691 PrevToken->isOneOf(tok::r_paren, tok::r_square) || 692 NextToken->FormatTok.Tok.isLiteral() || NextToken->isUnaryOperator()) 693 return TT_BinaryOperator; 694 695 // It is very unlikely that we are going to find a pointer or reference type 696 // definition on the RHS of an assignment. 697 if (IsExpression) 698 return TT_BinaryOperator; 699 700 return TT_PointerOrReference; 701 } 702 703 TokenType determinePlusMinusCaretUsage(const AnnotatedToken &Tok) { 704 const AnnotatedToken *PrevToken = Tok.getPreviousNoneComment(); 705 if (PrevToken == NULL) 706 return TT_UnaryOperator; 707 708 // Use heuristics to recognize unary operators. 709 if (PrevToken->isOneOf(tok::equal, tok::l_paren, tok::comma, tok::l_square, 710 tok::question, tok::colon, tok::kw_return, 711 tok::kw_case, tok::at, tok::l_brace)) 712 return TT_UnaryOperator; 713 714 // There can't be two consecutive binary operators. 715 if (PrevToken->Type == TT_BinaryOperator) 716 return TT_UnaryOperator; 717 718 // Fall back to marking the token as binary operator. 719 return TT_BinaryOperator; 720 } 721 722 /// \brief Determine whether ++/-- are pre- or post-increments/-decrements. 723 TokenType determineIncrementUsage(const AnnotatedToken &Tok) { 724 const AnnotatedToken *PrevToken = Tok.getPreviousNoneComment(); 725 if (PrevToken == NULL) 726 return TT_UnaryOperator; 727 if (PrevToken->isOneOf(tok::r_paren, tok::r_square, tok::identifier)) 728 return TT_TrailingUnaryOperator; 729 730 return TT_UnaryOperator; 731 } 732 733 // FIXME: This is copy&pasted from Sema. Put it in a common place and remove 734 // duplication. 735 /// \brief Determine whether the token kind starts a simple-type-specifier. 736 bool isSimpleTypeSpecifier(const AnnotatedToken &Tok) const { 737 switch (Tok.FormatTok.Tok.getKind()) { 738 case tok::kw_short: 739 case tok::kw_long: 740 case tok::kw___int64: 741 case tok::kw___int128: 742 case tok::kw_signed: 743 case tok::kw_unsigned: 744 case tok::kw_void: 745 case tok::kw_char: 746 case tok::kw_int: 747 case tok::kw_half: 748 case tok::kw_float: 749 case tok::kw_double: 750 case tok::kw_wchar_t: 751 case tok::kw_bool: 752 case tok::kw___underlying_type: 753 return true; 754 case tok::annot_typename: 755 case tok::kw_char16_t: 756 case tok::kw_char32_t: 757 case tok::kw_typeof: 758 case tok::kw_decltype: 759 return Lex.getLangOpts().CPlusPlus; 760 default: 761 break; 762 } 763 return false; 764 } 765 766 SmallVector<Context, 8> Contexts; 767 768 SourceManager &SourceMgr; 769 Lexer &Lex; 770 AnnotatedLine &Line; 771 AnnotatedToken *CurrentToken; 772 bool KeywordVirtualFound; 773 bool NameFound; 774 IdentifierInfo &Ident_in; 775}; 776 777/// \brief Parses binary expressions by inserting fake parenthesis based on 778/// operator precedence. 779class ExpressionParser { 780public: 781 ExpressionParser(AnnotatedLine &Line) : Current(&Line.First) {} 782 783 /// \brief Parse expressions with the given operatore precedence. 784 void parse(int Precedence = 0) { 785 if (Precedence > prec::PointerToMember || Current == NULL) 786 return; 787 788 // Eagerly consume trailing comments. 789 while (Current && Current->isTrailingComment()) { 790 next(); 791 } 792 793 AnnotatedToken *Start = Current; 794 bool OperatorFound = false; 795 796 while (Current) { 797 // Consume operators with higher precedence. 798 parse(Precedence + 1); 799 800 int CurrentPrecedence = 0; 801 if (Current) { 802 if (Current->Type == TT_ConditionalExpr) 803 CurrentPrecedence = 1 + (int) prec::Conditional; 804 else if (Current->is(tok::semi) || Current->Type == TT_InlineASMColon) 805 CurrentPrecedence = 1; 806 else if (Current->Type == TT_BinaryOperator || Current->is(tok::comma)) 807 CurrentPrecedence = 1 + (int) getPrecedence(*Current); 808 } 809 810 // At the end of the line or when an operator with higher precedence is 811 // found, insert fake parenthesis and return. 812 if (Current == NULL || Current->closesScope() || 813 (CurrentPrecedence != 0 && CurrentPrecedence < Precedence)) { 814 if (OperatorFound) { 815 Start->FakeLParens.push_back(prec::Level(Precedence - 1)); 816 if (Current) 817 ++Current->Parent->FakeRParens; 818 } 819 return; 820 } 821 822 // Consume scopes: (), [], <> and {} 823 if (Current->opensScope()) { 824 while (Current && !Current->closesScope()) { 825 next(); 826 parse(); 827 } 828 next(); 829 } else { 830 // Operator found. 831 if (CurrentPrecedence == Precedence) 832 OperatorFound = true; 833 834 next(); 835 } 836 } 837 } 838 839private: 840 void next() { 841 if (Current != NULL) 842 Current = Current->Children.empty() ? NULL : &Current->Children[0]; 843 } 844 845 AnnotatedToken *Current; 846}; 847 848void TokenAnnotator::annotate(AnnotatedLine &Line) { 849 AnnotatingParser Parser(SourceMgr, Lex, Line, Ident_in); 850 Line.Type = Parser.parseLine(); 851 if (Line.Type == LT_Invalid) 852 return; 853 854 ExpressionParser ExprParser(Line); 855 ExprParser.parse(); 856 857 if (Line.First.Type == TT_ObjCMethodSpecifier) 858 Line.Type = LT_ObjCMethodDecl; 859 else if (Line.First.Type == TT_ObjCDecl) 860 Line.Type = LT_ObjCDecl; 861 else if (Line.First.Type == TT_ObjCProperty) 862 Line.Type = LT_ObjCProperty; 863 864 Line.First.SpacesRequiredBefore = 1; 865 Line.First.MustBreakBefore = Line.First.FormatTok.MustBreakBefore; 866 Line.First.CanBreakBefore = Line.First.MustBreakBefore; 867 868 Line.First.TotalLength = Line.First.FormatTok.TokenLength; 869} 870 871void TokenAnnotator::calculateFormattingInformation(AnnotatedLine &Line) { 872 if (Line.First.Children.empty()) 873 return; 874 AnnotatedToken *Current = &Line.First.Children[0]; 875 while (Current != NULL) { 876 if (Current->Type == TT_LineComment) 877 Current->SpacesRequiredBefore = Style.SpacesBeforeTrailingComments; 878 else 879 Current->SpacesRequiredBefore = 880 spaceRequiredBefore(Line, *Current) ? 1 : 0; 881 882 if (Current->FormatTok.MustBreakBefore) { 883 Current->MustBreakBefore = true; 884 } else if (Current->Type == TT_LineComment) { 885 Current->MustBreakBefore = Current->FormatTok.NewlinesBefore > 0; 886 } else if (Current->Parent->isTrailingComment() || 887 (Current->is(tok::string_literal) && 888 Current->Parent->is(tok::string_literal))) { 889 Current->MustBreakBefore = true; 890 } else if (Current->is(tok::lessless) && !Current->Children.empty() && 891 Current->Parent->is(tok::string_literal) && 892 Current->Children[0].is(tok::string_literal)) { 893 Current->MustBreakBefore = true; 894 } else { 895 Current->MustBreakBefore = false; 896 } 897 Current->CanBreakBefore = 898 Current->MustBreakBefore || canBreakBefore(Line, *Current); 899 if (Current->MustBreakBefore) 900 Current->TotalLength = Current->Parent->TotalLength + Style.ColumnLimit; 901 else 902 Current->TotalLength = 903 Current->Parent->TotalLength + Current->FormatTok.TokenLength + 904 Current->SpacesRequiredBefore; 905 // FIXME: Only calculate this if CanBreakBefore is true once static 906 // initializers etc. are sorted out. 907 // FIXME: Move magic numbers to a better place. 908 Current->SplitPenalty = 909 20 * Current->BindingStrength + splitPenalty(Line, *Current); 910 911 Current = Current->Children.empty() ? NULL : &Current->Children[0]; 912 } 913 914 DEBUG({ 915 printDebugInfo(Line); 916 }); 917} 918 919unsigned TokenAnnotator::splitPenalty(const AnnotatedLine &Line, 920 const AnnotatedToken &Tok) { 921 const AnnotatedToken &Left = *Tok.Parent; 922 const AnnotatedToken &Right = Tok; 923 924 if (Right.Type == TT_StartOfName) { 925 if (Line.First.is(tok::kw_for) && Right.PartOfMultiVariableDeclStmt) 926 return 3; 927 else if (Line.MightBeFunctionDecl && Right.BindingStrength == 1) 928 // FIXME: Clean up hack of using BindingStrength to find top-level names. 929 return Style.PenaltyReturnTypeOnItsOwnLine; 930 else 931 return 200; 932 } 933 if (Left.is(tok::equal) && Right.is(tok::l_brace)) 934 return 150; 935 if (Left.is(tok::coloncolon)) 936 return 500; 937 if (Left.isOneOf(tok::kw_class, tok::kw_struct)) 938 return 5000; 939 940 if (Left.Type == TT_RangeBasedForLoopColon || 941 Left.Type == TT_InheritanceColon) 942 return 2; 943 944 if (Right.isOneOf(tok::arrow, tok::period)) { 945 if (Line.Type == LT_BuilderTypeCall) 946 return prec::PointerToMember; 947 if (Left.isOneOf(tok::r_paren, tok::r_square) && Left.MatchingParen && 948 Left.MatchingParen->ParameterCount > 0) 949 return 20; // Should be smaller than breaking at a nested comma. 950 return 150; 951 } 952 953 // In for-loops, prefer breaking at ',' and ';'. 954 if (Line.First.is(tok::kw_for) && Left.is(tok::equal)) 955 return 4; 956 957 if (Left.is(tok::semi)) 958 return 0; 959 if (Left.is(tok::comma)) 960 return 1; 961 962 // In Objective-C method expressions, prefer breaking before "param:" over 963 // breaking after it. 964 if (Right.Type == TT_ObjCSelectorName) 965 return 0; 966 if (Left.is(tok::colon) && Left.Type == TT_ObjCMethodExpr) 967 return 20; 968 969 if (Left.is(tok::l_paren) && Line.MightBeFunctionDecl) 970 return 100; 971 if (Left.opensScope()) 972 return Left.ParameterCount > 1 ? prec::Comma : 20; 973 974 if (Right.is(tok::lessless)) { 975 if (Left.is(tok::string_literal)) { 976 StringRef Content = StringRef(Left.FormatTok.Tok.getLiteralData(), 977 Left.FormatTok.TokenLength); 978 Content = Content.drop_back(1).drop_front(1).trim(); 979 if (Content.size() > 1 && 980 (Content.back() == ':' || Content.back() == '=')) 981 return 100; 982 } 983 return prec::Shift; 984 } 985 if (Left.Type == TT_ConditionalExpr) 986 return prec::Conditional; 987 prec::Level Level = getPrecedence(Left); 988 989 if (Level != prec::Unknown) 990 return Level; 991 992 return 3; 993} 994 995bool TokenAnnotator::spaceRequiredBetween(const AnnotatedLine &Line, 996 const AnnotatedToken &Left, 997 const AnnotatedToken &Right) { 998 if (Right.is(tok::hashhash)) 999 return Left.is(tok::hash); 1000 if (Left.isOneOf(tok::hashhash, tok::hash)) 1001 return Right.is(tok::hash); 1002 if (Right.isOneOf(tok::r_paren, tok::semi, tok::comma)) 1003 return false; 1004 if (Right.is(tok::less) && 1005 (Left.is(tok::kw_template) || 1006 (Line.Type == LT_ObjCDecl && Style.ObjCSpaceBeforeProtocolList))) 1007 return true; 1008 if (Left.is(tok::arrow) || Right.is(tok::arrow)) 1009 return false; 1010 if (Left.isOneOf(tok::exclaim, tok::tilde)) 1011 return false; 1012 if (Left.is(tok::at) && 1013 Right.isOneOf(tok::identifier, tok::string_literal, tok::char_constant, 1014 tok::numeric_constant, tok::l_paren, tok::l_brace, 1015 tok::kw_true, tok::kw_false)) 1016 return false; 1017 if (Left.is(tok::coloncolon)) 1018 return false; 1019 if (Right.is(tok::coloncolon)) 1020 return !Left.isOneOf(tok::identifier, tok::greater, tok::l_paren); 1021 if (Left.is(tok::less) || Right.isOneOf(tok::greater, tok::less)) 1022 return false; 1023 if (Right.Type == TT_PointerOrReference) 1024 return Left.FormatTok.Tok.isLiteral() || 1025 ((Left.Type != TT_PointerOrReference) && Left.isNot(tok::l_paren) && 1026 !Style.PointerBindsToType); 1027 if (Left.Type == TT_PointerOrReference) 1028 return Right.FormatTok.Tok.isLiteral() || 1029 ((Right.Type != TT_PointerOrReference) && 1030 Right.isNot(tok::l_paren) && Style.PointerBindsToType && 1031 Left.Parent && Left.Parent->isNot(tok::l_paren)); 1032 if (Right.is(tok::star) && Left.is(tok::l_paren)) 1033 return false; 1034 if (Left.is(tok::l_square)) 1035 return Left.Type == TT_ObjCArrayLiteral && Right.isNot(tok::r_square); 1036 if (Right.is(tok::r_square)) 1037 return Right.Type == TT_ObjCArrayLiteral; 1038 if (Right.is(tok::l_square) && Right.Type != TT_ObjCMethodExpr) 1039 return false; 1040 if (Left.is(tok::period) || Right.is(tok::period)) 1041 return false; 1042 if (Left.is(tok::colon)) 1043 return Left.Type != TT_ObjCMethodExpr; 1044 if (Right.is(tok::colon)) 1045 return Right.Type != TT_ObjCMethodExpr; 1046 if (Left.is(tok::l_paren)) 1047 return false; 1048 if (Right.is(tok::l_paren)) { 1049 return Line.Type == LT_ObjCDecl || 1050 Left.isOneOf(tok::kw_if, tok::kw_for, tok::kw_while, tok::kw_switch, 1051 tok::kw_return, tok::kw_catch, tok::kw_new, 1052 tok::kw_delete, tok::semi); 1053 } 1054 if (Left.is(tok::at) && 1055 Right.FormatTok.Tok.getObjCKeywordID() != tok::objc_not_keyword) 1056 return false; 1057 if (Left.is(tok::l_brace) && Right.is(tok::r_brace)) 1058 return false; 1059 if (Right.is(tok::ellipsis)) 1060 return false; 1061 return true; 1062} 1063 1064bool TokenAnnotator::spaceRequiredBefore(const AnnotatedLine &Line, 1065 const AnnotatedToken &Tok) { 1066 if (Tok.FormatTok.Tok.getIdentifierInfo() && 1067 Tok.Parent->FormatTok.Tok.getIdentifierInfo()) 1068 return true; // Never ever merge two identifiers. 1069 if (Line.Type == LT_ObjCMethodDecl) { 1070 if (Tok.Parent->Type == TT_ObjCMethodSpecifier) 1071 return true; 1072 if (Tok.Parent->is(tok::r_paren) && Tok.is(tok::identifier)) 1073 // Don't space between ')' and <id> 1074 return false; 1075 } 1076 if (Line.Type == LT_ObjCProperty && 1077 (Tok.is(tok::equal) || Tok.Parent->is(tok::equal))) 1078 return false; 1079 1080 if (Tok.Parent->is(tok::comma)) 1081 return true; 1082 if (Tok.is(tok::comma)) 1083 return false; 1084 if (Tok.Type == TT_CtorInitializerColon || Tok.Type == TT_ObjCBlockLParen) 1085 return true; 1086 if (Tok.Parent->FormatTok.Tok.is(tok::kw_operator)) 1087 return false; 1088 if (Tok.Type == TT_OverloadedOperatorLParen) 1089 return false; 1090 if (Tok.is(tok::colon)) 1091 return !Line.First.isOneOf(tok::kw_case, tok::kw_default) && 1092 Tok.getNextNoneComment() != NULL && Tok.Type != TT_ObjCMethodExpr; 1093 if (Tok.is(tok::l_paren) && !Tok.Children.empty() && 1094 Tok.Children[0].Type == TT_PointerOrReference && 1095 !Tok.Children[0].Children.empty() && 1096 Tok.Children[0].Children[0].isNot(tok::r_paren) && 1097 Tok.Parent->isNot(tok::l_paren) && 1098 (Tok.Parent->Type != TT_PointerOrReference || Style.PointerBindsToType)) 1099 return true; 1100 if (Tok.Parent->Type == TT_UnaryOperator || Tok.Parent->Type == TT_CastRParen) 1101 return false; 1102 if (Tok.Type == TT_UnaryOperator) 1103 return !Tok.Parent->isOneOf(tok::l_paren, tok::l_square, tok::at) && 1104 (Tok.Parent->isNot(tok::colon) || 1105 Tok.Parent->Type != TT_ObjCMethodExpr); 1106 if (Tok.Parent->is(tok::greater) && Tok.is(tok::greater)) { 1107 return Tok.Type == TT_TemplateCloser && 1108 Tok.Parent->Type == TT_TemplateCloser && 1109 Style.Standard != FormatStyle::LS_Cpp11; 1110 } 1111 if (Tok.isOneOf(tok::arrowstar, tok::periodstar) || 1112 Tok.Parent->isOneOf(tok::arrowstar, tok::periodstar)) 1113 return false; 1114 if (Tok.Type == TT_BinaryOperator || Tok.Parent->Type == TT_BinaryOperator) 1115 return true; 1116 if (Tok.Parent->Type == TT_TemplateCloser && Tok.is(tok::l_paren)) 1117 return false; 1118 if (Tok.is(tok::less) && Line.First.is(tok::hash)) 1119 return true; 1120 if (Tok.Type == TT_TrailingUnaryOperator) 1121 return false; 1122 return spaceRequiredBetween(Line, *Tok.Parent, Tok); 1123} 1124 1125bool TokenAnnotator::canBreakBefore(const AnnotatedLine &Line, 1126 const AnnotatedToken &Right) { 1127 const AnnotatedToken &Left = *Right.Parent; 1128 if (Right.Type == TT_StartOfName) 1129 return true; 1130 if (Right.is(tok::colon) && Right.Type == TT_ObjCMethodExpr) 1131 return false; 1132 if (Left.is(tok::colon) && Left.Type == TT_ObjCMethodExpr) 1133 return true; 1134 if (Right.Type == TT_ObjCSelectorName) 1135 return true; 1136 if (Left.ClosesTemplateDeclaration) 1137 return true; 1138 if (Right.Type == TT_ConditionalExpr || Right.is(tok::question)) 1139 return true; 1140 if (Right.Type == TT_RangeBasedForLoopColon || 1141 Right.Type == TT_OverloadedOperatorLParen) 1142 return false; 1143 if (Left.Type == TT_RangeBasedForLoopColon) 1144 return true; 1145 if (Right.Type == TT_RangeBasedForLoopColon) 1146 return false; 1147 if (Left.Type == TT_PointerOrReference || Left.Type == TT_TemplateCloser || 1148 Left.Type == TT_UnaryOperator || Left.Type == TT_ConditionalExpr || 1149 Left.isOneOf(tok::question, tok::kw_operator)) 1150 return false; 1151 if (Left.is(tok::equal) && Line.Type == LT_VirtualFunctionDecl) 1152 return false; 1153 if (Left.is(tok::l_paren) && Right.is(tok::l_paren) && Left.Parent && 1154 Left.Parent->is(tok::kw___attribute)) 1155 return false; 1156 1157 if (Right.Type == TT_LineComment) 1158 // We rely on MustBreakBefore being set correctly here as we should not 1159 // change the "binding" behavior of a comment. 1160 return false; 1161 1162 // Allow breaking after a trailing 'const', e.g. after a method declaration, 1163 // unless it is follow by ';', '{' or '='. 1164 if (Left.is(tok::kw_const) && Left.Parent != NULL && 1165 Left.Parent->is(tok::r_paren)) 1166 return !Right.isOneOf(tok::l_brace, tok::semi, tok::equal); 1167 1168 if (Right.is(tok::kw___attribute)) 1169 return true; 1170 1171 // We only break before r_brace if there was a corresponding break before 1172 // the l_brace, which is tracked by BreakBeforeClosingBrace. 1173 if (Right.isOneOf(tok::r_brace, tok::r_paren, tok::greater)) 1174 return false; 1175 if (Left.is(tok::identifier) && Right.is(tok::string_literal)) 1176 return true; 1177 return (Left.isBinaryOperator() && Left.isNot(tok::lessless)) || 1178 Left.isOneOf(tok::comma, tok::coloncolon, tok::semi, tok::l_brace, 1179 tok::kw_class, tok::kw_struct) || 1180 Right.isOneOf(tok::lessless, tok::arrow, tok::period, tok::colon) || 1181 (Left.is(tok::r_paren) && Left.Type != TT_CastRParen && 1182 Right.isOneOf(tok::identifier, tok::kw___attribute)) || 1183 (Left.is(tok::l_paren) && !Right.is(tok::r_paren)) || 1184 (Left.is(tok::l_square) && !Right.is(tok::r_square)); 1185} 1186 1187void TokenAnnotator::printDebugInfo(const AnnotatedLine &Line) { 1188 llvm::errs() << "AnnotatedTokens:\n"; 1189 const AnnotatedToken *Tok = &Line.First; 1190 while (Tok) { 1191 llvm::errs() << " M=" << Tok->MustBreakBefore 1192 << " C=" << Tok->CanBreakBefore << " T=" << Tok->Type 1193 << " S=" << Tok->SpacesRequiredBefore 1194 << " P=" << Tok->SplitPenalty 1195 << " Name=" << Tok->FormatTok.Tok.getName() << " FakeLParens="; 1196 for (unsigned i = 0, e = Tok->FakeLParens.size(); i != e; ++i) 1197 llvm::errs() << Tok->FakeLParens[i] << "/"; 1198 llvm::errs() << " FakeRParens=" << Tok->FakeRParens << "\n"; 1199 Tok = Tok->Children.empty() ? NULL : &Tok->Children[0]; 1200 } 1201 llvm::errs() << "----\n"; 1202} 1203 1204} // namespace format 1205} // namespace clang 1206