1//===--- UnwrappedLineParser.cpp - Format C++ code ------------------------===// 2// 3// The LLVM Compiler Infrastructure 4// 5// This file is distributed under the University of Illinois Open Source 6// License. See LICENSE.TXT for details. 7// 8//===----------------------------------------------------------------------===// 9/// 10/// \file 11/// \brief This file contains the implementation of the UnwrappedLineParser, 12/// which turns a stream of tokens into UnwrappedLines. 13/// 14//===----------------------------------------------------------------------===// 15 16#define DEBUG_TYPE "format-parser" 17 18#include "UnwrappedLineParser.h" 19#include "clang/Basic/Diagnostic.h" 20#include "llvm/Support/Debug.h" 21 22namespace clang { 23namespace format { 24 25class ScopedDeclarationState { 26public: 27 ScopedDeclarationState(UnwrappedLine &Line, std::vector<bool> &Stack, 28 bool MustBeDeclaration) 29 : Line(Line), Stack(Stack) { 30 Line.MustBeDeclaration = MustBeDeclaration; 31 Stack.push_back(MustBeDeclaration); 32 } 33 ~ScopedDeclarationState() { 34 Stack.pop_back(); 35 if (!Stack.empty()) 36 Line.MustBeDeclaration = Stack.back(); 37 else 38 Line.MustBeDeclaration = true; 39 } 40private: 41 UnwrappedLine &Line; 42 std::vector<bool> &Stack; 43}; 44 45class ScopedMacroState : public FormatTokenSource { 46public: 47 ScopedMacroState(UnwrappedLine &Line, FormatTokenSource *&TokenSource, 48 FormatToken &ResetToken, bool &StructuralError) 49 : Line(Line), TokenSource(TokenSource), ResetToken(ResetToken), 50 PreviousLineLevel(Line.Level), PreviousTokenSource(TokenSource), 51 StructuralError(StructuralError), 52 PreviousStructuralError(StructuralError) { 53 TokenSource = this; 54 Line.Level = 0; 55 Line.InPPDirective = true; 56 } 57 58 ~ScopedMacroState() { 59 TokenSource = PreviousTokenSource; 60 ResetToken = Token; 61 Line.InPPDirective = false; 62 Line.Level = PreviousLineLevel; 63 StructuralError = PreviousStructuralError; 64 } 65 66 virtual FormatToken getNextToken() { 67 // The \c UnwrappedLineParser guards against this by never calling 68 // \c getNextToken() after it has encountered the first eof token. 69 assert(!eof()); 70 Token = PreviousTokenSource->getNextToken(); 71 if (eof()) 72 return createEOF(); 73 return Token; 74 } 75 76private: 77 bool eof() { return Token.HasUnescapedNewline; } 78 79 FormatToken createEOF() { 80 FormatToken FormatTok; 81 FormatTok.Tok.startToken(); 82 FormatTok.Tok.setKind(tok::eof); 83 return FormatTok; 84 } 85 86 UnwrappedLine &Line; 87 FormatTokenSource *&TokenSource; 88 FormatToken &ResetToken; 89 unsigned PreviousLineLevel; 90 FormatTokenSource *PreviousTokenSource; 91 bool &StructuralError; 92 bool PreviousStructuralError; 93 94 FormatToken Token; 95}; 96 97class ScopedLineState { 98public: 99 ScopedLineState(UnwrappedLineParser &Parser, 100 bool SwitchToPreprocessorLines = false) 101 : Parser(Parser), SwitchToPreprocessorLines(SwitchToPreprocessorLines) { 102 if (SwitchToPreprocessorLines) 103 Parser.CurrentLines = &Parser.PreprocessorDirectives; 104 PreBlockLine = Parser.Line.take(); 105 Parser.Line.reset(new UnwrappedLine()); 106 Parser.Line->Level = PreBlockLine->Level; 107 Parser.Line->InPPDirective = PreBlockLine->InPPDirective; 108 } 109 110 ~ScopedLineState() { 111 if (!Parser.Line->Tokens.empty()) { 112 Parser.addUnwrappedLine(); 113 } 114 assert(Parser.Line->Tokens.empty()); 115 Parser.Line.reset(PreBlockLine); 116 Parser.MustBreakBeforeNextToken = true; 117 if (SwitchToPreprocessorLines) 118 Parser.CurrentLines = &Parser.Lines; 119 } 120 121private: 122 UnwrappedLineParser &Parser; 123 const bool SwitchToPreprocessorLines; 124 125 UnwrappedLine *PreBlockLine; 126}; 127 128UnwrappedLineParser::UnwrappedLineParser( 129 clang::DiagnosticsEngine &Diag, const FormatStyle &Style, 130 FormatTokenSource &Tokens, UnwrappedLineConsumer &Callback) 131 : Line(new UnwrappedLine), MustBreakBeforeNextToken(false), 132 CurrentLines(&Lines), StructuralError(false), Diag(Diag), Style(Style), 133 Tokens(&Tokens), Callback(Callback) {} 134 135bool UnwrappedLineParser::parse() { 136 DEBUG(llvm::dbgs() << "----\n"); 137 readToken(); 138 parseFile(); 139 for (std::vector<UnwrappedLine>::iterator I = Lines.begin(), E = Lines.end(); 140 I != E; ++I) { 141 Callback.consumeUnwrappedLine(*I); 142 } 143 144 // Create line with eof token. 145 pushToken(FormatTok); 146 Callback.consumeUnwrappedLine(*Line); 147 return StructuralError; 148} 149 150void UnwrappedLineParser::parseFile() { 151 ScopedDeclarationState DeclarationState( 152 *Line, DeclarationScopeStack, 153 /*MustBeDeclaration=*/ !Line->InPPDirective); 154 parseLevel(/*HasOpeningBrace=*/ false); 155 // Make sure to format the remaining tokens. 156 flushComments(true); 157 addUnwrappedLine(); 158} 159 160void UnwrappedLineParser::parseLevel(bool HasOpeningBrace) { 161 do { 162 switch (FormatTok.Tok.getKind()) { 163 case tok::comment: 164 nextToken(); 165 addUnwrappedLine(); 166 break; 167 case tok::l_brace: 168 // FIXME: Add parameter whether this can happen - if this happens, we must 169 // be in a non-declaration context. 170 parseBlock(/*MustBeDeclaration=*/ false); 171 addUnwrappedLine(); 172 break; 173 case tok::r_brace: 174 if (HasOpeningBrace) 175 return; 176 Diag.Report(FormatTok.Tok.getLocation(), 177 Diag.getCustomDiagID(clang::DiagnosticsEngine::Error, 178 "unexpected '}'")); 179 StructuralError = true; 180 nextToken(); 181 addUnwrappedLine(); 182 break; 183 default: 184 parseStructuralElement(); 185 break; 186 } 187 } while (!eof()); 188} 189 190void UnwrappedLineParser::parseBlock(bool MustBeDeclaration, 191 unsigned AddLevels) { 192 assert(FormatTok.Tok.is(tok::l_brace) && "'{' expected"); 193 nextToken(); 194 195 addUnwrappedLine(); 196 197 ScopedDeclarationState DeclarationState(*Line, DeclarationScopeStack, 198 MustBeDeclaration); 199 Line->Level += AddLevels; 200 parseLevel(/*HasOpeningBrace=*/ true); 201 202 if (!FormatTok.Tok.is(tok::r_brace)) { 203 Line->Level -= AddLevels; 204 StructuralError = true; 205 return; 206 } 207 208 nextToken(); // Munch the closing brace. 209 Line->Level -= AddLevels; 210} 211 212void UnwrappedLineParser::parsePPDirective() { 213 assert(FormatTok.Tok.is(tok::hash) && "'#' expected"); 214 ScopedMacroState MacroState(*Line, Tokens, FormatTok, StructuralError); 215 nextToken(); 216 217 if (FormatTok.Tok.getIdentifierInfo() == NULL) { 218 parsePPUnknown(); 219 return; 220 } 221 222 switch (FormatTok.Tok.getIdentifierInfo()->getPPKeywordID()) { 223 case tok::pp_define: 224 parsePPDefine(); 225 break; 226 default: 227 parsePPUnknown(); 228 break; 229 } 230} 231 232void UnwrappedLineParser::parsePPDefine() { 233 nextToken(); 234 235 if (FormatTok.Tok.getKind() != tok::identifier) { 236 parsePPUnknown(); 237 return; 238 } 239 nextToken(); 240 if (FormatTok.Tok.getKind() == tok::l_paren && 241 FormatTok.WhiteSpaceLength == 0) { 242 parseParens(); 243 } 244 addUnwrappedLine(); 245 Line->Level = 1; 246 247 // Errors during a preprocessor directive can only affect the layout of the 248 // preprocessor directive, and thus we ignore them. An alternative approach 249 // would be to use the same approach we use on the file level (no 250 // re-indentation if there was a structural error) within the macro 251 // definition. 252 parseFile(); 253} 254 255void UnwrappedLineParser::parsePPUnknown() { 256 do { 257 nextToken(); 258 } while (!eof()); 259 addUnwrappedLine(); 260} 261 262// Here we blacklist certain tokens that are not usually the first token in an 263// unwrapped line. This is used in attempt to distinguish macro calls without 264// trailing semicolons from other constructs split to several lines. 265bool tokenCanStartNewLine(clang::Token Tok) { 266 // Semicolon can be a null-statement, l_square can be a start of a macro or 267 // a C++11 attribute, but this doesn't seem to be common. 268 return Tok.isNot(tok::semi) && Tok.isNot(tok::l_brace) && 269 Tok.isNot(tok::l_square) && 270 // Tokens that can only be used as binary operators and a part of 271 // overloaded operator names. 272 Tok.isNot(tok::period) && Tok.isNot(tok::periodstar) && 273 Tok.isNot(tok::arrow) && Tok.isNot(tok::arrowstar) && 274 Tok.isNot(tok::less) && Tok.isNot(tok::greater) && 275 Tok.isNot(tok::slash) && Tok.isNot(tok::percent) && 276 Tok.isNot(tok::lessless) && Tok.isNot(tok::greatergreater) && 277 Tok.isNot(tok::equal) && Tok.isNot(tok::plusequal) && 278 Tok.isNot(tok::minusequal) && Tok.isNot(tok::starequal) && 279 Tok.isNot(tok::slashequal) && Tok.isNot(tok::percentequal) && 280 Tok.isNot(tok::ampequal) && Tok.isNot(tok::pipeequal) && 281 Tok.isNot(tok::caretequal) && Tok.isNot(tok::greatergreaterequal) && 282 Tok.isNot(tok::lesslessequal) && 283 // Colon is used in labels, base class lists, initializer lists, 284 // range-based for loops, ternary operator, but should never be the 285 // first token in an unwrapped line. 286 Tok.isNot(tok::colon); 287} 288 289void UnwrappedLineParser::parseStructuralElement() { 290 assert(!FormatTok.Tok.is(tok::l_brace)); 291 switch (FormatTok.Tok.getKind()) { 292 case tok::at: 293 nextToken(); 294 if (FormatTok.Tok.is(tok::l_brace)) { 295 parseBracedList(); 296 break; 297 } 298 switch (FormatTok.Tok.getObjCKeywordID()) { 299 case tok::objc_public: 300 case tok::objc_protected: 301 case tok::objc_package: 302 case tok::objc_private: 303 return parseAccessSpecifier(); 304 case tok::objc_interface: 305 case tok::objc_implementation: 306 return parseObjCInterfaceOrImplementation(); 307 case tok::objc_protocol: 308 return parseObjCProtocol(); 309 case tok::objc_end: 310 return; // Handled by the caller. 311 case tok::objc_optional: 312 case tok::objc_required: 313 nextToken(); 314 addUnwrappedLine(); 315 return; 316 default: 317 break; 318 } 319 break; 320 case tok::kw_namespace: 321 parseNamespace(); 322 return; 323 case tok::kw_inline: 324 nextToken(); 325 if (FormatTok.Tok.is(tok::kw_namespace)) { 326 parseNamespace(); 327 return; 328 } 329 break; 330 case tok::kw_public: 331 case tok::kw_protected: 332 case tok::kw_private: 333 parseAccessSpecifier(); 334 return; 335 case tok::kw_if: 336 parseIfThenElse(); 337 return; 338 case tok::kw_for: 339 case tok::kw_while: 340 parseForOrWhileLoop(); 341 return; 342 case tok::kw_do: 343 parseDoWhile(); 344 return; 345 case tok::kw_switch: 346 parseSwitch(); 347 return; 348 case tok::kw_default: 349 nextToken(); 350 parseLabel(); 351 return; 352 case tok::kw_case: 353 parseCaseLabel(); 354 return; 355 case tok::kw_return: 356 parseReturn(); 357 return; 358 case tok::kw_extern: 359 nextToken(); 360 if (FormatTok.Tok.is(tok::string_literal)) { 361 nextToken(); 362 if (FormatTok.Tok.is(tok::l_brace)) { 363 parseBlock(/*MustBeDeclaration=*/ true, 0); 364 addUnwrappedLine(); 365 return; 366 } 367 } 368 // In all other cases, parse the declaration. 369 break; 370 default: 371 break; 372 } 373 do { 374 switch (FormatTok.Tok.getKind()) { 375 case tok::at: 376 nextToken(); 377 if (FormatTok.Tok.is(tok::l_brace)) 378 parseBracedList(); 379 break; 380 case tok::kw_enum: 381 parseEnum(); 382 break; 383 case tok::kw_struct: 384 case tok::kw_union: 385 case tok::kw_class: 386 parseRecord(); 387 // A record declaration or definition is always the start of a structural 388 // element. 389 break; 390 case tok::semi: 391 nextToken(); 392 addUnwrappedLine(); 393 return; 394 case tok::r_brace: 395 addUnwrappedLine(); 396 return; 397 case tok::l_paren: 398 parseParens(); 399 break; 400 case tok::l_brace: 401 // A block outside of parentheses must be the last part of a 402 // structural element. 403 // FIXME: Figure out cases where this is not true, and add projections for 404 // them (the one we know is missing are lambdas). 405 parseBlock(/*MustBeDeclaration=*/ false); 406 addUnwrappedLine(); 407 return; 408 case tok::identifier: 409 nextToken(); 410 if (Line->Tokens.size() == 1) { 411 if (FormatTok.Tok.is(tok::colon)) { 412 parseLabel(); 413 return; 414 } 415 // Recognize function-like macro usages without trailing semicolon. 416 if (FormatTok.Tok.is(tok::l_paren)) { 417 parseParens(); 418 if (FormatTok.HasUnescapedNewline && 419 tokenCanStartNewLine(FormatTok.Tok)) { 420 addUnwrappedLine(); 421 return; 422 } 423 } 424 } 425 break; 426 case tok::equal: 427 nextToken(); 428 if (FormatTok.Tok.is(tok::l_brace)) { 429 parseBracedList(); 430 } 431 break; 432 default: 433 nextToken(); 434 break; 435 } 436 } while (!eof()); 437} 438 439void UnwrappedLineParser::parseBracedList() { 440 nextToken(); 441 442 // FIXME: Once we have an expression parser in the UnwrappedLineParser, 443 // replace this by using parseAssigmentExpression() inside. 444 bool StartOfExpression = true; 445 do { 446 // FIXME: When we start to support lambdas, we'll want to parse them away 447 // here, otherwise our bail-out scenarios below break. The better solution 448 // might be to just implement a more or less complete expression parser. 449 switch (FormatTok.Tok.getKind()) { 450 case tok::l_brace: 451 if (!StartOfExpression) { 452 // Probably a missing closing brace. Bail out. 453 addUnwrappedLine(); 454 return; 455 } 456 parseBracedList(); 457 StartOfExpression = false; 458 break; 459 case tok::r_brace: 460 nextToken(); 461 return; 462 case tok::semi: 463 // Probably a missing closing brace. Bail out. 464 return; 465 case tok::comma: 466 nextToken(); 467 StartOfExpression = true; 468 break; 469 default: 470 nextToken(); 471 StartOfExpression = false; 472 break; 473 } 474 } while (!eof()); 475} 476 477void UnwrappedLineParser::parseReturn() { 478 nextToken(); 479 480 do { 481 switch (FormatTok.Tok.getKind()) { 482 case tok::l_brace: 483 parseBracedList(); 484 if (FormatTok.Tok.isNot(tok::semi)) { 485 // Assume missing ';'. 486 addUnwrappedLine(); 487 return; 488 } 489 break; 490 case tok::l_paren: 491 parseParens(); 492 break; 493 case tok::r_brace: 494 // Assume missing ';'. 495 addUnwrappedLine(); 496 return; 497 case tok::semi: 498 nextToken(); 499 addUnwrappedLine(); 500 return; 501 default: 502 nextToken(); 503 break; 504 } 505 } while (!eof()); 506} 507 508void UnwrappedLineParser::parseParens() { 509 assert(FormatTok.Tok.is(tok::l_paren) && "'(' expected."); 510 nextToken(); 511 do { 512 switch (FormatTok.Tok.getKind()) { 513 case tok::l_paren: 514 parseParens(); 515 break; 516 case tok::r_paren: 517 nextToken(); 518 return; 519 case tok::l_brace: { 520 nextToken(); 521 ScopedLineState LineState(*this); 522 ScopedDeclarationState DeclarationState(*Line, DeclarationScopeStack, 523 /*MustBeDeclaration=*/ false); 524 Line->Level += 1; 525 parseLevel(/*HasOpeningBrace=*/ true); 526 Line->Level -= 1; 527 break; 528 } 529 case tok::at: 530 nextToken(); 531 if (FormatTok.Tok.is(tok::l_brace)) 532 parseBracedList(); 533 break; 534 default: 535 nextToken(); 536 break; 537 } 538 } while (!eof()); 539} 540 541void UnwrappedLineParser::parseIfThenElse() { 542 assert(FormatTok.Tok.is(tok::kw_if) && "'if' expected"); 543 nextToken(); 544 if (FormatTok.Tok.is(tok::l_paren)) 545 parseParens(); 546 bool NeedsUnwrappedLine = false; 547 if (FormatTok.Tok.is(tok::l_brace)) { 548 parseBlock(/*MustBeDeclaration=*/ false); 549 NeedsUnwrappedLine = true; 550 } else { 551 addUnwrappedLine(); 552 ++Line->Level; 553 parseStructuralElement(); 554 --Line->Level; 555 } 556 if (FormatTok.Tok.is(tok::kw_else)) { 557 nextToken(); 558 if (FormatTok.Tok.is(tok::l_brace)) { 559 parseBlock(/*MustBeDeclaration=*/ false); 560 addUnwrappedLine(); 561 } else if (FormatTok.Tok.is(tok::kw_if)) { 562 parseIfThenElse(); 563 } else { 564 addUnwrappedLine(); 565 ++Line->Level; 566 parseStructuralElement(); 567 --Line->Level; 568 } 569 } else if (NeedsUnwrappedLine) { 570 addUnwrappedLine(); 571 } 572} 573 574void UnwrappedLineParser::parseNamespace() { 575 assert(FormatTok.Tok.is(tok::kw_namespace) && "'namespace' expected"); 576 nextToken(); 577 if (FormatTok.Tok.is(tok::identifier)) 578 nextToken(); 579 if (FormatTok.Tok.is(tok::l_brace)) { 580 parseBlock(/*MustBeDeclaration=*/ true, 0); 581 // Munch the semicolon after a namespace. This is more common than one would 582 // think. Puttin the semicolon into its own line is very ugly. 583 if (FormatTok.Tok.is(tok::semi)) 584 nextToken(); 585 addUnwrappedLine(); 586 } 587 // FIXME: Add error handling. 588} 589 590void UnwrappedLineParser::parseForOrWhileLoop() { 591 assert((FormatTok.Tok.is(tok::kw_for) || FormatTok.Tok.is(tok::kw_while)) && 592 "'for' or 'while' expected"); 593 nextToken(); 594 if (FormatTok.Tok.is(tok::l_paren)) 595 parseParens(); 596 if (FormatTok.Tok.is(tok::l_brace)) { 597 parseBlock(/*MustBeDeclaration=*/ false); 598 addUnwrappedLine(); 599 } else { 600 addUnwrappedLine(); 601 ++Line->Level; 602 parseStructuralElement(); 603 --Line->Level; 604 } 605} 606 607void UnwrappedLineParser::parseDoWhile() { 608 assert(FormatTok.Tok.is(tok::kw_do) && "'do' expected"); 609 nextToken(); 610 if (FormatTok.Tok.is(tok::l_brace)) { 611 parseBlock(/*MustBeDeclaration=*/ false); 612 } else { 613 addUnwrappedLine(); 614 ++Line->Level; 615 parseStructuralElement(); 616 --Line->Level; 617 } 618 619 // FIXME: Add error handling. 620 if (!FormatTok.Tok.is(tok::kw_while)) { 621 addUnwrappedLine(); 622 return; 623 } 624 625 nextToken(); 626 parseStructuralElement(); 627} 628 629void UnwrappedLineParser::parseLabel() { 630 if (FormatTok.Tok.isNot(tok::colon)) 631 return; 632 nextToken(); 633 unsigned OldLineLevel = Line->Level; 634 if (Line->Level > 1 || (!Line->InPPDirective && Line->Level > 0)) 635 --Line->Level; 636 if (CommentsBeforeNextToken.empty() && FormatTok.Tok.is(tok::l_brace)) { 637 parseBlock(/*MustBeDeclaration=*/ false); 638 if (FormatTok.Tok.is(tok::kw_break)) 639 parseStructuralElement(); // "break;" after "}" goes on the same line. 640 } 641 addUnwrappedLine(); 642 Line->Level = OldLineLevel; 643} 644 645void UnwrappedLineParser::parseCaseLabel() { 646 assert(FormatTok.Tok.is(tok::kw_case) && "'case' expected"); 647 // FIXME: fix handling of complex expressions here. 648 do { 649 nextToken(); 650 } while (!eof() && !FormatTok.Tok.is(tok::colon)); 651 parseLabel(); 652} 653 654void UnwrappedLineParser::parseSwitch() { 655 assert(FormatTok.Tok.is(tok::kw_switch) && "'switch' expected"); 656 nextToken(); 657 if (FormatTok.Tok.is(tok::l_paren)) 658 parseParens(); 659 if (FormatTok.Tok.is(tok::l_brace)) { 660 parseBlock(/*MustBeDeclaration=*/ false, Style.IndentCaseLabels ? 2 : 1); 661 addUnwrappedLine(); 662 } else { 663 addUnwrappedLine(); 664 Line->Level += (Style.IndentCaseLabels ? 2 : 1); 665 parseStructuralElement(); 666 Line->Level -= (Style.IndentCaseLabels ? 2 : 1); 667 } 668} 669 670void UnwrappedLineParser::parseAccessSpecifier() { 671 nextToken(); 672 // Otherwise, we don't know what it is, and we'd better keep the next token. 673 if (FormatTok.Tok.is(tok::colon)) 674 nextToken(); 675 addUnwrappedLine(); 676} 677 678void UnwrappedLineParser::parseEnum() { 679 nextToken(); 680 if (FormatTok.Tok.is(tok::identifier) || 681 FormatTok.Tok.is(tok::kw___attribute) || 682 FormatTok.Tok.is(tok::kw___declspec)) { 683 nextToken(); 684 // We can have macros or attributes in between 'enum' and the enum name. 685 if (FormatTok.Tok.is(tok::l_paren)) { 686 parseParens(); 687 } 688 if (FormatTok.Tok.is(tok::identifier)) 689 nextToken(); 690 } 691 if (FormatTok.Tok.is(tok::l_brace)) { 692 nextToken(); 693 addUnwrappedLine(); 694 ++Line->Level; 695 do { 696 switch (FormatTok.Tok.getKind()) { 697 case tok::l_paren: 698 parseParens(); 699 break; 700 case tok::r_brace: 701 addUnwrappedLine(); 702 nextToken(); 703 --Line->Level; 704 return; 705 case tok::comma: 706 nextToken(); 707 addUnwrappedLine(); 708 break; 709 default: 710 nextToken(); 711 break; 712 } 713 } while (!eof()); 714 } 715 // We fall through to parsing a structural element afterwards, so that in 716 // enum A {} n, m; 717 // "} n, m;" will end up in one unwrapped line. 718} 719 720void UnwrappedLineParser::parseRecord() { 721 nextToken(); 722 if (FormatTok.Tok.is(tok::identifier) || 723 FormatTok.Tok.is(tok::kw___attribute) || 724 FormatTok.Tok.is(tok::kw___declspec)) { 725 nextToken(); 726 // We can have macros or attributes in between 'class' and the class name. 727 if (FormatTok.Tok.is(tok::l_paren)) { 728 parseParens(); 729 } 730 // The actual identifier can be a nested name specifier, and in macros 731 // it is often token-pasted. 732 while (FormatTok.Tok.is(tok::identifier) || 733 FormatTok.Tok.is(tok::coloncolon) || FormatTok.Tok.is(tok::hashhash)) 734 nextToken(); 735 736 // Note that parsing away template declarations here leads to incorrectly 737 // accepting function declarations as record declarations. 738 // In general, we cannot solve this problem. Consider: 739 // class A<int> B() {} 740 // which can be a function definition or a class definition when B() is a 741 // macro. If we find enough real-world cases where this is a problem, we 742 // can parse for the 'template' keyword in the beginning of the statement, 743 // and thus rule out the record production in case there is no template 744 // (this would still leave us with an ambiguity between template function 745 // and class declarations). 746 if (FormatTok.Tok.is(tok::colon) || FormatTok.Tok.is(tok::less)) { 747 while (!eof() && FormatTok.Tok.isNot(tok::l_brace)) { 748 if (FormatTok.Tok.is(tok::semi)) 749 return; 750 nextToken(); 751 } 752 } 753 } 754 if (FormatTok.Tok.is(tok::l_brace)) 755 parseBlock(/*MustBeDeclaration=*/ true); 756 // We fall through to parsing a structural element afterwards, so 757 // class A {} n, m; 758 // will end up in one unwrapped line. 759} 760 761void UnwrappedLineParser::parseObjCProtocolList() { 762 assert(FormatTok.Tok.is(tok::less) && "'<' expected."); 763 do 764 nextToken(); 765 while (!eof() && FormatTok.Tok.isNot(tok::greater)); 766 nextToken(); // Skip '>'. 767} 768 769void UnwrappedLineParser::parseObjCUntilAtEnd() { 770 do { 771 if (FormatTok.Tok.isObjCAtKeyword(tok::objc_end)) { 772 nextToken(); 773 addUnwrappedLine(); 774 break; 775 } 776 parseStructuralElement(); 777 } while (!eof()); 778} 779 780void UnwrappedLineParser::parseObjCInterfaceOrImplementation() { 781 nextToken(); 782 nextToken(); // interface name 783 784 // @interface can be followed by either a base class, or a category. 785 if (FormatTok.Tok.is(tok::colon)) { 786 nextToken(); 787 nextToken(); // base class name 788 } else if (FormatTok.Tok.is(tok::l_paren)) 789 // Skip category, if present. 790 parseParens(); 791 792 if (FormatTok.Tok.is(tok::less)) 793 parseObjCProtocolList(); 794 795 // If instance variables are present, keep the '{' on the first line too. 796 if (FormatTok.Tok.is(tok::l_brace)) 797 parseBlock(/*MustBeDeclaration=*/ true); 798 799 // With instance variables, this puts '}' on its own line. Without instance 800 // variables, this ends the @interface line. 801 addUnwrappedLine(); 802 803 parseObjCUntilAtEnd(); 804} 805 806void UnwrappedLineParser::parseObjCProtocol() { 807 nextToken(); 808 nextToken(); // protocol name 809 810 if (FormatTok.Tok.is(tok::less)) 811 parseObjCProtocolList(); 812 813 // Check for protocol declaration. 814 if (FormatTok.Tok.is(tok::semi)) { 815 nextToken(); 816 return addUnwrappedLine(); 817 } 818 819 addUnwrappedLine(); 820 parseObjCUntilAtEnd(); 821} 822 823void UnwrappedLineParser::addUnwrappedLine() { 824 if (Line->Tokens.empty()) 825 return; 826 DEBUG({ 827 llvm::dbgs() << "Line(" << Line->Level << ")" 828 << (Line->InPPDirective ? " MACRO" : "") << ": "; 829 for (std::list<FormatToken>::iterator I = Line->Tokens.begin(), 830 E = Line->Tokens.end(); 831 I != E; ++I) { 832 llvm::dbgs() << I->Tok.getName() << " "; 833 834 } 835 llvm::dbgs() << "\n"; 836 }); 837 CurrentLines->push_back(*Line); 838 Line->Tokens.clear(); 839 if (CurrentLines == &Lines && !PreprocessorDirectives.empty()) { 840 for (std::vector<UnwrappedLine>::iterator 841 I = PreprocessorDirectives.begin(), 842 E = PreprocessorDirectives.end(); 843 I != E; ++I) { 844 CurrentLines->push_back(*I); 845 } 846 PreprocessorDirectives.clear(); 847 } 848} 849 850bool UnwrappedLineParser::eof() const { return FormatTok.Tok.is(tok::eof); } 851 852void UnwrappedLineParser::flushComments(bool NewlineBeforeNext) { 853 bool JustComments = Line->Tokens.empty(); 854 for (SmallVectorImpl<FormatToken>::const_iterator 855 I = CommentsBeforeNextToken.begin(), 856 E = CommentsBeforeNextToken.end(); 857 I != E; ++I) { 858 if (I->NewlinesBefore && JustComments) { 859 addUnwrappedLine(); 860 } 861 pushToken(*I); 862 } 863 if (NewlineBeforeNext && JustComments) { 864 addUnwrappedLine(); 865 } 866 CommentsBeforeNextToken.clear(); 867} 868 869void UnwrappedLineParser::nextToken() { 870 if (eof()) 871 return; 872 flushComments(FormatTok.NewlinesBefore > 0); 873 pushToken(FormatTok); 874 readToken(); 875} 876 877void UnwrappedLineParser::readToken() { 878 bool CommentsInCurrentLine = true; 879 do { 880 FormatTok = Tokens->getNextToken(); 881 while (!Line->InPPDirective && FormatTok.Tok.is(tok::hash) && 882 (FormatTok.HasUnescapedNewline || FormatTok.IsFirst)) { 883 // If there is an unfinished unwrapped line, we flush the preprocessor 884 // directives only after that unwrapped line was finished later. 885 bool SwitchToPreprocessorLines = 886 !Line->Tokens.empty() && CurrentLines == &Lines; 887 ScopedLineState BlockState(*this, SwitchToPreprocessorLines); 888 // Comments stored before the preprocessor directive need to be output 889 // before the preprocessor directive, at the same level as the 890 // preprocessor directive, as we consider them to apply to the directive. 891 flushComments(FormatTok.NewlinesBefore > 0); 892 parsePPDirective(); 893 } 894 if (!FormatTok.Tok.is(tok::comment)) 895 return; 896 if (FormatTok.NewlinesBefore > 0 || FormatTok.IsFirst) { 897 CommentsInCurrentLine = false; 898 } 899 if (CommentsInCurrentLine) { 900 pushToken(FormatTok); 901 } else { 902 CommentsBeforeNextToken.push_back(FormatTok); 903 } 904 } while (!eof()); 905} 906 907void UnwrappedLineParser::pushToken(const FormatToken &Tok) { 908 Line->Tokens.push_back(Tok); 909 if (MustBreakBeforeNextToken) { 910 Line->Tokens.back().MustBreakBefore = true; 911 MustBreakBeforeNextToken = false; 912 } 913} 914 915} // end namespace format 916} // end namespace clang 917