1//===--- TokenAnnotator.cpp - Format C++ code -----------------------------===//
2//
3//                     The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9///
10/// \file
11/// \brief This file implements a token annotator, i.e. creates
12/// \c AnnotatedTokens out of \c FormatTokens with required extra information.
13///
14//===----------------------------------------------------------------------===//
15
16#include "TokenAnnotator.h"
17#include "clang/Basic/SourceManager.h"
18#include "clang/Lex/Lexer.h"
19#include "llvm/Support/Debug.h"
20
21namespace clang {
22namespace format {
23
24bool AnnotatedToken::isUnaryOperator() const {
25  switch (FormatTok.Tok.getKind()) {
26  case tok::plus:
27  case tok::plusplus:
28  case tok::minus:
29  case tok::minusminus:
30  case tok::exclaim:
31  case tok::tilde:
32  case tok::kw_sizeof:
33  case tok::kw_alignof:
34    return true;
35  default:
36    return false;
37  }
38}
39
40bool AnnotatedToken::isBinaryOperator() const {
41  // Comma is a binary operator, but does not behave as such wrt. formatting.
42  return getPrecedence(*this) > prec::Comma;
43}
44
45bool AnnotatedToken::isTrailingComment() const {
46  return is(tok::comment) &&
47         (Children.empty() || Children[0].FormatTok.NewlinesBefore > 0);
48}
49
50AnnotatedToken *AnnotatedToken::getPreviousNoneComment() const {
51  AnnotatedToken *Tok = Parent;
52  while (Tok != NULL && Tok->is(tok::comment))
53    Tok = Tok->Parent;
54  return Tok;
55}
56
57const AnnotatedToken *AnnotatedToken::getNextNoneComment() const {
58  const AnnotatedToken *Tok = Children.empty() ? NULL : &Children[0];
59  while (Tok != NULL && Tok->is(tok::comment))
60    Tok = Tok->Children.empty() ? NULL : &Tok->Children[0];
61  return Tok;
62}
63
64bool AnnotatedToken::closesScope() const {
65  return isOneOf(tok::r_paren, tok::r_brace, tok::r_square) ||
66         Type == TT_TemplateCloser;
67}
68
69bool AnnotatedToken::opensScope() const {
70  return isOneOf(tok::l_paren, tok::l_brace, tok::l_square) ||
71         Type == TT_TemplateOpener;
72}
73
74/// \brief A parser that gathers additional information about tokens.
75///
76/// The \c TokenAnnotator tries to match parenthesis and square brakets and
77/// store a parenthesis levels. It also tries to resolve matching "<" and ">"
78/// into template parameter lists.
79class AnnotatingParser {
80public:
81  AnnotatingParser(SourceManager &SourceMgr, Lexer &Lex, AnnotatedLine &Line,
82                   IdentifierInfo &Ident_in)
83      : SourceMgr(SourceMgr), Lex(Lex), Line(Line), CurrentToken(&Line.First),
84        KeywordVirtualFound(false), NameFound(false), Ident_in(Ident_in) {
85    Contexts.push_back(Context(tok::unknown, 1, /*IsExpression=*/ false));
86  }
87
88private:
89  bool parseAngle() {
90    if (CurrentToken == NULL)
91      return false;
92    ScopedContextCreator ContextCreator(*this, tok::less, 10);
93    AnnotatedToken *Left = CurrentToken->Parent;
94    Contexts.back().IsExpression = false;
95    while (CurrentToken != NULL) {
96      if (CurrentToken->is(tok::greater)) {
97        Left->MatchingParen = CurrentToken;
98        CurrentToken->MatchingParen = Left;
99        CurrentToken->Type = TT_TemplateCloser;
100        next();
101        return true;
102      }
103      if (CurrentToken->isOneOf(tok::r_paren, tok::r_square, tok::r_brace,
104                                tok::pipepipe, tok::ampamp, tok::question,
105                                tok::colon))
106        return false;
107      updateParameterCount(Left, CurrentToken);
108      if (!consumeToken())
109        return false;
110    }
111    return false;
112  }
113
114  bool parseParens(bool LookForDecls = false) {
115    if (CurrentToken == NULL)
116      return false;
117    ScopedContextCreator ContextCreator(*this, tok::l_paren, 1);
118
119    // FIXME: This is a bit of a hack. Do better.
120    Contexts.back().ColonIsForRangeExpr =
121        Contexts.size() == 2 && Contexts[0].ColonIsForRangeExpr;
122
123    bool StartsObjCMethodExpr = false;
124    AnnotatedToken *Left = CurrentToken->Parent;
125    if (CurrentToken->is(tok::caret)) {
126      // ^( starts a block.
127      Left->Type = TT_ObjCBlockLParen;
128    } else if (AnnotatedToken *MaybeSel = Left->Parent) {
129      // @selector( starts a selector.
130      if (MaybeSel->isObjCAtKeyword(tok::objc_selector) && MaybeSel->Parent &&
131          MaybeSel->Parent->is(tok::at)) {
132        StartsObjCMethodExpr = true;
133      }
134    }
135
136    if (StartsObjCMethodExpr) {
137      Contexts.back().ColonIsObjCMethodExpr = true;
138      Left->Type = TT_ObjCMethodExpr;
139    }
140
141    while (CurrentToken != NULL) {
142      // LookForDecls is set when "if (" has been seen. Check for
143      // 'identifier' '*' 'identifier' followed by not '=' -- this
144      // '*' has to be a binary operator but determineStarAmpUsage() will
145      // categorize it as an unary operator, so set the right type here.
146      if (LookForDecls && !CurrentToken->Children.empty()) {
147        AnnotatedToken &Prev = *CurrentToken->Parent;
148        AnnotatedToken &Next = CurrentToken->Children[0];
149        if (Prev.Parent->is(tok::identifier) &&
150            Prev.isOneOf(tok::star, tok::amp, tok::ampamp) &&
151            CurrentToken->is(tok::identifier) && Next.isNot(tok::equal)) {
152          Prev.Type = TT_BinaryOperator;
153          LookForDecls = false;
154        }
155      }
156
157      if (CurrentToken->is(tok::r_paren)) {
158        if (CurrentToken->Parent->closesScope())
159          CurrentToken->Parent->MatchingParen->NoMoreTokensOnLevel = true;
160        Left->MatchingParen = CurrentToken;
161        CurrentToken->MatchingParen = Left;
162
163        if (StartsObjCMethodExpr) {
164          CurrentToken->Type = TT_ObjCMethodExpr;
165          if (Contexts.back().FirstObjCSelectorName != NULL) {
166            Contexts.back().FirstObjCSelectorName->LongestObjCSelectorName =
167                Contexts.back().LongestObjCSelectorName;
168          }
169        }
170
171        next();
172        return true;
173      }
174      if (CurrentToken->isOneOf(tok::r_square, tok::r_brace))
175        return false;
176      updateParameterCount(Left, CurrentToken);
177      if (!consumeToken())
178        return false;
179    }
180    return false;
181  }
182
183  bool parseSquare() {
184    if (!CurrentToken)
185      return false;
186
187    // A '[' could be an index subscript (after an indentifier or after
188    // ')' or ']'), it could be the start of an Objective-C method
189    // expression, or it could the the start of an Objective-C array literal.
190    AnnotatedToken *Left = CurrentToken->Parent;
191    AnnotatedToken *Parent = Left->getPreviousNoneComment();
192    bool StartsObjCMethodExpr =
193        Contexts.back().CanBeExpression &&
194        (!Parent || Parent->isOneOf(tok::colon, tok::l_square, tok::l_paren,
195                                    tok::kw_return, tok::kw_throw) ||
196         Parent->isUnaryOperator() || Parent->Type == TT_ObjCForIn ||
197         Parent->Type == TT_CastRParen ||
198         getBinOpPrecedence(Parent->FormatTok.Tok.getKind(), true, true) >
199             prec::Unknown);
200    ScopedContextCreator ContextCreator(*this, tok::l_square, 10);
201    Contexts.back().IsExpression = true;
202    bool StartsObjCArrayLiteral = Parent && Parent->is(tok::at);
203
204    if (StartsObjCMethodExpr) {
205      Contexts.back().ColonIsObjCMethodExpr = true;
206      Left->Type = TT_ObjCMethodExpr;
207    } else if (StartsObjCArrayLiteral) {
208      Left->Type = TT_ObjCArrayLiteral;
209    }
210
211    while (CurrentToken != NULL) {
212      if (CurrentToken->is(tok::r_square)) {
213        if (!CurrentToken->Children.empty() &&
214            CurrentToken->Children[0].is(tok::l_paren)) {
215          // An ObjC method call is rarely followed by an open parenthesis.
216          // FIXME: Do we incorrectly label ":" with this?
217          StartsObjCMethodExpr = false;
218          Left->Type = TT_Unknown;
219        }
220        if (StartsObjCMethodExpr) {
221          CurrentToken->Type = TT_ObjCMethodExpr;
222          // determineStarAmpUsage() thinks that '*' '[' is allocating an
223          // array of pointers, but if '[' starts a selector then '*' is a
224          // binary operator.
225          if (Parent != NULL && Parent->Type == TT_PointerOrReference)
226            Parent->Type = TT_BinaryOperator;
227        } else if (StartsObjCArrayLiteral) {
228          CurrentToken->Type = TT_ObjCArrayLiteral;
229        }
230        Left->MatchingParen = CurrentToken;
231        CurrentToken->MatchingParen = Left;
232        if (Contexts.back().FirstObjCSelectorName != NULL)
233          Contexts.back().FirstObjCSelectorName->LongestObjCSelectorName =
234              Contexts.back().LongestObjCSelectorName;
235        next();
236        return true;
237      }
238      if (CurrentToken->isOneOf(tok::r_paren, tok::r_brace))
239        return false;
240      updateParameterCount(Left, CurrentToken);
241      if (!consumeToken())
242        return false;
243    }
244    return false;
245  }
246
247  bool parseBrace() {
248    if (CurrentToken != NULL) {
249      ScopedContextCreator ContextCreator(*this, tok::l_brace, 1);
250      AnnotatedToken *Left = CurrentToken->Parent;
251      while (CurrentToken != NULL) {
252        if (CurrentToken->is(tok::r_brace)) {
253          Left->MatchingParen = CurrentToken;
254          CurrentToken->MatchingParen = Left;
255          next();
256          return true;
257        }
258        if (CurrentToken->isOneOf(tok::r_paren, tok::r_square))
259          return false;
260        updateParameterCount(Left, CurrentToken);
261        if (!consumeToken())
262          return false;
263      }
264    }
265    // No closing "}" found, this probably starts a definition.
266    Line.StartsDefinition = true;
267    return true;
268  }
269
270  void updateParameterCount(AnnotatedToken *Left, AnnotatedToken *Current) {
271    if (Current->is(tok::comma))
272      ++Left->ParameterCount;
273    else if (Left->ParameterCount == 0 && Current->isNot(tok::comment))
274      Left->ParameterCount = 1;
275  }
276
277  bool parseConditional() {
278    while (CurrentToken != NULL) {
279      if (CurrentToken->is(tok::colon)) {
280        CurrentToken->Type = TT_ConditionalExpr;
281        next();
282        return true;
283      }
284      if (!consumeToken())
285        return false;
286    }
287    return false;
288  }
289
290  bool parseTemplateDeclaration() {
291    if (CurrentToken != NULL && CurrentToken->is(tok::less)) {
292      CurrentToken->Type = TT_TemplateOpener;
293      next();
294      if (!parseAngle())
295        return false;
296      if (CurrentToken != NULL)
297        CurrentToken->Parent->ClosesTemplateDeclaration = true;
298      return true;
299    }
300    return false;
301  }
302
303  bool consumeToken() {
304    AnnotatedToken *Tok = CurrentToken;
305    next();
306    switch (Tok->FormatTok.Tok.getKind()) {
307    case tok::plus:
308    case tok::minus:
309      if (Tok->Parent == NULL && Line.MustBeDeclaration)
310        Tok->Type = TT_ObjCMethodSpecifier;
311      break;
312    case tok::colon:
313      if (Tok->Parent == NULL)
314        return false;
315      // Colons from ?: are handled in parseConditional().
316      if (Tok->Parent->is(tok::r_paren) && Contexts.size() == 1) {
317        Tok->Type = TT_CtorInitializerColon;
318      } else if (Contexts.back().ColonIsObjCMethodExpr ||
319                 Line.First.Type == TT_ObjCMethodSpecifier) {
320        Tok->Type = TT_ObjCMethodExpr;
321        Tok->Parent->Type = TT_ObjCSelectorName;
322        if (Tok->Parent->FormatTok.TokenLength >
323                Contexts.back().LongestObjCSelectorName)
324          Contexts.back().LongestObjCSelectorName =
325              Tok->Parent->FormatTok.TokenLength;
326        if (Contexts.back().FirstObjCSelectorName == NULL)
327          Contexts.back().FirstObjCSelectorName = Tok->Parent;
328      } else if (Contexts.back().ColonIsForRangeExpr) {
329        Tok->Type = TT_RangeBasedForLoopColon;
330      } else if (Contexts.size() == 1) {
331        Tok->Type = TT_InheritanceColon;
332      } else if (Contexts.back().ContextKind == tok::l_paren) {
333        Tok->Type = TT_InlineASMColon;
334      }
335      break;
336    case tok::kw_if:
337    case tok::kw_while:
338      if (CurrentToken != NULL && CurrentToken->is(tok::l_paren)) {
339        next();
340        if (!parseParens(/*LookForDecls=*/ true))
341          return false;
342      }
343      break;
344    case tok::kw_for:
345      Contexts.back().ColonIsForRangeExpr = true;
346      next();
347      if (!parseParens())
348        return false;
349      break;
350    case tok::l_paren:
351      if (!parseParens())
352        return false;
353      if (Line.MustBeDeclaration && NameFound && !Contexts.back().IsExpression)
354        Line.MightBeFunctionDecl = true;
355      break;
356    case tok::l_square:
357      if (!parseSquare())
358        return false;
359      break;
360    case tok::l_brace:
361      if (!parseBrace())
362        return false;
363      break;
364    case tok::less:
365      if (parseAngle())
366        Tok->Type = TT_TemplateOpener;
367      else {
368        Tok->Type = TT_BinaryOperator;
369        CurrentToken = Tok;
370        next();
371      }
372      break;
373    case tok::r_paren:
374    case tok::r_square:
375      return false;
376    case tok::r_brace:
377      // Lines can start with '}'.
378      if (Tok->Parent != NULL)
379        return false;
380      break;
381    case tok::greater:
382      Tok->Type = TT_BinaryOperator;
383      break;
384    case tok::kw_operator:
385      while (CurrentToken && CurrentToken->isNot(tok::l_paren)) {
386        if (CurrentToken->isOneOf(tok::star, tok::amp))
387          CurrentToken->Type = TT_PointerOrReference;
388        consumeToken();
389      }
390      if (CurrentToken)
391        CurrentToken->Type = TT_OverloadedOperatorLParen;
392      break;
393    case tok::question:
394      parseConditional();
395      break;
396    case tok::kw_template:
397      parseTemplateDeclaration();
398      break;
399    case tok::identifier:
400      if (Line.First.is(tok::kw_for) &&
401          Tok->FormatTok.Tok.getIdentifierInfo() == &Ident_in)
402        Tok->Type = TT_ObjCForIn;
403      break;
404    case tok::comma:
405      if (Contexts.back().FirstStartOfName)
406        Contexts.back().FirstStartOfName->PartOfMultiVariableDeclStmt = true;
407      break;
408    default:
409      break;
410    }
411    return true;
412  }
413
414  void parseIncludeDirective() {
415    next();
416    if (CurrentToken != NULL && CurrentToken->is(tok::less)) {
417      next();
418      while (CurrentToken != NULL) {
419        if (CurrentToken->isNot(tok::comment) ||
420            !CurrentToken->Children.empty())
421          CurrentToken->Type = TT_ImplicitStringLiteral;
422        next();
423      }
424    } else {
425      while (CurrentToken != NULL) {
426        if (CurrentToken->is(tok::string_literal))
427          // Mark these string literals as "implicit" literals, too, so that
428          // they are not split or line-wrapped.
429          CurrentToken->Type = TT_ImplicitStringLiteral;
430        next();
431      }
432    }
433  }
434
435  void parseWarningOrError() {
436    next();
437    // We still want to format the whitespace left of the first token of the
438    // warning or error.
439    next();
440    while (CurrentToken != NULL) {
441      CurrentToken->Type = TT_ImplicitStringLiteral;
442      next();
443    }
444  }
445
446  void parsePreprocessorDirective() {
447    next();
448    if (CurrentToken == NULL)
449      return;
450    // Hashes in the middle of a line can lead to any strange token
451    // sequence.
452    if (CurrentToken->FormatTok.Tok.getIdentifierInfo() == NULL)
453      return;
454    switch (CurrentToken->FormatTok.Tok.getIdentifierInfo()->getPPKeywordID()) {
455    case tok::pp_include:
456    case tok::pp_import:
457      parseIncludeDirective();
458      break;
459    case tok::pp_error:
460    case tok::pp_warning:
461      parseWarningOrError();
462      break;
463    case tok::pp_if:
464    case tok::pp_elif:
465      parseLine();
466      break;
467    default:
468      break;
469    }
470    while (CurrentToken != NULL)
471      next();
472  }
473
474public:
475  LineType parseLine() {
476    int PeriodsAndArrows = 0;
477    AnnotatedToken *LastPeriodOrArrow = NULL;
478    bool CanBeBuilderTypeStmt = true;
479    if (CurrentToken->is(tok::hash)) {
480      parsePreprocessorDirective();
481      return LT_PreprocessorDirective;
482    }
483    while (CurrentToken != NULL) {
484      if (CurrentToken->is(tok::kw_virtual))
485        KeywordVirtualFound = true;
486      if (CurrentToken->isOneOf(tok::period, tok::arrow)) {
487        ++PeriodsAndArrows;
488        LastPeriodOrArrow = CurrentToken;
489      }
490      AnnotatedToken *TheToken = CurrentToken;
491      if (!consumeToken())
492        return LT_Invalid;
493      if (getPrecedence(*TheToken) > prec::Assignment &&
494          TheToken->Type == TT_BinaryOperator)
495        CanBeBuilderTypeStmt = false;
496    }
497    if (KeywordVirtualFound)
498      return LT_VirtualFunctionDecl;
499
500    // Assume a builder-type call if there are 2 or more "." and "->".
501    if (PeriodsAndArrows >= 2 && CanBeBuilderTypeStmt) {
502      LastPeriodOrArrow->LastInChainOfCalls = true;
503      return LT_BuilderTypeCall;
504    }
505
506    if (Line.First.Type == TT_ObjCMethodSpecifier) {
507      if (Contexts.back().FirstObjCSelectorName != NULL)
508        Contexts.back().FirstObjCSelectorName->LongestObjCSelectorName =
509            Contexts.back().LongestObjCSelectorName;
510      return LT_ObjCMethodDecl;
511    }
512
513    return LT_Other;
514  }
515
516private:
517  void next() {
518    if (CurrentToken != NULL) {
519      determineTokenType(*CurrentToken);
520      CurrentToken->BindingStrength = Contexts.back().BindingStrength;
521    }
522
523    if (CurrentToken != NULL && !CurrentToken->Children.empty())
524      CurrentToken = &CurrentToken->Children[0];
525    else
526      CurrentToken = NULL;
527
528    // Reset token type in case we have already looked at it and then recovered
529    // from an error (e.g. failure to find the matching >).
530    if (CurrentToken != NULL)
531      CurrentToken->Type = TT_Unknown;
532  }
533
534  /// \brief A struct to hold information valid in a specific context, e.g.
535  /// a pair of parenthesis.
536  struct Context {
537    Context(tok::TokenKind ContextKind, unsigned BindingStrength,
538            bool IsExpression)
539        : ContextKind(ContextKind), BindingStrength(BindingStrength),
540          LongestObjCSelectorName(0), ColonIsForRangeExpr(false),
541          ColonIsObjCMethodExpr(false), FirstObjCSelectorName(NULL),
542          FirstStartOfName(NULL), IsExpression(IsExpression),
543          CanBeExpression(true) {}
544
545    tok::TokenKind ContextKind;
546    unsigned BindingStrength;
547    unsigned LongestObjCSelectorName;
548    bool ColonIsForRangeExpr;
549    bool ColonIsObjCMethodExpr;
550    AnnotatedToken *FirstObjCSelectorName;
551    AnnotatedToken *FirstStartOfName;
552    bool IsExpression;
553    bool CanBeExpression;
554  };
555
556  /// \brief Puts a new \c Context onto the stack \c Contexts for the lifetime
557  /// of each instance.
558  struct ScopedContextCreator {
559    AnnotatingParser &P;
560
561    ScopedContextCreator(AnnotatingParser &P, tok::TokenKind ContextKind,
562                         unsigned Increase)
563        : P(P) {
564      P.Contexts.push_back(
565          Context(ContextKind, P.Contexts.back().BindingStrength + Increase,
566                  P.Contexts.back().IsExpression));
567    }
568
569    ~ScopedContextCreator() { P.Contexts.pop_back(); }
570  };
571
572  void determineTokenType(AnnotatedToken &Current) {
573    if (getPrecedence(Current) == prec::Assignment &&
574        (!Current.Parent || Current.Parent->isNot(tok::kw_operator))) {
575      Contexts.back().IsExpression = true;
576      for (AnnotatedToken *Previous = Current.Parent;
577           Previous && Previous->isNot(tok::comma);
578           Previous = Previous->Parent) {
579        if (Previous->is(tok::r_square))
580          Previous = Previous->MatchingParen;
581        if (Previous->Type == TT_BinaryOperator &&
582            Previous->isOneOf(tok::star, tok::amp)) {
583          Previous->Type = TT_PointerOrReference;
584        }
585      }
586    } else if (Current.isOneOf(tok::kw_return, tok::kw_throw) ||
587               (Current.is(tok::l_paren) && !Line.MustBeDeclaration &&
588                (!Current.Parent || Current.Parent->isNot(tok::kw_for)))) {
589      Contexts.back().IsExpression = true;
590    } else if (Current.isOneOf(tok::r_paren, tok::greater, tok::comma)) {
591      for (AnnotatedToken *Previous = Current.Parent;
592           Previous && Previous->isOneOf(tok::star, tok::amp);
593           Previous = Previous->Parent)
594        Previous->Type = TT_PointerOrReference;
595    } else if (Current.Parent &&
596               Current.Parent->Type == TT_CtorInitializerColon) {
597      Contexts.back().IsExpression = true;
598    } else if (Current.is(tok::kw_new)) {
599      Contexts.back().CanBeExpression = false;
600    } else if (Current.is(tok::semi)) {
601      // This should be the condition or increment in a for-loop.
602      Contexts.back().IsExpression = true;
603    }
604
605    if (Current.Type == TT_Unknown) {
606      if (Current.Parent && Current.is(tok::identifier) &&
607          ((Current.Parent->is(tok::identifier) &&
608            Current.Parent->FormatTok.Tok.getIdentifierInfo()
609                ->getPPKeywordID() == tok::pp_not_keyword) ||
610           isSimpleTypeSpecifier(*Current.Parent) ||
611           Current.Parent->Type == TT_PointerOrReference ||
612           Current.Parent->Type == TT_TemplateCloser)) {
613        Contexts.back().FirstStartOfName = &Current;
614        Current.Type = TT_StartOfName;
615        NameFound = true;
616      } else if (Current.isOneOf(tok::star, tok::amp, tok::ampamp)) {
617        Current.Type =
618            determineStarAmpUsage(Current, Contexts.back().IsExpression);
619      } else if (Current.isOneOf(tok::minus, tok::plus, tok::caret)) {
620        Current.Type = determinePlusMinusCaretUsage(Current);
621      } else if (Current.isOneOf(tok::minusminus, tok::plusplus)) {
622        Current.Type = determineIncrementUsage(Current);
623      } else if (Current.is(tok::exclaim)) {
624        Current.Type = TT_UnaryOperator;
625      } else if (Current.isBinaryOperator()) {
626        Current.Type = TT_BinaryOperator;
627      } else if (Current.is(tok::comment)) {
628        std::string Data(Lexer::getSpelling(Current.FormatTok.Tok, SourceMgr,
629                                            Lex.getLangOpts()));
630        if (StringRef(Data).startswith("//"))
631          Current.Type = TT_LineComment;
632        else
633          Current.Type = TT_BlockComment;
634      } else if (Current.is(tok::r_paren)) {
635        bool ParensNotExpr = !Current.Parent ||
636                             Current.Parent->Type == TT_PointerOrReference ||
637                             Current.Parent->Type == TT_TemplateCloser;
638        bool ParensCouldEndDecl =
639            !Current.Children.empty() &&
640            Current.Children[0].isOneOf(tok::equal, tok::semi, tok::l_brace);
641        bool IsSizeOfOrAlignOf =
642            Current.MatchingParen && Current.MatchingParen->Parent &&
643            Current.MatchingParen->Parent->isOneOf(tok::kw_sizeof,
644                                                   tok::kw_alignof);
645        if (ParensNotExpr && !ParensCouldEndDecl && !IsSizeOfOrAlignOf &&
646            Contexts.back().IsExpression)
647          // FIXME: We need to get smarter and understand more cases of casts.
648          Current.Type = TT_CastRParen;
649      } else if (Current.is(tok::at) && Current.Children.size()) {
650        switch (Current.Children[0].FormatTok.Tok.getObjCKeywordID()) {
651        case tok::objc_interface:
652        case tok::objc_implementation:
653        case tok::objc_protocol:
654          Current.Type = TT_ObjCDecl;
655          break;
656        case tok::objc_property:
657          Current.Type = TT_ObjCProperty;
658          break;
659        default:
660          break;
661        }
662      }
663    }
664  }
665
666  /// \brief Return the type of the given token assuming it is * or &.
667  TokenType
668  determineStarAmpUsage(const AnnotatedToken &Tok, bool IsExpression) {
669    const AnnotatedToken *PrevToken = Tok.getPreviousNoneComment();
670    if (PrevToken == NULL)
671      return TT_UnaryOperator;
672
673    const AnnotatedToken *NextToken = Tok.getNextNoneComment();
674    if (NextToken == NULL)
675      return TT_Unknown;
676
677    if (PrevToken->is(tok::l_paren) && !IsExpression)
678      return TT_PointerOrReference;
679
680    if (PrevToken->isOneOf(tok::l_paren, tok::l_square, tok::l_brace,
681                           tok::comma, tok::semi, tok::kw_return, tok::colon,
682                           tok::equal) ||
683        PrevToken->Type == TT_BinaryOperator ||
684        PrevToken->Type == TT_UnaryOperator || PrevToken->Type == TT_CastRParen)
685      return TT_UnaryOperator;
686
687    if (NextToken->is(tok::l_square))
688      return TT_PointerOrReference;
689
690    if (PrevToken->FormatTok.Tok.isLiteral() ||
691        PrevToken->isOneOf(tok::r_paren, tok::r_square) ||
692        NextToken->FormatTok.Tok.isLiteral() || NextToken->isUnaryOperator())
693      return TT_BinaryOperator;
694
695    // It is very unlikely that we are going to find a pointer or reference type
696    // definition on the RHS of an assignment.
697    if (IsExpression)
698      return TT_BinaryOperator;
699
700    return TT_PointerOrReference;
701  }
702
703  TokenType determinePlusMinusCaretUsage(const AnnotatedToken &Tok) {
704    const AnnotatedToken *PrevToken = Tok.getPreviousNoneComment();
705    if (PrevToken == NULL)
706      return TT_UnaryOperator;
707
708    // Use heuristics to recognize unary operators.
709    if (PrevToken->isOneOf(tok::equal, tok::l_paren, tok::comma, tok::l_square,
710                           tok::question, tok::colon, tok::kw_return,
711                           tok::kw_case, tok::at, tok::l_brace))
712      return TT_UnaryOperator;
713
714    // There can't be two consecutive binary operators.
715    if (PrevToken->Type == TT_BinaryOperator)
716      return TT_UnaryOperator;
717
718    // Fall back to marking the token as binary operator.
719    return TT_BinaryOperator;
720  }
721
722  /// \brief Determine whether ++/-- are pre- or post-increments/-decrements.
723  TokenType determineIncrementUsage(const AnnotatedToken &Tok) {
724    const AnnotatedToken *PrevToken = Tok.getPreviousNoneComment();
725    if (PrevToken == NULL)
726      return TT_UnaryOperator;
727    if (PrevToken->isOneOf(tok::r_paren, tok::r_square, tok::identifier))
728      return TT_TrailingUnaryOperator;
729
730    return TT_UnaryOperator;
731  }
732
733  // FIXME: This is copy&pasted from Sema. Put it in a common place and remove
734  // duplication.
735  /// \brief Determine whether the token kind starts a simple-type-specifier.
736  bool isSimpleTypeSpecifier(const AnnotatedToken &Tok) const {
737    switch (Tok.FormatTok.Tok.getKind()) {
738    case tok::kw_short:
739    case tok::kw_long:
740    case tok::kw___int64:
741    case tok::kw___int128:
742    case tok::kw_signed:
743    case tok::kw_unsigned:
744    case tok::kw_void:
745    case tok::kw_char:
746    case tok::kw_int:
747    case tok::kw_half:
748    case tok::kw_float:
749    case tok::kw_double:
750    case tok::kw_wchar_t:
751    case tok::kw_bool:
752    case tok::kw___underlying_type:
753      return true;
754    case tok::annot_typename:
755    case tok::kw_char16_t:
756    case tok::kw_char32_t:
757    case tok::kw_typeof:
758    case tok::kw_decltype:
759      return Lex.getLangOpts().CPlusPlus;
760    default:
761      break;
762    }
763    return false;
764  }
765
766  SmallVector<Context, 8> Contexts;
767
768  SourceManager &SourceMgr;
769  Lexer &Lex;
770  AnnotatedLine &Line;
771  AnnotatedToken *CurrentToken;
772  bool KeywordVirtualFound;
773  bool NameFound;
774  IdentifierInfo &Ident_in;
775};
776
777/// \brief Parses binary expressions by inserting fake parenthesis based on
778/// operator precedence.
779class ExpressionParser {
780public:
781  ExpressionParser(AnnotatedLine &Line) : Current(&Line.First) {}
782
783  /// \brief Parse expressions with the given operatore precedence.
784  void parse(int Precedence = 0) {
785    if (Precedence > prec::PointerToMember || Current == NULL)
786      return;
787
788    // Eagerly consume trailing comments.
789    while (Current && Current->isTrailingComment()) {
790      next();
791    }
792
793    AnnotatedToken *Start = Current;
794    bool OperatorFound = false;
795
796    while (Current) {
797      // Consume operators with higher precedence.
798      parse(Precedence + 1);
799
800      int CurrentPrecedence = 0;
801      if (Current) {
802        if (Current->Type == TT_ConditionalExpr)
803          CurrentPrecedence = 1 + (int) prec::Conditional;
804        else if (Current->is(tok::semi) || Current->Type == TT_InlineASMColon)
805          CurrentPrecedence = 1;
806        else if (Current->Type == TT_BinaryOperator || Current->is(tok::comma))
807          CurrentPrecedence = 1 + (int) getPrecedence(*Current);
808      }
809
810      // At the end of the line or when an operator with higher precedence is
811      // found, insert fake parenthesis and return.
812      if (Current == NULL || Current->closesScope() ||
813          (CurrentPrecedence != 0 && CurrentPrecedence < Precedence)) {
814        if (OperatorFound) {
815          Start->FakeLParens.push_back(prec::Level(Precedence - 1));
816          if (Current)
817            ++Current->Parent->FakeRParens;
818        }
819        return;
820      }
821
822      // Consume scopes: (), [], <> and {}
823      if (Current->opensScope()) {
824        while (Current && !Current->closesScope()) {
825          next();
826          parse();
827        }
828        next();
829      } else {
830        // Operator found.
831        if (CurrentPrecedence == Precedence)
832          OperatorFound = true;
833
834        next();
835      }
836    }
837  }
838
839private:
840  void next() {
841    if (Current != NULL)
842      Current = Current->Children.empty() ? NULL : &Current->Children[0];
843  }
844
845  AnnotatedToken *Current;
846};
847
848void TokenAnnotator::annotate(AnnotatedLine &Line) {
849  AnnotatingParser Parser(SourceMgr, Lex, Line, Ident_in);
850  Line.Type = Parser.parseLine();
851  if (Line.Type == LT_Invalid)
852    return;
853
854  ExpressionParser ExprParser(Line);
855  ExprParser.parse();
856
857  if (Line.First.Type == TT_ObjCMethodSpecifier)
858    Line.Type = LT_ObjCMethodDecl;
859  else if (Line.First.Type == TT_ObjCDecl)
860    Line.Type = LT_ObjCDecl;
861  else if (Line.First.Type == TT_ObjCProperty)
862    Line.Type = LT_ObjCProperty;
863
864  Line.First.SpacesRequiredBefore = 1;
865  Line.First.MustBreakBefore = Line.First.FormatTok.MustBreakBefore;
866  Line.First.CanBreakBefore = Line.First.MustBreakBefore;
867
868  Line.First.TotalLength = Line.First.FormatTok.TokenLength;
869}
870
871void TokenAnnotator::calculateFormattingInformation(AnnotatedLine &Line) {
872  if (Line.First.Children.empty())
873    return;
874  AnnotatedToken *Current = &Line.First.Children[0];
875  while (Current != NULL) {
876    if (Current->Type == TT_LineComment)
877      Current->SpacesRequiredBefore = Style.SpacesBeforeTrailingComments;
878    else
879      Current->SpacesRequiredBefore =
880          spaceRequiredBefore(Line, *Current) ? 1 : 0;
881
882    if (Current->FormatTok.MustBreakBefore) {
883      Current->MustBreakBefore = true;
884    } else if (Current->Type == TT_LineComment) {
885      Current->MustBreakBefore = Current->FormatTok.NewlinesBefore > 0;
886    } else if (Current->Parent->isTrailingComment() ||
887               (Current->is(tok::string_literal) &&
888                Current->Parent->is(tok::string_literal))) {
889      Current->MustBreakBefore = true;
890    } else if (Current->is(tok::lessless) && !Current->Children.empty() &&
891               Current->Parent->is(tok::string_literal) &&
892               Current->Children[0].is(tok::string_literal)) {
893      Current->MustBreakBefore = true;
894    } else {
895      Current->MustBreakBefore = false;
896    }
897    Current->CanBreakBefore =
898        Current->MustBreakBefore || canBreakBefore(Line, *Current);
899    if (Current->MustBreakBefore)
900      Current->TotalLength = Current->Parent->TotalLength + Style.ColumnLimit;
901    else
902      Current->TotalLength =
903          Current->Parent->TotalLength + Current->FormatTok.TokenLength +
904          Current->SpacesRequiredBefore;
905    // FIXME: Only calculate this if CanBreakBefore is true once static
906    // initializers etc. are sorted out.
907    // FIXME: Move magic numbers to a better place.
908    Current->SplitPenalty =
909        20 * Current->BindingStrength + splitPenalty(Line, *Current);
910
911    Current = Current->Children.empty() ? NULL : &Current->Children[0];
912  }
913
914  DEBUG({
915    printDebugInfo(Line);
916  });
917}
918
919unsigned TokenAnnotator::splitPenalty(const AnnotatedLine &Line,
920                                      const AnnotatedToken &Tok) {
921  const AnnotatedToken &Left = *Tok.Parent;
922  const AnnotatedToken &Right = Tok;
923
924  if (Right.Type == TT_StartOfName) {
925    if (Line.First.is(tok::kw_for) && Right.PartOfMultiVariableDeclStmt)
926      return 3;
927    else if (Line.MightBeFunctionDecl && Right.BindingStrength == 1)
928      // FIXME: Clean up hack of using BindingStrength to find top-level names.
929      return Style.PenaltyReturnTypeOnItsOwnLine;
930    else
931      return 200;
932  }
933  if (Left.is(tok::equal) && Right.is(tok::l_brace))
934    return 150;
935  if (Left.is(tok::coloncolon))
936    return 500;
937  if (Left.isOneOf(tok::kw_class, tok::kw_struct))
938    return 5000;
939
940  if (Left.Type == TT_RangeBasedForLoopColon ||
941      Left.Type == TT_InheritanceColon)
942    return 2;
943
944  if (Right.isOneOf(tok::arrow, tok::period)) {
945    if (Line.Type == LT_BuilderTypeCall)
946      return prec::PointerToMember;
947    if (Left.isOneOf(tok::r_paren, tok::r_square) && Left.MatchingParen &&
948        Left.MatchingParen->ParameterCount > 0)
949      return 20; // Should be smaller than breaking at a nested comma.
950    return 150;
951  }
952
953  // In for-loops, prefer breaking at ',' and ';'.
954  if (Line.First.is(tok::kw_for) && Left.is(tok::equal))
955    return 4;
956
957  if (Left.is(tok::semi))
958    return 0;
959  if (Left.is(tok::comma))
960    return 1;
961
962  // In Objective-C method expressions, prefer breaking before "param:" over
963  // breaking after it.
964  if (Right.Type == TT_ObjCSelectorName)
965    return 0;
966  if (Left.is(tok::colon) && Left.Type == TT_ObjCMethodExpr)
967    return 20;
968
969  if (Left.is(tok::l_paren) && Line.MightBeFunctionDecl)
970    return 100;
971  if (Left.opensScope())
972    return Left.ParameterCount > 1 ? prec::Comma : 20;
973
974  if (Right.is(tok::lessless)) {
975    if (Left.is(tok::string_literal)) {
976      StringRef Content = StringRef(Left.FormatTok.Tok.getLiteralData(),
977                                    Left.FormatTok.TokenLength);
978      Content = Content.drop_back(1).drop_front(1).trim();
979      if (Content.size() > 1 &&
980          (Content.back() == ':' || Content.back() == '='))
981        return 100;
982    }
983    return prec::Shift;
984  }
985  if (Left.Type == TT_ConditionalExpr)
986    return prec::Conditional;
987  prec::Level Level = getPrecedence(Left);
988
989  if (Level != prec::Unknown)
990    return Level;
991
992  return 3;
993}
994
995bool TokenAnnotator::spaceRequiredBetween(const AnnotatedLine &Line,
996                                          const AnnotatedToken &Left,
997                                          const AnnotatedToken &Right) {
998  if (Right.is(tok::hashhash))
999    return Left.is(tok::hash);
1000  if (Left.isOneOf(tok::hashhash, tok::hash))
1001    return Right.is(tok::hash);
1002  if (Right.isOneOf(tok::r_paren, tok::semi, tok::comma))
1003    return false;
1004  if (Right.is(tok::less) &&
1005      (Left.is(tok::kw_template) ||
1006       (Line.Type == LT_ObjCDecl && Style.ObjCSpaceBeforeProtocolList)))
1007    return true;
1008  if (Left.is(tok::arrow) || Right.is(tok::arrow))
1009    return false;
1010  if (Left.isOneOf(tok::exclaim, tok::tilde))
1011    return false;
1012  if (Left.is(tok::at) &&
1013      Right.isOneOf(tok::identifier, tok::string_literal, tok::char_constant,
1014                    tok::numeric_constant, tok::l_paren, tok::l_brace,
1015                    tok::kw_true, tok::kw_false))
1016    return false;
1017  if (Left.is(tok::coloncolon))
1018    return false;
1019  if (Right.is(tok::coloncolon))
1020    return !Left.isOneOf(tok::identifier, tok::greater, tok::l_paren);
1021  if (Left.is(tok::less) || Right.isOneOf(tok::greater, tok::less))
1022    return false;
1023  if (Right.Type == TT_PointerOrReference)
1024    return Left.FormatTok.Tok.isLiteral() ||
1025           ((Left.Type != TT_PointerOrReference) && Left.isNot(tok::l_paren) &&
1026            !Style.PointerBindsToType);
1027  if (Left.Type == TT_PointerOrReference)
1028    return Right.FormatTok.Tok.isLiteral() ||
1029           ((Right.Type != TT_PointerOrReference) &&
1030            Right.isNot(tok::l_paren) && Style.PointerBindsToType &&
1031            Left.Parent && Left.Parent->isNot(tok::l_paren));
1032  if (Right.is(tok::star) && Left.is(tok::l_paren))
1033    return false;
1034  if (Left.is(tok::l_square))
1035    return Left.Type == TT_ObjCArrayLiteral && Right.isNot(tok::r_square);
1036  if (Right.is(tok::r_square))
1037    return Right.Type == TT_ObjCArrayLiteral;
1038  if (Right.is(tok::l_square) && Right.Type != TT_ObjCMethodExpr)
1039    return false;
1040  if (Left.is(tok::period) || Right.is(tok::period))
1041    return false;
1042  if (Left.is(tok::colon))
1043    return Left.Type != TT_ObjCMethodExpr;
1044  if (Right.is(tok::colon))
1045    return Right.Type != TT_ObjCMethodExpr;
1046  if (Left.is(tok::l_paren))
1047    return false;
1048  if (Right.is(tok::l_paren)) {
1049    return Line.Type == LT_ObjCDecl ||
1050           Left.isOneOf(tok::kw_if, tok::kw_for, tok::kw_while, tok::kw_switch,
1051                        tok::kw_return, tok::kw_catch, tok::kw_new,
1052                        tok::kw_delete, tok::semi);
1053  }
1054  if (Left.is(tok::at) &&
1055      Right.FormatTok.Tok.getObjCKeywordID() != tok::objc_not_keyword)
1056    return false;
1057  if (Left.is(tok::l_brace) && Right.is(tok::r_brace))
1058    return false;
1059  if (Right.is(tok::ellipsis))
1060    return false;
1061  return true;
1062}
1063
1064bool TokenAnnotator::spaceRequiredBefore(const AnnotatedLine &Line,
1065                                         const AnnotatedToken &Tok) {
1066  if (Tok.FormatTok.Tok.getIdentifierInfo() &&
1067      Tok.Parent->FormatTok.Tok.getIdentifierInfo())
1068    return true; // Never ever merge two identifiers.
1069  if (Line.Type == LT_ObjCMethodDecl) {
1070    if (Tok.Parent->Type == TT_ObjCMethodSpecifier)
1071      return true;
1072    if (Tok.Parent->is(tok::r_paren) && Tok.is(tok::identifier))
1073      // Don't space between ')' and <id>
1074      return false;
1075  }
1076  if (Line.Type == LT_ObjCProperty &&
1077      (Tok.is(tok::equal) || Tok.Parent->is(tok::equal)))
1078    return false;
1079
1080  if (Tok.Parent->is(tok::comma))
1081    return true;
1082  if (Tok.is(tok::comma))
1083    return false;
1084  if (Tok.Type == TT_CtorInitializerColon || Tok.Type == TT_ObjCBlockLParen)
1085    return true;
1086  if (Tok.Parent->FormatTok.Tok.is(tok::kw_operator))
1087    return false;
1088  if (Tok.Type == TT_OverloadedOperatorLParen)
1089    return false;
1090  if (Tok.is(tok::colon))
1091    return !Line.First.isOneOf(tok::kw_case, tok::kw_default) &&
1092           Tok.getNextNoneComment() != NULL && Tok.Type != TT_ObjCMethodExpr;
1093  if (Tok.is(tok::l_paren) && !Tok.Children.empty() &&
1094      Tok.Children[0].Type == TT_PointerOrReference &&
1095      !Tok.Children[0].Children.empty() &&
1096      Tok.Children[0].Children[0].isNot(tok::r_paren) &&
1097      Tok.Parent->isNot(tok::l_paren) &&
1098      (Tok.Parent->Type != TT_PointerOrReference || Style.PointerBindsToType))
1099    return true;
1100  if (Tok.Parent->Type == TT_UnaryOperator || Tok.Parent->Type == TT_CastRParen)
1101    return false;
1102  if (Tok.Type == TT_UnaryOperator)
1103    return !Tok.Parent->isOneOf(tok::l_paren, tok::l_square, tok::at) &&
1104           (Tok.Parent->isNot(tok::colon) ||
1105            Tok.Parent->Type != TT_ObjCMethodExpr);
1106  if (Tok.Parent->is(tok::greater) && Tok.is(tok::greater)) {
1107    return Tok.Type == TT_TemplateCloser &&
1108           Tok.Parent->Type == TT_TemplateCloser &&
1109           Style.Standard != FormatStyle::LS_Cpp11;
1110  }
1111  if (Tok.isOneOf(tok::arrowstar, tok::periodstar) ||
1112      Tok.Parent->isOneOf(tok::arrowstar, tok::periodstar))
1113    return false;
1114  if (Tok.Type == TT_BinaryOperator || Tok.Parent->Type == TT_BinaryOperator)
1115    return true;
1116  if (Tok.Parent->Type == TT_TemplateCloser && Tok.is(tok::l_paren))
1117    return false;
1118  if (Tok.is(tok::less) && Line.First.is(tok::hash))
1119    return true;
1120  if (Tok.Type == TT_TrailingUnaryOperator)
1121    return false;
1122  return spaceRequiredBetween(Line, *Tok.Parent, Tok);
1123}
1124
1125bool TokenAnnotator::canBreakBefore(const AnnotatedLine &Line,
1126                                    const AnnotatedToken &Right) {
1127  const AnnotatedToken &Left = *Right.Parent;
1128  if (Right.Type == TT_StartOfName)
1129    return true;
1130  if (Right.is(tok::colon) && Right.Type == TT_ObjCMethodExpr)
1131    return false;
1132  if (Left.is(tok::colon) && Left.Type == TT_ObjCMethodExpr)
1133    return true;
1134  if (Right.Type == TT_ObjCSelectorName)
1135    return true;
1136  if (Left.ClosesTemplateDeclaration)
1137    return true;
1138  if (Right.Type == TT_ConditionalExpr || Right.is(tok::question))
1139    return true;
1140  if (Right.Type == TT_RangeBasedForLoopColon ||
1141      Right.Type == TT_OverloadedOperatorLParen)
1142    return false;
1143  if (Left.Type == TT_RangeBasedForLoopColon)
1144    return true;
1145  if (Right.Type == TT_RangeBasedForLoopColon)
1146    return false;
1147  if (Left.Type == TT_PointerOrReference || Left.Type == TT_TemplateCloser ||
1148      Left.Type == TT_UnaryOperator || Left.Type == TT_ConditionalExpr ||
1149      Left.isOneOf(tok::question, tok::kw_operator))
1150    return false;
1151  if (Left.is(tok::equal) && Line.Type == LT_VirtualFunctionDecl)
1152    return false;
1153  if (Left.is(tok::l_paren) && Right.is(tok::l_paren) && Left.Parent &&
1154      Left.Parent->is(tok::kw___attribute))
1155    return false;
1156
1157  if (Right.Type == TT_LineComment)
1158    // We rely on MustBreakBefore being set correctly here as we should not
1159    // change the "binding" behavior of a comment.
1160    return false;
1161
1162  // Allow breaking after a trailing 'const', e.g. after a method declaration,
1163  // unless it is follow by ';', '{' or '='.
1164  if (Left.is(tok::kw_const) && Left.Parent != NULL &&
1165      Left.Parent->is(tok::r_paren))
1166    return !Right.isOneOf(tok::l_brace, tok::semi, tok::equal);
1167
1168  if (Right.is(tok::kw___attribute))
1169    return true;
1170
1171  // We only break before r_brace if there was a corresponding break before
1172  // the l_brace, which is tracked by BreakBeforeClosingBrace.
1173  if (Right.isOneOf(tok::r_brace, tok::r_paren, tok::greater))
1174    return false;
1175  if (Left.is(tok::identifier) && Right.is(tok::string_literal))
1176    return true;
1177  return (Left.isBinaryOperator() && Left.isNot(tok::lessless)) ||
1178         Left.isOneOf(tok::comma, tok::coloncolon, tok::semi, tok::l_brace,
1179                      tok::kw_class, tok::kw_struct) ||
1180         Right.isOneOf(tok::lessless, tok::arrow, tok::period, tok::colon) ||
1181         (Left.is(tok::r_paren) && Left.Type != TT_CastRParen &&
1182          Right.isOneOf(tok::identifier, tok::kw___attribute)) ||
1183         (Left.is(tok::l_paren) && !Right.is(tok::r_paren)) ||
1184         (Left.is(tok::l_square) && !Right.is(tok::r_square));
1185}
1186
1187void TokenAnnotator::printDebugInfo(const AnnotatedLine &Line) {
1188  llvm::errs() << "AnnotatedTokens:\n";
1189  const AnnotatedToken *Tok = &Line.First;
1190  while (Tok) {
1191    llvm::errs() << " M=" << Tok->MustBreakBefore
1192                 << " C=" << Tok->CanBreakBefore << " T=" << Tok->Type
1193                 << " S=" << Tok->SpacesRequiredBefore
1194                 << " P=" << Tok->SplitPenalty
1195                 << " Name=" << Tok->FormatTok.Tok.getName() << " FakeLParens=";
1196    for (unsigned i = 0, e = Tok->FakeLParens.size(); i != e; ++i)
1197      llvm::errs() << Tok->FakeLParens[i] << "/";
1198    llvm::errs() << " FakeRParens=" << Tok->FakeRParens << "\n";
1199    Tok = Tok->Children.empty() ? NULL : &Tok->Children[0];
1200  }
1201  llvm::errs() << "----\n";
1202}
1203
1204} // namespace format
1205} // namespace clang
1206