TokenAnnotator.cpp revision 263508
1//===--- TokenAnnotator.cpp - Format C++ code -----------------------------===//
2//
3//                     The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9///
10/// \file
11/// \brief This file implements a token annotator, i.e. creates
12/// \c AnnotatedTokens out of \c FormatTokens with required extra information.
13///
14//===----------------------------------------------------------------------===//
15
16#include "TokenAnnotator.h"
17#include "clang/Basic/SourceManager.h"
18#include "llvm/Support/Debug.h"
19
20namespace clang {
21namespace format {
22
23namespace {
24
25/// \brief A parser that gathers additional information about tokens.
26///
27/// The \c TokenAnnotator tries to match parenthesis and square brakets and
28/// store a parenthesis levels. It also tries to resolve matching "<" and ">"
29/// into template parameter lists.
30class AnnotatingParser {
31public:
32  AnnotatingParser(const FormatStyle &Style, AnnotatedLine &Line,
33                   IdentifierInfo &Ident_in)
34      : Style(Style), Line(Line), CurrentToken(Line.First),
35        KeywordVirtualFound(false), AutoFound(false), Ident_in(Ident_in) {
36    Contexts.push_back(Context(tok::unknown, 1, /*IsExpression=*/false));
37  }
38
39private:
40  bool parseAngle() {
41    if (CurrentToken == NULL)
42      return false;
43    ScopedContextCreator ContextCreator(*this, tok::less, 10);
44    FormatToken *Left = CurrentToken->Previous;
45    Contexts.back().IsExpression = false;
46    while (CurrentToken != NULL) {
47      if (CurrentToken->is(tok::greater)) {
48        Left->MatchingParen = CurrentToken;
49        CurrentToken->MatchingParen = Left;
50        CurrentToken->Type = TT_TemplateCloser;
51        next();
52        return true;
53      }
54      if (CurrentToken->isOneOf(tok::r_paren, tok::r_square, tok::r_brace,
55                                tok::question, tok::colon))
56        return false;
57      // If a && or || is found and interpreted as a binary operator, this set
58      // of angles is likely part of something like "a < b && c > d". If the
59      // angles are inside an expression, the ||/&& might also be a binary
60      // operator that was misinterpreted because we are parsing template
61      // parameters.
62      // FIXME: This is getting out of hand, write a decent parser.
63      if (CurrentToken->Previous->isOneOf(tok::pipepipe, tok::ampamp) &&
64          (CurrentToken->Previous->Type == TT_BinaryOperator ||
65           Contexts[Contexts.size() - 2].IsExpression) &&
66          Line.First->isNot(tok::kw_template))
67        return false;
68      updateParameterCount(Left, CurrentToken);
69      if (!consumeToken())
70        return false;
71    }
72    return false;
73  }
74
75  bool parseParens(bool LookForDecls = false) {
76    if (CurrentToken == NULL)
77      return false;
78    ScopedContextCreator ContextCreator(*this, tok::l_paren, 1);
79
80    // FIXME: This is a bit of a hack. Do better.
81    Contexts.back().ColonIsForRangeExpr =
82        Contexts.size() == 2 && Contexts[0].ColonIsForRangeExpr;
83
84    bool StartsObjCMethodExpr = false;
85    FormatToken *Left = CurrentToken->Previous;
86    if (CurrentToken->is(tok::caret)) {
87      // ^( starts a block.
88      Left->Type = TT_ObjCBlockLParen;
89    } else if (FormatToken *MaybeSel = Left->Previous) {
90      // @selector( starts a selector.
91      if (MaybeSel->isObjCAtKeyword(tok::objc_selector) && MaybeSel->Previous &&
92          MaybeSel->Previous->is(tok::at)) {
93        StartsObjCMethodExpr = true;
94      }
95    }
96
97    if (Left->Previous && Left->Previous->isOneOf(tok::kw_static_assert,
98                                                  tok::kw_if, tok::kw_while)) {
99      // static_assert, if and while usually contain expressions.
100      Contexts.back().IsExpression = true;
101    } else if (Left->Previous && Left->Previous->is(tok::r_square) &&
102               Left->Previous->MatchingParen &&
103               Left->Previous->MatchingParen->Type == TT_LambdaLSquare) {
104      // This is a parameter list of a lambda expression.
105      Contexts.back().IsExpression = false;
106    }
107
108    if (StartsObjCMethodExpr) {
109      Contexts.back().ColonIsObjCMethodExpr = true;
110      Left->Type = TT_ObjCMethodExpr;
111    }
112
113    bool MightBeFunctionType = CurrentToken->is(tok::star);
114    bool HasMultipleLines = false;
115    bool HasMultipleParametersOnALine = false;
116    while (CurrentToken != NULL) {
117      // LookForDecls is set when "if (" has been seen. Check for
118      // 'identifier' '*' 'identifier' followed by not '=' -- this
119      // '*' has to be a binary operator but determineStarAmpUsage() will
120      // categorize it as an unary operator, so set the right type here.
121      if (LookForDecls && CurrentToken->Next) {
122        FormatToken *Prev = CurrentToken->getPreviousNonComment();
123        if (Prev) {
124          FormatToken *PrevPrev = Prev->getPreviousNonComment();
125          FormatToken *Next = CurrentToken->Next;
126          if (PrevPrev && PrevPrev->is(tok::identifier) &&
127              Prev->isOneOf(tok::star, tok::amp, tok::ampamp) &&
128              CurrentToken->is(tok::identifier) && Next->isNot(tok::equal)) {
129            Prev->Type = TT_BinaryOperator;
130            LookForDecls = false;
131          }
132        }
133      }
134
135      if (CurrentToken->Previous->Type == TT_PointerOrReference &&
136          CurrentToken->Previous->Previous->isOneOf(tok::l_paren,
137                                                    tok::coloncolon))
138        MightBeFunctionType = true;
139      if (CurrentToken->is(tok::r_paren)) {
140        if (MightBeFunctionType && CurrentToken->Next &&
141            (CurrentToken->Next->is(tok::l_paren) ||
142             (CurrentToken->Next->is(tok::l_square) &&
143              !Contexts.back().IsExpression)))
144          Left->Type = TT_FunctionTypeLParen;
145        Left->MatchingParen = CurrentToken;
146        CurrentToken->MatchingParen = Left;
147
148        if (StartsObjCMethodExpr) {
149          CurrentToken->Type = TT_ObjCMethodExpr;
150          if (Contexts.back().FirstObjCSelectorName != NULL) {
151            Contexts.back().FirstObjCSelectorName->LongestObjCSelectorName =
152                Contexts.back().LongestObjCSelectorName;
153          }
154        }
155
156        if (!HasMultipleLines)
157          Left->PackingKind = PPK_Inconclusive;
158        else if (HasMultipleParametersOnALine)
159          Left->PackingKind = PPK_BinPacked;
160        else
161          Left->PackingKind = PPK_OnePerLine;
162
163        next();
164        return true;
165      }
166      if (CurrentToken->isOneOf(tok::r_square, tok::r_brace))
167        return false;
168      updateParameterCount(Left, CurrentToken);
169      if (CurrentToken->is(tok::comma) && CurrentToken->Next &&
170          !CurrentToken->Next->HasUnescapedNewline &&
171          !CurrentToken->Next->isTrailingComment())
172        HasMultipleParametersOnALine = true;
173      if (!consumeToken())
174        return false;
175      if (CurrentToken && CurrentToken->HasUnescapedNewline)
176        HasMultipleLines = true;
177    }
178    return false;
179  }
180
181  bool parseSquare() {
182    if (!CurrentToken)
183      return false;
184
185    // A '[' could be an index subscript (after an identifier or after
186    // ')' or ']'), it could be the start of an Objective-C method
187    // expression, or it could the the start of an Objective-C array literal.
188    FormatToken *Left = CurrentToken->Previous;
189    FormatToken *Parent = Left->getPreviousNonComment();
190    bool StartsObjCMethodExpr =
191        Contexts.back().CanBeExpression && Left->Type != TT_LambdaLSquare &&
192        (!Parent || Parent->isOneOf(tok::colon, tok::l_square, tok::l_paren,
193                                    tok::kw_return, tok::kw_throw) ||
194         Parent->isUnaryOperator() || Parent->Type == TT_ObjCForIn ||
195         Parent->Type == TT_CastRParen ||
196         getBinOpPrecedence(Parent->Tok.getKind(), true, true) > prec::Unknown);
197    ScopedContextCreator ContextCreator(*this, tok::l_square, 10);
198    Contexts.back().IsExpression = true;
199    bool ColonFound = false;
200
201    if (StartsObjCMethodExpr) {
202      Contexts.back().ColonIsObjCMethodExpr = true;
203      Left->Type = TT_ObjCMethodExpr;
204    } else if (Parent && Parent->is(tok::at)) {
205      Left->Type = TT_ArrayInitializerLSquare;
206    } else if (Left->Type == TT_Unknown) {
207      Left->Type = TT_ArraySubscriptLSquare;
208    }
209
210    while (CurrentToken != NULL) {
211      if (CurrentToken->is(tok::r_square)) {
212        if (CurrentToken->Next && CurrentToken->Next->is(tok::l_paren) &&
213            Left->Type == TT_ObjCMethodExpr) {
214          // An ObjC method call is rarely followed by an open parenthesis.
215          // FIXME: Do we incorrectly label ":" with this?
216          StartsObjCMethodExpr = false;
217          Left->Type = TT_Unknown;
218        }
219        if (StartsObjCMethodExpr) {
220          CurrentToken->Type = TT_ObjCMethodExpr;
221          // determineStarAmpUsage() thinks that '*' '[' is allocating an
222          // array of pointers, but if '[' starts a selector then '*' is a
223          // binary operator.
224          if (Parent != NULL && Parent->Type == TT_PointerOrReference)
225            Parent->Type = TT_BinaryOperator;
226        }
227        Left->MatchingParen = CurrentToken;
228        CurrentToken->MatchingParen = Left;
229        if (Contexts.back().FirstObjCSelectorName != NULL)
230          Contexts.back().FirstObjCSelectorName->LongestObjCSelectorName =
231              Contexts.back().LongestObjCSelectorName;
232        next();
233        return true;
234      }
235      if (CurrentToken->isOneOf(tok::r_paren, tok::r_brace))
236        return false;
237      if (CurrentToken->is(tok::colon))
238        ColonFound = true;
239      if (CurrentToken->is(tok::comma) &&
240          (Left->Type == TT_ArraySubscriptLSquare ||
241           (Left->Type == TT_ObjCMethodExpr && !ColonFound)))
242        Left->Type = TT_ArrayInitializerLSquare;
243      updateParameterCount(Left, CurrentToken);
244      if (!consumeToken())
245        return false;
246    }
247    return false;
248  }
249
250  bool parseBrace() {
251    if (CurrentToken != NULL) {
252      FormatToken *Left = CurrentToken->Previous;
253      ScopedContextCreator ContextCreator(*this, tok::l_brace, 1);
254      Contexts.back().ColonIsDictLiteral = true;
255
256      while (CurrentToken != NULL) {
257        if (CurrentToken->is(tok::r_brace)) {
258          Left->MatchingParen = CurrentToken;
259          CurrentToken->MatchingParen = Left;
260          next();
261          return true;
262        }
263        if (CurrentToken->isOneOf(tok::r_paren, tok::r_square))
264          return false;
265        updateParameterCount(Left, CurrentToken);
266        if (CurrentToken->is(tok::colon))
267          Left->Type = TT_DictLiteral;
268        if (!consumeToken())
269          return false;
270      }
271    }
272    // No closing "}" found, this probably starts a definition.
273    Line.StartsDefinition = true;
274    return true;
275  }
276
277  void updateParameterCount(FormatToken *Left, FormatToken *Current) {
278    if (Current->is(tok::comma)) {
279      ++Left->ParameterCount;
280      if (!Left->Role)
281        Left->Role.reset(new CommaSeparatedList(Style));
282      Left->Role->CommaFound(Current);
283    } else if (Left->ParameterCount == 0 && Current->isNot(tok::comment)) {
284      Left->ParameterCount = 1;
285    }
286  }
287
288  bool parseConditional() {
289    while (CurrentToken != NULL) {
290      if (CurrentToken->is(tok::colon)) {
291        CurrentToken->Type = TT_ConditionalExpr;
292        next();
293        return true;
294      }
295      if (!consumeToken())
296        return false;
297    }
298    return false;
299  }
300
301  bool parseTemplateDeclaration() {
302    if (CurrentToken != NULL && CurrentToken->is(tok::less)) {
303      CurrentToken->Type = TT_TemplateOpener;
304      next();
305      if (!parseAngle())
306        return false;
307      if (CurrentToken != NULL)
308        CurrentToken->Previous->ClosesTemplateDeclaration = true;
309      return true;
310    }
311    return false;
312  }
313
314  bool consumeToken() {
315    FormatToken *Tok = CurrentToken;
316    next();
317    switch (Tok->Tok.getKind()) {
318    case tok::plus:
319    case tok::minus:
320      if (Tok->Previous == NULL && Line.MustBeDeclaration)
321        Tok->Type = TT_ObjCMethodSpecifier;
322      break;
323    case tok::colon:
324      if (Tok->Previous == NULL)
325        return false;
326      // Colons from ?: are handled in parseConditional().
327      if (Tok->Previous->is(tok::r_paren) && Contexts.size() == 1) {
328        Tok->Type = TT_CtorInitializerColon;
329      } else if (Contexts.back().ColonIsDictLiteral) {
330        Tok->Type = TT_DictLiteral;
331      } else if (Contexts.back().ColonIsObjCMethodExpr ||
332                 Line.First->Type == TT_ObjCMethodSpecifier) {
333        Tok->Type = TT_ObjCMethodExpr;
334        Tok->Previous->Type = TT_ObjCSelectorName;
335        if (Tok->Previous->ColumnWidth >
336            Contexts.back().LongestObjCSelectorName) {
337          Contexts.back().LongestObjCSelectorName = Tok->Previous->ColumnWidth;
338        }
339        if (Contexts.back().FirstObjCSelectorName == NULL)
340          Contexts.back().FirstObjCSelectorName = Tok->Previous;
341      } else if (Contexts.back().ColonIsForRangeExpr) {
342        Tok->Type = TT_RangeBasedForLoopColon;
343      } else if (CurrentToken != NULL &&
344                 CurrentToken->is(tok::numeric_constant)) {
345        Tok->Type = TT_BitFieldColon;
346      } else if (Contexts.size() == 1 && Line.First->isNot(tok::kw_enum)) {
347        Tok->Type = TT_InheritanceColon;
348      } else if (Contexts.back().ContextKind == tok::l_paren) {
349        Tok->Type = TT_InlineASMColon;
350      }
351      break;
352    case tok::kw_if:
353    case tok::kw_while:
354      if (CurrentToken != NULL && CurrentToken->is(tok::l_paren)) {
355        next();
356        if (!parseParens(/*LookForDecls=*/true))
357          return false;
358      }
359      break;
360    case tok::kw_for:
361      Contexts.back().ColonIsForRangeExpr = true;
362      next();
363      if (!parseParens())
364        return false;
365      break;
366    case tok::l_paren:
367      if (!parseParens())
368        return false;
369      if (Line.MustBeDeclaration && Contexts.size() == 1 &&
370          !Contexts.back().IsExpression)
371        Line.MightBeFunctionDecl = true;
372      break;
373    case tok::l_square:
374      if (!parseSquare())
375        return false;
376      break;
377    case tok::l_brace:
378      if (!parseBrace())
379        return false;
380      break;
381    case tok::less:
382      if (Tok->Previous && !Tok->Previous->Tok.isLiteral() && parseAngle())
383        Tok->Type = TT_TemplateOpener;
384      else {
385        Tok->Type = TT_BinaryOperator;
386        CurrentToken = Tok;
387        next();
388      }
389      break;
390    case tok::r_paren:
391    case tok::r_square:
392      return false;
393    case tok::r_brace:
394      // Lines can start with '}'.
395      if (Tok->Previous != NULL)
396        return false;
397      break;
398    case tok::greater:
399      Tok->Type = TT_BinaryOperator;
400      break;
401    case tok::kw_operator:
402      while (CurrentToken &&
403             !CurrentToken->isOneOf(tok::l_paren, tok::semi, tok::r_paren)) {
404        if (CurrentToken->isOneOf(tok::star, tok::amp))
405          CurrentToken->Type = TT_PointerOrReference;
406        consumeToken();
407        if (CurrentToken && CurrentToken->Previous->Type == TT_BinaryOperator)
408          CurrentToken->Previous->Type = TT_OverloadedOperator;
409      }
410      if (CurrentToken) {
411        CurrentToken->Type = TT_OverloadedOperatorLParen;
412        if (CurrentToken->Previous->Type == TT_BinaryOperator)
413          CurrentToken->Previous->Type = TT_OverloadedOperator;
414      }
415      break;
416    case tok::question:
417      parseConditional();
418      break;
419    case tok::kw_template:
420      parseTemplateDeclaration();
421      break;
422    case tok::identifier:
423      if (Line.First->is(tok::kw_for) &&
424          Tok->Tok.getIdentifierInfo() == &Ident_in)
425        Tok->Type = TT_ObjCForIn;
426      break;
427    case tok::comma:
428      if (Contexts.back().FirstStartOfName)
429        Contexts.back().FirstStartOfName->PartOfMultiVariableDeclStmt = true;
430      if (Contexts.back().InCtorInitializer)
431        Tok->Type = TT_CtorInitializerComma;
432      break;
433    default:
434      break;
435    }
436    return true;
437  }
438
439  void parseIncludeDirective() {
440    next();
441    if (CurrentToken != NULL && CurrentToken->is(tok::less)) {
442      next();
443      while (CurrentToken != NULL) {
444        if (CurrentToken->isNot(tok::comment) || CurrentToken->Next)
445          CurrentToken->Type = TT_ImplicitStringLiteral;
446        next();
447      }
448    } else {
449      while (CurrentToken != NULL) {
450        if (CurrentToken->is(tok::string_literal))
451          // Mark these string literals as "implicit" literals, too, so that
452          // they are not split or line-wrapped.
453          CurrentToken->Type = TT_ImplicitStringLiteral;
454        next();
455      }
456    }
457  }
458
459  void parseWarningOrError() {
460    next();
461    // We still want to format the whitespace left of the first token of the
462    // warning or error.
463    next();
464    while (CurrentToken != NULL) {
465      CurrentToken->Type = TT_ImplicitStringLiteral;
466      next();
467    }
468  }
469
470  void parsePreprocessorDirective() {
471    next();
472    if (CurrentToken == NULL)
473      return;
474    if (CurrentToken->Tok.is(tok::numeric_constant)) {
475      CurrentToken->SpacesRequiredBefore = 1;
476      return;
477    }
478    // Hashes in the middle of a line can lead to any strange token
479    // sequence.
480    if (CurrentToken->Tok.getIdentifierInfo() == NULL)
481      return;
482    switch (CurrentToken->Tok.getIdentifierInfo()->getPPKeywordID()) {
483    case tok::pp_include:
484    case tok::pp_import:
485      parseIncludeDirective();
486      break;
487    case tok::pp_error:
488    case tok::pp_warning:
489      parseWarningOrError();
490      break;
491    case tok::pp_if:
492    case tok::pp_elif:
493      parseLine();
494      break;
495    default:
496      break;
497    }
498    while (CurrentToken != NULL)
499      next();
500  }
501
502public:
503  LineType parseLine() {
504    if (CurrentToken->is(tok::hash)) {
505      parsePreprocessorDirective();
506      return LT_PreprocessorDirective;
507    }
508    while (CurrentToken != NULL) {
509      if (CurrentToken->is(tok::kw_virtual))
510        KeywordVirtualFound = true;
511      if (!consumeToken())
512        return LT_Invalid;
513    }
514    if (KeywordVirtualFound)
515      return LT_VirtualFunctionDecl;
516
517    if (Line.First->Type == TT_ObjCMethodSpecifier) {
518      if (Contexts.back().FirstObjCSelectorName != NULL)
519        Contexts.back().FirstObjCSelectorName->LongestObjCSelectorName =
520            Contexts.back().LongestObjCSelectorName;
521      return LT_ObjCMethodDecl;
522    }
523
524    return LT_Other;
525  }
526
527private:
528  void next() {
529    if (CurrentToken != NULL) {
530      determineTokenType(*CurrentToken);
531      CurrentToken->BindingStrength = Contexts.back().BindingStrength;
532    }
533
534    if (CurrentToken != NULL)
535      CurrentToken = CurrentToken->Next;
536
537    if (CurrentToken != NULL) {
538      // Reset token type in case we have already looked at it and then
539      // recovered from an error (e.g. failure to find the matching >).
540      if (CurrentToken->Type != TT_LambdaLSquare &&
541          CurrentToken->Type != TT_ImplicitStringLiteral)
542        CurrentToken->Type = TT_Unknown;
543      if (CurrentToken->Role)
544        CurrentToken->Role.reset(NULL);
545      CurrentToken->FakeLParens.clear();
546      CurrentToken->FakeRParens = 0;
547    }
548  }
549
550  /// \brief A struct to hold information valid in a specific context, e.g.
551  /// a pair of parenthesis.
552  struct Context {
553    Context(tok::TokenKind ContextKind, unsigned BindingStrength,
554            bool IsExpression)
555        : ContextKind(ContextKind), BindingStrength(BindingStrength),
556          LongestObjCSelectorName(0), ColonIsForRangeExpr(false),
557          ColonIsDictLiteral(false), ColonIsObjCMethodExpr(false),
558          FirstObjCSelectorName(NULL), FirstStartOfName(NULL),
559          IsExpression(IsExpression), CanBeExpression(true),
560          InCtorInitializer(false) {}
561
562    tok::TokenKind ContextKind;
563    unsigned BindingStrength;
564    unsigned LongestObjCSelectorName;
565    bool ColonIsForRangeExpr;
566    bool ColonIsDictLiteral;
567    bool ColonIsObjCMethodExpr;
568    FormatToken *FirstObjCSelectorName;
569    FormatToken *FirstStartOfName;
570    bool IsExpression;
571    bool CanBeExpression;
572    bool InCtorInitializer;
573  };
574
575  /// \brief Puts a new \c Context onto the stack \c Contexts for the lifetime
576  /// of each instance.
577  struct ScopedContextCreator {
578    AnnotatingParser &P;
579
580    ScopedContextCreator(AnnotatingParser &P, tok::TokenKind ContextKind,
581                         unsigned Increase)
582        : P(P) {
583      P.Contexts.push_back(Context(ContextKind,
584                                   P.Contexts.back().BindingStrength + Increase,
585                                   P.Contexts.back().IsExpression));
586    }
587
588    ~ScopedContextCreator() { P.Contexts.pop_back(); }
589  };
590
591  void determineTokenType(FormatToken &Current) {
592    if (Current.getPrecedence() == prec::Assignment &&
593        !Line.First->isOneOf(tok::kw_template, tok::kw_using) &&
594        (!Current.Previous || Current.Previous->isNot(tok::kw_operator))) {
595      Contexts.back().IsExpression = true;
596      for (FormatToken *Previous = Current.Previous;
597           Previous && !Previous->isOneOf(tok::comma, tok::semi);
598           Previous = Previous->Previous) {
599        if (Previous->is(tok::r_square))
600          Previous = Previous->MatchingParen;
601        if (Previous->Type == TT_BinaryOperator &&
602            Previous->isOneOf(tok::star, tok::amp)) {
603          Previous->Type = TT_PointerOrReference;
604        }
605      }
606    } else if (Current.isOneOf(tok::kw_return, tok::kw_throw) ||
607               (Current.is(tok::l_paren) && !Line.MustBeDeclaration &&
608                !Line.InPPDirective &&
609                (!Current.Previous ||
610                 !Current.Previous->isOneOf(tok::kw_for, tok::kw_catch)))) {
611      Contexts.back().IsExpression = true;
612    } else if (Current.isOneOf(tok::r_paren, tok::greater, tok::comma)) {
613      for (FormatToken *Previous = Current.Previous;
614           Previous && Previous->isOneOf(tok::star, tok::amp);
615           Previous = Previous->Previous)
616        Previous->Type = TT_PointerOrReference;
617    } else if (Current.Previous &&
618               Current.Previous->Type == TT_CtorInitializerColon) {
619      Contexts.back().IsExpression = true;
620      Contexts.back().InCtorInitializer = true;
621    } else if (Current.is(tok::kw_new)) {
622      Contexts.back().CanBeExpression = false;
623    } else if (Current.is(tok::semi) || Current.is(tok::exclaim)) {
624      // This should be the condition or increment in a for-loop.
625      Contexts.back().IsExpression = true;
626    }
627
628    if (Current.Type == TT_Unknown) {
629      // Line.MightBeFunctionDecl can only be true after the parentheses of a
630      // function declaration have been found. In this case, 'Current' is a
631      // trailing token of this declaration and thus cannot be a name.
632      if (isStartOfName(Current) && !Line.MightBeFunctionDecl) {
633        Contexts.back().FirstStartOfName = &Current;
634        Current.Type = TT_StartOfName;
635      } else if (Current.is(tok::kw_auto)) {
636        AutoFound = true;
637      } else if (Current.is(tok::arrow) && AutoFound &&
638                 Line.MustBeDeclaration) {
639        Current.Type = TT_TrailingReturnArrow;
640      } else if (Current.isOneOf(tok::star, tok::amp, tok::ampamp)) {
641        Current.Type =
642            determineStarAmpUsage(Current, Contexts.back().CanBeExpression &&
643                                               Contexts.back().IsExpression);
644      } else if (Current.isOneOf(tok::minus, tok::plus, tok::caret)) {
645        Current.Type = determinePlusMinusCaretUsage(Current);
646      } else if (Current.isOneOf(tok::minusminus, tok::plusplus)) {
647        Current.Type = determineIncrementUsage(Current);
648      } else if (Current.is(tok::exclaim)) {
649        Current.Type = TT_UnaryOperator;
650      } else if (Current.isBinaryOperator() &&
651                 (!Current.Previous ||
652                  Current.Previous->isNot(tok::l_square))) {
653        Current.Type = TT_BinaryOperator;
654      } else if (Current.is(tok::comment)) {
655        if (Current.TokenText.startswith("//"))
656          Current.Type = TT_LineComment;
657        else
658          Current.Type = TT_BlockComment;
659      } else if (Current.is(tok::r_paren)) {
660        FormatToken *LeftOfParens = NULL;
661        if (Current.MatchingParen)
662          LeftOfParens = Current.MatchingParen->getPreviousNonComment();
663        bool IsCast = false;
664        bool ParensAreEmpty = Current.Previous == Current.MatchingParen;
665        bool ParensAreType = !Current.Previous ||
666                             Current.Previous->Type == TT_PointerOrReference ||
667                             Current.Previous->Type == TT_TemplateCloser ||
668                             isSimpleTypeSpecifier(*Current.Previous);
669        bool ParensCouldEndDecl =
670            Current.Next &&
671            Current.Next->isOneOf(tok::equal, tok::semi, tok::l_brace);
672        bool IsSizeOfOrAlignOf =
673            LeftOfParens &&
674            LeftOfParens->isOneOf(tok::kw_sizeof, tok::kw_alignof);
675        if (ParensAreType && !ParensCouldEndDecl && !IsSizeOfOrAlignOf &&
676            (Contexts.back().IsExpression ||
677             (Current.Next && Current.Next->isBinaryOperator())))
678          IsCast = true;
679        if (Current.Next && Current.Next->isNot(tok::string_literal) &&
680            (Current.Next->Tok.isLiteral() ||
681             Current.Next->isOneOf(tok::kw_sizeof, tok::kw_alignof)))
682          IsCast = true;
683        // If there is an identifier after the (), it is likely a cast, unless
684        // there is also an identifier before the ().
685        if (LeftOfParens && (LeftOfParens->Tok.getIdentifierInfo() == NULL ||
686                             LeftOfParens->is(tok::kw_return)) &&
687            LeftOfParens->Type != TT_OverloadedOperator &&
688            LeftOfParens->Type != TT_TemplateCloser && Current.Next &&
689            Current.Next->is(tok::identifier))
690          IsCast = true;
691        if (IsCast && !ParensAreEmpty)
692          Current.Type = TT_CastRParen;
693      } else if (Current.is(tok::at) && Current.Next) {
694        switch (Current.Next->Tok.getObjCKeywordID()) {
695        case tok::objc_interface:
696        case tok::objc_implementation:
697        case tok::objc_protocol:
698          Current.Type = TT_ObjCDecl;
699          break;
700        case tok::objc_property:
701          Current.Type = TT_ObjCProperty;
702          break;
703        default:
704          break;
705        }
706      } else if (Current.is(tok::period)) {
707        FormatToken *PreviousNoComment = Current.getPreviousNonComment();
708        if (PreviousNoComment &&
709            PreviousNoComment->isOneOf(tok::comma, tok::l_brace))
710          Current.Type = TT_DesignatedInitializerPeriod;
711      }
712    }
713  }
714
715  /// \brief Take a guess at whether \p Tok starts a name of a function or
716  /// variable declaration.
717  ///
718  /// This is a heuristic based on whether \p Tok is an identifier following
719  /// something that is likely a type.
720  bool isStartOfName(const FormatToken &Tok) {
721    if (Tok.isNot(tok::identifier) || Tok.Previous == NULL)
722      return false;
723
724    // Skip "const" as it does not have an influence on whether this is a name.
725    FormatToken *PreviousNotConst = Tok.Previous;
726    while (PreviousNotConst != NULL && PreviousNotConst->is(tok::kw_const))
727      PreviousNotConst = PreviousNotConst->Previous;
728
729    if (PreviousNotConst == NULL)
730      return false;
731
732    bool IsPPKeyword = PreviousNotConst->is(tok::identifier) &&
733                       PreviousNotConst->Previous &&
734                       PreviousNotConst->Previous->is(tok::hash);
735
736    if (PreviousNotConst->Type == TT_TemplateCloser)
737      return PreviousNotConst && PreviousNotConst->MatchingParen &&
738             PreviousNotConst->MatchingParen->Previous &&
739             PreviousNotConst->MatchingParen->Previous->isNot(tok::kw_template);
740
741    return (!IsPPKeyword && PreviousNotConst->is(tok::identifier)) ||
742           PreviousNotConst->Type == TT_PointerOrReference ||
743           isSimpleTypeSpecifier(*PreviousNotConst);
744  }
745
746  /// \brief Return the type of the given token assuming it is * or &.
747  TokenType determineStarAmpUsage(const FormatToken &Tok, bool IsExpression) {
748    const FormatToken *PrevToken = Tok.getPreviousNonComment();
749    if (PrevToken == NULL)
750      return TT_UnaryOperator;
751
752    const FormatToken *NextToken = Tok.getNextNonComment();
753    if (NextToken == NULL)
754      return TT_Unknown;
755
756    if (PrevToken->is(tok::coloncolon) ||
757        (PrevToken->is(tok::l_paren) && !IsExpression))
758      return TT_PointerOrReference;
759
760    if (PrevToken->isOneOf(tok::l_paren, tok::l_square, tok::l_brace,
761                           tok::comma, tok::semi, tok::kw_return, tok::colon,
762                           tok::equal, tok::kw_delete, tok::kw_sizeof) ||
763        PrevToken->Type == TT_BinaryOperator ||
764        PrevToken->Type == TT_UnaryOperator || PrevToken->Type == TT_CastRParen)
765      return TT_UnaryOperator;
766
767    if (NextToken->is(tok::l_square))
768      return TT_PointerOrReference;
769
770    if (PrevToken->is(tok::r_paren) && PrevToken->MatchingParen &&
771        PrevToken->MatchingParen->Previous &&
772        PrevToken->MatchingParen->Previous->is(tok::kw_typeof))
773      return TT_PointerOrReference;
774
775    if (PrevToken->Tok.isLiteral() ||
776        PrevToken->isOneOf(tok::r_paren, tok::r_square) ||
777        NextToken->Tok.isLiteral() || NextToken->isUnaryOperator())
778      return TT_BinaryOperator;
779
780    // It is very unlikely that we are going to find a pointer or reference type
781    // definition on the RHS of an assignment.
782    if (IsExpression)
783      return TT_BinaryOperator;
784
785    return TT_PointerOrReference;
786  }
787
788  TokenType determinePlusMinusCaretUsage(const FormatToken &Tok) {
789    const FormatToken *PrevToken = Tok.getPreviousNonComment();
790    if (PrevToken == NULL || PrevToken->Type == TT_CastRParen)
791      return TT_UnaryOperator;
792
793    // Use heuristics to recognize unary operators.
794    if (PrevToken->isOneOf(tok::equal, tok::l_paren, tok::comma, tok::l_square,
795                           tok::question, tok::colon, tok::kw_return,
796                           tok::kw_case, tok::at, tok::l_brace))
797      return TT_UnaryOperator;
798
799    // There can't be two consecutive binary operators.
800    if (PrevToken->Type == TT_BinaryOperator)
801      return TT_UnaryOperator;
802
803    // Fall back to marking the token as binary operator.
804    return TT_BinaryOperator;
805  }
806
807  /// \brief Determine whether ++/-- are pre- or post-increments/-decrements.
808  TokenType determineIncrementUsage(const FormatToken &Tok) {
809    const FormatToken *PrevToken = Tok.getPreviousNonComment();
810    if (PrevToken == NULL || PrevToken->Type == TT_CastRParen)
811      return TT_UnaryOperator;
812    if (PrevToken->isOneOf(tok::r_paren, tok::r_square, tok::identifier))
813      return TT_TrailingUnaryOperator;
814
815    return TT_UnaryOperator;
816  }
817
818  // FIXME: This is copy&pasted from Sema. Put it in a common place and remove
819  // duplication.
820  /// \brief Determine whether the token kind starts a simple-type-specifier.
821  bool isSimpleTypeSpecifier(const FormatToken &Tok) const {
822    switch (Tok.Tok.getKind()) {
823    case tok::kw_short:
824    case tok::kw_long:
825    case tok::kw___int64:
826    case tok::kw___int128:
827    case tok::kw_signed:
828    case tok::kw_unsigned:
829    case tok::kw_void:
830    case tok::kw_char:
831    case tok::kw_int:
832    case tok::kw_half:
833    case tok::kw_float:
834    case tok::kw_double:
835    case tok::kw_wchar_t:
836    case tok::kw_bool:
837    case tok::kw___underlying_type:
838    case tok::annot_typename:
839    case tok::kw_char16_t:
840    case tok::kw_char32_t:
841    case tok::kw_typeof:
842    case tok::kw_decltype:
843      return true;
844    default:
845      return false;
846    }
847  }
848
849  SmallVector<Context, 8> Contexts;
850
851  const FormatStyle &Style;
852  AnnotatedLine &Line;
853  FormatToken *CurrentToken;
854  bool KeywordVirtualFound;
855  bool AutoFound;
856  IdentifierInfo &Ident_in;
857};
858
859static int PrecedenceUnaryOperator = prec::PointerToMember + 1;
860static int PrecedenceArrowAndPeriod = prec::PointerToMember + 2;
861
862/// \brief Parses binary expressions by inserting fake parenthesis based on
863/// operator precedence.
864class ExpressionParser {
865public:
866  ExpressionParser(AnnotatedLine &Line) : Current(Line.First) {
867    // Skip leading "}", e.g. in "} else if (...) {".
868    if (Current->is(tok::r_brace))
869      next();
870  }
871
872  /// \brief Parse expressions with the given operatore precedence.
873  void parse(int Precedence = 0) {
874    // Skip 'return' and ObjC selector colons as they are not part of a binary
875    // expression.
876    while (Current &&
877           (Current->is(tok::kw_return) ||
878            (Current->is(tok::colon) && Current->Type == TT_ObjCMethodExpr)))
879      next();
880
881    if (Current == NULL || Precedence > PrecedenceArrowAndPeriod)
882      return;
883
884    // Conditional expressions need to be parsed separately for proper nesting.
885    if (Precedence == prec::Conditional) {
886      parseConditionalExpr();
887      return;
888    }
889
890    // Parse unary operators, which all have a higher precedence than binary
891    // operators.
892    if (Precedence == PrecedenceUnaryOperator) {
893      parseUnaryOperator();
894      return;
895    }
896
897    FormatToken *Start = Current;
898    FormatToken *LatestOperator = NULL;
899
900    while (Current) {
901      // Consume operators with higher precedence.
902      parse(Precedence + 1);
903
904      int CurrentPrecedence = getCurrentPrecedence();
905
906      if (Current && Current->Type == TT_ObjCSelectorName &&
907          Precedence == CurrentPrecedence)
908        Start = Current;
909
910      // At the end of the line or when an operator with higher precedence is
911      // found, insert fake parenthesis and return.
912      if (Current == NULL || Current->closesScope() ||
913          (CurrentPrecedence != -1 && CurrentPrecedence < Precedence)) {
914        if (LatestOperator) {
915          if (Precedence == PrecedenceArrowAndPeriod) {
916            LatestOperator->LastInChainOfCalls = true;
917            // Call expressions don't have a binary operator precedence.
918            addFakeParenthesis(Start, prec::Unknown);
919          } else {
920            addFakeParenthesis(Start, prec::Level(Precedence));
921          }
922        }
923        return;
924      }
925
926      // Consume scopes: (), [], <> and {}
927      if (Current->opensScope()) {
928        while (Current && !Current->closesScope()) {
929          next();
930          parse();
931        }
932        next();
933      } else {
934        // Operator found.
935        if (CurrentPrecedence == Precedence)
936          LatestOperator = Current;
937
938        next();
939      }
940    }
941  }
942
943private:
944  /// \brief Gets the precedence (+1) of the given token for binary operators
945  /// and other tokens that we treat like binary operators.
946  int getCurrentPrecedence() {
947    if (Current) {
948      if (Current->Type == TT_ConditionalExpr)
949        return prec::Conditional;
950      else if (Current->is(tok::semi) || Current->Type == TT_InlineASMColon ||
951               Current->Type == TT_ObjCSelectorName)
952        return 0;
953      else if (Current->Type == TT_BinaryOperator || Current->is(tok::comma))
954        return Current->getPrecedence();
955      else if (Current->isOneOf(tok::period, tok::arrow))
956        return PrecedenceArrowAndPeriod;
957    }
958    return -1;
959  }
960
961  void addFakeParenthesis(FormatToken *Start, prec::Level Precedence) {
962    Start->FakeLParens.push_back(Precedence);
963    if (Precedence > prec::Unknown)
964      Start->StartsBinaryExpression = true;
965    if (Current) {
966      ++Current->Previous->FakeRParens;
967      if (Precedence > prec::Unknown)
968        Current->Previous->EndsBinaryExpression = true;
969    }
970  }
971
972  /// \brief Parse unary operator expressions and surround them with fake
973  /// parentheses if appropriate.
974  void parseUnaryOperator() {
975    if (Current == NULL || Current->Type != TT_UnaryOperator) {
976      parse(PrecedenceArrowAndPeriod);
977      return;
978    }
979
980    FormatToken *Start = Current;
981    next();
982    parseUnaryOperator();
983
984    // The actual precedence doesn't matter.
985    addFakeParenthesis(Start, prec::Unknown);
986  }
987
988  void parseConditionalExpr() {
989    FormatToken *Start = Current;
990    parse(prec::LogicalOr);
991    if (!Current || !Current->is(tok::question))
992      return;
993    next();
994    parse(prec::LogicalOr);
995    if (!Current || Current->Type != TT_ConditionalExpr)
996      return;
997    next();
998    parseConditionalExpr();
999    addFakeParenthesis(Start, prec::Conditional);
1000  }
1001
1002  void next() {
1003    if (Current)
1004      Current = Current->Next;
1005    while (Current && Current->isTrailingComment())
1006      Current = Current->Next;
1007  }
1008
1009  FormatToken *Current;
1010};
1011
1012} // end anonymous namespace
1013
1014void
1015TokenAnnotator::setCommentLineLevels(SmallVectorImpl<AnnotatedLine *> &Lines) {
1016  const AnnotatedLine *NextNonCommentLine = NULL;
1017  for (SmallVectorImpl<AnnotatedLine *>::reverse_iterator I = Lines.rbegin(),
1018                                                          E = Lines.rend();
1019       I != E; ++I) {
1020    if (NextNonCommentLine && (*I)->First->is(tok::comment) &&
1021        (*I)->First->Next == NULL)
1022      (*I)->Level = NextNonCommentLine->Level;
1023    else
1024      NextNonCommentLine = (*I)->First->isNot(tok::r_brace) ? (*I) : NULL;
1025
1026    setCommentLineLevels((*I)->Children);
1027  }
1028}
1029
1030void TokenAnnotator::annotate(AnnotatedLine &Line) {
1031  for (SmallVectorImpl<AnnotatedLine *>::iterator I = Line.Children.begin(),
1032                                                  E = Line.Children.end();
1033       I != E; ++I) {
1034    annotate(**I);
1035  }
1036  AnnotatingParser Parser(Style, Line, Ident_in);
1037  Line.Type = Parser.parseLine();
1038  if (Line.Type == LT_Invalid)
1039    return;
1040
1041  ExpressionParser ExprParser(Line);
1042  ExprParser.parse();
1043
1044  if (Line.First->Type == TT_ObjCMethodSpecifier)
1045    Line.Type = LT_ObjCMethodDecl;
1046  else if (Line.First->Type == TT_ObjCDecl)
1047    Line.Type = LT_ObjCDecl;
1048  else if (Line.First->Type == TT_ObjCProperty)
1049    Line.Type = LT_ObjCProperty;
1050
1051  Line.First->SpacesRequiredBefore = 1;
1052  Line.First->CanBreakBefore = Line.First->MustBreakBefore;
1053}
1054
1055void TokenAnnotator::calculateFormattingInformation(AnnotatedLine &Line) {
1056  Line.First->TotalLength =
1057      Line.First->IsMultiline ? Style.ColumnLimit : Line.First->ColumnWidth;
1058  if (!Line.First->Next)
1059    return;
1060  FormatToken *Current = Line.First->Next;
1061  bool InFunctionDecl = Line.MightBeFunctionDecl;
1062  while (Current != NULL) {
1063    if (Current->Type == TT_LineComment)
1064      Current->SpacesRequiredBefore = Style.SpacesBeforeTrailingComments;
1065    else if (Current->SpacesRequiredBefore == 0 &&
1066             spaceRequiredBefore(Line, *Current))
1067      Current->SpacesRequiredBefore = 1;
1068
1069    Current->MustBreakBefore =
1070        Current->MustBreakBefore || mustBreakBefore(Line, *Current);
1071
1072    Current->CanBreakBefore =
1073        Current->MustBreakBefore || canBreakBefore(Line, *Current);
1074    if (Current->MustBreakBefore || !Current->Children.empty() ||
1075        Current->IsMultiline)
1076      Current->TotalLength = Current->Previous->TotalLength + Style.ColumnLimit;
1077    else
1078      Current->TotalLength = Current->Previous->TotalLength +
1079                             Current->ColumnWidth +
1080                             Current->SpacesRequiredBefore;
1081
1082    if (Current->Type == TT_CtorInitializerColon)
1083      InFunctionDecl = false;
1084
1085    // FIXME: Only calculate this if CanBreakBefore is true once static
1086    // initializers etc. are sorted out.
1087    // FIXME: Move magic numbers to a better place.
1088    Current->SplitPenalty = 20 * Current->BindingStrength +
1089                            splitPenalty(Line, *Current, InFunctionDecl);
1090
1091    Current = Current->Next;
1092  }
1093
1094  calculateUnbreakableTailLengths(Line);
1095  for (Current = Line.First; Current != NULL; Current = Current->Next) {
1096    if (Current->Role)
1097      Current->Role->precomputeFormattingInfos(Current);
1098  }
1099
1100  DEBUG({ printDebugInfo(Line); });
1101
1102  for (SmallVectorImpl<AnnotatedLine *>::iterator I = Line.Children.begin(),
1103                                                  E = Line.Children.end();
1104       I != E; ++I) {
1105    calculateFormattingInformation(**I);
1106  }
1107}
1108
1109void TokenAnnotator::calculateUnbreakableTailLengths(AnnotatedLine &Line) {
1110  unsigned UnbreakableTailLength = 0;
1111  FormatToken *Current = Line.Last;
1112  while (Current != NULL) {
1113    Current->UnbreakableTailLength = UnbreakableTailLength;
1114    if (Current->CanBreakBefore ||
1115        Current->isOneOf(tok::comment, tok::string_literal)) {
1116      UnbreakableTailLength = 0;
1117    } else {
1118      UnbreakableTailLength +=
1119          Current->ColumnWidth + Current->SpacesRequiredBefore;
1120    }
1121    Current = Current->Previous;
1122  }
1123}
1124
1125unsigned TokenAnnotator::splitPenalty(const AnnotatedLine &Line,
1126                                      const FormatToken &Tok,
1127                                      bool InFunctionDecl) {
1128  const FormatToken &Left = *Tok.Previous;
1129  const FormatToken &Right = Tok;
1130
1131  if (Left.is(tok::semi))
1132    return 0;
1133  if (Left.is(tok::comma))
1134    return 1;
1135  if (Right.is(tok::l_square))
1136    return 150;
1137
1138  if (Right.Type == TT_StartOfName || Right.is(tok::kw_operator)) {
1139    if (Line.First->is(tok::kw_for) && Right.PartOfMultiVariableDeclStmt)
1140      return 3;
1141    if (Left.Type == TT_StartOfName)
1142      return 20;
1143    if (InFunctionDecl && Right.BindingStrength == 1)
1144      // FIXME: Clean up hack of using BindingStrength to find top-level names.
1145      return Style.PenaltyReturnTypeOnItsOwnLine;
1146    return 200;
1147  }
1148  if (Left.is(tok::equal) && Right.is(tok::l_brace))
1149    return 150;
1150  if (Left.Type == TT_CastRParen)
1151    return 100;
1152  if (Left.is(tok::coloncolon))
1153    return 500;
1154  if (Left.isOneOf(tok::kw_class, tok::kw_struct))
1155    return 5000;
1156
1157  if (Left.Type == TT_RangeBasedForLoopColon ||
1158      Left.Type == TT_InheritanceColon)
1159    return 2;
1160
1161  if (Right.isMemberAccess()) {
1162    if (Left.isOneOf(tok::r_paren, tok::r_square) && Left.MatchingParen &&
1163        Left.MatchingParen->ParameterCount > 0)
1164      return 20; // Should be smaller than breaking at a nested comma.
1165    return 150;
1166  }
1167
1168  // Breaking before a trailing 'const' or not-function-like annotation is bad.
1169  if (Left.is(tok::r_paren) && Line.Type != LT_ObjCProperty &&
1170      (Right.is(tok::kw_const) || (Right.is(tok::identifier) && Right.Next &&
1171                                   Right.Next->isNot(tok::l_paren))))
1172    return 100;
1173
1174  // In for-loops, prefer breaking at ',' and ';'.
1175  if (Line.First->is(tok::kw_for) && Left.is(tok::equal))
1176    return 4;
1177
1178  // In Objective-C method expressions, prefer breaking before "param:" over
1179  // breaking after it.
1180  if (Right.Type == TT_ObjCSelectorName)
1181    return 0;
1182  if (Left.is(tok::colon) && Left.Type == TT_ObjCMethodExpr)
1183    return 50;
1184
1185  if (Left.is(tok::l_paren) && InFunctionDecl)
1186    return 100;
1187  if (Left.opensScope())
1188    return Left.ParameterCount > 1 ? Style.PenaltyBreakBeforeFirstCallParameter
1189                                   : 19;
1190
1191  if (Right.is(tok::lessless)) {
1192    if (Left.is(tok::string_literal)) {
1193      StringRef Content = Left.TokenText;
1194      if (Content.startswith("\""))
1195        Content = Content.drop_front(1);
1196      if (Content.endswith("\""))
1197        Content = Content.drop_back(1);
1198      Content = Content.trim();
1199      if (Content.size() > 1 &&
1200          (Content.back() == ':' || Content.back() == '='))
1201        return 25;
1202    }
1203    return 1; // Breaking at a << is really cheap.
1204  }
1205  if (Left.Type == TT_ConditionalExpr)
1206    return prec::Conditional;
1207  prec::Level Level = Left.getPrecedence();
1208
1209  if (Level != prec::Unknown)
1210    return Level;
1211
1212  return 3;
1213}
1214
1215bool TokenAnnotator::spaceRequiredBetween(const AnnotatedLine &Line,
1216                                          const FormatToken &Left,
1217                                          const FormatToken &Right) {
1218  if (Right.is(tok::hashhash))
1219    return Left.is(tok::hash);
1220  if (Left.isOneOf(tok::hashhash, tok::hash))
1221    return Right.is(tok::hash);
1222  if (Left.is(tok::l_paren) && Right.is(tok::r_paren))
1223    return Style.SpaceInEmptyParentheses;
1224  if (Left.is(tok::l_paren) || Right.is(tok::r_paren))
1225    return (Right.Type == TT_CastRParen ||
1226            (Left.MatchingParen && Left.MatchingParen->Type == TT_CastRParen))
1227               ? Style.SpacesInCStyleCastParentheses
1228               : Style.SpacesInParentheses;
1229  if (Style.SpacesInAngles &&
1230      ((Left.Type == TT_TemplateOpener) != (Right.Type == TT_TemplateCloser)))
1231    return true;
1232  if (Right.isOneOf(tok::semi, tok::comma))
1233    return false;
1234  if (Right.is(tok::less) &&
1235      (Left.is(tok::kw_template) ||
1236       (Line.Type == LT_ObjCDecl && Style.ObjCSpaceBeforeProtocolList)))
1237    return true;
1238  if (Left.is(tok::arrow) || Right.is(tok::arrow))
1239    return false;
1240  if (Left.isOneOf(tok::exclaim, tok::tilde))
1241    return false;
1242  if (Left.is(tok::at) &&
1243      Right.isOneOf(tok::identifier, tok::string_literal, tok::char_constant,
1244                    tok::numeric_constant, tok::l_paren, tok::l_brace,
1245                    tok::kw_true, tok::kw_false))
1246    return false;
1247  if (Left.is(tok::coloncolon))
1248    return false;
1249  if (Right.is(tok::coloncolon))
1250    return (Left.is(tok::less) && Style.Standard == FormatStyle::LS_Cpp03) ||
1251           !Left.isOneOf(tok::identifier, tok::greater, tok::l_paren,
1252                         tok::r_paren, tok::less);
1253  if (Left.is(tok::less) || Right.isOneOf(tok::greater, tok::less))
1254    return false;
1255  if (Right.is(tok::ellipsis))
1256    return Left.Tok.isLiteral();
1257  if (Left.is(tok::l_square) && Right.is(tok::amp))
1258    return false;
1259  if (Right.Type == TT_PointerOrReference)
1260    return Left.Tok.isLiteral() ||
1261           ((Left.Type != TT_PointerOrReference) && Left.isNot(tok::l_paren) &&
1262            !Style.PointerBindsToType);
1263  if (Right.Type == TT_FunctionTypeLParen && Left.isNot(tok::l_paren) &&
1264      (Left.Type != TT_PointerOrReference || Style.PointerBindsToType))
1265    return true;
1266  if (Left.Type == TT_PointerOrReference)
1267    return Right.Tok.isLiteral() || Right.Type == TT_BlockComment ||
1268           ((Right.Type != TT_PointerOrReference) &&
1269            Right.isNot(tok::l_paren) && Style.PointerBindsToType &&
1270            Left.Previous &&
1271            !Left.Previous->isOneOf(tok::l_paren, tok::coloncolon));
1272  if (Right.is(tok::star) && Left.is(tok::l_paren))
1273    return false;
1274  if (Left.is(tok::l_square))
1275    return Left.Type == TT_ArrayInitializerLSquare &&
1276           Right.isNot(tok::r_square);
1277  if (Right.is(tok::r_square))
1278    return Right.MatchingParen &&
1279           Right.MatchingParen->Type == TT_ArrayInitializerLSquare;
1280  if (Right.is(tok::l_square) && Right.Type != TT_ObjCMethodExpr &&
1281      Right.Type != TT_LambdaLSquare && Left.isNot(tok::numeric_constant))
1282    return false;
1283  if (Left.is(tok::colon))
1284    return Left.Type != TT_ObjCMethodExpr;
1285  if (Right.is(tok::colon))
1286    return Right.Type != TT_ObjCMethodExpr && !Left.is(tok::question);
1287  if (Right.is(tok::l_paren)) {
1288    if (Left.is(tok::r_paren) && Left.MatchingParen &&
1289        Left.MatchingParen->Previous &&
1290        Left.MatchingParen->Previous->is(tok::kw___attribute))
1291      return true;
1292    return Line.Type == LT_ObjCDecl ||
1293           Left.isOneOf(tok::kw_return, tok::kw_new, tok::kw_delete,
1294                        tok::semi) ||
1295           (Style.SpaceAfterControlStatementKeyword &&
1296            Left.isOneOf(tok::kw_if, tok::kw_for, tok::kw_while, tok::kw_switch,
1297                         tok::kw_catch));
1298  }
1299  if (Left.is(tok::at) && Right.Tok.getObjCKeywordID() != tok::objc_not_keyword)
1300    return false;
1301  if (Left.is(tok::l_brace) && Right.is(tok::r_brace))
1302    return !Left.Children.empty(); // No spaces in "{}".
1303  if (Left.is(tok::l_brace) || Right.is(tok::r_brace))
1304    return !Style.Cpp11BracedListStyle;
1305  if (Right.Type == TT_UnaryOperator)
1306    return !Left.isOneOf(tok::l_paren, tok::l_square, tok::at) &&
1307           (Left.isNot(tok::colon) || Left.Type != TT_ObjCMethodExpr);
1308  if (Left.isOneOf(tok::identifier, tok::greater, tok::r_square) &&
1309      Right.is(tok::l_brace) && Right.getNextNonComment() &&
1310      Right.BlockKind != BK_Block)
1311    return false;
1312  if (Left.is(tok::period) || Right.is(tok::period))
1313    return false;
1314  if (Left.Type == TT_BlockComment && Left.TokenText.endswith("=*/"))
1315    return false;
1316  if (Right.is(tok::hash) && Left.is(tok::identifier) && Left.TokenText == "L")
1317    return false;
1318  return true;
1319}
1320
1321bool TokenAnnotator::spaceRequiredBefore(const AnnotatedLine &Line,
1322                                         const FormatToken &Tok) {
1323  if (Tok.Tok.getIdentifierInfo() && Tok.Previous->Tok.getIdentifierInfo())
1324    return true; // Never ever merge two identifiers.
1325  if (Tok.Previous->Type == TT_ImplicitStringLiteral)
1326    return Tok.WhitespaceRange.getBegin() != Tok.WhitespaceRange.getEnd();
1327  if (Line.Type == LT_ObjCMethodDecl) {
1328    if (Tok.Previous->Type == TT_ObjCMethodSpecifier)
1329      return true;
1330    if (Tok.Previous->is(tok::r_paren) && Tok.is(tok::identifier))
1331      // Don't space between ')' and <id>
1332      return false;
1333  }
1334  if (Line.Type == LT_ObjCProperty &&
1335      (Tok.is(tok::equal) || Tok.Previous->is(tok::equal)))
1336    return false;
1337
1338  if (Tok.Type == TT_TrailingReturnArrow ||
1339      Tok.Previous->Type == TT_TrailingReturnArrow)
1340    return true;
1341  if (Tok.Previous->is(tok::comma))
1342    return true;
1343  if (Tok.is(tok::comma))
1344    return false;
1345  if (Tok.Type == TT_CtorInitializerColon || Tok.Type == TT_ObjCBlockLParen)
1346    return true;
1347  if (Tok.Previous->Tok.is(tok::kw_operator))
1348    return Tok.is(tok::coloncolon);
1349  if (Tok.Type == TT_OverloadedOperatorLParen)
1350    return false;
1351  if (Tok.is(tok::colon))
1352    return !Line.First->isOneOf(tok::kw_case, tok::kw_default) &&
1353           Tok.getNextNonComment() != NULL && Tok.Type != TT_ObjCMethodExpr &&
1354           !Tok.Previous->is(tok::question);
1355  if (Tok.Previous->Type == TT_UnaryOperator ||
1356      Tok.Previous->Type == TT_CastRParen)
1357    return false;
1358  if (Tok.Previous->is(tok::greater) && Tok.is(tok::greater)) {
1359    return Tok.Type == TT_TemplateCloser &&
1360           Tok.Previous->Type == TT_TemplateCloser &&
1361           (Style.Standard != FormatStyle::LS_Cpp11 || Style.SpacesInAngles);
1362  }
1363  if (Tok.isOneOf(tok::arrowstar, tok::periodstar) ||
1364      Tok.Previous->isOneOf(tok::arrowstar, tok::periodstar))
1365    return false;
1366  if (!Style.SpaceBeforeAssignmentOperators &&
1367      Tok.getPrecedence() == prec::Assignment)
1368    return false;
1369  if ((Tok.Type == TT_BinaryOperator && !Tok.Previous->is(tok::l_paren)) ||
1370      Tok.Previous->Type == TT_BinaryOperator)
1371    return true;
1372  if (Tok.Previous->Type == TT_TemplateCloser && Tok.is(tok::l_paren))
1373    return false;
1374  if (Tok.is(tok::less) && Tok.Previous->isNot(tok::l_paren) &&
1375      Line.First->is(tok::hash))
1376    return true;
1377  if (Tok.Type == TT_TrailingUnaryOperator)
1378    return false;
1379  return spaceRequiredBetween(Line, *Tok.Previous, Tok);
1380}
1381
1382bool TokenAnnotator::mustBreakBefore(const AnnotatedLine &Line,
1383                                     const FormatToken &Right) {
1384  if (Right.is(tok::comment)) {
1385    return Right.NewlinesBefore > 0;
1386  } else if (Right.Previous->isTrailingComment() ||
1387             (Right.is(tok::string_literal) &&
1388              Right.Previous->is(tok::string_literal))) {
1389    return true;
1390  } else if (Right.Previous->IsUnterminatedLiteral) {
1391    return true;
1392  } else if (Right.is(tok::lessless) && Right.Next &&
1393             Right.Previous->is(tok::string_literal) &&
1394             Right.Next->is(tok::string_literal)) {
1395    return true;
1396  } else if (Right.Previous->ClosesTemplateDeclaration &&
1397             Right.Previous->MatchingParen &&
1398             Right.Previous->MatchingParen->BindingStrength == 1 &&
1399             Style.AlwaysBreakTemplateDeclarations) {
1400    // FIXME: Fix horrible hack of using BindingStrength to find top-level <>.
1401    return true;
1402  } else if (Right.Type == TT_CtorInitializerComma &&
1403             Style.BreakConstructorInitializersBeforeComma &&
1404             !Style.ConstructorInitializerAllOnOneLineOrOnePerLine) {
1405    return true;
1406  } else if (Right.Previous->BlockKind == BK_Block &&
1407             Right.Previous->isNot(tok::r_brace) && Right.isNot(tok::r_brace)) {
1408    return true;
1409  } else if (Right.is(tok::l_brace) && (Right.BlockKind == BK_Block)) {
1410    return Style.BreakBeforeBraces == FormatStyle::BS_Allman;
1411  }
1412  return false;
1413}
1414
1415bool TokenAnnotator::canBreakBefore(const AnnotatedLine &Line,
1416                                    const FormatToken &Right) {
1417  const FormatToken &Left = *Right.Previous;
1418  if (Right.Type == TT_StartOfName || Right.is(tok::kw_operator))
1419    return true;
1420  if (Right.isTrailingComment())
1421    // We rely on MustBreakBefore being set correctly here as we should not
1422    // change the "binding" behavior of a comment.
1423    return false;
1424  if (Left.is(tok::question) && Right.is(tok::colon))
1425    return false;
1426  if (Right.Type == TT_ConditionalExpr || Right.is(tok::question))
1427    return Style.BreakBeforeTernaryOperators;
1428  if (Left.Type == TT_ConditionalExpr || Left.is(tok::question))
1429    return !Style.BreakBeforeTernaryOperators;
1430  if (Right.is(tok::colon) &&
1431      (Right.Type == TT_DictLiteral || Right.Type == TT_ObjCMethodExpr))
1432    return false;
1433  if (Left.is(tok::colon) &&
1434      (Left.Type == TT_DictLiteral || Left.Type == TT_ObjCMethodExpr))
1435    return true;
1436  if (Right.Type == TT_ObjCSelectorName)
1437    return true;
1438  if (Left.is(tok::r_paren) && Line.Type == LT_ObjCProperty)
1439    return true;
1440  if (Left.ClosesTemplateDeclaration)
1441    return true;
1442  if (Right.Type == TT_RangeBasedForLoopColon ||
1443      Right.Type == TT_OverloadedOperatorLParen ||
1444      Right.Type == TT_OverloadedOperator)
1445    return false;
1446  if (Left.Type == TT_RangeBasedForLoopColon)
1447    return true;
1448  if (Right.Type == TT_RangeBasedForLoopColon)
1449    return false;
1450  if (Left.Type == TT_PointerOrReference || Left.Type == TT_TemplateCloser ||
1451      Left.Type == TT_UnaryOperator || Left.is(tok::kw_operator))
1452    return false;
1453  if (Left.is(tok::equal) && Line.Type == LT_VirtualFunctionDecl)
1454    return false;
1455  if (Left.Previous) {
1456    if (Left.is(tok::l_paren) && Right.is(tok::l_paren) &&
1457        Left.Previous->is(tok::kw___attribute))
1458      return false;
1459    if (Left.is(tok::l_paren) && (Left.Previous->Type == TT_BinaryOperator ||
1460                                  Left.Previous->Type == TT_CastRParen))
1461      return false;
1462  }
1463  if (Right.Type == TT_ImplicitStringLiteral)
1464    return false;
1465
1466  if (Right.is(tok::r_paren) || Right.Type == TT_TemplateCloser)
1467    return false;
1468
1469  // We only break before r_brace if there was a corresponding break before
1470  // the l_brace, which is tracked by BreakBeforeClosingBrace.
1471  if (Right.is(tok::r_brace))
1472    return Right.MatchingParen && Right.MatchingParen->BlockKind == BK_Block;
1473
1474  // Allow breaking after a trailing 'const', e.g. after a method declaration,
1475  // unless it is follow by ';', '{' or '='.
1476  if (Left.is(tok::kw_const) && Left.Previous != NULL &&
1477      Left.Previous->is(tok::r_paren))
1478    return !Right.isOneOf(tok::l_brace, tok::semi, tok::equal);
1479
1480  if (Right.is(tok::kw___attribute))
1481    return true;
1482
1483  if (Left.is(tok::identifier) && Right.is(tok::string_literal))
1484    return true;
1485
1486  if (Left.Type == TT_CtorInitializerComma &&
1487      Style.BreakConstructorInitializersBeforeComma)
1488    return false;
1489  if (Right.Type == TT_CtorInitializerComma &&
1490      Style.BreakConstructorInitializersBeforeComma)
1491    return true;
1492  if (Right.isBinaryOperator() && Style.BreakBeforeBinaryOperators)
1493    return true;
1494  if (Left.is(tok::greater) && Right.is(tok::greater) &&
1495      Left.Type != TT_TemplateCloser)
1496    return false;
1497  if (Left.Type == TT_ArrayInitializerLSquare)
1498    return true;
1499  return (Left.isBinaryOperator() && Left.isNot(tok::lessless) &&
1500          !Style.BreakBeforeBinaryOperators) ||
1501         Left.isOneOf(tok::comma, tok::coloncolon, tok::semi, tok::l_brace,
1502                      tok::kw_class, tok::kw_struct) ||
1503         Right.isOneOf(tok::lessless, tok::arrow, tok::period, tok::colon,
1504                       tok::l_square, tok::at) ||
1505         (Left.is(tok::r_paren) &&
1506          Right.isOneOf(tok::identifier, tok::kw_const, tok::kw___attribute)) ||
1507         (Left.is(tok::l_paren) && !Right.is(tok::r_paren));
1508}
1509
1510void TokenAnnotator::printDebugInfo(const AnnotatedLine &Line) {
1511  llvm::errs() << "AnnotatedTokens:\n";
1512  const FormatToken *Tok = Line.First;
1513  while (Tok) {
1514    llvm::errs() << " M=" << Tok->MustBreakBefore
1515                 << " C=" << Tok->CanBreakBefore << " T=" << Tok->Type
1516                 << " S=" << Tok->SpacesRequiredBefore
1517                 << " P=" << Tok->SplitPenalty << " Name=" << Tok->Tok.getName()
1518                 << " L=" << Tok->TotalLength << " PPK=" << Tok->PackingKind
1519                 << " FakeLParens=";
1520    for (unsigned i = 0, e = Tok->FakeLParens.size(); i != e; ++i)
1521      llvm::errs() << Tok->FakeLParens[i] << "/";
1522    llvm::errs() << " FakeRParens=" << Tok->FakeRParens << "\n";
1523    if (Tok->Next == NULL)
1524      assert(Tok == Line.Last);
1525    Tok = Tok->Next;
1526  }
1527  llvm::errs() << "----\n";
1528}
1529
1530} // namespace format
1531} // namespace clang
1532