1//===--- TokenAnnotator.h - Format C++ code ---------------------*- C++ -*-===//
2//
3//                     The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9///
10/// \file
11/// \brief This file implements a token annotator, i.e. creates
12/// \c AnnotatedTokens out of \c FormatTokens with required extra information.
13///
14//===----------------------------------------------------------------------===//
15
16#ifndef LLVM_CLANG_FORMAT_TOKEN_ANNOTATOR_H
17#define LLVM_CLANG_FORMAT_TOKEN_ANNOTATOR_H
18
19#include "UnwrappedLineParser.h"
20#include "clang/Basic/OperatorPrecedence.h"
21#include "clang/Format/Format.h"
22#include <string>
23
24namespace clang {
25class Lexer;
26class SourceManager;
27
28namespace format {
29
/// \brief Classification stored in \c AnnotatedToken::Type.
///
/// The enumerators rely on consecutive values counted up from 0, so their
/// order must not be changed casually.
enum TokenType {
  TT_BinaryOperator = 0,
  TT_BlockComment,
  TT_CastRParen,
  TT_ConditionalExpr,
  TT_CtorInitializerColon,
  TT_ImplicitStringLiteral,
  TT_InlineASMColon,
  TT_InheritanceColon,
  TT_LineComment,
  TT_ObjCArrayLiteral,
  TT_ObjCBlockLParen,
  TT_ObjCDecl,
  TT_ObjCForIn,
  TT_ObjCMethodExpr,
  TT_ObjCMethodSpecifier,
  TT_ObjCProperty,
  TT_ObjCSelectorName,
  TT_OverloadedOperatorLParen,
  TT_PointerOrReference,
  TT_PureVirtualSpecifier,
  TT_RangeBasedForLoopColon,
  TT_StartOfName,
  TT_TemplateCloser,
  TT_TemplateOpener,
  TT_TrailingUnaryOperator,
  TT_UnaryOperator,
  /// The type every \c AnnotatedToken carries right after construction.
  TT_Unknown
};
59
/// \brief Classification stored in \c AnnotatedLine::Type.
///
/// The enumerators rely on consecutive values counted up from 0.
enum LineType {
  LT_Invalid = 0,
  LT_Other,
  LT_BuilderTypeCall,
  LT_PreprocessorDirective,
  LT_VirtualFunctionDecl,
  /// An @interface, @implementation, or @protocol line.
  LT_ObjCDecl,
  LT_ObjCMethodDecl,
  /// An @property line.
  LT_ObjCProperty
};
70
71class AnnotatedToken {
72public:
73  explicit AnnotatedToken(const FormatToken &FormatTok)
74      : FormatTok(FormatTok), Type(TT_Unknown), SpacesRequiredBefore(0),
75        CanBreakBefore(false), MustBreakBefore(false),
76        ClosesTemplateDeclaration(false), MatchingParen(NULL),
77        ParameterCount(0), BindingStrength(0), SplitPenalty(0),
78        LongestObjCSelectorName(0), Parent(NULL),
79        FakeRParens(0), LastInChainOfCalls(false),
80        PartOfMultiVariableDeclStmt(false), NoMoreTokensOnLevel(false) {}
81
82  bool is(tok::TokenKind Kind) const { return FormatTok.Tok.is(Kind); }
83
84  bool isOneOf(tok::TokenKind K1, tok::TokenKind K2) const {
85    return is(K1) || is(K2);
86  }
87
88  bool isOneOf(tok::TokenKind K1, tok::TokenKind K2, tok::TokenKind K3) const {
89    return is(K1) || is(K2) || is(K3);
90  }
91
92  bool isOneOf(
93      tok::TokenKind K1, tok::TokenKind K2, tok::TokenKind K3,
94      tok::TokenKind K4, tok::TokenKind K5 = tok::NUM_TOKENS,
95      tok::TokenKind K6 = tok::NUM_TOKENS, tok::TokenKind K7 = tok::NUM_TOKENS,
96      tok::TokenKind K8 = tok::NUM_TOKENS, tok::TokenKind K9 = tok::NUM_TOKENS,
97      tok::TokenKind K10 = tok::NUM_TOKENS,
98      tok::TokenKind K11 = tok::NUM_TOKENS,
99      tok::TokenKind K12 = tok::NUM_TOKENS) const {
100    return is(K1) || is(K2) || is(K3) || is(K4) || is(K5) || is(K6) || is(K7) ||
101           is(K8) || is(K9) || is(K10) || is(K11) || is(K12);
102  }
103
104  bool isNot(tok::TokenKind Kind) const { return FormatTok.Tok.isNot(Kind); }
105
106  bool isObjCAtKeyword(tok::ObjCKeywordKind Kind) const {
107    return FormatTok.Tok.isObjCAtKeyword(Kind);
108  }
109
110  bool isAccessSpecifier(bool ColonRequired = true) const {
111    return isOneOf(tok::kw_public, tok::kw_protected, tok::kw_private) &&
112           (!ColonRequired ||
113            (!Children.empty() && Children[0].is(tok::colon)));
114  }
115
116  bool isObjCAccessSpecifier() const {
117    return is(tok::at) && !Children.empty() &&
118           (Children[0].isObjCAtKeyword(tok::objc_public) ||
119            Children[0].isObjCAtKeyword(tok::objc_protected) ||
120            Children[0].isObjCAtKeyword(tok::objc_package) ||
121            Children[0].isObjCAtKeyword(tok::objc_private));
122  }
123
124  /// \brief Returns whether \p Tok is ([{ or a template opening <.
125  bool opensScope() const;
126  /// \brief Returns whether \p Tok is )]} or a template opening >.
127  bool closesScope() const;
128
129  bool isUnaryOperator() const;
130  bool isBinaryOperator() const;
131  bool isTrailingComment() const;
132
133  FormatToken FormatTok;
134
135  TokenType Type;
136
137  unsigned SpacesRequiredBefore;
138  bool CanBreakBefore;
139  bool MustBreakBefore;
140
141  bool ClosesTemplateDeclaration;
142
143  AnnotatedToken *MatchingParen;
144
145  /// \brief Number of parameters, if this is "(", "[" or "<".
146  ///
147  /// This is initialized to 1 as we don't need to distinguish functions with
148  /// 0 parameters from functions with 1 parameter. Thus, we can simply count
149  /// the number of commas.
150  unsigned ParameterCount;
151
152  /// \brief The total length of the line up to and including this token.
153  unsigned TotalLength;
154
155  // FIXME: Come up with a 'cleaner' concept.
156  /// \brief The binding strength of a token. This is a combined value of
157  /// operator precedence, parenthesis nesting, etc.
158  unsigned BindingStrength;
159
160  /// \brief Penalty for inserting a line break before this token.
161  unsigned SplitPenalty;
162
163  /// \brief If this is the first ObjC selector name in an ObjC method
164  /// definition or call, this contains the length of the longest name.
165  unsigned LongestObjCSelectorName;
166
167  std::vector<AnnotatedToken> Children;
168  AnnotatedToken *Parent;
169
170  /// \brief Stores the number of required fake parentheses and the
171  /// corresponding operator precedence.
172  ///
173  /// If multiple fake parentheses start at a token, this vector stores them in
174  /// reverse order, i.e. inner fake parenthesis first.
175  SmallVector<prec::Level, 4>  FakeLParens;
176  /// \brief Insert this many fake ) after this token for correct indentation.
177  unsigned FakeRParens;
178
179  /// \brief Is this the last "." or "->" in a builder-type call?
180  bool LastInChainOfCalls;
181
182  /// \brief Is this token part of a \c DeclStmt defining multiple variables?
183  ///
184  /// Only set if \c Type == \c TT_StartOfName.
185  bool PartOfMultiVariableDeclStmt;
186
187  /// \brief Set to \c true for "("-tokens if this is the last token other than
188  /// ")" in the next higher parenthesis level.
189  ///
190  /// If this is \c true, no more formatting decisions have to be made on the
191  /// next higher parenthesis level, enabling optimizations.
192  ///
193  /// Example:
194  /// \code
195  /// aaaaaa(aaaaaa());
196  ///              ^  // Set to true for this parenthesis.
197  /// \endcode
198  bool NoMoreTokensOnLevel;
199
200  /// \brief Returns the previous token ignoring comments.
201  AnnotatedToken *getPreviousNoneComment() const;
202
203  /// \brief Returns the next token ignoring comments.
204  const AnnotatedToken *getNextNoneComment() const;
205};
206
207class AnnotatedLine {
208public:
209  AnnotatedLine(const UnwrappedLine &Line)
210      : First(Line.Tokens.front()), Level(Line.Level),
211        InPPDirective(Line.InPPDirective),
212        MustBeDeclaration(Line.MustBeDeclaration), MightBeFunctionDecl(false),
213        StartsDefinition(false) {
214    assert(!Line.Tokens.empty());
215    AnnotatedToken *Current = &First;
216    for (std::list<FormatToken>::const_iterator I = ++Line.Tokens.begin(),
217                                                E = Line.Tokens.end();
218         I != E; ++I) {
219      Current->Children.push_back(AnnotatedToken(*I));
220      Current->Children[0].Parent = Current;
221      Current = &Current->Children[0];
222    }
223    Last = Current;
224  }
225  AnnotatedLine(const AnnotatedLine &Other)
226      : First(Other.First), Type(Other.Type), Level(Other.Level),
227        InPPDirective(Other.InPPDirective),
228        MustBeDeclaration(Other.MustBeDeclaration),
229        MightBeFunctionDecl(Other.MightBeFunctionDecl),
230        StartsDefinition(Other.StartsDefinition) {
231    Last = &First;
232    while (!Last->Children.empty()) {
233      Last->Children[0].Parent = Last;
234      Last = &Last->Children[0];
235    }
236  }
237
238  AnnotatedToken First;
239  AnnotatedToken *Last;
240
241  LineType Type;
242  unsigned Level;
243  bool InPPDirective;
244  bool MustBeDeclaration;
245  bool MightBeFunctionDecl;
246  bool StartsDefinition;
247};
248
249inline prec::Level getPrecedence(const AnnotatedToken &Tok) {
250  return getBinOpPrecedence(Tok.FormatTok.Tok.getKind(), true, true);
251}
252
253/// \brief Determines extra information about the tokens comprising an
254/// \c UnwrappedLine.
255class TokenAnnotator {
256public:
257  TokenAnnotator(const FormatStyle &Style, SourceManager &SourceMgr, Lexer &Lex,
258                 IdentifierInfo &Ident_in)
259      : Style(Style), SourceMgr(SourceMgr), Lex(Lex), Ident_in(Ident_in) {
260  }
261
262  void annotate(AnnotatedLine &Line);
263  void calculateFormattingInformation(AnnotatedLine &Line);
264
265private:
266  /// \brief Calculate the penalty for splitting before \c Tok.
267  unsigned splitPenalty(const AnnotatedLine &Line, const AnnotatedToken &Tok);
268
269  bool spaceRequiredBetween(const AnnotatedLine &Line,
270                            const AnnotatedToken &Left,
271                            const AnnotatedToken &Right);
272
273  bool spaceRequiredBefore(const AnnotatedLine &Line,
274                           const AnnotatedToken &Tok);
275
276  bool canBreakBefore(const AnnotatedLine &Line, const AnnotatedToken &Right);
277
278  void printDebugInfo(const AnnotatedLine &Line);
279
280  const FormatStyle &Style;
281  SourceManager &SourceMgr;
282  Lexer &Lex;
283
284  // Contextual keywords:
285  IdentifierInfo &Ident_in;
286};
287
288} // end namespace format
289} // end namespace clang
290
291#endif // LLVM_CLANG_FORMAT_TOKEN_ANNOTATOR_H
292