//===--- TokenAnnotator.h - Format C++ code ---------------------*- C++ -*-===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
///
/// \file
/// \brief This file implements a token annotator, i.e. creates
/// \c AnnotatedTokens out of \c FormatTokens with required extra information.
///
//===----------------------------------------------------------------------===//
15249261Sdim
16249261Sdim#ifndef LLVM_CLANG_FORMAT_TOKEN_ANNOTATOR_H
17249261Sdim#define LLVM_CLANG_FORMAT_TOKEN_ANNOTATOR_H
18249261Sdim
19249261Sdim#include "UnwrappedLineParser.h"
20249261Sdim#include "clang/Basic/OperatorPrecedence.h"
21249261Sdim#include "clang/Format/Format.h"
22249261Sdim#include <string>
23249261Sdim
24249261Sdimnamespace clang {
25249261Sdimclass Lexer;
26249261Sdimclass SourceManager;
27249261Sdim
28249261Sdimnamespace format {
29249261Sdim
/// \brief Classification stored in \c AnnotatedToken::Type.
///
/// Every \c AnnotatedToken starts out as \c TT_Unknown (see its constructor).
/// The enumerators use implicit numbering, so their order determines their
/// values; keep new entries in the existing (alphabetical) arrangement only
/// if nothing depends on the numeric values.
enum TokenType {
  TT_BinaryOperator,
  TT_BlockComment,
  TT_CastRParen,
  TT_ConditionalExpr,
  TT_CtorInitializerColon,
  TT_ImplicitStringLiteral,
  TT_InlineASMColon,
  TT_InheritanceColon,
  TT_LineComment,
  TT_ObjCArrayLiteral,
  TT_ObjCBlockLParen,
  TT_ObjCDecl,
  TT_ObjCForIn,
  TT_ObjCMethodExpr,
  TT_ObjCMethodSpecifier,
  TT_ObjCProperty,
  TT_ObjCSelectorName,
  TT_OverloadedOperatorLParen,
  TT_PointerOrReference,
  TT_PureVirtualSpecifier,
  TT_RangeBasedForLoopColon,
  TT_StartOfName,
  TT_TemplateCloser,
  TT_TemplateOpener,
  TT_TrailingUnaryOperator,
  TT_UnaryOperator,
  TT_Unknown // Default type assigned by the AnnotatedToken constructor.
};
59249261Sdim
/// \brief Classification stored in \c AnnotatedLine::Type.
///
/// The enumerators use implicit numbering, so \c LT_Invalid is 0.
enum LineType {
  LT_Invalid,
  LT_Other,
  LT_BuilderTypeCall,
  LT_PreprocessorDirective,
  LT_VirtualFunctionDecl,
  LT_ObjCDecl, // An @interface, @implementation, or @protocol line.
  LT_ObjCMethodDecl,
  LT_ObjCProperty // An @property line.
};
70249261Sdim
71249261Sdimclass AnnotatedToken {
72249261Sdimpublic:
73249261Sdim  explicit AnnotatedToken(const FormatToken &FormatTok)
74249261Sdim      : FormatTok(FormatTok), Type(TT_Unknown), SpacesRequiredBefore(0),
75249261Sdim        CanBreakBefore(false), MustBreakBefore(false),
76249261Sdim        ClosesTemplateDeclaration(false), MatchingParen(NULL),
77249261Sdim        ParameterCount(0), BindingStrength(0), SplitPenalty(0),
78251662Sdim        LongestObjCSelectorName(0), Parent(NULL),
79249261Sdim        FakeRParens(0), LastInChainOfCalls(false),
80251662Sdim        PartOfMultiVariableDeclStmt(false), NoMoreTokensOnLevel(false) {}
81249261Sdim
82249261Sdim  bool is(tok::TokenKind Kind) const { return FormatTok.Tok.is(Kind); }
83249261Sdim
84249261Sdim  bool isOneOf(tok::TokenKind K1, tok::TokenKind K2) const {
85249261Sdim    return is(K1) || is(K2);
86249261Sdim  }
87249261Sdim
88249261Sdim  bool isOneOf(tok::TokenKind K1, tok::TokenKind K2, tok::TokenKind K3) const {
89249261Sdim    return is(K1) || is(K2) || is(K3);
90249261Sdim  }
91249261Sdim
92249261Sdim  bool isOneOf(
93249261Sdim      tok::TokenKind K1, tok::TokenKind K2, tok::TokenKind K3,
94249261Sdim      tok::TokenKind K4, tok::TokenKind K5 = tok::NUM_TOKENS,
95249261Sdim      tok::TokenKind K6 = tok::NUM_TOKENS, tok::TokenKind K7 = tok::NUM_TOKENS,
96249261Sdim      tok::TokenKind K8 = tok::NUM_TOKENS, tok::TokenKind K9 = tok::NUM_TOKENS,
97249261Sdim      tok::TokenKind K10 = tok::NUM_TOKENS,
98249261Sdim      tok::TokenKind K11 = tok::NUM_TOKENS,
99249261Sdim      tok::TokenKind K12 = tok::NUM_TOKENS) const {
100249261Sdim    return is(K1) || is(K2) || is(K3) || is(K4) || is(K5) || is(K6) || is(K7) ||
101249261Sdim           is(K8) || is(K9) || is(K10) || is(K11) || is(K12);
102249261Sdim  }
103249261Sdim
104249261Sdim  bool isNot(tok::TokenKind Kind) const { return FormatTok.Tok.isNot(Kind); }
105249261Sdim
106249261Sdim  bool isObjCAtKeyword(tok::ObjCKeywordKind Kind) const {
107249261Sdim    return FormatTok.Tok.isObjCAtKeyword(Kind);
108249261Sdim  }
109249261Sdim
110249261Sdim  bool isAccessSpecifier(bool ColonRequired = true) const {
111249261Sdim    return isOneOf(tok::kw_public, tok::kw_protected, tok::kw_private) &&
112249261Sdim           (!ColonRequired ||
113249261Sdim            (!Children.empty() && Children[0].is(tok::colon)));
114249261Sdim  }
115249261Sdim
116249261Sdim  bool isObjCAccessSpecifier() const {
117249261Sdim    return is(tok::at) && !Children.empty() &&
118249261Sdim           (Children[0].isObjCAtKeyword(tok::objc_public) ||
119249261Sdim            Children[0].isObjCAtKeyword(tok::objc_protected) ||
120249261Sdim            Children[0].isObjCAtKeyword(tok::objc_package) ||
121249261Sdim            Children[0].isObjCAtKeyword(tok::objc_private));
122249261Sdim  }
123249261Sdim
124251662Sdim  /// \brief Returns whether \p Tok is ([{ or a template opening <.
125251662Sdim  bool opensScope() const;
126251662Sdim  /// \brief Returns whether \p Tok is )]} or a template opening >.
127251662Sdim  bool closesScope() const;
128251662Sdim
129251662Sdim  bool isUnaryOperator() const;
130251662Sdim  bool isBinaryOperator() const;
131251662Sdim  bool isTrailingComment() const;
132251662Sdim
133249261Sdim  FormatToken FormatTok;
134249261Sdim
135249261Sdim  TokenType Type;
136249261Sdim
137249261Sdim  unsigned SpacesRequiredBefore;
138249261Sdim  bool CanBreakBefore;
139249261Sdim  bool MustBreakBefore;
140249261Sdim
141249261Sdim  bool ClosesTemplateDeclaration;
142249261Sdim
143249261Sdim  AnnotatedToken *MatchingParen;
144249261Sdim
145249261Sdim  /// \brief Number of parameters, if this is "(", "[" or "<".
146249261Sdim  ///
147249261Sdim  /// This is initialized to 1 as we don't need to distinguish functions with
148249261Sdim  /// 0 parameters from functions with 1 parameter. Thus, we can simply count
149249261Sdim  /// the number of commas.
150249261Sdim  unsigned ParameterCount;
151249261Sdim
152249261Sdim  /// \brief The total length of the line up to and including this token.
153249261Sdim  unsigned TotalLength;
154249261Sdim
155249261Sdim  // FIXME: Come up with a 'cleaner' concept.
156249261Sdim  /// \brief The binding strength of a token. This is a combined value of
157249261Sdim  /// operator precedence, parenthesis nesting, etc.
158249261Sdim  unsigned BindingStrength;
159249261Sdim
160249261Sdim  /// \brief Penalty for inserting a line break before this token.
161249261Sdim  unsigned SplitPenalty;
162249261Sdim
163249261Sdim  /// \brief If this is the first ObjC selector name in an ObjC method
164249261Sdim  /// definition or call, this contains the length of the longest name.
165249261Sdim  unsigned LongestObjCSelectorName;
166249261Sdim
167249261Sdim  std::vector<AnnotatedToken> Children;
168249261Sdim  AnnotatedToken *Parent;
169249261Sdim
170251662Sdim  /// \brief Stores the number of required fake parentheses and the
171251662Sdim  /// corresponding operator precedence.
172251662Sdim  ///
173251662Sdim  /// If multiple fake parentheses start at a token, this vector stores them in
174251662Sdim  /// reverse order, i.e. inner fake parenthesis first.
175251662Sdim  SmallVector<prec::Level, 4>  FakeLParens;
176249261Sdim  /// \brief Insert this many fake ) after this token for correct indentation.
177249261Sdim  unsigned FakeRParens;
178249261Sdim
179249261Sdim  /// \brief Is this the last "." or "->" in a builder-type call?
180249261Sdim  bool LastInChainOfCalls;
181249261Sdim
182249261Sdim  /// \brief Is this token part of a \c DeclStmt defining multiple variables?
183249261Sdim  ///
184249261Sdim  /// Only set if \c Type == \c TT_StartOfName.
185249261Sdim  bool PartOfMultiVariableDeclStmt;
186249261Sdim
187251662Sdim  /// \brief Set to \c true for "("-tokens if this is the last token other than
188251662Sdim  /// ")" in the next higher parenthesis level.
189251662Sdim  ///
190251662Sdim  /// If this is \c true, no more formatting decisions have to be made on the
191251662Sdim  /// next higher parenthesis level, enabling optimizations.
192251662Sdim  ///
193251662Sdim  /// Example:
194251662Sdim  /// \code
195251662Sdim  /// aaaaaa(aaaaaa());
196251662Sdim  ///              ^  // Set to true for this parenthesis.
197251662Sdim  /// \endcode
198251662Sdim  bool NoMoreTokensOnLevel;
199251662Sdim
200251662Sdim  /// \brief Returns the previous token ignoring comments.
201251662Sdim  AnnotatedToken *getPreviousNoneComment() const;
202251662Sdim
203251662Sdim  /// \brief Returns the next token ignoring comments.
204251662Sdim  const AnnotatedToken *getNextNoneComment() const;
205249261Sdim};
206249261Sdim
207249261Sdimclass AnnotatedLine {
208249261Sdimpublic:
209249261Sdim  AnnotatedLine(const UnwrappedLine &Line)
210249261Sdim      : First(Line.Tokens.front()), Level(Line.Level),
211249261Sdim        InPPDirective(Line.InPPDirective),
212251662Sdim        MustBeDeclaration(Line.MustBeDeclaration), MightBeFunctionDecl(false),
213251662Sdim        StartsDefinition(false) {
214249261Sdim    assert(!Line.Tokens.empty());
215249261Sdim    AnnotatedToken *Current = &First;
216249261Sdim    for (std::list<FormatToken>::const_iterator I = ++Line.Tokens.begin(),
217249261Sdim                                                E = Line.Tokens.end();
218249261Sdim         I != E; ++I) {
219249261Sdim      Current->Children.push_back(AnnotatedToken(*I));
220249261Sdim      Current->Children[0].Parent = Current;
221249261Sdim      Current = &Current->Children[0];
222249261Sdim    }
223249261Sdim    Last = Current;
224249261Sdim  }
225249261Sdim  AnnotatedLine(const AnnotatedLine &Other)
226249261Sdim      : First(Other.First), Type(Other.Type), Level(Other.Level),
227249261Sdim        InPPDirective(Other.InPPDirective),
228249261Sdim        MustBeDeclaration(Other.MustBeDeclaration),
229251662Sdim        MightBeFunctionDecl(Other.MightBeFunctionDecl),
230251662Sdim        StartsDefinition(Other.StartsDefinition) {
231249261Sdim    Last = &First;
232249261Sdim    while (!Last->Children.empty()) {
233249261Sdim      Last->Children[0].Parent = Last;
234249261Sdim      Last = &Last->Children[0];
235249261Sdim    }
236249261Sdim  }
237249261Sdim
238249261Sdim  AnnotatedToken First;
239249261Sdim  AnnotatedToken *Last;
240249261Sdim
241249261Sdim  LineType Type;
242249261Sdim  unsigned Level;
243249261Sdim  bool InPPDirective;
244249261Sdim  bool MustBeDeclaration;
245249261Sdim  bool MightBeFunctionDecl;
246251662Sdim  bool StartsDefinition;
247249261Sdim};
248249261Sdim
249249261Sdiminline prec::Level getPrecedence(const AnnotatedToken &Tok) {
250249261Sdim  return getBinOpPrecedence(Tok.FormatTok.Tok.getKind(), true, true);
251249261Sdim}
252249261Sdim
253249261Sdim/// \brief Determines extra information about the tokens comprising an
254249261Sdim/// \c UnwrappedLine.
255249261Sdimclass TokenAnnotator {
256249261Sdimpublic:
257249261Sdim  TokenAnnotator(const FormatStyle &Style, SourceManager &SourceMgr, Lexer &Lex,
258249261Sdim                 IdentifierInfo &Ident_in)
259249261Sdim      : Style(Style), SourceMgr(SourceMgr), Lex(Lex), Ident_in(Ident_in) {
260249261Sdim  }
261249261Sdim
262249261Sdim  void annotate(AnnotatedLine &Line);
263249261Sdim  void calculateFormattingInformation(AnnotatedLine &Line);
264249261Sdim
265249261Sdimprivate:
266249261Sdim  /// \brief Calculate the penalty for splitting before \c Tok.
267249261Sdim  unsigned splitPenalty(const AnnotatedLine &Line, const AnnotatedToken &Tok);
268249261Sdim
269249261Sdim  bool spaceRequiredBetween(const AnnotatedLine &Line,
270249261Sdim                            const AnnotatedToken &Left,
271249261Sdim                            const AnnotatedToken &Right);
272249261Sdim
273249261Sdim  bool spaceRequiredBefore(const AnnotatedLine &Line,
274249261Sdim                           const AnnotatedToken &Tok);
275249261Sdim
276249261Sdim  bool canBreakBefore(const AnnotatedLine &Line, const AnnotatedToken &Right);
277249261Sdim
278251662Sdim  void printDebugInfo(const AnnotatedLine &Line);
279251662Sdim
280249261Sdim  const FormatStyle &Style;
281249261Sdim  SourceManager &SourceMgr;
282249261Sdim  Lexer &Lex;
283249261Sdim
284249261Sdim  // Contextual keywords:
285249261Sdim  IdentifierInfo &Ident_in;
286249261Sdim};
287249261Sdim
288249261Sdim} // end namespace format
289249261Sdim} // end namespace clang
290249261Sdim
291249261Sdim#endif // LLVM_CLANG_FORMAT_TOKEN_ANNOTATOR_H
292