1//===--- TokenAnnotator.h - Format C++ code ---------------------*- C++ -*-===// 2// 3// The LLVM Compiler Infrastructure 4// 5// This file is distributed under the University of Illinois Open Source 6// License. See LICENSE.TXT for details. 7// 8//===----------------------------------------------------------------------===// 9/// 10/// \file 11/// \brief This file implements a token annotator, i.e. creates 12/// \c AnnotatedTokens out of \c FormatTokens with required extra information. 13/// 14//===----------------------------------------------------------------------===// 15 16#ifndef LLVM_CLANG_FORMAT_TOKEN_ANNOTATOR_H 17#define LLVM_CLANG_FORMAT_TOKEN_ANNOTATOR_H 18 19#include "UnwrappedLineParser.h" 20#include "clang/Basic/OperatorPrecedence.h" 21#include "clang/Format/Format.h" 22#include <string> 23 24namespace clang { 25class Lexer; 26class SourceManager; 27 28namespace format { 29 30enum TokenType { 31 TT_BinaryOperator, 32 TT_BlockComment, 33 TT_CastRParen, 34 TT_ConditionalExpr, 35 TT_CtorInitializerColon, 36 TT_ImplicitStringLiteral, 37 TT_InlineASMColon, 38 TT_InheritanceColon, 39 TT_LineComment, 40 TT_ObjCArrayLiteral, 41 TT_ObjCBlockLParen, 42 TT_ObjCDecl, 43 TT_ObjCForIn, 44 TT_ObjCMethodExpr, 45 TT_ObjCMethodSpecifier, 46 TT_ObjCProperty, 47 TT_ObjCSelectorName, 48 TT_OverloadedOperatorLParen, 49 TT_PointerOrReference, 50 TT_PureVirtualSpecifier, 51 TT_RangeBasedForLoopColon, 52 TT_StartOfName, 53 TT_TemplateCloser, 54 TT_TemplateOpener, 55 TT_TrailingUnaryOperator, 56 TT_UnaryOperator, 57 TT_Unknown 58}; 59 60enum LineType { 61 LT_Invalid, 62 LT_Other, 63 LT_BuilderTypeCall, 64 LT_PreprocessorDirective, 65 LT_VirtualFunctionDecl, 66 LT_ObjCDecl, // An @interface, @implementation, or @protocol line. 67 LT_ObjCMethodDecl, 68 LT_ObjCProperty // An @property line. 69}; 70 71class AnnotatedToken { 72public: 73 explicit AnnotatedToken(const FormatToken &FormatTok) 74 : FormatTok(FormatTok), Type(TT_Unknown), SpacesRequiredBefore(0), 75 CanBreakBefore(false), MustBreakBefore(false), 76 ClosesTemplateDeclaration(false), MatchingParen(NULL), 77 ParameterCount(0), BindingStrength(0), SplitPenalty(0), 78 LongestObjCSelectorName(0), Parent(NULL), 79 FakeRParens(0), LastInChainOfCalls(false), 80 PartOfMultiVariableDeclStmt(false), NoMoreTokensOnLevel(false) {} 81 82 bool is(tok::TokenKind Kind) const { return FormatTok.Tok.is(Kind); } 83 84 bool isOneOf(tok::TokenKind K1, tok::TokenKind K2) const { 85 return is(K1) || is(K2); 86 } 87 88 bool isOneOf(tok::TokenKind K1, tok::TokenKind K2, tok::TokenKind K3) const { 89 return is(K1) || is(K2) || is(K3); 90 } 91 92 bool isOneOf( 93 tok::TokenKind K1, tok::TokenKind K2, tok::TokenKind K3, 94 tok::TokenKind K4, tok::TokenKind K5 = tok::NUM_TOKENS, 95 tok::TokenKind K6 = tok::NUM_TOKENS, tok::TokenKind K7 = tok::NUM_TOKENS, 96 tok::TokenKind K8 = tok::NUM_TOKENS, tok::TokenKind K9 = tok::NUM_TOKENS, 97 tok::TokenKind K10 = tok::NUM_TOKENS, 98 tok::TokenKind K11 = tok::NUM_TOKENS, 99 tok::TokenKind K12 = tok::NUM_TOKENS) const { 100 return is(K1) || is(K2) || is(K3) || is(K4) || is(K5) || is(K6) || is(K7) || 101 is(K8) || is(K9) || is(K10) || is(K11) || is(K12); 102 } 103 104 bool isNot(tok::TokenKind Kind) const { return FormatTok.Tok.isNot(Kind); } 105 106 bool isObjCAtKeyword(tok::ObjCKeywordKind Kind) const { 107 return FormatTok.Tok.isObjCAtKeyword(Kind); 108 } 109 110 bool isAccessSpecifier(bool ColonRequired = true) const { 111 return isOneOf(tok::kw_public, tok::kw_protected, tok::kw_private) && 112 (!ColonRequired || 113 (!Children.empty() && Children[0].is(tok::colon))); 114 } 115 116 bool isObjCAccessSpecifier() const { 117 return is(tok::at) && !Children.empty() && 118 (Children[0].isObjCAtKeyword(tok::objc_public) || 119 Children[0].isObjCAtKeyword(tok::objc_protected) || 120 Children[0].isObjCAtKeyword(tok::objc_package) || 121 Children[0].isObjCAtKeyword(tok::objc_private)); 122 } 123 124 /// \brief Returns whether \p Tok is ([{ or a template opening <. 125 bool opensScope() const; 126 /// \brief Returns whether \p Tok is )]} or a template opening >. 127 bool closesScope() const; 128 129 bool isUnaryOperator() const; 130 bool isBinaryOperator() const; 131 bool isTrailingComment() const; 132 133 FormatToken FormatTok; 134 135 TokenType Type; 136 137 unsigned SpacesRequiredBefore; 138 bool CanBreakBefore; 139 bool MustBreakBefore; 140 141 bool ClosesTemplateDeclaration; 142 143 AnnotatedToken *MatchingParen; 144 145 /// \brief Number of parameters, if this is "(", "[" or "<". 146 /// 147 /// This is initialized to 1 as we don't need to distinguish functions with 148 /// 0 parameters from functions with 1 parameter. Thus, we can simply count 149 /// the number of commas. 150 unsigned ParameterCount; 151 152 /// \brief The total length of the line up to and including this token. 153 unsigned TotalLength; 154 155 // FIXME: Come up with a 'cleaner' concept. 156 /// \brief The binding strength of a token. This is a combined value of 157 /// operator precedence, parenthesis nesting, etc. 158 unsigned BindingStrength; 159 160 /// \brief Penalty for inserting a line break before this token. 161 unsigned SplitPenalty; 162 163 /// \brief If this is the first ObjC selector name in an ObjC method 164 /// definition or call, this contains the length of the longest name. 165 unsigned LongestObjCSelectorName; 166 167 std::vector<AnnotatedToken> Children; 168 AnnotatedToken *Parent; 169 170 /// \brief Stores the number of required fake parentheses and the 171 /// corresponding operator precedence. 172 /// 173 /// If multiple fake parentheses start at a token, this vector stores them in 174 /// reverse order, i.e. inner fake parenthesis first. 175 SmallVector<prec::Level, 4> FakeLParens; 176 /// \brief Insert this many fake ) after this token for correct indentation. 177 unsigned FakeRParens; 178 179 /// \brief Is this the last "." or "->" in a builder-type call? 180 bool LastInChainOfCalls; 181 182 /// \brief Is this token part of a \c DeclStmt defining multiple variables? 183 /// 184 /// Only set if \c Type == \c TT_StartOfName. 185 bool PartOfMultiVariableDeclStmt; 186 187 /// \brief Set to \c true for "("-tokens if this is the last token other than 188 /// ")" in the next higher parenthesis level. 189 /// 190 /// If this is \c true, no more formatting decisions have to be made on the 191 /// next higher parenthesis level, enabling optimizations. 192 /// 193 /// Example: 194 /// \code 195 /// aaaaaa(aaaaaa()); 196 /// ^ // Set to true for this parenthesis. 197 /// \endcode 198 bool NoMoreTokensOnLevel; 199 200 /// \brief Returns the previous token ignoring comments. 201 AnnotatedToken *getPreviousNoneComment() const; 202 203 /// \brief Returns the next token ignoring comments. 204 const AnnotatedToken *getNextNoneComment() const; 205}; 206 207class AnnotatedLine { 208public: 209 AnnotatedLine(const UnwrappedLine &Line) 210 : First(Line.Tokens.front()), Level(Line.Level), 211 InPPDirective(Line.InPPDirective), 212 MustBeDeclaration(Line.MustBeDeclaration), MightBeFunctionDecl(false), 213 StartsDefinition(false) { 214 assert(!Line.Tokens.empty()); 215 AnnotatedToken *Current = &First; 216 for (std::list<FormatToken>::const_iterator I = ++Line.Tokens.begin(), 217 E = Line.Tokens.end(); 218 I != E; ++I) { 219 Current->Children.push_back(AnnotatedToken(*I)); 220 Current->Children[0].Parent = Current; 221 Current = &Current->Children[0]; 222 } 223 Last = Current; 224 } 225 AnnotatedLine(const AnnotatedLine &Other) 226 : First(Other.First), Type(Other.Type), Level(Other.Level), 227 InPPDirective(Other.InPPDirective), 228 MustBeDeclaration(Other.MustBeDeclaration), 229 MightBeFunctionDecl(Other.MightBeFunctionDecl), 230 StartsDefinition(Other.StartsDefinition) { 231 Last = &First; 232 while (!Last->Children.empty()) { 233 Last->Children[0].Parent = Last; 234 Last = &Last->Children[0]; 235 } 236 } 237 238 AnnotatedToken First; 239 AnnotatedToken *Last; 240 241 LineType Type; 242 unsigned Level; 243 bool InPPDirective; 244 bool MustBeDeclaration; 245 bool MightBeFunctionDecl; 246 bool StartsDefinition; 247}; 248 249inline prec::Level getPrecedence(const AnnotatedToken &Tok) { 250 return getBinOpPrecedence(Tok.FormatTok.Tok.getKind(), true, true); 251} 252 253/// \brief Determines extra information about the tokens comprising an 254/// \c UnwrappedLine. 255class TokenAnnotator { 256public: 257 TokenAnnotator(const FormatStyle &Style, SourceManager &SourceMgr, Lexer &Lex, 258 IdentifierInfo &Ident_in) 259 : Style(Style), SourceMgr(SourceMgr), Lex(Lex), Ident_in(Ident_in) { 260 } 261 262 void annotate(AnnotatedLine &Line); 263 void calculateFormattingInformation(AnnotatedLine &Line); 264 265private: 266 /// \brief Calculate the penalty for splitting before \c Tok. 267 unsigned splitPenalty(const AnnotatedLine &Line, const AnnotatedToken &Tok); 268 269 bool spaceRequiredBetween(const AnnotatedLine &Line, 270 const AnnotatedToken &Left, 271 const AnnotatedToken &Right); 272 273 bool spaceRequiredBefore(const AnnotatedLine &Line, 274 const AnnotatedToken &Tok); 275 276 bool canBreakBefore(const AnnotatedLine &Line, const AnnotatedToken &Right); 277 278 void printDebugInfo(const AnnotatedLine &Line); 279 280 const FormatStyle &Style; 281 SourceManager &SourceMgr; 282 Lexer &Lex; 283 284 // Contextual keywords: 285 IdentifierInfo &Ident_in; 286}; 287 288} // end namespace format 289} // end namespace clang 290 291#endif // LLVM_CLANG_FORMAT_TOKEN_ANNOTATOR_H 292