1249261Sdim//===--- TokenAnnotator.h - Format C++ code ---------------------*- C++ -*-===// 2249261Sdim// 3249261Sdim// The LLVM Compiler Infrastructure 4249261Sdim// 5249261Sdim// This file is distributed under the University of Illinois Open Source 6249261Sdim// License. See LICENSE.TXT for details. 7249261Sdim// 8249261Sdim//===----------------------------------------------------------------------===// 9249261Sdim/// 10249261Sdim/// \file 11249261Sdim/// \brief This file implements a token annotator, i.e. creates 12249261Sdim/// \c AnnotatedTokens out of \c FormatTokens with required extra information. 13249261Sdim/// 14249261Sdim//===----------------------------------------------------------------------===// 15249261Sdim 16249261Sdim#ifndef LLVM_CLANG_FORMAT_TOKEN_ANNOTATOR_H 17249261Sdim#define LLVM_CLANG_FORMAT_TOKEN_ANNOTATOR_H 18249261Sdim 19249261Sdim#include "UnwrappedLineParser.h" 20249261Sdim#include "clang/Basic/OperatorPrecedence.h" 21249261Sdim#include "clang/Format/Format.h" 22249261Sdim#include <string> 23249261Sdim 24249261Sdimnamespace clang { 25249261Sdimclass Lexer; 26249261Sdimclass SourceManager; 27249261Sdim 28249261Sdimnamespace format { 29249261Sdim 30249261Sdimenum TokenType { 31249261Sdim TT_BinaryOperator, 32249261Sdim TT_BlockComment, 33249261Sdim TT_CastRParen, 34249261Sdim TT_ConditionalExpr, 35249261Sdim TT_CtorInitializerColon, 36249261Sdim TT_ImplicitStringLiteral, 37249261Sdim TT_InlineASMColon, 38249261Sdim TT_InheritanceColon, 39249261Sdim TT_LineComment, 40249261Sdim TT_ObjCArrayLiteral, 41249261Sdim TT_ObjCBlockLParen, 42249261Sdim TT_ObjCDecl, 43249261Sdim TT_ObjCForIn, 44249261Sdim TT_ObjCMethodExpr, 45249261Sdim TT_ObjCMethodSpecifier, 46249261Sdim TT_ObjCProperty, 47249261Sdim TT_ObjCSelectorName, 48249261Sdim TT_OverloadedOperatorLParen, 49249261Sdim TT_PointerOrReference, 50249261Sdim TT_PureVirtualSpecifier, 51249261Sdim TT_RangeBasedForLoopColon, 52249261Sdim TT_StartOfName, 53249261Sdim TT_TemplateCloser, 54249261Sdim TT_TemplateOpener, 55249261Sdim TT_TrailingUnaryOperator, 56249261Sdim TT_UnaryOperator, 57249261Sdim TT_Unknown 58249261Sdim}; 59249261Sdim 60249261Sdimenum LineType { 61249261Sdim LT_Invalid, 62249261Sdim LT_Other, 63249261Sdim LT_BuilderTypeCall, 64249261Sdim LT_PreprocessorDirective, 65249261Sdim LT_VirtualFunctionDecl, 66249261Sdim LT_ObjCDecl, // An @interface, @implementation, or @protocol line. 67249261Sdim LT_ObjCMethodDecl, 68249261Sdim LT_ObjCProperty // An @property line. 69249261Sdim}; 70249261Sdim 71249261Sdimclass AnnotatedToken { 72249261Sdimpublic: 73249261Sdim explicit AnnotatedToken(const FormatToken &FormatTok) 74249261Sdim : FormatTok(FormatTok), Type(TT_Unknown), SpacesRequiredBefore(0), 75249261Sdim CanBreakBefore(false), MustBreakBefore(false), 76249261Sdim ClosesTemplateDeclaration(false), MatchingParen(NULL), 77249261Sdim ParameterCount(0), BindingStrength(0), SplitPenalty(0), 78251662Sdim LongestObjCSelectorName(0), Parent(NULL), 79249261Sdim FakeRParens(0), LastInChainOfCalls(false), 80251662Sdim PartOfMultiVariableDeclStmt(false), NoMoreTokensOnLevel(false) {} 81249261Sdim 82249261Sdim bool is(tok::TokenKind Kind) const { return FormatTok.Tok.is(Kind); } 83249261Sdim 84249261Sdim bool isOneOf(tok::TokenKind K1, tok::TokenKind K2) const { 85249261Sdim return is(K1) || is(K2); 86249261Sdim } 87249261Sdim 88249261Sdim bool isOneOf(tok::TokenKind K1, tok::TokenKind K2, tok::TokenKind K3) const { 89249261Sdim return is(K1) || is(K2) || is(K3); 90249261Sdim } 91249261Sdim 92249261Sdim bool isOneOf( 93249261Sdim tok::TokenKind K1, tok::TokenKind K2, tok::TokenKind K3, 94249261Sdim tok::TokenKind K4, tok::TokenKind K5 = tok::NUM_TOKENS, 95249261Sdim tok::TokenKind K6 = tok::NUM_TOKENS, tok::TokenKind K7 = tok::NUM_TOKENS, 96249261Sdim tok::TokenKind K8 = tok::NUM_TOKENS, tok::TokenKind K9 = tok::NUM_TOKENS, 97249261Sdim tok::TokenKind K10 = tok::NUM_TOKENS, 98249261Sdim tok::TokenKind K11 = tok::NUM_TOKENS, 99249261Sdim tok::TokenKind K12 = tok::NUM_TOKENS) const { 100249261Sdim return is(K1) || is(K2) || is(K3) || is(K4) || is(K5) || is(K6) || is(K7) || 101249261Sdim is(K8) || is(K9) || is(K10) || is(K11) || is(K12); 102249261Sdim } 103249261Sdim 104249261Sdim bool isNot(tok::TokenKind Kind) const { return FormatTok.Tok.isNot(Kind); } 105249261Sdim 106249261Sdim bool isObjCAtKeyword(tok::ObjCKeywordKind Kind) const { 107249261Sdim return FormatTok.Tok.isObjCAtKeyword(Kind); 108249261Sdim } 109249261Sdim 110249261Sdim bool isAccessSpecifier(bool ColonRequired = true) const { 111249261Sdim return isOneOf(tok::kw_public, tok::kw_protected, tok::kw_private) && 112249261Sdim (!ColonRequired || 113249261Sdim (!Children.empty() && Children[0].is(tok::colon))); 114249261Sdim } 115249261Sdim 116249261Sdim bool isObjCAccessSpecifier() const { 117249261Sdim return is(tok::at) && !Children.empty() && 118249261Sdim (Children[0].isObjCAtKeyword(tok::objc_public) || 119249261Sdim Children[0].isObjCAtKeyword(tok::objc_protected) || 120249261Sdim Children[0].isObjCAtKeyword(tok::objc_package) || 121249261Sdim Children[0].isObjCAtKeyword(tok::objc_private)); 122249261Sdim } 123249261Sdim 124251662Sdim /// \brief Returns whether \p Tok is ([{ or a template opening <. 125251662Sdim bool opensScope() const; 126251662Sdim /// \brief Returns whether \p Tok is )]} or a template opening >. 127251662Sdim bool closesScope() const; 128251662Sdim 129251662Sdim bool isUnaryOperator() const; 130251662Sdim bool isBinaryOperator() const; 131251662Sdim bool isTrailingComment() const; 132251662Sdim 133249261Sdim FormatToken FormatTok; 134249261Sdim 135249261Sdim TokenType Type; 136249261Sdim 137249261Sdim unsigned SpacesRequiredBefore; 138249261Sdim bool CanBreakBefore; 139249261Sdim bool MustBreakBefore; 140249261Sdim 141249261Sdim bool ClosesTemplateDeclaration; 142249261Sdim 143249261Sdim AnnotatedToken *MatchingParen; 144249261Sdim 145249261Sdim /// \brief Number of parameters, if this is "(", "[" or "<". 146249261Sdim /// 147249261Sdim /// This is initialized to 1 as we don't need to distinguish functions with 148249261Sdim /// 0 parameters from functions with 1 parameter. Thus, we can simply count 149249261Sdim /// the number of commas. 150249261Sdim unsigned ParameterCount; 151249261Sdim 152249261Sdim /// \brief The total length of the line up to and including this token. 153249261Sdim unsigned TotalLength; 154249261Sdim 155249261Sdim // FIXME: Come up with a 'cleaner' concept. 156249261Sdim /// \brief The binding strength of a token. This is a combined value of 157249261Sdim /// operator precedence, parenthesis nesting, etc. 158249261Sdim unsigned BindingStrength; 159249261Sdim 160249261Sdim /// \brief Penalty for inserting a line break before this token. 161249261Sdim unsigned SplitPenalty; 162249261Sdim 163249261Sdim /// \brief If this is the first ObjC selector name in an ObjC method 164249261Sdim /// definition or call, this contains the length of the longest name. 165249261Sdim unsigned LongestObjCSelectorName; 166249261Sdim 167249261Sdim std::vector<AnnotatedToken> Children; 168249261Sdim AnnotatedToken *Parent; 169249261Sdim 170251662Sdim /// \brief Stores the number of required fake parentheses and the 171251662Sdim /// corresponding operator precedence. 172251662Sdim /// 173251662Sdim /// If multiple fake parentheses start at a token, this vector stores them in 174251662Sdim /// reverse order, i.e. inner fake parenthesis first. 175251662Sdim SmallVector<prec::Level, 4> FakeLParens; 176249261Sdim /// \brief Insert this many fake ) after this token for correct indentation. 177249261Sdim unsigned FakeRParens; 178249261Sdim 179249261Sdim /// \brief Is this the last "." or "->" in a builder-type call? 180249261Sdim bool LastInChainOfCalls; 181249261Sdim 182249261Sdim /// \brief Is this token part of a \c DeclStmt defining multiple variables? 183249261Sdim /// 184249261Sdim /// Only set if \c Type == \c TT_StartOfName. 185249261Sdim bool PartOfMultiVariableDeclStmt; 186249261Sdim 187251662Sdim /// \brief Set to \c true for "("-tokens if this is the last token other than 188251662Sdim /// ")" in the next higher parenthesis level. 189251662Sdim /// 190251662Sdim /// If this is \c true, no more formatting decisions have to be made on the 191251662Sdim /// next higher parenthesis level, enabling optimizations. 192251662Sdim /// 193251662Sdim /// Example: 194251662Sdim /// \code 195251662Sdim /// aaaaaa(aaaaaa()); 196251662Sdim /// ^ // Set to true for this parenthesis. 197251662Sdim /// \endcode 198251662Sdim bool NoMoreTokensOnLevel; 199251662Sdim 200251662Sdim /// \brief Returns the previous token ignoring comments. 201251662Sdim AnnotatedToken *getPreviousNoneComment() const; 202251662Sdim 203251662Sdim /// \brief Returns the next token ignoring comments. 204251662Sdim const AnnotatedToken *getNextNoneComment() const; 205249261Sdim}; 206249261Sdim 207249261Sdimclass AnnotatedLine { 208249261Sdimpublic: 209249261Sdim AnnotatedLine(const UnwrappedLine &Line) 210249261Sdim : First(Line.Tokens.front()), Level(Line.Level), 211249261Sdim InPPDirective(Line.InPPDirective), 212251662Sdim MustBeDeclaration(Line.MustBeDeclaration), MightBeFunctionDecl(false), 213251662Sdim StartsDefinition(false) { 214249261Sdim assert(!Line.Tokens.empty()); 215249261Sdim AnnotatedToken *Current = &First; 216249261Sdim for (std::list<FormatToken>::const_iterator I = ++Line.Tokens.begin(), 217249261Sdim E = Line.Tokens.end(); 218249261Sdim I != E; ++I) { 219249261Sdim Current->Children.push_back(AnnotatedToken(*I)); 220249261Sdim Current->Children[0].Parent = Current; 221249261Sdim Current = &Current->Children[0]; 222249261Sdim } 223249261Sdim Last = Current; 224249261Sdim } 225249261Sdim AnnotatedLine(const AnnotatedLine &Other) 226249261Sdim : First(Other.First), Type(Other.Type), Level(Other.Level), 227249261Sdim InPPDirective(Other.InPPDirective), 228249261Sdim MustBeDeclaration(Other.MustBeDeclaration), 229251662Sdim MightBeFunctionDecl(Other.MightBeFunctionDecl), 230251662Sdim StartsDefinition(Other.StartsDefinition) { 231249261Sdim Last = &First; 232249261Sdim while (!Last->Children.empty()) { 233249261Sdim Last->Children[0].Parent = Last; 234249261Sdim Last = &Last->Children[0]; 235249261Sdim } 236249261Sdim } 237249261Sdim 238249261Sdim AnnotatedToken First; 239249261Sdim AnnotatedToken *Last; 240249261Sdim 241249261Sdim LineType Type; 242249261Sdim unsigned Level; 243249261Sdim bool InPPDirective; 244249261Sdim bool MustBeDeclaration; 245249261Sdim bool MightBeFunctionDecl; 246251662Sdim bool StartsDefinition; 247249261Sdim}; 248249261Sdim 249249261Sdiminline prec::Level getPrecedence(const AnnotatedToken &Tok) { 250249261Sdim return getBinOpPrecedence(Tok.FormatTok.Tok.getKind(), true, true); 251249261Sdim} 252249261Sdim 253249261Sdim/// \brief Determines extra information about the tokens comprising an 254249261Sdim/// \c UnwrappedLine. 255249261Sdimclass TokenAnnotator { 256249261Sdimpublic: 257249261Sdim TokenAnnotator(const FormatStyle &Style, SourceManager &SourceMgr, Lexer &Lex, 258249261Sdim IdentifierInfo &Ident_in) 259249261Sdim : Style(Style), SourceMgr(SourceMgr), Lex(Lex), Ident_in(Ident_in) { 260249261Sdim } 261249261Sdim 262249261Sdim void annotate(AnnotatedLine &Line); 263249261Sdim void calculateFormattingInformation(AnnotatedLine &Line); 264249261Sdim 265249261Sdimprivate: 266249261Sdim /// \brief Calculate the penalty for splitting before \c Tok. 267249261Sdim unsigned splitPenalty(const AnnotatedLine &Line, const AnnotatedToken &Tok); 268249261Sdim 269249261Sdim bool spaceRequiredBetween(const AnnotatedLine &Line, 270249261Sdim const AnnotatedToken &Left, 271249261Sdim const AnnotatedToken &Right); 272249261Sdim 273249261Sdim bool spaceRequiredBefore(const AnnotatedLine &Line, 274249261Sdim const AnnotatedToken &Tok); 275249261Sdim 276249261Sdim bool canBreakBefore(const AnnotatedLine &Line, const AnnotatedToken &Right); 277249261Sdim 278251662Sdim void printDebugInfo(const AnnotatedLine &Line); 279251662Sdim 280249261Sdim const FormatStyle &Style; 281249261Sdim SourceManager &SourceMgr; 282249261Sdim Lexer &Lex; 283249261Sdim 284249261Sdim // Contextual keywords: 285249261Sdim IdentifierInfo &Ident_in; 286249261Sdim}; 287249261Sdim 288249261Sdim} // end namespace format 289249261Sdim} // end namespace clang 290249261Sdim 291249261Sdim#endif // LLVM_CLANG_FORMAT_TOKEN_ANNOTATOR_H 292