// UnwrappedLineParser.h revision 263508
//===--- UnwrappedLineParser.h - Format C++ code ----------------*- C++ -*-===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
///
/// \file
/// \brief This file contains the declaration of the UnwrappedLineParser,
/// which turns a stream of tokens into UnwrappedLines.
///
//===----------------------------------------------------------------------===//
15
16#ifndef LLVM_CLANG_FORMAT_UNWRAPPED_LINE_PARSER_H
17#define LLVM_CLANG_FORMAT_UNWRAPPED_LINE_PARSER_H
18
#include "clang/Basic/IdentifierTable.h"
#include "clang/Format/Format.h"
#include "FormatToken.h"
#include <list>
#include <stack>
#include <vector>
23
24namespace clang {
25namespace format {
26
// Only declared here: UnwrappedLine stores a list of nodes, while the node
// type itself (defined further down) stores UnwrappedLines as children.
struct UnwrappedLineNode;

/// \brief An unwrapped line is a sequence of \c Token, that we would like to
/// put on a single line if there was no column limit.
///
/// This is used as a main interface between the \c UnwrappedLineParser and the
/// \c UnwrappedLineFormatter. The key property is that changing the formatting
/// within an unwrapped line does not affect any other unwrapped lines.
struct UnwrappedLine {
  /// \brief Declared here; the definition is provided out of line.
  UnwrappedLine();

  // FIXME: Don't use std::list here.
  /// \brief The \c Tokens comprising this \c UnwrappedLine.
  std::list<UnwrappedLineNode> Tokens;

  /// \brief The indent level of the \c UnwrappedLine.
  unsigned Level;

  /// \brief Whether this \c UnwrappedLine is part of a preprocessor directive.
  bool InPPDirective;

  /// \brief Whether this \c UnwrappedLine must be a declaration.
  ///
  /// NOTE(review): presumably derived from whether the line sits inside a
  /// compound statement (see the parser's declaration-scope stack) -- confirm
  /// against the implementation file.
  bool MustBeDeclaration;
};
50
51class UnwrappedLineConsumer {
52public:
53  virtual ~UnwrappedLineConsumer() {}
54  virtual void consumeUnwrappedLine(const UnwrappedLine &Line) = 0;
55  virtual void finishRun() = 0;
56};
57
58class FormatTokenSource;
59
60class UnwrappedLineParser {
61public:
62  UnwrappedLineParser(const FormatStyle &Style, ArrayRef<FormatToken *> Tokens,
63                      UnwrappedLineConsumer &Callback);
64
65  /// Returns true in case of a structural error.
66  bool parse();
67
68private:
69  void reset();
70  void parseFile();
71  void parseLevel(bool HasOpeningBrace);
72  void parseBlock(bool MustBeDeclaration, bool AddLevel = true,
73                  bool MunchSemi = true);
74  void parseChildBlock();
75  void parsePPDirective();
76  void parsePPDefine();
77  void parsePPIf(bool IfDef);
78  void parsePPElIf();
79  void parsePPElse();
80  void parsePPEndIf();
81  void parsePPUnknown();
82  void parseStructuralElement();
83  bool tryToParseBracedList();
84  bool parseBracedList(bool ContinueOnSemicolons = false);
85  void parseReturn();
86  void parseParens();
87  void parseIfThenElse();
88  void parseForOrWhileLoop();
89  void parseDoWhile();
90  void parseLabel();
91  void parseCaseLabel();
92  void parseSwitch();
93  void parseNamespace();
94  void parseAccessSpecifier();
95  void parseEnum();
96  void parseRecord();
97  void parseObjCProtocolList();
98  void parseObjCUntilAtEnd();
99  void parseObjCInterfaceOrImplementation();
100  void parseObjCProtocol();
101  void tryToParseLambda();
102  bool tryToParseLambdaIntroducer();
103  void addUnwrappedLine();
104  bool eof() const;
105  void nextToken();
106  void readToken();
107  void flushComments(bool NewlineBeforeNext);
108  void pushToken(FormatToken *Tok);
109  void calculateBraceTypes();
110  void pushPPConditional();
111
112  // FIXME: We are constantly running into bugs where Line.Level is incorrectly
113  // subtracted from beyond 0. Introduce a method to subtract from Line.Level
114  // and use that everywhere in the Parser.
115  OwningPtr<UnwrappedLine> Line;
116
117  // Comments are sorted into unwrapped lines by whether they are in the same
118  // line as the previous token, or not. If not, they belong to the next token.
119  // Since the next token might already be in a new unwrapped line, we need to
120  // store the comments belonging to that token.
121  SmallVector<FormatToken *, 1> CommentsBeforeNextToken;
122  FormatToken *FormatTok;
123  bool MustBreakBeforeNextToken;
124
125  // The parsed lines. Only added to through \c CurrentLines.
126  SmallVector<UnwrappedLine, 8> Lines;
127
128  // Preprocessor directives are parsed out-of-order from other unwrapped lines.
129  // Thus, we need to keep a list of preprocessor directives to be reported
130  // after an unwarpped line that has been started was finished.
131  SmallVector<UnwrappedLine, 4> PreprocessorDirectives;
132
133  // New unwrapped lines are added via CurrentLines.
134  // Usually points to \c &Lines. While parsing a preprocessor directive when
135  // there is an unfinished previous unwrapped line, will point to
136  // \c &PreprocessorDirectives.
137  SmallVectorImpl<UnwrappedLine> *CurrentLines;
138
139  // We store for each line whether it must be a declaration depending on
140  // whether we are in a compound statement or not.
141  std::vector<bool> DeclarationScopeStack;
142
143  // Will be true if we encounter an error that leads to possibily incorrect
144  // indentation levels.
145  bool StructuralError;
146
147  const FormatStyle &Style;
148  FormatTokenSource *Tokens;
149  UnwrappedLineConsumer &Callback;
150
151  // FIXME: This is a temporary measure until we have reworked the ownership
152  // of the format tokens. The goal is to have the actual tokens created and
153  // owned outside of and handed into the UnwrappedLineParser.
154  ArrayRef<FormatToken *> AllTokens;
155
156  // Represents preprocessor branch type, so we can find matching
157  // #if/#else/#endif directives.
158  enum PPBranchKind {
159    PP_Conditional, // Any #if, #ifdef, #ifndef, #elif, block outside #if 0
160    PP_Unreachable  // #if 0 or a conditional preprocessor block inside #if 0
161  };
162
163  // Keeps a stack of currently active preprocessor branching directives.
164  SmallVector<PPBranchKind, 16> PPStack;
165
166  // The \c UnwrappedLineParser re-parses the code for each combination
167  // of preprocessor branches that can be taken.
168  // To that end, we take the same branch (#if, #else, or one of the #elif
169  // branches) for each nesting level of preprocessor branches.
170  // \c PPBranchLevel stores the current nesting level of preprocessor
171  // branches during one pass over the code.
172  int PPBranchLevel;
173
174  // Contains the current branch (#if, #else or one of the #elif branches)
175  // for each nesting level.
176  SmallVector<int, 8> PPLevelBranchIndex;
177
178  // Contains the maximum number of branches at each nesting level.
179  SmallVector<int, 8> PPLevelBranchCount;
180
181  // Contains the number of branches per nesting level we are currently
182  // in while parsing a preprocessor branch sequence.
183  // This is used to update PPLevelBranchCount at the end of a branch
184  // sequence.
185  std::stack<int> PPChainBranchIndex;
186
187  friend class ScopedLineState;
188};
189
190struct UnwrappedLineNode {
191  UnwrappedLineNode() : Tok(NULL) {}
192  UnwrappedLineNode(FormatToken *Tok) : Tok(Tok) {}
193
194  FormatToken *Tok;
195  SmallVector<UnwrappedLine, 0> Children;
196};
197
198inline UnwrappedLine::UnwrappedLine()
199    : Level(0), InPPDirective(false), MustBeDeclaration(false) {}
200
201} // end namespace format
202} // end namespace clang
203
204#endif // LLVM_CLANG_FORMAT_UNWRAPPED_LINE_PARSER_H
205