// ContinuationIndenter.h revision 263508
//===--- ContinuationIndenter.h - Format C++ code ---------------*- C++ -*-===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
///
/// \file
/// \brief This file declares an indenter that manages the indentation of
/// continuations.
///
//===----------------------------------------------------------------------===//
15
16#ifndef LLVM_CLANG_FORMAT_CONTINUATION_INDENTER_H
17#define LLVM_CLANG_FORMAT_CONTINUATION_INDENTER_H
18
#include "Encoding.h"
#include "clang/Format/Format.h"
#include <vector>
21
22namespace clang {
23class SourceManager;
24
25namespace format {
26
27class AnnotatedLine;
28struct FormatToken;
29struct LineState;
30struct ParenState;
31class WhitespaceManager;
32
33class ContinuationIndenter {
34public:
35  /// \brief Constructs a \c ContinuationIndenter to format \p Line starting in
36  /// column \p FirstIndent.
37  ContinuationIndenter(const FormatStyle &Style, SourceManager &SourceMgr,
38                       WhitespaceManager &Whitespaces,
39                       encoding::Encoding Encoding,
40                       bool BinPackInconclusiveFunctions);
41
42  /// \brief Get the initial state, i.e. the state after placing \p Line's
43  /// first token at \p FirstIndent.
44  LineState getInitialState(unsigned FirstIndent, const AnnotatedLine *Line,
45                            bool DryRun);
46
47  // FIXME: canBreak and mustBreak aren't strictly indentation-related. Find a
48  // better home.
49  /// \brief Returns \c true, if a line break after \p State is allowed.
50  bool canBreak(const LineState &State);
51
52  /// \brief Returns \c true, if a line break after \p State is mandatory.
53  bool mustBreak(const LineState &State);
54
55  /// \brief Appends the next token to \p State and updates information
56  /// necessary for indentation.
57  ///
58  /// Puts the token on the current line if \p Newline is \c false and adds a
59  /// line break and necessary indentation otherwise.
60  ///
61  /// If \p DryRun is \c false, also creates and stores the required
62  /// \c Replacement.
63  unsigned addTokenToState(LineState &State, bool Newline, bool DryRun,
64                           unsigned ExtraSpaces = 0);
65
66  /// \brief Get the column limit for this line. This is the style's column
67  /// limit, potentially reduced for preprocessor definitions.
68  unsigned getColumnLimit(const LineState &State) const;
69
70private:
71  /// \brief Mark the next token as consumed in \p State and modify its stacks
72  /// accordingly.
73  unsigned moveStateToNextToken(LineState &State, bool DryRun, bool Newline);
74
75  /// \brief If the current token sticks out over the end of the line, break
76  /// it if possible.
77  ///
78  /// \returns An extra penalty if a token was broken, otherwise 0.
79  ///
80  /// The returned penalty will cover the cost of the additional line breaks and
81  /// column limit violation in all lines except for the last one. The penalty
82  /// for the column limit violation in the last line (and in single line
83  /// tokens) is handled in \c addNextStateToQueue.
84  unsigned breakProtrudingToken(const FormatToken &Current, LineState &State,
85                                bool DryRun);
86
87  /// \brief Appends the next token to \p State and updates information
88  /// necessary for indentation.
89  ///
90  /// Puts the token on the current line.
91  ///
92  /// If \p DryRun is \c false, also creates and stores the required
93  /// \c Replacement.
94  void addTokenOnCurrentLine(LineState &State, bool DryRun,
95                             unsigned ExtraSpaces);
96
97  /// \brief Appends the next token to \p State and updates information
98  /// necessary for indentation.
99  ///
100  /// Adds a line break and necessary indentation.
101  ///
102  /// If \p DryRun is \c false, also creates and stores the required
103  /// \c Replacement.
104  unsigned addTokenOnNewLine(LineState &State, bool DryRun);
105
106  /// \brief Adds a multiline token to the \p State.
107  ///
108  /// \returns Extra penalty for the first line of the literal: last line is
109  /// handled in \c addNextStateToQueue, and the penalty for other lines doesn't
110  /// matter, as we don't change them.
111  unsigned addMultilineToken(const FormatToken &Current, LineState &State);
112
113  /// \brief Returns \c true if the next token starts a multiline string
114  /// literal.
115  ///
116  /// This includes implicitly concatenated strings, strings that will be broken
117  /// by clang-format and string literals with escaped newlines.
118  bool NextIsMultilineString(const LineState &State);
119
120  FormatStyle Style;
121  SourceManager &SourceMgr;
122  WhitespaceManager &Whitespaces;
123  encoding::Encoding Encoding;
124  bool BinPackInconclusiveFunctions;
125};
126
/// \brief Per-nesting-level formatting state.
///
/// One instance describes the properties that apply to a single parenthesis
/// level. Instances are ordered by \c operator< so that they can be used as
/// (part of) a key in ordered containers.
struct ParenState {
  /// \brief Creates the state for a new level.
  ///
  /// The five arguments initialise the members of the same name; every other
  /// counter/position starts at 0 and every other flag starts as \c false.
  ParenState(unsigned Indent, unsigned IndentLevel, unsigned LastSpace,
             bool AvoidBinPacking, bool NoLineBreak)
      : Indent(Indent), IndentLevel(IndentLevel), LastSpace(LastSpace),
        FirstLessLess(0), BreakBeforeClosingBrace(false), QuestionColumn(0),
        AvoidBinPacking(AvoidBinPacking), BreakBeforeParameter(false),
        NoLineBreak(NoLineBreak), ColonPos(0), StartOfFunctionCall(0),
        StartOfArraySubscripts(0), NestedNameSpecifierContinuation(0),
        CallContinuation(0), VariablePos(0), ContainsLineBreak(false),
        ContainsUnwrappedBuilder(false) {}

  /// \brief The position to which a specific parenthesis level needs to be
  /// indented.
  unsigned Indent;

  /// \brief The number of indentation levels of the block.
  unsigned IndentLevel;

  /// \brief The position of the last space on each level.
  ///
  /// Used e.g. to break like:
  /// functionCall(Parameter, otherCall(
  ///                             OtherParameter));
  unsigned LastSpace;

  /// \brief The position of the first "<<" operator encountered on each level.
  ///
  /// Used to align "<<" operators. 0 if no such operator has been encountered
  /// on a level.
  unsigned FirstLessLess;

  /// \brief Whether a newline needs to be inserted before the block's closing
  /// brace.
  ///
  /// We only want to insert a newline before the closing brace if there also
  /// was a newline after the beginning left brace.
  bool BreakBeforeClosingBrace;

  /// \brief The column of a \c ? in a conditional expression.
  unsigned QuestionColumn;

  /// \brief Avoid bin packing, i.e. multiple parameters/elements on multiple
  /// lines, in this context.
  bool AvoidBinPacking;

  /// \brief Break after the next comma (or all the commas in this context if
  /// \c AvoidBinPacking is \c true).
  bool BreakBeforeParameter;

  /// \brief Line breaking in this context would break a formatting rule.
  bool NoLineBreak;

  /// \brief The position of the colon in an ObjC method declaration/call.
  unsigned ColonPos;

  /// \brief The start of the most recent function in a builder-type call.
  unsigned StartOfFunctionCall;

  /// \brief Contains the start of array subscript expressions, so that they
  /// can be aligned.
  unsigned StartOfArraySubscripts;

  /// \brief If a nested name specifier was broken over multiple lines, this
  /// contains the start column of the second line. Otherwise 0.
  unsigned NestedNameSpecifierContinuation;

  /// \brief If a call expression was broken over multiple lines, this
  /// contains the start column of the second line. Otherwise 0.
  unsigned CallContinuation;

  /// \brief The column of the first variable name in a variable declaration.
  ///
  /// Used to align further variables if necessary.
  unsigned VariablePos;

  /// \brief \c true if this \c ParenState already contains a line-break.
  ///
  /// The first line break in a certain \c ParenState causes extra penalty so
  /// that clang-format prefers similar breaks, i.e. breaks in the same
  /// parenthesis.
  bool ContainsLineBreak;

  /// \brief \c true if this \c ParenState contains multiple segments of a
  /// builder-type call on one line.
  bool ContainsUnwrappedBuilder;

  /// \brief Strict weak ordering over all members.
  ///
  /// Members are compared one after another; the first member that differs
  /// decides the result. Every member takes part, so two states are
  /// equivalent under this ordering only if all their members are equal.
  ///
  /// For the flags compared with a plain \c return of the flag, the state in
  /// which the flag is set orders first; for \c ContainsLineBreak and
  /// \c ContainsUnwrappedBuilder the state in which it is cleared orders
  /// first. The direction is arbitrary but fixed per member, which is all an
  /// ordering needs.
  bool operator<(const ParenState &Other) const {
    if (Indent != Other.Indent)
      return Indent < Other.Indent;
    if (IndentLevel != Other.IndentLevel)
      return IndentLevel < Other.IndentLevel;
    if (LastSpace != Other.LastSpace)
      return LastSpace < Other.LastSpace;
    if (FirstLessLess != Other.FirstLessLess)
      return FirstLessLess < Other.FirstLessLess;
    if (BreakBeforeClosingBrace != Other.BreakBeforeClosingBrace)
      return BreakBeforeClosingBrace;
    if (QuestionColumn != Other.QuestionColumn)
      return QuestionColumn < Other.QuestionColumn;
    if (AvoidBinPacking != Other.AvoidBinPacking)
      return AvoidBinPacking;
    if (BreakBeforeParameter != Other.BreakBeforeParameter)
      return BreakBeforeParameter;
    if (NoLineBreak != Other.NoLineBreak)
      return NoLineBreak;
    if (ColonPos != Other.ColonPos)
      return ColonPos < Other.ColonPos;
    if (StartOfFunctionCall != Other.StartOfFunctionCall)
      return StartOfFunctionCall < Other.StartOfFunctionCall;
    if (StartOfArraySubscripts != Other.StartOfArraySubscripts)
      return StartOfArraySubscripts < Other.StartOfArraySubscripts;
    if (NestedNameSpecifierContinuation !=
        Other.NestedNameSpecifierContinuation)
      return NestedNameSpecifierContinuation <
             Other.NestedNameSpecifierContinuation;
    if (CallContinuation != Other.CallContinuation)
      return CallContinuation < Other.CallContinuation;
    if (VariablePos != Other.VariablePos)
      return VariablePos < Other.VariablePos;
    if (ContainsLineBreak != Other.ContainsLineBreak)
      return ContainsLineBreak < Other.ContainsLineBreak;
    if (ContainsUnwrappedBuilder != Other.ContainsUnwrappedBuilder)
      return ContainsUnwrappedBuilder < Other.ContainsUnwrappedBuilder;
    return false;
  }
};
247
248/// \brief The current state when indenting a unwrapped line.
249///
250/// As the indenting tries different combinations this is copied by value.
251struct LineState {
252  /// \brief The number of used columns in the current line.
253  unsigned Column;
254
255  /// \brief The token that needs to be next formatted.
256  FormatToken *NextToken;
257
258  /// \brief \c true if this line contains a continued for-loop section.
259  bool LineContainsContinuedForLoopSection;
260
261  /// \brief The level of nesting inside (), [], <> and {}.
262  unsigned ParenLevel;
263
264  /// \brief The \c ParenLevel at the start of this line.
265  unsigned StartOfLineLevel;
266
267  /// \brief The lowest \c ParenLevel on the current line.
268  unsigned LowestLevelOnLine;
269
270  /// \brief The start column of the string literal, if we're in a string
271  /// literal sequence, 0 otherwise.
272  unsigned StartOfStringLiteral;
273
274  /// \brief A stack keeping track of properties applying to parenthesis
275  /// levels.
276  std::vector<ParenState> Stack;
277
278  /// \brief Ignore the stack of \c ParenStates for state comparison.
279  ///
280  /// In long and deeply nested unwrapped lines, the current algorithm can
281  /// be insufficient for finding the best formatting with a reasonable amount
282  /// of time and memory. Setting this flag will effectively lead to the
283  /// algorithm not analyzing some combinations. However, these combinations
284  /// rarely contain the optimal solution: In short, accepting a higher
285  /// penalty early would need to lead to different values in the \c
286  /// ParenState stack (in an otherwise identical state) and these different
287  /// values would need to lead to a significant amount of avoided penalty
288  /// later.
289  ///
290  /// FIXME: Come up with a better algorithm instead.
291  bool IgnoreStackForComparison;
292
293  /// \brief The indent of the first token.
294  unsigned FirstIndent;
295
296  /// \brief The line that is being formatted.
297  ///
298  /// Does not need to be considered for memoization because it doesn't change.
299  const AnnotatedLine *Line;
300
301  /// \brief Comparison operator to be able to used \c LineState in \c map.
302  bool operator<(const LineState &Other) const {
303    if (NextToken != Other.NextToken)
304      return NextToken < Other.NextToken;
305    if (Column != Other.Column)
306      return Column < Other.Column;
307    if (LineContainsContinuedForLoopSection !=
308        Other.LineContainsContinuedForLoopSection)
309      return LineContainsContinuedForLoopSection;
310    if (ParenLevel != Other.ParenLevel)
311      return ParenLevel < Other.ParenLevel;
312    if (StartOfLineLevel != Other.StartOfLineLevel)
313      return StartOfLineLevel < Other.StartOfLineLevel;
314    if (LowestLevelOnLine != Other.LowestLevelOnLine)
315      return LowestLevelOnLine < Other.LowestLevelOnLine;
316    if (StartOfStringLiteral != Other.StartOfStringLiteral)
317      return StartOfStringLiteral < Other.StartOfStringLiteral;
318    if (IgnoreStackForComparison || Other.IgnoreStackForComparison)
319      return false;
320    return Stack < Other.Stack;
321  }
322};
323
324} // end namespace format
325} // end namespace clang
326
327#endif // LLVM_CLANG_FORMAT_CONTINUATION_INDENTER_H
328