TokenAnnotator.h revision 360784
1//===--- TokenAnnotator.h - Format C++ code ---------------------*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8///
9/// \file
10/// This file implements a token annotator, i.e. creates
11/// \c AnnotatedTokens out of \c FormatTokens with required extra information.
12///
13//===----------------------------------------------------------------------===//
14
15#ifndef LLVM_CLANG_LIB_FORMAT_TOKENANNOTATOR_H
16#define LLVM_CLANG_LIB_FORMAT_TOKENANNOTATOR_H
17
18#include "UnwrappedLineParser.h"
19#include "clang/Format/Format.h"
20
21namespace clang {
22class SourceManager;
23
24namespace format {
25
/// Kind of line recorded in \c AnnotatedLine::Type.
///
/// The enumerators are unscoped and rely on their implicit values, so their
/// order must not change.
enum LineType {
  LT_Invalid,
  LT_ImportStatement,
  /// An @interface, @implementation, or @protocol line.
  LT_ObjCDecl,
  LT_ObjCMethodDecl,
  /// An @property line.
  LT_ObjCProperty,
  LT_Other,
  LT_PreprocessorDirective,
  LT_VirtualFunctionDecl
};
36
37class AnnotatedLine {
38public:
39  AnnotatedLine(const UnwrappedLine &Line)
40      : First(Line.Tokens.front().Tok), Level(Line.Level),
41        MatchingOpeningBlockLineIndex(Line.MatchingOpeningBlockLineIndex),
42        MatchingClosingBlockLineIndex(Line.MatchingClosingBlockLineIndex),
43        InPPDirective(Line.InPPDirective),
44        MustBeDeclaration(Line.MustBeDeclaration), MightBeFunctionDecl(false),
45        IsMultiVariableDeclStmt(false), Affected(false),
46        LeadingEmptyLinesAffected(false), ChildrenAffected(false),
47        FirstStartColumn(Line.FirstStartColumn) {
48    assert(!Line.Tokens.empty());
49
50    // Calculate Next and Previous for all tokens. Note that we must overwrite
51    // Next and Previous for every token, as previous formatting runs might have
52    // left them in a different state.
53    First->Previous = nullptr;
54    FormatToken *Current = First;
55    for (std::list<UnwrappedLineNode>::const_iterator I = ++Line.Tokens.begin(),
56                                                      E = Line.Tokens.end();
57         I != E; ++I) {
58      const UnwrappedLineNode &Node = *I;
59      Current->Next = I->Tok;
60      I->Tok->Previous = Current;
61      Current = Current->Next;
62      Current->Children.clear();
63      for (const auto &Child : Node.Children) {
64        Children.push_back(new AnnotatedLine(Child));
65        Current->Children.push_back(Children.back());
66      }
67    }
68    Last = Current;
69    Last->Next = nullptr;
70  }
71
72  ~AnnotatedLine() {
73    for (unsigned i = 0, e = Children.size(); i != e; ++i) {
74      delete Children[i];
75    }
76    FormatToken *Current = First;
77    while (Current) {
78      Current->Children.clear();
79      Current->Role.reset();
80      Current = Current->Next;
81    }
82  }
83
84  /// \c true if this line starts with the given tokens in order, ignoring
85  /// comments.
86  template <typename... Ts> bool startsWith(Ts... Tokens) const {
87    return First && First->startsSequence(Tokens...);
88  }
89
90  /// \c true if this line ends with the given tokens in reversed order,
91  /// ignoring comments.
92  /// For example, given tokens [T1, T2, T3, ...], the function returns true if
93  /// this line is like "... T3 T2 T1".
94  template <typename... Ts> bool endsWith(Ts... Tokens) const {
95    return Last && Last->endsSequence(Tokens...);
96  }
97
98  /// \c true if this line looks like a function definition instead of a
99  /// function declaration. Asserts MightBeFunctionDecl.
100  bool mightBeFunctionDefinition() const {
101    assert(MightBeFunctionDecl);
102    // Try to determine if the end of a stream of tokens is either the
103    // Definition or the Declaration for a function. It does this by looking for
104    // the ';' in foo(); and using that it ends with a ; to know this is the
105    // Definition, however the line could end with
106    //    foo(); /* comment */
107    // or
108    //    foo(); // comment
109    // or
110    //    foo() // comment
111    // endsWith() ignores the comment.
112    return !endsWith(tok::semi);
113  }
114
115  /// \c true if this line starts a namespace definition.
116  bool startsWithNamespace() const {
117    return startsWith(tok::kw_namespace) || startsWith(TT_NamespaceMacro) ||
118           startsWith(tok::kw_inline, tok::kw_namespace) ||
119           startsWith(tok::kw_export, tok::kw_namespace);
120  }
121
122  FormatToken *First;
123  FormatToken *Last;
124
125  SmallVector<AnnotatedLine *, 0> Children;
126
127  LineType Type;
128  unsigned Level;
129  size_t MatchingOpeningBlockLineIndex;
130  size_t MatchingClosingBlockLineIndex;
131  bool InPPDirective;
132  bool MustBeDeclaration;
133  bool MightBeFunctionDecl;
134  bool IsMultiVariableDeclStmt;
135
136  /// \c True if this line should be formatted, i.e. intersects directly or
137  /// indirectly with one of the input ranges.
138  bool Affected;
139
140  /// \c True if the leading empty lines of this line intersect with one of the
141  /// input ranges.
142  bool LeadingEmptyLinesAffected;
143
144  /// \c True if one of this line's children intersects with an input range.
145  bool ChildrenAffected;
146
147  unsigned FirstStartColumn;
148
149private:
150  // Disallow copying.
151  AnnotatedLine(const AnnotatedLine &) = delete;
152  void operator=(const AnnotatedLine &) = delete;
153};
154
155/// Determines extra information about the tokens comprising an
156/// \c UnwrappedLine.
157class TokenAnnotator {
158public:
159  TokenAnnotator(const FormatStyle &Style, const AdditionalKeywords &Keywords)
160      : Style(Style), Keywords(Keywords) {}
161
162  /// Adapts the indent levels of comment lines to the indent of the
163  /// subsequent line.
164  // FIXME: Can/should this be done in the UnwrappedLineParser?
165  void setCommentLineLevels(SmallVectorImpl<AnnotatedLine *> &Lines);
166
167  void annotate(AnnotatedLine &Line);
168  void calculateFormattingInformation(AnnotatedLine &Line);
169
170private:
171  /// Calculate the penalty for splitting before \c Tok.
172  unsigned splitPenalty(const AnnotatedLine &Line, const FormatToken &Tok,
173                        bool InFunctionDecl);
174
175  bool spaceRequiredBeforeParens(const FormatToken &Right) const;
176
177  bool spaceRequiredBetween(const AnnotatedLine &Line, const FormatToken &Left,
178                            const FormatToken &Right);
179
180  bool spaceRequiredBefore(const AnnotatedLine &Line, const FormatToken &Right);
181
182  bool mustBreakBefore(const AnnotatedLine &Line, const FormatToken &Right);
183
184  bool canBreakBefore(const AnnotatedLine &Line, const FormatToken &Right);
185
186  bool mustBreakForReturnType(const AnnotatedLine &Line) const;
187
188  void printDebugInfo(const AnnotatedLine &Line);
189
190  void calculateUnbreakableTailLengths(AnnotatedLine &Line);
191
192  const FormatStyle &Style;
193
194  const AdditionalKeywords &Keywords;
195};
196
197} // end namespace format
198} // end namespace clang
199
200#endif
201