NamespaceEndCommentsFixer.cpp revision 360784
1//===--- NamespaceEndCommentsFixer.cpp --------------------------*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8///
9/// \file
10/// This file implements NamespaceEndCommentsFixer, a TokenAnalyzer that
11/// fixes namespace end comments.
12///
13//===----------------------------------------------------------------------===//
14
15#include "NamespaceEndCommentsFixer.h"
16#include "llvm/Support/Debug.h"
17#include "llvm/Support/Regex.h"
18
19#define DEBUG_TYPE "namespace-end-comments-fixer"
20
21namespace clang {
22namespace format {
23
24namespace {
// Upper bound on the number of unwrapped lines a namespace may span while
// still counting as "short". Short namespaces are not given an end comment.
constexpr int kShortNamespaceMaxLines = 1;
28
29// Computes the name of a namespace given the namespace token.
30// Returns "" for anonymous namespace.
31std::string computeName(const FormatToken *NamespaceTok) {
32  assert(NamespaceTok &&
33         NamespaceTok->isOneOf(tok::kw_namespace, TT_NamespaceMacro) &&
34         "expecting a namespace token");
35  std::string name = "";
36  const FormatToken *Tok = NamespaceTok->getNextNonComment();
37  if (NamespaceTok->is(TT_NamespaceMacro)) {
38    // Collects all the non-comment tokens between opening parenthesis
39    // and closing parenthesis or comma.
40    assert(Tok && Tok->is(tok::l_paren) && "expected an opening parenthesis");
41    Tok = Tok->getNextNonComment();
42    while (Tok && !Tok->isOneOf(tok::r_paren, tok::comma)) {
43      name += Tok->TokenText;
44      Tok = Tok->getNextNonComment();
45    }
46  } else {
47    // For `namespace [[foo]] A::B::inline C {` or
48    // `namespace MACRO1 MACRO2 A::B::inline C {`, returns "A::B::inline C".
49    // Peek for the first '::' (or '{') and then return all tokens from one
50    // token before that up until the '{'.
51    const FormatToken *FirstNSTok = Tok;
52    while (Tok && !Tok->is(tok::l_brace) && !Tok->is(tok::coloncolon)) {
53      FirstNSTok = Tok;
54      Tok = Tok->getNextNonComment();
55    }
56
57    Tok = FirstNSTok;
58    while (Tok && !Tok->is(tok::l_brace)) {
59      name += Tok->TokenText;
60      if (Tok->is(tok::kw_inline))
61        name += " ";
62      Tok = Tok->getNextNonComment();
63    }
64  }
65  return name;
66}
67
68std::string computeEndCommentText(StringRef NamespaceName, bool AddNewline,
69                                  const FormatToken *NamespaceTok) {
70  std::string text = "// ";
71  text += NamespaceTok->TokenText;
72  if (NamespaceTok->is(TT_NamespaceMacro))
73    text += "(";
74  else if (!NamespaceName.empty())
75    text += ' ';
76  text += NamespaceName;
77  if (NamespaceTok->is(TT_NamespaceMacro))
78    text += ")";
79  if (AddNewline)
80    text += '\n';
81  return text;
82}
83
84bool hasEndComment(const FormatToken *RBraceTok) {
85  return RBraceTok->Next && RBraceTok->Next->is(tok::comment);
86}
87
88bool validEndComment(const FormatToken *RBraceTok, StringRef NamespaceName,
89                     const FormatToken *NamespaceTok) {
90  assert(hasEndComment(RBraceTok));
91  const FormatToken *Comment = RBraceTok->Next;
92
93  // Matches a valid namespace end comment.
94  // Valid namespace end comments don't need to be edited.
95  static const llvm::Regex NamespaceCommentPattern =
96      llvm::Regex("^/[/*] *(end (of )?)? *(anonymous|unnamed)? *"
97                  "namespace( +([a-zA-Z0-9:_]+))?\\.? *(\\*/)?$",
98                  llvm::Regex::IgnoreCase);
99  static const llvm::Regex NamespaceMacroCommentPattern =
100      llvm::Regex("^/[/*] *(end (of )?)? *(anonymous|unnamed)? *"
101                  "([a-zA-Z0-9_]+)\\(([a-zA-Z0-9:_]*)\\)\\.? *(\\*/)?$",
102                  llvm::Regex::IgnoreCase);
103
104  SmallVector<StringRef, 8> Groups;
105  if (NamespaceTok->is(TT_NamespaceMacro) &&
106      NamespaceMacroCommentPattern.match(Comment->TokenText, &Groups)) {
107    StringRef NamespaceTokenText = Groups.size() > 4 ? Groups[4] : "";
108    // The name of the macro must be used.
109    if (NamespaceTokenText != NamespaceTok->TokenText)
110      return false;
111  } else if (NamespaceTok->isNot(tok::kw_namespace) ||
112             !NamespaceCommentPattern.match(Comment->TokenText, &Groups)) {
113    // Comment does not match regex.
114    return false;
115  }
116  StringRef NamespaceNameInComment = Groups.size() > 5 ? Groups[5] : "";
117  // Anonymous namespace comments must not mention a namespace name.
118  if (NamespaceName.empty() && !NamespaceNameInComment.empty())
119    return false;
120  StringRef AnonymousInComment = Groups.size() > 3 ? Groups[3] : "";
121  // Named namespace comments must not mention anonymous namespace.
122  if (!NamespaceName.empty() && !AnonymousInComment.empty())
123    return false;
124  return NamespaceNameInComment == NamespaceName;
125}
126
127void addEndComment(const FormatToken *RBraceTok, StringRef EndCommentText,
128                   const SourceManager &SourceMgr,
129                   tooling::Replacements *Fixes) {
130  auto EndLoc = RBraceTok->Tok.getEndLoc();
131  auto Range = CharSourceRange::getCharRange(EndLoc, EndLoc);
132  auto Err = Fixes->add(tooling::Replacement(SourceMgr, Range, EndCommentText));
133  if (Err) {
134    llvm::errs() << "Error while adding namespace end comment: "
135                 << llvm::toString(std::move(Err)) << "\n";
136  }
137}
138
139void updateEndComment(const FormatToken *RBraceTok, StringRef EndCommentText,
140                      const SourceManager &SourceMgr,
141                      tooling::Replacements *Fixes) {
142  assert(hasEndComment(RBraceTok));
143  const FormatToken *Comment = RBraceTok->Next;
144  auto Range = CharSourceRange::getCharRange(Comment->getStartOfNonWhitespace(),
145                                             Comment->Tok.getEndLoc());
146  auto Err = Fixes->add(tooling::Replacement(SourceMgr, Range, EndCommentText));
147  if (Err) {
148    llvm::errs() << "Error while updating namespace end comment: "
149                 << llvm::toString(std::move(Err)) << "\n";
150  }
151}
152} // namespace
153
154const FormatToken *
155getNamespaceToken(const AnnotatedLine *Line,
156                  const SmallVectorImpl<AnnotatedLine *> &AnnotatedLines) {
157  if (!Line->Affected || Line->InPPDirective || !Line->startsWith(tok::r_brace))
158    return nullptr;
159  size_t StartLineIndex = Line->MatchingOpeningBlockLineIndex;
160  if (StartLineIndex == UnwrappedLine::kInvalidIndex)
161    return nullptr;
162  assert(StartLineIndex < AnnotatedLines.size());
163  const FormatToken *NamespaceTok = AnnotatedLines[StartLineIndex]->First;
164  if (NamespaceTok->is(tok::l_brace)) {
165    // "namespace" keyword can be on the line preceding '{', e.g. in styles
166    // where BraceWrapping.AfterNamespace is true.
167    if (StartLineIndex > 0)
168      NamespaceTok = AnnotatedLines[StartLineIndex - 1]->First;
169  }
170  return NamespaceTok->getNamespaceToken();
171}
172
173StringRef
174getNamespaceTokenText(const AnnotatedLine *Line,
175                      const SmallVectorImpl<AnnotatedLine *> &AnnotatedLines) {
176  const FormatToken *NamespaceTok = getNamespaceToken(Line, AnnotatedLines);
177  return NamespaceTok ? NamespaceTok->TokenText : StringRef();
178}
179
180NamespaceEndCommentsFixer::NamespaceEndCommentsFixer(const Environment &Env,
181                                                     const FormatStyle &Style)
182    : TokenAnalyzer(Env, Style) {}
183
184std::pair<tooling::Replacements, unsigned> NamespaceEndCommentsFixer::analyze(
185    TokenAnnotator &Annotator, SmallVectorImpl<AnnotatedLine *> &AnnotatedLines,
186    FormatTokenLexer &Tokens) {
187  const SourceManager &SourceMgr = Env.getSourceManager();
188  AffectedRangeMgr.computeAffectedLines(AnnotatedLines);
189  tooling::Replacements Fixes;
190  std::string AllNamespaceNames = "";
191  size_t StartLineIndex = SIZE_MAX;
192  StringRef NamespaceTokenText;
193  unsigned int CompactedNamespacesCount = 0;
194  for (size_t I = 0, E = AnnotatedLines.size(); I != E; ++I) {
195    const AnnotatedLine *EndLine = AnnotatedLines[I];
196    const FormatToken *NamespaceTok =
197        getNamespaceToken(EndLine, AnnotatedLines);
198    if (!NamespaceTok)
199      continue;
200    FormatToken *RBraceTok = EndLine->First;
201    if (RBraceTok->Finalized)
202      continue;
203    RBraceTok->Finalized = true;
204    const FormatToken *EndCommentPrevTok = RBraceTok;
205    // Namespaces often end with '};'. In that case, attach namespace end
206    // comments to the semicolon tokens.
207    if (RBraceTok->Next && RBraceTok->Next->is(tok::semi)) {
208      EndCommentPrevTok = RBraceTok->Next;
209    }
210    if (StartLineIndex == SIZE_MAX)
211      StartLineIndex = EndLine->MatchingOpeningBlockLineIndex;
212    std::string NamespaceName = computeName(NamespaceTok);
213    if (Style.CompactNamespaces) {
214      if (CompactedNamespacesCount == 0)
215        NamespaceTokenText = NamespaceTok->TokenText;
216      if ((I + 1 < E) &&
217          NamespaceTokenText ==
218              getNamespaceTokenText(AnnotatedLines[I + 1], AnnotatedLines) &&
219          StartLineIndex - CompactedNamespacesCount - 1 ==
220              AnnotatedLines[I + 1]->MatchingOpeningBlockLineIndex &&
221          !AnnotatedLines[I + 1]->First->Finalized) {
222        if (hasEndComment(EndCommentPrevTok)) {
223          // remove end comment, it will be merged in next one
224          updateEndComment(EndCommentPrevTok, std::string(), SourceMgr, &Fixes);
225        }
226        CompactedNamespacesCount++;
227        AllNamespaceNames = "::" + NamespaceName + AllNamespaceNames;
228        continue;
229      }
230      NamespaceName += AllNamespaceNames;
231      CompactedNamespacesCount = 0;
232      AllNamespaceNames = std::string();
233    }
234    // The next token in the token stream after the place where the end comment
235    // token must be. This is either the next token on the current line or the
236    // first token on the next line.
237    const FormatToken *EndCommentNextTok = EndCommentPrevTok->Next;
238    if (EndCommentNextTok && EndCommentNextTok->is(tok::comment))
239      EndCommentNextTok = EndCommentNextTok->Next;
240    if (!EndCommentNextTok && I + 1 < E)
241      EndCommentNextTok = AnnotatedLines[I + 1]->First;
242    bool AddNewline = EndCommentNextTok &&
243                      EndCommentNextTok->NewlinesBefore == 0 &&
244                      EndCommentNextTok->isNot(tok::eof);
245    const std::string EndCommentText =
246        computeEndCommentText(NamespaceName, AddNewline, NamespaceTok);
247    if (!hasEndComment(EndCommentPrevTok)) {
248      bool isShort = I - StartLineIndex <= kShortNamespaceMaxLines + 1;
249      if (!isShort)
250        addEndComment(EndCommentPrevTok, EndCommentText, SourceMgr, &Fixes);
251    } else if (!validEndComment(EndCommentPrevTok, NamespaceName,
252                                NamespaceTok)) {
253      updateEndComment(EndCommentPrevTok, EndCommentText, SourceMgr, &Fixes);
254    }
255    StartLineIndex = SIZE_MAX;
256  }
257  return {Fixes, 0};
258}
259
260} // namespace format
261} // namespace clang
262