VerifyDiagnosticConsumer.cpp revision 360784
1//===- VerifyDiagnosticConsumer.cpp - Verifying Diagnostic Client ---------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This is a concrete diagnostic client, which buffers the diagnostic messages.
10//
11//===----------------------------------------------------------------------===//
12
13#include "clang/Frontend/VerifyDiagnosticConsumer.h"
14#include "clang/Basic/CharInfo.h"
15#include "clang/Basic/Diagnostic.h"
16#include "clang/Basic/DiagnosticOptions.h"
17#include "clang/Basic/FileManager.h"
18#include "clang/Basic/LLVM.h"
19#include "clang/Basic/SourceLocation.h"
20#include "clang/Basic/SourceManager.h"
21#include "clang/Basic/TokenKinds.h"
22#include "clang/Frontend/FrontendDiagnostic.h"
23#include "clang/Frontend/TextDiagnosticBuffer.h"
24#include "clang/Lex/HeaderSearch.h"
25#include "clang/Lex/Lexer.h"
26#include "clang/Lex/PPCallbacks.h"
27#include "clang/Lex/Preprocessor.h"
28#include "clang/Lex/Token.h"
29#include "llvm/ADT/STLExtras.h"
30#include "llvm/ADT/SmallPtrSet.h"
31#include "llvm/ADT/SmallString.h"
32#include "llvm/ADT/StringRef.h"
33#include "llvm/ADT/Twine.h"
34#include "llvm/Support/ErrorHandling.h"
35#include "llvm/Support/Regex.h"
36#include "llvm/Support/raw_ostream.h"
37#include <algorithm>
38#include <cassert>
39#include <cstddef>
40#include <cstring>
41#include <iterator>
42#include <memory>
43#include <string>
44#include <utility>
45#include <vector>
46
47using namespace clang;
48
49using Directive = VerifyDiagnosticConsumer::Directive;
50using DirectiveList = VerifyDiagnosticConsumer::DirectiveList;
51using ExpectedData = VerifyDiagnosticConsumer::ExpectedData;
52
53#ifndef NDEBUG
54
55namespace {
56
57class VerifyFileTracker : public PPCallbacks {
58  VerifyDiagnosticConsumer &Verify;
59  SourceManager &SM;
60
61public:
62  VerifyFileTracker(VerifyDiagnosticConsumer &Verify, SourceManager &SM)
63      : Verify(Verify), SM(SM) {}
64
65  /// Hook into the preprocessor and update the list of parsed
66  /// files when the preprocessor indicates a new file is entered.
67  void FileChanged(SourceLocation Loc, FileChangeReason Reason,
68                   SrcMgr::CharacteristicKind FileType,
69                   FileID PrevFID) override {
70    Verify.UpdateParsedFileStatus(SM, SM.getFileID(Loc),
71                                  VerifyDiagnosticConsumer::IsParsed);
72  }
73};
74
75} // namespace
76
77#endif
78
79//===----------------------------------------------------------------------===//
80// Checking diagnostics implementation.
81//===----------------------------------------------------------------------===//
82
83using DiagList = TextDiagnosticBuffer::DiagList;
84using const_diag_iterator = TextDiagnosticBuffer::const_iterator;
85
86namespace {
87
88/// StandardDirective - Directive with string matching.
89class StandardDirective : public Directive {
90public:
91  StandardDirective(SourceLocation DirectiveLoc, SourceLocation DiagnosticLoc,
92                    bool MatchAnyLine, StringRef Text, unsigned Min,
93                    unsigned Max)
94      : Directive(DirectiveLoc, DiagnosticLoc, MatchAnyLine, Text, Min, Max) {}
95
96  bool isValid(std::string &Error) override {
97    // all strings are considered valid; even empty ones
98    return true;
99  }
100
101  bool match(StringRef S) override {
102    return S.find(Text) != StringRef::npos;
103  }
104};
105
106/// RegexDirective - Directive with regular-expression matching.
107class RegexDirective : public Directive {
108public:
109  RegexDirective(SourceLocation DirectiveLoc, SourceLocation DiagnosticLoc,
110                 bool MatchAnyLine, StringRef Text, unsigned Min, unsigned Max,
111                 StringRef RegexStr)
112      : Directive(DirectiveLoc, DiagnosticLoc, MatchAnyLine, Text, Min, Max),
113        Regex(RegexStr) {}
114
115  bool isValid(std::string &Error) override {
116    return Regex.isValid(Error);
117  }
118
119  bool match(StringRef S) override {
120    return Regex.match(S);
121  }
122
123private:
124  llvm::Regex Regex;
125};
126
127class ParseHelper
128{
129public:
130  ParseHelper(StringRef S)
131      : Begin(S.begin()), End(S.end()), C(Begin), P(Begin) {}
132
133  // Return true if string literal is next.
134  bool Next(StringRef S) {
135    P = C;
136    PEnd = C + S.size();
137    if (PEnd > End)
138      return false;
139    return memcmp(P, S.data(), S.size()) == 0;
140  }
141
142  // Return true if number is next.
143  // Output N only if number is next.
144  bool Next(unsigned &N) {
145    unsigned TMP = 0;
146    P = C;
147    PEnd = P;
148    for (; PEnd < End && *PEnd >= '0' && *PEnd <= '9'; ++PEnd) {
149      TMP *= 10;
150      TMP += *PEnd - '0';
151    }
152    if (PEnd == C)
153      return false;
154    N = TMP;
155    return true;
156  }
157
158  // Return true if a marker is next.
159  // A marker is the longest match for /#[A-Za-z0-9_-]+/.
160  bool NextMarker() {
161    P = C;
162    if (P == End || *P != '#')
163      return false;
164    PEnd = P;
165    ++PEnd;
166    while ((isAlphanumeric(*PEnd) || *PEnd == '-' || *PEnd == '_') &&
167           PEnd < End)
168      ++PEnd;
169    return PEnd > P + 1;
170  }
171
172  // Return true if string literal S is matched in content.
173  // When true, P marks begin-position of the match, and calling Advance sets C
174  // to end-position of the match.
175  // If S is the empty string, then search for any letter instead (makes sense
176  // with FinishDirectiveToken=true).
177  // If EnsureStartOfWord, then skip matches that don't start a new word.
178  // If FinishDirectiveToken, then assume the match is the start of a comment
179  // directive for -verify, and extend the match to include the entire first
180  // token of that directive.
181  bool Search(StringRef S, bool EnsureStartOfWord = false,
182              bool FinishDirectiveToken = false) {
183    do {
184      if (!S.empty()) {
185        P = std::search(C, End, S.begin(), S.end());
186        PEnd = P + S.size();
187      }
188      else {
189        P = C;
190        while (P != End && !isLetter(*P))
191          ++P;
192        PEnd = P + 1;
193      }
194      if (P == End)
195        break;
196      // If not start of word but required, skip and search again.
197      if (EnsureStartOfWord
198               // Check if string literal starts a new word.
199          && !(P == Begin || isWhitespace(P[-1])
200               // Or it could be preceded by the start of a comment.
201               || (P > (Begin + 1) && (P[-1] == '/' || P[-1] == '*')
202                                   &&  P[-2] == '/')))
203        continue;
204      if (FinishDirectiveToken) {
205        while (PEnd != End && (isAlphanumeric(*PEnd)
206                               || *PEnd == '-' || *PEnd == '_'))
207          ++PEnd;
208        // Put back trailing digits and hyphens to be parsed later as a count
209        // or count range.  Because -verify prefixes must start with letters,
210        // we know the actual directive we found starts with a letter, so
211        // we won't put back the entire directive word and thus record an empty
212        // string.
213        assert(isLetter(*P) && "-verify prefix must start with a letter");
214        while (isDigit(PEnd[-1]) || PEnd[-1] == '-')
215          --PEnd;
216      }
217      return true;
218    } while (Advance());
219    return false;
220  }
221
222  // Return true if a CloseBrace that closes the OpenBrace at the current nest
223  // level is found. When true, P marks begin-position of CloseBrace.
224  bool SearchClosingBrace(StringRef OpenBrace, StringRef CloseBrace) {
225    unsigned Depth = 1;
226    P = C;
227    while (P < End) {
228      StringRef S(P, End - P);
229      if (S.startswith(OpenBrace)) {
230        ++Depth;
231        P += OpenBrace.size();
232      } else if (S.startswith(CloseBrace)) {
233        --Depth;
234        if (Depth == 0) {
235          PEnd = P + CloseBrace.size();
236          return true;
237        }
238        P += CloseBrace.size();
239      } else {
240        ++P;
241      }
242    }
243    return false;
244  }
245
246  // Advance 1-past previous next/search.
247  // Behavior is undefined if previous next/search failed.
248  bool Advance() {
249    C = PEnd;
250    return C < End;
251  }
252
253  // Return the text matched by the previous next/search.
254  // Behavior is undefined if previous next/search failed.
255  StringRef Match() { return StringRef(P, PEnd - P); }
256
257  // Skip zero or more whitespace.
258  void SkipWhitespace() {
259    for (; C < End && isWhitespace(*C); ++C)
260      ;
261  }
262
263  // Return true if EOF reached.
264  bool Done() {
265    return !(C < End);
266  }
267
268  // Beginning of expected content.
269  const char * const Begin;
270
271  // End of expected content (1-past).
272  const char * const End;
273
274  // Position of next char in content.
275  const char *C;
276
277  // Previous next/search subject start.
278  const char *P;
279
280private:
281  // Previous next/search subject end (1-past).
282  const char *PEnd = nullptr;
283};
284
285// The information necessary to create a directive.
286struct UnattachedDirective {
287  DirectiveList *DL = nullptr;
288  bool RegexKind = false;
289  SourceLocation DirectivePos, ContentBegin;
290  std::string Text;
291  unsigned Min = 1, Max = 1;
292};
293
294// Attach the specified directive to the line of code indicated by
295// \p ExpectedLoc.
296void attachDirective(DiagnosticsEngine &Diags, const UnattachedDirective &UD,
297                     SourceLocation ExpectedLoc, bool MatchAnyLine = false) {
298  // Construct new directive.
299  std::unique_ptr<Directive> D =
300      Directive::create(UD.RegexKind, UD.DirectivePos, ExpectedLoc,
301                        MatchAnyLine, UD.Text, UD.Min, UD.Max);
302
303  std::string Error;
304  if (!D->isValid(Error)) {
305    Diags.Report(UD.ContentBegin, diag::err_verify_invalid_content)
306      << (UD.RegexKind ? "regex" : "string") << Error;
307  }
308
309  UD.DL->push_back(std::move(D));
310}
311
312} // anonymous
313
314// Tracker for markers in the input files. A marker is a comment of the form
315//
316//   n = 123; // #123
317//
318// ... that can be referred to by a later expected-* directive:
319//
320//   // expected-error@#123 {{undeclared identifier 'n'}}
321//
322// Marker declarations must be at the start of a comment or preceded by
323// whitespace to distinguish them from uses of markers in directives.
324class VerifyDiagnosticConsumer::MarkerTracker {
325  DiagnosticsEngine &Diags;
326
327  struct Marker {
328    SourceLocation DefLoc;
329    SourceLocation RedefLoc;
330    SourceLocation UseLoc;
331  };
332  llvm::StringMap<Marker> Markers;
333
334  // Directives that couldn't be created yet because they name an unknown
335  // marker.
336  llvm::StringMap<llvm::SmallVector<UnattachedDirective, 2>> DeferredDirectives;
337
338public:
339  MarkerTracker(DiagnosticsEngine &Diags) : Diags(Diags) {}
340
341  // Register a marker.
342  void addMarker(StringRef MarkerName, SourceLocation Pos) {
343    auto InsertResult = Markers.insert(
344        {MarkerName, Marker{Pos, SourceLocation(), SourceLocation()}});
345
346    Marker &M = InsertResult.first->second;
347    if (!InsertResult.second) {
348      // Marker was redefined.
349      M.RedefLoc = Pos;
350    } else {
351      // First definition: build any deferred directives.
352      auto Deferred = DeferredDirectives.find(MarkerName);
353      if (Deferred != DeferredDirectives.end()) {
354        for (auto &UD : Deferred->second) {
355          if (M.UseLoc.isInvalid())
356            M.UseLoc = UD.DirectivePos;
357          attachDirective(Diags, UD, Pos);
358        }
359        DeferredDirectives.erase(Deferred);
360      }
361    }
362  }
363
364  // Register a directive at the specified marker.
365  void addDirective(StringRef MarkerName, const UnattachedDirective &UD) {
366    auto MarkerIt = Markers.find(MarkerName);
367    if (MarkerIt != Markers.end()) {
368      Marker &M = MarkerIt->second;
369      if (M.UseLoc.isInvalid())
370        M.UseLoc = UD.DirectivePos;
371      return attachDirective(Diags, UD, M.DefLoc);
372    }
373    DeferredDirectives[MarkerName].push_back(UD);
374  }
375
376  // Ensure we have no remaining deferred directives, and no
377  // multiply-defined-and-used markers.
378  void finalize() {
379    for (auto &MarkerInfo : Markers) {
380      StringRef Name = MarkerInfo.first();
381      Marker &M = MarkerInfo.second;
382      if (M.RedefLoc.isValid() && M.UseLoc.isValid()) {
383        Diags.Report(M.UseLoc, diag::err_verify_ambiguous_marker) << Name;
384        Diags.Report(M.DefLoc, diag::note_verify_ambiguous_marker) << Name;
385        Diags.Report(M.RedefLoc, diag::note_verify_ambiguous_marker) << Name;
386      }
387    }
388
389    for (auto &DeferredPair : DeferredDirectives) {
390      Diags.Report(DeferredPair.second.front().DirectivePos,
391                   diag::err_verify_no_such_marker)
392          << DeferredPair.first();
393    }
394  }
395};
396
397/// ParseDirective - Go through the comment and see if it indicates expected
398/// diagnostics. If so, then put them in the appropriate directive list.
399///
400/// Returns true if any valid directives were found.
401static bool ParseDirective(StringRef S, ExpectedData *ED, SourceManager &SM,
402                           Preprocessor *PP, SourceLocation Pos,
403                           VerifyDiagnosticConsumer::DirectiveStatus &Status,
404                           VerifyDiagnosticConsumer::MarkerTracker &Markers) {
405  DiagnosticsEngine &Diags = PP ? PP->getDiagnostics() : SM.getDiagnostics();
406
407  // First, scan the comment looking for markers.
408  for (ParseHelper PH(S); !PH.Done();) {
409    if (!PH.Search("#", true))
410      break;
411    PH.C = PH.P;
412    if (!PH.NextMarker()) {
413      PH.Next("#");
414      PH.Advance();
415      continue;
416    }
417    PH.Advance();
418    Markers.addMarker(PH.Match(), Pos);
419  }
420
421  // A single comment may contain multiple directives.
422  bool FoundDirective = false;
423  for (ParseHelper PH(S); !PH.Done();) {
424    // Search for the initial directive token.
425    // If one prefix, save time by searching only for its directives.
426    // Otherwise, search for any potential directive token and check it later.
427    const auto &Prefixes = Diags.getDiagnosticOptions().VerifyPrefixes;
428    if (!(Prefixes.size() == 1 ? PH.Search(*Prefixes.begin(), true, true)
429                               : PH.Search("", true, true)))
430      break;
431
432    StringRef DToken = PH.Match();
433    PH.Advance();
434
435    // Default directive kind.
436    UnattachedDirective D;
437    const char *KindStr = "string";
438
439    // Parse the initial directive token in reverse so we can easily determine
440    // its exact actual prefix.  If we were to parse it from the front instead,
441    // it would be harder to determine where the prefix ends because there
442    // might be multiple matching -verify prefixes because some might prefix
443    // others.
444
445    // Regex in initial directive token: -re
446    if (DToken.endswith("-re")) {
447      D.RegexKind = true;
448      KindStr = "regex";
449      DToken = DToken.substr(0, DToken.size()-3);
450    }
451
452    // Type in initial directive token: -{error|warning|note|no-diagnostics}
453    bool NoDiag = false;
454    StringRef DType;
455    if (DToken.endswith(DType="-error"))
456      D.DL = ED ? &ED->Errors : nullptr;
457    else if (DToken.endswith(DType="-warning"))
458      D.DL = ED ? &ED->Warnings : nullptr;
459    else if (DToken.endswith(DType="-remark"))
460      D.DL = ED ? &ED->Remarks : nullptr;
461    else if (DToken.endswith(DType="-note"))
462      D.DL = ED ? &ED->Notes : nullptr;
463    else if (DToken.endswith(DType="-no-diagnostics")) {
464      NoDiag = true;
465      if (D.RegexKind)
466        continue;
467    }
468    else
469      continue;
470    DToken = DToken.substr(0, DToken.size()-DType.size());
471
472    // What's left in DToken is the actual prefix.  That might not be a -verify
473    // prefix even if there is only one -verify prefix (for example, the full
474    // DToken is foo-bar-warning, but foo is the only -verify prefix).
475    if (!std::binary_search(Prefixes.begin(), Prefixes.end(), DToken))
476      continue;
477
478    if (NoDiag) {
479      if (Status == VerifyDiagnosticConsumer::HasOtherExpectedDirectives)
480        Diags.Report(Pos, diag::err_verify_invalid_no_diags)
481          << /*IsExpectedNoDiagnostics=*/true;
482      else
483        Status = VerifyDiagnosticConsumer::HasExpectedNoDiagnostics;
484      continue;
485    }
486    if (Status == VerifyDiagnosticConsumer::HasExpectedNoDiagnostics) {
487      Diags.Report(Pos, diag::err_verify_invalid_no_diags)
488        << /*IsExpectedNoDiagnostics=*/false;
489      continue;
490    }
491    Status = VerifyDiagnosticConsumer::HasOtherExpectedDirectives;
492
493    // If a directive has been found but we're not interested
494    // in storing the directive information, return now.
495    if (!D.DL)
496      return true;
497
498    // Next optional token: @
499    SourceLocation ExpectedLoc;
500    StringRef Marker;
501    bool MatchAnyLine = false;
502    if (!PH.Next("@")) {
503      ExpectedLoc = Pos;
504    } else {
505      PH.Advance();
506      unsigned Line = 0;
507      bool FoundPlus = PH.Next("+");
508      if (FoundPlus || PH.Next("-")) {
509        // Relative to current line.
510        PH.Advance();
511        bool Invalid = false;
512        unsigned ExpectedLine = SM.getSpellingLineNumber(Pos, &Invalid);
513        if (!Invalid && PH.Next(Line) && (FoundPlus || Line < ExpectedLine)) {
514          if (FoundPlus) ExpectedLine += Line;
515          else ExpectedLine -= Line;
516          ExpectedLoc = SM.translateLineCol(SM.getFileID(Pos), ExpectedLine, 1);
517        }
518      } else if (PH.Next(Line)) {
519        // Absolute line number.
520        if (Line > 0)
521          ExpectedLoc = SM.translateLineCol(SM.getFileID(Pos), Line, 1);
522      } else if (PH.NextMarker()) {
523        Marker = PH.Match();
524      } else if (PP && PH.Search(":")) {
525        // Specific source file.
526        StringRef Filename(PH.C, PH.P-PH.C);
527        PH.Advance();
528
529        // Lookup file via Preprocessor, like a #include.
530        const DirectoryLookup *CurDir;
531        Optional<FileEntryRef> File =
532            PP->LookupFile(Pos, Filename, false, nullptr, nullptr, CurDir,
533                           nullptr, nullptr, nullptr, nullptr, nullptr);
534        if (!File) {
535          Diags.Report(Pos.getLocWithOffset(PH.C-PH.Begin),
536                       diag::err_verify_missing_file) << Filename << KindStr;
537          continue;
538        }
539
540        const FileEntry *FE = &File->getFileEntry();
541        if (SM.translateFile(FE).isInvalid())
542          SM.createFileID(FE, Pos, SrcMgr::C_User);
543
544        if (PH.Next(Line) && Line > 0)
545          ExpectedLoc = SM.translateFileLineCol(FE, Line, 1);
546        else if (PH.Next("*")) {
547          MatchAnyLine = true;
548          ExpectedLoc = SM.translateFileLineCol(FE, 1, 1);
549        }
550      } else if (PH.Next("*")) {
551        MatchAnyLine = true;
552        ExpectedLoc = SourceLocation();
553      }
554
555      if (ExpectedLoc.isInvalid() && !MatchAnyLine && Marker.empty()) {
556        Diags.Report(Pos.getLocWithOffset(PH.C-PH.Begin),
557                     diag::err_verify_missing_line) << KindStr;
558        continue;
559      }
560      PH.Advance();
561    }
562
563    // Skip optional whitespace.
564    PH.SkipWhitespace();
565
566    // Next optional token: positive integer or a '+'.
567    if (PH.Next(D.Min)) {
568      PH.Advance();
569      // A positive integer can be followed by a '+' meaning min
570      // or more, or by a '-' meaning a range from min to max.
571      if (PH.Next("+")) {
572        D.Max = Directive::MaxCount;
573        PH.Advance();
574      } else if (PH.Next("-")) {
575        PH.Advance();
576        if (!PH.Next(D.Max) || D.Max < D.Min) {
577          Diags.Report(Pos.getLocWithOffset(PH.C-PH.Begin),
578                       diag::err_verify_invalid_range) << KindStr;
579          continue;
580        }
581        PH.Advance();
582      } else {
583        D.Max = D.Min;
584      }
585    } else if (PH.Next("+")) {
586      // '+' on its own means "1 or more".
587      D.Max = Directive::MaxCount;
588      PH.Advance();
589    }
590
591    // Skip optional whitespace.
592    PH.SkipWhitespace();
593
594    // Next token: {{
595    if (!PH.Next("{{")) {
596      Diags.Report(Pos.getLocWithOffset(PH.C-PH.Begin),
597                   diag::err_verify_missing_start) << KindStr;
598      continue;
599    }
600    PH.Advance();
601    const char* const ContentBegin = PH.C; // mark content begin
602    // Search for token: }}
603    if (!PH.SearchClosingBrace("{{", "}}")) {
604      Diags.Report(Pos.getLocWithOffset(PH.C-PH.Begin),
605                   diag::err_verify_missing_end) << KindStr;
606      continue;
607    }
608    const char* const ContentEnd = PH.P; // mark content end
609    PH.Advance();
610
611    D.DirectivePos = Pos;
612    D.ContentBegin = Pos.getLocWithOffset(ContentBegin - PH.Begin);
613
614    // Build directive text; convert \n to newlines.
615    StringRef NewlineStr = "\\n";
616    StringRef Content(ContentBegin, ContentEnd-ContentBegin);
617    size_t CPos = 0;
618    size_t FPos;
619    while ((FPos = Content.find(NewlineStr, CPos)) != StringRef::npos) {
620      D.Text += Content.substr(CPos, FPos-CPos);
621      D.Text += '\n';
622      CPos = FPos + NewlineStr.size();
623    }
624    if (D.Text.empty())
625      D.Text.assign(ContentBegin, ContentEnd);
626
627    // Check that regex directives contain at least one regex.
628    if (D.RegexKind && D.Text.find("{{") == StringRef::npos) {
629      Diags.Report(D.ContentBegin, diag::err_verify_missing_regex) << D.Text;
630      return false;
631    }
632
633    if (Marker.empty())
634      attachDirective(Diags, D, ExpectedLoc, MatchAnyLine);
635    else
636      Markers.addDirective(Marker, D);
637    FoundDirective = true;
638  }
639
640  return FoundDirective;
641}
642
643VerifyDiagnosticConsumer::VerifyDiagnosticConsumer(DiagnosticsEngine &Diags_)
644    : Diags(Diags_), PrimaryClient(Diags.getClient()),
645      PrimaryClientOwner(Diags.takeClient()),
646      Buffer(new TextDiagnosticBuffer()), Markers(new MarkerTracker(Diags)),
647      Status(HasNoDirectives) {
648  if (Diags.hasSourceManager())
649    setSourceManager(Diags.getSourceManager());
650}
651
652VerifyDiagnosticConsumer::~VerifyDiagnosticConsumer() {
653  assert(!ActiveSourceFiles && "Incomplete parsing of source files!");
654  assert(!CurrentPreprocessor && "CurrentPreprocessor should be invalid!");
655  SrcManager = nullptr;
656  CheckDiagnostics();
657  assert(!Diags.ownsClient() &&
658         "The VerifyDiagnosticConsumer takes over ownership of the client!");
659}
660
661// DiagnosticConsumer interface.
662
663void VerifyDiagnosticConsumer::BeginSourceFile(const LangOptions &LangOpts,
664                                               const Preprocessor *PP) {
665  // Attach comment handler on first invocation.
666  if (++ActiveSourceFiles == 1) {
667    if (PP) {
668      CurrentPreprocessor = PP;
669      this->LangOpts = &LangOpts;
670      setSourceManager(PP->getSourceManager());
671      const_cast<Preprocessor *>(PP)->addCommentHandler(this);
672#ifndef NDEBUG
673      // Debug build tracks parsed files.
674      const_cast<Preprocessor *>(PP)->addPPCallbacks(
675                      std::make_unique<VerifyFileTracker>(*this, *SrcManager));
676#endif
677    }
678  }
679
680  assert((!PP || CurrentPreprocessor == PP) && "Preprocessor changed!");
681  PrimaryClient->BeginSourceFile(LangOpts, PP);
682}
683
684void VerifyDiagnosticConsumer::EndSourceFile() {
685  assert(ActiveSourceFiles && "No active source files!");
686  PrimaryClient->EndSourceFile();
687
688  // Detach comment handler once last active source file completed.
689  if (--ActiveSourceFiles == 0) {
690    if (CurrentPreprocessor)
691      const_cast<Preprocessor *>(CurrentPreprocessor)->
692          removeCommentHandler(this);
693
694    // Diagnose any used-but-not-defined markers.
695    Markers->finalize();
696
697    // Check diagnostics once last file completed.
698    CheckDiagnostics();
699    CurrentPreprocessor = nullptr;
700    LangOpts = nullptr;
701  }
702}
703
704void VerifyDiagnosticConsumer::HandleDiagnostic(
705      DiagnosticsEngine::Level DiagLevel, const Diagnostic &Info) {
706  if (Info.hasSourceManager()) {
707    // If this diagnostic is for a different source manager, ignore it.
708    if (SrcManager && &Info.getSourceManager() != SrcManager)
709      return;
710
711    setSourceManager(Info.getSourceManager());
712  }
713
714#ifndef NDEBUG
715  // Debug build tracks unparsed files for possible
716  // unparsed expected-* directives.
717  if (SrcManager) {
718    SourceLocation Loc = Info.getLocation();
719    if (Loc.isValid()) {
720      ParsedStatus PS = IsUnparsed;
721
722      Loc = SrcManager->getExpansionLoc(Loc);
723      FileID FID = SrcManager->getFileID(Loc);
724
725      const FileEntry *FE = SrcManager->getFileEntryForID(FID);
726      if (FE && CurrentPreprocessor && SrcManager->isLoadedFileID(FID)) {
727        // If the file is a modules header file it shall not be parsed
728        // for expected-* directives.
729        HeaderSearch &HS = CurrentPreprocessor->getHeaderSearchInfo();
730        if (HS.findModuleForHeader(FE))
731          PS = IsUnparsedNoDirectives;
732      }
733
734      UpdateParsedFileStatus(*SrcManager, FID, PS);
735    }
736  }
737#endif
738
739  // Send the diagnostic to the buffer, we will check it once we reach the end
740  // of the source file (or are destructed).
741  Buffer->HandleDiagnostic(DiagLevel, Info);
742}
743
744/// HandleComment - Hook into the preprocessor and extract comments containing
745///  expected errors and warnings.
746bool VerifyDiagnosticConsumer::HandleComment(Preprocessor &PP,
747                                             SourceRange Comment) {
748  SourceManager &SM = PP.getSourceManager();
749
750  // If this comment is for a different source manager, ignore it.
751  if (SrcManager && &SM != SrcManager)
752    return false;
753
754  SourceLocation CommentBegin = Comment.getBegin();
755
756  const char *CommentRaw = SM.getCharacterData(CommentBegin);
757  StringRef C(CommentRaw, SM.getCharacterData(Comment.getEnd()) - CommentRaw);
758
759  if (C.empty())
760    return false;
761
762  // Fold any "\<EOL>" sequences
763  size_t loc = C.find('\\');
764  if (loc == StringRef::npos) {
765    ParseDirective(C, &ED, SM, &PP, CommentBegin, Status, *Markers);
766    return false;
767  }
768
769  std::string C2;
770  C2.reserve(C.size());
771
772  for (size_t last = 0;; loc = C.find('\\', last)) {
773    if (loc == StringRef::npos || loc == C.size()) {
774      C2 += C.substr(last);
775      break;
776    }
777    C2 += C.substr(last, loc-last);
778    last = loc + 1;
779
780    if (C[last] == '\n' || C[last] == '\r') {
781      ++last;
782
783      // Escape \r\n  or \n\r, but not \n\n.
784      if (last < C.size())
785        if (C[last] == '\n' || C[last] == '\r')
786          if (C[last] != C[last-1])
787            ++last;
788    } else {
789      // This was just a normal backslash.
790      C2 += '\\';
791    }
792  }
793
794  if (!C2.empty())
795    ParseDirective(C2, &ED, SM, &PP, CommentBegin, Status, *Markers);
796  return false;
797}
798
799#ifndef NDEBUG
800/// Lex the specified source file to determine whether it contains
801/// any expected-* directives.  As a Lexer is used rather than a full-blown
802/// Preprocessor, directives inside skipped #if blocks will still be found.
803///
804/// \return true if any directives were found.
805static bool findDirectives(SourceManager &SM, FileID FID,
806                           const LangOptions &LangOpts) {
807  // Create a raw lexer to pull all the comments out of FID.
808  if (FID.isInvalid())
809    return false;
810
811  // Create a lexer to lex all the tokens of the main file in raw mode.
812  const llvm::MemoryBuffer *FromFile = SM.getBuffer(FID);
813  Lexer RawLex(FID, FromFile, SM, LangOpts);
814
815  // Return comments as tokens, this is how we find expected diagnostics.
816  RawLex.SetCommentRetentionState(true);
817
818  Token Tok;
819  Tok.setKind(tok::comment);
820  VerifyDiagnosticConsumer::DirectiveStatus Status =
821    VerifyDiagnosticConsumer::HasNoDirectives;
822  while (Tok.isNot(tok::eof)) {
823    RawLex.LexFromRawLexer(Tok);
824    if (!Tok.is(tok::comment)) continue;
825
826    std::string Comment = RawLex.getSpelling(Tok, SM, LangOpts);
827    if (Comment.empty()) continue;
828
829    // We don't care about tracking markers for this phase.
830    VerifyDiagnosticConsumer::MarkerTracker Markers(SM.getDiagnostics());
831
832    // Find first directive.
833    if (ParseDirective(Comment, nullptr, SM, nullptr, Tok.getLocation(),
834                       Status, Markers))
835      return true;
836  }
837  return false;
838}
839#endif // !NDEBUG
840
841/// Takes a list of diagnostics that have been generated but not matched
842/// by an expected-* directive and produces a diagnostic to the user from this.
843static unsigned PrintUnexpected(DiagnosticsEngine &Diags, SourceManager *SourceMgr,
844                                const_diag_iterator diag_begin,
845                                const_diag_iterator diag_end,
846                                const char *Kind) {
847  if (diag_begin == diag_end) return 0;
848
849  SmallString<256> Fmt;
850  llvm::raw_svector_ostream OS(Fmt);
851  for (const_diag_iterator I = diag_begin, E = diag_end; I != E; ++I) {
852    if (I->first.isInvalid() || !SourceMgr)
853      OS << "\n  (frontend)";
854    else {
855      OS << "\n ";
856      if (const FileEntry *File = SourceMgr->getFileEntryForID(
857                                                SourceMgr->getFileID(I->first)))
858        OS << " File " << File->getName();
859      OS << " Line " << SourceMgr->getPresumedLineNumber(I->first);
860    }
861    OS << ": " << I->second;
862  }
863
864  Diags.Report(diag::err_verify_inconsistent_diags).setForceEmit()
865    << Kind << /*Unexpected=*/true << OS.str();
866  return std::distance(diag_begin, diag_end);
867}
868
869/// Takes a list of diagnostics that were expected to have been generated
870/// but were not and produces a diagnostic to the user from this.
871static unsigned PrintExpected(DiagnosticsEngine &Diags,
872                              SourceManager &SourceMgr,
873                              std::vector<Directive *> &DL, const char *Kind) {
874  if (DL.empty())
875    return 0;
876
877  SmallString<256> Fmt;
878  llvm::raw_svector_ostream OS(Fmt);
879  for (const auto *D : DL) {
880    if (D->DiagnosticLoc.isInvalid())
881      OS << "\n  File *";
882    else
883      OS << "\n  File " << SourceMgr.getFilename(D->DiagnosticLoc);
884    if (D->MatchAnyLine)
885      OS << " Line *";
886    else
887      OS << " Line " << SourceMgr.getPresumedLineNumber(D->DiagnosticLoc);
888    if (D->DirectiveLoc != D->DiagnosticLoc)
889      OS << " (directive at "
890         << SourceMgr.getFilename(D->DirectiveLoc) << ':'
891         << SourceMgr.getPresumedLineNumber(D->DirectiveLoc) << ')';
892    OS << ": " << D->Text;
893  }
894
895  Diags.Report(diag::err_verify_inconsistent_diags).setForceEmit()
896    << Kind << /*Unexpected=*/false << OS.str();
897  return DL.size();
898}
899
900/// Determine whether two source locations come from the same file.
901static bool IsFromSameFile(SourceManager &SM, SourceLocation DirectiveLoc,
902                           SourceLocation DiagnosticLoc) {
903  while (DiagnosticLoc.isMacroID())
904    DiagnosticLoc = SM.getImmediateMacroCallerLoc(DiagnosticLoc);
905
906  if (SM.isWrittenInSameFile(DirectiveLoc, DiagnosticLoc))
907    return true;
908
909  const FileEntry *DiagFile = SM.getFileEntryForID(SM.getFileID(DiagnosticLoc));
910  if (!DiagFile && SM.isWrittenInMainFile(DirectiveLoc))
911    return true;
912
913  return (DiagFile == SM.getFileEntryForID(SM.getFileID(DirectiveLoc)));
914}
915
916/// CheckLists - Compare expected to seen diagnostic lists and return the
917/// the difference between them.
918static unsigned CheckLists(DiagnosticsEngine &Diags, SourceManager &SourceMgr,
919                           const char *Label,
920                           DirectiveList &Left,
921                           const_diag_iterator d2_begin,
922                           const_diag_iterator d2_end,
923                           bool IgnoreUnexpected) {
924  std::vector<Directive *> LeftOnly;
925  DiagList Right(d2_begin, d2_end);
926
927  for (auto &Owner : Left) {
928    Directive &D = *Owner;
929    unsigned LineNo1 = SourceMgr.getPresumedLineNumber(D.DiagnosticLoc);
930
931    for (unsigned i = 0; i < D.Max; ++i) {
932      DiagList::iterator II, IE;
933      for (II = Right.begin(), IE = Right.end(); II != IE; ++II) {
934        if (!D.MatchAnyLine) {
935          unsigned LineNo2 = SourceMgr.getPresumedLineNumber(II->first);
936          if (LineNo1 != LineNo2)
937            continue;
938        }
939
940        if (!D.DiagnosticLoc.isInvalid() &&
941            !IsFromSameFile(SourceMgr, D.DiagnosticLoc, II->first))
942          continue;
943
944        const std::string &RightText = II->second;
945        if (D.match(RightText))
946          break;
947      }
948      if (II == IE) {
949        // Not found.
950        if (i >= D.Min) break;
951        LeftOnly.push_back(&D);
952      } else {
953        // Found. The same cannot be found twice.
954        Right.erase(II);
955      }
956    }
957  }
958  // Now all that's left in Right are those that were not matched.
959  unsigned num = PrintExpected(Diags, SourceMgr, LeftOnly, Label);
960  if (!IgnoreUnexpected)
961    num += PrintUnexpected(Diags, &SourceMgr, Right.begin(), Right.end(), Label);
962  return num;
963}
964
965/// CheckResults - This compares the expected results to those that
966/// were actually reported. It emits any discrepencies. Return "true" if there
967/// were problems. Return "false" otherwise.
968static unsigned CheckResults(DiagnosticsEngine &Diags, SourceManager &SourceMgr,
969                             const TextDiagnosticBuffer &Buffer,
970                             ExpectedData &ED) {
971  // We want to capture the delta between what was expected and what was
972  // seen.
973  //
974  //   Expected \ Seen - set expected but not seen
975  //   Seen \ Expected - set seen but not expected
976  unsigned NumProblems = 0;
977
978  const DiagnosticLevelMask DiagMask =
979    Diags.getDiagnosticOptions().getVerifyIgnoreUnexpected();
980
981  // See if there are error mismatches.
982  NumProblems += CheckLists(Diags, SourceMgr, "error", ED.Errors,
983                            Buffer.err_begin(), Buffer.err_end(),
984                            bool(DiagnosticLevelMask::Error & DiagMask));
985
986  // See if there are warning mismatches.
987  NumProblems += CheckLists(Diags, SourceMgr, "warning", ED.Warnings,
988                            Buffer.warn_begin(), Buffer.warn_end(),
989                            bool(DiagnosticLevelMask::Warning & DiagMask));
990
991  // See if there are remark mismatches.
992  NumProblems += CheckLists(Diags, SourceMgr, "remark", ED.Remarks,
993                            Buffer.remark_begin(), Buffer.remark_end(),
994                            bool(DiagnosticLevelMask::Remark & DiagMask));
995
996  // See if there are note mismatches.
997  NumProblems += CheckLists(Diags, SourceMgr, "note", ED.Notes,
998                            Buffer.note_begin(), Buffer.note_end(),
999                            bool(DiagnosticLevelMask::Note & DiagMask));
1000
1001  return NumProblems;
1002}
1003
1004void VerifyDiagnosticConsumer::UpdateParsedFileStatus(SourceManager &SM,
1005                                                      FileID FID,
1006                                                      ParsedStatus PS) {
1007  // Check SourceManager hasn't changed.
1008  setSourceManager(SM);
1009
1010#ifndef NDEBUG
1011  if (FID.isInvalid())
1012    return;
1013
1014  const FileEntry *FE = SM.getFileEntryForID(FID);
1015
1016  if (PS == IsParsed) {
1017    // Move the FileID from the unparsed set to the parsed set.
1018    UnparsedFiles.erase(FID);
1019    ParsedFiles.insert(std::make_pair(FID, FE));
1020  } else if (!ParsedFiles.count(FID) && !UnparsedFiles.count(FID)) {
1021    // Add the FileID to the unparsed set if we haven't seen it before.
1022
1023    // Check for directives.
1024    bool FoundDirectives;
1025    if (PS == IsUnparsedNoDirectives)
1026      FoundDirectives = false;
1027    else
1028      FoundDirectives = !LangOpts || findDirectives(SM, FID, *LangOpts);
1029
1030    // Add the FileID to the unparsed set.
1031    UnparsedFiles.insert(std::make_pair(FID,
1032                                      UnparsedFileStatus(FE, FoundDirectives)));
1033  }
1034#endif
1035}
1036
1037void VerifyDiagnosticConsumer::CheckDiagnostics() {
1038  // Ensure any diagnostics go to the primary client.
1039  DiagnosticConsumer *CurClient = Diags.getClient();
1040  std::unique_ptr<DiagnosticConsumer> Owner = Diags.takeClient();
1041  Diags.setClient(PrimaryClient, false);
1042
1043#ifndef NDEBUG
1044  // In a debug build, scan through any files that may have been missed
1045  // during parsing and issue a fatal error if directives are contained
1046  // within these files.  If a fatal error occurs, this suggests that
1047  // this file is being parsed separately from the main file, in which
1048  // case consider moving the directives to the correct place, if this
1049  // is applicable.
1050  if (!UnparsedFiles.empty()) {
1051    // Generate a cache of parsed FileEntry pointers for alias lookups.
1052    llvm::SmallPtrSet<const FileEntry *, 8> ParsedFileCache;
1053    for (const auto &I : ParsedFiles)
1054      if (const FileEntry *FE = I.second)
1055        ParsedFileCache.insert(FE);
1056
1057    // Iterate through list of unparsed files.
1058    for (const auto &I : UnparsedFiles) {
1059      const UnparsedFileStatus &Status = I.second;
1060      const FileEntry *FE = Status.getFile();
1061
1062      // Skip files that have been parsed via an alias.
1063      if (FE && ParsedFileCache.count(FE))
1064        continue;
1065
1066      // Report a fatal error if this file contained directives.
1067      if (Status.foundDirectives()) {
1068        llvm::report_fatal_error(Twine("-verify directives found after rather"
1069                                       " than during normal parsing of ",
1070                                 StringRef(FE ? FE->getName() : "(unknown)")));
1071      }
1072    }
1073
1074    // UnparsedFiles has been processed now, so clear it.
1075    UnparsedFiles.clear();
1076  }
1077#endif // !NDEBUG
1078
1079  if (SrcManager) {
1080    // Produce an error if no expected-* directives could be found in the
1081    // source file(s) processed.
1082    if (Status == HasNoDirectives) {
1083      Diags.Report(diag::err_verify_no_directives).setForceEmit();
1084      ++NumErrors;
1085      Status = HasNoDirectivesReported;
1086    }
1087
1088    // Check that the expected diagnostics occurred.
1089    NumErrors += CheckResults(Diags, *SrcManager, *Buffer, ED);
1090  } else {
1091    const DiagnosticLevelMask DiagMask =
1092        ~Diags.getDiagnosticOptions().getVerifyIgnoreUnexpected();
1093    if (bool(DiagnosticLevelMask::Error & DiagMask))
1094      NumErrors += PrintUnexpected(Diags, nullptr, Buffer->err_begin(),
1095                                   Buffer->err_end(), "error");
1096    if (bool(DiagnosticLevelMask::Warning & DiagMask))
1097      NumErrors += PrintUnexpected(Diags, nullptr, Buffer->warn_begin(),
1098                                   Buffer->warn_end(), "warn");
1099    if (bool(DiagnosticLevelMask::Remark & DiagMask))
1100      NumErrors += PrintUnexpected(Diags, nullptr, Buffer->remark_begin(),
1101                                   Buffer->remark_end(), "remark");
1102    if (bool(DiagnosticLevelMask::Note & DiagMask))
1103      NumErrors += PrintUnexpected(Diags, nullptr, Buffer->note_begin(),
1104                                   Buffer->note_end(), "note");
1105  }
1106
1107  Diags.setClient(CurClient, Owner.release() != nullptr);
1108
1109  // Reset the buffer, we have processed all the diagnostics in it.
1110  Buffer.reset(new TextDiagnosticBuffer());
1111  ED.Reset();
1112}
1113
1114std::unique_ptr<Directive> Directive::create(bool RegexKind,
1115                                             SourceLocation DirectiveLoc,
1116                                             SourceLocation DiagnosticLoc,
1117                                             bool MatchAnyLine, StringRef Text,
1118                                             unsigned Min, unsigned Max) {
1119  if (!RegexKind)
1120    return std::make_unique<StandardDirective>(DirectiveLoc, DiagnosticLoc,
1121                                                MatchAnyLine, Text, Min, Max);
1122
1123  // Parse the directive into a regular expression.
1124  std::string RegexStr;
1125  StringRef S = Text;
1126  while (!S.empty()) {
1127    if (S.startswith("{{")) {
1128      S = S.drop_front(2);
1129      size_t RegexMatchLength = S.find("}}");
1130      assert(RegexMatchLength != StringRef::npos);
1131      // Append the regex, enclosed in parentheses.
1132      RegexStr += "(";
1133      RegexStr.append(S.data(), RegexMatchLength);
1134      RegexStr += ")";
1135      S = S.drop_front(RegexMatchLength + 2);
1136    } else {
1137      size_t VerbatimMatchLength = S.find("{{");
1138      if (VerbatimMatchLength == StringRef::npos)
1139        VerbatimMatchLength = S.size();
1140      // Escape and append the fixed string.
1141      RegexStr += llvm::Regex::escape(S.substr(0, VerbatimMatchLength));
1142      S = S.drop_front(VerbatimMatchLength);
1143    }
1144  }
1145
1146  return std::make_unique<RegexDirective>(
1147      DirectiveLoc, DiagnosticLoc, MatchAnyLine, Text, Min, Max, RegexStr);
1148}
1149