1239313Sdim//===--- CommentParser.cpp - Doxygen comment parser -----------------------===//
2239313Sdim//
3239313Sdim//                     The LLVM Compiler Infrastructure
4239313Sdim//
5239313Sdim// This file is distributed under the University of Illinois Open Source
6239313Sdim// License. See LICENSE.TXT for details.
7239313Sdim//
8239313Sdim//===----------------------------------------------------------------------===//
9239313Sdim
10239313Sdim#include "clang/AST/CommentParser.h"
11249423Sdim#include "clang/AST/CommentCommandTraits.h"
12249423Sdim#include "clang/AST/CommentDiagnostic.h"
13239313Sdim#include "clang/AST/CommentSema.h"
14249423Sdim#include "clang/Basic/CharInfo.h"
15239313Sdim#include "clang/Basic/SourceManager.h"
16239313Sdim#include "llvm/Support/ErrorHandling.h"
17239313Sdim
18239313Sdimnamespace clang {
19239313Sdimnamespace comments {
20239313Sdim
21239313Sdim/// Re-lexes a sequence of tok::text tokens.
22239313Sdimclass TextTokenRetokenizer {
23239313Sdim  llvm::BumpPtrAllocator &Allocator;
24239313Sdim  Parser &P;
25239313Sdim
26239313Sdim  /// This flag is set when there are no more tokens we can fetch from lexer.
27239313Sdim  bool NoMoreInterestingTokens;
28239313Sdim
29239313Sdim  /// Token buffer: tokens we have processed and lookahead.
30239313Sdim  SmallVector<Token, 16> Toks;
31239313Sdim
32239313Sdim  /// A position in \c Toks.
33239313Sdim  struct Position {
34239313Sdim    unsigned CurToken;
35239313Sdim    const char *BufferStart;
36239313Sdim    const char *BufferEnd;
37239313Sdim    const char *BufferPtr;
38239313Sdim    SourceLocation BufferStartLoc;
39239313Sdim  };
40239313Sdim
41239313Sdim  /// Current position in Toks.
42239313Sdim  Position Pos;
43239313Sdim
44239313Sdim  bool isEnd() const {
45239313Sdim    return Pos.CurToken >= Toks.size();
46239313Sdim  }
47239313Sdim
48239313Sdim  /// Sets up the buffer pointers to point to current token.
49239313Sdim  void setupBuffer() {
50239313Sdim    assert(!isEnd());
51239313Sdim    const Token &Tok = Toks[Pos.CurToken];
52239313Sdim
53239313Sdim    Pos.BufferStart = Tok.getText().begin();
54239313Sdim    Pos.BufferEnd = Tok.getText().end();
55239313Sdim    Pos.BufferPtr = Pos.BufferStart;
56239313Sdim    Pos.BufferStartLoc = Tok.getLocation();
57239313Sdim  }
58239313Sdim
59239313Sdim  SourceLocation getSourceLocation() const {
60239313Sdim    const unsigned CharNo = Pos.BufferPtr - Pos.BufferStart;
61239313Sdim    return Pos.BufferStartLoc.getLocWithOffset(CharNo);
62239313Sdim  }
63239313Sdim
64239313Sdim  char peek() const {
65239313Sdim    assert(!isEnd());
66239313Sdim    assert(Pos.BufferPtr != Pos.BufferEnd);
67239313Sdim    return *Pos.BufferPtr;
68239313Sdim  }
69239313Sdim
70239313Sdim  void consumeChar() {
71239313Sdim    assert(!isEnd());
72239313Sdim    assert(Pos.BufferPtr != Pos.BufferEnd);
73239313Sdim    Pos.BufferPtr++;
74239313Sdim    if (Pos.BufferPtr == Pos.BufferEnd) {
75239313Sdim      Pos.CurToken++;
76239313Sdim      if (isEnd() && !addToken())
77239313Sdim        return;
78239313Sdim
79239313Sdim      assert(!isEnd());
80239313Sdim      setupBuffer();
81239313Sdim    }
82239313Sdim  }
83239313Sdim
84239313Sdim  /// Add a token.
85239313Sdim  /// Returns true on success, false if there are no interesting tokens to
86239313Sdim  /// fetch from lexer.
87239313Sdim  bool addToken() {
88239313Sdim    if (NoMoreInterestingTokens)
89239313Sdim      return false;
90239313Sdim
91239313Sdim    if (P.Tok.is(tok::newline)) {
92239313Sdim      // If we see a single newline token between text tokens, skip it.
93239313Sdim      Token Newline = P.Tok;
94239313Sdim      P.consumeToken();
95239313Sdim      if (P.Tok.isNot(tok::text)) {
96239313Sdim        P.putBack(Newline);
97239313Sdim        NoMoreInterestingTokens = true;
98239313Sdim        return false;
99239313Sdim      }
100239313Sdim    }
101239313Sdim    if (P.Tok.isNot(tok::text)) {
102239313Sdim      NoMoreInterestingTokens = true;
103239313Sdim      return false;
104239313Sdim    }
105239313Sdim
106239313Sdim    Toks.push_back(P.Tok);
107239313Sdim    P.consumeToken();
108239313Sdim    if (Toks.size() == 1)
109239313Sdim      setupBuffer();
110239313Sdim    return true;
111239313Sdim  }
112239313Sdim
113239313Sdim  void consumeWhitespace() {
114239313Sdim    while (!isEnd()) {
115239313Sdim      if (isWhitespace(peek()))
116239313Sdim        consumeChar();
117239313Sdim      else
118239313Sdim        break;
119239313Sdim    }
120239313Sdim  }
121239313Sdim
122239313Sdim  void formTokenWithChars(Token &Result,
123239313Sdim                          SourceLocation Loc,
124239313Sdim                          const char *TokBegin,
125239313Sdim                          unsigned TokLength,
126239313Sdim                          StringRef Text) {
127239313Sdim    Result.setLocation(Loc);
128239313Sdim    Result.setKind(tok::text);
129239313Sdim    Result.setLength(TokLength);
130239313Sdim#ifndef NDEBUG
131243830Sdim    Result.TextPtr = "<UNSET>";
132243830Sdim    Result.IntVal = 7;
133239313Sdim#endif
134239313Sdim    Result.setText(Text);
135239313Sdim  }
136239313Sdim
137239313Sdimpublic:
138239313Sdim  TextTokenRetokenizer(llvm::BumpPtrAllocator &Allocator, Parser &P):
139239313Sdim      Allocator(Allocator), P(P), NoMoreInterestingTokens(false) {
140239313Sdim    Pos.CurToken = 0;
141239313Sdim    addToken();
142239313Sdim  }
143239313Sdim
144239313Sdim  /// Extract a word -- sequence of non-whitespace characters.
145239313Sdim  bool lexWord(Token &Tok) {
146239313Sdim    if (isEnd())
147239313Sdim      return false;
148239313Sdim
149239313Sdim    Position SavedPos = Pos;
150239313Sdim
151239313Sdim    consumeWhitespace();
152239313Sdim    SmallString<32> WordText;
153239313Sdim    const char *WordBegin = Pos.BufferPtr;
154239313Sdim    SourceLocation Loc = getSourceLocation();
155239313Sdim    while (!isEnd()) {
156239313Sdim      const char C = peek();
157239313Sdim      if (!isWhitespace(C)) {
158239313Sdim        WordText.push_back(C);
159239313Sdim        consumeChar();
160239313Sdim      } else
161239313Sdim        break;
162239313Sdim    }
163239313Sdim    const unsigned Length = WordText.size();
164239313Sdim    if (Length == 0) {
165239313Sdim      Pos = SavedPos;
166239313Sdim      return false;
167239313Sdim    }
168239313Sdim
169239313Sdim    char *TextPtr = Allocator.Allocate<char>(Length + 1);
170239313Sdim
171239313Sdim    memcpy(TextPtr, WordText.c_str(), Length + 1);
172239313Sdim    StringRef Text = StringRef(TextPtr, Length);
173239313Sdim
174249423Sdim    formTokenWithChars(Tok, Loc, WordBegin, Length, Text);
175239313Sdim    return true;
176239313Sdim  }
177239313Sdim
178239313Sdim  bool lexDelimitedSeq(Token &Tok, char OpenDelim, char CloseDelim) {
179239313Sdim    if (isEnd())
180239313Sdim      return false;
181239313Sdim
182239313Sdim    Position SavedPos = Pos;
183239313Sdim
184239313Sdim    consumeWhitespace();
185239313Sdim    SmallString<32> WordText;
186239313Sdim    const char *WordBegin = Pos.BufferPtr;
187239313Sdim    SourceLocation Loc = getSourceLocation();
188239313Sdim    bool Error = false;
189239313Sdim    if (!isEnd()) {
190239313Sdim      const char C = peek();
191239313Sdim      if (C == OpenDelim) {
192239313Sdim        WordText.push_back(C);
193239313Sdim        consumeChar();
194239313Sdim      } else
195239313Sdim        Error = true;
196239313Sdim    }
197239313Sdim    char C = '\0';
198239313Sdim    while (!Error && !isEnd()) {
199239313Sdim      C = peek();
200239313Sdim      WordText.push_back(C);
201239313Sdim      consumeChar();
202239313Sdim      if (C == CloseDelim)
203239313Sdim        break;
204239313Sdim    }
205239313Sdim    if (!Error && C != CloseDelim)
206239313Sdim      Error = true;
207239313Sdim
208239313Sdim    if (Error) {
209239313Sdim      Pos = SavedPos;
210239313Sdim      return false;
211239313Sdim    }
212239313Sdim
213239313Sdim    const unsigned Length = WordText.size();
214239313Sdim    char *TextPtr = Allocator.Allocate<char>(Length + 1);
215239313Sdim
216239313Sdim    memcpy(TextPtr, WordText.c_str(), Length + 1);
217239313Sdim    StringRef Text = StringRef(TextPtr, Length);
218239313Sdim
219239313Sdim    formTokenWithChars(Tok, Loc, WordBegin,
220239313Sdim                       Pos.BufferPtr - WordBegin, Text);
221239313Sdim    return true;
222239313Sdim  }
223239313Sdim
224239313Sdim  /// Put back tokens that we didn't consume.
225239313Sdim  void putBackLeftoverTokens() {
226239313Sdim    if (isEnd())
227239313Sdim      return;
228239313Sdim
229239313Sdim    bool HavePartialTok = false;
230239313Sdim    Token PartialTok;
231239313Sdim    if (Pos.BufferPtr != Pos.BufferStart) {
232239313Sdim      formTokenWithChars(PartialTok, getSourceLocation(),
233239313Sdim                         Pos.BufferPtr, Pos.BufferEnd - Pos.BufferPtr,
234239313Sdim                         StringRef(Pos.BufferPtr,
235239313Sdim                                   Pos.BufferEnd - Pos.BufferPtr));
236239313Sdim      HavePartialTok = true;
237239313Sdim      Pos.CurToken++;
238239313Sdim    }
239239313Sdim
240239313Sdim    P.putBack(llvm::makeArrayRef(Toks.begin() + Pos.CurToken, Toks.end()));
241239313Sdim    Pos.CurToken = Toks.size();
242239313Sdim
243239313Sdim    if (HavePartialTok)
244239313Sdim      P.putBack(PartialTok);
245239313Sdim  }
246239313Sdim};
247239313Sdim
248239313SdimParser::Parser(Lexer &L, Sema &S, llvm::BumpPtrAllocator &Allocator,
249239313Sdim               const SourceManager &SourceMgr, DiagnosticsEngine &Diags,
250239313Sdim               const CommandTraits &Traits):
251239313Sdim    L(L), S(S), Allocator(Allocator), SourceMgr(SourceMgr), Diags(Diags),
252239313Sdim    Traits(Traits) {
253239313Sdim  consumeToken();
254239313Sdim}
255239313Sdim
256239313Sdimvoid Parser::parseParamCommandArgs(ParamCommandComment *PC,
257239313Sdim                                   TextTokenRetokenizer &Retokenizer) {
258239313Sdim  Token Arg;
259239313Sdim  // Check if argument looks like direction specification: [dir]
260239313Sdim  // e.g., [in], [out], [in,out]
261239313Sdim  if (Retokenizer.lexDelimitedSeq(Arg, '[', ']'))
262239313Sdim    S.actOnParamCommandDirectionArg(PC,
263239313Sdim                                    Arg.getLocation(),
264239313Sdim                                    Arg.getEndLocation(),
265239313Sdim                                    Arg.getText());
266239313Sdim
267239313Sdim  if (Retokenizer.lexWord(Arg))
268239313Sdim    S.actOnParamCommandParamNameArg(PC,
269239313Sdim                                    Arg.getLocation(),
270239313Sdim                                    Arg.getEndLocation(),
271239313Sdim                                    Arg.getText());
272239313Sdim}
273239313Sdim
274239313Sdimvoid Parser::parseTParamCommandArgs(TParamCommandComment *TPC,
275239313Sdim                                    TextTokenRetokenizer &Retokenizer) {
276239313Sdim  Token Arg;
277239313Sdim  if (Retokenizer.lexWord(Arg))
278239313Sdim    S.actOnTParamCommandParamNameArg(TPC,
279239313Sdim                                     Arg.getLocation(),
280239313Sdim                                     Arg.getEndLocation(),
281239313Sdim                                     Arg.getText());
282239313Sdim}
283239313Sdim
284239313Sdimvoid Parser::parseBlockCommandArgs(BlockCommandComment *BC,
285239313Sdim                                   TextTokenRetokenizer &Retokenizer,
286239313Sdim                                   unsigned NumArgs) {
287239313Sdim  typedef BlockCommandComment::Argument Argument;
288239313Sdim  Argument *Args =
289239313Sdim      new (Allocator.Allocate<Argument>(NumArgs)) Argument[NumArgs];
290239313Sdim  unsigned ParsedArgs = 0;
291239313Sdim  Token Arg;
292239313Sdim  while (ParsedArgs < NumArgs && Retokenizer.lexWord(Arg)) {
293239313Sdim    Args[ParsedArgs] = Argument(SourceRange(Arg.getLocation(),
294239313Sdim                                            Arg.getEndLocation()),
295239313Sdim                                Arg.getText());
296239313Sdim    ParsedArgs++;
297239313Sdim  }
298239313Sdim
299239313Sdim  S.actOnBlockCommandArgs(BC, llvm::makeArrayRef(Args, ParsedArgs));
300239313Sdim}
301239313Sdim
302239313SdimBlockCommandComment *Parser::parseBlockCommand() {
303249423Sdim  assert(Tok.is(tok::backslash_command) || Tok.is(tok::at_command));
304239313Sdim
305251662Sdim  ParamCommandComment *PC = 0;
306251662Sdim  TParamCommandComment *TPC = 0;
307251662Sdim  BlockCommandComment *BC = 0;
308243830Sdim  const CommandInfo *Info = Traits.getCommandInfo(Tok.getCommandID());
309249423Sdim  CommandMarkerKind CommandMarker =
310249423Sdim      Tok.is(tok::backslash_command) ? CMK_Backslash : CMK_At;
311243830Sdim  if (Info->IsParamCommand) {
312239313Sdim    PC = S.actOnParamCommandStart(Tok.getLocation(),
313239313Sdim                                  Tok.getEndLocation(),
314249423Sdim                                  Tok.getCommandID(),
315249423Sdim                                  CommandMarker);
316249423Sdim  } else if (Info->IsTParamCommand) {
317239313Sdim    TPC = S.actOnTParamCommandStart(Tok.getLocation(),
318239313Sdim                                    Tok.getEndLocation(),
319249423Sdim                                    Tok.getCommandID(),
320249423Sdim                                    CommandMarker);
321239313Sdim  } else {
322239313Sdim    BC = S.actOnBlockCommandStart(Tok.getLocation(),
323239313Sdim                                  Tok.getEndLocation(),
324249423Sdim                                  Tok.getCommandID(),
325249423Sdim                                  CommandMarker);
326239313Sdim  }
327239313Sdim  consumeToken();
328239313Sdim
329249423Sdim  if (isTokBlockCommand()) {
330239313Sdim    // Block command ahead.  We can't nest block commands, so pretend that this
331239313Sdim    // command has an empty argument.
332251662Sdim    ParagraphComment *Paragraph = S.actOnParagraphComment(None);
333251662Sdim    if (PC) {
334239313Sdim      S.actOnParamCommandFinish(PC, Paragraph);
335239313Sdim      return PC;
336251662Sdim    } else if (TPC) {
337239313Sdim      S.actOnTParamCommandFinish(TPC, Paragraph);
338239313Sdim      return TPC;
339239313Sdim    } else {
340239313Sdim      S.actOnBlockCommandFinish(BC, Paragraph);
341239313Sdim      return BC;
342239313Sdim    }
343239313Sdim  }
344239313Sdim
345251662Sdim  if (PC || TPC || Info->NumArgs > 0) {
346239313Sdim    // In order to parse command arguments we need to retokenize a few
347239313Sdim    // following text tokens.
348239313Sdim    TextTokenRetokenizer Retokenizer(Allocator, *this);
349239313Sdim
350251662Sdim    if (PC)
351239313Sdim      parseParamCommandArgs(PC, Retokenizer);
352251662Sdim    else if (TPC)
353239313Sdim      parseTParamCommandArgs(TPC, Retokenizer);
354239313Sdim    else
355243830Sdim      parseBlockCommandArgs(BC, Retokenizer, Info->NumArgs);
356239313Sdim
357239313Sdim    Retokenizer.putBackLeftoverTokens();
358239313Sdim  }
359239313Sdim
360249423Sdim  // If there's a block command ahead, we will attach an empty paragraph to
361249423Sdim  // this command.
362249423Sdim  bool EmptyParagraph = false;
363249423Sdim  if (isTokBlockCommand())
364249423Sdim    EmptyParagraph = true;
365249423Sdim  else if (Tok.is(tok::newline)) {
366249423Sdim    Token PrevTok = Tok;
367249423Sdim    consumeToken();
368249423Sdim    EmptyParagraph = isTokBlockCommand();
369249423Sdim    putBack(PrevTok);
370249423Sdim  }
371249423Sdim
372249423Sdim  ParagraphComment *Paragraph;
373249423Sdim  if (EmptyParagraph)
374251662Sdim    Paragraph = S.actOnParagraphComment(None);
375249423Sdim  else {
376249423Sdim    BlockContentComment *Block = parseParagraphOrBlockCommand();
377249423Sdim    // Since we have checked for a block command, we should have parsed a
378249423Sdim    // paragraph.
379249423Sdim    Paragraph = cast<ParagraphComment>(Block);
380249423Sdim  }
381249423Sdim
382251662Sdim  if (PC) {
383239313Sdim    S.actOnParamCommandFinish(PC, Paragraph);
384239313Sdim    return PC;
385251662Sdim  } else if (TPC) {
386239313Sdim    S.actOnTParamCommandFinish(TPC, Paragraph);
387239313Sdim    return TPC;
388239313Sdim  } else {
389239313Sdim    S.actOnBlockCommandFinish(BC, Paragraph);
390239313Sdim    return BC;
391239313Sdim  }
392239313Sdim}
393239313Sdim
394239313SdimInlineCommandComment *Parser::parseInlineCommand() {
395249423Sdim  assert(Tok.is(tok::backslash_command) || Tok.is(tok::at_command));
396239313Sdim
397239313Sdim  const Token CommandTok = Tok;
398239313Sdim  consumeToken();
399239313Sdim
400239313Sdim  TextTokenRetokenizer Retokenizer(Allocator, *this);
401239313Sdim
402239313Sdim  Token ArgTok;
403239313Sdim  bool ArgTokValid = Retokenizer.lexWord(ArgTok);
404239313Sdim
405239313Sdim  InlineCommandComment *IC;
406239313Sdim  if (ArgTokValid) {
407239313Sdim    IC = S.actOnInlineCommand(CommandTok.getLocation(),
408239313Sdim                              CommandTok.getEndLocation(),
409243830Sdim                              CommandTok.getCommandID(),
410239313Sdim                              ArgTok.getLocation(),
411239313Sdim                              ArgTok.getEndLocation(),
412239313Sdim                              ArgTok.getText());
413239313Sdim  } else {
414239313Sdim    IC = S.actOnInlineCommand(CommandTok.getLocation(),
415239313Sdim                              CommandTok.getEndLocation(),
416243830Sdim                              CommandTok.getCommandID());
417239313Sdim  }
418239313Sdim
419239313Sdim  Retokenizer.putBackLeftoverTokens();
420239313Sdim
421239313Sdim  return IC;
422239313Sdim}
423239313Sdim
424239313SdimHTMLStartTagComment *Parser::parseHTMLStartTag() {
425239313Sdim  assert(Tok.is(tok::html_start_tag));
426239313Sdim  HTMLStartTagComment *HST =
427239313Sdim      S.actOnHTMLStartTagStart(Tok.getLocation(),
428239313Sdim                               Tok.getHTMLTagStartName());
429239313Sdim  consumeToken();
430239313Sdim
431239313Sdim  SmallVector<HTMLStartTagComment::Attribute, 2> Attrs;
432239313Sdim  while (true) {
433239313Sdim    switch (Tok.getKind()) {
434239313Sdim    case tok::html_ident: {
435239313Sdim      Token Ident = Tok;
436239313Sdim      consumeToken();
437239313Sdim      if (Tok.isNot(tok::html_equals)) {
438239313Sdim        Attrs.push_back(HTMLStartTagComment::Attribute(Ident.getLocation(),
439239313Sdim                                                       Ident.getHTMLIdent()));
440239313Sdim        continue;
441239313Sdim      }
442239313Sdim      Token Equals = Tok;
443239313Sdim      consumeToken();
444239313Sdim      if (Tok.isNot(tok::html_quoted_string)) {
445239313Sdim        Diag(Tok.getLocation(),
446239313Sdim             diag::warn_doc_html_start_tag_expected_quoted_string)
447239313Sdim          << SourceRange(Equals.getLocation());
448239313Sdim        Attrs.push_back(HTMLStartTagComment::Attribute(Ident.getLocation(),
449239313Sdim                                                       Ident.getHTMLIdent()));
450239313Sdim        while (Tok.is(tok::html_equals) ||
451239313Sdim               Tok.is(tok::html_quoted_string))
452239313Sdim          consumeToken();
453239313Sdim        continue;
454239313Sdim      }
455239313Sdim      Attrs.push_back(HTMLStartTagComment::Attribute(
456239313Sdim                              Ident.getLocation(),
457239313Sdim                              Ident.getHTMLIdent(),
458239313Sdim                              Equals.getLocation(),
459239313Sdim                              SourceRange(Tok.getLocation(),
460239313Sdim                                          Tok.getEndLocation()),
461239313Sdim                              Tok.getHTMLQuotedString()));
462239313Sdim      consumeToken();
463239313Sdim      continue;
464239313Sdim    }
465239313Sdim
466239313Sdim    case tok::html_greater:
467239313Sdim      S.actOnHTMLStartTagFinish(HST,
468239313Sdim                                S.copyArray(llvm::makeArrayRef(Attrs)),
469239313Sdim                                Tok.getLocation(),
470239313Sdim                                /* IsSelfClosing = */ false);
471239313Sdim      consumeToken();
472239313Sdim      return HST;
473239313Sdim
474239313Sdim    case tok::html_slash_greater:
475239313Sdim      S.actOnHTMLStartTagFinish(HST,
476239313Sdim                                S.copyArray(llvm::makeArrayRef(Attrs)),
477239313Sdim                                Tok.getLocation(),
478239313Sdim                                /* IsSelfClosing = */ true);
479239313Sdim      consumeToken();
480239313Sdim      return HST;
481239313Sdim
482239313Sdim    case tok::html_equals:
483239313Sdim    case tok::html_quoted_string:
484239313Sdim      Diag(Tok.getLocation(),
485239313Sdim           diag::warn_doc_html_start_tag_expected_ident_or_greater);
486239313Sdim      while (Tok.is(tok::html_equals) ||
487239313Sdim             Tok.is(tok::html_quoted_string))
488239313Sdim        consumeToken();
489239313Sdim      if (Tok.is(tok::html_ident) ||
490239313Sdim          Tok.is(tok::html_greater) ||
491239313Sdim          Tok.is(tok::html_slash_greater))
492239313Sdim        continue;
493239313Sdim
494239313Sdim      S.actOnHTMLStartTagFinish(HST,
495239313Sdim                                S.copyArray(llvm::makeArrayRef(Attrs)),
496239313Sdim                                SourceLocation(),
497239313Sdim                                /* IsSelfClosing = */ false);
498239313Sdim      return HST;
499239313Sdim
500239313Sdim    default:
501239313Sdim      // Not a token from an HTML start tag.  Thus HTML tag prematurely ended.
502239313Sdim      S.actOnHTMLStartTagFinish(HST,
503239313Sdim                                S.copyArray(llvm::makeArrayRef(Attrs)),
504239313Sdim                                SourceLocation(),
505239313Sdim                                /* IsSelfClosing = */ false);
506239313Sdim      bool StartLineInvalid;
507239313Sdim      const unsigned StartLine = SourceMgr.getPresumedLineNumber(
508239313Sdim                                                  HST->getLocation(),
509239313Sdim                                                  &StartLineInvalid);
510239313Sdim      bool EndLineInvalid;
511239313Sdim      const unsigned EndLine = SourceMgr.getPresumedLineNumber(
512239313Sdim                                                  Tok.getLocation(),
513239313Sdim                                                  &EndLineInvalid);
514239313Sdim      if (StartLineInvalid || EndLineInvalid || StartLine == EndLine)
515239313Sdim        Diag(Tok.getLocation(),
516239313Sdim             diag::warn_doc_html_start_tag_expected_ident_or_greater)
517239313Sdim          << HST->getSourceRange();
518239313Sdim      else {
519239313Sdim        Diag(Tok.getLocation(),
520239313Sdim             diag::warn_doc_html_start_tag_expected_ident_or_greater);
521239313Sdim        Diag(HST->getLocation(), diag::note_doc_html_tag_started_here)
522239313Sdim          << HST->getSourceRange();
523239313Sdim      }
524239313Sdim      return HST;
525239313Sdim    }
526239313Sdim  }
527239313Sdim}
528239313Sdim
529239313SdimHTMLEndTagComment *Parser::parseHTMLEndTag() {
530239313Sdim  assert(Tok.is(tok::html_end_tag));
531239313Sdim  Token TokEndTag = Tok;
532239313Sdim  consumeToken();
533239313Sdim  SourceLocation Loc;
534239313Sdim  if (Tok.is(tok::html_greater)) {
535239313Sdim    Loc = Tok.getLocation();
536239313Sdim    consumeToken();
537239313Sdim  }
538239313Sdim
539239313Sdim  return S.actOnHTMLEndTag(TokEndTag.getLocation(),
540239313Sdim                           Loc,
541239313Sdim                           TokEndTag.getHTMLTagEndName());
542239313Sdim}
543239313Sdim
544239313SdimBlockContentComment *Parser::parseParagraphOrBlockCommand() {
545239313Sdim  SmallVector<InlineContentComment *, 8> Content;
546239313Sdim
547239313Sdim  while (true) {
548239313Sdim    switch (Tok.getKind()) {
549239313Sdim    case tok::verbatim_block_begin:
550239313Sdim    case tok::verbatim_line_name:
551239313Sdim    case tok::eof:
552239313Sdim      assert(Content.size() != 0);
553239313Sdim      break; // Block content or EOF ahead, finish this parapgaph.
554239313Sdim
555243830Sdim    case tok::unknown_command:
556243830Sdim      Content.push_back(S.actOnUnknownCommand(Tok.getLocation(),
557243830Sdim                                              Tok.getEndLocation(),
558243830Sdim                                              Tok.getUnknownCommandName()));
559243830Sdim      consumeToken();
560243830Sdim      continue;
561243830Sdim
562249423Sdim    case tok::backslash_command:
563249423Sdim    case tok::at_command: {
564243830Sdim      const CommandInfo *Info = Traits.getCommandInfo(Tok.getCommandID());
565243830Sdim      if (Info->IsBlockCommand) {
566239313Sdim        if (Content.size() == 0)
567239313Sdim          return parseBlockCommand();
568239313Sdim        break; // Block command ahead, finish this parapgaph.
569239313Sdim      }
570243830Sdim      if (Info->IsVerbatimBlockEndCommand) {
571243830Sdim        Diag(Tok.getLocation(),
572243830Sdim             diag::warn_verbatim_block_end_without_start)
573249423Sdim          << Tok.is(tok::at_command)
574243830Sdim          << Info->Name
575243830Sdim          << SourceRange(Tok.getLocation(), Tok.getEndLocation());
576243830Sdim        consumeToken();
577239313Sdim        continue;
578239313Sdim      }
579243830Sdim      if (Info->IsUnknownCommand) {
580243830Sdim        Content.push_back(S.actOnUnknownCommand(Tok.getLocation(),
581243830Sdim                                                Tok.getEndLocation(),
582243830Sdim                                                Info->getID()));
583243830Sdim        consumeToken();
584243830Sdim        continue;
585243830Sdim      }
586243830Sdim      assert(Info->IsInlineCommand);
587243830Sdim      Content.push_back(parseInlineCommand());
588239313Sdim      continue;
589243830Sdim    }
590239313Sdim
591239313Sdim    case tok::newline: {
592239313Sdim      consumeToken();
593239313Sdim      if (Tok.is(tok::newline) || Tok.is(tok::eof)) {
594239313Sdim        consumeToken();
595239313Sdim        break; // Two newlines -- end of paragraph.
596239313Sdim      }
597239313Sdim      if (Content.size() > 0)
598239313Sdim        Content.back()->addTrailingNewline();
599239313Sdim      continue;
600239313Sdim    }
601239313Sdim
602239313Sdim    // Don't deal with HTML tag soup now.
603239313Sdim    case tok::html_start_tag:
604239313Sdim      Content.push_back(parseHTMLStartTag());
605239313Sdim      continue;
606239313Sdim
607239313Sdim    case tok::html_end_tag:
608239313Sdim      Content.push_back(parseHTMLEndTag());
609239313Sdim      continue;
610239313Sdim
611239313Sdim    case tok::text:
612239313Sdim      Content.push_back(S.actOnText(Tok.getLocation(),
613239313Sdim                                    Tok.getEndLocation(),
614239313Sdim                                    Tok.getText()));
615239313Sdim      consumeToken();
616239313Sdim      continue;
617239313Sdim
618239313Sdim    case tok::verbatim_block_line:
619239313Sdim    case tok::verbatim_block_end:
620239313Sdim    case tok::verbatim_line_text:
621239313Sdim    case tok::html_ident:
622239313Sdim    case tok::html_equals:
623239313Sdim    case tok::html_quoted_string:
624239313Sdim    case tok::html_greater:
625239313Sdim    case tok::html_slash_greater:
626239313Sdim      llvm_unreachable("should not see this token");
627239313Sdim    }
628239313Sdim    break;
629239313Sdim  }
630239313Sdim
631239313Sdim  return S.actOnParagraphComment(S.copyArray(llvm::makeArrayRef(Content)));
632239313Sdim}
633239313Sdim
634239313SdimVerbatimBlockComment *Parser::parseVerbatimBlock() {
635239313Sdim  assert(Tok.is(tok::verbatim_block_begin));
636239313Sdim
637239313Sdim  VerbatimBlockComment *VB =
638239313Sdim      S.actOnVerbatimBlockStart(Tok.getLocation(),
639243830Sdim                                Tok.getVerbatimBlockID());
640239313Sdim  consumeToken();
641239313Sdim
642239313Sdim  // Don't create an empty line if verbatim opening command is followed
643239313Sdim  // by a newline.
644239313Sdim  if (Tok.is(tok::newline))
645239313Sdim    consumeToken();
646239313Sdim
647239313Sdim  SmallVector<VerbatimBlockLineComment *, 8> Lines;
648239313Sdim  while (Tok.is(tok::verbatim_block_line) ||
649239313Sdim         Tok.is(tok::newline)) {
650239313Sdim    VerbatimBlockLineComment *Line;
651239313Sdim    if (Tok.is(tok::verbatim_block_line)) {
652239313Sdim      Line = S.actOnVerbatimBlockLine(Tok.getLocation(),
653239313Sdim                                      Tok.getVerbatimBlockText());
654239313Sdim      consumeToken();
655239313Sdim      if (Tok.is(tok::newline)) {
656239313Sdim        consumeToken();
657239313Sdim      }
658239313Sdim    } else {
659239313Sdim      // Empty line, just a tok::newline.
660239313Sdim      Line = S.actOnVerbatimBlockLine(Tok.getLocation(), "");
661239313Sdim      consumeToken();
662239313Sdim    }
663239313Sdim    Lines.push_back(Line);
664239313Sdim  }
665239313Sdim
666239313Sdim  if (Tok.is(tok::verbatim_block_end)) {
667243830Sdim    const CommandInfo *Info = Traits.getCommandInfo(Tok.getVerbatimBlockID());
668239313Sdim    S.actOnVerbatimBlockFinish(VB, Tok.getLocation(),
669243830Sdim                               Info->Name,
670239313Sdim                               S.copyArray(llvm::makeArrayRef(Lines)));
671239313Sdim    consumeToken();
672239313Sdim  } else {
673239313Sdim    // Unterminated \\verbatim block
674239313Sdim    S.actOnVerbatimBlockFinish(VB, SourceLocation(), "",
675239313Sdim                               S.copyArray(llvm::makeArrayRef(Lines)));
676239313Sdim  }
677239313Sdim
678239313Sdim  return VB;
679239313Sdim}
680239313Sdim
681239313SdimVerbatimLineComment *Parser::parseVerbatimLine() {
682239313Sdim  assert(Tok.is(tok::verbatim_line_name));
683239313Sdim
684239313Sdim  Token NameTok = Tok;
685239313Sdim  consumeToken();
686239313Sdim
687239313Sdim  SourceLocation TextBegin;
688239313Sdim  StringRef Text;
689239313Sdim  // Next token might not be a tok::verbatim_line_text if verbatim line
690239313Sdim  // starting command comes just before a newline or comment end.
691239313Sdim  if (Tok.is(tok::verbatim_line_text)) {
692239313Sdim    TextBegin = Tok.getLocation();
693239313Sdim    Text = Tok.getVerbatimLineText();
694239313Sdim  } else {
695239313Sdim    TextBegin = NameTok.getEndLocation();
696239313Sdim    Text = "";
697239313Sdim  }
698239313Sdim
699239313Sdim  VerbatimLineComment *VL = S.actOnVerbatimLine(NameTok.getLocation(),
700243830Sdim                                                NameTok.getVerbatimLineID(),
701239313Sdim                                                TextBegin,
702239313Sdim                                                Text);
703239313Sdim  consumeToken();
704239313Sdim  return VL;
705239313Sdim}
706239313Sdim
707239313SdimBlockContentComment *Parser::parseBlockContent() {
708239313Sdim  switch (Tok.getKind()) {
709239313Sdim  case tok::text:
710243830Sdim  case tok::unknown_command:
711249423Sdim  case tok::backslash_command:
712249423Sdim  case tok::at_command:
713239313Sdim  case tok::html_start_tag:
714239313Sdim  case tok::html_end_tag:
715239313Sdim    return parseParagraphOrBlockCommand();
716239313Sdim
717239313Sdim  case tok::verbatim_block_begin:
718239313Sdim    return parseVerbatimBlock();
719239313Sdim
720239313Sdim  case tok::verbatim_line_name:
721239313Sdim    return parseVerbatimLine();
722239313Sdim
723239313Sdim  case tok::eof:
724239313Sdim  case tok::newline:
725239313Sdim  case tok::verbatim_block_line:
726239313Sdim  case tok::verbatim_block_end:
727239313Sdim  case tok::verbatim_line_text:
728239313Sdim  case tok::html_ident:
729239313Sdim  case tok::html_equals:
730239313Sdim  case tok::html_quoted_string:
731239313Sdim  case tok::html_greater:
732239313Sdim  case tok::html_slash_greater:
733239313Sdim    llvm_unreachable("should not see this token");
734239313Sdim  }
735239313Sdim  llvm_unreachable("bogus token kind");
736239313Sdim}
737239313Sdim
738239313SdimFullComment *Parser::parseFullComment() {
739239313Sdim  // Skip newlines at the beginning of the comment.
740239313Sdim  while (Tok.is(tok::newline))
741239313Sdim    consumeToken();
742239313Sdim
743239313Sdim  SmallVector<BlockContentComment *, 8> Blocks;
744239313Sdim  while (Tok.isNot(tok::eof)) {
745239313Sdim    Blocks.push_back(parseBlockContent());
746239313Sdim
747239313Sdim    // Skip extra newlines after paragraph end.
748239313Sdim    while (Tok.is(tok::newline))
749239313Sdim      consumeToken();
750239313Sdim  }
751239313Sdim  return S.actOnFullComment(S.copyArray(llvm::makeArrayRef(Blocks)));
752239313Sdim}
753239313Sdim
754239313Sdim} // end namespace comments
755239313Sdim} // end namespace clang
756