//===--- CommentParser.cpp - Doxygen comment parser -----------------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//

#include "clang/AST/CommentParser.h"
#include "clang/AST/CommentCommandTraits.h"
#include "clang/AST/CommentDiagnostic.h"
#include "clang/AST/CommentSema.h"
#include "clang/Basic/CharInfo.h"
#include "clang/Basic/SourceManager.h"
#include "llvm/Support/ErrorHandling.h"

namespace clang {
namespace comments {

/// Re-lexes a sequence of tok::text tokens.
///
/// The comment lexer produces coarse tok::text tokens; command-argument
/// parsing needs finer granularity (words, bracket-delimited sequences).
/// This class pulls text tokens from the Parser on demand, walks them
/// character by character, and can hand unconsumed tokens back via
/// putBackLeftoverTokens().
class TextTokenRetokenizer {
  /// Allocator used for the text of the tokens this class synthesizes.
  llvm::BumpPtrAllocator &Allocator;

  /// The parser we fetch tokens from (and put leftovers back into).
  Parser &P;

  /// This flag is set when there are no more tokens we can fetch from lexer.
  bool NoMoreInterestingTokens;

  /// Token buffer: tokens we have processed and lookahead.
  SmallVector<Token, 16> Toks;

  /// A position in \c Toks.
  struct Position {
    /// Index of the current token in \c Toks.
    unsigned CurToken;
    /// Start of the current token's text.
    const char *BufferStart;
    /// One past the end of the current token's text.
    const char *BufferEnd;
    /// Next character to be consumed within [BufferStart, BufferEnd).
    const char *BufferPtr;
    /// Source location of the first character of the current token; used to
    /// derive per-character locations via getLocWithOffset().
    SourceLocation BufferStartLoc;
  };

  /// Current position in Toks.
  Position Pos;

  /// True when every buffered token has been fully consumed.
  bool isEnd() const {
    return Pos.CurToken >= Toks.size();
  }

  /// Sets up the buffer pointers to point to current token.
  void setupBuffer() {
    assert(!isEnd());
    const Token &Tok = Toks[Pos.CurToken];

    Pos.BufferStart = Tok.getText().begin();
    Pos.BufferEnd = Tok.getText().end();
    Pos.BufferPtr = Pos.BufferStart;
    Pos.BufferStartLoc = Tok.getLocation();
  }

  /// Returns the source location of the character at \c Pos.BufferPtr,
  /// computed as an offset from the current token's start location.
  SourceLocation getSourceLocation() const {
    const unsigned CharNo = Pos.BufferPtr - Pos.BufferStart;
    return Pos.BufferStartLoc.getLocWithOffset(CharNo);
  }

  /// Returns the current character without consuming it.
  char peek() const {
    assert(!isEnd());
    assert(Pos.BufferPtr != Pos.BufferEnd);
    return *Pos.BufferPtr;
  }

  /// Consumes one character, advancing to the next buffered token (fetching
  /// a new one from the lexer if needed) when the current token is exhausted.
  void consumeChar() {
    assert(!isEnd());
    assert(Pos.BufferPtr != Pos.BufferEnd);
    Pos.BufferPtr++;
    if (Pos.BufferPtr == Pos.BufferEnd) {
      Pos.CurToken++;
      // Try to refill from the lexer; if that fails we are at the end and
      // must not touch the buffer pointers.
      if (isEnd() && !addToken())
        return;

      assert(!isEnd());
      setupBuffer();
    }
  }

  /// Add a token.
  /// Returns true on success, false if there are no interesting tokens to
  /// fetch from lexer.
  bool addToken() {
    if (NoMoreInterestingTokens)
      return false;

    if (P.Tok.is(tok::newline)) {
      // If we see a single newline token between text tokens, skip it.
      Token Newline = P.Tok;
      P.consumeToken();
      if (P.Tok.isNot(tok::text)) {
        // The newline was not followed by text: put it back so the main
        // parser sees it, and stop retokenizing.
        P.putBack(Newline);
        NoMoreInterestingTokens = true;
        return false;
      }
    }
    if (P.Tok.isNot(tok::text)) {
      NoMoreInterestingTokens = true;
      return false;
    }

    Toks.push_back(P.Tok);
    P.consumeToken();
    // First token added: initialize the character buffer over it.
    if (Toks.size() == 1)
      setupBuffer();
    return true;
  }

  /// Skips over consecutive whitespace characters (as classified by
  /// isWhitespace from CharInfo.h).
  void consumeWhitespace() {
    while (!isEnd()) {
      if (isWhitespace(peek()))
        consumeChar();
      else
        break;
    }
  }

  /// Fills \p Result as a tok::text token with the given location, length
  /// and text.  \p TokBegin is currently unused beyond documenting intent.
  void formTokenWithChars(Token &Result,
                          SourceLocation Loc,
                          const char *TokBegin,
                          unsigned TokLength,
                          StringRef Text) {
    Result.setLocation(Loc);
    Result.setKind(tok::text);
    Result.setLength(TokLength);
#ifndef NDEBUG
    // Poison the debug-only fields so stale values are recognizable.
    Result.TextPtr = "<UNSET>";
    Result.IntVal = 7;
#endif
    Result.setText(Text);
  }

public:
  /// Primes the token buffer with the first text token (if any) from \p P.
  TextTokenRetokenizer(llvm::BumpPtrAllocator &Allocator, Parser &P):
      Allocator(Allocator), P(P), NoMoreInterestingTokens(false) {
    Pos.CurToken = 0;
    addToken();
  }

  /// Extract a word -- sequence of non-whitespace characters.
  /// On success fills \p Tok with a synthesized tok::text token holding the
  /// word and returns true.  On failure (only whitespace ahead, or end of
  /// input) restores the position and returns false.
  bool lexWord(Token &Tok) {
    if (isEnd())
      return false;

    // Remember where we started so we can roll back on failure.
    Position SavedPos = Pos;

    consumeWhitespace();
    SmallString<32> WordText;
    const char *WordBegin = Pos.BufferPtr;
    SourceLocation Loc = getSourceLocation();
    while (!isEnd()) {
      const char C = peek();
      if (!isWhitespace(C)) {
        WordText.push_back(C);
        consumeChar();
      } else
        break;
    }
    const unsigned Length = WordText.size();
    if (Length == 0) {
      Pos = SavedPos;
      return false;
    }

    // Copy the word into allocator-owned storage; +1 for the NUL that
    // SmallString::c_str() provides.
    char *TextPtr = Allocator.Allocate<char>(Length + 1);

    memcpy(TextPtr, WordText.c_str(), Length + 1);
    StringRef Text = StringRef(TextPtr, Length);

    formTokenWithChars(Tok, Loc, WordBegin, Length, Text);
    return true;
  }

  /// Extract a sequence delimited by \p OpenDelim and \p CloseDelim,
  /// including the delimiters themselves (e.g. "[in]").  On any failure
  /// (missing open or close delimiter, end of input) restores the position
  /// and returns false.
  bool lexDelimitedSeq(Token &Tok, char OpenDelim, char CloseDelim) {
    if (isEnd())
      return false;

    Position SavedPos = Pos;

    consumeWhitespace();
    SmallString<32> WordText;
    const char *WordBegin = Pos.BufferPtr;
    SourceLocation Loc = getSourceLocation();
    bool Error = false;
    // Require the opening delimiter as the first non-whitespace character.
    if (!isEnd()) {
      const char C = peek();
      if (C == OpenDelim) {
        WordText.push_back(C);
        consumeChar();
      } else
        Error = true;
    }
    // Consume up to and including the closing delimiter.
    char C = '\0';
    while (!Error && !isEnd()) {
      C = peek();
      WordText.push_back(C);
      consumeChar();
      if (C == CloseDelim)
        break;
    }
    // Loop may have ended by running out of input instead of finding the
    // closing delimiter.
    if (!Error && C != CloseDelim)
      Error = true;

    if (Error) {
      Pos = SavedPos;
      return false;
    }

    const unsigned Length = WordText.size();
    char *TextPtr = Allocator.Allocate<char>(Length + 1);

    memcpy(TextPtr, WordText.c_str(), Length + 1);
    StringRef Text = StringRef(TextPtr, Length);

    formTokenWithChars(Tok, Loc, WordBegin,
                       Pos.BufferPtr - WordBegin, Text);
    return true;
  }

  /// Put back tokens that we didn't consume.
  void putBackLeftoverTokens() {
    if (isEnd())
      return;

    bool HavePartialTok = false;
    Token PartialTok;
    // If the current token is partially consumed, synthesize a text token
    // covering only its unconsumed tail.
    if (Pos.BufferPtr != Pos.BufferStart) {
      formTokenWithChars(PartialTok, getSourceLocation(),
                         Pos.BufferPtr, Pos.BufferEnd - Pos.BufferPtr,
                         StringRef(Pos.BufferPtr,
                                   Pos.BufferEnd - Pos.BufferPtr));
      HavePartialTok = true;
      Pos.CurToken++;
    }

    // Return the wholly-unconsumed tokens first, then the partial one, so
    // the parser sees them in source order (putBack works stack-like).
    P.putBack(llvm::makeArrayRef(Toks.begin() + Pos.CurToken, Toks.end()));
    Pos.CurToken = Toks.size();

    if (HavePartialTok)
      P.putBack(PartialTok);
  }
};

Parser::Parser(Lexer &L, Sema &S, llvm::BumpPtrAllocator &Allocator,
               const SourceManager &SourceMgr, DiagnosticsEngine &Diags,
               const CommandTraits &Traits):
    L(L), S(S), Allocator(Allocator), SourceMgr(SourceMgr), Diags(Diags),
    Traits(Traits) {
  // Prime the single-token lookahead (Tok).
  consumeToken();
}

/// Parse the arguments of a \\param command: an optional direction
/// specification ("[in]", "[out]", "[in,out]") followed by the parameter
/// name.  Each recognized piece is reported to Sema.
void Parser::parseParamCommandArgs(ParamCommandComment *PC,
                                   TextTokenRetokenizer &Retokenizer) {
  Token Arg;
  // Check if argument looks like direction specification: [dir]
  // e.g., [in], [out], [in,out]
  if (Retokenizer.lexDelimitedSeq(Arg, '[', ']'))
    S.actOnParamCommandDirectionArg(PC,
                                    Arg.getLocation(),
                                    Arg.getEndLocation(),
                                    Arg.getText());

  if (Retokenizer.lexWord(Arg))
    S.actOnParamCommandParamNameArg(PC,
                                    Arg.getLocation(),
                                    Arg.getEndLocation(),
                                    Arg.getText());
}

/// Parse the argument of a \\tparam command: the template parameter name.
void Parser::parseTParamCommandArgs(TParamCommandComment *TPC,
                                    TextTokenRetokenizer &Retokenizer) {
  Token Arg;
  if (Retokenizer.lexWord(Arg))
    S.actOnTParamCommandParamNameArg(TPC,
                                     Arg.getLocation(),
                                     Arg.getEndLocation(),
                                     Arg.getText());
}

/// Parse up to \p NumArgs whitespace-separated word arguments for a generic
/// block command and hand them to Sema.  Fewer than NumArgs may be parsed if
/// the retokenizer runs out of words.
void Parser::parseBlockCommandArgs(BlockCommandComment *BC,
                                   TextTokenRetokenizer &Retokenizer,
                                   unsigned NumArgs) {
  typedef BlockCommandComment::Argument Argument;
  Argument *Args =
      new (Allocator.Allocate<Argument>(NumArgs)) Argument[NumArgs];
  unsigned ParsedArgs = 0;
  Token Arg;
  while (ParsedArgs < NumArgs && Retokenizer.lexWord(Arg)) {
    Args[ParsedArgs] = Argument(SourceRange(Arg.getLocation(),
                                            Arg.getEndLocation()),
                                Arg.getText());
    ParsedArgs++;
  }

  S.actOnBlockCommandArgs(BC, llvm::makeArrayRef(Args, ParsedArgs));
}

/// Parse a block command (\\brief, \\param, \\tparam, ...): the command
/// token itself, its arguments (retokenized from following text), and the
/// paragraph attached to it.  Exactly one of PC/TPC/BC is created depending
/// on the command kind.
BlockCommandComment *Parser::parseBlockCommand() {
  assert(Tok.is(tok::backslash_command) || Tok.is(tok::at_command));

  ParamCommandComment *PC = 0;
  TParamCommandComment *TPC = 0;
  BlockCommandComment *BC = 0;
  const CommandInfo *Info = Traits.getCommandInfo(Tok.getCommandID());
  // Record whether the command was spelled with '\' or '@'.
  CommandMarkerKind CommandMarker =
      Tok.is(tok::backslash_command) ? CMK_Backslash : CMK_At;
  if (Info->IsParamCommand) {
    PC = S.actOnParamCommandStart(Tok.getLocation(),
                                  Tok.getEndLocation(),
                                  Tok.getCommandID(),
                                  CommandMarker);
  } else if (Info->IsTParamCommand) {
    TPC = S.actOnTParamCommandStart(Tok.getLocation(),
                                    Tok.getEndLocation(),
                                    Tok.getCommandID(),
                                    CommandMarker);
  } else {
    BC = S.actOnBlockCommandStart(Tok.getLocation(),
                                  Tok.getEndLocation(),
                                  Tok.getCommandID(),
                                  CommandMarker);
  }
  consumeToken();

  if (isTokBlockCommand()) {
    // Block command ahead.  We can't nest block commands, so pretend that this
    // command has an empty argument.
    ParagraphComment *Paragraph = S.actOnParagraphComment(None);
    if (PC) {
      S.actOnParamCommandFinish(PC, Paragraph);
      return PC;
    } else if (TPC) {
      S.actOnTParamCommandFinish(TPC, Paragraph);
      return TPC;
    } else {
      S.actOnBlockCommandFinish(BC, Paragraph);
      return BC;
    }
  }

  if (PC || TPC || Info->NumArgs > 0) {
    // In order to parse command arguments we need to retokenize a few
    // following text tokens.
    TextTokenRetokenizer Retokenizer(Allocator, *this);

    if (PC)
      parseParamCommandArgs(PC, Retokenizer);
    else if (TPC)
      parseTParamCommandArgs(TPC, Retokenizer);
    else
      parseBlockCommandArgs(BC, Retokenizer, Info->NumArgs);

    // Whatever the retokenizer buffered but did not consume must go back to
    // the normal token stream.
    Retokenizer.putBackLeftoverTokens();
  }

  // If there's a block command ahead, we will attach an empty paragraph to
  // this command.
  bool EmptyParagraph = false;
  if (isTokBlockCommand())
    EmptyParagraph = true;
  else if (Tok.is(tok::newline)) {
    // Peek past a single newline to see whether a block command follows.
    Token PrevTok = Tok;
    consumeToken();
    EmptyParagraph = isTokBlockCommand();
    putBack(PrevTok);
  }

  ParagraphComment *Paragraph;
  if (EmptyParagraph)
    Paragraph = S.actOnParagraphComment(None);
  else {
    BlockContentComment *Block = parseParagraphOrBlockCommand();
    // Since we have checked for a block command, we should have parsed a
    // paragraph.
    Paragraph = cast<ParagraphComment>(Block);
  }

  if (PC) {
    S.actOnParamCommandFinish(PC, Paragraph);
    return PC;
  } else if (TPC) {
    S.actOnTParamCommandFinish(TPC, Paragraph);
    return TPC;
  } else {
    S.actOnBlockCommandFinish(BC, Paragraph);
    return BC;
  }
}

/// Parse an inline command (e.g. \\c, \\p) and its single optional word
/// argument.
InlineCommandComment *Parser::parseInlineCommand() {
  assert(Tok.is(tok::backslash_command) || Tok.is(tok::at_command));

  const Token CommandTok = Tok;
  consumeToken();

  TextTokenRetokenizer Retokenizer(Allocator, *this);

  Token ArgTok;
  bool ArgTokValid = Retokenizer.lexWord(ArgTok);

  InlineCommandComment *IC;
  if (ArgTokValid) {
    IC = S.actOnInlineCommand(CommandTok.getLocation(),
                              CommandTok.getEndLocation(),
                              CommandTok.getCommandID(),
                              ArgTok.getLocation(),
                              ArgTok.getEndLocation(),
                              ArgTok.getText());
  } else {
    // No argument: report the bare command.
    IC = S.actOnInlineCommand(CommandTok.getLocation(),
                              CommandTok.getEndLocation(),
                              CommandTok.getCommandID());
  }

  Retokenizer.putBackLeftoverTokens();

  return IC;
}

/// Parse an HTML start tag: "<name", a list of attributes
/// (ident, ident=, ident="value"), and a closing ">" or "/>".  Emits
/// diagnostics and recovers on malformed attribute sequences.
HTMLStartTagComment *Parser::parseHTMLStartTag() {
  assert(Tok.is(tok::html_start_tag));
  HTMLStartTagComment *HST =
      S.actOnHTMLStartTagStart(Tok.getLocation(),
                               Tok.getHTMLTagStartName());
  consumeToken();

  SmallVector<HTMLStartTagComment::Attribute, 2> Attrs;
  while (true) {
    switch (Tok.getKind()) {
    case tok::html_ident: {
      Token Ident = Tok;
      consumeToken();
      if (Tok.isNot(tok::html_equals)) {
        // Attribute without a value: <tag attr ...>.
        Attrs.push_back(HTMLStartTagComment::Attribute(Ident.getLocation(),
                                                       Ident.getHTMLIdent()));
        continue;
      }
      Token Equals = Tok;
      consumeToken();
      if (Tok.isNot(tok::html_quoted_string)) {
        // "attr=" with no quoted string: diagnose, keep the bare attribute,
        // and skip any stray '='/string tokens to resynchronize.
        Diag(Tok.getLocation(),
             diag::warn_doc_html_start_tag_expected_quoted_string)
          << SourceRange(Equals.getLocation());
        Attrs.push_back(HTMLStartTagComment::Attribute(Ident.getLocation(),
                                                       Ident.getHTMLIdent()));
        while (Tok.is(tok::html_equals) ||
               Tok.is(tok::html_quoted_string))
          consumeToken();
        continue;
      }
      // Well-formed attr="value".
      Attrs.push_back(HTMLStartTagComment::Attribute(
                              Ident.getLocation(),
                              Ident.getHTMLIdent(),
                              Equals.getLocation(),
                              SourceRange(Tok.getLocation(),
                                          Tok.getEndLocation()),
                              Tok.getHTMLQuotedString()));
      consumeToken();
      continue;
    }

    case tok::html_greater:
      S.actOnHTMLStartTagFinish(HST,
                                S.copyArray(llvm::makeArrayRef(Attrs)),
                                Tok.getLocation(),
                                /* IsSelfClosing = */ false);
      consumeToken();
      return HST;

    case tok::html_slash_greater:
      S.actOnHTMLStartTagFinish(HST,
                                S.copyArray(llvm::makeArrayRef(Attrs)),
                                Tok.getLocation(),
                                /* IsSelfClosing = */ true);
      consumeToken();
      return HST;

    case tok::html_equals:
    case tok::html_quoted_string:
      // '=' or a quoted string with no preceding attribute name: diagnose,
      // skip the malformed run, then either continue or give up.
      Diag(Tok.getLocation(),
           diag::warn_doc_html_start_tag_expected_ident_or_greater);
      while (Tok.is(tok::html_equals) ||
             Tok.is(tok::html_quoted_string))
        consumeToken();
      if (Tok.is(tok::html_ident) ||
          Tok.is(tok::html_greater) ||
          Tok.is(tok::html_slash_greater))
        continue;

      S.actOnHTMLStartTagFinish(HST,
                                S.copyArray(llvm::makeArrayRef(Attrs)),
                                SourceLocation(),
                                /* IsSelfClosing = */ false);
      return HST;

    default:
      // Not a token from an HTML start tag.  Thus HTML tag prematurely ended.
      S.actOnHTMLStartTagFinish(HST,
                                S.copyArray(llvm::makeArrayRef(Attrs)),
                                SourceLocation(),
                                /* IsSelfClosing = */ false);
      // If the tag spans multiple lines, add a note pointing at where it
      // started; otherwise a single warning at the current token suffices.
      bool StartLineInvalid;
      const unsigned StartLine = SourceMgr.getPresumedLineNumber(
                                                  HST->getLocation(),
                                                  &StartLineInvalid);
      bool EndLineInvalid;
      const unsigned EndLine = SourceMgr.getPresumedLineNumber(
                                                  Tok.getLocation(),
                                                  &EndLineInvalid);
      if (StartLineInvalid || EndLineInvalid || StartLine == EndLine)
        Diag(Tok.getLocation(),
             diag::warn_doc_html_start_tag_expected_ident_or_greater)
          << HST->getSourceRange();
      else {
        Diag(Tok.getLocation(),
             diag::warn_doc_html_start_tag_expected_ident_or_greater);
        Diag(HST->getLocation(), diag::note_doc_html_tag_started_here)
          << HST->getSourceRange();
      }
      return HST;
    }
  }
}

/// Parse an HTML end tag: "</name" optionally followed by ">".
HTMLEndTagComment *Parser::parseHTMLEndTag() {
  assert(Tok.is(tok::html_end_tag));
  Token TokEndTag = Tok;
  consumeToken();
  SourceLocation Loc;
  if (Tok.is(tok::html_greater)) {
    Loc = Tok.getLocation();
    consumeToken();
  }

  return S.actOnHTMLEndTag(TokEndTag.getLocation(),
                           Loc,
                           TokEndTag.getHTMLTagEndName());
}

/// Parse a paragraph: a run of inline content (text, inline commands, HTML
/// tags) ended by a blank line, EOF, or upcoming block content.  If the very
/// first token is a block command, delegates to parseBlockCommand() instead.
BlockContentComment *Parser::parseParagraphOrBlockCommand() {
  SmallVector<InlineContentComment *, 8> Content;

  while (true) {
    switch (Tok.getKind()) {
    case tok::verbatim_block_begin:
    case tok::verbatim_line_name:
    case tok::eof:
      assert(Content.size() != 0);
      break; // Block content or EOF ahead, finish this paragraph.

    case tok::unknown_command:
      Content.push_back(S.actOnUnknownCommand(Tok.getLocation(),
                                              Tok.getEndLocation(),
                                              Tok.getUnknownCommandName()));
      consumeToken();
      continue;

    case tok::backslash_command:
    case tok::at_command: {
      const CommandInfo *Info = Traits.getCommandInfo(Tok.getCommandID());
      if (Info->IsBlockCommand) {
        if (Content.size() == 0)
          return parseBlockCommand();
        break; // Block command ahead, finish this paragraph.
      }
      if (Info->IsVerbatimBlockEndCommand) {
        // e.g. \endverbatim with no matching \verbatim: diagnose and skip.
        Diag(Tok.getLocation(),
             diag::warn_verbatim_block_end_without_start)
          << Tok.is(tok::at_command)
          << Info->Name
          << SourceRange(Tok.getLocation(), Tok.getEndLocation());
        consumeToken();
        continue;
      }
      if (Info->IsUnknownCommand) {
        Content.push_back(S.actOnUnknownCommand(Tok.getLocation(),
                                                Tok.getEndLocation(),
                                                Info->getID()));
        consumeToken();
        continue;
      }
      assert(Info->IsInlineCommand);
      Content.push_back(parseInlineCommand());
      continue;
    }

    case tok::newline: {
      consumeToken();
      if (Tok.is(tok::newline) || Tok.is(tok::eof)) {
        consumeToken();
        break; // Two newlines -- end of paragraph.
      }
      // Single newline inside a paragraph: record it on the last piece of
      // content so clients can reconstruct line breaks.
      if (Content.size() > 0)
        Content.back()->addTrailingNewline();
      continue;
    }

    // Don't deal with HTML tag soup now.
    case tok::html_start_tag:
      Content.push_back(parseHTMLStartTag());
      continue;

    case tok::html_end_tag:
      Content.push_back(parseHTMLEndTag());
      continue;

    case tok::text:
      Content.push_back(S.actOnText(Tok.getLocation(),
                                    Tok.getEndLocation(),
                                    Tok.getText()));
      consumeToken();
      continue;

    case tok::verbatim_block_line:
    case tok::verbatim_block_end:
    case tok::verbatim_line_text:
    case tok::html_ident:
    case tok::html_equals:
    case tok::html_quoted_string:
    case tok::html_greater:
    case tok::html_slash_greater:
      llvm_unreachable("should not see this token");
    }
    break;
  }

  return S.actOnParagraphComment(S.copyArray(llvm::makeArrayRef(Content)));
}

/// Parse a verbatim block: the opening command, its lines (preserved as-is),
/// and the closing command.  An unterminated block is finished with an
/// invalid end location.
VerbatimBlockComment *Parser::parseVerbatimBlock() {
  assert(Tok.is(tok::verbatim_block_begin));

  VerbatimBlockComment *VB =
      S.actOnVerbatimBlockStart(Tok.getLocation(),
                                Tok.getVerbatimBlockID());
  consumeToken();

  // Don't create an empty line if verbatim opening command is followed
  // by a newline.
  if (Tok.is(tok::newline))
    consumeToken();

  SmallVector<VerbatimBlockLineComment *, 8> Lines;
  while (Tok.is(tok::verbatim_block_line) ||
         Tok.is(tok::newline)) {
    VerbatimBlockLineComment *Line;
    if (Tok.is(tok::verbatim_block_line)) {
      Line = S.actOnVerbatimBlockLine(Tok.getLocation(),
                                      Tok.getVerbatimBlockText());
      consumeToken();
      // Swallow the newline that terminates the line, if present.
      if (Tok.is(tok::newline)) {
        consumeToken();
      }
    } else {
      // Empty line, just a tok::newline.
      Line = S.actOnVerbatimBlockLine(Tok.getLocation(), "");
      consumeToken();
    }
    Lines.push_back(Line);
  }

  if (Tok.is(tok::verbatim_block_end)) {
    const CommandInfo *Info = Traits.getCommandInfo(Tok.getVerbatimBlockID());
    S.actOnVerbatimBlockFinish(VB, Tok.getLocation(),
                               Info->Name,
                               S.copyArray(llvm::makeArrayRef(Lines)));
    consumeToken();
  } else {
    // Unterminated \\verbatim block
    S.actOnVerbatimBlockFinish(VB, SourceLocation(), "",
                               S.copyArray(llvm::makeArrayRef(Lines)));
  }

  return VB;
}

/// Parse a verbatim-line command (e.g. \\fn): the command name followed by
/// the rest of the line taken verbatim.
VerbatimLineComment *Parser::parseVerbatimLine() {
  assert(Tok.is(tok::verbatim_line_name));

  Token NameTok = Tok;
  consumeToken();

  SourceLocation TextBegin;
  StringRef Text;
  // Next token might not be a tok::verbatim_line_text if verbatim line
  // starting command comes just before a newline or comment end.
  if (Tok.is(tok::verbatim_line_text)) {
    TextBegin = Tok.getLocation();
    Text = Tok.getVerbatimLineText();
  } else {
    TextBegin = NameTok.getEndLocation();
    Text = "";
  }

  VerbatimLineComment *VL = S.actOnVerbatimLine(NameTok.getLocation(),
                                                NameTok.getVerbatimLineID(),
                                                TextBegin,
                                                Text);
  consumeToken();
  return VL;
}

/// Dispatch on the current token to parse one block of comment content:
/// a paragraph/block command, a verbatim block, or a verbatim line.
BlockContentComment *Parser::parseBlockContent() {
  switch (Tok.getKind()) {
  case tok::text:
  case tok::unknown_command:
  case tok::backslash_command:
  case tok::at_command:
  case tok::html_start_tag:
  case tok::html_end_tag:
    return parseParagraphOrBlockCommand();

  case tok::verbatim_block_begin:
    return parseVerbatimBlock();

  case tok::verbatim_line_name:
    return parseVerbatimLine();

  case tok::eof:
  case tok::newline:
  case tok::verbatim_block_line:
  case tok::verbatim_block_end:
  case tok::verbatim_line_text:
  case tok::html_ident:
  case tok::html_equals:
  case tok::html_quoted_string:
  case tok::html_greater:
  case tok::html_slash_greater:
    llvm_unreachable("should not see this token");
  }
  llvm_unreachable("bogus token kind");
}

/// Parse an entire comment: a sequence of block content separated by
/// newlines, terminated by EOF.
FullComment *Parser::parseFullComment() {
  // Skip newlines at the beginning of the comment.
  while (Tok.is(tok::newline))
    consumeToken();

  SmallVector<BlockContentComment *, 8> Blocks;
  while (Tok.isNot(tok::eof)) {
    Blocks.push_back(parseBlockContent());

    // Skip extra newlines after paragraph end.
    while (Tok.is(tok::newline))
      consumeToken();
  }
  return S.actOnFullComment(S.copyArray(llvm::makeArrayRef(Blocks)));
}

} // end namespace comments
} // end namespace clang