1//===--- YAMLParser.h - Simple YAML parser --------------------------------===// 2// 3// The LLVM Compiler Infrastructure 4// 5// This file is distributed under the University of Illinois Open Source 6// License. See LICENSE.TXT for details. 7// 8//===----------------------------------------------------------------------===// 9// 10// This is a YAML 1.2 parser. 11// 12// See http://www.yaml.org/spec/1.2/spec.html for the full standard. 13// 14// This currently does not implement the following: 15// * Multi-line literal folding. 16// * Tag resolution. 17// * UTF-16. 18// * BOMs anywhere other than the first Unicode scalar value in the file. 19// 20// The most important class here is Stream. This represents a YAML stream with 21// 0, 1, or many documents. 22// 23// SourceMgr sm; 24// StringRef input = getInput(); 25// yaml::Stream stream(input, sm); 26// 27// for (yaml::document_iterator di = stream.begin(), de = stream.end(); 28// di != de; ++di) { 29// yaml::Node *n = di->getRoot(); 30// if (n) { 31// // Do something with n... 32// } else 33// break; 34// } 35// 36//===----------------------------------------------------------------------===// 37 38#ifndef LLVM_SUPPORT_YAMLPARSER_H 39#define LLVM_SUPPORT_YAMLPARSER_H 40 41#include "llvm/ADT/OwningPtr.h" 42#include "llvm/ADT/SmallString.h" 43#include "llvm/ADT/StringRef.h" 44#include "llvm/Support/Allocator.h" 45#include "llvm/Support/SMLoc.h" 46 47#include <map> 48#include <limits> 49#include <utility> 50 51namespace llvm { 52class MemoryBuffer; 53class SourceMgr; 54class raw_ostream; 55class Twine; 56 57namespace yaml { 58 59class document_iterator; 60class Document; 61class Node; 62class Scanner; 63struct Token; 64 65/// @brief Dump all the tokens in this stream to OS. 66/// @returns true if there was an error, false otherwise. 67bool dumpTokens(StringRef Input, raw_ostream &); 68 69/// @brief Scans all tokens in input without outputting anything. This is used 70/// for benchmarking the tokenizer. 
71/// @returns true if there was an error, false otherwise. 72bool scanTokens(StringRef Input); 73 74/// @brief Escape \a Input for a double quoted scalar. 75std::string escape(StringRef Input); 76 77/// @brief This class represents a YAML stream potentially containing multiple 78/// documents. 79class Stream { 80public: 81 /// @brief This keeps a reference to the string referenced by \p Input. 82 Stream(StringRef Input, SourceMgr &); 83 84 /// @brief This takes ownership of \p InputBuffer. 85 Stream(MemoryBuffer *InputBuffer, SourceMgr &); 86 ~Stream(); 87 88 document_iterator begin(); 89 document_iterator end(); 90 void skip(); 91 bool failed(); 92 bool validate() { 93 skip(); 94 return !failed(); 95 } 96 97 void printError(Node *N, const Twine &Msg); 98 99private: 100 OwningPtr<Scanner> scanner; 101 OwningPtr<Document> CurrentDoc; 102 103 friend class Document; 104}; 105 106/// @brief Abstract base class for all Nodes. 107class Node { 108 virtual void anchor(); 109public: 110 enum NodeKind { 111 NK_Null, 112 NK_Scalar, 113 NK_KeyValue, 114 NK_Mapping, 115 NK_Sequence, 116 NK_Alias 117 }; 118 119 Node(unsigned int Type, OwningPtr<Document> &, StringRef Anchor, 120 StringRef Tag); 121 122 /// @brief Get the value of the anchor attached to this node. If it does not 123 /// have one, getAnchor().size() will be 0. 124 StringRef getAnchor() const { return Anchor; } 125 126 /// \brief Get the tag as it was written in the document. This does not 127 /// perform tag resolution. 128 StringRef getRawTag() const { return Tag; } 129 130 /// \brief Get the verbatium tag for a given Node. This performs tag resoluton 131 /// and substitution. 132 std::string getVerbatimTag() const; 133 134 SMRange getSourceRange() const { return SourceRange; } 135 void setSourceRange(SMRange SR) { SourceRange = SR; } 136 137 // These functions forward to Document and Scanner. 
138 Token &peekNext(); 139 Token getNext(); 140 Node *parseBlockNode(); 141 BumpPtrAllocator &getAllocator(); 142 void setError(const Twine &Message, Token &Location) const; 143 bool failed() const; 144 145 virtual void skip() {} 146 147 unsigned int getType() const { return TypeID; } 148 149 void *operator new ( size_t Size 150 , BumpPtrAllocator &Alloc 151 , size_t Alignment = 16) throw() { 152 return Alloc.Allocate(Size, Alignment); 153 } 154 155 void operator delete(void *Ptr, BumpPtrAllocator &Alloc, size_t) throw() { 156 Alloc.Deallocate(Ptr); 157 } 158 159protected: 160 OwningPtr<Document> &Doc; 161 SMRange SourceRange; 162 163 void operator delete(void *) throw() {} 164 165 virtual ~Node() {} 166 167private: 168 unsigned int TypeID; 169 StringRef Anchor; 170 /// \brief The tag as typed in the document. 171 StringRef Tag; 172}; 173 174/// @brief A null value. 175/// 176/// Example: 177/// !!null null 178class NullNode : public Node { 179 virtual void anchor(); 180public: 181 NullNode(OwningPtr<Document> &D) 182 : Node(NK_Null, D, StringRef(), StringRef()) {} 183 184 static inline bool classof(const Node *N) { 185 return N->getType() == NK_Null; 186 } 187}; 188 189/// @brief A scalar node is an opaque datum that can be presented as a 190/// series of zero or more Unicode scalar values. 191/// 192/// Example: 193/// Adena 194class ScalarNode : public Node { 195 virtual void anchor(); 196public: 197 ScalarNode(OwningPtr<Document> &D, StringRef Anchor, StringRef Tag, 198 StringRef Val) 199 : Node(NK_Scalar, D, Anchor, Tag), Value(Val) { 200 SMLoc Start = SMLoc::getFromPointer(Val.begin()); 201 SMLoc End = SMLoc::getFromPointer(Val.end()); 202 SourceRange = SMRange(Start, End); 203 } 204 205 // Return Value without any escaping or folding or other fun YAML stuff. This 206 // is the exact bytes that are contained in the file (after conversion to 207 // utf8). 
208 StringRef getRawValue() const { return Value; } 209 210 /// @brief Gets the value of this node as a StringRef. 211 /// 212 /// @param Storage is used to store the content of the returned StringRef iff 213 /// it requires any modification from how it appeared in the source. 214 /// This happens with escaped characters and multi-line literals. 215 StringRef getValue(SmallVectorImpl<char> &Storage) const; 216 217 static inline bool classof(const Node *N) { 218 return N->getType() == NK_Scalar; 219 } 220 221private: 222 StringRef Value; 223 224 StringRef unescapeDoubleQuoted( StringRef UnquotedValue 225 , StringRef::size_type Start 226 , SmallVectorImpl<char> &Storage) const; 227}; 228 229/// @brief A key and value pair. While not technically a Node under the YAML 230/// representation graph, it is easier to treat them this way. 231/// 232/// TODO: Consider making this not a child of Node. 233/// 234/// Example: 235/// Section: .text 236class KeyValueNode : public Node { 237 virtual void anchor(); 238public: 239 KeyValueNode(OwningPtr<Document> &D) 240 : Node(NK_KeyValue, D, StringRef(), StringRef()) 241 , Key(0) 242 , Value(0) 243 {} 244 245 /// @brief Parse and return the key. 246 /// 247 /// This may be called multiple times. 248 /// 249 /// @returns The key, or nullptr if failed() == true. 250 Node *getKey(); 251 252 /// @brief Parse and return the value. 253 /// 254 /// This may be called multiple times. 255 /// 256 /// @returns The value, or nullptr if failed() == true. 257 Node *getValue(); 258 259 virtual void skip() LLVM_OVERRIDE { 260 getKey()->skip(); 261 getValue()->skip(); 262 } 263 264 static inline bool classof(const Node *N) { 265 return N->getType() == NK_KeyValue; 266 } 267 268private: 269 Node *Key; 270 Node *Value; 271}; 272 273/// @brief This is an iterator abstraction over YAML collections shared by both 274/// sequences and maps. 
///
/// BaseT must have a ValueT* member named CurrentEntry and a member function
/// increment() which must set CurrentEntry to 0 to create an end iterator.
///
/// An iterator is either bound to a collection (Base != 0) or is an end
/// iterator (Base == 0). operator++ drops the binding as soon as the
/// collection reports that there is no further entry.
template <class BaseT, class ValueT>
class basic_collection_iterator
  : public std::iterator<std::forward_iterator_tag, ValueT> {
public:
  /// @brief Construct an end iterator.
  basic_collection_iterator() : Base(0) {}
  /// @brief Construct an iterator bound to the collection \p B.
  basic_collection_iterator(BaseT *B) : Base(B) {}

  /// @brief Access the current entry. Must not be used on an end iterator.
  ValueT *operator ->() const {
    assert(Base && Base->CurrentEntry && "Attempted to access end iterator!");
    return Base->CurrentEntry;
  }

  /// @brief Dereference the current entry. Must not be used on an end
  ///        iterator.
  ValueT &operator *() const {
    assert(Base && Base->CurrentEntry &&
           "Attempted to dereference end iterator!");
    return *Base->CurrentEntry;
  }

  /// @brief Implicit conversion to a pointer to the current entry. Must not
  ///        be used on an end iterator.
  operator ValueT*() const {
    assert(Base && Base->CurrentEntry && "Attempted to access end iterator!");
    return Base->CurrentEntry;
  }

  /// @brief Two iterators are equal iff they are bound to the same collection
  ///        or are both end iterators.
  ///
  /// Iterators bound to the same collection always share its single
  /// CurrentEntry, so comparing Base is sufficient. Without this overload an
  /// expression such as `it == end` would go through operator ValueT*() and
  /// trip its assertion on the end iterator.
  bool operator ==(const basic_collection_iterator &Other) const {
    return Base == Other.Base;
  }

  /// @brief Negation of operator==.
  bool operator !=(const basic_collection_iterator &Other) const {
    return !(*this == Other);
  }

  /// @brief Advance the underlying collection by one entry.
  basic_collection_iterator &operator++() {
    assert(Base && "Attempted to advance iterator past end!");
    Base->increment();
    // Create an end iterator.
    if (Base->CurrentEntry == 0)
      Base = 0;
    return *this;
  }

private:
  /// Collection being iterated, or 0 for an end iterator.
  BaseT *Base;
};

// The following two templates are used for both MappingNode and Sequence Node.
322template <class CollectionType> 323typename CollectionType::iterator begin(CollectionType &C) { 324 assert(C.IsAtBeginning && "You may only iterate over a collection once!"); 325 C.IsAtBeginning = false; 326 typename CollectionType::iterator ret(&C); 327 ++ret; 328 return ret; 329} 330 331template <class CollectionType> 332void skip(CollectionType &C) { 333 // TODO: support skipping from the middle of a parsed collection ;/ 334 assert((C.IsAtBeginning || C.IsAtEnd) && "Cannot skip mid parse!"); 335 if (C.IsAtBeginning) 336 for (typename CollectionType::iterator i = begin(C), e = C.end(); 337 i != e; ++i) 338 i->skip(); 339} 340 341/// @brief Represents a YAML map created from either a block map for a flow map. 342/// 343/// This parses the YAML stream as increment() is called. 344/// 345/// Example: 346/// Name: _main 347/// Scope: Global 348class MappingNode : public Node { 349 virtual void anchor(); 350public: 351 enum MappingType { 352 MT_Block, 353 MT_Flow, 354 MT_Inline ///< An inline mapping node is used for "[key: value]". 
355 }; 356 357 MappingNode(OwningPtr<Document> &D, StringRef Anchor, StringRef Tag, 358 MappingType MT) 359 : Node(NK_Mapping, D, Anchor, Tag), Type(MT), IsAtBeginning(true), 360 IsAtEnd(false), CurrentEntry(0) {} 361 362 friend class basic_collection_iterator<MappingNode, KeyValueNode>; 363 typedef basic_collection_iterator<MappingNode, KeyValueNode> iterator; 364 template <class T> friend typename T::iterator yaml::begin(T &); 365 template <class T> friend void yaml::skip(T &); 366 367 iterator begin() { 368 return yaml::begin(*this); 369 } 370 371 iterator end() { return iterator(); } 372 373 virtual void skip() LLVM_OVERRIDE { 374 yaml::skip(*this); 375 } 376 377 static inline bool classof(const Node *N) { 378 return N->getType() == NK_Mapping; 379 } 380 381private: 382 MappingType Type; 383 bool IsAtBeginning; 384 bool IsAtEnd; 385 KeyValueNode *CurrentEntry; 386 387 void increment(); 388}; 389 390/// @brief Represents a YAML sequence created from either a block sequence for a 391/// flow sequence. 392/// 393/// This parses the YAML stream as increment() is called. 394/// 395/// Example: 396/// - Hello 397/// - World 398class SequenceNode : public Node { 399 virtual void anchor(); 400public: 401 enum SequenceType { 402 ST_Block, 403 ST_Flow, 404 // Use for: 405 // 406 // key: 407 // - val1 408 // - val2 409 // 410 // As a BlockMappingEntry and BlockEnd are not created in this case. 411 ST_Indentless 412 }; 413 414 SequenceNode(OwningPtr<Document> &D, StringRef Anchor, StringRef Tag, 415 SequenceType ST) 416 : Node(NK_Sequence, D, Anchor, Tag), SeqType(ST), IsAtBeginning(true), 417 IsAtEnd(false), 418 WasPreviousTokenFlowEntry(true), // Start with an imaginary ','. 
419 CurrentEntry(0) {} 420 421 friend class basic_collection_iterator<SequenceNode, Node>; 422 typedef basic_collection_iterator<SequenceNode, Node> iterator; 423 template <class T> friend typename T::iterator yaml::begin(T &); 424 template <class T> friend void yaml::skip(T &); 425 426 void increment(); 427 428 iterator begin() { 429 return yaml::begin(*this); 430 } 431 432 iterator end() { return iterator(); } 433 434 virtual void skip() LLVM_OVERRIDE { 435 yaml::skip(*this); 436 } 437 438 static inline bool classof(const Node *N) { 439 return N->getType() == NK_Sequence; 440 } 441 442private: 443 SequenceType SeqType; 444 bool IsAtBeginning; 445 bool IsAtEnd; 446 bool WasPreviousTokenFlowEntry; 447 Node *CurrentEntry; 448}; 449 450/// @brief Represents an alias to a Node with an anchor. 451/// 452/// Example: 453/// *AnchorName 454class AliasNode : public Node { 455 virtual void anchor(); 456public: 457 AliasNode(OwningPtr<Document> &D, StringRef Val) 458 : Node(NK_Alias, D, StringRef(), StringRef()), Name(Val) {} 459 460 StringRef getName() const { return Name; } 461 Node *getTarget(); 462 463 static inline bool classof(const Node *N) { 464 return N->getType() == NK_Alias; 465 } 466 467private: 468 StringRef Name; 469}; 470 471/// @brief A YAML Stream is a sequence of Documents. A document contains a root 472/// node. 473class Document { 474public: 475 /// @brief Root for parsing a node. Returns a single node. 476 Node *parseBlockNode(); 477 478 Document(Stream &ParentStream); 479 480 /// @brief Finish parsing the current document and return true if there are 481 /// more. Return false otherwise. 482 bool skip(); 483 484 /// @brief Parse and return the root level node. 
485 Node *getRoot() { 486 if (Root) 487 return Root; 488 return Root = parseBlockNode(); 489 } 490 491 const std::map<StringRef, StringRef> &getTagMap() const { 492 return TagMap; 493 } 494 495private: 496 friend class Node; 497 friend class document_iterator; 498 499 /// @brief Stream to read tokens from. 500 Stream &stream; 501 502 /// @brief Used to allocate nodes to. All are destroyed without calling their 503 /// destructor when the document is destroyed. 504 BumpPtrAllocator NodeAllocator; 505 506 /// @brief The root node. Used to support skipping a partially parsed 507 /// document. 508 Node *Root; 509 510 /// \brief Maps tag prefixes to their expansion. 511 std::map<StringRef, StringRef> TagMap; 512 513 Token &peekNext(); 514 Token getNext(); 515 void setError(const Twine &Message, Token &Location) const; 516 bool failed() const; 517 518 /// @brief Parse %BLAH directives and return true if any were encountered. 519 bool parseDirectives(); 520 521 /// \brief Parse %YAML 522 void parseYAMLDirective(); 523 524 /// \brief Parse %TAG 525 void parseTAGDirective(); 526 527 /// @brief Consume the next token and error if it is not \a TK. 528 bool expectToken(int TK); 529}; 530 531/// @brief Iterator abstraction for Documents over a Stream. 
532class document_iterator { 533public: 534 document_iterator() : Doc(0) {} 535 document_iterator(OwningPtr<Document> &D) : Doc(&D) {} 536 537 bool operator ==(const document_iterator &Other) { 538 if (isAtEnd() || Other.isAtEnd()) 539 return isAtEnd() && Other.isAtEnd(); 540 541 return Doc == Other.Doc; 542 } 543 bool operator !=(const document_iterator &Other) { 544 return !(*this == Other); 545 } 546 547 document_iterator operator ++() { 548 assert(Doc != 0 && "incrementing iterator past the end."); 549 if (!(*Doc)->skip()) { 550 Doc->reset(0); 551 } else { 552 Stream &S = (*Doc)->stream; 553 Doc->reset(new Document(S)); 554 } 555 return *this; 556 } 557 558 Document &operator *() { 559 return *Doc->get(); 560 } 561 562 OwningPtr<Document> &operator ->() { 563 return *Doc; 564 } 565 566private: 567 bool isAtEnd() const { 568 return !Doc || !*Doc; 569 } 570 571 OwningPtr<Document> *Doc; 572}; 573 574} 575} 576 577#endif 578