1// Copyright 2012 The Kyua Authors. 2// All rights reserved. 3// 4// Redistribution and use in source and binary forms, with or without 5// modification, are permitted provided that the following conditions are 6// met: 7// 8// * Redistributions of source code must retain the above copyright 9// notice, this list of conditions and the following disclaimer. 10// * Redistributions in binary form must reproduce the above copyright 11// notice, this list of conditions and the following disclaimer in the 12// documentation and/or other materials provided with the distribution. 13// * Neither the name of Google Inc. nor the names of its contributors 14// may be used to endorse or promote products derived from this software 15// without specific prior written permission. 16// 17// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 18// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 19// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 20// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 21// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 22// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 23// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 24// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 25// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 26// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 27// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 28 29#include "utils/text/templates.hpp" 30 31#include <algorithm> 32#include <fstream> 33#include <sstream> 34#include <stack> 35 36#include "utils/format/macros.hpp" 37#include "utils/fs/path.hpp" 38#include "utils/noncopyable.hpp" 39#include "utils/sanity.hpp" 40#include "utils/text/exceptions.hpp" 41#include "utils/text/operations.ipp" 42 43namespace text = utils::text; 44 45 46namespace { 47 48 49/// Definition of a template statement. 50/// 51/// A template statement is a particular line in the input file that is 52/// preceeded by a template marker. This class provides a high-level 53/// representation of the contents of such statement and a mechanism to parse 54/// the textual line into this high-level representation. 55class statement_def { 56public: 57 /// Types of the known statements. 58 enum statement_type { 59 /// Alternative clause of a conditional. 60 /// 61 /// Takes no arguments. 62 type_else, 63 64 /// End of conditional marker. 65 /// 66 /// Takes no arguments. 67 type_endif, 68 69 /// End of loop marker. 70 /// 71 /// Takes no arguments. 72 type_endloop, 73 74 /// Beginning of a conditional. 75 /// 76 /// Takes a single argument, which denotes the name of the variable or 77 /// vector to check for existence. This is the only expression 78 /// supported. 79 type_if, 80 81 /// Beginning of a loop over all the elements of a vector. 82 /// 83 /// Takes two arguments: the name of the vector over which to iterate 84 /// and the name of the iterator to later index this vector. 85 type_loop, 86 }; 87 88private: 89 /// Internal data describing the structure of a particular statement type. 90 struct type_descriptor { 91 /// The native type of the statement. 92 statement_type type; 93 94 /// The expected number of arguments. 95 unsigned int n_arguments; 96 97 /// Constructs a new type descriptor. 98 /// 99 /// \param type_ The native type of the statement. 100 /// \param n_arguments_ The expected number of arguments. 101 type_descriptor(const statement_type type_, 102 const unsigned int n_arguments_) 103 : type(type_), n_arguments(n_arguments_) 104 { 105 } 106 }; 107 108 /// Mapping of statement type names to their definitions. 109 typedef std::map< std::string, type_descriptor > types_map; 110 111 /// Description of the different statement types. 112 /// 113 /// This static map is initialized once and reused later for any statement 114 /// lookup. Unfortunately, we cannot perform this initialization in a 115 /// static manner without C++11. 116 static types_map _types; 117 118 /// Generates a new types definition map. 119 /// 120 /// \return A new types definition map, to be assigned to _types. 121 static types_map 122 generate_types_map(void) 123 { 124 // If you change this, please edit the comments in the enum above. 125 types_map types; 126 types.insert(types_map::value_type( 127 "else", type_descriptor(type_else, 0))); 128 types.insert(types_map::value_type( 129 "endif", type_descriptor(type_endif, 0))); 130 types.insert(types_map::value_type( 131 "endloop", type_descriptor(type_endloop, 0))); 132 types.insert(types_map::value_type( 133 "if", type_descriptor(type_if, 1))); 134 types.insert(types_map::value_type( 135 "loop", type_descriptor(type_loop, 2))); 136 return types; 137 } 138 139public: 140 /// The type of the statement. 141 statement_type type; 142 143 /// The arguments to the statement, in textual form. 144 const std::vector< std::string > arguments; 145 146 /// Creates a new statement. 147 /// 148 /// \param type_ The type of the statement. 149 /// \param arguments_ The arguments to the statement. 150 statement_def(const statement_type& type_, 151 const std::vector< std::string >& arguments_) : 152 type(type_), arguments(arguments_) 153 { 154#if !defined(NDEBUG) 155 for (types_map::const_iterator iter = _types.begin(); 156 iter != _types.end(); ++iter) { 157 const type_descriptor& descriptor = (*iter).second; 158 if (descriptor.type == type_) { 159 PRE(descriptor.n_arguments == arguments_.size()); 160 return; 161 } 162 } 163 UNREACHABLE; 164#endif 165 } 166 167 /// Parses a statement. 168 /// 169 /// \param line The textual representation of the statement without any 170 /// prefix. 171 /// 172 /// \return The parsed statement. 173 /// 174 /// \throw text::syntax_error If the statement is not correctly defined. 175 static statement_def 176 parse(const std::string& line) 177 { 178 if (_types.empty()) 179 _types = generate_types_map(); 180 181 const std::vector< std::string > words = text::split(line, ' '); 182 if (words.empty()) 183 throw text::syntax_error("Empty statement"); 184 185 const types_map::const_iterator iter = _types.find(words[0]); 186 if (iter == _types.end()) 187 throw text::syntax_error(F("Unknown statement '%s'") % words[0]); 188 const type_descriptor& descriptor = (*iter).second; 189 190 if (words.size() - 1 != descriptor.n_arguments) 191 throw text::syntax_error(F("Invalid number of arguments for " 192 "statement '%s'") % words[0]); 193 194 std::vector< std::string > new_arguments; 195 new_arguments.resize(words.size() - 1); 196 std::copy(words.begin() + 1, words.end(), new_arguments.begin()); 197 198 return statement_def(descriptor.type, new_arguments); 199 } 200}; 201 202 203statement_def::types_map statement_def::_types; 204 205 206/// Definition of a loop. 207/// 208/// This simple structure is used to keep track of the parameters of a loop. 209struct loop_def { 210 /// The name of the vector over which this loop is iterating. 211 std::string vector; 212 213 /// The name of the iterator defined by this loop. 214 std::string iterator; 215 216 /// Position in the input to which to rewind to on looping. 217 /// 218 /// This position points to the line after the loop statement, not the loop 219 /// itself. This is one of the reasons why we have this structure, so that 220 /// we can maintain the data about the loop without having to re-process it. 221 std::istream::pos_type position; 222 223 /// Constructs a new loop definition. 224 /// 225 /// \param vector_ The name of the vector (first argument). 226 /// \param iterator_ The name of the iterator (second argumnet). 227 /// \param position_ Position of the next line after the loop statement. 228 loop_def(const std::string& vector_, const std::string& iterator_, 229 const std::istream::pos_type position_) : 230 vector(vector_), iterator(iterator_), position(position_) 231 { 232 } 233}; 234 235 236/// Stateful class to instantiate the templates in an input stream. 237/// 238/// The goal of this parser is to scan the input once and not buffer anything in 239/// memory. The only exception are loops: loops are reinterpreted on every 240/// iteration from the same input file by rewidining the stream to the 241/// appropriate position. 242class templates_parser : utils::noncopyable { 243 /// The templates to apply. 244 /// 245 /// Note that this is not const because the parser has to have write access 246 /// to the templates. In particular, it needs to be able to define the 247 /// iterators as regular variables. 248 text::templates_def _templates; 249 250 /// Prefix that marks a line as a statement. 251 const std::string _prefix; 252 253 /// Delimiter to surround an expression instantiation. 254 const std::string _delimiter; 255 256 /// Whether to skip incoming lines or not. 257 /// 258 /// The top of the stack is true whenever we encounter a conditional that 259 /// evaluates to false or a loop that does not have any iterations left. 260 /// Under these circumstances, we need to continue scanning the input stream 261 /// until we find the matching closing endif or endloop construct. 262 /// 263 /// This is a stack rather than a plain boolean to allow us deal with 264 /// if-else clauses. 265 std::stack< bool > _skip; 266 267 /// Current count of nested conditionals. 268 unsigned int _if_level; 269 270 /// Level of the top-most conditional that evaluated to false. 271 unsigned int _exit_if_level; 272 273 /// Current count of nested loops. 274 unsigned int _loop_level; 275 276 /// Level of the top-most loop that does not have any iterations left. 277 unsigned int _exit_loop_level; 278 279 /// Information about all the nested loops up to the current point. 280 std::stack< loop_def > _loops; 281 282 /// Checks if a line is a statement or not. 283 /// 284 /// \param line The line to validate. 285 /// 286 /// \return True if the line looks like a statement, which is determined by 287 /// checking if the line starts by the predefined prefix. 288 bool 289 is_statement(const std::string& line) 290 { 291 return ((line.length() >= _prefix.length() && 292 line.substr(0, _prefix.length()) == _prefix) && 293 (line.length() < _delimiter.length() || 294 line.substr(0, _delimiter.length()) != _delimiter)); 295 } 296 297 /// Parses a given statement line into a statement definition. 298 /// 299 /// \param line The line to validate; it must be a valid statement. 300 /// 301 /// \return The parsed statement. 302 /// 303 /// \throw text::syntax_error If the input is not a valid statement. 304 statement_def 305 parse_statement(const std::string& line) 306 { 307 PRE(is_statement(line)); 308 return statement_def::parse(line.substr(_prefix.length())); 309 } 310 311 /// Processes a line from the input when not in skip mode. 312 /// 313 /// \param line The line to be processed. 314 /// \param input The input stream from which the line was read. The current 315 /// position in the stream must be after the line being processed. 316 /// \param output The output stream into which to write the results. 317 /// 318 /// \throw text::syntax_error If the input is not valid. 319 void 320 handle_normal(const std::string& line, std::istream& input, 321 std::ostream& output) 322 { 323 if (!is_statement(line)) { 324 // Fast path. Mostly to avoid an indentation level for the big 325 // chunk of code below. 326 output << line << '\n'; 327 return; 328 } 329 330 const statement_def statement = parse_statement(line); 331 332 switch (statement.type) { 333 case statement_def::type_else: 334 _skip.top() = !_skip.top(); 335 break; 336 337 case statement_def::type_endif: 338 _if_level--; 339 break; 340 341 case statement_def::type_endloop: { 342 PRE(_loops.size() == _loop_level); 343 loop_def& loop = _loops.top(); 344 345 const std::size_t next_index = 1 + text::to_type< std::size_t >( 346 _templates.get_variable(loop.iterator)); 347 348 if (next_index < _templates.get_vector(loop.vector).size()) { 349 _templates.add_variable(loop.iterator, F("%s") % next_index); 350 input.seekg(loop.position); 351 } else { 352 _loop_level--; 353 _loops.pop(); 354 _templates.remove_variable(loop.iterator); 355 } 356 } break; 357 358 case statement_def::type_if: { 359 _if_level++; 360 const std::string value = _templates.evaluate( 361 statement.arguments[0]); 362 if (value.empty() || value == "0" || value == "false") { 363 _exit_if_level = _if_level; 364 _skip.push(true); 365 } else { 366 _skip.push(false); 367 } 368 } break; 369 370 case statement_def::type_loop: { 371 _loop_level++; 372 373 const loop_def loop(statement.arguments[0], statement.arguments[1], 374 input.tellg()); 375 if (_templates.get_vector(loop.vector).empty()) { 376 _exit_loop_level = _loop_level; 377 _skip.push(true); 378 } else { 379 _templates.add_variable(loop.iterator, "0"); 380 _loops.push(loop); 381 _skip.push(false); 382 } 383 } break; 384 } 385 } 386 387 /// Processes a line from the input when in skip mode. 388 /// 389 /// \param line The line to be processed. 390 /// 391 /// \throw text::syntax_error If the input is not valid. 392 void 393 handle_skip(const std::string& line) 394 { 395 PRE(_skip.top()); 396 397 if (!is_statement(line)) 398 return; 399 400 const statement_def statement = parse_statement(line); 401 switch (statement.type) { 402 case statement_def::type_else: 403 if (_exit_if_level == _if_level) 404 _skip.top() = !_skip.top(); 405 break; 406 407 case statement_def::type_endif: 408 INV(_if_level >= _exit_if_level); 409 if (_if_level == _exit_if_level) 410 _skip.top() = false; 411 _if_level--; 412 _skip.pop(); 413 break; 414 415 case statement_def::type_endloop: 416 INV(_loop_level >= _exit_loop_level); 417 if (_loop_level == _exit_loop_level) 418 _skip.top() = false; 419 _loop_level--; 420 _skip.pop(); 421 break; 422 423 case statement_def::type_if: 424 _if_level++; 425 _skip.push(true); 426 break; 427 428 case statement_def::type_loop: 429 _loop_level++; 430 _skip.push(true); 431 break; 432 433 default: 434 break; 435 } 436 } 437 438 /// Evaluates expressions on a given input line. 439 /// 440 /// An expression is surrounded by _delimiter on both sides. We scan the 441 /// string from left to right finding any expressions that may appear, yank 442 /// them out and call templates_def::evaluate() to get their value. 443 /// 444 /// Lonely or unbalanced appearances of _delimiter on the input line are 445 /// not considered an error, given that the user may actually want to supply 446 /// that character sequence without being interpreted as a template. 447 /// 448 /// \param in_line The input line from which to evaluate expressions. 449 /// 450 /// \return The evaluated line. 451 /// 452 /// \throw text::syntax_error If the expressions in the line are malformed. 453 std::string 454 evaluate(const std::string& in_line) 455 { 456 std::string out_line; 457 458 std::string::size_type last_pos = 0; 459 while (last_pos != std::string::npos) { 460 const std::string::size_type open_pos = in_line.find( 461 _delimiter, last_pos); 462 if (open_pos == std::string::npos) { 463 out_line += in_line.substr(last_pos); 464 last_pos = std::string::npos; 465 } else { 466 const std::string::size_type close_pos = in_line.find( 467 _delimiter, open_pos + _delimiter.length()); 468 if (close_pos == std::string::npos) { 469 out_line += in_line.substr(last_pos); 470 last_pos = std::string::npos; 471 } else { 472 out_line += in_line.substr(last_pos, open_pos - last_pos); 473 out_line += _templates.evaluate(in_line.substr( 474 open_pos + _delimiter.length(), 475 close_pos - open_pos - _delimiter.length())); 476 last_pos = close_pos + _delimiter.length(); 477 } 478 } 479 } 480 481 return out_line; 482 } 483 484public: 485 /// Constructs a new template parser. 486 /// 487 /// \param templates_ The templates to apply to the processed file. 488 /// \param prefix_ The prefix that identifies lines as statements. 489 /// \param delimiter_ Delimiter to surround a variable instantiation. 490 templates_parser(const text::templates_def& templates_, 491 const std::string& prefix_, 492 const std::string& delimiter_) : 493 _templates(templates_), 494 _prefix(prefix_), 495 _delimiter(delimiter_), 496 _if_level(0), 497 _exit_if_level(0), 498 _loop_level(0), 499 _exit_loop_level(0) 500 { 501 } 502 503 /// Applies the templates to a given input. 504 /// 505 /// \param input The stream to which to apply the templates. 506 /// \param output The stream into which to write the results. 507 /// 508 /// \throw text::syntax_error If the input is not valid. Note that the 509 /// is not guaranteed to be unmodified on exit if an error is 510 /// encountered. 511 void 512 instantiate(std::istream& input, std::ostream& output) 513 { 514 std::string line; 515 while (std::getline(input, line).good()) { 516 if (!_skip.empty() && _skip.top()) 517 handle_skip(line); 518 else 519 handle_normal(evaluate(line), input, output); 520 } 521 } 522}; 523 524 525} // anonymous namespace 526 527 528/// Constructs an empty templates definition. 529text::templates_def::templates_def(void) 530{ 531} 532 533 534/// Sets a string variable in the templates. 535/// 536/// If the variable already exists, its value is replaced. This behavior is 537/// required to implement iterators, but client code should really not be 538/// redefining variables. 539/// 540/// \pre The variable must not already exist as a vector. 541/// 542/// \param name The name of the variable to set. 543/// \param value The value to set the given variable to. 544void 545text::templates_def::add_variable(const std::string& name, 546 const std::string& value) 547{ 548 PRE(_vectors.find(name) == _vectors.end()); 549 _variables[name] = value; 550} 551 552 553/// Unsets a string variable from the templates. 554/// 555/// Client code has no reason to use this. This is only required to implement 556/// proper scoping of loop iterators. 557/// 558/// \pre The variable must exist. 559/// 560/// \param name The name of the variable to remove from the templates. 561void 562text::templates_def::remove_variable(const std::string& name) 563{ 564 PRE(_variables.find(name) != _variables.end()); 565 _variables.erase(_variables.find(name)); 566} 567 568 569/// Creates a new vector in the templates. 570/// 571/// If the vector already exists, it is cleared. Client code should really not 572/// be redefining variables. 573/// 574/// \pre The vector must not already exist as a variable. 575/// 576/// \param name The name of the vector to set. 577void 578text::templates_def::add_vector(const std::string& name) 579{ 580 PRE(_variables.find(name) == _variables.end()); 581 _vectors[name] = strings_vector(); 582} 583 584 585/// Adds a value to an existing vector in the templates. 586/// 587/// \pre name The vector must exist. 588/// 589/// \param name The name of the vector to append the value to. 590/// \param value The textual value to append to the vector. 591void 592text::templates_def::add_to_vector(const std::string& name, 593 const std::string& value) 594{ 595 PRE(_variables.find(name) == _variables.end()); 596 PRE(_vectors.find(name) != _vectors.end()); 597 _vectors[name].push_back(value); 598} 599 600 601/// Checks whether a given identifier exists as a variable or a vector. 602/// 603/// This is used to implement the evaluation of conditions in if clauses. 604/// 605/// \param name The name of the variable or vector. 606/// 607/// \return True if the given name exists as a variable or a vector; false 608/// otherwise. 609bool 610text::templates_def::exists(const std::string& name) const 611{ 612 return (_variables.find(name) != _variables.end() || 613 _vectors.find(name) != _vectors.end()); 614} 615 616 617/// Gets the value of a variable. 618/// 619/// \param name The name of the variable. 620/// 621/// \return The value of the requested variable. 622/// 623/// \throw text::syntax_error If the variable does not exist. 624const std::string& 625text::templates_def::get_variable(const std::string& name) const 626{ 627 const variables_map::const_iterator iter = _variables.find(name); 628 if (iter == _variables.end()) 629 throw text::syntax_error(F("Unknown variable '%s'") % name); 630 return (*iter).second; 631} 632 633 634/// Gets a vector. 635/// 636/// \param name The name of the vector. 637/// 638/// \return A reference to the requested vector. 639/// 640/// \throw text::syntax_error If the vector does not exist. 641const text::templates_def::strings_vector& 642text::templates_def::get_vector(const std::string& name) const 643{ 644 const vectors_map::const_iterator iter = _vectors.find(name); 645 if (iter == _vectors.end()) 646 throw text::syntax_error(F("Unknown vector '%s'") % name); 647 return (*iter).second; 648} 649 650 651/// Indexes a vector and gets the value. 652/// 653/// \param name The name of the vector to index. 654/// \param index_name The name of a variable representing the index to use. 655/// This must be convertible to a natural. 656/// 657/// \return The value of the vector at the given index. 658/// 659/// \throw text::syntax_error If the vector does not existor if the index is out 660/// of range. 661const std::string& 662text::templates_def::get_vector(const std::string& name, 663 const std::string& index_name) const 664{ 665 const strings_vector& vector = get_vector(name); 666 const std::string& index_str = get_variable(index_name); 667 668 std::size_t index; 669 try { 670 index = text::to_type< std::size_t >(index_str); 671 } catch (const text::syntax_error& e) { 672 throw text::syntax_error(F("Index '%s' not an integer, value '%s'") % 673 index_name % index_str); 674 } 675 if (index >= vector.size()) 676 throw text::syntax_error(F("Index '%s' out of range at position '%s'") % 677 index_name % index); 678 679 return vector[index]; 680} 681 682 683/// Evaluates a expression using these templates. 684/// 685/// An expression is a query on the current templates to fetch a particular 686/// value. The value is always returned as a string, as this is how templates 687/// are internally stored. 688/// 689/// \param expression The expression to evaluate. This should not include any 690/// of the delimiters used in the user input, as otherwise the expression 691/// will not be evaluated properly. 692/// 693/// \return The result of the expression evaluation as a string. 694/// 695/// \throw text::syntax_error If there is any problem while evaluating the 696/// expression. 697std::string 698text::templates_def::evaluate(const std::string& expression) const 699{ 700 const std::string::size_type paren_open = expression.find('('); 701 if (paren_open == std::string::npos) { 702 return get_variable(expression); 703 } else { 704 const std::string::size_type paren_close = expression.find( 705 ')', paren_open); 706 if (paren_close == std::string::npos) 707 throw text::syntax_error(F("Expected ')' in expression '%s')") % 708 expression); 709 if (paren_close != expression.length() - 1) 710 throw text::syntax_error(F("Unexpected text found after ')' in " 711 "expression '%s'") % expression); 712 713 const std::string arg0 = expression.substr(0, paren_open); 714 const std::string arg1 = expression.substr( 715 paren_open + 1, paren_close - paren_open - 1); 716 if (arg0 == "defined") { 717 return exists(arg1) ? "true" : "false"; 718 } else if (arg0 == "length") { 719 return F("%s") % get_vector(arg1).size(); 720 } else { 721 return get_vector(arg0, arg1); 722 } 723 } 724} 725 726 727/// Applies a set of templates to an input stream. 728/// 729/// \param templates The templates to use. 730/// \param input The input to process. 731/// \param output The stream to which to write the processed text. 732/// 733/// \throw text::syntax_error If there is any problem processing the input. 734void 735text::instantiate(const templates_def& templates, 736 std::istream& input, std::ostream& output) 737{ 738 templates_parser parser(templates, "%", "%%"); 739 parser.instantiate(input, output); 740} 741 742 743/// Applies a set of templates to an input file and writes an output file. 744/// 745/// \param templates The templates to use. 746/// \param input_file The path to the input to process. 747/// \param output_file The path to the file into which to write the output. 748/// 749/// \throw text::error If the input or output files cannot be opened. 750/// \throw text::syntax_error If there is any problem processing the input. 751void 752text::instantiate(const templates_def& templates, 753 const fs::path& input_file, const fs::path& output_file) 754{ 755 std::ifstream input(input_file.c_str()); 756 if (!input) 757 throw text::error(F("Failed to open %s for read") % input_file); 758 759 std::ofstream output(output_file.c_str()); 760 if (!output) 761 throw text::error(F("Failed to open %s for write") % output_file); 762 763 instantiate(templates, input, output); 764} 765