1// Copyright 2012 The Kyua Authors.
2// All rights reserved.
3//
4// Redistribution and use in source and binary forms, with or without
5// modification, are permitted provided that the following conditions are
6// met:
7//
8// * Redistributions of source code must retain the above copyright
9//   notice, this list of conditions and the following disclaimer.
10// * Redistributions in binary form must reproduce the above copyright
11//   notice, this list of conditions and the following disclaimer in the
12//   documentation and/or other materials provided with the distribution.
13// * Neither the name of Google Inc. nor the names of its contributors
14//   may be used to endorse or promote products derived from this software
15//   without specific prior written permission.
16//
17// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
18// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
19// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
20// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
21// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
22// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
23// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
24// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
25// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
26// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
27// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28
29#include "utils/text/templates.hpp"
30
31#include <algorithm>
32#include <fstream>
33#include <sstream>
34#include <stack>
35
36#include "utils/format/macros.hpp"
37#include "utils/fs/path.hpp"
38#include "utils/noncopyable.hpp"
39#include "utils/sanity.hpp"
40#include "utils/text/exceptions.hpp"
41#include "utils/text/operations.ipp"
42
43namespace text = utils::text;
44
45
46namespace {
47
48
49/// Definition of a template statement.
50///
51/// A template statement is a particular line in the input file that is
52/// preceeded by a template marker.  This class provides a high-level
53/// representation of the contents of such statement and a mechanism to parse
54/// the textual line into this high-level representation.
55class statement_def {
56public:
57    /// Types of the known statements.
58    enum statement_type {
59        /// Alternative clause of a conditional.
60        ///
61        /// Takes no arguments.
62        type_else,
63
64        /// End of conditional marker.
65        ///
66        /// Takes no arguments.
67        type_endif,
68
69        /// End of loop marker.
70        ///
71        /// Takes no arguments.
72        type_endloop,
73
74        /// Beginning of a conditional.
75        ///
76        /// Takes a single argument, which denotes the name of the variable or
77        /// vector to check for existence.  This is the only expression
78        /// supported.
79        type_if,
80
81        /// Beginning of a loop over all the elements of a vector.
82        ///
83        /// Takes two arguments: the name of the vector over which to iterate
84        /// and the name of the iterator to later index this vector.
85        type_loop,
86    };
87
88private:
89    /// Internal data describing the structure of a particular statement type.
90    struct type_descriptor {
91        /// The native type of the statement.
92        statement_type type;
93
94        /// The expected number of arguments.
95        unsigned int n_arguments;
96
97        /// Constructs a new type descriptor.
98        ///
99        /// \param type_ The native type of the statement.
100        /// \param n_arguments_ The expected number of arguments.
101        type_descriptor(const statement_type type_,
102                        const unsigned int n_arguments_)
103            : type(type_), n_arguments(n_arguments_)
104        {
105        }
106    };
107
108    /// Mapping of statement type names to their definitions.
109    typedef std::map< std::string, type_descriptor > types_map;
110
111    /// Description of the different statement types.
112    ///
113    /// This static map is initialized once and reused later for any statement
114    /// lookup.  Unfortunately, we cannot perform this initialization in a
115    /// static manner without C++11.
116    static types_map _types;
117
118    /// Generates a new types definition map.
119    ///
120    /// \return A new types definition map, to be assigned to _types.
121    static types_map
122    generate_types_map(void)
123    {
124        // If you change this, please edit the comments in the enum above.
125        types_map types;
126        types.insert(types_map::value_type(
127            "else", type_descriptor(type_else, 0)));
128        types.insert(types_map::value_type(
129            "endif", type_descriptor(type_endif, 0)));
130        types.insert(types_map::value_type(
131            "endloop", type_descriptor(type_endloop, 0)));
132        types.insert(types_map::value_type(
133            "if", type_descriptor(type_if, 1)));
134        types.insert(types_map::value_type(
135            "loop", type_descriptor(type_loop, 2)));
136        return types;
137    }
138
139public:
140    /// The type of the statement.
141    statement_type type;
142
143    /// The arguments to the statement, in textual form.
144    const std::vector< std::string > arguments;
145
146    /// Creates a new statement.
147    ///
148    /// \param type_ The type of the statement.
149    /// \param arguments_ The arguments to the statement.
150    statement_def(const statement_type& type_,
151                  const std::vector< std::string >& arguments_) :
152        type(type_), arguments(arguments_)
153    {
154#if !defined(NDEBUG)
155        for (types_map::const_iterator iter = _types.begin();
156             iter != _types.end(); ++iter) {
157            const type_descriptor& descriptor = (*iter).second;
158            if (descriptor.type == type_) {
159                PRE(descriptor.n_arguments == arguments_.size());
160                return;
161            }
162        }
163        UNREACHABLE;
164#endif
165    }
166
167    /// Parses a statement.
168    ///
169    /// \param line The textual representation of the statement without any
170    ///     prefix.
171    ///
172    /// \return The parsed statement.
173    ///
174    /// \throw text::syntax_error If the statement is not correctly defined.
175    static statement_def
176    parse(const std::string& line)
177    {
178        if (_types.empty())
179            _types = generate_types_map();
180
181        const std::vector< std::string > words = text::split(line, ' ');
182        if (words.empty())
183            throw text::syntax_error("Empty statement");
184
185        const types_map::const_iterator iter = _types.find(words[0]);
186        if (iter == _types.end())
187            throw text::syntax_error(F("Unknown statement '%s'") % words[0]);
188        const type_descriptor& descriptor = (*iter).second;
189
190        if (words.size() - 1 != descriptor.n_arguments)
191            throw text::syntax_error(F("Invalid number of arguments for "
192                                       "statement '%s'") % words[0]);
193
194        std::vector< std::string > new_arguments;
195        new_arguments.resize(words.size() - 1);
196        std::copy(words.begin() + 1, words.end(), new_arguments.begin());
197
198        return statement_def(descriptor.type, new_arguments);
199    }
200};
201
202
/// Storage for statement_def::_types.
///
/// Starts out empty; statement_def::parse() assigns it the result of
/// generate_types_map() the first time it finds the map empty.
statement_def::types_map statement_def::_types;
204
205
/// Bookkeeping data for a loop.
///
/// Plain aggregate that groups the parameters of a loop so that they can be
/// kept around while the body of the loop is being processed.
struct loop_def {
    /// Name of the vector that the loop walks over.
    std::string vector;

    /// Name of the iterator variable that the loop defines.
    std::string iterator;

    /// Position in the input to rewind to in order to run another iteration.
    ///
    /// This refers to the line that follows the loop statement, not to the
    /// loop statement itself.  Storing it here is one of the reasons for this
    /// structure to exist: the data about the loop is kept without having to
    /// re-process the statement.
    std::istream::pos_type position;

    /// Constructs a new loop definition.
    ///
    /// \param vector_ The name of the vector (first argument).
    /// \param iterator_ The name of the iterator (second argument).
    /// \param position_ Position of the next line after the loop statement.
    loop_def(const std::string& vector_, const std::string& iterator_,
             const std::istream::pos_type position_)
        : vector(vector_),
          iterator(iterator_),
          position(position_)
    {
    }
};
234
235
236/// Stateful class to instantiate the templates in an input stream.
237///
238/// The goal of this parser is to scan the input once and not buffer anything in
239/// memory.  The only exception are loops: loops are reinterpreted on every
240/// iteration from the same input file by rewidining the stream to the
241/// appropriate position.
242class templates_parser : utils::noncopyable {
243    /// The templates to apply.
244    ///
245    /// Note that this is not const because the parser has to have write access
246    /// to the templates.  In particular, it needs to be able to define the
247    /// iterators as regular variables.
248    text::templates_def _templates;
249
250    /// Prefix that marks a line as a statement.
251    const std::string _prefix;
252
253    /// Delimiter to surround an expression instantiation.
254    const std::string _delimiter;
255
256    /// Whether to skip incoming lines or not.
257    ///
258    /// The top of the stack is true whenever we encounter a conditional that
259    /// evaluates to false or a loop that does not have any iterations left.
260    /// Under these circumstances, we need to continue scanning the input stream
261    /// until we find the matching closing endif or endloop construct.
262    ///
263    /// This is a stack rather than a plain boolean to allow us deal with
264    /// if-else clauses.
265    std::stack< bool > _skip;
266
267    /// Current count of nested conditionals.
268    unsigned int _if_level;
269
270    /// Level of the top-most conditional that evaluated to false.
271    unsigned int _exit_if_level;
272
273    /// Current count of nested loops.
274    unsigned int _loop_level;
275
276    /// Level of the top-most loop that does not have any iterations left.
277    unsigned int _exit_loop_level;
278
279    /// Information about all the nested loops up to the current point.
280    std::stack< loop_def > _loops;
281
282    /// Checks if a line is a statement or not.
283    ///
284    /// \param line The line to validate.
285    ///
286    /// \return True if the line looks like a statement, which is determined by
287    /// checking if the line starts by the predefined prefix.
288    bool
289    is_statement(const std::string& line)
290    {
291        return ((line.length() >= _prefix.length() &&
292                 line.substr(0, _prefix.length()) == _prefix) &&
293                (line.length() < _delimiter.length() ||
294                 line.substr(0, _delimiter.length()) != _delimiter));
295    }
296
297    /// Parses a given statement line into a statement definition.
298    ///
299    /// \param line The line to validate; it must be a valid statement.
300    ///
301    /// \return The parsed statement.
302    ///
303    /// \throw text::syntax_error If the input is not a valid statement.
304    statement_def
305    parse_statement(const std::string& line)
306    {
307        PRE(is_statement(line));
308        return statement_def::parse(line.substr(_prefix.length()));
309    }
310
311    /// Processes a line from the input when not in skip mode.
312    ///
313    /// \param line The line to be processed.
314    /// \param input The input stream from which the line was read.  The current
315    ///     position in the stream must be after the line being processed.
316    /// \param output The output stream into which to write the results.
317    ///
318    /// \throw text::syntax_error If the input is not valid.
319    void
320    handle_normal(const std::string& line, std::istream& input,
321                  std::ostream& output)
322    {
323        if (!is_statement(line)) {
324            // Fast path.  Mostly to avoid an indentation level for the big
325            // chunk of code below.
326            output << line << '\n';
327            return;
328        }
329
330        const statement_def statement = parse_statement(line);
331
332        switch (statement.type) {
333        case statement_def::type_else:
334            _skip.top() = !_skip.top();
335            break;
336
337        case statement_def::type_endif:
338            _if_level--;
339            break;
340
341        case statement_def::type_endloop: {
342            PRE(_loops.size() == _loop_level);
343            loop_def& loop = _loops.top();
344
345            const std::size_t next_index = 1 + text::to_type< std::size_t >(
346                _templates.get_variable(loop.iterator));
347
348            if (next_index < _templates.get_vector(loop.vector).size()) {
349                _templates.add_variable(loop.iterator, F("%s") % next_index);
350                input.seekg(loop.position);
351            } else {
352                _loop_level--;
353                _loops.pop();
354                _templates.remove_variable(loop.iterator);
355            }
356        } break;
357
358        case statement_def::type_if: {
359            _if_level++;
360            const std::string value = _templates.evaluate(
361                statement.arguments[0]);
362            if (value.empty() || value == "0" || value == "false") {
363                _exit_if_level = _if_level;
364                _skip.push(true);
365            } else {
366                _skip.push(false);
367            }
368        } break;
369
370        case statement_def::type_loop: {
371            _loop_level++;
372
373            const loop_def loop(statement.arguments[0], statement.arguments[1],
374                                input.tellg());
375            if (_templates.get_vector(loop.vector).empty()) {
376                _exit_loop_level = _loop_level;
377                _skip.push(true);
378            } else {
379                _templates.add_variable(loop.iterator, "0");
380                _loops.push(loop);
381                _skip.push(false);
382            }
383        } break;
384        }
385    }
386
387    /// Processes a line from the input when in skip mode.
388    ///
389    /// \param line The line to be processed.
390    ///
391    /// \throw text::syntax_error If the input is not valid.
392    void
393    handle_skip(const std::string& line)
394    {
395        PRE(_skip.top());
396
397        if (!is_statement(line))
398            return;
399
400        const statement_def statement = parse_statement(line);
401        switch (statement.type) {
402        case statement_def::type_else:
403            if (_exit_if_level == _if_level)
404                _skip.top() = !_skip.top();
405            break;
406
407        case statement_def::type_endif:
408            INV(_if_level >= _exit_if_level);
409            if (_if_level == _exit_if_level)
410                _skip.top() = false;
411            _if_level--;
412            _skip.pop();
413            break;
414
415        case statement_def::type_endloop:
416            INV(_loop_level >= _exit_loop_level);
417            if (_loop_level == _exit_loop_level)
418                _skip.top() = false;
419            _loop_level--;
420            _skip.pop();
421            break;
422
423        case statement_def::type_if:
424            _if_level++;
425            _skip.push(true);
426            break;
427
428        case statement_def::type_loop:
429            _loop_level++;
430            _skip.push(true);
431            break;
432
433        default:
434            break;
435        }
436    }
437
438    /// Evaluates expressions on a given input line.
439    ///
440    /// An expression is surrounded by _delimiter on both sides.  We scan the
441    /// string from left to right finding any expressions that may appear, yank
442    /// them out and call templates_def::evaluate() to get their value.
443    ///
444    /// Lonely or unbalanced appearances of _delimiter on the input line are
445    /// not considered an error, given that the user may actually want to supply
446    /// that character sequence without being interpreted as a template.
447    ///
448    /// \param in_line The input line from which to evaluate expressions.
449    ///
450    /// \return The evaluated line.
451    ///
452    /// \throw text::syntax_error If the expressions in the line are malformed.
453    std::string
454    evaluate(const std::string& in_line)
455    {
456        std::string out_line;
457
458        std::string::size_type last_pos = 0;
459        while (last_pos != std::string::npos) {
460            const std::string::size_type open_pos = in_line.find(
461                _delimiter, last_pos);
462            if (open_pos == std::string::npos) {
463                out_line += in_line.substr(last_pos);
464                last_pos = std::string::npos;
465            } else {
466                const std::string::size_type close_pos = in_line.find(
467                    _delimiter, open_pos + _delimiter.length());
468                if (close_pos == std::string::npos) {
469                    out_line += in_line.substr(last_pos);
470                    last_pos = std::string::npos;
471                } else {
472                    out_line += in_line.substr(last_pos, open_pos - last_pos);
473                    out_line += _templates.evaluate(in_line.substr(
474                        open_pos + _delimiter.length(),
475                        close_pos - open_pos - _delimiter.length()));
476                    last_pos = close_pos + _delimiter.length();
477                }
478            }
479        }
480
481        return out_line;
482    }
483
484public:
485    /// Constructs a new template parser.
486    ///
487    /// \param templates_ The templates to apply to the processed file.
488    /// \param prefix_ The prefix that identifies lines as statements.
489    /// \param delimiter_ Delimiter to surround a variable instantiation.
490    templates_parser(const text::templates_def& templates_,
491                     const std::string& prefix_,
492                     const std::string& delimiter_) :
493        _templates(templates_),
494        _prefix(prefix_),
495        _delimiter(delimiter_),
496        _if_level(0),
497        _exit_if_level(0),
498        _loop_level(0),
499        _exit_loop_level(0)
500    {
501    }
502
503    /// Applies the templates to a given input.
504    ///
505    /// \param input The stream to which to apply the templates.
506    /// \param output The stream into which to write the results.
507    ///
508    /// \throw text::syntax_error If the input is not valid.  Note that the
509    ///     is not guaranteed to be unmodified on exit if an error is
510    ///     encountered.
511    void
512    instantiate(std::istream& input, std::ostream& output)
513    {
514        std::string line;
515        while (std::getline(input, line).good()) {
516            if (!_skip.empty() && _skip.top())
517                handle_skip(line);
518            else
519                handle_normal(evaluate(line), input, output);
520        }
521    }
522};
523
524
525}  // anonymous namespace
526
527
528/// Constructs an empty templates definition.
529text::templates_def::templates_def(void)
530{
531}
532
533
534/// Sets a string variable in the templates.
535///
536/// If the variable already exists, its value is replaced.  This behavior is
537/// required to implement iterators, but client code should really not be
538/// redefining variables.
539///
540/// \pre The variable must not already exist as a vector.
541///
542/// \param name The name of the variable to set.
543/// \param value The value to set the given variable to.
544void
545text::templates_def::add_variable(const std::string& name,
546                                  const std::string& value)
547{
548    PRE(_vectors.find(name) == _vectors.end());
549    _variables[name] = value;
550}
551
552
553/// Unsets a string variable from the templates.
554///
555/// Client code has no reason to use this.  This is only required to implement
556/// proper scoping of loop iterators.
557///
558/// \pre The variable must exist.
559///
560/// \param name The name of the variable to remove from the templates.
561void
562text::templates_def::remove_variable(const std::string& name)
563{
564    PRE(_variables.find(name) != _variables.end());
565    _variables.erase(_variables.find(name));
566}
567
568
569/// Creates a new vector in the templates.
570///
571/// If the vector already exists, it is cleared.  Client code should really not
572/// be redefining variables.
573///
574/// \pre The vector must not already exist as a variable.
575///
576/// \param name The name of the vector to set.
577void
578text::templates_def::add_vector(const std::string& name)
579{
580    PRE(_variables.find(name) == _variables.end());
581    _vectors[name] = strings_vector();
582}
583
584
585/// Adds a value to an existing vector in the templates.
586///
587/// \pre name The vector must exist.
588///
589/// \param name The name of the vector to append the value to.
590/// \param value The textual value to append to the vector.
591void
592text::templates_def::add_to_vector(const std::string& name,
593                                   const std::string& value)
594{
595    PRE(_variables.find(name) == _variables.end());
596    PRE(_vectors.find(name) != _vectors.end());
597    _vectors[name].push_back(value);
598}
599
600
601/// Checks whether a given identifier exists as a variable or a vector.
602///
603/// This is used to implement the evaluation of conditions in if clauses.
604///
605/// \param name The name of the variable or vector.
606///
607/// \return True if the given name exists as a variable or a vector; false
608/// otherwise.
609bool
610text::templates_def::exists(const std::string& name) const
611{
612    return (_variables.find(name) != _variables.end() ||
613            _vectors.find(name) != _vectors.end());
614}
615
616
617/// Gets the value of a variable.
618///
619/// \param name The name of the variable.
620///
621/// \return The value of the requested variable.
622///
623/// \throw text::syntax_error If the variable does not exist.
624const std::string&
625text::templates_def::get_variable(const std::string& name) const
626{
627    const variables_map::const_iterator iter = _variables.find(name);
628    if (iter == _variables.end())
629        throw text::syntax_error(F("Unknown variable '%s'") % name);
630    return (*iter).second;
631}
632
633
634/// Gets a vector.
635///
636/// \param name The name of the vector.
637///
638/// \return A reference to the requested vector.
639///
640/// \throw text::syntax_error If the vector does not exist.
641const text::templates_def::strings_vector&
642text::templates_def::get_vector(const std::string& name) const
643{
644    const vectors_map::const_iterator iter = _vectors.find(name);
645    if (iter == _vectors.end())
646        throw text::syntax_error(F("Unknown vector '%s'") % name);
647    return (*iter).second;
648}
649
650
651/// Indexes a vector and gets the value.
652///
653/// \param name The name of the vector to index.
654/// \param index_name The name of a variable representing the index to use.
655///     This must be convertible to a natural.
656///
657/// \return The value of the vector at the given index.
658///
659/// \throw text::syntax_error If the vector does not existor if the index is out
660///     of range.
661const std::string&
662text::templates_def::get_vector(const std::string& name,
663                                const std::string& index_name) const
664{
665    const strings_vector& vector = get_vector(name);
666    const std::string& index_str = get_variable(index_name);
667
668    std::size_t index;
669    try {
670        index = text::to_type< std::size_t >(index_str);
671    } catch (const text::syntax_error& e) {
672        throw text::syntax_error(F("Index '%s' not an integer, value '%s'") %
673                                 index_name % index_str);
674    }
675    if (index >= vector.size())
676        throw text::syntax_error(F("Index '%s' out of range at position '%s'") %
677                                 index_name % index);
678
679    return vector[index];
680}
681
682
683/// Evaluates a expression using these templates.
684///
685/// An expression is a query on the current templates to fetch a particular
686/// value.  The value is always returned as a string, as this is how templates
687/// are internally stored.
688///
689/// \param expression The expression to evaluate.  This should not include any
690///     of the delimiters used in the user input, as otherwise the expression
691///     will not be evaluated properly.
692///
693/// \return The result of the expression evaluation as a string.
694///
695/// \throw text::syntax_error If there is any problem while evaluating the
696///     expression.
697std::string
698text::templates_def::evaluate(const std::string& expression) const
699{
700    const std::string::size_type paren_open = expression.find('(');
701    if (paren_open == std::string::npos) {
702        return get_variable(expression);
703    } else {
704        const std::string::size_type paren_close = expression.find(
705            ')', paren_open);
706        if (paren_close == std::string::npos)
707            throw text::syntax_error(F("Expected ')' in expression '%s')") %
708                                     expression);
709        if (paren_close != expression.length() - 1)
710            throw text::syntax_error(F("Unexpected text found after ')' in "
711                                       "expression '%s'") % expression);
712
713        const std::string arg0 = expression.substr(0, paren_open);
714        const std::string arg1 = expression.substr(
715            paren_open + 1, paren_close - paren_open - 1);
716        if (arg0 == "defined") {
717            return exists(arg1) ? "true" : "false";
718        } else if (arg0 == "length") {
719            return F("%s") % get_vector(arg1).size();
720        } else {
721            return get_vector(arg0, arg1);
722        }
723    }
724}
725
726
727/// Applies a set of templates to an input stream.
728///
729/// \param templates The templates to use.
730/// \param input The input to process.
731/// \param output The stream to which to write the processed text.
732///
733/// \throw text::syntax_error If there is any problem processing the input.
734void
735text::instantiate(const templates_def& templates,
736                  std::istream& input, std::ostream& output)
737{
738    templates_parser parser(templates, "%", "%%");
739    parser.instantiate(input, output);
740}
741
742
743/// Applies a set of templates to an input file and writes an output file.
744///
745/// \param templates The templates to use.
746/// \param input_file The path to the input to process.
747/// \param output_file The path to the file into which to write the output.
748///
749/// \throw text::error If the input or output files cannot be opened.
750/// \throw text::syntax_error If there is any problem processing the input.
751void
752text::instantiate(const templates_def& templates,
753                  const fs::path& input_file, const fs::path& output_file)
754{
755    std::ifstream input(input_file.c_str());
756    if (!input)
757        throw text::error(F("Failed to open %s for read") % input_file);
758
759    std::ofstream output(output_file.c_str());
760    if (!output)
761        throw text::error(F("Failed to open %s for write") % output_file);
762
763    instantiate(templates, input, output);
764}
765