internal/parser/Lexer.java

/*
 * Copyright (c) 2010, 2015, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.  Oracle designates this
 * particular file as subject to the "Classpath" exception as provided
 * by Oracle in the LICENSE file that accompanied this code.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 */
75584Sru
151497Srupackage jdk.nashorn.internal.parser;
75584Sru
151497Sruimport static jdk.nashorn.internal.parser.TokenType.ADD;
151497Sruimport static jdk.nashorn.internal.parser.TokenType.BINARY_NUMBER;
151497Sruimport static jdk.nashorn.internal.parser.TokenType.COMMENT;
151497Sruimport static jdk.nashorn.internal.parser.TokenType.DECIMAL;
151497Sruimport static jdk.nashorn.internal.parser.TokenType.DIRECTIVE_COMMENT;
151497Sruimport static jdk.nashorn.internal.parser.TokenType.EOF;
151497Sruimport static jdk.nashorn.internal.parser.TokenType.EOL;
151497Sruimport static jdk.nashorn.internal.parser.TokenType.ERROR;
151497Sruimport static jdk.nashorn.internal.parser.TokenType.ESCSTRING;
75584Sruimport static jdk.nashorn.internal.parser.TokenType.EXECSTRING;
104862Sruimport static jdk.nashorn.internal.parser.TokenType.FLOATING;
75584Sruimport static jdk.nashorn.internal.parser.TokenType.FUNCTION;
75584Sruimport static jdk.nashorn.internal.parser.TokenType.HEXADECIMAL;
18099Spstimport static jdk.nashorn.internal.parser.TokenType.LBRACE;
18099Spstimport static jdk.nashorn.internal.parser.TokenType.LPAREN;
18099Spstimport static jdk.nashorn.internal.parser.TokenType.OCTAL;
18099Spstimport static jdk.nashorn.internal.parser.TokenType.OCTAL_LEGACY;
18099Spstimport static jdk.nashorn.internal.parser.TokenType.RBRACE;
151497Sruimport static jdk.nashorn.internal.parser.TokenType.REGEX;
151497Sruimport static jdk.nashorn.internal.parser.TokenType.RPAREN;
151497Sruimport static jdk.nashorn.internal.parser.TokenType.STRING;
151497Sruimport static jdk.nashorn.internal.parser.TokenType.TEMPLATE;
151497Sruimport static jdk.nashorn.internal.parser.TokenType.TEMPLATE_HEAD;
151497Sruimport static jdk.nashorn.internal.parser.TokenType.TEMPLATE_MIDDLE;
151497Sruimport static jdk.nashorn.internal.parser.TokenType.TEMPLATE_TAIL;
151497Sruimport static jdk.nashorn.internal.parser.TokenType.XML;
151497Sru
151497Sruimport java.io.Serializable;
151497Sru
151497Sruimport jdk.nashorn.internal.runtime.ECMAErrors;
18099Spstimport jdk.nashorn.internal.runtime.ErrorManager;
18099Spstimport jdk.nashorn.internal.runtime.JSErrorType;
18099Spstimport jdk.nashorn.internal.runtime.JSType;
18099Spstimport jdk.nashorn.internal.runtime.ParserException;
18099Spstimport jdk.nashorn.internal.runtime.Source;
18099Spstimport jdk.nashorn.internal.runtime.options.Options;
18099Spst
18099Spst/**
18099Spst * Responsible for converting source content into a stream of tokens.
18099Spst *
18099Spst */
18099Spst@SuppressWarnings("fallthrough")
18099Spstpublic class Lexer extends Scanner {
18099Spst    private static final long MIN_INT_L = Integer.MIN_VALUE;
18099Spst    private static final long MAX_INT_L = Integer.MAX_VALUE;
18099Spst
18099Spst    private static final boolean XML_LITERALS = Options.getBooleanProperty("nashorn.lexer.xmlliterals");
18099Spst
18099Spst    /** Content source. */
18099Spst    private final Source source;
18099Spst
18099Spst    /** Buffered stream for tokens. */
18099Spst    private final TokenStream stream;
104862Sru
18099Spst    /** True if here and edit strings are supported. */
18099Spst    private final boolean scripting;
151497Sru
151497Sru    /** True if parsing in ECMAScript 6 mode. */
151497Sru    private final boolean es6;
151497Sru
151497Sru    /** True if a nested scan. (scan to completion, no EOF.) */
151497Sru    private final boolean nested;
151497Sru
151497Sru    /** Pending new line number and position. */
151497Sru    int pendingLine;
104862Sru
104862Sru    /** Position of last EOL + 1. */
104862Sru    private int linePosition;
69626Sru
104862Sru    /** Type of last token added. */
104862Sru    private TokenType last;
104862Sru
104862Sru    private final boolean pauseOnFunctionBody;
104862Sru    private boolean pauseOnNextLeftBrace;
18099Spst
18099Spst    private int templateExpressionOpenBraces;
18099Spst
104862Sru    private static final String SPACETAB = " \t";  // ASCII space and tab
18099Spst    private static final String LFCR     = "\n\r"; // line feed and carriage return (ctrl-m)
18099Spst
18099Spst    private static final String JAVASCRIPT_WHITESPACE_EOL =
18099Spst        LFCR +
18099Spst        "\u2028" + // line separator
18099Spst        "\u2029"   // paragraph separator
18099Spst        ;
18099Spst    private static final String JAVASCRIPT_WHITESPACE =
18099Spst        SPACETAB +
18099Spst        JAVASCRIPT_WHITESPACE_EOL +
18099Spst        "\u000b" + // tabulation line
18099Spst        "\u000c" + // ff (ctrl-l)
18099Spst        "\u00a0" + // Latin-1 space
18099Spst        "\u1680" + // Ogham space mark
18099Spst        "\u180e" + // separator, Mongolian vowel
18099Spst        "\u2000" + // en quad
104862Sru        "\u2001" + // em quad
75584Sru        "\u2002" + // en space
18099Spst        "\u2003" + // em space
104862Sru        "\u2004" + // three-per-em space
75584Sru        "\u2005" + // four-per-em space
75584Sru        "\u2006" + // six-per-em space
75584Sru        "\u2007" + // figure space
104862Sru        "\u2008" + // punctuation space
69626Sru        "\u2009" + // thin space
75584Sru        "\u200a" + // hair space
75584Sru        "\u202f" + // narrow no-break space
18099Spst        "\u205f" + // medium mathematical space
104862Sru        "\u3000" + // ideographic space
104862Sru        "\ufeff"   // byte order mark
104862Sru        ;
104862Sru
104862Sru    private static final String JAVASCRIPT_WHITESPACE_IN_REGEXP =
104862Sru        "\\u000a" + // line feed
104862Sru        "\\u000d" + // carriage return (ctrl-m)
104862Sru        "\\u2028" + // line separator
104862Sru        "\\u2029" + // paragraph separator
104862Sru        "\\u0009" + // tab
104862Sru        "\\u0020" + // ASCII space
104862Sru        "\\u000b" + // tabulation line
151497Sru        "\\u000c" + // ff (ctrl-l)
151497Sru        "\\u00a0" + // Latin-1 space
151497Sru        "\\u1680" + // Ogham space mark
104862Sru        "\\u180e" + // separator, Mongolian vowel
18099Spst        "\\u2000" + // en quad
18099Spst        "\\u2001" + // em quad
104862Sru        "\\u2002" + // en space
104862Sru        "\\u2003" + // em space
18099Spst        "\\u2004" + // three-per-em space
104862Sru        "\\u2005" + // four-per-em space
104862Sru        "\\u2006" + // six-per-em space
104862Sru        "\\u2007" + // figure space
104862Sru        "\\u2008" + // punctuation space
151497Sru        "\\u2009" + // thin space
104862Sru        "\\u200a" + // hair space
104862Sru        "\\u202f" + // narrow no-break space
18099Spst        "\\u205f" + // medium mathematical space
18099Spst        "\\u3000" + // ideographic space
104862Sru        "\\ufeff"   // byte order mark
75584Sru        ;
75584Sru
104862Sru    static String unicodeEscape(final char ch) {
75584Sru        final StringBuilder sb = new StringBuilder();
75584Sru
151497Sru        sb.append("\\u");
151497Sru
151497Sru        final String hex = Integer.toHexString(ch);
151497Sru        for (int i = hex.length(); i < 4; i++) {
104862Sru            sb.append('0');
75584Sru        }
75584Sru        sb.append(hex);
75584Sru
75584Sru        return sb.toString();
151497Sru    }
18099Spst
104862Sru    /**
18099Spst     * Constructor
18099Spst     *
114402Sru     * @param source    the source
114402Sru     * @param stream    the token stream to lex
114402Sru     */
114402Sru    public Lexer(final Source source, final TokenStream stream) {
114402Sru        this(source, stream, false, false);
104862Sru    }
18099Spst
18099Spst    /**
18099Spst     * Constructor
18099Spst     *
18099Spst     * @param source    the source
18099Spst     * @param stream    the token stream to lex
18099Spst     * @param scripting are we in scripting mode
18099Spst     * @param es6       are we in ECMAScript 6 mode
18099Spst     */
18099Spst    public Lexer(final Source source, final TokenStream stream, final boolean scripting, final boolean es6) {
69626Sru        this(source, 0, source.getLength(), stream, scripting, es6, false);
18099Spst    }
18099Spst
69626Sru    /**
18099Spst     * Constructor
18099Spst     *
69626Sru     * @param source    the source
18099Spst     * @param start     start position in source from which to start lexing
18099Spst     * @param len       length of source segment to lex
18099Spst     * @param stream    token stream to lex
18099Spst     * @param scripting are we in scripting mode
18099Spst     * @param es6       are we in ECMAScript 6 mode
18099Spst     * @param pauseOnFunctionBody if true, lexer will return from {@link #lexify()} when it encounters a
18099Spst     * function body. This is used with the feature where the parser is skipping nested function bodies to
18099Spst     * avoid reading ahead unnecessarily when we skip the function bodies.
18099Spst     */
18099Spst    public Lexer(final Source source, final int start, final int len, final TokenStream stream, final boolean scripting, final boolean es6, final boolean pauseOnFunctionBody) {
18099Spst        super(source.getContent(), 1, start, len);
18099Spst        this.source      = source;
18099Spst        this.stream      = stream;
18099Spst        this.scripting   = scripting;
18099Spst        this.es6         = es6;
18099Spst        this.nested      = false;
18099Spst        this.pendingLine = 1;
18099Spst        this.last        = EOL;
18099Spst
18099Spst        this.pauseOnFunctionBody = pauseOnFunctionBody;
104862Sru    }
79543Sru
79543Sru    private Lexer(final Lexer lexer, final State state) {
18099Spst        super(lexer, state);
104862Sru
18099Spst        source = lexer.source;
18099Spst        stream = lexer.stream;
18099Spst        scripting = lexer.scripting;
104862Sru        es6 = lexer.es6;
18099Spst        nested = true;
18099Spst
18099Spst        pendingLine = state.pendingLine;
104862Sru        linePosition = state.linePosition;
18099Spst        last = EOL;
18099Spst        pauseOnFunctionBody = false;
18099Spst    }
104862Sru
104862Sru    static class State extends Scanner.State {
104862Sru        /** Pending new line number and position. */
104862Sru        public final int pendingLine;
104862Sru
151497Sru        /** Position of last EOL + 1. */
151497Sru        public final int linePosition;
151497Sru
151497Sru        /** Type of last token added. */
151497Sru        public final TokenType last;
18099Spst
18099Spst        /*
18099Spst         * Constructor.
104862Sru         */
18099Spst
18099Spst        State(final int position, final int limit, final int line, final int pendingLine, final int linePosition, final TokenType last) {
18099Spst            super(position, limit, line);
18099Spst
104862Sru            this.pendingLine = pendingLine;
18099Spst            this.linePosition = linePosition;
18099Spst            this.last = last;
18099Spst        }
104862Sru    }
104862Sru
151497Sru    /**
18099Spst     * Save the state of the scan.
104862Sru     *
69626Sru     * @return Captured state.
104862Sru     */
104862Sru    @Override
104862Sru    State saveState() {
104862Sru        return new State(position, limit, line, pendingLine, linePosition, last);
104862Sru    }
104862Sru
104862Sru    /**
104862Sru     * Restore the state of the scan.
151497Sru     *
104862Sru     * @param state
104862Sru     *            Captured state.
104862Sru     */
18099Spst    void restoreState(final State state) {
114402Sru        super.restoreState(state);
104862Sru
104862Sru        pendingLine = state.pendingLine;
104862Sru        linePosition = state.linePosition;
104862Sru        last = state.last;
69626Sru    }
104862Sru
151497Sru    /**
104862Sru     * Add a new token to the stream.
104862Sru     *
104862Sru     * @param type
104862Sru     *            Token type.
69626Sru     * @param start
104862Sru     *            Start position.
104862Sru     * @param end
151497Sru     *            End position.
151497Sru     */
114402Sru    protected void add(final TokenType type, final int start, final int end) {
151497Sru        // Record last token.
151497Sru        last = type;
114402Sru
104862Sru        // Only emit the last EOL in a cluster.
104862Sru        if (type == EOL) {
69626Sru            pendingLine = end;
69626Sru            linePosition = start;
69626Sru        } else {
151497Sru            // Write any pending EOL to stream.
151497Sru            if (pendingLine != -1) {
151497Sru                stream.put(Token.toDesc(EOL, linePosition, pendingLine));
151497Sru                pendingLine = -1;
104862Sru            }
104862Sru
55839Sasmodai            // Write token to stream.
55839Sasmodai            stream.put(Token.toDesc(type, start, end - start));
55839Sasmodai        }
18099Spst    }
55839Sasmodai
18099Spst    /**
18099Spst     * Add a new token to the stream.
18099Spst     *
69626Sru     * @param type
151497Sru     *            Token type.
151497Sru     * @param start
151497Sru     *            Start position.
18099Spst     */
18099Spst    protected void add(final TokenType type, final int start) {
104862Sru        add(type, start, position);
104862Sru    }
114402Sru
114402Sru    /**
104862Sru     * Return the String of valid whitespace characters for regular
104862Sru     * expressions in JavaScript
151497Sru     * @return regexp whitespace string
18099Spst     */
18099Spst    public static String getWhitespaceRegExp() {
114402Sru        return JAVASCRIPT_WHITESPACE_IN_REGEXP;
18099Spst    }
18099Spst
18099Spst    /**
18099Spst     * Skip end of line.
18099Spst     *
18099Spst     * @param addEOL true if EOL token should be recorded.
18099Spst     */
18099Spst    private void skipEOL(final boolean addEOL) {
18099Spst
151497Sru        if (ch0 == '\r') { // detect \r\n pattern
55839Sasmodai            skip(1);
151497Sru            if (ch0 == '\n') {
151497Sru                skip(1);
151497Sru            }
151497Sru        } else { // all other space, ch0 is guaranteed to be EOL or \0
151497Sru            skip(1);
151497Sru        }
18099Spst
18099Spst        // bump up line count
18099Spst        line++;
104862Sru
104862Sru        if (addEOL) {
114402Sru            // Add an EOL token.
114402Sru            add(EOL, position, line);
18099Spst        }
55839Sasmodai    }
18099Spst
18099Spst    /**
18099Spst     * Skip over rest of line including end of line.
69626Sru     *
18099Spst     * @param addEOL true if EOL token should be recorded.
104862Sru     */
18099Spst    private void skipLine(final boolean addEOL) {
18099Spst        // Ignore characters.
18099Spst        while (!isEOL(ch0) && !atEOF()) {
18099Spst            skip(1);
18099Spst        }
18099Spst        // Skip over end of line.
18099Spst        skipEOL(addEOL);
18099Spst    }
18099Spst
18099Spst    /**
151497Sru     * Test whether a char is valid JavaScript whitespace
151497Sru     * @param ch a char
18099Spst     * @return true if valid JavaScript whitespace
104862Sru     */
151497Sru    public static boolean isJSWhitespace(final char ch) {
69626Sru        return JAVASCRIPT_WHITESPACE.indexOf(ch) != -1;
18099Spst    }
18099Spst
18099Spst    /**
18099Spst     * Test whether a char is valid JavaScript end of line
18099Spst     * @param ch a char
18099Spst     * @return true if valid JavaScript end of line
18099Spst     */
18099Spst    public static boolean isJSEOL(final char ch) {
69626Sru        return JAVASCRIPT_WHITESPACE_EOL.indexOf(ch) != -1;
151497Sru    }
151497Sru
114402Sru    /**
69626Sru     * Test if char is a string delimiter, e.g. '\' or '"'.
114402Sru     * @param ch a char
114402Sru     * @return true if string delimiter
114402Sru     */
114402Sru    protected boolean isStringDelimiter(final char ch) {
114402Sru        return ch == '\'' || ch == '"';
114402Sru    }
151497Sru
69626Sru    /**
114402Sru     * Test if char is a template literal delimiter ('`').
114402Sru     */
114402Sru    private static boolean isTemplateDelimiter(char ch) {
114402Sru        return ch == '`';
114402Sru    }
114402Sru
114402Sru    /**
114402Sru     * Test whether a char is valid JavaScript whitespace
114402Sru     * @param ch a char
114402Sru     * @return true if valid JavaScript whitespace
114402Sru     */
114402Sru    protected boolean isWhitespace(final char ch) {
114402Sru        return Lexer.isJSWhitespace(ch);
114402Sru    }
114402Sru
114402Sru    /**
104862Sru     * Test whether a char is valid JavaScript end of line
114402Sru     * @param ch a char
151497Sru     * @return true if valid JavaScript end of line
114402Sru     */
69626Sru    protected boolean isEOL(final char ch) {
114402Sru        return Lexer.isJSEOL(ch);
114402Sru    }
114402Sru
151497Sru    /**
151497Sru     * Skip over whitespace and detect end of line, adding EOL tokens if
151497Sru     * encountered.
114402Sru     *
151497Sru     * @param addEOL true if EOL tokens should be recorded.
114402Sru     */
151497Sru    private void skipWhitespace(final boolean addEOL) {
151497Sru        while (isWhitespace(ch0)) {
151497Sru            if (isEOL(ch0)) {
151497Sru                skipEOL(addEOL);
151497Sru            } else {
151497Sru                skip(1);
151497Sru            }
114402Sru        }
114402Sru    }
151497Sru
114402Sru    /**
114402Sru     * Skip over comments.
69626Sru     *
75584Sru     * @return True if a comment.
69626Sru     */
104862Sru    protected boolean skipComments() {
104862Sru        // Save the current position.
114402Sru        final int start = position;
114402Sru
114402Sru        if (ch0 == '/') {
114402Sru            // Is it a // comment.
104862Sru            if (ch1 == '/') {
151497Sru                // Skip over //.
114402Sru                skip(2);
114402Sru
114402Sru                boolean directiveComment = false;
114402Sru                if ((ch0 == '#' || ch0 == '@') && (ch1 == ' ')) {
114402Sru                    directiveComment = true;
75584Sru                }
75584Sru
104862Sru                // Scan for EOL.
75584Sru                while (!atEOF() && !isEOL(ch0)) {
114402Sru                    skip(1);
114402Sru                }
151497Sru                // Did detect a comment.
151497Sru                add(directiveComment? DIRECTIVE_COMMENT : COMMENT, start);
114402Sru                return true;
69626Sru            } else if (ch1 == '*') {
114402Sru                // Skip over /*.
69626Sru                skip(2);
114402Sru                // Scan for */.
69626Sru                while (!atEOF() && !(ch0 == '*' && ch1 == '/')) {
114402Sru                    // If end of line handle else skip character.
69626Sru                    if (isEOL(ch0)) {
114402Sru                        skipEOL(true);
114402Sru                    } else {
69626Sru                        skip(1);
114402Sru                    }
114402Sru                }
18099Spst
114402Sru                if (atEOF()) {
69626Sru                    // TODO - Report closing */ missing in parser.
114402Sru                    add(ERROR, start);
114402Sru                } else {
114402Sru                    // Skip */.
114402Sru                    skip(2);
114402Sru                }
114402Sru
18099Spst                // Did detect a comment.
69626Sru                add(COMMENT, start);
69626Sru                return true;
69626Sru            }
69626Sru        } else if (ch0 == '#') {
151497Sru            assert scripting;
151497Sru            // shell style comment
69626Sru            // Skip over #.
69626Sru            skip(1);
69626Sru            // Scan for EOL.
69626Sru            while (!atEOF() && !isEOL(ch0)) {
69626Sru                skip(1);
69626Sru            }
69626Sru            // Did detect a comment.
69626Sru            add(COMMENT, start);
69626Sru            return true;
75584Sru        }
69626Sru
69626Sru        // Not a comment.
69626Sru        return false;
69626Sru    }
69626Sru
69626Sru    /**
69626Sru     * Convert a regex token to a token object.
69626Sru     *
69626Sru     * @param start  Position in source content.
69626Sru     * @param length Length of regex token.
69626Sru     * @return Regex token object.
69626Sru     */
151497Sru    public RegexToken valueOfPattern(final int start, final int length) {
151497Sru        // Save the current position.
151497Sru        final int savePosition = position;
151497Sru        // Reset to beginning of content.
151497Sru        reset(start);
69626Sru        // Buffer for recording characters.
69626Sru        final StringBuilder sb = new StringBuilder(length);
69626Sru
104862Sru        // Skip /.
69626Sru        skip(1);
69626Sru        boolean inBrackets = false;
69626Sru        // Scan for closing /, stopping at end of line.
69626Sru        while (!atEOF() && ch0 != '/' && !isEOL(ch0) || inBrackets) {
69626Sru            // Skip over escaped character.
151497Sru            if (ch0 == '\\') {
69626Sru                sb.append(ch0);
69626Sru                sb.append(ch1);
69626Sru                skip(2);
69626Sru            } else {
69626Sru                if (ch0 == '[') {
69626Sru                    inBrackets = true;
151497Sru                } else if (ch0 == ']') {
104862Sru                    inBrackets = false;
104862Sru                }
104862Sru
151497Sru                // Skip literal character.
104862Sru                sb.append(ch0);
104862Sru                skip(1);
151497Sru            }
151497Sru        }
69626Sru
151497Sru        // Get pattern as string.
69626Sru        final String regex = sb.toString();
69626Sru
69626Sru        // Skip /.
104862Sru        skip(1);
69626Sru
104862Sru        // Options as string.
104862Sru        final String options = source.getString(position, scanIdentifier());
104862Sru
104862Sru        reset(savePosition);
104862Sru
104862Sru        // Compile the pattern.
151497Sru        return new RegexToken(regex, options);
104862Sru    }
104862Sru
151497Sru    /**
104862Sru     * Return true if the given token can be the beginning of a literal.
18099Spst     *
18099Spst     * @param token a token
18099Spst     * @return true if token can start a literal.
69626Sru     */
69626Sru    public boolean canStartLiteral(final TokenType token) {
69626Sru        return token.startsWith('/') || ((scripting || XML_LITERALS) && token.startsWith('<'));
69626Sru    }
151497Sru
151497Sru    /**
69626Sru     * interface to receive line information for multi-line literals.
69626Sru     */
69626Sru    protected interface LineInfoReceiver {
18099Spst        /**
18099Spst         * Receives line information
18099Spst         * @param line last line number
69626Sru         * @param linePosition position of last line
18099Spst         */
18099Spst        public void lineInfo(int line, int linePosition);
18099Spst    }
69626Sru
18099Spst    /**
18099Spst     * Check whether the given token represents the beginning of a literal. If so scan
69626Sru     * the literal and return <tt>true</tt>, otherwise return false.
69626Sru     *
69626Sru     * @param token the token.
69626Sru     * @param startTokenType the token type.
55839Sasmodai     * @param lir LineInfoReceiver that receives line info for multi-line string literals.
18099Spst     * @return True if a literal beginning with startToken was found and scanned.
69626Sru     */
151497Sru    protected boolean scanLiteral(final long token, final TokenType startTokenType, final LineInfoReceiver lir) {
151497Sru        // Check if it can be a literal.
151497Sru        if (!canStartLiteral(startTokenType)) {
151497Sru            return false;
18099Spst        }
69626Sru        // We break on ambiguous tokens so if we already moved on it can't be a literal.
18099Spst        if (stream.get(stream.last()) != token) {
69626Sru            return false;
18099Spst        }
69626Sru        // Rewind to token start position
69626Sru        reset(Token.descPosition(token));
69626Sru
69626Sru        if (ch0 == '/') {
18099Spst            return scanRegEx();
18099Spst        } else if (ch0 == '<') {
69626Sru            if (ch1 == '<') {
151497Sru                return scanHereString(lir);
151497Sru            } else if (Character.isJavaIdentifierStart(ch1)) {
151497Sru                return scanXMLLiteral();
151497Sru            }
18099Spst        }
69626Sru
18099Spst        return false;
69626Sru    }
18099Spst
69626Sru    /**
69626Sru     * Scan over regex literal.
69626Sru     *
69626Sru     * @return True if a regex literal.
18099Spst     */
18099Spst    private boolean scanRegEx() {
69626Sru        assert ch0 == '/';
151497Sru        // Make sure it's not a comment.
151497Sru        if (ch1 != '/' && ch1 != '*') {
151497Sru            // Record beginning of literal.
151497Sru            final int start = position;
18099Spst            // Skip /.
69626Sru            skip(1);
18099Spst            boolean inBrackets = false;
69626Sru
18099Spst            // Scan for closing /, stopping at end of line.
69626Sru            while (!atEOF() && (ch0 != '/' || inBrackets) && !isEOL(ch0)) {
69626Sru                // Skip over escaped character.
69626Sru                if (ch0 == '\\') {
69626Sru                    skip(1);
18099Spst                    if (isEOL(ch0)) {
151497Sru                        reset(start);
69626Sru                        return false;
151497Sru                    }
151497Sru                    skip(1);
151497Sru                } else {
151497Sru                    if (ch0 == '[') {
18099Spst                        inBrackets = true;
69626Sru                    } else if (ch0 == ']') {
18099Spst                        inBrackets = false;
18099Spst                    }
69626Sru
69626Sru                    // Skip literal character.
69626Sru                    skip(1);
18099Spst                }
151497Sru            }
69626Sru
151497Sru            // If regex literal.
151497Sru            if (ch0 == '/') {
151497Sru                // Skip /.
151497Sru                skip(1);
18099Spst
69626Sru                // Skip over options.
18099Spst                while (!atEOF() && Character.isJavaIdentifierPart(ch0) || ch0 == '\\' && ch1 == 'u') {
18099Spst                    skip(1);
69626Sru                }
69626Sru
69626Sru                // Add regex token.
18099Spst                add(REGEX, start);
18099Spst                // Regex literal detected.
18099Spst                return true;
18099Spst            }
69626Sru
18099Spst            // False start try again.
18099Spst            reset(start);
69626Sru        }
18099Spst
18099Spst        // Regex literal not detected.
18099Spst        return false;
18099Spst    }
18099Spst
104862Sru    /**
18099Spst     * Convert a digit to a integer.  Can't use Character.digit since we are
104862Sru     * restricted to ASCII by the spec.
104862Sru     *
104862Sru     * @param ch   Character to convert.
18099Spst     * @param base Numeric base.
104862Sru     *
104862Sru     * @return The converted digit or -1 if invalid.
18099Spst     */
151497Sru    protected static int convertDigit(final char ch, final int base) {
151497Sru        int digit;
151497Sru
151497Sru        if ('0' <= ch && ch <= '9') {
151497Sru            digit = ch - '0';
151497Sru        } else if ('A' <= ch && ch <= 'Z') {
18099Spst            digit = ch - 'A' + 10;
18099Spst        } else if ('a' <= ch && ch <= 'z') {
18099Spst            digit = ch - 'a' + 10;
69626Sru        } else {
69626Sru            return -1;
69626Sru        }
69626Sru
18099Spst        return digit < base ? digit : -1;
18099Spst    }
18099Spst
18099Spst
18099Spst    /**
18099Spst     * Get the value of a hexadecimal numeric sequence.
18099Spst     *
18099Spst     * @param length Number of digits.
104862Sru     * @param type   Type of token to report against.
104862Sru     * @return Value of sequence or < 0 if no digits.
18099Spst     */
18099Spst    private int hexSequence(final int length, final TokenType type) {
18099Spst        int value = 0;
18099Spst
18099Spst        for (int i = 0; i < length; i++) {
18099Spst            final int digit = convertDigit(ch0, 16);
18099Spst
18099Spst            if (digit == -1) {
18099Spst                error(Lexer.message("invalid.hex"), type, position, limit);
18099Spst                return i == 0 ? -1 : value;
18099Spst            }
18099Spst
18099Spst            value = digit | value << 4;
18099Spst            skip(1);
18099Spst        }
18099Spst
18099Spst        return value;
104862Sru    }
18099Spst
104862Sru    /**
104862Sru     * Get the value of an octal numeric sequence. This parses up to 3 digits with a maximum value of 255.
151497Sru     *
151497Sru     * @return Value of sequence.
151497Sru     */
151497Sru    private int octalSequence() {
18099Spst        int value = 0;
151497Sru
151497Sru        for (int i = 0; i < 3; i++) {
18099Spst            final int digit = convertDigit(ch0, 8);
151497Sru
151497Sru            if (digit == -1) {
151497Sru                break;
151497Sru            }
151497Sru            value = digit | value << 3;
151497Sru            skip(1);
18099Spst
151497Sru            if (i == 1 && value >= 32) {
151497Sru                break;
151497Sru            }
151497Sru        }
151497Sru        return value;
151497Sru    }
151497Sru
151497Sru    /**
151497Sru     * Convert a string to a JavaScript identifier.
151497Sru     *
151497Sru     * @param start  Position in source content.
151497Sru     * @param length Length of token.
151497Sru     * @return Ident string or null if an error.
151497Sru     */
151497Sru    private String valueOfIdent(final int start, final int length) throws RuntimeException {
151497Sru        // Save the current position.
151497Sru        final int savePosition = position;
18099Spst        // End of scan.
18099Spst        final int end = start + length;
18099Spst        // Reset to beginning of content.
        reset(start);
        // Buffer for recording characters.
        final StringBuilder sb = new StringBuilder(length);

        // Scan until end of line or end of file.
        while (!atEOF() && position < end && !isEOL(ch0)) {
            // If escape character.
            if (ch0 == '\\' && ch1 == 'u') {
                skip(2);
                final int ch = hexSequence(4, TokenType.IDENT);
                if (isWhitespace((char)ch)) {
                    return null;
                }
                if (ch < 0) {
                    sb.append('\\');
                    sb.append('u');
                } else {
                    sb.append((char)ch);
                }
            } else {
                // Add regular character.
                sb.append(ch0);
                skip(1);
            }
        }

        // Restore position.
        reset(savePosition);

        return sb.toString();
    }

    /**
     * Scan over an identifier or keyword. Handles identifiers containing
     * encoded Unicode chars.
     *
     * Example:
     *
     * var \u0042 = 44;
     */
    private void scanIdentifierOrKeyword() {
        // Where the identifier begins and how many source characters it spans.
        final int identStart = position;
        final int identLength = scanIdentifier();

        // Resolve the scanned text to a keyword type or a plain identifier type.
        final TokenType tokenType = TokenLookup.lookupKeyword(content, identStart, identLength);

        // When asked to pause on function bodies, arm the pause for the next '{'.
        if (pauseOnFunctionBody && tokenType == FUNCTION) {
            pauseOnNextLeftBrace = true;
        }

        // Emit the token.
        add(tokenType, identStart);
    }

    /**
     * Convert a string to a JavaScript string object.
     *
     * @param start  Position in source content.
     * @param length Length of token.
     * @param strict True if octal escape sequences (other than "\0") are reported as errors.
     * @return JavaScript string object.
     */
    private String valueOfString(final int start, final int length, final boolean strict) throws RuntimeException {
        // Decodes the string token occupying [start, start + length) in the
        // source: escape sequences are replaced by the characters they denote,
        // line continuations are dropped and CR / CR-LF are normalised to LF.
        // The lexer is temporarily repositioned onto the token and reset to its
        // previous position at the end.

        // Save the current position.
        final int savePosition = position;
        // Calculate the end position.
        final int end = start + length;
        // Reset to beginning of string.
        reset(start);

        // Buffer for recording characters.
        final StringBuilder sb = new StringBuilder(length);

        // Scan until end of string.
        while (position < end) {
            // If escape character.
            if (ch0 == '\\') {
                // Step over the backslash.
                skip(1);

                // Character selecting the kind of escape, and its position.
                final char next = ch0;
                final int afterSlash = position;

                // Step over the selector; ch0 is now the character following it.
                skip(1);

                // Special characters.
                switch (next) {
                case '0':
                case '1':
                case '2':
                case '3':
                case '4':
                case '5':
                case '6':
                case '7': {
                    if (strict) {
                        // "\0" itself is allowed in strict mode. Only other 'real'
                        // octal escape sequences are not allowed (eg. "\02", "\31").
                        // See section 7.8.4 String literals production EscapeSequence
                        if (next != '0' || (ch0 >= '0' && ch0 <= '9')) {
                            error(Lexer.message("strict.no.octal"), STRING, position, limit);
                        }
                    }
                    // Go back to the first digit so the whole sequence is read.
                    reset(afterSlash);
                    // Octal sequence.
                    final int ch = octalSequence();

                    // NOTE(review): the fallback emits "\x" although this is the
                    // octal branch - confirm whether that is intended.
                    if (ch < 0) {
                        sb.append('\\');
                        sb.append('x');
                    } else {
                        sb.append((char)ch);
                    }
                    break;
                }
                case 'n':
                    sb.append('\n');
                    break;
                case 't':
                    sb.append('\t');
                    break;
                case 'b':
                    sb.append('\b');
                    break;
                case 'f':
                    sb.append('\f');
                    break;
                case 'r':
                    sb.append('\r');
                    break;
                case '\'':
                    sb.append('\'');
                    break;
                case '\"':
                    sb.append('\"');
                    break;
                case '\\':
                    sb.append('\\');
                    break;
                case '\r': // CR | CRLF
                    // Swallow the LF of a CR-LF pair as part of the continuation.
                    if (ch0 == '\n') {
                        skip(1);
                    }
                    // fall through
                case '\n': // LF
                case '\u2028': // LS
                case '\u2029': // PS
                    // continue on the next line, slash-return continues string
                    // literal
                    break;
                case 'x': {
                    // Hex sequence.
                    final int ch = hexSequence(2, STRING);

                    // Negative result: keep the escape introducer verbatim.
                    if (ch < 0) {
                        sb.append('\\');
                        sb.append('x');
                    } else {
                        sb.append((char)ch);
                    }
                }
                    break;
                case 'u': {
                    // Unicode sequence.
                    final int ch = hexSequence(4, STRING);

                    // Negative result: keep the escape introducer verbatim.
                    if (ch < 0) {
                        sb.append('\\');
                        sb.append('u');
                    } else {
                        sb.append((char)ch);
                    }
                }
                    break;
                case 'v':
                    // Vertical tab.
                    sb.append('\u000B');
                    break;
                // All other characters.
                default:
                    // Unknown escape: the backslash is dropped, the character kept.
                    sb.append(next);
                    break;
                }
            } else if (ch0 == '\r') {
                // Convert CR-LF or CR to LF line terminator.
                sb.append('\n');
                skip(ch1 == '\n' ? 2 : 1);
            } else {
                // Add regular character.
                sb.append(ch0);
                skip(1);
            }
        }

        // Restore position.
        reset(savePosition);

        return sb.toString();
    }

    /**
     * Scan over a string literal.
     * @param add true if we are not just scanning but should actually modify the token stream
     */
    protected void scanString(final boolean add) {
        // The opening delimiter doubles as the terminator we are looking for.
        final char delimiter = ch0;
        // The literal is a plain STRING until an escape is encountered.
        TokenType literalType = STRING;

        // Step past the opening delimiter and remember where the content starts.
        skip(1);
        final State contentState = saveState();

        // Consume characters up to the closing delimiter, a bare end of line, or EOF.
        while (!atEOF() && ch0 != delimiter && !isEOL(ch0)) {
            final boolean escape = ch0 == '\\';

            if (escape) {
                // The literal needs decoding later on.
                literalType = ESCSTRING;
                skip(1);
                if (!isEscapeCharacter(ch0)) {
                    error(Lexer.message("invalid.escape.char"), STRING, position, limit);
                }
            }

            if (escape && isEOL(ch0)) {
                // Backslash-newline: the literal continues on the next line.
                skipEOL(false);
                continue;
            }

            // Consume the (possibly escaped) character.
            skip(1);
        }

        // The literal must end with the delimiter it started with.
        if (ch0 != delimiter) {
            error(Lexer.message("missing.close.quote"), STRING, position, limit);
        } else {
            skip(1);
        }

        // Pure scanning mode: nothing is added to the token stream.
        if (!add) {
            return;
        }

        // The content ends just before the closing delimiter.
        contentState.setLimit(position - 1);

        // Outside scripting mode, or for empty content, emit the literal untouched.
        if (!scripting || contentState.isEmpty()) {
            add(literalType, contentState.position, contentState.limit);
            return;
        }

        if (delimiter == '`') {
            // Exec string: marker token, then the edited content framed by braces.
            add(EXECSTRING, contentState.position, contentState.limit);
            add(LBRACE, contentState.position, contentState.position);
            editString(literalType, contentState);
            add(RBRACE, contentState.limit, contentState.limit);
        } else if (delimiter == '"') {
            // Double quoted strings are edited for embedded expressions.
            editString(literalType, contentState);
        } else if (delimiter == '\'') {
            // Single quoted strings are emitted verbatim.
            add(literalType, contentState.position, contentState.limit);
        }
    }

    /**
     * Scan over a template string literal.
     */
    private void scanTemplate() {
        assert ch0 == '`';
        // TEMPLATE until the first substitution is seen, TEMPLATE_MIDDLE afterwards.
        TokenType partType = TEMPLATE;

        // Step past the back-tick; the first literal part starts here.
        skip(1);
        State partState = saveState();

        while (!atEOF()) {
            // Closing back-tick: emit the final part and finish.
            if (ch0 == '`') {
                skip(1);
                partState.setLimit(position - 1);
                final TokenType closingType = partType == TEMPLATE ? partType : TEMPLATE_TAIL;
                add(closingType, partState.position, partState.limit);
                return;
            }

            // Start of a substitution: emit the part collected so far.
            if (ch0 == '$' && ch1 == '{') {
                skip(2);
                partState.setLimit(position - 2);
                final TokenType openingType = partType == TEMPLATE ? TEMPLATE_HEAD : partType;
                add(openingType, partState.position, partState.limit);

                // Let a nested lexer tokenize the expression up to its closing brace,
                // then continue from wherever it stopped.
                final Lexer nestedLexer = new Lexer(this, saveState());
                nestedLexer.templateExpressionOpenBraces = 1;
                nestedLexer.lexify();
                restoreState(nestedLexer.saveState());

                // Whatever follows is a middle or tail part.
                assert ch0 == '}';
                partType = TEMPLATE_MIDDLE;

                // Step past the closing brace; the next literal part starts here.
                skip(1);
                partState = saveState();
                continue;
            }

            // Escape sequence: step over the backslash and validate what follows.
            if (ch0 == '\\') {
                skip(1);
                if (!isEscapeCharacter(ch0)) {
                    error(Lexer.message("invalid.escape.char"), TEMPLATE, position, limit);
                }
            }

            // Line terminator, either escaped (continuation) or literal.
            if (isEOL(ch0)) {
                skipEOL(false);
                continue;
            }

            // Any other character belongs to the current part.
            skip(1);
        }

        // Ran out of input before the closing back-tick.
        error(Lexer.message("missing.close.quote"), TEMPLATE, position, limit);
    }

    /**
     * Is the given character a valid escape char after "\" ?
     *
     * @param ch character to be checked
     * @return if the given character is valid after "\"
     */
    protected boolean isEscapeCharacter(final char ch) {
        // This implementation accepts every character. The method is protected
        // and non-final, so an overriding lexer can restrict the accepted set.
        return true;
    }

    /**
     * Convert string to number.
     *
     * @param valueString  String to convert.
     * @param radix        Numeric base.
     * @return Converted number.
     */
    private static Number valueOf(final String valueString, final int radix) throws NumberFormatException {
        try {
            // Fast path: the literal fits into an int.
            return Integer.parseInt(valueString, radix);
        } catch (final NumberFormatException tooLargeForInt) {
            // Decimal literals are handed to the double parser.
            if (radix == 10) {
                return Double.valueOf(valueString);
            }

            // Other radixes are accumulated digit by digit into a double.
            // The digits were verified by the scanner, so each conversion is
            // expected to yield a valid digit.
            double accumulated = 0.0;
            for (final char digitChar : valueString.toCharArray()) {
                accumulated = accumulated * radix + convertDigit(digitChar, radix);
            }

            return accumulated;
        }
    }

    /**
     * Scan a number.
     */
    protected void scanNumber() {
        // Where the literal begins.
        final int numberStart = position;
        // Decimal unless one of the branches below decides otherwise.
        TokenType numberType = DECIMAL;

        // Value of the first character as a decimal digit (-1 for a leading '.').
        int digit = convertDigit(ch0, 10);
        final boolean leadingZero = digit == 0;

        if (leadingZero && (ch1 == 'x' || ch1 == 'X') && convertDigit(ch2, 16) != -1) {
            // Hexadecimal: consume "0x" plus the first digit, then the rest.
            skip(3);
            while (convertDigit(ch0, 16) != -1) {
                skip(1);
            }
            numberType = HEXADECIMAL;
        } else if (leadingZero && es6 && (ch1 == 'o' || ch1 == 'O') && convertDigit(ch2, 8) != -1) {
            // ES6 octal: consume "0o" plus the first digit, then the rest.
            skip(3);
            while (convertDigit(ch0, 8) != -1) {
                skip(1);
            }
            numberType = OCTAL;
        } else if (leadingZero && es6 && (ch1 == 'b' || ch1 == 'B') && convertDigit(ch2, 2) != -1) {
            // ES6 binary: consume "0b" plus the first digit, then the rest.
            skip(3);
            while (convertDigit(ch0, 2) != -1) {
                skip(1);
            }
            numberType = BINARY_NUMBER;
        } else {
            // A leading zero makes the literal a legacy octal candidate.
            boolean octalCandidate = leadingZero;

            // Consume the first digit unless the literal starts with '.'.
            if (digit != -1) {
                skip(1);
            }

            // Consume the remaining decimal digits; an 8 or 9 rules octal out.
            for (digit = convertDigit(ch0, 10); digit != -1; digit = convertDigit(ch0, 10)) {
                octalCandidate = octalCandidate && digit < 8;
                skip(1);
            }

            if (octalCandidate && position - numberStart > 1) {
                numberType = OCTAL_LEGACY;
            } else if (ch0 == '.' || ch0 == 'E' || ch0 == 'e') {
                // Fraction and/or exponent present: floating point literal.
                if (ch0 == '.') {
                    // Consume the period and the fraction digits.
                    skip(1);
                    while (convertDigit(ch0, 10) != -1) {
                        skip(1);
                    }
                }

                if (ch0 == 'E' || ch0 == 'e') {
                    // Consume the exponent marker, an optional sign and the digits.
                    skip(1);
                    if (ch0 == '+' || ch0 == '-') {
                        skip(1);
                    }
                    while (convertDigit(ch0, 10) != -1) {
                        skip(1);
                    }
                }

                numberType = FLOATING;
            }
        }

        // An identifier start directly after the literal is reported as an error.
        if (Character.isJavaIdentifierStart(ch0)) {
            error(Lexer.message("missing.space.after.number"), numberType, position, 1);
        }

        // Emit the number token.
        add(numberType, numberStart);
    }

    /**
     * Convert an XML token to a token object.
     *
     * @param start  Position in source content.
     * @param length Length of XML token.
     * @return XML token object.
     */
    XMLToken valueOfXML(final int start, final int length) {
        // Extract the literal's text from the source and wrap it in a token object.
        final String xmlText = source.getString(start, length);
        return new XMLToken(xmlText);
    }

    /**
     * Scan over a XML token.
     *
     * @return TRUE if is an XML literal.
     */
    private boolean scanXMLLiteral() {
        assert ch0 == '<' && Character.isJavaIdentifierStart(ch1);

        // XML literal support is switched off: never an XML literal.
        if (!XML_LITERALS) {
            return false;
        }

        // Where the XML expression begins; every failure rewinds to this point.
        final int xmlStart = position;
        // Number of elements currently open.
        int depth = 0;

        do {
            if (ch0 == '<') {
                // Classify the tag by what follows the '<'.
                if (ch1 == '/' && Character.isJavaIdentifierStart(ch2)) {
                    // Closing tag.
                    skip(3);
                    depth--;
                } else if (Character.isJavaIdentifierStart(ch1)) {
                    // Opening tag.
                    skip(2);
                    depth++;
                } else if (ch1 == '?') {
                    // "<?" tag.
                    skip(2);
                } else if (ch1 == '!' && ch2 == '-' && ch3 == '-') {
                    // "<!--" tag.
                    skip(4);
                } else {
                    // Not something we recognise as a tag.
                    reset(xmlStart);
                    return false;
                }

                // Consume the tag body up to its closing '>'.
                while (!atEOF() && ch0 != '>') {
                    if (ch0 == '/' && ch1 == '>') {
                        // Self closing tag.
                        depth--;
                        skip(1);
                        break;
                    } else if (ch0 == '\"' || ch0 == '\'') {
                        // Quoted attribute value: scan without emitting a token.
                        scanString(false);
                    } else {
                        skip(1);
                    }
                }

                // The tag was never closed.
                if (ch0 != '>') {
                    reset(xmlStart);
                    return false;
                }

                skip(1);
            } else if (atEOF()) {
                // Input ended while elements were still open.
                reset(xmlStart);
                return false;
            } else {
                // Text between tags.
                skip(1);
            }
        } while (depth > 0);

        add(XML, xmlStart);
        return true;
    }

    /**
     * Scan over identifier characters.
     *
     * @return Length of identifier or zero if none found.
     */
    private int scanIdentifier() {
        final int identStart = position;

        // The first character must be a valid identifier start, given either
        // literally or as a unicode escape.
        if (ch0 == '\\' && ch1 == 'u') {
            skip(2);
            final int codePoint = hexSequence(4, TokenType.IDENT);

            if (!Character.isJavaIdentifierStart(codePoint)) {
                error(Lexer.message("illegal.identifier.character"), TokenType.IDENT, identStart, position);
            }
        } else if (!Character.isJavaIdentifierStart(ch0)) {
            // Nothing identifier-like here.
            return 0;
        }

        // Every following character must be a valid identifier part.
        while (!atEOF()) {
            final boolean unicodeEscape = ch0 == '\\' && ch1 == 'u';

            if (!unicodeEscape) {
                // A literal character either extends the identifier or ends it.
                if (!Character.isJavaIdentifierPart(ch0)) {
                    break;
                }
                skip(1);
                continue;
            }

            // Escaped character: decode it and validate the decoded value.
            skip(2);
            final int codePoint = hexSequence(4, TokenType.IDENT);

            if (!Character.isJavaIdentifierPart(codePoint)) {
                error(Lexer.message("illegal.identifier.character"), TokenType.IDENT, identStart, position);
            }
        }

        // Number of source characters consumed.
        return position - identStart;
    }

    /**
     * Compare two identifiers (in content) for equality.
     *
     * @param aStart  Start of first identifier.
     * @param aLength Length of first identifier.
     * @param bStart  Start of second identifier.
     * @param bLength Length of second identifier.
     * @return True if equal.
     */
    private boolean identifierEqual(final int aStart, final int aLength, final int bStart, final int bLength) {
        // Identifiers of different lengths can never match.
        if (aLength != bLength) {
            return false;
        }

        // Compare character by character, stopping at the first difference.
        for (int offset = 0; offset < aLength; offset++) {
            if (content[aStart + offset] != content[bStart + offset]) {
                return false;
            }
        }

        return true;
    }

    /**
     * Detect if a line starts with a marker identifier.
     *
     * @param identStart  Start of identifier.
     * @param identLength Length of identifier.
     * @return True if detected.
     */
    private boolean hasHereMarker(final int identStart, final int identLength) {
        // Ignore leading whitespace on the line.
        skipWhitespace(false);

        // Scan whatever identifier starts here and compare it with the marker.
        final int candidateStart = position;
        final int candidateLength = scanIdentifier();

        return identifierEqual(identStart, identLength, candidateStart, candidateLength);
    }

    /**
     * Lexer to service edit strings.
     */
    private static class EditStringLexer extends Lexer {
        /** Type of string literals to emit. */
        final TokenType stringType;

        /**
         * Constructor.
         *
         * @param lexer       Lexer passed on to the superclass constructor.
         * @param stringType  Type of string literals to emit.
         * @param stringState State delimiting the string content to scan.
         */
        EditStringLexer(final Lexer lexer, final TokenType stringType, final State stringState) {
            super(lexer, stringState);

            this.stringType = stringType;
        }

        /**
         * Lexify the contents of the string. Literal portions are emitted as
         * string tokens of {@link #stringType}; every <code>${...}</code>
         * portion is emitted as an ADD token followed by the parenthesized
         * tokens of the embedded expression, so that the token stream reads
         * as a string concatenation.
         */
        @Override
        public void lexify() {
            // Record start of string position.
            int stringStart = position;
            // Indicate that the priming first string has not been emitted.
            boolean primed = false;

            while (true) {
                // Detect end of content.
                if (atEOF()) {
                    break;
                }

                // Honour escapes (should be well formed.)
                if (ch0 == '\\' && stringType == ESCSTRING) {
                    // Step over the backslash and the escaped character.
                    skip(2);

                    continue;
                }

                // If start of expression.
                if (ch0 == '$' && ch1 == '{') {
                    // Emit the literal portion collected so far. The very first
                    // portion is emitted even when empty so that the
                    // concatenation starts with a string.
                    if (!primed || stringStart != position) {
                        if (primed) {
                            add(ADD, stringStart, stringStart + 1);
                        }

                        add(stringType, stringStart, position);
                        primed = true;
                    }

                    // Skip ${
                    skip(2);

                    // Save expression state.
                    final State expressionState = saveState();

                    // Start with one open brace.
                    int braceCount = 1;

                    // Scan for the rest of the string.
                    while (!atEOF()) {
                        // If closing brace.
                        if (ch0 == '}') {
                            // Break only if matching brace.
                            if (--braceCount == 0) {
                                break;
                            }
                        } else if (ch0 == '{') {
                            // Bump up the brace count.
                            braceCount++;
                        }

                        // Skip to next character.
                        skip(1);
                    }

                    // If braces don't match then report an error.
                    if (braceCount != 0) {
                        error(Lexer.message("edit.string.missing.brace"), LBRACE, expressionState.position - 1, 1);
                    }

                    // Mark end of expression.
                    expressionState.setLimit(position);
                    // Skip closing brace.
                    skip(1);

                    // Start next string.
                    stringStart = position;

                    // Concatenate expression.
                    add(ADD, expressionState.position, expressionState.position + 1);
                    add(LPAREN, expressionState.position, expressionState.position + 1);

                    // Scan expression.
                    final Lexer lexer = new Lexer(this, expressionState);
                    lexer.lexify();

                    // Close out expression parenthesis.
                    add(RPAREN, position - 1, position);

                    continue;
                }

                // Next character in string.
                skip(1);
            }

            // If there is any unemitted string portion.
            if (stringStart != limit) {
                // Concatenate remaining string. The third argument of add is an
                // end position (as in every other call here), so the one
                // character wide ADD token ends at stringStart + 1.
                if (primed) {
                    add(ADD, stringStart, stringStart + 1);
                }

                add(stringType, stringStart, limit);
            }
        }

    }

    /**
     * Edit string for nested expressions.
     *
     * @param stringType  Type of string literals to emit.
     * @param stringState State of lexer at start of string.
     */
    private void editString(final TokenType stringType, final State stringState) {
        // Delegate to the dedicated lexer, which emits the string pieces and the
        // embedded expressions found between stringState's bounds.
        new EditStringLexer(this, stringType, stringState).lexify();

        // Record the string type as the last token type seen by this lexer.
        last = stringType;
    }

    /**
     * Scan over a here string.
     *
     * @return TRUE if is a here string.
     */
    private boolean scanHereString(final LineInfoReceiver lir) {
        // Scans a here string: "<<" or "<<<", followed by a marker identifier
        // (optionally quoted), followed by the lines of the string up to a line
        // that starts with the marker. Only active in scripting mode. On failure
        // the lexer state saved at entry is restored and false is returned.
        assert ch0 == '<' && ch1 == '<';
        if (scripting) {
            // Record beginning of here string.
            final State saved = saveState();

            // << or <<<
            // With "<<" the final end of line is removed from the string (see below).
            final boolean excludeLastEOL = ch2 != '<';

            if (excludeLastEOL) {
                skip(2);
            } else {
                skip(3);
            }

            // Scan identifier. It might be quoted, indicating that no string editing should take place.
            final char quoteChar = ch0;
            final boolean noStringEditing = quoteChar == '"' || quoteChar == '\'';
            if (noStringEditing) {
                skip(1);
            }
            final int identStart = position;
            final int identLength = scanIdentifier();
            if (noStringEditing) {
                // A quoted marker must be closed by the same quote character.
                if (ch0 != quoteChar) {
                    error(Lexer.message("here.non.matching.delimiter"), last, position, position);
                    restoreState(saved);
                    return false;
                }
                skip(1);
            }

            // Check for identifier.
            if (identLength == 0) {
                // Treat as shift.
                restoreState(saved);

                return false;
            }

            // Record rest of line.
            // It is lexed separately once the here string has been emitted.
            final State restState = saveState();
            // keep line number updated
            int lastLine = line;

            skipLine(false);
            lastLine++;
            int lastLinePosition = position;
            restState.setLimit(position);

            // Record beginning of string.
            final State stringState = saveState();
            int stringEnd = position;

            // Hunt down marker.
            while (!atEOF()) {
                // Skip any whitespace.
                skipWhitespace(false);

                if (hasHereMarker(identStart, identLength)) {
                    break;
                }

                // Not the marker line: it belongs to the string.
                skipLine(false);
                lastLine++;
                lastLinePosition = position;
                stringEnd = position;
            }

            // notify last line information
            lir.lineInfo(lastLine, lastLinePosition);

            // Record end of string.
            stringState.setLimit(stringEnd);

            // If marker is missing.
            if (stringState.isEmpty() || atEOF()) {
                error(Lexer.message("here.missing.end.marker", source.getString(identStart, identLength)), last, position, position);
                restoreState(saved);

                return false;
            }

            // Remove last end of line if specified.
            if (excludeLastEOL) {
                // Handles \n.
                if (content[stringEnd - 1] == '\n') {
                    stringEnd--;
                }

                // Handles \r and \r\n.
                if (content[stringEnd - 1] == '\r') {
                    stringEnd--;
                }

                // Update end of string.
                stringState.setLimit(stringEnd);
            }

            // Edit string if appropriate.
            if (!noStringEditing && !stringState.isEmpty()) {
                editString(STRING, stringState);
            } else {
                // Add here string.
                add(STRING, stringState.position, stringState.limit);
            }

            // Scan rest of original line.
            final Lexer restLexer = new Lexer(this, restState);

            restLexer.lexify();

            return true;
        }

        // Here strings are only recognised in scripting mode.
        return false;
    }

    /**
     * Breaks source content down into lex units, adding tokens to the token
     * stream. The routine scans until the stream buffer is full. Can be called
     * repeatedly until EOF is detected.
     */
    public void lexify() {
        // A nested lexer keeps going regardless of how full the stream is.
        while (!stream.isFull() || nested) {
            // Skip over whitespace.
            skipWhitespace(true);

            // Detect end of file.
            if (atEOF()) {
                if (!nested) {
                    // Add an EOF token at the end.
                    add(EOF, position);
                }

                break;
            }

            // Check for comments. Note that we don't scan for regexp and other literals here as
            // we may not have enough context to distinguish them from similar looking operators.
            // Instead we break on ambiguous operators below and let the parser decide.
            if (ch0 == '/' && skipComments()) {
                continue;
            }

            // In scripting mode '#' may also start a comment.
            if (scripting && ch0 == '#' && skipComments()) {
                continue;
            }

            // TokenType for lookup of delimiter or operator.
            TokenType type;

            if (ch0 == '.' && convertDigit(ch1, 10) != -1) {
                // '.' followed by digit.
                // Scan and add a number.
                scanNumber();
            } else if ((type = TokenLookup.lookupOperator(ch0, ch1, ch2, ch3)) != null) {
                // While lexing a template expression, track brace nesting and stop
                // (without consuming it) at the brace that closes the expression.
                if (templateExpressionOpenBraces > 0) {
                    if (type == LBRACE) {
                        templateExpressionOpenBraces++;
                    } else if (type == RBRACE) {
                        if (--templateExpressionOpenBraces == 0) {
                            break;
                        }
                    }
                }

                // Get the number of characters in the token.
                final int typeLength = type.getLength();
                // Skip that many characters.
                skip(typeLength);
                // Add operator token.
                add(type, position - typeLength);
                // Some operator tokens also mark the beginning of regexp, XML, or here string literals.
                // We break to let the parser decide what it is.
                if (canStartLiteral(type)) {
                    break;
                } else if (type == LBRACE && pauseOnNextLeftBrace) {
                    // One-shot pause armed by scanIdentifierOrKeyword.
                    pauseOnNextLeftBrace = false;
                    break;
                }
            } else if (Character.isJavaIdentifierStart(ch0) || ch0 == '\\' && ch1 == 'u') {
                // Scan and add identifier or keyword.
                scanIdentifierOrKeyword();
            } else if (isStringDelimiter(ch0)) {
                // Scan and add a string.
                scanString(true);
            } else if (Character.isDigit(ch0)) {
                // Scan and add a number.
                scanNumber();
            } else if (isTemplateDelimiter(ch0) && es6) {
                // Scan and add template in ES6 mode.
                scanTemplate();
            } else if (isTemplateDelimiter(ch0) && scripting) {
                // Scan and add an exec string ('`') in scripting mode.
                scanString(true);
            } else {
                // Don't recognize this character.
                // Consume it and emit a one character ERROR token.
                skip(1);
                add(ERROR, position - 1);
            }
        }
    }

    /**
     * Return value of token given its token descriptor.
     *
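     * @param token  Token descriptor.
     * @param strict Strict mode flag used when decoding ESCSTRING tokens.
     * @return JavaScript value, or null if the token type carries no value.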
     */
    Object getValueOf(final long token, final boolean strict) {
        // Location of the token's text in the source.
        final int tokenStart  = Token.descPosition(token);
        final int tokenLength = Token.descLength(token);

        switch (Token.descType(token)) {
        // Integer-style literals; prefixed forms drop their two character prefix.
        case DECIMAL:
            return Lexer.valueOf(source.getString(tokenStart, tokenLength), 10);
        case OCTAL_LEGACY:
            return Lexer.valueOf(source.getString(tokenStart, tokenLength), 8);
        case HEXADECIMAL:
            return Lexer.valueOf(source.getString(tokenStart + 2, tokenLength - 2), 16);
        case OCTAL:
            return Lexer.valueOf(source.getString(tokenStart + 2, tokenLength - 2), 8);
        case BINARY_NUMBER:
            return Lexer.valueOf(source.getString(tokenStart + 2, tokenLength - 2), 2);

        case FLOATING: {
            final String literal = source.getString(tokenStart, tokenLength);
            final double parsed  = Double.valueOf(literal);

            // An explicit decimal point means the programmer asked for a double.
            // This keeps overloads such as new Color(int, int, int) and
            // new Color(float, float, float) unambiguous for new Color(1.0, 1.5, 1.5).
            if (literal.indexOf('.') != -1) {
                return parsed;
            }

            // Without a decimal point (e.g. 1e6) the value is narrowed to an int
            // whenever it is exactly representable as one.
            if (JSType.isStrictlyRepresentableAsInt(parsed)) {
                return (int)parsed;
            }
            return parsed;
        }

        // Text that is returned exactly as written in the source.
        case STRING:
        case DIRECTIVE_COMMENT:
            return source.getString(tokenStart, tokenLength);

        // Strings containing escapes need decoding.
        case ESCSTRING:
            return valueOfString(tokenStart, tokenLength, strict);

        // Template parts are always decoded with the strict flag set.
        case TEMPLATE:
        case TEMPLATE_HEAD:
        case TEMPLATE_MIDDLE:
        case TEMPLATE_TAIL:
            return valueOfString(tokenStart, tokenLength, true);

        case IDENT:
            return valueOfIdent(tokenStart, tokenLength);
        case REGEX:
            return valueOfPattern(tokenStart, tokenLength);
        case XML:
            return valueOfXML(tokenStart, tokenLength);

        // Every other token type carries no value.
        default:
            return null;
        }
    }

    /**
     * Get the raw string value of a template literal string part.
     *
     * <p>The characters covered by the token are copied as they appear in the
     * source, except that every CR and every CR-LF pair is replaced by a
     * single LF. The lexer position in effect on entry is restored before
     * returning.
     *
     * @param token template string token
     * @return raw string
     */
    public String valueOfRawString(final long token) {
        final int first = Token.descPosition(token);
        final int count = Token.descLength(token);
        // One past the last character belonging to the token.
        final int limit = first + count;

        // Remember where the lexer currently stands so it can be put back.
        final int resumeAt = position;
        // Rewind to the first character of the token.
        reset(first);

        final StringBuilder raw = new StringBuilder(count);

        while (position < limit) {
            if (ch0 != '\r') {
                // Ordinary character: copy it unchanged.
                raw.append(ch0);
                skip(1);
                continue;
            }

            // CR or CR-LF: emit a single LF and consume the whole terminator.
            raw.append('\n');
            if (ch1 == '\n') {
                skip(2);
            } else {
                skip(1);
            }
        }

        // Put the lexer back where it was on entry.
        reset(resumeAt);

        return raw.toString();
    }

    /**
     * Get the correctly localized error message for a given message id and
     * format arguments. The id is looked up under the {@code "lexer.error."}
     * prefix.
     *
     * @param msgId message id, without the {@code "lexer.error."} prefix
     * @param args  format arguments
     * @return message
     */
    protected static String message(final String msgId, final String... args) {
        final String qualifiedId = "lexer.error." + msgId;
        return ECMAErrors.getMessage(qualifiedId, args);
    }

    /**
     * Report a syntax error for a lexed span by throwing a
     * {@link ParserException}. This method never returns normally.
     *
     * <p>A token descriptor is built from the given type, start and length;
     * its position determines the line and column that are reported, and the
     * message is formatted by {@code ErrorManager.format} before being thrown.
     *
     * @param message       error message
     * @param type          token type
     * @param start         start position of lexed error
     * @param length        length of lexed error
     * @throws ParserException  unconditionally
     */
    protected void error(final String message, final TokenType type, final int start, final int length) throws ParserException {
        final long descriptor = Token.toDesc(type, start, length);
        final int  offset     = Token.descPosition(descriptor);

        // Line and column are both derived from the descriptor's position.
        final int line   = source.getLine(offset);
        final int column = source.getColumn(offset);

        final String text = ErrorManager.format(message, source, line, column, descriptor);

        throw new ParserException(JSErrorType.SYNTAX_ERROR, text, source, line, column, descriptor);
    }

    /**
     * Abstract superclass of the helper objects used for Lexer tokens,
     * e.g. XML or RegExp tokens. It stores the token's expression text.
     */
    public abstract static class LexerToken implements Serializable {
        private static final long serialVersionUID = 1L;

        /** Expression text; the field name is part of the serialized form. */
        private final String expression;

        /**
         * Constructor
         *
         * @param expression token expression
         */
        protected LexerToken(final String expression) {
            this.expression = expression;
        }

        /**
         * Get the expression
         *
         * @return expression, exactly as passed to the constructor
         */
        public String getExpression() {
            return this.expression;
        }
    }

    /**
     * Temporary container for regular expressions: the expression text held
     * by the superclass plus the options string.
     */
    public static class RegexToken extends LexerToken {
        private static final long serialVersionUID = 1L;

        /** Options; the field name is part of the serialized form. */
        private final String options;

        /**
         * Constructor.
         *
         * @param expression  regexp expression
         * @param options     regexp options
         */
        public RegexToken(final String expression, final String options) {
            super(expression);
            this.options = options;
        }

        /**
         * Get regexp options
         *
         * @return options, exactly as passed to the constructor
         */
        public String getOptions() {
            return this.options;
        }

        /**
         * Render the token as the expression between two slashes, followed
         * by the options.
         *
         * @return string of the form {@code /expression/options}
         */
        @Override
        public String toString() {
            return "/" + getExpression() + "/" + this.options;
        }
    }

    /**
     * Temporary container for XML expression.
     * Adds no state of its own; the expression text is held by the superclass.
     */
    public static class XMLToken extends LexerToken {
        private static final long serialVersionUID = 1L;

        /**
         * Constructor.
         *
         * @param expression  XML expression, passed unchanged to the superclass
         */
        public XMLToken(final String expression) {
            super(expression);
        }
    }
}