Lexer.java revision 1571:fd97b9047199
1151497Sru/*
2151497Sru * Copyright (c) 2010, 2015, Oracle and/or its affiliates. All rights reserved.
318099Spst * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
418099Spst *
518099Spst * This code is free software; you can redistribute it and/or modify it
618099Spst * under the terms of the GNU General Public License version 2 only, as
718099Spst * published by the Free Software Foundation.  Oracle designates this
818099Spst * particular file as subject to the "Classpath" exception as provided
918099Spst * by Oracle in the LICENSE file that accompanied this code.
1018099Spst *
1118099Spst * This code is distributed in the hope that it will be useful, but WITHOUT
1218099Spst * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
1318099Spst * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
1418099Spst * version 2 for more details (a copy is included in the LICENSE file that
1518099Spst * accompanied this code).
1618099Spst *
1718099Spst * You should have received a copy of the GNU General Public License version
1818099Spst * 2 along with this work; if not, write to the Free Software Foundation,
19151497Sru * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
2018099Spst *
21151497Sru * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
22151497Sru * or visit www.oracle.com if you need additional information or have any
2375584Sru * questions.
24151497Sru */
2575584Sru
26151497Srupackage jdk.nashorn.internal.parser;
2775584Sru
28151497Sruimport static jdk.nashorn.internal.parser.TokenType.ADD;
29151497Sruimport static jdk.nashorn.internal.parser.TokenType.BINARY_NUMBER;
30151497Sruimport static jdk.nashorn.internal.parser.TokenType.COMMENT;
31151497Sruimport static jdk.nashorn.internal.parser.TokenType.DECIMAL;
32151497Sruimport static jdk.nashorn.internal.parser.TokenType.DIRECTIVE_COMMENT;
33151497Sruimport static jdk.nashorn.internal.parser.TokenType.EOF;
34151497Sruimport static jdk.nashorn.internal.parser.TokenType.EOL;
35151497Sruimport static jdk.nashorn.internal.parser.TokenType.ERROR;
36151497Sruimport static jdk.nashorn.internal.parser.TokenType.ESCSTRING;
3775584Sruimport static jdk.nashorn.internal.parser.TokenType.EXECSTRING;
38104862Sruimport static jdk.nashorn.internal.parser.TokenType.FLOATING;
3975584Sruimport static jdk.nashorn.internal.parser.TokenType.FUNCTION;
4075584Sruimport static jdk.nashorn.internal.parser.TokenType.HEXADECIMAL;
4118099Spstimport static jdk.nashorn.internal.parser.TokenType.LBRACE;
4218099Spstimport static jdk.nashorn.internal.parser.TokenType.LPAREN;
4318099Spstimport static jdk.nashorn.internal.parser.TokenType.OCTAL;
4418099Spstimport static jdk.nashorn.internal.parser.TokenType.OCTAL_LEGACY;
4518099Spstimport static jdk.nashorn.internal.parser.TokenType.RBRACE;
46151497Sruimport static jdk.nashorn.internal.parser.TokenType.REGEX;
47151497Sruimport static jdk.nashorn.internal.parser.TokenType.RPAREN;
48151497Sruimport static jdk.nashorn.internal.parser.TokenType.STRING;
49151497Sruimport static jdk.nashorn.internal.parser.TokenType.TEMPLATE;
50151497Sruimport static jdk.nashorn.internal.parser.TokenType.TEMPLATE_HEAD;
51151497Sruimport static jdk.nashorn.internal.parser.TokenType.TEMPLATE_MIDDLE;
52151497Sruimport static jdk.nashorn.internal.parser.TokenType.TEMPLATE_TAIL;
53151497Sruimport static jdk.nashorn.internal.parser.TokenType.XML;
54151497Sru
55151497Sruimport java.io.Serializable;
56151497Sru
57151497Sruimport jdk.nashorn.internal.runtime.ECMAErrors;
5818099Spstimport jdk.nashorn.internal.runtime.ErrorManager;
5918099Spstimport jdk.nashorn.internal.runtime.JSErrorType;
6018099Spstimport jdk.nashorn.internal.runtime.JSType;
6118099Spstimport jdk.nashorn.internal.runtime.ParserException;
6218099Spstimport jdk.nashorn.internal.runtime.Source;
6318099Spstimport jdk.nashorn.internal.runtime.options.Options;
6418099Spst
6518099Spst/**
6618099Spst * Responsible for converting source content into a stream of tokens.
6718099Spst *
6818099Spst */
6918099Spst@SuppressWarnings("fallthrough")
7018099Spstpublic class Lexer extends Scanner {
    /** {@link Integer#MIN_VALUE} held as a {@code long}. */
    private static final long MIN_INT_L = Integer.MIN_VALUE;
    /** {@link Integer#MAX_VALUE} held as a {@code long}. */
    private static final long MAX_INT_L = Integer.MAX_VALUE;

    /**
     * Value of the {@code nashorn.lexer.xmlliterals} boolean option. When set,
     * {@link #canStartLiteral(TokenType)} accepts tokens starting with '&lt;'
     * even outside scripting mode.
     */
    private static final boolean XML_LITERALS = Options.getBooleanProperty("nashorn.lexer.xmlliterals");

    /** Content source. */
    private final Source source;

    /** Buffered stream that receives the tokens produced by this lexer. */
    private final TokenStream stream;

    /** True if scripting mode is on (here and edit strings, '#' comments). */
    private final boolean scripting;

    /** True if parsing in ECMAScript 6 mode. */
    private final boolean es6;

    /** True if a nested scan. (scan to completion, no EOF.) */
    private final boolean nested;

    /**
     * Value carried by the pending (not yet written) EOL token, or -1 when no
     * EOL is pending. {@code skipEOL} stores the current line number here via
     * {@code add(EOL, position, line)}.
     */
    int pendingLine;

    /** Position of last EOL + 1. */
    private int linePosition;

    /** Type of last token added. */
    private TokenType last;

    /** If true the lexer returns from {@code lexify()} when it reaches a function body (see the constructor documentation). */
    private final boolean pauseOnFunctionBody;
    // NOTE(review): not used in the visible part of the file; presumably armed
    // after a function token so that the next '{' triggers the pause - confirm.
    private boolean pauseOnNextLeftBrace;

    // NOTE(review): not used in the visible part of the file; presumably counts
    // open braces inside a template literal substitution - confirm.
    private int templateExpressionOpenBraces;
10418099Spst
    /** Horizontal ASCII blanks. */
    private static final String SPACETAB = " \t";  // ASCII space and tab
    /** ASCII line terminators. */
    private static final String LFCR     = "\n\r"; // line feed and carriage return (ctrl-m)

    /** Every character that {@link #isJSEOL(char)} treats as an end of line. */
    private static final String JAVASCRIPT_WHITESPACE_EOL =
        LFCR +
        "\u2028" + // line separator
        "\u2029"   // paragraph separator
        ;
    /**
     * Every character that {@link #isJSWhitespace(char)} treats as whitespace.
     * The end-of-line characters are included.
     */
    private static final String JAVASCRIPT_WHITESPACE =
        SPACETAB +
        JAVASCRIPT_WHITESPACE_EOL +
        "\u000b" + // line tabulation (vertical tab)
        "\u000c" + // ff (ctrl-l)
        "\u00a0" + // Latin-1 space
        "\u1680" + // Ogham space mark
        "\u180e" + // separator, Mongolian vowel
        "\u2000" + // en quad
        "\u2001" + // em quad
        "\u2002" + // en space
        "\u2003" + // em space
        "\u2004" + // three-per-em space
        "\u2005" + // four-per-em space
        "\u2006" + // six-per-em space
        "\u2007" + // figure space
        "\u2008" + // punctuation space
        "\u2009" + // thin space
        "\u200a" + // hair space
        "\u202f" + // narrow no-break space
        "\u205f" + // medium mathematical space
        "\u3000" + // ideographic space
        "\ufeff"   // byte order mark
        ;

    /**
     * The same character set as {@link #JAVASCRIPT_WHITESPACE}, but spelled as
     * escape text (a backslash, the letter u and four hex digits per character)
     * rather than as the characters themselves. Returned by
     * {@link #getWhitespaceRegExp()}.
     */
    private static final String JAVASCRIPT_WHITESPACE_IN_REGEXP =
        "\\u000a" + // line feed
        "\\u000d" + // carriage return (ctrl-m)
        "\\u2028" + // line separator
        "\\u2029" + // paragraph separator
        "\\u0009" + // tab
        "\\u0020" + // ASCII space
        "\\u000b" + // line tabulation (vertical tab)
        "\\u000c" + // ff (ctrl-l)
        "\\u00a0" + // Latin-1 space
        "\\u1680" + // Ogham space mark
        "\\u180e" + // separator, Mongolian vowel
        "\\u2000" + // en quad
        "\\u2001" + // em quad
        "\\u2002" + // en space
        "\\u2003" + // em space
        "\\u2004" + // three-per-em space
        "\\u2005" + // four-per-em space
        "\\u2006" + // six-per-em space
        "\\u2007" + // figure space
        "\\u2008" + // punctuation space
        "\\u2009" + // thin space
        "\\u200a" + // hair space
        "\\u202f" + // narrow no-break space
        "\\u205f" + // medium mathematical space
        "\\u3000" + // ideographic space
        "\\ufeff"   // byte order mark
        ;
16675584Sru
167104862Sru    static String unicodeEscape(final char ch) {
16875584Sru        final StringBuilder sb = new StringBuilder();
16975584Sru
170151497Sru        sb.append("\\u");
171151497Sru
172151497Sru        final String hex = Integer.toHexString(ch);
173151497Sru        for (int i = hex.length(); i < 4; i++) {
174104862Sru            sb.append('0');
17575584Sru        }
17675584Sru        sb.append(hex);
17775584Sru
17875584Sru        return sb.toString();
179151497Sru    }
18018099Spst
181104862Sru    /**
18218099Spst     * Constructor
18318099Spst     *
184114402Sru     * @param source    the source
185114402Sru     * @param stream    the token stream to lex
186114402Sru     */
187114402Sru    public Lexer(final Source source, final TokenStream stream) {
188114402Sru        this(source, stream, false, false);
189104862Sru    }
19018099Spst
19118099Spst    /**
19218099Spst     * Constructor
19318099Spst     *
19418099Spst     * @param source    the source
19518099Spst     * @param stream    the token stream to lex
19618099Spst     * @param scripting are we in scripting mode
19718099Spst     * @param es6       are we in ECMAScript 6 mode
19818099Spst     */
19918099Spst    public Lexer(final Source source, final TokenStream stream, final boolean scripting, final boolean es6) {
20069626Sru        this(source, 0, source.getLength(), stream, scripting, es6, false);
20118099Spst    }
20218099Spst
20369626Sru    /**
20418099Spst     * Constructor
20518099Spst     *
20669626Sru     * @param source    the source
20718099Spst     * @param start     start position in source from which to start lexing
20818099Spst     * @param len       length of source segment to lex
20918099Spst     * @param stream    token stream to lex
21018099Spst     * @param scripting are we in scripting mode
21118099Spst     * @param es6       are we in ECMAScript 6 mode
21218099Spst     * @param pauseOnFunctionBody if true, lexer will return from {@link #lexify()} when it encounters a
21318099Spst     * function body. This is used with the feature where the parser is skipping nested function bodies to
21418099Spst     * avoid reading ahead unnecessarily when we skip the function bodies.
21518099Spst     */
21618099Spst    public Lexer(final Source source, final int start, final int len, final TokenStream stream, final boolean scripting, final boolean es6, final boolean pauseOnFunctionBody) {
21718099Spst        super(source.getContent(), 1, start, len);
21818099Spst        this.source      = source;
21918099Spst        this.stream      = stream;
22018099Spst        this.scripting   = scripting;
22118099Spst        this.es6         = es6;
22218099Spst        this.nested      = false;
22318099Spst        this.pendingLine = 1;
22418099Spst        this.last        = EOL;
22518099Spst
22618099Spst        this.pauseOnFunctionBody = pauseOnFunctionBody;
227104862Sru    }
22879543Sru
22979543Sru    private Lexer(final Lexer lexer, final State state) {
23018099Spst        super(lexer, state);
231104862Sru
23218099Spst        source = lexer.source;
23318099Spst        stream = lexer.stream;
23418099Spst        scripting = lexer.scripting;
235104862Sru        es6 = lexer.es6;
23618099Spst        nested = true;
23718099Spst
23818099Spst        pendingLine = state.pendingLine;
239104862Sru        linePosition = state.linePosition;
24018099Spst        last = EOL;
24118099Spst        pauseOnFunctionBody = false;
24218099Spst    }
243104862Sru
244104862Sru    static class State extends Scanner.State {
245104862Sru        /** Pending new line number and position. */
246104862Sru        public final int pendingLine;
247104862Sru
248151497Sru        /** Position of last EOL + 1. */
249151497Sru        public final int linePosition;
250151497Sru
251151497Sru        /** Type of last token added. */
252151497Sru        public final TokenType last;
25318099Spst
25418099Spst        /*
25518099Spst         * Constructor.
256104862Sru         */
25718099Spst
25818099Spst        State(final int position, final int limit, final int line, final int pendingLine, final int linePosition, final TokenType last) {
25918099Spst            super(position, limit, line);
26018099Spst
261104862Sru            this.pendingLine = pendingLine;
26218099Spst            this.linePosition = linePosition;
26318099Spst            this.last = last;
26418099Spst        }
265104862Sru    }
266104862Sru
267151497Sru    /**
26818099Spst     * Save the state of the scan.
269104862Sru     *
27069626Sru     * @return Captured state.
271104862Sru     */
272104862Sru    @Override
273104862Sru    State saveState() {
274104862Sru        return new State(position, limit, line, pendingLine, linePosition, last);
275104862Sru    }
276104862Sru
277104862Sru    /**
278104862Sru     * Restore the state of the scan.
279151497Sru     *
280104862Sru     * @param state
281104862Sru     *            Captured state.
282104862Sru     */
28318099Spst    void restoreState(final State state) {
284114402Sru        super.restoreState(state);
285104862Sru
286104862Sru        pendingLine = state.pendingLine;
287104862Sru        linePosition = state.linePosition;
288104862Sru        last = state.last;
28969626Sru    }
290104862Sru
291151497Sru    /**
292104862Sru     * Add a new token to the stream.
293104862Sru     *
294104862Sru     * @param type
295104862Sru     *            Token type.
29669626Sru     * @param start
297104862Sru     *            Start position.
298104862Sru     * @param end
299151497Sru     *            End position.
300151497Sru     */
301114402Sru    protected void add(final TokenType type, final int start, final int end) {
302151497Sru        // Record last token.
303151497Sru        last = type;
304114402Sru
305104862Sru        // Only emit the last EOL in a cluster.
306104862Sru        if (type == EOL) {
30769626Sru            pendingLine = end;
30869626Sru            linePosition = start;
30969626Sru        } else {
310151497Sru            // Write any pending EOL to stream.
311151497Sru            if (pendingLine != -1) {
312151497Sru                stream.put(Token.toDesc(EOL, linePosition, pendingLine));
313151497Sru                pendingLine = -1;
314104862Sru            }
315104862Sru
31655839Sasmodai            // Write token to stream.
31755839Sasmodai            stream.put(Token.toDesc(type, start, end - start));
31855839Sasmodai        }
31918099Spst    }
32055839Sasmodai
32118099Spst    /**
32218099Spst     * Add a new token to the stream.
32318099Spst     *
32469626Sru     * @param type
325151497Sru     *            Token type.
326151497Sru     * @param start
327151497Sru     *            Start position.
32818099Spst     */
32918099Spst    protected void add(final TokenType type, final int start) {
330104862Sru        add(type, start, position);
331104862Sru    }
332114402Sru
333114402Sru    /**
334104862Sru     * Return the String of valid whitespace characters for regular
335104862Sru     * expressions in JavaScript
336151497Sru     * @return regexp whitespace string
33718099Spst     */
33818099Spst    public static String getWhitespaceRegExp() {
339114402Sru        return JAVASCRIPT_WHITESPACE_IN_REGEXP;
34018099Spst    }
34118099Spst
34218099Spst    /**
34318099Spst     * Skip end of line.
34418099Spst     *
34518099Spst     * @param addEOL true if EOL token should be recorded.
34618099Spst     */
34718099Spst    private void skipEOL(final boolean addEOL) {
34818099Spst
349151497Sru        if (ch0 == '\r') { // detect \r\n pattern
35055839Sasmodai            skip(1);
351151497Sru            if (ch0 == '\n') {
352151497Sru                skip(1);
353151497Sru            }
354151497Sru        } else { // all other space, ch0 is guaranteed to be EOL or \0
355151497Sru            skip(1);
356151497Sru        }
35718099Spst
35818099Spst        // bump up line count
35918099Spst        line++;
360104862Sru
361104862Sru        if (addEOL) {
362114402Sru            // Add an EOL token.
363114402Sru            add(EOL, position, line);
36418099Spst        }
36555839Sasmodai    }
36618099Spst
36718099Spst    /**
36818099Spst     * Skip over rest of line including end of line.
36969626Sru     *
37018099Spst     * @param addEOL true if EOL token should be recorded.
371104862Sru     */
37218099Spst    private void skipLine(final boolean addEOL) {
37318099Spst        // Ignore characters.
37418099Spst        while (!isEOL(ch0) && !atEOF()) {
37518099Spst            skip(1);
37618099Spst        }
37718099Spst        // Skip over end of line.
37818099Spst        skipEOL(addEOL);
37918099Spst    }
38018099Spst
38118099Spst    /**
382151497Sru     * Test whether a char is valid JavaScript whitespace
383151497Sru     * @param ch a char
38418099Spst     * @return true if valid JavaScript whitespace
385104862Sru     */
386151497Sru    public static boolean isJSWhitespace(final char ch) {
38769626Sru        return JAVASCRIPT_WHITESPACE.indexOf(ch) != -1;
38818099Spst    }
38918099Spst
39018099Spst    /**
39118099Spst     * Test whether a char is valid JavaScript end of line
39218099Spst     * @param ch a char
39318099Spst     * @return true if valid JavaScript end of line
39418099Spst     */
39518099Spst    public static boolean isJSEOL(final char ch) {
39669626Sru        return JAVASCRIPT_WHITESPACE_EOL.indexOf(ch) != -1;
397151497Sru    }
398151497Sru
399114402Sru    /**
40069626Sru     * Test if char is a string delimiter, e.g. '\' or '"'.
401114402Sru     * @param ch a char
402114402Sru     * @return true if string delimiter
403114402Sru     */
404114402Sru    protected boolean isStringDelimiter(final char ch) {
405114402Sru        return ch == '\'' || ch == '"';
406114402Sru    }
407151497Sru
40869626Sru    /**
409114402Sru     * Test if char is a template literal delimiter ('`').
410114402Sru     */
411114402Sru    private static boolean isTemplateDelimiter(char ch) {
412114402Sru        return ch == '`';
413114402Sru    }
414114402Sru
415114402Sru    /**
416114402Sru     * Test whether a char is valid JavaScript whitespace
417114402Sru     * @param ch a char
418114402Sru     * @return true if valid JavaScript whitespace
419114402Sru     */
420114402Sru    protected boolean isWhitespace(final char ch) {
421114402Sru        return Lexer.isJSWhitespace(ch);
422114402Sru    }
423114402Sru
424114402Sru    /**
425104862Sru     * Test whether a char is valid JavaScript end of line
426114402Sru     * @param ch a char
427151497Sru     * @return true if valid JavaScript end of line
428114402Sru     */
42969626Sru    protected boolean isEOL(final char ch) {
430114402Sru        return Lexer.isJSEOL(ch);
431114402Sru    }
432114402Sru
433151497Sru    /**
434151497Sru     * Skip over whitespace and detect end of line, adding EOL tokens if
435151497Sru     * encountered.
436114402Sru     *
437151497Sru     * @param addEOL true if EOL tokens should be recorded.
438114402Sru     */
439151497Sru    private void skipWhitespace(final boolean addEOL) {
440151497Sru        while (isWhitespace(ch0)) {
441151497Sru            if (isEOL(ch0)) {
442151497Sru                skipEOL(addEOL);
443151497Sru            } else {
444151497Sru                skip(1);
445151497Sru            }
446114402Sru        }
447114402Sru    }
448151497Sru
449114402Sru    /**
450114402Sru     * Skip over comments.
45169626Sru     *
45275584Sru     * @return True if a comment.
45369626Sru     */
454104862Sru    protected boolean skipComments() {
455104862Sru        // Save the current position.
456114402Sru        final int start = position;
457114402Sru
458114402Sru        if (ch0 == '/') {
459114402Sru            // Is it a // comment.
460104862Sru            if (ch1 == '/') {
461151497Sru                // Skip over //.
462114402Sru                skip(2);
463114402Sru
464114402Sru                boolean directiveComment = false;
465114402Sru                if ((ch0 == '#' || ch0 == '@') && (ch1 == ' ')) {
466114402Sru                    directiveComment = true;
46775584Sru                }
46875584Sru
469104862Sru                // Scan for EOL.
47075584Sru                while (!atEOF() && !isEOL(ch0)) {
471114402Sru                    skip(1);
472114402Sru                }
473151497Sru                // Did detect a comment.
474151497Sru                add(directiveComment? DIRECTIVE_COMMENT : COMMENT, start);
475114402Sru                return true;
47669626Sru            } else if (ch1 == '*') {
477114402Sru                // Skip over /*.
47869626Sru                skip(2);
479114402Sru                // Scan for */.
48069626Sru                while (!atEOF() && !(ch0 == '*' && ch1 == '/')) {
481114402Sru                    // If end of line handle else skip character.
48269626Sru                    if (isEOL(ch0)) {
483114402Sru                        skipEOL(true);
484114402Sru                    } else {
48569626Sru                        skip(1);
486114402Sru                    }
487114402Sru                }
48818099Spst
489114402Sru                if (atEOF()) {
49069626Sru                    // TODO - Report closing */ missing in parser.
491114402Sru                    add(ERROR, start);
492114402Sru                } else {
493114402Sru                    // Skip */.
494114402Sru                    skip(2);
495114402Sru                }
496114402Sru
49718099Spst                // Did detect a comment.
49869626Sru                add(COMMENT, start);
49969626Sru                return true;
50069626Sru            }
50169626Sru        } else if (ch0 == '#') {
502151497Sru            assert scripting;
503151497Sru            // shell style comment
50469626Sru            // Skip over #.
50569626Sru            skip(1);
50669626Sru            // Scan for EOL.
50769626Sru            while (!atEOF() && !isEOL(ch0)) {
50869626Sru                skip(1);
50969626Sru            }
51069626Sru            // Did detect a comment.
51169626Sru            add(COMMENT, start);
51269626Sru            return true;
51375584Sru        }
51469626Sru
51569626Sru        // Not a comment.
51669626Sru        return false;
51769626Sru    }
51869626Sru
51969626Sru    /**
52069626Sru     * Convert a regex token to a token object.
52169626Sru     *
52269626Sru     * @param start  Position in source content.
52369626Sru     * @param length Length of regex token.
52469626Sru     * @return Regex token object.
52569626Sru     */
526151497Sru    public RegexToken valueOfPattern(final int start, final int length) {
527151497Sru        // Save the current position.
528151497Sru        final int savePosition = position;
529151497Sru        // Reset to beginning of content.
530151497Sru        reset(start);
53169626Sru        // Buffer for recording characters.
53269626Sru        final StringBuilder sb = new StringBuilder(length);
53369626Sru
534104862Sru        // Skip /.
53569626Sru        skip(1);
53669626Sru        boolean inBrackets = false;
53769626Sru        // Scan for closing /, stopping at end of line.
53869626Sru        while (!atEOF() && ch0 != '/' && !isEOL(ch0) || inBrackets) {
53969626Sru            // Skip over escaped character.
540151497Sru            if (ch0 == '\\') {
54169626Sru                sb.append(ch0);
54269626Sru                sb.append(ch1);
54369626Sru                skip(2);
54469626Sru            } else {
54569626Sru                if (ch0 == '[') {
54669626Sru                    inBrackets = true;
547151497Sru                } else if (ch0 == ']') {
548104862Sru                    inBrackets = false;
549104862Sru                }
550104862Sru
551151497Sru                // Skip literal character.
552104862Sru                sb.append(ch0);
553104862Sru                skip(1);
554151497Sru            }
555151497Sru        }
55669626Sru
557151497Sru        // Get pattern as string.
55869626Sru        final String regex = sb.toString();
55969626Sru
56069626Sru        // Skip /.
561104862Sru        skip(1);
56269626Sru
563104862Sru        // Options as string.
564104862Sru        final String options = source.getString(position, scanIdentifier());
565104862Sru
566104862Sru        reset(savePosition);
567104862Sru
568104862Sru        // Compile the pattern.
569151497Sru        return new RegexToken(regex, options);
570104862Sru    }
571104862Sru
572151497Sru    /**
573104862Sru     * Return true if the given token can be the beginning of a literal.
57418099Spst     *
57518099Spst     * @param token a token
57618099Spst     * @return true if token can start a literal.
57769626Sru     */
57869626Sru    public boolean canStartLiteral(final TokenType token) {
57969626Sru        return token.startsWith('/') || ((scripting || XML_LITERALS) && token.startsWith('<'));
58069626Sru    }
581151497Sru
582151497Sru    /**
58369626Sru     * interface to receive line information for multi-line literals.
58469626Sru     */
58569626Sru    protected interface LineInfoReceiver {
58618099Spst        /**
58718099Spst         * Receives line information
58818099Spst         * @param line last line number
58969626Sru         * @param linePosition position of last line
59018099Spst         */
59118099Spst        public void lineInfo(int line, int linePosition);
59218099Spst    }
59369626Sru
59418099Spst    /**
59518099Spst     * Check whether the given token represents the beginning of a literal. If so scan
59669626Sru     * the literal and return <tt>true</tt>, otherwise return false.
59769626Sru     *
59869626Sru     * @param token the token.
59969626Sru     * @param startTokenType the token type.
60055839Sasmodai     * @param lir LineInfoReceiver that receives line info for multi-line string literals.
60118099Spst     * @return True if a literal beginning with startToken was found and scanned.
60269626Sru     */
603151497Sru    protected boolean scanLiteral(final long token, final TokenType startTokenType, final LineInfoReceiver lir) {
604151497Sru        // Check if it can be a literal.
605151497Sru        if (!canStartLiteral(startTokenType)) {
606151497Sru            return false;
60718099Spst        }
60869626Sru        // We break on ambiguous tokens so if we already moved on it can't be a literal.
60918099Spst        if (stream.get(stream.last()) != token) {
61069626Sru            return false;
61118099Spst        }
61269626Sru        // Rewind to token start position
61369626Sru        reset(Token.descPosition(token));
61469626Sru
61569626Sru        if (ch0 == '/') {
61618099Spst            return scanRegEx();
61718099Spst        } else if (ch0 == '<') {
61869626Sru            if (ch1 == '<') {
619151497Sru                return scanHereString(lir);
620151497Sru            } else if (Character.isJavaIdentifierStart(ch1)) {
621151497Sru                return scanXMLLiteral();
622151497Sru            }
62318099Spst        }
62469626Sru
62518099Spst        return false;
62669626Sru    }
62718099Spst
62869626Sru    /**
62969626Sru     * Scan over regex literal.
63069626Sru     *
63169626Sru     * @return True if a regex literal.
63218099Spst     */
63318099Spst    private boolean scanRegEx() {
63469626Sru        assert ch0 == '/';
635151497Sru        // Make sure it's not a comment.
636151497Sru        if (ch1 != '/' && ch1 != '*') {
637151497Sru            // Record beginning of literal.
638151497Sru            final int start = position;
63918099Spst            // Skip /.
64069626Sru            skip(1);
64118099Spst            boolean inBrackets = false;
64269626Sru
64318099Spst            // Scan for closing /, stopping at end of line.
64469626Sru            while (!atEOF() && (ch0 != '/' || inBrackets) && !isEOL(ch0)) {
64569626Sru                // Skip over escaped character.
64669626Sru                if (ch0 == '\\') {
64769626Sru                    skip(1);
64818099Spst                    if (isEOL(ch0)) {
649151497Sru                        reset(start);
65069626Sru                        return false;
651151497Sru                    }
652151497Sru                    skip(1);
653151497Sru                } else {
654151497Sru                    if (ch0 == '[') {
65518099Spst                        inBrackets = true;
65669626Sru                    } else if (ch0 == ']') {
65718099Spst                        inBrackets = false;
65818099Spst                    }
65969626Sru
66069626Sru                    // Skip literal character.
66169626Sru                    skip(1);
66218099Spst                }
663151497Sru            }
66469626Sru
665151497Sru            // If regex literal.
666151497Sru            if (ch0 == '/') {
667151497Sru                // Skip /.
668151497Sru                skip(1);
66918099Spst
67069626Sru                // Skip over options.
67118099Spst                while (!atEOF() && Character.isJavaIdentifierPart(ch0) || ch0 == '\\' && ch1 == 'u') {
67218099Spst                    skip(1);
67369626Sru                }
67469626Sru
67569626Sru                // Add regex token.
67618099Spst                add(REGEX, start);
67718099Spst                // Regex literal detected.
67818099Spst                return true;
67918099Spst            }
68069626Sru
68118099Spst            // False start try again.
68218099Spst            reset(start);
68369626Sru        }
68418099Spst
68518099Spst        // Regex literal not detected.
68618099Spst        return false;
68718099Spst    }
68818099Spst
689104862Sru    /**
69018099Spst     * Convert a digit to a integer.  Can't use Character.digit since we are
691104862Sru     * restricted to ASCII by the spec.
692104862Sru     *
693104862Sru     * @param ch   Character to convert.
69418099Spst     * @param base Numeric base.
695104862Sru     *
696104862Sru     * @return The converted digit or -1 if invalid.
69718099Spst     */
698151497Sru    protected static int convertDigit(final char ch, final int base) {
699151497Sru        int digit;
700151497Sru
701151497Sru        if ('0' <= ch && ch <= '9') {
702151497Sru            digit = ch - '0';
703151497Sru        } else if ('A' <= ch && ch <= 'Z') {
70418099Spst            digit = ch - 'A' + 10;
70518099Spst        } else if ('a' <= ch && ch <= 'z') {
70618099Spst            digit = ch - 'a' + 10;
70769626Sru        } else {
70869626Sru            return -1;
70969626Sru        }
71069626Sru
71118099Spst        return digit < base ? digit : -1;
71218099Spst    }
71318099Spst
71418099Spst
71518099Spst    /**
71618099Spst     * Get the value of a hexadecimal numeric sequence.
71718099Spst     *
71818099Spst     * @param length Number of digits.
719104862Sru     * @param type   Type of token to report against.
720104862Sru     * @return Value of sequence or < 0 if no digits.
72118099Spst     */
72218099Spst    private int hexSequence(final int length, final TokenType type) {
72318099Spst        int value = 0;
72418099Spst
72518099Spst        for (int i = 0; i < length; i++) {
72618099Spst            final int digit = convertDigit(ch0, 16);
72718099Spst
72818099Spst            if (digit == -1) {
72918099Spst                error(Lexer.message("invalid.hex"), type, position, limit);
73018099Spst                return i == 0 ? -1 : value;
73118099Spst            }
73218099Spst
73318099Spst            value = digit | value << 4;
73418099Spst            skip(1);
73518099Spst        }
73618099Spst
73718099Spst        return value;
738104862Sru    }
73918099Spst
740104862Sru    /**
741104862Sru     * Get the value of an octal numeric sequence. This parses up to 3 digits with a maximum value of 255.
742151497Sru     *
743151497Sru     * @return Value of sequence.
744151497Sru     */
745151497Sru    private int octalSequence() {
74618099Spst        int value = 0;
747151497Sru
748151497Sru        for (int i = 0; i < 3; i++) {
74918099Spst            final int digit = convertDigit(ch0, 8);
750151497Sru
751151497Sru            if (digit == -1) {
752151497Sru                break;
753151497Sru            }
754151497Sru            value = digit | value << 3;
755151497Sru            skip(1);
75618099Spst
757151497Sru            if (i == 1 && value >= 32) {
758151497Sru                break;
759151497Sru            }
760151497Sru        }
761151497Sru        return value;
762151497Sru    }
763151497Sru
764151497Sru    /**
765151497Sru     * Convert a string to a JavaScript identifier.
766151497Sru     *
767151497Sru     * @param start  Position in source content.
768151497Sru     * @param length Length of token.
769151497Sru     * @return Ident string or null if an error.
770151497Sru     */
771151497Sru    private String valueOfIdent(final int start, final int length) throws RuntimeException {
772151497Sru        // Save the current position.
773151497Sru        final int savePosition = position;
77418099Spst        // End of scan.
77518099Spst        final int end = start + length;
77618099Spst        // Reset to beginning of content.
777        reset(start);
778        // Buffer for recording characters.
779        final StringBuilder sb = new StringBuilder(length);
780
781        // Scan until end of line or end of file.
782        while (!atEOF() && position < end && !isEOL(ch0)) {
783            // If escape character.
784            if (ch0 == '\\' && ch1 == 'u') {
785                skip(2);
786                final int ch = hexSequence(4, TokenType.IDENT);
787                if (isWhitespace((char)ch)) {
788                    return null;
789                }
790                if (ch < 0) {
791                    sb.append('\\');
792                    sb.append('u');
793                } else {
794                    sb.append((char)ch);
795                }
796            } else {
797                // Add regular character.
798                sb.append(ch0);
799                skip(1);
800            }
801        }
802
803        // Restore position.
804        reset(savePosition);
805
806        return sb.toString();
807    }
808
809    /**
810     * Scan over and identifier or keyword. Handles identifiers containing
811     * encoded Unicode chars.
812     *
813     * Example:
814     *
815     * var \u0042 = 44;
816     */
817    private void scanIdentifierOrKeyword() {
818        // Record beginning of identifier.
819        final int start = position;
820        // Scan identifier.
821        final int length = scanIdentifier();
822        // Check to see if it is a keyword.
823        final TokenType type = TokenLookup.lookupKeyword(content, start, length);
824        if (type == FUNCTION && pauseOnFunctionBody) {
825            pauseOnNextLeftBrace = true;
826        }
827        // Add keyword or identifier token.
828        add(type, start);
829    }
830
831    /**
832     * Convert a string to a JavaScript string object.
833     *
834     * @param start  Position in source content.
835     * @param length Length of token.
836     * @return JavaScript string object.
837     */
838    private String valueOfString(final int start, final int length, final boolean strict) throws RuntimeException {
839        // Save the current position.
840        final int savePosition = position;
841        // Calculate the end position.
842        final int end = start + length;
843        // Reset to beginning of string.
844        reset(start);
845
846        // Buffer for recording characters.
847        final StringBuilder sb = new StringBuilder(length);
848
849        // Scan until end of string.
850        while (position < end) {
851            // If escape character.
852            if (ch0 == '\\') {
853                skip(1);
854
855                final char next = ch0;
856                final int afterSlash = position;
857
858                skip(1);
859
860                // Special characters.
861                switch (next) {
862                case '0':
863                case '1':
864                case '2':
865                case '3':
866                case '4':
867                case '5':
868                case '6':
869                case '7': {
870                    if (strict) {
871                        // "\0" itself is allowed in strict mode. Only other 'real'
872                        // octal escape sequences are not allowed (eg. "\02", "\31").
873                        // See section 7.8.4 String literals production EscapeSequence
874                        if (next != '0' || (ch0 >= '0' && ch0 <= '9')) {
875                            error(Lexer.message("strict.no.octal"), STRING, position, limit);
876                        }
877                    }
878                    reset(afterSlash);
879                    // Octal sequence.
880                    final int ch = octalSequence();
881
882                    if (ch < 0) {
883                        sb.append('\\');
884                        sb.append('x');
885                    } else {
886                        sb.append((char)ch);
887                    }
888                    break;
889                }
890                case 'n':
891                    sb.append('\n');
892                    break;
893                case 't':
894                    sb.append('\t');
895                    break;
896                case 'b':
897                    sb.append('\b');
898                    break;
899                case 'f':
900                    sb.append('\f');
901                    break;
902                case 'r':
903                    sb.append('\r');
904                    break;
905                case '\'':
906                    sb.append('\'');
907                    break;
908                case '\"':
909                    sb.append('\"');
910                    break;
911                case '\\':
912                    sb.append('\\');
913                    break;
914                case '\r': // CR | CRLF
915                    if (ch0 == '\n') {
916                        skip(1);
917                    }
918                    // fall through
919                case '\n': // LF
920                case '\u2028': // LS
921                case '\u2029': // PS
922                    // continue on the next line, slash-return continues string
923                    // literal
924                    break;
925                case 'x': {
926                    // Hex sequence.
927                    final int ch = hexSequence(2, STRING);
928
929                    if (ch < 0) {
930                        sb.append('\\');
931                        sb.append('x');
932                    } else {
933                        sb.append((char)ch);
934                    }
935                }
936                    break;
937                case 'u': {
938                    // Unicode sequence.
939                    final int ch = hexSequence(4, STRING);
940
941                    if (ch < 0) {
942                        sb.append('\\');
943                        sb.append('u');
944                    } else {
945                        sb.append((char)ch);
946                    }
947                }
948                    break;
949                case 'v':
950                    sb.append('\u000B');
951                    break;
952                // All other characters.
953                default:
954                    sb.append(next);
955                    break;
956                }
957            } else if (ch0 == '\r') {
958                // Convert CR-LF or CR to LF line terminator.
959                sb.append('\n');
960                skip(ch1 == '\n' ? 2 : 1);
961            } else {
962                // Add regular character.
963                sb.append(ch0);
964                skip(1);
965            }
966        }
967
968        // Restore position.
969        reset(savePosition);
970
971        return sb.toString();
972    }
973
974    /**
975     * Scan over a string literal.
976     * @param add true if we are not just scanning but should actually modify the token stream
977     */
978    protected void scanString(final boolean add) {
979        // Type of string.
980        TokenType type = STRING;
981        // Record starting quote.
982        final char quote = ch0;
983        // Skip over quote.
984        skip(1);
985
986        // Record beginning of string content.
987        final State stringState = saveState();
988
989        // Scan until close quote or end of line.
990        while (!atEOF() && ch0 != quote && !isEOL(ch0)) {
991            // Skip over escaped character.
992            if (ch0 == '\\') {
993                type = ESCSTRING;
994                skip(1);
995                if (! isEscapeCharacter(ch0)) {
996                    error(Lexer.message("invalid.escape.char"), STRING, position, limit);
997                }
998                if (isEOL(ch0)) {
999                    // Multiline string literal
1000                    skipEOL(false);
1001                    continue;
1002                }
1003            }
1004            // Skip literal character.
1005            skip(1);
1006        }
1007
1008        // If close quote.
1009        if (ch0 == quote) {
1010            // Skip close quote.
1011            skip(1);
1012        } else {
1013            error(Lexer.message("missing.close.quote"), STRING, position, limit);
1014        }
1015
1016        // If not just scanning.
1017        if (add) {
1018            // Record end of string.
1019            stringState.setLimit(position - 1);
1020
1021            if (scripting && !stringState.isEmpty()) {
1022                switch (quote) {
1023                case '`':
1024                    // Mark the beginning of an exec string.
1025                    add(EXECSTRING, stringState.position, stringState.limit);
1026                    // Frame edit string with left brace.
1027                    add(LBRACE, stringState.position, stringState.position);
1028                    // Process edit string.
1029                    editString(type, stringState);
1030                    // Frame edit string with right brace.
1031                    add(RBRACE, stringState.limit, stringState.limit);
1032                    break;
1033                case '"':
1034                    // Only edit double quoted strings.
1035                    editString(type, stringState);
1036                    break;
1037                case '\'':
1038                    // Add string token without editing.
1039                    add(type, stringState.position, stringState.limit);
1040                    break;
1041                default:
1042                    break;
1043                }
1044            } else {
1045                /// Add string token without editing.
1046                add(type, stringState.position, stringState.limit);
1047            }
1048        }
1049    }
1050
1051    /**
1052     * Scan over a template string literal.
1053     */
1054    private void scanTemplate() {
1055        assert ch0 == '`';
1056        TokenType type = TEMPLATE;
1057
1058        // Skip over quote and record beginning of string content.
1059        skip(1);
1060        State stringState = saveState();
1061
1062        // Scan until close quote
1063        while (!atEOF()) {
1064            // Skip over escaped character.
1065            if (ch0 == '`') {
1066                skip(1);
1067                // Record end of string.
1068                stringState.setLimit(position - 1);
1069                add(type == TEMPLATE ? type : TEMPLATE_TAIL, stringState.position, stringState.limit);
1070                return;
1071            } else if (ch0 == '$' && ch1 == '{') {
1072                skip(2);
1073                stringState.setLimit(position - 2);
1074                add(type == TEMPLATE ? TEMPLATE_HEAD : type, stringState.position, stringState.limit);
1075
1076                // scan to RBRACE
1077                Lexer expressionLexer = new Lexer(this, saveState());
1078                expressionLexer.templateExpressionOpenBraces = 1;
1079                expressionLexer.lexify();
1080                restoreState(expressionLexer.saveState());
1081
1082                // scan next middle or tail of the template literal
1083                assert ch0 == '}';
1084                type = TEMPLATE_MIDDLE;
1085
1086                // Skip over rbrace and record beginning of string content.
1087                skip(1);
1088                stringState = saveState();
1089
1090                continue;
1091            } else if (ch0 == '\\') {
1092                skip(1);
1093                // EscapeSequence
1094                if (!isEscapeCharacter(ch0)) {
1095                    error(Lexer.message("invalid.escape.char"), TEMPLATE, position, limit);
1096                }
1097                if (isEOL(ch0)) {
1098                    // LineContinuation
1099                    skipEOL(false);
1100                    continue;
1101                }
1102            }  else if (isEOL(ch0)) {
1103                // LineTerminatorSequence
1104                skipEOL(false);
1105                continue;
1106            }
1107
1108            // Skip literal character.
1109            skip(1);
1110        }
1111
1112        error(Lexer.message("missing.close.quote"), TEMPLATE, position, limit);
1113    }
1114
1115    /**
1116     * Is the given character a valid escape char after "\" ?
1117     *
1118     * @param ch character to be checked
1119     * @return if the given character is valid after "\"
1120     */
1121    protected boolean isEscapeCharacter(final char ch) {
1122        return true;
1123    }
1124
1125    /**
1126     * Convert string to number.
1127     *
1128     * @param valueString  String to convert.
1129     * @param radix        Numeric base.
1130     * @return Converted number.
1131     */
1132    private static Number valueOf(final String valueString, final int radix) throws NumberFormatException {
1133        try {
1134            return Integer.parseInt(valueString, radix);
1135        } catch (final NumberFormatException e) {
1136            if (radix == 10) {
1137                return Double.valueOf(valueString);
1138            }
1139
1140            double value = 0.0;
1141
1142            for (int i = 0; i < valueString.length(); i++) {
1143                final char ch = valueString.charAt(i);
1144                // Preverified, should always be a valid digit.
1145                final int digit = convertDigit(ch, radix);
1146                value *= radix;
1147                value += digit;
1148            }
1149
1150            return value;
1151        }
1152    }
1153
1154    /**
1155     * Scan a number.
1156     */
1157    protected void scanNumber() {
1158        // Record beginning of number.
1159        final int start = position;
1160        // Assume value is a decimal.
1161        TokenType type = DECIMAL;
1162
1163        // First digit of number.
1164        int digit = convertDigit(ch0, 10);
1165
1166        // If number begins with 0x.
1167        if (digit == 0 && (ch1 == 'x' || ch1 == 'X') && convertDigit(ch2, 16) != -1) {
1168            // Skip over 0xN.
1169            skip(3);
1170            // Skip over remaining digits.
1171            while (convertDigit(ch0, 16) != -1) {
1172                skip(1);
1173            }
1174
1175            type = HEXADECIMAL;
1176        } else if (digit == 0 && es6 && (ch1 == 'o' || ch1 == 'O') && convertDigit(ch2, 8) != -1) {
1177            // Skip over 0oN.
1178            skip(3);
1179            // Skip over remaining digits.
1180            while (convertDigit(ch0, 8) != -1) {
1181                skip(1);
1182            }
1183
1184            type = OCTAL;
1185        } else if (digit == 0 && es6 && (ch1 == 'b' || ch1 == 'B') && convertDigit(ch2, 2) != -1) {
1186            // Skip over 0bN.
1187            skip(3);
1188            // Skip over remaining digits.
1189            while (convertDigit(ch0, 2) != -1) {
1190                skip(1);
1191            }
1192
1193            type = BINARY_NUMBER;
1194        } else {
1195            // Check for possible octal constant.
1196            boolean octal = digit == 0;
1197            // Skip first digit if not leading '.'.
1198            if (digit != -1) {
1199                skip(1);
1200            }
1201
1202            // Skip remaining digits.
1203            while ((digit = convertDigit(ch0, 10)) != -1) {
1204                // Check octal only digits.
1205                octal = octal && digit < 8;
1206                // Skip digit.
1207                skip(1);
1208            }
1209
1210            if (octal && position - start > 1) {
1211                type = OCTAL_LEGACY;
1212            } else if (ch0 == '.' || ch0 == 'E' || ch0 == 'e') {
1213                // Must be a double.
1214                if (ch0 == '.') {
1215                    // Skip period.
1216                    skip(1);
1217                    // Skip mantissa.
1218                    while (convertDigit(ch0, 10) != -1) {
1219                        skip(1);
1220                    }
1221                }
1222
1223                // Detect exponent.
1224                if (ch0 == 'E' || ch0 == 'e') {
1225                    // Skip E.
1226                    skip(1);
1227                    // Detect and skip exponent sign.
1228                    if (ch0 == '+' || ch0 == '-') {
1229                        skip(1);
1230                    }
1231                    // Skip exponent.
1232                    while (convertDigit(ch0, 10) != -1) {
1233                        skip(1);
1234                    }
1235                }
1236
1237                type = FLOATING;
1238            }
1239        }
1240
1241        if (Character.isJavaIdentifierStart(ch0)) {
1242            error(Lexer.message("missing.space.after.number"), type, position, 1);
1243        }
1244
1245        // Add number token.
1246        add(type, start);
1247    }
1248
1249    /**
1250     * Convert a regex token to a token object.
1251     *
1252     * @param start  Position in source content.
1253     * @param length Length of regex token.
1254     * @return Regex token object.
1255     */
1256    XMLToken valueOfXML(final int start, final int length) {
1257        return new XMLToken(source.getString(start, length));
1258    }
1259
1260    /**
1261     * Scan over a XML token.
1262     *
1263     * @return TRUE if is an XML literal.
1264     */
1265    private boolean scanXMLLiteral() {
1266        assert ch0 == '<' && Character.isJavaIdentifierStart(ch1);
1267        if (XML_LITERALS) {
1268            // Record beginning of xml expression.
1269            final int start = position;
1270
1271            int openCount = 0;
1272
1273            do {
1274                if (ch0 == '<') {
1275                    if (ch1 == '/' && Character.isJavaIdentifierStart(ch2)) {
1276                        skip(3);
1277                        openCount--;
1278                    } else if (Character.isJavaIdentifierStart(ch1)) {
1279                        skip(2);
1280                        openCount++;
1281                    } else if (ch1 == '?') {
1282                        skip(2);
1283                    } else if (ch1 == '!' && ch2 == '-' && ch3 == '-') {
1284                        skip(4);
1285                    } else {
1286                        reset(start);
1287                        return false;
1288                    }
1289
1290                    while (!atEOF() && ch0 != '>') {
1291                        if (ch0 == '/' && ch1 == '>') {
1292                            openCount--;
1293                            skip(1);
1294                            break;
1295                        } else if (ch0 == '\"' || ch0 == '\'') {
1296                            scanString(false);
1297                        } else {
1298                            skip(1);
1299                        }
1300                    }
1301
1302                    if (ch0 != '>') {
1303                        reset(start);
1304                        return false;
1305                    }
1306
1307                    skip(1);
1308                } else if (atEOF()) {
1309                    reset(start);
1310                    return false;
1311                } else {
1312                    skip(1);
1313                }
1314            } while (openCount > 0);
1315
1316            add(XML, start);
1317            return true;
1318        }
1319
1320        return false;
1321    }
1322
1323    /**
1324     * Scan over identifier characters.
1325     *
1326     * @return Length of identifier or zero if none found.
1327     */
1328    private int scanIdentifier() {
1329        final int start = position;
1330
1331        // Make sure first character is valid start character.
1332        if (ch0 == '\\' && ch1 == 'u') {
1333            skip(2);
1334            final int ch = hexSequence(4, TokenType.IDENT);
1335
1336            if (!Character.isJavaIdentifierStart(ch)) {
1337                error(Lexer.message("illegal.identifier.character"), TokenType.IDENT, start, position);
1338            }
1339        } else if (!Character.isJavaIdentifierStart(ch0)) {
1340            // Not an identifier.
1341            return 0;
1342        }
1343
1344        // Make sure remaining characters are valid part characters.
1345        while (!atEOF()) {
1346            if (ch0 == '\\' && ch1 == 'u') {
1347                skip(2);
1348                final int ch = hexSequence(4, TokenType.IDENT);
1349
1350                if (!Character.isJavaIdentifierPart(ch)) {
1351                    error(Lexer.message("illegal.identifier.character"), TokenType.IDENT, start, position);
1352                }
1353            } else if (Character.isJavaIdentifierPart(ch0)) {
1354                skip(1);
1355            } else {
1356                break;
1357            }
1358        }
1359
1360        // Length of identifier sequence.
1361        return position - start;
1362    }
1363
1364    /**
1365     * Compare two identifiers (in content) for equality.
1366     *
1367     * @param aStart  Start of first identifier.
1368     * @param aLength Length of first identifier.
1369     * @param bStart  Start of second identifier.
1370     * @param bLength Length of second identifier.
1371     * @return True if equal.
1372     */
1373    private boolean identifierEqual(final int aStart, final int aLength, final int bStart, final int bLength) {
1374        if (aLength == bLength) {
1375            for (int i = 0; i < aLength; i++) {
1376                if (content[aStart + i] != content[bStart + i]) {
1377                    return false;
1378                }
1379            }
1380
1381            return true;
1382        }
1383
1384        return false;
1385    }
1386
1387    /**
1388     * Detect if a line starts with a marker identifier.
1389     *
1390     * @param identStart  Start of identifier.
1391     * @param identLength Length of identifier.
1392     * @return True if detected.
1393     */
1394    private boolean hasHereMarker(final int identStart, final int identLength) {
1395        // Skip any whitespace.
1396        skipWhitespace(false);
1397
1398        return identifierEqual(identStart, identLength, position, scanIdentifier());
1399    }
1400
1401    /**
1402     * Lexer to service edit strings.
1403     */
1404    private static class EditStringLexer extends Lexer {
1405        /** Type of string literals to emit. */
1406        final TokenType stringType;
1407
1408        /*
1409         * Constructor.
1410         */
1411
1412        EditStringLexer(final Lexer lexer, final TokenType stringType, final State stringState) {
1413            super(lexer, stringState);
1414
1415            this.stringType = stringType;
1416        }
1417
1418        /**
1419         * Lexify the contents of the string.
1420         */
1421        @Override
1422        public void lexify() {
1423            // Record start of string position.
1424            int stringStart = position;
1425            // Indicate that the priming first string has not been emitted.
1426            boolean primed = false;
1427
1428            while (true) {
1429                // Detect end of content.
1430                if (atEOF()) {
1431                    break;
1432                }
1433
1434                // Honour escapes (should be well formed.)
1435                if (ch0 == '\\' && stringType == ESCSTRING) {
1436                    skip(2);
1437
1438                    continue;
1439                }
1440
1441                // If start of expression.
1442                if (ch0 == '$' && ch1 == '{') {
1443                    if (!primed || stringStart != position) {
1444                        if (primed) {
1445                            add(ADD, stringStart, stringStart + 1);
1446                        }
1447
1448                        add(stringType, stringStart, position);
1449                        primed = true;
1450                    }
1451
1452                    // Skip ${
1453                    skip(2);
1454
1455                    // Save expression state.
1456                    final State expressionState = saveState();
1457
1458                    // Start with one open brace.
1459                    int braceCount = 1;
1460
1461                    // Scan for the rest of the string.
1462                    while (!atEOF()) {
1463                        // If closing brace.
1464                        if (ch0 == '}') {
1465                            // Break only only if matching brace.
1466                            if (--braceCount == 0) {
1467                                break;
1468                            }
1469                        } else if (ch0 == '{') {
1470                            // Bump up the brace count.
1471                            braceCount++;
1472                        }
1473
1474                        // Skip to next character.
1475                        skip(1);
1476                    }
1477
1478                    // If braces don't match then report an error.
1479                    if (braceCount != 0) {
1480                        error(Lexer.message("edit.string.missing.brace"), LBRACE, expressionState.position - 1, 1);
1481                    }
1482
1483                    // Mark end of expression.
1484                    expressionState.setLimit(position);
1485                    // Skip closing brace.
1486                    skip(1);
1487
1488                    // Start next string.
1489                    stringStart = position;
1490
1491                    // Concatenate expression.
1492                    add(ADD, expressionState.position, expressionState.position + 1);
1493                    add(LPAREN, expressionState.position, expressionState.position + 1);
1494
1495                    // Scan expression.
1496                    final Lexer lexer = new Lexer(this, expressionState);
1497                    lexer.lexify();
1498
1499                    // Close out expression parenthesis.
1500                    add(RPAREN, position - 1, position);
1501
1502                    continue;
1503                }
1504
1505                // Next character in string.
1506                skip(1);
1507            }
1508
1509            // If there is any unemitted string portion.
1510            if (stringStart != limit) {
1511                // Concatenate remaining string.
1512                if (primed) {
1513                    add(ADD, stringStart, 1);
1514                }
1515
1516                add(stringType, stringStart, limit);
1517            }
1518        }
1519
1520    }
1521
1522    /**
1523     * Edit string for nested expressions.
1524     *
1525     * @param stringType  Type of string literals to emit.
1526     * @param stringState State of lexer at start of string.
1527     */
1528    private void editString(final TokenType stringType, final State stringState) {
1529        // Use special lexer to scan string.
1530        final EditStringLexer lexer = new EditStringLexer(this, stringType, stringState);
1531        lexer.lexify();
1532
1533        // Need to keep lexer informed.
1534        last = stringType;
1535    }
1536
1537    /**
1538     * Scan over a here string.
1539     *
1540     * @return TRUE if is a here string.
1541     */
1542    private boolean scanHereString(final LineInfoReceiver lir) {
1543        assert ch0 == '<' && ch1 == '<';
1544        if (scripting) {
1545            // Record beginning of here string.
1546            final State saved = saveState();
1547
1548            // << or <<<
1549            final boolean excludeLastEOL = ch2 != '<';
1550
1551            if (excludeLastEOL) {
1552                skip(2);
1553            } else {
1554                skip(3);
1555            }
1556
1557            // Scan identifier. It might be quoted, indicating that no string editing should take place.
1558            final char quoteChar = ch0;
1559            final boolean noStringEditing = quoteChar == '"' || quoteChar == '\'';
1560            if (noStringEditing) {
1561                skip(1);
1562            }
1563            final int identStart = position;
1564            final int identLength = scanIdentifier();
1565            if (noStringEditing) {
1566                if (ch0 != quoteChar) {
1567                    error(Lexer.message("here.non.matching.delimiter"), last, position, position);
1568                    restoreState(saved);
1569                    return false;
1570                }
1571                skip(1);
1572            }
1573
1574            // Check for identifier.
1575            if (identLength == 0) {
1576                // Treat as shift.
1577                restoreState(saved);
1578
1579                return false;
1580            }
1581
1582            // Record rest of line.
1583            final State restState = saveState();
1584            // keep line number updated
1585            int lastLine = line;
1586
1587            skipLine(false);
1588            lastLine++;
1589            int lastLinePosition = position;
1590            restState.setLimit(position);
1591
1592            // Record beginning of string.
1593            final State stringState = saveState();
1594            int stringEnd = position;
1595
1596            // Hunt down marker.
1597            while (!atEOF()) {
1598                // Skip any whitespace.
1599                skipWhitespace(false);
1600
1601                if (hasHereMarker(identStart, identLength)) {
1602                    break;
1603                }
1604
1605                skipLine(false);
1606                lastLine++;
1607                lastLinePosition = position;
1608                stringEnd = position;
1609            }
1610
1611            // notify last line information
1612            lir.lineInfo(lastLine, lastLinePosition);
1613
1614            // Record end of string.
1615            stringState.setLimit(stringEnd);
1616
1617            // If marker is missing.
1618            if (stringState.isEmpty() || atEOF()) {
1619                error(Lexer.message("here.missing.end.marker", source.getString(identStart, identLength)), last, position, position);
1620                restoreState(saved);
1621
1622                return false;
1623            }
1624
1625            // Remove last end of line if specified.
1626            if (excludeLastEOL) {
1627                // Handles \n.
1628                if (content[stringEnd - 1] == '\n') {
1629                    stringEnd--;
1630                }
1631
1632                // Handles \r and \r\n.
1633                if (content[stringEnd - 1] == '\r') {
1634                    stringEnd--;
1635                }
1636
1637                // Update end of string.
1638                stringState.setLimit(stringEnd);
1639            }
1640
1641            // Edit string if appropriate.
1642            if (!noStringEditing && !stringState.isEmpty()) {
1643                editString(STRING, stringState);
1644            } else {
1645                // Add here string.
1646                add(STRING, stringState.position, stringState.limit);
1647            }
1648
1649            // Scan rest of original line.
1650            final Lexer restLexer = new Lexer(this, restState);
1651
1652            restLexer.lexify();
1653
1654            return true;
1655        }
1656
1657        return false;
1658    }
1659
1660    /**
1661     * Breaks source content down into lex units, adding tokens to the token
1662     * stream. The routine scans until the stream buffer is full. Can be called
1663     * repeatedly until EOF is detected.
1664     */
1665    public void lexify() {
1666        while (!stream.isFull() || nested) {
1667            // Skip over whitespace.
1668            skipWhitespace(true);
1669
1670            // Detect end of file.
1671            if (atEOF()) {
1672                if (!nested) {
1673                    // Add an EOF token at the end.
1674                    add(EOF, position);
1675                }
1676
1677                break;
1678            }
1679
1680            // Check for comments. Note that we don't scan for regexp and other literals here as
1681            // we may not have enough context to distinguish them from similar looking operators.
1682            // Instead we break on ambiguous operators below and let the parser decide.
1683            if (ch0 == '/' && skipComments()) {
1684                continue;
1685            }
1686
1687            if (scripting && ch0 == '#' && skipComments()) {
1688                continue;
1689            }
1690
1691            // TokenType for lookup of delimiter or operator.
1692            TokenType type;
1693
1694            if (ch0 == '.' && convertDigit(ch1, 10) != -1) {
1695                // '.' followed by digit.
1696                // Scan and add a number.
1697                scanNumber();
1698            } else if ((type = TokenLookup.lookupOperator(ch0, ch1, ch2, ch3)) != null) {
1699                if (templateExpressionOpenBraces > 0) {
1700                    if (type == LBRACE) {
1701                        templateExpressionOpenBraces++;
1702                    } else if (type == RBRACE) {
1703                        if (--templateExpressionOpenBraces == 0) {
1704                            break;
1705                        }
1706                    }
1707                }
1708
1709                // Get the number of characters in the token.
1710                final int typeLength = type.getLength();
1711                // Skip that many characters.
1712                skip(typeLength);
1713                // Add operator token.
1714                add(type, position - typeLength);
1715                // Some operator tokens also mark the beginning of regexp, XML, or here string literals.
1716                // We break to let the parser decide what it is.
1717                if (canStartLiteral(type)) {
1718                    break;
1719                } else if (type == LBRACE && pauseOnNextLeftBrace) {
1720                    pauseOnNextLeftBrace = false;
1721                    break;
1722                }
1723            } else if (Character.isJavaIdentifierStart(ch0) || ch0 == '\\' && ch1 == 'u') {
1724                // Scan and add identifier or keyword.
1725                scanIdentifierOrKeyword();
1726            } else if (isStringDelimiter(ch0)) {
1727                // Scan and add a string.
1728                scanString(true);
1729            } else if (Character.isDigit(ch0)) {
1730                // Scan and add a number.
1731                scanNumber();
1732            } else if (isTemplateDelimiter(ch0) && es6) {
1733                // Scan and add template in ES6 mode.
1734                scanTemplate();
1735            } else if (isTemplateDelimiter(ch0) && scripting) {
1736                // Scan and add an exec string ('`') in scripting mode.
1737                scanString(true);
1738            } else {
1739                // Don't recognize this character.
1740                skip(1);
1741                add(ERROR, position - 1);
1742            }
1743        }
1744    }
1745
1746    /**
1747     * Return value of token given its token descriptor.
1748     *
1749     * @param token  Token descriptor.
1750     * @return JavaScript value.
1751     */
1752    Object getValueOf(final long token, final boolean strict) {
1753        final int start = Token.descPosition(token);
1754        final int len   = Token.descLength(token);
1755
1756        switch (Token.descType(token)) {
1757        case DECIMAL:
1758            return Lexer.valueOf(source.getString(start, len), 10); // number
1759        case HEXADECIMAL:
1760            return Lexer.valueOf(source.getString(start + 2, len - 2), 16); // number
1761        case OCTAL_LEGACY:
1762            return Lexer.valueOf(source.getString(start, len), 8); // number
1763        case OCTAL:
1764            return Lexer.valueOf(source.getString(start + 2, len - 2), 8); // number
1765        case BINARY_NUMBER:
1766            return Lexer.valueOf(source.getString(start + 2, len - 2), 2); // number
1767        case FLOATING:
1768            final String str   = source.getString(start, len);
1769            final double value = Double.valueOf(str);
1770            if (str.indexOf('.') != -1) {
1771                return value; //number
1772            }
1773            //anything without an explicit decimal point is still subject to a
1774            //"representable as int or long" check. Then the programmer does not
1775            //explicitly code something as a double. For example new Color(int, int, int)
1776            //and new Color(float, float, float) will get ambiguous for cases like
1777            //new Color(1.0, 1.5, 1.5) if we don't respect the decimal point.
1778            //yet we don't want e.g. 1e6 to be a double unnecessarily
1779            if (JSType.isStrictlyRepresentableAsInt(value)) {
1780                return (int)value;
1781            }
1782            return value;
1783        case STRING:
1784            return source.getString(start, len); // String
1785        case ESCSTRING:
1786            return valueOfString(start, len, strict); // String
1787        case IDENT:
1788            return valueOfIdent(start, len); // String
1789        case REGEX:
1790            return valueOfPattern(start, len); // RegexToken::LexerToken
1791        case TEMPLATE:
1792        case TEMPLATE_HEAD:
1793        case TEMPLATE_MIDDLE:
1794        case TEMPLATE_TAIL:
1795            return valueOfString(start, len, true); // String
1796        case XML:
1797            return valueOfXML(start, len); // XMLToken::LexerToken
1798        case DIRECTIVE_COMMENT:
1799            return source.getString(start, len);
1800        default:
1801            break;
1802        }
1803
1804        return null;
1805    }
1806
1807    /**
1808     * Get the raw string value of a template literal string part.
1809     *
1810     * @param token template string token
1811     * @return raw string
1812     */
1813    public String valueOfRawString(final long token) {
1814        final int start  = Token.descPosition(token);
1815        final int length = Token.descLength(token);
1816
1817        // Save the current position.
1818        final int savePosition = position;
1819        // Calculate the end position.
1820        final int end = start + length;
1821        // Reset to beginning of string.
1822        reset(start);
1823
1824        // Buffer for recording characters.
1825        final StringBuilder sb = new StringBuilder(length);
1826
1827        // Scan until end of string.
1828        while (position < end) {
1829            if (ch0 == '\r') {
1830                // Convert CR-LF or CR to LF line terminator.
1831                sb.append('\n');
1832                skip(ch1 == '\n' ? 2 : 1);
1833            } else {
1834                // Add regular character.
1835                sb.append(ch0);
1836                skip(1);
1837            }
1838        }
1839
1840        // Restore position.
1841        reset(savePosition);
1842
1843        return sb.toString();
1844    }
1845
1846    /**
1847     * Get the correctly localized error message for a given message id format arguments
1848     * @param msgId message id
1849     * @param args  format arguments
1850     * @return message
1851     */
1852    protected static String message(final String msgId, final String... args) {
1853        return ECMAErrors.getMessage("lexer.error." + msgId, args);
1854    }
1855
1856    /**
1857     * Generate a runtime exception
1858     *
1859     * @param message       error message
1860     * @param type          token type
1861     * @param start         start position of lexed error
1862     * @param length        length of lexed error
1863     * @throws ParserException  unconditionally
1864     */
1865    protected void error(final String message, final TokenType type, final int start, final int length) throws ParserException {
1866        final long token     = Token.toDesc(type, start, length);
1867        final int  pos       = Token.descPosition(token);
1868        final int  lineNum   = source.getLine(pos);
1869        final int  columnNum = source.getColumn(pos);
1870        final String formatted = ErrorManager.format(message, source, lineNum, columnNum, token);
1871        throw new ParserException(JSErrorType.SYNTAX_ERROR, formatted, source, lineNum, columnNum, token);
1872    }
1873
1874    /**
1875     * Helper class for Lexer tokens, e.g XML or RegExp tokens.
1876     * This is the abstract superclass
1877     */
1878    public static abstract class LexerToken implements Serializable {
1879        private static final long serialVersionUID = 1L;
1880
1881        private final String expression;
1882
1883        /**
1884         * Constructor
1885         * @param expression token expression
1886         */
1887        protected LexerToken(final String expression) {
1888            this.expression = expression;
1889        }
1890
1891        /**
1892         * Get the expression
1893         * @return expression
1894         */
1895        public String getExpression() {
1896            return expression;
1897        }
1898    }
1899
1900    /**
1901     * Temporary container for regular expressions.
1902     */
1903    public static class RegexToken extends LexerToken {
1904        private static final long serialVersionUID = 1L;
1905
1906        /** Options. */
1907        private final String options;
1908
1909        /**
1910         * Constructor.
1911         *
1912         * @param expression  regexp expression
1913         * @param options     regexp options
1914         */
1915        public RegexToken(final String expression, final String options) {
1916            super(expression);
1917            this.options = options;
1918        }
1919
1920        /**
1921         * Get regexp options
1922         * @return options
1923         */
1924        public String getOptions() {
1925            return options;
1926        }
1927
1928        @Override
1929        public String toString() {
1930            return '/' + getExpression() + '/' + options;
1931        }
1932    }
1933
1934    /**
1935     * Temporary container for XML expression.
1936     */
1937    public static class XMLToken extends LexerToken {
1938        private static final long serialVersionUID = 1L;
1939
1940        /**
1941         * Constructor.
1942         *
1943         * @param expression  XML expression
1944         */
1945        public XMLToken(final String expression) {
1946            super(expression);
1947        }
1948    }
1949}
1950