Lexer.java revision 1732:a32d419d73fe
1/*
2 * Copyright (c) 2010, 2015, Oracle and/or its affiliates. All rights reserved.
3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4 *
5 * This code is free software; you can redistribute it and/or modify it
6 * under the terms of the GNU General Public License version 2 only, as
7 * published by the Free Software Foundation.  Oracle designates this
8 * particular file as subject to the "Classpath" exception as provided
9 * by Oracle in the LICENSE file that accompanied this code.
10 *
11 * This code is distributed in the hope that it will be useful, but WITHOUT
12 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
14 * version 2 for more details (a copy is included in the LICENSE file that
15 * accompanied this code).
16 *
17 * You should have received a copy of the GNU General Public License version
18 * 2 along with this work; if not, write to the Free Software Foundation,
19 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
20 *
21 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
22 * or visit www.oracle.com if you need additional information or have any
23 * questions.
24 */
25
26package jdk.nashorn.internal.parser;
27
28import static jdk.nashorn.internal.parser.TokenType.ADD;
29import static jdk.nashorn.internal.parser.TokenType.BINARY_NUMBER;
30import static jdk.nashorn.internal.parser.TokenType.COMMENT;
31import static jdk.nashorn.internal.parser.TokenType.DECIMAL;
32import static jdk.nashorn.internal.parser.TokenType.DIRECTIVE_COMMENT;
33import static jdk.nashorn.internal.parser.TokenType.EOF;
34import static jdk.nashorn.internal.parser.TokenType.EOL;
35import static jdk.nashorn.internal.parser.TokenType.ERROR;
36import static jdk.nashorn.internal.parser.TokenType.ESCSTRING;
37import static jdk.nashorn.internal.parser.TokenType.EXECSTRING;
38import static jdk.nashorn.internal.parser.TokenType.FLOATING;
39import static jdk.nashorn.internal.parser.TokenType.FUNCTION;
40import static jdk.nashorn.internal.parser.TokenType.HEXADECIMAL;
41import static jdk.nashorn.internal.parser.TokenType.LBRACE;
42import static jdk.nashorn.internal.parser.TokenType.LPAREN;
43import static jdk.nashorn.internal.parser.TokenType.OCTAL;
44import static jdk.nashorn.internal.parser.TokenType.OCTAL_LEGACY;
45import static jdk.nashorn.internal.parser.TokenType.RBRACE;
46import static jdk.nashorn.internal.parser.TokenType.REGEX;
47import static jdk.nashorn.internal.parser.TokenType.RPAREN;
48import static jdk.nashorn.internal.parser.TokenType.STRING;
49import static jdk.nashorn.internal.parser.TokenType.TEMPLATE;
50import static jdk.nashorn.internal.parser.TokenType.TEMPLATE_HEAD;
51import static jdk.nashorn.internal.parser.TokenType.TEMPLATE_MIDDLE;
52import static jdk.nashorn.internal.parser.TokenType.TEMPLATE_TAIL;
53import static jdk.nashorn.internal.parser.TokenType.XML;
54
55import java.io.Serializable;
56
57import jdk.nashorn.internal.runtime.ECMAErrors;
58import jdk.nashorn.internal.runtime.ErrorManager;
59import jdk.nashorn.internal.runtime.JSErrorType;
60import jdk.nashorn.internal.runtime.JSType;
61import jdk.nashorn.internal.runtime.ParserException;
62import jdk.nashorn.internal.runtime.Source;
63import jdk.nashorn.internal.runtime.options.Options;
64
65/**
66 * Responsible for converting source content into a stream of tokens.
67 *
68 */
69@SuppressWarnings("fallthrough")
70public class Lexer extends Scanner {
    /** {@code Integer.MIN_VALUE} widened to a long. */
    private static final long MIN_INT_L = Integer.MIN_VALUE;
    /** {@code Integer.MAX_VALUE} widened to a long. */
    private static final long MAX_INT_L = Integer.MAX_VALUE;

    /** Value of the "nashorn.lexer.xmlliterals" boolean property; consulted by {@code canStartLiteral}. */
    private static final boolean XML_LITERALS = Options.getBooleanProperty("nashorn.lexer.xmlliterals");

    /** Content source. */
    private final Source source;

    /** Buffered stream for tokens. */
    private final TokenStream stream;

    /** True if here and edit strings are supported. */
    private final boolean scripting;

    /** True if parsing in ECMAScript 6 mode. */
    private final boolean es6;

    /** True if a nested scan. (scan to completion, no EOF.) */
    private final boolean nested;

    /**
     * Pending new line number and position. Set from the end value of the
     * most recent EOL token and reset to -1 once that EOL has been written
     * to the stream.
     */
    int pendingLine;

    /** Position of last EOL + 1. */
    private int linePosition;

    /** Type of last token added. */
    private TokenType last;

    /** True if the lexer was constructed with the pause-on-function-body option. */
    private final boolean pauseOnFunctionBody;
    /** Set when a {@code function} keyword is scanned while {@link #pauseOnFunctionBody} is true. */
    private boolean pauseOnNextLeftBrace;

    // NOTE(review): presumably counts '{' opened inside a template literal
    // expression; no use is visible in this part of the file - confirm.
    private int templateExpressionOpenBraces;

    private static final String SPACETAB = " \t";  // ASCII space and tab
    private static final String LFCR     = "\n\r"; // line feed and carriage return (ctrl-m)
107
    /** Characters accepted as end of line by {@code isJSEOL}. */
    private static final String JAVASCRIPT_WHITESPACE_EOL =
        LFCR +
        "\u2028" + // line separator
        "\u2029"   // paragraph separator
        ;
    /**
     * Characters accepted as whitespace by {@code isJSWhitespace}. The end
     * of line characters above are included, so every EOL is also whitespace.
     */
    private static final String JAVASCRIPT_WHITESPACE =
        SPACETAB +
        JAVASCRIPT_WHITESPACE_EOL +
        "\u000b" + // line tabulation (vertical tab)
        "\u000c" + // ff (ctrl-l)
        "\u00a0" + // Latin-1 space
        "\u1680" + // Ogham space mark
        "\u180e" + // separator, Mongolian vowel
        "\u2000" + // en quad
        "\u2001" + // em quad
        "\u2002" + // en space
        "\u2003" + // em space
        "\u2004" + // three-per-em space
        "\u2005" + // four-per-em space
        "\u2006" + // six-per-em space
        "\u2007" + // figure space
        "\u2008" + // punctuation space
        "\u2009" + // thin space
        "\u200a" + // hair space
        "\u202f" + // narrow no-break space
        "\u205f" + // medium mathematical space
        "\u3000" + // ideographic space
        "\ufeff"   // byte order mark
        ;
137
    /**
     * The same set of characters as JAVASCRIPT_WHITESPACE, but spelled as
     * escape text: the doubled backslash in each literal means the string
     * holds a backslash, a 'u' and four hex digits per character rather than
     * the character itself. Returned by {@code getWhitespaceRegExp()}.
     */
    private static final String JAVASCRIPT_WHITESPACE_IN_REGEXP =
        "\\u000a" + // line feed
        "\\u000d" + // carriage return (ctrl-m)
        "\\u2028" + // line separator
        "\\u2029" + // paragraph separator
        "\\u0009" + // tab
        "\\u0020" + // ASCII space
        "\\u000b" + // line tabulation (vertical tab)
        "\\u000c" + // ff (ctrl-l)
        "\\u00a0" + // Latin-1 space
        "\\u1680" + // Ogham space mark
        "\\u180e" + // separator, Mongolian vowel
        "\\u2000" + // en quad
        "\\u2001" + // em quad
        "\\u2002" + // en space
        "\\u2003" + // em space
        "\\u2004" + // three-per-em space
        "\\u2005" + // four-per-em space
        "\\u2006" + // six-per-em space
        "\\u2007" + // figure space
        "\\u2008" + // punctuation space
        "\\u2009" + // thin space
        "\\u200a" + // hair space
        "\\u202f" + // narrow no-break space
        "\\u205f" + // medium mathematical space
        "\\u3000" + // ideographic space
        "\\ufeff"   // byte order mark
        ;
166
167    static String unicodeEscape(final char ch) {
168        final StringBuilder sb = new StringBuilder();
169
170        sb.append("\\u");
171
172        final String hex = Integer.toHexString(ch);
173        for (int i = hex.length(); i < 4; i++) {
174            sb.append('0');
175        }
176        sb.append(hex);
177
178        return sb.toString();
179    }
180
    /**
     * Constructor for a lexer with both scripting mode and ECMAScript 6 mode
     * switched off.
     *
     * @param source    the source
     * @param stream    the token stream that receives the lexed tokens
     */
    public Lexer(final Source source, final TokenStream stream) {
        this(source, stream, false, false);
    }
190
    /**
     * Constructor for a lexer that covers the source from position 0 for
     * {@code source.getLength()} characters and does not pause on function
     * bodies.
     *
     * @param source    the source
     * @param stream    the token stream that receives the lexed tokens
     * @param scripting are we in scripting mode
     * @param es6       are we in ECMAScript 6 mode
     */
    public Lexer(final Source source, final TokenStream stream, final boolean scripting, final boolean es6) {
        this(source, 0, source.getLength(), stream, scripting, es6, false);
    }
202
203    /**
204     * Constructor
205     *
206     * @param source    the source
207     * @param start     start position in source from which to start lexing
208     * @param len       length of source segment to lex
209     * @param stream    token stream to lex
210     * @param scripting are we in scripting mode
211     * @param es6       are we in ECMAScript 6 mode
212     * @param pauseOnFunctionBody if true, lexer will return from {@link #lexify()} when it encounters a
213     * function body. This is used with the feature where the parser is skipping nested function bodies to
214     * avoid reading ahead unnecessarily when we skip the function bodies.
215     */
216    public Lexer(final Source source, final int start, final int len, final TokenStream stream, final boolean scripting, final boolean es6, final boolean pauseOnFunctionBody) {
217        super(source.getContent(), 1, start, len);
218        this.source      = source;
219        this.stream      = stream;
220        this.scripting   = scripting;
221        this.es6         = es6;
222        this.nested      = false;
223        this.pendingLine = 1;
224        this.last        = EOL;
225
226        this.pauseOnFunctionBody = pauseOnFunctionBody;
227    }
228
229    private Lexer(final Lexer lexer, final State state) {
230        super(lexer, state);
231
232        source = lexer.source;
233        stream = lexer.stream;
234        scripting = lexer.scripting;
235        es6 = lexer.es6;
236        nested = true;
237
238        pendingLine = state.pendingLine;
239        linePosition = state.linePosition;
240        last = EOL;
241        pauseOnFunctionBody = false;
242    }
243
244    static class State extends Scanner.State {
245        /** Pending new line number and position. */
246        public final int pendingLine;
247
248        /** Position of last EOL + 1. */
249        public final int linePosition;
250
251        /** Type of last token added. */
252        public final TokenType last;
253
254        /*
255         * Constructor.
256         */
257
258        State(final int position, final int limit, final int line, final int pendingLine, final int linePosition, final TokenType last) {
259            super(position, limit, line);
260
261            this.pendingLine = pendingLine;
262            this.linePosition = linePosition;
263            this.last = last;
264        }
265    }
266
    /**
     * Save the state of the scan: position, limit and line together with
     * the pending line, the line position and the last token type.
     *
     * @return Captured state.
     */
    @Override
    State saveState() {
        return new State(position, limit, line, pendingLine, linePosition, last);
    }
276
277    /**
278     * Restore the state of the scan.
279     *
280     * @param state
281     *            Captured state.
282     */
283    void restoreState(final State state) {
284        super.restoreState(state);
285
286        pendingLine = state.pendingLine;
287        linePosition = state.linePosition;
288        last = state.last;
289    }
290
291    /**
292     * Add a new token to the stream.
293     *
294     * @param type
295     *            Token type.
296     * @param start
297     *            Start position.
298     * @param end
299     *            End position.
300     */
301    protected void add(final TokenType type, final int start, final int end) {
302        // Record last token.
303        last = type;
304
305        // Only emit the last EOL in a cluster.
306        if (type == EOL) {
307            pendingLine = end;
308            linePosition = start;
309        } else {
310            // Write any pending EOL to stream.
311            if (pendingLine != -1) {
312                stream.put(Token.toDesc(EOL, linePosition, pendingLine));
313                pendingLine = -1;
314            }
315
316            // Write token to stream.
317            stream.put(Token.toDesc(type, start, end - start));
318        }
319    }
320
    /**
     * Add a new token to the stream, using the current scan position as the
     * token's end position.
     *
     * @param type
     *            Token type.
     * @param start
     *            Start position.
     */
    protected void add(final TokenType type, final int start) {
        add(type, start, position);
    }
332
    /**
     * Return the String of valid whitespace characters for regular
     * expressions in JavaScript. Each character is given as escape text
     * (backslash, 'u', four hex digits), not as the character itself.
     *
     * @return regexp whitespace string
     */
    public static String getWhitespaceRegExp() {
        return JAVASCRIPT_WHITESPACE_IN_REGEXP;
    }
341
342    /**
343     * Skip end of line.
344     *
345     * @param addEOL true if EOL token should be recorded.
346     */
347    private void skipEOL(final boolean addEOL) {
348
349        if (ch0 == '\r') { // detect \r\n pattern
350            skip(1);
351            if (ch0 == '\n') {
352                skip(1);
353            }
354        } else { // all other space, ch0 is guaranteed to be EOL or \0
355            skip(1);
356        }
357
358        // bump up line count
359        line++;
360
361        if (addEOL) {
362            // Add an EOL token.
363            add(EOL, position, line);
364        }
365    }
366
367    /**
368     * Skip over rest of line including end of line.
369     *
370     * @param addEOL true if EOL token should be recorded.
371     */
372    private void skipLine(final boolean addEOL) {
373        // Ignore characters.
374        while (!isEOL(ch0) && !atEOF()) {
375            skip(1);
376        }
377        // Skip over end of line.
378        skipEOL(addEOL);
379    }
380
381    /**
382     * Test whether a char is valid JavaScript whitespace
383     * @param ch a char
384     * @return true if valid JavaScript whitespace
385     */
386    public static boolean isJSWhitespace(final char ch) {
387        return JAVASCRIPT_WHITESPACE.indexOf(ch) != -1;
388    }
389
390    /**
391     * Test whether a char is valid JavaScript end of line
392     * @param ch a char
393     * @return true if valid JavaScript end of line
394     */
395    public static boolean isJSEOL(final char ch) {
396        return JAVASCRIPT_WHITESPACE_EOL.indexOf(ch) != -1;
397    }
398
399    /**
400     * Test if char is a string delimiter, e.g. '\' or '"'.
401     * @param ch a char
402     * @return true if string delimiter
403     */
404    protected boolean isStringDelimiter(final char ch) {
405        return ch == '\'' || ch == '"';
406    }
407
408    /**
409     * Test if char is a template literal delimiter ('`').
410     */
411    private static boolean isTemplateDelimiter(char ch) {
412        return ch == '`';
413    }
414
415    /**
416     * Test whether a char is valid JavaScript whitespace
417     * @param ch a char
418     * @return true if valid JavaScript whitespace
419     */
420    protected boolean isWhitespace(final char ch) {
421        return Lexer.isJSWhitespace(ch);
422    }
423
424    /**
425     * Test whether a char is valid JavaScript end of line
426     * @param ch a char
427     * @return true if valid JavaScript end of line
428     */
429    protected boolean isEOL(final char ch) {
430        return Lexer.isJSEOL(ch);
431    }
432
433    /**
434     * Skip over whitespace and detect end of line, adding EOL tokens if
435     * encountered.
436     *
437     * @param addEOL true if EOL tokens should be recorded.
438     */
439    private void skipWhitespace(final boolean addEOL) {
440        while (isWhitespace(ch0)) {
441            if (isEOL(ch0)) {
442                skipEOL(addEOL);
443            } else {
444                skip(1);
445            }
446        }
447    }
448
449    /**
450     * Skip over comments.
451     *
452     * @return True if a comment.
453     */
454    protected boolean skipComments() {
455        // Save the current position.
456        final int start = position;
457
458        if (ch0 == '/') {
459            // Is it a // comment.
460            if (ch1 == '/') {
461                // Skip over //.
462                skip(2);
463
464                boolean directiveComment = false;
465                if ((ch0 == '#' || ch0 == '@') && (ch1 == ' ')) {
466                    directiveComment = true;
467                }
468
469                // Scan for EOL.
470                while (!atEOF() && !isEOL(ch0)) {
471                    skip(1);
472                }
473                // Did detect a comment.
474                add(directiveComment? DIRECTIVE_COMMENT : COMMENT, start);
475                return true;
476            } else if (ch1 == '*') {
477                // Skip over /*.
478                skip(2);
479                // Scan for */.
480                while (!atEOF() && !(ch0 == '*' && ch1 == '/')) {
481                    // If end of line handle else skip character.
482                    if (isEOL(ch0)) {
483                        skipEOL(true);
484                    } else {
485                        skip(1);
486                    }
487                }
488
489                if (atEOF()) {
490                    // TODO - Report closing */ missing in parser.
491                    add(ERROR, start);
492                } else {
493                    // Skip */.
494                    skip(2);
495                }
496
497                // Did detect a comment.
498                add(COMMENT, start);
499                return true;
500            }
501        } else if (ch0 == '#') {
502            assert scripting;
503            // shell style comment
504            // Skip over #.
505            skip(1);
506            // Scan for EOL.
507            while (!atEOF() && !isEOL(ch0)) {
508                skip(1);
509            }
510            // Did detect a comment.
511            add(COMMENT, start);
512            return true;
513        }
514
515        // Not a comment.
516        return false;
517    }
518
519    /**
520     * Convert a regex token to a token object.
521     *
522     * @param start  Position in source content.
523     * @param length Length of regex token.
524     * @return Regex token object.
525     */
526    public RegexToken valueOfPattern(final int start, final int length) {
527        // Save the current position.
528        final int savePosition = position;
529        // Reset to beginning of content.
530        reset(start);
531        // Buffer for recording characters.
532        final StringBuilder sb = new StringBuilder(length);
533
534        // Skip /.
535        skip(1);
536        boolean inBrackets = false;
537        // Scan for closing /, stopping at end of line.
538        while (!atEOF() && ch0 != '/' && !isEOL(ch0) || inBrackets) {
539            // Skip over escaped character.
540            if (ch0 == '\\') {
541                sb.append(ch0);
542                sb.append(ch1);
543                skip(2);
544            } else {
545                if (ch0 == '[') {
546                    inBrackets = true;
547                } else if (ch0 == ']') {
548                    inBrackets = false;
549                }
550
551                // Skip literal character.
552                sb.append(ch0);
553                skip(1);
554            }
555        }
556
557        // Get pattern as string.
558        final String regex = sb.toString();
559
560        // Skip /.
561        skip(1);
562
563        // Options as string.
564        final String options = source.getString(position, scanIdentifier());
565
566        reset(savePosition);
567
568        // Compile the pattern.
569        return new RegexToken(regex, options);
570    }
571
572    /**
573     * Return true if the given token can be the beginning of a literal.
574     *
575     * @param token a token
576     * @return true if token can start a literal.
577     */
578    public boolean canStartLiteral(final TokenType token) {
579        return token.startsWith('/') || ((scripting || XML_LITERALS) && token.startsWith('<'));
580    }
581
582    /**
583     * interface to receive line information for multi-line literals.
584     */
585    protected interface LineInfoReceiver {
586        /**
587         * Receives line information
588         * @param line last line number
589         * @param linePosition position of last line
590         */
591        public void lineInfo(int line, int linePosition);
592    }
593
594    /**
595     * Check whether the given token represents the beginning of a literal. If so scan
596     * the literal and return <tt>true</tt>, otherwise return false.
597     *
598     * @param token the token.
599     * @param startTokenType the token type.
600     * @param lir LineInfoReceiver that receives line info for multi-line string literals.
601     * @return True if a literal beginning with startToken was found and scanned.
602     */
603    protected boolean scanLiteral(final long token, final TokenType startTokenType, final LineInfoReceiver lir) {
604        // Check if it can be a literal.
605        if (!canStartLiteral(startTokenType)) {
606            return false;
607        }
608        // We break on ambiguous tokens so if we already moved on it can't be a literal.
609        if (stream.get(stream.last()) != token) {
610            return false;
611        }
612
613        // Record current position in case multiple heredocs start on this line - see JDK-8073653
614        final State state = saveState();
615        // Rewind to token start position
616        reset(Token.descPosition(token));
617
618        if (ch0 == '/') {
619            return scanRegEx();
620        } else if (ch0 == '<') {
621            if (ch1 == '<') {
622                return scanHereString(lir, state);
623            } else if (Character.isJavaIdentifierStart(ch1)) {
624                return scanXMLLiteral();
625            }
626        }
627
628        return false;
629    }
630
631    /**
632     * Scan over regex literal.
633     *
634     * @return True if a regex literal.
635     */
636    private boolean scanRegEx() {
637        assert ch0 == '/';
638        // Make sure it's not a comment.
639        if (ch1 != '/' && ch1 != '*') {
640            // Record beginning of literal.
641            final int start = position;
642            // Skip /.
643            skip(1);
644            boolean inBrackets = false;
645
646            // Scan for closing /, stopping at end of line.
647            while (!atEOF() && (ch0 != '/' || inBrackets) && !isEOL(ch0)) {
648                // Skip over escaped character.
649                if (ch0 == '\\') {
650                    skip(1);
651                    if (isEOL(ch0)) {
652                        reset(start);
653                        return false;
654                    }
655                    skip(1);
656                } else {
657                    if (ch0 == '[') {
658                        inBrackets = true;
659                    } else if (ch0 == ']') {
660                        inBrackets = false;
661                    }
662
663                    // Skip literal character.
664                    skip(1);
665                }
666            }
667
668            // If regex literal.
669            if (ch0 == '/') {
670                // Skip /.
671                skip(1);
672
673                // Skip over options.
674                while (!atEOF() && Character.isJavaIdentifierPart(ch0) || ch0 == '\\' && ch1 == 'u') {
675                    skip(1);
676                }
677
678                // Add regex token.
679                add(REGEX, start);
680                // Regex literal detected.
681                return true;
682            }
683
684            // False start try again.
685            reset(start);
686        }
687
688        // Regex literal not detected.
689        return false;
690    }
691
692    /**
693     * Convert a digit to a integer.  Can't use Character.digit since we are
694     * restricted to ASCII by the spec.
695     *
696     * @param ch   Character to convert.
697     * @param base Numeric base.
698     *
699     * @return The converted digit or -1 if invalid.
700     */
701    protected static int convertDigit(final char ch, final int base) {
702        int digit;
703
704        if ('0' <= ch && ch <= '9') {
705            digit = ch - '0';
706        } else if ('A' <= ch && ch <= 'Z') {
707            digit = ch - 'A' + 10;
708        } else if ('a' <= ch && ch <= 'z') {
709            digit = ch - 'a' + 10;
710        } else {
711            return -1;
712        }
713
714        return digit < base ? digit : -1;
715    }
716
717
718    /**
719     * Get the value of a hexadecimal numeric sequence.
720     *
721     * @param length Number of digits.
722     * @param type   Type of token to report against.
723     * @return Value of sequence or < 0 if no digits.
724     */
725    private int hexSequence(final int length, final TokenType type) {
726        int value = 0;
727
728        for (int i = 0; i < length; i++) {
729            final int digit = convertDigit(ch0, 16);
730
731            if (digit == -1) {
732                error(Lexer.message("invalid.hex"), type, position, limit);
733                return i == 0 ? -1 : value;
734            }
735
736            value = digit | value << 4;
737            skip(1);
738        }
739
740        return value;
741    }
742
743    /**
744     * Get the value of an octal numeric sequence. This parses up to 3 digits with a maximum value of 255.
745     *
746     * @return Value of sequence.
747     */
748    private int octalSequence() {
749        int value = 0;
750
751        for (int i = 0; i < 3; i++) {
752            final int digit = convertDigit(ch0, 8);
753
754            if (digit == -1) {
755                break;
756            }
757            value = digit | value << 3;
758            skip(1);
759
760            if (i == 1 && value >= 32) {
761                break;
762            }
763        }
764        return value;
765    }
766
767    /**
768     * Convert a string to a JavaScript identifier.
769     *
770     * @param start  Position in source content.
771     * @param length Length of token.
772     * @return Ident string or null if an error.
773     */
774    private String valueOfIdent(final int start, final int length) throws RuntimeException {
775        // Save the current position.
776        final int savePosition = position;
777        // End of scan.
778        final int end = start + length;
779        // Reset to beginning of content.
780        reset(start);
781        // Buffer for recording characters.
782        final StringBuilder sb = new StringBuilder(length);
783
784        // Scan until end of line or end of file.
785        while (!atEOF() && position < end && !isEOL(ch0)) {
786            // If escape character.
787            if (ch0 == '\\' && ch1 == 'u') {
788                skip(2);
789                final int ch = hexSequence(4, TokenType.IDENT);
790                if (isWhitespace((char)ch)) {
791                    return null;
792                }
793                if (ch < 0) {
794                    sb.append('\\');
795                    sb.append('u');
796                } else {
797                    sb.append((char)ch);
798                }
799            } else {
800                // Add regular character.
801                sb.append(ch0);
802                skip(1);
803            }
804        }
805
806        // Restore position.
807        reset(savePosition);
808
809        return sb.toString();
810    }
811
812    /**
813     * Scan over and identifier or keyword. Handles identifiers containing
814     * encoded Unicode chars.
815     *
816     * Example:
817     *
818     * var \u0042 = 44;
819     */
820    private void scanIdentifierOrKeyword() {
821        // Record beginning of identifier.
822        final int start = position;
823        // Scan identifier.
824        final int length = scanIdentifier();
825        // Check to see if it is a keyword.
826        final TokenType type = TokenLookup.lookupKeyword(content, start, length);
827        if (type == FUNCTION && pauseOnFunctionBody) {
828            pauseOnNextLeftBrace = true;
829        }
830        // Add keyword or identifier token.
831        add(type, start);
832    }
833
834    /**
835     * Convert a string to a JavaScript string object.
836     *
837     * @param start  Position in source content.
838     * @param length Length of token.
839     * @return JavaScript string object.
840     */
841    private String valueOfString(final int start, final int length, final boolean strict) throws RuntimeException {
842        // Save the current position.
843        final int savePosition = position;
844        // Calculate the end position.
845        final int end = start + length;
846        // Reset to beginning of string.
847        reset(start);
848
849        // Buffer for recording characters.
850        final StringBuilder sb = new StringBuilder(length);
851
852        // Scan until end of string.
853        while (position < end) {
854            // If escape character.
855            if (ch0 == '\\') {
856                skip(1);
857
858                final char next = ch0;
859                final int afterSlash = position;
860
861                skip(1);
862
863                // Special characters.
864                switch (next) {
865                case '0':
866                case '1':
867                case '2':
868                case '3':
869                case '4':
870                case '5':
871                case '6':
872                case '7': {
873                    if (strict) {
874                        // "\0" itself is allowed in strict mode. Only other 'real'
875                        // octal escape sequences are not allowed (eg. "\02", "\31").
876                        // See section 7.8.4 String literals production EscapeSequence
877                        if (next != '0' || (ch0 >= '0' && ch0 <= '9')) {
878                            error(Lexer.message("strict.no.octal"), STRING, position, limit);
879                        }
880                    }
881                    reset(afterSlash);
882                    // Octal sequence.
883                    final int ch = octalSequence();
884
885                    if (ch < 0) {
886                        sb.append('\\');
887                        sb.append('x');
888                    } else {
889                        sb.append((char)ch);
890                    }
891                    break;
892                }
893                case 'n':
894                    sb.append('\n');
895                    break;
896                case 't':
897                    sb.append('\t');
898                    break;
899                case 'b':
900                    sb.append('\b');
901                    break;
902                case 'f':
903                    sb.append('\f');
904                    break;
905                case 'r':
906                    sb.append('\r');
907                    break;
908                case '\'':
909                    sb.append('\'');
910                    break;
911                case '\"':
912                    sb.append('\"');
913                    break;
914                case '\\':
915                    sb.append('\\');
916                    break;
917                case '\r': // CR | CRLF
918                    if (ch0 == '\n') {
919                        skip(1);
920                    }
921                    // fall through
922                case '\n': // LF
923                case '\u2028': // LS
924                case '\u2029': // PS
925                    // continue on the next line, slash-return continues string
926                    // literal
927                    break;
928                case 'x': {
929                    // Hex sequence.
930                    final int ch = hexSequence(2, STRING);
931
932                    if (ch < 0) {
933                        sb.append('\\');
934                        sb.append('x');
935                    } else {
936                        sb.append((char)ch);
937                    }
938                }
939                    break;
940                case 'u': {
941                    // Unicode sequence.
942                    final int ch = hexSequence(4, STRING);
943
944                    if (ch < 0) {
945                        sb.append('\\');
946                        sb.append('u');
947                    } else {
948                        sb.append((char)ch);
949                    }
950                }
951                    break;
952                case 'v':
953                    sb.append('\u000B');
954                    break;
955                // All other characters.
956                default:
957                    sb.append(next);
958                    break;
959                }
960            } else if (ch0 == '\r') {
961                // Convert CR-LF or CR to LF line terminator.
962                sb.append('\n');
963                skip(ch1 == '\n' ? 2 : 1);
964            } else {
965                // Add regular character.
966                sb.append(ch0);
967                skip(1);
968            }
969        }
970
971        // Restore position.
972        reset(savePosition);
973
974        return sb.toString();
975    }
976
977    /**
978     * Scan over a string literal.
979     * @param add true if we are not just scanning but should actually modify the token stream
980     */
981    protected void scanString(final boolean add) {
982        // Type of string.
983        TokenType type = STRING;
984        // Record starting quote.
985        final char quote = ch0;
986        // Skip over quote.
987        skip(1);
988
989        // Record beginning of string content.
990        final State stringState = saveState();
991
992        // Scan until close quote or end of line.
993        while (!atEOF() && ch0 != quote && !isEOL(ch0)) {
994            // Skip over escaped character.
995            if (ch0 == '\\') {
996                type = ESCSTRING;
997                skip(1);
998                if (! isEscapeCharacter(ch0)) {
999                    error(Lexer.message("invalid.escape.char"), STRING, position, limit);
1000                }
1001                if (isEOL(ch0)) {
1002                    // Multiline string literal
1003                    skipEOL(false);
1004                    continue;
1005                }
1006            }
1007            // Skip literal character.
1008            skip(1);
1009        }
1010
1011        // If close quote.
1012        if (ch0 == quote) {
1013            // Skip close quote.
1014            skip(1);
1015        } else {
1016            error(Lexer.message("missing.close.quote"), STRING, position, limit);
1017        }
1018
1019        // If not just scanning.
1020        if (add) {
1021            // Record end of string.
1022            stringState.setLimit(position - 1);
1023
1024            if (scripting && !stringState.isEmpty()) {
1025                switch (quote) {
1026                case '`':
1027                    // Mark the beginning of an exec string.
1028                    add(EXECSTRING, stringState.position, stringState.limit);
1029                    // Frame edit string with left brace.
1030                    add(LBRACE, stringState.position, stringState.position);
1031                    // Process edit string.
1032                    editString(type, stringState);
1033                    // Frame edit string with right brace.
1034                    add(RBRACE, stringState.limit, stringState.limit);
1035                    break;
1036                case '"':
1037                    // Only edit double quoted strings.
1038                    editString(type, stringState);
1039                    break;
1040                case '\'':
1041                    // Add string token without editing.
1042                    add(type, stringState.position, stringState.limit);
1043                    break;
1044                default:
1045                    break;
1046                }
1047            } else {
1048                /// Add string token without editing.
1049                add(type, stringState.position, stringState.limit);
1050            }
1051        }
1052    }
1053
1054    /**
1055     * Scan over a template string literal.
1056     */
1057    private void scanTemplate() {
1058        assert ch0 == '`';
1059        TokenType type = TEMPLATE;
1060
1061        // Skip over quote and record beginning of string content.
1062        skip(1);
1063        State stringState = saveState();
1064
1065        // Scan until close quote
1066        while (!atEOF()) {
1067            // Skip over escaped character.
1068            if (ch0 == '`') {
1069                skip(1);
1070                // Record end of string.
1071                stringState.setLimit(position - 1);
1072                add(type == TEMPLATE ? type : TEMPLATE_TAIL, stringState.position, stringState.limit);
1073                return;
1074            } else if (ch0 == '$' && ch1 == '{') {
1075                skip(2);
1076                stringState.setLimit(position - 2);
1077                add(type == TEMPLATE ? TEMPLATE_HEAD : type, stringState.position, stringState.limit);
1078
1079                // scan to RBRACE
1080                Lexer expressionLexer = new Lexer(this, saveState());
1081                expressionLexer.templateExpressionOpenBraces = 1;
1082                expressionLexer.lexify();
1083                restoreState(expressionLexer.saveState());
1084
1085                // scan next middle or tail of the template literal
1086                assert ch0 == '}';
1087                type = TEMPLATE_MIDDLE;
1088
1089                // Skip over rbrace and record beginning of string content.
1090                skip(1);
1091                stringState = saveState();
1092
1093                continue;
1094            } else if (ch0 == '\\') {
1095                skip(1);
1096                // EscapeSequence
1097                if (!isEscapeCharacter(ch0)) {
1098                    error(Lexer.message("invalid.escape.char"), TEMPLATE, position, limit);
1099                }
1100                if (isEOL(ch0)) {
1101                    // LineContinuation
1102                    skipEOL(false);
1103                    continue;
1104                }
1105            }  else if (isEOL(ch0)) {
1106                // LineTerminatorSequence
1107                skipEOL(false);
1108                continue;
1109            }
1110
1111            // Skip literal character.
1112            skip(1);
1113        }
1114
1115        error(Lexer.message("missing.close.quote"), TEMPLATE, position, limit);
1116    }
1117
1118    /**
1119     * Is the given character a valid escape char after "\" ?
1120     *
1121     * @param ch character to be checked
1122     * @return if the given character is valid after "\"
1123     */
1124    protected boolean isEscapeCharacter(final char ch) {
1125        return true;
1126    }
1127
1128    /**
1129     * Convert string to number.
1130     *
1131     * @param valueString  String to convert.
1132     * @param radix        Numeric base.
1133     * @return Converted number.
1134     */
1135    private static Number valueOf(final String valueString, final int radix) throws NumberFormatException {
1136        try {
1137            return Integer.parseInt(valueString, radix);
1138        } catch (final NumberFormatException e) {
1139            if (radix == 10) {
1140                return Double.valueOf(valueString);
1141            }
1142
1143            double value = 0.0;
1144
1145            for (int i = 0; i < valueString.length(); i++) {
1146                final char ch = valueString.charAt(i);
1147                // Preverified, should always be a valid digit.
1148                final int digit = convertDigit(ch, radix);
1149                value *= radix;
1150                value += digit;
1151            }
1152
1153            return value;
1154        }
1155    }
1156
1157    /**
1158     * Scan a number.
1159     */
1160    protected void scanNumber() {
1161        // Record beginning of number.
1162        final int start = position;
1163        // Assume value is a decimal.
1164        TokenType type = DECIMAL;
1165
1166        // First digit of number.
1167        int digit = convertDigit(ch0, 10);
1168
1169        // If number begins with 0x.
1170        if (digit == 0 && (ch1 == 'x' || ch1 == 'X') && convertDigit(ch2, 16) != -1) {
1171            // Skip over 0xN.
1172            skip(3);
1173            // Skip over remaining digits.
1174            while (convertDigit(ch0, 16) != -1) {
1175                skip(1);
1176            }
1177
1178            type = HEXADECIMAL;
1179        } else if (digit == 0 && es6 && (ch1 == 'o' || ch1 == 'O') && convertDigit(ch2, 8) != -1) {
1180            // Skip over 0oN.
1181            skip(3);
1182            // Skip over remaining digits.
1183            while (convertDigit(ch0, 8) != -1) {
1184                skip(1);
1185            }
1186
1187            type = OCTAL;
1188        } else if (digit == 0 && es6 && (ch1 == 'b' || ch1 == 'B') && convertDigit(ch2, 2) != -1) {
1189            // Skip over 0bN.
1190            skip(3);
1191            // Skip over remaining digits.
1192            while (convertDigit(ch0, 2) != -1) {
1193                skip(1);
1194            }
1195
1196            type = BINARY_NUMBER;
1197        } else {
1198            // Check for possible octal constant.
1199            boolean octal = digit == 0;
1200            // Skip first digit if not leading '.'.
1201            if (digit != -1) {
1202                skip(1);
1203            }
1204
1205            // Skip remaining digits.
1206            while ((digit = convertDigit(ch0, 10)) != -1) {
1207                // Check octal only digits.
1208                octal = octal && digit < 8;
1209                // Skip digit.
1210                skip(1);
1211            }
1212
1213            if (octal && position - start > 1) {
1214                type = OCTAL_LEGACY;
1215            } else if (ch0 == '.' || ch0 == 'E' || ch0 == 'e') {
1216                // Must be a double.
1217                if (ch0 == '.') {
1218                    // Skip period.
1219                    skip(1);
1220                    // Skip mantissa.
1221                    while (convertDigit(ch0, 10) != -1) {
1222                        skip(1);
1223                    }
1224                }
1225
1226                // Detect exponent.
1227                if (ch0 == 'E' || ch0 == 'e') {
1228                    // Skip E.
1229                    skip(1);
1230                    // Detect and skip exponent sign.
1231                    if (ch0 == '+' || ch0 == '-') {
1232                        skip(1);
1233                    }
1234                    // Skip exponent.
1235                    while (convertDigit(ch0, 10) != -1) {
1236                        skip(1);
1237                    }
1238                }
1239
1240                type = FLOATING;
1241            }
1242        }
1243
1244        if (Character.isJavaIdentifierStart(ch0)) {
1245            error(Lexer.message("missing.space.after.number"), type, position, 1);
1246        }
1247
1248        // Add number token.
1249        add(type, start);
1250    }
1251
1252    /**
1253     * Convert a regex token to a token object.
1254     *
1255     * @param start  Position in source content.
1256     * @param length Length of regex token.
1257     * @return Regex token object.
1258     */
1259    XMLToken valueOfXML(final int start, final int length) {
1260        return new XMLToken(source.getString(start, length));
1261    }
1262
1263    /**
1264     * Scan over a XML token.
1265     *
1266     * @return TRUE if is an XML literal.
1267     */
1268    private boolean scanXMLLiteral() {
1269        assert ch0 == '<' && Character.isJavaIdentifierStart(ch1);
1270        if (XML_LITERALS) {
1271            // Record beginning of xml expression.
1272            final int start = position;
1273
1274            int openCount = 0;
1275
1276            do {
1277                if (ch0 == '<') {
1278                    if (ch1 == '/' && Character.isJavaIdentifierStart(ch2)) {
1279                        skip(3);
1280                        openCount--;
1281                    } else if (Character.isJavaIdentifierStart(ch1)) {
1282                        skip(2);
1283                        openCount++;
1284                    } else if (ch1 == '?') {
1285                        skip(2);
1286                    } else if (ch1 == '!' && ch2 == '-' && ch3 == '-') {
1287                        skip(4);
1288                    } else {
1289                        reset(start);
1290                        return false;
1291                    }
1292
1293                    while (!atEOF() && ch0 != '>') {
1294                        if (ch0 == '/' && ch1 == '>') {
1295                            openCount--;
1296                            skip(1);
1297                            break;
1298                        } else if (ch0 == '\"' || ch0 == '\'') {
1299                            scanString(false);
1300                        } else {
1301                            skip(1);
1302                        }
1303                    }
1304
1305                    if (ch0 != '>') {
1306                        reset(start);
1307                        return false;
1308                    }
1309
1310                    skip(1);
1311                } else if (atEOF()) {
1312                    reset(start);
1313                    return false;
1314                } else {
1315                    skip(1);
1316                }
1317            } while (openCount > 0);
1318
1319            add(XML, start);
1320            return true;
1321        }
1322
1323        return false;
1324    }
1325
1326    /**
1327     * Scan over identifier characters.
1328     *
1329     * @return Length of identifier or zero if none found.
1330     */
1331    private int scanIdentifier() {
1332        final int start = position;
1333
1334        // Make sure first character is valid start character.
1335        if (ch0 == '\\' && ch1 == 'u') {
1336            skip(2);
1337            final int ch = hexSequence(4, TokenType.IDENT);
1338
1339            if (!Character.isJavaIdentifierStart(ch)) {
1340                error(Lexer.message("illegal.identifier.character"), TokenType.IDENT, start, position);
1341            }
1342        } else if (!Character.isJavaIdentifierStart(ch0)) {
1343            // Not an identifier.
1344            return 0;
1345        }
1346
1347        // Make sure remaining characters are valid part characters.
1348        while (!atEOF()) {
1349            if (ch0 == '\\' && ch1 == 'u') {
1350                skip(2);
1351                final int ch = hexSequence(4, TokenType.IDENT);
1352
1353                if (!Character.isJavaIdentifierPart(ch)) {
1354                    error(Lexer.message("illegal.identifier.character"), TokenType.IDENT, start, position);
1355                }
1356            } else if (Character.isJavaIdentifierPart(ch0)) {
1357                skip(1);
1358            } else {
1359                break;
1360            }
1361        }
1362
1363        // Length of identifier sequence.
1364        return position - start;
1365    }
1366
1367    /**
1368     * Compare two identifiers (in content) for equality.
1369     *
1370     * @param aStart  Start of first identifier.
1371     * @param aLength Length of first identifier.
1372     * @param bStart  Start of second identifier.
1373     * @param bLength Length of second identifier.
1374     * @return True if equal.
1375     */
1376    private boolean identifierEqual(final int aStart, final int aLength, final int bStart, final int bLength) {
1377        if (aLength == bLength) {
1378            for (int i = 0; i < aLength; i++) {
1379                if (content[aStart + i] != content[bStart + i]) {
1380                    return false;
1381                }
1382            }
1383
1384            return true;
1385        }
1386
1387        return false;
1388    }
1389
1390    /**
1391     * Detect if a line starts with a marker identifier.
1392     *
1393     * @param identStart  Start of identifier.
1394     * @param identLength Length of identifier.
1395     * @return True if detected.
1396     */
1397    private boolean hasHereMarker(final int identStart, final int identLength) {
1398        // Skip any whitespace.
1399        skipWhitespace(false);
1400
1401        return identifierEqual(identStart, identLength, position, scanIdentifier());
1402    }
1403
1404    /**
1405     * Lexer to service edit strings.
1406     */
1407    private static class EditStringLexer extends Lexer {
1408        /** Type of string literals to emit. */
1409        final TokenType stringType;
1410
1411        /*
1412         * Constructor.
1413         */
1414
1415        EditStringLexer(final Lexer lexer, final TokenType stringType, final State stringState) {
1416            super(lexer, stringState);
1417
1418            this.stringType = stringType;
1419        }
1420
1421        /**
1422         * Lexify the contents of the string.
1423         */
1424        @Override
1425        public void lexify() {
1426            // Record start of string position.
1427            int stringStart = position;
1428            // Indicate that the priming first string has not been emitted.
1429            boolean primed = false;
1430
1431            while (true) {
1432                // Detect end of content.
1433                if (atEOF()) {
1434                    break;
1435                }
1436
1437                // Honour escapes (should be well formed.)
1438                if (ch0 == '\\' && stringType == ESCSTRING) {
1439                    skip(2);
1440
1441                    continue;
1442                }
1443
1444                // If start of expression.
1445                if (ch0 == '$' && ch1 == '{') {
1446                    if (!primed || stringStart != position) {
1447                        if (primed) {
1448                            add(ADD, stringStart, stringStart + 1);
1449                        }
1450
1451                        add(stringType, stringStart, position);
1452                        primed = true;
1453                    }
1454
1455                    // Skip ${
1456                    skip(2);
1457
1458                    // Save expression state.
1459                    final State expressionState = saveState();
1460
1461                    // Start with one open brace.
1462                    int braceCount = 1;
1463
1464                    // Scan for the rest of the string.
1465                    while (!atEOF()) {
1466                        // If closing brace.
1467                        if (ch0 == '}') {
1468                            // Break only only if matching brace.
1469                            if (--braceCount == 0) {
1470                                break;
1471                            }
1472                        } else if (ch0 == '{') {
1473                            // Bump up the brace count.
1474                            braceCount++;
1475                        }
1476
1477                        // Skip to next character.
1478                        skip(1);
1479                    }
1480
1481                    // If braces don't match then report an error.
1482                    if (braceCount != 0) {
1483                        error(Lexer.message("edit.string.missing.brace"), LBRACE, expressionState.position - 1, 1);
1484                    }
1485
1486                    // Mark end of expression.
1487                    expressionState.setLimit(position);
1488                    // Skip closing brace.
1489                    skip(1);
1490
1491                    // Start next string.
1492                    stringStart = position;
1493
1494                    // Concatenate expression.
1495                    add(ADD, expressionState.position, expressionState.position + 1);
1496                    add(LPAREN, expressionState.position, expressionState.position + 1);
1497
1498                    // Scan expression.
1499                    final Lexer lexer = new Lexer(this, expressionState);
1500                    lexer.lexify();
1501
1502                    // Close out expression parenthesis.
1503                    add(RPAREN, position - 1, position);
1504
1505                    continue;
1506                }
1507
1508                // Next character in string.
1509                skip(1);
1510            }
1511
1512            // If there is any unemitted string portion.
1513            if (stringStart != limit) {
1514                // Concatenate remaining string.
1515                if (primed) {
1516                    add(ADD, stringStart, 1);
1517                }
1518
1519                add(stringType, stringStart, limit);
1520            }
1521        }
1522
1523    }
1524
1525    /**
1526     * Edit string for nested expressions.
1527     *
1528     * @param stringType  Type of string literals to emit.
1529     * @param stringState State of lexer at start of string.
1530     */
1531    private void editString(final TokenType stringType, final State stringState) {
1532        // Use special lexer to scan string.
1533        final EditStringLexer lexer = new EditStringLexer(this, stringType, stringState);
1534        lexer.lexify();
1535
1536        // Need to keep lexer informed.
1537        last = stringType;
1538    }
1539
1540    /**
1541     * Scan over a here string.
1542     *
1543     * @return TRUE if is a here string.
1544     */
1545    private boolean scanHereString(final LineInfoReceiver lir, final State oldState) {
1546        assert ch0 == '<' && ch1 == '<';
1547        if (scripting) {
1548            // Record beginning of here string.
1549            final State saved = saveState();
1550
1551            // << or <<<
1552            final boolean excludeLastEOL = ch2 != '<';
1553
1554            if (excludeLastEOL) {
1555                skip(2);
1556            } else {
1557                skip(3);
1558            }
1559
1560            // Scan identifier. It might be quoted, indicating that no string editing should take place.
1561            final char quoteChar = ch0;
1562            final boolean noStringEditing = quoteChar == '"' || quoteChar == '\'';
1563            if (noStringEditing) {
1564                skip(1);
1565            }
1566            final int identStart = position;
1567            final int identLength = scanIdentifier();
1568            if (noStringEditing) {
1569                if (ch0 != quoteChar) {
1570                    error(Lexer.message("here.non.matching.delimiter"), last, position, position);
1571                    restoreState(saved);
1572                    return false;
1573                }
1574                skip(1);
1575            }
1576
1577            // Check for identifier.
1578            if (identLength == 0) {
1579                // Treat as shift.
1580                restoreState(saved);
1581
1582                return false;
1583            }
1584
1585            // Record rest of line.
1586            final State restState = saveState();
1587            // keep line number updated
1588            int lastLine = line;
1589
1590            skipLine(false);
1591            lastLine++;
1592            int lastLinePosition = position;
1593            restState.setLimit(position);
1594
1595            if (oldState.position > position) {
1596                restoreState(oldState);
1597                skipLine(false);
1598            }
1599
1600            // Record beginning of string.
1601            final State stringState = saveState();
1602            int stringEnd = position;
1603
1604            // Hunt down marker.
1605            while (!atEOF()) {
1606                // Skip any whitespace.
1607                skipWhitespace(false);
1608
1609                if (hasHereMarker(identStart, identLength)) {
1610                    break;
1611                }
1612
1613                skipLine(false);
1614                lastLine++;
1615                lastLinePosition = position;
1616                stringEnd = position;
1617            }
1618
1619            // notify last line information
1620            lir.lineInfo(lastLine, lastLinePosition);
1621
1622            // Record end of string.
1623            stringState.setLimit(stringEnd);
1624
1625            // If marker is missing.
1626            if (stringState.isEmpty() || atEOF()) {
1627                error(Lexer.message("here.missing.end.marker", source.getString(identStart, identLength)), last, position, position);
1628                restoreState(saved);
1629
1630                return false;
1631            }
1632
1633            // Remove last end of line if specified.
1634            if (excludeLastEOL) {
1635                // Handles \n.
1636                if (content[stringEnd - 1] == '\n') {
1637                    stringEnd--;
1638                }
1639
1640                // Handles \r and \r\n.
1641                if (content[stringEnd - 1] == '\r') {
1642                    stringEnd--;
1643                }
1644
1645                // Update end of string.
1646                stringState.setLimit(stringEnd);
1647            }
1648
1649            // Edit string if appropriate.
1650            if (!noStringEditing && !stringState.isEmpty()) {
1651                editString(STRING, stringState);
1652            } else {
1653                // Add here string.
1654                add(STRING, stringState.position, stringState.limit);
1655            }
1656
1657            // Scan rest of original line.
1658            final Lexer restLexer = new Lexer(this, restState);
1659
1660            restLexer.lexify();
1661
1662            return true;
1663        }
1664
1665        return false;
1666    }
1667
1668    /**
1669     * Breaks source content down into lex units, adding tokens to the token
1670     * stream. The routine scans until the stream buffer is full. Can be called
1671     * repeatedly until EOF is detected.
1672     */
1673    public void lexify() {
1674        while (!stream.isFull() || nested) {
1675            // Skip over whitespace.
1676            skipWhitespace(true);
1677
1678            // Detect end of file.
1679            if (atEOF()) {
1680                if (!nested) {
1681                    // Add an EOF token at the end.
1682                    add(EOF, position);
1683                }
1684
1685                break;
1686            }
1687
1688            // Check for comments. Note that we don't scan for regexp and other literals here as
1689            // we may not have enough context to distinguish them from similar looking operators.
1690            // Instead we break on ambiguous operators below and let the parser decide.
1691            if (ch0 == '/' && skipComments()) {
1692                continue;
1693            }
1694
1695            if (scripting && ch0 == '#' && skipComments()) {
1696                continue;
1697            }
1698
1699            // TokenType for lookup of delimiter or operator.
1700            TokenType type;
1701
1702            if (ch0 == '.' && convertDigit(ch1, 10) != -1) {
1703                // '.' followed by digit.
1704                // Scan and add a number.
1705                scanNumber();
1706            } else if ((type = TokenLookup.lookupOperator(ch0, ch1, ch2, ch3)) != null) {
1707                if (templateExpressionOpenBraces > 0) {
1708                    if (type == LBRACE) {
1709                        templateExpressionOpenBraces++;
1710                    } else if (type == RBRACE) {
1711                        if (--templateExpressionOpenBraces == 0) {
1712                            break;
1713                        }
1714                    }
1715                }
1716
1717                // Get the number of characters in the token.
1718                final int typeLength = type.getLength();
1719                // Skip that many characters.
1720                skip(typeLength);
1721                // Add operator token.
1722                add(type, position - typeLength);
1723                // Some operator tokens also mark the beginning of regexp, XML, or here string literals.
1724                // We break to let the parser decide what it is.
1725                if (canStartLiteral(type)) {
1726                    break;
1727                } else if (type == LBRACE && pauseOnNextLeftBrace) {
1728                    pauseOnNextLeftBrace = false;
1729                    break;
1730                }
1731            } else if (Character.isJavaIdentifierStart(ch0) || ch0 == '\\' && ch1 == 'u') {
1732                // Scan and add identifier or keyword.
1733                scanIdentifierOrKeyword();
1734            } else if (isStringDelimiter(ch0)) {
1735                // Scan and add a string.
1736                scanString(true);
1737            } else if (Character.isDigit(ch0)) {
1738                // Scan and add a number.
1739                scanNumber();
1740            } else if (isTemplateDelimiter(ch0) && es6) {
1741                // Scan and add template in ES6 mode.
1742                scanTemplate();
1743            } else if (isTemplateDelimiter(ch0) && scripting) {
1744                // Scan and add an exec string ('`') in scripting mode.
1745                scanString(true);
1746            } else {
1747                // Don't recognize this character.
1748                skip(1);
1749                add(ERROR, position - 1);
1750            }
1751        }
1752    }
1753
1754    /**
1755     * Return value of token given its token descriptor.
1756     *
1757     * @param token  Token descriptor.
1758     * @return JavaScript value.
1759     */
1760    Object getValueOf(final long token, final boolean strict) {
1761        final int start = Token.descPosition(token);
1762        final int len   = Token.descLength(token);
1763
1764        switch (Token.descType(token)) {
1765        case DECIMAL:
1766            return Lexer.valueOf(source.getString(start, len), 10); // number
1767        case HEXADECIMAL:
1768            return Lexer.valueOf(source.getString(start + 2, len - 2), 16); // number
1769        case OCTAL_LEGACY:
1770            return Lexer.valueOf(source.getString(start, len), 8); // number
1771        case OCTAL:
1772            return Lexer.valueOf(source.getString(start + 2, len - 2), 8); // number
1773        case BINARY_NUMBER:
1774            return Lexer.valueOf(source.getString(start + 2, len - 2), 2); // number
1775        case FLOATING:
1776            final String str   = source.getString(start, len);
1777            final double value = Double.valueOf(str);
1778            if (str.indexOf('.') != -1) {
1779                return value; //number
1780            }
1781            //anything without an explicit decimal point is still subject to a
1782            //"representable as int or long" check. Then the programmer does not
1783            //explicitly code something as a double. For example new Color(int, int, int)
1784            //and new Color(float, float, float) will get ambiguous for cases like
1785            //new Color(1.0, 1.5, 1.5) if we don't respect the decimal point.
1786            //yet we don't want e.g. 1e6 to be a double unnecessarily
1787            if (JSType.isStrictlyRepresentableAsInt(value)) {
1788                return (int)value;
1789            }
1790            return value;
1791        case STRING:
1792            return source.getString(start, len); // String
1793        case ESCSTRING:
1794            return valueOfString(start, len, strict); // String
1795        case IDENT:
1796            return valueOfIdent(start, len); // String
1797        case REGEX:
1798            return valueOfPattern(start, len); // RegexToken::LexerToken
1799        case TEMPLATE:
1800        case TEMPLATE_HEAD:
1801        case TEMPLATE_MIDDLE:
1802        case TEMPLATE_TAIL:
1803            return valueOfString(start, len, true); // String
1804        case XML:
1805            return valueOfXML(start, len); // XMLToken::LexerToken
1806        case DIRECTIVE_COMMENT:
1807            return source.getString(start, len);
1808        default:
1809            break;
1810        }
1811
1812        return null;
1813    }
1814
1815    /**
1816     * Get the raw string value of a template literal string part.
1817     *
1818     * @param token template string token
1819     * @return raw string
1820     */
1821    public String valueOfRawString(final long token) {
1822        final int start  = Token.descPosition(token);
1823        final int length = Token.descLength(token);
1824
1825        // Save the current position.
1826        final int savePosition = position;
1827        // Calculate the end position.
1828        final int end = start + length;
1829        // Reset to beginning of string.
1830        reset(start);
1831
1832        // Buffer for recording characters.
1833        final StringBuilder sb = new StringBuilder(length);
1834
1835        // Scan until end of string.
1836        while (position < end) {
1837            if (ch0 == '\r') {
1838                // Convert CR-LF or CR to LF line terminator.
1839                sb.append('\n');
1840                skip(ch1 == '\n' ? 2 : 1);
1841            } else {
1842                // Add regular character.
1843                sb.append(ch0);
1844                skip(1);
1845            }
1846        }
1847
1848        // Restore position.
1849        reset(savePosition);
1850
1851        return sb.toString();
1852    }
1853
1854    /**
1855     * Get the correctly localized error message for a given message id format arguments
1856     * @param msgId message id
1857     * @param args  format arguments
1858     * @return message
1859     */
1860    protected static String message(final String msgId, final String... args) {
1861        return ECMAErrors.getMessage("lexer.error." + msgId, args);
1862    }
1863
1864    /**
1865     * Generate a runtime exception
1866     *
1867     * @param message       error message
1868     * @param type          token type
1869     * @param start         start position of lexed error
1870     * @param length        length of lexed error
1871     * @throws ParserException  unconditionally
1872     */
1873    protected void error(final String message, final TokenType type, final int start, final int length) throws ParserException {
1874        final long token     = Token.toDesc(type, start, length);
1875        final int  pos       = Token.descPosition(token);
1876        final int  lineNum   = source.getLine(pos);
1877        final int  columnNum = source.getColumn(pos);
1878        final String formatted = ErrorManager.format(message, source, lineNum, columnNum, token);
1879        throw new ParserException(JSErrorType.SYNTAX_ERROR, formatted, source, lineNum, columnNum, token);
1880    }
1881
1882    /**
1883     * Helper class for Lexer tokens, e.g XML or RegExp tokens.
1884     * This is the abstract superclass
1885     */
1886    public static abstract class LexerToken implements Serializable {
1887        private static final long serialVersionUID = 1L;
1888
1889        private final String expression;
1890
1891        /**
1892         * Constructor
1893         * @param expression token expression
1894         */
1895        protected LexerToken(final String expression) {
1896            this.expression = expression;
1897        }
1898
1899        /**
1900         * Get the expression
1901         * @return expression
1902         */
1903        public String getExpression() {
1904            return expression;
1905        }
1906    }
1907
1908    /**
1909     * Temporary container for regular expressions.
1910     */
1911    public static class RegexToken extends LexerToken {
1912        private static final long serialVersionUID = 1L;
1913
1914        /** Options. */
1915        private final String options;
1916
1917        /**
1918         * Constructor.
1919         *
1920         * @param expression  regexp expression
1921         * @param options     regexp options
1922         */
1923        public RegexToken(final String expression, final String options) {
1924            super(expression);
1925            this.options = options;
1926        }
1927
1928        /**
1929         * Get regexp options
1930         * @return options
1931         */
1932        public String getOptions() {
1933            return options;
1934        }
1935
1936        @Override
1937        public String toString() {
1938            return '/' + getExpression() + '/' + options;
1939        }
1940    }
1941
1942    /**
1943     * Temporary container for XML expression.
1944     */
1945    public static class XMLToken extends LexerToken {
1946        private static final long serialVersionUID = 1L;
1947
1948        /**
1949         * Constructor.
1950         *
1951         * @param expression  XML expression
1952         */
1953        public XMLToken(final String expression) {
1954            super(expression);
1955        }
1956    }
1957}
1958