Lexer.java revision 1408:ac8a32176cbe
1/*
2 * Copyright (c) 2010, 2015, Oracle and/or its affiliates. All rights reserved.
3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4 *
5 * This code is free software; you can redistribute it and/or modify it
6 * under the terms of the GNU General Public License version 2 only, as
7 * published by the Free Software Foundation.  Oracle designates this
8 * particular file as subject to the "Classpath" exception as provided
9 * by Oracle in the LICENSE file that accompanied this code.
10 *
11 * This code is distributed in the hope that it will be useful, but WITHOUT
12 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
14 * version 2 for more details (a copy is included in the LICENSE file that
15 * accompanied this code).
16 *
17 * You should have received a copy of the GNU General Public License version
18 * 2 along with this work; if not, write to the Free Software Foundation,
19 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
20 *
21 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
22 * or visit www.oracle.com if you need additional information or have any
23 * questions.
24 */
25
26package jdk.nashorn.internal.parser;
27
28import static jdk.nashorn.internal.parser.TokenType.ADD;
29import static jdk.nashorn.internal.parser.TokenType.BINARY_NUMBER;
30import static jdk.nashorn.internal.parser.TokenType.COMMENT;
31import static jdk.nashorn.internal.parser.TokenType.DECIMAL;
32import static jdk.nashorn.internal.parser.TokenType.DIRECTIVE_COMMENT;
33import static jdk.nashorn.internal.parser.TokenType.EOF;
34import static jdk.nashorn.internal.parser.TokenType.EOL;
35import static jdk.nashorn.internal.parser.TokenType.ERROR;
36import static jdk.nashorn.internal.parser.TokenType.ESCSTRING;
37import static jdk.nashorn.internal.parser.TokenType.EXECSTRING;
38import static jdk.nashorn.internal.parser.TokenType.FLOATING;
39import static jdk.nashorn.internal.parser.TokenType.FUNCTION;
40import static jdk.nashorn.internal.parser.TokenType.HEXADECIMAL;
41import static jdk.nashorn.internal.parser.TokenType.LBRACE;
42import static jdk.nashorn.internal.parser.TokenType.LPAREN;
43import static jdk.nashorn.internal.parser.TokenType.OCTAL;
44import static jdk.nashorn.internal.parser.TokenType.OCTAL_LEGACY;
45import static jdk.nashorn.internal.parser.TokenType.RBRACE;
46import static jdk.nashorn.internal.parser.TokenType.REGEX;
47import static jdk.nashorn.internal.parser.TokenType.RPAREN;
48import static jdk.nashorn.internal.parser.TokenType.STRING;
49import static jdk.nashorn.internal.parser.TokenType.XML;
50
51import java.io.Serializable;
52
53import jdk.nashorn.internal.runtime.ECMAErrors;
54import jdk.nashorn.internal.runtime.ErrorManager;
55import jdk.nashorn.internal.runtime.JSErrorType;
56import jdk.nashorn.internal.runtime.JSType;
57import jdk.nashorn.internal.runtime.ParserException;
58import jdk.nashorn.internal.runtime.Source;
59import jdk.nashorn.internal.runtime.options.Options;
60
61/**
62 * Responsible for converting source content into a stream of tokens.
63 *
64 */
65@SuppressWarnings("fallthrough")
66public class Lexer extends Scanner {
    /** {@link Integer#MIN_VALUE} widened to a long. */
    private static final long MIN_INT_L = Integer.MIN_VALUE;
    /** {@link Integer#MAX_VALUE} widened to a long. */
    private static final long MAX_INT_L = Integer.MAX_VALUE;

    /** Value of the "nashorn.lexer.xmlliterals" option; when set, '&lt;' may start a literal even outside scripting mode. */
    private static final boolean XML_LITERALS = Options.getBooleanProperty("nashorn.lexer.xmlliterals");

    /** Content source. */
    private final Source source;

    /** Buffered stream for tokens; every token produced by the lexer is put here. */
    private final TokenStream stream;

    /** True if scripting extensions (here strings, edit strings, '`' delimited exec strings) are supported. */
    private final boolean scripting;

    /** True if parsing in ECMAScript 6 mode. */
    private final boolean es6;

    /** True if a nested scan (scan to completion, no EOF). Only the private nested-lexer constructor sets this. */
    private final boolean nested;

    /** Line number of the pending (not yet emitted) EOL token, or -1 if no EOL is pending. */
    int pendingLine;

    /** Position of last EOL + 1. */
    private int linePosition;

    /** Type of last token added. */
    private TokenType last;

    /** True if lexing should pause when a function body is reached. */
    private final boolean pauseOnFunctionBody;
    /** Set once a {@code function} keyword has been seen while {@link #pauseOnFunctionBody} is on. */
    private boolean pauseOnNextLeftBrace;

    private static final String SPACETAB = " \t";  // ASCII space and tab
    private static final String LFCR     = "\n\r"; // line feed and carriage return (ctrl-m)

    /** Characters treated as JavaScript line terminators. */
    private static final String JAVASCRIPT_WHITESPACE_EOL =
        LFCR +
        "\u2028" + // line separator
        "\u2029"   // paragraph separator
        ;
    /** Characters treated as JavaScript whitespace; includes the line terminators above. */
    private static final String JAVASCRIPT_WHITESPACE =
        SPACETAB +
        JAVASCRIPT_WHITESPACE_EOL +
        "\u000b" + // line tabulation (vertical tab)
        "\u000c" + // ff (ctrl-l)
        "\u00a0" + // Latin-1 no-break space
        "\u1680" + // Ogham space mark
        "\u180e" + // Mongolian vowel separator
        "\u2000" + // en quad
        "\u2001" + // em quad
        "\u2002" + // en space
        "\u2003" + // em space
        "\u2004" + // three-per-em space
        "\u2005" + // four-per-em space
        "\u2006" + // six-per-em space
        "\u2007" + // figure space
        "\u2008" + // punctuation space
        "\u2009" + // thin space
        "\u200a" + // hair space
        "\u202f" + // narrow no-break space
        "\u205f" + // medium mathematical space
        "\u3000" + // ideographic space
        "\ufeff"   // byte order mark
        ;

    /**
     * The same set of characters spelled as textual {@code \}{@code uXXXX} escape
     * sequences (backslash, 'u', four hex digits). Returned by
     * {@link #getWhitespaceRegExp()}.
     */
    private static final String JAVASCRIPT_WHITESPACE_IN_REGEXP =
        "\\u000a" + // line feed
        "\\u000d" + // carriage return (ctrl-m)
        "\\u2028" + // line separator
        "\\u2029" + // paragraph separator
        "\\u0009" + // tab
        "\\u0020" + // ASCII space
        "\\u000b" + // line tabulation (vertical tab)
        "\\u000c" + // ff (ctrl-l)
        "\\u00a0" + // Latin-1 no-break space
        "\\u1680" + // Ogham space mark
        "\\u180e" + // Mongolian vowel separator
        "\\u2000" + // en quad
        "\\u2001" + // em quad
        "\\u2002" + // en space
        "\\u2003" + // em space
        "\\u2004" + // three-per-em space
        "\\u2005" + // four-per-em space
        "\\u2006" + // six-per-em space
        "\\u2007" + // figure space
        "\\u2008" + // punctuation space
        "\\u2009" + // thin space
        "\\u200a" + // hair space
        "\\u202f" + // narrow no-break space
        "\\u205f" + // medium mathematical space
        "\\u3000" + // ideographic space
        "\\ufeff"   // byte order mark
        ;
160
161    static String unicodeEscape(final char ch) {
162        final StringBuilder sb = new StringBuilder();
163
164        sb.append("\\u");
165
166        final String hex = Integer.toHexString(ch);
167        for (int i = hex.length(); i < 4; i++) {
168            sb.append('0');
169        }
170        sb.append(hex);
171
172        return sb.toString();
173    }
174
    /**
     * Constructor. Lexes the whole of {@code source} with scripting mode and
     * ECMAScript 6 mode both switched off.
     *
     * @param source    the source
     * @param stream    the token stream that receives the lexed tokens
     */
    public Lexer(final Source source, final TokenStream stream) {
        this(source, stream, false, false);
    }
184
    /**
     * Constructor. Lexes the whole of {@code source}, from position 0 for
     * {@code source.getLength()} characters, without pausing on function
     * bodies.
     *
     * @param source    the source
     * @param stream    the token stream that receives the lexed tokens
     * @param scripting are we in scripting mode
     * @param es6       are we in ECMAScript 6 mode
     */
    public Lexer(final Source source, final TokenStream stream, final boolean scripting, final boolean es6) {
        this(source, 0, source.getLength(), stream, scripting, es6, false);
    }
196
197    /**
198     * Constructor
199     *
200     * @param source    the source
201     * @param start     start position in source from which to start lexing
202     * @param len       length of source segment to lex
203     * @param stream    token stream to lex
204     * @param scripting are we in scripting mode
205     * @param es6       are we in ECMAScript 6 mode
206     * @param pauseOnFunctionBody if true, lexer will return from {@link #lexify()} when it encounters a
207     * function body. This is used with the feature where the parser is skipping nested function bodies to
208     * avoid reading ahead unnecessarily when we skip the function bodies.
209     */
210
211    public Lexer(final Source source, final int start, final int len, final TokenStream stream, final boolean scripting, final boolean es6, final boolean pauseOnFunctionBody) {
212        super(source.getContent(), 1, start, len);
213        this.source      = source;
214        this.stream      = stream;
215        this.scripting   = scripting;
216        this.es6         = es6;
217        this.nested      = false;
218        this.pendingLine = 1;
219        this.last        = EOL;
220
221        this.pauseOnFunctionBody = pauseOnFunctionBody;
222    }
223
224    private Lexer(final Lexer lexer, final State state) {
225        super(lexer, state);
226
227        source = lexer.source;
228        stream = lexer.stream;
229        scripting = lexer.scripting;
230        es6 = lexer.es6;
231        nested = true;
232
233        pendingLine = state.pendingLine;
234        linePosition = state.linePosition;
235        last = EOL;
236        pauseOnFunctionBody = false;
237    }
238
239    static class State extends Scanner.State {
240        /** Pending new line number and position. */
241        public final int pendingLine;
242
243        /** Position of last EOL + 1. */
244        public final int linePosition;
245
246        /** Type of last token added. */
247        public final TokenType last;
248
249        /*
250         * Constructor.
251         */
252
253        State(final int position, final int limit, final int line, final int pendingLine, final int linePosition, final TokenType last) {
254            super(position, limit, line);
255
256            this.pendingLine = pendingLine;
257            this.linePosition = linePosition;
258            this.last = last;
259        }
260    }
261
262    /**
263     * Save the state of the scan.
264     *
265     * @return Captured state.
266     */
267    @Override
268    State saveState() {
269        return new State(position, limit, line, pendingLine, linePosition, last);
270    }
271
272    /**
273     * Restore the state of the scan.
274     *
275     * @param state
276     *            Captured state.
277     */
278    void restoreState(final State state) {
279        super.restoreState(state);
280
281        pendingLine = state.pendingLine;
282        linePosition = state.linePosition;
283        last = state.last;
284    }
285
286    /**
287     * Add a new token to the stream.
288     *
289     * @param type
290     *            Token type.
291     * @param start
292     *            Start position.
293     * @param end
294     *            End position.
295     */
296    protected void add(final TokenType type, final int start, final int end) {
297        // Record last token.
298        last = type;
299
300        // Only emit the last EOL in a cluster.
301        if (type == EOL) {
302            pendingLine = end;
303            linePosition = start;
304        } else {
305            // Write any pending EOL to stream.
306            if (pendingLine != -1) {
307                stream.put(Token.toDesc(EOL, linePosition, pendingLine));
308                pendingLine = -1;
309            }
310
311            // Write token to stream.
312            stream.put(Token.toDesc(type, start, end - start));
313        }
314    }
315
    /**
     * Add a new token to the stream, ending at the current scan position.
     *
     * @param type
     *            Token type.
     * @param start
     *            Start position.
     */
    protected void add(final TokenType type, final int start) {
        add(type, start, position);
    }
327
    /**
     * Return the String of valid whitespace characters for regular
     * expressions in JavaScript. Each character is spelled as a textual
     * unicode escape (backslash, 'u', four hex digits) rather than as the
     * character itself.
     *
     * @return regexp whitespace string
     */
    public static String getWhitespaceRegExp() {
        return JAVASCRIPT_WHITESPACE_IN_REGEXP;
    }
336
337    /**
338     * Skip end of line.
339     *
340     * @param addEOL true if EOL token should be recorded.
341     */
342    private void skipEOL(final boolean addEOL) {
343
344        if (ch0 == '\r') { // detect \r\n pattern
345            skip(1);
346            if (ch0 == '\n') {
347                skip(1);
348            }
349        } else { // all other space, ch0 is guaranteed to be EOL or \0
350            skip(1);
351        }
352
353        // bump up line count
354        line++;
355
356        if (addEOL) {
357            // Add an EOL token.
358            add(EOL, position, line);
359        }
360    }
361
362    /**
363     * Skip over rest of line including end of line.
364     *
365     * @param addEOL true if EOL token should be recorded.
366     */
367    private void skipLine(final boolean addEOL) {
368        // Ignore characters.
369        while (!isEOL(ch0) && !atEOF()) {
370            skip(1);
371        }
372        // Skip over end of line.
373        skipEOL(addEOL);
374    }
375
376    /**
377     * Test whether a char is valid JavaScript whitespace
378     * @param ch a char
379     * @return true if valid JavaScript whitespace
380     */
381    public static boolean isJSWhitespace(final char ch) {
382        return JAVASCRIPT_WHITESPACE.indexOf(ch) != -1;
383    }
384
385    /**
386     * Test whether a char is valid JavaScript end of line
387     * @param ch a char
388     * @return true if valid JavaScript end of line
389     */
390    public static boolean isJSEOL(final char ch) {
391        return JAVASCRIPT_WHITESPACE_EOL.indexOf(ch) != -1;
392    }
393
394    /**
395     * Test if char is a string delimiter, e.g. '\' or '"'.  Also scans exec
396     * strings ('`') in scripting mode.
397     * @param ch a char
398     * @return true if string delimiter
399     */
400    protected boolean isStringDelimiter(final char ch) {
401        return ch == '\'' || ch == '"' || (scripting && ch == '`');
402    }
403
404    /**
405     * Test whether a char is valid JavaScript whitespace
406     * @param ch a char
407     * @return true if valid JavaScript whitespace
408     */
409    protected boolean isWhitespace(final char ch) {
410        return Lexer.isJSWhitespace(ch);
411    }
412
413    /**
414     * Test whether a char is valid JavaScript end of line
415     * @param ch a char
416     * @return true if valid JavaScript end of line
417     */
418    protected boolean isEOL(final char ch) {
419        return Lexer.isJSEOL(ch);
420    }
421
422    /**
423     * Skip over whitespace and detect end of line, adding EOL tokens if
424     * encountered.
425     *
426     * @param addEOL true if EOL tokens should be recorded.
427     */
428    private void skipWhitespace(final boolean addEOL) {
429        while (isWhitespace(ch0)) {
430            if (isEOL(ch0)) {
431                skipEOL(addEOL);
432            } else {
433                skip(1);
434            }
435        }
436    }
437
438    /**
439     * Skip over comments.
440     *
441     * @return True if a comment.
442     */
443    protected boolean skipComments() {
444        // Save the current position.
445        final int start = position;
446
447        if (ch0 == '/') {
448            // Is it a // comment.
449            if (ch1 == '/') {
450                // Skip over //.
451                skip(2);
452
453                boolean directiveComment = false;
454                if ((ch0 == '#' || ch0 == '@') && (ch1 == ' ')) {
455                    directiveComment = true;
456                }
457
458                // Scan for EOL.
459                while (!atEOF() && !isEOL(ch0)) {
460                    skip(1);
461                }
462                // Did detect a comment.
463                add(directiveComment? DIRECTIVE_COMMENT : COMMENT, start);
464                return true;
465            } else if (ch1 == '*') {
466                // Skip over /*.
467                skip(2);
468                // Scan for */.
469                while (!atEOF() && !(ch0 == '*' && ch1 == '/')) {
470                    // If end of line handle else skip character.
471                    if (isEOL(ch0)) {
472                        skipEOL(true);
473                    } else {
474                        skip(1);
475                    }
476                }
477
478                if (atEOF()) {
479                    // TODO - Report closing */ missing in parser.
480                    add(ERROR, start);
481                } else {
482                    // Skip */.
483                    skip(2);
484                }
485
486                // Did detect a comment.
487                add(COMMENT, start);
488                return true;
489            }
490        } else if (ch0 == '#') {
491            assert scripting;
492            // shell style comment
493            // Skip over #.
494            skip(1);
495            // Scan for EOL.
496            while (!atEOF() && !isEOL(ch0)) {
497                skip(1);
498            }
499            // Did detect a comment.
500            add(COMMENT, start);
501            return true;
502        }
503
504        // Not a comment.
505        return false;
506    }
507
508    /**
509     * Convert a regex token to a token object.
510     *
511     * @param start  Position in source content.
512     * @param length Length of regex token.
513     * @return Regex token object.
514     */
515    public RegexToken valueOfPattern(final int start, final int length) {
516        // Save the current position.
517        final int savePosition = position;
518        // Reset to beginning of content.
519        reset(start);
520        // Buffer for recording characters.
521        final StringBuilder sb = new StringBuilder(length);
522
523        // Skip /.
524        skip(1);
525        boolean inBrackets = false;
526        // Scan for closing /, stopping at end of line.
527        while (!atEOF() && ch0 != '/' && !isEOL(ch0) || inBrackets) {
528            // Skip over escaped character.
529            if (ch0 == '\\') {
530                sb.append(ch0);
531                sb.append(ch1);
532                skip(2);
533            } else {
534                if (ch0 == '[') {
535                    inBrackets = true;
536                } else if (ch0 == ']') {
537                    inBrackets = false;
538                }
539
540                // Skip literal character.
541                sb.append(ch0);
542                skip(1);
543            }
544        }
545
546        // Get pattern as string.
547        final String regex = sb.toString();
548
549        // Skip /.
550        skip(1);
551
552        // Options as string.
553        final String options = source.getString(position, scanIdentifier());
554
555        reset(savePosition);
556
557        // Compile the pattern.
558        return new RegexToken(regex, options);
559    }
560
561    /**
562     * Return true if the given token can be the beginning of a literal.
563     *
564     * @param token a token
565     * @return true if token can start a literal.
566     */
567    public boolean canStartLiteral(final TokenType token) {
568        return token.startsWith('/') || ((scripting || XML_LITERALS) && token.startsWith('<'));
569    }
570
571    /**
572     * interface to receive line information for multi-line literals.
573     */
574    protected interface LineInfoReceiver {
575        /**
576         * Receives line information
577         * @param line last line number
578         * @param linePosition position of last line
579         */
580        public void lineInfo(int line, int linePosition);
581    }
582
583    /**
584     * Check whether the given token represents the beginning of a literal. If so scan
585     * the literal and return <tt>true</tt>, otherwise return false.
586     *
587     * @param token the token.
588     * @param startTokenType the token type.
589     * @param lir LineInfoReceiver that receives line info for multi-line string literals.
590     * @return True if a literal beginning with startToken was found and scanned.
591     */
592    protected boolean scanLiteral(final long token, final TokenType startTokenType, final LineInfoReceiver lir) {
593        // Check if it can be a literal.
594        if (!canStartLiteral(startTokenType)) {
595            return false;
596        }
597        // We break on ambiguous tokens so if we already moved on it can't be a literal.
598        if (stream.get(stream.last()) != token) {
599            return false;
600        }
601        // Rewind to token start position
602        reset(Token.descPosition(token));
603
604        if (ch0 == '/') {
605            return scanRegEx();
606        } else if (ch0 == '<') {
607            if (ch1 == '<') {
608                return scanHereString(lir);
609            } else if (Character.isJavaIdentifierStart(ch1)) {
610                return scanXMLLiteral();
611            }
612        }
613
614        return false;
615    }
616
617    /**
618     * Scan over regex literal.
619     *
620     * @return True if a regex literal.
621     */
622    private boolean scanRegEx() {
623        assert ch0 == '/';
624        // Make sure it's not a comment.
625        if (ch1 != '/' && ch1 != '*') {
626            // Record beginning of literal.
627            final int start = position;
628            // Skip /.
629            skip(1);
630            boolean inBrackets = false;
631
632            // Scan for closing /, stopping at end of line.
633            while (!atEOF() && (ch0 != '/' || inBrackets) && !isEOL(ch0)) {
634                // Skip over escaped character.
635                if (ch0 == '\\') {
636                    skip(1);
637                    if (isEOL(ch0)) {
638                        reset(start);
639                        return false;
640                    }
641                    skip(1);
642                } else {
643                    if (ch0 == '[') {
644                        inBrackets = true;
645                    } else if (ch0 == ']') {
646                        inBrackets = false;
647                    }
648
649                    // Skip literal character.
650                    skip(1);
651                }
652            }
653
654            // If regex literal.
655            if (ch0 == '/') {
656                // Skip /.
657                skip(1);
658
659                // Skip over options.
660                while (!atEOF() && Character.isJavaIdentifierPart(ch0) || ch0 == '\\' && ch1 == 'u') {
661                    skip(1);
662                }
663
664                // Add regex token.
665                add(REGEX, start);
666                // Regex literal detected.
667                return true;
668            }
669
670            // False start try again.
671            reset(start);
672        }
673
674        // Regex literal not detected.
675        return false;
676    }
677
678    /**
679     * Convert a digit to a integer.  Can't use Character.digit since we are
680     * restricted to ASCII by the spec.
681     *
682     * @param ch   Character to convert.
683     * @param base Numeric base.
684     *
685     * @return The converted digit or -1 if invalid.
686     */
687    protected static int convertDigit(final char ch, final int base) {
688        int digit;
689
690        if ('0' <= ch && ch <= '9') {
691            digit = ch - '0';
692        } else if ('A' <= ch && ch <= 'Z') {
693            digit = ch - 'A' + 10;
694        } else if ('a' <= ch && ch <= 'z') {
695            digit = ch - 'a' + 10;
696        } else {
697            return -1;
698        }
699
700        return digit < base ? digit : -1;
701    }
702
703
704    /**
705     * Get the value of a hexadecimal numeric sequence.
706     *
707     * @param length Number of digits.
708     * @param type   Type of token to report against.
709     * @return Value of sequence or < 0 if no digits.
710     */
711    private int hexSequence(final int length, final TokenType type) {
712        int value = 0;
713
714        for (int i = 0; i < length; i++) {
715            final int digit = convertDigit(ch0, 16);
716
717            if (digit == -1) {
718                error(Lexer.message("invalid.hex"), type, position, limit);
719                return i == 0 ? -1 : value;
720            }
721
722            value = digit | value << 4;
723            skip(1);
724        }
725
726        return value;
727    }
728
729    /**
730     * Get the value of an octal numeric sequence. This parses up to 3 digits with a maximum value of 255.
731     *
732     * @return Value of sequence.
733     */
734    private int octalSequence() {
735        int value = 0;
736
737        for (int i = 0; i < 3; i++) {
738            final int digit = convertDigit(ch0, 8);
739
740            if (digit == -1) {
741                break;
742            }
743            value = digit | value << 3;
744            skip(1);
745
746            if (i == 1 && value >= 32) {
747                break;
748            }
749        }
750        return value;
751    }
752
753    /**
754     * Convert a string to a JavaScript identifier.
755     *
756     * @param start  Position in source content.
757     * @param length Length of token.
758     * @return Ident string or null if an error.
759     */
760    private String valueOfIdent(final int start, final int length) throws RuntimeException {
761        // Save the current position.
762        final int savePosition = position;
763        // End of scan.
764        final int end = start + length;
765        // Reset to beginning of content.
766        reset(start);
767        // Buffer for recording characters.
768        final StringBuilder sb = new StringBuilder(length);
769
770        // Scan until end of line or end of file.
771        while (!atEOF() && position < end && !isEOL(ch0)) {
772            // If escape character.
773            if (ch0 == '\\' && ch1 == 'u') {
774                skip(2);
775                final int ch = hexSequence(4, TokenType.IDENT);
776                if (isWhitespace((char)ch)) {
777                    return null;
778                }
779                if (ch < 0) {
780                    sb.append('\\');
781                    sb.append('u');
782                } else {
783                    sb.append((char)ch);
784                }
785            } else {
786                // Add regular character.
787                sb.append(ch0);
788                skip(1);
789            }
790        }
791
792        // Restore position.
793        reset(savePosition);
794
795        return sb.toString();
796    }
797
798    /**
799     * Scan over and identifier or keyword. Handles identifiers containing
800     * encoded Unicode chars.
801     *
802     * Example:
803     *
804     * var \u0042 = 44;
805     */
806    private void scanIdentifierOrKeyword() {
807        // Record beginning of identifier.
808        final int start = position;
809        // Scan identifier.
810        final int length = scanIdentifier();
811        // Check to see if it is a keyword.
812        final TokenType type = TokenLookup.lookupKeyword(content, start, length);
813        if (type == FUNCTION && pauseOnFunctionBody) {
814            pauseOnNextLeftBrace = true;
815        }
816        // Add keyword or identifier token.
817        add(type, start);
818    }
819
820    /**
821     * Convert a string to a JavaScript string object.
822     *
823     * @param start  Position in source content.
824     * @param length Length of token.
825     * @return JavaScript string object.
826     */
827    private String valueOfString(final int start, final int length, final boolean strict) throws RuntimeException {
828        // Save the current position.
829        final int savePosition = position;
830        // Calculate the end position.
831        final int end = start + length;
832        // Reset to beginning of string.
833        reset(start);
834
835        // Buffer for recording characters.
836        final StringBuilder sb = new StringBuilder(length);
837
838        // Scan until end of string.
839        while (position < end) {
840            // If escape character.
841            if (ch0 == '\\') {
842                skip(1);
843
844                final char next = ch0;
845                final int afterSlash = position;
846
847                skip(1);
848
849                // Special characters.
850                switch (next) {
851                case '0':
852                case '1':
853                case '2':
854                case '3':
855                case '4':
856                case '5':
857                case '6':
858                case '7': {
859                    if (strict) {
860                        // "\0" itself is allowed in strict mode. Only other 'real'
861                        // octal escape sequences are not allowed (eg. "\02", "\31").
862                        // See section 7.8.4 String literals production EscapeSequence
863                        if (next != '0' || (ch0 >= '0' && ch0 <= '9')) {
864                            error(Lexer.message("strict.no.octal"), STRING, position, limit);
865                        }
866                    }
867                    reset(afterSlash);
868                    // Octal sequence.
869                    final int ch = octalSequence();
870
871                    if (ch < 0) {
872                        sb.append('\\');
873                        sb.append('x');
874                    } else {
875                        sb.append((char)ch);
876                    }
877                    break;
878                }
879                case 'n':
880                    sb.append('\n');
881                    break;
882                case 't':
883                    sb.append('\t');
884                    break;
885                case 'b':
886                    sb.append('\b');
887                    break;
888                case 'f':
889                    sb.append('\f');
890                    break;
891                case 'r':
892                    sb.append('\r');
893                    break;
894                case '\'':
895                    sb.append('\'');
896                    break;
897                case '\"':
898                    sb.append('\"');
899                    break;
900                case '\\':
901                    sb.append('\\');
902                    break;
903                case '\r': // CR | CRLF
904                    if (ch0 == '\n') {
905                        skip(1);
906                    }
907                    // fall through
908                case '\n': // LF
909                case '\u2028': // LS
910                case '\u2029': // PS
911                    // continue on the next line, slash-return continues string
912                    // literal
913                    break;
914                case 'x': {
915                    // Hex sequence.
916                    final int ch = hexSequence(2, STRING);
917
918                    if (ch < 0) {
919                        sb.append('\\');
920                        sb.append('x');
921                    } else {
922                        sb.append((char)ch);
923                    }
924                }
925                    break;
926                case 'u': {
927                    // Unicode sequence.
928                    final int ch = hexSequence(4, STRING);
929
930                    if (ch < 0) {
931                        sb.append('\\');
932                        sb.append('u');
933                    } else {
934                        sb.append((char)ch);
935                    }
936                }
937                    break;
938                case 'v':
939                    sb.append('\u000B');
940                    break;
941                // All other characters.
942                default:
943                    sb.append(next);
944                    break;
945                }
946            } else {
947                // Add regular character.
948                sb.append(ch0);
949                skip(1);
950            }
951        }
952
953        // Restore position.
954        reset(savePosition);
955
956        return sb.toString();
957    }
958
959    /**
960     * Scan over a string literal.
961     * @param add true if we nare not just scanning but should actually modify the token stream
962     */
963    protected void scanString(final boolean add) {
964        // Type of string.
965        TokenType type = STRING;
966        // Record starting quote.
967        final char quote = ch0;
968        // Skip over quote.
969        skip(1);
970
971        // Record beginning of string content.
972        final State stringState = saveState();
973
974        // Scan until close quote or end of line.
975        while (!atEOF() && ch0 != quote && !isEOL(ch0)) {
976            // Skip over escaped character.
977            if (ch0 == '\\') {
978                type = ESCSTRING;
979                skip(1);
980                if (! isEscapeCharacter(ch0)) {
981                    error(Lexer.message("invalid.escape.char"), STRING, position, limit);
982                }
983                if (isEOL(ch0)) {
984                    // Multiline string literal
985                    skipEOL(false);
986                    continue;
987                }
988            }
989            // Skip literal character.
990            skip(1);
991        }
992
993        // If close quote.
994        if (ch0 == quote) {
995            // Skip close quote.
996            skip(1);
997        } else {
998            error(Lexer.message("missing.close.quote"), STRING, position, limit);
999        }
1000
1001        // If not just scanning.
1002        if (add) {
1003            // Record end of string.
1004            stringState.setLimit(position - 1);
1005
1006            if (scripting && !stringState.isEmpty()) {
1007                switch (quote) {
1008                case '`':
1009                    // Mark the beginning of an exec string.
1010                    add(EXECSTRING, stringState.position, stringState.limit);
1011                    // Frame edit string with left brace.
1012                    add(LBRACE, stringState.position, stringState.position);
1013                    // Process edit string.
1014                    editString(type, stringState);
1015                    // Frame edit string with right brace.
1016                    add(RBRACE, stringState.limit, stringState.limit);
1017                    break;
1018                case '"':
1019                    // Only edit double quoted strings.
1020                    editString(type, stringState);
1021                    break;
1022                case '\'':
1023                    // Add string token without editing.
1024                    add(type, stringState.position, stringState.limit);
1025                    break;
1026                default:
1027                    break;
1028                }
1029            } else {
1030                /// Add string token without editing.
1031                add(type, stringState.position, stringState.limit);
1032            }
1033        }
1034    }
1035
1036    /**
1037     * Is the given character a valid escape char after "\" ?
1038     *
1039     * @param ch character to be checked
1040     * @return if the given character is valid after "\"
1041     */
1042    protected boolean isEscapeCharacter(final char ch) {
1043        return true;
1044    }
1045
1046    /**
1047     * Convert string to number.
1048     *
1049     * @param valueString  String to convert.
1050     * @param radix        Numeric base.
1051     * @return Converted number.
1052     */
1053    private static Number valueOf(final String valueString, final int radix) throws NumberFormatException {
1054        try {
1055            final long value = Long.parseLong(valueString, radix);
1056            if(value >= MIN_INT_L && value <= MAX_INT_L) {
1057                return (int)value;
1058            }
1059            return value;
1060        } catch (final NumberFormatException e) {
1061            if (radix == 10) {
1062                return Double.valueOf(valueString);
1063            }
1064
1065            double value = 0.0;
1066
1067            for (int i = 0; i < valueString.length(); i++) {
1068                final char ch = valueString.charAt(i);
1069                // Preverified, should always be a valid digit.
1070                final int digit = convertDigit(ch, radix);
1071                value *= radix;
1072                value += digit;
1073            }
1074
1075            return value;
1076        }
1077    }
1078
1079    /**
1080     * Scan a number.
1081     */
1082    protected void scanNumber() {
1083        // Record beginning of number.
1084        final int start = position;
1085        // Assume value is a decimal.
1086        TokenType type = DECIMAL;
1087
1088        // First digit of number.
1089        int digit = convertDigit(ch0, 10);
1090
1091        // If number begins with 0x.
1092        if (digit == 0 && (ch1 == 'x' || ch1 == 'X') && convertDigit(ch2, 16) != -1) {
1093            // Skip over 0xN.
1094            skip(3);
1095            // Skip over remaining digits.
1096            while (convertDigit(ch0, 16) != -1) {
1097                skip(1);
1098            }
1099
1100            type = HEXADECIMAL;
1101        } else if (digit == 0 && es6 && (ch1 == 'o' || ch1 == 'O') && convertDigit(ch2, 8) != -1) {
1102            // Skip over 0oN.
1103            skip(3);
1104            // Skip over remaining digits.
1105            while (convertDigit(ch0, 8) != -1) {
1106                skip(1);
1107            }
1108
1109            type = OCTAL;
1110        } else if (digit == 0 && es6 && (ch1 == 'b' || ch1 == 'B') && convertDigit(ch2, 2) != -1) {
1111            // Skip over 0bN.
1112            skip(3);
1113            // Skip over remaining digits.
1114            while (convertDigit(ch0, 2) != -1) {
1115                skip(1);
1116            }
1117
1118            type = BINARY_NUMBER;
1119        } else {
1120            // Check for possible octal constant.
1121            boolean octal = digit == 0;
1122            // Skip first digit if not leading '.'.
1123            if (digit != -1) {
1124                skip(1);
1125            }
1126
1127            // Skip remaining digits.
1128            while ((digit = convertDigit(ch0, 10)) != -1) {
1129                // Check octal only digits.
1130                octal = octal && digit < 8;
1131                // Skip digit.
1132                skip(1);
1133            }
1134
1135            if (octal && position - start > 1) {
1136                type = OCTAL_LEGACY;
1137            } else if (ch0 == '.' || ch0 == 'E' || ch0 == 'e') {
1138                // Must be a double.
1139                if (ch0 == '.') {
1140                    // Skip period.
1141                    skip(1);
1142                    // Skip mantissa.
1143                    while (convertDigit(ch0, 10) != -1) {
1144                        skip(1);
1145                    }
1146                }
1147
1148                // Detect exponent.
1149                if (ch0 == 'E' || ch0 == 'e') {
1150                    // Skip E.
1151                    skip(1);
1152                    // Detect and skip exponent sign.
1153                    if (ch0 == '+' || ch0 == '-') {
1154                        skip(1);
1155                    }
1156                    // Skip exponent.
1157                    while (convertDigit(ch0, 10) != -1) {
1158                        skip(1);
1159                    }
1160                }
1161
1162                type = FLOATING;
1163            }
1164        }
1165
1166        if (Character.isJavaIdentifierStart(ch0)) {
1167            error(Lexer.message("missing.space.after.number"), type, position, 1);
1168        }
1169
1170        // Add number token.
1171        add(type, start);
1172    }
1173
1174    /**
1175     * Convert a regex token to a token object.
1176     *
1177     * @param start  Position in source content.
1178     * @param length Length of regex token.
1179     * @return Regex token object.
1180     */
1181    XMLToken valueOfXML(final int start, final int length) {
1182        return new XMLToken(source.getString(start, length));
1183    }
1184
1185    /**
1186     * Scan over a XML token.
1187     *
1188     * @return TRUE if is an XML literal.
1189     */
1190    private boolean scanXMLLiteral() {
1191        assert ch0 == '<' && Character.isJavaIdentifierStart(ch1);
1192        if (XML_LITERALS) {
1193            // Record beginning of xml expression.
1194            final int start = position;
1195
1196            int openCount = 0;
1197
1198            do {
1199                if (ch0 == '<') {
1200                    if (ch1 == '/' && Character.isJavaIdentifierStart(ch2)) {
1201                        skip(3);
1202                        openCount--;
1203                    } else if (Character.isJavaIdentifierStart(ch1)) {
1204                        skip(2);
1205                        openCount++;
1206                    } else if (ch1 == '?') {
1207                        skip(2);
1208                    } else if (ch1 == '!' && ch2 == '-' && ch3 == '-') {
1209                        skip(4);
1210                    } else {
1211                        reset(start);
1212                        return false;
1213                    }
1214
1215                    while (!atEOF() && ch0 != '>') {
1216                        if (ch0 == '/' && ch1 == '>') {
1217                            openCount--;
1218                            skip(1);
1219                            break;
1220                        } else if (ch0 == '\"' || ch0 == '\'') {
1221                            scanString(false);
1222                        } else {
1223                            skip(1);
1224                        }
1225                    }
1226
1227                    if (ch0 != '>') {
1228                        reset(start);
1229                        return false;
1230                    }
1231
1232                    skip(1);
1233                } else if (atEOF()) {
1234                    reset(start);
1235                    return false;
1236                } else {
1237                    skip(1);
1238                }
1239            } while (openCount > 0);
1240
1241            add(XML, start);
1242            return true;
1243        }
1244
1245        return false;
1246    }
1247
1248    /**
1249     * Scan over identifier characters.
1250     *
1251     * @return Length of identifier or zero if none found.
1252     */
1253    private int scanIdentifier() {
1254        final int start = position;
1255
1256        // Make sure first character is valid start character.
1257        if (ch0 == '\\' && ch1 == 'u') {
1258            skip(2);
1259            final int ch = hexSequence(4, TokenType.IDENT);
1260
1261            if (!Character.isJavaIdentifierStart(ch)) {
1262                error(Lexer.message("illegal.identifier.character"), TokenType.IDENT, start, position);
1263            }
1264        } else if (!Character.isJavaIdentifierStart(ch0)) {
1265            // Not an identifier.
1266            return 0;
1267        }
1268
1269        // Make sure remaining characters are valid part characters.
1270        while (!atEOF()) {
1271            if (ch0 == '\\' && ch1 == 'u') {
1272                skip(2);
1273                final int ch = hexSequence(4, TokenType.IDENT);
1274
1275                if (!Character.isJavaIdentifierPart(ch)) {
1276                    error(Lexer.message("illegal.identifier.character"), TokenType.IDENT, start, position);
1277                }
1278            } else if (Character.isJavaIdentifierPart(ch0)) {
1279                skip(1);
1280            } else {
1281                break;
1282            }
1283        }
1284
1285        // Length of identifier sequence.
1286        return position - start;
1287    }
1288
1289    /**
1290     * Compare two identifiers (in content) for equality.
1291     *
1292     * @param aStart  Start of first identifier.
1293     * @param aLength Length of first identifier.
1294     * @param bStart  Start of second identifier.
1295     * @param bLength Length of second identifier.
1296     * @return True if equal.
1297     */
1298    private boolean identifierEqual(final int aStart, final int aLength, final int bStart, final int bLength) {
1299        if (aLength == bLength) {
1300            for (int i = 0; i < aLength; i++) {
1301                if (content[aStart + i] != content[bStart + i]) {
1302                    return false;
1303                }
1304            }
1305
1306            return true;
1307        }
1308
1309        return false;
1310    }
1311
1312    /**
1313     * Detect if a line starts with a marker identifier.
1314     *
1315     * @param identStart  Start of identifier.
1316     * @param identLength Length of identifier.
1317     * @return True if detected.
1318     */
1319    private boolean hasHereMarker(final int identStart, final int identLength) {
1320        // Skip any whitespace.
1321        skipWhitespace(false);
1322
1323        return identifierEqual(identStart, identLength, position, scanIdentifier());
1324    }
1325
1326    /**
1327     * Lexer to service edit strings.
1328     */
1329    private static class EditStringLexer extends Lexer {
1330        /** Type of string literals to emit. */
1331        final TokenType stringType;
1332
1333        /*
1334         * Constructor.
1335         */
1336
1337        EditStringLexer(final Lexer lexer, final TokenType stringType, final State stringState) {
1338            super(lexer, stringState);
1339
1340            this.stringType = stringType;
1341        }
1342
1343        /**
1344         * Lexify the contents of the string.
1345         */
1346        @Override
1347        public void lexify() {
1348            // Record start of string position.
1349            int stringStart = position;
1350            // Indicate that the priming first string has not been emitted.
1351            boolean primed = false;
1352
1353            while (true) {
1354                // Detect end of content.
1355                if (atEOF()) {
1356                    break;
1357                }
1358
1359                // Honour escapes (should be well formed.)
1360                if (ch0 == '\\' && stringType == ESCSTRING) {
1361                    skip(2);
1362
1363                    continue;
1364                }
1365
1366                // If start of expression.
1367                if (ch0 == '$' && ch1 == '{') {
1368                    if (!primed || stringStart != position) {
1369                        if (primed) {
1370                            add(ADD, stringStart, stringStart + 1);
1371                        }
1372
1373                        add(stringType, stringStart, position);
1374                        primed = true;
1375                    }
1376
1377                    // Skip ${
1378                    skip(2);
1379
1380                    // Save expression state.
1381                    final State expressionState = saveState();
1382
1383                    // Start with one open brace.
1384                    int braceCount = 1;
1385
1386                    // Scan for the rest of the string.
1387                    while (!atEOF()) {
1388                        // If closing brace.
1389                        if (ch0 == '}') {
1390                            // Break only only if matching brace.
1391                            if (--braceCount == 0) {
1392                                break;
1393                            }
1394                        } else if (ch0 == '{') {
1395                            // Bump up the brace count.
1396                            braceCount++;
1397                        }
1398
1399                        // Skip to next character.
1400                        skip(1);
1401                    }
1402
1403                    // If braces don't match then report an error.
1404                    if (braceCount != 0) {
1405                        error(Lexer.message("edit.string.missing.brace"), LBRACE, expressionState.position - 1, 1);
1406                    }
1407
1408                    // Mark end of expression.
1409                    expressionState.setLimit(position);
1410                    // Skip closing brace.
1411                    skip(1);
1412
1413                    // Start next string.
1414                    stringStart = position;
1415
1416                    // Concatenate expression.
1417                    add(ADD, expressionState.position, expressionState.position + 1);
1418                    add(LPAREN, expressionState.position, expressionState.position + 1);
1419
1420                    // Scan expression.
1421                    final Lexer lexer = new Lexer(this, expressionState);
1422                    lexer.lexify();
1423
1424                    // Close out expression parenthesis.
1425                    add(RPAREN, position - 1, position);
1426
1427                    continue;
1428                }
1429
1430                // Next character in string.
1431                skip(1);
1432            }
1433
1434            // If there is any unemitted string portion.
1435            if (stringStart != limit) {
1436                // Concatenate remaining string.
1437                if (primed) {
1438                    add(ADD, stringStart, 1);
1439                }
1440
1441                add(stringType, stringStart, limit);
1442            }
1443        }
1444
1445    }
1446
1447    /**
1448     * Edit string for nested expressions.
1449     *
1450     * @param stringType  Type of string literals to emit.
1451     * @param stringState State of lexer at start of string.
1452     */
1453    private void editString(final TokenType stringType, final State stringState) {
1454        // Use special lexer to scan string.
1455        final EditStringLexer lexer = new EditStringLexer(this, stringType, stringState);
1456        lexer.lexify();
1457
1458        // Need to keep lexer informed.
1459        last = stringType;
1460    }
1461
1462    /**
1463     * Scan over a here string.
1464     *
1465     * @return TRUE if is a here string.
1466     */
1467    private boolean scanHereString(final LineInfoReceiver lir) {
1468        assert ch0 == '<' && ch1 == '<';
1469        if (scripting) {
1470            // Record beginning of here string.
1471            final State saved = saveState();
1472
1473            // << or <<<
1474            final boolean excludeLastEOL = ch2 != '<';
1475
1476            if (excludeLastEOL) {
1477                skip(2);
1478            } else {
1479                skip(3);
1480            }
1481
1482            // Scan identifier. It might be quoted, indicating that no string editing should take place.
1483            final char quoteChar = ch0;
1484            final boolean noStringEditing = quoteChar == '"' || quoteChar == '\'';
1485            if (noStringEditing) {
1486                skip(1);
1487            }
1488            final int identStart = position;
1489            final int identLength = scanIdentifier();
1490            if (noStringEditing) {
1491                if (ch0 != quoteChar) {
1492                    error(Lexer.message("here.non.matching.delimiter"), last, position, position);
1493                    restoreState(saved);
1494                    return false;
1495                }
1496                skip(1);
1497            }
1498
1499            // Check for identifier.
1500            if (identLength == 0) {
1501                // Treat as shift.
1502                restoreState(saved);
1503
1504                return false;
1505            }
1506
1507            // Record rest of line.
1508            final State restState = saveState();
1509            // keep line number updated
1510            int lastLine = line;
1511
1512            skipLine(false);
1513            lastLine++;
1514            int lastLinePosition = position;
1515            restState.setLimit(position);
1516
1517            // Record beginning of string.
1518            final State stringState = saveState();
1519            int stringEnd = position;
1520
1521            // Hunt down marker.
1522            while (!atEOF()) {
1523                // Skip any whitespace.
1524                skipWhitespace(false);
1525
1526                if (hasHereMarker(identStart, identLength)) {
1527                    break;
1528                }
1529
1530                skipLine(false);
1531                lastLine++;
1532                lastLinePosition = position;
1533                stringEnd = position;
1534            }
1535
1536            // notify last line information
1537            lir.lineInfo(lastLine, lastLinePosition);
1538
1539            // Record end of string.
1540            stringState.setLimit(stringEnd);
1541
1542            // If marker is missing.
1543            if (stringState.isEmpty() || atEOF()) {
1544                error(Lexer.message("here.missing.end.marker", source.getString(identStart, identLength)), last, position, position);
1545                restoreState(saved);
1546
1547                return false;
1548            }
1549
1550            // Remove last end of line if specified.
1551            if (excludeLastEOL) {
1552                // Handles \n.
1553                if (content[stringEnd - 1] == '\n') {
1554                    stringEnd--;
1555                }
1556
1557                // Handles \r and \r\n.
1558                if (content[stringEnd - 1] == '\r') {
1559                    stringEnd--;
1560                }
1561
1562                // Update end of string.
1563                stringState.setLimit(stringEnd);
1564            }
1565
1566            // Edit string if appropriate.
1567            if (!noStringEditing && !stringState.isEmpty()) {
1568                editString(STRING, stringState);
1569            } else {
1570                // Add here string.
1571                add(STRING, stringState.position, stringState.limit);
1572            }
1573
1574            // Scan rest of original line.
1575            final Lexer restLexer = new Lexer(this, restState);
1576
1577            restLexer.lexify();
1578
1579            return true;
1580        }
1581
1582        return false;
1583    }
1584
1585    /**
1586     * Breaks source content down into lex units, adding tokens to the token
1587     * stream. The routine scans until the stream buffer is full. Can be called
1588     * repeatedly until EOF is detected.
1589     */
1590    public void lexify() {
1591        while (!stream.isFull() || nested) {
1592            // Skip over whitespace.
1593            skipWhitespace(true);
1594
1595            // Detect end of file.
1596            if (atEOF()) {
1597                if (!nested) {
1598                    // Add an EOF token at the end.
1599                    add(EOF, position);
1600                }
1601
1602                break;
1603            }
1604
1605            // Check for comments. Note that we don't scan for regexp and other literals here as
1606            // we may not have enough context to distinguish them from similar looking operators.
1607            // Instead we break on ambiguous operators below and let the parser decide.
1608            if (ch0 == '/' && skipComments()) {
1609                continue;
1610            }
1611
1612            if (scripting && ch0 == '#' && skipComments()) {
1613                continue;
1614            }
1615
1616            // TokenType for lookup of delimiter or operator.
1617            TokenType type;
1618
1619            if (ch0 == '.' && convertDigit(ch1, 10) != -1) {
1620                // '.' followed by digit.
1621                // Scan and add a number.
1622                scanNumber();
1623            } else if ((type = TokenLookup.lookupOperator(ch0, ch1, ch2, ch3)) != null) {
1624                // Get the number of characters in the token.
1625                final int typeLength = type.getLength();
1626                // Skip that many characters.
1627                skip(typeLength);
1628                // Add operator token.
1629                add(type, position - typeLength);
1630                // Some operator tokens also mark the beginning of regexp, XML, or here string literals.
1631                // We break to let the parser decide what it is.
1632                if (canStartLiteral(type)) {
1633                    break;
1634                } else if (type == LBRACE && pauseOnNextLeftBrace) {
1635                    pauseOnNextLeftBrace = false;
1636                    break;
1637                }
1638            } else if (Character.isJavaIdentifierStart(ch0) || ch0 == '\\' && ch1 == 'u') {
1639                // Scan and add identifier or keyword.
1640                scanIdentifierOrKeyword();
1641            } else if (isStringDelimiter(ch0)) {
1642                // Scan and add a string.
1643                scanString(true);
1644            } else if (Character.isDigit(ch0)) {
1645                // Scan and add a number.
1646                scanNumber();
1647            } else {
1648                // Don't recognize this character.
1649                skip(1);
1650                add(ERROR, position - 1);
1651            }
1652        }
1653    }
1654
1655    /**
1656     * Return value of token given its token descriptor.
1657     *
1658     * @param token  Token descriptor.
1659     * @return JavaScript value.
1660     */
1661    Object getValueOf(final long token, final boolean strict) {
1662        final int start = Token.descPosition(token);
1663        final int len   = Token.descLength(token);
1664
1665        switch (Token.descType(token)) {
1666        case DECIMAL:
1667            return Lexer.valueOf(source.getString(start, len), 10); // number
1668        case HEXADECIMAL:
1669            return Lexer.valueOf(source.getString(start + 2, len - 2), 16); // number
1670        case OCTAL_LEGACY:
1671            return Lexer.valueOf(source.getString(start, len), 8); // number
1672        case OCTAL:
1673            return Lexer.valueOf(source.getString(start + 2, len - 2), 8); // number
1674        case BINARY_NUMBER:
1675            return Lexer.valueOf(source.getString(start + 2, len - 2), 2); // number
1676        case FLOATING:
1677            final String str   = source.getString(start, len);
1678            final double value = Double.valueOf(str);
1679            if (str.indexOf('.') != -1) {
1680                return value; //number
1681            }
1682            //anything without an explicit decimal point is still subject to a
1683            //"representable as int or long" check. Then the programmer does not
1684            //explicitly code something as a double. For example new Color(int, int, int)
1685            //and new Color(float, float, float) will get ambiguous for cases like
1686            //new Color(1.0, 1.5, 1.5) if we don't respect the decimal point.
1687            //yet we don't want e.g. 1e6 to be a double unnecessarily
1688            if (JSType.isStrictlyRepresentableAsInt(value)) {
1689                return (int)value;
1690            } else if (JSType.isStrictlyRepresentableAsLong(value)) {
1691                return (long)value;
1692            }
1693            return value;
1694        case STRING:
1695            return source.getString(start, len); // String
1696        case ESCSTRING:
1697            return valueOfString(start, len, strict); // String
1698        case IDENT:
1699            return valueOfIdent(start, len); // String
1700        case REGEX:
1701            return valueOfPattern(start, len); // RegexToken::LexerToken
1702        case XML:
1703            return valueOfXML(start, len); // XMLToken::LexerToken
1704        case DIRECTIVE_COMMENT:
1705            return source.getString(start, len);
1706        default:
1707            break;
1708        }
1709
1710        return null;
1711    }
1712
1713    /**
1714     * Get the correctly localized error message for a given message id format arguments
1715     * @param msgId message id
1716     * @param args  format arguments
1717     * @return message
1718     */
1719    protected static String message(final String msgId, final String... args) {
1720        return ECMAErrors.getMessage("lexer.error." + msgId, args);
1721    }
1722
1723    /**
1724     * Generate a runtime exception
1725     *
1726     * @param message       error message
1727     * @param type          token type
1728     * @param start         start position of lexed error
1729     * @param length        length of lexed error
1730     * @throws ParserException  unconditionally
1731     */
1732    protected void error(final String message, final TokenType type, final int start, final int length) throws ParserException {
1733        final long token     = Token.toDesc(type, start, length);
1734        final int  pos       = Token.descPosition(token);
1735        final int  lineNum   = source.getLine(pos);
1736        final int  columnNum = source.getColumn(pos);
1737        final String formatted = ErrorManager.format(message, source, lineNum, columnNum, token);
1738        throw new ParserException(JSErrorType.SYNTAX_ERROR, formatted, source, lineNum, columnNum, token);
1739    }
1740
1741    /**
1742     * Helper class for Lexer tokens, e.g XML or RegExp tokens.
1743     * This is the abstract superclass
1744     */
1745    public static abstract class LexerToken implements Serializable {
1746        private static final long serialVersionUID = 1L;
1747
1748        private final String expression;
1749
1750        /**
1751         * Constructor
1752         * @param expression token expression
1753         */
1754        protected LexerToken(final String expression) {
1755            this.expression = expression;
1756        }
1757
1758        /**
1759         * Get the expression
1760         * @return expression
1761         */
1762        public String getExpression() {
1763            return expression;
1764        }
1765    }
1766
1767    /**
1768     * Temporary container for regular expressions.
1769     */
1770    public static class RegexToken extends LexerToken {
1771        private static final long serialVersionUID = 1L;
1772
1773        /** Options. */
1774        private final String options;
1775
1776        /**
1777         * Constructor.
1778         *
1779         * @param expression  regexp expression
1780         * @param options     regexp options
1781         */
1782        public RegexToken(final String expression, final String options) {
1783            super(expression);
1784            this.options = options;
1785        }
1786
1787        /**
1788         * Get regexp options
1789         * @return options
1790         */
1791        public String getOptions() {
1792            return options;
1793        }
1794
1795        @Override
1796        public String toString() {
1797            return '/' + getExpression() + '/' + options;
1798        }
1799    }
1800
1801    /**
1802     * Temporary container for XML expression.
1803     */
1804    public static class XMLToken extends LexerToken {
1805        private static final long serialVersionUID = 1L;
1806
1807        /**
1808         * Constructor.
1809         *
1810         * @param expression  XML expression
1811         */
1812        public XMLToken(final String expression) {
1813            super(expression);
1814        }
1815    }
1816}
1817