Lexer.java revision 1408:ac8a32176cbe
1/* 2 * Copyright (c) 2010, 2015, Oracle and/or its affiliates. All rights reserved. 3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 4 * 5 * This code is free software; you can redistribute it and/or modify it 6 * under the terms of the GNU General Public License version 2 only, as 7 * published by the Free Software Foundation. Oracle designates this 8 * particular file as subject to the "Classpath" exception as provided 9 * by Oracle in the LICENSE file that accompanied this code. 10 * 11 * This code is distributed in the hope that it will be useful, but WITHOUT 12 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 13 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 14 * version 2 for more details (a copy is included in the LICENSE file that 15 * accompanied this code). 16 * 17 * You should have received a copy of the GNU General Public License version 18 * 2 along with this work; if not, write to the Free Software Foundation, 19 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 20 * 21 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA 22 * or visit www.oracle.com if you need additional information or have any 23 * questions. 24 */ 25 26package jdk.nashorn.internal.parser; 27 28import static jdk.nashorn.internal.parser.TokenType.ADD; 29import static jdk.nashorn.internal.parser.TokenType.BINARY_NUMBER; 30import static jdk.nashorn.internal.parser.TokenType.COMMENT; 31import static jdk.nashorn.internal.parser.TokenType.DECIMAL; 32import static jdk.nashorn.internal.parser.TokenType.DIRECTIVE_COMMENT; 33import static jdk.nashorn.internal.parser.TokenType.EOF; 34import static jdk.nashorn.internal.parser.TokenType.EOL; 35import static jdk.nashorn.internal.parser.TokenType.ERROR; 36import static jdk.nashorn.internal.parser.TokenType.ESCSTRING; 37import static jdk.nashorn.internal.parser.TokenType.EXECSTRING; 38import static jdk.nashorn.internal.parser.TokenType.FLOATING; 39import static jdk.nashorn.internal.parser.TokenType.FUNCTION; 40import static jdk.nashorn.internal.parser.TokenType.HEXADECIMAL; 41import static jdk.nashorn.internal.parser.TokenType.LBRACE; 42import static jdk.nashorn.internal.parser.TokenType.LPAREN; 43import static jdk.nashorn.internal.parser.TokenType.OCTAL; 44import static jdk.nashorn.internal.parser.TokenType.OCTAL_LEGACY; 45import static jdk.nashorn.internal.parser.TokenType.RBRACE; 46import static jdk.nashorn.internal.parser.TokenType.REGEX; 47import static jdk.nashorn.internal.parser.TokenType.RPAREN; 48import static jdk.nashorn.internal.parser.TokenType.STRING; 49import static jdk.nashorn.internal.parser.TokenType.XML; 50 51import java.io.Serializable; 52 53import jdk.nashorn.internal.runtime.ECMAErrors; 54import jdk.nashorn.internal.runtime.ErrorManager; 55import jdk.nashorn.internal.runtime.JSErrorType; 56import jdk.nashorn.internal.runtime.JSType; 57import jdk.nashorn.internal.runtime.ParserException; 58import jdk.nashorn.internal.runtime.Source; 59import jdk.nashorn.internal.runtime.options.Options; 60 61/** 62 * Responsible for converting source content into a stream of tokens. 63 * 64 */ 65@SuppressWarnings("fallthrough") 66public class Lexer extends Scanner { 67 private static final long MIN_INT_L = Integer.MIN_VALUE; 68 private static final long MAX_INT_L = Integer.MAX_VALUE; 69 70 private static final boolean XML_LITERALS = Options.getBooleanProperty("nashorn.lexer.xmlliterals"); 71 72 /** Content source. */ 73 private final Source source; 74 75 /** Buffered stream for tokens. */ 76 private final TokenStream stream; 77 78 /** True if here and edit strings are supported. */ 79 private final boolean scripting; 80 81 /** True if parsing in ECMAScript 6 mode. */ 82 private final boolean es6; 83 84 /** True if a nested scan. (scan to completion, no EOF.) */ 85 private final boolean nested; 86 87 /** Pending new line number and position. */ 88 int pendingLine; 89 90 /** Position of last EOL + 1. */ 91 private int linePosition; 92 93 /** Type of last token added. */ 94 private TokenType last; 95 96 private final boolean pauseOnFunctionBody; 97 private boolean pauseOnNextLeftBrace; 98 99 private static final String SPACETAB = " \t"; // ASCII space and tab 100 private static final String LFCR = "\n\r"; // line feed and carriage return (ctrl-m) 101 102 private static final String JAVASCRIPT_WHITESPACE_EOL = 103 LFCR + 104 "\u2028" + // line separator 105 "\u2029" // paragraph separator 106 ; 107 private static final String JAVASCRIPT_WHITESPACE = 108 SPACETAB + 109 JAVASCRIPT_WHITESPACE_EOL + 110 "\u000b" + // tabulation line 111 "\u000c" + // ff (ctrl-l) 112 "\u00a0" + // Latin-1 space 113 "\u1680" + // Ogham space mark 114 "\u180e" + // separator, Mongolian vowel 115 "\u2000" + // en quad 116 "\u2001" + // em quad 117 "\u2002" + // en space 118 "\u2003" + // em space 119 "\u2004" + // three-per-em space 120 "\u2005" + // four-per-em space 121 "\u2006" + // six-per-em space 122 "\u2007" + // figure space 123 "\u2008" + // punctuation space 124 "\u2009" + // thin space 125 "\u200a" + // hair space 126 "\u202f" + // narrow no-break space 127 "\u205f" + // medium mathematical space 128 "\u3000" + // ideographic space 129 "\ufeff" // byte order mark 130 ; 131 132 private static final String JAVASCRIPT_WHITESPACE_IN_REGEXP = 133 "\\u000a" + // line feed 134 "\\u000d" + // carriage return (ctrl-m) 135 "\\u2028" + // line separator 136 "\\u2029" + // paragraph separator 137 "\\u0009" + // tab 138 "\\u0020" + // ASCII space 139 "\\u000b" + // tabulation line 140 "\\u000c" + // ff (ctrl-l) 141 "\\u00a0" + // Latin-1 space 142 "\\u1680" + // Ogham space mark 143 "\\u180e" + // separator, Mongolian vowel 144 "\\u2000" + // en quad 145 "\\u2001" + // em quad 146 "\\u2002" + // en space 147 "\\u2003" + // em space 148 "\\u2004" + // three-per-em space 149 "\\u2005" + // four-per-em space 150 "\\u2006" + // six-per-em space 151 "\\u2007" + // figure space 152 "\\u2008" + // punctuation space 153 "\\u2009" + // thin space 154 "\\u200a" + // hair space 155 "\\u202f" + // narrow no-break space 156 "\\u205f" + // medium mathematical space 157 "\\u3000" + // ideographic space 158 "\\ufeff" // byte order mark 159 ; 160 161 static String unicodeEscape(final char ch) { 162 final StringBuilder sb = new StringBuilder(); 163 164 sb.append("\\u"); 165 166 final String hex = Integer.toHexString(ch); 167 for (int i = hex.length(); i < 4; i++) { 168 sb.append('0'); 169 } 170 sb.append(hex); 171 172 return sb.toString(); 173 } 174 175 /** 176 * Constructor 177 * 178 * @param source the source 179 * @param stream the token stream to lex 180 */ 181 public Lexer(final Source source, final TokenStream stream) { 182 this(source, stream, false, false); 183 } 184 185 /** 186 * Constructor 187 * 188 * @param source the source 189 * @param stream the token stream to lex 190 * @param scripting are we in scripting mode 191 * @param es6 are we in ECMAScript 6 mode 192 */ 193 public Lexer(final Source source, final TokenStream stream, final boolean scripting, final boolean es6) { 194 this(source, 0, source.getLength(), stream, scripting, es6, false); 195 } 196 197 /** 198 * Constructor 199 * 200 * @param source the source 201 * @param start start position in source from which to start lexing 202 * @param len length of source segment to lex 203 * @param stream token stream to lex 204 * @param scripting are we in scripting mode 205 * @param es6 are we in ECMAScript 6 mode 206 * @param pauseOnFunctionBody if true, lexer will return from {@link #lexify()} when it encounters a 207 * function body. This is used with the feature where the parser is skipping nested function bodies to 208 * avoid reading ahead unnecessarily when we skip the function bodies. 209 */ 210 211 public Lexer(final Source source, final int start, final int len, final TokenStream stream, final boolean scripting, final boolean es6, final boolean pauseOnFunctionBody) { 212 super(source.getContent(), 1, start, len); 213 this.source = source; 214 this.stream = stream; 215 this.scripting = scripting; 216 this.es6 = es6; 217 this.nested = false; 218 this.pendingLine = 1; 219 this.last = EOL; 220 221 this.pauseOnFunctionBody = pauseOnFunctionBody; 222 } 223 224 private Lexer(final Lexer lexer, final State state) { 225 super(lexer, state); 226 227 source = lexer.source; 228 stream = lexer.stream; 229 scripting = lexer.scripting; 230 es6 = lexer.es6; 231 nested = true; 232 233 pendingLine = state.pendingLine; 234 linePosition = state.linePosition; 235 last = EOL; 236 pauseOnFunctionBody = false; 237 } 238 239 static class State extends Scanner.State { 240 /** Pending new line number and position. */ 241 public final int pendingLine; 242 243 /** Position of last EOL + 1. */ 244 public final int linePosition; 245 246 /** Type of last token added. */ 247 public final TokenType last; 248 249 /* 250 * Constructor. 251 */ 252 253 State(final int position, final int limit, final int line, final int pendingLine, final int linePosition, final TokenType last) { 254 super(position, limit, line); 255 256 this.pendingLine = pendingLine; 257 this.linePosition = linePosition; 258 this.last = last; 259 } 260 } 261 262 /** 263 * Save the state of the scan. 264 * 265 * @return Captured state. 266 */ 267 @Override 268 State saveState() { 269 return new State(position, limit, line, pendingLine, linePosition, last); 270 } 271 272 /** 273 * Restore the state of the scan. 274 * 275 * @param state 276 * Captured state. 277 */ 278 void restoreState(final State state) { 279 super.restoreState(state); 280 281 pendingLine = state.pendingLine; 282 linePosition = state.linePosition; 283 last = state.last; 284 } 285 286 /** 287 * Add a new token to the stream. 288 * 289 * @param type 290 * Token type. 291 * @param start 292 * Start position. 293 * @param end 294 * End position. 295 */ 296 protected void add(final TokenType type, final int start, final int end) { 297 // Record last token. 298 last = type; 299 300 // Only emit the last EOL in a cluster. 301 if (type == EOL) { 302 pendingLine = end; 303 linePosition = start; 304 } else { 305 // Write any pending EOL to stream. 306 if (pendingLine != -1) { 307 stream.put(Token.toDesc(EOL, linePosition, pendingLine)); 308 pendingLine = -1; 309 } 310 311 // Write token to stream. 312 stream.put(Token.toDesc(type, start, end - start)); 313 } 314 } 315 316 /** 317 * Add a new token to the stream. 318 * 319 * @param type 320 * Token type. 321 * @param start 322 * Start position. 323 */ 324 protected void add(final TokenType type, final int start) { 325 add(type, start, position); 326 } 327 328 /** 329 * Return the String of valid whitespace characters for regular 330 * expressions in JavaScript 331 * @return regexp whitespace string 332 */ 333 public static String getWhitespaceRegExp() { 334 return JAVASCRIPT_WHITESPACE_IN_REGEXP; 335 } 336 337 /** 338 * Skip end of line. 339 * 340 * @param addEOL true if EOL token should be recorded. 341 */ 342 private void skipEOL(final boolean addEOL) { 343 344 if (ch0 == '\r') { // detect \r\n pattern 345 skip(1); 346 if (ch0 == '\n') { 347 skip(1); 348 } 349 } else { // all other space, ch0 is guaranteed to be EOL or \0 350 skip(1); 351 } 352 353 // bump up line count 354 line++; 355 356 if (addEOL) { 357 // Add an EOL token. 358 add(EOL, position, line); 359 } 360 } 361 362 /** 363 * Skip over rest of line including end of line. 364 * 365 * @param addEOL true if EOL token should be recorded. 366 */ 367 private void skipLine(final boolean addEOL) { 368 // Ignore characters. 369 while (!isEOL(ch0) && !atEOF()) { 370 skip(1); 371 } 372 // Skip over end of line. 373 skipEOL(addEOL); 374 } 375 376 /** 377 * Test whether a char is valid JavaScript whitespace 378 * @param ch a char 379 * @return true if valid JavaScript whitespace 380 */ 381 public static boolean isJSWhitespace(final char ch) { 382 return JAVASCRIPT_WHITESPACE.indexOf(ch) != -1; 383 } 384 385 /** 386 * Test whether a char is valid JavaScript end of line 387 * @param ch a char 388 * @return true if valid JavaScript end of line 389 */ 390 public static boolean isJSEOL(final char ch) { 391 return JAVASCRIPT_WHITESPACE_EOL.indexOf(ch) != -1; 392 } 393 394 /** 395 * Test if char is a string delimiter, e.g. '\' or '"'. Also scans exec 396 * strings ('`') in scripting mode. 397 * @param ch a char 398 * @return true if string delimiter 399 */ 400 protected boolean isStringDelimiter(final char ch) { 401 return ch == '\'' || ch == '"' || (scripting && ch == '`'); 402 } 403 404 /** 405 * Test whether a char is valid JavaScript whitespace 406 * @param ch a char 407 * @return true if valid JavaScript whitespace 408 */ 409 protected boolean isWhitespace(final char ch) { 410 return Lexer.isJSWhitespace(ch); 411 } 412 413 /** 414 * Test whether a char is valid JavaScript end of line 415 * @param ch a char 416 * @return true if valid JavaScript end of line 417 */ 418 protected boolean isEOL(final char ch) { 419 return Lexer.isJSEOL(ch); 420 } 421 422 /** 423 * Skip over whitespace and detect end of line, adding EOL tokens if 424 * encountered. 425 * 426 * @param addEOL true if EOL tokens should be recorded. 427 */ 428 private void skipWhitespace(final boolean addEOL) { 429 while (isWhitespace(ch0)) { 430 if (isEOL(ch0)) { 431 skipEOL(addEOL); 432 } else { 433 skip(1); 434 } 435 } 436 } 437 438 /** 439 * Skip over comments. 440 * 441 * @return True if a comment. 442 */ 443 protected boolean skipComments() { 444 // Save the current position. 445 final int start = position; 446 447 if (ch0 == '/') { 448 // Is it a // comment. 449 if (ch1 == '/') { 450 // Skip over //. 451 skip(2); 452 453 boolean directiveComment = false; 454 if ((ch0 == '#' || ch0 == '@') && (ch1 == ' ')) { 455 directiveComment = true; 456 } 457 458 // Scan for EOL. 459 while (!atEOF() && !isEOL(ch0)) { 460 skip(1); 461 } 462 // Did detect a comment. 463 add(directiveComment? DIRECTIVE_COMMENT : COMMENT, start); 464 return true; 465 } else if (ch1 == '*') { 466 // Skip over /*. 467 skip(2); 468 // Scan for */. 469 while (!atEOF() && !(ch0 == '*' && ch1 == '/')) { 470 // If end of line handle else skip character. 471 if (isEOL(ch0)) { 472 skipEOL(true); 473 } else { 474 skip(1); 475 } 476 } 477 478 if (atEOF()) { 479 // TODO - Report closing */ missing in parser. 480 add(ERROR, start); 481 } else { 482 // Skip */. 483 skip(2); 484 } 485 486 // Did detect a comment. 487 add(COMMENT, start); 488 return true; 489 } 490 } else if (ch0 == '#') { 491 assert scripting; 492 // shell style comment 493 // Skip over #. 494 skip(1); 495 // Scan for EOL. 496 while (!atEOF() && !isEOL(ch0)) { 497 skip(1); 498 } 499 // Did detect a comment. 500 add(COMMENT, start); 501 return true; 502 } 503 504 // Not a comment. 505 return false; 506 } 507 508 /** 509 * Convert a regex token to a token object. 510 * 511 * @param start Position in source content. 512 * @param length Length of regex token. 513 * @return Regex token object. 514 */ 515 public RegexToken valueOfPattern(final int start, final int length) { 516 // Save the current position. 517 final int savePosition = position; 518 // Reset to beginning of content. 519 reset(start); 520 // Buffer for recording characters. 521 final StringBuilder sb = new StringBuilder(length); 522 523 // Skip /. 524 skip(1); 525 boolean inBrackets = false; 526 // Scan for closing /, stopping at end of line. 527 while (!atEOF() && ch0 != '/' && !isEOL(ch0) || inBrackets) { 528 // Skip over escaped character. 529 if (ch0 == '\\') { 530 sb.append(ch0); 531 sb.append(ch1); 532 skip(2); 533 } else { 534 if (ch0 == '[') { 535 inBrackets = true; 536 } else if (ch0 == ']') { 537 inBrackets = false; 538 } 539 540 // Skip literal character. 541 sb.append(ch0); 542 skip(1); 543 } 544 } 545 546 // Get pattern as string. 547 final String regex = sb.toString(); 548 549 // Skip /. 550 skip(1); 551 552 // Options as string. 553 final String options = source.getString(position, scanIdentifier()); 554 555 reset(savePosition); 556 557 // Compile the pattern. 558 return new RegexToken(regex, options); 559 } 560 561 /** 562 * Return true if the given token can be the beginning of a literal. 563 * 564 * @param token a token 565 * @return true if token can start a literal. 566 */ 567 public boolean canStartLiteral(final TokenType token) { 568 return token.startsWith('/') || ((scripting || XML_LITERALS) && token.startsWith('<')); 569 } 570 571 /** 572 * interface to receive line information for multi-line literals. 573 */ 574 protected interface LineInfoReceiver { 575 /** 576 * Receives line information 577 * @param line last line number 578 * @param linePosition position of last line 579 */ 580 public void lineInfo(int line, int linePosition); 581 } 582 583 /** 584 * Check whether the given token represents the beginning of a literal. If so scan 585 * the literal and return <tt>true</tt>, otherwise return false. 586 * 587 * @param token the token. 588 * @param startTokenType the token type. 589 * @param lir LineInfoReceiver that receives line info for multi-line string literals. 590 * @return True if a literal beginning with startToken was found and scanned. 591 */ 592 protected boolean scanLiteral(final long token, final TokenType startTokenType, final LineInfoReceiver lir) { 593 // Check if it can be a literal. 594 if (!canStartLiteral(startTokenType)) { 595 return false; 596 } 597 // We break on ambiguous tokens so if we already moved on it can't be a literal. 598 if (stream.get(stream.last()) != token) { 599 return false; 600 } 601 // Rewind to token start position 602 reset(Token.descPosition(token)); 603 604 if (ch0 == '/') { 605 return scanRegEx(); 606 } else if (ch0 == '<') { 607 if (ch1 == '<') { 608 return scanHereString(lir); 609 } else if (Character.isJavaIdentifierStart(ch1)) { 610 return scanXMLLiteral(); 611 } 612 } 613 614 return false; 615 } 616 617 /** 618 * Scan over regex literal. 619 * 620 * @return True if a regex literal. 621 */ 622 private boolean scanRegEx() { 623 assert ch0 == '/'; 624 // Make sure it's not a comment. 625 if (ch1 != '/' && ch1 != '*') { 626 // Record beginning of literal. 627 final int start = position; 628 // Skip /. 629 skip(1); 630 boolean inBrackets = false; 631 632 // Scan for closing /, stopping at end of line. 633 while (!atEOF() && (ch0 != '/' || inBrackets) && !isEOL(ch0)) { 634 // Skip over escaped character. 635 if (ch0 == '\\') { 636 skip(1); 637 if (isEOL(ch0)) { 638 reset(start); 639 return false; 640 } 641 skip(1); 642 } else { 643 if (ch0 == '[') { 644 inBrackets = true; 645 } else if (ch0 == ']') { 646 inBrackets = false; 647 } 648 649 // Skip literal character. 650 skip(1); 651 } 652 } 653 654 // If regex literal. 655 if (ch0 == '/') { 656 // Skip /. 657 skip(1); 658 659 // Skip over options. 660 while (!atEOF() && Character.isJavaIdentifierPart(ch0) || ch0 == '\\' && ch1 == 'u') { 661 skip(1); 662 } 663 664 // Add regex token. 665 add(REGEX, start); 666 // Regex literal detected. 667 return true; 668 } 669 670 // False start try again. 671 reset(start); 672 } 673 674 // Regex literal not detected. 675 return false; 676 } 677 678 /** 679 * Convert a digit to a integer. Can't use Character.digit since we are 680 * restricted to ASCII by the spec. 681 * 682 * @param ch Character to convert. 683 * @param base Numeric base. 684 * 685 * @return The converted digit or -1 if invalid. 686 */ 687 protected static int convertDigit(final char ch, final int base) { 688 int digit; 689 690 if ('0' <= ch && ch <= '9') { 691 digit = ch - '0'; 692 } else if ('A' <= ch && ch <= 'Z') { 693 digit = ch - 'A' + 10; 694 } else if ('a' <= ch && ch <= 'z') { 695 digit = ch - 'a' + 10; 696 } else { 697 return -1; 698 } 699 700 return digit < base ? digit : -1; 701 } 702 703 704 /** 705 * Get the value of a hexadecimal numeric sequence. 706 * 707 * @param length Number of digits. 708 * @param type Type of token to report against. 709 * @return Value of sequence or < 0 if no digits. 710 */ 711 private int hexSequence(final int length, final TokenType type) { 712 int value = 0; 713 714 for (int i = 0; i < length; i++) { 715 final int digit = convertDigit(ch0, 16); 716 717 if (digit == -1) { 718 error(Lexer.message("invalid.hex"), type, position, limit); 719 return i == 0 ? -1 : value; 720 } 721 722 value = digit | value << 4; 723 skip(1); 724 } 725 726 return value; 727 } 728 729 /** 730 * Get the value of an octal numeric sequence. This parses up to 3 digits with a maximum value of 255. 731 * 732 * @return Value of sequence. 733 */ 734 private int octalSequence() { 735 int value = 0; 736 737 for (int i = 0; i < 3; i++) { 738 final int digit = convertDigit(ch0, 8); 739 740 if (digit == -1) { 741 break; 742 } 743 value = digit | value << 3; 744 skip(1); 745 746 if (i == 1 && value >= 32) { 747 break; 748 } 749 } 750 return value; 751 } 752 753 /** 754 * Convert a string to a JavaScript identifier. 755 * 756 * @param start Position in source content. 757 * @param length Length of token. 758 * @return Ident string or null if an error. 759 */ 760 private String valueOfIdent(final int start, final int length) throws RuntimeException { 761 // Save the current position. 762 final int savePosition = position; 763 // End of scan. 764 final int end = start + length; 765 // Reset to beginning of content. 766 reset(start); 767 // Buffer for recording characters. 768 final StringBuilder sb = new StringBuilder(length); 769 770 // Scan until end of line or end of file. 771 while (!atEOF() && position < end && !isEOL(ch0)) { 772 // If escape character. 773 if (ch0 == '\\' && ch1 == 'u') { 774 skip(2); 775 final int ch = hexSequence(4, TokenType.IDENT); 776 if (isWhitespace((char)ch)) { 777 return null; 778 } 779 if (ch < 0) { 780 sb.append('\\'); 781 sb.append('u'); 782 } else { 783 sb.append((char)ch); 784 } 785 } else { 786 // Add regular character. 787 sb.append(ch0); 788 skip(1); 789 } 790 } 791 792 // Restore position. 793 reset(savePosition); 794 795 return sb.toString(); 796 } 797 798 /** 799 * Scan over and identifier or keyword. Handles identifiers containing 800 * encoded Unicode chars. 801 * 802 * Example: 803 * 804 * var \u0042 = 44; 805 */ 806 private void scanIdentifierOrKeyword() { 807 // Record beginning of identifier. 808 final int start = position; 809 // Scan identifier. 810 final int length = scanIdentifier(); 811 // Check to see if it is a keyword. 812 final TokenType type = TokenLookup.lookupKeyword(content, start, length); 813 if (type == FUNCTION && pauseOnFunctionBody) { 814 pauseOnNextLeftBrace = true; 815 } 816 // Add keyword or identifier token. 817 add(type, start); 818 } 819 820 /** 821 * Convert a string to a JavaScript string object. 822 * 823 * @param start Position in source content. 824 * @param length Length of token. 825 * @return JavaScript string object. 826 */ 827 private String valueOfString(final int start, final int length, final boolean strict) throws RuntimeException { 828 // Save the current position. 829 final int savePosition = position; 830 // Calculate the end position. 831 final int end = start + length; 832 // Reset to beginning of string. 833 reset(start); 834 835 // Buffer for recording characters. 836 final StringBuilder sb = new StringBuilder(length); 837 838 // Scan until end of string. 839 while (position < end) { 840 // If escape character. 841 if (ch0 == '\\') { 842 skip(1); 843 844 final char next = ch0; 845 final int afterSlash = position; 846 847 skip(1); 848 849 // Special characters. 850 switch (next) { 851 case '0': 852 case '1': 853 case '2': 854 case '3': 855 case '4': 856 case '5': 857 case '6': 858 case '7': { 859 if (strict) { 860 // "\0" itself is allowed in strict mode. Only other 'real' 861 // octal escape sequences are not allowed (eg. "\02", "\31"). 862 // See section 7.8.4 String literals production EscapeSequence 863 if (next != '0' || (ch0 >= '0' && ch0 <= '9')) { 864 error(Lexer.message("strict.no.octal"), STRING, position, limit); 865 } 866 } 867 reset(afterSlash); 868 // Octal sequence. 869 final int ch = octalSequence(); 870 871 if (ch < 0) { 872 sb.append('\\'); 873 sb.append('x'); 874 } else { 875 sb.append((char)ch); 876 } 877 break; 878 } 879 case 'n': 880 sb.append('\n'); 881 break; 882 case 't': 883 sb.append('\t'); 884 break; 885 case 'b': 886 sb.append('\b'); 887 break; 888 case 'f': 889 sb.append('\f'); 890 break; 891 case 'r': 892 sb.append('\r'); 893 break; 894 case '\'': 895 sb.append('\''); 896 break; 897 case '\"': 898 sb.append('\"'); 899 break; 900 case '\\': 901 sb.append('\\'); 902 break; 903 case '\r': // CR | CRLF 904 if (ch0 == '\n') { 905 skip(1); 906 } 907 // fall through 908 case '\n': // LF 909 case '\u2028': // LS 910 case '\u2029': // PS 911 // continue on the next line, slash-return continues string 912 // literal 913 break; 914 case 'x': { 915 // Hex sequence. 916 final int ch = hexSequence(2, STRING); 917 918 if (ch < 0) { 919 sb.append('\\'); 920 sb.append('x'); 921 } else { 922 sb.append((char)ch); 923 } 924 } 925 break; 926 case 'u': { 927 // Unicode sequence. 928 final int ch = hexSequence(4, STRING); 929 930 if (ch < 0) { 931 sb.append('\\'); 932 sb.append('u'); 933 } else { 934 sb.append((char)ch); 935 } 936 } 937 break; 938 case 'v': 939 sb.append('\u000B'); 940 break; 941 // All other characters. 942 default: 943 sb.append(next); 944 break; 945 } 946 } else { 947 // Add regular character. 948 sb.append(ch0); 949 skip(1); 950 } 951 } 952 953 // Restore position. 954 reset(savePosition); 955 956 return sb.toString(); 957 } 958 959 /** 960 * Scan over a string literal. 961 * @param add true if we nare not just scanning but should actually modify the token stream 962 */ 963 protected void scanString(final boolean add) { 964 // Type of string. 965 TokenType type = STRING; 966 // Record starting quote. 967 final char quote = ch0; 968 // Skip over quote. 969 skip(1); 970 971 // Record beginning of string content. 972 final State stringState = saveState(); 973 974 // Scan until close quote or end of line. 975 while (!atEOF() && ch0 != quote && !isEOL(ch0)) { 976 // Skip over escaped character. 977 if (ch0 == '\\') { 978 type = ESCSTRING; 979 skip(1); 980 if (! isEscapeCharacter(ch0)) { 981 error(Lexer.message("invalid.escape.char"), STRING, position, limit); 982 } 983 if (isEOL(ch0)) { 984 // Multiline string literal 985 skipEOL(false); 986 continue; 987 } 988 } 989 // Skip literal character. 990 skip(1); 991 } 992 993 // If close quote. 994 if (ch0 == quote) { 995 // Skip close quote. 996 skip(1); 997 } else { 998 error(Lexer.message("missing.close.quote"), STRING, position, limit); 999 } 1000 1001 // If not just scanning. 1002 if (add) { 1003 // Record end of string. 1004 stringState.setLimit(position - 1); 1005 1006 if (scripting && !stringState.isEmpty()) { 1007 switch (quote) { 1008 case '`': 1009 // Mark the beginning of an exec string. 1010 add(EXECSTRING, stringState.position, stringState.limit); 1011 // Frame edit string with left brace. 1012 add(LBRACE, stringState.position, stringState.position); 1013 // Process edit string. 1014 editString(type, stringState); 1015 // Frame edit string with right brace. 1016 add(RBRACE, stringState.limit, stringState.limit); 1017 break; 1018 case '"': 1019 // Only edit double quoted strings. 1020 editString(type, stringState); 1021 break; 1022 case '\'': 1023 // Add string token without editing. 1024 add(type, stringState.position, stringState.limit); 1025 break; 1026 default: 1027 break; 1028 } 1029 } else { 1030 /// Add string token without editing. 1031 add(type, stringState.position, stringState.limit); 1032 } 1033 } 1034 } 1035 1036 /** 1037 * Is the given character a valid escape char after "\" ? 1038 * 1039 * @param ch character to be checked 1040 * @return if the given character is valid after "\" 1041 */ 1042 protected boolean isEscapeCharacter(final char ch) { 1043 return true; 1044 } 1045 1046 /** 1047 * Convert string to number. 1048 * 1049 * @param valueString String to convert. 1050 * @param radix Numeric base. 1051 * @return Converted number. 1052 */ 1053 private static Number valueOf(final String valueString, final int radix) throws NumberFormatException { 1054 try { 1055 final long value = Long.parseLong(valueString, radix); 1056 if(value >= MIN_INT_L && value <= MAX_INT_L) { 1057 return (int)value; 1058 } 1059 return value; 1060 } catch (final NumberFormatException e) { 1061 if (radix == 10) { 1062 return Double.valueOf(valueString); 1063 } 1064 1065 double value = 0.0; 1066 1067 for (int i = 0; i < valueString.length(); i++) { 1068 final char ch = valueString.charAt(i); 1069 // Preverified, should always be a valid digit. 1070 final int digit = convertDigit(ch, radix); 1071 value *= radix; 1072 value += digit; 1073 } 1074 1075 return value; 1076 } 1077 } 1078 1079 /** 1080 * Scan a number. 1081 */ 1082 protected void scanNumber() { 1083 // Record beginning of number. 1084 final int start = position; 1085 // Assume value is a decimal. 1086 TokenType type = DECIMAL; 1087 1088 // First digit of number. 1089 int digit = convertDigit(ch0, 10); 1090 1091 // If number begins with 0x. 1092 if (digit == 0 && (ch1 == 'x' || ch1 == 'X') && convertDigit(ch2, 16) != -1) { 1093 // Skip over 0xN. 1094 skip(3); 1095 // Skip over remaining digits. 1096 while (convertDigit(ch0, 16) != -1) { 1097 skip(1); 1098 } 1099 1100 type = HEXADECIMAL; 1101 } else if (digit == 0 && es6 && (ch1 == 'o' || ch1 == 'O') && convertDigit(ch2, 8) != -1) { 1102 // Skip over 0oN. 1103 skip(3); 1104 // Skip over remaining digits. 1105 while (convertDigit(ch0, 8) != -1) { 1106 skip(1); 1107 } 1108 1109 type = OCTAL; 1110 } else if (digit == 0 && es6 && (ch1 == 'b' || ch1 == 'B') && convertDigit(ch2, 2) != -1) { 1111 // Skip over 0bN. 1112 skip(3); 1113 // Skip over remaining digits. 1114 while (convertDigit(ch0, 2) != -1) { 1115 skip(1); 1116 } 1117 1118 type = BINARY_NUMBER; 1119 } else { 1120 // Check for possible octal constant. 1121 boolean octal = digit == 0; 1122 // Skip first digit if not leading '.'. 1123 if (digit != -1) { 1124 skip(1); 1125 } 1126 1127 // Skip remaining digits. 1128 while ((digit = convertDigit(ch0, 10)) != -1) { 1129 // Check octal only digits. 1130 octal = octal && digit < 8; 1131 // Skip digit. 1132 skip(1); 1133 } 1134 1135 if (octal && position - start > 1) { 1136 type = OCTAL_LEGACY; 1137 } else if (ch0 == '.' || ch0 == 'E' || ch0 == 'e') { 1138 // Must be a double. 1139 if (ch0 == '.') { 1140 // Skip period. 1141 skip(1); 1142 // Skip mantissa. 1143 while (convertDigit(ch0, 10) != -1) { 1144 skip(1); 1145 } 1146 } 1147 1148 // Detect exponent. 1149 if (ch0 == 'E' || ch0 == 'e') { 1150 // Skip E. 1151 skip(1); 1152 // Detect and skip exponent sign. 1153 if (ch0 == '+' || ch0 == '-') { 1154 skip(1); 1155 } 1156 // Skip exponent. 1157 while (convertDigit(ch0, 10) != -1) { 1158 skip(1); 1159 } 1160 } 1161 1162 type = FLOATING; 1163 } 1164 } 1165 1166 if (Character.isJavaIdentifierStart(ch0)) { 1167 error(Lexer.message("missing.space.after.number"), type, position, 1); 1168 } 1169 1170 // Add number token. 1171 add(type, start); 1172 } 1173 1174 /** 1175 * Convert a regex token to a token object. 1176 * 1177 * @param start Position in source content. 1178 * @param length Length of regex token. 1179 * @return Regex token object. 1180 */ 1181 XMLToken valueOfXML(final int start, final int length) { 1182 return new XMLToken(source.getString(start, length)); 1183 } 1184 1185 /** 1186 * Scan over a XML token. 1187 * 1188 * @return TRUE if is an XML literal. 1189 */ 1190 private boolean scanXMLLiteral() { 1191 assert ch0 == '<' && Character.isJavaIdentifierStart(ch1); 1192 if (XML_LITERALS) { 1193 // Record beginning of xml expression. 1194 final int start = position; 1195 1196 int openCount = 0; 1197 1198 do { 1199 if (ch0 == '<') { 1200 if (ch1 == '/' && Character.isJavaIdentifierStart(ch2)) { 1201 skip(3); 1202 openCount--; 1203 } else if (Character.isJavaIdentifierStart(ch1)) { 1204 skip(2); 1205 openCount++; 1206 } else if (ch1 == '?') { 1207 skip(2); 1208 } else if (ch1 == '!' && ch2 == '-' && ch3 == '-') { 1209 skip(4); 1210 } else { 1211 reset(start); 1212 return false; 1213 } 1214 1215 while (!atEOF() && ch0 != '>') { 1216 if (ch0 == '/' && ch1 == '>') { 1217 openCount--; 1218 skip(1); 1219 break; 1220 } else if (ch0 == '\"' || ch0 == '\'') { 1221 scanString(false); 1222 } else { 1223 skip(1); 1224 } 1225 } 1226 1227 if (ch0 != '>') { 1228 reset(start); 1229 return false; 1230 } 1231 1232 skip(1); 1233 } else if (atEOF()) { 1234 reset(start); 1235 return false; 1236 } else { 1237 skip(1); 1238 } 1239 } while (openCount > 0); 1240 1241 add(XML, start); 1242 return true; 1243 } 1244 1245 return false; 1246 } 1247 1248 /** 1249 * Scan over identifier characters. 1250 * 1251 * @return Length of identifier or zero if none found. 1252 */ 1253 private int scanIdentifier() { 1254 final int start = position; 1255 1256 // Make sure first character is valid start character. 1257 if (ch0 == '\\' && ch1 == 'u') { 1258 skip(2); 1259 final int ch = hexSequence(4, TokenType.IDENT); 1260 1261 if (!Character.isJavaIdentifierStart(ch)) { 1262 error(Lexer.message("illegal.identifier.character"), TokenType.IDENT, start, position); 1263 } 1264 } else if (!Character.isJavaIdentifierStart(ch0)) { 1265 // Not an identifier. 1266 return 0; 1267 } 1268 1269 // Make sure remaining characters are valid part characters. 1270 while (!atEOF()) { 1271 if (ch0 == '\\' && ch1 == 'u') { 1272 skip(2); 1273 final int ch = hexSequence(4, TokenType.IDENT); 1274 1275 if (!Character.isJavaIdentifierPart(ch)) { 1276 error(Lexer.message("illegal.identifier.character"), TokenType.IDENT, start, position); 1277 } 1278 } else if (Character.isJavaIdentifierPart(ch0)) { 1279 skip(1); 1280 } else { 1281 break; 1282 } 1283 } 1284 1285 // Length of identifier sequence. 1286 return position - start; 1287 } 1288 1289 /** 1290 * Compare two identifiers (in content) for equality. 1291 * 1292 * @param aStart Start of first identifier. 1293 * @param aLength Length of first identifier. 1294 * @param bStart Start of second identifier. 1295 * @param bLength Length of second identifier. 1296 * @return True if equal. 1297 */ 1298 private boolean identifierEqual(final int aStart, final int aLength, final int bStart, final int bLength) { 1299 if (aLength == bLength) { 1300 for (int i = 0; i < aLength; i++) { 1301 if (content[aStart + i] != content[bStart + i]) { 1302 return false; 1303 } 1304 } 1305 1306 return true; 1307 } 1308 1309 return false; 1310 } 1311 1312 /** 1313 * Detect if a line starts with a marker identifier. 1314 * 1315 * @param identStart Start of identifier. 1316 * @param identLength Length of identifier. 1317 * @return True if detected. 1318 */ 1319 private boolean hasHereMarker(final int identStart, final int identLength) { 1320 // Skip any whitespace. 1321 skipWhitespace(false); 1322 1323 return identifierEqual(identStart, identLength, position, scanIdentifier()); 1324 } 1325 1326 /** 1327 * Lexer to service edit strings. 1328 */ 1329 private static class EditStringLexer extends Lexer { 1330 /** Type of string literals to emit. */ 1331 final TokenType stringType; 1332 1333 /* 1334 * Constructor. 1335 */ 1336 1337 EditStringLexer(final Lexer lexer, final TokenType stringType, final State stringState) { 1338 super(lexer, stringState); 1339 1340 this.stringType = stringType; 1341 } 1342 1343 /** 1344 * Lexify the contents of the string. 1345 */ 1346 @Override 1347 public void lexify() { 1348 // Record start of string position. 1349 int stringStart = position; 1350 // Indicate that the priming first string has not been emitted. 1351 boolean primed = false; 1352 1353 while (true) { 1354 // Detect end of content. 1355 if (atEOF()) { 1356 break; 1357 } 1358 1359 // Honour escapes (should be well formed.) 1360 if (ch0 == '\\' && stringType == ESCSTRING) { 1361 skip(2); 1362 1363 continue; 1364 } 1365 1366 // If start of expression. 1367 if (ch0 == '$' && ch1 == '{') { 1368 if (!primed || stringStart != position) { 1369 if (primed) { 1370 add(ADD, stringStart, stringStart + 1); 1371 } 1372 1373 add(stringType, stringStart, position); 1374 primed = true; 1375 } 1376 1377 // Skip ${ 1378 skip(2); 1379 1380 // Save expression state. 1381 final State expressionState = saveState(); 1382 1383 // Start with one open brace. 1384 int braceCount = 1; 1385 1386 // Scan for the rest of the string. 1387 while (!atEOF()) { 1388 // If closing brace. 1389 if (ch0 == '}') { 1390 // Break only only if matching brace. 1391 if (--braceCount == 0) { 1392 break; 1393 } 1394 } else if (ch0 == '{') { 1395 // Bump up the brace count. 1396 braceCount++; 1397 } 1398 1399 // Skip to next character. 1400 skip(1); 1401 } 1402 1403 // If braces don't match then report an error. 1404 if (braceCount != 0) { 1405 error(Lexer.message("edit.string.missing.brace"), LBRACE, expressionState.position - 1, 1); 1406 } 1407 1408 // Mark end of expression. 1409 expressionState.setLimit(position); 1410 // Skip closing brace. 1411 skip(1); 1412 1413 // Start next string. 1414 stringStart = position; 1415 1416 // Concatenate expression. 1417 add(ADD, expressionState.position, expressionState.position + 1); 1418 add(LPAREN, expressionState.position, expressionState.position + 1); 1419 1420 // Scan expression. 1421 final Lexer lexer = new Lexer(this, expressionState); 1422 lexer.lexify(); 1423 1424 // Close out expression parenthesis. 1425 add(RPAREN, position - 1, position); 1426 1427 continue; 1428 } 1429 1430 // Next character in string. 1431 skip(1); 1432 } 1433 1434 // If there is any unemitted string portion. 1435 if (stringStart != limit) { 1436 // Concatenate remaining string. 1437 if (primed) { 1438 add(ADD, stringStart, 1); 1439 } 1440 1441 add(stringType, stringStart, limit); 1442 } 1443 } 1444 1445 } 1446 1447 /** 1448 * Edit string for nested expressions. 1449 * 1450 * @param stringType Type of string literals to emit. 1451 * @param stringState State of lexer at start of string. 1452 */ 1453 private void editString(final TokenType stringType, final State stringState) { 1454 // Use special lexer to scan string. 1455 final EditStringLexer lexer = new EditStringLexer(this, stringType, stringState); 1456 lexer.lexify(); 1457 1458 // Need to keep lexer informed. 1459 last = stringType; 1460 } 1461 1462 /** 1463 * Scan over a here string. 1464 * 1465 * @return TRUE if is a here string. 1466 */ 1467 private boolean scanHereString(final LineInfoReceiver lir) { 1468 assert ch0 == '<' && ch1 == '<'; 1469 if (scripting) { 1470 // Record beginning of here string. 1471 final State saved = saveState(); 1472 1473 // << or <<< 1474 final boolean excludeLastEOL = ch2 != '<'; 1475 1476 if (excludeLastEOL) { 1477 skip(2); 1478 } else { 1479 skip(3); 1480 } 1481 1482 // Scan identifier. It might be quoted, indicating that no string editing should take place. 1483 final char quoteChar = ch0; 1484 final boolean noStringEditing = quoteChar == '"' || quoteChar == '\''; 1485 if (noStringEditing) { 1486 skip(1); 1487 } 1488 final int identStart = position; 1489 final int identLength = scanIdentifier(); 1490 if (noStringEditing) { 1491 if (ch0 != quoteChar) { 1492 error(Lexer.message("here.non.matching.delimiter"), last, position, position); 1493 restoreState(saved); 1494 return false; 1495 } 1496 skip(1); 1497 } 1498 1499 // Check for identifier. 1500 if (identLength == 0) { 1501 // Treat as shift. 1502 restoreState(saved); 1503 1504 return false; 1505 } 1506 1507 // Record rest of line. 1508 final State restState = saveState(); 1509 // keep line number updated 1510 int lastLine = line; 1511 1512 skipLine(false); 1513 lastLine++; 1514 int lastLinePosition = position; 1515 restState.setLimit(position); 1516 1517 // Record beginning of string. 1518 final State stringState = saveState(); 1519 int stringEnd = position; 1520 1521 // Hunt down marker. 1522 while (!atEOF()) { 1523 // Skip any whitespace. 1524 skipWhitespace(false); 1525 1526 if (hasHereMarker(identStart, identLength)) { 1527 break; 1528 } 1529 1530 skipLine(false); 1531 lastLine++; 1532 lastLinePosition = position; 1533 stringEnd = position; 1534 } 1535 1536 // notify last line information 1537 lir.lineInfo(lastLine, lastLinePosition); 1538 1539 // Record end of string. 1540 stringState.setLimit(stringEnd); 1541 1542 // If marker is missing. 1543 if (stringState.isEmpty() || atEOF()) { 1544 error(Lexer.message("here.missing.end.marker", source.getString(identStart, identLength)), last, position, position); 1545 restoreState(saved); 1546 1547 return false; 1548 } 1549 1550 // Remove last end of line if specified. 1551 if (excludeLastEOL) { 1552 // Handles \n. 1553 if (content[stringEnd - 1] == '\n') { 1554 stringEnd--; 1555 } 1556 1557 // Handles \r and \r\n. 1558 if (content[stringEnd - 1] == '\r') { 1559 stringEnd--; 1560 } 1561 1562 // Update end of string. 1563 stringState.setLimit(stringEnd); 1564 } 1565 1566 // Edit string if appropriate. 1567 if (!noStringEditing && !stringState.isEmpty()) { 1568 editString(STRING, stringState); 1569 } else { 1570 // Add here string. 1571 add(STRING, stringState.position, stringState.limit); 1572 } 1573 1574 // Scan rest of original line. 1575 final Lexer restLexer = new Lexer(this, restState); 1576 1577 restLexer.lexify(); 1578 1579 return true; 1580 } 1581 1582 return false; 1583 } 1584 1585 /** 1586 * Breaks source content down into lex units, adding tokens to the token 1587 * stream. The routine scans until the stream buffer is full. Can be called 1588 * repeatedly until EOF is detected. 1589 */ 1590 public void lexify() { 1591 while (!stream.isFull() || nested) { 1592 // Skip over whitespace. 1593 skipWhitespace(true); 1594 1595 // Detect end of file. 1596 if (atEOF()) { 1597 if (!nested) { 1598 // Add an EOF token at the end. 1599 add(EOF, position); 1600 } 1601 1602 break; 1603 } 1604 1605 // Check for comments. Note that we don't scan for regexp and other literals here as 1606 // we may not have enough context to distinguish them from similar looking operators. 1607 // Instead we break on ambiguous operators below and let the parser decide. 1608 if (ch0 == '/' && skipComments()) { 1609 continue; 1610 } 1611 1612 if (scripting && ch0 == '#' && skipComments()) { 1613 continue; 1614 } 1615 1616 // TokenType for lookup of delimiter or operator. 1617 TokenType type; 1618 1619 if (ch0 == '.' && convertDigit(ch1, 10) != -1) { 1620 // '.' followed by digit. 1621 // Scan and add a number. 1622 scanNumber(); 1623 } else if ((type = TokenLookup.lookupOperator(ch0, ch1, ch2, ch3)) != null) { 1624 // Get the number of characters in the token. 1625 final int typeLength = type.getLength(); 1626 // Skip that many characters. 1627 skip(typeLength); 1628 // Add operator token. 1629 add(type, position - typeLength); 1630 // Some operator tokens also mark the beginning of regexp, XML, or here string literals. 1631 // We break to let the parser decide what it is. 1632 if (canStartLiteral(type)) { 1633 break; 1634 } else if (type == LBRACE && pauseOnNextLeftBrace) { 1635 pauseOnNextLeftBrace = false; 1636 break; 1637 } 1638 } else if (Character.isJavaIdentifierStart(ch0) || ch0 == '\\' && ch1 == 'u') { 1639 // Scan and add identifier or keyword. 1640 scanIdentifierOrKeyword(); 1641 } else if (isStringDelimiter(ch0)) { 1642 // Scan and add a string. 1643 scanString(true); 1644 } else if (Character.isDigit(ch0)) { 1645 // Scan and add a number. 1646 scanNumber(); 1647 } else { 1648 // Don't recognize this character. 1649 skip(1); 1650 add(ERROR, position - 1); 1651 } 1652 } 1653 } 1654 1655 /** 1656 * Return value of token given its token descriptor. 1657 * 1658 * @param token Token descriptor. 1659 * @return JavaScript value. 1660 */ 1661 Object getValueOf(final long token, final boolean strict) { 1662 final int start = Token.descPosition(token); 1663 final int len = Token.descLength(token); 1664 1665 switch (Token.descType(token)) { 1666 case DECIMAL: 1667 return Lexer.valueOf(source.getString(start, len), 10); // number 1668 case HEXADECIMAL: 1669 return Lexer.valueOf(source.getString(start + 2, len - 2), 16); // number 1670 case OCTAL_LEGACY: 1671 return Lexer.valueOf(source.getString(start, len), 8); // number 1672 case OCTAL: 1673 return Lexer.valueOf(source.getString(start + 2, len - 2), 8); // number 1674 case BINARY_NUMBER: 1675 return Lexer.valueOf(source.getString(start + 2, len - 2), 2); // number 1676 case FLOATING: 1677 final String str = source.getString(start, len); 1678 final double value = Double.valueOf(str); 1679 if (str.indexOf('.') != -1) { 1680 return value; //number 1681 } 1682 //anything without an explicit decimal point is still subject to a 1683 //"representable as int or long" check. Then the programmer does not 1684 //explicitly code something as a double. For example new Color(int, int, int) 1685 //and new Color(float, float, float) will get ambiguous for cases like 1686 //new Color(1.0, 1.5, 1.5) if we don't respect the decimal point. 1687 //yet we don't want e.g. 1e6 to be a double unnecessarily 1688 if (JSType.isStrictlyRepresentableAsInt(value)) { 1689 return (int)value; 1690 } else if (JSType.isStrictlyRepresentableAsLong(value)) { 1691 return (long)value; 1692 } 1693 return value; 1694 case STRING: 1695 return source.getString(start, len); // String 1696 case ESCSTRING: 1697 return valueOfString(start, len, strict); // String 1698 case IDENT: 1699 return valueOfIdent(start, len); // String 1700 case REGEX: 1701 return valueOfPattern(start, len); // RegexToken::LexerToken 1702 case XML: 1703 return valueOfXML(start, len); // XMLToken::LexerToken 1704 case DIRECTIVE_COMMENT: 1705 return source.getString(start, len); 1706 default: 1707 break; 1708 } 1709 1710 return null; 1711 } 1712 1713 /** 1714 * Get the correctly localized error message for a given message id format arguments 1715 * @param msgId message id 1716 * @param args format arguments 1717 * @return message 1718 */ 1719 protected static String message(final String msgId, final String... args) { 1720 return ECMAErrors.getMessage("lexer.error." + msgId, args); 1721 } 1722 1723 /** 1724 * Generate a runtime exception 1725 * 1726 * @param message error message 1727 * @param type token type 1728 * @param start start position of lexed error 1729 * @param length length of lexed error 1730 * @throws ParserException unconditionally 1731 */ 1732 protected void error(final String message, final TokenType type, final int start, final int length) throws ParserException { 1733 final long token = Token.toDesc(type, start, length); 1734 final int pos = Token.descPosition(token); 1735 final int lineNum = source.getLine(pos); 1736 final int columnNum = source.getColumn(pos); 1737 final String formatted = ErrorManager.format(message, source, lineNum, columnNum, token); 1738 throw new ParserException(JSErrorType.SYNTAX_ERROR, formatted, source, lineNum, columnNum, token); 1739 } 1740 1741 /** 1742 * Helper class for Lexer tokens, e.g XML or RegExp tokens. 1743 * This is the abstract superclass 1744 */ 1745 public static abstract class LexerToken implements Serializable { 1746 private static final long serialVersionUID = 1L; 1747 1748 private final String expression; 1749 1750 /** 1751 * Constructor 1752 * @param expression token expression 1753 */ 1754 protected LexerToken(final String expression) { 1755 this.expression = expression; 1756 } 1757 1758 /** 1759 * Get the expression 1760 * @return expression 1761 */ 1762 public String getExpression() { 1763 return expression; 1764 } 1765 } 1766 1767 /** 1768 * Temporary container for regular expressions. 1769 */ 1770 public static class RegexToken extends LexerToken { 1771 private static final long serialVersionUID = 1L; 1772 1773 /** Options. */ 1774 private final String options; 1775 1776 /** 1777 * Constructor. 1778 * 1779 * @param expression regexp expression 1780 * @param options regexp options 1781 */ 1782 public RegexToken(final String expression, final String options) { 1783 super(expression); 1784 this.options = options; 1785 } 1786 1787 /** 1788 * Get regexp options 1789 * @return options 1790 */ 1791 public String getOptions() { 1792 return options; 1793 } 1794 1795 @Override 1796 public String toString() { 1797 return '/' + getExpression() + '/' + options; 1798 } 1799 } 1800 1801 /** 1802 * Temporary container for XML expression. 1803 */ 1804 public static class XMLToken extends LexerToken { 1805 private static final long serialVersionUID = 1L; 1806 1807 /** 1808 * Constructor. 1809 * 1810 * @param expression XML expression 1811 */ 1812 public XMLToken(final String expression) { 1813 super(expression); 1814 } 1815 } 1816} 1817