// Lexer.java revision 1732:a32d419d73fe
1/* 2 * Copyright (c) 2010, 2015, Oracle and/or its affiliates. All rights reserved. 3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 4 * 5 * This code is free software; you can redistribute it and/or modify it 6 * under the terms of the GNU General Public License version 2 only, as 7 * published by the Free Software Foundation. Oracle designates this 8 * particular file as subject to the "Classpath" exception as provided 9 * by Oracle in the LICENSE file that accompanied this code. 10 * 11 * This code is distributed in the hope that it will be useful, but WITHOUT 12 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 13 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 14 * version 2 for more details (a copy is included in the LICENSE file that 15 * accompanied this code). 16 * 17 * You should have received a copy of the GNU General Public License version 18 * 2 along with this work; if not, write to the Free Software Foundation, 19 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 20 * 21 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA 22 * or visit www.oracle.com if you need additional information or have any 23 * questions. 
24 */ 25 26package jdk.nashorn.internal.parser; 27 28import static jdk.nashorn.internal.parser.TokenType.ADD; 29import static jdk.nashorn.internal.parser.TokenType.BINARY_NUMBER; 30import static jdk.nashorn.internal.parser.TokenType.COMMENT; 31import static jdk.nashorn.internal.parser.TokenType.DECIMAL; 32import static jdk.nashorn.internal.parser.TokenType.DIRECTIVE_COMMENT; 33import static jdk.nashorn.internal.parser.TokenType.EOF; 34import static jdk.nashorn.internal.parser.TokenType.EOL; 35import static jdk.nashorn.internal.parser.TokenType.ERROR; 36import static jdk.nashorn.internal.parser.TokenType.ESCSTRING; 37import static jdk.nashorn.internal.parser.TokenType.EXECSTRING; 38import static jdk.nashorn.internal.parser.TokenType.FLOATING; 39import static jdk.nashorn.internal.parser.TokenType.FUNCTION; 40import static jdk.nashorn.internal.parser.TokenType.HEXADECIMAL; 41import static jdk.nashorn.internal.parser.TokenType.LBRACE; 42import static jdk.nashorn.internal.parser.TokenType.LPAREN; 43import static jdk.nashorn.internal.parser.TokenType.OCTAL; 44import static jdk.nashorn.internal.parser.TokenType.OCTAL_LEGACY; 45import static jdk.nashorn.internal.parser.TokenType.RBRACE; 46import static jdk.nashorn.internal.parser.TokenType.REGEX; 47import static jdk.nashorn.internal.parser.TokenType.RPAREN; 48import static jdk.nashorn.internal.parser.TokenType.STRING; 49import static jdk.nashorn.internal.parser.TokenType.TEMPLATE; 50import static jdk.nashorn.internal.parser.TokenType.TEMPLATE_HEAD; 51import static jdk.nashorn.internal.parser.TokenType.TEMPLATE_MIDDLE; 52import static jdk.nashorn.internal.parser.TokenType.TEMPLATE_TAIL; 53import static jdk.nashorn.internal.parser.TokenType.XML; 54 55import java.io.Serializable; 56 57import jdk.nashorn.internal.runtime.ECMAErrors; 58import jdk.nashorn.internal.runtime.ErrorManager; 59import jdk.nashorn.internal.runtime.JSErrorType; 60import jdk.nashorn.internal.runtime.JSType; 61import 
jdk.nashorn.internal.runtime.ParserException; 62import jdk.nashorn.internal.runtime.Source; 63import jdk.nashorn.internal.runtime.options.Options; 64 65/** 66 * Responsible for converting source content into a stream of tokens. 67 * 68 */ 69@SuppressWarnings("fallthrough") 70public class Lexer extends Scanner { 71 private static final long MIN_INT_L = Integer.MIN_VALUE; 72 private static final long MAX_INT_L = Integer.MAX_VALUE; 73 74 private static final boolean XML_LITERALS = Options.getBooleanProperty("nashorn.lexer.xmlliterals"); 75 76 /** Content source. */ 77 private final Source source; 78 79 /** Buffered stream for tokens. */ 80 private final TokenStream stream; 81 82 /** True if here and edit strings are supported. */ 83 private final boolean scripting; 84 85 /** True if parsing in ECMAScript 6 mode. */ 86 private final boolean es6; 87 88 /** True if a nested scan. (scan to completion, no EOF.) */ 89 private final boolean nested; 90 91 /** Pending new line number and position. */ 92 int pendingLine; 93 94 /** Position of last EOL + 1. */ 95 private int linePosition; 96 97 /** Type of last token added. 
*/ 98 private TokenType last; 99 100 private final boolean pauseOnFunctionBody; 101 private boolean pauseOnNextLeftBrace; 102 103 private int templateExpressionOpenBraces; 104 105 private static final String SPACETAB = " \t"; // ASCII space and tab 106 private static final String LFCR = "\n\r"; // line feed and carriage return (ctrl-m) 107 108 private static final String JAVASCRIPT_WHITESPACE_EOL = 109 LFCR + 110 "\u2028" + // line separator 111 "\u2029" // paragraph separator 112 ; 113 private static final String JAVASCRIPT_WHITESPACE = 114 SPACETAB + 115 JAVASCRIPT_WHITESPACE_EOL + 116 "\u000b" + // tabulation line 117 "\u000c" + // ff (ctrl-l) 118 "\u00a0" + // Latin-1 space 119 "\u1680" + // Ogham space mark 120 "\u180e" + // separator, Mongolian vowel 121 "\u2000" + // en quad 122 "\u2001" + // em quad 123 "\u2002" + // en space 124 "\u2003" + // em space 125 "\u2004" + // three-per-em space 126 "\u2005" + // four-per-em space 127 "\u2006" + // six-per-em space 128 "\u2007" + // figure space 129 "\u2008" + // punctuation space 130 "\u2009" + // thin space 131 "\u200a" + // hair space 132 "\u202f" + // narrow no-break space 133 "\u205f" + // medium mathematical space 134 "\u3000" + // ideographic space 135 "\ufeff" // byte order mark 136 ; 137 138 private static final String JAVASCRIPT_WHITESPACE_IN_REGEXP = 139 "\\u000a" + // line feed 140 "\\u000d" + // carriage return (ctrl-m) 141 "\\u2028" + // line separator 142 "\\u2029" + // paragraph separator 143 "\\u0009" + // tab 144 "\\u0020" + // ASCII space 145 "\\u000b" + // tabulation line 146 "\\u000c" + // ff (ctrl-l) 147 "\\u00a0" + // Latin-1 space 148 "\\u1680" + // Ogham space mark 149 "\\u180e" + // separator, Mongolian vowel 150 "\\u2000" + // en quad 151 "\\u2001" + // em quad 152 "\\u2002" + // en space 153 "\\u2003" + // em space 154 "\\u2004" + // three-per-em space 155 "\\u2005" + // four-per-em space 156 "\\u2006" + // six-per-em space 157 "\\u2007" + // figure space 158 "\\u2008" + // punctuation 
space 159 "\\u2009" + // thin space 160 "\\u200a" + // hair space 161 "\\u202f" + // narrow no-break space 162 "\\u205f" + // medium mathematical space 163 "\\u3000" + // ideographic space 164 "\\ufeff" // byte order mark 165 ; 166 167 static String unicodeEscape(final char ch) { 168 final StringBuilder sb = new StringBuilder(); 169 170 sb.append("\\u"); 171 172 final String hex = Integer.toHexString(ch); 173 for (int i = hex.length(); i < 4; i++) { 174 sb.append('0'); 175 } 176 sb.append(hex); 177 178 return sb.toString(); 179 } 180 181 /** 182 * Constructor 183 * 184 * @param source the source 185 * @param stream the token stream to lex 186 */ 187 public Lexer(final Source source, final TokenStream stream) { 188 this(source, stream, false, false); 189 } 190 191 /** 192 * Constructor 193 * 194 * @param source the source 195 * @param stream the token stream to lex 196 * @param scripting are we in scripting mode 197 * @param es6 are we in ECMAScript 6 mode 198 */ 199 public Lexer(final Source source, final TokenStream stream, final boolean scripting, final boolean es6) { 200 this(source, 0, source.getLength(), stream, scripting, es6, false); 201 } 202 203 /** 204 * Constructor 205 * 206 * @param source the source 207 * @param start start position in source from which to start lexing 208 * @param len length of source segment to lex 209 * @param stream token stream to lex 210 * @param scripting are we in scripting mode 211 * @param es6 are we in ECMAScript 6 mode 212 * @param pauseOnFunctionBody if true, lexer will return from {@link #lexify()} when it encounters a 213 * function body. This is used with the feature where the parser is skipping nested function bodies to 214 * avoid reading ahead unnecessarily when we skip the function bodies. 
215 */ 216 public Lexer(final Source source, final int start, final int len, final TokenStream stream, final boolean scripting, final boolean es6, final boolean pauseOnFunctionBody) { 217 super(source.getContent(), 1, start, len); 218 this.source = source; 219 this.stream = stream; 220 this.scripting = scripting; 221 this.es6 = es6; 222 this.nested = false; 223 this.pendingLine = 1; 224 this.last = EOL; 225 226 this.pauseOnFunctionBody = pauseOnFunctionBody; 227 } 228 229 private Lexer(final Lexer lexer, final State state) { 230 super(lexer, state); 231 232 source = lexer.source; 233 stream = lexer.stream; 234 scripting = lexer.scripting; 235 es6 = lexer.es6; 236 nested = true; 237 238 pendingLine = state.pendingLine; 239 linePosition = state.linePosition; 240 last = EOL; 241 pauseOnFunctionBody = false; 242 } 243 244 static class State extends Scanner.State { 245 /** Pending new line number and position. */ 246 public final int pendingLine; 247 248 /** Position of last EOL + 1. */ 249 public final int linePosition; 250 251 /** Type of last token added. */ 252 public final TokenType last; 253 254 /* 255 * Constructor. 256 */ 257 258 State(final int position, final int limit, final int line, final int pendingLine, final int linePosition, final TokenType last) { 259 super(position, limit, line); 260 261 this.pendingLine = pendingLine; 262 this.linePosition = linePosition; 263 this.last = last; 264 } 265 } 266 267 /** 268 * Save the state of the scan. 269 * 270 * @return Captured state. 271 */ 272 @Override 273 State saveState() { 274 return new State(position, limit, line, pendingLine, linePosition, last); 275 } 276 277 /** 278 * Restore the state of the scan. 279 * 280 * @param state 281 * Captured state. 282 */ 283 void restoreState(final State state) { 284 super.restoreState(state); 285 286 pendingLine = state.pendingLine; 287 linePosition = state.linePosition; 288 last = state.last; 289 } 290 291 /** 292 * Add a new token to the stream. 
293 * 294 * @param type 295 * Token type. 296 * @param start 297 * Start position. 298 * @param end 299 * End position. 300 */ 301 protected void add(final TokenType type, final int start, final int end) { 302 // Record last token. 303 last = type; 304 305 // Only emit the last EOL in a cluster. 306 if (type == EOL) { 307 pendingLine = end; 308 linePosition = start; 309 } else { 310 // Write any pending EOL to stream. 311 if (pendingLine != -1) { 312 stream.put(Token.toDesc(EOL, linePosition, pendingLine)); 313 pendingLine = -1; 314 } 315 316 // Write token to stream. 317 stream.put(Token.toDesc(type, start, end - start)); 318 } 319 } 320 321 /** 322 * Add a new token to the stream. 323 * 324 * @param type 325 * Token type. 326 * @param start 327 * Start position. 328 */ 329 protected void add(final TokenType type, final int start) { 330 add(type, start, position); 331 } 332 333 /** 334 * Return the String of valid whitespace characters for regular 335 * expressions in JavaScript 336 * @return regexp whitespace string 337 */ 338 public static String getWhitespaceRegExp() { 339 return JAVASCRIPT_WHITESPACE_IN_REGEXP; 340 } 341 342 /** 343 * Skip end of line. 344 * 345 * @param addEOL true if EOL token should be recorded. 346 */ 347 private void skipEOL(final boolean addEOL) { 348 349 if (ch0 == '\r') { // detect \r\n pattern 350 skip(1); 351 if (ch0 == '\n') { 352 skip(1); 353 } 354 } else { // all other space, ch0 is guaranteed to be EOL or \0 355 skip(1); 356 } 357 358 // bump up line count 359 line++; 360 361 if (addEOL) { 362 // Add an EOL token. 363 add(EOL, position, line); 364 } 365 } 366 367 /** 368 * Skip over rest of line including end of line. 369 * 370 * @param addEOL true if EOL token should be recorded. 371 */ 372 private void skipLine(final boolean addEOL) { 373 // Ignore characters. 374 while (!isEOL(ch0) && !atEOF()) { 375 skip(1); 376 } 377 // Skip over end of line. 
378 skipEOL(addEOL); 379 } 380 381 /** 382 * Test whether a char is valid JavaScript whitespace 383 * @param ch a char 384 * @return true if valid JavaScript whitespace 385 */ 386 public static boolean isJSWhitespace(final char ch) { 387 return JAVASCRIPT_WHITESPACE.indexOf(ch) != -1; 388 } 389 390 /** 391 * Test whether a char is valid JavaScript end of line 392 * @param ch a char 393 * @return true if valid JavaScript end of line 394 */ 395 public static boolean isJSEOL(final char ch) { 396 return JAVASCRIPT_WHITESPACE_EOL.indexOf(ch) != -1; 397 } 398 399 /** 400 * Test if char is a string delimiter, e.g. '\' or '"'. 401 * @param ch a char 402 * @return true if string delimiter 403 */ 404 protected boolean isStringDelimiter(final char ch) { 405 return ch == '\'' || ch == '"'; 406 } 407 408 /** 409 * Test if char is a template literal delimiter ('`'). 410 */ 411 private static boolean isTemplateDelimiter(char ch) { 412 return ch == '`'; 413 } 414 415 /** 416 * Test whether a char is valid JavaScript whitespace 417 * @param ch a char 418 * @return true if valid JavaScript whitespace 419 */ 420 protected boolean isWhitespace(final char ch) { 421 return Lexer.isJSWhitespace(ch); 422 } 423 424 /** 425 * Test whether a char is valid JavaScript end of line 426 * @param ch a char 427 * @return true if valid JavaScript end of line 428 */ 429 protected boolean isEOL(final char ch) { 430 return Lexer.isJSEOL(ch); 431 } 432 433 /** 434 * Skip over whitespace and detect end of line, adding EOL tokens if 435 * encountered. 436 * 437 * @param addEOL true if EOL tokens should be recorded. 438 */ 439 private void skipWhitespace(final boolean addEOL) { 440 while (isWhitespace(ch0)) { 441 if (isEOL(ch0)) { 442 skipEOL(addEOL); 443 } else { 444 skip(1); 445 } 446 } 447 } 448 449 /** 450 * Skip over comments. 451 * 452 * @return True if a comment. 453 */ 454 protected boolean skipComments() { 455 // Save the current position. 
456 final int start = position; 457 458 if (ch0 == '/') { 459 // Is it a // comment. 460 if (ch1 == '/') { 461 // Skip over //. 462 skip(2); 463 464 boolean directiveComment = false; 465 if ((ch0 == '#' || ch0 == '@') && (ch1 == ' ')) { 466 directiveComment = true; 467 } 468 469 // Scan for EOL. 470 while (!atEOF() && !isEOL(ch0)) { 471 skip(1); 472 } 473 // Did detect a comment. 474 add(directiveComment? DIRECTIVE_COMMENT : COMMENT, start); 475 return true; 476 } else if (ch1 == '*') { 477 // Skip over /*. 478 skip(2); 479 // Scan for */. 480 while (!atEOF() && !(ch0 == '*' && ch1 == '/')) { 481 // If end of line handle else skip character. 482 if (isEOL(ch0)) { 483 skipEOL(true); 484 } else { 485 skip(1); 486 } 487 } 488 489 if (atEOF()) { 490 // TODO - Report closing */ missing in parser. 491 add(ERROR, start); 492 } else { 493 // Skip */. 494 skip(2); 495 } 496 497 // Did detect a comment. 498 add(COMMENT, start); 499 return true; 500 } 501 } else if (ch0 == '#') { 502 assert scripting; 503 // shell style comment 504 // Skip over #. 505 skip(1); 506 // Scan for EOL. 507 while (!atEOF() && !isEOL(ch0)) { 508 skip(1); 509 } 510 // Did detect a comment. 511 add(COMMENT, start); 512 return true; 513 } 514 515 // Not a comment. 516 return false; 517 } 518 519 /** 520 * Convert a regex token to a token object. 521 * 522 * @param start Position in source content. 523 * @param length Length of regex token. 524 * @return Regex token object. 525 */ 526 public RegexToken valueOfPattern(final int start, final int length) { 527 // Save the current position. 528 final int savePosition = position; 529 // Reset to beginning of content. 530 reset(start); 531 // Buffer for recording characters. 532 final StringBuilder sb = new StringBuilder(length); 533 534 // Skip /. 535 skip(1); 536 boolean inBrackets = false; 537 // Scan for closing /, stopping at end of line. 538 while (!atEOF() && ch0 != '/' && !isEOL(ch0) || inBrackets) { 539 // Skip over escaped character. 
540 if (ch0 == '\\') { 541 sb.append(ch0); 542 sb.append(ch1); 543 skip(2); 544 } else { 545 if (ch0 == '[') { 546 inBrackets = true; 547 } else if (ch0 == ']') { 548 inBrackets = false; 549 } 550 551 // Skip literal character. 552 sb.append(ch0); 553 skip(1); 554 } 555 } 556 557 // Get pattern as string. 558 final String regex = sb.toString(); 559 560 // Skip /. 561 skip(1); 562 563 // Options as string. 564 final String options = source.getString(position, scanIdentifier()); 565 566 reset(savePosition); 567 568 // Compile the pattern. 569 return new RegexToken(regex, options); 570 } 571 572 /** 573 * Return true if the given token can be the beginning of a literal. 574 * 575 * @param token a token 576 * @return true if token can start a literal. 577 */ 578 public boolean canStartLiteral(final TokenType token) { 579 return token.startsWith('/') || ((scripting || XML_LITERALS) && token.startsWith('<')); 580 } 581 582 /** 583 * interface to receive line information for multi-line literals. 584 */ 585 protected interface LineInfoReceiver { 586 /** 587 * Receives line information 588 * @param line last line number 589 * @param linePosition position of last line 590 */ 591 public void lineInfo(int line, int linePosition); 592 } 593 594 /** 595 * Check whether the given token represents the beginning of a literal. If so scan 596 * the literal and return <tt>true</tt>, otherwise return false. 597 * 598 * @param token the token. 599 * @param startTokenType the token type. 600 * @param lir LineInfoReceiver that receives line info for multi-line string literals. 601 * @return True if a literal beginning with startToken was found and scanned. 602 */ 603 protected boolean scanLiteral(final long token, final TokenType startTokenType, final LineInfoReceiver lir) { 604 // Check if it can be a literal. 605 if (!canStartLiteral(startTokenType)) { 606 return false; 607 } 608 // We break on ambiguous tokens so if we already moved on it can't be a literal. 
609 if (stream.get(stream.last()) != token) { 610 return false; 611 } 612 613 // Record current position in case multiple heredocs start on this line - see JDK-8073653 614 final State state = saveState(); 615 // Rewind to token start position 616 reset(Token.descPosition(token)); 617 618 if (ch0 == '/') { 619 return scanRegEx(); 620 } else if (ch0 == '<') { 621 if (ch1 == '<') { 622 return scanHereString(lir, state); 623 } else if (Character.isJavaIdentifierStart(ch1)) { 624 return scanXMLLiteral(); 625 } 626 } 627 628 return false; 629 } 630 631 /** 632 * Scan over regex literal. 633 * 634 * @return True if a regex literal. 635 */ 636 private boolean scanRegEx() { 637 assert ch0 == '/'; 638 // Make sure it's not a comment. 639 if (ch1 != '/' && ch1 != '*') { 640 // Record beginning of literal. 641 final int start = position; 642 // Skip /. 643 skip(1); 644 boolean inBrackets = false; 645 646 // Scan for closing /, stopping at end of line. 647 while (!atEOF() && (ch0 != '/' || inBrackets) && !isEOL(ch0)) { 648 // Skip over escaped character. 649 if (ch0 == '\\') { 650 skip(1); 651 if (isEOL(ch0)) { 652 reset(start); 653 return false; 654 } 655 skip(1); 656 } else { 657 if (ch0 == '[') { 658 inBrackets = true; 659 } else if (ch0 == ']') { 660 inBrackets = false; 661 } 662 663 // Skip literal character. 664 skip(1); 665 } 666 } 667 668 // If regex literal. 669 if (ch0 == '/') { 670 // Skip /. 671 skip(1); 672 673 // Skip over options. 674 while (!atEOF() && Character.isJavaIdentifierPart(ch0) || ch0 == '\\' && ch1 == 'u') { 675 skip(1); 676 } 677 678 // Add regex token. 679 add(REGEX, start); 680 // Regex literal detected. 681 return true; 682 } 683 684 // False start try again. 685 reset(start); 686 } 687 688 // Regex literal not detected. 689 return false; 690 } 691 692 /** 693 * Convert a digit to a integer. Can't use Character.digit since we are 694 * restricted to ASCII by the spec. 695 * 696 * @param ch Character to convert. 697 * @param base Numeric base. 
698 * 699 * @return The converted digit or -1 if invalid. 700 */ 701 protected static int convertDigit(final char ch, final int base) { 702 int digit; 703 704 if ('0' <= ch && ch <= '9') { 705 digit = ch - '0'; 706 } else if ('A' <= ch && ch <= 'Z') { 707 digit = ch - 'A' + 10; 708 } else if ('a' <= ch && ch <= 'z') { 709 digit = ch - 'a' + 10; 710 } else { 711 return -1; 712 } 713 714 return digit < base ? digit : -1; 715 } 716 717 718 /** 719 * Get the value of a hexadecimal numeric sequence. 720 * 721 * @param length Number of digits. 722 * @param type Type of token to report against. 723 * @return Value of sequence or < 0 if no digits. 724 */ 725 private int hexSequence(final int length, final TokenType type) { 726 int value = 0; 727 728 for (int i = 0; i < length; i++) { 729 final int digit = convertDigit(ch0, 16); 730 731 if (digit == -1) { 732 error(Lexer.message("invalid.hex"), type, position, limit); 733 return i == 0 ? -1 : value; 734 } 735 736 value = digit | value << 4; 737 skip(1); 738 } 739 740 return value; 741 } 742 743 /** 744 * Get the value of an octal numeric sequence. This parses up to 3 digits with a maximum value of 255. 745 * 746 * @return Value of sequence. 747 */ 748 private int octalSequence() { 749 int value = 0; 750 751 for (int i = 0; i < 3; i++) { 752 final int digit = convertDigit(ch0, 8); 753 754 if (digit == -1) { 755 break; 756 } 757 value = digit | value << 3; 758 skip(1); 759 760 if (i == 1 && value >= 32) { 761 break; 762 } 763 } 764 return value; 765 } 766 767 /** 768 * Convert a string to a JavaScript identifier. 769 * 770 * @param start Position in source content. 771 * @param length Length of token. 772 * @return Ident string or null if an error. 773 */ 774 private String valueOfIdent(final int start, final int length) throws RuntimeException { 775 // Save the current position. 776 final int savePosition = position; 777 // End of scan. 778 final int end = start + length; 779 // Reset to beginning of content. 
780 reset(start); 781 // Buffer for recording characters. 782 final StringBuilder sb = new StringBuilder(length); 783 784 // Scan until end of line or end of file. 785 while (!atEOF() && position < end && !isEOL(ch0)) { 786 // If escape character. 787 if (ch0 == '\\' && ch1 == 'u') { 788 skip(2); 789 final int ch = hexSequence(4, TokenType.IDENT); 790 if (isWhitespace((char)ch)) { 791 return null; 792 } 793 if (ch < 0) { 794 sb.append('\\'); 795 sb.append('u'); 796 } else { 797 sb.append((char)ch); 798 } 799 } else { 800 // Add regular character. 801 sb.append(ch0); 802 skip(1); 803 } 804 } 805 806 // Restore position. 807 reset(savePosition); 808 809 return sb.toString(); 810 } 811 812 /** 813 * Scan over and identifier or keyword. Handles identifiers containing 814 * encoded Unicode chars. 815 * 816 * Example: 817 * 818 * var \u0042 = 44; 819 */ 820 private void scanIdentifierOrKeyword() { 821 // Record beginning of identifier. 822 final int start = position; 823 // Scan identifier. 824 final int length = scanIdentifier(); 825 // Check to see if it is a keyword. 826 final TokenType type = TokenLookup.lookupKeyword(content, start, length); 827 if (type == FUNCTION && pauseOnFunctionBody) { 828 pauseOnNextLeftBrace = true; 829 } 830 // Add keyword or identifier token. 831 add(type, start); 832 } 833 834 /** 835 * Convert a string to a JavaScript string object. 836 * 837 * @param start Position in source content. 838 * @param length Length of token. 839 * @return JavaScript string object. 840 */ 841 private String valueOfString(final int start, final int length, final boolean strict) throws RuntimeException { 842 // Save the current position. 843 final int savePosition = position; 844 // Calculate the end position. 845 final int end = start + length; 846 // Reset to beginning of string. 847 reset(start); 848 849 // Buffer for recording characters. 850 final StringBuilder sb = new StringBuilder(length); 851 852 // Scan until end of string. 
853 while (position < end) { 854 // If escape character. 855 if (ch0 == '\\') { 856 skip(1); 857 858 final char next = ch0; 859 final int afterSlash = position; 860 861 skip(1); 862 863 // Special characters. 864 switch (next) { 865 case '0': 866 case '1': 867 case '2': 868 case '3': 869 case '4': 870 case '5': 871 case '6': 872 case '7': { 873 if (strict) { 874 // "\0" itself is allowed in strict mode. Only other 'real' 875 // octal escape sequences are not allowed (eg. "\02", "\31"). 876 // See section 7.8.4 String literals production EscapeSequence 877 if (next != '0' || (ch0 >= '0' && ch0 <= '9')) { 878 error(Lexer.message("strict.no.octal"), STRING, position, limit); 879 } 880 } 881 reset(afterSlash); 882 // Octal sequence. 883 final int ch = octalSequence(); 884 885 if (ch < 0) { 886 sb.append('\\'); 887 sb.append('x'); 888 } else { 889 sb.append((char)ch); 890 } 891 break; 892 } 893 case 'n': 894 sb.append('\n'); 895 break; 896 case 't': 897 sb.append('\t'); 898 break; 899 case 'b': 900 sb.append('\b'); 901 break; 902 case 'f': 903 sb.append('\f'); 904 break; 905 case 'r': 906 sb.append('\r'); 907 break; 908 case '\'': 909 sb.append('\''); 910 break; 911 case '\"': 912 sb.append('\"'); 913 break; 914 case '\\': 915 sb.append('\\'); 916 break; 917 case '\r': // CR | CRLF 918 if (ch0 == '\n') { 919 skip(1); 920 } 921 // fall through 922 case '\n': // LF 923 case '\u2028': // LS 924 case '\u2029': // PS 925 // continue on the next line, slash-return continues string 926 // literal 927 break; 928 case 'x': { 929 // Hex sequence. 930 final int ch = hexSequence(2, STRING); 931 932 if (ch < 0) { 933 sb.append('\\'); 934 sb.append('x'); 935 } else { 936 sb.append((char)ch); 937 } 938 } 939 break; 940 case 'u': { 941 // Unicode sequence. 
942 final int ch = hexSequence(4, STRING); 943 944 if (ch < 0) { 945 sb.append('\\'); 946 sb.append('u'); 947 } else { 948 sb.append((char)ch); 949 } 950 } 951 break; 952 case 'v': 953 sb.append('\u000B'); 954 break; 955 // All other characters. 956 default: 957 sb.append(next); 958 break; 959 } 960 } else if (ch0 == '\r') { 961 // Convert CR-LF or CR to LF line terminator. 962 sb.append('\n'); 963 skip(ch1 == '\n' ? 2 : 1); 964 } else { 965 // Add regular character. 966 sb.append(ch0); 967 skip(1); 968 } 969 } 970 971 // Restore position. 972 reset(savePosition); 973 974 return sb.toString(); 975 } 976 977 /** 978 * Scan over a string literal. 979 * @param add true if we are not just scanning but should actually modify the token stream 980 */ 981 protected void scanString(final boolean add) { 982 // Type of string. 983 TokenType type = STRING; 984 // Record starting quote. 985 final char quote = ch0; 986 // Skip over quote. 987 skip(1); 988 989 // Record beginning of string content. 990 final State stringState = saveState(); 991 992 // Scan until close quote or end of line. 993 while (!atEOF() && ch0 != quote && !isEOL(ch0)) { 994 // Skip over escaped character. 995 if (ch0 == '\\') { 996 type = ESCSTRING; 997 skip(1); 998 if (! isEscapeCharacter(ch0)) { 999 error(Lexer.message("invalid.escape.char"), STRING, position, limit); 1000 } 1001 if (isEOL(ch0)) { 1002 // Multiline string literal 1003 skipEOL(false); 1004 continue; 1005 } 1006 } 1007 // Skip literal character. 1008 skip(1); 1009 } 1010 1011 // If close quote. 1012 if (ch0 == quote) { 1013 // Skip close quote. 1014 skip(1); 1015 } else { 1016 error(Lexer.message("missing.close.quote"), STRING, position, limit); 1017 } 1018 1019 // If not just scanning. 1020 if (add) { 1021 // Record end of string. 1022 stringState.setLimit(position - 1); 1023 1024 if (scripting && !stringState.isEmpty()) { 1025 switch (quote) { 1026 case '`': 1027 // Mark the beginning of an exec string. 
1028 add(EXECSTRING, stringState.position, stringState.limit); 1029 // Frame edit string with left brace. 1030 add(LBRACE, stringState.position, stringState.position); 1031 // Process edit string. 1032 editString(type, stringState); 1033 // Frame edit string with right brace. 1034 add(RBRACE, stringState.limit, stringState.limit); 1035 break; 1036 case '"': 1037 // Only edit double quoted strings. 1038 editString(type, stringState); 1039 break; 1040 case '\'': 1041 // Add string token without editing. 1042 add(type, stringState.position, stringState.limit); 1043 break; 1044 default: 1045 break; 1046 } 1047 } else { 1048 /// Add string token without editing. 1049 add(type, stringState.position, stringState.limit); 1050 } 1051 } 1052 } 1053 1054 /** 1055 * Scan over a template string literal. 1056 */ 1057 private void scanTemplate() { 1058 assert ch0 == '`'; 1059 TokenType type = TEMPLATE; 1060 1061 // Skip over quote and record beginning of string content. 1062 skip(1); 1063 State stringState = saveState(); 1064 1065 // Scan until close quote 1066 while (!atEOF()) { 1067 // Skip over escaped character. 1068 if (ch0 == '`') { 1069 skip(1); 1070 // Record end of string. 1071 stringState.setLimit(position - 1); 1072 add(type == TEMPLATE ? type : TEMPLATE_TAIL, stringState.position, stringState.limit); 1073 return; 1074 } else if (ch0 == '$' && ch1 == '{') { 1075 skip(2); 1076 stringState.setLimit(position - 2); 1077 add(type == TEMPLATE ? TEMPLATE_HEAD : type, stringState.position, stringState.limit); 1078 1079 // scan to RBRACE 1080 Lexer expressionLexer = new Lexer(this, saveState()); 1081 expressionLexer.templateExpressionOpenBraces = 1; 1082 expressionLexer.lexify(); 1083 restoreState(expressionLexer.saveState()); 1084 1085 // scan next middle or tail of the template literal 1086 assert ch0 == '}'; 1087 type = TEMPLATE_MIDDLE; 1088 1089 // Skip over rbrace and record beginning of string content. 
1090 skip(1); 1091 stringState = saveState(); 1092 1093 continue; 1094 } else if (ch0 == '\\') { 1095 skip(1); 1096 // EscapeSequence 1097 if (!isEscapeCharacter(ch0)) { 1098 error(Lexer.message("invalid.escape.char"), TEMPLATE, position, limit); 1099 } 1100 if (isEOL(ch0)) { 1101 // LineContinuation 1102 skipEOL(false); 1103 continue; 1104 } 1105 } else if (isEOL(ch0)) { 1106 // LineTerminatorSequence 1107 skipEOL(false); 1108 continue; 1109 } 1110 1111 // Skip literal character. 1112 skip(1); 1113 } 1114 1115 error(Lexer.message("missing.close.quote"), TEMPLATE, position, limit); 1116 } 1117 1118 /** 1119 * Is the given character a valid escape char after "\" ? 1120 * 1121 * @param ch character to be checked 1122 * @return if the given character is valid after "\" 1123 */ 1124 protected boolean isEscapeCharacter(final char ch) { 1125 return true; 1126 } 1127 1128 /** 1129 * Convert string to number. 1130 * 1131 * @param valueString String to convert. 1132 * @param radix Numeric base. 1133 * @return Converted number. 1134 */ 1135 private static Number valueOf(final String valueString, final int radix) throws NumberFormatException { 1136 try { 1137 return Integer.parseInt(valueString, radix); 1138 } catch (final NumberFormatException e) { 1139 if (radix == 10) { 1140 return Double.valueOf(valueString); 1141 } 1142 1143 double value = 0.0; 1144 1145 for (int i = 0; i < valueString.length(); i++) { 1146 final char ch = valueString.charAt(i); 1147 // Preverified, should always be a valid digit. 1148 final int digit = convertDigit(ch, radix); 1149 value *= radix; 1150 value += digit; 1151 } 1152 1153 return value; 1154 } 1155 } 1156 1157 /** 1158 * Scan a number. 1159 */ 1160 protected void scanNumber() { 1161 // Record beginning of number. 1162 final int start = position; 1163 // Assume value is a decimal. 1164 TokenType type = DECIMAL; 1165 1166 // First digit of number. 1167 int digit = convertDigit(ch0, 10); 1168 1169 // If number begins with 0x. 
1170 if (digit == 0 && (ch1 == 'x' || ch1 == 'X') && convertDigit(ch2, 16) != -1) { 1171 // Skip over 0xN. 1172 skip(3); 1173 // Skip over remaining digits. 1174 while (convertDigit(ch0, 16) != -1) { 1175 skip(1); 1176 } 1177 1178 type = HEXADECIMAL; 1179 } else if (digit == 0 && es6 && (ch1 == 'o' || ch1 == 'O') && convertDigit(ch2, 8) != -1) { 1180 // Skip over 0oN. 1181 skip(3); 1182 // Skip over remaining digits. 1183 while (convertDigit(ch0, 8) != -1) { 1184 skip(1); 1185 } 1186 1187 type = OCTAL; 1188 } else if (digit == 0 && es6 && (ch1 == 'b' || ch1 == 'B') && convertDigit(ch2, 2) != -1) { 1189 // Skip over 0bN. 1190 skip(3); 1191 // Skip over remaining digits. 1192 while (convertDigit(ch0, 2) != -1) { 1193 skip(1); 1194 } 1195 1196 type = BINARY_NUMBER; 1197 } else { 1198 // Check for possible octal constant. 1199 boolean octal = digit == 0; 1200 // Skip first digit if not leading '.'. 1201 if (digit != -1) { 1202 skip(1); 1203 } 1204 1205 // Skip remaining digits. 1206 while ((digit = convertDigit(ch0, 10)) != -1) { 1207 // Check octal only digits. 1208 octal = octal && digit < 8; 1209 // Skip digit. 1210 skip(1); 1211 } 1212 1213 if (octal && position - start > 1) { 1214 type = OCTAL_LEGACY; 1215 } else if (ch0 == '.' || ch0 == 'E' || ch0 == 'e') { 1216 // Must be a double. 1217 if (ch0 == '.') { 1218 // Skip period. 1219 skip(1); 1220 // Skip mantissa. 1221 while (convertDigit(ch0, 10) != -1) { 1222 skip(1); 1223 } 1224 } 1225 1226 // Detect exponent. 1227 if (ch0 == 'E' || ch0 == 'e') { 1228 // Skip E. 1229 skip(1); 1230 // Detect and skip exponent sign. 1231 if (ch0 == '+' || ch0 == '-') { 1232 skip(1); 1233 } 1234 // Skip exponent. 1235 while (convertDigit(ch0, 10) != -1) { 1236 skip(1); 1237 } 1238 } 1239 1240 type = FLOATING; 1241 } 1242 } 1243 1244 if (Character.isJavaIdentifierStart(ch0)) { 1245 error(Lexer.message("missing.space.after.number"), type, position, 1); 1246 } 1247 1248 // Add number token. 
1249 add(type, start); 1250 } 1251 1252 /** 1253 * Convert a regex token to a token object. 1254 * 1255 * @param start Position in source content. 1256 * @param length Length of regex token. 1257 * @return Regex token object. 1258 */ 1259 XMLToken valueOfXML(final int start, final int length) { 1260 return new XMLToken(source.getString(start, length)); 1261 } 1262 1263 /** 1264 * Scan over a XML token. 1265 * 1266 * @return TRUE if is an XML literal. 1267 */ 1268 private boolean scanXMLLiteral() { 1269 assert ch0 == '<' && Character.isJavaIdentifierStart(ch1); 1270 if (XML_LITERALS) { 1271 // Record beginning of xml expression. 1272 final int start = position; 1273 1274 int openCount = 0; 1275 1276 do { 1277 if (ch0 == '<') { 1278 if (ch1 == '/' && Character.isJavaIdentifierStart(ch2)) { 1279 skip(3); 1280 openCount--; 1281 } else if (Character.isJavaIdentifierStart(ch1)) { 1282 skip(2); 1283 openCount++; 1284 } else if (ch1 == '?') { 1285 skip(2); 1286 } else if (ch1 == '!' && ch2 == '-' && ch3 == '-') { 1287 skip(4); 1288 } else { 1289 reset(start); 1290 return false; 1291 } 1292 1293 while (!atEOF() && ch0 != '>') { 1294 if (ch0 == '/' && ch1 == '>') { 1295 openCount--; 1296 skip(1); 1297 break; 1298 } else if (ch0 == '\"' || ch0 == '\'') { 1299 scanString(false); 1300 } else { 1301 skip(1); 1302 } 1303 } 1304 1305 if (ch0 != '>') { 1306 reset(start); 1307 return false; 1308 } 1309 1310 skip(1); 1311 } else if (atEOF()) { 1312 reset(start); 1313 return false; 1314 } else { 1315 skip(1); 1316 } 1317 } while (openCount > 0); 1318 1319 add(XML, start); 1320 return true; 1321 } 1322 1323 return false; 1324 } 1325 1326 /** 1327 * Scan over identifier characters. 1328 * 1329 * @return Length of identifier or zero if none found. 1330 */ 1331 private int scanIdentifier() { 1332 final int start = position; 1333 1334 // Make sure first character is valid start character. 
1335 if (ch0 == '\\' && ch1 == 'u') { 1336 skip(2); 1337 final int ch = hexSequence(4, TokenType.IDENT); 1338 1339 if (!Character.isJavaIdentifierStart(ch)) { 1340 error(Lexer.message("illegal.identifier.character"), TokenType.IDENT, start, position); 1341 } 1342 } else if (!Character.isJavaIdentifierStart(ch0)) { 1343 // Not an identifier. 1344 return 0; 1345 } 1346 1347 // Make sure remaining characters are valid part characters. 1348 while (!atEOF()) { 1349 if (ch0 == '\\' && ch1 == 'u') { 1350 skip(2); 1351 final int ch = hexSequence(4, TokenType.IDENT); 1352 1353 if (!Character.isJavaIdentifierPart(ch)) { 1354 error(Lexer.message("illegal.identifier.character"), TokenType.IDENT, start, position); 1355 } 1356 } else if (Character.isJavaIdentifierPart(ch0)) { 1357 skip(1); 1358 } else { 1359 break; 1360 } 1361 } 1362 1363 // Length of identifier sequence. 1364 return position - start; 1365 } 1366 1367 /** 1368 * Compare two identifiers (in content) for equality. 1369 * 1370 * @param aStart Start of first identifier. 1371 * @param aLength Length of first identifier. 1372 * @param bStart Start of second identifier. 1373 * @param bLength Length of second identifier. 1374 * @return True if equal. 1375 */ 1376 private boolean identifierEqual(final int aStart, final int aLength, final int bStart, final int bLength) { 1377 if (aLength == bLength) { 1378 for (int i = 0; i < aLength; i++) { 1379 if (content[aStart + i] != content[bStart + i]) { 1380 return false; 1381 } 1382 } 1383 1384 return true; 1385 } 1386 1387 return false; 1388 } 1389 1390 /** 1391 * Detect if a line starts with a marker identifier. 1392 * 1393 * @param identStart Start of identifier. 1394 * @param identLength Length of identifier. 1395 * @return True if detected. 1396 */ 1397 private boolean hasHereMarker(final int identStart, final int identLength) { 1398 // Skip any whitespace. 
1399 skipWhitespace(false); 1400 1401 return identifierEqual(identStart, identLength, position, scanIdentifier()); 1402 } 1403 1404 /** 1405 * Lexer to service edit strings. 1406 */ 1407 private static class EditStringLexer extends Lexer { 1408 /** Type of string literals to emit. */ 1409 final TokenType stringType; 1410 1411 /* 1412 * Constructor. 1413 */ 1414 1415 EditStringLexer(final Lexer lexer, final TokenType stringType, final State stringState) { 1416 super(lexer, stringState); 1417 1418 this.stringType = stringType; 1419 } 1420 1421 /** 1422 * Lexify the contents of the string. 1423 */ 1424 @Override 1425 public void lexify() { 1426 // Record start of string position. 1427 int stringStart = position; 1428 // Indicate that the priming first string has not been emitted. 1429 boolean primed = false; 1430 1431 while (true) { 1432 // Detect end of content. 1433 if (atEOF()) { 1434 break; 1435 } 1436 1437 // Honour escapes (should be well formed.) 1438 if (ch0 == '\\' && stringType == ESCSTRING) { 1439 skip(2); 1440 1441 continue; 1442 } 1443 1444 // If start of expression. 1445 if (ch0 == '$' && ch1 == '{') { 1446 if (!primed || stringStart != position) { 1447 if (primed) { 1448 add(ADD, stringStart, stringStart + 1); 1449 } 1450 1451 add(stringType, stringStart, position); 1452 primed = true; 1453 } 1454 1455 // Skip ${ 1456 skip(2); 1457 1458 // Save expression state. 1459 final State expressionState = saveState(); 1460 1461 // Start with one open brace. 1462 int braceCount = 1; 1463 1464 // Scan for the rest of the string. 1465 while (!atEOF()) { 1466 // If closing brace. 1467 if (ch0 == '}') { 1468 // Break only only if matching brace. 1469 if (--braceCount == 0) { 1470 break; 1471 } 1472 } else if (ch0 == '{') { 1473 // Bump up the brace count. 1474 braceCount++; 1475 } 1476 1477 // Skip to next character. 1478 skip(1); 1479 } 1480 1481 // If braces don't match then report an error. 
1482 if (braceCount != 0) { 1483 error(Lexer.message("edit.string.missing.brace"), LBRACE, expressionState.position - 1, 1); 1484 } 1485 1486 // Mark end of expression. 1487 expressionState.setLimit(position); 1488 // Skip closing brace. 1489 skip(1); 1490 1491 // Start next string. 1492 stringStart = position; 1493 1494 // Concatenate expression. 1495 add(ADD, expressionState.position, expressionState.position + 1); 1496 add(LPAREN, expressionState.position, expressionState.position + 1); 1497 1498 // Scan expression. 1499 final Lexer lexer = new Lexer(this, expressionState); 1500 lexer.lexify(); 1501 1502 // Close out expression parenthesis. 1503 add(RPAREN, position - 1, position); 1504 1505 continue; 1506 } 1507 1508 // Next character in string. 1509 skip(1); 1510 } 1511 1512 // If there is any unemitted string portion. 1513 if (stringStart != limit) { 1514 // Concatenate remaining string. 1515 if (primed) { 1516 add(ADD, stringStart, 1); 1517 } 1518 1519 add(stringType, stringStart, limit); 1520 } 1521 } 1522 1523 } 1524 1525 /** 1526 * Edit string for nested expressions. 1527 * 1528 * @param stringType Type of string literals to emit. 1529 * @param stringState State of lexer at start of string. 1530 */ 1531 private void editString(final TokenType stringType, final State stringState) { 1532 // Use special lexer to scan string. 1533 final EditStringLexer lexer = new EditStringLexer(this, stringType, stringState); 1534 lexer.lexify(); 1535 1536 // Need to keep lexer informed. 1537 last = stringType; 1538 } 1539 1540 /** 1541 * Scan over a here string. 1542 * 1543 * @return TRUE if is a here string. 1544 */ 1545 private boolean scanHereString(final LineInfoReceiver lir, final State oldState) { 1546 assert ch0 == '<' && ch1 == '<'; 1547 if (scripting) { 1548 // Record beginning of here string. 
1549 final State saved = saveState(); 1550 1551 // << or <<< 1552 final boolean excludeLastEOL = ch2 != '<'; 1553 1554 if (excludeLastEOL) { 1555 skip(2); 1556 } else { 1557 skip(3); 1558 } 1559 1560 // Scan identifier. It might be quoted, indicating that no string editing should take place. 1561 final char quoteChar = ch0; 1562 final boolean noStringEditing = quoteChar == '"' || quoteChar == '\''; 1563 if (noStringEditing) { 1564 skip(1); 1565 } 1566 final int identStart = position; 1567 final int identLength = scanIdentifier(); 1568 if (noStringEditing) { 1569 if (ch0 != quoteChar) { 1570 error(Lexer.message("here.non.matching.delimiter"), last, position, position); 1571 restoreState(saved); 1572 return false; 1573 } 1574 skip(1); 1575 } 1576 1577 // Check for identifier. 1578 if (identLength == 0) { 1579 // Treat as shift. 1580 restoreState(saved); 1581 1582 return false; 1583 } 1584 1585 // Record rest of line. 1586 final State restState = saveState(); 1587 // keep line number updated 1588 int lastLine = line; 1589 1590 skipLine(false); 1591 lastLine++; 1592 int lastLinePosition = position; 1593 restState.setLimit(position); 1594 1595 if (oldState.position > position) { 1596 restoreState(oldState); 1597 skipLine(false); 1598 } 1599 1600 // Record beginning of string. 1601 final State stringState = saveState(); 1602 int stringEnd = position; 1603 1604 // Hunt down marker. 1605 while (!atEOF()) { 1606 // Skip any whitespace. 1607 skipWhitespace(false); 1608 1609 if (hasHereMarker(identStart, identLength)) { 1610 break; 1611 } 1612 1613 skipLine(false); 1614 lastLine++; 1615 lastLinePosition = position; 1616 stringEnd = position; 1617 } 1618 1619 // notify last line information 1620 lir.lineInfo(lastLine, lastLinePosition); 1621 1622 // Record end of string. 1623 stringState.setLimit(stringEnd); 1624 1625 // If marker is missing. 
1626 if (stringState.isEmpty() || atEOF()) { 1627 error(Lexer.message("here.missing.end.marker", source.getString(identStart, identLength)), last, position, position); 1628 restoreState(saved); 1629 1630 return false; 1631 } 1632 1633 // Remove last end of line if specified. 1634 if (excludeLastEOL) { 1635 // Handles \n. 1636 if (content[stringEnd - 1] == '\n') { 1637 stringEnd--; 1638 } 1639 1640 // Handles \r and \r\n. 1641 if (content[stringEnd - 1] == '\r') { 1642 stringEnd--; 1643 } 1644 1645 // Update end of string. 1646 stringState.setLimit(stringEnd); 1647 } 1648 1649 // Edit string if appropriate. 1650 if (!noStringEditing && !stringState.isEmpty()) { 1651 editString(STRING, stringState); 1652 } else { 1653 // Add here string. 1654 add(STRING, stringState.position, stringState.limit); 1655 } 1656 1657 // Scan rest of original line. 1658 final Lexer restLexer = new Lexer(this, restState); 1659 1660 restLexer.lexify(); 1661 1662 return true; 1663 } 1664 1665 return false; 1666 } 1667 1668 /** 1669 * Breaks source content down into lex units, adding tokens to the token 1670 * stream. The routine scans until the stream buffer is full. Can be called 1671 * repeatedly until EOF is detected. 1672 */ 1673 public void lexify() { 1674 while (!stream.isFull() || nested) { 1675 // Skip over whitespace. 1676 skipWhitespace(true); 1677 1678 // Detect end of file. 1679 if (atEOF()) { 1680 if (!nested) { 1681 // Add an EOF token at the end. 1682 add(EOF, position); 1683 } 1684 1685 break; 1686 } 1687 1688 // Check for comments. Note that we don't scan for regexp and other literals here as 1689 // we may not have enough context to distinguish them from similar looking operators. 1690 // Instead we break on ambiguous operators below and let the parser decide. 1691 if (ch0 == '/' && skipComments()) { 1692 continue; 1693 } 1694 1695 if (scripting && ch0 == '#' && skipComments()) { 1696 continue; 1697 } 1698 1699 // TokenType for lookup of delimiter or operator. 
1700 TokenType type; 1701 1702 if (ch0 == '.' && convertDigit(ch1, 10) != -1) { 1703 // '.' followed by digit. 1704 // Scan and add a number. 1705 scanNumber(); 1706 } else if ((type = TokenLookup.lookupOperator(ch0, ch1, ch2, ch3)) != null) { 1707 if (templateExpressionOpenBraces > 0) { 1708 if (type == LBRACE) { 1709 templateExpressionOpenBraces++; 1710 } else if (type == RBRACE) { 1711 if (--templateExpressionOpenBraces == 0) { 1712 break; 1713 } 1714 } 1715 } 1716 1717 // Get the number of characters in the token. 1718 final int typeLength = type.getLength(); 1719 // Skip that many characters. 1720 skip(typeLength); 1721 // Add operator token. 1722 add(type, position - typeLength); 1723 // Some operator tokens also mark the beginning of regexp, XML, or here string literals. 1724 // We break to let the parser decide what it is. 1725 if (canStartLiteral(type)) { 1726 break; 1727 } else if (type == LBRACE && pauseOnNextLeftBrace) { 1728 pauseOnNextLeftBrace = false; 1729 break; 1730 } 1731 } else if (Character.isJavaIdentifierStart(ch0) || ch0 == '\\' && ch1 == 'u') { 1732 // Scan and add identifier or keyword. 1733 scanIdentifierOrKeyword(); 1734 } else if (isStringDelimiter(ch0)) { 1735 // Scan and add a string. 1736 scanString(true); 1737 } else if (Character.isDigit(ch0)) { 1738 // Scan and add a number. 1739 scanNumber(); 1740 } else if (isTemplateDelimiter(ch0) && es6) { 1741 // Scan and add template in ES6 mode. 1742 scanTemplate(); 1743 } else if (isTemplateDelimiter(ch0) && scripting) { 1744 // Scan and add an exec string ('`') in scripting mode. 1745 scanString(true); 1746 } else { 1747 // Don't recognize this character. 1748 skip(1); 1749 add(ERROR, position - 1); 1750 } 1751 } 1752 } 1753 1754 /** 1755 * Return value of token given its token descriptor. 1756 * 1757 * @param token Token descriptor. 1758 * @return JavaScript value. 
1759 */ 1760 Object getValueOf(final long token, final boolean strict) { 1761 final int start = Token.descPosition(token); 1762 final int len = Token.descLength(token); 1763 1764 switch (Token.descType(token)) { 1765 case DECIMAL: 1766 return Lexer.valueOf(source.getString(start, len), 10); // number 1767 case HEXADECIMAL: 1768 return Lexer.valueOf(source.getString(start + 2, len - 2), 16); // number 1769 case OCTAL_LEGACY: 1770 return Lexer.valueOf(source.getString(start, len), 8); // number 1771 case OCTAL: 1772 return Lexer.valueOf(source.getString(start + 2, len - 2), 8); // number 1773 case BINARY_NUMBER: 1774 return Lexer.valueOf(source.getString(start + 2, len - 2), 2); // number 1775 case FLOATING: 1776 final String str = source.getString(start, len); 1777 final double value = Double.valueOf(str); 1778 if (str.indexOf('.') != -1) { 1779 return value; //number 1780 } 1781 //anything without an explicit decimal point is still subject to a 1782 //"representable as int or long" check. Then the programmer does not 1783 //explicitly code something as a double. For example new Color(int, int, int) 1784 //and new Color(float, float, float) will get ambiguous for cases like 1785 //new Color(1.0, 1.5, 1.5) if we don't respect the decimal point. 1786 //yet we don't want e.g. 
1e6 to be a double unnecessarily 1787 if (JSType.isStrictlyRepresentableAsInt(value)) { 1788 return (int)value; 1789 } 1790 return value; 1791 case STRING: 1792 return source.getString(start, len); // String 1793 case ESCSTRING: 1794 return valueOfString(start, len, strict); // String 1795 case IDENT: 1796 return valueOfIdent(start, len); // String 1797 case REGEX: 1798 return valueOfPattern(start, len); // RegexToken::LexerToken 1799 case TEMPLATE: 1800 case TEMPLATE_HEAD: 1801 case TEMPLATE_MIDDLE: 1802 case TEMPLATE_TAIL: 1803 return valueOfString(start, len, true); // String 1804 case XML: 1805 return valueOfXML(start, len); // XMLToken::LexerToken 1806 case DIRECTIVE_COMMENT: 1807 return source.getString(start, len); 1808 default: 1809 break; 1810 } 1811 1812 return null; 1813 } 1814 1815 /** 1816 * Get the raw string value of a template literal string part. 1817 * 1818 * @param token template string token 1819 * @return raw string 1820 */ 1821 public String valueOfRawString(final long token) { 1822 final int start = Token.descPosition(token); 1823 final int length = Token.descLength(token); 1824 1825 // Save the current position. 1826 final int savePosition = position; 1827 // Calculate the end position. 1828 final int end = start + length; 1829 // Reset to beginning of string. 1830 reset(start); 1831 1832 // Buffer for recording characters. 1833 final StringBuilder sb = new StringBuilder(length); 1834 1835 // Scan until end of string. 1836 while (position < end) { 1837 if (ch0 == '\r') { 1838 // Convert CR-LF or CR to LF line terminator. 1839 sb.append('\n'); 1840 skip(ch1 == '\n' ? 2 : 1); 1841 } else { 1842 // Add regular character. 1843 sb.append(ch0); 1844 skip(1); 1845 } 1846 } 1847 1848 // Restore position. 
1849 reset(savePosition); 1850 1851 return sb.toString(); 1852 } 1853 1854 /** 1855 * Get the correctly localized error message for a given message id format arguments 1856 * @param msgId message id 1857 * @param args format arguments 1858 * @return message 1859 */ 1860 protected static String message(final String msgId, final String... args) { 1861 return ECMAErrors.getMessage("lexer.error." + msgId, args); 1862 } 1863 1864 /** 1865 * Generate a runtime exception 1866 * 1867 * @param message error message 1868 * @param type token type 1869 * @param start start position of lexed error 1870 * @param length length of lexed error 1871 * @throws ParserException unconditionally 1872 */ 1873 protected void error(final String message, final TokenType type, final int start, final int length) throws ParserException { 1874 final long token = Token.toDesc(type, start, length); 1875 final int pos = Token.descPosition(token); 1876 final int lineNum = source.getLine(pos); 1877 final int columnNum = source.getColumn(pos); 1878 final String formatted = ErrorManager.format(message, source, lineNum, columnNum, token); 1879 throw new ParserException(JSErrorType.SYNTAX_ERROR, formatted, source, lineNum, columnNum, token); 1880 } 1881 1882 /** 1883 * Helper class for Lexer tokens, e.g XML or RegExp tokens. 1884 * This is the abstract superclass 1885 */ 1886 public static abstract class LexerToken implements Serializable { 1887 private static final long serialVersionUID = 1L; 1888 1889 private final String expression; 1890 1891 /** 1892 * Constructor 1893 * @param expression token expression 1894 */ 1895 protected LexerToken(final String expression) { 1896 this.expression = expression; 1897 } 1898 1899 /** 1900 * Get the expression 1901 * @return expression 1902 */ 1903 public String getExpression() { 1904 return expression; 1905 } 1906 } 1907 1908 /** 1909 * Temporary container for regular expressions. 
1910 */ 1911 public static class RegexToken extends LexerToken { 1912 private static final long serialVersionUID = 1L; 1913 1914 /** Options. */ 1915 private final String options; 1916 1917 /** 1918 * Constructor. 1919 * 1920 * @param expression regexp expression 1921 * @param options regexp options 1922 */ 1923 public RegexToken(final String expression, final String options) { 1924 super(expression); 1925 this.options = options; 1926 } 1927 1928 /** 1929 * Get regexp options 1930 * @return options 1931 */ 1932 public String getOptions() { 1933 return options; 1934 } 1935 1936 @Override 1937 public String toString() { 1938 return '/' + getExpression() + '/' + options; 1939 } 1940 } 1941 1942 /** 1943 * Temporary container for XML expression. 1944 */ 1945 public static class XMLToken extends LexerToken { 1946 private static final long serialVersionUID = 1L; 1947 1948 /** 1949 * Constructor. 1950 * 1951 * @param expression XML expression 1952 */ 1953 public XMLToken(final String expression) { 1954 super(expression); 1955 } 1956 } 1957} 1958