Lexer.java revision 953:221a84ef44c0
1/* 2 * Copyright (c) 2010, 2013, Oracle and/or its affiliates. All rights reserved. 3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 4 * 5 * This code is free software; you can redistribute it and/or modify it 6 * under the terms of the GNU General Public License version 2 only, as 7 * published by the Free Software Foundation. Oracle designates this 8 * particular file as subject to the "Classpath" exception as provided 9 * by Oracle in the LICENSE file that accompanied this code. 10 * 11 * This code is distributed in the hope that it will be useful, but WITHOUT 12 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 13 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 14 * version 2 for more details (a copy is included in the LICENSE file that 15 * accompanied this code). 16 * 17 * You should have received a copy of the GNU General Public License version 18 * 2 along with this work; if not, write to the Free Software Foundation, 19 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 20 * 21 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA 22 * or visit www.oracle.com if you need additional information or have any 23 * questions. 
24 */ 25 26package jdk.nashorn.internal.parser; 27 28import static jdk.nashorn.internal.parser.TokenType.ADD; 29import static jdk.nashorn.internal.parser.TokenType.COMMENT; 30import static jdk.nashorn.internal.parser.TokenType.DECIMAL; 31import static jdk.nashorn.internal.parser.TokenType.DIRECTIVE_COMMENT; 32import static jdk.nashorn.internal.parser.TokenType.EOF; 33import static jdk.nashorn.internal.parser.TokenType.EOL; 34import static jdk.nashorn.internal.parser.TokenType.ERROR; 35import static jdk.nashorn.internal.parser.TokenType.ESCSTRING; 36import static jdk.nashorn.internal.parser.TokenType.EXECSTRING; 37import static jdk.nashorn.internal.parser.TokenType.FLOATING; 38import static jdk.nashorn.internal.parser.TokenType.HEXADECIMAL; 39import static jdk.nashorn.internal.parser.TokenType.LBRACE; 40import static jdk.nashorn.internal.parser.TokenType.LPAREN; 41import static jdk.nashorn.internal.parser.TokenType.OCTAL; 42import static jdk.nashorn.internal.parser.TokenType.RBRACE; 43import static jdk.nashorn.internal.parser.TokenType.REGEX; 44import static jdk.nashorn.internal.parser.TokenType.RPAREN; 45import static jdk.nashorn.internal.parser.TokenType.STRING; 46import static jdk.nashorn.internal.parser.TokenType.XML; 47 48import jdk.nashorn.internal.runtime.ECMAErrors; 49import jdk.nashorn.internal.runtime.ErrorManager; 50import jdk.nashorn.internal.runtime.JSErrorType; 51import jdk.nashorn.internal.runtime.JSType; 52import jdk.nashorn.internal.runtime.ParserException; 53import jdk.nashorn.internal.runtime.Source; 54import jdk.nashorn.internal.runtime.options.Options; 55 56/** 57 * Responsible for converting source content into a stream of tokens. 
58 * 59 */ 60@SuppressWarnings("fallthrough") 61public class Lexer extends Scanner { 62 private static final long MIN_INT_L = Integer.MIN_VALUE; 63 private static final long MAX_INT_L = Integer.MAX_VALUE; 64 65 private static final boolean XML_LITERALS = Options.getBooleanProperty("nashorn.lexer.xmlliterals"); 66 67 /** Content source. */ 68 private final Source source; 69 70 /** Buffered stream for tokens. */ 71 private final TokenStream stream; 72 73 /** True if here and edit strings are supported. */ 74 private final boolean scripting; 75 76 /** True if a nested scan. (scan to completion, no EOF.) */ 77 private final boolean nested; 78 79 /** Pending new line number and position. */ 80 int pendingLine; 81 82 /** Position of last EOL + 1. */ 83 private int linePosition; 84 85 /** Type of last token added. */ 86 private TokenType last; 87 88 private static final String SPACETAB = " \t"; // ASCII space and tab 89 private static final String LFCR = "\n\r"; // line feed and carriage return (ctrl-m) 90 91 private static final String JSON_WHITESPACE_EOL = LFCR; 92 private static final String JSON_WHITESPACE = SPACETAB + LFCR; 93 94 private static final String JAVASCRIPT_WHITESPACE_EOL = 95 LFCR + 96 "\u2028" + // line separator 97 "\u2029" // paragraph separator 98 ; 99 private static final String JAVASCRIPT_WHITESPACE = 100 SPACETAB + 101 JAVASCRIPT_WHITESPACE_EOL + 102 "\u000b" + // tabulation line 103 "\u000c" + // ff (ctrl-l) 104 "\u00a0" + // Latin-1 space 105 "\u1680" + // Ogham space mark 106 "\u180e" + // separator, Mongolian vowel 107 "\u2000" + // en quad 108 "\u2001" + // em quad 109 "\u2002" + // en space 110 "\u2003" + // em space 111 "\u2004" + // three-per-em space 112 "\u2005" + // four-per-em space 113 "\u2006" + // six-per-em space 114 "\u2007" + // figure space 115 "\u2008" + // punctuation space 116 "\u2009" + // thin space 117 "\u200a" + // hair space 118 "\u202f" + // narrow no-break space 119 "\u205f" + // medium mathematical space 120 "\u3000" + 
// ideographic space 121 "\ufeff" // byte order mark 122 ; 123 124 private static final String JAVASCRIPT_WHITESPACE_IN_REGEXP = 125 "\\u000a" + // line feed 126 "\\u000d" + // carriage return (ctrl-m) 127 "\\u2028" + // line separator 128 "\\u2029" + // paragraph separator 129 "\\u0009" + // tab 130 "\\u0020" + // ASCII space 131 "\\u000b" + // tabulation line 132 "\\u000c" + // ff (ctrl-l) 133 "\\u00a0" + // Latin-1 space 134 "\\u1680" + // Ogham space mark 135 "\\u180e" + // separator, Mongolian vowel 136 "\\u2000" + // en quad 137 "\\u2001" + // em quad 138 "\\u2002" + // en space 139 "\\u2003" + // em space 140 "\\u2004" + // three-per-em space 141 "\\u2005" + // four-per-em space 142 "\\u2006" + // six-per-em space 143 "\\u2007" + // figure space 144 "\\u2008" + // punctuation space 145 "\\u2009" + // thin space 146 "\\u200a" + // hair space 147 "\\u202f" + // narrow no-break space 148 "\\u205f" + // medium mathematical space 149 "\\u3000" + // ideographic space 150 "\\ufeff" // byte order mark 151 ; 152 153 static String unicodeEscape(final char ch) { 154 final StringBuilder sb = new StringBuilder(); 155 156 sb.append("\\u"); 157 158 final String hex = Integer.toHexString(ch); 159 for (int i = hex.length(); i < 4; i++) { 160 sb.append('0'); 161 } 162 sb.append(hex); 163 164 return sb.toString(); 165 } 166 167 /** 168 * Constructor 169 * 170 * @param source the source 171 * @param stream the token stream to lex 172 */ 173 public Lexer(final Source source, final TokenStream stream) { 174 this(source, stream, false); 175 } 176 177 /** 178 * Constructor 179 * 180 * @param source the source 181 * @param stream the token stream to lex 182 * @param scripting are we in scripting mode 183 */ 184 public Lexer(final Source source, final TokenStream stream, final boolean scripting) { 185 this(source, 0, source.getLength(), stream, scripting); 186 } 187 188 /** 189 * Contructor 190 * 191 * @param source the source 192 * @param start start position in source from which to 
start lexing 193 * @param len length of source segment to lex 194 * @param stream token stream to lex 195 * @param scripting are we in scripting mode 196 */ 197 198 public Lexer(final Source source, final int start, final int len, final TokenStream stream, final boolean scripting) { 199 super(source.getContent(), 1, start, len); 200 this.source = source; 201 this.stream = stream; 202 this.scripting = scripting; 203 this.nested = false; 204 this.pendingLine = 1; 205 this.last = EOL; 206 } 207 208 private Lexer(final Lexer lexer, final State state) { 209 super(lexer, state); 210 211 source = lexer.source; 212 stream = lexer.stream; 213 scripting = lexer.scripting; 214 nested = true; 215 216 pendingLine = state.pendingLine; 217 linePosition = state.linePosition; 218 last = EOL; 219 } 220 221 static class State extends Scanner.State { 222 /** Pending new line number and position. */ 223 public final int pendingLine; 224 225 /** Position of last EOL + 1. */ 226 public final int linePosition; 227 228 /** Type of last token added. */ 229 public final TokenType last; 230 231 /* 232 * Constructor. 233 */ 234 235 State(final int position, final int limit, final int line, final int pendingLine, final int linePosition, final TokenType last) { 236 super(position, limit, line); 237 238 this.pendingLine = pendingLine; 239 this.linePosition = linePosition; 240 this.last = last; 241 } 242 } 243 244 /** 245 * Save the state of the scan. 246 * 247 * @return Captured state. 248 */ 249 @Override 250 State saveState() { 251 return new State(position, limit, line, pendingLine, linePosition, last); 252 } 253 254 /** 255 * Restore the state of the scan. 256 * 257 * @param state 258 * Captured state. 259 */ 260 void restoreState(final State state) { 261 super.restoreState(state); 262 263 pendingLine = state.pendingLine; 264 linePosition = state.linePosition; 265 last = state.last; 266 } 267 268 /** 269 * Add a new token to the stream. 270 * 271 * @param type 272 * Token type. 
273 * @param start 274 * Start position. 275 * @param end 276 * End position. 277 */ 278 protected void add(final TokenType type, final int start, final int end) { 279 // Record last token. 280 last = type; 281 282 // Only emit the last EOL in a cluster. 283 if (type == EOL) { 284 pendingLine = end; 285 linePosition = start; 286 } else { 287 // Write any pending EOL to stream. 288 if (pendingLine != -1) { 289 stream.put(Token.toDesc(EOL, linePosition, pendingLine)); 290 pendingLine = -1; 291 } 292 293 // Write token to stream. 294 stream.put(Token.toDesc(type, start, end - start)); 295 } 296 } 297 298 /** 299 * Add a new token to the stream. 300 * 301 * @param type 302 * Token type. 303 * @param start 304 * Start position. 305 */ 306 protected void add(final TokenType type, final int start) { 307 add(type, start, position); 308 } 309 310 /** 311 * Return the String of valid whitespace characters for regular 312 * expressions in JavaScript 313 * @return regexp whitespace string 314 */ 315 public static String getWhitespaceRegExp() { 316 return JAVASCRIPT_WHITESPACE_IN_REGEXP; 317 } 318 319 /** 320 * Skip end of line. 321 * 322 * @param addEOL true if EOL token should be recorded. 323 */ 324 private void skipEOL(final boolean addEOL) { 325 326 if (ch0 == '\r') { // detect \r\n pattern 327 skip(1); 328 if (ch0 == '\n') { 329 skip(1); 330 } 331 } else { // all other space, ch0 is guaranteed to be EOL or \0 332 skip(1); 333 } 334 335 // bump up line count 336 line++; 337 338 if (addEOL) { 339 // Add an EOL token. 340 add(EOL, position, line); 341 } 342 } 343 344 /** 345 * Skip over rest of line including end of line. 346 * 347 * @param addEOL true if EOL token should be recorded. 348 */ 349 private void skipLine(final boolean addEOL) { 350 // Ignore characters. 351 while (!isEOL(ch0) && !atEOF()) { 352 skip(1); 353 } 354 // Skip over end of line. 
355 skipEOL(addEOL); 356 } 357 358 /** 359 * Test whether a char is valid JavaScript whitespace 360 * @param ch a char 361 * @return true if valid JavaScript whitespace 362 */ 363 public static boolean isJSWhitespace(final char ch) { 364 return JAVASCRIPT_WHITESPACE.indexOf(ch) != -1; 365 } 366 367 /** 368 * Test whether a char is valid JavaScript end of line 369 * @param ch a char 370 * @return true if valid JavaScript end of line 371 */ 372 public static boolean isJSEOL(final char ch) { 373 return JAVASCRIPT_WHITESPACE_EOL.indexOf(ch) != -1; 374 } 375 376 /** 377 * Test whether a char is valid JSON whitespace 378 * @param ch a char 379 * @return true if valid JSON whitespace 380 */ 381 public static boolean isJsonWhitespace(final char ch) { 382 return JSON_WHITESPACE.indexOf(ch) != -1; 383 } 384 385 /** 386 * Test whether a char is valid JSON end of line 387 * @param ch a char 388 * @return true if valid JSON end of line 389 */ 390 public static boolean isJsonEOL(final char ch) { 391 return JSON_WHITESPACE_EOL.indexOf(ch) != -1; 392 } 393 394 /** 395 * Test if char is a string delimiter, e.g. '\' or '"'. Also scans exec 396 * strings ('`') in scripting mode. 397 * @param ch a char 398 * @return true if string delimiter 399 */ 400 protected boolean isStringDelimiter(final char ch) { 401 return ch == '\'' || ch == '"' || (scripting && ch == '`'); 402 } 403 404 /** 405 * Test whether a char is valid JavaScript whitespace 406 * @param ch a char 407 * @return true if valid JavaScript whitespace 408 */ 409 protected boolean isWhitespace(final char ch) { 410 return Lexer.isJSWhitespace(ch); 411 } 412 413 /** 414 * Test whether a char is valid JavaScript end of line 415 * @param ch a char 416 * @return true if valid JavaScript end of line 417 */ 418 protected boolean isEOL(final char ch) { 419 return Lexer.isJSEOL(ch); 420 } 421 422 /** 423 * Skip over whitespace and detect end of line, adding EOL tokens if 424 * encountered. 
425 * 426 * @param addEOL true if EOL tokens should be recorded. 427 */ 428 private void skipWhitespace(final boolean addEOL) { 429 while (isWhitespace(ch0)) { 430 if (isEOL(ch0)) { 431 skipEOL(addEOL); 432 } else { 433 skip(1); 434 } 435 } 436 } 437 438 /** 439 * Skip over comments. 440 * 441 * @return True if a comment. 442 */ 443 protected boolean skipComments() { 444 // Save the current position. 445 final int start = position; 446 447 if (ch0 == '/') { 448 // Is it a // comment. 449 if (ch1 == '/') { 450 // Skip over //. 451 skip(2); 452 453 boolean directiveComment = false; 454 if ((ch0 == '#' || ch0 == '@') && (ch1 == ' ')) { 455 directiveComment = true; 456 } 457 458 // Scan for EOL. 459 while (!atEOF() && !isEOL(ch0)) { 460 skip(1); 461 } 462 // Did detect a comment. 463 add(directiveComment? DIRECTIVE_COMMENT : COMMENT, start); 464 return true; 465 } else if (ch1 == '*') { 466 // Skip over /*. 467 skip(2); 468 // Scan for */. 469 while (!atEOF() && !(ch0 == '*' && ch1 == '/')) { 470 // If end of line handle else skip character. 471 if (isEOL(ch0)) { 472 skipEOL(true); 473 } else { 474 skip(1); 475 } 476 } 477 478 if (atEOF()) { 479 // TODO - Report closing */ missing in parser. 480 add(ERROR, start); 481 } else { 482 // Skip */. 483 skip(2); 484 } 485 486 // Did detect a comment. 487 add(COMMENT, start); 488 return true; 489 } 490 } else if (ch0 == '#') { 491 assert scripting; 492 // shell style comment 493 // Skip over #. 494 skip(1); 495 // Scan for EOL. 496 while (!atEOF() && !isEOL(ch0)) { 497 skip(1); 498 } 499 // Did detect a comment. 500 add(COMMENT, start); 501 return true; 502 } 503 504 // Not a comment. 505 return false; 506 } 507 508 /** 509 * Convert a regex token to a token object. 510 * 511 * @param start Position in source content. 512 * @param length Length of regex token. 513 * @return Regex token object. 514 */ 515 public RegexToken valueOfPattern(final int start, final int length) { 516 // Save the current position. 
517 final int savePosition = position; 518 // Reset to beginning of content. 519 reset(start); 520 // Buffer for recording characters. 521 final StringBuilder sb = new StringBuilder(length); 522 523 // Skip /. 524 skip(1); 525 boolean inBrackets = false; 526 // Scan for closing /, stopping at end of line. 527 while (!atEOF() && ch0 != '/' && !isEOL(ch0) || inBrackets) { 528 // Skip over escaped character. 529 if (ch0 == '\\') { 530 sb.append(ch0); 531 sb.append(ch1); 532 skip(2); 533 } else { 534 if (ch0 == '[') { 535 inBrackets = true; 536 } else if (ch0 == ']') { 537 inBrackets = false; 538 } 539 540 // Skip literal character. 541 sb.append(ch0); 542 skip(1); 543 } 544 } 545 546 // Get pattern as string. 547 final String regex = sb.toString(); 548 549 // Skip /. 550 skip(1); 551 552 // Options as string. 553 final String options = source.getString(position, scanIdentifier()); 554 555 reset(savePosition); 556 557 // Compile the pattern. 558 return new RegexToken(regex, options); 559 } 560 561 /** 562 * Return true if the given token can be the beginning of a literal. 563 * 564 * @param token a token 565 * @return true if token can start a literal. 566 */ 567 public boolean canStartLiteral(final TokenType token) { 568 return token.startsWith('/') || ((scripting || XML_LITERALS) && token.startsWith('<')); 569 } 570 571 /** 572 * interface to receive line information for multi-line literals. 573 */ 574 protected interface LineInfoReceiver { 575 /** 576 * Receives line information 577 * @param line last line number 578 * @param linePosition position of last line 579 */ 580 public void lineInfo(int line, int linePosition); 581 } 582 583 /** 584 * Check whether the given token represents the beginning of a literal. If so scan 585 * the literal and return <tt>true</tt>, otherwise return false. 586 * 587 * @param token the token. 588 * @param startTokenType the token type. 589 * @param lir LineInfoReceiver that receives line info for multi-line string literals. 
590 * @return True if a literal beginning with startToken was found and scanned. 591 */ 592 protected boolean scanLiteral(final long token, final TokenType startTokenType, final LineInfoReceiver lir) { 593 // Check if it can be a literal. 594 if (!canStartLiteral(startTokenType)) { 595 return false; 596 } 597 // We break on ambiguous tokens so if we already moved on it can't be a literal. 598 if (stream.get(stream.last()) != token) { 599 return false; 600 } 601 // Rewind to token start position 602 reset(Token.descPosition(token)); 603 604 if (ch0 == '/') { 605 return scanRegEx(); 606 } else if (ch0 == '<') { 607 if (ch1 == '<') { 608 return scanHereString(lir); 609 } else if (Character.isJavaIdentifierStart(ch1)) { 610 return scanXMLLiteral(); 611 } 612 } 613 614 return false; 615 } 616 617 /** 618 * Scan over regex literal. 619 * 620 * @return True if a regex literal. 621 */ 622 private boolean scanRegEx() { 623 assert ch0 == '/'; 624 // Make sure it's not a comment. 625 if (ch1 != '/' && ch1 != '*') { 626 // Record beginning of literal. 627 final int start = position; 628 // Skip /. 629 skip(1); 630 boolean inBrackets = false; 631 632 // Scan for closing /, stopping at end of line. 633 while (!atEOF() && (ch0 != '/' || inBrackets) && !isEOL(ch0)) { 634 // Skip over escaped character. 635 if (ch0 == '\\') { 636 skip(1); 637 if (isEOL(ch0)) { 638 reset(start); 639 return false; 640 } 641 skip(1); 642 } else { 643 if (ch0 == '[') { 644 inBrackets = true; 645 } else if (ch0 == ']') { 646 inBrackets = false; 647 } 648 649 // Skip literal character. 650 skip(1); 651 } 652 } 653 654 // If regex literal. 655 if (ch0 == '/') { 656 // Skip /. 657 skip(1); 658 659 // Skip over options. 660 while (!atEOF() && Character.isJavaIdentifierPart(ch0) || ch0 == '\\' && ch1 == 'u') { 661 skip(1); 662 } 663 664 // Add regex token. 665 add(REGEX, start); 666 // Regex literal detected. 667 return true; 668 } 669 670 // False start try again. 
671 reset(start); 672 } 673 674 // Regex literal not detected. 675 return false; 676 } 677 678 /** 679 * Convert a digit to a integer. Can't use Character.digit since we are 680 * restricted to ASCII by the spec. 681 * 682 * @param ch Character to convert. 683 * @param base Numeric base. 684 * 685 * @return The converted digit or -1 if invalid. 686 */ 687 protected static int convertDigit(final char ch, final int base) { 688 int digit; 689 690 if ('0' <= ch && ch <= '9') { 691 digit = ch - '0'; 692 } else if ('A' <= ch && ch <= 'Z') { 693 digit = ch - 'A' + 10; 694 } else if ('a' <= ch && ch <= 'z') { 695 digit = ch - 'a' + 10; 696 } else { 697 return -1; 698 } 699 700 return digit < base ? digit : -1; 701 } 702 703 704 /** 705 * Get the value of a hexadecimal numeric sequence. 706 * 707 * @param length Number of digits. 708 * @param type Type of token to report against. 709 * @return Value of sequence or < 0 if no digits. 710 */ 711 private int hexSequence(final int length, final TokenType type) { 712 int value = 0; 713 714 for (int i = 0; i < length; i++) { 715 final int digit = convertDigit(ch0, 16); 716 717 if (digit == -1) { 718 error(Lexer.message("invalid.hex"), type, position, limit); 719 return i == 0 ? -1 : value; 720 } 721 722 value = digit | value << 4; 723 skip(1); 724 } 725 726 return value; 727 } 728 729 /** 730 * Get the value of an octal numeric sequence. This parses up to 3 digits with a maximum value of 255. 731 * 732 * @return Value of sequence. 733 */ 734 private int octalSequence() { 735 int value = 0; 736 737 for (int i = 0; i < 3; i++) { 738 final int digit = convertDigit(ch0, 8); 739 740 if (digit == -1) { 741 break; 742 } 743 value = digit | value << 3; 744 skip(1); 745 746 if (i == 1 && value >= 32) { 747 break; 748 } 749 } 750 return value; 751 } 752 753 /** 754 * Convert a string to a JavaScript identifier. 755 * 756 * @param start Position in source content. 757 * @param length Length of token. 
* @return Ident string or null if an error.
     */
    private String valueOfIdent(final int start, final int length) throws RuntimeException {
        // Save the current position.
        final int savePosition = position;
        // End of scan.
        final int end = start + length;
        // Reset to beginning of content.
        reset(start);
        // Buffer for recording characters.
        final StringBuilder sb = new StringBuilder(length);

        // Scan until end of token, end of line or end of file.
        while (!atEOF() && position < end && !isEOL(ch0)) {
            // If escape character (backslash, 'u', then four hex digits).
            if (ch0 == '\\' && ch1 == 'u') {
                skip(2);
                final int ch = hexSequence(4, TokenType.IDENT);
                // An escape that decodes to whitespace is an error.
                // NOTE(review): this return does not pass through
                // reset(savePosition) below, so the scan position is left
                // inside the identifier - confirm callers tolerate that.
                if (isWhitespace((char)ch)) {
                    return null;
                }
                if (ch < 0) {
                    // No hex digits were found: keep the escape introducer as text.
                    sb.append('\\');
                    sb.append('u');
                } else {
                    sb.append((char)ch);
                }
            } else {
                // Add regular character.
                sb.append(ch0);
                skip(1);
            }
        }

        // Restore position.
        reset(savePosition);

        return sb.toString();
    }

    /**
     * Scan over an identifier or keyword. Handles identifiers containing
     * encoded Unicode chars.
     *
     * Example:
     *
     * var \u0042 = 44;
     */
    private void scanIdentifierOrKeyword() {
        // Record beginning of identifier.
        final int start = position;
        // Scan identifier.
        final int length = scanIdentifier();
        // Check to see if it is a keyword.
        final TokenType type = TokenLookup.lookupKeyword(content, start, length);
        // Add keyword or identifier token.
        add(type, start);
    }

    /**
     * Convert a string to a JavaScript string object by decoding the escape
     * sequences in the token text. The scan position is restored afterwards.
     *
     * @param start  Position in source content.
     * @param length Length of token.
     * @param strict True if octal escapes other than a lone "\0" are to be
     *               reported as errors.
     * @return JavaScript string object.
     */
    private String valueOfString(final int start, final int length, final boolean strict) throws RuntimeException {
        // Save the current position.
        final int savePosition = position;
        // Calculate the end position.
        final int end = start + length;
        // Reset to beginning of string.
        reset(start);

        // Buffer for recording characters.
        final StringBuilder sb = new StringBuilder(length);

        // Scan until end of string.
        while (position < end) {
            // If escape character.
            if (ch0 == '\\') {
                skip(1);

                // Character after the backslash, and where it sits, so the
                // octal case can rewind to it.
                final char next = ch0;
                final int afterSlash = position;

                skip(1);

                // Special characters.
                switch (next) {
                case '0':
                case '1':
                case '2':
                case '3':
                case '4':
                case '5':
                case '6':
                case '7': {
                    if (strict) {
                        // "\0" itself is allowed in strict mode. Only other 'real'
                        // octal escape sequences are not allowed (eg. "\02", "\31").
                        // See section 7.8.4 String literals production EscapeSequence
                        if (next != '0' || (ch0 >= '0' && ch0 <= '9')) {
                            error(Lexer.message("strict.no.octal"), STRING, position, limit);
                        }
                    }
                    // Rewind so octalSequence() sees the first digit too.
                    reset(afterSlash);
                    // Octal sequence.
                    final int ch = octalSequence();

                    // NOTE(review): octalSequence() starts from 0 and only
                    // ORs digits in, so the negative branch looks unreachable.
                    if (ch < 0) {
                        sb.append('\\');
                        sb.append('x');
                    } else {
                        sb.append((char)ch);
                    }
                    break;
                }
                case 'n':
                    sb.append('\n');
                    break;
                case 't':
                    sb.append('\t');
                    break;
                case 'b':
                    sb.append('\b');
                    break;
                case 'f':
                    sb.append('\f');
                    break;
                case 'r':
                    sb.append('\r');
                    break;
                case '\'':
                    sb.append('\'');
                    break;
                case '\"':
                    sb.append('\"');
                    break;
                case '\\':
                    sb.append('\\');
                    break;
                case '\r': // CR | CRLF
                    if (ch0 == '\n') {
                        skip(1);
                    }
                // fall through
                case '\n': // LF
                case '\u2028': // LS
                case '\u2029': // PS
                    // continue on the next line, slash-return continues string
                    // literal; nothing is appended for the line break itself
                    break;
                case 'x': {
                    // Hex sequence.
                    final int ch = hexSequence(2, STRING);

                    if (ch < 0) {
                        // No hex digits: keep the escape introducer as text.
                        sb.append('\\');
                        sb.append('x');
                    } else {
                        sb.append((char)ch);
                    }
                }
                    break;
                case 'u': {
                    // Unicode sequence.
                    final int ch = hexSequence(4, STRING);

                    if (ch < 0) {
                        // No hex digits: keep the escape introducer as text.
                        sb.append('\\');
                        sb.append('u');
                    } else {
                        sb.append((char)ch);
                    }
                }
                    break;
                case 'v':
                    sb.append('\u000B');
                    break;
                // All other characters stand for themselves.
                default:
                    sb.append(next);
                    break;
                }
            } else {
                // Add regular character.
                sb.append(ch0);
                skip(1);
            }
        }

        // Restore position.
        reset(savePosition);

        return sb.toString();
    }

    /**
     * Scan over a string literal. The current character is taken as the
     * opening quote and the scan runs to the matching quote.
     *
     * @param add true if we are not just scanning but should actually modify the token stream
     */
    protected void scanString(final boolean add) {
        // Type of string; switched to ESCSTRING once a backslash is seen.
        TokenType type = STRING;
        // Record starting quote.
        final char quote = ch0;
        // Skip over quote.
        skip(1);

        // Record beginning of string content.
        final State stringState = saveState();

        // Scan until close quote or end of line.
        while (!atEOF() && ch0 != quote && !isEOL(ch0)) {
            // Skip over escaped character.
            if (ch0 == '\\') {
                type = ESCSTRING;
                skip(1);
                if (! isEscapeCharacter(ch0)) {
                    error(Lexer.message("invalid.escape.char"), STRING, position, limit);
                }
                if (isEOL(ch0)) {
                    // Multiline string literal: the escaped line break is
                    // consumed without recording an EOL token.
                    skipEOL(false);
                    continue;
                }
            }
            // Skip literal character.
            skip(1);
        }

        // If close quote.
        if (ch0 == quote) {
            // Skip close quote.
            skip(1);
        } else {
            error(Lexer.message("missing.close.quote"), STRING, position, limit);
        }

        // If not just scanning.
        if (add) {
            // Record end of string (position - 1 excludes the close quote).
            stringState.setLimit(position - 1);

            if (scripting && !stringState.isEmpty()) {
                switch (quote) {
                case '`':
                    // Mark the beginning of an exec string.
                    add(EXECSTRING, stringState.position, stringState.limit);
                    // Frame edit string with left brace.
                    add(LBRACE, stringState.position, stringState.position);
                    // Process edit string.
                    editString(type, stringState);
                    // Frame edit string with right brace.
                    add(RBRACE, stringState.limit, stringState.limit);
                    break;
                case '"':
                    // Only edit double quoted strings.
                    editString(type, stringState);
                    break;
                case '\'':
                    // Add string token without editing.
                    add(type, stringState.position, stringState.limit);
                    break;
                default:
                    break;
                }
            } else {
                // Add string token without editing.
                add(type, stringState.position, stringState.limit);
            }
        }
    }

    /**
     * Is the given character a valid escape char after "\" ?
     * This implementation accepts every character.
     *
     * @param ch character to be checked
     * @return if the given character is valid after "\"
     */
    protected boolean isEscapeCharacter(final char ch) {
        return true;
    }

    /**
     * Convert string to number. The result is an Integer when the value fits
     * the int range, otherwise a Long, and a Double when the text cannot be
     * parsed as a long.
     *
     * @param valueString  String to convert.
     * @param radix        Numeric base.
     * @return Converted number.
     */
    private static Number valueOf(final String valueString, final int radix) throws NumberFormatException {
        try {
            final long value = Long.parseLong(valueString, radix);
            if(value >= MIN_INT_L && value <= MAX_INT_L) {
                return Integer.valueOf((int)value);
            }
            return Long.valueOf(value);
        } catch (final NumberFormatException e) {
            // Not parseable as a long: fall back to a double.
            if (radix == 10) {
                return Double.valueOf(valueString);
            }

            // Other radixes are accumulated digit by digit.
            double value = 0.0;

            for (int i = 0; i < valueString.length(); i++) {
                final char ch = valueString.charAt(i);
                // Preverified, should always be a valid digit.
                final int digit = convertDigit(ch, radix);
                value *= radix;
                value += digit;
            }

            return value;
        }
    }

    /**
     * Scan a number and add a HEXADECIMAL, OCTAL, FLOATING or DECIMAL token
     * for it.
     */
    protected void scanNumber() {
        // Record beginning of number.
        final int start = position;
        // Assume value is a decimal.
        TokenType type = DECIMAL;

        // First digit of number (-1 when the number starts with '.').
        int digit = convertDigit(ch0, 10);

        // If number begins with 0x.
        if (digit == 0 && (ch1 == 'x' || ch1 == 'X') && convertDigit(ch2, 16) != -1) {
            // Skip over 0xN.
            skip(3);
            // Skip over remaining digits.
            while (convertDigit(ch0, 16) != -1) {
                skip(1);
            }

            type = HEXADECIMAL;
        } else {
            // Check for possible octal constant.
            boolean octal = digit == 0;
            // Skip first digit if not leading '.'.
            if (digit != -1) {
                skip(1);
            }

            // Skip remaining digits.
            while ((digit = convertDigit(ch0, 10)) != -1) {
                // Check octal only digits.
                octal = octal && digit < 8;
                // Skip digit.
                skip(1);
            }

            // A leading zero followed by at least one more octal digit.
            if (octal && position - start > 1) {
                type = OCTAL;
            } else if (ch0 == '.' || ch0 == 'E' || ch0 == 'e') {
                // Must be a double.
                if (ch0 == '.') {
                    // Skip period.
                    skip(1);
                    // Skip mantissa.
                    while (convertDigit(ch0, 10) != -1) {
                        skip(1);
                    }
                }

                // Detect exponent.
                if (ch0 == 'E' || ch0 == 'e') {
                    // Skip E.
                    skip(1);
                    // Detect and skip exponent sign.
                    if (ch0 == '+' || ch0 == '-') {
                        skip(1);
                    }
                    // Skip exponent.
                    while (convertDigit(ch0, 10) != -1) {
                        skip(1);
                    }
                }

                type = FLOATING;
            }
        }

        // An identifier may not start directly after a number.
        if (Character.isJavaIdentifierStart(ch0)) {
            error(Lexer.message("missing.space.after.number"), type, position, 1);
        }

        // Add number token.
        add(type, start);
    }

    /**
     * Convert an XML token to a token object.
     *
     * @param start  Position in source content.
     * @param length Length of XML token.
     * @return XML token object.
*/
    XMLToken valueOfXML(final int start, final int length) {
        return new XMLToken(source.getString(start, length));
    }

    /**
     * Scan over a XML token. Nothing is scanned unless XML_LITERALS is set.
     * On success an XML token is added; on failure the scan position is put
     * back where it was.
     *
     * @return TRUE if is an XML literal.
     */
    private boolean scanXMLLiteral() {
        assert ch0 == '<' && Character.isJavaIdentifierStart(ch1);
        if (XML_LITERALS) {
            // Record beginning of xml expression.
            final int start = position;

            // Number of elements opened and not yet closed.
            int openCount = 0;

            do {
                if (ch0 == '<') {
                    if (ch1 == '/' && Character.isJavaIdentifierStart(ch2)) {
                        // Closing tag.
                        skip(3);
                        openCount--;
                    } else if (Character.isJavaIdentifierStart(ch1)) {
                        // Opening tag.
                        skip(2);
                        openCount++;
                    } else if (ch1 == '?') {
                        // '<' followed by '?': skipped without counting.
                        skip(2);
                    } else if (ch1 == '!' && ch2 == '-' && ch3 == '-') {
                        // '<' followed by "!--": skipped without counting.
                        skip(4);
                    } else {
                        reset(start);
                        return false;
                    }

                    // Scan to the '>' that ends this tag.
                    while (!atEOF() && ch0 != '>') {
                        if (ch0 == '/' && ch1 == '>') {
                            // Self-closing tag.
                            openCount--;
                            skip(1);
                            break;
                        } else if (ch0 == '\"' || ch0 == '\'') {
                            // Quoted text is scanned as a string without
                            // touching the token stream.
                            scanString(false);
                        } else {
                            skip(1);
                        }
                    }

                    if (ch0 != '>') {
                        reset(start);
                        return false;
                    }

                    skip(1);
                } else if (atEOF()) {
                    // Input ended with elements still open.
                    reset(start);
                    return false;
                } else {
                    skip(1);
                }
            } while (openCount > 0);

            add(XML, start);
            return true;
        }

        return false;
    }

    /**
     * Scan over identifier characters. An escape (backslash, 'u', four hex
     * digits) is accepted in place of a literal character; an escape that
     * does not decode to a legal identifier character is reported through
     * error(...) as "illegal.identifier.character".
     *
     * @return Length of identifier or zero if none found.
     */
    private int scanIdentifier() {
        final int start = position;

        // Make sure first character is valid start character.
        if (ch0 == '\\' && ch1 == 'u') {
            skip(2);
            final int ch = hexSequence(4, TokenType.IDENT);

            if (!Character.isJavaIdentifierStart(ch)) {
                error(Lexer.message("illegal.identifier.character"), TokenType.IDENT, start, position);
            }
        } else if (!Character.isJavaIdentifierStart(ch0)) {
            // Not an identifier.
            return 0;
        }

        // Make sure remaining characters are valid part characters.
        while (!atEOF()) {
            if (ch0 == '\\' && ch1 == 'u') {
                skip(2);
                final int ch = hexSequence(4, TokenType.IDENT);

                if (!Character.isJavaIdentifierPart(ch)) {
                    error(Lexer.message("illegal.identifier.character"), TokenType.IDENT, start, position);
                }
            } else if (Character.isJavaIdentifierPart(ch0)) {
                skip(1);
            } else {
                break;
            }
        }

        // Length of identifier sequence.
        return position - start;
    }

    /**
     * Compare two identifiers (in content) for equality, character by
     * character. Identifiers of different length are never equal.
     *
     * @param aStart  Start of first identifier.
     * @param aLength Length of first identifier.
     * @param bStart  Start of second identifier.
     * @param bLength Length of second identifier.
     * @return True if equal.
     */
    private boolean identifierEqual(final int aStart, final int aLength, final int bStart, final int bLength) {
        if (aLength == bLength) {
            for (int i = 0; i < aLength; i++) {
                if (content[aStart + i] != content[bStart + i]) {
                    return false;
                }
            }

            return true;
        }

        return false;
    }

    /**
     * Detect if a line starts with a marker identifier. Leading whitespace is
     * skipped first, and the scan position is left after whatever identifier
     * was scanned.
     *
     * @param identStart  Start of identifier.
     * @param identLength Length of identifier.
     * @return True if detected.
     */
    private boolean hasHereMarker(final int identStart, final int identLength) {
        // Skip any whitespace.
        skipWhitespace(false);

        // Arguments are evaluated left to right, so position is read before
        // scanIdentifier() advances it and is the start of the candidate.
        return identifierEqual(identStart, identLength, position, scanIdentifier());
    }

    /**
     * Lexer to service edit strings.
     */
    private static class EditStringLexer extends Lexer {
        /** Type of string literals to emit. */
        final TokenType stringType;

        /*
         * Constructor.
         */

        EditStringLexer(final Lexer lexer, final TokenType stringType, final State stringState) {
            super(lexer, stringState);

            this.stringType = stringType;
        }

        /**
         * Lexify the contents of the string.
         */
        @Override
        public void lexify() {
            // Record start of string position.
            int stringStart = position;
            // Indicate that the priming first string has not been emitted.
            boolean primed = false;

            while (true) {
                // Detect end of content.
                if (atEOF()) {
                    break;
                }

                // Honour escapes (should be well formed.)
                if (ch0 == '\\' && stringType == ESCSTRING) {
                    skip(2);

                    continue;
                }

                // If start of expression.
                if (ch0 == '$' && ch1 == '{') {
                    if (!primed || stringStart != position) {
                        if (primed) {
                            add(ADD, stringStart, stringStart + 1);
                        }

                        add(stringType, stringStart, position);
                        primed = true;
                    }

                    // Skip ${
                    skip(2);

                    // Save expression state.
                    final State expressionState = saveState();

                    // Start with one open brace.
                    int braceCount = 1;

                    // Scan for the rest of the string.
                    while (!atEOF()) {
                        // If closing brace.
                        if (ch0 == '}') {
                            // Break only if matching brace.
                            if (--braceCount == 0) {
                                break;
                            }
                        } else if (ch0 == '{') {
                            // Bump up the brace count.
                            braceCount++;
                        }

                        // Skip to next character.
                        skip(1);
                    }

                    // If braces don't match then report an error.
1383 if (braceCount != 0) { 1384 error(Lexer.message("edit.string.missing.brace"), LBRACE, expressionState.position - 1, 1); 1385 } 1386 1387 // Mark end of expression. 1388 expressionState.setLimit(position); 1389 // Skip closing brace. 1390 skip(1); 1391 1392 // Start next string. 1393 stringStart = position; 1394 1395 // Concatenate expression. 1396 add(ADD, expressionState.position, expressionState.position + 1); 1397 add(LPAREN, expressionState.position, expressionState.position + 1); 1398 1399 // Scan expression. 1400 final Lexer lexer = new Lexer(this, expressionState); 1401 lexer.lexify(); 1402 1403 // Close out expression parenthesis. 1404 add(RPAREN, position - 1, position); 1405 1406 continue; 1407 } 1408 1409 // Next character in string. 1410 skip(1); 1411 } 1412 1413 // If there is any unemitted string portion. 1414 if (stringStart != limit) { 1415 // Concatenate remaining string. 1416 if (primed) { 1417 add(ADD, stringStart, 1); 1418 } 1419 1420 add(stringType, stringStart, limit); 1421 } 1422 } 1423 1424 } 1425 1426 /** 1427 * Edit string for nested expressions. 1428 * 1429 * @param stringType Type of string literals to emit. 1430 * @param stringState State of lexer at start of string. 1431 */ 1432 private void editString(final TokenType stringType, final State stringState) { 1433 // Use special lexer to scan string. 1434 final EditStringLexer lexer = new EditStringLexer(this, stringType, stringState); 1435 lexer.lexify(); 1436 1437 // Need to keep lexer informed. 1438 last = stringType; 1439 } 1440 1441 /** 1442 * Scan over a here string. 1443 * 1444 * @return TRUE if is a here string. 1445 */ 1446 private boolean scanHereString(final LineInfoReceiver lir) { 1447 assert ch0 == '<' && ch1 == '<'; 1448 if (scripting) { 1449 // Record beginning of here string. 
1450 final State saved = saveState(); 1451 1452 // << or <<< 1453 final boolean excludeLastEOL = ch2 != '<'; 1454 1455 if (excludeLastEOL) { 1456 skip(2); 1457 } else { 1458 skip(3); 1459 } 1460 1461 // Scan identifier. 1462 final int identStart = position; 1463 final int identLength = scanIdentifier(); 1464 1465 // Check for identifier. 1466 if (identLength == 0) { 1467 // Treat as shift. 1468 restoreState(saved); 1469 1470 return false; 1471 } 1472 1473 // Record rest of line. 1474 final State restState = saveState(); 1475 // keep line number updated 1476 int lastLine = line; 1477 1478 skipLine(false); 1479 lastLine++; 1480 int lastLinePosition = position; 1481 restState.setLimit(position); 1482 1483 // Record beginning of string. 1484 final State stringState = saveState(); 1485 int stringEnd = position; 1486 1487 // Hunt down marker. 1488 while (!atEOF()) { 1489 // Skip any whitespace. 1490 skipWhitespace(false); 1491 1492 if (hasHereMarker(identStart, identLength)) { 1493 break; 1494 } 1495 1496 skipLine(false); 1497 lastLine++; 1498 lastLinePosition = position; 1499 stringEnd = position; 1500 } 1501 1502 // notify last line information 1503 lir.lineInfo(lastLine, lastLinePosition); 1504 1505 // Record end of string. 1506 stringState.setLimit(stringEnd); 1507 1508 // If marker is missing. 1509 if (stringState.isEmpty() || atEOF()) { 1510 error(Lexer.message("here.missing.end.marker", source.getString(identStart, identLength)), last, position, position); 1511 restoreState(saved); 1512 1513 return false; 1514 } 1515 1516 // Remove last end of line if specified. 1517 if (excludeLastEOL) { 1518 // Handles \n. 1519 if (content[stringEnd - 1] == '\n') { 1520 stringEnd--; 1521 } 1522 1523 // Handles \r and \r\n. 1524 if (content[stringEnd - 1] == '\r') { 1525 stringEnd--; 1526 } 1527 1528 // Update end of string. 1529 stringState.setLimit(stringEnd); 1530 } 1531 1532 // Edit string if appropriate. 
1533 if (scripting && !stringState.isEmpty()) { 1534 editString(STRING, stringState); 1535 } else { 1536 // Add here string. 1537 add(STRING, stringState.position, stringState.limit); 1538 } 1539 1540 // Scan rest of original line. 1541 final Lexer restLexer = new Lexer(this, restState); 1542 1543 restLexer.lexify(); 1544 1545 return true; 1546 } 1547 1548 return false; 1549 } 1550 1551 /** 1552 * Breaks source content down into lex units, adding tokens to the token 1553 * stream. The routine scans until the stream buffer is full. Can be called 1554 * repeatedly until EOF is detected. 1555 */ 1556 public void lexify() { 1557 while (!stream.isFull() || nested) { 1558 // Skip over whitespace. 1559 skipWhitespace(true); 1560 1561 // Detect end of file. 1562 if (atEOF()) { 1563 if (!nested) { 1564 // Add an EOF token at the end. 1565 add(EOF, position); 1566 } 1567 1568 break; 1569 } 1570 1571 // Check for comments. Note that we don't scan for regexp and other literals here as 1572 // we may not have enough context to distinguish them from similar looking operators. 1573 // Instead we break on ambiguous operators below and let the parser decide. 1574 if (ch0 == '/' && skipComments()) { 1575 continue; 1576 } 1577 1578 if (scripting && ch0 == '#' && skipComments()) { 1579 continue; 1580 } 1581 1582 // TokenType for lookup of delimiter or operator. 1583 TokenType type; 1584 1585 if (ch0 == '.' && convertDigit(ch1, 10) != -1) { 1586 // '.' followed by digit. 1587 // Scan and add a number. 1588 scanNumber(); 1589 } else if ((type = TokenLookup.lookupOperator(ch0, ch1, ch2, ch3)) != null) { 1590 // Get the number of characters in the token. 1591 final int typeLength = type.getLength(); 1592 // Skip that many characters. 1593 skip(typeLength); 1594 // Add operator token. 1595 add(type, position - typeLength); 1596 // Some operator tokens also mark the beginning of regexp, XML, or here string literals. 1597 // We break to let the parser decide what it is. 
1598 if (canStartLiteral(type)) { 1599 break; 1600 } 1601 } else if (Character.isJavaIdentifierStart(ch0) || ch0 == '\\' && ch1 == 'u') { 1602 // Scan and add identifier or keyword. 1603 scanIdentifierOrKeyword(); 1604 } else if (isStringDelimiter(ch0)) { 1605 // Scan and add a string. 1606 scanString(true); 1607 } else if (Character.isDigit(ch0)) { 1608 // Scan and add a number. 1609 scanNumber(); 1610 } else { 1611 // Don't recognize this character. 1612 skip(1); 1613 add(ERROR, position - 1); 1614 } 1615 } 1616 } 1617 1618 /** 1619 * Return value of token given its token descriptor. 1620 * 1621 * @param token Token descriptor. 1622 * @return JavaScript value. 1623 */ 1624 Object getValueOf(final long token, final boolean strict) { 1625 final int start = Token.descPosition(token); 1626 final int len = Token.descLength(token); 1627 1628 switch (Token.descType(token)) { 1629 case DECIMAL: 1630 return Lexer.valueOf(source.getString(start, len), 10); // number 1631 case OCTAL: 1632 return Lexer.valueOf(source.getString(start, len), 8); // number 1633 case HEXADECIMAL: 1634 return Lexer.valueOf(source.getString(start + 2, len - 2), 16); // number 1635 case FLOATING: 1636 final String str = source.getString(start, len); 1637 final double value = Double.valueOf(str); 1638 if (str.indexOf('.') != -1) { 1639 return value; //number 1640 } 1641 //anything without an explicit decimal point is still subject to a 1642 //"representable as int or long" check. Then the programmer does not 1643 //explicitly code something as a double. For example new Color(int, int, int) 1644 //and new Color(float, float, float) will get ambiguous for cases like 1645 //new Color(1.0, 1.5, 1.5) if we don't respect the decimal point. 1646 //yet we don't want e.g. 
1e6 to be a double unnecessarily 1647 if (JSType.isRepresentableAsInt(value) && !JSType.isNegativeZero(value)) { 1648 return (int)value; 1649 } else if (JSType.isRepresentableAsLong(value) && !JSType.isNegativeZero(value)) { 1650 return (long)value; 1651 } 1652 return value; 1653 case STRING: 1654 return source.getString(start, len); // String 1655 case ESCSTRING: 1656 return valueOfString(start, len, strict); // String 1657 case IDENT: 1658 return valueOfIdent(start, len); // String 1659 case REGEX: 1660 return valueOfPattern(start, len); // RegexToken::LexerToken 1661 case XML: 1662 return valueOfXML(start, len); // XMLToken::LexerToken 1663 case DIRECTIVE_COMMENT: 1664 return source.getString(start, len); 1665 default: 1666 break; 1667 } 1668 1669 return null; 1670 } 1671 1672 /** 1673 * Get the correctly localized error message for a given message id format arguments 1674 * @param msgId message id 1675 * @param args format arguments 1676 * @return message 1677 */ 1678 protected static String message(final String msgId, final String... args) { 1679 return ECMAErrors.getMessage("lexer.error." 
+ msgId, args); 1680 } 1681 1682 /** 1683 * Generate a runtime exception 1684 * 1685 * @param message error message 1686 * @param type token type 1687 * @param start start position of lexed error 1688 * @param length length of lexed error 1689 * @throws ParserException unconditionally 1690 */ 1691 protected void error(final String message, final TokenType type, final int start, final int length) throws ParserException { 1692 final long token = Token.toDesc(type, start, length); 1693 final int pos = Token.descPosition(token); 1694 final int lineNum = source.getLine(pos); 1695 final int columnNum = source.getColumn(pos); 1696 final String formatted = ErrorManager.format(message, source, lineNum, columnNum, token); 1697 throw new ParserException(JSErrorType.SYNTAX_ERROR, formatted, source, lineNum, columnNum, token); 1698 } 1699 1700 /** 1701 * Helper class for Lexer tokens, e.g XML or RegExp tokens. 1702 * This is the abstract superclass 1703 */ 1704 public static abstract class LexerToken { 1705 private final String expression; 1706 1707 /** 1708 * Constructor 1709 * @param expression token expression 1710 */ 1711 protected LexerToken(final String expression) { 1712 this.expression = expression; 1713 } 1714 1715 /** 1716 * Get the expression 1717 * @return expression 1718 */ 1719 public String getExpression() { 1720 return expression; 1721 } 1722 } 1723 1724 /** 1725 * Temporary container for regular expressions. 1726 */ 1727 public static class RegexToken extends LexerToken { 1728 /** Options. */ 1729 private final String options; 1730 1731 /** 1732 * Constructor. 
1733 * 1734 * @param expression regexp expression 1735 * @param options regexp options 1736 */ 1737 public RegexToken(final String expression, final String options) { 1738 super(expression); 1739 this.options = options; 1740 } 1741 1742 /** 1743 * Get regexp options 1744 * @return options 1745 */ 1746 public String getOptions() { 1747 return options; 1748 } 1749 1750 @Override 1751 public String toString() { 1752 return '/' + getExpression() + '/' + options; 1753 } 1754 } 1755 1756 /** 1757 * Temporary container for XML expression. 1758 */ 1759 public static class XMLToken extends LexerToken { 1760 1761 /** 1762 * Constructor. 1763 * 1764 * @param expression XML expression 1765 */ 1766 public XMLToken(final String expression) { 1767 super(expression); 1768 } 1769 } 1770} 1771