// RegExpScanner.java revision 953:221a84ef44c0
/*
 * Copyright (c) 2010, 2013, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation. Oracle designates this
 * particular file as subject to the "Classpath" exception as provided
 * by Oracle in the LICENSE file that accompanied this code.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 */

package jdk.nashorn.internal.runtime.regexp;

import java.util.HashMap;
import java.util.Iterator;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import java.util.regex.PatternSyntaxException;
import jdk.nashorn.internal.parser.Lexer;
import jdk.nashorn.internal.parser.Scanner;
import jdk.nashorn.internal.runtime.BitVector;

/**
 * Scan a JavaScript regexp, converting to Java regex if necessary.
 *
 * <p>The scanner is a recursive descent parser over the ES 5.1 pattern grammar. Each grammar
 * method either consumes input and appends the translated text to the output builder and returns
 * {@code true}, or leaves both the input position and the output builder where it found them and
 * returns {@code false}. Input state ({@code position}, {@code ch0}..{@code ch2}, {@code skip},
 * {@code reset}, {@code atEOF}) is not declared here and comes from the {@link Scanner}
 * superclass.</p>
 */
final class RegExpScanner extends Scanner {

    /**
     * String builder used to rewrite the pattern for the currently used regexp factory.
     */
    private final StringBuilder sb;

    /** Expected token table: number of currently open constructs awaiting each closing character. */
    private final Map<Character, Integer> expected = new HashMap<>();

    /** Capturing parenthesis that have been found so far. */
    private final List<Capture> caps = new LinkedList<>();

    /**
     * Forward references to capturing parenthesis to be resolved later. Entries are stored as
     * consecutive (group number, output position) pairs.
     */
    private final LinkedList<Integer> forwardReferences = new LinkedList<>();

    /** Current level of zero-width negative lookahead assertions. */
    private int negLookaheadLevel;

    /** Sequential id of current top-level zero-width negative lookahead assertion. */
    private int negLookaheadGroup;

    /** Are we currently inside a character class? */
    private boolean inCharClass = false;

    /** Are we currently inside a negated character class? */
    private boolean inNegativeClass = false;

    /** Escaped characters whose backslash is kept in the output by {@link #identityEscape()}. */
    private static final String NON_IDENT_ESCAPES = "$^*+(){}[]|\\.?-";

    /** Records the negative lookahead context in which a capturing group was opened. */
    private static class Capture {
        /** Zero-width negative lookaheads enclosing the capture. */
        private final int negLookaheadLevel;
        /** Sequential id of top-level negative lookaheads containing the capture. */
        private final int negLookaheadGroup;

        Capture(final int negLookaheadGroup, final int negLookaheadLevel) {
            this.negLookaheadGroup = negLookaheadGroup;
            this.negLookaheadLevel = negLookaheadLevel;
        }

        /**
         * Tells whether a reference made at the given lookahead context can see this capture.
         *
         * @param group id of the top-level negative lookahead at the point of reference
         * @param level negative lookahead nesting level at the point of reference
         * @return true if the group ids match and the reference is nested at least as deep as the capture
         */
        boolean isContained(final int group, final int level) {
            return group == this.negLookaheadGroup && level >= this.negLookaheadLevel;
        }

    }

    /**
     * Constructor
     * @param string the JavaScript regexp to parse
     */
    private RegExpScanner(final String string) {
        super(string);
        sb = new StringBuilder(limit);
        reset(0);
        expected.put(']', 0);
        expected.put('}', 0);
    }

    /**
     * Resolve the references that pointed past the captures known when they were scanned.
     * References whose target group does exist were already dropped from the output; references
     * to groups that never appeared are re-inserted as an octal-derived unicode escape and/or
     * literal digits.
     */
    private void processForwardReferences() {
        // Walk backwards so that an insertion never shifts a position that is still to be used.
        // Pairs were added as (number, position), hence the position comes first in this order.
        final Iterator<Integer> iterator = forwardReferences.descendingIterator();
        while (iterator.hasNext()) {
            final int pos = iterator.next();
            final int num = iterator.next();
            if (num > caps.size()) {
                // Non-existing backreference. If the number begins with a valid octal convert it to
                // Unicode escape and append the rest to a literal character sequence.
                final StringBuilder buffer = new StringBuilder();
                octalOrLiteral(Integer.toString(num), buffer);
                sb.insert(pos, buffer);
            }
        }

        forwardReferences.clear();
    }

    /**
     * Scan a JavaScript regexp string and return the scanner holding the translated pattern.
     *
     * @param string
     *            JavaScript regexp string.
     * @return the scanner; use {@link #getJavaPattern()} to obtain the rewritten regex string.
     * @throws PatternSyntaxException if scanning fails or does not consume the whole input.
     */
    public static RegExpScanner scan(final String string) {
        final RegExpScanner scanner = new RegExpScanner(string);

        try {
            scanner.disjunction();
        } catch (final Exception e) {
            // PatternSyntaxException has no cause-accepting constructor; attach the cause explicitly
            // so the original stack trace is not lost.
            final PatternSyntaxException syntaxError =
                    new PatternSyntaxException(e.getMessage(), string, scanner.position);
            syntaxError.initCause(e);
            throw syntaxError;
        }

        scanner.processForwardReferences();

        // Throw syntax error unless we parsed the entire JavaScript regexp without syntax errors
        if (scanner.position != string.length()) {
            final String p = scanner.getStringBuilder().toString();
            throw new PatternSyntaxException(string, p, p.length() + 1);
        }

        return scanner;
    }

    /**
     * @return the output builder holding the pattern rewritten so far.
     */
    final StringBuilder getStringBuilder() {
        return sb;
    }

    /**
     * @return the rewritten pattern as a string.
     */
    String getJavaPattern() {
        return sb.toString();
    }

    /**
     * Collect the 1-based indices of capture groups that were opened inside a negative lookahead.
     *
     * @return a bit vector with one bit set per such group, or {@code null} if there is none.
     */
    BitVector getGroupsInNegativeLookahead() {
        BitVector vec = null;
        int groupIndex = 0;
        // Iterate rather than index: caps is a linked list, so get(i) would be linear per call.
        for (final Capture cap : caps) {
            groupIndex++;
            if (cap.negLookaheadLevel > 0) {
                if (vec == null) {
                    vec = new BitVector(caps.size() + 1);
                }
                vec.set(groupIndex);
            }
        }
        return vec;
    }

    /**
     * Commit n characters to the builder and advance the input past them.
     * @param n Number of characters, 1 to 3.
     * @return always true, so callers can write {@code return commit(n);}
     */
    private boolean commit(final int n) {
        switch (n) {
        case 1:
            sb.append(ch0);
            skip(1);
            break;
        case 2:
            sb.append(ch0);
            sb.append(ch1);
            skip(2);
            break;
        case 3:
            sb.append(ch0);
            sb.append(ch1);
            sb.append(ch2);
            skip(3);
            break;
        default:
            assert false : "Should not reach here";
        }

        return true;
    }

    /**
     * Restart the buffers back at an earlier position.
     *
     * @param startIn
     *            Position in the input stream.
     * @param startOut
     *            Position in the output stream.
     */
    private void restart(final int startIn, final int startOut) {
        reset(startIn);
        sb.setLength(startOut);
    }

    /**
     * Note that a construct closed by {@code ch} has been opened.
     *
     * @param ch the closing character now expected; must be a key of the expected token table.
     */
    private void push(final char ch) {
        expected.put(ch, expected.get(ch) + 1);
    }

    /**
     * Note that a construct closed by {@code ch} is no longer open. The count never drops below zero.
     *
     * @param ch the closing character; must be a key of the expected token table.
     */
    private void pop(final char ch) {
        expected.put(ch, Math.max(0, expected.get(ch) - 1));
    }

    /*
     * Recursive descent tokenizer starts below.
     */

    /*
     * Disjunction ::
     *      Alternative
     *      Alternative | Disjunction
     */
    private void disjunction() {
        while (true) {
            alternative();

            if (ch0 == '|') {
                commit(1);
            } else {
                break;
            }
        }
    }

    /*
     * Alternative ::
     *      [empty]
     *      Alternative Term
     */
    private void alternative() {
        while (term()) {
            // do nothing
        }
    }

    /*
     * Term ::
     *      Assertion
     *      Atom
     *      Atom Quantifier
     */
    private boolean term() {
        final int startIn  = position;
        final int startOut = sb.length();

        if (assertion()) {
            return true;
        }

        if (atom()) {
            quantifier();
            return true;
        }

        restart(startIn, startOut);
        return false;
    }

    /*
     * Assertion ::
     *      ^
     *      $
     *      \b
     *      \B
     *      ( ? = Disjunction )
     *      ( ? ! Disjunction )
     */
    private boolean assertion() {
        final int startIn  = position;
        final int startOut = sb.length();

        switch (ch0) {
        case '^':
        case '$':
            return commit(1);

        case '\\':
            if (ch1 == 'b' || ch1 == 'B') {
                return commit(2);
            }
            break;

        case '(':
            if (ch1 != '?') {
                break;
            }
            if (ch2 != '=' && ch2 != '!') {
                break;
            }
            final boolean isNegativeLookahead = (ch2 == '!');
            commit(3);

            if (isNegativeLookahead) {
                // Entering a new top-level negative lookahead starts a new group id
                if (negLookaheadLevel == 0) {
                    negLookaheadGroup++;
                }
                negLookaheadLevel++;
            }
            disjunction();
            if (isNegativeLookahead) {
                negLookaheadLevel--;
            }

            if (ch0 == ')') {
                return commit(1);
            }
            break;

        default:
            break;
        }

        restart(startIn, startOut);
        return false;
    }

    /*
     * Quantifier ::
     *      QuantifierPrefix
     *      QuantifierPrefix ?
     */
    private boolean quantifier() {
        if (quantifierPrefix()) {
            if (ch0 == '?') {
                commit(1);
            }
            return true;
        }
        return false;
    }

    /*
     * QuantifierPrefix ::
     *      *
     *      +
     *      ?
     *      { DecimalDigits }
     *      { DecimalDigits , }
     *      { DecimalDigits , DecimalDigits }
     */
    private boolean quantifierPrefix() {
        final int startIn  = position;
        final int startOut = sb.length();

        switch (ch0) {
        case '*':
        case '+':
        case '?':
            return commit(1);

        case '{':
            commit(1);

            if (!decimalDigits()) {
                break; // not a quantifier - back out
            }
            push('}');

            if (ch0 == ',') {
                commit(1);
                decimalDigits();
            }

            // Balance the push on both outcomes. Leaving the count raised after a malformed
            // quantifier would make a later literal '}' unparseable.
            pop('}');

            if (ch0 == '}') {
                commit(1);
                return true;
            }

            // Bad quantifier should be rejected but is accepted by all major engines
            restart(startIn, startOut);
            return false;

        default:
            break;
        }

        restart(startIn, startOut);
        return false;
    }

    /*
     * Atom ::
     *      PatternCharacter
     *      .
     *      \ AtomEscape
     *      CharacterClass
     *      ( Disjunction )
     *      ( ? : Disjunction )
     *
     */
    private boolean atom() {
        final int startIn  = position;
        final int startOut = sb.length();

        if (patternCharacter()) {
            return true;
        }

        if (ch0 == '.') {
            return commit(1);
        }

        if (ch0 == '\\') {
            commit(1);

            if (atomEscape()) {
                return true;
            }
        }

        if (characterClass()) {
            return true;
        }

        if (ch0 == '(') {
            commit(1);
            if (ch0 == '?' && ch1 == ':') {
                commit(2);
            } else {
                // Capturing group: remember the lookahead context it was opened in
                caps.add(new Capture(negLookaheadGroup, negLookaheadLevel));
            }

            disjunction();

            if (ch0 == ')') {
                commit(1);
                return true;
            }
        }

        restart(startIn, startOut);
        return false;
    }

    /*
     * PatternCharacter ::
     *      SourceCharacter but not any of: ^$\.*+?()[]{}|
     */
    @SuppressWarnings("fallthrough")
    private boolean patternCharacter() {
        if (atEOF()) {
            return false;
        }

        final int startIn  = position;
        final int startOut = sb.length();

        switch (ch0) {
        case '^':
        case '$':
        case '\\':
        case '.':
        case '*':
        case '+':
        case '?':
        case '(':
        case ')':
        case '[':
        case '|':
            return false;

        case '}':
        case ']':
            final int n = expected.get(ch0);
            if (n != 0) {
                return false;
            }
            // not closing anything: fall through and treat as a literal

        case '{':
            // if not a valid quantifier escape curly brace to match itself
            // this ensures compatibility with other JS implementations
            if (!quantifierPrefix()) {
                sb.append('\\');
                return commit(1);
            }
            // A well-formed quantifier is not a pattern character. quantifierPrefix() was only a
            // lookahead here, so undo what it consumed before reporting failure.
            restart(startIn, startOut);
            return false;

        default:
            return commit(1); // SOURCECHARACTER
        }
    }

    /*
     * AtomEscape ::
     *      DecimalEscape
     *      CharacterEscape
     *      CharacterClassEscape
     */
    private boolean atomEscape() {
        // Note that contrary to ES 5.1 spec we put identityEscape() last because it acts as a catch-all
        return decimalEscape() || characterClassEscape() || characterEscape() || identityEscape();
    }

    /*
     * CharacterEscape ::
     *      ControlEscape
     *      c ControlLetter
     *      HexEscapeSequence
     *      UnicodeEscapeSequence
     *      IdentityEscape
     */
    private boolean characterEscape() {
        final int startIn  = position;
        final int startOut = sb.length();

        if (controlEscape()) {
            return true;
        }

        if (ch0 == 'c') {
            commit(1);
            if (controlLetter()) {
                return true;
            }
            restart(startIn, startOut);
        }

        if (hexEscapeSequence() || unicodeEscapeSequence()) {
            return true;
        }

        restart(startIn, startOut);
        return false;
    }

    /**
     * Scan an escape made of a leader character followed by a fixed number of hex digits.
     *
     * @param leader the character introducing the escape, e.g. 'x' or 'u'
     * @param length the number of hex digits required after the leader
     * @return true if the whole escape was present and committed; false with state restored otherwise
     */
    private boolean scanEscapeSequence(final char leader, final int length) {
        final int startIn  = position;
        final int startOut = sb.length();

        if (ch0 != leader) {
            return false;
        }

        commit(1);
        for (int i = 0; i < length; i++) {
            final char ch0l = Character.toLowerCase(ch0);
            if ((ch0l >= 'a' && ch0l <= 'f') || isDecimalDigit(ch0)) {
                commit(1);
            } else {
                restart(startIn, startOut);
                return false;
            }
        }

        return true;
    }

    private boolean hexEscapeSequence() {
        return scanEscapeSequence('x', 2);
    }

    private boolean unicodeEscapeSequence() {
        return scanEscapeSequence('u', 4);
    }

    /*
     * ControlEscape ::
     *      one of fnrtv
     */
    private boolean controlEscape() {
        switch (ch0) {
        case 'f':
        case 'n':
        case 'r':
        case 't':
        case 'v':
            return commit(1);

        default:
            return false;
        }
    }

    /*
     * ControlLetter ::
     *      one of abcdefghijklmnopqrstuvwxyz
     *      ABCDEFGHIJKLMNOPQRSTUVWXYZ
     */
    private boolean controlLetter() {
        // To match other engines we also accept '0'..'9' and '_' as control letters inside a character class.
        if ((ch0 >= 'A' && ch0 <= 'Z') || (ch0 >= 'a' && ch0 <= 'z')
                || (inCharClass && (isDecimalDigit(ch0) || ch0 == '_'))) {
            // for some reason java regexps don't like control characters on the
            // form "\\ca".match([string with ascii 1 at char0]). Translating
            // them to unicode does it though.
            sb.setLength(sb.length() - 1); // drop the 'c' committed by the caller
            unicode(ch0 % 32, sb);
            skip(1);
            return true;
        }
        return false;
    }

    /*
     * IdentityEscape ::
     *      SourceCharacter but not IdentifierPart
     *      <ZWJ>  (200d)
     *      <ZWNJ> (200c)
     */
    private boolean identityEscape() {
        if (atEOF()) {
            throw new RuntimeException("\\ at end of pattern"); // will be converted to PatternSyntaxException
        }
        // ES 5.1 A.7 requires "not IdentifierPart" here but all major engines accept any character here.
        if (ch0 == 'c') {
            sb.append('\\'); // Treat invalid \c control sequence as \\c
        } else if (NON_IDENT_ESCAPES.indexOf(ch0) == -1) {
            // The escaped character needs no escaping in the output: drop the backslash
            sb.setLength(sb.length() - 1);
        }
        return commit(1);
    }

    /*
     * DecimalEscape ::
     *      DecimalIntegerLiteral [lookahead not in DecimalDigit]
     */
    private boolean decimalEscape() {
        final int startIn  = position;
        final int startOut = sb.length();

        if (ch0 == '0' && !isOctalDigit(ch1)) {
            skip(1);
            //  DecimalEscape :: 0. If i is zero, return the EscapeValue consisting of a <NUL> character (Unicodevalue0000);
            sb.append("\u0000");
            return true;
        }

        if (isDecimalDigit(ch0)) {

            if (ch0 == '0') {
                // We know this is an octal escape.
                if (inCharClass) {
                    // Convert octal escape to unicode escape if inside character class.
                    int octalValue = 0;
                    while (isOctalDigit(ch0)) {
                        octalValue = octalValue * 8 + ch0 - '0';
                        skip(1);
                    }

                    unicode(octalValue, sb);

                } else {
                    // Copy decimal escape as-is
                    decimalDigits();
                }
            } else {
                // This should be a backreference, but could also be an octal escape or even a literal string.
                int decimalValue = 0;
                while (isDecimalDigit(ch0)) {
                    decimalValue = decimalValue * 10 + ch0 - '0';
                    skip(1);
                }

                if (inCharClass) {
                    // No backreferences in character classes. Encode as unicode escape or literal char sequence
                    sb.setLength(sb.length() - 1);
                    octalOrLiteral(Integer.toString(decimalValue), sb);

                } else if (decimalValue <= caps.size()) {
                    //  Captures inside a negative lookahead are undefined when referenced from the outside.
                    if (!caps.get(decimalValue - 1).isContained(negLookaheadGroup, negLookaheadLevel)) {
                        // Reference to capture in negative lookahead, omit from output buffer.
                        sb.setLength(sb.length() - 1);
                    } else {
                        // Append backreference to output buffer.
                        sb.append(decimalValue);
                    }
                } else {
                    // Forward references to a capture group are always undefined so we can omit it from the output buffer.
                    // However, if the target capture does not exist, we need to rewrite the reference as hex escape
                    // or literal string, so register the reference for later processing.
                    sb.setLength(sb.length() - 1);
                    forwardReferences.add(decimalValue);
                    forwardReferences.add(sb.length());
                }

            }
            return true;
        }

        restart(startIn, startOut);
        return false;
    }

    /*
     * CharacterClassEscape ::
     *  one of dDsSwW
     */
    private boolean characterClassEscape() {
        switch (ch0) {
        // java.util.regex requires translation of \s and \S to explicit character list
        case 's':
            if (RegExpFactory.usesJavaUtilRegex()) {
                sb.setLength(sb.length() - 1);
                // No nested class required if we already are inside a character class
                if (inCharClass) {
                    sb.append(Lexer.getWhitespaceRegExp());
                } else {
                    sb.append('[').append(Lexer.getWhitespaceRegExp()).append(']');
                }
                skip(1);
                return true;
            }
            return commit(1);
        case 'S':
            if (RegExpFactory.usesJavaUtilRegex()) {
                sb.setLength(sb.length() - 1);
                // In negative class we must use intersection to get double negation ("not anything else than space")
                sb.append(inNegativeClass ? "&&[" : "[^").append(Lexer.getWhitespaceRegExp()).append(']');
                skip(1);
                return true;
            }
            return commit(1);
        case 'd':
        case 'D':
        case 'w':
        case 'W':
            return commit(1);

        default:
            return false;
        }
    }

    /*
     * CharacterClass ::
     *      [ [lookahead {^}] ClassRanges ]
     *      [ ^ ClassRanges ]
     */
    private boolean characterClass() {
        final int startIn  = position;
        final int startOut = sb.length();

        if (ch0 == '[') {
            push(']');
            try {
                inCharClass = true;
                commit(1);

                if (ch0 == '^') {
                    inNegativeClass = true;
                    commit(1);
                }

                if (classRanges() && ch0 == ']') {
                    commit(1);

                    // Substitute empty character classes [] and [^] that never or always match
                    if (position == startIn + 2) {
                        sb.setLength(sb.length() - 1);
                        sb.append("^\\s\\S]");
                    } else if (position == startIn + 3 && inNegativeClass) {
                        sb.setLength(sb.length() - 2);
                        sb.append("\\s\\S]");
                    }

                    return true;
                }
            } finally {
                // Popped here so the count is balanced whether or not the class was terminated
                pop(']');
                inCharClass = false;  // no nested character classes in JavaScript
                inNegativeClass = false;
            }
        }

        restart(startIn, startOut);
        return false;
    }

    /*
     * ClassRanges ::
     *      [empty]
     *      NonemptyClassRanges
     */
    private boolean classRanges() {
        nonemptyClassRanges();
        return true;
    }

    /*
     * NonemptyClassRanges ::
     *      ClassAtom
     *      ClassAtom NonemptyClassRangesNoDash
     *      ClassAtom - ClassAtom ClassRanges
     */
    private boolean nonemptyClassRanges() {
        final int startIn  = position;
        final int startOut = sb.length();

        if (classAtom()) {

            if (ch0 == '-') {
                commit(1);

                if (classAtom() && classRanges()) {
                    return true;
                }
            }

            nonemptyClassRangesNoDash();

            return true;
        }

        restart(startIn, startOut);
        return false;
    }

    /*
     * NonemptyClassRangesNoDash ::
     *      ClassAtom
     *      ClassAtomNoDash NonemptyClassRangesNoDash
     *      ClassAtomNoDash - ClassAtom ClassRanges
     */
    private boolean nonemptyClassRangesNoDash() {
        final int startIn  = position;
        final int startOut = sb.length();

        if (classAtomNoDash()) {

            // need to check dash first, as for e.g. [a-b|c-d] will otherwise parse - as an atom
            if (ch0 == '-') {
                commit(1);

                if (classAtom() && classRanges()) {
                    return true;
                }
                //fallthru
            }

            nonemptyClassRangesNoDash();
            return true; // still a class atom
        }

        if (classAtom()) {
            return true;
        }

        restart(startIn, startOut);
        return false;
    }

    /*
     * ClassAtom ::
     *      -
     *      ClassAtomNoDash
     */
    private boolean classAtom() {

        if (ch0 == '-') {
            return commit(1);
        }

        return classAtomNoDash();
    }

    /*
     * ClassAtomNoDash ::
     *      SourceCharacter but not one of \ or ] or -
     *      \ ClassEscape
     */
    private boolean classAtomNoDash() {
        if (atEOF()) {
            return false;
        }
        final int startIn  = position;
        final int startOut = sb.length();

        switch (ch0) {
        case ']':
        case '-':
            return false;

        case '[':
            // unescaped left square bracket - add escape
            sb.append('\\');
            return commit(1);

        case '\\':
            commit(1);
            if (classEscape()) {
                return true;
            }

            restart(startIn, startOut);
            return false;

        default:
            return commit(1);
        }
    }

    /*
     * ClassEscape ::
     *      DecimalEscape
     *      b
     *      CharacterEscape
     *      CharacterClassEscape
     */
    private boolean classEscape() {

        if (decimalEscape()) {
            return true;
        }

        if (ch0 == 'b') {
            // Inside a class \b is a backspace: replace the committed backslash with the character itself
            sb.setLength(sb.length() - 1);
            sb.append('\b');
            skip(1);
            return true;
        }

        // Note that contrary to ES 5.1 spec we put identityEscape() last because it acts as a catch-all
        return characterEscape() || characterClassEscape() || identityEscape();
    }

    /*
     * DecimalDigits
     */
    private boolean decimalDigits() {
        if (!isDecimalDigit(ch0)) {
            return false;
        }

        while (isDecimalDigit(ch0)) {
            commit(1);
        }

        return true;
    }

    /**
     * Append {@code u} followed by the value as lower-case hex, left-padded with zeros to at least
     * four digits. The caller is responsible for the preceding backslash.
     *
     * @param value  the code unit value to encode
     * @param buffer the buffer to append to
     */
    private static void unicode(final int value, final StringBuilder buffer) {
        final String hex = Integer.toHexString(value);
        buffer.append('u');
        for (int i = 0; i < 4 - hex.length(); i++) {
            buffer.append('0');
        }
        buffer.append(hex);
    }

    // Convert what would have been a backreference into a unicode escape, or a number literal, or both.
    private static void octalOrLiteral(final String numberLiteral, final StringBuilder buffer) {
        final int length = numberLiteral.length();
        int octalValue = 0;
        int pos = 0;
        // Maximum value for octal escape is 0377 (255) so we stop the loop at 32
        while (pos < length && octalValue < 0x20) {
            final char ch = numberLiteral.charAt(pos);
            if (isOctalDigit(ch)) {
                octalValue = octalValue * 8 + ch - '0';
            } else {
                break;
            }
            pos++;
        }
        if (octalValue > 0) {
            buffer.append('\\');
            unicode(octalValue, buffer);
            buffer.append(numberLiteral.substring(pos));
        } else {
            buffer.append(numberLiteral);
        }
    }

    private static boolean isOctalDigit(final char ch) {
        return ch >= '0' && ch <= '7';
    }

    private static boolean isDecimalDigit(final char ch) {
        return ch >= '0' && ch <= '9';
    }
}