RegExpScanner.java revision 953:221a84ef44c0
1/*
2 * Copyright (c) 2010, 2013, Oracle and/or its affiliates. All rights reserved.
3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4 *
5 * This code is free software; you can redistribute it and/or modify it
6 * under the terms of the GNU General Public License version 2 only, as
7 * published by the Free Software Foundation.  Oracle designates this
8 * particular file as subject to the "Classpath" exception as provided
9 * by Oracle in the LICENSE file that accompanied this code.
10 *
11 * This code is distributed in the hope that it will be useful, but WITHOUT
12 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
14 * version 2 for more details (a copy is included in the LICENSE file that
15 * accompanied this code).
16 *
17 * You should have received a copy of the GNU General Public License version
18 * 2 along with this work; if not, write to the Free Software Foundation,
19 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
20 *
21 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
22 * or visit www.oracle.com if you need additional information or have any
23 * questions.
24 */
25
26package jdk.nashorn.internal.runtime.regexp;
27
28import java.util.HashMap;
29import java.util.Iterator;
30import java.util.LinkedList;
31import java.util.List;
32import java.util.Map;
33import java.util.regex.PatternSyntaxException;
34import jdk.nashorn.internal.parser.Lexer;
35import jdk.nashorn.internal.parser.Scanner;
36import jdk.nashorn.internal.runtime.BitVector;
37
38/**
39 * Scan a JavaScript regexp, converting to Java regex if necessary.
40 *
41 */
42final class RegExpScanner extends Scanner {
43
44    /**
45     * String builder used to rewrite the pattern for the currently used regexp factory.
46     */
47    private final StringBuilder sb;
48
49    /** Expected token table */
50    private final Map<Character, Integer> expected = new HashMap<>();
51
52    /** Capturing parenthesis that have been found so far. */
53    private final List<Capture> caps = new LinkedList<>();
54
55    /** Forward references to capturing parenthesis to be resolved later.*/
56    private final LinkedList<Integer> forwardReferences = new LinkedList<>();
57
58    /** Current level of zero-width negative lookahead assertions. */
59    private int negLookaheadLevel;
60
61    /** Sequential id of current top-level zero-width negative lookahead assertion. */
62    private int negLookaheadGroup;
63
64    /** Are we currently inside a character class? */
65    private boolean inCharClass = false;
66
67    /** Are we currently inside a negated character class? */
68    private boolean inNegativeClass = false;
69
70    private static final String NON_IDENT_ESCAPES = "$^*+(){}[]|\\.?-";
71
72    private static class Capture {
73        /** Zero-width negative lookaheads enclosing the capture. */
74        private final int negLookaheadLevel;
75        /** Sequential id of top-level negative lookaheads containing the capture. */
76        private  final int negLookaheadGroup;
77
78        Capture(final int negLookaheadGroup, final int negLookaheadLevel) {
79            this.negLookaheadGroup = negLookaheadGroup;
80            this.negLookaheadLevel = negLookaheadLevel;
81        }
82
83        boolean isContained(final int group, final int level) {
84            return group == this.negLookaheadGroup && level >= this.negLookaheadLevel;
85        }
86
87    }
88
89    /**
90     * Constructor
91     * @param string the JavaScript regexp to parse
92     */
93    private RegExpScanner(final String string) {
94        super(string);
95        sb = new StringBuilder(limit);
96        reset(0);
97        expected.put(']', 0);
98        expected.put('}', 0);
99    }
100
101    private void processForwardReferences() {
102
103        final Iterator<Integer> iterator = forwardReferences.descendingIterator();
104        while (iterator.hasNext()) {
105            final int pos = iterator.next();
106            final int num = iterator.next();
107            if (num > caps.size()) {
108                // Non-existing backreference. If the number begins with a valid octal convert it to
109                // Unicode escape and append the rest to a literal character sequence.
110                final StringBuilder buffer = new StringBuilder();
111                octalOrLiteral(Integer.toString(num), buffer);
112                sb.insert(pos, buffer);
113            }
114        }
115
116        forwardReferences.clear();
117    }
118
119    /**
120     * Scan a JavaScript regexp string returning a Java safe regex string.
121     *
122     * @param string
123     *            JavaScript regexp string.
124     * @return Java safe regex string.
125     */
126    public static RegExpScanner scan(final String string) {
127        final RegExpScanner scanner = new RegExpScanner(string);
128
129        try {
130            scanner.disjunction();
131        } catch (final Exception e) {
132            throw new PatternSyntaxException(e.getMessage(), string, scanner.position);
133        }
134
135        scanner.processForwardReferences();
136
137        // Throw syntax error unless we parsed the entire JavaScript regexp without syntax errors
138        if (scanner.position != string.length()) {
139            final String p = scanner.getStringBuilder().toString();
140            throw new PatternSyntaxException(string, p, p.length() + 1);
141        }
142
143        return scanner;
144    }
145
146    final StringBuilder getStringBuilder() {
147        return sb;
148    }
149
150    String getJavaPattern() {
151        return sb.toString();
152    }
153
154    BitVector getGroupsInNegativeLookahead() {
155        BitVector vec = null;
156        for (int i = 0; i < caps.size(); i++) {
157            final Capture cap = caps.get(i);
158            if (cap.negLookaheadLevel > 0) {
159                if (vec == null) {
160                    vec = new BitVector(caps.size() + 1);
161                }
162                vec.set(i + 1);
163            }
164        }
165        return vec;
166    }
167
168    /**
169     * Commit n characters to the builder and to a given token
170     * @param n     Number of characters.
171     * @return Committed token
172     */
173    private boolean commit(final int n) {
174        switch (n) {
175        case 1:
176            sb.append(ch0);
177            skip(1);
178            break;
179        case 2:
180            sb.append(ch0);
181            sb.append(ch1);
182            skip(2);
183            break;
184        case 3:
185            sb.append(ch0);
186            sb.append(ch1);
187            sb.append(ch2);
188            skip(3);
189            break;
190        default:
191            assert false : "Should not reach here";
192        }
193
194        return true;
195    }
196
197    /**
198     * Restart the buffers back at an earlier position.
199     *
200     * @param startIn
201     *            Position in the input stream.
202     * @param startOut
203     *            Position in the output stream.
204     */
205    private void restart(final int startIn, final int startOut) {
206        reset(startIn);
207        sb.setLength(startOut);
208    }
209
210    private void push(final char ch) {
211        expected.put(ch, expected.get(ch) + 1);
212    }
213
214    private void pop(final char ch) {
215        expected.put(ch, Math.min(0, expected.get(ch) - 1));
216    }
217
218    /*
219     * Recursive descent tokenizer starts below.
220     */
221
222    /*
223     * Disjunction ::
224     *      Alternative
225     *      Alternative | Disjunction
226     */
227    private void disjunction() {
228        while (true) {
229            alternative();
230
231            if (ch0 == '|') {
232                commit(1);
233            } else {
234                break;
235            }
236        }
237    }
238
239    /*
240     * Alternative ::
241     *      [empty]
242     *      Alternative Term
243     */
244    private void alternative() {
245        while (term()) {
246            // do nothing
247        }
248    }
249
250    /*
251     * Term ::
252     *      Assertion
253     *      Atom
254     *      Atom Quantifier
255     */
256    private boolean term() {
257        final int startIn  = position;
258        final int startOut = sb.length();
259
260        if (assertion()) {
261            return true;
262        }
263
264        if (atom()) {
265            quantifier();
266            return true;
267        }
268
269        restart(startIn, startOut);
270        return false;
271    }
272
273    /*
274     * Assertion ::
275     *      ^
276     *      $
277     *      \b
278     *      \B
279     *      ( ? = Disjunction )
280     *      ( ? ! Disjunction )
281     */
282    private boolean assertion() {
283        final int startIn  = position;
284        final int startOut = sb.length();
285
286        switch (ch0) {
287        case '^':
288        case '$':
289            return commit(1);
290
291        case '\\':
292            if (ch1 == 'b' || ch1 == 'B') {
293                return commit(2);
294            }
295            break;
296
297        case '(':
298            if (ch1 != '?') {
299                break;
300            }
301            if (ch2 != '=' && ch2 != '!') {
302                break;
303            }
304            final boolean isNegativeLookahead = (ch2 == '!');
305            commit(3);
306
307            if (isNegativeLookahead) {
308                if (negLookaheadLevel == 0) {
309                    negLookaheadGroup++;
310                }
311                negLookaheadLevel++;
312            }
313            disjunction();
314            if (isNegativeLookahead) {
315                negLookaheadLevel--;
316            }
317
318            if (ch0 == ')') {
319                return commit(1);
320            }
321            break;
322
323        default:
324            break;
325        }
326
327        restart(startIn, startOut);
328        return false;
329    }
330
331    /*
332     * Quantifier ::
333     *      QuantifierPrefix
334     *      QuantifierPrefix ?
335     */
336    private boolean quantifier() {
337        if (quantifierPrefix()) {
338            if (ch0 == '?') {
339                commit(1);
340            }
341            return true;
342        }
343        return false;
344    }
345
346    /*
347     * QuantifierPrefix ::
348     *      *
349     *      +
350     *      ?
351     *      { DecimalDigits }
352     *      { DecimalDigits , }
353     *      { DecimalDigits , DecimalDigits }
354     */
355    private boolean quantifierPrefix() {
356        final int startIn  = position;
357        final int startOut = sb.length();
358
359        switch (ch0) {
360        case '*':
361        case '+':
362        case '?':
363            return commit(1);
364
365        case '{':
366            commit(1);
367
368            if (!decimalDigits()) {
369                break; // not a quantifier - back out
370            }
371            push('}');
372
373            if (ch0 == ',') {
374                commit(1);
375                decimalDigits();
376            }
377
378            if (ch0 == '}') {
379                pop('}');
380                commit(1);
381            } else {
382                // Bad quantifier should be rejected but is accepted by all major engines
383                restart(startIn, startOut);
384                return false;
385            }
386
387            return true;
388
389        default:
390            break;
391        }
392
393        restart(startIn, startOut);
394        return false;
395    }
396
397    /*
398     * Atom ::
399     *      PatternCharacter
400     *      .
401     *      \ AtomEscape
402     *      CharacterClass
403     *      ( Disjunction )
404     *      ( ? : Disjunction )
405     *
406     */
407    private boolean atom() {
408        final int startIn  = position;
409        final int startOut = sb.length();
410
411        if (patternCharacter()) {
412            return true;
413        }
414
415        if (ch0 == '.') {
416            return commit(1);
417        }
418
419        if (ch0 == '\\') {
420            commit(1);
421
422            if (atomEscape()) {
423                return true;
424            }
425        }
426
427        if (characterClass()) {
428            return true;
429        }
430
431        if (ch0 == '(') {
432            commit(1);
433            if (ch0 == '?' && ch1 == ':') {
434                commit(2);
435            } else {
436                caps.add(new Capture(negLookaheadGroup, negLookaheadLevel));
437            }
438
439            disjunction();
440
441            if (ch0 == ')') {
442                commit(1);
443                return true;
444            }
445        }
446
447        restart(startIn, startOut);
448        return false;
449    }
450
451    /*
452     * PatternCharacter ::
453     *      SourceCharacter but not any of: ^$\.*+?()[]{}|
454     */
455    @SuppressWarnings("fallthrough")
456    private boolean patternCharacter() {
457        if (atEOF()) {
458            return false;
459        }
460
461        switch (ch0) {
462        case '^':
463        case '$':
464        case '\\':
465        case '.':
466        case '*':
467        case '+':
468        case '?':
469        case '(':
470        case ')':
471        case '[':
472        case '|':
473            return false;
474
475        case '}':
476        case ']':
477            final int n = expected.get(ch0);
478            if (n != 0) {
479                return false;
480            }
481
482       case '{':
483           // if not a valid quantifier escape curly brace to match itself
484           // this ensures compatibility with other JS implementations
485           if (!quantifierPrefix()) {
486               sb.append('\\');
487               return commit(1);
488           }
489           return false;
490
491        default:
492            return commit(1); // SOURCECHARACTER
493        }
494    }
495
496    /*
497     * AtomEscape ::
498     *      DecimalEscape
499     *      CharacterEscape
500     *      CharacterClassEscape
501     */
502    private boolean atomEscape() {
503        // Note that contrary to ES 5.1 spec we put identityEscape() last because it acts as a catch-all
504        return decimalEscape() || characterClassEscape() || characterEscape() || identityEscape();
505    }
506
507    /*
508     * CharacterEscape ::
509     *      ControlEscape
510     *      c ControlLetter
511     *      HexEscapeSequence
512     *      UnicodeEscapeSequence
513     *      IdentityEscape
514     */
515    private boolean characterEscape() {
516        final int startIn  = position;
517        final int startOut = sb.length();
518
519        if (controlEscape()) {
520            return true;
521        }
522
523        if (ch0 == 'c') {
524            commit(1);
525            if (controlLetter()) {
526                return true;
527            }
528            restart(startIn, startOut);
529        }
530
531        if (hexEscapeSequence() || unicodeEscapeSequence()) {
532            return true;
533        }
534
535        restart(startIn, startOut);
536        return false;
537    }
538
539    private boolean scanEscapeSequence(final char leader, final int length) {
540        final int startIn  = position;
541        final int startOut = sb.length();
542
543        if (ch0 != leader) {
544            return false;
545        }
546
547        commit(1);
548        for (int i = 0; i < length; i++) {
549            final char ch0l = Character.toLowerCase(ch0);
550            if ((ch0l >= 'a' && ch0l <= 'f') || isDecimalDigit(ch0)) {
551                commit(1);
552            } else {
553                restart(startIn, startOut);
554                return false;
555            }
556        }
557
558        return true;
559    }
560
561    private boolean hexEscapeSequence() {
562        return scanEscapeSequence('x', 2);
563    }
564
565    private boolean unicodeEscapeSequence() {
566        return scanEscapeSequence('u', 4);
567    }
568
569    /*
570     * ControlEscape ::
571     *      one of fnrtv
572     */
573    private boolean controlEscape() {
574        switch (ch0) {
575        case 'f':
576        case 'n':
577        case 'r':
578        case 't':
579        case 'v':
580            return commit(1);
581
582        default:
583            return false;
584        }
585    }
586
587    /*
588     * ControlLetter ::
589     *      one of abcdefghijklmnopqrstuvwxyz
590     *      ABCDEFGHIJKLMNOPQRSTUVWXYZ
591     */
592    private boolean controlLetter() {
593        // To match other engines we also accept '0'..'9' and '_' as control letters inside a character class.
594        if ((ch0 >= 'A' && ch0 <= 'Z') || (ch0 >= 'a' && ch0 <= 'z')
595                || (inCharClass && (isDecimalDigit(ch0) || ch0 == '_'))) {
596            // for some reason java regexps don't like control characters on the
597            // form "\\ca".match([string with ascii 1 at char0]). Translating
598            // them to unicode does it though.
599            sb.setLength(sb.length() - 1);
600            unicode(ch0 % 32, sb);
601            skip(1);
602            return true;
603        }
604        return false;
605    }
606
607    /*
608     * IdentityEscape ::
609     *      SourceCharacter but not IdentifierPart
610     *      <ZWJ>  (200c)
611     *      <ZWNJ> (200d)
612     */
613    private boolean identityEscape() {
614        if (atEOF()) {
615            throw new RuntimeException("\\ at end of pattern"); // will be converted to PatternSyntaxException
616        }
617        // ES 5.1 A.7 requires "not IdentifierPart" here but all major engines accept any character here.
618        if (ch0 == 'c') {
619            sb.append('\\'); // Treat invalid \c control sequence as \\c
620        } else if (NON_IDENT_ESCAPES.indexOf(ch0) == -1) {
621            sb.setLength(sb.length() - 1);
622        }
623        return commit(1);
624    }
625
626    /*
627     * DecimalEscape ::
628     *      DecimalIntegerLiteral [lookahead DecimalDigit]
629     */
630    private boolean decimalEscape() {
631        final int startIn  = position;
632        final int startOut = sb.length();
633
634        if (ch0 == '0' && !isOctalDigit(ch1)) {
635            skip(1);
636            //  DecimalEscape :: 0. If i is zero, return the EscapeValue consisting of a <NUL> character (Unicodevalue0000);
637            sb.append("\u0000");
638            return true;
639        }
640
641        if (isDecimalDigit(ch0)) {
642
643            if (ch0 == '0') {
644                // We know this is an octal escape.
645                if (inCharClass) {
646                    // Convert octal escape to unicode escape if inside character class.
647                    int octalValue = 0;
648                    while (isOctalDigit(ch0)) {
649                        octalValue = octalValue * 8 + ch0 - '0';
650                        skip(1);
651                    }
652
653                    unicode(octalValue, sb);
654
655                } else {
656                    // Copy decimal escape as-is
657                    decimalDigits();
658                }
659            } else {
660                // This should be a backreference, but could also be an octal escape or even a literal string.
661                int decimalValue = 0;
662                while (isDecimalDigit(ch0)) {
663                    decimalValue = decimalValue * 10 + ch0 - '0';
664                    skip(1);
665                }
666
667                if (inCharClass) {
668                    // No backreferences in character classes. Encode as unicode escape or literal char sequence
669                    sb.setLength(sb.length() - 1);
670                    octalOrLiteral(Integer.toString(decimalValue), sb);
671
672                } else if (decimalValue <= caps.size()) {
673                    //  Captures inside a negative lookahead are undefined when referenced from the outside.
674                    if (!caps.get(decimalValue - 1).isContained(negLookaheadGroup, negLookaheadLevel)) {
675                        // Reference to capture in negative lookahead, omit from output buffer.
676                        sb.setLength(sb.length() - 1);
677                    } else {
678                        // Append backreference to output buffer.
679                        sb.append(decimalValue);
680                    }
681                } else {
682                    // Forward references to a capture group are always undefined so we can omit it from the output buffer.
683                    // However, if the target capture does not exist, we need to rewrite the reference as hex escape
684                    // or literal string, so register the reference for later processing.
685                    sb.setLength(sb.length() - 1);
686                    forwardReferences.add(decimalValue);
687                    forwardReferences.add(sb.length());
688                }
689
690            }
691            return true;
692        }
693
694        restart(startIn, startOut);
695        return false;
696    }
697
698    /*
699     * CharacterClassEscape ::
700     *  one of dDsSwW
701     */
702    private boolean characterClassEscape() {
703        switch (ch0) {
704        // java.util.regex requires translation of \s and \S to explicit character list
705        case 's':
706            if (RegExpFactory.usesJavaUtilRegex()) {
707                sb.setLength(sb.length() - 1);
708                // No nested class required if we already are inside a character class
709                if (inCharClass) {
710                    sb.append(Lexer.getWhitespaceRegExp());
711                } else {
712                    sb.append('[').append(Lexer.getWhitespaceRegExp()).append(']');
713                }
714                skip(1);
715                return true;
716            }
717            return commit(1);
718        case 'S':
719            if (RegExpFactory.usesJavaUtilRegex()) {
720                sb.setLength(sb.length() - 1);
721                // In negative class we must use intersection to get double negation ("not anything else than space")
722                sb.append(inNegativeClass ? "&&[" : "[^").append(Lexer.getWhitespaceRegExp()).append(']');
723                skip(1);
724                return true;
725            }
726            return commit(1);
727        case 'd':
728        case 'D':
729        case 'w':
730        case 'W':
731            return commit(1);
732
733        default:
734            return false;
735        }
736    }
737
738    /*
739     * CharacterClass ::
740     *      [ [lookahead {^}] ClassRanges ]
741     *      [ ^ ClassRanges ]
742     */
743    private boolean characterClass() {
744        final int startIn  = position;
745        final int startOut = sb.length();
746
747        if (ch0 == '[') {
748            try {
749                inCharClass = true;
750                push(']');
751                commit(1);
752
753                if (ch0 == '^') {
754                    inNegativeClass = true;
755                    commit(1);
756                }
757
758                if (classRanges() && ch0 == ']') {
759                    pop(']');
760                    commit(1);
761
762                    // Substitute empty character classes [] and [^] that never or always match
763                    if (position == startIn + 2) {
764                        sb.setLength(sb.length() - 1);
765                        sb.append("^\\s\\S]");
766                    } else if (position == startIn + 3 && inNegativeClass) {
767                        sb.setLength(sb.length() - 2);
768                        sb.append("\\s\\S]");
769                    }
770
771                    return true;
772                }
773            } finally {
774                inCharClass = false;  // no nested character classes in JavaScript
775                inNegativeClass = false;
776            }
777        }
778
779        restart(startIn, startOut);
780        return false;
781    }
782
783    /*
784     * ClassRanges ::
785     *      [empty]
786     *      NonemptyClassRanges
787     */
788    private boolean classRanges() {
789        nonemptyClassRanges();
790        return true;
791    }
792
793    /*
794     * NonemptyClassRanges ::
795     *      ClassAtom
796     *      ClassAtom NonemptyClassRangesNoDash
797     *      ClassAtom - ClassAtom ClassRanges
798     */
799    private boolean nonemptyClassRanges() {
800        final int startIn  = position;
801        final int startOut = sb.length();
802
803        if (classAtom()) {
804
805            if (ch0 == '-') {
806                commit(1);
807
808                if (classAtom() && classRanges()) {
809                    return true;
810                }
811            }
812
813            nonemptyClassRangesNoDash();
814
815            return true;
816        }
817
818        restart(startIn, startOut);
819        return false;
820    }
821
822    /*
823     * NonemptyClassRangesNoDash ::
824     *      ClassAtom
825     *      ClassAtomNoDash NonemptyClassRangesNoDash
826     *      ClassAtomNoDash - ClassAtom ClassRanges
827     */
828    private boolean nonemptyClassRangesNoDash() {
829        final int startIn  = position;
830        final int startOut = sb.length();
831
832        if (classAtomNoDash()) {
833
834            // need to check dash first, as for e.g. [a-b|c-d] will otherwise parse - as an atom
835            if (ch0 == '-') {
836               commit(1);
837
838               if (classAtom() && classRanges()) {
839                   return true;
840               }
841               //fallthru
842           }
843
844            nonemptyClassRangesNoDash();
845            return true; // still a class atom
846        }
847
848        if (classAtom()) {
849            return true;
850        }
851
852        restart(startIn, startOut);
853        return false;
854    }
855
856    /*
857     * ClassAtom : - ClassAtomNoDash
858     */
859    private boolean classAtom() {
860
861        if (ch0 == '-') {
862            return commit(1);
863        }
864
865        return classAtomNoDash();
866    }
867
868    /*
869     * ClassAtomNoDash ::
870     *      SourceCharacter but not one of \ or ] or -
871     *      \ ClassEscape
872     */
873    private boolean classAtomNoDash() {
874        if (atEOF()) {
875            return false;
876        }
877        final int startIn  = position;
878        final int startOut = sb.length();
879
880        switch (ch0) {
881        case ']':
882        case '-':
883            return false;
884
885        case '[':
886            // unescaped left square bracket - add escape
887            sb.append('\\');
888            return commit(1);
889
890        case '\\':
891            commit(1);
892            if (classEscape()) {
893                return true;
894            }
895
896            restart(startIn, startOut);
897            return false;
898
899        default:
900            return commit(1);
901        }
902    }
903
904    /*
905     * ClassEscape ::
906     *      DecimalEscape
907     *      b
908     *      CharacterEscape
909     *      CharacterClassEscape
910     */
911    private boolean classEscape() {
912
913        if (decimalEscape()) {
914            return true;
915        }
916
917        if (ch0 == 'b') {
918            sb.setLength(sb.length() - 1);
919            sb.append('\b');
920            skip(1);
921            return true;
922        }
923
924        // Note that contrary to ES 5.1 spec we put identityEscape() last because it acts as a catch-all
925        return characterEscape() || characterClassEscape() || identityEscape();
926    }
927
928    /*
929     * DecimalDigits
930     */
931    private boolean decimalDigits() {
932        if (!isDecimalDigit(ch0)) {
933            return false;
934        }
935
936        while (isDecimalDigit(ch0)) {
937            commit(1);
938        }
939
940        return true;
941    }
942
943    private static void unicode(final int value, final StringBuilder buffer) {
944        final String hex = Integer.toHexString(value);
945        buffer.append('u');
946        for (int i = 0; i < 4 - hex.length(); i++) {
947            buffer.append('0');
948        }
949        buffer.append(hex);
950    }
951
952    // Convert what would have been a backreference into a unicode escape, or a number literal, or both.
953    private static void octalOrLiteral(final String numberLiteral, final StringBuilder buffer) {
954        final int length = numberLiteral.length();
955        int octalValue = 0;
956        int pos = 0;
957        // Maximum value for octal escape is 0377 (255) so we stop the loop at 32
958        while (pos < length && octalValue < 0x20) {
959            final char ch = numberLiteral.charAt(pos);
960            if (isOctalDigit(ch)) {
961                octalValue = octalValue * 8 + ch - '0';
962            } else {
963                break;
964            }
965            pos++;
966        }
967        if (octalValue > 0) {
968            buffer.append('\\');
969            unicode(octalValue, buffer);
970            buffer.append(numberLiteral.substring(pos));
971        } else {
972            buffer.append(numberLiteral);
973        }
974    }
975
976    private static boolean isOctalDigit(final char ch) {
977        return ch >= '0' && ch <= '7';
978    }
979
980    private static boolean isDecimalDigit(final char ch) {
981        return ch >= '0' && ch <= '9';
982    }
983}
984