NativeRegExp.java revision 1483:7cb19fa78763
1/*
2 * Copyright (c) 2010, 2013, Oracle and/or its affiliates. All rights reserved.
3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4 *
5 * This code is free software; you can redistribute it and/or modify it
6 * under the terms of the GNU General Public License version 2 only, as
7 * published by the Free Software Foundation.  Oracle designates this
8 * particular file as subject to the "Classpath" exception as provided
9 * by Oracle in the LICENSE file that accompanied this code.
10 *
11 * This code is distributed in the hope that it will be useful, but WITHOUT
12 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
14 * version 2 for more details (a copy is included in the LICENSE file that
15 * accompanied this code).
16 *
17 * You should have received a copy of the GNU General Public License version
18 * 2 along with this work; if not, write to the Free Software Foundation,
19 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
20 *
21 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
22 * or visit www.oracle.com if you need additional information or have any
23 * questions.
24 */
25
26package jdk.nashorn.internal.objects;
27
28import static jdk.nashorn.internal.runtime.ECMAErrors.typeError;
29import static jdk.nashorn.internal.runtime.ScriptRuntime.UNDEFINED;
30
31import java.lang.invoke.MethodHandle;
32import java.util.ArrayList;
33import java.util.Arrays;
34import java.util.List;
35import java.util.concurrent.Callable;
36import jdk.nashorn.internal.objects.annotations.Attribute;
37import jdk.nashorn.internal.objects.annotations.Constructor;
38import jdk.nashorn.internal.objects.annotations.Function;
39import jdk.nashorn.internal.objects.annotations.Getter;
40import jdk.nashorn.internal.objects.annotations.Property;
41import jdk.nashorn.internal.objects.annotations.ScriptClass;
42import jdk.nashorn.internal.objects.annotations.SpecializedFunction;
43import jdk.nashorn.internal.objects.annotations.Where;
44import jdk.nashorn.internal.runtime.BitVector;
45import jdk.nashorn.internal.runtime.JSType;
46import jdk.nashorn.internal.runtime.ParserException;
47import jdk.nashorn.internal.runtime.PropertyMap;
48import jdk.nashorn.internal.runtime.ScriptFunction;
49import jdk.nashorn.internal.runtime.ScriptObject;
50import jdk.nashorn.internal.runtime.ScriptRuntime;
51import jdk.nashorn.internal.runtime.linker.Bootstrap;
52import jdk.nashorn.internal.runtime.regexp.RegExp;
53import jdk.nashorn.internal.runtime.regexp.RegExpFactory;
54import jdk.nashorn.internal.runtime.regexp.RegExpMatcher;
55import jdk.nashorn.internal.runtime.regexp.RegExpResult;
56
57/**
58 * ECMA 15.10 RegExp Objects.
59 */
60@ScriptClass("RegExp")
61public final class NativeRegExp extends ScriptObject {
    /**
     * ECMA 15.10.7.5 lastIndex property. Declared to scripts as a
     * non-enumerable, non-configurable property.
     */
    @Property(attributes = Attribute.NOT_ENUMERABLE | Attribute.NOT_CONFIGURABLE)
    public Object lastIndex;

    /** Compiled regexp that performs the actual matching for this object. */
    private RegExp regexp;

    // Reference to the global object. It is needed to support the static
    // RegExp properties: successful matches record their result on it.
    private final Global globalObject;

    // initialized by nasgen
    private static PropertyMap $nasgenmap$;
74
75    private NativeRegExp(final Global global) {
76        super(global.getRegExpPrototype(), $nasgenmap$);
77        this.globalObject = global;
78    }
79
80    NativeRegExp(final String input, final String flagString, final Global global, final ScriptObject proto) {
81        super(proto, $nasgenmap$);
82        try {
83            this.regexp = RegExpFactory.create(input, flagString);
84        } catch (final ParserException e) {
85            // translate it as SyntaxError object and throw it
86            e.throwAsEcmaException();
87            throw new AssertionError(); //guard against null warnings below
88        }
89        this.globalObject = global;
90        this.setLastIndex(0);
91    }
92
    /**
     * Compiles {@code input} with the given flags, using the RegExp prototype
     * of {@code global}.
     *
     * @param input      pattern source
     * @param flagString regexp flags
     * @param global     global object this regexp is associated with
     */
    NativeRegExp(final String input, final String flagString, final Global global) {
        this(input, flagString, global, global.getRegExpPrototype());
    }
96
    /**
     * Compiles {@code input} with the given flags against the current global
     * ({@code Global.instance()}).
     *
     * @param input      pattern source
     * @param flagString regexp flags
     */
    NativeRegExp(final String input, final String flagString) {
        this(input, flagString, Global.instance());
    }
100
    /**
     * Compiles {@code string} with an empty flag string for the given global.
     *
     * @param string pattern source
     * @param global global object this regexp is associated with
     */
    NativeRegExp(final String string, final Global global) {
        this(string, "", global);
    }
104
    /**
     * Compiles {@code string} with an empty flag string against the current
     * global ({@code Global.instance()}).
     *
     * @param string pattern source
     */
    NativeRegExp(final String string) {
        this(string, Global.instance());
    }
108
109    NativeRegExp(final NativeRegExp regExp) {
110        this(Global.instance());
111        this.lastIndex  = regExp.getLastIndexObject();
112        this.regexp      = regExp.getRegExp();
113    }
114
    /**
     * Returns the class name of this script object.
     *
     * @return the string {@code "RegExp"}
     */
    @Override
    public String getClassName() {
        return "RegExp";
    }
119
120    /**
121     * ECMA 15.10.4
122     *
123     * Constructor
124     *
125     * @param isNew is the new operator used for instantiating this regexp
126     * @param self  self reference
127     * @param args  arguments (optional: pattern and flags)
128     * @return new NativeRegExp
129     */
130    @Constructor(arity = 2)
131    public static NativeRegExp constructor(final boolean isNew, final Object self, final Object... args) {
132        if (args.length > 1) {
133            return newRegExp(args[0], args[1]);
134        } else if (args.length > 0) {
135            return newRegExp(args[0], UNDEFINED);
136        }
137
138        return newRegExp(UNDEFINED, UNDEFINED);
139    }
140
141    /**
142     * ECMA 15.10.4
143     *
144     * Constructor - specialized version, no args, empty regexp
145     *
146     * @param isNew is the new operator used for instantiating this regexp
147     * @param self  self reference
148     * @return new NativeRegExp
149     */
150    @SpecializedFunction(isConstructor=true)
151    public static NativeRegExp constructor(final boolean isNew, final Object self) {
152        return new NativeRegExp("", "");
153    }
154
155    /**
156     * ECMA 15.10.4
157     *
158     * Constructor - specialized version, pattern, no flags
159     *
160     * @param isNew is the new operator used for instantiating this regexp
161     * @param self  self reference
162     * @param pattern pattern
163     * @return new NativeRegExp
164     */
165    @SpecializedFunction(isConstructor=true)
166    public static NativeRegExp constructor(final boolean isNew, final Object self, final Object pattern) {
167        return newRegExp(pattern, UNDEFINED);
168    }
169
170    /**
171     * ECMA 15.10.4
172     *
173     * Constructor - specialized version, pattern and flags
174     *
175     * @param isNew is the new operator used for instantiating this regexp
176     * @param self  self reference
177     * @param pattern pattern
178     * @param flags  flags
179     * @return new NativeRegExp
180     */
181    @SpecializedFunction(isConstructor=true)
182    public static NativeRegExp constructor(final boolean isNew, final Object self, final Object pattern, final Object flags) {
183        return newRegExp(pattern, flags);
184    }
185
186    /**
187     * External constructor used in generated code, which explains the public access
188     *
189     * @param regexp regexp
190     * @param flags  flags
191     * @return new NativeRegExp
192     */
193    public static NativeRegExp newRegExp(final Object regexp, final Object flags) {
194        String  patternString = "";
195        String  flagString    = "";
196
197        if (regexp != UNDEFINED) {
198            if (regexp instanceof NativeRegExp) {
199                if (flags != UNDEFINED) {
200                    throw typeError("regex.cant.supply.flags");
201                }
202                return (NativeRegExp)regexp; // 15.10.3.1 - undefined flags and regexp as
203            }
204            patternString = JSType.toString(regexp);
205        }
206
207        if (flags != UNDEFINED) {
208            flagString = JSType.toString(flags);
209        }
210
211        return new NativeRegExp(patternString, flagString);
212    }
213
214    /**
215     * Build a regexp that matches {@code string} as-is. All meta-characters will be escaped.
216     *
217     * @param string pattern string
218     * @return flat regexp
219     */
220    static NativeRegExp flatRegExp(final String string) {
221        // escape special characters
222        StringBuilder sb = null;
223        final int length = string.length();
224
225        for (int i = 0; i < length; i++) {
226            final char c = string.charAt(i);
227            switch (c) {
228                case '^':
229                case '$':
230                case '\\':
231                case '.':
232                case '*':
233                case '+':
234                case '?':
235                case '(':
236                case ')':
237                case '[':
238                case '{':
239                case '|':
240                    if (sb == null) {
241                        sb = new StringBuilder(length * 2);
242                        sb.append(string, 0, i);
243                    }
244                    sb.append('\\');
245                    sb.append(c);
246                    break;
247                default:
248                    if (sb != null) {
249                        sb.append(c);
250                    }
251                    break;
252            }
253        }
254        return new NativeRegExp(sb == null ? string : sb.toString(), "");
255    }
256
257    private String getFlagString() {
258        final StringBuilder sb = new StringBuilder(3);
259
260        if (regexp.isGlobal()) {
261            sb.append('g');
262        }
263        if (regexp.isIgnoreCase()) {
264            sb.append('i');
265        }
266        if (regexp.isMultiline()) {
267            sb.append('m');
268        }
269
270        return sb.toString();
271    }
272
273    @Override
274    public String safeToString() {
275        return "[RegExp " + toString() + "]";
276    }
277
278    @Override
279    public String toString() {
280        return "/" + regexp.getSource() + "/" + getFlagString();
281    }
282
283    /**
284     * Nashorn extension: RegExp.prototype.compile - everybody implements this!
285     *
286     * @param self    self reference
287     * @param pattern pattern
288     * @param flags   flags
289     * @return new NativeRegExp
290     */
291    @Function(attributes = Attribute.NOT_ENUMERABLE)
292    public static ScriptObject compile(final Object self, final Object pattern, final Object flags) {
293        final NativeRegExp regExp   = checkRegExp(self);
294        final NativeRegExp compiled = newRegExp(pattern, flags);
295        // copy over regexp to 'self'
296        regExp.setRegExp(compiled.getRegExp());
297
298        // Some implementations return undefined. Some return 'self'. Since return
299        // value is most likely be ignored, we can play safe and return 'self'.
300        return regExp;
301    }
302
303    /**
304     * ECMA 15.10.6.2 RegExp.prototype.exec(string)
305     *
306     * @param self   self reference
307     * @param string string to match against regexp
308     * @return array containing the matches or {@code null} if no match
309     */
310    @Function(attributes = Attribute.NOT_ENUMERABLE)
311    public static ScriptObject exec(final Object self, final Object string) {
312        return checkRegExp(self).exec(JSType.toString(string));
313    }
314
315    /**
316     * ECMA 15.10.6.3 RegExp.prototype.test(string)
317     *
318     * @param self   self reference
319     * @param string string to test for matches against regexp
320     * @return true if matches found, false otherwise
321     */
322    @Function(attributes = Attribute.NOT_ENUMERABLE)
323    public static boolean test(final Object self, final Object string) {
324        return checkRegExp(self).test(JSType.toString(string));
325    }
326
327    /**
328     * ECMA 15.10.6.4 RegExp.prototype.toString()
329     *
330     * @param self self reference
331     * @return string version of regexp
332     */
333    @Function(attributes = Attribute.NOT_ENUMERABLE)
334    public static String toString(final Object self) {
335        return checkRegExp(self).toString();
336    }
337
338    /**
339     * ECMA 15.10.7.1 source
340     *
341     * @param self self reference
342     * @return the input string for the regexp
343     */
344    @Getter(attributes = Attribute.NON_ENUMERABLE_CONSTANT)
345    public static Object source(final Object self) {
346        return checkRegExp(self).getRegExp().getSource();
347    }
348
349    /**
350     * ECMA 15.10.7.2 global
351     *
352     * @param self self reference
353     * @return true if this regexp is flagged global, false otherwise
354     */
355    @Getter(attributes = Attribute.NON_ENUMERABLE_CONSTANT)
356    public static Object global(final Object self) {
357        return checkRegExp(self).getRegExp().isGlobal();
358    }
359
360    /**
361     * ECMA 15.10.7.3 ignoreCase
362     *
363     * @param self self reference
364     * @return true if this regexp if flagged to ignore case, false otherwise
365     */
366    @Getter(attributes = Attribute.NON_ENUMERABLE_CONSTANT)
367    public static Object ignoreCase(final Object self) {
368        return checkRegExp(self).getRegExp().isIgnoreCase();
369    }
370
371    /**
372     * ECMA 15.10.7.4 multiline
373     *
374     * @param self self reference
375     * @return true if this regexp is flagged to be multiline, false otherwise
376     */
377    @Getter(attributes = Attribute.NON_ENUMERABLE_CONSTANT)
378    public static Object multiline(final Object self) {
379        return checkRegExp(self).getRegExp().isMultiline();
380    }
381
382    /**
383     * Getter for non-standard RegExp.input property.
384     * @param self self object
385     * @return last regexp input
386     */
387    @Getter(where = Where.CONSTRUCTOR, attributes = Attribute.CONSTANT, name = "input")
388    public static Object getLastInput(final Object self) {
389        final RegExpResult match = Global.instance().getLastRegExpResult();
390        return match == null ? "" : match.getInput();
391    }
392
393    /**
394     * Getter for non-standard RegExp.multiline property.
395     * @param self self object
396     * @return last regexp input
397     */
398    @Getter(where = Where.CONSTRUCTOR, attributes = Attribute.CONSTANT, name = "multiline")
399    public static Object getLastMultiline(final Object self) {
400        return false; // doesn't ever seem to become true and isn't documented anyhwere
401    }
402
403    /**
404     * Getter for non-standard RegExp.lastMatch property.
405     * @param self self object
406     * @return last regexp input
407     */
408    @Getter(where = Where.CONSTRUCTOR, attributes = Attribute.CONSTANT, name = "lastMatch")
409    public static Object getLastMatch(final Object self) {
410        final RegExpResult match = Global.instance().getLastRegExpResult();
411        return match == null ? "" : match.getGroup(0);
412    }
413
414    /**
415     * Getter for non-standard RegExp.lastParen property.
416     * @param self self object
417     * @return last regexp input
418     */
419    @Getter(where = Where.CONSTRUCTOR, attributes = Attribute.CONSTANT, name = "lastParen")
420    public static Object getLastParen(final Object self) {
421        final RegExpResult match = Global.instance().getLastRegExpResult();
422        return match == null ? "" : match.getLastParen();
423    }
424
425    /**
426     * Getter for non-standard RegExp.leftContext property.
427     * @param self self object
428     * @return last regexp input
429     */
430    @Getter(where = Where.CONSTRUCTOR, attributes = Attribute.CONSTANT, name = "leftContext")
431    public static Object getLeftContext(final Object self) {
432        final RegExpResult match = Global.instance().getLastRegExpResult();
433        return match == null ? "" : match.getInput().substring(0, match.getIndex());
434    }
435
436    /**
437     * Getter for non-standard RegExp.rightContext property.
438     * @param self self object
439     * @return last regexp input
440     */
441    @Getter(where = Where.CONSTRUCTOR, attributes = Attribute.CONSTANT, name = "rightContext")
442    public static Object getRightContext(final Object self) {
443        final RegExpResult match = Global.instance().getLastRegExpResult();
444        return match == null ? "" : match.getInput().substring(match.getIndex() + match.length());
445    }
446
447    /**
448     * Getter for non-standard RegExp.$1 property.
449     * @param self self object
450     * @return last regexp input
451     */
452    @Getter(where = Where.CONSTRUCTOR, attributes = Attribute.CONSTANT, name = "$1")
453    public static Object getGroup1(final Object self) {
454        final RegExpResult match = Global.instance().getLastRegExpResult();
455        return match == null ? "" : match.getGroup(1);
456    }
457
458    /**
459     * Getter for non-standard RegExp.$2 property.
460     * @param self self object
461     * @return last regexp input
462     */
463    @Getter(where = Where.CONSTRUCTOR, attributes = Attribute.CONSTANT, name = "$2")
464    public static Object getGroup2(final Object self) {
465        final RegExpResult match = Global.instance().getLastRegExpResult();
466        return match == null ? "" : match.getGroup(2);
467    }
468
469    /**
470     * Getter for non-standard RegExp.$3 property.
471     * @param self self object
472     * @return last regexp input
473     */
474    @Getter(where = Where.CONSTRUCTOR, attributes = Attribute.CONSTANT, name = "$3")
475    public static Object getGroup3(final Object self) {
476        final RegExpResult match = Global.instance().getLastRegExpResult();
477        return match == null ? "" : match.getGroup(3);
478    }
479
480    /**
481     * Getter for non-standard RegExp.$4 property.
482     * @param self self object
483     * @return last regexp input
484     */
485    @Getter(where = Where.CONSTRUCTOR, attributes = Attribute.CONSTANT, name = "$4")
486    public static Object getGroup4(final Object self) {
487        final RegExpResult match = Global.instance().getLastRegExpResult();
488        return match == null ? "" : match.getGroup(4);
489    }
490
491    /**
492     * Getter for non-standard RegExp.$5 property.
493     * @param self self object
494     * @return last regexp input
495     */
496    @Getter(where = Where.CONSTRUCTOR, attributes = Attribute.CONSTANT, name = "$5")
497    public static Object getGroup5(final Object self) {
498        final RegExpResult match = Global.instance().getLastRegExpResult();
499        return match == null ? "" : match.getGroup(5);
500    }
501
502    /**
503     * Getter for non-standard RegExp.$6 property.
504     * @param self self object
505     * @return last regexp input
506     */
507    @Getter(where = Where.CONSTRUCTOR, attributes = Attribute.CONSTANT, name = "$6")
508    public static Object getGroup6(final Object self) {
509        final RegExpResult match = Global.instance().getLastRegExpResult();
510        return match == null ? "" : match.getGroup(6);
511    }
512
513    /**
514     * Getter for non-standard RegExp.$7 property.
515     * @param self self object
516     * @return last regexp input
517     */
518    @Getter(where = Where.CONSTRUCTOR, attributes = Attribute.CONSTANT, name = "$7")
519    public static Object getGroup7(final Object self) {
520        final RegExpResult match = Global.instance().getLastRegExpResult();
521        return match == null ? "" : match.getGroup(7);
522    }
523
524    /**
525     * Getter for non-standard RegExp.$8 property.
526     * @param self self object
527     * @return last regexp input
528     */
529    @Getter(where = Where.CONSTRUCTOR, attributes = Attribute.CONSTANT, name = "$8")
530    public static Object getGroup8(final Object self) {
531        final RegExpResult match = Global.instance().getLastRegExpResult();
532        return match == null ? "" : match.getGroup(8);
533    }
534
535    /**
536     * Getter for non-standard RegExp.$9 property.
537     * @param self self object
538     * @return last regexp input
539     */
540    @Getter(where = Where.CONSTRUCTOR, attributes = Attribute.CONSTANT, name = "$9")
541    public static Object getGroup9(final Object self) {
542        final RegExpResult match = Global.instance().getLastRegExpResult();
543        return match == null ? "" : match.getGroup(9);
544    }
545
546    private RegExpResult execInner(final String string) {
547        final boolean isGlobal = regexp.isGlobal();
548        int start = getLastIndex();
549        if (!isGlobal) {
550            start = 0;
551        }
552
553        if (start < 0 || start > string.length()) {
554            if (isGlobal) {
555                setLastIndex(0);
556            }
557            return null;
558        }
559
560        final RegExpMatcher matcher = regexp.match(string);
561        if (matcher == null || !matcher.search(start)) {
562            if (isGlobal) {
563                setLastIndex(0);
564            }
565            return null;
566        }
567
568        if (isGlobal) {
569            setLastIndex(matcher.end());
570        }
571
572        final RegExpResult match = new RegExpResult(string, matcher.start(), groups(matcher));
573        globalObject.setLastRegExpResult(match);
574        return match;
575    }
576
577    // String.prototype.split method ignores the global flag and should not update lastIndex property.
578    private RegExpResult execSplit(final String string, final int start) {
579        if (start < 0 || start > string.length()) {
580            return null;
581        }
582
583        final RegExpMatcher matcher = regexp.match(string);
584        if (matcher == null || !matcher.search(start)) {
585            return null;
586        }
587
588        final RegExpResult match = new RegExpResult(string, matcher.start(), groups(matcher));
589        globalObject.setLastRegExpResult(match);
590        return match;
591    }
592
593    /**
594     * Convert java.util.regex.Matcher groups to JavaScript groups.
595     * That is, replace null and groups that didn't match with undefined.
596     */
597    private Object[] groups(final RegExpMatcher matcher) {
598        final int groupCount = matcher.groupCount();
599        final Object[] groups = new Object[groupCount + 1];
600        final BitVector groupsInNegativeLookahead  = regexp.getGroupsInNegativeLookahead();
601
602        for (int i = 0, lastGroupStart = matcher.start(); i <= groupCount; i++) {
603            final int groupStart = matcher.start(i);
604            if (lastGroupStart > groupStart
605                    || groupsInNegativeLookahead != null && groupsInNegativeLookahead.isSet(i)) {
606                // (1) ECMA 15.10.2.5 NOTE 3: need to clear Atom's captures each time Atom is repeated.
607                // (2) ECMA 15.10.2.8 NOTE 3: Backreferences to captures in (?!Disjunction) from elsewhere
608                // in the pattern always return undefined because the negative lookahead must fail.
609                groups[i] = UNDEFINED;
610                continue;
611            }
612            final String group = matcher.group(i);
613            groups[i] = group == null ? UNDEFINED : group;
614            lastGroupStart = groupStart;
615        }
616        return groups;
617    }
618
619    /**
620     * Executes a search for a match within a string based on a regular
621     * expression. It returns an array of information or null if no match is
622     * found.
623     *
624     * @param string String to match.
625     * @return NativeArray of matches, string or null.
626     */
627    public NativeRegExpExecResult exec(final String string) {
628        final RegExpResult match = execInner(string);
629
630        if (match == null) {
631            return null;
632        }
633
634        return new NativeRegExpExecResult(match, globalObject);
635    }
636
637    /**
638     * Executes a search for a match within a string based on a regular
639     * expression.
640     *
641     * @param string String to match.
642     * @return True if a match is found.
643     */
644    public boolean test(final String string) {
645        return execInner(string) != null;
646    }
647
    /**
     * Searches for matches of this regular expression in {@code string} and
     * replaces them. A non-global regexp replaces only the first match; a
     * global regexp replaces every match and resets {@code lastIndex} to 0
     * before searching.
     *
     * If {@code function} is non-null it is called for each match and its
     * result is used as the replacement text. Otherwise {@code replacement} is
     * used, in which substitution patterns such as {@code $1}, {@code $2},
     * {@code $&} are expanded with values from the match.
     *
     * @param string String to match.
     * @param replacement Replacement string; only used when {@code function} is null.
     * @param function Replacement function, or {@code null} to use {@code replacement}.
     * @return String with substitutions; {@code string} itself if nothing matched.
     * @throws Throwable if invoking the replacement function fails
     */
    String replace(final String string, final String replacement, final ScriptFunction function) throws Throwable {
        final RegExpMatcher matcher = regexp.match(string);

        if (matcher == null) {
            return string;
        }

        // Non-global: replace the first match only.
        if (!regexp.isGlobal()) {
            if (!matcher.search(0)) {
                return string;
            }

            final StringBuilder sb = new StringBuilder();
            sb.append(string, 0, matcher.start());

            if (function != null) {
                // Strict functions get undefined as 'this', others the global object.
                final Object self = function.isStrict() ? UNDEFINED : Global.instance();
                sb.append(callReplaceValue(getReplaceValueInvoker(), function, self, matcher, string));
            } else {
                appendReplacement(matcher, string, replacement, sb);
            }
            sb.append(string, matcher.end(), string.length());
            return sb.toString();
        }

        // Global: start over from the beginning of the string.
        setLastIndex(0);

        if (!matcher.search(0)) {
            return string;
        }

        // thisIndex: end of the most recent match, i.e. start of the text not yet copied.
        // previousLastIndex: position from which the next search starts.
        int thisIndex = 0;
        int previousLastIndex = 0;
        final StringBuilder sb = new StringBuilder();

        // Looked up once, outside the loop; both stay unused when there is no function.
        final MethodHandle invoker = function == null ? null : getReplaceValueInvoker();
        final Object self = function == null || function.isStrict() ? UNDEFINED : Global.instance();

        do {
            // Copy the unmatched text between the previous match and this one.
            sb.append(string, thisIndex, matcher.start());
            if (function != null) {
                sb.append(callReplaceValue(invoker, function, self, matcher, string));
            } else {
                appendReplacement(matcher, string, replacement, sb);
            }

            thisIndex = matcher.end();
            if (thisIndex == string.length() && matcher.start() == matcher.end()) {
                // Avoid getting empty match at end of string twice
                break;
            }

            // ECMA 15.5.4.10 String.prototype.match(regexp)
            // If the match ended where the search started, step one position
            // forward so that the next search makes progress.
            if (thisIndex == previousLastIndex) {
                setLastIndex(thisIndex + 1);
                previousLastIndex = thisIndex + 1;
            } else {
                previousLastIndex = thisIndex;
            }
        } while (previousLastIndex <= string.length() && matcher.search(previousLastIndex));

        // Copy whatever follows the last match.
        sb.append(string, thisIndex, string.length());

        return sb.toString();
    }
723
    /**
     * Appends {@code replacement} to {@code sb}, expanding the substitution
     * patterns it contains with values from the current match.
     *
     * @param matcher     matcher positioned on the current match
     * @param text        the string being searched
     * @param replacement replacement template
     * @param sb          builder receiving the expanded replacement
     */
    private void appendReplacement(final RegExpMatcher matcher, final String text, final String replacement, final StringBuilder sb) {
        /*
         * Process substitution patterns:
         *
         * $$ -> $
         * $& -> the matched substring
         * $` -> the portion of string that precedes matched substring
         * $' -> the portion of string that follows the matched substring
         * $n -> the nth capture, where n is [1-9] and $n is NOT followed by a decimal digit
         * $nn -> the nnth capture, where nn is a two digit decimal number [01-99].
         */

        int cursor = 0;
        // Capture groups of the match, computed on first use only.
        Object[] groups = null;

        while (cursor < replacement.length()) {
            char nextChar = replacement.charAt(cursor);
            if (nextChar == '$') {
                // Skip past $
                cursor++;
                if (cursor == replacement.length()) {
                    // nothing after "$"
                    sb.append('$');
                    break;
                }

                nextChar = replacement.charAt(cursor);
                // Negative or greater than 9 when nextChar is not a decimal digit.
                final int firstDigit = nextChar - '0';

                if (firstDigit >= 0 && firstDigit <= 9 && firstDigit <= matcher.groupCount()) {
                    // $0 is not supported, but $01 is. implementation-defined: if n>m, ignore second digit.
                    int refNum = firstDigit;
                    cursor++;
                    if (cursor < replacement.length() && firstDigit < matcher.groupCount()) {
                        final int secondDigit = replacement.charAt(cursor) - '0';
                        if (secondDigit >= 0 && secondDigit <= 9) {
                            final int newRefNum = firstDigit * 10 + secondDigit;
                            if (newRefNum <= matcher.groupCount() && newRefNum > 0) {
                                // $nn ($01-$99)
                                refNum = newRefNum;
                                cursor++;
                            }
                        }
                    }
                    if (refNum > 0) {
                        if (groups == null) {
                            groups = groups(matcher);
                        }
                        // Append group if matched.
                        if (groups[refNum] != UNDEFINED) {
                            sb.append((String) groups[refNum]);
                        }
                    } else { // $0. Not a substitution; emit it literally.
                        assert refNum == 0;
                        sb.append("$0");
                    }
                } else if (nextChar == '$') {
                    sb.append('$');
                    cursor++;
                } else if (nextChar == '&') {
                    sb.append(matcher.group());
                    cursor++;
                } else if (nextChar == '`') {
                    sb.append(text, 0, matcher.start());
                    cursor++;
                } else if (nextChar == '\'') {
                    sb.append(text, matcher.end(), text.length());
                    cursor++;
                } else {
                    // unknown substitution or $n with n>m. Emit the '$' literally; the
                    // cursor stays on nextChar, which the next iteration handles.
                    sb.append('$');
                }
            } else {
                sb.append(nextChar);
                cursor++;
            }
        }
    }
802
803    private static final Object REPLACE_VALUE = new Object();
804
805    private static MethodHandle getReplaceValueInvoker() {
806        return Global.instance().getDynamicInvoker(REPLACE_VALUE,
807                new Callable<MethodHandle>() {
808                    @Override
809                    public MethodHandle call() {
810                        return Bootstrap.createDynamicCallInvoker(String.class, ScriptFunction.class, Object.class, Object[].class);
811                    }
812                });
813    }
814
815    private String callReplaceValue(final MethodHandle invoker, final ScriptFunction function, final Object self, final RegExpMatcher matcher, final String string) throws Throwable {
816        final Object[] groups = groups(matcher);
817        final Object[] args   = Arrays.copyOf(groups, groups.length + 2);
818
819        args[groups.length]     = matcher.start();
820        args[groups.length + 1] = string;
821
822        return (String)invoker.invokeExact(function, self, args);
823    }
824
825    /**
826     * Breaks up a string into an array of substrings based on a regular
827     * expression or fixed string.
828     *
829     * @param string String to match.
830     * @param limit  Split limit.
831     * @return Array of substrings.
832     */
833    NativeArray split(final String string, final long limit) {
834        if (limit == 0L) {
835            return new NativeArray();
836        }
837
838        final List<Object> matches = new ArrayList<>();
839
840        RegExpResult match;
841        final int inputLength = string.length();
842        int splitLastLength = -1;
843        int splitLastIndex = 0;
844        int splitLastLastIndex = 0;
845
846        while ((match = execSplit(string, splitLastIndex)) != null) {
847            splitLastIndex = match.getIndex() + match.length();
848
849            if (splitLastIndex > splitLastLastIndex) {
850                matches.add(string.substring(splitLastLastIndex, match.getIndex()));
851                final Object[] groups = match.getGroups();
852                if (groups.length > 1 && match.getIndex() < inputLength) {
853                    for (int index = 1; index < groups.length && matches.size() < limit; index++) {
854                        matches.add(groups[index]);
855                    }
856                }
857
858                splitLastLength = match.length();
859
860                if (matches.size() >= limit) {
861                    break;
862                }
863            }
864
865            // bump the index to avoid infinite loop
866            if (splitLastIndex == splitLastLastIndex) {
867                splitLastIndex++;
868            } else {
869                splitLastLastIndex = splitLastIndex;
870            }
871        }
872
873        if (matches.size() < limit) {
874            // check special case if we need to append an empty string at the
875            // end of the match
876            // if the lastIndex was the entire string
877            if (splitLastLastIndex == string.length()) {
878                if (splitLastLength > 0 || execSplit("", 0) == null) {
879                    matches.add("");
880                }
881            } else {
882                matches.add(string.substring(splitLastLastIndex, inputLength));
883            }
884        }
885
886        return new NativeArray(matches.toArray());
887    }
888
889    /**
890     * Tests for a match in a string. It returns the index of the match, or -1
891     * if not found.
892     *
893     * @param string String to match.
894     * @return Index of match.
895     */
896    int search(final String string) {
897        final RegExpResult match = execInner(string);
898
899        if (match == null) {
900            return -1;
901        }
902
903        return match.getIndex();
904    }
905
906    /**
907     * Fast lastIndex getter
908     * @return last index property as int
909     */
910    public int getLastIndex() {
911        return JSType.toInteger(lastIndex);
912    }
913
914    /**
915     * Fast lastIndex getter
916     * @return last index property as boxed integer
917     */
918    public Object getLastIndexObject() {
919        return lastIndex;
920    }
921
922    /**
923     * Fast lastIndex setter
924     * @param lastIndex lastIndex
925     */
926    public void setLastIndex(final int lastIndex) {
927        this.lastIndex = JSType.toObject(lastIndex);
928    }
929
930    private static NativeRegExp checkRegExp(final Object self) {
931        if (self instanceof NativeRegExp) {
932            return (NativeRegExp)self;
933        } else if (self != null && self == Global.instance().getRegExpPrototype()) {
934            return Global.instance().getDefaultRegExp();
935        } else {
936            throw typeError("not.a.regexp", ScriptRuntime.safeToString(self));
937        }
938    }
939
940    boolean getGlobal() {
941        return regexp.isGlobal();
942    }
943
944    private RegExp getRegExp() {
945        return regexp;
946    }
947
948    private void setRegExp(final RegExp regexp) {
949        this.regexp = regexp;
950    }
951
952}
953