NativeRegExp.java revision 953:221a84ef44c0
1/* 2 * Copyright (c) 2010, 2013, Oracle and/or its affiliates. All rights reserved. 3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 4 * 5 * This code is free software; you can redistribute it and/or modify it 6 * under the terms of the GNU General Public License version 2 only, as 7 * published by the Free Software Foundation. Oracle designates this 8 * particular file as subject to the "Classpath" exception as provided 9 * by Oracle in the LICENSE file that accompanied this code. 10 * 11 * This code is distributed in the hope that it will be useful, but WITHOUT 12 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 13 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 14 * version 2 for more details (a copy is included in the LICENSE file that 15 * accompanied this code). 16 * 17 * You should have received a copy of the GNU General Public License version 18 * 2 along with this work; if not, write to the Free Software Foundation, 19 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 20 * 21 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA 22 * or visit www.oracle.com if you need additional information or have any 23 * questions. 24 */ 25 26package jdk.nashorn.internal.objects; 27 28import static jdk.nashorn.internal.runtime.ECMAErrors.typeError; 29import static jdk.nashorn.internal.runtime.ScriptRuntime.UNDEFINED; 30 31import java.lang.invoke.MethodHandle; 32import java.util.ArrayList; 33import java.util.Arrays; 34import java.util.List; 35import java.util.concurrent.Callable; 36import jdk.nashorn.internal.objects.annotations.Attribute; 37import jdk.nashorn.internal.objects.annotations.Constructor; 38import jdk.nashorn.internal.objects.annotations.Function; 39import jdk.nashorn.internal.objects.annotations.Getter; 40import jdk.nashorn.internal.objects.annotations.Property; 41import jdk.nashorn.internal.objects.annotations.ScriptClass; 42import jdk.nashorn.internal.objects.annotations.SpecializedConstructor; 43import jdk.nashorn.internal.objects.annotations.Where; 44import jdk.nashorn.internal.runtime.BitVector; 45import jdk.nashorn.internal.runtime.JSType; 46import jdk.nashorn.internal.runtime.ParserException; 47import jdk.nashorn.internal.runtime.PropertyMap; 48import jdk.nashorn.internal.runtime.ScriptFunction; 49import jdk.nashorn.internal.runtime.ScriptObject; 50import jdk.nashorn.internal.runtime.ScriptRuntime; 51import jdk.nashorn.internal.runtime.linker.Bootstrap; 52import jdk.nashorn.internal.runtime.regexp.RegExp; 53import jdk.nashorn.internal.runtime.regexp.RegExpFactory; 54import jdk.nashorn.internal.runtime.regexp.RegExpMatcher; 55import jdk.nashorn.internal.runtime.regexp.RegExpResult; 56 57/** 58 * ECMA 15.10 RegExp Objects. 59 */ 60@ScriptClass("RegExp") 61public final class NativeRegExp extends ScriptObject { 62 /** ECMA 15.10.7.5 lastIndex property */ 63 @Property(attributes = Attribute.NOT_ENUMERABLE | Attribute.NOT_CONFIGURABLE) 64 public Object lastIndex; 65 66 /** Compiled regexp */ 67 private RegExp regexp; 68 69 // Reference to global object needed to support static RegExp properties 70 private final Global globalObject; 71 72 // initialized by nasgen 73 private static PropertyMap $nasgenmap$; 74 75 private NativeRegExp(final Global global) { 76 super(global.getRegExpPrototype(), $nasgenmap$); 77 this.globalObject = global; 78 } 79 80 NativeRegExp(final String input, final String flagString, final Global global) { 81 this(global); 82 try { 83 this.regexp = RegExpFactory.create(input, flagString); 84 } catch (final ParserException e) { 85 // translate it as SyntaxError object and throw it 86 e.throwAsEcmaException(); 87 throw new AssertionError(); //guard against null warnings below 88 } 89 90 this.setLastIndex(0); 91 } 92 93 NativeRegExp(final String input, final String flagString) { 94 this(input, flagString, Global.instance()); 95 } 96 97 NativeRegExp(final String string, final Global global) { 98 this(string, "", global); 99 } 100 101 NativeRegExp(final String string) { 102 this(string, Global.instance()); 103 } 104 105 NativeRegExp(final NativeRegExp regExp) { 106 this(Global.instance()); 107 this.lastIndex = regExp.getLastIndexObject(); 108 this.regexp = regExp.getRegExp(); 109 } 110 111 @Override 112 public String getClassName() { 113 return "RegExp"; 114 } 115 116 /** 117 * ECMA 15.10.4 118 * 119 * Constructor 120 * 121 * @param isNew is the new operator used for instantiating this regexp 122 * @param self self reference 123 * @param args arguments (optional: pattern and flags) 124 * @return new NativeRegExp 125 */ 126 @Constructor(arity = 2) 127 public static NativeRegExp constructor(final boolean isNew, final Object self, final Object... args) { 128 if (args.length > 1) { 129 return newRegExp(args[0], args[1]); 130 } else if (args.length > 0) { 131 return newRegExp(args[0], UNDEFINED); 132 } 133 134 return newRegExp(UNDEFINED, UNDEFINED); 135 } 136 137 /** 138 * ECMA 15.10.4 139 * 140 * Constructor - specialized version, no args, empty regexp 141 * 142 * @param isNew is the new operator used for instantiating this regexp 143 * @param self self reference 144 * @return new NativeRegExp 145 */ 146 @SpecializedConstructor 147 public static NativeRegExp constructor(final boolean isNew, final Object self) { 148 return new NativeRegExp("", ""); 149 } 150 151 /** 152 * ECMA 15.10.4 153 * 154 * Constructor - specialized version, pattern, no flags 155 * 156 * @param isNew is the new operator used for instantiating this regexp 157 * @param self self reference 158 * @param pattern pattern 159 * @return new NativeRegExp 160 */ 161 @SpecializedConstructor 162 public static NativeRegExp constructor(final boolean isNew, final Object self, final Object pattern) { 163 return newRegExp(pattern, UNDEFINED); 164 } 165 166 /** 167 * ECMA 15.10.4 168 * 169 * Constructor - specialized version, pattern and flags 170 * 171 * @param isNew is the new operator used for instantiating this regexp 172 * @param self self reference 173 * @param pattern pattern 174 * @param flags flags 175 * @return new NativeRegExp 176 */ 177 @SpecializedConstructor 178 public static NativeRegExp constructor(final boolean isNew, final Object self, final Object pattern, final Object flags) { 179 return newRegExp(pattern, flags); 180 } 181 182 /** 183 * External constructor used in generated code, which explains the public access 184 * 185 * @param regexp regexp 186 * @param flags flags 187 * @return new NativeRegExp 188 */ 189 public static NativeRegExp newRegExp(final Object regexp, final Object flags) { 190 String patternString = ""; 191 String flagString = ""; 192 193 if (regexp != UNDEFINED) { 194 if (regexp instanceof NativeRegExp) { 195 if (flags != UNDEFINED) { 196 throw typeError("regex.cant.supply.flags"); 197 } 198 return (NativeRegExp)regexp; // 15.10.3.1 - undefined flags and regexp as 199 } 200 patternString = JSType.toString(regexp); 201 } 202 203 if (flags != UNDEFINED) { 204 flagString = JSType.toString(flags); 205 } 206 207 return new NativeRegExp(patternString, flagString); 208 } 209 210 /** 211 * Build a regexp that matches {@code string} as-is. All meta-characters will be escaped. 212 * 213 * @param string pattern string 214 * @return flat regexp 215 */ 216 static NativeRegExp flatRegExp(final String string) { 217 // escape special characters 218 StringBuilder sb = null; 219 final int length = string.length(); 220 221 for (int i = 0; i < length; i++) { 222 final char c = string.charAt(i); 223 switch (c) { 224 case '^': 225 case '$': 226 case '\\': 227 case '.': 228 case '*': 229 case '+': 230 case '?': 231 case '(': 232 case ')': 233 case '[': 234 case '{': 235 case '|': 236 if (sb == null) { 237 sb = new StringBuilder(length * 2); 238 sb.append(string, 0, i); 239 } 240 sb.append('\\'); 241 sb.append(c); 242 break; 243 default: 244 if (sb != null) { 245 sb.append(c); 246 } 247 break; 248 } 249 } 250 return new NativeRegExp(sb == null ? string : sb.toString(), ""); 251 } 252 253 private String getFlagString() { 254 final StringBuilder sb = new StringBuilder(3); 255 256 if (regexp.isGlobal()) { 257 sb.append('g'); 258 } 259 if (regexp.isIgnoreCase()) { 260 sb.append('i'); 261 } 262 if (regexp.isMultiline()) { 263 sb.append('m'); 264 } 265 266 return sb.toString(); 267 } 268 269 @Override 270 public String safeToString() { 271 return "[RegExp " + toString() + "]"; 272 } 273 274 @Override 275 public String toString() { 276 return "/" + regexp.getSource() + "/" + getFlagString(); 277 } 278 279 /** 280 * Nashorn extension: RegExp.prototype.compile - everybody implements this! 281 * 282 * @param self self reference 283 * @param pattern pattern 284 * @param flags flags 285 * @return new NativeRegExp 286 */ 287 @Function(attributes = Attribute.NOT_ENUMERABLE) 288 public static ScriptObject compile(final Object self, final Object pattern, final Object flags) { 289 final NativeRegExp regExp = checkRegExp(self); 290 final NativeRegExp compiled = newRegExp(pattern, flags); 291 // copy over regexp to 'self' 292 regExp.setRegExp(compiled.getRegExp()); 293 294 // Some implementations return undefined. Some return 'self'. Since return 295 // value is most likely be ignored, we can play safe and return 'self'. 296 return regExp; 297 } 298 299 /** 300 * ECMA 15.10.6.2 RegExp.prototype.exec(string) 301 * 302 * @param self self reference 303 * @param string string to match against regexp 304 * @return array containing the matches or {@code null} if no match 305 */ 306 @Function(attributes = Attribute.NOT_ENUMERABLE) 307 public static ScriptObject exec(final Object self, final Object string) { 308 return checkRegExp(self).exec(JSType.toString(string)); 309 } 310 311 /** 312 * ECMA 15.10.6.3 RegExp.prototype.test(string) 313 * 314 * @param self self reference 315 * @param string string to test for matches against regexp 316 * @return true if matches found, false otherwise 317 */ 318 @Function(attributes = Attribute.NOT_ENUMERABLE) 319 public static boolean test(final Object self, final Object string) { 320 return checkRegExp(self).test(JSType.toString(string)); 321 } 322 323 /** 324 * ECMA 15.10.6.4 RegExp.prototype.toString() 325 * 326 * @param self self reference 327 * @return string version of regexp 328 */ 329 @Function(attributes = Attribute.NOT_ENUMERABLE) 330 public static String toString(final Object self) { 331 return checkRegExp(self).toString(); 332 } 333 334 /** 335 * ECMA 15.10.7.1 source 336 * 337 * @param self self reference 338 * @return the input string for the regexp 339 */ 340 @Getter(attributes = Attribute.NON_ENUMERABLE_CONSTANT) 341 public static Object source(final Object self) { 342 return checkRegExp(self).getRegExp().getSource(); 343 } 344 345 /** 346 * ECMA 15.10.7.2 global 347 * 348 * @param self self reference 349 * @return true if this regexp is flagged global, false otherwise 350 */ 351 @Getter(attributes = Attribute.NON_ENUMERABLE_CONSTANT) 352 public static Object global(final Object self) { 353 return checkRegExp(self).getRegExp().isGlobal(); 354 } 355 356 /** 357 * ECMA 15.10.7.3 ignoreCase 358 * 359 * @param self self reference 360 * @return true if this regexp if flagged to ignore case, false otherwise 361 */ 362 @Getter(attributes = Attribute.NON_ENUMERABLE_CONSTANT) 363 public static Object ignoreCase(final Object self) { 364 return checkRegExp(self).getRegExp().isIgnoreCase(); 365 } 366 367 /** 368 * ECMA 15.10.7.4 multiline 369 * 370 * @param self self reference 371 * @return true if this regexp is flagged to be multiline, false otherwise 372 */ 373 @Getter(attributes = Attribute.NON_ENUMERABLE_CONSTANT) 374 public static Object multiline(final Object self) { 375 return checkRegExp(self).getRegExp().isMultiline(); 376 } 377 378 /** 379 * Getter for non-standard RegExp.input property. 380 * @param self self object 381 * @return last regexp input 382 */ 383 @Getter(where = Where.CONSTRUCTOR, attributes = Attribute.CONSTANT, name = "input") 384 public static Object getLastInput(final Object self) { 385 final RegExpResult match = Global.instance().getLastRegExpResult(); 386 return match == null ? "" : match.getInput(); 387 } 388 389 /** 390 * Getter for non-standard RegExp.multiline property. 391 * @param self self object 392 * @return last regexp input 393 */ 394 @Getter(where = Where.CONSTRUCTOR, attributes = Attribute.CONSTANT, name = "multiline") 395 public static Object getLastMultiline(final Object self) { 396 return false; // doesn't ever seem to become true and isn't documented anyhwere 397 } 398 399 /** 400 * Getter for non-standard RegExp.lastMatch property. 401 * @param self self object 402 * @return last regexp input 403 */ 404 @Getter(where = Where.CONSTRUCTOR, attributes = Attribute.CONSTANT, name = "lastMatch") 405 public static Object getLastMatch(final Object self) { 406 final RegExpResult match = Global.instance().getLastRegExpResult(); 407 return match == null ? "" : match.getGroup(0); 408 } 409 410 /** 411 * Getter for non-standard RegExp.lastParen property. 412 * @param self self object 413 * @return last regexp input 414 */ 415 @Getter(where = Where.CONSTRUCTOR, attributes = Attribute.CONSTANT, name = "lastParen") 416 public static Object getLastParen(final Object self) { 417 final RegExpResult match = Global.instance().getLastRegExpResult(); 418 return match == null ? "" : match.getLastParen(); 419 } 420 421 /** 422 * Getter for non-standard RegExp.leftContext property. 423 * @param self self object 424 * @return last regexp input 425 */ 426 @Getter(where = Where.CONSTRUCTOR, attributes = Attribute.CONSTANT, name = "leftContext") 427 public static Object getLeftContext(final Object self) { 428 final RegExpResult match = Global.instance().getLastRegExpResult(); 429 return match == null ? "" : match.getInput().substring(0, match.getIndex()); 430 } 431 432 /** 433 * Getter for non-standard RegExp.rightContext property. 434 * @param self self object 435 * @return last regexp input 436 */ 437 @Getter(where = Where.CONSTRUCTOR, attributes = Attribute.CONSTANT, name = "rightContext") 438 public static Object getRightContext(final Object self) { 439 final RegExpResult match = Global.instance().getLastRegExpResult(); 440 return match == null ? "" : match.getInput().substring(match.getIndex() + match.length()); 441 } 442 443 /** 444 * Getter for non-standard RegExp.$1 property. 445 * @param self self object 446 * @return last regexp input 447 */ 448 @Getter(where = Where.CONSTRUCTOR, attributes = Attribute.CONSTANT, name = "$1") 449 public static Object getGroup1(final Object self) { 450 final RegExpResult match = Global.instance().getLastRegExpResult(); 451 return match == null ? "" : match.getGroup(1); 452 } 453 454 /** 455 * Getter for non-standard RegExp.$2 property. 456 * @param self self object 457 * @return last regexp input 458 */ 459 @Getter(where = Where.CONSTRUCTOR, attributes = Attribute.CONSTANT, name = "$2") 460 public static Object getGroup2(final Object self) { 461 final RegExpResult match = Global.instance().getLastRegExpResult(); 462 return match == null ? "" : match.getGroup(2); 463 } 464 465 /** 466 * Getter for non-standard RegExp.$3 property. 467 * @param self self object 468 * @return last regexp input 469 */ 470 @Getter(where = Where.CONSTRUCTOR, attributes = Attribute.CONSTANT, name = "$3") 471 public static Object getGroup3(final Object self) { 472 final RegExpResult match = Global.instance().getLastRegExpResult(); 473 return match == null ? "" : match.getGroup(3); 474 } 475 476 /** 477 * Getter for non-standard RegExp.$4 property. 478 * @param self self object 479 * @return last regexp input 480 */ 481 @Getter(where = Where.CONSTRUCTOR, attributes = Attribute.CONSTANT, name = "$4") 482 public static Object getGroup4(final Object self) { 483 final RegExpResult match = Global.instance().getLastRegExpResult(); 484 return match == null ? "" : match.getGroup(4); 485 } 486 487 /** 488 * Getter for non-standard RegExp.$5 property. 489 * @param self self object 490 * @return last regexp input 491 */ 492 @Getter(where = Where.CONSTRUCTOR, attributes = Attribute.CONSTANT, name = "$5") 493 public static Object getGroup5(final Object self) { 494 final RegExpResult match = Global.instance().getLastRegExpResult(); 495 return match == null ? "" : match.getGroup(5); 496 } 497 498 /** 499 * Getter for non-standard RegExp.$6 property. 500 * @param self self object 501 * @return last regexp input 502 */ 503 @Getter(where = Where.CONSTRUCTOR, attributes = Attribute.CONSTANT, name = "$6") 504 public static Object getGroup6(final Object self) { 505 final RegExpResult match = Global.instance().getLastRegExpResult(); 506 return match == null ? "" : match.getGroup(6); 507 } 508 509 /** 510 * Getter for non-standard RegExp.$7 property. 511 * @param self self object 512 * @return last regexp input 513 */ 514 @Getter(where = Where.CONSTRUCTOR, attributes = Attribute.CONSTANT, name = "$7") 515 public static Object getGroup7(final Object self) { 516 final RegExpResult match = Global.instance().getLastRegExpResult(); 517 return match == null ? "" : match.getGroup(7); 518 } 519 520 /** 521 * Getter for non-standard RegExp.$8 property. 522 * @param self self object 523 * @return last regexp input 524 */ 525 @Getter(where = Where.CONSTRUCTOR, attributes = Attribute.CONSTANT, name = "$8") 526 public static Object getGroup8(final Object self) { 527 final RegExpResult match = Global.instance().getLastRegExpResult(); 528 return match == null ? "" : match.getGroup(8); 529 } 530 531 /** 532 * Getter for non-standard RegExp.$9 property. 533 * @param self self object 534 * @return last regexp input 535 */ 536 @Getter(where = Where.CONSTRUCTOR, attributes = Attribute.CONSTANT, name = "$9") 537 public static Object getGroup9(final Object self) { 538 final RegExpResult match = Global.instance().getLastRegExpResult(); 539 return match == null ? "" : match.getGroup(9); 540 } 541 542 private RegExpResult execInner(final String string) { 543 final boolean isGlobal = regexp.isGlobal(); 544 int start = getLastIndex(); 545 if (!isGlobal) { 546 start = 0; 547 } 548 549 if (start < 0 || start > string.length()) { 550 if (isGlobal) { 551 setLastIndex(0); 552 } 553 return null; 554 } 555 556 final RegExpMatcher matcher = regexp.match(string); 557 if (matcher == null || !matcher.search(start)) { 558 if (isGlobal) { 559 setLastIndex(0); 560 } 561 return null; 562 } 563 564 if (isGlobal) { 565 setLastIndex(matcher.end()); 566 } 567 568 final RegExpResult match = new RegExpResult(string, matcher.start(), groups(matcher)); 569 globalObject.setLastRegExpResult(match); 570 return match; 571 } 572 573 // String.prototype.split method ignores the global flag and should not update lastIndex property. 574 private RegExpResult execSplit(final String string, final int start) { 575 if (start < 0 || start > string.length()) { 576 return null; 577 } 578 579 final RegExpMatcher matcher = regexp.match(string); 580 if (matcher == null || !matcher.search(start)) { 581 return null; 582 } 583 584 final RegExpResult match = new RegExpResult(string, matcher.start(), groups(matcher)); 585 globalObject.setLastRegExpResult(match); 586 return match; 587 } 588 589 /** 590 * Convert java.util.regex.Matcher groups to JavaScript groups. 591 * That is, replace null and groups that didn't match with undefined. 592 */ 593 private Object[] groups(final RegExpMatcher matcher) { 594 final int groupCount = matcher.groupCount(); 595 final Object[] groups = new Object[groupCount + 1]; 596 final BitVector groupsInNegativeLookahead = regexp.getGroupsInNegativeLookahead(); 597 598 for (int i = 0, lastGroupStart = matcher.start(); i <= groupCount; i++) { 599 final int groupStart = matcher.start(i); 600 if (lastGroupStart > groupStart 601 || groupsInNegativeLookahead != null && groupsInNegativeLookahead.isSet(i)) { 602 // (1) ECMA 15.10.2.5 NOTE 3: need to clear Atom's captures each time Atom is repeated. 603 // (2) ECMA 15.10.2.8 NOTE 3: Backreferences to captures in (?!Disjunction) from elsewhere 604 // in the pattern always return undefined because the negative lookahead must fail. 605 groups[i] = UNDEFINED; 606 continue; 607 } 608 final String group = matcher.group(i); 609 groups[i] = group == null ? UNDEFINED : group; 610 lastGroupStart = groupStart; 611 } 612 return groups; 613 } 614 615 /** 616 * Executes a search for a match within a string based on a regular 617 * expression. It returns an array of information or null if no match is 618 * found. 619 * 620 * @param string String to match. 621 * @return NativeArray of matches, string or null. 622 */ 623 public NativeRegExpExecResult exec(final String string) { 624 final RegExpResult match = execInner(string); 625 626 if (match == null) { 627 return null; 628 } 629 630 return new NativeRegExpExecResult(match, globalObject); 631 } 632 633 /** 634 * Executes a search for a match within a string based on a regular 635 * expression. 636 * 637 * @param string String to match. 638 * @return True if a match is found. 639 */ 640 public boolean test(final String string) { 641 return execInner(string) != null; 642 } 643 644 /** 645 * Searches and replaces the regular expression portion (match) with the 646 * replaced text instead. For the "replacement text" parameter, you can use 647 * the keywords $1 to $2 to replace the original text with values from 648 * sub-patterns defined within the main pattern. 649 * 650 * @param string String to match. 651 * @param replacement Replacement string. 652 * @return String with substitutions. 653 */ 654 String replace(final String string, final String replacement, final ScriptFunction function) throws Throwable { 655 final RegExpMatcher matcher = regexp.match(string); 656 657 if (matcher == null) { 658 return string; 659 } 660 661 if (!regexp.isGlobal()) { 662 if (!matcher.search(0)) { 663 return string; 664 } 665 666 final StringBuilder sb = new StringBuilder(); 667 sb.append(string, 0, matcher.start()); 668 669 if (function != null) { 670 final Object self = function.isStrict() ? UNDEFINED : Global.instance(); 671 sb.append(callReplaceValue(getReplaceValueInvoker(), function, self, matcher, string)); 672 } else { 673 appendReplacement(matcher, string, replacement, sb); 674 } 675 sb.append(string, matcher.end(), string.length()); 676 return sb.toString(); 677 } 678 679 setLastIndex(0); 680 681 if (!matcher.search(0)) { 682 return string; 683 } 684 685 int thisIndex = 0; 686 int previousLastIndex = 0; 687 final StringBuilder sb = new StringBuilder(); 688 689 final MethodHandle invoker = function == null ? null : getReplaceValueInvoker(); 690 final Object self = function == null || function.isStrict() ? UNDEFINED : Global.instance(); 691 692 do { 693 sb.append(string, thisIndex, matcher.start()); 694 if (function != null) { 695 sb.append(callReplaceValue(invoker, function, self, matcher, string)); 696 } else { 697 appendReplacement(matcher, string, replacement, sb); 698 } 699 700 thisIndex = matcher.end(); 701 if (thisIndex == string.length() && matcher.start() == matcher.end()) { 702 // Avoid getting empty match at end of string twice 703 break; 704 } 705 706 // ECMA 15.5.4.10 String.prototype.match(regexp) 707 if (thisIndex == previousLastIndex) { 708 setLastIndex(thisIndex + 1); 709 previousLastIndex = thisIndex + 1; 710 } else { 711 previousLastIndex = thisIndex; 712 } 713 } while (previousLastIndex <= string.length() && matcher.search(previousLastIndex)); 714 715 sb.append(string, thisIndex, string.length()); 716 717 return sb.toString(); 718 } 719 720 private void appendReplacement(final RegExpMatcher matcher, final String text, final String replacement, final StringBuilder sb) { 721 /* 722 * Process substitution patterns: 723 * 724 * $$ -> $ 725 * $& -> the matched substring 726 * $` -> the portion of string that preceeds matched substring 727 * $' -> the portion of string that follows the matched substring 728 * $n -> the nth capture, where n is [1-9] and $n is NOT followed by a decimal digit 729 * $nn -> the nnth capture, where nn is a two digit decimal number [01-99]. 730 */ 731 732 int cursor = 0; 733 Object[] groups = null; 734 735 while (cursor < replacement.length()) { 736 char nextChar = replacement.charAt(cursor); 737 if (nextChar == '$') { 738 // Skip past $ 739 cursor++; 740 if (cursor == replacement.length()) { 741 // nothing after "$" 742 sb.append('$'); 743 break; 744 } 745 746 nextChar = replacement.charAt(cursor); 747 final int firstDigit = nextChar - '0'; 748 749 if (firstDigit >= 0 && firstDigit <= 9 && firstDigit <= matcher.groupCount()) { 750 // $0 is not supported, but $01 is. implementation-defined: if n>m, ignore second digit. 751 int refNum = firstDigit; 752 cursor++; 753 if (cursor < replacement.length() && firstDigit < matcher.groupCount()) { 754 final int secondDigit = replacement.charAt(cursor) - '0'; 755 if (secondDigit >= 0 && secondDigit <= 9) { 756 final int newRefNum = firstDigit * 10 + secondDigit; 757 if (newRefNum <= matcher.groupCount() && newRefNum > 0) { 758 // $nn ($01-$99) 759 refNum = newRefNum; 760 cursor++; 761 } 762 } 763 } 764 if (refNum > 0) { 765 if (groups == null) { 766 groups = groups(matcher); 767 } 768 // Append group if matched. 769 if (groups[refNum] != UNDEFINED) { 770 sb.append((String) groups[refNum]); 771 } 772 } else { // $0. ignore. 773 assert refNum == 0; 774 sb.append("$0"); 775 } 776 } else if (nextChar == '$') { 777 sb.append('$'); 778 cursor++; 779 } else if (nextChar == '&') { 780 sb.append(matcher.group()); 781 cursor++; 782 } else if (nextChar == '`') { 783 sb.append(text, 0, matcher.start()); 784 cursor++; 785 } else if (nextChar == '\'') { 786 sb.append(text, matcher.end(), text.length()); 787 cursor++; 788 } else { 789 // unknown substitution or $n with n>m. skip. 790 sb.append('$'); 791 } 792 } else { 793 sb.append(nextChar); 794 cursor++; 795 } 796 } 797 } 798 799 private static final Object REPLACE_VALUE = new Object(); 800 801 private static final MethodHandle getReplaceValueInvoker() { 802 return Global.instance().getDynamicInvoker(REPLACE_VALUE, 803 new Callable<MethodHandle>() { 804 @Override 805 public MethodHandle call() { 806 return Bootstrap.createDynamicInvoker("dyn:call", String.class, ScriptFunction.class, Object.class, Object[].class); 807 } 808 }); 809 } 810 811 private String callReplaceValue(final MethodHandle invoker, final ScriptFunction function, final Object self, final RegExpMatcher matcher, final String string) throws Throwable { 812 final Object[] groups = groups(matcher); 813 final Object[] args = Arrays.copyOf(groups, groups.length + 2); 814 815 args[groups.length] = matcher.start(); 816 args[groups.length + 1] = string; 817 818 return (String)invoker.invokeExact(function, self, args); 819 } 820 821 /** 822 * Breaks up a string into an array of substrings based on a regular 823 * expression or fixed string. 824 * 825 * @param string String to match. 826 * @param limit Split limit. 827 * @return Array of substrings. 828 */ 829 NativeArray split(final String string, final long limit) { 830 if (limit == 0L) { 831 return new NativeArray(); 832 } 833 834 final List<Object> matches = new ArrayList<>(); 835 836 RegExpResult match; 837 final int inputLength = string.length(); 838 int splitLastLength = -1; 839 int splitLastIndex = 0; 840 int splitLastLastIndex = 0; 841 842 while ((match = execSplit(string, splitLastIndex)) != null) { 843 splitLastIndex = match.getIndex() + match.length(); 844 845 if (splitLastIndex > splitLastLastIndex) { 846 matches.add(string.substring(splitLastLastIndex, match.getIndex())); 847 final Object[] groups = match.getGroups(); 848 if (groups.length > 1 && match.getIndex() < inputLength) { 849 for (int index = 1; index < groups.length && matches.size() < limit; index++) { 850 matches.add(groups[index]); 851 } 852 } 853 854 splitLastLength = match.length(); 855 856 if (matches.size() >= limit) { 857 break; 858 } 859 } 860 861 // bump the index to avoid infinite loop 862 if (splitLastIndex == splitLastLastIndex) { 863 splitLastIndex++; 864 } else { 865 splitLastLastIndex = splitLastIndex; 866 } 867 } 868 869 if (matches.size() < limit) { 870 // check special case if we need to append an empty string at the 871 // end of the match 872 // if the lastIndex was the entire string 873 if (splitLastLastIndex == string.length()) { 874 if (splitLastLength > 0 || execSplit("", 0) == null) { 875 matches.add(""); 876 } 877 } else { 878 matches.add(string.substring(splitLastLastIndex, inputLength)); 879 } 880 } 881 882 return new NativeArray(matches.toArray()); 883 } 884 885 /** 886 * Tests for a match in a string. It returns the index of the match, or -1 887 * if not found. 888 * 889 * @param string String to match. 890 * @return Index of match. 891 */ 892 int search(final String string) { 893 final RegExpResult match = execInner(string); 894 895 if (match == null) { 896 return -1; 897 } 898 899 return match.getIndex(); 900 } 901 902 /** 903 * Fast lastIndex getter 904 * @return last index property as int 905 */ 906 public int getLastIndex() { 907 return JSType.toInteger(lastIndex); 908 } 909 910 /** 911 * Fast lastIndex getter 912 * @return last index property as boxed integer 913 */ 914 public Object getLastIndexObject() { 915 return lastIndex; 916 } 917 918 /** 919 * Fast lastIndex setter 920 * @param lastIndex lastIndex 921 */ 922 public void setLastIndex(final int lastIndex) { 923 this.lastIndex = JSType.toObject(lastIndex); 924 } 925 926 private static NativeRegExp checkRegExp(final Object self) { 927 if (self instanceof NativeRegExp) { 928 return (NativeRegExp)self; 929 } else if (self != null && self == Global.instance().getRegExpPrototype()) { 930 return Global.instance().DEFAULT_REGEXP; 931 } else { 932 throw typeError("not.a.regexp", ScriptRuntime.safeToString(self)); 933 } 934 } 935 936 boolean getGlobal() { 937 return regexp.isGlobal(); 938 } 939 940 private RegExp getRegExp() { 941 return regexp; 942 } 943 944 private void setRegExp(final RegExp regexp) { 945 this.regexp = regexp; 946 } 947 948} 949