NativeRegExp.java revision 1483:7cb19fa78763
1/* 2 * Copyright (c) 2010, 2013, Oracle and/or its affiliates. All rights reserved. 3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 4 * 5 * This code is free software; you can redistribute it and/or modify it 6 * under the terms of the GNU General Public License version 2 only, as 7 * published by the Free Software Foundation. Oracle designates this 8 * particular file as subject to the "Classpath" exception as provided 9 * by Oracle in the LICENSE file that accompanied this code. 10 * 11 * This code is distributed in the hope that it will be useful, but WITHOUT 12 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 13 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 14 * version 2 for more details (a copy is included in the LICENSE file that 15 * accompanied this code). 16 * 17 * You should have received a copy of the GNU General Public License version 18 * 2 along with this work; if not, write to the Free Software Foundation, 19 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 20 * 21 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA 22 * or visit www.oracle.com if you need additional information or have any 23 * questions. 24 */ 25 26package jdk.nashorn.internal.objects; 27 28import static jdk.nashorn.internal.runtime.ECMAErrors.typeError; 29import static jdk.nashorn.internal.runtime.ScriptRuntime.UNDEFINED; 30 31import java.lang.invoke.MethodHandle; 32import java.util.ArrayList; 33import java.util.Arrays; 34import java.util.List; 35import java.util.concurrent.Callable; 36import jdk.nashorn.internal.objects.annotations.Attribute; 37import jdk.nashorn.internal.objects.annotations.Constructor; 38import jdk.nashorn.internal.objects.annotations.Function; 39import jdk.nashorn.internal.objects.annotations.Getter; 40import jdk.nashorn.internal.objects.annotations.Property; 41import jdk.nashorn.internal.objects.annotations.ScriptClass; 42import jdk.nashorn.internal.objects.annotations.SpecializedFunction; 43import jdk.nashorn.internal.objects.annotations.Where; 44import jdk.nashorn.internal.runtime.BitVector; 45import jdk.nashorn.internal.runtime.JSType; 46import jdk.nashorn.internal.runtime.ParserException; 47import jdk.nashorn.internal.runtime.PropertyMap; 48import jdk.nashorn.internal.runtime.ScriptFunction; 49import jdk.nashorn.internal.runtime.ScriptObject; 50import jdk.nashorn.internal.runtime.ScriptRuntime; 51import jdk.nashorn.internal.runtime.linker.Bootstrap; 52import jdk.nashorn.internal.runtime.regexp.RegExp; 53import jdk.nashorn.internal.runtime.regexp.RegExpFactory; 54import jdk.nashorn.internal.runtime.regexp.RegExpMatcher; 55import jdk.nashorn.internal.runtime.regexp.RegExpResult; 56 57/** 58 * ECMA 15.10 RegExp Objects. 59 */ 60@ScriptClass("RegExp") 61public final class NativeRegExp extends ScriptObject { 62 /** ECMA 15.10.7.5 lastIndex property */ 63 @Property(attributes = Attribute.NOT_ENUMERABLE | Attribute.NOT_CONFIGURABLE) 64 public Object lastIndex; 65 66 /** Compiled regexp */ 67 private RegExp regexp; 68 69 // Reference to global object needed to support static RegExp properties 70 private final Global globalObject; 71 72 // initialized by nasgen 73 private static PropertyMap $nasgenmap$; 74 75 private NativeRegExp(final Global global) { 76 super(global.getRegExpPrototype(), $nasgenmap$); 77 this.globalObject = global; 78 } 79 80 NativeRegExp(final String input, final String flagString, final Global global, final ScriptObject proto) { 81 super(proto, $nasgenmap$); 82 try { 83 this.regexp = RegExpFactory.create(input, flagString); 84 } catch (final ParserException e) { 85 // translate it as SyntaxError object and throw it 86 e.throwAsEcmaException(); 87 throw new AssertionError(); //guard against null warnings below 88 } 89 this.globalObject = global; 90 this.setLastIndex(0); 91 } 92 93 NativeRegExp(final String input, final String flagString, final Global global) { 94 this(input, flagString, global, global.getRegExpPrototype()); 95 } 96 97 NativeRegExp(final String input, final String flagString) { 98 this(input, flagString, Global.instance()); 99 } 100 101 NativeRegExp(final String string, final Global global) { 102 this(string, "", global); 103 } 104 105 NativeRegExp(final String string) { 106 this(string, Global.instance()); 107 } 108 109 NativeRegExp(final NativeRegExp regExp) { 110 this(Global.instance()); 111 this.lastIndex = regExp.getLastIndexObject(); 112 this.regexp = regExp.getRegExp(); 113 } 114 115 @Override 116 public String getClassName() { 117 return "RegExp"; 118 } 119 120 /** 121 * ECMA 15.10.4 122 * 123 * Constructor 124 * 125 * @param isNew is the new operator used for instantiating this regexp 126 * @param self self reference 127 * @param args arguments (optional: pattern and flags) 128 * @return new NativeRegExp 129 */ 130 @Constructor(arity = 2) 131 public static NativeRegExp constructor(final boolean isNew, final Object self, final Object... args) { 132 if (args.length > 1) { 133 return newRegExp(args[0], args[1]); 134 } else if (args.length > 0) { 135 return newRegExp(args[0], UNDEFINED); 136 } 137 138 return newRegExp(UNDEFINED, UNDEFINED); 139 } 140 141 /** 142 * ECMA 15.10.4 143 * 144 * Constructor - specialized version, no args, empty regexp 145 * 146 * @param isNew is the new operator used for instantiating this regexp 147 * @param self self reference 148 * @return new NativeRegExp 149 */ 150 @SpecializedFunction(isConstructor=true) 151 public static NativeRegExp constructor(final boolean isNew, final Object self) { 152 return new NativeRegExp("", ""); 153 } 154 155 /** 156 * ECMA 15.10.4 157 * 158 * Constructor - specialized version, pattern, no flags 159 * 160 * @param isNew is the new operator used for instantiating this regexp 161 * @param self self reference 162 * @param pattern pattern 163 * @return new NativeRegExp 164 */ 165 @SpecializedFunction(isConstructor=true) 166 public static NativeRegExp constructor(final boolean isNew, final Object self, final Object pattern) { 167 return newRegExp(pattern, UNDEFINED); 168 } 169 170 /** 171 * ECMA 15.10.4 172 * 173 * Constructor - specialized version, pattern and flags 174 * 175 * @param isNew is the new operator used for instantiating this regexp 176 * @param self self reference 177 * @param pattern pattern 178 * @param flags flags 179 * @return new NativeRegExp 180 */ 181 @SpecializedFunction(isConstructor=true) 182 public static NativeRegExp constructor(final boolean isNew, final Object self, final Object pattern, final Object flags) { 183 return newRegExp(pattern, flags); 184 } 185 186 /** 187 * External constructor used in generated code, which explains the public access 188 * 189 * @param regexp regexp 190 * @param flags flags 191 * @return new NativeRegExp 192 */ 193 public static NativeRegExp newRegExp(final Object regexp, final Object flags) { 194 String patternString = ""; 195 String flagString = ""; 196 197 if (regexp != UNDEFINED) { 198 if (regexp instanceof NativeRegExp) { 199 if (flags != UNDEFINED) { 200 throw typeError("regex.cant.supply.flags"); 201 } 202 return (NativeRegExp)regexp; // 15.10.3.1 - undefined flags and regexp as 203 } 204 patternString = JSType.toString(regexp); 205 } 206 207 if (flags != UNDEFINED) { 208 flagString = JSType.toString(flags); 209 } 210 211 return new NativeRegExp(patternString, flagString); 212 } 213 214 /** 215 * Build a regexp that matches {@code string} as-is. All meta-characters will be escaped. 216 * 217 * @param string pattern string 218 * @return flat regexp 219 */ 220 static NativeRegExp flatRegExp(final String string) { 221 // escape special characters 222 StringBuilder sb = null; 223 final int length = string.length(); 224 225 for (int i = 0; i < length; i++) { 226 final char c = string.charAt(i); 227 switch (c) { 228 case '^': 229 case '$': 230 case '\\': 231 case '.': 232 case '*': 233 case '+': 234 case '?': 235 case '(': 236 case ')': 237 case '[': 238 case '{': 239 case '|': 240 if (sb == null) { 241 sb = new StringBuilder(length * 2); 242 sb.append(string, 0, i); 243 } 244 sb.append('\\'); 245 sb.append(c); 246 break; 247 default: 248 if (sb != null) { 249 sb.append(c); 250 } 251 break; 252 } 253 } 254 return new NativeRegExp(sb == null ? string : sb.toString(), ""); 255 } 256 257 private String getFlagString() { 258 final StringBuilder sb = new StringBuilder(3); 259 260 if (regexp.isGlobal()) { 261 sb.append('g'); 262 } 263 if (regexp.isIgnoreCase()) { 264 sb.append('i'); 265 } 266 if (regexp.isMultiline()) { 267 sb.append('m'); 268 } 269 270 return sb.toString(); 271 } 272 273 @Override 274 public String safeToString() { 275 return "[RegExp " + toString() + "]"; 276 } 277 278 @Override 279 public String toString() { 280 return "/" + regexp.getSource() + "/" + getFlagString(); 281 } 282 283 /** 284 * Nashorn extension: RegExp.prototype.compile - everybody implements this! 285 * 286 * @param self self reference 287 * @param pattern pattern 288 * @param flags flags 289 * @return new NativeRegExp 290 */ 291 @Function(attributes = Attribute.NOT_ENUMERABLE) 292 public static ScriptObject compile(final Object self, final Object pattern, final Object flags) { 293 final NativeRegExp regExp = checkRegExp(self); 294 final NativeRegExp compiled = newRegExp(pattern, flags); 295 // copy over regexp to 'self' 296 regExp.setRegExp(compiled.getRegExp()); 297 298 // Some implementations return undefined. Some return 'self'. Since return 299 // value is most likely be ignored, we can play safe and return 'self'. 300 return regExp; 301 } 302 303 /** 304 * ECMA 15.10.6.2 RegExp.prototype.exec(string) 305 * 306 * @param self self reference 307 * @param string string to match against regexp 308 * @return array containing the matches or {@code null} if no match 309 */ 310 @Function(attributes = Attribute.NOT_ENUMERABLE) 311 public static ScriptObject exec(final Object self, final Object string) { 312 return checkRegExp(self).exec(JSType.toString(string)); 313 } 314 315 /** 316 * ECMA 15.10.6.3 RegExp.prototype.test(string) 317 * 318 * @param self self reference 319 * @param string string to test for matches against regexp 320 * @return true if matches found, false otherwise 321 */ 322 @Function(attributes = Attribute.NOT_ENUMERABLE) 323 public static boolean test(final Object self, final Object string) { 324 return checkRegExp(self).test(JSType.toString(string)); 325 } 326 327 /** 328 * ECMA 15.10.6.4 RegExp.prototype.toString() 329 * 330 * @param self self reference 331 * @return string version of regexp 332 */ 333 @Function(attributes = Attribute.NOT_ENUMERABLE) 334 public static String toString(final Object self) { 335 return checkRegExp(self).toString(); 336 } 337 338 /** 339 * ECMA 15.10.7.1 source 340 * 341 * @param self self reference 342 * @return the input string for the regexp 343 */ 344 @Getter(attributes = Attribute.NON_ENUMERABLE_CONSTANT) 345 public static Object source(final Object self) { 346 return checkRegExp(self).getRegExp().getSource(); 347 } 348 349 /** 350 * ECMA 15.10.7.2 global 351 * 352 * @param self self reference 353 * @return true if this regexp is flagged global, false otherwise 354 */ 355 @Getter(attributes = Attribute.NON_ENUMERABLE_CONSTANT) 356 public static Object global(final Object self) { 357 return checkRegExp(self).getRegExp().isGlobal(); 358 } 359 360 /** 361 * ECMA 15.10.7.3 ignoreCase 362 * 363 * @param self self reference 364 * @return true if this regexp if flagged to ignore case, false otherwise 365 */ 366 @Getter(attributes = Attribute.NON_ENUMERABLE_CONSTANT) 367 public static Object ignoreCase(final Object self) { 368 return checkRegExp(self).getRegExp().isIgnoreCase(); 369 } 370 371 /** 372 * ECMA 15.10.7.4 multiline 373 * 374 * @param self self reference 375 * @return true if this regexp is flagged to be multiline, false otherwise 376 */ 377 @Getter(attributes = Attribute.NON_ENUMERABLE_CONSTANT) 378 public static Object multiline(final Object self) { 379 return checkRegExp(self).getRegExp().isMultiline(); 380 } 381 382 /** 383 * Getter for non-standard RegExp.input property. 384 * @param self self object 385 * @return last regexp input 386 */ 387 @Getter(where = Where.CONSTRUCTOR, attributes = Attribute.CONSTANT, name = "input") 388 public static Object getLastInput(final Object self) { 389 final RegExpResult match = Global.instance().getLastRegExpResult(); 390 return match == null ? "" : match.getInput(); 391 } 392 393 /** 394 * Getter for non-standard RegExp.multiline property. 395 * @param self self object 396 * @return last regexp input 397 */ 398 @Getter(where = Where.CONSTRUCTOR, attributes = Attribute.CONSTANT, name = "multiline") 399 public static Object getLastMultiline(final Object self) { 400 return false; // doesn't ever seem to become true and isn't documented anyhwere 401 } 402 403 /** 404 * Getter for non-standard RegExp.lastMatch property. 405 * @param self self object 406 * @return last regexp input 407 */ 408 @Getter(where = Where.CONSTRUCTOR, attributes = Attribute.CONSTANT, name = "lastMatch") 409 public static Object getLastMatch(final Object self) { 410 final RegExpResult match = Global.instance().getLastRegExpResult(); 411 return match == null ? "" : match.getGroup(0); 412 } 413 414 /** 415 * Getter for non-standard RegExp.lastParen property. 416 * @param self self object 417 * @return last regexp input 418 */ 419 @Getter(where = Where.CONSTRUCTOR, attributes = Attribute.CONSTANT, name = "lastParen") 420 public static Object getLastParen(final Object self) { 421 final RegExpResult match = Global.instance().getLastRegExpResult(); 422 return match == null ? "" : match.getLastParen(); 423 } 424 425 /** 426 * Getter for non-standard RegExp.leftContext property. 427 * @param self self object 428 * @return last regexp input 429 */ 430 @Getter(where = Where.CONSTRUCTOR, attributes = Attribute.CONSTANT, name = "leftContext") 431 public static Object getLeftContext(final Object self) { 432 final RegExpResult match = Global.instance().getLastRegExpResult(); 433 return match == null ? "" : match.getInput().substring(0, match.getIndex()); 434 } 435 436 /** 437 * Getter for non-standard RegExp.rightContext property. 438 * @param self self object 439 * @return last regexp input 440 */ 441 @Getter(where = Where.CONSTRUCTOR, attributes = Attribute.CONSTANT, name = "rightContext") 442 public static Object getRightContext(final Object self) { 443 final RegExpResult match = Global.instance().getLastRegExpResult(); 444 return match == null ? "" : match.getInput().substring(match.getIndex() + match.length()); 445 } 446 447 /** 448 * Getter for non-standard RegExp.$1 property. 449 * @param self self object 450 * @return last regexp input 451 */ 452 @Getter(where = Where.CONSTRUCTOR, attributes = Attribute.CONSTANT, name = "$1") 453 public static Object getGroup1(final Object self) { 454 final RegExpResult match = Global.instance().getLastRegExpResult(); 455 return match == null ? "" : match.getGroup(1); 456 } 457 458 /** 459 * Getter for non-standard RegExp.$2 property. 460 * @param self self object 461 * @return last regexp input 462 */ 463 @Getter(where = Where.CONSTRUCTOR, attributes = Attribute.CONSTANT, name = "$2") 464 public static Object getGroup2(final Object self) { 465 final RegExpResult match = Global.instance().getLastRegExpResult(); 466 return match == null ? "" : match.getGroup(2); 467 } 468 469 /** 470 * Getter for non-standard RegExp.$3 property. 471 * @param self self object 472 * @return last regexp input 473 */ 474 @Getter(where = Where.CONSTRUCTOR, attributes = Attribute.CONSTANT, name = "$3") 475 public static Object getGroup3(final Object self) { 476 final RegExpResult match = Global.instance().getLastRegExpResult(); 477 return match == null ? "" : match.getGroup(3); 478 } 479 480 /** 481 * Getter for non-standard RegExp.$4 property. 482 * @param self self object 483 * @return last regexp input 484 */ 485 @Getter(where = Where.CONSTRUCTOR, attributes = Attribute.CONSTANT, name = "$4") 486 public static Object getGroup4(final Object self) { 487 final RegExpResult match = Global.instance().getLastRegExpResult(); 488 return match == null ? "" : match.getGroup(4); 489 } 490 491 /** 492 * Getter for non-standard RegExp.$5 property. 493 * @param self self object 494 * @return last regexp input 495 */ 496 @Getter(where = Where.CONSTRUCTOR, attributes = Attribute.CONSTANT, name = "$5") 497 public static Object getGroup5(final Object self) { 498 final RegExpResult match = Global.instance().getLastRegExpResult(); 499 return match == null ? "" : match.getGroup(5); 500 } 501 502 /** 503 * Getter for non-standard RegExp.$6 property. 504 * @param self self object 505 * @return last regexp input 506 */ 507 @Getter(where = Where.CONSTRUCTOR, attributes = Attribute.CONSTANT, name = "$6") 508 public static Object getGroup6(final Object self) { 509 final RegExpResult match = Global.instance().getLastRegExpResult(); 510 return match == null ? "" : match.getGroup(6); 511 } 512 513 /** 514 * Getter for non-standard RegExp.$7 property. 515 * @param self self object 516 * @return last regexp input 517 */ 518 @Getter(where = Where.CONSTRUCTOR, attributes = Attribute.CONSTANT, name = "$7") 519 public static Object getGroup7(final Object self) { 520 final RegExpResult match = Global.instance().getLastRegExpResult(); 521 return match == null ? "" : match.getGroup(7); 522 } 523 524 /** 525 * Getter for non-standard RegExp.$8 property. 526 * @param self self object 527 * @return last regexp input 528 */ 529 @Getter(where = Where.CONSTRUCTOR, attributes = Attribute.CONSTANT, name = "$8") 530 public static Object getGroup8(final Object self) { 531 final RegExpResult match = Global.instance().getLastRegExpResult(); 532 return match == null ? "" : match.getGroup(8); 533 } 534 535 /** 536 * Getter for non-standard RegExp.$9 property. 537 * @param self self object 538 * @return last regexp input 539 */ 540 @Getter(where = Where.CONSTRUCTOR, attributes = Attribute.CONSTANT, name = "$9") 541 public static Object getGroup9(final Object self) { 542 final RegExpResult match = Global.instance().getLastRegExpResult(); 543 return match == null ? "" : match.getGroup(9); 544 } 545 546 private RegExpResult execInner(final String string) { 547 final boolean isGlobal = regexp.isGlobal(); 548 int start = getLastIndex(); 549 if (!isGlobal) { 550 start = 0; 551 } 552 553 if (start < 0 || start > string.length()) { 554 if (isGlobal) { 555 setLastIndex(0); 556 } 557 return null; 558 } 559 560 final RegExpMatcher matcher = regexp.match(string); 561 if (matcher == null || !matcher.search(start)) { 562 if (isGlobal) { 563 setLastIndex(0); 564 } 565 return null; 566 } 567 568 if (isGlobal) { 569 setLastIndex(matcher.end()); 570 } 571 572 final RegExpResult match = new RegExpResult(string, matcher.start(), groups(matcher)); 573 globalObject.setLastRegExpResult(match); 574 return match; 575 } 576 577 // String.prototype.split method ignores the global flag and should not update lastIndex property. 578 private RegExpResult execSplit(final String string, final int start) { 579 if (start < 0 || start > string.length()) { 580 return null; 581 } 582 583 final RegExpMatcher matcher = regexp.match(string); 584 if (matcher == null || !matcher.search(start)) { 585 return null; 586 } 587 588 final RegExpResult match = new RegExpResult(string, matcher.start(), groups(matcher)); 589 globalObject.setLastRegExpResult(match); 590 return match; 591 } 592 593 /** 594 * Convert java.util.regex.Matcher groups to JavaScript groups. 595 * That is, replace null and groups that didn't match with undefined. 596 */ 597 private Object[] groups(final RegExpMatcher matcher) { 598 final int groupCount = matcher.groupCount(); 599 final Object[] groups = new Object[groupCount + 1]; 600 final BitVector groupsInNegativeLookahead = regexp.getGroupsInNegativeLookahead(); 601 602 for (int i = 0, lastGroupStart = matcher.start(); i <= groupCount; i++) { 603 final int groupStart = matcher.start(i); 604 if (lastGroupStart > groupStart 605 || groupsInNegativeLookahead != null && groupsInNegativeLookahead.isSet(i)) { 606 // (1) ECMA 15.10.2.5 NOTE 3: need to clear Atom's captures each time Atom is repeated. 607 // (2) ECMA 15.10.2.8 NOTE 3: Backreferences to captures in (?!Disjunction) from elsewhere 608 // in the pattern always return undefined because the negative lookahead must fail. 609 groups[i] = UNDEFINED; 610 continue; 611 } 612 final String group = matcher.group(i); 613 groups[i] = group == null ? UNDEFINED : group; 614 lastGroupStart = groupStart; 615 } 616 return groups; 617 } 618 619 /** 620 * Executes a search for a match within a string based on a regular 621 * expression. It returns an array of information or null if no match is 622 * found. 623 * 624 * @param string String to match. 625 * @return NativeArray of matches, string or null. 626 */ 627 public NativeRegExpExecResult exec(final String string) { 628 final RegExpResult match = execInner(string); 629 630 if (match == null) { 631 return null; 632 } 633 634 return new NativeRegExpExecResult(match, globalObject); 635 } 636 637 /** 638 * Executes a search for a match within a string based on a regular 639 * expression. 640 * 641 * @param string String to match. 642 * @return True if a match is found. 643 */ 644 public boolean test(final String string) { 645 return execInner(string) != null; 646 } 647 648 /** 649 * Searches and replaces the regular expression portion (match) with the 650 * replaced text instead. For the "replacement text" parameter, you can use 651 * the keywords $1 to $2 to replace the original text with values from 652 * sub-patterns defined within the main pattern. 653 * 654 * @param string String to match. 655 * @param replacement Replacement string. 656 * @return String with substitutions. 657 */ 658 String replace(final String string, final String replacement, final ScriptFunction function) throws Throwable { 659 final RegExpMatcher matcher = regexp.match(string); 660 661 if (matcher == null) { 662 return string; 663 } 664 665 if (!regexp.isGlobal()) { 666 if (!matcher.search(0)) { 667 return string; 668 } 669 670 final StringBuilder sb = new StringBuilder(); 671 sb.append(string, 0, matcher.start()); 672 673 if (function != null) { 674 final Object self = function.isStrict() ? UNDEFINED : Global.instance(); 675 sb.append(callReplaceValue(getReplaceValueInvoker(), function, self, matcher, string)); 676 } else { 677 appendReplacement(matcher, string, replacement, sb); 678 } 679 sb.append(string, matcher.end(), string.length()); 680 return sb.toString(); 681 } 682 683 setLastIndex(0); 684 685 if (!matcher.search(0)) { 686 return string; 687 } 688 689 int thisIndex = 0; 690 int previousLastIndex = 0; 691 final StringBuilder sb = new StringBuilder(); 692 693 final MethodHandle invoker = function == null ? null : getReplaceValueInvoker(); 694 final Object self = function == null || function.isStrict() ? UNDEFINED : Global.instance(); 695 696 do { 697 sb.append(string, thisIndex, matcher.start()); 698 if (function != null) { 699 sb.append(callReplaceValue(invoker, function, self, matcher, string)); 700 } else { 701 appendReplacement(matcher, string, replacement, sb); 702 } 703 704 thisIndex = matcher.end(); 705 if (thisIndex == string.length() && matcher.start() == matcher.end()) { 706 // Avoid getting empty match at end of string twice 707 break; 708 } 709 710 // ECMA 15.5.4.10 String.prototype.match(regexp) 711 if (thisIndex == previousLastIndex) { 712 setLastIndex(thisIndex + 1); 713 previousLastIndex = thisIndex + 1; 714 } else { 715 previousLastIndex = thisIndex; 716 } 717 } while (previousLastIndex <= string.length() && matcher.search(previousLastIndex)); 718 719 sb.append(string, thisIndex, string.length()); 720 721 return sb.toString(); 722 } 723 724 private void appendReplacement(final RegExpMatcher matcher, final String text, final String replacement, final StringBuilder sb) { 725 /* 726 * Process substitution patterns: 727 * 728 * $$ -> $ 729 * $& -> the matched substring 730 * $` -> the portion of string that precedes matched substring 731 * $' -> the portion of string that follows the matched substring 732 * $n -> the nth capture, where n is [1-9] and $n is NOT followed by a decimal digit 733 * $nn -> the nnth capture, where nn is a two digit decimal number [01-99]. 734 */ 735 736 int cursor = 0; 737 Object[] groups = null; 738 739 while (cursor < replacement.length()) { 740 char nextChar = replacement.charAt(cursor); 741 if (nextChar == '$') { 742 // Skip past $ 743 cursor++; 744 if (cursor == replacement.length()) { 745 // nothing after "$" 746 sb.append('$'); 747 break; 748 } 749 750 nextChar = replacement.charAt(cursor); 751 final int firstDigit = nextChar - '0'; 752 753 if (firstDigit >= 0 && firstDigit <= 9 && firstDigit <= matcher.groupCount()) { 754 // $0 is not supported, but $01 is. implementation-defined: if n>m, ignore second digit. 755 int refNum = firstDigit; 756 cursor++; 757 if (cursor < replacement.length() && firstDigit < matcher.groupCount()) { 758 final int secondDigit = replacement.charAt(cursor) - '0'; 759 if (secondDigit >= 0 && secondDigit <= 9) { 760 final int newRefNum = firstDigit * 10 + secondDigit; 761 if (newRefNum <= matcher.groupCount() && newRefNum > 0) { 762 // $nn ($01-$99) 763 refNum = newRefNum; 764 cursor++; 765 } 766 } 767 } 768 if (refNum > 0) { 769 if (groups == null) { 770 groups = groups(matcher); 771 } 772 // Append group if matched. 773 if (groups[refNum] != UNDEFINED) { 774 sb.append((String) groups[refNum]); 775 } 776 } else { // $0. ignore. 777 assert refNum == 0; 778 sb.append("$0"); 779 } 780 } else if (nextChar == '$') { 781 sb.append('$'); 782 cursor++; 783 } else if (nextChar == '&') { 784 sb.append(matcher.group()); 785 cursor++; 786 } else if (nextChar == '`') { 787 sb.append(text, 0, matcher.start()); 788 cursor++; 789 } else if (nextChar == '\'') { 790 sb.append(text, matcher.end(), text.length()); 791 cursor++; 792 } else { 793 // unknown substitution or $n with n>m. skip. 794 sb.append('$'); 795 } 796 } else { 797 sb.append(nextChar); 798 cursor++; 799 } 800 } 801 } 802 803 private static final Object REPLACE_VALUE = new Object(); 804 805 private static MethodHandle getReplaceValueInvoker() { 806 return Global.instance().getDynamicInvoker(REPLACE_VALUE, 807 new Callable<MethodHandle>() { 808 @Override 809 public MethodHandle call() { 810 return Bootstrap.createDynamicCallInvoker(String.class, ScriptFunction.class, Object.class, Object[].class); 811 } 812 }); 813 } 814 815 private String callReplaceValue(final MethodHandle invoker, final ScriptFunction function, final Object self, final RegExpMatcher matcher, final String string) throws Throwable { 816 final Object[] groups = groups(matcher); 817 final Object[] args = Arrays.copyOf(groups, groups.length + 2); 818 819 args[groups.length] = matcher.start(); 820 args[groups.length + 1] = string; 821 822 return (String)invoker.invokeExact(function, self, args); 823 } 824 825 /** 826 * Breaks up a string into an array of substrings based on a regular 827 * expression or fixed string. 828 * 829 * @param string String to match. 830 * @param limit Split limit. 831 * @return Array of substrings. 832 */ 833 NativeArray split(final String string, final long limit) { 834 if (limit == 0L) { 835 return new NativeArray(); 836 } 837 838 final List<Object> matches = new ArrayList<>(); 839 840 RegExpResult match; 841 final int inputLength = string.length(); 842 int splitLastLength = -1; 843 int splitLastIndex = 0; 844 int splitLastLastIndex = 0; 845 846 while ((match = execSplit(string, splitLastIndex)) != null) { 847 splitLastIndex = match.getIndex() + match.length(); 848 849 if (splitLastIndex > splitLastLastIndex) { 850 matches.add(string.substring(splitLastLastIndex, match.getIndex())); 851 final Object[] groups = match.getGroups(); 852 if (groups.length > 1 && match.getIndex() < inputLength) { 853 for (int index = 1; index < groups.length && matches.size() < limit; index++) { 854 matches.add(groups[index]); 855 } 856 } 857 858 splitLastLength = match.length(); 859 860 if (matches.size() >= limit) { 861 break; 862 } 863 } 864 865 // bump the index to avoid infinite loop 866 if (splitLastIndex == splitLastLastIndex) { 867 splitLastIndex++; 868 } else { 869 splitLastLastIndex = splitLastIndex; 870 } 871 } 872 873 if (matches.size() < limit) { 874 // check special case if we need to append an empty string at the 875 // end of the match 876 // if the lastIndex was the entire string 877 if (splitLastLastIndex == string.length()) { 878 if (splitLastLength > 0 || execSplit("", 0) == null) { 879 matches.add(""); 880 } 881 } else { 882 matches.add(string.substring(splitLastLastIndex, inputLength)); 883 } 884 } 885 886 return new NativeArray(matches.toArray()); 887 } 888 889 /** 890 * Tests for a match in a string. It returns the index of the match, or -1 891 * if not found. 892 * 893 * @param string String to match. 894 * @return Index of match. 895 */ 896 int search(final String string) { 897 final RegExpResult match = execInner(string); 898 899 if (match == null) { 900 return -1; 901 } 902 903 return match.getIndex(); 904 } 905 906 /** 907 * Fast lastIndex getter 908 * @return last index property as int 909 */ 910 public int getLastIndex() { 911 return JSType.toInteger(lastIndex); 912 } 913 914 /** 915 * Fast lastIndex getter 916 * @return last index property as boxed integer 917 */ 918 public Object getLastIndexObject() { 919 return lastIndex; 920 } 921 922 /** 923 * Fast lastIndex setter 924 * @param lastIndex lastIndex 925 */ 926 public void setLastIndex(final int lastIndex) { 927 this.lastIndex = JSType.toObject(lastIndex); 928 } 929 930 private static NativeRegExp checkRegExp(final Object self) { 931 if (self instanceof NativeRegExp) { 932 return (NativeRegExp)self; 933 } else if (self != null && self == Global.instance().getRegExpPrototype()) { 934 return Global.instance().getDefaultRegExp(); 935 } else { 936 throw typeError("not.a.regexp", ScriptRuntime.safeToString(self)); 937 } 938 } 939 940 boolean getGlobal() { 941 return regexp.isGlobal(); 942 } 943 944 private RegExp getRegExp() { 945 return regexp; 946 } 947 948 private void setRegExp(final RegExp regexp) { 949 this.regexp = regexp; 950 } 951 952} 953