ByteCodeMachine.java revision 1088:7e62d98d4625
1/* 2 * Permission is hereby granted, free of charge, to any person obtaining a copy of 3 * this software and associated documentation files (the "Software"), to deal in 4 * the Software without restriction, including without limitation the rights to 5 * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies 6 * of the Software, and to permit persons to whom the Software is furnished to do 7 * so, subject to the following conditions: 8 * 9 * The above copyright notice and this permission notice shall be included in all 10 * copies or substantial portions of the Software. 11 * 12 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 13 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 14 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 15 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 16 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 17 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 18 * SOFTWARE. 19 */ 20package jdk.nashorn.internal.runtime.regexp.joni; 21 22import static jdk.nashorn.internal.runtime.regexp.joni.BitStatus.bsAt; 23import static jdk.nashorn.internal.runtime.regexp.joni.EncodingHelper.isNewLine; 24import static jdk.nashorn.internal.runtime.regexp.joni.Option.isFindCondition; 25import static jdk.nashorn.internal.runtime.regexp.joni.Option.isFindLongest; 26import static jdk.nashorn.internal.runtime.regexp.joni.Option.isFindNotEmpty; 27import static jdk.nashorn.internal.runtime.regexp.joni.Option.isNotBol; 28import static jdk.nashorn.internal.runtime.regexp.joni.Option.isNotEol; 29import static jdk.nashorn.internal.runtime.regexp.joni.Option.isPosixRegion; 30import jdk.nashorn.internal.runtime.regexp.joni.ast.CClassNode; 31import jdk.nashorn.internal.runtime.regexp.joni.constants.OPCode; 32import jdk.nashorn.internal.runtime.regexp.joni.encoding.IntHolder; 33import jdk.nashorn.internal.runtime.regexp.joni.exception.ErrorMessages; 34import jdk.nashorn.internal.runtime.regexp.joni.exception.InternalException; 35 36class ByteCodeMachine extends StackMachine { 37 private int bestLen; // return value 38 private int s = 0; // current char 39 40 private int range; // right range 41 private int sprev; 42 private int sstart; 43 private int sbegin; 44 45 private final int[] code; // byte code 46 private int ip; // instruction pointer 47 48 ByteCodeMachine(final Regex regex, final char[] chars, final int p, final int end) { 49 super(regex, chars, p, end); 50 this.code = regex.code; 51 } 52 53 private boolean stringCmpIC(final int caseFlodFlag, final int s1p, final IntHolder ps2, final int mbLen, final int textEnd) { 54 int s1 = s1p; 55 int s2 = ps2.value; 56 final int end1 = s1 + mbLen; 57 58 while (s1 < end1) { 59 final char c1 = EncodingHelper.toLowerCase(chars[s1++]); 60 final char c2 = EncodingHelper.toLowerCase(chars[s2++]); 61 62 if (c1 != c2) { 63 return false; 64 } 65 } 66 ps2.value = s2; 67 return true; 68 } 69 70 private void debugMatchBegin() { 71 Config.log.println("match_at: " + 72 "str: " + str + 73 ", end: " + end + 74 ", start: " + this.sstart + 75 ", sprev: " + this.sprev); 76 Config.log.println("size: " + (end - str) + ", start offset: " + (this.sstart - str)); 77 } 78 79 private void debugMatchLoop() { 80 if (Config.DEBUG_MATCH) { 81 Config.log.printf("%4d", (s - str)).print("> \""); 82 int q, i; 83 for (i=0, q=s; i<7 && q<end && s>=0; i++) { 84 if (q < end) { 85 Config.log.print(new String(new char[]{chars[q++]})); 86 } 87 } 88 final String string = q < end ? "...\"" : "\""; 89 q += string.length(); 90 Config.log.print(string); 91 for (i=0; i<20-(q-s);i++) { 92 Config.log.print(" "); 93 } 94 final StringBuilder sb = new StringBuilder(); 95 new ByteCodePrinter(regex).compiledByteCodeToString(sb, ip); 96 Config.log.println(sb.toString()); 97 } 98 } 99 100 @Override 101 protected final int matchAt(final int r, final int ss, final int sp) { 102 this.range = r; 103 this.sstart = ss; 104 this.sprev = sp; 105 106 stk = 0; 107 ip = 0; 108 109 if (Config.DEBUG_MATCH) { 110 debugMatchBegin(); 111 } 112 113 init(); 114 115 bestLen = -1; 116 s = ss; 117 118 final int[] c = this.code; 119 while (true) { 120 if (Config.DEBUG_MATCH) { 121 debugMatchLoop(); 122 } 123 124 sbegin = s; 125 switch (c[ip++]) { 126 case OPCode.END: if (opEnd()) { 127 return finish(); 128 } break; 129 case OPCode.EXACT1: opExact1(); break; 130 case OPCode.EXACT2: opExact2(); continue; 131 case OPCode.EXACT3: opExact3(); continue; 132 case OPCode.EXACT4: opExact4(); continue; 133 case OPCode.EXACT5: opExact5(); continue; 134 case OPCode.EXACTN: opExactN(); continue; 135 136 case OPCode.EXACT1_IC: opExact1IC(); break; 137 case OPCode.EXACTN_IC: opExactNIC(); continue; 138 139 case OPCode.CCLASS: opCClass(); break; 140 case OPCode.CCLASS_MB: opCClassMB(); break; 141 case OPCode.CCLASS_MIX: opCClassMIX(); break; 142 case OPCode.CCLASS_NOT: opCClassNot(); break; 143 case OPCode.CCLASS_MB_NOT: opCClassMBNot(); break; 144 case OPCode.CCLASS_MIX_NOT: opCClassMIXNot(); break; 145 case OPCode.CCLASS_NODE: opCClassNode(); break; 146 147 case OPCode.ANYCHAR: opAnyChar(); break; 148 case OPCode.ANYCHAR_ML: opAnyCharML(); break; 149 case OPCode.ANYCHAR_STAR: opAnyCharStar(); break; 150 case OPCode.ANYCHAR_ML_STAR: opAnyCharMLStar(); break; 151 case OPCode.ANYCHAR_STAR_PEEK_NEXT: opAnyCharStarPeekNext(); break; 152 case OPCode.ANYCHAR_ML_STAR_PEEK_NEXT: opAnyCharMLStarPeekNext(); break; 153 154 case OPCode.WORD: opWord(); break; 155 case OPCode.NOT_WORD: opNotWord(); break; 156 case OPCode.WORD_BOUND: opWordBound(); continue; 157 case OPCode.NOT_WORD_BOUND: opNotWordBound(); continue; 158 case OPCode.WORD_BEGIN: opWordBegin(); continue; 159 case OPCode.WORD_END: opWordEnd(); continue; 160 161 case OPCode.BEGIN_BUF: opBeginBuf(); continue; 162 case OPCode.END_BUF: opEndBuf(); continue; 163 case OPCode.BEGIN_LINE: opBeginLine(); continue; 164 case OPCode.END_LINE: opEndLine(); continue; 165 case OPCode.SEMI_END_BUF: opSemiEndBuf(); continue; 166 case OPCode.BEGIN_POSITION: opBeginPosition(); continue; 167 168 case OPCode.MEMORY_START_PUSH: opMemoryStartPush(); continue; 169 case OPCode.MEMORY_START: opMemoryStart(); continue; 170 case OPCode.MEMORY_END_PUSH: opMemoryEndPush(); continue; 171 case OPCode.MEMORY_END: opMemoryEnd(); continue; 172 case OPCode.MEMORY_END_PUSH_REC: opMemoryEndPushRec(); continue; 173 case OPCode.MEMORY_END_REC: opMemoryEndRec(); continue; 174 175 case OPCode.BACKREF1: opBackRef1(); continue; 176 case OPCode.BACKREF2: opBackRef2(); continue; 177 case OPCode.BACKREFN: opBackRefN(); continue; 178 case OPCode.BACKREFN_IC: opBackRefNIC(); continue; 179 case OPCode.BACKREF_MULTI: opBackRefMulti(); continue; 180 case OPCode.BACKREF_MULTI_IC: opBackRefMultiIC(); continue; 181 case OPCode.BACKREF_WITH_LEVEL: opBackRefAtLevel(); continue; 182 183 case OPCode.NULL_CHECK_START: opNullCheckStart(); continue; 184 case OPCode.NULL_CHECK_END: opNullCheckEnd(); continue; 185 case OPCode.NULL_CHECK_END_MEMST: opNullCheckEndMemST(); continue; 186 case OPCode.NULL_CHECK_END_MEMST_PUSH: opNullCheckEndMemSTPush(); continue; 187 188 case OPCode.JUMP: opJump(); continue; 189 case OPCode.PUSH: opPush(); continue; 190 191 case OPCode.POP: opPop(); continue; 192 case OPCode.PUSH_OR_JUMP_EXACT1: opPushOrJumpExact1(); continue; 193 case OPCode.PUSH_IF_PEEK_NEXT: opPushIfPeekNext(); continue; 194 195 case OPCode.REPEAT: opRepeat(); continue; 196 case OPCode.REPEAT_NG: opRepeatNG(); continue; 197 case OPCode.REPEAT_INC: opRepeatInc(); continue; 198 case OPCode.REPEAT_INC_SG: opRepeatIncSG(); continue; 199 case OPCode.REPEAT_INC_NG: opRepeatIncNG(); continue; 200 case OPCode.REPEAT_INC_NG_SG: opRepeatIncNGSG(); continue; 201 202 case OPCode.PUSH_POS: opPushPos(); continue; 203 case OPCode.POP_POS: opPopPos(); continue; 204 case OPCode.PUSH_POS_NOT: opPushPosNot(); continue; 205 case OPCode.FAIL_POS: opFailPos(); continue; 206 case OPCode.PUSH_STOP_BT: opPushStopBT(); continue; 207 case OPCode.POP_STOP_BT: opPopStopBT(); continue; 208 209 case OPCode.LOOK_BEHIND: opLookBehind(); continue; 210 case OPCode.PUSH_LOOK_BEHIND_NOT: opPushLookBehindNot(); continue; 211 case OPCode.FAIL_LOOK_BEHIND_NOT: opFailLookBehindNot(); continue; 212 213 case OPCode.FINISH: 214 return finish(); 215 216 case OPCode.FAIL: opFail(); continue; 217 218 default: 219 throw new InternalException(ErrorMessages.ERR_UNDEFINED_BYTECODE); 220 221 } // main switch 222 } // main while 223 } 224 225 private boolean opEnd() { 226 final int n = s - sstart; 227 228 if (n > bestLen) { 229 if (Config.USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE) { 230 if (isFindLongest(regex.options)) { 231 if (n > msaBestLen) { 232 msaBestLen = n; 233 msaBestS = sstart; 234 } else { 235 // goto end_best_len; 236 return endBestLength(); 237 } 238 } 239 } // USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE 240 241 bestLen = n; 242 final Region region = msaRegion; 243 if (region != null) { 244 // USE_POSIX_REGION_OPTION ... else ... 245 region.beg[0] = msaBegin = sstart - str; 246 region.end[0] = msaEnd = s - str; 247 for (int i = 1; i <= regex.numMem; i++) { 248 // opt! 249 if (repeatStk[memEndStk + i] != INVALID_INDEX) { 250 region.beg[i] = bsAt(regex.btMemStart, i) ? 251 stack[repeatStk[memStartStk + i]].getMemPStr() - str : 252 repeatStk[memStartStk + i] - str; 253 254 255 region.end[i] = bsAt(regex.btMemEnd, i) ? 256 stack[repeatStk[memEndStk + i]].getMemPStr() : 257 repeatStk[memEndStk + i] - str; 258 259 } else { 260 region.beg[i] = region.end[i] = Region.REGION_NOTPOS; 261 } 262 263 } 264 265 } else { 266 msaBegin = sstart - str; 267 msaEnd = s - str; 268 } 269 } else { 270 final Region region = msaRegion; 271 if (Config.USE_POSIX_API_REGION_OPTION) { 272 if (!isPosixRegion(regex.options)) { 273 if (region != null) { 274 region.clear(); 275 } else { 276 msaBegin = msaEnd = 0; 277 } 278 } 279 } else { 280 if (region != null) { 281 region.clear(); 282 } else { 283 msaBegin = msaEnd = 0; 284 } 285 } // USE_POSIX_REGION_OPTION 286 } 287 // end_best_len: 288 /* default behavior: return first-matching result. */ 289 return endBestLength(); 290 } 291 292 private boolean endBestLength() { 293 if (isFindCondition(regex.options)) { 294 if (isFindNotEmpty(regex.options) && s == sstart) { 295 bestLen = -1; 296 {opFail(); return false;} /* for retry */ 297 } 298 if (isFindLongest(regex.options) && s < range) { 299 {opFail(); return false;} /* for retry */ 300 } 301 } 302 // goto finish; 303 return true; 304 } 305 306 private void opExact1() { 307 if (s >= range || code[ip] != chars[s++]) {opFail(); return;} 308 //if (s > range) {opFail(); return;} 309 ip++; 310 sprev = sbegin; // break; 311 } 312 313 private void opExact2() { 314 if (s + 2 > range) {opFail(); return;} 315 if (code[ip] != chars[s]) {opFail(); return;} 316 ip++; s++; 317 if (code[ip] != chars[s]) {opFail(); return;} 318 sprev = s; 319 ip++; s++; 320 } 321 322 private void opExact3() { 323 if (s + 3 > range) {opFail(); return;} 324 if (code[ip] != chars[s]) {opFail(); return;} 325 ip++; s++; 326 if (code[ip] != chars[s]) {opFail(); return;} 327 ip++; s++; 328 if (code[ip] != chars[s]) {opFail(); return;} 329 sprev = s; 330 ip++; s++; 331 } 332 333 private void opExact4() { 334 if (s + 4 > range) {opFail(); return;} 335 if (code[ip] != chars[s]) {opFail(); return;} 336 ip++; s++; 337 if (code[ip] != chars[s]) {opFail(); return;} 338 ip++; s++; 339 if (code[ip] != chars[s]) {opFail(); return;} 340 ip++; s++; 341 if (code[ip] != chars[s]) {opFail(); return;} 342 sprev = s; 343 ip++; s++; 344 } 345 346 private void opExact5() { 347 if (s + 5 > range) {opFail(); return;} 348 if (code[ip] != chars[s]) {opFail(); return;} 349 ip++; s++; 350 if (code[ip] != chars[s]) {opFail(); return;} 351 ip++; s++; 352 if (code[ip] != chars[s]) {opFail(); return;} 353 ip++; s++; 354 if (code[ip] != chars[s]) {opFail(); return;} 355 ip++; s++; 356 if (code[ip] != chars[s]) {opFail(); return;} 357 sprev = s; 358 ip++; s++; 359 } 360 361 private void opExactN() { 362 int tlen = code[ip++]; 363 if (s + tlen > range) {opFail(); return;} 364 365 if (Config.USE_STRING_TEMPLATES) { 366 final char[] bs = regex.templates[code[ip++]]; 367 int ps = code[ip++]; 368 369 while (tlen-- > 0) { 370 if (bs[ps++] != chars[s++]) {opFail(); return;} 371 } 372 373 } else { 374 while (tlen-- > 0) { 375 if (code[ip++] != chars[s++]) {opFail(); return;} 376 } 377 } 378 sprev = s - 1; 379 } 380 381 private void opExact1IC() { 382 if (s >= range || code[ip] != EncodingHelper.toLowerCase(chars[s++])) {opFail(); return;} 383 ip++; 384 sprev = sbegin; // break; 385 } 386 387 private void opExactNIC() { 388 int tlen = code[ip++]; 389 if (s + tlen > range) {opFail(); return;} 390 391 if (Config.USE_STRING_TEMPLATES) { 392 final char[] bs = regex.templates[code[ip++]]; 393 int ps = code[ip++]; 394 395 while (tlen-- > 0) { 396 if (bs[ps++] != EncodingHelper.toLowerCase(chars[s++])) {opFail(); return;} 397 } 398 } else { 399 400 while (tlen-- > 0) { 401 if (code[ip++] != EncodingHelper.toLowerCase(chars[s++])) {opFail(); return;} 402 } 403 } 404 sprev = s - 1; 405 } 406 407 private boolean isInBitSet() { 408 final int c = chars[s]; 409 return (c <= 0xff && (code[ip + (c >>> BitSet.ROOM_SHIFT)] & (1 << c)) != 0); 410 } 411 412 private void opCClass() { 413 if (s >= range || !isInBitSet()) {opFail(); return;} 414 ip += BitSet.BITSET_SIZE; 415 s++; 416 sprev = sbegin; // break; 417 } 418 419 private boolean isInClassMB() { 420 final int tlen = code[ip++]; 421 if (s >= range) { 422 return false; 423 } 424 final int ss = s; 425 s++; 426 final int c = chars[ss]; 427 if (!EncodingHelper.isInCodeRange(code, ip, c)) { 428 return false; 429 } 430 ip += tlen; 431 return true; 432 } 433 434 private void opCClassMB() { 435 // beyond string check 436 if (s >= range || chars[s] <= 0xff) {opFail(); return;} 437 if (!isInClassMB()) {opFail(); return;} // not!!! 438 sprev = sbegin; // break; 439 } 440 441 private void opCClassMIX() { 442 if (s >= range) {opFail(); return;} 443 if (chars[s] > 0xff) { 444 ip += BitSet.BITSET_SIZE; 445 if (!isInClassMB()) {opFail(); return;} 446 } else { 447 if (!isInBitSet()) {opFail(); return;} 448 ip += BitSet.BITSET_SIZE; 449 final int tlen = code[ip++]; // by code range length 450 ip += tlen; 451 s++; 452 } 453 sprev = sbegin; // break; 454 } 455 456 private void opCClassNot() { 457 if (s >= range || isInBitSet()) {opFail(); return;} 458 ip += BitSet.BITSET_SIZE; 459 s++; 460 sprev = sbegin; // break; 461 } 462 463 private boolean isNotInClassMB() { 464 final int tlen = code[ip++]; 465 466 if (!(s + 1 <= range)) { 467 if (s >= range) { 468 return false; 469 } 470 s = end; 471 ip += tlen; 472 return true; 473 } 474 475 final int ss = s; 476 s++; 477 final int c = chars[ss]; 478 479 if (EncodingHelper.isInCodeRange(code, ip, c)) { 480 return false; 481 } 482 ip += tlen; 483 return true; 484 } 485 486 private void opCClassMBNot() { 487 if (s >= range) {opFail(); return;} 488 if (chars[s] <= 0xff) { 489 s++; 490 final int tlen = code[ip++]; 491 ip += tlen; 492 sprev = sbegin; // break; 493 return; 494 } 495 if (!isNotInClassMB()) {opFail(); return;} 496 sprev = sbegin; // break; 497 } 498 499 private void opCClassMIXNot() { 500 if (s >= range) {opFail(); return;} 501 if (chars[s] > 0xff) { 502 ip += BitSet.BITSET_SIZE; 503 if (!isNotInClassMB()) {opFail(); return;} 504 } else { 505 if (isInBitSet()) {opFail(); return;} 506 ip += BitSet.BITSET_SIZE; 507 final int tlen = code[ip++]; 508 ip += tlen; 509 s++; 510 } 511 sprev = sbegin; // break; 512 } 513 514 private void opCClassNode() { 515 if (s >= range) {opFail(); return;} 516 final CClassNode cc = (CClassNode)regex.operands[code[ip++]]; 517 final int ss = s; 518 s++; 519 final int c = chars[ss]; 520 if (!cc.isCodeInCCLength(c)) {opFail(); return;} 521 sprev = sbegin; // break; 522 } 523 524 private void opAnyChar() { 525 if (s >= range) {opFail(); return;} 526 if (isNewLine(chars[s])) {opFail(); return;} 527 s++; 528 sprev = sbegin; // break; 529 } 530 531 private void opAnyCharML() { 532 if (s >= range) {opFail(); return;} 533 s++; 534 sprev = sbegin; // break; 535 } 536 537 private void opAnyCharStar() { 538 final char[] ch = this.chars; 539 while (s < range) { 540 pushAlt(ip, s, sprev); 541 if (isNewLine(ch, s, end)) {opFail(); return;} 542 sprev = s; 543 s++; 544 } 545 sprev = sbegin; // break; 546 } 547 548 private void opAnyCharMLStar() { 549 while (s < range) { 550 pushAlt(ip, s, sprev); 551 sprev = s; 552 s++; 553 } 554 sprev = sbegin; // break; 555 } 556 557 private void opAnyCharStarPeekNext() { 558 final char c = (char)code[ip]; 559 final char[] ch = this.chars; 560 561 while (s < range) { 562 final char b = ch[s]; 563 if (c == b) { 564 pushAlt(ip + 1, s, sprev); 565 } 566 if (isNewLine(b)) {opFail(); return;} 567 sprev = s; 568 s++; 569 } 570 ip++; 571 sprev = sbegin; // break; 572 } 573 574 private void opAnyCharMLStarPeekNext() { 575 final char c = (char)code[ip]; 576 final char[] ch = this.chars; 577 578 while (s < range) { 579 if (c == ch[s]) { 580 pushAlt(ip + 1, s, sprev); 581 } 582 sprev = s; 583 s++; 584 } 585 ip++; 586 sprev = sbegin; // break; 587 } 588 589 private void opWord() { 590 if (s >= range || !EncodingHelper.isWord(chars[s])) {opFail(); return;} 591 s++; 592 sprev = sbegin; // break; 593 } 594 595 private void opNotWord() { 596 if (s >= range || EncodingHelper.isWord(chars[s])) {opFail(); return;} 597 s++; 598 sprev = sbegin; // break; 599 } 600 601 private void opWordBound() { 602 if (s == str) { 603 if (s >= range || !EncodingHelper.isWord(chars[s])) {opFail(); return;} 604 } else if (s == end) { 605 if (sprev >= end || !EncodingHelper.isWord(chars[sprev])) {opFail(); return;} 606 } else { 607 if (EncodingHelper.isWord(chars[s]) == EncodingHelper.isWord(chars[sprev])) {opFail(); return;} 608 } 609 } 610 611 private void opNotWordBound() { 612 if (s == str) { 613 if (s < range && EncodingHelper.isWord(chars[s])) {opFail(); return;} 614 } else if (s == end) { 615 if (sprev < end && EncodingHelper.isWord(chars[sprev])) {opFail(); return;} 616 } else { 617 if (EncodingHelper.isWord(chars[s]) != EncodingHelper.isWord(chars[sprev])) {opFail(); return;} 618 } 619 } 620 621 private void opWordBegin() { 622 if (s < range && EncodingHelper.isWord(chars[s])) { 623 if (s == str || !EncodingHelper.isWord(chars[sprev])) { 624 return; 625 } 626 } 627 opFail(); 628 } 629 630 private void opWordEnd() { 631 if (s != str && EncodingHelper.isWord(chars[sprev])) { 632 if (s == end || !EncodingHelper.isWord(chars[s])) { 633 return; 634 } 635 } 636 opFail(); 637 } 638 639 private void opBeginBuf() { 640 if (s != str) { 641 opFail(); 642 } 643 } 644 645 private void opEndBuf() { 646 if (s != end) { 647 opFail(); 648 } 649 } 650 651 private void opBeginLine() { 652 if (s == str) { 653 if (isNotBol(msaOptions)) { 654 opFail(); 655 } 656 return; 657 } else if (isNewLine(chars, sprev, end) && s != end) { 658 return; 659 } 660 opFail(); 661 } 662 663 private void opEndLine() { 664 if (s == end) { 665 if (Config.USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE) { 666 if (str == end || !isNewLine(chars, sprev, end)) { 667 if (isNotEol(msaOptions)) { 668 opFail(); 669 } 670 } 671 return; 672 } 673 if (isNotEol(msaOptions)) { 674 opFail(); 675 } 676 return; 677 } else if (isNewLine(chars, s, end)) { 678 return; 679 } 680 opFail(); 681 } 682 683 private void opSemiEndBuf() { 684 if (s == end) { 685 if (Config.USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE) { 686 if (str == end || !isNewLine(chars, sprev, end)) { 687 if (isNotEol(msaOptions)) { 688 opFail(); 689 } 690 } 691 return; 692 } 693 if (isNotEol(msaOptions)) { 694 opFail(); 695 } 696 return; 697 } else if (isNewLine(chars, s, end) && s + 1 == end) { 698 return; 699 } 700 opFail(); 701 } 702 703 private void opBeginPosition() { 704 if (s != msaStart) { 705 opFail(); 706 } 707 } 708 709 private void opMemoryStartPush() { 710 final int mem = code[ip++]; 711 pushMemStart(mem, s); 712 } 713 714 private void opMemoryStart() { 715 final int mem = code[ip++]; 716 repeatStk[memStartStk + mem] = s; 717 } 718 719 private void opMemoryEndPush() { 720 final int mem = code[ip++]; 721 pushMemEnd(mem, s); 722 } 723 724 private void opMemoryEnd() { 725 final int mem = code[ip++]; 726 repeatStk[memEndStk + mem] = s; 727 } 728 729 private void opMemoryEndPushRec() { 730 final int mem = code[ip++]; 731 final int stkp = getMemStart(mem); /* should be before push mem-end. */ 732 pushMemEnd(mem, s); 733 repeatStk[memStartStk + mem] = stkp; 734 } 735 736 private void opMemoryEndRec() { 737 final int mem = code[ip++]; 738 repeatStk[memEndStk + mem] = s; 739 final int stkp = getMemStart(mem); 740 741 if (BitStatus.bsAt(regex.btMemStart, mem)) { 742 repeatStk[memStartStk + mem] = stkp; 743 } else { 744 repeatStk[memStartStk + mem] = stack[stkp].getMemPStr(); 745 } 746 747 pushMemEndMark(mem); 748 } 749 750 private boolean backrefInvalid(final int mem) { 751 return repeatStk[memEndStk + mem] == INVALID_INDEX || repeatStk[memStartStk + mem] == INVALID_INDEX; 752 } 753 754 private int backrefStart(final int mem) { 755 return bsAt(regex.btMemStart, mem) ? stack[repeatStk[memStartStk + mem]].getMemPStr() : repeatStk[memStartStk + mem]; 756 } 757 758 private int backrefEnd(final int mem) { 759 return bsAt(regex.btMemEnd, mem) ? stack[repeatStk[memEndStk + mem]].getMemPStr() : repeatStk[memEndStk + mem]; 760 } 761 762 private void backref(final int mem) { 763 /* if you want to remove following line, 764 you should check in parse and compile time. (numMem) */ 765 if (mem > regex.numMem || backrefInvalid(mem)) {opFail(); return;} 766 767 int pstart = backrefStart(mem); 768 final int pend = backrefEnd(mem); 769 770 int n = pend - pstart; 771 if (s + n > range) {opFail(); return;} 772 sprev = s; 773 774 // STRING_CMP 775 while(n-- > 0) { 776 if (chars[pstart++] != chars[s++]) {opFail(); return;} 777 } 778 779 // beyond string check 780 if (sprev < range) { 781 while (sprev + 1 < s) { 782 sprev++; 783 } 784 } 785 } 786 787 private void opBackRef1() { 788 backref(1); 789 } 790 791 private void opBackRef2() { 792 backref(2); 793 } 794 795 private void opBackRefN() { 796 backref(code[ip++]); 797 } 798 799 private void opBackRefNIC() { 800 final int mem = code[ip++]; 801 /* if you want to remove following line, 802 you should check in parse and compile time. (numMem) */ 803 if (mem > regex.numMem || backrefInvalid(mem)) {opFail(); return;} 804 805 final int pstart = backrefStart(mem); 806 final int pend = backrefEnd(mem); 807 808 final int n = pend - pstart; 809 if (s + n > range) {opFail(); return;} 810 sprev = s; 811 812 value = s; 813 if (!stringCmpIC(regex.caseFoldFlag, pstart, this, n, end)) {opFail(); return;} 814 s = value; 815 816 // if (sprev < chars.length) 817 while (sprev + 1 < s) { 818 sprev++; 819 } 820 } 821 822 private void opBackRefMulti() { 823 final int tlen = code[ip++]; 824 825 int i; 826 loop:for (i=0; i<tlen; i++) { 827 final int mem = code[ip++]; 828 if (backrefInvalid(mem)) { 829 continue; 830 } 831 832 int pstart = backrefStart(mem); 833 final int pend = backrefEnd(mem); 834 835 int n = pend - pstart; 836 if (s + n > range) {opFail(); return;} 837 838 sprev = s; 839 int swork = s; 840 841 while (n-- > 0) { 842 if (chars[pstart++] != chars[swork++]) { 843 continue loop; 844 } 845 } 846 847 s = swork; 848 849 // beyond string check 850 if (sprev < range) { 851 while (sprev + 1 < s) { 852 sprev++; 853 } 854 } 855 856 ip += tlen - i - 1; // * SIZE_MEMNUM (1) 857 break; /* success */ 858 } 859 if (i == tlen) {opFail(); return;} 860 } 861 862 private void opBackRefMultiIC() { 863 final int tlen = code[ip++]; 864 865 int i; 866 loop:for (i=0; i<tlen; i++) { 867 final int mem = code[ip++]; 868 if (backrefInvalid(mem)) { 869 continue; 870 } 871 872 final int pstart = backrefStart(mem); 873 final int pend = backrefEnd(mem); 874 875 final int n = pend - pstart; 876 if (s + n > range) {opFail(); return;} 877 878 sprev = s; 879 880 value = s; 881 if (!stringCmpIC(regex.caseFoldFlag, pstart, this, n, end)) 882 { 883 continue loop; // STRING_CMP_VALUE_IC 884 } 885 s = value; 886 887 // if (sprev < chars.length) 888 while (sprev + 1 < s) { 889 sprev++; 890 } 891 892 ip += tlen - i - 1; // * SIZE_MEMNUM (1) 893 break; /* success */ 894 } 895 if (i == tlen) {opFail(); return;} 896 } 897 898 private boolean memIsInMemp(final int mem, final int num, final int mempp) { 899 for (int i=0, memp = mempp; i<num; i++) { 900 final int m = code[memp++]; 901 if (mem == m) { 902 return true; 903 } 904 } 905 return false; 906 } 907 908 // USE_BACKREF_AT_LEVEL // (s) and (end) implicit 909 private boolean backrefMatchAtNestedLevel(final boolean ignoreCase, final int caseFoldFlag, 910 final int nest, final int memNum, final int memp) { 911 int pend = -1; 912 int level = 0; 913 int k = stk - 1; 914 915 while (k >= 0) { 916 final StackEntry e = stack[k]; 917 918 if (e.type == CALL_FRAME) { 919 level--; 920 } else if (e.type == RETURN) { 921 level++; 922 } else if (level == nest) { 923 if (e.type == MEM_START) { 924 if (memIsInMemp(e.getMemNum(), memNum, memp)) { 925 final int pstart = e.getMemPStr(); 926 if (pend != -1) { 927 if (pend - pstart > end - s) { 928 return false; /* or goto next_mem; */ 929 } 930 int p = pstart; 931 932 value = s; 933 if (ignoreCase) { 934 if (!stringCmpIC(caseFoldFlag, pstart, this, pend - pstart, end)) { 935 return false; /* or goto next_mem; */ 936 } 937 } else { 938 while (p < pend) { 939 if (chars[p++] != chars[value++]) { 940 return false; /* or goto next_mem; */ 941 } 942 } 943 } 944 s = value; 945 946 return true; 947 } 948 } 949 } else if (e.type == MEM_END) { 950 if (memIsInMemp(e.getMemNum(), memNum, memp)) { 951 pend = e.getMemPStr(); 952 } 953 } 954 } 955 k--; 956 } 957 return false; 958 } 959 960 private void opBackRefAtLevel() { 961 final int ic = code[ip++]; 962 final int level = code[ip++]; 963 final int tlen = code[ip++]; 964 965 sprev = s; 966 if (backrefMatchAtNestedLevel(ic != 0, regex.caseFoldFlag, level, tlen, ip)) { // (s) and (end) implicit 967 while (sprev + 1 < s) { 968 sprev++; 969 } 970 ip += tlen; // * SIZE_MEMNUM 971 } else { 972 {opFail(); return;} 973 } 974 } 975 976 private void opNullCheckStart() { 977 final int mem = code[ip++]; 978 pushNullCheckStart(mem, s); 979 } 980 981 private void nullCheckFound() { 982 // null_check_found: 983 /* empty loop founded, skip next instruction */ 984 switch(code[ip++]) { 985 case OPCode.JUMP: 986 case OPCode.PUSH: 987 ip++; // p += SIZE_RELADDR; 988 break; 989 case OPCode.REPEAT_INC: 990 case OPCode.REPEAT_INC_NG: 991 case OPCode.REPEAT_INC_SG: 992 case OPCode.REPEAT_INC_NG_SG: 993 ip++; // p += SIZE_MEMNUM; 994 break; 995 default: 996 throw new InternalException(ErrorMessages.ERR_UNEXPECTED_BYTECODE); 997 } // switch 998 } 999 1000 private void opNullCheckEnd() { 1001 final int mem = code[ip++]; 1002 final int isNull = nullCheck(mem, s); /* mem: null check id */ 1003 1004 if (isNull != 0) { 1005 if (Config.DEBUG_MATCH) { 1006 Config.log.println("NULL_CHECK_END: skip id:" + mem + ", s:" + s); 1007 } 1008 1009 nullCheckFound(); 1010 } 1011 } 1012 1013 // USE_INFINITE_REPEAT_MONOMANIAC_MEM_STATUS_CHECK 1014 private void opNullCheckEndMemST() { 1015 final int mem = code[ip++]; /* mem: null check id */ 1016 final int isNull = nullCheckMemSt(mem, s); 1017 1018 if (isNull != 0) { 1019 if (Config.DEBUG_MATCH) { 1020 Config.log.println("NULL_CHECK_END_MEMST: skip id:" + mem + ", s:" + s); 1021 } 1022 1023 if (isNull == -1) {opFail(); return;} 1024 nullCheckFound(); 1025 } 1026 } 1027 1028 // USE_SUBEXP_CALL 1029 private void opNullCheckEndMemSTPush() { 1030 final int mem = code[ip++]; /* mem: null check id */ 1031 1032 int isNull; 1033 if (Config.USE_MONOMANIAC_CHECK_CAPTURES_IN_ENDLESS_REPEAT) { 1034 isNull = nullCheckMemStRec(mem, s); 1035 } else { 1036 isNull = nullCheckRec(mem, s); 1037 } 1038 1039 if (isNull != 0) { 1040 if (Config.DEBUG_MATCH) { 1041 Config.log.println("NULL_CHECK_END_MEMST_PUSH: skip id:" + mem + ", s:" + s); 1042 } 1043 1044 if (isNull == -1) {opFail(); return;} 1045 nullCheckFound(); 1046 } else { 1047 pushNullCheckEnd(mem); 1048 } 1049 } 1050 1051 private void opJump() { 1052 ip += code[ip] + 1; 1053 } 1054 1055 private void opPush() { 1056 final int addr = code[ip++]; 1057 pushAlt(ip + addr, s, sprev); 1058 } 1059 1060 private void opPop() { 1061 popOne(); 1062 } 1063 1064 private void opPushOrJumpExact1() { 1065 final int addr = code[ip++]; 1066 // beyond string check 1067 if (s < range && code[ip] == chars[s]) { 1068 ip++; 1069 pushAlt(ip + addr, s, sprev); 1070 return; 1071 } 1072 ip += addr + 1; 1073 } 1074 1075 private void opPushIfPeekNext() { 1076 final int addr = code[ip++]; 1077 // beyond string check 1078 if (s < range && code[ip] == chars[s]) { 1079 ip++; 1080 pushAlt(ip + addr, s, sprev); 1081 return; 1082 } 1083 ip++; 1084 } 1085 1086 private void opRepeat() { 1087 final int mem = code[ip++]; /* mem: OP_REPEAT ID */ 1088 final int addr= code[ip++]; 1089 1090 // ensure1(); 1091 repeatStk[mem] = stk; 1092 pushRepeat(mem, ip); 1093 1094 if (regex.repeatRangeLo[mem] == 0) { // lower 1095 pushAlt(ip + addr, s, sprev); 1096 } 1097 } 1098 1099 private void opRepeatNG() { 1100 final int mem = code[ip++]; /* mem: OP_REPEAT ID */ 1101 final int addr= code[ip++]; 1102 1103 // ensure1(); 1104 repeatStk[mem] = stk; 1105 pushRepeat(mem, ip); 1106 1107 if (regex.repeatRangeLo[mem] == 0) { 1108 pushAlt(ip, s, sprev); 1109 ip += addr; 1110 } 1111 } 1112 1113 private void repeatInc(final int mem, final int si) { 1114 final StackEntry e = stack[si]; 1115 1116 e.increaseRepeatCount(); 1117 1118 if (e.getRepeatCount() >= regex.repeatRangeHi[mem]) { 1119 /* end of repeat. Nothing to do. */ 1120 } else if (e.getRepeatCount() >= regex.repeatRangeLo[mem]) { 1121 pushAlt(ip, s, sprev); 1122 ip = e.getRepeatPCode(); /* Don't use stkp after PUSH. */ 1123 } else { 1124 ip = e.getRepeatPCode(); 1125 } 1126 pushRepeatInc(si); 1127 } 1128 1129 private void opRepeatInc() { 1130 final int mem = code[ip++]; /* mem: OP_REPEAT ID */ 1131 final int si = repeatStk[mem]; 1132 repeatInc(mem, si); 1133 } 1134 1135 private void opRepeatIncSG() { 1136 final int mem = code[ip++]; /* mem: OP_REPEAT ID */ 1137 final int si = getRepeat(mem); 1138 repeatInc(mem, si); 1139 } 1140 1141 private void repeatIncNG(final int mem, final int si) { 1142 final StackEntry e = stack[si]; 1143 1144 e.increaseRepeatCount(); 1145 1146 if (e.getRepeatCount() < regex.repeatRangeHi[mem]) { 1147 if (e.getRepeatCount() >= regex.repeatRangeLo[mem]) { 1148 final int pcode = e.getRepeatPCode(); 1149 pushRepeatInc(si); 1150 pushAlt(pcode, s, sprev); 1151 } else { 1152 ip = e.getRepeatPCode(); 1153 pushRepeatInc(si); 1154 } 1155 } else if (e.getRepeatCount() == regex.repeatRangeHi[mem]) { 1156 pushRepeatInc(si); 1157 } 1158 } 1159 1160 private void opRepeatIncNG() { 1161 final int mem = code[ip++]; 1162 final int si = repeatStk[mem]; 1163 repeatIncNG(mem, si); 1164 } 1165 1166 private void opRepeatIncNGSG() { 1167 final int mem = code[ip++]; 1168 final int si = getRepeat(mem); 1169 repeatIncNG(mem, si); 1170 } 1171 1172 private void opPushPos() { 1173 pushPos(s, sprev); 1174 } 1175 1176 private void opPopPos() { 1177 final StackEntry e = stack[posEnd()]; 1178 s = e.getStatePStr(); 1179 sprev= e.getStatePStrPrev(); 1180 } 1181 1182 private void opPushPosNot() { 1183 final int addr = code[ip++]; 1184 pushPosNot(ip + addr, s, sprev); 1185 } 1186 1187 private void opFailPos() { 1188 popTilPosNot(); 1189 opFail(); 1190 } 1191 1192 private void opPushStopBT() { 1193 pushStopBT(); 1194 } 1195 1196 private void opPopStopBT() { 1197 stopBtEnd(); 1198 } 1199 1200 private void opLookBehind() { 1201 final int tlen = code[ip++]; 1202 s = EncodingHelper.stepBack(str, s, tlen); 1203 if (s == -1) {opFail(); return;} 1204 sprev = EncodingHelper.prevCharHead(str, s); 1205 } 1206 1207 private void opPushLookBehindNot() { 1208 final int addr = code[ip++]; 1209 final int tlen = code[ip++]; 1210 final int q = EncodingHelper.stepBack(str, s, tlen); 1211 if (q == -1) { 1212 /* too short case -> success. ex. /(?<!XXX)a/.match("a") 1213 If you want to change to fail, replace following line. */ 1214 ip += addr; 1215 // return FAIL; 1216 } else { 1217 pushLookBehindNot(ip + addr, s, sprev); 1218 s = q; 1219 sprev = EncodingHelper.prevCharHead(str, s); 1220 } 1221 } 1222 1223 private void opFailLookBehindNot() { 1224 popTilLookBehindNot(); 1225 opFail(); 1226 } 1227 1228 private void opFail() { 1229 if (stack == null) { 1230 ip = regex.codeLength - 1; 1231 return; 1232 } 1233 1234 1235 final StackEntry e = pop(); 1236 ip = e.getStatePCode(); 1237 s = e.getStatePStr(); 1238 sprev = e.getStatePStrPrev(); 1239 } 1240 1241 private int finish() { 1242 return bestLen; 1243 } 1244} 1245