Analyser.java revision 1088:7e62d98d4625
1/* 2 * Permission is hereby granted, free of charge, to any person obtaining a copy of 3 * this software and associated documentation files (the "Software"), to deal in 4 * the Software without restriction, including without limitation the rights to 5 * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies 6 * of the Software, and to permit persons to whom the Software is furnished to do 7 * so, subject to the following conditions: 8 * 9 * The above copyright notice and this permission notice shall be included in all 10 * copies or substantial portions of the Software. 11 * 12 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 13 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 14 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 15 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 16 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 17 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 18 * SOFTWARE. 19 */ 20package jdk.nashorn.internal.runtime.regexp.joni; 21 22import static jdk.nashorn.internal.runtime.regexp.joni.BitStatus.bsAll; 23import static jdk.nashorn.internal.runtime.regexp.joni.BitStatus.bsAt; 24import static jdk.nashorn.internal.runtime.regexp.joni.BitStatus.bsOnAt; 25import static jdk.nashorn.internal.runtime.regexp.joni.Option.isFindCondition; 26import static jdk.nashorn.internal.runtime.regexp.joni.Option.isIgnoreCase; 27import static jdk.nashorn.internal.runtime.regexp.joni.Option.isMultiline; 28import static jdk.nashorn.internal.runtime.regexp.joni.ast.ConsAltNode.newAltNode; 29import static jdk.nashorn.internal.runtime.regexp.joni.ast.QuantifierNode.isRepeatInfinite; 30import java.util.HashSet; 31import jdk.nashorn.internal.runtime.regexp.joni.ast.AnchorNode; 32import jdk.nashorn.internal.runtime.regexp.joni.ast.BackRefNode; 33import jdk.nashorn.internal.runtime.regexp.joni.ast.CClassNode; 34import jdk.nashorn.internal.runtime.regexp.joni.ast.ConsAltNode; 35import jdk.nashorn.internal.runtime.regexp.joni.ast.EncloseNode; 36import jdk.nashorn.internal.runtime.regexp.joni.ast.Node; 37import jdk.nashorn.internal.runtime.regexp.joni.ast.QuantifierNode; 38import jdk.nashorn.internal.runtime.regexp.joni.ast.StringNode; 39import jdk.nashorn.internal.runtime.regexp.joni.constants.AnchorType; 40import jdk.nashorn.internal.runtime.regexp.joni.constants.EncloseType; 41import jdk.nashorn.internal.runtime.regexp.joni.constants.NodeType; 42import jdk.nashorn.internal.runtime.regexp.joni.constants.StackPopLevel; 43import jdk.nashorn.internal.runtime.regexp.joni.constants.TargetInfo; 44import jdk.nashorn.internal.runtime.regexp.joni.encoding.ObjPtr; 45import jdk.nashorn.internal.runtime.regexp.joni.exception.InternalException; 46import jdk.nashorn.internal.runtime.regexp.joni.exception.SyntaxException; 47import jdk.nashorn.internal.runtime.regexp.joni.exception.ValueException; 48 49final class Analyser extends Parser { 50 51 protected Analyser(final ScanEnvironment env, final char[] chars, final int p, final int end) { 52 super(env, chars, p, end); 53 } 54 55 @SuppressWarnings("unused") 56 protected final void compile() { 57 if (Config.DEBUG) { 58 Config.log.println(new String(chars, getBegin(), getEnd())); 59 } 60 61 reset(); 62 63 regex.numMem = 0; 64 regex.numRepeat = 0; 65 regex.numNullCheck = 0; 66 //regex.repeatRangeAlloc = 0; 67 regex.repeatRangeLo = null; 68 regex.repeatRangeHi = null; 69 70 parse(); 71 72 if (Config.DEBUG_PARSE_TREE_RAW && Config.DEBUG_PARSE_TREE) { 73 Config.log.println("<RAW TREE>"); 74 Config.log.println(root + "\n"); 75 } 76 77 root = setupTree(root, 0); 78 if (Config.DEBUG_PARSE_TREE) { 79 if (Config.DEBUG_PARSE_TREE_RAW) { 80 Config.log.println("<TREE>"); 81 } 82 root.verifyTree(new HashSet<Node>(), env.reg.warnings); 83 Config.log.println(root + "\n"); 84 } 85 86 regex.captureHistory = env.captureHistory; 87 regex.btMemStart = env.btMemStart; 88 regex.btMemEnd = env.btMemEnd; 89 90 if (isFindCondition(regex.options)) { 91 regex.btMemEnd = bsAll(); 92 } else { 93 regex.btMemEnd = env.btMemEnd; 94 regex.btMemEnd |= regex.captureHistory; 95 } 96 97 regex.clearOptimizeInfo(); 98 99 if (!Config.DONT_OPTIMIZE) { 100 setOptimizedInfoFromTree(root); 101 } 102 103 env.memNodes = null; 104 105 if (regex.numRepeat != 0 || regex.btMemEnd != 0) { 106 regex.stackPopLevel = StackPopLevel.ALL; 107 } else { 108 if (regex.btMemStart != 0) { 109 regex.stackPopLevel = StackPopLevel.MEM_START; 110 } else { 111 regex.stackPopLevel = StackPopLevel.FREE; 112 } 113 } 114 115 if (Config.DEBUG_COMPILE) { 116 Config.log.println("stack used: " + regex.stackNeeded); 117 if (Config.USE_STRING_TEMPLATES) { 118 Config.log.print("templates: " + regex.templateNum + "\n"); 119 } 120 Config.log.println(new ByteCodePrinter(regex).byteCodeListToString()); 121 122 } // DEBUG_COMPILE 123 } 124 125 private void swap(final Node a, final Node b) { 126 a.swap(b); 127 128 if (root == b) { 129 root = a; 130 } else if (root == a) { 131 root = b; 132 } 133 } 134 135 // USE_INFINITE_REPEAT_MONOMANIAC_MEM_STATUS_CHECK 136 private int quantifiersMemoryInfo(final Node node) { 137 int info = 0; 138 139 switch(node.getType()) { 140 case NodeType.LIST: 141 case NodeType.ALT: 142 ConsAltNode can = (ConsAltNode)node; 143 do { 144 final int v = quantifiersMemoryInfo(can.car); 145 if (v > info) { 146 info = v; 147 } 148 } while ((can = can.cdr) != null); 149 break; 150 151 case NodeType.QTFR: 152 final QuantifierNode qn = (QuantifierNode)node; 153 if (qn.upper != 0) { 154 info = quantifiersMemoryInfo(qn.target); 155 } 156 break; 157 158 case NodeType.ENCLOSE: 159 final EncloseNode en = (EncloseNode)node; 160 switch (en.type) { 161 case EncloseType.MEMORY: 162 return TargetInfo.IS_EMPTY_MEM; 163 164 case EncloseType.OPTION: 165 case EncloseNode.STOP_BACKTRACK: 166 info = quantifiersMemoryInfo(en.target); 167 break; 168 169 default: 170 break; 171 } // inner switch 172 break; 173 174 case NodeType.BREF: 175 case NodeType.STR: 176 case NodeType.CTYPE: 177 case NodeType.CCLASS: 178 case NodeType.CANY: 179 case NodeType.ANCHOR: 180 default: 181 break; 182 } // switch 183 184 return info; 185 } 186 187 private int getMinMatchLength(final Node node) { 188 int min = 0; 189 190 switch (node.getType()) { 191 case NodeType.BREF: 192 final BackRefNode br = (BackRefNode)node; 193 if (br.isRecursion()) { 194 break; 195 } 196 197 if (br.backRef > env.numMem) { 198 throw new ValueException(ERR_INVALID_BACKREF); 199 } 200 min = getMinMatchLength(env.memNodes[br.backRef]); 201 202 break; 203 204 case NodeType.LIST: 205 ConsAltNode can = (ConsAltNode)node; 206 do { 207 min += getMinMatchLength(can.car); 208 } while ((can = can.cdr) != null); 209 break; 210 211 case NodeType.ALT: 212 ConsAltNode y = (ConsAltNode)node; 213 do { 214 final Node x = y.car; 215 final int tmin = getMinMatchLength(x); 216 if (y == node) { 217 min = tmin; 218 } else if (min > tmin) { 219 min = tmin; 220 } 221 } while ((y = y.cdr) != null); 222 break; 223 224 case NodeType.STR: 225 min = ((StringNode)node).length(); 226 break; 227 228 case NodeType.CTYPE: 229 min = 1; 230 break; 231 232 case NodeType.CCLASS: 233 case NodeType.CANY: 234 min = 1; 235 break; 236 237 case NodeType.QTFR: 238 final QuantifierNode qn = (QuantifierNode)node; 239 if (qn.lower > 0) { 240 min = getMinMatchLength(qn.target); 241 min = MinMaxLen.distanceMultiply(min, qn.lower); 242 } 243 break; 244 245 case NodeType.ENCLOSE: 246 final EncloseNode en = (EncloseNode)node; 247 switch (en.type) { 248 case EncloseType.MEMORY: 249 if (en.isMinFixed()) { 250 min = en.minLength; 251 } else { 252 min = getMinMatchLength(en.target); 253 en.minLength = min; 254 en.setMinFixed(); 255 } 256 break; 257 258 case EncloseType.OPTION: 259 case EncloseType.STOP_BACKTRACK: 260 min = getMinMatchLength(en.target); 261 break; 262 263 default: 264 break; 265 } // inner switch 266 break; 267 268 case NodeType.ANCHOR: 269 default: 270 break; 271 } // switch 272 273 return min; 274 } 275 276 private int getMaxMatchLength(final Node node) { 277 int max = 0; 278 279 switch (node.getType()) { 280 case NodeType.LIST: 281 ConsAltNode ln = (ConsAltNode)node; 282 do { 283 final int tmax = getMaxMatchLength(ln.car); 284 max = MinMaxLen.distanceAdd(max, tmax); 285 } while ((ln = ln.cdr) != null); 286 break; 287 288 case NodeType.ALT: 289 ConsAltNode an = (ConsAltNode)node; 290 do { 291 final int tmax = getMaxMatchLength(an.car); 292 if (max < tmax) { 293 max = tmax; 294 } 295 } while ((an = an.cdr) != null); 296 break; 297 298 case NodeType.STR: 299 max = ((StringNode)node).length(); 300 break; 301 302 case NodeType.CTYPE: 303 max = 1; 304 break; 305 306 case NodeType.CCLASS: 307 case NodeType.CANY: 308 max = 1; 309 break; 310 311 case NodeType.BREF: 312 final BackRefNode br = (BackRefNode)node; 313 if (br.isRecursion()) { 314 max = MinMaxLen.INFINITE_DISTANCE; 315 break; 316 } 317 318 if (br.backRef > env.numMem) { 319 throw new ValueException(ERR_INVALID_BACKREF); 320 } 321 final int tmax = getMaxMatchLength(env.memNodes[br.backRef]); 322 if (max < tmax) { 323 max = tmax; 324 } 325 break; 326 327 case NodeType.QTFR: 328 final QuantifierNode qn = (QuantifierNode)node; 329 if (qn.upper != 0) { 330 max = getMaxMatchLength(qn.target); 331 if (max != 0) { 332 if (!isRepeatInfinite(qn.upper)) { 333 max = MinMaxLen.distanceMultiply(max, qn.upper); 334 } else { 335 max = MinMaxLen.INFINITE_DISTANCE; 336 } 337 } 338 } 339 break; 340 341 case NodeType.ENCLOSE: 342 final EncloseNode en = (EncloseNode)node; 343 switch (en.type) { 344 case EncloseType.MEMORY: 345 if (en.isMaxFixed()) { 346 max = en.maxLength; 347 } else { 348 max = getMaxMatchLength(en.target); 349 en.maxLength = max; 350 en.setMaxFixed(); 351 } 352 break; 353 354 case EncloseType.OPTION: 355 case EncloseType.STOP_BACKTRACK: 356 max = getMaxMatchLength(en.target); 357 break; 358 359 default: 360 break; 361 } // inner switch 362 break; 363 364 case NodeType.ANCHOR: 365 default: 366 break; 367 } // switch 368 369 return max; 370 } 371 372 private static final int GET_CHAR_LEN_VARLEN = -1; 373 private static final int GET_CHAR_LEN_TOP_ALT_VARLEN = -2; 374 protected final int getCharLengthTree(final Node node) { 375 return getCharLengthTree(node, 0); 376 } 377 378 private int getCharLengthTree(final Node node, final int levelp) { 379 final int level = levelp + 1; 380 381 int len = 0; 382 returnCode = 0; 383 384 switch(node.getType()) { 385 case NodeType.LIST: 386 ConsAltNode ln = (ConsAltNode)node; 387 do { 388 final int tlen = getCharLengthTree(ln.car, level); 389 if (returnCode == 0) { 390 len = MinMaxLen.distanceAdd(len, tlen); 391 } 392 } while (returnCode == 0 && (ln = ln.cdr) != null); 393 break; 394 395 case NodeType.ALT: 396 ConsAltNode an = (ConsAltNode)node; 397 boolean varLen = false; 398 399 int tlen = getCharLengthTree(an.car, level); 400 while (returnCode == 0 && (an = an.cdr) != null) { 401 final int tlen2 = getCharLengthTree(an.car, level); 402 if (returnCode == 0) { 403 if (tlen != tlen2) { 404 varLen = true; 405 } 406 } 407 } 408 409 if (returnCode == 0) { 410 if (varLen) { 411 if (level == 1) { 412 returnCode = GET_CHAR_LEN_TOP_ALT_VARLEN; 413 } else { 414 returnCode = GET_CHAR_LEN_VARLEN; 415 } 416 } else { 417 len = tlen; 418 } 419 } 420 break; 421 422 case NodeType.STR: 423 final StringNode sn = (StringNode)node; 424 len = sn.length(); 425 break; 426 427 case NodeType.QTFR: 428 final QuantifierNode qn = (QuantifierNode)node; 429 if (qn.lower == qn.upper) { 430 tlen = getCharLengthTree(qn.target, level); 431 if (returnCode == 0) { 432 len = MinMaxLen.distanceMultiply(tlen, qn.lower); 433 } 434 } else { 435 returnCode = GET_CHAR_LEN_VARLEN; 436 } 437 break; 438 439 case NodeType.CTYPE: 440 case NodeType.CCLASS: 441 case NodeType.CANY: 442 len = 1; 443 break; 444 445 case NodeType.ENCLOSE: 446 final EncloseNode en = (EncloseNode)node; 447 switch(en.type) { 448 case EncloseType.MEMORY: 449 if (en.isCLenFixed()) { 450 len = en.charLength; 451 } else { 452 len = getCharLengthTree(en.target, level); 453 if (returnCode == 0) { 454 en.charLength = len; 455 en.setCLenFixed(); 456 } 457 } 458 break; 459 460 case EncloseType.OPTION: 461 case EncloseType.STOP_BACKTRACK: 462 len = getCharLengthTree(en.target, level); 463 break; 464 465 default: 466 break; 467 } // inner switch 468 break; 469 470 case NodeType.ANCHOR: 471 break; 472 473 default: 474 returnCode = GET_CHAR_LEN_VARLEN; 475 } // switch 476 return len; 477 } 478 479 /* x is not included y ==> 1 : 0 */ 480 private static boolean isNotIncluded(final Node xn, final Node yn) { 481 Node x = xn; 482 Node y = yn; 483 Node tmp; 484 485 // !retry:! 486 retry: while(true) { 487 488 final int yType = y.getType(); 489 490 switch(x.getType()) { 491 case NodeType.CTYPE: 492 switch(yType) { 493 494 case NodeType.CCLASS: 495 // !swap:! 496 tmp = x; 497 x = y; 498 y = tmp; 499 // !goto retry;! 500 continue retry; 501 502 case NodeType.STR: 503 // !goto swap;! 504 tmp = x; 505 x = y; 506 y = tmp; 507 continue retry; 508 509 default: 510 break; 511 } // inner switch 512 break; 513 514 case NodeType.CCLASS: 515 final CClassNode xc = (CClassNode)x; 516 517 switch(yType) { 518 519 case NodeType.CCLASS: 520 final CClassNode yc = (CClassNode)y; 521 522 for (int i=0; i<BitSet.SINGLE_BYTE_SIZE; i++) { 523 boolean v = xc.bs.at(i); 524 if ((v && !xc.isNot()) || (!v && xc.isNot())) { 525 v = yc.bs.at(i); 526 if ((v && !yc.isNot()) || (!v && yc.isNot())) { 527 return false; 528 } 529 } 530 } 531 if ((xc.mbuf == null && !xc.isNot()) || yc.mbuf == null && !yc.isNot()) { 532 return true; 533 } 534 return false; 535 // break; not reached 536 537 case NodeType.STR: 538 // !goto swap;! 539 tmp = x; 540 x = y; 541 y = tmp; 542 continue retry; 543 544 default: 545 break; 546 547 } // inner switch 548 break; // case NodeType.CCLASS 549 550 case NodeType.STR: 551 final StringNode xs = (StringNode)x; 552 if (xs.length() == 0) { 553 break; 554 } 555 556 switch (yType) { 557 558 case NodeType.CCLASS: 559 final CClassNode cc = (CClassNode)y; 560 final int code = xs.chars[xs.p]; 561 return !cc.isCodeInCC(code); 562 563 case NodeType.STR: 564 final StringNode ys = (StringNode)y; 565 int len = xs.length(); 566 if (len > ys.length()) { 567 len = ys.length(); 568 } 569 if (xs.isAmbig() || ys.isAmbig()) { 570 /* tiny version */ 571 return false; 572 } 573 for (int i=0, pt=ys.p, q=xs.p; i<len; i++, pt++, q++) { 574 if (ys.chars[pt] != xs.chars[q]) { 575 return true; 576 } 577 } 578 break; 579 580 default: 581 break; 582 } // inner switch 583 584 break; // case NodeType.STR 585 default: 586 break; 587 588 } // switch 589 590 break; 591 } // retry: while 592 return false; 593 } 594 595 private Node getHeadValueNode(final Node node, final boolean exact) { 596 Node n = null; 597 598 switch(node.getType()) { 599 case NodeType.BREF: 600 case NodeType.ALT: 601 case NodeType.CANY: 602 break; 603 604 case NodeType.CTYPE: 605 case NodeType.CCLASS: 606 if (!exact) { 607 n = node; 608 } 609 break; 610 611 case NodeType.LIST: 612 n = getHeadValueNode(((ConsAltNode)node).car, exact); 613 break; 614 615 case NodeType.STR: 616 final StringNode sn = (StringNode)node; 617 if (sn.end <= sn.p) 618 { 619 break; // ??? 620 } 621 622 if (exact && !sn.isRaw() && isIgnoreCase(regex.options)){ 623 // nothing 624 } else { 625 n = node; 626 } 627 break; 628 629 case NodeType.QTFR: 630 final QuantifierNode qn = (QuantifierNode)node; 631 if (qn.lower > 0) { 632 if (qn.headExact != null) { 633 n = qn.headExact; 634 } else { 635 n = getHeadValueNode(qn.target, exact); 636 } 637 } 638 break; 639 640 case NodeType.ENCLOSE: 641 final EncloseNode en = (EncloseNode)node; 642 643 switch (en.type) { 644 case EncloseType.OPTION: 645 final int options = regex.options; 646 regex.options = en.option; 647 n = getHeadValueNode(en.target, exact); 648 regex.options = options; 649 break; 650 651 case EncloseType.MEMORY: 652 case EncloseType.STOP_BACKTRACK: 653 n = getHeadValueNode(en.target, exact); 654 break; 655 656 default: 657 break; 658 } // inner switch 659 break; 660 661 case NodeType.ANCHOR: 662 final AnchorNode an = (AnchorNode)node; 663 if (an.type == AnchorType.PREC_READ) { 664 n = getHeadValueNode(an.target, exact); 665 } 666 break; 667 668 default: 669 break; 670 } // switch 671 672 return n; 673 } 674 675 // true: invalid 676 private boolean checkTypeTree(final Node node, final int typeMask, final int encloseMask, final int anchorMask) { 677 if ((node.getType2Bit() & typeMask) == 0) { 678 return true; 679 } 680 681 boolean invalid = false; 682 683 switch(node.getType()) { 684 case NodeType.LIST: 685 case NodeType.ALT: 686 ConsAltNode can = (ConsAltNode)node; 687 do { 688 invalid = checkTypeTree(can.car, typeMask, encloseMask, anchorMask); 689 } while (!invalid && (can = can.cdr) != null); 690 break; 691 692 case NodeType.QTFR: 693 invalid = checkTypeTree(((QuantifierNode)node).target, typeMask, encloseMask, anchorMask); 694 break; 695 696 case NodeType.ENCLOSE: 697 final EncloseNode en = (EncloseNode)node; 698 if ((en.type & encloseMask) == 0) { 699 return true; 700 } 701 invalid = checkTypeTree(en.target, typeMask, encloseMask, anchorMask); 702 break; 703 704 case NodeType.ANCHOR: 705 final AnchorNode an = (AnchorNode)node; 706 if ((an.type & anchorMask) == 0) { 707 return true; 708 } 709 710 if (an.target != null) { 711 invalid = checkTypeTree(an.target, typeMask, encloseMask, anchorMask); 712 } 713 break; 714 715 default: 716 break; 717 718 } // switch 719 720 return invalid; 721 } 722 723 /* divide different length alternatives in look-behind. 724 (?<=A|B) ==> (?<=A)|(?<=B) 725 (?<!A|B) ==> (?<!A)(?<!B) 726 */ 727 private Node divideLookBehindAlternatives(final Node nodep) { 728 Node node = nodep; 729 final AnchorNode an = (AnchorNode)node; 730 final int anchorType = an.type; 731 Node head = an.target; 732 Node np = ((ConsAltNode)head).car; 733 734 swap(node, head); 735 736 final Node tmp = node; 737 node = head; 738 head = tmp; 739 740 ((ConsAltNode)node).setCar(head); 741 ((AnchorNode)head).setTarget(np); 742 np = node; 743 744 while ((np = ((ConsAltNode)np).cdr) != null) { 745 final AnchorNode insert = new AnchorNode(anchorType); 746 insert.setTarget(((ConsAltNode)np).car); 747 ((ConsAltNode)np).setCar(insert); 748 } 749 750 if (anchorType == AnchorType.LOOK_BEHIND_NOT) { 751 np = node; 752 do { 753 ((ConsAltNode)np).toListNode(); /* alt -> list */ 754 } while ((np = ((ConsAltNode)np).cdr) != null); 755 } 756 757 return node; 758 } 759 760 private Node setupLookBehind(final Node node) { 761 final AnchorNode an = (AnchorNode)node; 762 final int len = getCharLengthTree(an.target); 763 switch (returnCode) { 764 case 0: 765 an.charLength = len; 766 break; 767 case GET_CHAR_LEN_VARLEN: 768 throw new SyntaxException(ERR_INVALID_LOOK_BEHIND_PATTERN); 769 case GET_CHAR_LEN_TOP_ALT_VARLEN: 770 if (syntax.differentLengthAltLookBehind()) { 771 return divideLookBehindAlternatives(node); 772 } 773 throw new SyntaxException(ERR_INVALID_LOOK_BEHIND_PATTERN); 774 default: 775 break; 776 } 777 return node; 778 } 779 780 private void nextSetup(final Node nodep, final Node nextNode) { 781 Node node = nodep; 782 783 // retry: 784 retry: while(true) { 785 786 final int type = node.getType(); 787 if (type == NodeType.QTFR) { 788 final QuantifierNode qn = (QuantifierNode)node; 789 if (qn.greedy && isRepeatInfinite(qn.upper)) { 790 if (Config.USE_QTFR_PEEK_NEXT) { 791 final StringNode n = (StringNode)getHeadValueNode(nextNode, true); 792 /* '\0': for UTF-16BE etc... */ 793 if (n != null && n.chars[n.p] != 0) { // ????????? 794 qn.nextHeadExact = n; 795 } 796 } // USE_QTFR_PEEK_NEXT 797 /* automatic posseivation a*b ==> (?>a*)b */ 798 if (qn.lower <= 1) { 799 if (qn.target.isSimple()) { 800 final Node x = getHeadValueNode(qn.target, false); 801 if (x != null) { 802 final Node y = getHeadValueNode(nextNode, false); 803 if (y != null && isNotIncluded(x, y)) { 804 final EncloseNode en = new EncloseNode(EncloseType.STOP_BACKTRACK); //onig_node_new_enclose 805 en.setStopBtSimpleRepeat(); 806 //en.setTarget(qn.target); // optimize it ?? 807 swap(node, en); 808 809 en.setTarget(node); 810 } 811 } 812 } 813 } 814 } 815 } else if (type == NodeType.ENCLOSE) { 816 final EncloseNode en = (EncloseNode)node; 817 if (en.isMemory()) { 818 node = en.target; 819 // !goto retry;! 820 continue retry; 821 } 822 } 823 824 break; 825 } // while 826 } 827 828 private void updateStringNodeCaseFoldMultiByte(final StringNode sn) { 829 final char[] ch = sn.chars; 830 final int end = sn.end; 831 value = sn.p; 832 int sp = 0; 833 char buf; 834 835 while (value < end) { 836 final int ovalue = value; 837 buf = EncodingHelper.toLowerCase(ch[value++]); 838 839 if (ch[ovalue] != buf) { 840 841 char[] sbuf = new char[sn.length() << 1]; 842 System.arraycopy(ch, sn.p, sbuf, 0, ovalue - sn.p); 843 value = ovalue; 844 while (value < end) { 845 buf = EncodingHelper.toLowerCase(ch[value++]); 846 if (sp >= sbuf.length) { 847 final char[]tmp = new char[sbuf.length << 1]; 848 System.arraycopy(sbuf, 0, tmp, 0, sbuf.length); 849 sbuf = tmp; 850 } 851 sbuf[sp++] = buf; 852 } 853 sn.set(sbuf, 0, sp); 854 return; 855 } 856 sp++; 857 } 858 } 859 860 private void updateStringNodeCaseFold(final Node node) { 861 final StringNode sn = (StringNode)node; 862 updateStringNodeCaseFoldMultiByte(sn); 863 } 864 865 private Node expandCaseFoldMakeRemString(final char[] ch, final int pp, final int end) { 866 final StringNode node = new StringNode(ch, pp, end); 867 868 updateStringNodeCaseFold(node); 869 node.setAmbig(); 870 node.setDontGetOptInfo(); 871 return node; 872 } 873 874 private static boolean expandCaseFoldStringAlt(final int itemNum, final char[] items, 875 final char[] chars, final int p, final int slen, final int end, final ObjPtr<Node> node) { 876 877 ConsAltNode altNode; 878 node.p = altNode = newAltNode(null, null); 879 880 StringNode snode = new StringNode(chars, p, p + slen); 881 altNode.setCar(snode); 882 883 for (int i=0; i<itemNum; i++) { 884 snode = new StringNode(); 885 886 snode.catCode(items[i]); 887 888 final ConsAltNode an = newAltNode(null, null); 889 an.setCar(snode); 890 altNode.setCdr(an); 891 altNode = an; 892 } 893 return false; 894 } 895 896 private static final int THRESHOLD_CASE_FOLD_ALT_FOR_EXPANSION = 8; 897 private Node expandCaseFoldString(final Node node) { 898 final StringNode sn = (StringNode)node; 899 900 if (sn.isAmbig() || sn.length() <= 0) { 901 return node; 902 } 903 904 final char[] chars1 = sn.chars; 905 int pt = sn.p; 906 final int end = sn.end; 907 int altNum = 1; 908 909 ConsAltNode topRoot = null, r = null; 910 @SuppressWarnings("unused") 911 final ObjPtr<Node> prevNode = new ObjPtr<Node>(); 912 StringNode stringNode = null; 913 914 while (pt < end) { 915 final char[] items = EncodingHelper.caseFoldCodesByString(regex.caseFoldFlag, chars1[pt]); 916 917 if (items.length == 0) { 918 if (stringNode == null) { 919 if (r == null && prevNode.p != null) { 920 topRoot = r = ConsAltNode.listAdd(null, prevNode.p); 921 } 922 923 prevNode.p = stringNode = new StringNode(); // onig_node_new_str(NULL, NULL); 924 925 if (r != null) { 926 ConsAltNode.listAdd(r, stringNode); 927 } 928 929 } 930 931 stringNode.cat(chars1, pt, pt + 1); 932 } else { 933 altNum *= (items.length + 1); 934 if (altNum > THRESHOLD_CASE_FOLD_ALT_FOR_EXPANSION) { 935 break; 936 } 937 938 if (r == null && prevNode.p != null) { 939 topRoot = r = ConsAltNode.listAdd(null, prevNode.p); 940 } 941 942 expandCaseFoldStringAlt(items.length, items, chars1, pt, 1, end, prevNode); 943 if (r != null) { 944 ConsAltNode.listAdd(r, prevNode.p); 945 } 946 stringNode = null; 947 } 948 pt++; 949 } 950 951 if (pt < end) { 952 final Node srem = expandCaseFoldMakeRemString(chars1, pt, end); 953 954 if (prevNode.p != null && r == null) { 955 topRoot = r = ConsAltNode.listAdd(null, prevNode.p); 956 } 957 958 if (r == null) { 959 prevNode.p = srem; 960 } else { 961 ConsAltNode.listAdd(r, srem); 962 } 963 } 964 /* ending */ 965 final Node xnode = topRoot != null ? topRoot : prevNode.p; 966 967 swap(node, xnode); 968 return xnode; 969 } 970 971 private static final int IN_ALT = (1<<0); 972 private static final int IN_NOT = (1<<1); 973 private static final int IN_REPEAT = (1<<2); 974 private static final int IN_VAR_REPEAT = (1<<3); 975 private static final int EXPAND_STRING_MAX_LENGTH = 100; 976 977 /* setup_tree does the following work. 978 1. check empty loop. (set qn->target_empty_info) 979 2. expand ignore-case in char class. 980 3. set memory status bit flags. (reg->mem_stats) 981 4. set qn->head_exact for [push, exact] -> [push_or_jump_exact1, exact]. 982 5. find invalid patterns in look-behind. 983 6. expand repeated string. 984 */ 985 protected final Node setupTree(final Node nodep, final int statep) { 986 Node node = nodep; 987 int state = statep; 988 989 restart: while (true) { 990 switch (node.getType()) { 991 case NodeType.LIST: 992 ConsAltNode lin = (ConsAltNode)node; 993 Node prev = null; 994 do { 995 setupTree(lin.car, state); 996 if (prev != null) { 997 nextSetup(prev, lin.car); 998 } 999 prev = lin.car; 1000 } while ((lin = lin.cdr) != null); 1001 break; 1002 1003 case NodeType.ALT: 1004 ConsAltNode aln = (ConsAltNode)node; 1005 do { 1006 setupTree(aln.car, (state | IN_ALT)); 1007 } while ((aln = aln.cdr) != null); 1008 break; 1009 1010 case NodeType.CCLASS: 1011 break; 1012 1013 case NodeType.STR: 1014 if (isIgnoreCase(regex.options) && !((StringNode)node).isRaw()) { 1015 node = expandCaseFoldString(node); 1016 } 1017 break; 1018 1019 case NodeType.CTYPE: 1020 case NodeType.CANY: 1021 break; 1022 1023 case NodeType.BREF: 1024 final BackRefNode br = (BackRefNode)node; 1025 if (br.backRef > env.numMem) { 1026 throw new ValueException(ERR_INVALID_BACKREF); 1027 } 1028 env.backrefedMem = bsOnAt(env.backrefedMem, br.backRef); 1029 env.btMemStart = bsOnAt(env.btMemStart, br.backRef); 1030 ((EncloseNode)env.memNodes[br.backRef]).setMemBackrefed(); 1031 break; 1032 1033 case NodeType.QTFR: 1034 final QuantifierNode qn = (QuantifierNode)node; 1035 Node target = qn.target; 1036 1037 if ((state & IN_REPEAT) != 0) { 1038 qn.setInRepeat(); 1039 } 1040 1041 if (isRepeatInfinite(qn.upper) || qn.lower >= 1) { 1042 final int d = getMinMatchLength(target); 1043 if (d == 0) { 1044 qn.targetEmptyInfo = TargetInfo.IS_EMPTY; 1045 if (Config.USE_MONOMANIAC_CHECK_CAPTURES_IN_ENDLESS_REPEAT) { 1046 final int info = quantifiersMemoryInfo(target); 1047 if (info > 0) { 1048 qn.targetEmptyInfo = info; 1049 } 1050 } // USE_INFINITE_REPEAT_MONOMANIAC_MEM_STATUS_CHECK 1051 // strange stuff here (turned off) 1052 } 1053 } 1054 1055 state |= IN_REPEAT; 1056 if (qn.lower != qn.upper) { 1057 state |= IN_VAR_REPEAT; 1058 } 1059 1060 target = setupTree(target, state); 1061 1062 /* expand string */ 1063 if (target.getType() == NodeType.STR) { 1064 if (!isRepeatInfinite(qn.lower) && qn.lower == qn.upper && 1065 qn.lower > 1 && qn.lower <= EXPAND_STRING_MAX_LENGTH) { 1066 final StringNode sn = (StringNode)target; 1067 final int len = sn.length(); 1068 1069 if (len * qn.lower <= EXPAND_STRING_MAX_LENGTH) { 1070 final StringNode str = qn.convertToString(sn.flag); 1071 final int n = qn.lower; 1072 for (int i = 0; i < n; i++) { 1073 str.cat(sn.chars, sn.p, sn.end); 1074 } 1075 break; /* break case NT_QTFR: */ 1076 } 1077 1078 } 1079 } 1080 if (Config.USE_OP_PUSH_OR_JUMP_EXACT) { 1081 if (qn.greedy && qn.targetEmptyInfo != 0) { 1082 if (target.getType() == NodeType.QTFR) { 1083 final QuantifierNode tqn = (QuantifierNode)target; 1084 if (tqn.headExact != null) { 1085 qn.headExact = tqn.headExact; 1086 tqn.headExact = null; 1087 } 1088 } else { 1089 qn.headExact = getHeadValueNode(qn.target, true); 1090 } 1091 } 1092 } // USE_OP_PUSH_OR_JUMP_EXACT 1093 break; 1094 1095 case NodeType.ENCLOSE: 1096 final EncloseNode en = (EncloseNode)node; 1097 switch (en.type) { 1098 case EncloseType.OPTION: 1099 final int options = regex.options; 1100 regex.options = en.option; 1101 setupTree(en.target, state); 1102 regex.options = options; 1103 break; 1104 1105 case EncloseType.MEMORY: 1106 if ((state & (IN_ALT | IN_NOT | IN_VAR_REPEAT)) != 0) { 1107 env.btMemStart = bsOnAt(env.btMemStart, en.regNum); 1108 /* SET_ENCLOSE_STATUS(node, NST_MEM_IN_ALT_NOT); */ 1109 1110 } 1111 setupTree(en.target, state); 1112 break; 1113 1114 case EncloseType.STOP_BACKTRACK: 1115 setupTree(en.target, state); 1116 if (en.target.getType() == NodeType.QTFR) { 1117 final QuantifierNode tqn = (QuantifierNode)en.target; 1118 if (isRepeatInfinite(tqn.upper) && tqn.lower <= 1 && tqn.greedy) { 1119 /* (?>a*), a*+ etc... */ 1120 if (tqn.target.isSimple()) { 1121 en.setStopBtSimpleRepeat(); 1122 } 1123 } 1124 } 1125 break; 1126 1127 default: 1128 break; 1129 1130 } // inner switch 1131 break; 1132 1133 case NodeType.ANCHOR: 1134 final AnchorNode an = (AnchorNode)node; 1135 switch (an.type) { 1136 case AnchorType.PREC_READ: 1137 setupTree(an.target, state); 1138 break; 1139 1140 case AnchorType.PREC_READ_NOT: 1141 setupTree(an.target, (state | IN_NOT)); 1142 break; 1143 1144 case AnchorType.LOOK_BEHIND: 1145 if (checkTypeTree(an.target, NodeType.ALLOWED_IN_LB, EncloseType.ALLOWED_IN_LB, AnchorType.ALLOWED_IN_LB)) { 1146 throw new SyntaxException(ERR_INVALID_LOOK_BEHIND_PATTERN); 1147 } 1148 node = setupLookBehind(node); 1149 if (node.getType() != NodeType.ANCHOR) { 1150 continue restart; 1151 } 1152 setupTree(((AnchorNode)node).target, state); 1153 break; 1154 1155 case AnchorType.LOOK_BEHIND_NOT: 1156 if (checkTypeTree(an.target, NodeType.ALLOWED_IN_LB, EncloseType.ALLOWED_IN_LB, AnchorType.ALLOWED_IN_LB)) { 1157 throw new SyntaxException(ERR_INVALID_LOOK_BEHIND_PATTERN); 1158 } 1159 node = setupLookBehind(node); 1160 if (node.getType() != NodeType.ANCHOR) { 1161 continue restart; 1162 } 1163 setupTree(((AnchorNode)node).target, (state | IN_NOT)); 1164 break; 1165 1166 default: 1167 break; 1168 1169 } // inner switch 1170 break; 1171 default: 1172 break; 1173 } // switch 1174 return node; 1175 } // restart: while 1176 } 1177 1178 private static final int MAX_NODE_OPT_INFO_REF_COUNT = 5; 1179 private void optimizeNodeLeft(final Node node, final NodeOptInfo opt, final OptEnvironment oenv) { // oenv remove, pass mmd 1180 opt.clear(); 1181 opt.setBoundNode(oenv.mmd); 1182 1183 switch (node.getType()) { 1184 case NodeType.LIST: { 1185 final OptEnvironment nenv = new OptEnvironment(); 1186 final NodeOptInfo nopt = new NodeOptInfo(); 1187 nenv.copy(oenv); 1188 ConsAltNode lin = (ConsAltNode)node; 1189 do { 1190 optimizeNodeLeft(lin.car, nopt, nenv); 1191 nenv.mmd.add(nopt.length); 1192 opt.concatLeftNode(nopt); 1193 } while ((lin = lin.cdr) != null); 1194 break; 1195 } 1196 1197 case NodeType.ALT: { 1198 final NodeOptInfo nopt = new NodeOptInfo(); 1199 ConsAltNode aln = (ConsAltNode)node; 1200 do { 1201 optimizeNodeLeft(aln.car, nopt, oenv); 1202 if (aln == node) { 1203 opt.copy(nopt); 1204 } else { 1205 opt.altMerge(nopt, oenv); 1206 } 1207 } while ((aln = aln.cdr) != null); 1208 break; 1209 } 1210 1211 case NodeType.STR: { 1212 final StringNode sn = (StringNode)node; 1213 1214 final int slen = sn.length(); 1215 1216 if (!sn.isAmbig()) { 1217 opt.exb.concatStr(sn.chars, sn.p, sn.end, sn.isRaw()); 1218 1219 if (slen > 0) { 1220 opt.map.addChar(sn.chars[sn.p]); 1221 } 1222 1223 opt.length.set(slen, slen); 1224 } else { 1225 int max; 1226 if (sn.isDontGetOptInfo()) { 1227 max = sn.length(); 1228 } else { 1229 opt.exb.concatStr(sn.chars, sn.p, sn.end, sn.isRaw()); 1230 opt.exb.ignoreCase = true; 1231 1232 if (slen > 0) { 1233 opt.map.addCharAmb(sn.chars, sn.p, sn.end, oenv.caseFoldFlag); 1234 } 1235 1236 max = slen; 1237 } 1238 opt.length.set(slen, max); 1239 } 1240 1241 if (opt.exb.length == slen) { 1242 opt.exb.reachEnd = true; 1243 } 1244 break; 1245 } 1246 1247 case NodeType.CCLASS: { 1248 final CClassNode cc = (CClassNode)node; 1249 /* no need to check ignore case. (setted in setup_tree()) */ 1250 if (cc.mbuf != null || cc.isNot()) { 1251 opt.length.set(1, 1); 1252 } else { 1253 for (int i=0; i<BitSet.SINGLE_BYTE_SIZE; i++) { 1254 final boolean z = cc.bs.at(i); 1255 if ((z && !cc.isNot()) || (!z && cc.isNot())) { 1256 opt.map.addChar(i); 1257 } 1258 } 1259 opt.length.set(1, 1); 1260 } 1261 break; 1262 } 1263 1264 case NodeType.CANY: { 1265 opt.length.set(1, 1); 1266 break; 1267 } 1268 1269 case NodeType.ANCHOR: { 1270 final AnchorNode an = (AnchorNode)node; 1271 switch (an.type) { 1272 case AnchorType.BEGIN_BUF: 1273 case AnchorType.BEGIN_POSITION: 1274 case AnchorType.BEGIN_LINE: 1275 case AnchorType.END_BUF: 1276 case AnchorType.SEMI_END_BUF: 1277 case AnchorType.END_LINE: 1278 opt.anchor.add(an.type); 1279 break; 1280 1281 case AnchorType.PREC_READ: 1282 final NodeOptInfo nopt = new NodeOptInfo(); 1283 optimizeNodeLeft(an.target, nopt, oenv); 1284 if (nopt.exb.length > 0) { 1285 opt.expr.copy(nopt.exb); 1286 } else if (nopt.exm.length > 0) { 1287 opt.expr.copy(nopt.exm); 1288 } 1289 opt.expr.reachEnd = false; 1290 if (nopt.map.value > 0) { 1291 opt.map.copy(nopt.map); 1292 } 1293 break; 1294 1295 case AnchorType.PREC_READ_NOT: 1296 case AnchorType.LOOK_BEHIND: /* Sorry, I can't make use of it. */ 1297 case AnchorType.LOOK_BEHIND_NOT: 1298 break; 1299 1300 default: 1301 break; 1302 1303 } // inner switch 1304 break; 1305 } 1306 1307 case NodeType.BREF: { 1308 final BackRefNode br = (BackRefNode)node; 1309 1310 if (br.isRecursion()) { 1311 opt.length.set(0, MinMaxLen.INFINITE_DISTANCE); 1312 break; 1313 } 1314 1315 final Node[]nodes = oenv.scanEnv.memNodes; 1316 1317 final int min = getMinMatchLength(nodes[br.backRef]); 1318 final int max = getMaxMatchLength(nodes[br.backRef]); 1319 1320 opt.length.set(min, max); 1321 break; 1322 } 1323 1324 1325 case NodeType.QTFR: { 1326 final NodeOptInfo nopt = new NodeOptInfo(); 1327 final QuantifierNode qn = (QuantifierNode)node; 1328 optimizeNodeLeft(qn.target, nopt, oenv); 1329 if (qn.lower == 0 && isRepeatInfinite(qn.upper)) { 1330 if (oenv.mmd.max == 0 && qn.target.getType() == NodeType.CANY && qn.greedy) { 1331 if (isMultiline(oenv.options)) { 1332 opt.anchor.add(AnchorType.ANYCHAR_STAR_ML); 1333 } else { 1334 opt.anchor.add(AnchorType.ANYCHAR_STAR); 1335 } 1336 } 1337 } else { 1338 if (qn.lower > 0) { 1339 opt.copy(nopt); 1340 if (nopt.exb.length > 0) { 1341 if (nopt.exb.reachEnd) { 1342 int i; 1343 for (i = 2; i <= qn.lower && !opt.exb.isFull(); i++) { 1344 opt.exb.concat(nopt.exb); 1345 } 1346 if (i < qn.lower) { 1347 opt.exb.reachEnd = false; 1348 } 1349 } 1350 } 1351 if (qn.lower != qn.upper) { 1352 opt.exb.reachEnd = false; 1353 opt.exm.reachEnd = false; 1354 } 1355 if (qn.lower > 1) { 1356 opt.exm.reachEnd = false; 1357 } 1358 1359 } 1360 } 1361 final int min = MinMaxLen.distanceMultiply(nopt.length.min, qn.lower); 1362 int max; 1363 if (isRepeatInfinite(qn.upper)) { 1364 max = nopt.length.max > 0 ? MinMaxLen.INFINITE_DISTANCE : 0; 1365 } else { 1366 max = MinMaxLen.distanceMultiply(nopt.length.max, qn.upper); 1367 } 1368 opt.length.set(min, max); 1369 break; 1370 } 1371 1372 case NodeType.ENCLOSE: { 1373 final EncloseNode en = (EncloseNode)node; 1374 switch (en.type) { 1375 case EncloseType.OPTION: 1376 final int save = oenv.options; 1377 oenv.options = en.option; 1378 optimizeNodeLeft(en.target, opt, oenv); 1379 oenv.options = save; 1380 break; 1381 1382 case EncloseType.MEMORY: 1383 if (++en.optCount > MAX_NODE_OPT_INFO_REF_COUNT) { 1384 int min = 0; 1385 int max = MinMaxLen.INFINITE_DISTANCE; 1386 if (en.isMinFixed()) { 1387 min = en.minLength; 1388 } 1389 if (en.isMaxFixed()) { 1390 max = en.maxLength; 1391 } 1392 opt.length.set(min, max); 1393 } else { // USE_SUBEXP_CALL 1394 optimizeNodeLeft(en.target, opt, oenv); 1395 if (opt.anchor.isSet(AnchorType.ANYCHAR_STAR_MASK)) { 1396 if (bsAt(oenv.scanEnv.backrefedMem, en.regNum)) { 1397 opt.anchor.remove(AnchorType.ANYCHAR_STAR_MASK); 1398 } 1399 } 1400 } 1401 break; 1402 1403 case EncloseType.STOP_BACKTRACK: 1404 optimizeNodeLeft(en.target, opt, oenv); 1405 break; 1406 1407 default: 1408 break; 1409 } // inner switch 1410 break; 1411 } 1412 1413 default: 1414 throw new InternalException(ERR_PARSER_BUG); 1415 } // switch 1416 } 1417 1418 @SuppressWarnings("unused") 1419 protected final void setOptimizedInfoFromTree(final Node node) { 1420 final NodeOptInfo opt = new NodeOptInfo(); 1421 final OptEnvironment oenv = new OptEnvironment(); 1422 1423 oenv.options = regex.options; 1424 oenv.caseFoldFlag = regex.caseFoldFlag; 1425 oenv.scanEnv = env; 1426 oenv.mmd.clear(); // ?? 1427 1428 optimizeNodeLeft(node, opt, oenv); 1429 1430 regex.anchor = opt.anchor.leftAnchor & (AnchorType.BEGIN_BUF | 1431 AnchorType.BEGIN_POSITION | 1432 AnchorType.ANYCHAR_STAR | 1433 AnchorType.ANYCHAR_STAR_ML); 1434 1435 regex.anchor |= opt.anchor.rightAnchor & (AnchorType.END_BUF | 1436 AnchorType.SEMI_END_BUF); 1437 1438 if ((regex.anchor & (AnchorType.END_BUF | AnchorType.SEMI_END_BUF)) != 0) { 1439 regex.anchorDmin = opt.length.min; 1440 regex.anchorDmax = opt.length.max; 1441 } 1442 1443 if (opt.exb.length > 0 || opt.exm.length > 0) { 1444 opt.exb.select(opt.exm); 1445 if (opt.map.value > 0 && opt.exb.compare(opt.map) > 0) { 1446 // !goto set_map;! 1447 regex.setOptimizeMapInfo(opt.map); 1448 regex.setSubAnchor(opt.map.anchor); 1449 } else { 1450 regex.setExactInfo(opt.exb); 1451 regex.setSubAnchor(opt.exb.anchor); 1452 } 1453 } else if (opt.map.value > 0) { 1454 // !set_map:! 1455 regex.setOptimizeMapInfo(opt.map); 1456 regex.setSubAnchor(opt.map.anchor); 1457 } else { 1458 regex.subAnchor |= opt.anchor.leftAnchor & AnchorType.BEGIN_LINE; 1459 if (opt.length.max == 0) { 1460 regex.subAnchor |= opt.anchor.rightAnchor & AnchorType.END_LINE; 1461 } 1462 } 1463 1464 if (Config.DEBUG_COMPILE || Config.DEBUG_MATCH) { 1465 Config.log.println(regex.optimizeInfoToString()); 1466 } 1467 } 1468} 1469