Analyser.java revision 1088:7e62d98d4625
1/*
2 * Permission is hereby granted, free of charge, to any person obtaining a copy of
3 * this software and associated documentation files (the "Software"), to deal in
4 * the Software without restriction, including without limitation the rights to
5 * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
6 * of the Software, and to permit persons to whom the Software is furnished to do
7 * so, subject to the following conditions:
8 *
9 * The above copyright notice and this permission notice shall be included in all
10 * copies or substantial portions of the Software.
11 *
12 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
13 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
14 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
15 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
16 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
17 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
18 * SOFTWARE.
19 */
20package jdk.nashorn.internal.runtime.regexp.joni;
21
22import static jdk.nashorn.internal.runtime.regexp.joni.BitStatus.bsAll;
23import static jdk.nashorn.internal.runtime.regexp.joni.BitStatus.bsAt;
24import static jdk.nashorn.internal.runtime.regexp.joni.BitStatus.bsOnAt;
25import static jdk.nashorn.internal.runtime.regexp.joni.Option.isFindCondition;
26import static jdk.nashorn.internal.runtime.regexp.joni.Option.isIgnoreCase;
27import static jdk.nashorn.internal.runtime.regexp.joni.Option.isMultiline;
28import static jdk.nashorn.internal.runtime.regexp.joni.ast.ConsAltNode.newAltNode;
29import static jdk.nashorn.internal.runtime.regexp.joni.ast.QuantifierNode.isRepeatInfinite;
30import java.util.HashSet;
31import jdk.nashorn.internal.runtime.regexp.joni.ast.AnchorNode;
32import jdk.nashorn.internal.runtime.regexp.joni.ast.BackRefNode;
33import jdk.nashorn.internal.runtime.regexp.joni.ast.CClassNode;
34import jdk.nashorn.internal.runtime.regexp.joni.ast.ConsAltNode;
35import jdk.nashorn.internal.runtime.regexp.joni.ast.EncloseNode;
36import jdk.nashorn.internal.runtime.regexp.joni.ast.Node;
37import jdk.nashorn.internal.runtime.regexp.joni.ast.QuantifierNode;
38import jdk.nashorn.internal.runtime.regexp.joni.ast.StringNode;
39import jdk.nashorn.internal.runtime.regexp.joni.constants.AnchorType;
40import jdk.nashorn.internal.runtime.regexp.joni.constants.EncloseType;
41import jdk.nashorn.internal.runtime.regexp.joni.constants.NodeType;
42import jdk.nashorn.internal.runtime.regexp.joni.constants.StackPopLevel;
43import jdk.nashorn.internal.runtime.regexp.joni.constants.TargetInfo;
44import jdk.nashorn.internal.runtime.regexp.joni.encoding.ObjPtr;
45import jdk.nashorn.internal.runtime.regexp.joni.exception.InternalException;
46import jdk.nashorn.internal.runtime.regexp.joni.exception.SyntaxException;
47import jdk.nashorn.internal.runtime.regexp.joni.exception.ValueException;
48
49final class Analyser extends Parser {
50
51    protected Analyser(final ScanEnvironment env, final char[] chars, final int p, final int end) {
52        super(env, chars, p, end);
53    }
54
55    @SuppressWarnings("unused")
56    protected final void compile() {
57        if (Config.DEBUG) {
58            Config.log.println(new String(chars, getBegin(), getEnd()));
59        }
60
61        reset();
62
63        regex.numMem = 0;
64        regex.numRepeat = 0;
65        regex.numNullCheck = 0;
66        //regex.repeatRangeAlloc = 0;
67        regex.repeatRangeLo = null;
68        regex.repeatRangeHi = null;
69
70        parse();
71
72        if (Config.DEBUG_PARSE_TREE_RAW && Config.DEBUG_PARSE_TREE) {
73            Config.log.println("<RAW TREE>");
74            Config.log.println(root + "\n");
75        }
76
77        root = setupTree(root, 0);
78        if (Config.DEBUG_PARSE_TREE) {
79            if (Config.DEBUG_PARSE_TREE_RAW) {
80                Config.log.println("<TREE>");
81            }
82            root.verifyTree(new HashSet<Node>(), env.reg.warnings);
83            Config.log.println(root + "\n");
84        }
85
86        regex.captureHistory = env.captureHistory;
87        regex.btMemStart = env.btMemStart;
88        regex.btMemEnd = env.btMemEnd;
89
90        if (isFindCondition(regex.options)) {
91            regex.btMemEnd = bsAll();
92        } else {
93            regex.btMemEnd = env.btMemEnd;
94            regex.btMemEnd |= regex.captureHistory;
95        }
96
97        regex.clearOptimizeInfo();
98
99        if (!Config.DONT_OPTIMIZE) {
100            setOptimizedInfoFromTree(root);
101        }
102
103        env.memNodes = null;
104
105        if (regex.numRepeat != 0 || regex.btMemEnd != 0) {
106            regex.stackPopLevel = StackPopLevel.ALL;
107        } else {
108            if (regex.btMemStart != 0) {
109                regex.stackPopLevel = StackPopLevel.MEM_START;
110            } else {
111                regex.stackPopLevel = StackPopLevel.FREE;
112            }
113        }
114
115        if (Config.DEBUG_COMPILE) {
116            Config.log.println("stack used: " + regex.stackNeeded);
117            if (Config.USE_STRING_TEMPLATES) {
118                Config.log.print("templates: " + regex.templateNum + "\n");
119            }
120            Config.log.println(new ByteCodePrinter(regex).byteCodeListToString());
121
122        } // DEBUG_COMPILE
123    }
124
125    private void swap(final Node a, final Node b) {
126        a.swap(b);
127
128        if (root == b) {
129            root = a;
130        } else if (root == a) {
131            root = b;
132        }
133    }
134
135    // USE_INFINITE_REPEAT_MONOMANIAC_MEM_STATUS_CHECK
136    private int quantifiersMemoryInfo(final Node node) {
137        int info = 0;
138
139        switch(node.getType()) {
140        case NodeType.LIST:
141        case NodeType.ALT:
142            ConsAltNode can = (ConsAltNode)node;
143            do {
144                final int v = quantifiersMemoryInfo(can.car);
145                if (v > info) {
146                    info = v;
147                }
148            } while ((can = can.cdr) != null);
149            break;
150
151        case NodeType.QTFR:
152            final QuantifierNode qn = (QuantifierNode)node;
153            if (qn.upper != 0) {
154                info = quantifiersMemoryInfo(qn.target);
155            }
156            break;
157
158        case NodeType.ENCLOSE:
159            final EncloseNode en = (EncloseNode)node;
160            switch (en.type) {
161            case EncloseType.MEMORY:
162                return TargetInfo.IS_EMPTY_MEM;
163
164            case EncloseType.OPTION:
165            case EncloseNode.STOP_BACKTRACK:
166                info = quantifiersMemoryInfo(en.target);
167                break;
168
169            default:
170                break;
171            } // inner switch
172            break;
173
174        case NodeType.BREF:
175        case NodeType.STR:
176        case NodeType.CTYPE:
177        case NodeType.CCLASS:
178        case NodeType.CANY:
179        case NodeType.ANCHOR:
180        default:
181            break;
182        } // switch
183
184        return info;
185    }
186
187    private int getMinMatchLength(final Node node) {
188        int min = 0;
189
190        switch (node.getType()) {
191        case NodeType.BREF:
192            final BackRefNode br = (BackRefNode)node;
193            if (br.isRecursion()) {
194                break;
195            }
196
197            if (br.backRef > env.numMem) {
198                throw new ValueException(ERR_INVALID_BACKREF);
199            }
200            min = getMinMatchLength(env.memNodes[br.backRef]);
201
202            break;
203
204        case NodeType.LIST:
205            ConsAltNode can = (ConsAltNode)node;
206            do {
207                min += getMinMatchLength(can.car);
208            } while ((can = can.cdr) != null);
209            break;
210
211        case NodeType.ALT:
212            ConsAltNode y = (ConsAltNode)node;
213            do {
214                final Node x = y.car;
215                final int tmin = getMinMatchLength(x);
216                if (y == node) {
217                    min = tmin;
218                } else if (min > tmin) {
219                    min = tmin;
220                }
221            } while ((y = y.cdr) != null);
222            break;
223
224        case NodeType.STR:
225            min = ((StringNode)node).length();
226            break;
227
228        case NodeType.CTYPE:
229            min = 1;
230            break;
231
232        case NodeType.CCLASS:
233        case NodeType.CANY:
234            min = 1;
235            break;
236
237        case NodeType.QTFR:
238            final QuantifierNode qn = (QuantifierNode)node;
239            if (qn.lower > 0) {
240                min = getMinMatchLength(qn.target);
241                min = MinMaxLen.distanceMultiply(min, qn.lower);
242            }
243            break;
244
245        case NodeType.ENCLOSE:
246            final EncloseNode en = (EncloseNode)node;
247            switch (en.type) {
248            case EncloseType.MEMORY:
249                if (en.isMinFixed()) {
250                    min = en.minLength;
251                } else {
252                    min = getMinMatchLength(en.target);
253                    en.minLength = min;
254                    en.setMinFixed();
255                }
256                break;
257
258            case EncloseType.OPTION:
259            case EncloseType.STOP_BACKTRACK:
260                min = getMinMatchLength(en.target);
261                break;
262
263            default:
264                break;
265            } // inner switch
266            break;
267
268        case NodeType.ANCHOR:
269        default:
270            break;
271        } // switch
272
273        return min;
274    }
275
276    private int getMaxMatchLength(final Node node) {
277        int max = 0;
278
279        switch (node.getType()) {
280        case NodeType.LIST:
281            ConsAltNode ln = (ConsAltNode)node;
282            do {
283                final int tmax = getMaxMatchLength(ln.car);
284                max = MinMaxLen.distanceAdd(max, tmax);
285            } while ((ln = ln.cdr) != null);
286            break;
287
288        case NodeType.ALT:
289            ConsAltNode an = (ConsAltNode)node;
290            do {
291                final int tmax = getMaxMatchLength(an.car);
292                if (max < tmax) {
293                    max = tmax;
294                }
295            } while ((an = an.cdr) != null);
296            break;
297
298        case NodeType.STR:
299            max = ((StringNode)node).length();
300            break;
301
302        case NodeType.CTYPE:
303            max = 1;
304            break;
305
306        case NodeType.CCLASS:
307        case NodeType.CANY:
308            max = 1;
309            break;
310
311        case NodeType.BREF:
312            final BackRefNode br = (BackRefNode)node;
313            if (br.isRecursion()) {
314                max = MinMaxLen.INFINITE_DISTANCE;
315                break;
316            }
317
318            if (br.backRef > env.numMem) {
319                throw new ValueException(ERR_INVALID_BACKREF);
320            }
321            final int tmax = getMaxMatchLength(env.memNodes[br.backRef]);
322            if (max < tmax) {
323                max = tmax;
324            }
325            break;
326
327        case NodeType.QTFR:
328            final QuantifierNode qn = (QuantifierNode)node;
329            if (qn.upper != 0) {
330                max = getMaxMatchLength(qn.target);
331                if (max != 0) {
332                    if (!isRepeatInfinite(qn.upper)) {
333                        max = MinMaxLen.distanceMultiply(max, qn.upper);
334                    } else {
335                        max = MinMaxLen.INFINITE_DISTANCE;
336                    }
337                }
338            }
339            break;
340
341        case NodeType.ENCLOSE:
342            final EncloseNode en = (EncloseNode)node;
343            switch (en.type) {
344            case EncloseType.MEMORY:
345                if (en.isMaxFixed()) {
346                    max = en.maxLength;
347                } else {
348                    max = getMaxMatchLength(en.target);
349                    en.maxLength = max;
350                    en.setMaxFixed();
351                }
352                break;
353
354            case EncloseType.OPTION:
355            case EncloseType.STOP_BACKTRACK:
356                max = getMaxMatchLength(en.target);
357                break;
358
359            default:
360                break;
361            } // inner switch
362            break;
363
364        case NodeType.ANCHOR:
365        default:
366            break;
367        } // switch
368
369        return max;
370    }
371
372    private static final int GET_CHAR_LEN_VARLEN            = -1;
373    private static final int GET_CHAR_LEN_TOP_ALT_VARLEN    = -2;
374    protected final int getCharLengthTree(final Node node) {
375        return getCharLengthTree(node, 0);
376    }
377
378    private int getCharLengthTree(final Node node, final int levelp) {
379        final int level = levelp + 1;
380
381        int len = 0;
382        returnCode = 0;
383
384        switch(node.getType()) {
385        case NodeType.LIST:
386            ConsAltNode ln = (ConsAltNode)node;
387            do {
388                final int tlen = getCharLengthTree(ln.car, level);
389                if (returnCode == 0) {
390                    len = MinMaxLen.distanceAdd(len, tlen);
391                }
392            } while (returnCode == 0 && (ln = ln.cdr) != null);
393            break;
394
395        case NodeType.ALT:
396            ConsAltNode an = (ConsAltNode)node;
397            boolean varLen = false;
398
399            int tlen = getCharLengthTree(an.car, level);
400            while (returnCode == 0 && (an = an.cdr) != null) {
401                final int tlen2 = getCharLengthTree(an.car, level);
402                if (returnCode == 0) {
403                    if (tlen != tlen2) {
404                        varLen = true;
405                    }
406                }
407            }
408
409            if (returnCode == 0) {
410                if (varLen) {
411                    if (level == 1) {
412                        returnCode = GET_CHAR_LEN_TOP_ALT_VARLEN;
413                    } else {
414                        returnCode = GET_CHAR_LEN_VARLEN;
415                    }
416                } else {
417                    len = tlen;
418                }
419            }
420            break;
421
422        case NodeType.STR:
423            final StringNode sn = (StringNode)node;
424            len = sn.length();
425            break;
426
427        case NodeType.QTFR:
428            final QuantifierNode qn = (QuantifierNode)node;
429            if (qn.lower == qn.upper) {
430                tlen = getCharLengthTree(qn.target, level);
431                if (returnCode == 0) {
432                    len = MinMaxLen.distanceMultiply(tlen, qn.lower);
433                }
434            } else {
435                returnCode = GET_CHAR_LEN_VARLEN;
436            }
437            break;
438
439        case NodeType.CTYPE:
440        case NodeType.CCLASS:
441        case NodeType.CANY:
442            len = 1;
443            break;
444
445        case NodeType.ENCLOSE:
446            final EncloseNode en = (EncloseNode)node;
447            switch(en.type) {
448            case EncloseType.MEMORY:
449                if (en.isCLenFixed()) {
450                    len = en.charLength;
451                } else {
452                    len = getCharLengthTree(en.target, level);
453                    if (returnCode == 0) {
454                        en.charLength = len;
455                        en.setCLenFixed();
456                    }
457                }
458                break;
459
460            case EncloseType.OPTION:
461            case EncloseType.STOP_BACKTRACK:
462                len = getCharLengthTree(en.target, level);
463                break;
464
465            default:
466                break;
467            } // inner switch
468            break;
469
470        case NodeType.ANCHOR:
471            break;
472
473        default:
474            returnCode = GET_CHAR_LEN_VARLEN;
475        } // switch
476        return len;
477    }
478
479    /* x is not included y ==>  1 : 0 */
480    private static boolean isNotIncluded(final Node xn, final Node yn) {
481        Node x = xn;
482        Node y = yn;
483        Node tmp;
484
485        // !retry:!
486        retry: while(true) {
487
488        final int yType = y.getType();
489
490        switch(x.getType()) {
491        case NodeType.CTYPE:
492            switch(yType) {
493
494            case NodeType.CCLASS:
495                // !swap:!
496                tmp = x;
497                x = y;
498                y = tmp;
499                // !goto retry;!
500                continue retry;
501
502            case NodeType.STR:
503                // !goto swap;!
504                tmp = x;
505                x = y;
506                y = tmp;
507                continue retry;
508
509            default:
510                break;
511            } // inner switch
512            break;
513
514        case NodeType.CCLASS:
515            final CClassNode xc = (CClassNode)x;
516
517            switch(yType) {
518
519            case NodeType.CCLASS:
520                final CClassNode yc = (CClassNode)y;
521
522                for (int i=0; i<BitSet.SINGLE_BYTE_SIZE; i++) {
523                    boolean v = xc.bs.at(i);
524                    if ((v && !xc.isNot()) || (!v && xc.isNot())) {
525                        v = yc.bs.at(i);
526                        if ((v && !yc.isNot()) || (!v && yc.isNot())) {
527                            return false;
528                        }
529                    }
530                }
531                if ((xc.mbuf == null && !xc.isNot()) || yc.mbuf == null && !yc.isNot()) {
532                    return true;
533                }
534                return false;
535                // break; not reached
536
537            case NodeType.STR:
538                // !goto swap;!
539                tmp = x;
540                x = y;
541                y = tmp;
542                continue retry;
543
544            default:
545                break;
546
547            } // inner switch
548            break; // case NodeType.CCLASS
549
550        case NodeType.STR:
551            final StringNode xs = (StringNode)x;
552            if (xs.length() == 0) {
553                break;
554            }
555
556            switch (yType) {
557
558            case NodeType.CCLASS:
559                final CClassNode cc = (CClassNode)y;
560                final int code = xs.chars[xs.p];
561                return !cc.isCodeInCC(code);
562
563            case NodeType.STR:
564                final StringNode ys = (StringNode)y;
565                int len = xs.length();
566                if (len > ys.length()) {
567                    len = ys.length();
568                }
569                if (xs.isAmbig() || ys.isAmbig()) {
570                    /* tiny version */
571                    return false;
572                }
573                for (int i=0, pt=ys.p, q=xs.p; i<len; i++, pt++, q++) {
574                    if (ys.chars[pt] != xs.chars[q]) {
575                        return true;
576                    }
577                }
578                break;
579
580            default:
581                break;
582            } // inner switch
583
584            break; // case NodeType.STR
585        default:
586            break;
587
588        } // switch
589
590        break;
591        } // retry: while
592        return false;
593    }
594
595    private Node getHeadValueNode(final Node node, final boolean exact) {
596        Node n = null;
597
598        switch(node.getType()) {
599        case NodeType.BREF:
600        case NodeType.ALT:
601        case NodeType.CANY:
602            break;
603
604        case NodeType.CTYPE:
605        case NodeType.CCLASS:
606            if (!exact) {
607                n = node;
608            }
609            break;
610
611        case NodeType.LIST:
612            n = getHeadValueNode(((ConsAltNode)node).car, exact);
613            break;
614
615        case NodeType.STR:
616            final StringNode sn = (StringNode)node;
617            if (sn.end <= sn.p)
618             {
619                break; // ???
620            }
621
622            if (exact && !sn.isRaw() && isIgnoreCase(regex.options)){
623                // nothing
624            } else {
625                n = node;
626            }
627            break;
628
629        case NodeType.QTFR:
630            final QuantifierNode qn = (QuantifierNode)node;
631            if (qn.lower > 0) {
632                if (qn.headExact != null) {
633                    n = qn.headExact;
634                } else {
635                    n = getHeadValueNode(qn.target, exact);
636                }
637            }
638            break;
639
640        case NodeType.ENCLOSE:
641            final EncloseNode en = (EncloseNode)node;
642
643            switch (en.type) {
644            case EncloseType.OPTION:
645                final int options = regex.options;
646                regex.options = en.option;
647                n = getHeadValueNode(en.target, exact);
648                regex.options = options;
649                break;
650
651            case EncloseType.MEMORY:
652            case EncloseType.STOP_BACKTRACK:
653                n = getHeadValueNode(en.target, exact);
654                break;
655
656            default:
657                break;
658            } // inner switch
659            break;
660
661        case NodeType.ANCHOR:
662            final AnchorNode an = (AnchorNode)node;
663            if (an.type == AnchorType.PREC_READ) {
664                n = getHeadValueNode(an.target, exact);
665            }
666            break;
667
668        default:
669            break;
670        } // switch
671
672        return n;
673    }
674
675    // true: invalid
676    private boolean checkTypeTree(final Node node, final int typeMask, final int encloseMask, final int anchorMask) {
677        if ((node.getType2Bit() & typeMask) == 0) {
678            return true;
679        }
680
681        boolean invalid = false;
682
683        switch(node.getType()) {
684        case NodeType.LIST:
685        case NodeType.ALT:
686            ConsAltNode can = (ConsAltNode)node;
687            do {
688                invalid = checkTypeTree(can.car, typeMask, encloseMask, anchorMask);
689            } while (!invalid && (can = can.cdr) != null);
690            break;
691
692        case NodeType.QTFR:
693            invalid = checkTypeTree(((QuantifierNode)node).target, typeMask, encloseMask, anchorMask);
694            break;
695
696        case NodeType.ENCLOSE:
697            final EncloseNode en = (EncloseNode)node;
698            if ((en.type & encloseMask) == 0) {
699                return true;
700            }
701            invalid = checkTypeTree(en.target, typeMask, encloseMask, anchorMask);
702            break;
703
704        case NodeType.ANCHOR:
705            final AnchorNode an = (AnchorNode)node;
706            if ((an.type & anchorMask) == 0) {
707                return true;
708            }
709
710            if (an.target != null) {
711                invalid = checkTypeTree(an.target, typeMask, encloseMask, anchorMask);
712            }
713            break;
714
715        default:
716            break;
717
718        } // switch
719
720        return invalid;
721    }
722
723    /* divide different length alternatives in look-behind.
724    (?<=A|B) ==> (?<=A)|(?<=B)
725    (?<!A|B) ==> (?<!A)(?<!B)
726     */
727    private Node divideLookBehindAlternatives(final Node nodep) {
728        Node node = nodep;
729        final AnchorNode an = (AnchorNode)node;
730        final int anchorType = an.type;
731        Node head = an.target;
732        Node np = ((ConsAltNode)head).car;
733
734        swap(node, head);
735
736        final Node tmp = node;
737        node = head;
738        head = tmp;
739
740        ((ConsAltNode)node).setCar(head);
741        ((AnchorNode)head).setTarget(np);
742        np = node;
743
744        while ((np = ((ConsAltNode)np).cdr) != null) {
745            final AnchorNode insert = new AnchorNode(anchorType);
746            insert.setTarget(((ConsAltNode)np).car);
747            ((ConsAltNode)np).setCar(insert);
748        }
749
750        if (anchorType == AnchorType.LOOK_BEHIND_NOT) {
751            np = node;
752            do {
753                ((ConsAltNode)np).toListNode(); /* alt -> list */
754            } while ((np = ((ConsAltNode)np).cdr) != null);
755        }
756
757        return node;
758    }
759
760    private Node setupLookBehind(final Node node) {
761        final AnchorNode an = (AnchorNode)node;
762        final int len = getCharLengthTree(an.target);
763        switch (returnCode) {
764        case 0:
765            an.charLength = len;
766            break;
767        case GET_CHAR_LEN_VARLEN:
768            throw new SyntaxException(ERR_INVALID_LOOK_BEHIND_PATTERN);
769        case GET_CHAR_LEN_TOP_ALT_VARLEN:
770            if (syntax.differentLengthAltLookBehind()) {
771                return divideLookBehindAlternatives(node);
772            }
773            throw new SyntaxException(ERR_INVALID_LOOK_BEHIND_PATTERN);
774        default:
775            break;
776        }
777        return node;
778    }
779
780    private void nextSetup(final Node nodep, final Node nextNode) {
781        Node node = nodep;
782
783        // retry:
784        retry: while(true) {
785
786        final int type = node.getType();
787        if (type == NodeType.QTFR) {
788            final QuantifierNode qn = (QuantifierNode)node;
789            if (qn.greedy && isRepeatInfinite(qn.upper)) {
790                if (Config.USE_QTFR_PEEK_NEXT) {
791                    final StringNode n = (StringNode)getHeadValueNode(nextNode, true);
792                    /* '\0': for UTF-16BE etc... */
793                    if (n != null && n.chars[n.p] != 0) { // ?????????
794                        qn.nextHeadExact = n;
795                    }
796                } // USE_QTFR_PEEK_NEXT
797                /* automatic posseivation a*b ==> (?>a*)b */
798                if (qn.lower <= 1) {
799                    if (qn.target.isSimple()) {
800                        final Node x = getHeadValueNode(qn.target, false);
801                        if (x != null) {
802                            final Node y = getHeadValueNode(nextNode, false);
803                            if (y != null && isNotIncluded(x, y)) {
804                                final EncloseNode en = new EncloseNode(EncloseType.STOP_BACKTRACK); //onig_node_new_enclose
805                                en.setStopBtSimpleRepeat();
806                                //en.setTarget(qn.target); // optimize it ??
807                                swap(node, en);
808
809                                en.setTarget(node);
810                            }
811                        }
812                    }
813                }
814            }
815        } else if (type == NodeType.ENCLOSE) {
816            final EncloseNode en = (EncloseNode)node;
817            if (en.isMemory()) {
818                node = en.target;
819                // !goto retry;!
820                continue retry;
821            }
822        }
823
824        break;
825        } // while
826    }
827
828    private void updateStringNodeCaseFoldMultiByte(final StringNode sn) {
829        final char[] ch = sn.chars;
830        final int end = sn.end;
831        value = sn.p;
832        int sp = 0;
833        char buf;
834
835        while (value < end) {
836            final int ovalue = value;
837            buf = EncodingHelper.toLowerCase(ch[value++]);
838
839            if (ch[ovalue] != buf) {
840
841                char[] sbuf = new char[sn.length() << 1];
842                System.arraycopy(ch, sn.p, sbuf, 0, ovalue - sn.p);
843                value = ovalue;
844                while (value < end) {
845                    buf = EncodingHelper.toLowerCase(ch[value++]);
846                    if (sp >= sbuf.length) {
847                        final char[]tmp = new char[sbuf.length << 1];
848                        System.arraycopy(sbuf, 0, tmp, 0, sbuf.length);
849                        sbuf = tmp;
850                    }
851                    sbuf[sp++] = buf;
852                }
853                sn.set(sbuf, 0, sp);
854                return;
855            }
856            sp++;
857        }
858    }
859
860    private void updateStringNodeCaseFold(final Node node) {
861        final StringNode sn = (StringNode)node;
862        updateStringNodeCaseFoldMultiByte(sn);
863    }
864
865    private Node expandCaseFoldMakeRemString(final char[] ch, final int pp, final int end) {
866        final StringNode node = new StringNode(ch, pp, end);
867
868        updateStringNodeCaseFold(node);
869        node.setAmbig();
870        node.setDontGetOptInfo();
871        return node;
872    }
873
874    private static boolean expandCaseFoldStringAlt(final int itemNum, final char[] items,
875                                              final char[] chars, final int p, final int slen, final int end, final ObjPtr<Node> node) {
876
877        ConsAltNode altNode;
878        node.p = altNode = newAltNode(null, null);
879
880        StringNode snode = new StringNode(chars, p, p + slen);
881        altNode.setCar(snode);
882
883        for (int i=0; i<itemNum; i++) {
884            snode = new StringNode();
885
886            snode.catCode(items[i]);
887
888            final ConsAltNode an = newAltNode(null, null);
889            an.setCar(snode);
890            altNode.setCdr(an);
891            altNode = an;
892        }
893        return false;
894    }
895
896    private static final int THRESHOLD_CASE_FOLD_ALT_FOR_EXPANSION = 8;
897    private Node expandCaseFoldString(final Node node) {
898        final StringNode sn = (StringNode)node;
899
900        if (sn.isAmbig() || sn.length() <= 0) {
901            return node;
902        }
903
904        final char[] chars1 = sn.chars;
905        int pt = sn.p;
906        final int end = sn.end;
907        int altNum = 1;
908
909        ConsAltNode topRoot = null, r = null;
910        @SuppressWarnings("unused")
911        final ObjPtr<Node> prevNode = new ObjPtr<Node>();
912        StringNode stringNode = null;
913
914        while (pt < end) {
915            final char[] items = EncodingHelper.caseFoldCodesByString(regex.caseFoldFlag, chars1[pt]);
916
917            if (items.length == 0) {
918                if (stringNode == null) {
919                    if (r == null && prevNode.p != null) {
920                        topRoot = r = ConsAltNode.listAdd(null, prevNode.p);
921                    }
922
923                    prevNode.p = stringNode = new StringNode(); // onig_node_new_str(NULL, NULL);
924
925                    if (r != null) {
926                        ConsAltNode.listAdd(r, stringNode);
927                    }
928
929                }
930
931                stringNode.cat(chars1, pt, pt + 1);
932            } else {
933                altNum *= (items.length + 1);
934                if (altNum > THRESHOLD_CASE_FOLD_ALT_FOR_EXPANSION) {
935                    break;
936                }
937
938                if (r == null && prevNode.p != null) {
939                    topRoot = r = ConsAltNode.listAdd(null, prevNode.p);
940                }
941
942                expandCaseFoldStringAlt(items.length, items, chars1, pt, 1, end, prevNode);
943                if (r != null) {
944                    ConsAltNode.listAdd(r, prevNode.p);
945                }
946                stringNode = null;
947            }
948            pt++;
949        }
950
951        if (pt < end) {
952            final Node srem = expandCaseFoldMakeRemString(chars1, pt, end);
953
954            if (prevNode.p != null && r == null) {
955                topRoot = r = ConsAltNode.listAdd(null, prevNode.p);
956            }
957
958            if (r == null) {
959                prevNode.p = srem;
960            } else {
961                ConsAltNode.listAdd(r, srem);
962            }
963        }
964        /* ending */
965        final Node xnode = topRoot != null ? topRoot : prevNode.p;
966
967        swap(node, xnode);
968        return xnode;
969    }
970
971    private static final int IN_ALT                     = (1<<0);
972    private static final int IN_NOT                     = (1<<1);
973    private static final int IN_REPEAT                  = (1<<2);
974    private static final int IN_VAR_REPEAT              = (1<<3);
975    private static final int EXPAND_STRING_MAX_LENGTH   = 100;
976
977    /* setup_tree does the following work.
978    1. check empty loop. (set qn->target_empty_info)
979    2. expand ignore-case in char class.
980    3. set memory status bit flags. (reg->mem_stats)
981    4. set qn->head_exact for [push, exact] -> [push_or_jump_exact1, exact].
982    5. find invalid patterns in look-behind.
983    6. expand repeated string.
984    */
985    protected final Node setupTree(final Node nodep, final int statep) {
986        Node node = nodep;
987        int state = statep;
988
989        restart: while (true) {
990        switch (node.getType()) {
991        case NodeType.LIST:
992            ConsAltNode lin = (ConsAltNode)node;
993            Node prev = null;
994            do {
995                setupTree(lin.car, state);
996                if (prev != null) {
997                    nextSetup(prev, lin.car);
998                }
999                prev = lin.car;
1000            } while ((lin = lin.cdr) != null);
1001            break;
1002
1003        case NodeType.ALT:
1004            ConsAltNode aln = (ConsAltNode)node;
1005            do {
1006                setupTree(aln.car, (state | IN_ALT));
1007            } while ((aln = aln.cdr) != null);
1008            break;
1009
1010        case NodeType.CCLASS:
1011            break;
1012
1013        case NodeType.STR:
1014            if (isIgnoreCase(regex.options) && !((StringNode)node).isRaw()) {
1015                node = expandCaseFoldString(node);
1016            }
1017            break;
1018
1019        case NodeType.CTYPE:
1020        case NodeType.CANY:
1021            break;
1022
1023        case NodeType.BREF:
1024            final BackRefNode br = (BackRefNode)node;
1025            if (br.backRef > env.numMem) {
1026                throw new ValueException(ERR_INVALID_BACKREF);
1027            }
1028            env.backrefedMem = bsOnAt(env.backrefedMem, br.backRef);
1029            env.btMemStart = bsOnAt(env.btMemStart, br.backRef);
1030            ((EncloseNode)env.memNodes[br.backRef]).setMemBackrefed();
1031            break;
1032
1033        case NodeType.QTFR:
1034            final QuantifierNode qn = (QuantifierNode)node;
1035            Node target = qn.target;
1036
1037            if ((state & IN_REPEAT) != 0) {
1038                qn.setInRepeat();
1039            }
1040
1041            if (isRepeatInfinite(qn.upper) || qn.lower >= 1) {
1042                final int d = getMinMatchLength(target);
1043                if (d == 0) {
1044                    qn.targetEmptyInfo = TargetInfo.IS_EMPTY;
1045                    if (Config.USE_MONOMANIAC_CHECK_CAPTURES_IN_ENDLESS_REPEAT) {
1046                        final int info = quantifiersMemoryInfo(target);
1047                        if (info > 0) {
1048                            qn.targetEmptyInfo = info;
1049                        }
1050                    } // USE_INFINITE_REPEAT_MONOMANIAC_MEM_STATUS_CHECK
1051                    // strange stuff here (turned off)
1052                }
1053            }
1054
1055            state |= IN_REPEAT;
1056            if (qn.lower != qn.upper) {
1057                state |= IN_VAR_REPEAT;
1058            }
1059
1060            target = setupTree(target, state);
1061
1062            /* expand string */
1063            if (target.getType() == NodeType.STR) {
1064                if (!isRepeatInfinite(qn.lower) && qn.lower == qn.upper &&
1065                    qn.lower > 1 && qn.lower <= EXPAND_STRING_MAX_LENGTH) {
1066                    final StringNode sn = (StringNode)target;
1067                    final int len = sn.length();
1068
1069                    if (len * qn.lower <= EXPAND_STRING_MAX_LENGTH) {
1070                        final StringNode str = qn.convertToString(sn.flag);
1071                        final int n = qn.lower;
1072                        for (int i = 0; i < n; i++) {
1073                            str.cat(sn.chars, sn.p, sn.end);
1074                        }
1075                        break; /* break case NT_QTFR: */
1076                    }
1077
1078                }
1079            }
1080            if (Config.USE_OP_PUSH_OR_JUMP_EXACT) {
1081                if (qn.greedy && qn.targetEmptyInfo != 0) {
1082                    if (target.getType() == NodeType.QTFR) {
1083                        final QuantifierNode tqn = (QuantifierNode)target;
1084                        if (tqn.headExact != null) {
1085                            qn.headExact = tqn.headExact;
1086                            tqn.headExact = null;
1087                        }
1088                    } else {
1089                        qn.headExact = getHeadValueNode(qn.target, true);
1090                    }
1091                }
1092            } // USE_OP_PUSH_OR_JUMP_EXACT
1093            break;
1094
1095        case NodeType.ENCLOSE:
1096            final EncloseNode en = (EncloseNode)node;
1097            switch (en.type) {
1098            case EncloseType.OPTION:
1099                final int options = regex.options;
1100                regex.options = en.option;
1101                setupTree(en.target, state);
1102                regex.options = options;
1103                break;
1104
1105            case EncloseType.MEMORY:
1106                if ((state & (IN_ALT | IN_NOT | IN_VAR_REPEAT)) != 0) {
1107                    env.btMemStart = bsOnAt(env.btMemStart, en.regNum);
1108                    /* SET_ENCLOSE_STATUS(node, NST_MEM_IN_ALT_NOT); */
1109
1110                }
1111                setupTree(en.target, state);
1112                break;
1113
1114            case EncloseType.STOP_BACKTRACK:
1115                setupTree(en.target, state);
1116                if (en.target.getType() == NodeType.QTFR) {
1117                    final QuantifierNode tqn = (QuantifierNode)en.target;
1118                    if (isRepeatInfinite(tqn.upper) && tqn.lower <= 1 && tqn.greedy) {
1119                        /* (?>a*), a*+ etc... */
1120                        if (tqn.target.isSimple()) {
1121                            en.setStopBtSimpleRepeat();
1122                        }
1123                    }
1124                }
1125                break;
1126
1127            default:
1128                break;
1129
1130            } // inner switch
1131            break;
1132
1133        case NodeType.ANCHOR:
1134            final AnchorNode an = (AnchorNode)node;
1135            switch (an.type) {
1136            case AnchorType.PREC_READ:
1137                setupTree(an.target, state);
1138                break;
1139
1140            case AnchorType.PREC_READ_NOT:
1141                setupTree(an.target, (state | IN_NOT));
1142                break;
1143
1144            case AnchorType.LOOK_BEHIND:
1145                if (checkTypeTree(an.target, NodeType.ALLOWED_IN_LB, EncloseType.ALLOWED_IN_LB, AnchorType.ALLOWED_IN_LB)) {
1146                    throw new SyntaxException(ERR_INVALID_LOOK_BEHIND_PATTERN);
1147                }
1148                node = setupLookBehind(node);
1149                if (node.getType() != NodeType.ANCHOR) {
1150                    continue restart;
1151                }
1152                setupTree(((AnchorNode)node).target, state);
1153                break;
1154
1155            case AnchorType.LOOK_BEHIND_NOT:
1156                if (checkTypeTree(an.target, NodeType.ALLOWED_IN_LB, EncloseType.ALLOWED_IN_LB, AnchorType.ALLOWED_IN_LB)) {
1157                    throw new SyntaxException(ERR_INVALID_LOOK_BEHIND_PATTERN);
1158                }
1159                node = setupLookBehind(node);
1160                if (node.getType() != NodeType.ANCHOR) {
1161                    continue restart;
1162                }
1163                setupTree(((AnchorNode)node).target, (state | IN_NOT));
1164                break;
1165
1166            default:
1167                break;
1168
1169            } // inner switch
1170            break;
1171        default:
1172            break;
1173        } // switch
1174        return node;
1175        } // restart: while
1176    }
1177
1178    private static final int MAX_NODE_OPT_INFO_REF_COUNT   = 5;
1179    private void optimizeNodeLeft(final Node node, final NodeOptInfo opt, final OptEnvironment oenv) { // oenv remove, pass mmd
1180        opt.clear();
1181        opt.setBoundNode(oenv.mmd);
1182
1183        switch (node.getType()) {
1184        case NodeType.LIST: {
1185            final OptEnvironment nenv = new OptEnvironment();
1186            final NodeOptInfo nopt = new NodeOptInfo();
1187            nenv.copy(oenv);
1188            ConsAltNode lin = (ConsAltNode)node;
1189            do {
1190                optimizeNodeLeft(lin.car, nopt, nenv);
1191                nenv.mmd.add(nopt.length);
1192                opt.concatLeftNode(nopt);
1193            } while ((lin = lin.cdr) != null);
1194            break;
1195        }
1196
1197        case NodeType.ALT: {
1198            final NodeOptInfo nopt = new NodeOptInfo();
1199            ConsAltNode aln = (ConsAltNode)node;
1200            do {
1201                optimizeNodeLeft(aln.car, nopt, oenv);
1202                if (aln == node) {
1203                    opt.copy(nopt);
1204                } else {
1205                    opt.altMerge(nopt, oenv);
1206                }
1207            } while ((aln = aln.cdr) != null);
1208            break;
1209        }
1210
1211        case NodeType.STR: {
1212            final StringNode sn = (StringNode)node;
1213
1214            final int slen = sn.length();
1215
1216            if (!sn.isAmbig()) {
1217                opt.exb.concatStr(sn.chars, sn.p, sn.end, sn.isRaw());
1218
1219                if (slen > 0) {
1220                    opt.map.addChar(sn.chars[sn.p]);
1221                }
1222
1223                opt.length.set(slen, slen);
1224            } else {
1225                int max;
1226                if (sn.isDontGetOptInfo()) {
1227                    max = sn.length();
1228                } else {
1229                    opt.exb.concatStr(sn.chars, sn.p, sn.end, sn.isRaw());
1230                    opt.exb.ignoreCase = true;
1231
1232                    if (slen > 0) {
1233                        opt.map.addCharAmb(sn.chars, sn.p, sn.end, oenv.caseFoldFlag);
1234                    }
1235
1236                    max = slen;
1237                }
1238                opt.length.set(slen, max);
1239            }
1240
1241            if (opt.exb.length == slen) {
1242                opt.exb.reachEnd = true;
1243            }
1244            break;
1245        }
1246
1247        case NodeType.CCLASS: {
1248            final CClassNode cc = (CClassNode)node;
1249            /* no need to check ignore case. (setted in setup_tree()) */
1250            if (cc.mbuf != null || cc.isNot()) {
1251                opt.length.set(1, 1);
1252            } else {
1253                for (int i=0; i<BitSet.SINGLE_BYTE_SIZE; i++) {
1254                    final boolean z = cc.bs.at(i);
1255                    if ((z && !cc.isNot()) || (!z && cc.isNot())) {
1256                        opt.map.addChar(i);
1257                    }
1258                }
1259                opt.length.set(1, 1);
1260            }
1261            break;
1262        }
1263
1264        case NodeType.CANY: {
1265            opt.length.set(1, 1);
1266            break;
1267        }
1268
1269        case NodeType.ANCHOR: {
1270            final AnchorNode an = (AnchorNode)node;
1271            switch (an.type) {
1272            case AnchorType.BEGIN_BUF:
1273            case AnchorType.BEGIN_POSITION:
1274            case AnchorType.BEGIN_LINE:
1275            case AnchorType.END_BUF:
1276            case AnchorType.SEMI_END_BUF:
1277            case AnchorType.END_LINE:
1278                opt.anchor.add(an.type);
1279                break;
1280
1281            case AnchorType.PREC_READ:
1282                final NodeOptInfo nopt = new NodeOptInfo();
1283                optimizeNodeLeft(an.target, nopt, oenv);
1284                if (nopt.exb.length > 0) {
1285                    opt.expr.copy(nopt.exb);
1286                } else if (nopt.exm.length > 0) {
1287                    opt.expr.copy(nopt.exm);
1288                }
1289                opt.expr.reachEnd = false;
1290                if (nopt.map.value > 0) {
1291                    opt.map.copy(nopt.map);
1292                }
1293                break;
1294
1295            case AnchorType.PREC_READ_NOT:
1296            case AnchorType.LOOK_BEHIND:    /* Sorry, I can't make use of it. */
1297            case AnchorType.LOOK_BEHIND_NOT:
1298                break;
1299
1300            default:
1301                break;
1302
1303            } // inner switch
1304            break;
1305        }
1306
1307        case NodeType.BREF: {
1308            final BackRefNode br = (BackRefNode)node;
1309
1310            if (br.isRecursion()) {
1311                opt.length.set(0, MinMaxLen.INFINITE_DISTANCE);
1312                break;
1313            }
1314
1315            final Node[]nodes = oenv.scanEnv.memNodes;
1316
1317            final int min = getMinMatchLength(nodes[br.backRef]);
1318            final int max = getMaxMatchLength(nodes[br.backRef]);
1319
1320            opt.length.set(min, max);
1321            break;
1322        }
1323
1324
1325        case NodeType.QTFR: {
1326            final NodeOptInfo nopt = new NodeOptInfo();
1327            final QuantifierNode qn = (QuantifierNode)node;
1328            optimizeNodeLeft(qn.target, nopt, oenv);
1329            if (qn.lower == 0 && isRepeatInfinite(qn.upper)) {
1330                if (oenv.mmd.max == 0 && qn.target.getType() == NodeType.CANY && qn.greedy) {
1331                    if (isMultiline(oenv.options)) {
1332                        opt.anchor.add(AnchorType.ANYCHAR_STAR_ML);
1333                    } else {
1334                        opt.anchor.add(AnchorType.ANYCHAR_STAR);
1335                    }
1336                }
1337            } else {
1338                if (qn.lower > 0) {
1339                    opt.copy(nopt);
1340                    if (nopt.exb.length > 0) {
1341                        if (nopt.exb.reachEnd) {
1342                            int i;
1343                            for (i = 2; i <= qn.lower && !opt.exb.isFull(); i++) {
1344                                opt.exb.concat(nopt.exb);
1345                            }
1346                            if (i < qn.lower) {
1347                                opt.exb.reachEnd = false;
1348                            }
1349                        }
1350                    }
1351                    if (qn.lower != qn.upper) {
1352                        opt.exb.reachEnd = false;
1353                        opt.exm.reachEnd = false;
1354                    }
1355                    if (qn.lower > 1) {
1356                        opt.exm.reachEnd = false;
1357                    }
1358
1359                }
1360            }
1361            final int min = MinMaxLen.distanceMultiply(nopt.length.min, qn.lower);
1362            int max;
1363            if (isRepeatInfinite(qn.upper)) {
1364                max = nopt.length.max > 0 ? MinMaxLen.INFINITE_DISTANCE : 0;
1365            } else {
1366                max = MinMaxLen.distanceMultiply(nopt.length.max, qn.upper);
1367            }
1368            opt.length.set(min, max);
1369            break;
1370        }
1371
1372        case NodeType.ENCLOSE: {
1373            final EncloseNode en = (EncloseNode)node;
1374            switch (en.type) {
1375            case EncloseType.OPTION:
1376                final int save = oenv.options;
1377                oenv.options = en.option;
1378                optimizeNodeLeft(en.target, opt, oenv);
1379                oenv.options = save;
1380                break;
1381
1382            case EncloseType.MEMORY:
1383                if (++en.optCount > MAX_NODE_OPT_INFO_REF_COUNT) {
1384                    int min = 0;
1385                    int max = MinMaxLen.INFINITE_DISTANCE;
1386                    if (en.isMinFixed()) {
1387                        min = en.minLength;
1388                    }
1389                    if (en.isMaxFixed()) {
1390                        max = en.maxLength;
1391                    }
1392                    opt.length.set(min, max);
1393                } else { // USE_SUBEXP_CALL
1394                    optimizeNodeLeft(en.target, opt, oenv);
1395                    if (opt.anchor.isSet(AnchorType.ANYCHAR_STAR_MASK)) {
1396                        if (bsAt(oenv.scanEnv.backrefedMem, en.regNum)) {
1397                            opt.anchor.remove(AnchorType.ANYCHAR_STAR_MASK);
1398                        }
1399                    }
1400                }
1401                break;
1402
1403            case EncloseType.STOP_BACKTRACK:
1404                optimizeNodeLeft(en.target, opt, oenv);
1405                break;
1406
1407            default:
1408                break;
1409            } // inner switch
1410            break;
1411        }
1412
1413        default:
1414            throw new InternalException(ERR_PARSER_BUG);
1415        } // switch
1416    }
1417
1418    @SuppressWarnings("unused")
1419    protected final void setOptimizedInfoFromTree(final Node node) {
1420        final NodeOptInfo opt = new NodeOptInfo();
1421        final OptEnvironment oenv = new OptEnvironment();
1422
1423        oenv.options = regex.options;
1424        oenv.caseFoldFlag = regex.caseFoldFlag;
1425        oenv.scanEnv = env;
1426        oenv.mmd.clear(); // ??
1427
1428        optimizeNodeLeft(node, opt, oenv);
1429
1430        regex.anchor = opt.anchor.leftAnchor & (AnchorType.BEGIN_BUF |
1431                                                AnchorType.BEGIN_POSITION |
1432                                                AnchorType.ANYCHAR_STAR |
1433                                                AnchorType.ANYCHAR_STAR_ML);
1434
1435        regex.anchor |= opt.anchor.rightAnchor & (AnchorType.END_BUF |
1436                                                  AnchorType.SEMI_END_BUF);
1437
1438        if ((regex.anchor & (AnchorType.END_BUF | AnchorType.SEMI_END_BUF)) != 0) {
1439            regex.anchorDmin = opt.length.min;
1440            regex.anchorDmax = opt.length.max;
1441        }
1442
1443        if (opt.exb.length > 0 || opt.exm.length > 0) {
1444            opt.exb.select(opt.exm);
1445            if (opt.map.value > 0 && opt.exb.compare(opt.map) > 0) {
1446                // !goto set_map;!
1447                regex.setOptimizeMapInfo(opt.map);
1448                regex.setSubAnchor(opt.map.anchor);
1449            } else {
1450                regex.setExactInfo(opt.exb);
1451                regex.setSubAnchor(opt.exb.anchor);
1452            }
1453        } else if (opt.map.value > 0) {
1454            // !set_map:!
1455            regex.setOptimizeMapInfo(opt.map);
1456            regex.setSubAnchor(opt.map.anchor);
1457        } else {
1458            regex.subAnchor |= opt.anchor.leftAnchor & AnchorType.BEGIN_LINE;
1459            if (opt.length.max == 0) {
1460                regex.subAnchor |= opt.anchor.rightAnchor & AnchorType.END_LINE;
1461            }
1462        }
1463
1464        if (Config.DEBUG_COMPILE || Config.DEBUG_MATCH) {
1465            Config.log.println(regex.optimizeInfoToString());
1466        }
1467    }
1468}
1469