Analyser.java revision 953:221a84ef44c0
1/*
2 * Permission is hereby granted, free of charge, to any person obtaining a copy of
3 * this software and associated documentation files (the "Software"), to deal in
4 * the Software without restriction, including without limitation the rights to
5 * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
6 * of the Software, and to permit persons to whom the Software is furnished to do
7 * so, subject to the following conditions:
8 *
9 * The above copyright notice and this permission notice shall be included in all
10 * copies or substantial portions of the Software.
11 *
12 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
13 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
14 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
15 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
16 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
17 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
18 * SOFTWARE.
19 */
20package jdk.nashorn.internal.runtime.regexp.joni;
21
22import static jdk.nashorn.internal.runtime.regexp.joni.BitStatus.bsAll;
23import static jdk.nashorn.internal.runtime.regexp.joni.BitStatus.bsAt;
24import static jdk.nashorn.internal.runtime.regexp.joni.BitStatus.bsOnAt;
25import static jdk.nashorn.internal.runtime.regexp.joni.Option.isFindCondition;
26import static jdk.nashorn.internal.runtime.regexp.joni.Option.isIgnoreCase;
27import static jdk.nashorn.internal.runtime.regexp.joni.Option.isMultiline;
28import static jdk.nashorn.internal.runtime.regexp.joni.ast.ConsAltNode.newAltNode;
29import static jdk.nashorn.internal.runtime.regexp.joni.ast.QuantifierNode.isRepeatInfinite;
30
31import java.util.HashSet;
32import jdk.nashorn.internal.runtime.regexp.joni.ast.AnchorNode;
33import jdk.nashorn.internal.runtime.regexp.joni.ast.BackRefNode;
34import jdk.nashorn.internal.runtime.regexp.joni.ast.CClassNode;
35import jdk.nashorn.internal.runtime.regexp.joni.ast.ConsAltNode;
36import jdk.nashorn.internal.runtime.regexp.joni.ast.EncloseNode;
37import jdk.nashorn.internal.runtime.regexp.joni.ast.Node;
38import jdk.nashorn.internal.runtime.regexp.joni.ast.QuantifierNode;
39import jdk.nashorn.internal.runtime.regexp.joni.ast.StringNode;
40import jdk.nashorn.internal.runtime.regexp.joni.constants.AnchorType;
41import jdk.nashorn.internal.runtime.regexp.joni.constants.EncloseType;
42import jdk.nashorn.internal.runtime.regexp.joni.constants.NodeType;
43import jdk.nashorn.internal.runtime.regexp.joni.constants.StackPopLevel;
44import jdk.nashorn.internal.runtime.regexp.joni.constants.TargetInfo;
45import jdk.nashorn.internal.runtime.regexp.joni.encoding.ObjPtr;
46import jdk.nashorn.internal.runtime.regexp.joni.exception.InternalException;
47import jdk.nashorn.internal.runtime.regexp.joni.exception.SyntaxException;
48import jdk.nashorn.internal.runtime.regexp.joni.exception.ValueException;
49
50final class Analyser extends Parser {
51
52    protected Analyser(final ScanEnvironment env, final char[] chars, final int p, final int end) {
53        super(env, chars, p, end);
54    }
55
56    protected final void compile() {
57        if (Config.DEBUG) {
58            Config.log.println(new String(chars, getBegin(), getEnd()));
59        }
60
61        reset();
62
63        regex.numMem = 0;
64        regex.numRepeat = 0;
65        regex.numNullCheck = 0;
66        //regex.repeatRangeAlloc = 0;
67        regex.repeatRangeLo = null;
68        regex.repeatRangeHi = null;
69
70        parse();
71
72        if (Config.DEBUG_PARSE_TREE_RAW && Config.DEBUG_PARSE_TREE) {
73            Config.log.println("<RAW TREE>");
74            Config.log.println(root + "\n");
75        }
76
77        root = setupTree(root, 0);
78        if (Config.DEBUG_PARSE_TREE) {
79            if (Config.DEBUG_PARSE_TREE_RAW) Config.log.println("<TREE>");
80            root.verifyTree(new HashSet<Node>(), env.reg.warnings);
81            Config.log.println(root + "\n");
82        }
83
84        regex.captureHistory = env.captureHistory;
85        regex.btMemStart = env.btMemStart;
86        regex.btMemEnd = env.btMemEnd;
87
88        if (isFindCondition(regex.options)) {
89            regex.btMemEnd = bsAll();
90        } else {
91            regex.btMemEnd = env.btMemEnd;
92            regex.btMemEnd |= regex.captureHistory;
93        }
94
95        regex.clearOptimizeInfo();
96
97        if (!Config.DONT_OPTIMIZE) setOptimizedInfoFromTree(root);
98
99        env.memNodes = null;
100
101        if (regex.numRepeat != 0 || regex.btMemEnd != 0) {
102            regex.stackPopLevel = StackPopLevel.ALL;
103        } else {
104            if (regex.btMemStart != 0) {
105                regex.stackPopLevel = StackPopLevel.MEM_START;
106            } else {
107                regex.stackPopLevel = StackPopLevel.FREE;
108            }
109        }
110
111        if (Config.DEBUG_COMPILE) {
112            Config.log.println("stack used: " + regex.stackNeeded);
113            if (Config.USE_STRING_TEMPLATES) Config.log.print("templates: " + regex.templateNum + "\n");
114            Config.log.println(new ByteCodePrinter(regex).byteCodeListToString());
115
116        } // DEBUG_COMPILE
117    }
118
119    private void swap(final Node a, final Node b) {
120        a.swap(b);
121
122        if (root == b) {
123            root = a;
124        } else if (root == a) {
125            root = b;
126        }
127    }
128
129    // USE_INFINITE_REPEAT_MONOMANIAC_MEM_STATUS_CHECK
130    private int quantifiersMemoryInfo(final Node node) {
131        int info = 0;
132
133        switch(node.getType()) {
134        case NodeType.LIST:
135        case NodeType.ALT:
136            ConsAltNode can = (ConsAltNode)node;
137            do {
138                final int v = quantifiersMemoryInfo(can.car);
139                if (v > info) info = v;
140            } while ((can = can.cdr) != null);
141            break;
142
143        case NodeType.QTFR:
144            final QuantifierNode qn = (QuantifierNode)node;
145            if (qn.upper != 0) {
146                info = quantifiersMemoryInfo(qn.target);
147            }
148            break;
149
150        case NodeType.ENCLOSE:
151            final EncloseNode en = (EncloseNode)node;
152            switch (en.type) {
153            case EncloseType.MEMORY:
154                return TargetInfo.IS_EMPTY_MEM;
155
156            case EncloseType.OPTION:
157            case EncloseNode.STOP_BACKTRACK:
158                info = quantifiersMemoryInfo(en.target);
159                break;
160
161            default:
162                break;
163            } // inner switch
164            break;
165
166        case NodeType.BREF:
167        case NodeType.STR:
168        case NodeType.CTYPE:
169        case NodeType.CCLASS:
170        case NodeType.CANY:
171        case NodeType.ANCHOR:
172        default:
173            break;
174        } // switch
175
176        return info;
177    }
178
179    private int getMinMatchLength(final Node node) {
180        int min = 0;
181
182        switch (node.getType()) {
183        case NodeType.BREF:
184            final BackRefNode br = (BackRefNode)node;
185            if (br.isRecursion()) break;
186
187            if (br.backRef > env.numMem) {
188                throw new ValueException(ERR_INVALID_BACKREF);
189            }
190            min = getMinMatchLength(env.memNodes[br.backRef]);
191
192            break;
193
194        case NodeType.LIST:
195            ConsAltNode can = (ConsAltNode)node;
196            do {
197                min += getMinMatchLength(can.car);
198            } while ((can = can.cdr) != null);
199            break;
200
201        case NodeType.ALT:
202            ConsAltNode y = (ConsAltNode)node;
203            do {
204                final Node x = y.car;
205                final int tmin = getMinMatchLength(x);
206                if (y == node) {
207                    min = tmin;
208                } else if (min > tmin) {
209                    min = tmin;
210                }
211            } while ((y = y.cdr) != null);
212            break;
213
214        case NodeType.STR:
215            min = ((StringNode)node).length();
216            break;
217
218        case NodeType.CTYPE:
219            min = 1;
220            break;
221
222        case NodeType.CCLASS:
223        case NodeType.CANY:
224            min = 1;
225            break;
226
227        case NodeType.QTFR:
228            final QuantifierNode qn = (QuantifierNode)node;
229            if (qn.lower > 0) {
230                min = getMinMatchLength(qn.target);
231                min = MinMaxLen.distanceMultiply(min, qn.lower);
232            }
233            break;
234
235        case NodeType.ENCLOSE:
236            final EncloseNode en = (EncloseNode)node;
237            switch (en.type) {
238            case EncloseType.MEMORY:
239                if (en.isMinFixed()) {
240                    min = en.minLength;
241                } else {
242                    min = getMinMatchLength(en.target);
243                    en.minLength = min;
244                    en.setMinFixed();
245                }
246                break;
247
248            case EncloseType.OPTION:
249            case EncloseType.STOP_BACKTRACK:
250                min = getMinMatchLength(en.target);
251                break;
252            } // inner switch
253            break;
254
255        case NodeType.ANCHOR:
256        default:
257            break;
258        } // switch
259
260        return min;
261    }
262
263    private int getMaxMatchLength(final Node node) {
264        int max = 0;
265
266        switch (node.getType()) {
267        case NodeType.LIST:
268            ConsAltNode ln = (ConsAltNode)node;
269            do {
270                final int tmax = getMaxMatchLength(ln.car);
271                max = MinMaxLen.distanceAdd(max, tmax);
272            } while ((ln = ln.cdr) != null);
273            break;
274
275        case NodeType.ALT:
276            ConsAltNode an = (ConsAltNode)node;
277            do {
278                final int tmax = getMaxMatchLength(an.car);
279                if (max < tmax) max = tmax;
280            } while ((an = an.cdr) != null);
281            break;
282
283        case NodeType.STR:
284            max = ((StringNode)node).length();
285            break;
286
287        case NodeType.CTYPE:
288            max = 1;
289            break;
290
291        case NodeType.CCLASS:
292        case NodeType.CANY:
293            max = 1;
294            break;
295
296        case NodeType.BREF:
297            final BackRefNode br = (BackRefNode)node;
298            if (br.isRecursion()) {
299                max = MinMaxLen.INFINITE_DISTANCE;
300                break;
301            }
302
303            if (br.backRef > env.numMem) {
304                throw new ValueException(ERR_INVALID_BACKREF);
305            }
306            final int tmax = getMaxMatchLength(env.memNodes[br.backRef]);
307            if (max < tmax) max = tmax;
308            break;
309
310        case NodeType.QTFR:
311            final QuantifierNode qn = (QuantifierNode)node;
312            if (qn.upper != 0) {
313                max = getMaxMatchLength(qn.target);
314                if (max != 0) {
315                    if (!isRepeatInfinite(qn.upper)) {
316                        max = MinMaxLen.distanceMultiply(max, qn.upper);
317                    } else {
318                        max = MinMaxLen.INFINITE_DISTANCE;
319                    }
320                }
321            }
322            break;
323
324        case NodeType.ENCLOSE:
325            final EncloseNode en = (EncloseNode)node;
326            switch (en.type) {
327            case EncloseType.MEMORY:
328                if (en.isMaxFixed()) {
329                    max = en.maxLength;
330                } else {
331                    max = getMaxMatchLength(en.target);
332                    en.maxLength = max;
333                    en.setMaxFixed();
334                }
335                break;
336
337            case EncloseType.OPTION:
338            case EncloseType.STOP_BACKTRACK:
339                max = getMaxMatchLength(en.target);
340                break;
341            } // inner switch
342            break;
343
344        case NodeType.ANCHOR:
345        default:
346            break;
347        } // switch
348
349        return max;
350    }
351
352    private static final int GET_CHAR_LEN_VARLEN            = -1;
353    private static final int GET_CHAR_LEN_TOP_ALT_VARLEN    = -2;
354    protected final int getCharLengthTree(final Node node) {
355        return getCharLengthTree(node, 0);
356    }
357
358    private int getCharLengthTree(final Node node, int level) {
359        level++;
360
361        int len = 0;
362        returnCode = 0;
363
364        switch(node.getType()) {
365        case NodeType.LIST:
366            ConsAltNode ln = (ConsAltNode)node;
367            do {
368                final int tlen = getCharLengthTree(ln.car, level);
369                if (returnCode == 0) len = MinMaxLen.distanceAdd(len, tlen);
370            } while (returnCode == 0 && (ln = ln.cdr) != null);
371            break;
372
373        case NodeType.ALT:
374            ConsAltNode an = (ConsAltNode)node;
375            boolean varLen = false;
376
377            int tlen = getCharLengthTree(an.car, level);
378            while (returnCode == 0 && (an = an.cdr) != null) {
379                final int tlen2 = getCharLengthTree(an.car, level);
380                if (returnCode == 0) {
381                    if (tlen != tlen2) varLen = true;
382                }
383            }
384
385            if (returnCode == 0) {
386                if (varLen) {
387                    if (level == 1) {
388                        returnCode = GET_CHAR_LEN_TOP_ALT_VARLEN;
389                    } else {
390                        returnCode = GET_CHAR_LEN_VARLEN;
391                    }
392                } else {
393                    len = tlen;
394                }
395            }
396            break;
397
398        case NodeType.STR:
399            final StringNode sn = (StringNode)node;
400            len = sn.length();
401            break;
402
403        case NodeType.QTFR:
404            final QuantifierNode qn = (QuantifierNode)node;
405            if (qn.lower == qn.upper) {
406                tlen = getCharLengthTree(qn.target, level);
407                if (returnCode == 0) len = MinMaxLen.distanceMultiply(tlen, qn.lower);
408            } else {
409                returnCode = GET_CHAR_LEN_VARLEN;
410            }
411            break;
412
413        case NodeType.CTYPE:
414        case NodeType.CCLASS:
415        case NodeType.CANY:
416            len = 1;
417            break;
418
419        case NodeType.ENCLOSE:
420            final EncloseNode en = (EncloseNode)node;
421            switch(en.type) {
422            case EncloseType.MEMORY:
423                if (en.isCLenFixed()) {
424                    len = en.charLength;
425                } else {
426                    len = getCharLengthTree(en.target, level);
427                    if (returnCode == 0) {
428                        en.charLength = len;
429                        en.setCLenFixed();
430                    }
431                }
432                break;
433
434            case EncloseType.OPTION:
435            case EncloseType.STOP_BACKTRACK:
436                len = getCharLengthTree(en.target, level);
437                break;
438            } // inner switch
439            break;
440
441        case NodeType.ANCHOR:
442            break;
443
444        default:
445            returnCode = GET_CHAR_LEN_VARLEN;
446        } // switch
447        return len;
448    }
449
450    /* x is not included y ==>  1 : 0 */
451    private boolean isNotIncluded(Node x, Node y) {
452        Node tmp;
453
454        // !retry:!
455        retry: while(true) {
456
457        final int yType = y.getType();
458
459        switch(x.getType()) {
460        case NodeType.CTYPE:
461            switch(yType) {
462
463            case NodeType.CCLASS:
464                // !swap:!
465                tmp = x;
466                x = y;
467                y = tmp;
468                // !goto retry;!
469                continue retry;
470
471            case NodeType.STR:
472                // !goto swap;!
473                tmp = x;
474                x = y;
475                y = tmp;
476                continue retry;
477
478            default:
479                break;
480            } // inner switch
481            break;
482
483        case NodeType.CCLASS:
484            final CClassNode xc = (CClassNode)x;
485
486            switch(yType) {
487
488            case NodeType.CCLASS:
489                final CClassNode yc = (CClassNode)y;
490
491                for (int i=0; i<BitSet.SINGLE_BYTE_SIZE; i++) {
492                    boolean v = xc.bs.at(i);
493                    if ((v && !xc.isNot()) || (!v && xc.isNot())) {
494                        v = yc.bs.at(i);
495                        if ((v && !yc.isNot()) || (!v && yc.isNot())) return false;
496                    }
497                }
498                if ((xc.mbuf == null && !xc.isNot()) || yc.mbuf == null && !yc.isNot()) return true;
499                return false;
500                // break; not reached
501
502            case NodeType.STR:
503                // !goto swap;!
504                tmp = x;
505                x = y;
506                y = tmp;
507                continue retry;
508
509            default:
510                break;
511
512            } // inner switch
513            break; // case NodeType.CCLASS
514
515        case NodeType.STR:
516            final StringNode xs = (StringNode)x;
517            if (xs.length() == 0) break;
518
519            switch (yType) {
520
521            case NodeType.CCLASS:
522                final CClassNode cc = (CClassNode)y;
523                final int code = xs.chars[xs.p];
524                return !cc.isCodeInCC(code);
525
526            case NodeType.STR:
527                final StringNode ys = (StringNode)y;
528                int len = xs.length();
529                if (len > ys.length()) len = ys.length();
530                if (xs.isAmbig() || ys.isAmbig()) {
531                    /* tiny version */
532                    return false;
533                } else {
534                    for (int i=0, p=ys.p, q=xs.p; i<len; i++, p++, q++) {
535                        if (ys.chars[p] != xs.chars[q]) return true;
536                    }
537                }
538                break;
539
540            default:
541                break;
542            } // inner switch
543
544            break; // case NodeType.STR
545
546        } // switch
547
548        break;
549        } // retry: while
550        return false;
551    }
552
553    private Node getHeadValueNode(final Node node, final boolean exact) {
554        Node n = null;
555
556        switch(node.getType()) {
557        case NodeType.BREF:
558        case NodeType.ALT:
559        case NodeType.CANY:
560            break;
561
562        case NodeType.CTYPE:
563        case NodeType.CCLASS:
564            if (!exact) n = node;
565            break;
566
567        case NodeType.LIST:
568            n = getHeadValueNode(((ConsAltNode)node).car, exact);
569            break;
570
571        case NodeType.STR:
572            final StringNode sn = (StringNode)node;
573            if (sn.end <= sn.p) break; // ???
574
575            if (exact && !sn.isRaw() && isIgnoreCase(regex.options)){
576                // nothing
577            } else {
578                n = node;
579            }
580            break;
581
582        case NodeType.QTFR:
583            final QuantifierNode qn = (QuantifierNode)node;
584            if (qn.lower > 0) {
585                if (qn.headExact != null) {
586                    n = qn.headExact;
587                } else {
588                    n = getHeadValueNode(qn.target, exact);
589                }
590            }
591            break;
592
593        case NodeType.ENCLOSE:
594            final EncloseNode en = (EncloseNode)node;
595
596            switch (en.type) {
597            case EncloseType.OPTION:
598                final int options = regex.options;
599                regex.options = en.option;
600                n = getHeadValueNode(en.target, exact);
601                regex.options = options;
602                break;
603
604            case EncloseType.MEMORY:
605            case EncloseType.STOP_BACKTRACK:
606                n = getHeadValueNode(en.target, exact);
607                break;
608            } // inner switch
609            break;
610
611        case NodeType.ANCHOR:
612            final AnchorNode an = (AnchorNode)node;
613            if (an.type == AnchorType.PREC_READ) n = getHeadValueNode(an.target, exact);
614            break;
615
616        default:
617            break;
618        } // switch
619
620        return n;
621    }
622
623    // true: invalid
624    private boolean checkTypeTree(final Node node, final int typeMask, final int encloseMask, final int anchorMask) {
625        if ((node.getType2Bit() & typeMask) == 0) return true;
626
627        boolean invalid = false;
628
629        switch(node.getType()) {
630        case NodeType.LIST:
631        case NodeType.ALT:
632            ConsAltNode can = (ConsAltNode)node;
633            do {
634                invalid = checkTypeTree(can.car, typeMask, encloseMask, anchorMask);
635            } while (!invalid && (can = can.cdr) != null);
636            break;
637
638        case NodeType.QTFR:
639            invalid = checkTypeTree(((QuantifierNode)node).target, typeMask, encloseMask, anchorMask);
640            break;
641
642        case NodeType.ENCLOSE:
643            final EncloseNode en = (EncloseNode)node;
644            if ((en.type & encloseMask) == 0) return true;
645            invalid = checkTypeTree(en.target, typeMask, encloseMask, anchorMask);
646            break;
647
648        case NodeType.ANCHOR:
649            final AnchorNode an = (AnchorNode)node;
650            if ((an.type & anchorMask) == 0) return true;
651
652            if (an.target != null) invalid = checkTypeTree(an.target, typeMask, encloseMask, anchorMask);
653            break;
654
655        default:
656            break;
657
658        } // switch
659
660        return invalid;
661    }
662
663    /* divide different length alternatives in look-behind.
664    (?<=A|B) ==> (?<=A)|(?<=B)
665    (?<!A|B) ==> (?<!A)(?<!B)
666     */
667    private Node divideLookBehindAlternatives(Node node) {
668        final AnchorNode an = (AnchorNode)node;
669        final int anchorType = an.type;
670        Node head = an.target;
671        Node np = ((ConsAltNode)head).car;
672
673        swap(node, head);
674
675        final Node tmp = node;
676        node = head;
677        head = tmp;
678
679        ((ConsAltNode)node).setCar(head);
680        ((AnchorNode)head).setTarget(np);
681        np = node;
682
683        while ((np = ((ConsAltNode)np).cdr) != null) {
684            final AnchorNode insert = new AnchorNode(anchorType);
685            insert.setTarget(((ConsAltNode)np).car);
686            ((ConsAltNode)np).setCar(insert);
687        }
688
689        if (anchorType == AnchorType.LOOK_BEHIND_NOT) {
690            np = node;
691            do {
692                ((ConsAltNode)np).toListNode(); /* alt -> list */
693            } while ((np = ((ConsAltNode)np).cdr) != null);
694        }
695
696        return node;
697    }
698
699    private Node setupLookBehind(final Node node) {
700        final AnchorNode an = (AnchorNode)node;
701        final int len = getCharLengthTree(an.target);
702        switch(returnCode) {
703        case 0:
704            an.charLength = len;
705            break;
706        case GET_CHAR_LEN_VARLEN:
707            throw new SyntaxException(ERR_INVALID_LOOK_BEHIND_PATTERN);
708        case GET_CHAR_LEN_TOP_ALT_VARLEN:
709            if (syntax.differentLengthAltLookBehind()) {
710                return divideLookBehindAlternatives(node);
711            } else {
712                throw new SyntaxException(ERR_INVALID_LOOK_BEHIND_PATTERN);
713            }
714        }
715        return node;
716    }
717
718    private void nextSetup(Node node, final Node nextNode) {
719        // retry:
720        retry: while(true) {
721
722        final int type = node.getType();
723        if (type == NodeType.QTFR) {
724            final QuantifierNode qn = (QuantifierNode)node;
725            if (qn.greedy && isRepeatInfinite(qn.upper)) {
726                if (Config.USE_QTFR_PEEK_NEXT) {
727                    final StringNode n = (StringNode)getHeadValueNode(nextNode, true);
728                    /* '\0': for UTF-16BE etc... */
729                    if (n != null && n.chars[n.p] != 0) { // ?????????
730                        qn.nextHeadExact = n;
731                    }
732                } // USE_QTFR_PEEK_NEXT
733                /* automatic posseivation a*b ==> (?>a*)b */
734                if (qn.lower <= 1) {
735                    if (qn.target.isSimple()) {
736                        final Node x = getHeadValueNode(qn.target, false);
737                        if (x != null) {
738                            final Node y = getHeadValueNode(nextNode, false);
739                            if (y != null && isNotIncluded(x, y)) {
740                                final EncloseNode en = new EncloseNode(EncloseType.STOP_BACKTRACK); //onig_node_new_enclose
741                                en.setStopBtSimpleRepeat();
742                                //en.setTarget(qn.target); // optimize it ??
743                                swap(node, en);
744
745                                en.setTarget(node);
746                            }
747                        }
748                    }
749                }
750            }
751        } else if (type == NodeType.ENCLOSE) {
752            final EncloseNode en = (EncloseNode)node;
753            if (en.isMemory()) {
754                node = en.target;
755                // !goto retry;!
756                continue retry;
757            }
758        }
759
760        break;
761        } // while
762    }
763
764    private void updateStringNodeCaseFoldMultiByte(final StringNode sn) {
765        final char[] chars = sn.chars;
766        final int end = sn.end;
767        value = sn.p;
768        int sp = 0;
769        char buf;
770
771        while (value < end) {
772            final int ovalue = value;
773            buf = EncodingHelper.toLowerCase(chars[value++]);
774
775            if (chars[ovalue] != buf) {
776
777                char[] sbuf = new char[sn.length() << 1];
778                System.arraycopy(chars, sn.p, sbuf, 0, ovalue - sn.p);
779                value = ovalue;
780                while (value < end) {
781                    buf = EncodingHelper.toLowerCase(chars[value++]);
782                    if (sp >= sbuf.length) {
783                        final char[]tmp = new char[sbuf.length << 1];
784                        System.arraycopy(sbuf, 0, tmp, 0, sbuf.length);
785                        sbuf = tmp;
786                    }
787                    sbuf[sp++] = buf;
788                }
789                sn.set(sbuf, 0, sp);
790                return;
791            }
792            sp++;
793        }
794    }
795
796    private void updateStringNodeCaseFold(final Node node) {
797        final StringNode sn = (StringNode)node;
798        updateStringNodeCaseFoldMultiByte(sn);
799    }
800
801    private Node expandCaseFoldMakeRemString(final char[] chars, final int p, final int end) {
802        final StringNode node = new StringNode(chars, p, end);
803
804        updateStringNodeCaseFold(node);
805        node.setAmbig();
806        node.setDontGetOptInfo();
807        return node;
808    }
809
810    private boolean expandCaseFoldStringAlt(final int itemNum, final char[] items,
811                                              final char[] chars, final int p, final int slen, final int end, final ObjPtr<Node> node) {
812
813        ConsAltNode altNode;
814        node.p = altNode = newAltNode(null, null);
815
816        StringNode snode = new StringNode(chars, p, p + slen);
817        altNode.setCar(snode);
818
819        for (int i=0; i<itemNum; i++) {
820            snode = new StringNode();
821
822            snode.catCode(items[i]);
823
824            final ConsAltNode an = newAltNode(null, null);
825            an.setCar(snode);
826            altNode.setCdr(an);
827            altNode = an;
828        }
829        return false;
830    }
831
832    private static final int THRESHOLD_CASE_FOLD_ALT_FOR_EXPANSION = 8;
833    private Node expandCaseFoldString(final Node node) {
834        final StringNode sn = (StringNode)node;
835
836        if (sn.isAmbig() || sn.length() <= 0) return node;
837
838        final char[] chars = sn.chars;
839        int p = sn.p;
840        final int end = sn.end;
841        int altNum = 1;
842
843        ConsAltNode topRoot = null, root = null;
844        final ObjPtr<Node> prevNode = new ObjPtr<Node>();
845        StringNode stringNode = null;
846
847        while (p < end) {
848            final char[] items = EncodingHelper.caseFoldCodesByString(regex.caseFoldFlag, chars[p]);
849
850            if (items.length == 0) {
851                if (stringNode == null) {
852                    if (root == null && prevNode.p != null) {
853                        topRoot = root = ConsAltNode.listAdd(null, prevNode.p);
854                    }
855
856                    prevNode.p = stringNode = new StringNode(); // onig_node_new_str(NULL, NULL);
857
858                    if (root != null) ConsAltNode.listAdd(root, stringNode);
859
860                }
861
862                stringNode.cat(chars, p, p + 1);
863            } else {
864                altNum *= (items.length + 1);
865                if (altNum > THRESHOLD_CASE_FOLD_ALT_FOR_EXPANSION) break;
866
867                if (root == null && prevNode.p != null) {
868                    topRoot = root = ConsAltNode.listAdd(null, prevNode.p);
869                }
870
871                expandCaseFoldStringAlt(items.length, items, chars, p, 1, end, prevNode);
872                if (root != null) ConsAltNode.listAdd(root, prevNode.p);
873                stringNode = null;
874            }
875            p++;
876        }
877
878        if (p < end) {
879            final Node srem = expandCaseFoldMakeRemString(chars, p, end);
880
881            if (prevNode.p != null && root == null) {
882                topRoot = root = ConsAltNode.listAdd(null, prevNode.p);
883            }
884
885            if (root == null) {
886                prevNode.p = srem;
887            } else {
888                ConsAltNode.listAdd(root, srem);
889            }
890        }
891        /* ending */
892        final Node xnode = topRoot != null ? topRoot : prevNode.p;
893
894        swap(node, xnode);
895        return xnode;
896    }
897
898    private static final int IN_ALT                     = (1<<0);
899    private static final int IN_NOT                     = (1<<1);
900    private static final int IN_REPEAT                  = (1<<2);
901    private static final int IN_VAR_REPEAT              = (1<<3);
902    private static final int EXPAND_STRING_MAX_LENGTH   = 100;
903
904    /* setup_tree does the following work.
905    1. check empty loop. (set qn->target_empty_info)
906    2. expand ignore-case in char class.
907    3. set memory status bit flags. (reg->mem_stats)
908    4. set qn->head_exact for [push, exact] -> [push_or_jump_exact1, exact].
909    5. find invalid patterns in look-behind.
910    6. expand repeated string.
911    */
912    protected final Node setupTree(Node node, int state) {
913        restart: while (true) {
914        switch (node.getType()) {
915        case NodeType.LIST:
916            ConsAltNode lin = (ConsAltNode)node;
917            Node prev = null;
918            do {
919                setupTree(lin.car, state);
920                if (prev != null) {
921                    nextSetup(prev, lin.car);
922                }
923                prev = lin.car;
924            } while ((lin = lin.cdr) != null);
925            break;
926
927        case NodeType.ALT:
928            ConsAltNode aln = (ConsAltNode)node;
929            do {
930                setupTree(aln.car, (state | IN_ALT));
931            } while ((aln = aln.cdr) != null);
932            break;
933
934        case NodeType.CCLASS:
935            break;
936
937        case NodeType.STR:
938            if (isIgnoreCase(regex.options) && !((StringNode)node).isRaw()) {
939                node = expandCaseFoldString(node);
940            }
941            break;
942
943        case NodeType.CTYPE:
944        case NodeType.CANY:
945            break;
946
947        case NodeType.BREF:
948            final BackRefNode br = (BackRefNode)node;
949            if (br.backRef > env.numMem) {
950                throw new ValueException(ERR_INVALID_BACKREF);
951            }
952            env.backrefedMem = bsOnAt(env.backrefedMem, br.backRef);
953            env.btMemStart = bsOnAt(env.btMemStart, br.backRef);
954            ((EncloseNode)env.memNodes[br.backRef]).setMemBackrefed();
955            break;
956
957        case NodeType.QTFR:
958            final QuantifierNode qn = (QuantifierNode)node;
959            Node target = qn.target;
960
961            if ((state & IN_REPEAT) != 0) qn.setInRepeat();
962
963            if (isRepeatInfinite(qn.upper) || qn.lower >= 1) {
964                final int d = getMinMatchLength(target);
965                if (d == 0) {
966                    qn.targetEmptyInfo = TargetInfo.IS_EMPTY;
967                    if (Config.USE_MONOMANIAC_CHECK_CAPTURES_IN_ENDLESS_REPEAT) {
968                        final int info = quantifiersMemoryInfo(target);
969                        if (info > 0) qn.targetEmptyInfo = info;
970                    } // USE_INFINITE_REPEAT_MONOMANIAC_MEM_STATUS_CHECK
971                    // strange stuff here (turned off)
972                }
973            }
974
975            state |= IN_REPEAT;
976            if (qn.lower != qn.upper) state |= IN_VAR_REPEAT;
977
978            target = setupTree(target, state);
979
980            /* expand string */
981            if (target.getType() == NodeType.STR) {
982                if (!isRepeatInfinite(qn.lower) && qn.lower == qn.upper &&
983                    qn.lower > 1 && qn.lower <= EXPAND_STRING_MAX_LENGTH) {
984                    final StringNode sn = (StringNode)target;
985                    final int len = sn.length();
986
987                    if (len * qn.lower <= EXPAND_STRING_MAX_LENGTH) {
988                        final StringNode str = qn.convertToString(sn.flag);
989                        final int n = qn.lower;
990                        for (int i = 0; i < n; i++) {
991                            str.cat(sn.chars, sn.p, sn.end);
992                        }
993                        break; /* break case NT_QTFR: */
994                    }
995
996                }
997            }
998            if (Config.USE_OP_PUSH_OR_JUMP_EXACT) {
999                if (qn.greedy && qn.targetEmptyInfo != 0) {
1000                    if (target.getType() == NodeType.QTFR) {
1001                        final QuantifierNode tqn = (QuantifierNode)target;
1002                        if (tqn.headExact != null) {
1003                            qn.headExact = tqn.headExact;
1004                            tqn.headExact = null;
1005                        }
1006                    } else {
1007                        qn.headExact = getHeadValueNode(qn.target, true);
1008                    }
1009                }
1010            } // USE_OP_PUSH_OR_JUMP_EXACT
1011            break;
1012
1013        case NodeType.ENCLOSE:
1014            final EncloseNode en = (EncloseNode)node;
1015            switch (en.type) {
1016            case EncloseType.OPTION:
1017                final int options = regex.options;
1018                regex.options = en.option;
1019                setupTree(en.target, state);
1020                regex.options = options;
1021                break;
1022
1023            case EncloseType.MEMORY:
1024                if ((state & (IN_ALT | IN_NOT | IN_VAR_REPEAT)) != 0) {
1025                    env.btMemStart = bsOnAt(env.btMemStart, en.regNum);
1026                    /* SET_ENCLOSE_STATUS(node, NST_MEM_IN_ALT_NOT); */
1027
1028                }
1029                setupTree(en.target, state);
1030                break;
1031
1032            case EncloseType.STOP_BACKTRACK:
1033                setupTree(en.target, state);
1034                if (en.target.getType() == NodeType.QTFR) {
1035                    final QuantifierNode tqn = (QuantifierNode)en.target;
1036                    if (isRepeatInfinite(tqn.upper) && tqn.lower <= 1 && tqn.greedy) {
1037                        /* (?>a*), a*+ etc... */
1038                        if (tqn.target.isSimple()) en.setStopBtSimpleRepeat();
1039                    }
1040                }
1041                break;
1042
1043            } // inner switch
1044            break;
1045
1046        case NodeType.ANCHOR:
1047            final AnchorNode an = (AnchorNode)node;
1048            switch (an.type) {
1049            case AnchorType.PREC_READ:
1050                setupTree(an.target, state);
1051                break;
1052
1053            case AnchorType.PREC_READ_NOT:
1054                setupTree(an.target, (state | IN_NOT));
1055                break;
1056
1057            case AnchorType.LOOK_BEHIND:
1058                if (checkTypeTree(an.target, NodeType.ALLOWED_IN_LB, EncloseType.ALLOWED_IN_LB, AnchorType.ALLOWED_IN_LB)) {
1059                    throw new SyntaxException(ERR_INVALID_LOOK_BEHIND_PATTERN);
1060                }
1061                node = setupLookBehind(node);
1062                if (node.getType() != NodeType.ANCHOR) continue restart;
1063                setupTree(((AnchorNode)node).target, state);
1064                break;
1065
1066            case AnchorType.LOOK_BEHIND_NOT:
1067                if (checkTypeTree(an.target, NodeType.ALLOWED_IN_LB, EncloseType.ALLOWED_IN_LB, AnchorType.ALLOWED_IN_LB)) {
1068                    throw new SyntaxException(ERR_INVALID_LOOK_BEHIND_PATTERN);
1069                }
1070                node = setupLookBehind(node);
1071                if (node.getType() != NodeType.ANCHOR) continue restart;
1072                setupTree(((AnchorNode)node).target, (state | IN_NOT));
1073                break;
1074
1075            } // inner switch
1076            break;
1077        } // switch
1078        return node;
1079        } // restart: while
1080    }
1081
1082    private static final int MAX_NODE_OPT_INFO_REF_COUNT   = 5;
1083    private void optimizeNodeLeft(final Node node, final NodeOptInfo opt, final OptEnvironment oenv) { // oenv remove, pass mmd
1084        opt.clear();
1085        opt.setBoundNode(oenv.mmd);
1086
1087        switch (node.getType()) {
1088        case NodeType.LIST: {
1089            final OptEnvironment nenv = new OptEnvironment();
1090            final NodeOptInfo nopt = new NodeOptInfo();
1091            nenv.copy(oenv);
1092            ConsAltNode lin = (ConsAltNode)node;
1093            do {
1094                optimizeNodeLeft(lin.car, nopt, nenv);
1095                nenv.mmd.add(nopt.length);
1096                opt.concatLeftNode(nopt);
1097            } while ((lin = lin.cdr) != null);
1098            break;
1099        }
1100
1101        case NodeType.ALT: {
1102            final NodeOptInfo nopt = new NodeOptInfo();
1103            ConsAltNode aln = (ConsAltNode)node;
1104            do {
1105                optimizeNodeLeft(aln.car, nopt, oenv);
1106                if (aln == node) {
1107                    opt.copy(nopt);
1108                } else {
1109                    opt.altMerge(nopt, oenv);
1110                }
1111            } while ((aln = aln.cdr) != null);
1112            break;
1113        }
1114
1115        case NodeType.STR: {
1116            final StringNode sn = (StringNode)node;
1117
1118            final int slen = sn.length();
1119
1120            if (!sn.isAmbig()) {
1121                opt.exb.concatStr(sn.chars, sn.p, sn.end, sn.isRaw());
1122
1123                if (slen > 0) {
1124                    opt.map.addChar(sn.chars[sn.p]);
1125                }
1126
1127                opt.length.set(slen, slen);
1128            } else {
1129                int max;
1130                if (sn.isDontGetOptInfo()) {
1131                    max = sn.length();
1132                } else {
1133                    opt.exb.concatStr(sn.chars, sn.p, sn.end, sn.isRaw());
1134                    opt.exb.ignoreCase = true;
1135
1136                    if (slen > 0) {
1137                        opt.map.addCharAmb(sn.chars, sn.p, sn.end, oenv.caseFoldFlag);
1138                    }
1139
1140                    max = slen;
1141                }
1142                opt.length.set(slen, max);
1143            }
1144
1145            if (opt.exb.length == slen) {
1146                opt.exb.reachEnd = true;
1147            }
1148            break;
1149        }
1150
1151        case NodeType.CCLASS: {
1152            final CClassNode cc = (CClassNode)node;
1153            /* no need to check ignore case. (setted in setup_tree()) */
1154            if (cc.mbuf != null || cc.isNot()) {
1155                opt.length.set(1, 1);
1156            } else {
1157                for (int i=0; i<BitSet.SINGLE_BYTE_SIZE; i++) {
1158                    final boolean z = cc.bs.at(i);
1159                    if ((z && !cc.isNot()) || (!z && cc.isNot())) {
1160                        opt.map.addChar(i);
1161                    }
1162                }
1163                opt.length.set(1, 1);
1164            }
1165            break;
1166        }
1167
1168        case NodeType.CANY: {
1169            opt.length.set(1, 1);
1170            break;
1171        }
1172
1173        case NodeType.ANCHOR: {
1174            final AnchorNode an = (AnchorNode)node;
1175            switch (an.type) {
1176            case AnchorType.BEGIN_BUF:
1177            case AnchorType.BEGIN_POSITION:
1178            case AnchorType.BEGIN_LINE:
1179            case AnchorType.END_BUF:
1180            case AnchorType.SEMI_END_BUF:
1181            case AnchorType.END_LINE:
1182                opt.anchor.add(an.type);
1183                break;
1184
1185            case AnchorType.PREC_READ:
1186                final NodeOptInfo nopt = new NodeOptInfo();
1187                optimizeNodeLeft(an.target, nopt, oenv);
1188                if (nopt.exb.length > 0) {
1189                    opt.expr.copy(nopt.exb);
1190                } else if (nopt.exm.length > 0) {
1191                    opt.expr.copy(nopt.exm);
1192                }
1193                opt.expr.reachEnd = false;
1194                if (nopt.map.value > 0) opt.map.copy(nopt.map);
1195                break;
1196
1197            case AnchorType.PREC_READ_NOT:
1198            case AnchorType.LOOK_BEHIND:    /* Sorry, I can't make use of it. */
1199            case AnchorType.LOOK_BEHIND_NOT:
1200                break;
1201
1202            } // inner switch
1203            break;
1204        }
1205
1206        case NodeType.BREF: {
1207            final BackRefNode br = (BackRefNode)node;
1208
1209            if (br.isRecursion()) {
1210                opt.length.set(0, MinMaxLen.INFINITE_DISTANCE);
1211                break;
1212            }
1213
1214            final Node[]nodes = oenv.scanEnv.memNodes;
1215
1216            final int min = getMinMatchLength(nodes[br.backRef]);
1217            final int max = getMaxMatchLength(nodes[br.backRef]);
1218
1219            opt.length.set(min, max);
1220            break;
1221        }
1222
1223
1224        case NodeType.QTFR: {
1225            final NodeOptInfo nopt = new NodeOptInfo();
1226            final QuantifierNode qn = (QuantifierNode)node;
1227            optimizeNodeLeft(qn.target, nopt, oenv);
1228            if (qn.lower == 0 && isRepeatInfinite(qn.upper)) {
1229                if (oenv.mmd.max == 0 && qn.target.getType() == NodeType.CANY && qn.greedy) {
1230                    if (isMultiline(oenv.options)) {
1231                        opt.anchor.add(AnchorType.ANYCHAR_STAR_ML);
1232                    } else {
1233                        opt.anchor.add(AnchorType.ANYCHAR_STAR);
1234                    }
1235                }
1236            } else {
1237                if (qn.lower > 0) {
1238                    opt.copy(nopt);
1239                    if (nopt.exb.length > 0) {
1240                        if (nopt.exb.reachEnd) {
1241                            int i;
1242                            for (i = 2; i <= qn.lower && !opt.exb.isFull(); i++) {
1243                                opt.exb.concat(nopt.exb);
1244                            }
1245                            if (i < qn.lower) {
1246                                opt.exb.reachEnd = false;
1247                            }
1248                        }
1249                    }
1250                    if (qn.lower != qn.upper) {
1251                        opt.exb.reachEnd = false;
1252                        opt.exm.reachEnd = false;
1253                    }
1254                    if (qn.lower > 1) {
1255                        opt.exm.reachEnd = false;
1256                    }
1257
1258                }
1259            }
1260            final int min = MinMaxLen.distanceMultiply(nopt.length.min, qn.lower);
1261            int max;
1262            if (isRepeatInfinite(qn.upper)) {
1263                max = nopt.length.max > 0 ? MinMaxLen.INFINITE_DISTANCE : 0;
1264            } else {
1265                max = MinMaxLen.distanceMultiply(nopt.length.max, qn.upper);
1266            }
1267            opt.length.set(min, max);
1268            break;
1269        }
1270
1271        case NodeType.ENCLOSE: {
1272            final EncloseNode en = (EncloseNode)node;
1273            switch (en.type) {
1274            case EncloseType.OPTION:
1275                final int save = oenv.options;
1276                oenv.options = en.option;
1277                optimizeNodeLeft(en.target, opt, oenv);
1278                oenv.options = save;
1279                break;
1280
1281            case EncloseType.MEMORY:
1282                if (++en.optCount > MAX_NODE_OPT_INFO_REF_COUNT) {
1283                    int min = 0;
1284                    int max = MinMaxLen.INFINITE_DISTANCE;
1285                    if (en.isMinFixed()) min = en.minLength;
1286                    if (en.isMaxFixed()) max = en.maxLength;
1287                    opt.length.set(min, max);
1288                } else { // USE_SUBEXP_CALL
1289                    optimizeNodeLeft(en.target, opt, oenv);
1290                    if (opt.anchor.isSet(AnchorType.ANYCHAR_STAR_MASK)) {
1291                        if (bsAt(oenv.scanEnv.backrefedMem, en.regNum)) {
1292                            opt.anchor.remove(AnchorType.ANYCHAR_STAR_MASK);
1293                        }
1294                    }
1295                }
1296                break;
1297
1298            case EncloseType.STOP_BACKTRACK:
1299                optimizeNodeLeft(en.target, opt, oenv);
1300                break;
1301            } // inner switch
1302            break;
1303        }
1304
1305        default:
1306            throw new InternalException(ERR_PARSER_BUG);
1307        } // switch
1308    }
1309
1310    protected final void setOptimizedInfoFromTree(final Node node) {
1311        final NodeOptInfo opt = new NodeOptInfo();
1312        final OptEnvironment oenv = new OptEnvironment();
1313
1314        oenv.options = regex.options;
1315        oenv.caseFoldFlag = regex.caseFoldFlag;
1316        oenv.scanEnv = env;
1317        oenv.mmd.clear(); // ??
1318
1319        optimizeNodeLeft(node, opt, oenv);
1320
1321        regex.anchor = opt.anchor.leftAnchor & (AnchorType.BEGIN_BUF |
1322                                                AnchorType.BEGIN_POSITION |
1323                                                AnchorType.ANYCHAR_STAR |
1324                                                AnchorType.ANYCHAR_STAR_ML);
1325
1326        regex.anchor |= opt.anchor.rightAnchor & (AnchorType.END_BUF |
1327                                                  AnchorType.SEMI_END_BUF);
1328
1329        if ((regex.anchor & (AnchorType.END_BUF | AnchorType.SEMI_END_BUF)) != 0) {
1330            regex.anchorDmin = opt.length.min;
1331            regex.anchorDmax = opt.length.max;
1332        }
1333
1334        if (opt.exb.length > 0 || opt.exm.length > 0) {
1335            opt.exb.select(opt.exm);
1336            if (opt.map.value > 0 && opt.exb.compare(opt.map) > 0) {
1337                // !goto set_map;!
1338                regex.setOptimizeMapInfo(opt.map);
1339                regex.setSubAnchor(opt.map.anchor);
1340            } else {
1341                regex.setExactInfo(opt.exb);
1342                regex.setSubAnchor(opt.exb.anchor);
1343            }
1344        } else if (opt.map.value > 0) {
1345            // !set_map:!
1346            regex.setOptimizeMapInfo(opt.map);
1347            regex.setSubAnchor(opt.map.anchor);
1348        } else {
1349            regex.subAnchor |= opt.anchor.leftAnchor & AnchorType.BEGIN_LINE;
1350            if (opt.length.max == 0) regex.subAnchor |= opt.anchor.rightAnchor & AnchorType.END_LINE;
1351        }
1352
1353        if (Config.DEBUG_COMPILE || Config.DEBUG_MATCH) {
1354            Config.log.println(regex.optimizeInfoToString());
1355        }
1356    }
1357}
1358