Parser.java revision 953:221a84ef44c0
1/*
2 * Permission is hereby granted, free of charge, to any person obtaining a copy of
3 * this software and associated documentation files (the "Software"), to deal in
4 * the Software without restriction, including without limitation the rights to
5 * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
6 * of the Software, and to permit persons to whom the Software is furnished to do
7 * so, subject to the following conditions:
8 *
9 * The above copyright notice and this permission notice shall be included in all
10 * copies or substantial portions of the Software.
11 *
12 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
13 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
14 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
15 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
16 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
17 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
18 * SOFTWARE.
19 */
20package jdk.nashorn.internal.runtime.regexp.joni;
21
22import static jdk.nashorn.internal.runtime.regexp.joni.BitStatus.bsOnOff;
23import static jdk.nashorn.internal.runtime.regexp.joni.Option.isDontCaptureGroup;
24import static jdk.nashorn.internal.runtime.regexp.joni.Option.isIgnoreCase;
25
26import jdk.nashorn.internal.runtime.regexp.joni.ast.AnchorNode;
27import jdk.nashorn.internal.runtime.regexp.joni.ast.AnyCharNode;
28import jdk.nashorn.internal.runtime.regexp.joni.ast.BackRefNode;
29import jdk.nashorn.internal.runtime.regexp.joni.ast.CClassNode;
30import jdk.nashorn.internal.runtime.regexp.joni.ast.CClassNode.CCStateArg;
31import jdk.nashorn.internal.runtime.regexp.joni.ast.ConsAltNode;
32import jdk.nashorn.internal.runtime.regexp.joni.ast.EncloseNode;
33import jdk.nashorn.internal.runtime.regexp.joni.ast.Node;
34import jdk.nashorn.internal.runtime.regexp.joni.ast.QuantifierNode;
35import jdk.nashorn.internal.runtime.regexp.joni.ast.StringNode;
36import jdk.nashorn.internal.runtime.regexp.joni.constants.AnchorType;
37import jdk.nashorn.internal.runtime.regexp.joni.constants.CCSTATE;
38import jdk.nashorn.internal.runtime.regexp.joni.constants.CCVALTYPE;
39import jdk.nashorn.internal.runtime.regexp.joni.constants.EncloseType;
40import jdk.nashorn.internal.runtime.regexp.joni.constants.NodeType;
41import jdk.nashorn.internal.runtime.regexp.joni.constants.TokenType;
42import jdk.nashorn.internal.runtime.regexp.joni.encoding.CharacterType;
43import jdk.nashorn.internal.runtime.regexp.joni.exception.InternalException;
44import jdk.nashorn.internal.runtime.regexp.joni.exception.SyntaxException;
45import jdk.nashorn.internal.runtime.regexp.joni.exception.ValueException;
46
47class Parser extends Lexer {
48
49    protected final Regex regex;
50    protected Node root;
51
52    protected int returnCode; // return code used by parser methods (they itself return parsed nodes)
53                              // this approach will not affect recursive calls
54
55    protected Parser(final ScanEnvironment env, final char[] chars, final int p, final int end) {
56        super(env, chars, p, end);
57        regex = env.reg;
58    }
59
60    // onig_parse_make_tree
61    protected final Node parse() {
62        root = parseRegexp();
63        regex.numMem = env.numMem;
64        return root;
65    }
66
67    private boolean codeExistCheck(final int code, final boolean ignoreEscaped) {
68        mark();
69
70        boolean inEsc = false;
71        while (left()) {
72            if (ignoreEscaped && inEsc) {
73                inEsc = false;
74            } else {
75                fetch();
76                if (c == code) {
77                    restore();
78                    return true;
79                }
80                if (c == syntax.metaCharTable.esc) inEsc = true;
81            }
82        }
83
84        restore();
85        return false;
86    }
87
88    private CClassNode parseCharClass() {
89        fetchTokenInCC();
90
91        final boolean neg;
92        if (token.type == TokenType.CHAR && token.getC() == '^' && !token.escaped) {
93            neg = true;
94            fetchTokenInCC();
95        } else {
96            neg = false;
97        }
98
99        if (token.type == TokenType.CC_CLOSE) {
100            if (!codeExistCheck(']', true)) {
101                throw new SyntaxException(ERR_EMPTY_CHAR_CLASS);
102            }
103            env.ccEscWarn("]");
104            token.type = TokenType.CHAR; /* allow []...] */
105        }
106
107        CClassNode cc = new CClassNode();
108        CClassNode prevCC = null;
109        CClassNode workCC = null;
110
111        final CCStateArg arg = new CCStateArg();
112
113        boolean andStart = false;
114        arg.state = CCSTATE.START;
115
116        while (token.type != TokenType.CC_CLOSE) {
117            boolean fetched = false;
118
119            switch (token.type) {
120
121            case CHAR:
122                if (token.getC() > 0xff) {
123                    arg.inType = CCVALTYPE.CODE_POINT;
124                } else {
125                    arg.inType = CCVALTYPE.SB; // sb_char:
126                }
127                arg.v = token.getC();
128                arg.vIsRaw = false;
129                parseCharClassValEntry2(cc, arg); // goto val_entry2
130                break;
131
132            case RAW_BYTE:
133                arg.v = token.getC();
134                arg.inType = CCVALTYPE.SB; // raw_single:
135                arg.vIsRaw = true;
136                parseCharClassValEntry2(cc, arg); // goto val_entry2
137                break;
138
139            case CODE_POINT:
140                arg.v = token.getCode();
141                arg.vIsRaw = true;
142                parseCharClassValEntry(cc, arg); // val_entry:, val_entry2
143                break;
144
145            case CHAR_TYPE:
146                cc.addCType(token.getPropCType(), token.getPropNot(), env, this);
147                cc.nextStateClass(arg, env); // next_class:
148                break;
149
150            case CC_RANGE:
151                if (arg.state == CCSTATE.VALUE) {
152                    fetchTokenInCC();
153                    fetched = true;
154                    if (token.type == TokenType.CC_CLOSE) { /* allow [x-] */
155                        parseCharClassRangeEndVal(cc, arg); // range_end_val:, goto val_entry;
156                        break;
157                    } else if (token.type == TokenType.CC_AND) {
158                        env.ccEscWarn("-");
159                        parseCharClassRangeEndVal(cc, arg); // goto range_end_val
160                        break;
161                    }
162                    arg.state = CCSTATE.RANGE;
163                } else if (arg.state == CCSTATE.START) {
164                    arg.v = token.getC(); /* [-xa] is allowed */
165                    arg.vIsRaw = false;
166                    fetchTokenInCC();
167                    fetched = true;
168                    if (token.type == TokenType.CC_RANGE || andStart) env.ccEscWarn("-"); /* [--x] or [a&&-x] is warned. */
169                    parseCharClassValEntry(cc, arg); // goto val_entry
170                    break;
171                } else if (arg.state == CCSTATE.RANGE) {
172                    env.ccEscWarn("-");
173                    parseCharClassSbChar(cc, arg); // goto sb_char /* [!--x] is allowed */
174                    break;
175                } else { /* CCS_COMPLETE */
176                    fetchTokenInCC();
177                    fetched = true;
178                    if (token.type == TokenType.CC_CLOSE) { /* allow [a-b-] */
179                        parseCharClassRangeEndVal(cc, arg); // goto range_end_val
180                        break;
181                    } else if (token.type == TokenType.CC_AND) {
182                        env.ccEscWarn("-");
183                        parseCharClassRangeEndVal(cc, arg); // goto range_end_val
184                        break;
185                    }
186
187                    if (syntax.allowDoubleRangeOpInCC()) {
188                        env.ccEscWarn("-");
189                        arg.inType = CCVALTYPE.SB;
190                        arg.v = '-';
191                        arg.vIsRaw = false;
192                        parseCharClassValEntry2(cc, arg); // goto val_entry2 /* [0-9-a] is allowed as [0-9\-a] */
193                        break;
194                    }
195                    throw new SyntaxException(ERR_UNMATCHED_RANGE_SPECIFIER_IN_CHAR_CLASS);
196                }
197                break;
198
199            case CC_CC_OPEN: /* [ */
200                final CClassNode acc = parseCharClass();
201                cc.or(acc);
202                break;
203
204            case CC_AND:     /* && */
205                if (arg.state == CCSTATE.VALUE) {
206                    arg.v = 0; // ??? safe v ?
207                    arg.vIsRaw = false;
208                    cc.nextStateValue(arg, env);
209                }
210                /* initialize local variables */
211                andStart = true;
212                arg.state = CCSTATE.START;
213                if (prevCC != null) {
214                    prevCC.and(cc);
215                } else {
216                    prevCC = cc;
217                    if (workCC == null) workCC = new CClassNode();
218                    cc = workCC;
219                }
220                cc.clear();
221                break;
222
223            case EOT:
224                throw new SyntaxException(ERR_PREMATURE_END_OF_CHAR_CLASS);
225
226            default:
227                throw new InternalException(ERR_PARSER_BUG);
228            } // switch
229
230            if (!fetched) fetchTokenInCC();
231
232        } // while
233
234        if (arg.state == CCSTATE.VALUE) {
235            arg.v = 0; // ??? safe v ?
236            arg.vIsRaw = false;
237            cc.nextStateValue(arg, env);
238        }
239
240        if (prevCC != null) {
241            prevCC.and(cc);
242            cc = prevCC;
243        }
244
245        if (neg) {
246            cc.setNot();
247        } else {
248            cc.clearNot();
249        }
250
251        if (cc.isNot() && syntax.notNewlineInNegativeCC()) {
252            if (!cc.isEmpty()) {
253                final int NEW_LINE = 0x0a;
254                if (EncodingHelper.isNewLine(NEW_LINE)) {
255                    cc.bs.set(NEW_LINE);
256                }
257            }
258        }
259
260        return cc;
261    }
262
263    private void parseCharClassSbChar(final CClassNode cc, final CCStateArg arg) {
264        arg.inType = CCVALTYPE.SB;
265        arg.v = token.getC();
266        arg.vIsRaw = false;
267        parseCharClassValEntry2(cc, arg); // goto val_entry2
268    }
269
270    private void parseCharClassRangeEndVal(final CClassNode cc, final CCStateArg arg) {
271        arg.v = '-';
272        arg.vIsRaw = false;
273        parseCharClassValEntry(cc, arg); // goto val_entry
274    }
275
276    private void parseCharClassValEntry(final CClassNode cc, final CCStateArg arg) {
277        arg.inType = arg.v <= 0xff ? CCVALTYPE.SB : CCVALTYPE.CODE_POINT;
278        parseCharClassValEntry2(cc, arg); // val_entry2:
279    }
280
281    private void parseCharClassValEntry2(final CClassNode cc, final CCStateArg arg) {
282        cc.nextStateValue(arg, env);
283    }
284
285    private Node parseEnclose(final TokenType term) {
286        Node node = null;
287
288        if (!left()) {
289            throw new SyntaxException(ERR_END_PATTERN_WITH_UNMATCHED_PARENTHESIS);
290        }
291
292        int option = env.option;
293
294        if (peekIs('?') && syntax.op2QMarkGroupEffect()) {
295            inc();
296            if (!left()) {
297                throw new SyntaxException(ERR_END_PATTERN_IN_GROUP);
298            }
299
300            fetch();
301            switch(c) {
302            case ':':  /* (?:...) grouping only */
303                fetchToken(); // group:
304                node = parseSubExp(term);
305                returnCode = 1; /* group */
306                return node;
307            case '=':
308                node = new AnchorNode(AnchorType.PREC_READ);
309                break;
310            case '!':  /*         preceding read */
311                node = new AnchorNode(AnchorType.PREC_READ_NOT);
312                break;
313            case '>':  /* (?>...) stop backtrack */
314                node = new EncloseNode(EncloseType.STOP_BACKTRACK); // node_new_enclose
315                break;
316            case '\'':
317                break;
318            case '<':  /* look behind (?<=...), (?<!...) */
319                fetch();
320                if (c == '=') {
321                    node = new AnchorNode(AnchorType.LOOK_BEHIND);
322                } else if (c == '!') {
323                    node = new AnchorNode(AnchorType.LOOK_BEHIND_NOT);
324                } else {
325                    throw new SyntaxException(ERR_UNDEFINED_GROUP_OPTION);
326                }
327                break;
328            case '@':
329                if (syntax.op2AtMarkCaptureHistory()) {
330                    final EncloseNode en = new EncloseNode(); // node_new_enclose_memory
331                    final int num = env.addMemEntry();
332                    if (num >= BitStatus.BIT_STATUS_BITS_NUM) {
333                        throw new ValueException(ERR_GROUP_NUMBER_OVER_FOR_CAPTURE_HISTORY);
334                    }
335                    en.regNum = num;
336                    node = en;
337                } else {
338                    throw new SyntaxException(ERR_UNDEFINED_GROUP_OPTION);
339                }
340                break;
341
342            // case 'p': #ifdef USE_POSIXLINE_OPTION
343            case '-':
344            case 'i':
345            case 'm':
346            case 's':
347            case 'x':
348                boolean neg = false;
349                while (true) {
350                    switch(c) {
351                    case ':':
352                    case ')':
353                        break;
354                    case '-':
355                        neg = true;
356                        break;
357                    case 'x':
358                        option = bsOnOff(option, Option.EXTEND, neg);
359                        break;
360                    case 'i':
361                        option = bsOnOff(option, Option.IGNORECASE, neg);
362                        break;
363                    case 's':
364                        if (syntax.op2OptionPerl()) {
365                            option = bsOnOff(option, Option.MULTILINE, neg);
366                        } else {
367                            throw new SyntaxException(ERR_UNDEFINED_GROUP_OPTION);
368                        }
369                        break;
370                    case 'm':
371                        if (syntax.op2OptionPerl()) {
372                            option = bsOnOff(option, Option.SINGLELINE, !neg);
373                        } else if (syntax.op2OptionRuby()) {
374                            option = bsOnOff(option, Option.MULTILINE, neg);
375                        } else {
376                            throw new SyntaxException(ERR_UNDEFINED_GROUP_OPTION);
377                        }
378                        break;
379                    // case 'p': #ifdef USE_POSIXLINE_OPTION // not defined
380                    // option = bsOnOff(option, Option.MULTILINE|Option.SINGLELINE, neg);
381                    // break;
382
383                    default:
384                        throw new SyntaxException(ERR_UNDEFINED_GROUP_OPTION);
385                    } // switch
386
387                    if (c == ')') {
388                        final EncloseNode en = new EncloseNode(option, 0); // node_new_option
389                        node = en;
390                        returnCode = 2; /* option only */
391                        return node;
392                    } else if (c == ':') {
393                        final int prev = env.option;
394                        env.option = option;
395                        fetchToken();
396                        final Node target = parseSubExp(term);
397                        env.option = prev;
398                        final EncloseNode en = new EncloseNode(option, 0); // node_new_option
399                        en.setTarget(target);
400                        node = en;
401                        returnCode = 0;
402                        return node;
403                    }
404                    if (!left()) {
405                        throw new SyntaxException(ERR_END_PATTERN_IN_GROUP);
406                    }
407                    fetch();
408                } // while
409
410            default:
411                throw new SyntaxException(ERR_UNDEFINED_GROUP_OPTION);
412            } // switch
413
414        } else {
415            if (isDontCaptureGroup(env.option)) {
416                fetchToken(); // goto group
417                node = parseSubExp(term);
418                returnCode = 1; /* group */
419                return node;
420            }
421            final EncloseNode en = new EncloseNode(); // node_new_enclose_memory
422            final int num = env.addMemEntry();
423            en.regNum = num;
424            node = en;
425        }
426
427        fetchToken();
428        final Node target = parseSubExp(term);
429
430        if (node.getType() == NodeType.ANCHOR) {
431            final AnchorNode an = (AnchorNode) node;
432            an.setTarget(target);
433        } else {
434            final EncloseNode en = (EncloseNode)node;
435            en.setTarget(target);
436            if (en.type == EncloseType.MEMORY) {
437                /* Don't move this to previous of parse_subexp() */
438                env.setMemNode(en.regNum, node);
439            }
440        }
441        returnCode = 0;
442        return node; // ??
443    }
444
445    private Node parseExp(final TokenType term) {
446        if (token.type == term) return StringNode.EMPTY; // goto end_of_token
447
448        Node node = null;
449        boolean group = false;
450
451        switch(token.type) {
452        case ALT:
453        case EOT:
454            return StringNode.EMPTY; // end_of_token:, node_new_empty
455
456        case SUBEXP_OPEN:
457            node = parseEnclose(TokenType.SUBEXP_CLOSE);
458            if (returnCode == 1) {
459                group = true;
460            } else if (returnCode == 2) { /* option only */
461                final int prev = env.option;
462                final EncloseNode en = (EncloseNode)node;
463                env.option = en.option;
464                fetchToken();
465                final Node target = parseSubExp(term);
466                env.option = prev;
467                en.setTarget(target);
468                return node;
469            }
470            break;
471        case SUBEXP_CLOSE:
472            if (!syntax.allowUnmatchedCloseSubexp()) {
473                throw new SyntaxException(ERR_UNMATCHED_CLOSE_PARENTHESIS);
474            }
475            if (token.escaped) {
476                return parseExpTkRawByte(group); // goto tk_raw_byte
477            } else {
478                return parseExpTkByte(group); // goto tk_byte
479            }
480        case STRING:
481            return parseExpTkByte(group); // tk_byte:
482
483        case RAW_BYTE:
484            return parseExpTkRawByte(group); // tk_raw_byte:
485        case CODE_POINT:
486            final char[] buf = new char[] {(char)token.getCode()};
487            // #ifdef NUMBERED_CHAR_IS_NOT_CASE_AMBIG ... // setRaw() #else
488            node = new StringNode(buf, 0, 1);
489            break;
490
491        case CHAR_TYPE:
492            switch(token.getPropCType()) {
493            case CharacterType.D:
494            case CharacterType.S:
495            case CharacterType.W:
496                if (Config.NON_UNICODE_SDW) {
497                    final CClassNode cc = new CClassNode();
498                    cc.addCType(token.getPropCType(), false, env, this);
499                    if (token.getPropNot()) cc.setNot();
500                    node = cc;
501                }
502                break;
503
504            case CharacterType.SPACE:
505            case CharacterType.DIGIT:
506            case CharacterType.XDIGIT:
507                // #ifdef USE_SHARED_CCLASS_TABLE ... #endif
508                final CClassNode ccn = new CClassNode();
509                ccn.addCType(token.getPropCType(), false, env, this);
510                if (token.getPropNot()) ccn.setNot();
511                node = ccn;
512                break;
513
514            default:
515                throw new InternalException(ERR_PARSER_BUG);
516
517            } // inner switch
518            break;
519
520        case CC_CC_OPEN:
521            final CClassNode cc = parseCharClass();
522            node = cc;
523            if (isIgnoreCase(env.option)) {
524                final ApplyCaseFoldArg arg = new ApplyCaseFoldArg(env, cc);
525                EncodingHelper.applyAllCaseFold(env.caseFoldFlag, ApplyCaseFold.INSTANCE, arg);
526
527                if (arg.altRoot != null) {
528                    node = ConsAltNode.newAltNode(node, arg.altRoot);
529                }
530            }
531            break;
532
533        case ANYCHAR:
534            node = new AnyCharNode();
535            break;
536
537        case ANYCHAR_ANYTIME:
538            node = new AnyCharNode();
539            final QuantifierNode qn = new QuantifierNode(0, QuantifierNode.REPEAT_INFINITE, false);
540            qn.setTarget(node);
541            node = qn;
542            break;
543
544        case BACKREF:
545            final int backRef = token.getBackrefRef();
546            node = new BackRefNode(backRef, env);
547            break;
548
549        case ANCHOR:
550            node = new AnchorNode(token.getAnchor()); // possible bug in oniguruma
551            break;
552
553        case OP_REPEAT:
554        case INTERVAL:
555            if (syntax.contextIndepRepeatOps()) {
556                if (syntax.contextInvalidRepeatOps()) {
557                    throw new SyntaxException(ERR_TARGET_OF_REPEAT_OPERATOR_NOT_SPECIFIED);
558                } else {
559                    node = StringNode.EMPTY; // node_new_empty
560                }
561            } else {
562                return parseExpTkByte(group); // goto tk_byte
563            }
564            break;
565
566        default:
567            throw new InternalException(ERR_PARSER_BUG);
568        } //switch
569
570        //targetp = node;
571
572        fetchToken(); // re_entry:
573
574        return parseExpRepeat(node, group); // repeat:
575    }
576
577    private Node parseExpTkByte(final boolean group) {
578        final StringNode node = new StringNode(chars, token.backP, p); // tk_byte:
579        while (true) {
580            fetchToken();
581            if (token.type != TokenType.STRING) break;
582
583            if (token.backP == node.end) {
584                node.end = p; // non escaped character, remain shared, just increase shared range
585            } else {
586                node.cat(chars, token.backP, p); // non continuous string stream, need to COW
587            }
588        }
589        // targetp = node;
590        return parseExpRepeat(node, group); // string_end:, goto repeat
591    }
592
593    private Node parseExpTkRawByte(final boolean group) {
594        // tk_raw_byte:
595
596        // important: we don't use 0xff mask here neither in the compiler
597        // (in the template string) so we won't have to mask target
598        // strings when comparing against them in the matcher
599        final StringNode node = new StringNode((char)token.getC());
600        node.setRaw();
601
602        fetchToken();
603        node.clearRaw();
604        // !goto string_end;!
605        return parseExpRepeat(node, group);
606    }
607
608    private Node parseExpRepeat(Node target, final boolean group) {
609        while (token.type == TokenType.OP_REPEAT || token.type == TokenType.INTERVAL) { // repeat:
610            if (target.isInvalidQuantifier()) {
611                throw new SyntaxException(ERR_TARGET_OF_REPEAT_OPERATOR_INVALID);
612            }
613
614            final QuantifierNode qtfr = new QuantifierNode(token.getRepeatLower(),
615                                                     token.getRepeatUpper(),
616                                                     token.type == TokenType.INTERVAL);
617
618            qtfr.greedy = token.getRepeatGreedy();
619            final int ret = qtfr.setQuantifier(target, group, env, chars, getBegin(), getEnd());
620            Node qn = qtfr;
621
622            if (token.getRepeatPossessive()) {
623                final EncloseNode en = new EncloseNode(EncloseType.STOP_BACKTRACK); // node_new_enclose
624                en.setTarget(qn);
625                qn = en;
626            }
627
628            if (ret == 0) {
629                target = qn;
630            } else if (ret == 2) { /* split case: /abc+/ */
631                target = ConsAltNode.newListNode(target, null);
632                final ConsAltNode tmp = ((ConsAltNode)target).setCdr(ConsAltNode.newListNode(qn, null));
633
634                fetchToken();
635                return parseExpRepeatForCar(target, tmp, group);
636            }
637            fetchToken(); // goto re_entry
638        }
639        return target;
640    }
641
642    private Node parseExpRepeatForCar(final Node top, final ConsAltNode target, final boolean group) {
643        while (token.type == TokenType.OP_REPEAT || token.type == TokenType.INTERVAL) { // repeat:
644            if (target.car.isInvalidQuantifier()) {
645                throw new SyntaxException(ERR_TARGET_OF_REPEAT_OPERATOR_INVALID);
646            }
647
648            final QuantifierNode qtfr = new QuantifierNode(token.getRepeatLower(),
649                                                     token.getRepeatUpper(),
650                                                     token.type == TokenType.INTERVAL);
651
652            qtfr.greedy = token.getRepeatGreedy();
653            final int ret = qtfr.setQuantifier(target.car, group, env, chars, getBegin(), getEnd());
654            Node qn = qtfr;
655
656            if (token.getRepeatPossessive()) {
657                final EncloseNode en = new EncloseNode(EncloseType.STOP_BACKTRACK); // node_new_enclose
658                en.setTarget(qn);
659                qn = en;
660            }
661
662            if (ret == 0) {
663                target.setCar(qn);
664            } else if (ret == 2) { /* split case: /abc+/ */
665                assert false;
666            }
667            fetchToken(); // goto re_entry
668        }
669        return top;
670    }
671
672    private Node parseBranch(final TokenType term) {
673        Node node = parseExp(term);
674
675        if (token.type == TokenType.EOT || token.type == term || token.type == TokenType.ALT) {
676            return node;
677        } else {
678            final ConsAltNode top = ConsAltNode.newListNode(node, null);
679            ConsAltNode t = top;
680
681            while (token.type != TokenType.EOT && token.type != term && token.type != TokenType.ALT) {
682                node = parseExp(term);
683                if (node.getType() == NodeType.LIST) {
684                    t.setCdr((ConsAltNode)node);
685                    while (((ConsAltNode)node).cdr != null ) node = ((ConsAltNode)node).cdr;
686
687                    t = ((ConsAltNode)node);
688                } else {
689                    t.setCdr(ConsAltNode.newListNode(node, null));
690                    t = t.cdr;
691                }
692            }
693            return top;
694        }
695    }
696
697    /* term_tok: TK_EOT or TK_SUBEXP_CLOSE */
698    private Node parseSubExp(final TokenType term) {
699        Node node = parseBranch(term);
700
701        if (token.type == term) {
702            return node;
703        } else if (token.type == TokenType.ALT) {
704            final ConsAltNode top = ConsAltNode.newAltNode(node, null);
705            ConsAltNode t = top;
706            while (token.type == TokenType.ALT) {
707                fetchToken();
708                node = parseBranch(term);
709
710                t.setCdr(ConsAltNode.newAltNode(node, null));
711                t = t.cdr;
712            }
713
714            if (token.type != term) parseSubExpError(term);
715            return top;
716        } else {
717            parseSubExpError(term);
718            return null; //not reached
719        }
720    }
721
722    private void parseSubExpError(final TokenType term) {
723        if (term == TokenType.SUBEXP_CLOSE) {
724            throw new SyntaxException(ERR_END_PATTERN_WITH_UNMATCHED_PARENTHESIS);
725        } else {
726            throw new InternalException(ERR_PARSER_BUG);
727        }
728    }
729
730    private Node parseRegexp() {
731        fetchToken();
732        return parseSubExp(TokenType.EOT);
733    }
734}
735