Parser.java revision 953:221a84ef44c0
1/* 2 * Permission is hereby granted, free of charge, to any person obtaining a copy of 3 * this software and associated documentation files (the "Software"), to deal in 4 * the Software without restriction, including without limitation the rights to 5 * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies 6 * of the Software, and to permit persons to whom the Software is furnished to do 7 * so, subject to the following conditions: 8 * 9 * The above copyright notice and this permission notice shall be included in all 10 * copies or substantial portions of the Software. 11 * 12 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 13 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 14 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 15 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 16 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 17 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 18 * SOFTWARE. 19 */ 20package jdk.nashorn.internal.runtime.regexp.joni; 21 22import static jdk.nashorn.internal.runtime.regexp.joni.BitStatus.bsOnOff; 23import static jdk.nashorn.internal.runtime.regexp.joni.Option.isDontCaptureGroup; 24import static jdk.nashorn.internal.runtime.regexp.joni.Option.isIgnoreCase; 25 26import jdk.nashorn.internal.runtime.regexp.joni.ast.AnchorNode; 27import jdk.nashorn.internal.runtime.regexp.joni.ast.AnyCharNode; 28import jdk.nashorn.internal.runtime.regexp.joni.ast.BackRefNode; 29import jdk.nashorn.internal.runtime.regexp.joni.ast.CClassNode; 30import jdk.nashorn.internal.runtime.regexp.joni.ast.CClassNode.CCStateArg; 31import jdk.nashorn.internal.runtime.regexp.joni.ast.ConsAltNode; 32import jdk.nashorn.internal.runtime.regexp.joni.ast.EncloseNode; 33import jdk.nashorn.internal.runtime.regexp.joni.ast.Node; 34import jdk.nashorn.internal.runtime.regexp.joni.ast.QuantifierNode; 35import jdk.nashorn.internal.runtime.regexp.joni.ast.StringNode; 36import jdk.nashorn.internal.runtime.regexp.joni.constants.AnchorType; 37import jdk.nashorn.internal.runtime.regexp.joni.constants.CCSTATE; 38import jdk.nashorn.internal.runtime.regexp.joni.constants.CCVALTYPE; 39import jdk.nashorn.internal.runtime.regexp.joni.constants.EncloseType; 40import jdk.nashorn.internal.runtime.regexp.joni.constants.NodeType; 41import jdk.nashorn.internal.runtime.regexp.joni.constants.TokenType; 42import jdk.nashorn.internal.runtime.regexp.joni.encoding.CharacterType; 43import jdk.nashorn.internal.runtime.regexp.joni.exception.InternalException; 44import jdk.nashorn.internal.runtime.regexp.joni.exception.SyntaxException; 45import jdk.nashorn.internal.runtime.regexp.joni.exception.ValueException; 46 47class Parser extends Lexer { 48 49 protected final Regex regex; 50 protected Node root; 51 52 protected int returnCode; // return code used by parser methods (they itself return parsed nodes) 53 // this approach will not affect recursive calls 54 55 protected Parser(final ScanEnvironment env, final char[] chars, final int p, final int end) { 56 super(env, chars, p, end); 57 regex = env.reg; 58 } 59 60 // onig_parse_make_tree 61 protected final Node parse() { 62 root = parseRegexp(); 63 regex.numMem = env.numMem; 64 return root; 65 } 66 67 private boolean codeExistCheck(final int code, final boolean ignoreEscaped) { 68 mark(); 69 70 boolean inEsc = false; 71 while (left()) { 72 if (ignoreEscaped && inEsc) { 73 inEsc = false; 74 } else { 75 fetch(); 76 if (c == code) { 77 restore(); 78 return true; 79 } 80 if (c == syntax.metaCharTable.esc) inEsc = true; 81 } 82 } 83 84 restore(); 85 return false; 86 } 87 88 private CClassNode parseCharClass() { 89 fetchTokenInCC(); 90 91 final boolean neg; 92 if (token.type == TokenType.CHAR && token.getC() == '^' && !token.escaped) { 93 neg = true; 94 fetchTokenInCC(); 95 } else { 96 neg = false; 97 } 98 99 if (token.type == TokenType.CC_CLOSE) { 100 if (!codeExistCheck(']', true)) { 101 throw new SyntaxException(ERR_EMPTY_CHAR_CLASS); 102 } 103 env.ccEscWarn("]"); 104 token.type = TokenType.CHAR; /* allow []...] */ 105 } 106 107 CClassNode cc = new CClassNode(); 108 CClassNode prevCC = null; 109 CClassNode workCC = null; 110 111 final CCStateArg arg = new CCStateArg(); 112 113 boolean andStart = false; 114 arg.state = CCSTATE.START; 115 116 while (token.type != TokenType.CC_CLOSE) { 117 boolean fetched = false; 118 119 switch (token.type) { 120 121 case CHAR: 122 if (token.getC() > 0xff) { 123 arg.inType = CCVALTYPE.CODE_POINT; 124 } else { 125 arg.inType = CCVALTYPE.SB; // sb_char: 126 } 127 arg.v = token.getC(); 128 arg.vIsRaw = false; 129 parseCharClassValEntry2(cc, arg); // goto val_entry2 130 break; 131 132 case RAW_BYTE: 133 arg.v = token.getC(); 134 arg.inType = CCVALTYPE.SB; // raw_single: 135 arg.vIsRaw = true; 136 parseCharClassValEntry2(cc, arg); // goto val_entry2 137 break; 138 139 case CODE_POINT: 140 arg.v = token.getCode(); 141 arg.vIsRaw = true; 142 parseCharClassValEntry(cc, arg); // val_entry:, val_entry2 143 break; 144 145 case CHAR_TYPE: 146 cc.addCType(token.getPropCType(), token.getPropNot(), env, this); 147 cc.nextStateClass(arg, env); // next_class: 148 break; 149 150 case CC_RANGE: 151 if (arg.state == CCSTATE.VALUE) { 152 fetchTokenInCC(); 153 fetched = true; 154 if (token.type == TokenType.CC_CLOSE) { /* allow [x-] */ 155 parseCharClassRangeEndVal(cc, arg); // range_end_val:, goto val_entry; 156 break; 157 } else if (token.type == TokenType.CC_AND) { 158 env.ccEscWarn("-"); 159 parseCharClassRangeEndVal(cc, arg); // goto range_end_val 160 break; 161 } 162 arg.state = CCSTATE.RANGE; 163 } else if (arg.state == CCSTATE.START) { 164 arg.v = token.getC(); /* [-xa] is allowed */ 165 arg.vIsRaw = false; 166 fetchTokenInCC(); 167 fetched = true; 168 if (token.type == TokenType.CC_RANGE || andStart) env.ccEscWarn("-"); /* [--x] or [a&&-x] is warned. */ 169 parseCharClassValEntry(cc, arg); // goto val_entry 170 break; 171 } else if (arg.state == CCSTATE.RANGE) { 172 env.ccEscWarn("-"); 173 parseCharClassSbChar(cc, arg); // goto sb_char /* [!--x] is allowed */ 174 break; 175 } else { /* CCS_COMPLETE */ 176 fetchTokenInCC(); 177 fetched = true; 178 if (token.type == TokenType.CC_CLOSE) { /* allow [a-b-] */ 179 parseCharClassRangeEndVal(cc, arg); // goto range_end_val 180 break; 181 } else if (token.type == TokenType.CC_AND) { 182 env.ccEscWarn("-"); 183 parseCharClassRangeEndVal(cc, arg); // goto range_end_val 184 break; 185 } 186 187 if (syntax.allowDoubleRangeOpInCC()) { 188 env.ccEscWarn("-"); 189 arg.inType = CCVALTYPE.SB; 190 arg.v = '-'; 191 arg.vIsRaw = false; 192 parseCharClassValEntry2(cc, arg); // goto val_entry2 /* [0-9-a] is allowed as [0-9\-a] */ 193 break; 194 } 195 throw new SyntaxException(ERR_UNMATCHED_RANGE_SPECIFIER_IN_CHAR_CLASS); 196 } 197 break; 198 199 case CC_CC_OPEN: /* [ */ 200 final CClassNode acc = parseCharClass(); 201 cc.or(acc); 202 break; 203 204 case CC_AND: /* && */ 205 if (arg.state == CCSTATE.VALUE) { 206 arg.v = 0; // ??? safe v ? 207 arg.vIsRaw = false; 208 cc.nextStateValue(arg, env); 209 } 210 /* initialize local variables */ 211 andStart = true; 212 arg.state = CCSTATE.START; 213 if (prevCC != null) { 214 prevCC.and(cc); 215 } else { 216 prevCC = cc; 217 if (workCC == null) workCC = new CClassNode(); 218 cc = workCC; 219 } 220 cc.clear(); 221 break; 222 223 case EOT: 224 throw new SyntaxException(ERR_PREMATURE_END_OF_CHAR_CLASS); 225 226 default: 227 throw new InternalException(ERR_PARSER_BUG); 228 } // switch 229 230 if (!fetched) fetchTokenInCC(); 231 232 } // while 233 234 if (arg.state == CCSTATE.VALUE) { 235 arg.v = 0; // ??? safe v ? 236 arg.vIsRaw = false; 237 cc.nextStateValue(arg, env); 238 } 239 240 if (prevCC != null) { 241 prevCC.and(cc); 242 cc = prevCC; 243 } 244 245 if (neg) { 246 cc.setNot(); 247 } else { 248 cc.clearNot(); 249 } 250 251 if (cc.isNot() && syntax.notNewlineInNegativeCC()) { 252 if (!cc.isEmpty()) { 253 final int NEW_LINE = 0x0a; 254 if (EncodingHelper.isNewLine(NEW_LINE)) { 255 cc.bs.set(NEW_LINE); 256 } 257 } 258 } 259 260 return cc; 261 } 262 263 private void parseCharClassSbChar(final CClassNode cc, final CCStateArg arg) { 264 arg.inType = CCVALTYPE.SB; 265 arg.v = token.getC(); 266 arg.vIsRaw = false; 267 parseCharClassValEntry2(cc, arg); // goto val_entry2 268 } 269 270 private void parseCharClassRangeEndVal(final CClassNode cc, final CCStateArg arg) { 271 arg.v = '-'; 272 arg.vIsRaw = false; 273 parseCharClassValEntry(cc, arg); // goto val_entry 274 } 275 276 private void parseCharClassValEntry(final CClassNode cc, final CCStateArg arg) { 277 arg.inType = arg.v <= 0xff ? CCVALTYPE.SB : CCVALTYPE.CODE_POINT; 278 parseCharClassValEntry2(cc, arg); // val_entry2: 279 } 280 281 private void parseCharClassValEntry2(final CClassNode cc, final CCStateArg arg) { 282 cc.nextStateValue(arg, env); 283 } 284 285 private Node parseEnclose(final TokenType term) { 286 Node node = null; 287 288 if (!left()) { 289 throw new SyntaxException(ERR_END_PATTERN_WITH_UNMATCHED_PARENTHESIS); 290 } 291 292 int option = env.option; 293 294 if (peekIs('?') && syntax.op2QMarkGroupEffect()) { 295 inc(); 296 if (!left()) { 297 throw new SyntaxException(ERR_END_PATTERN_IN_GROUP); 298 } 299 300 fetch(); 301 switch(c) { 302 case ':': /* (?:...) grouping only */ 303 fetchToken(); // group: 304 node = parseSubExp(term); 305 returnCode = 1; /* group */ 306 return node; 307 case '=': 308 node = new AnchorNode(AnchorType.PREC_READ); 309 break; 310 case '!': /* preceding read */ 311 node = new AnchorNode(AnchorType.PREC_READ_NOT); 312 break; 313 case '>': /* (?>...) stop backtrack */ 314 node = new EncloseNode(EncloseType.STOP_BACKTRACK); // node_new_enclose 315 break; 316 case '\'': 317 break; 318 case '<': /* look behind (?<=...), (?<!...) */ 319 fetch(); 320 if (c == '=') { 321 node = new AnchorNode(AnchorType.LOOK_BEHIND); 322 } else if (c == '!') { 323 node = new AnchorNode(AnchorType.LOOK_BEHIND_NOT); 324 } else { 325 throw new SyntaxException(ERR_UNDEFINED_GROUP_OPTION); 326 } 327 break; 328 case '@': 329 if (syntax.op2AtMarkCaptureHistory()) { 330 final EncloseNode en = new EncloseNode(); // node_new_enclose_memory 331 final int num = env.addMemEntry(); 332 if (num >= BitStatus.BIT_STATUS_BITS_NUM) { 333 throw new ValueException(ERR_GROUP_NUMBER_OVER_FOR_CAPTURE_HISTORY); 334 } 335 en.regNum = num; 336 node = en; 337 } else { 338 throw new SyntaxException(ERR_UNDEFINED_GROUP_OPTION); 339 } 340 break; 341 342 // case 'p': #ifdef USE_POSIXLINE_OPTION 343 case '-': 344 case 'i': 345 case 'm': 346 case 's': 347 case 'x': 348 boolean neg = false; 349 while (true) { 350 switch(c) { 351 case ':': 352 case ')': 353 break; 354 case '-': 355 neg = true; 356 break; 357 case 'x': 358 option = bsOnOff(option, Option.EXTEND, neg); 359 break; 360 case 'i': 361 option = bsOnOff(option, Option.IGNORECASE, neg); 362 break; 363 case 's': 364 if (syntax.op2OptionPerl()) { 365 option = bsOnOff(option, Option.MULTILINE, neg); 366 } else { 367 throw new SyntaxException(ERR_UNDEFINED_GROUP_OPTION); 368 } 369 break; 370 case 'm': 371 if (syntax.op2OptionPerl()) { 372 option = bsOnOff(option, Option.SINGLELINE, !neg); 373 } else if (syntax.op2OptionRuby()) { 374 option = bsOnOff(option, Option.MULTILINE, neg); 375 } else { 376 throw new SyntaxException(ERR_UNDEFINED_GROUP_OPTION); 377 } 378 break; 379 // case 'p': #ifdef USE_POSIXLINE_OPTION // not defined 380 // option = bsOnOff(option, Option.MULTILINE|Option.SINGLELINE, neg); 381 // break; 382 383 default: 384 throw new SyntaxException(ERR_UNDEFINED_GROUP_OPTION); 385 } // switch 386 387 if (c == ')') { 388 final EncloseNode en = new EncloseNode(option, 0); // node_new_option 389 node = en; 390 returnCode = 2; /* option only */ 391 return node; 392 } else if (c == ':') { 393 final int prev = env.option; 394 env.option = option; 395 fetchToken(); 396 final Node target = parseSubExp(term); 397 env.option = prev; 398 final EncloseNode en = new EncloseNode(option, 0); // node_new_option 399 en.setTarget(target); 400 node = en; 401 returnCode = 0; 402 return node; 403 } 404 if (!left()) { 405 throw new SyntaxException(ERR_END_PATTERN_IN_GROUP); 406 } 407 fetch(); 408 } // while 409 410 default: 411 throw new SyntaxException(ERR_UNDEFINED_GROUP_OPTION); 412 } // switch 413 414 } else { 415 if (isDontCaptureGroup(env.option)) { 416 fetchToken(); // goto group 417 node = parseSubExp(term); 418 returnCode = 1; /* group */ 419 return node; 420 } 421 final EncloseNode en = new EncloseNode(); // node_new_enclose_memory 422 final int num = env.addMemEntry(); 423 en.regNum = num; 424 node = en; 425 } 426 427 fetchToken(); 428 final Node target = parseSubExp(term); 429 430 if (node.getType() == NodeType.ANCHOR) { 431 final AnchorNode an = (AnchorNode) node; 432 an.setTarget(target); 433 } else { 434 final EncloseNode en = (EncloseNode)node; 435 en.setTarget(target); 436 if (en.type == EncloseType.MEMORY) { 437 /* Don't move this to previous of parse_subexp() */ 438 env.setMemNode(en.regNum, node); 439 } 440 } 441 returnCode = 0; 442 return node; // ?? 443 } 444 445 private Node parseExp(final TokenType term) { 446 if (token.type == term) return StringNode.EMPTY; // goto end_of_token 447 448 Node node = null; 449 boolean group = false; 450 451 switch(token.type) { 452 case ALT: 453 case EOT: 454 return StringNode.EMPTY; // end_of_token:, node_new_empty 455 456 case SUBEXP_OPEN: 457 node = parseEnclose(TokenType.SUBEXP_CLOSE); 458 if (returnCode == 1) { 459 group = true; 460 } else if (returnCode == 2) { /* option only */ 461 final int prev = env.option; 462 final EncloseNode en = (EncloseNode)node; 463 env.option = en.option; 464 fetchToken(); 465 final Node target = parseSubExp(term); 466 env.option = prev; 467 en.setTarget(target); 468 return node; 469 } 470 break; 471 case SUBEXP_CLOSE: 472 if (!syntax.allowUnmatchedCloseSubexp()) { 473 throw new SyntaxException(ERR_UNMATCHED_CLOSE_PARENTHESIS); 474 } 475 if (token.escaped) { 476 return parseExpTkRawByte(group); // goto tk_raw_byte 477 } else { 478 return parseExpTkByte(group); // goto tk_byte 479 } 480 case STRING: 481 return parseExpTkByte(group); // tk_byte: 482 483 case RAW_BYTE: 484 return parseExpTkRawByte(group); // tk_raw_byte: 485 case CODE_POINT: 486 final char[] buf = new char[] {(char)token.getCode()}; 487 // #ifdef NUMBERED_CHAR_IS_NOT_CASE_AMBIG ... // setRaw() #else 488 node = new StringNode(buf, 0, 1); 489 break; 490 491 case CHAR_TYPE: 492 switch(token.getPropCType()) { 493 case CharacterType.D: 494 case CharacterType.S: 495 case CharacterType.W: 496 if (Config.NON_UNICODE_SDW) { 497 final CClassNode cc = new CClassNode(); 498 cc.addCType(token.getPropCType(), false, env, this); 499 if (token.getPropNot()) cc.setNot(); 500 node = cc; 501 } 502 break; 503 504 case CharacterType.SPACE: 505 case CharacterType.DIGIT: 506 case CharacterType.XDIGIT: 507 // #ifdef USE_SHARED_CCLASS_TABLE ... #endif 508 final CClassNode ccn = new CClassNode(); 509 ccn.addCType(token.getPropCType(), false, env, this); 510 if (token.getPropNot()) ccn.setNot(); 511 node = ccn; 512 break; 513 514 default: 515 throw new InternalException(ERR_PARSER_BUG); 516 517 } // inner switch 518 break; 519 520 case CC_CC_OPEN: 521 final CClassNode cc = parseCharClass(); 522 node = cc; 523 if (isIgnoreCase(env.option)) { 524 final ApplyCaseFoldArg arg = new ApplyCaseFoldArg(env, cc); 525 EncodingHelper.applyAllCaseFold(env.caseFoldFlag, ApplyCaseFold.INSTANCE, arg); 526 527 if (arg.altRoot != null) { 528 node = ConsAltNode.newAltNode(node, arg.altRoot); 529 } 530 } 531 break; 532 533 case ANYCHAR: 534 node = new AnyCharNode(); 535 break; 536 537 case ANYCHAR_ANYTIME: 538 node = new AnyCharNode(); 539 final QuantifierNode qn = new QuantifierNode(0, QuantifierNode.REPEAT_INFINITE, false); 540 qn.setTarget(node); 541 node = qn; 542 break; 543 544 case BACKREF: 545 final int backRef = token.getBackrefRef(); 546 node = new BackRefNode(backRef, env); 547 break; 548 549 case ANCHOR: 550 node = new AnchorNode(token.getAnchor()); // possible bug in oniguruma 551 break; 552 553 case OP_REPEAT: 554 case INTERVAL: 555 if (syntax.contextIndepRepeatOps()) { 556 if (syntax.contextInvalidRepeatOps()) { 557 throw new SyntaxException(ERR_TARGET_OF_REPEAT_OPERATOR_NOT_SPECIFIED); 558 } else { 559 node = StringNode.EMPTY; // node_new_empty 560 } 561 } else { 562 return parseExpTkByte(group); // goto tk_byte 563 } 564 break; 565 566 default: 567 throw new InternalException(ERR_PARSER_BUG); 568 } //switch 569 570 //targetp = node; 571 572 fetchToken(); // re_entry: 573 574 return parseExpRepeat(node, group); // repeat: 575 } 576 577 private Node parseExpTkByte(final boolean group) { 578 final StringNode node = new StringNode(chars, token.backP, p); // tk_byte: 579 while (true) { 580 fetchToken(); 581 if (token.type != TokenType.STRING) break; 582 583 if (token.backP == node.end) { 584 node.end = p; // non escaped character, remain shared, just increase shared range 585 } else { 586 node.cat(chars, token.backP, p); // non continuous string stream, need to COW 587 } 588 } 589 // targetp = node; 590 return parseExpRepeat(node, group); // string_end:, goto repeat 591 } 592 593 private Node parseExpTkRawByte(final boolean group) { 594 // tk_raw_byte: 595 596 // important: we don't use 0xff mask here neither in the compiler 597 // (in the template string) so we won't have to mask target 598 // strings when comparing against them in the matcher 599 final StringNode node = new StringNode((char)token.getC()); 600 node.setRaw(); 601 602 fetchToken(); 603 node.clearRaw(); 604 // !goto string_end;! 605 return parseExpRepeat(node, group); 606 } 607 608 private Node parseExpRepeat(Node target, final boolean group) { 609 while (token.type == TokenType.OP_REPEAT || token.type == TokenType.INTERVAL) { // repeat: 610 if (target.isInvalidQuantifier()) { 611 throw new SyntaxException(ERR_TARGET_OF_REPEAT_OPERATOR_INVALID); 612 } 613 614 final QuantifierNode qtfr = new QuantifierNode(token.getRepeatLower(), 615 token.getRepeatUpper(), 616 token.type == TokenType.INTERVAL); 617 618 qtfr.greedy = token.getRepeatGreedy(); 619 final int ret = qtfr.setQuantifier(target, group, env, chars, getBegin(), getEnd()); 620 Node qn = qtfr; 621 622 if (token.getRepeatPossessive()) { 623 final EncloseNode en = new EncloseNode(EncloseType.STOP_BACKTRACK); // node_new_enclose 624 en.setTarget(qn); 625 qn = en; 626 } 627 628 if (ret == 0) { 629 target = qn; 630 } else if (ret == 2) { /* split case: /abc+/ */ 631 target = ConsAltNode.newListNode(target, null); 632 final ConsAltNode tmp = ((ConsAltNode)target).setCdr(ConsAltNode.newListNode(qn, null)); 633 634 fetchToken(); 635 return parseExpRepeatForCar(target, tmp, group); 636 } 637 fetchToken(); // goto re_entry 638 } 639 return target; 640 } 641 642 private Node parseExpRepeatForCar(final Node top, final ConsAltNode target, final boolean group) { 643 while (token.type == TokenType.OP_REPEAT || token.type == TokenType.INTERVAL) { // repeat: 644 if (target.car.isInvalidQuantifier()) { 645 throw new SyntaxException(ERR_TARGET_OF_REPEAT_OPERATOR_INVALID); 646 } 647 648 final QuantifierNode qtfr = new QuantifierNode(token.getRepeatLower(), 649 token.getRepeatUpper(), 650 token.type == TokenType.INTERVAL); 651 652 qtfr.greedy = token.getRepeatGreedy(); 653 final int ret = qtfr.setQuantifier(target.car, group, env, chars, getBegin(), getEnd()); 654 Node qn = qtfr; 655 656 if (token.getRepeatPossessive()) { 657 final EncloseNode en = new EncloseNode(EncloseType.STOP_BACKTRACK); // node_new_enclose 658 en.setTarget(qn); 659 qn = en; 660 } 661 662 if (ret == 0) { 663 target.setCar(qn); 664 } else if (ret == 2) { /* split case: /abc+/ */ 665 assert false; 666 } 667 fetchToken(); // goto re_entry 668 } 669 return top; 670 } 671 672 private Node parseBranch(final TokenType term) { 673 Node node = parseExp(term); 674 675 if (token.type == TokenType.EOT || token.type == term || token.type == TokenType.ALT) { 676 return node; 677 } else { 678 final ConsAltNode top = ConsAltNode.newListNode(node, null); 679 ConsAltNode t = top; 680 681 while (token.type != TokenType.EOT && token.type != term && token.type != TokenType.ALT) { 682 node = parseExp(term); 683 if (node.getType() == NodeType.LIST) { 684 t.setCdr((ConsAltNode)node); 685 while (((ConsAltNode)node).cdr != null ) node = ((ConsAltNode)node).cdr; 686 687 t = ((ConsAltNode)node); 688 } else { 689 t.setCdr(ConsAltNode.newListNode(node, null)); 690 t = t.cdr; 691 } 692 } 693 return top; 694 } 695 } 696 697 /* term_tok: TK_EOT or TK_SUBEXP_CLOSE */ 698 private Node parseSubExp(final TokenType term) { 699 Node node = parseBranch(term); 700 701 if (token.type == term) { 702 return node; 703 } else if (token.type == TokenType.ALT) { 704 final ConsAltNode top = ConsAltNode.newAltNode(node, null); 705 ConsAltNode t = top; 706 while (token.type == TokenType.ALT) { 707 fetchToken(); 708 node = parseBranch(term); 709 710 t.setCdr(ConsAltNode.newAltNode(node, null)); 711 t = t.cdr; 712 } 713 714 if (token.type != term) parseSubExpError(term); 715 return top; 716 } else { 717 parseSubExpError(term); 718 return null; //not reached 719 } 720 } 721 722 private void parseSubExpError(final TokenType term) { 723 if (term == TokenType.SUBEXP_CLOSE) { 724 throw new SyntaxException(ERR_END_PATTERN_WITH_UNMATCHED_PARENTHESIS); 725 } else { 726 throw new InternalException(ERR_PARSER_BUG); 727 } 728 } 729 730 private Node parseRegexp() { 731 fetchToken(); 732 return parseSubExp(TokenType.EOT); 733 } 734} 735