1/* dfa - DFA construction routines */ 2 3/* Copyright (c) 1990 The Regents of the University of California. */ 4/* All rights reserved. */ 5 6/* This code is derived from software contributed to Berkeley by */ 7/* Vern Paxson. */ 8 9/* The United States Government has rights in this work pursuant */ 10/* to contract no. DE-AC03-76SF00098 between the United States */ 11/* Department of Energy and the University of California. */ 12 13/* Redistribution and use in source and binary forms, with or without */ 14/* modification, are permitted provided that the following conditions */ 15/* are met: */ 16 17/* 1. Redistributions of source code must retain the above copyright */ 18/* notice, this list of conditions and the following disclaimer. */ 19/* 2. Redistributions in binary form must reproduce the above copyright */ 20/* notice, this list of conditions and the following disclaimer in the */ 21/* documentation and/or other materials provided with the distribution. */ 22 23/* Neither the name of the University nor the names of its contributors */ 24/* may be used to endorse or promote products derived from this software */ 25/* without specific prior written permission. */ 26 27/* THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR */ 28/* IMPLIED WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED */ 29/* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR */ 30/* PURPOSE. */ 31 32#include "flexdef.h" 33#include "tables.h" 34 35/* declare functions that have forward references */ 36 37void dump_associated_rules PROTO ((FILE *, int)); 38void dump_transitions PROTO ((FILE *, int[])); 39void sympartition PROTO ((int[], int, int[], int[])); 40int symfollowset PROTO ((int[], int, int, int[])); 41 42 43/* check_for_backing_up - check a DFA state for backing up 44 * 45 * synopsis 46 * void check_for_backing_up( int ds, int state[numecs] ); 47 * 48 * ds is the number of the state to check and state[] is its out-transitions, 49 * indexed by equivalence class. 50 */ 51 52void check_for_backing_up (ds, state) 53 int ds; 54 int state[]; 55{ 56 if ((reject && !dfaacc[ds].dfaacc_set) || (!reject && !dfaacc[ds].dfaacc_state)) { /* state is non-accepting */ 57 ++num_backing_up; 58 59 if (backing_up_report) { 60 fprintf (backing_up_file, 61 _("State #%d is non-accepting -\n"), ds); 62 63 /* identify the state */ 64 dump_associated_rules (backing_up_file, ds); 65 66 /* Now identify it further using the out- and 67 * jam-transitions. 68 */ 69 dump_transitions (backing_up_file, state); 70 71 putc ('\n', backing_up_file); 72 } 73 } 74} 75 76 77/* check_trailing_context - check to see if NFA state set constitutes 78 * "dangerous" trailing context 79 * 80 * synopsis 81 * void check_trailing_context( int nfa_states[num_states+1], int num_states, 82 * int accset[nacc+1], int nacc ); 83 * 84 * NOTES 85 * Trailing context is "dangerous" if both the head and the trailing 86 * part are of variable size \and/ there's a DFA state which contains 87 * both an accepting state for the head part of the rule and NFA states 88 * which occur after the beginning of the trailing context. 89 * 90 * When such a rule is matched, it's impossible to tell if having been 91 * in the DFA state indicates the beginning of the trailing context or 92 * further-along scanning of the pattern. In these cases, a warning 93 * message is issued. 94 * 95 * nfa_states[1 .. num_states] is the list of NFA states in the DFA. 96 * accset[1 .. nacc] is the list of accepting numbers for the DFA state. 97 */ 98 99void check_trailing_context (nfa_states, num_states, accset, nacc) 100 int *nfa_states, num_states; 101 int *accset; 102 int nacc; 103{ 104 int i, j; 105 106 for (i = 1; i <= num_states; ++i) { 107 int ns = nfa_states[i]; 108 int type = state_type[ns]; 109 int ar = assoc_rule[ns]; 110 111 if (type == STATE_NORMAL || rule_type[ar] != RULE_VARIABLE) { /* do nothing */ 112 } 113 114 else if (type == STATE_TRAILING_CONTEXT) { 115 /* Potential trouble. Scan set of accepting numbers 116 * for the one marking the end of the "head". We 117 * assume that this looping will be fairly cheap 118 * since it's rare that an accepting number set 119 * is large. 120 */ 121 for (j = 1; j <= nacc; ++j) 122 if (accset[j] & YY_TRAILING_HEAD_MASK) { 123 line_warning (_ 124 ("dangerous trailing context"), 125 rule_linenum[ar]); 126 return; 127 } 128 } 129 } 130} 131 132 133/* dump_associated_rules - list the rules associated with a DFA state 134 * 135 * Goes through the set of NFA states associated with the DFA and 136 * extracts the first MAX_ASSOC_RULES unique rules, sorts them, 137 * and writes a report to the given file. 138 */ 139 140void dump_associated_rules (file, ds) 141 FILE *file; 142 int ds; 143{ 144 int i, j; 145 int num_associated_rules = 0; 146 int rule_set[MAX_ASSOC_RULES + 1]; 147 int *dset = dss[ds]; 148 int size = dfasiz[ds]; 149 150 for (i = 1; i <= size; ++i) { 151 int rule_num = rule_linenum[assoc_rule[dset[i]]]; 152 153 for (j = 1; j <= num_associated_rules; ++j) 154 if (rule_num == rule_set[j]) 155 break; 156 157 if (j > num_associated_rules) { /* new rule */ 158 if (num_associated_rules < MAX_ASSOC_RULES) 159 rule_set[++num_associated_rules] = 160 rule_num; 161 } 162 } 163 164 qsort (&rule_set [1], num_associated_rules, sizeof (rule_set [1]), intcmp); 165 166 fprintf (file, _(" associated rule line numbers:")); 167 168 for (i = 1; i <= num_associated_rules; ++i) { 169 if (i % 8 == 1) 170 putc ('\n', file); 171 172 fprintf (file, "\t%d", rule_set[i]); 173 } 174 175 putc ('\n', file); 176} 177 178 179/* dump_transitions - list the transitions associated with a DFA state 180 * 181 * synopsis 182 * dump_transitions( FILE *file, int state[numecs] ); 183 * 184 * Goes through the set of out-transitions and lists them in human-readable 185 * form (i.e., not as equivalence classes); also lists jam transitions 186 * (i.e., all those which are not out-transitions, plus EOF). The dump 187 * is done to the given file. 188 */ 189 190void dump_transitions (file, state) 191 FILE *file; 192 int state[]; 193{ 194 int i, ec; 195 int out_char_set[CSIZE]; 196 197 for (i = 0; i < csize; ++i) { 198 ec = ABS (ecgroup[i]); 199 out_char_set[i] = state[ec]; 200 } 201 202 fprintf (file, _(" out-transitions: ")); 203 204 list_character_set (file, out_char_set); 205 206 /* now invert the members of the set to get the jam transitions */ 207 for (i = 0; i < csize; ++i) 208 out_char_set[i] = !out_char_set[i]; 209 210 fprintf (file, _("\n jam-transitions: EOF ")); 211 212 list_character_set (file, out_char_set); 213 214 putc ('\n', file); 215} 216 217 218/* epsclosure - construct the epsilon closure of a set of ndfa states 219 * 220 * synopsis 221 * int *epsclosure( int t[num_states], int *numstates_addr, 222 * int accset[num_rules+1], int *nacc_addr, 223 * int *hashval_addr ); 224 * 225 * NOTES 226 * The epsilon closure is the set of all states reachable by an arbitrary 227 * number of epsilon transitions, which themselves do not have epsilon 228 * transitions going out, unioned with the set of states which have non-null 229 * accepting numbers. t is an array of size numstates of nfa state numbers. 230 * Upon return, t holds the epsilon closure and *numstates_addr is updated. 231 * accset holds a list of the accepting numbers, and the size of accset is 232 * given by *nacc_addr. t may be subjected to reallocation if it is not 233 * large enough to hold the epsilon closure. 234 * 235 * hashval is the hash value for the dfa corresponding to the state set. 236 */ 237 238int *epsclosure (t, ns_addr, accset, nacc_addr, hv_addr) 239 int *t, *ns_addr, accset[], *nacc_addr, *hv_addr; 240{ 241 int stkpos, ns, tsp; 242 int numstates = *ns_addr, nacc, hashval, transsym, nfaccnum; 243 int stkend, nstate; 244 static int did_stk_init = false, *stk; 245 246#define MARK_STATE(state) \ 247do{ trans1[state] = trans1[state] - MARKER_DIFFERENCE;} while(0) 248 249#define IS_MARKED(state) (trans1[state] < 0) 250 251#define UNMARK_STATE(state) \ 252do{ trans1[state] = trans1[state] + MARKER_DIFFERENCE;} while(0) 253 254#define CHECK_ACCEPT(state) \ 255do{ \ 256nfaccnum = accptnum[state]; \ 257if ( nfaccnum != NIL ) \ 258accset[++nacc] = nfaccnum; \ 259}while(0) 260 261#define DO_REALLOCATION() \ 262do { \ 263current_max_dfa_size += MAX_DFA_SIZE_INCREMENT; \ 264++num_reallocs; \ 265t = reallocate_integer_array( t, current_max_dfa_size ); \ 266stk = reallocate_integer_array( stk, current_max_dfa_size ); \ 267}while(0) \ 268 269#define PUT_ON_STACK(state) \ 270do { \ 271if ( ++stkend >= current_max_dfa_size ) \ 272DO_REALLOCATION(); \ 273stk[stkend] = state; \ 274MARK_STATE(state); \ 275}while(0) 276 277#define ADD_STATE(state) \ 278do { \ 279if ( ++numstates >= current_max_dfa_size ) \ 280DO_REALLOCATION(); \ 281t[numstates] = state; \ 282hashval += state; \ 283}while(0) 284 285#define STACK_STATE(state) \ 286do { \ 287PUT_ON_STACK(state); \ 288CHECK_ACCEPT(state); \ 289if ( nfaccnum != NIL || transchar[state] != SYM_EPSILON ) \ 290ADD_STATE(state); \ 291}while(0) 292 293 294 if (!did_stk_init) { 295 stk = allocate_integer_array (current_max_dfa_size); 296 did_stk_init = true; 297 } 298 299 nacc = stkend = hashval = 0; 300 301 for (nstate = 1; nstate <= numstates; ++nstate) { 302 ns = t[nstate]; 303 304 /* The state could be marked if we've already pushed it onto 305 * the stack. 306 */ 307 if (!IS_MARKED (ns)) { 308 PUT_ON_STACK (ns); 309 CHECK_ACCEPT (ns); 310 hashval += ns; 311 } 312 } 313 314 for (stkpos = 1; stkpos <= stkend; ++stkpos) { 315 ns = stk[stkpos]; 316 transsym = transchar[ns]; 317 318 if (transsym == SYM_EPSILON) { 319 tsp = trans1[ns] + MARKER_DIFFERENCE; 320 321 if (tsp != NO_TRANSITION) { 322 if (!IS_MARKED (tsp)) 323 STACK_STATE (tsp); 324 325 tsp = trans2[ns]; 326 327 if (tsp != NO_TRANSITION 328 && !IS_MARKED (tsp)) 329 STACK_STATE (tsp); 330 } 331 } 332 } 333 334 /* Clear out "visit" markers. */ 335 336 for (stkpos = 1; stkpos <= stkend; ++stkpos) { 337 if (IS_MARKED (stk[stkpos])) 338 UNMARK_STATE (stk[stkpos]); 339 else 340 flexfatal (_ 341 ("consistency check failed in epsclosure()")); 342 } 343 344 *ns_addr = numstates; 345 *hv_addr = hashval; 346 *nacc_addr = nacc; 347 348 return t; 349} 350 351 352/* increase_max_dfas - increase the maximum number of DFAs */ 353 354void increase_max_dfas () 355{ 356 current_max_dfas += MAX_DFAS_INCREMENT; 357 358 ++num_reallocs; 359 360 base = reallocate_integer_array (base, current_max_dfas); 361 def = reallocate_integer_array (def, current_max_dfas); 362 dfasiz = reallocate_integer_array (dfasiz, current_max_dfas); 363 accsiz = reallocate_integer_array (accsiz, current_max_dfas); 364 dhash = reallocate_integer_array (dhash, current_max_dfas); 365 dss = reallocate_int_ptr_array (dss, current_max_dfas); 366 dfaacc = reallocate_dfaacc_union (dfaacc, current_max_dfas); 367 368 if (nultrans) 369 nultrans = 370 reallocate_integer_array (nultrans, 371 current_max_dfas); 372} 373 374 375/* ntod - convert an ndfa to a dfa 376 * 377 * Creates the dfa corresponding to the ndfa we've constructed. The 378 * dfa starts out in state #1. 379 */ 380 381void ntod () 382{ 383 int *accset, ds, nacc, newds; 384 int sym, hashval, numstates, dsize; 385 int num_full_table_rows=0; /* used only for -f */ 386 int *nset, *dset; 387 int targptr, totaltrans, i, comstate, comfreq, targ; 388 int symlist[CSIZE + 1]; 389 int num_start_states; 390 int todo_head, todo_next; 391 392 struct yytbl_data *yynxt_tbl = 0; 393 flex_int32_t *yynxt_data = 0, yynxt_curr = 0; 394 395 /* Note that the following are indexed by *equivalence classes* 396 * and not by characters. Since equivalence classes are indexed 397 * beginning with 1, even if the scanner accepts NUL's, this 398 * means that (since every character is potentially in its own 399 * equivalence class) these arrays must have room for indices 400 * from 1 to CSIZE, so their size must be CSIZE + 1. 401 */ 402 int duplist[CSIZE + 1], state[CSIZE + 1]; 403 int targfreq[CSIZE + 1], targstate[CSIZE + 1]; 404 405 /* accset needs to be large enough to hold all of the rules present 406 * in the input, *plus* their YY_TRAILING_HEAD_MASK variants. 407 */ 408 accset = allocate_integer_array ((num_rules + 1) * 2); 409 nset = allocate_integer_array (current_max_dfa_size); 410 411 /* The "todo" queue is represented by the head, which is the DFA 412 * state currently being processed, and the "next", which is the 413 * next DFA state number available (not in use). We depend on the 414 * fact that snstods() returns DFA's \in increasing order/, and thus 415 * need only know the bounds of the dfas to be processed. 416 */ 417 todo_head = todo_next = 0; 418 419 for (i = 0; i <= csize; ++i) { 420 duplist[i] = NIL; 421 symlist[i] = false; 422 } 423 424 for (i = 0; i <= num_rules; ++i) 425 accset[i] = NIL; 426 427 if (trace) { 428 dumpnfa (scset[1]); 429 fputs (_("\n\nDFA Dump:\n\n"), stderr); 430 } 431 432 inittbl (); 433 434 /* Check to see whether we should build a separate table for 435 * transitions on NUL characters. We don't do this for full-speed 436 * (-F) scanners, since for them we don't have a simple state 437 * number lying around with which to index the table. We also 438 * don't bother doing it for scanners unless (1) NUL is in its own 439 * equivalence class (indicated by a positive value of 440 * ecgroup[NUL]), (2) NUL's equivalence class is the last 441 * equivalence class, and (3) the number of equivalence classes is 442 * the same as the number of characters. This latter case comes 443 * about when useecs is false or when it's true but every character 444 * still manages to land in its own class (unlikely, but it's 445 * cheap to check for). If all these things are true then the 446 * character code needed to represent NUL's equivalence class for 447 * indexing the tables is going to take one more bit than the 448 * number of characters, and therefore we won't be assured of 449 * being able to fit it into a YY_CHAR variable. This rules out 450 * storing the transitions in a compressed table, since the code 451 * for interpreting them uses a YY_CHAR variable (perhaps it 452 * should just use an integer, though; this is worth pondering ... 453 * ###). 454 * 455 * Finally, for full tables, we want the number of entries in the 456 * table to be a power of two so the array references go fast (it 457 * will just take a shift to compute the major index). If 458 * encoding NUL's transitions in the table will spoil this, we 459 * give it its own table (note that this will be the case if we're 460 * not using equivalence classes). 461 */ 462 463 /* Note that the test for ecgroup[0] == numecs below accomplishes 464 * both (1) and (2) above 465 */ 466 if (!fullspd && ecgroup[0] == numecs) { 467 /* NUL is alone in its equivalence class, which is the 468 * last one. 469 */ 470 int use_NUL_table = (numecs == csize); 471 472 if (fulltbl && !use_NUL_table) { 473 /* We still may want to use the table if numecs 474 * is a power of 2. 475 */ 476 int power_of_two; 477 478 for (power_of_two = 1; power_of_two <= csize; 479 power_of_two *= 2) 480 if (numecs == power_of_two) { 481 use_NUL_table = true; 482 break; 483 } 484 } 485 486 if (use_NUL_table) 487 nultrans = 488 allocate_integer_array (current_max_dfas); 489 490 /* From now on, nultrans != nil indicates that we're 491 * saving null transitions for later, separate encoding. 492 */ 493 } 494 495 496 if (fullspd) { 497 for (i = 0; i <= numecs; ++i) 498 state[i] = 0; 499 500 place_state (state, 0, 0); 501 dfaacc[0].dfaacc_state = 0; 502 } 503 504 else if (fulltbl) { 505 if (nultrans) 506 /* We won't be including NUL's transitions in the 507 * table, so build it for entries from 0 .. numecs - 1. 508 */ 509 num_full_table_rows = numecs; 510 511 else 512 /* Take into account the fact that we'll be including 513 * the NUL entries in the transition table. Build it 514 * from 0 .. numecs. 515 */ 516 num_full_table_rows = numecs + 1; 517 518 /* Begin generating yy_nxt[][] 519 * This spans the entire LONG function. 520 * This table is tricky because we don't know how big it will be. 521 * So we'll have to realloc() on the way... 522 * we'll wait until we can calculate yynxt_tbl->td_hilen. 523 */ 524 yynxt_tbl = 525 (struct yytbl_data *) calloc (1, 526 sizeof (struct 527 yytbl_data)); 528 yytbl_data_init (yynxt_tbl, YYTD_ID_NXT); 529 yynxt_tbl->td_hilen = 1; 530 yynxt_tbl->td_lolen = num_full_table_rows; 531 yynxt_tbl->td_data = yynxt_data = 532 (flex_int32_t *) calloc (yynxt_tbl->td_lolen * 533 yynxt_tbl->td_hilen, 534 sizeof (flex_int32_t)); 535 yynxt_curr = 0; 536 537 buf_prints (&yydmap_buf, 538 "\t{YYTD_ID_NXT, (void**)&yy_nxt, sizeof(%s)},\n", 539 long_align ? "flex_int32_t" : "flex_int16_t"); 540 541 /* Unless -Ca, declare it "short" because it's a real 542 * long-shot that that won't be large enough. 543 */ 544 if (gentables) 545 out_str_dec 546 ("static yyconst %s yy_nxt[][%d] =\n {\n", 547 long_align ? "flex_int32_t" : "flex_int16_t", 548 num_full_table_rows); 549 else { 550 out_dec ("#undef YY_NXT_LOLEN\n#define YY_NXT_LOLEN (%d)\n", num_full_table_rows); 551 out_str ("static yyconst %s *yy_nxt =0;\n", 552 long_align ? "flex_int32_t" : "flex_int16_t"); 553 } 554 555 556 if (gentables) 557 outn (" {"); 558 559 /* Generate 0 entries for state #0. */ 560 for (i = 0; i < num_full_table_rows; ++i) { 561 mk2data (0); 562 yynxt_data[yynxt_curr++] = 0; 563 } 564 565 dataflush (); 566 if (gentables) 567 outn (" },\n"); 568 } 569 570 /* Create the first states. */ 571 572 num_start_states = lastsc * 2; 573 574 for (i = 1; i <= num_start_states; ++i) { 575 numstates = 1; 576 577 /* For each start condition, make one state for the case when 578 * we're at the beginning of the line (the '^' operator) and 579 * one for the case when we're not. 580 */ 581 if (i % 2 == 1) 582 nset[numstates] = scset[(i / 2) + 1]; 583 else 584 nset[numstates] = 585 mkbranch (scbol[i / 2], scset[i / 2]); 586 587 nset = epsclosure (nset, &numstates, accset, &nacc, 588 &hashval); 589 590 if (snstods (nset, numstates, accset, nacc, hashval, &ds)) { 591 numas += nacc; 592 totnst += numstates; 593 ++todo_next; 594 595 if (variable_trailing_context_rules && nacc > 0) 596 check_trailing_context (nset, numstates, 597 accset, nacc); 598 } 599 } 600 601 if (!fullspd) { 602 if (!snstods (nset, 0, accset, 0, 0, &end_of_buffer_state)) 603 flexfatal (_ 604 ("could not create unique end-of-buffer state")); 605 606 ++numas; 607 ++num_start_states; 608 ++todo_next; 609 } 610 611 612 while (todo_head < todo_next) { 613 targptr = 0; 614 totaltrans = 0; 615 616 for (i = 1; i <= numecs; ++i) 617 state[i] = 0; 618 619 ds = ++todo_head; 620 621 dset = dss[ds]; 622 dsize = dfasiz[ds]; 623 624 if (trace) 625 fprintf (stderr, _("state # %d:\n"), ds); 626 627 sympartition (dset, dsize, symlist, duplist); 628 629 for (sym = 1; sym <= numecs; ++sym) { 630 if (symlist[sym]) { 631 symlist[sym] = 0; 632 633 if (duplist[sym] == NIL) { 634 /* Symbol has unique out-transitions. */ 635 numstates = 636 symfollowset (dset, dsize, 637 sym, nset); 638 nset = epsclosure (nset, 639 &numstates, 640 accset, &nacc, 641 &hashval); 642 643 if (snstods 644 (nset, numstates, accset, nacc, 645 hashval, &newds)) { 646 totnst = totnst + 647 numstates; 648 ++todo_next; 649 numas += nacc; 650 651 if (variable_trailing_context_rules && nacc > 0) 652 check_trailing_context 653 (nset, 654 numstates, 655 accset, 656 nacc); 657 } 658 659 state[sym] = newds; 660 661 if (trace) 662 fprintf (stderr, 663 "\t%d\t%d\n", sym, 664 newds); 665 666 targfreq[++targptr] = 1; 667 targstate[targptr] = newds; 668 ++numuniq; 669 } 670 671 else { 672 /* sym's equivalence class has the same 673 * transitions as duplist(sym)'s 674 * equivalence class. 675 */ 676 targ = state[duplist[sym]]; 677 state[sym] = targ; 678 679 if (trace) 680 fprintf (stderr, 681 "\t%d\t%d\n", sym, 682 targ); 683 684 /* Update frequency count for 685 * destination state. 686 */ 687 688 i = 0; 689 while (targstate[++i] != targ) ; 690 691 ++targfreq[i]; 692 ++numdup; 693 } 694 695 ++totaltrans; 696 duplist[sym] = NIL; 697 } 698 } 699 700 701 numsnpairs += totaltrans; 702 703 if (ds > num_start_states) 704 check_for_backing_up (ds, state); 705 706 if (nultrans) { 707 nultrans[ds] = state[NUL_ec]; 708 state[NUL_ec] = 0; /* remove transition */ 709 } 710 711 if (fulltbl) { 712 713 /* Each time we hit here, it's another td_hilen, so we realloc. */ 714 yynxt_tbl->td_hilen++; 715 yynxt_tbl->td_data = yynxt_data = 716 (flex_int32_t *) realloc (yynxt_data, 717 yynxt_tbl->td_hilen * 718 yynxt_tbl->td_lolen * 719 sizeof (flex_int32_t)); 720 721 722 if (gentables) 723 outn (" {"); 724 725 /* Supply array's 0-element. */ 726 if (ds == end_of_buffer_state) { 727 mk2data (-end_of_buffer_state); 728 yynxt_data[yynxt_curr++] = 729 -end_of_buffer_state; 730 } 731 else { 732 mk2data (end_of_buffer_state); 733 yynxt_data[yynxt_curr++] = 734 end_of_buffer_state; 735 } 736 737 for (i = 1; i < num_full_table_rows; ++i) { 738 /* Jams are marked by negative of state 739 * number. 740 */ 741 mk2data (state[i] ? state[i] : -ds); 742 yynxt_data[yynxt_curr++] = 743 state[i] ? state[i] : -ds; 744 } 745 746 dataflush (); 747 if (gentables) 748 outn (" },\n"); 749 } 750 751 else if (fullspd) 752 place_state (state, ds, totaltrans); 753 754 else if (ds == end_of_buffer_state) 755 /* Special case this state to make sure it does what 756 * it's supposed to, i.e., jam on end-of-buffer. 757 */ 758 stack1 (ds, 0, 0, JAMSTATE); 759 760 else { /* normal, compressed state */ 761 762 /* Determine which destination state is the most 763 * common, and how many transitions to it there are. 764 */ 765 766 comfreq = 0; 767 comstate = 0; 768 769 for (i = 1; i <= targptr; ++i) 770 if (targfreq[i] > comfreq) { 771 comfreq = targfreq[i]; 772 comstate = targstate[i]; 773 } 774 775 bldtbl (state, ds, totaltrans, comstate, comfreq); 776 } 777 } 778 779 if (fulltbl) { 780 dataend (); 781 if (tablesext) { 782 yytbl_data_compress (yynxt_tbl); 783 if (yytbl_data_fwrite (&tableswr, yynxt_tbl) < 0) 784 flexerror (_ 785 ("Could not write yynxt_tbl[][]")); 786 } 787 if (yynxt_tbl) { 788 yytbl_data_destroy (yynxt_tbl); 789 yynxt_tbl = 0; 790 } 791 } 792 793 else if (!fullspd) { 794 cmptmps (); /* create compressed template entries */ 795 796 /* Create tables for all the states with only one 797 * out-transition. 798 */ 799 while (onesp > 0) { 800 mk1tbl (onestate[onesp], onesym[onesp], 801 onenext[onesp], onedef[onesp]); 802 --onesp; 803 } 804 805 mkdeftbl (); 806 } 807 808 flex_free ((void *) accset); 809 flex_free ((void *) nset); 810} 811 812 813/* snstods - converts a set of ndfa states into a dfa state 814 * 815 * synopsis 816 * is_new_state = snstods( int sns[numstates], int numstates, 817 * int accset[num_rules+1], int nacc, 818 * int hashval, int *newds_addr ); 819 * 820 * On return, the dfa state number is in newds. 821 */ 822 823int snstods (sns, numstates, accset, nacc, hashval, newds_addr) 824 int sns[], numstates, accset[], nacc, hashval, *newds_addr; 825{ 826 int didsort = 0; 827 int i, j; 828 int newds, *oldsns; 829 830 for (i = 1; i <= lastdfa; ++i) 831 if (hashval == dhash[i]) { 832 if (numstates == dfasiz[i]) { 833 oldsns = dss[i]; 834 835 if (!didsort) { 836 /* We sort the states in sns so we 837 * can compare it to oldsns quickly. 838 */ 839 qsort (&sns [1], numstates, sizeof (sns [1]), intcmp); 840 didsort = 1; 841 } 842 843 for (j = 1; j <= numstates; ++j) 844 if (sns[j] != oldsns[j]) 845 break; 846 847 if (j > numstates) { 848 ++dfaeql; 849 *newds_addr = i; 850 return 0; 851 } 852 853 ++hshcol; 854 } 855 856 else 857 ++hshsave; 858 } 859 860 /* Make a new dfa. */ 861 862 if (++lastdfa >= current_max_dfas) 863 increase_max_dfas (); 864 865 newds = lastdfa; 866 867 dss[newds] = allocate_integer_array (numstates + 1); 868 869 /* If we haven't already sorted the states in sns, we do so now, 870 * so that future comparisons with it can be made quickly. 871 */ 872 873 if (!didsort) 874 qsort (&sns [1], numstates, sizeof (sns [1]), intcmp); 875 876 for (i = 1; i <= numstates; ++i) 877 dss[newds][i] = sns[i]; 878 879 dfasiz[newds] = numstates; 880 dhash[newds] = hashval; 881 882 if (nacc == 0) { 883 if (reject) 884 dfaacc[newds].dfaacc_set = (int *) 0; 885 else 886 dfaacc[newds].dfaacc_state = 0; 887 888 accsiz[newds] = 0; 889 } 890 891 else if (reject) { 892 /* We sort the accepting set in increasing order so the 893 * disambiguating rule that the first rule listed is considered 894 * match in the event of ties will work. 895 */ 896 897 qsort (&accset [1], nacc, sizeof (accset [1]), intcmp); 898 899 dfaacc[newds].dfaacc_set = 900 allocate_integer_array (nacc + 1); 901 902 /* Save the accepting set for later */ 903 for (i = 1; i <= nacc; ++i) { 904 dfaacc[newds].dfaacc_set[i] = accset[i]; 905 906 if (accset[i] <= num_rules) 907 /* Who knows, perhaps a REJECT can yield 908 * this rule. 909 */ 910 rule_useful[accset[i]] = true; 911 } 912 913 accsiz[newds] = nacc; 914 } 915 916 else { 917 /* Find lowest numbered rule so the disambiguating rule 918 * will work. 919 */ 920 j = num_rules + 1; 921 922 for (i = 1; i <= nacc; ++i) 923 if (accset[i] < j) 924 j = accset[i]; 925 926 dfaacc[newds].dfaacc_state = j; 927 928 if (j <= num_rules) 929 rule_useful[j] = true; 930 } 931 932 *newds_addr = newds; 933 934 return 1; 935} 936 937 938/* symfollowset - follow the symbol transitions one step 939 * 940 * synopsis 941 * numstates = symfollowset( int ds[current_max_dfa_size], int dsize, 942 * int transsym, int nset[current_max_dfa_size] ); 943 */ 944 945int symfollowset (ds, dsize, transsym, nset) 946 int ds[], dsize, transsym, nset[]; 947{ 948 int ns, tsp, sym, i, j, lenccl, ch, numstates, ccllist; 949 950 numstates = 0; 951 952 for (i = 1; i <= dsize; ++i) { /* for each nfa state ns in the state set of ds */ 953 ns = ds[i]; 954 sym = transchar[ns]; 955 tsp = trans1[ns]; 956 957 if (sym < 0) { /* it's a character class */ 958 sym = -sym; 959 ccllist = cclmap[sym]; 960 lenccl = ccllen[sym]; 961 962 if (cclng[sym]) { 963 for (j = 0; j < lenccl; ++j) { 964 /* Loop through negated character 965 * class. 966 */ 967 ch = ccltbl[ccllist + j]; 968 969 if (ch == 0) 970 ch = NUL_ec; 971 972 if (ch > transsym) 973 /* Transsym isn't in negated 974 * ccl. 975 */ 976 break; 977 978 else if (ch == transsym) 979 /* next 2 */ 980 goto bottom; 981 } 982 983 /* Didn't find transsym in ccl. */ 984 nset[++numstates] = tsp; 985 } 986 987 else 988 for (j = 0; j < lenccl; ++j) { 989 ch = ccltbl[ccllist + j]; 990 991 if (ch == 0) 992 ch = NUL_ec; 993 994 if (ch > transsym) 995 break; 996 else if (ch == transsym) { 997 nset[++numstates] = tsp; 998 break; 999 } 1000 } 1001 } 1002 1003 else if (sym == SYM_EPSILON) { /* do nothing */ 1004 } 1005 1006 else if (ABS (ecgroup[sym]) == transsym) 1007 nset[++numstates] = tsp; 1008 1009 bottom:; 1010 } 1011 1012 return numstates; 1013} 1014 1015 1016/* sympartition - partition characters with same out-transitions 1017 * 1018 * synopsis 1019 * sympartition( int ds[current_max_dfa_size], int numstates, 1020 * int symlist[numecs], int duplist[numecs] ); 1021 */ 1022 1023void sympartition (ds, numstates, symlist, duplist) 1024 int ds[], numstates; 1025 int symlist[], duplist[]; 1026{ 1027 int tch, i, j, k, ns, dupfwd[CSIZE + 1], lenccl, cclp, ich; 1028 1029 /* Partitioning is done by creating equivalence classes for those 1030 * characters which have out-transitions from the given state. Thus 1031 * we are really creating equivalence classes of equivalence classes. 1032 */ 1033 1034 for (i = 1; i <= numecs; ++i) { /* initialize equivalence class list */ 1035 duplist[i] = i - 1; 1036 dupfwd[i] = i + 1; 1037 } 1038 1039 duplist[1] = NIL; 1040 dupfwd[numecs] = NIL; 1041 1042 for (i = 1; i <= numstates; ++i) { 1043 ns = ds[i]; 1044 tch = transchar[ns]; 1045 1046 if (tch != SYM_EPSILON) { 1047 if (tch < -lastccl || tch >= csize) { 1048 flexfatal (_ 1049 ("bad transition character detected in sympartition()")); 1050 } 1051 1052 if (tch >= 0) { /* character transition */ 1053 int ec = ecgroup[tch]; 1054 1055 mkechar (ec, dupfwd, duplist); 1056 symlist[ec] = 1; 1057 } 1058 1059 else { /* character class */ 1060 tch = -tch; 1061 1062 lenccl = ccllen[tch]; 1063 cclp = cclmap[tch]; 1064 mkeccl (ccltbl + cclp, lenccl, dupfwd, 1065 duplist, numecs, NUL_ec); 1066 1067 if (cclng[tch]) { 1068 j = 0; 1069 1070 for (k = 0; k < lenccl; ++k) { 1071 ich = ccltbl[cclp + k]; 1072 1073 if (ich == 0) 1074 ich = NUL_ec; 1075 1076 for (++j; j < ich; ++j) 1077 symlist[j] = 1; 1078 } 1079 1080 for (++j; j <= numecs; ++j) 1081 symlist[j] = 1; 1082 } 1083 1084 else 1085 for (k = 0; k < lenccl; ++k) { 1086 ich = ccltbl[cclp + k]; 1087 1088 if (ich == 0) 1089 ich = NUL_ec; 1090 1091 symlist[ich] = 1; 1092 } 1093 } 1094 } 1095 } 1096} 1097