1228072Sbapt/* nfa - NFA construction routines */ 2228072Sbapt 3228072Sbapt/* Copyright (c) 1990 The Regents of the University of California. */ 4228072Sbapt/* All rights reserved. */ 5228072Sbapt 6228072Sbapt/* This code is derived from software contributed to Berkeley by */ 7228072Sbapt/* Vern Paxson. */ 8228072Sbapt 9228072Sbapt/* The United States Government has rights in this work pursuant */ 10228072Sbapt/* to contract no. DE-AC03-76SF00098 between the United States */ 11228072Sbapt/* Department of Energy and the University of California. */ 12228072Sbapt 13228072Sbapt/* This file is part of flex. */ 14228072Sbapt 15228072Sbapt/* Redistribution and use in source and binary forms, with or without */ 16228072Sbapt/* modification, are permitted provided that the following conditions */ 17228072Sbapt/* are met: */ 18228072Sbapt 19228072Sbapt/* 1. Redistributions of source code must retain the above copyright */ 20228072Sbapt/* notice, this list of conditions and the following disclaimer. */ 21228072Sbapt/* 2. Redistributions in binary form must reproduce the above copyright */ 22228072Sbapt/* notice, this list of conditions and the following disclaimer in the */ 23228072Sbapt/* documentation and/or other materials provided with the distribution. */ 24228072Sbapt 25228072Sbapt/* Neither the name of the University nor the names of its contributors */ 26228072Sbapt/* may be used to endorse or promote products derived from this software */ 27228072Sbapt/* without specific prior written permission. */ 28228072Sbapt 29228072Sbapt/* THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR */ 30228072Sbapt/* IMPLIED WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED */ 31228072Sbapt/* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR */ 32228072Sbapt/* PURPOSE. */ 33228072Sbapt 34228072Sbapt#include "flexdef.h" 35228072Sbapt 36228072Sbapt 37228072Sbapt/* declare functions that have forward references */ 38228072Sbapt 39228072Sbaptint dupmachine PROTO ((int)); 40228072Sbaptvoid mkxtion PROTO ((int, int)); 41228072Sbapt 42228072Sbapt 43228072Sbapt/* add_accept - add an accepting state to a machine 44228072Sbapt * 45228072Sbapt * accepting_number becomes mach's accepting number. 46228072Sbapt */ 47228072Sbapt 48228072Sbaptvoid add_accept (mach, accepting_number) 49228072Sbapt int mach, accepting_number; 50228072Sbapt{ 51228072Sbapt /* Hang the accepting number off an epsilon state. if it is associated 52228072Sbapt * with a state that has a non-epsilon out-transition, then the state 53228072Sbapt * will accept BEFORE it makes that transition, i.e., one character 54228072Sbapt * too soon. 55228072Sbapt */ 56228072Sbapt 57228072Sbapt if (transchar[finalst[mach]] == SYM_EPSILON) 58228072Sbapt accptnum[finalst[mach]] = accepting_number; 59228072Sbapt 60228072Sbapt else { 61228072Sbapt int astate = mkstate (SYM_EPSILON); 62228072Sbapt 63228072Sbapt accptnum[astate] = accepting_number; 64228072Sbapt (void) link_machines (mach, astate); 65228072Sbapt } 66228072Sbapt} 67228072Sbapt 68228072Sbapt 69228072Sbapt/* copysingl - make a given number of copies of a singleton machine 70228072Sbapt * 71228072Sbapt * synopsis 72228072Sbapt * 73228072Sbapt * newsng = copysingl( singl, num ); 74228072Sbapt * 75228072Sbapt * newsng - a new singleton composed of num copies of singl 76228072Sbapt * singl - a singleton machine 77228072Sbapt * num - the number of copies of singl to be present in newsng 78228072Sbapt */ 79228072Sbapt 80228072Sbaptint copysingl (singl, num) 81228072Sbapt int singl, num; 82228072Sbapt{ 83228072Sbapt int copy, i; 84228072Sbapt 85228072Sbapt copy = mkstate (SYM_EPSILON); 86228072Sbapt 87228072Sbapt for (i = 1; i <= num; ++i) 88228072Sbapt copy = link_machines (copy, dupmachine (singl)); 89228072Sbapt 90228072Sbapt return copy; 91228072Sbapt} 92228072Sbapt 93228072Sbapt 94228072Sbapt/* dumpnfa - debugging routine to write out an nfa */ 95228072Sbapt 96228072Sbaptvoid dumpnfa (state1) 97228072Sbapt int state1; 98228072Sbapt 99228072Sbapt{ 100228072Sbapt int sym, tsp1, tsp2, anum, ns; 101228072Sbapt 102228072Sbapt fprintf (stderr, 103228072Sbapt _ 104228072Sbapt ("\n\n********** beginning dump of nfa with start state %d\n"), 105228072Sbapt state1); 106228072Sbapt 107228072Sbapt /* We probably should loop starting at firstst[state1] and going to 108228072Sbapt * lastst[state1], but they're not maintained properly when we "or" 109228072Sbapt * all of the rules together. So we use our knowledge that the machine 110228072Sbapt * starts at state 1 and ends at lastnfa. 111228072Sbapt */ 112228072Sbapt 113228072Sbapt /* for ( ns = firstst[state1]; ns <= lastst[state1]; ++ns ) */ 114228072Sbapt for (ns = 1; ns <= lastnfa; ++ns) { 115228072Sbapt fprintf (stderr, _("state # %4d\t"), ns); 116228072Sbapt 117228072Sbapt sym = transchar[ns]; 118228072Sbapt tsp1 = trans1[ns]; 119228072Sbapt tsp2 = trans2[ns]; 120228072Sbapt anum = accptnum[ns]; 121228072Sbapt 122228072Sbapt fprintf (stderr, "%3d: %4d, %4d", sym, tsp1, tsp2); 123228072Sbapt 124228072Sbapt if (anum != NIL) 125228072Sbapt fprintf (stderr, " [%d]", anum); 126228072Sbapt 127228072Sbapt fprintf (stderr, "\n"); 128228072Sbapt } 129228072Sbapt 130228072Sbapt fprintf (stderr, _("********** end of dump\n")); 131228072Sbapt} 132228072Sbapt 133228072Sbapt 134228072Sbapt/* dupmachine - make a duplicate of a given machine 135228072Sbapt * 136228072Sbapt * synopsis 137228072Sbapt * 138228072Sbapt * copy = dupmachine( mach ); 139228072Sbapt * 140228072Sbapt * copy - holds duplicate of mach 141228072Sbapt * mach - machine to be duplicated 142228072Sbapt * 143228072Sbapt * note that the copy of mach is NOT an exact duplicate; rather, all the 144228072Sbapt * transition states values are adjusted so that the copy is self-contained, 145228072Sbapt * as the original should have been. 146228072Sbapt * 147228072Sbapt * also note that the original MUST be contiguous, with its low and high 148228072Sbapt * states accessible by the arrays firstst and lastst 149228072Sbapt */ 150228072Sbapt 151228072Sbaptint dupmachine (mach) 152228072Sbapt int mach; 153228072Sbapt{ 154228072Sbapt int i, init, state_offset; 155228072Sbapt int state = 0; 156228072Sbapt int last = lastst[mach]; 157228072Sbapt 158228072Sbapt for (i = firstst[mach]; i <= last; ++i) { 159228072Sbapt state = mkstate (transchar[i]); 160228072Sbapt 161228072Sbapt if (trans1[i] != NO_TRANSITION) { 162228072Sbapt mkxtion (finalst[state], trans1[i] + state - i); 163228072Sbapt 164228072Sbapt if (transchar[i] == SYM_EPSILON && 165228072Sbapt trans2[i] != NO_TRANSITION) 166228072Sbapt mkxtion (finalst[state], 167228072Sbapt trans2[i] + state - i); 168228072Sbapt } 169228072Sbapt 170228072Sbapt accptnum[state] = accptnum[i]; 171228072Sbapt } 172228072Sbapt 173228072Sbapt if (state == 0) 174228072Sbapt flexfatal (_("empty machine in dupmachine()")); 175228072Sbapt 176228072Sbapt state_offset = state - i + 1; 177228072Sbapt 178228072Sbapt init = mach + state_offset; 179228072Sbapt firstst[init] = firstst[mach] + state_offset; 180228072Sbapt finalst[init] = finalst[mach] + state_offset; 181228072Sbapt lastst[init] = lastst[mach] + state_offset; 182228072Sbapt 183228072Sbapt return init; 184228072Sbapt} 185228072Sbapt 186228072Sbapt 187228072Sbapt/* finish_rule - finish up the processing for a rule 188228072Sbapt * 189228072Sbapt * An accepting number is added to the given machine. If variable_trail_rule 190228072Sbapt * is true then the rule has trailing context and both the head and trail 191228072Sbapt * are variable size. Otherwise if headcnt or trailcnt is non-zero then 192228072Sbapt * the machine recognizes a pattern with trailing context and headcnt is 193228072Sbapt * the number of characters in the matched part of the pattern, or zero 194228072Sbapt * if the matched part has variable length. trailcnt is the number of 195228072Sbapt * trailing context characters in the pattern, or zero if the trailing 196228072Sbapt * context has variable length. 197228072Sbapt */ 198228072Sbapt 199228072Sbaptvoid finish_rule (mach, variable_trail_rule, headcnt, trailcnt, 200228072Sbapt pcont_act) 201228072Sbapt int mach, variable_trail_rule, headcnt, trailcnt, pcont_act; 202228072Sbapt{ 203228072Sbapt char action_text[MAXLINE]; 204228072Sbapt 205228072Sbapt add_accept (mach, num_rules); 206228072Sbapt 207228072Sbapt /* We did this in new_rule(), but it often gets the wrong 208228072Sbapt * number because we do it before we start parsing the current rule. 209228072Sbapt */ 210228072Sbapt rule_linenum[num_rules] = linenum; 211228072Sbapt 212228072Sbapt /* If this is a continued action, then the line-number has already 213228072Sbapt * been updated, giving us the wrong number. 214228072Sbapt */ 215228072Sbapt if (continued_action) 216228072Sbapt --rule_linenum[num_rules]; 217228072Sbapt 218228072Sbapt 219228072Sbapt /* If the previous rule was continued action, then we inherit the 220228072Sbapt * previous newline flag, possibly overriding the current one. 221228072Sbapt */ 222228072Sbapt if (pcont_act && rule_has_nl[num_rules - 1]) 223228072Sbapt rule_has_nl[num_rules] = true; 224228072Sbapt 225228072Sbapt snprintf (action_text, sizeof(action_text), "case %d:\n", num_rules); 226228072Sbapt add_action (action_text); 227228072Sbapt if (rule_has_nl[num_rules]) { 228228072Sbapt snprintf (action_text, sizeof(action_text), "/* rule %d can match eol */\n", 229228072Sbapt num_rules); 230228072Sbapt add_action (action_text); 231228072Sbapt } 232228072Sbapt 233228072Sbapt 234228072Sbapt if (variable_trail_rule) { 235228072Sbapt rule_type[num_rules] = RULE_VARIABLE; 236228072Sbapt 237228072Sbapt if (performance_report > 0) 238228072Sbapt fprintf (stderr, 239228072Sbapt _ 240228072Sbapt ("Variable trailing context rule at line %d\n"), 241228072Sbapt rule_linenum[num_rules]); 242228072Sbapt 243228072Sbapt variable_trailing_context_rules = true; 244228072Sbapt } 245228072Sbapt 246228072Sbapt else { 247228072Sbapt rule_type[num_rules] = RULE_NORMAL; 248228072Sbapt 249228072Sbapt if (headcnt > 0 || trailcnt > 0) { 250228072Sbapt /* Do trailing context magic to not match the trailing 251228072Sbapt * characters. 252228072Sbapt */ 253228072Sbapt char *scanner_cp = "YY_G(yy_c_buf_p) = yy_cp"; 254228072Sbapt char *scanner_bp = "yy_bp"; 255228072Sbapt 256228072Sbapt add_action 257228072Sbapt ("*yy_cp = YY_G(yy_hold_char); /* undo effects of setting up yytext */\n"); 258228072Sbapt 259228072Sbapt if (headcnt > 0) { 260228072Sbapt snprintf (action_text, sizeof(action_text), "%s = %s + %d;\n", 261228072Sbapt scanner_cp, scanner_bp, headcnt); 262228072Sbapt add_action (action_text); 263228072Sbapt } 264228072Sbapt 265228072Sbapt else { 266228072Sbapt snprintf (action_text, sizeof(action_text), "%s -= %d;\n", 267228072Sbapt scanner_cp, trailcnt); 268228072Sbapt add_action (action_text); 269228072Sbapt } 270228072Sbapt 271228072Sbapt add_action 272228072Sbapt ("YY_DO_BEFORE_ACTION; /* set up yytext again */\n"); 273228072Sbapt } 274228072Sbapt } 275228072Sbapt 276228072Sbapt /* Okay, in the action code at this point yytext and yyleng have 277228072Sbapt * their proper final values for this rule, so here's the point 278228072Sbapt * to do any user action. But don't do it for continued actions, 279228072Sbapt * as that'll result in multiple YY_RULE_SETUP's. 280228072Sbapt */ 281228072Sbapt if (!continued_action) 282228072Sbapt add_action ("YY_RULE_SETUP\n"); 283228072Sbapt 284228072Sbapt line_directive_out ((FILE *) 0, 1); 285228072Sbapt} 286228072Sbapt 287228072Sbapt 288228072Sbapt/* link_machines - connect two machines together 289228072Sbapt * 290228072Sbapt * synopsis 291228072Sbapt * 292228072Sbapt * new = link_machines( first, last ); 293228072Sbapt * 294228072Sbapt * new - a machine constructed by connecting first to last 295228072Sbapt * first - the machine whose successor is to be last 296228072Sbapt * last - the machine whose predecessor is to be first 297228072Sbapt * 298228072Sbapt * note: this routine concatenates the machine first with the machine 299228072Sbapt * last to produce a machine new which will pattern-match first first 300228072Sbapt * and then last, and will fail if either of the sub-patterns fails. 301228072Sbapt * FIRST is set to new by the operation. last is unmolested. 302228072Sbapt */ 303228072Sbapt 304228072Sbaptint link_machines (first, last) 305228072Sbapt int first, last; 306228072Sbapt{ 307228072Sbapt if (first == NIL) 308228072Sbapt return last; 309228072Sbapt 310228072Sbapt else if (last == NIL) 311228072Sbapt return first; 312228072Sbapt 313228072Sbapt else { 314228072Sbapt mkxtion (finalst[first], last); 315228072Sbapt finalst[first] = finalst[last]; 316228072Sbapt lastst[first] = MAX (lastst[first], lastst[last]); 317228072Sbapt firstst[first] = MIN (firstst[first], firstst[last]); 318228072Sbapt 319228072Sbapt return first; 320228072Sbapt } 321228072Sbapt} 322228072Sbapt 323228072Sbapt 324228072Sbapt/* mark_beginning_as_normal - mark each "beginning" state in a machine 325228072Sbapt * as being a "normal" (i.e., not trailing context- 326228072Sbapt * associated) states 327228072Sbapt * 328228072Sbapt * The "beginning" states are the epsilon closure of the first state 329228072Sbapt */ 330228072Sbapt 331228072Sbaptvoid mark_beginning_as_normal (mach) 332250874Sjkim int mach; 333228072Sbapt{ 334228072Sbapt switch (state_type[mach]) { 335228072Sbapt case STATE_NORMAL: 336228072Sbapt /* Oh, we've already visited here. */ 337228072Sbapt return; 338228072Sbapt 339228072Sbapt case STATE_TRAILING_CONTEXT: 340228072Sbapt state_type[mach] = STATE_NORMAL; 341228072Sbapt 342228072Sbapt if (transchar[mach] == SYM_EPSILON) { 343228072Sbapt if (trans1[mach] != NO_TRANSITION) 344228072Sbapt mark_beginning_as_normal (trans1[mach]); 345228072Sbapt 346228072Sbapt if (trans2[mach] != NO_TRANSITION) 347228072Sbapt mark_beginning_as_normal (trans2[mach]); 348228072Sbapt } 349228072Sbapt break; 350228072Sbapt 351228072Sbapt default: 352228072Sbapt flexerror (_ 353228072Sbapt ("bad state type in mark_beginning_as_normal()")); 354228072Sbapt break; 355228072Sbapt } 356228072Sbapt} 357228072Sbapt 358228072Sbapt 359228072Sbapt/* mkbranch - make a machine that branches to two machines 360228072Sbapt * 361228072Sbapt * synopsis 362228072Sbapt * 363228072Sbapt * branch = mkbranch( first, second ); 364228072Sbapt * 365228072Sbapt * branch - a machine which matches either first's pattern or second's 366228072Sbapt * first, second - machines whose patterns are to be or'ed (the | operator) 367228072Sbapt * 368228072Sbapt * Note that first and second are NEITHER destroyed by the operation. Also, 369228072Sbapt * the resulting machine CANNOT be used with any other "mk" operation except 370228072Sbapt * more mkbranch's. Compare with mkor() 371228072Sbapt */ 372228072Sbapt 373228072Sbaptint mkbranch (first, second) 374228072Sbapt int first, second; 375228072Sbapt{ 376228072Sbapt int eps; 377228072Sbapt 378228072Sbapt if (first == NO_TRANSITION) 379228072Sbapt return second; 380228072Sbapt 381228072Sbapt else if (second == NO_TRANSITION) 382228072Sbapt return first; 383228072Sbapt 384228072Sbapt eps = mkstate (SYM_EPSILON); 385228072Sbapt 386228072Sbapt mkxtion (eps, first); 387228072Sbapt mkxtion (eps, second); 388228072Sbapt 389228072Sbapt return eps; 390228072Sbapt} 391228072Sbapt 392228072Sbapt 393228072Sbapt/* mkclos - convert a machine into a closure 394228072Sbapt * 395228072Sbapt * synopsis 396228072Sbapt * new = mkclos( state ); 397228072Sbapt * 398228072Sbapt * new - a new state which matches the closure of "state" 399228072Sbapt */ 400228072Sbapt 401228072Sbaptint mkclos (state) 402228072Sbapt int state; 403228072Sbapt{ 404228072Sbapt return mkopt (mkposcl (state)); 405228072Sbapt} 406228072Sbapt 407228072Sbapt 408228072Sbapt/* mkopt - make a machine optional 409228072Sbapt * 410228072Sbapt * synopsis 411228072Sbapt * 412228072Sbapt * new = mkopt( mach ); 413228072Sbapt * 414228072Sbapt * new - a machine which optionally matches whatever mach matched 415228072Sbapt * mach - the machine to make optional 416228072Sbapt * 417228072Sbapt * notes: 418228072Sbapt * 1. mach must be the last machine created 419228072Sbapt * 2. mach is destroyed by the call 420228072Sbapt */ 421228072Sbapt 422228072Sbaptint mkopt (mach) 423228072Sbapt int mach; 424228072Sbapt{ 425228072Sbapt int eps; 426228072Sbapt 427228072Sbapt if (!SUPER_FREE_EPSILON (finalst[mach])) { 428228072Sbapt eps = mkstate (SYM_EPSILON); 429228072Sbapt mach = link_machines (mach, eps); 430228072Sbapt } 431228072Sbapt 432228072Sbapt /* Can't skimp on the following if FREE_EPSILON(mach) is true because 433228072Sbapt * some state interior to "mach" might point back to the beginning 434228072Sbapt * for a closure. 435228072Sbapt */ 436228072Sbapt eps = mkstate (SYM_EPSILON); 437228072Sbapt mach = link_machines (eps, mach); 438228072Sbapt 439228072Sbapt mkxtion (mach, finalst[mach]); 440228072Sbapt 441228072Sbapt return mach; 442228072Sbapt} 443228072Sbapt 444228072Sbapt 445228072Sbapt/* mkor - make a machine that matches either one of two machines 446228072Sbapt * 447228072Sbapt * synopsis 448228072Sbapt * 449228072Sbapt * new = mkor( first, second ); 450228072Sbapt * 451228072Sbapt * new - a machine which matches either first's pattern or second's 452228072Sbapt * first, second - machines whose patterns are to be or'ed (the | operator) 453228072Sbapt * 454228072Sbapt * note that first and second are both destroyed by the operation 455228072Sbapt * the code is rather convoluted because an attempt is made to minimize 456228072Sbapt * the number of epsilon states needed 457228072Sbapt */ 458228072Sbapt 459228072Sbaptint mkor (first, second) 460228072Sbapt int first, second; 461228072Sbapt{ 462228072Sbapt int eps, orend; 463228072Sbapt 464228072Sbapt if (first == NIL) 465228072Sbapt return second; 466228072Sbapt 467228072Sbapt else if (second == NIL) 468228072Sbapt return first; 469228072Sbapt 470228072Sbapt else { 471228072Sbapt /* See comment in mkopt() about why we can't use the first 472228072Sbapt * state of "first" or "second" if they satisfy "FREE_EPSILON". 473228072Sbapt */ 474228072Sbapt eps = mkstate (SYM_EPSILON); 475228072Sbapt 476228072Sbapt first = link_machines (eps, first); 477228072Sbapt 478228072Sbapt mkxtion (first, second); 479228072Sbapt 480228072Sbapt if (SUPER_FREE_EPSILON (finalst[first]) && 481228072Sbapt accptnum[finalst[first]] == NIL) { 482228072Sbapt orend = finalst[first]; 483228072Sbapt mkxtion (finalst[second], orend); 484228072Sbapt } 485228072Sbapt 486228072Sbapt else if (SUPER_FREE_EPSILON (finalst[second]) && 487228072Sbapt accptnum[finalst[second]] == NIL) { 488228072Sbapt orend = finalst[second]; 489228072Sbapt mkxtion (finalst[first], orend); 490228072Sbapt } 491228072Sbapt 492228072Sbapt else { 493228072Sbapt eps = mkstate (SYM_EPSILON); 494228072Sbapt 495228072Sbapt first = link_machines (first, eps); 496228072Sbapt orend = finalst[first]; 497228072Sbapt 498228072Sbapt mkxtion (finalst[second], orend); 499228072Sbapt } 500228072Sbapt } 501228072Sbapt 502228072Sbapt finalst[first] = orend; 503228072Sbapt return first; 504228072Sbapt} 505228072Sbapt 506228072Sbapt 507228072Sbapt/* mkposcl - convert a machine into a positive closure 508228072Sbapt * 509228072Sbapt * synopsis 510228072Sbapt * new = mkposcl( state ); 511228072Sbapt * 512228072Sbapt * new - a machine matching the positive closure of "state" 513228072Sbapt */ 514228072Sbapt 515228072Sbaptint mkposcl (state) 516228072Sbapt int state; 517228072Sbapt{ 518228072Sbapt int eps; 519228072Sbapt 520228072Sbapt if (SUPER_FREE_EPSILON (finalst[state])) { 521228072Sbapt mkxtion (finalst[state], state); 522228072Sbapt return state; 523228072Sbapt } 524228072Sbapt 525228072Sbapt else { 526228072Sbapt eps = mkstate (SYM_EPSILON); 527228072Sbapt mkxtion (eps, state); 528228072Sbapt return link_machines (state, eps); 529228072Sbapt } 530228072Sbapt} 531228072Sbapt 532228072Sbapt 533228072Sbapt/* mkrep - make a replicated machine 534228072Sbapt * 535228072Sbapt * synopsis 536228072Sbapt * new = mkrep( mach, lb, ub ); 537228072Sbapt * 538228072Sbapt * new - a machine that matches whatever "mach" matched from "lb" 539228072Sbapt * number of times to "ub" number of times 540228072Sbapt * 541228072Sbapt * note 542228072Sbapt * if "ub" is INFINITE_REPEAT then "new" matches "lb" or more occurrences of "mach" 543228072Sbapt */ 544228072Sbapt 545228072Sbaptint mkrep (mach, lb, ub) 546228072Sbapt int mach, lb, ub; 547228072Sbapt{ 548228072Sbapt int base_mach, tail, copy, i; 549228072Sbapt 550228072Sbapt base_mach = copysingl (mach, lb - 1); 551228072Sbapt 552228072Sbapt if (ub == INFINITE_REPEAT) { 553228072Sbapt copy = dupmachine (mach); 554228072Sbapt mach = link_machines (mach, 555228072Sbapt link_machines (base_mach, 556228072Sbapt mkclos (copy))); 557228072Sbapt } 558228072Sbapt 559228072Sbapt else { 560228072Sbapt tail = mkstate (SYM_EPSILON); 561228072Sbapt 562228072Sbapt for (i = lb; i < ub; ++i) { 563228072Sbapt copy = dupmachine (mach); 564228072Sbapt tail = mkopt (link_machines (copy, tail)); 565228072Sbapt } 566228072Sbapt 567228072Sbapt mach = 568228072Sbapt link_machines (mach, 569228072Sbapt link_machines (base_mach, tail)); 570228072Sbapt } 571228072Sbapt 572228072Sbapt return mach; 573228072Sbapt} 574228072Sbapt 575228072Sbapt 576228072Sbapt/* mkstate - create a state with a transition on a given symbol 577228072Sbapt * 578228072Sbapt * synopsis 579228072Sbapt * 580228072Sbapt * state = mkstate( sym ); 581228072Sbapt * 582228072Sbapt * state - a new state matching sym 583228072Sbapt * sym - the symbol the new state is to have an out-transition on 584228072Sbapt * 585228072Sbapt * note that this routine makes new states in ascending order through the 586228072Sbapt * state array (and increments LASTNFA accordingly). The routine DUPMACHINE 587228072Sbapt * relies on machines being made in ascending order and that they are 588228072Sbapt * CONTIGUOUS. Change it and you will have to rewrite DUPMACHINE (kludge 589228072Sbapt * that it admittedly is) 590228072Sbapt */ 591228072Sbapt 592228072Sbaptint mkstate (sym) 593228072Sbapt int sym; 594228072Sbapt{ 595228072Sbapt if (++lastnfa >= current_mns) { 596228072Sbapt if ((current_mns += MNS_INCREMENT) >= maximum_mns) 597228072Sbapt lerrif (_ 598228072Sbapt ("input rules are too complicated (>= %d NFA states)"), 599228072Sbaptcurrent_mns); 600228072Sbapt 601228072Sbapt ++num_reallocs; 602228072Sbapt 603228072Sbapt firstst = reallocate_integer_array (firstst, current_mns); 604228072Sbapt lastst = reallocate_integer_array (lastst, current_mns); 605228072Sbapt finalst = reallocate_integer_array (finalst, current_mns); 606228072Sbapt transchar = 607228072Sbapt reallocate_integer_array (transchar, current_mns); 608228072Sbapt trans1 = reallocate_integer_array (trans1, current_mns); 609228072Sbapt trans2 = reallocate_integer_array (trans2, current_mns); 610228072Sbapt accptnum = 611228072Sbapt reallocate_integer_array (accptnum, current_mns); 612228072Sbapt assoc_rule = 613228072Sbapt reallocate_integer_array (assoc_rule, current_mns); 614228072Sbapt state_type = 615228072Sbapt reallocate_integer_array (state_type, current_mns); 616228072Sbapt } 617228072Sbapt 618228072Sbapt firstst[lastnfa] = lastnfa; 619228072Sbapt finalst[lastnfa] = lastnfa; 620228072Sbapt lastst[lastnfa] = lastnfa; 621228072Sbapt transchar[lastnfa] = sym; 622228072Sbapt trans1[lastnfa] = NO_TRANSITION; 623228072Sbapt trans2[lastnfa] = NO_TRANSITION; 624228072Sbapt accptnum[lastnfa] = NIL; 625228072Sbapt assoc_rule[lastnfa] = num_rules; 626228072Sbapt state_type[lastnfa] = current_state_type; 627228072Sbapt 628228072Sbapt /* Fix up equivalence classes base on this transition. Note that any 629228072Sbapt * character which has its own transition gets its own equivalence 630228072Sbapt * class. Thus only characters which are only in character classes 631228072Sbapt * have a chance at being in the same equivalence class. E.g. "a|b" 632228072Sbapt * puts 'a' and 'b' into two different equivalence classes. "[ab]" 633228072Sbapt * puts them in the same equivalence class (barring other differences 634228072Sbapt * elsewhere in the input). 635228072Sbapt */ 636228072Sbapt 637228072Sbapt if (sym < 0) { 638228072Sbapt /* We don't have to update the equivalence classes since 639228072Sbapt * that was already done when the ccl was created for the 640228072Sbapt * first time. 641228072Sbapt */ 642228072Sbapt } 643228072Sbapt 644228072Sbapt else if (sym == SYM_EPSILON) 645228072Sbapt ++numeps; 646228072Sbapt 647228072Sbapt else { 648228072Sbapt check_char (sym); 649228072Sbapt 650228072Sbapt if (useecs) 651228072Sbapt /* Map NUL's to csize. */ 652228072Sbapt mkechar (sym ? sym : csize, nextecm, ecgroup); 653228072Sbapt } 654228072Sbapt 655228072Sbapt return lastnfa; 656228072Sbapt} 657228072Sbapt 658228072Sbapt 659228072Sbapt/* mkxtion - make a transition from one state to another 660228072Sbapt * 661228072Sbapt * synopsis 662228072Sbapt * 663228072Sbapt * mkxtion( statefrom, stateto ); 664228072Sbapt * 665228072Sbapt * statefrom - the state from which the transition is to be made 666228072Sbapt * stateto - the state to which the transition is to be made 667228072Sbapt */ 668228072Sbapt 669228072Sbaptvoid mkxtion (statefrom, stateto) 670228072Sbapt int statefrom, stateto; 671228072Sbapt{ 672228072Sbapt if (trans1[statefrom] == NO_TRANSITION) 673228072Sbapt trans1[statefrom] = stateto; 674228072Sbapt 675228072Sbapt else if ((transchar[statefrom] != SYM_EPSILON) || 676228072Sbapt (trans2[statefrom] != NO_TRANSITION)) 677228072Sbapt flexfatal (_("found too many transitions in mkxtion()")); 678228072Sbapt 679228072Sbapt else { /* second out-transition for an epsilon state */ 680228072Sbapt ++eps2; 681228072Sbapt trans2[statefrom] = stateto; 682228072Sbapt } 683228072Sbapt} 684228072Sbapt 685228072Sbapt/* new_rule - initialize for a new rule */ 686228072Sbapt 687228072Sbaptvoid new_rule () 688228072Sbapt{ 689228072Sbapt if (++num_rules >= current_max_rules) { 690228072Sbapt ++num_reallocs; 691228072Sbapt current_max_rules += MAX_RULES_INCREMENT; 692228072Sbapt rule_type = reallocate_integer_array (rule_type, 693228072Sbapt current_max_rules); 694228072Sbapt rule_linenum = reallocate_integer_array (rule_linenum, 695228072Sbapt current_max_rules); 696228072Sbapt rule_useful = reallocate_integer_array (rule_useful, 697228072Sbapt current_max_rules); 698228072Sbapt rule_has_nl = reallocate_bool_array (rule_has_nl, 699228072Sbapt current_max_rules); 700228072Sbapt } 701228072Sbapt 702228072Sbapt if (num_rules > MAX_RULE) 703228072Sbapt lerrif (_("too many rules (> %d)!"), MAX_RULE); 704228072Sbapt 705228072Sbapt rule_linenum[num_rules] = linenum; 706228072Sbapt rule_useful[num_rules] = false; 707228072Sbapt rule_has_nl[num_rules] = false; 708228072Sbapt} 709