1228072Sbapt/* tblcmp - table compression routines */ 2228072Sbapt 3228072Sbapt/* Copyright (c) 1990 The Regents of the University of California. */ 4228072Sbapt/* All rights reserved. */ 5228072Sbapt 6228072Sbapt/* This code is derived from software contributed to Berkeley by */ 7228072Sbapt/* Vern Paxson. */ 8228072Sbapt 9228072Sbapt/* The United States Government has rights in this work pursuant */ 10228072Sbapt/* to contract no. DE-AC03-76SF00098 between the United States */ 11228072Sbapt/* Department of Energy and the University of California. */ 12228072Sbapt 13228072Sbapt/* This file is part of flex. */ 14228072Sbapt 15228072Sbapt/* Redistribution and use in source and binary forms, with or without */ 16228072Sbapt/* modification, are permitted provided that the following conditions */ 17228072Sbapt/* are met: */ 18228072Sbapt 19228072Sbapt/* 1. Redistributions of source code must retain the above copyright */ 20228072Sbapt/* notice, this list of conditions and the following disclaimer. */ 21228072Sbapt/* 2. Redistributions in binary form must reproduce the above copyright */ 22228072Sbapt/* notice, this list of conditions and the following disclaimer in the */ 23228072Sbapt/* documentation and/or other materials provided with the distribution. */ 24228072Sbapt 25228072Sbapt/* Neither the name of the University nor the names of its contributors */ 26228072Sbapt/* may be used to endorse or promote products derived from this software */ 27228072Sbapt/* without specific prior written permission. */ 28228072Sbapt 29228072Sbapt/* THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR */ 30228072Sbapt/* IMPLIED WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED */ 31228072Sbapt/* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR */ 32228072Sbapt/* PURPOSE. */ 33228072Sbapt 34228072Sbapt#include "flexdef.h" 35228072Sbapt 36228072Sbapt 37228072Sbapt/* declarations for functions that have forward references */ 38228072Sbapt 39250874Sjkimvoid mkentry PROTO ((int *, int, int, int, int)); 40228072Sbaptvoid mkprot PROTO ((int[], int, int)); 41228072Sbaptvoid mktemplate PROTO ((int[], int, int)); 42228072Sbaptvoid mv2front PROTO ((int)); 43228072Sbaptint tbldiff PROTO ((int[], int, int[])); 44228072Sbapt 45228072Sbapt 46228072Sbapt/* bldtbl - build table entries for dfa state 47228072Sbapt * 48228072Sbapt * synopsis 49228072Sbapt * int state[numecs], statenum, totaltrans, comstate, comfreq; 50228072Sbapt * bldtbl( state, statenum, totaltrans, comstate, comfreq ); 51228072Sbapt * 52228072Sbapt * State is the statenum'th dfa state. It is indexed by equivalence class and 53228072Sbapt * gives the number of the state to enter for a given equivalence class. 54228072Sbapt * totaltrans is the total number of transitions out of the state. Comstate 55228072Sbapt * is that state which is the destination of the most transitions out of State. 56228072Sbapt * Comfreq is how many transitions there are out of State to Comstate. 57228072Sbapt * 58228072Sbapt * A note on terminology: 59228072Sbapt * "protos" are transition tables which have a high probability of 60228072Sbapt * either being redundant (a state processed later will have an identical 61228072Sbapt * transition table) or nearly redundant (a state processed later will have 62228072Sbapt * many of the same out-transitions). A "most recently used" queue of 63228072Sbapt * protos is kept around with the hope that most states will find a proto 64228072Sbapt * which is similar enough to be usable, and therefore compacting the 65228072Sbapt * output tables. 66228072Sbapt * "templates" are a special type of proto. If a transition table is 67228072Sbapt * homogeneous or nearly homogeneous (all transitions go to the same 68228072Sbapt * destination) then the odds are good that future states will also go 69228072Sbapt * to the same destination state on basically the same character set. 70228072Sbapt * These homogeneous states are so common when dealing with large rule 71228072Sbapt * sets that they merit special attention. If the transition table were 72228072Sbapt * simply made into a proto, then (typically) each subsequent, similar 73228072Sbapt * state will differ from the proto for two out-transitions. One of these 74228072Sbapt * out-transitions will be that character on which the proto does not go 75228072Sbapt * to the common destination, and one will be that character on which the 76228072Sbapt * state does not go to the common destination. Templates, on the other 77228072Sbapt * hand, go to the common state on EVERY transition character, and therefore 78228072Sbapt * cost only one difference. 79228072Sbapt */ 80228072Sbapt 81228072Sbaptvoid bldtbl (state, statenum, totaltrans, comstate, comfreq) 82228072Sbapt int state[], statenum, totaltrans, comstate, comfreq; 83228072Sbapt{ 84228072Sbapt int extptr, extrct[2][CSIZE + 1]; 85228072Sbapt int mindiff, minprot, i, d; 86228072Sbapt 87228072Sbapt /* If extptr is 0 then the first array of extrct holds the result 88228072Sbapt * of the "best difference" to date, which is those transitions 89228072Sbapt * which occur in "state" but not in the proto which, to date, 90228072Sbapt * has the fewest differences between itself and "state". If 91228072Sbapt * extptr is 1 then the second array of extrct hold the best 92228072Sbapt * difference. The two arrays are toggled between so that the 93228072Sbapt * best difference to date can be kept around and also a difference 94228072Sbapt * just created by checking against a candidate "best" proto. 95228072Sbapt */ 96228072Sbapt 97228072Sbapt extptr = 0; 98228072Sbapt 99228072Sbapt /* If the state has too few out-transitions, don't bother trying to 100228072Sbapt * compact its tables. 101228072Sbapt */ 102228072Sbapt 103228072Sbapt if ((totaltrans * 100) < (numecs * PROTO_SIZE_PERCENTAGE)) 104228072Sbapt mkentry (state, numecs, statenum, JAMSTATE, totaltrans); 105228072Sbapt 106228072Sbapt else { 107228072Sbapt /* "checkcom" is true if we should only check "state" against 108228072Sbapt * protos which have the same "comstate" value. 109228072Sbapt */ 110228072Sbapt int checkcom = 111228072Sbapt 112228072Sbapt comfreq * 100 > totaltrans * CHECK_COM_PERCENTAGE; 113228072Sbapt 114228072Sbapt minprot = firstprot; 115228072Sbapt mindiff = totaltrans; 116228072Sbapt 117228072Sbapt if (checkcom) { 118228072Sbapt /* Find first proto which has the same "comstate". */ 119228072Sbapt for (i = firstprot; i != NIL; i = protnext[i]) 120228072Sbapt if (protcomst[i] == comstate) { 121228072Sbapt minprot = i; 122228072Sbapt mindiff = tbldiff (state, minprot, 123228072Sbapt extrct[extptr]); 124228072Sbapt break; 125228072Sbapt } 126228072Sbapt } 127228072Sbapt 128228072Sbapt else { 129228072Sbapt /* Since we've decided that the most common destination 130228072Sbapt * out of "state" does not occur with a high enough 131228072Sbapt * frequency, we set the "comstate" to zero, assuring 132228072Sbapt * that if this state is entered into the proto list, 133228072Sbapt * it will not be considered a template. 134228072Sbapt */ 135228072Sbapt comstate = 0; 136228072Sbapt 137228072Sbapt if (firstprot != NIL) { 138228072Sbapt minprot = firstprot; 139228072Sbapt mindiff = tbldiff (state, minprot, 140228072Sbapt extrct[extptr]); 141228072Sbapt } 142228072Sbapt } 143228072Sbapt 144228072Sbapt /* We now have the first interesting proto in "minprot". If 145228072Sbapt * it matches within the tolerances set for the first proto, 146228072Sbapt * we don't want to bother scanning the rest of the proto list 147228072Sbapt * to see if we have any other reasonable matches. 148228072Sbapt */ 149228072Sbapt 150228072Sbapt if (mindiff * 100 > 151228072Sbapt totaltrans * FIRST_MATCH_DIFF_PERCENTAGE) { 152228072Sbapt /* Not a good enough match. Scan the rest of the 153228072Sbapt * protos. 154228072Sbapt */ 155228072Sbapt for (i = minprot; i != NIL; i = protnext[i]) { 156228072Sbapt d = tbldiff (state, i, extrct[1 - extptr]); 157228072Sbapt if (d < mindiff) { 158228072Sbapt extptr = 1 - extptr; 159228072Sbapt mindiff = d; 160228072Sbapt minprot = i; 161228072Sbapt } 162228072Sbapt } 163228072Sbapt } 164228072Sbapt 165228072Sbapt /* Check if the proto we've decided on as our best bet is close 166228072Sbapt * enough to the state we want to match to be usable. 167228072Sbapt */ 168228072Sbapt 169228072Sbapt if (mindiff * 100 > 170228072Sbapt totaltrans * ACCEPTABLE_DIFF_PERCENTAGE) { 171228072Sbapt /* No good. If the state is homogeneous enough, 172228072Sbapt * we make a template out of it. Otherwise, we 173228072Sbapt * make a proto. 174228072Sbapt */ 175228072Sbapt 176228072Sbapt if (comfreq * 100 >= 177228072Sbapt totaltrans * TEMPLATE_SAME_PERCENTAGE) 178228072Sbapt mktemplate (state, statenum, 179228072Sbapt comstate); 180228072Sbapt 181228072Sbapt else { 182228072Sbapt mkprot (state, statenum, comstate); 183228072Sbapt mkentry (state, numecs, statenum, 184228072Sbapt JAMSTATE, totaltrans); 185228072Sbapt } 186228072Sbapt } 187228072Sbapt 188228072Sbapt else { /* use the proto */ 189228072Sbapt mkentry (extrct[extptr], numecs, statenum, 190228072Sbapt prottbl[minprot], mindiff); 191228072Sbapt 192228072Sbapt /* If this state was sufficiently different from the 193228072Sbapt * proto we built it from, make it, too, a proto. 194228072Sbapt */ 195228072Sbapt 196228072Sbapt if (mindiff * 100 >= 197228072Sbapt totaltrans * NEW_PROTO_DIFF_PERCENTAGE) 198228072Sbapt mkprot (state, statenum, comstate); 199228072Sbapt 200228072Sbapt /* Since mkprot added a new proto to the proto queue, 201228072Sbapt * it's possible that "minprot" is no longer on the 202228072Sbapt * proto queue (if it happened to have been the last 203228072Sbapt * entry, it would have been bumped off). If it's 204228072Sbapt * not there, then the new proto took its physical 205228072Sbapt * place (though logically the new proto is at the 206228072Sbapt * beginning of the queue), so in that case the 207228072Sbapt * following call will do nothing. 208228072Sbapt */ 209228072Sbapt 210228072Sbapt mv2front (minprot); 211228072Sbapt } 212228072Sbapt } 213228072Sbapt} 214228072Sbapt 215228072Sbapt 216228072Sbapt/* cmptmps - compress template table entries 217228072Sbapt * 218228072Sbapt * Template tables are compressed by using the 'template equivalence 219228072Sbapt * classes', which are collections of transition character equivalence 220228072Sbapt * classes which always appear together in templates - really meta-equivalence 221228072Sbapt * classes. 222228072Sbapt */ 223228072Sbapt 224228072Sbaptvoid cmptmps () 225228072Sbapt{ 226228072Sbapt int tmpstorage[CSIZE + 1]; 227250874Sjkim int *tmp = tmpstorage, i, j; 228228072Sbapt int totaltrans, trans; 229228072Sbapt 230228072Sbapt peakpairs = numtemps * numecs + tblend; 231228072Sbapt 232228072Sbapt if (usemecs) { 233228072Sbapt /* Create equivalence classes based on data gathered on 234228072Sbapt * template transitions. 235228072Sbapt */ 236228072Sbapt nummecs = cre8ecs (tecfwd, tecbck, numecs); 237228072Sbapt } 238228072Sbapt 239228072Sbapt else 240228072Sbapt nummecs = numecs; 241228072Sbapt 242228072Sbapt while (lastdfa + numtemps + 1 >= current_max_dfas) 243228072Sbapt increase_max_dfas (); 244228072Sbapt 245228072Sbapt /* Loop through each template. */ 246228072Sbapt 247228072Sbapt for (i = 1; i <= numtemps; ++i) { 248228072Sbapt /* Number of non-jam transitions out of this template. */ 249228072Sbapt totaltrans = 0; 250228072Sbapt 251228072Sbapt for (j = 1; j <= numecs; ++j) { 252228072Sbapt trans = tnxt[numecs * i + j]; 253228072Sbapt 254228072Sbapt if (usemecs) { 255228072Sbapt /* The absolute value of tecbck is the 256228072Sbapt * meta-equivalence class of a given 257228072Sbapt * equivalence class, as set up by cre8ecs(). 258228072Sbapt */ 259228072Sbapt if (tecbck[j] > 0) { 260228072Sbapt tmp[tecbck[j]] = trans; 261228072Sbapt 262228072Sbapt if (trans > 0) 263228072Sbapt ++totaltrans; 264228072Sbapt } 265228072Sbapt } 266228072Sbapt 267228072Sbapt else { 268228072Sbapt tmp[j] = trans; 269228072Sbapt 270228072Sbapt if (trans > 0) 271228072Sbapt ++totaltrans; 272228072Sbapt } 273228072Sbapt } 274228072Sbapt 275228072Sbapt /* It is assumed (in a rather subtle way) in the skeleton 276228072Sbapt * that if we're using meta-equivalence classes, the def[] 277228072Sbapt * entry for all templates is the jam template, i.e., 278228072Sbapt * templates never default to other non-jam table entries 279228072Sbapt * (e.g., another template) 280228072Sbapt */ 281228072Sbapt 282228072Sbapt /* Leave room for the jam-state after the last real state. */ 283228072Sbapt mkentry (tmp, nummecs, lastdfa + i + 1, JAMSTATE, 284228072Sbapt totaltrans); 285228072Sbapt } 286228072Sbapt} 287228072Sbapt 288228072Sbapt 289228072Sbapt 290228072Sbapt/* expand_nxt_chk - expand the next check arrays */ 291228072Sbapt 292228072Sbaptvoid expand_nxt_chk () 293228072Sbapt{ 294250874Sjkim int old_max = current_max_xpairs; 295228072Sbapt 296228072Sbapt current_max_xpairs += MAX_XPAIRS_INCREMENT; 297228072Sbapt 298228072Sbapt ++num_reallocs; 299228072Sbapt 300228072Sbapt nxt = reallocate_integer_array (nxt, current_max_xpairs); 301228072Sbapt chk = reallocate_integer_array (chk, current_max_xpairs); 302228072Sbapt 303228072Sbapt zero_out ((char *) (chk + old_max), 304228072Sbapt (size_t) (MAX_XPAIRS_INCREMENT * sizeof (int))); 305228072Sbapt} 306228072Sbapt 307228072Sbapt 308228072Sbapt/* find_table_space - finds a space in the table for a state to be placed 309228072Sbapt * 310228072Sbapt * synopsis 311228072Sbapt * int *state, numtrans, block_start; 312228072Sbapt * int find_table_space(); 313228072Sbapt * 314228072Sbapt * block_start = find_table_space( state, numtrans ); 315228072Sbapt * 316228072Sbapt * State is the state to be added to the full speed transition table. 317228072Sbapt * Numtrans is the number of out-transitions for the state. 318228072Sbapt * 319228072Sbapt * find_table_space() returns the position of the start of the first block (in 320228072Sbapt * chk) able to accommodate the state 321228072Sbapt * 322228072Sbapt * In determining if a state will or will not fit, find_table_space() must take 323228072Sbapt * into account the fact that an end-of-buffer state will be added at [0], 324228072Sbapt * and an action number will be added in [-1]. 325228072Sbapt */ 326228072Sbapt 327228072Sbaptint find_table_space (state, numtrans) 328228072Sbapt int *state, numtrans; 329228072Sbapt{ 330228072Sbapt /* Firstfree is the position of the first possible occurrence of two 331228072Sbapt * consecutive unused records in the chk and nxt arrays. 332228072Sbapt */ 333250874Sjkim int i; 334250874Sjkim int *state_ptr, *chk_ptr; 335250874Sjkim int *ptr_to_last_entry_in_state; 336228072Sbapt 337228072Sbapt /* If there are too many out-transitions, put the state at the end of 338228072Sbapt * nxt and chk. 339228072Sbapt */ 340228072Sbapt if (numtrans > MAX_XTIONS_FULL_INTERIOR_FIT) { 341228072Sbapt /* If table is empty, return the first available spot in 342228072Sbapt * chk/nxt, which should be 1. 343228072Sbapt */ 344228072Sbapt if (tblend < 2) 345228072Sbapt return 1; 346228072Sbapt 347228072Sbapt /* Start searching for table space near the end of 348228072Sbapt * chk/nxt arrays. 349228072Sbapt */ 350228072Sbapt i = tblend - numecs; 351228072Sbapt } 352228072Sbapt 353228072Sbapt else 354228072Sbapt /* Start searching for table space from the beginning 355228072Sbapt * (skipping only the elements which will definitely not 356228072Sbapt * hold the new state). 357228072Sbapt */ 358228072Sbapt i = firstfree; 359228072Sbapt 360228072Sbapt while (1) { /* loops until a space is found */ 361228072Sbapt while (i + numecs >= current_max_xpairs) 362228072Sbapt expand_nxt_chk (); 363228072Sbapt 364228072Sbapt /* Loops until space for end-of-buffer and action number 365228072Sbapt * are found. 366228072Sbapt */ 367228072Sbapt while (1) { 368228072Sbapt /* Check for action number space. */ 369228072Sbapt if (chk[i - 1] == 0) { 370228072Sbapt /* Check for end-of-buffer space. */ 371228072Sbapt if (chk[i] == 0) 372228072Sbapt break; 373228072Sbapt 374228072Sbapt else 375228072Sbapt /* Since i != 0, there is no use 376228072Sbapt * checking to see if (++i) - 1 == 0, 377228072Sbapt * because that's the same as i == 0, 378228072Sbapt * so we skip a space. 379228072Sbapt */ 380228072Sbapt i += 2; 381228072Sbapt } 382228072Sbapt 383228072Sbapt else 384228072Sbapt ++i; 385228072Sbapt 386228072Sbapt while (i + numecs >= current_max_xpairs) 387228072Sbapt expand_nxt_chk (); 388228072Sbapt } 389228072Sbapt 390228072Sbapt /* If we started search from the beginning, store the new 391228072Sbapt * firstfree for the next call of find_table_space(). 392228072Sbapt */ 393228072Sbapt if (numtrans <= MAX_XTIONS_FULL_INTERIOR_FIT) 394228072Sbapt firstfree = i + 1; 395228072Sbapt 396228072Sbapt /* Check to see if all elements in chk (and therefore nxt) 397228072Sbapt * that are needed for the new state have not yet been taken. 398228072Sbapt */ 399228072Sbapt 400228072Sbapt state_ptr = &state[1]; 401228072Sbapt ptr_to_last_entry_in_state = &chk[i + numecs + 1]; 402228072Sbapt 403228072Sbapt for (chk_ptr = &chk[i + 1]; 404228072Sbapt chk_ptr != ptr_to_last_entry_in_state; ++chk_ptr) 405228072Sbapt if (*(state_ptr++) != 0 && *chk_ptr != 0) 406228072Sbapt break; 407228072Sbapt 408228072Sbapt if (chk_ptr == ptr_to_last_entry_in_state) 409228072Sbapt return i; 410228072Sbapt 411228072Sbapt else 412228072Sbapt ++i; 413228072Sbapt } 414228072Sbapt} 415228072Sbapt 416228072Sbapt 417228072Sbapt/* inittbl - initialize transition tables 418228072Sbapt * 419228072Sbapt * Initializes "firstfree" to be one beyond the end of the table. Initializes 420228072Sbapt * all "chk" entries to be zero. 421228072Sbapt */ 422228072Sbaptvoid inittbl () 423228072Sbapt{ 424250874Sjkim int i; 425228072Sbapt 426228072Sbapt zero_out ((char *) chk, 427228072Sbapt 428228072Sbapt (size_t) (current_max_xpairs * sizeof (int))); 429228072Sbapt 430228072Sbapt tblend = 0; 431228072Sbapt firstfree = tblend + 1; 432228072Sbapt numtemps = 0; 433228072Sbapt 434228072Sbapt if (usemecs) { 435228072Sbapt /* Set up doubly-linked meta-equivalence classes; these 436228072Sbapt * are sets of equivalence classes which all have identical 437228072Sbapt * transitions out of TEMPLATES. 438228072Sbapt */ 439228072Sbapt 440228072Sbapt tecbck[1] = NIL; 441228072Sbapt 442228072Sbapt for (i = 2; i <= numecs; ++i) { 443228072Sbapt tecbck[i] = i - 1; 444228072Sbapt tecfwd[i - 1] = i; 445228072Sbapt } 446228072Sbapt 447228072Sbapt tecfwd[numecs] = NIL; 448228072Sbapt } 449228072Sbapt} 450228072Sbapt 451228072Sbapt 452228072Sbapt/* mkdeftbl - make the default, "jam" table entries */ 453228072Sbapt 454228072Sbaptvoid mkdeftbl () 455228072Sbapt{ 456228072Sbapt int i; 457228072Sbapt 458228072Sbapt jamstate = lastdfa + 1; 459228072Sbapt 460228072Sbapt ++tblend; /* room for transition on end-of-buffer character */ 461228072Sbapt 462228072Sbapt while (tblend + numecs >= current_max_xpairs) 463228072Sbapt expand_nxt_chk (); 464228072Sbapt 465228072Sbapt /* Add in default end-of-buffer transition. */ 466228072Sbapt nxt[tblend] = end_of_buffer_state; 467228072Sbapt chk[tblend] = jamstate; 468228072Sbapt 469228072Sbapt for (i = 1; i <= numecs; ++i) { 470228072Sbapt nxt[tblend + i] = 0; 471228072Sbapt chk[tblend + i] = jamstate; 472228072Sbapt } 473228072Sbapt 474228072Sbapt jambase = tblend; 475228072Sbapt 476228072Sbapt base[jamstate] = jambase; 477228072Sbapt def[jamstate] = 0; 478228072Sbapt 479228072Sbapt tblend += numecs; 480228072Sbapt ++numtemps; 481228072Sbapt} 482228072Sbapt 483228072Sbapt 484228072Sbapt/* mkentry - create base/def and nxt/chk entries for transition array 485228072Sbapt * 486228072Sbapt * synopsis 487228072Sbapt * int state[numchars + 1], numchars, statenum, deflink, totaltrans; 488228072Sbapt * mkentry( state, numchars, statenum, deflink, totaltrans ); 489228072Sbapt * 490228072Sbapt * "state" is a transition array "numchars" characters in size, "statenum" 491228072Sbapt * is the offset to be used into the base/def tables, and "deflink" is the 492228072Sbapt * entry to put in the "def" table entry. If "deflink" is equal to 493228072Sbapt * "JAMSTATE", then no attempt will be made to fit zero entries of "state" 494228072Sbapt * (i.e., jam entries) into the table. It is assumed that by linking to 495228072Sbapt * "JAMSTATE" they will be taken care of. In any case, entries in "state" 496228072Sbapt * marking transitions to "SAME_TRANS" are treated as though they will be 497250874Sjkim * taken care of by wherever "deflink" points. "totaltrans" is the total 498228072Sbapt * number of transitions out of the state. If it is below a certain threshold, 499228072Sbapt * the tables are searched for an interior spot that will accommodate the 500228072Sbapt * state array. 501228072Sbapt */ 502228072Sbapt 503228072Sbaptvoid mkentry (state, numchars, statenum, deflink, totaltrans) 504250874Sjkim int *state; 505228072Sbapt int numchars, statenum, deflink, totaltrans; 506228072Sbapt{ 507250874Sjkim int minec, maxec, i, baseaddr; 508228072Sbapt int tblbase, tbllast; 509228072Sbapt 510228072Sbapt if (totaltrans == 0) { /* there are no out-transitions */ 511228072Sbapt if (deflink == JAMSTATE) 512228072Sbapt base[statenum] = JAMSTATE; 513228072Sbapt else 514228072Sbapt base[statenum] = 0; 515228072Sbapt 516228072Sbapt def[statenum] = deflink; 517228072Sbapt return; 518228072Sbapt } 519228072Sbapt 520228072Sbapt for (minec = 1; minec <= numchars; ++minec) { 521228072Sbapt if (state[minec] != SAME_TRANS) 522228072Sbapt if (state[minec] != 0 || deflink != JAMSTATE) 523228072Sbapt break; 524228072Sbapt } 525228072Sbapt 526228072Sbapt if (totaltrans == 1) { 527228072Sbapt /* There's only one out-transition. Save it for later to fill 528228072Sbapt * in holes in the tables. 529228072Sbapt */ 530228072Sbapt stack1 (statenum, minec, state[minec], deflink); 531228072Sbapt return; 532228072Sbapt } 533228072Sbapt 534228072Sbapt for (maxec = numchars; maxec > 0; --maxec) { 535228072Sbapt if (state[maxec] != SAME_TRANS) 536228072Sbapt if (state[maxec] != 0 || deflink != JAMSTATE) 537228072Sbapt break; 538228072Sbapt } 539228072Sbapt 540228072Sbapt /* Whether we try to fit the state table in the middle of the table 541228072Sbapt * entries we have already generated, or if we just take the state 542228072Sbapt * table at the end of the nxt/chk tables, we must make sure that we 543228072Sbapt * have a valid base address (i.e., non-negative). Note that 544228072Sbapt * negative base addresses dangerous at run-time (because indexing 545228072Sbapt * the nxt array with one and a low-valued character will access 546228072Sbapt * memory before the start of the array. 547228072Sbapt */ 548228072Sbapt 549228072Sbapt /* Find the first transition of state that we need to worry about. */ 550228072Sbapt if (totaltrans * 100 <= numchars * INTERIOR_FIT_PERCENTAGE) { 551228072Sbapt /* Attempt to squeeze it into the middle of the tables. */ 552228072Sbapt baseaddr = firstfree; 553228072Sbapt 554228072Sbapt while (baseaddr < minec) { 555228072Sbapt /* Using baseaddr would result in a negative base 556228072Sbapt * address below; find the next free slot. 557228072Sbapt */ 558228072Sbapt for (++baseaddr; chk[baseaddr] != 0; ++baseaddr) ; 559228072Sbapt } 560228072Sbapt 561228072Sbapt while (baseaddr + maxec - minec + 1 >= current_max_xpairs) 562228072Sbapt expand_nxt_chk (); 563228072Sbapt 564228072Sbapt for (i = minec; i <= maxec; ++i) 565228072Sbapt if (state[i] != SAME_TRANS && 566228072Sbapt (state[i] != 0 || deflink != JAMSTATE) && 567228072Sbapt chk[baseaddr + i - minec] != 0) { /* baseaddr unsuitable - find another */ 568228072Sbapt for (++baseaddr; 569228072Sbapt baseaddr < current_max_xpairs && 570228072Sbapt chk[baseaddr] != 0; ++baseaddr) ; 571228072Sbapt 572228072Sbapt while (baseaddr + maxec - minec + 1 >= 573228072Sbapt current_max_xpairs) 574228072Sbapt expand_nxt_chk (); 575228072Sbapt 576228072Sbapt /* Reset the loop counter so we'll start all 577228072Sbapt * over again next time it's incremented. 578228072Sbapt */ 579228072Sbapt 580228072Sbapt i = minec - 1; 581228072Sbapt } 582228072Sbapt } 583228072Sbapt 584228072Sbapt else { 585228072Sbapt /* Ensure that the base address we eventually generate is 586228072Sbapt * non-negative. 587228072Sbapt */ 588228072Sbapt baseaddr = MAX (tblend + 1, minec); 589228072Sbapt } 590228072Sbapt 591228072Sbapt tblbase = baseaddr - minec; 592228072Sbapt tbllast = tblbase + maxec; 593228072Sbapt 594228072Sbapt while (tbllast + 1 >= current_max_xpairs) 595228072Sbapt expand_nxt_chk (); 596228072Sbapt 597228072Sbapt base[statenum] = tblbase; 598228072Sbapt def[statenum] = deflink; 599228072Sbapt 600228072Sbapt for (i = minec; i <= maxec; ++i) 601228072Sbapt if (state[i] != SAME_TRANS) 602228072Sbapt if (state[i] != 0 || deflink != JAMSTATE) { 603228072Sbapt nxt[tblbase + i] = state[i]; 604228072Sbapt chk[tblbase + i] = statenum; 605228072Sbapt } 606228072Sbapt 607228072Sbapt if (baseaddr == firstfree) 608228072Sbapt /* Find next free slot in tables. */ 609228072Sbapt for (++firstfree; chk[firstfree] != 0; ++firstfree) ; 610228072Sbapt 611228072Sbapt tblend = MAX (tblend, tbllast); 612228072Sbapt} 613228072Sbapt 614228072Sbapt 615228072Sbapt/* mk1tbl - create table entries for a state (or state fragment) which 616228072Sbapt * has only one out-transition 617228072Sbapt */ 618228072Sbapt 619228072Sbaptvoid mk1tbl (state, sym, onenxt, onedef) 620228072Sbapt int state, sym, onenxt, onedef; 621228072Sbapt{ 622228072Sbapt if (firstfree < sym) 623228072Sbapt firstfree = sym; 624228072Sbapt 625228072Sbapt while (chk[firstfree] != 0) 626228072Sbapt if (++firstfree >= current_max_xpairs) 627228072Sbapt expand_nxt_chk (); 628228072Sbapt 629228072Sbapt base[state] = firstfree - sym; 630228072Sbapt def[state] = onedef; 631228072Sbapt chk[firstfree] = state; 632228072Sbapt nxt[firstfree] = onenxt; 633228072Sbapt 634228072Sbapt if (firstfree > tblend) { 635228072Sbapt tblend = firstfree++; 636228072Sbapt 637228072Sbapt if (firstfree >= current_max_xpairs) 638228072Sbapt expand_nxt_chk (); 639228072Sbapt } 640228072Sbapt} 641228072Sbapt 642228072Sbapt 643228072Sbapt/* mkprot - create new proto entry */ 644228072Sbapt 645228072Sbaptvoid mkprot (state, statenum, comstate) 646228072Sbapt int state[], statenum, comstate; 647228072Sbapt{ 648228072Sbapt int i, slot, tblbase; 649228072Sbapt 650228072Sbapt if (++numprots >= MSP || numecs * numprots >= PROT_SAVE_SIZE) { 651228072Sbapt /* Gotta make room for the new proto by dropping last entry in 652228072Sbapt * the queue. 653228072Sbapt */ 654228072Sbapt slot = lastprot; 655228072Sbapt lastprot = protprev[lastprot]; 656228072Sbapt protnext[lastprot] = NIL; 657228072Sbapt } 658228072Sbapt 659228072Sbapt else 660228072Sbapt slot = numprots; 661228072Sbapt 662228072Sbapt protnext[slot] = firstprot; 663228072Sbapt 664228072Sbapt if (firstprot != NIL) 665228072Sbapt protprev[firstprot] = slot; 666228072Sbapt 667228072Sbapt firstprot = slot; 668228072Sbapt prottbl[slot] = statenum; 669228072Sbapt protcomst[slot] = comstate; 670228072Sbapt 671228072Sbapt /* Copy state into save area so it can be compared with rapidly. */ 672228072Sbapt tblbase = numecs * (slot - 1); 673228072Sbapt 674228072Sbapt for (i = 1; i <= numecs; ++i) 675228072Sbapt protsave[tblbase + i] = state[i]; 676228072Sbapt} 677228072Sbapt 678228072Sbapt 679228072Sbapt/* mktemplate - create a template entry based on a state, and connect the state 680228072Sbapt * to it 681228072Sbapt */ 682228072Sbapt 683228072Sbaptvoid mktemplate (state, statenum, comstate) 684228072Sbapt int state[], statenum, comstate; 685228072Sbapt{ 686228072Sbapt int i, numdiff, tmpbase, tmp[CSIZE + 1]; 687228072Sbapt Char transset[CSIZE + 1]; 688228072Sbapt int tsptr; 689228072Sbapt 690228072Sbapt ++numtemps; 691228072Sbapt 692228072Sbapt tsptr = 0; 693228072Sbapt 694228072Sbapt /* Calculate where we will temporarily store the transition table 695228072Sbapt * of the template in the tnxt[] array. The final transition table 696228072Sbapt * gets created by cmptmps(). 697228072Sbapt */ 698228072Sbapt 699228072Sbapt tmpbase = numtemps * numecs; 700228072Sbapt 701228072Sbapt if (tmpbase + numecs >= current_max_template_xpairs) { 702228072Sbapt current_max_template_xpairs += 703228072Sbapt MAX_TEMPLATE_XPAIRS_INCREMENT; 704228072Sbapt 705228072Sbapt ++num_reallocs; 706228072Sbapt 707228072Sbapt tnxt = reallocate_integer_array (tnxt, 708228072Sbapt current_max_template_xpairs); 709228072Sbapt } 710228072Sbapt 711228072Sbapt for (i = 1; i <= numecs; ++i) 712228072Sbapt if (state[i] == 0) 713228072Sbapt tnxt[tmpbase + i] = 0; 714228072Sbapt else { 715228072Sbapt transset[tsptr++] = i; 716228072Sbapt tnxt[tmpbase + i] = comstate; 717228072Sbapt } 718228072Sbapt 719228072Sbapt if (usemecs) 720228072Sbapt mkeccl (transset, tsptr, tecfwd, tecbck, numecs, 0); 721228072Sbapt 722228072Sbapt mkprot (tnxt + tmpbase, -numtemps, comstate); 723228072Sbapt 724228072Sbapt /* We rely on the fact that mkprot adds things to the beginning 725228072Sbapt * of the proto queue. 726228072Sbapt */ 727228072Sbapt 728228072Sbapt numdiff = tbldiff (state, firstprot, tmp); 729228072Sbapt mkentry (tmp, numecs, statenum, -numtemps, numdiff); 730228072Sbapt} 731228072Sbapt 732228072Sbapt 733228072Sbapt/* mv2front - move proto queue element to front of queue */ 734228072Sbapt 735228072Sbaptvoid mv2front (qelm) 736228072Sbapt int qelm; 737228072Sbapt{ 738228072Sbapt if (firstprot != qelm) { 739228072Sbapt if (qelm == lastprot) 740228072Sbapt lastprot = protprev[lastprot]; 741228072Sbapt 742228072Sbapt protnext[protprev[qelm]] = protnext[qelm]; 743228072Sbapt 744228072Sbapt if (protnext[qelm] != NIL) 745228072Sbapt protprev[protnext[qelm]] = protprev[qelm]; 746228072Sbapt 747228072Sbapt protprev[qelm] = NIL; 748228072Sbapt protnext[qelm] = firstprot; 749228072Sbapt protprev[firstprot] = qelm; 750228072Sbapt firstprot = qelm; 751228072Sbapt } 752228072Sbapt} 753228072Sbapt 754228072Sbapt 755228072Sbapt/* place_state - place a state into full speed transition table 756228072Sbapt * 757228072Sbapt * State is the statenum'th state. It is indexed by equivalence class and 758228072Sbapt * gives the number of the state to enter for a given equivalence class. 759228072Sbapt * Transnum is the number of out-transitions for the state. 760228072Sbapt */ 761228072Sbapt 762228072Sbaptvoid place_state (state, statenum, transnum) 763228072Sbapt int *state, statenum, transnum; 764228072Sbapt{ 765250874Sjkim int i; 766250874Sjkim int *state_ptr; 767228072Sbapt int position = find_table_space (state, transnum); 768228072Sbapt 769228072Sbapt /* "base" is the table of start positions. */ 770228072Sbapt base[statenum] = position; 771228072Sbapt 772228072Sbapt /* Put in action number marker; this non-zero number makes sure that 773228072Sbapt * find_table_space() knows that this position in chk/nxt is taken 774228072Sbapt * and should not be used for another accepting number in another 775228072Sbapt * state. 776228072Sbapt */ 777228072Sbapt chk[position - 1] = 1; 778228072Sbapt 779228072Sbapt /* Put in end-of-buffer marker; this is for the same purposes as 780228072Sbapt * above. 781228072Sbapt */ 782228072Sbapt chk[position] = 1; 783228072Sbapt 784228072Sbapt /* Place the state into chk and nxt. */ 785228072Sbapt state_ptr = &state[1]; 786228072Sbapt 787228072Sbapt for (i = 1; i <= numecs; ++i, ++state_ptr) 788228072Sbapt if (*state_ptr != 0) { 789228072Sbapt chk[position + i] = i; 790228072Sbapt nxt[position + i] = *state_ptr; 791228072Sbapt } 792228072Sbapt 793228072Sbapt if (position + numecs > tblend) 794228072Sbapt tblend = position + numecs; 795228072Sbapt} 796228072Sbapt 797228072Sbapt 798228072Sbapt/* stack1 - save states with only one out-transition to be processed later 799228072Sbapt * 800228072Sbapt * If there's room for another state on the "one-transition" stack, the 801228072Sbapt * state is pushed onto it, to be processed later by mk1tbl. If there's 802228072Sbapt * no room, we process the sucker right now. 803228072Sbapt */ 804228072Sbapt 805228072Sbaptvoid stack1 (statenum, sym, nextstate, deflink) 806228072Sbapt int statenum, sym, nextstate, deflink; 807228072Sbapt{ 808228072Sbapt if (onesp >= ONE_STACK_SIZE - 1) 809228072Sbapt mk1tbl (statenum, sym, nextstate, deflink); 810228072Sbapt 811228072Sbapt else { 812228072Sbapt ++onesp; 813228072Sbapt onestate[onesp] = statenum; 814228072Sbapt onesym[onesp] = sym; 815228072Sbapt onenext[onesp] = nextstate; 816228072Sbapt onedef[onesp] = deflink; 817228072Sbapt } 818228072Sbapt} 819228072Sbapt 820228072Sbapt 821228072Sbapt/* tbldiff - compute differences between two state tables 822228072Sbapt * 823228072Sbapt * "state" is the state array which is to be extracted from the pr'th 824228072Sbapt * proto. "pr" is both the number of the proto we are extracting from 825228072Sbapt * and an index into the save area where we can find the proto's complete 826228072Sbapt * state table. Each entry in "state" which differs from the corresponding 827228072Sbapt * entry of "pr" will appear in "ext". 828228072Sbapt * 829228072Sbapt * Entries which are the same in both "state" and "pr" will be marked 830228072Sbapt * as transitions to "SAME_TRANS" in "ext". The total number of differences 831228072Sbapt * between "state" and "pr" is returned as function value. Note that this 832228072Sbapt * number is "numecs" minus the number of "SAME_TRANS" entries in "ext". 833228072Sbapt */ 834228072Sbapt 835228072Sbaptint tbldiff (state, pr, ext) 836228072Sbapt int state[], pr, ext[]; 837228072Sbapt{ 838250874Sjkim int i, *sp = state, *ep = ext, *protp; 839250874Sjkim int numdiff = 0; 840228072Sbapt 841228072Sbapt protp = &protsave[numecs * (pr - 1)]; 842228072Sbapt 843228072Sbapt for (i = numecs; i > 0; --i) { 844228072Sbapt if (*++protp == *++sp) 845228072Sbapt *++ep = SAME_TRANS; 846228072Sbapt else { 847228072Sbapt *++ep = *sp; 848228072Sbapt ++numdiff; 849228072Sbapt } 850228072Sbapt } 851228072Sbapt 852228072Sbapt return numdiff; 853228072Sbapt} 854