1228072Sbapt/* ecs - equivalence class routines */ 2228072Sbapt 3228072Sbapt/* Copyright (c) 1990 The Regents of the University of California. */ 4228072Sbapt/* All rights reserved. */ 5228072Sbapt 6228072Sbapt/* This code is derived from software contributed to Berkeley by */ 7228072Sbapt/* Vern Paxson. */ 8228072Sbapt 9228072Sbapt/* The United States Government has rights in this work pursuant */ 10228072Sbapt/* to contract no. DE-AC03-76SF00098 between the United States */ 11228072Sbapt/* Department of Energy and the University of California. */ 12228072Sbapt 13228072Sbapt/* This file is part of flex */ 14228072Sbapt 15228072Sbapt/* Redistribution and use in source and binary forms, with or without */ 16228072Sbapt/* modification, are permitted provided that the following conditions */ 17228072Sbapt/* are met: */ 18228072Sbapt 19228072Sbapt/* 1. Redistributions of source code must retain the above copyright */ 20228072Sbapt/* notice, this list of conditions and the following disclaimer. */ 21228072Sbapt/* 2. Redistributions in binary form must reproduce the above copyright */ 22228072Sbapt/* notice, this list of conditions and the following disclaimer in the */ 23228072Sbapt/* documentation and/or other materials provided with the distribution. */ 24228072Sbapt 25228072Sbapt/* Neither the name of the University nor the names of its contributors */ 26228072Sbapt/* may be used to endorse or promote products derived from this software */ 27228072Sbapt/* without specific prior written permission. */ 28228072Sbapt 29228072Sbapt/* THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR */ 30228072Sbapt/* IMPLIED WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED */ 31228072Sbapt/* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR */ 32228072Sbapt/* PURPOSE. */ 33228072Sbapt 34228072Sbapt 35228072Sbapt#include "flexdef.h" 36228072Sbapt 37228072Sbapt/* ccl2ecl - convert character classes to set of equivalence classes */ 38228072Sbapt 39228072Sbaptvoid ccl2ecl () 40228072Sbapt{ 41228072Sbapt int i, ich, newlen, cclp, ccls, cclmec; 42228072Sbapt 43228072Sbapt for (i = 1; i <= lastccl; ++i) { 44228072Sbapt /* We loop through each character class, and for each character 45228072Sbapt * in the class, add the character's equivalence class to the 46228072Sbapt * new "character" class we are creating. Thus when we are all 47228072Sbapt * done, character classes will really consist of collections 48228072Sbapt * of equivalence classes 49228072Sbapt */ 50228072Sbapt 51228072Sbapt newlen = 0; 52228072Sbapt cclp = cclmap[i]; 53228072Sbapt 54228072Sbapt for (ccls = 0; ccls < ccllen[i]; ++ccls) { 55228072Sbapt ich = ccltbl[cclp + ccls]; 56228072Sbapt cclmec = ecgroup[ich]; 57228072Sbapt 58228072Sbapt if (cclmec > 0) { 59228072Sbapt ccltbl[cclp + newlen] = cclmec; 60228072Sbapt ++newlen; 61228072Sbapt } 62228072Sbapt } 63228072Sbapt 64228072Sbapt ccllen[i] = newlen; 65228072Sbapt } 66228072Sbapt} 67228072Sbapt 68228072Sbapt 69228072Sbapt/* cre8ecs - associate equivalence class numbers with class members 70228072Sbapt * 71228072Sbapt * fwd is the forward linked-list of equivalence class members. bck 72228072Sbapt * is the backward linked-list, and num is the number of class members. 73228072Sbapt * 74228072Sbapt * Returned is the number of classes. 75228072Sbapt */ 76228072Sbapt 77228072Sbaptint cre8ecs (fwd, bck, num) 78228072Sbapt int fwd[], bck[], num; 79228072Sbapt{ 80228072Sbapt int i, j, numcl; 81228072Sbapt 82228072Sbapt numcl = 0; 83228072Sbapt 84228072Sbapt /* Create equivalence class numbers. From now on, ABS( bck(x) ) 85228072Sbapt * is the equivalence class number for object x. If bck(x) 86228072Sbapt * is positive, then x is the representative of its equivalence 87228072Sbapt * class. 88228072Sbapt */ 89228072Sbapt for (i = 1; i <= num; ++i) 90228072Sbapt if (bck[i] == NIL) { 91228072Sbapt bck[i] = ++numcl; 92228072Sbapt for (j = fwd[i]; j != NIL; j = fwd[j]) 93228072Sbapt bck[j] = -numcl; 94228072Sbapt } 95228072Sbapt 96228072Sbapt return numcl; 97228072Sbapt} 98228072Sbapt 99228072Sbapt 100228072Sbapt/* mkeccl - update equivalence classes based on character class xtions 101228072Sbapt * 102228072Sbapt * synopsis 103228072Sbapt * Char ccls[]; 104228072Sbapt * int lenccl, fwd[llsiz], bck[llsiz], llsiz, NUL_mapping; 105228072Sbapt * void mkeccl( Char ccls[], int lenccl, int fwd[llsiz], int bck[llsiz], 106228072Sbapt * int llsiz, int NUL_mapping ); 107228072Sbapt * 108228072Sbapt * ccls contains the elements of the character class, lenccl is the 109228072Sbapt * number of elements in the ccl, fwd is the forward link-list of equivalent 110228072Sbapt * characters, bck is the backward link-list, and llsiz size of the link-list. 111228072Sbapt * 112228072Sbapt * NUL_mapping is the value which NUL (0) should be mapped to. 113228072Sbapt */ 114228072Sbapt 115228072Sbaptvoid mkeccl (ccls, lenccl, fwd, bck, llsiz, NUL_mapping) 116228072Sbapt Char ccls[]; 117228072Sbapt int lenccl, fwd[], bck[], llsiz, NUL_mapping; 118228072Sbapt{ 119228072Sbapt int cclp, oldec, newec; 120228072Sbapt int cclm, i, j; 121228072Sbapt static unsigned char cclflags[CSIZE]; /* initialized to all '\0' */ 122228072Sbapt 123228072Sbapt /* Note that it doesn't matter whether or not the character class is 124228072Sbapt * negated. The same results will be obtained in either case. 125228072Sbapt */ 126228072Sbapt 127228072Sbapt cclp = 0; 128228072Sbapt 129228072Sbapt while (cclp < lenccl) { 130228072Sbapt cclm = ccls[cclp]; 131228072Sbapt 132228072Sbapt if (NUL_mapping && cclm == 0) 133228072Sbapt cclm = NUL_mapping; 134228072Sbapt 135228072Sbapt oldec = bck[cclm]; 136228072Sbapt newec = cclm; 137228072Sbapt 138228072Sbapt j = cclp + 1; 139228072Sbapt 140228072Sbapt for (i = fwd[cclm]; i != NIL && i <= llsiz; i = fwd[i]) { /* look for the symbol in the character class */ 141228072Sbapt for (; j < lenccl; ++j) { 142250874Sjkim int ccl_char; 143228072Sbapt 144228072Sbapt if (NUL_mapping && ccls[j] == 0) 145228072Sbapt ccl_char = NUL_mapping; 146228072Sbapt else 147228072Sbapt ccl_char = ccls[j]; 148228072Sbapt 149228072Sbapt if (ccl_char > i) 150228072Sbapt break; 151228072Sbapt 152228072Sbapt if (ccl_char == i && !cclflags[j]) { 153228072Sbapt /* We found an old companion of cclm 154228072Sbapt * in the ccl. Link it into the new 155228072Sbapt * equivalence class and flag it as 156228072Sbapt * having been processed. 157228072Sbapt */ 158228072Sbapt 159228072Sbapt bck[i] = newec; 160228072Sbapt fwd[newec] = i; 161228072Sbapt newec = i; 162228072Sbapt /* Set flag so we don't reprocess. */ 163228072Sbapt cclflags[j] = 1; 164228072Sbapt 165228072Sbapt /* Get next equivalence class member. */ 166228072Sbapt /* continue 2 */ 167228072Sbapt goto next_pt; 168228072Sbapt } 169228072Sbapt } 170228072Sbapt 171228072Sbapt /* Symbol isn't in character class. Put it in the old 172228072Sbapt * equivalence class. 173228072Sbapt */ 174228072Sbapt 175228072Sbapt bck[i] = oldec; 176228072Sbapt 177228072Sbapt if (oldec != NIL) 178228072Sbapt fwd[oldec] = i; 179228072Sbapt 180228072Sbapt oldec = i; 181228072Sbapt 182228072Sbapt next_pt:; 183228072Sbapt } 184228072Sbapt 185228072Sbapt if (bck[cclm] != NIL || oldec != bck[cclm]) { 186228072Sbapt bck[cclm] = NIL; 187228072Sbapt fwd[oldec] = NIL; 188228072Sbapt } 189228072Sbapt 190228072Sbapt fwd[newec] = NIL; 191228072Sbapt 192228072Sbapt /* Find next ccl member to process. */ 193228072Sbapt 194228072Sbapt for (++cclp; cclflags[cclp] && cclp < lenccl; ++cclp) { 195228072Sbapt /* Reset "doesn't need processing" flag. */ 196228072Sbapt cclflags[cclp] = 0; 197228072Sbapt } 198228072Sbapt } 199228072Sbapt} 200228072Sbapt 201228072Sbapt 202228072Sbapt/* mkechar - create equivalence class for single character */ 203228072Sbapt 204228072Sbaptvoid mkechar (tch, fwd, bck) 205228072Sbapt int tch, fwd[], bck[]; 206228072Sbapt{ 207228072Sbapt /* If until now the character has been a proper subset of 208228072Sbapt * an equivalence class, break it away to create a new ec 209228072Sbapt */ 210228072Sbapt 211228072Sbapt if (fwd[tch] != NIL) 212228072Sbapt bck[fwd[tch]] = bck[tch]; 213228072Sbapt 214228072Sbapt if (bck[tch] != NIL) 215228072Sbapt fwd[bck[tch]] = fwd[tch]; 216228072Sbapt 217228072Sbapt fwd[tch] = NIL; 218228072Sbapt bck[tch] = NIL; 219228072Sbapt} 220