ccl.c revision 250156
1/* ccl - routines for character classes */ 2 3/* Copyright (c) 1990 The Regents of the University of California. */ 4/* All rights reserved. */ 5 6/* This code is derived from software contributed to Berkeley by */ 7/* Vern Paxson. */ 8 9/* The United States Government has rights in this work pursuant */ 10/* to contract no. DE-AC03-76SF00098 between the United States */ 11 /* Department of Energy and the University of California. */ 12 13/* This file is part of flex. */ 14 15/* Redistribution and use in source and binary forms, with or without */ 16/* modification, are permitted provided that the following conditions */ 17/* are met: */ 18 19/* 1. Redistributions of source code must retain the above copyright */ 20/* notice, this list of conditions and the following disclaimer. */ 21/* 2. Redistributions in binary form must reproduce the above copyright */ 22/* notice, this list of conditions and the following disclaimer in the */ 23/* documentation and/or other materials provided with the distribution. */ 24 25/* Neither the name of the University nor the names of its contributors */ 26/* may be used to endorse or promote products derived from this software */ 27/* without specific prior written permission. */ 28 29/* THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR */ 30/* IMPLIED WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED */ 31/* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR */ 32/* PURPOSE. */ 33 34#include "flexdef.h" 35 36/* return true if the chr is in the ccl. Takes negation into account. */ 37static bool 38ccl_contains (const int cclp, const int ch) 39{ 40 int ind, len, i; 41 42 len = ccllen[cclp]; 43 ind = cclmap[cclp]; 44 45 for (i = 0; i < len; ++i) 46 if (ccltbl[ind + i] == ch) 47 return !cclng[cclp]; 48 49 return cclng[cclp]; 50} 51 52 53/* ccladd - add a single character to a ccl */ 54 55void ccladd (cclp, ch) 56 int cclp; 57 int ch; 58{ 59 int ind, len, newpos, i; 60 61 check_char (ch); 62 63 len = ccllen[cclp]; 64 ind = cclmap[cclp]; 65 66 /* check to see if the character is already in the ccl */ 67 68 for (i = 0; i < len; ++i) 69 if (ccltbl[ind + i] == ch) 70 return; 71 72 /* mark newlines */ 73 if (ch == nlch) 74 ccl_has_nl[cclp] = true; 75 76 newpos = ind + len; 77 78 if (newpos >= current_max_ccl_tbl_size) { 79 current_max_ccl_tbl_size += MAX_CCL_TBL_SIZE_INCREMENT; 80 81 ++num_reallocs; 82 83 ccltbl = reallocate_Character_array (ccltbl, 84 current_max_ccl_tbl_size); 85 } 86 87 ccllen[cclp] = len + 1; 88 ccltbl[newpos] = ch; 89} 90 91/* dump_cclp - same thing as list_character_set, but for cclps. */ 92 93static void dump_cclp (FILE* file, int cclp) 94{ 95 register int i; 96 97 putc ('[', file); 98 99 for (i = 0; i < csize; ++i) { 100 if (ccl_contains(cclp, i)){ 101 register int start_char = i; 102 103 putc (' ', file); 104 105 fputs (readable_form (i), file); 106 107 while (++i < csize && ccl_contains(cclp,i)) ; 108 109 if (i - 1 > start_char) 110 /* this was a run */ 111 fprintf (file, "-%s", 112 readable_form (i - 1)); 113 114 putc (' ', file); 115 } 116 } 117 118 putc (']', file); 119} 120 121 122 123/* ccl_set_diff - create a new ccl as the set difference of the two given ccls. */ 124int 125ccl_set_diff (int a, int b) 126{ 127 int d, ch; 128 129 /* create new class */ 130 d = cclinit(); 131 132 /* In order to handle negation, we spin through all possible chars, 133 * addding each char in a that is not in b. 134 * (This could be O(n^2), but n is small and bounded.) 135 */ 136 for ( ch = 0; ch < csize; ++ch ) 137 if (ccl_contains (a, ch) && !ccl_contains(b, ch)) 138 ccladd (d, ch); 139 140 /* debug */ 141 if (0){ 142 fprintf(stderr, "ccl_set_diff ("); 143 fprintf(stderr, "\n "); 144 dump_cclp (stderr, a); 145 fprintf(stderr, "\n "); 146 dump_cclp (stderr, b); 147 fprintf(stderr, "\n "); 148 dump_cclp (stderr, d); 149 fprintf(stderr, "\n)\n"); 150 } 151 return d; 152} 153 154/* ccl_set_union - create a new ccl as the set union of the two given ccls. */ 155int 156ccl_set_union (int a, int b) 157{ 158 int d, i; 159 160 /* create new class */ 161 d = cclinit(); 162 163 /* Add all of a */ 164 for (i = 0; i < ccllen[a]; ++i) 165 ccladd (d, ccltbl[cclmap[a] + i]); 166 167 /* Add all of b */ 168 for (i = 0; i < ccllen[b]; ++i) 169 ccladd (d, ccltbl[cclmap[b] + i]); 170 171 /* debug */ 172 if (0){ 173 fprintf(stderr, "ccl_set_union (%d + %d = %d", a, b, d); 174 fprintf(stderr, "\n "); 175 dump_cclp (stderr, a); 176 fprintf(stderr, "\n "); 177 dump_cclp (stderr, b); 178 fprintf(stderr, "\n "); 179 dump_cclp (stderr, d); 180 fprintf(stderr, "\n)\n"); 181 } 182 return d; 183} 184 185 186/* cclinit - return an empty ccl */ 187 188int cclinit () 189{ 190 if (++lastccl >= current_maxccls) { 191 current_maxccls += MAX_CCLS_INCREMENT; 192 193 ++num_reallocs; 194 195 cclmap = 196 reallocate_integer_array (cclmap, current_maxccls); 197 ccllen = 198 reallocate_integer_array (ccllen, current_maxccls); 199 cclng = reallocate_integer_array (cclng, current_maxccls); 200 ccl_has_nl = 201 reallocate_bool_array (ccl_has_nl, 202 current_maxccls); 203 } 204 205 if (lastccl == 1) 206 /* we're making the first ccl */ 207 cclmap[lastccl] = 0; 208 209 else 210 /* The new pointer is just past the end of the last ccl. 211 * Since the cclmap points to the \first/ character of a 212 * ccl, adding the length of the ccl to the cclmap pointer 213 * will produce a cursor to the first free space. 214 */ 215 cclmap[lastccl] = 216 cclmap[lastccl - 1] + ccllen[lastccl - 1]; 217 218 ccllen[lastccl] = 0; 219 cclng[lastccl] = 0; /* ccl's start out life un-negated */ 220 ccl_has_nl[lastccl] = false; 221 222 return lastccl; 223} 224 225 226/* cclnegate - negate the given ccl */ 227 228void cclnegate (cclp) 229 int cclp; 230{ 231 cclng[cclp] = 1; 232 ccl_has_nl[cclp] = !ccl_has_nl[cclp]; 233} 234 235 236/* list_character_set - list the members of a set of characters in CCL form 237 * 238 * Writes to the given file a character-class representation of those 239 * characters present in the given CCL. A character is present if it 240 * has a non-zero value in the cset array. 241 */ 242 243void list_character_set (file, cset) 244 FILE *file; 245 int cset[]; 246{ 247 register int i; 248 249 putc ('[', file); 250 251 for (i = 0; i < csize; ++i) { 252 if (cset[i]) { 253 register int start_char = i; 254 255 putc (' ', file); 256 257 fputs (readable_form (i), file); 258 259 while (++i < csize && cset[i]) ; 260 261 if (i - 1 > start_char) 262 /* this was a run */ 263 fprintf (file, "-%s", 264 readable_form (i - 1)); 265 266 putc (' ', file); 267 } 268 } 269 270 putc (']', file); 271} 272 273/** Determines if the range [c1-c2] is unambiguous in a case-insensitive 274 * scanner. Specifically, if a lowercase or uppercase character, x, is in the 275 * range [c1-c2], then we require that UPPERCASE(x) and LOWERCASE(x) must also 276 * be in the range. If not, then this range is ambiguous, and the function 277 * returns false. For example, [@-_] spans [a-z] but not [A-Z]. Beware that 278 * [a-z] will be labeled ambiguous because it does not include [A-Z]. 279 * 280 * @param c1 the lower end of the range 281 * @param c2 the upper end of the range 282 * @return true if [c1-c2] is not ambiguous for a caseless scanner. 283 */ 284bool range_covers_case (int c1, int c2) 285{ 286 int i, o; 287 288 for (i = c1; i <= c2; i++) { 289 if (has_case (i)) { 290 o = reverse_case (i); 291 if (o < c1 || c2 < o) 292 return false; 293 } 294 } 295 return true; 296} 297 298/** Reverse the case of a character, if possible. 299 * @return c if case-reversal does not apply. 300 */ 301int reverse_case (int c) 302{ 303 return isupper (c) ? tolower (c) : (islower (c) ? toupper (c) : c); 304} 305 306/** Return true if c is uppercase or lowercase. */ 307bool has_case (int c) 308{ 309 return (isupper (c) || islower (c)) ? true : false; 310} 311