1253592Serwin/* 2253592Serwin * Copyright (C) 2013 Internet Systems Consortium, Inc. ("ISC") 3253592Serwin * 4253592Serwin * Permission to use, copy, modify, and/or distribute this software for any 5253592Serwin * purpose with or without fee is hereby granted, provided that the above 6253592Serwin * copyright notice and this permission notice appear in all copies. 7253592Serwin * 8253592Serwin * THE SOFTWARE IS PROVIDED "AS IS" AND ISC DISCLAIMS ALL WARRANTIES WITH 9253592Serwin * REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY 10253592Serwin * AND FITNESS. IN NO EVENT SHALL ISC BE LIABLE FOR ANY SPECIAL, DIRECT, 11253592Serwin * INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM 12253592Serwin * LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE 13253592Serwin * OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR 14253592Serwin * PERFORMANCE OF THIS SOFTWARE. 15253592Serwin */ 16253592Serwin 17253592Serwin#include <config.h> 18253592Serwin 19253592Serwin#include <isc/file.h> 20253592Serwin#include <isc/regex.h> 21253592Serwin#include <isc/string.h> 22253592Serwin 23253592Serwin#if VALREGEX_REPORT_REASON 24253592Serwin#define FAIL(x) do { reason = (x); goto error; } while(0) 25253592Serwin#else 26253592Serwin#define FAIL(x) goto error 27253592Serwin#endif 28253592Serwin 29253592Serwin/* 30253592Serwin * Validate the regular expression 'C' locale. 31253592Serwin */ 32253592Serwinint 33253592Serwinisc_regex_validate(const char *c) { 34253592Serwin enum { 35253592Serwin none, parse_bracket, parse_bound, 36253592Serwin parse_ce, parse_ec, parse_cc 37253592Serwin } state = none; 38253592Serwin /* Well known character classes. */ 39253592Serwin const char *cc[] = { 40253592Serwin ":alnum:", ":digit:", ":punct:", ":alpha:", ":graph:", 41253592Serwin ":space:", ":blank:", ":lower:", ":upper:", ":cntrl:", 42253592Serwin ":print:", ":xdigit:" 43253592Serwin }; 44253592Serwin isc_boolean_t seen_comma = ISC_FALSE; 45253592Serwin isc_boolean_t seen_high = ISC_FALSE; 46253592Serwin isc_boolean_t seen_char = ISC_FALSE; 47253592Serwin isc_boolean_t seen_ec = ISC_FALSE; 48253592Serwin isc_boolean_t seen_ce = ISC_FALSE; 49253592Serwin isc_boolean_t have_atom = ISC_FALSE; 50253592Serwin int group = 0; 51253592Serwin int range = 0; 52253592Serwin int sub = 0; 53253592Serwin isc_boolean_t empty_ok = ISC_FALSE; 54253592Serwin isc_boolean_t neg = ISC_FALSE; 55253592Serwin isc_boolean_t was_multiple = ISC_FALSE; 56253592Serwin unsigned int low = 0; 57253592Serwin unsigned int high = 0; 58253592Serwin const char *ccname = NULL; 59253592Serwin int range_start = 0; 60253592Serwin#if VALREGEX_REPORT_REASON 61253592Serwin const char *reason = ""; 62253592Serwin#endif 63253592Serwin 64253592Serwin if (c == NULL || *c == 0) 65253592Serwin FAIL("empty string"); 66253592Serwin 67253592Serwin while (c != NULL && *c != 0) { 68253592Serwin switch (state) { 69253592Serwin case none: 70253592Serwin switch (*c) { 71253592Serwin case '\\': /* make literal */ 72253592Serwin ++c; 73253592Serwin switch (*c) { 74253592Serwin case '1': case '2': case '3': 75253592Serwin case '4': case '5': case '6': 76253592Serwin case '7': case '8': case '9': 77253592Serwin if ((*c - '0') > sub) 78253592Serwin FAIL("bad back reference"); 79253592Serwin have_atom = ISC_TRUE; 80253592Serwin was_multiple = ISC_FALSE; 81253592Serwin break; 82253592Serwin case 0: 83253592Serwin FAIL("escaped end-of-string"); 84253592Serwin default: 85253592Serwin goto literal; 86253592Serwin } 87253592Serwin ++c; 88253592Serwin break; 89253592Serwin case '[': /* bracket start */ 90253592Serwin ++c; 91253592Serwin neg = ISC_FALSE; 92253592Serwin was_multiple = ISC_FALSE; 93253592Serwin seen_char = ISC_FALSE; 94253592Serwin state = parse_bracket; 95253592Serwin break; 96253592Serwin case '{': /* bound start */ 97253592Serwin switch (c[1]) { 98253592Serwin case '0': case '1': case '2': case '3': 99253592Serwin case '4': case '5': case '6': case '7': 100253592Serwin case '8': case '9': 101253592Serwin if (!have_atom) 102253592Serwin FAIL("no atom"); 103253592Serwin if (was_multiple) 104253592Serwin FAIL("was multiple"); 105253592Serwin seen_comma = ISC_FALSE; 106253592Serwin seen_high = ISC_FALSE; 107253592Serwin low = high = 0; 108253592Serwin state = parse_bound; 109253592Serwin break; 110253592Serwin default: 111253592Serwin goto literal; 112253592Serwin } 113253592Serwin ++c; 114253592Serwin have_atom = ISC_TRUE; 115253592Serwin was_multiple = ISC_TRUE; 116253592Serwin break; 117253592Serwin case '}': 118253592Serwin goto literal; 119253592Serwin case '(': /* group start */ 120253592Serwin have_atom = ISC_FALSE; 121253592Serwin was_multiple = ISC_FALSE; 122253592Serwin empty_ok = ISC_TRUE; 123253592Serwin ++group; 124253592Serwin ++sub; 125253592Serwin ++c; 126253592Serwin break; 127253592Serwin case ')': /* group end */ 128253592Serwin if (group && !have_atom && !empty_ok) 129253592Serwin FAIL("empty alternative"); 130253592Serwin have_atom = ISC_TRUE; 131253592Serwin was_multiple = ISC_FALSE; 132253592Serwin if (group != 0) 133253592Serwin --group; 134253592Serwin ++c; 135253592Serwin break; 136253592Serwin case '|': /* alternative seperator */ 137253592Serwin if (!have_atom) 138253592Serwin FAIL("no atom"); 139253592Serwin have_atom = ISC_FALSE; 140253592Serwin empty_ok = ISC_FALSE; 141253592Serwin was_multiple = ISC_FALSE; 142253592Serwin ++c; 143253592Serwin break; 144253592Serwin case '^': 145253592Serwin case '$': 146253592Serwin have_atom = ISC_TRUE; 147253592Serwin was_multiple = ISC_TRUE; 148253592Serwin ++c; 149253592Serwin break; 150253592Serwin case '+': 151253592Serwin case '*': 152253592Serwin case '?': 153253592Serwin if (was_multiple) 154253592Serwin FAIL("was multiple"); 155253592Serwin if (!have_atom) 156253592Serwin FAIL("no atom"); 157253592Serwin have_atom = ISC_TRUE; 158253592Serwin was_multiple = ISC_TRUE; 159253592Serwin ++c; 160253592Serwin break; 161253592Serwin case '.': 162253592Serwin default: 163253592Serwin literal: 164253592Serwin have_atom = ISC_TRUE; 165253592Serwin was_multiple = ISC_FALSE; 166253592Serwin ++c; 167253592Serwin break; 168253592Serwin } 169253592Serwin break; 170253592Serwin case parse_bound: 171253592Serwin switch (*c) { 172253592Serwin case '0': case '1': case '2': case '3': case '4': 173253592Serwin case '5': case '6': case '7': case '8': case '9': 174253592Serwin if (!seen_comma) { 175253592Serwin low = low * 10 + *c - '0'; 176253592Serwin if (low > 255) 177253592Serwin FAIL("lower bound too big"); 178253592Serwin } else { 179253592Serwin seen_high = ISC_TRUE; 180253592Serwin high = high * 10 + *c - '0'; 181253592Serwin if (high > 255) 182253592Serwin FAIL("upper bound too big"); 183253592Serwin } 184253592Serwin ++c; 185253592Serwin break; 186253592Serwin case ',': 187253592Serwin if (seen_comma) 188253592Serwin FAIL("multiple commas"); 189253592Serwin seen_comma = ISC_TRUE; 190253592Serwin ++c; 191253592Serwin break; 192253592Serwin default: 193253592Serwin case '{': 194253592Serwin FAIL("non digit/comma"); 195253592Serwin case '}': 196253592Serwin if (seen_high && low > high) 197253592Serwin FAIL("bad parse bound"); 198253592Serwin seen_comma = ISC_FALSE; 199253592Serwin state = none; 200253592Serwin ++c; 201253592Serwin break; 202253592Serwin } 203253592Serwin break; 204253592Serwin case parse_bracket: 205253592Serwin switch (*c) { 206253592Serwin case '^': 207253592Serwin if (seen_char || neg) goto inside; 208253592Serwin neg = ISC_TRUE; 209253592Serwin ++c; 210253592Serwin break; 211253592Serwin case '-': 212253592Serwin if (range == 2) goto inside; 213253592Serwin if (!seen_char) goto inside; 214253592Serwin if (range == 1) 215253592Serwin FAIL("bad range"); 216253592Serwin range = 2; 217253592Serwin ++c; 218253592Serwin break; 219253592Serwin case '[': 220253592Serwin ++c; 221253592Serwin switch (*c) { 222253592Serwin case '.': /* collating element */ 223253592Serwin if (range) --range; 224253592Serwin ++c; 225253592Serwin state = parse_ce; 226253592Serwin seen_ce = ISC_FALSE; 227253592Serwin break; 228253592Serwin case '=': /* equivalence class */ 229253592Serwin if (range == 2) 230253592Serwin FAIL("equivalence class in range"); 231253592Serwin ++c; 232253592Serwin state = parse_ec; 233253592Serwin seen_ec = ISC_FALSE; 234253592Serwin break; 235253592Serwin case ':': /* character class */ 236253592Serwin if (range == 2) 237253592Serwin FAIL("character class in range"); 238253592Serwin ccname = c; 239253592Serwin ++c; 240253592Serwin state = parse_cc; 241253592Serwin break; 242253592Serwin } 243253592Serwin seen_char = ISC_TRUE; 244253592Serwin break; 245253592Serwin case ']': 246253592Serwin if (!c[1] && !seen_char) 247253592Serwin FAIL("unfinished brace"); 248253592Serwin if (!seen_char) 249253592Serwin goto inside; 250253592Serwin ++c; 251253592Serwin range = 0; 252253592Serwin have_atom = ISC_TRUE; 253253592Serwin state = none; 254253592Serwin break; 255253592Serwin default: 256253592Serwin inside: 257253592Serwin seen_char = ISC_TRUE; 258253592Serwin if (range == 2 && *c < range_start) 259253592Serwin FAIL("out of order range"); 260253592Serwin if (range != 0) 261253592Serwin --range; 262253592Serwin range_start = *c; 263253592Serwin ++c; 264253592Serwin break; 265253592Serwin }; 266253592Serwin break; 267253592Serwin case parse_ce: 268253592Serwin switch (*c) { 269253592Serwin case '.': 270253592Serwin ++c; 271253592Serwin switch (*c) { 272253592Serwin case ']': 273253592Serwin if (!seen_ce) 274253592Serwin FAIL("empty ce"); 275253592Serwin ++c; 276253592Serwin state = parse_bracket; 277253592Serwin break; 278253592Serwin default: 279253592Serwin if (seen_ce) 280253592Serwin range_start = 256; 281253592Serwin else 282253592Serwin range_start = '.'; 283253592Serwin seen_ce = ISC_TRUE; 284253592Serwin break; 285253592Serwin } 286253592Serwin break; 287253592Serwin default: 288253592Serwin if (seen_ce) 289253592Serwin range_start = 256; 290253592Serwin else 291253592Serwin range_start = *c; 292253592Serwin seen_ce = ISC_TRUE; 293253592Serwin ++c; 294253592Serwin break; 295253592Serwin } 296253592Serwin break; 297253592Serwin case parse_ec: 298253592Serwin switch (*c) { 299253592Serwin case '=': 300253592Serwin ++c; 301253592Serwin switch (*c) { 302253592Serwin case ']': 303253592Serwin if (!seen_ec) 304253592Serwin FAIL("no ec"); 305253592Serwin ++c; 306253592Serwin state = parse_bracket; 307253592Serwin break; 308253592Serwin default: 309253592Serwin seen_ec = ISC_TRUE; 310253592Serwin break; 311253592Serwin } 312253592Serwin break; 313253592Serwin default: 314253592Serwin seen_ec = ISC_TRUE; 315253592Serwin ++c; 316253592Serwin break; 317253592Serwin } 318253592Serwin break; 319253592Serwin case parse_cc: 320253592Serwin switch (*c) { 321253592Serwin case ':': 322253592Serwin ++c; 323253592Serwin switch (*c) { 324253592Serwin case ']': { 325253592Serwin unsigned int i; 326253592Serwin isc_boolean_t found = ISC_FALSE; 327253592Serwin for (i = 0; 328253592Serwin i < sizeof(cc)/sizeof(*cc); 329253592Serwin i++) 330253592Serwin { 331253592Serwin unsigned int len; 332253592Serwin len = strlen(cc[i]); 333253592Serwin if (len != 334253592Serwin (unsigned int)(c - ccname)) 335253592Serwin continue; 336253592Serwin if (strncmp(cc[i], ccname, len)) 337253592Serwin continue; 338253592Serwin found = ISC_TRUE; 339253592Serwin } 340253592Serwin if (!found) 341253592Serwin FAIL("unknown cc"); 342253592Serwin ++c; 343253592Serwin state = parse_bracket; 344253592Serwin break; 345253592Serwin } 346253592Serwin default: 347253592Serwin break; 348253592Serwin } 349253592Serwin break; 350253592Serwin default: 351253592Serwin ++c; 352253592Serwin break; 353253592Serwin } 354253592Serwin break; 355253592Serwin } 356253592Serwin } 357253592Serwin if (group != 0) 358253592Serwin FAIL("group open"); 359253592Serwin if (state != none) 360253592Serwin FAIL("incomplete"); 361253592Serwin if (!have_atom) 362253592Serwin FAIL("no atom"); 363253592Serwin return (sub); 364253592Serwin 365253592Serwin error: 366253592Serwin#if VALREGEX_REPORT_REASON 367253592Serwin fprintf(stderr, "%s\n", reason); 368253592Serwin#endif 369253592Serwin return (-1); 370253592Serwin} 371