1198090Srdivacky/*- 2198090Srdivacky * This code is derived from OpenBSD's libc/regex, original license follows: 3198090Srdivacky * 4198090Srdivacky * Copyright (c) 1992, 1993, 1994 Henry Spencer. 5198090Srdivacky * Copyright (c) 1992, 1993, 1994 6198090Srdivacky * The Regents of the University of California. All rights reserved. 7198090Srdivacky * 8198090Srdivacky * This code is derived from software contributed to Berkeley by 9198090Srdivacky * Henry Spencer. 10198090Srdivacky * 11198090Srdivacky * Redistribution and use in source and binary forms, with or without 12198090Srdivacky * modification, are permitted provided that the following conditions 13198090Srdivacky * are met: 14198090Srdivacky * 1. Redistributions of source code must retain the above copyright 15198090Srdivacky * notice, this list of conditions and the following disclaimer. 16198090Srdivacky * 2. Redistributions in binary form must reproduce the above copyright 17198090Srdivacky * notice, this list of conditions and the following disclaimer in the 18198090Srdivacky * documentation and/or other materials provided with the distribution. 19198090Srdivacky * 3. Neither the name of the University nor the names of its contributors 20198090Srdivacky * may be used to endorse or promote products derived from this software 21198090Srdivacky * without specific prior written permission. 22198090Srdivacky * 23198090Srdivacky * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 24198090Srdivacky * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 25198090Srdivacky * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 26198090Srdivacky * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 27198090Srdivacky * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 28198090Srdivacky * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 29198090Srdivacky * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 30198090Srdivacky * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 31198090Srdivacky * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 32198090Srdivacky * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 33198090Srdivacky * SUCH DAMAGE. 34198090Srdivacky * 35198090Srdivacky * @(#)regcomp.c 8.5 (Berkeley) 3/20/94 36198090Srdivacky */ 37198090Srdivacky 38198090Srdivacky#include <sys/types.h> 39198090Srdivacky#include <stdio.h> 40198090Srdivacky#include <string.h> 41198090Srdivacky#include <ctype.h> 42198090Srdivacky#include <limits.h> 43198090Srdivacky#include <stdlib.h> 44198090Srdivacky#include "regex_impl.h" 45198090Srdivacky 46198090Srdivacky#include "regutils.h" 47198090Srdivacky#include "regex2.h" 48198090Srdivacky 49198090Srdivacky#include "regcclass.h" 50198090Srdivacky#include "regcname.h" 51198090Srdivacky 52198090Srdivacky/* 53198090Srdivacky * parse structure, passed up and down to avoid global variables and 54198090Srdivacky * other clumsinesses 55198090Srdivacky */ 56198090Srdivackystruct parse { 57198090Srdivacky char *next; /* next character in RE */ 58198090Srdivacky char *end; /* end of string (-> NUL normally) */ 59198090Srdivacky int error; /* has an error been seen? */ 60198090Srdivacky sop *strip; /* malloced strip */ 61198090Srdivacky sopno ssize; /* malloced strip size (allocated) */ 62198090Srdivacky sopno slen; /* malloced strip length (used) */ 63198090Srdivacky int ncsalloc; /* number of csets allocated */ 64198090Srdivacky struct re_guts *g; 65198090Srdivacky# define NPAREN 10 /* we need to remember () 1-9 for back refs */ 66198090Srdivacky sopno pbegin[NPAREN]; /* -> ( ([0] unused) */ 67198090Srdivacky sopno pend[NPAREN]; /* -> ) ([0] unused) */ 68198090Srdivacky}; 69198090Srdivacky 70198090Srdivackystatic void p_ere(struct parse *, int); 71198090Srdivackystatic void p_ere_exp(struct parse *); 72198090Srdivackystatic void p_str(struct parse *); 73198090Srdivackystatic void p_bre(struct parse *, int, int); 74198090Srdivackystatic int p_simp_re(struct parse *, int); 75198090Srdivackystatic int p_count(struct parse *); 76198090Srdivackystatic void p_bracket(struct parse *); 77198090Srdivackystatic void p_b_term(struct parse *, cset *); 78198090Srdivackystatic void p_b_cclass(struct parse *, cset *); 79198090Srdivackystatic void p_b_eclass(struct parse *, cset *); 80198090Srdivackystatic char p_b_symbol(struct parse *); 81198090Srdivackystatic char p_b_coll_elem(struct parse *, int); 82198090Srdivackystatic char othercase(int); 83198090Srdivackystatic void bothcases(struct parse *, int); 84198090Srdivackystatic void ordinary(struct parse *, int); 85198090Srdivackystatic void nonnewline(struct parse *); 86198090Srdivackystatic void repeat(struct parse *, sopno, int, int); 87198090Srdivackystatic int seterr(struct parse *, int); 88198090Srdivackystatic cset *allocset(struct parse *); 89198090Srdivackystatic void freeset(struct parse *, cset *); 90198090Srdivackystatic int freezeset(struct parse *, cset *); 91198090Srdivackystatic int firstch(struct parse *, cset *); 92198090Srdivackystatic int nch(struct parse *, cset *); 93198090Srdivackystatic void mcadd(struct parse *, cset *, const char *); 94198090Srdivackystatic void mcinvert(struct parse *, cset *); 95198090Srdivackystatic void mccase(struct parse *, cset *); 96198090Srdivackystatic int isinsets(struct re_guts *, int); 97198090Srdivackystatic int samesets(struct re_guts *, int, int); 98198090Srdivackystatic void categorize(struct parse *, struct re_guts *); 99198090Srdivackystatic sopno dupl(struct parse *, sopno, sopno); 100198090Srdivackystatic void doemit(struct parse *, sop, size_t); 101198090Srdivackystatic void doinsert(struct parse *, sop, size_t, sopno); 102198090Srdivackystatic void dofwd(struct parse *, sopno, sop); 103198090Srdivackystatic void enlarge(struct parse *, sopno); 104198090Srdivackystatic void stripsnug(struct parse *, struct re_guts *); 105198090Srdivackystatic void findmust(struct parse *, struct re_guts *); 106198090Srdivackystatic sopno pluscount(struct parse *, struct re_guts *); 107198090Srdivacky 108198090Srdivackystatic char nuls[10]; /* place to point scanner in event of error */ 109198090Srdivacky 110198090Srdivacky/* 111198090Srdivacky * macros for use with parse structure 112198090Srdivacky * BEWARE: these know that the parse structure is named `p' !!! 113198090Srdivacky */ 114198090Srdivacky#define PEEK() (*p->next) 115198090Srdivacky#define PEEK2() (*(p->next+1)) 116198090Srdivacky#define MORE() (p->next < p->end) 117198090Srdivacky#define MORE2() (p->next+1 < p->end) 118198090Srdivacky#define SEE(c) (MORE() && PEEK() == (c)) 119198090Srdivacky#define SEETWO(a, b) (MORE() && MORE2() && PEEK() == (a) && PEEK2() == (b)) 120198090Srdivacky#define EAT(c) ((SEE(c)) ? (NEXT(), 1) : 0) 121198090Srdivacky#define EATTWO(a, b) ((SEETWO(a, b)) ? (NEXT2(), 1) : 0) 122198090Srdivacky#define NEXT() (p->next++) 123198090Srdivacky#define NEXT2() (p->next += 2) 124198090Srdivacky#define NEXTn(n) (p->next += (n)) 125198090Srdivacky#define GETNEXT() (*p->next++) 126198090Srdivacky#define SETERROR(e) seterr(p, (e)) 127198090Srdivacky#define REQUIRE(co, e) (void)((co) || SETERROR(e)) 128198090Srdivacky#define MUSTSEE(c, e) (REQUIRE(MORE() && PEEK() == (c), e)) 129198090Srdivacky#define MUSTEAT(c, e) (REQUIRE(MORE() && GETNEXT() == (c), e)) 130198090Srdivacky#define MUSTNOTSEE(c, e) (REQUIRE(!MORE() || PEEK() != (c), e)) 131198090Srdivacky#define EMIT(op, sopnd) doemit(p, (sop)(op), (size_t)(sopnd)) 132198090Srdivacky#define INSERT(op, pos) doinsert(p, (sop)(op), HERE()-(pos)+1, pos) 133198090Srdivacky#define AHEAD(pos) dofwd(p, pos, HERE()-(pos)) 134198090Srdivacky#define ASTERN(sop, pos) EMIT(sop, HERE()-pos) 135198090Srdivacky#define HERE() (p->slen) 136198090Srdivacky#define THERE() (p->slen - 1) 137198090Srdivacky#define THERETHERE() (p->slen - 2) 138198090Srdivacky#define DROP(n) (p->slen -= (n)) 139198090Srdivacky 140198090Srdivacky#ifdef _POSIX2_RE_DUP_MAX 141198090Srdivacky#define DUPMAX _POSIX2_RE_DUP_MAX 142198090Srdivacky#else 143198090Srdivacky#define DUPMAX 255 144198090Srdivacky#endif 145198090Srdivacky#define INFINITY (DUPMAX + 1) 146198090Srdivacky 147198090Srdivacky#ifndef NDEBUG 148198090Srdivackystatic int never = 0; /* for use in asserts; shuts lint up */ 149198090Srdivacky#else 150198090Srdivacky#define never 0 /* some <assert.h>s have bugs too */ 151198090Srdivacky#endif 152198090Srdivacky 153198090Srdivacky/* 154198090Srdivacky - llvm_regcomp - interface for parser and compilation 155198090Srdivacky */ 156198090Srdivackyint /* 0 success, otherwise REG_something */ 157198090Srdivackyllvm_regcomp(llvm_regex_t *preg, const char *pattern, int cflags) 158198090Srdivacky{ 159198090Srdivacky struct parse pa; 160198090Srdivacky struct re_guts *g; 161198090Srdivacky struct parse *p = &pa; 162198090Srdivacky int i; 163198090Srdivacky size_t len; 164198090Srdivacky#ifdef REDEBUG 165198090Srdivacky# define GOODFLAGS(f) (f) 166198090Srdivacky#else 167198090Srdivacky# define GOODFLAGS(f) ((f)&~REG_DUMP) 168198090Srdivacky#endif 169198090Srdivacky 170198090Srdivacky cflags = GOODFLAGS(cflags); 171198090Srdivacky if ((cflags®_EXTENDED) && (cflags®_NOSPEC)) 172198090Srdivacky return(REG_INVARG); 173198090Srdivacky 174198090Srdivacky if (cflags®_PEND) { 175198090Srdivacky if (preg->re_endp < pattern) 176198090Srdivacky return(REG_INVARG); 177198090Srdivacky len = preg->re_endp - pattern; 178198090Srdivacky } else 179198090Srdivacky len = strlen((const char *)pattern); 180198090Srdivacky 181198090Srdivacky /* do the mallocs early so failure handling is easy */ 182198090Srdivacky g = (struct re_guts *)malloc(sizeof(struct re_guts) + 183198090Srdivacky (NC-1)*sizeof(cat_t)); 184198090Srdivacky if (g == NULL) 185198090Srdivacky return(REG_ESPACE); 186198090Srdivacky p->ssize = len/(size_t)2*(size_t)3 + (size_t)1; /* ugh */ 187198090Srdivacky p->strip = (sop *)calloc(p->ssize, sizeof(sop)); 188198090Srdivacky p->slen = 0; 189198090Srdivacky if (p->strip == NULL) { 190198090Srdivacky free((char *)g); 191198090Srdivacky return(REG_ESPACE); 192198090Srdivacky } 193198090Srdivacky 194198090Srdivacky /* set things up */ 195198090Srdivacky p->g = g; 196198090Srdivacky p->next = (char *)pattern; /* convenience; we do not modify it */ 197198090Srdivacky p->end = p->next + len; 198198090Srdivacky p->error = 0; 199198090Srdivacky p->ncsalloc = 0; 200198090Srdivacky for (i = 0; i < NPAREN; i++) { 201198090Srdivacky p->pbegin[i] = 0; 202198090Srdivacky p->pend[i] = 0; 203198090Srdivacky } 204198090Srdivacky g->csetsize = NC; 205198090Srdivacky g->sets = NULL; 206198090Srdivacky g->setbits = NULL; 207198090Srdivacky g->ncsets = 0; 208198090Srdivacky g->cflags = cflags; 209198090Srdivacky g->iflags = 0; 210198090Srdivacky g->nbol = 0; 211198090Srdivacky g->neol = 0; 212198090Srdivacky g->must = NULL; 213198090Srdivacky g->mlen = 0; 214198090Srdivacky g->nsub = 0; 215198090Srdivacky g->ncategories = 1; /* category 0 is "everything else" */ 216198090Srdivacky g->categories = &g->catspace[-(CHAR_MIN)]; 217198090Srdivacky (void) memset((char *)g->catspace, 0, NC*sizeof(cat_t)); 218198090Srdivacky g->backrefs = 0; 219198090Srdivacky 220198090Srdivacky /* do it */ 221198090Srdivacky EMIT(OEND, 0); 222198090Srdivacky g->firststate = THERE(); 223198090Srdivacky if (cflags®_EXTENDED) 224198090Srdivacky p_ere(p, OUT); 225198090Srdivacky else if (cflags®_NOSPEC) 226198090Srdivacky p_str(p); 227198090Srdivacky else 228198090Srdivacky p_bre(p, OUT, OUT); 229198090Srdivacky EMIT(OEND, 0); 230198090Srdivacky g->laststate = THERE(); 231198090Srdivacky 232198090Srdivacky /* tidy up loose ends and fill things in */ 233198090Srdivacky categorize(p, g); 234198090Srdivacky stripsnug(p, g); 235198090Srdivacky findmust(p, g); 236198090Srdivacky g->nplus = pluscount(p, g); 237198090Srdivacky g->magic = MAGIC2; 238198090Srdivacky preg->re_nsub = g->nsub; 239198090Srdivacky preg->re_g = g; 240198090Srdivacky preg->re_magic = MAGIC1; 241198090Srdivacky#ifndef REDEBUG 242198090Srdivacky /* not debugging, so can't rely on the assert() in llvm_regexec() */ 243198090Srdivacky if (g->iflags®EX_BAD) 244198090Srdivacky SETERROR(REG_ASSERT); 245198090Srdivacky#endif 246198090Srdivacky 247198090Srdivacky /* win or lose, we're done */ 248198090Srdivacky if (p->error != 0) /* lose */ 249198090Srdivacky llvm_regfree(preg); 250198090Srdivacky return(p->error); 251198090Srdivacky} 252198090Srdivacky 253198090Srdivacky/* 254198090Srdivacky - p_ere - ERE parser top level, concatenation and alternation 255198090Srdivacky */ 256198090Srdivackystatic void 257198090Srdivackyp_ere(struct parse *p, int stop) /* character this ERE should end at */ 258198090Srdivacky{ 259198090Srdivacky char c; 260198090Srdivacky sopno prevback = 0; 261198090Srdivacky sopno prevfwd = 0; 262198090Srdivacky sopno conc; 263198090Srdivacky int first = 1; /* is this the first alternative? */ 264198090Srdivacky 265198090Srdivacky for (;;) { 266198090Srdivacky /* do a bunch of concatenated expressions */ 267198090Srdivacky conc = HERE(); 268198090Srdivacky while (MORE() && (c = PEEK()) != '|' && c != stop) 269198090Srdivacky p_ere_exp(p); 270198090Srdivacky REQUIRE(HERE() != conc, REG_EMPTY); /* require nonempty */ 271198090Srdivacky 272198090Srdivacky if (!EAT('|')) 273198090Srdivacky break; /* NOTE BREAK OUT */ 274198090Srdivacky 275198090Srdivacky if (first) { 276198090Srdivacky INSERT(OCH_, conc); /* offset is wrong */ 277198090Srdivacky prevfwd = conc; 278198090Srdivacky prevback = conc; 279198090Srdivacky first = 0; 280198090Srdivacky } 281198090Srdivacky ASTERN(OOR1, prevback); 282198090Srdivacky prevback = THERE(); 283198090Srdivacky AHEAD(prevfwd); /* fix previous offset */ 284198090Srdivacky prevfwd = HERE(); 285198090Srdivacky EMIT(OOR2, 0); /* offset is very wrong */ 286198090Srdivacky } 287198090Srdivacky 288198090Srdivacky if (!first) { /* tail-end fixups */ 289198090Srdivacky AHEAD(prevfwd); 290198090Srdivacky ASTERN(O_CH, prevback); 291198090Srdivacky } 292198090Srdivacky 293198090Srdivacky assert(!MORE() || SEE(stop)); 294198090Srdivacky} 295198090Srdivacky 296198090Srdivacky/* 297198090Srdivacky - p_ere_exp - parse one subERE, an atom possibly followed by a repetition op 298198090Srdivacky */ 299198090Srdivackystatic void 300198090Srdivackyp_ere_exp(struct parse *p) 301198090Srdivacky{ 302198090Srdivacky char c; 303198090Srdivacky sopno pos; 304198090Srdivacky int count; 305198090Srdivacky int count2; 306249423Sdim int backrefnum; 307198090Srdivacky sopno subno; 308198090Srdivacky int wascaret = 0; 309198090Srdivacky 310198090Srdivacky assert(MORE()); /* caller should have ensured this */ 311198090Srdivacky c = GETNEXT(); 312198090Srdivacky 313198090Srdivacky pos = HERE(); 314198090Srdivacky switch (c) { 315198090Srdivacky case '(': 316198090Srdivacky REQUIRE(MORE(), REG_EPAREN); 317198090Srdivacky p->g->nsub++; 318198090Srdivacky subno = p->g->nsub; 319198090Srdivacky if (subno < NPAREN) 320198090Srdivacky p->pbegin[subno] = HERE(); 321198090Srdivacky EMIT(OLPAREN, subno); 322198090Srdivacky if (!SEE(')')) 323198090Srdivacky p_ere(p, ')'); 324198090Srdivacky if (subno < NPAREN) { 325198090Srdivacky p->pend[subno] = HERE(); 326198090Srdivacky assert(p->pend[subno] != 0); 327198090Srdivacky } 328198090Srdivacky EMIT(ORPAREN, subno); 329198090Srdivacky MUSTEAT(')', REG_EPAREN); 330198090Srdivacky break; 331198090Srdivacky#ifndef POSIX_MISTAKE 332198090Srdivacky case ')': /* happens only if no current unmatched ( */ 333198090Srdivacky /* 334198090Srdivacky * You may ask, why the ifndef? Because I didn't notice 335198090Srdivacky * this until slightly too late for 1003.2, and none of the 336198090Srdivacky * other 1003.2 regular-expression reviewers noticed it at 337198090Srdivacky * all. So an unmatched ) is legal POSIX, at least until 338198090Srdivacky * we can get it fixed. 339198090Srdivacky */ 340198090Srdivacky SETERROR(REG_EPAREN); 341198090Srdivacky break; 342198090Srdivacky#endif 343198090Srdivacky case '^': 344198090Srdivacky EMIT(OBOL, 0); 345198090Srdivacky p->g->iflags |= USEBOL; 346198090Srdivacky p->g->nbol++; 347198090Srdivacky wascaret = 1; 348198090Srdivacky break; 349198090Srdivacky case '$': 350198090Srdivacky EMIT(OEOL, 0); 351198090Srdivacky p->g->iflags |= USEEOL; 352198090Srdivacky p->g->neol++; 353198090Srdivacky break; 354198090Srdivacky case '|': 355198090Srdivacky SETERROR(REG_EMPTY); 356198090Srdivacky break; 357198090Srdivacky case '*': 358198090Srdivacky case '+': 359198090Srdivacky case '?': 360198090Srdivacky SETERROR(REG_BADRPT); 361198090Srdivacky break; 362198090Srdivacky case '.': 363198090Srdivacky if (p->g->cflags®_NEWLINE) 364198090Srdivacky nonnewline(p); 365198090Srdivacky else 366198090Srdivacky EMIT(OANY, 0); 367198090Srdivacky break; 368198090Srdivacky case '[': 369198090Srdivacky p_bracket(p); 370198090Srdivacky break; 371198090Srdivacky case '\\': 372198090Srdivacky REQUIRE(MORE(), REG_EESCAPE); 373198090Srdivacky c = GETNEXT(); 374249423Sdim if (c >= '1' && c <= '9') { 375249423Sdim /* \[0-9] is taken to be a back-reference to a previously specified 376249423Sdim * matching group. backrefnum will hold the number. The matching 377249423Sdim * group must exist (i.e. if \4 is found there must have been at 378249423Sdim * least 4 matching groups specified in the pattern previously). 379249423Sdim */ 380249423Sdim backrefnum = c - '0'; 381249423Sdim if (p->pend[backrefnum] == 0) { 382249423Sdim SETERROR(REG_ESUBREG); 383249423Sdim break; 384249423Sdim } 385249423Sdim 386249423Sdim /* Make sure everything checks out and emit the sequence 387249423Sdim * that marks a back-reference to the parse structure. 388249423Sdim */ 389249423Sdim assert(backrefnum <= p->g->nsub); 390249423Sdim EMIT(OBACK_, backrefnum); 391249423Sdim assert(p->pbegin[backrefnum] != 0); 392249423Sdim assert(OP(p->strip[p->pbegin[backrefnum]]) != OLPAREN); 393249423Sdim assert(OP(p->strip[p->pend[backrefnum]]) != ORPAREN); 394249423Sdim (void) dupl(p, p->pbegin[backrefnum]+1, p->pend[backrefnum]); 395249423Sdim EMIT(O_BACK, backrefnum); 396249423Sdim p->g->backrefs = 1; 397249423Sdim } else { 398249423Sdim /* Other chars are simply themselves when escaped with a backslash. 399249423Sdim */ 400249423Sdim ordinary(p, c); 401249423Sdim } 402198090Srdivacky break; 403198090Srdivacky case '{': /* okay as ordinary except if digit follows */ 404198090Srdivacky REQUIRE(!MORE() || !isdigit((uch)PEEK()), REG_BADRPT); 405198090Srdivacky /* FALLTHROUGH */ 406198090Srdivacky default: 407198090Srdivacky ordinary(p, c); 408198090Srdivacky break; 409198090Srdivacky } 410198090Srdivacky 411198090Srdivacky if (!MORE()) 412198090Srdivacky return; 413198090Srdivacky c = PEEK(); 414198090Srdivacky /* we call { a repetition if followed by a digit */ 415198090Srdivacky if (!( c == '*' || c == '+' || c == '?' || 416198090Srdivacky (c == '{' && MORE2() && isdigit((uch)PEEK2())) )) 417198090Srdivacky return; /* no repetition, we're done */ 418198090Srdivacky NEXT(); 419198090Srdivacky 420198090Srdivacky REQUIRE(!wascaret, REG_BADRPT); 421198090Srdivacky switch (c) { 422198090Srdivacky case '*': /* implemented as +? */ 423198090Srdivacky /* this case does not require the (y|) trick, noKLUDGE */ 424198090Srdivacky INSERT(OPLUS_, pos); 425198090Srdivacky ASTERN(O_PLUS, pos); 426198090Srdivacky INSERT(OQUEST_, pos); 427198090Srdivacky ASTERN(O_QUEST, pos); 428198090Srdivacky break; 429198090Srdivacky case '+': 430198090Srdivacky INSERT(OPLUS_, pos); 431198090Srdivacky ASTERN(O_PLUS, pos); 432198090Srdivacky break; 433198090Srdivacky case '?': 434198090Srdivacky /* KLUDGE: emit y? as (y|) until subtle bug gets fixed */ 435198090Srdivacky INSERT(OCH_, pos); /* offset slightly wrong */ 436198090Srdivacky ASTERN(OOR1, pos); /* this one's right */ 437198090Srdivacky AHEAD(pos); /* fix the OCH_ */ 438198090Srdivacky EMIT(OOR2, 0); /* offset very wrong... */ 439198090Srdivacky AHEAD(THERE()); /* ...so fix it */ 440198090Srdivacky ASTERN(O_CH, THERETHERE()); 441198090Srdivacky break; 442198090Srdivacky case '{': 443198090Srdivacky count = p_count(p); 444198090Srdivacky if (EAT(',')) { 445198090Srdivacky if (isdigit((uch)PEEK())) { 446198090Srdivacky count2 = p_count(p); 447198090Srdivacky REQUIRE(count <= count2, REG_BADBR); 448198090Srdivacky } else /* single number with comma */ 449198090Srdivacky count2 = INFINITY; 450198090Srdivacky } else /* just a single number */ 451198090Srdivacky count2 = count; 452198090Srdivacky repeat(p, pos, count, count2); 453198090Srdivacky if (!EAT('}')) { /* error heuristics */ 454198090Srdivacky while (MORE() && PEEK() != '}') 455198090Srdivacky NEXT(); 456198090Srdivacky REQUIRE(MORE(), REG_EBRACE); 457198090Srdivacky SETERROR(REG_BADBR); 458198090Srdivacky } 459198090Srdivacky break; 460198090Srdivacky } 461198090Srdivacky 462198090Srdivacky if (!MORE()) 463198090Srdivacky return; 464198090Srdivacky c = PEEK(); 465198090Srdivacky if (!( c == '*' || c == '+' || c == '?' || 466198090Srdivacky (c == '{' && MORE2() && isdigit((uch)PEEK2())) ) ) 467198090Srdivacky return; 468198090Srdivacky SETERROR(REG_BADRPT); 469198090Srdivacky} 470198090Srdivacky 471198090Srdivacky/* 472198090Srdivacky - p_str - string (no metacharacters) "parser" 473198090Srdivacky */ 474198090Srdivackystatic void 475198090Srdivackyp_str(struct parse *p) 476198090Srdivacky{ 477198090Srdivacky REQUIRE(MORE(), REG_EMPTY); 478198090Srdivacky while (MORE()) 479198090Srdivacky ordinary(p, GETNEXT()); 480198090Srdivacky} 481198090Srdivacky 482198090Srdivacky/* 483198090Srdivacky - p_bre - BRE parser top level, anchoring and concatenation 484198090Srdivacky * Giving end1 as OUT essentially eliminates the end1/end2 check. 485198090Srdivacky * 486198090Srdivacky * This implementation is a bit of a kludge, in that a trailing $ is first 487198090Srdivacky * taken as an ordinary character and then revised to be an anchor. The 488198090Srdivacky * only undesirable side effect is that '$' gets included as a character 489198090Srdivacky * category in such cases. This is fairly harmless; not worth fixing. 490198090Srdivacky * The amount of lookahead needed to avoid this kludge is excessive. 491198090Srdivacky */ 492198090Srdivackystatic void 493198090Srdivackyp_bre(struct parse *p, 494198090Srdivacky int end1, /* first terminating character */ 495198090Srdivacky int end2) /* second terminating character */ 496198090Srdivacky{ 497198090Srdivacky sopno start = HERE(); 498198090Srdivacky int first = 1; /* first subexpression? */ 499198090Srdivacky int wasdollar = 0; 500198090Srdivacky 501198090Srdivacky if (EAT('^')) { 502198090Srdivacky EMIT(OBOL, 0); 503198090Srdivacky p->g->iflags |= USEBOL; 504198090Srdivacky p->g->nbol++; 505198090Srdivacky } 506198090Srdivacky while (MORE() && !SEETWO(end1, end2)) { 507198090Srdivacky wasdollar = p_simp_re(p, first); 508198090Srdivacky first = 0; 509198090Srdivacky } 510198090Srdivacky if (wasdollar) { /* oops, that was a trailing anchor */ 511198090Srdivacky DROP(1); 512198090Srdivacky EMIT(OEOL, 0); 513198090Srdivacky p->g->iflags |= USEEOL; 514198090Srdivacky p->g->neol++; 515198090Srdivacky } 516198090Srdivacky 517198090Srdivacky REQUIRE(HERE() != start, REG_EMPTY); /* require nonempty */ 518198090Srdivacky} 519198090Srdivacky 520198090Srdivacky/* 521198090Srdivacky - p_simp_re - parse a simple RE, an atom possibly followed by a repetition 522198090Srdivacky */ 523198090Srdivackystatic int /* was the simple RE an unbackslashed $? */ 524198090Srdivackyp_simp_re(struct parse *p, 525198090Srdivacky int starordinary) /* is a leading * an ordinary character? */ 526198090Srdivacky{ 527198090Srdivacky int c; 528198090Srdivacky int count; 529198090Srdivacky int count2; 530198090Srdivacky sopno pos; 531198090Srdivacky int i; 532198090Srdivacky sopno subno; 533198090Srdivacky# define BACKSL (1<<CHAR_BIT) 534198090Srdivacky 535198090Srdivacky pos = HERE(); /* repetion op, if any, covers from here */ 536198090Srdivacky 537198090Srdivacky assert(MORE()); /* caller should have ensured this */ 538198090Srdivacky c = GETNEXT(); 539198090Srdivacky if (c == '\\') { 540198090Srdivacky REQUIRE(MORE(), REG_EESCAPE); 541198090Srdivacky c = BACKSL | GETNEXT(); 542198090Srdivacky } 543198090Srdivacky switch (c) { 544198090Srdivacky case '.': 545198090Srdivacky if (p->g->cflags®_NEWLINE) 546198090Srdivacky nonnewline(p); 547198090Srdivacky else 548198090Srdivacky EMIT(OANY, 0); 549198090Srdivacky break; 550198090Srdivacky case '[': 551198090Srdivacky p_bracket(p); 552198090Srdivacky break; 553198090Srdivacky case BACKSL|'{': 554198090Srdivacky SETERROR(REG_BADRPT); 555198090Srdivacky break; 556198090Srdivacky case BACKSL|'(': 557198090Srdivacky p->g->nsub++; 558198090Srdivacky subno = p->g->nsub; 559198090Srdivacky if (subno < NPAREN) 560198090Srdivacky p->pbegin[subno] = HERE(); 561198090Srdivacky EMIT(OLPAREN, subno); 562198090Srdivacky /* the MORE here is an error heuristic */ 563198090Srdivacky if (MORE() && !SEETWO('\\', ')')) 564198090Srdivacky p_bre(p, '\\', ')'); 565198090Srdivacky if (subno < NPAREN) { 566198090Srdivacky p->pend[subno] = HERE(); 567198090Srdivacky assert(p->pend[subno] != 0); 568198090Srdivacky } 569198090Srdivacky EMIT(ORPAREN, subno); 570198090Srdivacky REQUIRE(EATTWO('\\', ')'), REG_EPAREN); 571198090Srdivacky break; 572198090Srdivacky case BACKSL|')': /* should not get here -- must be user */ 573198090Srdivacky case BACKSL|'}': 574198090Srdivacky SETERROR(REG_EPAREN); 575198090Srdivacky break; 576198090Srdivacky case BACKSL|'1': 577198090Srdivacky case BACKSL|'2': 578198090Srdivacky case BACKSL|'3': 579198090Srdivacky case BACKSL|'4': 580198090Srdivacky case BACKSL|'5': 581198090Srdivacky case BACKSL|'6': 582198090Srdivacky case BACKSL|'7': 583198090Srdivacky case BACKSL|'8': 584198090Srdivacky case BACKSL|'9': 585198090Srdivacky i = (c&~BACKSL) - '0'; 586198090Srdivacky assert(i < NPAREN); 587198090Srdivacky if (p->pend[i] != 0) { 588198090Srdivacky assert(i <= p->g->nsub); 589198090Srdivacky EMIT(OBACK_, i); 590198090Srdivacky assert(p->pbegin[i] != 0); 591198090Srdivacky assert(OP(p->strip[p->pbegin[i]]) == OLPAREN); 592198090Srdivacky assert(OP(p->strip[p->pend[i]]) == ORPAREN); 593198090Srdivacky (void) dupl(p, p->pbegin[i]+1, p->pend[i]); 594198090Srdivacky EMIT(O_BACK, i); 595198090Srdivacky } else 596198090Srdivacky SETERROR(REG_ESUBREG); 597198090Srdivacky p->g->backrefs = 1; 598198090Srdivacky break; 599198090Srdivacky case '*': 600198090Srdivacky REQUIRE(starordinary, REG_BADRPT); 601198090Srdivacky /* FALLTHROUGH */ 602198090Srdivacky default: 603198090Srdivacky ordinary(p, (char)c); 604198090Srdivacky break; 605198090Srdivacky } 606198090Srdivacky 607198090Srdivacky if (EAT('*')) { /* implemented as +? */ 608198090Srdivacky /* this case does not require the (y|) trick, noKLUDGE */ 609198090Srdivacky INSERT(OPLUS_, pos); 610198090Srdivacky ASTERN(O_PLUS, pos); 611198090Srdivacky INSERT(OQUEST_, pos); 612198090Srdivacky ASTERN(O_QUEST, pos); 613198090Srdivacky } else if (EATTWO('\\', '{')) { 614198090Srdivacky count = p_count(p); 615198090Srdivacky if (EAT(',')) { 616198090Srdivacky if (MORE() && isdigit((uch)PEEK())) { 617198090Srdivacky count2 = p_count(p); 618198090Srdivacky REQUIRE(count <= count2, REG_BADBR); 619198090Srdivacky } else /* single number with comma */ 620198090Srdivacky count2 = INFINITY; 621198090Srdivacky } else /* just a single number */ 622198090Srdivacky count2 = count; 623198090Srdivacky repeat(p, pos, count, count2); 624198090Srdivacky if (!EATTWO('\\', '}')) { /* error heuristics */ 625198090Srdivacky while (MORE() && !SEETWO('\\', '}')) 626198090Srdivacky NEXT(); 627198090Srdivacky REQUIRE(MORE(), REG_EBRACE); 628198090Srdivacky SETERROR(REG_BADBR); 629198090Srdivacky } 630198090Srdivacky } else if (c == '$') /* $ (but not \$) ends it */ 631198090Srdivacky return(1); 632198090Srdivacky 633198090Srdivacky return(0); 634198090Srdivacky} 635198090Srdivacky 636198090Srdivacky/* 637198090Srdivacky - p_count - parse a repetition count 638198090Srdivacky */ 639198090Srdivackystatic int /* the value */ 640198090Srdivackyp_count(struct parse *p) 641198090Srdivacky{ 642198090Srdivacky int count = 0; 643198090Srdivacky int ndigits = 0; 644198090Srdivacky 645198090Srdivacky while (MORE() && isdigit((uch)PEEK()) && count <= DUPMAX) { 646198090Srdivacky count = count*10 + (GETNEXT() - '0'); 647198090Srdivacky ndigits++; 648198090Srdivacky } 649198090Srdivacky 650198090Srdivacky REQUIRE(ndigits > 0 && count <= DUPMAX, REG_BADBR); 651198090Srdivacky return(count); 652198090Srdivacky} 653198090Srdivacky 654198090Srdivacky/* 655198090Srdivacky - p_bracket - parse a bracketed character list 656198090Srdivacky * 657198090Srdivacky * Note a significant property of this code: if the allocset() did SETERROR, 658198090Srdivacky * no set operations are done. 659198090Srdivacky */ 660198090Srdivackystatic void 661198090Srdivackyp_bracket(struct parse *p) 662198090Srdivacky{ 663198090Srdivacky cset *cs; 664198090Srdivacky int invert = 0; 665198090Srdivacky 666198090Srdivacky /* Dept of Truly Sickening Special-Case Kludges */ 667198090Srdivacky if (p->next + 5 < p->end && strncmp(p->next, "[:<:]]", 6) == 0) { 668198090Srdivacky EMIT(OBOW, 0); 669198090Srdivacky NEXTn(6); 670198090Srdivacky return; 671198090Srdivacky } 672198090Srdivacky if (p->next + 5 < p->end && strncmp(p->next, "[:>:]]", 6) == 0) { 673198090Srdivacky EMIT(OEOW, 0); 674198090Srdivacky NEXTn(6); 675198090Srdivacky return; 676198090Srdivacky } 677198090Srdivacky 678198090Srdivacky if ((cs = allocset(p)) == NULL) { 679198090Srdivacky /* allocset did set error status in p */ 680198090Srdivacky return; 681198090Srdivacky } 682198090Srdivacky 683198090Srdivacky if (EAT('^')) 684198090Srdivacky invert++; /* make note to invert set at end */ 685198090Srdivacky if (EAT(']')) 686198090Srdivacky CHadd(cs, ']'); 687198090Srdivacky else if (EAT('-')) 688198090Srdivacky CHadd(cs, '-'); 689198090Srdivacky while (MORE() && PEEK() != ']' && !SEETWO('-', ']')) 690198090Srdivacky p_b_term(p, cs); 691198090Srdivacky if (EAT('-')) 692198090Srdivacky CHadd(cs, '-'); 693198090Srdivacky MUSTEAT(']', REG_EBRACK); 694198090Srdivacky 695198090Srdivacky if (p->error != 0) { /* don't mess things up further */ 696198090Srdivacky freeset(p, cs); 697198090Srdivacky return; 698198090Srdivacky } 699198090Srdivacky 700198090Srdivacky if (p->g->cflags®_ICASE) { 701198090Srdivacky int i; 702198090Srdivacky int ci; 703198090Srdivacky 704198090Srdivacky for (i = p->g->csetsize - 1; i >= 0; i--) 705198090Srdivacky if (CHIN(cs, i) && isalpha(i)) { 706198090Srdivacky ci = othercase(i); 707198090Srdivacky if (ci != i) 708198090Srdivacky CHadd(cs, ci); 709198090Srdivacky } 710198090Srdivacky if (cs->multis != NULL) 711198090Srdivacky mccase(p, cs); 712198090Srdivacky } 713198090Srdivacky if (invert) { 714198090Srdivacky int i; 715198090Srdivacky 716198090Srdivacky for (i = p->g->csetsize - 1; i >= 0; i--) 717198090Srdivacky if (CHIN(cs, i)) 718198090Srdivacky CHsub(cs, i); 719198090Srdivacky else 720198090Srdivacky CHadd(cs, i); 721198090Srdivacky if (p->g->cflags®_NEWLINE) 722198090Srdivacky CHsub(cs, '\n'); 723198090Srdivacky if (cs->multis != NULL) 724198090Srdivacky mcinvert(p, cs); 725198090Srdivacky } 726198090Srdivacky 727198090Srdivacky assert(cs->multis == NULL); /* xxx */ 728198090Srdivacky 729198090Srdivacky if (nch(p, cs) == 1) { /* optimize singleton sets */ 730198090Srdivacky ordinary(p, firstch(p, cs)); 731198090Srdivacky freeset(p, cs); 732198090Srdivacky } else 733198090Srdivacky EMIT(OANYOF, freezeset(p, cs)); 734198090Srdivacky} 735198090Srdivacky 736198090Srdivacky/* 737198090Srdivacky - p_b_term - parse one term of a bracketed character list 738198090Srdivacky */ 739198090Srdivackystatic void 740198090Srdivackyp_b_term(struct parse *p, cset *cs) 741198090Srdivacky{ 742198090Srdivacky char c; 743198090Srdivacky char start, finish; 744198090Srdivacky int i; 745198090Srdivacky 746198090Srdivacky /* classify what we've got */ 747198090Srdivacky switch ((MORE()) ? PEEK() : '\0') { 748198090Srdivacky case '[': 749198090Srdivacky c = (MORE2()) ? PEEK2() : '\0'; 750198090Srdivacky break; 751198090Srdivacky case '-': 752198090Srdivacky SETERROR(REG_ERANGE); 753198090Srdivacky return; /* NOTE RETURN */ 754198090Srdivacky break; 755198090Srdivacky default: 756198090Srdivacky c = '\0'; 757198090Srdivacky break; 758198090Srdivacky } 759198090Srdivacky 760198090Srdivacky switch (c) { 761198090Srdivacky case ':': /* character class */ 762198090Srdivacky NEXT2(); 763198090Srdivacky REQUIRE(MORE(), REG_EBRACK); 764198090Srdivacky c = PEEK(); 765198090Srdivacky REQUIRE(c != '-' && c != ']', REG_ECTYPE); 766198090Srdivacky p_b_cclass(p, cs); 767198090Srdivacky REQUIRE(MORE(), REG_EBRACK); 768198090Srdivacky REQUIRE(EATTWO(':', ']'), REG_ECTYPE); 769198090Srdivacky break; 770198090Srdivacky case '=': /* equivalence class */ 771198090Srdivacky NEXT2(); 772198090Srdivacky REQUIRE(MORE(), REG_EBRACK); 773198090Srdivacky c = PEEK(); 774198090Srdivacky REQUIRE(c != '-' && c != ']', REG_ECOLLATE); 775198090Srdivacky p_b_eclass(p, cs); 776198090Srdivacky REQUIRE(MORE(), REG_EBRACK); 777198090Srdivacky REQUIRE(EATTWO('=', ']'), REG_ECOLLATE); 778198090Srdivacky break; 779198090Srdivacky default: /* symbol, ordinary character, or range */ 780198090Srdivacky/* xxx revision needed for multichar stuff */ 781198090Srdivacky start = p_b_symbol(p); 782198090Srdivacky if (SEE('-') && MORE2() && PEEK2() != ']') { 783198090Srdivacky /* range */ 784198090Srdivacky NEXT(); 785198090Srdivacky if (EAT('-')) 786198090Srdivacky finish = '-'; 787198090Srdivacky else 788198090Srdivacky finish = p_b_symbol(p); 789198090Srdivacky } else 790198090Srdivacky finish = start; 791198090Srdivacky/* xxx what about signed chars here... */ 792198090Srdivacky REQUIRE(start <= finish, REG_ERANGE); 793198090Srdivacky for (i = start; i <= finish; i++) 794198090Srdivacky CHadd(cs, i); 795198090Srdivacky break; 796198090Srdivacky } 797198090Srdivacky} 798198090Srdivacky 799198090Srdivacky/* 800198090Srdivacky - p_b_cclass - parse a character-class name and deal with it 801198090Srdivacky */ 802198090Srdivackystatic void 803198090Srdivackyp_b_cclass(struct parse *p, cset *cs) 804198090Srdivacky{ 805198090Srdivacky char *sp = p->next; 806198090Srdivacky struct cclass *cp; 807198090Srdivacky size_t len; 808198090Srdivacky const char *u; 809198090Srdivacky char c; 810198090Srdivacky 811221345Sdim while (MORE() && isalpha((uch)PEEK())) 812198090Srdivacky NEXT(); 813198090Srdivacky len = p->next - sp; 814198090Srdivacky for (cp = cclasses; cp->name != NULL; cp++) 815198090Srdivacky if (strncmp(cp->name, sp, len) == 0 && cp->name[len] == '\0') 816198090Srdivacky break; 817198090Srdivacky if (cp->name == NULL) { 818198090Srdivacky /* oops, didn't find it */ 819198090Srdivacky SETERROR(REG_ECTYPE); 820198090Srdivacky return; 821198090Srdivacky } 822198090Srdivacky 823198090Srdivacky u = cp->chars; 824198090Srdivacky while ((c = *u++) != '\0') 825198090Srdivacky CHadd(cs, c); 826198090Srdivacky for (u = cp->multis; *u != '\0'; u += strlen(u) + 1) 827198090Srdivacky MCadd(p, cs, u); 828198090Srdivacky} 829198090Srdivacky 830198090Srdivacky/* 831198090Srdivacky - p_b_eclass - parse an equivalence-class name and deal with it 832198090Srdivacky * 833198090Srdivacky * This implementation is incomplete. xxx 834198090Srdivacky */ 835198090Srdivackystatic void 836198090Srdivackyp_b_eclass(struct parse *p, cset *cs) 837198090Srdivacky{ 838198090Srdivacky char c; 839198090Srdivacky 840198090Srdivacky c = p_b_coll_elem(p, '='); 841198090Srdivacky CHadd(cs, c); 842198090Srdivacky} 843198090Srdivacky 844198090Srdivacky/* 845198090Srdivacky - p_b_symbol - parse a character or [..]ed multicharacter collating symbol 846198090Srdivacky */ 847198090Srdivackystatic char /* value of symbol */ 848198090Srdivackyp_b_symbol(struct parse *p) 849198090Srdivacky{ 850198090Srdivacky char value; 851198090Srdivacky 852198090Srdivacky REQUIRE(MORE(), REG_EBRACK); 853198090Srdivacky if (!EATTWO('[', '.')) 854198090Srdivacky return(GETNEXT()); 855198090Srdivacky 856198090Srdivacky /* collating symbol */ 857198090Srdivacky value = p_b_coll_elem(p, '.'); 858198090Srdivacky REQUIRE(EATTWO('.', ']'), REG_ECOLLATE); 859198090Srdivacky return(value); 860198090Srdivacky} 861198090Srdivacky 862198090Srdivacky/* 863198090Srdivacky - p_b_coll_elem - parse a collating-element name and look it up 864198090Srdivacky */ 865198090Srdivackystatic char /* value of collating element */ 866198090Srdivackyp_b_coll_elem(struct parse *p, 867198090Srdivacky int endc) /* name ended by endc,']' */ 868198090Srdivacky{ 869198090Srdivacky char *sp = p->next; 870198090Srdivacky struct cname *cp; 871198090Srdivacky int len; 872198090Srdivacky 873198090Srdivacky while (MORE() && !SEETWO(endc, ']')) 874198090Srdivacky NEXT(); 875198090Srdivacky if (!MORE()) { 876198090Srdivacky SETERROR(REG_EBRACK); 877198090Srdivacky return(0); 878198090Srdivacky } 879198090Srdivacky len = p->next - sp; 880198090Srdivacky for (cp = cnames; cp->name != NULL; cp++) 881198090Srdivacky if (strncmp(cp->name, sp, len) == 0 && cp->name[len] == '\0') 882198090Srdivacky return(cp->code); /* known name */ 883198090Srdivacky if (len == 1) 884198090Srdivacky return(*sp); /* single character */ 885198090Srdivacky SETERROR(REG_ECOLLATE); /* neither */ 886198090Srdivacky return(0); 887198090Srdivacky} 888198090Srdivacky 889198090Srdivacky/* 890198090Srdivacky - othercase - return the case counterpart of an alphabetic 891198090Srdivacky */ 892198090Srdivackystatic char /* if no counterpart, return ch */ 893198090Srdivackyothercase(int ch) 894198090Srdivacky{ 895198090Srdivacky ch = (uch)ch; 896198090Srdivacky assert(isalpha(ch)); 897198090Srdivacky if (isupper(ch)) 898198090Srdivacky return ((uch)tolower(ch)); 899198090Srdivacky else if (islower(ch)) 900198090Srdivacky return ((uch)toupper(ch)); 901198090Srdivacky else /* peculiar, but could happen */ 902198090Srdivacky return(ch); 903198090Srdivacky} 904198090Srdivacky 905198090Srdivacky/* 906198090Srdivacky - bothcases - emit a dualcase version of a two-case character 907198090Srdivacky * 908198090Srdivacky * Boy, is this implementation ever a kludge... 909198090Srdivacky */ 910198090Srdivackystatic void 911198090Srdivackybothcases(struct parse *p, int ch) 912198090Srdivacky{ 913198090Srdivacky char *oldnext = p->next; 914198090Srdivacky char *oldend = p->end; 915198090Srdivacky char bracket[3]; 916198090Srdivacky 917198090Srdivacky ch = (uch)ch; 918198090Srdivacky assert(othercase(ch) != ch); /* p_bracket() would recurse */ 919198090Srdivacky p->next = bracket; 920198090Srdivacky p->end = bracket+2; 921198090Srdivacky bracket[0] = ch; 922198090Srdivacky bracket[1] = ']'; 923198090Srdivacky bracket[2] = '\0'; 924198090Srdivacky p_bracket(p); 925198090Srdivacky assert(p->next == bracket+2); 926198090Srdivacky p->next = oldnext; 927198090Srdivacky p->end = oldend; 928198090Srdivacky} 929198090Srdivacky 930198090Srdivacky/* 931198090Srdivacky - ordinary - emit an ordinary character 932198090Srdivacky */ 933198090Srdivackystatic void 934198090Srdivackyordinary(struct parse *p, int ch) 935198090Srdivacky{ 936198090Srdivacky cat_t *cap = p->g->categories; 937198090Srdivacky 938198090Srdivacky if ((p->g->cflags®_ICASE) && isalpha((uch)ch) && othercase(ch) != ch) 939198090Srdivacky bothcases(p, ch); 940198090Srdivacky else { 941198090Srdivacky EMIT(OCHAR, (uch)ch); 942198090Srdivacky if (cap[ch] == 0) 943198090Srdivacky cap[ch] = p->g->ncategories++; 944198090Srdivacky } 945198090Srdivacky} 946198090Srdivacky 947198090Srdivacky/* 948198090Srdivacky - nonnewline - emit REG_NEWLINE version of OANY 949198090Srdivacky * 950198090Srdivacky * Boy, is this implementation ever a kludge... 951198090Srdivacky */ 952198090Srdivackystatic void 953198090Srdivackynonnewline(struct parse *p) 954198090Srdivacky{ 955198090Srdivacky char *oldnext = p->next; 956198090Srdivacky char *oldend = p->end; 957198090Srdivacky char bracket[4]; 958198090Srdivacky 959198090Srdivacky p->next = bracket; 960198090Srdivacky p->end = bracket+3; 961198090Srdivacky bracket[0] = '^'; 962198090Srdivacky bracket[1] = '\n'; 963198090Srdivacky bracket[2] = ']'; 964198090Srdivacky bracket[3] = '\0'; 965198090Srdivacky p_bracket(p); 966198090Srdivacky assert(p->next == bracket+3); 967198090Srdivacky p->next = oldnext; 968198090Srdivacky p->end = oldend; 969198090Srdivacky} 970198090Srdivacky 971198090Srdivacky/* 972198090Srdivacky - repeat - generate code for a bounded repetition, recursively if needed 973198090Srdivacky */ 974198090Srdivackystatic void 975198090Srdivackyrepeat(struct parse *p, 976198090Srdivacky sopno start, /* operand from here to end of strip */ 977198090Srdivacky int from, /* repeated from this number */ 978198090Srdivacky int to) /* to this number of times (maybe INFINITY) */ 979198090Srdivacky{ 980198090Srdivacky sopno finish = HERE(); 981198090Srdivacky# define N 2 982198090Srdivacky# define INF 3 983198090Srdivacky# define REP(f, t) ((f)*8 + (t)) 984198090Srdivacky# define MAP(n) (((n) <= 1) ? (n) : ((n) == INFINITY) ? INF : N) 985198090Srdivacky sopno copy; 986198090Srdivacky 987198090Srdivacky if (p->error != 0) /* head off possible runaway recursion */ 988198090Srdivacky return; 989198090Srdivacky 990198090Srdivacky assert(from <= to); 991198090Srdivacky 992198090Srdivacky switch (REP(MAP(from), MAP(to))) { 993198090Srdivacky case REP(0, 0): /* must be user doing this */ 994198090Srdivacky DROP(finish-start); /* drop the operand */ 995198090Srdivacky break; 996198090Srdivacky case REP(0, 1): /* as x{1,1}? */ 997198090Srdivacky case REP(0, N): /* as x{1,n}? */ 998198090Srdivacky case REP(0, INF): /* as x{1,}? */ 999198090Srdivacky /* KLUDGE: emit y? as (y|) until subtle bug gets fixed */ 1000198090Srdivacky INSERT(OCH_, start); /* offset is wrong... */ 1001198090Srdivacky repeat(p, start+1, 1, to); 1002198090Srdivacky ASTERN(OOR1, start); 1003198090Srdivacky AHEAD(start); /* ... fix it */ 1004198090Srdivacky EMIT(OOR2, 0); 1005198090Srdivacky AHEAD(THERE()); 1006198090Srdivacky ASTERN(O_CH, THERETHERE()); 1007198090Srdivacky break; 1008198090Srdivacky case REP(1, 1): /* trivial case */ 1009198090Srdivacky /* done */ 1010198090Srdivacky break; 1011198090Srdivacky case REP(1, N): /* as x?x{1,n-1} */ 1012198090Srdivacky /* KLUDGE: emit y? as (y|) until subtle bug gets fixed */ 1013198090Srdivacky INSERT(OCH_, start); 1014198090Srdivacky ASTERN(OOR1, start); 1015198090Srdivacky AHEAD(start); 1016198090Srdivacky EMIT(OOR2, 0); /* offset very wrong... */ 1017198090Srdivacky AHEAD(THERE()); /* ...so fix it */ 1018198090Srdivacky ASTERN(O_CH, THERETHERE()); 1019198090Srdivacky copy = dupl(p, start+1, finish+1); 1020198090Srdivacky assert(copy == finish+4); 1021198090Srdivacky repeat(p, copy, 1, to-1); 1022198090Srdivacky break; 1023198090Srdivacky case REP(1, INF): /* as x+ */ 1024198090Srdivacky INSERT(OPLUS_, start); 1025198090Srdivacky ASTERN(O_PLUS, start); 1026198090Srdivacky break; 1027198090Srdivacky case REP(N, N): /* as xx{m-1,n-1} */ 1028198090Srdivacky copy = dupl(p, start, finish); 1029198090Srdivacky repeat(p, copy, from-1, to-1); 1030198090Srdivacky break; 1031198090Srdivacky case REP(N, INF): /* as xx{n-1,INF} */ 1032198090Srdivacky copy = dupl(p, start, finish); 1033198090Srdivacky repeat(p, copy, from-1, to); 1034198090Srdivacky break; 1035198090Srdivacky default: /* "can't happen" */ 1036198090Srdivacky SETERROR(REG_ASSERT); /* just in case */ 1037198090Srdivacky break; 1038198090Srdivacky } 1039198090Srdivacky} 1040198090Srdivacky 1041198090Srdivacky/* 1042198090Srdivacky - seterr - set an error condition 1043198090Srdivacky */ 1044198090Srdivackystatic int /* useless but makes type checking happy */ 1045198090Srdivackyseterr(struct parse *p, int e) 1046198090Srdivacky{ 1047198090Srdivacky if (p->error == 0) /* keep earliest error condition */ 1048198090Srdivacky p->error = e; 1049198090Srdivacky p->next = nuls; /* try to bring things to a halt */ 1050198090Srdivacky p->end = nuls; 1051198090Srdivacky return(0); /* make the return value well-defined */ 1052198090Srdivacky} 1053198090Srdivacky 1054198090Srdivacky/* 1055198090Srdivacky - allocset - allocate a set of characters for [] 1056198090Srdivacky */ 1057198090Srdivackystatic cset * 1058198090Srdivackyallocset(struct parse *p) 1059198090Srdivacky{ 1060198090Srdivacky int no = p->g->ncsets++; 1061198090Srdivacky size_t nc; 1062198090Srdivacky size_t nbytes; 1063198090Srdivacky cset *cs; 1064198090Srdivacky size_t css = (size_t)p->g->csetsize; 1065198090Srdivacky int i; 1066198090Srdivacky 1067198090Srdivacky if (no >= p->ncsalloc) { /* need another column of space */ 1068198090Srdivacky void *ptr; 1069198090Srdivacky 1070198090Srdivacky p->ncsalloc += CHAR_BIT; 1071198090Srdivacky nc = p->ncsalloc; 1072198090Srdivacky assert(nc % CHAR_BIT == 0); 1073198090Srdivacky nbytes = nc / CHAR_BIT * css; 1074198090Srdivacky 1075198090Srdivacky ptr = (cset *)realloc((char *)p->g->sets, nc * sizeof(cset)); 1076198090Srdivacky if (ptr == NULL) 1077198090Srdivacky goto nomem; 1078198090Srdivacky p->g->sets = ptr; 1079198090Srdivacky 1080198090Srdivacky ptr = (uch *)realloc((char *)p->g->setbits, nbytes); 1081198090Srdivacky if (ptr == NULL) 1082198090Srdivacky goto nomem; 1083198090Srdivacky p->g->setbits = ptr; 1084198090Srdivacky 1085198090Srdivacky for (i = 0; i < no; i++) 1086198090Srdivacky p->g->sets[i].ptr = p->g->setbits + css*(i/CHAR_BIT); 1087198090Srdivacky 1088198090Srdivacky (void) memset((char *)p->g->setbits + (nbytes - css), 0, css); 1089198090Srdivacky } 1090198090Srdivacky /* XXX should not happen */ 1091198090Srdivacky if (p->g->sets == NULL || p->g->setbits == NULL) 1092198090Srdivacky goto nomem; 1093198090Srdivacky 1094198090Srdivacky cs = &p->g->sets[no]; 1095198090Srdivacky cs->ptr = p->g->setbits + css*((no)/CHAR_BIT); 1096198090Srdivacky cs->mask = 1 << ((no) % CHAR_BIT); 1097198090Srdivacky cs->hash = 0; 1098198090Srdivacky cs->smultis = 0; 1099198090Srdivacky cs->multis = NULL; 1100198090Srdivacky 1101198090Srdivacky return(cs); 1102198090Srdivackynomem: 1103198090Srdivacky free(p->g->sets); 1104198090Srdivacky p->g->sets = NULL; 1105198090Srdivacky free(p->g->setbits); 1106198090Srdivacky p->g->setbits = NULL; 1107198090Srdivacky 1108198090Srdivacky SETERROR(REG_ESPACE); 1109198090Srdivacky /* caller's responsibility not to do set ops */ 1110198090Srdivacky return(NULL); 1111198090Srdivacky} 1112198090Srdivacky 1113198090Srdivacky/* 1114198090Srdivacky - freeset - free a now-unused set 1115198090Srdivacky */ 1116198090Srdivackystatic void 1117198090Srdivackyfreeset(struct parse *p, cset *cs) 1118198090Srdivacky{ 1119198090Srdivacky size_t i; 1120198090Srdivacky cset *top = &p->g->sets[p->g->ncsets]; 1121198090Srdivacky size_t css = (size_t)p->g->csetsize; 1122198090Srdivacky 1123198090Srdivacky for (i = 0; i < css; i++) 1124198090Srdivacky CHsub(cs, i); 1125198090Srdivacky if (cs == top-1) /* recover only the easy case */ 1126198090Srdivacky p->g->ncsets--; 1127198090Srdivacky} 1128198090Srdivacky 1129198090Srdivacky/* 1130198090Srdivacky - freezeset - final processing on a set of characters 1131198090Srdivacky * 1132198090Srdivacky * The main task here is merging identical sets. This is usually a waste 1133198090Srdivacky * of time (although the hash code minimizes the overhead), but can win 1134198090Srdivacky * big if REG_ICASE is being used. REG_ICASE, by the way, is why the hash 1135198090Srdivacky * is done using addition rather than xor -- all ASCII [aA] sets xor to 1136198090Srdivacky * the same value! 1137198090Srdivacky */ 1138198090Srdivackystatic int /* set number */ 1139198090Srdivackyfreezeset(struct parse *p, cset *cs) 1140198090Srdivacky{ 1141198090Srdivacky uch h = cs->hash; 1142198090Srdivacky size_t i; 1143198090Srdivacky cset *top = &p->g->sets[p->g->ncsets]; 1144198090Srdivacky cset *cs2; 1145198090Srdivacky size_t css = (size_t)p->g->csetsize; 1146198090Srdivacky 1147198090Srdivacky /* look for an earlier one which is the same */ 1148198090Srdivacky for (cs2 = &p->g->sets[0]; cs2 < top; cs2++) 1149198090Srdivacky if (cs2->hash == h && cs2 != cs) { 1150198090Srdivacky /* maybe */ 1151198090Srdivacky for (i = 0; i < css; i++) 1152198090Srdivacky if (!!CHIN(cs2, i) != !!CHIN(cs, i)) 1153198090Srdivacky break; /* no */ 1154198090Srdivacky if (i == css) 1155198090Srdivacky break; /* yes */ 1156198090Srdivacky } 1157198090Srdivacky 1158198090Srdivacky if (cs2 < top) { /* found one */ 1159198090Srdivacky freeset(p, cs); 1160198090Srdivacky cs = cs2; 1161198090Srdivacky } 1162198090Srdivacky 1163198090Srdivacky return((int)(cs - p->g->sets)); 1164198090Srdivacky} 1165198090Srdivacky 1166198090Srdivacky/* 1167198090Srdivacky - firstch - return first character in a set (which must have at least one) 1168198090Srdivacky */ 1169198090Srdivackystatic int /* character; there is no "none" value */ 1170198090Srdivackyfirstch(struct parse *p, cset *cs) 1171198090Srdivacky{ 1172198090Srdivacky size_t i; 1173198090Srdivacky size_t css = (size_t)p->g->csetsize; 1174198090Srdivacky 1175198090Srdivacky for (i = 0; i < css; i++) 1176198090Srdivacky if (CHIN(cs, i)) 1177198090Srdivacky return((char)i); 1178198090Srdivacky assert(never); 1179198090Srdivacky return(0); /* arbitrary */ 1180198090Srdivacky} 1181198090Srdivacky 1182198090Srdivacky/* 1183198090Srdivacky - nch - number of characters in a set 1184198090Srdivacky */ 1185198090Srdivackystatic int 1186198090Srdivackynch(struct parse *p, cset *cs) 1187198090Srdivacky{ 1188198090Srdivacky size_t i; 1189198090Srdivacky size_t css = (size_t)p->g->csetsize; 1190198090Srdivacky int n = 0; 1191198090Srdivacky 1192198090Srdivacky for (i = 0; i < css; i++) 1193198090Srdivacky if (CHIN(cs, i)) 1194198090Srdivacky n++; 1195198090Srdivacky return(n); 1196198090Srdivacky} 1197198090Srdivacky 1198198090Srdivacky/* 1199198090Srdivacky - mcadd - add a collating element to a cset 1200198090Srdivacky */ 1201198090Srdivackystatic void 1202198090Srdivackymcadd( struct parse *p, cset *cs, const char *cp) 1203198090Srdivacky{ 1204198090Srdivacky size_t oldend = cs->smultis; 1205198090Srdivacky void *np; 1206198090Srdivacky 1207198090Srdivacky cs->smultis += strlen(cp) + 1; 1208198090Srdivacky np = realloc(cs->multis, cs->smultis); 1209198090Srdivacky if (np == NULL) { 1210198090Srdivacky if (cs->multis) 1211198090Srdivacky free(cs->multis); 1212198090Srdivacky cs->multis = NULL; 1213198090Srdivacky SETERROR(REG_ESPACE); 1214198090Srdivacky return; 1215198090Srdivacky } 1216198090Srdivacky cs->multis = np; 1217198090Srdivacky 1218198090Srdivacky llvm_strlcpy(cs->multis + oldend - 1, cp, cs->smultis - oldend + 1); 1219198090Srdivacky} 1220198090Srdivacky 1221198090Srdivacky/* 1222198090Srdivacky - mcinvert - invert the list of collating elements in a cset 1223198090Srdivacky * 1224198090Srdivacky * This would have to know the set of possibilities. Implementation 1225198090Srdivacky * is deferred. 1226198090Srdivacky */ 1227198090Srdivacky/* ARGSUSED */ 1228198090Srdivackystatic void 1229198090Srdivackymcinvert(struct parse *p, cset *cs) 1230198090Srdivacky{ 1231198090Srdivacky assert(cs->multis == NULL); /* xxx */ 1232198090Srdivacky} 1233198090Srdivacky 1234198090Srdivacky/* 1235198090Srdivacky - mccase - add case counterparts of the list of collating elements in a cset 1236198090Srdivacky * 1237198090Srdivacky * This would have to know the set of possibilities. Implementation 1238198090Srdivacky * is deferred. 1239198090Srdivacky */ 1240198090Srdivacky/* ARGSUSED */ 1241198090Srdivackystatic void 1242198090Srdivackymccase(struct parse *p, cset *cs) 1243198090Srdivacky{ 1244198090Srdivacky assert(cs->multis == NULL); /* xxx */ 1245198090Srdivacky} 1246198090Srdivacky 1247198090Srdivacky/* 1248198090Srdivacky - isinsets - is this character in any sets? 1249198090Srdivacky */ 1250198090Srdivackystatic int /* predicate */ 1251198090Srdivackyisinsets(struct re_guts *g, int c) 1252198090Srdivacky{ 1253198090Srdivacky uch *col; 1254198090Srdivacky int i; 1255198090Srdivacky int ncols = (g->ncsets+(CHAR_BIT-1)) / CHAR_BIT; 1256198090Srdivacky unsigned uc = (uch)c; 1257198090Srdivacky 1258198090Srdivacky for (i = 0, col = g->setbits; i < ncols; i++, col += g->csetsize) 1259198090Srdivacky if (col[uc] != 0) 1260198090Srdivacky return(1); 1261198090Srdivacky return(0); 1262198090Srdivacky} 1263198090Srdivacky 1264198090Srdivacky/* 1265198090Srdivacky - samesets - are these two characters in exactly the same sets? 1266198090Srdivacky */ 1267198090Srdivackystatic int /* predicate */ 1268198090Srdivackysamesets(struct re_guts *g, int c1, int c2) 1269198090Srdivacky{ 1270198090Srdivacky uch *col; 1271198090Srdivacky int i; 1272198090Srdivacky int ncols = (g->ncsets+(CHAR_BIT-1)) / CHAR_BIT; 1273198090Srdivacky unsigned uc1 = (uch)c1; 1274198090Srdivacky unsigned uc2 = (uch)c2; 1275198090Srdivacky 1276198090Srdivacky for (i = 0, col = g->setbits; i < ncols; i++, col += g->csetsize) 1277198090Srdivacky if (col[uc1] != col[uc2]) 1278198090Srdivacky return(0); 1279198090Srdivacky return(1); 1280198090Srdivacky} 1281198090Srdivacky 1282198090Srdivacky/* 1283198090Srdivacky - categorize - sort out character categories 1284198090Srdivacky */ 1285198090Srdivackystatic void 1286198090Srdivackycategorize(struct parse *p, struct re_guts *g) 1287198090Srdivacky{ 1288198090Srdivacky cat_t *cats = g->categories; 1289198090Srdivacky int c; 1290198090Srdivacky int c2; 1291198090Srdivacky cat_t cat; 1292198090Srdivacky 1293198090Srdivacky /* avoid making error situations worse */ 1294198090Srdivacky if (p->error != 0) 1295198090Srdivacky return; 1296198090Srdivacky 1297198090Srdivacky for (c = CHAR_MIN; c <= CHAR_MAX; c++) 1298198090Srdivacky if (cats[c] == 0 && isinsets(g, c)) { 1299198090Srdivacky cat = g->ncategories++; 1300198090Srdivacky cats[c] = cat; 1301198090Srdivacky for (c2 = c+1; c2 <= CHAR_MAX; c2++) 1302198090Srdivacky if (cats[c2] == 0 && samesets(g, c, c2)) 1303198090Srdivacky cats[c2] = cat; 1304198090Srdivacky } 1305198090Srdivacky} 1306198090Srdivacky 1307198090Srdivacky/* 1308198090Srdivacky - dupl - emit a duplicate of a bunch of sops 1309198090Srdivacky */ 1310198090Srdivackystatic sopno /* start of duplicate */ 1311198090Srdivackydupl(struct parse *p, 1312198090Srdivacky sopno start, /* from here */ 1313198090Srdivacky sopno finish) /* to this less one */ 1314198090Srdivacky{ 1315198090Srdivacky sopno ret = HERE(); 1316198090Srdivacky sopno len = finish - start; 1317198090Srdivacky 1318198090Srdivacky assert(finish >= start); 1319198090Srdivacky if (len == 0) 1320198090Srdivacky return(ret); 1321198090Srdivacky enlarge(p, p->ssize + len); /* this many unexpected additions */ 1322198090Srdivacky assert(p->ssize >= p->slen + len); 1323198090Srdivacky (void) memmove((char *)(p->strip + p->slen), 1324198090Srdivacky (char *)(p->strip + start), (size_t)len*sizeof(sop)); 1325198090Srdivacky p->slen += len; 1326198090Srdivacky return(ret); 1327198090Srdivacky} 1328198090Srdivacky 1329198090Srdivacky/* 1330198090Srdivacky - doemit - emit a strip operator 1331198090Srdivacky * 1332198090Srdivacky * It might seem better to implement this as a macro with a function as 1333198090Srdivacky * hard-case backup, but it's just too big and messy unless there are 1334198090Srdivacky * some changes to the data structures. Maybe later. 1335198090Srdivacky */ 1336198090Srdivackystatic void 1337198090Srdivackydoemit(struct parse *p, sop op, size_t opnd) 1338198090Srdivacky{ 1339198090Srdivacky /* avoid making error situations worse */ 1340198090Srdivacky if (p->error != 0) 1341198090Srdivacky return; 1342198090Srdivacky 1343198090Srdivacky /* deal with oversize operands ("can't happen", more or less) */ 1344198090Srdivacky assert(opnd < 1<<OPSHIFT); 1345198090Srdivacky 1346198090Srdivacky /* deal with undersized strip */ 1347198090Srdivacky if (p->slen >= p->ssize) 1348198090Srdivacky enlarge(p, (p->ssize+1) / 2 * 3); /* +50% */ 1349198090Srdivacky assert(p->slen < p->ssize); 1350198090Srdivacky 1351198090Srdivacky /* finally, it's all reduced to the easy case */ 1352198090Srdivacky p->strip[p->slen++] = SOP(op, opnd); 1353198090Srdivacky} 1354198090Srdivacky 1355198090Srdivacky/* 1356198090Srdivacky - doinsert - insert a sop into the strip 1357198090Srdivacky */ 1358198090Srdivackystatic void 1359198090Srdivackydoinsert(struct parse *p, sop op, size_t opnd, sopno pos) 1360198090Srdivacky{ 1361198090Srdivacky sopno sn; 1362198090Srdivacky sop s; 1363198090Srdivacky int i; 1364198090Srdivacky 1365198090Srdivacky /* avoid making error situations worse */ 1366198090Srdivacky if (p->error != 0) 1367198090Srdivacky return; 1368198090Srdivacky 1369198090Srdivacky sn = HERE(); 1370198090Srdivacky EMIT(op, opnd); /* do checks, ensure space */ 1371198090Srdivacky assert(HERE() == sn+1); 1372198090Srdivacky s = p->strip[sn]; 1373198090Srdivacky 1374198090Srdivacky /* adjust paren pointers */ 1375198090Srdivacky assert(pos > 0); 1376198090Srdivacky for (i = 1; i < NPAREN; i++) { 1377198090Srdivacky if (p->pbegin[i] >= pos) { 1378198090Srdivacky p->pbegin[i]++; 1379198090Srdivacky } 1380198090Srdivacky if (p->pend[i] >= pos) { 1381198090Srdivacky p->pend[i]++; 1382198090Srdivacky } 1383198090Srdivacky } 1384198090Srdivacky 1385198090Srdivacky memmove((char *)&p->strip[pos+1], (char *)&p->strip[pos], 1386198090Srdivacky (HERE()-pos-1)*sizeof(sop)); 1387198090Srdivacky p->strip[pos] = s; 1388198090Srdivacky} 1389198090Srdivacky 1390198090Srdivacky/* 1391198090Srdivacky - dofwd - complete a forward reference 1392198090Srdivacky */ 1393198090Srdivackystatic void 1394198090Srdivackydofwd(struct parse *p, sopno pos, sop value) 1395198090Srdivacky{ 1396198090Srdivacky /* avoid making error situations worse */ 1397198090Srdivacky if (p->error != 0) 1398198090Srdivacky return; 1399198090Srdivacky 1400198090Srdivacky assert(value < 1<<OPSHIFT); 1401198090Srdivacky p->strip[pos] = OP(p->strip[pos]) | value; 1402198090Srdivacky} 1403198090Srdivacky 1404198090Srdivacky/* 1405198090Srdivacky - enlarge - enlarge the strip 1406198090Srdivacky */ 1407198090Srdivackystatic void 1408198090Srdivackyenlarge(struct parse *p, sopno size) 1409198090Srdivacky{ 1410198090Srdivacky sop *sp; 1411198090Srdivacky 1412198090Srdivacky if (p->ssize >= size) 1413198090Srdivacky return; 1414198090Srdivacky 1415198090Srdivacky sp = (sop *)realloc(p->strip, size*sizeof(sop)); 1416198090Srdivacky if (sp == NULL) { 1417198090Srdivacky SETERROR(REG_ESPACE); 1418198090Srdivacky return; 1419198090Srdivacky } 1420198090Srdivacky p->strip = sp; 1421198090Srdivacky p->ssize = size; 1422198090Srdivacky} 1423198090Srdivacky 1424198090Srdivacky/* 1425198090Srdivacky - stripsnug - compact the strip 1426198090Srdivacky */ 1427198090Srdivackystatic void 1428198090Srdivackystripsnug(struct parse *p, struct re_guts *g) 1429198090Srdivacky{ 1430198090Srdivacky g->nstates = p->slen; 1431198090Srdivacky g->strip = (sop *)realloc((char *)p->strip, p->slen * sizeof(sop)); 1432198090Srdivacky if (g->strip == NULL) { 1433198090Srdivacky SETERROR(REG_ESPACE); 1434198090Srdivacky g->strip = p->strip; 1435198090Srdivacky } 1436198090Srdivacky} 1437198090Srdivacky 1438198090Srdivacky/* 1439198090Srdivacky - findmust - fill in must and mlen with longest mandatory literal string 1440198090Srdivacky * 1441198090Srdivacky * This algorithm could do fancy things like analyzing the operands of | 1442198090Srdivacky * for common subsequences. Someday. This code is simple and finds most 1443198090Srdivacky * of the interesting cases. 1444198090Srdivacky * 1445198090Srdivacky * Note that must and mlen got initialized during setup. 1446198090Srdivacky */ 1447198090Srdivackystatic void 1448198090Srdivackyfindmust(struct parse *p, struct re_guts *g) 1449198090Srdivacky{ 1450198090Srdivacky sop *scan; 1451198090Srdivacky sop *start = 0; /* start initialized in the default case, after that */ 1452198090Srdivacky sop *newstart = 0; /* newstart was initialized in the OCHAR case */ 1453198090Srdivacky sopno newlen; 1454198090Srdivacky sop s; 1455198090Srdivacky char *cp; 1456198090Srdivacky sopno i; 1457198090Srdivacky 1458198090Srdivacky /* avoid making error situations worse */ 1459198090Srdivacky if (p->error != 0) 1460198090Srdivacky return; 1461198090Srdivacky 1462198090Srdivacky /* find the longest OCHAR sequence in strip */ 1463198090Srdivacky newlen = 0; 1464198090Srdivacky scan = g->strip + 1; 1465198090Srdivacky do { 1466198090Srdivacky s = *scan++; 1467198090Srdivacky switch (OP(s)) { 1468198090Srdivacky case OCHAR: /* sequence member */ 1469198090Srdivacky if (newlen == 0) /* new sequence */ 1470198090Srdivacky newstart = scan - 1; 1471198090Srdivacky newlen++; 1472198090Srdivacky break; 1473198090Srdivacky case OPLUS_: /* things that don't break one */ 1474198090Srdivacky case OLPAREN: 1475198090Srdivacky case ORPAREN: 1476198090Srdivacky break; 1477198090Srdivacky case OQUEST_: /* things that must be skipped */ 1478198090Srdivacky case OCH_: 1479198090Srdivacky scan--; 1480198090Srdivacky do { 1481198090Srdivacky scan += OPND(s); 1482198090Srdivacky s = *scan; 1483198090Srdivacky /* assert() interferes w debug printouts */ 1484198090Srdivacky if (OP(s) != O_QUEST && OP(s) != O_CH && 1485198090Srdivacky OP(s) != OOR2) { 1486198090Srdivacky g->iflags |= REGEX_BAD; 1487198090Srdivacky return; 1488198090Srdivacky } 1489198090Srdivacky } while (OP(s) != O_QUEST && OP(s) != O_CH); 1490198090Srdivacky /* fallthrough */ 1491198090Srdivacky default: /* things that break a sequence */ 1492198090Srdivacky if (newlen > g->mlen) { /* ends one */ 1493198090Srdivacky start = newstart; 1494198090Srdivacky g->mlen = newlen; 1495198090Srdivacky } 1496198090Srdivacky newlen = 0; 1497198090Srdivacky break; 1498198090Srdivacky } 1499198090Srdivacky } while (OP(s) != OEND); 1500198090Srdivacky 1501198090Srdivacky if (g->mlen == 0) /* there isn't one */ 1502198090Srdivacky return; 1503198090Srdivacky 1504198090Srdivacky /* turn it into a character string */ 1505198090Srdivacky g->must = malloc((size_t)g->mlen + 1); 1506198090Srdivacky if (g->must == NULL) { /* argh; just forget it */ 1507198090Srdivacky g->mlen = 0; 1508198090Srdivacky return; 1509198090Srdivacky } 1510198090Srdivacky cp = g->must; 1511198090Srdivacky scan = start; 1512198090Srdivacky for (i = g->mlen; i > 0; i--) { 1513198090Srdivacky while (OP(s = *scan++) != OCHAR) 1514198090Srdivacky continue; 1515198090Srdivacky assert(cp < g->must + g->mlen); 1516198090Srdivacky *cp++ = (char)OPND(s); 1517198090Srdivacky } 1518198090Srdivacky assert(cp == g->must + g->mlen); 1519198090Srdivacky *cp++ = '\0'; /* just on general principles */ 1520198090Srdivacky} 1521198090Srdivacky 1522198090Srdivacky/* 1523198090Srdivacky - pluscount - count + nesting 1524198090Srdivacky */ 1525198090Srdivackystatic sopno /* nesting depth */ 1526198090Srdivackypluscount(struct parse *p, struct re_guts *g) 1527198090Srdivacky{ 1528198090Srdivacky sop *scan; 1529198090Srdivacky sop s; 1530198090Srdivacky sopno plusnest = 0; 1531198090Srdivacky sopno maxnest = 0; 1532198090Srdivacky 1533198090Srdivacky if (p->error != 0) 1534198090Srdivacky return(0); /* there may not be an OEND */ 1535198090Srdivacky 1536198090Srdivacky scan = g->strip + 1; 1537198090Srdivacky do { 1538198090Srdivacky s = *scan++; 1539198090Srdivacky switch (OP(s)) { 1540198090Srdivacky case OPLUS_: 1541198090Srdivacky plusnest++; 1542198090Srdivacky break; 1543198090Srdivacky case O_PLUS: 1544198090Srdivacky if (plusnest > maxnest) 1545198090Srdivacky maxnest = plusnest; 1546198090Srdivacky plusnest--; 1547198090Srdivacky break; 1548198090Srdivacky } 1549198090Srdivacky } while (OP(s) != OEND); 1550198090Srdivacky if (plusnest != 0) 1551198090Srdivacky g->iflags |= REGEX_BAD; 1552198090Srdivacky return(maxnest); 1553198090Srdivacky} 1554