192986Sobrien#include <sys/cdefs.h> 292986Sobrien__FBSDID("$FreeBSD$"); 392986Sobrien 4292144Sngie#include <sys/types.h> 5292144Sngie#include <assert.h> 6292144Sngie#include <regex.h> 762856Sdcs#include <stdio.h> 8292144Sngie#include <stdlib.h> 962856Sdcs#include <string.h> 10292144Sngie#include <unistd.h> 1162856Sdcs 12292170Sngie#include "debug.ih" 1362856Sdcs#include "main.ih" 14292170Sngie#include "split.ih" 1562856Sdcs 1662856Sdcschar *progname; 1762856Sdcsint debug = 0; 1862856Sdcsint line = 0; 1962856Sdcsint status = 0; 2062856Sdcs 2162856Sdcsint copts = REG_EXTENDED; 2262856Sdcsint eopts = 0; 2362856Sdcsregoff_t startoff = 0; 2462856Sdcsregoff_t endoff = 0; 2562856Sdcs 2662856Sdcs 2762856Sdcs/* 2862856Sdcs - main - do the simple case, hand off to regress() for regression 2962856Sdcs */ 30292144Sngieint 31292144Sngiemain(int argc, char **argv) 3262856Sdcs{ 3362856Sdcs regex_t re; 3462856Sdcs# define NS 10 3562856Sdcs regmatch_t subs[NS]; 3662856Sdcs char erbuf[100]; 3762856Sdcs int err; 3862856Sdcs size_t len; 3962856Sdcs int c; 4062856Sdcs int errflg = 0; 4192889Sobrien int i; 4262856Sdcs extern int optind; 4362856Sdcs extern char *optarg; 4462856Sdcs 4562856Sdcs progname = argv[0]; 4662856Sdcs 47176380Skevlo while ((c = getopt(argc, argv, "c:e:S:E:x")) != -1) 4862856Sdcs switch (c) { 4962856Sdcs case 'c': /* compile options */ 5062856Sdcs copts = options('c', optarg); 5162856Sdcs break; 5262856Sdcs case 'e': /* execute options */ 5362856Sdcs eopts = options('e', optarg); 5462856Sdcs break; 5562856Sdcs case 'S': /* start offset */ 5662856Sdcs startoff = (regoff_t)atoi(optarg); 5762856Sdcs break; 5862856Sdcs case 'E': /* end offset */ 5962856Sdcs endoff = (regoff_t)atoi(optarg); 6062856Sdcs break; 6162856Sdcs case 'x': /* Debugging. */ 6262856Sdcs debug++; 6362856Sdcs break; 6462856Sdcs case '?': 6562856Sdcs default: 6662856Sdcs errflg++; 6762856Sdcs break; 6862856Sdcs } 6962856Sdcs if (errflg) { 7062856Sdcs fprintf(stderr, "usage: %s ", progname); 7162856Sdcs fprintf(stderr, "[-c copt][-C][-d] [re]\n"); 7262856Sdcs exit(2); 7362856Sdcs } 7462856Sdcs 7562856Sdcs if (optind >= argc) { 7662856Sdcs regress(stdin); 7762856Sdcs exit(status); 7862856Sdcs } 7962856Sdcs 8062856Sdcs err = regcomp(&re, argv[optind++], copts); 8162856Sdcs if (err) { 8262856Sdcs len = regerror(err, &re, erbuf, sizeof(erbuf)); 83292144Sngie fprintf(stderr, "error %s, %zu/%zu `%s'\n", 84292144Sngie eprint(err), len, sizeof(erbuf), erbuf); 8562856Sdcs exit(status); 8662856Sdcs } 87292144Sngie regprint(&re, stdout); 8862856Sdcs 8962856Sdcs if (optind >= argc) { 9062856Sdcs regfree(&re); 9162856Sdcs exit(status); 9262856Sdcs } 9362856Sdcs 94292144Sngie if ((eopts & REG_STARTEND) != 0) { 9562856Sdcs subs[0].rm_so = startoff; 9662856Sdcs subs[0].rm_eo = strlen(argv[optind]) - endoff; 9762856Sdcs } 9862856Sdcs err = regexec(&re, argv[optind], (size_t)NS, subs, eopts); 9962856Sdcs if (err) { 10062856Sdcs len = regerror(err, &re, erbuf, sizeof(erbuf)); 101292144Sngie fprintf(stderr, "error %s, %zu/%zu `%s'\n", 102292144Sngie eprint(err), len, sizeof(erbuf), erbuf); 10362856Sdcs exit(status); 10462856Sdcs } 105292144Sngie if ((copts & REG_NOSUB) == 0) { 10662856Sdcs len = (int)(subs[0].rm_eo - subs[0].rm_so); 10762856Sdcs if (subs[0].rm_so != -1) { 10862856Sdcs if (len != 0) 109292144Sngie printf("match `%.*s'\n", (int)len, 110292144Sngie argv[optind] + subs[0].rm_so); 11162856Sdcs else 11262856Sdcs printf("match `'@%.1s\n", 113292144Sngie argv[optind] + subs[0].rm_so); 11462856Sdcs } 11562856Sdcs for (i = 1; i < NS; i++) 11662856Sdcs if (subs[i].rm_so != -1) 11762856Sdcs printf("(%d) `%.*s'\n", i, 118292144Sngie (int)(subs[i].rm_eo - subs[i].rm_so), 119292144Sngie argv[optind] + subs[i].rm_so); 12062856Sdcs } 12162856Sdcs exit(status); 12262856Sdcs} 12362856Sdcs 12462856Sdcs/* 12562856Sdcs - regress - main loop of regression test 12662856Sdcs == void regress(FILE *in); 12762856Sdcs */ 12862856Sdcsvoid 129292144Sngieregress(FILE *in) 13062856Sdcs{ 13162856Sdcs char inbuf[1000]; 13262856Sdcs# define MAXF 10 13362856Sdcs char *f[MAXF]; 13462856Sdcs int nf; 13562856Sdcs int i; 13662856Sdcs char erbuf[100]; 13762856Sdcs size_t ne; 13862856Sdcs char *badpat = "invalid regular expression"; 13962856Sdcs# define SHORT 10 14062856Sdcs char *bpname = "REG_BADPAT"; 14162856Sdcs regex_t re; 14262856Sdcs 14362856Sdcs while (fgets(inbuf, sizeof(inbuf), in) != NULL) { 14462856Sdcs line++; 14562856Sdcs if (inbuf[0] == '#' || inbuf[0] == '\n') 14662856Sdcs continue; /* NOTE CONTINUE */ 14762856Sdcs inbuf[strlen(inbuf)-1] = '\0'; /* get rid of stupid \n */ 14862856Sdcs if (debug) 14962856Sdcs fprintf(stdout, "%d:\n", line); 15062856Sdcs nf = split(inbuf, f, MAXF, "\t\t"); 15162856Sdcs if (nf < 3) { 15262856Sdcs fprintf(stderr, "bad input, line %d\n", line); 15362856Sdcs exit(1); 15462856Sdcs } 15562856Sdcs for (i = 0; i < nf; i++) 15662856Sdcs if (strcmp(f[i], "\"\"") == 0) 15762856Sdcs f[i] = ""; 15862856Sdcs if (nf <= 3) 15962856Sdcs f[3] = NULL; 16062856Sdcs if (nf <= 4) 16162856Sdcs f[4] = NULL; 16262856Sdcs try(f[0], f[1], f[2], f[3], f[4], options('c', f[1])); 16362856Sdcs if (opt('&', f[1])) /* try with either type of RE */ 16462856Sdcs try(f[0], f[1], f[2], f[3], f[4], 16562856Sdcs options('c', f[1]) &~ REG_EXTENDED); 16662856Sdcs } 16762856Sdcs 16862856Sdcs ne = regerror(REG_BADPAT, (regex_t *)NULL, erbuf, sizeof(erbuf)); 16962856Sdcs if (strcmp(erbuf, badpat) != 0 || ne != strlen(badpat)+1) { 17062856Sdcs fprintf(stderr, "end: regerror() test gave `%s' not `%s'\n", 17162856Sdcs erbuf, badpat); 17262856Sdcs status = 1; 17362856Sdcs } 17462856Sdcs ne = regerror(REG_BADPAT, (regex_t *)NULL, erbuf, (size_t)SHORT); 17562856Sdcs if (strncmp(erbuf, badpat, SHORT-1) != 0 || erbuf[SHORT-1] != '\0' || 176292144Sngie ne != strlen(badpat)+1) { 17762856Sdcs fprintf(stderr, "end: regerror() short test gave `%s' not `%.*s'\n", 17862856Sdcs erbuf, SHORT-1, badpat); 17962856Sdcs status = 1; 18062856Sdcs } 18162856Sdcs ne = regerror(REG_ITOA|REG_BADPAT, (regex_t *)NULL, erbuf, sizeof(erbuf)); 182292144Sngie if (strcmp(erbuf, bpname) != 0 || ne != strlen(bpname) + 1) { 18362856Sdcs fprintf(stderr, "end: regerror() ITOA test gave `%s' not `%s'\n", 18462856Sdcs erbuf, bpname); 18562856Sdcs status = 1; 18662856Sdcs } 18762856Sdcs re.re_endp = bpname; 18862856Sdcs ne = regerror(REG_ATOI, &re, erbuf, sizeof(erbuf)); 18962856Sdcs if (atoi(erbuf) != (int)REG_BADPAT) { 19062856Sdcs fprintf(stderr, "end: regerror() ATOI test gave `%s' not `%ld'\n", 19162856Sdcs erbuf, (long)REG_BADPAT); 19262856Sdcs status = 1; 193292144Sngie } else if (ne != strlen(erbuf) + 1) { 19462856Sdcs fprintf(stderr, "end: regerror() ATOI test len(`%s') = %ld\n", 19562856Sdcs erbuf, (long)REG_BADPAT); 19662856Sdcs status = 1; 19762856Sdcs } 19862856Sdcs} 19962856Sdcs 20062856Sdcs/* 20162856Sdcs - try - try it, and report on problems 20262856Sdcs == void try(char *f0, char *f1, char *f2, char *f3, char *f4, int opts); 203292144Sngie - opts: may not match f1 20462856Sdcs */ 20562856Sdcsvoid 206292144Sngietry(char *f0, char *f1, char *f2, char *f3, char *f4, int opts) 20762856Sdcs{ 20862856Sdcs regex_t re; 20962856Sdcs# define NSUBS 10 21062856Sdcs regmatch_t subs[NSUBS]; 21162856Sdcs# define NSHOULD 15 21262856Sdcs char *should[NSHOULD]; 21362856Sdcs char erbuf[100]; 214292144Sngie size_t len; 215292144Sngie int err, i, nshould; 216292144Sngie char *grump; 21762856Sdcs char *type = (opts & REG_EXTENDED) ? "ERE" : "BRE"; 21862856Sdcs char f0copy[1000]; 21962856Sdcs char f2copy[1000]; 22062856Sdcs 22162856Sdcs strcpy(f0copy, f0); 22262856Sdcs re.re_endp = (opts®_PEND) ? f0copy + strlen(f0copy) : NULL; 22362856Sdcs fixstr(f0copy); 22462856Sdcs err = regcomp(&re, f0copy, opts); 22562856Sdcs if (err != 0 && (!opt('C', f1) || err != efind(f2))) { 22662856Sdcs /* unexpected error or wrong error */ 22762856Sdcs len = regerror(err, &re, erbuf, sizeof(erbuf)); 228292144Sngie fprintf(stderr, "%d: %s error %s, %zu/%zu `%s'\n", 229292144Sngie line, type, eprint(err), len, sizeof(erbuf), erbuf); 23062856Sdcs status = 1; 23162856Sdcs } else if (err == 0 && opt('C', f1)) { 23262856Sdcs /* unexpected success */ 23362856Sdcs fprintf(stderr, "%d: %s should have given REG_%s\n", 23462856Sdcs line, type, f2); 23562856Sdcs status = 1; 23662856Sdcs err = 1; /* so we won't try regexec */ 23762856Sdcs } 23862856Sdcs 23962856Sdcs if (err != 0) { 24062856Sdcs regfree(&re); 24162856Sdcs return; 24262856Sdcs } 24362856Sdcs 24462856Sdcs strcpy(f2copy, f2); 24562856Sdcs fixstr(f2copy); 24662856Sdcs 24762856Sdcs if (options('e', f1)®_STARTEND) { 24862856Sdcs if (strchr(f2, '(') == NULL || strchr(f2, ')') == NULL) 24962856Sdcs fprintf(stderr, "%d: bad STARTEND syntax\n", line); 25062856Sdcs subs[0].rm_so = strchr(f2, '(') - f2 + 1; 25162856Sdcs subs[0].rm_eo = strchr(f2, ')') - f2; 25262856Sdcs } 25362856Sdcs err = regexec(&re, f2copy, NSUBS, subs, options('e', f1)); 25462856Sdcs 25562856Sdcs if (err != 0 && (f3 != NULL || err != REG_NOMATCH)) { 25662856Sdcs /* unexpected error or wrong error */ 25762856Sdcs len = regerror(err, &re, erbuf, sizeof(erbuf)); 258292144Sngie fprintf(stderr, "%d: %s exec error %s, %zu/%zu `%s'\n", 259292144Sngie line, type, eprint(err), len, sizeof(erbuf), erbuf); 26062856Sdcs status = 1; 26162856Sdcs } else if (err != 0) { 26262856Sdcs /* nothing more to check */ 26362856Sdcs } else if (f3 == NULL) { 26462856Sdcs /* unexpected success */ 26562856Sdcs fprintf(stderr, "%d: %s exec should have failed\n", 266292144Sngie line, type); 26762856Sdcs status = 1; 26862856Sdcs err = 1; /* just on principle */ 26962856Sdcs } else if (opts®_NOSUB) { 27062856Sdcs /* nothing more to check */ 27162856Sdcs } else if ((grump = check(f2, subs[0], f3)) != NULL) { 27262856Sdcs fprintf(stderr, "%d: %s %s\n", line, type, grump); 27362856Sdcs status = 1; 27462856Sdcs err = 1; 27562856Sdcs } 27662856Sdcs 27762856Sdcs if (err != 0 || f4 == NULL) { 27862856Sdcs regfree(&re); 27962856Sdcs return; 28062856Sdcs } 28162856Sdcs 28262856Sdcs for (i = 1; i < NSHOULD; i++) 28362856Sdcs should[i] = NULL; 28462856Sdcs nshould = split(f4, should+1, NSHOULD-1, ","); 28562856Sdcs if (nshould == 0) { 28662856Sdcs nshould = 1; 28762856Sdcs should[1] = ""; 28862856Sdcs } 28962856Sdcs for (i = 1; i < NSUBS; i++) { 29062856Sdcs grump = check(f2, subs[i], should[i]); 29162856Sdcs if (grump != NULL) { 29262856Sdcs fprintf(stderr, "%d: %s $%d %s\n", line, 293292144Sngie type, i, grump); 29462856Sdcs status = 1; 29562856Sdcs err = 1; 29662856Sdcs } 29762856Sdcs } 29862856Sdcs 29962856Sdcs regfree(&re); 30062856Sdcs} 30162856Sdcs 30262856Sdcs/* 30362856Sdcs - options - pick options out of a regression-test string 304292144Sngie - type: 'c' - compile, 'e' - exec 30562856Sdcs == int options(int type, char *s); 30662856Sdcs */ 30762856Sdcsint 308292144Sngieoptions(int type, char *s) 30962856Sdcs{ 31092889Sobrien char *p; 31192889Sobrien int o = (type == 'c') ? copts : eopts; 31292889Sobrien char *legal = (type == 'c') ? "bisnmp" : "^$#tl"; 31362856Sdcs 31462856Sdcs for (p = s; *p != '\0'; p++) 31562856Sdcs if (strchr(legal, *p) != NULL) 31662856Sdcs switch (*p) { 31762856Sdcs case 'b': 31862856Sdcs o &= ~REG_EXTENDED; 31962856Sdcs break; 32062856Sdcs case 'i': 32162856Sdcs o |= REG_ICASE; 32262856Sdcs break; 32362856Sdcs case 's': 32462856Sdcs o |= REG_NOSUB; 32562856Sdcs break; 32662856Sdcs case 'n': 32762856Sdcs o |= REG_NEWLINE; 32862856Sdcs break; 32962856Sdcs case 'm': 33062856Sdcs o &= ~REG_EXTENDED; 33162856Sdcs o |= REG_NOSPEC; 33262856Sdcs break; 33362856Sdcs case 'p': 33462856Sdcs o |= REG_PEND; 33562856Sdcs break; 33662856Sdcs case '^': 33762856Sdcs o |= REG_NOTBOL; 33862856Sdcs break; 33962856Sdcs case '$': 34062856Sdcs o |= REG_NOTEOL; 34162856Sdcs break; 34262856Sdcs case '#': 34362856Sdcs o |= REG_STARTEND; 34462856Sdcs break; 34562856Sdcs case 't': /* trace */ 34662856Sdcs o |= REG_TRACE; 34762856Sdcs break; 34862856Sdcs case 'l': /* force long representation */ 34962856Sdcs o |= REG_LARGE; 35062856Sdcs break; 35162856Sdcs case 'r': /* force backref use */ 35262856Sdcs o |= REG_BACKR; 35362856Sdcs break; 35462856Sdcs } 35562856Sdcs return(o); 35662856Sdcs} 35762856Sdcs 35862856Sdcs/* 35962856Sdcs - opt - is a particular option in a regression string? 36062856Sdcs == int opt(int c, char *s); 36162856Sdcs */ 36262856Sdcsint /* predicate */ 363292144Sngieopt(int c, char *s) 36462856Sdcs{ 36562856Sdcs return(strchr(s, c) != NULL); 36662856Sdcs} 36762856Sdcs 36862856Sdcs/* 36962856Sdcs - fixstr - transform magic characters in strings 37092889Sobrien == void fixstr(char *p); 37162856Sdcs */ 37262856Sdcsvoid 373292144Sngiefixstr(char *p) 37462856Sdcs{ 37562856Sdcs if (p == NULL) 37662856Sdcs return; 37762856Sdcs 37862856Sdcs for (; *p != '\0'; p++) 37962856Sdcs if (*p == 'N') 38062856Sdcs *p = '\n'; 38162856Sdcs else if (*p == 'T') 38262856Sdcs *p = '\t'; 38362856Sdcs else if (*p == 'S') 38462856Sdcs *p = ' '; 38562856Sdcs else if (*p == 'Z') 38662856Sdcs *p = '\0'; 38762856Sdcs} 38862856Sdcs 38962856Sdcs/* 39062856Sdcs - check - check a substring match 39162856Sdcs == char *check(char *str, regmatch_t sub, char *should); 39262856Sdcs */ 39362856Sdcschar * /* NULL or complaint */ 394292144Sngiecheck(char *str, regmatch_t sub, char *should) 39562856Sdcs{ 39692889Sobrien int len; 39792889Sobrien int shlen; 39892889Sobrien char *p; 39962856Sdcs static char grump[500]; 40092889Sobrien char *at = NULL; 40162856Sdcs 40262856Sdcs if (should != NULL && strcmp(should, "-") == 0) 40362856Sdcs should = NULL; 40462856Sdcs if (should != NULL && should[0] == '@') { 40562856Sdcs at = should + 1; 40662856Sdcs should = ""; 40762856Sdcs } 40862856Sdcs 40962856Sdcs /* check rm_so and rm_eo for consistency */ 41062856Sdcs if (sub.rm_so > sub.rm_eo || (sub.rm_so == -1 && sub.rm_eo != -1) || 41162856Sdcs (sub.rm_so != -1 && sub.rm_eo == -1) || 41262856Sdcs (sub.rm_so != -1 && sub.rm_so < 0) || 41362856Sdcs (sub.rm_eo != -1 && sub.rm_eo < 0) ) { 41462856Sdcs sprintf(grump, "start %ld end %ld", (long)sub.rm_so, 41562856Sdcs (long)sub.rm_eo); 41662856Sdcs return(grump); 41762856Sdcs } 41862856Sdcs 41962856Sdcs /* check for no match */ 42062856Sdcs if (sub.rm_so == -1 && should == NULL) 42162856Sdcs return(NULL); 42262856Sdcs if (sub.rm_so == -1) 42362856Sdcs return("did not match"); 42462856Sdcs 42562856Sdcs /* check for in range */ 42662856Sdcs if (sub.rm_eo > strlen(str)) { 42762856Sdcs sprintf(grump, "start %ld end %ld, past end of string", 428292144Sngie (long)sub.rm_so, (long)sub.rm_eo); 42962856Sdcs return(grump); 43062856Sdcs } 43162856Sdcs 43262856Sdcs len = (int)(sub.rm_eo - sub.rm_so); 43362856Sdcs shlen = (int)strlen(should); 43462856Sdcs p = str + sub.rm_so; 43562856Sdcs 43662856Sdcs /* check for not supposed to match */ 43762856Sdcs if (should == NULL) { 43862856Sdcs sprintf(grump, "matched `%.*s'", len, p); 43962856Sdcs return(grump); 44062856Sdcs } 44162856Sdcs 44262856Sdcs /* check for wrong match */ 44362856Sdcs if (len != shlen || strncmp(p, should, (size_t)shlen) != 0) { 44462856Sdcs sprintf(grump, "matched `%.*s' instead", len, p); 44562856Sdcs return(grump); 44662856Sdcs } 44762856Sdcs if (shlen > 0) 44862856Sdcs return(NULL); 44962856Sdcs 45062856Sdcs /* check null match in right place */ 45162856Sdcs if (at == NULL) 45262856Sdcs return(NULL); 45362856Sdcs shlen = strlen(at); 45462856Sdcs if (shlen == 0) 45562856Sdcs shlen = 1; /* force check for end-of-string */ 45662856Sdcs if (strncmp(p, at, shlen) != 0) { 45762856Sdcs sprintf(grump, "matched null at `%.20s'", p); 45862856Sdcs return(grump); 45962856Sdcs } 46062856Sdcs return(NULL); 46162856Sdcs} 46262856Sdcs 46362856Sdcs/* 46462856Sdcs - eprint - convert error number to name 46562856Sdcs == static char *eprint(int err); 46662856Sdcs */ 46762856Sdcsstatic char * 468292144Sngieeprint(int err) 46962856Sdcs{ 47062856Sdcs static char epbuf[100]; 47162856Sdcs size_t len; 47262856Sdcs 47362856Sdcs len = regerror(REG_ITOA|err, (regex_t *)NULL, epbuf, sizeof(epbuf)); 47462856Sdcs assert(len <= sizeof(epbuf)); 47562856Sdcs return(epbuf); 47662856Sdcs} 47762856Sdcs 47862856Sdcs/* 47962856Sdcs - efind - convert error name to number 48062856Sdcs == static int efind(char *name); 48162856Sdcs */ 48262856Sdcsstatic int 483292144Sngieefind(char *name) 48462856Sdcs{ 48562856Sdcs static char efbuf[100]; 48662856Sdcs size_t n; 48762856Sdcs regex_t re; 48862856Sdcs 48962856Sdcs sprintf(efbuf, "REG_%s", name); 49062856Sdcs assert(strlen(efbuf) < sizeof(efbuf)); 49162856Sdcs re.re_endp = efbuf; 49262856Sdcs (void) regerror(REG_ATOI, &re, efbuf, sizeof(efbuf)); 49362856Sdcs return(atoi(efbuf)); 49462856Sdcs} 495