1238730Sdelphij/* 2238730Sdelphij * Copyright (C) 1984-2012 Mark Nudelman 3238730Sdelphij * 4238730Sdelphij * You may distribute under the terms of either the GNU General Public 5238730Sdelphij * License or the Less License, as specified in the README file. 6238730Sdelphij * 7238730Sdelphij * For more information, see the README file. 8238730Sdelphij */ 9195902Sdelphij 10195902Sdelphij/* 11195902Sdelphij * Routines to do pattern matching. 12195902Sdelphij */ 13195902Sdelphij 14195902Sdelphij#include "less.h" 15195902Sdelphij#include "pattern.h" 16195902Sdelphij 17195902Sdelphijextern int caseless; 18195902Sdelphij 19195902Sdelphij/* 20195902Sdelphij * Compile a search pattern, for future use by match_pattern. 21195902Sdelphij */ 22195902Sdelphij static int 23195902Sdelphijcompile_pattern2(pattern, search_type, comp_pattern) 24195902Sdelphij char *pattern; 25195902Sdelphij int search_type; 26195902Sdelphij void **comp_pattern; 27195902Sdelphij{ 28237613Sdelphij if (search_type & SRCH_NO_REGEX) 29237613Sdelphij return (0); 30237613Sdelphij { 31237613Sdelphij#if HAVE_GNU_REGEX 32237613Sdelphij struct re_pattern_buffer *comp = (struct re_pattern_buffer *) 33237613Sdelphij ecalloc(1, sizeof(struct re_pattern_buffer)); 34237613Sdelphij struct re_pattern_buffer **pcomp = 35237613Sdelphij (struct re_pattern_buffer **) comp_pattern; 36237613Sdelphij re_set_syntax(RE_SYNTAX_POSIX_EXTENDED); 37237613Sdelphij if (re_compile_pattern(pattern, strlen(pattern), comp)) 38195902Sdelphij { 39237613Sdelphij free(comp); 40237613Sdelphij error("Invalid pattern", NULL_PARG); 41237613Sdelphij return (-1); 42237613Sdelphij } 43237613Sdelphij if (*pcomp != NULL) 44237613Sdelphij regfree(*pcomp); 45237613Sdelphij *pcomp = comp; 46237613Sdelphij#endif 47195902Sdelphij#if HAVE_POSIX_REGCOMP 48237613Sdelphij regex_t *comp = (regex_t *) ecalloc(1, sizeof(regex_t)); 49237613Sdelphij regex_t **pcomp = (regex_t **) comp_pattern; 50237613Sdelphij if (regcomp(comp, pattern, REGCOMP_FLAG)) 51237613Sdelphij { 52237613Sdelphij free(comp); 53237613Sdelphij error("Invalid pattern", NULL_PARG); 54237613Sdelphij return (-1); 55237613Sdelphij } 56237613Sdelphij if (*pcomp != NULL) 57237613Sdelphij regfree(*pcomp); 58237613Sdelphij *pcomp = comp; 59195902Sdelphij#endif 60195902Sdelphij#if HAVE_PCRE 61237613Sdelphij pcre *comp; 62237613Sdelphij pcre **pcomp = (pcre **) comp_pattern; 63237613Sdelphij constant char *errstring; 64237613Sdelphij int erroffset; 65237613Sdelphij PARG parg; 66237613Sdelphij comp = pcre_compile(pattern, 0, 67237613Sdelphij &errstring, &erroffset, NULL); 68237613Sdelphij if (comp == NULL) 69237613Sdelphij { 70237613Sdelphij parg.p_string = (char *) errstring; 71237613Sdelphij error("%s", &parg); 72237613Sdelphij return (-1); 73237613Sdelphij } 74237613Sdelphij *pcomp = comp; 75195902Sdelphij#endif 76195902Sdelphij#if HAVE_RE_COMP 77237613Sdelphij PARG parg; 78237613Sdelphij int *pcomp = (int *) comp_pattern; 79237613Sdelphij if ((parg.p_string = re_comp(pattern)) != NULL) 80237613Sdelphij { 81237613Sdelphij error("%s", &parg); 82237613Sdelphij return (-1); 83237613Sdelphij } 84237613Sdelphij *pcomp = 1; 85195902Sdelphij#endif 86195902Sdelphij#if HAVE_REGCMP 87237613Sdelphij char *comp; 88237613Sdelphij char **pcomp = (char **) comp_pattern; 89237613Sdelphij if ((comp = regcmp(pattern, 0)) == NULL) 90237613Sdelphij { 91237613Sdelphij error("Invalid pattern", NULL_PARG); 92237613Sdelphij return (-1); 93237613Sdelphij } 94237613Sdelphij if (pcomp != NULL) 95237613Sdelphij free(*pcomp); 96237613Sdelphij *pcomp = comp; 97195902Sdelphij#endif 98195902Sdelphij#if HAVE_V8_REGCOMP 99237613Sdelphij struct regexp *comp; 100237613Sdelphij struct regexp **pcomp = (struct regexp **) comp_pattern; 101237613Sdelphij if ((comp = regcomp(pattern)) == NULL) 102237613Sdelphij { 103237613Sdelphij /* 104237613Sdelphij * regcomp has already printed an error message 105237613Sdelphij * via regerror(). 106237613Sdelphij */ 107237613Sdelphij return (-1); 108237613Sdelphij } 109237613Sdelphij if (*pcomp != NULL) 110237613Sdelphij free(*pcomp); 111237613Sdelphij *pcomp = comp; 112195902Sdelphij#endif 113237613Sdelphij } 114195902Sdelphij return (0); 115195902Sdelphij} 116195902Sdelphij 117195902Sdelphij/* 118195902Sdelphij * Like compile_pattern2, but convert the pattern to lowercase if necessary. 119195902Sdelphij */ 120195902Sdelphij public int 121195902Sdelphijcompile_pattern(pattern, search_type, comp_pattern) 122195902Sdelphij char *pattern; 123195902Sdelphij int search_type; 124195902Sdelphij void **comp_pattern; 125195902Sdelphij{ 126195902Sdelphij char *cvt_pattern; 127195902Sdelphij int result; 128195902Sdelphij 129195902Sdelphij if (caseless != OPT_ONPLUS) 130195902Sdelphij cvt_pattern = pattern; 131195902Sdelphij else 132195902Sdelphij { 133195902Sdelphij cvt_pattern = (char*) ecalloc(1, cvt_length(strlen(pattern), CVT_TO_LC)); 134195902Sdelphij cvt_text(cvt_pattern, pattern, (int *)NULL, (int *)NULL, CVT_TO_LC); 135195902Sdelphij } 136195902Sdelphij result = compile_pattern2(cvt_pattern, search_type, comp_pattern); 137195902Sdelphij if (cvt_pattern != pattern) 138195902Sdelphij free(cvt_pattern); 139195902Sdelphij return (result); 140195902Sdelphij} 141195902Sdelphij 142195902Sdelphij/* 143195902Sdelphij * Forget that we have a compiled pattern. 144195902Sdelphij */ 145195902Sdelphij public void 146195902Sdelphijuncompile_pattern(pattern) 147195902Sdelphij void **pattern; 148195902Sdelphij{ 149237613Sdelphij#if HAVE_GNU_REGEX 150237613Sdelphij struct re_pattern_buffer **pcomp = (struct re_pattern_buffer **) pattern; 151237613Sdelphij if (*pcomp != NULL) 152237613Sdelphij regfree(*pcomp); 153237613Sdelphij *pcomp = NULL; 154237613Sdelphij#endif 155195902Sdelphij#if HAVE_POSIX_REGCOMP 156195902Sdelphij regex_t **pcomp = (regex_t **) pattern; 157195902Sdelphij if (*pcomp != NULL) 158195902Sdelphij regfree(*pcomp); 159195902Sdelphij *pcomp = NULL; 160195902Sdelphij#endif 161195902Sdelphij#if HAVE_PCRE 162195902Sdelphij pcre **pcomp = (pcre **) pattern; 163195902Sdelphij if (*pcomp != NULL) 164195902Sdelphij pcre_free(*pcomp); 165195902Sdelphij *pcomp = NULL; 166195902Sdelphij#endif 167195902Sdelphij#if HAVE_RE_COMP 168195902Sdelphij int *pcomp = (int *) pattern; 169195902Sdelphij *pcomp = 0; 170195902Sdelphij#endif 171195902Sdelphij#if HAVE_REGCMP 172195902Sdelphij char **pcomp = (char **) pattern; 173195902Sdelphij if (*pcomp != NULL) 174195902Sdelphij free(*pcomp); 175195902Sdelphij *pcomp = NULL; 176195902Sdelphij#endif 177195902Sdelphij#if HAVE_V8_REGCOMP 178195902Sdelphij struct regexp **pcomp = (struct regexp **) pattern; 179195902Sdelphij if (*pcomp != NULL) 180195902Sdelphij free(*pcomp); 181195902Sdelphij *pcomp = NULL; 182195902Sdelphij#endif 183195902Sdelphij} 184195902Sdelphij 185195902Sdelphij/* 186195902Sdelphij * Is a compiled pattern null? 187195902Sdelphij */ 188195902Sdelphij public int 189195902Sdelphijis_null_pattern(pattern) 190195902Sdelphij void *pattern; 191195902Sdelphij{ 192237613Sdelphij#if HAVE_GNU_REGEX 193237613Sdelphij return (pattern == NULL); 194237613Sdelphij#endif 195195902Sdelphij#if HAVE_POSIX_REGCOMP 196195902Sdelphij return (pattern == NULL); 197195902Sdelphij#endif 198195902Sdelphij#if HAVE_PCRE 199195902Sdelphij return (pattern == NULL); 200195902Sdelphij#endif 201195902Sdelphij#if HAVE_RE_COMP 202195902Sdelphij return (pattern == 0); 203195902Sdelphij#endif 204195902Sdelphij#if HAVE_REGCMP 205195902Sdelphij return (pattern == NULL); 206195902Sdelphij#endif 207195902Sdelphij#if HAVE_V8_REGCOMP 208195902Sdelphij return (pattern == NULL); 209195902Sdelphij#endif 210195902Sdelphij} 211195902Sdelphij 212195902Sdelphij/* 213195902Sdelphij * Simple pattern matching function. 214195902Sdelphij * It supports no metacharacters like *, etc. 215195902Sdelphij */ 216195902Sdelphij static int 217195902Sdelphijmatch(pattern, pattern_len, buf, buf_len, pfound, pend) 218195902Sdelphij char *pattern; 219195902Sdelphij int pattern_len; 220195902Sdelphij char *buf; 221195902Sdelphij int buf_len; 222195902Sdelphij char **pfound, **pend; 223195902Sdelphij{ 224195902Sdelphij register char *pp, *lp; 225195902Sdelphij register char *pattern_end = pattern + pattern_len; 226195902Sdelphij register char *buf_end = buf + buf_len; 227195902Sdelphij 228195902Sdelphij for ( ; buf < buf_end; buf++) 229195902Sdelphij { 230195902Sdelphij for (pp = pattern, lp = buf; *pp == *lp; pp++, lp++) 231195902Sdelphij if (pp == pattern_end || lp == buf_end) 232195902Sdelphij break; 233195902Sdelphij if (pp == pattern_end) 234195902Sdelphij { 235195902Sdelphij if (pfound != NULL) 236195902Sdelphij *pfound = buf; 237195902Sdelphij if (pend != NULL) 238195902Sdelphij *pend = lp; 239195902Sdelphij return (1); 240195902Sdelphij } 241195902Sdelphij } 242195902Sdelphij return (0); 243195902Sdelphij} 244195902Sdelphij 245195902Sdelphij/* 246195902Sdelphij * Perform a pattern match with the previously compiled pattern. 247195902Sdelphij * Set sp and ep to the start and end of the matched string. 248195902Sdelphij */ 249195902Sdelphij public int 250195902Sdelphijmatch_pattern(pattern, tpattern, line, line_len, sp, ep, notbol, search_type) 251195902Sdelphij void *pattern; 252195902Sdelphij char *tpattern; 253195902Sdelphij char *line; 254195902Sdelphij int line_len; 255195902Sdelphij char **sp; 256195902Sdelphij char **ep; 257195902Sdelphij int notbol; 258195902Sdelphij int search_type; 259195902Sdelphij{ 260195902Sdelphij int matched; 261237613Sdelphij#if HAVE_GNU_REGEX 262237613Sdelphij struct re_pattern_buffer *spattern = (struct re_pattern_buffer *) pattern; 263237613Sdelphij#endif 264195902Sdelphij#if HAVE_POSIX_REGCOMP 265195902Sdelphij regex_t *spattern = (regex_t *) pattern; 266195902Sdelphij#endif 267195902Sdelphij#if HAVE_PCRE 268195902Sdelphij pcre *spattern = (pcre *) pattern; 269195902Sdelphij#endif 270195902Sdelphij#if HAVE_RE_COMP 271195902Sdelphij int spattern = (int) pattern; 272195902Sdelphij#endif 273195902Sdelphij#if HAVE_REGCMP 274195902Sdelphij char *spattern = (char *) pattern; 275195902Sdelphij#endif 276195902Sdelphij#if HAVE_V8_REGCOMP 277195902Sdelphij struct regexp *spattern = (struct regexp *) pattern; 278195902Sdelphij#endif 279195902Sdelphij 280237613Sdelphij#if NO_REGEX 281237613Sdelphij search_type |= SRCH_NO_REGEX; 282237613Sdelphij#endif 283195902Sdelphij if (search_type & SRCH_NO_REGEX) 284195902Sdelphij matched = match(tpattern, strlen(tpattern), line, line_len, sp, ep); 285221715Sdelphij else 286221715Sdelphij { 287237613Sdelphij#if HAVE_GNU_REGEX 288237613Sdelphij { 289237613Sdelphij struct re_registers search_regs; 290237613Sdelphij regoff_t *starts = (regoff_t *) ecalloc(1, sizeof (regoff_t)); 291237613Sdelphij regoff_t *ends = (regoff_t *) ecalloc(1, sizeof (regoff_t)); 292237613Sdelphij spattern->not_bol = notbol; 293237613Sdelphij re_set_registers(spattern, &search_regs, 1, starts, ends); 294237613Sdelphij matched = re_search(spattern, line, line_len, 0, line_len, &search_regs) >= 0; 295237613Sdelphij if (matched) 296237613Sdelphij { 297237613Sdelphij *sp = line + search_regs.start[0]; 298237613Sdelphij *ep = line + search_regs.end[0]; 299237613Sdelphij } 300237613Sdelphij free(starts); 301237613Sdelphij free(ends); 302237613Sdelphij } 303237613Sdelphij#endif 304195902Sdelphij#if HAVE_POSIX_REGCOMP 305195902Sdelphij { 306195902Sdelphij regmatch_t rm; 307195902Sdelphij int flags = (notbol) ? REG_NOTBOL : 0; 308195902Sdelphij matched = !regexec(spattern, line, 1, &rm, flags); 309195902Sdelphij if (matched) 310195902Sdelphij { 311195902Sdelphij#ifndef __WATCOMC__ 312195902Sdelphij *sp = line + rm.rm_so; 313195902Sdelphij *ep = line + rm.rm_eo; 314195902Sdelphij#else 315195902Sdelphij *sp = rm.rm_sp; 316195902Sdelphij *ep = rm.rm_ep; 317195902Sdelphij#endif 318195902Sdelphij } 319195902Sdelphij } 320195902Sdelphij#endif 321195902Sdelphij#if HAVE_PCRE 322195902Sdelphij { 323195902Sdelphij int flags = (notbol) ? PCRE_NOTBOL : 0; 324195902Sdelphij int ovector[3]; 325195902Sdelphij matched = pcre_exec(spattern, NULL, line, line_len, 326195902Sdelphij 0, flags, ovector, 3) >= 0; 327195902Sdelphij if (matched) 328195902Sdelphij { 329195902Sdelphij *sp = line + ovector[0]; 330195902Sdelphij *ep = line + ovector[1]; 331195902Sdelphij } 332195902Sdelphij } 333195902Sdelphij#endif 334195902Sdelphij#if HAVE_RE_COMP 335195902Sdelphij matched = (re_exec(line) == 1); 336195902Sdelphij /* 337195902Sdelphij * re_exec doesn't seem to provide a way to get the matched string. 338195902Sdelphij */ 339195902Sdelphij *sp = *ep = NULL; 340195902Sdelphij#endif 341195902Sdelphij#if HAVE_REGCMP 342195902Sdelphij *ep = regex(spattern, line); 343195902Sdelphij matched = (*ep != NULL); 344195902Sdelphij if (matched) 345195902Sdelphij *sp = __loc1; 346195902Sdelphij#endif 347195902Sdelphij#if HAVE_V8_REGCOMP 348195902Sdelphij#if HAVE_REGEXEC2 349195902Sdelphij matched = regexec2(spattern, line, notbol); 350195902Sdelphij#else 351195902Sdelphij matched = regexec(spattern, line); 352195902Sdelphij#endif 353195902Sdelphij if (matched) 354195902Sdelphij { 355195902Sdelphij *sp = spattern->startp[0]; 356195902Sdelphij *ep = spattern->endp[0]; 357195902Sdelphij } 358195902Sdelphij#endif 359221715Sdelphij } 360195902Sdelphij matched = (!(search_type & SRCH_NO_MATCH) && matched) || 361195902Sdelphij ((search_type & SRCH_NO_MATCH) && !matched); 362195902Sdelphij return (matched); 363195902Sdelphij} 364195902Sdelphij 365