1238730Sdelphij/* 2330571Sdelphij * Copyright (C) 1984-2017 Mark Nudelman 3238730Sdelphij * 4238730Sdelphij * You may distribute under the terms of either the GNU General Public 5238730Sdelphij * License or the Less License, as specified in the README file. 6238730Sdelphij * 7238730Sdelphij * For more information, see the README file. 8238730Sdelphij */ 9195902Sdelphij 10195902Sdelphij/* 11195902Sdelphij * Routines to do pattern matching. 12195902Sdelphij */ 13195902Sdelphij 14195902Sdelphij#include "less.h" 15195902Sdelphij 16195902Sdelphijextern int caseless; 17195902Sdelphij 18195902Sdelphij/* 19195902Sdelphij * Compile a search pattern, for future use by match_pattern. 20195902Sdelphij */ 21195902Sdelphij static int 22294286Sdelphijcompile_pattern2(pattern, search_type, comp_pattern, show_error) 23195902Sdelphij char *pattern; 24195902Sdelphij int search_type; 25330571Sdelphij PATTERN_TYPE *comp_pattern; 26294286Sdelphij int show_error; 27195902Sdelphij{ 28237613Sdelphij if (search_type & SRCH_NO_REGEX) 29237613Sdelphij return (0); 30237613Sdelphij { 31237613Sdelphij#if HAVE_GNU_REGEX 32237613Sdelphij struct re_pattern_buffer *comp = (struct re_pattern_buffer *) 33237613Sdelphij ecalloc(1, sizeof(struct re_pattern_buffer)); 34237613Sdelphij re_set_syntax(RE_SYNTAX_POSIX_EXTENDED); 35237613Sdelphij if (re_compile_pattern(pattern, strlen(pattern), comp)) 36195902Sdelphij { 37237613Sdelphij free(comp); 38294286Sdelphij if (show_error) 39294286Sdelphij error("Invalid pattern", NULL_PARG); 40237613Sdelphij return (-1); 41237613Sdelphij } 42330571Sdelphij if (*comp_pattern != NULL) 43330571Sdelphij { 44330571Sdelphij regfree(*comp_pattern); 45330571Sdelphij free(*comp_pattern); 46330571Sdelphij } 47330571Sdelphij *comp_pattern = comp; 48237613Sdelphij#endif 49195902Sdelphij#if HAVE_POSIX_REGCOMP 50237613Sdelphij regex_t *comp = (regex_t *) ecalloc(1, sizeof(regex_t)); 51237613Sdelphij if (regcomp(comp, pattern, REGCOMP_FLAG)) 52237613Sdelphij { 53237613Sdelphij free(comp); 54294286Sdelphij if (show_error) 55294286Sdelphij error("Invalid pattern", NULL_PARG); 56237613Sdelphij return (-1); 57237613Sdelphij } 58330571Sdelphij if (*comp_pattern != NULL) 59330571Sdelphij { 60330571Sdelphij regfree(*comp_pattern); 61330571Sdelphij free(*comp_pattern); 62330571Sdelphij } 63330571Sdelphij *comp_pattern = comp; 64195902Sdelphij#endif 65195902Sdelphij#if HAVE_PCRE 66237613Sdelphij pcre *comp; 67237613Sdelphij constant char *errstring; 68237613Sdelphij int erroffset; 69237613Sdelphij PARG parg; 70237613Sdelphij comp = pcre_compile(pattern, 0, 71237613Sdelphij &errstring, &erroffset, NULL); 72237613Sdelphij if (comp == NULL) 73237613Sdelphij { 74237613Sdelphij parg.p_string = (char *) errstring; 75294286Sdelphij if (show_error) 76294286Sdelphij error("%s", &parg); 77237613Sdelphij return (-1); 78237613Sdelphij } 79330571Sdelphij *comp_pattern = comp; 80195902Sdelphij#endif 81195902Sdelphij#if HAVE_RE_COMP 82237613Sdelphij PARG parg; 83237613Sdelphij if ((parg.p_string = re_comp(pattern)) != NULL) 84237613Sdelphij { 85294286Sdelphij if (show_error) 86294286Sdelphij error("%s", &parg); 87237613Sdelphij return (-1); 88237613Sdelphij } 89330571Sdelphij *comp_pattern = 1; 90195902Sdelphij#endif 91195902Sdelphij#if HAVE_REGCMP 92237613Sdelphij char *comp; 93237613Sdelphij if ((comp = regcmp(pattern, 0)) == NULL) 94237613Sdelphij { 95294286Sdelphij if (show_error) 96294286Sdelphij error("Invalid pattern", NULL_PARG); 97237613Sdelphij return (-1); 98237613Sdelphij } 99330571Sdelphij if (comp_pattern != NULL) 100330571Sdelphij free(*comp_pattern); 101330571Sdelphij *comp_pattern = comp; 102195902Sdelphij#endif 103195902Sdelphij#if HAVE_V8_REGCOMP 104237613Sdelphij struct regexp *comp; 105294286Sdelphij reg_show_error = show_error; 106294286Sdelphij comp = regcomp(pattern); 107294286Sdelphij reg_show_error = 1; 108294286Sdelphij if (comp == NULL) 109237613Sdelphij { 110237613Sdelphij /* 111237613Sdelphij * regcomp has already printed an error message 112237613Sdelphij * via regerror(). 113237613Sdelphij */ 114237613Sdelphij return (-1); 115237613Sdelphij } 116330571Sdelphij if (*comp_pattern != NULL) 117330571Sdelphij free(*comp_pattern); 118330571Sdelphij *comp_pattern = comp; 119195902Sdelphij#endif 120237613Sdelphij } 121195902Sdelphij return (0); 122195902Sdelphij} 123195902Sdelphij 124195902Sdelphij/* 125195902Sdelphij * Like compile_pattern2, but convert the pattern to lowercase if necessary. 126195902Sdelphij */ 127195902Sdelphij public int 128195902Sdelphijcompile_pattern(pattern, search_type, comp_pattern) 129195902Sdelphij char *pattern; 130195902Sdelphij int search_type; 131330571Sdelphij PATTERN_TYPE *comp_pattern; 132195902Sdelphij{ 133195902Sdelphij char *cvt_pattern; 134195902Sdelphij int result; 135195902Sdelphij 136195902Sdelphij if (caseless != OPT_ONPLUS) 137195902Sdelphij cvt_pattern = pattern; 138195902Sdelphij else 139195902Sdelphij { 140195902Sdelphij cvt_pattern = (char*) ecalloc(1, cvt_length(strlen(pattern), CVT_TO_LC)); 141195902Sdelphij cvt_text(cvt_pattern, pattern, (int *)NULL, (int *)NULL, CVT_TO_LC); 142195902Sdelphij } 143294286Sdelphij result = compile_pattern2(cvt_pattern, search_type, comp_pattern, 1); 144195902Sdelphij if (cvt_pattern != pattern) 145195902Sdelphij free(cvt_pattern); 146195902Sdelphij return (result); 147195902Sdelphij} 148195902Sdelphij 149195902Sdelphij/* 150195902Sdelphij * Forget that we have a compiled pattern. 151195902Sdelphij */ 152195902Sdelphij public void 153195902Sdelphijuncompile_pattern(pattern) 154330571Sdelphij PATTERN_TYPE *pattern; 155195902Sdelphij{ 156237613Sdelphij#if HAVE_GNU_REGEX 157330571Sdelphij if (*pattern != NULL) 158330571Sdelphij { 159330571Sdelphij regfree(*pattern); 160330571Sdelphij free(*pattern); 161330571Sdelphij } 162330571Sdelphij *pattern = NULL; 163237613Sdelphij#endif 164195902Sdelphij#if HAVE_POSIX_REGCOMP 165330571Sdelphij if (*pattern != NULL) 166330571Sdelphij { 167330571Sdelphij regfree(*pattern); 168330571Sdelphij free(*pattern); 169330571Sdelphij } 170330571Sdelphij *pattern = NULL; 171195902Sdelphij#endif 172195902Sdelphij#if HAVE_PCRE 173330571Sdelphij if (*pattern != NULL) 174330571Sdelphij pcre_free(*pattern); 175330571Sdelphij *pattern = NULL; 176195902Sdelphij#endif 177195902Sdelphij#if HAVE_RE_COMP 178330571Sdelphij *pattern = 0; 179195902Sdelphij#endif 180195902Sdelphij#if HAVE_REGCMP 181330571Sdelphij if (*pattern != NULL) 182330571Sdelphij free(*pattern); 183330571Sdelphij *pattern = NULL; 184195902Sdelphij#endif 185195902Sdelphij#if HAVE_V8_REGCOMP 186330571Sdelphij if (*pattern != NULL) 187330571Sdelphij free(*pattern); 188330571Sdelphij *pattern = NULL; 189195902Sdelphij#endif 190195902Sdelphij} 191195902Sdelphij 192195902Sdelphij/* 193294286Sdelphij * Can a pattern be successfully compiled? 194294286Sdelphij */ 195294286Sdelphij public int 196294286Sdelphijvalid_pattern(pattern) 197294286Sdelphij char *pattern; 198294286Sdelphij{ 199330571Sdelphij PATTERN_TYPE comp_pattern; 200294286Sdelphij int result; 201294286Sdelphij 202294286Sdelphij CLEAR_PATTERN(comp_pattern); 203294286Sdelphij result = compile_pattern2(pattern, 0, &comp_pattern, 0); 204294286Sdelphij if (result != 0) 205294286Sdelphij return (0); 206294286Sdelphij uncompile_pattern(&comp_pattern); 207294286Sdelphij return (1); 208294286Sdelphij} 209294286Sdelphij 210294286Sdelphij/* 211195902Sdelphij * Is a compiled pattern null? 212195902Sdelphij */ 213195902Sdelphij public int 214195902Sdelphijis_null_pattern(pattern) 215330571Sdelphij PATTERN_TYPE pattern; 216195902Sdelphij{ 217237613Sdelphij#if HAVE_GNU_REGEX 218237613Sdelphij return (pattern == NULL); 219237613Sdelphij#endif 220195902Sdelphij#if HAVE_POSIX_REGCOMP 221195902Sdelphij return (pattern == NULL); 222195902Sdelphij#endif 223195902Sdelphij#if HAVE_PCRE 224195902Sdelphij return (pattern == NULL); 225195902Sdelphij#endif 226195902Sdelphij#if HAVE_RE_COMP 227195902Sdelphij return (pattern == 0); 228195902Sdelphij#endif 229195902Sdelphij#if HAVE_REGCMP 230195902Sdelphij return (pattern == NULL); 231195902Sdelphij#endif 232195902Sdelphij#if HAVE_V8_REGCOMP 233195902Sdelphij return (pattern == NULL); 234195902Sdelphij#endif 235294286Sdelphij#if NO_REGEX 236294286Sdelphij return (pattern == NULL); 237294286Sdelphij#endif 238195902Sdelphij} 239195902Sdelphij 240195902Sdelphij/* 241195902Sdelphij * Simple pattern matching function. 242195902Sdelphij * It supports no metacharacters like *, etc. 243195902Sdelphij */ 244195902Sdelphij static int 245195902Sdelphijmatch(pattern, pattern_len, buf, buf_len, pfound, pend) 246195902Sdelphij char *pattern; 247195902Sdelphij int pattern_len; 248195902Sdelphij char *buf; 249195902Sdelphij int buf_len; 250195902Sdelphij char **pfound, **pend; 251195902Sdelphij{ 252330571Sdelphij char *pp, *lp; 253330571Sdelphij char *pattern_end = pattern + pattern_len; 254330571Sdelphij char *buf_end = buf + buf_len; 255195902Sdelphij 256195902Sdelphij for ( ; buf < buf_end; buf++) 257195902Sdelphij { 258294286Sdelphij for (pp = pattern, lp = buf; ; pp++, lp++) 259294286Sdelphij { 260294286Sdelphij char cp = *pp; 261294286Sdelphij char cl = *lp; 262294286Sdelphij if (caseless == OPT_ONPLUS && ASCII_IS_UPPER(cp)) 263294286Sdelphij cp = ASCII_TO_LOWER(cp); 264294286Sdelphij if (cp != cl) 265294286Sdelphij break; 266195902Sdelphij if (pp == pattern_end || lp == buf_end) 267195902Sdelphij break; 268294286Sdelphij } 269195902Sdelphij if (pp == pattern_end) 270195902Sdelphij { 271195902Sdelphij if (pfound != NULL) 272195902Sdelphij *pfound = buf; 273195902Sdelphij if (pend != NULL) 274195902Sdelphij *pend = lp; 275195902Sdelphij return (1); 276195902Sdelphij } 277195902Sdelphij } 278195902Sdelphij return (0); 279195902Sdelphij} 280195902Sdelphij 281195902Sdelphij/* 282195902Sdelphij * Perform a pattern match with the previously compiled pattern. 283195902Sdelphij * Set sp and ep to the start and end of the matched string. 284195902Sdelphij */ 285195902Sdelphij public int 286195902Sdelphijmatch_pattern(pattern, tpattern, line, line_len, sp, ep, notbol, search_type) 287330571Sdelphij PATTERN_TYPE pattern; 288195902Sdelphij char *tpattern; 289195902Sdelphij char *line; 290195902Sdelphij int line_len; 291195902Sdelphij char **sp; 292195902Sdelphij char **ep; 293195902Sdelphij int notbol; 294195902Sdelphij int search_type; 295195902Sdelphij{ 296195902Sdelphij int matched; 297195902Sdelphij 298294286Sdelphij *sp = *ep = NULL; 299237613Sdelphij#if NO_REGEX 300237613Sdelphij search_type |= SRCH_NO_REGEX; 301237613Sdelphij#endif 302195902Sdelphij if (search_type & SRCH_NO_REGEX) 303195902Sdelphij matched = match(tpattern, strlen(tpattern), line, line_len, sp, ep); 304221715Sdelphij else 305221715Sdelphij { 306237613Sdelphij#if HAVE_GNU_REGEX 307237613Sdelphij { 308237613Sdelphij struct re_registers search_regs; 309330571Sdelphij pattern->not_bol = notbol; 310330571Sdelphij pattern->regs_allocated = REGS_UNALLOCATED; 311330571Sdelphij matched = re_search(pattern, line, line_len, 0, line_len, &search_regs) >= 0; 312237613Sdelphij if (matched) 313237613Sdelphij { 314237613Sdelphij *sp = line + search_regs.start[0]; 315237613Sdelphij *ep = line + search_regs.end[0]; 316237613Sdelphij } 317237613Sdelphij } 318237613Sdelphij#endif 319195902Sdelphij#if HAVE_POSIX_REGCOMP 320195902Sdelphij { 321195902Sdelphij regmatch_t rm; 322195902Sdelphij int flags = (notbol) ? REG_NOTBOL : 0; 323294286Sdelphij#ifdef REG_STARTEND 324294286Sdelphij flags |= REG_STARTEND; 325294286Sdelphij rm.rm_so = 0; 326294286Sdelphij rm.rm_eo = line_len; 327294286Sdelphij#endif 328330571Sdelphij matched = !regexec(pattern, line, 1, &rm, flags); 329195902Sdelphij if (matched) 330195902Sdelphij { 331195902Sdelphij#ifndef __WATCOMC__ 332195902Sdelphij *sp = line + rm.rm_so; 333195902Sdelphij *ep = line + rm.rm_eo; 334195902Sdelphij#else 335195902Sdelphij *sp = rm.rm_sp; 336195902Sdelphij *ep = rm.rm_ep; 337195902Sdelphij#endif 338195902Sdelphij } 339195902Sdelphij } 340195902Sdelphij#endif 341195902Sdelphij#if HAVE_PCRE 342195902Sdelphij { 343195902Sdelphij int flags = (notbol) ? PCRE_NOTBOL : 0; 344195902Sdelphij int ovector[3]; 345330571Sdelphij matched = pcre_exec(pattern, NULL, line, line_len, 346195902Sdelphij 0, flags, ovector, 3) >= 0; 347195902Sdelphij if (matched) 348195902Sdelphij { 349195902Sdelphij *sp = line + ovector[0]; 350195902Sdelphij *ep = line + ovector[1]; 351195902Sdelphij } 352195902Sdelphij } 353195902Sdelphij#endif 354195902Sdelphij#if HAVE_RE_COMP 355195902Sdelphij matched = (re_exec(line) == 1); 356195902Sdelphij /* 357195902Sdelphij * re_exec doesn't seem to provide a way to get the matched string. 358195902Sdelphij */ 359195902Sdelphij *sp = *ep = NULL; 360195902Sdelphij#endif 361195902Sdelphij#if HAVE_REGCMP 362330571Sdelphij *ep = regex(pattern, line); 363195902Sdelphij matched = (*ep != NULL); 364195902Sdelphij if (matched) 365195902Sdelphij *sp = __loc1; 366195902Sdelphij#endif 367195902Sdelphij#if HAVE_V8_REGCOMP 368195902Sdelphij#if HAVE_REGEXEC2 369330571Sdelphij matched = regexec2(pattern, line, notbol); 370195902Sdelphij#else 371330571Sdelphij matched = regexec(pattern, line); 372195902Sdelphij#endif 373195902Sdelphij if (matched) 374195902Sdelphij { 375330571Sdelphij *sp = pattern->startp[0]; 376330571Sdelphij *ep = pattern->endp[0]; 377195902Sdelphij } 378195902Sdelphij#endif 379221715Sdelphij } 380195902Sdelphij matched = (!(search_type & SRCH_NO_MATCH) && matched) || 381195902Sdelphij ((search_type & SRCH_NO_MATCH) && !matched); 382195902Sdelphij return (matched); 383195902Sdelphij} 384195902Sdelphij 385