pattern.c revision 330571
1/* 2 * Copyright (C) 1984-2017 Mark Nudelman 3 * 4 * You may distribute under the terms of either the GNU General Public 5 * License or the Less License, as specified in the README file. 6 * 7 * For more information, see the README file. 8 */ 9 10/* 11 * Routines to do pattern matching. 12 */ 13 14#include "less.h" 15 16extern int caseless; 17 18/* 19 * Compile a search pattern, for future use by match_pattern. 20 */ 21 static int 22compile_pattern2(pattern, search_type, comp_pattern, show_error) 23 char *pattern; 24 int search_type; 25 PATTERN_TYPE *comp_pattern; 26 int show_error; 27{ 28 if (search_type & SRCH_NO_REGEX) 29 return (0); 30 { 31#if HAVE_GNU_REGEX 32 struct re_pattern_buffer *comp = (struct re_pattern_buffer *) 33 ecalloc(1, sizeof(struct re_pattern_buffer)); 34 re_set_syntax(RE_SYNTAX_POSIX_EXTENDED); 35 if (re_compile_pattern(pattern, strlen(pattern), comp)) 36 { 37 free(comp); 38 if (show_error) 39 error("Invalid pattern", NULL_PARG); 40 return (-1); 41 } 42 if (*comp_pattern != NULL) 43 { 44 regfree(*comp_pattern); 45 free(*comp_pattern); 46 } 47 *comp_pattern = comp; 48#endif 49#if HAVE_POSIX_REGCOMP 50 regex_t *comp = (regex_t *) ecalloc(1, sizeof(regex_t)); 51 if (regcomp(comp, pattern, REGCOMP_FLAG)) 52 { 53 free(comp); 54 if (show_error) 55 error("Invalid pattern", NULL_PARG); 56 return (-1); 57 } 58 if (*comp_pattern != NULL) 59 { 60 regfree(*comp_pattern); 61 free(*comp_pattern); 62 } 63 *comp_pattern = comp; 64#endif 65#if HAVE_PCRE 66 pcre *comp; 67 constant char *errstring; 68 int erroffset; 69 PARG parg; 70 comp = pcre_compile(pattern, 0, 71 &errstring, &erroffset, NULL); 72 if (comp == NULL) 73 { 74 parg.p_string = (char *) errstring; 75 if (show_error) 76 error("%s", &parg); 77 return (-1); 78 } 79 *comp_pattern = comp; 80#endif 81#if HAVE_RE_COMP 82 PARG parg; 83 if ((parg.p_string = re_comp(pattern)) != NULL) 84 { 85 if (show_error) 86 error("%s", &parg); 87 return (-1); 88 } 89 *comp_pattern = 1; 90#endif 91#if HAVE_REGCMP 92 char *comp; 93 if ((comp = regcmp(pattern, 0)) == NULL) 94 { 95 if (show_error) 96 error("Invalid pattern", NULL_PARG); 97 return (-1); 98 } 99 if (comp_pattern != NULL) 100 free(*comp_pattern); 101 *comp_pattern = comp; 102#endif 103#if HAVE_V8_REGCOMP 104 struct regexp *comp; 105 reg_show_error = show_error; 106 comp = regcomp(pattern); 107 reg_show_error = 1; 108 if (comp == NULL) 109 { 110 /* 111 * regcomp has already printed an error message 112 * via regerror(). 113 */ 114 return (-1); 115 } 116 if (*comp_pattern != NULL) 117 free(*comp_pattern); 118 *comp_pattern = comp; 119#endif 120 } 121 return (0); 122} 123 124/* 125 * Like compile_pattern2, but convert the pattern to lowercase if necessary. 126 */ 127 public int 128compile_pattern(pattern, search_type, comp_pattern) 129 char *pattern; 130 int search_type; 131 PATTERN_TYPE *comp_pattern; 132{ 133 char *cvt_pattern; 134 int result; 135 136 if (caseless != OPT_ONPLUS) 137 cvt_pattern = pattern; 138 else 139 { 140 cvt_pattern = (char*) ecalloc(1, cvt_length(strlen(pattern), CVT_TO_LC)); 141 cvt_text(cvt_pattern, pattern, (int *)NULL, (int *)NULL, CVT_TO_LC); 142 } 143 result = compile_pattern2(cvt_pattern, search_type, comp_pattern, 1); 144 if (cvt_pattern != pattern) 145 free(cvt_pattern); 146 return (result); 147} 148 149/* 150 * Forget that we have a compiled pattern. 151 */ 152 public void 153uncompile_pattern(pattern) 154 PATTERN_TYPE *pattern; 155{ 156#if HAVE_GNU_REGEX 157 if (*pattern != NULL) 158 { 159 regfree(*pattern); 160 free(*pattern); 161 } 162 *pattern = NULL; 163#endif 164#if HAVE_POSIX_REGCOMP 165 if (*pattern != NULL) 166 { 167 regfree(*pattern); 168 free(*pattern); 169 } 170 *pattern = NULL; 171#endif 172#if HAVE_PCRE 173 if (*pattern != NULL) 174 pcre_free(*pattern); 175 *pattern = NULL; 176#endif 177#if HAVE_RE_COMP 178 *pattern = 0; 179#endif 180#if HAVE_REGCMP 181 if (*pattern != NULL) 182 free(*pattern); 183 *pattern = NULL; 184#endif 185#if HAVE_V8_REGCOMP 186 if (*pattern != NULL) 187 free(*pattern); 188 *pattern = NULL; 189#endif 190} 191 192/* 193 * Can a pattern be successfully compiled? 194 */ 195 public int 196valid_pattern(pattern) 197 char *pattern; 198{ 199 PATTERN_TYPE comp_pattern; 200 int result; 201 202 CLEAR_PATTERN(comp_pattern); 203 result = compile_pattern2(pattern, 0, &comp_pattern, 0); 204 if (result != 0) 205 return (0); 206 uncompile_pattern(&comp_pattern); 207 return (1); 208} 209 210/* 211 * Is a compiled pattern null? 212 */ 213 public int 214is_null_pattern(pattern) 215 PATTERN_TYPE pattern; 216{ 217#if HAVE_GNU_REGEX 218 return (pattern == NULL); 219#endif 220#if HAVE_POSIX_REGCOMP 221 return (pattern == NULL); 222#endif 223#if HAVE_PCRE 224 return (pattern == NULL); 225#endif 226#if HAVE_RE_COMP 227 return (pattern == 0); 228#endif 229#if HAVE_REGCMP 230 return (pattern == NULL); 231#endif 232#if HAVE_V8_REGCOMP 233 return (pattern == NULL); 234#endif 235#if NO_REGEX 236 return (pattern == NULL); 237#endif 238} 239 240/* 241 * Simple pattern matching function. 242 * It supports no metacharacters like *, etc. 243 */ 244 static int 245match(pattern, pattern_len, buf, buf_len, pfound, pend) 246 char *pattern; 247 int pattern_len; 248 char *buf; 249 int buf_len; 250 char **pfound, **pend; 251{ 252 char *pp, *lp; 253 char *pattern_end = pattern + pattern_len; 254 char *buf_end = buf + buf_len; 255 256 for ( ; buf < buf_end; buf++) 257 { 258 for (pp = pattern, lp = buf; ; pp++, lp++) 259 { 260 char cp = *pp; 261 char cl = *lp; 262 if (caseless == OPT_ONPLUS && ASCII_IS_UPPER(cp)) 263 cp = ASCII_TO_LOWER(cp); 264 if (cp != cl) 265 break; 266 if (pp == pattern_end || lp == buf_end) 267 break; 268 } 269 if (pp == pattern_end) 270 { 271 if (pfound != NULL) 272 *pfound = buf; 273 if (pend != NULL) 274 *pend = lp; 275 return (1); 276 } 277 } 278 return (0); 279} 280 281/* 282 * Perform a pattern match with the previously compiled pattern. 283 * Set sp and ep to the start and end of the matched string. 284 */ 285 public int 286match_pattern(pattern, tpattern, line, line_len, sp, ep, notbol, search_type) 287 PATTERN_TYPE pattern; 288 char *tpattern; 289 char *line; 290 int line_len; 291 char **sp; 292 char **ep; 293 int notbol; 294 int search_type; 295{ 296 int matched; 297 298 *sp = *ep = NULL; 299#if NO_REGEX 300 search_type |= SRCH_NO_REGEX; 301#endif 302 if (search_type & SRCH_NO_REGEX) 303 matched = match(tpattern, strlen(tpattern), line, line_len, sp, ep); 304 else 305 { 306#if HAVE_GNU_REGEX 307 { 308 struct re_registers search_regs; 309 pattern->not_bol = notbol; 310 pattern->regs_allocated = REGS_UNALLOCATED; 311 matched = re_search(pattern, line, line_len, 0, line_len, &search_regs) >= 0; 312 if (matched) 313 { 314 *sp = line + search_regs.start[0]; 315 *ep = line + search_regs.end[0]; 316 } 317 } 318#endif 319#if HAVE_POSIX_REGCOMP 320 { 321 regmatch_t rm; 322 int flags = (notbol) ? REG_NOTBOL : 0; 323#ifdef REG_STARTEND 324 flags |= REG_STARTEND; 325 rm.rm_so = 0; 326 rm.rm_eo = line_len; 327#endif 328 matched = !regexec(pattern, line, 1, &rm, flags); 329 if (matched) 330 { 331#ifndef __WATCOMC__ 332 *sp = line + rm.rm_so; 333 *ep = line + rm.rm_eo; 334#else 335 *sp = rm.rm_sp; 336 *ep = rm.rm_ep; 337#endif 338 } 339 } 340#endif 341#if HAVE_PCRE 342 { 343 int flags = (notbol) ? PCRE_NOTBOL : 0; 344 int ovector[3]; 345 matched = pcre_exec(pattern, NULL, line, line_len, 346 0, flags, ovector, 3) >= 0; 347 if (matched) 348 { 349 *sp = line + ovector[0]; 350 *ep = line + ovector[1]; 351 } 352 } 353#endif 354#if HAVE_RE_COMP 355 matched = (re_exec(line) == 1); 356 /* 357 * re_exec doesn't seem to provide a way to get the matched string. 358 */ 359 *sp = *ep = NULL; 360#endif 361#if HAVE_REGCMP 362 *ep = regex(pattern, line); 363 matched = (*ep != NULL); 364 if (matched) 365 *sp = __loc1; 366#endif 367#if HAVE_V8_REGCOMP 368#if HAVE_REGEXEC2 369 matched = regexec2(pattern, line, notbol); 370#else 371 matched = regexec(pattern, line); 372#endif 373 if (matched) 374 { 375 *sp = pattern->startp[0]; 376 *ep = pattern->endp[0]; 377 } 378#endif 379 } 380 matched = (!(search_type & SRCH_NO_MATCH) && matched) || 381 ((search_type & SRCH_NO_MATCH) && !matched); 382 return (matched); 383} 384 385