1/*
2 * Copyright (C) 1984-2023  Mark Nudelman
3 *
4 * You may distribute under the terms of either the GNU General Public
5 * License or the Less License, as specified in the README file.
6 *
7 * For more information, see the README file.
8 */
9
10/*
11 * Routines to do pattern matching.
12 */
13
14#include "less.h"
15
16extern int caseless;
17extern int is_caseless;
18extern int utf_mode;
19
20/*
21 * Compile a search pattern, for future use by match_pattern.
22 */
23static int compile_pattern2(char *pattern, int search_type, PATTERN_TYPE *comp_pattern, int show_error)
24{
25	if (search_type & SRCH_NO_REGEX)
26		return (0);
27  {
28#if HAVE_GNU_REGEX
29	struct re_pattern_buffer *comp = (struct re_pattern_buffer *)
30		ecalloc(1, sizeof(struct re_pattern_buffer));
31	re_set_syntax(RE_SYNTAX_POSIX_EXTENDED);
32	if (re_compile_pattern(pattern, strlen(pattern), comp))
33	{
34		free(comp);
35		if (show_error)
36			error("Invalid pattern", NULL_PARG);
37		return (-1);
38	}
39	if (*comp_pattern != NULL)
40	{
41		regfree(*comp_pattern);
42		free(*comp_pattern);
43	}
44	*comp_pattern = comp;
45#endif
46#if HAVE_POSIX_REGCOMP
47	regex_t *comp = (regex_t *) ecalloc(1, sizeof(regex_t));
48	if (regcomp(comp, pattern, REGCOMP_FLAG | (is_caseless ? REG_ICASE : 0)))
49	{
50		free(comp);
51		if (show_error)
52			error("Invalid pattern", NULL_PARG);
53		return (-1);
54	}
55	if (*comp_pattern != NULL)
56	{
57		regfree(*comp_pattern);
58		free(*comp_pattern);
59	}
60	*comp_pattern = comp;
61#endif
62#if HAVE_PCRE
63	constant char *errstring;
64	int erroffset;
65	PARG parg;
66	pcre *comp = pcre_compile(pattern,
67			((utf_mode) ? PCRE_UTF8 | PCRE_NO_UTF8_CHECK : 0) |
68			(is_caseless ? PCRE_CASELESS : 0),
69			&errstring, &erroffset, NULL);
70	if (comp == NULL)
71	{
72		parg.p_string = (char *) errstring;
73		if (show_error)
74			error("%s", &parg);
75		return (-1);
76	}
77	*comp_pattern = comp;
78#endif
79#if HAVE_PCRE2
80	int errcode;
81	PCRE2_SIZE erroffset;
82	PARG parg;
83	pcre2_code *comp = pcre2_compile((PCRE2_SPTR)pattern, strlen(pattern),
84			(is_caseless ? PCRE2_CASELESS : 0),
85			&errcode, &erroffset, NULL);
86	if (comp == NULL)
87	{
88		if (show_error)
89		{
90			char msg[160];
91			pcre2_get_error_message(errcode, (PCRE2_UCHAR*)msg, sizeof(msg));
92			parg.p_string = msg;
93			error("%s", &parg);
94		}
95		return (-1);
96	}
97	*comp_pattern = comp;
98#endif
99#if HAVE_RE_COMP
100	PARG parg;
101	if ((parg.p_string = re_comp(pattern)) != NULL)
102	{
103		if (show_error)
104			error("%s", &parg);
105		return (-1);
106	}
107	*comp_pattern = 1;
108#endif
109#if HAVE_REGCMP
110	char *comp;
111	if ((comp = regcmp(pattern, 0)) == NULL)
112	{
113		if (show_error)
114			error("Invalid pattern", NULL_PARG);
115		return (-1);
116	}
117	if (comp_pattern != NULL)
118		free(*comp_pattern);
119	*comp_pattern = comp;
120#endif
121#if HAVE_V8_REGCOMP
122	struct regexp *comp;
123	reg_show_error = show_error;
124	comp = regcomp(pattern);
125	reg_show_error = 1;
126	if (comp == NULL)
127	{
128		/*
129		 * regcomp has already printed an error message
130		 * via regerror().
131		 */
132		return (-1);
133	}
134	if (*comp_pattern != NULL)
135		free(*comp_pattern);
136	*comp_pattern = comp;
137#endif
138  }
139	return (0);
140}
141
142/*
143 * Like compile_pattern2, but convert the pattern to lowercase if necessary.
144 */
145public int compile_pattern(char *pattern, int search_type, int show_error, PATTERN_TYPE *comp_pattern)
146{
147	char *cvt_pattern;
148	int result;
149
150	if (caseless != OPT_ONPLUS || (re_handles_caseless && !(search_type & SRCH_NO_REGEX)))
151		cvt_pattern = pattern;
152	else
153	{
154		cvt_pattern = (char*) ecalloc(1, cvt_length(strlen(pattern), CVT_TO_LC));
155		cvt_text(cvt_pattern, pattern, (int *)NULL, (int *)NULL, CVT_TO_LC);
156	}
157	result = compile_pattern2(cvt_pattern, search_type, comp_pattern, show_error);
158	if (cvt_pattern != pattern)
159		free(cvt_pattern);
160	return (result);
161}
162
163/*
164 * Forget that we have a compiled pattern.
165 */
166public void uncompile_pattern(PATTERN_TYPE *pattern)
167{
168#if HAVE_GNU_REGEX
169	if (*pattern != NULL)
170	{
171		regfree(*pattern);
172		free(*pattern);
173	}
174	*pattern = NULL;
175#endif
176#if HAVE_POSIX_REGCOMP
177	if (*pattern != NULL)
178	{
179		regfree(*pattern);
180		free(*pattern);
181	}
182	*pattern = NULL;
183#endif
184#if HAVE_PCRE
185	if (*pattern != NULL)
186		pcre_free(*pattern);
187	*pattern = NULL;
188#endif
189#if HAVE_PCRE2
190	if (*pattern != NULL)
191		pcre2_code_free(*pattern);
192	*pattern = NULL;
193#endif
194#if HAVE_RE_COMP
195	*pattern = 0;
196#endif
197#if HAVE_REGCMP
198	if (*pattern != NULL)
199		free(*pattern);
200	*pattern = NULL;
201#endif
202#if HAVE_V8_REGCOMP
203	if (*pattern != NULL)
204		free(*pattern);
205	*pattern = NULL;
206#endif
207}
208
209#if 0
210/*
211 * Can a pattern be successfully compiled?
212 */
213public int valid_pattern(char *pattern)
214{
215	PATTERN_TYPE comp_pattern;
216	int result;
217
218	SET_NULL_PATTERN(comp_pattern);
219	result = compile_pattern2(pattern, 0, &comp_pattern, 0);
220	if (result != 0)
221		return (0);
222	uncompile_pattern(&comp_pattern);
223	return (1);
224}
225#endif
226
227/*
228 * Is a compiled pattern null?
229 */
230public int is_null_pattern(PATTERN_TYPE pattern)
231{
232#if HAVE_GNU_REGEX
233	return (pattern == NULL);
234#endif
235#if HAVE_POSIX_REGCOMP
236	return (pattern == NULL);
237#endif
238#if HAVE_PCRE
239	return (pattern == NULL);
240#endif
241#if HAVE_PCRE2
242	return (pattern == NULL);
243#endif
244#if HAVE_RE_COMP
245	return (pattern == 0);
246#endif
247#if HAVE_REGCMP
248	return (pattern == NULL);
249#endif
250#if HAVE_V8_REGCOMP
251	return (pattern == NULL);
252#endif
253#if NO_REGEX
254	return (pattern == NULL);
255#endif
256}
257/*
258 * Simple pattern matching function.
259 * It supports no metacharacters like *, etc.
260 */
261static int match(char *pattern, int pattern_len, char *buf, int buf_len, char ***sp, char ***ep, int nsubs)
262{
263	char *pp, *lp;
264	char *pattern_end = pattern + pattern_len;
265	char *buf_end = buf + buf_len;
266
267	for ( ;  buf < buf_end;  buf++)
268	{
269		for (pp = pattern, lp = buf;  ;  pp++, lp++)
270		{
271			char cp = *pp;
272			char cl = *lp;
273			if (caseless == OPT_ONPLUS && ASCII_IS_UPPER(cp))
274				cp = ASCII_TO_LOWER(cp);
275			if (cp != cl)
276				break;
277			if (pp == pattern_end || lp == buf_end)
278				break;
279		}
280		if (pp == pattern_end)
281		{
282			*(*sp)++ = buf;
283			*(*ep)++ = lp;
284			return (1);
285		}
286	}
287	**sp = **ep = NULL;
288	return (0);
289}
290
291/*
292 * Perform a pattern match with the previously compiled pattern.
293 * Set sp[0] and ep[0] to the start and end of the matched string.
294 * Set sp[i] and ep[i] to the start and end of the i-th matched subpattern.
295 * Subpatterns are defined by parentheses in the regex language.
296 */
297static int match_pattern1(PATTERN_TYPE pattern, char *tpattern, char *line, int line_len, char **sp, char **ep, int nsp, int notbol, int search_type)
298{
299	int matched;
300
301#if NO_REGEX
302	search_type |= SRCH_NO_REGEX;
303#endif
304	if (search_type & SRCH_NO_REGEX)
305		matched = match(tpattern, strlen(tpattern), line, line_len, &sp, &ep, nsp);
306	else
307	{
308#if HAVE_GNU_REGEX
309	{
310		struct re_registers search_regs;
311		pattern->not_bol = notbol;
312		pattern->regs_allocated = REGS_UNALLOCATED;
313		matched = re_search(pattern, line, line_len, 0, line_len, &search_regs) >= 0;
314		if (matched)
315		{
316			*sp++ = line + search_regs.start[0];
317			*ep++ = line + search_regs.end[0];
318		}
319	}
320#endif
321#if HAVE_POSIX_REGCOMP
322	{
323		#define RM_COUNT (NUM_SEARCH_COLORS+2)
324		regmatch_t rm[RM_COUNT];
325		int flags = (notbol) ? REG_NOTBOL : 0;
326#ifdef REG_STARTEND
327		flags |= REG_STARTEND;
328		rm[0].rm_so = 0;
329		rm[0].rm_eo = line_len;
330#endif
331		matched = !regexec(pattern, line, RM_COUNT, rm, flags);
332		if (matched)
333		{
334			int i;
335			int ecount;
336			for (ecount = RM_COUNT;  ecount > 0;  ecount--)
337				if (rm[ecount-1].rm_so >= 0)
338					break;
339			if (ecount >= nsp)
340				ecount = nsp-1;
341			for (i = 0;  i < ecount;  i++)
342			{
343				if (rm[i].rm_so < 0)
344				{
345					*sp++ = *ep++ = line;
346				} else
347				{
348#ifndef __WATCOMC__
349					*sp++ = line + rm[i].rm_so;
350					*ep++ = line + rm[i].rm_eo;
351#else
352					*sp++ = rm[i].rm_sp;
353					*ep++ = rm[i].rm_ep;
354#endif
355				}
356			}
357		}
358	}
359#endif
360#if HAVE_PCRE
361	{
362		#define OVECTOR_COUNT ((3*NUM_SEARCH_COLORS)+3)
363		int ovector[OVECTOR_COUNT];
364		int flags = (notbol) ? PCRE_NOTBOL : 0;
365		int i;
366		int ecount;
367		int mcount = pcre_exec(pattern, NULL, line, line_len,
368			0, flags, ovector, OVECTOR_COUNT);
369		matched = (mcount > 0);
370		ecount = nsp-1;
371		if (ecount > mcount) ecount = mcount;
372		for (i = 0;  i < ecount*2; )
373		{
374			if (ovector[i] < 0 || ovector[i+1] < 0)
375			{
376				*sp++ = *ep++ = line;
377				i += 2;
378			} else
379			{
380				*sp++ = line + ovector[i++];
381				*ep++ = line + ovector[i++];
382			}
383		}
384	}
385#endif
386#if HAVE_PCRE2
387	{
388		int flags = (notbol) ? PCRE2_NOTBOL : 0;
389		pcre2_match_data *md = pcre2_match_data_create(nsp-1, NULL);
390		int mcount = pcre2_match(pattern, (PCRE2_SPTR)line, line_len,
391			0, flags, md, NULL);
392		matched = (mcount > 0);
393		if (matched)
394		{
395			PCRE2_SIZE *ovector = pcre2_get_ovector_pointer(md);
396			int i;
397			int ecount = nsp-1;
398			if (ecount > mcount) ecount = mcount;
399			for (i = 0;  i < ecount*2; )
400			{
401				if (ovector[i] < 0 || ovector[i+1] < 0)
402				{
403					*sp++ = *ep++ = line;
404					i += 2;
405				} else
406				{
407					*sp++ = line + ovector[i++];
408					*ep++ = line + ovector[i++];
409				}
410			}
411		}
412		pcre2_match_data_free(md);
413	}
414#endif
415#if HAVE_RE_COMP
416	matched = (re_exec(line) == 1);
417	/*
418	 * re_exec doesn't seem to provide a way to get the matched string.
419	 */
420#endif
421#if HAVE_REGCMP
422	matched = ((*ep++ = regex(pattern, line)) != NULL);
423	if (matched)
424		*sp++ = __loc1;
425#endif
426#if HAVE_V8_REGCOMP
427#if HAVE_REGEXEC2
428	matched = regexec2(pattern, line, notbol);
429#else
430	matched = regexec(pattern, line);
431#endif
432	if (matched)
433	{
434		*sp++ = pattern->startp[0];
435		*ep++ = pattern->endp[0];
436	}
437#endif
438	}
439	*sp = *ep = NULL;
440	matched = (!(search_type & SRCH_NO_MATCH) && matched) ||
441			((search_type & SRCH_NO_MATCH) && !matched);
442	return (matched);
443}
444
445public int match_pattern(PATTERN_TYPE pattern, char *tpattern, char *line, int line_len, char **sp, char **ep, int nsp, int notbol, int search_type)
446{
447	int matched = match_pattern1(pattern, tpattern, line, line_len, sp, ep, nsp, notbol, search_type);
448	int i;
449	for (i = 1;  i <= NUM_SEARCH_COLORS;  i++)
450	{
451		if ((search_type & SRCH_SUBSEARCH(i)) && ep[i] == sp[i])
452			matched = 0;
453	}
454	return matched;
455}
456
457/*
458 * Return the name of the pattern matching library.
459 */
460public char * pattern_lib_name(void)
461{
462#if HAVE_GNU_REGEX
463	return ("GNU");
464#else
465#if HAVE_POSIX_REGCOMP
466	return ("POSIX");
467#else
468#if HAVE_PCRE2
469	return ("PCRE2");
470#else
471#if HAVE_PCRE
472	return ("PCRE");
473#else
474#if HAVE_RE_COMP
475	return ("BSD");
476#else
477#if HAVE_REGCMP
478	return ("V8");
479#else
480#if HAVE_V8_REGCOMP
481	return ("Spencer V8");
482#else
483	return ("no");
484#endif
485#endif
486#endif
487#endif
488#endif
489#endif
490#endif
491}
492