1238730Sdelphij/*
2330571Sdelphij * Copyright (C) 1984-2017  Mark Nudelman
3238730Sdelphij *
4238730Sdelphij * You may distribute under the terms of either the GNU General Public
5238730Sdelphij * License or the Less License, as specified in the README file.
6238730Sdelphij *
7238730Sdelphij * For more information, see the README file.
8238730Sdelphij */
9195902Sdelphij
10195902Sdelphij/*
11195902Sdelphij * Routines to do pattern matching.
12195902Sdelphij */
13195902Sdelphij
14195902Sdelphij#include "less.h"
15195902Sdelphij
16195902Sdelphijextern int caseless;
17195902Sdelphij
18195902Sdelphij/*
19195902Sdelphij * Compile a search pattern, for future use by match_pattern.
20195902Sdelphij */
21195902Sdelphij	static int
22294286Sdelphijcompile_pattern2(pattern, search_type, comp_pattern, show_error)
23195902Sdelphij	char *pattern;
24195902Sdelphij	int search_type;
25330571Sdelphij	PATTERN_TYPE *comp_pattern;
26294286Sdelphij	int show_error;
27195902Sdelphij{
28237613Sdelphij	if (search_type & SRCH_NO_REGEX)
29237613Sdelphij		return (0);
30237613Sdelphij  {
31237613Sdelphij#if HAVE_GNU_REGEX
32237613Sdelphij	struct re_pattern_buffer *comp = (struct re_pattern_buffer *)
33237613Sdelphij		ecalloc(1, sizeof(struct re_pattern_buffer));
34237613Sdelphij	re_set_syntax(RE_SYNTAX_POSIX_EXTENDED);
35237613Sdelphij	if (re_compile_pattern(pattern, strlen(pattern), comp))
36195902Sdelphij	{
37237613Sdelphij		free(comp);
38294286Sdelphij		if (show_error)
39294286Sdelphij			error("Invalid pattern", NULL_PARG);
40237613Sdelphij		return (-1);
41237613Sdelphij	}
42330571Sdelphij	if (*comp_pattern != NULL)
43330571Sdelphij	{
44330571Sdelphij		regfree(*comp_pattern);
45330571Sdelphij		free(*comp_pattern);
46330571Sdelphij	}
47330571Sdelphij	*comp_pattern = comp;
48237613Sdelphij#endif
49195902Sdelphij#if HAVE_POSIX_REGCOMP
50237613Sdelphij	regex_t *comp = (regex_t *) ecalloc(1, sizeof(regex_t));
51237613Sdelphij	if (regcomp(comp, pattern, REGCOMP_FLAG))
52237613Sdelphij	{
53237613Sdelphij		free(comp);
54294286Sdelphij		if (show_error)
55294286Sdelphij			error("Invalid pattern", NULL_PARG);
56237613Sdelphij		return (-1);
57237613Sdelphij	}
58330571Sdelphij	if (*comp_pattern != NULL)
59330571Sdelphij	{
60330571Sdelphij		regfree(*comp_pattern);
61330571Sdelphij		free(*comp_pattern);
62330571Sdelphij	}
63330571Sdelphij	*comp_pattern = comp;
64195902Sdelphij#endif
65195902Sdelphij#if HAVE_PCRE
66237613Sdelphij	pcre *comp;
67237613Sdelphij	constant char *errstring;
68237613Sdelphij	int erroffset;
69237613Sdelphij	PARG parg;
70237613Sdelphij	comp = pcre_compile(pattern, 0,
71237613Sdelphij			&errstring, &erroffset, NULL);
72237613Sdelphij	if (comp == NULL)
73237613Sdelphij	{
74237613Sdelphij		parg.p_string = (char *) errstring;
75294286Sdelphij		if (show_error)
76294286Sdelphij			error("%s", &parg);
77237613Sdelphij		return (-1);
78237613Sdelphij	}
79330571Sdelphij	*comp_pattern = comp;
80195902Sdelphij#endif
81195902Sdelphij#if HAVE_RE_COMP
82237613Sdelphij	PARG parg;
83237613Sdelphij	if ((parg.p_string = re_comp(pattern)) != NULL)
84237613Sdelphij	{
85294286Sdelphij		if (show_error)
86294286Sdelphij			error("%s", &parg);
87237613Sdelphij		return (-1);
88237613Sdelphij	}
89330571Sdelphij	*comp_pattern = 1;
90195902Sdelphij#endif
91195902Sdelphij#if HAVE_REGCMP
92237613Sdelphij	char *comp;
93237613Sdelphij	if ((comp = regcmp(pattern, 0)) == NULL)
94237613Sdelphij	{
95294286Sdelphij		if (show_error)
96294286Sdelphij			error("Invalid pattern", NULL_PARG);
97237613Sdelphij		return (-1);
98237613Sdelphij	}
99330571Sdelphij	if (comp_pattern != NULL)
100330571Sdelphij		free(*comp_pattern);
101330571Sdelphij	*comp_pattern = comp;
102195902Sdelphij#endif
103195902Sdelphij#if HAVE_V8_REGCOMP
104237613Sdelphij	struct regexp *comp;
105294286Sdelphij	reg_show_error = show_error;
106294286Sdelphij	comp = regcomp(pattern);
107294286Sdelphij	reg_show_error = 1;
108294286Sdelphij	if (comp == NULL)
109237613Sdelphij	{
110237613Sdelphij		/*
111237613Sdelphij		 * regcomp has already printed an error message
112237613Sdelphij		 * via regerror().
113237613Sdelphij		 */
114237613Sdelphij		return (-1);
115237613Sdelphij	}
116330571Sdelphij	if (*comp_pattern != NULL)
117330571Sdelphij		free(*comp_pattern);
118330571Sdelphij	*comp_pattern = comp;
119195902Sdelphij#endif
120237613Sdelphij  }
121195902Sdelphij	return (0);
122195902Sdelphij}
123195902Sdelphij
124195902Sdelphij/*
125195902Sdelphij * Like compile_pattern2, but convert the pattern to lowercase if necessary.
126195902Sdelphij */
127195902Sdelphij	public int
128195902Sdelphijcompile_pattern(pattern, search_type, comp_pattern)
129195902Sdelphij	char *pattern;
130195902Sdelphij	int search_type;
131330571Sdelphij	PATTERN_TYPE *comp_pattern;
132195902Sdelphij{
133195902Sdelphij	char *cvt_pattern;
134195902Sdelphij	int result;
135195902Sdelphij
136195902Sdelphij	if (caseless != OPT_ONPLUS)
137195902Sdelphij		cvt_pattern = pattern;
138195902Sdelphij	else
139195902Sdelphij	{
140195902Sdelphij		cvt_pattern = (char*) ecalloc(1, cvt_length(strlen(pattern), CVT_TO_LC));
141195902Sdelphij		cvt_text(cvt_pattern, pattern, (int *)NULL, (int *)NULL, CVT_TO_LC);
142195902Sdelphij	}
143294286Sdelphij	result = compile_pattern2(cvt_pattern, search_type, comp_pattern, 1);
144195902Sdelphij	if (cvt_pattern != pattern)
145195902Sdelphij		free(cvt_pattern);
146195902Sdelphij	return (result);
147195902Sdelphij}
148195902Sdelphij
149195902Sdelphij/*
150195902Sdelphij * Forget that we have a compiled pattern.
151195902Sdelphij */
152195902Sdelphij	public void
153195902Sdelphijuncompile_pattern(pattern)
154330571Sdelphij	PATTERN_TYPE *pattern;
155195902Sdelphij{
156237613Sdelphij#if HAVE_GNU_REGEX
157330571Sdelphij	if (*pattern != NULL)
158330571Sdelphij	{
159330571Sdelphij		regfree(*pattern);
160330571Sdelphij		free(*pattern);
161330571Sdelphij	}
162330571Sdelphij	*pattern = NULL;
163237613Sdelphij#endif
164195902Sdelphij#if HAVE_POSIX_REGCOMP
165330571Sdelphij	if (*pattern != NULL)
166330571Sdelphij	{
167330571Sdelphij		regfree(*pattern);
168330571Sdelphij		free(*pattern);
169330571Sdelphij	}
170330571Sdelphij	*pattern = NULL;
171195902Sdelphij#endif
172195902Sdelphij#if HAVE_PCRE
173330571Sdelphij	if (*pattern != NULL)
174330571Sdelphij		pcre_free(*pattern);
175330571Sdelphij	*pattern = NULL;
176195902Sdelphij#endif
177195902Sdelphij#if HAVE_RE_COMP
178330571Sdelphij	*pattern = 0;
179195902Sdelphij#endif
180195902Sdelphij#if HAVE_REGCMP
181330571Sdelphij	if (*pattern != NULL)
182330571Sdelphij		free(*pattern);
183330571Sdelphij	*pattern = NULL;
184195902Sdelphij#endif
185195902Sdelphij#if HAVE_V8_REGCOMP
186330571Sdelphij	if (*pattern != NULL)
187330571Sdelphij		free(*pattern);
188330571Sdelphij	*pattern = NULL;
189195902Sdelphij#endif
190195902Sdelphij}
191195902Sdelphij
192195902Sdelphij/*
193294286Sdelphij * Can a pattern be successfully compiled?
194294286Sdelphij */
195294286Sdelphij	public int
196294286Sdelphijvalid_pattern(pattern)
197294286Sdelphij	char *pattern;
198294286Sdelphij{
199330571Sdelphij	PATTERN_TYPE comp_pattern;
200294286Sdelphij	int result;
201294286Sdelphij
202294286Sdelphij	CLEAR_PATTERN(comp_pattern);
203294286Sdelphij	result = compile_pattern2(pattern, 0, &comp_pattern, 0);
204294286Sdelphij	if (result != 0)
205294286Sdelphij		return (0);
206294286Sdelphij	uncompile_pattern(&comp_pattern);
207294286Sdelphij	return (1);
208294286Sdelphij}
209294286Sdelphij
210294286Sdelphij/*
211195902Sdelphij * Is a compiled pattern null?
212195902Sdelphij */
213195902Sdelphij	public int
214195902Sdelphijis_null_pattern(pattern)
215330571Sdelphij	PATTERN_TYPE pattern;
216195902Sdelphij{
217237613Sdelphij#if HAVE_GNU_REGEX
218237613Sdelphij	return (pattern == NULL);
219237613Sdelphij#endif
220195902Sdelphij#if HAVE_POSIX_REGCOMP
221195902Sdelphij	return (pattern == NULL);
222195902Sdelphij#endif
223195902Sdelphij#if HAVE_PCRE
224195902Sdelphij	return (pattern == NULL);
225195902Sdelphij#endif
226195902Sdelphij#if HAVE_RE_COMP
227195902Sdelphij	return (pattern == 0);
228195902Sdelphij#endif
229195902Sdelphij#if HAVE_REGCMP
230195902Sdelphij	return (pattern == NULL);
231195902Sdelphij#endif
232195902Sdelphij#if HAVE_V8_REGCOMP
233195902Sdelphij	return (pattern == NULL);
234195902Sdelphij#endif
235294286Sdelphij#if NO_REGEX
236294286Sdelphij	return (pattern == NULL);
237294286Sdelphij#endif
238195902Sdelphij}
239195902Sdelphij
240195902Sdelphij/*
241195902Sdelphij * Simple pattern matching function.
242195902Sdelphij * It supports no metacharacters like *, etc.
243195902Sdelphij */
244195902Sdelphij	static int
245195902Sdelphijmatch(pattern, pattern_len, buf, buf_len, pfound, pend)
246195902Sdelphij	char *pattern;
247195902Sdelphij	int pattern_len;
248195902Sdelphij	char *buf;
249195902Sdelphij	int buf_len;
250195902Sdelphij	char **pfound, **pend;
251195902Sdelphij{
252330571Sdelphij	char *pp, *lp;
253330571Sdelphij	char *pattern_end = pattern + pattern_len;
254330571Sdelphij	char *buf_end = buf + buf_len;
255195902Sdelphij
256195902Sdelphij	for ( ;  buf < buf_end;  buf++)
257195902Sdelphij	{
258294286Sdelphij		for (pp = pattern, lp = buf;  ;  pp++, lp++)
259294286Sdelphij		{
260294286Sdelphij			char cp = *pp;
261294286Sdelphij			char cl = *lp;
262294286Sdelphij			if (caseless == OPT_ONPLUS && ASCII_IS_UPPER(cp))
263294286Sdelphij				cp = ASCII_TO_LOWER(cp);
264294286Sdelphij			if (cp != cl)
265294286Sdelphij				break;
266195902Sdelphij			if (pp == pattern_end || lp == buf_end)
267195902Sdelphij				break;
268294286Sdelphij		}
269195902Sdelphij		if (pp == pattern_end)
270195902Sdelphij		{
271195902Sdelphij			if (pfound != NULL)
272195902Sdelphij				*pfound = buf;
273195902Sdelphij			if (pend != NULL)
274195902Sdelphij				*pend = lp;
275195902Sdelphij			return (1);
276195902Sdelphij		}
277195902Sdelphij	}
278195902Sdelphij	return (0);
279195902Sdelphij}
280195902Sdelphij
281195902Sdelphij/*
282195902Sdelphij * Perform a pattern match with the previously compiled pattern.
283195902Sdelphij * Set sp and ep to the start and end of the matched string.
284195902Sdelphij */
285195902Sdelphij	public int
286195902Sdelphijmatch_pattern(pattern, tpattern, line, line_len, sp, ep, notbol, search_type)
287330571Sdelphij	PATTERN_TYPE pattern;
288195902Sdelphij	char *tpattern;
289195902Sdelphij	char *line;
290195902Sdelphij	int line_len;
291195902Sdelphij	char **sp;
292195902Sdelphij	char **ep;
293195902Sdelphij	int notbol;
294195902Sdelphij	int search_type;
295195902Sdelphij{
296195902Sdelphij	int matched;
297195902Sdelphij
298294286Sdelphij	*sp = *ep = NULL;
299237613Sdelphij#if NO_REGEX
300237613Sdelphij	search_type |= SRCH_NO_REGEX;
301237613Sdelphij#endif
302195902Sdelphij	if (search_type & SRCH_NO_REGEX)
303195902Sdelphij		matched = match(tpattern, strlen(tpattern), line, line_len, sp, ep);
304221715Sdelphij	else
305221715Sdelphij	{
306237613Sdelphij#if HAVE_GNU_REGEX
307237613Sdelphij	{
308237613Sdelphij		struct re_registers search_regs;
309330571Sdelphij		pattern->not_bol = notbol;
310330571Sdelphij		pattern->regs_allocated = REGS_UNALLOCATED;
311330571Sdelphij		matched = re_search(pattern, line, line_len, 0, line_len, &search_regs) >= 0;
312237613Sdelphij		if (matched)
313237613Sdelphij		{
314237613Sdelphij			*sp = line + search_regs.start[0];
315237613Sdelphij			*ep = line + search_regs.end[0];
316237613Sdelphij		}
317237613Sdelphij	}
318237613Sdelphij#endif
319195902Sdelphij#if HAVE_POSIX_REGCOMP
320195902Sdelphij	{
321195902Sdelphij		regmatch_t rm;
322195902Sdelphij		int flags = (notbol) ? REG_NOTBOL : 0;
323294286Sdelphij#ifdef REG_STARTEND
324294286Sdelphij		flags |= REG_STARTEND;
325294286Sdelphij		rm.rm_so = 0;
326294286Sdelphij		rm.rm_eo = line_len;
327294286Sdelphij#endif
328330571Sdelphij		matched = !regexec(pattern, line, 1, &rm, flags);
329195902Sdelphij		if (matched)
330195902Sdelphij		{
331195902Sdelphij#ifndef __WATCOMC__
332195902Sdelphij			*sp = line + rm.rm_so;
333195902Sdelphij			*ep = line + rm.rm_eo;
334195902Sdelphij#else
335195902Sdelphij			*sp = rm.rm_sp;
336195902Sdelphij			*ep = rm.rm_ep;
337195902Sdelphij#endif
338195902Sdelphij		}
339195902Sdelphij	}
340195902Sdelphij#endif
341195902Sdelphij#if HAVE_PCRE
342195902Sdelphij	{
343195902Sdelphij		int flags = (notbol) ? PCRE_NOTBOL : 0;
344195902Sdelphij		int ovector[3];
345330571Sdelphij		matched = pcre_exec(pattern, NULL, line, line_len,
346195902Sdelphij			0, flags, ovector, 3) >= 0;
347195902Sdelphij		if (matched)
348195902Sdelphij		{
349195902Sdelphij			*sp = line + ovector[0];
350195902Sdelphij			*ep = line + ovector[1];
351195902Sdelphij		}
352195902Sdelphij	}
353195902Sdelphij#endif
354195902Sdelphij#if HAVE_RE_COMP
355195902Sdelphij	matched = (re_exec(line) == 1);
356195902Sdelphij	/*
357195902Sdelphij	 * re_exec doesn't seem to provide a way to get the matched string.
358195902Sdelphij	 */
359195902Sdelphij	*sp = *ep = NULL;
360195902Sdelphij#endif
361195902Sdelphij#if HAVE_REGCMP
362330571Sdelphij	*ep = regex(pattern, line);
363195902Sdelphij	matched = (*ep != NULL);
364195902Sdelphij	if (matched)
365195902Sdelphij		*sp = __loc1;
366195902Sdelphij#endif
367195902Sdelphij#if HAVE_V8_REGCOMP
368195902Sdelphij#if HAVE_REGEXEC2
369330571Sdelphij	matched = regexec2(pattern, line, notbol);
370195902Sdelphij#else
371330571Sdelphij	matched = regexec(pattern, line);
372195902Sdelphij#endif
373195902Sdelphij	if (matched)
374195902Sdelphij	{
375330571Sdelphij		*sp = pattern->startp[0];
376330571Sdelphij		*ep = pattern->endp[0];
377195902Sdelphij	}
378195902Sdelphij#endif
379221715Sdelphij	}
380195902Sdelphij	matched = (!(search_type & SRCH_NO_MATCH) && matched) ||
381195902Sdelphij			((search_type & SRCH_NO_MATCH) && !matched);
382195902Sdelphij	return (matched);
383195902Sdelphij}
384195902Sdelphij
385