1238730Sdelphij/*
2238730Sdelphij * Copyright (C) 1984-2012  Mark Nudelman
3238730Sdelphij *
4238730Sdelphij * You may distribute under the terms of either the GNU General Public
5238730Sdelphij * License or the Less License, as specified in the README file.
6238730Sdelphij *
7238730Sdelphij * For more information, see the README file.
8238730Sdelphij */
9195902Sdelphij
10195902Sdelphij/*
11195902Sdelphij * Routines to do pattern matching.
12195902Sdelphij */
13195902Sdelphij
14195902Sdelphij#include "less.h"
15195902Sdelphij#include "pattern.h"
16195902Sdelphij
17195902Sdelphijextern int caseless;
18195902Sdelphij
19195902Sdelphij/*
20195902Sdelphij * Compile a search pattern, for future use by match_pattern.
21195902Sdelphij */
22195902Sdelphij	static int
23195902Sdelphijcompile_pattern2(pattern, search_type, comp_pattern)
24195902Sdelphij	char *pattern;
25195902Sdelphij	int search_type;
26195902Sdelphij	void **comp_pattern;
27195902Sdelphij{
28237613Sdelphij	if (search_type & SRCH_NO_REGEX)
29237613Sdelphij		return (0);
30237613Sdelphij  {
31237613Sdelphij#if HAVE_GNU_REGEX
32237613Sdelphij	struct re_pattern_buffer *comp = (struct re_pattern_buffer *)
33237613Sdelphij		ecalloc(1, sizeof(struct re_pattern_buffer));
34237613Sdelphij	struct re_pattern_buffer **pcomp =
35237613Sdelphij		(struct re_pattern_buffer **) comp_pattern;
36237613Sdelphij	re_set_syntax(RE_SYNTAX_POSIX_EXTENDED);
37237613Sdelphij	if (re_compile_pattern(pattern, strlen(pattern), comp))
38195902Sdelphij	{
39237613Sdelphij		free(comp);
40237613Sdelphij		error("Invalid pattern", NULL_PARG);
41237613Sdelphij		return (-1);
42237613Sdelphij	}
43237613Sdelphij	if (*pcomp != NULL)
44237613Sdelphij		regfree(*pcomp);
45237613Sdelphij	*pcomp = comp;
46237613Sdelphij#endif
47195902Sdelphij#if HAVE_POSIX_REGCOMP
48237613Sdelphij	regex_t *comp = (regex_t *) ecalloc(1, sizeof(regex_t));
49237613Sdelphij	regex_t **pcomp = (regex_t **) comp_pattern;
50237613Sdelphij	if (regcomp(comp, pattern, REGCOMP_FLAG))
51237613Sdelphij	{
52237613Sdelphij		free(comp);
53237613Sdelphij		error("Invalid pattern", NULL_PARG);
54237613Sdelphij		return (-1);
55237613Sdelphij	}
56237613Sdelphij	if (*pcomp != NULL)
57237613Sdelphij		regfree(*pcomp);
58237613Sdelphij	*pcomp = comp;
59195902Sdelphij#endif
60195902Sdelphij#if HAVE_PCRE
61237613Sdelphij	pcre *comp;
62237613Sdelphij	pcre **pcomp = (pcre **) comp_pattern;
63237613Sdelphij	constant char *errstring;
64237613Sdelphij	int erroffset;
65237613Sdelphij	PARG parg;
66237613Sdelphij	comp = pcre_compile(pattern, 0,
67237613Sdelphij			&errstring, &erroffset, NULL);
68237613Sdelphij	if (comp == NULL)
69237613Sdelphij	{
70237613Sdelphij		parg.p_string = (char *) errstring;
71237613Sdelphij		error("%s", &parg);
72237613Sdelphij		return (-1);
73237613Sdelphij	}
74237613Sdelphij	*pcomp = comp;
75195902Sdelphij#endif
76195902Sdelphij#if HAVE_RE_COMP
77237613Sdelphij	PARG parg;
78237613Sdelphij	int *pcomp = (int *) comp_pattern;
79237613Sdelphij	if ((parg.p_string = re_comp(pattern)) != NULL)
80237613Sdelphij	{
81237613Sdelphij		error("%s", &parg);
82237613Sdelphij		return (-1);
83237613Sdelphij	}
84237613Sdelphij	*pcomp = 1;
85195902Sdelphij#endif
86195902Sdelphij#if HAVE_REGCMP
87237613Sdelphij	char *comp;
88237613Sdelphij	char **pcomp = (char **) comp_pattern;
89237613Sdelphij	if ((comp = regcmp(pattern, 0)) == NULL)
90237613Sdelphij	{
91237613Sdelphij		error("Invalid pattern", NULL_PARG);
92237613Sdelphij		return (-1);
93237613Sdelphij	}
94237613Sdelphij	if (pcomp != NULL)
95237613Sdelphij		free(*pcomp);
96237613Sdelphij	*pcomp = comp;
97195902Sdelphij#endif
98195902Sdelphij#if HAVE_V8_REGCOMP
99237613Sdelphij	struct regexp *comp;
100237613Sdelphij	struct regexp **pcomp = (struct regexp **) comp_pattern;
101237613Sdelphij	if ((comp = regcomp(pattern)) == NULL)
102237613Sdelphij	{
103237613Sdelphij		/*
104237613Sdelphij		 * regcomp has already printed an error message
105237613Sdelphij		 * via regerror().
106237613Sdelphij		 */
107237613Sdelphij		return (-1);
108237613Sdelphij	}
109237613Sdelphij	if (*pcomp != NULL)
110237613Sdelphij		free(*pcomp);
111237613Sdelphij	*pcomp = comp;
112195902Sdelphij#endif
113237613Sdelphij  }
114195902Sdelphij	return (0);
115195902Sdelphij}
116195902Sdelphij
117195902Sdelphij/*
118195902Sdelphij * Like compile_pattern2, but convert the pattern to lowercase if necessary.
119195902Sdelphij */
120195902Sdelphij	public int
121195902Sdelphijcompile_pattern(pattern, search_type, comp_pattern)
122195902Sdelphij	char *pattern;
123195902Sdelphij	int search_type;
124195902Sdelphij	void **comp_pattern;
125195902Sdelphij{
126195902Sdelphij	char *cvt_pattern;
127195902Sdelphij	int result;
128195902Sdelphij
129195902Sdelphij	if (caseless != OPT_ONPLUS)
130195902Sdelphij		cvt_pattern = pattern;
131195902Sdelphij	else
132195902Sdelphij	{
133195902Sdelphij		cvt_pattern = (char*) ecalloc(1, cvt_length(strlen(pattern), CVT_TO_LC));
134195902Sdelphij		cvt_text(cvt_pattern, pattern, (int *)NULL, (int *)NULL, CVT_TO_LC);
135195902Sdelphij	}
136195902Sdelphij	result = compile_pattern2(cvt_pattern, search_type, comp_pattern);
137195902Sdelphij	if (cvt_pattern != pattern)
138195902Sdelphij		free(cvt_pattern);
139195902Sdelphij	return (result);
140195902Sdelphij}
141195902Sdelphij
142195902Sdelphij/*
143195902Sdelphij * Forget that we have a compiled pattern.
144195902Sdelphij */
145195902Sdelphij	public void
146195902Sdelphijuncompile_pattern(pattern)
147195902Sdelphij	void **pattern;
148195902Sdelphij{
149237613Sdelphij#if HAVE_GNU_REGEX
150237613Sdelphij	struct re_pattern_buffer **pcomp = (struct re_pattern_buffer **) pattern;
151237613Sdelphij	if (*pcomp != NULL)
152237613Sdelphij		regfree(*pcomp);
153237613Sdelphij	*pcomp = NULL;
154237613Sdelphij#endif
155195902Sdelphij#if HAVE_POSIX_REGCOMP
156195902Sdelphij	regex_t **pcomp = (regex_t **) pattern;
157195902Sdelphij	if (*pcomp != NULL)
158195902Sdelphij		regfree(*pcomp);
159195902Sdelphij	*pcomp = NULL;
160195902Sdelphij#endif
161195902Sdelphij#if HAVE_PCRE
162195902Sdelphij	pcre **pcomp = (pcre **) pattern;
163195902Sdelphij	if (*pcomp != NULL)
164195902Sdelphij		pcre_free(*pcomp);
165195902Sdelphij	*pcomp = NULL;
166195902Sdelphij#endif
167195902Sdelphij#if HAVE_RE_COMP
168195902Sdelphij	int *pcomp = (int *) pattern;
169195902Sdelphij	*pcomp = 0;
170195902Sdelphij#endif
171195902Sdelphij#if HAVE_REGCMP
172195902Sdelphij	char **pcomp = (char **) pattern;
173195902Sdelphij	if (*pcomp != NULL)
174195902Sdelphij		free(*pcomp);
175195902Sdelphij	*pcomp = NULL;
176195902Sdelphij#endif
177195902Sdelphij#if HAVE_V8_REGCOMP
178195902Sdelphij	struct regexp **pcomp = (struct regexp **) pattern;
179195902Sdelphij	if (*pcomp != NULL)
180195902Sdelphij		free(*pcomp);
181195902Sdelphij	*pcomp = NULL;
182195902Sdelphij#endif
183195902Sdelphij}
184195902Sdelphij
185195902Sdelphij/*
186195902Sdelphij * Is a compiled pattern null?
187195902Sdelphij */
188195902Sdelphij	public int
189195902Sdelphijis_null_pattern(pattern)
190195902Sdelphij	void *pattern;
191195902Sdelphij{
192237613Sdelphij#if HAVE_GNU_REGEX
193237613Sdelphij	return (pattern == NULL);
194237613Sdelphij#endif
195195902Sdelphij#if HAVE_POSIX_REGCOMP
196195902Sdelphij	return (pattern == NULL);
197195902Sdelphij#endif
198195902Sdelphij#if HAVE_PCRE
199195902Sdelphij	return (pattern == NULL);
200195902Sdelphij#endif
201195902Sdelphij#if HAVE_RE_COMP
202195902Sdelphij	return (pattern == 0);
203195902Sdelphij#endif
204195902Sdelphij#if HAVE_REGCMP
205195902Sdelphij	return (pattern == NULL);
206195902Sdelphij#endif
207195902Sdelphij#if HAVE_V8_REGCOMP
208195902Sdelphij	return (pattern == NULL);
209195902Sdelphij#endif
210195902Sdelphij}
211195902Sdelphij
/*
 * Simple pattern matching function.
 * It supports no metacharacters like *, etc.
 *
 * pattern, pattern_len  the literal text to look for and its length.
 * buf, buf_len          the text to search and its length.
 * pfound                if not NULL, receives the start of the first match.
 * pend                  if not NULL, receives the position just past
 *                       the first match.
 *
 * Returns 1 if pattern occurs in buf, 0 otherwise.  An empty buf never
 * matches; an empty pattern matches at the start of a non-empty buf.
 */
	static int
match(pattern, pattern_len, buf, buf_len, pfound, pend)
	char *pattern;
	int pattern_len;
	char *buf;
	int buf_len;
	char **pfound, **pend;
{
	register char *pp, *lp;
	register char *pattern_end = pattern + pattern_len;
	register char *buf_end = buf + buf_len;

	for ( ;  buf < buf_end;  buf++)
	{
		/*
		 * Check both end pointers before comparing, so that
		 * neither the pattern nor the buffer is ever read
		 * past its stated length.
		 */
		pp = pattern;
		lp = buf;
		while (pp < pattern_end && lp < buf_end && *pp == *lp)
		{
			pp++;
			lp++;
		}
		if (pp == pattern_end)
		{
			/* The whole pattern was consumed: match at buf. */
			if (pfound != NULL)
				*pfound = buf;
			if (pend != NULL)
				*pend = lp;
			return (1);
		}
	}
	return (0);
}
244195902Sdelphij
245195902Sdelphij/*
246195902Sdelphij * Perform a pattern match with the previously compiled pattern.
247195902Sdelphij * Set sp and ep to the start and end of the matched string.
248195902Sdelphij */
249195902Sdelphij	public int
250195902Sdelphijmatch_pattern(pattern, tpattern, line, line_len, sp, ep, notbol, search_type)
251195902Sdelphij	void *pattern;
252195902Sdelphij	char *tpattern;
253195902Sdelphij	char *line;
254195902Sdelphij	int line_len;
255195902Sdelphij	char **sp;
256195902Sdelphij	char **ep;
257195902Sdelphij	int notbol;
258195902Sdelphij	int search_type;
259195902Sdelphij{
260195902Sdelphij	int matched;
261237613Sdelphij#if HAVE_GNU_REGEX
262237613Sdelphij	struct re_pattern_buffer *spattern = (struct re_pattern_buffer *) pattern;
263237613Sdelphij#endif
264195902Sdelphij#if HAVE_POSIX_REGCOMP
265195902Sdelphij	regex_t *spattern = (regex_t *) pattern;
266195902Sdelphij#endif
267195902Sdelphij#if HAVE_PCRE
268195902Sdelphij	pcre *spattern = (pcre *) pattern;
269195902Sdelphij#endif
270195902Sdelphij#if HAVE_RE_COMP
271195902Sdelphij	int spattern = (int) pattern;
272195902Sdelphij#endif
273195902Sdelphij#if HAVE_REGCMP
274195902Sdelphij	char *spattern = (char *) pattern;
275195902Sdelphij#endif
276195902Sdelphij#if HAVE_V8_REGCOMP
277195902Sdelphij	struct regexp *spattern = (struct regexp *) pattern;
278195902Sdelphij#endif
279195902Sdelphij
280237613Sdelphij#if NO_REGEX
281237613Sdelphij	search_type |= SRCH_NO_REGEX;
282237613Sdelphij#endif
283195902Sdelphij	if (search_type & SRCH_NO_REGEX)
284195902Sdelphij		matched = match(tpattern, strlen(tpattern), line, line_len, sp, ep);
285221715Sdelphij	else
286221715Sdelphij	{
287237613Sdelphij#if HAVE_GNU_REGEX
288237613Sdelphij	{
289237613Sdelphij		struct re_registers search_regs;
290237613Sdelphij		regoff_t *starts = (regoff_t *) ecalloc(1, sizeof (regoff_t));
291237613Sdelphij		regoff_t *ends = (regoff_t *) ecalloc(1, sizeof (regoff_t));
292237613Sdelphij		spattern->not_bol = notbol;
293237613Sdelphij		re_set_registers(spattern, &search_regs, 1, starts, ends);
294237613Sdelphij		matched = re_search(spattern, line, line_len, 0, line_len, &search_regs) >= 0;
295237613Sdelphij		if (matched)
296237613Sdelphij		{
297237613Sdelphij			*sp = line + search_regs.start[0];
298237613Sdelphij			*ep = line + search_regs.end[0];
299237613Sdelphij		}
300237613Sdelphij		free(starts);
301237613Sdelphij		free(ends);
302237613Sdelphij	}
303237613Sdelphij#endif
304195902Sdelphij#if HAVE_POSIX_REGCOMP
305195902Sdelphij	{
306195902Sdelphij		regmatch_t rm;
307195902Sdelphij		int flags = (notbol) ? REG_NOTBOL : 0;
308195902Sdelphij		matched = !regexec(spattern, line, 1, &rm, flags);
309195902Sdelphij		if (matched)
310195902Sdelphij		{
311195902Sdelphij#ifndef __WATCOMC__
312195902Sdelphij			*sp = line + rm.rm_so;
313195902Sdelphij			*ep = line + rm.rm_eo;
314195902Sdelphij#else
315195902Sdelphij			*sp = rm.rm_sp;
316195902Sdelphij			*ep = rm.rm_ep;
317195902Sdelphij#endif
318195902Sdelphij		}
319195902Sdelphij	}
320195902Sdelphij#endif
321195902Sdelphij#if HAVE_PCRE
322195902Sdelphij	{
323195902Sdelphij		int flags = (notbol) ? PCRE_NOTBOL : 0;
324195902Sdelphij		int ovector[3];
325195902Sdelphij		matched = pcre_exec(spattern, NULL, line, line_len,
326195902Sdelphij			0, flags, ovector, 3) >= 0;
327195902Sdelphij		if (matched)
328195902Sdelphij		{
329195902Sdelphij			*sp = line + ovector[0];
330195902Sdelphij			*ep = line + ovector[1];
331195902Sdelphij		}
332195902Sdelphij	}
333195902Sdelphij#endif
334195902Sdelphij#if HAVE_RE_COMP
335195902Sdelphij	matched = (re_exec(line) == 1);
336195902Sdelphij	/*
337195902Sdelphij	 * re_exec doesn't seem to provide a way to get the matched string.
338195902Sdelphij	 */
339195902Sdelphij	*sp = *ep = NULL;
340195902Sdelphij#endif
341195902Sdelphij#if HAVE_REGCMP
342195902Sdelphij	*ep = regex(spattern, line);
343195902Sdelphij	matched = (*ep != NULL);
344195902Sdelphij	if (matched)
345195902Sdelphij		*sp = __loc1;
346195902Sdelphij#endif
347195902Sdelphij#if HAVE_V8_REGCOMP
348195902Sdelphij#if HAVE_REGEXEC2
349195902Sdelphij	matched = regexec2(spattern, line, notbol);
350195902Sdelphij#else
351195902Sdelphij	matched = regexec(spattern, line);
352195902Sdelphij#endif
353195902Sdelphij	if (matched)
354195902Sdelphij	{
355195902Sdelphij		*sp = spattern->startp[0];
356195902Sdelphij		*ep = spattern->endp[0];
357195902Sdelphij	}
358195902Sdelphij#endif
359221715Sdelphij	}
360195902Sdelphij	matched = (!(search_type & SRCH_NO_MATCH) && matched) ||
361195902Sdelphij			((search_type & SRCH_NO_MATCH) && !matched);
362195902Sdelphij	return (matched);
363195902Sdelphij}
364195902Sdelphij
365