pattern.c revision 330571
/*
 * Copyright (C) 1984-2017  Mark Nudelman
 *
 * You may distribute under the terms of either the GNU General Public
 * License or the Less License, as specified in the README file.
 *
 * For more information, see the README file.
 */
9
/*
 * Routines to do pattern matching.
 */
13
14#include "less.h"
15
16extern int caseless;
17
18/*
19 * Compile a search pattern, for future use by match_pattern.
20 */
21	static int
22compile_pattern2(pattern, search_type, comp_pattern, show_error)
23	char *pattern;
24	int search_type;
25	PATTERN_TYPE *comp_pattern;
26	int show_error;
27{
28	if (search_type & SRCH_NO_REGEX)
29		return (0);
30  {
31#if HAVE_GNU_REGEX
32	struct re_pattern_buffer *comp = (struct re_pattern_buffer *)
33		ecalloc(1, sizeof(struct re_pattern_buffer));
34	re_set_syntax(RE_SYNTAX_POSIX_EXTENDED);
35	if (re_compile_pattern(pattern, strlen(pattern), comp))
36	{
37		free(comp);
38		if (show_error)
39			error("Invalid pattern", NULL_PARG);
40		return (-1);
41	}
42	if (*comp_pattern != NULL)
43	{
44		regfree(*comp_pattern);
45		free(*comp_pattern);
46	}
47	*comp_pattern = comp;
48#endif
49#if HAVE_POSIX_REGCOMP
50	regex_t *comp = (regex_t *) ecalloc(1, sizeof(regex_t));
51	if (regcomp(comp, pattern, REGCOMP_FLAG))
52	{
53		free(comp);
54		if (show_error)
55			error("Invalid pattern", NULL_PARG);
56		return (-1);
57	}
58	if (*comp_pattern != NULL)
59	{
60		regfree(*comp_pattern);
61		free(*comp_pattern);
62	}
63	*comp_pattern = comp;
64#endif
65#if HAVE_PCRE
66	pcre *comp;
67	constant char *errstring;
68	int erroffset;
69	PARG parg;
70	comp = pcre_compile(pattern, 0,
71			&errstring, &erroffset, NULL);
72	if (comp == NULL)
73	{
74		parg.p_string = (char *) errstring;
75		if (show_error)
76			error("%s", &parg);
77		return (-1);
78	}
79	*comp_pattern = comp;
80#endif
81#if HAVE_RE_COMP
82	PARG parg;
83	if ((parg.p_string = re_comp(pattern)) != NULL)
84	{
85		if (show_error)
86			error("%s", &parg);
87		return (-1);
88	}
89	*comp_pattern = 1;
90#endif
91#if HAVE_REGCMP
92	char *comp;
93	if ((comp = regcmp(pattern, 0)) == NULL)
94	{
95		if (show_error)
96			error("Invalid pattern", NULL_PARG);
97		return (-1);
98	}
99	if (comp_pattern != NULL)
100		free(*comp_pattern);
101	*comp_pattern = comp;
102#endif
103#if HAVE_V8_REGCOMP
104	struct regexp *comp;
105	reg_show_error = show_error;
106	comp = regcomp(pattern);
107	reg_show_error = 1;
108	if (comp == NULL)
109	{
110		/*
111		 * regcomp has already printed an error message
112		 * via regerror().
113		 */
114		return (-1);
115	}
116	if (*comp_pattern != NULL)
117		free(*comp_pattern);
118	*comp_pattern = comp;
119#endif
120  }
121	return (0);
122}
123
124/*
125 * Like compile_pattern2, but convert the pattern to lowercase if necessary.
126 */
127	public int
128compile_pattern(pattern, search_type, comp_pattern)
129	char *pattern;
130	int search_type;
131	PATTERN_TYPE *comp_pattern;
132{
133	char *cvt_pattern;
134	int result;
135
136	if (caseless != OPT_ONPLUS)
137		cvt_pattern = pattern;
138	else
139	{
140		cvt_pattern = (char*) ecalloc(1, cvt_length(strlen(pattern), CVT_TO_LC));
141		cvt_text(cvt_pattern, pattern, (int *)NULL, (int *)NULL, CVT_TO_LC);
142	}
143	result = compile_pattern2(cvt_pattern, search_type, comp_pattern, 1);
144	if (cvt_pattern != pattern)
145		free(cvt_pattern);
146	return (result);
147}
148
149/*
150 * Forget that we have a compiled pattern.
151 */
152	public void
153uncompile_pattern(pattern)
154	PATTERN_TYPE *pattern;
155{
156#if HAVE_GNU_REGEX
157	if (*pattern != NULL)
158	{
159		regfree(*pattern);
160		free(*pattern);
161	}
162	*pattern = NULL;
163#endif
164#if HAVE_POSIX_REGCOMP
165	if (*pattern != NULL)
166	{
167		regfree(*pattern);
168		free(*pattern);
169	}
170	*pattern = NULL;
171#endif
172#if HAVE_PCRE
173	if (*pattern != NULL)
174		pcre_free(*pattern);
175	*pattern = NULL;
176#endif
177#if HAVE_RE_COMP
178	*pattern = 0;
179#endif
180#if HAVE_REGCMP
181	if (*pattern != NULL)
182		free(*pattern);
183	*pattern = NULL;
184#endif
185#if HAVE_V8_REGCOMP
186	if (*pattern != NULL)
187		free(*pattern);
188	*pattern = NULL;
189#endif
190}
191
192/*
193 * Can a pattern be successfully compiled?
194 */
195	public int
196valid_pattern(pattern)
197	char *pattern;
198{
199	PATTERN_TYPE comp_pattern;
200	int result;
201
202	CLEAR_PATTERN(comp_pattern);
203	result = compile_pattern2(pattern, 0, &comp_pattern, 0);
204	if (result != 0)
205		return (0);
206	uncompile_pattern(&comp_pattern);
207	return (1);
208}
209
210/*
211 * Is a compiled pattern null?
212 */
213	public int
214is_null_pattern(pattern)
215	PATTERN_TYPE pattern;
216{
217#if HAVE_GNU_REGEX
218	return (pattern == NULL);
219#endif
220#if HAVE_POSIX_REGCOMP
221	return (pattern == NULL);
222#endif
223#if HAVE_PCRE
224	return (pattern == NULL);
225#endif
226#if HAVE_RE_COMP
227	return (pattern == 0);
228#endif
229#if HAVE_REGCMP
230	return (pattern == NULL);
231#endif
232#if HAVE_V8_REGCOMP
233	return (pattern == NULL);
234#endif
235#if NO_REGEX
236	return (pattern == NULL);
237#endif
238}
239
240/*
241 * Simple pattern matching function.
242 * It supports no metacharacters like *, etc.
243 */
244	static int
245match(pattern, pattern_len, buf, buf_len, pfound, pend)
246	char *pattern;
247	int pattern_len;
248	char *buf;
249	int buf_len;
250	char **pfound, **pend;
251{
252	char *pp, *lp;
253	char *pattern_end = pattern + pattern_len;
254	char *buf_end = buf + buf_len;
255
256	for ( ;  buf < buf_end;  buf++)
257	{
258		for (pp = pattern, lp = buf;  ;  pp++, lp++)
259		{
260			char cp = *pp;
261			char cl = *lp;
262			if (caseless == OPT_ONPLUS && ASCII_IS_UPPER(cp))
263				cp = ASCII_TO_LOWER(cp);
264			if (cp != cl)
265				break;
266			if (pp == pattern_end || lp == buf_end)
267				break;
268		}
269		if (pp == pattern_end)
270		{
271			if (pfound != NULL)
272				*pfound = buf;
273			if (pend != NULL)
274				*pend = lp;
275			return (1);
276		}
277	}
278	return (0);
279}
280
281/*
282 * Perform a pattern match with the previously compiled pattern.
283 * Set sp and ep to the start and end of the matched string.
284 */
285	public int
286match_pattern(pattern, tpattern, line, line_len, sp, ep, notbol, search_type)
287	PATTERN_TYPE pattern;
288	char *tpattern;
289	char *line;
290	int line_len;
291	char **sp;
292	char **ep;
293	int notbol;
294	int search_type;
295{
296	int matched;
297
298	*sp = *ep = NULL;
299#if NO_REGEX
300	search_type |= SRCH_NO_REGEX;
301#endif
302	if (search_type & SRCH_NO_REGEX)
303		matched = match(tpattern, strlen(tpattern), line, line_len, sp, ep);
304	else
305	{
306#if HAVE_GNU_REGEX
307	{
308		struct re_registers search_regs;
309		pattern->not_bol = notbol;
310		pattern->regs_allocated = REGS_UNALLOCATED;
311		matched = re_search(pattern, line, line_len, 0, line_len, &search_regs) >= 0;
312		if (matched)
313		{
314			*sp = line + search_regs.start[0];
315			*ep = line + search_regs.end[0];
316		}
317	}
318#endif
319#if HAVE_POSIX_REGCOMP
320	{
321		regmatch_t rm;
322		int flags = (notbol) ? REG_NOTBOL : 0;
323#ifdef REG_STARTEND
324		flags |= REG_STARTEND;
325		rm.rm_so = 0;
326		rm.rm_eo = line_len;
327#endif
328		matched = !regexec(pattern, line, 1, &rm, flags);
329		if (matched)
330		{
331#ifndef __WATCOMC__
332			*sp = line + rm.rm_so;
333			*ep = line + rm.rm_eo;
334#else
335			*sp = rm.rm_sp;
336			*ep = rm.rm_ep;
337#endif
338		}
339	}
340#endif
341#if HAVE_PCRE
342	{
343		int flags = (notbol) ? PCRE_NOTBOL : 0;
344		int ovector[3];
345		matched = pcre_exec(pattern, NULL, line, line_len,
346			0, flags, ovector, 3) >= 0;
347		if (matched)
348		{
349			*sp = line + ovector[0];
350			*ep = line + ovector[1];
351		}
352	}
353#endif
354#if HAVE_RE_COMP
355	matched = (re_exec(line) == 1);
356	/*
357	 * re_exec doesn't seem to provide a way to get the matched string.
358	 */
359	*sp = *ep = NULL;
360#endif
361#if HAVE_REGCMP
362	*ep = regex(pattern, line);
363	matched = (*ep != NULL);
364	if (matched)
365		*sp = __loc1;
366#endif
367#if HAVE_V8_REGCOMP
368#if HAVE_REGEXEC2
369	matched = regexec2(pattern, line, notbol);
370#else
371	matched = regexec(pattern, line);
372#endif
373	if (matched)
374	{
375		*sp = pattern->startp[0];
376		*ep = pattern->endp[0];
377	}
378#endif
379	}
380	matched = (!(search_type & SRCH_NO_MATCH) && matched) ||
381			((search_type & SRCH_NO_MATCH) && !matched);
382	return (matched);
383}
384
385