1/*-
2 * Copyright (c) 1992, 1993, 1994
3 *	The Regents of the University of California.  All rights reserved.
4 * Copyright (c) 1992, 1993, 1994, 1995, 1996
5 *	Keith Bostic.  All rights reserved.
6 *
7 * See the LICENSE file for redistribution information.
8 */
9
10#include "config.h"
11
12#ifndef lint
13static const char sccsid[] = "$Id: search.c,v 10.26 2011/07/04 20:16:26 zy Exp $";
14#endif /* not lint */
15
16#include <sys/types.h>
17#include <sys/queue.h>
18#include <sys/time.h>
19
20#include <bitstring.h>
21#include <ctype.h>
22#include <errno.h>
23#include <limits.h>
24#include <stdio.h>
25#include <stdlib.h>
26#include <string.h>
27#include <unistd.h>
28
29#include "common.h"
30
31typedef enum { S_EMPTY, S_EOF, S_NOPREV, S_NOTFOUND, S_SOF, S_WRAP } smsg_t;
32
33static void	search_msg __P((SCR *, smsg_t));
34static int	search_init __P((SCR *, dir_t, CHAR_T *, size_t, CHAR_T **, u_int));
35
36/*
37 * search_init --
38 *	Set up a search.
39 */
40static int
41search_init(
42	SCR *sp,
43	dir_t dir,
44	CHAR_T *ptrn,
45	size_t plen,
46	CHAR_T **epp,
47	u_int flags)
48{
49	recno_t lno;
50	int delim;
51	CHAR_T *p, *t;
52
53	/* If the file is empty, it's a fast search. */
54	if (sp->lno <= 1) {
55		if (db_last(sp, &lno))
56			return (1);
57		if (lno == 0) {
58			if (LF_ISSET(SEARCH_MSG))
59				search_msg(sp, S_EMPTY);
60			return (1);
61		}
62	}
63
64	if (LF_ISSET(SEARCH_PARSE)) {		/* Parse the string. */
65		/*
66		 * Use the saved pattern if no pattern specified, or if only
67		 * one or two delimiter characters specified.
68		 *
69		 * !!!
70		 * Historically, only the pattern itself was saved, vi didn't
71		 * preserve addressing or delta information.
72		 */
73		if (ptrn == NULL)
74			goto prev;
75		if (plen == 1) {
76			if (epp != NULL)
77				*epp = ptrn + 1;
78			goto prev;
79		}
80		if (ptrn[0] == ptrn[1]) {
81			if (epp != NULL)
82				*epp = ptrn + 2;
83
84			/* Complain if we don't have a previous pattern. */
85prev:			if (sp->re == NULL) {
86				search_msg(sp, S_NOPREV);
87				return (1);
88			}
89			/* Re-compile the search pattern if necessary. */
90			if (!F_ISSET(sp, SC_RE_SEARCH) && re_compile(sp,
91			    sp->re, sp->re_len, NULL, NULL, &sp->re_c,
92			    RE_C_SEARCH |
93			    (LF_ISSET(SEARCH_MSG) ? 0 : RE_C_SILENT)))
94				return (1);
95
96			/* Set the search direction. */
97			if (LF_ISSET(SEARCH_SET))
98				sp->searchdir = dir;
99			return (0);
100		}
101
102		/*
103		 * Set the delimiter, and move forward to the terminating
104		 * delimiter, handling escaped delimiters.
105		 *
106		 * QUOTING NOTE:
107		 * Only discard an escape character if it escapes a delimiter.
108		 */
109		for (delim = *ptrn, p = t = ++ptrn;; *t++ = *p++) {
110			if (--plen == 0 || p[0] == delim) {
111				if (plen != 0)
112					++p;
113				break;
114			}
115			if (plen > 1 && p[0] == '\\' && p[1] == delim) {
116				++p;
117				--plen;
118			}
119		}
120		if (epp != NULL)
121			*epp = p;
122
123		plen = t - ptrn;
124	}
125
126	/* Compile the RE. */
127	if (re_compile(sp, ptrn, plen, &sp->re, &sp->re_len, &sp->re_c,
128	    RE_C_SEARCH |
129	    (LF_ISSET(SEARCH_MSG) ? 0 : RE_C_SILENT) |
130	    (LF_ISSET(SEARCH_TAG) ? RE_C_TAG : 0) |
131	    (LF_ISSET(SEARCH_CSCOPE) ? RE_C_CSCOPE : 0)))
132		return (1);
133
134	/* Set the search direction. */
135	if (LF_ISSET(SEARCH_SET))
136		sp->searchdir = dir;
137
138	return (0);
139}
140
141/*
142 * f_search --
143 *	Do a forward search.
144 *
145 * PUBLIC: int f_search __P((SCR *,
146 * PUBLIC:    MARK *, MARK *, CHAR_T *, size_t, CHAR_T **, u_int));
147 */
148int
149f_search(
150	SCR *sp,
151	MARK *fm,
152	MARK *rm,
153	CHAR_T *ptrn,
154	size_t plen,
155	CHAR_T **eptrn,
156	u_int flags)
157{
158	busy_t btype;
159	recno_t lno;
160	regmatch_t match[1];
161	size_t coff, len;
162	int cnt, eval, rval, wrapped;
163	CHAR_T *l;
164
165	if (search_init(sp, FORWARD, ptrn, plen, eptrn, flags))
166		return (1);
167
168	if (LF_ISSET(SEARCH_FILE)) {
169		lno = 1;
170		coff = 0;
171	} else {
172		if (db_get(sp, fm->lno, DBG_FATAL, &l, &len))
173			return (1);
174		lno = fm->lno;
175
176		/*
177		 * If doing incremental search, start searching at the previous
178		 * column, so that we search a minimal distance and still match
179		 * special patterns, e.g., \< for beginning of a word.
180		 *
181		 * Otherwise, start searching immediately after the cursor.  If
182		 * at the end of the line, start searching on the next line.
183		 * This is incompatible (read bug fix) with the historic vi --
184		 * searches for the '$' pattern never moved forward, and the
185		 * "-t foo" didn't work if the 'f' was the first character in
186		 * the file.
187		 */
188		if (LF_ISSET(SEARCH_INCR)) {
189			if ((coff = fm->cno) != 0)
190				--coff;
191		} else if (fm->cno + 1 >= len) {
192			coff = 0;
193			lno = fm->lno + 1;
194			if (db_get(sp, lno, 0, &l, &len)) {
195				if (!O_ISSET(sp, O_WRAPSCAN)) {
196					if (LF_ISSET(SEARCH_MSG))
197						search_msg(sp, S_EOF);
198					return (1);
199				}
200				lno = 1;
201			}
202		} else
203			coff = fm->cno + 1;
204	}
205
206	btype = BUSY_ON;
207	for (cnt = INTERRUPT_CHECK, rval = 1, wrapped = 0;; ++lno, coff = 0) {
208		if (cnt-- == 0) {
209			if (INTERRUPTED(sp))
210				break;
211			if (LF_ISSET(SEARCH_MSG)) {
212				search_busy(sp, btype);
213				btype = BUSY_UPDATE;
214			}
215			cnt = INTERRUPT_CHECK;
216		}
217		if ((wrapped && lno > fm->lno) || db_get(sp, lno, 0, &l, &len)) {
218			if (wrapped) {
219				if (LF_ISSET(SEARCH_MSG))
220					search_msg(sp, S_NOTFOUND);
221				break;
222			}
223			if (!O_ISSET(sp, O_WRAPSCAN)) {
224				if (LF_ISSET(SEARCH_MSG))
225					search_msg(sp, S_EOF);
226				break;
227			}
228			lno = 0;
229			wrapped = 1;
230			continue;
231		}
232
233		/* If already at EOL, just keep going. */
234		if (len != 0 && coff == len)
235			continue;
236
237		/* Set the termination. */
238		match[0].rm_so = coff;
239		match[0].rm_eo = len;
240
241#if defined(DEBUG) && 0
242		TRACE(sp, "F search: %lu from %u to %u\n",
243		    lno, coff, len != 0 ? len - 1 : len);
244#endif
245		/* Search the line. */
246		eval = regexec(&sp->re_c, l, 1, match,
247		    (match[0].rm_so == 0 ? 0 : REG_NOTBOL) | REG_STARTEND);
248		if (eval == REG_NOMATCH)
249			continue;
250		if (eval != 0) {
251			if (LF_ISSET(SEARCH_MSG))
252				re_error(sp, eval, &sp->re_c);
253			else
254				(void)sp->gp->scr_bell(sp);
255			break;
256		}
257
258		/* Warn if the search wrapped. */
259		if (wrapped && LF_ISSET(SEARCH_WMSG))
260			search_msg(sp, S_WRAP);
261
262#if defined(DEBUG) && 0
263		TRACE(sp, "F search: %qu to %qu\n",
264		    match[0].rm_so, match[0].rm_eo);
265#endif
266		rm->lno = lno;
267		rm->cno = match[0].rm_so;
268
269		/*
270		 * If a change command, it's possible to move beyond the end
271		 * of a line.  Historic vi generally got this wrong (e.g. try
272		 * "c?$<cr>").  Not all that sure this gets it right, there
273		 * are lots of strange cases.
274		 */
275		if (!LF_ISSET(SEARCH_EOL) && rm->cno >= len)
276			rm->cno = len != 0 ? len - 1 : 0;
277
278		rval = 0;
279		break;
280	}
281
282	if (LF_ISSET(SEARCH_MSG))
283		search_busy(sp, BUSY_OFF);
284	return (rval);
285}
286
287/*
288 * b_search --
289 *	Do a backward search.
290 *
291 * PUBLIC: int b_search __P((SCR *,
292 * PUBLIC:    MARK *, MARK *, CHAR_T *, size_t, CHAR_T **, u_int));
293 */
294int
295b_search(
296	SCR *sp,
297	MARK *fm,
298	MARK *rm,
299	CHAR_T *ptrn,
300	size_t plen,
301	CHAR_T **eptrn,
302	u_int flags)
303{
304	busy_t btype;
305	recno_t lno;
306	regmatch_t match[1];
307	size_t coff, last, len;
308	int cnt, eval, rval, wrapped;
309	CHAR_T *l;
310
311	if (search_init(sp, BACKWARD, ptrn, plen, eptrn, flags))
312		return (1);
313
314	/*
315	 * If doing incremental search, set the "starting" position past the
316	 * current column, so that we search a minimal distance and still
317	 * match special patterns, e.g., \> for the end of a word.  This is
318	 * safe when the cursor is at the end of a line because we only use
319	 * it for comparison with the location of the match.
320	 *
321	 * Otherwise, start searching immediately before the cursor.  If in
322	 * the first column, start search on the previous line.
323	 */
324	if (LF_ISSET(SEARCH_INCR)) {
325		lno = fm->lno;
326		coff = fm->cno + 1;
327	} else {
328		if (fm->cno == 0) {
329			if (fm->lno == 1 && !O_ISSET(sp, O_WRAPSCAN)) {
330				if (LF_ISSET(SEARCH_MSG))
331					search_msg(sp, S_SOF);
332				return (1);
333			}
334			lno = fm->lno - 1;
335		} else
336			lno = fm->lno;
337		coff = fm->cno;
338	}
339
340	btype = BUSY_ON;
341	for (cnt = INTERRUPT_CHECK, rval = 1, wrapped = 0;; --lno, coff = 0) {
342		if (cnt-- == 0) {
343			if (INTERRUPTED(sp))
344				break;
345			if (LF_ISSET(SEARCH_MSG)) {
346				search_busy(sp, btype);
347				btype = BUSY_UPDATE;
348			}
349			cnt = INTERRUPT_CHECK;
350		}
351		if ((wrapped && lno < fm->lno) || lno == 0) {
352			if (wrapped) {
353				if (LF_ISSET(SEARCH_MSG))
354					search_msg(sp, S_NOTFOUND);
355				break;
356			}
357			if (!O_ISSET(sp, O_WRAPSCAN)) {
358				if (LF_ISSET(SEARCH_MSG))
359					search_msg(sp, S_SOF);
360				break;
361			}
362			if (db_last(sp, &lno))
363				break;
364			if (lno == 0) {
365				if (LF_ISSET(SEARCH_MSG))
366					search_msg(sp, S_EMPTY);
367				break;
368			}
369			++lno;
370			wrapped = 1;
371			continue;
372		}
373
374		if (db_get(sp, lno, 0, &l, &len))
375			break;
376
377		/* Set the termination. */
378		match[0].rm_so = 0;
379		match[0].rm_eo = len;
380
381#if defined(DEBUG) && 0
382		TRACE(sp, "B search: %lu from 0 to %qu\n", lno, match[0].rm_eo);
383#endif
384		/* Search the line. */
385		eval = regexec(&sp->re_c, l, 1, match,
386		    (match[0].rm_eo == len ? 0 : REG_NOTEOL) | REG_STARTEND);
387		if (eval == REG_NOMATCH)
388			continue;
389		if (eval != 0) {
390			if (LF_ISSET(SEARCH_MSG))
391				re_error(sp, eval, &sp->re_c);
392			else
393				(void)sp->gp->scr_bell(sp);
394			break;
395		}
396
397		/* Check for a match starting past the cursor. */
398		if (coff != 0 && match[0].rm_so >= coff)
399			continue;
400
401		/* Warn if the search wrapped. */
402		if (wrapped && LF_ISSET(SEARCH_WMSG))
403			search_msg(sp, S_WRAP);
404
405#if defined(DEBUG) && 0
406		TRACE(sp, "B found: %qu to %qu\n",
407		    match[0].rm_so, match[0].rm_eo);
408#endif
409		/*
410		 * We now have the first match on the line.  Step through the
411		 * line character by character until find the last acceptable
412		 * match.  This is painful, we need a better interface to regex
413		 * to make this work.
414		 */
415		for (;;) {
416			last = match[0].rm_so++;
417			if (match[0].rm_so >= len)
418				break;
419			match[0].rm_eo = len;
420			eval = regexec(&sp->re_c, l, 1, match,
421			    (match[0].rm_so == 0 ? 0 : REG_NOTBOL) |
422			    REG_STARTEND);
423			if (eval == REG_NOMATCH)
424				break;
425			if (eval != 0) {
426				if (LF_ISSET(SEARCH_MSG))
427					re_error(sp, eval, &sp->re_c);
428				else
429					(void)sp->gp->scr_bell(sp);
430				goto err;
431			}
432			if (coff && match[0].rm_so >= coff)
433				break;
434		}
435		rm->lno = lno;
436
437		/* See comment in f_search(). */
438		if (!LF_ISSET(SEARCH_EOL) && last >= len)
439			rm->cno = len != 0 ? len - 1 : 0;
440		else
441			rm->cno = last;
442		rval = 0;
443		break;
444	}
445
446err:	if (LF_ISSET(SEARCH_MSG))
447		search_busy(sp, BUSY_OFF);
448	return (rval);
449}
450
451/*
452 * search_msg --
453 *	Display one of the search messages.
454 */
455static void
456search_msg(
457	SCR *sp,
458	smsg_t msg)
459{
460	switch (msg) {
461	case S_EMPTY:
462		msgq(sp, M_ERR, "072|File empty; nothing to search");
463		break;
464	case S_EOF:
465		msgq(sp, M_ERR,
466		    "073|Reached end-of-file without finding the pattern");
467		break;
468	case S_NOPREV:
469		msgq(sp, M_ERR, "074|No previous search pattern");
470		break;
471	case S_NOTFOUND:
472		msgq(sp, M_ERR, "075|Pattern not found");
473		break;
474	case S_SOF:
475		msgq(sp, M_ERR,
476		    "076|Reached top-of-file without finding the pattern");
477		break;
478	case S_WRAP:
479		msgq(sp, M_ERR, "077|Search wrapped");
480		break;
481	default:
482		abort();
483	}
484}
485
486/*
487 * search_busy --
488 *	Put up the busy searching message.
489 *
490 * PUBLIC: void search_busy __P((SCR *, busy_t));
491 */
492void
493search_busy(
494	SCR *sp,
495	busy_t btype)
496{
497	sp->gp->scr_busy(sp, "078|Searching...", btype);
498}
499