119304Speter/*-
219304Speter * Copyright (c) 1992, 1993, 1994
319304Speter *	The Regents of the University of California.  All rights reserved.
419304Speter * Copyright (c) 1992, 1993, 1994, 1995, 1996
519304Speter *	Keith Bostic.  All rights reserved.
619304Speter *
719304Speter * See the LICENSE file for redistribution information.
819304Speter */
919304Speter
1019304Speter#include "config.h"
1119304Speter
1219304Speter#ifndef lint
13254225Speterstatic const char sccsid[] = "$Id: search.c,v 10.26 2011/07/04 20:16:26 zy Exp $";
1419304Speter#endif /* not lint */
1519304Speter
1619304Speter#include <sys/types.h>
1719304Speter#include <sys/queue.h>
18254225Speter#include <sys/time.h>
1919304Speter
2019304Speter#include <bitstring.h>
2119304Speter#include <ctype.h>
2219304Speter#include <errno.h>
2319304Speter#include <limits.h>
2419304Speter#include <stdio.h>
2519304Speter#include <stdlib.h>
2619304Speter#include <string.h>
2719304Speter#include <unistd.h>
2819304Speter
2919304Speter#include "common.h"
3019304Speter
3119304Spetertypedef enum { S_EMPTY, S_EOF, S_NOPREV, S_NOTFOUND, S_SOF, S_WRAP } smsg_t;
3219304Speter
3319304Speterstatic void	search_msg __P((SCR *, smsg_t));
34254225Speterstatic int	search_init __P((SCR *, dir_t, CHAR_T *, size_t, CHAR_T **, u_int));
3519304Speter
3619304Speter/*
3719304Speter * search_init --
3819304Speter *	Set up a search.
3919304Speter */
4019304Speterstatic int
41254225Spetersearch_init(
42254225Speter	SCR *sp,
43254225Speter	dir_t dir,
44254225Speter	CHAR_T *ptrn,
45254225Speter	size_t plen,
46254225Speter	CHAR_T **epp,
47254225Speter	u_int flags)
4819304Speter{
4919304Speter	recno_t lno;
5019304Speter	int delim;
51254225Speter	CHAR_T *p, *t;
5219304Speter
5319304Speter	/* If the file is empty, it's a fast search. */
5419304Speter	if (sp->lno <= 1) {
5519304Speter		if (db_last(sp, &lno))
5619304Speter			return (1);
5719304Speter		if (lno == 0) {
5819304Speter			if (LF_ISSET(SEARCH_MSG))
5919304Speter				search_msg(sp, S_EMPTY);
6019304Speter			return (1);
6119304Speter		}
6219304Speter	}
6319304Speter
6419304Speter	if (LF_ISSET(SEARCH_PARSE)) {		/* Parse the string. */
6519304Speter		/*
6619304Speter		 * Use the saved pattern if no pattern specified, or if only
6719304Speter		 * one or two delimiter characters specified.
6819304Speter		 *
6919304Speter		 * !!!
7019304Speter		 * Historically, only the pattern itself was saved, vi didn't
7119304Speter		 * preserve addressing or delta information.
7219304Speter		 */
7319304Speter		if (ptrn == NULL)
7419304Speter			goto prev;
7519304Speter		if (plen == 1) {
7619304Speter			if (epp != NULL)
7719304Speter				*epp = ptrn + 1;
7819304Speter			goto prev;
7919304Speter		}
8019304Speter		if (ptrn[0] == ptrn[1]) {
8119304Speter			if (epp != NULL)
8219304Speter				*epp = ptrn + 2;
8319304Speter
8419304Speter			/* Complain if we don't have a previous pattern. */
8519304Speterprev:			if (sp->re == NULL) {
8619304Speter				search_msg(sp, S_NOPREV);
8719304Speter				return (1);
8819304Speter			}
8919304Speter			/* Re-compile the search pattern if necessary. */
9019304Speter			if (!F_ISSET(sp, SC_RE_SEARCH) && re_compile(sp,
9119304Speter			    sp->re, sp->re_len, NULL, NULL, &sp->re_c,
9219304Speter			    RE_C_SEARCH |
9319304Speter			    (LF_ISSET(SEARCH_MSG) ? 0 : RE_C_SILENT)))
9419304Speter				return (1);
9519304Speter
9619304Speter			/* Set the search direction. */
9719304Speter			if (LF_ISSET(SEARCH_SET))
9819304Speter				sp->searchdir = dir;
9919304Speter			return (0);
10019304Speter		}
10119304Speter
10219304Speter		/*
10319304Speter		 * Set the delimiter, and move forward to the terminating
10419304Speter		 * delimiter, handling escaped delimiters.
10519304Speter		 *
10619304Speter		 * QUOTING NOTE:
10719304Speter		 * Only discard an escape character if it escapes a delimiter.
10819304Speter		 */
10919304Speter		for (delim = *ptrn, p = t = ++ptrn;; *t++ = *p++) {
11019304Speter			if (--plen == 0 || p[0] == delim) {
11119304Speter				if (plen != 0)
11219304Speter					++p;
11319304Speter				break;
11419304Speter			}
11519304Speter			if (plen > 1 && p[0] == '\\' && p[1] == delim) {
11619304Speter				++p;
11719304Speter				--plen;
11819304Speter			}
11919304Speter		}
12019304Speter		if (epp != NULL)
12119304Speter			*epp = p;
12219304Speter
12319304Speter		plen = t - ptrn;
12419304Speter	}
12519304Speter
12619304Speter	/* Compile the RE. */
12719304Speter	if (re_compile(sp, ptrn, plen, &sp->re, &sp->re_len, &sp->re_c,
12819304Speter	    RE_C_SEARCH |
12919304Speter	    (LF_ISSET(SEARCH_MSG) ? 0 : RE_C_SILENT) |
13019304Speter	    (LF_ISSET(SEARCH_TAG) ? RE_C_TAG : 0) |
13119304Speter	    (LF_ISSET(SEARCH_CSCOPE) ? RE_C_CSCOPE : 0)))
13219304Speter		return (1);
13319304Speter
13419304Speter	/* Set the search direction. */
13519304Speter	if (LF_ISSET(SEARCH_SET))
13619304Speter		sp->searchdir = dir;
13719304Speter
13819304Speter	return (0);
13919304Speter}
14019304Speter
14119304Speter/*
14219304Speter * f_search --
14319304Speter *	Do a forward search.
14419304Speter *
14519304Speter * PUBLIC: int f_search __P((SCR *,
146254225Speter * PUBLIC:    MARK *, MARK *, CHAR_T *, size_t, CHAR_T **, u_int));
14719304Speter */
14819304Speterint
149254225Speterf_search(
150254225Speter	SCR *sp,
151254225Speter	MARK *fm,
152254225Speter	MARK *rm,
153254225Speter	CHAR_T *ptrn,
154254225Speter	size_t plen,
155254225Speter	CHAR_T **eptrn,
156254225Speter	u_int flags)
15719304Speter{
15819304Speter	busy_t btype;
15919304Speter	recno_t lno;
16019304Speter	regmatch_t match[1];
16119304Speter	size_t coff, len;
16219304Speter	int cnt, eval, rval, wrapped;
163254225Speter	CHAR_T *l;
16419304Speter
16519304Speter	if (search_init(sp, FORWARD, ptrn, plen, eptrn, flags))
16619304Speter		return (1);
16719304Speter
16819304Speter	if (LF_ISSET(SEARCH_FILE)) {
16919304Speter		lno = 1;
17019304Speter		coff = 0;
17119304Speter	} else {
17219304Speter		if (db_get(sp, fm->lno, DBG_FATAL, &l, &len))
17319304Speter			return (1);
17419304Speter		lno = fm->lno;
17519304Speter
17619304Speter		/*
17719304Speter		 * If doing incremental search, start searching at the previous
17819304Speter		 * column, so that we search a minimal distance and still match
17919304Speter		 * special patterns, e.g., \< for beginning of a word.
18019304Speter		 *
18119304Speter		 * Otherwise, start searching immediately after the cursor.  If
18219304Speter		 * at the end of the line, start searching on the next line.
18319304Speter		 * This is incompatible (read bug fix) with the historic vi --
18419304Speter		 * searches for the '$' pattern never moved forward, and the
18519304Speter		 * "-t foo" didn't work if the 'f' was the first character in
18619304Speter		 * the file.
18719304Speter		 */
18819304Speter		if (LF_ISSET(SEARCH_INCR)) {
18919304Speter			if ((coff = fm->cno) != 0)
19019304Speter				--coff;
19119304Speter		} else if (fm->cno + 1 >= len) {
19219304Speter			coff = 0;
19319304Speter			lno = fm->lno + 1;
19419304Speter			if (db_get(sp, lno, 0, &l, &len)) {
19519304Speter				if (!O_ISSET(sp, O_WRAPSCAN)) {
19619304Speter					if (LF_ISSET(SEARCH_MSG))
19719304Speter						search_msg(sp, S_EOF);
19819304Speter					return (1);
19919304Speter				}
20019304Speter				lno = 1;
20119304Speter			}
20219304Speter		} else
20319304Speter			coff = fm->cno + 1;
20419304Speter	}
20519304Speter
20619304Speter	btype = BUSY_ON;
20719304Speter	for (cnt = INTERRUPT_CHECK, rval = 1, wrapped = 0;; ++lno, coff = 0) {
20819304Speter		if (cnt-- == 0) {
20919304Speter			if (INTERRUPTED(sp))
21019304Speter				break;
21119304Speter			if (LF_ISSET(SEARCH_MSG)) {
21219304Speter				search_busy(sp, btype);
21319304Speter				btype = BUSY_UPDATE;
21419304Speter			}
21519304Speter			cnt = INTERRUPT_CHECK;
21619304Speter		}
217254225Speter		if ((wrapped && lno > fm->lno) || db_get(sp, lno, 0, &l, &len)) {
21819304Speter			if (wrapped) {
21919304Speter				if (LF_ISSET(SEARCH_MSG))
22019304Speter					search_msg(sp, S_NOTFOUND);
22119304Speter				break;
22219304Speter			}
22319304Speter			if (!O_ISSET(sp, O_WRAPSCAN)) {
22419304Speter				if (LF_ISSET(SEARCH_MSG))
22519304Speter					search_msg(sp, S_EOF);
22619304Speter				break;
22719304Speter			}
22819304Speter			lno = 0;
22919304Speter			wrapped = 1;
23019304Speter			continue;
23119304Speter		}
23219304Speter
23319304Speter		/* If already at EOL, just keep going. */
23419304Speter		if (len != 0 && coff == len)
23519304Speter			continue;
23619304Speter
23719304Speter		/* Set the termination. */
23819304Speter		match[0].rm_so = coff;
23919304Speter		match[0].rm_eo = len;
24019304Speter
24119304Speter#if defined(DEBUG) && 0
24219304Speter		TRACE(sp, "F search: %lu from %u to %u\n",
24319304Speter		    lno, coff, len != 0 ? len - 1 : len);
24419304Speter#endif
24519304Speter		/* Search the line. */
24619304Speter		eval = regexec(&sp->re_c, l, 1, match,
24719304Speter		    (match[0].rm_so == 0 ? 0 : REG_NOTBOL) | REG_STARTEND);
24819304Speter		if (eval == REG_NOMATCH)
24919304Speter			continue;
25019304Speter		if (eval != 0) {
25119304Speter			if (LF_ISSET(SEARCH_MSG))
25219304Speter				re_error(sp, eval, &sp->re_c);
25319304Speter			else
25419304Speter				(void)sp->gp->scr_bell(sp);
25519304Speter			break;
25619304Speter		}
25719304Speter
25819304Speter		/* Warn if the search wrapped. */
25919304Speter		if (wrapped && LF_ISSET(SEARCH_WMSG))
26019304Speter			search_msg(sp, S_WRAP);
26119304Speter
26219304Speter#if defined(DEBUG) && 0
26319304Speter		TRACE(sp, "F search: %qu to %qu\n",
26419304Speter		    match[0].rm_so, match[0].rm_eo);
26519304Speter#endif
26619304Speter		rm->lno = lno;
26719304Speter		rm->cno = match[0].rm_so;
26819304Speter
26919304Speter		/*
27019304Speter		 * If a change command, it's possible to move beyond the end
27119304Speter		 * of a line.  Historic vi generally got this wrong (e.g. try
27219304Speter		 * "c?$<cr>").  Not all that sure this gets it right, there
27319304Speter		 * are lots of strange cases.
27419304Speter		 */
27519304Speter		if (!LF_ISSET(SEARCH_EOL) && rm->cno >= len)
27619304Speter			rm->cno = len != 0 ? len - 1 : 0;
27719304Speter
27819304Speter		rval = 0;
27919304Speter		break;
28019304Speter	}
28119304Speter
28219304Speter	if (LF_ISSET(SEARCH_MSG))
28319304Speter		search_busy(sp, BUSY_OFF);
28419304Speter	return (rval);
28519304Speter}
28619304Speter
28719304Speter/*
28819304Speter * b_search --
28919304Speter *	Do a backward search.
29019304Speter *
29119304Speter * PUBLIC: int b_search __P((SCR *,
292254225Speter * PUBLIC:    MARK *, MARK *, CHAR_T *, size_t, CHAR_T **, u_int));
29319304Speter */
29419304Speterint
295254225Speterb_search(
296254225Speter	SCR *sp,
297254225Speter	MARK *fm,
298254225Speter	MARK *rm,
299254225Speter	CHAR_T *ptrn,
300254225Speter	size_t plen,
301254225Speter	CHAR_T **eptrn,
302254225Speter	u_int flags)
30319304Speter{
30419304Speter	busy_t btype;
30519304Speter	recno_t lno;
30619304Speter	regmatch_t match[1];
30719304Speter	size_t coff, last, len;
30819304Speter	int cnt, eval, rval, wrapped;
309254225Speter	CHAR_T *l;
31019304Speter
31119304Speter	if (search_init(sp, BACKWARD, ptrn, plen, eptrn, flags))
31219304Speter		return (1);
31319304Speter
31419304Speter	/*
31519304Speter	 * If doing incremental search, set the "starting" position past the
31619304Speter	 * current column, so that we search a minimal distance and still
31719304Speter	 * match special patterns, e.g., \> for the end of a word.  This is
31819304Speter	 * safe when the cursor is at the end of a line because we only use
31919304Speter	 * it for comparison with the location of the match.
32019304Speter	 *
32119304Speter	 * Otherwise, start searching immediately before the cursor.  If in
32219304Speter	 * the first column, start search on the previous line.
32319304Speter	 */
32419304Speter	if (LF_ISSET(SEARCH_INCR)) {
32519304Speter		lno = fm->lno;
32619304Speter		coff = fm->cno + 1;
32719304Speter	} else {
32819304Speter		if (fm->cno == 0) {
32919304Speter			if (fm->lno == 1 && !O_ISSET(sp, O_WRAPSCAN)) {
33019304Speter				if (LF_ISSET(SEARCH_MSG))
33119304Speter					search_msg(sp, S_SOF);
33219304Speter				return (1);
33319304Speter			}
33419304Speter			lno = fm->lno - 1;
33519304Speter		} else
33619304Speter			lno = fm->lno;
33719304Speter		coff = fm->cno;
33819304Speter	}
33919304Speter
34019304Speter	btype = BUSY_ON;
34119304Speter	for (cnt = INTERRUPT_CHECK, rval = 1, wrapped = 0;; --lno, coff = 0) {
34219304Speter		if (cnt-- == 0) {
34319304Speter			if (INTERRUPTED(sp))
34419304Speter				break;
34519304Speter			if (LF_ISSET(SEARCH_MSG)) {
34619304Speter				search_busy(sp, btype);
34719304Speter				btype = BUSY_UPDATE;
34819304Speter			}
34919304Speter			cnt = INTERRUPT_CHECK;
35019304Speter		}
351254225Speter		if ((wrapped && lno < fm->lno) || lno == 0) {
35219304Speter			if (wrapped) {
35319304Speter				if (LF_ISSET(SEARCH_MSG))
35419304Speter					search_msg(sp, S_NOTFOUND);
35519304Speter				break;
35619304Speter			}
35719304Speter			if (!O_ISSET(sp, O_WRAPSCAN)) {
35819304Speter				if (LF_ISSET(SEARCH_MSG))
35919304Speter					search_msg(sp, S_SOF);
36019304Speter				break;
36119304Speter			}
36219304Speter			if (db_last(sp, &lno))
36319304Speter				break;
36419304Speter			if (lno == 0) {
36519304Speter				if (LF_ISSET(SEARCH_MSG))
36619304Speter					search_msg(sp, S_EMPTY);
36719304Speter				break;
36819304Speter			}
36919304Speter			++lno;
37019304Speter			wrapped = 1;
37119304Speter			continue;
37219304Speter		}
37319304Speter
37419304Speter		if (db_get(sp, lno, 0, &l, &len))
37519304Speter			break;
37619304Speter
37719304Speter		/* Set the termination. */
37819304Speter		match[0].rm_so = 0;
37919304Speter		match[0].rm_eo = len;
38019304Speter
38119304Speter#if defined(DEBUG) && 0
38219304Speter		TRACE(sp, "B search: %lu from 0 to %qu\n", lno, match[0].rm_eo);
38319304Speter#endif
38419304Speter		/* Search the line. */
38519304Speter		eval = regexec(&sp->re_c, l, 1, match,
38619304Speter		    (match[0].rm_eo == len ? 0 : REG_NOTEOL) | REG_STARTEND);
38719304Speter		if (eval == REG_NOMATCH)
38819304Speter			continue;
38919304Speter		if (eval != 0) {
39019304Speter			if (LF_ISSET(SEARCH_MSG))
39119304Speter				re_error(sp, eval, &sp->re_c);
39219304Speter			else
39319304Speter				(void)sp->gp->scr_bell(sp);
39419304Speter			break;
39519304Speter		}
39619304Speter
39719304Speter		/* Check for a match starting past the cursor. */
39819304Speter		if (coff != 0 && match[0].rm_so >= coff)
39919304Speter			continue;
40019304Speter
40119304Speter		/* Warn if the search wrapped. */
40219304Speter		if (wrapped && LF_ISSET(SEARCH_WMSG))
40319304Speter			search_msg(sp, S_WRAP);
40419304Speter
40519304Speter#if defined(DEBUG) && 0
40619304Speter		TRACE(sp, "B found: %qu to %qu\n",
40719304Speter		    match[0].rm_so, match[0].rm_eo);
40819304Speter#endif
40919304Speter		/*
41019304Speter		 * We now have the first match on the line.  Step through the
41119304Speter		 * line character by character until find the last acceptable
41219304Speter		 * match.  This is painful, we need a better interface to regex
41319304Speter		 * to make this work.
41419304Speter		 */
41519304Speter		for (;;) {
41619304Speter			last = match[0].rm_so++;
41719304Speter			if (match[0].rm_so >= len)
41819304Speter				break;
41919304Speter			match[0].rm_eo = len;
42019304Speter			eval = regexec(&sp->re_c, l, 1, match,
42119304Speter			    (match[0].rm_so == 0 ? 0 : REG_NOTBOL) |
42219304Speter			    REG_STARTEND);
42319304Speter			if (eval == REG_NOMATCH)
42419304Speter				break;
42519304Speter			if (eval != 0) {
42619304Speter				if (LF_ISSET(SEARCH_MSG))
42719304Speter					re_error(sp, eval, &sp->re_c);
42819304Speter				else
42919304Speter					(void)sp->gp->scr_bell(sp);
43019304Speter				goto err;
43119304Speter			}
43219304Speter			if (coff && match[0].rm_so >= coff)
43319304Speter				break;
43419304Speter		}
43519304Speter		rm->lno = lno;
43619304Speter
43719304Speter		/* See comment in f_search(). */
43819304Speter		if (!LF_ISSET(SEARCH_EOL) && last >= len)
43919304Speter			rm->cno = len != 0 ? len - 1 : 0;
44019304Speter		else
44119304Speter			rm->cno = last;
44219304Speter		rval = 0;
44319304Speter		break;
44419304Speter	}
44519304Speter
44619304Spetererr:	if (LF_ISSET(SEARCH_MSG))
44719304Speter		search_busy(sp, BUSY_OFF);
44819304Speter	return (rval);
44919304Speter}
45019304Speter
45119304Speter/*
45219304Speter * search_msg --
45319304Speter *	Display one of the search messages.
45419304Speter */
45519304Speterstatic void
456254225Spetersearch_msg(
457254225Speter	SCR *sp,
458254225Speter	smsg_t msg)
45919304Speter{
46019304Speter	switch (msg) {
46119304Speter	case S_EMPTY:
46219304Speter		msgq(sp, M_ERR, "072|File empty; nothing to search");
46319304Speter		break;
46419304Speter	case S_EOF:
46519304Speter		msgq(sp, M_ERR,
46619304Speter		    "073|Reached end-of-file without finding the pattern");
46719304Speter		break;
46819304Speter	case S_NOPREV:
46919304Speter		msgq(sp, M_ERR, "074|No previous search pattern");
47019304Speter		break;
47119304Speter	case S_NOTFOUND:
47219304Speter		msgq(sp, M_ERR, "075|Pattern not found");
47319304Speter		break;
47419304Speter	case S_SOF:
47519304Speter		msgq(sp, M_ERR,
47619304Speter		    "076|Reached top-of-file without finding the pattern");
47719304Speter		break;
47819304Speter	case S_WRAP:
47919304Speter		msgq(sp, M_ERR, "077|Search wrapped");
48019304Speter		break;
48119304Speter	default:
48219304Speter		abort();
48319304Speter	}
48419304Speter}
48519304Speter
48619304Speter/*
48719304Speter * search_busy --
48819304Speter *	Put up the busy searching message.
48919304Speter *
49019304Speter * PUBLIC: void search_busy __P((SCR *, busy_t));
49119304Speter */
49219304Spetervoid
493254225Spetersearch_busy(
494254225Speter	SCR *sp,
495254225Speter	busy_t btype)
49619304Speter{
49719304Speter	sp->gp->scr_busy(sp, "078|Searching...", btype);
49819304Speter}
499