1/*	$NetBSD: v_search.c,v 1.6 2014/01/26 21:43:45 christos Exp $ */
2/*-
3 * Copyright (c) 1992, 1993, 1994
4 *	The Regents of the University of California.  All rights reserved.
5 * Copyright (c) 1992, 1993, 1994, 1995, 1996
6 *	Keith Bostic.  All rights reserved.
7 *
8 * See the LICENSE file for redistribution information.
9 */
10
11#include "config.h"
12
13#include <sys/cdefs.h>
14#if 0
15#ifndef lint
16static const char sccsid[] = "Id: v_search.c,v 10.30 2001/09/11 20:52:46 skimo Exp  (Berkeley) Date: 2001/09/11 20:52:46 ";
17#endif /* not lint */
18#else
19__RCSID("$NetBSD: v_search.c,v 1.6 2014/01/26 21:43:45 christos Exp $");
20#endif
21
22#include <sys/types.h>
23#include <sys/queue.h>
24#include <sys/time.h>
25
26#include <bitstring.h>
27#include <ctype.h>
28#include <errno.h>
29#include <limits.h>
30#include <stdio.h>
31#include <stdlib.h>
32#include <string.h>
33
34#include "../common/common.h"
35#include "vi.h"
36#include "../ipc/ip.h"
37
38static int v_exaddr __P((SCR *, VICMD *, dir_t));
39static int v_search __P((SCR *, VICMD *, CHAR_T *, size_t, u_int, dir_t));
40
41/*
42 * v_srch -- [count]?RE[? offset]
43 *	Ex address search backward.
44 *
45 * PUBLIC: int v_searchb __P((SCR *, VICMD *));
46 */
47int
48v_searchb(SCR *sp, VICMD *vp)
49{
50	return (v_exaddr(sp, vp, BACKWARD));
51}
52
53/*
54 * v_searchf -- [count]/RE[/ offset]
55 *	Ex address search forward.
56 *
57 * PUBLIC: int v_searchf __P((SCR *, VICMD *));
58 */
59int
60v_searchf(SCR *sp, VICMD *vp)
61{
62	return (v_exaddr(sp, vp, FORWARD));
63}
64
65/*
66 * v_exaddr --
67 *	Do a vi search (which is really an ex address).
68 */
69static int
70v_exaddr(SCR *sp, VICMD *vp, dir_t dir)
71{
72	static EXCMDLIST fake = { .name = L("search") };
73	EXCMD *cmdp;
74	WIN *wp;
75	TEXT *tp;
76	db_recno_t s_lno;
77	size_t len, s_cno, tlen;
78	int error, nb, type;
79	char buf[20];
80	CHAR_T *cmd, *t;
81	const CHAR_T *w;
82	size_t wlen;
83
84	/*
85	 * !!!
86	 * If using the search command as a motion, any addressing components
87	 * are lost, i.e. y/ptrn/+2, when repeated, is the same as y/ptrn/.
88	 */
89	if (F_ISSET(vp, VC_ISDOT))
90		return (v_search(sp, vp,
91		    NULL, 0, SEARCH_PARSE | SEARCH_MSG | SEARCH_SET, dir));
92
93	/* Get the search pattern. */
94	if (v_tcmd(sp, vp, dir == BACKWARD ? CH_BSEARCH : CH_FSEARCH,
95	    TXT_BS | TXT_CR | TXT_ESCAPE | TXT_PROMPT |
96	    (O_ISSET(sp, O_SEARCHINCR) ? TXT_SEARCHINCR : 0)))
97		return (1);
98
99	tp = TAILQ_FIRST(&sp->tiq);
100
101	/* If the user backspaced over the prompt, do nothing. */
102	if (tp->term == TERM_BS)
103		return (1);
104
105	/*
106	 * If the user was doing an incremental search, then we've already
107	 * updated the cursor and moved to the right location.  Return the
108	 * correct values, we're done.
109	 */
110	if (tp->term == TERM_SEARCH) {
111		vp->m_stop.lno = sp->lno;
112		vp->m_stop.cno = sp->cno;
113		if (ISMOTION(vp))
114			return (v_correct(sp, vp, 0));
115		vp->m_final = vp->m_stop;
116		return (0);
117	}
118
119	/*
120	 * If the user entered <escape> or <carriage-return>, the length is
121	 * 1 and the right thing will happen, i.e. the prompt will be used
122	 * as a command character.
123	 *
124	 * Build a fake ex command structure.
125	 */
126	wp = sp->wp;
127	wp->excmd.cp = tp->lb;
128	wp->excmd.clen = tp->len;
129	F_INIT(&wp->excmd, E_VISEARCH);
130
131	/*
132	 * XXX
133	 * Warn if the search wraps.  This is a pretty special case, but it's
134	 * nice feature that wasn't in the original implementations of ex/vi.
135	 * (It was added at some point to System V's version.)  This message
136	 * is only displayed if there are no keys in the queue. The problem is
137	 * the command is going to succeed, and the message is informational,
138	 * not an error.  If a macro displays it repeatedly, e.g., the pattern
139	 * only occurs once in the file and wrapscan is set, you lose big.  For
140	 * example, if the macro does something like:
141	 *
142	 *	:map K /pattern/^MjK
143	 *
144	 * Each search will display the message, but the following "/pattern/"
145	 * will immediately overwrite it, with strange results.  The System V
146	 * vi displays the "wrapped" message multiple times, but because it's
147	 * overwritten each time, it's not as noticeable.  As we don't discard
148	 * messages, it's a real problem for us.
149	 */
150	if (!KEYS_WAITING(sp))
151		F_SET(&wp->excmd, E_SEARCH_WMSG);
152
153	/* Save the current line/column. */
154	s_lno = sp->lno;
155	s_cno = sp->cno;
156
157	/*
158	 * !!!
159	 * Historically, vi / and ? commands were full-blown ex addresses,
160	 * including ';' delimiters, trailing <blank>'s, multiple search
161	 * strings (separated by semi-colons) and, finally, full-blown z
162	 * commands after the / and ? search strings.  (If the search was
163	 * being used as a motion, the trailing z command was ignored.
164	 * Also, we do some argument checking on the z command, to be sure
165	 * that it's not some other random command.) For multiple search
166	 * strings, leading <blank>'s at the second and subsequent strings
167	 * were eaten as well.  This has some (unintended?) side-effects:
168	 * the command /ptrn/;3 is legal and results in moving to line 3.
169	 * I suppose you could use it to optionally move to line 3...
170	 *
171	 * !!!
172	 * Historically, if any part of the search command failed, the cursor
173	 * remained unmodified (even if ; was used).  We have to play games
174	 * because the underlying ex parser thinks we're modifying the cursor
175	 * as we go, but I think we're compatible with historic practice.
176	 *
177	 * !!!
178	 * Historically, the command "/STRING/;   " failed, apparently it
179	 * confused the parser.  We're not that compatible.
180	 */
181	cmdp = &wp->excmd;
182	if (ex_range(sp, cmdp, &error))
183		return (1);
184
185	/*
186	 * Remember where any remaining command information is, and clean
187	 * up the fake ex command.
188	 */
189	cmd = cmdp->cp;
190	len = cmdp->clen;
191	wp->excmd.clen = 0;
192
193	if (error)
194		goto err2;
195
196	/* Copy out the new cursor position and make sure it's okay. */
197	switch (cmdp->addrcnt) {
198	case 1:
199		vp->m_stop = cmdp->addr1;
200		break;
201	case 2:
202		vp->m_stop = cmdp->addr2;
203		break;
204	}
205	if (!db_exist(sp, vp->m_stop.lno)) {
206		ex_badaddr(sp, &fake,
207		    vp->m_stop.lno == 0 ? A_ZERO : A_EOF, NUM_OK);
208		goto err2;
209	}
210
211	/*
212	 * !!!
213	 * Historic practice is that a trailing 'z' was ignored if it was a
214	 * motion command.  Should probably be an error, but not worth the
215	 * effort.
216	 */
217	if (ISMOTION(vp))
218		return (v_correct(sp, vp, F_ISSET(cmdp, E_DELTA)));
219
220	/*
221	 * !!!
222	 * Historically, if it wasn't a motion command, a delta in the search
223	 * pattern turns it into a first nonblank movement.
224	 */
225	nb = F_ISSET(cmdp, E_DELTA);
226
227	/* Check for the 'z' command. */
228	if (len != 0) {
229		if (*cmd != 'z')
230			goto err1;
231
232		/* No blanks, just like the z command. */
233		for (t = cmd + 1, tlen = len - 1; tlen > 0; ++t, --tlen)
234			if (!ISDIGIT((UCHAR_T)*t))
235				break;
236		if (tlen &&
237		    (*t == '-' || *t == '.' || *t == '+' || *t == '^')) {
238			++t;
239			--tlen;
240			type = 1;
241		} else
242			type = 0;
243		if (tlen)
244			goto err1;
245
246		/* The z command will do the nonblank for us. */
247		nb = 0;
248
249		/* Default to z+. */
250		if (!type &&
251		    v_event_push(sp, NULL, L("+"), 1, CH_NOMAP | CH_QUOTED))
252			return (1);
253
254		/* Push the user's command. */
255		if (v_event_push(sp, NULL, cmd, len, CH_NOMAP | CH_QUOTED))
256			return (1);
257
258		/* Push line number so get correct z display. */
259		tlen = snprintf(buf,
260		    sizeof(buf), "%lu", (u_long)vp->m_stop.lno);
261		CHAR2INT(sp, buf, tlen, w, wlen);
262		if (v_event_push(sp, NULL, w, wlen, CH_NOMAP | CH_QUOTED))
263			return (1);
264
265		/* Don't refresh until after 'z' happens. */
266		F_SET(VIP(sp), VIP_S_REFRESH);
267	}
268
269	/* Non-motion commands move to the end of the range. */
270	vp->m_final = vp->m_stop;
271	if (nb) {
272		F_CLR(vp, VM_RCM_MASK);
273		F_SET(vp, VM_RCM_SETFNB);
274	}
275	return (0);
276
277err1:	msgq(sp, M_ERR,
278	    "188|Characters after search string, line offset and/or z command");
279err2:	vp->m_final.lno = s_lno;
280	vp->m_final.cno = s_cno;
281	return (1);
282}
283
284/*
285 * v_searchN -- N
286 *	Reverse last search.
287 *
288 * PUBLIC: int v_searchN __P((SCR *, VICMD *));
289 */
290int
291v_searchN(SCR *sp, VICMD *vp)
292{
293	dir_t dir;
294
295	switch (sp->searchdir) {
296	case BACKWARD:
297		dir = FORWARD;
298		break;
299	case FORWARD:
300		dir = BACKWARD;
301		break;
302	default:
303		dir = sp->searchdir;
304		break;
305	}
306	return (v_search(sp, vp, NULL, 0, SEARCH_PARSE, dir));
307}
308
309/*
310 * v_searchn -- n
311 *	Repeat last search.
312 *
313 * PUBLIC: int v_searchn __P((SCR *, VICMD *));
314 */
315int
316v_searchn(SCR *sp, VICMD *vp)
317{
318	return (v_search(sp, vp, NULL, 0, SEARCH_PARSE, sp->searchdir));
319}
320
321/*
322 * is_especial --
323 *	Test if the character is special in an extended RE.
324 */
325static int
326is_especial(CHAR_T c)
327{
328	/*
329	 * !!!
330	 * Right-brace is not an ERE special according to IEEE 1003.1-2001.
331	 * Right-parenthesis is a special character (so quoting doesn't hurt),
332	 * though it has no special meaning in this context, viz. at the
333	 * beginning of the string.  So we need not quote it.  Then again,
334	 * see the BUGS section in regex/re_format.7.
335	 * The tilde is vi-specific, of course.
336	 */
337	return (STRCHR(L(".[\\()*+?{|^$~"), c) && c);
338}
339
340/*
341 * Rear delimiter for word search when the keyword ends in
342 * (i.e., consists of) a non-word character.  See v_searchw below.
343 */
344#define RE_NWSTOP	L("([^[:alnum:]_]|$)")
345#define RE_NWSTOP_LEN	(SIZE(RE_NWSTOP) - 1)
346
347/*
348 * v_searchw -- [count]^A
349 *	Search for the word under the cursor.
350 *
351 * PUBLIC: int v_searchw __P((SCR *, VICMD *));
352 */
353int
354v_searchw(SCR *sp, VICMD *vp)
355{
356	size_t blen, len;
357	int rval;
358	CHAR_T *bp, *p;
359
360	len = VIP(sp)->klen + MAX(RE_WSTART_LEN, 1)
361	    + MAX(RE_WSTOP_LEN, RE_NWSTOP_LEN);
362
363	GET_SPACE_RETW(sp, bp, blen, len);
364	p = bp;
365
366	/* Only the first character can be non-word, see v_curword. */
367	if (inword(VIP(sp)->keyw[0]))
368		p = MEMPCPY(p, RE_WSTART, RE_WSTART_LEN);
369	else if (is_especial(VIP(sp)->keyw[0]))
370		p = MEMPCPY(p, L("\\"), 1);
371
372	p = MEMPCPY(p, VIP(sp)->keyw, VIP(sp)->klen);
373
374	if (inword(p[-1]))
375		p = MEMPCPY(p, RE_WSTOP, RE_WSTOP_LEN);
376	else
377		/*
378		 * The keyword is a single non-word character.
379		 * We want it to stay the same when typing ^A several times
380		 * in a row, just the way the other cases behave.
381		 */
382		p = MEMPCPY(p, RE_NWSTOP, RE_NWSTOP_LEN);
383
384	len = p - bp;
385	rval = v_search(sp, vp, bp, len, SEARCH_SET | SEARCH_EXTEND, FORWARD);
386
387	FREE_SPACEW(sp, bp, blen);
388	return (rval);
389}
390
391/*
392 * v_esearch -- <dialog box>
393 *	Search command from the screen.
394 *
395 * PUBLIC: int v_esearch __P((SCR *, VICMD *));
396 */
397int
398v_esearch(SCR *sp, VICMD *vp)
399{
400	int flags;
401
402	LF_INIT(SEARCH_NOOPT);
403	if (FL_ISSET(vp->ev.e_flags, VI_SEARCH_EXT))
404		LF_SET(SEARCH_EXTEND);
405	if (FL_ISSET(vp->ev.e_flags, VI_SEARCH_IC))
406		LF_SET(SEARCH_IC);
407	if (FL_ISSET(vp->ev.e_flags, VI_SEARCH_ICL))
408		LF_SET(SEARCH_ICL);
409	if (FL_ISSET(vp->ev.e_flags, VI_SEARCH_INCR))
410		LF_SET(SEARCH_INCR);
411	if (FL_ISSET(vp->ev.e_flags, VI_SEARCH_LIT))
412		LF_SET(SEARCH_LITERAL);
413	if (FL_ISSET(vp->ev.e_flags, VI_SEARCH_WR))
414		LF_SET(SEARCH_WRAP);
415	return (v_search(sp, vp, vp->ev.e_csp, vp->ev.e_len, flags,
416	    FL_ISSET(vp->ev.e_flags, VI_SEARCH_REV) ? BACKWARD : FORWARD));
417}
418
419/*
420 * v_search --
421 *	The search commands.
422 */
423static int
424v_search(SCR *sp, VICMD *vp, CHAR_T *ptrn, size_t plen, u_int flags, dir_t dir)
425{
426	/* Display messages. */
427	LF_SET(SEARCH_MSG);
428
429	/* If it's a motion search, offset past end-of-line is okay. */
430	if (ISMOTION(vp))
431		LF_SET(SEARCH_EOL);
432
433	/*
434	 * XXX
435	 * Warn if the search wraps.  See the comment above, in v_exaddr().
436	 */
437	if (!KEYS_WAITING(sp))
438		LF_SET(SEARCH_WMSG);
439
440	switch (dir) {
441	case BACKWARD:
442		if (b_search(sp,
443		    &vp->m_start, &vp->m_stop, ptrn, plen, NULL, flags))
444			return (1);
445		break;
446	case FORWARD:
447		if (f_search(sp,
448		    &vp->m_start, &vp->m_stop, ptrn, plen, NULL, flags))
449			return (1);
450		break;
451	case NOTSET:
452		msgq(sp, M_ERR, "189|No previous search pattern");
453		return (1);
454	default:
455		abort();
456	}
457
458	/* Correct motion commands, otherwise, simply move to the location. */
459	if (ISMOTION(vp)) {
460		if (v_correct(sp, vp, 0))
461			return(1);
462	} else
463		vp->m_final = vp->m_stop;
464	return (0);
465}
466
467/*
468 * v_correct --
469 *	Handle command with a search as the motion.
470 *
471 * !!!
472 * Historically, commands didn't affect the line searched to/from if the
473 * motion command was a search and the final position was the start/end
474 * of the line.  There were some special cases and vi was not consistent;
475 * it was fairly easy to confuse it.  For example, given the two lines:
476 *
477 *	abcdefghi
478 *	ABCDEFGHI
479 *
480 * placing the cursor on the 'A' and doing y?$ would so confuse it that 'h'
481 * 'k' and put would no longer work correctly.  In any case, we try to do
482 * the right thing, but it's not going to exactly match historic practice.
483 *
484 * PUBLIC: int v_correct __P((SCR *, VICMD *, int));
485 */
486int
487v_correct(SCR *sp, VICMD *vp, int isdelta)
488{
489	MARK m;
490	size_t len;
491
492	/*
493	 * !!!
494	 * We may have wrapped if wrapscan was set, and we may have returned
495	 * to the position where the cursor started.  Historic vi didn't cope
496	 * with this well.  Yank wouldn't beep, but the first put after the
497	 * yank would move the cursor right one column (without adding any
498	 * text) and the second would put a copy of the current line.  The
499	 * change and delete commands would beep, but would leave the cursor
500	 * on the colon command line.  I believe that there are macros that
501	 * depend on delete, at least, failing.  For now, commands that use
502	 * search as a motion component fail when the search returns to the
503	 * original cursor position.
504	 */
505	if (vp->m_start.lno == vp->m_stop.lno &&
506	    vp->m_start.cno == vp->m_stop.cno) {
507		msgq(sp, M_BERR, "190|Search wrapped to original position");
508		return (1);
509	}
510
511	/*
512	 * !!!
513	 * Searches become line mode operations if there was a delta specified
514	 * to the search pattern.
515	 */
516	if (isdelta)
517		F_SET(vp, VM_LMODE);
518
519	/*
520	 * If the motion is in the reverse direction, switch the start and
521	 * stop MARK's so that it's in a forward direction.  (There's no
522	 * reason for this other than to make the tests below easier.  The
523	 * code in vi.c:vi() would have done the switch.)  Both forward
524	 * and backward motions can happen for any kind of search command
525	 * because of the wrapscan option.
526	 */
527	if (vp->m_start.lno > vp->m_stop.lno ||
528	    (vp->m_start.lno == vp->m_stop.lno &&
529	    vp->m_start.cno > vp->m_stop.cno)) {
530		m = vp->m_start;
531		vp->m_start = vp->m_stop;
532		vp->m_stop = m;
533	}
534
535	/*
536	 * BACKWARD:
537	 *	Delete and yank commands move to the end of the range.
538	 *	Ignore others.
539	 *
540	 * FORWARD:
541	 *	Delete and yank commands don't move.  Ignore others.
542	 */
543	vp->m_final = vp->m_start;
544
545	/*
546	 * !!!
547	 * Delta'd searches don't correct based on column positions.
548	 */
549	if (isdelta)
550		return (0);
551
552	/*
553	 * !!!
554	 * Backward searches starting at column 0, and forward searches ending
555	 * at column 0 are corrected to the last column of the previous line.
556	 * Otherwise, adjust the starting/ending point to the character before
557	 * the current one (this is safe because we know the search had to move
558	 * to succeed).
559	 *
560	 * Searches become line mode operations if they start at the first
561	 * nonblank and end at column 0 of another line.
562	 */
563	if (vp->m_start.lno < vp->m_stop.lno && vp->m_stop.cno == 0) {
564		if (db_get(sp, --vp->m_stop.lno, DBG_FATAL, NULL, &len))
565			return (1);
566		vp->m_stop.cno = len ? len - 1 : 0;
567		len = 0;
568		if (nonblank(sp, vp->m_start.lno, &len))
569			return (1);
570		if (vp->m_start.cno <= len)
571			F_SET(vp, VM_LMODE);
572	} else
573		--vp->m_stop.cno;
574
575	return (0);
576}
577