1/*-
2 * Copyright (c) 1992, 1993, 1994
3 *	The Regents of the University of California.  All rights reserved.
4 * Copyright (c) 1992, 1993, 1994, 1995, 1996
5 *	Keith Bostic.  All rights reserved.
6 *
7 * See the LICENSE file for redistribution information.
8 */
9
10#include "config.h"
11
12#ifndef lint
13static const char sccsid[] = "$Id: v_word.c,v 10.7 2011/12/27 00:49:31 zy Exp $";
14#endif /* not lint */
15
16#include <sys/types.h>
17#include <sys/queue.h>
18#include <sys/time.h>
19
20#include <bitstring.h>
21#include <ctype.h>
22#include <limits.h>
23#include <stdio.h>
24
25#include "../common/common.h"
26#include "vi.h"
27
28/*
29 * There are two types of "words".  Bigwords are easy -- groups of anything
30 * delimited by whitespace.  Normal words are trickier.  They are either a
31 * group of characters, numbers and underscores, or a group of anything but,
 * delimited by whitespace.  When searching, if you're in whitespace, it's
33 * easy, just remove the whitespace and go to the beginning or end of the
34 * word.  Otherwise, figure out if the next character is in a different group.
35 * If it is, go to the beginning or end of that group, otherwise, go to the
36 * beginning or end of the current group.  The historic version of vi didn't
37 * get this right, so, for example, there were cases where "4e" was not the
38 * same as "eeee" -- in particular, single character words, and commands that
39 * began in whitespace were almost always handled incorrectly.  To get it right
40 * you have to resolve the cursor after each search so that the look-ahead to
41 * figure out what type of "word" the cursor is in will be correct.
42 *
43 * Empty lines, and lines that consist of only white-space characters count
44 * as a single word, and the beginning and end of the file counts as an
45 * infinite number of words.
46 *
47 * Movements associated with commands are different than movement commands.
48 * For example, in "abc  def", with the cursor on the 'a', "cw" is from
49 * 'a' to 'c', while "w" is from 'a' to 'd'.  In general, trailing white
50 * space is discarded from the change movement.  Another example is that,
51 * in the same string, a "cw" on any white space character replaces that
52 * single character, and nothing else.  Ain't nothin' in here that's easy.
53 *
54 * One historic note -- in the original vi, the 'w', 'W' and 'B' commands
55 * would treat groups of empty lines as individual words, i.e. the command
56 * would move the cursor to each new empty line.  The 'e' and 'E' commands
57 * would treat groups of empty lines as a single word, i.e. the first use
58 * would move past the group of lines.  The 'b' command would just beep at
59 * you, or, if you did it from the start of the line as part of a motion
60 * command, go absolutely nuts.  If the lines contained only white-space
61 * characters, the 'w' and 'W' commands would just beep at you, and the 'B',
62 * 'b', 'E' and 'e' commands would treat the group as a single word, and
63 * the 'B' and 'b' commands will treat the lines as individual words.  This
64 * implementation treats all of these cases as a single white-space word.
65 */
66
/*
 * Word flavor selector passed by the public entry points to the static
 * movement routines: the upper-case commands (W, E, B) pass BIGWORD, the
 * lower-case commands (w, e, b) pass LITTLEWORD.
 */
enum which {BIGWORD, LITTLEWORD};

/* Static movement routines: backward (b/B), end-of-word (e/E), forward (w/W). */
static int bword __P((SCR *, VICMD *, enum which));
static int eword __P((SCR *, VICMD *, enum which));
static int fword __P((SCR *, VICMD *, enum which));
72
73/*
74 * v_wordW -- [count]W
75 *	Move forward a bigword at a time.
76 *
77 * PUBLIC: int v_wordW __P((SCR *, VICMD *));
78 */
79int
80v_wordW(SCR *sp, VICMD *vp)
81{
82	return (fword(sp, vp, BIGWORD));
83}
84
85/*
86 * v_wordw -- [count]w
87 *	Move forward a word at a time.
88 *
89 * PUBLIC: int v_wordw __P((SCR *, VICMD *));
90 */
91int
92v_wordw(SCR *sp, VICMD *vp)
93{
94	return (fword(sp, vp, LITTLEWORD));
95}
96
97/*
98 * fword --
99 *	Move forward by words.
100 */
101static int
102fword(SCR *sp, VICMD *vp, enum which type)
103{
104	enum { INWORD, NOTWORD } state;
105	VCS cs;
106	u_long cnt;
107
108	cnt = F_ISSET(vp, VC_C1SET) ? vp->count : 1;
109	cs.cs_lno = vp->m_start.lno;
110	cs.cs_cno = vp->m_start.cno;
111	if (cs_init(sp, &cs))
112		return (1);
113
114	/*
115	 * If in white-space:
116	 *	If the count is 1, and it's a change command, we're done.
117	 *	Else, move to the first non-white-space character, which
118	 *	counts as a single word move.  If it's a motion command,
119	 *	don't move off the end of the line.
120	 */
121	if (cs.cs_flags == CS_EMP || (cs.cs_flags == 0 && ISBLANK(cs.cs_ch))) {
122		if (ISMOTION(vp) && cs.cs_flags != CS_EMP && cnt == 1) {
123			if (ISCMD(vp->rkp, 'c'))
124				return (0);
125			if (ISCMD(vp->rkp, 'd') || ISCMD(vp->rkp, 'y')) {
126				if (cs_fspace(sp, &cs))
127					return (1);
128				goto ret;
129			}
130		}
131		if (cs_fblank(sp, &cs))
132			return (1);
133		--cnt;
134	}
135
136	/*
137	 * Cyclically move to the next word -- this involves skipping
138	 * over word characters and then any trailing non-word characters.
139	 * Note, for the 'w' command, the definition of a word keeps
140	 * switching.
141	 */
142	if (type == BIGWORD)
143		while (cnt--) {
144			for (;;) {
145				if (cs_next(sp, &cs))
146					return (1);
147				if (cs.cs_flags == CS_EOF)
148					goto ret;
149				if (cs.cs_flags != 0 || ISBLANK(cs.cs_ch))
150					break;
151			}
152			/*
153			 * If a motion command and we're at the end of the
154			 * last word, we're done.  Delete and yank eat any
155			 * trailing blanks, but we don't move off the end
156			 * of the line regardless.
157			 */
158			if (cnt == 0 && ISMOTION(vp)) {
159				if ((ISCMD(vp->rkp, 'd') ||
160				    ISCMD(vp->rkp, 'y')) &&
161				    cs_fspace(sp, &cs))
162					return (1);
163				break;
164			}
165
166			/* Eat whitespace characters. */
167			if (cs_fblank(sp, &cs))
168				return (1);
169			if (cs.cs_flags == CS_EOF)
170				goto ret;
171		}
172	else
173		while (cnt--) {
174			state = cs.cs_flags == 0 &&
175			    inword(cs.cs_ch) ? INWORD : NOTWORD;
176			for (;;) {
177				if (cs_next(sp, &cs))
178					return (1);
179				if (cs.cs_flags == CS_EOF)
180					goto ret;
181				if (cs.cs_flags != 0 || ISBLANK(cs.cs_ch))
182					break;
183				if (state == INWORD) {
184					if (!inword(cs.cs_ch))
185						break;
186				} else
187					if (inword(cs.cs_ch))
188						break;
189			}
190			/* See comment above. */
191			if (cnt == 0 && ISMOTION(vp)) {
192				if ((ISCMD(vp->rkp, 'd') ||
193				    ISCMD(vp->rkp, 'y')) &&
194				    cs_fspace(sp, &cs))
195					return (1);
196				break;
197			}
198
199			/* Eat whitespace characters. */
200			if (cs.cs_flags != 0 || ISBLANK(cs.cs_ch))
201				if (cs_fblank(sp, &cs))
202					return (1);
203			if (cs.cs_flags == CS_EOF)
204				goto ret;
205		}
206
207	/*
208	 * If we didn't move, we must be at EOF.
209	 *
210	 * !!!
211	 * That's okay for motion commands, however.
212	 */
213ret:	if (!ISMOTION(vp) &&
214	    cs.cs_lno == vp->m_start.lno && cs.cs_cno == vp->m_start.cno) {
215		v_eof(sp, &vp->m_start);
216		return (1);
217	}
218
219	/* Adjust the end of the range for motion commands. */
220	vp->m_stop.lno = cs.cs_lno;
221	vp->m_stop.cno = cs.cs_cno;
222	if (ISMOTION(vp) && cs.cs_flags == 0)
223		--vp->m_stop.cno;
224
225	/*
226	 * Non-motion commands move to the end of the range.  Delete
227	 * and yank stay at the start, ignore others.
228	 */
229	vp->m_final = ISMOTION(vp) ? vp->m_start : vp->m_stop;
230	return (0);
231}
232
233/*
234 * v_wordE -- [count]E
235 *	Move forward to the end of the bigword.
236 *
237 * PUBLIC: int v_wordE __P((SCR *, VICMD *));
238 */
239int
240v_wordE(SCR *sp, VICMD *vp)
241{
242	return (eword(sp, vp, BIGWORD));
243}
244
245/*
246 * v_worde -- [count]e
247 *	Move forward to the end of the word.
248 *
249 * PUBLIC: int v_worde __P((SCR *, VICMD *));
250 */
251int
252v_worde(SCR *sp, VICMD *vp)
253{
254	return (eword(sp, vp, LITTLEWORD));
255}
256
257/*
258 * eword --
259 *	Move forward to the end of the word.
260 */
261static int
262eword(SCR *sp, VICMD *vp, enum which type)
263{
264	enum { INWORD, NOTWORD } state;
265	VCS cs;
266	u_long cnt;
267
268	cnt = F_ISSET(vp, VC_C1SET) ? vp->count : 1;
269	cs.cs_lno = vp->m_start.lno;
270	cs.cs_cno = vp->m_start.cno;
271	if (cs_init(sp, &cs))
272		return (1);
273
274	/*
275	 * !!!
276	 * If in whitespace, or the next character is whitespace, move past
277	 * it.  (This doesn't count as a word move.)  Stay at the character
278	 * past the current one, it sets word "state" for the 'e' command.
279	 */
280	if (cs.cs_flags == 0 && !ISBLANK(cs.cs_ch)) {
281		if (cs_next(sp, &cs))
282			return (1);
283		if (cs.cs_flags == 0 && !ISBLANK(cs.cs_ch))
284			goto start;
285	}
286	if (cs_fblank(sp, &cs))
287		return (1);
288
289	/*
290	 * Cyclically move to the next word -- this involves skipping
291	 * over word characters and then any trailing non-word characters.
292	 * Note, for the 'e' command, the definition of a word keeps
293	 * switching.
294	 */
295start:	if (type == BIGWORD)
296		while (cnt--) {
297			for (;;) {
298				if (cs_next(sp, &cs))
299					return (1);
300				if (cs.cs_flags == CS_EOF)
301					goto ret;
302				if (cs.cs_flags != 0 || ISBLANK(cs.cs_ch))
303					break;
304			}
305			/*
306			 * When we reach the start of the word after the last
307			 * word, we're done.  If we changed state, back up one
308			 * to the end of the previous word.
309			 */
310			if (cnt == 0) {
311				if (cs.cs_flags == 0 && cs_prev(sp, &cs))
312					return (1);
313				break;
314			}
315
316			/* Eat whitespace characters. */
317			if (cs_fblank(sp, &cs))
318				return (1);
319			if (cs.cs_flags == CS_EOF)
320				goto ret;
321		}
322	else
323		while (cnt--) {
324			state = cs.cs_flags == 0 &&
325			    inword(cs.cs_ch) ? INWORD : NOTWORD;
326			for (;;) {
327				if (cs_next(sp, &cs))
328					return (1);
329				if (cs.cs_flags == CS_EOF)
330					goto ret;
331				if (cs.cs_flags != 0 || ISBLANK(cs.cs_ch))
332					break;
333				if (state == INWORD) {
334					if (!inword(cs.cs_ch))
335						break;
336				} else
337					if (inword(cs.cs_ch))
338						break;
339			}
340			/* See comment above. */
341			if (cnt == 0) {
342				if (cs.cs_flags == 0 && cs_prev(sp, &cs))
343					return (1);
344				break;
345			}
346
347			/* Eat whitespace characters. */
348			if (cs.cs_flags != 0 || ISBLANK(cs.cs_ch))
349				if (cs_fblank(sp, &cs))
350					return (1);
351			if (cs.cs_flags == CS_EOF)
352				goto ret;
353		}
354
355	/*
356	 * If we didn't move, we must be at EOF.
357	 *
358	 * !!!
359	 * That's okay for motion commands, however.
360	 */
361ret:	if (!ISMOTION(vp) &&
362	    cs.cs_lno == vp->m_start.lno && cs.cs_cno == vp->m_start.cno) {
363		v_eof(sp, &vp->m_start);
364		return (1);
365	}
366
367	/* Set the end of the range for motion commands. */
368	vp->m_stop.lno = cs.cs_lno;
369	vp->m_stop.cno = cs.cs_cno;
370
371	/*
372	 * Non-motion commands move to the end of the range.
373	 * Delete and yank stay at the start, ignore others.
374	 */
375	vp->m_final = ISMOTION(vp) ? vp->m_start : vp->m_stop;
376	return (0);
377}
378
379/*
 * v_wordB -- [count]B
381 *	Move backward a bigword at a time.
382 *
383 * PUBLIC: int v_wordB __P((SCR *, VICMD *));
384 */
385int
386v_wordB(SCR *sp, VICMD *vp)
387{
388	return (bword(sp, vp, BIGWORD));
389}
390
391/*
392 * v_wordb -- [count]b
393 *	Move backward a word at a time.
394 *
395 * PUBLIC: int v_wordb __P((SCR *, VICMD *));
396 */
397int
398v_wordb(SCR *sp, VICMD *vp)
399{
400	return (bword(sp, vp, LITTLEWORD));
401}
402
403/*
404 * bword --
405 *	Move backward by words.
406 */
407static int
408bword(SCR *sp, VICMD *vp, enum which type)
409{
410	enum { INWORD, NOTWORD } state;
411	VCS cs;
412	u_long cnt;
413
414	cnt = F_ISSET(vp, VC_C1SET) ? vp->count : 1;
415	cs.cs_lno = vp->m_start.lno;
416	cs.cs_cno = vp->m_start.cno;
417	if (cs_init(sp, &cs))
418		return (1);
419
420	/*
421	 * !!!
422	 * If in whitespace, or the previous character is whitespace, move
423	 * past it.  (This doesn't count as a word move.)  Stay at the
424	 * character before the current one, it sets word "state" for the
425	 * 'b' command.
426	 */
427	if (cs.cs_flags == 0 && !ISBLANK(cs.cs_ch)) {
428		if (cs_prev(sp, &cs))
429			return (1);
430		if (cs.cs_flags == 0 && !ISBLANK(cs.cs_ch))
431			goto start;
432	}
433	if (cs_bblank(sp, &cs))
434		return (1);
435
436	/*
437	 * Cyclically move to the beginning of the previous word -- this
438	 * involves skipping over word characters and then any trailing
439	 * non-word characters.  Note, for the 'b' command, the definition
440	 * of a word keeps switching.
441	 */
442start:	if (type == BIGWORD)
443		while (cnt--) {
444			for (;;) {
445				if (cs_prev(sp, &cs))
446					return (1);
447				if (cs.cs_flags == CS_SOF)
448					goto ret;
449				if (cs.cs_flags != 0 || ISBLANK(cs.cs_ch))
450					break;
451			}
452			/*
453			 * When we reach the end of the word before the last
454			 * word, we're done.  If we changed state, move forward
455			 * one to the end of the next word.
456			 */
457			if (cnt == 0) {
458				if (cs.cs_flags == 0 && cs_next(sp, &cs))
459					return (1);
460				break;
461			}
462
463			/* Eat whitespace characters. */
464			if (cs_bblank(sp, &cs))
465				return (1);
466			if (cs.cs_flags == CS_SOF)
467				goto ret;
468		}
469	else
470		while (cnt--) {
471			state = cs.cs_flags == 0 &&
472			    inword(cs.cs_ch) ? INWORD : NOTWORD;
473			for (;;) {
474				if (cs_prev(sp, &cs))
475					return (1);
476				if (cs.cs_flags == CS_SOF)
477					goto ret;
478				if (cs.cs_flags != 0 || ISBLANK(cs.cs_ch))
479					break;
480				if (state == INWORD) {
481					if (!inword(cs.cs_ch))
482						break;
483				} else
484					if (inword(cs.cs_ch))
485						break;
486			}
487			/* See comment above. */
488			if (cnt == 0) {
489				if (cs.cs_flags == 0 && cs_next(sp, &cs))
490					return (1);
491				break;
492			}
493
494			/* Eat whitespace characters. */
495			if (cs.cs_flags != 0 || ISBLANK(cs.cs_ch))
496				if (cs_bblank(sp, &cs))
497					return (1);
498			if (cs.cs_flags == CS_SOF)
499				goto ret;
500		}
501
502	/* If we didn't move, we must be at SOF. */
503ret:	if (cs.cs_lno == vp->m_start.lno && cs.cs_cno == vp->m_start.cno) {
504		v_sof(sp, &vp->m_start);
505		return (1);
506	}
507
508	/* Set the end of the range for motion commands. */
509	vp->m_stop.lno = cs.cs_lno;
510	vp->m_stop.cno = cs.cs_cno;
511
512	/*
513	 * All commands move to the end of the range.  Motion commands
514	 * adjust the starting point to the character before the current
515	 * one.
516	 *
517	 * !!!
518	 * The historic vi didn't get this right -- the `yb' command yanked
519	 * the right stuff and even updated the cursor value, but the cursor
520	 * was not actually updated on the screen.
521	 */
522	vp->m_final = vp->m_stop;
523	if (ISMOTION(vp))
524		--vp->m_start.cno;
525	return (0);
526}
527