119304Speter/*-
219304Speter * Copyright (c) 1992, 1993, 1994
319304Speter *	The Regents of the University of California.  All rights reserved.
419304Speter * Copyright (c) 1992, 1993, 1994, 1995, 1996
519304Speter *	Keith Bostic.  All rights reserved.
619304Speter *
719304Speter * See the LICENSE file for redistribution information.
819304Speter */
919304Speter
1019304Speter#include "config.h"
1119304Speter
1219304Speter#ifndef lint
13254225Speterstatic const char sccsid[] = "$Id: v_sentence.c,v 10.9 2001/06/25 15:19:35 skimo Exp $";
1419304Speter#endif /* not lint */
1519304Speter
1619304Speter#include <sys/types.h>
1719304Speter#include <sys/queue.h>
1819304Speter#include <sys/time.h>
1919304Speter
2019304Speter#include <bitstring.h>
2119304Speter#include <ctype.h>
2219304Speter#include <limits.h>
2319304Speter#include <stdio.h>
2419304Speter
2519304Speter#include "../common/common.h"
2619304Speter#include "vi.h"
2719304Speter
2819304Speter/*
2919304Speter * !!!
3019304Speter * In historic vi, a sentence was delimited by a '.', '?' or '!' character
3119304Speter * followed by TWO spaces or a newline.  One or more empty lines was also
3219304Speter * treated as a separate sentence.  The Berkeley documentation for historical
3319304Speter * vi states that any number of ')', ']', '"' and '\'' characters can be
3419304Speter * between the delimiter character and the spaces or end of line, however,
3519304Speter * the historical implementation did not handle additional '"' characters.
3619304Speter * We follow the documentation here, not the implementation.
3719304Speter *
3819304Speter * Once again, historical vi didn't do sentence movements associated with
3919304Speter * counts consistently, mostly in the presence of lines containing only
4019304Speter * white-space characters.
4119304Speter *
4219304Speter * This implementation also permits a single tab to delimit sentences, and
4319304Speter * treats lines containing only white-space characters as empty lines.
4419304Speter * Finally, tabs are eaten (along with spaces) when skipping to the start
4519304Speter * of the text following a "sentence".
4619304Speter */
4719304Speter
4819304Speter/*
4919304Speter * v_sentencef -- [count])
5019304Speter *	Move forward count sentences.
5119304Speter *
5219304Speter * PUBLIC: int v_sentencef __P((SCR *, VICMD *));
5319304Speter */
5419304Speterint
55254225Speterv_sentencef(SCR *sp, VICMD *vp)
5619304Speter{
5719304Speter	enum { BLANK, NONE, PERIOD } state;
5819304Speter	VCS cs;
5919304Speter	size_t len;
6019304Speter	u_long cnt;
6119304Speter
6219304Speter	cs.cs_lno = vp->m_start.lno;
6319304Speter	cs.cs_cno = vp->m_start.cno;
6419304Speter	if (cs_init(sp, &cs))
6519304Speter		return (1);
6619304Speter
6719304Speter	cnt = F_ISSET(vp, VC_C1SET) ? vp->count : 1;
6819304Speter
6919304Speter	/*
7019304Speter	 * !!!
7119304Speter	 * If in white-space, the next start of sentence counts as one.
7219304Speter	 * This may not handle "  .  " correctly, but it's real unclear
7319304Speter	 * what correctly means in that case.
7419304Speter	 */
75254225Speter	if (cs.cs_flags == CS_EMP || (cs.cs_flags == 0 && isblank(cs.cs_ch))) {
7619304Speter		if (cs_fblank(sp, &cs))
7719304Speter			return (1);
7819304Speter		if (--cnt == 0) {
7919304Speter			if (vp->m_start.lno != cs.cs_lno ||
8019304Speter			    vp->m_start.cno != cs.cs_cno)
8119304Speter				goto okret;
8219304Speter			return (1);
8319304Speter		}
8419304Speter	}
8519304Speter
8619304Speter	for (state = NONE;;) {
8719304Speter		if (cs_next(sp, &cs))
8819304Speter			return (1);
8919304Speter		if (cs.cs_flags == CS_EOF)
9019304Speter			break;
9119304Speter		if (cs.cs_flags == CS_EOL) {
9219304Speter			if ((state == PERIOD || state == BLANK) && --cnt == 0) {
9319304Speter				if (cs_next(sp, &cs))
9419304Speter					return (1);
9519304Speter				if (cs.cs_flags == 0 &&
9619304Speter				    isblank(cs.cs_ch) && cs_fblank(sp, &cs))
9719304Speter					return (1);
9819304Speter				goto okret;
9919304Speter			}
10019304Speter			state = NONE;
10119304Speter			continue;
10219304Speter		}
10319304Speter		if (cs.cs_flags == CS_EMP) {	/* An EMP is two sentences. */
10419304Speter			if (--cnt == 0)
10519304Speter				goto okret;
10619304Speter			if (cs_fblank(sp, &cs))
10719304Speter				return (1);
10819304Speter			if (--cnt == 0)
10919304Speter				goto okret;
11019304Speter			state = NONE;
11119304Speter			continue;
11219304Speter		}
11319304Speter		switch (cs.cs_ch) {
11419304Speter		case '.':
11519304Speter		case '?':
11619304Speter		case '!':
11719304Speter			state = PERIOD;
11819304Speter			break;
11919304Speter		case ')':
12019304Speter		case ']':
12119304Speter		case '"':
12219304Speter		case '\'':
12319304Speter			if (state != PERIOD)
12419304Speter				state = NONE;
12519304Speter			break;
12619304Speter		case '\t':
12719304Speter			if (state == PERIOD)
12819304Speter				state = BLANK;
12919304Speter			/* FALLTHROUGH */
13019304Speter		case ' ':
13119304Speter			if (state == PERIOD) {
13219304Speter				state = BLANK;
13319304Speter				break;
13419304Speter			}
13519304Speter			if (state == BLANK && --cnt == 0) {
13619304Speter				if (cs_fblank(sp, &cs))
13719304Speter					return (1);
13819304Speter				goto okret;
13919304Speter			}
14019304Speter			/* FALLTHROUGH */
14119304Speter		default:
14219304Speter			state = NONE;
14319304Speter			break;
14419304Speter		}
14519304Speter	}
14619304Speter
14719304Speter	/* EOF is a movement sink, but it's an error not to have moved. */
14819304Speter	if (vp->m_start.lno == cs.cs_lno && vp->m_start.cno == cs.cs_cno) {
14919304Speter		v_eof(sp, NULL);
15019304Speter		return (1);
15119304Speter	}
15219304Speter
15319304Speterokret:	vp->m_stop.lno = cs.cs_lno;
15419304Speter	vp->m_stop.cno = cs.cs_cno;
15519304Speter
15619304Speter	/*
15719304Speter	 * !!!
15819304Speter	 * Historic, uh, features, yeah, that's right, call 'em features.
15919304Speter	 * If the starting and ending cursor positions are at the first
16019304Speter	 * column in their lines, i.e. the movement is cutting entire lines,
16119304Speter	 * the buffer is in line mode, and the ending position is the last
16219304Speter	 * character of the previous line.  Note check to make sure that
16319304Speter	 * it's not within a single line.
16419304Speter	 *
16519304Speter	 * Non-motion commands move to the end of the range.  Delete and
16619304Speter	 * yank stay at the start.  Ignore others.  Adjust the end of the
16719304Speter	 * range for motion commands.
16819304Speter	 */
16919304Speter	if (ISMOTION(vp)) {
17019304Speter		if (vp->m_start.cno == 0 &&
17119304Speter		    (cs.cs_flags != 0 || vp->m_stop.cno == 0)) {
17219304Speter			if (vp->m_start.lno < vp->m_stop.lno) {
17319304Speter				if (db_get(sp,
17419304Speter				    --vp->m_stop.lno, DBG_FATAL, NULL, &len))
17519304Speter					return (1);
17619304Speter				vp->m_stop.cno = len ? len - 1 : 0;
17719304Speter			}
17819304Speter			F_SET(vp, VM_LMODE);
17919304Speter		} else
18019304Speter			--vp->m_stop.cno;
18119304Speter		vp->m_final = vp->m_start;
18219304Speter	} else
18319304Speter		vp->m_final = vp->m_stop;
18419304Speter	return (0);
18519304Speter}
18619304Speter
18719304Speter/*
18819304Speter * v_sentenceb -- [count](
18919304Speter *	Move backward count sentences.
19019304Speter *
19119304Speter * PUBLIC: int v_sentenceb __P((SCR *, VICMD *));
19219304Speter */
19319304Speterint
194254225Speterv_sentenceb(SCR *sp, VICMD *vp)
19519304Speter{
19619304Speter	VCS cs;
19719304Speter	recno_t slno;
19819304Speter	size_t len, scno;
19919304Speter	u_long cnt;
20019304Speter	int last;
20119304Speter
20219304Speter	/*
20319304Speter	 * !!!
20419304Speter	 * Historic vi permitted the user to hit SOF repeatedly.
20519304Speter	 */
20619304Speter	if (vp->m_start.lno == 1 && vp->m_start.cno == 0)
20719304Speter		return (0);
20819304Speter
20919304Speter	cs.cs_lno = vp->m_start.lno;
21019304Speter	cs.cs_cno = vp->m_start.cno;
21119304Speter	if (cs_init(sp, &cs))
21219304Speter		return (1);
21319304Speter
21419304Speter	cnt = F_ISSET(vp, VC_C1SET) ? vp->count : 1;
21519304Speter
21619304Speter	/*
21719304Speter	 * !!!
21819304Speter	 * In empty lines, skip to the previous non-white-space character.
21919304Speter	 * If in text, skip to the prevous white-space character.  Believe
22019304Speter	 * it or not, in the paragraph:
22119304Speter	 *	ab cd.
22219304Speter	 *	AB CD.
22319304Speter	 * if the cursor is on the 'A' or 'B', ( moves to the 'a'.  If it
22419304Speter	 * is on the ' ', 'C' or 'D', it moves to the 'A'.  Yes, Virginia,
22519304Speter	 * Berkeley was once a major center of drug activity.
22619304Speter	 */
22719304Speter	if (cs.cs_flags == CS_EMP) {
22819304Speter		if (cs_bblank(sp, &cs))
22919304Speter			return (1);
23019304Speter		for (;;) {
23119304Speter			if (cs_prev(sp, &cs))
23219304Speter				return (1);
23319304Speter			if (cs.cs_flags != CS_EOL)
23419304Speter				break;
23519304Speter		}
23619304Speter	} else if (cs.cs_flags == 0 && !isblank(cs.cs_ch))
23719304Speter		for (;;) {
23819304Speter			if (cs_prev(sp, &cs))
23919304Speter				return (1);
24019304Speter			if (cs.cs_flags != 0 || isblank(cs.cs_ch))
24119304Speter				break;
24219304Speter		}
24319304Speter
24419304Speter	for (last = 0;;) {
24519304Speter		if (cs_prev(sp, &cs))
24619304Speter			return (1);
24719304Speter		if (cs.cs_flags == CS_SOF)	/* SOF is a movement sink. */
24819304Speter			break;
24919304Speter		if (cs.cs_flags == CS_EOL) {
25019304Speter			last = 1;
25119304Speter			continue;
25219304Speter		}
25319304Speter		if (cs.cs_flags == CS_EMP) {
25419304Speter			if (--cnt == 0)
25519304Speter				goto ret;
25619304Speter			if (cs_bblank(sp, &cs))
25719304Speter				return (1);
25819304Speter			last = 0;
25919304Speter			continue;
26019304Speter		}
26119304Speter		switch (cs.cs_ch) {
26219304Speter		case '.':
26319304Speter		case '?':
26419304Speter		case '!':
26519304Speter			if (!last || --cnt != 0) {
26619304Speter				last = 0;
26719304Speter				continue;
26819304Speter			}
26919304Speter
27019304Speterret:			slno = cs.cs_lno;
27119304Speter			scno = cs.cs_cno;
27219304Speter
27319304Speter			/*
27419304Speter			 * Move to the start of the sentence, skipping blanks
27519304Speter			 * and special characters.
27619304Speter			 */
27719304Speter			do {
27819304Speter				if (cs_next(sp, &cs))
27919304Speter					return (1);
28019304Speter			} while (!cs.cs_flags &&
28119304Speter			    (cs.cs_ch == ')' || cs.cs_ch == ']' ||
28219304Speter			    cs.cs_ch == '"' || cs.cs_ch == '\''));
28319304Speter			if ((cs.cs_flags || isblank(cs.cs_ch)) &&
28419304Speter			    cs_fblank(sp, &cs))
28519304Speter				return (1);
28619304Speter
28719304Speter			/*
28819304Speter			 * If it was ".  xyz", with the cursor on the 'x', or
28919304Speter			 * "end.  ", with the cursor in the spaces, or the
29019304Speter			 * beginning of a sentence preceded by an empty line,
29119304Speter			 * we can end up where we started.  Fix it.
29219304Speter			 */
29319304Speter			if (vp->m_start.lno != cs.cs_lno ||
29419304Speter			    vp->m_start.cno != cs.cs_cno)
29519304Speter				goto okret;
29619304Speter
29719304Speter			/*
29819304Speter			 * Well, if an empty line preceded possible blanks
29919304Speter			 * and the sentence, it could be a real sentence.
30019304Speter			 */
30119304Speter			for (;;) {
30219304Speter				if (cs_prev(sp, &cs))
30319304Speter					return (1);
30419304Speter				if (cs.cs_flags == CS_EOL)
30519304Speter					continue;
30619304Speter				if (cs.cs_flags == 0 && isblank(cs.cs_ch))
30719304Speter					continue;
30819304Speter				break;
30919304Speter			}
31019304Speter			if (cs.cs_flags == CS_EMP)
31119304Speter				goto okret;
31219304Speter
31319304Speter			/* But it wasn't; try again. */
31419304Speter			++cnt;
31519304Speter			cs.cs_lno = slno;
31619304Speter			cs.cs_cno = scno;
31719304Speter			last = 0;
31819304Speter			break;
31919304Speter		case '\t':
32019304Speter			last = 1;
32119304Speter			break;
32219304Speter		default:
32319304Speter			last =
32419304Speter			    cs.cs_flags == CS_EOL || isblank(cs.cs_ch) ||
32519304Speter			    cs.cs_ch == ')' || cs.cs_ch == ']' ||
32619304Speter			    cs.cs_ch == '"' || cs.cs_ch == '\'' ? 1 : 0;
32719304Speter		}
32819304Speter	}
32919304Speter
33019304Speterokret:	vp->m_stop.lno = cs.cs_lno;
33119304Speter	vp->m_stop.cno = cs.cs_cno;
33219304Speter
33319304Speter	/*
33419304Speter	 * !!!
33519304Speter	 * If the starting and stopping cursor positions are at the first
33619304Speter	 * columns in the line, i.e. the movement is cutting an entire line,
33719304Speter	 * the buffer is in line mode, and the starting position is the last
33819304Speter	 * character of the previous line.
33919304Speter	 *
34019304Speter	 * All commands move to the end of the range.  Adjust the start of
34119304Speter	 * the range for motion commands.
34219304Speter	 */
34319304Speter	if (ISMOTION(vp))
34419304Speter		if (vp->m_start.cno == 0 &&
34519304Speter		    (cs.cs_flags != 0 || vp->m_stop.cno == 0)) {
34619304Speter			if (db_get(sp,
34719304Speter			    --vp->m_start.lno, DBG_FATAL, NULL, &len))
34819304Speter				return (1);
34919304Speter			vp->m_start.cno = len ? len - 1 : 0;
35019304Speter			F_SET(vp, VM_LMODE);
35119304Speter		} else
35219304Speter			--vp->m_start.cno;
35319304Speter	vp->m_final = vp->m_stop;
35419304Speter	return (0);
35519304Speter}
356