cvt.c revision 294286
1108930Speter/*
244743Smarkm * Copyright (C) 1984-2015  Mark Nudelman
344743Smarkm *
444743Smarkm * You may distribute under the terms of either the GNU General Public
544743Smarkm * License or the Less License, as specified in the README file.
644743Smarkm *
744743Smarkm * For more information, see the README file.
844743Smarkm */
944743Smarkm
1044743Smarkm/*
1144743Smarkm * Routines to convert text in various ways.  Used by search.
1244743Smarkm */
1344743Smarkm
1444743Smarkm#include "less.h"
1544743Smarkm#include "charset.h"
1644743Smarkm
1744743Smarkmextern int utf_mode;
1844743Smarkm
1944743Smarkm/*
2044743Smarkm * Get the length of a buffer needed to convert a string.
21108930Speter */
2244743Smarkm	public int
2344743Smarkmcvt_length(len, ops)
2444743Smarkm	int len;
2544743Smarkm	int ops;
2644743Smarkm{
2744743Smarkm	if (utf_mode)
2844743Smarkm		/*
2944743Smarkm		 * Just copying a string in UTF-8 mode can cause it to grow
3044743Smarkm		 * in length.
3144743Smarkm		 * Four output bytes for one input byte is the worst case.
3244743Smarkm		 */
3344743Smarkm		len *= 4;
3444743Smarkm	return (len + 1);
3544743Smarkm}
3644743Smarkm
3744743Smarkm/*
3844743Smarkm * Allocate a chpos array for use by cvt_text.
3944743Smarkm */
4044743Smarkm	public int *
4144743Smarkmcvt_alloc_chpos(len)
4244743Smarkm	int len;
4344743Smarkm{
4444743Smarkm	int i;
4544743Smarkm	int *chpos = (int *) ecalloc(sizeof(int), len);
4644743Smarkm	/* Initialize all entries to an invalid position. */
4744743Smarkm	for (i = 0;  i < len;  i++)
4844743Smarkm		chpos[i] = -1;
4944743Smarkm	return (chpos);
5044743Smarkm}
5144743Smarkm
5244743Smarkm/*
5344743Smarkm * Convert text.  Perform the transformations specified by ops.
5444743Smarkm * Returns converted text in odst.  The original offset of each
5544743Smarkm * odst character (when it was in osrc) is returned in the chpos array.
5644743Smarkm */
5744743Smarkm	public void
5844743Smarkmcvt_text(odst, osrc, chpos, lenp, ops)
5944743Smarkm	char *odst;
6044743Smarkm	char *osrc;
6144743Smarkm	int *chpos;
6244743Smarkm	int *lenp;
6344743Smarkm	int ops;
6444743Smarkm{
6544743Smarkm	char *dst;
6644743Smarkm	char *edst = odst;
6744743Smarkm	char *src;
6844743Smarkm	register char *src_end;
6944743Smarkm	LWCHAR ch;
7044743Smarkm
7144743Smarkm	if (lenp != NULL)
7244743Smarkm		src_end = osrc + *lenp;
7344743Smarkm	else
7444743Smarkm		src_end = osrc + strlen(osrc);
7544743Smarkm
7644743Smarkm	for (src = osrc, dst = odst;  src < src_end;  )
7744743Smarkm	{
7844743Smarkm		int src_pos = (int) (src - osrc);
7944743Smarkm		int dst_pos = (int) (dst - odst);
8044743Smarkm		ch = step_char(&src, +1, src_end);
8144743Smarkm		if ((ops & CVT_BS) && ch == '\b' && dst > odst)
8244743Smarkm		{
8344743Smarkm			/* Delete backspace and preceding char. */
8444743Smarkm			do {
8544743Smarkm				dst--;
8644743Smarkm			} while (dst > odst &&
8744743Smarkm				!IS_ASCII_OCTET(*dst) && !IS_UTF8_LEAD(*dst));
8844743Smarkm		} else if ((ops & CVT_ANSI) && IS_CSI_START(ch))
8944743Smarkm		{
9044743Smarkm			/* Skip to end of ANSI escape sequence. */
9144743Smarkm			src++;  /* skip the CSI start char */
9244743Smarkm			while (src < src_end)
9344743Smarkm				if (!is_ansi_middle(*src++))
9444743Smarkm					break;
9544743Smarkm		} else
9644743Smarkm		{
9744743Smarkm			/* Just copy the char to the destination buffer. */
9844743Smarkm			if ((ops & CVT_TO_LC) && IS_UPPER(ch))
9944743Smarkm				ch = TO_LOWER(ch);
10044743Smarkm			put_wchar(&dst, ch);
10144743Smarkm			/* Record the original position of the char. */
10244743Smarkm			if (chpos != NULL)
10344743Smarkm				chpos[dst_pos] = src_pos;
10444743Smarkm		}
10544743Smarkm		if (dst > edst)
10644743Smarkm			edst = dst;
10744743Smarkm	}
10844743Smarkm	if ((ops & CVT_CRLF) && edst > odst && edst[-1] == '\r')
10944743Smarkm		edst--;
11044743Smarkm	*edst = '\0';
11144743Smarkm	if (lenp != NULL)
11244743Smarkm		*lenp = (int) (edst - odst);
11344743Smarkm	/* FIXME: why was this here?  if (chpos != NULL) chpos[dst - odst] = src - osrc; */
11444743Smarkm}
11544743Smarkm