1/* mbutil.c -- readline multibyte character utility functions */
2
3/* Copyright (C) 2001-2005 Free Software Foundation, Inc.
4
5   This file is part of the GNU Readline Library, a library for
6   reading lines of text with interactive input and history editing.
7
8   The GNU Readline Library is free software; you can redistribute it
9   and/or modify it under the terms of the GNU General Public License
10   as published by the Free Software Foundation; either version 2, or
11   (at your option) any later version.
12
13   The GNU Readline Library is distributed in the hope that it will be
14   useful, but WITHOUT ANY WARRANTY; without even the implied warranty
15   of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
16   GNU General Public License for more details.
17
18   The GNU General Public License is often shipped with GNU software, and
19   is generally kept in a file called COPYING or LICENSE.  If you do not
20   have a copy of the license, write to the Free Software Foundation,
21   59 Temple Place, Suite 330, Boston, MA 02111 USA. */
22#define READLINE_LIBRARY
23
24#if defined (HAVE_CONFIG_H)
25#  include <config.h>
26#endif
27
28#include <sys/types.h>
29#include <fcntl.h>
30#include "posixjmp.h"
31
32#if defined (HAVE_UNISTD_H)
33#  include <unistd.h>	   /* for _POSIX_VERSION */
34#endif /* HAVE_UNISTD_H */
35
36#if defined (HAVE_STDLIB_H)
37#  include <stdlib.h>
38#else
39#  include "ansi_stdlib.h"
40#endif /* HAVE_STDLIB_H */
41
42#include <stdio.h>
43#include <ctype.h>
44
45/* System-specific feature definitions and include files. */
46#include "rldefs.h"
47#include "rlmbutil.h"
48
49#if defined (TIOCSTAT_IN_SYS_IOCTL)
50#  include <sys/ioctl.h>
51#endif /* TIOCSTAT_IN_SYS_IOCTL */
52
53/* Some standard library routines. */
54#include "readline.h"
55
56#include "rlprivate.h"
57#include "xmalloc.h"
58
/* Defined in this file so that both the readline and history libraries
   can share it.  It starts out as 1 when the library is built without
   HANDLE_MULTIBYTE and as 0 when multibyte handling is compiled in. */
#if !defined (HANDLE_MULTIBYTE)
int rl_byte_oriented = 1;
#else
int rl_byte_oriented = 0;
#endif
66
67/* **************************************************************** */
68/*								    */
69/*		Multibyte Character Utility Functions		    */
70/*								    */
71/* **************************************************************** */
72
73#if defined(HANDLE_MULTIBYTE)
74
/* Return the byte offset in STRING reached by moving forward COUNT
   characters from byte offset SEED.

   A negative SEED is treated as 0, and a COUNT <= 0 returns SEED
   unchanged.  If SEED lies beyond the end of STRING (reported by
   _rl_adjust_point as a negative adjustment) the length of STRING is
   returned, which is what _rl_find_prev_mbchar_internal does for the
   same situation.  If SEED is not on a character boundary, moving to
   the next boundary uses up one of the COUNT characters.  A byte that
   mbrtowc cannot convert is counted as one single-byte character.

   When FIND_NON_ZERO is non-zero, characters for which wcwidth returns
   0 are stepped over without being counted, and any such characters
   immediately following the final position are skipped as well.

   The returned offset never goes past the terminating NUL. */
static int
_rl_find_next_mbchar_internal (string, seed, count, find_non_zero)
     char *string;
     int seed, count, find_non_zero;
{
  size_t tmp, len;
  mbstate_t ps;
  int point, adjust;
  wchar_t wc;

  memset (&ps, 0, sizeof (mbstate_t));
  if (seed < 0)
    seed = 0;
  if (count <= 0)
    return seed;

  adjust = _rl_adjust_point (string, seed, &ps);
  /* SEED is already clamped to >= 0, so a negative adjustment can only
     mean that SEED is past the end of STRING.  There is nothing to move
     over, and indexing from SEED would read outside the string. */
  if (adjust < 0)
    return ((int) strlen (string));

  point = seed + adjust;
  /* If this is true, SEED did not point at the first byte of a
     character.  Moving to the boundary consumed one character. */
  if (seed < point)
    count--;

  while (count > 0)
    {
      len = strlen (string + point);
      /* Stop at the end of the string.  Calling mbrtowc with a length of
         0 can report an incomplete character, which would otherwise be
         taken for an invalid byte and push POINT past the NUL. */
      if (len == 0)
        break;

      tmp = mbrtowc (&wc, string + point, len, &ps);
      if (MB_INVALIDCH ((size_t)tmp))
        {
          /* Invalid bytes.  Assume a byte represents a character. */
          point++;
          count--;
          /* Reset the conversion state. */
          memset (&ps, 0, sizeof (mbstate_t));
        }
      else if (MB_NULLWCH (tmp))
        break;                  /* found wide '\0' */
      else
        {
          /* Valid bytes. */
          point += tmp;
          /* A zero-width character is stepped over without being
             counted when the caller asked for non-zero-width ones. */
          if (find_non_zero == 0 || wcwidth (wc) != 0)
            count--;
        }
    }

  if (find_non_zero)
    {
      /* Skip any zero-width characters that follow the final position.
         WC is examined only after a successful conversion, so neither an
         error return nor an unset WC can move POINT. */
      for (;;)
        {
          len = strlen (string + point);
          if (len == 0)
            break;
          tmp = mbrtowc (&wc, string + point, len, &ps);
          if (MB_NULLWCH (tmp) || MB_INVALIDCH (tmp))
            break;
          if (wcwidth (wc) != 0)
            break;
          point += tmp;
        }
    }

  return point;
}
142
/* Return the byte offset of the start of the character that precedes
   byte offset SEED in STRING, found by decoding STRING from its first
   byte.  A negative SEED yields 0 and a SEED beyond the end of STRING
   yields the length of STRING.  When FIND_NON_ZERO is non-zero, only
   characters for which wcwidth returns a non-zero value are remembered
   as candidates. */
static int
_rl_find_prev_mbchar_internal (string, seed, find_non_zero)
     char *string;
     int seed, find_non_zero;
{
  mbstate_t state;
  wchar_t wide;
  size_t nbytes;
  int total, cursor, last_start;

  memset (&state, 0, sizeof (mbstate_t));
  total = strlen (string);

  if (seed < 0)
    return 0;
  if (total < seed)
    return total;

  last_start = 0;
  for (cursor = 0; cursor < seed; cursor += nbytes)
    {
      nbytes = mbrtowc (&wide, string + cursor, total - cursor, &state);
      if (MB_INVALIDCH ((size_t)nbytes))
        {
          /* The bytes are invalid or too few to form a multibyte
             character.  Treat the first byte as a single non-zero-width
             character, so remember where it starts. */
          last_start = cursor;
          nbytes = 1;
          /* The conversion state is undefined after a failure, so
             start over with a clean one. */
          memset (&state, 0, sizeof (mbstate_t));
        }
      else if (MB_NULLWCH (nbytes))
        break;                  /* Found '\0' char.  Can this happen? */
      else if (find_non_zero == 0 || wcwidth (wide) != 0)
        last_start = cursor;
    }

  return last_start;
}
197
/* Measure the multibyte character that starts at SRC using mbrlen,
   looking at no more than strlen (SRC) bytes and using the conversion
   state PS.  Return the number of bytes mbrlen reports for the
   character, or 0 if mbrlen reports 0.  Return -1 if mbrlen reports an
   invalid sequence and -2 if it reports an incomplete one; in both of
   those cases *PS is cleared when PS is not null. */
int
_rl_get_char_len (src, ps)
     char *src;
     mbstate_t *ps;
{
  size_t nbytes;

  nbytes = mbrlen ((const char *)src, (size_t)strlen (src), ps);

  if (nbytes == (size_t)(-1) || nbytes == (size_t)(-2))
    {
      /* Invalid or incomplete sequence: put the conversion state back
         to its initial value before reporting the failure. */
      if (ps)
        memset (ps, 0, sizeof (mbstate_t));
      return (nbytes == (size_t)(-1)) ? -1 : -2;
    }

  return (int)nbytes;
}
231
/* Compare the character at BUF1[POS1] with the character at BUF2[POS2].
   The length of each character is obtained from _rl_get_char_len using
   PS1 and PS2 respectively.  Return 1 if both lengths are positive and
   equal and every byte matches; otherwise return 0. */
int
_rl_compare_chars (buf1, pos1, ps1, buf2, pos2, ps2)
     char *buf1;
     int pos1;
     mbstate_t *ps1;
     char *buf2;
     int pos2;
     mbstate_t *ps2;
{
  int len1, len2, k;

  len1 = _rl_get_char_len (&buf1[pos1], ps1);
  if (len1 <= 0)
    return 0;

  /* The second character is measured only if the first one was usable. */
  len2 = _rl_get_char_len (&buf2[pos2], ps2);
  if (len2 <= 0)
    return 0;

  if (len1 != len2)
    return 0;

  for (k = 0; k < len1; k++)
    {
      if (buf1[pos1 + k] != buf2[pos2 + k])
        return 0;
    }

  return 1;
}
257
/* Step through STRING with mbrlen from its first byte, updating the
   conversion state PS, until the position is at or beyond byte offset
   POINT.  Return how many bytes past POINT that position is (0 or
   more).  Return -1 if POINT is negative or greater than the length of
   STRING. */
int
_rl_adjust_point(string, point, ps)
     char *string;
     int point;
     mbstate_t *ps;
{
  size_t step;
  int total, offset;

  step = 0;
  total = strlen (string);
  if (point < 0 || total < point)
    return -1;

  for (offset = 0; offset < point; )
    {
      step = mbrlen (string + offset, total - offset, ps);
      if (MB_INVALIDCH ((size_t)step))
        {
          /* The bytes are invalid or too few to form a multibyte
             character, so count the first byte as a character of its
             own. */
          offset++;
          /* The conversion state is undefined after a failure; reset
             it when the caller supplied one. */
          if (ps)
            memset (ps, 0, sizeof (mbstate_t));
        }
      else if (MB_NULLWCH (step))
        offset++;
      else
        offset += step;
    }

  return (offset - point);
}
301
/* Return 1 if the LENGTH bytes of MBCHAR are equal to the LENGTH bytes
   of STRING beginning at index SEED, and 0 otherwise.  Return 0 without
   comparing anything when fewer than LENGTH bytes lie between SEED and
   END. */
int
_rl_is_mbchar_matched (string, seed, end, mbchar, length)
     char *string;
     int seed, end;
     char *mbchar;
     int length;
{
  int offset;

  if (length > (end - seed))
    return 0;

  /* Advance while the bytes agree; reaching LENGTH means a full match. */
  offset = 0;
  while (offset < length && string[seed + offset] == mbchar[offset])
    offset++;

  return (offset >= length);
}
319
320wchar_t
321_rl_char_value (buf, ind)
322     char *buf;
323     int ind;
324{
325  size_t tmp;
326  wchar_t wc;
327  mbstate_t ps;
328  int l;
329
330  if (MB_LEN_MAX == 1 || rl_byte_oriented)
331    return ((wchar_t) buf[ind]);
332  l = strlen (buf);
333  if (ind >= l - 1)
334    return ((wchar_t) buf[ind]);
335  memset (&ps, 0, sizeof (mbstate_t));
336  tmp = mbrtowc (&wc, buf + ind, l - ind, &ps);
337  if (MB_INVALIDCH (tmp) || MB_NULLWCH (tmp))
338    return ((wchar_t) buf[ind]);
339  return wc;
340}
341#endif /* HANDLE_MULTIBYTE */
342
/* Return the byte offset found by moving COUNT characters forward from
   byte offset SEED in STRING.  With HANDLE_MULTIBYTE the work is done by
   _rl_find_next_mbchar_internal, which receives FLAGS as its
   find-non-zero argument (pass MB_FIND_NONZERO to look for
   non-zero-width characters).  Without HANDLE_MULTIBYTE the result is
   simply SEED + COUNT. */
#undef _rl_find_next_mbchar
int
_rl_find_next_mbchar (string, seed, count, flags)
     char *string;
     int seed, count, flags;
{
  int next;

#if defined (HANDLE_MULTIBYTE)
  next = _rl_find_next_mbchar_internal (string, seed, count, flags);
#else
  next = seed + count;
#endif

  return next;
}
358
/* Return the byte offset of the start of the character before byte
   offset SEED in STRING.  With HANDLE_MULTIBYTE the work is done by
   _rl_find_prev_mbchar_internal, which receives FLAGS as its
   find-non-zero argument (pass MB_FIND_NONZERO to look for
   non-zero-width characters).  Without HANDLE_MULTIBYTE the result is
   SEED - 1, except that a SEED of 0 is returned unchanged. */
#undef _rl_find_prev_mbchar
int
_rl_find_prev_mbchar (string, seed, flags)
     char *string;
     int seed, flags;
{
  int prev;

#if defined (HANDLE_MULTIBYTE)
  prev = _rl_find_prev_mbchar_internal (string, seed, flags);
#else
  prev = seed;
  if (prev != 0)
    prev--;
#endif

  return prev;
}
374