1/* mbutil.c -- readline multibyte character utility functions */ 2 3/* Copyright (C) 2001-2005 Free Software Foundation, Inc. 4 5 This file is part of the GNU Readline Library, a library for 6 reading lines of text with interactive input and history editing. 7 8 The GNU Readline Library is free software; you can redistribute it 9 and/or modify it under the terms of the GNU General Public License 10 as published by the Free Software Foundation; either version 2, or 11 (at your option) any later version. 12 13 The GNU Readline Library is distributed in the hope that it will be 14 useful, but WITHOUT ANY WARRANTY; without even the implied warranty 15 of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 16 GNU General Public License for more details. 17 18 The GNU General Public License is often shipped with GNU software, and 19 is generally kept in a file called COPYING or LICENSE. If you do not 20 have a copy of the license, write to the Free Software Foundation, 21 59 Temple Place, Suite 330, Boston, MA 02111 USA. */ 22#define READLINE_LIBRARY 23 24#if defined (HAVE_CONFIG_H) 25# include <config.h> 26#endif 27 28#include <sys/types.h> 29#include <fcntl.h> 30#include "posixjmp.h" 31 32#if defined (HAVE_UNISTD_H) 33# include <unistd.h> /* for _POSIX_VERSION */ 34#endif /* HAVE_UNISTD_H */ 35 36#if defined (HAVE_STDLIB_H) 37# include <stdlib.h> 38#else 39# include "ansi_stdlib.h" 40#endif /* HAVE_STDLIB_H */ 41 42#include <stdio.h> 43#include <ctype.h> 44 45/* System-specific feature definitions and include files. */ 46#include "rldefs.h" 47#include "rlmbutil.h" 48 49#if defined (TIOCSTAT_IN_SYS_IOCTL) 50# include <sys/ioctl.h> 51#endif /* TIOCSTAT_IN_SYS_IOCTL */ 52 53/* Some standard library routines. */ 54#include "readline.h" 55 56#include "rlprivate.h" 57#include "xmalloc.h" 58 59/* Declared here so it can be shared between the readline and history 60 libraries. */ 61#if defined (HANDLE_MULTIBYTE) 62int rl_byte_oriented = 0; 63#else 64int rl_byte_oriented = 1; 65#endif 66 67/* **************************************************************** */ 68/* */ 69/* Multibyte Character Utility Functions */ 70/* */ 71/* **************************************************************** */ 72 73#if defined(HANDLE_MULTIBYTE) 74 75static int 76_rl_find_next_mbchar_internal (string, seed, count, find_non_zero) 77 char *string; 78 int seed, count, find_non_zero; 79{ 80 size_t tmp; 81 mbstate_t ps; 82 int point; 83 wchar_t wc; 84 85 tmp = 0; 86 87 memset(&ps, 0, sizeof (mbstate_t)); 88 if (seed < 0) 89 seed = 0; 90 if (count <= 0) 91 return seed; 92 93 point = seed + _rl_adjust_point (string, seed, &ps); 94 /* if this is true, means that seed was not pointed character 95 started byte. So correct the point and consume count */ 96 if (seed < point) 97 count--; 98 99 while (count > 0) 100 { 101 tmp = mbrtowc (&wc, string+point, strlen(string + point), &ps); 102 if (MB_INVALIDCH ((size_t)tmp)) 103 { 104 /* invalid bytes. asume a byte represents a character */ 105 point++; 106 count--; 107 /* reset states. */ 108 memset(&ps, 0, sizeof(mbstate_t)); 109 } 110 else if (MB_NULLWCH (tmp)) 111 break; /* found wide '\0' */ 112 else 113 { 114 /* valid bytes */ 115 point += tmp; 116 if (find_non_zero) 117 { 118 if (wcwidth (wc) == 0) 119 continue; 120 else 121 count--; 122 } 123 else 124 count--; 125 } 126 } 127 128 if (find_non_zero) 129 { 130 tmp = mbrtowc (&wc, string + point, strlen (string + point), &ps); 131 while (tmp > 0 && wcwidth (wc) == 0) 132 { 133 point += tmp; 134 tmp = mbrtowc (&wc, string + point, strlen (string + point), &ps); 135 if (MB_NULLWCH (tmp) || MB_INVALIDCH (tmp)) 136 break; 137 } 138 } 139 140 return point; 141} 142 143static int 144_rl_find_prev_mbchar_internal (string, seed, find_non_zero) 145 char *string; 146 int seed, find_non_zero; 147{ 148 mbstate_t ps; 149 int prev, non_zero_prev, point, length; 150 size_t tmp; 151 wchar_t wc; 152 153 memset(&ps, 0, sizeof(mbstate_t)); 154 length = strlen(string); 155 156 if (seed < 0) 157 return 0; 158 else if (length < seed) 159 return length; 160 161 prev = non_zero_prev = point = 0; 162 while (point < seed) 163 { 164 tmp = mbrtowc (&wc, string + point, length - point, &ps); 165 if (MB_INVALIDCH ((size_t)tmp)) 166 { 167 /* in this case, bytes are invalid or shorted to compose 168 multibyte char, so assume that the first byte represents 169 a single character anyway. */ 170 tmp = 1; 171 /* clear the state of the byte sequence, because 172 in this case effect of mbstate is undefined */ 173 memset(&ps, 0, sizeof (mbstate_t)); 174 175 /* Since we're assuming that this byte represents a single 176 non-zero-width character, don't forget about it. */ 177 prev = point; 178 } 179 else if (MB_NULLWCH (tmp)) 180 break; /* Found '\0' char. Can this happen? */ 181 else 182 { 183 if (find_non_zero) 184 { 185 if (wcwidth (wc) != 0) 186 prev = point; 187 } 188 else 189 prev = point; 190 } 191 192 point += tmp; 193 } 194 195 return prev; 196} 197 198/* return the number of bytes parsed from the multibyte sequence starting 199 at src, if a non-L'\0' wide character was recognized. It returns 0, 200 if a L'\0' wide character was recognized. It returns (size_t)(-1), 201 if an invalid multibyte sequence was encountered. It returns (size_t)(-2) 202 if it couldn't parse a complete multibyte character. */ 203int 204_rl_get_char_len (src, ps) 205 char *src; 206 mbstate_t *ps; 207{ 208 size_t tmp; 209 210 tmp = mbrlen((const char *)src, (size_t)strlen (src), ps); 211 if (tmp == (size_t)(-2)) 212 { 213 /* shorted to compose multibyte char */ 214 if (ps) 215 memset (ps, 0, sizeof(mbstate_t)); 216 return -2; 217 } 218 else if (tmp == (size_t)(-1)) 219 { 220 /* invalid to compose multibyte char */ 221 /* initialize the conversion state */ 222 if (ps) 223 memset (ps, 0, sizeof(mbstate_t)); 224 return -1; 225 } 226 else if (tmp == (size_t)0) 227 return 0; 228 else 229 return (int)tmp; 230} 231 232/* compare the specified two characters. If the characters matched, 233 return 1. Otherwise return 0. */ 234int 235_rl_compare_chars (buf1, pos1, ps1, buf2, pos2, ps2) 236 char *buf1; 237 int pos1; 238 mbstate_t *ps1; 239 char *buf2; 240 int pos2; 241 mbstate_t *ps2; 242{ 243 int i, w1, w2; 244 245 if ((w1 = _rl_get_char_len (&buf1[pos1], ps1)) <= 0 || 246 (w2 = _rl_get_char_len (&buf2[pos2], ps2)) <= 0 || 247 (w1 != w2) || 248 (buf1[pos1] != buf2[pos2])) 249 return 0; 250 251 for (i = 1; i < w1; i++) 252 if (buf1[pos1+i] != buf2[pos2+i]) 253 return 0; 254 255 return 1; 256} 257 258/* adjust pointed byte and find mbstate of the point of string. 259 adjusted point will be point <= adjusted_point, and returns 260 differences of the byte(adjusted_point - point). 261 if point is invalied (point < 0 || more than string length), 262 it returns -1 */ 263int 264_rl_adjust_point(string, point, ps) 265 char *string; 266 int point; 267 mbstate_t *ps; 268{ 269 size_t tmp = 0; 270 int length; 271 int pos = 0; 272 273 length = strlen(string); 274 if (point < 0) 275 return -1; 276 if (length < point) 277 return -1; 278 279 while (pos < point) 280 { 281 tmp = mbrlen (string + pos, length - pos, ps); 282 if (MB_INVALIDCH ((size_t)tmp)) 283 { 284 /* in this case, bytes are invalid or shorted to compose 285 multibyte char, so assume that the first byte represents 286 a single character anyway. */ 287 pos++; 288 /* clear the state of the byte sequence, because 289 in this case effect of mbstate is undefined */ 290 if (ps) 291 memset (ps, 0, sizeof (mbstate_t)); 292 } 293 else if (MB_NULLWCH (tmp)) 294 pos++; 295 else 296 pos += tmp; 297 } 298 299 return (pos - point); 300} 301 302int 303_rl_is_mbchar_matched (string, seed, end, mbchar, length) 304 char *string; 305 int seed, end; 306 char *mbchar; 307 int length; 308{ 309 int i; 310 311 if ((end - seed) < length) 312 return 0; 313 314 for (i = 0; i < length; i++) 315 if (string[seed + i] != mbchar[i]) 316 return 0; 317 return 1; 318} 319 320wchar_t 321_rl_char_value (buf, ind) 322 char *buf; 323 int ind; 324{ 325 size_t tmp; 326 wchar_t wc; 327 mbstate_t ps; 328 int l; 329 330 if (MB_LEN_MAX == 1 || rl_byte_oriented) 331 return ((wchar_t) buf[ind]); 332 l = strlen (buf); 333 if (ind >= l - 1) 334 return ((wchar_t) buf[ind]); 335 memset (&ps, 0, sizeof (mbstate_t)); 336 tmp = mbrtowc (&wc, buf + ind, l - ind, &ps); 337 if (MB_INVALIDCH (tmp) || MB_NULLWCH (tmp)) 338 return ((wchar_t) buf[ind]); 339 return wc; 340} 341#endif /* HANDLE_MULTIBYTE */ 342 343/* Find next `count' characters started byte point of the specified seed. 344 If flags is MB_FIND_NONZERO, we look for non-zero-width multibyte 345 characters. */ 346#undef _rl_find_next_mbchar 347int 348_rl_find_next_mbchar (string, seed, count, flags) 349 char *string; 350 int seed, count, flags; 351{ 352#if defined (HANDLE_MULTIBYTE) 353 return _rl_find_next_mbchar_internal (string, seed, count, flags); 354#else 355 return (seed + count); 356#endif 357} 358 359/* Find previous character started byte point of the specified seed. 360 Returned point will be point <= seed. If flags is MB_FIND_NONZERO, 361 we look for non-zero-width multibyte characters. */ 362#undef _rl_find_prev_mbchar 363int 364_rl_find_prev_mbchar (string, seed, flags) 365 char *string; 366 int seed, flags; 367{ 368#if defined (HANDLE_MULTIBYTE) 369 return _rl_find_prev_mbchar_internal (string, seed, flags); 370#else 371 return ((seed == 0) ? seed : seed - 1); 372#endif 373} 374