citrus_gbk2k.c revision 282275
1/* $FreeBSD: stable/10/lib/libiconv_modules/GBK2K/citrus_gbk2k.c 282275 2015-04-30 16:08:47Z tijl $ */ 2/* $NetBSD: citrus_gbk2k.c,v 1.7 2008/06/14 16:01:07 tnozaki Exp $ */ 3 4/*- 5 * Copyright (c)2003 Citrus Project, 6 * All rights reserved. 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions 10 * are met: 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in the 15 * documentation and/or other materials provided with the distribution. 16 * 17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 18 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 19 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 20 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 21 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 22 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 23 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 24 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 25 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 26 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 27 * SUCH DAMAGE. 28 */ 29 30#include <sys/cdefs.h> 31#include <sys/types.h> 32 33#include <assert.h> 34#include <errno.h> 35#include <limits.h> 36#include <stdbool.h> 37#include <stddef.h> 38#include <stdio.h> 39#include <stdlib.h> 40#include <string.h> 41#include <wchar.h> 42 43#include "citrus_namespace.h" 44#include "citrus_types.h" 45#include "citrus_bcs.h" 46#include "citrus_module.h" 47#include "citrus_stdenc.h" 48#include "citrus_gbk2k.h" 49 50 51/* ---------------------------------------------------------------------- 52 * private stuffs used by templates 53 */ 54 55typedef struct _GBK2KState { 56 int chlen; 57 char ch[4]; 58} _GBK2KState; 59 60typedef struct { 61 int mb_cur_max; 62} _GBK2KEncodingInfo; 63 64#define _CEI_TO_EI(_cei_) (&(_cei_)->ei) 65#define _CEI_TO_STATE(_cei_, _func_) (_cei_)->states.s_##_func_ 66 67#define _FUNCNAME(m) _citrus_GBK2K_##m 68#define _ENCODING_INFO _GBK2KEncodingInfo 69#define _ENCODING_STATE _GBK2KState 70#define _ENCODING_MB_CUR_MAX(_ei_) (_ei_)->mb_cur_max 71#define _ENCODING_IS_STATE_DEPENDENT 0 72#define _STATE_NEEDS_EXPLICIT_INIT(_ps_) 0 73 74static __inline void 75/*ARGSUSED*/ 76_citrus_GBK2K_init_state(_GBK2KEncodingInfo * __restrict ei __unused, 77 _GBK2KState * __restrict s) 78{ 79 80 memset(s, 0, sizeof(*s)); 81} 82 83#if 0 84static __inline void 85/*ARGSUSED*/ 86_citrus_GBK2K_pack_state(_GBK2KEncodingInfo * __restrict ei __unused, 87 void * __restrict pspriv, const _GBK2KState * __restrict s) 88{ 89 90 memcpy(pspriv, (const void *)s, sizeof(*s)); 91} 92 93static __inline void 94/*ARGSUSED*/ 95_citrus_GBK2K_unpack_state(_GBK2KEncodingInfo * __restrict ei __unused, 96 _GBK2KState * __restrict s, const void * __restrict pspriv) 97{ 98 99 memcpy((void *)s, pspriv, sizeof(*s)); 100} 101#endif 102 103static __inline bool 104_mb_singlebyte(int c) 105{ 106 107 return ((c & 0xff) <= 0x7f); 108} 109 110static __inline bool 111_mb_leadbyte(int c) 112{ 113 114 c &= 0xff; 115 return (0x81 <= c && c <= 0xfe); 116} 117 118static __inline bool 119_mb_trailbyte(int c) 120{ 121 122 c &= 0xff; 123 return ((0x40 <= c && c <= 0x7e) || (0x80 <= c && c <= 0xfe)); 124} 125 126static __inline bool 127_mb_surrogate(int c) 128{ 129 130 c &= 0xff; 131 return (0x30 <= c && c <= 0x39); 132} 133 134static __inline int 135_mb_count(wchar_t v) 136{ 137 uint32_t c; 138 139 c = (uint32_t)v; /* XXX */ 140 if (!(c & 0xffffff00)) 141 return (1); 142 if (!(c & 0xffff0000)) 143 return (2); 144 return (4); 145} 146 147#define _PSENC (psenc->ch[psenc->chlen - 1]) 148#define _PUSH_PSENC(c) (psenc->ch[psenc->chlen++] = (c)) 149 150static int 151_citrus_GBK2K_mbrtowc_priv(_GBK2KEncodingInfo * __restrict ei, 152 wchar_t * __restrict pwc, char ** __restrict s, size_t n, 153 _GBK2KState * __restrict psenc, size_t * __restrict nresult) 154{ 155 char *s0, *s1; 156 wchar_t wc; 157 int chlenbak, len; 158 159 s0 = *s; 160 161 if (s0 == NULL) { 162 /* _citrus_GBK2K_init_state(ei, psenc); */ 163 psenc->chlen = 0; 164 *nresult = 0; 165 return (0); 166 } 167 168 chlenbak = psenc->chlen; 169 170 switch (psenc->chlen) { 171 case 3: 172 if (!_mb_leadbyte (_PSENC)) 173 goto invalid; 174 /* FALLTHROUGH */ 175 case 2: 176 if (!_mb_surrogate(_PSENC) || _mb_trailbyte(_PSENC)) 177 goto invalid; 178 /* FALLTHROUGH */ 179 case 1: 180 if (!_mb_leadbyte (_PSENC)) 181 goto invalid; 182 /* FALLTHOROUGH */ 183 case 0: 184 break; 185 default: 186 goto invalid; 187 } 188 189 for (;;) { 190 if (n-- < 1) 191 goto restart; 192 193 _PUSH_PSENC(*s0++); 194 195 switch (psenc->chlen) { 196 case 1: 197 if (_mb_singlebyte(_PSENC)) 198 goto convert; 199 if (_mb_leadbyte (_PSENC)) 200 continue; 201 goto ilseq; 202 case 2: 203 if (_mb_trailbyte (_PSENC)) 204 goto convert; 205 if (ei->mb_cur_max == 4 && 206 _mb_surrogate (_PSENC)) 207 continue; 208 goto ilseq; 209 case 3: 210 if (_mb_leadbyte (_PSENC)) 211 continue; 212 goto ilseq; 213 case 4: 214 if (_mb_surrogate (_PSENC)) 215 goto convert; 216 goto ilseq; 217 } 218 } 219 220convert: 221 len = psenc->chlen; 222 s1 = &psenc->ch[0]; 223 wc = 0; 224 while (len-- > 0) 225 wc = (wc << 8) | (*s1++ & 0xff); 226 227 if (pwc != NULL) 228 *pwc = wc; 229 *s = s0; 230 *nresult = (wc == 0) ? 0 : psenc->chlen - chlenbak; 231 /* _citrus_GBK2K_init_state(ei, psenc); */ 232 psenc->chlen = 0; 233 234 return (0); 235 236restart: 237 *s = s0; 238 *nresult = (size_t)-2; 239 240 return (0); 241 242invalid: 243 return (EINVAL); 244 245ilseq: 246 *nresult = (size_t)-1; 247 return (EILSEQ); 248} 249 250static int 251_citrus_GBK2K_wcrtomb_priv(_GBK2KEncodingInfo * __restrict ei, 252 char * __restrict s, size_t n, wchar_t wc, _GBK2KState * __restrict psenc, 253 size_t * __restrict nresult) 254{ 255 size_t len; 256 int ret; 257 258 if (psenc->chlen != 0) { 259 ret = EINVAL; 260 goto err; 261 } 262 263 len = _mb_count(wc); 264 if (n < len) { 265 ret = E2BIG; 266 goto err; 267 } 268 269 switch (len) { 270 case 1: 271 if (!_mb_singlebyte(_PUSH_PSENC(wc ))) { 272 ret = EILSEQ; 273 goto err; 274 } 275 break; 276 case 2: 277 if (!_mb_leadbyte (_PUSH_PSENC(wc >> 8)) || 278 !_mb_trailbyte (_PUSH_PSENC(wc))) { 279 ret = EILSEQ; 280 goto err; 281 } 282 break; 283 case 4: 284 if (ei->mb_cur_max != 4 || 285 !_mb_leadbyte (_PUSH_PSENC(wc >> 24)) || 286 !_mb_surrogate (_PUSH_PSENC(wc >> 16)) || 287 !_mb_leadbyte (_PUSH_PSENC(wc >> 8)) || 288 !_mb_surrogate (_PUSH_PSENC(wc))) { 289 ret = EILSEQ; 290 goto err; 291 } 292 break; 293 } 294 295 memcpy(s, psenc->ch, psenc->chlen); 296 *nresult = psenc->chlen; 297 /* _citrus_GBK2K_init_state(ei, psenc); */ 298 psenc->chlen = 0; 299 300 return (0); 301 302err: 303 *nresult = (size_t)-1; 304 return (ret); 305} 306 307static __inline int 308/*ARGSUSED*/ 309_citrus_GBK2K_stdenc_wctocs(_GBK2KEncodingInfo * __restrict ei __unused, 310 _csid_t * __restrict csid, _index_t * __restrict idx, wchar_t wc) 311{ 312 uint8_t ch, cl; 313 314 if ((uint32_t)wc < 0x80) { 315 /* ISO646 */ 316 *csid = 0; 317 *idx = (_index_t)wc; 318 } else if ((uint32_t)wc >= 0x10000) { 319 /* GBKUCS : XXX */ 320 *csid = 3; 321 *idx = (_index_t)wc; 322 } else { 323 ch = (uint8_t)(wc >> 8); 324 cl = (uint8_t)wc; 325 if (ch >= 0xA1 && cl >= 0xA1) { 326 /* EUC G1 */ 327 *csid = 1; 328 *idx = (_index_t)wc & 0x7F7FU; 329 } else { 330 /* extended area (0x8140-) */ 331 *csid = 2; 332 *idx = (_index_t)wc; 333 } 334 } 335 336 return (0); 337} 338 339static __inline int 340/*ARGSUSED*/ 341_citrus_GBK2K_stdenc_cstowc(_GBK2KEncodingInfo * __restrict ei, 342 wchar_t * __restrict wc, _csid_t csid, _index_t idx) 343{ 344 345 switch (csid) { 346 case 0: 347 /* ISO646 */ 348 *wc = (wchar_t)idx; 349 break; 350 case 1: 351 /* EUC G1 */ 352 *wc = (wchar_t)idx | 0x8080U; 353 break; 354 case 2: 355 /* extended area */ 356 *wc = (wchar_t)idx; 357 break; 358 case 3: 359 /* GBKUCS : XXX */ 360 if (ei->mb_cur_max != 4) 361 return (EINVAL); 362 *wc = (wchar_t)idx; 363 break; 364 default: 365 return (EILSEQ); 366 } 367 368 return (0); 369} 370 371static __inline int 372/*ARGSUSED*/ 373_citrus_GBK2K_stdenc_get_state_desc_generic(_GBK2KEncodingInfo * __restrict ei __unused, 374 _GBK2KState * __restrict psenc, int * __restrict rstate) 375{ 376 377 *rstate = (psenc->chlen == 0) ? _STDENC_SDGEN_INITIAL : 378 _STDENC_SDGEN_INCOMPLETE_CHAR; 379 return (0); 380} 381 382static int 383/*ARGSUSED*/ 384_citrus_GBK2K_encoding_module_init(_GBK2KEncodingInfo * __restrict ei, 385 const void * __restrict var, size_t lenvar) 386{ 387 const char *p; 388 389 p = var; 390 memset((void *)ei, 0, sizeof(*ei)); 391 ei->mb_cur_max = 4; 392 while (lenvar > 0) { 393 switch (_bcs_tolower(*p)) { 394 case '2': 395 MATCH("2byte", ei->mb_cur_max = 2); 396 break; 397 } 398 p++; 399 lenvar--; 400 } 401 402 return (0); 403} 404 405static void 406/*ARGSUSED*/ 407_citrus_GBK2K_encoding_module_uninit(_GBK2KEncodingInfo *ei __unused) 408{ 409 410} 411 412/* ---------------------------------------------------------------------- 413 * public interface for stdenc 414 */ 415 416_CITRUS_STDENC_DECLS(GBK2K); 417_CITRUS_STDENC_DEF_OPS(GBK2K); 418 419#include "citrus_stdenc_template.h" 420