citrus_hz.c revision 267829
1/* $FreeBSD: releng/10.0/lib/libiconv_modules/HZ/citrus_hz.c 267829 2014-06-24 19:05:08Z delphij $ */ 2/* $NetBSD: citrus_hz.c,v 1.2 2008/06/14 16:01:07 tnozaki Exp $ */ 3 4/*- 5 * Copyright (c)2004, 2006 Citrus Project, 6 * All rights reserved. 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions 10 * are met: 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in the 15 * documentation and/or other materials provided with the distribution. 16 * 17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 18 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 19 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 20 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 21 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 22 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 23 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 24 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 25 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 26 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 27 * SUCH DAMAGE. 28 * 29 */ 30 31#include <sys/cdefs.h> 32#include <sys/queue.h> 33#include <sys/types.h> 34 35#include <assert.h> 36#include <errno.h> 37#include <limits.h> 38#include <stddef.h> 39#include <stdint.h> 40#include <stdlib.h> 41#include <string.h> 42#include <wchar.h> 43 44#include "citrus_namespace.h" 45#include "citrus_types.h" 46#include "citrus_bcs.h" 47#include "citrus_module.h" 48#include "citrus_stdenc.h" 49 50#include "citrus_hz.h" 51#include "citrus_prop.h" 52 53/* 54 * wchar_t mapping: 55 * 56 * CTRL/ASCII 00000000 00000000 00000000 gxxxxxxx 57 * GB2312 00000000 00000000 0xxxxxxx gxxxxxxx 58 * 94/96*n (~M) 0mmmmmmm 0xxxxxxx 0xxxxxxx gxxxxxxx 59 */ 60 61#define ESCAPE_CHAR '~' 62 63typedef enum { 64 CTRL = 0, ASCII = 1, GB2312 = 2, CS94 = 3, CS96 = 4 65} charset_t; 66 67typedef struct { 68 int start; 69 int end; 70 int width; 71} range_t; 72 73static const range_t ranges[] = { 74#define RANGE(start, end) { start, end, (end - start) + 1 } 75/* CTRL */ RANGE(0x00, 0x1F), 76/* ASCII */ RANGE(0x20, 0x7F), 77/* GB2312 */ RANGE(0x21, 0x7E), 78/* CS94 */ RANGE(0x21, 0x7E), 79/* CS96 */ RANGE(0x20, 0x7F), 80#undef RANGE 81}; 82 83typedef struct escape_t escape_t; 84typedef struct { 85 charset_t charset; 86 escape_t *escape; 87 ssize_t length; 88#define ROWCOL_MAX 3 89} graphic_t; 90 91typedef TAILQ_HEAD(escape_list, escape_t) escape_list; 92struct escape_t { 93 TAILQ_ENTRY(escape_t) entry; 94 escape_list *set; 95 graphic_t *left; 96 graphic_t *right; 97 int ch; 98}; 99 100#define GL(escape) ((escape)->left) 101#define GR(escape) ((escape)->right) 102#define SET(escape) ((escape)->set) 103#define ESC(escape) ((escape)->ch) 104#define INIT(escape) (TAILQ_FIRST(SET(escape))) 105 106static __inline escape_t * 107find_escape(escape_list *set, int ch) 108{ 109 escape_t *escape; 110 111 TAILQ_FOREACH(escape, set, entry) { 112 if (ESC(escape) == ch) 113 break; 114 } 115 116 return (escape); 117} 118 119typedef struct { 120 escape_list e0; 121 escape_list e1; 122 graphic_t *ascii; 123 graphic_t *gb2312; 124} _HZEncodingInfo; 125 126#define E0SET(ei) (&(ei)->e0) 127#define E1SET(ei) (&(ei)->e1) 128#define INIT0(ei) (TAILQ_FIRST(E0SET(ei))) 129#define INIT1(ei) (TAILQ_FIRST(E1SET(ei))) 130 131typedef struct { 132 escape_t *inuse; 133 int chlen; 134 char ch[ROWCOL_MAX]; 135} _HZState; 136 137#define _CEI_TO_EI(_cei_) (&(_cei_)->ei) 138#define _CEI_TO_STATE(_cei_, _func_) (_cei_)->states.s_##_func_ 139 140#define _FUNCNAME(m) _citrus_HZ_##m 141#define _ENCODING_INFO _HZEncodingInfo 142#define _ENCODING_STATE _HZState 143#define _ENCODING_MB_CUR_MAX(_ei_) MB_LEN_MAX 144#define _ENCODING_IS_STATE_DEPENDENT 1 145#define _STATE_NEEDS_EXPLICIT_INIT(_ps_) ((_ps_)->inuse == NULL) 146 147static __inline void 148_citrus_HZ_init_state(_HZEncodingInfo * __restrict ei, 149 _HZState * __restrict psenc) 150{ 151 152 psenc->chlen = 0; 153 psenc->inuse = INIT0(ei); 154} 155 156static __inline void 157/*ARGSUSED*/ 158_citrus_HZ_pack_state(_HZEncodingInfo * __restrict ei __unused, 159 void *__restrict pspriv, const _HZState * __restrict psenc) 160{ 161 162 memcpy(pspriv, (const void *)psenc, sizeof(*psenc)); 163} 164 165static __inline void 166/*ARGSUSED*/ 167_citrus_HZ_unpack_state(_HZEncodingInfo * __restrict ei __unused, 168 _HZState * __restrict psenc, const void * __restrict pspriv) 169{ 170 171 memcpy((void *)psenc, pspriv, sizeof(*psenc)); 172} 173 174static int 175_citrus_HZ_mbrtowc_priv(_HZEncodingInfo * __restrict ei, 176 wchar_t * __restrict pwc, const char ** __restrict s, size_t n, 177 _HZState * __restrict psenc, size_t * __restrict nresult) 178{ 179 escape_t *candidate, *init; 180 graphic_t *graphic; 181 const range_t *range; 182 const char *s0; 183 wchar_t wc; 184 int bit, ch, head, len, tail; 185 186 if (*s == NULL) { 187 _citrus_HZ_init_state(ei, psenc); 188 *nresult = 1; 189 return (0); 190 } 191 s0 = *s; 192 if (psenc->chlen < 0 || psenc->inuse == NULL) 193 return (EINVAL); 194 195 wc = (wchar_t)0; 196 bit = head = tail = 0; 197 graphic = NULL; 198 for (len = 0; len <= MB_LEN_MAX;) { 199 if (psenc->chlen == tail) { 200 if (n-- < 1) { 201 *s = s0; 202 *nresult = (size_t)-2; 203 return (0); 204 } 205 psenc->ch[psenc->chlen++] = *s0++; 206 ++len; 207 } 208 ch = (unsigned char)psenc->ch[tail++]; 209 if (tail == 1) { 210 if ((ch & ~0x80) <= 0x1F) { 211 if (psenc->inuse != INIT0(ei)) 212 break; 213 wc = (wchar_t)ch; 214 goto done; 215 } 216 if (ch & 0x80) { 217 graphic = GR(psenc->inuse); 218 bit = 0x80; 219 ch &= ~0x80; 220 } else { 221 graphic = GL(psenc->inuse); 222 if (ch == ESCAPE_CHAR) 223 continue; 224 bit = 0x0; 225 } 226 if (graphic == NULL) 227 break; 228 } else if (tail == 2 && psenc->ch[0] == ESCAPE_CHAR) { 229 if (tail < psenc->chlen) 230 return (EINVAL); 231 if (ch == ESCAPE_CHAR) { 232 ++head; 233 } else if (ch == '\n') { 234 if (psenc->inuse != INIT0(ei)) 235 break; 236 tail = psenc->chlen = 0; 237 continue; 238 } else { 239 candidate = NULL; 240 init = INIT0(ei); 241 if (psenc->inuse == init) { 242 init = INIT1(ei); 243 } else if (INIT(psenc->inuse) == init) { 244 if (ESC(init) != ch) 245 break; 246 candidate = init; 247 } 248 if (candidate == NULL) { 249 candidate = find_escape( 250 SET(psenc->inuse), ch); 251 if (candidate == NULL) { 252 if (init == NULL || 253 ESC(init) != ch) 254 break; 255 candidate = init; 256 } 257 } 258 psenc->inuse = candidate; 259 tail = psenc->chlen = 0; 260 continue; 261 } 262 } else if (ch & 0x80) { 263 if (graphic != GR(psenc->inuse)) 264 break; 265 ch &= ~0x80; 266 } else { 267 if (graphic != GL(psenc->inuse)) 268 break; 269 } 270 range = &ranges[(size_t)graphic->charset]; 271 if (range->start > ch || range->end < ch) 272 break; 273 wc <<= 8; 274 wc |= ch; 275 if (graphic->length == (tail - head)) { 276 if (graphic->charset > GB2312) 277 bit |= ESC(psenc->inuse) << 24; 278 wc |= bit; 279 goto done; 280 } 281 } 282 *nresult = (size_t)-1; 283 return (EILSEQ); 284done: 285 if (tail < psenc->chlen) 286 return (EINVAL); 287 *s = s0; 288 if (pwc != NULL) 289 *pwc = wc; 290 psenc->chlen = 0; 291 *nresult = (wc == 0) ? 0 : len; 292 293 return (0); 294} 295 296static int 297_citrus_HZ_wcrtomb_priv(_HZEncodingInfo * __restrict ei, 298 char * __restrict s, size_t n, wchar_t wc, 299 _HZState * __restrict psenc, size_t * __restrict nresult) 300{ 301 escape_t *candidate, *init; 302 graphic_t *graphic; 303 const range_t *range; 304 size_t len; 305 int bit, ch; 306 307 if (psenc->chlen != 0 || psenc->inuse == NULL) 308 return (EINVAL); 309 if (wc & 0x80) { 310 bit = 0x80; 311 wc &= ~0x80; 312 } else { 313 bit = 0x0; 314 } 315 if ((uint32_t)wc <= 0x1F) { 316 candidate = INIT0(ei); 317 graphic = (bit == 0) ? candidate->left : candidate->right; 318 if (graphic == NULL) 319 goto ilseq; 320 range = &ranges[(size_t)CTRL]; 321 len = 1; 322 } else if ((uint32_t)wc <= 0x7F) { 323 graphic = ei->ascii; 324 if (graphic == NULL) 325 goto ilseq; 326 candidate = graphic->escape; 327 range = &ranges[(size_t)graphic->charset]; 328 len = graphic->length; 329 } else if ((uint32_t)wc <= 0x7F7F) { 330 graphic = ei->gb2312; 331 if (graphic == NULL) 332 goto ilseq; 333 candidate = graphic->escape; 334 range = &ranges[(size_t)graphic->charset]; 335 len = graphic->length; 336 } else { 337 ch = (wc >> 24) & 0xFF; 338 candidate = find_escape(E0SET(ei), ch); 339 if (candidate == NULL) { 340 candidate = find_escape(E1SET(ei), ch); 341 if (candidate == NULL) 342 goto ilseq; 343 } 344 wc &= ~0xFF000000; 345 graphic = (bit == 0) ? candidate->left : candidate->right; 346 if (graphic == NULL) 347 goto ilseq; 348 range = &ranges[(size_t)graphic->charset]; 349 len = graphic->length; 350 } 351 if (psenc->inuse != candidate) { 352 init = INIT0(ei); 353 if (SET(psenc->inuse) == SET(candidate)) { 354 if (INIT(psenc->inuse) != init || 355 psenc->inuse == init || candidate == init) 356 init = NULL; 357 } else if (candidate == (init = INIT(candidate))) { 358 init = NULL; 359 } 360 if (init != NULL) { 361 if (n < 2) 362 return (E2BIG); 363 n -= 2; 364 psenc->ch[psenc->chlen++] = ESCAPE_CHAR; 365 psenc->ch[psenc->chlen++] = ESC(init); 366 } 367 if (n < 2) 368 return (E2BIG); 369 n -= 2; 370 psenc->ch[psenc->chlen++] = ESCAPE_CHAR; 371 psenc->ch[psenc->chlen++] = ESC(candidate); 372 psenc->inuse = candidate; 373 } 374 if (n < len) 375 return (E2BIG); 376 while (len-- > 0) { 377 ch = (wc >> (len * 8)) & 0xFF; 378 if (range->start > ch || range->end < ch) 379 goto ilseq; 380 psenc->ch[psenc->chlen++] = ch | bit; 381 } 382 memcpy(s, psenc->ch, psenc->chlen); 383 *nresult = psenc->chlen; 384 psenc->chlen = 0; 385 386 return (0); 387 388ilseq: 389 *nresult = (size_t)-1; 390 return (EILSEQ); 391} 392 393static __inline int 394_citrus_HZ_put_state_reset(_HZEncodingInfo * __restrict ei, 395 char * __restrict s, size_t n, _HZState * __restrict psenc, 396 size_t * __restrict nresult) 397{ 398 escape_t *candidate; 399 400 if (psenc->chlen != 0 || psenc->inuse == NULL) 401 return (EINVAL); 402 candidate = INIT0(ei); 403 if (psenc->inuse != candidate) { 404 if (n < 2) 405 return (E2BIG); 406 n -= 2; 407 psenc->ch[psenc->chlen++] = ESCAPE_CHAR; 408 psenc->ch[psenc->chlen++] = ESC(candidate); 409 } 410 if (n < 1) 411 return (E2BIG); 412 if (psenc->chlen > 0) 413 memcpy(s, psenc->ch, psenc->chlen); 414 *nresult = psenc->chlen; 415 _citrus_HZ_init_state(ei, psenc); 416 417 return (0); 418} 419 420static __inline int 421_citrus_HZ_stdenc_get_state_desc_generic(_HZEncodingInfo * __restrict ei, 422 _HZState * __restrict psenc, int * __restrict rstate) 423{ 424 425 if (psenc->chlen < 0 || psenc->inuse == NULL) 426 return (EINVAL); 427 *rstate = (psenc->chlen == 0) 428 ? ((psenc->inuse == INIT0(ei)) 429 ? _STDENC_SDGEN_INITIAL 430 : _STDENC_SDGEN_STABLE) 431 : ((psenc->ch[0] == ESCAPE_CHAR) 432 ? _STDENC_SDGEN_INCOMPLETE_SHIFT 433 : _STDENC_SDGEN_INCOMPLETE_CHAR); 434 435 return (0); 436} 437 438static __inline int 439/*ARGSUSED*/ 440_citrus_HZ_stdenc_wctocs(_HZEncodingInfo * __restrict ei __unused, 441 _csid_t * __restrict csid, _index_t * __restrict idx, wchar_t wc) 442{ 443 int bit; 444 445 if (wc & 0x80) { 446 bit = 0x80; 447 wc &= ~0x80; 448 } else 449 bit = 0x0; 450 if ((uint32_t)wc <= 0x7F) { 451 *csid = (_csid_t)bit; 452 *idx = (_index_t)wc; 453 } else if ((uint32_t)wc <= 0x7F7F) { 454 *csid = (_csid_t)(bit | 0x8000); 455 *idx = (_index_t)wc; 456 } else { 457 *csid = (_index_t)(wc & ~0x00FFFF7F); 458 *idx = (_csid_t)(wc & 0x00FFFF7F); 459 } 460 461 return (0); 462} 463 464static __inline int 465/*ARGSUSED*/ 466_citrus_HZ_stdenc_cstowc(_HZEncodingInfo * __restrict ei __unused, 467 wchar_t * __restrict wc, _csid_t csid, _index_t idx) 468{ 469 470 *wc = (wchar_t)idx; 471 switch (csid) { 472 case 0x80: 473 case 0x8080: 474 *wc |= (wchar_t)0x80; 475 /*FALLTHROUGH*/ 476 case 0x0: 477 case 0x8000: 478 break; 479 default: 480 *wc |= (wchar_t)csid; 481 } 482 483 return (0); 484} 485 486static void 487_citrus_HZ_encoding_module_uninit(_HZEncodingInfo *ei) 488{ 489 escape_t *escape; 490 491 while ((escape = TAILQ_FIRST(E0SET(ei))) != NULL) { 492 TAILQ_REMOVE(E0SET(ei), escape, entry); 493 free(GL(escape)); 494 free(GR(escape)); 495 free(escape); 496 } 497 while ((escape = TAILQ_FIRST(E1SET(ei))) != NULL) { 498 TAILQ_REMOVE(E1SET(ei), escape, entry); 499 free(GL(escape)); 500 free(GR(escape)); 501 free(escape); 502 } 503} 504 505static int 506_citrus_HZ_parse_char(void *context, const char *name __unused, const char *s) 507{ 508 escape_t *escape; 509 void **p; 510 511 p = (void **)context; 512 escape = (escape_t *)p[0]; 513 if (escape->ch != '\0') 514 return (EINVAL); 515 escape->ch = *s++; 516 if (escape->ch == ESCAPE_CHAR || *s != '\0') 517 return (EINVAL); 518 519 return (0); 520} 521 522static int 523_citrus_HZ_parse_graphic(void *context, const char *name, const char *s) 524{ 525 _HZEncodingInfo *ei; 526 escape_t *escape; 527 graphic_t *graphic; 528 void **p; 529 530 p = (void **)context; 531 escape = (escape_t *)p[0]; 532 ei = (_HZEncodingInfo *)p[1]; 533 graphic = malloc(sizeof(*graphic)); 534 if (graphic == NULL) 535 return (ENOMEM); 536 memset(graphic, 0, sizeof(*graphic)); 537 if (strcmp("GL", name) == 0) { 538 if (GL(escape) != NULL) 539 goto release; 540 GL(escape) = graphic; 541 } else if (strcmp("GR", name) == 0) { 542 if (GR(escape) != NULL) 543 goto release; 544 GR(escape) = graphic; 545 } else { 546release: 547 free(graphic); 548 return (EINVAL); 549 } 550 graphic->escape = escape; 551 if (_bcs_strncasecmp("ASCII", s, 5) == 0) { 552 if (s[5] != '\0') 553 return (EINVAL); 554 graphic->charset = ASCII; 555 graphic->length = 1; 556 ei->ascii = graphic; 557 return (0); 558 } else if (_bcs_strncasecmp("GB2312", s, 6) == 0) { 559 if (s[6] != '\0') 560 return (EINVAL); 561 graphic->charset = GB2312; 562 graphic->length = 2; 563 ei->gb2312 = graphic; 564 return (0); 565 } else if (strncmp("94*", s, 3) == 0) 566 graphic->charset = CS94; 567 else if (strncmp("96*", s, 3) == 0) 568 graphic->charset = CS96; 569 else 570 return (EINVAL); 571 s += 3; 572 switch(*s) { 573 case '1': case '2': case '3': 574 graphic->length = (size_t)(*s - '0'); 575 if (*++s == '\0') 576 break; 577 /*FALLTHROUGH*/ 578 default: 579 return (EINVAL); 580 } 581 return (0); 582} 583 584static const _citrus_prop_hint_t escape_hints[] = { 585_CITRUS_PROP_HINT_STR("CH", &_citrus_HZ_parse_char), 586_CITRUS_PROP_HINT_STR("GL", &_citrus_HZ_parse_graphic), 587_CITRUS_PROP_HINT_STR("GR", &_citrus_HZ_parse_graphic), 588_CITRUS_PROP_HINT_END 589}; 590 591static int 592_citrus_HZ_parse_escape(void *context, const char *name, const char *s) 593{ 594 _HZEncodingInfo *ei; 595 escape_t *escape; 596 void *p[2]; 597 598 ei = (_HZEncodingInfo *)context; 599 escape = malloc(sizeof(*escape)); 600 if (escape == NULL) 601 return (EINVAL); 602 memset(escape, 0, sizeof(*escape)); 603 if (strcmp("0", name) == 0) { 604 escape->set = E0SET(ei); 605 TAILQ_INSERT_TAIL(E0SET(ei), escape, entry); 606 } else if (strcmp("1", name) == 0) { 607 escape->set = E1SET(ei); 608 TAILQ_INSERT_TAIL(E1SET(ei), escape, entry); 609 } else { 610 free(escape); 611 return (EINVAL); 612 } 613 p[0] = (void *)escape; 614 p[1] = (void *)ei; 615 return (_citrus_prop_parse_variable( 616 escape_hints, (void *)&p[0], s, strlen(s))); 617} 618 619static const _citrus_prop_hint_t root_hints[] = { 620_CITRUS_PROP_HINT_STR("0", &_citrus_HZ_parse_escape), 621_CITRUS_PROP_HINT_STR("1", &_citrus_HZ_parse_escape), 622_CITRUS_PROP_HINT_END 623}; 624 625static int 626_citrus_HZ_encoding_module_init(_HZEncodingInfo * __restrict ei, 627 const void * __restrict var, size_t lenvar) 628{ 629 int errnum; 630 631 memset(ei, 0, sizeof(*ei)); 632 TAILQ_INIT(E0SET(ei)); 633 TAILQ_INIT(E1SET(ei)); 634 errnum = _citrus_prop_parse_variable( 635 root_hints, (void *)ei, var, lenvar); 636 if (errnum != 0) 637 _citrus_HZ_encoding_module_uninit(ei); 638 return (errnum); 639} 640 641/* ---------------------------------------------------------------------- 642 * public interface for stdenc 643 */ 644 645_CITRUS_STDENC_DECLS(HZ); 646_CITRUS_STDENC_DEF_OPS(HZ); 647 648#include "citrus_stdenc_template.h" 649