1/* $NetBSD: citrus_iconv_std.c,v 1.15 2006/11/13 19:08:19 tnozaki Exp $ */ 2 3/*- 4 * Copyright (c)2003 Citrus Project, 5 * All rights reserved. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 1. Redistributions of source code must retain the above copyright 11 * notice, this list of conditions and the following disclaimer. 12 * 2. Redistributions in binary form must reproduce the above copyright 13 * notice, this list of conditions and the following disclaimer in the 14 * documentation and/or other materials provided with the distribution. 15 * 16 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 17 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 18 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 19 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 20 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 21 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 22 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 23 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 24 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 25 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 26 * SUCH DAMAGE. 27 */ 28 29#include <sys/cdefs.h> 30#if defined(LIBC_SCCS) && !defined(lint) 31__RCSID("$NetBSD: citrus_iconv_std.c,v 1.15 2006/11/13 19:08:19 tnozaki Exp $"); 32#endif /* LIBC_SCCS and not lint */ 33 34#include <assert.h> 35#include <errno.h> 36#include <limits.h> 37#include <stdio.h> 38#include <stdlib.h> 39#include <string.h> 40#include <machine/endian.h> 41#include <sys/queue.h> 42 43#include "citrus_namespace.h" 44#include "citrus_types.h" 45#include "citrus_module.h" 46#include "citrus_region.h" 47#include "citrus_mmap.h" 48#include "citrus_hash.h" 49#include "citrus_iconv.h" 50#include "citrus_stdenc.h" 51#include "citrus_mapper.h" 52#include "citrus_csmapper.h" 53#include "citrus_memstream.h" 54#include "citrus_iconv_std.h" 55#include "citrus_esdb.h" 56 57/* ---------------------------------------------------------------------- */ 58 59_CITRUS_ICONV_DECLS(iconv_std); 60_CITRUS_ICONV_DEF_OPS(iconv_std); 61 62 63/* ---------------------------------------------------------------------- */ 64 65int 66_citrus_iconv_std_iconv_getops(struct _citrus_iconv_ops *ops, size_t lenops, 67 u_int32_t expected_version) 68{ 69 if (expected_version<_CITRUS_ICONV_ABI_VERSION || lenops<sizeof(*ops)) 70 return (EINVAL); 71 72 memcpy(ops, &_citrus_iconv_std_iconv_ops, 73 sizeof(_citrus_iconv_std_iconv_ops)); 74 75 return (0); 76} 77 78/* ---------------------------------------------------------------------- */ 79 80/* 81 * convenience routines for stdenc. 82 */ 83static __inline void 84save_encoding_state(struct _citrus_iconv_std_encoding *se) 85{ 86 if (se->se_ps) 87 memcpy(se->se_pssaved, se->se_ps, 88 _stdenc_get_state_size(se->se_handle)); 89} 90 91static __inline void 92restore_encoding_state(struct _citrus_iconv_std_encoding *se) 93{ 94 if (se->se_ps) 95 memcpy(se->se_ps, se->se_pssaved, 96 _stdenc_get_state_size(se->se_handle)); 97} 98 99static __inline void 100init_encoding_state(struct _citrus_iconv_std_encoding *se) 101{ 102 if (se->se_ps) 103 _stdenc_init_state(se->se_handle, se->se_ps); 104} 105 106static __inline int 107mbtocsx(struct _citrus_iconv_std_encoding *se, 108 _csid_t *csid, _index_t *idx, const char **s, size_t n, 109 size_t *nresult) 110{ 111 return _stdenc_mbtocs(se->se_handle, csid, idx, s, n, se->se_ps, 112 nresult); 113} 114 115static __inline int 116cstombx(struct _citrus_iconv_std_encoding *se, 117 char *s, size_t n, _csid_t csid, _index_t idx, size_t *nresult) 118{ 119 return _stdenc_cstomb(se->se_handle, s, n, csid, idx, se->se_ps, 120 nresult); 121} 122 123static __inline int 124wctombx(struct _citrus_iconv_std_encoding *se, 125 char *s, size_t n, _wc_t wc, size_t *nresult) 126{ 127 return _stdenc_wctomb(se->se_handle, s, n, wc, se->se_ps, nresult); 128} 129 130static __inline int 131put_state_resetx(struct _citrus_iconv_std_encoding *se, 132 char *s, size_t n, size_t *nresult) 133{ 134 return _stdenc_put_state_reset(se->se_handle, s, n, se->se_ps, nresult); 135} 136 137static __inline int 138get_state_desc_gen(struct _citrus_iconv_std_encoding *se, int *rstate) 139{ 140 int ret; 141 struct _stdenc_state_desc ssd; 142 143 ret = _stdenc_get_state_desc(se->se_handle, se->se_ps, 144 _STDENC_SDID_GENERIC, &ssd); 145 if (!ret) 146 *rstate = ssd.u.generic.state; 147 148 return ret; 149} 150 151/* 152 * init encoding context 153 */ 154static int 155init_encoding(struct _citrus_iconv_std_encoding *se, struct _stdenc *cs, 156 void *ps1, void *ps2) 157{ 158 int ret = -1; 159 160 se->se_handle = cs; 161 se->se_ps = ps1; 162 se->se_pssaved = ps2; 163 164 if (se->se_ps) 165 ret = _stdenc_init_state(cs, se->se_ps); 166 if (!ret && se->se_pssaved) 167 ret = _stdenc_init_state(cs, se->se_pssaved); 168 169 return ret; 170} 171 172static int 173open_csmapper(struct _csmapper **rcm, const char *src, const char *dst, 174 unsigned long *rnorm) 175{ 176 int ret; 177 struct _csmapper *cm; 178 179 ret = _csmapper_open(&cm, src, dst, 0, rnorm); 180 if (ret) 181 return ret; 182 if (_csmapper_get_src_max(cm) != 1 || _csmapper_get_dst_max(cm) != 1 || 183 _csmapper_get_state_size(cm) != 0) { 184 _csmapper_close(cm); 185 return EINVAL; 186 } 187 188 *rcm = cm; 189 190 return 0; 191} 192 193static void 194close_dsts(struct _citrus_iconv_std_dst_list *dl) 195{ 196 struct _citrus_iconv_std_dst *sd; 197 198 while ((sd=TAILQ_FIRST(dl)) != NULL) { 199 TAILQ_REMOVE(dl, sd, sd_entry); 200 _csmapper_close(sd->sd_mapper); 201 free(sd); 202 } 203} 204 205static int 206open_dsts(struct _citrus_iconv_std_dst_list *dl, 207 const struct _esdb_charset *ec, const struct _esdb *dbdst) 208{ 209 int i, ret; 210 struct _citrus_iconv_std_dst *sd, *sdtmp; 211 unsigned long norm; 212 213 sd = malloc(sizeof(*sd)); 214 if (sd == NULL) 215 return errno; 216 217 for (i=0; i<dbdst->db_num_charsets; i++) { 218 ret = open_csmapper(&sd->sd_mapper, ec->ec_csname, 219 dbdst->db_charsets[i].ec_csname, &norm); 220 if (ret == 0) { 221 sd->sd_csid = dbdst->db_charsets[i].ec_csid; 222 sd->sd_norm = norm; 223 /* insert this mapper by sorted order. */ 224 TAILQ_FOREACH(sdtmp, dl, sd_entry) { 225 if (sdtmp->sd_norm > norm) { 226 TAILQ_INSERT_BEFORE(sdtmp, sd, 227 sd_entry); 228 sd = NULL; 229 break; 230 } 231 } 232 if (sd) 233 TAILQ_INSERT_TAIL(dl, sd, sd_entry); 234 sd = malloc(sizeof(*sd)); 235 if (sd == NULL) { 236 ret = errno; 237 close_dsts(dl); 238 return ret; 239 } 240 } else if (ret != ENOENT) { 241 close_dsts(dl); 242 free(sd); 243 return ret; 244 } 245 } 246 free(sd); 247 return 0; 248} 249 250static void 251close_srcs(struct _citrus_iconv_std_src_list *sl) 252{ 253 struct _citrus_iconv_std_src *ss; 254 255 while ((ss=TAILQ_FIRST(sl)) != NULL) { 256 TAILQ_REMOVE(sl, ss, ss_entry); 257 close_dsts(&ss->ss_dsts); 258 free(ss); 259 } 260} 261 262static int 263open_srcs(struct _citrus_iconv_std_src_list *sl, 264 const struct _esdb *dbsrc, const struct _esdb *dbdst) 265{ 266 int i, ret, count = 0; 267 struct _citrus_iconv_std_src *ss; 268 269 ss = malloc(sizeof(*ss)); 270 if (ss == NULL) 271 return errno; 272 273 TAILQ_INIT(&ss->ss_dsts); 274 275 for (i=0; i<dbsrc->db_num_charsets; i++) { 276 ret = open_dsts(&ss->ss_dsts, &dbsrc->db_charsets[i], dbdst); 277 if (ret) 278 goto err; 279 if (!TAILQ_EMPTY(&ss->ss_dsts)) { 280 ss->ss_csid = dbsrc->db_charsets[i].ec_csid; 281 TAILQ_INSERT_TAIL(sl, ss, ss_entry); 282 ss = malloc(sizeof(*ss)); 283 if (ss == NULL) { 284 ret = errno; 285 goto err; 286 } 287 count++; 288 TAILQ_INIT(&ss->ss_dsts); 289 } 290 } 291 free(ss); 292 293 return count ? 0 : ENOENT; 294 295err: 296 free(ss); 297 close_srcs(sl); 298 return ret; 299} 300 301/* do convert a character */ 302#define E_NO_CORRESPONDING_CHAR ENOENT /* XXX */ 303static int 304/*ARGSUSED*/ 305do_conv(const struct _citrus_iconv_std_shared *is, 306 struct _citrus_iconv_std_context *sc, _csid_t *csid, _index_t *idx) 307{ 308 _index_t tmpidx; 309 int ret; 310 struct _citrus_iconv_std_src *ss; 311 struct _citrus_iconv_std_dst *sd; 312 313 TAILQ_FOREACH(ss, &is->is_srcs, ss_entry) { 314 if (ss->ss_csid == *csid) { 315 TAILQ_FOREACH(sd, &ss->ss_dsts, sd_entry) { 316 ret = _csmapper_convert(sd->sd_mapper, 317 &tmpidx, *idx, NULL); 318 switch (ret) { 319 case _MAPPER_CONVERT_SUCCESS: 320 *csid = sd->sd_csid; 321 *idx = tmpidx; 322 return 0; 323 case _MAPPER_CONVERT_NONIDENTICAL: 324 break; 325 case _MAPPER_CONVERT_SRC_MORE: 326 /*FALLTHROUGH*/ 327 case _MAPPER_CONVERT_DST_MORE: 328 /*FALLTHROUGH*/ 329 case _MAPPER_CONVERT_FATAL: 330 return EINVAL; 331 case _MAPPER_CONVERT_ILSEQ: 332 return EILSEQ; 333 } 334 } 335 break; 336 } 337 } 338 339 return E_NO_CORRESPONDING_CHAR; 340} 341/* ---------------------------------------------------------------------- */ 342 343static int 344/*ARGSUSED*/ 345_citrus_iconv_std_iconv_init_shared(struct _citrus_iconv_shared *ci, 346 const char * __restrict curdir, 347 const char * __restrict src, 348 const char * __restrict dst, 349 const void * __restrict var, size_t lenvar) 350{ 351 int ret; 352 struct _citrus_iconv_std_shared *is; 353 struct _citrus_esdb esdbsrc, esdbdst; 354 355 is = malloc(sizeof(*is)); 356 if (is==NULL) { 357 ret = errno; 358 goto err0; 359 } 360 ret = _citrus_esdb_open(&esdbsrc, src); 361 if (ret) 362 goto err1; 363 ret = _citrus_esdb_open(&esdbdst, dst); 364 if (ret) 365 goto err2; 366 ret = _stdenc_open(&is->is_src_encoding, esdbsrc.db_encname, 367 esdbsrc.db_variable, esdbsrc.db_len_variable); 368 if (ret) 369 goto err3; 370 ret = _stdenc_open(&is->is_dst_encoding, esdbdst.db_encname, 371 esdbdst.db_variable, esdbdst.db_len_variable); 372 if (ret) 373 goto err4; 374 is->is_use_invalid = esdbdst.db_use_invalid; 375 is->is_invalid = esdbdst.db_invalid; 376 377 TAILQ_INIT(&is->is_srcs); 378 ret = open_srcs(&is->is_srcs, &esdbsrc, &esdbdst); 379 if (ret) 380 goto err5; 381 382 _esdb_close(&esdbsrc); 383 _esdb_close(&esdbdst); 384 ci->ci_closure = is; 385 386 return 0; 387 388err5: 389 _stdenc_close(is->is_dst_encoding); 390err4: 391 _stdenc_close(is->is_src_encoding); 392err3: 393 _esdb_close(&esdbdst); 394err2: 395 _esdb_close(&esdbsrc); 396err1: 397 free(is); 398err0: 399 return ret; 400} 401 402static void 403_citrus_iconv_std_iconv_uninit_shared(struct _citrus_iconv_shared *ci) 404{ 405 struct _citrus_iconv_std_shared *is = ci->ci_closure; 406 407 if (is == NULL) 408 return; 409 410 _stdenc_close(is->is_src_encoding); 411 _stdenc_close(is->is_dst_encoding); 412 close_srcs(&is->is_srcs); 413 free(is); 414} 415 416static int 417_citrus_iconv_std_iconv_init_context(struct _citrus_iconv *cv) 418{ 419 const struct _citrus_iconv_std_shared *is = cv->cv_shared->ci_closure; 420 struct _citrus_iconv_std_context *sc; 421 size_t szpssrc, szpsdst, sz; 422 char *ptr; 423 424 szpssrc = _stdenc_get_state_size(is->is_src_encoding); 425 szpsdst = _stdenc_get_state_size(is->is_dst_encoding); 426 427 sz = (szpssrc + szpsdst)*2 + sizeof(struct _citrus_iconv_std_context); 428 sc = malloc(sz); 429 if (sc == NULL) 430 return errno; 431 432 ptr = (char *)&sc[1]; 433 if (szpssrc) 434 init_encoding(&sc->sc_src_encoding, is->is_src_encoding, 435 ptr, ptr+szpssrc); 436 else 437 init_encoding(&sc->sc_src_encoding, is->is_src_encoding, 438 NULL, NULL); 439 ptr += szpssrc*2; 440 if (szpsdst) 441 init_encoding(&sc->sc_dst_encoding, is->is_dst_encoding, 442 ptr, ptr+szpsdst); 443 else 444 init_encoding(&sc->sc_dst_encoding, is->is_dst_encoding, 445 NULL, NULL); 446 447 cv->cv_closure = (void *)sc; 448 449 return 0; 450} 451 452static void 453_citrus_iconv_std_iconv_uninit_context(struct _citrus_iconv *cv) 454{ 455 free(cv->cv_closure); 456} 457 458static int 459_citrus_iconv_std_iconv_convert(struct _citrus_iconv * __restrict cv, 460 const char * __restrict * __restrict in, 461 size_t * __restrict inbytes, 462 char * __restrict * __restrict out, 463 size_t * __restrict outbytes, u_int32_t flags, 464 size_t * __restrict invalids) 465{ 466 const struct _citrus_iconv_std_shared *is = cv->cv_shared->ci_closure; 467 struct _citrus_iconv_std_context *sc = cv->cv_closure; 468 _index_t idx; 469 _csid_t csid; 470 int ret, state; 471 size_t szrin, szrout; 472 size_t inval; 473 const char *tmpin; 474 475 inval = 0; 476 if (in==NULL || *in==NULL) { 477 /* special cases */ 478 if (out!=NULL && *out!=NULL) { 479 /* init output state and store the shift sequence */ 480 save_encoding_state(&sc->sc_src_encoding); 481 save_encoding_state(&sc->sc_dst_encoding); 482 szrout = 0; 483 484 ret = put_state_resetx(&sc->sc_dst_encoding, 485 *out, *outbytes, 486 &szrout); 487 if (ret) 488 goto err; 489 490 if (szrout == (size_t)-2) { 491 /* too small to store the character */ 492 ret = EINVAL; 493 goto err; 494 } 495 *out += szrout; 496 *outbytes -= szrout; 497 } else 498 /* otherwise, discard the shift sequence */ 499 init_encoding_state(&sc->sc_dst_encoding); 500 init_encoding_state(&sc->sc_src_encoding); 501 *invalids = 0; 502 return 0; 503 } 504 505 /* normal case */ 506 for (;;) { 507 if (*inbytes==0) { 508 ret = get_state_desc_gen(&sc->sc_src_encoding, &state); 509 if (state == _STDENC_SDGEN_INITIAL || 510 state == _STDENC_SDGEN_STABLE) 511 break; 512 } 513 514 /* save the encoding states for the error recovery */ 515 save_encoding_state(&sc->sc_src_encoding); 516 save_encoding_state(&sc->sc_dst_encoding); 517 518 /* mb -> csid/index */ 519 tmpin = *in; 520 szrin = szrout = 0; 521 ret = mbtocsx(&sc->sc_src_encoding, &csid, &idx, 522 &tmpin, *inbytes, &szrin); 523 if (ret) 524 goto err; 525 526 if (szrin == (size_t)-2) { 527 /* incompleted character */ 528 ret = get_state_desc_gen(&sc->sc_src_encoding, &state); 529 if (ret) { 530 ret = EINVAL; 531 goto err; 532 } 533 switch (state) { 534 case _STDENC_SDGEN_INITIAL: 535 case _STDENC_SDGEN_STABLE: 536 /* fetch shift sequences only. */ 537 goto next; 538 } 539 ret = EINVAL; 540 goto err; 541 } 542 /* convert the character */ 543 ret = do_conv(is, sc, &csid, &idx); 544 if (ret) { 545 if (ret == E_NO_CORRESPONDING_CHAR) { 546 inval++; 547 szrout = 0; 548 if ((flags&_CITRUS_ICONV_F_HIDE_INVALID)==0 && 549 is->is_use_invalid) { 550 ret = wctombx(&sc->sc_dst_encoding, 551 *out, *outbytes, 552 is->is_invalid, 553 &szrout); 554 if (ret) 555 goto err; 556 } 557 goto next; 558 } else { 559 goto err; 560 } 561 } 562 /* csid/index -> mb */ 563 ret = cstombx(&sc->sc_dst_encoding, 564 *out, *outbytes, csid, idx, &szrout); 565 if (ret) 566 goto err; 567next: 568 _DIAGASSERT(*inbytes>=szrin && *outbytes>=szrout); 569 *inbytes -= tmpin-*in; /* szrin is insufficient on \0. */ 570 *in = tmpin; 571 *outbytes -= szrout; 572 *out += szrout; 573 } 574 *invalids = inval; 575 576 return 0; 577 578err: 579 restore_encoding_state(&sc->sc_src_encoding); 580 restore_encoding_state(&sc->sc_dst_encoding); 581err_norestore: 582 *invalids = inval; 583 584 return ret; 585} 586