1/*	$NetBSD: citrus_iconv_std.c,v 1.15 2006/11/13 19:08:19 tnozaki Exp $	*/
2
3/*-
4 * Copyright (c)2003 Citrus Project,
5 * All rights reserved.
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 * 1. Redistributions of source code must retain the above copyright
11 *    notice, this list of conditions and the following disclaimer.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 *    notice, this list of conditions and the following disclaimer in the
14 *    documentation and/or other materials provided with the distribution.
15 *
16 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
17 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
20 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
22 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
23 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
24 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
25 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
26 * SUCH DAMAGE.
27 */
28
29#include <sys/cdefs.h>
30#if defined(LIBC_SCCS) && !defined(lint)
31__RCSID("$NetBSD: citrus_iconv_std.c,v 1.15 2006/11/13 19:08:19 tnozaki Exp $");
32#endif /* LIBC_SCCS and not lint */
33
34#include <assert.h>
35#include <errno.h>
36#include <limits.h>
37#include <stdio.h>
38#include <stdlib.h>
39#include <string.h>
40#include <machine/endian.h>
41#include <sys/queue.h>
42
43#include "citrus_namespace.h"
44#include "citrus_types.h"
45#include "citrus_module.h"
46#include "citrus_region.h"
47#include "citrus_mmap.h"
48#include "citrus_hash.h"
49#include "citrus_iconv.h"
50#include "citrus_stdenc.h"
51#include "citrus_mapper.h"
52#include "citrus_csmapper.h"
53#include "citrus_memstream.h"
54#include "citrus_iconv_std.h"
55#include "citrus_esdb.h"
56
57/* ---------------------------------------------------------------------- */
58
/*
 * NOTE(review): both macros are defined in the citrus iconv headers, not
 * in this file.  Presumably _CITRUS_ICONV_DECLS() declares the static
 * _citrus_iconv_std_iconv_*() handlers implemented below, and
 * _CITRUS_ICONV_DEF_OPS() defines the _citrus_iconv_std_iconv_ops table
 * that _citrus_iconv_std_iconv_getops() copies out -- confirm against the
 * macro definitions.
 */
_CITRUS_ICONV_DECLS(iconv_std);
_CITRUS_ICONV_DEF_OPS(iconv_std);
61
62
63/* ---------------------------------------------------------------------- */
64
65int
66_citrus_iconv_std_iconv_getops(struct _citrus_iconv_ops *ops, size_t lenops,
67			       u_int32_t expected_version)
68{
69	if (expected_version<_CITRUS_ICONV_ABI_VERSION || lenops<sizeof(*ops))
70		return (EINVAL);
71
72	memcpy(ops, &_citrus_iconv_std_iconv_ops,
73	       sizeof(_citrus_iconv_std_iconv_ops));
74
75	return (0);
76}
77
78/* ---------------------------------------------------------------------- */
79
80/*
81 * convenience routines for stdenc.
82 */
83static __inline void
84save_encoding_state(struct _citrus_iconv_std_encoding *se)
85{
86	if (se->se_ps)
87		memcpy(se->se_pssaved, se->se_ps,
88		       _stdenc_get_state_size(se->se_handle));
89}
90
91static __inline void
92restore_encoding_state(struct _citrus_iconv_std_encoding *se)
93{
94	if (se->se_ps)
95		memcpy(se->se_ps, se->se_pssaved,
96		       _stdenc_get_state_size(se->se_handle));
97}
98
99static __inline void
100init_encoding_state(struct _citrus_iconv_std_encoding *se)
101{
102	if (se->se_ps)
103		_stdenc_init_state(se->se_handle, se->se_ps);
104}
105
106static __inline int
107mbtocsx(struct _citrus_iconv_std_encoding *se,
108	_csid_t *csid, _index_t *idx, const char **s, size_t n,
109	size_t *nresult)
110{
111	return _stdenc_mbtocs(se->se_handle, csid, idx, s, n, se->se_ps,
112			      nresult);
113}
114
115static __inline int
116cstombx(struct _citrus_iconv_std_encoding *se,
117	char *s, size_t n, _csid_t csid, _index_t idx, size_t *nresult)
118{
119	return _stdenc_cstomb(se->se_handle, s, n, csid, idx, se->se_ps,
120			      nresult);
121}
122
123static __inline int
124wctombx(struct _citrus_iconv_std_encoding *se,
125	char *s, size_t n, _wc_t wc, size_t *nresult)
126{
127	return _stdenc_wctomb(se->se_handle, s, n, wc, se->se_ps, nresult);
128}
129
130static __inline int
131put_state_resetx(struct _citrus_iconv_std_encoding *se,
132		 char *s, size_t n, size_t *nresult)
133{
134	return _stdenc_put_state_reset(se->se_handle, s, n, se->se_ps, nresult);
135}
136
137static __inline int
138get_state_desc_gen(struct _citrus_iconv_std_encoding *se, int *rstate)
139{
140	int ret;
141	struct _stdenc_state_desc ssd;
142
143	ret = _stdenc_get_state_desc(se->se_handle, se->se_ps,
144				     _STDENC_SDID_GENERIC, &ssd);
145	if (!ret)
146		*rstate = ssd.u.generic.state;
147
148	return ret;
149}
150
151/*
152 * init encoding context
153 */
154static int
155init_encoding(struct _citrus_iconv_std_encoding *se, struct _stdenc *cs,
156	      void *ps1, void *ps2)
157{
158	int ret = -1;
159
160	se->se_handle = cs;
161	se->se_ps = ps1;
162	se->se_pssaved = ps2;
163
164	if (se->se_ps)
165		ret = _stdenc_init_state(cs, se->se_ps);
166	if (!ret && se->se_pssaved)
167		ret = _stdenc_init_state(cs, se->se_pssaved);
168
169	return ret;
170}
171
/*
 * Open the charset mapper from src to dst and make sure it is usable
 * here: source and destination maxima must both be 1 and the mapper
 * must need no state.
 *
 * On success returns 0 and stores the mapper in *rcm.  Returns the
 * error from _csmapper_open() if opening fails, or EINVAL (after
 * closing the mapper) if it does not meet the constraints; *rcm is not
 * written in either failure case.
 */
static int
open_csmapper(struct _csmapper **rcm, const char *src, const char *dst,
	      unsigned long *rnorm)
{
	struct _csmapper *mapper;
	int error;

	error = _csmapper_open(&mapper, src, dst, 0, rnorm);
	if (error != 0)
		return error;

	if (_csmapper_get_src_max(mapper) == 1 &&
	    _csmapper_get_dst_max(mapper) == 1 &&
	    _csmapper_get_state_size(mapper) == 0) {
		*rcm = mapper;
		return 0;
	}

	_csmapper_close(mapper);

	return EINVAL;
}
192
193static void
194close_dsts(struct _citrus_iconv_std_dst_list *dl)
195{
196	struct _citrus_iconv_std_dst *sd;
197
198	while ((sd=TAILQ_FIRST(dl)) != NULL) {
199		TAILQ_REMOVE(dl, sd, sd_entry);
200		_csmapper_close(sd->sd_mapper);
201		free(sd);
202	}
203}
204
205static int
206open_dsts(struct _citrus_iconv_std_dst_list *dl,
207	  const struct _esdb_charset *ec, const struct _esdb *dbdst)
208{
209	int i, ret;
210	struct _citrus_iconv_std_dst *sd, *sdtmp;
211	unsigned long norm;
212
213	sd = malloc(sizeof(*sd));
214	if (sd == NULL)
215		return errno;
216
217	for (i=0; i<dbdst->db_num_charsets; i++) {
218		ret = open_csmapper(&sd->sd_mapper, ec->ec_csname,
219				    dbdst->db_charsets[i].ec_csname, &norm);
220		if (ret == 0) {
221			sd->sd_csid = dbdst->db_charsets[i].ec_csid;
222			sd->sd_norm = norm;
223			/* insert this mapper by sorted order. */
224			TAILQ_FOREACH(sdtmp, dl, sd_entry) {
225				if (sdtmp->sd_norm > norm) {
226					TAILQ_INSERT_BEFORE(sdtmp, sd,
227							    sd_entry);
228					sd = NULL;
229					break;
230				}
231			}
232			if (sd)
233				TAILQ_INSERT_TAIL(dl, sd, sd_entry);
234			sd = malloc(sizeof(*sd));
235			if (sd == NULL) {
236				ret = errno;
237				close_dsts(dl);
238				return ret;
239			}
240		} else if (ret != ENOENT) {
241			close_dsts(dl);
242			free(sd);
243			return ret;
244		}
245	}
246	free(sd);
247	return 0;
248}
249
250static void
251close_srcs(struct _citrus_iconv_std_src_list *sl)
252{
253	struct _citrus_iconv_std_src *ss;
254
255	while ((ss=TAILQ_FIRST(sl)) != NULL) {
256		TAILQ_REMOVE(sl, ss, ss_entry);
257		close_dsts(&ss->ss_dsts);
258		free(ss);
259	}
260}
261
262static int
263open_srcs(struct _citrus_iconv_std_src_list *sl,
264	  const struct _esdb *dbsrc, const struct _esdb *dbdst)
265{
266	int i, ret, count = 0;
267	struct _citrus_iconv_std_src *ss;
268
269	ss = malloc(sizeof(*ss));
270	if (ss == NULL)
271		return errno;
272
273	TAILQ_INIT(&ss->ss_dsts);
274
275	for (i=0; i<dbsrc->db_num_charsets; i++) {
276		ret = open_dsts(&ss->ss_dsts, &dbsrc->db_charsets[i], dbdst);
277		if (ret)
278			goto err;
279		if (!TAILQ_EMPTY(&ss->ss_dsts)) {
280			ss->ss_csid = dbsrc->db_charsets[i].ec_csid;
281			TAILQ_INSERT_TAIL(sl, ss, ss_entry);
282			ss = malloc(sizeof(*ss));
283			if (ss == NULL) {
284				ret = errno;
285				goto err;
286			}
287			count++;
288			TAILQ_INIT(&ss->ss_dsts);
289		}
290	}
291	free(ss);
292
293	return count ? 0 : ENOENT;
294
295err:
296	free(ss);
297	close_srcs(sl);
298	return ret;
299}
300
301/* do convert a character */
302#define E_NO_CORRESPONDING_CHAR ENOENT /* XXX */
303static int
304/*ARGSUSED*/
305do_conv(const struct _citrus_iconv_std_shared *is,
306	struct _citrus_iconv_std_context *sc, _csid_t *csid, _index_t *idx)
307{
308	_index_t tmpidx;
309	int ret;
310	struct _citrus_iconv_std_src *ss;
311	struct _citrus_iconv_std_dst *sd;
312
313	TAILQ_FOREACH(ss, &is->is_srcs, ss_entry) {
314		if (ss->ss_csid == *csid) {
315			TAILQ_FOREACH(sd, &ss->ss_dsts, sd_entry) {
316				ret = _csmapper_convert(sd->sd_mapper,
317							&tmpidx, *idx, NULL);
318				switch (ret) {
319				case _MAPPER_CONVERT_SUCCESS:
320					*csid = sd->sd_csid;
321					*idx = tmpidx;
322					return 0;
323				case _MAPPER_CONVERT_NONIDENTICAL:
324					break;
325				case _MAPPER_CONVERT_SRC_MORE:
326					/*FALLTHROUGH*/
327				case _MAPPER_CONVERT_DST_MORE:
328					/*FALLTHROUGH*/
329				case _MAPPER_CONVERT_FATAL:
330					return EINVAL;
331				case _MAPPER_CONVERT_ILSEQ:
332					return EILSEQ;
333				}
334			}
335			break;
336		}
337	}
338
339	return E_NO_CORRESPONDING_CHAR;
340}
341/* ---------------------------------------------------------------------- */
342
343static int
344/*ARGSUSED*/
345_citrus_iconv_std_iconv_init_shared(struct _citrus_iconv_shared *ci,
346				    const char * __restrict curdir,
347				    const char * __restrict src,
348				    const char * __restrict dst,
349				    const void * __restrict var, size_t lenvar)
350{
351	int ret;
352	struct _citrus_iconv_std_shared *is;
353	struct _citrus_esdb esdbsrc, esdbdst;
354
355	is = malloc(sizeof(*is));
356	if (is==NULL) {
357		ret = errno;
358		goto err0;
359	}
360	ret = _citrus_esdb_open(&esdbsrc, src);
361	if (ret)
362		goto err1;
363	ret = _citrus_esdb_open(&esdbdst, dst);
364	if (ret)
365		goto err2;
366	ret = _stdenc_open(&is->is_src_encoding, esdbsrc.db_encname,
367			   esdbsrc.db_variable, esdbsrc.db_len_variable);
368	if (ret)
369		goto err3;
370	ret = _stdenc_open(&is->is_dst_encoding, esdbdst.db_encname,
371			   esdbdst.db_variable, esdbdst.db_len_variable);
372	if (ret)
373		goto err4;
374	is->is_use_invalid = esdbdst.db_use_invalid;
375	is->is_invalid = esdbdst.db_invalid;
376
377	TAILQ_INIT(&is->is_srcs);
378	ret = open_srcs(&is->is_srcs, &esdbsrc, &esdbdst);
379	if (ret)
380		goto err5;
381
382	_esdb_close(&esdbsrc);
383	_esdb_close(&esdbdst);
384	ci->ci_closure = is;
385
386	return 0;
387
388err5:
389	_stdenc_close(is->is_dst_encoding);
390err4:
391	_stdenc_close(is->is_src_encoding);
392err3:
393	_esdb_close(&esdbdst);
394err2:
395	_esdb_close(&esdbsrc);
396err1:
397	free(is);
398err0:
399	return ret;
400}
401
402static void
403_citrus_iconv_std_iconv_uninit_shared(struct _citrus_iconv_shared *ci)
404{
405	struct _citrus_iconv_std_shared *is = ci->ci_closure;
406
407	if (is == NULL)
408		return;
409
410	_stdenc_close(is->is_src_encoding);
411	_stdenc_close(is->is_dst_encoding);
412	close_srcs(&is->is_srcs);
413	free(is);
414}
415
416static int
417_citrus_iconv_std_iconv_init_context(struct _citrus_iconv *cv)
418{
419	const struct _citrus_iconv_std_shared *is = cv->cv_shared->ci_closure;
420	struct _citrus_iconv_std_context *sc;
421	size_t szpssrc, szpsdst, sz;
422	char *ptr;
423
424	szpssrc = _stdenc_get_state_size(is->is_src_encoding);
425	szpsdst = _stdenc_get_state_size(is->is_dst_encoding);
426
427	sz = (szpssrc + szpsdst)*2 + sizeof(struct _citrus_iconv_std_context);
428	sc = malloc(sz);
429	if (sc == NULL)
430		return errno;
431
432	ptr = (char *)&sc[1];
433	if (szpssrc)
434		init_encoding(&sc->sc_src_encoding, is->is_src_encoding,
435			      ptr, ptr+szpssrc);
436	else
437		init_encoding(&sc->sc_src_encoding, is->is_src_encoding,
438			      NULL, NULL);
439	ptr += szpssrc*2;
440	if (szpsdst)
441		init_encoding(&sc->sc_dst_encoding, is->is_dst_encoding,
442			      ptr, ptr+szpsdst);
443	else
444		init_encoding(&sc->sc_dst_encoding, is->is_dst_encoding,
445			      NULL, NULL);
446
447	cv->cv_closure = (void *)sc;
448
449	return 0;
450}
451
452static void
453_citrus_iconv_std_iconv_uninit_context(struct _citrus_iconv *cv)
454{
455	free(cv->cv_closure);
456}
457
458static int
459_citrus_iconv_std_iconv_convert(struct _citrus_iconv * __restrict cv,
460				const char * __restrict * __restrict in,
461				size_t * __restrict inbytes,
462				char * __restrict * __restrict out,
463				size_t * __restrict outbytes, u_int32_t flags,
464				size_t * __restrict invalids)
465{
466	const struct _citrus_iconv_std_shared *is = cv->cv_shared->ci_closure;
467	struct _citrus_iconv_std_context *sc = cv->cv_closure;
468	_index_t idx;
469	_csid_t csid;
470	int ret, state;
471	size_t szrin, szrout;
472	size_t inval;
473	const char *tmpin;
474
475	inval = 0;
476	if (in==NULL || *in==NULL) {
477		/* special cases */
478		if (out!=NULL && *out!=NULL) {
479			/* init output state and store the shift sequence */
480			save_encoding_state(&sc->sc_src_encoding);
481			save_encoding_state(&sc->sc_dst_encoding);
482			szrout = 0;
483
484			ret = put_state_resetx(&sc->sc_dst_encoding,
485					       *out, *outbytes,
486					       &szrout);
487			if (ret)
488				goto err;
489
490			if (szrout == (size_t)-2) {
491				/* too small to store the character */
492				ret = EINVAL;
493				goto err;
494			}
495			*out += szrout;
496			*outbytes -= szrout;
497		} else
498			/* otherwise, discard the shift sequence */
499			init_encoding_state(&sc->sc_dst_encoding);
500		init_encoding_state(&sc->sc_src_encoding);
501		*invalids = 0;
502		return 0;
503	}
504
505	/* normal case */
506	for (;;) {
507		if (*inbytes==0) {
508			ret = get_state_desc_gen(&sc->sc_src_encoding, &state);
509			if (state == _STDENC_SDGEN_INITIAL ||
510			    state == _STDENC_SDGEN_STABLE)
511				break;
512		}
513
514		/* save the encoding states for the error recovery */
515		save_encoding_state(&sc->sc_src_encoding);
516		save_encoding_state(&sc->sc_dst_encoding);
517
518		/* mb -> csid/index */
519		tmpin = *in;
520		szrin = szrout = 0;
521		ret = mbtocsx(&sc->sc_src_encoding, &csid, &idx,
522			      &tmpin, *inbytes, &szrin);
523		if (ret)
524			goto err;
525
526		if (szrin == (size_t)-2) {
527			/* incompleted character */
528			ret = get_state_desc_gen(&sc->sc_src_encoding, &state);
529			if (ret) {
530				ret = EINVAL;
531				goto err;
532			}
533			switch (state) {
534			case _STDENC_SDGEN_INITIAL:
535			case _STDENC_SDGEN_STABLE:
536				/* fetch shift sequences only. */
537				goto next;
538			}
539			ret = EINVAL;
540			goto err;
541		}
542		/* convert the character */
543		ret = do_conv(is, sc, &csid, &idx);
544		if (ret) {
545			if (ret == E_NO_CORRESPONDING_CHAR) {
546				inval++;
547				szrout = 0;
548				if ((flags&_CITRUS_ICONV_F_HIDE_INVALID)==0 &&
549				    is->is_use_invalid) {
550					ret = wctombx(&sc->sc_dst_encoding,
551						      *out, *outbytes,
552						      is->is_invalid,
553						      &szrout);
554					if (ret)
555						goto err;
556				}
557				goto next;
558			} else {
559				goto err;
560			}
561		}
562		/* csid/index -> mb */
563		ret = cstombx(&sc->sc_dst_encoding,
564			      *out, *outbytes, csid, idx, &szrout);
565		if (ret)
566			goto err;
567next:
568		_DIAGASSERT(*inbytes>=szrin && *outbytes>=szrout);
569		*inbytes -= tmpin-*in; /* szrin is insufficient on \0. */
570		*in = tmpin;
571		*outbytes -= szrout;
572		*out += szrout;
573	}
574	*invalids = inval;
575
576	return 0;
577
578err:
579	restore_encoding_state(&sc->sc_src_encoding);
580	restore_encoding_state(&sc->sc_dst_encoding);
581err_norestore:
582	*invalids = inval;
583
584	return ret;
585}
586