citrus_iconv_std.c revision 282275
1/* $FreeBSD: stable/10/lib/libiconv_modules/iconv_std/citrus_iconv_std.c 282275 2015-04-30 16:08:47Z tijl $ */
2/*	$NetBSD: citrus_iconv_std.c,v 1.16 2012/02/12 13:51:29 wiz Exp $	*/
3
4/*-
5 * Copyright (c)2003 Citrus Project,
6 * All rights reserved.
7 *
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
10 * are met:
11 * 1. Redistributions of source code must retain the above copyright
12 *    notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 *    notice, this list of conditions and the following disclaimer in the
15 *    documentation and/or other materials provided with the distribution.
16 *
17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
18 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
21 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
23 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27 * SUCH DAMAGE.
28 */
29
30#include <sys/cdefs.h>
31#include <sys/endian.h>
32#include <sys/queue.h>
33
34#include <assert.h>
35#include <errno.h>
36#include <limits.h>
37#include <stdbool.h>
38#include <stdio.h>
39#include <stdlib.h>
40#include <string.h>
41
42#include "citrus_namespace.h"
43#include "citrus_types.h"
44#include "citrus_module.h"
45#include "citrus_region.h"
46#include "citrus_mmap.h"
47#include "citrus_hash.h"
48#include "citrus_iconv.h"
49#include "citrus_stdenc.h"
50#include "citrus_mapper.h"
51#include "citrus_csmapper.h"
52#include "citrus_memstream.h"
53#include "citrus_iconv_std.h"
54#include "citrus_esdb.h"
55
56/* ---------------------------------------------------------------------- */
57
58_CITRUS_ICONV_DECLS(iconv_std);
59_CITRUS_ICONV_DEF_OPS(iconv_std);
60
61
62/* ---------------------------------------------------------------------- */
63
64int
65_citrus_iconv_std_iconv_getops(struct _citrus_iconv_ops *ops)
66{
67
68	memcpy(ops, &_citrus_iconv_std_iconv_ops,
69	    sizeof(_citrus_iconv_std_iconv_ops));
70
71	return (0);
72}
73
74/* ---------------------------------------------------------------------- */
75
76/*
77 * convenience routines for stdenc.
78 */
79static __inline void
80save_encoding_state(struct _citrus_iconv_std_encoding *se)
81{
82
83	if (se->se_ps)
84		memcpy(se->se_pssaved, se->se_ps,
85		    _stdenc_get_state_size(se->se_handle));
86}
87
88static __inline void
89restore_encoding_state(struct _citrus_iconv_std_encoding *se)
90{
91
92	if (se->se_ps)
93		memcpy(se->se_ps, se->se_pssaved,
94		    _stdenc_get_state_size(se->se_handle));
95}
96
97static __inline void
98init_encoding_state(struct _citrus_iconv_std_encoding *se)
99{
100
101	if (se->se_ps)
102		_stdenc_init_state(se->se_handle, se->se_ps);
103}
104
105static __inline int
106mbtocsx(struct _citrus_iconv_std_encoding *se,
107    _csid_t *csid, _index_t *idx, char **s, size_t n, size_t *nresult,
108    struct iconv_hooks *hooks)
109{
110
111	return (_stdenc_mbtocs(se->se_handle, csid, idx, s, n, se->se_ps,
112			      nresult, hooks));
113}
114
115static __inline int
116cstombx(struct _citrus_iconv_std_encoding *se,
117    char *s, size_t n, _csid_t csid, _index_t idx, size_t *nresult,
118    struct iconv_hooks *hooks)
119{
120
121	return (_stdenc_cstomb(se->se_handle, s, n, csid, idx, se->se_ps,
122			      nresult, hooks));
123}
124
125static __inline int
126wctombx(struct _citrus_iconv_std_encoding *se,
127    char *s, size_t n, _wc_t wc, size_t *nresult,
128    struct iconv_hooks *hooks)
129{
130
131	return (_stdenc_wctomb(se->se_handle, s, n, wc, se->se_ps, nresult,
132			     hooks));
133}
134
135static __inline int
136put_state_resetx(struct _citrus_iconv_std_encoding *se, char *s, size_t n,
137    size_t *nresult)
138{
139
140	return (_stdenc_put_state_reset(se->se_handle, s, n, se->se_ps, nresult));
141}
142
143static __inline int
144get_state_desc_gen(struct _citrus_iconv_std_encoding *se, int *rstate)
145{
146	struct _stdenc_state_desc ssd;
147	int ret;
148
149	ret = _stdenc_get_state_desc(se->se_handle, se->se_ps,
150	    _STDENC_SDID_GENERIC, &ssd);
151	if (!ret)
152		*rstate = ssd.u.generic.state;
153
154	return (ret);
155}
156
157/*
158 * init encoding context
159 */
160static int
161init_encoding(struct _citrus_iconv_std_encoding *se, struct _stdenc *cs,
162    void *ps1, void *ps2)
163{
164	int ret = -1;
165
166	se->se_handle = cs;
167	se->se_ps = ps1;
168	se->se_pssaved = ps2;
169
170	if (se->se_ps)
171		ret = _stdenc_init_state(cs, se->se_ps);
172	if (!ret && se->se_pssaved)
173		ret = _stdenc_init_state(cs, se->se_pssaved);
174
175	return (ret);
176}
177
/*
 * Open the charset mapper from `src' to `dst' and hand it back in *rcm.
 *
 * A mapper is accepted only if its source maximum and destination
 * maximum are both 1 and its state size is 0; any other mapper is closed
 * again and EINVAL is returned.  An error from _csmapper_open() is
 * returned as is.  `rnorm' is passed through to _csmapper_open().
 *
 * Returns 0 on success; *rcm is written only in that case.
 */
static int
open_csmapper(struct _csmapper **rcm, const char *src, const char *dst,
    unsigned long *rnorm)
{
	struct _csmapper *mapper;
	int error;

	error = _csmapper_open(&mapper, src, dst, 0, rnorm);
	if (error != 0)
		return (error);

	if (_csmapper_get_src_max(mapper) == 1 &&
	    _csmapper_get_dst_max(mapper) == 1 &&
	    _csmapper_get_state_size(mapper) == 0) {
		*rcm = mapper;
		return (0);
	}

	_csmapper_close(mapper);
	return (EINVAL);
}
198
199static void
200close_dsts(struct _citrus_iconv_std_dst_list *dl)
201{
202	struct _citrus_iconv_std_dst *sd;
203
204	while ((sd = TAILQ_FIRST(dl)) != NULL) {
205		TAILQ_REMOVE(dl, sd, sd_entry);
206		_csmapper_close(sd->sd_mapper);
207		free(sd);
208	}
209}
210
211static int
212open_dsts(struct _citrus_iconv_std_dst_list *dl,
213    const struct _esdb_charset *ec, const struct _esdb *dbdst)
214{
215	struct _citrus_iconv_std_dst *sd, *sdtmp;
216	unsigned long norm;
217	int i, ret;
218
219	sd = malloc(sizeof(*sd));
220	if (sd == NULL)
221		return (errno);
222
223	for (i = 0; i < dbdst->db_num_charsets; i++) {
224		ret = open_csmapper(&sd->sd_mapper, ec->ec_csname,
225		    dbdst->db_charsets[i].ec_csname, &norm);
226		if (ret == 0) {
227			sd->sd_csid = dbdst->db_charsets[i].ec_csid;
228			sd->sd_norm = norm;
229			/* insert this mapper by sorted order. */
230			TAILQ_FOREACH(sdtmp, dl, sd_entry) {
231				if (sdtmp->sd_norm > norm) {
232					TAILQ_INSERT_BEFORE(sdtmp, sd,
233					    sd_entry);
234					sd = NULL;
235					break;
236				}
237			}
238			if (sd)
239				TAILQ_INSERT_TAIL(dl, sd, sd_entry);
240			sd = malloc(sizeof(*sd));
241			if (sd == NULL) {
242				ret = errno;
243				close_dsts(dl);
244				return (ret);
245			}
246		} else if (ret != ENOENT) {
247			close_dsts(dl);
248			free(sd);
249			return (ret);
250		}
251	}
252	free(sd);
253	return (0);
254}
255
256static void
257close_srcs(struct _citrus_iconv_std_src_list *sl)
258{
259	struct _citrus_iconv_std_src *ss;
260
261	while ((ss = TAILQ_FIRST(sl)) != NULL) {
262		TAILQ_REMOVE(sl, ss, ss_entry);
263		close_dsts(&ss->ss_dsts);
264		free(ss);
265	}
266}
267
268static int
269open_srcs(struct _citrus_iconv_std_src_list *sl,
270    const struct _esdb *dbsrc, const struct _esdb *dbdst)
271{
272	struct _citrus_iconv_std_src *ss;
273	int count = 0, i, ret;
274
275	ss = malloc(sizeof(*ss));
276	if (ss == NULL)
277		return (errno);
278
279	TAILQ_INIT(&ss->ss_dsts);
280
281	for (i = 0; i < dbsrc->db_num_charsets; i++) {
282		ret = open_dsts(&ss->ss_dsts, &dbsrc->db_charsets[i], dbdst);
283		if (ret)
284			goto err;
285		if (!TAILQ_EMPTY(&ss->ss_dsts)) {
286			ss->ss_csid = dbsrc->db_charsets[i].ec_csid;
287			TAILQ_INSERT_TAIL(sl, ss, ss_entry);
288			ss = malloc(sizeof(*ss));
289			if (ss == NULL) {
290				ret = errno;
291				goto err;
292			}
293			count++;
294			TAILQ_INIT(&ss->ss_dsts);
295		}
296	}
297	free(ss);
298
299	return (count ? 0 : ENOENT);
300
301err:
302	free(ss);
303	close_srcs(sl);
304	return (ret);
305}
306
307/* do convert a character */
308#define E_NO_CORRESPONDING_CHAR ENOENT /* XXX */
309static int
310/*ARGSUSED*/
311do_conv(const struct _citrus_iconv_std_shared *is,
312	_csid_t *csid, _index_t *idx)
313{
314	struct _citrus_iconv_std_dst *sd;
315	struct _citrus_iconv_std_src *ss;
316	_index_t tmpidx;
317	int ret;
318
319	TAILQ_FOREACH(ss, &is->is_srcs, ss_entry) {
320		if (ss->ss_csid == *csid) {
321			TAILQ_FOREACH(sd, &ss->ss_dsts, sd_entry) {
322				ret = _csmapper_convert(sd->sd_mapper,
323				    &tmpidx, *idx, NULL);
324				switch (ret) {
325				case _MAPPER_CONVERT_SUCCESS:
326					*csid = sd->sd_csid;
327					*idx = tmpidx;
328					return (0);
329				case _MAPPER_CONVERT_NONIDENTICAL:
330					break;
331				case _MAPPER_CONVERT_SRC_MORE:
332					/*FALLTHROUGH*/
333				case _MAPPER_CONVERT_DST_MORE:
334					/*FALLTHROUGH*/
335				case _MAPPER_CONVERT_ILSEQ:
336					return (EILSEQ);
337				case _MAPPER_CONVERT_FATAL:
338					return (EINVAL);
339				}
340			}
341			break;
342		}
343	}
344
345	return (E_NO_CORRESPONDING_CHAR);
346}
347/* ---------------------------------------------------------------------- */
348
349static int
350/*ARGSUSED*/
351_citrus_iconv_std_iconv_init_shared(struct _citrus_iconv_shared *ci,
352    const char * __restrict src, const char * __restrict dst)
353{
354	struct _citrus_esdb esdbdst, esdbsrc;
355	struct _citrus_iconv_std_shared *is;
356	int ret;
357
358	is = malloc(sizeof(*is));
359	if (is == NULL) {
360		ret = errno;
361		goto err0;
362	}
363	ret = _citrus_esdb_open(&esdbsrc, src);
364	if (ret)
365		goto err1;
366	ret = _citrus_esdb_open(&esdbdst, dst);
367	if (ret)
368		goto err2;
369	ret = _stdenc_open(&is->is_src_encoding, esdbsrc.db_encname,
370	    esdbsrc.db_variable, esdbsrc.db_len_variable);
371	if (ret)
372		goto err3;
373	ret = _stdenc_open(&is->is_dst_encoding, esdbdst.db_encname,
374	    esdbdst.db_variable, esdbdst.db_len_variable);
375	if (ret)
376		goto err4;
377	is->is_use_invalid = esdbdst.db_use_invalid;
378	is->is_invalid = esdbdst.db_invalid;
379
380	TAILQ_INIT(&is->is_srcs);
381	ret = open_srcs(&is->is_srcs, &esdbsrc, &esdbdst);
382	if (ret)
383		goto err5;
384
385	_esdb_close(&esdbsrc);
386	_esdb_close(&esdbdst);
387	ci->ci_closure = is;
388
389	return (0);
390
391err5:
392	_stdenc_close(is->is_dst_encoding);
393err4:
394	_stdenc_close(is->is_src_encoding);
395err3:
396	_esdb_close(&esdbdst);
397err2:
398	_esdb_close(&esdbsrc);
399err1:
400	free(is);
401err0:
402	return (ret);
403}
404
405static void
406_citrus_iconv_std_iconv_uninit_shared(struct _citrus_iconv_shared *ci)
407{
408	struct _citrus_iconv_std_shared *is = ci->ci_closure;
409
410	if (is == NULL)
411		return;
412
413	_stdenc_close(is->is_src_encoding);
414	_stdenc_close(is->is_dst_encoding);
415	close_srcs(&is->is_srcs);
416	free(is);
417}
418
419static int
420_citrus_iconv_std_iconv_init_context(struct _citrus_iconv *cv)
421{
422	const struct _citrus_iconv_std_shared *is = cv->cv_shared->ci_closure;
423	struct _citrus_iconv_std_context *sc;
424	char *ptr;
425	size_t sz, szpsdst, szpssrc;
426
427	szpssrc = _stdenc_get_state_size(is->is_src_encoding);
428	szpsdst = _stdenc_get_state_size(is->is_dst_encoding);
429
430	sz = (szpssrc + szpsdst)*2 + sizeof(struct _citrus_iconv_std_context);
431	sc = malloc(sz);
432	if (sc == NULL)
433		return (errno);
434
435	ptr = (char *)&sc[1];
436	if (szpssrc > 0)
437		init_encoding(&sc->sc_src_encoding, is->is_src_encoding,
438		    ptr, ptr+szpssrc);
439	else
440		init_encoding(&sc->sc_src_encoding, is->is_src_encoding,
441		    NULL, NULL);
442	ptr += szpssrc*2;
443	if (szpsdst > 0)
444		init_encoding(&sc->sc_dst_encoding, is->is_dst_encoding,
445		    ptr, ptr+szpsdst);
446	else
447		init_encoding(&sc->sc_dst_encoding, is->is_dst_encoding,
448		    NULL, NULL);
449
450	cv->cv_closure = (void *)sc;
451
452	return (0);
453}
454
455static void
456_citrus_iconv_std_iconv_uninit_context(struct _citrus_iconv *cv)
457{
458
459	free(cv->cv_closure);
460}
461
462static int
463_citrus_iconv_std_iconv_convert(struct _citrus_iconv * __restrict cv,
464    char * __restrict * __restrict in, size_t * __restrict inbytes,
465    char * __restrict * __restrict out, size_t * __restrict outbytes,
466    uint32_t flags, size_t * __restrict invalids)
467{
468	const struct _citrus_iconv_std_shared *is = cv->cv_shared->ci_closure;
469	struct _citrus_iconv_std_context *sc = cv->cv_closure;
470	_csid_t csid;
471	_index_t idx;
472	char *tmpin;
473	size_t inval, szrin, szrout;
474	int ret, state = 0;
475
476	inval = 0;
477	if (in == NULL || *in == NULL) {
478		/* special cases */
479		if (out != NULL && *out != NULL) {
480			/* init output state and store the shift sequence */
481			save_encoding_state(&sc->sc_src_encoding);
482			save_encoding_state(&sc->sc_dst_encoding);
483			szrout = 0;
484
485			ret = put_state_resetx(&sc->sc_dst_encoding,
486			    *out, *outbytes, &szrout);
487			if (ret)
488				goto err;
489
490			if (szrout == (size_t)-2) {
491				/* too small to store the character */
492				ret = EINVAL;
493				goto err;
494			}
495			*out += szrout;
496			*outbytes -= szrout;
497		} else
498			/* otherwise, discard the shift sequence */
499			init_encoding_state(&sc->sc_dst_encoding);
500		init_encoding_state(&sc->sc_src_encoding);
501		*invalids = 0;
502		return (0);
503	}
504
505	/* normal case */
506	for (;;) {
507		if (*inbytes == 0) {
508			ret = get_state_desc_gen(&sc->sc_src_encoding, &state);
509			if (state == _STDENC_SDGEN_INITIAL ||
510			    state == _STDENC_SDGEN_STABLE)
511				break;
512		}
513
514		/* save the encoding states for the error recovery */
515		save_encoding_state(&sc->sc_src_encoding);
516		save_encoding_state(&sc->sc_dst_encoding);
517
518		/* mb -> csid/index */
519		tmpin = *in;
520		szrin = szrout = 0;
521		ret = mbtocsx(&sc->sc_src_encoding, &csid, &idx, &tmpin,
522		    *inbytes, &szrin, cv->cv_shared->ci_hooks);
523		if (ret)
524			goto err;
525
526		if (szrin == (size_t)-2) {
527			/* incompleted character */
528			ret = get_state_desc_gen(&sc->sc_src_encoding, &state);
529			if (ret) {
530				ret = EINVAL;
531				goto err;
532			}
533			switch (state) {
534			case _STDENC_SDGEN_INITIAL:
535			case _STDENC_SDGEN_STABLE:
536				/* fetch shift sequences only. */
537				goto next;
538			}
539			ret = EINVAL;
540			goto err;
541		}
542		/* convert the character */
543		ret = do_conv(is, &csid, &idx);
544		if (ret) {
545			if (ret == E_NO_CORRESPONDING_CHAR) {
546				inval++;
547				szrout = 0;
548				if ((((flags & _CITRUS_ICONV_F_HIDE_INVALID) == 0) &&
549				    !cv->cv_shared->ci_discard_ilseq) &&
550				    is->is_use_invalid) {
551					ret = wctombx(&sc->sc_dst_encoding,
552					    *out, *outbytes, is->is_invalid,
553					    &szrout, cv->cv_shared->ci_hooks);
554					if (ret)
555						goto err;
556				}
557				goto next;
558			} else
559				goto err;
560		}
561		/* csid/index -> mb */
562		ret = cstombx(&sc->sc_dst_encoding,
563		    *out, *outbytes, csid, idx, &szrout,
564		    cv->cv_shared->ci_hooks);
565		if (ret)
566			goto err;
567next:
568		*inbytes -= tmpin-*in; /* szrin is insufficient on \0. */
569		*in = tmpin;
570		*outbytes -= szrout;
571		*out += szrout;
572	}
573	*invalids = inval;
574
575	return (0);
576
577err:
578	restore_encoding_state(&sc->sc_src_encoding);
579	restore_encoding_state(&sc->sc_dst_encoding);
580	*invalids = inval;
581
582	return (ret);
583}
584