1219019Sgabor/* $FreeBSD$ */
2264497Stijl/*	$NetBSD: citrus_big5.c,v 1.13 2011/05/23 14:53:46 joerg Exp $	*/
3219019Sgabor
4219019Sgabor/*-
5219019Sgabor * Copyright (c)2002, 2006 Citrus Project,
6219019Sgabor * All rights reserved.
7219019Sgabor *
8219019Sgabor * Redistribution and use in source and binary forms, with or without
9219019Sgabor * modification, are permitted provided that the following conditions
10219019Sgabor * are met:
11219019Sgabor * 1. Redistributions of source code must retain the above copyright
12219019Sgabor *    notice, this list of conditions and the following disclaimer.
13219019Sgabor * 2. Redistributions in binary form must reproduce the above copyright
14219019Sgabor *    notice, this list of conditions and the following disclaimer in the
15219019Sgabor *    documentation and/or other materials provided with the distribution.
16219019Sgabor *
17219019Sgabor * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
18219019Sgabor * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19219019Sgabor * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20219019Sgabor * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
21219019Sgabor * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22219019Sgabor * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
23219019Sgabor * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24219019Sgabor * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25219019Sgabor * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26219019Sgabor * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27219019Sgabor * SUCH DAMAGE.
28219019Sgabor */
29219019Sgabor
30219019Sgabor/*-
31219019Sgabor * Copyright (c) 1993
32219019Sgabor *	The Regents of the University of California.  All rights reserved.
33219019Sgabor *
34219019Sgabor * This code is derived from software contributed to Berkeley by
35219019Sgabor * Paul Borman at Krystal Technologies.
36219019Sgabor *
37219019Sgabor * Redistribution and use in source and binary forms, with or without
38219019Sgabor * modification, are permitted provided that the following conditions
39219019Sgabor * are met:
40219019Sgabor * 1. Redistributions of source code must retain the above copyright
41219019Sgabor *    notice, this list of conditions and the following disclaimer.
42219019Sgabor * 2. Redistributions in binary form must reproduce the above copyright
43219019Sgabor *    notice, this list of conditions and the following disclaimer in the
44219019Sgabor *    documentation and/or other materials provided with the distribution.
45219019Sgabor * 3. Neither the name of the University nor the names of its contributors
46219019Sgabor *    may be used to endorse or promote products derived from this software
47219019Sgabor *    without specific prior written permission.
48219019Sgabor *
49219019Sgabor * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
50219019Sgabor * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
51219019Sgabor * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
52219019Sgabor * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
53219019Sgabor * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
54219019Sgabor * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
55219019Sgabor * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
56219019Sgabor * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
57219019Sgabor * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
58219019Sgabor * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
59219019Sgabor * SUCH DAMAGE.
60219019Sgabor */
61219019Sgabor
62219019Sgabor#include <sys/cdefs.h>
63219019Sgabor#include <sys/queue.h>
64219019Sgabor#include <sys/types.h>
65219019Sgabor
66219019Sgabor#include <assert.h>
67219019Sgabor#include <errno.h>
68219019Sgabor#include <limits.h>
69219019Sgabor#include <stddef.h>
70219019Sgabor#include <stdint.h>
71219019Sgabor#include <stdio.h>
72219019Sgabor#include <stdlib.h>
73219019Sgabor#include <string.h>
74219019Sgabor#include <wchar.h>
75219019Sgabor
76219019Sgabor#include "citrus_namespace.h"
77219019Sgabor#include "citrus_prop.h"
78219019Sgabor#include "citrus_types.h"
79219019Sgabor#include "citrus_bcs.h"
80219019Sgabor#include "citrus_module.h"
81219019Sgabor#include "citrus_stdenc.h"
82219019Sgabor#include "citrus_big5.h"
83219019Sgabor
/* ----------------------------------------------------------------------
 * private stuff used by templates
 */
87219019Sgabor
/*
 * Conversion state carried from one call to the next: the bytes of a
 * multibyte character that has only been partially read so far.
 */
typedef struct {
	int	 chlen;		/* number of bytes currently held in ch[] */
	char	 ch[2];		/* buffered bytes of the pending character */
} _BIG5State;
92219019Sgabor
93219019Sgabortypedef struct _BIG5Exclude {
94219019Sgabor	TAILQ_ENTRY(_BIG5Exclude)	 entry;
95264497Stijl	wint_t				 start;
96219019Sgabor	wint_t				 end;
97219019Sgabor} _BIG5Exclude;
98219019Sgabor
99219019Sgabortypedef TAILQ_HEAD(_BIG5ExcludeList, _BIG5Exclude) _BIG5ExcludeList;
100219019Sgabor
101219019Sgabortypedef struct {
102219019Sgabor	_BIG5ExcludeList	 excludes;
103219019Sgabor	int			 cell[0x100];
104219019Sgabor} _BIG5EncodingInfo;
105219019Sgabor
/*
 * Accessors into the enclosing object.
 * NOTE(review): presumably expanded by citrus_stdenc_template.h, which is
 * included at the end of this file -- confirm against that header.
 */
#define _CEI_TO_EI(_cei_)		(&(_cei_)->ei)
#define _CEI_TO_STATE(_cei_, _func_)	(_cei_)->states.s_##_func_

/*
 * Name prefix, types and fixed properties of this encoding.
 * A character occupies at most two bytes; the remaining two properties
 * are both defined as 0.
 */
#define _FUNCNAME(m)			_citrus_BIG5_##m
#define _ENCODING_INFO			_BIG5EncodingInfo
#define _ENCODING_STATE			_BIG5State
#define _ENCODING_MB_CUR_MAX(_ei_)	2
#define _ENCODING_IS_STATE_DEPENDENT	0
#define _STATE_NEEDS_EXPLICIT_INIT(_ps_)	0
115219019Sgabor
116219019Sgabor
117219019Sgaborstatic __inline void
118219019Sgabor/*ARGSUSED*/
119219019Sgabor_citrus_BIG5_init_state(_BIG5EncodingInfo * __restrict ei __unused,
120219019Sgabor    _BIG5State * __restrict s)
121219019Sgabor{
122219019Sgabor
123219019Sgabor	memset(s, 0, sizeof(*s));
124219019Sgabor}
125219019Sgabor
/*
 * State pack/unpack helpers.  They are compiled out by the #if 0 guard.
 * NOTE(review): presumably disabled because nothing in this build calls
 * them -- confirm before re-enabling.
 */
#if 0
/* Copy the conversion state *s into the opaque buffer pspriv. */
static __inline void
/*ARGSUSED*/
_citrus_BIG5_pack_state(_BIG5EncodingInfo * __restrict ei __unused,
    void * __restrict pspriv,
    const _BIG5State * __restrict s)
{

	memcpy(pspriv, s, sizeof(_BIG5State));
}

/* Copy a state previously stored in the opaque buffer pspriv into *s. */
static __inline void
/*ARGSUSED*/
_citrus_BIG5_unpack_state(_BIG5EncodingInfo * __restrict ei __unused,
    _BIG5State * __restrict s,
    const void * __restrict pspriv)
{

	memcpy(s, pspriv, sizeof(_BIG5State));
}
#endif
147219019Sgabor
148219019Sgaborstatic __inline int
149219019Sgabor_citrus_BIG5_check(_BIG5EncodingInfo *ei, unsigned int c)
150219019Sgabor{
151219019Sgabor
152219019Sgabor	return ((ei->cell[c & 0xFF] & 0x1) ? 2 : 1);
153219019Sgabor}
154219019Sgabor
155219019Sgaborstatic __inline int
156219019Sgabor_citrus_BIG5_check2(_BIG5EncodingInfo *ei, unsigned int c)
157219019Sgabor{
158219019Sgabor
159219019Sgabor	return ((ei->cell[c & 0xFF] & 0x2) ? 1 : 0);
160219019Sgabor}
161219019Sgabor
162219019Sgaborstatic __inline int
163219019Sgabor_citrus_BIG5_check_excludes(_BIG5EncodingInfo *ei, wint_t c)
164219019Sgabor{
165219019Sgabor	_BIG5Exclude *exclude;
166219019Sgabor
167219019Sgabor	TAILQ_FOREACH(exclude, &ei->excludes, entry) {
168219019Sgabor		if (c >= exclude->start && c <= exclude->end)
169219019Sgabor			return (EILSEQ);
170219019Sgabor	}
171219019Sgabor	return (0);
172219019Sgabor}
173219019Sgabor
174219019Sgaborstatic int
175262731Stijl_citrus_BIG5_fill_rowcol(void * __restrict ctx, const char * __restrict s,
176219019Sgabor    uint64_t start, uint64_t end)
177219019Sgabor{
178219019Sgabor	_BIG5EncodingInfo *ei;
179219019Sgabor	uint64_t n;
180219019Sgabor	int i;
181219019Sgabor
182219019Sgabor	if (start > 0xFF || end > 0xFF)
183219019Sgabor		return (EINVAL);
184219019Sgabor	ei = (_BIG5EncodingInfo *)ctx;
185219019Sgabor	i = strcmp("row", s) ? 1 : 0;
186219019Sgabor	i = 1 << i;
187219019Sgabor	for (n = start; n <= end; ++n)
188219019Sgabor		ei->cell[n & 0xFF] |= i;
189219019Sgabor	return (0);
190219019Sgabor}
191219019Sgabor
192219019Sgaborstatic int
193219019Sgabor/*ARGSUSED*/
194262731Stijl_citrus_BIG5_fill_excludes(void * __restrict ctx,
195219019Sgabor    const char * __restrict s __unused, uint64_t start, uint64_t end)
196219019Sgabor{
197219019Sgabor	_BIG5EncodingInfo *ei;
198219019Sgabor	_BIG5Exclude *exclude;
199219019Sgabor
200219019Sgabor	if (start > 0xFFFF || end > 0xFFFF)
201219019Sgabor		return (EINVAL);
202219019Sgabor	ei = (_BIG5EncodingInfo *)ctx;
203219019Sgabor	exclude = TAILQ_LAST(&ei->excludes, _BIG5ExcludeList);
204219019Sgabor	if (exclude != NULL && (wint_t)start <= exclude->end)
205219019Sgabor		return (EINVAL);
206219019Sgabor	exclude = (void *)malloc(sizeof(*exclude));
207219019Sgabor	if (exclude == NULL)
208219019Sgabor		return (ENOMEM);
209219019Sgabor	exclude->start = (wint_t)start;
210219019Sgabor	exclude->end = (wint_t)end;
211219019Sgabor	TAILQ_INSERT_TAIL(&ei->excludes, exclude, entry);
212219019Sgabor
213219019Sgabor	return (0);
214219019Sgabor}
215219019Sgabor
216219019Sgaborstatic const _citrus_prop_hint_t root_hints[] = {
217219019Sgabor    _CITRUS_PROP_HINT_NUM("row", &_citrus_BIG5_fill_rowcol),
218219019Sgabor    _CITRUS_PROP_HINT_NUM("col", &_citrus_BIG5_fill_rowcol),
219219019Sgabor    _CITRUS_PROP_HINT_NUM("excludes", &_citrus_BIG5_fill_excludes),
220219019Sgabor    _CITRUS_PROP_HINT_END
221219019Sgabor};
222219019Sgabor
223219019Sgaborstatic void
224219019Sgabor/*ARGSUSED*/
225219019Sgabor_citrus_BIG5_encoding_module_uninit(_BIG5EncodingInfo *ei)
226219019Sgabor{
227219019Sgabor	_BIG5Exclude *exclude;
228219019Sgabor
229219019Sgabor	while ((exclude = TAILQ_FIRST(&ei->excludes)) != NULL) {
230219019Sgabor		TAILQ_REMOVE(&ei->excludes, exclude, entry);
231219019Sgabor		free(exclude);
232219019Sgabor	}
233219019Sgabor}
234219019Sgabor
235219019Sgaborstatic int
236219019Sgabor/*ARGSUSED*/
237219019Sgabor_citrus_BIG5_encoding_module_init(_BIG5EncodingInfo * __restrict ei,
238219019Sgabor    const void * __restrict var, size_t lenvar)
239219019Sgabor{
240219019Sgabor	const char *s;
241219019Sgabor	int err;
242219019Sgabor
243219019Sgabor	memset((void *)ei, 0, sizeof(*ei));
244219019Sgabor	TAILQ_INIT(&ei->excludes);
245219019Sgabor
246219019Sgabor	if (lenvar > 0 && var != NULL) {
247219019Sgabor		s = _bcs_skip_ws_len((const char *)var, &lenvar);
248219019Sgabor		if (lenvar > 0 && *s != '\0') {
249219019Sgabor			err = _citrus_prop_parse_variable(
250219019Sgabor			    root_hints, (void *)ei, s, lenvar);
251219019Sgabor			if (err == 0)
252219019Sgabor				return (0);
253219019Sgabor
254219019Sgabor			_citrus_BIG5_encoding_module_uninit(ei);
255219019Sgabor			memset((void *)ei, 0, sizeof(*ei));
256219019Sgabor			TAILQ_INIT(&ei->excludes);
257219019Sgabor		}
258219019Sgabor	}
259219019Sgabor
260219019Sgabor	/* fallback Big5-1984, for backward compatibility. */
261262731Stijl	_citrus_BIG5_fill_rowcol(ei, "row", 0xA1, 0xFE);
262262731Stijl	_citrus_BIG5_fill_rowcol(ei, "col", 0x40, 0x7E);
263262731Stijl	_citrus_BIG5_fill_rowcol(ei, "col", 0xA1, 0xFE);
264219019Sgabor
265219019Sgabor	return (0);
266219019Sgabor}
267219019Sgabor
268219019Sgaborstatic int
269219019Sgabor/*ARGSUSED*/
270219019Sgabor_citrus_BIG5_mbrtowc_priv(_BIG5EncodingInfo * __restrict ei,
271219019Sgabor    wchar_t * __restrict pwc,
272252583Speter    const char ** __restrict s, size_t n,
273219019Sgabor    _BIG5State * __restrict psenc,
274219019Sgabor    size_t * __restrict nresult)
275219019Sgabor{
276219019Sgabor	wchar_t wchar;
277252583Speter	const char *s0;
278219019Sgabor	int c, chlenbak;
279219019Sgabor
280219019Sgabor	s0 = *s;
281219019Sgabor
282219019Sgabor	if (s0 == NULL) {
283219019Sgabor		_citrus_BIG5_init_state(ei, psenc);
284219019Sgabor		*nresult = 0;
285219019Sgabor		return (0);
286219019Sgabor	}
287219019Sgabor
288219019Sgabor	chlenbak = psenc->chlen;
289219019Sgabor
290219019Sgabor	/* make sure we have the first byte in the buffer */
291219019Sgabor	switch (psenc->chlen) {
292219019Sgabor	case 0:
293219019Sgabor		if (n < 1)
294219019Sgabor			goto restart;
295219019Sgabor		psenc->ch[0] = *s0++;
296219019Sgabor		psenc->chlen = 1;
297219019Sgabor		n--;
298219019Sgabor		break;
299219019Sgabor	case 1:
300219019Sgabor		break;
301219019Sgabor	default:
302219019Sgabor		/* illegal state */
303219019Sgabor		goto ilseq;
304219019Sgabor	}
305219019Sgabor
306219019Sgabor	c = _citrus_BIG5_check(ei, psenc->ch[0] & 0xff);
307219019Sgabor	if (c == 0)
308219019Sgabor		goto ilseq;
309219019Sgabor	while (psenc->chlen < c) {
310219019Sgabor		if (n < 1) {
311219019Sgabor			goto restart;
312219019Sgabor		}
313219019Sgabor		psenc->ch[psenc->chlen] = *s0++;
314219019Sgabor		psenc->chlen++;
315219019Sgabor		n--;
316219019Sgabor	}
317219019Sgabor
318219019Sgabor	switch (c) {
319219019Sgabor	case 1:
320219019Sgabor		wchar = psenc->ch[0] & 0xff;
321219019Sgabor		break;
322219019Sgabor	case 2:
323219019Sgabor		if (!_citrus_BIG5_check2(ei, psenc->ch[1] & 0xff))
324219019Sgabor			goto ilseq;
325219019Sgabor		wchar = ((psenc->ch[0] & 0xff) << 8) | (psenc->ch[1] & 0xff);
326219019Sgabor		break;
327219019Sgabor	default:
328219019Sgabor		/* illegal state */
329219019Sgabor		goto ilseq;
330219019Sgabor	}
331219019Sgabor
332219019Sgabor	if (_citrus_BIG5_check_excludes(ei, (wint_t)wchar) != 0)
333219019Sgabor		goto ilseq;
334219019Sgabor
335219019Sgabor	*s = s0;
336219019Sgabor	psenc->chlen = 0;
337219019Sgabor	if (pwc)
338219019Sgabor		*pwc = wchar;
339219019Sgabor	*nresult = wchar ? c - chlenbak : 0;
340219019Sgabor
341219019Sgabor	return (0);
342219019Sgabor
343219019Sgaborilseq:
344219019Sgabor	psenc->chlen = 0;
345219019Sgabor	*nresult = (size_t)-1;
346219019Sgabor	return (EILSEQ);
347219019Sgabor
348219019Sgaborrestart:
349219019Sgabor	*s = s0;
350219019Sgabor	*nresult = (size_t)-2;
351219019Sgabor	return (0);
352219019Sgabor}
353219019Sgabor
354219019Sgaborstatic int
355219019Sgabor/*ARGSUSED*/
356219019Sgabor_citrus_BIG5_wcrtomb_priv(_BIG5EncodingInfo * __restrict ei,
357219019Sgabor    char * __restrict s,
358219019Sgabor    size_t n, wchar_t wc, _BIG5State * __restrict psenc __unused,
359219019Sgabor    size_t * __restrict nresult)
360219019Sgabor{
361264497Stijl	size_t l;
362219019Sgabor	int ret;
363219019Sgabor
364219019Sgabor	/* check invalid sequence */
365219019Sgabor	if (wc & ~0xffff ||
366219019Sgabor	    _citrus_BIG5_check_excludes(ei, (wint_t)wc) != 0) {
367219019Sgabor		ret = EILSEQ;
368219019Sgabor		goto err;
369219019Sgabor	}
370219019Sgabor
371219019Sgabor	if (wc & 0x8000) {
372219019Sgabor		if (_citrus_BIG5_check(ei, (wc >> 8) & 0xff) != 2 ||
373219019Sgabor		    !_citrus_BIG5_check2(ei, wc & 0xff)) {
374219019Sgabor			ret = EILSEQ;
375219019Sgabor			goto err;
376219019Sgabor		}
377219019Sgabor		l = 2;
378219019Sgabor	} else {
379219019Sgabor		if (wc & ~0xff || !_citrus_BIG5_check(ei, wc & 0xff)) {
380219019Sgabor			ret = EILSEQ;
381219019Sgabor			goto err;
382219019Sgabor		}
383219019Sgabor		l = 1;
384219019Sgabor	}
385219019Sgabor
386219019Sgabor	if (n < l) {
387219019Sgabor		/* bound check failure */
388219019Sgabor		ret = E2BIG;
389219019Sgabor		goto err;
390219019Sgabor	}
391219019Sgabor
392219019Sgabor	if (l == 2) {
393219019Sgabor		s[0] = (wc >> 8) & 0xff;
394219019Sgabor		s[1] = wc & 0xff;
395219019Sgabor	} else
396219019Sgabor		s[0] = wc & 0xff;
397219019Sgabor
398219019Sgabor	*nresult = l;
399219019Sgabor
400219019Sgabor	return (0);
401219019Sgabor
402219019Sgaborerr:
403219019Sgabor	*nresult = (size_t)-1;
404219019Sgabor	return (ret);
405219019Sgabor}
406219019Sgabor
407219019Sgaborstatic __inline int
408219019Sgabor/*ARGSUSED*/
409219019Sgabor_citrus_BIG5_stdenc_wctocs(_BIG5EncodingInfo * __restrict ei __unused,
410219019Sgabor    _csid_t * __restrict csid,
411219019Sgabor    _index_t * __restrict idx, wchar_t wc)
412219019Sgabor{
413219019Sgabor
414219019Sgabor	*csid = (wc < 0x100) ? 0 : 1;
415219019Sgabor	*idx = (_index_t)wc;
416219019Sgabor
417219019Sgabor	return (0);
418219019Sgabor}
419219019Sgabor
420219019Sgaborstatic __inline int
421219019Sgabor/*ARGSUSED*/
422219019Sgabor_citrus_BIG5_stdenc_cstowc(_BIG5EncodingInfo * __restrict ei __unused,
423219019Sgabor    wchar_t * __restrict wc,
424219019Sgabor    _csid_t csid, _index_t idx)
425219019Sgabor{
426219019Sgabor
427219019Sgabor	switch (csid) {
428219019Sgabor	case 0:
429219019Sgabor	case 1:
430219019Sgabor		*wc = (wchar_t)idx;
431219019Sgabor		break;
432219019Sgabor	default:
433219019Sgabor		return (EILSEQ);
434219019Sgabor	}
435219019Sgabor
436219019Sgabor	return (0);
437219019Sgabor}
438219019Sgabor
439219019Sgaborstatic __inline int
440219019Sgabor/*ARGSUSED*/
441219019Sgabor_citrus_BIG5_stdenc_get_state_desc_generic(_BIG5EncodingInfo * __restrict ei __unused,
442219019Sgabor    _BIG5State * __restrict psenc,
443219019Sgabor    int * __restrict rstate)
444219019Sgabor{
445219019Sgabor
446219019Sgabor	*rstate = (psenc->chlen == 0) ? _STDENC_SDGEN_INITIAL :
447219019Sgabor	    _STDENC_SDGEN_INCOMPLETE_CHAR;
448219019Sgabor	return (0);
449219019Sgabor}
450219019Sgabor
451219019Sgabor/* ----------------------------------------------------------------------
452219019Sgabor * public interface for stdenc
453219019Sgabor */
454219019Sgabor
/*
 * NOTE(review): both macros are defined outside this file; presumably
 * the first declares the stdenc entry points for BIG5 and the second
 * defines the operations table that refers to them -- confirm in
 * citrus_stdenc.h.  Keep them in this order.
 */
_CITRUS_STDENC_DECLS(BIG5);
_CITRUS_STDENC_DEF_OPS(BIG5);
457219019Sgabor
458219019Sgabor#include "citrus_stdenc_template.h"
459