1219019Sgabor/* $FreeBSD$ */
2219019Sgabor/*	$NetBSD: citrus_big5.c,v 1.12 2008/06/14 16:01:07 tnozaki Exp $	*/
3219019Sgabor
4219019Sgabor/*-
5219019Sgabor * Copyright (c)2002, 2006 Citrus Project,
6219019Sgabor * All rights reserved.
7219019Sgabor *
8219019Sgabor * Redistribution and use in source and binary forms, with or without
9219019Sgabor * modification, are permitted provided that the following conditions
10219019Sgabor * are met:
11219019Sgabor * 1. Redistributions of source code must retain the above copyright
12219019Sgabor *    notice, this list of conditions and the following disclaimer.
13219019Sgabor * 2. Redistributions in binary form must reproduce the above copyright
14219019Sgabor *    notice, this list of conditions and the following disclaimer in the
15219019Sgabor *    documentation and/or other materials provided with the distribution.
16219019Sgabor *
17219019Sgabor * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
18219019Sgabor * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19219019Sgabor * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20219019Sgabor * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
21219019Sgabor * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22219019Sgabor * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
23219019Sgabor * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24219019Sgabor * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25219019Sgabor * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26219019Sgabor * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27219019Sgabor * SUCH DAMAGE.
28219019Sgabor */
29219019Sgabor
30219019Sgabor/*-
31219019Sgabor * Copyright (c) 1993
32219019Sgabor *	The Regents of the University of California.  All rights reserved.
33219019Sgabor *
34219019Sgabor * This code is derived from software contributed to Berkeley by
35219019Sgabor * Paul Borman at Krystal Technologies.
36219019Sgabor *
37219019Sgabor * Redistribution and use in source and binary forms, with or without
38219019Sgabor * modification, are permitted provided that the following conditions
39219019Sgabor * are met:
40219019Sgabor * 1. Redistributions of source code must retain the above copyright
41219019Sgabor *    notice, this list of conditions and the following disclaimer.
42219019Sgabor * 2. Redistributions in binary form must reproduce the above copyright
43219019Sgabor *    notice, this list of conditions and the following disclaimer in the
44219019Sgabor *    documentation and/or other materials provided with the distribution.
45219019Sgabor * 3. Neither the name of the University nor the names of its contributors
46219019Sgabor *    may be used to endorse or promote products derived from this software
47219019Sgabor *    without specific prior written permission.
48219019Sgabor *
49219019Sgabor * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
50219019Sgabor * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
51219019Sgabor * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
52219019Sgabor * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
53219019Sgabor * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
54219019Sgabor * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
55219019Sgabor * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
56219019Sgabor * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
57219019Sgabor * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
58219019Sgabor * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
59219019Sgabor * SUCH DAMAGE.
60219019Sgabor */
61219019Sgabor
62219019Sgabor#include <sys/cdefs.h>
63219019Sgabor#include <sys/queue.h>
64219019Sgabor#include <sys/types.h>
65219019Sgabor
66219019Sgabor#include <assert.h>
67219019Sgabor#include <errno.h>
68219019Sgabor#include <limits.h>
69219019Sgabor#include <stddef.h>
70219019Sgabor#include <stdint.h>
71219019Sgabor#include <stdio.h>
72219019Sgabor#include <stdlib.h>
73219019Sgabor#include <string.h>
74219019Sgabor#include <wchar.h>
75219019Sgabor
76219019Sgabor#include "citrus_namespace.h"
77219019Sgabor#include "citrus_prop.h"
78219019Sgabor#include "citrus_types.h"
79219019Sgabor#include "citrus_bcs.h"
80219019Sgabor#include "citrus_module.h"
81219019Sgabor#include "citrus_stdenc.h"
82219019Sgabor#include "citrus_big5.h"
83219019Sgabor
/* ----------------------------------------------------------------------
 * private definitions used by the templates
 */
87219019Sgabor
/*
 * Conversion state carried between calls: the bytes of a character that
 * has only been partially read so far.
 */
typedef struct {
	int	 chlen;		/* number of bytes currently held in ch[] */
	char	 ch[2];		/* buffered bytes of the pending character */
} _BIG5State;
92219019Sgabor
/*
 * One inclusive range [start, end] of wide-character values that the
 * encoding rejects.  Ranges are linked together through `entry'.
 */
typedef struct _BIG5Exclude {
	TAILQ_ENTRY(_BIG5Exclude)	 entry;	/* list linkage */
	wint_t				 start;	/* first excluded value */
	wint_t				 end;	/* last excluded value */
} _BIG5Exclude;
98219019Sgabor
/* Tail queue head for the list of excluded ranges. */
TAILQ_HEAD(_BIG5ExcludeList, _BIG5Exclude);
typedef struct _BIG5ExcludeList _BIG5ExcludeList;
100219019Sgabor
101219019Sgabortypedef struct {
102219019Sgabor	_BIG5ExcludeList	 excludes;
103219019Sgabor	int			 cell[0x100];
104219019Sgabor} _BIG5EncodingInfo;
105219019Sgabor
/* Accessors into the template-provided "cei" container. */
#define _CEI_TO_EI(cei)			(&(cei)->ei)
#define _CEI_TO_STATE(cei, fn)		(cei)->states.s_##fn

/* Names and properties of this encoding; presumably read by the template. */
#define _FUNCNAME(m)			_citrus_BIG5_##m
#define _ENCODING_INFO			_BIG5EncodingInfo
#define _ENCODING_STATE			_BIG5State
/* A character occupies at most two bytes. */
#define _ENCODING_MB_CUR_MAX(ei)	2
#define _ENCODING_IS_STATE_DEPENDENT	0
#define _STATE_NEEDS_EXPLICIT_INIT(ps)	0
115219019Sgabor
116219019Sgabor
117219019Sgaborstatic __inline void
118219019Sgabor/*ARGSUSED*/
119219019Sgabor_citrus_BIG5_init_state(_BIG5EncodingInfo * __restrict ei __unused,
120219019Sgabor    _BIG5State * __restrict s)
121219019Sgabor{
122219019Sgabor
123219019Sgabor	memset(s, 0, sizeof(*s));
124219019Sgabor}
125219019Sgabor
126219019Sgaborstatic __inline void
127219019Sgabor/*ARGSUSED*/
128219019Sgabor_citrus_BIG5_pack_state(_BIG5EncodingInfo * __restrict ei __unused,
129219019Sgabor    void * __restrict pspriv,
130219019Sgabor    const _BIG5State * __restrict s)
131219019Sgabor{
132219019Sgabor
133219019Sgabor	memcpy(pspriv, (const void *)s, sizeof(*s));
134219019Sgabor}
135219019Sgabor
136219019Sgaborstatic __inline void
137219019Sgabor/*ARGSUSED*/
138219019Sgabor_citrus_BIG5_unpack_state(_BIG5EncodingInfo * __restrict ei __unused,
139219019Sgabor    _BIG5State * __restrict s,
140219019Sgabor    const void * __restrict pspriv)
141219019Sgabor{
142219019Sgabor
143219019Sgabor	memcpy((void *)s, pspriv, sizeof(*s));
144219019Sgabor}
145219019Sgabor
146219019Sgaborstatic __inline int
147219019Sgabor_citrus_BIG5_check(_BIG5EncodingInfo *ei, unsigned int c)
148219019Sgabor{
149219019Sgabor
150219019Sgabor	return ((ei->cell[c & 0xFF] & 0x1) ? 2 : 1);
151219019Sgabor}
152219019Sgabor
153219019Sgaborstatic __inline int
154219019Sgabor_citrus_BIG5_check2(_BIG5EncodingInfo *ei, unsigned int c)
155219019Sgabor{
156219019Sgabor
157219019Sgabor	return ((ei->cell[c & 0xFF] & 0x2) ? 1 : 0);
158219019Sgabor}
159219019Sgabor
160219019Sgaborstatic __inline int
161219019Sgabor_citrus_BIG5_check_excludes(_BIG5EncodingInfo *ei, wint_t c)
162219019Sgabor{
163219019Sgabor	_BIG5Exclude *exclude;
164219019Sgabor
165219019Sgabor	TAILQ_FOREACH(exclude, &ei->excludes, entry) {
166219019Sgabor		if (c >= exclude->start && c <= exclude->end)
167219019Sgabor			return (EILSEQ);
168219019Sgabor	}
169219019Sgabor	return (0);
170219019Sgabor}
171219019Sgabor
172219019Sgaborstatic int
173267829Sdelphij_citrus_BIG5_fill_rowcol(void * __restrict ctx, const char * __restrict s,
174219019Sgabor    uint64_t start, uint64_t end)
175219019Sgabor{
176219019Sgabor	_BIG5EncodingInfo *ei;
177219019Sgabor	uint64_t n;
178219019Sgabor	int i;
179219019Sgabor
180219019Sgabor	if (start > 0xFF || end > 0xFF)
181219019Sgabor		return (EINVAL);
182219019Sgabor	ei = (_BIG5EncodingInfo *)ctx;
183219019Sgabor	i = strcmp("row", s) ? 1 : 0;
184219019Sgabor	i = 1 << i;
185219019Sgabor	for (n = start; n <= end; ++n)
186219019Sgabor		ei->cell[n & 0xFF] |= i;
187219019Sgabor	return (0);
188219019Sgabor}
189219019Sgabor
190219019Sgaborstatic int
191219019Sgabor/*ARGSUSED*/
192267829Sdelphij_citrus_BIG5_fill_excludes(void * __restrict ctx,
193219019Sgabor    const char * __restrict s __unused, uint64_t start, uint64_t end)
194219019Sgabor{
195219019Sgabor	_BIG5EncodingInfo *ei;
196219019Sgabor	_BIG5Exclude *exclude;
197219019Sgabor
198219019Sgabor	if (start > 0xFFFF || end > 0xFFFF)
199219019Sgabor		return (EINVAL);
200219019Sgabor	ei = (_BIG5EncodingInfo *)ctx;
201219019Sgabor	exclude = TAILQ_LAST(&ei->excludes, _BIG5ExcludeList);
202219019Sgabor	if (exclude != NULL && (wint_t)start <= exclude->end)
203219019Sgabor		return (EINVAL);
204219019Sgabor	exclude = (void *)malloc(sizeof(*exclude));
205219019Sgabor	if (exclude == NULL)
206219019Sgabor		return (ENOMEM);
207219019Sgabor	exclude->start = (wint_t)start;
208219019Sgabor	exclude->end = (wint_t)end;
209219019Sgabor	TAILQ_INSERT_TAIL(&ei->excludes, exclude, entry);
210219019Sgabor
211219019Sgabor	return (0);
212219019Sgabor}
213219019Sgabor
214219019Sgaborstatic const _citrus_prop_hint_t root_hints[] = {
215219019Sgabor    _CITRUS_PROP_HINT_NUM("row", &_citrus_BIG5_fill_rowcol),
216219019Sgabor    _CITRUS_PROP_HINT_NUM("col", &_citrus_BIG5_fill_rowcol),
217219019Sgabor    _CITRUS_PROP_HINT_NUM("excludes", &_citrus_BIG5_fill_excludes),
218219019Sgabor    _CITRUS_PROP_HINT_END
219219019Sgabor};
220219019Sgabor
221219019Sgaborstatic void
222219019Sgabor/*ARGSUSED*/
223219019Sgabor_citrus_BIG5_encoding_module_uninit(_BIG5EncodingInfo *ei)
224219019Sgabor{
225219019Sgabor	_BIG5Exclude *exclude;
226219019Sgabor
227219019Sgabor	while ((exclude = TAILQ_FIRST(&ei->excludes)) != NULL) {
228219019Sgabor		TAILQ_REMOVE(&ei->excludes, exclude, entry);
229219019Sgabor		free(exclude);
230219019Sgabor	}
231219019Sgabor}
232219019Sgabor
233219019Sgaborstatic int
234219019Sgabor/*ARGSUSED*/
235219019Sgabor_citrus_BIG5_encoding_module_init(_BIG5EncodingInfo * __restrict ei,
236219019Sgabor    const void * __restrict var, size_t lenvar)
237219019Sgabor{
238219019Sgabor	const char *s;
239219019Sgabor	int err;
240219019Sgabor
241219019Sgabor	memset((void *)ei, 0, sizeof(*ei));
242219019Sgabor	TAILQ_INIT(&ei->excludes);
243219019Sgabor
244219019Sgabor	if (lenvar > 0 && var != NULL) {
245219019Sgabor		s = _bcs_skip_ws_len((const char *)var, &lenvar);
246219019Sgabor		if (lenvar > 0 && *s != '\0') {
247219019Sgabor			err = _citrus_prop_parse_variable(
248219019Sgabor			    root_hints, (void *)ei, s, lenvar);
249219019Sgabor			if (err == 0)
250219019Sgabor				return (0);
251219019Sgabor
252219019Sgabor			_citrus_BIG5_encoding_module_uninit(ei);
253219019Sgabor			memset((void *)ei, 0, sizeof(*ei));
254219019Sgabor			TAILQ_INIT(&ei->excludes);
255219019Sgabor		}
256219019Sgabor	}
257219019Sgabor
258219019Sgabor	/* fallback Big5-1984, for backward compatibility. */
259267829Sdelphij	_citrus_BIG5_fill_rowcol(ei, "row", 0xA1, 0xFE);
260267829Sdelphij	_citrus_BIG5_fill_rowcol(ei, "col", 0x40, 0x7E);
261267829Sdelphij	_citrus_BIG5_fill_rowcol(ei, "col", 0xA1, 0xFE);
262219019Sgabor
263219019Sgabor	return (0);
264219019Sgabor}
265219019Sgabor
266219019Sgaborstatic int
267219019Sgabor/*ARGSUSED*/
268219019Sgabor_citrus_BIG5_mbrtowc_priv(_BIG5EncodingInfo * __restrict ei,
269219019Sgabor    wchar_t * __restrict pwc,
270252583Speter    const char ** __restrict s, size_t n,
271219019Sgabor    _BIG5State * __restrict psenc,
272219019Sgabor    size_t * __restrict nresult)
273219019Sgabor{
274219019Sgabor	wchar_t wchar;
275252583Speter	const char *s0;
276219019Sgabor	int c, chlenbak;
277219019Sgabor
278219019Sgabor	s0 = *s;
279219019Sgabor
280219019Sgabor	if (s0 == NULL) {
281219019Sgabor		_citrus_BIG5_init_state(ei, psenc);
282219019Sgabor		*nresult = 0;
283219019Sgabor		return (0);
284219019Sgabor	}
285219019Sgabor
286219019Sgabor	chlenbak = psenc->chlen;
287219019Sgabor
288219019Sgabor	/* make sure we have the first byte in the buffer */
289219019Sgabor	switch (psenc->chlen) {
290219019Sgabor	case 0:
291219019Sgabor		if (n < 1)
292219019Sgabor			goto restart;
293219019Sgabor		psenc->ch[0] = *s0++;
294219019Sgabor		psenc->chlen = 1;
295219019Sgabor		n--;
296219019Sgabor		break;
297219019Sgabor	case 1:
298219019Sgabor		break;
299219019Sgabor	default:
300219019Sgabor		/* illegal state */
301219019Sgabor		goto ilseq;
302219019Sgabor	}
303219019Sgabor
304219019Sgabor	c = _citrus_BIG5_check(ei, psenc->ch[0] & 0xff);
305219019Sgabor	if (c == 0)
306219019Sgabor		goto ilseq;
307219019Sgabor	while (psenc->chlen < c) {
308219019Sgabor		if (n < 1) {
309219019Sgabor			goto restart;
310219019Sgabor		}
311219019Sgabor		psenc->ch[psenc->chlen] = *s0++;
312219019Sgabor		psenc->chlen++;
313219019Sgabor		n--;
314219019Sgabor	}
315219019Sgabor
316219019Sgabor	switch (c) {
317219019Sgabor	case 1:
318219019Sgabor		wchar = psenc->ch[0] & 0xff;
319219019Sgabor		break;
320219019Sgabor	case 2:
321219019Sgabor		if (!_citrus_BIG5_check2(ei, psenc->ch[1] & 0xff))
322219019Sgabor			goto ilseq;
323219019Sgabor		wchar = ((psenc->ch[0] & 0xff) << 8) | (psenc->ch[1] & 0xff);
324219019Sgabor		break;
325219019Sgabor	default:
326219019Sgabor		/* illegal state */
327219019Sgabor		goto ilseq;
328219019Sgabor	}
329219019Sgabor
330219019Sgabor	if (_citrus_BIG5_check_excludes(ei, (wint_t)wchar) != 0)
331219019Sgabor		goto ilseq;
332219019Sgabor
333219019Sgabor	*s = s0;
334219019Sgabor	psenc->chlen = 0;
335219019Sgabor	if (pwc)
336219019Sgabor		*pwc = wchar;
337219019Sgabor	*nresult = wchar ? c - chlenbak : 0;
338219019Sgabor
339219019Sgabor	return (0);
340219019Sgabor
341219019Sgaborilseq:
342219019Sgabor	psenc->chlen = 0;
343219019Sgabor	*nresult = (size_t)-1;
344219019Sgabor	return (EILSEQ);
345219019Sgabor
346219019Sgaborrestart:
347219019Sgabor	*s = s0;
348219019Sgabor	*nresult = (size_t)-2;
349219019Sgabor	return (0);
350219019Sgabor}
351219019Sgabor
352219019Sgaborstatic int
353219019Sgabor/*ARGSUSED*/
354219019Sgabor_citrus_BIG5_wcrtomb_priv(_BIG5EncodingInfo * __restrict ei,
355219019Sgabor    char * __restrict s,
356219019Sgabor    size_t n, wchar_t wc, _BIG5State * __restrict psenc __unused,
357219019Sgabor    size_t * __restrict nresult)
358219019Sgabor{
359219019Sgabor	unsigned char l;
360219019Sgabor	int ret;
361219019Sgabor
362219019Sgabor	/* check invalid sequence */
363219019Sgabor	if (wc & ~0xffff ||
364219019Sgabor	    _citrus_BIG5_check_excludes(ei, (wint_t)wc) != 0) {
365219019Sgabor		ret = EILSEQ;
366219019Sgabor		goto err;
367219019Sgabor	}
368219019Sgabor
369219019Sgabor	if (wc & 0x8000) {
370219019Sgabor		if (_citrus_BIG5_check(ei, (wc >> 8) & 0xff) != 2 ||
371219019Sgabor		    !_citrus_BIG5_check2(ei, wc & 0xff)) {
372219019Sgabor			ret = EILSEQ;
373219019Sgabor			goto err;
374219019Sgabor		}
375219019Sgabor		l = 2;
376219019Sgabor	} else {
377219019Sgabor		if (wc & ~0xff || !_citrus_BIG5_check(ei, wc & 0xff)) {
378219019Sgabor			ret = EILSEQ;
379219019Sgabor			goto err;
380219019Sgabor		}
381219019Sgabor		l = 1;
382219019Sgabor	}
383219019Sgabor
384219019Sgabor	if (n < l) {
385219019Sgabor		/* bound check failure */
386219019Sgabor		ret = E2BIG;
387219019Sgabor		goto err;
388219019Sgabor	}
389219019Sgabor
390219019Sgabor	if (l == 2) {
391219019Sgabor		s[0] = (wc >> 8) & 0xff;
392219019Sgabor		s[1] = wc & 0xff;
393219019Sgabor	} else
394219019Sgabor		s[0] = wc & 0xff;
395219019Sgabor
396219019Sgabor	*nresult = l;
397219019Sgabor
398219019Sgabor	return (0);
399219019Sgabor
400219019Sgaborerr:
401219019Sgabor	*nresult = (size_t)-1;
402219019Sgabor	return (ret);
403219019Sgabor}
404219019Sgabor
405219019Sgaborstatic __inline int
406219019Sgabor/*ARGSUSED*/
407219019Sgabor_citrus_BIG5_stdenc_wctocs(_BIG5EncodingInfo * __restrict ei __unused,
408219019Sgabor    _csid_t * __restrict csid,
409219019Sgabor    _index_t * __restrict idx, wchar_t wc)
410219019Sgabor{
411219019Sgabor
412219019Sgabor	*csid = (wc < 0x100) ? 0 : 1;
413219019Sgabor	*idx = (_index_t)wc;
414219019Sgabor
415219019Sgabor	return (0);
416219019Sgabor}
417219019Sgabor
418219019Sgaborstatic __inline int
419219019Sgabor/*ARGSUSED*/
420219019Sgabor_citrus_BIG5_stdenc_cstowc(_BIG5EncodingInfo * __restrict ei __unused,
421219019Sgabor    wchar_t * __restrict wc,
422219019Sgabor    _csid_t csid, _index_t idx)
423219019Sgabor{
424219019Sgabor
425219019Sgabor	switch (csid) {
426219019Sgabor	case 0:
427219019Sgabor	case 1:
428219019Sgabor		*wc = (wchar_t)idx;
429219019Sgabor		break;
430219019Sgabor	default:
431219019Sgabor		return (EILSEQ);
432219019Sgabor	}
433219019Sgabor
434219019Sgabor	return (0);
435219019Sgabor}
436219019Sgabor
437219019Sgaborstatic __inline int
438219019Sgabor/*ARGSUSED*/
439219019Sgabor_citrus_BIG5_stdenc_get_state_desc_generic(_BIG5EncodingInfo * __restrict ei __unused,
440219019Sgabor    _BIG5State * __restrict psenc,
441219019Sgabor    int * __restrict rstate)
442219019Sgabor{
443219019Sgabor
444219019Sgabor	*rstate = (psenc->chlen == 0) ? _STDENC_SDGEN_INITIAL :
445219019Sgabor	    _STDENC_SDGEN_INCOMPLETE_CHAR;
446219019Sgabor	return (0);
447219019Sgabor}
448219019Sgabor
449219019Sgabor/* ----------------------------------------------------------------------
450219019Sgabor * public interface for stdenc
451219019Sgabor */
452219019Sgabor
/*
 * Expand the stdenc declaration and operations macros for the BIG5
 * module.  NOTE(review): both macros are defined outside this file
 * (presumably in citrus_stdenc.h); confirm there what they emit.
 */
453219019Sgabor_CITRUS_STDENC_DECLS(BIG5);
454219019Sgabor_CITRUS_STDENC_DEF_OPS(BIG5);
455219019Sgabor
456219019Sgabor#include "citrus_stdenc_template.h"
457