1219019Sgabor/* $FreeBSD$ */
2219019Sgabor/* $NetBSD: citrus_gbk2k.c,v 1.7 2008/06/14 16:01:07 tnozaki Exp $ */
3219019Sgabor
4219019Sgabor/*-
5219019Sgabor * Copyright (c)2003 Citrus Project,
6219019Sgabor * All rights reserved.
7219019Sgabor *
8219019Sgabor * Redistribution and use in source and binary forms, with or without
9219019Sgabor * modification, are permitted provided that the following conditions
10219019Sgabor * are met:
11219019Sgabor * 1. Redistributions of source code must retain the above copyright
12219019Sgabor *    notice, this list of conditions and the following disclaimer.
13219019Sgabor * 2. Redistributions in binary form must reproduce the above copyright
14219019Sgabor *    notice, this list of conditions and the following disclaimer in the
15219019Sgabor *    documentation and/or other materials provided with the distribution.
16219019Sgabor *
17219019Sgabor * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
18219019Sgabor * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19219019Sgabor * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20219019Sgabor * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
21219019Sgabor * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22219019Sgabor * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
23219019Sgabor * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24219019Sgabor * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25219019Sgabor * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26219019Sgabor * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27219019Sgabor * SUCH DAMAGE.
28219019Sgabor */
29219019Sgabor
30219019Sgabor#include <sys/cdefs.h>
31219019Sgabor#include <sys/types.h>
32219019Sgabor
33219019Sgabor#include <assert.h>
34219019Sgabor#include <errno.h>
35219019Sgabor#include <limits.h>
36219019Sgabor#include <stdbool.h>
37219019Sgabor#include <stddef.h>
38219019Sgabor#include <stdio.h>
39219019Sgabor#include <stdlib.h>
40219019Sgabor#include <string.h>
41219019Sgabor#include <wchar.h>
42219019Sgabor
43219019Sgabor#include "citrus_namespace.h"
44219019Sgabor#include "citrus_types.h"
45219019Sgabor#include "citrus_bcs.h"
46219019Sgabor#include "citrus_module.h"
47219019Sgabor#include "citrus_stdenc.h"
48219019Sgabor#include "citrus_gbk2k.h"
49219019Sgabor
50219019Sgabor
/* ----------------------------------------------------------------------
 * private stuff used by the templates
 */
54219019Sgabor
/*
 * Conversion state: the bytes of a multibyte character collected so far.
 *   chlen - number of valid bytes currently held in ch[]
 *   ch    - byte buffer (a character is at most four bytes long)
 */
struct _GBK2KState {
	int	 chlen;
	char	 ch[4];
};
typedef struct _GBK2KState _GBK2KState;

/*
 * Per-encoding configuration.
 *   mb_cur_max - longest multibyte form handled; the module initializer
 *                sets it to 4 by default and to 2 for the "2byte" variant.
 */
typedef struct {
	int	 mb_cur_max;
} _GBK2KEncodingInfo;
63219019Sgabor
/*
 * Accessors for a composite object that has an `ei' member and a `states'
 * member with one `s_<func>' slot per function.
 */
#define _CEI_TO_EI(_cei_)		(&(_cei_)->ei)
#define _CEI_TO_STATE(_cei_, _func_)	((_cei_)->states.s_##_func_)

/*
 * Hooks that name this module's functions and types.
 * NOTE(review): presumably consumed by citrus_stdenc_template.h, which is
 * included at the end of the file -- confirm against that header.
 */
#define _FUNCNAME(m)			_citrus_GBK2K_##m
#define _ENCODING_INFO			_GBK2KEncodingInfo
#define _ENCODING_STATE			_GBK2KState
#define _ENCODING_MB_CUR_MAX(_ei_)	((_ei_)->mb_cur_max)
#define _ENCODING_IS_STATE_DEPENDENT	0
#define _STATE_NEEDS_EXPLICIT_INIT(_ps_)	0
73219019Sgabor
74219019Sgaborstatic __inline void
75219019Sgabor/*ARGSUSED*/
76219019Sgabor_citrus_GBK2K_init_state(_GBK2KEncodingInfo * __restrict ei __unused,
77219019Sgabor    _GBK2KState * __restrict s)
78219019Sgabor{
79219019Sgabor
80219019Sgabor	memset(s, 0, sizeof(*s));
81219019Sgabor}
82219019Sgabor
83219019Sgaborstatic __inline void
84219019Sgabor/*ARGSUSED*/
85219019Sgabor_citrus_GBK2K_pack_state(_GBK2KEncodingInfo * __restrict ei __unused,
86219019Sgabor    void * __restrict pspriv, const _GBK2KState * __restrict s)
87219019Sgabor{
88219019Sgabor
89219019Sgabor	memcpy(pspriv, (const void *)s, sizeof(*s));
90219019Sgabor}
91219019Sgabor
92219019Sgaborstatic __inline void
93219019Sgabor/*ARGSUSED*/
94219019Sgabor_citrus_GBK2K_unpack_state(_GBK2KEncodingInfo * __restrict ei __unused,
95219019Sgabor    _GBK2KState * __restrict s, const void * __restrict pspriv)
96219019Sgabor{
97219019Sgabor
98219019Sgabor	memcpy((void *)s, pspriv, sizeof(*s));
99219019Sgabor}
100219019Sgabor
/*
 * True when the low eight bits of `c' are in 0x00..0x7f, i.e. the byte is
 * a complete one-byte character.  Only the low eight bits are examined.
 */
static  __inline bool
_mb_singlebyte(int c)
{

	/* 0x00..0x7f is exactly the set of bytes with bit 7 clear. */
	return ((c & 0x80) == 0);
}
107219019Sgabor
/*
 * True when the low eight bits of `c' are in 0x81..0xfe, the range this
 * module accepts for the first byte of a multibyte character (and for the
 * third byte of a four-byte one).
 */
static __inline bool
_mb_leadbyte(int c)
{
	const int byte = c & 0xff;

	return (!(byte < 0x81 || byte > 0xfe));
}
115219019Sgabor
/*
 * True when the low eight bits of `c' can end a two-byte character:
 * 0x40..0x7e or 0x80..0xfe (that is, 0x40..0xfe with 0x7f excluded).
 */
static __inline bool
_mb_trailbyte(int c)
{
	const int byte = c & 0xff;

	if (byte == 0x7f)
		return (false);
	return (byte >= 0x40 && byte <= 0xfe);
}
123219019Sgabor
/*
 * True when the low eight bits of `c' are in 0x30..0x39, the range this
 * module accepts for the second and fourth bytes of a four-byte character.
 */
static __inline bool
_mb_surrogate(int c)
{
	const int byte = c & 0xff;

	return (!(byte < 0x30 || byte > 0x39));
}
131219019Sgabor
/*
 * Number of bytes in the multibyte form of wide character `v', decided by
 * magnitude alone: 1 for values up to 0xff, 2 for values up to 0xffff,
 * otherwise 4.  No validity check of the individual bytes is done here.
 */
static __inline int
_mb_count(wchar_t v)
{
	const uint32_t code = (uint32_t)v; /* XXX */

	if (code <= 0xffU)
		return (1);
	return ((code <= 0xffffU) ? 2 : 4);
}
144219019Sgabor
145219019Sgabor#define	_PSENC		(psenc->ch[psenc->chlen - 1])
146219019Sgabor#define	_PUSH_PSENC(c)	(psenc->ch[psenc->chlen++] = (c))
147219019Sgabor
148219019Sgaborstatic int
149219019Sgabor_citrus_GBK2K_mbrtowc_priv(_GBK2KEncodingInfo * __restrict ei,
150252583Speter    wchar_t * __restrict pwc, const char ** __restrict s, size_t n,
151219019Sgabor    _GBK2KState * __restrict psenc, size_t * __restrict nresult)
152219019Sgabor{
153252583Speter	const char *s0, *s1;
154219019Sgabor	wchar_t wc;
155219019Sgabor	int chlenbak, len;
156219019Sgabor
157219019Sgabor	s0 = *s;
158219019Sgabor
159219019Sgabor	if (s0 == NULL) {
160219019Sgabor		/* _citrus_GBK2K_init_state(ei, psenc); */
161219019Sgabor		psenc->chlen = 0;
162219019Sgabor		*nresult = 0;
163219019Sgabor		return (0);
164219019Sgabor	}
165219019Sgabor
166219019Sgabor	chlenbak = psenc->chlen;
167219019Sgabor
168219019Sgabor	switch (psenc->chlen) {
169219019Sgabor	case 3:
170219019Sgabor		if (!_mb_leadbyte (_PSENC))
171219019Sgabor			goto invalid;
172219019Sgabor	/* FALLTHROUGH */
173219019Sgabor	case 2:
174219019Sgabor		if (!_mb_surrogate(_PSENC) || _mb_trailbyte(_PSENC))
175219019Sgabor			goto invalid;
176219019Sgabor	/* FALLTHROUGH */
177219019Sgabor	case 1:
178219019Sgabor		if (!_mb_leadbyte (_PSENC))
179219019Sgabor			goto invalid;
180219019Sgabor	/* FALLTHOROUGH */
181219019Sgabor	case 0:
182219019Sgabor		break;
183219019Sgabor	default:
184219019Sgabor		goto invalid;
185219019Sgabor	}
186219019Sgabor
187219019Sgabor	for (;;) {
188219019Sgabor		if (n-- < 1)
189219019Sgabor			goto restart;
190219019Sgabor
191219019Sgabor		_PUSH_PSENC(*s0++);
192219019Sgabor
193219019Sgabor		switch (psenc->chlen) {
194219019Sgabor		case 1:
195219019Sgabor			if (_mb_singlebyte(_PSENC))
196219019Sgabor				goto convert;
197219019Sgabor			if (_mb_leadbyte  (_PSENC))
198219019Sgabor				continue;
199219019Sgabor			goto ilseq;
200219019Sgabor		case 2:
201219019Sgabor			if (_mb_trailbyte (_PSENC))
202219019Sgabor				goto convert;
203219019Sgabor			if (ei->mb_cur_max == 4 &&
204219019Sgabor			    _mb_surrogate (_PSENC))
205219019Sgabor				continue;
206219019Sgabor			goto ilseq;
207219019Sgabor		case 3:
208219019Sgabor			if (_mb_leadbyte  (_PSENC))
209219019Sgabor				continue;
210219019Sgabor			goto ilseq;
211219019Sgabor		case 4:
212219019Sgabor			if (_mb_surrogate (_PSENC))
213219019Sgabor				goto convert;
214219019Sgabor			goto ilseq;
215219019Sgabor		}
216219019Sgabor	}
217219019Sgabor
218219019Sgaborconvert:
219219019Sgabor	len = psenc->chlen;
220219019Sgabor	s1  = &psenc->ch[0];
221219019Sgabor	wc  = 0;
222219019Sgabor	while (len-- > 0)
223219019Sgabor		wc = (wc << 8) | (*s1++ & 0xff);
224219019Sgabor
225219019Sgabor	if (pwc != NULL)
226219019Sgabor		*pwc = wc;
227219019Sgabor	*s = s0;
228219019Sgabor	*nresult = (wc == 0) ? 0 : psenc->chlen - chlenbak;
229219019Sgabor	/* _citrus_GBK2K_init_state(ei, psenc); */
230219019Sgabor	psenc->chlen = 0;
231219019Sgabor
232219019Sgabor	return (0);
233219019Sgabor
234219019Sgaborrestart:
235219019Sgabor	*s = s0;
236219019Sgabor	*nresult = (size_t)-2;
237219019Sgabor
238219019Sgabor	return (0);
239219019Sgabor
240219019Sgaborinvalid:
241219019Sgabor	return (EINVAL);
242219019Sgabor
243219019Sgaborilseq:
244219019Sgabor	*nresult = (size_t)-1;
245219019Sgabor	return (EILSEQ);
246219019Sgabor}
247219019Sgabor
248219019Sgaborstatic int
249219019Sgabor_citrus_GBK2K_wcrtomb_priv(_GBK2KEncodingInfo * __restrict ei,
250219019Sgabor    char * __restrict s, size_t n, wchar_t wc, _GBK2KState * __restrict psenc,
251219019Sgabor    size_t * __restrict nresult)
252219019Sgabor{
253219019Sgabor	size_t len;
254219019Sgabor	int ret;
255219019Sgabor
256219019Sgabor	if (psenc->chlen != 0) {
257219019Sgabor		ret = EINVAL;
258219019Sgabor		goto err;
259219019Sgabor	}
260219019Sgabor
261219019Sgabor	len = _mb_count(wc);
262219019Sgabor	if (n < len) {
263219019Sgabor		ret = E2BIG;
264219019Sgabor		goto err;
265219019Sgabor	}
266219019Sgabor
267219019Sgabor	switch (len) {
268219019Sgabor	case 1:
269219019Sgabor		if (!_mb_singlebyte(_PUSH_PSENC(wc     ))) {
270219019Sgabor			ret = EILSEQ;
271219019Sgabor			goto err;
272219019Sgabor		}
273219019Sgabor		break;
274219019Sgabor	case 2:
275219019Sgabor		if (!_mb_leadbyte  (_PUSH_PSENC(wc >> 8)) ||
276219019Sgabor		    !_mb_trailbyte (_PUSH_PSENC(wc))) {
277219019Sgabor			ret = EILSEQ;
278219019Sgabor			goto err;
279219019Sgabor		}
280219019Sgabor		break;
281219019Sgabor	case 4:
282219019Sgabor		if (ei->mb_cur_max != 4 ||
283219019Sgabor		    !_mb_leadbyte  (_PUSH_PSENC(wc >> 24)) ||
284219019Sgabor		    !_mb_surrogate (_PUSH_PSENC(wc >> 16)) ||
285219019Sgabor		    !_mb_leadbyte  (_PUSH_PSENC(wc >>  8)) ||
286219019Sgabor		    !_mb_surrogate (_PUSH_PSENC(wc))) {
287219019Sgabor			ret = EILSEQ;
288219019Sgabor			goto err;
289219019Sgabor		}
290219019Sgabor		break;
291219019Sgabor	}
292219019Sgabor
293219019Sgabor	memcpy(s, psenc->ch, psenc->chlen);
294219019Sgabor	*nresult = psenc->chlen;
295219019Sgabor	/* _citrus_GBK2K_init_state(ei, psenc); */
296219019Sgabor	psenc->chlen = 0;
297219019Sgabor
298219019Sgabor	return (0);
299219019Sgabor
300219019Sgaborerr:
301219019Sgabor	*nresult = (size_t)-1;
302219019Sgabor	return (ret);
303219019Sgabor}
304219019Sgabor
305219019Sgaborstatic __inline int
306219019Sgabor/*ARGSUSED*/
307219019Sgabor_citrus_GBK2K_stdenc_wctocs(_GBK2KEncodingInfo * __restrict ei __unused,
308219019Sgabor    _csid_t * __restrict csid, _index_t * __restrict idx, wchar_t wc)
309219019Sgabor{
310219019Sgabor	uint8_t ch, cl;
311219019Sgabor
312219019Sgabor	if ((uint32_t)wc < 0x80) {
313219019Sgabor		/* ISO646 */
314219019Sgabor		*csid = 0;
315219019Sgabor		*idx = (_index_t)wc;
316219019Sgabor	} else if ((uint32_t)wc >= 0x10000) {
317219019Sgabor		/* GBKUCS : XXX */
318219019Sgabor		*csid = 3;
319219019Sgabor		*idx = (_index_t)wc;
320219019Sgabor	} else {
321219019Sgabor		ch = (uint8_t)(wc >> 8);
322219019Sgabor		cl = (uint8_t)wc;
323219019Sgabor		if (ch >= 0xA1 && cl >= 0xA1) {
324219019Sgabor			/* EUC G1 */
325219019Sgabor			*csid = 1;
326219019Sgabor			*idx = (_index_t)wc & 0x7F7FU;
327219019Sgabor		} else {
328219019Sgabor			/* extended area (0x8140-) */
329219019Sgabor			*csid = 2;
330219019Sgabor			*idx = (_index_t)wc;
331219019Sgabor		}
332219019Sgabor	}
333219019Sgabor
334219019Sgabor	return (0);
335219019Sgabor}
336219019Sgabor
337219019Sgaborstatic __inline int
338219019Sgabor/*ARGSUSED*/
339219019Sgabor_citrus_GBK2K_stdenc_cstowc(_GBK2KEncodingInfo * __restrict ei,
340219019Sgabor    wchar_t * __restrict wc, _csid_t csid, _index_t idx)
341219019Sgabor{
342219019Sgabor
343219019Sgabor	switch (csid) {
344219019Sgabor	case 0:
345219019Sgabor		/* ISO646 */
346219019Sgabor		*wc = (wchar_t)idx;
347219019Sgabor		break;
348219019Sgabor	case 1:
349219019Sgabor		/* EUC G1 */
350219019Sgabor		*wc = (wchar_t)idx | 0x8080U;
351219019Sgabor		break;
352219019Sgabor	case 2:
353219019Sgabor		/* extended area */
354219019Sgabor		*wc = (wchar_t)idx;
355219019Sgabor		break;
356219019Sgabor	case 3:
357219019Sgabor		/* GBKUCS : XXX */
358219019Sgabor		if (ei->mb_cur_max != 4)
359219019Sgabor			return (EINVAL);
360219019Sgabor		*wc = (wchar_t)idx;
361219019Sgabor		break;
362219019Sgabor	default:
363219019Sgabor		return (EILSEQ);
364219019Sgabor	}
365219019Sgabor
366219019Sgabor	return (0);
367219019Sgabor}
368219019Sgabor
369219019Sgaborstatic __inline int
370219019Sgabor/*ARGSUSED*/
371219019Sgabor_citrus_GBK2K_stdenc_get_state_desc_generic(_GBK2KEncodingInfo * __restrict ei __unused,
372219019Sgabor    _GBK2KState * __restrict psenc, int * __restrict rstate)
373219019Sgabor{
374219019Sgabor
375219019Sgabor	*rstate = (psenc->chlen == 0) ? _STDENC_SDGEN_INITIAL :
376219019Sgabor	    _STDENC_SDGEN_INCOMPLETE_CHAR;
377219019Sgabor	return (0);
378219019Sgabor}
379219019Sgabor
380219019Sgaborstatic int
381219019Sgabor/*ARGSUSED*/
382219019Sgabor_citrus_GBK2K_encoding_module_init(_GBK2KEncodingInfo * __restrict ei,
383219019Sgabor    const void * __restrict var, size_t lenvar)
384219019Sgabor{
385219019Sgabor	const char *p;
386219019Sgabor
387219019Sgabor	p = var;
388219019Sgabor	memset((void *)ei, 0, sizeof(*ei));
389219019Sgabor	ei->mb_cur_max = 4;
390219019Sgabor	while (lenvar > 0) {
391219019Sgabor		switch (_bcs_tolower(*p)) {
392219019Sgabor		case '2':
393219019Sgabor			MATCH("2byte", ei->mb_cur_max = 2);
394219019Sgabor			break;
395219019Sgabor		}
396219019Sgabor		p++;
397219019Sgabor		lenvar--;
398219019Sgabor	}
399219019Sgabor
400219019Sgabor	return (0);
401219019Sgabor}
402219019Sgabor
403219019Sgaborstatic void
404219019Sgabor/*ARGSUSED*/
405219019Sgabor_citrus_GBK2K_encoding_module_uninit(_GBK2KEncodingInfo *ei __unused)
406219019Sgabor{
407219019Sgabor
408219019Sgabor}
409219019Sgabor
410219019Sgabor/* ----------------------------------------------------------------------
411219019Sgabor * public interface for stdenc
412219019Sgabor */
413219019Sgabor
/*
 * Instantiate the stdenc declarations and operations for the GBK2K module.
 * NOTE(review): both macros and the template header are defined outside
 * this file; presumably the template builds the public entry points from
 * the _FUNCNAME/_ENCODING_* hooks and the static functions defined above,
 * which is why it is included last -- confirm against the header.
 */
_CITRUS_STDENC_DECLS(GBK2K);
_CITRUS_STDENC_DEF_OPS(GBK2K);

#include "citrus_stdenc_template.h"
418