citrus_hz.c revision 267829
1/* $FreeBSD: releng/10.0/lib/libiconv_modules/HZ/citrus_hz.c 267829 2014-06-24 19:05:08Z delphij $ */
2/* $NetBSD: citrus_hz.c,v 1.2 2008/06/14 16:01:07 tnozaki Exp $ */
3
4/*-
5 * Copyright (c)2004, 2006 Citrus Project,
6 * All rights reserved.
7 *
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
10 * are met:
11 * 1. Redistributions of source code must retain the above copyright
12 *    notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 *    notice, this list of conditions and the following disclaimer in the
15 *    documentation and/or other materials provided with the distribution.
16 *
17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
18 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
21 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
23 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27 * SUCH DAMAGE.
28 *
29 */
30
31#include <sys/cdefs.h>
32#include <sys/queue.h>
33#include <sys/types.h>
34
35#include <assert.h>
36#include <errno.h>
37#include <limits.h>
38#include <stddef.h>
39#include <stdint.h>
40#include <stdlib.h>
41#include <string.h>
42#include <wchar.h>
43
44#include "citrus_namespace.h"
45#include "citrus_types.h"
46#include "citrus_bcs.h"
47#include "citrus_module.h"
48#include "citrus_stdenc.h"
49
50#include "citrus_hz.h"
51#include "citrus_prop.h"
52
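/*
 * wchar_t mapping:
 *
 * CTRL/ASCII	00000000 00000000 00000000 gxxxxxxx
 * GB2312	00000000 00000000 0xxxxxxx gxxxxxxx
 * 94/96*n (~M)	0mmmmmmm 0xxxxxxx 0xxxxxxx gxxxxxxx
 *
 * g: set when the character was taken from the GR (high bit set) half of
 *    the active escape, clear when it came from the GL half.
 * m: the escape character M (as in "~M") that selects the character set.
 * x: the 7-bit row/column bytes of the character.
 */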
60
61#define ESCAPE_CHAR	'~'
62
/*
 * Character set classes handled by this module.  The numeric values are
 * used as indices into the ranges[] table, so they must stay dense and
 * keep this order.
 */
typedef enum {
	CTRL	= 0,	/* control characters */
	ASCII	= 1,
	GB2312	= 2,
	CS94	= 3,	/* generic 94^n character set */
	CS96	= 4	/* generic 96^n character set */
} charset_t;
66
/* Inclusive range of 7-bit byte values accepted for one charset class. */
typedef struct {
	int	 start;		/* lowest accepted value */
	int	 end;		/* highest accepted value */
	int	 width;		/* number of values, i.e. end - start + 1 */
} range_t;
72
73static const range_t ranges[] = {
74#define RANGE(start, end) { start, end, (end - start) + 1 }
75/* CTRL   */ RANGE(0x00, 0x1F),
76/* ASCII  */ RANGE(0x20, 0x7F),
77/* GB2312 */ RANGE(0x21, 0x7E),
78/* CS94   */ RANGE(0x21, 0x7E),
79/* CS96   */ RANGE(0x20, 0x7F),
80#undef RANGE
81};
82
83typedef struct escape_t escape_t;
84typedef struct {
85	charset_t	 charset;
86	escape_t	*escape;
87	ssize_t		 length;
88#define ROWCOL_MAX	3
89} graphic_t;
90
91typedef TAILQ_HEAD(escape_list, escape_t) escape_list;
92struct escape_t {
93	TAILQ_ENTRY(escape_t)	 entry;
94	escape_list		*set;
95	graphic_t		*left;
96	graphic_t		*right;
97	int			 ch;
98};
99
/* Field accessors for an escape_t pointer. */
#define GL(_e_)		((_e_)->left)		/* GL graphic set */
#define GR(_e_)		((_e_)->right)		/* GR graphic set */
#define SET(_e_)	((_e_)->set)		/* owning escape list */
#define ESC(_e_)	((_e_)->ch)		/* escape character */
/* First escape of the list the given escape belongs to. */
#define INIT(_e_)	(TAILQ_FIRST(SET(_e_)))
105
106static __inline escape_t *
107find_escape(escape_list *set, int ch)
108{
109	escape_t *escape;
110
111	TAILQ_FOREACH(escape, set, entry) {
112		if (ESC(escape) == ch)
113			break;
114	}
115
116	return (escape);
117}
118
119typedef struct {
120	escape_list	 e0;
121	escape_list	 e1;
122	graphic_t	*ascii;
123	graphic_t	*gb2312;
124} _HZEncodingInfo;
125
/* The two escape lists of an encoding info and their first entries. */
#define E0SET(_ei_)	(&(_ei_)->e0)
#define E1SET(_ei_)	(&(_ei_)->e1)
#define INIT0(_ei_)	(TAILQ_FIRST(E0SET(_ei_)))	/* initial escape */
#define INIT1(_ei_)	(TAILQ_FIRST(E1SET(_ei_)))
130
131typedef struct {
132	escape_t	*inuse;
133	int		 chlen;
134	char		 ch[ROWCOL_MAX];
135} _HZState;
136
/*
 * Glue macros for the generic encoding template.
 * NOTE(review): presumably consumed by citrus_stdenc_template.h, which is
 * included at the end of this file -- confirm against that header.
 */
#define _CEI_TO_EI(_cei_)		(&(_cei_)->ei)
#define _CEI_TO_STATE(_cei_, _func_)	(_cei_)->states.s_##_func_

#define _FUNCNAME(m)			_citrus_HZ_##m
#define _ENCODING_INFO			_HZEncodingInfo
#define _ENCODING_STATE			_HZState
#define _ENCODING_MB_CUR_MAX(_ei_)	MB_LEN_MAX
#define _ENCODING_IS_STATE_DEPENDENT		1
/* A state whose escape has never been set still needs init_state(). */
#define _STATE_NEEDS_EXPLICIT_INIT(_ps_)	((_ps_)->inuse == NULL)
146
147static __inline void
148_citrus_HZ_init_state(_HZEncodingInfo * __restrict ei,
149    _HZState * __restrict psenc)
150{
151
152	psenc->chlen = 0;
153	psenc->inuse = INIT0(ei);
154}
155
156static __inline void
157/*ARGSUSED*/
158_citrus_HZ_pack_state(_HZEncodingInfo * __restrict ei __unused,
159    void *__restrict pspriv, const _HZState * __restrict psenc)
160{
161
162	memcpy(pspriv, (const void *)psenc, sizeof(*psenc));
163}
164
165static __inline void
166/*ARGSUSED*/
167_citrus_HZ_unpack_state(_HZEncodingInfo * __restrict ei __unused,
168    _HZState * __restrict psenc, const void * __restrict pspriv)
169{
170
171	memcpy((void *)psenc, pspriv, sizeof(*psenc));
172}
173
174static int
175_citrus_HZ_mbrtowc_priv(_HZEncodingInfo * __restrict ei,
176    wchar_t * __restrict pwc, const char ** __restrict s, size_t n,
177    _HZState * __restrict psenc, size_t * __restrict nresult)
178{
179	escape_t *candidate, *init;
180	graphic_t *graphic;
181	const range_t *range;
182	const char *s0;
183	wchar_t wc;
184	int bit, ch, head, len, tail;
185
186	if (*s == NULL) {
187		_citrus_HZ_init_state(ei, psenc);
188		*nresult = 1;
189		return (0);
190	}
191	s0 = *s;
192	if (psenc->chlen < 0 || psenc->inuse == NULL)
193		return (EINVAL);
194
195	wc = (wchar_t)0;
196	bit = head = tail = 0;
197	graphic = NULL;
198	for (len = 0; len <= MB_LEN_MAX;) {
199		if (psenc->chlen == tail) {
200			if (n-- < 1) {
201				*s = s0;
202				*nresult = (size_t)-2;
203				return (0);
204			}
205			psenc->ch[psenc->chlen++] = *s0++;
206			++len;
207		}
208		ch = (unsigned char)psenc->ch[tail++];
209		if (tail == 1) {
210			if ((ch & ~0x80) <= 0x1F) {
211				if (psenc->inuse != INIT0(ei))
212					break;
213				wc = (wchar_t)ch;
214				goto done;
215			}
216			if (ch & 0x80) {
217				graphic = GR(psenc->inuse);
218				bit = 0x80;
219				ch &= ~0x80;
220			} else {
221				graphic = GL(psenc->inuse);
222				if (ch == ESCAPE_CHAR)
223					continue;
224				bit = 0x0;
225			}
226			if (graphic == NULL)
227				break;
228		} else if (tail == 2 && psenc->ch[0] == ESCAPE_CHAR) {
229			if (tail < psenc->chlen)
230				return (EINVAL);
231			if (ch == ESCAPE_CHAR) {
232				++head;
233			} else if (ch == '\n') {
234				if (psenc->inuse != INIT0(ei))
235					break;
236				tail = psenc->chlen = 0;
237				continue;
238			} else {
239				candidate = NULL;
240				init = INIT0(ei);
241				if (psenc->inuse == init) {
242					init = INIT1(ei);
243				} else if (INIT(psenc->inuse) == init) {
244					if (ESC(init) != ch)
245						break;
246					candidate = init;
247				}
248				if (candidate == NULL) {
249					candidate = find_escape(
250					    SET(psenc->inuse), ch);
251					if (candidate == NULL) {
252						if (init == NULL ||
253						    ESC(init) != ch)
254							break;
255						candidate = init;
256					}
257				}
258				psenc->inuse = candidate;
259				tail = psenc->chlen = 0;
260				continue;
261			}
262		} else if (ch & 0x80) {
263			if (graphic != GR(psenc->inuse))
264				break;
265			ch &= ~0x80;
266		} else {
267			if (graphic != GL(psenc->inuse))
268				break;
269		}
270		range = &ranges[(size_t)graphic->charset];
271		if (range->start > ch || range->end < ch)
272			break;
273		wc <<= 8;
274		wc |= ch;
275		if (graphic->length == (tail - head)) {
276			if (graphic->charset > GB2312)
277				bit |= ESC(psenc->inuse) << 24;
278			wc |= bit;
279			goto done;
280		}
281	}
282	*nresult = (size_t)-1;
283	return (EILSEQ);
284done:
285	if (tail < psenc->chlen)
286		return (EINVAL);
287	*s = s0;
288	if (pwc != NULL)
289		*pwc = wc;
290	psenc->chlen = 0;
291	*nresult = (wc == 0) ? 0 : len;
292
293	return (0);
294}
295
296static int
297_citrus_HZ_wcrtomb_priv(_HZEncodingInfo * __restrict ei,
298    char * __restrict s, size_t n, wchar_t wc,
299    _HZState * __restrict psenc, size_t * __restrict nresult)
300{
301	escape_t *candidate, *init;
302	graphic_t *graphic;
303	const range_t *range;
304	size_t len;
305	int bit, ch;
306
307	if (psenc->chlen != 0 || psenc->inuse == NULL)
308		return (EINVAL);
309	if (wc & 0x80) {
310		bit = 0x80;
311		wc &= ~0x80;
312	} else {
313		bit = 0x0;
314	}
315	if ((uint32_t)wc <= 0x1F) {
316		candidate = INIT0(ei);
317		graphic = (bit == 0) ? candidate->left : candidate->right;
318		if (graphic == NULL)
319			goto ilseq;
320		range = &ranges[(size_t)CTRL];
321		len = 1;
322	} else if ((uint32_t)wc <= 0x7F) {
323		graphic = ei->ascii;
324		if (graphic == NULL)
325			goto ilseq;
326		candidate = graphic->escape;
327		range = &ranges[(size_t)graphic->charset];
328		len = graphic->length;
329	} else if ((uint32_t)wc <= 0x7F7F) {
330		graphic = ei->gb2312;
331		if (graphic == NULL)
332			goto ilseq;
333		candidate = graphic->escape;
334		range = &ranges[(size_t)graphic->charset];
335		len = graphic->length;
336	} else {
337		ch = (wc >> 24) & 0xFF;
338		candidate = find_escape(E0SET(ei), ch);
339		if (candidate == NULL) {
340			candidate = find_escape(E1SET(ei), ch);
341			if (candidate == NULL)
342				goto ilseq;
343		}
344		wc &= ~0xFF000000;
345		graphic = (bit == 0) ? candidate->left : candidate->right;
346		if (graphic == NULL)
347			goto ilseq;
348		range = &ranges[(size_t)graphic->charset];
349		len = graphic->length;
350	}
351	if (psenc->inuse != candidate) {
352		init = INIT0(ei);
353		if (SET(psenc->inuse) == SET(candidate)) {
354			if (INIT(psenc->inuse) != init ||
355			    psenc->inuse == init || candidate == init)
356				init = NULL;
357		} else if (candidate == (init = INIT(candidate))) {
358			init = NULL;
359		}
360		if (init != NULL) {
361			if (n < 2)
362				return (E2BIG);
363			n -= 2;
364			psenc->ch[psenc->chlen++] = ESCAPE_CHAR;
365			psenc->ch[psenc->chlen++] = ESC(init);
366		}
367		if (n < 2)
368			return (E2BIG);
369		n -= 2;
370		psenc->ch[psenc->chlen++] = ESCAPE_CHAR;
371		psenc->ch[psenc->chlen++] = ESC(candidate);
372		psenc->inuse = candidate;
373	}
374	if (n < len)
375		return (E2BIG);
376	while (len-- > 0) {
377		ch = (wc >> (len * 8)) & 0xFF;
378		if (range->start > ch || range->end < ch)
379			goto ilseq;
380		psenc->ch[psenc->chlen++] = ch | bit;
381	}
382	memcpy(s, psenc->ch, psenc->chlen);
383	*nresult = psenc->chlen;
384	psenc->chlen = 0;
385
386	return (0);
387
388ilseq:
389	*nresult = (size_t)-1;
390	return (EILSEQ);
391}
392
393static __inline int
394_citrus_HZ_put_state_reset(_HZEncodingInfo * __restrict ei,
395    char * __restrict s, size_t n, _HZState * __restrict psenc,
396    size_t * __restrict nresult)
397{
398	escape_t *candidate;
399
400	if (psenc->chlen != 0 || psenc->inuse == NULL)
401		return (EINVAL);
402	candidate = INIT0(ei);
403	if (psenc->inuse != candidate) {
404		if (n < 2)
405			return (E2BIG);
406		n -= 2;
407		psenc->ch[psenc->chlen++] = ESCAPE_CHAR;
408		psenc->ch[psenc->chlen++] = ESC(candidate);
409	}
410	if (n < 1)
411		return (E2BIG);
412	if (psenc->chlen > 0)
413		memcpy(s, psenc->ch, psenc->chlen);
414	*nresult = psenc->chlen;
415	_citrus_HZ_init_state(ei, psenc);
416
417	return (0);
418}
419
420static __inline int
421_citrus_HZ_stdenc_get_state_desc_generic(_HZEncodingInfo * __restrict ei,
422    _HZState * __restrict psenc, int * __restrict rstate)
423{
424
425	if (psenc->chlen < 0 || psenc->inuse == NULL)
426		return (EINVAL);
427	*rstate = (psenc->chlen == 0)
428	    ? ((psenc->inuse == INIT0(ei))
429	        ? _STDENC_SDGEN_INITIAL
430	        : _STDENC_SDGEN_STABLE)
431	    : ((psenc->ch[0] == ESCAPE_CHAR)
432	        ? _STDENC_SDGEN_INCOMPLETE_SHIFT
433	        : _STDENC_SDGEN_INCOMPLETE_CHAR);
434
435	return (0);
436}
437
438static __inline int
439/*ARGSUSED*/
440_citrus_HZ_stdenc_wctocs(_HZEncodingInfo * __restrict ei __unused,
441    _csid_t * __restrict csid, _index_t * __restrict idx, wchar_t wc)
442{
443	int bit;
444
445	if (wc & 0x80) {
446		bit = 0x80;
447		wc &= ~0x80;
448	} else
449		bit = 0x0;
450	if ((uint32_t)wc <= 0x7F) {
451		*csid = (_csid_t)bit;
452		*idx = (_index_t)wc;
453	} else if ((uint32_t)wc <= 0x7F7F) {
454		*csid = (_csid_t)(bit | 0x8000);
455		*idx = (_index_t)wc;
456	} else {
457		*csid = (_index_t)(wc & ~0x00FFFF7F);
458		*idx = (_csid_t)(wc & 0x00FFFF7F);
459	}
460
461	return (0);
462}
463
464static __inline int
465/*ARGSUSED*/
466_citrus_HZ_stdenc_cstowc(_HZEncodingInfo * __restrict ei __unused,
467    wchar_t * __restrict wc, _csid_t csid, _index_t idx)
468{
469
470	*wc = (wchar_t)idx;
471	switch (csid) {
472	case 0x80:
473	case 0x8080:
474		*wc |= (wchar_t)0x80;
475		/*FALLTHROUGH*/
476	case 0x0:
477	case 0x8000:
478		break;
479	default:
480		*wc |= (wchar_t)csid;
481	}
482
483	return (0);
484}
485
486static void
487_citrus_HZ_encoding_module_uninit(_HZEncodingInfo *ei)
488{
489	escape_t *escape;
490
491	while ((escape = TAILQ_FIRST(E0SET(ei))) != NULL) {
492		TAILQ_REMOVE(E0SET(ei), escape, entry);
493		free(GL(escape));
494		free(GR(escape));
495		free(escape);
496	}
497	while ((escape = TAILQ_FIRST(E1SET(ei))) != NULL) {
498		TAILQ_REMOVE(E1SET(ei), escape, entry);
499		free(GL(escape));
500		free(GR(escape));
501		free(escape);
502	}
503}
504
505static int
506_citrus_HZ_parse_char(void *context, const char *name __unused, const char *s)
507{
508	escape_t *escape;
509	void **p;
510
511	p = (void **)context;
512	escape = (escape_t *)p[0];
513	if (escape->ch != '\0')
514		return (EINVAL);
515	escape->ch = *s++;
516	if (escape->ch == ESCAPE_CHAR || *s != '\0')
517		return (EINVAL);
518
519	return (0);
520}
521
522static int
523_citrus_HZ_parse_graphic(void *context, const char *name, const char *s)
524{
525	_HZEncodingInfo *ei;
526	escape_t *escape;
527	graphic_t *graphic;
528	void **p;
529
530	p = (void **)context;
531	escape = (escape_t *)p[0];
532	ei = (_HZEncodingInfo *)p[1];
533	graphic = malloc(sizeof(*graphic));
534	if (graphic == NULL)
535		return (ENOMEM);
536	memset(graphic, 0, sizeof(*graphic));
537	if (strcmp("GL", name) == 0) {
538		if (GL(escape) != NULL)
539			goto release;
540		GL(escape) = graphic;
541	} else if (strcmp("GR", name) == 0) {
542		if (GR(escape) != NULL)
543			goto release;
544		GR(escape) = graphic;
545	} else {
546release:
547		free(graphic);
548		return (EINVAL);
549	}
550	graphic->escape = escape;
551	if (_bcs_strncasecmp("ASCII", s, 5) == 0) {
552		if (s[5] != '\0')
553			return (EINVAL);
554		graphic->charset = ASCII;
555		graphic->length = 1;
556		ei->ascii = graphic;
557		return (0);
558	} else if (_bcs_strncasecmp("GB2312", s, 6) == 0) {
559		if (s[6] != '\0')
560			return (EINVAL);
561		graphic->charset = GB2312;
562		graphic->length = 2;
563		ei->gb2312 = graphic;
564		return (0);
565	} else if (strncmp("94*", s, 3) == 0)
566		graphic->charset = CS94;
567	else if (strncmp("96*", s, 3) == 0)
568		graphic->charset = CS96;
569	else
570		return (EINVAL);
571	s += 3;
572	switch(*s) {
573	case '1': case '2': case '3':
574		graphic->length = (size_t)(*s - '0');
575		if (*++s == '\0')
576			break;
577	/*FALLTHROUGH*/
578	default:
579		return (EINVAL);
580	}
581	return (0);
582}
583
584static const _citrus_prop_hint_t escape_hints[] = {
585_CITRUS_PROP_HINT_STR("CH", &_citrus_HZ_parse_char),
586_CITRUS_PROP_HINT_STR("GL", &_citrus_HZ_parse_graphic),
587_CITRUS_PROP_HINT_STR("GR", &_citrus_HZ_parse_graphic),
588_CITRUS_PROP_HINT_END
589};
590
591static int
592_citrus_HZ_parse_escape(void *context, const char *name, const char *s)
593{
594	_HZEncodingInfo *ei;
595	escape_t *escape;
596	void *p[2];
597
598	ei = (_HZEncodingInfo *)context;
599	escape = malloc(sizeof(*escape));
600	if (escape == NULL)
601		return (EINVAL);
602	memset(escape, 0, sizeof(*escape));
603	if (strcmp("0", name) == 0) {
604		escape->set = E0SET(ei);
605		TAILQ_INSERT_TAIL(E0SET(ei), escape, entry);
606	} else if (strcmp("1", name) == 0) {
607		escape->set = E1SET(ei);
608		TAILQ_INSERT_TAIL(E1SET(ei), escape, entry);
609	} else {
610		free(escape);
611		return (EINVAL);
612	}
613	p[0] = (void *)escape;
614	p[1] = (void *)ei;
615	return (_citrus_prop_parse_variable(
616	    escape_hints, (void *)&p[0], s, strlen(s)));
617}
618
619static const _citrus_prop_hint_t root_hints[] = {
620_CITRUS_PROP_HINT_STR("0", &_citrus_HZ_parse_escape),
621_CITRUS_PROP_HINT_STR("1", &_citrus_HZ_parse_escape),
622_CITRUS_PROP_HINT_END
623};
624
625static int
626_citrus_HZ_encoding_module_init(_HZEncodingInfo * __restrict ei,
627    const void * __restrict var, size_t lenvar)
628{
629	int errnum;
630
631	memset(ei, 0, sizeof(*ei));
632	TAILQ_INIT(E0SET(ei));
633	TAILQ_INIT(E1SET(ei));
634	errnum = _citrus_prop_parse_variable(
635	    root_hints, (void *)ei, var, lenvar);
636	if (errnum != 0)
637		_citrus_HZ_encoding_module_uninit(ei);
638	return (errnum);
639}
640
641/* ----------------------------------------------------------------------
642 * public interface for stdenc
643 */
644
645_CITRUS_STDENC_DECLS(HZ);
646_CITRUS_STDENC_DEF_OPS(HZ);
647
648#include "citrus_stdenc_template.h"
649