citrus_hz.c revision 282275
1/* $FreeBSD: stable/10/lib/libiconv_modules/HZ/citrus_hz.c 282275 2015-04-30 16:08:47Z tijl $ */
2/* $NetBSD: citrus_hz.c,v 1.2 2008/06/14 16:01:07 tnozaki Exp $ */
3
4/*-
5 * Copyright (c)2004, 2006 Citrus Project,
6 * All rights reserved.
7 *
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
10 * are met:
11 * 1. Redistributions of source code must retain the above copyright
12 *    notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 *    notice, this list of conditions and the following disclaimer in the
15 *    documentation and/or other materials provided with the distribution.
16 *
17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
18 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
21 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
23 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27 * SUCH DAMAGE.
28 *
29 */
30
31#include <sys/cdefs.h>
32#include <sys/queue.h>
33#include <sys/types.h>
34
35#include <assert.h>
36#include <errno.h>
37#include <limits.h>
38#include <stddef.h>
39#include <stdint.h>
40#include <stdlib.h>
41#include <string.h>
42#include <wchar.h>
43
44#include "citrus_namespace.h"
45#include "citrus_types.h"
46#include "citrus_bcs.h"
47#include "citrus_module.h"
48#include "citrus_stdenc.h"
49
50#include "citrus_hz.h"
51#include "citrus_prop.h"
52
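/*
 * wchar_t mapping:
 *
 * CTRL/ASCII	00000000 00000000 00000000 gxxxxxxx
 * GB2312	00000000 00000000 0xxxxxxx gxxxxxxx
 * 94/96*n (~M)	0mmmmmmm 0xxxxxxx 0xxxxxxx gxxxxxxx
 *
 * Legend:
 *   g - set (0x80) when the character was read from the GR half, i.e. its
 *       bytes carried the high bit; clear for the GL half.
 *   x - 7-bit row/column bytes of the character, most significant first.
 *   m - the escape character M of the "~M" sequence that selects the
 *       94/96 character set the character belongs to.
 */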
60
/* Byte that introduces every HZ escape sequence ("~X"). */
#define ESCAPE_CHAR	'~'

/*
 * Character set classes.  The numeric values are used as indices into
 * the ranges[] table, so they must stay dense and in this order.
 */
typedef enum {
	CTRL = 0,	/* control characters */
	ASCII = 1,	/* single byte ASCII */
	GB2312 = 2,	/* double byte GB2312 */
	CS94 = 3,	/* generic 94^n set */
	CS96 = 4	/* generic 96^n set */
} charset_t;
66
/* Inclusive range of 7-bit byte values accepted by one character set. */
typedef struct {
	int	 start;		/* lowest valid byte */
	int	 end;		/* highest valid byte */
	int	 width;		/* number of valid bytes: end - start + 1 */
} range_t;

/* Valid byte ranges, indexed by charset_t. */
static const range_t ranges[] = {
#define RANGE(lo, hi)	{ (lo), (hi), ((hi) - (lo)) + 1 }
	RANGE(0x00, 0x1F),	/* CTRL   */
	RANGE(0x20, 0x7F),	/* ASCII  */
	RANGE(0x21, 0x7E),	/* GB2312 */
	RANGE(0x21, 0x7E),	/* CS94   */
	RANGE(0x20, 0x7F),	/* CS96   */
#undef RANGE
};
82
83typedef struct escape_t escape_t;
84typedef struct {
85	charset_t	 charset;
86	escape_t	*escape;
87	ssize_t		 length;
88#define ROWCOL_MAX	3
89} graphic_t;
90
91typedef TAILQ_HEAD(escape_list, escape_t) escape_list;
92struct escape_t {
93	TAILQ_ENTRY(escape_t)	 entry;
94	escape_list		*set;
95	graphic_t		*left;
96	graphic_t		*right;
97	int			 ch;
98};
99
100#define GL(escape)	((escape)->left)
101#define GR(escape)	((escape)->right)
102#define SET(escape)	((escape)->set)
103#define ESC(escape)	((escape)->ch)
104#define INIT(escape)	(TAILQ_FIRST(SET(escape)))
105
106static __inline escape_t *
107find_escape(escape_list *set, int ch)
108{
109	escape_t *escape;
110
111	TAILQ_FOREACH(escape, set, entry) {
112		if (ESC(escape) == ch)
113			break;
114	}
115
116	return (escape);
117}
118
119typedef struct {
120	escape_list	 e0;
121	escape_list	 e1;
122	graphic_t	*ascii;
123	graphic_t	*gb2312;
124} _HZEncodingInfo;
125
126#define E0SET(ei)	(&(ei)->e0)
127#define E1SET(ei)	(&(ei)->e1)
128#define INIT0(ei)	(TAILQ_FIRST(E0SET(ei)))
129#define INIT1(ei)	(TAILQ_FIRST(E1SET(ei)))
130
131typedef struct {
132	escape_t	*inuse;
133	int		 chlen;
134	char		 ch[ROWCOL_MAX];
135} _HZState;
136
137#define _CEI_TO_EI(_cei_)		(&(_cei_)->ei)
138#define _CEI_TO_STATE(_cei_, _func_)	(_cei_)->states.s_##_func_
139
140#define _FUNCNAME(m)			_citrus_HZ_##m
141#define _ENCODING_INFO			_HZEncodingInfo
142#define _ENCODING_STATE			_HZState
143#define _ENCODING_MB_CUR_MAX(_ei_)	MB_LEN_MAX
144#define _ENCODING_IS_STATE_DEPENDENT		1
145#define _STATE_NEEDS_EXPLICIT_INIT(_ps_)	((_ps_)->inuse == NULL)
146
147static __inline void
148_citrus_HZ_init_state(_HZEncodingInfo * __restrict ei,
149    _HZState * __restrict psenc)
150{
151
152	psenc->chlen = 0;
153	psenc->inuse = INIT0(ei);
154}
155
#if 0
/*
 * Disabled: byte-wise save/restore of a conversion state to and from an
 * opaque buffer.  Kept compiled out, exactly as in the original.
 */
static __inline void
/*ARGSUSED*/
_citrus_HZ_pack_state(_HZEncodingInfo * __restrict ei __unused,
    void *__restrict pspriv, const _HZState * __restrict psenc)
{

	memcpy(pspriv, psenc, sizeof(*psenc));
}

static __inline void
/*ARGSUSED*/
_citrus_HZ_unpack_state(_HZEncodingInfo * __restrict ei __unused,
    _HZState * __restrict psenc, const void * __restrict pspriv)
{

	memcpy(psenc, pspriv, sizeof(*psenc));
}
#endif
175
176static int
177_citrus_HZ_mbrtowc_priv(_HZEncodingInfo * __restrict ei,
178    wchar_t * __restrict pwc, char ** __restrict s, size_t n,
179    _HZState * __restrict psenc, size_t * __restrict nresult)
180{
181	escape_t *candidate, *init;
182	graphic_t *graphic;
183	const range_t *range;
184	char *s0;
185	wchar_t wc;
186	int bit, ch, head, len, tail;
187
188	if (*s == NULL) {
189		_citrus_HZ_init_state(ei, psenc);
190		*nresult = 1;
191		return (0);
192	}
193	s0 = *s;
194	if (psenc->chlen < 0 || psenc->inuse == NULL)
195		return (EINVAL);
196
197	wc = (wchar_t)0;
198	bit = head = tail = 0;
199	graphic = NULL;
200	for (len = 0; len <= MB_LEN_MAX;) {
201		if (psenc->chlen == tail) {
202			if (n-- < 1) {
203				*s = s0;
204				*nresult = (size_t)-2;
205				return (0);
206			}
207			psenc->ch[psenc->chlen++] = *s0++;
208			++len;
209		}
210		ch = (unsigned char)psenc->ch[tail++];
211		if (tail == 1) {
212			if ((ch & ~0x80) <= 0x1F) {
213				if (psenc->inuse != INIT0(ei))
214					break;
215				wc = (wchar_t)ch;
216				goto done;
217			}
218			if (ch & 0x80) {
219				graphic = GR(psenc->inuse);
220				bit = 0x80;
221				ch &= ~0x80;
222			} else {
223				graphic = GL(psenc->inuse);
224				if (ch == ESCAPE_CHAR)
225					continue;
226				bit = 0x0;
227			}
228			if (graphic == NULL)
229				break;
230		} else if (tail == 2 && psenc->ch[0] == ESCAPE_CHAR) {
231			if (tail < psenc->chlen)
232				return (EINVAL);
233			if (ch == ESCAPE_CHAR) {
234				++head;
235			} else if (ch == '\n') {
236				if (psenc->inuse != INIT0(ei))
237					break;
238				tail = psenc->chlen = 0;
239				continue;
240			} else {
241				candidate = NULL;
242				init = INIT0(ei);
243				if (psenc->inuse == init) {
244					init = INIT1(ei);
245				} else if (INIT(psenc->inuse) == init) {
246					if (ESC(init) != ch)
247						break;
248					candidate = init;
249				}
250				if (candidate == NULL) {
251					candidate = find_escape(
252					    SET(psenc->inuse), ch);
253					if (candidate == NULL) {
254						if (init == NULL ||
255						    ESC(init) != ch)
256							break;
257						candidate = init;
258					}
259				}
260				psenc->inuse = candidate;
261				tail = psenc->chlen = 0;
262				continue;
263			}
264		} else if (ch & 0x80) {
265			if (graphic != GR(psenc->inuse))
266				break;
267			ch &= ~0x80;
268		} else {
269			if (graphic != GL(psenc->inuse))
270				break;
271		}
272		range = &ranges[(size_t)graphic->charset];
273		if (range->start > ch || range->end < ch)
274			break;
275		wc <<= 8;
276		wc |= ch;
277		if (graphic->length == (tail - head)) {
278			if (graphic->charset > GB2312)
279				bit |= ESC(psenc->inuse) << 24;
280			wc |= bit;
281			goto done;
282		}
283	}
284	*nresult = (size_t)-1;
285	return (EILSEQ);
286done:
287	if (tail < psenc->chlen)
288		return (EINVAL);
289	*s = s0;
290	if (pwc != NULL)
291		*pwc = wc;
292	psenc->chlen = 0;
293	*nresult = (wc == 0) ? 0 : len;
294
295	return (0);
296}
297
298static int
299_citrus_HZ_wcrtomb_priv(_HZEncodingInfo * __restrict ei,
300    char * __restrict s, size_t n, wchar_t wc,
301    _HZState * __restrict psenc, size_t * __restrict nresult)
302{
303	escape_t *candidate, *init;
304	graphic_t *graphic;
305	const range_t *range;
306	size_t len;
307	int bit, ch;
308
309	if (psenc->chlen != 0 || psenc->inuse == NULL)
310		return (EINVAL);
311	if (wc & 0x80) {
312		bit = 0x80;
313		wc &= ~0x80;
314	} else {
315		bit = 0x0;
316	}
317	if ((uint32_t)wc <= 0x1F) {
318		candidate = INIT0(ei);
319		graphic = (bit == 0) ? candidate->left : candidate->right;
320		if (graphic == NULL)
321			goto ilseq;
322		range = &ranges[(size_t)CTRL];
323		len = 1;
324	} else if ((uint32_t)wc <= 0x7F) {
325		graphic = ei->ascii;
326		if (graphic == NULL)
327			goto ilseq;
328		candidate = graphic->escape;
329		range = &ranges[(size_t)graphic->charset];
330		len = graphic->length;
331	} else if ((uint32_t)wc <= 0x7F7F) {
332		graphic = ei->gb2312;
333		if (graphic == NULL)
334			goto ilseq;
335		candidate = graphic->escape;
336		range = &ranges[(size_t)graphic->charset];
337		len = graphic->length;
338	} else {
339		ch = (wc >> 24) & 0xFF;
340		candidate = find_escape(E0SET(ei), ch);
341		if (candidate == NULL) {
342			candidate = find_escape(E1SET(ei), ch);
343			if (candidate == NULL)
344				goto ilseq;
345		}
346		wc &= ~0xFF000000;
347		graphic = (bit == 0) ? candidate->left : candidate->right;
348		if (graphic == NULL)
349			goto ilseq;
350		range = &ranges[(size_t)graphic->charset];
351		len = graphic->length;
352	}
353	if (psenc->inuse != candidate) {
354		init = INIT0(ei);
355		if (SET(psenc->inuse) == SET(candidate)) {
356			if (INIT(psenc->inuse) != init ||
357			    psenc->inuse == init || candidate == init)
358				init = NULL;
359		} else if (candidate == (init = INIT(candidate))) {
360			init = NULL;
361		}
362		if (init != NULL) {
363			if (n < 2)
364				return (E2BIG);
365			n -= 2;
366			psenc->ch[psenc->chlen++] = ESCAPE_CHAR;
367			psenc->ch[psenc->chlen++] = ESC(init);
368		}
369		if (n < 2)
370			return (E2BIG);
371		n -= 2;
372		psenc->ch[psenc->chlen++] = ESCAPE_CHAR;
373		psenc->ch[psenc->chlen++] = ESC(candidate);
374		psenc->inuse = candidate;
375	}
376	if (n < len)
377		return (E2BIG);
378	while (len-- > 0) {
379		ch = (wc >> (len * 8)) & 0xFF;
380		if (range->start > ch || range->end < ch)
381			goto ilseq;
382		psenc->ch[psenc->chlen++] = ch | bit;
383	}
384	memcpy(s, psenc->ch, psenc->chlen);
385	*nresult = psenc->chlen;
386	psenc->chlen = 0;
387
388	return (0);
389
390ilseq:
391	*nresult = (size_t)-1;
392	return (EILSEQ);
393}
394
395static __inline int
396_citrus_HZ_put_state_reset(_HZEncodingInfo * __restrict ei,
397    char * __restrict s, size_t n, _HZState * __restrict psenc,
398    size_t * __restrict nresult)
399{
400	escape_t *candidate;
401
402	if (psenc->chlen != 0 || psenc->inuse == NULL)
403		return (EINVAL);
404	candidate = INIT0(ei);
405	if (psenc->inuse != candidate) {
406		if (n < 2)
407			return (E2BIG);
408		n -= 2;
409		psenc->ch[psenc->chlen++] = ESCAPE_CHAR;
410		psenc->ch[psenc->chlen++] = ESC(candidate);
411	}
412	if (n < 1)
413		return (E2BIG);
414	if (psenc->chlen > 0)
415		memcpy(s, psenc->ch, psenc->chlen);
416	*nresult = psenc->chlen;
417	_citrus_HZ_init_state(ei, psenc);
418
419	return (0);
420}
421
422static __inline int
423_citrus_HZ_stdenc_get_state_desc_generic(_HZEncodingInfo * __restrict ei,
424    _HZState * __restrict psenc, int * __restrict rstate)
425{
426
427	if (psenc->chlen < 0 || psenc->inuse == NULL)
428		return (EINVAL);
429	*rstate = (psenc->chlen == 0)
430	    ? ((psenc->inuse == INIT0(ei))
431	        ? _STDENC_SDGEN_INITIAL
432	        : _STDENC_SDGEN_STABLE)
433	    : ((psenc->ch[0] == ESCAPE_CHAR)
434	        ? _STDENC_SDGEN_INCOMPLETE_SHIFT
435	        : _STDENC_SDGEN_INCOMPLETE_CHAR);
436
437	return (0);
438}
439
440static __inline int
441/*ARGSUSED*/
442_citrus_HZ_stdenc_wctocs(_HZEncodingInfo * __restrict ei __unused,
443    _csid_t * __restrict csid, _index_t * __restrict idx, wchar_t wc)
444{
445	int bit;
446
447	if (wc & 0x80) {
448		bit = 0x80;
449		wc &= ~0x80;
450	} else
451		bit = 0x0;
452	if ((uint32_t)wc <= 0x7F) {
453		*csid = (_csid_t)bit;
454		*idx = (_index_t)wc;
455	} else if ((uint32_t)wc <= 0x7F7F) {
456		*csid = (_csid_t)(bit | 0x8000);
457		*idx = (_index_t)wc;
458	} else {
459		*csid = (_index_t)(wc & ~0x00FFFF7F);
460		*idx = (_csid_t)(wc & 0x00FFFF7F);
461	}
462
463	return (0);
464}
465
466static __inline int
467/*ARGSUSED*/
468_citrus_HZ_stdenc_cstowc(_HZEncodingInfo * __restrict ei __unused,
469    wchar_t * __restrict wc, _csid_t csid, _index_t idx)
470{
471
472	*wc = (wchar_t)idx;
473	switch (csid) {
474	case 0x80:
475	case 0x8080:
476		*wc |= (wchar_t)0x80;
477		/*FALLTHROUGH*/
478	case 0x0:
479	case 0x8000:
480		break;
481	default:
482		*wc |= (wchar_t)csid;
483	}
484
485	return (0);
486}
487
488static void
489_citrus_HZ_encoding_module_uninit(_HZEncodingInfo *ei)
490{
491	escape_t *escape;
492
493	while ((escape = TAILQ_FIRST(E0SET(ei))) != NULL) {
494		TAILQ_REMOVE(E0SET(ei), escape, entry);
495		free(GL(escape));
496		free(GR(escape));
497		free(escape);
498	}
499	while ((escape = TAILQ_FIRST(E1SET(ei))) != NULL) {
500		TAILQ_REMOVE(E1SET(ei), escape, entry);
501		free(GL(escape));
502		free(GR(escape));
503		free(escape);
504	}
505}
506
507static int
508_citrus_HZ_parse_char(void *context, const char *name __unused, const char *s)
509{
510	escape_t *escape;
511	void **p;
512
513	p = (void **)context;
514	escape = (escape_t *)p[0];
515	if (escape->ch != '\0')
516		return (EINVAL);
517	escape->ch = *s++;
518	if (escape->ch == ESCAPE_CHAR || *s != '\0')
519		return (EINVAL);
520
521	return (0);
522}
523
524static int
525_citrus_HZ_parse_graphic(void *context, const char *name, const char *s)
526{
527	_HZEncodingInfo *ei;
528	escape_t *escape;
529	graphic_t *graphic;
530	void **p;
531
532	p = (void **)context;
533	escape = (escape_t *)p[0];
534	ei = (_HZEncodingInfo *)p[1];
535	graphic = calloc(1, sizeof(*graphic));
536	if (graphic == NULL)
537		return (ENOMEM);
538	if (strcmp("GL", name) == 0) {
539		if (GL(escape) != NULL)
540			goto release;
541		GL(escape) = graphic;
542	} else if (strcmp("GR", name) == 0) {
543		if (GR(escape) != NULL)
544			goto release;
545		GR(escape) = graphic;
546	} else {
547release:
548		free(graphic);
549		return (EINVAL);
550	}
551	graphic->escape = escape;
552	if (_bcs_strncasecmp("ASCII", s, 5) == 0) {
553		if (s[5] != '\0')
554			return (EINVAL);
555		graphic->charset = ASCII;
556		graphic->length = 1;
557		ei->ascii = graphic;
558		return (0);
559	} else if (_bcs_strncasecmp("GB2312", s, 6) == 0) {
560		if (s[6] != '\0')
561			return (EINVAL);
562		graphic->charset = GB2312;
563		graphic->length = 2;
564		ei->gb2312 = graphic;
565		return (0);
566	} else if (strncmp("94*", s, 3) == 0)
567		graphic->charset = CS94;
568	else if (strncmp("96*", s, 3) == 0)
569		graphic->charset = CS96;
570	else
571		return (EINVAL);
572	s += 3;
573	switch(*s) {
574	case '1': case '2': case '3':
575		graphic->length = (size_t)(*s - '0');
576		if (*++s == '\0')
577			break;
578	/*FALLTHROUGH*/
579	default:
580		return (EINVAL);
581	}
582	return (0);
583}
584
585static const _citrus_prop_hint_t escape_hints[] = {
586_CITRUS_PROP_HINT_STR("CH", &_citrus_HZ_parse_char),
587_CITRUS_PROP_HINT_STR("GL", &_citrus_HZ_parse_graphic),
588_CITRUS_PROP_HINT_STR("GR", &_citrus_HZ_parse_graphic),
589_CITRUS_PROP_HINT_END
590};
591
592static int
593_citrus_HZ_parse_escape(void *context, const char *name, const char *s)
594{
595	_HZEncodingInfo *ei;
596	escape_t *escape;
597	void *p[2];
598
599	ei = (_HZEncodingInfo *)context;
600	escape = calloc(1, sizeof(*escape));
601	if (escape == NULL)
602		return (EINVAL);
603	if (strcmp("0", name) == 0) {
604		escape->set = E0SET(ei);
605		TAILQ_INSERT_TAIL(E0SET(ei), escape, entry);
606	} else if (strcmp("1", name) == 0) {
607		escape->set = E1SET(ei);
608		TAILQ_INSERT_TAIL(E1SET(ei), escape, entry);
609	} else {
610		free(escape);
611		return (EINVAL);
612	}
613	p[0] = (void *)escape;
614	p[1] = (void *)ei;
615	return (_citrus_prop_parse_variable(
616	    escape_hints, (void *)&p[0], s, strlen(s)));
617}
618
619static const _citrus_prop_hint_t root_hints[] = {
620_CITRUS_PROP_HINT_STR("0", &_citrus_HZ_parse_escape),
621_CITRUS_PROP_HINT_STR("1", &_citrus_HZ_parse_escape),
622_CITRUS_PROP_HINT_END
623};
624
625static int
626_citrus_HZ_encoding_module_init(_HZEncodingInfo * __restrict ei,
627    const void * __restrict var, size_t lenvar)
628{
629	int errnum;
630
631	memset(ei, 0, sizeof(*ei));
632	TAILQ_INIT(E0SET(ei));
633	TAILQ_INIT(E1SET(ei));
634	errnum = _citrus_prop_parse_variable(
635	    root_hints, (void *)ei, var, lenvar);
636	if (errnum != 0)
637		_citrus_HZ_encoding_module_uninit(ei);
638	return (errnum);
639}
640
/* ----------------------------------------------------------------------
 * public interface for stdenc
 *
 * The declarations and operations table come from the two macros below;
 * the template header included afterwards relies on the _FUNCNAME,
 * _ENCODING_* and _CEI_TO_* macros defined earlier in this file.
 */

_CITRUS_STDENC_DECLS(HZ);
_CITRUS_STDENC_DEF_OPS(HZ);

#include "citrus_stdenc_template.h"

649