citrus_utf7.c revision 258496
1/* $FreeBSD: stable/10/lib/libiconv_modules/UTF7/citrus_utf7.c 258496 2013-11-23 12:17:05Z tijl $ */
2/*	$NetBSD: citrus_utf7.c,v 1.5 2006/08/23 12:57:24 tnozaki Exp $	*/
3
4/*-
5 * Copyright (c)2004, 2005 Citrus Project,
6 * All rights reserved.
7 *
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
10 * are met:
11 * 1. Redistributions of source code must retain the above copyright
12 *    notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 *    notice, this list of conditions and the following disclaimer in the
15 *    documentation and/or other materials provided with the distribution.
16 *
17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
18 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
21 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
23 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27 * SUCH DAMAGE.
28 *
29 */
30
31#include <sys/cdefs.h>
32
33#include <assert.h>
34#include <errno.h>
35#include <limits.h>
36#include <stdio.h>
37#include <stdint.h>
38#include <stdlib.h>
39#include <string.h>
40#include <wchar.h>
41
42#include "citrus_namespace.h"
43#include "citrus_types.h"
44#include "citrus_module.h"
45#include "citrus_stdenc.h"
46#include "citrus_utf7.h"
47
/* ----------------------------------------------------------------------
 * private definitions used by the templates
 */

/*
 * Layout of one _UTF7EncodingInfo cell (there is one cell per 7-bit
 * character code):
 *
 *   EI_MASK    low 8 bits: position of the character in the base64
 *              alphabet plus one, or 0 if it is not a base64 digit
 *   EI_DIRECT  character appears in the direct[] table
 *   EI_OPTION  character appears in the option[] table
 *   EI_SPACE   character appears in the spaces[] table
 */
#define EI_MASK		UINT16_C(0xff)
#define EI_DIRECT	UINT16_C(0x100)
#define EI_OPTION	UINT16_C(0x200)
#define EI_SPACE	UINT16_C(0x400)
56
/*
 * Per-encoding lookup table: one classification cell for every 7-bit
 * character code (0x00 - 0x7f).  The cells are filled in by the module
 * init routine and consulted through the FINDLEN/ISDIRECT/ISSAFE macros.
 */
typedef struct {
	uint16_t	 cell[0x80];
} _UTF7EncodingInfo;
60
/*
 * Shift state shared by the decoder and the encoder.
 */
typedef struct {
	unsigned int	 mode : 1;	/* nonzero while in base64 mode */
	unsigned int	 bits : 4;	/* bit count, needs to hold 0 - 15 */
	unsigned int	 cache : 22;	/* 22 = BASE64_BIT + UTF16_BIT */
	unsigned int	 surrogate : 1;	/* a high surrogate is pending */
	int		 chlen;		/* number of bytes held in ch[] */
	/*
	 * BASE64_IN plus up to three base64 digits: 3 * 6 = 18 bits is the
	 * smallest whole number of digits that covers UTF16_BIT.
	 */
	char		 ch[4];
} _UTF7State;
70
/* Accessors for the encoding info and the per-function states. */
#define	_CEI_TO_EI(_cei_)		(&(_cei_)->ei)
#define	_CEI_TO_STATE(_cei_, _func_)	(_cei_)->states.s_##_func_

/*
 * Parameters for this module: every function is named _citrus_UTF7_*,
 * the encoding is state dependent, and a zero-filled state is a valid
 * initial state (no explicit initialisation needed).
 */
#define	_FUNCNAME(m)			_citrus_UTF7_##m
#define	_ENCODING_INFO			_UTF7EncodingInfo
#define	_ENCODING_STATE			_UTF7State
#define	_ENCODING_MB_CUR_MAX(_ei_)		4
#define	_ENCODING_IS_STATE_DEPENDENT		1
#define	_STATE_NEEDS_EXPLICIT_INIT(_ps_)	0
80
81static __inline void
82/*ARGSUSED*/
83_citrus_UTF7_init_state(_UTF7EncodingInfo * __restrict ei __unused,
84    _UTF7State * __restrict s)
85{
86
87	memset((void *)s, 0, sizeof(*s));
88}
89
90static __inline void
91/*ARGSUSED*/
92_citrus_UTF7_pack_state(_UTF7EncodingInfo * __restrict ei __unused,
93    void *__restrict pspriv, const _UTF7State * __restrict s)
94{
95
96	memcpy(pspriv, (const void *)s, sizeof(*s));
97}
98
99static __inline void
100/*ARGSUSED*/
101_citrus_UTF7_unpack_state(_UTF7EncodingInfo * __restrict ei __unused,
102    _UTF7State * __restrict s, const void * __restrict pspriv)
103{
104
105	memcpy((void *)s, pspriv, sizeof(*s));
106}
107
/*
 * The base64 alphabet; the index of a character in this string is its
 * 6-bit digit value.
 */
static const char base64[] =
	"ABCDEFGHIJKLMNOPQRSTUVWXYZ"
	"abcdefghijklmnopqrstuvwxyz"
	"0123456789+/";

/* Characters flagged EI_DIRECT in the classification table. */
static const char direct[] =
	"ABCDEFGHIJKLMNOPQRSTUVWXYZ"
	"abcdefghijklmnopqrstuvwxyz"
	"0123456789'(),-./:?";

/* Characters flagged EI_OPTION in the classification table. */
static const char option[] = "!\"#$%&*;<=>@[]^_`{|}";

/* Characters flagged EI_SPACE in the classification table. */
static const char spaces[] = " \t\r\n";
120
/* Width in bits of one base64 digit and of one UTF-16 code unit. */
#define	BASE64_BIT	6
#define	UTF16_BIT	16

/* Largest base64 digit, UTF-16 code unit and code point values. */
#define	BASE64_MAX	0x3f
#define	UTF16_MAX	UINT16_C(0xffff)
#define	UTF32_MAX	UINT32_C(0x10ffff)

/* Characters that switch into and out of base64 mode. */
#define	BASE64_IN	'+'
#define	BASE64_OUT	'-'

/* Nonzero when (c) does not fit in seven bits. */
#define	SHIFT7BIT(c)	((c) >> 7)
/* NUL and the shift-in character are always handled as single bytes. */
#define	ISSPECIAL(c)	((c) == '\0' || (c) == BASE64_IN)

/*
 * Base64 digit value of (c), or -1 when (c) is not a base64 digit.
 * The cell stores "value + 1", hence the subtraction.
 */
#define	FINDLEN(ei, c) \
	(SHIFT7BIT((c)) ? -1 : (((ei)->cell[(c)] & EI_MASK) - 1))

/* True when (c) is accepted outside base64 mode by the decoder. */
#define	ISDIRECT(ei, c)	(!SHIFT7BIT((c)) && (ISSPECIAL((c)) || \
	((ei)->cell[(c)] & (EI_DIRECT | EI_OPTION | EI_SPACE))))

/*
 * True when the encoder emits (c) as a plain byte; unlike ISDIRECT the
 * EI_OPTION characters are excluded.
 */
#define	ISSAFE(ei, c)	(!SHIFT7BIT((c)) && (ISSPECIAL((c)) || \
	((c) < 0x80 && ((ei)->cell[(c)] & (EI_DIRECT | EI_SPACE)))))

/* surrogate pair */
#define	SRG_BASE	UINT32_C(0x10000)
#define	HISRG_MIN	UINT16_C(0xd800)
#define	HISRG_MAX	UINT16_C(0xdbff)
#define	LOSRG_MIN	UINT16_C(0xdc00)
#define	LOSRG_MAX	UINT16_C(0xdfff)
149
150static int
151_citrus_UTF7_mbtoutf16(_UTF7EncodingInfo * __restrict ei,
152    uint16_t * __restrict u16, const char ** __restrict s, size_t n,
153    _UTF7State * __restrict psenc, size_t * __restrict nresult)
154{
155	_UTF7State sv;
156	const char *s0;
157	int done, i, len;
158
159	s0 = *s;
160	sv = *psenc;
161
162	for (i = 0, done = 0; done == 0; i++) {
163		if (i == psenc->chlen) {
164			if (n-- < 1) {
165				*nresult = (size_t)-2;
166				*s = s0;
167				sv.chlen = psenc->chlen;
168				memcpy(sv.ch, psenc->ch, sizeof(sv.ch));
169				*psenc = sv;
170				return (0);
171			}
172			psenc->ch[psenc->chlen++] = *s0++;
173		}
174		if (SHIFT7BIT((int)psenc->ch[i]))
175			goto ilseq;
176		if (!psenc->mode) {
177			if (psenc->bits > 0 || psenc->cache > 0)
178				return (EINVAL);
179			if (psenc->ch[i] == BASE64_IN)
180				psenc->mode = 1;
181			else {
182				if (!ISDIRECT(ei, (int)psenc->ch[i]))
183					goto ilseq;
184				*u16 = (uint16_t)psenc->ch[i];
185				done = 1;
186				continue;
187			}
188		} else {
189			if (psenc->ch[i] == BASE64_OUT && psenc->cache == 0) {
190				psenc->mode = 0;
191				*u16 = (uint16_t)BASE64_IN;
192				done = 1;
193				continue;
194			}
195			len = FINDLEN(ei, (int)psenc->ch[i]);
196			if (len < 0) {
197				if (psenc->bits >= BASE64_BIT)
198					return (EINVAL);
199				psenc->mode = 0;
200				psenc->bits = psenc->cache = 0;
201				if (psenc->ch[i] != BASE64_OUT) {
202					if (!ISDIRECT(ei, (int)psenc->ch[i]))
203						goto ilseq;
204					*u16 = (uint16_t)psenc->ch[i];
205					done = 1;
206				} else {
207					psenc->chlen--;
208					i--;
209				}
210			} else {
211				psenc->cache =
212				    (psenc->cache << BASE64_BIT) | len;
213				switch (psenc->bits) {
214				case 0: case 2: case 4: case 6: case 8:
215					psenc->bits += BASE64_BIT;
216					break;
217				case 10: case 12: case 14:
218					psenc->bits -= (UTF16_BIT - BASE64_BIT);
219					*u16 = (psenc->cache >> psenc->bits) &
220					    UTF16_MAX;
221					done = 1;
222					break;
223				default:
224					return (EINVAL);
225				}
226			}
227		}
228	}
229
230	if (psenc->chlen > i)
231		return (EINVAL);
232	psenc->chlen = 0;
233	*nresult = (size_t)((*u16 == 0) ? 0 : s0 - *s);
234	*s = s0;
235
236	return (0);
237
238ilseq:
239	*nresult = (size_t)-1;
240	return (EILSEQ);
241}
242
243static int
244_citrus_UTF7_mbrtowc_priv(_UTF7EncodingInfo * __restrict ei,
245    wchar_t * __restrict pwc, const char ** __restrict s, size_t n,
246    _UTF7State * __restrict psenc, size_t * __restrict nresult)
247{
248	uint32_t u32;
249	uint16_t hi, lo;
250	size_t nr, siz;
251	int err;
252
253	if (*s == NULL) {
254		_citrus_UTF7_init_state(ei, psenc);
255		*nresult = (size_t)_ENCODING_IS_STATE_DEPENDENT;
256		return (0);
257	}
258	if (psenc->surrogate) {
259		hi = (psenc->cache >> psenc->bits) & UTF16_MAX;
260		if (hi < HISRG_MIN || hi > HISRG_MAX)
261			return (EINVAL);
262		siz = 0;
263	} else {
264		err = _citrus_UTF7_mbtoutf16(ei, &hi, s, n, psenc, &nr);
265		if (nr == (size_t)-1 || nr == (size_t)-2) {
266			*nresult = nr;
267			return (err);
268		}
269		if (err != 0)
270			return (err);
271		n -= nr;
272		siz = nr;
273		if (hi < HISRG_MIN || hi > HISRG_MAX) {
274			u32 = (uint32_t)hi;
275			goto done;
276		}
277		psenc->surrogate = 1;
278	}
279	err = _citrus_UTF7_mbtoutf16(ei, &lo, s, n, psenc, &nr);
280	if (nr == (size_t)-1 || nr == (size_t)-2) {
281		*nresult = nr;
282		return (err);
283	}
284	if (err != 0)
285		return (err);
286	hi -= HISRG_MIN;
287	lo -= LOSRG_MIN;
288	u32 = (hi << 10 | lo) + SRG_BASE;
289	siz += nr;
290done:
291	if (pwc != NULL)
292		*pwc = (wchar_t)u32;
293	if (u32 == (uint32_t)0) {
294		*nresult = (size_t)0;
295		_citrus_UTF7_init_state(ei, psenc);
296	} else {
297		*nresult = siz;
298		psenc->surrogate = 0;
299	}
300	return (err);
301}
302
303static int
304_citrus_UTF7_utf16tomb(_UTF7EncodingInfo * __restrict ei,
305    char * __restrict s, size_t n __unused, uint16_t u16,
306    _UTF7State * __restrict psenc, size_t * __restrict nresult)
307{
308	int bits, i;
309
310	if (psenc->chlen != 0 || psenc->bits > BASE64_BIT)
311		return (EINVAL);
312
313	if (ISSAFE(ei, u16)) {
314		if (psenc->mode) {
315			if (psenc->bits > 0) {
316				bits = BASE64_BIT - psenc->bits;
317				i = (psenc->cache << bits) & BASE64_MAX;
318				psenc->ch[psenc->chlen++] = base64[i];
319				psenc->bits = psenc->cache = 0;
320			}
321			if (u16 == BASE64_OUT || FINDLEN(ei, u16) >= 0)
322				psenc->ch[psenc->chlen++] = BASE64_OUT;
323			psenc->mode = 0;
324		}
325		if (psenc->bits != 0)
326			return (EINVAL);
327		psenc->ch[psenc->chlen++] = (char)u16;
328		if (u16 == BASE64_IN)
329			psenc->ch[psenc->chlen++] = BASE64_OUT;
330	} else {
331		if (!psenc->mode) {
332			if (psenc->bits > 0)
333				return (EINVAL);
334			psenc->ch[psenc->chlen++] = BASE64_IN;
335			psenc->mode = 1;
336		}
337		psenc->cache = (psenc->cache << UTF16_BIT) | u16;
338		bits = UTF16_BIT + psenc->bits;
339		psenc->bits = bits % BASE64_BIT;
340		while ((bits -= BASE64_BIT) >= 0) {
341			i = (psenc->cache >> bits) & BASE64_MAX;
342			psenc->ch[psenc->chlen++] = base64[i];
343		}
344	}
345	memcpy(s, psenc->ch, psenc->chlen);
346	*nresult = psenc->chlen;
347	psenc->chlen = 0;
348
349	return (0);
350}
351
352static int
353_citrus_UTF7_wcrtomb_priv(_UTF7EncodingInfo * __restrict ei,
354    char * __restrict s, size_t n, wchar_t wchar,
355    _UTF7State * __restrict psenc, size_t * __restrict nresult)
356{
357	uint32_t u32;
358	uint16_t u16[2];
359	int err, i, len;
360	size_t nr, siz;
361
362	u32 = (uint32_t)wchar;
363	if (u32 <= UTF16_MAX) {
364		u16[0] = (uint16_t)u32;
365		len = 1;
366	} else if (u32 <= UTF32_MAX) {
367		u32 -= SRG_BASE;
368		u16[0] = (u32 >> 10) + HISRG_MIN;
369		u16[1] = ((uint16_t)(u32 & UINT32_C(0x3ff))) + LOSRG_MIN;
370		len = 2;
371	} else {
372		*nresult = (size_t)-1;
373		return (EILSEQ);
374	}
375	siz = 0;
376	for (i = 0; i < len; ++i) {
377		err = _citrus_UTF7_utf16tomb(ei, s, n, u16[i], psenc, &nr);
378		if (err != 0)
379			return (err); /* XXX: state has been modified */
380		s += nr;
381		n -= nr;
382		siz += nr;
383	}
384	*nresult = siz;
385
386	return (0);
387}
388
389static int
390/* ARGSUSED */
391_citrus_UTF7_put_state_reset(_UTF7EncodingInfo * __restrict ei __unused,
392    char * __restrict s, size_t n, _UTF7State * __restrict psenc,
393    size_t * __restrict nresult)
394{
395	int bits, pos;
396
397	if (psenc->chlen != 0 || psenc->bits > BASE64_BIT || psenc->surrogate)
398		return (EINVAL);
399
400	if (psenc->mode) {
401		if (psenc->bits > 0) {
402			if (n-- < 1)
403				return (E2BIG);
404			bits = BASE64_BIT - psenc->bits;
405			pos = (psenc->cache << bits) & BASE64_MAX;
406			psenc->ch[psenc->chlen++] = base64[pos];
407			psenc->ch[psenc->chlen++] = BASE64_OUT;
408			psenc->bits = psenc->cache = 0;
409		}
410		psenc->mode = 0;
411	}
412	if (psenc->bits != 0)
413		return (EINVAL);
414	if (n-- < 1)
415		return (E2BIG);
416
417	*nresult = (size_t)psenc->chlen;
418	if (psenc->chlen > 0) {
419		memcpy(s, psenc->ch, psenc->chlen);
420		psenc->chlen = 0;
421	}
422
423	return (0);
424}
425
426static __inline int
427/*ARGSUSED*/
428_citrus_UTF7_stdenc_wctocs(_UTF7EncodingInfo * __restrict ei __unused,
429    _csid_t * __restrict csid, _index_t * __restrict idx, wchar_t wc)
430{
431
432	*csid = 0;
433	*idx = (_index_t)wc;
434
435	return (0);
436}
437
438static __inline int
439/*ARGSUSED*/
440_citrus_UTF7_stdenc_cstowc(_UTF7EncodingInfo * __restrict ei __unused,
441    wchar_t * __restrict wc, _csid_t csid, _index_t idx)
442{
443
444	if (csid != 0)
445		return (EILSEQ);
446	*wc = (wchar_t)idx;
447
448	return (0);
449}
450
451static __inline int
452/*ARGSUSED*/
453_citrus_UTF7_stdenc_get_state_desc_generic(_UTF7EncodingInfo * __restrict ei __unused,
454    _UTF7State * __restrict psenc, int * __restrict rstate)
455{
456
457	*rstate = (psenc->chlen == 0) ? _STDENC_SDGEN_INITIAL :
458	    _STDENC_SDGEN_INCOMPLETE_CHAR;
459	return (0);
460}
461
462static void
463/*ARGSUSED*/
464_citrus_UTF7_encoding_module_uninit(_UTF7EncodingInfo *ei __unused)
465{
466
467	/* ei seems to be unused */
468}
469
470static int
471/*ARGSUSED*/
472_citrus_UTF7_encoding_module_init(_UTF7EncodingInfo * __restrict ei,
473    const void * __restrict var __unused, size_t lenvar __unused)
474{
475	const char *s;
476
477	memset(ei, 0, sizeof(*ei));
478
479#define FILL(str, flag)				\
480do {						\
481	for (s = str; *s != '\0'; s++)		\
482		ei->cell[*s & 0x7f] |= flag;	\
483} while (/*CONSTCOND*/0)
484
485	FILL(base64, (s - base64) + 1);
486	FILL(direct, EI_DIRECT);
487	FILL(option, EI_OPTION);
488	FILL(spaces, EI_SPACE);
489
490	return (0);
491}
492
493/* ----------------------------------------------------------------------
494 * public interface for stdenc
495 */
496
497_CITRUS_STDENC_DECLS(UTF7);
498_CITRUS_STDENC_DEF_OPS(UTF7);
499
500#include "citrus_stdenc_template.h"
501