/* citrus_utf7.c revision 331722 */
1/* $FreeBSD: stable/11/lib/libiconv_modules/UTF7/citrus_utf7.c 331722 2018-03-29 02:50:57Z eadler $ */
2/*	$NetBSD: citrus_utf7.c,v 1.5 2006/08/23 12:57:24 tnozaki Exp $	*/
3
4/*-
5 * Copyright (c)2004, 2005 Citrus Project,
6 * All rights reserved.
7 *
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
10 * are met:
11 * 1. Redistributions of source code must retain the above copyright
12 *    notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 *    notice, this list of conditions and the following disclaimer in the
15 *    documentation and/or other materials provided with the distribution.
16 *
17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
18 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
21 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
23 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27 * SUCH DAMAGE.
28 *
29 */
30
31#include <sys/cdefs.h>
32
33#include <assert.h>
34#include <errno.h>
35#include <limits.h>
36#include <stdio.h>
37#include <stdint.h>
38#include <stdlib.h>
39#include <string.h>
40#include <wchar.h>
41
42#include "citrus_namespace.h"
43#include "citrus_types.h"
44#include "citrus_module.h"
45#include "citrus_stdenc.h"
46#include "citrus_utf7.h"
47
/* ----------------------------------------------------------------------
 * private definitions used by the stdenc templates
 */

/*
 * Layout of one _UTF7EncodingInfo cell (one 16-bit entry per 7-bit code):
 * the low byte (EI_MASK) holds the character's base64 digit value plus one,
 * or zero when the character is not a base64 digit; the bits above it flag
 * membership in the direct, optional-direct and whitespace tables.
 */
#define EI_MASK		UINT16_C(0xff)
#define EI_DIRECT	UINT16_C(0x100)
#define EI_OPTION	UINT16_C(0x200)
#define EI_SPACE	UINT16_C(0x400)
56
/*
 * Per-encoding lookup table: one 16-bit cell for each 7-bit character code,
 * combining the base64 digit index (low byte) with the EI_* class flags.
 */
typedef struct {
	uint16_t	 cell[0x80];
} _UTF7EncodingInfo;
60
/*
 * Conversion (shift) state shared by the decoder and the encoder.
 */
typedef struct {
	unsigned int
		mode: 1,	/* non-zero while inside a base64 run */
		bits: 4,	/* number of pending bits in cache, 0 - 15 */
		cache: 22;	/* bit accumulator; 22 = BASE64_BIT + UTF16_BIT */
	int chlen;		/* number of valid bytes in ch[] */
	/*
	 * Byte staging buffer: BASE64_IN plus three base64 digits
	 * (3 * 6 = 18 bits, the smallest digit count covering UTF16_BIT).
	 */
	char ch[4];
} _UTF7State;
69
/*
 * Glue consumed by citrus_stdenc_template.h: accessors for the composite
 * encoding-info object, the name-mangling helper, and the encoding traits.
 */
#define	_CEI_TO_EI(_cei_)		(&(_cei_)->ei)
#define	_CEI_TO_STATE(_cei_, _func_)	((_cei_)->states.s_##_func_)

#define	_FUNCNAME(m)			_citrus_UTF7_##m
#define	_ENCODING_INFO			_UTF7EncodingInfo
#define	_ENCODING_STATE			_UTF7State
/*
 * NOTE(review): the encoder in this file can emit up to six bytes for one
 * character outside the BMP (two UTF-16 units); confirm that callers do not
 * size output buffers from this value alone.
 */
#define	_ENCODING_MB_CUR_MAX(_ei_)		4
#define	_ENCODING_IS_STATE_DEPENDENT		1
#define	_STATE_NEEDS_EXPLICIT_INIT(_ps_)	0
79
80static __inline void
81/*ARGSUSED*/
82_citrus_UTF7_init_state(_UTF7EncodingInfo * __restrict ei __unused,
83    _UTF7State * __restrict s)
84{
85
86	memset((void *)s, 0, sizeof(*s));
87}
88
/*
 * State (de)serialisation helpers.  They are compiled out; the state is a
 * plain structure, so packing and unpacking are straight byte copies.
 */
#if 0
static __inline void
/*ARGSUSED*/
_citrus_UTF7_pack_state(_UTF7EncodingInfo * __restrict ei __unused,
    void *__restrict pspriv, const _UTF7State * __restrict s)
{

	memcpy(pspriv, (const void *)s, sizeof(*s));
}

static __inline void
/*ARGSUSED*/
_citrus_UTF7_unpack_state(_UTF7EncodingInfo * __restrict ei __unused,
    _UTF7State * __restrict s, const void * __restrict pspriv)
{

	memcpy((void *)s, pspriv, sizeof(*s));
}
#endif
108
/* The 64 base64 digits; a character's position is its 6-bit value. */
static const char base64[] =
	"ABCDEFGHIJKLMNOPQRSTUVWXYZ"
	"abcdefghijklmnopqrstuvwxyz"
	"0123456789+/";

/* Characters flagged EI_DIRECT when the lookup table is built. */
static const char direct[] =
	"ABCDEFGHIJKLMNOPQRSTUVWXYZ"
	"abcdefghijklmnopqrstuvwxyz"
	"0123456789'(),-./:?";

/* Characters flagged EI_OPTION and EI_SPACE respectively. */
static const char option[] = "!\"#$%&*;<=>@[]^_`{|}";
static const char spaces[] = " \t\r\n";
121
/* Widths, in bits, of one base64 digit and one UTF-16 code unit. */
#define	BASE64_BIT	6
#define	UTF16_BIT	16

/* Largest values of a base64 digit, a UTF-16 unit and a code point. */
#define	BASE64_MAX	0x3f
#define	UTF16_MAX	UINT16_C(0xffff)
#define	UTF32_MAX	UINT32_C(0x10ffff)

/* Characters that open and close a base64 run. */
#define	BASE64_IN	'+'
#define	BASE64_OUT	'-'

/* Non-zero when c does not fit in seven bits (this includes negatives). */
#define	SHIFT7BIT(c)	((c) >> 7)
#define	ISSPECIAL(c)	((c) == '\0' || (c) == BASE64_IN)

/* Base64 digit value of c, or -1 when c is not a base64 digit. */
#define	FINDLEN(ei, c) \
	(SHIFT7BIT((c)) ? -1 : (((ei)->cell[(c)] & EI_MASK) - 1))

/* True when the decoder accepts c outside a base64 run. */
#define	ISDIRECT(ei, c)	(!SHIFT7BIT((c)) && (ISSPECIAL((c)) || \
	((ei)->cell[(c)] & (EI_DIRECT | EI_OPTION | EI_SPACE))))

/* True when the encoder emits c as itself rather than in base64. */
#define	ISSAFE(ei, c)	(!SHIFT7BIT((c)) && (ISSPECIAL((c)) || \
	((c) < 0x80 && ((ei)->cell[(c)] & (EI_DIRECT | EI_SPACE)))))

/* surrogate pair */
#define	SRG_BASE	UINT32_C(0x10000)
#define	HISRG_MIN	UINT16_C(0xd800)
#define	HISRG_MAX	UINT16_C(0xdbff)
#define	LOSRG_MIN	UINT16_C(0xdc00)
#define	LOSRG_MAX	UINT16_C(0xdfff)
150
151static int
152_citrus_UTF7_mbtoutf16(_UTF7EncodingInfo * __restrict ei,
153    uint16_t * __restrict u16, char ** __restrict s, size_t n,
154    _UTF7State * __restrict psenc, size_t * __restrict nresult)
155{
156	char *s0;
157	int done, i, len;
158
159	*nresult = 0;
160	s0 = *s;
161
162	for (i = 0, done = 0; done == 0; i++) {
163		if (i == psenc->chlen) {
164			if (n-- < 1) {
165				*nresult = (size_t)-2;
166				*s = s0;
167				return (0);
168			}
169			psenc->ch[psenc->chlen++] = *s0++;
170		}
171		if (SHIFT7BIT((int)psenc->ch[i]))
172			goto ilseq;
173		if (!psenc->mode) {
174			if (psenc->bits > 0 || psenc->cache > 0)
175				return (EINVAL);
176			if (psenc->ch[i] == BASE64_IN)
177				psenc->mode = 1;
178			else {
179				if (!ISDIRECT(ei, (int)psenc->ch[i]))
180					goto ilseq;
181				*u16 = (uint16_t)psenc->ch[i];
182				done = 1;
183				continue;
184			}
185		} else {
186			if (psenc->ch[i] == BASE64_OUT && psenc->cache == 0) {
187				psenc->mode = 0;
188				*u16 = (uint16_t)BASE64_IN;
189				done = 1;
190				continue;
191			}
192			len = FINDLEN(ei, (int)psenc->ch[i]);
193			if (len < 0) {
194				if (psenc->bits >= BASE64_BIT)
195					return (EINVAL);
196				psenc->mode = 0;
197				psenc->bits = psenc->cache = 0;
198				if (psenc->ch[i] != BASE64_OUT) {
199					if (!ISDIRECT(ei, (int)psenc->ch[i]))
200						goto ilseq;
201					*u16 = (uint16_t)psenc->ch[i];
202					done = 1;
203				} else {
204					psenc->chlen--;
205					i--;
206				}
207			} else {
208				psenc->cache =
209				    (psenc->cache << BASE64_BIT) | len;
210				switch (psenc->bits) {
211				case 0: case 2: case 4: case 6: case 8:
212					psenc->bits += BASE64_BIT;
213					break;
214				case 10: case 12: case 14:
215					psenc->bits -= (UTF16_BIT - BASE64_BIT);
216					*u16 = (psenc->cache >> psenc->bits) &
217					    UTF16_MAX;
218					done = 1;
219					break;
220				default:
221					return (EINVAL);
222				}
223			}
224		}
225	}
226
227	if (psenc->chlen > i)
228		return (EINVAL);
229	psenc->chlen = 0;
230	*nresult = (size_t)((*u16 == 0) ? 0 : s0 - *s);
231	*s = s0;
232
233	return (0);
234
235ilseq:
236	*nresult = (size_t)-1;
237	return (EILSEQ);
238}
239
240static int
241_citrus_UTF7_mbrtowc_priv(_UTF7EncodingInfo * __restrict ei,
242    wchar_t * __restrict pwc, char ** __restrict s, size_t n,
243    _UTF7State * __restrict psenc, size_t * __restrict nresult)
244{
245	uint32_t u32;
246	uint16_t hi, lo;
247	size_t nr, siz;
248	int err;
249
250	if (*s == NULL) {
251		_citrus_UTF7_init_state(ei, psenc);
252		*nresult = (size_t)_ENCODING_IS_STATE_DEPENDENT;
253		return (0);
254	}
255	err = _citrus_UTF7_mbtoutf16(ei, &hi, s, n, psenc, &nr);
256	if (nr == (size_t)-1 || nr == (size_t)-2) {
257		*nresult = nr;
258		return (err);
259	}
260	if (err != 0)
261		return (err);
262	n -= nr;
263	siz = nr;
264	if (hi < HISRG_MIN || hi > HISRG_MAX) {
265		u32 = (uint32_t)hi;
266		goto done;
267	}
268	err = _citrus_UTF7_mbtoutf16(ei, &lo, s, n, psenc, &nr);
269	if (nr == (size_t)-1 || nr == (size_t)-2) {
270		psenc->chlen = 1; /* make get_state_desc return incomplete */
271		*nresult = nr;
272		return (err);
273	}
274	if (err != 0)
275		return (err);
276	if (lo < LOSRG_MIN || lo > LOSRG_MAX) {
277		*nresult = (size_t)-1;
278		return (EILSEQ);
279	}
280	hi -= HISRG_MIN;
281	lo -= LOSRG_MIN;
282	u32 = (hi << 10 | lo) + SRG_BASE;
283	siz += nr;
284done:
285	if (pwc != NULL)
286		*pwc = (wchar_t)u32;
287	if (u32 == (uint32_t)0) {
288		*nresult = (size_t)0;
289		_citrus_UTF7_init_state(ei, psenc);
290	} else {
291		*nresult = siz;
292	}
293	return (err);
294}
295
296static int
297_citrus_UTF7_utf16tomb(_UTF7EncodingInfo * __restrict ei,
298    char * __restrict s, size_t n __unused, uint16_t u16,
299    _UTF7State * __restrict psenc, size_t * __restrict nresult)
300{
301	int bits, i;
302
303	if (psenc->chlen != 0 || psenc->bits > BASE64_BIT)
304		return (EINVAL);
305
306	if (ISSAFE(ei, u16)) {
307		if (psenc->mode) {
308			if (psenc->bits > 0) {
309				bits = BASE64_BIT - psenc->bits;
310				i = (psenc->cache << bits) & BASE64_MAX;
311				psenc->ch[psenc->chlen++] = base64[i];
312				psenc->bits = psenc->cache = 0;
313			}
314			if (u16 == BASE64_OUT || FINDLEN(ei, u16) >= 0)
315				psenc->ch[psenc->chlen++] = BASE64_OUT;
316			psenc->mode = 0;
317		}
318		if (psenc->bits != 0)
319			return (EINVAL);
320		psenc->ch[psenc->chlen++] = (char)u16;
321		if (u16 == BASE64_IN)
322			psenc->ch[psenc->chlen++] = BASE64_OUT;
323	} else {
324		if (!psenc->mode) {
325			if (psenc->bits > 0)
326				return (EINVAL);
327			psenc->ch[psenc->chlen++] = BASE64_IN;
328			psenc->mode = 1;
329		}
330		psenc->cache = (psenc->cache << UTF16_BIT) | u16;
331		bits = UTF16_BIT + psenc->bits;
332		psenc->bits = bits % BASE64_BIT;
333		while ((bits -= BASE64_BIT) >= 0) {
334			i = (psenc->cache >> bits) & BASE64_MAX;
335			psenc->ch[psenc->chlen++] = base64[i];
336		}
337	}
338	memcpy(s, psenc->ch, psenc->chlen);
339	*nresult = psenc->chlen;
340	psenc->chlen = 0;
341
342	return (0);
343}
344
345static int
346_citrus_UTF7_wcrtomb_priv(_UTF7EncodingInfo * __restrict ei,
347    char * __restrict s, size_t n, wchar_t wchar,
348    _UTF7State * __restrict psenc, size_t * __restrict nresult)
349{
350	uint32_t u32;
351	uint16_t u16[2];
352	int err, i, len;
353	size_t nr, siz;
354
355	u32 = (uint32_t)wchar;
356	if (u32 <= UTF16_MAX) {
357		u16[0] = (uint16_t)u32;
358		len = 1;
359	} else if (u32 <= UTF32_MAX) {
360		u32 -= SRG_BASE;
361		u16[0] = (u32 >> 10) + HISRG_MIN;
362		u16[1] = ((uint16_t)(u32 & UINT32_C(0x3ff))) + LOSRG_MIN;
363		len = 2;
364	} else {
365		*nresult = (size_t)-1;
366		return (EILSEQ);
367	}
368	siz = 0;
369	for (i = 0; i < len; ++i) {
370		err = _citrus_UTF7_utf16tomb(ei, s, n, u16[i], psenc, &nr);
371		if (err != 0)
372			return (err); /* XXX: state has been modified */
373		s += nr;
374		n -= nr;
375		siz += nr;
376	}
377	*nresult = siz;
378
379	return (0);
380}
381
382static int
383/* ARGSUSED */
384_citrus_UTF7_put_state_reset(_UTF7EncodingInfo * __restrict ei __unused,
385    char * __restrict s, size_t n, _UTF7State * __restrict psenc,
386    size_t * __restrict nresult)
387{
388	int bits, pos;
389
390	if (psenc->chlen != 0 || psenc->bits > BASE64_BIT)
391		return (EINVAL);
392
393	if (psenc->mode) {
394		if (psenc->bits > 0) {
395			if (n-- < 1)
396				return (E2BIG);
397			bits = BASE64_BIT - psenc->bits;
398			pos = (psenc->cache << bits) & BASE64_MAX;
399			psenc->ch[psenc->chlen++] = base64[pos];
400			psenc->ch[psenc->chlen++] = BASE64_OUT;
401			psenc->bits = psenc->cache = 0;
402		}
403		psenc->mode = 0;
404	}
405	if (psenc->bits != 0)
406		return (EINVAL);
407	if (n-- < 1)
408		return (E2BIG);
409
410	*nresult = (size_t)psenc->chlen;
411	if (psenc->chlen > 0) {
412		memcpy(s, psenc->ch, psenc->chlen);
413		psenc->chlen = 0;
414	}
415
416	return (0);
417}
418
419static __inline int
420/*ARGSUSED*/
421_citrus_UTF7_stdenc_wctocs(_UTF7EncodingInfo * __restrict ei __unused,
422    _csid_t * __restrict csid, _index_t * __restrict idx, wchar_t wc)
423{
424
425	*csid = 0;
426	*idx = (_index_t)wc;
427
428	return (0);
429}
430
431static __inline int
432/*ARGSUSED*/
433_citrus_UTF7_stdenc_cstowc(_UTF7EncodingInfo * __restrict ei __unused,
434    wchar_t * __restrict wc, _csid_t csid, _index_t idx)
435{
436
437	if (csid != 0)
438		return (EILSEQ);
439	*wc = (wchar_t)idx;
440
441	return (0);
442}
443
444static __inline int
445/*ARGSUSED*/
446_citrus_UTF7_stdenc_get_state_desc_generic(_UTF7EncodingInfo * __restrict ei __unused,
447    _UTF7State * __restrict psenc, int * __restrict rstate)
448{
449
450	*rstate = (psenc->chlen == 0) ? _STDENC_SDGEN_INITIAL :
451	    _STDENC_SDGEN_INCOMPLETE_CHAR;
452	return (0);
453}
454
455static void
456/*ARGSUSED*/
457_citrus_UTF7_encoding_module_uninit(_UTF7EncodingInfo *ei __unused)
458{
459
460	/* ei seems to be unused */
461}
462
463static int
464/*ARGSUSED*/
465_citrus_UTF7_encoding_module_init(_UTF7EncodingInfo * __restrict ei,
466    const void * __restrict var __unused, size_t lenvar __unused)
467{
468	const char *s;
469
470	memset(ei, 0, sizeof(*ei));
471
472#define FILL(str, flag)				\
473do {						\
474	for (s = str; *s != '\0'; s++)		\
475		ei->cell[*s & 0x7f] |= flag;	\
476} while (/*CONSTCOND*/0)
477
478	FILL(base64, (s - base64) + 1);
479	FILL(direct, EI_DIRECT);
480	FILL(option, EI_OPTION);
481	FILL(spaces, EI_SPACE);
482
483	return (0);
484}
485
/* ----------------------------------------------------------------------
 * public interface for stdenc
 */

_CITRUS_STDENC_DECLS(UTF7);
_CITRUS_STDENC_DEF_OPS(UTF7);
492
493#include "citrus_stdenc_template.h"
494