/*
 * Copyright (C) 1999-2001, 2004 Free Software Foundation, Inc.
 * This file is part of the GNU LIBICONV Library.
 *
 * The GNU LIBICONV Library is free software; you can redistribute it
 * and/or modify it under the terms of the GNU Library General Public
 * License as published by the Free Software Foundation; either version 2
 * of the License, or (at your option) any later version.
 *
 * The GNU LIBICONV Library is distributed in the hope that it will be
 * useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Library General Public License for more details.
 *
 * You should have received a copy of the GNU Library General Public
 * License along with the GNU LIBICONV Library; see the file COPYING.LIB.
 * If not, write to the Free Software Foundation, Inc., 51 Franklin Street,
 * Fifth Floor, Boston, MA 02110-1301, USA.
 */

/*
 * CP1258
 */

#include "flushwc.h"
#include "vietcomb.h"

/* CP1258 byte of each combining diacritic, indexed by the comb1 field of
   viet_decomp_table (see cp1258_wctomb).  The order is the same as the k
   index chosen in cp1258_mbtowc:
   0 = U+0300, 1 = U+0301, 2 = U+0303, 3 = U+0309, 4 = U+0323. */
static const unsigned char cp1258_comb_table[] = {
  0xcc, 0xec, 0xde, 0xd2, 0xf2,
};

/* The possible bases in viet_comp_table_data:
   0x0041..0x0045, 0x0047..0x0049, 0x004B..0x0050, 0x0052..0x0057,
   0x0059..0x005A, 0x0061..0x0065, 0x0067..0x0069, 0x006B..0x0070,
   0x0072..0x0077, 0x0079..0x007A, 0x00A5, 0x00A8, 0x00C2, 0x00C5..0x00C7,
   0x00CA, 0x00CF, 0x00D3..0x00D4, 0x00D6, 0x00D8, 0x00DA, 0x00DC, 0x00E2,
   0x00E5..0x00E7, 0x00EA, 0x00EF, 0x00F3..0x00F4, 0x00F6, 0x00F8, 0x00FA,
   0x00FC, 0x0102..0x0103, 0x01A0..0x01A1, 0x01AF..0x01B0.

   Stored as a bitmap: cp1258_mbtowc tests bit (wc & 0x1f) of element
   (wc - 0x0040) >> 5 for 0x0041 <= wc <= 0x01b0, so each element covers
   32 consecutive code points starting at 0x0040. */
static const unsigned int cp1258_comp_bases[] = {
  0x06fdfbbe, 0x06fdfbbe, 0x00000000, 0x00000120, 0x155884e4, 0x155884e4,
  0x0000000c, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00018003
};

/* Unicode value of each CP1258 byte 0x80..0xff, indexed by (byte - 0x80).
   An entry of 0xfffd marks a byte that cp1258_mbtowc rejects with
   RET_ILSEQ.  Bytes 0xcc, 0xd2, 0xde, 0xec and 0xf2 map to the combining
   diacritics U+0300, U+0309, U+0303, U+0301 and U+0323. */
static const unsigned short cp1258_2uni[128] = {
  /* 0x80 */
  0x20ac, 0xfffd, 0x201a, 0x0192, 0x201e, 0x2026, 0x2020, 0x2021,
  0x02c6, 0x2030, 0xfffd, 0x2039, 0x0152, 0xfffd, 0xfffd, 0xfffd,
  /* 0x90 */
  0xfffd, 0x2018, 0x2019, 0x201c, 0x201d, 0x2022, 0x2013, 0x2014,
  0x02dc, 0x2122, 0xfffd, 0x203a, 0x0153, 0xfffd, 0xfffd, 0x0178,
  /* 0xa0 */
  0x00a0, 0x00a1, 0x00a2, 0x00a3, 0x00a4, 0x00a5, 0x00a6, 0x00a7,
  0x00a8, 0x00a9, 0x00aa, 0x00ab, 0x00ac, 0x00ad, 0x00ae, 0x00af,
  /* 0xb0 */
  0x00b0, 0x00b1, 0x00b2, 0x00b3, 0x00b4, 0x00b5, 0x00b6, 0x00b7,
  0x00b8, 0x00b9, 0x00ba, 0x00bb, 0x00bc, 0x00bd, 0x00be, 0x00bf,
  /* 0xc0 */
  0x00c0, 0x00c1, 0x00c2, 0x0102, 0x00c4, 0x00c5, 0x00c6, 0x00c7,
  0x00c8, 0x00c9, 0x00ca, 0x00cb, 0x0300, 0x00cd, 0x00ce, 0x00cf,
  /* 0xd0 */
  0x0110, 0x00d1, 0x0309, 0x00d3, 0x00d4, 0x01a0, 0x00d6, 0x00d7,
  0x00d8, 0x00d9, 0x00da, 0x00db, 0x00dc, 0x01af, 0x0303, 0x00df,
  /* 0xe0 */
  0x00e0, 0x00e1, 0x00e2, 0x0103, 0x00e4, 0x00e5, 0x00e6, 0x00e7,
  0x00e8, 0x00e9, 0x00ea, 0x00eb, 0x0301, 0x00ed, 0x00ee, 0x00ef,
  /* 0xf0 */
  0x0111, 0x00f1, 0x0323, 0x00f3, 0x00f4, 0x01a1, 0x00f6, 0x00f7,
  0x00f8, 0x00f9, 0x00fa, 0x00fb, 0x00fc, 0x01b0, 0x20ab, 0x00ff,
};

71/* In the CP1258 to Unicode direction, the state contains a buffered
72   character, or 0 if none. */
73
74static int
75cp1258_mbtowc (conv_t conv, ucs4_t *pwc, const unsigned char *s, int n)
76{
77  unsigned char c = *s;
78  unsigned short wc;
79  unsigned short last_wc;
80  if (c < 0x80) {
81    wc = c;
82  } else {
83    wc = cp1258_2uni[c-0x80];
84    if (wc == 0xfffd)
85      return RET_ILSEQ;
86  }
87  last_wc = conv->istate;
88  if (last_wc) {
89    if (wc >= 0x0300 && wc < 0x0340) {
90      /* See whether last_wc and wc can be combined. */
91      unsigned int k;
92      unsigned int i1, i2;
93      switch (wc) {
94        case 0x0300: k = 0; break;
95        case 0x0301: k = 1; break;
96        case 0x0303: k = 2; break;
97        case 0x0309: k = 3; break;
98        case 0x0323: k = 4; break;
99        default: abort();
100      }
101      i1 = viet_comp_table[k].idx;
102      i2 = i1 + viet_comp_table[k].len-1;
103      if (last_wc >= viet_comp_table_data[i1].base
104          && last_wc <= viet_comp_table_data[i2].base) {
105        unsigned int i;
106        for (;;) {
107          i = (i1+i2)>>1;
108          if (last_wc == viet_comp_table_data[i].base)
109            break;
110          if (last_wc < viet_comp_table_data[i].base) {
111            if (i1 == i)
112              goto not_combining;
113            i2 = i;
114          } else {
115            if (i1 != i)
116              i1 = i;
117            else {
118              i = i2;
119              if (last_wc == viet_comp_table_data[i].base)
120                break;
121              goto not_combining;
122            }
123          }
124        }
125        last_wc = viet_comp_table_data[i].composed;
126        /* Output the combined character. */
127        conv->istate = 0;
128        *pwc = (ucs4_t) last_wc;
129        return 1;
130      }
131    }
132  not_combining:
133    /* Output the buffered character. */
134    conv->istate = 0;
135    *pwc = (ucs4_t) last_wc;
136    return 0; /* Don't advance the input pointer. */
137  }
138  if (wc >= 0x0041 && wc <= 0x01b0
139      && ((cp1258_comp_bases[(wc - 0x0040) >> 5] >> (wc & 0x1f)) & 1)) {
140    /* wc is a possible match in viet_comp_table_data. Buffer it. */
141    conv->istate = wc;
142    return RET_TOOFEW(1);
143  } else {
144    /* Output wc immediately. */
145    *pwc = (ucs4_t) wc;
146    return 1;
147  }
148}
149
/* CP1258 uses normal_flushwc as its flush routine. */
#define cp1258_flushwc normal_flushwc

/* CP1258 byte for Unicode 0x00c0..0x0117, indexed by (wc - 0x00c0).
   0x00 means the code point has no single-byte mapping. */
static const unsigned char cp1258_page00[88] = {
  0xc0, 0xc1, 0xc2, 0x00, 0xc4, 0xc5, 0xc6, 0xc7, /* 0xc0-0xc7 */
  0xc8, 0xc9, 0xca, 0xcb, 0x00, 0xcd, 0xce, 0xcf, /* 0xc8-0xcf */
  0x00, 0xd1, 0x00, 0xd3, 0xd4, 0x00, 0xd6, 0xd7, /* 0xd0-0xd7 */
  0xd8, 0xd9, 0xda, 0xdb, 0xdc, 0x00, 0x00, 0xdf, /* 0xd8-0xdf */
  0xe0, 0xe1, 0xe2, 0x00, 0xe4, 0xe5, 0xe6, 0xe7, /* 0xe0-0xe7 */
  0xe8, 0xe9, 0xea, 0xeb, 0x00, 0xed, 0xee, 0xef, /* 0xe8-0xef */
  0x00, 0xf1, 0x00, 0xf3, 0xf4, 0x00, 0xf6, 0xf7, /* 0xf0-0xf7 */
  0xf8, 0xf9, 0xfa, 0xfb, 0xfc, 0x00, 0x00, 0xff, /* 0xf8-0xff */
  /* 0x0100 */
  0x00, 0x00, 0xc3, 0xe3, 0x00, 0x00, 0x00, 0x00, /* 0x00-0x07 */
  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x08-0x0f */
  0xd0, 0xf0, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x10-0x17 */
};
/* CP1258 byte for Unicode 0x0150..0x01b7, indexed by (wc - 0x0150).
   0x00 means the code point has no single-byte mapping. */
static const unsigned char cp1258_page01[104] = {
  0x00, 0x00, 0x8c, 0x9c, 0x00, 0x00, 0x00, 0x00, /* 0x50-0x57 */
  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x58-0x5f */
  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x60-0x67 */
  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x68-0x6f */
  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x70-0x77 */
  0x9f, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x78-0x7f */
  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x80-0x87 */
  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x88-0x8f */
  0x00, 0x00, 0x83, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x90-0x97 */
  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x98-0x9f */
  0xd5, 0xf5, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xa0-0xa7 */
  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xdd, /* 0xa8-0xaf */
  0xfd, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xb0-0xb7 */
};
/* CP1258 byte for Unicode 0x02c0..0x02df, indexed by (wc - 0x02c0).
   0x00 means the code point has no single-byte mapping. */
static const unsigned char cp1258_page02[32] = {
  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x88, 0x00, /* 0xc0-0xc7 */
  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xc8-0xcf */
  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xd0-0xd7 */
  0x00, 0x00, 0x00, 0x00, 0x98, 0x00, 0x00, 0x00, /* 0xd8-0xdf */
};
/* CP1258 byte for Unicode 0x0300..0x0327, indexed by (wc - 0x0300).
   cp1258_wctomb also indexes it with (wc - 0x0340) for 0x0340..0x0341.
   0x00 means the code point has no single-byte mapping. */
static const unsigned char cp1258_page03[40] = {
  0xcc, 0xec, 0x00, 0xde, 0x00, 0x00, 0x00, 0x00, /* 0x00-0x07 */
  0x00, 0xd2, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x08-0x0f */
  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x10-0x17 */
  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x18-0x1f */
  0x00, 0x00, 0x00, 0xf2, 0x00, 0x00, 0x00, 0x00, /* 0x20-0x27 */
};
/* CP1258 byte for Unicode 0x2010..0x203f, indexed by (wc - 0x2010).
   0x00 means the code point has no single-byte mapping. */
static const unsigned char cp1258_page20[48] = {
  0x00, 0x00, 0x00, 0x96, 0x97, 0x00, 0x00, 0x00, /* 0x10-0x17 */
  0x91, 0x92, 0x82, 0x00, 0x93, 0x94, 0x84, 0x00, /* 0x18-0x1f */
  0x86, 0x87, 0x95, 0x00, 0x00, 0x00, 0x85, 0x00, /* 0x20-0x27 */
  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x28-0x2f */
  0x89, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x30-0x37 */
  0x00, 0x8b, 0x9b, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x38-0x3f */
};

203static int
204cp1258_wctomb (conv_t conv, unsigned char *r, ucs4_t wc, int n)
205{
206  unsigned char c = 0;
207  if (wc < 0x0080) {
208    *r = wc;
209    return 1;
210  }
211  else if (wc >= 0x00a0 && wc < 0x00c0)
212    c = wc;
213  else if (wc >= 0x00c0 && wc < 0x0118)
214    c = cp1258_page00[wc-0x00c0];
215  else if (wc >= 0x0150 && wc < 0x01b8)
216    c = cp1258_page01[wc-0x0150];
217  else if (wc >= 0x02c0 && wc < 0x02e0)
218    c = cp1258_page02[wc-0x02c0];
219  else if (wc >= 0x0300 && wc < 0x0328)
220    c = cp1258_page03[wc-0x0300];
221  else if (wc >= 0x0340 && wc < 0x0342) /* deprecated Vietnamese tone marks */
222    c = cp1258_page03[wc-0x0340];
223  else if (wc >= 0x2010 && wc < 0x2040)
224    c = cp1258_page20[wc-0x2010];
225  else if (wc == 0x20ab)
226    c = 0xfe;
227  else if (wc == 0x20ac)
228    c = 0x80;
229  else if (wc == 0x2122)
230    c = 0x99;
231  if (c != 0) {
232    *r = c;
233    return 1;
234  }
235  /* Try canonical decomposition. */
236  {
237    /* Binary search through viet_decomp_table. */
238    unsigned int i1 = 0;
239    unsigned int i2 = sizeof(viet_decomp_table)/sizeof(viet_decomp_table[0])-1;
240    if (wc >= viet_decomp_table[i1].composed
241        && wc <= viet_decomp_table[i2].composed) {
242      unsigned int i;
243      for (;;) {
244        /* Here i2 - i1 > 0. */
245        i = (i1+i2)>>1;
246        if (wc == viet_decomp_table[i].composed)
247          break;
248        if (wc < viet_decomp_table[i].composed) {
249          if (i1 == i)
250            return RET_ILUNI;
251          /* Here i1 < i < i2. */
252          i2 = i;
253        } else {
254          /* Here i1 <= i < i2. */
255          if (i1 != i)
256            i1 = i;
257          else {
258            /* Here i2 - i1 = 1. */
259            i = i2;
260            if (wc == viet_decomp_table[i].composed)
261              break;
262            else
263              return RET_ILUNI;
264          }
265        }
266      }
267      /* Found a canonical decomposition. */
268      wc = viet_decomp_table[i].base;
269      /* wc is one of 0x0020, 0x0041..0x005a, 0x0061..0x007a, 0x00a5, 0x00a8,
270         0x00c2, 0x00c5..0x00c7, 0x00ca, 0x00cf, 0x00d3, 0x00d4, 0x00d6,
271         0x00d8, 0x00da, 0x00dc, 0x00e2, 0x00e5..0x00e7, 0x00ea, 0x00ef,
272         0x00f3, 0x00f4, 0x00f6, 0x00f8, 0x00fc, 0x0102, 0x0103, 0x01a0,
273         0x01a1, 0x01af, 0x01b0. */
274      if (wc < 0x0100)
275        c = wc;
276      else if (wc < 0x0118)
277        c = cp1258_page00[wc-0x00c0];
278      else
279        c = cp1258_page01[wc-0x0150];
280      if (n < 2)
281        return RET_TOOSMALL;
282      r[0] = c;
283      r[1] = cp1258_comb_table[viet_decomp_table[i].comb1];
284      return 2;
285    }
286  }
287  return RET_ILUNI;
288}
289