1/*
2 * Copyright (C) 1999-2003, 2005-2006 Free Software Foundation, Inc.
3 * This file is part of the GNU LIBICONV Library.
4 *
5 * The GNU LIBICONV Library is free software; you can redistribute it
6 * and/or modify it under the terms of the GNU Library General Public
7 * License as published by the Free Software Foundation; either version 2
8 * of the License, or (at your option) any later version.
9 *
10 * The GNU LIBICONV Library is distributed in the hope that it will be
11 * useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
13 * Library General Public License for more details.
14 *
15 * You should have received a copy of the GNU Library General Public
16 * License along with the GNU LIBICONV Library; see the file COPYING.LIB.
17 * If not, write to the Free Software Foundation, Inc., 51 Franklin Street,
18 * Fifth Floor, Boston, MA 02110-1301, USA.
19 */
20
21/* This file defines the conversion loop via Unicode as a pivot encoding. */
22
23/* Attempt to transliterate wc. Return code as in xxx_wctomb. */
24static int unicode_transliterate (conv_t cd, ucs4_t wc,
25                                  unsigned char* outptr, size_t outleft)
26{
27  if (cd->oflags & HAVE_HANGUL_JAMO) {
28    /* Decompose Hangul into Jamo. Use double-width Jamo (contained
29       in all Korean encodings and ISO-2022-JP-2), not half-width Jamo
30       (contained in Unicode only). */
31    ucs4_t buf[3];
32    int ret = johab_hangul_decompose(cd,buf,wc);
33    if (ret != RET_ILUNI) {
34      /* we know 1 <= ret <= 3 */
35      state_t backup_state = cd->ostate;
36      unsigned char* backup_outptr = outptr;
37      size_t backup_outleft = outleft;
38      int i, sub_outcount;
39      for (i = 0; i < ret; i++) {
40        if (outleft == 0) {
41          sub_outcount = RET_TOOSMALL;
42          goto johab_hangul_failed;
43        }
44        sub_outcount = cd->ofuncs.xxx_wctomb(cd,outptr,buf[i],outleft);
45        if (sub_outcount <= RET_ILUNI)
46          goto johab_hangul_failed;
47        if (!(sub_outcount <= outleft)) abort();
48        outptr += sub_outcount; outleft -= sub_outcount;
49      }
50      return outptr-backup_outptr;
51    johab_hangul_failed:
52      cd->ostate = backup_state;
53      outptr = backup_outptr;
54      outleft = backup_outleft;
55      if (sub_outcount != RET_ILUNI)
56        return RET_TOOSMALL;
57    }
58  }
59  {
60    /* Try to use a variant, but postfix it with
61       U+303E IDEOGRAPHIC VARIATION INDICATOR
62       (cf. Ken Lunde's "CJKV information processing", p. 188). */
63    int indx = -1;
64    if (wc == 0x3006)
65      indx = 0;
66    else if (wc == 0x30f6)
67      indx = 1;
68    else if (wc >= 0x4e00 && wc < 0xa000)
69      indx = cjk_variants_indx[wc-0x4e00];
70    if (indx >= 0) {
71      for (;; indx++) {
72        ucs4_t buf[2];
73        unsigned short variant = cjk_variants[indx];
74        unsigned short last = variant & 0x8000;
75        variant &= 0x7fff;
76        variant += 0x3000;
77        buf[0] = variant; buf[1] = 0x303e;
78        {
79          state_t backup_state = cd->ostate;
80          unsigned char* backup_outptr = outptr;
81          size_t backup_outleft = outleft;
82          int i, sub_outcount;
83          for (i = 0; i < 2; i++) {
84            if (outleft == 0) {
85              sub_outcount = RET_TOOSMALL;
86              goto variant_failed;
87            }
88            sub_outcount = cd->ofuncs.xxx_wctomb(cd,outptr,buf[i],outleft);
89            if (sub_outcount <= RET_ILUNI)
90              goto variant_failed;
91            if (!(sub_outcount <= outleft)) abort();
92            outptr += sub_outcount; outleft -= sub_outcount;
93          }
94          return outptr-backup_outptr;
95        variant_failed:
96          cd->ostate = backup_state;
97          outptr = backup_outptr;
98          outleft = backup_outleft;
99          if (sub_outcount != RET_ILUNI)
100            return RET_TOOSMALL;
101        }
102        if (last)
103          break;
104      }
105    }
106  }
107  if (wc >= 0x2018 && wc <= 0x201a) {
108    /* Special case for quotation marks 0x2018, 0x2019, 0x201a */
109    ucs4_t substitute =
110      (cd->oflags & HAVE_QUOTATION_MARKS
111       ? (wc == 0x201a ? 0x2018 : wc)
112       : (cd->oflags & HAVE_ACCENTS
113          ? (wc==0x2019 ? 0x00b4 : 0x0060) /* use accents */
114          : 0x0027 /* use apostrophe */
115      )  );
116    int outcount = cd->ofuncs.xxx_wctomb(cd,outptr,substitute,outleft);
117    if (outcount != RET_ILUNI)
118      return outcount;
119  }
120  {
121    /* Use the transliteration table. */
122    int indx = translit_index(wc);
123    if (indx >= 0) {
124      const unsigned int * cp = &translit_data[indx];
125      unsigned int num = *cp++;
126      state_t backup_state = cd->ostate;
127      unsigned char* backup_outptr = outptr;
128      size_t backup_outleft = outleft;
129      unsigned int i;
130      int sub_outcount;
131      for (i = 0; i < num; i++) {
132        if (outleft == 0) {
133          sub_outcount = RET_TOOSMALL;
134          goto translit_failed;
135        }
136        sub_outcount = cd->ofuncs.xxx_wctomb(cd,outptr,cp[i],outleft);
137        if (sub_outcount == RET_ILUNI)
138          /* Recursive transliteration. */
139          sub_outcount = unicode_transliterate(cd,cp[i],outptr,outleft);
140        if (sub_outcount <= RET_ILUNI)
141          goto translit_failed;
142        if (!(sub_outcount <= outleft)) abort();
143        outptr += sub_outcount; outleft -= sub_outcount;
144      }
145      return outptr-backup_outptr;
146    translit_failed:
147      cd->ostate = backup_state;
148      outptr = backup_outptr;
149      outleft = backup_outleft;
150      if (sub_outcount != RET_ILUNI)
151        return RET_TOOSMALL;
152    }
153  }
154  return RET_ILUNI;
155}
156
157#ifndef LIBICONV_PLUG
158
/* State shared between the code that invokes a uc_to_mb fallback handler
   and uc_to_mb_write_replacement, which the handler calls back. */
struct uc_to_mb_fallback_locals {
  unsigned char* l_outbuf;   /* next free byte of the output buffer */
  size_t l_outbytesleft;     /* number of free bytes at l_outbuf */
  int l_errno;               /* 0, or the errno value of the first failure */
};

/* Write callback handed to the uc_to_mb fallback handler.
   Appends the buflen bytes at buf to the output buffer described by
   callback_arg (a struct uc_to_mb_fallback_locals *).  If they do not fit,
   nothing is copied and l_errno is set to E2BIG.  Once l_errno is nonzero,
   all further calls are no-ops. */
static void uc_to_mb_write_replacement (const char *buf, size_t buflen,
                                        void* callback_arg)
{
  struct uc_to_mb_fallback_locals * plocals =
    (struct uc_to_mb_fallback_locals *) callback_arg;
  /* Do nothing if already encountered an error in a previous call. */
  if (plocals->l_errno != 0)
    return;
  /* Refuse the whole replacement if it does not fit. */
  if (plocals->l_outbytesleft < buflen) {
    plocals->l_errno = E2BIG;
    return;
  }
  /* An empty replacement needs no copy.  Skipping memcpy here matters
     because buf may be NULL when buflen is 0, and passing a null pointer
     to memcpy is undefined even for a zero length. */
  if (buflen > 0) {
    memcpy(plocals->l_outbuf, buf, buflen);
    plocals->l_outbuf += buflen;
    plocals->l_outbytesleft -= buflen;
  }
}
182
183struct mb_to_uc_fallback_locals {
184  conv_t l_cd;
185  unsigned char* l_outbuf;
186  size_t l_outbytesleft;
187  int l_errno;
188};
189
190static void mb_to_uc_write_replacement (const unsigned int *buf, size_t buflen,
191                                        void* callback_arg)
192{
193  struct mb_to_uc_fallback_locals * plocals =
194    (struct mb_to_uc_fallback_locals *) callback_arg;
195  /* Do nothing if already encountered an error in a previous call. */
196  if (plocals->l_errno == 0) {
197    /* Attempt to convert the passed buffer to the target encoding. */
198    conv_t cd = plocals->l_cd;
199    unsigned char* outptr = plocals->l_outbuf;
200    size_t outleft = plocals->l_outbytesleft;
201    for (; buflen > 0; buf++, buflen--) {
202      ucs4_t wc = *buf;
203      int outcount;
204      if (outleft == 0) {
205        plocals->l_errno = E2BIG;
206        break;
207      }
208      outcount = cd->ofuncs.xxx_wctomb(cd,outptr,wc,outleft);
209      if (outcount != RET_ILUNI)
210        goto outcount_ok;
211      /* Handle Unicode tag characters (range U+E0000..U+E007F). */
212      if ((wc >> 7) == (0xe0000 >> 7))
213        goto outcount_zero;
214      /* Try transliteration. */
215      if (cd->transliterate) {
216        outcount = unicode_transliterate(cd,wc,outptr,outleft);
217        if (outcount != RET_ILUNI)
218          goto outcount_ok;
219      }
220      if (cd->discard_ilseq) {
221        outcount = 0;
222        goto outcount_ok;
223      }
224      #ifndef LIBICONV_PLUG
225      else if (cd->fallbacks.uc_to_mb_fallback != NULL) {
226        struct uc_to_mb_fallback_locals locals;
227        locals.l_outbuf = outptr;
228        locals.l_outbytesleft = outleft;
229        locals.l_errno = 0;
230        cd->fallbacks.uc_to_mb_fallback(wc,
231                                        uc_to_mb_write_replacement,
232                                        &locals,
233                                        cd->fallbacks.data);
234        if (locals.l_errno != 0) {
235          plocals->l_errno = locals.l_errno;
236          break;
237        }
238        outptr = locals.l_outbuf;
239        outleft = locals.l_outbytesleft;
240        outcount = 0;
241        goto outcount_ok;
242      }
243      #endif
244      outcount = cd->ofuncs.xxx_wctomb(cd,outptr,0xFFFD,outleft);
245      if (outcount != RET_ILUNI)
246        goto outcount_ok;
247      plocals->l_errno = EILSEQ;
248      break;
249    outcount_ok:
250      if (outcount < 0) {
251        plocals->l_errno = E2BIG;
252        break;
253      }
254      #ifndef LIBICONV_PLUG
255      if (cd->hooks.uc_hook)
256        (*cd->hooks.uc_hook)(wc, cd->hooks.data);
257      #endif
258      if (!(outcount <= outleft)) abort();
259      outptr += outcount; outleft -= outcount;
260    outcount_zero: ;
261    }
262    plocals->l_outbuf = outptr;
263    plocals->l_outbytesleft = outleft;
264  }
265}
266
267#endif /* !LIBICONV_PLUG */
268
269static size_t unicode_loop_convert (iconv_t icd,
270                                    const char* * inbuf, size_t *inbytesleft,
271                                    char* * outbuf, size_t *outbytesleft)
272{
273  conv_t cd = (conv_t) icd;
274  size_t result = 0;
275  const unsigned char* inptr = (const unsigned char*) *inbuf;
276  size_t inleft = *inbytesleft;
277  unsigned char* outptr = (unsigned char*) *outbuf;
278  size_t outleft = *outbytesleft;
279  while (inleft > 0) {
280    state_t last_istate = cd->istate;
281    ucs4_t wc;
282    int incount;
283    int outcount;
284    incount = cd->ifuncs.xxx_mbtowc(cd,&wc,inptr,inleft);
285    if (incount < 0) {
286      if (incount == RET_ILSEQ) {
287        /* Case 1: invalid input */
288        if (cd->discard_ilseq) {
289          switch (cd->iindex) {
290            case ei_ucs4: case ei_ucs4be: case ei_ucs4le:
291            case ei_utf32: case ei_utf32be: case ei_utf32le:
292            case ei_ucs4internal: case ei_ucs4swapped:
293              incount = 4; break;
294            case ei_ucs2: case ei_ucs2be: case ei_ucs2le:
295            case ei_utf16: case ei_utf16be: case ei_utf16le:
296            case ei_ucs2internal: case ei_ucs2swapped:
297              incount = 2; break;
298            default:
299              incount = 1; break;
300          }
301          goto outcount_zero;
302        }
303        #ifndef LIBICONV_PLUG
304        else if (cd->fallbacks.mb_to_uc_fallback != NULL) {
305          struct mb_to_uc_fallback_locals locals;
306          switch (cd->iindex) {
307            case ei_ucs4: case ei_ucs4be: case ei_ucs4le:
308            case ei_utf32: case ei_utf32be: case ei_utf32le:
309            case ei_ucs4internal: case ei_ucs4swapped:
310              incount = 4; break;
311            case ei_ucs2: case ei_ucs2be: case ei_ucs2le:
312            case ei_utf16: case ei_utf16be: case ei_utf16le:
313            case ei_ucs2internal: case ei_ucs2swapped:
314              incount = 2; break;
315            default:
316              incount = 1; break;
317          }
318          locals.l_cd = cd;
319          locals.l_outbuf = outptr;
320          locals.l_outbytesleft = outleft;
321          locals.l_errno = 0;
322          cd->fallbacks.mb_to_uc_fallback(inptr, incount,
323                                          mb_to_uc_write_replacement,
324                                          &locals,
325                                          cd->fallbacks.data);
326          if (locals.l_errno != 0) {
327            errno = locals.l_errno;
328            result = -1;
329            break;
330          }
331          outptr = locals.l_outbuf;
332          outleft = locals.l_outbytesleft;
333          result += 1;
334          goto outcount_zero;
335        }
336        #endif
337        errno = EILSEQ;
338        result = -1;
339        break;
340      }
341      if (incount == RET_TOOFEW(0)) {
342        /* Case 2: not enough bytes available to detect anything */
343        errno = EINVAL;
344        result = -1;
345        break;
346      }
347      /* Case 3: k bytes read, but only a shift sequence */
348      incount = -2-incount;
349    } else {
350      /* Case 4: k bytes read, making up a wide character */
351      if (outleft == 0) {
352        cd->istate = last_istate;
353        errno = E2BIG;
354        result = -1;
355        break;
356      }
357      outcount = cd->ofuncs.xxx_wctomb(cd,outptr,wc,outleft);
358      if (outcount != RET_ILUNI)
359        goto outcount_ok;
360      /* Handle Unicode tag characters (range U+E0000..U+E007F). */
361      if ((wc >> 7) == (0xe0000 >> 7))
362        goto outcount_zero;
363      /* Try transliteration. */
364      result++;
365      if (cd->transliterate) {
366        outcount = unicode_transliterate(cd,wc,outptr,outleft);
367        if (outcount != RET_ILUNI)
368          goto outcount_ok;
369      }
370      if (cd->discard_ilseq) {
371        outcount = 0;
372        goto outcount_ok;
373      }
374      #ifndef LIBICONV_PLUG
375      else if (cd->fallbacks.uc_to_mb_fallback != NULL) {
376        struct uc_to_mb_fallback_locals locals;
377        locals.l_outbuf = outptr;
378        locals.l_outbytesleft = outleft;
379        locals.l_errno = 0;
380        cd->fallbacks.uc_to_mb_fallback(wc,
381                                        uc_to_mb_write_replacement,
382                                        &locals,
383                                        cd->fallbacks.data);
384        if (locals.l_errno != 0) {
385          cd->istate = last_istate;
386          errno = locals.l_errno;
387          return -1;
388        }
389        outptr = locals.l_outbuf;
390        outleft = locals.l_outbytesleft;
391        outcount = 0;
392        goto outcount_ok;
393      }
394      #endif
395      outcount = cd->ofuncs.xxx_wctomb(cd,outptr,0xFFFD,outleft);
396      if (outcount != RET_ILUNI)
397        goto outcount_ok;
398      cd->istate = last_istate;
399      errno = EILSEQ;
400      result = -1;
401      break;
402    outcount_ok:
403      if (outcount < 0) {
404        cd->istate = last_istate;
405        errno = E2BIG;
406        result = -1;
407        break;
408      }
409      #ifndef LIBICONV_PLUG
410      if (cd->hooks.uc_hook)
411        (*cd->hooks.uc_hook)(wc, cd->hooks.data);
412      #endif
413      if (!(outcount <= outleft)) abort();
414      outptr += outcount; outleft -= outcount;
415    }
416  outcount_zero:
417    if (!(incount <= inleft)) abort();
418    inptr += incount; inleft -= incount;
419  }
420  *inbuf = (const char*) inptr;
421  *inbytesleft = inleft;
422  *outbuf = (char*) outptr;
423  *outbytesleft = outleft;
424  return result;
425}
426
427static size_t unicode_loop_reset (iconv_t icd,
428                                  char* * outbuf, size_t *outbytesleft)
429{
430  conv_t cd = (conv_t) icd;
431  if (outbuf == NULL || *outbuf == NULL) {
432    /* Reset the states. */
433    memset(&cd->istate,'\0',sizeof(state_t));
434    memset(&cd->ostate,'\0',sizeof(state_t));
435    return 0;
436  } else {
437    size_t result = 0;
438    if (cd->ifuncs.xxx_flushwc) {
439      state_t last_istate = cd->istate;
440      ucs4_t wc;
441      if (cd->ifuncs.xxx_flushwc(cd, &wc)) {
442        unsigned char* outptr = (unsigned char*) *outbuf;
443        size_t outleft = *outbytesleft;
444        int outcount = cd->ofuncs.xxx_wctomb(cd,outptr,wc,outleft);
445        if (outcount != RET_ILUNI)
446          goto outcount_ok;
447        /* Handle Unicode tag characters (range U+E0000..U+E007F). */
448        if ((wc >> 7) == (0xe0000 >> 7))
449          goto outcount_zero;
450        /* Try transliteration. */
451        result++;
452        if (cd->transliterate) {
453          outcount = unicode_transliterate(cd,wc,outptr,outleft);
454          if (outcount != RET_ILUNI)
455            goto outcount_ok;
456        }
457        if (cd->discard_ilseq) {
458          outcount = 0;
459          goto outcount_ok;
460        }
461        #ifndef LIBICONV_PLUG
462        else if (cd->fallbacks.uc_to_mb_fallback != NULL) {
463          struct uc_to_mb_fallback_locals locals;
464          locals.l_outbuf = outptr;
465          locals.l_outbytesleft = outleft;
466          locals.l_errno = 0;
467          cd->fallbacks.uc_to_mb_fallback(wc,
468                                          uc_to_mb_write_replacement,
469                                          &locals,
470                                          cd->fallbacks.data);
471          if (locals.l_errno != 0) {
472            cd->istate = last_istate;
473            errno = locals.l_errno;
474            return -1;
475          }
476          outptr = locals.l_outbuf;
477          outleft = locals.l_outbytesleft;
478          outcount = 0;
479          goto outcount_ok;
480        }
481        #endif
482        outcount = cd->ofuncs.xxx_wctomb(cd,outptr,0xFFFD,outleft);
483        if (outcount != RET_ILUNI)
484          goto outcount_ok;
485        cd->istate = last_istate;
486        errno = EILSEQ;
487        return -1;
488      outcount_ok:
489        if (outcount < 0) {
490          cd->istate = last_istate;
491          errno = E2BIG;
492          return -1;
493        }
494        #ifndef LIBICONV_PLUG
495        if (cd->hooks.uc_hook)
496          (*cd->hooks.uc_hook)(wc, cd->hooks.data);
497        #endif
498        if (!(outcount <= outleft)) abort();
499        outptr += outcount;
500        outleft -= outcount;
501      outcount_zero:
502        *outbuf = (char*) outptr;
503        *outbytesleft = outleft;
504      }
505    }
506    if (cd->ofuncs.xxx_reset) {
507      unsigned char* outptr = (unsigned char*) *outbuf;
508      size_t outleft = *outbytesleft;
509      int outcount = cd->ofuncs.xxx_reset(cd,outptr,outleft);
510      if (outcount < 0) {
511        errno = E2BIG;
512        return -1;
513      }
514      if (!(outcount <= outleft)) abort();
515      *outbuf = (char*) (outptr + outcount);
516      *outbytesleft = outleft - outcount;
517    }
518    memset(&cd->istate,'\0',sizeof(state_t));
519    memset(&cd->ostate,'\0',sizeof(state_t));
520    return result;
521  }
522}
523