/*-
 * Copyright (c) 2003 Ryuichiro Imura
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * $FreeBSD: head/lib/libkiconv/xlat16_iconv.c 123293 2003-12-08 08:32:20Z fjoe $
 */

/*
 * The real implementation is only built for shared (PIC) objects, because
 * it loads the iconv library at run time with dlopen(3).  A statically
 * linked build gets the small stubs at the end of this file instead, which
 * also keeps the static module size down.
 */

#ifdef PIC

#include <sys/types.h>
#include <sys/iconv.h>
#include <sys/sysctl.h>

#include <ctype.h>
#include <dlfcn.h>
#include <err.h>
#include <errno.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#include "quirks.h"

typedef void *iconv_t;

/*
 * Conversion table as built by kiconv_xlat16_open().
 *
 * idx[]  one slot per row (0x200 rows); NULL when the row holds no mapping,
 *        otherwise the address of that row inside `data'.
 * data   the non-empty rows, 0x80 uint32_t entries each, packed back to back
 *        in ascending row order (malloc'ed, owned by whoever holds the
 *        structure).
 * size   number of bytes used in `data'; 0 means "no table" and then `data'
 *        is NULL.
 */
struct xlat16_table {
	uint32_t *	idx[0x200];
	void *		data;
	size_t		size;
};

static struct xlat16_table kiconv_xlat16_open(const char *, const char *, int);

static int my_iconv_init(void);
static iconv_t (*my_iconv_open)(const char *, const char *);
static size_t (*my_iconv)(iconv_t, const char **, size_t *, char **, size_t *);
static int (*my_iconv_close)(iconv_t);
static size_t my_iconv_char(iconv_t, const u_char **, size_t *, u_char **, size_t *);

/*
 * Build the xlat16 table converting `fromcode' to `tocode' and hand it to
 * kiconv_add_xlat16_table(), unless the kern.iconv.cslist sysctl already
 * lists that pair.
 *
 * `flag' is passed through to kiconv_xlat16_open() (KICONV_* case flags).
 *
 * Returns 0 when the pair is already listed, -1 on local failure (errno is
 * set to E2BIG when the table exceeds ICONV_CSMAXDATALEN), otherwise the
 * value returned by kiconv_add_xlat16_table().
 */
int
kiconv_add_xlat16_cspair(const char *tocode, const char *fromcode, int flag)
{
	int error, saved_errno;
	size_t i, ncs, size, idxsize;
	struct iconv_cspair_info *csi;
	struct xlat16_table xt;
	char *data;

	/* First call only asks for the size of the charset pair list. */
	if (sysctlbyname("kern.iconv.cslist", NULL, &size, NULL, 0) == -1)
		return (-1);
	if (size > 0) {
		csi = malloc(size);
		if (csi == NULL)
			return (-1);
		if (sysctlbyname("kern.iconv.cslist", csi, &size, NULL, 0) == -1) {
			free(csi);
			return (-1);
		}
		/*
		 * Index the array instead of advancing `csi' so that the
		 * allocation can still be released afterwards.
		 */
		ncs = size / sizeof(*csi);
		for (i = 0; i < ncs; i++) {
			if (strcmp(csi[i].cs_to, tocode) == 0 &&
			    strcmp(csi[i].cs_from, fromcode) == 0)
				break;
		}
		free(csi);
		if (i < ncs)
			return (0);
	}

	xt = kiconv_xlat16_open(tocode, fromcode, flag);
	if (xt.size == 0)
		return (-1);	/* xt.data is NULL whenever xt.size is 0 */

	idxsize = sizeof(xt.idx);

	if ((idxsize + xt.size) > ICONV_CSMAXDATALEN) {
		free(xt.data);
		errno = E2BIG;
		return (-1);
	}

	data = malloc(idxsize + xt.size);
	if (data == NULL) {
		saved_errno = errno;
		free(xt.data);
		errno = saved_errno;
		return (-1);
	}

	/* Wire format: the row index immediately followed by the packed rows. */
	memcpy(data, xt.idx, idxsize);
	memcpy(data + idxsize, xt.data, xt.size);
	error = kiconv_add_xlat16_table(tocode, fromcode, data,
	    (int)(idxsize + xt.size));

	/*
	 * NOTE(review): the buffers are released here on the assumption that
	 * kiconv_add_xlat16_table() (defined outside this file) does not keep
	 * a reference to `data' after it returns -- confirm.
	 */
	saved_errno = errno;
	free(data);
	free(xt.data);
	errno = saved_errno;

	return (error);
}

/*
 * Register both conversion directions between `foreigncode' and `localcode':
 * local -> foreign with the KICONV_FROM_LOWER/KICONV_FROM_UPPER flags, then
 * foreign -> local with the KICONV_LOWER/KICONV_UPPER flags.
 *
 * Returns 0 on success or the first non-zero result of
 * kiconv_add_xlat16_cspair().
 */
int
kiconv_add_xlat16_cspairs(const char *foreigncode, const char *localcode)
{
	int error;

	error = kiconv_add_xlat16_cspair(foreigncode, localcode,
	    KICONV_FROM_LOWER | KICONV_FROM_UPPER);
	if (error)
		return (error);
	error = kiconv_add_xlat16_cspair(localcode, foreigncode,
	    KICONV_LOWER | KICONV_UPPER);
	if (error)
		return (error);

	return (0);
}

/*
 * Build the conversion table from `fromcode' to `tocode' by feeding every
 * 16-bit value through iconv.
 *
 * Row `ls' (0..0x1ff) covers the input values whose low byte is (ls & 0xff)
 * and whose high byte has its top bit equal to bit 8 of `ls'; column `us'
 * (0..0x7f) supplies the remaining seven bits of the high byte.
 *
 * `lcase' is a mask of KICONV_* flags selecting which case variants are
 * recorded in the entries.
 *
 * On any failure the returned table has size == 0 and data == NULL.  On
 * success the caller owns (and must free) `data'.
 */
static struct xlat16_table
kiconv_xlat16_open(const char *tocode, const char *fromcode, int lcase)
{
	u_char src[3], dst[4], ud, ld;
	const u_char *srcp;
	u_char *dstp;
	int us, ls, saved_errno;
	size_t ret;
	uint16_t c;
	uint32_t table[0x80];
	uint32_t *row;
	size_t inbytesleft, outbytesleft, pre_q_size, post_q_size;
	struct xlat16_table xt;
	struct quirk_replace_list *pre_q_list, *post_q_list;
	iconv_t cd;
	char *p;
	void *shrunk;

	/* Give the early-return paths a fully defined (empty) table. */
	memset(&xt, 0, sizeof(xt));
	xt.data = NULL;
	xt.size = 0;

	src[2] = '\0';
	dst[3] = '\0';

	if (my_iconv_init() != 0)
		return (xt);

	cd = my_iconv_open(search_quirk(tocode, fromcode, &pre_q_list, &pre_q_size),
	    search_quirk(fromcode, tocode, &post_q_list, &post_q_size));
	if (cd == (iconv_t) (-1))
		return (xt);

	/* Worst case: every one of the 0x200 rows is populated. */
	xt.data = malloc(0x200 * 0x80 * sizeof(uint32_t));
	if (xt.data == NULL) {
		saved_errno = errno;
		my_iconv_close(cd);
		errno = saved_errno;
		return (xt);
	}

	p = xt.data;

	for (ls = 0 ; ls < 0x200 ; ls++) {
		xt.idx[ls] = NULL;
		for (us = 0 ; us < 0x80 ; us++) {
			/*
			 * Start from "no mapping" so that a conversion that
			 * yields no output bytes cannot inherit the entry
			 * computed for the previous row.
			 */
			table[us] = 0;

			srcp = src;
			dstp = dst;

			inbytesleft = 2;
			outbytesleft = 3;
			memset(dst, 0, outbytesleft);

			c = ((ls & 0x100 ? us | 0x80 : us) << 8) | (u_char)ls;
			c = quirk_vendor2unix(c, pre_q_list, pre_q_size);
			src[0] = (u_char)(c >> 8);
			src[1] = (u_char)c;

			ret = my_iconv_char(cd, &srcp, &inbytesleft,
			    &dstp, &outbytesleft);
			if (ret == (size_t)-1)
				continue;

			ud = (u_char)dst[0];
			ld = (u_char)dst[1];

			/* outbytesleft started at 3: 0/1/2 left = 3/2/1 bytes out. */
			switch(outbytesleft) {
			case 0:
#ifdef XLAT16_ACCEPT_3BYTE_CHR
				table[us] = (ud << 8) | ld;
				table[us] |= (u_char)dst[2] << 16;
				table[us] |= XLAT16_IS_3BYTE_CHR;
#else
				table[us] = 0;
				continue;
#endif
				break;
			case 1:
				table[us] = quirk_unix2vendor((ud << 8) | ld,
				    post_q_list, post_q_size);
				if ((table[us] >> 8) == 0)
					table[us] |= XLAT16_ACCEPT_NULL_OUT;
				break;
			case 2:
				table[us] = ud;
				if (lcase & KICONV_LOWER && ud != tolower(ud)) {
					table[us] |= (u_char)tolower(ud) << 16;
					table[us] |= XLAT16_HAS_LOWER_CASE;
				}
				if (lcase & KICONV_UPPER && ud != toupper(ud)) {
					table[us] |= (u_char)toupper(ud) << 16;
					table[us] |= XLAT16_HAS_UPPER_CASE;
				}
				break;
			default:
				/* nothing was produced; entry stays 0 */
				break;
			}

			/* inbytesleft started at 2: 0/1 left = 2/1 bytes consumed. */
			switch(inbytesleft) {
			case 0:
				if ((ls & 0xff) == 0)
					table[us] |= XLAT16_ACCEPT_NULL_IN;
				break;
			case 1:
				c = ls > 0xff ? us | 0x80 : us;
				if (lcase & KICONV_FROM_LOWER && c != tolower(c)) {
					table[us] |= (u_char)tolower(c) << 16;
					table[us] |= XLAT16_HAS_FROM_LOWER_CASE;
				}
				if (lcase & KICONV_FROM_UPPER && c != toupper(c)) {
					table[us] |= (u_char)toupper(c) << 16;
					table[us] |= XLAT16_HAS_FROM_UPPER_CASE;
				}
				break;
			default:
				break;
			}

			if (table[us] == 0)
				continue;

			/*
			 * Mark the row as populated; the final address is
			 * filled in once the buffer has its final location.
			 */
			xt.idx[ls] = table;
		}
		if (xt.idx[ls]) {
			memcpy(p, table, sizeof(table));
			p += sizeof(table);
		}
	}
	my_iconv_close(cd);

	xt.size = (size_t)(p - (char *)xt.data);
	if (xt.size == 0) {
		/* No row was populated: report "no table" without a buffer. */
		free(xt.data);
		xt.data = NULL;
		return (xt);
	}

	/* Trim the buffer; if that fails the larger original is still valid. */
	shrunk = realloc(xt.data, xt.size);
	if (shrunk != NULL)
		xt.data = shrunk;

	/*
	 * Replace the temporary markers (address of the local `table') by the
	 * address of each row inside the heap buffer, so that no pointer to
	 * this function's stack frame escapes.
	 * NOTE(review): the consumer of the index lives outside this file and
	 * is presumed to test the slots for NULL only -- confirm.
	 */
	row = xt.data;
	for (ls = 0 ; ls < 0x200 ; ls++) {
		if (xt.idx[ls] != NULL) {
			xt.idx[ls] = row;
			row += 0x80;
		}
	}

	return (xt);
}

/*
 * Load libiconv.so and resolve iconv_open/iconv/iconv_close into the
 * my_iconv_* function pointers.  The work is done once; later calls return
 * immediately when all three pointers are already set.
 *
 * Returns 0 on success, -1 with errno = ENOENT when the library or one of
 * the symbols cannot be found.
 */
static int
my_iconv_init(void)
{
	void *iconv_lib;

	if (my_iconv_open != NULL && my_iconv != NULL && my_iconv_close != NULL)
		return (0);

	iconv_lib = dlopen("libiconv.so", RTLD_LAZY | RTLD_GLOBAL);
	if (iconv_lib == NULL) {
		/* warnx(): dlerror() is the message, errno is not meaningful. */
		warnx("Unable to load iconv library: %s", dlerror());
		errno = ENOENT;
		return (-1);
	}
	my_iconv_open = dlsym(iconv_lib, "iconv_open");
	my_iconv = dlsym(iconv_lib, "iconv");
	my_iconv_close = dlsym(iconv_lib, "iconv_close");
	if (my_iconv_open == NULL || my_iconv == NULL ||
	    my_iconv_close == NULL) {
		warnx("Unable to resolve iconv symbols in libiconv.so");
		my_iconv_open = NULL;
		my_iconv = NULL;
		my_iconv_close = NULL;
		dlclose(iconv_lib);
		errno = ENOENT;
		return (-1);
	}

	return (0);
}

/*
 * Convert the character at *ibuf and decide whether it really is the
 * one- or two-byte character the caller asked about.
 *
 * *ibuf must point at at least three readable bytes and *obuf at *olen
 * writable bytes (at least two); neither pointer is advanced.  *ilen and
 * *olen are updated with the bytes left unconsumed / unused.
 *
 * Returns (size_t)-1 when the conversion fails or when the two input bytes
 * turn out to be two independent single-byte characters; otherwise the
 * value returned by iconv (0 when iconv failed after consuming exactly one
 * byte that was followed by a NUL and produced output).
 */
static size_t
my_iconv_char(iconv_t cd, const u_char **ibuf, size_t * ilen, u_char **obuf,
	size_t * olen)
{
	const u_char *sp;
	u_char *dp, ilocal[3], olocal[3];
	u_char c1, c2;
	size_t ret;
	size_t irem, orem;

	sp = *ibuf;
	dp = *obuf;
	irem = *ilen;

	memset(*obuf, 0, *olen);
	ret = my_iconv(cd, (const char **)&sp, ilen, (char **)&dp, olen);
	c1 = (*obuf)[0];
	c2 = (*obuf)[1];

	if (ret == (size_t)-1) {
		if (*ilen == irem - 1 && (*ibuf)[1] == '\0' && (c1 || c2))
			return (0);
		else
			return ((size_t)-1);
	}

	/*
	 * We must judge whether inbuf is a single byte char or a double
	 * byte char.  To do so, convert the first byte on its own and
	 * compare the result with the full conversion.
	 */
	irem = 1;
	orem = 3;

	memset(olocal, 0, orem);
	memcpy(ilocal, *ibuf, sizeof(ilocal));
	sp = ilocal;
	dp = olocal;

	if ((my_iconv(cd,(const char **)&sp, &irem, (char **)&dp, &orem)) !=
	    (size_t)-1) {
		if (olocal[0] != c1)
			return (ret);

		if (olocal[1] == c2 && (*ibuf)[1] == '\0') {
			/*
			 * inbuf is a single byte char
			 */
			*ilen = 1;
			*olen = orem;
			return (ret);
		}

		switch(orem) {
		case 0:
		case 1:
			if (olocal[1] == c2) {
				/*
				 * inbuf is a single byte char,
				 * so return false here.
				 */
				return ((size_t)-1);
			} else {
				/*
				 * inbuf is a double byte char
				 */
				return (ret);
			}
			break;
		case 2:
			/*
			 * should compare second byte of inbuf
			 */
			break;
		default:
			break;
		}
	} else {
		/*
		 * inbuf could not be split, so inbuf is
		 * a double byte char.
		 */
		return (ret);
	}

	/*
	 * try second byte(*(sp+1)) conversion, and compare
	 */
	irem = 1;
	orem = 3;

	memset(olocal, 0, orem);

	sp = ilocal + 1;
	dp = olocal;

	if ((my_iconv(cd,(const char **)&sp, &irem, (char **)&dp, &orem)) !=
	    (size_t)-1) {
		if (olocal[0] == c2)
			/*
			 * inbuf is a single byte char
			 */
			return ((size_t)-1);
	}

	return (ret);
}

#else /* statically linked */

#include <errno.h>

/*
 * Stub for static builds: table generation needs dlopen(3), so the request
 * is always rejected with errno = EINVAL.
 */
int
kiconv_add_xlat16_cspair(const char *tocode, const char *fromcode, int flag)
{
	(void)tocode;
	(void)fromcode;
	(void)flag;

	errno = EINVAL;
	return (-1);
}

/*
 * Stub for static builds: always fails with errno = EINVAL.
 */
int
kiconv_add_xlat16_cspairs(const char *tocode, const char *fromcode)
{
	(void)tocode;
	(void)fromcode;

	errno = EINVAL;
	return (-1);
}

#endif /* PIC */