12116Sjkh/*- 22116Sjkh * Copyright (c) 2003 Ryuichiro Imura 32116Sjkh * All rights reserved. 42116Sjkh * 52116Sjkh * Redistribution and use in source and binary forms, with or without 62116Sjkh * modification, are permitted provided that the following conditions 72116Sjkh * are met: 82116Sjkh * 1. Redistributions of source code must retain the above copyright 92116Sjkh * notice, this list of conditions and the following disclaimer. 102116Sjkh * 2. Redistributions in binary form must reproduce the above copyright 118870Srgrimes * notice, this list of conditions and the following disclaimer in the 122116Sjkh * documentation and/or other materials provided with the distribution. 132116Sjkh * 142116Sjkh * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 152116Sjkh * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 16176451Sdas * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 17176451Sdas * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 182116Sjkh * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 192116Sjkh * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 202116Sjkh * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 212116Sjkh * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 228870Srgrimes * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 232116Sjkh * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 242116Sjkh * SUCH DAMAGE. 252116Sjkh * 26181100Sdas * $FreeBSD$ 27181100Sdas */ 28181100Sdas 29181100Sdas/* 302116Sjkh * kiconv(3) requires shared linked, and reduce module size 31181100Sdas * when statically linked. 32181100Sdas */ 33181100Sdas 3497407Salfred#ifdef PIC 3597407Salfred 362116Sjkh/* 37181100Sdas * Why do we need quirks? 38181405Sdas * Since each vendors has their own Unicode mapping rules, 392116Sjkh * we need some quirks until iconv(3) supports them. 402116Sjkh * We can define Microsoft mappings here. 412116Sjkh * 42181257Sdas * For example, the eucJP and Unocode mapping rule is based on 43181257Sdas * the JIS standard. Since Microsoft uses cp932 for Unicode mapping 44181257Sdas * witch is not truly based on the JIS standard, reading a file 458870Srgrimes * system created by Microsoft Windows family using eucJP/Unicode 462116Sjkh * mapping rule will cause a problem. That's why we define eucJP-ms here. 47181257Sdas * The eucJP-ms has been defined by The Open Group Japan Vendor Coucil. 482116Sjkh * 49181257Sdas * Well, Apple Mac OS also has their own Unicode mappings, 50181257Sdas * but we won't require these quirks here, because HFS doesn't have 51181257Sdas * Unicode and HFS+ has decomposed Unicode which can not be 52181257Sdas * handled by this xlat16 converter. 53181257Sdas */ 54181257Sdas 552116Sjkh#include <sys/types.h> 562116Sjkh#include <sys/iconv.h> 572116Sjkh 582116Sjkh#include <stdio.h> 59181100Sdas#include <string.h> 60181100Sdas 61181257Sdas#include "quirks.h" 62181100Sdas 63181100Sdas/* 648870Srgrimes * All lists of quirk character set 652116Sjkh */ 66static struct { 67 int vendor; /* reserved for non MS mapping */ 68 const char *base_codeset, *quirk_codeset; 69} quirk_list[] = { 70 { KICONV_VENDOR_MICSFT, "eucJP", "eucJP-ms" }, 71 { KICONV_VENDOR_MICSFT, "EUC-JP", "eucJP-ms" }, 72 { KICONV_VENDOR_MICSFT, "SJIS", "SJIS-ms" }, 73 { KICONV_VENDOR_MICSFT, "Shift_JIS", "SJIS-ms" }, 74 { KICONV_VENDOR_MICSFT, "Big5", "Big5-ms" } 75}; 76 77/* 78 * The character list to replace for Japanese MS-Windows. 79 */ 80static struct quirk_replace_list quirk_jis_cp932[] = { 81 { 0x00a2, 0xffe0 }, /* Cent Sign, Fullwidth Cent Sign */ 82 { 0x00a3, 0xffe1 }, /* Pound Sign, Fullwidth Pound Sign */ 83 { 0x00ac, 0xffe2 }, /* Not Sign, Fullwidth Not Sign */ 84 { 0x2016, 0x2225 }, /* Double Vertical Line, Parallel To */ 85 { 0x203e, 0x007e }, /* Overline, Tilde */ 86 { 0x2212, 0xff0d }, /* Minus Sign, Fullwidth Hyphenminus */ 87 { 0x301c, 0xff5e } /* Wave Dash, Fullwidth Tilde */ 88}; 89 90/* 91 * All entries of quirks 92 */ 93#define NumOf(n) (sizeof((n)) / sizeof((n)[0])) 94static struct { 95 const char *quirk_codeset, *iconv_codeset, *pair_codeset; 96 struct quirk_replace_list (*replace_list)[]; 97 size_t num_of_replaces; 98} quirk_table[] = { 99 { 100 "eucJP-ms", "eucJP", ENCODING_UNICODE, 101 (struct quirk_replace_list (*)[])&quirk_jis_cp932, 102 NumOf(quirk_jis_cp932) 103 }, 104 { 105 "SJIS-ms", "CP932", ENCODING_UNICODE, 106 /* XXX - quirk_replace_list should be NULL */ 107 (struct quirk_replace_list (*)[])&quirk_jis_cp932, 108 NumOf(quirk_jis_cp932) 109 }, 110 { 111 "Big5-ms", "CP950", ENCODING_UNICODE, 112 NULL, 0 113 } 114}; 115 116 117const char * 118kiconv_quirkcs(const char* base, int vendor) 119{ 120 size_t i; 121 122 /* 123 * We should compare codeset names ignoring case here, 124 * so that quirk could be used for all of the user input 125 * patterns. 126 */ 127 for (i = 0; i < NumOf(quirk_list); i++) 128 if (quirk_list[i].vendor == vendor && 129 strcasecmp(quirk_list[i].base_codeset, base) == 0) 130 return (quirk_list[i].quirk_codeset); 131 132 return (base); 133} 134 135/* 136 * Internal Functions 137 */ 138const char * 139search_quirk(const char *given_codeset, 140 const char *pair_codeset, 141 struct quirk_replace_list **replace_list, 142 size_t *num_of_replaces) 143{ 144 size_t i; 145 146 *replace_list = NULL; 147 *num_of_replaces = 0; 148 for (i = 0; i < NumOf(quirk_table); i++) 149 if (strcmp(quirk_table[i].quirk_codeset, given_codeset) == 0) { 150 if (strcmp(quirk_table[i].pair_codeset, pair_codeset) == 0) { 151 *replace_list = *quirk_table[i].replace_list; 152 *num_of_replaces = quirk_table[i].num_of_replaces; 153 } 154 return (quirk_table[i].iconv_codeset); 155 } 156 157 return (given_codeset); 158} 159 160uint16_t 161quirk_vendor2unix(uint16_t c, struct quirk_replace_list *replace_list, size_t num) 162{ 163 size_t i; 164 165 for (i = 0; i < num; i++) 166 if (replace_list[i].vendor_code == c) 167 return (replace_list[i].standard_code); 168 169 return (c); 170} 171 172uint16_t 173quirk_unix2vendor(uint16_t c, struct quirk_replace_list *replace_list, size_t num) 174{ 175 size_t i; 176 177 for (i = 0; i < num; i++) 178 if (replace_list[i].standard_code == c) 179 return (replace_list[i].vendor_code); 180 181 return (c); 182} 183 184#else /* statically linked */ 185 186#include <sys/types.h> 187#include <sys/iconv.h> 188 189const char * 190kiconv_quirkcs(const char* base __unused, int vendor __unused) 191{ 192 193 return (base); 194} 195 196#endif /* PIC */ 197