1/*- 2 * Copyright (c) 2011 Michihiro NAKAJIMA 3 * All rights reserved. 4 * 5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted provided that the following conditions 7 * are met: 8 * 1. Redistributions of source code must retain the above copyright 9 * notice, this list of conditions and the following disclaimer. 10 * 2. Redistributions in binary form must reproduce the above copyright 11 * notice, this list of conditions and the following disclaimer in the 12 * documentation and/or other materials provided with the distribution. 13 * 14 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR(S) ``AS IS'' AND ANY EXPRESS OR 15 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 16 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 17 * IN NO EVENT SHALL THE AUTHOR(S) BE LIABLE FOR ANY DIRECT, INDIRECT, 18 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 19 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 20 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 21 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 22 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF 23 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 24 */ 25#include "test.h" 26#include <locale.h> 27 28DEFINE_TEST(test_gnutar_filename_encoding_UTF8_CP866) 29{ 30 struct archive *a; 31 struct archive_entry *entry; 32 char buff[4096]; 33 size_t used; 34 35 if (NULL == setlocale(LC_ALL, "en_US.UTF-8")) { 36 skipping("en_US.UTF-8 locale not available on this system."); 37 return; 38 } 39 40 /* 41 * Verify that UTF-8 filenames are correctly translated into CP866 42 * and stored with hdrcharset=CP866 option. 43 */ 44 a = archive_write_new(); 45 assertEqualInt(ARCHIVE_OK, archive_write_set_format_gnutar(a)); 46 if (archive_write_set_options(a, "hdrcharset=CP866") != ARCHIVE_OK) { 47 skipping("This system cannot convert character-set" 48 " from UTF-8 to CP866."); 49 archive_write_free(a); 50 return; 51 } 52 assertEqualInt(ARCHIVE_OK, 53 archive_write_open_memory(a, buff, sizeof(buff), &used)); 54 55 entry = archive_entry_new2(a); 56 /* Set a UTF-8 filename. */ 57 archive_entry_set_pathname(entry, "\xD0\xBF\xD1\x80\xD0\xB8"); 58 archive_entry_set_filetype(entry, AE_IFREG); 59 archive_entry_set_size(entry, 0); 60 assertEqualInt(ARCHIVE_OK, archive_write_header(a, entry)); 61 archive_entry_free(entry); 62 assertEqualInt(ARCHIVE_OK, archive_write_free(a)); 63 64 /* Above three characters in UTF-8 should translate to the following 65 * three characters in CP866. */ 66 assertEqualMem(buff, "\xAF\xE0\xA8", 3); 67} 68 69DEFINE_TEST(test_gnutar_filename_encoding_KOI8R_UTF8) 70{ 71 struct archive *a; 72 struct archive_entry *entry; 73 char buff[4096]; 74 size_t used; 75 76 if (NULL == setlocale(LC_ALL, "ru_RU.KOI8-R")) { 77 skipping("KOI8-R locale not available on this system."); 78 return; 79 } 80 81 /* 82 * Verify that KOI8-R filenames are correctly translated into UTF-8 83 * and stored with hdrcharset=UTF-8 option. 84 */ 85 a = archive_write_new(); 86 assertEqualInt(ARCHIVE_OK, archive_write_set_format_gnutar(a)); 87 if (archive_write_set_options(a, "hdrcharset=UTF-8") != ARCHIVE_OK) { 88 skipping("This system cannot convert character-set" 89 " from KOI8-R to UTF-8."); 90 archive_write_free(a); 91 return; 92 } 93 assertEqualInt(ARCHIVE_OK, 94 archive_write_open_memory(a, buff, sizeof(buff), &used)); 95 96 entry = archive_entry_new2(a); 97 /* Set a KOI8-R filename. */ 98 archive_entry_set_pathname(entry, "\xD0\xD2\xC9"); 99 archive_entry_set_filetype(entry, AE_IFREG); 100 archive_entry_set_size(entry, 0); 101 assertEqualInt(ARCHIVE_OK, archive_write_header(a, entry)); 102 archive_entry_free(entry); 103 assertEqualInt(ARCHIVE_OK, archive_write_free(a)); 104 105 /* Above three characters in KOI8-R should translate to the following 106 * three characters (two bytes each) in UTF-8. */ 107 assertEqualMem(buff, "\xD0\xBF\xD1\x80\xD0\xB8", 6); 108} 109 110DEFINE_TEST(test_gnutar_filename_encoding_KOI8R_CP866) 111{ 112 struct archive *a; 113 struct archive_entry *entry; 114 char buff[4096]; 115 size_t used; 116 117 if (NULL == setlocale(LC_ALL, "ru_RU.KOI8-R")) { 118 skipping("KOI8-R locale not available on this system."); 119 return; 120 } 121 122 /* 123 * Verify that KOI8-R filenames are correctly translated into CP866 124 * and stored with hdrcharset=CP866 option. 125 */ 126 a = archive_write_new(); 127 assertEqualInt(ARCHIVE_OK, archive_write_set_format_gnutar(a)); 128 if (archive_write_set_options(a, "hdrcharset=CP866") != ARCHIVE_OK) { 129 skipping("This system cannot convert character-set" 130 " from KOI8-R to CP866."); 131 archive_write_free(a); 132 return; 133 } 134 assertEqualInt(ARCHIVE_OK, 135 archive_write_open_memory(a, buff, sizeof(buff), &used)); 136 137 entry = archive_entry_new2(a); 138 /* Set a KOI8-R filename. */ 139 archive_entry_set_pathname(entry, "\xD0\xD2\xC9"); 140 archive_entry_set_filetype(entry, AE_IFREG); 141 archive_entry_set_size(entry, 0); 142 assertEqualInt(ARCHIVE_OK, archive_write_header(a, entry)); 143 archive_entry_free(entry); 144 assertEqualInt(ARCHIVE_OK, archive_write_free(a)); 145 146 /* Above three characters in KOI8-R should translate to the following 147 * three characters in CP866. */ 148 assertEqualMem(buff, "\xAF\xE0\xA8", 3); 149} 150 151DEFINE_TEST(test_gnutar_filename_encoding_CP1251_UTF8) 152{ 153 struct archive *a; 154 struct archive_entry *entry; 155 char buff[4096]; 156 size_t used; 157 158 if (NULL == setlocale(LC_ALL, "Russian_Russia") && 159 NULL == setlocale(LC_ALL, "ru_RU.CP1251")) { 160 skipping("KOI8-R locale not available on this system."); 161 return; 162 } 163 164 /* 165 * Verify that CP1251 filenames are correctly translated into UTF-8 166 * and stored with hdrcharset=UTF-8 option. 167 */ 168 a = archive_write_new(); 169 assertEqualInt(ARCHIVE_OK, archive_write_set_format_gnutar(a)); 170 if (archive_write_set_options(a, "hdrcharset=UTF-8") != ARCHIVE_OK) { 171 skipping("This system cannot convert character-set" 172 " from KOI8-R to UTF-8."); 173 archive_write_free(a); 174 return; 175 } 176 assertEqualInt(ARCHIVE_OK, 177 archive_write_open_memory(a, buff, sizeof(buff), &used)); 178 179 entry = archive_entry_new2(a); 180 /* Set a KOI8-R filename. */ 181 archive_entry_set_pathname(entry, "\xEF\xF0\xE8"); 182 archive_entry_set_filetype(entry, AE_IFREG); 183 archive_entry_set_size(entry, 0); 184 assertEqualInt(ARCHIVE_OK, archive_write_header(a, entry)); 185 archive_entry_free(entry); 186 assertEqualInt(ARCHIVE_OK, archive_write_free(a)); 187 188 /* Above three characters in CP1251 should translate to the following 189 * three characters (two bytes each) in UTF-8. */ 190 assertEqualMem(buff, "\xD0\xBF\xD1\x80\xD0\xB8", 6); 191} 192 193/* 194 * Do not translate CP1251 into CP866 if non Windows platform. 195 */ 196DEFINE_TEST(test_gnutar_filename_encoding_ru_RU_CP1251) 197{ 198 struct archive *a; 199 struct archive_entry *entry; 200 char buff[4096]; 201 size_t used; 202 203 if (NULL == setlocale(LC_ALL, "ru_RU.CP1251")) { 204 skipping("KOI8-R locale not available on this system."); 205 return; 206 } 207 208 /* 209 * Verify that CP1251 filenames are not translated into any 210 * other character-set, in particular, CP866. 211 */ 212 a = archive_write_new(); 213 assertEqualInt(ARCHIVE_OK, archive_write_set_format_gnutar(a)); 214 assertEqualInt(ARCHIVE_OK, 215 archive_write_open_memory(a, buff, sizeof(buff), &used)); 216 217 entry = archive_entry_new2(a); 218 /* Set a KOI8-R filename. */ 219 archive_entry_set_pathname(entry, "\xEF\xF0\xE8"); 220 archive_entry_set_filetype(entry, AE_IFREG); 221 archive_entry_set_size(entry, 0); 222 assertEqualInt(ARCHIVE_OK, archive_write_header(a, entry)); 223 archive_entry_free(entry); 224 assertEqualInt(ARCHIVE_OK, archive_write_free(a)); 225 226 /* Above three characters in CP1251 should not translate to 227 * any other character-set. */ 228 assertEqualMem(buff, "\xEF\xF0\xE8", 3); 229} 230 231/* 232 * Other archiver applications on Windows translate CP1251 filenames 233 * into CP866 filenames and store it in the gnutar file. 234 * Test above behavior works well. 235 */ 236DEFINE_TEST(test_gnutar_filename_encoding_Russian_Russia) 237{ 238 struct archive *a; 239 struct archive_entry *entry; 240 char buff[4096]; 241 size_t used; 242 243 if (NULL == setlocale(LC_ALL, "Russian_Russia")) { 244 skipping("Russian_Russia locale not available on this system."); 245 return; 246 } 247 248 /* 249 * Verify that Russian_Russia(CP1251) filenames are correctly translated 250 * to CP866. 251 */ 252 a = archive_write_new(); 253 assertEqualInt(ARCHIVE_OK, archive_write_set_format_gnutar(a)); 254 assertEqualInt(ARCHIVE_OK, 255 archive_write_open_memory(a, buff, sizeof(buff), &used)); 256 257 entry = archive_entry_new2(a); 258 /* Set a CP1251 filename. */ 259 archive_entry_set_pathname(entry, "\xEF\xF0\xE8"); 260 archive_entry_set_filetype(entry, AE_IFREG); 261 archive_entry_set_size(entry, 0); 262 assertEqualInt(ARCHIVE_OK, archive_write_header(a, entry)); 263 archive_entry_free(entry); 264 assertEqualInt(ARCHIVE_OK, archive_write_free(a)); 265 266 /* Above three characters in CP1251 should translate to the following 267 * three characters in CP866. */ 268 assertEqualMem(buff, "\xAF\xE0\xA8", 3); 269} 270 271DEFINE_TEST(test_gnutar_filename_encoding_EUCJP_UTF8) 272{ 273 struct archive *a; 274 struct archive_entry *entry; 275 char buff[4096]; 276 size_t used; 277 278 if (NULL == setlocale(LC_ALL, "ja_JP.eucJP")) { 279 skipping("eucJP locale not available on this system."); 280 return; 281 } 282 283 /* 284 * Verify that EUC-JP filenames are correctly translated to UTF-8. 285 */ 286 a = archive_write_new(); 287 assertEqualInt(ARCHIVE_OK, archive_write_set_format_gnutar(a)); 288 if (archive_write_set_options(a, "hdrcharset=UTF-8") != ARCHIVE_OK) { 289 skipping("This system cannot convert character-set" 290 " from eucJP to UTF-8."); 291 archive_write_free(a); 292 return; 293 } 294 assertEqualInt(ARCHIVE_OK, 295 archive_write_open_memory(a, buff, sizeof(buff), &used)); 296 297 entry = archive_entry_new2(a); 298 /* Set an EUC-JP filename. */ 299 archive_entry_set_pathname(entry, "\xC9\xBD.txt"); 300 /* Check the Unicode version. */ 301 archive_entry_set_filetype(entry, AE_IFREG); 302 archive_entry_set_size(entry, 0); 303 assertEqualInt(ARCHIVE_OK, archive_write_header(a, entry)); 304 archive_entry_free(entry); 305 assertEqualInt(ARCHIVE_OK, archive_write_free(a)); 306 307 /* Check UTF-8 version. */ 308 assertEqualMem(buff, "\xE8\xA1\xA8.txt", 7); 309} 310 311DEFINE_TEST(test_gnutar_filename_encoding_EUCJP_CP932) 312{ 313 struct archive *a; 314 struct archive_entry *entry; 315 char buff[4096]; 316 size_t used; 317 318 if (NULL == setlocale(LC_ALL, "ja_JP.eucJP")) { 319 skipping("eucJP locale not available on this system."); 320 return; 321 } 322 323 /* 324 * Verify that EUC-JP filenames are correctly translated to CP932. 325 */ 326 a = archive_write_new(); 327 assertEqualInt(ARCHIVE_OK, archive_write_set_format_gnutar(a)); 328 if (archive_write_set_options(a, "hdrcharset=CP932") != ARCHIVE_OK) { 329 skipping("This system cannot convert character-set" 330 " from eucJP to CP932."); 331 archive_write_free(a); 332 return; 333 } 334 assertEqualInt(ARCHIVE_OK, 335 archive_write_open_memory(a, buff, sizeof(buff), &used)); 336 337 entry = archive_entry_new2(a); 338 /* Set an EUC-JP filename. */ 339 archive_entry_set_pathname(entry, "\xC9\xBD.txt"); 340 /* Check the Unicode version. */ 341 archive_entry_set_filetype(entry, AE_IFREG); 342 archive_entry_set_size(entry, 0); 343 assertEqualInt(ARCHIVE_OK, archive_write_header(a, entry)); 344 archive_entry_free(entry); 345 assertEqualInt(ARCHIVE_OK, archive_write_free(a)); 346 347 /* Check CP932 version. */ 348 assertEqualMem(buff, "\x95\x5C.txt", 6); 349} 350 351DEFINE_TEST(test_gnutar_filename_encoding_CP932_UTF8) 352{ 353 struct archive *a; 354 struct archive_entry *entry; 355 char buff[4096]; 356 size_t used; 357 358 if (NULL == setlocale(LC_ALL, "Japanese_Japan") && 359 NULL == setlocale(LC_ALL, "ja_JP.SJIS")) { 360 skipping("CP932/SJIS locale not available on this system."); 361 return; 362 } 363 364 /* 365 * Verify that CP932/SJIS filenames are correctly translated to UTF-8. 366 */ 367 a = archive_write_new(); 368 assertEqualInt(ARCHIVE_OK, archive_write_set_format_gnutar(a)); 369 if (archive_write_set_options(a, "hdrcharset=UTF-8") != ARCHIVE_OK) { 370 skipping("This system cannot convert character-set" 371 " from CP932/SJIS to UTF-8."); 372 archive_write_free(a); 373 return; 374 } 375 assertEqualInt(ARCHIVE_OK, 376 archive_write_open_memory(a, buff, sizeof(buff), &used)); 377 378 entry = archive_entry_new2(a); 379 /* Set an CP932/SJIS filename. */ 380 archive_entry_set_pathname(entry, "\x95\x5C.txt"); 381 /* Check the Unicode version. */ 382 archive_entry_set_filetype(entry, AE_IFREG); 383 archive_entry_set_size(entry, 0); 384 assertEqualInt(ARCHIVE_OK, archive_write_header(a, entry)); 385 archive_entry_free(entry); 386 assertEqualInt(ARCHIVE_OK, archive_write_free(a)); 387 388 /* Check UTF-8 version. */ 389 assertEqualMem(buff, "\xE8\xA1\xA8.txt", 7); 390} 391 392