1/*- 2 * Copyright (c) 2011 Michihiro NAKAJIMA 3 * All rights reserved. 4 * 5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted provided that the following conditions 7 * are met: 8 * 1. Redistributions of source code must retain the above copyright 9 * notice, this list of conditions and the following disclaimer. 10 * 2. Redistributions in binary form must reproduce the above copyright 11 * notice, this list of conditions and the following disclaimer in the 12 * documentation and/or other materials provided with the distribution. 13 * 14 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR(S) ``AS IS'' AND ANY EXPRESS OR 15 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 16 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 17 * IN NO EVENT SHALL THE AUTHOR(S) BE LIABLE FOR ANY DIRECT, INDIRECT, 18 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 19 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 20 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 21 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 22 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF 23 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 24 */ 25#include "test.h" 26 27#include <locale.h> 28 29DEFINE_TEST(test_zip_filename_encoding_UTF8) 30{ 31 struct archive *a; 32 struct archive_entry *entry; 33 char buff[4096]; 34 size_t used; 35 36 if (NULL == setlocale(LC_ALL, "en_US.UTF-8")) { 37 skipping("en_US.UTF-8 locale not available on this system."); 38 return; 39 } 40 41 /* 42 * Verify that UTF-8 filenames are correctly stored with 43 * hdrcharset=UTF-8 option. 44 */ 45 a = archive_write_new(); 46 assertEqualInt(ARCHIVE_OK, archive_write_set_format_zip(a)); 47 if (archive_write_set_options(a, "hdrcharset=UTF-8") != ARCHIVE_OK) { 48 skipping("This system cannot convert character-set" 49 " for UTF-8."); 50 archive_write_free(a); 51 return; 52 } 53 assertEqualInt(ARCHIVE_OK, 54 archive_write_open_memory(a, buff, sizeof(buff), &used)); 55 56 entry = archive_entry_new2(a); 57 /* Set a UTF-8 filename. */ 58 archive_entry_set_pathname(entry, "\xD0\xBF\xD1\x80\xD0\xB8"); 59 archive_entry_set_filetype(entry, AE_IFREG); 60 archive_entry_set_size(entry, 0); 61 assertEqualInt(ARCHIVE_OK, archive_write_header(a, entry)); 62 archive_entry_free(entry); 63 assertEqualInt(ARCHIVE_OK, archive_write_free(a)); 64 65 /* A bit 11 of general purpose flag should be 0x08, 66 * which indicates the filename charset is UTF-8. */ 67 assertEqualInt(0x08, buff[7]); 68 assertEqualMem(buff + 30, "\xD0\xBF\xD1\x80\xD0\xB8", 6); 69 70 /* 71 * Verify that UTF-8 filenames are correctly stored without 72 * hdrcharset=UTF-8 option. 73 * Skip on Windows where we default to OEMCP 74 */ 75#if !defined(_WIN32) || defined(__CYGWIN__) 76 a = archive_write_new(); 77 assertEqualInt(ARCHIVE_OK, archive_write_set_format_zip(a)); 78 assertEqualInt(ARCHIVE_OK, 79 archive_write_open_memory(a, buff, sizeof(buff), &used)); 80 81 entry = archive_entry_new2(a); 82 /* Set a UTF-8 filename. */ 83 archive_entry_set_pathname(entry, "\xD0\xBF\xD1\x80\xD0\xB8"); 84 archive_entry_set_filetype(entry, AE_IFREG); 85 archive_entry_set_size(entry, 0); 86 assertEqualInt(ARCHIVE_OK, archive_write_header(a, entry)); 87 archive_entry_free(entry); 88 assertEqualInt(ARCHIVE_OK, archive_write_free(a)); 89 90 /* A bit 11 of general purpose flag should be 0x08, 91 * which indicates the filename charset is UTF-8. */ 92 assertEqualInt(0x08, buff[7]); 93 assertEqualMem(buff + 30, "\xD0\xBF\xD1\x80\xD0\xB8", 6); 94#endif 95 96 /* 97 * Verify that A bit 11 of general purpose flag is not set 98 * when ASCII filenames are stored. 99 */ 100 a = archive_write_new(); 101 assertEqualInt(ARCHIVE_OK, archive_write_set_format_zip(a)); 102 assertEqualInt(ARCHIVE_OK, 103 archive_write_open_memory(a, buff, sizeof(buff), &used)); 104 105 entry = archive_entry_new2(a); 106 /* Set an ASCII filename. */ 107 archive_entry_set_pathname(entry, "abcABC"); 108 archive_entry_set_filetype(entry, AE_IFREG); 109 archive_entry_set_size(entry, 0); 110 assertEqualInt(ARCHIVE_OK, archive_write_header(a, entry)); 111 archive_entry_free(entry); 112 assertEqualInt(ARCHIVE_OK, archive_write_free(a)); 113 114 /* A bit 11 of general purpose flag should be 0, 115 * which indicates the filename charset is unknown. */ 116 assertEqualInt(0, buff[7]); 117 assertEqualMem(buff + 30, "abcABC", 6); 118} 119 120DEFINE_TEST(test_zip_filename_encoding_KOI8R) 121{ 122 struct archive *a; 123 struct archive_entry *entry; 124 char buff[4096]; 125 size_t used; 126 127 if (NULL == setlocale(LC_ALL, "ru_RU.KOI8-R")) { 128 skipping("KOI8-R locale not available on this system."); 129 return; 130 } 131 132 /* 133 * Verify that KOI8-R filenames are correctly translated to UTF-8. 134 */ 135 a = archive_write_new(); 136 assertEqualInt(ARCHIVE_OK, archive_write_set_format_zip(a)); 137 if (archive_write_set_options(a, "hdrcharset=UTF-8") != ARCHIVE_OK) { 138 skipping("This system cannot convert character-set" 139 " from KOI8-R to UTF-8."); 140 archive_write_free(a); 141 return; 142 } 143 assertEqualInt(ARCHIVE_OK, 144 archive_write_open_memory(a, buff, sizeof(buff), &used)); 145 146 entry = archive_entry_new2(a); 147 /* Set a KOI8-R filename. */ 148 archive_entry_set_pathname(entry, "\xD0\xD2\xC9"); 149 archive_entry_set_filetype(entry, AE_IFREG); 150 archive_entry_set_size(entry, 0); 151 assertEqualInt(ARCHIVE_OK, archive_write_header(a, entry)); 152 archive_entry_free(entry); 153 assertEqualInt(ARCHIVE_OK, archive_write_free(a)); 154 155 /* A bit 11 of general purpose flag should be 0x08, 156 * which indicates the filename charset is UTF-8. */ 157 assertEqualInt(0x08, buff[7]); 158 /* Above three characters in KOI8-R should translate to the following 159 * three characters (two bytes each) in UTF-8. */ 160 assertEqualMem(buff + 30, "\xD0\xBF\xD1\x80\xD0\xB8", 6); 161 162 /* 163 * Verify that KOI8-R filenames are not translated to UTF-8. 164 */ 165 a = archive_write_new(); 166 assertEqualInt(ARCHIVE_OK, archive_write_set_format_zip(a)); 167 assertEqualInt(ARCHIVE_OK, 168 archive_write_open_memory(a, buff, sizeof(buff), &used)); 169 170 entry = archive_entry_new2(a); 171 /* Set a KOI8-R filename. */ 172 archive_entry_set_pathname(entry, "\xD0\xD2\xC9"); 173 archive_entry_set_filetype(entry, AE_IFREG); 174 archive_entry_set_size(entry, 0); 175 assertEqualInt(ARCHIVE_OK, archive_write_header(a, entry)); 176 archive_entry_free(entry); 177 assertEqualInt(ARCHIVE_OK, archive_write_free(a)); 178 179 /* A bit 11 of general purpose flag should be 0, 180 * which indicates the filename charset is unknown. */ 181 assertEqualInt(0, buff[7]); 182 /* Above three characters in KOI8-R should not translate to 183 * any character-set. */ 184 assertEqualMem(buff + 30, "\xD0\xD2\xC9", 3); 185 186 /* 187 * Verify that A bit 11 of general purpose flag is not set 188 * when ASCII filenames are stored even if hdrcharset=UTF-8 189 * is specified. 190 */ 191 a = archive_write_new(); 192 assertEqualInt(ARCHIVE_OK, archive_write_set_format_zip(a)); 193 if (archive_write_set_options(a, "hdrcharset=UTF-8") != ARCHIVE_OK) { 194 skipping("This system cannot convert character-set" 195 " from KOI8-R to UTF-8."); 196 archive_write_free(a); 197 return; 198 } 199 assertEqualInt(ARCHIVE_OK, 200 archive_write_open_memory(a, buff, sizeof(buff), &used)); 201 202 entry = archive_entry_new2(a); 203 /* Set an ASCII filename. */ 204 archive_entry_set_pathname(entry, "abcABC"); 205 archive_entry_set_filetype(entry, AE_IFREG); 206 archive_entry_set_size(entry, 0); 207 assertEqualInt(ARCHIVE_OK, archive_write_header(a, entry)); 208 archive_entry_free(entry); 209 assertEqualInt(ARCHIVE_OK, archive_write_free(a)); 210 211 /* A bit 11 of general purpose flag should be 0, 212 * which indicates the filename charset is unknown. */ 213 assertEqualInt(0, buff[7]); 214 assertEqualMem(buff + 30, "abcABC", 6); 215} 216 217/* 218 * Do not translate CP1251 into CP866 if non Windows platform. 219 */ 220DEFINE_TEST(test_zip_filename_encoding_ru_RU_CP1251) 221{ 222 struct archive *a; 223 struct archive_entry *entry; 224 char buff[4096]; 225 size_t used; 226 227 if (NULL == setlocale(LC_ALL, "ru_RU.CP1251")) { 228 skipping("Russian_Russia locale not available on this system."); 229 return; 230 } 231 232 /* 233 * Verify that CP1251 filenames are not translated into any 234 * other character-set, in particular, CP866. 235 */ 236 a = archive_write_new(); 237 assertEqualInt(ARCHIVE_OK, archive_write_set_format_zip(a)); 238 assertEqualInt(ARCHIVE_OK, 239 archive_write_open_memory(a, buff, sizeof(buff), &used)); 240 241 entry = archive_entry_new2(a); 242 /* Set a CP1251 filename. */ 243 archive_entry_set_pathname(entry, "\xEF\xF0\xE8"); 244 archive_entry_set_filetype(entry, AE_IFREG); 245 archive_entry_set_size(entry, 0); 246 assertEqualInt(ARCHIVE_OK, archive_write_header(a, entry)); 247 archive_entry_free(entry); 248 assertEqualInt(ARCHIVE_OK, archive_write_free(a)); 249 250 /* A bit 11 of general purpose flag should be 0, 251 * which indicates the filename charset is unknown. */ 252 assertEqualInt(0, buff[7]); 253 /* Above three characters in CP1251 should not translate into 254 * any other character-set. */ 255 assertEqualMem(buff + 30, "\xEF\xF0\xE8", 3); 256} 257 258/* 259 * Other archiver applications on Windows translate CP1251 filenames 260 * into CP866 filenames and store it in the zip file. 261 * Test above behavior works well. 262 */ 263DEFINE_TEST(test_zip_filename_encoding_Russian_Russia) 264{ 265 struct archive *a; 266 struct archive_entry *entry; 267 char buff[4096]; 268 size_t used; 269 270 if (NULL == setlocale(LC_ALL, "Russian_Russia")) { 271 skipping("Russian_Russia locale not available on this system."); 272 return; 273 } 274 275 /* 276 * Verify that Russian_Russia(CP1251) filenames are correctly translated 277 * to UTF-8. 278 */ 279 a = archive_write_new(); 280 assertEqualInt(ARCHIVE_OK, archive_write_set_format_zip(a)); 281 if (archive_write_set_options(a, "hdrcharset=UTF-8") != ARCHIVE_OK) { 282 skipping("This system cannot convert character-set" 283 " from Russian_Russia.CP1251 to UTF-8."); 284 archive_write_free(a); 285 return; 286 } 287 assertEqualInt(ARCHIVE_OK, 288 archive_write_open_memory(a, buff, sizeof(buff), &used)); 289 290 entry = archive_entry_new2(a); 291 /* Set a CP1251 filename. */ 292 archive_entry_set_pathname(entry, "\xEF\xF0\xE8"); 293 archive_entry_set_filetype(entry, AE_IFREG); 294 archive_entry_set_size(entry, 0); 295 assertEqualInt(ARCHIVE_OK, archive_write_header(a, entry)); 296 archive_entry_free(entry); 297 assertEqualInt(ARCHIVE_OK, archive_write_free(a)); 298 299 /* A bit 11 of general purpose flag should be 0x08, 300 * which indicates the filename charset is UTF-8. */ 301 assertEqualInt(0x08, buff[7]); 302 /* Above three characters in CP1251 should translate to the following 303 * three characters (two bytes each) in UTF-8. */ 304 assertEqualMem(buff + 30, "\xD0\xBF\xD1\x80\xD0\xB8", 6); 305 306 /* 307 * Verify that Russian_Russia(CP1251) filenames are correctly translated 308 * to CP866. 309 */ 310 a = archive_write_new(); 311 assertEqualInt(ARCHIVE_OK, archive_write_set_format_zip(a)); 312 assertEqualInt(ARCHIVE_OK, 313 archive_write_open_memory(a, buff, sizeof(buff), &used)); 314 315 entry = archive_entry_new2(a); 316 /* Set a CP1251 filename. */ 317 archive_entry_set_pathname(entry, "\xEF\xF0\xE8"); 318 archive_entry_set_filetype(entry, AE_IFREG); 319 archive_entry_set_size(entry, 0); 320 assertEqualInt(ARCHIVE_OK, archive_write_header(a, entry)); 321 archive_entry_free(entry); 322 assertEqualInt(ARCHIVE_OK, archive_write_free(a)); 323 324 /* A bit 11 of general purpose flag should be 0, 325 * which indicates the filename charset is unknown. */ 326 assertEqualInt(0, buff[7]); 327 /* Above three characters in CP1251 should translate to the following 328 * three characters in CP866. */ 329 assertEqualMem(buff + 30, "\xAF\xE0\xA8", 3); 330} 331 332DEFINE_TEST(test_zip_filename_encoding_EUCJP) 333{ 334 struct archive *a; 335 struct archive_entry *entry; 336 char buff[4096]; 337 size_t used; 338 339 if (NULL == setlocale(LC_ALL, "ja_JP.eucJP")) { 340 skipping("eucJP locale not available on this system."); 341 return; 342 } 343 344 /* 345 * Verify that EUC-JP filenames are correctly translated to UTF-8. 346 */ 347 a = archive_write_new(); 348 assertEqualInt(ARCHIVE_OK, archive_write_set_format_zip(a)); 349 if (archive_write_set_options(a, "hdrcharset=UTF-8") != ARCHIVE_OK) { 350 skipping("This system cannot convert character-set" 351 " from eucJP to UTF-8."); 352 archive_write_free(a); 353 return; 354 } 355 assertEqualInt(ARCHIVE_OK, 356 archive_write_open_memory(a, buff, sizeof(buff), &used)); 357 358 entry = archive_entry_new2(a); 359 /* Set an EUC-JP filename. */ 360 archive_entry_set_pathname(entry, "\xC9\xBD.txt"); 361 /* Check the Unicode version. */ 362 archive_entry_set_filetype(entry, AE_IFREG); 363 archive_entry_set_size(entry, 0); 364 assertEqualInt(ARCHIVE_OK, archive_write_header(a, entry)); 365 archive_entry_free(entry); 366 assertEqualInt(ARCHIVE_OK, archive_write_free(a)); 367 368 /* A bit 11 of general purpose flag should be 0x08, 369 * which indicates the filename charset is UTF-8. */ 370 assertEqualInt(0x08, buff[7]); 371 /* Check UTF-8 version. */ 372 assertEqualMem(buff + 30, "\xE8\xA1\xA8.txt", 7); 373 374 /* 375 * Verify that EUC-JP filenames are not translated to UTF-8. 376 */ 377 a = archive_write_new(); 378 assertEqualInt(ARCHIVE_OK, archive_write_set_format_zip(a)); 379 assertEqualInt(ARCHIVE_OK, 380 archive_write_open_memory(a, buff, sizeof(buff), &used)); 381 382 entry = archive_entry_new2(a); 383 /* Set an EUC-JP filename. */ 384 archive_entry_set_pathname(entry, "\xC9\xBD.txt"); 385 /* Check the Unicode version. */ 386 archive_entry_set_filetype(entry, AE_IFREG); 387 archive_entry_set_size(entry, 0); 388 assertEqualInt(ARCHIVE_OK, archive_write_header(a, entry)); 389 archive_entry_free(entry); 390 assertEqualInt(ARCHIVE_OK, archive_write_free(a)); 391 392 /* A bit 11 of general purpose flag should be 0, 393 * which indicates the filename charset is unknown. */ 394 assertEqualInt(0, buff[7]); 395 /* Above three characters in EUC-JP should not translate to 396 * any character-set. */ 397 assertEqualMem(buff + 30, "\xC9\xBD.txt", 6); 398 399 /* 400 * Verify that A bit 11 of general purpose flag is not set 401 * when ASCII filenames are stored even if hdrcharset=UTF-8 402 * is specified. 403 */ 404 a = archive_write_new(); 405 assertEqualInt(ARCHIVE_OK, archive_write_set_format_zip(a)); 406 if (archive_write_set_options(a, "hdrcharset=UTF-8") != ARCHIVE_OK) { 407 skipping("This system cannot convert character-set" 408 " from eucJP to UTF-8."); 409 archive_write_free(a); 410 return; 411 } 412 assertEqualInt(ARCHIVE_OK, 413 archive_write_open_memory(a, buff, sizeof(buff), &used)); 414 415 entry = archive_entry_new2(a); 416 /* Set an ASCII filename. */ 417 archive_entry_set_pathname(entry, "abcABC"); 418 /* Check the Unicode version. */ 419 archive_entry_set_filetype(entry, AE_IFREG); 420 archive_entry_set_size(entry, 0); 421 assertEqualInt(ARCHIVE_OK, archive_write_header(a, entry)); 422 archive_entry_free(entry); 423 assertEqualInt(ARCHIVE_OK, archive_write_free(a)); 424 425 /* A bit 11 of general purpose flag should be 0, 426 * which indicates the filename charset is unknown. */ 427 assertEqualInt(0, buff[7]); 428 assertEqualMem(buff + 30, "abcABC", 6); 429} 430 431DEFINE_TEST(test_zip_filename_encoding_CP932) 432{ 433 struct archive *a; 434 struct archive_entry *entry; 435 char buff[4096]; 436 size_t used; 437 438 if (NULL == setlocale(LC_ALL, "Japanese_Japan") && 439 NULL == setlocale(LC_ALL, "ja_JP.SJIS")) { 440 skipping("CP932/SJIS locale not available on this system."); 441 return; 442 } 443 444 /* 445 * Verify that EUC-JP filenames are correctly translated to UTF-8. 446 */ 447 a = archive_write_new(); 448 assertEqualInt(ARCHIVE_OK, archive_write_set_format_zip(a)); 449 if (archive_write_set_options(a, "hdrcharset=UTF-8") != ARCHIVE_OK) { 450 skipping("This system cannot convert character-set" 451 " from CP932/SJIS to UTF-8."); 452 archive_write_free(a); 453 return; 454 } 455 assertEqualInt(ARCHIVE_OK, 456 archive_write_open_memory(a, buff, sizeof(buff), &used)); 457 458 entry = archive_entry_new2(a); 459 /* Set a CP932/SJIS filename. */ 460 archive_entry_set_pathname(entry, "\x95\x5C.txt"); 461 /* Check the Unicode version. */ 462 archive_entry_set_filetype(entry, AE_IFREG); 463 archive_entry_set_size(entry, 0); 464 assertEqualInt(ARCHIVE_OK, archive_write_header(a, entry)); 465 archive_entry_free(entry); 466 assertEqualInt(ARCHIVE_OK, archive_write_free(a)); 467 468 /* A bit 11 of general purpose flag should be 0x08, 469 * which indicates the filename charset is UTF-8. */ 470 assertEqualInt(0x08, buff[7]); 471 /* Check UTF-8 version. */ 472 assertEqualMem(buff + 30, "\xE8\xA1\xA8.txt", 7); 473 474 /* 475 * Verify that CP932/SJIS filenames are not translated to UTF-8. 476 */ 477 a = archive_write_new(); 478 assertEqualInt(ARCHIVE_OK, archive_write_set_format_zip(a)); 479 assertEqualInt(ARCHIVE_OK, 480 archive_write_open_memory(a, buff, sizeof(buff), &used)); 481 482 entry = archive_entry_new2(a); 483 /* Set a CP932/SJIS filename. */ 484 archive_entry_set_pathname(entry, "\x95\x5C.txt"); 485 /* Check the Unicode version. */ 486 archive_entry_set_filetype(entry, AE_IFREG); 487 archive_entry_set_size(entry, 0); 488 assertEqualInt(ARCHIVE_OK, archive_write_header(a, entry)); 489 archive_entry_free(entry); 490 assertEqualInt(ARCHIVE_OK, archive_write_free(a)); 491 492 /* A bit 11 of general purpose flag should be 0, 493 * which indicates the filename charset is unknown. */ 494 assertEqualInt(0, buff[7]); 495 /* Above three characters in CP932/SJIS should not translate to 496 * any character-set. */ 497 assertEqualMem(buff + 30, "\x95\x5C.txt", 6); 498 499 /* 500 * Verify that A bit 11 of general purpose flag is not set 501 * when ASCII filenames are stored even if hdrcharset=UTF-8 502 * is specified. 503 */ 504 a = archive_write_new(); 505 assertEqualInt(ARCHIVE_OK, archive_write_set_format_zip(a)); 506 if (archive_write_set_options(a, "hdrcharset=UTF-8") != ARCHIVE_OK) { 507 skipping("This system cannot convert character-set" 508 " from CP932/SJIS to UTF-8."); 509 archive_write_free(a); 510 return; 511 } 512 assertEqualInt(ARCHIVE_OK, 513 archive_write_open_memory(a, buff, sizeof(buff), &used)); 514 515 entry = archive_entry_new2(a); 516 /* Set an ASCII filename. */ 517 archive_entry_set_pathname(entry, "abcABC"); 518 /* Check the Unicode version. */ 519 archive_entry_set_filetype(entry, AE_IFREG); 520 archive_entry_set_size(entry, 0); 521 assertEqualInt(ARCHIVE_OK, archive_write_header(a, entry)); 522 archive_entry_free(entry); 523 assertEqualInt(ARCHIVE_OK, archive_write_free(a)); 524 525 /* A bit 11 of general purpose flag should be 0, 526 * which indicates the filename charset is unknown. */ 527 assertEqualInt(0, buff[7]); 528 assertEqualMem(buff + 30, "abcABC", 6); 529} 530