1/*-
2 * Copyright (c) 2011 Michihiro NAKAJIMA
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 *    notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 *    notice, this list of conditions and the following disclaimer in the
12 *    documentation and/or other materials provided with the distribution.
13 *
14 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR(S) ``AS IS'' AND ANY EXPRESS OR
15 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
16 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
17 * IN NO EVENT SHALL THE AUTHOR(S) BE LIABLE FOR ANY DIRECT, INDIRECT,
18 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
19 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
20 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
21 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
22 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
23 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
24 */
25#include "test.h"
26
27#include <locale.h>
28
29DEFINE_TEST(test_ustar_filename_encoding_UTF8_CP866)
30{
31  	struct archive *a;
32  	struct archive_entry *entry;
33	char buff[4096];
34	size_t used;
35
36	if (NULL == setlocale(LC_ALL, "en_US.UTF-8")) {
37		skipping("en_US.UTF-8 locale not available on this system.");
38		return;
39	}
40
41	/*
42	 * Verify that UTF-8 filenames are correctly translated into CP866
43	 * and stored with hdrcharset=CP866 option.
44	 */
45	a = archive_write_new();
46	assertEqualInt(ARCHIVE_OK, archive_write_set_format_ustar(a));
47	if (archive_write_set_options(a, "hdrcharset=CP866") != ARCHIVE_OK) {
48		skipping("This system cannot convert character-set"
49		    " from UTF-8 to CP866.");
50		archive_write_free(a);
51		return;
52	}
53	assertEqualInt(ARCHIVE_OK,
54	    archive_write_open_memory(a, buff, sizeof(buff), &used));
55
56	entry = archive_entry_new2(a);
57	/* Set a UTF-8 filename. */
58	archive_entry_set_pathname(entry, "\xD0\xBF\xD1\x80\xD0\xB8");
59	archive_entry_set_filetype(entry, AE_IFREG);
60	archive_entry_set_size(entry, 0);
61	assertEqualInt(ARCHIVE_OK, archive_write_header(a, entry));
62	archive_entry_free(entry);
63	assertEqualInt(ARCHIVE_OK, archive_write_free(a));
64
65	/* Above three characters in UTF-8 should translate to the following
66	 * three characters in CP866. */
67	assertEqualMem(buff, "\xAF\xE0\xA8", 3);
68}
69
70DEFINE_TEST(test_ustar_filename_encoding_KOI8R_UTF8)
71{
72  	struct archive *a;
73  	struct archive_entry *entry;
74	char buff[4096];
75	size_t used;
76
77	if (NULL == setlocale(LC_ALL, "ru_RU.KOI8-R")) {
78		skipping("KOI8-R locale not available on this system.");
79		return;
80	}
81
82	/*
83	 * Verify that KOI8-R filenames are correctly translated into UTF-8
84	 * and stored with hdrcharset=UTF-8 option.
85	 */
86	a = archive_write_new();
87	assertEqualInt(ARCHIVE_OK, archive_write_set_format_ustar(a));
88	if (archive_write_set_options(a, "hdrcharset=UTF-8") != ARCHIVE_OK) {
89		skipping("This system cannot convert character-set"
90		    " from KOI8-R to UTF-8.");
91		archive_write_free(a);
92		return;
93	}
94	assertEqualInt(ARCHIVE_OK,
95	    archive_write_open_memory(a, buff, sizeof(buff), &used));
96
97	entry = archive_entry_new2(a);
98	/* Set a KOI8-R filename. */
99	archive_entry_set_pathname(entry, "\xD0\xD2\xC9");
100	archive_entry_set_filetype(entry, AE_IFREG);
101	archive_entry_set_size(entry, 0);
102	assertEqualInt(ARCHIVE_OK, archive_write_header(a, entry));
103	archive_entry_free(entry);
104	assertEqualInt(ARCHIVE_OK, archive_write_free(a));
105
106	/* Above three characters in KOI8-R should translate to the following
107	 * three characters (two bytes each) in UTF-8. */
108	assertEqualMem(buff, "\xD0\xBF\xD1\x80\xD0\xB8", 6);
109}
110
111DEFINE_TEST(test_ustar_filename_encoding_KOI8R_CP866)
112{
113  	struct archive *a;
114  	struct archive_entry *entry;
115	char buff[4096];
116	size_t used;
117
118	if (NULL == setlocale(LC_ALL, "ru_RU.KOI8-R")) {
119		skipping("KOI8-R locale not available on this system.");
120		return;
121	}
122
123	/*
124	 * Verify that KOI8-R filenames are correctly translated into CP866
125	 * and stored with hdrcharset=CP866 option.
126	 */
127	a = archive_write_new();
128	assertEqualInt(ARCHIVE_OK, archive_write_set_format_ustar(a));
129	if (archive_write_set_options(a, "hdrcharset=CP866") != ARCHIVE_OK) {
130		skipping("This system cannot convert character-set"
131		    " from KOI8-R to CP866.");
132		archive_write_free(a);
133		return;
134	}
135	assertEqualInt(ARCHIVE_OK,
136	    archive_write_open_memory(a, buff, sizeof(buff), &used));
137
138	entry = archive_entry_new2(a);
139	/* Set a KOI8-R filename. */
140	archive_entry_set_pathname(entry, "\xD0\xD2\xC9");
141	archive_entry_set_filetype(entry, AE_IFREG);
142	archive_entry_set_size(entry, 0);
143	assertEqualInt(ARCHIVE_OK, archive_write_header(a, entry));
144	archive_entry_free(entry);
145	assertEqualInt(ARCHIVE_OK, archive_write_free(a));
146
147	/* Above three characters in KOI8-R should translate to the following
148	 * three characters in CP866. */
149	assertEqualMem(buff, "\xAF\xE0\xA8", 3);
150}
151
152DEFINE_TEST(test_ustar_filename_encoding_CP1251_UTF8)
153{
154  	struct archive *a;
155  	struct archive_entry *entry;
156	char buff[4096];
157	size_t used;
158
159	if (NULL == setlocale(LC_ALL, "Russian_Russia") &&
160	    NULL == setlocale(LC_ALL, "ru_RU.CP1251")) {
161		skipping("KOI8-R locale not available on this system.");
162		return;
163	}
164
165	/*
166	 * Verify that CP1251 filenames are correctly translated into UTF-8
167	 * and stored with hdrcharset=UTF-8 option.
168	 */
169	a = archive_write_new();
170	assertEqualInt(ARCHIVE_OK, archive_write_set_format_ustar(a));
171	if (archive_write_set_options(a, "hdrcharset=UTF-8") != ARCHIVE_OK) {
172		skipping("This system cannot convert character-set"
173		    " from KOI8-R to UTF-8.");
174		archive_write_free(a);
175		return;
176	}
177	assertEqualInt(ARCHIVE_OK,
178	    archive_write_open_memory(a, buff, sizeof(buff), &used));
179
180	entry = archive_entry_new2(a);
181	/* Set a KOI8-R filename. */
182	archive_entry_set_pathname(entry, "\xEF\xF0\xE8");
183	archive_entry_set_filetype(entry, AE_IFREG);
184	archive_entry_set_size(entry, 0);
185	assertEqualInt(ARCHIVE_OK, archive_write_header(a, entry));
186	archive_entry_free(entry);
187	assertEqualInt(ARCHIVE_OK, archive_write_free(a));
188
189	/* Above three characters in CP1251 should translate to the following
190	 * three characters (two bytes each) in UTF-8. */
191	assertEqualMem(buff, "\xD0\xBF\xD1\x80\xD0\xB8", 6);
192}
193
194/*
195 * Do not translate CP1251 into CP866 if non Windows platform.
196 */
197DEFINE_TEST(test_ustar_filename_encoding_ru_RU_CP1251)
198{
199  	struct archive *a;
200  	struct archive_entry *entry;
201	char buff[4096];
202	size_t used;
203
204	if (NULL == setlocale(LC_ALL, "ru_RU.CP1251")) {
205		skipping("KOI8-R locale not available on this system.");
206		return;
207	}
208
209	/*
210	 * Verify that CP1251 filenames are not translated into any
211	 * other character-set, in particular, CP866.
212	 */
213	a = archive_write_new();
214	assertEqualInt(ARCHIVE_OK, archive_write_set_format_ustar(a));
215	assertEqualInt(ARCHIVE_OK,
216	    archive_write_open_memory(a, buff, sizeof(buff), &used));
217
218	entry = archive_entry_new2(a);
219	/* Set a KOI8-R filename. */
220	archive_entry_set_pathname(entry, "\xEF\xF0\xE8");
221	archive_entry_set_filetype(entry, AE_IFREG);
222	archive_entry_set_size(entry, 0);
223	assertEqualInt(ARCHIVE_OK, archive_write_header(a, entry));
224	archive_entry_free(entry);
225	assertEqualInt(ARCHIVE_OK, archive_write_free(a));
226
227	/* Above three characters in CP1251 should not translate to
228	 * any other character-set. */
229	assertEqualMem(buff, "\xEF\xF0\xE8", 3);
230}
231
232/*
233 * Other archiver applications on Windows translate CP1251 filenames
234 * into CP866 filenames and store it in the ustar file.
235 * Test above behavior works well.
236 */
237DEFINE_TEST(test_ustar_filename_encoding_Russian_Russia)
238{
239  	struct archive *a;
240  	struct archive_entry *entry;
241	char buff[4096];
242	size_t used;
243
244	if (NULL == setlocale(LC_ALL, "Russian_Russia")) {
245		skipping("Russian_Russia locale not available on this system.");
246		return;
247	}
248
249	/*
250	 * Verify that Russian_Russia(CP1251) filenames are correctly translated
251	 * to CP866.
252	 */
253	a = archive_write_new();
254	assertEqualInt(ARCHIVE_OK, archive_write_set_format_ustar(a));
255	assertEqualInt(ARCHIVE_OK,
256	    archive_write_open_memory(a, buff, sizeof(buff), &used));
257
258	entry = archive_entry_new2(a);
259	/* Set a CP1251 filename. */
260	archive_entry_set_pathname(entry, "\xEF\xF0\xE8");
261	archive_entry_set_filetype(entry, AE_IFREG);
262	archive_entry_set_size(entry, 0);
263	assertEqualInt(ARCHIVE_OK, archive_write_header(a, entry));
264	archive_entry_free(entry);
265	assertEqualInt(ARCHIVE_OK, archive_write_free(a));
266
267	/* Above three characters in CP1251 should translate to the following
268	 * three characters in CP866. */
269	assertEqualMem(buff, "\xAF\xE0\xA8", 3);
270}
271
272DEFINE_TEST(test_ustar_filename_encoding_EUCJP_UTF8)
273{
274  	struct archive *a;
275  	struct archive_entry *entry;
276	char buff[4096];
277	size_t used;
278
279	if (NULL == setlocale(LC_ALL, "ja_JP.eucJP")) {
280		skipping("eucJP locale not available on this system.");
281		return;
282	}
283
284	/*
285	 * Verify that EUC-JP filenames are correctly translated to UTF-8.
286	 */
287	a = archive_write_new();
288	assertEqualInt(ARCHIVE_OK, archive_write_set_format_ustar(a));
289	if (archive_write_set_options(a, "hdrcharset=UTF-8") != ARCHIVE_OK) {
290		skipping("This system cannot convert character-set"
291		    " from eucJP to UTF-8.");
292		archive_write_free(a);
293		return;
294	}
295	assertEqualInt(ARCHIVE_OK,
296	    archive_write_open_memory(a, buff, sizeof(buff), &used));
297
298	entry = archive_entry_new2(a);
299	/* Set an EUC-JP filename. */
300	archive_entry_set_pathname(entry, "\xC9\xBD.txt");
301	/* Check the Unicode version. */
302	archive_entry_set_filetype(entry, AE_IFREG);
303	archive_entry_set_size(entry, 0);
304	assertEqualInt(ARCHIVE_OK, archive_write_header(a, entry));
305	archive_entry_free(entry);
306	assertEqualInt(ARCHIVE_OK, archive_write_free(a));
307
308	/* Check UTF-8 version. */
309	assertEqualMem(buff, "\xE8\xA1\xA8.txt", 7);
310}
311
312DEFINE_TEST(test_ustar_filename_encoding_EUCJP_CP932)
313{
314  	struct archive *a;
315  	struct archive_entry *entry;
316	char buff[4096];
317	size_t used;
318
319	if (NULL == setlocale(LC_ALL, "ja_JP.eucJP")) {
320		skipping("eucJP locale not available on this system.");
321		return;
322	}
323
324	/*
325	 * Verify that EUC-JP filenames are correctly translated to CP932.
326	 */
327	a = archive_write_new();
328	assertEqualInt(ARCHIVE_OK, archive_write_set_format_ustar(a));
329	if (archive_write_set_options(a, "hdrcharset=CP932") != ARCHIVE_OK) {
330		skipping("This system cannot convert character-set"
331		    " from eucJP to CP932.");
332		archive_write_free(a);
333		return;
334	}
335	assertEqualInt(ARCHIVE_OK,
336	    archive_write_open_memory(a, buff, sizeof(buff), &used));
337
338	entry = archive_entry_new2(a);
339	/* Set an EUC-JP filename. */
340	archive_entry_set_pathname(entry, "\xC9\xBD.txt");
341	/* Check the Unicode version. */
342	archive_entry_set_filetype(entry, AE_IFREG);
343	archive_entry_set_size(entry, 0);
344	assertEqualInt(ARCHIVE_OK, archive_write_header(a, entry));
345	archive_entry_free(entry);
346	assertEqualInt(ARCHIVE_OK, archive_write_free(a));
347
348	/* Check CP932 version. */
349	assertEqualMem(buff, "\x95\x5C.txt", 6);
350}
351
352DEFINE_TEST(test_ustar_filename_encoding_CP932_UTF8)
353{
354  	struct archive *a;
355  	struct archive_entry *entry;
356	char buff[4096];
357	size_t used;
358
359	if (NULL == setlocale(LC_ALL, "Japanese_Japan") &&
360	    NULL == setlocale(LC_ALL, "ja_JP.SJIS")) {
361		skipping("CP932/SJIS locale not available on this system.");
362		return;
363	}
364
365	/*
366	 * Verify that CP932/SJIS filenames are correctly translated to UTF-8.
367	 */
368	a = archive_write_new();
369	assertEqualInt(ARCHIVE_OK, archive_write_set_format_ustar(a));
370	if (archive_write_set_options(a, "hdrcharset=UTF-8") != ARCHIVE_OK) {
371		skipping("This system cannot convert character-set"
372		    " from CP932/SJIS to UTF-8.");
373		archive_write_free(a);
374		return;
375	}
376	assertEqualInt(ARCHIVE_OK,
377	    archive_write_open_memory(a, buff, sizeof(buff), &used));
378
379	entry = archive_entry_new2(a);
380	/* Set a CP932/SJIS filename. */
381	archive_entry_set_pathname(entry, "\x95\x5C.txt");
382	/* Check the Unicode version. */
383	archive_entry_set_filetype(entry, AE_IFREG);
384	archive_entry_set_size(entry, 0);
385	assertEqualInt(ARCHIVE_OK, archive_write_header(a, entry));
386	archive_entry_free(entry);
387	assertEqualInt(ARCHIVE_OK, archive_write_free(a));
388
389	/* Check UTF-8 version. */
390	assertEqualMem(buff, "\xE8\xA1\xA8.txt", 7);
391}
392
393