1/*-
2 * Copyright (c) 2011 Michihiro NAKAJIMA
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 *    notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 *    notice, this list of conditions and the following disclaimer in the
12 *    documentation and/or other materials provided with the distribution.
13 *
14 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR(S) ``AS IS'' AND ANY EXPRESS OR
15 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
16 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
17 * IN NO EVENT SHALL THE AUTHOR(S) BE LIABLE FOR ANY DIRECT, INDIRECT,
18 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
19 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
20 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
21 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
22 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
23 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
24 */
25#include "test.h"
26#include <locale.h>
27
28DEFINE_TEST(test_gnutar_filename_encoding_UTF8_CP866)
29{
30  	struct archive *a;
31  	struct archive_entry *entry;
32	char buff[4096];
33	size_t used;
34
35	if (NULL == setlocale(LC_ALL, "en_US.UTF-8")) {
36		skipping("en_US.UTF-8 locale not available on this system.");
37		return;
38	}
39
40	/*
41	 * Verify that UTF-8 filenames are correctly translated into CP866
42	 * and stored with hdrcharset=CP866 option.
43	 */
44	a = archive_write_new();
45	assertEqualInt(ARCHIVE_OK, archive_write_set_format_gnutar(a));
46	if (archive_write_set_options(a, "hdrcharset=CP866") != ARCHIVE_OK) {
47		skipping("This system cannot convert character-set"
48		    " from UTF-8 to CP866.");
49		archive_write_free(a);
50		return;
51	}
52	assertEqualInt(ARCHIVE_OK,
53	    archive_write_open_memory(a, buff, sizeof(buff), &used));
54
55	entry = archive_entry_new2(a);
56	/* Set a UTF-8 filename. */
57	archive_entry_set_pathname(entry, "\xD0\xBF\xD1\x80\xD0\xB8");
58	archive_entry_set_filetype(entry, AE_IFREG);
59	archive_entry_set_size(entry, 0);
60	assertEqualInt(ARCHIVE_OK, archive_write_header(a, entry));
61	archive_entry_free(entry);
62	assertEqualInt(ARCHIVE_OK, archive_write_free(a));
63
64	/* Above three characters in UTF-8 should translate to the following
65	 * three characters in CP866. */
66	assertEqualMem(buff, "\xAF\xE0\xA8", 3);
67}
68
69DEFINE_TEST(test_gnutar_filename_encoding_KOI8R_UTF8)
70{
71  	struct archive *a;
72  	struct archive_entry *entry;
73	char buff[4096];
74	size_t used;
75
76	if (NULL == setlocale(LC_ALL, "ru_RU.KOI8-R")) {
77		skipping("KOI8-R locale not available on this system.");
78		return;
79	}
80
81	/*
82	 * Verify that KOI8-R filenames are correctly translated into UTF-8
83	 * and stored with hdrcharset=UTF-8 option.
84	 */
85	a = archive_write_new();
86	assertEqualInt(ARCHIVE_OK, archive_write_set_format_gnutar(a));
87	if (archive_write_set_options(a, "hdrcharset=UTF-8") != ARCHIVE_OK) {
88		skipping("This system cannot convert character-set"
89		    " from KOI8-R to UTF-8.");
90		archive_write_free(a);
91		return;
92	}
93	assertEqualInt(ARCHIVE_OK,
94	    archive_write_open_memory(a, buff, sizeof(buff), &used));
95
96	entry = archive_entry_new2(a);
97	/* Set a KOI8-R filename. */
98	archive_entry_set_pathname(entry, "\xD0\xD2\xC9");
99	archive_entry_set_filetype(entry, AE_IFREG);
100	archive_entry_set_size(entry, 0);
101	assertEqualInt(ARCHIVE_OK, archive_write_header(a, entry));
102	archive_entry_free(entry);
103	assertEqualInt(ARCHIVE_OK, archive_write_free(a));
104
105	/* Above three characters in KOI8-R should translate to the following
106	 * three characters (two bytes each) in UTF-8. */
107	assertEqualMem(buff, "\xD0\xBF\xD1\x80\xD0\xB8", 6);
108}
109
110DEFINE_TEST(test_gnutar_filename_encoding_KOI8R_CP866)
111{
112  	struct archive *a;
113  	struct archive_entry *entry;
114	char buff[4096];
115	size_t used;
116
117	if (NULL == setlocale(LC_ALL, "ru_RU.KOI8-R")) {
118		skipping("KOI8-R locale not available on this system.");
119		return;
120	}
121
122	/*
123	 * Verify that KOI8-R filenames are correctly translated into CP866
124	 * and stored with hdrcharset=CP866 option.
125	 */
126	a = archive_write_new();
127	assertEqualInt(ARCHIVE_OK, archive_write_set_format_gnutar(a));
128	if (archive_write_set_options(a, "hdrcharset=CP866") != ARCHIVE_OK) {
129		skipping("This system cannot convert character-set"
130		    " from KOI8-R to CP866.");
131		archive_write_free(a);
132		return;
133	}
134	assertEqualInt(ARCHIVE_OK,
135	    archive_write_open_memory(a, buff, sizeof(buff), &used));
136
137	entry = archive_entry_new2(a);
138	/* Set a KOI8-R filename. */
139	archive_entry_set_pathname(entry, "\xD0\xD2\xC9");
140	archive_entry_set_filetype(entry, AE_IFREG);
141	archive_entry_set_size(entry, 0);
142	assertEqualInt(ARCHIVE_OK, archive_write_header(a, entry));
143	archive_entry_free(entry);
144	assertEqualInt(ARCHIVE_OK, archive_write_free(a));
145
146	/* Above three characters in KOI8-R should translate to the following
147	 * three characters in CP866. */
148	assertEqualMem(buff, "\xAF\xE0\xA8", 3);
149}
150
151DEFINE_TEST(test_gnutar_filename_encoding_CP1251_UTF8)
152{
153  	struct archive *a;
154  	struct archive_entry *entry;
155	char buff[4096];
156	size_t used;
157
158	if (NULL == setlocale(LC_ALL, "Russian_Russia") &&
159	    NULL == setlocale(LC_ALL, "ru_RU.CP1251")) {
160		skipping("KOI8-R locale not available on this system.");
161		return;
162	}
163
164	/*
165	 * Verify that CP1251 filenames are correctly translated into UTF-8
166	 * and stored with hdrcharset=UTF-8 option.
167	 */
168	a = archive_write_new();
169	assertEqualInt(ARCHIVE_OK, archive_write_set_format_gnutar(a));
170	if (archive_write_set_options(a, "hdrcharset=UTF-8") != ARCHIVE_OK) {
171		skipping("This system cannot convert character-set"
172		    " from KOI8-R to UTF-8.");
173		archive_write_free(a);
174		return;
175	}
176	assertEqualInt(ARCHIVE_OK,
177	    archive_write_open_memory(a, buff, sizeof(buff), &used));
178
179	entry = archive_entry_new2(a);
180	/* Set a KOI8-R filename. */
181	archive_entry_set_pathname(entry, "\xEF\xF0\xE8");
182	archive_entry_set_filetype(entry, AE_IFREG);
183	archive_entry_set_size(entry, 0);
184	assertEqualInt(ARCHIVE_OK, archive_write_header(a, entry));
185	archive_entry_free(entry);
186	assertEqualInt(ARCHIVE_OK, archive_write_free(a));
187
188	/* Above three characters in CP1251 should translate to the following
189	 * three characters (two bytes each) in UTF-8. */
190	assertEqualMem(buff, "\xD0\xBF\xD1\x80\xD0\xB8", 6);
191}
192
193/*
194 * Do not translate CP1251 into CP866 if non Windows platform.
195 */
196DEFINE_TEST(test_gnutar_filename_encoding_ru_RU_CP1251)
197{
198  	struct archive *a;
199  	struct archive_entry *entry;
200	char buff[4096];
201	size_t used;
202
203	if (NULL == setlocale(LC_ALL, "ru_RU.CP1251")) {
204		skipping("KOI8-R locale not available on this system.");
205		return;
206	}
207
208	/*
209	 * Verify that CP1251 filenames are not translated into any
210	 * other character-set, in particular, CP866.
211	 */
212	a = archive_write_new();
213	assertEqualInt(ARCHIVE_OK, archive_write_set_format_gnutar(a));
214	assertEqualInt(ARCHIVE_OK,
215	    archive_write_open_memory(a, buff, sizeof(buff), &used));
216
217	entry = archive_entry_new2(a);
218	/* Set a KOI8-R filename. */
219	archive_entry_set_pathname(entry, "\xEF\xF0\xE8");
220	archive_entry_set_filetype(entry, AE_IFREG);
221	archive_entry_set_size(entry, 0);
222	assertEqualInt(ARCHIVE_OK, archive_write_header(a, entry));
223	archive_entry_free(entry);
224	assertEqualInt(ARCHIVE_OK, archive_write_free(a));
225
226	/* Above three characters in CP1251 should not translate to
227	 * any other character-set. */
228	assertEqualMem(buff, "\xEF\xF0\xE8", 3);
229}
230
231/*
232 * Other archiver applications on Windows translate CP1251 filenames
233 * into CP866 filenames and store it in the gnutar file.
234 * Test above behavior works well.
235 */
236DEFINE_TEST(test_gnutar_filename_encoding_Russian_Russia)
237{
238  	struct archive *a;
239  	struct archive_entry *entry;
240	char buff[4096];
241	size_t used;
242
243	if (NULL == setlocale(LC_ALL, "Russian_Russia")) {
244		skipping("Russian_Russia locale not available on this system.");
245		return;
246	}
247
248	/*
249	 * Verify that Russian_Russia(CP1251) filenames are correctly translated
250	 * to CP866.
251	 */
252	a = archive_write_new();
253	assertEqualInt(ARCHIVE_OK, archive_write_set_format_gnutar(a));
254	assertEqualInt(ARCHIVE_OK,
255	    archive_write_open_memory(a, buff, sizeof(buff), &used));
256
257	entry = archive_entry_new2(a);
258	/* Set a CP1251 filename. */
259	archive_entry_set_pathname(entry, "\xEF\xF0\xE8");
260	archive_entry_set_filetype(entry, AE_IFREG);
261	archive_entry_set_size(entry, 0);
262	assertEqualInt(ARCHIVE_OK, archive_write_header(a, entry));
263	archive_entry_free(entry);
264	assertEqualInt(ARCHIVE_OK, archive_write_free(a));
265
266	/* Above three characters in CP1251 should translate to the following
267	 * three characters in CP866. */
268	assertEqualMem(buff, "\xAF\xE0\xA8", 3);
269}
270
271DEFINE_TEST(test_gnutar_filename_encoding_EUCJP_UTF8)
272{
273  	struct archive *a;
274  	struct archive_entry *entry;
275	char buff[4096];
276	size_t used;
277
278	if (NULL == setlocale(LC_ALL, "ja_JP.eucJP")) {
279		skipping("eucJP locale not available on this system.");
280		return;
281	}
282
283	/*
284	 * Verify that EUC-JP filenames are correctly translated to UTF-8.
285	 */
286	a = archive_write_new();
287	assertEqualInt(ARCHIVE_OK, archive_write_set_format_gnutar(a));
288	if (archive_write_set_options(a, "hdrcharset=UTF-8") != ARCHIVE_OK) {
289		skipping("This system cannot convert character-set"
290		    " from eucJP to UTF-8.");
291		archive_write_free(a);
292		return;
293	}
294	assertEqualInt(ARCHIVE_OK,
295	    archive_write_open_memory(a, buff, sizeof(buff), &used));
296
297	entry = archive_entry_new2(a);
298	/* Set an EUC-JP filename. */
299	archive_entry_set_pathname(entry, "\xC9\xBD.txt");
300	/* Check the Unicode version. */
301	archive_entry_set_filetype(entry, AE_IFREG);
302	archive_entry_set_size(entry, 0);
303	assertEqualInt(ARCHIVE_OK, archive_write_header(a, entry));
304	archive_entry_free(entry);
305	assertEqualInt(ARCHIVE_OK, archive_write_free(a));
306
307	/* Check UTF-8 version. */
308	assertEqualMem(buff, "\xE8\xA1\xA8.txt", 7);
309}
310
311DEFINE_TEST(test_gnutar_filename_encoding_EUCJP_CP932)
312{
313  	struct archive *a;
314  	struct archive_entry *entry;
315	char buff[4096];
316	size_t used;
317
318	if (NULL == setlocale(LC_ALL, "ja_JP.eucJP")) {
319		skipping("eucJP locale not available on this system.");
320		return;
321	}
322
323	/*
324	 * Verify that EUC-JP filenames are correctly translated to CP932.
325	 */
326	a = archive_write_new();
327	assertEqualInt(ARCHIVE_OK, archive_write_set_format_gnutar(a));
328	if (archive_write_set_options(a, "hdrcharset=CP932") != ARCHIVE_OK) {
329		skipping("This system cannot convert character-set"
330		    " from eucJP to CP932.");
331		archive_write_free(a);
332		return;
333	}
334	assertEqualInt(ARCHIVE_OK,
335	    archive_write_open_memory(a, buff, sizeof(buff), &used));
336
337	entry = archive_entry_new2(a);
338	/* Set an EUC-JP filename. */
339	archive_entry_set_pathname(entry, "\xC9\xBD.txt");
340	/* Check the Unicode version. */
341	archive_entry_set_filetype(entry, AE_IFREG);
342	archive_entry_set_size(entry, 0);
343	assertEqualInt(ARCHIVE_OK, archive_write_header(a, entry));
344	archive_entry_free(entry);
345	assertEqualInt(ARCHIVE_OK, archive_write_free(a));
346
347	/* Check CP932 version. */
348	assertEqualMem(buff, "\x95\x5C.txt", 6);
349}
350
351DEFINE_TEST(test_gnutar_filename_encoding_CP932_UTF8)
352{
353  	struct archive *a;
354  	struct archive_entry *entry;
355	char buff[4096];
356	size_t used;
357
358	if (NULL == setlocale(LC_ALL, "Japanese_Japan") &&
359	    NULL == setlocale(LC_ALL, "ja_JP.SJIS")) {
360		skipping("CP932/SJIS locale not available on this system.");
361		return;
362	}
363
364	/*
365	 * Verify that CP932/SJIS filenames are correctly translated to UTF-8.
366	 */
367	a = archive_write_new();
368	assertEqualInt(ARCHIVE_OK, archive_write_set_format_gnutar(a));
369	if (archive_write_set_options(a, "hdrcharset=UTF-8") != ARCHIVE_OK) {
370		skipping("This system cannot convert character-set"
371		    " from CP932/SJIS to UTF-8.");
372		archive_write_free(a);
373		return;
374	}
375	assertEqualInt(ARCHIVE_OK,
376	    archive_write_open_memory(a, buff, sizeof(buff), &used));
377
378	entry = archive_entry_new2(a);
379	/* Set an CP932/SJIS filename. */
380	archive_entry_set_pathname(entry, "\x95\x5C.txt");
381	/* Check the Unicode version. */
382	archive_entry_set_filetype(entry, AE_IFREG);
383	archive_entry_set_size(entry, 0);
384	assertEqualInt(ARCHIVE_OK, archive_write_header(a, entry));
385	archive_entry_free(entry);
386	assertEqualInt(ARCHIVE_OK, archive_write_free(a));
387
388	/* Check UTF-8 version. */
389	assertEqualMem(buff, "\xE8\xA1\xA8.txt", 7);
390}
391
392