test_read_format_tar_filename.c revision 302001
1/*-
2 * Copyright (c) 2011 Michihiro NAKAJIMA
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 *    notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 *    notice, this list of conditions and the following disclaimer in the
12 *    documentation and/or other materials provided with the distribution.
13 *
14 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR(S) ``AS IS'' AND ANY EXPRESS OR
15 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
16 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
17 * IN NO EVENT SHALL THE AUTHOR(S) BE LIABLE FOR ANY DIRECT, INDIRECT,
18 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
19 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
20 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
21 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
22 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
23 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
24 */
25#include "test.h"
26__FBSDID("$FreeBSD");
27
28#include <locale.h>
29
/*
 * The sample tar file was made in LANG=KOI8-R and it contains two
 * files whose filenames are stored in different charsets.
 * - the filename of the first file is stored in BINARY mode.
 * - the filename of the second file is stored in UTF-8.
 *
 * Whether or not the hdrcharset option is specified, we will correctly
 * read the filename of the second file, which is stored in UTF-8 by default.
 */
39
40static void
41test_read_format_tar_filename_KOI8R_CP866(const char *refname)
42{
43	struct archive *a;
44	struct archive_entry *ae;
45
46	/*
47 	* Read filename in ru_RU.CP866 with "hdrcharset=KOI8-R" option.
48 	* We should correctly read two filenames.
49	*/
50	if (NULL == setlocale(LC_ALL, "Russian_Russia.866") &&
51	    NULL == setlocale(LC_ALL, "ru_RU.CP866")) {
52		skipping("ru_RU.CP866 locale not available on this system.");
53		return;
54	}
55
56	/* Test if the platform can convert from UTF-8. */
57	assert((a = archive_read_new()) != NULL);
58	assertEqualIntA(a, ARCHIVE_OK, archive_read_support_format_tar(a));
59	if (ARCHIVE_OK != archive_read_set_options(a, "hdrcharset=UTF-8")) {
60		assertEqualInt(ARCHIVE_OK, archive_read_free(a));
61		skipping("This system cannot convert character-set"
62		    " from UTF-8 to CP866.");
63		return;
64	}
65	assertEqualInt(ARCHIVE_OK, archive_read_free(a));
66
67	assert((a = archive_read_new()) != NULL);
68	assertEqualIntA(a, ARCHIVE_OK, archive_read_support_filter_all(a));
69	assertEqualIntA(a, ARCHIVE_OK, archive_read_support_format_all(a));
70	if (ARCHIVE_OK != archive_read_set_options(a, "hdrcharset=KOI8-R")) {
71		skipping("This system cannot convert character-set"
72		    " from KOI8-R to CP866.");
73		goto next_test;
74	}
75	assertEqualIntA(a, ARCHIVE_OK,
76	    archive_read_open_filename(a, refname, 10240));
77
78	/* Verify regular first file. */
79	assertEqualIntA(a, ARCHIVE_OK, archive_read_next_header(a, &ae));
80	assertEqualString("\x8f\x90\x88\x82\x85\x92",
81	    archive_entry_pathname(ae));
82	assertEqualInt(6, archive_entry_size(ae));
83	assertEqualInt(archive_entry_is_encrypted(ae), 0);
84	assertEqualIntA(a, archive_read_has_encrypted_entries(a), ARCHIVE_READ_FORMAT_ENCRYPTION_UNSUPPORTED);
85
86	/* Verify regular second file. */
87	assertEqualIntA(a, ARCHIVE_OK, archive_read_next_header(a, &ae));
88	assertEqualString("\xaf\xe0\xa8\xa2\xa5\xe2",
89	    archive_entry_pathname(ae));
90	assertEqualInt(6, archive_entry_size(ae));
91	assertEqualInt(archive_entry_is_encrypted(ae), 0);
92	assertEqualIntA(a, archive_read_has_encrypted_entries(a), ARCHIVE_READ_FORMAT_ENCRYPTION_UNSUPPORTED);
93
94
95	/* End of archive. */
96	assertEqualIntA(a, ARCHIVE_EOF, archive_read_next_header(a, &ae));
97
98	/* Verify archive format. */
99	assertEqualIntA(a, ARCHIVE_FILTER_COMPRESS, archive_filter_code(a, 0));
100	assertEqualIntA(a, ARCHIVE_FORMAT_TAR_PAX_INTERCHANGE,
101	    archive_format(a));
102
103	/* Close the archive. */
104	assertEqualInt(ARCHIVE_OK, archive_read_close(a));
105next_test:
106	assertEqualInt(ARCHIVE_OK, archive_read_free(a));
107
108
109	/*
110	 * Read filename in ru_RU.CP866 without "hdrcharset=KOI8-R" option.
111	 * The filename we can properly read is only second file.
112	 */
113
114	assert((a = archive_read_new()) != NULL);
115	assertEqualIntA(a, ARCHIVE_OK, archive_read_support_filter_all(a));
116	assertEqualIntA(a, ARCHIVE_OK, archive_read_support_format_all(a));
117	assertEqualIntA(a, ARCHIVE_OK,
118	    archive_read_open_filename(a, refname, 10240));
119
120	/*
121	 * Verify regular first file.
122	 * The filename is not translated to CP866 because hdrcharset
123	 * attribute is BINARY and there is not way to know its charset.
124	 */
125	assertEqualIntA(a, ARCHIVE_OK, archive_read_next_header(a, &ae));
126	/* A filename is in KOI8-R. */
127	assertEqualString("\xf0\xf2\xe9\xf7\xe5\xf4",
128	    archive_entry_pathname(ae));
129	assertEqualInt(6, archive_entry_size(ae));
130	assertEqualInt(archive_entry_is_encrypted(ae), 0);
131	assertEqualIntA(a, archive_read_has_encrypted_entries(a), ARCHIVE_READ_FORMAT_ENCRYPTION_UNSUPPORTED);
132
133	/*
134	 * Verify regular second file.
135	 * The filename is translated from UTF-8 to CP866
136	 */
137	assertEqualIntA(a, ARCHIVE_OK, archive_read_next_header(a, &ae));
138	assertEqualString("\xaf\xe0\xa8\xa2\xa5\xe2",
139	    archive_entry_pathname(ae));
140	assertEqualInt(6, archive_entry_size(ae));
141	assertEqualInt(archive_entry_is_encrypted(ae), 0);
142	assertEqualIntA(a, archive_read_has_encrypted_entries(a), ARCHIVE_READ_FORMAT_ENCRYPTION_UNSUPPORTED);
143
144
145	/* End of archive. */
146	assertEqualIntA(a, ARCHIVE_EOF, archive_read_next_header(a, &ae));
147
148	/* Verify archive format. */
149	assertEqualIntA(a, ARCHIVE_FILTER_COMPRESS, archive_filter_code(a, 0));
150	assertEqualIntA(a, ARCHIVE_FORMAT_TAR_PAX_INTERCHANGE,
151	    archive_format(a));
152
153	/* Close the archive. */
154	assertEqualInt(ARCHIVE_OK, archive_read_close(a));
155	assertEqualInt(ARCHIVE_OK, archive_read_free(a));
156}
157
158static void
159test_read_format_tar_filename_KOI8R_UTF8(const char *refname)
160{
161	struct archive *a;
162	struct archive_entry *ae;
163
164	/*
165	 * Read filename in en_US.UTF-8 with "hdrcharset=KOI8-R" option.
166	 * We should correctly read two filenames.
167	 */
168	if (NULL == setlocale(LC_ALL, "en_US.UTF-8")) {
169		skipping("en_US.UTF-8 locale not available on this system.");
170		return;
171	}
172
173	assert((a = archive_read_new()) != NULL);
174	assertEqualIntA(a, ARCHIVE_OK, archive_read_support_filter_all(a));
175	assertEqualIntA(a, ARCHIVE_OK, archive_read_support_format_all(a));
176	if (ARCHIVE_OK != archive_read_set_options(a, "hdrcharset=KOI8-R")) {
177		assertEqualInt(ARCHIVE_OK, archive_read_free(a));
178		skipping("This system cannot convert character-set"
179		    " from KOI8-R to UTF-8.");
180		return;
181	}
182	assertEqualIntA(a, ARCHIVE_OK,
183	    archive_read_open_filename(a, refname, 10240));
184
185	/* Verify regular file. */
186	assertEqualIntA(a, ARCHIVE_OK, archive_read_next_header(a, &ae));
187	assertEqualString("\xd0\x9f\xd0\xa0\xd0\x98\xd0\x92\xd0\x95\xd0\xa2",
188	    archive_entry_pathname(ae));
189	assertEqualInt(6, archive_entry_size(ae));
190	assertEqualInt(archive_entry_is_encrypted(ae), 0);
191	assertEqualIntA(a, archive_read_has_encrypted_entries(a), ARCHIVE_READ_FORMAT_ENCRYPTION_UNSUPPORTED);
192
193	/* Verify regular file. */
194	assertEqualIntA(a, ARCHIVE_OK, archive_read_next_header(a, &ae));
195	assertEqualString("\xd0\xbf\xd1\x80\xd0\xb8\xd0\xb2\xd0\xb5\xd1\x82",
196	    archive_entry_pathname(ae));
197	assertEqualInt(6, archive_entry_size(ae));
198	assertEqualInt(archive_entry_is_encrypted(ae), 0);
199	assertEqualIntA(a, archive_read_has_encrypted_entries(a), ARCHIVE_READ_FORMAT_ENCRYPTION_UNSUPPORTED);
200
201	/* Verify encryption status */
202	assertEqualInt(archive_entry_is_encrypted(ae), 0);
203	assertEqualIntA(a, archive_read_has_encrypted_entries(a), ARCHIVE_READ_FORMAT_ENCRYPTION_UNSUPPORTED);
204
205	/* End of archive. */
206	assertEqualIntA(a, ARCHIVE_EOF, archive_read_next_header(a, &ae));
207
208	/* Verify archive format. */
209	assertEqualIntA(a, ARCHIVE_FILTER_COMPRESS, archive_filter_code(a, 0));
210	assertEqualIntA(a, ARCHIVE_FORMAT_TAR_PAX_INTERCHANGE,
211	    archive_format(a));
212
213	/* Verify encryption status */
214	assertEqualInt(archive_entry_is_encrypted(ae), 0);
215	assertEqualIntA(a, archive_read_has_encrypted_entries(a), ARCHIVE_READ_FORMAT_ENCRYPTION_UNSUPPORTED);
216
217	/* Close the archive. */
218	assertEqualInt(ARCHIVE_OK, archive_read_close(a));
219	assertEqualInt(ARCHIVE_OK, archive_read_free(a));
220
221	/*
222	 * Read filename in en_US.UTF-8 without "hdrcharset=KOI8-R" option.
223	 * The filename we can properly read is only second file.
224	 */
225
226	assert((a = archive_read_new()) != NULL);
227	assertEqualIntA(a, ARCHIVE_OK, archive_read_support_filter_all(a));
228	assertEqualIntA(a, ARCHIVE_OK, archive_read_support_format_all(a));
229	assertEqualIntA(a, ARCHIVE_OK,
230	    archive_read_open_filename(a, refname, 10240));
231
232	/*
233	 * Verify regular first file.
234	 * The filename is not translated to UTF-8 because hdrcharset
235	 * attribute is BINARY and there is not way to know its charset.
236	 */
237	assertEqualIntA(a, ARCHIVE_OK, archive_read_next_header(a, &ae));
238	/* A filename is in KOI8-R. */
239	assertEqualString("\xf0\xf2\xe9\xf7\xe5\xf4",
240	    archive_entry_pathname(ae));
241	assertEqualInt(6, archive_entry_size(ae));
242
243	/* Verify encryption status */
244	assertEqualInt(archive_entry_is_encrypted(ae), 0);
245	assertEqualIntA(a, archive_read_has_encrypted_entries(a), ARCHIVE_READ_FORMAT_ENCRYPTION_UNSUPPORTED);
246
247	/*
248	 * Verify regular second file.
249	 */
250	assertEqualIntA(a, ARCHIVE_OK, archive_read_next_header(a, &ae));
251	assertEqualString("\xd0\xbf\xd1\x80\xd0\xb8\xd0\xb2\xd0\xb5\xd1\x82",
252	    archive_entry_pathname(ae));
253	assertEqualInt(6, archive_entry_size(ae));
254
255
256	/* End of archive. */
257	assertEqualIntA(a, ARCHIVE_EOF, archive_read_next_header(a, &ae));
258
259	/* Verify archive format. */
260	assertEqualIntA(a, ARCHIVE_FILTER_COMPRESS, archive_filter_code(a, 0));
261	assertEqualIntA(a, ARCHIVE_FORMAT_TAR_PAX_INTERCHANGE,
262	    archive_format(a));
263
264	/* Close the archive. */
265	assertEqualInt(ARCHIVE_OK, archive_read_close(a));
266	assertEqualInt(ARCHIVE_OK, archive_read_free(a));
267}
268
269static void
270test_read_format_tar_filename_KOI8R_CP1251(const char *refname)
271{
272	struct archive *a;
273	struct archive_entry *ae;
274
275	/*
276 	* Read filename in CP1251 with "hdrcharset=KOI8-R" option.
277 	* We should correctly read two filenames.
278	*/
279	if (NULL == setlocale(LC_ALL, "Russian_Russia") &&
280	    NULL == setlocale(LC_ALL, "ru_RU.CP1251")) {
281		skipping("CP1251 locale not available on this system.");
282		return;
283	}
284
285	/* Test if the platform can convert from UTF-8. */
286	assert((a = archive_read_new()) != NULL);
287	assertEqualIntA(a, ARCHIVE_OK, archive_read_support_format_tar(a));
288	if (ARCHIVE_OK != archive_read_set_options(a, "hdrcharset=UTF-8")) {
289		assertEqualInt(ARCHIVE_OK, archive_read_free(a));
290		skipping("This system cannot convert character-set"
291		    " from UTF-8 to CP1251.");
292		return;
293	}
294	assertEqualInt(ARCHIVE_OK, archive_read_free(a));
295
296	assert((a = archive_read_new()) != NULL);
297	assertEqualIntA(a, ARCHIVE_OK, archive_read_support_filter_all(a));
298	assertEqualIntA(a, ARCHIVE_OK, archive_read_support_format_all(a));
299	if (ARCHIVE_OK != archive_read_set_options(a, "hdrcharset=KOI8-R")) {
300		skipping("This system cannot convert character-set"
301		    " from KOI8-R to CP1251.");
302		goto next_test;
303	}
304	assertEqualIntA(a, ARCHIVE_OK,
305	    archive_read_open_filename(a, refname, 10240));
306
307	/* Verify regular first file. */
308	assertEqualIntA(a, ARCHIVE_OK, archive_read_next_header(a, &ae));
309	assertEqualString("\xcf\xd0\xc8\xc2\xc5\xd2",
310	    archive_entry_pathname(ae));
311	assertEqualInt(6, archive_entry_size(ae));
312	assertEqualInt(archive_entry_is_encrypted(ae), 0);
313	assertEqualIntA(a, archive_read_has_encrypted_entries(a), ARCHIVE_READ_FORMAT_ENCRYPTION_UNSUPPORTED);
314
315	/* Verify regular second file. */
316	assertEqualIntA(a, ARCHIVE_OK, archive_read_next_header(a, &ae));
317	assertEqualString("\xef\xf0\xe8\xe2\xe5\xf2",
318	    archive_entry_pathname(ae));
319	assertEqualInt(6, archive_entry_size(ae));
320	assertEqualInt(archive_entry_is_encrypted(ae), 0);
321	assertEqualIntA(a, archive_read_has_encrypted_entries(a), ARCHIVE_READ_FORMAT_ENCRYPTION_UNSUPPORTED);
322
323
324	/* End of archive. */
325	assertEqualIntA(a, ARCHIVE_EOF, archive_read_next_header(a, &ae));
326
327	/* Verify archive format. */
328	assertEqualIntA(a, ARCHIVE_FILTER_COMPRESS, archive_filter_code(a, 0));
329	assertEqualIntA(a, ARCHIVE_FORMAT_TAR_PAX_INTERCHANGE,
330	    archive_format(a));
331
332	/* Close the archive. */
333	assertEqualInt(ARCHIVE_OK, archive_read_close(a));
334next_test:
335	assertEqualInt(ARCHIVE_OK, archive_read_free(a));
336
337	/*
338	 * Read filename in CP1251 without "hdrcharset=KOI8-R" option.
339	 * The filename we can properly read is only second file.
340	 */
341
342	assert((a = archive_read_new()) != NULL);
343	assertEqualIntA(a, ARCHIVE_OK, archive_read_support_filter_all(a));
344	assertEqualIntA(a, ARCHIVE_OK, archive_read_support_format_all(a));
345	assertEqualIntA(a, ARCHIVE_OK,
346	    archive_read_open_filename(a, refname, 10240));
347
348	/*
349	 * Verify regular first file.
350	 * The filename is not translated to CP1251 because hdrcharset
351	 * attribute is BINARY and there is not way to know its charset.
352	 */
353	assertEqualIntA(a, ARCHIVE_OK, archive_read_next_header(a, &ae));
354	/* A filename is in KOI8-R. */
355	assertEqualString("\xf0\xf2\xe9\xf7\xe5\xf4",
356	    archive_entry_pathname(ae));
357	assertEqualInt(6, archive_entry_size(ae));
358	assertEqualInt(archive_entry_is_encrypted(ae), 0);
359	assertEqualIntA(a, archive_read_has_encrypted_entries(a), ARCHIVE_READ_FORMAT_ENCRYPTION_UNSUPPORTED);
360
361	/*
362	 * Verify regular second file.
363	 */
364	assertEqualIntA(a, ARCHIVE_OK, archive_read_next_header(a, &ae));
365	assertEqualString("\xef\xf0\xe8\xe2\xe5\xf2",
366	    archive_entry_pathname(ae));
367	assertEqualInt(6, archive_entry_size(ae));
368	assertEqualInt(archive_entry_is_encrypted(ae), 0);
369	assertEqualIntA(a, archive_read_has_encrypted_entries(a), ARCHIVE_READ_FORMAT_ENCRYPTION_UNSUPPORTED);
370
371
372	/* End of archive. */
373	assertEqualIntA(a, ARCHIVE_EOF, archive_read_next_header(a, &ae));
374
375	/* Verify archive format. */
376	assertEqualIntA(a, ARCHIVE_FILTER_COMPRESS, archive_filter_code(a, 0));
377	assertEqualIntA(a, ARCHIVE_FORMAT_TAR_PAX_INTERCHANGE,
378	    archive_format(a));
379
380	/* Close the archive. */
381	assertEqualInt(ARCHIVE_OK, archive_read_close(a));
382	assertEqualInt(ARCHIVE_OK, archive_read_free(a));
383}
384
385
/*
 * Test entry point: extract the reference archive once, then run the
 * CP866, UTF-8 and CP1251 locale checks against it, in that order.
 */
DEFINE_TEST(test_read_format_tar_filename)
{
	static const char reference[] =
	    "test_read_format_tar_filename_koi8r.tar.Z";

	extract_reference_file(reference);

	/* Each helper selects its own locale and skips itself if it cannot. */
	test_read_format_tar_filename_KOI8R_CP866(reference);
	test_read_format_tar_filename_KOI8R_UTF8(reference);
	test_read_format_tar_filename_KOI8R_CP1251(reference);
}
395