test_archive_pathmatch.c revision 370535
/*-
 * Copyright (c) 2003-2007 Tim Kientzle
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR(S) ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR(S) BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */
25#include "test.h"
26__FBSDID("$FreeBSD$");
27
28#define __LIBARCHIVE_TEST
29#include "archive_pathmatch.h"
30
/*
 * Verify that the pattern matcher implements the wildcard logic specified
 * in SUSv2 for the cpio command.  This is essentially the
 * shell glob syntax:
 *   * - matches any sequence of chars, including '/'
 *   ? - matches any single char, including '/'
 *   [...] - matches any of a set of chars, '-' specifies a range,
 *        initial '!' is undefined
 *
 * The specification in SUSv2 is a bit incomplete; I assume the following:
 *   Trailing '-' in [...] is not special.
 *   Initial '!' in [...] negates the set.
 *
 * TODO: Figure out if there's a good way to extend this to handle
 * Windows paths that use '\' as a path separator.  <sigh>
 */

47DEFINE_TEST(test_archive_pathmatch)
48{
49	assertEqualInt(1, archive_pathmatch("a/b/c", "a/b/c", 0));
50	assertEqualInt(0, archive_pathmatch("a/b/", "a/b/c", 0));
51	assertEqualInt(0, archive_pathmatch("a/b", "a/b/c", 0));
52	assertEqualInt(0, archive_pathmatch("a/b/c", "a/b/", 0));
53	assertEqualInt(0, archive_pathmatch("a/b/c", "a/b", 0));
54
55    /* Null string and non-empty pattern returns false. */
56	assertEqualInt(0, archive_pathmatch("a/b/c", NULL, 0));
57	assertEqualInt(0, archive_pathmatch_w(L"a/b/c", NULL, 0));
58
59	/* Empty pattern only matches empty string. */
60	assertEqualInt(1, archive_pathmatch("","", 0));
61	assertEqualInt(0, archive_pathmatch("","a", 0));
62	assertEqualInt(1, archive_pathmatch("*","", 0));
63	assertEqualInt(1, archive_pathmatch("*","a", 0));
64	assertEqualInt(1, archive_pathmatch("*","abcd", 0));
65	/* SUSv2: * matches / */
66	assertEqualInt(1, archive_pathmatch("*","abcd/efgh/ijkl", 0));
67	assertEqualInt(1, archive_pathmatch("abcd*efgh/ijkl","abcd/efgh/ijkl", 0));
68	assertEqualInt(1, archive_pathmatch("abcd***efgh/ijkl","abcd/efgh/ijkl", 0));
69	assertEqualInt(1, archive_pathmatch("abcd***/efgh/ijkl","abcd/efgh/ijkl", 0));
70	assertEqualInt(0, archive_pathmatch("?", "", 0));
71	assertEqualInt(0, archive_pathmatch("?", "\0", 0));
72	assertEqualInt(1, archive_pathmatch("?", "a", 0));
73	assertEqualInt(0, archive_pathmatch("?", "ab", 0));
74	assertEqualInt(1, archive_pathmatch("?", ".", 0));
75	assertEqualInt(1, archive_pathmatch("?", "?", 0));
76	assertEqualInt(1, archive_pathmatch("a", "a", 0));
77	assertEqualInt(0, archive_pathmatch("a", "ab", 0));
78	assertEqualInt(0, archive_pathmatch("a", "ab", 0));
79	assertEqualInt(1, archive_pathmatch("a?c", "abc", 0));
80	/* SUSv2: ? matches / */
81	assertEqualInt(1, archive_pathmatch("a?c", "a/c", 0));
82	assertEqualInt(1, archive_pathmatch("a?*c*", "a/c", 0));
83	assertEqualInt(1, archive_pathmatch("*a*", "a/c", 0));
84	assertEqualInt(1, archive_pathmatch("*a*", "/a/c", 0));
85	assertEqualInt(1, archive_pathmatch("*a*", "defaaaaaaa", 0));
86	assertEqualInt(0, archive_pathmatch("a*", "defghi", 0));
87	assertEqualInt(0, archive_pathmatch("*a*", "defghi", 0));
88
89	/* Character classes */
90	assertEqualInt(1, archive_pathmatch("abc[def", "abc[def", 0));
91	assertEqualInt(0, archive_pathmatch("abc[def]", "abc[def", 0));
92	assertEqualInt(0, archive_pathmatch("abc[def", "abcd", 0));
93	assertEqualInt(1, archive_pathmatch("abc[def]", "abcd", 0));
94	assertEqualInt(1, archive_pathmatch("abc[def]", "abce", 0));
95	assertEqualInt(1, archive_pathmatch("abc[def]", "abcf", 0));
96	assertEqualInt(0, archive_pathmatch("abc[def]", "abcg", 0));
97	assertEqualInt(1, archive_pathmatch("abc[d*f]", "abcd", 0));
98	assertEqualInt(1, archive_pathmatch("abc[d*f]", "abc*", 0));
99	assertEqualInt(0, archive_pathmatch("abc[d*f]", "abcdefghi", 0));
100	assertEqualInt(0, archive_pathmatch("abc[d*", "abcdefghi", 0));
101	assertEqualInt(1, archive_pathmatch("abc[d*", "abc[defghi", 0));
102	assertEqualInt(1, archive_pathmatch("abc[d-f]", "abcd", 0));
103	assertEqualInt(1, archive_pathmatch("abc[d-f]", "abce", 0));
104	assertEqualInt(1, archive_pathmatch("abc[d-f]", "abcf", 0));
105	assertEqualInt(0, archive_pathmatch("abc[d-f]", "abcg", 0));
106	assertEqualInt(0, archive_pathmatch("abc[d-fh-k]", "abca", 0));
107	assertEqualInt(1, archive_pathmatch("abc[d-fh-k]", "abcd", 0));
108	assertEqualInt(1, archive_pathmatch("abc[d-fh-k]", "abce", 0));
109	assertEqualInt(1, archive_pathmatch("abc[d-fh-k]", "abcf", 0));
110	assertEqualInt(0, archive_pathmatch("abc[d-fh-k]", "abcg", 0));
111	assertEqualInt(1, archive_pathmatch("abc[d-fh-k]", "abch", 0));
112	assertEqualInt(1, archive_pathmatch("abc[d-fh-k]", "abci", 0));
113	assertEqualInt(1, archive_pathmatch("abc[d-fh-k]", "abcj", 0));
114	assertEqualInt(1, archive_pathmatch("abc[d-fh-k]", "abck", 0));
115	assertEqualInt(0, archive_pathmatch("abc[d-fh-k]", "abcl", 0));
116	assertEqualInt(0, archive_pathmatch("abc[d-fh-k]", "abc-", 0));
117
118	/* [] matches nothing, [!] is the same as ? */
119	assertEqualInt(0, archive_pathmatch("abc[]efg", "abcdefg", 0));
120	assertEqualInt(0, archive_pathmatch("abc[]efg", "abcqefg", 0));
121	assertEqualInt(0, archive_pathmatch("abc[]efg", "abcefg", 0));
122	assertEqualInt(1, archive_pathmatch("abc[!]efg", "abcdefg", 0));
123	assertEqualInt(1, archive_pathmatch("abc[!]efg", "abcqefg", 0));
124	assertEqualInt(0, archive_pathmatch("abc[!]efg", "abcefg", 0));
125
126	/* I assume: Trailing '-' is non-special. */
127	assertEqualInt(0, archive_pathmatch("abc[d-fh-]", "abcl", 0));
128	assertEqualInt(1, archive_pathmatch("abc[d-fh-]", "abch", 0));
129	assertEqualInt(1, archive_pathmatch("abc[d-fh-]", "abc-", 0));
130	assertEqualInt(1, archive_pathmatch("abc[d-fh-]", "abc-", 0));
131
132	/* ']' can be backslash-quoted within a character class. */
133	assertEqualInt(1, archive_pathmatch("abc[\\]]", "abc]", 0));
134	assertEqualInt(1, archive_pathmatch("abc[\\]d]", "abc]", 0));
135	assertEqualInt(1, archive_pathmatch("abc[\\]d]", "abcd", 0));
136	assertEqualInt(1, archive_pathmatch("abc[d\\]]", "abc]", 0));
137	assertEqualInt(1, archive_pathmatch("abc[d\\]]", "abcd", 0));
138	assertEqualInt(1, archive_pathmatch("abc[d]e]", "abcde]", 0));
139	assertEqualInt(1, archive_pathmatch("abc[d\\]e]", "abc]", 0));
140	assertEqualInt(0, archive_pathmatch("abc[d\\]e]", "abcd]e", 0));
141	assertEqualInt(0, archive_pathmatch("abc[d]e]", "abc]", 0));
142
143	/* backslash-quoted chars can appear as either end of a range. */
144	assertEqualInt(1, archive_pathmatch("abc[\\d-f]gh", "abcegh", 0));
145	assertEqualInt(0, archive_pathmatch("abc[\\d-f]gh", "abcggh", 0));
146	assertEqualInt(0, archive_pathmatch("abc[\\d-f]gh", "abc\\gh", 0));
147	assertEqualInt(1, archive_pathmatch("abc[d-\\f]gh", "abcegh", 0));
148	assertEqualInt(1, archive_pathmatch("abc[\\d-\\f]gh", "abcegh", 0));
149	assertEqualInt(1, archive_pathmatch("abc[\\d-\\f]gh", "abcegh", 0));
150	/* backslash-quoted '-' isn't special. */
151	assertEqualInt(0, archive_pathmatch("abc[d\\-f]gh", "abcegh", 0));
152	assertEqualInt(1, archive_pathmatch("abc[d\\-f]gh", "abc-gh", 0));
153
154	/* Leading '!' negates a character class. */
155	assertEqualInt(0, archive_pathmatch("abc[!d]", "abcd", 0));
156	assertEqualInt(1, archive_pathmatch("abc[!d]", "abce", 0));
157	assertEqualInt(1, archive_pathmatch("abc[!d]", "abcc", 0));
158	assertEqualInt(0, archive_pathmatch("abc[!d-z]", "abcq", 0));
159	assertEqualInt(1, archive_pathmatch("abc[!d-gi-z]", "abch", 0));
160	assertEqualInt(1, archive_pathmatch("abc[!fgijkl]", "abch", 0));
161	assertEqualInt(0, archive_pathmatch("abc[!fghijkl]", "abch", 0));
162
163	/* Backslash quotes next character. */
164	assertEqualInt(0, archive_pathmatch("abc\\[def]", "abc\\d", 0));
165	assertEqualInt(1, archive_pathmatch("abc\\[def]", "abc[def]", 0));
166	assertEqualInt(0, archive_pathmatch("abc\\\\[def]", "abc[def]", 0));
167	assertEqualInt(0, archive_pathmatch("abc\\\\[def]", "abc\\[def]", 0));
168	assertEqualInt(1, archive_pathmatch("abc\\\\[def]", "abc\\d", 0));
169	assertEqualInt(1, archive_pathmatch("abcd\\", "abcd\\", 0));
170	assertEqualInt(0, archive_pathmatch("abcd\\", "abcd\\[", 0));
171	assertEqualInt(0, archive_pathmatch("abcd\\", "abcde", 0));
172	assertEqualInt(0, archive_pathmatch("abcd\\[", "abcd\\", 0));
173
174	/*
175	 * Because '.' and '/' have special meanings, we can
176	 * identify many equivalent paths even if they're expressed
177	 * differently.  (But quoting a character with '\\' suppresses
178	 * special meanings!)
179	 */
180	assertEqualInt(0, archive_pathmatch("a/b/", "a/bc", 0));
181	assertEqualInt(1, archive_pathmatch("a/./b", "a/b", 0));
182	assertEqualInt(0, archive_pathmatch("a\\/./b", "a/b", 0));
183	assertEqualInt(0, archive_pathmatch("a/\\./b", "a/b", 0));
184	assertEqualInt(0, archive_pathmatch("a/.\\/b", "a/b", 0));
185	assertEqualInt(0, archive_pathmatch("a\\/\\.\\/b", "a/b", 0));
186	assertEqualInt(1, archive_pathmatch("./abc/./def/", "abc/def/", 0));
187	assertEqualInt(1, archive_pathmatch("abc/def", "./././abc/./def", 0));
188	assertEqualInt(1, archive_pathmatch("abc/def/././//", "./././abc/./def/", 0));
189	assertEqualInt(1, archive_pathmatch(".////abc/.//def", "./././abc/./def", 0));
190	assertEqualInt(1, archive_pathmatch("./abc?def/", "abc/def/", 0));
191	failure("\"?./\" is not the same as \"/./\"");
192	assertEqualInt(0, archive_pathmatch("./abc?./def/", "abc/def/", 0));
193	failure("Trailing '/' should match no trailing '/'");
194	assertEqualInt(1, archive_pathmatch("./abc/./def/", "abc/def", 0));
195	failure("Trailing '/./' is still the same directory.");
196	assertEqualInt(1, archive_pathmatch("./abc/./def/./", "abc/def", 0));
197	failure("Trailing '/.' is still the same directory.");
198	assertEqualInt(1, archive_pathmatch("./abc/./def/.", "abc/def", 0));
199	assertEqualInt(1, archive_pathmatch("./abc/./def", "abc/def/", 0));
200	failure("Trailing '/./' is still the same directory.");
201	assertEqualInt(1, archive_pathmatch("./abc/./def", "abc/def/./", 0));
202	failure("Trailing '/.' is still the same directory.");
203	assertEqualInt(1, archive_pathmatch("./abc*/./def", "abc/def/.", 0));
204
205	/* Matches not anchored at beginning. */
206	assertEqualInt(0,
207	    archive_pathmatch("bcd", "abcd", PATHMATCH_NO_ANCHOR_START));
208	assertEqualInt(1,
209	    archive_pathmatch("abcd", "abcd", PATHMATCH_NO_ANCHOR_START));
210	assertEqualInt(0,
211	    archive_pathmatch("^bcd", "abcd", PATHMATCH_NO_ANCHOR_START));
212	assertEqualInt(1,
213	    archive_pathmatch("b/c/d", "a/b/c/d", PATHMATCH_NO_ANCHOR_START));
214	assertEqualInt(0,
215	    archive_pathmatch("^b/c/d", "a/b/c/d", PATHMATCH_NO_ANCHOR_START));
216	assertEqualInt(0,
217	    archive_pathmatch("/b/c/d", "a/b/c/d", PATHMATCH_NO_ANCHOR_START));
218	assertEqualInt(0,
219	    archive_pathmatch("a/b/c", "a/b/c/d", PATHMATCH_NO_ANCHOR_START));
220	assertEqualInt(1,
221	    archive_pathmatch("a/b/c/d", "a/b/c/d", PATHMATCH_NO_ANCHOR_START));
222	assertEqualInt(0,
223	    archive_pathmatch("b/c", "a/b/c/d", PATHMATCH_NO_ANCHOR_START));
224	assertEqualInt(0,
225	    archive_pathmatch("^b/c", "a/b/c/d", PATHMATCH_NO_ANCHOR_START));
226
227
228	assertEqualInt(1,
229	    archive_pathmatch("b/c/d", "a/b/c/d", PATHMATCH_NO_ANCHOR_START));
230	assertEqualInt(1,
231	    archive_pathmatch("b/c/d", "/a/b/c/d", PATHMATCH_NO_ANCHOR_START));
232
233
234	/* Matches not anchored at end. */
235	assertEqualInt(0,
236	    archive_pathmatch("bcd", "abcd", PATHMATCH_NO_ANCHOR_END));
237	assertEqualInt(1,
238	    archive_pathmatch("abcd", "abcd", PATHMATCH_NO_ANCHOR_END));
239	assertEqualInt(1,
240	    archive_pathmatch("abcd", "abcd/", PATHMATCH_NO_ANCHOR_END));
241	assertEqualInt(1,
242	    archive_pathmatch("abcd", "abcd/.", PATHMATCH_NO_ANCHOR_END));
243	assertEqualInt(0,
244	    archive_pathmatch("abc", "abcd", PATHMATCH_NO_ANCHOR_END));
245	assertEqualInt(1,
246	    archive_pathmatch("a/b/c", "a/b/c/d", PATHMATCH_NO_ANCHOR_END));
247	assertEqualInt(0,
248	    archive_pathmatch("a/b/c$", "a/b/c/d", PATHMATCH_NO_ANCHOR_END));
249	assertEqualInt(1,
250	    archive_pathmatch("a/b/c$", "a/b/c", PATHMATCH_NO_ANCHOR_END));
251	assertEqualInt(1,
252	    archive_pathmatch("a/b/c$", "a/b/c/", PATHMATCH_NO_ANCHOR_END));
253	assertEqualInt(1,
254	    archive_pathmatch("a/b/c/", "a/b/c/d", PATHMATCH_NO_ANCHOR_END));
255	assertEqualInt(0,
256	    archive_pathmatch("a/b/c/$", "a/b/c/d", PATHMATCH_NO_ANCHOR_END));
257	assertEqualInt(1,
258	    archive_pathmatch("a/b/c/$", "a/b/c/", PATHMATCH_NO_ANCHOR_END));
259	assertEqualInt(1,
260	    archive_pathmatch("a/b/c/$", "a/b/c", PATHMATCH_NO_ANCHOR_END));
261	assertEqualInt(0,
262	    archive_pathmatch("b/c", "a/b/c/d", PATHMATCH_NO_ANCHOR_END));
263
264	/* Matches not anchored at either end. */
265	assertEqualInt(1,
266	    archive_pathmatch("b/c", "a/b/c/d", PATHMATCH_NO_ANCHOR_START | PATHMATCH_NO_ANCHOR_END));
267	assertEqualInt(0,
268	    archive_pathmatch("/b/c", "a/b/c/d", PATHMATCH_NO_ANCHOR_START | PATHMATCH_NO_ANCHOR_END));
269	assertEqualInt(0,
270	    archive_pathmatch("/a/b/c", "a/b/c/d", PATHMATCH_NO_ANCHOR_START | PATHMATCH_NO_ANCHOR_END));
271	assertEqualInt(1,
272	    archive_pathmatch("/a/b/c", "/a/b/c/d", PATHMATCH_NO_ANCHOR_START | PATHMATCH_NO_ANCHOR_END));
273	assertEqualInt(0,
274	    archive_pathmatch("/a/b/c$", "a/b/c/d", PATHMATCH_NO_ANCHOR_START | PATHMATCH_NO_ANCHOR_END));
275	assertEqualInt(0,
276	    archive_pathmatch("/a/b/c/d$", "a/b/c/d", PATHMATCH_NO_ANCHOR_START | PATHMATCH_NO_ANCHOR_END));
277	assertEqualInt(0,
278	    archive_pathmatch("/a/b/c/d$", "/a/b/c/d/e", PATHMATCH_NO_ANCHOR_START | PATHMATCH_NO_ANCHOR_END));
279	assertEqualInt(1,
280	    archive_pathmatch("/a/b/c/d$", "/a/b/c/d", PATHMATCH_NO_ANCHOR_START | PATHMATCH_NO_ANCHOR_END));
281	assertEqualInt(1,
282	    archive_pathmatch("^a/b/c", "a/b/c/d", PATHMATCH_NO_ANCHOR_START | PATHMATCH_NO_ANCHOR_END));
283	assertEqualInt(0,
284	    archive_pathmatch("^a/b/c$", "a/b/c/d", PATHMATCH_NO_ANCHOR_START | PATHMATCH_NO_ANCHOR_END));
285	assertEqualInt(0,
286	    archive_pathmatch("a/b/c$", "a/b/c/d", PATHMATCH_NO_ANCHOR_START | PATHMATCH_NO_ANCHOR_END));
287	assertEqualInt(1,
288	    archive_pathmatch("b/c/d$", "a/b/c/d", PATHMATCH_NO_ANCHOR_START | PATHMATCH_NO_ANCHOR_END));
289}
290