1235267Sgabor/*-
2251245Sgabor * Copyright (C) 2012 Oleg Moskalenko <mom040267@gmail.com>
3235267Sgabor * Copyright (C) 2012 Gabor Kovesdan <gabor@FreeBSD.org>
4235267Sgabor * All rights reserved.
5235267Sgabor *
6235267Sgabor * Redistribution and use in source and binary forms, with or without
7235267Sgabor * modification, are permitted provided that the following conditions
8235267Sgabor * are met:
9235267Sgabor * 1. Redistributions of source code must retain the above copyright
10235267Sgabor *    notice, this list of conditions and the following disclaimer.
11235267Sgabor * 2. Redistributions in binary form must reproduce the above copyright
12235267Sgabor *    notice, this list of conditions and the following disclaimer in the
13235267Sgabor *    documentation and/or other materials provided with the distribution.
14235267Sgabor *
15235267Sgabor * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
16235267Sgabor * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
17235267Sgabor * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
18235267Sgabor * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
19235267Sgabor * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
20235267Sgabor * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
21235267Sgabor * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
22235267Sgabor * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
23235267Sgabor * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
24235267Sgabor * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
25235267Sgabor * SUCH DAMAGE.
26235267Sgabor */
27235267Sgabor
28235267Sgabor#include <sys/cdefs.h>
29235267Sgabor__FBSDID("$FreeBSD$");
30235267Sgabor
31235267Sgabor#include <sys/types.h>
32235267Sgabor
33235267Sgabor#include <ctype.h>
34235267Sgabor#include <stdlib.h>
35235267Sgabor#include <string.h>
36235267Sgabor
37235267Sgabor#include "sort.h"
38235267Sgabor#include "vsort.h"
39235267Sgabor
40235267Sgaborstatic inline bool
41235267Sgaborisdigit_clocale(wchar_t c)
42235267Sgabor{
43235267Sgabor
44235267Sgabor	return (c >= L'0' && c <= L'9');
45235267Sgabor}
46235267Sgabor
47235267Sgaborstatic inline bool
48235267Sgaborisalpha_clocale(wchar_t c)
49235267Sgabor{
50235267Sgabor
51235267Sgabor	return ((c >= L'a' && c <= L'z') || (c >= L'A' && c <= L'Z'));
52235267Sgabor}
53235267Sgabor
54235267Sgaborstatic inline bool
55235267Sgaborisalnum_clocale(wchar_t c)
56235267Sgabor{
57235267Sgabor
58235267Sgabor	return ((c >= L'a' && c <= L'z') || (c >= L'A' && c <= L'Z') ||
59235267Sgabor	    (c >= L'0' && c <= L'9'));
60235267Sgabor}
61235267Sgabor
62235267Sgabor/*
63235267Sgabor * Find string suffix of format: (\.[A-Za-z~][A-Za-z0-9~]*)*$
64235267Sgabor * Set length of string before suffix.
65235267Sgabor */
66235267Sgaborstatic void
67235267Sgaborfind_suffix(bwstring_iterator si, bwstring_iterator se, size_t *len)
68235267Sgabor{
69235267Sgabor	wchar_t c;
70235267Sgabor	size_t clen;
71235267Sgabor	bool expect_alpha, sfx;
72235267Sgabor
73235267Sgabor	sfx = false;
74235267Sgabor	expect_alpha = false;
75235267Sgabor	*len = 0;
76235267Sgabor	clen = 0;
77235267Sgabor
78235267Sgabor	while ((si < se) && (c = bws_get_iter_value(si))) {
79235267Sgabor		if (expect_alpha) {
80235267Sgabor			expect_alpha = false;
81235267Sgabor			if (!isalpha_clocale(c) && (c != L'~'))
82235267Sgabor				sfx = false;
83235267Sgabor		} else if (c == L'.') {
84235267Sgabor			expect_alpha = true;
85235267Sgabor			if (!sfx) {
86235267Sgabor				sfx = true;
87235267Sgabor				*len = clen;
88235267Sgabor			}
89235267Sgabor		} else if (!isalnum_clocale(c) && (c != L'~'))
90235267Sgabor			sfx = false;
91235267Sgabor
92235267Sgabor		si = bws_iterator_inc(si, 1);
93235267Sgabor		++clen;
94235267Sgabor	}
95235267Sgabor
96235267Sgabor	/* This code must be here to make the implementation compatible
97235267Sgabor	 * with WORDING of GNU sort documentation.
98235267Sgabor	 * But the GNU sort implementation is not following its own
99235267Sgabor	 * documentation.  GNU sort allows empty file extensions
100235267Sgabor	 * (just dot with nothing after); but the regular expression in
101235267Sgabor	 * their documentation does not allow empty file extensions.
102235267Sgabor	 * We chose to make our implementation compatible with GNU sort
103235267Sgabor	 * implementation.  If they will ever fix their bug, this code
104235267Sgabor	 * must be uncommented. Or they may choose to fix the info page,
105235267Sgabor	 * then the code stays commented.
106235267Sgabor	 *
107235267Sgabor	 if (expect_alpha)
108235267Sgabor	 	sfx = false;
109235267Sgabor	 */
110235267Sgabor
111235267Sgabor	if (!sfx)
112235267Sgabor		*len = clen;
113235267Sgabor}
114235267Sgabor
115235267Sgaborstatic inline int
116235267Sgaborcmp_chars(wchar_t c1, wchar_t c2)
117235267Sgabor{
118235267Sgabor
119235267Sgabor	if (c1 == c2)
120235267Sgabor		return (0);
121235267Sgabor
122235267Sgabor	if (c1 == L'~')
123235267Sgabor		return (-1);
124235267Sgabor	if (c2 == L'~')
125235267Sgabor		return (+1);
126235267Sgabor
127235267Sgabor	if (isdigit_clocale(c1) || !c1)
128235267Sgabor		return ((isdigit_clocale(c2) || !c2) ? 0 : -1);
129235267Sgabor
130235267Sgabor	if (isdigit_clocale(c2) || !c2)
131235267Sgabor		return (+1);
132235267Sgabor
133235267Sgabor	if (isalpha_clocale(c1))
134235267Sgabor		return ((isalpha_clocale(c2)) ? ((int) c1 - (int) c2) : -1);
135235267Sgabor
136235267Sgabor	if (isalpha_clocale(c2))
137235267Sgabor		return (+1);
138235267Sgabor
139235267Sgabor	return ((int) c1 - (int) c2);
140235267Sgabor}
141235267Sgabor
142235267Sgaborstatic int
143235267Sgaborcmpversions(bwstring_iterator si1, bwstring_iterator se1,
144235267Sgabor    bwstring_iterator si2, bwstring_iterator se2)
145235267Sgabor{
146235267Sgabor	int cmp, diff;
147235267Sgabor
148235267Sgabor	while ((si1 < se1) || (si2 < se2)) {
149235267Sgabor		diff = 0;
150235267Sgabor
151235267Sgabor		while (((si1 < se1) &&
152235267Sgabor		    !isdigit_clocale(bws_get_iter_value(si1))) ||
153235267Sgabor		    ((si2 < se2) && !isdigit_clocale(bws_get_iter_value(si2)))) {
154235267Sgabor			wchar_t c1, c2;
155235267Sgabor
156235267Sgabor			c1 = (si1 < se1) ? bws_get_iter_value(si1) : 0;
157235267Sgabor			c2 = (si2 < se2) ? bws_get_iter_value(si2) : 0;
158235267Sgabor
159235267Sgabor			cmp = cmp_chars(c1, c2);
160235267Sgabor			if (cmp)
161235267Sgabor				return (cmp);
162235267Sgabor
163235267Sgabor			if (si1 < se1)
164235267Sgabor				si1 = bws_iterator_inc(si1, 1);
165235267Sgabor			if (si2 < se2)
166235267Sgabor				si2 = bws_iterator_inc(si2, 1);
167235267Sgabor		}
168235267Sgabor
169235267Sgabor		while (bws_get_iter_value(si1) == L'0')
170235267Sgabor			si1 = bws_iterator_inc(si1, 1);
171235267Sgabor
172235267Sgabor		while (bws_get_iter_value(si2) == L'0')
173235267Sgabor			si2 = bws_iterator_inc(si2, 1);
174235267Sgabor
175235267Sgabor		while (isdigit_clocale(bws_get_iter_value(si1)) &&
176235267Sgabor		    isdigit_clocale(bws_get_iter_value(si2))) {
177235267Sgabor			if (!diff)
178235267Sgabor				diff = ((int)bws_get_iter_value(si1) -
179235267Sgabor				    (int)bws_get_iter_value(si2));
180235267Sgabor			si1 = bws_iterator_inc(si1, 1);
181235267Sgabor			si2 = bws_iterator_inc(si2, 1);
182235267Sgabor		}
183235267Sgabor
184235267Sgabor		if (isdigit_clocale(bws_get_iter_value(si1)))
185235267Sgabor			return (1);
186235267Sgabor
187235267Sgabor		if (isdigit_clocale(bws_get_iter_value(si2)))
188235267Sgabor			return (-1);
189235267Sgabor
190235267Sgabor		if (diff)
191235267Sgabor			return (diff);
192235267Sgabor	}
193235267Sgabor
194235267Sgabor	return (0);
195235267Sgabor}
196235267Sgabor
197235267Sgabor/*
198235267Sgabor * Compare two version strings
199235267Sgabor */
200235267Sgaborint
201235267Sgaborvcmp(struct bwstring *s1, struct bwstring *s2)
202235267Sgabor{
203235267Sgabor	bwstring_iterator si1, si2;
204235267Sgabor	wchar_t c1, c2;
205235267Sgabor	size_t len1, len2, slen1, slen2;
206235267Sgabor	int cmp_bytes, cmp_res;
207235267Sgabor
208235267Sgabor	if (s1 == s2)
209235267Sgabor		return (0);
210235267Sgabor
211235267Sgabor	cmp_bytes = bwscmp(s1, s2, 0);
212235267Sgabor	if (cmp_bytes == 0)
213235267Sgabor		return (0);
214235267Sgabor
215235267Sgabor	len1 = slen1 = BWSLEN(s1);
216235267Sgabor	len2 = slen2 = BWSLEN(s2);
217235267Sgabor
218235267Sgabor	if (slen1 < 1)
219235267Sgabor		return (-1);
220235267Sgabor	if (slen2 < 1)
221235267Sgabor		return (+1);
222235267Sgabor
223235267Sgabor	si1 = bws_begin(s1);
224235267Sgabor	si2 = bws_begin(s2);
225235267Sgabor
226235267Sgabor	c1 = bws_get_iter_value(si1);
227235267Sgabor	c2 = bws_get_iter_value(si2);
228235267Sgabor
229235267Sgabor	if (c1 == L'.' && (slen1 == 1))
230235267Sgabor		return (-1);
231235267Sgabor
232235267Sgabor	if (c2 == L'.' && (slen2 == 1))
233235267Sgabor		return (+1);
234235267Sgabor
235235267Sgabor	if (slen1 == 2 && c1 == L'.' &&
236235267Sgabor	    bws_get_iter_value(bws_iterator_inc(si1, 1)) == L'.')
237235267Sgabor		return (-1);
238235267Sgabor	if (slen2 == 2 && c2 == L'.' &&
239235267Sgabor	    bws_get_iter_value(bws_iterator_inc(si2, 1)) == L'.')
240235267Sgabor		return (+1);
241235267Sgabor
242235267Sgabor	if (c1 == L'.' && c2 != L'.')
243235267Sgabor		return (-1);
244235267Sgabor	if (c1 != L'.' && c2 == L'.')
245235267Sgabor		return (+1);
246235267Sgabor
247235267Sgabor	if (c1 == L'.' && c2 == L'.') {
248235267Sgabor		si1 = bws_iterator_inc(si1, 1);
249235267Sgabor		si2 = bws_iterator_inc(si2, 1);
250235267Sgabor	}
251235267Sgabor
252235267Sgabor	find_suffix(si1, bws_end(s1), &len1);
253235267Sgabor	find_suffix(si2, bws_end(s2), &len2);
254235267Sgabor
255235267Sgabor	if ((len1 == len2) && (bws_iterator_cmp(si1, si2, len1) == 0))
256235267Sgabor		return (cmp_bytes);
257235267Sgabor
258235267Sgabor	cmp_res = cmpversions(si1, bws_iterator_inc(si1, len1), si2,
259235267Sgabor	    bws_iterator_inc(si2, len2));
260235267Sgabor
261235267Sgabor	if (cmp_res == 0)
262235267Sgabor	  cmp_res = cmp_bytes;
263235267Sgabor
264235267Sgabor	return (cmp_res);
265235267Sgabor}
266