1235267Sgabor/*- 2251245Sgabor * Copyright (C) 2012 Oleg Moskalenko <mom040267@gmail.com> 3235267Sgabor * Copyright (C) 2012 Gabor Kovesdan <gabor@FreeBSD.org> 4235267Sgabor * All rights reserved. 5235267Sgabor * 6235267Sgabor * Redistribution and use in source and binary forms, with or without 7235267Sgabor * modification, are permitted provided that the following conditions 8235267Sgabor * are met: 9235267Sgabor * 1. Redistributions of source code must retain the above copyright 10235267Sgabor * notice, this list of conditions and the following disclaimer. 11235267Sgabor * 2. Redistributions in binary form must reproduce the above copyright 12235267Sgabor * notice, this list of conditions and the following disclaimer in the 13235267Sgabor * documentation and/or other materials provided with the distribution. 14235267Sgabor * 15235267Sgabor * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 16235267Sgabor * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 17235267Sgabor * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 18235267Sgabor * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 19235267Sgabor * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 20235267Sgabor * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 21235267Sgabor * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 22235267Sgabor * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 23235267Sgabor * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 24235267Sgabor * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 25235267Sgabor * SUCH DAMAGE. 26235267Sgabor */ 27235267Sgabor 28235267Sgabor#include <sys/cdefs.h> 29235267Sgabor__FBSDID("$FreeBSD$"); 30235267Sgabor 31235267Sgabor#include <sys/types.h> 32235267Sgabor 33235267Sgabor#include <ctype.h> 34235267Sgabor#include <stdlib.h> 35235267Sgabor#include <string.h> 36235267Sgabor 37235267Sgabor#include "sort.h" 38235267Sgabor#include "vsort.h" 39235267Sgabor 40235267Sgaborstatic inline bool 41235267Sgaborisdigit_clocale(wchar_t c) 42235267Sgabor{ 43235267Sgabor 44235267Sgabor return (c >= L'0' && c <= L'9'); 45235267Sgabor} 46235267Sgabor 47235267Sgaborstatic inline bool 48235267Sgaborisalpha_clocale(wchar_t c) 49235267Sgabor{ 50235267Sgabor 51235267Sgabor return ((c >= L'a' && c <= L'z') || (c >= L'A' && c <= L'Z')); 52235267Sgabor} 53235267Sgabor 54235267Sgaborstatic inline bool 55235267Sgaborisalnum_clocale(wchar_t c) 56235267Sgabor{ 57235267Sgabor 58235267Sgabor return ((c >= L'a' && c <= L'z') || (c >= L'A' && c <= L'Z') || 59235267Sgabor (c >= L'0' && c <= L'9')); 60235267Sgabor} 61235267Sgabor 62235267Sgabor/* 63235267Sgabor * Find string suffix of format: (\.[A-Za-z~][A-Za-z0-9~]*)*$ 64235267Sgabor * Set length of string before suffix. 65235267Sgabor */ 66235267Sgaborstatic void 67235267Sgaborfind_suffix(bwstring_iterator si, bwstring_iterator se, size_t *len) 68235267Sgabor{ 69235267Sgabor wchar_t c; 70235267Sgabor size_t clen; 71235267Sgabor bool expect_alpha, sfx; 72235267Sgabor 73235267Sgabor sfx = false; 74235267Sgabor expect_alpha = false; 75235267Sgabor *len = 0; 76235267Sgabor clen = 0; 77235267Sgabor 78235267Sgabor while ((si < se) && (c = bws_get_iter_value(si))) { 79235267Sgabor if (expect_alpha) { 80235267Sgabor expect_alpha = false; 81235267Sgabor if (!isalpha_clocale(c) && (c != L'~')) 82235267Sgabor sfx = false; 83235267Sgabor } else if (c == L'.') { 84235267Sgabor expect_alpha = true; 85235267Sgabor if (!sfx) { 86235267Sgabor sfx = true; 87235267Sgabor *len = clen; 88235267Sgabor } 89235267Sgabor } else if (!isalnum_clocale(c) && (c != L'~')) 90235267Sgabor sfx = false; 91235267Sgabor 92235267Sgabor si = bws_iterator_inc(si, 1); 93235267Sgabor ++clen; 94235267Sgabor } 95235267Sgabor 96235267Sgabor /* This code must be here to make the implementation compatible 97235267Sgabor * with WORDING of GNU sort documentation. 98235267Sgabor * But the GNU sort implementation is not following its own 99235267Sgabor * documentation. GNU sort allows empty file extensions 100235267Sgabor * (just dot with nothing after); but the regular expression in 101235267Sgabor * their documentation does not allow empty file extensions. 102235267Sgabor * We chose to make our implementation compatible with GNU sort 103235267Sgabor * implementation. If they will ever fix their bug, this code 104235267Sgabor * must be uncommented. Or they may choose to fix the info page, 105235267Sgabor * then the code stays commented. 106235267Sgabor * 107235267Sgabor if (expect_alpha) 108235267Sgabor sfx = false; 109235267Sgabor */ 110235267Sgabor 111235267Sgabor if (!sfx) 112235267Sgabor *len = clen; 113235267Sgabor} 114235267Sgabor 115235267Sgaborstatic inline int 116235267Sgaborcmp_chars(wchar_t c1, wchar_t c2) 117235267Sgabor{ 118235267Sgabor 119235267Sgabor if (c1 == c2) 120235267Sgabor return (0); 121235267Sgabor 122235267Sgabor if (c1 == L'~') 123235267Sgabor return (-1); 124235267Sgabor if (c2 == L'~') 125235267Sgabor return (+1); 126235267Sgabor 127235267Sgabor if (isdigit_clocale(c1) || !c1) 128235267Sgabor return ((isdigit_clocale(c2) || !c2) ? 0 : -1); 129235267Sgabor 130235267Sgabor if (isdigit_clocale(c2) || !c2) 131235267Sgabor return (+1); 132235267Sgabor 133235267Sgabor if (isalpha_clocale(c1)) 134235267Sgabor return ((isalpha_clocale(c2)) ? ((int) c1 - (int) c2) : -1); 135235267Sgabor 136235267Sgabor if (isalpha_clocale(c2)) 137235267Sgabor return (+1); 138235267Sgabor 139235267Sgabor return ((int) c1 - (int) c2); 140235267Sgabor} 141235267Sgabor 142235267Sgaborstatic int 143235267Sgaborcmpversions(bwstring_iterator si1, bwstring_iterator se1, 144235267Sgabor bwstring_iterator si2, bwstring_iterator se2) 145235267Sgabor{ 146235267Sgabor int cmp, diff; 147235267Sgabor 148235267Sgabor while ((si1 < se1) || (si2 < se2)) { 149235267Sgabor diff = 0; 150235267Sgabor 151235267Sgabor while (((si1 < se1) && 152235267Sgabor !isdigit_clocale(bws_get_iter_value(si1))) || 153235267Sgabor ((si2 < se2) && !isdigit_clocale(bws_get_iter_value(si2)))) { 154235267Sgabor wchar_t c1, c2; 155235267Sgabor 156235267Sgabor c1 = (si1 < se1) ? bws_get_iter_value(si1) : 0; 157235267Sgabor c2 = (si2 < se2) ? bws_get_iter_value(si2) : 0; 158235267Sgabor 159235267Sgabor cmp = cmp_chars(c1, c2); 160235267Sgabor if (cmp) 161235267Sgabor return (cmp); 162235267Sgabor 163235267Sgabor if (si1 < se1) 164235267Sgabor si1 = bws_iterator_inc(si1, 1); 165235267Sgabor if (si2 < se2) 166235267Sgabor si2 = bws_iterator_inc(si2, 1); 167235267Sgabor } 168235267Sgabor 169235267Sgabor while (bws_get_iter_value(si1) == L'0') 170235267Sgabor si1 = bws_iterator_inc(si1, 1); 171235267Sgabor 172235267Sgabor while (bws_get_iter_value(si2) == L'0') 173235267Sgabor si2 = bws_iterator_inc(si2, 1); 174235267Sgabor 175235267Sgabor while (isdigit_clocale(bws_get_iter_value(si1)) && 176235267Sgabor isdigit_clocale(bws_get_iter_value(si2))) { 177235267Sgabor if (!diff) 178235267Sgabor diff = ((int)bws_get_iter_value(si1) - 179235267Sgabor (int)bws_get_iter_value(si2)); 180235267Sgabor si1 = bws_iterator_inc(si1, 1); 181235267Sgabor si2 = bws_iterator_inc(si2, 1); 182235267Sgabor } 183235267Sgabor 184235267Sgabor if (isdigit_clocale(bws_get_iter_value(si1))) 185235267Sgabor return (1); 186235267Sgabor 187235267Sgabor if (isdigit_clocale(bws_get_iter_value(si2))) 188235267Sgabor return (-1); 189235267Sgabor 190235267Sgabor if (diff) 191235267Sgabor return (diff); 192235267Sgabor } 193235267Sgabor 194235267Sgabor return (0); 195235267Sgabor} 196235267Sgabor 197235267Sgabor/* 198235267Sgabor * Compare two version strings 199235267Sgabor */ 200235267Sgaborint 201235267Sgaborvcmp(struct bwstring *s1, struct bwstring *s2) 202235267Sgabor{ 203235267Sgabor bwstring_iterator si1, si2; 204235267Sgabor wchar_t c1, c2; 205235267Sgabor size_t len1, len2, slen1, slen2; 206235267Sgabor int cmp_bytes, cmp_res; 207235267Sgabor 208235267Sgabor if (s1 == s2) 209235267Sgabor return (0); 210235267Sgabor 211235267Sgabor cmp_bytes = bwscmp(s1, s2, 0); 212235267Sgabor if (cmp_bytes == 0) 213235267Sgabor return (0); 214235267Sgabor 215235267Sgabor len1 = slen1 = BWSLEN(s1); 216235267Sgabor len2 = slen2 = BWSLEN(s2); 217235267Sgabor 218235267Sgabor if (slen1 < 1) 219235267Sgabor return (-1); 220235267Sgabor if (slen2 < 1) 221235267Sgabor return (+1); 222235267Sgabor 223235267Sgabor si1 = bws_begin(s1); 224235267Sgabor si2 = bws_begin(s2); 225235267Sgabor 226235267Sgabor c1 = bws_get_iter_value(si1); 227235267Sgabor c2 = bws_get_iter_value(si2); 228235267Sgabor 229235267Sgabor if (c1 == L'.' && (slen1 == 1)) 230235267Sgabor return (-1); 231235267Sgabor 232235267Sgabor if (c2 == L'.' && (slen2 == 1)) 233235267Sgabor return (+1); 234235267Sgabor 235235267Sgabor if (slen1 == 2 && c1 == L'.' && 236235267Sgabor bws_get_iter_value(bws_iterator_inc(si1, 1)) == L'.') 237235267Sgabor return (-1); 238235267Sgabor if (slen2 == 2 && c2 == L'.' && 239235267Sgabor bws_get_iter_value(bws_iterator_inc(si2, 1)) == L'.') 240235267Sgabor return (+1); 241235267Sgabor 242235267Sgabor if (c1 == L'.' && c2 != L'.') 243235267Sgabor return (-1); 244235267Sgabor if (c1 != L'.' && c2 == L'.') 245235267Sgabor return (+1); 246235267Sgabor 247235267Sgabor if (c1 == L'.' && c2 == L'.') { 248235267Sgabor si1 = bws_iterator_inc(si1, 1); 249235267Sgabor si2 = bws_iterator_inc(si2, 1); 250235267Sgabor } 251235267Sgabor 252235267Sgabor find_suffix(si1, bws_end(s1), &len1); 253235267Sgabor find_suffix(si2, bws_end(s2), &len2); 254235267Sgabor 255235267Sgabor if ((len1 == len2) && (bws_iterator_cmp(si1, si2, len1) == 0)) 256235267Sgabor return (cmp_bytes); 257235267Sgabor 258235267Sgabor cmp_res = cmpversions(si1, bws_iterator_inc(si1, len1), si2, 259235267Sgabor bws_iterator_inc(si2, len2)); 260235267Sgabor 261235267Sgabor if (cmp_res == 0) 262235267Sgabor cmp_res = cmp_bytes; 263235267Sgabor 264235267Sgabor return (cmp_res); 265235267Sgabor} 266