coll.c revision 235435
1/*-
2 * Copyright (C) 2009 Gabor Kovesdan <gabor@FreeBSD.org>
3 * Copyright (C) 2012 Oleg Moskalenko <oleg.moskalenko@citrix.com>
4 * All rights reserved.
5 *
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions
8 * are met:
9 * 1. Redistributions of source code must retain the above copyright
10 *    notice, this list of conditions and the following disclaimer.
11 * 2. Redistributions in binary form must reproduce the above copyright
12 *    notice, this list of conditions and the following disclaimer in the
13 *    documentation and/or other materials provided with the distribution.
14 *
15 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
16 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
17 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
18 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
19 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
20 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
21 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
22 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
23 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
24 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
25 * SUCH DAMAGE.
26 */
27
28#include <sys/cdefs.h>
29__FBSDID("$FreeBSD: head/usr.bin/sort/coll.c 235435 2012-05-14 10:06:49Z gabor $");
30
31#include <sys/types.h>
32
33#include <errno.h>
34#include <err.h>
35#include <langinfo.h>
36#include <limits.h>
37#include <math.h>
38#include <md5.h>
39#include <stdlib.h>
40#include <string.h>
41#include <wchar.h>
42#include <wctype.h>
43
44#include "coll.h"
45#include "vsort.h"
46
47struct key_specs *keys;
48size_t keys_num = 0;
49
50wchar_t symbol_decimal_point = L'.';
51/* there is no default thousands separator in collate rules: */
52wchar_t symbol_thousands_sep = 0;
53wchar_t symbol_negative_sign = L'-';
54wchar_t symbol_positive_sign = L'+';
55
56static int wstrcoll(struct key_value *kv1, struct key_value *kv2, size_t offset);
57static int gnumcoll(struct key_value*, struct key_value *, size_t offset);
58static int monthcoll(struct key_value*, struct key_value *, size_t offset);
59static int numcoll(struct key_value*, struct key_value *, size_t offset);
60static int hnumcoll(struct key_value*, struct key_value *, size_t offset);
61static int randomcoll(struct key_value*, struct key_value *, size_t offset);
62static int versioncoll(struct key_value*, struct key_value *, size_t offset);
63
64/*
65 * Allocate keys array
66 */
67struct keys_array *
68keys_array_alloc(void)
69{
70	struct keys_array *ka;
71	size_t sz;
72
73	sz = keys_array_size();
74	ka = sort_malloc(sz);
75	memset(ka, 0, sz);
76
77	return (ka);
78}
79
80/*
81 * Calculate whether we need key hint space
82 */
83static size_t
84key_hint_size(void)
85{
86
87	return (need_hint ? sizeof(struct key_hint) : 0);
88}
89
90/*
91 * Calculate keys array size
92 */
93size_t
94keys_array_size(void)
95{
96
97	return (keys_num * (sizeof(struct key_value) + key_hint_size()));
98}
99
100/*
101 * Clean data of keys array
102 */
103void
104clean_keys_array(const struct bwstring *s, struct keys_array *ka)
105{
106
107	if (ka) {
108		for (size_t i = 0; i < keys_num; ++i)
109			if (ka->key[i].k && ka->key[i].k != s)
110				bwsfree(ka->key[i].k);
111		memset(ka, 0, keys_array_size());
112	}
113}
114
115/*
116 * Set value of a key in the keys set
117 */
118void
119set_key_on_keys_array(struct keys_array *ka, struct bwstring *s, size_t ind)
120{
121
122	if (ka && keys_num > ind) {
123		struct key_value *kv;
124
125		kv = &(ka->key[ind]);
126
127		if (kv->k && kv->k != s)
128			bwsfree(kv->k);
129		kv->k = s;
130	}
131}
132
133/*
134 * Initialize a sort list item
135 */
136struct sort_list_item *
137sort_list_item_alloc(void)
138{
139	struct sort_list_item *si;
140	size_t sz;
141
142	sz = sizeof(struct sort_list_item) + keys_array_size();
143	si = sort_malloc(sz);
144	memset(si, 0, sz);
145
146	return (si);
147}
148
149size_t
150sort_list_item_size(struct sort_list_item *si)
151{
152	size_t ret = 0;
153
154	if (si) {
155		ret = sizeof(struct sort_list_item) + keys_array_size();
156		if (si->str)
157			ret += bws_memsize(si->str);
158		for (size_t i = 0; i < keys_num; ++i) {
159			struct key_value *kv;
160
161			kv = &(si->ka.key[i]);
162
163			if (kv->k != si->str)
164				ret += bws_memsize(kv->k);
165		}
166	}
167	return (ret);
168}
169
170/*
171 * Calculate key for a sort list item
172 */
173static void
174sort_list_item_make_key(struct sort_list_item *si)
175{
176
177	preproc(si->str, &(si->ka));
178}
179
180/*
181 * Set value of a sort list item.
182 * Return combined string and keys memory size.
183 */
184void
185sort_list_item_set(struct sort_list_item *si, struct bwstring *str)
186{
187
188	if (si) {
189		clean_keys_array(si->str, &(si->ka));
190		if (si->str) {
191			if (si->str == str) {
192				/* we are trying to reset the same string */
193				return;
194			} else {
195				bwsfree(si->str);
196				si->str = NULL;
197			}
198		}
199		si->str = str;
200		sort_list_item_make_key(si);
201	}
202}
203
204/*
205 * De-allocate a sort list item object memory
206 */
207void
208sort_list_item_clean(struct sort_list_item *si)
209{
210
211	if (si) {
212		clean_keys_array(si->str, &(si->ka));
213		if (si->str) {
214			bwsfree(si->str);
215			si->str = NULL;
216		}
217	}
218}
219
220/*
221 * Skip columns according to specs
222 */
223static size_t
224skip_cols_to_start(const struct bwstring *s, size_t cols, size_t start,
225    bool skip_blanks, bool *empty_key)
226{
227	if (cols < 1)
228		return (BWSLEN(s) + 1);
229
230	if (skip_blanks)
231		while (start < BWSLEN(s) && iswblank(BWS_GET(s,start)))
232			++start;
233
234	while (start < BWSLEN(s) && cols > 1) {
235		--cols;
236		++start;
237	}
238
239	if (start >= BWSLEN(s))
240		*empty_key = true;
241
242	return (start);
243}
244
245/*
246 * Skip fields according to specs
247 */
248static size_t
249skip_fields_to_start(const struct bwstring *s, size_t fields, bool *empty_field)
250{
251
252	if (fields < 2) {
253		if (BWSLEN(s) == 0)
254			*empty_field = true;
255		return (0);
256	} else if (!(sort_opts_vals.tflag)) {
257		size_t cpos = 0;
258		bool pb = true;
259
260		while (cpos < BWSLEN(s)) {
261			bool isblank;
262
263			isblank = iswblank(BWS_GET(s,cpos));
264
265			if (isblank && !pb) {
266				--fields;
267				if (fields <= 1)
268					return (cpos);
269			}
270			pb = isblank;
271			++cpos;
272		}
273		if (fields > 1)
274			*empty_field = true;
275		return (cpos);
276	} else {
277		size_t cpos = 0;
278
279		while (cpos < BWSLEN(s)) {
280			if (BWS_GET(s,cpos) == sort_opts_vals.field_sep) {
281				--fields;
282				if (fields <= 1)
283					return (cpos + 1);
284			}
285			++cpos;
286		}
287		if (fields > 1)
288			*empty_field = true;
289		return (cpos);
290	}
291}
292
293/*
294 * Find fields start
295 */
296static void
297find_field_start(const struct bwstring *s, struct key_specs *ks,
298    size_t *field_start, size_t *key_start, bool *empty_field, bool *empty_key)
299{
300
301	*field_start = skip_fields_to_start(s, ks->f1, empty_field);
302	if (!*empty_field)
303		*key_start = skip_cols_to_start(s, ks->c1, *field_start,
304		    ks->pos1b, empty_key);
305	else
306		*empty_key = true;
307}
308
309/*
310 * Find end key position
311 */
312static size_t
313find_field_end(const struct bwstring *s, struct key_specs *ks)
314{
315	size_t f2, next_field_start, pos_end;
316	bool empty_field, empty_key;
317
318	pos_end = 0;
319	next_field_start = 0;
320	empty_field = false;
321	empty_key = false;
322	f2 = ks->f2;
323
324	if (f2 == 0)
325		return (BWSLEN(s) + 1);
326	else {
327		if (ks->c2 == 0) {
328			next_field_start = skip_fields_to_start(s, f2 + 1,
329			    &empty_field);
330			if ((next_field_start > 0) && sort_opts_vals.tflag &&
331			    (sort_opts_vals.field_sep == BWS_GET(s,
332			    next_field_start - 1)))
333				--next_field_start;
334		} else
335			next_field_start = skip_fields_to_start(s, f2,
336			    &empty_field);
337	}
338
339	if (empty_field || (next_field_start >= BWSLEN(s)))
340		return (BWSLEN(s) + 1);
341
342	if (ks->c2) {
343		pos_end = skip_cols_to_start(s, ks->c2, next_field_start,
344		    ks->pos2b, &empty_key);
345		if (pos_end < BWSLEN(s))
346			++pos_end;
347	} else
348		pos_end = next_field_start;
349
350	return (pos_end);
351}
352
353/*
354 * Cut a field according to the key specs
355 */
356static struct bwstring *
357cut_field(const struct bwstring *s, struct key_specs *ks)
358{
359	struct bwstring *ret = NULL;
360
361	if (s && ks) {
362		size_t field_start, key_end, key_start, sz;
363		bool empty_field, empty_key;
364
365		field_start = 0;
366		key_start = 0;
367		empty_field = false;
368		empty_key = false;
369
370		find_field_start(s, ks, &field_start, &key_start,
371		    &empty_field, &empty_key);
372
373		if (empty_key)
374			sz = 0;
375		else {
376			key_end = find_field_end(s, ks);
377			sz = (key_end < key_start) ? 0 : (key_end - key_start);
378		}
379
380		ret = bwsalloc(sz);
381		if (sz)
382			bwsnocpy(ret, s, key_start, sz);
383	} else
384		ret = bwsalloc(0);
385
386	return (ret);
387}
388
389/*
390 * Preprocesses a line applying the necessary transformations
391 * specified by command line options and returns the preprocessed
392 * string, which can be used to compare.
393 */
394int
395preproc(struct bwstring *s, struct keys_array *ka)
396{
397
398	if (sort_opts_vals.kflag)
399		for (size_t i = 0; i < keys_num; i++) {
400			struct bwstring *key;
401			struct key_specs *kspecs;
402			struct sort_mods *sm;
403
404			kspecs = &(keys[i]);
405			key = cut_field(s, kspecs);
406
407			sm = &(kspecs->sm);
408			if (sm->dflag)
409				key = dictionary_order(key);
410			else if (sm->iflag)
411				key = ignore_nonprinting(key);
412			if (sm->fflag || sm->Mflag)
413				key = ignore_case(key);
414
415			set_key_on_keys_array(ka, key, i);
416		}
417	else {
418		struct bwstring *ret = NULL;
419		struct sort_mods *sm = default_sort_mods;
420
421		if (sm->bflag) {
422			if (ret == NULL)
423				ret = bwsdup(s);
424			ret = ignore_leading_blanks(ret);
425		}
426		if (sm->dflag) {
427			if (ret == NULL)
428				ret = bwsdup(s);
429			ret = dictionary_order(ret);
430		} else if (sm->iflag) {
431			if (ret == NULL)
432				ret = bwsdup(s);
433			ret = ignore_nonprinting(ret);
434		}
435		if (sm->fflag || sm->Mflag) {
436			if (ret == NULL)
437				ret = bwsdup(s);
438			ret = ignore_case(ret);
439		}
440		if (ret == NULL)
441			set_key_on_keys_array(ka, s, 0);
442		else
443			set_key_on_keys_array(ka, ret, 0);
444	}
445
446	return 0;
447}
448
449cmpcoll_t
450get_sort_func(struct sort_mods *sm)
451{
452
453	if (sm->nflag)
454		return (numcoll);
455	else if (sm->hflag)
456		return (hnumcoll);
457	else if (sm->gflag)
458		return (gnumcoll);
459	else if (sm->Mflag)
460		return (monthcoll);
461	else if (sm->Rflag)
462		return (randomcoll);
463	else if (sm->Vflag)
464		return (versioncoll);
465	else
466		return (wstrcoll);
467}
468
469/*
470 * Compares the given strings.  Returns a positive number if
471 * the first precedes the second, a negative number if the second is
472 * the preceding one, and zero if they are equal.  This function calls
473 * the underlying collate functions, which done the actual comparison.
474 */
475int
476key_coll(struct keys_array *ps1, struct keys_array *ps2, size_t offset)
477{
478	struct sort_mods *sm;
479	int res = 0;
480
481	for (size_t i = 0; i < keys_num; ++i) {
482		sm = &(keys[i].sm);
483
484		if (sm->rflag)
485			res = sm->func(&(ps2->key[i]), &(ps1->key[i]), offset);
486		else
487			res = sm->func(&(ps1->key[i]), &(ps2->key[i]), offset);
488
489		if (res)
490			break;
491
492		/* offset applies to only the first key */
493		offset = 0;
494	}
495	return (res);
496}
497
498/*
499 * Compare two strings.
500 * Plain symbol-by-symbol comparison.
501 */
502int
503top_level_str_coll(const struct bwstring *s1, const struct bwstring *s2)
504{
505
506	if (default_sort_mods->rflag) {
507		const struct bwstring *tmp;
508
509		tmp = s1;
510		s1 = s2;
511		s2 = tmp;
512	}
513
514	return (bwscoll(s1, s2, 0));
515}
516
517/*
518 * Compare a string and a sort list item, according to the sort specs.
519 */
520int
521str_list_coll(struct bwstring *str1, struct sort_list_item **ss2)
522{
523	struct keys_array *ka1;
524	int ret = 0;
525
526	ka1 = keys_array_alloc();
527
528	preproc(str1, ka1);
529
530	sort_list_item_make_key(*ss2);
531
532	if (debug_sort) {
533		bwsprintf(stdout, str1, "; s1=<", ">");
534		bwsprintf(stdout, (*ss2)->str, ", s2=<", ">");
535	}
536
537	ret = key_coll(ka1, &((*ss2)->ka), 0);
538
539	if (debug_sort)
540		printf("; cmp1=%d", ret);
541
542	clean_keys_array(str1, ka1);
543	sort_free(ka1);
544
545	if ((ret == 0) && !(sort_opts_vals.sflag) && sort_opts_vals.complex_sort) {
546		ret = top_level_str_coll(str1, ((*ss2)->str));
547		if (debug_sort)
548			printf("; cmp2=%d", ret);
549	}
550
551	if (debug_sort)
552		printf("\n");
553
554	return (ret);
555}
556
557/*
558 * Compare two sort list items, according to the sort specs.
559 */
560int
561list_coll_offset(struct sort_list_item **ss1, struct sort_list_item **ss2,
562    size_t offset)
563{
564	int ret;
565
566	ret = key_coll(&((*ss1)->ka), &((*ss2)->ka), offset);
567
568	if (debug_sort) {
569		if (offset)
570			printf("; offset=%d", (int) offset);
571		bwsprintf(stdout, ((*ss1)->str), "; s1=<", ">");
572		bwsprintf(stdout, ((*ss2)->str), ", s2=<", ">");
573		printf("; cmp1=%d\n", ret);
574	}
575
576	if (ret)
577		return (ret);
578
579	if (!(sort_opts_vals.sflag) && sort_opts_vals.complex_sort) {
580		ret = top_level_str_coll(((*ss1)->str), ((*ss2)->str));
581		if (debug_sort)
582			printf("; cmp2=%d\n", ret);
583	}
584
585	return (ret);
586}
587
588/*
589 * Compare two sort list items, according to the sort specs.
590 */
591int
592list_coll(struct sort_list_item **ss1, struct sort_list_item **ss2)
593{
594
595	return (list_coll_offset(ss1, ss2, 0));
596}
597
598#define LSCDEF(N) 											\
599static int 												\
600list_coll_##N(struct sort_list_item **ss1, struct sort_list_item **ss2)					\
601{													\
602													\
603	return (list_coll_offset(ss1, ss2, N));								\
604}
605
606LSCDEF(1)
607LSCDEF(2)
608LSCDEF(3)
609LSCDEF(4)
610LSCDEF(5)
611LSCDEF(6)
612LSCDEF(7)
613LSCDEF(8)
614LSCDEF(9)
615LSCDEF(10)
616LSCDEF(11)
617LSCDEF(12)
618LSCDEF(13)
619LSCDEF(14)
620LSCDEF(15)
621LSCDEF(16)
622LSCDEF(17)
623LSCDEF(18)
624LSCDEF(19)
625LSCDEF(20)
626
627listcoll_t
628get_list_call_func(size_t offset)
629{
630	static const listcoll_t lsarray[] = { list_coll, list_coll_1,
631	    list_coll_2, list_coll_3, list_coll_4, list_coll_5,
632	    list_coll_6, list_coll_7, list_coll_8, list_coll_9,
633	    list_coll_10, list_coll_11, list_coll_12, list_coll_13,
634	    list_coll_14, list_coll_15, list_coll_16, list_coll_17,
635	    list_coll_18, list_coll_19, list_coll_20 };
636
637	if (offset <= 20)
638		return (lsarray[offset]);
639
640	return (list_coll);
641}
642
643/*
644 * Compare two sort list items, only by their original string.
645 */
646int
647list_coll_by_str_only(struct sort_list_item **ss1, struct sort_list_item **ss2)
648{
649
650	return (top_level_str_coll(((*ss1)->str), ((*ss2)->str)));
651}
652
653/*
654 * Maximum size of a number in the string (before or after decimal point)
655 */
656#define MAX_NUM_SIZE (128)
657
658/*
659 * Set suffix value
660 */
661static void setsuffix(wchar_t c, unsigned char *si)
662{
663	switch (c){
664	case L'k':
665	case L'K':
666		*si = 1;
667		break;
668	case L'M':
669		*si = 2;
670		break;
671	case L'G':
672		*si = 3;
673		break;
674	case L'T':
675		*si = 4;
676		break;
677	case L'P':
678		*si = 5;
679		break;
680	case L'E':
681		*si = 6;
682		break;
683	case L'Z':
684		*si = 7;
685		break;
686	case L'Y':
687		*si = 8;
688		break;
689	default:
690		*si = 0;
691	};
692}
693
694/*
695 * Read string s and parse the string into a fixed-decimal-point number.
696 * sign equals -1 if the number is negative (explicit plus is not allowed,
697 * according to GNU sort's "info sort".
698 * The number part before decimal point is in the smain, after the decimal
699 * point is in sfrac, tail is the pointer to the remainder of the string.
700 */
701static int
702read_number(struct bwstring *s0, int *sign, wchar_t *smain, int *main_len, wchar_t *sfrac, int *frac_len, unsigned char *si)
703{
704	bwstring_iterator s;
705
706	s = bws_begin(s0);
707
708	/* always end the fraction with zero, even if we have no fraction */
709	sfrac[0] = 0;
710
711	while (iswblank(bws_get_iter_value(s)))
712		s = bws_iterator_inc(s, 1);
713
714	if (bws_get_iter_value(s) == symbol_negative_sign) {
715		*sign = -1;
716		s = bws_iterator_inc(s, 1);
717	}
718
719	// This is '0', not '\0', do not change this
720	while (iswdigit(bws_get_iter_value(s)) &&
721	    (bws_get_iter_value(s) == L'0'))
722		s = bws_iterator_inc(s, 1);
723
724	while (bws_get_iter_value(s) && *main_len < MAX_NUM_SIZE) {
725		if (iswdigit(bws_get_iter_value(s))) {
726			smain[*main_len] = bws_get_iter_value(s);
727			s = bws_iterator_inc(s, 1);
728			*main_len += 1;
729		} else if (symbol_thousands_sep &&
730		    (bws_get_iter_value(s) == symbol_thousands_sep))
731			s = bws_iterator_inc(s, 1);
732		else
733			break;
734	}
735
736	smain[*main_len] = 0;
737
738	if (bws_get_iter_value(s) == symbol_decimal_point) {
739		s = bws_iterator_inc(s, 1);
740		while (iswdigit(bws_get_iter_value(s)) &&
741		    *frac_len < MAX_NUM_SIZE) {
742			sfrac[*frac_len] = bws_get_iter_value(s);
743			s = bws_iterator_inc(s, 1);
744			*frac_len += 1;
745		}
746		sfrac[*frac_len] = 0;
747
748		while (*frac_len > 0 && sfrac[*frac_len - 1] == L'0') {
749			--(*frac_len);
750			sfrac[*frac_len] = L'\0';
751		}
752	}
753
754	setsuffix(bws_get_iter_value(s),si);
755
756	if ((*main_len + *frac_len) == 0)
757		*sign = 0;
758
759	return (0);
760}
761
762/*
763 * Implements string sort.
764 */
765static int
766wstrcoll(struct key_value *kv1, struct key_value *kv2, size_t offset)
767{
768
769	if (debug_sort) {
770		if (offset)
771			printf("; offset=%d\n", (int) offset);
772		bwsprintf(stdout, kv1->k, "; k1=<", ">");
773		printf("(%zu)", BWSLEN(kv1->k));
774		bwsprintf(stdout, kv2->k, ", k2=<", ">");
775		printf("(%zu)", BWSLEN(kv2->k));
776	}
777
778	return (bwscoll(kv1->k, kv2->k, offset));
779}
780
781/*
782 * Compare two suffixes
783 */
784static inline int
785cmpsuffix(unsigned char si1, unsigned char si2)
786{
787
788	return ((char)si1 - (char)si2);
789}
790
791/*
792 * Implements numeric sort for -n and -h.
793 */
794static int
795numcoll_impl(struct key_value *kv1, struct key_value *kv2, size_t offset, bool use_suffix)
796{
797	struct bwstring *s1, *s2;
798	wchar_t sfrac1[MAX_NUM_SIZE + 1], sfrac2[MAX_NUM_SIZE + 1];
799	wchar_t smain1[MAX_NUM_SIZE + 1], smain2[MAX_NUM_SIZE + 1];
800	int cmp_res, frac1, frac2, main1, main2, sign1, sign2;
801	unsigned char SI1, SI2;
802	bool e1, e2, key1_read, key2_read;
803
804	s1 = kv1->k;
805	s2 = kv2->k;
806	sign1 = sign2 = 0;
807	main1 = main2 = 0;
808	frac1 = frac2 = 0;
809
810	cmp_res = 0;
811	key1_read = key2_read = false;
812
813	UNUSED_ARG(offset);
814
815	if (debug_sort) {
816		bwsprintf(stdout, s1, "; k1=<", ">");
817		bwsprintf(stdout, s2, ", k2=<", ">");
818	}
819
820	if (s1 == s2)
821		return (0);
822
823	if (kv1->hint->status == HS_UNINITIALIZED) {
824		/* read the number from the string */
825		read_number(s1, &sign1, smain1, &main1, sfrac1, &frac1, &SI1);
826		key1_read = true;
827		kv1->hint->v.nh.n1 = wcstoull(smain1, NULL, 10);
828		if(main1 < 1 && frac1 < 1)
829			kv1->hint->v.nh.empty=true;
830		kv1->hint->v.nh.si = SI1;
831		kv1->hint->status = (kv1->hint->v.nh.n1 != ULLONG_MAX) ?
832		    HS_INITIALIZED : HS_ERROR;
833		kv1->hint->v.nh.neg = (sign1 < 0) ? true : false;
834	}
835
836	if (kv2->hint->status == HS_UNINITIALIZED) {
837		/* read the number from the string */
838		read_number(s2, &sign2, smain2, &main2, sfrac2, &frac2,&SI2);
839		key2_read = true;
840		kv2->hint->v.nh.n1 = wcstoull(smain2, NULL, 10);
841		if(main2 < 1 && frac2 < 1)
842			kv2->hint->v.nh.empty=true;
843		kv2->hint->v.nh.si = SI2;
844		kv2->hint->status = (kv2->hint->v.nh.n1 != ULLONG_MAX) ?
845		    HS_INITIALIZED : HS_ERROR;
846		kv2->hint->v.nh.neg = (sign2 < 0) ? true : false;
847	}
848
849	if (kv1->hint->status == HS_INITIALIZED && kv2->hint->status ==
850	    HS_INITIALIZED) {
851		unsigned long long n1, n2;
852		bool neg1, neg2;
853
854		e1 = kv1->hint->v.nh.empty;
855		e2 = kv2->hint->v.nh.empty;
856
857		if (e1 && e2)
858			return (0);
859
860		neg1 = kv1->hint->v.nh.neg;
861		neg2 = kv2->hint->v.nh.neg;
862
863		if (neg1 && !neg2)
864			return (-1);
865		if (neg2 && !neg1)
866			return (+1);
867
868		if (e1)
869			return (neg2 ? +1 : -1);
870		else if (e2)
871			return (neg1 ? -1 : +1);
872
873
874		if (use_suffix) {
875			cmp_res = cmpsuffix(kv1->hint->v.nh.si, kv2->hint->v.nh.si);
876			if (cmp_res)
877				return (neg1 ? -cmp_res : cmp_res);
878		}
879
880		n1 = kv1->hint->v.nh.n1;
881		n2 = kv2->hint->v.nh.n1;
882		if (n1 < n2)
883			return (neg1 ? +1 : -1);
884		else if (n1 > n2)
885			return (neg1 ? -1 : +1);
886	}
887
888	/* read the numbers from the strings */
889	if (!key1_read)
890		read_number(s1, &sign1, smain1, &main1, sfrac1, &frac1, &SI1);
891	if (!key2_read)
892		read_number(s2, &sign2, smain2, &main2, sfrac2, &frac2, &SI2);
893
894	e1 = ((main1 + frac1) == 0);
895	e2 = ((main2 + frac2) == 0);
896
897	if (e1 && e2)
898		return (0);
899
900	/* we know the result if the signs are different */
901	if (sign1 < 0 && sign2 >= 0)
902		return (-1);
903	if (sign1 >= 0 && sign2 < 0)
904		return (+1);
905
906	if (e1)
907		return ((sign2 < 0) ? +1 : -1);
908	else if (e2)
909		return ((sign1 < 0) ? -1 : +1);
910
911	if (use_suffix) {
912		cmp_res = cmpsuffix(SI1, SI2);
913		if (cmp_res)
914			return ((sign1 < 0) ? -cmp_res : cmp_res);
915	}
916
917	/* if both numbers are empty assume that the strings are equal */
918	if (main1 < 1 && main2 < 1 && frac1 < 1 && frac2 < 1)
919		return (0);
920
921	/*
922	 * if the main part is of different size, we know the result
923	 * (because the leading zeros are removed)
924	 */
925	if (main1 < main2)
926		cmp_res = -1;
927	else if (main1 > main2)
928		cmp_res = +1;
929	/* if the sizes are equal then simple non-collate string compare gives the correct result */
930	else
931		cmp_res = wcscmp(smain1, smain2);
932
933	/* check fraction */
934	if (!cmp_res)
935		cmp_res = wcscmp(sfrac1, sfrac2);
936
937	if (!cmp_res)
938		return (0);
939
940	/* reverse result if the signs are negative */
941	if (sign1 < 0 && sign2 < 0)
942		cmp_res = -cmp_res;
943
944	return (cmp_res);
945}
946
947/*
948 * Implements numeric sort (-n).
949 */
950static int
951numcoll(struct key_value *kv1, struct key_value *kv2, size_t offset)
952{
953
954	return (numcoll_impl(kv1, kv2, offset, false));
955}
956
957/*
958 * Implements 'human' numeric sort (-h).
959 */
960static int
961hnumcoll(struct key_value *kv1, struct key_value *kv2, size_t offset)
962{
963
964	return (numcoll_impl(kv1, kv2, offset, true));
965}
966
967/*
968 * Implements random sort (-R).
969 */
970static int
971randomcoll(struct key_value *kv1, struct key_value *kv2, size_t offset)
972{
973	struct bwstring *s1, *s2;
974	MD5_CTX ctx1, ctx2;
975	char *b1, *b2;
976
977	UNUSED_ARG(offset);
978
979	s1 = kv1->k;
980	s2 = kv2->k;
981
982	if (debug_sort) {
983		bwsprintf(stdout, s1, "; k1=<", ">");
984		bwsprintf(stdout, s2, ", k2=<", ">");
985	}
986
987	if (s1 == s2)
988		return (0);
989
990	memcpy(&ctx1,&md5_ctx,sizeof(MD5_CTX));
991	memcpy(&ctx2,&md5_ctx,sizeof(MD5_CTX));
992
993	MD5Update(&ctx1, bwsrawdata(s1), bwsrawlen(s1));
994	MD5Update(&ctx2, bwsrawdata(s2), bwsrawlen(s2));
995	b1 = MD5End(&ctx1, NULL);
996	b2 = MD5End(&ctx2, NULL);
997	if (b1 == NULL) {
998		if (b2 == NULL)
999			return (0);
1000		else {
1001			sort_free(b2);
1002			return (-1);
1003		}
1004	} else if (b2 == NULL) {
1005		sort_free(b1);
1006		return (+1);
1007	} else {
1008		int cmp_res;
1009
1010		cmp_res = strcmp(b1,b2);
1011		sort_free(b1);
1012		sort_free(b2);
1013
1014		if (!cmp_res)
1015			cmp_res = bwscoll(s1, s2, 0);
1016
1017		return (cmp_res);
1018	}
1019}
1020
1021/*
1022 * Implements version sort (-V).
1023 */
1024static int
1025versioncoll(struct key_value *kv1, struct key_value *kv2, size_t offset)
1026{
1027	struct bwstring *s1, *s2;
1028
1029	UNUSED_ARG(offset);
1030
1031	s1 = kv1->k;
1032	s2 = kv2->k;
1033
1034	if (debug_sort) {
1035		bwsprintf(stdout, s1, "; k1=<", ">");
1036		bwsprintf(stdout, s2, ", k2=<", ">");
1037	}
1038
1039	if (s1 == s2)
1040		return (0);
1041
1042	return (vcmp(s1, s2));
1043}
1044
1045/*
1046 * Check for minus infinity
1047 */
1048static inline bool
1049huge_minus(double d, int err1)
1050{
1051
1052	if (err1 == ERANGE)
1053		if (d == -HUGE_VAL || d == -HUGE_VALF || d == -HUGE_VALL)
1054			return (+1);
1055
1056	return (0);
1057}
1058
1059/*
1060 * Check for plus infinity
1061 */
1062static inline bool
1063huge_plus(double d, int err1)
1064{
1065
1066	if (err1 == ERANGE)
1067		if (d == HUGE_VAL || d == HUGE_VALF || d == HUGE_VALL)
1068			return (+1);
1069
1070	return (0);
1071}
1072
1073/*
1074 * Check whether a function is a NAN
1075 */
1076static bool
1077is_nan(double d)
1078{
1079
1080	return ((d == NAN) || (isnan(d)));
1081}
1082
1083/*
1084 * Compare two NANs
1085 */
1086static int
1087cmp_nans(double d1, double d2)
1088{
1089
1090	if (d1 < d2)
1091		return (-1);
1092	if (d2 > d2)
1093		return (+1);
1094	return (0);
1095}
1096
1097/*
1098 * Implements general numeric sort (-g).
1099 */
1100static int
1101gnumcoll(struct key_value *kv1, struct key_value *kv2, size_t offset)
1102{
1103	double d1, d2;
1104	int err1, err2;
1105	bool empty1, empty2, key1_read, key2_read;
1106
1107	d1 = d2 = 0;
1108	err1 = err2 = 0;
1109	key1_read = key2_read = false;
1110
1111	UNUSED_ARG(offset);
1112
1113	if (debug_sort) {
1114		bwsprintf(stdout, kv1->k, "; k1=<", ">");
1115		bwsprintf(stdout, kv2->k, "; k2=<", ">");
1116	}
1117
1118	if (kv1->hint->status == HS_UNINITIALIZED) {
1119		errno = 0;
1120		d1 = bwstod(kv1->k, &empty1);
1121		err1 = errno;
1122
1123		if (empty1)
1124			kv1->hint->v.gh.notnum = true;
1125		else if (err1 == 0) {
1126			kv1->hint->v.gh.d = d1;
1127			kv1->hint->v.gh.nan = is_nan(d1);
1128			kv1->hint->status = HS_INITIALIZED;
1129		} else
1130			kv1->hint->status = HS_ERROR;
1131
1132		key1_read = true;
1133	}
1134
1135	if (kv2->hint->status == HS_UNINITIALIZED) {
1136		errno = 0;
1137		d2 = bwstod(kv2->k, &empty2);
1138		err2 = errno;
1139
1140		if (empty2)
1141			kv2->hint->v.gh.notnum = true;
1142		else if (err2 == 0) {
1143			kv2->hint->v.gh.d = d2;
1144			kv2->hint->v.gh.nan = is_nan(d2);
1145			kv2->hint->status = HS_INITIALIZED;
1146		} else
1147			kv2->hint->status = HS_ERROR;
1148
1149		key2_read = true;
1150	}
1151
1152	if (kv1->hint->status == HS_INITIALIZED &&
1153	    kv2->hint->status == HS_INITIALIZED) {
1154		if (kv1->hint->v.gh.notnum)
1155			return ((kv2->hint->v.gh.notnum) ? 0 : -1);
1156		else if (kv2->hint->v.gh.notnum)
1157			return (+1);
1158
1159		if (kv1->hint->v.gh.nan)
1160			return ((kv2->hint->v.gh.nan) ?
1161			    cmp_nans(kv1->hint->v.gh.d, kv2->hint->v.gh.d) :
1162			    -1);
1163		else if (kv2->hint->v.gh.nan)
1164			return (+1);
1165
1166		d1 = kv1->hint->v.gh.d;
1167		d2 = kv2->hint->v.gh.d;
1168
1169		if (d1 < d2)
1170			return (-1);
1171		else if (d1 > d2)
1172			return (+1);
1173		else
1174			return (0);
1175	}
1176
1177	if (!key1_read) {
1178		errno = 0;
1179		d1 = bwstod(kv1->k, &empty1);
1180		err1 = errno;
1181	}
1182
1183	if (!key2_read) {
1184		errno = 0;
1185		d2 = bwstod(kv2->k, &empty2);
1186		err2 = errno;
1187	}
1188
1189	/* Non-value case: */
1190	if (empty1)
1191		return (empty2 ? 0 : -1);
1192	else if (empty2)
1193		return (+1);
1194
1195	/* NAN case */
1196	if (is_nan(d1))
1197		return (is_nan(d2) ? cmp_nans(d1, d2) : -1);
1198	else if (is_nan(d2))
1199		return (+1);
1200
1201	/* Infinities */
1202	if (err1 == ERANGE || err2 == ERANGE) {
1203		/* Minus infinity case */
1204		if (huge_minus(d1, err1)) {
1205			if (huge_minus(d2, err2)) {
1206				if (d1 < d2)
1207					return (-1);
1208				if (d1 > d2)
1209					return (+1);
1210				return (0);
1211			} else
1212				return (-1);
1213
1214		} else if (huge_minus(d2, err2)) {
1215			if (huge_minus(d1, err1)) {
1216				if (d1 < d2)
1217					return (-1);
1218				if (d1 > d2)
1219					return (+1);
1220				return (0);
1221			} else
1222				return (+1);
1223		}
1224
1225		/* Plus infinity case */
1226		if (huge_plus(d1, err1)) {
1227			if (huge_plus(d2, err2)) {
1228				if (d1 < d2)
1229					return (-1);
1230				if (d1 > d2)
1231					return (+1);
1232				return (0);
1233			} else
1234				return (+1);
1235		} else if (huge_plus(d2, err2)) {
1236			if (huge_plus(d1, err1)) {
1237				if (d1 < d2)
1238					return (-1);
1239				if (d1 > d2)
1240					return (+1);
1241				return (0);
1242			} else
1243				return (-1);
1244		}
1245	}
1246
1247	if (d1 < d2)
1248		return (-1);
1249	if (d1 > d2)
1250		return (+1);
1251
1252	return (0);
1253}
1254
1255/*
1256 * Implements month sort (-M).
1257 */
1258static int
1259monthcoll(struct key_value *kv1, struct key_value *kv2, size_t offset)
1260{
1261	int val1, val2;
1262	bool key1_read, key2_read;
1263
1264	val1 = val2 = 0;
1265	key1_read = key2_read = false;
1266
1267	UNUSED_ARG(offset);
1268
1269	if (debug_sort) {
1270		bwsprintf(stdout, kv1->k, "; k1=<", ">");
1271		bwsprintf(stdout, kv2->k, "; k2=<", ">");
1272	}
1273
1274	if (kv1->hint->status == HS_UNINITIALIZED) {
1275		kv1->hint->v.Mh.m = bws_month_score(kv1->k);
1276		key1_read = true;
1277		kv1->hint->status = HS_INITIALIZED;
1278	}
1279
1280	if (kv2->hint->status == HS_UNINITIALIZED) {
1281		kv2->hint->v.Mh.m = bws_month_score(kv2->k);
1282		key2_read = true;
1283		kv2->hint->status = HS_INITIALIZED;
1284	}
1285
1286	if (kv1->hint->status == HS_INITIALIZED) {
1287		val1 = kv1->hint->v.Mh.m;
1288		key1_read = true;
1289	}
1290
1291	if (kv2->hint->status == HS_INITIALIZED) {
1292		val2 = kv2->hint->v.Mh.m;
1293		key2_read = true;
1294	}
1295
1296	if (!key1_read)
1297		val1 = bws_month_score(kv1->k);
1298	if (!key2_read)
1299		val2 = bws_month_score(kv2->k);
1300
1301	if (val1 == val2) {
1302		return (0);
1303	}
1304	if (val1 < val2)
1305		return (-1);
1306	return (+1);
1307}
1308