1/*-
2 * SPDX-License-Identifier: BSD-2-Clause
3 *
4 * Copyright (C) 2009 Gabor Kovesdan <gabor@FreeBSD.org>
5 * Copyright (C) 2012 Oleg Moskalenko <mom040267@gmail.com>
6 * All rights reserved.
7 *
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
10 * are met:
11 * 1. Redistributions of source code must retain the above copyright
12 *    notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 *    notice, this list of conditions and the following disclaimer in the
15 *    documentation and/or other materials provided with the distribution.
16 *
17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
18 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
21 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
23 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27 * SUCH DAMAGE.
28 */
29
30#include <sys/types.h>
31
32#include <errno.h>
33#include <err.h>
34#include <langinfo.h>
35#include <limits.h>
36#include <math.h>
37#include <md5.h>
38#include <stdlib.h>
39#include <string.h>
40#include <wchar.h>
41#include <wctype.h>
42
43#include "coll.h"
44#include "vsort.h"
45
46struct key_specs *keys;
47size_t keys_num = 0;
48
49wint_t symbol_decimal_point = L'.';
50/* there is no default thousands separator in collate rules: */
51wint_t symbol_thousands_sep = 0;
52wint_t symbol_negative_sign = L'-';
53wint_t symbol_positive_sign = L'+';
54
55static int wstrcoll(struct key_value *kv1, struct key_value *kv2, size_t offset);
56static int gnumcoll(struct key_value*, struct key_value *, size_t offset);
57static int monthcoll(struct key_value*, struct key_value *, size_t offset);
58static int numcoll(struct key_value*, struct key_value *, size_t offset);
59static int hnumcoll(struct key_value*, struct key_value *, size_t offset);
60static int randomcoll(struct key_value*, struct key_value *, size_t offset);
61static int versioncoll(struct key_value*, struct key_value *, size_t offset);
62
63/*
64 * Allocate keys array
65 */
66struct keys_array *
67keys_array_alloc(void)
68{
69	struct keys_array *ka;
70	size_t sz;
71
72	sz = keys_array_size();
73	ka = sort_calloc(1, sz);
74
75	return (ka);
76}
77
78/*
79 * Calculate whether we need key hint space
80 */
81static size_t
82key_hint_size(void)
83{
84
85	return (need_hint ? sizeof(struct key_hint) : 0);
86}
87
88/*
89 * Calculate keys array size
90 */
91size_t
92keys_array_size(void)
93{
94
95	return (keys_num * (sizeof(struct key_value) + key_hint_size()));
96}
97
98/*
99 * Clean data of keys array
100 */
101void
102clean_keys_array(const struct bwstring *s, struct keys_array *ka)
103{
104
105	if (ka) {
106		for (size_t i = 0; i < keys_num; ++i) {
107			const struct key_value *kv;
108
109			kv = get_key_from_keys_array(ka, i);
110			if (kv->k && kv->k != s)
111				bwsfree(kv->k);
112		}
113		memset(ka, 0, keys_array_size());
114	}
115}
116
117/*
118 * Get pointer to a key value in the keys set
119 */
120struct key_value *
121get_key_from_keys_array(struct keys_array *ka, size_t ind)
122{
123
124	return ((struct key_value *)((caddr_t)ka->key +
125	    ind * (sizeof(struct key_value) + key_hint_size())));
126}
127
128/*
129 * Set value of a key in the keys set
130 */
131void
132set_key_on_keys_array(struct keys_array *ka, struct bwstring *s, size_t ind)
133{
134
135	if (ka && keys_num > ind) {
136		struct key_value *kv;
137
138		kv = get_key_from_keys_array(ka, ind);
139
140		if (kv->k && kv->k != s)
141			bwsfree(kv->k);
142		kv->k = s;
143	}
144}
145
146/*
147 * Initialize a sort list item
148 */
149struct sort_list_item *
150sort_list_item_alloc(void)
151{
152	struct sort_list_item *si;
153	size_t sz;
154
155	sz = sizeof(struct sort_list_item) + keys_array_size();
156	si = sort_calloc(1, sz);
157
158	return (si);
159}
160
161size_t
162sort_list_item_size(struct sort_list_item *si)
163{
164	size_t ret = 0;
165
166	if (si) {
167		ret = sizeof(struct sort_list_item) + keys_array_size();
168		if (si->str)
169			ret += bws_memsize(si->str);
170		for (size_t i = 0; i < keys_num; ++i) {
171			const struct key_value *kv;
172
173			kv = get_key_from_keys_array(&si->ka, i);
174
175			if (kv->k != si->str)
176				ret += bws_memsize(kv->k);
177		}
178	}
179	return (ret);
180}
181
182/*
183 * Calculate key for a sort list item
184 */
185static void
186sort_list_item_make_key(struct sort_list_item *si)
187{
188
189	preproc(si->str, &(si->ka));
190}
191
192/*
193 * Set value of a sort list item.
194 * Return combined string and keys memory size.
195 */
196void
197sort_list_item_set(struct sort_list_item *si, struct bwstring *str)
198{
199
200	if (si) {
201		clean_keys_array(si->str, &(si->ka));
202		if (si->str) {
203			if (si->str == str) {
204				/* we are trying to reset the same string */
205				return;
206			} else {
207				bwsfree(si->str);
208				si->str = NULL;
209			}
210		}
211		si->str = str;
212		sort_list_item_make_key(si);
213	}
214}
215
216/*
217 * De-allocate a sort list item object memory
218 */
219void
220sort_list_item_clean(struct sort_list_item *si)
221{
222
223	if (si) {
224		clean_keys_array(si->str, &(si->ka));
225		if (si->str) {
226			bwsfree(si->str);
227			si->str = NULL;
228		}
229	}
230}
231
232/*
233 * Skip columns according to specs
234 */
235static size_t
236skip_cols_to_start(const struct bwstring *s, size_t cols, size_t start,
237    bool skip_blanks, bool *empty_key)
238{
239	if (cols < 1)
240		return (BWSLEN(s) + 1);
241
242	if (skip_blanks)
243		while (start < BWSLEN(s) && iswblank(BWS_GET(s,start)))
244			++start;
245
246	while (start < BWSLEN(s) && cols > 1) {
247		--cols;
248		++start;
249	}
250
251	if (start >= BWSLEN(s))
252		*empty_key = true;
253
254	return (start);
255}
256
257/*
258 * Skip fields according to specs
259 */
260static size_t
261skip_fields_to_start(const struct bwstring *s, size_t fields, bool *empty_field)
262{
263
264	if (fields < 2) {
265		if (BWSLEN(s) == 0)
266			*empty_field = true;
267		return (0);
268	} else if (!(sort_opts_vals.tflag)) {
269		size_t cpos = 0;
270		bool pb = true;
271
272		while (cpos < BWSLEN(s)) {
273			bool isblank;
274
275			isblank = iswblank(BWS_GET(s, cpos));
276
277			if (isblank && !pb) {
278				--fields;
279				if (fields <= 1)
280					return (cpos);
281			}
282			pb = isblank;
283			++cpos;
284		}
285		if (fields > 1)
286			*empty_field = true;
287		return (cpos);
288	} else {
289		size_t cpos = 0;
290
291		while (cpos < BWSLEN(s)) {
292			if (BWS_GET(s,cpos) == (wchar_t)sort_opts_vals.field_sep) {
293				--fields;
294				if (fields <= 1)
295					return (cpos + 1);
296			}
297			++cpos;
298		}
299		if (fields > 1)
300			*empty_field = true;
301		return (cpos);
302	}
303}
304
305/*
306 * Find fields start
307 */
308static void
309find_field_start(const struct bwstring *s, struct key_specs *ks,
310    size_t *field_start, size_t *key_start, bool *empty_field, bool *empty_key)
311{
312
313	*field_start = skip_fields_to_start(s, ks->f1, empty_field);
314	if (!*empty_field)
315		*key_start = skip_cols_to_start(s, ks->c1, *field_start,
316		    ks->pos1b, empty_key);
317	else
318		*empty_key = true;
319}
320
321/*
322 * Find end key position
323 */
324static size_t
325find_field_end(const struct bwstring *s, struct key_specs *ks)
326{
327	size_t f2, next_field_start, pos_end;
328	bool empty_field, empty_key;
329
330	empty_field = false;
331	empty_key = false;
332	f2 = ks->f2;
333
334	if (f2 == 0)
335		return (BWSLEN(s) + 1);
336	else {
337		if (ks->c2 == 0) {
338			next_field_start = skip_fields_to_start(s, f2 + 1,
339			    &empty_field);
340			if ((next_field_start > 0) && sort_opts_vals.tflag &&
341			    ((wchar_t)sort_opts_vals.field_sep == BWS_GET(s,
342			    next_field_start - 1)))
343				--next_field_start;
344		} else
345			next_field_start = skip_fields_to_start(s, f2,
346			    &empty_field);
347	}
348
349	if (empty_field || (next_field_start >= BWSLEN(s)))
350		return (BWSLEN(s) + 1);
351
352	if (ks->c2) {
353		pos_end = skip_cols_to_start(s, ks->c2, next_field_start,
354		    ks->pos2b, &empty_key);
355		if (pos_end < BWSLEN(s))
356			++pos_end;
357	} else
358		pos_end = next_field_start;
359
360	return (pos_end);
361}
362
363/*
364 * Cut a field according to the key specs
365 */
366static struct bwstring *
367cut_field(const struct bwstring *s, struct key_specs *ks)
368{
369	struct bwstring *ret = NULL;
370
371	if (s && ks) {
372		size_t field_start, key_end, key_start, sz;
373		bool empty_field, empty_key;
374
375		field_start = 0;
376		key_start = 0;
377		empty_field = false;
378		empty_key = false;
379
380		find_field_start(s, ks, &field_start, &key_start,
381		    &empty_field, &empty_key);
382
383		if (empty_key)
384			sz = 0;
385		else {
386			key_end = find_field_end(s, ks);
387			sz = (key_end < key_start) ? 0 : (key_end - key_start);
388		}
389
390		ret = bwsalloc(sz);
391		if (sz)
392			bwsnocpy(ret, s, key_start, sz);
393	} else
394		ret = bwsalloc(0);
395
396	return (ret);
397}
398
399/*
400 * Preprocesses a line applying the necessary transformations
401 * specified by command line options and returns the preprocessed
402 * string, which can be used to compare.
403 */
404int
405preproc(struct bwstring *s, struct keys_array *ka)
406{
407
408	if (sort_opts_vals.kflag)
409		for (size_t i = 0; i < keys_num; i++) {
410			struct bwstring *key;
411			struct key_specs *kspecs;
412			struct sort_mods *sm;
413
414			kspecs = &(keys[i]);
415			key = cut_field(s, kspecs);
416
417			sm = &(kspecs->sm);
418			if (sm->dflag)
419				key = dictionary_order(key);
420			else if (sm->iflag)
421				key = ignore_nonprinting(key);
422			if (sm->fflag || sm->Mflag)
423				key = ignore_case(key);
424
425			set_key_on_keys_array(ka, key, i);
426		}
427	else {
428		struct bwstring *ret = NULL;
429		struct sort_mods *sm = default_sort_mods;
430
431		if (sm->bflag) {
432			if (ret == NULL)
433				ret = bwsdup(s);
434			ret = ignore_leading_blanks(ret);
435		}
436		if (sm->dflag) {
437			if (ret == NULL)
438				ret = bwsdup(s);
439			ret = dictionary_order(ret);
440		} else if (sm->iflag) {
441			if (ret == NULL)
442				ret = bwsdup(s);
443			ret = ignore_nonprinting(ret);
444		}
445		if (sm->fflag || sm->Mflag) {
446			if (ret == NULL)
447				ret = bwsdup(s);
448			ret = ignore_case(ret);
449		}
450		if (ret == NULL)
451			set_key_on_keys_array(ka, s, 0);
452		else
453			set_key_on_keys_array(ka, ret, 0);
454	}
455
456	return 0;
457}
458
459cmpcoll_t
460get_sort_func(struct sort_mods *sm)
461{
462
463	if (sm->nflag)
464		return (numcoll);
465	else if (sm->hflag)
466		return (hnumcoll);
467	else if (sm->gflag)
468		return (gnumcoll);
469	else if (sm->Mflag)
470		return (monthcoll);
471	else if (sm->Rflag)
472		return (randomcoll);
473	else if (sm->Vflag)
474		return (versioncoll);
475	else
476		return (wstrcoll);
477}
478
479/*
480 * Compares the given strings.  Returns a positive number if
481 * the first precedes the second, a negative number if the second is
482 * the preceding one, and zero if they are equal.  This function calls
483 * the underlying collate functions, which done the actual comparison.
484 */
485int
486key_coll(struct keys_array *ps1, struct keys_array *ps2, size_t offset)
487{
488	struct key_value *kv1, *kv2;
489	struct sort_mods *sm;
490	int res = 0;
491
492	for (size_t i = 0; i < keys_num; ++i) {
493		kv1 = get_key_from_keys_array(ps1, i);
494		kv2 = get_key_from_keys_array(ps2, i);
495		sm = &(keys[i].sm);
496
497		if (sm->rflag)
498			res = sm->func(kv2, kv1, offset);
499		else
500			res = sm->func(kv1, kv2, offset);
501
502		if (res)
503			break;
504
505		/* offset applies to only the first key */
506		offset = 0;
507	}
508	return (res);
509}
510
511/*
512 * Compare two strings.
513 * Plain symbol-by-symbol comparison.
514 */
515int
516top_level_str_coll(const struct bwstring *s1, const struct bwstring *s2)
517{
518
519	if (default_sort_mods->rflag) {
520		const struct bwstring *tmp;
521
522		tmp = s1;
523		s1 = s2;
524		s2 = tmp;
525	}
526
527	return (bwscoll(s1, s2, 0));
528}
529
530/*
531 * Compare a string and a sort list item, according to the sort specs.
532 */
533int
534str_list_coll(struct bwstring *str1, struct sort_list_item **ss2)
535{
536	struct keys_array *ka1;
537	int ret = 0;
538
539	ka1 = keys_array_alloc();
540
541	preproc(str1, ka1);
542
543	sort_list_item_make_key(*ss2);
544
545	if (debug_sort) {
546		bwsprintf(stdout, str1, "; s1=<", ">");
547		bwsprintf(stdout, (*ss2)->str, ", s2=<", ">");
548	}
549
550	ret = key_coll(ka1, &((*ss2)->ka), 0);
551
552	if (debug_sort)
553		printf("; cmp1=%d", ret);
554
555	clean_keys_array(str1, ka1);
556	sort_free(ka1);
557
558	if ((ret == 0) && !(sort_opts_vals.sflag) && sort_opts_vals.complex_sort) {
559		ret = top_level_str_coll(str1, ((*ss2)->str));
560		if (debug_sort)
561			printf("; cmp2=%d", ret);
562	}
563
564	if (debug_sort)
565		printf("\n");
566
567	return (ret);
568}
569
570/*
571 * Compare two sort list items, according to the sort specs.
572 */
573int
574list_coll_offset(struct sort_list_item **ss1, struct sort_list_item **ss2,
575    size_t offset)
576{
577	int ret;
578
579	ret = key_coll(&((*ss1)->ka), &((*ss2)->ka), offset);
580
581	if (debug_sort) {
582		if (offset)
583			printf("; offset=%d", (int) offset);
584		bwsprintf(stdout, ((*ss1)->str), "; s1=<", ">");
585		bwsprintf(stdout, ((*ss2)->str), ", s2=<", ">");
586		printf("; cmp1=%d\n", ret);
587	}
588
589	if (ret)
590		return (ret);
591
592	if (!(sort_opts_vals.sflag) && sort_opts_vals.complex_sort) {
593		ret = top_level_str_coll(((*ss1)->str), ((*ss2)->str));
594		if (debug_sort)
595			printf("; cmp2=%d\n", ret);
596	}
597
598	return (ret);
599}
600
601/*
602 * Compare two sort list items, according to the sort specs.
603 */
604int
605list_coll(struct sort_list_item **ss1, struct sort_list_item **ss2)
606{
607
608	return (list_coll_offset(ss1, ss2, 0));
609}
610
611#define	LSCDEF(N)							\
612static int 								\
613list_coll_##N(struct sort_list_item **ss1, struct sort_list_item **ss2)	\
614{									\
615									\
616	return (list_coll_offset(ss1, ss2, N));				\
617}
618
619LSCDEF(1)
620LSCDEF(2)
621LSCDEF(3)
622LSCDEF(4)
623LSCDEF(5)
624LSCDEF(6)
625LSCDEF(7)
626LSCDEF(8)
627LSCDEF(9)
628LSCDEF(10)
629LSCDEF(11)
630LSCDEF(12)
631LSCDEF(13)
632LSCDEF(14)
633LSCDEF(15)
634LSCDEF(16)
635LSCDEF(17)
636LSCDEF(18)
637LSCDEF(19)
638LSCDEF(20)
639
640listcoll_t
641get_list_call_func(size_t offset)
642{
643	static const listcoll_t lsarray[] = { list_coll, list_coll_1,
644	    list_coll_2, list_coll_3, list_coll_4, list_coll_5,
645	    list_coll_6, list_coll_7, list_coll_8, list_coll_9,
646	    list_coll_10, list_coll_11, list_coll_12, list_coll_13,
647	    list_coll_14, list_coll_15, list_coll_16, list_coll_17,
648	    list_coll_18, list_coll_19, list_coll_20 };
649
650	if (offset <= 20)
651		return (lsarray[offset]);
652
653	return (list_coll);
654}
655
656/*
657 * Compare two sort list items, only by their original string.
658 */
659int
660list_coll_by_str_only(struct sort_list_item **ss1, struct sort_list_item **ss2)
661{
662
663	return (top_level_str_coll(((*ss1)->str), ((*ss2)->str)));
664}
665
666/*
667 * Maximum size of a number in the string (before or after decimal point)
668 */
669#define MAX_NUM_SIZE (128)
670
671/*
672 * Set suffix value
673 */
674static void setsuffix(wchar_t c, unsigned char *si)
675{
676	switch (c){
677	case L'k':
678	case L'K':
679		*si = 1;
680		break;
681	case L'M':
682		*si = 2;
683		break;
684	case L'G':
685		*si = 3;
686		break;
687	case L'T':
688		*si = 4;
689		break;
690	case L'P':
691		*si = 5;
692		break;
693	case L'E':
694		*si = 6;
695		break;
696	case L'Z':
697		*si = 7;
698		break;
699	case L'Y':
700		*si = 8;
701		break;
702	default:
703		*si = 0;
704	}
705}
706
707/*
708 * Read string s and parse the string into a fixed-decimal-point number.
709 * sign equals -1 if the number is negative (explicit plus is not allowed,
710 * according to GNU sort's "info sort".
711 * The number part before decimal point is in the smain, after the decimal
712 * point is in sfrac, tail is the pointer to the remainder of the string.
713 */
714static int
715read_number(struct bwstring *s0, int *sign, wchar_t *smain, size_t *main_len, wchar_t *sfrac, size_t *frac_len, unsigned char *si)
716{
717	bwstring_iterator s;
718
719	s = bws_begin(s0);
720
721	/* always end the fraction with zero, even if we have no fraction */
722	sfrac[0] = 0;
723
724	while (iswblank(bws_get_iter_value(s)))
725		s = bws_iterator_inc(s, 1);
726
727	if (bws_get_iter_value(s) == (wchar_t)symbol_negative_sign) {
728		*sign = -1;
729		s = bws_iterator_inc(s, 1);
730	}
731
732	// This is '0', not '\0', do not change this
733	while (iswdigit(bws_get_iter_value(s)) &&
734	    (bws_get_iter_value(s) == L'0'))
735		s = bws_iterator_inc(s, 1);
736
737	while (bws_get_iter_value(s) && *main_len < MAX_NUM_SIZE) {
738		if (iswdigit(bws_get_iter_value(s))) {
739			smain[*main_len] = bws_get_iter_value(s);
740			s = bws_iterator_inc(s, 1);
741			*main_len += 1;
742		} else if (symbol_thousands_sep &&
743		    (bws_get_iter_value(s) == (wchar_t)symbol_thousands_sep))
744			s = bws_iterator_inc(s, 1);
745		else
746			break;
747	}
748
749	smain[*main_len] = 0;
750
751	if (bws_get_iter_value(s) == (wchar_t)symbol_decimal_point) {
752		s = bws_iterator_inc(s, 1);
753		while (iswdigit(bws_get_iter_value(s)) &&
754		    *frac_len < MAX_NUM_SIZE) {
755			sfrac[*frac_len] = bws_get_iter_value(s);
756			s = bws_iterator_inc(s, 1);
757			*frac_len += 1;
758		}
759		sfrac[*frac_len] = 0;
760
761		while (*frac_len > 0 && sfrac[*frac_len - 1] == L'0') {
762			--(*frac_len);
763			sfrac[*frac_len] = L'\0';
764		}
765	}
766
767	setsuffix(bws_get_iter_value(s),si);
768
769	if ((*main_len + *frac_len) == 0)
770		*sign = 0;
771
772	return (0);
773}
774
775/*
776 * Implements string sort.
777 */
778static int
779wstrcoll(struct key_value *kv1, struct key_value *kv2, size_t offset)
780{
781
782	if (debug_sort) {
783		if (offset)
784			printf("; offset=%d\n", (int) offset);
785		bwsprintf(stdout, kv1->k, "; k1=<", ">");
786		printf("(%zu)", BWSLEN(kv1->k));
787		bwsprintf(stdout, kv2->k, ", k2=<", ">");
788		printf("(%zu)", BWSLEN(kv2->k));
789	}
790
791	return (bwscoll(kv1->k, kv2->k, offset));
792}
793
794/*
795 * Compare two suffixes
796 */
797static inline int
798cmpsuffix(unsigned char si1, unsigned char si2)
799{
800
801	return ((char)si1 - (char)si2);
802}
803
804/*
805 * Implements numeric sort for -n and -h.
806 */
807static int
808numcoll_impl(struct key_value *kv1, struct key_value *kv2,
809    size_t offset __unused, bool use_suffix)
810{
811	struct bwstring *s1, *s2;
812	wchar_t sfrac1[MAX_NUM_SIZE + 1], sfrac2[MAX_NUM_SIZE + 1];
813	wchar_t smain1[MAX_NUM_SIZE + 1], smain2[MAX_NUM_SIZE + 1];
814	int cmp_res, sign1, sign2;
815	size_t frac1, frac2, main1, main2;
816	unsigned char SI1, SI2;
817	bool e1, e2, key1_read, key2_read;
818
819	s1 = kv1->k;
820	s2 = kv2->k;
821	sign1 = sign2 = 0;
822	main1 = main2 = 0;
823	frac1 = frac2 = 0;
824
825	key1_read = key2_read = false;
826
827	if (debug_sort) {
828		bwsprintf(stdout, s1, "; k1=<", ">");
829		bwsprintf(stdout, s2, ", k2=<", ">");
830	}
831
832	if (s1 == s2)
833		return (0);
834
835	if (kv1->hint->status == HS_UNINITIALIZED) {
836		/* read the number from the string */
837		read_number(s1, &sign1, smain1, &main1, sfrac1, &frac1, &SI1);
838		key1_read = true;
839		kv1->hint->v.nh.n1 = wcstoull(smain1, NULL, 10);
840		if(main1 < 1 && frac1 < 1)
841			kv1->hint->v.nh.empty=true;
842		kv1->hint->v.nh.si = SI1;
843		kv1->hint->status = (kv1->hint->v.nh.n1 != ULLONG_MAX) ?
844		    HS_INITIALIZED : HS_ERROR;
845		kv1->hint->v.nh.neg = (sign1 < 0) ? true : false;
846	}
847
848	if (kv2->hint->status == HS_UNINITIALIZED) {
849		/* read the number from the string */
850		read_number(s2, &sign2, smain2, &main2, sfrac2, &frac2,&SI2);
851		key2_read = true;
852		kv2->hint->v.nh.n1 = wcstoull(smain2, NULL, 10);
853		if(main2 < 1 && frac2 < 1)
854			kv2->hint->v.nh.empty=true;
855		kv2->hint->v.nh.si = SI2;
856		kv2->hint->status = (kv2->hint->v.nh.n1 != ULLONG_MAX) ?
857		    HS_INITIALIZED : HS_ERROR;
858		kv2->hint->v.nh.neg = (sign2 < 0) ? true : false;
859	}
860
861	if (kv1->hint->status == HS_INITIALIZED && kv2->hint->status ==
862	    HS_INITIALIZED) {
863		unsigned long long n1, n2;
864		bool neg1, neg2;
865
866		e1 = kv1->hint->v.nh.empty;
867		e2 = kv2->hint->v.nh.empty;
868
869		if (e1 && e2)
870			return (0);
871
872		neg1 = kv1->hint->v.nh.neg;
873		neg2 = kv2->hint->v.nh.neg;
874
875		if (neg1 && !neg2)
876			return (-1);
877		if (neg2 && !neg1)
878			return (+1);
879
880		if (e1)
881			return (neg2 ? +1 : -1);
882		else if (e2)
883			return (neg1 ? -1 : +1);
884
885
886		if (use_suffix) {
887			cmp_res = cmpsuffix(kv1->hint->v.nh.si, kv2->hint->v.nh.si);
888			if (cmp_res)
889				return (neg1 ? -cmp_res : cmp_res);
890		}
891
892		n1 = kv1->hint->v.nh.n1;
893		n2 = kv2->hint->v.nh.n1;
894		if (n1 < n2)
895			return (neg1 ? +1 : -1);
896		else if (n1 > n2)
897			return (neg1 ? -1 : +1);
898	}
899
900	/* read the numbers from the strings */
901	if (!key1_read)
902		read_number(s1, &sign1, smain1, &main1, sfrac1, &frac1, &SI1);
903	if (!key2_read)
904		read_number(s2, &sign2, smain2, &main2, sfrac2, &frac2, &SI2);
905
906	e1 = ((main1 + frac1) == 0);
907	e2 = ((main2 + frac2) == 0);
908
909	if (e1 && e2)
910		return (0);
911
912	/* we know the result if the signs are different */
913	if (sign1 < 0 && sign2 >= 0)
914		return (-1);
915	if (sign1 >= 0 && sign2 < 0)
916		return (+1);
917
918	if (e1)
919		return ((sign2 < 0) ? +1 : -1);
920	else if (e2)
921		return ((sign1 < 0) ? -1 : +1);
922
923	if (use_suffix) {
924		cmp_res = cmpsuffix(SI1, SI2);
925		if (cmp_res)
926			return ((sign1 < 0) ? -cmp_res : cmp_res);
927	}
928
929	/* if both numbers are empty assume that the strings are equal */
930	if (main1 < 1 && main2 < 1 && frac1 < 1 && frac2 < 1)
931		return (0);
932
933	/*
934	 * if the main part is of different size, we know the result
935	 * (because the leading zeros are removed)
936	 */
937	if (main1 < main2)
938		cmp_res = -1;
939	else if (main1 > main2)
940		cmp_res = +1;
941	/* if the sizes are equal then simple non-collate string compare gives the correct result */
942	else
943		cmp_res = wcscmp(smain1, smain2);
944
945	/* check fraction */
946	if (!cmp_res)
947		cmp_res = wcscmp(sfrac1, sfrac2);
948
949	if (!cmp_res)
950		return (0);
951
952	/* reverse result if the signs are negative */
953	if (sign1 < 0 && sign2 < 0)
954		cmp_res = -cmp_res;
955
956	return (cmp_res);
957}
958
959/*
960 * Implements numeric sort (-n).
961 */
962static int
963numcoll(struct key_value *kv1, struct key_value *kv2, size_t offset)
964{
965
966	return (numcoll_impl(kv1, kv2, offset, false));
967}
968
969/*
970 * Implements 'human' numeric sort (-h).
971 */
972static int
973hnumcoll(struct key_value *kv1, struct key_value *kv2, size_t offset)
974{
975
976	return (numcoll_impl(kv1, kv2, offset, true));
977}
978
979/* Use hint space to memoize md5 computations, at least. */
980static void
981randomcoll_init_hint(struct key_value *kv, void *hash)
982{
983
984	memcpy(kv->hint->v.Rh.cached, hash, sizeof(kv->hint->v.Rh.cached));
985	kv->hint->status = HS_INITIALIZED;
986}
987
988/*
989 * Implements random sort (-R).
990 */
991static int
992randomcoll(struct key_value *kv1, struct key_value *kv2,
993    size_t offset __unused)
994{
995	struct bwstring *s1, *s2;
996	MD5_CTX ctx1, ctx2;
997	unsigned char hash1[MD5_DIGEST_LENGTH], hash2[MD5_DIGEST_LENGTH];
998	int cmp;
999
1000	s1 = kv1->k;
1001	s2 = kv2->k;
1002
1003	if (debug_sort) {
1004		bwsprintf(stdout, s1, "; k1=<", ">");
1005		bwsprintf(stdout, s2, ", k2=<", ">");
1006	}
1007
1008	if (s1 == s2)
1009		return (0);
1010
1011	if (kv1->hint->status == HS_INITIALIZED &&
1012	    kv2->hint->status == HS_INITIALIZED) {
1013		cmp = memcmp(kv1->hint->v.Rh.cached,
1014		    kv2->hint->v.Rh.cached, sizeof(kv1->hint->v.Rh.cached));
1015		if (cmp != 0)
1016			return (cmp);
1017	}
1018
1019	memcpy(&ctx1, &md5_ctx, sizeof(MD5_CTX));
1020	memcpy(&ctx2, &md5_ctx, sizeof(MD5_CTX));
1021
1022	MD5Update(&ctx1, bwsrawdata(s1), bwsrawlen(s1));
1023	MD5Update(&ctx2, bwsrawdata(s2), bwsrawlen(s2));
1024
1025	MD5Final(hash1, &ctx1);
1026	MD5Final(hash2, &ctx2);
1027
1028	if (kv1->hint->status == HS_UNINITIALIZED)
1029		randomcoll_init_hint(kv1, hash1);
1030	if (kv2->hint->status == HS_UNINITIALIZED)
1031		randomcoll_init_hint(kv2, hash2);
1032
1033	return (memcmp(hash1, hash2, sizeof(hash1)));
1034}
1035
1036/*
1037 * Implements version sort (-V).
1038 */
1039static int
1040versioncoll(struct key_value *kv1, struct key_value *kv2,
1041    size_t offset __unused)
1042{
1043	struct bwstring *s1, *s2;
1044
1045	s1 = kv1->k;
1046	s2 = kv2->k;
1047
1048	if (debug_sort) {
1049		bwsprintf(stdout, s1, "; k1=<", ">");
1050		bwsprintf(stdout, s2, ", k2=<", ">");
1051	}
1052
1053	if (s1 == s2)
1054		return (0);
1055
1056	return (vcmp(s1, s2));
1057}
1058
1059/*
1060 * Check for minus infinity
1061 */
1062static inline bool
1063huge_minus(double d, int err1)
1064{
1065
1066	if (err1 == ERANGE)
1067		if (d == -HUGE_VAL || d == -HUGE_VALF || d == -HUGE_VALL)
1068			return (+1);
1069
1070	return (0);
1071}
1072
1073/*
1074 * Check for plus infinity
1075 */
1076static inline bool
1077huge_plus(double d, int err1)
1078{
1079
1080	if (err1 == ERANGE)
1081		if (d == HUGE_VAL || d == HUGE_VALF || d == HUGE_VALL)
1082			return (+1);
1083
1084	return (0);
1085}
1086
1087/*
1088 * Check whether a function is a NAN
1089 */
1090static bool
1091is_nan(double d)
1092{
1093
1094	return ((d == NAN) || (isnan(d)));
1095}
1096
1097/*
1098 * Compare two NANs
1099 */
1100static int
1101cmp_nans(double d1, double d2)
1102{
1103
1104	if (d1 < d2)
1105		return (-1);
1106	if (d1 > d2)
1107		return (+1);
1108	return (0);
1109}
1110
1111/*
1112 * Implements general numeric sort (-g).
1113 */
1114static int
1115gnumcoll(struct key_value *kv1, struct key_value *kv2,
1116    size_t offset __unused)
1117{
1118	double d1, d2;
1119	int err1, err2;
1120	bool empty1, empty2, key1_read, key2_read;
1121
1122	d1 = d2 = 0;
1123	err1 = err2 = 0;
1124	key1_read = key2_read = false;
1125
1126	if (debug_sort) {
1127		bwsprintf(stdout, kv1->k, "; k1=<", ">");
1128		bwsprintf(stdout, kv2->k, "; k2=<", ">");
1129	}
1130
1131	if (kv1->hint->status == HS_UNINITIALIZED) {
1132		errno = 0;
1133		d1 = bwstod(kv1->k, &empty1);
1134		err1 = errno;
1135
1136		if (empty1)
1137			kv1->hint->v.gh.notnum = true;
1138		else if (err1 == 0) {
1139			kv1->hint->v.gh.d = d1;
1140			kv1->hint->v.gh.nan = is_nan(d1);
1141			kv1->hint->status = HS_INITIALIZED;
1142		} else
1143			kv1->hint->status = HS_ERROR;
1144
1145		key1_read = true;
1146	}
1147
1148	if (kv2->hint->status == HS_UNINITIALIZED) {
1149		errno = 0;
1150		d2 = bwstod(kv2->k, &empty2);
1151		err2 = errno;
1152
1153		if (empty2)
1154			kv2->hint->v.gh.notnum = true;
1155		else if (err2 == 0) {
1156			kv2->hint->v.gh.d = d2;
1157			kv2->hint->v.gh.nan = is_nan(d2);
1158			kv2->hint->status = HS_INITIALIZED;
1159		} else
1160			kv2->hint->status = HS_ERROR;
1161
1162		key2_read = true;
1163	}
1164
1165	if (kv1->hint->status == HS_INITIALIZED &&
1166	    kv2->hint->status == HS_INITIALIZED) {
1167		if (kv1->hint->v.gh.notnum)
1168			return ((kv2->hint->v.gh.notnum) ? 0 : -1);
1169		else if (kv2->hint->v.gh.notnum)
1170			return (+1);
1171
1172		if (kv1->hint->v.gh.nan)
1173			return ((kv2->hint->v.gh.nan) ?
1174			    cmp_nans(kv1->hint->v.gh.d, kv2->hint->v.gh.d) :
1175			    -1);
1176		else if (kv2->hint->v.gh.nan)
1177			return (+1);
1178
1179		d1 = kv1->hint->v.gh.d;
1180		d2 = kv2->hint->v.gh.d;
1181
1182		if (d1 < d2)
1183			return (-1);
1184		else if (d1 > d2)
1185			return (+1);
1186		else
1187			return (0);
1188	}
1189
1190	if (!key1_read) {
1191		errno = 0;
1192		d1 = bwstod(kv1->k, &empty1);
1193		err1 = errno;
1194	}
1195
1196	if (!key2_read) {
1197		errno = 0;
1198		d2 = bwstod(kv2->k, &empty2);
1199		err2 = errno;
1200	}
1201
1202	/* Non-value case: */
1203	if (empty1)
1204		return (empty2 ? 0 : -1);
1205	else if (empty2)
1206		return (+1);
1207
1208	/* NAN case */
1209	if (is_nan(d1))
1210		return (is_nan(d2) ? cmp_nans(d1, d2) : -1);
1211	else if (is_nan(d2))
1212		return (+1);
1213
1214	/* Infinities */
1215	if (err1 == ERANGE || err2 == ERANGE) {
1216		/* Minus infinity case */
1217		if (huge_minus(d1, err1)) {
1218			if (huge_minus(d2, err2)) {
1219				if (d1 < d2)
1220					return (-1);
1221				if (d1 > d2)
1222					return (+1);
1223				return (0);
1224			} else
1225				return (-1);
1226
1227		} else if (huge_minus(d2, err2)) {
1228			if (huge_minus(d1, err1)) {
1229				if (d1 < d2)
1230					return (-1);
1231				if (d1 > d2)
1232					return (+1);
1233				return (0);
1234			} else
1235				return (+1);
1236		}
1237
1238		/* Plus infinity case */
1239		if (huge_plus(d1, err1)) {
1240			if (huge_plus(d2, err2)) {
1241				if (d1 < d2)
1242					return (-1);
1243				if (d1 > d2)
1244					return (+1);
1245				return (0);
1246			} else
1247				return (+1);
1248		} else if (huge_plus(d2, err2)) {
1249			if (huge_plus(d1, err1)) {
1250				if (d1 < d2)
1251					return (-1);
1252				if (d1 > d2)
1253					return (+1);
1254				return (0);
1255			} else
1256				return (-1);
1257		}
1258	}
1259
1260	if (d1 < d2)
1261		return (-1);
1262	if (d1 > d2)
1263		return (+1);
1264
1265	return (0);
1266}
1267
1268/*
1269 * Implements month sort (-M).
1270 */
1271static int
1272monthcoll(struct key_value *kv1, struct key_value *kv2, size_t offset __unused)
1273{
1274	int val1, val2;
1275	bool key1_read, key2_read;
1276
1277	val1 = val2 = 0;
1278	key1_read = key2_read = false;
1279
1280	if (debug_sort) {
1281		bwsprintf(stdout, kv1->k, "; k1=<", ">");
1282		bwsprintf(stdout, kv2->k, "; k2=<", ">");
1283	}
1284
1285	if (kv1->hint->status == HS_UNINITIALIZED) {
1286		kv1->hint->v.Mh.m = bws_month_score(kv1->k);
1287		key1_read = true;
1288		kv1->hint->status = HS_INITIALIZED;
1289	}
1290
1291	if (kv2->hint->status == HS_UNINITIALIZED) {
1292		kv2->hint->v.Mh.m = bws_month_score(kv2->k);
1293		key2_read = true;
1294		kv2->hint->status = HS_INITIALIZED;
1295	}
1296
1297	if (kv1->hint->status == HS_INITIALIZED) {
1298		val1 = kv1->hint->v.Mh.m;
1299		key1_read = true;
1300	}
1301
1302	if (kv2->hint->status == HS_INITIALIZED) {
1303		val2 = kv2->hint->v.Mh.m;
1304		key2_read = true;
1305	}
1306
1307	if (!key1_read)
1308		val1 = bws_month_score(kv1->k);
1309	if (!key2_read)
1310		val2 = bws_month_score(kv2->k);
1311
1312	if (val1 == val2) {
1313		return (0);
1314	}
1315	if (val1 < val2)
1316		return (-1);
1317	return (+1);
1318}
1319