1/*-
2 * SPDX-License-Identifier: BSD-2-Clause
3 *
4 * Copyright (C) 2012 Oleg Moskalenko <mom040267@gmail.com>
5 * Copyright (C) 2012 Gabor Kovesdan <gabor@FreeBSD.org>
6 * All rights reserved.
7 *
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
10 * are met:
11 * 1. Redistributions of source code must retain the above copyright
12 *    notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 *    notice, this list of conditions and the following disclaimer in the
15 *    documentation and/or other materials provided with the distribution.
16 *
17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
18 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
21 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
23 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27 * SUCH DAMAGE.
28 */
29
30#include <sys/cdefs.h>
31#include <errno.h>
32#include <err.h>
33#include <langinfo.h>
34#include <math.h>
35#if defined(SORT_THREADS)
36#include <pthread.h>
37#include <semaphore.h>
38#endif
39#include <stdlib.h>
40#include <string.h>
41#include <wchar.h>
42#include <wctype.h>
43#include <unistd.h>
44
45#include "coll.h"
46#include "radixsort.h"
47
/* comparison sort used when a stable (-s) sort was not requested */
#define DEFAULT_SORT_FUNC_RADIXSORT mergesort

/* below these node sizes a comparison sort beats further radix passes */
#define TINY_NODE(sl) ((sl)->tosort_num < 65)
#define SMALL_NODE(sl) ((sl)->tosort_num < 5)

/* are we sorting in reverse order ? */
static bool reverse_sort;

/* sort sub-levels array size (one slot per possible byte value) */
static const size_t slsz = 256 * sizeof(struct sort_level*);
58
59/* one sort level structure */
/* one sort level structure */
struct sort_level
{
	struct sort_level	**sublevels;	/* 256 child levels, indexed by byte */
	struct sort_list_item	**leaves;	/* items whose key ends at this level */
	struct sort_list_item	**sorted;	/* shared output array */
	struct sort_list_item	**tosort;	/* items still to be distributed */
	size_t			  leaves_num;	/* used entries in leaves */
	size_t			  leaves_sz;	/* allocated entries in leaves */
	size_t			  level;	/* byte depth of this level */
	size_t			  real_sln;	/* count of non-NULL sublevels */
	size_t			  start_position; /* this level's offset in sorted[] */
	size_t			  sln;		/* size of the sublevels array */
	size_t			  tosort_num;	/* used entries in tosort */
	size_t			  tosort_sz;	/* allocated entries in tosort */
};
75
/* stack of sort levels ready to be sorted */
struct level_stack {
	struct level_stack	 *next;	/* next (older) entry on the stack */
	struct sort_level	 *sl;	/* sort level waiting to be processed */
};
81
/* shared work queue of levels waiting to be sorted */
static struct level_stack *g_ls;

#if defined(SORT_THREADS)
/* condition variable and mutex guarding the level stack */
static pthread_cond_t g_ls_cond;
static pthread_mutex_t g_ls_mutex;

/* counter: how many items are left to place in sorted order */
static size_t sort_left;
/* sort_left is protected by g_ls_mutex */

/* semaphore used to wait for worker-thread completion */
static sem_t mtsem;
95
96/*
97 * Decrement items counter
98 */
99static inline void
100sort_left_dec(size_t n)
101{
102	pthread_mutex_lock(&g_ls_mutex);
103	sort_left -= n;
104	if (sort_left == 0 && nthreads > 1)
105		pthread_cond_broadcast(&g_ls_cond);
106	pthread_mutex_unlock(&g_ls_mutex);
107}
108
109/*
110 * Do we have something to sort ?
111 *
112 * This routine does not need to be locked.
113 */
114static inline bool
115have_sort_left(void)
116{
117	bool ret;
118
119	ret = (sort_left > 0);
120
121	return (ret);
122}
123
#else

/* Single-threaded build: the items-left counter is not needed. */
#define sort_left_dec(n)

#endif /* SORT_THREADS */
129
130static void
131_push_ls(struct level_stack *ls)
132{
133
134	ls->next = g_ls;
135	g_ls = ls;
136}
137
138/*
139 * Push sort level to the stack
140 */
141static inline void
142push_ls(struct sort_level *sl)
143{
144	struct level_stack *new_ls;
145
146	new_ls = sort_malloc(sizeof(struct level_stack));
147	new_ls->sl = sl;
148
149#if defined(SORT_THREADS)
150	if (nthreads > 1) {
151		pthread_mutex_lock(&g_ls_mutex);
152		_push_ls(new_ls);
153		pthread_cond_signal(&g_ls_cond);
154		pthread_mutex_unlock(&g_ls_mutex);
155	} else
156#endif
157		_push_ls(new_ls);
158}
159
160/*
161 * Pop sort level from the stack (single-threaded style)
162 */
163static inline struct sort_level*
164pop_ls_st(void)
165{
166	struct sort_level *sl;
167
168	if (g_ls) {
169		struct level_stack *saved_ls;
170
171		sl = g_ls->sl;
172		saved_ls = g_ls;
173		g_ls = g_ls->next;
174		sort_free(saved_ls);
175	} else
176		sl = NULL;
177
178	return (sl);
179}
180
181#if defined(SORT_THREADS)
182
183/*
184 * Pop sort level from the stack (multi-threaded style)
185 */
186static inline struct sort_level*
187pop_ls_mt(void)
188{
189	struct level_stack *saved_ls;
190	struct sort_level *sl;
191
192	pthread_mutex_lock(&g_ls_mutex);
193
194	for (;;) {
195		if (g_ls) {
196			sl = g_ls->sl;
197			saved_ls = g_ls;
198			g_ls = g_ls->next;
199			break;
200		}
201		sl = NULL;
202		saved_ls = NULL;
203
204		if (have_sort_left() == 0)
205			break;
206		pthread_cond_wait(&g_ls_cond, &g_ls_mutex);
207	}
208
209	pthread_mutex_unlock(&g_ls_mutex);
210
211	sort_free(saved_ls);
212
213	return (sl);
214}
215
216#endif /* defined(SORT_THREADS) */
217
218static void
219add_to_sublevel(struct sort_level *sl, struct sort_list_item *item, size_t indx)
220{
221	struct sort_level *ssl;
222
223	ssl = sl->sublevels[indx];
224
225	if (ssl == NULL) {
226		ssl = sort_calloc(1, sizeof(struct sort_level));
227
228		ssl->level = sl->level + 1;
229		sl->sublevels[indx] = ssl;
230
231		++(sl->real_sln);
232	}
233
234	if (++(ssl->tosort_num) > ssl->tosort_sz) {
235		ssl->tosort_sz = ssl->tosort_num + 128;
236		ssl->tosort = sort_realloc(ssl->tosort,
237		    sizeof(struct sort_list_item*) * (ssl->tosort_sz));
238	}
239
240	ssl->tosort[ssl->tosort_num - 1] = item;
241}
242
243static inline void
244add_leaf(struct sort_level *sl, struct sort_list_item *item)
245{
246
247	if (++(sl->leaves_num) > sl->leaves_sz) {
248		sl->leaves_sz = sl->leaves_num + 128;
249		sl->leaves = sort_realloc(sl->leaves,
250		    (sizeof(struct sort_list_item*) * (sl->leaves_sz)));
251	}
252	sl->leaves[sl->leaves_num - 1] = item;
253}
254
/*
 * Return the radix bucket (0..255) for the byte of the primary key
 * inspected at byte depth "level", or -1 when the key has ended.
 */
static inline int
get_wc_index(struct sort_list_item *sli, size_t level)
{
	/* bytes per sorting unit: 1 in single-byte locales, else wchar_t */
	const size_t wcfact = (mb_cur_max == 1) ? 1 : sizeof(wchar_t);
	const struct key_value *kv;
	const struct bwstring *bws;

	/* radix passes always key off the first key field */
	kv = get_key_from_keys_array(&sli->ka, 0);
	bws = kv->k;

	if ((BWSLEN(bws) * wcfact > level)) {
		wchar_t res;

		/*
		 * Sort wchar strings a byte at a time, rather than a single
		 * byte from each wchar.
		 */
		res = (wchar_t)BWS_GET(bws, level / wcfact);
		/* Sort most-significant byte first. */
		if (level % wcfact < wcfact - 1)
			res = (res >> (8 * (wcfact - 1 - (level % wcfact))));

		return (res & 0xff);
	}

	/* key is shorter than this level: the item is a leaf */
	return (-1);
}
282
283static void
284place_item(struct sort_level *sl, size_t item)
285{
286	struct sort_list_item *sli;
287	int c;
288
289	sli = sl->tosort[item];
290	c = get_wc_index(sli, sl->level);
291
292	if (c == -1)
293		add_leaf(sl, sli);
294	else
295		add_to_sublevel(sl, sli, c);
296}
297
298static void
299free_sort_level(struct sort_level *sl)
300{
301
302	if (sl) {
303		if (sl->leaves)
304			sort_free(sl->leaves);
305
306		if (sl->level > 0)
307			sort_free(sl->tosort);
308
309		if (sl->sublevels) {
310			struct sort_level *slc;
311			size_t sln;
312
313			sln = sl->sln;
314
315			for (size_t i = 0; i < sln; ++i) {
316				slc = sl->sublevels[i];
317				if (slc)
318					free_sort_level(slc);
319			}
320
321			sort_free(sl->sublevels);
322		}
323
324		sort_free(sl);
325	}
326}
327
/*
 * Sort one level: handle the trivial 0/1/2-item cases directly, fall
 * back to a comparison sort for tiny nodes or very deep levels, and
 * otherwise distribute the items into 256 byte-indexed sub-levels
 * (plus a leaves list for exhausted keys), writing finished results
 * into sl->sorted.  The level structure is freed before returning.
 */
static void
run_sort_level_next(struct sort_level *sl)
{
	const size_t wcfact = (mb_cur_max == 1) ? 1 : sizeof(wchar_t);
	struct sort_level *slc;
	size_t i, sln, tosort_num;

	/* stale sublevels array from the parent's bookkeeping, if any */
	if (sl->sublevels) {
		sort_free(sl->sublevels);
		sl->sublevels = NULL;
	}

	switch (sl->tosort_num) {
	case 0:
		goto end;
	case (1):
		/* a single item is trivially sorted */
		sl->sorted[sl->start_position] = sl->tosort[0];
		sort_left_dec(1);
		goto end;
	case (2):
		/*
		 * Radixsort only processes a single byte at a time.  In wchar
		 * mode, this can be a subset of the length of a character.
		 * list_coll_offset() offset is in units of wchar, not bytes.
		 * So to calculate the offset, we must divide by
		 * sizeof(wchar_t) and round down to the index of the first
		 * character this level references.
		 */
		if (list_coll_offset(&(sl->tosort[0]), &(sl->tosort[1]),
		    sl->level / wcfact) > 0) {
			sl->sorted[sl->start_position++] = sl->tosort[1];
			sl->sorted[sl->start_position] = sl->tosort[0];
		} else {
			sl->sorted[sl->start_position++] = sl->tosort[0];
			sl->sorted[sl->start_position] = sl->tosort[1];
		}
		sort_left_dec(2);

		goto end;
	default:
		if (TINY_NODE(sl) || (sl->level > 15)) {
			listcoll_t func;

			/*
			 * Collate comparison offset is in units of
			 * character-width, so we must divide the level (bytes)
			 * by operating character width (wchar_t or char).  See
			 * longer comment above.
			 */
			func = get_list_call_func(sl->level / wcfact);

			/* small node: finish with a comparison sort instead
			 * of more radix passes; tosort becomes leaves */
			sl->leaves = sl->tosort;
			sl->leaves_num = sl->tosort_num;
			sl->leaves_sz = sl->leaves_num;
			sl->leaves = sort_realloc(sl->leaves,
			    (sizeof(struct sort_list_item *) *
			    (sl->leaves_sz)));
			sl->tosort = NULL;
			sl->tosort_num = 0;
			sl->tosort_sz = 0;
			sl->sln = 0;
			sl->real_sln = 0;
			if (sort_opts_vals.sflag) {
				if (mergesort(sl->leaves, sl->leaves_num,
				    sizeof(struct sort_list_item *),
				    (int(*)(const void *, const void *)) func) == -1)
					/* NOTREACHED */
					err(2, "Radix sort error 3");
			} else
				DEFAULT_SORT_FUNC_RADIXSORT(sl->leaves, sl->leaves_num,
				    sizeof(struct sort_list_item *),
				    (int(*)(const void *, const void *)) func);

			memcpy(sl->sorted + sl->start_position,
			    sl->leaves, sl->leaves_num *
			    sizeof(struct sort_list_item*));

			sort_left_dec(sl->leaves_num);

			goto end;
		} else {
			/* trim tosort to its exact size before bucketing */
			sl->tosort_sz = sl->tosort_num;
			sl->tosort = sort_realloc(sl->tosort,
			    sizeof(struct sort_list_item*) * (sl->tosort_sz));
		}
	}

	/* distribute every item into byte buckets or the leaves list */
	sl->sln = 256;
	sl->sublevels = sort_calloc(1, slsz);

	sl->real_sln = 0;

	tosort_num = sl->tosort_num;
	for (i = 0; i < tosort_num; ++i)
		place_item(sl, i);

	sort_free(sl->tosort);
	sl->tosort = NULL;
	sl->tosort_num = 0;
	sl->tosort_sz = 0;

	/* exhausted keys are equal on the radix key; a comparison sort is
	 * still needed when secondary keys or modifiers can reorder them */
	if (sl->leaves_num > 1) {
		if (keys_num > 1) {
			if (sort_opts_vals.sflag) {
				mergesort(sl->leaves, sl->leaves_num,
				    sizeof(struct sort_list_item *),
				    (int(*)(const void *, const void *)) list_coll);
			} else {
				DEFAULT_SORT_FUNC_RADIXSORT(sl->leaves, sl->leaves_num,
				    sizeof(struct sort_list_item *),
				    (int(*)(const void *, const void *)) list_coll);
			}
		} else if (!sort_opts_vals.sflag && sort_opts_vals.complex_sort) {
			DEFAULT_SORT_FUNC_RADIXSORT(sl->leaves, sl->leaves_num,
			    sizeof(struct sort_list_item *),
			    (int(*)(const void *, const void *)) list_coll_by_str_only);
		}
	}

	sl->leaves_sz = sl->leaves_num;
	sl->leaves = sort_realloc(sl->leaves, (sizeof(struct sort_list_item *) *
	    (sl->leaves_sz)));

	if (!reverse_sort) {
		/* shorter strings sort first: leaves go before sub-levels */
		memcpy(sl->sorted + sl->start_position, sl->leaves,
		    sl->leaves_num * sizeof(struct sort_list_item*));
		sl->start_position += sl->leaves_num;
		sort_left_dec(sl->leaves_num);

		sort_free(sl->leaves);
		sl->leaves = NULL;
		sl->leaves_num = 0;
		sl->leaves_sz = 0;

		sln = sl->sln;

		for (i = 0; i < sln; ++i) {
			slc = sl->sublevels[i];

			if (slc) {
				/* each sub-level owns a fixed slice of the
				 * output array; reserve it now */
				slc->sorted = sl->sorted;
				slc->start_position = sl->start_position;
				sl->start_position += slc->tosort_num;
				if (SMALL_NODE(slc))
					run_sort_level_next(slc);
				else
					push_ls(slc);
				sl->sublevels[i] = NULL;
			}
		}

	} else {
		size_t n;

		/* reverse order: high byte values first, leaves last */
		sln = sl->sln;

		for (i = 0; i < sln; ++i) {
			n = sln - i - 1;
			slc = sl->sublevels[n];

			if (slc) {
				slc->sorted = sl->sorted;
				slc->start_position = sl->start_position;
				sl->start_position += slc->tosort_num;
				if (SMALL_NODE(slc))
					run_sort_level_next(slc);
				else
					push_ls(slc);
				sl->sublevels[n] = NULL;
			}
		}

		memcpy(sl->sorted + sl->start_position, sl->leaves,
		    sl->leaves_num * sizeof(struct sort_list_item*));
		sort_left_dec(sl->leaves_num);
	}

end:
	free_sort_level(sl);
}
508
509/*
510 * Single-threaded sort cycle
511 */
512static void
513run_sort_cycle_st(void)
514{
515	struct sort_level *slc;
516
517	for (;;) {
518		slc = pop_ls_st();
519		if (slc == NULL) {
520			break;
521		}
522		run_sort_level_next(slc);
523	}
524}
525
526#if defined(SORT_THREADS)
527
528/*
529 * Multi-threaded sort cycle
530 */
531static void
532run_sort_cycle_mt(void)
533{
534	struct sort_level *slc;
535
536	for (;;) {
537		slc = pop_ls_mt();
538		if (slc == NULL)
539			break;
540		run_sort_level_next(slc);
541	}
542}
543
544/*
545 * Sort cycle thread (in multi-threaded mode)
546 */
547static void*
548sort_thread(void* arg)
549{
550	run_sort_cycle_mt();
551	sem_post(&mtsem);
552
553	return (arg);
554}
555
556#endif /* defined(SORT_THREADS) */
557
558static void
559run_top_sort_level(struct sort_level *sl)
560{
561	struct sort_level *slc;
562
563	reverse_sort = sort_opts_vals.kflag ? keys[0].sm.rflag :
564	    default_sort_mods->rflag;
565
566	sl->start_position = 0;
567	sl->sln = 256;
568	sl->sublevels = sort_calloc(1, slsz);
569
570	for (size_t i = 0; i < sl->tosort_num; ++i)
571		place_item(sl, i);
572
573	if (sl->leaves_num > 1) {
574		if (keys_num > 1) {
575			if (sort_opts_vals.sflag) {
576				mergesort(sl->leaves, sl->leaves_num,
577				    sizeof(struct sort_list_item *),
578				    (int(*)(const void *, const void *)) list_coll);
579			} else {
580				DEFAULT_SORT_FUNC_RADIXSORT(sl->leaves, sl->leaves_num,
581				    sizeof(struct sort_list_item *),
582				    (int(*)(const void *, const void *)) list_coll);
583			}
584		} else if (!sort_opts_vals.sflag && sort_opts_vals.complex_sort) {
585			DEFAULT_SORT_FUNC_RADIXSORT(sl->leaves, sl->leaves_num,
586			    sizeof(struct sort_list_item *),
587			    (int(*)(const void *, const void *)) list_coll_by_str_only);
588		}
589	}
590
591	if (!reverse_sort) {
592		memcpy(sl->tosort + sl->start_position, sl->leaves,
593		    sl->leaves_num * sizeof(struct sort_list_item*));
594		sl->start_position += sl->leaves_num;
595		sort_left_dec(sl->leaves_num);
596
597		for (size_t i = 0; i < sl->sln; ++i) {
598			slc = sl->sublevels[i];
599
600			if (slc) {
601				slc->sorted = sl->tosort;
602				slc->start_position = sl->start_position;
603				sl->start_position += slc->tosort_num;
604				push_ls(slc);
605				sl->sublevels[i] = NULL;
606			}
607		}
608
609	} else {
610		size_t n;
611
612		for (size_t i = 0; i < sl->sln; ++i) {
613
614			n = sl->sln - i - 1;
615			slc = sl->sublevels[n];
616
617			if (slc) {
618				slc->sorted = sl->tosort;
619				slc->start_position = sl->start_position;
620				sl->start_position += slc->tosort_num;
621				push_ls(slc);
622				sl->sublevels[n] = NULL;
623			}
624		}
625
626		memcpy(sl->tosort + sl->start_position, sl->leaves,
627		    sl->leaves_num * sizeof(struct sort_list_item*));
628
629		sort_left_dec(sl->leaves_num);
630	}
631
632#if defined(SORT_THREADS)
633	if (nthreads < 2) {
634#endif
635		run_sort_cycle_st();
636#if defined(SORT_THREADS)
637	} else {
638		size_t i;
639
640		for(i = 0; i < nthreads; ++i) {
641			pthread_attr_t attr;
642			pthread_t pth;
643
644			pthread_attr_init(&attr);
645			pthread_attr_setdetachstate(&attr, PTHREAD_DETACHED);
646
647			for (;;) {
648				int res = pthread_create(&pth, &attr,
649				    sort_thread, NULL);
650				if (res >= 0)
651					break;
652				if (errno == EAGAIN) {
653					pthread_yield();
654					continue;
655				}
656				err(2, NULL);
657			}
658
659			pthread_attr_destroy(&attr);
660		}
661
662		for (i = 0; i < nthreads; ++i)
663			sem_wait(&mtsem);
664	}
665#endif /* defined(SORT_THREADS) */
666}
667
668static void
669run_sort(struct sort_list_item **base, size_t nmemb)
670{
671	struct sort_level *sl;
672
673#if defined(SORT_THREADS)
674	size_t nthreads_save = nthreads;
675	if (nmemb < MT_SORT_THRESHOLD)
676		nthreads = 1;
677
678	if (nthreads > 1) {
679		pthread_mutexattr_t mattr;
680
681		pthread_mutexattr_init(&mattr);
682		pthread_mutexattr_settype(&mattr, PTHREAD_MUTEX_ADAPTIVE_NP);
683
684		pthread_mutex_init(&g_ls_mutex, &mattr);
685		pthread_cond_init(&g_ls_cond, NULL);
686
687		pthread_mutexattr_destroy(&mattr);
688
689		sem_init(&mtsem, 0, 0);
690
691	}
692#endif
693
694	sl = sort_calloc(1, sizeof(struct sort_level));
695
696	sl->tosort = base;
697	sl->tosort_num = nmemb;
698	sl->tosort_sz = nmemb;
699
700#if defined(SORT_THREADS)
701	sort_left = nmemb;
702#endif
703
704	run_top_sort_level(sl);
705
706	free_sort_level(sl);
707
708#if defined(SORT_THREADS)
709	if (nthreads > 1) {
710		sem_destroy(&mtsem);
711		pthread_mutex_destroy(&g_ls_mutex);
712	}
713	nthreads = nthreads_save;
714#endif
715}
716
/*
 * Public entry point: radix sort an array of sort list items in place.
 */
void
rxsort(struct sort_list_item **base, size_t nmemb)
{

	run_sort(base, nmemb);
}
723