sort.c revision 281535
1/*-
2 * Copyright (C) 2009 Gabor Kovesdan <gabor@FreeBSD.org>
3 * Copyright (C) 2012 Oleg Moskalenko <mom040267@gmail.com>
4 * All rights reserved.
5 *
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions
8 * are met:
9 * 1. Redistributions of source code must retain the above copyright
10 *    notice, this list of conditions and the following disclaimer.
11 * 2. Redistributions in binary form must reproduce the above copyright
12 *    notice, this list of conditions and the following disclaimer in the
13 *    documentation and/or other materials provided with the distribution.
14 *
15 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
16 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
17 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
18 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
19 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
20 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
21 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
22 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
23 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
24 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
25 * SUCH DAMAGE.
26 */
27
28#include <sys/cdefs.h>
29__FBSDID("$FreeBSD: stable/10/usr.bin/sort/sort.c 281535 2015-04-14 18:57:50Z pfg $");
30
31#include <sys/stat.h>
32#include <sys/sysctl.h>
33#include <sys/types.h>
34
35#include <err.h>
36#include <errno.h>
37#include <getopt.h>
38#include <limits.h>
39#include <locale.h>
40#include <md5.h>
41#include <regex.h>
42#include <signal.h>
43#include <stdbool.h>
44#include <stdio.h>
45#include <stdlib.h>
46#include <string.h>
47#include <unistd.h>
48#include <wchar.h>
49#include <wctype.h>
50
51#include "coll.h"
52#include "file.h"
53#include "sort.h"
54
55#ifndef WITHOUT_NLS
56#include <nl_types.h>
57nl_catd catalog;
58#endif
59
/* Short-option string handed to getopt_long() in main(). */
#define	OPTIONS	"bcCdfghik:Mmno:RrsS:t:T:uVz"

/*
 * Default seed source for random sort, and the maximum number of bytes
 * set_random_seed() reads from it.
 */
#define DEFAULT_RANDOM_SORT_SEED_FILE ("/dev/random")
#define MAX_DEFAULT_RANDOM_SEED_DATA_SIZE (1024)

/* Set by set_sort_modifier() when an 'R' modifier is seen. */
static bool need_random;
/* Path of the seed source; main() replaces it for --random-source. */
static const char *random_source = DEFAULT_RANDOM_SORT_SEED_FILE;
/* Seed string produced by MD5End()/MD5File() in set_random_seed(). */
static const void *random_seed;
/* strlen() of random_seed. */
static size_t random_seed_size;

/* MD5 context that set_random_seed() initialises and feeds with the seed. */
MD5_CTX md5_ctx;
71
/*
 * Default messages to use when NLS is disabled or no catalogue
 * is found.
 *
 * Entries are referred to by number (see the getstr(N) calls in this
 * file); index 0 is an empty placeholder.  Message 12 is a printf-style
 * format whose single %s receives the program name.
 */
const char *nlsstr[] = { "",
/* 1*/"mutually exclusive flags",
/* 2*/"extra argument not allowed with -c",
/* 3*/"Unknown feature",
/* 4*/"Wrong memory buffer specification",
/* 5*/"0 field in key specs",
/* 6*/"0 column in key specs",
/* 7*/"Wrong file mode",
/* 8*/"Cannot open file for reading",
/* 9*/"Radix sort cannot be used with these sort options",
/*10*/"The chosen sort method cannot be used with stable and/or unique sort",
/*11*/"Invalid key position",
/*12*/"Usage: %s [-bcCdfigMmnrsuz] [-kPOS1[,POS2] ... ] "
      "[+POS1 [-POS2]] [-S memsize] [-T tmpdir] [-t separator] "
      "[-o outfile] [--batch-size size] [--files0-from file] "
      "[--heapsort] [--mergesort] [--radixsort] [--qsort] "
      "[--mmap] "
#if defined(SORT_THREADS)
      "[--parallel thread_no] "
#endif
      "[--human-numeric-sort] "
      "[--version-sort] [--random-sort [--random-source file]] "
      "[--compress-program program] [file ...]\n" };
99
/* Global sort options; zeroed by set_sort_opts() and filled in by main(). */
struct sort_opts sort_opts_vals;

/* Set by --debug. */
bool debug_sort;
/* Set by set_sort_modifier() for the g, M, n and h modifiers. */
bool need_hint;

#if defined(SORT_THREADS)
/* Number of CPUs found by set_hw_params() (clamped there). */
unsigned int ncpu = 1;
/* Number of threads; defaults to ncpu, overridden by --parallel. */
size_t nthreads = 1;
#endif

/* Set by set_locale() when GNUSORT_NUMERIC_COMPATIBILITY is in the environment. */
static bool gnusort_numeric_compatibility;

/* Modifiers given as global options (outside of any -k key). */
static struct sort_mods default_sort_mods_object;
struct sort_mods * const default_sort_mods = &default_sort_mods_object;

/* Set for the n and h modifiers; makes --debug print the numeric symbols. */
static bool print_symbols_on_debug;

/*
 * Arguments read from a file (when the --files0-from option is used).
 * argc_from_file0 stays (size_t)-1 until the first name has been stored.
 */
static size_t argc_from_file0 = (size_t)-1;
static char **argv_from_file0;
122
/*
 * Placeholder symbols for options which have no single-character equivalent.
 * Values start above CHAR_MAX so they cannot collide with any short option
 * character returned by getopt_long().
 */
enum
{
	SORT_OPT = CHAR_MAX + 1,
	HELP_OPT,
	FF_OPT,
	BS_OPT,
	VERSION_OPT,
	DEBUG_OPT,
#if defined(SORT_THREADS)
	PARALLEL_OPT,
#endif
	RANDOMSOURCE_OPT,
	COMPRESSPROGRAM_OPT,
	QSORT_OPT,
	MERGESORT_OPT,
	HEAPSORT_OPT,
	RADIXSORT_OPT,
	MMAP_OPT
};

/*
 * Sort-type flags of which at most one may be given; the parallel bool
 * arrays named mef_flags in this file are indexed in the same order.
 */
#define	NUMBER_OF_MUTUALLY_EXCLUSIVE_FLAGS 6
static const char mutually_exclusive_flags[NUMBER_OF_MUTUALLY_EXCLUSIVE_FLAGS] = { 'M', 'n', 'g', 'R', 'h', 'V' };
148
/*
 * Long option table for getopt_long().  Each entry maps a long name to
 * either the equivalent short option character or one of the *_OPT
 * placeholder values; the table ends with an all-zero sentinel entry.
 *
 * NOTE(review): the "check=silent|quiet" entry contains '=' and '|'
 * literally in its name; it looks like it documents "--check=silent"
 * rather than being matchable itself -- confirm.
 */
static struct option long_options[] = {
				{ "batch-size", required_argument, NULL, BS_OPT },
				{ "buffer-size", required_argument, NULL, 'S' },
				{ "check", optional_argument, NULL, 'c' },
				{ "check=silent|quiet", optional_argument, NULL, 'C' },
				{ "compress-program", required_argument, NULL, COMPRESSPROGRAM_OPT },
				{ "debug", no_argument, NULL, DEBUG_OPT },
				{ "dictionary-order", no_argument, NULL, 'd' },
				{ "field-separator", required_argument, NULL, 't' },
				{ "files0-from", required_argument, NULL, FF_OPT },
				{ "general-numeric-sort", no_argument, NULL, 'g' },
				{ "heapsort", no_argument, NULL, HEAPSORT_OPT },
				{ "help",no_argument, NULL, HELP_OPT },
				{ "human-numeric-sort", no_argument, NULL, 'h' },
				{ "ignore-leading-blanks", no_argument, NULL, 'b' },
				{ "ignore-case", no_argument, NULL, 'f' },
				{ "ignore-nonprinting", no_argument, NULL, 'i' },
				{ "key", required_argument, NULL, 'k' },
				{ "merge", no_argument, NULL, 'm' },
				{ "mergesort", no_argument, NULL, MERGESORT_OPT },
				{ "mmap", no_argument, NULL, MMAP_OPT },
				{ "month-sort", no_argument, NULL, 'M' },
				{ "numeric-sort", no_argument, NULL, 'n' },
				{ "output", required_argument, NULL, 'o' },
#if defined(SORT_THREADS)
				{ "parallel", required_argument, NULL, PARALLEL_OPT },
#endif
				{ "qsort", no_argument, NULL, QSORT_OPT },
				{ "radixsort", no_argument, NULL, RADIXSORT_OPT },
				{ "random-sort", no_argument, NULL, 'R' },
				{ "random-source", required_argument, NULL, RANDOMSOURCE_OPT },
				{ "reverse", no_argument, NULL, 'r' },
				{ "sort", required_argument, NULL, SORT_OPT },
				{ "stable", no_argument, NULL, 's' },
				{ "temporary-directory",required_argument, NULL, 'T' },
				{ "unique", no_argument, NULL, 'u' },
				{ "version", no_argument, NULL, VERSION_OPT },
				{ "version-sort",no_argument, NULL, 'V' },
				{ "zero-terminated", no_argument, NULL, 'z' },
				{ NULL, no_argument, NULL, 0 }
};

/* Rewrites obsolete "+POS1 [-POS2]" arguments in place; defined below. */
void fix_obsolete_keys(int *argc, char **argv);
192
193/*
194 * Check where sort modifier is present
195 */
196static bool
197sort_modifier_empty(struct sort_mods *sm)
198{
199
200	if (sm == NULL)
201		return (true);
202	return (!(sm->Mflag || sm->Vflag || sm->nflag || sm->gflag ||
203	    sm->rflag || sm->Rflag || sm->hflag || sm->dflag || sm->fflag));
204}
205
/*
 * Print out usage text and exit.
 *
 * When opt_err is true the text goes to stderr and the exit status is 2
 * (invalid invocation); otherwise it goes to stdout and the status is 0.
 */
static void
usage(bool opt_err)
{
	FILE *out;

	out = opt_err ? stderr : stdout;

	/* Message 12 is the usage format; its %s is the program name. */
	fprintf(out, getstr(12), getprogname());
	exit(opt_err ? 2 : 0);
}
225
226/*
227 * Read input file names from a file (file0-from option).
228 */
229static void
230read_fns_from_file0(const char *fn)
231{
232	FILE *f;
233	char *line = NULL;
234	size_t linesize = 0;
235	ssize_t linelen;
236
237	if (fn == NULL)
238		return;
239
240	f = fopen(fn, "r");
241	if (f == NULL)
242		err(2, "%s", fn);
243
244	while ((linelen = getdelim(&line, &linesize, '\0', f)) != -1) {
245		if (*line != '\0') {
246			if (argc_from_file0 == (size_t) - 1)
247				argc_from_file0 = 0;
248			++argc_from_file0;
249			argv_from_file0 = sort_realloc(argv_from_file0,
250			    argc_from_file0 * sizeof(char *));
251			if (argv_from_file0 == NULL)
252				err(2, NULL);
253			argv_from_file0[argc_from_file0 - 1] = line;
254		} else {
255			free(line);
256		}
257		line = NULL;
258		linesize = 0;
259	}
260	if (ferror(f))
261		err(2, "%s: getdelim", fn);
262
263	closefile(f, fn);
264}
265
266/*
267 * Check how much RAM is available for the sort.
268 */
269static void
270set_hw_params(void)
271{
272	long pages, psize;
273
274	pages = psize = 0;
275
276#if defined(SORT_THREADS)
277	ncpu = 1;
278#endif
279
280	pages = sysconf(_SC_PHYS_PAGES);
281	if (pages < 1) {
282		perror("sysconf pages");
283		psize = 1;
284	}
285	psize = sysconf(_SC_PAGESIZE);
286	if (psize < 1) {
287		perror("sysconf psize");
288		psize = 4096;
289	}
290#if defined(SORT_THREADS)
291	ncpu = (unsigned int)sysconf(_SC_NPROCESSORS_ONLN);
292	if (ncpu < 1)
293		ncpu = 1;
294	else if(ncpu > 32)
295		ncpu = 32;
296
297	nthreads = ncpu;
298#endif
299
300	free_memory = (unsigned long long) pages * (unsigned long long) psize;
301	available_free_memory = free_memory / 2;
302
303	if (available_free_memory < 1024)
304		available_free_memory = 1024;
305}
306
/*
 * Convert the first multibyte character of c into a wide character
 * stored in *wc.  If the conversion fails, or c is the empty string,
 * *wc receives def instead.  Nothing happens when wc or c is NULL.
 */
static void
conv_mbtowc(wchar_t *wc, const char *c, const wchar_t def)
{

	if (wc == NULL || c == NULL)
		return;

	/* mbtowc() yields -1 on error and 0 for an empty string. */
	if (mbtowc(wc, c, MB_CUR_MAX) < 1)
		*wc = def;
}
322
323/*
324 * Set current locale symbols.
325 */
326static void
327set_locale(void)
328{
329	struct lconv *lc;
330	const char *locale;
331
332	setlocale(LC_ALL, "");
333
334	lc = localeconv();
335
336	if (lc) {
337		/* obtain LC_NUMERIC info */
338		/* Convert to wide char form */
339		conv_mbtowc(&symbol_decimal_point, lc->decimal_point,
340		    symbol_decimal_point);
341		conv_mbtowc(&symbol_thousands_sep, lc->thousands_sep,
342		    symbol_thousands_sep);
343		conv_mbtowc(&symbol_positive_sign, lc->positive_sign,
344		    symbol_positive_sign);
345		conv_mbtowc(&symbol_negative_sign, lc->negative_sign,
346		    symbol_negative_sign);
347	}
348
349	if (getenv("GNUSORT_NUMERIC_COMPATIBILITY"))
350		gnusort_numeric_compatibility = true;
351
352	locale = setlocale(LC_COLLATE, NULL);
353
354	if (locale) {
355		char *tmpl;
356		const char *cclocale;
357
358		tmpl = sort_strdup(locale);
359		cclocale = setlocale(LC_COLLATE, "C");
360		if (cclocale && !strcmp(cclocale, tmpl))
361			byte_sort = true;
362		else {
363			const char *pclocale;
364
365			pclocale = setlocale(LC_COLLATE, "POSIX");
366			if (pclocale && !strcmp(pclocale, tmpl))
367				byte_sort = true;
368		}
369		setlocale(LC_COLLATE, tmpl);
370		sort_free(tmpl);
371	}
372}
373
374/*
375 * Set directory temporary files.
376 */
377static void
378set_tmpdir(void)
379{
380	char *td;
381
382	td = getenv("TMPDIR");
383	if (td != NULL)
384		tmpdir = sort_strdup(td);
385}
386
387/*
388 * Parse -S option.
389 */
390static unsigned long long
391parse_memory_buffer_value(const char *value)
392{
393
394	if (value == NULL)
395		return (available_free_memory);
396	else {
397		char *endptr;
398		unsigned long long membuf;
399
400		endptr = NULL;
401		errno = 0;
402		membuf = strtoll(value, &endptr, 10);
403
404		if (errno != 0) {
405			warn("%s",getstr(4));
406			membuf = available_free_memory;
407		} else {
408			switch (*endptr){
409			case 'Y':
410				membuf *= 1024;
411				/* FALLTHROUGH */
412			case 'Z':
413				membuf *= 1024;
414				/* FALLTHROUGH */
415			case 'E':
416				membuf *= 1024;
417				/* FALLTHROUGH */
418			case 'P':
419				membuf *= 1024;
420				/* FALLTHROUGH */
421			case 'T':
422				membuf *= 1024;
423				/* FALLTHROUGH */
424			case 'G':
425				membuf *= 1024;
426				/* FALLTHROUGH */
427			case 'M':
428				membuf *= 1024;
429				/* FALLTHROUGH */
430			case '\0':
431			case 'K':
432				membuf *= 1024;
433				/* FALLTHROUGH */
434			case 'b':
435				break;
436			case '%':
437				membuf = (available_free_memory * membuf) /
438				    100;
439				break;
440			default:
441				warnc(EINVAL, "%s", optarg);
442				membuf = available_free_memory;
443			}
444		}
445		return (membuf);
446	}
447}
448
/*
 * Signal handler that clears the temporary files and then terminates
 * the process via exit(-1).  All three arguments are unused.
 *
 * NOTE(review): exit() and, presumably, clear_tmp_files() are not
 * async-signal-safe; confirm this is acceptable before relying on the
 * handler from arbitrary interruption points.
 */
static void
sig_handler(int sig __unused, siginfo_t *siginfo __unused,
    void *context __unused)
{

	clear_tmp_files();
	exit(-1);
}
460
461/*
462 * Set signal handler on panic signals.
463 */
464static void
465set_signal_handler(void)
466{
467	struct sigaction sa;
468
469	memset(&sa, 0, sizeof(sa));
470	sa.sa_sigaction = &sig_handler;
471	sa.sa_flags = SA_SIGINFO;
472
473	if (sigaction(SIGTERM, &sa, NULL) < 0) {
474		perror("sigaction");
475		return;
476	}
477	if (sigaction(SIGHUP, &sa, NULL) < 0) {
478		perror("sigaction");
479		return;
480	}
481	if (sigaction(SIGINT, &sa, NULL) < 0) {
482		perror("sigaction");
483		return;
484	}
485	if (sigaction(SIGQUIT, &sa, NULL) < 0) {
486		perror("sigaction");
487		return;
488	}
489	if (sigaction(SIGABRT, &sa, NULL) < 0) {
490		perror("sigaction");
491		return;
492	}
493	if (sigaction(SIGBUS, &sa, NULL) < 0) {
494		perror("sigaction");
495		return;
496	}
497	if (sigaction(SIGSEGV, &sa, NULL) < 0) {
498		perror("sigaction");
499		return;
500	}
501	if (sigaction(SIGUSR1, &sa, NULL) < 0) {
502		perror("sigaction");
503		return;
504	}
505	if (sigaction(SIGUSR2, &sa, NULL) < 0) {
506		perror("sigaction");
507		return;
508	}
509}
510
/*
 * Report an unrecognised feature name: prints message 3 followed by
 * what, then exits with status 2.
 */
static void
unknown(const char *what)
{
	const char *prefix = getstr(3);

	errx(2, "%s: %s", prefix, what);
}
520
521/*
522 * Check whether contradictory input options are used.
523 */
524static void
525check_mutually_exclusive_flags(char c, bool *mef_flags)
526{
527	int fo_index, mec;
528	bool found_others, found_this;
529
530	found_others = found_this =false;
531	fo_index = 0;
532
533	for (int i = 0; i < NUMBER_OF_MUTUALLY_EXCLUSIVE_FLAGS; i++) {
534		mec = mutually_exclusive_flags[i];
535
536		if (mec != c) {
537			if (mef_flags[i]) {
538				if (found_this)
539					errx(1, "%c:%c: %s", c, mec, getstr(1));
540				found_others = true;
541				fo_index = i;
542			}
543		} else {
544			if (found_others)
545				errx(1, "%c:%c: %s", c, mutually_exclusive_flags[fo_index], getstr(1));
546			mef_flags[i] = true;
547			found_this = true;
548		}
549	}
550}
551
552/*
553 * Initialise sort opts data.
554 */
555static void
556set_sort_opts(void)
557{
558
559	memset(&default_sort_mods_object, 0,
560	    sizeof(default_sort_mods_object));
561	memset(&sort_opts_vals, 0, sizeof(sort_opts_vals));
562	default_sort_mods_object.func =
563	    get_sort_func(&default_sort_mods_object);
564}
565
566/*
567 * Set a sort modifier on a sort modifiers object.
568 */
569static bool
570set_sort_modifier(struct sort_mods *sm, int c)
571{
572
573	if (sm) {
574		switch (c){
575		case 'b':
576			sm->bflag = true;
577			break;
578		case 'd':
579			sm->dflag = true;
580			break;
581		case 'f':
582			sm->fflag = true;
583			break;
584		case 'g':
585			sm->gflag = true;
586			need_hint = true;
587			break;
588		case 'i':
589			sm->iflag = true;
590			break;
591		case 'R':
592			sm->Rflag = true;
593			need_random = true;
594			break;
595		case 'M':
596			initialise_months();
597			sm->Mflag = true;
598			need_hint = true;
599			break;
600		case 'n':
601			sm->nflag = true;
602			need_hint = true;
603			print_symbols_on_debug = true;
604			break;
605		case 'r':
606			sm->rflag = true;
607			break;
608		case 'V':
609			sm->Vflag = true;
610			break;
611		case 'h':
612			sm->hflag = true;
613			need_hint = true;
614			print_symbols_on_debug = true;
615			break;
616		default:
617			return false;
618		}
619		sort_opts_vals.complex_sort = true;
620		sm->func = get_sort_func(sm);
621	}
622	return (true);
623}
624
625/*
626 * Parse POS in -k option.
627 */
628static int
629parse_pos(const char *s, struct key_specs *ks, bool *mef_flags, bool second)
630{
631	regmatch_t pmatch[4];
632	regex_t re;
633	char *c, *f;
634	const char *sregexp = "^([0-9]+)(\\.[0-9]+)?([bdfirMngRhV]+)?$";
635	size_t len, nmatch;
636	int ret;
637
638	ret = -1;
639	nmatch = 4;
640	c = f = NULL;
641
642	if (regcomp(&re, sregexp, REG_EXTENDED) != 0)
643		return (-1);
644
645	if (regexec(&re, s, nmatch, pmatch, 0) != 0)
646		goto end;
647
648	if (pmatch[0].rm_eo <= pmatch[0].rm_so)
649		goto end;
650
651	if (pmatch[1].rm_eo <= pmatch[1].rm_so)
652		goto end;
653
654	len = pmatch[1].rm_eo - pmatch[1].rm_so;
655	f = sort_malloc((len + 1) * sizeof(char));
656
657	strncpy(f, s + pmatch[1].rm_so, len);
658	f[len] = '\0';
659
660	if (second) {
661		errno = 0;
662		ks->f2 = (size_t) strtoul(f, NULL, 10);
663		if (errno != 0)
664			err(2, "-k");
665		if (ks->f2 == 0) {
666			warn("%s",getstr(5));
667			goto end;
668		}
669	} else {
670		errno = 0;
671		ks->f1 = (size_t) strtoul(f, NULL, 10);
672		if (errno != 0)
673			err(2, "-k");
674		if (ks->f1 == 0) {
675			warn("%s",getstr(5));
676			goto end;
677		}
678	}
679
680	if (pmatch[2].rm_eo > pmatch[2].rm_so) {
681		len = pmatch[2].rm_eo - pmatch[2].rm_so - 1;
682		c = sort_malloc((len + 1) * sizeof(char));
683
684		strncpy(c, s + pmatch[2].rm_so + 1, len);
685		c[len] = '\0';
686
687		if (second) {
688			errno = 0;
689			ks->c2 = (size_t) strtoul(c, NULL, 10);
690			if (errno != 0)
691				err(2, "-k");
692		} else {
693			errno = 0;
694			ks->c1 = (size_t) strtoul(c, NULL, 10);
695			if (errno != 0)
696				err(2, "-k");
697			if (ks->c1 == 0) {
698				warn("%s",getstr(6));
699				goto end;
700			}
701		}
702	} else {
703		if (second)
704			ks->c2 = 0;
705		else
706			ks->c1 = 1;
707	}
708
709	if (pmatch[3].rm_eo > pmatch[3].rm_so) {
710		regoff_t i = 0;
711
712		for (i = pmatch[3].rm_so; i < pmatch[3].rm_eo; i++) {
713			check_mutually_exclusive_flags(s[i], mef_flags);
714			if (s[i] == 'b') {
715				if (second)
716					ks->pos2b = true;
717				else
718					ks->pos1b = true;
719			} else if (!set_sort_modifier(&(ks->sm), s[i]))
720				goto end;
721		}
722	}
723
724	ret = 0;
725
726end:
727
728	if (c)
729		sort_free(c);
730	if (f)
731		sort_free(f);
732	regfree(&re);
733
734	return (ret);
735}
736
737/*
738 * Parse -k option value.
739 */
740static int
741parse_k(const char *s, struct key_specs *ks)
742{
743	int ret = -1;
744	bool mef_flags[NUMBER_OF_MUTUALLY_EXCLUSIVE_FLAGS] =
745	    { false, false, false, false, false, false };
746
747	if (s && *s) {
748		char *sptr;
749
750		sptr = strchr(s, ',');
751		if (sptr) {
752			size_t size1;
753			char *pos1, *pos2;
754
755			size1 = sptr - s;
756
757			if (size1 < 1)
758				return (-1);
759			pos1 = sort_malloc((size1 + 1) * sizeof(char));
760
761			strncpy(pos1, s, size1);
762			pos1[size1] = '\0';
763
764			ret = parse_pos(pos1, ks, mef_flags, false);
765
766			sort_free(pos1);
767			if (ret < 0)
768				return (ret);
769
770			pos2 = sort_strdup(sptr + 1);
771			ret = parse_pos(pos2, ks, mef_flags, true);
772			sort_free(pos2);
773		} else
774			ret = parse_pos(s, ks, mef_flags, false);
775	}
776
777	return (ret);
778}
779
/*
 * Capacity, including the terminating NUL, of the modifier buffers that
 * fix_obsolete_keys() hands to parse_pos_obs().
 */
enum { OBS_SOPTS_SIZE = 128 };

/*
 * Parse POS in +POS -POS option.
 *
 * s has the form FIELD[.COLUMN][LETTERS].  On success the field and
 * column numbers are stored in *nf and *nc (0 when the column is absent)
 * and the letters, if any, are copied into sopts, which must provide
 * OBS_SOPTS_SIZE bytes.
 *
 * Returns 0 on success and -1 when s does not have the expected form or
 * carries more letters than sopts can hold.  Numbers that cannot be
 * represented terminate the program with status 2 and message 11.
 */
static int
parse_pos_obs(const char *s, int *nf, int *nc, char* sopts)
{
	regex_t re;
	regmatch_t pmatch[4];
	const char *sregexp = "^([0-9]+)(\\.[0-9]+)?([A-Za-z]+)?$";
	unsigned long val;
	size_t len;
	int ret;

	ret = -1;
	*nc = *nf = 0;

	if (regcomp(&re, sregexp, REG_EXTENDED) != 0)
		return (-1);

	if (regexec(&re, s, 4, pmatch, 0) != 0)
		goto end;

	if (pmatch[0].rm_eo <= pmatch[0].rm_so)
		goto end;

	if (pmatch[1].rm_eo <= pmatch[1].rm_so)
		goto end;

	/*
	 * The matched digit runs are followed by '.', a letter or the end
	 * of the string, so strtoul() can parse them in place.  The caller
	 * adds 1 to the results, hence the ">= INT_MAX" bound.
	 */
	errno = 0;
	val = strtoul(s + pmatch[1].rm_so, NULL, 10);
	if (errno != 0 || val >= INT_MAX)
		errx(2, "%s", getstr(11));
	*nf = (int)val;

	if (pmatch[2].rm_eo > pmatch[2].rm_so) {
		/* Skip the leading '.' of sub-match 2. */
		errno = 0;
		val = strtoul(s + pmatch[2].rm_so + 1, NULL, 10);
		if (errno != 0 || val >= INT_MAX)
			errx(2, "%s", getstr(11));
		*nc = (int)val;
	}

	if (pmatch[3].rm_eo > pmatch[3].rm_so) {
		len = pmatch[3].rm_eo - pmatch[3].rm_so;

		/* Never write past the caller's buffer. */
		if (len >= OBS_SOPTS_SIZE)
			goto end;

		memcpy(sopts, s + pmatch[3].rm_so, len);
		sopts[len] = '\0';
	}

	ret = 0;

end:
	regfree(&re);

	return (ret);
}

/*
 * "Translate" obsolete +POS1 -POS2 syntax into new -kPOS1,POS2 syntax.
 *
 * Every argument of the form "+POS1" is replaced by a newly allocated
 * "-k..." string; if the next argument is a parsable "-POS2" it is
 * folded into the same key and removed from argv, decrementing *argc.
 * The obsolete positions are zero-based, so field and column of POS1 are
 * incremented, and the field of POS2 is incremented when it has a
 * non-zero column.  Arguments that do not parse are left untouched.
 */
void
fix_obsolete_keys(int *argc, char **argv)
{
	/*
	 * Room for "-k", four ints, two dots, a comma, two modifier
	 * strings and the NUL; snprintf() bounds every write regardless.
	 */
	char sopt[2 * OBS_SOPTS_SIZE + 64];

	for (int i = 1; i < *argc; i++) {
		char *arg1;
		char sopts1[OBS_SOPTS_SIZE];
		int c1, f1;

		arg1 = argv[i];

		if (strlen(arg1) <= 1 || arg1[0] != '+')
			continue;

		sopts1[0] = '\0';
		c1 = f1 = 0;

		if (parse_pos_obs(arg1 + 1, &f1, &c1, sopts1) < 0)
			continue;

		f1 += 1;
		c1 += 1;

		if (i + 1 < *argc) {
			char *arg2 = argv[i + 1];

			if (strlen(arg2) > 1 && arg2[0] == '-') {
				char sopts2[OBS_SOPTS_SIZE];
				int c2, f2;

				sopts2[0] = '\0';
				c2 = f2 = 0;

				if (parse_pos_obs(arg2 + 1,
				    &f2, &c2, sopts2) >= 0) {
					if (c2 > 0)
						f2 += 1;
					snprintf(sopt, sizeof(sopt),
					    "-k%d.%d%s,%d.%d%s",
					    f1, c1, sopts1, f2, c2, sopts2);
					argv[i] = sort_strdup(sopt);

					/* Close the gap left by -POS2. */
					for (int j = i + 1; j + 1 < *argc; j++)
						argv[j] = argv[j + 1];
					*argc -= 1;
					/* Keep argv NULL-terminated. */
					argv[*argc] = NULL;
					continue;
				}
			}
		}

		snprintf(sopt, sizeof(sopt), "-k%d.%d%s", f1, c1, sopts1);
		argv[i] = sort_strdup(sopt);
	}
}
910
911/*
912 * Set random seed
913 */
914static void
915set_random_seed(void)
916{
917	if (need_random) {
918
919		if (strcmp(random_source, DEFAULT_RANDOM_SORT_SEED_FILE) == 0) {
920			FILE* fseed;
921			MD5_CTX ctx;
922			char rsd[MAX_DEFAULT_RANDOM_SEED_DATA_SIZE];
923			size_t sz = 0;
924
925			fseed = openfile(random_source, "r");
926			while (!feof(fseed)) {
927				int cr;
928
929				cr = fgetc(fseed);
930				if (cr == EOF)
931					break;
932
933				rsd[sz++] = (char) cr;
934
935				if (sz >= MAX_DEFAULT_RANDOM_SEED_DATA_SIZE)
936					break;
937			}
938
939			closefile(fseed, random_source);
940
941			MD5Init(&ctx);
942			MD5Update(&ctx, rsd, sz);
943
944			random_seed = MD5End(&ctx, NULL);
945			random_seed_size = strlen(random_seed);
946
947		} else {
948			MD5_CTX ctx;
949			char *b;
950
951			MD5Init(&ctx);
952			b = MD5File(random_source, NULL);
953			if (b == NULL)
954				err(2, NULL);
955
956			random_seed = b;
957			random_seed_size = strlen(b);
958		}
959
960		MD5Init(&md5_ctx);
961		if(random_seed_size>0) {
962			MD5Update(&md5_ctx, random_seed, random_seed_size);
963		}
964	}
965}
966
967/*
968 * Main function.
969 */
970int
971main(int argc, char **argv)
972{
973	char *outfile, *real_outfile;
974	int c, result;
975	bool mef_flags[NUMBER_OF_MUTUALLY_EXCLUSIVE_FLAGS] =
976	    { false, false, false, false, false, false };
977
978	result = 0;
979	outfile = sort_strdup("-");
980	real_outfile = NULL;
981
982	struct sort_mods *sm = &default_sort_mods_object;
983
984	init_tmp_files();
985
986	set_signal_handler();
987
988	set_hw_params();
989	set_locale();
990	set_tmpdir();
991	set_sort_opts();
992
993	fix_obsolete_keys(&argc, argv);
994
995	while (((c = getopt_long(argc, argv, OPTIONS, long_options, NULL))
996	    != -1)) {
997
998		check_mutually_exclusive_flags(c, mef_flags);
999
1000		if (!set_sort_modifier(sm, c)) {
1001
1002			switch (c) {
1003			case 'c':
1004				sort_opts_vals.cflag = true;
1005				if (optarg) {
1006					if (!strcmp(optarg, "diagnose-first"))
1007						;
1008					else if (!strcmp(optarg, "silent") ||
1009					    !strcmp(optarg, "quiet"))
1010						sort_opts_vals.csilentflag = true;
1011					else if (*optarg)
1012						unknown(optarg);
1013				}
1014				break;
1015			case 'C':
1016				sort_opts_vals.cflag = true;
1017				sort_opts_vals.csilentflag = true;
1018				break;
1019			case 'k':
1020			{
1021				sort_opts_vals.complex_sort = true;
1022				sort_opts_vals.kflag = true;
1023
1024				keys_num++;
1025				keys = sort_realloc(keys, keys_num *
1026				    sizeof(struct key_specs));
1027				memset(&(keys[keys_num - 1]), 0,
1028				    sizeof(struct key_specs));
1029
1030				if (parse_k(optarg, &(keys[keys_num - 1]))
1031				    < 0) {
1032					errc(2, EINVAL, "-k %s", optarg);
1033				}
1034
1035				break;
1036			}
1037			case 'm':
1038				sort_opts_vals.mflag = true;
1039				break;
1040			case 'o':
1041				outfile = sort_realloc(outfile, (strlen(optarg) + 1));
1042				strcpy(outfile, optarg);
1043				break;
1044			case 's':
1045				sort_opts_vals.sflag = true;
1046				break;
1047			case 'S':
1048				available_free_memory =
1049				    parse_memory_buffer_value(optarg);
1050				break;
1051			case 'T':
1052				tmpdir = sort_strdup(optarg);
1053				break;
1054			case 't':
1055				while (strlen(optarg) > 1) {
1056					if (optarg[0] != '\\') {
1057						errc(2, EINVAL, "%s", optarg);
1058					}
1059					optarg += 1;
1060					if (*optarg == '0') {
1061						*optarg = 0;
1062						break;
1063					}
1064				}
1065				sort_opts_vals.tflag = true;
1066				sort_opts_vals.field_sep = btowc(optarg[0]);
1067				if (sort_opts_vals.field_sep == WEOF) {
1068					errno = EINVAL;
1069					err(2, NULL);
1070				}
1071				if (!gnusort_numeric_compatibility) {
1072					if (symbol_decimal_point == sort_opts_vals.field_sep)
1073						symbol_decimal_point = WEOF;
1074					if (symbol_thousands_sep == sort_opts_vals.field_sep)
1075						symbol_thousands_sep = WEOF;
1076					if (symbol_negative_sign == sort_opts_vals.field_sep)
1077						symbol_negative_sign = WEOF;
1078					if (symbol_positive_sign == sort_opts_vals.field_sep)
1079						symbol_positive_sign = WEOF;
1080				}
1081				break;
1082			case 'u':
1083				sort_opts_vals.uflag = true;
1084				/* stable sort for the correct unique val */
1085				sort_opts_vals.sflag = true;
1086				break;
1087			case 'z':
1088				sort_opts_vals.zflag = true;
1089				break;
1090			case SORT_OPT:
1091				if (optarg) {
1092					if (!strcmp(optarg, "general-numeric"))
1093						set_sort_modifier(sm, 'g');
1094					else if (!strcmp(optarg, "human-numeric"))
1095						set_sort_modifier(sm, 'h');
1096					else if (!strcmp(optarg, "numeric"))
1097						set_sort_modifier(sm, 'n');
1098					else if (!strcmp(optarg, "month"))
1099						set_sort_modifier(sm, 'M');
1100					else if (!strcmp(optarg, "random"))
1101						set_sort_modifier(sm, 'R');
1102					else
1103						unknown(optarg);
1104				}
1105				break;
1106#if defined(SORT_THREADS)
1107			case PARALLEL_OPT:
1108				nthreads = (size_t)(atoi(optarg));
1109				if (nthreads < 1)
1110					nthreads = 1;
1111				if (nthreads > 1024)
1112					nthreads = 1024;
1113				break;
1114#endif
1115			case QSORT_OPT:
1116				sort_opts_vals.sort_method = SORT_QSORT;
1117				break;
1118			case MERGESORT_OPT:
1119				sort_opts_vals.sort_method = SORT_MERGESORT;
1120				break;
1121			case MMAP_OPT:
1122				use_mmap = true;
1123				break;
1124			case HEAPSORT_OPT:
1125				sort_opts_vals.sort_method = SORT_HEAPSORT;
1126				break;
1127			case RADIXSORT_OPT:
1128				sort_opts_vals.sort_method = SORT_RADIXSORT;
1129				break;
1130			case RANDOMSOURCE_OPT:
1131				random_source = strdup(optarg);
1132				break;
1133			case COMPRESSPROGRAM_OPT:
1134				compress_program = strdup(optarg);
1135				break;
1136			case FF_OPT:
1137				read_fns_from_file0(optarg);
1138				break;
1139			case BS_OPT:
1140			{
1141				errno = 0;
1142				long mof = strtol(optarg, NULL, 10);
1143				if (errno != 0)
1144					err(2, "--batch-size");
1145				if (mof >= 2)
1146					max_open_files = (size_t) mof + 1;
1147			}
1148				break;
1149			case VERSION_OPT:
1150				printf("%s\n", VERSION);
1151				exit(EXIT_SUCCESS);
1152				/* NOTREACHED */
1153				break;
1154			case DEBUG_OPT:
1155				debug_sort = true;
1156				break;
1157			case HELP_OPT:
1158				usage(false);
1159				/* NOTREACHED */
1160				break;
1161			default:
1162				usage(true);
1163				/* NOTREACHED */
1164			}
1165		}
1166	}
1167
1168	argc -= optind;
1169	argv += optind;
1170
1171#ifndef WITHOUT_NLS
1172	catalog = catopen("sort", NL_CAT_LOCALE);
1173#endif
1174
1175	if (sort_opts_vals.cflag && sort_opts_vals.mflag)
1176		errx(1, "%c:%c: %s", 'm', 'c', getstr(1));
1177
1178#ifndef WITHOUT_NLS
1179	catclose(catalog);
1180#endif
1181
1182	if (keys_num == 0) {
1183		keys_num = 1;
1184		keys = sort_realloc(keys, sizeof(struct key_specs));
1185		memset(&(keys[0]), 0, sizeof(struct key_specs));
1186		keys[0].c1 = 1;
1187		keys[0].pos1b = default_sort_mods->bflag;
1188		keys[0].pos2b = default_sort_mods->bflag;
1189		memcpy(&(keys[0].sm), default_sort_mods,
1190		    sizeof(struct sort_mods));
1191	}
1192
1193	for (size_t i = 0; i < keys_num; i++) {
1194		struct key_specs *ks;
1195
1196		ks = &(keys[i]);
1197
1198		if (sort_modifier_empty(&(ks->sm)) && !(ks->pos1b) &&
1199		    !(ks->pos2b)) {
1200			ks->pos1b = sm->bflag;
1201			ks->pos2b = sm->bflag;
1202			memcpy(&(ks->sm), sm, sizeof(struct sort_mods));
1203		}
1204
1205		ks->sm.func = get_sort_func(&(ks->sm));
1206	}
1207
1208	if (argv_from_file0) {
1209		argc = argc_from_file0;
1210		argv = argv_from_file0;
1211	}
1212
1213	if (debug_sort) {
1214		printf("Memory to be used for sorting: %llu\n",available_free_memory);
1215#if defined(SORT_THREADS)
1216		printf("Number of CPUs: %d\n",(int)ncpu);
1217		nthreads = 1;
1218#endif
1219		printf("Using collate rules of %s locale\n",
1220		    setlocale(LC_COLLATE, NULL));
1221		if (byte_sort)
1222			printf("Byte sort is used\n");
1223		if (print_symbols_on_debug) {
1224			printf("Decimal Point: <%lc>\n", symbol_decimal_point);
1225			if (symbol_thousands_sep)
1226				printf("Thousands separator: <%lc>\n",
1227				    symbol_thousands_sep);
1228			printf("Positive sign: <%lc>\n", symbol_positive_sign);
1229			printf("Negative sign: <%lc>\n", symbol_negative_sign);
1230		}
1231	}
1232
1233	set_random_seed();
1234
1235	/* Case when the outfile equals one of the input files: */
1236	if (strcmp(outfile, "-")) {
1237
1238		for(int i = 0; i < argc; ++i) {
1239			if (strcmp(argv[i], outfile) == 0) {
1240				real_outfile = sort_strdup(outfile);
1241				for(;;) {
1242					char* tmp = sort_malloc(strlen(outfile) +
1243					    strlen(".tmp") + 1);
1244
1245					strcpy(tmp, outfile);
1246					strcpy(tmp + strlen(tmp), ".tmp");
1247					sort_free(outfile);
1248					outfile = tmp;
1249					if (access(outfile, F_OK) < 0)
1250						break;
1251				}
1252				tmp_file_atexit(outfile);
1253			}
1254		}
1255	}
1256
1257#if defined(SORT_THREADS)
1258	if ((argc < 1) || (strcmp(outfile, "-") == 0) || (*outfile == 0))
1259		nthreads = 1;
1260#endif
1261
1262	if (!sort_opts_vals.cflag && !sort_opts_vals.mflag) {
1263		struct file_list fl;
1264		struct sort_list list;
1265
1266		sort_list_init(&list);
1267		file_list_init(&fl, true);
1268
1269		if (argc < 1)
1270			procfile("-", &list, &fl);
1271		else {
1272			while (argc > 0) {
1273				procfile(*argv, &list, &fl);
1274				--argc;
1275				++argv;
1276			}
1277		}
1278
1279		if (fl.count < 1)
1280			sort_list_to_file(&list, outfile);
1281		else {
1282			if (list.count > 0) {
1283				char *flast = new_tmp_file_name();
1284
1285				sort_list_to_file(&list, flast);
1286				file_list_add(&fl, flast, false);
1287			}
1288			merge_files(&fl, outfile);
1289		}
1290
1291		file_list_clean(&fl);
1292
1293		/*
1294		 * We are about to exit the program, so we can ignore
1295		 * the clean-up for speed
1296		 *
1297		 * sort_list_clean(&list);
1298		 */
1299
1300	} else if (sort_opts_vals.cflag) {
1301		result = (argc == 0) ? (check("-")) : (check(*argv));
1302	} else if (sort_opts_vals.mflag) {
1303		struct file_list fl;
1304
1305		file_list_init(&fl, false);
1306		file_list_populate(&fl, argc, argv, true);
1307		merge_files(&fl, outfile);
1308		file_list_clean(&fl);
1309	}
1310
1311	if (real_outfile) {
1312		unlink(real_outfile);
1313		if (rename(outfile, real_outfile) < 0)
1314			err(2, NULL);
1315		sort_free(real_outfile);
1316	}
1317
1318	sort_free(outfile);
1319
1320	return (result);
1321}
1322