sort.c revision 309862
1/*-
2 * Copyright (C) 2009 Gabor Kovesdan <gabor@FreeBSD.org>
3 * Copyright (C) 2012 Oleg Moskalenko <mom040267@gmail.com>
4 * All rights reserved.
5 *
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions
8 * are met:
9 * 1. Redistributions of source code must retain the above copyright
10 *    notice, this list of conditions and the following disclaimer.
11 * 2. Redistributions in binary form must reproduce the above copyright
12 *    notice, this list of conditions and the following disclaimer in the
13 *    documentation and/or other materials provided with the distribution.
14 *
15 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
16 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
17 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
18 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
19 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
20 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
21 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
22 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
23 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
24 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
25 * SUCH DAMAGE.
26 */
27
28#include <sys/cdefs.h>
29__FBSDID("$FreeBSD: stable/10/usr.bin/sort/sort.c 309862 2016-12-12 00:47:12Z delphij $");
30
31#include <sys/stat.h>
32#include <sys/sysctl.h>
33#include <sys/types.h>
34
35#include <err.h>
36#include <errno.h>
37#include <getopt.h>
38#include <limits.h>
39#include <locale.h>
40#include <md5.h>
41#include <regex.h>
42#include <signal.h>
43#include <stdbool.h>
44#include <stdio.h>
45#include <stdlib.h>
46#include <string.h>
47#include <unistd.h>
48#include <wchar.h>
49#include <wctype.h>
50
51#include "coll.h"
52#include "file.h"
53#include "sort.h"
54
55#ifndef WITHOUT_NLS
56#include <nl_types.h>
57nl_catd catalog;
58#endif
59
60#define	OPTIONS	"bcCdfghik:Mmno:RrsS:t:T:uVz"
61
62#define DEFAULT_RANDOM_SORT_SEED_FILE ("/dev/random")
63#define MAX_DEFAULT_RANDOM_SEED_DATA_SIZE (1024)
64
65static bool need_random;
66static const char *random_source = DEFAULT_RANDOM_SORT_SEED_FILE;
67static const void *random_seed;
68static size_t random_seed_size;
69
70MD5_CTX md5_ctx;
71
/*
 * Built-in message texts, indexed by message number.  They are used when
 * NLS is disabled or no catalogue is found.  Index 0 is an empty
 * placeholder so that message numbers start at 1.
 */
const char *nlsstr[] = {
	[0] = "",
	[1] = "mutually exclusive flags",
	[2] = "extra argument not allowed with -c",
	[3] = "Unknown feature",
	[4] = "Wrong memory buffer specification",
	[5] = "0 field in key specs",
	[6] = "0 column in key specs",
	[7] = "Wrong file mode",
	[8] = "Cannot open file for reading",
	[9] = "Radix sort cannot be used with these sort options",
	[10] = "The chosen sort method cannot be used with stable and/or unique sort",
	[11] = "Invalid key position",
	/* printf-style usage text; %s receives the program name. */
	[12] = "Usage: %s [-bcCdfigMmnrsuz] [-kPOS1[,POS2] ... ] "
	    "[+POS1 [-POS2]] [-S memsize] [-T tmpdir] [-t separator] "
	    "[-o outfile] [--batch-size size] [--files0-from file] "
	    "[--heapsort] [--mergesort] [--radixsort] [--qsort] "
	    "[--mmap] "
#if defined(SORT_THREADS)
	    "[--parallel thread_no] "
#endif
	    "[--human-numeric-sort] "
	    "[--version-sort] [--random-sort [--random-source file]] "
	    "[--compress-program program] [file ...]\n"
};
99
100struct sort_opts sort_opts_vals;
101
102bool debug_sort;
103bool need_hint;
104
105#if defined(SORT_THREADS)
106unsigned int ncpu = 1;
107size_t nthreads = 1;
108#endif
109
110static bool gnusort_numeric_compatibility;
111
112static struct sort_mods default_sort_mods_object;
113struct sort_mods * const default_sort_mods = &default_sort_mods_object;
114
115static bool print_symbols_on_debug;
116
117/*
118 * Arguments from file (when file0-from option is used:
119 */
120static size_t argc_from_file0 = (size_t)-1;
121static char **argv_from_file0;
122
/*
 * getopt_long() return codes for long options that have no
 * single-character equivalent.  Numbering starts above CHAR_MAX so the
 * codes cannot collide with any short option letter.
 */
enum {
	SORT_OPT = CHAR_MAX + 1,	/* --sort */
	HELP_OPT,			/* --help */
	FF_OPT,				/* --files0-from */
	BS_OPT,				/* --batch-size */
	VERSION_OPT,			/* --version */
	DEBUG_OPT,			/* --debug */
#if defined(SORT_THREADS)
	PARALLEL_OPT,			/* --parallel */
#endif
	RANDOMSOURCE_OPT,		/* --random-source */
	COMPRESSPROGRAM_OPT,		/* --compress-program */
	QSORT_OPT,			/* --qsort */
	MERGESORT_OPT,			/* --mergesort */
	HEAPSORT_OPT,			/* --heapsort */
	RADIXSORT_OPT,			/* --radixsort */
	MMAP_OPT			/* --mmap */
};
145
/*
 * Sort-type letters of which at most one may be in effect; see
 * check_mutually_exclusive_flags().
 */
#define	NUMBER_OF_MUTUALLY_EXCLUSIVE_FLAGS 6
static const char mutually_exclusive_flags[NUMBER_OF_MUTUALLY_EXCLUSIVE_FLAGS] = {
	'M', 'n', 'g', 'R', 'h', 'V'
};
148
149static struct option long_options[] = {
150				{ "batch-size", required_argument, NULL, BS_OPT },
151				{ "buffer-size", required_argument, NULL, 'S' },
152				{ "check", optional_argument, NULL, 'c' },
153				{ "check=silent|quiet", optional_argument, NULL, 'C' },
154				{ "compress-program", required_argument, NULL, COMPRESSPROGRAM_OPT },
155				{ "debug", no_argument, NULL, DEBUG_OPT },
156				{ "dictionary-order", no_argument, NULL, 'd' },
157				{ "field-separator", required_argument, NULL, 't' },
158				{ "files0-from", required_argument, NULL, FF_OPT },
159				{ "general-numeric-sort", no_argument, NULL, 'g' },
160				{ "heapsort", no_argument, NULL, HEAPSORT_OPT },
161				{ "help",no_argument, NULL, HELP_OPT },
162				{ "human-numeric-sort", no_argument, NULL, 'h' },
163				{ "ignore-leading-blanks", no_argument, NULL, 'b' },
164				{ "ignore-case", no_argument, NULL, 'f' },
165				{ "ignore-nonprinting", no_argument, NULL, 'i' },
166				{ "key", required_argument, NULL, 'k' },
167				{ "merge", no_argument, NULL, 'm' },
168				{ "mergesort", no_argument, NULL, MERGESORT_OPT },
169				{ "mmap", no_argument, NULL, MMAP_OPT },
170				{ "month-sort", no_argument, NULL, 'M' },
171				{ "numeric-sort", no_argument, NULL, 'n' },
172				{ "output", required_argument, NULL, 'o' },
173#if defined(SORT_THREADS)
174				{ "parallel", required_argument, NULL, PARALLEL_OPT },
175#endif
176				{ "qsort", no_argument, NULL, QSORT_OPT },
177				{ "radixsort", no_argument, NULL, RADIXSORT_OPT },
178				{ "random-sort", no_argument, NULL, 'R' },
179				{ "random-source", required_argument, NULL, RANDOMSOURCE_OPT },
180				{ "reverse", no_argument, NULL, 'r' },
181				{ "sort", required_argument, NULL, SORT_OPT },
182				{ "stable", no_argument, NULL, 's' },
183				{ "temporary-directory",required_argument, NULL, 'T' },
184				{ "unique", no_argument, NULL, 'u' },
185				{ "version", no_argument, NULL, VERSION_OPT },
186				{ "version-sort",no_argument, NULL, 'V' },
187				{ "zero-terminated", no_argument, NULL, 'z' },
188				{ NULL, no_argument, NULL, 0 }
189};
190
191void fix_obsolete_keys(int *argc, char **argv);
192
193/*
194 * Check where sort modifier is present
195 */
196static bool
197sort_modifier_empty(struct sort_mods *sm)
198{
199
200	if (sm == NULL)
201		return (true);
202	return (!(sm->Mflag || sm->Vflag || sm->nflag || sm->gflag ||
203	    sm->rflag || sm->Rflag || sm->hflag || sm->dflag || sm->fflag));
204}
205
/*
 * Print the usage text and terminate.
 *
 * With opt_err set the text goes to stderr and the exit status is 2;
 * otherwise it goes to stdout and the exit status is 0.
 */
static void
usage(bool opt_err)
{
	FILE *stream = stdout;
	int status = 0;

	if (opt_err) {
		stream = stderr;
		status = 2;
	}

	fprintf(stream, getstr(12), getprogname());
	exit(status);
}
221
222/*
223 * Read input file names from a file (file0-from option).
224 */
225static void
226read_fns_from_file0(const char *fn)
227{
228	FILE *f;
229	char *line = NULL;
230	size_t linesize = 0;
231	ssize_t linelen;
232
233	if (fn == NULL)
234		return;
235
236	f = fopen(fn, "r");
237	if (f == NULL)
238		err(2, "%s", fn);
239
240	while ((linelen = getdelim(&line, &linesize, '\0', f)) != -1) {
241		if (*line != '\0') {
242			if (argc_from_file0 == (size_t) - 1)
243				argc_from_file0 = 0;
244			++argc_from_file0;
245			argv_from_file0 = sort_realloc(argv_from_file0,
246			    argc_from_file0 * sizeof(char *));
247			if (argv_from_file0 == NULL)
248				err(2, NULL);
249			argv_from_file0[argc_from_file0 - 1] = line;
250		} else {
251			free(line);
252		}
253		line = NULL;
254		linesize = 0;
255	}
256	if (ferror(f))
257		err(2, "%s: getdelim", fn);
258
259	closefile(f, fn);
260}
261
262/*
263 * Check how much RAM is available for the sort.
264 */
265static void
266set_hw_params(void)
267{
268	long pages, psize;
269
270#if defined(SORT_THREADS)
271	ncpu = 1;
272#endif
273
274	pages = sysconf(_SC_PHYS_PAGES);
275	if (pages < 1) {
276		perror("sysconf pages");
277		pages = 1;
278	}
279	psize = sysconf(_SC_PAGESIZE);
280	if (psize < 1) {
281		perror("sysconf psize");
282		psize = 4096;
283	}
284#if defined(SORT_THREADS)
285	ncpu = (unsigned int)sysconf(_SC_NPROCESSORS_ONLN);
286	if (ncpu < 1)
287		ncpu = 1;
288	else if(ncpu > 32)
289		ncpu = 32;
290
291	nthreads = ncpu;
292#endif
293
294	free_memory = (unsigned long long) pages * (unsigned long long) psize;
295	available_free_memory = free_memory / 2;
296
297	if (available_free_memory < 1024)
298		available_free_memory = 1024;
299}
300
/*
 * Convert the first multibyte character of c to a wide character stored
 * in *wc.  If the conversion fails or c is an empty string, *wc is set
 * to def instead.  Nothing happens when wc or c is NULL.
 */
static void
conv_mbtowc(wchar_t *wc, const char *c, const wchar_t def)
{

	if (wc == NULL || c == NULL)
		return;

	/* mbtowc() yields 0 for an empty string and -1 on error. */
	if (mbtowc(wc, c, MB_CUR_MAX) < 1)
		*wc = def;
}
316
317/*
318 * Set current locale symbols.
319 */
320static void
321set_locale(void)
322{
323	struct lconv *lc;
324	const char *locale;
325
326	setlocale(LC_ALL, "");
327
328	lc = localeconv();
329
330	if (lc) {
331		/* obtain LC_NUMERIC info */
332		/* Convert to wide char form */
333		conv_mbtowc(&symbol_decimal_point, lc->decimal_point,
334		    symbol_decimal_point);
335		conv_mbtowc(&symbol_thousands_sep, lc->thousands_sep,
336		    symbol_thousands_sep);
337		conv_mbtowc(&symbol_positive_sign, lc->positive_sign,
338		    symbol_positive_sign);
339		conv_mbtowc(&symbol_negative_sign, lc->negative_sign,
340		    symbol_negative_sign);
341	}
342
343	if (getenv("GNUSORT_NUMERIC_COMPATIBILITY"))
344		gnusort_numeric_compatibility = true;
345
346	locale = setlocale(LC_COLLATE, NULL);
347
348	if (locale) {
349		char *tmpl;
350		const char *cclocale;
351
352		tmpl = sort_strdup(locale);
353		cclocale = setlocale(LC_COLLATE, "C");
354		if (cclocale && !strcmp(cclocale, tmpl))
355			byte_sort = true;
356		else {
357			const char *pclocale;
358
359			pclocale = setlocale(LC_COLLATE, "POSIX");
360			if (pclocale && !strcmp(pclocale, tmpl))
361				byte_sort = true;
362		}
363		setlocale(LC_COLLATE, tmpl);
364		sort_free(tmpl);
365	}
366}
367
368/*
369 * Set directory temporary files.
370 */
371static void
372set_tmpdir(void)
373{
374	char *td;
375
376	td = getenv("TMPDIR");
377	if (td != NULL)
378		tmpdir = sort_strdup(td);
379}
380
381/*
382 * Parse -S option.
383 */
384static unsigned long long
385parse_memory_buffer_value(const char *value)
386{
387
388	if (value == NULL)
389		return (available_free_memory);
390	else {
391		char *endptr;
392		unsigned long long membuf;
393
394		endptr = NULL;
395		errno = 0;
396		membuf = strtoll(value, &endptr, 10);
397
398		if (errno != 0) {
399			warn("%s",getstr(4));
400			membuf = available_free_memory;
401		} else {
402			switch (*endptr){
403			case 'Y':
404				membuf *= 1024;
405				/* FALLTHROUGH */
406			case 'Z':
407				membuf *= 1024;
408				/* FALLTHROUGH */
409			case 'E':
410				membuf *= 1024;
411				/* FALLTHROUGH */
412			case 'P':
413				membuf *= 1024;
414				/* FALLTHROUGH */
415			case 'T':
416				membuf *= 1024;
417				/* FALLTHROUGH */
418			case 'G':
419				membuf *= 1024;
420				/* FALLTHROUGH */
421			case 'M':
422				membuf *= 1024;
423				/* FALLTHROUGH */
424			case '\0':
425			case 'K':
426				membuf *= 1024;
427				/* FALLTHROUGH */
428			case 'b':
429				break;
430			case '%':
431				membuf = (available_free_memory * membuf) /
432				    100;
433				break;
434			default:
435				warnc(EINVAL, "%s", optarg);
436				membuf = available_free_memory;
437			}
438		}
439		return (membuf);
440	}
441}
442
443/*
444 * Signal handler that clears the temporary files.
445 */
446static void
447sig_handler(int sig __unused, siginfo_t *siginfo __unused,
448    void *context __unused)
449{
450
451	clear_tmp_files();
452	exit(-1);
453}
454
455/*
456 * Set signal handler on panic signals.
457 */
458static void
459set_signal_handler(void)
460{
461	struct sigaction sa;
462
463	memset(&sa, 0, sizeof(sa));
464	sa.sa_sigaction = &sig_handler;
465	sa.sa_flags = SA_SIGINFO;
466
467	if (sigaction(SIGTERM, &sa, NULL) < 0) {
468		perror("sigaction");
469		return;
470	}
471	if (sigaction(SIGHUP, &sa, NULL) < 0) {
472		perror("sigaction");
473		return;
474	}
475	if (sigaction(SIGINT, &sa, NULL) < 0) {
476		perror("sigaction");
477		return;
478	}
479	if (sigaction(SIGQUIT, &sa, NULL) < 0) {
480		perror("sigaction");
481		return;
482	}
483	if (sigaction(SIGABRT, &sa, NULL) < 0) {
484		perror("sigaction");
485		return;
486	}
487	if (sigaction(SIGBUS, &sa, NULL) < 0) {
488		perror("sigaction");
489		return;
490	}
491	if (sigaction(SIGSEGV, &sa, NULL) < 0) {
492		perror("sigaction");
493		return;
494	}
495	if (sigaction(SIGUSR1, &sa, NULL) < 0) {
496		perror("sigaction");
497		return;
498	}
499	if (sigaction(SIGUSR2, &sa, NULL) < 0) {
500		perror("sigaction");
501		return;
502	}
503}
504
/*
 * Report an unrecognised option argument: prints the "Unknown feature"
 * message (message 3) followed by what, then exits with status 2.
 */
static void
unknown(const char *what)
{
	const char *label = getstr(3);

	errx(2, "%s: %s", label, what);
}
514
515/*
516 * Check whether contradictory input options are used.
517 */
518static void
519check_mutually_exclusive_flags(char c, bool *mef_flags)
520{
521	int fo_index, mec;
522	bool found_others, found_this;
523
524	found_others = found_this =false;
525	fo_index = 0;
526
527	for (int i = 0; i < NUMBER_OF_MUTUALLY_EXCLUSIVE_FLAGS; i++) {
528		mec = mutually_exclusive_flags[i];
529
530		if (mec != c) {
531			if (mef_flags[i]) {
532				if (found_this)
533					errx(1, "%c:%c: %s", c, mec, getstr(1));
534				found_others = true;
535				fo_index = i;
536			}
537		} else {
538			if (found_others)
539				errx(1, "%c:%c: %s", c, mutually_exclusive_flags[fo_index], getstr(1));
540			mef_flags[i] = true;
541			found_this = true;
542		}
543	}
544}
545
546/*
547 * Initialise sort opts data.
548 */
549static void
550set_sort_opts(void)
551{
552
553	memset(&default_sort_mods_object, 0,
554	    sizeof(default_sort_mods_object));
555	memset(&sort_opts_vals, 0, sizeof(sort_opts_vals));
556	default_sort_mods_object.func =
557	    get_sort_func(&default_sort_mods_object);
558}
559
560/*
561 * Set a sort modifier on a sort modifiers object.
562 */
563static bool
564set_sort_modifier(struct sort_mods *sm, int c)
565{
566
567	if (sm) {
568		switch (c){
569		case 'b':
570			sm->bflag = true;
571			break;
572		case 'd':
573			sm->dflag = true;
574			break;
575		case 'f':
576			sm->fflag = true;
577			break;
578		case 'g':
579			sm->gflag = true;
580			need_hint = true;
581			break;
582		case 'i':
583			sm->iflag = true;
584			break;
585		case 'R':
586			sm->Rflag = true;
587			need_random = true;
588			break;
589		case 'M':
590			initialise_months();
591			sm->Mflag = true;
592			need_hint = true;
593			break;
594		case 'n':
595			sm->nflag = true;
596			need_hint = true;
597			print_symbols_on_debug = true;
598			break;
599		case 'r':
600			sm->rflag = true;
601			break;
602		case 'V':
603			sm->Vflag = true;
604			break;
605		case 'h':
606			sm->hflag = true;
607			need_hint = true;
608			print_symbols_on_debug = true;
609			break;
610		default:
611			return false;
612		}
613		sort_opts_vals.complex_sort = true;
614		sm->func = get_sort_func(sm);
615	}
616	return (true);
617}
618
619/*
620 * Parse POS in -k option.
621 */
622static int
623parse_pos(const char *s, struct key_specs *ks, bool *mef_flags, bool second)
624{
625	regmatch_t pmatch[4];
626	regex_t re;
627	char *c, *f;
628	const char *sregexp = "^([0-9]+)(\\.[0-9]+)?([bdfirMngRhV]+)?$";
629	size_t len, nmatch;
630	int ret;
631
632	ret = -1;
633	nmatch = 4;
634	c = f = NULL;
635
636	if (regcomp(&re, sregexp, REG_EXTENDED) != 0)
637		return (-1);
638
639	if (regexec(&re, s, nmatch, pmatch, 0) != 0)
640		goto end;
641
642	if (pmatch[0].rm_eo <= pmatch[0].rm_so)
643		goto end;
644
645	if (pmatch[1].rm_eo <= pmatch[1].rm_so)
646		goto end;
647
648	len = pmatch[1].rm_eo - pmatch[1].rm_so;
649	f = sort_malloc((len + 1) * sizeof(char));
650
651	strncpy(f, s + pmatch[1].rm_so, len);
652	f[len] = '\0';
653
654	if (second) {
655		errno = 0;
656		ks->f2 = (size_t) strtoul(f, NULL, 10);
657		if (errno != 0)
658			err(2, "-k");
659		if (ks->f2 == 0) {
660			warn("%s",getstr(5));
661			goto end;
662		}
663	} else {
664		errno = 0;
665		ks->f1 = (size_t) strtoul(f, NULL, 10);
666		if (errno != 0)
667			err(2, "-k");
668		if (ks->f1 == 0) {
669			warn("%s",getstr(5));
670			goto end;
671		}
672	}
673
674	if (pmatch[2].rm_eo > pmatch[2].rm_so) {
675		len = pmatch[2].rm_eo - pmatch[2].rm_so - 1;
676		c = sort_malloc((len + 1) * sizeof(char));
677
678		strncpy(c, s + pmatch[2].rm_so + 1, len);
679		c[len] = '\0';
680
681		if (second) {
682			errno = 0;
683			ks->c2 = (size_t) strtoul(c, NULL, 10);
684			if (errno != 0)
685				err(2, "-k");
686		} else {
687			errno = 0;
688			ks->c1 = (size_t) strtoul(c, NULL, 10);
689			if (errno != 0)
690				err(2, "-k");
691			if (ks->c1 == 0) {
692				warn("%s",getstr(6));
693				goto end;
694			}
695		}
696	} else {
697		if (second)
698			ks->c2 = 0;
699		else
700			ks->c1 = 1;
701	}
702
703	if (pmatch[3].rm_eo > pmatch[3].rm_so) {
704		regoff_t i = 0;
705
706		for (i = pmatch[3].rm_so; i < pmatch[3].rm_eo; i++) {
707			check_mutually_exclusive_flags(s[i], mef_flags);
708			if (s[i] == 'b') {
709				if (second)
710					ks->pos2b = true;
711				else
712					ks->pos1b = true;
713			} else if (!set_sort_modifier(&(ks->sm), s[i]))
714				goto end;
715		}
716	}
717
718	ret = 0;
719
720end:
721
722	if (c)
723		sort_free(c);
724	if (f)
725		sort_free(f);
726	regfree(&re);
727
728	return (ret);
729}
730
731/*
732 * Parse -k option value.
733 */
734static int
735parse_k(const char *s, struct key_specs *ks)
736{
737	int ret = -1;
738	bool mef_flags[NUMBER_OF_MUTUALLY_EXCLUSIVE_FLAGS] =
739	    { false, false, false, false, false, false };
740
741	if (s && *s) {
742		char *sptr;
743
744		sptr = strchr(s, ',');
745		if (sptr) {
746			size_t size1;
747			char *pos1, *pos2;
748
749			size1 = sptr - s;
750
751			if (size1 < 1)
752				return (-1);
753			pos1 = sort_malloc((size1 + 1) * sizeof(char));
754
755			strncpy(pos1, s, size1);
756			pos1[size1] = '\0';
757
758			ret = parse_pos(pos1, ks, mef_flags, false);
759
760			sort_free(pos1);
761			if (ret < 0)
762				return (ret);
763
764			pos2 = sort_strdup(sptr + 1);
765			ret = parse_pos(pos2, ks, mef_flags, true);
766			sort_free(pos2);
767		} else
768			ret = parse_pos(s, ks, mef_flags, false);
769	}
770
771	return (ret);
772}
773
/*
 * Parse one position of the obsolete "+POS1 -POS2" key syntax:
 * FIELD[.COLUMN][LETTERS].
 *
 * s      position text without the leading '+' or '-'
 * nf     receives the field number (0 when parsing fails early)
 * nc     receives the column number (0 when absent)
 * sopts  receives the trailing letters as a NUL-terminated string; it is
 *        left untouched when there are none.  The callers in this file
 *        pass 128-byte buffers, so longer letter runs are rejected
 *        instead of being copied.
 *
 * Returns 0 on success and -1 when s does not match the syntax.  A
 * number that is out of range, or that could not be incremented by one
 * as an int, terminates the program with the "Invalid key position"
 * message.
 */
static int
parse_pos_obs(const char *s, int *nf, int *nc, char* sopts)
{
	enum { SOPTS_CAPACITY = 128 };
	regex_t re;
	regmatch_t pmatch[4];
	char *c, *f;
	const char *sregexp = "^([0-9]+)(\\.[0-9]+)?([A-Za-z]+)?$";
	int ret;
	size_t len, nmatch;
	unsigned long val;

	ret = -1;
	nmatch = 4;
	c = f = NULL;
	*nc = *nf = 0;

	if (regcomp(&re, sregexp, REG_EXTENDED) != 0)
		return (-1);

	if (regexec(&re, s, nmatch, pmatch, 0) != 0)
		goto end;

	if (pmatch[0].rm_eo <= pmatch[0].rm_so)
		goto end;

	if (pmatch[1].rm_eo <= pmatch[1].rm_so)
		goto end;

	/* Group 1: field number. */
	len = pmatch[1].rm_eo - pmatch[1].rm_so;
	f = sort_malloc((len + 1) * sizeof(char));

	memcpy(f, s + pmatch[1].rm_so, len);
	f[len] = '\0';

	errno = 0;
	val = strtoul(f, NULL, 10);
	/* The caller adds 1 to the result, so INT_MAX itself is too big. */
	if (errno != 0 || val >= (unsigned long)INT_MAX)
		errx(2, "%s", getstr(11));
	*nf = (int)val;

	/* Group 2: ".COLUMN"; the leading dot is skipped. */
	if (pmatch[2].rm_eo > pmatch[2].rm_so) {
		len = pmatch[2].rm_eo - pmatch[2].rm_so - 1;
		c = sort_malloc((len + 1) * sizeof(char));

		memcpy(c, s + pmatch[2].rm_so + 1, len);
		c[len] = '\0';

		errno = 0;
		val = strtoul(c, NULL, 10);
		if (errno != 0 || val >= (unsigned long)INT_MAX)
			errx(2, "%s", getstr(11));
		*nc = (int)val;
	}

	/* Group 3: option letters, copied only if they fit the buffer. */
	if (pmatch[3].rm_eo > pmatch[3].rm_so) {

		len = pmatch[3].rm_eo - pmatch[3].rm_so;
		if (len >= SOPTS_CAPACITY)
			goto end;

		memcpy(sopts, s + pmatch[3].rm_so, len);
		sopts[len] = '\0';
	}

	ret = 0;

end:
	if (c)
		sort_free(c);
	if (f)
		sort_free(f);
	regfree(&re);

	return (ret);
}
847
/*
 * Rewrite obsolete "+POS1 [-POS2]" arguments in argv as the equivalent
 * "-kPOS1[,POS2]" option, in place.
 *
 * Obsolete positions count from zero, so 1 is added to the field and
 * column of POS1.  For POS2 the field is incremented only when a
 * non-zero column was given.  When a "-POS2" argument is merged into the
 * preceding key, the remaining arguments are shifted down and *argc is
 * decremented.  Arguments that do not parse as a position are left
 * untouched.  The replacement strings are allocated with sort_strdup()
 * and are not freed here.
 *
 * A position that cannot be represented terminates the program with the
 * "Invalid key position" message.
 */
void
fix_obsolete_keys(int *argc, char **argv)
{
	/*
	 * Worst case: "-k" plus two positions, each made of two decimal
	 * ints and up to 127 option letters, plus ',' and the NUL.
	 */
	char sopt[512];
	int n;

	for (int i = 1; i < *argc; i++) {
		char *arg1;
		char sopts1[128];
		int c1, f1;

		arg1 = argv[i];

		if (strlen(arg1) <= 1 || arg1[0] != '+')
			continue;

		sopts1[0] = 0;
		c1 = f1 = 0;

		if (parse_pos_obs(arg1 + 1, &f1, &c1, sopts1) < 0)
			continue;

		/* Refuse values for which the increment would overflow. */
		if (f1 < 0 || f1 == INT_MAX || c1 < 0 || c1 == INT_MAX)
			errx(2, "%s", getstr(11));
		f1 += 1;
		c1 += 1;

		if (i + 1 < *argc) {
			char *arg2 = argv[i + 1];

			if (strlen(arg2) > 1 && arg2[0] == '-') {
				char sopts2[128];
				int c2, f2;

				sopts2[0] = 0;
				c2 = f2 = 0;

				if (parse_pos_obs(arg2 + 1,
				    &f2, &c2, sopts2) >= 0) {
					if (c2 > 0) {
						if (f2 == INT_MAX)
							errx(2, "%s",
							    getstr(11));
						f2 += 1;
					}
					n = snprintf(sopt, sizeof(sopt),
					    "-k%d.%d%s,%d.%d%s",
					    f1, c1, sopts1, f2, c2, sopts2);
					if (n < 0 || (size_t)n >= sizeof(sopt))
						errx(2, "%s", getstr(11));
					argv[i] = sort_strdup(sopt);

					/* Drop the consumed "-POS2". */
					for (int j = i + 1; j + 1 < *argc; j++)
						argv[j] = argv[j + 1];
					*argc -= 1;
					/* Keep the vector NULL-terminated. */
					argv[*argc] = NULL;
					continue;
				}
			}
		}

		n = snprintf(sopt, sizeof(sopt), "-k%d.%d%s", f1, c1, sopts1);
		if (n < 0 || (size_t)n >= sizeof(sopt))
			errx(2, "%s", getstr(11));
		argv[i] = sort_strdup(sopt);
	}
}
904
905/*
906 * Set random seed
907 */
908static void
909set_random_seed(void)
910{
911	if (need_random) {
912
913		if (strcmp(random_source, DEFAULT_RANDOM_SORT_SEED_FILE) == 0) {
914			FILE* fseed;
915			MD5_CTX ctx;
916			char rsd[MAX_DEFAULT_RANDOM_SEED_DATA_SIZE];
917			size_t sz = 0;
918
919			fseed = openfile(random_source, "r");
920			while (!feof(fseed)) {
921				int cr;
922
923				cr = fgetc(fseed);
924				if (cr == EOF)
925					break;
926
927				rsd[sz++] = (char) cr;
928
929				if (sz >= MAX_DEFAULT_RANDOM_SEED_DATA_SIZE)
930					break;
931			}
932
933			closefile(fseed, random_source);
934
935			MD5Init(&ctx);
936			MD5Update(&ctx, rsd, sz);
937
938			random_seed = MD5End(&ctx, NULL);
939			random_seed_size = strlen(random_seed);
940
941		} else {
942			MD5_CTX ctx;
943			char *b;
944
945			MD5Init(&ctx);
946			b = MD5File(random_source, NULL);
947			if (b == NULL)
948				err(2, NULL);
949
950			random_seed = b;
951			random_seed_size = strlen(b);
952		}
953
954		MD5Init(&md5_ctx);
955		if(random_seed_size>0) {
956			MD5Update(&md5_ctx, random_seed, random_seed_size);
957		}
958	}
959}
960
961/*
962 * Main function.
963 */
964int
965main(int argc, char **argv)
966{
967	char *outfile, *real_outfile;
968	int c, result;
969	bool mef_flags[NUMBER_OF_MUTUALLY_EXCLUSIVE_FLAGS] =
970	    { false, false, false, false, false, false };
971
972	result = 0;
973	outfile = sort_strdup("-");
974	real_outfile = NULL;
975
976	struct sort_mods *sm = &default_sort_mods_object;
977
978	init_tmp_files();
979
980	set_signal_handler();
981
982	set_hw_params();
983	set_locale();
984	set_tmpdir();
985	set_sort_opts();
986
987	fix_obsolete_keys(&argc, argv);
988
989	while (((c = getopt_long(argc, argv, OPTIONS, long_options, NULL))
990	    != -1)) {
991
992		check_mutually_exclusive_flags(c, mef_flags);
993
994		if (!set_sort_modifier(sm, c)) {
995
996			switch (c) {
997			case 'c':
998				sort_opts_vals.cflag = true;
999				if (optarg) {
1000					if (!strcmp(optarg, "diagnose-first"))
1001						;
1002					else if (!strcmp(optarg, "silent") ||
1003					    !strcmp(optarg, "quiet"))
1004						sort_opts_vals.csilentflag = true;
1005					else if (*optarg)
1006						unknown(optarg);
1007				}
1008				break;
1009			case 'C':
1010				sort_opts_vals.cflag = true;
1011				sort_opts_vals.csilentflag = true;
1012				break;
1013			case 'k':
1014			{
1015				sort_opts_vals.complex_sort = true;
1016				sort_opts_vals.kflag = true;
1017
1018				keys_num++;
1019				keys = sort_realloc(keys, keys_num *
1020				    sizeof(struct key_specs));
1021				memset(&(keys[keys_num - 1]), 0,
1022				    sizeof(struct key_specs));
1023
1024				if (parse_k(optarg, &(keys[keys_num - 1]))
1025				    < 0) {
1026					errc(2, EINVAL, "-k %s", optarg);
1027				}
1028
1029				break;
1030			}
1031			case 'm':
1032				sort_opts_vals.mflag = true;
1033				break;
1034			case 'o':
1035				outfile = sort_realloc(outfile, (strlen(optarg) + 1));
1036				strcpy(outfile, optarg);
1037				break;
1038			case 's':
1039				sort_opts_vals.sflag = true;
1040				break;
1041			case 'S':
1042				available_free_memory =
1043				    parse_memory_buffer_value(optarg);
1044				break;
1045			case 'T':
1046				tmpdir = sort_strdup(optarg);
1047				break;
1048			case 't':
1049				while (strlen(optarg) > 1) {
1050					if (optarg[0] != '\\') {
1051						errc(2, EINVAL, "%s", optarg);
1052					}
1053					optarg += 1;
1054					if (*optarg == '0') {
1055						*optarg = 0;
1056						break;
1057					}
1058				}
1059				sort_opts_vals.tflag = true;
1060				sort_opts_vals.field_sep = btowc(optarg[0]);
1061				if (sort_opts_vals.field_sep == WEOF) {
1062					errno = EINVAL;
1063					err(2, NULL);
1064				}
1065				if (!gnusort_numeric_compatibility) {
1066					if (symbol_decimal_point == sort_opts_vals.field_sep)
1067						symbol_decimal_point = WEOF;
1068					if (symbol_thousands_sep == sort_opts_vals.field_sep)
1069						symbol_thousands_sep = WEOF;
1070					if (symbol_negative_sign == sort_opts_vals.field_sep)
1071						symbol_negative_sign = WEOF;
1072					if (symbol_positive_sign == sort_opts_vals.field_sep)
1073						symbol_positive_sign = WEOF;
1074				}
1075				break;
1076			case 'u':
1077				sort_opts_vals.uflag = true;
1078				/* stable sort for the correct unique val */
1079				sort_opts_vals.sflag = true;
1080				break;
1081			case 'z':
1082				sort_opts_vals.zflag = true;
1083				break;
1084			case SORT_OPT:
1085				if (optarg) {
1086					if (!strcmp(optarg, "general-numeric"))
1087						set_sort_modifier(sm, 'g');
1088					else if (!strcmp(optarg, "human-numeric"))
1089						set_sort_modifier(sm, 'h');
1090					else if (!strcmp(optarg, "numeric"))
1091						set_sort_modifier(sm, 'n');
1092					else if (!strcmp(optarg, "month"))
1093						set_sort_modifier(sm, 'M');
1094					else if (!strcmp(optarg, "random"))
1095						set_sort_modifier(sm, 'R');
1096					else
1097						unknown(optarg);
1098				}
1099				break;
1100#if defined(SORT_THREADS)
1101			case PARALLEL_OPT:
1102				nthreads = (size_t)(atoi(optarg));
1103				if (nthreads < 1)
1104					nthreads = 1;
1105				if (nthreads > 1024)
1106					nthreads = 1024;
1107				break;
1108#endif
1109			case QSORT_OPT:
1110				sort_opts_vals.sort_method = SORT_QSORT;
1111				break;
1112			case MERGESORT_OPT:
1113				sort_opts_vals.sort_method = SORT_MERGESORT;
1114				break;
1115			case MMAP_OPT:
1116				use_mmap = true;
1117				break;
1118			case HEAPSORT_OPT:
1119				sort_opts_vals.sort_method = SORT_HEAPSORT;
1120				break;
1121			case RADIXSORT_OPT:
1122				sort_opts_vals.sort_method = SORT_RADIXSORT;
1123				break;
1124			case RANDOMSOURCE_OPT:
1125				random_source = strdup(optarg);
1126				break;
1127			case COMPRESSPROGRAM_OPT:
1128				compress_program = strdup(optarg);
1129				break;
1130			case FF_OPT:
1131				read_fns_from_file0(optarg);
1132				break;
1133			case BS_OPT:
1134			{
1135				errno = 0;
1136				long mof = strtol(optarg, NULL, 10);
1137				if (errno != 0)
1138					err(2, "--batch-size");
1139				if (mof >= 2)
1140					max_open_files = (size_t) mof + 1;
1141			}
1142				break;
1143			case VERSION_OPT:
1144				printf("%s\n", VERSION);
1145				exit(EXIT_SUCCESS);
1146				/* NOTREACHED */
1147				break;
1148			case DEBUG_OPT:
1149				debug_sort = true;
1150				break;
1151			case HELP_OPT:
1152				usage(false);
1153				/* NOTREACHED */
1154				break;
1155			default:
1156				usage(true);
1157				/* NOTREACHED */
1158			}
1159		}
1160	}
1161
1162	argc -= optind;
1163	argv += optind;
1164
1165#ifndef WITHOUT_NLS
1166	catalog = catopen("sort", NL_CAT_LOCALE);
1167#endif
1168
1169	if (sort_opts_vals.cflag && sort_opts_vals.mflag)
1170		errx(1, "%c:%c: %s", 'm', 'c', getstr(1));
1171
1172#ifndef WITHOUT_NLS
1173	catclose(catalog);
1174#endif
1175
1176	if (keys_num == 0) {
1177		keys_num = 1;
1178		keys = sort_realloc(keys, sizeof(struct key_specs));
1179		memset(&(keys[0]), 0, sizeof(struct key_specs));
1180		keys[0].c1 = 1;
1181		keys[0].pos1b = default_sort_mods->bflag;
1182		keys[0].pos2b = default_sort_mods->bflag;
1183		memcpy(&(keys[0].sm), default_sort_mods,
1184		    sizeof(struct sort_mods));
1185	}
1186
1187	for (size_t i = 0; i < keys_num; i++) {
1188		struct key_specs *ks;
1189
1190		ks = &(keys[i]);
1191
1192		if (sort_modifier_empty(&(ks->sm)) && !(ks->pos1b) &&
1193		    !(ks->pos2b)) {
1194			ks->pos1b = sm->bflag;
1195			ks->pos2b = sm->bflag;
1196			memcpy(&(ks->sm), sm, sizeof(struct sort_mods));
1197		}
1198
1199		ks->sm.func = get_sort_func(&(ks->sm));
1200	}
1201
1202	if (argv_from_file0) {
1203		argc = argc_from_file0;
1204		argv = argv_from_file0;
1205	}
1206
1207	if (debug_sort) {
1208		printf("Memory to be used for sorting: %llu\n",available_free_memory);
1209#if defined(SORT_THREADS)
1210		printf("Number of CPUs: %d\n",(int)ncpu);
1211		nthreads = 1;
1212#endif
1213		printf("Using collate rules of %s locale\n",
1214		    setlocale(LC_COLLATE, NULL));
1215		if (byte_sort)
1216			printf("Byte sort is used\n");
1217		if (print_symbols_on_debug) {
1218			printf("Decimal Point: <%lc>\n", symbol_decimal_point);
1219			if (symbol_thousands_sep)
1220				printf("Thousands separator: <%lc>\n",
1221				    symbol_thousands_sep);
1222			printf("Positive sign: <%lc>\n", symbol_positive_sign);
1223			printf("Negative sign: <%lc>\n", symbol_negative_sign);
1224		}
1225	}
1226
1227	set_random_seed();
1228
1229	/* Case when the outfile equals one of the input files: */
1230	if (strcmp(outfile, "-")) {
1231
1232		for(int i = 0; i < argc; ++i) {
1233			if (strcmp(argv[i], outfile) == 0) {
1234				real_outfile = sort_strdup(outfile);
1235				for(;;) {
1236					char* tmp = sort_malloc(strlen(outfile) +
1237					    strlen(".tmp") + 1);
1238
1239					strcpy(tmp, outfile);
1240					strcpy(tmp + strlen(tmp), ".tmp");
1241					sort_free(outfile);
1242					outfile = tmp;
1243					if (access(outfile, F_OK) < 0)
1244						break;
1245				}
1246				tmp_file_atexit(outfile);
1247			}
1248		}
1249	}
1250
1251#if defined(SORT_THREADS)
1252	if ((argc < 1) || (strcmp(outfile, "-") == 0) || (*outfile == 0))
1253		nthreads = 1;
1254#endif
1255
1256	if (!sort_opts_vals.cflag && !sort_opts_vals.mflag) {
1257		struct file_list fl;
1258		struct sort_list list;
1259
1260		sort_list_init(&list);
1261		file_list_init(&fl, true);
1262
1263		if (argc < 1)
1264			procfile("-", &list, &fl);
1265		else {
1266			while (argc > 0) {
1267				procfile(*argv, &list, &fl);
1268				--argc;
1269				++argv;
1270			}
1271		}
1272
1273		if (fl.count < 1)
1274			sort_list_to_file(&list, outfile);
1275		else {
1276			if (list.count > 0) {
1277				char *flast = new_tmp_file_name();
1278
1279				sort_list_to_file(&list, flast);
1280				file_list_add(&fl, flast, false);
1281			}
1282			merge_files(&fl, outfile);
1283		}
1284
1285		file_list_clean(&fl);
1286
1287		/*
1288		 * We are about to exit the program, so we can ignore
1289		 * the clean-up for speed
1290		 *
1291		 * sort_list_clean(&list);
1292		 */
1293
1294	} else if (sort_opts_vals.cflag) {
1295		result = (argc == 0) ? (check("-")) : (check(*argv));
1296	} else if (sort_opts_vals.mflag) {
1297		struct file_list fl;
1298
1299		file_list_init(&fl, false);
1300		file_list_populate(&fl, argc, argv, true);
1301		merge_files(&fl, outfile);
1302		file_list_clean(&fl);
1303	}
1304
1305	if (real_outfile) {
1306		unlink(real_outfile);
1307		if (rename(outfile, real_outfile) < 0)
1308			err(2, NULL);
1309		sort_free(real_outfile);
1310	}
1311
1312	sort_free(outfile);
1313
1314	return (result);
1315}
1316