1/*-
2 * SPDX-License-Identifier: BSD-2-Clause
3 *
4 * Copyright (C) 2009 Gabor Kovesdan <gabor@FreeBSD.org>
5 * Copyright (C) 2012 Oleg Moskalenko <mom040267@gmail.com>
6 * All rights reserved.
7 *
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
10 * are met:
11 * 1. Redistributions of source code must retain the above copyright
12 *    notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 *    notice, this list of conditions and the following disclaimer in the
15 *    documentation and/or other materials provided with the distribution.
16 *
17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
18 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
21 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
23 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27 * SUCH DAMAGE.
28 */
29
30#include <sys/cdefs.h>
31#include <sys/stat.h>
32#include <sys/sysctl.h>
33#include <sys/types.h>
34
35#include <err.h>
36#include <errno.h>
37#include <fcntl.h>
38#include <getopt.h>
39#include <limits.h>
40#include <locale.h>
41#include <md5.h>
42#include <regex.h>
43#include <signal.h>
44#include <stdbool.h>
45#include <stdio.h>
46#include <stdlib.h>
47#include <string.h>
48#include <unistd.h>
49#include <wchar.h>
50#include <wctype.h>
51
52#include "coll.h"
53#include "file.h"
54#include "sort.h"
55
/*
 * Short-option specification handed to getopt_long() in main(); a letter
 * followed by ':' takes an argument.
 */
#define	OPTIONS	"bcCdfghik:Mmno:RrsS:t:T:uVz"

/* Set by set_sort_modifier() for -R; main() then calls get_random_seed(). */
static bool need_random;

/* MD5 context initialised and fed with the seed bytes by get_random_seed(). */
MD5_CTX md5_ctx;
61
/*
 * Default message strings, addressed by number.
 *
 * Element 0 is an empty placeholder so that the numbers in the inline
 * comments equal the array indices.  The code in this file refers to
 * these messages by number through getstr(), e.g. getstr(12) for the
 * usage text.  The usage text is a printf-style format whose single %s
 * receives the program name, and its "--parallel" part is compiled in
 * only when SORT_THREADS is defined.
 */
const char *nlsstr[] = { "",
/* 1*/"mutually exclusive flags",
/* 2*/"extra argument not allowed with -c",
/* 3*/"Unknown feature",
/* 4*/"Wrong memory buffer specification",
/* 5*/"0 field in key specs",
/* 6*/"0 column in key specs",
/* 7*/"Wrong file mode",
/* 8*/"Cannot open file for reading",
/* 9*/"Radix sort cannot be used with these sort options",
/*10*/"The chosen sort method cannot be used with stable and/or unique sort",
/*11*/"Invalid key position",
/*12*/"Usage: %s [-bcCdfigMmnrsuz] [-kPOS1[,POS2] ... ] "
      "[+POS1 [-POS2]] [-S memsize] [-T tmpdir] [-t separator] "
      "[-o outfile] [--batch-size size] [--files0-from file] "
      "[--heapsort] [--mergesort] [--radixsort] [--qsort] "
      "[--mmap] "
#if defined(SORT_THREADS)
      "[--parallel thread_no] "
#endif
      "[--human-numeric-sort] "
      "[--version-sort] [--random-sort [--random-source file]] "
      "[--compress-program program] [file ...]\n" };
88
/* Global sort options; zeroed by set_sort_opts() and filled in by main(). */
struct sort_opts sort_opts_vals;

/* Set by the --debug option; main() then prints diagnostic information. */
bool debug_sort;
/* Set by set_sort_modifier() for the -g, -R, -M, -n and -h modifiers. */
bool need_hint;

/* Copy of MB_CUR_MAX taken in set_locale() after the locale is selected. */
size_t mb_cur_max;

#if defined(SORT_THREADS)
/* Number of online CPUs as determined (and capped) by set_hw_params(). */
unsigned int ncpu = 1;
/* Thread count; defaults to ncpu and may be overridden by --parallel. */
size_t nthreads = 1;
#endif

/* True when GNUSORT_NUMERIC_COMPATIBILITY is present in the environment. */
static bool gnusort_numeric_compatibility;

/* Modifiers given as global options (outside any -k key specification). */
static struct sort_mods default_sort_mods_object;
struct sort_mods * const default_sort_mods = &default_sort_mods_object;

/* Set for -n and -h; makes --debug print the numeric locale symbols. */
static bool print_symbols_on_debug;

/*
 * Arguments read from a file (when the --files0-from option is used).
 * argc_from_file0 stays (size_t)-1 until the first name has been read.
 */
static size_t argc_from_file0 = (size_t)-1;
static char **argv_from_file0;
113
/*
 * Placeholder symbols for options which have no single-character
 * equivalent.  Numbering starts at CHAR_MAX + 1 so that none of these
 * values can coincide with a short-option character returned by
 * getopt_long().
 */
enum
{
	SORT_OPT = CHAR_MAX + 1,
	HELP_OPT,
	FF_OPT,
	BS_OPT,
	VERSION_OPT,
	DEBUG_OPT,
#if defined(SORT_THREADS)
	PARALLEL_OPT,
#endif
	RANDOMSOURCE_OPT,
	COMPRESSPROGRAM_OPT,
	QSORT_OPT,
	MERGESORT_OPT,
	HEAPSORT_OPT,
	RADIXSORT_OPT,
	MMAP_OPT
};
136
/*
 * Sort-type flags of which at most one may be given at a time.  The
 * position of a flag in this table is also its index in the bool
 * "mef_flags" arrays passed to check_mutually_exclusive_flags().
 */
#define	NUMBER_OF_MUTUALLY_EXCLUSIVE_FLAGS 6
static const char mutually_exclusive_flags[NUMBER_OF_MUTUALLY_EXCLUSIVE_FLAGS] = { 'M', 'n', 'g', 'R', 'h', 'V' };
139
/*
 * Long-option table for getopt_long().  Every entry has a NULL flag
 * pointer, so getopt_long() returns the last member: either the matching
 * short-option character or one of the *_OPT placeholder values.  The
 * table ends with an all-zero sentinel entry.
 */
static struct option long_options[] = {
				{ "batch-size", required_argument, NULL, BS_OPT },
				{ "buffer-size", required_argument, NULL, 'S' },
				{ "check", optional_argument, NULL, 'c' },
				/*
				 * NOTE(review): the option name below literally
				 * contains "=silent|quiet"; presumably it is
				 * documentation-like and never typed verbatim
				 * (main() handles --check=silent through 'c') --
				 * confirm intent.
				 */
				{ "check=silent|quiet", optional_argument, NULL, 'C' },
				{ "compress-program", required_argument, NULL, COMPRESSPROGRAM_OPT },
				{ "debug", no_argument, NULL, DEBUG_OPT },
				{ "dictionary-order", no_argument, NULL, 'd' },
				{ "field-separator", required_argument, NULL, 't' },
				{ "files0-from", required_argument, NULL, FF_OPT },
				{ "general-numeric-sort", no_argument, NULL, 'g' },
				{ "heapsort", no_argument, NULL, HEAPSORT_OPT },
				{ "help",no_argument, NULL, HELP_OPT },
				{ "human-numeric-sort", no_argument, NULL, 'h' },
				{ "ignore-leading-blanks", no_argument, NULL, 'b' },
				{ "ignore-case", no_argument, NULL, 'f' },
				{ "ignore-nonprinting", no_argument, NULL, 'i' },
				{ "key", required_argument, NULL, 'k' },
				{ "merge", no_argument, NULL, 'm' },
				{ "mergesort", no_argument, NULL, MERGESORT_OPT },
				{ "mmap", no_argument, NULL, MMAP_OPT },
				{ "month-sort", no_argument, NULL, 'M' },
				{ "numeric-sort", no_argument, NULL, 'n' },
				{ "output", required_argument, NULL, 'o' },
#if defined(SORT_THREADS)
				{ "parallel", required_argument, NULL, PARALLEL_OPT },
#endif
				{ "qsort", no_argument, NULL, QSORT_OPT },
				{ "radixsort", no_argument, NULL, RADIXSORT_OPT },
				{ "random-sort", no_argument, NULL, 'R' },
				{ "random-source", required_argument, NULL, RANDOMSOURCE_OPT },
				{ "reverse", no_argument, NULL, 'r' },
				{ "sort", required_argument, NULL, SORT_OPT },
				{ "stable", no_argument, NULL, 's' },
				{ "temporary-directory",required_argument, NULL, 'T' },
				{ "unique", no_argument, NULL, 'u' },
				{ "version", no_argument, NULL, VERSION_OPT },
				{ "version-sort",no_argument, NULL, 'V' },
				{ "zero-terminated", no_argument, NULL, 'z' },
				{ NULL, no_argument, NULL, 0 }
};

/* Defined below with external linkage; main() calls it before getopt_long(). */
void fix_obsolete_keys(int *argc, char **argv);
183
/*
 * Check whether no sort modifier is set (i.e. the modifier set is empty).
 */
187static bool
188sort_modifier_empty(struct sort_mods *sm)
189{
190
191	if (sm == NULL)
192		return (true);
193	return (!(sm->Mflag || sm->Vflag || sm->nflag || sm->gflag ||
194	    sm->rflag || sm->Rflag || sm->hflag || sm->dflag || sm->fflag));
195}
196
197/*
198 * Print out usage text.
199 */
static void
usage(bool opt_err)
{

	/* Option errors: usage goes to stderr and the exit status is 2. */
	if (opt_err) {
		fprintf(stderr, getstr(12), getprogname());
		exit(2);
	}

	/* Explicit request (--help): usage goes to stdout, status 0. */
	fprintf(stdout, getstr(12), getprogname());
	exit(0);
}
212
/*
 * Read input file names from a file (--files0-from option).
 */
216static void
217read_fns_from_file0(const char *fn)
218{
219	FILE *f;
220	char *line = NULL;
221	size_t linesize = 0;
222	ssize_t linelen;
223
224	if (fn == NULL)
225		return;
226
227	f = fopen(fn, "r");
228	if (f == NULL)
229		err(2, "%s", fn);
230
231	while ((linelen = getdelim(&line, &linesize, '\0', f)) != -1) {
232		if (*line != '\0') {
233			if (argc_from_file0 == (size_t) - 1)
234				argc_from_file0 = 0;
235			++argc_from_file0;
236			argv_from_file0 = sort_realloc(argv_from_file0,
237			    argc_from_file0 * sizeof(char *));
238			if (argv_from_file0 == NULL)
239				err(2, NULL);
240			argv_from_file0[argc_from_file0 - 1] = line;
241		} else {
242			free(line);
243		}
244		line = NULL;
245		linesize = 0;
246	}
247	if (ferror(f))
248		err(2, "%s: getdelim", fn);
249
250	closefile(f, fn);
251}
252
253/*
254 * Check how much RAM is available for the sort.
255 */
256static void
257set_hw_params(void)
258{
259	long pages, psize;
260
261#if defined(SORT_THREADS)
262	ncpu = 1;
263#endif
264
265	pages = sysconf(_SC_PHYS_PAGES);
266	if (pages < 1) {
267		perror("sysconf pages");
268		pages = 1;
269	}
270	psize = sysconf(_SC_PAGESIZE);
271	if (psize < 1) {
272		perror("sysconf psize");
273		psize = 4096;
274	}
275#if defined(SORT_THREADS)
276	ncpu = (unsigned int)sysconf(_SC_NPROCESSORS_ONLN);
277	if (ncpu < 1)
278		ncpu = 1;
279	else if(ncpu > 32)
280		ncpu = 32;
281
282	nthreads = ncpu;
283#endif
284
285	free_memory = (unsigned long long) pages * (unsigned long long) psize;
286	available_free_memory = free_memory / 2;
287
288	if (available_free_memory < 1024)
289		available_free_memory = 1024;
290}
291
292/*
293 * Convert "plain" symbol to wide symbol, with default value.
294 */
295static void
296conv_mbtowc(wchar_t *wc, const char *c, const wchar_t def)
297{
298
299	if (wc && c) {
300		int res;
301
302		res = mbtowc(wc, c, mb_cur_max);
303		if (res < 1)
304			*wc = def;
305	}
306}
307
308/*
309 * Set current locale symbols.
310 */
311static void
312set_locale(void)
313{
314	struct lconv *lc;
315	const char *locale;
316
317	setlocale(LC_ALL, "");
318
319	mb_cur_max = MB_CUR_MAX;
320
321	lc = localeconv();
322
323	if (lc) {
324		/* obtain LC_NUMERIC info */
325		/* Convert to wide char form */
326		conv_mbtowc(&symbol_decimal_point, lc->decimal_point,
327		    symbol_decimal_point);
328		conv_mbtowc(&symbol_thousands_sep, lc->thousands_sep,
329		    symbol_thousands_sep);
330		conv_mbtowc(&symbol_positive_sign, lc->positive_sign,
331		    symbol_positive_sign);
332		conv_mbtowc(&symbol_negative_sign, lc->negative_sign,
333		    symbol_negative_sign);
334	}
335
336	if (getenv("GNUSORT_NUMERIC_COMPATIBILITY"))
337		gnusort_numeric_compatibility = true;
338
339	locale = setlocale(LC_COLLATE, NULL);
340
341	if (locale) {
342		char *tmpl;
343		const char *cclocale;
344
345		tmpl = sort_strdup(locale);
346		cclocale = setlocale(LC_COLLATE, "C");
347		if (cclocale && !strcmp(cclocale, tmpl))
348			byte_sort = true;
349		else {
350			const char *pclocale;
351
352			pclocale = setlocale(LC_COLLATE, "POSIX");
353			if (pclocale && !strcmp(pclocale, tmpl))
354				byte_sort = true;
355		}
356		setlocale(LC_COLLATE, tmpl);
357		sort_free(tmpl);
358	}
359}
360
/*
 * Set the directory for temporary files from the environment.
 */
364static void
365set_tmpdir(void)
366{
367	char *td;
368
369	td = getenv("TMPDIR");
370	if (td != NULL)
371		tmpdir = sort_strdup(td);
372}
373
374/*
375 * Parse -S option.
376 */
377static unsigned long long
378parse_memory_buffer_value(const char *value)
379{
380
381	if (value == NULL)
382		return (available_free_memory);
383	else {
384		char *endptr;
385		unsigned long long membuf;
386
387		endptr = NULL;
388		errno = 0;
389		membuf = strtoll(value, &endptr, 10);
390
391		if (errno != 0) {
392			warn("%s",getstr(4));
393			membuf = available_free_memory;
394		} else {
395			switch (*endptr){
396			case 'Y':
397				membuf *= 1024;
398				/* FALLTHROUGH */
399			case 'Z':
400				membuf *= 1024;
401				/* FALLTHROUGH */
402			case 'E':
403				membuf *= 1024;
404				/* FALLTHROUGH */
405			case 'P':
406				membuf *= 1024;
407				/* FALLTHROUGH */
408			case 'T':
409				membuf *= 1024;
410				/* FALLTHROUGH */
411			case 'G':
412				membuf *= 1024;
413				/* FALLTHROUGH */
414			case 'M':
415				membuf *= 1024;
416				/* FALLTHROUGH */
417			case '\0':
418			case 'K':
419				membuf *= 1024;
420				/* FALLTHROUGH */
421			case 'b':
422				break;
423			case '%':
424				membuf = (available_free_memory * membuf) /
425				    100;
426				break;
427			default:
428				warnc(EINVAL, "%s", optarg);
429				membuf = available_free_memory;
430			}
431		}
432		return (membuf);
433	}
434}
435
436/*
437 * Signal handler that clears the temporary files.
438 */
/*
 * Handler installed by set_signal_handler(); all three arguments are
 * ignored.  It removes the temporary files and terminates the process.
 */
static void
sig_handler(int sig __unused, siginfo_t *siginfo __unused,
    void *context __unused)
{

	/*
	 * NOTE(review): exit() is not on the list of async-signal-safe
	 * functions, and clear_tmp_files() presumably is not safe either.
	 * Switching to _exit() would skip atexit processing, so the
	 * behaviour is left as is -- confirm before changing.
	 */
	clear_tmp_files();
	exit(-1);
}
447
448/*
449 * Set signal handler on panic signals.
450 */
451static void
452set_signal_handler(void)
453{
454	struct sigaction sa;
455
456	memset(&sa, 0, sizeof(sa));
457	sa.sa_sigaction = &sig_handler;
458	sa.sa_flags = SA_SIGINFO;
459
460	if (sigaction(SIGTERM, &sa, NULL) < 0) {
461		perror("sigaction");
462		return;
463	}
464	if (sigaction(SIGHUP, &sa, NULL) < 0) {
465		perror("sigaction");
466		return;
467	}
468	if (sigaction(SIGINT, &sa, NULL) < 0) {
469		perror("sigaction");
470		return;
471	}
472	if (sigaction(SIGQUIT, &sa, NULL) < 0) {
473		perror("sigaction");
474		return;
475	}
476	if (sigaction(SIGABRT, &sa, NULL) < 0) {
477		perror("sigaction");
478		return;
479	}
480	if (sigaction(SIGBUS, &sa, NULL) < 0) {
481		perror("sigaction");
482		return;
483	}
484	if (sigaction(SIGSEGV, &sa, NULL) < 0) {
485		perror("sigaction");
486		return;
487	}
488	if (sigaction(SIGUSR1, &sa, NULL) < 0) {
489		perror("sigaction");
490		return;
491	}
492	if (sigaction(SIGUSR2, &sa, NULL) < 0) {
493		perror("sigaction");
494		return;
495	}
496}
497
498/*
499 * Print "unknown" message and exit with status 2.
500 */
/*
 * Report "<message 3>: <what>" through errx() and terminate with exit
 * status 2.  main() uses this for unrecognised arguments of --check and
 * --sort.  Does not return.
 */
static void
unknown(const char *what)
{

	errx(2, "%s: %s", getstr(3), what);
}
507
508/*
509 * Check whether contradictory input options are used.
510 */
511static void
512check_mutually_exclusive_flags(char c, bool *mef_flags)
513{
514	int fo_index, mec;
515	bool found_others, found_this;
516
517	found_others = found_this = false;
518	fo_index = 0;
519
520	for (int i = 0; i < NUMBER_OF_MUTUALLY_EXCLUSIVE_FLAGS; i++) {
521		mec = mutually_exclusive_flags[i];
522
523		if (mec != c) {
524			if (mef_flags[i]) {
525				if (found_this)
526					errx(1, "%c:%c: %s", c, mec, getstr(1));
527				found_others = true;
528				fo_index = i;
529			}
530		} else {
531			if (found_others)
532				errx(1, "%c:%c: %s", c, mutually_exclusive_flags[fo_index], getstr(1));
533			mef_flags[i] = true;
534			found_this = true;
535		}
536	}
537}
538
539/*
540 * Initialise sort opts data.
541 */
542static void
543set_sort_opts(void)
544{
545
546	memset(&default_sort_mods_object, 0,
547	    sizeof(default_sort_mods_object));
548	memset(&sort_opts_vals, 0, sizeof(sort_opts_vals));
549	default_sort_mods_object.func =
550	    get_sort_func(&default_sort_mods_object);
551}
552
553/*
554 * Set a sort modifier on a sort modifiers object.
555 */
556static bool
557set_sort_modifier(struct sort_mods *sm, int c)
558{
559
560	if (sm == NULL)
561		return (true);
562
563	switch (c){
564	case 'b':
565		sm->bflag = true;
566		break;
567	case 'd':
568		sm->dflag = true;
569		break;
570	case 'f':
571		sm->fflag = true;
572		break;
573	case 'g':
574		sm->gflag = true;
575		need_hint = true;
576		break;
577	case 'i':
578		sm->iflag = true;
579		break;
580	case 'R':
581		sm->Rflag = true;
582		need_hint = true;
583		need_random = true;
584		break;
585	case 'M':
586		initialise_months();
587		sm->Mflag = true;
588		need_hint = true;
589		break;
590	case 'n':
591		sm->nflag = true;
592		need_hint = true;
593		print_symbols_on_debug = true;
594		break;
595	case 'r':
596		sm->rflag = true;
597		break;
598	case 'V':
599		sm->Vflag = true;
600		break;
601	case 'h':
602		sm->hflag = true;
603		need_hint = true;
604		print_symbols_on_debug = true;
605		break;
606	default:
607		return (false);
608	}
609
610	sort_opts_vals.complex_sort = true;
611	sm->func = get_sort_func(sm);
612	return (true);
613}
614
615/*
616 * Parse POS in -k option.
617 */
618static int
619parse_pos(const char *s, struct key_specs *ks, bool *mef_flags, bool second)
620{
621	regmatch_t pmatch[4];
622	regex_t re;
623	char *c, *f;
624	const char *sregexp = "^([0-9]+)(\\.[0-9]+)?([bdfirMngRhV]+)?$";
625	size_t len, nmatch;
626	int ret;
627
628	ret = -1;
629	nmatch = 4;
630	c = f = NULL;
631
632	if (regcomp(&re, sregexp, REG_EXTENDED) != 0)
633		return (-1);
634
635	if (regexec(&re, s, nmatch, pmatch, 0) != 0)
636		goto end;
637
638	if (pmatch[0].rm_eo <= pmatch[0].rm_so)
639		goto end;
640
641	if (pmatch[1].rm_eo <= pmatch[1].rm_so)
642		goto end;
643
644	len = pmatch[1].rm_eo - pmatch[1].rm_so;
645	f = sort_malloc((len + 1) * sizeof(char));
646
647	strncpy(f, s + pmatch[1].rm_so, len);
648	f[len] = '\0';
649
650	if (second) {
651		errno = 0;
652		ks->f2 = (size_t) strtoul(f, NULL, 10);
653		if (errno != 0)
654			err(2, "-k");
655		if (ks->f2 == 0) {
656			warn("%s",getstr(5));
657			goto end;
658		}
659	} else {
660		errno = 0;
661		ks->f1 = (size_t) strtoul(f, NULL, 10);
662		if (errno != 0)
663			err(2, "-k");
664		if (ks->f1 == 0) {
665			warn("%s",getstr(5));
666			goto end;
667		}
668	}
669
670	if (pmatch[2].rm_eo > pmatch[2].rm_so) {
671		len = pmatch[2].rm_eo - pmatch[2].rm_so - 1;
672		c = sort_malloc((len + 1) * sizeof(char));
673
674		strncpy(c, s + pmatch[2].rm_so + 1, len);
675		c[len] = '\0';
676
677		if (second) {
678			errno = 0;
679			ks->c2 = (size_t) strtoul(c, NULL, 10);
680			if (errno != 0)
681				err(2, "-k");
682		} else {
683			errno = 0;
684			ks->c1 = (size_t) strtoul(c, NULL, 10);
685			if (errno != 0)
686				err(2, "-k");
687			if (ks->c1 == 0) {
688				warn("%s",getstr(6));
689				goto end;
690			}
691		}
692	} else {
693		if (second)
694			ks->c2 = 0;
695		else
696			ks->c1 = 1;
697	}
698
699	if (pmatch[3].rm_eo > pmatch[3].rm_so) {
700		regoff_t i = 0;
701
702		for (i = pmatch[3].rm_so; i < pmatch[3].rm_eo; i++) {
703			check_mutually_exclusive_flags(s[i], mef_flags);
704			if (s[i] == 'b') {
705				if (second)
706					ks->pos2b = true;
707				else
708					ks->pos1b = true;
709			} else if (!set_sort_modifier(&(ks->sm), s[i]))
710				goto end;
711		}
712	}
713
714	ret = 0;
715
716end:
717
718	if (c)
719		sort_free(c);
720	if (f)
721		sort_free(f);
722	regfree(&re);
723
724	return (ret);
725}
726
727/*
728 * Parse -k option value.
729 */
730static int
731parse_k(const char *s, struct key_specs *ks)
732{
733	int ret = -1;
734	bool mef_flags[NUMBER_OF_MUTUALLY_EXCLUSIVE_FLAGS] =
735	    { false, false, false, false, false, false };
736
737	if (s && *s) {
738		char *sptr;
739
740		sptr = strchr(s, ',');
741		if (sptr) {
742			size_t size1;
743			char *pos1, *pos2;
744
745			size1 = sptr - s;
746
747			if (size1 < 1)
748				return (-1);
749			pos1 = sort_malloc((size1 + 1) * sizeof(char));
750
751			strncpy(pos1, s, size1);
752			pos1[size1] = '\0';
753
754			ret = parse_pos(pos1, ks, mef_flags, false);
755
756			sort_free(pos1);
757			if (ret < 0)
758				return (ret);
759
760			pos2 = sort_strdup(sptr + 1);
761			ret = parse_pos(pos2, ks, mef_flags, true);
762			sort_free(pos2);
763		} else
764			ret = parse_pos(s, ks, mef_flags, false);
765	}
766
767	return (ret);
768}
769
770/*
771 * Parse POS in +POS -POS option.
772 */
/*
 * Parse one POS of the obsolete "+POS1 -POS2" syntax:
 * FIELD[.COLUMN][LETTERS].
 *
 * s      the text after the leading '+' or '-'
 * nf     receives the field number (0 when parsing fails early)
 * nc     receives the column number (0 when absent)
 * sopts  receives the trailing letters as a NUL-terminated string and is
 *        left untouched when there are none.  The callers in this file
 *        pass 128-byte buffers, so longer letter runs are rejected.
 *
 * Returns 0 on success and -1 when the text is not an obsolete POS.  A
 * number that is out of range terminates the program with status 2.
 */
static int
parse_pos_obs(const char *s, int *nf, int *nc, char* sopts)
{
	/* Size of the modifier buffers used by the callers in this file. */
	enum { SOPTS_BUFSZ = 128 };
	regex_t re;
	regmatch_t pmatch[4];
	const char *sregexp = "^([0-9]+)(\\.[0-9]+)?([A-Za-z]+)?$";
	unsigned long val;
	size_t len;
	int ret;

	ret = -1;
	*nc = *nf = 0;

	if (regcomp(&re, sregexp, REG_EXTENDED) != 0)
		return (-1);

	if (regexec(&re, s, 4, pmatch, 0) != 0)
		goto end;

	if (pmatch[0].rm_eo <= pmatch[0].rm_so)
		goto end;

	if (pmatch[1].rm_eo <= pmatch[1].rm_so)
		goto end;

	/*
	 * The regex guarantees digits at the match offset, so the number
	 * is parsed in place.  The result must fit in an int and leave
	 * room for the "+ 1" that fix_obsolete_keys() applies afterwards.
	 */
	errno = 0;
	val = strtoul(s + pmatch[1].rm_so, NULL, 10);
	if (errno != 0 || val > (unsigned long)(INT_MAX - 1))
		errx(2, "%s", getstr(11));
	*nf = (int)val;

	if (pmatch[2].rm_eo > pmatch[2].rm_so) {
		/* Skip the leading '.' of the column group. */
		errno = 0;
		val = strtoul(s + pmatch[2].rm_so + 1, NULL, 10);
		if (errno != 0 || val > (unsigned long)(INT_MAX - 1))
			errx(2, "%s", getstr(11));
		*nc = (int)val;
	}

	if (pmatch[3].rm_eo > pmatch[3].rm_so) {
		len = pmatch[3].rm_eo - pmatch[3].rm_so;

		/* Never write past the end of the caller's buffer. */
		if (len >= SOPTS_BUFSZ)
			goto end;

		memcpy(sopts, s + pmatch[3].rm_so, len);
		sopts[len] = '\0';
	}

	ret = 0;

end:
	regfree(&re);

	return (ret);
}
843
844/*
845 * "Translate" obsolete +POS1 -POS2 syntax into new -kPOS1,POS2 syntax
846 */
/*
 * Rewrite obsolete "+POS1 [-POS2]" arguments in argv as "-kPOS1[,POS2]".
 *
 * Obsolete positions count from zero, -k positions from one, hence the
 * adjustments below.  When a "-POS2" argument is merged into the
 * preceding "+POS1", the remaining arguments are shifted down and *argc
 * is decremented.  Scanning stops at "--".  Rewritten arguments are
 * replaced by newly allocated strings.
 */
void
fix_obsolete_keys(int *argc, char **argv)
{
	/*
	 * SOPTS_SIZE: size of each modifier buffer handed to
	 * parse_pos_obs().  SOPT_SIZE: room for "-k", four decimal ints,
	 * the punctuation and two full modifier strings.
	 */
	enum { SOPTS_SIZE = 128, SOPT_SIZE = 2 * SOPTS_SIZE + 64 };
	char sopt[SOPT_SIZE];

	for (int i = 1; i < *argc; i++) {
		char *arg1;
		char sopts1[SOPTS_SIZE];
		int c1, f1;

		arg1 = argv[i];

		if (strcmp(arg1, "--") == 0) {
			/* Following arguments are treated as filenames. */
			break;
		}

		if (strlen(arg1) <= 1 || arg1[0] != '+')
			continue;

		/*
		 * The modifier letters are a part of the argument, so an
		 * argument shorter than the buffer cannot overflow it.
		 * Longer arguments are left alone.
		 */
		if (strlen(arg1 + 1) >= sizeof(sopts1))
			continue;

		sopts1[0] = 0;
		c1 = f1 = 0;

		if (parse_pos_obs(arg1 + 1, &f1, &c1, sopts1) < 0)
			continue;

		f1 += 1;
		c1 += 1;

		if (i + 1 < *argc) {
			char *arg2 = argv[i + 1];

			if (strlen(arg2) > 1 && arg2[0] == '-' &&
			    strlen(arg2 + 1) < SOPTS_SIZE) {
				int c2, f2;
				char sopts2[SOPTS_SIZE];

				sopts2[0] = 0;
				c2 = f2 = 0;

				if (parse_pos_obs(arg2 + 1,
				    &f2, &c2, sopts2) >= 0) {
					if (c2 > 0)
						f2 += 1;
					snprintf(sopt, sizeof(sopt),
					    "-k%d.%d%s,%d.%d%s",
					    f1, c1, sopts1, f2, c2, sopts2);
					argv[i] = sort_strdup(sopt);
					/* Drop the consumed "-POS2". */
					for (int j = i + 1; j + 1 < *argc; j++)
						argv[j] = argv[j + 1];
					*argc -= 1;
					continue;
				}
			}
		}

		/* "+POS1" without a usable "-POS2". */
		snprintf(sopt, sizeof(sopt), "-k%d.%d%s", f1, c1, sopts1);
		argv[i] = sort_strdup(sopt);
	}
}
905
906/*
907 * Seed random sort
908 */
909static void
910get_random_seed(const char *random_source)
911{
912	char randseed[32];
913	struct stat fsb, rsb;
914	ssize_t rd;
915	int rsfd;
916
917	rsfd = -1;
918	rd = sizeof(randseed);
919
920	if (random_source == NULL) {
921		if (getentropy(randseed, sizeof(randseed)) < 0)
922			err(EX_SOFTWARE, "getentropy");
923		goto out;
924	}
925
926	rsfd = open(random_source, O_RDONLY | O_CLOEXEC);
927	if (rsfd < 0)
928		err(EX_NOINPUT, "open: %s", random_source);
929
930	if (fstat(rsfd, &fsb) != 0)
931		err(EX_SOFTWARE, "fstat");
932
933	if (!S_ISREG(fsb.st_mode) && !S_ISCHR(fsb.st_mode))
934		err(EX_USAGE,
935		    "random seed isn't a regular file or /dev/random");
936
937	/*
938	 * Regular files: read up to maximum seed size and explicitly
939	 * reject longer files.
940	 */
941	if (S_ISREG(fsb.st_mode)) {
942		if (fsb.st_size > (off_t)sizeof(randseed))
943			errx(EX_USAGE, "random seed is too large (%jd >"
944			    " %zu)!", (intmax_t)fsb.st_size,
945			    sizeof(randseed));
946		else if (fsb.st_size < 1)
947			errx(EX_USAGE, "random seed is too small ("
948			    "0 bytes)");
949
950		memset(randseed, 0, sizeof(randseed));
951
952		rd = read(rsfd, randseed, fsb.st_size);
953		if (rd < 0)
954			err(EX_SOFTWARE, "reading random seed file %s",
955			    random_source);
956		if (rd < (ssize_t)fsb.st_size)
957			errx(EX_SOFTWARE, "short read from %s", random_source);
958	} else if (S_ISCHR(fsb.st_mode)) {
959		if (stat("/dev/random", &rsb) < 0)
960			err(EX_SOFTWARE, "stat");
961
962		if (fsb.st_dev != rsb.st_dev ||
963		    fsb.st_ino != rsb.st_ino)
964			errx(EX_USAGE, "random seed is a character "
965			    "device other than /dev/random");
966
967		if (getentropy(randseed, sizeof(randseed)) < 0)
968			err(EX_SOFTWARE, "getentropy");
969	}
970
971out:
972	if (rsfd >= 0)
973		close(rsfd);
974
975	MD5Init(&md5_ctx);
976	MD5Update(&md5_ctx, randseed, rd);
977}
978
979/*
980 * Main function.
981 */
982int
983main(int argc, char **argv)
984{
985	char *outfile, *real_outfile;
986	char *random_source = NULL;
987	int c, result;
988	bool mef_flags[NUMBER_OF_MUTUALLY_EXCLUSIVE_FLAGS] =
989	    { false, false, false, false, false, false };
990
991	result = 0;
992	outfile = sort_strdup("-");
993	real_outfile = NULL;
994
995	struct sort_mods *sm = &default_sort_mods_object;
996
997	init_tmp_files();
998
999	set_signal_handler();
1000
1001	set_hw_params();
1002	set_locale();
1003	set_tmpdir();
1004	set_sort_opts();
1005
1006	fix_obsolete_keys(&argc, argv);
1007
1008	while (((c = getopt_long(argc, argv, OPTIONS, long_options, NULL))
1009	    != -1)) {
1010
1011		check_mutually_exclusive_flags(c, mef_flags);
1012
1013		if (!set_sort_modifier(sm, c)) {
1014
1015			switch (c) {
1016			case 'c':
1017				sort_opts_vals.cflag = true;
1018				if (optarg) {
1019					if (!strcmp(optarg, "diagnose-first"))
1020						;
1021					else if (!strcmp(optarg, "silent") ||
1022					    !strcmp(optarg, "quiet"))
1023						sort_opts_vals.csilentflag = true;
1024					else if (*optarg)
1025						unknown(optarg);
1026				}
1027				break;
1028			case 'C':
1029				sort_opts_vals.cflag = true;
1030				sort_opts_vals.csilentflag = true;
1031				break;
1032			case 'k':
1033			{
1034				sort_opts_vals.complex_sort = true;
1035				sort_opts_vals.kflag = true;
1036
1037				keys_num++;
1038				keys = sort_realloc(keys, keys_num *
1039				    sizeof(struct key_specs));
1040				memset(&(keys[keys_num - 1]), 0,
1041				    sizeof(struct key_specs));
1042
1043				if (parse_k(optarg, &(keys[keys_num - 1]))
1044				    < 0) {
1045					errc(2, EINVAL, "-k %s", optarg);
1046				}
1047
1048				break;
1049			}
1050			case 'm':
1051				sort_opts_vals.mflag = true;
1052				break;
1053			case 'o':
1054				outfile = sort_realloc(outfile, (strlen(optarg) + 1));
1055				strcpy(outfile, optarg);
1056				break;
1057			case 's':
1058				sort_opts_vals.sflag = true;
1059				break;
1060			case 'S':
1061				available_free_memory =
1062				    parse_memory_buffer_value(optarg);
1063				break;
1064			case 'T':
1065				tmpdir = sort_strdup(optarg);
1066				break;
1067			case 't':
1068				while (strlen(optarg) > 1) {
1069					if (optarg[0] != '\\') {
1070						errc(2, EINVAL, "%s", optarg);
1071					}
1072					optarg += 1;
1073					if (*optarg == '0') {
1074						*optarg = 0;
1075						break;
1076					}
1077				}
1078				sort_opts_vals.tflag = true;
1079				sort_opts_vals.field_sep = btowc(optarg[0]);
1080				if (sort_opts_vals.field_sep == WEOF) {
1081					errno = EINVAL;
1082					err(2, NULL);
1083				}
1084				if (!gnusort_numeric_compatibility) {
1085					if (symbol_decimal_point == sort_opts_vals.field_sep)
1086						symbol_decimal_point = WEOF;
1087					if (symbol_thousands_sep == sort_opts_vals.field_sep)
1088						symbol_thousands_sep = WEOF;
1089					if (symbol_negative_sign == sort_opts_vals.field_sep)
1090						symbol_negative_sign = WEOF;
1091					if (symbol_positive_sign == sort_opts_vals.field_sep)
1092						symbol_positive_sign = WEOF;
1093				}
1094				break;
1095			case 'u':
1096				sort_opts_vals.uflag = true;
1097				/* stable sort for the correct unique val */
1098				sort_opts_vals.sflag = true;
1099				break;
1100			case 'z':
1101				sort_opts_vals.zflag = true;
1102				break;
1103			case SORT_OPT:
1104				if (optarg) {
1105					if (!strcmp(optarg, "general-numeric"))
1106						set_sort_modifier(sm, 'g');
1107					else if (!strcmp(optarg, "human-numeric"))
1108						set_sort_modifier(sm, 'h');
1109					else if (!strcmp(optarg, "numeric"))
1110						set_sort_modifier(sm, 'n');
1111					else if (!strcmp(optarg, "month"))
1112						set_sort_modifier(sm, 'M');
1113					else if (!strcmp(optarg, "random"))
1114						set_sort_modifier(sm, 'R');
1115					else
1116						unknown(optarg);
1117				}
1118				break;
1119#if defined(SORT_THREADS)
1120			case PARALLEL_OPT:
1121				nthreads = (size_t)(atoi(optarg));
1122				if (nthreads < 1)
1123					nthreads = 1;
1124				if (nthreads > 1024)
1125					nthreads = 1024;
1126				break;
1127#endif
1128			case QSORT_OPT:
1129				sort_opts_vals.sort_method = SORT_QSORT;
1130				break;
1131			case MERGESORT_OPT:
1132				sort_opts_vals.sort_method = SORT_MERGESORT;
1133				break;
1134			case MMAP_OPT:
1135				use_mmap = true;
1136				break;
1137			case HEAPSORT_OPT:
1138				sort_opts_vals.sort_method = SORT_HEAPSORT;
1139				break;
1140			case RADIXSORT_OPT:
1141				sort_opts_vals.sort_method = SORT_RADIXSORT;
1142				break;
1143			case RANDOMSOURCE_OPT:
1144				random_source = strdup(optarg);
1145				break;
1146			case COMPRESSPROGRAM_OPT:
1147				compress_program = strdup(optarg);
1148				break;
1149			case FF_OPT:
1150				read_fns_from_file0(optarg);
1151				break;
1152			case BS_OPT:
1153			{
1154				errno = 0;
1155				long mof = strtol(optarg, NULL, 10);
1156				if (errno != 0)
1157					err(2, "--batch-size");
1158				if (mof >= 2)
1159					max_open_files = (size_t) mof + 1;
1160			}
1161				break;
1162			case VERSION_OPT:
1163				printf("%s\n", VERSION);
1164				exit(EXIT_SUCCESS);
1165				/* NOTREACHED */
1166				break;
1167			case DEBUG_OPT:
1168				debug_sort = true;
1169				break;
1170			case HELP_OPT:
1171				usage(false);
1172				/* NOTREACHED */
1173				break;
1174			default:
1175				usage(true);
1176				/* NOTREACHED */
1177			}
1178		}
1179	}
1180
1181	argc -= optind;
1182	argv += optind;
1183
1184	if (argv_from_file0) {
1185		argc = argc_from_file0;
1186		argv = argv_from_file0;
1187	}
1188
1189	if (sort_opts_vals.cflag && sort_opts_vals.mflag)
1190		errx(1, "%c:%c: %s", 'm', 'c', getstr(1));
1191
1192	if (keys_num == 0) {
1193		keys_num = 1;
1194		keys = sort_realloc(keys, sizeof(struct key_specs));
1195		memset(&(keys[0]), 0, sizeof(struct key_specs));
1196		keys[0].c1 = 1;
1197		keys[0].pos1b = default_sort_mods->bflag;
1198		keys[0].pos2b = default_sort_mods->bflag;
1199		memcpy(&(keys[0].sm), default_sort_mods,
1200		    sizeof(struct sort_mods));
1201	}
1202
1203	for (size_t i = 0; i < keys_num; i++) {
1204		struct key_specs *ks;
1205
1206		ks = &(keys[i]);
1207
1208		if (sort_modifier_empty(&(ks->sm)) && !(ks->pos1b) &&
1209		    !(ks->pos2b)) {
1210			ks->pos1b = sm->bflag;
1211			ks->pos2b = sm->bflag;
1212			memcpy(&(ks->sm), sm, sizeof(struct sort_mods));
1213		}
1214
1215		ks->sm.func = get_sort_func(&(ks->sm));
1216	}
1217
1218	if (debug_sort) {
1219		printf("Memory to be used for sorting: %llu\n",available_free_memory);
1220#if defined(SORT_THREADS)
1221		printf("Number of CPUs: %d\n",(int)ncpu);
1222		nthreads = 1;
1223#endif
1224		printf("Using collate rules of %s locale\n",
1225		    setlocale(LC_COLLATE, NULL));
1226		if (byte_sort)
1227			printf("Byte sort is used\n");
1228		if (print_symbols_on_debug) {
1229			printf("Decimal Point: <%lc>\n", symbol_decimal_point);
1230			if (symbol_thousands_sep)
1231				printf("Thousands separator: <%lc>\n",
1232				    symbol_thousands_sep);
1233			printf("Positive sign: <%lc>\n", symbol_positive_sign);
1234			printf("Negative sign: <%lc>\n", symbol_negative_sign);
1235		}
1236	}
1237
1238	if (need_random)
1239		get_random_seed(random_source);
1240
1241	/* Case when the outfile equals one of the input files: */
1242	if (strcmp(outfile, "-")) {
1243
1244		for(int i = 0; i < argc; ++i) {
1245			if (strcmp(argv[i], outfile) == 0) {
1246				real_outfile = sort_strdup(outfile);
1247				for(;;) {
1248					char* tmp = sort_malloc(strlen(outfile) +
1249					    strlen(".tmp") + 1);
1250
1251					strcpy(tmp, outfile);
1252					strcpy(tmp + strlen(tmp), ".tmp");
1253					sort_free(outfile);
1254					outfile = tmp;
1255					if (access(outfile, F_OK) < 0)
1256						break;
1257				}
1258				tmp_file_atexit(outfile);
1259			}
1260		}
1261	}
1262
1263#if defined(SORT_THREADS)
1264	if ((argc < 1) || (strcmp(outfile, "-") == 0) || (*outfile == 0))
1265		nthreads = 1;
1266#endif
1267
1268	if (!sort_opts_vals.cflag && !sort_opts_vals.mflag) {
1269		struct file_list fl;
1270		struct sort_list list;
1271
1272		sort_list_init(&list);
1273		file_list_init(&fl, true);
1274
1275		if (argc < 1)
1276			procfile("-", &list, &fl);
1277		else {
1278			while (argc > 0) {
1279				procfile(*argv, &list, &fl);
1280				--argc;
1281				++argv;
1282			}
1283		}
1284
1285		if (fl.count < 1)
1286			sort_list_to_file(&list, outfile);
1287		else {
1288			if (list.count > 0) {
1289				char *flast = new_tmp_file_name();
1290
1291				sort_list_to_file(&list, flast);
1292				file_list_add(&fl, flast, false);
1293			}
1294			merge_files(&fl, outfile);
1295		}
1296
1297		file_list_clean(&fl);
1298
1299		/*
1300		 * We are about to exit the program, so we can ignore
1301		 * the clean-up for speed
1302		 *
1303		 * sort_list_clean(&list);
1304		 */
1305
1306	} else if (sort_opts_vals.cflag) {
1307		result = (argc == 0) ? (check("-")) : (check(*argv));
1308	} else if (sort_opts_vals.mflag) {
1309		struct file_list fl;
1310
1311		file_list_init(&fl, false);
1312		/* No file arguments remaining means "read from stdin." */
1313		if (argc == 0)
1314			file_list_add(&fl, "-", true);
1315		else
1316			file_list_populate(&fl, argc, argv, true);
1317		merge_files(&fl, outfile);
1318		file_list_clean(&fl);
1319	}
1320
1321	if (real_outfile) {
1322		unlink(real_outfile);
1323		if (rename(outfile, real_outfile) < 0)
1324			err(2, NULL);
1325		sort_free(real_outfile);
1326	}
1327
1328	sort_free(outfile);
1329
1330	return (result);
1331}
1332