1/* sort - sort lines of text (with all kinds of options). 2 Copyright (C) 1988, 1991-2005 Free Software Foundation, Inc. 3 4 This program is free software; you can redistribute it and/or modify 5 it under the terms of the GNU General Public License as published by 6 the Free Software Foundation; either version 2, or (at your option) 7 any later version. 8 9 This program is distributed in the hope that it will be useful, 10 but WITHOUT ANY WARRANTY; without even the implied warranty of 11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 12 GNU General Public License for more details. 13 14 You should have received a copy of the GNU General Public License 15 along with this program; if not, write to the Free Software Foundation, 16 Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 17 18 Written December 1988 by Mike Haertel. 19 The author may be reached (Email) at the address mike@gnu.ai.mit.edu, 20 or (US mail) as Mike Haertel c/o Free Software Foundation. 21 22 Ørn E. Hansen added NLS support in 1997. */ 23 24#include <config.h> 25 26#include <getopt.h> 27#include <sys/types.h> 28#include <signal.h> 29#include "system.h" 30#include "error.h" 31#include "hard-locale.h" 32#include "inttostr.h" 33#include "physmem.h" 34#include "posixver.h" 35#include "quote.h" 36#include "stdlib--.h" 37#include "stdio--.h" 38#include "strnumcmp.h" 39#include "xmemcoll.h" 40#include "xstrtol.h" 41 42#if HAVE_SYS_RESOURCE_H 43# include <sys/resource.h> 44#endif 45#ifndef RLIMIT_DATA 46struct rlimit { size_t rlim_cur; }; 47# define getrlimit(Resource, Rlp) (-1) 48#endif 49 50/* The official name of this program (e.g., no `g' prefix). */ 51#define PROGRAM_NAME "sort" 52 53#define AUTHORS "Mike Haertel", "Paul Eggert" 54 55#if HAVE_LANGINFO_CODESET 56# include <langinfo.h> 57#endif 58 59/* Use SA_NOCLDSTOP as a proxy for whether the sigaction machinery is 60 present. */ 61#ifndef SA_NOCLDSTOP 62# define SA_NOCLDSTOP 0 63# define sigprocmask(How, Set, Oset) /* empty */ 64# define sigset_t int 65# if ! HAVE_SIGINTERRUPT 66# define siginterrupt(sig, flag) /* empty */ 67# endif 68#endif 69 70#ifndef STDC_HEADERS 71double strtod (); 72#endif 73 74#define UCHAR_LIM (UCHAR_MAX + 1) 75 76#ifndef DEFAULT_TMPDIR 77# define DEFAULT_TMPDIR "/tmp" 78#endif 79 80/* Exit statuses. */ 81enum 82 { 83 /* POSIX says to exit with status 1 if invoked with -c and the 84 input is not properly sorted. */ 85 SORT_OUT_OF_ORDER = 1, 86 87 /* POSIX says any other irregular exit must exit with a status 88 code greater than 1. */ 89 SORT_FAILURE = 2 90 }; 91 92/* The representation of the decimal point in the current locale. */ 93static int decimal_point; 94 95/* Thousands separator; if -1, then there isn't one. */ 96static int thousands_sep; 97 98/* Nonzero if the corresponding locales are hard. */ 99static bool hard_LC_COLLATE; 100#if HAVE_NL_LANGINFO 101static bool hard_LC_TIME; 102#endif 103 104#define NONZERO(x) ((x) != 0) 105 106/* The kind of blanks for '-b' to skip in various options. */ 107enum blanktype { bl_start, bl_end, bl_both }; 108 109/* The character marking end of line. Default to \n. */ 110static char eolchar = '\n'; 111 112/* Lines are held in core as counted strings. */ 113struct line 114{ 115 char *text; /* Text of the line. */ 116 size_t length; /* Length including final newline. */ 117 char *keybeg; /* Start of first key. */ 118 char *keylim; /* Limit of first key. */ 119}; 120 121/* Input buffers. */ 122struct buffer 123{ 124 char *buf; /* Dynamically allocated buffer, 125 partitioned into 3 regions: 126 - input data; 127 - unused area; 128 - an array of lines, in reverse order. */ 129 size_t used; /* Number of bytes used for input data. */ 130 size_t nlines; /* Number of lines in the line array. */ 131 size_t alloc; /* Number of bytes allocated. */ 132 size_t left; /* Number of bytes left from previous reads. */ 133 size_t line_bytes; /* Number of bytes to reserve for each line. */ 134 bool eof; /* An EOF has been read. */ 135}; 136 137struct keyfield 138{ 139 size_t sword; /* Zero-origin 'word' to start at. */ 140 size_t schar; /* Additional characters to skip. */ 141 size_t eword; /* Zero-origin first word after field. */ 142 size_t echar; /* Additional characters in field. */ 143 bool const *ignore; /* Boolean array of characters to ignore. */ 144 char const *translate; /* Translation applied to characters. */ 145 bool skipsblanks; /* Skip leading blanks when finding start. */ 146 bool skipeblanks; /* Skip leading blanks when finding end. */ 147 bool numeric; /* Flag for numeric comparison. Handle 148 strings of digits with optional decimal 149 point, but no exponential notation. */ 150 bool general_numeric; /* Flag for general, numeric comparison. 151 Handle numbers in exponential notation. */ 152 bool month; /* Flag for comparison by month name. */ 153 bool reverse; /* Reverse the sense of comparison. */ 154 struct keyfield *next; /* Next keyfield to try. */ 155}; 156 157struct month 158{ 159 char const *name; 160 int val; 161}; 162 163/* The name this program was run with. */ 164char *program_name; 165 166/* FIXME: None of these tables work with multibyte character sets. 167 Also, there are many other bugs when handling multibyte characters. 168 One way to fix this is to rewrite `sort' to use wide characters 169 internally, but doing this with good performance is a bit 170 tricky. */ 171 172/* Table of blanks. */ 173static bool blanks[UCHAR_LIM]; 174 175/* Table of non-printing characters. */ 176static bool nonprinting[UCHAR_LIM]; 177 178/* Table of non-dictionary characters (not letters, digits, or blanks). */ 179static bool nondictionary[UCHAR_LIM]; 180 181/* Translation table folding lower case to upper. */ 182static char fold_toupper[UCHAR_LIM]; 183 184#define MONTHS_PER_YEAR 12 185 186/* Table mapping month names to integers. 187 Alphabetic order allows binary search. */ 188static struct month monthtab[] = 189{ 190 {"APR", 4}, 191 {"AUG", 8}, 192 {"DEC", 12}, 193 {"FEB", 2}, 194 {"JAN", 1}, 195 {"JUL", 7}, 196 {"JUN", 6}, 197 {"MAR", 3}, 198 {"MAY", 5}, 199 {"NOV", 11}, 200 {"OCT", 10}, 201 {"SEP", 9} 202}; 203 204/* During the merge phase, the number of files to merge at once. */ 205#define NMERGE 16 206 207/* Minimum size for a merge or check buffer. */ 208#define MIN_MERGE_BUFFER_SIZE (2 + sizeof (struct line)) 209 210/* Minimum sort size; the code might not work with smaller sizes. */ 211#define MIN_SORT_SIZE (NMERGE * MIN_MERGE_BUFFER_SIZE) 212 213/* The number of bytes needed for a merge or check buffer, which can 214 function relatively efficiently even if it holds only one line. If 215 a longer line is seen, this value is increased. */ 216static size_t merge_buffer_size = MAX (MIN_MERGE_BUFFER_SIZE, 256 * 1024); 217 218/* The approximate maximum number of bytes of main memory to use, as 219 specified by the user. Zero if the user has not specified a size. */ 220static size_t sort_size; 221 222/* The guessed size for non-regular files. */ 223#define INPUT_FILE_SIZE_GUESS (1024 * 1024) 224 225/* Array of directory names in which any temporary files are to be created. */ 226static char const **temp_dirs; 227 228/* Number of temporary directory names used. */ 229static size_t temp_dir_count; 230 231/* Number of allocated slots in temp_dirs. */ 232static size_t temp_dir_alloc; 233 234/* Flag to reverse the order of all comparisons. */ 235static bool reverse; 236 237/* Flag for stable sort. This turns off the last ditch bytewise 238 comparison of lines, and instead leaves lines in the same order 239 they were read if all keys compare equal. */ 240static bool stable; 241 242/* If TAB has this value, blanks separate fields. */ 243enum { TAB_DEFAULT = CHAR_MAX + 1 }; 244 245/* Tab character separating fields. If TAB_DEFAULT, then fields are 246 separated by the empty string between a non-blank character and a blank 247 character. */ 248static int tab = TAB_DEFAULT; 249 250/* Flag to remove consecutive duplicate lines from the output. 251 Only the last of a sequence of equal lines will be output. */ 252static bool unique; 253 254/* Nonzero if any of the input files are the standard input. */ 255static bool have_read_stdin; 256 257/* List of key field comparisons to be tried. */ 258static struct keyfield *keylist; 259 260static void sortlines_temp (struct line *, size_t, struct line *); 261 262/* Report MESSAGE for FILE, then clean up and exit. 263 If FILE is null, it represents standard output. */ 264 265static void die (char const *, char const *) ATTRIBUTE_NORETURN; 266static void 267die (char const *message, char const *file) 268{ 269 error (0, errno, "%s: %s", message, file ? file : _("standard output")); 270 exit (SORT_FAILURE); 271} 272 273void 274usage (int status) 275{ 276 if (status != EXIT_SUCCESS) 277 fprintf (stderr, _("Try `%s --help' for more information.\n"), 278 program_name); 279 else 280 { 281 printf (_("\ 282Usage: %s [OPTION]... [FILE]...\n\ 283"), 284 program_name); 285 fputs (_("\ 286Write sorted concatenation of all FILE(s) to standard output.\n\ 287\n\ 288"), stdout); 289 fputs (_("\ 290Mandatory arguments to long options are mandatory for short options too.\n\ 291"), stdout); 292 fputs (_("\ 293Ordering options:\n\ 294\n\ 295"), stdout); 296 fputs (_("\ 297 -b, --ignore-leading-blanks ignore leading blanks\n\ 298 -d, --dictionary-order consider only blanks and alphanumeric characters\n\ 299 -f, --ignore-case fold lower case to upper case characters\n\ 300"), stdout); 301 fputs (_("\ 302 -g, --general-numeric-sort compare according to general numerical value\n\ 303 -i, --ignore-nonprinting consider only printable characters\n\ 304 -M, --month-sort compare (unknown) < `JAN' < ... < `DEC'\n\ 305 -n, --numeric-sort compare according to string numerical value\n\ 306 -r, --reverse reverse the result of comparisons\n\ 307\n\ 308"), stdout); 309 fputs (_("\ 310Other options:\n\ 311\n\ 312 -c, --check check whether input is sorted; do not sort\n\ 313 -k, --key=POS1[,POS2] start a key at POS1, end it at POS2 (origin 1)\n\ 314 -m, --merge merge already sorted files; do not sort\n\ 315 -o, --output=FILE write result to FILE instead of standard output\n\ 316 -s, --stable stabilize sort by disabling last-resort comparison\n\ 317 -S, --buffer-size=SIZE use SIZE for main memory buffer\n\ 318"), stdout); 319 printf (_("\ 320 -t, --field-separator=SEP use SEP instead of non-blank to blank transition\n\ 321 -T, --temporary-directory=DIR use DIR for temporaries, not $TMPDIR or %s;\n\ 322 multiple options specify multiple directories\n\ 323 -u, --unique with -c, check for strict ordering;\n\ 324 without -c, output only the first of an equal run\n\ 325"), DEFAULT_TMPDIR); 326 fputs (_("\ 327 -z, --zero-terminated end lines with 0 byte, not newline\n\ 328"), stdout); 329 fputs (HELP_OPTION_DESCRIPTION, stdout); 330 fputs (VERSION_OPTION_DESCRIPTION, stdout); 331 fputs (_("\ 332\n\ 333POS is F[.C][OPTS], where F is the field number and C the character position\n\ 334in the field. OPTS is one or more single-letter ordering options, which\n\ 335override global ordering options for that key. If no key is given, use the\n\ 336entire line as the key.\n\ 337\n\ 338SIZE may be followed by the following multiplicative suffixes:\n\ 339"), stdout); 340 fputs (_("\ 341% 1% of memory, b 1, K 1024 (default), and so on for M, G, T, P, E, Z, Y.\n\ 342\n\ 343With no FILE, or when FILE is -, read standard input.\n\ 344\n\ 345*** WARNING ***\n\ 346The locale specified by the environment affects sort order.\n\ 347Set LC_ALL=C to get the traditional sort order that uses\n\ 348native byte values.\n\ 349"), stdout ); 350 printf (_("\nReport bugs to <%s>.\n"), PACKAGE_BUGREPORT); 351 } 352 353 exit (status); 354} 355 356static char const short_options[] = "-bcdfgik:mMno:rsS:t:T:uy:z"; 357 358static struct option const long_options[] = 359{ 360 {"ignore-leading-blanks", no_argument, NULL, 'b'}, 361 {"check", no_argument, NULL, 'c'}, 362 {"dictionary-order", no_argument, NULL, 'd'}, 363 {"ignore-case", no_argument, NULL, 'f'}, 364 {"general-numeric-sort", no_argument, NULL, 'g'}, 365 {"ignore-nonprinting", no_argument, NULL, 'i'}, 366 {"key", required_argument, NULL, 'k'}, 367 {"merge", no_argument, NULL, 'm'}, 368 {"month-sort", no_argument, NULL, 'M'}, 369 {"numeric-sort", no_argument, NULL, 'n'}, 370 {"output", required_argument, NULL, 'o'}, 371 {"reverse", no_argument, NULL, 'r'}, 372 {"stable", no_argument, NULL, 's'}, 373 {"buffer-size", required_argument, NULL, 'S'}, 374 {"field-separator", required_argument, NULL, 't'}, 375 {"temporary-directory", required_argument, NULL, 'T'}, 376 {"unique", no_argument, NULL, 'u'}, 377 {"zero-terminated", no_argument, NULL, 'z'}, 378 {GETOPT_HELP_OPTION_DECL}, 379 {GETOPT_VERSION_OPTION_DECL}, 380 {NULL, 0, NULL, 0}, 381}; 382 383/* The set of signals that are caught. */ 384static sigset_t caught_signals; 385 386/* The list of temporary files. */ 387struct tempnode 388{ 389 struct tempnode *volatile next; 390 char name[1]; /* Actual size is 1 + file name length. */ 391}; 392static struct tempnode *volatile temphead; 393static struct tempnode *volatile *temptail = &temphead; 394 395/* Clean up any remaining temporary files. */ 396 397static void 398cleanup (void) 399{ 400 struct tempnode const *node; 401 402 for (node = temphead; node; node = node->next) 403 unlink (node->name); 404} 405 406/* Create a new temporary file, returning its newly allocated name. 407 Store into *PFP a stream open for writing. */ 408 409static char * 410create_temp_file (FILE **pfp) 411{ 412 static char const slashbase[] = "/sortXXXXXX"; 413 static size_t temp_dir_index; 414 sigset_t oldset; 415 int fd; 416 int saved_errno; 417 char const *temp_dir = temp_dirs[temp_dir_index]; 418 size_t len = strlen (temp_dir); 419 struct tempnode *node = 420 xmalloc (offsetof (struct tempnode, name) + len + sizeof slashbase); 421 char *file = node->name; 422 423 memcpy (file, temp_dir, len); 424 memcpy (file + len, slashbase, sizeof slashbase); 425 node->next = NULL; 426 if (++temp_dir_index == temp_dir_count) 427 temp_dir_index = 0; 428 429 /* Create the temporary file in a critical section, to avoid races. */ 430 sigprocmask (SIG_BLOCK, &caught_signals, &oldset); 431 fd = mkstemp (file); 432 if (0 <= fd) 433 { 434 *temptail = node; 435 temptail = &node->next; 436 } 437 saved_errno = errno; 438 sigprocmask (SIG_SETMASK, &oldset, NULL); 439 errno = saved_errno; 440 441 if (fd < 0 || (*pfp = fdopen (fd, "w")) == NULL) 442 die (_("cannot create temporary file"), file); 443 444 return file; 445} 446 447/* Return a stream for FILE, opened with mode HOW. A null FILE means 448 standard output; HOW should be "w". When opening for input, "-" 449 means standard input. To avoid confusion, do not return file 450 descriptors STDIN_FILENO, STDOUT_FILENO, or STDERR_FILENO when 451 opening an ordinary FILE. */ 452 453static FILE * 454xfopen (const char *file, const char *how) 455{ 456 FILE *fp; 457 458 if (!file) 459 fp = stdout; 460 else if (STREQ (file, "-") && *how == 'r') 461 { 462 have_read_stdin = true; 463 fp = stdin; 464 } 465 else 466 { 467 fp = fopen (file, how); 468 if (! fp) 469 die (_("open failed"), file); 470 } 471 472 return fp; 473} 474 475/* Close FP, whose name is FILE, and report any errors. */ 476 477static void 478xfclose (FILE *fp, char const *file) 479{ 480 switch (fileno (fp)) 481 { 482 case STDIN_FILENO: 483 /* Allow reading stdin from tty more than once. */ 484 if (feof (fp)) 485 clearerr (fp); 486 break; 487 488 case STDOUT_FILENO: 489 /* Don't close stdout just yet. close_stdout does that. */ 490 if (fflush (fp) != 0) 491 die (_("fflush failed"), file); 492 break; 493 494 default: 495 if (fclose (fp) != 0) 496 die (_("close failed"), file); 497 break; 498 } 499} 500 501static void 502write_bytes (const char *buf, size_t n_bytes, FILE *fp, const char *output_file) 503{ 504 if (fwrite (buf, 1, n_bytes, fp) != n_bytes) 505 die (_("write failed"), output_file); 506} 507 508/* Append DIR to the array of temporary directory names. */ 509static void 510add_temp_dir (char const *dir) 511{ 512 if (temp_dir_count == temp_dir_alloc) 513 temp_dirs = X2NREALLOC (temp_dirs, &temp_dir_alloc); 514 515 temp_dirs[temp_dir_count++] = dir; 516} 517 518/* Remove NAME from the list of temporary files. */ 519 520static void 521zaptemp (const char *name) 522{ 523 struct tempnode *volatile *pnode; 524 struct tempnode *node; 525 struct tempnode *next; 526 sigset_t oldset; 527 int unlink_status; 528 int unlink_errno = 0; 529 530 for (pnode = &temphead; (node = *pnode)->name != name; pnode = &node->next) 531 continue; 532 533 /* Unlink the temporary file in a critical section to avoid races. */ 534 next = node->next; 535 sigprocmask (SIG_BLOCK, &caught_signals, &oldset); 536 unlink_status = unlink (name); 537 unlink_errno = errno; 538 *pnode = next; 539 sigprocmask (SIG_SETMASK, &oldset, NULL); 540 541 if (unlink_status != 0) 542 error (0, unlink_errno, _("warning: cannot remove: %s"), name); 543 if (! next) 544 temptail = pnode; 545 free (node); 546} 547 548#if HAVE_NL_LANGINFO 549 550static int 551struct_month_cmp (const void *m1, const void *m2) 552{ 553 struct month const *month1 = m1; 554 struct month const *month2 = m2; 555 return strcmp (month1->name, month2->name); 556} 557 558#endif 559 560/* Initialize the character class tables. */ 561 562static void 563inittables (void) 564{ 565 size_t i; 566 567 for (i = 0; i < UCHAR_LIM; ++i) 568 { 569 blanks[i] = !!ISBLANK (i); 570 nonprinting[i] = !ISPRINT (i); 571 nondictionary[i] = !ISALNUM (i) && !ISBLANK (i); 572 fold_toupper[i] = (ISLOWER (i) ? toupper (i) : i); 573 } 574 575#if HAVE_NL_LANGINFO 576 /* If we're not in the "C" locale, read different names for months. */ 577 if (hard_LC_TIME) 578 { 579 for (i = 0; i < MONTHS_PER_YEAR; i++) 580 { 581 char const *s; 582 size_t s_len; 583 size_t j; 584 char *name; 585 586 s = (char *) nl_langinfo (ABMON_1 + i); 587 s_len = strlen (s); 588 monthtab[i].name = name = xmalloc (s_len + 1); 589 monthtab[i].val = i + 1; 590 591 for (j = 0; j < s_len; j++) 592 name[j] = fold_toupper[to_uchar (s[j])]; 593 name[j] = '\0'; 594 } 595 qsort ((void *) monthtab, MONTHS_PER_YEAR, 596 sizeof *monthtab, struct_month_cmp); 597 } 598#endif 599} 600 601/* Specify the amount of main memory to use when sorting. */ 602static void 603specify_sort_size (char const *s) 604{ 605 uintmax_t n; 606 char *suffix; 607 enum strtol_error e = xstrtoumax (s, &suffix, 10, &n, "EgGkKmMPtTYZ"); 608 609 /* The default unit is KiB. */ 610 if (e == LONGINT_OK && ISDIGIT (suffix[-1])) 611 { 612 if (n <= UINTMAX_MAX / 1024) 613 n *= 1024; 614 else 615 e = LONGINT_OVERFLOW; 616 } 617 618 /* A 'b' suffix means bytes; a '%' suffix means percent of memory. */ 619 if (e == LONGINT_INVALID_SUFFIX_CHAR && ISDIGIT (suffix[-1]) && ! suffix[1]) 620 switch (suffix[0]) 621 { 622 case 'b': 623 e = LONGINT_OK; 624 break; 625 626 case '%': 627 { 628 double mem = physmem_total () * n / 100; 629 630 /* Use "<", not "<=", to avoid problems with rounding. */ 631 if (mem < UINTMAX_MAX) 632 { 633 n = mem; 634 e = LONGINT_OK; 635 } 636 else 637 e = LONGINT_OVERFLOW; 638 } 639 break; 640 } 641 642 if (e == LONGINT_OK) 643 { 644 /* If multiple sort sizes are specified, take the maximum, so 645 that option order does not matter. */ 646 if (n < sort_size) 647 return; 648 649 sort_size = n; 650 if (sort_size == n) 651 { 652 sort_size = MAX (sort_size, MIN_SORT_SIZE); 653 return; 654 } 655 656 e = LONGINT_OVERFLOW; 657 } 658 659 STRTOL_FATAL_ERROR (s, _("sort size"), e); 660} 661 662/* Return the default sort size. */ 663static size_t 664default_sort_size (void) 665{ 666 /* Let MEM be available memory or 1/8 of total memory, whichever 667 is greater. */ 668 double avail = physmem_available (); 669 double total = physmem_total (); 670 double mem = MAX (avail, total / 8); 671 struct rlimit rlimit; 672 673 /* Let SIZE be MEM, but no more than the maximum object size or 674 system resource limits. Avoid the MIN macro here, as it is not 675 quite right when only one argument is floating point. Don't 676 bother to check for values like RLIM_INFINITY since in practice 677 they are not much less than SIZE_MAX. */ 678 size_t size = SIZE_MAX; 679 if (mem < size) 680 size = mem; 681 if (getrlimit (RLIMIT_DATA, &rlimit) == 0 && rlimit.rlim_cur < size) 682 size = rlimit.rlim_cur; 683#ifdef RLIMIT_AS 684 if (getrlimit (RLIMIT_AS, &rlimit) == 0 && rlimit.rlim_cur < size) 685 size = rlimit.rlim_cur; 686#endif 687 688 /* Leave a large safety margin for the above limits, as failure can 689 occur when they are exceeded. */ 690 size /= 2; 691 692#ifdef RLIMIT_RSS 693 /* Leave a 1/16 margin for RSS to leave room for code, stack, etc. 694 Exceeding RSS is not fatal, but can be quite slow. */ 695 if (getrlimit (RLIMIT_RSS, &rlimit) == 0 && rlimit.rlim_cur / 16 * 15 < size) 696 size = rlimit.rlim_cur / 16 * 15; 697#endif 698 699 /* Use no less than the minimum. */ 700 return MAX (size, MIN_SORT_SIZE); 701} 702 703/* Return the sort buffer size to use with the input files identified 704 by FPS and FILES, which are alternate names of the same files. 705 NFILES gives the number of input files; NFPS may be less. Assume 706 that each input line requires LINE_BYTES extra bytes' worth of line 707 information. Do not exceed the size bound specified by the user 708 (or a default size bound, if the user does not specify one). */ 709 710static size_t 711sort_buffer_size (FILE *const *fps, size_t nfps, 712 char *const *files, size_t nfiles, 713 size_t line_bytes) 714{ 715 /* A bound on the input size. If zero, the bound hasn't been 716 determined yet. */ 717 static size_t size_bound; 718 719 /* In the worst case, each input byte is a newline. */ 720 size_t worst_case_per_input_byte = line_bytes + 1; 721 722 /* Keep enough room for one extra input line and an extra byte. 723 This extra room might be needed when preparing to read EOF. */ 724 size_t size = worst_case_per_input_byte + 1; 725 726 size_t i; 727 728 for (i = 0; i < nfiles; i++) 729 { 730 struct stat st; 731 off_t file_size; 732 size_t worst_case; 733 734 if ((i < nfps ? fstat (fileno (fps[i]), &st) 735 : STREQ (files[i], "-") ? fstat (STDIN_FILENO, &st) 736 : stat (files[i], &st)) 737 != 0) 738 die (_("stat failed"), files[i]); 739 740 if (S_ISREG (st.st_mode)) 741 file_size = st.st_size; 742 else 743 { 744 /* The file has unknown size. If the user specified a sort 745 buffer size, use that; otherwise, guess the size. */ 746 if (sort_size) 747 return sort_size; 748 file_size = INPUT_FILE_SIZE_GUESS; 749 } 750 751 if (! size_bound) 752 { 753 size_bound = sort_size; 754 if (! size_bound) 755 size_bound = default_sort_size (); 756 } 757 758 /* Add the amount of memory needed to represent the worst case 759 where the input consists entirely of newlines followed by a 760 single non-newline. Check for overflow. */ 761 worst_case = file_size * worst_case_per_input_byte + 1; 762 if (file_size != worst_case / worst_case_per_input_byte 763 || size_bound - size <= worst_case) 764 return size_bound; 765 size += worst_case; 766 } 767 768 return size; 769} 770 771/* Initialize BUF. Reserve LINE_BYTES bytes for each line; LINE_BYTES 772 must be at least sizeof (struct line). Allocate ALLOC bytes 773 initially. */ 774 775static void 776initbuf (struct buffer *buf, size_t line_bytes, size_t alloc) 777{ 778 /* Ensure that the line array is properly aligned. If the desired 779 size cannot be allocated, repeatedly halve it until allocation 780 succeeds. The smaller allocation may hurt overall performance, 781 but that's better than failing. */ 782 for (;;) 783 { 784 alloc += sizeof (struct line) - alloc % sizeof (struct line); 785 buf->buf = malloc (alloc); 786 if (buf->buf) 787 break; 788 alloc /= 2; 789 if (alloc <= line_bytes + 1) 790 xalloc_die (); 791 } 792 793 buf->line_bytes = line_bytes; 794 buf->alloc = alloc; 795 buf->used = buf->left = buf->nlines = 0; 796 buf->eof = false; 797} 798 799/* Return one past the limit of the line array. */ 800 801static inline struct line * 802buffer_linelim (struct buffer const *buf) 803{ 804 return (struct line *) (buf->buf + buf->alloc); 805} 806 807/* Return a pointer to the first character of the field specified 808 by KEY in LINE. */ 809 810static char * 811begfield (const struct line *line, const struct keyfield *key) 812{ 813 char *ptr = line->text, *lim = ptr + line->length - 1; 814 size_t sword = key->sword; 815 size_t schar = key->schar; 816 size_t remaining_bytes; 817 818 /* The leading field separator itself is included in a field when -t 819 is absent. */ 820 821 if (tab != TAB_DEFAULT) 822 while (ptr < lim && sword--) 823 { 824 while (ptr < lim && *ptr != tab) 825 ++ptr; 826 if (ptr < lim) 827 ++ptr; 828 } 829 else 830 while (ptr < lim && sword--) 831 { 832 while (ptr < lim && blanks[to_uchar (*ptr)]) 833 ++ptr; 834 while (ptr < lim && !blanks[to_uchar (*ptr)]) 835 ++ptr; 836 } 837 838 if (key->skipsblanks) 839 while (ptr < lim && blanks[to_uchar (*ptr)]) 840 ++ptr; 841 842 /* Advance PTR by SCHAR (if possible), but no further than LIM. */ 843 remaining_bytes = lim - ptr; 844 if (schar < remaining_bytes) 845 ptr += schar; 846 else 847 ptr = lim; 848 849 return ptr; 850} 851 852/* Return the limit of (a pointer to the first character after) the field 853 in LINE specified by KEY. */ 854 855static char * 856limfield (const struct line *line, const struct keyfield *key) 857{ 858 char *ptr = line->text, *lim = ptr + line->length - 1; 859 size_t eword = key->eword, echar = key->echar; 860 size_t remaining_bytes; 861 862 /* Move PTR past EWORD fields or to one past the last byte on LINE, 863 whichever comes first. If there are more than EWORD fields, leave 864 PTR pointing at the beginning of the field having zero-based index, 865 EWORD. If a delimiter character was specified (via -t), then that 866 `beginning' is the first character following the delimiting TAB. 867 Otherwise, leave PTR pointing at the first `blank' character after 868 the preceding field. */ 869 if (tab != TAB_DEFAULT) 870 while (ptr < lim && eword--) 871 { 872 while (ptr < lim && *ptr != tab) 873 ++ptr; 874 if (ptr < lim && (eword | echar)) 875 ++ptr; 876 } 877 else 878 while (ptr < lim && eword--) 879 { 880 while (ptr < lim && blanks[to_uchar (*ptr)]) 881 ++ptr; 882 while (ptr < lim && !blanks[to_uchar (*ptr)]) 883 ++ptr; 884 } 885 886#ifdef POSIX_UNSPECIFIED 887 /* The following block of code makes GNU sort incompatible with 888 standard Unix sort, so it's ifdef'd out for now. 889 The POSIX spec isn't clear on how to interpret this. 890 FIXME: request clarification. 891 892 From: kwzh@gnu.ai.mit.edu (Karl Heuer) 893 Date: Thu, 30 May 96 12:20:41 -0400 894 [Translated to POSIX 1003.1-2001 terminology by Paul Eggert.] 895 896 [...]I believe I've found another bug in `sort'. 897 898 $ cat /tmp/sort.in 899 a b c 2 d 900 pq rs 1 t 901 $ textutils-1.15/src/sort -k1.7,1.7 </tmp/sort.in 902 a b c 2 d 903 pq rs 1 t 904 $ /bin/sort -k1.7,1.7 </tmp/sort.in 905 pq rs 1 t 906 a b c 2 d 907 908 Unix sort produced the answer I expected: sort on the single character 909 in column 7. GNU sort produced different results, because it disagrees 910 on the interpretation of the key-end spec "M.N". Unix sort reads this 911 as "skip M-1 fields, then N-1 characters"; but GNU sort wants it to mean 912 "skip M-1 fields, then either N-1 characters or the rest of the current 913 field, whichever comes first". This extra clause applies only to 914 key-ends, not key-starts. 915 */ 916 917 /* Make LIM point to the end of (one byte past) the current field. */ 918 if (tab != TAB_DEFAULT) 919 { 920 char *newlim; 921 newlim = memchr (ptr, tab, lim - ptr); 922 if (newlim) 923 lim = newlim; 924 } 925 else 926 { 927 char *newlim; 928 newlim = ptr; 929 while (newlim < lim && blanks[to_uchar (*newlim)]) 930 ++newlim; 931 while (newlim < lim && !blanks[to_uchar (*newlim)]) 932 ++newlim; 933 lim = newlim; 934 } 935#endif 936 937 /* If we're ignoring leading blanks when computing the End 938 of the field, don't start counting bytes until after skipping 939 past any leading blanks. */ 940 if (key->skipeblanks) 941 while (ptr < lim && blanks[to_uchar (*ptr)]) 942 ++ptr; 943 944 /* Advance PTR by ECHAR (if possible), but no further than LIM. */ 945 remaining_bytes = lim - ptr; 946 if (echar < remaining_bytes) 947 ptr += echar; 948 else 949 ptr = lim; 950 951 return ptr; 952} 953 954/* Fill BUF reading from FP, moving buf->left bytes from the end 955 of buf->buf to the beginning first. If EOF is reached and the 956 file wasn't terminated by a newline, supply one. Set up BUF's line 957 table too. FILE is the name of the file corresponding to FP. 958 Return true if some input was read. */ 959 960static bool 961fillbuf (struct buffer *buf, FILE *fp, char const *file) 962{ 963 struct keyfield const *key = keylist; 964 char eol = eolchar; 965 size_t line_bytes = buf->line_bytes; 966 size_t mergesize = merge_buffer_size - MIN_MERGE_BUFFER_SIZE; 967 968 if (buf->eof) 969 return false; 970 971 if (buf->used != buf->left) 972 { 973 memmove (buf->buf, buf->buf + buf->used - buf->left, buf->left); 974 buf->used = buf->left; 975 buf->nlines = 0; 976 } 977 978 for (;;) 979 { 980 char *ptr = buf->buf + buf->used; 981 struct line *linelim = buffer_linelim (buf); 982 struct line *line = linelim - buf->nlines; 983 size_t avail = (char *) linelim - buf->nlines * line_bytes - ptr; 984 char *line_start = buf->nlines ? line->text + line->length : buf->buf; 985 986 while (line_bytes + 1 < avail) 987 { 988 /* Read as many bytes as possible, but do not read so many 989 bytes that there might not be enough room for the 990 corresponding line array. The worst case is when the 991 rest of the input file consists entirely of newlines, 992 except that the last byte is not a newline. */ 993 size_t readsize = (avail - 1) / (line_bytes + 1); 994 size_t bytes_read = fread (ptr, 1, readsize, fp); 995 char *ptrlim = ptr + bytes_read; 996 char *p; 997 avail -= bytes_read; 998 999 if (bytes_read != readsize) 1000 { 1001 if (ferror (fp)) 1002 die (_("read failed"), file); 1003 if (feof (fp)) 1004 { 1005 buf->eof = true; 1006 if (buf->buf == ptrlim) 1007 return false; 1008 if (ptrlim[-1] != eol) 1009 *ptrlim++ = eol; 1010 } 1011 } 1012 1013 /* Find and record each line in the just-read input. */ 1014 while ((p = memchr (ptr, eol, ptrlim - ptr))) 1015 { 1016 ptr = p + 1; 1017 line--; 1018 line->text = line_start; 1019 line->length = ptr - line_start; 1020 mergesize = MAX (mergesize, line->length); 1021 avail -= line_bytes; 1022 1023 if (key) 1024 { 1025 /* Precompute the position of the first key for 1026 efficiency. */ 1027 line->keylim = (key->eword == SIZE_MAX 1028 ? p 1029 : limfield (line, key)); 1030 1031 if (key->sword != SIZE_MAX) 1032 line->keybeg = begfield (line, key); 1033 else 1034 { 1035 if (key->skipsblanks) 1036 while (blanks[to_uchar (*line_start)]) 1037 line_start++; 1038 line->keybeg = line_start; 1039 } 1040 } 1041 1042 line_start = ptr; 1043 } 1044 1045 ptr = ptrlim; 1046 if (buf->eof) 1047 break; 1048 } 1049 1050 buf->used = ptr - buf->buf; 1051 buf->nlines = buffer_linelim (buf) - line; 1052 if (buf->nlines != 0) 1053 { 1054 buf->left = ptr - line_start; 1055 merge_buffer_size = mergesize + MIN_MERGE_BUFFER_SIZE; 1056 return true; 1057 } 1058 1059 /* The current input line is too long to fit in the buffer. 1060 Double the buffer size and try again. */ 1061 buf->buf = X2REALLOC (buf->buf, &buf->alloc); 1062 } 1063} 1064 1065/* Compare strings A and B as numbers without explicitly converting them to 1066 machine numbers. Comparatively slow for short strings, but asymptotically 1067 hideously fast. */ 1068 1069static int 1070numcompare (const char *a, const char *b) 1071{ 1072 while (blanks[to_uchar (*a)]) 1073 a++; 1074 while (blanks[to_uchar (*b)]) 1075 b++; 1076 1077 return strnumcmp (a, b, decimal_point, thousands_sep); 1078} 1079 1080static int 1081general_numcompare (const char *sa, const char *sb) 1082{ 1083 /* FIXME: add option to warn about failed conversions. */ 1084 /* FIXME: maybe add option to try expensive FP conversion 1085 only if A and B can't be compared more cheaply/accurately. */ 1086 1087 char *ea; 1088 char *eb; 1089 double a = strtod (sa, &ea); 1090 double b = strtod (sb, &eb); 1091 1092 /* Put conversion errors at the start of the collating sequence. */ 1093 if (sa == ea) 1094 return sb == eb ? 0 : -1; 1095 if (sb == eb) 1096 return 1; 1097 1098 /* Sort numbers in the usual way, where -0 == +0. Put NaNs after 1099 conversion errors but before numbers; sort them by internal 1100 bit-pattern, for lack of a more portable alternative. */ 1101 return (a < b ? -1 1102 : a > b ? 1 1103 : a == b ? 0 1104 : b == b ? -1 1105 : a == a ? 1 1106 : memcmp ((char *) &a, (char *) &b, sizeof a)); 1107} 1108 1109/* Return an integer in 1..12 of the month name MONTH with length LEN. 1110 Return 0 if the name in S is not recognized. */ 1111 1112static int 1113getmonth (char const *month, size_t len) 1114{ 1115 size_t lo = 0; 1116 size_t hi = MONTHS_PER_YEAR; 1117 char const *monthlim = month + len; 1118 1119 for (;;) 1120 { 1121 if (month == monthlim) 1122 return 0; 1123 if (!blanks[to_uchar (*month)]) 1124 break; 1125 ++month; 1126 } 1127 1128 do 1129 { 1130 size_t ix = (lo + hi) / 2; 1131 char const *m = month; 1132 char const *n = monthtab[ix].name; 1133 1134 for (;; m++, n++) 1135 { 1136 if (!*n) 1137 return monthtab[ix].val; 1138 if (m == monthlim || fold_toupper[to_uchar (*m)] < to_uchar (*n)) 1139 { 1140 hi = ix; 1141 break; 1142 } 1143 else if (fold_toupper[to_uchar (*m)] > to_uchar (*n)) 1144 { 1145 lo = ix + 1; 1146 break; 1147 } 1148 } 1149 } 1150 while (lo < hi); 1151 1152 return 0; 1153} 1154 1155/* Compare two lines A and B trying every key in sequence until there 1156 are no more keys or a difference is found. */ 1157 1158static int 1159keycompare (const struct line *a, const struct line *b) 1160{ 1161 struct keyfield const *key = keylist; 1162 1163 /* For the first iteration only, the key positions have been 1164 precomputed for us. */ 1165 char *texta = a->keybeg; 1166 char *textb = b->keybeg; 1167 char *lima = a->keylim; 1168 char *limb = b->keylim; 1169 1170 int diff; 1171 1172 for (;;) 1173 { 1174 char const *translate = key->translate; 1175 bool const *ignore = key->ignore; 1176 1177 /* Find the lengths. */ 1178 size_t lena = lima <= texta ? 0 : lima - texta; 1179 size_t lenb = limb <= textb ? 0 : limb - textb; 1180 1181 /* Actually compare the fields. */ 1182 if (key->numeric | key->general_numeric) 1183 { 1184 char savea = *lima, saveb = *limb; 1185 1186 *lima = *limb = '\0'; 1187 diff = ((key->numeric ? numcompare : general_numcompare) 1188 (texta, textb)); 1189 *lima = savea, *limb = saveb; 1190 } 1191 else if (key->month) 1192 diff = getmonth (texta, lena) - getmonth (textb, lenb); 1193 /* Sorting like this may become slow, so in a simple locale the user 1194 can select a faster sort that is similar to ascii sort. */ 1195 else if (hard_LC_COLLATE) 1196 { 1197 if (ignore || translate) 1198 { 1199 char buf[4000]; 1200 size_t size = lena + 1 + lenb + 1; 1201 char *copy_a = (size <= sizeof buf ? buf : xmalloc (size)); 1202 char *copy_b = copy_a + lena + 1; 1203 size_t new_len_a, new_len_b, i; 1204 1205 /* Ignore and/or translate chars before comparing. */ 1206 for (new_len_a = new_len_b = i = 0; i < MAX (lena, lenb); i++) 1207 { 1208 if (i < lena) 1209 { 1210 copy_a[new_len_a] = (translate 1211 ? translate[to_uchar (texta[i])] 1212 : texta[i]); 1213 if (!ignore || !ignore[to_uchar (texta[i])]) 1214 ++new_len_a; 1215 } 1216 if (i < lenb) 1217 { 1218 copy_b[new_len_b] = (translate 1219 ? translate[to_uchar (textb[i])] 1220 : textb [i]); 1221 if (!ignore || !ignore[to_uchar (textb[i])]) 1222 ++new_len_b; 1223 } 1224 } 1225 1226 diff = xmemcoll (copy_a, new_len_a, copy_b, new_len_b); 1227 1228 if (sizeof buf < size) 1229 free (copy_a); 1230 } 1231 else if (lena == 0) 1232 diff = - NONZERO (lenb); 1233 else if (lenb == 0) 1234 goto greater; 1235 else 1236 diff = xmemcoll (texta, lena, textb, lenb); 1237 } 1238 else if (ignore) 1239 { 1240#define CMP_WITH_IGNORE(A, B) \ 1241 do \ 1242 { \ 1243 for (;;) \ 1244 { \ 1245 while (texta < lima && ignore[to_uchar (*texta)]) \ 1246 ++texta; \ 1247 while (textb < limb && ignore[to_uchar (*textb)]) \ 1248 ++textb; \ 1249 if (! (texta < lima && textb < limb)) \ 1250 break; \ 1251 diff = to_uchar (A) - to_uchar (B); \ 1252 if (diff) \ 1253 goto not_equal; \ 1254 ++texta; \ 1255 ++textb; \ 1256 } \ 1257 \ 1258 diff = (texta < lima) - (textb < limb); \ 1259 } \ 1260 while (0) 1261 1262 if (translate) 1263 CMP_WITH_IGNORE (translate[to_uchar (*texta)], 1264 translate[to_uchar (*textb)]); 1265 else 1266 CMP_WITH_IGNORE (*texta, *textb); 1267 } 1268 else if (lena == 0) 1269 diff = - NONZERO (lenb); 1270 else if (lenb == 0) 1271 goto greater; 1272 else 1273 { 1274 if (translate) 1275 { 1276 while (texta < lima && textb < limb) 1277 { 1278 diff = (to_uchar (translate[to_uchar (*texta++)]) 1279 - to_uchar (translate[to_uchar (*textb++)])); 1280 if (diff) 1281 goto not_equal; 1282 } 1283 } 1284 else 1285 { 1286 diff = memcmp (texta, textb, MIN (lena, lenb)); 1287 if (diff) 1288 goto not_equal; 1289 } 1290 diff = lena < lenb ? -1 : lena != lenb; 1291 } 1292 1293 if (diff) 1294 goto not_equal; 1295 1296 key = key->next; 1297 if (! key) 1298 break; 1299 1300 /* Find the beginning and limit of the next field. */ 1301 if (key->eword != SIZE_MAX) 1302 lima = limfield (a, key), limb = limfield (b, key); 1303 else 1304 lima = a->text + a->length - 1, limb = b->text + b->length - 1; 1305 1306 if (key->sword != SIZE_MAX) 1307 texta = begfield (a, key), textb = begfield (b, key); 1308 else 1309 { 1310 texta = a->text, textb = b->text; 1311 if (key->skipsblanks) 1312 { 1313 while (texta < lima && blanks[to_uchar (*texta)]) 1314 ++texta; 1315 while (textb < limb && blanks[to_uchar (*textb)]) 1316 ++textb; 1317 } 1318 } 1319 } 1320 1321 return 0; 1322 1323 greater: 1324 diff = 1; 1325 not_equal: 1326 return key->reverse ? -diff : diff; 1327} 1328 1329/* Compare two lines A and B, returning negative, zero, or positive 1330 depending on whether A compares less than, equal to, or greater than B. */ 1331 1332static int 1333compare (const struct line *a, const struct line *b) 1334{ 1335 int diff; 1336 size_t alen, blen; 1337 1338 /* First try to compare on the specified keys (if any). 1339 The only two cases with no key at all are unadorned sort, 1340 and unadorned sort -r. */ 1341 if (keylist) 1342 { 1343 diff = keycompare (a, b); 1344 if (diff | unique | stable) 1345 return diff; 1346 } 1347 1348 /* If the keys all compare equal (or no keys were specified) 1349 fall through to the default comparison. */ 1350 alen = a->length - 1, blen = b->length - 1; 1351 1352 if (alen == 0) 1353 diff = - NONZERO (blen); 1354 else if (blen == 0) 1355 diff = 1; 1356 else if (hard_LC_COLLATE) 1357 diff = xmemcoll (a->text, alen, b->text, blen); 1358 else if (! (diff = memcmp (a->text, b->text, MIN (alen, blen)))) 1359 diff = alen < blen ? -1 : alen != blen; 1360 1361 return reverse ? -diff : diff; 1362} 1363 1364/* Check that the lines read from FILE_NAME come in order. Print a 1365 diagnostic (FILE_NAME, line number, contents of line) to stderr and return 1366 false if they are not in order. Otherwise, print no diagnostic 1367 and return true. */ 1368 1369static bool 1370check (char const *file_name) 1371{ 1372 FILE *fp = xfopen (file_name, "r"); 1373 struct buffer buf; /* Input buffer. */ 1374 struct line temp; /* Copy of previous line. */ 1375 size_t alloc = 0; 1376 uintmax_t line_number = 0; 1377 struct keyfield const *key = keylist; 1378 bool nonunique = ! unique; 1379 bool ordered = true; 1380 1381 initbuf (&buf, sizeof (struct line), 1382 MAX (merge_buffer_size, sort_size)); 1383 temp.text = NULL; 1384 1385 while (fillbuf (&buf, fp, file_name)) 1386 { 1387 struct line const *line = buffer_linelim (&buf); 1388 struct line const *linebase = line - buf.nlines; 1389 1390 /* Make sure the line saved from the old buffer contents is 1391 less than or equal to the first line of the new buffer. */ 1392 if (alloc && nonunique <= compare (&temp, line - 1)) 1393 { 1394 found_disorder: 1395 { 1396 struct line const *disorder_line = line - 1; 1397 uintmax_t disorder_line_number = 1398 buffer_linelim (&buf) - disorder_line + line_number; 1399 char hr_buf[INT_BUFSIZE_BOUND (uintmax_t)]; 1400 fprintf (stderr, _("%s: %s:%s: disorder: "), 1401 program_name, file_name, 1402 umaxtostr (disorder_line_number, hr_buf)); 1403 write_bytes (disorder_line->text, disorder_line->length, stderr, 1404 _("standard error")); 1405 ordered = false; 1406 break; 1407 } 1408 } 1409 1410 /* Compare each line in the buffer with its successor. */ 1411 while (linebase < --line) 1412 if (nonunique <= compare (line, line - 1)) 1413 goto found_disorder; 1414 1415 line_number += buf.nlines; 1416 1417 /* Save the last line of the buffer. */ 1418 if (alloc < line->length) 1419 { 1420 do 1421 { 1422 alloc *= 2; 1423 if (! alloc) 1424 { 1425 alloc = line->length; 1426 break; 1427 } 1428 } 1429 while (alloc < line->length); 1430 1431 temp.text = xrealloc (temp.text, alloc); 1432 } 1433 memcpy (temp.text, line->text, line->length); 1434 temp.length = line->length; 1435 if (key) 1436 { 1437 temp.keybeg = temp.text + (line->keybeg - line->text); 1438 temp.keylim = temp.text + (line->keylim - line->text); 1439 } 1440 } 1441 1442 xfclose (fp, file_name); 1443 free (buf.buf); 1444 free (temp.text); 1445 return ordered; 1446} 1447 1448/* Merge lines from FILES onto OFP. NTEMPS is the number of temporary 1449 files (all of which are at the start of the FILES array), and 1450 NFILES is the number of files; 0 <= NTEMPS <= NFILES <= NMERGE. 1451 Close input and output files before returning. 1452 OUTPUT_FILE gives the name of the output file. If it is NULL, 1453 the output file is standard output. If OFP is NULL, the output 1454 file has not been opened yet (or written to, if standard output). */ 1455 1456static void 1457mergefps (char **files, size_t ntemps, size_t nfiles, 1458 FILE *ofp, char const *output_file) 1459{ 1460 FILE *fps[NMERGE]; /* Input streams for each file. */ 1461 struct buffer buffer[NMERGE]; /* Input buffers for each file. */ 1462 struct line saved; /* Saved line storage for unique check. */ 1463 struct line const *savedline = NULL; 1464 /* &saved if there is a saved line. */ 1465 size_t savealloc = 0; /* Size allocated for the saved line. */ 1466 struct line const *cur[NMERGE]; /* Current line in each line table. */ 1467 struct line const *base[NMERGE]; /* Base of each line table. */ 1468 size_t ord[NMERGE]; /* Table representing a permutation of fps, 1469 such that cur[ord[0]] is the smallest line 1470 and will be next output. */ 1471 size_t i; 1472 size_t j; 1473 size_t t; 1474 struct keyfield const *key = keylist; 1475 saved.text = NULL; 1476 1477 /* Read initial lines from each input file. */ 1478 for (i = 0; i < nfiles; ) 1479 { 1480 fps[i] = xfopen (files[i], "r"); 1481 initbuf (&buffer[i], sizeof (struct line), 1482 MAX (merge_buffer_size, sort_size / nfiles)); 1483 if (fillbuf (&buffer[i], fps[i], files[i])) 1484 { 1485 struct line const *linelim = buffer_linelim (&buffer[i]); 1486 cur[i] = linelim - 1; 1487 base[i] = linelim - buffer[i].nlines; 1488 i++; 1489 } 1490 else 1491 { 1492 /* fps[i] is empty; eliminate it from future consideration. */ 1493 xfclose (fps[i], files[i]); 1494 if (i < ntemps) 1495 { 1496 ntemps--; 1497 zaptemp (files[i]); 1498 } 1499 free (buffer[i].buf); 1500 --nfiles; 1501 for (j = i; j < nfiles; ++j) 1502 files[j] = files[j + 1]; 1503 } 1504 } 1505 1506 if (! ofp) 1507 ofp = xfopen (output_file, "w"); 1508 1509 /* Set up the ord table according to comparisons among input lines. 1510 Since this only reorders two items if one is strictly greater than 1511 the other, it is stable. */ 1512 for (i = 0; i < nfiles; ++i) 1513 ord[i] = i; 1514 for (i = 1; i < nfiles; ++i) 1515 if (0 < compare (cur[ord[i - 1]], cur[ord[i]])) 1516 t = ord[i - 1], ord[i - 1] = ord[i], ord[i] = t, i = 0; 1517 1518 /* Repeatedly output the smallest line until no input remains. */ 1519 while (nfiles) 1520 { 1521 struct line const *smallest = cur[ord[0]]; 1522 1523 /* If uniquified output is turned on, output only the first of 1524 an identical series of lines. */ 1525 if (unique) 1526 { 1527 if (savedline && compare (savedline, smallest)) 1528 { 1529 savedline = NULL; 1530 write_bytes (saved.text, saved.length, ofp, output_file); 1531 } 1532 if (!savedline) 1533 { 1534 savedline = &saved; 1535 if (savealloc < smallest->length) 1536 { 1537 do 1538 if (! savealloc) 1539 { 1540 savealloc = smallest->length; 1541 break; 1542 } 1543 while ((savealloc *= 2) < smallest->length); 1544 1545 saved.text = xrealloc (saved.text, savealloc); 1546 } 1547 saved.length = smallest->length; 1548 memcpy (saved.text, smallest->text, saved.length); 1549 if (key) 1550 { 1551 saved.keybeg = 1552 saved.text + (smallest->keybeg - smallest->text); 1553 saved.keylim = 1554 saved.text + (smallest->keylim - smallest->text); 1555 } 1556 } 1557 } 1558 else 1559 write_bytes (smallest->text, smallest->length, ofp, output_file); 1560 1561 /* Check if we need to read more lines into core. */ 1562 if (base[ord[0]] < smallest) 1563 cur[ord[0]] = smallest - 1; 1564 else 1565 { 1566 if (fillbuf (&buffer[ord[0]], fps[ord[0]], files[ord[0]])) 1567 { 1568 struct line const *linelim = buffer_linelim (&buffer[ord[0]]); 1569 cur[ord[0]] = linelim - 1; 1570 base[ord[0]] = linelim - buffer[ord[0]].nlines; 1571 } 1572 else 1573 { 1574 /* We reached EOF on fps[ord[0]]. */ 1575 for (i = 1; i < nfiles; ++i) 1576 if (ord[i] > ord[0]) 1577 --ord[i]; 1578 --nfiles; 1579 xfclose (fps[ord[0]], files[ord[0]]); 1580 if (ord[0] < ntemps) 1581 { 1582 ntemps--; 1583 zaptemp (files[ord[0]]); 1584 } 1585 free (buffer[ord[0]].buf); 1586 for (i = ord[0]; i < nfiles; ++i) 1587 { 1588 fps[i] = fps[i + 1]; 1589 files[i] = files[i + 1]; 1590 buffer[i] = buffer[i + 1]; 1591 cur[i] = cur[i + 1]; 1592 base[i] = base[i + 1]; 1593 } 1594 for (i = 0; i < nfiles; ++i) 1595 ord[i] = ord[i + 1]; 1596 continue; 1597 } 1598 } 1599 1600 /* The new line just read in may be larger than other lines 1601 already in main memory; push it back in the queue until we 1602 encounter a line larger than it. Optimize for the common 1603 case where the new line is smallest. */ 1604 { 1605 size_t lo = 1; 1606 size_t hi = nfiles; 1607 size_t probe = lo; 1608 size_t ord0 = ord[0]; 1609 size_t count_of_smaller_lines; 1610 1611 while (lo < hi) 1612 { 1613 int cmp = compare (cur[ord0], cur[ord[probe]]); 1614 if (cmp < 0 || (cmp == 0 && ord0 < ord[probe])) 1615 hi = probe; 1616 else 1617 lo = probe + 1; 1618 probe = (lo + hi) / 2; 1619 } 1620 1621 count_of_smaller_lines = lo - 1; 1622 for (j = 0; j < count_of_smaller_lines; j++) 1623 ord[j] = ord[j + 1]; 1624 ord[count_of_smaller_lines] = ord0; 1625 } 1626 } 1627 1628 if (unique && savedline) 1629 { 1630 write_bytes (saved.text, saved.length, ofp, output_file); 1631 free (saved.text); 1632 } 1633 1634 xfclose (ofp, output_file); 1635} 1636 1637/* Merge into T the two sorted arrays of lines LO (with NLO members) 1638 and HI (with NHI members). T, LO, and HI point just past their 1639 respective arrays, and the arrays are in reverse order. NLO and 1640 NHI must be positive, and HI - NHI must equal T - (NLO + NHI). */ 1641 1642static inline void 1643mergelines (struct line *t, 1644 struct line const *lo, size_t nlo, 1645 struct line const *hi, size_t nhi) 1646{ 1647 for (;;) 1648 if (compare (lo - 1, hi - 1) <= 0) 1649 { 1650 *--t = *--lo; 1651 if (! --nlo) 1652 { 1653 /* HI - NHI equalled T - (NLO + NHI) when this function 1654 began. Therefore HI must equal T now, and there is no 1655 need to copy from HI to T. */ 1656 return; 1657 } 1658 } 1659 else 1660 { 1661 *--t = *--hi; 1662 if (! --nhi) 1663 { 1664 do 1665 *--t = *--lo; 1666 while (--nlo); 1667 1668 return; 1669 } 1670 } 1671} 1672 1673/* Sort the array LINES with NLINES members, using TEMP for temporary space. 1674 NLINES must be at least 2. 1675 The input and output arrays are in reverse order, and LINES and 1676 TEMP point just past the end of their respective arrays. 1677 1678 Use a recursive divide-and-conquer algorithm, in the style 1679 suggested by Knuth volume 3 (2nd edition), exercise 5.2.4-23. Use 1680 the optimization suggested by exercise 5.2.4-10; this requires room 1681 for only 1.5*N lines, rather than the usual 2*N lines. Knuth 1682 writes that this memory optimization was originally published by 1683 D. A. Bell, Comp J. 1 (1958), 75. */ 1684 1685static void 1686sortlines (struct line *lines, size_t nlines, struct line *temp) 1687{ 1688 if (nlines == 2) 1689 { 1690 if (0 < compare (&lines[-1], &lines[-2])) 1691 { 1692 struct line tmp = lines[-1]; 1693 lines[-1] = lines[-2]; 1694 lines[-2] = tmp; 1695 } 1696 } 1697 else 1698 { 1699 size_t nlo = nlines / 2; 1700 size_t nhi = nlines - nlo; 1701 struct line *lo = lines; 1702 struct line *hi = lines - nlo; 1703 struct line *sorted_lo = temp; 1704 1705 sortlines (hi, nhi, temp); 1706 if (1 < nlo) 1707 sortlines_temp (lo, nlo, sorted_lo); 1708 else 1709 sorted_lo[-1] = lo[-1]; 1710 1711 mergelines (lines, sorted_lo, nlo, hi, nhi); 1712 } 1713} 1714 1715/* Like sortlines (LINES, NLINES, TEMP), except output into TEMP 1716 rather than sorting in place. */ 1717 1718static void 1719sortlines_temp (struct line *lines, size_t nlines, struct line *temp) 1720{ 1721 if (nlines == 2) 1722 { 1723 /* Declare `swap' as int, not bool, to work around a bug 1724 <http://lists.gnu.org/archive/html/bug-coreutils/2005-10/msg00086.html> 1725 in the IBM xlc 6.0.0.0 compiler in 64-bit mode. */ 1726 int swap = (0 < compare (&lines[-1], &lines[-2])); 1727 temp[-1] = lines[-1 - swap]; 1728 temp[-2] = lines[-2 + swap]; 1729 } 1730 else 1731 { 1732 size_t nlo = nlines / 2; 1733 size_t nhi = nlines - nlo; 1734 struct line *lo = lines; 1735 struct line *hi = lines - nlo; 1736 struct line *sorted_hi = temp - nlo; 1737 1738 sortlines_temp (hi, nhi, sorted_hi); 1739 if (1 < nlo) 1740 sortlines (lo, nlo, temp); 1741 1742 mergelines (temp, lo, nlo, sorted_hi, nhi); 1743 } 1744} 1745 1746/* Scan through FILES[NTEMPS .. NFILES-1] looking for a file that is 1747 the same as OUTFILE. If found, merge the found instances (and perhaps 1748 some other files) into a temporary file so that it can in turn be 1749 merged into OUTFILE without destroying OUTFILE before it is completely 1750 read. Return the new value of NFILES, which differs from the old if 1751 some merging occurred. 1752 1753 This test ensures that an otherwise-erroneous use like 1754 "sort -m -o FILE ... FILE ..." copies FILE before writing to it. 1755 It's not clear that POSIX requires this nicety. 1756 Detect common error cases, but don't try to catch obscure cases like 1757 "cat ... FILE ... | sort -m -o FILE" 1758 where traditional "sort" doesn't copy the input and where 1759 people should know that they're getting into trouble anyway. 1760 Catching these obscure cases would slow down performance in 1761 common cases. */ 1762 1763static size_t 1764avoid_trashing_input (char **files, size_t ntemps, size_t nfiles, 1765 char const *outfile) 1766{ 1767 size_t i; 1768 bool got_outstat = false; 1769 struct stat outstat; 1770 1771 for (i = ntemps; i < nfiles; i++) 1772 { 1773 bool is_stdin = STREQ (files[i], "-"); 1774 bool same; 1775 struct stat instat; 1776 1777 if (outfile && STREQ (outfile, files[i]) && !is_stdin) 1778 same = true; 1779 else 1780 { 1781 if (! got_outstat) 1782 { 1783 if ((outfile 1784 ? stat (outfile, &outstat) 1785 : fstat (STDOUT_FILENO, &outstat)) 1786 != 0) 1787 break; 1788 got_outstat = true; 1789 } 1790 1791 same = (((is_stdin 1792 ? fstat (STDIN_FILENO, &instat) 1793 : stat (files[i], &instat)) 1794 == 0) 1795 && SAME_INODE (instat, outstat)); 1796 } 1797 1798 if (same) 1799 { 1800 FILE *tftp; 1801 char *temp = create_temp_file (&tftp); 1802 mergefps (&files[i], 0, nfiles - i, tftp, temp); 1803 files[i] = temp; 1804 return i + 1; 1805 } 1806 } 1807 1808 return nfiles; 1809} 1810 1811/* Merge the input FILES. NTEMPS is the number of files at the 1812 start of FILES that are temporary; it is zero at the top level. 1813 NFILES is the total number of files. Put the output in 1814 OUTPUT_FILE; a null OUTPUT_FILE stands for standard output. */ 1815 1816static void 1817merge (char **files, size_t ntemps, size_t nfiles, char const *output_file) 1818{ 1819 while (NMERGE < nfiles) 1820 { 1821 /* Number of input files processed so far. */ 1822 size_t in; 1823 1824 /* Number of output files generated so far. */ 1825 size_t out; 1826 1827 /* nfiles % NMERGE; this counts input files that are left over 1828 after all full-sized merges have been done. */ 1829 size_t remainder; 1830 1831 /* Number of easily-available slots at the next loop iteration. */ 1832 size_t cheap_slots; 1833 1834 /* Do as many NMERGE-size merges as possible. */ 1835 for (out = in = 0; out < nfiles / NMERGE; out++, in += NMERGE) 1836 { 1837 FILE *tfp; 1838 char *temp = create_temp_file (&tfp); 1839 size_t nt = MIN (ntemps, NMERGE); 1840 ntemps -= nt; 1841 mergefps (&files[in], nt, NMERGE, tfp, temp); 1842 files[out] = temp; 1843 } 1844 1845 remainder = nfiles - in; 1846 cheap_slots = NMERGE - out % NMERGE; 1847 1848 if (cheap_slots < remainder) 1849 { 1850 /* So many files remain that they can't all be put into the last 1851 NMERGE-sized output window. Do one more merge. Merge as few 1852 files as possible, to avoid needless I/O. */ 1853 size_t nshortmerge = remainder - cheap_slots + 1; 1854 FILE *tfp; 1855 char *temp = create_temp_file (&tfp); 1856 size_t nt = MIN (ntemps, nshortmerge); 1857 ntemps -= nt; 1858 mergefps (&files[in], nt, nshortmerge, tfp, temp); 1859 files[out++] = temp; 1860 in += nshortmerge; 1861 } 1862 1863 /* Put the remaining input files into the last NMERGE-sized output 1864 window, so they will be merged in the next pass. */ 1865 memmove(&files[out], &files[in], (nfiles - in) * sizeof *files); 1866 ntemps += out; 1867 nfiles -= in - out; 1868 } 1869 1870 nfiles = avoid_trashing_input (files, ntemps, nfiles, output_file); 1871 mergefps (files, ntemps, nfiles, NULL, output_file); 1872} 1873 1874/* Sort NFILES FILES onto OUTPUT_FILE. */ 1875 1876static void 1877sort (char * const *files, size_t nfiles, char const *output_file) 1878{ 1879 struct buffer buf; 1880 size_t ntemps = 0; 1881 bool output_file_created = false; 1882 1883 buf.alloc = 0; 1884 1885 while (nfiles) 1886 { 1887 char const *temp_output; 1888 char const *file = *files; 1889 FILE *fp = xfopen (file, "r"); 1890 FILE *tfp; 1891 size_t bytes_per_line = (2 * sizeof (struct line) 1892 - sizeof (struct line) / 2); 1893 1894 if (! buf.alloc) 1895 initbuf (&buf, bytes_per_line, 1896 sort_buffer_size (&fp, 1, files, nfiles, bytes_per_line)); 1897 buf.eof = false; 1898 files++; 1899 nfiles--; 1900 1901 while (fillbuf (&buf, fp, file)) 1902 { 1903 struct line *line; 1904 struct line *linebase; 1905 1906 if (buf.eof && nfiles 1907 && (bytes_per_line + 1 1908 < (buf.alloc - buf.used - bytes_per_line * buf.nlines))) 1909 { 1910 /* End of file, but there is more input and buffer room. 1911 Concatenate the next input file; this is faster in 1912 the usual case. */ 1913 buf.left = buf.used; 1914 break; 1915 } 1916 1917 line = buffer_linelim (&buf); 1918 linebase = line - buf.nlines; 1919 if (1 < buf.nlines) 1920 sortlines (line, buf.nlines, linebase); 1921 if (buf.eof && !nfiles && !ntemps && !buf.left) 1922 { 1923 xfclose (fp, file); 1924 tfp = xfopen (output_file, "w"); 1925 temp_output = output_file; 1926 output_file_created = true; 1927 } 1928 else 1929 { 1930 ++ntemps; 1931 temp_output = create_temp_file (&tfp); 1932 } 1933 1934 do 1935 { 1936 line--; 1937 write_bytes (line->text, line->length, tfp, temp_output); 1938 if (unique) 1939 while (linebase < line && compare (line, line - 1) == 0) 1940 line--; 1941 } 1942 while (linebase < line); 1943 1944 xfclose (tfp, temp_output); 1945 1946 if (output_file_created) 1947 goto finish; 1948 } 1949 xfclose (fp, file); 1950 } 1951 1952 finish: 1953 free (buf.buf); 1954 1955 if (! output_file_created) 1956 { 1957 size_t i; 1958 struct tempnode *node = temphead; 1959 char **tempfiles = xnmalloc (ntemps, sizeof *tempfiles); 1960 for (i = 0; node; i++) 1961 { 1962 tempfiles[i] = node->name; 1963 node = node->next; 1964 } 1965 merge (tempfiles, ntemps, ntemps, output_file); 1966 free (tempfiles); 1967 } 1968} 1969 1970/* Insert key KEY at the end of the key list. */ 1971 1972static void 1973insertkey (struct keyfield *key) 1974{ 1975 struct keyfield **p; 1976 1977 for (p = &keylist; *p; p = &(*p)->next) 1978 continue; 1979 *p = key; 1980 key->next = NULL; 1981} 1982 1983/* Report a bad field specification SPEC, with extra info MSGID. */ 1984 1985static void badfieldspec (char const *, char const *) 1986 ATTRIBUTE_NORETURN; 1987static void 1988badfieldspec (char const *spec, char const *msgid) 1989{ 1990 error (SORT_FAILURE, 0, _("%s: invalid field specification %s"), 1991 _(msgid), quote (spec)); 1992 abort (); 1993} 1994 1995/* Parse the leading integer in STRING and store the resulting value 1996 (which must fit into size_t) into *VAL. Return the address of the 1997 suffix after the integer. If MSGID is NULL, return NULL after 1998 failure; otherwise, report MSGID and exit on failure. */ 1999 2000static char const * 2001parse_field_count (char const *string, size_t *val, char const *msgid) 2002{ 2003 char *suffix; 2004 uintmax_t n; 2005 2006 switch (xstrtoumax (string, &suffix, 10, &n, "")) 2007 { 2008 case LONGINT_OK: 2009 case LONGINT_INVALID_SUFFIX_CHAR: 2010 *val = n; 2011 if (*val == n) 2012 break; 2013 /* Fall through. */ 2014 case LONGINT_OVERFLOW: 2015 case LONGINT_OVERFLOW | LONGINT_INVALID_SUFFIX_CHAR: 2016 if (msgid) 2017 error (SORT_FAILURE, 0, _("%s: count `%.*s' too large"), 2018 _(msgid), (int) (suffix - string), string); 2019 return NULL; 2020 2021 case LONGINT_INVALID: 2022 if (msgid) 2023 error (SORT_FAILURE, 0, _("%s: invalid count at start of %s"), 2024 _(msgid), quote (string)); 2025 return NULL; 2026 } 2027 2028 return suffix; 2029} 2030 2031/* Handle interrupts and hangups. */ 2032 2033static void 2034sighandler (int sig) 2035{ 2036 if (! SA_NOCLDSTOP) 2037 signal (sig, SIG_IGN); 2038 2039 cleanup (); 2040 2041 signal (sig, SIG_DFL); 2042 raise (sig); 2043} 2044 2045/* Set the ordering options for KEY specified in S. 2046 Return the address of the first character in S that 2047 is not a valid ordering option. 2048 BLANKTYPE is the kind of blanks that 'b' should skip. */ 2049 2050static char * 2051set_ordering (const char *s, struct keyfield *key, enum blanktype blanktype) 2052{ 2053 while (*s) 2054 { 2055 switch (*s) 2056 { 2057 case 'b': 2058 if (blanktype == bl_start || blanktype == bl_both) 2059 key->skipsblanks = true; 2060 if (blanktype == bl_end || blanktype == bl_both) 2061 key->skipeblanks = true; 2062 break; 2063 case 'd': 2064 key->ignore = nondictionary; 2065 break; 2066 case 'f': 2067 key->translate = fold_toupper; 2068 break; 2069 case 'g': 2070 key->general_numeric = true; 2071 break; 2072 case 'i': 2073 /* Option order should not matter, so don't let -i override 2074 -d. -d implies -i, but -i does not imply -d. */ 2075 if (! key->ignore) 2076 key->ignore = nonprinting; 2077 break; 2078 case 'M': 2079 key->month = true; 2080 break; 2081 case 'n': 2082 key->numeric = true; 2083 break; 2084 case 'r': 2085 key->reverse = true; 2086 break; 2087 default: 2088 return (char *) s; 2089 } 2090 ++s; 2091 } 2092 return (char *) s; 2093} 2094 2095static struct keyfield * 2096new_key (void) 2097{ 2098 struct keyfield *key = xzalloc (sizeof *key); 2099 key->eword = SIZE_MAX; 2100 return key; 2101} 2102 2103int 2104main (int argc, char **argv) 2105{ 2106 struct keyfield *key; 2107 struct keyfield gkey; 2108 char const *s; 2109 int c = 0; 2110 bool checkonly = false; 2111 bool mergeonly = false; 2112 size_t nfiles = 0; 2113 bool posixly_correct = (getenv ("POSIXLY_CORRECT") != NULL); 2114 bool obsolete_usage = (posix2_version () < 200112); 2115 char *minus = "-", **files; 2116 char const *outfile = NULL; 2117 2118 initialize_main (&argc, &argv); 2119 program_name = (argv[0]?argv[0]:PROGRAM_NAME);; 2120 setlocale (LC_ALL, ""); 2121 bindtextdomain (PACKAGE, LOCALEDIR); 2122 textdomain (PACKAGE); 2123 2124 atexit (cleanup); 2125 2126 initialize_exit_failure (SORT_FAILURE); 2127 atexit (close_stdout); 2128 2129 hard_LC_COLLATE = hard_locale (LC_COLLATE); 2130#if HAVE_NL_LANGINFO 2131 hard_LC_TIME = hard_locale (LC_TIME); 2132#endif 2133 2134 /* Get locale's representation of the decimal point. */ 2135 { 2136 struct lconv const *locale = localeconv (); 2137 2138 /* If the locale doesn't define a decimal point, or if the decimal 2139 point is multibyte, use the C locale's decimal point. FIXME: 2140 add support for multibyte decimal points. */ 2141 decimal_point = to_uchar (locale->decimal_point[0]); 2142 if (! decimal_point || locale->decimal_point[1]) 2143 decimal_point = '.'; 2144 2145 /* FIXME: add support for multibyte thousands separators. */ 2146 thousands_sep = to_uchar (*locale->thousands_sep); 2147 if (! thousands_sep || locale->thousands_sep[1]) 2148 thousands_sep = -1; 2149 } 2150 2151 have_read_stdin = false; 2152 inittables (); 2153 2154 { 2155 size_t i; 2156 static int const sig[] = { SIGHUP, SIGINT, SIGPIPE, SIGTERM }; 2157 enum { nsigs = sizeof sig / sizeof sig[0] }; 2158 2159#if SA_NOCLDSTOP 2160 struct sigaction act; 2161 2162 sigemptyset (&caught_signals); 2163 for (i = 0; i < nsigs; i++) 2164 { 2165 sigaction (sig[i], NULL, &act); 2166 if (act.sa_handler != SIG_IGN) 2167 sigaddset (&caught_signals, sig[i]); 2168 } 2169 2170 act.sa_handler = sighandler; 2171 act.sa_mask = caught_signals; 2172 act.sa_flags = 0; 2173 2174 for (i = 0; i < nsigs; i++) 2175 if (sigismember (&caught_signals, sig[i])) 2176 sigaction (sig[i], &act, NULL); 2177#else 2178 for (i = 0; i < nsigs; i++) 2179 if (signal (sig[i], SIG_IGN) != SIG_IGN) 2180 { 2181 signal (sig[i], sighandler); 2182 siginterrupt (sig[i], 1); 2183 } 2184#endif 2185 } 2186 2187 gkey.sword = gkey.eword = SIZE_MAX; 2188 gkey.ignore = NULL; 2189 gkey.translate = NULL; 2190 gkey.numeric = gkey.general_numeric = gkey.month = gkey.reverse = false; 2191 gkey.skipsblanks = gkey.skipeblanks = false; 2192 2193 files = xnmalloc (argc, sizeof *files); 2194 2195 for (;;) 2196 { 2197 /* Parse an operand as a file after "--" was seen; or if 2198 pedantic and a file was seen, unless the POSIX version 2199 predates 1003.1-2001 and -c was not seen and the operand is 2200 "-o FILE" or "-oFILE". */ 2201 2202 if (c == -1 2203 || (posixly_correct && nfiles != 0 2204 && ! (obsolete_usage 2205 && ! checkonly 2206 && optind != argc && argv[optind] 2207 && argv[optind][0] == '-' && argv[optind][1] == 'o' 2208 && (argv[optind][2] || optind + 1 != argc))) 2209 || ((c = getopt_long (argc, argv, short_options, 2210 long_options, NULL)) 2211 == -1)) 2212 { 2213 if (argc <= optind) 2214 break; 2215 files[nfiles++] = argv[optind++]; 2216 } 2217 else switch (c) 2218 { 2219 case 1: 2220 key = NULL; 2221 if (obsolete_usage && optarg[0] == '+') 2222 { 2223 /* Treat +POS1 [-POS2] as a key if possible; but silently 2224 treat an operand as a file if it is not a valid +POS1. */ 2225 key = new_key (); 2226 s = parse_field_count (optarg + 1, &key->sword, NULL); 2227 if (s && *s == '.') 2228 s = parse_field_count (s + 1, &key->schar, NULL); 2229 if (! (key->sword | key->schar)) 2230 key->sword = SIZE_MAX; 2231 if (! s || *set_ordering (s, key, bl_start)) 2232 { 2233 free (key); 2234 key = NULL; 2235 } 2236 else 2237 { 2238 if (optind != argc && argv[optind] && argv[optind][0] == '-' 2239 && ISDIGIT (argv[optind][1])) 2240 { 2241 char const *optarg1 = argv[optind++]; 2242 s = parse_field_count (optarg1 + 1, &key->eword, 2243 N_("invalid number after `-'")); 2244 if (*s == '.') 2245 s = parse_field_count (s + 1, &key->echar, 2246 N_("invalid number after `.'")); 2247 if (*set_ordering (s, key, bl_end)) 2248 badfieldspec (optarg1, 2249 N_("stray character in field spec")); 2250 } 2251 insertkey (key); 2252 } 2253 } 2254 if (! key) 2255 files[nfiles++] = optarg; 2256 break; 2257 2258 case 'b': 2259 case 'd': 2260 case 'f': 2261 case 'g': 2262 case 'i': 2263 case 'M': 2264 case 'n': 2265 case 'r': 2266 { 2267 char str[2]; 2268 str[0] = c; 2269 str[1] = '\0'; 2270 set_ordering (str, &gkey, bl_both); 2271 } 2272 break; 2273 2274 case 'c': 2275 checkonly = true; 2276 break; 2277 2278 case 'k': 2279 key = new_key (); 2280 2281 /* Get POS1. */ 2282 s = parse_field_count (optarg, &key->sword, 2283 N_("invalid number at field start")); 2284 if (! key->sword--) 2285 { 2286 /* Provoke with `sort -k0' */ 2287 badfieldspec (optarg, N_("field number is zero")); 2288 } 2289 if (*s == '.') 2290 { 2291 s = parse_field_count (s + 1, &key->schar, 2292 N_("invalid number after `.'")); 2293 if (! key->schar--) 2294 { 2295 /* Provoke with `sort -k1.0' */ 2296 badfieldspec (optarg, N_("character offset is zero")); 2297 } 2298 } 2299 if (! (key->sword | key->schar)) 2300 key->sword = SIZE_MAX; 2301 s = set_ordering (s, key, bl_start); 2302 if (*s != ',') 2303 { 2304 key->eword = SIZE_MAX; 2305 key->echar = 0; 2306 } 2307 else 2308 { 2309 /* Get POS2. */ 2310 s = parse_field_count (s + 1, &key->eword, 2311 N_("invalid number after `,'")); 2312 if (! key->eword--) 2313 { 2314 /* Provoke with `sort -k1,0' */ 2315 badfieldspec (optarg, N_("field number is zero")); 2316 } 2317 if (*s == '.') 2318 s = parse_field_count (s + 1, &key->echar, 2319 N_("invalid number after `.'")); 2320 else 2321 { 2322 /* `-k 2,3' is equivalent to `+1 -3'. */ 2323 key->eword++; 2324 } 2325 s = set_ordering (s, key, bl_end); 2326 } 2327 if (*s) 2328 badfieldspec (optarg, N_("stray character in field spec")); 2329 insertkey (key); 2330 break; 2331 2332 case 'm': 2333 mergeonly = true; 2334 break; 2335 2336 case 'o': 2337 if (outfile && !STREQ (outfile, optarg)) 2338 error (SORT_FAILURE, 0, _("multiple output files specified")); 2339 outfile = optarg; 2340 break; 2341 2342 case 's': 2343 stable = true; 2344 break; 2345 2346 case 'S': 2347 specify_sort_size (optarg); 2348 break; 2349 2350 case 't': 2351 { 2352 char newtab = optarg[0]; 2353 if (! newtab) 2354 error (SORT_FAILURE, 0, _("empty tab")); 2355 if (optarg[1]) 2356 { 2357 if (STREQ (optarg, "\\0")) 2358 newtab = '\0'; 2359 else 2360 { 2361 /* Provoke with `sort -txx'. Complain about 2362 "multi-character tab" instead of "multibyte tab", so 2363 that the diagnostic's wording does not need to be 2364 changed once multibyte characters are supported. */ 2365 error (SORT_FAILURE, 0, _("multi-character tab %s"), 2366 quote (optarg)); 2367 } 2368 } 2369 if (tab != TAB_DEFAULT && tab != newtab) 2370 error (SORT_FAILURE, 0, _("incompatible tabs")); 2371 tab = newtab; 2372 } 2373 break; 2374 2375 case 'T': 2376 add_temp_dir (optarg); 2377 break; 2378 2379 case 'u': 2380 unique = true; 2381 break; 2382 2383 case 'y': 2384 /* Accept and ignore e.g. -y0 for compatibility with Solaris 2.x 2385 through Solaris 7. It is also accepted by many non-Solaris 2386 "sort" implementations, e.g., AIX 5.2, HP-UX 11i v2, IRIX 6.5. 2387 -y is marked as obsolete starting with Solaris 8 (1999), but is 2388 still accepted as of Solaris 10 prerelease (2004). 2389 2390 Solaris 2.5.1 "sort -y 100" reads the input file "100", but 2391 emulate Solaris 8 and 9 "sort -y 100" which ignores the "100", 2392 and which in general ignores the argument after "-y" if it 2393 consists entirely of digits (it can even be empty). */ 2394 if (optarg == argv[optind - 1]) 2395 { 2396 char const *p; 2397 for (p = optarg; ISDIGIT (*p); p++) 2398 continue; 2399 optind -= (*p != '\0'); 2400 } 2401 break; 2402 2403 case 'z': 2404 eolchar = 0; 2405 break; 2406 2407 case_GETOPT_HELP_CHAR; 2408 2409 case_GETOPT_VERSION_CHAR (PROGRAM_NAME, AUTHORS); 2410 2411 default: 2412 usage (SORT_FAILURE); 2413 } 2414 } 2415 2416 /* Inheritance of global options to individual keys. */ 2417 for (key = keylist; key; key = key->next) 2418 if (! (key->ignore || key->translate 2419 || (key->skipsblanks | key->reverse 2420 | key->skipeblanks | key->month | key->numeric 2421 | key->general_numeric))) 2422 { 2423 key->ignore = gkey.ignore; 2424 key->translate = gkey.translate; 2425 key->skipsblanks = gkey.skipsblanks; 2426 key->skipeblanks = gkey.skipeblanks; 2427 key->month = gkey.month; 2428 key->numeric = gkey.numeric; 2429 key->general_numeric = gkey.general_numeric; 2430 key->reverse = gkey.reverse; 2431 } 2432 2433 if (!keylist && (gkey.ignore || gkey.translate 2434 || (gkey.skipsblanks | gkey.skipeblanks | gkey.month 2435 | gkey.numeric | gkey.general_numeric))) 2436 insertkey (&gkey); 2437 reverse = gkey.reverse; 2438 2439 if (temp_dir_count == 0) 2440 { 2441 char const *tmp_dir = getenv ("TMPDIR"); 2442 add_temp_dir (tmp_dir ? tmp_dir : DEFAULT_TMPDIR); 2443 } 2444 2445 if (nfiles == 0) 2446 { 2447 nfiles = 1; 2448 files = − 2449 } 2450 2451 if (checkonly) 2452 { 2453 if (nfiles > 1) 2454 { 2455 error (0, 0, _("extra operand %s not allowed with -c"), 2456 quote (files[1])); 2457 usage (SORT_FAILURE); 2458 } 2459 2460 /* POSIX requires that sort return 1 IFF invoked with -c and the 2461 input is not properly sorted. */ 2462 exit (check (files[0]) ? EXIT_SUCCESS : SORT_OUT_OF_ORDER); 2463 } 2464 2465 if (mergeonly) 2466 merge (files, 0, nfiles, outfile); 2467 else 2468 sort (files, nfiles, outfile); 2469 2470 if (have_read_stdin && fclose (stdin) == EOF) 2471 die (_("close failed"), "-"); 2472 2473 exit (EXIT_SUCCESS); 2474} 2475