1235267Sgabor/*- 2235267Sgabor * Copyright (C) 2009 Gabor Kovesdan <gabor@FreeBSD.org> 3251245Sgabor * Copyright (C) 2012 Oleg Moskalenko <mom040267@gmail.com> 4235267Sgabor * All rights reserved. 5235267Sgabor * 6235267Sgabor * Redistribution and use in source and binary forms, with or without 7235267Sgabor * modification, are permitted provided that the following conditions 8235267Sgabor * are met: 9235267Sgabor * 1. Redistributions of source code must retain the above copyright 10235267Sgabor * notice, this list of conditions and the following disclaimer. 11235267Sgabor * 2. Redistributions in binary form must reproduce the above copyright 12235267Sgabor * notice, this list of conditions and the following disclaimer in the 13235267Sgabor * documentation and/or other materials provided with the distribution. 14235267Sgabor * 15235267Sgabor * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 16235267Sgabor * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 17235267Sgabor * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 18235267Sgabor * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 19235267Sgabor * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 20235267Sgabor * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 21235267Sgabor * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 22235267Sgabor * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 23235267Sgabor * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 24235267Sgabor * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 25235267Sgabor * SUCH DAMAGE. 26235267Sgabor */ 27235267Sgabor 28235267Sgabor#include <sys/cdefs.h> 29235267Sgabor__FBSDID("$FreeBSD$"); 30235267Sgabor 31235267Sgabor#include <sys/mman.h> 32235267Sgabor#include <sys/stat.h> 33235267Sgabor#include <sys/types.h> 34235267Sgabor#include <sys/queue.h> 35235267Sgabor 36235267Sgabor#include <err.h> 37235267Sgabor#include <fcntl.h> 38235267Sgabor#if defined(SORT_THREADS) 39235267Sgabor#include <pthread.h> 40235267Sgabor#endif 41235267Sgabor#include <semaphore.h> 42235267Sgabor#include <stdio.h> 43235267Sgabor#include <stdlib.h> 44235267Sgabor#include <string.h> 45235267Sgabor#include <unistd.h> 46235267Sgabor#include <wchar.h> 47235267Sgabor#include <wctype.h> 48235267Sgabor 49235267Sgabor#include "coll.h" 50235267Sgabor#include "file.h" 51235267Sgabor#include "radixsort.h" 52235267Sgabor 53235267Sgaborunsigned long long free_memory = 1000000; 54235267Sgaborunsigned long long available_free_memory = 1000000; 55235267Sgabor 56235987Sgaborbool use_mmap; 57235987Sgabor 58235267Sgaborconst char *tmpdir = "/var/tmp"; 59235435Sgaborconst char *compress_program; 60235267Sgabor 61235267Sgaborsize_t max_open_files = 16; 62235267Sgabor 63235267Sgabor/* 64235267Sgabor * How much space we read from file at once 65235267Sgabor */ 66235267Sgabor#define READ_CHUNK (4096) 67235267Sgabor 68235267Sgabor/* 69235267Sgabor * File reader structure 70235267Sgabor */ 71235267Sgaborstruct file_reader 72235267Sgabor{ 73235267Sgabor struct reader_buffer rb; 74235267Sgabor FILE *file; 75235267Sgabor char *fname; 76235267Sgabor unsigned char *buffer; 77235267Sgabor unsigned char *mmapaddr; 78235267Sgabor unsigned char *mmapptr; 79235267Sgabor size_t bsz; 80235267Sgabor size_t cbsz; 81235267Sgabor size_t mmapsize; 82235267Sgabor size_t strbeg; 83235267Sgabor int fd; 84235267Sgabor char elsymb; 85235267Sgabor}; 86235267Sgabor 87235267Sgabor/* 88235267Sgabor * Structure to be used in file merge process. 89235267Sgabor */ 90235267Sgaborstruct file_header 91235267Sgabor{ 92235267Sgabor struct file_reader *fr; 93235267Sgabor struct sort_list_item *si; /* current top line */ 94235267Sgabor size_t file_pos; 95235267Sgabor}; 96235267Sgabor 97235267Sgabor/* 98235267Sgabor * List elements of "cleanable" files list. 99235267Sgabor */ 100235267Sgaborstruct CLEANABLE_FILE 101235267Sgabor{ 102235267Sgabor char *fn; 103235267Sgabor LIST_ENTRY(CLEANABLE_FILE) files; 104235267Sgabor}; 105235267Sgabor 106235267Sgabor/* 107235267Sgabor * List header of "cleanable" files list. 108235267Sgabor */ 109235267Sgaborstatic LIST_HEAD(CLEANABLE_FILES,CLEANABLE_FILE) tmp_files; 110235267Sgabor 111235267Sgabor/* 112235267Sgabor * Semaphore to protect the tmp file list. 113235267Sgabor * We use semaphore here because it is signal-safe, according to POSIX. 114235267Sgabor * And semaphore does not require pthread library. 115235267Sgabor */ 116235267Sgaborstatic sem_t tmp_files_sem; 117235267Sgabor 118235267Sgaborstatic void mt_sort(struct sort_list *list, 119235267Sgabor int (*sort_func)(void *, size_t, size_t, 120235267Sgabor int (*)(const void *, const void *)), const char* fn); 121235267Sgabor 122235267Sgabor/* 123235267Sgabor * Init tmp files list 124235267Sgabor */ 125235267Sgaborvoid 126235267Sgaborinit_tmp_files(void) 127235267Sgabor{ 128235267Sgabor 129235267Sgabor LIST_INIT(&tmp_files); 130235267Sgabor sem_init(&tmp_files_sem, 0, 1); 131235267Sgabor} 132235267Sgabor 133235267Sgabor/* 134235267Sgabor * Save name of a tmp file for signal cleanup 135235267Sgabor */ 136235267Sgaborvoid 137235267Sgabortmp_file_atexit(const char *tmp_file) 138235267Sgabor{ 139235267Sgabor 140235267Sgabor if (tmp_file) { 141235267Sgabor sem_wait(&tmp_files_sem); 142235267Sgabor struct CLEANABLE_FILE *item = 143235267Sgabor sort_malloc(sizeof(struct CLEANABLE_FILE)); 144235267Sgabor item->fn = sort_strdup(tmp_file); 145235267Sgabor LIST_INSERT_HEAD(&tmp_files, item, files); 146235267Sgabor sem_post(&tmp_files_sem); 147235267Sgabor } 148235267Sgabor} 149235267Sgabor 150235267Sgabor/* 151235267Sgabor * Clear tmp files 152235267Sgabor */ 153235267Sgaborvoid 154235267Sgaborclear_tmp_files(void) 155235267Sgabor{ 156235267Sgabor struct CLEANABLE_FILE *item; 157235267Sgabor 158235267Sgabor sem_wait(&tmp_files_sem); 159235267Sgabor LIST_FOREACH(item,&tmp_files,files) { 160235267Sgabor if ((item) && (item->fn)) 161235267Sgabor unlink(item->fn); 162235267Sgabor } 163235267Sgabor sem_post(&tmp_files_sem); 164235267Sgabor} 165235267Sgabor 166235267Sgabor/* 167235267Sgabor * Check whether a file is a temporary file 168235267Sgabor */ 169235267Sgaborstatic bool 170235267Sgaborfile_is_tmp(const char* fn) 171235267Sgabor{ 172235267Sgabor struct CLEANABLE_FILE *item; 173235267Sgabor bool ret = false; 174235267Sgabor 175235267Sgabor if (fn) { 176235267Sgabor sem_wait(&tmp_files_sem); 177235267Sgabor LIST_FOREACH(item,&tmp_files,files) { 178235267Sgabor if ((item) && (item->fn)) 179235267Sgabor if (strcmp(item->fn, fn) == 0) { 180235267Sgabor ret = true; 181235267Sgabor break; 182235267Sgabor } 183235267Sgabor } 184235267Sgabor sem_post(&tmp_files_sem); 185235267Sgabor } 186235267Sgabor 187235267Sgabor return (ret); 188235267Sgabor} 189235267Sgabor 190235267Sgabor/* 191235267Sgabor * Read zero-terminated line from a file 192235267Sgabor */ 193235267Sgaborchar * 194235267Sgaborread_file0_line(struct file0_reader *f0r) 195235267Sgabor{ 196235267Sgabor size_t pos = 0; 197235267Sgabor int c; 198235267Sgabor 199235267Sgabor if ((f0r->f == NULL) || feof(f0r->f)) 200235267Sgabor return (NULL); 201235267Sgabor 202235267Sgabor if (f0r->current_line && f0r->current_sz > 0) 203235267Sgabor f0r->current_line[0] = 0; 204235267Sgabor 205235267Sgabor while (!feof(f0r->f)) { 206235267Sgabor c = fgetc(f0r->f); 207235267Sgabor if (feof(f0r->f) || (c == -1)) 208235267Sgabor break; 209235267Sgabor if ((pos + 1) >= f0r->current_sz) { 210235267Sgabor size_t newsz = (f0r->current_sz + 2) * 2; 211235267Sgabor f0r->current_line = sort_realloc(f0r->current_line, 212235267Sgabor newsz); 213235267Sgabor f0r->current_sz = newsz; 214235267Sgabor } 215235267Sgabor f0r->current_line[pos] = (char)c; 216235267Sgabor if (c == 0) 217235267Sgabor break; 218235267Sgabor else 219235267Sgabor f0r->current_line[pos + 1] = 0; 220235267Sgabor ++pos; 221235267Sgabor } 222235267Sgabor 223235267Sgabor return f0r->current_line; 224235267Sgabor} 225235267Sgabor 226235267Sgabor/* 227235267Sgabor * Generate new temporary file name 228235267Sgabor */ 229235267Sgaborchar * 230235267Sgabornew_tmp_file_name(void) 231235267Sgabor{ 232242430Sgabor static size_t tfcounter = 0; 233235267Sgabor static const char *fn = ".bsdsort."; 234235267Sgabor char *ret; 235235267Sgabor size_t sz; 236235267Sgabor 237235267Sgabor sz = strlen(tmpdir) + 1 + strlen(fn) + 32 + 1; 238235267Sgabor ret = sort_malloc(sz); 239235267Sgabor 240242430Sgabor sprintf(ret, "%s/%s%d.%lu", tmpdir, fn, (int) getpid(), (unsigned long)(tfcounter++)); 241235267Sgabor tmp_file_atexit(ret); 242235267Sgabor return (ret); 243235267Sgabor} 244235267Sgabor 245235267Sgabor/* 246235267Sgabor * Initialize file list 247235267Sgabor */ 248235267Sgaborvoid 249235267Sgaborfile_list_init(struct file_list *fl, bool tmp) 250235267Sgabor{ 251235267Sgabor 252235267Sgabor if (fl) { 253235267Sgabor fl->count = 0; 254235267Sgabor fl->sz = 0; 255235267Sgabor fl->fns = NULL; 256235267Sgabor fl->tmp = tmp; 257235267Sgabor } 258235267Sgabor} 259235267Sgabor 260235267Sgabor/* 261235267Sgabor * Add a file name to the list 262235267Sgabor */ 263235267Sgaborvoid 264235267Sgaborfile_list_add(struct file_list *fl, char *fn, bool allocate) 265235267Sgabor{ 266235267Sgabor 267235267Sgabor if (fl && fn) { 268235267Sgabor if (fl->count >= fl->sz || (fl->fns == NULL)) { 269235267Sgabor fl->sz = (fl->sz) * 2 + 1; 270235267Sgabor fl->fns = sort_realloc(fl->fns, fl->sz * 271235267Sgabor sizeof(char *)); 272235267Sgabor } 273235267Sgabor fl->fns[fl->count] = allocate ? sort_strdup(fn) : fn; 274235267Sgabor fl->count += 1; 275235267Sgabor } 276235267Sgabor} 277235267Sgabor 278235267Sgabor/* 279235267Sgabor * Populate file list from array of file names 280235267Sgabor */ 281235267Sgaborvoid 282235267Sgaborfile_list_populate(struct file_list *fl, int argc, char **argv, bool allocate) 283235267Sgabor{ 284235267Sgabor 285235267Sgabor if (fl && argv) { 286235267Sgabor int i; 287235267Sgabor 288235267Sgabor for (i = 0; i < argc; i++) 289235267Sgabor file_list_add(fl, argv[i], allocate); 290235267Sgabor } 291235267Sgabor} 292235267Sgabor 293235267Sgabor/* 294235267Sgabor * Clean file list data and delete the files, 295235267Sgabor * if this is a list of temporary files 296235267Sgabor */ 297235267Sgaborvoid 298235267Sgaborfile_list_clean(struct file_list *fl) 299235267Sgabor{ 300235267Sgabor 301235267Sgabor if (fl) { 302235267Sgabor if (fl->fns) { 303242430Sgabor size_t i; 304235267Sgabor 305235267Sgabor for (i = 0; i < fl->count; i++) { 306235267Sgabor if (fl->fns[i]) { 307235267Sgabor if (fl->tmp) 308235267Sgabor unlink(fl->fns[i]); 309235267Sgabor sort_free(fl->fns[i]); 310235267Sgabor fl->fns[i] = 0; 311235267Sgabor } 312235267Sgabor } 313235267Sgabor sort_free(fl->fns); 314235267Sgabor fl->fns = NULL; 315235267Sgabor } 316235267Sgabor fl->sz = 0; 317235267Sgabor fl->count = 0; 318235267Sgabor fl->tmp = false; 319235267Sgabor } 320235267Sgabor} 321235267Sgabor 322235267Sgabor/* 323235267Sgabor * Init sort list 324235267Sgabor */ 325235267Sgaborvoid 326235267Sgaborsort_list_init(struct sort_list *l) 327235267Sgabor{ 328235267Sgabor 329235267Sgabor if (l) { 330235267Sgabor l->count = 0; 331235267Sgabor l->size = 0; 332235267Sgabor l->memsize = sizeof(struct sort_list); 333235267Sgabor l->list = NULL; 334235267Sgabor } 335235267Sgabor} 336235267Sgabor 337235267Sgabor/* 338235267Sgabor * Add string to sort list 339235267Sgabor */ 340235267Sgaborvoid 341235267Sgaborsort_list_add(struct sort_list *l, struct bwstring *str) 342235267Sgabor{ 343235267Sgabor 344235267Sgabor if (l && str) { 345235267Sgabor size_t indx = l->count; 346235267Sgabor 347235267Sgabor if ((l->list == NULL) || (indx >= l->size)) { 348242430Sgabor size_t newsize = (l->size + 1) + 1024; 349235267Sgabor 350235267Sgabor l->list = sort_realloc(l->list, 351235267Sgabor sizeof(struct sort_list_item*) * newsize); 352235267Sgabor l->memsize += (newsize - l->size) * 353235267Sgabor sizeof(struct sort_list_item*); 354235267Sgabor l->size = newsize; 355235267Sgabor } 356235267Sgabor l->list[indx] = sort_list_item_alloc(); 357235267Sgabor sort_list_item_set(l->list[indx], str); 358235267Sgabor l->memsize += sort_list_item_size(l->list[indx]); 359235267Sgabor l->count += 1; 360235267Sgabor } 361235267Sgabor} 362235267Sgabor 363235267Sgabor/* 364235267Sgabor * Clean sort list data 365235267Sgabor */ 366235267Sgaborvoid 367235267Sgaborsort_list_clean(struct sort_list *l) 368235267Sgabor{ 369235267Sgabor 370235267Sgabor if (l) { 371235267Sgabor if (l->list) { 372235267Sgabor size_t i; 373235267Sgabor 374235267Sgabor for (i = 0; i < l->count; i++) { 375235267Sgabor struct sort_list_item *item; 376235267Sgabor 377235267Sgabor item = l->list[i]; 378235267Sgabor 379235267Sgabor if (item) { 380235267Sgabor sort_list_item_clean(item); 381235267Sgabor sort_free(item); 382235267Sgabor l->list[i] = NULL; 383235267Sgabor } 384235267Sgabor } 385235267Sgabor sort_free(l->list); 386235267Sgabor l->list = NULL; 387235267Sgabor } 388235267Sgabor l->count = 0; 389235267Sgabor l->size = 0; 390235267Sgabor l->memsize = sizeof(struct sort_list); 391235267Sgabor } 392235267Sgabor} 393235267Sgabor 394235267Sgabor/* 395235267Sgabor * Write sort list to file 396235267Sgabor */ 397235267Sgaborvoid 398235267Sgaborsort_list_dump(struct sort_list *l, const char *fn) 399235267Sgabor{ 400235267Sgabor 401235267Sgabor if (l && fn) { 402235267Sgabor FILE *f; 403235267Sgabor 404235267Sgabor f = openfile(fn, "w"); 405235267Sgabor if (f == NULL) 406235267Sgabor err(2, NULL); 407235267Sgabor 408235267Sgabor if (l->list) { 409235267Sgabor size_t i; 410235987Sgabor if (!(sort_opts_vals.uflag)) { 411235987Sgabor for (i = 0; i < l->count; ++i) 412235987Sgabor bwsfwrite(l->list[i]->str, f, 413235987Sgabor sort_opts_vals.zflag); 414235987Sgabor } else { 415235987Sgabor struct sort_list_item *last_printed_item = NULL; 416235267Sgabor struct sort_list_item *item; 417235987Sgabor for (i = 0; i < l->count; ++i) { 418235987Sgabor item = l->list[i]; 419235987Sgabor if ((last_printed_item == NULL) || 420235987Sgabor list_coll(&last_printed_item, &item)) { 421235987Sgabor bwsfwrite(item->str, f, sort_opts_vals.zflag); 422235267Sgabor last_printed_item = item; 423235987Sgabor } 424235267Sgabor } 425235267Sgabor } 426235267Sgabor } 427235267Sgabor 428235267Sgabor closefile(f, fn); 429235267Sgabor } 430235267Sgabor} 431235267Sgabor 432235267Sgabor/* 433235267Sgabor * Checks if the given file is sorted. Stops at the first disorder, 434235267Sgabor * prints the disordered line and returns 1. 435235267Sgabor */ 436235267Sgaborint 437235267Sgaborcheck(const char *fn) 438235267Sgabor{ 439235267Sgabor struct bwstring *s1, *s2, *s1disorder, *s2disorder; 440235267Sgabor struct file_reader *fr; 441235267Sgabor struct keys_array *ka1, *ka2; 442242430Sgabor int res; 443242430Sgabor size_t pos, posdisorder; 444235267Sgabor 445235267Sgabor s1 = s2 = s1disorder = s2disorder = NULL; 446235267Sgabor ka1 = ka2 = NULL; 447235267Sgabor 448235267Sgabor fr = file_reader_init(fn); 449235267Sgabor 450235267Sgabor res = 0; 451235267Sgabor pos = 1; 452235267Sgabor posdisorder = 1; 453235267Sgabor 454235267Sgabor if (fr == NULL) { 455235267Sgabor err(2, NULL); 456235267Sgabor goto end; 457235267Sgabor } 458235267Sgabor 459235267Sgabor s1 = file_reader_readline(fr); 460235267Sgabor if (s1 == NULL) 461235267Sgabor goto end; 462235267Sgabor 463235267Sgabor ka1 = keys_array_alloc(); 464235267Sgabor preproc(s1, ka1); 465235267Sgabor 466235267Sgabor s2 = file_reader_readline(fr); 467235267Sgabor if (s2 == NULL) 468235267Sgabor goto end; 469235267Sgabor 470235267Sgabor ka2 = keys_array_alloc(); 471235267Sgabor preproc(s2, ka2); 472235267Sgabor 473235267Sgabor for (;;) { 474235267Sgabor 475235267Sgabor if (debug_sort) { 476235267Sgabor bwsprintf(stdout, s2, "s1=<", ">"); 477235267Sgabor bwsprintf(stdout, s1, "s2=<", ">"); 478235267Sgabor } 479235267Sgabor int cmp = key_coll(ka2, ka1, 0); 480235267Sgabor if (debug_sort) 481235267Sgabor printf("; cmp1=%d", cmp); 482235267Sgabor 483235267Sgabor if (!cmp && sort_opts_vals.complex_sort && 484235267Sgabor !(sort_opts_vals.uflag) && !(sort_opts_vals.sflag)) { 485235267Sgabor cmp = top_level_str_coll(s2, s1); 486235267Sgabor if (debug_sort) 487235267Sgabor printf("; cmp2=%d", cmp); 488235267Sgabor } 489235267Sgabor if (debug_sort) 490235267Sgabor printf("\n"); 491235267Sgabor 492235267Sgabor if ((sort_opts_vals.uflag && (cmp <= 0)) || (cmp < 0)) { 493235267Sgabor if (!(sort_opts_vals.csilentflag)) { 494235267Sgabor s2disorder = bwsdup(s2); 495235267Sgabor posdisorder = pos; 496235267Sgabor if (debug_sort) 497235267Sgabor s1disorder = bwsdup(s1); 498235267Sgabor } 499235267Sgabor res = 1; 500235267Sgabor goto end; 501235267Sgabor } 502235267Sgabor 503235267Sgabor pos++; 504235267Sgabor 505235267Sgabor clean_keys_array(s1, ka1); 506235267Sgabor sort_free(ka1); 507235267Sgabor ka1 = ka2; 508235267Sgabor ka2 = NULL; 509235267Sgabor 510235267Sgabor bwsfree(s1); 511235267Sgabor s1 = s2; 512235267Sgabor 513235267Sgabor s2 = file_reader_readline(fr); 514235267Sgabor if (s2 == NULL) 515235267Sgabor goto end; 516235267Sgabor 517235267Sgabor ka2 = keys_array_alloc(); 518235267Sgabor preproc(s2, ka2); 519235267Sgabor } 520235267Sgabor 521235267Sgaborend: 522235267Sgabor if (ka1) { 523235267Sgabor clean_keys_array(s1, ka1); 524235267Sgabor sort_free(ka1); 525235267Sgabor } 526235267Sgabor 527235267Sgabor if (s1) 528235267Sgabor bwsfree(s1); 529235267Sgabor 530235267Sgabor if (ka2) { 531235267Sgabor clean_keys_array(s2, ka2); 532235267Sgabor sort_free(ka2); 533235267Sgabor } 534235267Sgabor 535235267Sgabor if (s2) 536235267Sgabor bwsfree(s2); 537235267Sgabor 538235267Sgabor if ((fn == NULL) || (*fn == 0) || (strcmp(fn, "-") == 0)) { 539235267Sgabor for (;;) { 540235267Sgabor s2 = file_reader_readline(fr); 541235267Sgabor if (s2 == NULL) 542235267Sgabor break; 543235267Sgabor bwsfree(s2); 544235267Sgabor } 545235267Sgabor } 546235267Sgabor 547235267Sgabor file_reader_free(fr); 548235267Sgabor 549235267Sgabor if (s2disorder) { 550235267Sgabor bws_disorder_warnx(s2disorder, fn, posdisorder); 551235267Sgabor if (s1disorder) { 552235267Sgabor bws_disorder_warnx(s1disorder, fn, posdisorder); 553235267Sgabor if (s1disorder != s2disorder) 554235267Sgabor bwsfree(s1disorder); 555235267Sgabor } 556235267Sgabor bwsfree(s2disorder); 557235267Sgabor s1disorder = NULL; 558235267Sgabor s2disorder = NULL; 559235267Sgabor } 560235267Sgabor 561235267Sgabor if (res) 562235267Sgabor exit(res); 563235267Sgabor 564235267Sgabor return (0); 565235267Sgabor} 566235267Sgabor 567235267Sgabor/* 568235267Sgabor * Opens a file. If the given filename is "-", stdout will be 569235267Sgabor * opened. 570235267Sgabor */ 571235267SgaborFILE * 572235267Sgaboropenfile(const char *fn, const char *mode) 573235267Sgabor{ 574235267Sgabor FILE *file; 575235267Sgabor 576235267Sgabor if (strcmp(fn, "-") == 0) { 577235267Sgabor return ((mode && mode[0] == 'r') ? stdin : stdout); 578235267Sgabor } else { 579235267Sgabor mode_t orig_file_mask = 0; 580235267Sgabor int is_tmp = file_is_tmp(fn); 581235267Sgabor 582235267Sgabor if (is_tmp && (mode[0] == 'w')) 583235267Sgabor orig_file_mask = umask(S_IWGRP | S_IWOTH | 584235267Sgabor S_IRGRP | S_IROTH); 585235267Sgabor 586235267Sgabor if (is_tmp && (compress_program != NULL)) { 587235267Sgabor char *cmd; 588235267Sgabor size_t cmdsz; 589235267Sgabor 590235267Sgabor cmdsz = strlen(fn) + 128; 591235267Sgabor cmd = sort_malloc(cmdsz); 592235267Sgabor 593235267Sgabor fflush(stdout); 594235267Sgabor 595235267Sgabor if (mode[0] == 'r') 596235267Sgabor snprintf(cmd, cmdsz - 1, "cat %s | %s -d", 597235267Sgabor fn, compress_program); 598235267Sgabor else if (mode[0] == 'w') 599235267Sgabor snprintf(cmd, cmdsz - 1, "%s > %s", 600235267Sgabor compress_program, fn); 601235267Sgabor else 602235432Sgabor err(2, "%s", getstr(7)); 603235267Sgabor 604235267Sgabor if ((file = popen(cmd, mode)) == NULL) 605235267Sgabor err(2, NULL); 606235267Sgabor 607235267Sgabor sort_free(cmd); 608235267Sgabor 609235267Sgabor } else 610235267Sgabor if ((file = fopen(fn, mode)) == NULL) 611235267Sgabor err(2, NULL); 612235267Sgabor 613235267Sgabor if (is_tmp && (mode[0] == 'w')) 614235267Sgabor umask(orig_file_mask); 615235267Sgabor } 616235267Sgabor 617235267Sgabor return (file); 618235267Sgabor} 619235267Sgabor 620235267Sgabor/* 621235267Sgabor * Close file 622235267Sgabor */ 623235267Sgaborvoid 624235267Sgaborclosefile(FILE *f, const char *fn) 625235267Sgabor{ 626235267Sgabor if (f == NULL) { 627235267Sgabor ; 628235267Sgabor } else if (f == stdin) { 629235267Sgabor ; 630235267Sgabor } else if (f == stdout) { 631235267Sgabor fflush(f); 632235267Sgabor } else { 633235267Sgabor if (file_is_tmp(fn) && compress_program != NULL) { 634235267Sgabor if(pclose(f)<0) 635235267Sgabor err(2,NULL); 636235267Sgabor } else 637235267Sgabor fclose(f); 638235267Sgabor } 639235267Sgabor} 640235267Sgabor 641235267Sgabor/* 642235267Sgabor * Reads a file into the internal buffer. 643235267Sgabor */ 644235267Sgaborstruct file_reader * 645235267Sgaborfile_reader_init(const char *fsrc) 646235267Sgabor{ 647235267Sgabor struct file_reader *ret; 648235267Sgabor 649235267Sgabor if (fsrc == NULL) 650235267Sgabor fsrc = "-"; 651235267Sgabor 652235267Sgabor ret = sort_malloc(sizeof(struct file_reader)); 653235267Sgabor memset(ret, 0, sizeof(struct file_reader)); 654235267Sgabor 655235267Sgabor ret->elsymb = '\n'; 656235267Sgabor if (sort_opts_vals.zflag) 657235267Sgabor ret->elsymb = 0; 658235267Sgabor 659235267Sgabor ret->fname = sort_strdup(fsrc); 660235267Sgabor 661235987Sgabor if (strcmp(fsrc, "-") && (compress_program == NULL) && use_mmap) { 662235267Sgabor 663235267Sgabor do { 664235267Sgabor struct stat stat_buf; 665235267Sgabor void *addr; 666235267Sgabor size_t sz = 0; 667235267Sgabor int fd, flags; 668235267Sgabor 669235267Sgabor flags = MAP_NOCORE | MAP_NOSYNC; 670235267Sgabor addr = MAP_FAILED; 671235267Sgabor 672235267Sgabor fd = open(fsrc, O_RDONLY); 673235267Sgabor if (fd < 0) 674235267Sgabor err(2, NULL); 675235267Sgabor 676235267Sgabor if (fstat(fd, &stat_buf) < 0) { 677235267Sgabor close(fd); 678235267Sgabor break; 679235267Sgabor } 680235267Sgabor 681235267Sgabor sz = stat_buf.st_size; 682235267Sgabor 683235267Sgabor#if defined(MAP_PREFAULT_READ) 684235267Sgabor flags |= MAP_PREFAULT_READ; 685235267Sgabor#endif 686235267Sgabor 687235267Sgabor addr = mmap(NULL, sz, PROT_READ, flags, fd, 0); 688235267Sgabor if (addr == MAP_FAILED) { 689235267Sgabor close(fd); 690235267Sgabor break; 691235267Sgabor } 692235267Sgabor 693235267Sgabor ret->fd = fd; 694235267Sgabor ret->mmapaddr = addr; 695235267Sgabor ret->mmapsize = sz; 696235267Sgabor ret->mmapptr = ret->mmapaddr; 697235267Sgabor 698235267Sgabor } while (0); 699235267Sgabor } 700235267Sgabor 701235267Sgabor if (ret->mmapaddr == NULL) { 702235267Sgabor ret->file = openfile(fsrc, "r"); 703235267Sgabor if (ret->file == NULL) 704235267Sgabor err(2, NULL); 705235267Sgabor 706235267Sgabor if (strcmp(fsrc, "-")) { 707235267Sgabor ret->cbsz = READ_CHUNK; 708235267Sgabor ret->buffer = sort_malloc(ret->cbsz); 709235267Sgabor ret->bsz = 0; 710235267Sgabor ret->strbeg = 0; 711235267Sgabor 712235267Sgabor ret->bsz = fread(ret->buffer, 1, ret->cbsz, ret->file); 713235267Sgabor if (ret->bsz == 0) { 714235267Sgabor if (ferror(ret->file)) 715235267Sgabor err(2, NULL); 716235267Sgabor } 717235267Sgabor } 718235267Sgabor } 719235267Sgabor 720235267Sgabor return (ret); 721235267Sgabor} 722235267Sgabor 723235267Sgaborstruct bwstring * 724235267Sgaborfile_reader_readline(struct file_reader *fr) 725235267Sgabor{ 726235267Sgabor struct bwstring *ret = NULL; 727235267Sgabor 728235267Sgabor if (fr->mmapaddr) { 729235267Sgabor unsigned char *mmapend; 730235267Sgabor 731235267Sgabor mmapend = fr->mmapaddr + fr->mmapsize; 732235267Sgabor if (fr->mmapptr >= mmapend) 733235267Sgabor return (NULL); 734235267Sgabor else { 735235267Sgabor unsigned char *strend; 736235267Sgabor size_t sz; 737235267Sgabor 738235267Sgabor sz = mmapend - fr->mmapptr; 739235267Sgabor strend = memchr(fr->mmapptr, fr->elsymb, sz); 740235267Sgabor 741235267Sgabor if (strend == NULL) { 742235267Sgabor ret = bwscsbdup(fr->mmapptr, sz); 743235267Sgabor fr->mmapptr = mmapend; 744235267Sgabor } else { 745235267Sgabor ret = bwscsbdup(fr->mmapptr, strend - 746235267Sgabor fr->mmapptr); 747235267Sgabor fr->mmapptr = strend + 1; 748235267Sgabor } 749235267Sgabor } 750235267Sgabor 751235267Sgabor } else if (fr->file != stdin) { 752235267Sgabor unsigned char *strend; 753235267Sgabor size_t bsz1, remsz, search_start; 754235267Sgabor 755235267Sgabor search_start = 0; 756235267Sgabor remsz = 0; 757235267Sgabor strend = NULL; 758235267Sgabor 759235267Sgabor if (fr->bsz > fr->strbeg) 760235267Sgabor remsz = fr->bsz - fr->strbeg; 761235267Sgabor 762235267Sgabor /* line read cycle */ 763235267Sgabor for (;;) { 764235267Sgabor if (remsz > search_start) 765235267Sgabor strend = memchr(fr->buffer + fr->strbeg + 766235267Sgabor search_start, fr->elsymb, remsz - 767235267Sgabor search_start); 768235267Sgabor else 769235267Sgabor strend = NULL; 770235267Sgabor 771235267Sgabor if (strend) 772235267Sgabor break; 773235267Sgabor if (feof(fr->file)) 774235267Sgabor break; 775235267Sgabor 776235267Sgabor if (fr->bsz != fr->cbsz) 777235267Sgabor /* NOTREACHED */ 778235267Sgabor err(2, "File read software error 1"); 779235267Sgabor 780235267Sgabor if (remsz > (READ_CHUNK >> 1)) { 781235267Sgabor search_start = fr->cbsz - fr->strbeg; 782235267Sgabor fr->cbsz += READ_CHUNK; 783235267Sgabor fr->buffer = sort_realloc(fr->buffer, 784235267Sgabor fr->cbsz); 785235267Sgabor bsz1 = fread(fr->buffer + fr->bsz, 1, 786235267Sgabor READ_CHUNK, fr->file); 787235267Sgabor if (bsz1 == 0) { 788235267Sgabor if (ferror(fr->file)) 789235267Sgabor err(2, NULL); 790235267Sgabor break; 791235267Sgabor } 792235267Sgabor fr->bsz += bsz1; 793235267Sgabor remsz += bsz1; 794235267Sgabor } else { 795235267Sgabor if (remsz > 0 && fr->strbeg>0) 796235267Sgabor bcopy(fr->buffer + fr->strbeg, 797235267Sgabor fr->buffer, remsz); 798235267Sgabor 799235267Sgabor fr->strbeg = 0; 800235267Sgabor search_start = remsz; 801235267Sgabor bsz1 = fread(fr->buffer + remsz, 1, 802235267Sgabor fr->cbsz - remsz, fr->file); 803235267Sgabor if (bsz1 == 0) { 804235267Sgabor if (ferror(fr->file)) 805235267Sgabor err(2, NULL); 806235267Sgabor break; 807235267Sgabor } 808235267Sgabor fr->bsz = remsz + bsz1; 809235267Sgabor remsz = fr->bsz; 810235267Sgabor } 811235267Sgabor } 812235267Sgabor 813235267Sgabor if (strend == NULL) 814235267Sgabor strend = fr->buffer + fr->bsz; 815235267Sgabor 816235267Sgabor if ((fr->buffer + fr->strbeg <= strend) && 817235267Sgabor (fr->strbeg < fr->bsz) && (remsz>0)) 818235267Sgabor ret = bwscsbdup(fr->buffer + fr->strbeg, strend - 819235267Sgabor fr->buffer - fr->strbeg); 820235267Sgabor 821235267Sgabor fr->strbeg = (strend - fr->buffer) + 1; 822235267Sgabor 823235267Sgabor } else { 824235267Sgabor size_t len = 0; 825235267Sgabor 826235267Sgabor ret = bwsfgetln(fr->file, &len, sort_opts_vals.zflag, 827235267Sgabor &(fr->rb)); 828235267Sgabor } 829235267Sgabor 830235267Sgabor return (ret); 831235267Sgabor} 832235267Sgabor 833235267Sgaborstatic void 834235267Sgaborfile_reader_clean(struct file_reader *fr) 835235267Sgabor{ 836235267Sgabor 837235267Sgabor if (fr) { 838235267Sgabor if (fr->mmapaddr) 839235267Sgabor munmap(fr->mmapaddr, fr->mmapsize); 840235267Sgabor 841235267Sgabor if (fr->fd) 842235267Sgabor close(fr->fd); 843235267Sgabor 844235267Sgabor if (fr->buffer) 845235267Sgabor sort_free(fr->buffer); 846235267Sgabor 847235267Sgabor if (fr->file) 848235267Sgabor if (fr->file != stdin) 849235267Sgabor closefile(fr->file, fr->fname); 850235267Sgabor 851235267Sgabor if(fr->fname) 852235267Sgabor sort_free(fr->fname); 853235267Sgabor 854235267Sgabor memset(fr, 0, sizeof(struct file_reader)); 855235267Sgabor } 856235267Sgabor} 857235267Sgabor 858235267Sgaborvoid 859235267Sgaborfile_reader_free(struct file_reader *fr) 860235267Sgabor{ 861235267Sgabor 862235267Sgabor if (fr) { 863235267Sgabor file_reader_clean(fr); 864235267Sgabor sort_free(fr); 865235267Sgabor } 866235267Sgabor} 867235267Sgabor 868235267Sgaborint 869235267Sgaborprocfile(const char *fsrc, struct sort_list *list, struct file_list *fl) 870235267Sgabor{ 871235267Sgabor struct file_reader *fr; 872235267Sgabor 873235267Sgabor fr = file_reader_init(fsrc); 874235267Sgabor if (fr == NULL) 875235267Sgabor err(2, NULL); 876235267Sgabor 877235267Sgabor /* file browse cycle */ 878235267Sgabor for (;;) { 879235267Sgabor struct bwstring *bws; 880235267Sgabor 881235267Sgabor bws = file_reader_readline(fr); 882235267Sgabor 883235267Sgabor if (bws == NULL) 884235267Sgabor break; 885235267Sgabor 886235267Sgabor sort_list_add(list, bws); 887235267Sgabor 888235267Sgabor if (list->memsize >= available_free_memory) { 889235267Sgabor char *fn; 890235267Sgabor 891235267Sgabor fn = new_tmp_file_name(); 892235267Sgabor sort_list_to_file(list, fn); 893235267Sgabor file_list_add(fl, fn, false); 894235267Sgabor sort_list_clean(list); 895235267Sgabor } 896235267Sgabor } 897235267Sgabor 898235267Sgabor file_reader_free(fr); 899235267Sgabor 900235267Sgabor return (0); 901235267Sgabor} 902235267Sgabor 903235267Sgabor/* 904235267Sgabor * Compare file headers. Files with EOF always go to the end of the list. 905235267Sgabor */ 906235267Sgaborstatic int 907235267Sgaborfile_header_cmp(struct file_header *f1, struct file_header *f2) 908235267Sgabor{ 909235267Sgabor 910235267Sgabor if (f1 == f2) 911235267Sgabor return (0); 912235267Sgabor else { 913235267Sgabor if (f1->fr == NULL) { 914235267Sgabor return ((f2->fr == NULL) ? 0 : +1); 915235267Sgabor } else if (f2->fr == NULL) 916235267Sgabor return (-1); 917235267Sgabor else { 918235267Sgabor int ret; 919235267Sgabor 920235267Sgabor ret = list_coll(&(f1->si), &(f2->si)); 921235267Sgabor if (!ret) 922235267Sgabor return ((f1->file_pos < f2->file_pos) ? -1 : +1); 923235267Sgabor return (ret); 924235267Sgabor } 925235267Sgabor } 926235267Sgabor} 927235267Sgabor 928235267Sgabor/* 929235267Sgabor * Allocate and init file header structure 930235267Sgabor */ 931235267Sgaborstatic void 932235267Sgaborfile_header_init(struct file_header **fh, const char *fn, size_t file_pos) 933235267Sgabor{ 934235267Sgabor 935235267Sgabor if (fh && fn) { 936235267Sgabor struct bwstring *line; 937235267Sgabor 938235267Sgabor *fh = sort_malloc(sizeof(struct file_header)); 939235267Sgabor (*fh)->file_pos = file_pos; 940235267Sgabor (*fh)->fr = file_reader_init(fn); 941235267Sgabor if ((*fh)->fr == NULL) { 942235267Sgabor perror(fn); 943235432Sgabor err(2, "%s", getstr(8)); 944235267Sgabor } 945235267Sgabor line = file_reader_readline((*fh)->fr); 946235267Sgabor if (line == NULL) { 947235267Sgabor file_reader_free((*fh)->fr); 948235267Sgabor (*fh)->fr = NULL; 949235267Sgabor (*fh)->si = NULL; 950235267Sgabor } else { 951235267Sgabor (*fh)->si = sort_list_item_alloc(); 952235267Sgabor sort_list_item_set((*fh)->si, line); 953235267Sgabor } 954235267Sgabor } 955235267Sgabor} 956235267Sgabor 957235267Sgabor/* 958235267Sgabor * Close file 959235267Sgabor */ 960235267Sgaborstatic void 961235267Sgaborfile_header_close(struct file_header **fh) 962235267Sgabor{ 963235267Sgabor 964235267Sgabor if (fh && *fh) { 965235267Sgabor if ((*fh)->fr) { 966235267Sgabor file_reader_free((*fh)->fr); 967235267Sgabor (*fh)->fr = NULL; 968235267Sgabor } 969235267Sgabor if ((*fh)->si) { 970235267Sgabor sort_list_item_clean((*fh)->si); 971235267Sgabor sort_free((*fh)->si); 972235267Sgabor (*fh)->si = NULL; 973235267Sgabor } 974235267Sgabor sort_free(*fh); 975235267Sgabor *fh = NULL; 976235267Sgabor } 977235267Sgabor} 978235267Sgabor 979235267Sgabor/* 980235267Sgabor * Swap two array elements 981235267Sgabor */ 982235267Sgaborstatic void 983242430Sgaborfile_header_swap(struct file_header **fh, size_t i1, size_t i2) 984235267Sgabor{ 985235267Sgabor struct file_header *tmp; 986235267Sgabor 987235267Sgabor tmp = fh[i1]; 988235267Sgabor fh[i1] = fh[i2]; 989235267Sgabor fh[i2] = tmp; 990235267Sgabor} 991235267Sgabor 992235267Sgabor/* heap algorithm ==>> */ 993235267Sgabor 994235267Sgabor/* 995235267Sgabor * See heap sort algorithm 996235267Sgabor * "Raises" last element to its right place 997235267Sgabor */ 998235267Sgaborstatic void 999242430Sgaborfile_header_heap_swim(struct file_header **fh, size_t indx) 1000235267Sgabor{ 1001235267Sgabor 1002235267Sgabor if (indx > 0) { 1003242430Sgabor size_t parent_index; 1004235267Sgabor 1005235267Sgabor parent_index = (indx - 1) >> 1; 1006235267Sgabor 1007235267Sgabor if (file_header_cmp(fh[indx], fh[parent_index]) < 0) { 1008235267Sgabor /* swap child and parent and continue */ 1009235267Sgabor file_header_swap(fh, indx, parent_index); 1010235267Sgabor file_header_heap_swim(fh, parent_index); 1011235267Sgabor } 1012235267Sgabor } 1013235267Sgabor} 1014235267Sgabor 1015235267Sgabor/* 1016235267Sgabor * Sink the top element to its correct position 1017235267Sgabor */ 1018235267Sgaborstatic void 1019242430Sgaborfile_header_heap_sink(struct file_header **fh, size_t indx, size_t size) 1020235267Sgabor{ 1021242430Sgabor size_t left_child_index; 1022242430Sgabor size_t right_child_index; 1023235267Sgabor 1024235267Sgabor left_child_index = indx + indx + 1; 1025235267Sgabor right_child_index = left_child_index + 1; 1026235267Sgabor 1027235267Sgabor if (left_child_index < size) { 1028242430Sgabor size_t min_child_index; 1029235267Sgabor 1030235267Sgabor min_child_index = left_child_index; 1031235267Sgabor 1032235267Sgabor if ((right_child_index < size) && 1033235267Sgabor (file_header_cmp(fh[left_child_index], 1034235267Sgabor fh[right_child_index]) > 0)) 1035235267Sgabor min_child_index = right_child_index; 1036235267Sgabor if (file_header_cmp(fh[indx], fh[min_child_index]) > 0) { 1037235267Sgabor file_header_swap(fh, indx, min_child_index); 1038235267Sgabor file_header_heap_sink(fh, min_child_index, size); 1039235267Sgabor } 1040235267Sgabor } 1041235267Sgabor} 1042235267Sgabor 1043235267Sgabor/* <<== heap algorithm */ 1044235267Sgabor 1045235267Sgabor/* 1046235267Sgabor * Adds element to the "left" end 1047235267Sgabor */ 1048235267Sgaborstatic void 1049242430Sgaborfile_header_list_rearrange_from_header(struct file_header **fh, size_t size) 1050235267Sgabor{ 1051235267Sgabor 1052235267Sgabor file_header_heap_sink(fh, 0, size); 1053235267Sgabor} 1054235267Sgabor 1055235267Sgabor/* 1056235267Sgabor * Adds element to the "right" end 1057235267Sgabor */ 1058235267Sgaborstatic void 1059242430Sgaborfile_header_list_push(struct file_header *f, struct file_header **fh, size_t size) 1060235267Sgabor{ 1061235267Sgabor 1062235267Sgabor fh[size++] = f; 1063235267Sgabor file_header_heap_swim(fh, size - 1); 1064235267Sgabor} 1065235267Sgabor 1066235267Sgaborstruct last_printed 1067235267Sgabor{ 1068235267Sgabor struct bwstring *str; 1069235267Sgabor}; 1070235267Sgabor 1071235267Sgabor/* 1072235267Sgabor * Prints the current line of the file 1073235267Sgabor */ 1074235267Sgaborstatic void 1075235267Sgaborfile_header_print(struct file_header *fh, FILE *f_out, struct last_printed *lp) 1076235267Sgabor{ 1077235267Sgabor 1078235267Sgabor if (fh && fh->fr && f_out && fh->si && fh->si->str) { 1079235267Sgabor if (sort_opts_vals.uflag) { 1080235267Sgabor if ((lp->str == NULL) || (str_list_coll(lp->str, &(fh->si)))) { 1081235267Sgabor bwsfwrite(fh->si->str, f_out, sort_opts_vals.zflag); 1082235267Sgabor if (lp->str) 1083235267Sgabor bwsfree(lp->str); 1084235267Sgabor lp->str = bwsdup(fh->si->str); 1085235267Sgabor } 1086235267Sgabor } else 1087235267Sgabor bwsfwrite(fh->si->str, f_out, sort_opts_vals.zflag); 1088235267Sgabor } 1089235267Sgabor} 1090235267Sgabor 1091235267Sgabor/* 1092235267Sgabor * Read next line 1093235267Sgabor */ 1094235267Sgaborstatic void 1095235267Sgaborfile_header_read_next(struct file_header *fh) 1096235267Sgabor{ 1097235267Sgabor 1098235267Sgabor if (fh && fh->fr) { 1099235267Sgabor struct bwstring *tmp; 1100235267Sgabor 1101235267Sgabor tmp = file_reader_readline(fh->fr); 1102235267Sgabor if (tmp == NULL) { 1103235267Sgabor file_reader_free(fh->fr); 1104235267Sgabor fh->fr = NULL; 1105235267Sgabor if (fh->si) { 1106235267Sgabor sort_list_item_clean(fh->si); 1107235267Sgabor sort_free(fh->si); 1108235267Sgabor fh->si = NULL; 1109235267Sgabor } 1110235267Sgabor } else { 1111235267Sgabor if (fh->si == NULL) 1112235267Sgabor fh->si = sort_list_item_alloc(); 1113235267Sgabor sort_list_item_set(fh->si, tmp); 1114235267Sgabor } 1115235267Sgabor } 1116235267Sgabor} 1117235267Sgabor 1118235267Sgabor/* 1119235267Sgabor * Merge array of "files headers" 1120235267Sgabor */ 1121235267Sgaborstatic void 1122242430Sgaborfile_headers_merge(size_t fnum, struct file_header **fh, FILE *f_out) 1123235267Sgabor{ 1124235267Sgabor struct last_printed lp; 1125242430Sgabor size_t i; 1126235267Sgabor 1127235267Sgabor memset(&lp, 0, sizeof(lp)); 1128235267Sgabor 1129235267Sgabor /* 1130235267Sgabor * construct the initial sort structure 1131235267Sgabor */ 1132235267Sgabor for (i = 0; i < fnum; i++) 1133235267Sgabor file_header_list_push(fh[i], fh, i); 1134235267Sgabor 1135235267Sgabor while (fh[0]->fr) { /* unfinished files are always in front */ 1136235267Sgabor /* output the smallest line: */ 1137235267Sgabor file_header_print(fh[0], f_out, &lp); 1138235267Sgabor /* read a new line, if possible: */ 1139235267Sgabor file_header_read_next(fh[0]); 1140235267Sgabor /* re-arrange the list: */ 1141235267Sgabor file_header_list_rearrange_from_header(fh, fnum); 1142235267Sgabor } 1143235267Sgabor 1144235267Sgabor if (lp.str) 1145235267Sgabor bwsfree(lp.str); 1146235267Sgabor} 1147235267Sgabor 1148235267Sgabor/* 1149235267Sgabor * Merges the given files into the output file, which can be 1150235267Sgabor * stdout. 1151235267Sgabor */ 1152235267Sgaborstatic void 1153242430Sgabormerge_files_array(size_t argc, char **argv, const char *fn_out) 1154235267Sgabor{ 1155235267Sgabor 1156235267Sgabor if (argv && fn_out) { 1157235267Sgabor struct file_header **fh; 1158235267Sgabor FILE *f_out; 1159242430Sgabor size_t i; 1160235267Sgabor 1161235267Sgabor f_out = openfile(fn_out, "w"); 1162235267Sgabor 1163235267Sgabor if (f_out == NULL) 1164235267Sgabor err(2, NULL); 1165235267Sgabor 1166235267Sgabor fh = sort_malloc((argc + 1) * sizeof(struct file_header *)); 1167235267Sgabor 1168235267Sgabor for (i = 0; i < argc; i++) 1169235267Sgabor file_header_init(fh + i, argv[i], (size_t) i); 1170235267Sgabor 1171235267Sgabor file_headers_merge(argc, fh, f_out); 1172235267Sgabor 1173235267Sgabor for (i = 0; i < argc; i++) 1174235267Sgabor file_header_close(fh + i); 1175235267Sgabor 1176235267Sgabor sort_free(fh); 1177235267Sgabor 1178235267Sgabor closefile(f_out, fn_out); 1179235267Sgabor } 1180235267Sgabor} 1181235267Sgabor 1182235267Sgabor/* 1183235267Sgabor * Shrinks the file list until its size smaller than max number of opened files 1184235267Sgabor */ 1185235267Sgaborstatic int 1186235267Sgaborshrink_file_list(struct file_list *fl) 1187235267Sgabor{ 1188235267Sgabor 1189235267Sgabor if ((fl == NULL) || (size_t) (fl->count) < max_open_files) 1190235267Sgabor return (0); 1191235267Sgabor else { 1192235267Sgabor struct file_list new_fl; 1193242430Sgabor size_t indx = 0; 1194235267Sgabor 1195235267Sgabor file_list_init(&new_fl, true); 1196235267Sgabor while (indx < fl->count) { 1197235267Sgabor char *fnew; 1198242430Sgabor size_t num; 1199235267Sgabor 1200235267Sgabor num = fl->count - indx; 1201235267Sgabor fnew = new_tmp_file_name(); 1202235267Sgabor 1203235267Sgabor if ((size_t) num >= max_open_files) 1204235267Sgabor num = max_open_files - 1; 1205235267Sgabor merge_files_array(num, fl->fns + indx, fnew); 1206235267Sgabor if (fl->tmp) { 1207242430Sgabor size_t i; 1208235267Sgabor 1209235267Sgabor for (i = 0; i < num; i++) 1210235267Sgabor unlink(fl->fns[indx + i]); 1211235267Sgabor } 1212235267Sgabor file_list_add(&new_fl, fnew, false); 1213235267Sgabor indx += num; 1214235267Sgabor } 1215235267Sgabor fl->tmp = false; /* already taken care of */ 1216235267Sgabor file_list_clean(fl); 1217235267Sgabor 1218235267Sgabor fl->count = new_fl.count; 1219235267Sgabor fl->fns = new_fl.fns; 1220235267Sgabor fl->sz = new_fl.sz; 1221235267Sgabor fl->tmp = new_fl.tmp; 1222235267Sgabor 1223235267Sgabor return (1); 1224235267Sgabor } 1225235267Sgabor} 1226235267Sgabor 1227235267Sgabor/* 1228235267Sgabor * Merge list of files 1229235267Sgabor */ 1230235267Sgaborvoid 1231235267Sgabormerge_files(struct file_list *fl, const char *fn_out) 1232235267Sgabor{ 1233235267Sgabor 1234235267Sgabor if (fl && fn_out) { 1235235267Sgabor while (shrink_file_list(fl)); 1236235267Sgabor 1237235267Sgabor merge_files_array(fl->count, fl->fns, fn_out); 1238235267Sgabor } 1239235267Sgabor} 1240235267Sgabor 1241235267Sgaborstatic const char * 1242235267Sgaborget_sort_method_name(int sm) 1243235267Sgabor{ 1244235267Sgabor 1245235267Sgabor if (sm == SORT_MERGESORT) 1246235267Sgabor return "mergesort"; 1247235267Sgabor else if (sort_opts_vals.sort_method == SORT_RADIXSORT) 1248235267Sgabor return "radixsort"; 1249235267Sgabor else if (sort_opts_vals.sort_method == SORT_HEAPSORT) 1250235267Sgabor return "heapsort"; 1251235267Sgabor else 1252235267Sgabor return "quicksort"; 1253235267Sgabor} 1254235267Sgabor 1255235267Sgabor/* 1256235267Sgabor * Wrapper for qsort 1257235267Sgabor */ 1258235267Sgaborstatic int sort_qsort(void *list, size_t count, size_t elem_size, 1259235267Sgabor int (*cmp_func)(const void *, const void *)) 1260235267Sgabor{ 1261235267Sgabor 1262235267Sgabor qsort(list, count, elem_size, cmp_func); 1263235267Sgabor return (0); 1264235267Sgabor} 1265235267Sgabor 1266235267Sgabor/* 1267235267Sgabor * Sort list of lines and writes it to the file 1268235267Sgabor */ 1269235267Sgaborvoid 1270235267Sgaborsort_list_to_file(struct sort_list *list, const char *outfile) 1271235267Sgabor{ 1272235267Sgabor struct sort_mods *sm = &(keys[0].sm); 1273235267Sgabor 1274235267Sgabor if (!(sm->Mflag) && !(sm->Rflag) && !(sm->Vflag) && !(sm->Vflag) && 1275235267Sgabor !(sm->gflag) && !(sm->hflag) && !(sm->nflag)) { 1276235267Sgabor if ((sort_opts_vals.sort_method == SORT_DEFAULT) && byte_sort) 1277235267Sgabor sort_opts_vals.sort_method = SORT_RADIXSORT; 1278235267Sgabor 1279235267Sgabor } else if (sort_opts_vals.sort_method == SORT_RADIXSORT) 1280235432Sgabor err(2, "%s", getstr(9)); 1281235267Sgabor 1282235267Sgabor /* 1283235267Sgabor * to handle stable sort and the unique cases in the 1284235267Sgabor * right order, we need stable basic algorithm 1285235267Sgabor */ 1286235267Sgabor if (sort_opts_vals.sflag) { 1287235267Sgabor switch (sort_opts_vals.sort_method){ 1288235267Sgabor case SORT_MERGESORT: 1289235267Sgabor break; 1290235267Sgabor case SORT_RADIXSORT: 1291235267Sgabor break; 1292235267Sgabor case SORT_DEFAULT: 1293235267Sgabor sort_opts_vals.sort_method = SORT_MERGESORT; 1294235267Sgabor break; 1295235267Sgabor default: 1296235432Sgabor errx(2, "%s", getstr(10)); 1297235267Sgabor }; 1298235267Sgabor } 1299235267Sgabor 1300235267Sgabor if (sort_opts_vals.sort_method == SORT_DEFAULT) 1301238108Sgabor sort_opts_vals.sort_method = DEFAULT_SORT_ALGORITHM; 1302235267Sgabor 1303235267Sgabor if (debug_sort) 1304235267Sgabor printf("sort_method=%s\n", 1305235267Sgabor get_sort_method_name(sort_opts_vals.sort_method)); 1306235267Sgabor 1307235267Sgabor switch (sort_opts_vals.sort_method){ 1308235267Sgabor case SORT_RADIXSORT: 1309235267Sgabor rxsort(list->list, list->count); 1310235267Sgabor sort_list_dump(list, outfile); 1311235267Sgabor break; 1312235267Sgabor case SORT_MERGESORT: 1313235267Sgabor mt_sort(list, mergesort, outfile); 1314235267Sgabor break; 1315235267Sgabor case SORT_HEAPSORT: 1316235267Sgabor mt_sort(list, heapsort, outfile); 1317235267Sgabor break; 1318238108Sgabor case SORT_QSORT: 1319235267Sgabor mt_sort(list, sort_qsort, outfile); 1320235267Sgabor break; 1321238108Sgabor default: 1322238108Sgabor mt_sort(list, DEFAULT_SORT_FUNC, outfile); 1323238108Sgabor break; 1324235267Sgabor } 1325235267Sgabor} 1326235267Sgabor 1327235267Sgabor/******************* MT SORT ************************/ 1328235267Sgabor 1329235267Sgabor#if defined(SORT_THREADS) 1330235267Sgabor/* semaphore to count threads */ 1331235267Sgaborstatic sem_t mtsem; 1332235267Sgabor 1333235267Sgabor/* current system sort function */ 1334235267Sgaborstatic int (*g_sort_func)(void *, size_t, size_t, 1335235267Sgabor int(*)(const void *, const void *)); 1336235267Sgabor 1337235267Sgabor/* 1338235267Sgabor * Sort cycle thread (in multi-threaded mode) 1339235267Sgabor */ 1340235267Sgaborstatic void* 1341235267Sgabormt_sort_thread(void* arg) 1342235267Sgabor{ 1343235267Sgabor struct sort_list *list = arg; 1344235267Sgabor 1345235267Sgabor g_sort_func(list->list, list->count, sizeof(struct sort_list_item *), 1346235267Sgabor (int(*)(const void *, const void *)) list_coll); 1347235267Sgabor 1348235267Sgabor sem_post(&mtsem); 1349235267Sgabor 1350235267Sgabor return (arg); 1351235267Sgabor} 1352235267Sgabor 1353235267Sgabor/* 1354235267Sgabor * Compare sub-lists. Empty sub-lists always go to the end of the list. 1355235267Sgabor */ 1356235267Sgaborstatic int 1357235267Sgaborsub_list_cmp(struct sort_list *l1, struct sort_list *l2) 1358235267Sgabor{ 1359235267Sgabor 1360235267Sgabor if (l1 == l2) 1361235267Sgabor return (0); 1362235267Sgabor else { 1363235267Sgabor if (l1->count == 0) { 1364235267Sgabor return ((l2->count == 0) ? 0 : +1); 1365235267Sgabor } else if (l2->count == 0) { 1366235267Sgabor return (-1); 1367235267Sgabor } else { 1368235267Sgabor int ret; 1369235267Sgabor 1370235267Sgabor ret = list_coll(&(l1->list[0]), &(l2->list[0])); 1371235267Sgabor if (!ret) 1372235267Sgabor return ((l1->sub_list_pos < l2->sub_list_pos) ? 1373235267Sgabor -1 : +1); 1374235267Sgabor return (ret); 1375235267Sgabor } 1376235267Sgabor } 1377235267Sgabor} 1378235267Sgabor 1379235267Sgabor/* 1380235267Sgabor * Swap two array elements 1381235267Sgabor */ 1382235267Sgaborstatic void 1383242430Sgaborsub_list_swap(struct sort_list **sl, size_t i1, size_t i2) 1384235267Sgabor{ 1385235267Sgabor struct sort_list *tmp; 1386235267Sgabor 1387235267Sgabor tmp = sl[i1]; 1388235267Sgabor sl[i1] = sl[i2]; 1389235267Sgabor sl[i2] = tmp; 1390235267Sgabor} 1391235267Sgabor 1392235267Sgabor/* heap algorithm ==>> */ 1393235267Sgabor 1394235267Sgabor/* 1395235267Sgabor * See heap sort algorithm 1396235267Sgabor * "Raises" last element to its right place 1397235267Sgabor */ 1398235267Sgaborstatic void 1399242430Sgaborsub_list_swim(struct sort_list **sl, size_t indx) 1400235267Sgabor{ 1401235267Sgabor 1402235267Sgabor if (indx > 0) { 1403242430Sgabor size_t parent_index; 1404235267Sgabor 1405235267Sgabor parent_index = (indx - 1) >> 1; 1406235267Sgabor 1407235267Sgabor if (sub_list_cmp(sl[indx], sl[parent_index]) < 0) { 1408235267Sgabor /* swap child and parent and continue */ 1409235267Sgabor sub_list_swap(sl, indx, parent_index); 1410235267Sgabor sub_list_swim(sl, parent_index); 1411235267Sgabor } 1412235267Sgabor } 1413235267Sgabor} 1414235267Sgabor 1415235267Sgabor/* 1416235267Sgabor * Sink the top element to its correct position 1417235267Sgabor */ 1418235267Sgaborstatic void 1419242430Sgaborsub_list_sink(struct sort_list **sl, size_t indx, size_t size) 1420235267Sgabor{ 1421242430Sgabor size_t left_child_index; 1422242430Sgabor size_t right_child_index; 1423235267Sgabor 1424235267Sgabor left_child_index = indx + indx + 1; 1425235267Sgabor right_child_index = left_child_index + 1; 1426235267Sgabor 1427235267Sgabor if (left_child_index < size) { 1428242430Sgabor size_t min_child_index; 1429235267Sgabor 1430235267Sgabor min_child_index = left_child_index; 1431235267Sgabor 1432235267Sgabor if ((right_child_index < size) && 1433235267Sgabor (sub_list_cmp(sl[left_child_index], 1434235267Sgabor sl[right_child_index]) > 0)) 1435235267Sgabor min_child_index = right_child_index; 1436235267Sgabor if (sub_list_cmp(sl[indx], sl[min_child_index]) > 0) { 1437235267Sgabor sub_list_swap(sl, indx, min_child_index); 1438235267Sgabor sub_list_sink(sl, min_child_index, size); 1439235267Sgabor } 1440235267Sgabor } 1441235267Sgabor} 1442235267Sgabor 1443235267Sgabor/* <<== heap algorithm */ 1444235267Sgabor 1445235267Sgabor/* 1446235267Sgabor * Adds element to the "right" end 1447235267Sgabor */ 1448235267Sgaborstatic void 1449242430Sgaborsub_list_push(struct sort_list *s, struct sort_list **sl, size_t size) 1450235267Sgabor{ 1451235267Sgabor 1452235267Sgabor sl[size++] = s; 1453235267Sgabor sub_list_swim(sl, size - 1); 1454235267Sgabor} 1455235267Sgabor 1456235267Sgaborstruct last_printed_item 1457235267Sgabor{ 1458235267Sgabor struct sort_list_item *item; 1459235267Sgabor}; 1460235267Sgabor 1461235267Sgabor/* 1462235267Sgabor * Prints the current line of the file 1463235267Sgabor */ 1464235267Sgaborstatic void 1465235267Sgaborsub_list_header_print(struct sort_list *sl, FILE *f_out, 1466235267Sgabor struct last_printed_item *lp) 1467235267Sgabor{ 1468235267Sgabor 1469235267Sgabor if (sl && sl->count && f_out && sl->list[0]->str) { 1470235267Sgabor if (sort_opts_vals.uflag) { 1471235267Sgabor if ((lp->item == NULL) || (list_coll(&(lp->item), 1472235267Sgabor &(sl->list[0])))) { 1473235267Sgabor bwsfwrite(sl->list[0]->str, f_out, 1474235267Sgabor sort_opts_vals.zflag); 1475235267Sgabor lp->item = sl->list[0]; 1476235267Sgabor } 1477235267Sgabor } else 1478235267Sgabor bwsfwrite(sl->list[0]->str, f_out, 1479235267Sgabor sort_opts_vals.zflag); 1480235267Sgabor } 1481235267Sgabor} 1482235267Sgabor 1483235267Sgabor/* 1484235267Sgabor * Read next line 1485235267Sgabor */ 1486235267Sgaborstatic void 1487235267Sgaborsub_list_next(struct sort_list *sl) 1488235267Sgabor{ 1489235267Sgabor 1490235267Sgabor if (sl && sl->count) { 1491235267Sgabor sl->list += 1; 1492235267Sgabor sl->count -= 1; 1493235267Sgabor } 1494235267Sgabor} 1495235267Sgabor 1496235267Sgabor/* 1497235267Sgabor * Merge sub-lists to a file 1498235267Sgabor */ 1499235267Sgaborstatic void 1500235267Sgabormerge_sub_lists(struct sort_list **sl, size_t n, FILE* f_out) 1501235267Sgabor{ 1502235267Sgabor struct last_printed_item lp; 1503235267Sgabor size_t i; 1504235267Sgabor 1505235267Sgabor memset(&lp,0,sizeof(lp)); 1506235267Sgabor 1507235267Sgabor /* construct the initial list: */ 1508235267Sgabor for (i = 0; i < n; i++) 1509235267Sgabor sub_list_push(sl[i], sl, i); 1510235267Sgabor 1511235267Sgabor while (sl[0]->count) { /* unfinished lists are always in front */ 1512235267Sgabor /* output the smallest line: */ 1513235267Sgabor sub_list_header_print(sl[0], f_out, &lp); 1514235267Sgabor /* move to a new line, if possible: */ 1515235267Sgabor sub_list_next(sl[0]); 1516235267Sgabor /* re-arrange the list: */ 1517235267Sgabor sub_list_sink(sl, 0, n); 1518235267Sgabor } 1519235267Sgabor} 1520235267Sgabor 1521235267Sgabor/* 1522235267Sgabor * Merge sub-lists to a file 1523235267Sgabor */ 1524235267Sgaborstatic void 1525235267Sgabormerge_list_parts(struct sort_list **parts, size_t n, const char *fn) 1526235267Sgabor{ 1527235267Sgabor FILE* f_out; 1528235267Sgabor 1529235267Sgabor f_out = openfile(fn,"w"); 1530235267Sgabor 1531235267Sgabor merge_sub_lists(parts, n, f_out); 1532235267Sgabor 1533235267Sgabor closefile(f_out, fn); 1534235267Sgabor} 1535235267Sgabor 1536235267Sgabor#endif /* defined(SORT_THREADS) */ 1537235267Sgabor/* 1538235267Sgabor * Multi-threaded sort algorithm "driver" 1539235267Sgabor */ 1540235267Sgaborstatic void 1541235267Sgabormt_sort(struct sort_list *list, 1542235267Sgabor int(*sort_func)(void *, size_t, size_t, int(*)(const void *, const void *)), 1543235267Sgabor const char* fn) 1544235267Sgabor{ 1545235267Sgabor#if defined(SORT_THREADS) 1546235987Sgabor if (nthreads < 2 || list->count < MT_SORT_THRESHOLD) { 1547235987Sgabor size_t nthreads_save = nthreads; 1548235987Sgabor nthreads = 1; 1549235267Sgabor#endif 1550235267Sgabor /* if single thread or small data, do simple sort */ 1551235267Sgabor sort_func(list->list, list->count, 1552235267Sgabor sizeof(struct sort_list_item *), 1553235267Sgabor (int(*)(const void *, const void *)) list_coll); 1554235267Sgabor sort_list_dump(list, fn); 1555235267Sgabor#if defined(SORT_THREADS) 1556235987Sgabor nthreads = nthreads_save; 1557235267Sgabor } else { 1558235267Sgabor /* multi-threaded sort */ 1559235267Sgabor struct sort_list **parts; 1560235267Sgabor size_t avgsize, cstart, i; 1561235267Sgabor 1562235267Sgabor /* array of sub-lists */ 1563235267Sgabor parts = sort_malloc(sizeof(struct sort_list*) * nthreads); 1564235267Sgabor cstart = 0; 1565235267Sgabor avgsize = list->count / nthreads; 1566235267Sgabor 1567235267Sgabor /* set global system sort function */ 1568235267Sgabor g_sort_func = sort_func; 1569235267Sgabor 1570235267Sgabor /* set sublists */ 1571235267Sgabor for (i = 0; i < nthreads; ++i) { 1572235267Sgabor size_t sz = 0; 1573235267Sgabor 1574235267Sgabor parts[i] = sort_malloc(sizeof(struct sort_list)); 1575235267Sgabor parts[i]->list = list->list + cstart; 1576235267Sgabor parts[i]->memsize = 0; 1577235267Sgabor parts[i]->sub_list_pos = i; 1578235267Sgabor 1579235267Sgabor sz = (i == nthreads - 1) ? list->count - cstart : 1580235267Sgabor avgsize; 1581235267Sgabor 1582235267Sgabor parts[i]->count = sz; 1583235267Sgabor 1584235267Sgabor parts[i]->size = parts[i]->count; 1585235267Sgabor 1586235267Sgabor cstart += sz; 1587235267Sgabor } 1588235267Sgabor 1589235267Sgabor /* init threads counting semaphore */ 1590235267Sgabor sem_init(&mtsem, 0, 0); 1591235267Sgabor 1592235267Sgabor /* start threads */ 1593235267Sgabor for (i = 0; i < nthreads; ++i) { 1594235267Sgabor pthread_t pth; 1595235267Sgabor pthread_attr_t attr; 1596235267Sgabor 1597235267Sgabor pthread_attr_init(&attr); 1598235267Sgabor pthread_attr_setdetachstate(&attr, PTHREAD_DETACHED); 1599235267Sgabor 1600235987Sgabor for (;;) { 1601235987Sgabor int res = pthread_create(&pth, &attr, 1602235987Sgabor mt_sort_thread, parts[i]); 1603235267Sgabor 1604235987Sgabor if (res >= 0) 1605235987Sgabor break; 1606235987Sgabor if (errno == EAGAIN) { 1607235987Sgabor pthread_yield(); 1608235987Sgabor continue; 1609235987Sgabor } 1610235987Sgabor err(2, NULL); 1611235987Sgabor } 1612235987Sgabor 1613235267Sgabor pthread_attr_destroy(&attr); 1614235267Sgabor } 1615235267Sgabor 1616235267Sgabor /* wait for threads completion */ 1617235267Sgabor for (i = 0; i < nthreads; ++i) { 1618235267Sgabor sem_wait(&mtsem); 1619235267Sgabor } 1620235267Sgabor /* destroy the semaphore - we do not need it anymore */ 1621235267Sgabor sem_destroy(&mtsem); 1622235267Sgabor 1623235267Sgabor /* merge sorted sub-lists to the file */ 1624235267Sgabor merge_list_parts(parts, nthreads, fn); 1625235267Sgabor 1626235267Sgabor /* free sub-lists data */ 1627235267Sgabor for (i = 0; i < nthreads; ++i) { 1628235267Sgabor sort_free(parts[i]); 1629235267Sgabor } 1630235267Sgabor sort_free(parts); 1631235267Sgabor } 1632235267Sgabor#endif /* defined(SORT_THREADS) */ 1633235267Sgabor} 1634