1/*- 2 * SPDX-License-Identifier: BSD-2-Clause 3 * 4 * Copyright (C) 2009 Gabor Kovesdan <gabor@FreeBSD.org> 5 * Copyright (C) 2012 Oleg Moskalenko <mom040267@gmail.com> 6 * All rights reserved. 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions 10 * are met: 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in the 15 * documentation and/or other materials provided with the distribution. 16 * 17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 18 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 19 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 20 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 21 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 22 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 23 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 24 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 25 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 26 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 27 * SUCH DAMAGE. 28 */ 29 30#include <sys/types.h> 31 32#include <errno.h> 33#include <err.h> 34#include <langinfo.h> 35#include <limits.h> 36#include <math.h> 37#include <md5.h> 38#include <stdlib.h> 39#include <string.h> 40#include <wchar.h> 41#include <wctype.h> 42 43#include "coll.h" 44#include "vsort.h" 45 46struct key_specs *keys; 47size_t keys_num = 0; 48 49wint_t symbol_decimal_point = L'.'; 50/* there is no default thousands separator in collate rules: */ 51wint_t symbol_thousands_sep = 0; 52wint_t symbol_negative_sign = L'-'; 53wint_t symbol_positive_sign = L'+'; 54 55static int wstrcoll(struct key_value *kv1, struct key_value *kv2, size_t offset); 56static int gnumcoll(struct key_value*, struct key_value *, size_t offset); 57static int monthcoll(struct key_value*, struct key_value *, size_t offset); 58static int numcoll(struct key_value*, struct key_value *, size_t offset); 59static int hnumcoll(struct key_value*, struct key_value *, size_t offset); 60static int randomcoll(struct key_value*, struct key_value *, size_t offset); 61static int versioncoll(struct key_value*, struct key_value *, size_t offset); 62 63/* 64 * Allocate keys array 65 */ 66struct keys_array * 67keys_array_alloc(void) 68{ 69 struct keys_array *ka; 70 size_t sz; 71 72 sz = keys_array_size(); 73 ka = sort_calloc(1, sz); 74 75 return (ka); 76} 77 78/* 79 * Calculate whether we need key hint space 80 */ 81static size_t 82key_hint_size(void) 83{ 84 85 return (need_hint ? sizeof(struct key_hint) : 0); 86} 87 88/* 89 * Calculate keys array size 90 */ 91size_t 92keys_array_size(void) 93{ 94 95 return (keys_num * (sizeof(struct key_value) + key_hint_size())); 96} 97 98/* 99 * Clean data of keys array 100 */ 101void 102clean_keys_array(const struct bwstring *s, struct keys_array *ka) 103{ 104 105 if (ka) { 106 for (size_t i = 0; i < keys_num; ++i) { 107 const struct key_value *kv; 108 109 kv = get_key_from_keys_array(ka, i); 110 if (kv->k && kv->k != s) 111 bwsfree(kv->k); 112 } 113 memset(ka, 0, keys_array_size()); 114 } 115} 116 117/* 118 * Get pointer to a key value in the keys set 119 */ 120struct key_value * 121get_key_from_keys_array(struct keys_array *ka, size_t ind) 122{ 123 124 return ((struct key_value *)((caddr_t)ka->key + 125 ind * (sizeof(struct key_value) + key_hint_size()))); 126} 127 128/* 129 * Set value of a key in the keys set 130 */ 131void 132set_key_on_keys_array(struct keys_array *ka, struct bwstring *s, size_t ind) 133{ 134 135 if (ka && keys_num > ind) { 136 struct key_value *kv; 137 138 kv = get_key_from_keys_array(ka, ind); 139 140 if (kv->k && kv->k != s) 141 bwsfree(kv->k); 142 kv->k = s; 143 } 144} 145 146/* 147 * Initialize a sort list item 148 */ 149struct sort_list_item * 150sort_list_item_alloc(void) 151{ 152 struct sort_list_item *si; 153 size_t sz; 154 155 sz = sizeof(struct sort_list_item) + keys_array_size(); 156 si = sort_calloc(1, sz); 157 158 return (si); 159} 160 161size_t 162sort_list_item_size(struct sort_list_item *si) 163{ 164 size_t ret = 0; 165 166 if (si) { 167 ret = sizeof(struct sort_list_item) + keys_array_size(); 168 if (si->str) 169 ret += bws_memsize(si->str); 170 for (size_t i = 0; i < keys_num; ++i) { 171 const struct key_value *kv; 172 173 kv = get_key_from_keys_array(&si->ka, i); 174 175 if (kv->k != si->str) 176 ret += bws_memsize(kv->k); 177 } 178 } 179 return (ret); 180} 181 182/* 183 * Calculate key for a sort list item 184 */ 185static void 186sort_list_item_make_key(struct sort_list_item *si) 187{ 188 189 preproc(si->str, &(si->ka)); 190} 191 192/* 193 * Set value of a sort list item. 194 * Return combined string and keys memory size. 195 */ 196void 197sort_list_item_set(struct sort_list_item *si, struct bwstring *str) 198{ 199 200 if (si) { 201 clean_keys_array(si->str, &(si->ka)); 202 if (si->str) { 203 if (si->str == str) { 204 /* we are trying to reset the same string */ 205 return; 206 } else { 207 bwsfree(si->str); 208 si->str = NULL; 209 } 210 } 211 si->str = str; 212 sort_list_item_make_key(si); 213 } 214} 215 216/* 217 * De-allocate a sort list item object memory 218 */ 219void 220sort_list_item_clean(struct sort_list_item *si) 221{ 222 223 if (si) { 224 clean_keys_array(si->str, &(si->ka)); 225 if (si->str) { 226 bwsfree(si->str); 227 si->str = NULL; 228 } 229 } 230} 231 232/* 233 * Skip columns according to specs 234 */ 235static size_t 236skip_cols_to_start(const struct bwstring *s, size_t cols, size_t start, 237 bool skip_blanks, bool *empty_key) 238{ 239 if (cols < 1) 240 return (BWSLEN(s) + 1); 241 242 if (skip_blanks) 243 while (start < BWSLEN(s) && iswblank(BWS_GET(s,start))) 244 ++start; 245 246 while (start < BWSLEN(s) && cols > 1) { 247 --cols; 248 ++start; 249 } 250 251 if (start >= BWSLEN(s)) 252 *empty_key = true; 253 254 return (start); 255} 256 257/* 258 * Skip fields according to specs 259 */ 260static size_t 261skip_fields_to_start(const struct bwstring *s, size_t fields, bool *empty_field) 262{ 263 264 if (fields < 2) { 265 if (BWSLEN(s) == 0) 266 *empty_field = true; 267 return (0); 268 } else if (!(sort_opts_vals.tflag)) { 269 size_t cpos = 0; 270 bool pb = true; 271 272 while (cpos < BWSLEN(s)) { 273 bool isblank; 274 275 isblank = iswblank(BWS_GET(s, cpos)); 276 277 if (isblank && !pb) { 278 --fields; 279 if (fields <= 1) 280 return (cpos); 281 } 282 pb = isblank; 283 ++cpos; 284 } 285 if (fields > 1) 286 *empty_field = true; 287 return (cpos); 288 } else { 289 size_t cpos = 0; 290 291 while (cpos < BWSLEN(s)) { 292 if (BWS_GET(s,cpos) == (wchar_t)sort_opts_vals.field_sep) { 293 --fields; 294 if (fields <= 1) 295 return (cpos + 1); 296 } 297 ++cpos; 298 } 299 if (fields > 1) 300 *empty_field = true; 301 return (cpos); 302 } 303} 304 305/* 306 * Find fields start 307 */ 308static void 309find_field_start(const struct bwstring *s, struct key_specs *ks, 310 size_t *field_start, size_t *key_start, bool *empty_field, bool *empty_key) 311{ 312 313 *field_start = skip_fields_to_start(s, ks->f1, empty_field); 314 if (!*empty_field) 315 *key_start = skip_cols_to_start(s, ks->c1, *field_start, 316 ks->pos1b, empty_key); 317 else 318 *empty_key = true; 319} 320 321/* 322 * Find end key position 323 */ 324static size_t 325find_field_end(const struct bwstring *s, struct key_specs *ks) 326{ 327 size_t f2, next_field_start, pos_end; 328 bool empty_field, empty_key; 329 330 empty_field = false; 331 empty_key = false; 332 f2 = ks->f2; 333 334 if (f2 == 0) 335 return (BWSLEN(s) + 1); 336 else { 337 if (ks->c2 == 0) { 338 next_field_start = skip_fields_to_start(s, f2 + 1, 339 &empty_field); 340 if ((next_field_start > 0) && sort_opts_vals.tflag && 341 ((wchar_t)sort_opts_vals.field_sep == BWS_GET(s, 342 next_field_start - 1))) 343 --next_field_start; 344 } else 345 next_field_start = skip_fields_to_start(s, f2, 346 &empty_field); 347 } 348 349 if (empty_field || (next_field_start >= BWSLEN(s))) 350 return (BWSLEN(s) + 1); 351 352 if (ks->c2) { 353 pos_end = skip_cols_to_start(s, ks->c2, next_field_start, 354 ks->pos2b, &empty_key); 355 if (pos_end < BWSLEN(s)) 356 ++pos_end; 357 } else 358 pos_end = next_field_start; 359 360 return (pos_end); 361} 362 363/* 364 * Cut a field according to the key specs 365 */ 366static struct bwstring * 367cut_field(const struct bwstring *s, struct key_specs *ks) 368{ 369 struct bwstring *ret = NULL; 370 371 if (s && ks) { 372 size_t field_start, key_end, key_start, sz; 373 bool empty_field, empty_key; 374 375 field_start = 0; 376 key_start = 0; 377 empty_field = false; 378 empty_key = false; 379 380 find_field_start(s, ks, &field_start, &key_start, 381 &empty_field, &empty_key); 382 383 if (empty_key) 384 sz = 0; 385 else { 386 key_end = find_field_end(s, ks); 387 sz = (key_end < key_start) ? 0 : (key_end - key_start); 388 } 389 390 ret = bwsalloc(sz); 391 if (sz) 392 bwsnocpy(ret, s, key_start, sz); 393 } else 394 ret = bwsalloc(0); 395 396 return (ret); 397} 398 399/* 400 * Preprocesses a line applying the necessary transformations 401 * specified by command line options and returns the preprocessed 402 * string, which can be used to compare. 403 */ 404int 405preproc(struct bwstring *s, struct keys_array *ka) 406{ 407 408 if (sort_opts_vals.kflag) 409 for (size_t i = 0; i < keys_num; i++) { 410 struct bwstring *key; 411 struct key_specs *kspecs; 412 struct sort_mods *sm; 413 414 kspecs = &(keys[i]); 415 key = cut_field(s, kspecs); 416 417 sm = &(kspecs->sm); 418 if (sm->dflag) 419 key = dictionary_order(key); 420 else if (sm->iflag) 421 key = ignore_nonprinting(key); 422 if (sm->fflag || sm->Mflag) 423 key = ignore_case(key); 424 425 set_key_on_keys_array(ka, key, i); 426 } 427 else { 428 struct bwstring *ret = NULL; 429 struct sort_mods *sm = default_sort_mods; 430 431 if (sm->bflag) { 432 if (ret == NULL) 433 ret = bwsdup(s); 434 ret = ignore_leading_blanks(ret); 435 } 436 if (sm->dflag) { 437 if (ret == NULL) 438 ret = bwsdup(s); 439 ret = dictionary_order(ret); 440 } else if (sm->iflag) { 441 if (ret == NULL) 442 ret = bwsdup(s); 443 ret = ignore_nonprinting(ret); 444 } 445 if (sm->fflag || sm->Mflag) { 446 if (ret == NULL) 447 ret = bwsdup(s); 448 ret = ignore_case(ret); 449 } 450 if (ret == NULL) 451 set_key_on_keys_array(ka, s, 0); 452 else 453 set_key_on_keys_array(ka, ret, 0); 454 } 455 456 return 0; 457} 458 459cmpcoll_t 460get_sort_func(struct sort_mods *sm) 461{ 462 463 if (sm->nflag) 464 return (numcoll); 465 else if (sm->hflag) 466 return (hnumcoll); 467 else if (sm->gflag) 468 return (gnumcoll); 469 else if (sm->Mflag) 470 return (monthcoll); 471 else if (sm->Rflag) 472 return (randomcoll); 473 else if (sm->Vflag) 474 return (versioncoll); 475 else 476 return (wstrcoll); 477} 478 479/* 480 * Compares the given strings. Returns a positive number if 481 * the first precedes the second, a negative number if the second is 482 * the preceding one, and zero if they are equal. This function calls 483 * the underlying collate functions, which done the actual comparison. 484 */ 485int 486key_coll(struct keys_array *ps1, struct keys_array *ps2, size_t offset) 487{ 488 struct key_value *kv1, *kv2; 489 struct sort_mods *sm; 490 int res = 0; 491 492 for (size_t i = 0; i < keys_num; ++i) { 493 kv1 = get_key_from_keys_array(ps1, i); 494 kv2 = get_key_from_keys_array(ps2, i); 495 sm = &(keys[i].sm); 496 497 if (sm->rflag) 498 res = sm->func(kv2, kv1, offset); 499 else 500 res = sm->func(kv1, kv2, offset); 501 502 if (res) 503 break; 504 505 /* offset applies to only the first key */ 506 offset = 0; 507 } 508 return (res); 509} 510 511/* 512 * Compare two strings. 513 * Plain symbol-by-symbol comparison. 514 */ 515int 516top_level_str_coll(const struct bwstring *s1, const struct bwstring *s2) 517{ 518 519 if (default_sort_mods->rflag) { 520 const struct bwstring *tmp; 521 522 tmp = s1; 523 s1 = s2; 524 s2 = tmp; 525 } 526 527 return (bwscoll(s1, s2, 0)); 528} 529 530/* 531 * Compare a string and a sort list item, according to the sort specs. 532 */ 533int 534str_list_coll(struct bwstring *str1, struct sort_list_item **ss2) 535{ 536 struct keys_array *ka1; 537 int ret = 0; 538 539 ka1 = keys_array_alloc(); 540 541 preproc(str1, ka1); 542 543 sort_list_item_make_key(*ss2); 544 545 if (debug_sort) { 546 bwsprintf(stdout, str1, "; s1=<", ">"); 547 bwsprintf(stdout, (*ss2)->str, ", s2=<", ">"); 548 } 549 550 ret = key_coll(ka1, &((*ss2)->ka), 0); 551 552 if (debug_sort) 553 printf("; cmp1=%d", ret); 554 555 clean_keys_array(str1, ka1); 556 sort_free(ka1); 557 558 if ((ret == 0) && !(sort_opts_vals.sflag) && sort_opts_vals.complex_sort) { 559 ret = top_level_str_coll(str1, ((*ss2)->str)); 560 if (debug_sort) 561 printf("; cmp2=%d", ret); 562 } 563 564 if (debug_sort) 565 printf("\n"); 566 567 return (ret); 568} 569 570/* 571 * Compare two sort list items, according to the sort specs. 572 */ 573int 574list_coll_offset(struct sort_list_item **ss1, struct sort_list_item **ss2, 575 size_t offset) 576{ 577 int ret; 578 579 ret = key_coll(&((*ss1)->ka), &((*ss2)->ka), offset); 580 581 if (debug_sort) { 582 if (offset) 583 printf("; offset=%d", (int) offset); 584 bwsprintf(stdout, ((*ss1)->str), "; s1=<", ">"); 585 bwsprintf(stdout, ((*ss2)->str), ", s2=<", ">"); 586 printf("; cmp1=%d\n", ret); 587 } 588 589 if (ret) 590 return (ret); 591 592 if (!(sort_opts_vals.sflag) && sort_opts_vals.complex_sort) { 593 ret = top_level_str_coll(((*ss1)->str), ((*ss2)->str)); 594 if (debug_sort) 595 printf("; cmp2=%d\n", ret); 596 } 597 598 return (ret); 599} 600 601/* 602 * Compare two sort list items, according to the sort specs. 603 */ 604int 605list_coll(struct sort_list_item **ss1, struct sort_list_item **ss2) 606{ 607 608 return (list_coll_offset(ss1, ss2, 0)); 609} 610 611#define LSCDEF(N) \ 612static int \ 613list_coll_##N(struct sort_list_item **ss1, struct sort_list_item **ss2) \ 614{ \ 615 \ 616 return (list_coll_offset(ss1, ss2, N)); \ 617} 618 619LSCDEF(1) 620LSCDEF(2) 621LSCDEF(3) 622LSCDEF(4) 623LSCDEF(5) 624LSCDEF(6) 625LSCDEF(7) 626LSCDEF(8) 627LSCDEF(9) 628LSCDEF(10) 629LSCDEF(11) 630LSCDEF(12) 631LSCDEF(13) 632LSCDEF(14) 633LSCDEF(15) 634LSCDEF(16) 635LSCDEF(17) 636LSCDEF(18) 637LSCDEF(19) 638LSCDEF(20) 639 640listcoll_t 641get_list_call_func(size_t offset) 642{ 643 static const listcoll_t lsarray[] = { list_coll, list_coll_1, 644 list_coll_2, list_coll_3, list_coll_4, list_coll_5, 645 list_coll_6, list_coll_7, list_coll_8, list_coll_9, 646 list_coll_10, list_coll_11, list_coll_12, list_coll_13, 647 list_coll_14, list_coll_15, list_coll_16, list_coll_17, 648 list_coll_18, list_coll_19, list_coll_20 }; 649 650 if (offset <= 20) 651 return (lsarray[offset]); 652 653 return (list_coll); 654} 655 656/* 657 * Compare two sort list items, only by their original string. 658 */ 659int 660list_coll_by_str_only(struct sort_list_item **ss1, struct sort_list_item **ss2) 661{ 662 663 return (top_level_str_coll(((*ss1)->str), ((*ss2)->str))); 664} 665 666/* 667 * Maximum size of a number in the string (before or after decimal point) 668 */ 669#define MAX_NUM_SIZE (128) 670 671/* 672 * Set suffix value 673 */ 674static void setsuffix(wchar_t c, unsigned char *si) 675{ 676 switch (c){ 677 case L'k': 678 case L'K': 679 *si = 1; 680 break; 681 case L'M': 682 *si = 2; 683 break; 684 case L'G': 685 *si = 3; 686 break; 687 case L'T': 688 *si = 4; 689 break; 690 case L'P': 691 *si = 5; 692 break; 693 case L'E': 694 *si = 6; 695 break; 696 case L'Z': 697 *si = 7; 698 break; 699 case L'Y': 700 *si = 8; 701 break; 702 default: 703 *si = 0; 704 } 705} 706 707/* 708 * Read string s and parse the string into a fixed-decimal-point number. 709 * sign equals -1 if the number is negative (explicit plus is not allowed, 710 * according to GNU sort's "info sort". 711 * The number part before decimal point is in the smain, after the decimal 712 * point is in sfrac, tail is the pointer to the remainder of the string. 713 */ 714static int 715read_number(struct bwstring *s0, int *sign, wchar_t *smain, size_t *main_len, wchar_t *sfrac, size_t *frac_len, unsigned char *si) 716{ 717 bwstring_iterator s; 718 719 s = bws_begin(s0); 720 721 /* always end the fraction with zero, even if we have no fraction */ 722 sfrac[0] = 0; 723 724 while (iswblank(bws_get_iter_value(s))) 725 s = bws_iterator_inc(s, 1); 726 727 if (bws_get_iter_value(s) == (wchar_t)symbol_negative_sign) { 728 *sign = -1; 729 s = bws_iterator_inc(s, 1); 730 } 731 732 // This is '0', not '\0', do not change this 733 while (iswdigit(bws_get_iter_value(s)) && 734 (bws_get_iter_value(s) == L'0')) 735 s = bws_iterator_inc(s, 1); 736 737 while (bws_get_iter_value(s) && *main_len < MAX_NUM_SIZE) { 738 if (iswdigit(bws_get_iter_value(s))) { 739 smain[*main_len] = bws_get_iter_value(s); 740 s = bws_iterator_inc(s, 1); 741 *main_len += 1; 742 } else if (symbol_thousands_sep && 743 (bws_get_iter_value(s) == (wchar_t)symbol_thousands_sep)) 744 s = bws_iterator_inc(s, 1); 745 else 746 break; 747 } 748 749 smain[*main_len] = 0; 750 751 if (bws_get_iter_value(s) == (wchar_t)symbol_decimal_point) { 752 s = bws_iterator_inc(s, 1); 753 while (iswdigit(bws_get_iter_value(s)) && 754 *frac_len < MAX_NUM_SIZE) { 755 sfrac[*frac_len] = bws_get_iter_value(s); 756 s = bws_iterator_inc(s, 1); 757 *frac_len += 1; 758 } 759 sfrac[*frac_len] = 0; 760 761 while (*frac_len > 0 && sfrac[*frac_len - 1] == L'0') { 762 --(*frac_len); 763 sfrac[*frac_len] = L'\0'; 764 } 765 } 766 767 setsuffix(bws_get_iter_value(s),si); 768 769 if ((*main_len + *frac_len) == 0) 770 *sign = 0; 771 772 return (0); 773} 774 775/* 776 * Implements string sort. 777 */ 778static int 779wstrcoll(struct key_value *kv1, struct key_value *kv2, size_t offset) 780{ 781 782 if (debug_sort) { 783 if (offset) 784 printf("; offset=%d\n", (int) offset); 785 bwsprintf(stdout, kv1->k, "; k1=<", ">"); 786 printf("(%zu)", BWSLEN(kv1->k)); 787 bwsprintf(stdout, kv2->k, ", k2=<", ">"); 788 printf("(%zu)", BWSLEN(kv2->k)); 789 } 790 791 return (bwscoll(kv1->k, kv2->k, offset)); 792} 793 794/* 795 * Compare two suffixes 796 */ 797static inline int 798cmpsuffix(unsigned char si1, unsigned char si2) 799{ 800 801 return ((char)si1 - (char)si2); 802} 803 804/* 805 * Implements numeric sort for -n and -h. 806 */ 807static int 808numcoll_impl(struct key_value *kv1, struct key_value *kv2, 809 size_t offset __unused, bool use_suffix) 810{ 811 struct bwstring *s1, *s2; 812 wchar_t sfrac1[MAX_NUM_SIZE + 1], sfrac2[MAX_NUM_SIZE + 1]; 813 wchar_t smain1[MAX_NUM_SIZE + 1], smain2[MAX_NUM_SIZE + 1]; 814 int cmp_res, sign1, sign2; 815 size_t frac1, frac2, main1, main2; 816 unsigned char SI1, SI2; 817 bool e1, e2, key1_read, key2_read; 818 819 s1 = kv1->k; 820 s2 = kv2->k; 821 sign1 = sign2 = 0; 822 main1 = main2 = 0; 823 frac1 = frac2 = 0; 824 825 key1_read = key2_read = false; 826 827 if (debug_sort) { 828 bwsprintf(stdout, s1, "; k1=<", ">"); 829 bwsprintf(stdout, s2, ", k2=<", ">"); 830 } 831 832 if (s1 == s2) 833 return (0); 834 835 if (kv1->hint->status == HS_UNINITIALIZED) { 836 /* read the number from the string */ 837 read_number(s1, &sign1, smain1, &main1, sfrac1, &frac1, &SI1); 838 key1_read = true; 839 kv1->hint->v.nh.n1 = wcstoull(smain1, NULL, 10); 840 if(main1 < 1 && frac1 < 1) 841 kv1->hint->v.nh.empty=true; 842 kv1->hint->v.nh.si = SI1; 843 kv1->hint->status = (kv1->hint->v.nh.n1 != ULLONG_MAX) ? 844 HS_INITIALIZED : HS_ERROR; 845 kv1->hint->v.nh.neg = (sign1 < 0) ? true : false; 846 } 847 848 if (kv2->hint->status == HS_UNINITIALIZED) { 849 /* read the number from the string */ 850 read_number(s2, &sign2, smain2, &main2, sfrac2, &frac2,&SI2); 851 key2_read = true; 852 kv2->hint->v.nh.n1 = wcstoull(smain2, NULL, 10); 853 if(main2 < 1 && frac2 < 1) 854 kv2->hint->v.nh.empty=true; 855 kv2->hint->v.nh.si = SI2; 856 kv2->hint->status = (kv2->hint->v.nh.n1 != ULLONG_MAX) ? 857 HS_INITIALIZED : HS_ERROR; 858 kv2->hint->v.nh.neg = (sign2 < 0) ? true : false; 859 } 860 861 if (kv1->hint->status == HS_INITIALIZED && kv2->hint->status == 862 HS_INITIALIZED) { 863 unsigned long long n1, n2; 864 bool neg1, neg2; 865 866 e1 = kv1->hint->v.nh.empty; 867 e2 = kv2->hint->v.nh.empty; 868 869 if (e1 && e2) 870 return (0); 871 872 neg1 = kv1->hint->v.nh.neg; 873 neg2 = kv2->hint->v.nh.neg; 874 875 if (neg1 && !neg2) 876 return (-1); 877 if (neg2 && !neg1) 878 return (+1); 879 880 if (e1) 881 return (neg2 ? +1 : -1); 882 else if (e2) 883 return (neg1 ? -1 : +1); 884 885 886 if (use_suffix) { 887 cmp_res = cmpsuffix(kv1->hint->v.nh.si, kv2->hint->v.nh.si); 888 if (cmp_res) 889 return (neg1 ? -cmp_res : cmp_res); 890 } 891 892 n1 = kv1->hint->v.nh.n1; 893 n2 = kv2->hint->v.nh.n1; 894 if (n1 < n2) 895 return (neg1 ? +1 : -1); 896 else if (n1 > n2) 897 return (neg1 ? -1 : +1); 898 } 899 900 /* read the numbers from the strings */ 901 if (!key1_read) 902 read_number(s1, &sign1, smain1, &main1, sfrac1, &frac1, &SI1); 903 if (!key2_read) 904 read_number(s2, &sign2, smain2, &main2, sfrac2, &frac2, &SI2); 905 906 e1 = ((main1 + frac1) == 0); 907 e2 = ((main2 + frac2) == 0); 908 909 if (e1 && e2) 910 return (0); 911 912 /* we know the result if the signs are different */ 913 if (sign1 < 0 && sign2 >= 0) 914 return (-1); 915 if (sign1 >= 0 && sign2 < 0) 916 return (+1); 917 918 if (e1) 919 return ((sign2 < 0) ? +1 : -1); 920 else if (e2) 921 return ((sign1 < 0) ? -1 : +1); 922 923 if (use_suffix) { 924 cmp_res = cmpsuffix(SI1, SI2); 925 if (cmp_res) 926 return ((sign1 < 0) ? -cmp_res : cmp_res); 927 } 928 929 /* if both numbers are empty assume that the strings are equal */ 930 if (main1 < 1 && main2 < 1 && frac1 < 1 && frac2 < 1) 931 return (0); 932 933 /* 934 * if the main part is of different size, we know the result 935 * (because the leading zeros are removed) 936 */ 937 if (main1 < main2) 938 cmp_res = -1; 939 else if (main1 > main2) 940 cmp_res = +1; 941 /* if the sizes are equal then simple non-collate string compare gives the correct result */ 942 else 943 cmp_res = wcscmp(smain1, smain2); 944 945 /* check fraction */ 946 if (!cmp_res) 947 cmp_res = wcscmp(sfrac1, sfrac2); 948 949 if (!cmp_res) 950 return (0); 951 952 /* reverse result if the signs are negative */ 953 if (sign1 < 0 && sign2 < 0) 954 cmp_res = -cmp_res; 955 956 return (cmp_res); 957} 958 959/* 960 * Implements numeric sort (-n). 961 */ 962static int 963numcoll(struct key_value *kv1, struct key_value *kv2, size_t offset) 964{ 965 966 return (numcoll_impl(kv1, kv2, offset, false)); 967} 968 969/* 970 * Implements 'human' numeric sort (-h). 971 */ 972static int 973hnumcoll(struct key_value *kv1, struct key_value *kv2, size_t offset) 974{ 975 976 return (numcoll_impl(kv1, kv2, offset, true)); 977} 978 979/* Use hint space to memoize md5 computations, at least. */ 980static void 981randomcoll_init_hint(struct key_value *kv, void *hash) 982{ 983 984 memcpy(kv->hint->v.Rh.cached, hash, sizeof(kv->hint->v.Rh.cached)); 985 kv->hint->status = HS_INITIALIZED; 986} 987 988/* 989 * Implements random sort (-R). 990 */ 991static int 992randomcoll(struct key_value *kv1, struct key_value *kv2, 993 size_t offset __unused) 994{ 995 struct bwstring *s1, *s2; 996 MD5_CTX ctx1, ctx2; 997 unsigned char hash1[MD5_DIGEST_LENGTH], hash2[MD5_DIGEST_LENGTH]; 998 int cmp; 999 1000 s1 = kv1->k; 1001 s2 = kv2->k; 1002 1003 if (debug_sort) { 1004 bwsprintf(stdout, s1, "; k1=<", ">"); 1005 bwsprintf(stdout, s2, ", k2=<", ">"); 1006 } 1007 1008 if (s1 == s2) 1009 return (0); 1010 1011 if (kv1->hint->status == HS_INITIALIZED && 1012 kv2->hint->status == HS_INITIALIZED) { 1013 cmp = memcmp(kv1->hint->v.Rh.cached, 1014 kv2->hint->v.Rh.cached, sizeof(kv1->hint->v.Rh.cached)); 1015 if (cmp != 0) 1016 return (cmp); 1017 } 1018 1019 memcpy(&ctx1, &md5_ctx, sizeof(MD5_CTX)); 1020 memcpy(&ctx2, &md5_ctx, sizeof(MD5_CTX)); 1021 1022 MD5Update(&ctx1, bwsrawdata(s1), bwsrawlen(s1)); 1023 MD5Update(&ctx2, bwsrawdata(s2), bwsrawlen(s2)); 1024 1025 MD5Final(hash1, &ctx1); 1026 MD5Final(hash2, &ctx2); 1027 1028 if (kv1->hint->status == HS_UNINITIALIZED) 1029 randomcoll_init_hint(kv1, hash1); 1030 if (kv2->hint->status == HS_UNINITIALIZED) 1031 randomcoll_init_hint(kv2, hash2); 1032 1033 return (memcmp(hash1, hash2, sizeof(hash1))); 1034} 1035 1036/* 1037 * Implements version sort (-V). 1038 */ 1039static int 1040versioncoll(struct key_value *kv1, struct key_value *kv2, 1041 size_t offset __unused) 1042{ 1043 struct bwstring *s1, *s2; 1044 1045 s1 = kv1->k; 1046 s2 = kv2->k; 1047 1048 if (debug_sort) { 1049 bwsprintf(stdout, s1, "; k1=<", ">"); 1050 bwsprintf(stdout, s2, ", k2=<", ">"); 1051 } 1052 1053 if (s1 == s2) 1054 return (0); 1055 1056 return (vcmp(s1, s2)); 1057} 1058 1059/* 1060 * Check for minus infinity 1061 */ 1062static inline bool 1063huge_minus(double d, int err1) 1064{ 1065 1066 if (err1 == ERANGE) 1067 if (d == -HUGE_VAL || d == -HUGE_VALF || d == -HUGE_VALL) 1068 return (+1); 1069 1070 return (0); 1071} 1072 1073/* 1074 * Check for plus infinity 1075 */ 1076static inline bool 1077huge_plus(double d, int err1) 1078{ 1079 1080 if (err1 == ERANGE) 1081 if (d == HUGE_VAL || d == HUGE_VALF || d == HUGE_VALL) 1082 return (+1); 1083 1084 return (0); 1085} 1086 1087/* 1088 * Check whether a function is a NAN 1089 */ 1090static bool 1091is_nan(double d) 1092{ 1093 1094 return ((d == NAN) || (isnan(d))); 1095} 1096 1097/* 1098 * Compare two NANs 1099 */ 1100static int 1101cmp_nans(double d1, double d2) 1102{ 1103 1104 if (d1 < d2) 1105 return (-1); 1106 if (d1 > d2) 1107 return (+1); 1108 return (0); 1109} 1110 1111/* 1112 * Implements general numeric sort (-g). 1113 */ 1114static int 1115gnumcoll(struct key_value *kv1, struct key_value *kv2, 1116 size_t offset __unused) 1117{ 1118 double d1, d2; 1119 int err1, err2; 1120 bool empty1, empty2, key1_read, key2_read; 1121 1122 d1 = d2 = 0; 1123 err1 = err2 = 0; 1124 key1_read = key2_read = false; 1125 1126 if (debug_sort) { 1127 bwsprintf(stdout, kv1->k, "; k1=<", ">"); 1128 bwsprintf(stdout, kv2->k, "; k2=<", ">"); 1129 } 1130 1131 if (kv1->hint->status == HS_UNINITIALIZED) { 1132 errno = 0; 1133 d1 = bwstod(kv1->k, &empty1); 1134 err1 = errno; 1135 1136 if (empty1) 1137 kv1->hint->v.gh.notnum = true; 1138 else if (err1 == 0) { 1139 kv1->hint->v.gh.d = d1; 1140 kv1->hint->v.gh.nan = is_nan(d1); 1141 kv1->hint->status = HS_INITIALIZED; 1142 } else 1143 kv1->hint->status = HS_ERROR; 1144 1145 key1_read = true; 1146 } 1147 1148 if (kv2->hint->status == HS_UNINITIALIZED) { 1149 errno = 0; 1150 d2 = bwstod(kv2->k, &empty2); 1151 err2 = errno; 1152 1153 if (empty2) 1154 kv2->hint->v.gh.notnum = true; 1155 else if (err2 == 0) { 1156 kv2->hint->v.gh.d = d2; 1157 kv2->hint->v.gh.nan = is_nan(d2); 1158 kv2->hint->status = HS_INITIALIZED; 1159 } else 1160 kv2->hint->status = HS_ERROR; 1161 1162 key2_read = true; 1163 } 1164 1165 if (kv1->hint->status == HS_INITIALIZED && 1166 kv2->hint->status == HS_INITIALIZED) { 1167 if (kv1->hint->v.gh.notnum) 1168 return ((kv2->hint->v.gh.notnum) ? 0 : -1); 1169 else if (kv2->hint->v.gh.notnum) 1170 return (+1); 1171 1172 if (kv1->hint->v.gh.nan) 1173 return ((kv2->hint->v.gh.nan) ? 1174 cmp_nans(kv1->hint->v.gh.d, kv2->hint->v.gh.d) : 1175 -1); 1176 else if (kv2->hint->v.gh.nan) 1177 return (+1); 1178 1179 d1 = kv1->hint->v.gh.d; 1180 d2 = kv2->hint->v.gh.d; 1181 1182 if (d1 < d2) 1183 return (-1); 1184 else if (d1 > d2) 1185 return (+1); 1186 else 1187 return (0); 1188 } 1189 1190 if (!key1_read) { 1191 errno = 0; 1192 d1 = bwstod(kv1->k, &empty1); 1193 err1 = errno; 1194 } 1195 1196 if (!key2_read) { 1197 errno = 0; 1198 d2 = bwstod(kv2->k, &empty2); 1199 err2 = errno; 1200 } 1201 1202 /* Non-value case: */ 1203 if (empty1) 1204 return (empty2 ? 0 : -1); 1205 else if (empty2) 1206 return (+1); 1207 1208 /* NAN case */ 1209 if (is_nan(d1)) 1210 return (is_nan(d2) ? cmp_nans(d1, d2) : -1); 1211 else if (is_nan(d2)) 1212 return (+1); 1213 1214 /* Infinities */ 1215 if (err1 == ERANGE || err2 == ERANGE) { 1216 /* Minus infinity case */ 1217 if (huge_minus(d1, err1)) { 1218 if (huge_minus(d2, err2)) { 1219 if (d1 < d2) 1220 return (-1); 1221 if (d1 > d2) 1222 return (+1); 1223 return (0); 1224 } else 1225 return (-1); 1226 1227 } else if (huge_minus(d2, err2)) { 1228 if (huge_minus(d1, err1)) { 1229 if (d1 < d2) 1230 return (-1); 1231 if (d1 > d2) 1232 return (+1); 1233 return (0); 1234 } else 1235 return (+1); 1236 } 1237 1238 /* Plus infinity case */ 1239 if (huge_plus(d1, err1)) { 1240 if (huge_plus(d2, err2)) { 1241 if (d1 < d2) 1242 return (-1); 1243 if (d1 > d2) 1244 return (+1); 1245 return (0); 1246 } else 1247 return (+1); 1248 } else if (huge_plus(d2, err2)) { 1249 if (huge_plus(d1, err1)) { 1250 if (d1 < d2) 1251 return (-1); 1252 if (d1 > d2) 1253 return (+1); 1254 return (0); 1255 } else 1256 return (-1); 1257 } 1258 } 1259 1260 if (d1 < d2) 1261 return (-1); 1262 if (d1 > d2) 1263 return (+1); 1264 1265 return (0); 1266} 1267 1268/* 1269 * Implements month sort (-M). 1270 */ 1271static int 1272monthcoll(struct key_value *kv1, struct key_value *kv2, size_t offset __unused) 1273{ 1274 int val1, val2; 1275 bool key1_read, key2_read; 1276 1277 val1 = val2 = 0; 1278 key1_read = key2_read = false; 1279 1280 if (debug_sort) { 1281 bwsprintf(stdout, kv1->k, "; k1=<", ">"); 1282 bwsprintf(stdout, kv2->k, "; k2=<", ">"); 1283 } 1284 1285 if (kv1->hint->status == HS_UNINITIALIZED) { 1286 kv1->hint->v.Mh.m = bws_month_score(kv1->k); 1287 key1_read = true; 1288 kv1->hint->status = HS_INITIALIZED; 1289 } 1290 1291 if (kv2->hint->status == HS_UNINITIALIZED) { 1292 kv2->hint->v.Mh.m = bws_month_score(kv2->k); 1293 key2_read = true; 1294 kv2->hint->status = HS_INITIALIZED; 1295 } 1296 1297 if (kv1->hint->status == HS_INITIALIZED) { 1298 val1 = kv1->hint->v.Mh.m; 1299 key1_read = true; 1300 } 1301 1302 if (kv2->hint->status == HS_INITIALIZED) { 1303 val2 = kv2->hint->v.Mh.m; 1304 key2_read = true; 1305 } 1306 1307 if (!key1_read) 1308 val1 = bws_month_score(kv1->k); 1309 if (!key2_read) 1310 val2 = bws_month_score(kv2->k); 1311 1312 if (val1 == val2) { 1313 return (0); 1314 } 1315 if (val1 < val2) 1316 return (-1); 1317 return (+1); 1318} 1319