1238384Sjkim/*- 2238384Sjkim * SPDX-License-Identifier: BSD-2-Clause 3238384Sjkim * 4238384Sjkim * Copyright 2014 Garrett D'Amore <garrett@damore.org> 5238384Sjkim * Copyright 2010 Nexenta Systems, Inc. All rights reserved. 6238384Sjkim * Copyright (c) 1995 Alex Tatmanjants <alex@elvisti.kiev.ua> 7238384Sjkim * at Electronni Visti IA, Kiev, Ukraine. 8238384Sjkim * All rights reserved. 9238384Sjkim * 10238384Sjkim * Copyright (c) 2011 The FreeBSD Foundation 11238384Sjkim * 12238384Sjkim * Portions of this software were developed by David Chisnall 13238384Sjkim * under sponsorship from the FreeBSD Foundation. 14238384Sjkim * 15238384Sjkim * Redistribution and use in source and binary forms, with or without 16238384Sjkim * modification, are permitted provided that the following conditions 17238384Sjkim * are met: 18238384Sjkim * 1. Redistributions of source code must retain the above copyright 19238384Sjkim * notice, this list of conditions and the following disclaimer. 20238384Sjkim * 2. Redistributions in binary form must reproduce the above copyright 21238384Sjkim * notice, this list of conditions and the following disclaimer in the 22238384Sjkim * documentation and/or other materials provided with the distribution. 23238384Sjkim * 24238384Sjkim * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND 25238384Sjkim * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 26238384Sjkim * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 27238384Sjkim * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE 28238384Sjkim * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 29238384Sjkim * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 30238384Sjkim * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 31238384Sjkim * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 32238384Sjkim * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 33238384Sjkim * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 34238384Sjkim * SUCH DAMAGE. 35238384Sjkim * 36238384Sjkim * Adapted to xlocale by John Marino <draco@marino.st> 37238384Sjkim */ 38238384Sjkim 39238384Sjkim#include "namespace.h" 40238384Sjkim 41238384Sjkim#include <sys/types.h> 42238384Sjkim#include <sys/stat.h> 43238384Sjkim#include <sys/mman.h> 44246772Sjkim 45246772Sjkim#include <assert.h> 46238384Sjkim#include <stdio.h> 47238384Sjkim#include <stdlib.h> 48238384Sjkim#include <string.h> 49238384Sjkim#include <wchar.h> 50238384Sjkim#include <errno.h> 51238384Sjkim#include <unistd.h> 52238384Sjkim#include <fcntl.h> 53238384Sjkim#include "un-namespace.h" 54238384Sjkim 55238384Sjkim#include "collate.h" 56238384Sjkim#include "setlocale.h" 57238384Sjkim#include "ldpart.h" 58238384Sjkim#include "libc_private.h" 59238384Sjkim 60238384Sjkimstruct xlocale_collate __xlocale_global_collate = { 61238384Sjkim {{0}, "C"}, 1, 0, 0, 0 62238384Sjkim}; 63238384Sjkim 64238384Sjkimstruct xlocale_collate __xlocale_C_collate = { 65238384Sjkim {{0}, "C"}, 1, 0, 0, 0 66238384Sjkim}; 67238384Sjkim 68238384Sjkimstruct xlocale_collate __xlocale_POSIX_collate = { 69238384Sjkim {{0}, "POSIX"}, 1, 0, 0, 0 70238384Sjkim}; 71238384Sjkim 72238384Sjkimstruct xlocale_collate __xlocale_CUTF8_collate = { 73238384Sjkim {{0}, "C.UTF-8"}, 1, 0, 0, 0 74238384Sjkim}; 75238384Sjkim 76238384Sjkimstatic int 77238384Sjkim__collate_load_tables_l(const char *encoding, struct xlocale_collate *table); 78238384Sjkim 79238384Sjkimstatic void 80238384Sjkimdestruct_collate(void *t) 81238384Sjkim{ 82238384Sjkim struct xlocale_collate *table = t; 83238384Sjkim if (table->map && (table->maplen > 0)) { 84238384Sjkim (void) munmap(table->map, table->maplen); 85238384Sjkim } 86238384Sjkim free(t); 87238384Sjkim} 88238384Sjkim 89238384Sjkimvoid * 90238384Sjkim__collate_load(const char *encoding, __unused locale_t unused) 91238384Sjkim{ 92238384Sjkim if (strcmp(encoding, "C") == 0) 93238384Sjkim return (&__xlocale_C_collate); 94238384Sjkim else if (strcmp(encoding, "POSIX") == 0) 95238384Sjkim return (&__xlocale_POSIX_collate); 96238384Sjkim else if (strcmp(encoding, "C.UTF-8") == 0) 97238384Sjkim return (&__xlocale_CUTF8_collate); 98238384Sjkim 99238384Sjkim struct xlocale_collate *table = calloc(sizeof(struct xlocale_collate), 100238384Sjkim 1); 101238384Sjkim if (table == NULL) 102238384Sjkim return (NULL); 103238384Sjkim table->header.header.destructor = destruct_collate; 104238384Sjkim 105238384Sjkim /* 106238384Sjkim * FIXME: Make sure that _LDP_CACHE is never returned. We 107238384Sjkim * should be doing the caching outside of this section. 108238384Sjkim */ 109238384Sjkim if (__collate_load_tables_l(encoding, table) != _LDP_LOADED) { 110238384Sjkim xlocale_release(table); 111238384Sjkim return (NULL); 112238384Sjkim } 113238384Sjkim return (table); 114238384Sjkim} 115238384Sjkim 116238384Sjkim/** 117238384Sjkim * Load the collation tables for the specified encoding into the global table. 118238384Sjkim */ 119238384Sjkimint 120238384Sjkim__collate_load_tables(const char *encoding) 121238384Sjkim{ 122238384Sjkim 123238384Sjkim return (__collate_load_tables_l(encoding, &__xlocale_global_collate)); 124238384Sjkim} 125238384Sjkim 126238384Sjkimstatic int 127238384Sjkim__collate_load_tables_l(const char *encoding, struct xlocale_collate *table) 128238384Sjkim{ 129238384Sjkim int i, chains, z; 130238384Sjkim char *buf; 131238384Sjkim char *TMP; 132238384Sjkim char *map; 133238384Sjkim collate_info_t *info; 134238384Sjkim struct stat sbuf; 135238384Sjkim int fd; 136238384Sjkim 137238384Sjkim table->__collate_load_error = 1; 138238384Sjkim 139238384Sjkim /* 'encoding' must be already checked. */ 140238384Sjkim if (strcmp(encoding, "C") == 0 || strcmp(encoding, "POSIX") == 0 || 141238384Sjkim strncmp(encoding, "C.", 2) == 0) { 142238384Sjkim return (_LDP_CACHE); 143238384Sjkim } 144238384Sjkim 145238384Sjkim if (asprintf(&buf, "%s/%s/LC_COLLATE", _PathLocale, encoding) == -1) 146238384Sjkim return (_LDP_ERROR); 147238384Sjkim 148238384Sjkim if ((fd = _open(buf, O_RDONLY | O_CLOEXEC)) < 0) { 149238384Sjkim free(buf); 150238384Sjkim return (_LDP_ERROR); 151238384Sjkim } 152238384Sjkim free(buf); 153238384Sjkim if (_fstat(fd, &sbuf) < 0) { 154238384Sjkim (void) _close(fd); 155238384Sjkim return (_LDP_ERROR); 156238384Sjkim } 157238384Sjkim if (sbuf.st_size < (COLLATE_FMT_VERSION_LEN + 158238384Sjkim XLOCALE_DEF_VERSION_LEN + 159238384Sjkim sizeof (*info))) { 160238384Sjkim (void) _close(fd); 161238384Sjkim errno = EINVAL; 162238384Sjkim return (_LDP_ERROR); 163238384Sjkim } 164238384Sjkim map = mmap(NULL, sbuf.st_size, PROT_READ, MAP_PRIVATE, fd, 0); 165238384Sjkim (void) _close(fd); 166238384Sjkim if ((TMP = map) == MAP_FAILED) { 167238384Sjkim return (_LDP_ERROR); 168238384Sjkim } 169238384Sjkim 170238384Sjkim if (strncmp(TMP, COLLATE_FMT_VERSION, COLLATE_FMT_VERSION_LEN) != 0) { 171238384Sjkim (void) munmap(map, sbuf.st_size); 172238384Sjkim errno = EINVAL; 173238384Sjkim return (_LDP_ERROR); 174238384Sjkim } 175238384Sjkim TMP += COLLATE_FMT_VERSION_LEN; 176238384Sjkim strlcat(table->header.version, TMP, sizeof (table->header.version)); 177238384Sjkim TMP += XLOCALE_DEF_VERSION_LEN; 178238384Sjkim 179238384Sjkim info = (void *)TMP; 180238384Sjkim TMP += sizeof (*info); 181238384Sjkim 182238384Sjkim if ((info->directive_count < 1) || 183238384Sjkim (info->directive_count >= COLL_WEIGHTS_MAX) || 184238384Sjkim ((chains = info->chain_count) < 0)) { 185238384Sjkim (void) munmap(map, sbuf.st_size); 186238384Sjkim errno = EINVAL; 187238384Sjkim return (_LDP_ERROR); 188238384Sjkim } 189238384Sjkim 190238384Sjkim i = (sizeof (collate_char_t) * (UCHAR_MAX + 1)) + 191238384Sjkim (sizeof (collate_chain_t) * chains) + 192238384Sjkim (sizeof (collate_large_t) * info->large_count); 193238384Sjkim for (z = 0; z < info->directive_count; z++) { 194238384Sjkim i += sizeof (collate_subst_t) * info->subst_count[z]; 195238384Sjkim } 196238384Sjkim if (i != (sbuf.st_size - (TMP - map))) { 197238384Sjkim (void) munmap(map, sbuf.st_size); 198238384Sjkim errno = EINVAL; 199238384Sjkim return (_LDP_ERROR); 200238384Sjkim } 201238384Sjkim 202238384Sjkim if (table->map && (table->maplen > 0)) { 203238384Sjkim (void) munmap(table->map, table->maplen); 204238384Sjkim } 205238384Sjkim table->map = map; 206238384Sjkim table->maplen = sbuf.st_size; 207238384Sjkim table->info = info; 208238384Sjkim table->char_pri_table = (void *)TMP; 209238384Sjkim TMP += sizeof (collate_char_t) * (UCHAR_MAX + 1); 210238384Sjkim 211238384Sjkim for (z = 0; z < info->directive_count; z++) { 212238384Sjkim if (info->subst_count[z] > 0) { 213238384Sjkim table->subst_table[z] = (void *)TMP; 214238384Sjkim TMP += info->subst_count[z] * sizeof (collate_subst_t); 215238384Sjkim } else { 216238384Sjkim table->subst_table[z] = NULL; 217238384Sjkim } 218238384Sjkim } 219238384Sjkim 220238384Sjkim if (chains > 0) { 221238384Sjkim table->chain_pri_table = (void *)TMP; 222238384Sjkim TMP += chains * sizeof (collate_chain_t); 223238384Sjkim } else 224238384Sjkim table->chain_pri_table = NULL; 225238384Sjkim if (info->large_count > 0) 226238384Sjkim table->large_pri_table = (void *)TMP; 227238384Sjkim else 228238384Sjkim table->large_pri_table = NULL; 229238384Sjkim 230238384Sjkim table->__collate_load_error = 0; 231238384Sjkim return (_LDP_LOADED); 232238384Sjkim} 233238384Sjkim 234238384Sjkimstatic const int32_t * 235238384Sjkimsubstsearch(struct xlocale_collate *table, const wchar_t key, int pass) 236238384Sjkim{ 237238384Sjkim const collate_subst_t *p; 238238384Sjkim int n = table->info->subst_count[pass]; 239238384Sjkim 240238384Sjkim if (n == 0) 241238384Sjkim return (NULL); 242238384Sjkim 243238384Sjkim if (pass >= table->info->directive_count) 244238384Sjkim return (NULL); 245238384Sjkim 246238384Sjkim if (!(key & COLLATE_SUBST_PRIORITY)) 247238384Sjkim return (NULL); 248238384Sjkim 249238384Sjkim p = table->subst_table[pass] + (key & ~COLLATE_SUBST_PRIORITY); 250238384Sjkim assert(p->key == key); 251238384Sjkim 252238384Sjkim return (p->pri); 253238384Sjkim} 254238384Sjkim 255238384Sjkimstatic collate_chain_t * 256238384Sjkimchainsearch(struct xlocale_collate *table, const wchar_t *key, int *len) 257238384Sjkim{ 258238384Sjkim int low = 0; 259238384Sjkim int high = table->info->chain_count - 1; 260238384Sjkim int next, compar, l; 261238384Sjkim collate_chain_t *p; 262238384Sjkim collate_chain_t *tab = table->chain_pri_table; 263238384Sjkim 264238384Sjkim if (high < 0) 265238384Sjkim return (NULL); 266238384Sjkim 267238384Sjkim while (low <= high) { 268238384Sjkim next = (low + high) / 2; 269238384Sjkim p = tab + next; 270238384Sjkim compar = *key - *p->str; 271238384Sjkim if (compar == 0) { 272238384Sjkim l = wcsnlen(p->str, COLLATE_STR_LEN); 273238384Sjkim compar = wcsncmp(key, p->str, l); 274238384Sjkim if (compar == 0) { 275238384Sjkim *len = l; 276238384Sjkim return (p); 277238384Sjkim } 278238384Sjkim } 279238384Sjkim if (compar > 0) 280238384Sjkim low = next + 1; 281238384Sjkim else 282238384Sjkim high = next - 1; 283238384Sjkim } 284238384Sjkim return (NULL); 285238384Sjkim} 286238384Sjkim 287238384Sjkimstatic collate_large_t * 288238384Sjkimlargesearch(struct xlocale_collate *table, const wchar_t key) 289238384Sjkim{ 290238384Sjkim int low = 0; 291238384Sjkim int high = table->info->large_count - 1; 292238384Sjkim int next, compar; 293238384Sjkim collate_large_t *p; 294238384Sjkim collate_large_t *tab = table->large_pri_table; 295238384Sjkim 296238384Sjkim if (high < 0) 297238384Sjkim return (NULL); 298238384Sjkim 299238384Sjkim while (low <= high) { 300238384Sjkim next = (low + high) / 2; 301238384Sjkim p = tab + next; 302238384Sjkim compar = key - p->val; 303238384Sjkim if (compar == 0) 304238384Sjkim return (p); 305238384Sjkim if (compar > 0) 306238384Sjkim low = next + 1; 307238384Sjkim else 308238384Sjkim high = next - 1; 309238384Sjkim } 310238384Sjkim return (NULL); 311238384Sjkim} 312238384Sjkim 313238384Sjkimvoid 314238384Sjkim_collate_lookup(struct xlocale_collate *table, const wchar_t *t, int *len, 315238384Sjkim int *pri, int which, const int **state) 316238384Sjkim{ 317238384Sjkim collate_chain_t *p2; 318238384Sjkim collate_large_t *match; 319238384Sjkim int p, l; 320238384Sjkim const int *sptr; 321238384Sjkim 322238384Sjkim /* 323238384Sjkim * If this is the "last" pass for the UNDEFINED, then 324238384Sjkim * we just return the priority itself. 325238384Sjkim */ 326238384Sjkim if (which >= table->info->directive_count) { 327238384Sjkim *pri = *t; 328238384Sjkim *len = 1; 329238384Sjkim *state = NULL; 330238384Sjkim return; 331238384Sjkim } 332238384Sjkim 333238384Sjkim /* 334238384Sjkim * If we have remaining substitution data from a previous 335238384Sjkim * call, consume it first. 336238384Sjkim */ 337238384Sjkim if ((sptr = *state) != NULL) { 338238384Sjkim *pri = *sptr; 339238384Sjkim sptr++; 340238384Sjkim if ((sptr == *state) || (sptr == NULL)) 341238384Sjkim *state = NULL; 342238384Sjkim else 343238384Sjkim *state = sptr; 344238384Sjkim *len = 0; 345238384Sjkim return; 346238384Sjkim } 347238384Sjkim 348238384Sjkim /* No active substitutions */ 349238384Sjkim *len = 1; 350238384Sjkim 351238384Sjkim /* 352238384Sjkim * Check for composites such as diphthongs that collate as a 353238384Sjkim * single element (aka chains or collating-elements). 354238384Sjkim */ 355238384Sjkim if (((p2 = chainsearch(table, t, &l)) != NULL) && 356238384Sjkim ((p = p2->pri[which]) >= 0)) { 357238384Sjkim 358238384Sjkim *len = l; 359238384Sjkim *pri = p; 360238384Sjkim 361238384Sjkim } else if (*t <= UCHAR_MAX) { 362238384Sjkim 363238384Sjkim /* 364238384Sjkim * Character is a small (8-bit) character. 365238384Sjkim * We just look these up directly for speed. 366238384Sjkim */ 367238384Sjkim *pri = table->char_pri_table[*t].pri[which]; 368238384Sjkim 369238384Sjkim } else if ((table->info->large_count > 0) && 370238384Sjkim ((match = largesearch(table, *t)) != NULL)) { 371238384Sjkim 372238384Sjkim /* 373238384Sjkim * Character was found in the extended table. 374238384Sjkim */ 375238384Sjkim *pri = match->pri.pri[which]; 376238384Sjkim 377238384Sjkim } else { 378238384Sjkim /* 379238384Sjkim * Character lacks a specific definition. 380238384Sjkim */ 381238384Sjkim if (table->info->directive[which] & DIRECTIVE_UNDEFINED) { 382238384Sjkim /* Mask off sign bit to prevent ordering confusion. */ 383238384Sjkim *pri = (*t & COLLATE_MAX_PRIORITY); 384238384Sjkim } else { 385238384Sjkim *pri = table->info->undef_pri[which]; 386238384Sjkim } 387238384Sjkim /* No substitutions for undefined characters! */ 388238384Sjkim return; 389238384Sjkim } 390238384Sjkim 391238384Sjkim /* 392238384Sjkim * Try substituting (expanding) the character. We are 393238384Sjkim * currently doing this *after* the chain compression. I 394238384Sjkim * think it should not matter, but this way might be slightly 395238384Sjkim * faster. 396238384Sjkim * 397238384Sjkim * We do this after the priority search, as this will help us 398238384Sjkim * to identify a single key value. In order for this to work, 399238384Sjkim * its important that the priority assigned to a given element 400238384Sjkim * to be substituted be unique for that level. The localedef 401238384Sjkim * code ensures this for us. 402238384Sjkim */ 403238384Sjkim if ((sptr = substsearch(table, *pri, which)) != NULL) { 404238384Sjkim if ((*pri = *sptr) > 0) { 405238384Sjkim sptr++; 406238384Sjkim *state = *sptr ? sptr : NULL; 407238384Sjkim } 408238384Sjkim } 409238384Sjkim 410238384Sjkim} 411238384Sjkim 412238384Sjkim/* 413238384Sjkim * This is the meaty part of wcsxfrm & strxfrm. Note that it does 414238384Sjkim * NOT NULL terminate. That is left to the caller. 415238384Sjkim */ 416238384Sjkimsize_t 417238384Sjkim_collate_wxfrm(struct xlocale_collate *table, const wchar_t *src, wchar_t *xf, 418238384Sjkim size_t room) 419238384Sjkim{ 420238384Sjkim int pri; 421238384Sjkim int len; 422238384Sjkim const wchar_t *t; 423238384Sjkim wchar_t *tr = NULL; 424238384Sjkim int direc; 425238384Sjkim int pass; 426238384Sjkim const int32_t *state; 427238384Sjkim size_t want = 0; 428238384Sjkim size_t need = 0; 429238384Sjkim int ndir = table->info->directive_count; 430238384Sjkim 431238384Sjkim assert(src); 432238384Sjkim 433238384Sjkim for (pass = 0; pass <= ndir; pass++) { 434238384Sjkim 435238384Sjkim state = NULL; 436238384Sjkim 437238384Sjkim if (pass != 0) { 438238384Sjkim /* insert level separator from the previous pass */ 439238384Sjkim if (room) { 440238384Sjkim *xf++ = 1; 441238384Sjkim room--; 442238384Sjkim } 443238384Sjkim want++; 444238384Sjkim } 445238384Sjkim 446238384Sjkim /* special pass for undefined */ 447238384Sjkim if (pass == ndir) { 448238384Sjkim direc = DIRECTIVE_FORWARD | DIRECTIVE_UNDEFINED; 449238384Sjkim } else { 450238384Sjkim direc = table->info->directive[pass]; 451238384Sjkim } 452238384Sjkim 453238384Sjkim t = src; 454238384Sjkim 455238384Sjkim if (direc & DIRECTIVE_BACKWARD) { 456238384Sjkim wchar_t *bp, *fp, c; 457238384Sjkim free(tr); 458238384Sjkim if ((tr = wcsdup(t)) == NULL) { 459238384Sjkim errno = ENOMEM; 460238384Sjkim goto fail; 461238384Sjkim } 462238384Sjkim bp = tr; 463238384Sjkim fp = tr + wcslen(tr) - 1; 464238384Sjkim while (bp < fp) { 465238384Sjkim c = *bp; 466238384Sjkim *bp++ = *fp; 467238384Sjkim *fp-- = c; 468238384Sjkim } 469238384Sjkim t = (const wchar_t *)tr; 470238384Sjkim } 471238384Sjkim 472238384Sjkim if (direc & DIRECTIVE_POSITION) { 473238384Sjkim while (*t || state) { 474238384Sjkim _collate_lookup(table, t, &len, &pri, pass, &state); 475238384Sjkim t += len; 476238384Sjkim if (pri <= 0) { 477238384Sjkim if (pri < 0) { 478238384Sjkim errno = EINVAL; 479238384Sjkim goto fail; 480238384Sjkim } 481238384Sjkim state = NULL; 482238384Sjkim pri = COLLATE_MAX_PRIORITY; 483238384Sjkim } 484238384Sjkim if (room) { 485238384Sjkim *xf++ = pri; 486238384Sjkim room--; 487238384Sjkim } 488238384Sjkim want++; 489238384Sjkim need = want; 490238384Sjkim } 491238384Sjkim } else { 492238384Sjkim while (*t || state) { 493238384Sjkim _collate_lookup(table, t, &len, &pri, pass, &state); 494238384Sjkim t += len; 495238384Sjkim if (pri <= 0) { 496238384Sjkim if (pri < 0) { 497238384Sjkim errno = EINVAL; 498238384Sjkim goto fail; 499238384Sjkim } 500238384Sjkim state = NULL; 501238384Sjkim continue; 502238384Sjkim } 503238384Sjkim if (room) { 504238384Sjkim *xf++ = pri; 505238384Sjkim room--; 506238384Sjkim } 507238384Sjkim want++; 508238384Sjkim need = want; 509238384Sjkim } 510238384Sjkim } 511238384Sjkim } 512238384Sjkim free(tr); 513238384Sjkim return (need); 514238384Sjkim 515238384Sjkimfail: 516238384Sjkim free(tr); 517238384Sjkim return ((size_t)(-1)); 518238384Sjkim} 519238384Sjkim 520238384Sjkim/* 521238384Sjkim * In the non-POSIX case, we transform each character into a string of 522238384Sjkim * characters representing the character's priority. Since char is usually 523238384Sjkim * signed, we are limited by 7 bits per byte. To avoid zero, we need to add 524238384Sjkim * XFRM_OFFSET, so we can't use a full 7 bits. For simplicity, we choose 6 525238384Sjkim * bits per byte. 526238384Sjkim * 527238384Sjkim * It turns out that we sometimes have real priorities that are 528238384Sjkim * 31-bits wide. (But: be careful using priorities where the high 529238384Sjkim * order bit is set -- i.e. the priority is negative. The sort order 530238384Sjkim * may be surprising!) 531238384Sjkim * 532238384Sjkim * TODO: This would be a good area to optimize somewhat. It turns out 533238384Sjkim * that real prioririties *except for the last UNDEFINED pass* are generally 534238384Sjkim * very small. We need the localedef code to precalculate the max 535238384Sjkim * priority for us, and ideally also give us a mask, and then we could 536238384Sjkim * severely limit what we expand to. 537238384Sjkim */ 538238384Sjkim#define XFRM_BYTES 6 539238384Sjkim#define XFRM_OFFSET ('0') /* make all printable characters */ 540238384Sjkim#define XFRM_SHIFT 6 541238384Sjkim#define XFRM_MASK ((1 << XFRM_SHIFT) - 1) 542238384Sjkim#define XFRM_SEP ('.') /* chosen to be less than XFRM_OFFSET */ 543238384Sjkim 544238384Sjkimstatic int 545238384Sjkimxfrm(struct xlocale_collate *table, unsigned char *p, int pri, int pass) 546238384Sjkim{ 547238384Sjkim /* we use unsigned to ensure zero fill on right shift */ 548238384Sjkim uint32_t val = (uint32_t)table->info->pri_count[pass]; 549238384Sjkim int nc = 0; 550238384Sjkim 551238384Sjkim while (val) { 552238384Sjkim *p = (pri & XFRM_MASK) + XFRM_OFFSET; 553238384Sjkim pri >>= XFRM_SHIFT; 554238384Sjkim val >>= XFRM_SHIFT; 555238384Sjkim p++; 556238384Sjkim nc++; 557238384Sjkim } 558238384Sjkim return (nc); 559238384Sjkim} 560238384Sjkim 561238384Sjkimsize_t 562238384Sjkim_collate_sxfrm(struct xlocale_collate *table, const wchar_t *src, char *xf, 563238384Sjkim size_t room) 564238384Sjkim{ 565238384Sjkim int pri; 566238384Sjkim int len; 567238384Sjkim const wchar_t *t; 568238384Sjkim wchar_t *tr = NULL; 569238384Sjkim int direc; 570238384Sjkim int pass; 571238384Sjkim const int32_t *state; 572238384Sjkim size_t want = 0; 573238384Sjkim size_t need = 0; 574238384Sjkim int b; 575238384Sjkim uint8_t buf[XFRM_BYTES]; 576238384Sjkim int ndir = table->info->directive_count; 577238384Sjkim 578238384Sjkim assert(src); 579238384Sjkim 580238384Sjkim for (pass = 0; pass <= ndir; pass++) { 581238384Sjkim 582238384Sjkim state = NULL; 583238384Sjkim 584238384Sjkim if (pass != 0) { 585238384Sjkim /* insert level separator from the previous pass */ 586238384Sjkim if (room) { 587238384Sjkim *xf++ = XFRM_SEP; 588238384Sjkim room--; 589238384Sjkim } 590238384Sjkim want++; 591 } 592 593 /* special pass for undefined */ 594 if (pass == ndir) { 595 direc = DIRECTIVE_FORWARD | DIRECTIVE_UNDEFINED; 596 } else { 597 direc = table->info->directive[pass]; 598 } 599 600 t = src; 601 602 if (direc & DIRECTIVE_BACKWARD) { 603 wchar_t *bp, *fp, c; 604 free(tr); 605 if ((tr = wcsdup(t)) == NULL) { 606 errno = ENOMEM; 607 goto fail; 608 } 609 bp = tr; 610 fp = tr + wcslen(tr) - 1; 611 while (bp < fp) { 612 c = *bp; 613 *bp++ = *fp; 614 *fp-- = c; 615 } 616 t = (const wchar_t *)tr; 617 } 618 619 if (direc & DIRECTIVE_POSITION) { 620 while (*t || state) { 621 622 _collate_lookup(table, t, &len, &pri, pass, &state); 623 t += len; 624 if (pri <= 0) { 625 if (pri < 0) { 626 errno = EINVAL; 627 goto fail; 628 } 629 state = NULL; 630 pri = COLLATE_MAX_PRIORITY; 631 } 632 633 b = xfrm(table, buf, pri, pass); 634 want += b; 635 if (room) { 636 while (b) { 637 b--; 638 if (room) { 639 *xf++ = buf[b]; 640 room--; 641 } 642 } 643 } 644 need = want; 645 } 646 } else { 647 while (*t || state) { 648 _collate_lookup(table, t, &len, &pri, pass, &state); 649 t += len; 650 if (pri <= 0) { 651 if (pri < 0) { 652 errno = EINVAL; 653 goto fail; 654 } 655 state = NULL; 656 continue; 657 } 658 659 b = xfrm(table, buf, pri, pass); 660 want += b; 661 if (room) { 662 663 while (b) { 664 b--; 665 if (room) { 666 *xf++ = buf[b]; 667 room--; 668 } 669 } 670 } 671 need = want; 672 } 673 } 674 } 675 free(tr); 676 return (need); 677 678fail: 679 free(tr); 680 return ((size_t)(-1)); 681} 682 683/* 684 * __collate_equiv_value returns the primary collation value for the given 685 * collating symbol specified by str and len. Zero or negative is returned 686 * if the collating symbol was not found. This function is used by bracket 687 * code in the TRE regex library. 688 */ 689int 690__collate_equiv_value(locale_t locale, const wchar_t *str, size_t len) 691{ 692 int32_t e; 693 694 if (len < 1 || len >= COLLATE_STR_LEN) 695 return (-1); 696 697 FIX_LOCALE(locale); 698 struct xlocale_collate *table = 699 (struct xlocale_collate*)locale->components[XLC_COLLATE]; 700 701 if (table->__collate_load_error) 702 return ((len == 1 && *str <= UCHAR_MAX) ? *str : -1); 703 704 if (len == 1) { 705 e = -1; 706 if (*str <= UCHAR_MAX) 707 e = table->char_pri_table[*str].pri[0]; 708 else if (table->info->large_count > 0) { 709 collate_large_t *match_large; 710 match_large = largesearch(table, *str); 711 if (match_large) 712 e = match_large->pri.pri[0]; 713 } 714 if (e == 0) 715 return (1); 716 return (e > 0 ? e : 0); 717 } 718 if (table->info->chain_count > 0) { 719 wchar_t name[COLLATE_STR_LEN]; 720 collate_chain_t *match_chain; 721 int clen; 722 723 wcsncpy (name, str, len); 724 name[len] = 0; 725 match_chain = chainsearch(table, name, &clen); 726 if (match_chain) { 727 e = match_chain->pri[0]; 728 if (e == 0) 729 return (1); 730 return (e < 0 ? -e : e); 731 } 732 } 733 return (0); 734} 735