1219019Sgabor/* $FreeBSD$ */ 2264497Stijl/* $NetBSD: citrus_iso2022.c,v 1.20 2010/12/07 22:01:45 joerg Exp $ */ 3219019Sgabor 4219019Sgabor/*- 5219019Sgabor * Copyright (c)1999, 2002 Citrus Project, 6219019Sgabor * All rights reserved. 7219019Sgabor * 8219019Sgabor * Redistribution and use in source and binary forms, with or without 9219019Sgabor * modification, are permitted provided that the following conditions 10219019Sgabor * are met: 11219019Sgabor * 1. Redistributions of source code must retain the above copyright 12219019Sgabor * notice, this list of conditions and the following disclaimer. 13219019Sgabor * 2. Redistributions in binary form must reproduce the above copyright 14219019Sgabor * notice, this list of conditions and the following disclaimer in the 15219019Sgabor * documentation and/or other materials provided with the distribution. 16219019Sgabor * 17219019Sgabor * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 18219019Sgabor * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 19219019Sgabor * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 20219019Sgabor * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 21219019Sgabor * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 22219019Sgabor * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 23219019Sgabor * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 24219019Sgabor * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 25219019Sgabor * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 26219019Sgabor * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 27219019Sgabor * SUCH DAMAGE. 28219019Sgabor * 29219019Sgabor * $Citrus: xpg4dl/FreeBSD/lib/libc/locale/iso2022.c,v 1.23 2001/06/21 01:51:44 yamt Exp $ 30219019Sgabor */ 31219019Sgabor 32219019Sgabor#include <sys/cdefs.h> 33219019Sgabor#include <sys/types.h> 34219019Sgabor 35219019Sgabor#include <assert.h> 36219019Sgabor#include <errno.h> 37219019Sgabor#include <limits.h> 38219019Sgabor#include <stdbool.h> 39219019Sgabor#include <stddef.h> 40219019Sgabor#include <stdio.h> 41219019Sgabor#include <stdlib.h> 42219019Sgabor#include <string.h> 43219019Sgabor#include <wchar.h> 44219019Sgabor 45219019Sgabor#include "citrus_namespace.h" 46219019Sgabor#include "citrus_types.h" 47219019Sgabor#include "citrus_module.h" 48219019Sgabor#include "citrus_stdenc.h" 49219019Sgabor#include "citrus_iso2022.h" 50219019Sgabor 51219019Sgabor 52219019Sgabor/* ---------------------------------------------------------------------- 53219019Sgabor * private stuffs used by templates 54219019Sgabor */ 55219019Sgabor 56219019Sgabor 57219019Sgabor/* 58219019Sgabor * wchar_t mappings: 59219019Sgabor * ASCII (ESC ( B) 00000000 00000000 00000000 0xxxxxxx 60219019Sgabor * iso-8859-1 (ESC , A) 00000000 00000000 00000000 1xxxxxxx 61219019Sgabor * 94 charset (ESC ( F) 0fffffff 00000000 00000000 0xxxxxxx 62219019Sgabor * 94 charset (ESC ( M F) 0fffffff 1mmmmmmm 00000000 0xxxxxxx 63219019Sgabor * 96 charset (ESC , F) 0fffffff 00000000 00000000 1xxxxxxx 64219019Sgabor * 96 charset (ESC , M F) 0fffffff 1mmmmmmm 00000000 1xxxxxxx 65219019Sgabor * 94x94 charset (ESC $ ( F) 0fffffff 00000000 0xxxxxxx 0xxxxxxx 66219019Sgabor * 96x96 charset (ESC $ , F) 0fffffff 00000000 0xxxxxxx 1xxxxxxx 67219019Sgabor * 94x94 charset (ESC & V ESC $ ( F) 68219019Sgabor * 0fffffff 1vvvvvvv 0xxxxxxx 0xxxxxxx 69219019Sgabor * 94x94x94 charset (ESC $ ( F) 0fffffff 0xxxxxxx 0xxxxxxx 0xxxxxxx 70219019Sgabor * 96x96x96 charset (ESC $ , F) 0fffffff 0xxxxxxx 0xxxxxxx 1xxxxxxx 71219019Sgabor * reserved for UCS4 co-existence (UCS4 is 31bit encoding thanks to mohta bit) 72219019Sgabor * 1xxxxxxx xxxxxxxx xxxxxxxx xxxxxxxx 73219019Sgabor */ 74219019Sgabor 75219019Sgabor#define CS94 (0U) 76219019Sgabor#define CS96 (1U) 77219019Sgabor#define CS94MULTI (2U) 78219019Sgabor#define CS96MULTI (3U) 79219019Sgabor 80219019Sgabortypedef struct { 81264497Stijl unsigned char type; 82264497Stijl unsigned char final; 83219019Sgabor unsigned char interm; 84219019Sgabor unsigned char vers; 85219019Sgabor} _ISO2022Charset; 86219019Sgabor 87219019Sgaborstatic const _ISO2022Charset ascii = { CS94, 'B', '\0', '\0' }; 88219019Sgaborstatic const _ISO2022Charset iso88591 = { CS96, 'A', '\0', '\0' }; 89219019Sgabor 90219019Sgabortypedef struct { 91219019Sgabor _ISO2022Charset g[4]; 92219019Sgabor /* need 3 bits to hold -1, 0, ..., 3 */ 93219019Sgabor int gl:3, 94219019Sgabor gr:3, 95219019Sgabor singlegl:3, 96219019Sgabor singlegr:3; 97219019Sgabor char ch[7]; /* longest escape sequence (ESC & V ESC $ ( F) */ 98219019Sgabor size_t chlen; 99219019Sgabor int flags; 100219019Sgabor#define _ISO2022STATE_FLAG_INITIALIZED 1 101219019Sgabor} _ISO2022State; 102219019Sgabor 103219019Sgabortypedef struct { 104219019Sgabor _ISO2022Charset *recommend[4]; 105219019Sgabor size_t recommendsize[4]; 106219019Sgabor _ISO2022Charset initg[4]; 107219019Sgabor int maxcharset; 108219019Sgabor int flags; 109219019Sgabor#define F_8BIT 0x0001 110219019Sgabor#define F_NOOLD 0x0002 111219019Sgabor#define F_SI 0x0010 /*0F*/ 112219019Sgabor#define F_SO 0x0020 /*0E*/ 113219019Sgabor#define F_LS0 0x0010 /*0F*/ 114219019Sgabor#define F_LS1 0x0020 /*0E*/ 115219019Sgabor#define F_LS2 0x0040 /*ESC n*/ 116219019Sgabor#define F_LS3 0x0080 /*ESC o*/ 117219019Sgabor#define F_LS1R 0x0100 /*ESC ~*/ 118219019Sgabor#define F_LS2R 0x0200 /*ESC }*/ 119219019Sgabor#define F_LS3R 0x0400 /*ESC |*/ 120219019Sgabor#define F_SS2 0x0800 /*ESC N*/ 121219019Sgabor#define F_SS3 0x1000 /*ESC O*/ 122219019Sgabor#define F_SS2R 0x2000 /*8E*/ 123219019Sgabor#define F_SS3R 0x4000 /*8F*/ 124219019Sgabor} _ISO2022EncodingInfo; 125219019Sgabor 126219019Sgabor#define _CEI_TO_EI(_cei_) (&(_cei_)->ei) 127219019Sgabor#define _CEI_TO_STATE(_cei_, _func_) (_cei_)->states.s_##_func_ 128219019Sgabor 129219019Sgabor#define _FUNCNAME(m) _citrus_ISO2022_##m 130219019Sgabor#define _ENCODING_INFO _ISO2022EncodingInfo 131219019Sgabor#define _ENCODING_STATE _ISO2022State 132219019Sgabor#define _ENCODING_MB_CUR_MAX(_ei_) MB_LEN_MAX 133219019Sgabor#define _ENCODING_IS_STATE_DEPENDENT 1 134219019Sgabor#define _STATE_NEEDS_EXPLICIT_INIT(_ps_) \ 135219019Sgabor (!((_ps_)->flags & _ISO2022STATE_FLAG_INITIALIZED)) 136219019Sgabor 137219019Sgabor 138219019Sgabor#define _ISO2022INVALID (wchar_t)-1 139219019Sgabor 140219019Sgaborstatic __inline bool isc0(__uint8_t x) 141219019Sgabor{ 142219019Sgabor 143219019Sgabor return ((x & 0x1f) == x); 144219019Sgabor} 145219019Sgabor 146219019Sgaborstatic __inline bool isc1(__uint8_t x) 147219019Sgabor{ 148219019Sgabor 149219019Sgabor return (0x80 <= x && x <= 0x9f); 150219019Sgabor} 151219019Sgabor 152219019Sgaborstatic __inline bool iscntl(__uint8_t x) 153219019Sgabor{ 154219019Sgabor 155219019Sgabor return (isc0(x) || isc1(x) || x == 0x7f); 156219019Sgabor} 157219019Sgabor 158219019Sgaborstatic __inline bool is94(__uint8_t x) 159219019Sgabor{ 160219019Sgabor 161219019Sgabor return (0x21 <= x && x <= 0x7e); 162219019Sgabor} 163219019Sgabor 164219019Sgaborstatic __inline bool is96(__uint8_t x) 165219019Sgabor{ 166219019Sgabor 167219019Sgabor return (0x20 <= x && x <= 0x7f); 168219019Sgabor} 169219019Sgabor 170219019Sgaborstatic __inline bool isecma(__uint8_t x) 171219019Sgabor{ 172219019Sgabor 173219019Sgabor return (0x30 <= x && x <= 0x7f); 174219019Sgabor} 175219019Sgabor 176219019Sgaborstatic __inline bool isinterm(__uint8_t x) 177219019Sgabor{ 178219019Sgabor 179219019Sgabor return (0x20 <= x && x <= 0x2f); 180219019Sgabor} 181219019Sgabor 182219019Sgaborstatic __inline bool isthree(__uint8_t x) 183219019Sgabor{ 184219019Sgabor 185219019Sgabor return (0x60 <= x && x <= 0x6f); 186219019Sgabor} 187219019Sgabor 188219019Sgaborstatic __inline int 189219019Sgaborgetcs(const char * __restrict p, _ISO2022Charset * __restrict cs) 190219019Sgabor{ 191219019Sgabor 192219019Sgabor if (!strncmp(p, "94$", 3) && p[3] && !p[4]) { 193219019Sgabor cs->final = (unsigned char)(p[3] & 0xff); 194219019Sgabor cs->interm = '\0'; 195219019Sgabor cs->vers = '\0'; 196219019Sgabor cs->type = CS94MULTI; 197219019Sgabor } else if (!strncmp(p, "96$", 3) && p[3] && !p[4]) { 198219019Sgabor cs->final = (unsigned char)(p[3] & 0xff); 199219019Sgabor cs->interm = '\0'; 200219019Sgabor cs->vers = '\0'; 201219019Sgabor cs->type = CS96MULTI; 202219019Sgabor } else if (!strncmp(p, "94", 2) && p[2] && !p[3]) { 203219019Sgabor cs->final = (unsigned char)(p[2] & 0xff); 204219019Sgabor cs->interm = '\0'; 205219019Sgabor cs->vers = '\0'; 206219019Sgabor cs->type = CS94; 207219019Sgabor } else if (!strncmp(p, "96", 2) && p[2] && !p[3]) { 208219019Sgabor cs->final = (unsigned char )(p[2] & 0xff); 209219019Sgabor cs->interm = '\0'; 210219019Sgabor cs->vers = '\0'; 211219019Sgabor cs->type = CS96; 212219019Sgabor } else 213219019Sgabor return (1); 214219019Sgabor 215219019Sgabor return (0); 216219019Sgabor} 217219019Sgabor 218219019Sgabor 219219019Sgabor#define _NOTMATCH 0 220219019Sgabor#define _MATCH 1 221219019Sgabor#define _PARSEFAIL 2 222219019Sgabor 223219019Sgaborstatic __inline int 224219019Sgaborget_recommend(_ISO2022EncodingInfo * __restrict ei, 225219019Sgabor const char * __restrict token) 226219019Sgabor{ 227219019Sgabor _ISO2022Charset cs, *p; 228219019Sgabor int i; 229219019Sgabor 230219019Sgabor if (!strchr("0123", token[0]) || token[1] != '=') 231219019Sgabor return (_NOTMATCH); 232219019Sgabor 233219019Sgabor if (getcs(&token[2], &cs) == 0) 234219019Sgabor ; 235219019Sgabor else if (!strcmp(&token[2], "94")) { 236219019Sgabor cs.final = (unsigned char)(token[4]); 237219019Sgabor cs.interm = '\0'; 238219019Sgabor cs.vers = '\0'; 239219019Sgabor cs.type = CS94; 240219019Sgabor } else if (!strcmp(&token[2], "96")) { 241219019Sgabor cs.final = (unsigned char)(token[4]); 242219019Sgabor cs.interm = '\0'; 243219019Sgabor cs.vers = '\0'; 244219019Sgabor cs.type = CS96; 245219019Sgabor } else if (!strcmp(&token[2], "94$")) { 246219019Sgabor cs.final = (unsigned char)(token[5]); 247219019Sgabor cs.interm = '\0'; 248219019Sgabor cs.vers = '\0'; 249219019Sgabor cs.type = CS94MULTI; 250219019Sgabor } else if (!strcmp(&token[2], "96$")) { 251219019Sgabor cs.final = (unsigned char)(token[5]); 252219019Sgabor cs.interm = '\0'; 253219019Sgabor cs.vers = '\0'; 254219019Sgabor cs.type = CS96MULTI; 255219019Sgabor } else 256219019Sgabor return (_PARSEFAIL); 257219019Sgabor 258219019Sgabor i = token[0] - '0'; 259219019Sgabor if (!ei->recommend[i]) 260219019Sgabor ei->recommend[i] = malloc(sizeof(_ISO2022Charset)); 261219019Sgabor else { 262219019Sgabor p = realloc(ei->recommend[i], 263219019Sgabor sizeof(_ISO2022Charset) * (ei->recommendsize[i] + 1)); 264219019Sgabor if (!p) 265219019Sgabor return (_PARSEFAIL); 266219019Sgabor ei->recommend[i] = p; 267219019Sgabor } 268219019Sgabor if (!ei->recommend[i]) 269219019Sgabor return (_PARSEFAIL); 270219019Sgabor ei->recommendsize[i]++; 271219019Sgabor 272219019Sgabor (ei->recommend[i] + (ei->recommendsize[i] - 1))->final = cs.final; 273219019Sgabor (ei->recommend[i] + (ei->recommendsize[i] - 1))->interm = cs.interm; 274219019Sgabor (ei->recommend[i] + (ei->recommendsize[i] - 1))->vers = cs.vers; 275219019Sgabor (ei->recommend[i] + (ei->recommendsize[i] - 1))->type = cs.type; 276219019Sgabor 277219019Sgabor return (_MATCH); 278219019Sgabor} 279219019Sgabor 280219019Sgaborstatic __inline int 281219019Sgaborget_initg(_ISO2022EncodingInfo * __restrict ei, 282219019Sgabor const char * __restrict token) 283219019Sgabor{ 284219019Sgabor _ISO2022Charset cs; 285219019Sgabor 286219019Sgabor if (strncmp("INIT", &token[0], 4) || 287219019Sgabor !strchr("0123", token[4]) || 288219019Sgabor token[5] != '=') 289219019Sgabor return (_NOTMATCH); 290219019Sgabor 291219019Sgabor if (getcs(&token[6], &cs) != 0) 292219019Sgabor return (_PARSEFAIL); 293219019Sgabor 294219019Sgabor ei->initg[token[4] - '0'].type = cs.type; 295219019Sgabor ei->initg[token[4] - '0'].final = cs.final; 296219019Sgabor ei->initg[token[4] - '0'].interm = cs.interm; 297219019Sgabor ei->initg[token[4] - '0'].vers = cs.vers; 298219019Sgabor 299219019Sgabor return (_MATCH); 300219019Sgabor} 301219019Sgabor 302219019Sgaborstatic __inline int 303219019Sgaborget_max(_ISO2022EncodingInfo * __restrict ei, 304219019Sgabor const char * __restrict token) 305219019Sgabor{ 306219019Sgabor if (!strcmp(token, "MAX1")) 307219019Sgabor ei->maxcharset = 1; 308219019Sgabor else if (!strcmp(token, "MAX2")) 309219019Sgabor ei->maxcharset = 2; 310219019Sgabor else if (!strcmp(token, "MAX3")) 311219019Sgabor ei->maxcharset = 3; 312219019Sgabor else 313219019Sgabor return (_NOTMATCH); 314219019Sgabor 315219019Sgabor return (_MATCH); 316219019Sgabor} 317219019Sgabor 318219019Sgabor 319219019Sgaborstatic __inline int 320219019Sgaborget_flags(_ISO2022EncodingInfo * __restrict ei, 321219019Sgabor const char * __restrict token) 322219019Sgabor{ 323219019Sgabor static struct { 324219019Sgabor const char *tag; 325219019Sgabor int flag; 326219019Sgabor } const tags[] = { 327219019Sgabor { "DUMMY", 0 }, 328219019Sgabor { "8BIT", F_8BIT }, 329219019Sgabor { "NOOLD", F_NOOLD }, 330219019Sgabor { "SI", F_SI }, 331219019Sgabor { "SO", F_SO }, 332219019Sgabor { "LS0", F_LS0 }, 333219019Sgabor { "LS1", F_LS1 }, 334219019Sgabor { "LS2", F_LS2 }, 335219019Sgabor { "LS3", F_LS3 }, 336219019Sgabor { "LS1R", F_LS1R }, 337219019Sgabor { "LS2R", F_LS2R }, 338219019Sgabor { "LS3R", F_LS3R }, 339219019Sgabor { "SS2", F_SS2 }, 340219019Sgabor { "SS3", F_SS3 }, 341219019Sgabor { "SS2R", F_SS2R }, 342219019Sgabor { "SS3R", F_SS3R }, 343219019Sgabor { NULL, 0 } 344219019Sgabor }; 345219019Sgabor int i; 346219019Sgabor 347219019Sgabor for (i = 0; tags[i].tag; i++) 348219019Sgabor if (!strcmp(token, tags[i].tag)) { 349219019Sgabor ei->flags |= tags[i].flag; 350219019Sgabor return (_MATCH); 351219019Sgabor } 352219019Sgabor 353219019Sgabor return (_NOTMATCH); 354219019Sgabor} 355219019Sgabor 356219019Sgabor 357219019Sgaborstatic __inline int 358219019Sgabor_citrus_ISO2022_parse_variable(_ISO2022EncodingInfo * __restrict ei, 359219019Sgabor const void * __restrict var, size_t lenvar __unused) 360219019Sgabor{ 361219019Sgabor char const *e, *v; 362219019Sgabor char buf[20]; 363219019Sgabor size_t len; 364219019Sgabor int i, ret; 365219019Sgabor 366219019Sgabor /* 367219019Sgabor * parse VARIABLE section. 368219019Sgabor */ 369219019Sgabor 370219019Sgabor if (!var) 371219019Sgabor return (EFTYPE); 372219019Sgabor 373219019Sgabor v = (const char *) var; 374219019Sgabor 375219019Sgabor /* initialize structure */ 376219019Sgabor ei->maxcharset = 0; 377219019Sgabor for (i = 0; i < 4; i++) { 378219019Sgabor ei->recommend[i] = NULL; 379219019Sgabor ei->recommendsize[i] = 0; 380219019Sgabor } 381219019Sgabor ei->flags = 0; 382219019Sgabor 383219019Sgabor while (*v) { 384219019Sgabor while (*v == ' ' || *v == '\t') 385219019Sgabor ++v; 386219019Sgabor 387219019Sgabor /* find the token */ 388219019Sgabor e = v; 389219019Sgabor while (*e && *e != ' ' && *e != '\t') 390219019Sgabor ++e; 391219019Sgabor 392219019Sgabor len = e - v; 393219019Sgabor if (len == 0) 394219019Sgabor break; 395219019Sgabor if (len >= sizeof(buf)) 396219019Sgabor goto parsefail; 397219019Sgabor snprintf(buf, sizeof(buf), "%.*s", (int)len, v); 398219019Sgabor 399219019Sgabor if ((ret = get_recommend(ei, buf)) != _NOTMATCH) 400219019Sgabor ; 401219019Sgabor else if ((ret = get_initg(ei, buf)) != _NOTMATCH) 402219019Sgabor ; 403219019Sgabor else if ((ret = get_max(ei, buf)) != _NOTMATCH) 404219019Sgabor ; 405219019Sgabor else if ((ret = get_flags(ei, buf)) != _NOTMATCH) 406219019Sgabor ; 407219019Sgabor else 408219019Sgabor ret = _PARSEFAIL; 409219019Sgabor if (ret == _PARSEFAIL) 410219019Sgabor goto parsefail; 411219019Sgabor v = e; 412219019Sgabor 413219019Sgabor } 414219019Sgabor 415219019Sgabor return (0); 416219019Sgabor 417219019Sgaborparsefail: 418219019Sgabor free(ei->recommend[0]); 419219019Sgabor free(ei->recommend[1]); 420219019Sgabor free(ei->recommend[2]); 421219019Sgabor free(ei->recommend[3]); 422219019Sgabor 423219019Sgabor return (EFTYPE); 424219019Sgabor} 425219019Sgabor 426219019Sgaborstatic __inline void 427219019Sgabor/*ARGSUSED*/ 428219019Sgabor_citrus_ISO2022_init_state(_ISO2022EncodingInfo * __restrict ei, 429219019Sgabor _ISO2022State * __restrict s) 430219019Sgabor{ 431219019Sgabor int i; 432219019Sgabor 433219019Sgabor memset(s, 0, sizeof(*s)); 434219019Sgabor s->gl = 0; 435219019Sgabor s->gr = (ei->flags & F_8BIT) ? 1 : -1; 436219019Sgabor 437219019Sgabor for (i = 0; i < 4; i++) 438219019Sgabor if (ei->initg[i].final) { 439219019Sgabor s->g[i].type = ei->initg[i].type; 440219019Sgabor s->g[i].final = ei->initg[i].final; 441219019Sgabor s->g[i].interm = ei->initg[i].interm; 442219019Sgabor } 443219019Sgabor s->singlegl = s->singlegr = -1; 444219019Sgabor s->flags |= _ISO2022STATE_FLAG_INITIALIZED; 445219019Sgabor} 446219019Sgabor 447260264Sdim#if 0 448219019Sgaborstatic __inline void 449219019Sgabor/*ARGSUSED*/ 450219019Sgabor_citrus_ISO2022_pack_state(_ISO2022EncodingInfo * __restrict ei __unused, 451219019Sgabor void * __restrict pspriv, const _ISO2022State * __restrict s) 452219019Sgabor{ 453219019Sgabor 454219019Sgabor memcpy(pspriv, (const void *)s, sizeof(*s)); 455219019Sgabor} 456219019Sgabor 457219019Sgaborstatic __inline void 458219019Sgabor/*ARGSUSED*/ 459219019Sgabor_citrus_ISO2022_unpack_state(_ISO2022EncodingInfo * __restrict ei __unused, 460219019Sgabor _ISO2022State * __restrict s, const void * __restrict pspriv) 461219019Sgabor{ 462219019Sgabor 463219019Sgabor memcpy((void *)s, pspriv, sizeof(*s)); 464219019Sgabor} 465260264Sdim#endif 466219019Sgabor 467219019Sgaborstatic int 468219019Sgabor/*ARGSUSED*/ 469219019Sgabor_citrus_ISO2022_encoding_module_init(_ISO2022EncodingInfo * __restrict ei, 470219019Sgabor const void * __restrict var, size_t lenvar) 471219019Sgabor{ 472219019Sgabor 473219019Sgabor return (_citrus_ISO2022_parse_variable(ei, var, lenvar)); 474219019Sgabor} 475219019Sgabor 476219019Sgaborstatic void 477219019Sgabor/*ARGSUSED*/ 478219019Sgabor_citrus_ISO2022_encoding_module_uninit(_ISO2022EncodingInfo *ei __unused) 479219019Sgabor{ 480219019Sgabor 481219019Sgabor} 482219019Sgabor 483219019Sgabor#define ESC '\033' 484219019Sgabor#define ECMA -1 485219019Sgabor#define INTERM -2 486219019Sgabor#define OECMA -3 487219019Sgaborstatic const struct seqtable { 488219019Sgabor int type; 489219019Sgabor int csoff; 490219019Sgabor int finaloff; 491219019Sgabor int intermoff; 492219019Sgabor int versoff; 493219019Sgabor int len; 494219019Sgabor int chars[10]; 495219019Sgabor} seqtable[] = { 496219019Sgabor /* G0 94MULTI special */ 497219019Sgabor { CS94MULTI, -1, 2, -1, -1, 3, { ESC, '$', OECMA }, }, 498219019Sgabor /* G0 94MULTI special with version identification */ 499219019Sgabor { CS94MULTI, -1, 5, -1, 2, 6, { ESC, '&', ECMA, ESC, '$', OECMA }, }, 500219019Sgabor /* G? 94 */ 501219019Sgabor { CS94, 1, 2, -1, -1, 3, { ESC, CS94, ECMA, }, }, 502219019Sgabor /* G? 94 with 2nd intermediate char */ 503219019Sgabor { CS94, 1, 3, 2, -1, 4, { ESC, CS94, INTERM, ECMA, }, }, 504219019Sgabor /* G? 96 */ 505219019Sgabor { CS96, 1, 2, -1, -1, 3, { ESC, CS96, ECMA, }, }, 506219019Sgabor /* G? 96 with 2nd intermediate char */ 507219019Sgabor { CS96, 1, 3, 2, -1, 4, { ESC, CS96, INTERM, ECMA, }, }, 508219019Sgabor /* G? 94MULTI */ 509219019Sgabor { CS94MULTI, 2, 3, -1, -1, 4, { ESC, '$', CS94, ECMA, }, }, 510219019Sgabor /* G? 96MULTI */ 511219019Sgabor { CS96MULTI, 2, 3, -1, -1, 4, { ESC, '$', CS96, ECMA, }, }, 512219019Sgabor /* G? 94MULTI with version specification */ 513219019Sgabor { CS94MULTI, 5, 6, -1, 2, 7, { ESC, '&', ECMA, ESC, '$', CS94, ECMA, }, }, 514219019Sgabor /* LS2/3 */ 515219019Sgabor { -1, -1, -1, -1, -1, 2, { ESC, 'n', }, }, 516219019Sgabor { -1, -1, -1, -1, -1, 2, { ESC, 'o', }, }, 517219019Sgabor /* LS1/2/3R */ 518219019Sgabor { -1, -1, -1, -1, -1, 2, { ESC, '~', }, }, 519219019Sgabor { -1, -1, -1, -1, -1, 2, { ESC, /*{*/ '}', }, }, 520219019Sgabor { -1, -1, -1, -1, -1, 2, { ESC, '|', }, }, 521219019Sgabor /* SS2/3 */ 522219019Sgabor { -1, -1, -1, -1, -1, 2, { ESC, 'N', }, }, 523219019Sgabor { -1, -1, -1, -1, -1, 2, { ESC, 'O', }, }, 524219019Sgabor /* end of records */ 525219019Sgabor// { 0, } 526219019Sgabor { 0, 0, 0, 0, 0, 0, { ESC, 0, }, } 527219019Sgabor}; 528219019Sgabor 529219019Sgaborstatic int 530219019Sgaborseqmatch(const char * __restrict s, size_t n, 531219019Sgabor const struct seqtable * __restrict sp) 532219019Sgabor{ 533219019Sgabor const int *p; 534219019Sgabor 535219019Sgabor p = sp->chars; 536219019Sgabor while ((size_t)(p - sp->chars) < n && p - sp->chars < sp->len) { 537219019Sgabor switch (*p) { 538219019Sgabor case ECMA: 539219019Sgabor if (!isecma(*s)) 540219019Sgabor goto terminate; 541219019Sgabor break; 542219019Sgabor case OECMA: 543219019Sgabor if (*s && strchr("@AB", *s)) 544219019Sgabor break; 545219019Sgabor else 546219019Sgabor goto terminate; 547219019Sgabor case INTERM: 548219019Sgabor if (!isinterm(*s)) 549219019Sgabor goto terminate; 550219019Sgabor break; 551219019Sgabor case CS94: 552219019Sgabor if (*s && strchr("()*+", *s)) 553219019Sgabor break; 554219019Sgabor else 555219019Sgabor goto terminate; 556219019Sgabor case CS96: 557219019Sgabor if (*s && strchr(",-./", *s)) 558219019Sgabor break; 559219019Sgabor else 560219019Sgabor goto terminate; 561219019Sgabor default: 562219019Sgabor if (*s != *p) 563219019Sgabor goto terminate; 564219019Sgabor break; 565219019Sgabor } 566219019Sgabor 567219019Sgabor p++; 568219019Sgabor s++; 569219019Sgabor } 570219019Sgabor 571219019Sgaborterminate: 572219019Sgabor return (p - sp->chars); 573219019Sgabor} 574219019Sgabor 575219019Sgaborstatic wchar_t 576219019Sgabor_ISO2022_sgetwchar(_ISO2022EncodingInfo * __restrict ei __unused, 577252583Speter const char * __restrict string, size_t n, const char ** __restrict result, 578219019Sgabor _ISO2022State * __restrict psenc) 579219019Sgabor{ 580219019Sgabor const struct seqtable *sp; 581219019Sgabor wchar_t wchar = 0; 582219019Sgabor int i, cur, nmatch; 583219019Sgabor 584219019Sgabor while (1) { 585219019Sgabor /* SI/SO */ 586219019Sgabor if (1 <= n && string[0] == '\017') { 587219019Sgabor psenc->gl = 0; 588219019Sgabor string++; 589219019Sgabor n--; 590219019Sgabor continue; 591219019Sgabor } 592219019Sgabor if (1 <= n && string[0] == '\016') { 593219019Sgabor psenc->gl = 1; 594219019Sgabor string++; 595219019Sgabor n--; 596219019Sgabor continue; 597219019Sgabor } 598219019Sgabor 599219019Sgabor /* SS2/3R */ 600219019Sgabor if (1 <= n && string[0] && strchr("\217\216", string[0])) { 601219019Sgabor psenc->singlegl = psenc->singlegr = 602219019Sgabor (string[0] - '\216') + 2; 603219019Sgabor string++; 604219019Sgabor n--; 605219019Sgabor continue; 606219019Sgabor } 607219019Sgabor 608219019Sgabor /* eat the letter if this is not ESC */ 609219019Sgabor if (1 <= n && string[0] != '\033') 610219019Sgabor break; 611219019Sgabor 612219019Sgabor /* look for a perfect match from escape sequences */ 613219019Sgabor for (sp = &seqtable[0]; sp->len; sp++) { 614219019Sgabor nmatch = seqmatch(string, n, sp); 615219019Sgabor if (sp->len == nmatch && n >= (size_t)(sp->len)) 616219019Sgabor break; 617219019Sgabor } 618219019Sgabor 619219019Sgabor if (!sp->len) 620219019Sgabor goto notseq; 621219019Sgabor 622219019Sgabor if (sp->type != -1) { 623219019Sgabor if (sp->csoff == -1) 624219019Sgabor i = 0; 625219019Sgabor else { 626219019Sgabor switch (sp->type) { 627219019Sgabor case CS94: 628219019Sgabor case CS94MULTI: 629219019Sgabor i = string[sp->csoff] - '('; 630219019Sgabor break; 631219019Sgabor case CS96: 632219019Sgabor case CS96MULTI: 633219019Sgabor i = string[sp->csoff] - ','; 634219019Sgabor break; 635219019Sgabor default: 636219019Sgabor return (_ISO2022INVALID); 637219019Sgabor } 638219019Sgabor } 639219019Sgabor psenc->g[i].type = sp->type; 640219019Sgabor psenc->g[i].final = '\0'; 641219019Sgabor psenc->g[i].interm = '\0'; 642219019Sgabor psenc->g[i].vers = '\0'; 643219019Sgabor /* sp->finaloff must not be -1 */ 644219019Sgabor if (sp->finaloff != -1) 645219019Sgabor psenc->g[i].final = string[sp->finaloff]; 646219019Sgabor if (sp->intermoff != -1) 647219019Sgabor psenc->g[i].interm = string[sp->intermoff]; 648219019Sgabor if (sp->versoff != -1) 649219019Sgabor psenc->g[i].vers = string[sp->versoff]; 650219019Sgabor 651219019Sgabor string += sp->len; 652219019Sgabor n -= sp->len; 653219019Sgabor continue; 654219019Sgabor } 655219019Sgabor 656219019Sgabor /* LS2/3 */ 657219019Sgabor if (2 <= n && string[0] == '\033' && 658219019Sgabor string[1] && strchr("no", string[1])) { 659219019Sgabor psenc->gl = string[1] - 'n' + 2; 660219019Sgabor string += 2; 661219019Sgabor n -= 2; 662219019Sgabor continue; 663219019Sgabor } 664219019Sgabor 665219019Sgabor /* LS1/2/3R */ 666219019Sgabor /* XXX: { for vi showmatch */ 667219019Sgabor if (2 <= n && string[0] == '\033' && 668219019Sgabor string[1] && strchr("~}|", string[1])) { 669219019Sgabor psenc->gr = 3 - (string[1] - '|'); 670219019Sgabor string += 2; 671219019Sgabor n -= 2; 672219019Sgabor continue; 673219019Sgabor } 674219019Sgabor 675219019Sgabor /* SS2/3 */ 676219019Sgabor if (2 <= n && string[0] == '\033' && string[1] && 677219019Sgabor strchr("NO", string[1])) { 678219019Sgabor psenc->singlegl = (string[1] - 'N') + 2; 679219019Sgabor string += 2; 680219019Sgabor n -= 2; 681219019Sgabor continue; 682219019Sgabor } 683219019Sgabor 684219019Sgabor notseq: 685219019Sgabor /* 686219019Sgabor * if we've got an unknown escape sequence, eat the ESC at the 687219019Sgabor * head. otherwise, wait till full escape sequence comes. 688219019Sgabor */ 689219019Sgabor for (sp = &seqtable[0]; sp->len; sp++) { 690219019Sgabor nmatch = seqmatch(string, n, sp); 691219019Sgabor if (!nmatch) 692219019Sgabor continue; 693219019Sgabor 694219019Sgabor /* 695219019Sgabor * if we are in the middle of escape sequence, 696219019Sgabor * we still need to wait for more characters to come 697219019Sgabor */ 698219019Sgabor if (n < (size_t)(sp->len)) { 699219019Sgabor if ((size_t)(nmatch) == n) { 700219019Sgabor if (result) 701219019Sgabor *result = string; 702219019Sgabor return (_ISO2022INVALID); 703219019Sgabor } 704219019Sgabor } else { 705219019Sgabor if (nmatch == sp->len) { 706219019Sgabor /* this case should not happen */ 707219019Sgabor goto eat; 708219019Sgabor } 709219019Sgabor } 710219019Sgabor } 711219019Sgabor 712219019Sgabor break; 713219019Sgabor } 714219019Sgabor 715219019Sgaboreat: 716219019Sgabor /* no letter to eat */ 717219019Sgabor if (n < 1) { 718219019Sgabor if (result) 719219019Sgabor *result = string; 720219019Sgabor return (_ISO2022INVALID); 721219019Sgabor } 722219019Sgabor 723219019Sgabor /* normal chars. always eat C0/C1 as is. */ 724219019Sgabor if (iscntl(*string & 0xff)) 725219019Sgabor cur = -1; 726219019Sgabor else if (*string & 0x80) 727219019Sgabor cur = (psenc->singlegr == -1) ? psenc->gr : psenc->singlegr; 728219019Sgabor else 729219019Sgabor cur = (psenc->singlegl == -1) ? psenc->gl : psenc->singlegl; 730219019Sgabor 731219019Sgabor if (cur == -1) { 732219019Sgaborasis: 733219019Sgabor wchar = *string++ & 0xff; 734219019Sgabor if (result) 735219019Sgabor *result = string; 736219019Sgabor /* reset single shift state */ 737219019Sgabor psenc->singlegr = psenc->singlegl = -1; 738219019Sgabor return (wchar); 739219019Sgabor } 740219019Sgabor 741219019Sgabor /* length error check */ 742219019Sgabor switch (psenc->g[cur].type) { 743219019Sgabor case CS94MULTI: 744219019Sgabor case CS96MULTI: 745219019Sgabor if (!isthree(psenc->g[cur].final)) { 746219019Sgabor if (2 <= n && 747219019Sgabor (string[0] & 0x80) == (string[1] & 0x80)) 748219019Sgabor break; 749219019Sgabor } else { 750219019Sgabor if (3 <= n && 751219019Sgabor (string[0] & 0x80) == (string[1] & 0x80) && 752219019Sgabor (string[0] & 0x80) == (string[2] & 0x80)) 753219019Sgabor break; 754219019Sgabor } 755219019Sgabor 756219019Sgabor /* we still need to wait for more characters to come */ 757219019Sgabor if (result) 758219019Sgabor *result = string; 759219019Sgabor return (_ISO2022INVALID); 760219019Sgabor 761219019Sgabor case CS94: 762219019Sgabor case CS96: 763219019Sgabor if (1 <= n) 764219019Sgabor break; 765219019Sgabor 766219019Sgabor /* we still need to wait for more characters to come */ 767219019Sgabor if (result) 768219019Sgabor *result = string; 769219019Sgabor return (_ISO2022INVALID); 770219019Sgabor } 771219019Sgabor 772219019Sgabor /* range check */ 773219019Sgabor switch (psenc->g[cur].type) { 774219019Sgabor case CS94: 775219019Sgabor if (!(is94(string[0] & 0x7f))) 776219019Sgabor goto asis; 777219019Sgabor case CS96: 778219019Sgabor if (!(is96(string[0] & 0x7f))) 779219019Sgabor goto asis; 780219019Sgabor break; 781219019Sgabor case CS94MULTI: 782219019Sgabor if (!(is94(string[0] & 0x7f) && is94(string[1] & 0x7f))) 783219019Sgabor goto asis; 784219019Sgabor break; 785219019Sgabor case CS96MULTI: 786219019Sgabor if (!(is96(string[0] & 0x7f) && is96(string[1] & 0x7f))) 787219019Sgabor goto asis; 788219019Sgabor break; 789219019Sgabor } 790219019Sgabor 791219019Sgabor /* extract the character. */ 792219019Sgabor switch (psenc->g[cur].type) { 793219019Sgabor case CS94: 794219019Sgabor /* special case for ASCII. */ 795219019Sgabor if (psenc->g[cur].final == 'B' && !psenc->g[cur].interm) { 796219019Sgabor wchar = *string++; 797219019Sgabor wchar &= 0x7f; 798219019Sgabor break; 799219019Sgabor } 800219019Sgabor wchar = psenc->g[cur].final; 801219019Sgabor wchar = (wchar << 8); 802219019Sgabor wchar |= (psenc->g[cur].interm ? (0x80 | psenc->g[cur].interm) : 0); 803219019Sgabor wchar = (wchar << 8); 804219019Sgabor wchar = (wchar << 8) | (*string++ & 0x7f); 805219019Sgabor break; 806219019Sgabor case CS96: 807219019Sgabor /* special case for ISO-8859-1. */ 808219019Sgabor if (psenc->g[cur].final == 'A' && !psenc->g[cur].interm) { 809219019Sgabor wchar = *string++; 810219019Sgabor wchar &= 0x7f; 811219019Sgabor wchar |= 0x80; 812219019Sgabor break; 813219019Sgabor } 814219019Sgabor wchar = psenc->g[cur].final; 815219019Sgabor wchar = (wchar << 8); 816219019Sgabor wchar |= (psenc->g[cur].interm ? (0x80 | psenc->g[cur].interm) : 0); 817219019Sgabor wchar = (wchar << 8); 818219019Sgabor wchar = (wchar << 8) | (*string++ & 0x7f); 819219019Sgabor wchar |= 0x80; 820219019Sgabor break; 821219019Sgabor case CS94MULTI: 822219019Sgabor case CS96MULTI: 823219019Sgabor wchar = psenc->g[cur].final; 824219019Sgabor wchar = (wchar << 8); 825219019Sgabor if (isthree(psenc->g[cur].final)) 826219019Sgabor wchar |= (*string++ & 0x7f); 827219019Sgabor wchar = (wchar << 8) | (*string++ & 0x7f); 828219019Sgabor wchar = (wchar << 8) | (*string++ & 0x7f); 829219019Sgabor if (psenc->g[cur].type == CS96MULTI) 830219019Sgabor wchar |= 0x80; 831219019Sgabor break; 832219019Sgabor } 833219019Sgabor 834219019Sgabor if (result) 835219019Sgabor *result = string; 836219019Sgabor /* reset single shift state */ 837219019Sgabor psenc->singlegr = psenc->singlegl = -1; 838219019Sgabor return (wchar); 839219019Sgabor} 840219019Sgabor 841219019Sgabor 842219019Sgabor 843219019Sgaborstatic int 844219019Sgabor_citrus_ISO2022_mbrtowc_priv(_ISO2022EncodingInfo * __restrict ei, 845252583Speter wchar_t * __restrict pwc, const char ** __restrict s, 846219019Sgabor size_t n, _ISO2022State * __restrict psenc, size_t * __restrict nresult) 847219019Sgabor{ 848252583Speter const char *p, *result, *s0; 849219019Sgabor wchar_t wchar; 850219019Sgabor int c, chlenbak; 851219019Sgabor 852219019Sgabor if (*s == NULL) { 853219019Sgabor _citrus_ISO2022_init_state(ei, psenc); 854219019Sgabor *nresult = _ENCODING_IS_STATE_DEPENDENT; 855219019Sgabor return (0); 856219019Sgabor } 857219019Sgabor s0 = *s; 858219019Sgabor c = 0; 859219019Sgabor chlenbak = psenc->chlen; 860219019Sgabor 861219019Sgabor /* 862219019Sgabor * if we have something in buffer, use that. 863219019Sgabor * otherwise, skip here 864219019Sgabor */ 865219019Sgabor if (psenc->chlen > sizeof(psenc->ch)) { 866219019Sgabor /* illgeal state */ 867219019Sgabor _citrus_ISO2022_init_state(ei, psenc); 868219019Sgabor goto encoding_error; 869219019Sgabor } 870219019Sgabor if (psenc->chlen == 0) 871219019Sgabor goto emptybuf; 872219019Sgabor 873219019Sgabor /* buffer is not empty */ 874219019Sgabor p = psenc->ch; 875219019Sgabor while (psenc->chlen < sizeof(psenc->ch)) { 876219019Sgabor if (n > 0) { 877219019Sgabor psenc->ch[psenc->chlen++] = *s0++; 878219019Sgabor n--; 879219019Sgabor } 880219019Sgabor 881219019Sgabor wchar = _ISO2022_sgetwchar(ei, p, psenc->chlen - (p-psenc->ch), 882219019Sgabor &result, psenc); 883219019Sgabor c += result - p; 884219019Sgabor if (wchar != _ISO2022INVALID) { 885219019Sgabor if (psenc->chlen > (size_t)c) 886219019Sgabor memmove(psenc->ch, result, psenc->chlen - c); 887219019Sgabor if (psenc->chlen < (size_t)c) 888219019Sgabor psenc->chlen = 0; 889219019Sgabor else 890219019Sgabor psenc->chlen -= c; 891219019Sgabor goto output; 892219019Sgabor } 893219019Sgabor 894219019Sgabor if (n == 0) { 895219019Sgabor if ((size_t)(result - p) == psenc->chlen) 896219019Sgabor /* complete shift sequence. */ 897219019Sgabor psenc->chlen = 0; 898219019Sgabor goto restart; 899219019Sgabor } 900219019Sgabor 901219019Sgabor p = result; 902219019Sgabor } 903219019Sgabor 904219019Sgabor /* escape sequence too long? */ 905219019Sgabor goto encoding_error; 906219019Sgabor 907219019Sgaboremptybuf: 908219019Sgabor wchar = _ISO2022_sgetwchar(ei, s0, n, &result, psenc); 909219019Sgabor if (wchar != _ISO2022INVALID) { 910219019Sgabor c += result - s0; 911219019Sgabor psenc->chlen = 0; 912219019Sgabor s0 = result; 913219019Sgabor goto output; 914219019Sgabor } 915219019Sgabor if (result > s0) { 916219019Sgabor c += (result - s0); 917219019Sgabor n -= (result - s0); 918219019Sgabor s0 = result; 919219019Sgabor if (n > 0) 920219019Sgabor goto emptybuf; 921219019Sgabor /* complete shift sequence. */ 922219019Sgabor goto restart; 923219019Sgabor } 924219019Sgabor n += c; 925219019Sgabor if (n < sizeof(psenc->ch)) { 926219019Sgabor memcpy(psenc->ch, s0 - c, n); 927219019Sgabor psenc->chlen = n; 928219019Sgabor s0 = result; 929219019Sgabor goto restart; 930219019Sgabor } 931219019Sgabor 932219019Sgabor /* escape sequence too long? */ 933219019Sgabor 934219019Sgaborencoding_error: 935219019Sgabor psenc->chlen = 0; 936219019Sgabor *nresult = (size_t)-1; 937219019Sgabor return (EILSEQ); 938219019Sgabor 939219019Sgaboroutput: 940219019Sgabor *s = s0; 941219019Sgabor if (pwc) 942219019Sgabor *pwc = wchar; 943219019Sgabor *nresult = wchar ? c - chlenbak : 0; 944219019Sgabor return (0); 945219019Sgabor 946219019Sgaborrestart: 947219019Sgabor *s = s0; 948219019Sgabor *nresult = (size_t)-2; 949219019Sgabor 950219019Sgabor return (0); 951219019Sgabor} 952219019Sgabor 953219019Sgaborstatic int 954219019Sgaborrecommendation(_ISO2022EncodingInfo * __restrict ei, 955219019Sgabor _ISO2022Charset * __restrict cs) 956219019Sgabor{ 957219019Sgabor _ISO2022Charset *recommend; 958219019Sgabor size_t j; 959219019Sgabor int i; 960219019Sgabor 961219019Sgabor /* first, try a exact match. */ 962219019Sgabor for (i = 0; i < 4; i++) { 963219019Sgabor recommend = ei->recommend[i]; 964219019Sgabor for (j = 0; j < ei->recommendsize[i]; j++) { 965219019Sgabor if (cs->type != recommend[j].type) 966219019Sgabor continue; 967219019Sgabor if (cs->final != recommend[j].final) 968219019Sgabor continue; 969219019Sgabor if (cs->interm != recommend[j].interm) 970219019Sgabor continue; 971219019Sgabor 972219019Sgabor return (i); 973219019Sgabor } 974219019Sgabor } 975219019Sgabor 976219019Sgabor /* then, try a wildcard match over final char. */ 977219019Sgabor for (i = 0; i < 4; i++) { 978219019Sgabor recommend = ei->recommend[i]; 979219019Sgabor for (j = 0; j < ei->recommendsize[i]; j++) { 980219019Sgabor if (cs->type != recommend[j].type) 981219019Sgabor continue; 982219019Sgabor if (cs->final && (cs->final != recommend[j].final)) 983219019Sgabor continue; 984219019Sgabor if (cs->interm && (cs->interm != recommend[j].interm)) 985219019Sgabor continue; 986219019Sgabor 987219019Sgabor return (i); 988219019Sgabor } 989219019Sgabor } 990219019Sgabor 991219019Sgabor /* there's no recommendation. make a guess. */ 992219019Sgabor if (ei->maxcharset == 0) { 993219019Sgabor return (0); 994219019Sgabor } else { 995219019Sgabor switch (cs->type) { 996219019Sgabor case CS94: 997219019Sgabor case CS94MULTI: 998219019Sgabor return (0); 999219019Sgabor case CS96: 1000219019Sgabor case CS96MULTI: 1001219019Sgabor return (1); 1002219019Sgabor } 1003219019Sgabor } 1004219019Sgabor return (0); 1005219019Sgabor} 1006219019Sgabor 1007219019Sgaborstatic int 1008219019Sgabor_ISO2022_sputwchar(_ISO2022EncodingInfo * __restrict ei, wchar_t wc, 1009219019Sgabor char * __restrict string, size_t n, char ** __restrict result, 1010219019Sgabor _ISO2022State * __restrict psenc, size_t * __restrict nresult) 1011219019Sgabor{ 1012219019Sgabor _ISO2022Charset cs; 1013219019Sgabor char *p; 1014219019Sgabor char tmp[MB_LEN_MAX]; 1015219019Sgabor size_t len; 1016219019Sgabor int bit8, i = 0, target; 1017219019Sgabor unsigned char mask; 1018219019Sgabor 1019219019Sgabor if (isc0(wc & 0xff)) { 1020219019Sgabor /* go back to INIT0 or ASCII on control chars */ 1021219019Sgabor cs = ei->initg[0].final ? ei->initg[0] : ascii; 1022219019Sgabor } else if (isc1(wc & 0xff)) { 1023219019Sgabor /* go back to INIT1 or ISO-8859-1 on control chars */ 1024219019Sgabor cs = ei->initg[1].final ? ei->initg[1] : iso88591; 1025219019Sgabor } else if (!(wc & ~0xff)) { 1026219019Sgabor if (wc & 0x80) { 1027219019Sgabor /* special treatment for ISO-8859-1 */ 1028219019Sgabor cs = iso88591; 1029219019Sgabor } else { 1030219019Sgabor /* special treatment for ASCII */ 1031219019Sgabor cs = ascii; 1032219019Sgabor } 1033219019Sgabor } else { 1034219019Sgabor cs.final = (wc >> 24) & 0x7f; 1035219019Sgabor if ((wc >> 16) & 0x80) 1036219019Sgabor cs.interm = (wc >> 16) & 0x7f; 1037219019Sgabor else 1038219019Sgabor cs.interm = '\0'; 1039219019Sgabor if (wc & 0x80) 1040219019Sgabor cs.type = (wc & 0x00007f00) ? CS96MULTI : CS96; 1041219019Sgabor else 1042219019Sgabor cs.type = (wc & 0x00007f00) ? CS94MULTI : CS94; 1043219019Sgabor } 1044219019Sgabor target = recommendation(ei, &cs); 1045219019Sgabor p = tmp; 1046219019Sgabor bit8 = ei->flags & F_8BIT; 1047219019Sgabor 1048219019Sgabor /* designate the charset onto the target plane(G0/1/2/3). */ 1049219019Sgabor if (psenc->g[target].type == cs.type && 1050219019Sgabor psenc->g[target].final == cs.final && 1051219019Sgabor psenc->g[target].interm == cs.interm) 1052219019Sgabor goto planeok; 1053219019Sgabor 1054219019Sgabor *p++ = '\033'; 1055219019Sgabor if (cs.type == CS94MULTI || cs.type == CS96MULTI) 1056219019Sgabor *p++ = '$'; 1057219019Sgabor if (target == 0 && cs.type == CS94MULTI && strchr("@AB", cs.final) && 1058219019Sgabor !cs.interm && !(ei->flags & F_NOOLD)) 1059219019Sgabor ; 1060219019Sgabor else if (cs.type == CS94 || cs.type == CS94MULTI) 1061219019Sgabor *p++ = "()*+"[target]; 1062219019Sgabor else 1063219019Sgabor *p++ = ",-./"[target]; 1064219019Sgabor if (cs.interm) 1065219019Sgabor *p++ = cs.interm; 1066219019Sgabor *p++ = cs.final; 1067219019Sgabor 1068219019Sgabor psenc->g[target].type = cs.type; 1069219019Sgabor psenc->g[target].final = cs.final; 1070219019Sgabor psenc->g[target].interm = cs.interm; 1071219019Sgabor 1072219019Sgaborplaneok: 1073219019Sgabor /* invoke the plane onto GL or GR. */ 1074219019Sgabor if (psenc->gl == target) 1075219019Sgabor goto sideok; 1076219019Sgabor if (bit8 && psenc->gr == target) 1077219019Sgabor goto sideok; 1078219019Sgabor 1079219019Sgabor if (target == 0 && (ei->flags & F_LS0)) { 1080219019Sgabor *p++ = '\017'; 1081219019Sgabor psenc->gl = 0; 1082219019Sgabor } else if (target == 1 && (ei->flags & F_LS1)) { 1083219019Sgabor *p++ = '\016'; 1084219019Sgabor psenc->gl = 1; 1085219019Sgabor } else if (target == 2 && (ei->flags & F_LS2)) { 1086219019Sgabor *p++ = '\033'; 1087219019Sgabor *p++ = 'n'; 1088219019Sgabor psenc->gl = 2; 1089219019Sgabor } else if (target == 3 && (ei->flags & F_LS3)) { 1090219019Sgabor *p++ = '\033'; 1091219019Sgabor *p++ = 'o'; 1092219019Sgabor psenc->gl = 3; 1093219019Sgabor } else if (bit8 && target == 1 && (ei->flags & F_LS1R)) { 1094219019Sgabor *p++ = '\033'; 1095219019Sgabor *p++ = '~'; 1096219019Sgabor psenc->gr = 1; 1097219019Sgabor } else if (bit8 && target == 2 && (ei->flags & F_LS2R)) { 1098219019Sgabor *p++ = '\033'; 1099219019Sgabor /*{*/ 1100219019Sgabor *p++ = '}'; 1101219019Sgabor psenc->gr = 2; 1102219019Sgabor } else if (bit8 && target == 3 && (ei->flags & F_LS3R)) { 1103219019Sgabor *p++ = '\033'; 1104219019Sgabor *p++ = '|'; 1105219019Sgabor psenc->gr = 3; 1106219019Sgabor } else if (target == 2 && (ei->flags & F_SS2)) { 1107219019Sgabor *p++ = '\033'; 1108219019Sgabor *p++ = 'N'; 1109219019Sgabor psenc->singlegl = 2; 1110219019Sgabor } else if (target == 3 && (ei->flags & F_SS3)) { 1111219019Sgabor *p++ = '\033'; 1112219019Sgabor *p++ = 'O'; 1113219019Sgabor psenc->singlegl = 3; 1114219019Sgabor } else if (bit8 && target == 2 && (ei->flags & F_SS2R)) { 1115219019Sgabor *p++ = '\216'; 1116219019Sgabor *p++ = 'N'; 1117219019Sgabor psenc->singlegl = psenc->singlegr = 2; 1118219019Sgabor } else if (bit8 && target == 3 && (ei->flags & F_SS3R)) { 1119219019Sgabor *p++ = '\217'; 1120219019Sgabor *p++ = 'O'; 1121219019Sgabor psenc->singlegl = psenc->singlegr = 3; 1122219019Sgabor } else 1123219019Sgabor goto ilseq; 1124219019Sgabor 1125219019Sgaborsideok: 1126219019Sgabor if (psenc->singlegl == target) 1127219019Sgabor mask = 0x00; 1128219019Sgabor else if (psenc->singlegr == target) 1129219019Sgabor mask = 0x80; 1130219019Sgabor else if (psenc->gl == target) 1131219019Sgabor mask = 0x00; 1132219019Sgabor else if ((ei->flags & F_8BIT) && psenc->gr == target) 1133219019Sgabor mask = 0x80; 1134219019Sgabor else 1135219019Sgabor goto ilseq; 1136219019Sgabor 1137219019Sgabor switch (cs.type) { 1138219019Sgabor case CS94: 1139219019Sgabor case CS96: 1140219019Sgabor i = 1; 1141219019Sgabor break; 1142219019Sgabor case CS94MULTI: 1143219019Sgabor case CS96MULTI: 1144219019Sgabor i = !iscntl(wc & 0xff) ? 1145219019Sgabor (isthree(cs.final) ? 3 : 2) : 1; 1146219019Sgabor break; 1147219019Sgabor } 1148219019Sgabor while (i-- > 0) 1149219019Sgabor *p++ = ((wc >> (i << 3)) & 0x7f) | mask; 1150219019Sgabor 1151219019Sgabor /* reset single shift state */ 1152219019Sgabor psenc->singlegl = psenc->singlegr = -1; 1153219019Sgabor 1154219019Sgabor len = (size_t)(p - tmp); 1155219019Sgabor if (n < len) { 1156219019Sgabor if (result) 1157219019Sgabor *result = (char *)0; 1158219019Sgabor *nresult = (size_t)-1; 1159219019Sgabor return (E2BIG); 1160219019Sgabor } 1161219019Sgabor if (result) 1162219019Sgabor *result = string + len; 1163219019Sgabor memcpy(string, tmp, len); 1164219019Sgabor *nresult = len; 1165219019Sgabor 1166219019Sgabor return (0); 1167219019Sgabor 1168219019Sgaborilseq: 1169219019Sgabor *nresult = (size_t)-1; 1170219019Sgabor return (EILSEQ); 1171219019Sgabor} 1172219019Sgabor 1173219019Sgaborstatic int 1174219019Sgabor_citrus_ISO2022_put_state_reset(_ISO2022EncodingInfo * __restrict ei, 1175219019Sgabor char * __restrict s, size_t n, _ISO2022State * __restrict psenc, 1176219019Sgabor size_t * __restrict nresult) 1177219019Sgabor{ 1178219019Sgabor char *result; 1179219019Sgabor char buf[MB_LEN_MAX]; 1180219019Sgabor size_t len; 1181219019Sgabor int ret; 1182219019Sgabor 1183219019Sgabor /* XXX state will be modified after this operation... */ 1184219019Sgabor ret = _ISO2022_sputwchar(ei, L'\0', buf, sizeof(buf), &result, psenc, 1185219019Sgabor &len); 1186219019Sgabor if (ret) { 1187219019Sgabor *nresult = len; 1188219019Sgabor return (ret); 1189219019Sgabor } 1190219019Sgabor 1191219019Sgabor if (sizeof(buf) < len || n < len-1) { 1192219019Sgabor /* XXX should recover state? */ 1193219019Sgabor *nresult = (size_t)-1; 1194219019Sgabor return (E2BIG); 1195219019Sgabor } 1196219019Sgabor 1197219019Sgabor memcpy(s, buf, len - 1); 1198219019Sgabor *nresult = len - 1; 1199219019Sgabor return (0); 1200219019Sgabor} 1201219019Sgabor 1202219019Sgaborstatic int 1203219019Sgabor_citrus_ISO2022_wcrtomb_priv(_ISO2022EncodingInfo * __restrict ei, 1204219019Sgabor char * __restrict s, size_t n, wchar_t wc, 1205219019Sgabor _ISO2022State * __restrict psenc, size_t * __restrict nresult) 1206219019Sgabor{ 1207219019Sgabor char *result; 1208219019Sgabor char buf[MB_LEN_MAX]; 1209219019Sgabor size_t len; 1210219019Sgabor int ret; 1211219019Sgabor 1212219019Sgabor /* XXX state will be modified after this operation... */ 1213219019Sgabor ret = _ISO2022_sputwchar(ei, wc, buf, sizeof(buf), &result, psenc, 1214219019Sgabor &len); 1215219019Sgabor if (ret) { 1216219019Sgabor *nresult = len; 1217219019Sgabor return (ret); 1218219019Sgabor } 1219219019Sgabor 1220219019Sgabor if (sizeof(buf) < len || n < len) { 1221219019Sgabor /* XXX should recover state? */ 1222219019Sgabor *nresult = (size_t)-1; 1223219019Sgabor return (E2BIG); 1224219019Sgabor } 1225219019Sgabor 1226219019Sgabor memcpy(s, buf, len); 1227219019Sgabor *nresult = len; 1228219019Sgabor return (0); 1229219019Sgabor} 1230219019Sgabor 1231219019Sgaborstatic __inline int 1232219019Sgabor/*ARGSUSED*/ 1233219019Sgabor_citrus_ISO2022_stdenc_wctocs(_ISO2022EncodingInfo * __restrict ei __unused, 1234219019Sgabor _csid_t * __restrict csid, _index_t * __restrict idx, wchar_t wc) 1235219019Sgabor{ 1236219019Sgabor wchar_t m, nm; 1237219019Sgabor 1238219019Sgabor m = wc & 0x7FFF8080; 1239219019Sgabor nm = wc & 0x007F7F7F; 1240219019Sgabor if (m & 0x00800000) 1241219019Sgabor nm &= 0x00007F7F; 1242219019Sgabor else 1243219019Sgabor m &= 0x7F008080; 1244219019Sgabor if (nm & 0x007F0000) { 1245219019Sgabor /* ^3 mark */ 1246219019Sgabor m |= 0x007F0000; 1247219019Sgabor } else if (nm & 0x00007F00) { 1248219019Sgabor /* ^2 mark */ 1249219019Sgabor m |= 0x00007F00; 1250219019Sgabor } 1251219019Sgabor *csid = (_csid_t)m; 1252219019Sgabor *idx = (_index_t)nm; 1253219019Sgabor 1254219019Sgabor return (0); 1255219019Sgabor} 1256219019Sgabor 1257219019Sgaborstatic __inline int 1258219019Sgabor/*ARGSUSED*/ 1259219019Sgabor_citrus_ISO2022_stdenc_cstowc(_ISO2022EncodingInfo * __restrict ei __unused, 1260219019Sgabor wchar_t * __restrict wc, _csid_t csid, _index_t idx) 1261219019Sgabor{ 1262219019Sgabor 1263219019Sgabor *wc = (wchar_t)(csid & 0x7F808080) | (wchar_t)idx; 1264219019Sgabor 1265219019Sgabor return (0); 1266219019Sgabor} 1267219019Sgabor 1268219019Sgaborstatic __inline int 1269219019Sgabor/*ARGSUSED*/ 1270219019Sgabor_citrus_ISO2022_stdenc_get_state_desc_generic(_ISO2022EncodingInfo * __restrict ei __unused, 1271219019Sgabor _ISO2022State * __restrict psenc, int * __restrict rstate) 1272219019Sgabor{ 1273219019Sgabor 1274219019Sgabor if (psenc->chlen == 0) { 1275219019Sgabor /* XXX: it should distinguish initial and stable. */ 1276219019Sgabor *rstate = _STDENC_SDGEN_STABLE; 1277219019Sgabor } else 1278219019Sgabor *rstate = (psenc->ch[0] == '\033') ? 1279219019Sgabor _STDENC_SDGEN_INCOMPLETE_SHIFT : 1280219019Sgabor _STDENC_SDGEN_INCOMPLETE_CHAR; 1281219019Sgabor return (0); 1282219019Sgabor} 1283219019Sgabor 1284219019Sgabor/* ---------------------------------------------------------------------- 1285219019Sgabor * public interface for stdenc 1286219019Sgabor */ 1287219019Sgabor 1288219019Sgabor_CITRUS_STDENC_DECLS(ISO2022); 1289219019Sgabor_CITRUS_STDENC_DEF_OPS(ISO2022); 1290219019Sgabor 1291219019Sgabor#include "citrus_stdenc_template.h" 1292