1/* Copyright 1994 NEC Corporation, Tokyo, Japan. 2 * 3 * Permission to use, copy, modify, distribute and sell this software 4 * and its documentation for any purpose is hereby granted without 5 * fee, provided that the above copyright notice appear in all copies 6 * and that both that copyright notice and this permission notice 7 * appear in supporting documentation, and that the name of NEC 8 * Corporation not be used in advertising or publicity pertaining to 9 * distribution of the software without specific, written prior 10 * permission. NEC Corporation makes no representations about the 11 * suitability of this software for any purpose. It is provided "as 12 * is" without express or implied warranty. 13 * 14 * NEC CORPORATION DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, 15 * INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN 16 * NO EVENT SHALL NEC CORPORATION BE LIABLE FOR ANY SPECIAL, INDIRECT OR 17 * CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF 18 * USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR 19 * OTHER TORTUOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR 20 * PERFORMANCE OF THIS SOFTWARE. 21 */ 22 23/************************************************************************/ 24/* THIS SOURCE CODE IS MODIFIED FOR TKO BY T.MURAI 1997 25/************************************************************************/ 26 27#include <string.h> 28 29#if !defined(lint) && !defined(__CODECENTER__) 30static char rcsid[]="$Id: nword.c 14875 2005-11-12 21:25:31Z bonefish $"; 31#endif 32 33/* LINTLIBRARY */ 34#include "RKintern.h" 35 36#define rk_debug(file, fmt, a, b, c) 37 38static struct nword *allocWord(struct nstore *st, int bb); 39static void derefWord(struct nword *word); 40static void killWord(struct nstore *st, struct nword *word); 41static void freeWord(struct nstore *st, struct nword *word); 42static int cvtNum(WCHAR_T *dst, int maxdst, WCHAR_T *src, int maxsrc, int format); 43static int cvtAlpha(WCHAR_T *dst, int maxdst, WCHAR_T *src, int maxsrc, int format); 44static int cvtHira(WCHAR_T *dst, int maxdst, WCHAR_T *src, int maxsrc, int format); 45static int cvtLit(WCHAR_T *dst, int maxdst, WCHAR_T *src, int maxsrc, int format, unsigned long mode); 46static void cancelNVE(struct NV *nv, struct NVE *p); 47static int parseWord(struct RkContext *cx, int yy, int ys, int ye, int iclass, struct nword *xqh[], int maxclen, int doflush, int douniq); 48static int doParse(struct RkContext *cx, int yy, int ys, int ye, struct nword *xqh[], int maxclen, int doflush, int douniq); 49static struct nword *height2list(struct nword *height[], int maxclen); 50static void list2height(struct nword *height[], int maxclen, struct nword *parse); 51static void storeBun(struct RkContext *cx, int yy, int ys, int ye, struct nbun *bun); 52static void evalSplit(struct nword *suc, struct splitParm *ul); 53static int calcSplit(struct RkContext *cx, int yy, struct nword *top, struct nqueue xq[], int maxclen, int flush); 54static void parseQue(struct RkContext *cx, int maxq, int yy, int ys, int ye, int doflush); 55static int IsStableQue(struct RkContext *cx, int c, int doflush); 56static int Que2Bun(struct RkContext *cx, int yy, int ys, int ye, int doflush); 57static void doLearn(struct RkContext *cx, struct nword *thisW); 58 59inline void 60usncopy(WCHAR_T *dst, WCHAR_T *src, int len) 61{ 62 memcpy(dst, src, len * sizeof(WCHAR_T)); 63} 64 65inline void 66clearWord(struct nword *w, int bb) 67{ 68 if (w) { 69 w->nw_cache = (struct ncache *)0; 70 w->nw_rowcol = bb; /* ʸÀá */ 71 w->nw_klen = w->nw_ylen = 0; 72 w->nw_class = ND_EMP; 73 w->nw_flags = 0; 74 w->nw_lit = 0; 75 w->nw_prio = 0L; 76 w->nw_left = w->nw_next = (struct nword *)0; 77 w->nw_kanji = (Wrec *)0; 78 } 79} 80 81/*ARGSUSED*/ 82inline void 83setWord(struct nword *w, int rc, int lit, WCHAR_T *yomi, int ylen, Wrec *kanji, int klen, int bb) 84{ 85 clearWord(w, bb); 86 w->nw_rowcol = rc; 87 w->nw_klen = klen; 88 w->nw_ylen = ylen; 89 w->nw_class = 0; 90 w->nw_flags = 0; 91 w->nw_lit = lit; 92 w->nw_kanji = kanji; 93} 94 95/* allocWord 96 * allocate a fresh word 97 */ 98/*ARGSUSED*/ 99static 100struct nword * 101allocWord(struct nstore *st, int bb) 102{ 103struct nword *new_word; 104const size_t NW_PAGESIZE = 1024; 105 106 if (!SX.word) { 107 struct nword *new_page; 108 int i; 109 new_page = (struct nword *)malloc(sizeof(struct nword)*NW_PAGESIZE); 110 if (new_page) { 111 SX.page_in_use++; 112 new_page[0].nw_next = SX.page; 113 SX.page = &new_page[0]; 114 SX.word = &new_page[1]; 115 for (i = 1; i + 1 < NW_PAGESIZE; i++) 116 new_page[i].nw_next = &new_page[i + 1]; 117 new_page[i].nw_next = (struct nword *)0; 118 }; 119 }; 120 new_word = SX.word; 121 if (new_word) { 122 SX.word = new_word->nw_next; 123 clearWord(new_word, bb); 124 st->word_in_use++; 125 SX.word_in_use++; 126 }; 127 return new_word; 128} 129 130static void 131derefWord(struct nword *word) 132{ 133 for (; word; word = word->nw_next) 134 if (word->nw_cache) 135 (void)_RkDerefCache(word->nw_cache); 136} 137 138/*ARGSUSED*/ 139static void 140killWord(struct nstore *st, struct nword *word) 141{ 142 struct nword *p, *q; 143 144 if (word) { 145 for (p = q = word; p; q = p, p = p->nw_next) { 146 if (!p->nw_cache && p->nw_kanji) { 147 _Rkpanic("killWord this would never happen addr ", 0, 0, 0); 148 free(p->nw_kanji); 149 }; 150 st->word_in_use--; 151 SX.word_in_use--; 152 } 153 q->nw_next = SX.word; 154 SX.word = word; 155 } 156} 157 158static void 159freeWord(struct nstore *st, struct nword *word) 160{ 161 derefWord(word); 162 killWord(st, word); 163} 164 165void 166_RkFreeBunq(struct nstore *st) 167{ 168 struct nbun *bunq = &st->bunq[st->curbun]; 169 170 freeWord(st, bunq->nb_cand); 171 bunq->nb_cand = (struct nword *)0; 172 bunq->nb_yoff = 0; 173 bunq->nb_curlen = bunq->nb_maxcand = bunq->nb_curcand = 0; 174 bunq->nb_flags = (unsigned short)0; 175 return; 176} 177 178extern unsigned searchRut(); 179extern int entryRut(); 180 181inline 182struct nword * 183concWord(struct nstore *st, struct nword *p, struct nword *q, int loc, int bb) 184{ 185 struct nword conc; 186 struct nword *pq; 187 struct nword *t; 188 int count; 189 190/* check limit conditions */ 191 count = 0; 192 for (t = p; t; t = t->nw_left) 193 count++; 194 if (((unsigned long)(p->nw_klen + q->nw_klen) > RK_LEN_WMAX) || 195 ((unsigned long)(p->nw_ylen + q->nw_ylen) > RK_LEN_WMAX) || 196 (count >= RK_CONC_NMAX)) 197 return (struct nword *)0; 198/* create a concatinated word temoprally */ 199 conc = *q; 200 conc.nw_klen += p->nw_klen; 201 conc.nw_ylen += p->nw_ylen; 202#ifdef FUJIEDA_HACK 203 conc.nw_flags = p->nw_flags&(NW_PRE|NW_SUC|NW_SWD|NW_DUMMY); 204#else 205 conc.nw_flags = p->nw_flags&(NW_PRE|NW_SUC|NW_SWD); 206#endif 207 conc.nw_prio = p->nw_prio; 208 conc.nw_next = (struct nword *)0; 209 conc.nw_left = p; 210 switch(q->nw_class) { 211/* kakko, kutouten ha setuzoku kankei ni eikyou sinai */ 212 case ND_OPN: 213 case ND_CLS: 214 conc.nw_rowcol = p->nw_rowcol; 215 if (p->nw_class != ND_EMP) { 216 conc.nw_class = p->nw_class; 217 conc.nw_flags = p->nw_flags; 218 } else { 219 conc.nw_class = q->nw_class; 220 conc.nw_flags = q->nw_flags; 221 }; 222 break; 223 case ND_PUN: 224 /* avoid punctionations where prohibited */ 225 if (!CanSplitWord(p)) 226 return (struct nword *)0; 227 /* don't remove loc check or you get stuck when a punctionation comes */ 228 if (loc > 0 && p->nw_class == ND_EMP) 229 return (struct nword *)0; 230 conc.nw_rowcol = p->nw_rowcol; 231 conc.nw_class = ND_SWD; 232 break; 233 case ND_MWD: 234 conc.nw_flags |= NW_MWD; 235#ifdef FUJIEDA_HACK 236 conc.nw_flags |= (q->nw_flags & NW_DUMMY); 237#endif 238 conc.nw_prio = q->nw_prio; 239 break; 240 case ND_SWD: 241 if (!(conc.nw_flags&NW_SWD)) 242 conc.nw_flags |= NW_SWD; 243 break; 244 case ND_PRE: 245 conc.nw_flags |= NW_PRE; 246 break; 247 case ND_SUC: 248 conc.nw_flags |= NW_SUC; 249 break; 250 }; 251/* cache no sanshoudo wo kousinn suru */ 252 pq = allocWord(st, bb); 253 if (pq) { 254 *pq = conc; 255 p->nw_flags |= NW_FOLLOW; 256 if (pq->nw_cache) 257 _RkEnrefCache(pq->nw_cache); 258 }; 259 return pq; 260} 261 262/* clearQue 263 * clear word tree queue 264 */ 265inline void 266clearQue(struct nqueue *xq) 267{ 268 xq->tree = (struct nword *)0; 269 xq->maxlen = 0; 270 xq->status = 0; 271} 272/* RkFreeQue 273 * free word tree stored in [s, e) 274 */ 275void 276_RkFreeQue(struct nstore *st, int s, int e) 277{ 278 struct nqueue *xq = st->xq; 279 280 while (s < e) { 281 if (xq[s].tree) 282 freeWord(st, xq[s].tree); 283 clearQue(&xq[s]); 284 s++; 285 }; 286} 287 288/* 289 * Literal 290 */ 291inline 292int 293cvtNum(WCHAR_T *dst, int maxdst, WCHAR_T *src, int maxsrc, int format) 294{ 295 return RkwCvtSuuji(dst, maxdst, src, maxsrc, format - 1); 296} 297 298inline int 299cvtAlpha(WCHAR_T *dst, int maxdst, WCHAR_T *src, int maxsrc, int format) 300{ 301 switch(format) { 302#ifdef ALPHA_CONVERSION 303 case 1: return RkwCvtZen(dst, maxdst, src, maxsrc); 304 case 2: return RkwCvtHan(dst, maxdst, src, maxsrc); 305 case 3: return -1; 306#else 307 case 1: return RkwCvtNone(dst, maxdst, src, maxsrc); 308 case 2: return -1; 309#endif 310 default: return 0; 311 } 312} 313 314inline int 315cvtHira(WCHAR_T *dst, int maxdst, WCHAR_T *src, int maxsrc, int format) 316{ 317 switch(format) { 318 case 1: return RkwCvtHira(dst, maxdst, src, maxsrc); 319 case 2: return RkwCvtKana(dst, maxdst, src, maxsrc); 320 default: return 0; 321 } 322} 323 324static 325int 326cvtLit(WCHAR_T *dst, int maxdst, WCHAR_T *src, int maxsrc, int format, unsigned long mode) 327{ 328 switch(format >> 4) { 329 case LIT_NUM: 330 if (mode & RK_MAKE_KANSUUJI) 331 return cvtNum(dst, maxdst, src, maxsrc, format&15); 332 else 333 return RkwCvtNone(dst, maxdst, src, maxsrc); 334 case LIT_ALPHA: return cvtAlpha(dst, maxdst, src, maxsrc, format&15); 335 case LIT_HIRA: return cvtHira(dst, maxdst, src, maxsrc, format&15); 336 default: return 0; 337 } 338} 339 340/* setLit 341 * create the literals as many as the context requires 342 */ 343inline 344struct nword * 345setLit(struct RkContext *cx, struct nword *word, int maxword, int rc, WCHAR_T *src, int srclen, int format) 346{ 347 struct nword *w = word; 348 int dstlen; 349 unsigned long mode; 350 351 if (!cx->litmode) 352 return 0; 353 for (mode = cx->litmode[format]; mode; mode >>= RK_XFERBITS) 354 if (w < word + maxword) { 355 int code = MAKELIT(format, mode&RK_XFERMASK); 356 357 dstlen = cvtLit((WCHAR_T *)0, 9999, src, srclen, code, (unsigned long)cx->concmode); 358 if (0 < dstlen && dstlen <= RK_LEN_WMAX) 359 setWord(w++, rc, code, src, srclen, (Wrec *)0, dstlen, cx->gram->P_BB); 360 if (dstlen < 0) 361 setWord(w++, rc, code, src, srclen, (Wrec *)0, srclen, cx->gram->P_BB); 362 } 363 return (struct nword *) w; 364} 365 366#define READWORD_MAXCACHE 128 367inline 368struct nword * 369readWord(struct RkContext *cx, int yy, int ys, int ye, int iclass, struct nword *nword, int maxword, int doflush, int douniq) 370{ 371 WCHAR_T *key = cx->store->yomi + yy; 372 struct nword *wrds; 373 struct MD *head = cx->md[iclass], *md; 374 int maxcache = READWORD_MAXCACHE; 375#ifndef USE_MALLOC_FOR_BIG_ARRAY 376 unsigned permutation[RK_CAND_NMAX]; 377 unsigned char *candidates[RK_CAND_NMAX]; 378 struct nread nread[READWORD_MAXCACHE]; 379#else 380 unsigned *permutation; 381 unsigned char **candidates; 382 struct nread *nread; 383 384 permutation = (unsigned *)malloc(sizeof(unsigned) * RK_CAND_NMAX); 385 candidates = (unsigned char **) 386 malloc(sizeof(unsigned char *) * RK_CAND_NMAX); 387 nread = (struct nread *)malloc(sizeof(struct nread) * READWORD_MAXCACHE); 388 if (!permutation || !candidates || !nread) { 389 if (permutation) free(permutation); 390 if (candidates) free(candidates); 391 if (nread) free(nread); 392 return nword; 393 } 394#endif 395 396 wrds = nword; 397 for (md = head->md_next; md != head; md = md->md_next) { 398 struct DM *dm = md->md_dic; 399 struct DM *qm = md->md_freq; 400 struct nword *pp, *qq; 401 int c, nc, num, cf = 0, nl; 402 403 if (maxword <= 0) 404 break; 405 if (!dm) 406 continue; 407 if (qm && !qm->dm_qbits) 408 qm = (struct DM *)0; 409 nc = DST_SEARCH(cx, dm, key, ye, nread, maxcache, &cf); 410 for (c = 0; c < nc; c++) { 411 struct nread *thisRead = nread + c; 412 struct ncache *thisCache = thisRead->cache; 413 unsigned char *wp = thisCache->nc_word; 414 unsigned long offset; 415 int nk, cnt = 1; 416 unsigned long csnb; 417 int bitSize; 418 419 nk = _RkCandNumber(wp); 420 nl = (*wp >> 1) & 0x3f; 421 if (!doflush && (cf || thisRead->nk > ye || thisRead->nk > RK_KEY_WMAX)) 422 cx->poss_cont++; 423 if (*wp & 0x80) 424 wp += 2; 425 wp += 2 + nl *2; 426 csnb = thisRead->csn; 427 offset = thisRead->offset; 428 if (ys < thisRead->nk && thisRead->nk <= ye && thisRead->nk <= RK_KEY_WMAX) { 429 for (num = 0; num < nk; num++) { 430 candidates[num] = wp; 431 wp += 2 * ((*wp >> 1) & 0x7f) + 2; 432 }; 433 if (qm) { 434 int ecount, cval, i; 435 436 bitSize = _RkCalcLog2(nk + 1) + 1; 437 _RkUnpackBits(permutation, qm->dm_qbits, offset, bitSize, nk); 438 for (ecount = cval = i = 0; i < nk; i++) { 439 if ((int)permutation[i]/2 > nk) { 440 ecount++; 441 break; 442 }; 443 cval += permutation[i]; 444 } 445 if (ecount || cval < (nk-1)*(nk-2)) { 446 for (i = 0; i < nk; i++) 447 permutation[i] = 2*i; 448 _RkPackBits(qm->dm_qbits, offset, bitSize, permutation, nk); 449 }; 450 }; 451 pp = wrds; 452 for (num = 0; num < nk; num++) { 453 unsigned permed; 454 455 if (maxword <= 0) 456 break; 457 if (qm) { 458 permed = permutation[num]/2; 459 if ((int)permed > nk) { 460 break; 461 } else if ((int)permed == nk) 462 continue; 463 } else 464 permed = num; 465 wp = candidates[permed]; 466 clearWord(wrds, cx->gram->P_BB); 467 wrds->nw_kanji = wp; 468 wrds->nw_freq = qm; 469 wrds->nw_rowcol = _RkRowNumber(wp); 470 wrds->nw_cache = thisCache; 471 wrds->nw_ylen = thisRead->nk; 472 wrds->nw_klen = (*wp >> 1) & 0x7f; 473 wrds->nw_class = iclass; 474 wrds->nw_csn = csnb + permed; 475 wrds->nw_prio = 0L; 476 if (iclass == ND_MWD) { 477 if (qm && qm->dm_rut) { 478 if (cnt) 479 cnt = wrds->nw_prio = searchRut(qm->dm_rut, wrds->nw_csn); 480 } else if (DM2TYPE(dm)) { 481 if (num < 2) 482 wrds->nw_prio = ((struct TW *)thisCache->nc_address)->lucks[num]; 483 } 484 if (wrds->nw_prio) { 485 long t; 486 487 t = _RkGetTick(0) - wrds->nw_prio; 488 wrds->nw_prio = (0 <= t && t < 0x2000) ? (0x2000 - t) << 4 : 0; 489 }; 490 switch(num) { 491 case 0: wrds->nw_prio += 15L; break; 492 case 1: wrds->nw_prio += 11L; break; 493 case 2: wrds->nw_prio += 7L; break; 494 case 3: wrds->nw_prio += 3L; break; 495 }; 496 wrds->nw_prio |= 0x01; 497 }; 498 if (douniq) { 499 for (qq = pp; qq < wrds; qq++) 500 if (qq->nw_rowcol == wrds->nw_rowcol) 501 break; 502 if (qq < wrds) 503 continue; 504 } 505 _RkEnrefCache(thisCache); 506 wrds++; 507 maxword--; 508 }; 509 }; 510 _RkDerefCache(thisCache); 511 }; 512 maxcache -= nc; 513 }; 514#ifdef USE_MALLOC_FOR_BIG_ARRAY 515 free(permutation); 516 free(candidates); 517 free(nread); 518#endif 519 return(wrds); 520} 521 522/* makeWord 523 * jisho ni nai katakana, suuji, tokushu moji wo tango to minasu 524 */ 525/*ARGSUSED*/ 526inline 527struct nword * 528makeWord(struct RkContext *cx, int yy, int ys, int ye, int iclass, struct nword *word, int maxword, int doflush, int douniq) 529{ 530 struct nstore *st = cx->store; 531 WCHAR_T *key = st->yomi + yy; 532 WCHAR_T *k, *z; 533 struct nword *w = word; 534 WCHAR_T c; 535 int clen; 536 int hinshi = cx->gram->P_BB; 537 int literal = -1; 538 int punct = 0; 539 int gobeyond = 0; 540 541 if (ye <= 0) 542 return w; 543 z = (k = key) + ye; 544 /* sentou moji wo yomu */ 545 c = *k++; 546 clen = 1; 547 if (us_iscodeG0(c)) { /* ascii string */ 548 if ('0' <= c && '9' >= c) { /* numeral */ 549 if (!(cx->concmode & RK_MAKE_EISUUJI)) { 550 doflush++; 551 } else { 552 for (; k < z; k++, clen++) 553 if (clen >= RK_KEY_WMAX || !('0' <= *k && *k <= '9')) { 554 doflush++; 555 break; 556 }; 557 } 558 hinshi = cx->gram->P_NN; literal = LIT_NUM; 559 } else { /* others */ 560 if (!(cx->concmode & RK_MAKE_EISUUJI)) { 561 doflush++; 562 } else { 563 for (; k < z; k++, clen++) 564 if (clen >= RK_KEY_WMAX || !us_iscodeG0(*k)) { 565 doflush++; 566 break; 567 }; 568 } 569 hinshi = cx->gram->P_T35; literal = LIT_ALPHA; 570 } 571 } else if (us_iscodeG1(c)) { 572 if (0xb000 <= c) { /* kanji string */ 573 for (; k < z; k++, clen++) 574 if (clen >= RK_KEY_WMAX || *k < 0xb000) { 575 doflush++; 576 break; 577 }; 578 hinshi = cx->gram->P_T00; 579 } else if (0xa1a2 <= c && c <= 0xa1db) { 580 /* 581 * now multiple punctiation characters constitute a single punct 582 */ 583 for (; k < z; k++, clen++) 584 if (clen >= RK_KEY_WMAX || !(0xa1a2 <= *k && *k <= 0xa1db)) { 585 doflush++; 586 break; 587 }; 588 switch(c) { 589 case 0xa1a2: case 0xa1a3: case 0xa1a4: 590 case 0xa1a5: case 0xa1a6: case 0xa1a7: 591 case 0xa1a8: case 0xa1a9: case 0xa1aa: 592 case 0xa1c4: 593 punct = ND_PUN; 594 break; 595 case 0xa1c6: case 0xa1c8: case 0xa1ca: 596 case 0xa1cc: case 0xa1ce: 597 case 0xa1d0: case 0xa1d2: case 0xa1d4: 598 case 0xa1d6: case 0xa1d8: case 0xa1da: 599 punct = ND_OPN; 600 break; 601 case 0xa1c7: case 0xa1c9: case 0xa1cb: 602 case 0xa1cd: case 0xa1cf: case 0xa1d1: 603 case 0xa1d3: case 0xa1d5: case 0xa1d7: 604 case 0xa1d9: case 0xa1db: 605 punct = ND_CLS; 606 break; 607 default: 608 hinshi = cx->gram->P_T00; 609 doflush++; 610 }; 611 } else if (0xa3b0 <= c && c <= 0xa3b9) { /* suuji */ 612 if (!(cx->concmode & RK_MAKE_EISUUJI)) { 613 doflush++; 614 } else { 615 for (; k < z; k++, clen++) 616 if (clen >= RK_KEY_WMAX || !(0xa3b0 <= *k && *k <= 0xa3b9)) { 617 doflush++; 618 break; 619 }; 620 } 621 hinshi = cx->gram->P_NN; literal = LIT_NUM; 622 } else if ((0xa3c1 <= c && c <= 0xa3da) 623 || (0xa3e1 <= c && c <= 0xa3fa)) { /* eiji */ 624 if (!(cx->concmode & RK_MAKE_EISUUJI)) { 625 doflush++; 626 } else { 627 for (; k < z; k++, clen++) 628 if (clen >= RK_KEY_WMAX 629 || !((0xa3c1 <= (c = *k) && c <= 0xa3da) 630 || (0xa3e1 <= c && c <= 0xa3fa))) { 631 doflush++; 632 break; 633 }; 634 } 635 hinshi = cx->gram->P_T35; literal = LIT_ALPHA; 636 } else if (0xa5a1 <= c && c <= 0xa5f6) { /* zenkaku katakana */ 637 for (; k < z; k++, clen++) 638 if (clen >= RK_KEY_WMAX || 639 ((0xa5a1 > (c = *k) || c > 0xa5f6) && 640 (0xa1a1 > c || c > 0xa1f6))) { 641 doflush++; 642 break; 643 }; 644 hinshi = cx->gram->P_T30; 645 } else if (0xa4a1 <= c && c <= 0xa4f3) { /* hiragana */ 646 for (; k < z; k++, clen++) { 647 if (clen >= RK_KEY_WMAX) { 648 doflush++; 649 break; 650 }; 651 switch (*k) { 652#ifndef FUJIEDA_HACK 653 case 0xa4a1: case 0xa4a3: case 0xa4a5: 654 case 0xa4a7: case 0xa4a9: 655 case 0xa4e3: case 0xa4e5: case 0xa4e7: 656 case 0xa4c3: case 0xa4f3: 657#endif 658 case 0xa1ab: case 0xa1ac: case 0xa1b3: 659 case 0xa1b4: case 0xa1b5: case 0xa1b6: 660 case 0xa1bc: 661 continue; 662 default: 663 doflush++; 664 gobeyond++; 665 goto hira; 666 }; 667 }; 668 hira: 669 hinshi = cx->gram->P_T35; 670 } else { 671 doflush++; 672 hinshi = cx->gram->P_T35; 673 }; 674 } else if (us_iscodeG2(c)) { /* hankaku katakana */ 675 for (; k < z; k++, clen++) 676 if (clen >= RK_KEY_WMAX || !us_iscodeG2(*k)) { 677 doflush++; 678 break; 679 }; 680 hinshi = cx->gram->P_T30; 681 } else { 682 doflush++; 683 hinshi = cx->gram->P_T35; 684 } 685 if ((ys <= clen && clen <= ye) || gobeyond) { 686 if (iclass == ND_MWD || punct) { 687 if (!doflush && !gobeyond) 688 cx->poss_cont++; 689 if (literal != -1) { 690 if (doflush) 691 w= setLit(cx, w, maxword, hinshi, key, clen, literal); 692 } else if (w < word + maxword) { 693 if (doflush) { 694 setWord(w++, hinshi, 0, key, clen, (Wrec *)0, 695 clen, cx->gram->P_BB); 696 if (punct) 697 w[-1].nw_class = punct; 698#ifdef FUJIEDA_HACK 699 w[-1].nw_flags |= NW_DUMMY; 700#endif 701 }; 702 } 703 } 704 } 705 return w; 706} 707 708inline int 709determinate(Wrec *y1, Wrec *y2, int l) 710{ 711 if ((int)*y1 > l) 712 return(0); 713 for (l = *y1, y1 += 2; l; l--) { 714 WCHAR_T *wy = (WCHAR_T *) y2; 715 Wrec c1 = (Wrec) ((*wy & 0xff00) >> 8); 716 Wrec c2 = (Wrec) (*wy & 0xff); 717 718 y2 += 2; 719 if (*y1++ != c1 || *y1++ != c2) { 720 return(0); 721 } 722 } 723 return(1); 724} 725 726inline 727int 728positive(Wrec *y1, Wrec *y2, int l) 729{ 730 l = (int)*y1 < l ? (int)*y1 : l; 731 for (y1 += 2; l; l--) { 732 if (*y1++ != *y2++ || *y1++ != *y2++) { 733 return(0); 734 } 735 } 736 return(1); 737} 738 739inline 740int 741positiveRev(Wrec *y1, Wrec *y2, int l) 742{ 743 l = (int)*y1 < l ? (int)*y1 : l; 744 for (y1 += 2; l; l--) { 745 WCHAR_T *wy = (WCHAR_T *) y2; 746 Wrec c1 = (Wrec) ((*wy & 0xff00) >> 8); 747 Wrec c2 = (Wrec) (*wy & 0xff); 748 749 y2 += 2; 750 if (*y1++ != c1 || *y1++ != c2) { 751 return(0); 752 } 753 } 754 return(1); 755} 756 757static 758void 759cancelNVE(struct NV *nv, struct NVE *p) 760{ 761 unsigned char *s = p->data; 762 763 nv->csz -= *s * 2 + 2; 764 nv->cnt--; 765 p->right->left = p->left; 766 p->left->right = p->right; 767 free(s); 768 free(p); 769} 770 771inline 772struct NVE * 773newNVE(struct NV *nv, Wrec *y, int l, int v) 774{ 775 unsigned short w; 776 struct NVE *p, **q, *r; 777 struct NVE *nve; 778 unsigned char *s; 779 780 nve = (struct NVE *)calloc(1, sizeof(struct NVE)); 781 if (nve) { 782 s = (unsigned char *)malloc(l * 2 + 2); 783 if (s) { 784 nve->data = s; 785 *s++ = l; 786 *s++ = v; 787 788 memcpy(s, y, l * 2); 789 nv->csz += l * 2 + 2; 790 nv->cnt++; 791 while ((p = nv->head.right) != &nv->head && nv->csz >= (long)nv->sz) { 792 w = bst2_to_s(p->data + 2); 793 q = nv->buf + w % nv->tsz; 794 while ((r = *q) != (struct NVE *)0) { 795 if (r == p) { 796 *q = r->next; 797 cancelNVE(nv, p); 798 break; 799 } else 800 q = &r->next; 801 } 802 } 803 if (nv->csz >= (long)nv->sz) { 804 nv->csz -= l * 2 + 2; 805 nv->cnt--; 806 free(nve->data); 807 free(nve); 808 return((struct NVE *)0); 809 } 810 } else { 811 free(nve); 812 nve = (struct NVE *)0; 813 } 814 } 815 return(nve); 816} 817 818int 819_RkRegisterNV(struct NV *nv, Wrec *yomi, int len, int half) 820{ 821 unsigned short v; 822 struct NVE *p, **q, **r; 823 824 if (nv && nv->tsz && nv->buf) { 825 v = bst2_to_s(yomi); 826 q = r = nv->buf + v % nv->tsz; 827 for (p = *q; p; p = *q) { 828 if (positive(p->data, yomi, len)) { 829 *q = p->next; 830 cancelNVE(nv, p); 831 } else { 832 q = &p->next; 833 } 834 } 835 p = newNVE(nv, yomi, len, half); 836 if (p) { 837 p->next = *r; 838 *r = p; 839 p->left = nv->head.left; 840 p->left->right = p; 841 p->right = &nv->head; 842 nv->head.left = p; 843 } 844 } 845 return(0); 846} 847 848#define TAILSIZE 256 849#define RIGHTSIZE (64 * 16) 850 851/* parseWord 852 * bunsestu no ki wo seichou saseru. 853 */ 854static int 855parseWord(struct RkContext *cx, int yy, int ys, int ye, int iclass, struct nword *xqh[], int maxclen, int doflush, int douniq) 856{ 857 struct RkKxGram *gram = cx->gram->gramdic; 858 int clen; 859 static unsigned classmask[] = { /* ¸å¤í¤Ë¤Ä¤Ê¤¬¤ë¥¯¥é¥¹ */ 860 (1 << ND_SWD) | (1 << ND_SUC), /* MWD --> SUC | SWD */ 861 (1 << ND_SWD), /* SWD --> SWD */ 862 (1 << ND_MWD) | (1 << ND_SWD), /* PRE --> MWD | SWD */ 863 (1 << ND_SWD), /* SUC --> SWD */ 864 (1 << ND_MWD) | (1 << ND_SWD) | (1 << ND_PRE),/* EMP --> MWD | SWD | PRE */ 865 }; 866#ifndef USE_MALLOC_FOR_BIG_ARRAY 867 struct nword *tail[TAILSIZE]; 868 struct nword right[RIGHTSIZE]; 869#else 870 struct nword **tail, *right; 871 tail = (struct nword **)malloc(sizeof(struct nword *) * TAILSIZE); 872 right = (struct nword *)malloc(sizeof(struct nword) * RIGHTSIZE); 873 if (!tail || !right) { 874 if (tail) free(tail); 875 if (right) free(right); 876 return maxclen; 877 } 878#endif 879 880 for (clen = 0; (clen <= maxclen && clen < ye); clen++) { 881 int sameLen; 882 int t; 883 struct nword *p, *q, *r; 884 int ys1, ye1; 885 886 /* ÆɤߤÎŤµ clen ¤Îñ¸ì¤Î¤¦¤Á¡¢¸å¤í¤Ë iclass ¤Ç»ØÄꤵ¤ì¤¿Ã±¸ì¤¬ 887 ¤Ä¤Ê¤¬¤ë²ÄǽÀ¤¬¤¢¤ë¤â¤Î¤ò¥ê¥¹¥È¥¢¥Ã¥×¤·¡¢tail ¤ËµÏ¿¤¹¤ë */ 888 for (p = xqh[clen], sameLen = 0; p; p = p->nw_next) { 889 if (classmask[p->nw_class] & (1<<iclass)) { 890 /* p ¤Î¸å¤í¤Ë iclass ¤Îñ¸ì¤¬¤Ä¤Ê¤¬¤ë²ÄǽÀ¤¬¤¢¤ë */ 891 if (sameLen < TAILSIZE) { /* ¤Þ¤À tail ¤Ë¤¢¤¤¬¤¢¤ë */ 892 tail[sameLen++] = p; 893 } 894 } 895 } 896 if (!sameLen) 897 continue; 898 ys1 = ys - clen; if (ys1 < 0) ys1 = 0; 899 ye1 = ye - clen; 900 r = readWord(cx, yy + clen, ys1, ye1, iclass, 901 right, RIGHTSIZE - 1, doflush, douniq); 902 if (Is_Word_Make(cx)) 903 r = makeWord(cx, yy + clen, ys1, ye1, iclass, 904 r, RIGHTSIZE -1 - (int)(r - right), doflush, douniq); 905 for (t = 0; t < sameLen; t++) { 906 unsigned char *cj; 907 p = tail[t]; 908 cj = (unsigned char *)(gram ? GetGramRow(gram, p->nw_rowcol) : 0); 909 for (q = right; q < r; q++) 910 if (Is_Word_Connect(cx) && 911 (q->nw_class >= ND_OPN || !cj || TestGram(cj, q->nw_rowcol))) { 912 struct nword *pq = concWord(cx->store, p, q, clen, cx->gram->P_BB); 913 if (pq) { 914 struct nword *s; 915 if (gram && !IsShuutan(gram, pq->nw_rowcol)) { 916#ifdef BUNMATU 917 /* ʸ¾ÏËö¤Ë¤·¤«¤Ê¤é¤Ê¤¤ */ 918 if (IsBunmatu(gram, pq->nw_rowcol)) { 919 /* ¶çÆÉÅÀ¤½¤Î¾¤Î¾ì¹ç¤Ë¤Ïʸ¾ÏËö¸¡ºº¤ÏÉÔÍ× */ 920 if (q->nw_class >= ND_OPN) 921 pq->nw_flags &= ~NW_BUNMATU; 922 else 923 pq->nw_flags |= NW_BUNMATU; 924 } else 925#endif 926 DontSplitWord(pq); 927 } 928 if ((unsigned long)maxclen < (unsigned long)pq->nw_ylen) { 929 while (++maxclen < (int)pq->nw_ylen) 930 xqh[maxclen] = (struct nword *)0; 931 xqh[maxclen] = pq; 932 } 933 else { 934 s = xqh[pq->nw_ylen]; 935 if (s) { 936 while (s->nw_next) 937 s = s->nw_next; 938 s->nw_next = pq; 939 } 940 else 941 xqh[pq->nw_ylen] = pq; 942 } 943 pq->nw_next = (struct nword *)0; 944 } 945 } 946 } 947 for (q = right; q < r; q++) 948 if (q->nw_cache) 949 _RkDerefCache(q->nw_cache); 950 if (!gram) 951 goto done; 952 } 953 done: 954#ifdef USE_MALLOC_FOR_BIG_ARRAY 955 free(tail); 956 free(right); 957#endif 958 return maxclen; 959} 960 961/*ARGSUSED*/ 962static int 963doParse(struct RkContext *cx, int yy, int ys, int ye, struct nword *xqh[], int maxclen, int doflush, int douniq) 964{ 965 maxclen = parseWord(cx, yy, ys, ye, ND_PRE, xqh, maxclen, doflush, douniq); 966 maxclen = parseWord(cx, yy, ys, ye, ND_MWD, xqh, maxclen, doflush, douniq); 967 maxclen = parseWord(cx, yy, ys, ye, ND_SUC, xqh, maxclen, doflush, douniq); 968 maxclen = parseWord(cx, yy, ys, ye, ND_SWD, xqh, maxclen, doflush, douniq); 969 return maxclen; 970} 971 972/* getKanji 973 * get kanji in reverse order 974 */ 975WCHAR_T * 976_RkGetKanji(struct nword *cw, WCHAR_T *key, unsigned long mode) 977{ 978 Wrec *str; 979 static WCHAR_T tmp[RK_LEN_WMAX+1]; /* static! */ 980 WCHAR_T *p = tmp; 981 int klen, ylen; 982 struct nword *lw = cw->nw_left; 983 984 klen = cw->nw_klen - lw->nw_klen; 985 ylen = cw->nw_ylen - lw->nw_ylen; 986/* nw_cache --> nw_kanji !nw_lit */ 987/* !nw_cache --> !nw_kanji nw_lit */ 988 989 if (cw->nw_cache) { 990 if ((*(cw->nw_kanji) >> 1) & 0x7f) { 991 str = cw->nw_kanji + NW_PREFIX; 992 for (; klen-- ; str += 2) 993 *p++ = S2TOS(str); 994 return tmp; 995 } else 996 return key; 997 } else if (cw->nw_kanji) { 998 _Rkpanic("_RkGetKanji\n", 0, 0, 0); 999 str = cw->nw_kanji + NW_PREFIX; 1000 for (; klen-- ; str += 2) 1001 *p++ = S2TOS(str); 1002 return tmp; 1003 } else if (cw->nw_lit) { 1004 if (cvtLit(tmp, klen + 1, key, ylen, cw->nw_lit, mode) > 0) 1005 return tmp; 1006 else 1007 return key; 1008 } else 1009 return key; 1010} 1011 1012inline 1013int 1014getKanji(struct nword *w, WCHAR_T *key, WCHAR_T *d, unsigned long mode) 1015{ 1016 struct nword *cw, *lw; 1017 int hash, klen; 1018 1019 hash = 0; 1020 for (cw = w; cw; cw = lw) { 1021 WCHAR_T *s, *t; 1022 1023 if (!(lw = cw->nw_left)) 1024 continue; 1025 klen = (cw->nw_klen - lw->nw_klen); 1026 s = _RkGetKanji(cw, key + lw->nw_ylen, mode); 1027 t = s + klen; 1028 /* copy */ 1029 while (s < t) { 1030 *d++ = *--t; 1031 hash += *t; 1032 } 1033 } 1034 return hash; 1035} 1036 1037#define HEAPSIZE 512 1038 1039/* uniqWord 1040 * unique word list 1041 */ 1042inline void 1043uniqWord(WCHAR_T *key, struct nword *words, unsigned ylen, unsigned long mode) 1044{ 1045 struct nword *p; 1046 long hp = 0; 1047 long uniq[16]; 1048#ifndef USE_MALLOC_FOR_BIG_ARRAY 1049 long heap[HEAPSIZE]; 1050#else 1051 long *heap = (long *)malloc(sizeof(long) * HEAPSIZE); 1052 if (!heap) { 1053 return; 1054 } 1055#endif 1056 1057 if (!(!key || ylen <= 0)) { 1058 /* clear hash table */ 1059 uniq[ 0] = uniq[ 1] = uniq[ 2] = uniq[ 3] = 1060 uniq[ 4] = uniq[ 5] = uniq[ 6] = uniq[ 7] = 1061 uniq[ 8] = uniq[ 9] = uniq[10] = uniq[11] = 1062 uniq[12] = uniq[13] = uniq[14] = uniq[15] = -1; 1063 for (p = words; p; p = p->nw_next) { 1064 if (CanSplitWord(p) && p->nw_ylen == ylen) { 1065 int wsize; 1066 /* compute word size */ 1067 wsize = (2*p->nw_klen + sizeof(long)-1)/sizeof(long); 1068 if (hp + 1 + wsize < HEAPSIZE) { 1069 long hno, h; 1070 /* put kanji string without EOS */ 1071 heap[hp + wsize] = 0; 1072 hno = getKanji(p, key, (WCHAR_T *)&heap[hp + 1], mode)&15; 1073 /* search on the hash list */ 1074 for (h = uniq[hno]; h >= 0; h = heap[h&0xffff]) 1075 if ((h >> 16) == p->nw_klen) { /* same length */ 1076 long *p1 = &heap[(h&0xffff) + 1]; 1077 long *p2 = &heap[hp + 1]; 1078 int i; 1079 /* compare by word */ 1080 switch(wsize) { 1081 case 3: if (*p1++ != *p2++) goto next; 1082 case 2: if (*p1++ != *p2++) goto next; 1083 case 1: if (*p1++ != *p2++) goto next; 1084 case 0: break; 1085 default: 1086 for (i = wsize; i--;) 1087 if (*p1++ != *p2++) goto next; 1088 break; 1089 } 1090 /* match */ 1091 DontSplitWord(p); 1092 goto done; 1093 next: 1094 continue; 1095 } 1096 /* enter new entry */ 1097 heap[hp + 0] = uniq[hno]; 1098 uniq[hno] = (((unsigned long) (p->nw_klen))<<16)|hp; 1099 hp += 1 + wsize; 1100 } 1101 done: 1102 continue; 1103 } 1104 } 1105 } 1106#ifdef USE_MALLOC_FOR_BIG_ARRAY 1107 free(heap); 1108#endif 1109} 1110 1111/* sortWord 1112 * word list wo sort suru 1113 */ 1114struct compRec { 1115 struct nword *word; 1116 long prio; 1117}; 1118 1119 1120static 1121int 1122compword(const struct compRec *x, const struct compRec *y) 1123{ 1124 long d = ((long) y->word->nw_prio) - ((long) (x->word->nw_prio)); 1125 1126 if (d > 0) return(1); 1127 else if(d < 0) return(-1); 1128 else { 1129 long dd = x->prio - y->prio; 1130 1131 if (dd > 0) return(1); 1132 else if (dd < 0) return(-1); 1133 else return(0); 1134 } 1135} 1136 1137inline 1138struct nword * 1139sortWord(struct nword *words) 1140{ 1141 unsigned long nwords, pos, neg; 1142 long i, p, n; 1143 struct compRec *wptr; 1144 struct nword *w; 1145/* count number of words */ 1146 pos = neg = 0L; 1147 for (w = words; w; w = w->nw_next) 1148 if (w->nw_prio > 0) 1149 pos++; 1150 else 1151 neg++; 1152 nwords = pos + neg; 1153 if (nwords <= 0) 1154 return words; 1155 /* sort word list using work space if possible */ 1156 wptr = (struct compRec *)malloc(sizeof(struct compRec)*nwords); 1157 if (wptr) { 1158 p = 0L; 1159 n = pos; 1160 /* store pointers */ 1161 for (w = words; w; w = w->nw_next) 1162 if (w->nw_prio > 0) { /* positive list */ 1163 wptr[p].word = w; 1164 wptr[p].prio = p; 1165 p++; 1166 } else { /* negative list && null word */ 1167 wptr[n].word = w; 1168 n++; 1169 } 1170 /* positive list no sakusei */ 1171 if (pos > 1) 1172 (void)qsort((char *)wptr, (int)pos, sizeof(struct compRec), 1173 (int (*) (const void *, const void *))compword); 1174 for (i = 1; i < (int)nwords; i++) 1175 wptr[i - 1].word->nw_next = wptr[i].word; 1176 words = wptr[0].word; 1177 free(wptr); 1178 } 1179 return words; 1180} 1181 1182static 1183struct nword * 1184height2list(struct nword *height[], int maxclen) 1185{ 1186 int i; 1187 struct nword *e, *p, *head, *tail; 1188 1189 e = height[0]; 1190 tail = (struct nword *)0; 1191 for (i = 1; i <= maxclen; i++) 1192 if (height[i]) { 1193 for (p = height[i] ; p->nw_next ;) { 1194 p = p->nw_next; 1195 } 1196 if (tail) 1197 tail->nw_next = height[i]; 1198 else 1199 head = height[i]; 1200 tail = p; 1201 } 1202 if (tail) 1203 tail->nw_next = e; 1204 else 1205 head = e; 1206 return head; 1207} 1208static 1209void 1210list2height(struct nword *height[], int maxclen, struct nword *parse) 1211{ 1212 int i; 1213 struct nword *p, *q; 1214 1215 for (i = 0; i <= maxclen; i++) 1216 height[i] = (struct nword *)0; 1217 for (p = parse; p; p = p->nw_next) 1218 if ((unsigned long)p->nw_ylen <= (unsigned long)maxclen && !height[p->nw_ylen]) 1219 height[p->nw_ylen] = p; 1220 for (i = 0; i <= maxclen; i++) 1221 if (height[i]) { 1222 for (p = height[i] ; (q = p->nw_next) != (struct nword *)0; p = q) { 1223 if (q->nw_ylen != i) { 1224 p->nw_next = (struct nword *)0; 1225 break; 1226 } 1227 } 1228 } 1229} 1230 1231/* parseBun 1232 * key yori hajimaru bunsetsu wo kaiseki suru 1233 */ 1234inline 1235struct nword * 1236parseBun(struct RkContext *cx, int yy, int ys, int ye, int doflush, int douniq, int *maxclen) /* bunsetu saidai moji suu */ 1237{ 1238 struct nstore *st = cx->store; 1239 struct nword **xqh = st->xqh; 1240 1241#ifdef TEST 1242 printf("parseBun[yy = %d, ys = %d, ye = %d]\n", yy, ys, ye); 1243#endif 1244 1245 xqh[0] = allocWord(st, cx->gram->P_BB); 1246 if (xqh[0]) { 1247 *maxclen = doParse(cx, yy, ys, ye, xqh, 0, doflush, douniq); 1248 return height2list(xqh, *maxclen); 1249 } else { /* kaiseki funou */ 1250 *maxclen = 0; 1251 return (struct nword *)0; 1252 } 1253} 1254 1255static 1256void 1257storeBun(struct RkContext *cx, int yy, int ys, int ye, struct nbun *bun) 1258{ 1259 struct nword *full; 1260 struct nword *w; 1261 int maxclen; 1262 1263 full = sortWord(parseBun(cx, yy, ys, ye, 1, 0, &maxclen)); 1264 bun->nb_cand = full; 1265 bun->nb_yoff = yy; 1266/* kouho wo unique ni suru */ 1267 uniqWord(cx->store->yomi + yy, full, bun->nb_curlen, cx->concmode); 1268 bun->nb_curcand = (unsigned short)0; 1269 bun->nb_maxcand = (unsigned short)0; 1270 for (w = full; w; w = w->nw_next) { 1271 if (CanSplitWord(w) && w->nw_ylen == bun->nb_curlen) 1272 bun->nb_maxcand++; 1273 } 1274} 1275 1276/* 1277 * SPLIT 1278 */ 1279struct splitParm { 1280 unsigned long u2; 1281 int l2; 1282}; 1283 1284#ifdef FUJIEDA_HACK 1285static 1286void 1287evalSplit( 1288 struct RkContext *cx, 1289 struct nword *suc, 1290 struct splitParm *ul 1291) 1292{ 1293 struct nword *p; 1294 unsigned l2; 1295 unsigned long u2; 1296 1297 l2 = 0; 1298 u2 = 0L; 1299 for (p = suc; p; p = p->nw_next) 1300 { 1301 if (!CanSplitWord(p) || /* ʸÀá¤Ë¤Ê¤é¤Ê¤¤ */ 1302 OnlyBunmatu(p) || /* ¥ê¥Æ¥é¥ë¤ÎľÁ°¤Ç¤·¤«Ê¸Àá¤Ë¤Ê¤ì¤Ê¤¤ */ 1303 (p->nw_rowcol == cx->gram->P_KJ) || /* ñ´Á»ú */ 1304 (p->nw_flags & NW_DUMMY) || /* ÙÔ¤¤µ¤ì¤¿Ì¾»ì */ 1305 (p->nw_flags & NW_SUC)) 1306 continue; 1307 if (l2 <= p->nw_ylen) { 1308 l2 = p->nw_ylen; 1309 if (u2 < p->nw_prio) 1310 u2 = p->nw_prio; 1311 } 1312 } 1313 ul->l2 = l2; 1314 ul->u2 = u2; 1315} 1316#else /* FUJIEDA_HACK */ 1317static 1318void 1319evalSplit(struct nword *suc, struct splitParm *ul) 1320{ 1321 struct nword *p; 1322 int l2; 1323 unsigned long u2; 1324 1325 l2 = 0; 1326 u2 = 0L; 1327 for (p = suc; p; p = p->nw_next) 1328 { 1329 if (!CanSplitWord(p) || (p->nw_flags & NW_SUC)) 1330 continue; 1331 if ((unsigned long)l2 < (unsigned long)p->nw_ylen) 1332 l2 = p->nw_ylen; 1333 if (u2 < p->nw_prio) 1334 u2 = p->nw_prio; 1335 }; 1336 ul->l2 = l2; 1337 ul->u2 = u2; 1338} 1339#endif /* FUJIEDA_HACK */ 1340 1341#define PARMSIZE 256 1342 1343static 1344int 1345calcSplit(struct RkContext *cx, int yy, struct nword *top, struct nqueue xq[], int maxclen, int flush) 1346{ 1347#ifdef FUJIEDA_HACK 1348 int L, L1 = 0, L2; 1349 unsigned long U; 1350#else 1351 unsigned L, L1 = 0, L2; 1352 unsigned U2; 1353#endif 1354 struct nword *w; 1355 int i; 1356 int maxary = PARMSIZE - 1; 1357 struct nstore *st = cx->store; 1358 struct NVE *p, **r; 1359#ifndef USE_MALLOC_FOR_BIG_ARRAY 1360 struct splitParm ul2[PARMSIZE]; 1361#else 1362 struct splitParm *ul2 = (struct splitParm *) 1363 malloc(sizeof(struct splitParm) * PARMSIZE); 1364 if (!ul2) { 1365 return L1; 1366 } 1367#endif 1368 1369 L2 = st->nyomi - yy; 1370 if (cx->nv && cx->nv->tsz && cx->nv->buf) { 1371 r = cx->nv->buf + *(st->yomi + yy) % cx->nv->tsz; 1372 for (p = *r; p; p = p->next) { 1373 if (determinate(p->data, (Wrec *)(st->yomi + yy), (int)L2)) { 1374 if (*(p->data+1) > L1) 1375 L1 = *(p->data + 1); 1376 } 1377 } 1378 } 1379 if (L1 == 0) { 1380 L = (L1 = 1)+ (L2 = 0); 1381#ifdef FUJIEDA_HACK 1382 U = 0L; 1383#else 1384 U2 = (unsigned)0; 1385#endif 1386 if (maxary > maxclen) 1387 maxary = maxclen; 1388 for (i = 0; i <= maxary; i++) 1389 ul2[i].l2 = ul2[i].u2 = 0L; 1390 for (w = top; w; w = w->nw_next) { 1391 int l, l1; 1392#ifdef FUJIEDA_HACK 1393 unsigned long u; 1394#endif 1395 struct splitParm ul; 1396 /* ʸÀá¤Ë¤Ê¤é¤Ê¤¤ */ 1397 if (!CanSplitWord(w)) { 1398 continue; 1399 } 1400 if ((w->nw_flags & NW_PRE) && (w->nw_flags & NW_SUC)) { 1401 continue; 1402 } 1403 /* Æɤߤò¾ÃÈñ¤·¤Æ¤¤¤Ê¤¤ */ 1404 l1 = w->nw_ylen; 1405 if (l1 <= 0) { 1406 continue; 1407 } 1408 /* °ìʸÀá¤Ë¤¹¤ë¤Î¤¬ºÇĹ */ 1409 if (flush && (unsigned)yy + w->nw_ylen == cx->store->nyomi) { 1410 L1 = l1; 1411 break; 1412 } 1413#ifdef BUNMATU 1414 /* ³¤¯Ê¸À᤬¥ê¥Æ¥é¥ë¤Ç¤Ê¤¤¤Ê¤éʸ¾ÏËöÉÊ»ì¤Ïʸ¤ÎÅÓÃæ¤Ë¤Ê¤é¤Ê¤¤ */ 1415 else if (OnlyBunmatu(w) && xq[l1].tree->nw_lit == 0) { 1416 DontSplitWord(w); 1417 continue; 1418 } 1419#endif 1420#ifdef FUJIEDA_HACK 1421 /* ñ´Á»ú¤Ïʸ¤ÎÅÓÃæ¤ËÅо줷¤Ê¤¤ */ 1422 if (w->nw_rowcol == cx->gram->P_KJ) { 1423 DontSplitWord(w); 1424 continue; 1425 } 1426#endif 1427 /* ±¦ÎÙ¤ÎʸÀá¤ò²òÀÏ */ 1428 if (l1 <= maxary) { 1429 if (!ul2[l1].l2) 1430#ifdef FUJIEDA_HACK 1431 evalSplit(cx, xq[l1].tree, &ul2[l1]); 1432#else 1433 evalSplit(xq[l1].tree, &ul2[l1]); 1434#endif 1435 ul = ul2[l1]; 1436 } 1437 else { 1438#ifdef FUJIEDA_HACK 1439 evalSplit(cx, xq[l1].tree, &ul); 1440#else 1441 evalSplit(xq[l1].tree, &ul); 1442#endif 1443 } 1444 /* hikaku */ 1445 l = l1 + ul.l2; 1446#ifdef FUJIEDA_HACK 1447 u = w->nw_prio + ul.u2; 1448 if ((L < l) || /* ÆóʸÀáºÇĹ */ 1449 ((L == l) && 1450 (U < u || /* Í¥ÀèÅ٤ιç·× */ 1451 (U == u && (L2 < ul.l2))))) { /* ÆóʸÀáÌܤÎŤµ */ 1452 L = l; 1453 U = u; 1454 L1 = l1; 1455 L2 = ul.l2; 1456 } 1457#else 1458 if ((((int)L < l)) || 1459 (((int)L == l) && (U2 < ul.u2)) || 1460 (((int)L == l) && (U2 == ul.u2) && ((int)L2 < ul.l2)) 1461 ) { 1462 L = l; 1463 L1 = l1; 1464 L2 = ul.l2; 1465 U2 = ul.u2; 1466 } 1467#endif 1468 } 1469 } 1470#ifdef USE_MALLOC_FOR_BIG_ARRAY 1471 (void)free((char *)ul2); 1472#endif 1473 return L1; 1474} 1475 1476inline 1477int 1478splitBun(struct RkContext *cx, int yy, int ys, int ye) 1479{ 1480 struct nstore *st = cx->store; 1481 struct nqueue *xq = st->xq; 1482 struct nword *w; 1483 int maxclen; 1484 int i, count, junk; 1485 1486/* create the initial bun-tree table */ 1487 xq[0].tree = parseBun(cx, yy, ys, ye, 1, 1, &maxclen); 1488 1489#ifdef TEST 1490 { 1491 printf("show splitBun [yy = %d, ys = %d, ye = %d, clen = %d]\n", 1492 yy, ys, ye, maxclen); 1493#if 1 1494 showWord(xq[0].tree); 1495#endif 1496 } 1497#endif 1498 1499 for (i = 1; i <= maxclen; i++) 1500 clearQue(&xq[i]); 1501/* create the following buns from every possible position */ 1502 for (w = xq[0].tree; w; w = w->nw_next) { 1503 if (CanSplitWord(w) && !xq[w->nw_ylen].tree) { 1504 int len = w->nw_ylen; 1505 int ys1 = (ys >= len) ? (ys - len) : 0; 1506 int ye1 = (ye - len); 1507 1508 xq[w->nw_ylen].tree = parseBun(cx, yy+len, ys1, ye1, 1, 1, &junk); 1509 }; 1510 }; 1511 1512/* compute the proper bunsetu length */ 1513 count = calcSplit(cx, yy, xq[0].tree, xq, maxclen, 1); 1514 _RkFreeQue(st, 0, st->maxxq + 1); 1515 1516#ifdef TEST 1517 printf("End SplitBun\n"); 1518#endif 1519 1520 return count; 1521} 1522 1523/* parseQue 1524 * queue jou de bunsetu wo kaiseki suru. 1525 */ 1526 1527static void parseQue (struct RkContext *, int, int, int, int, int); 1528 1529static void 1530parseQue(struct RkContext *cx, int maxq, int yy, int ys, int ye, int doflush) 1531{ 1532 struct nstore *st = cx->store; 1533 struct nqueue *xq = st->xq; 1534 struct nword **xqh = st->xqh; 1535 int i, j; 1536 1537/* put a new seed to start an analysis. */ 1538 if (!xq[0].tree) { 1539 xq[0].tree = allocWord(st, cx->gram->P_BB); 1540 xq[0].maxlen = 0; 1541 xq[0].status = 0; 1542 } 1543/* try to extend each tree in the queue. */ 1544 for (i = 0; i <= maxq; i++) { 1545 if (xq[i].tree) { 1546 int old = cx->poss_cont; 1547 list2height(xqh, xq[i].maxlen, xq[i].tree); 1548 xq[i].maxlen = doParse(cx, yy, ys, ye, xqh, xq[i].maxlen, doflush, 1); 1549 /* set up new analysis points */ 1550 for (j = 0; j <= xq[i].maxlen; j++) 1551 if (xqh[j] && !xq[i+j].tree) { 1552 xq[i+j].tree = allocWord(st, cx->gram->P_BB); 1553 xq[i+j].maxlen = 0; 1554 xq[i+j].status = 0; 1555 xq[i+j].status = 0x80; 1556 } 1557 xq[i].tree = height2list(xqh, xq[i].maxlen); 1558 if (cx->poss_cont != old) 1559 xq[i].status |= 0x80; 1560 else 1561 xq[i].status &= ~0x80; 1562 } 1563 ++yy; 1564 if (--ys < 0) ys = 0; 1565 --ye; 1566 } 1567} 1568 1569/* Que2Bun 1570 * queue kara bunsetu wo toridasu. 1571 */ 1572static 1573int 1574IsStableQue(struct RkContext *cx, int c, int doflush) 1575{ 1576 struct nqueue *xq = cx->store->xq; 1577 struct nword *w; 1578 1579 if (doflush) 1580 { 1581 if (xq[c].maxlen <= 0) 1582 return 0; 1583 else 1584 return 1; 1585 }; 1586 if (xq[c].maxlen <= 0) 1587 return(!c ? 0 : 1); 1588 1589 for (w = xq[c].tree; w; w = w->nw_next) 1590 { 1591 if (xq[c + w->nw_ylen].status) 1592 return 0; 1593 if (!c && w->nw_ylen && !IsStableQue(cx, c + w->nw_ylen, doflush)) 1594 return 0; 1595 }; 1596 return 1; 1597} 1598 1599static 1600int 1601Que2Bun(struct RkContext *cx, int yy, int ys, int ye, int doflush) 1602{ 1603 struct nstore *st = cx->store; 1604 struct nqueue *xq = st->xq; 1605 unsigned i; 1606 struct NVE *p, **r; 1607 1608 if (doflush) 1609 for (i = 0; (int)i <= st->maxxq; i++) 1610 xq[i].status = 0; 1611 while (IsStableQue(cx, 0, doflush)) { 1612 struct nbun *bun = &st->bunq[st->maxbun]; 1613 int count; 1614 1615 i = 0; 1616 if (!doflush) { 1617 if (cx->nv && cx->nv->tsz && cx->nv->buf) { 1618 r = cx->nv->buf + *(st->yomi + yy) % cx->nv->tsz; 1619 for (p = *r; p; p = p->next) { 1620 if (positiveRev(p->data, (Wrec *)(st->yomi + yy), st->nyomi - yy)) { 1621 if (*(p->data + 1) > i) 1622 i = *(p->data + 1); 1623 } 1624 } 1625 } 1626 if (i > st->nyomi - yy) 1627 break; 1628 } 1629 if ((count = calcSplit(cx, yy, xq[0].tree, xq, xq[0].maxlen, 1)) > 0) { 1630 /* shift queue to left */ 1631 _RkFreeQue(st, 0, count); 1632 for (i = count; (int)i <= st->maxxq; i++) { 1633 xq[i-count] = xq[i]; 1634 clearQue(&xq[i]); 1635 }; 1636 bun->nb_curlen = count; 1637 storeBun(cx, (int)bun->nb_yoff, 0, ye, bun); 1638 st->maxbun++; 1639 st->bunq[st->maxbun].nb_yoff = yy + bun->nb_curlen; 1640 } 1641 yy = yy + bun->nb_curlen; 1642 ys = ys - bun->nb_curlen; 1643 ye = ye - bun->nb_curlen; 1644 } 1645 return st->maxbun; 1646} 1647 1648/* _RkRenbun2 1649 * current bunsetsu kara migi wo saihenkan suru 1650 */ 1651int 1652_RkRenbun2(struct RkContext *cx, int firstlen) /* bunsetsu chou sitei(ow 0) */ 1653{ 1654 struct nstore *st = cx->store; 1655 struct nbun *bun = &st->bunq[st->curbun]; 1656 int count; 1657 int yy, ys, ye; /* yomi kensaku hani */ 1658 int oldcurbun = st->curbun; 1659 int uyomi; 1660 int i; 1661 1662 yy = bun->nb_yoff; 1663 ys = 0; 1664 ye = st->nyomi - bun->nb_yoff; 1665/* release queue */ 1666 uyomi = st->nyomi - st->bunq[st->maxbun].nb_yoff; 1667 if (IS_XAUTCTX(cx)) { 1668 if (uyomi >= 0) 1669 _RkFreeQue(st, 0, uyomi+1); 1670 }; 1671/* 1672 * 1673 */ 1674 for (count = 0; ye > 0; count++) 1675 { 1676/* sudeni kaiseki zumi deareba, sono kekka wo mochiiru */ 1677 if (count && !uyomi) 1678 { 1679 int b, c; 1680 for (b = st->curbun; b < (int)st->maxbun; b++) 1681 if (st->bunq[b].nb_yoff == yy) { 1682 /* dispose inbetween bun-trees */ 1683 for (c = st->curbun; c < b; c++) { 1684 freeWord(st, st->bunq[c].nb_cand); 1685 st->bunq[c].nb_cand = (struct nword *)0; 1686 } 1687 /* shift bunq forward */ 1688 while (b < (int)st->maxbun) 1689 st->bunq[st->curbun++] = st->bunq[b++]; 1690 goto exit; 1691 } 1692 } 1693/* dispose the current bun-tree */ 1694 if (st->curbun < (int)st->maxbun) { 1695 freeWord(st, bun->nb_cand); 1696 bun->nb_cand = (struct nword *)0; 1697 } 1698 /* compute the length of bun */ 1699 if (st->curbun >= (int)st->maxbunq) /* too many buns */ 1700 bun->nb_curlen = ye; 1701 else { 1702 if (firstlen) { /* length specified */ 1703 bun->nb_curlen = firstlen; 1704 firstlen = 0; 1705 } else { 1706 /* destroy */ 1707 bun->nb_curlen = splitBun(cx, yy, ys, ye); 1708 if (!bun->nb_curlen) /* fail to split */ 1709 bun->nb_curlen = ye; 1710 } 1711 } 1712/* set up bun (xqh is destroyed */ 1713 storeBun(cx, yy, ys, ye, bun); 1714#if defined(TEST) && 0 1715 showWord(bun->nb_cand); 1716#endif 1717 yy += bun->nb_curlen; 1718 if ((ys -= (int)bun->nb_curlen) < 0) 1719 ys = 0; 1720 ye -= bun->nb_curlen; 1721 bun++; 1722 st->curbun++; 1723 } 1724/* free the remaining bun-trees */ 1725 while ((int)st->maxbun > st->curbun) { 1726 freeWord(st, st->bunq[--st->maxbun].nb_cand); 1727 st->bunq[st->maxbun].nb_cand = (struct nword *)0; 1728 } 1729/* do final settings */ 1730 exit: 1731 st->maxbun = st->curbun; 1732 st->curbun = oldcurbun; 1733 st->bunq[st->maxbun].nb_yoff = 0; 1734/* i hate this fake, ... */ 1735 for (i = 0; i < (int)st->maxbun; i++) 1736 st->bunq[st->maxbun].nb_yoff += st->bunq[i].nb_curlen; 1737/* this case will never happen */ 1738 if (0 != (st->nyomi - st->bunq[st->maxbun].nb_yoff)) 1739 _Rkpanic("Renbun2: uyomi destroyed %d %d\n", 1740 st->nyomi, st->bunq[st->maxbun].nb_yoff, 0); 1741 bun = &st->bunq[st->maxbun]; 1742 if (IS_XAUTCTX(cx) && uyomi > 0) 1743 { 1744 _RkSubstYomi(cx, 0, uyomi, st->yomi + bun->nb_yoff, uyomi); 1745 st->curbun = oldcurbun; 1746 }; 1747 return st->maxbun; 1748} 1749 1750/* RkSubstYomi 1751 */ 1752int 1753_RkSubstYomi(struct RkContext *cx, int ys, int ye, WCHAR_T *yomi, int newLen) 1754{ 1755 struct nstore *st = cx->store; 1756 struct nbun *bun; 1757 struct nqueue *xq; 1758 struct nword **xqh; 1759 int i, j; 1760 int count; 1761 int yf; 1762 int cs, ce, cf; 1763 WCHAR_T *d, *s, *be; 1764 int nbun; 1765 int new_size; 1766 1767 yf = ys + newLen; 1768 cs = ys; 1769 ce = ye; 1770 /* 1771 * STEP 0: reallocate resources if needed 1772 * youmigana buffer should be reallocated as well. 1773 */ 1774 new_size = st->nyomi + (newLen - (ye - ys)); 1775 if (new_size > (int)st->maxyomi || new_size > (int)st->maxbunq || 1776 new_size > (int)st->maxxq) 1777 { 1778 st = _RkReallocBunStorage(st, (int)(new_size*1.2+10)); 1779 if (!st) 1780 return -1; 1781 cx->store = st; 1782 }; 1783 /* 1784 * STEP 1: update yomigana buffer 1785 */ 1786 /* move unchanged text portion [ye, ...) */ 1787 bun = &st->bunq[st->maxbun]; 1788 be = st->yomi + bun->nb_yoff; 1789 xq = st->xq; 1790 xqh = st->xqh; 1791 count = (st->nyomi - bun->nb_yoff) - ye; 1792 if (yf < ye) { /* shrunk */ 1793 d = be + yf; 1794 s = be + ye; 1795 while (count--) *d++ = *s++; 1796 } else if (ye < yf) { /* enlarged */ 1797 d = (s = st->yomi + st->nyomi) + count; 1798 while (count--) 1799 *--d = *--s; 1800 } 1801 /* replace the new text in [ys, yf) */ 1802 usncopy(be + ys, yomi, newLen); 1803 st->nyomi += (yf - ye); 1804 cf = yf; 1805 /* 1806 * STEP 2: remove affected words from XQ 1807 */ 1808/* Trim the words which terminate in [cs, ...) */ 1809 1810 for (i = 0; i < cs; i++) 1811 if (xq[i].tree && cs - i <= xq[i].maxlen) { 1812 list2height(xqh, xq[i].maxlen, xq[i].tree); 1813 for (j = cs - i; j < xq[i].maxlen; j++) 1814 if (xqh[j + 1]) { 1815 freeWord(st, xqh[j + 1]); 1816 xqh[j + 1] = (struct nword *)0; 1817 } 1818 xq[i].maxlen = 0; 1819 for (j = cs - i ; j >= 0 && !xqh[j] ;) { 1820 j--; 1821 } 1822 if (j > 0) 1823 xq[i].maxlen = j; 1824 else { 1825 xq[i].maxlen = 0; 1826 if (!j) { 1827 freeWord(st, xqh[0]); 1828 xqh[0] = (struct nword *)0; 1829 } 1830 } 1831 xq[i].tree = height2list(xqh, xq[i].maxlen); 1832 xq[i].status = 0; 1833 } 1834 /* Kill the whole trees in [cs, ce) and shift XQ to fill it. */ 1835 _RkFreeQue(st, cs, ce); 1836 if (cf < ce) 1837 for (i = cf, j = ce; j <= st->maxxq; i++, j++) { 1838 xq[i] = xq[j]; 1839 clearQue(&xq[j]); 1840 } 1841 if (ce < cf) 1842 for (i = st->maxxq, j = st->maxxq - (cf - ce); j >= ce; i--, j--) { 1843 xq[i] = xq[j]; 1844 clearQue(&xq[j]); 1845 } 1846 /* 1847 * STEP 3 restore queues by parsing yomigana after ys. 1848 */ 1849 nbun = st->maxbun; 1850 count = (st->nyomi - bun->nb_yoff) - ys; 1851 while (count > 0) { 1852 int yy; 1853 yy = st->bunq[st->maxbun].nb_yoff; 1854 ys = st->nyomi - yy - count; 1855 parseQue(cx, cf-1, yy, ys, ys + 1, 0); 1856 nbun = Que2Bun(cx, yy, ys, ys + 1, 0); 1857 ys++; 1858 count--; 1859 } 1860 st->curbun = 0; 1861 return nbun; 1862} 1863 1864/* RkFlushYomi 1865 */ 1866int 1867_RkFlushYomi(struct RkContext *cx) 1868{ 1869 int yy = cx->store->bunq[cx->store->maxbun].nb_yoff; 1870 int ys = cx->store->nyomi - yy; 1871 int ret; 1872 1873 parseQue(cx, cx->store->maxxq, yy, ys, ys, 1); 1874 if ((ret = Que2Bun(cx, yy, ys, ys, 1)) != -1) 1875 cx->store->curbun = 0; 1876 return(ret); 1877} 1878 1879/* _RkLearnBun 1880 * bunsetu jouho wo motoni gakushuu suru 1881 * sarani, word wo kaihou suru 1882 */ 1883inline 1884void blkcpy(unsigned char *d, unsigned char *s, unsigned char *e) 1885{ while (s < e) *d++ = *s++; } 1886 1887static 1888void 1889doLearn(struct RkContext *cx, struct nword *thisW) 1890{ 1891 struct nword *leftW; 1892#ifndef USE_MALLOC_FOR_BIG_ARRAY 1893 unsigned char *candidates[RK_CAND_NMAX]; 1894 unsigned permutation[RK_CAND_NMAX]; 1895 unsigned char tmp[RK_WREC_BMAX]; 1896#else 1897 unsigned char **candidates, *tmp; 1898 unsigned *permutation; 1899 candidates = (unsigned char **) 1900 malloc(sizeof(unsigned char *) * RK_CAND_NMAX); 1901 permutation = (unsigned *)malloc(sizeof(unsigned) * RK_CAND_NMAX); 1902 tmp = (unsigned char *)malloc(RK_WREC_BMAX); 1903 if (!candidates || !permutation || !tmp) { 1904 if (candidates) free(candidates); 1905 if (permutation) free(permutation); 1906 if (tmp) free(tmp); 1907 return; 1908 } 1909#endif 1910 1911 for (; (leftW = thisW->nw_left) != (struct nword *)0 ; thisW = leftW) { 1912 struct ncache *thisCache = thisW->nw_cache; 1913 1914 if (thisCache) { 1915 struct DM *dm = thisCache->nc_dic; 1916 struct DM *qm = thisW->nw_freq; 1917 unsigned char *wp; 1918 int ncands; 1919 int nl; 1920 unsigned long offset; 1921 int i; 1922 int current; 1923 1924 cx->time = _RkGetTick(1); 1925 if (thisCache->nc_flags & NC_ERROR) 1926 continue; 1927 if (!(wp = thisCache->nc_word)) 1928 continue; 1929 ncands = _RkCandNumber(wp); 1930 nl = (*wp >> 1) & 0x3f; 1931 if (qm && qm->dm_qbits) 1932 offset = _RkGetOffset((struct ND *)dm->dm_extdata.var, wp); 1933 else 1934 offset = 0L; 1935 if (*wp & 0x80) 1936 wp += 2; 1937 wp += 2 + nl * 2; 1938 for (i = 0; i < ncands; i++) { 1939 candidates[i] = wp; 1940 wp += 2 * ((*wp >> 1) & 0x7f) + 2; 1941 }; 1942 if (thisCache->nc_count) 1943 continue; 1944 if (qm && qm->dm_qbits) { 1945 int bits; 1946 1947 if (!(qm->dm_flags & DM_WRITABLE)) 1948 continue; 1949 bits = _RkCalcLog2(ncands + 1) + 1; 1950 _RkUnpackBits(permutation, qm->dm_qbits, offset, bits, ncands); 1951 for (current = 0; current < ncands; current++) 1952 if (ncands > (int)permutation[current]/2 && 1953 candidates[permutation[current]/2] == thisW->nw_kanji) 1954 break; 1955 if (current < ncands) { 1956 entryRut(qm->dm_rut, thisW->nw_csn, cx->time); 1957 if (0 < current) { 1958 _RkCopyBits(tmp, (unsigned long) 0L, bits, 1959 qm->dm_qbits, (unsigned long) offset, current); 1960 _RkCopyBits(qm->dm_qbits, (unsigned long) (offset + 0L), bits, 1961 qm->dm_qbits, (unsigned long) (offset + current*bits), 1962 1); 1963 _RkCopyBits(qm->dm_qbits, (unsigned long) (offset + bits), bits, 1964 tmp, (unsigned long) 0L, current); 1965 1966 }; 1967 qm->dm_flags |= DM_UPDATED; 1968 } 1969 } else { 1970 if (!(dm->dm_flags & DM_WRITABLE)) 1971 continue; 1972 for (current = 0; current < ncands; current++) 1973 if (candidates[current] == thisW->nw_kanji) 1974 break; 1975 if (DM2TYPE(dm)) { 1976 if (current) { 1977 unsigned char *t = candidates[0]; 1978 unsigned char *l = candidates[current]; 1979 unsigned char *c = l + 2 * ((*l >> 1) & 0x7f) + 2; 1980 1981 ((struct TW *)thisCache->nc_address)->lucks[1] 1982 = ((struct TW *)thisCache->nc_address)->lucks[0]; 1983 blkcpy(tmp, t, l); 1984 blkcpy(t, l, c); 1985 blkcpy(t + (int)(c - l), tmp, tmp + (int)(l - t)); 1986 thisCache->nc_flags |= NC_DIRTY; 1987 } 1988 ((struct TW *)thisCache->nc_address)->lucks[0] = cx->time; 1989 dm->dm_flags |= DM_UPDATED; 1990 } 1991 } 1992 } 1993 } 1994#ifdef USE_MALLOC_FOR_BIG_ARRAY 1995 free(candidates); 1996 free(permutation); 1997 free(tmp); 1998#endif 1999} 2000 2001void 2002_RkLearnBun(struct RkContext *cx, int cur, int mode) 2003{ 2004 struct nstore *st = cx->store; 2005 struct nbun *bun = &st->bunq[cur]; 2006 struct nword *w; 2007 int count = bun->nb_curcand; 2008 WCHAR_T *yomi = st->yomi + bun->nb_yoff; 2009 int ylen; 2010 int pos; 2011 2012 derefWord(bun->nb_cand); 2013 if (mode) { 2014 if (bun->nb_flags & RK_REARRANGED) { 2015 ylen = bun->nb_curlen 2016 + (cur < (int)st->maxbun - 1 ? (bun + 1)->nb_curlen : 0); 2017 pos = bun->nb_curlen; 2018 if (ylen < 32) { 2019 WCHAR_T *ey = yomi + ylen, *p; 2020#ifndef USE_MALLOC_FOR_BIG_ARRAY 2021 Wrec yomwrec[32 * sizeof(WCHAR_T)]; 2022 Wrec *dp = yomwrec; 2023#else 2024 Wrec *dp; 2025 Wrec *yomwrec = (Wrec *)malloc(sizeof(Wrec) * 32 * sizeof(WCHAR_T)); 2026 if (!yomwrec) { 2027 return; 2028 } 2029 dp = yomwrec; 2030#endif 2031 for (p = yomi ; p < ey ; p++) { 2032 *dp++ = (unsigned)*p >> 8; 2033 *dp++ = (unsigned)*p & 0x0ff; 2034 } 2035 _RkRegisterNV(cx->nv, yomwrec, ylen, pos); 2036#ifdef USE_MALLOC_FOR_BIG_ARRAY 2037 free(yomwrec); 2038#endif 2039 } 2040 } 2041 for (w = bun->nb_cand; w; w = w->nw_next) { 2042 if (CanSplitWord(w) && w->nw_ylen == bun->nb_curlen) { 2043 if (count-- <= 0) { 2044 doLearn(cx, w); 2045 break; 2046 } 2047 } 2048 } 2049 } 2050 killWord(st, bun->nb_cand); 2051} 2052 2053