1/* $NetBSD: compress.c,v 1.6 2003/08/07 11:17:24 agc Exp $ */ 2 3/* 4 * Copyright (c) 1989, 1993 5 * The Regents of the University of California. All rights reserved. 6 * 7 * This code is derived from software contributed to Berkeley by 8 * Edward Wang at The University of California, Berkeley. 9 * 10 * Redistribution and use in source and binary forms, with or without 11 * modification, are permitted provided that the following conditions 12 * are met: 13 * 1. Redistributions of source code must retain the above copyright 14 * notice, this list of conditions and the following disclaimer. 15 * 2. Redistributions in binary form must reproduce the above copyright 16 * notice, this list of conditions and the following disclaimer in the 17 * documentation and/or other materials provided with the distribution. 18 * 3. Neither the name of the University nor the names of its contributors 19 * may be used to endorse or promote products derived from this software 20 * without specific prior written permission. 21 * 22 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 23 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 24 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 25 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 26 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 27 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 28 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 29 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 30 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 31 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 32 * SUCH DAMAGE. 33 */ 34 35#include <sys/cdefs.h> 36#ifndef lint 37#if 0 38static char sccsid[] = "@(#)compress.c 8.1 (Berkeley) 6/6/93"; 39#else 40__RCSID("$NetBSD: compress.c,v 1.6 2003/08/07 11:17:24 agc Exp $"); 41#endif 42#endif /* not lint */ 43 44#include <fcntl.h> 45#include <stdio.h> 46#include <stdlib.h> 47#include <string.h> 48#include "defs.h" 49#include "tt.h" 50 51 /* special */ 52int cc_trace = 0; 53FILE *cc_trace_fp; 54 55 /* tunable parameters */ 56 57int cc_reverse = 1; 58int cc_sort = 0; 59int cc_chop = 0; 60 61int cc_token_max = 8; /* <= TOKEN_MAX */ 62int cc_token_min = 2; /* > tt.tt_put_token_cost */ 63int cc_npass0 = 1; 64int cc_npass1 = 1; 65 66int cc_bufsize = 1024 * 3; /* XXX, or 80 * 24 * 2 */ 67 68int cc_ntoken = 8192; 69 70#define cc_weight XXX 71#ifndef cc_weight 72int cc_weight = 0; 73#endif 74 75#define TOKEN_MAX 16 76 77struct cc { 78 char string[TOKEN_MAX]; 79 char length; 80 char flag; 81#ifndef cc_weight 82 short weight; 83#endif 84 long time; /* time last seen */ 85 short bcount; /* count in this buffer */ 86 short ccount; /* count in compression */ 87 short places; /* places in the buffer */ 88 short code; /* token code */ 89 struct cc *qforw, *qback; 90 struct cc *hforw, **hback; 91}; 92 93short cc_thresholds[TOKEN_MAX + 1]; 94#define thresh(length) (cc_thresholds[length]) 95#define threshp(code, count, length) \ 96 ((code) >= 0 || (short) (count) >= cc_thresholds[length]) 97 98#ifndef cc_weight 99short cc_wthresholds[TOKEN_MAX + 1]; 100#define wthresh(length) (cc_wthresholds[length]) 101#define wthreshp(weight, length) ((short) (weight) >= cc_wthresholds[length]) 102#else 103#define wthreshp(weight, length) (0) 104#endif 105 106#ifndef cc_weight 107short cc_wlimits[TOKEN_MAX + 1]; 108#define wlimit(length) (cc_wlimits[length]) 109#endif 110 111#define put_token_score(length) ((length) - tt.tt_put_token_cost) 112 113int cc_score_adjustments[TOKEN_MAX + 1][8]; /* XXX, 8 > max of cc_thresholds */ 114#define score_adjust(score, p) \ 115 do { \ 116 int _length = (p)->length; \ 117 int _ccount = (p)->ccount; \ 118 if (threshp((p)->code, _ccount, _length) || \ 119 wthreshp((p)->weight, _length)) /* XXX */ \ 120 (score) -= _length - tt.tt_put_token_cost; \ 121 else \ 122 (score) += cc_score_adjustments[_length][_ccount]; \ 123 } while (0) 124 125int cc_initial_scores[TOKEN_MAX + 1][8]; /* XXX, 8 > max of cc_thresholds */ 126 127struct cc cc_q0a, cc_q0b, cc_q1a, cc_q1b; 128 129#define qinsert(p1, p2) \ 130 do { \ 131 struct cc *forw = (p1)->qforw; \ 132 struct cc *back = (p1)->qback; \ 133 back->qforw = forw; \ 134 forw->qback = back; \ 135 forw = (p2)->qforw; \ 136 (p1)->qforw = forw; \ 137 forw->qback = (p1); \ 138 (p2)->qforw = (p1); \ 139 (p1)->qback = (p2); \ 140 } while (0) 141 142#define qinsertq(q, p) \ 143 ((q)->qforw == (q) ? 0 : \ 144 ((q)->qback->qforw = (p)->qforw, \ 145 (p)->qforw->qback = (q)->qback, \ 146 (q)->qforw->qback = (p), \ 147 (p)->qforw = (q)->qforw, \ 148 (q)->qforw = (q), \ 149 (q)->qback = (q))) 150 151#define H (14) 152#define HSIZE (1 << H) 153#define hash(h, c) (((((h) >> (H - 8)) | (h) << 8) ^ (c)) & (HSIZE - 1)) 154 155char *cc_buffer; 156struct cc **cc_output; /* the output array */ 157short *cc_places[TOKEN_MAX + 1]; 158short *cc_hashcodes; /* for computing hashcodes */ 159struct cc **cc_htab; /* the hash table */ 160struct cc **cc_tokens; /* holds all the active tokens */ 161struct cc_undo { 162 struct cc **pos; 163 struct cc *val; 164} *cc_undo; 165 166long cc_time, cc_time0; 167 168char *cc_tt_ob, *cc_tt_obe; 169 170int cc_compress(struct cc **, struct cc **, char); 171void cc_compress_cleanup(struct cc **, int); 172void cc_compress_phase(struct cc **, int, struct cc **, int); 173void cc_compress_phase1(struct cc **, struct cc **, int, int); 174void cc_output_phase(char *, struct cc **, int); 175int cc_sweep(char *, int, struct cc **, int); 176void cc_sweep0(char *, int, int); 177int cc_sweep_phase(char *, int, struct cc **); 178void cc_sweep_reverse(struct cc **, short *); 179int cc_token_compare(const void *, const void *); 180 181int 182ccinit(void) 183{ 184 int i, j; 185 struct cc *p; 186 187 if (tt.tt_token_max > cc_token_max) 188 tt.tt_token_max = cc_token_max; 189 if (tt.tt_token_min < cc_token_min) 190 tt.tt_token_min = cc_token_min; 191 if (tt.tt_token_min > tt.tt_token_max) { 192 tt.tt_ntoken = 0; 193 return 0; 194 } 195 if (tt.tt_ntoken > cc_ntoken / 2) /* not likely */ 196 tt.tt_ntoken = cc_ntoken / 2; 197#define C(x) (sizeof (x) / sizeof *(x)) 198 for (i = 0; i < (int)C(cc_thresholds); i++) { 199 int h = i - tt.tt_put_token_cost; 200 if (h > 0) 201 cc_thresholds[i] = 202 (tt.tt_set_token_cost + 1 + h - 1) / h + 1; 203 else 204 cc_thresholds[i] = 0; 205 } 206 for (i = 0; i < (int)C(cc_score_adjustments); i++) { 207 int t = cc_thresholds[i]; 208 for (j = 0; j < (int)C(*cc_score_adjustments); j++) { 209 if (j >= t) 210 cc_score_adjustments[i][j] = 211 - (i - tt.tt_put_token_cost); 212 else if (j < t - 1) 213 cc_score_adjustments[i][j] = 0; 214 else 215 /* 216 * cost now is 217 * length * (ccount + 1) a 218 * cost before was 219 * set-token-cost + length + 220 * ccount * put-token-cost b 221 * the score adjustment is (b - a) 222 */ 223 cc_score_adjustments[i][j] = 224 tt.tt_set_token_cost + i + 225 j * tt.tt_put_token_cost - 226 i * (j + 1); 227 if (j >= t) 228 cc_initial_scores[i][j] = 0; 229 else 230 /* 231 * - (set-token-cost + 232 * (length - put-token-cost) - 233 * (length - put-token-cost) * ccount) 234 */ 235 cc_initial_scores[i][j] = 236 - (tt.tt_set_token_cost + 237 (i - tt.tt_put_token_cost) - 238 (i - tt.tt_put_token_cost) * j); 239 } 240 } 241#ifndef cc_weight 242 for (i = 1; i < C(cc_wthresholds); i++) { 243 cc_wthresholds[i] = 244 ((tt.tt_set_token_cost + tt.tt_put_token_cost) / i + 245 i / 5 + 1) * 246 cc_weight + 1; 247 cc_wlimits[i] = cc_wthresholds[i] + cc_weight; 248 } 249#endif 250#undef C 251 if ((cc_output = (struct cc **) 252 malloc((unsigned) cc_bufsize * sizeof *cc_output)) == 0) 253 goto nomem; 254 if ((cc_hashcodes = (short *) 255 malloc((unsigned) cc_bufsize * sizeof *cc_hashcodes)) == 0) 256 goto nomem; 257 if ((cc_htab = (struct cc **) malloc(HSIZE * sizeof *cc_htab)) == 0) 258 goto nomem; 259 if ((cc_tokens = (struct cc **) 260 malloc((unsigned) 261 (cc_ntoken + tt.tt_token_max - tt.tt_token_min + 1) * 262 sizeof *cc_tokens)) == 0) 263 goto nomem; 264 if ((cc_undo = (struct cc_undo *) 265 malloc((unsigned) cc_bufsize * sizeof *cc_undo)) == 0) 266 goto nomem; 267 for (i = tt.tt_token_min; i <= tt.tt_token_max; i++) 268 if ((cc_places[i] = (short *) 269 malloc((unsigned) cc_bufsize * sizeof **cc_places)) == 0) 270 goto nomem; 271 cc_q0a.qforw = cc_q0a.qback = &cc_q0a; 272 cc_q0b.qforw = cc_q0b.qback = &cc_q0b; 273 cc_q1a.qforw = cc_q1a.qback = &cc_q1a; 274 cc_q1b.qforw = cc_q1b.qback = &cc_q1b; 275 if ((p = (struct cc *) malloc((unsigned) cc_ntoken * sizeof *p)) == 0) 276 goto nomem; 277 for (i = 0; i < tt.tt_ntoken; i++) { 278 p->code = i; 279 p->time = -1; 280 p->qback = cc_q0a.qback; 281 p->qforw = &cc_q0a; 282 p->qback->qforw = p; 283 cc_q0a.qback = p; 284 p++; 285 } 286 for (; i < cc_ntoken; i++) { 287 p->code = -1; 288 p->time = -1; 289 p->qback = cc_q1a.qback; 290 p->qforw = &cc_q1a; 291 p->qback->qforw = p; 292 cc_q1a.qback = p; 293 p++; 294 } 295 cc_tt_ob = tt_ob; 296 cc_tt_obe = tt_obe; 297 if ((cc_buffer = malloc((unsigned) cc_bufsize)) == 0) 298 goto nomem; 299 return 0; 300nomem: 301 wwerrno = WWE_NOMEM; 302 return -1; 303} 304 305void 306ccstart(void) 307{ 308 ttflush(); 309 tt_obp = tt_ob = cc_buffer; 310 tt_obe = tt_ob + cc_bufsize; 311 tt.tt_flush = ccflush; 312 if (cc_trace) { 313 cc_trace_fp = fopen("window-trace", "a"); 314 (void) fcntl(fileno(cc_trace_fp), F_SETFD, 1); 315 } 316 ccreset(); 317} 318 319void 320ccreset(void) 321{ 322 struct cc *p; 323 324 memset((char *) cc_htab, 0, HSIZE * sizeof *cc_htab); 325 for (p = cc_q0a.qforw; p != &cc_q0a; p = p->qforw) 326 p->hback = 0; 327 for (p = cc_q1a.qforw; p != &cc_q1a; p = p->qforw) 328 p->hback = 0; 329} 330 331void 332ccend(void) 333{ 334 335 ttflush(); 336 tt_obp = tt_ob = cc_tt_ob; 337 tt_obe = cc_tt_obe; 338 tt.tt_flush = 0; 339 if (cc_trace_fp != NULL) { 340 (void) fclose(cc_trace_fp); 341 cc_trace_fp = NULL; 342 } 343} 344 345void 346ccflush(void) 347{ 348 int bufsize = tt_obp - tt_ob; 349 int n; 350 351 if (tt_ob != cc_buffer) 352 abort(); 353 if (cc_trace_fp != NULL) { 354 (void) fwrite(tt_ob, 1, bufsize, cc_trace_fp); 355 (void) putc(-1, cc_trace_fp); 356 } 357 tt.tt_flush = 0; 358 (*tt.tt_compress)(1); 359 if (bufsize < tt.tt_token_min) { 360 ttflush(); 361 goto out; 362 } 363 tt_obp = tt_ob = cc_tt_ob; 364 tt_obe = cc_tt_obe; 365 cc_time0 = cc_time; 366 cc_time += bufsize; 367 n = cc_sweep_phase(cc_buffer, bufsize, cc_tokens); 368 cc_compress_phase(cc_output, bufsize, cc_tokens, n); 369 cc_output_phase(cc_buffer, cc_output, bufsize); 370 ttflush(); 371 tt_obp = tt_ob = cc_buffer; 372 tt_obe = cc_buffer + cc_bufsize; 373out: 374 (*tt.tt_compress)(0); 375 tt.tt_flush = ccflush; 376} 377 378int 379cc_sweep_phase(char *buffer, int bufsize, struct cc **tokens) 380{ 381 struct cc **pp = tokens; 382 int i, n; 383#ifdef STATS 384 int nn, ii; 385#endif 386 387#ifdef STATS 388 if (verbose >= 0) 389 time_begin(); 390 if (verbose > 0) 391 printf("Sweep:"); 392#endif 393 cc_sweep0(buffer, bufsize, tt.tt_token_min - 1); 394#ifdef STATS 395 ntoken_stat = 0; 396 nn = 0; 397 ii = 0; 398#endif 399 for (i = tt.tt_token_min; i <= tt.tt_token_max; i++) { 400#ifdef STATS 401 if (verbose > 0) { 402 if (ii > 7) { 403 printf("\n "); 404 ii = 0; 405 } 406 ii++; 407 printf(" (%d", i); 408 (void) fflush(stdout); 409 } 410#endif 411 n = cc_sweep(buffer, bufsize, pp, i); 412 pp += n; 413#ifdef STATS 414 if (verbose > 0) { 415 if (--n > 0) { 416 printf(" %d", n); 417 nn += n; 418 } 419 putchar(')'); 420 } 421#endif 422 } 423 qinsertq(&cc_q1b, &cc_q1a); 424#ifdef STATS 425 if (verbose > 0) 426 printf("\n %d tokens, %d candidates\n", 427 ntoken_stat, nn); 428 if (verbose >= 0) 429 time_end(); 430#endif 431 return pp - tokens; 432} 433 434void 435cc_sweep0(char *buffer, int n, int length) 436{ 437 char *p; 438 short *hc; 439 int i; 440 short c; 441 short pc = tt.tt_padc; 442 443 /* n and length are at least 1 */ 444 p = buffer++; 445 hc = cc_hashcodes; 446 i = n; 447 do { 448 if ((*hc++ = *p++) == pc) 449 hc[-1] = -1; 450 } while (--i); 451 while (--length) { 452 p = buffer++; 453 hc = cc_hashcodes; 454 for (i = n--; --i;) { 455 if ((c = *p++) == pc || *hc < 0) 456 c = -1; 457 else 458 c = hash(*hc, c); 459 *hc++ = c; 460 } 461 } 462} 463 464int 465cc_sweep(char *buffer, int bufsize, struct cc **tokens, int length) 466{ 467 struct cc *p; 468 char *cp; 469 int i; 470 short *hc; 471 short *places = cc_places[length]; 472 struct cc **pp = tokens; 473 short threshold = thresh(length); 474#ifndef cc_weight 475 short wthreshold = wthresh(length); 476 short limit = wlimit(length); 477#endif 478 int time0; 479 short pc = tt.tt_padc; 480 481 i = length - 1; 482 bufsize -= i; 483 cp = buffer + i; 484 hc = cc_hashcodes; 485 time0 = cc_time0; 486 for (i = 0; i < bufsize; i++, time0++) { 487 struct cc **h; 488 489 { 490 short *hc1 = hc; 491 short c = *cp++; 492 short hh; 493 if ((hh = *hc1) < 0 || c == pc) { 494 *hc1++ = -1; 495 hc = hc1; 496 continue; 497 } 498 h = cc_htab + (*hc1++ = hash(hh, c)); 499 hc = hc1; 500 } 501 for (p = *h; p != 0; p = p->hforw) 502 if (p->length == (char) length) { 503 char *p1 = p->string; 504 char *p2 = cp - length; 505 int n = length; 506 do 507 if (*p1++ != *p2++) 508 goto fail; 509 while (--n); 510 break; 511 fail:; 512 } 513 if (p == 0) { 514 p = cc_q1a.qback; 515 if (p == &cc_q1a || 516 (p->time >= cc_time0 && p->length == (char) length)) 517 continue; 518 if (p->hback != 0) 519 if ((*p->hback = p->hforw) != 0) 520 p->hforw->hback = p->hback; 521 { 522 char *p1 = p->string; 523 char *p2 = cp - length; 524 int n = length; 525 do 526 *p1++ = *p2++; 527 while (--n); 528 } 529 p->length = length; 530#ifndef cc_weight 531 p->weight = cc_weight; 532#endif 533 p->time = time0; 534 p->bcount = 1; 535 p->ccount = 0; 536 p->flag = 0; 537 if ((p->hforw = *h) != 0) 538 p->hforw->hback = &p->hforw; 539 *h = p; 540 p->hback = h; 541 qinsert(p, &cc_q1a); 542 places[i] = -1; 543 p->places = i; 544#ifdef STATS 545 ntoken_stat++; 546#endif 547 } else if (p->time < cc_time0) { 548#ifndef cc_weight 549 if ((p->weight += p->time - time0) < 0) 550 p->weight = cc_weight; 551 else if ((p->weight += cc_weight) > limit) 552 p->weight = limit; 553#endif 554 p->time = time0; 555 p->bcount = 1; 556 p->ccount = 0; 557 if (p->code >= 0) { 558 p->flag = 1; 559 *pp++ = p; 560 } else 561#ifndef cc_weight 562 if (p->weight >= wthreshold) { 563 p->flag = 1; 564 *pp++ = p; 565 qinsert(p, &cc_q1b); 566 } else 567#endif 568 { 569 p->flag = 0; 570 qinsert(p, &cc_q1a); 571 } 572 places[i] = -1; 573 p->places = i; 574#ifdef STATS 575 ntoken_stat++; 576#endif 577 } else if (p->time + length > time0) { 578 /* 579 * overlapping token, don't count as two and 580 * don't update time0, but do adjust weight to offset 581 * the difference 582 */ 583#ifndef cc_weight 584 if (cc_weight != 0) { /* XXX */ 585 p->weight += time0 - p->time; 586 if (!p->flag && p->weight >= wthreshold) { 587 p->flag = 1; 588 *pp++ = p; 589 qinsert(p, &cc_q1b); 590 } 591 } 592#endif 593 places[i] = p->places; 594 p->places = i; 595 } else { 596#ifndef cc_weight 597 if ((p->weight += p->time - time0) < 0) 598 p->weight = cc_weight; 599 else if ((p->weight += cc_weight) > limit) 600 p->weight = limit; 601#endif 602 p->time = time0; 603 p->bcount++; 604 if (!p->flag && 605 /* code must be < 0 if flag false here */ 606 (p->bcount >= threshold 607#ifndef cc_weight 608 || p->weight >= wthreshold 609#endif 610 )) { 611 p->flag = 1; 612 *pp++ = p; 613 qinsert(p, &cc_q1b); 614 } 615 places[i] = p->places; 616 p->places = i; 617 } 618 } 619 if ((i = pp - tokens) > 0) { 620 *pp = 0; 621 if (cc_reverse) 622 cc_sweep_reverse(tokens, places); 623 if (cc_sort && i > 1) { 624 qsort((char *) tokens, i, sizeof *tokens, 625 cc_token_compare); 626 } 627 if (cc_chop) { 628 if ((i = i * cc_chop / 100) == 0) 629 i = 1; 630 tokens[i] = 0; 631 } 632 i++; 633 } 634 return i; 635} 636 637void 638cc_sweep_reverse(struct cc **pp, short int *places) 639{ 640 struct cc *p; 641 short frnt, back, t; 642 643 while ((p = *pp++) != 0) { 644 back = -1; 645 t = p->places; 646 /* the list is never empty */ 647 do { 648 frnt = places[t]; 649 places[t] = back; 650 back = t; 651 } while ((t = frnt) >= 0); 652 p->places = back; 653 } 654} 655 656void 657cc_compress_phase(struct cc **output, int bufsize, struct cc **tokens, int ntoken) 658{ 659 int i; 660 661 memset((char *) output, 0, bufsize * sizeof *output); 662 for (i = 0; i < cc_npass0; i++) 663 cc_compress_phase1(output, tokens, ntoken, 0); 664 for (i = 0; i < cc_npass1; i++) 665 cc_compress_phase1(output, tokens, ntoken, 1); 666 cc_compress_cleanup(output, bufsize); 667} 668 669void 670cc_compress_phase1(struct cc **output, struct cc **tokens, int ntoken, int flag) 671{ 672 struct cc **pp; 673#ifdef STATS 674 int i = 0; 675 int nt = 0, cc = 0, nc = 0; 676#endif 677 678#ifdef STATS 679 if (verbose >= 0) 680 time_begin(); 681 if (verbose > 0) 682 printf("Compress:"); 683#endif 684 pp = tokens; 685 while (pp < tokens + ntoken) { 686#ifdef STATS 687 if (verbose > 0) { 688 ntoken_stat = 0; 689 ccount_stat = 0; 690 ncover_stat = 0; 691 if (i > 2) { 692 printf("\n "); 693 i = 0; 694 } 695 i++; 696 printf(" (%d", (*pp)->length); 697 (void) fflush(stdout); 698 } 699#endif 700 pp += cc_compress(output, pp, flag); 701#ifdef STATS 702 if (verbose > 0) { 703 printf(" %dt %du %dc)", ntoken_stat, ccount_stat, 704 ncover_stat); 705 nt += ntoken_stat; 706 cc += ccount_stat; 707 nc += ncover_stat; 708 } 709#endif 710 } 711#ifdef STATS 712 if (verbose > 0) 713 printf("\n total: (%dt %du %dc)\n", nt, cc, nc); 714 if (verbose >= 0) 715 time_end(); 716#endif 717} 718 719void 720cc_compress_cleanup(struct cc **output, int bufsize) 721{ 722 struct cc **end; 723 724 /* the previous output phase may have been interrupted */ 725 qinsertq(&cc_q0b, &cc_q0a); 726 for (end = output + bufsize; output < end;) { 727 struct cc *p; 728 int length; 729 if ((p = *output) == 0) { 730 output++; 731 continue; 732 } 733 length = p->length; 734 if (!p->flag) { 735 } else if (p->code >= 0) { 736 qinsert(p, &cc_q0b); 737 p->flag = 0; 738 } else if (p->ccount == 0) { 739 *output = 0; 740 } else if (p->ccount >= thresh(length) 741#ifndef cc_weight 742 || wthreshp(p->weight, length) 743#endif 744 ) { 745 p->flag = 0; 746 } else { 747 p->ccount = 0; 748 *output = 0; 749 } 750 output += length; 751 } 752} 753 754int 755cc_compress(struct cc **output, struct cc **tokens, char flag) 756{ 757 struct cc **pp = tokens; 758 struct cc *p = *pp++; 759 int length = p->length; 760 int threshold = thresh(length); 761#ifndef cc_weight 762 short wthreshold = wthresh(length); 763#endif 764 short *places = cc_places[length]; 765 int *initial_scores = cc_initial_scores[length]; 766 int initial_score0 = put_token_score(length); 767 768 do { 769 int score; 770 struct cc_undo *undop; 771 int ccount; 772#ifdef STATS 773 int ncover; 774#endif 775 int i; 776 777 ccount = p->ccount; 778 if ((short) ccount >= p->bcount) 779 continue; 780 if (p->code >= 0 || ccount >= threshold) 781 score = 0; 782#ifndef cc_weight 783 else if (p->weight >= wthreshold) 784 /* allow one fewer match than normal */ 785 /* XXX, should adjust for ccount */ 786 score = - tt.tt_set_token_cost; 787#endif 788 else 789 score = initial_scores[ccount]; 790 undop = cc_undo; 791#ifdef STATS 792 ncover = 0; 793#endif 794 for (i = p->places; i >= 0; i = places[i]) { 795 struct cc **jp; 796 struct cc *x; 797 struct cc **ip = output + i; 798 int score0 = initial_score0; 799 struct cc **iip = ip + length; 800 struct cc_undo *undop1 = undop; 801 802 if ((x = *(jp = ip)) != 0) 803 goto z; 804 while (--jp >= output) 805 if ((x = *jp) != 0) { 806 if (jp + x->length > ip) 807 goto z; 808 break; 809 } 810 jp = ip + 1; 811 while (jp < iip) { 812 if ((x = *jp) == 0) { 813 jp++; 814 continue; 815 } 816 z: 817 if (x == p) 818 goto undo; 819#ifdef STATS 820 ncover++; 821#endif 822 undop->pos = jp; 823 undop->val = x; 824 undop++; 825 *jp = 0; 826 x->ccount--; 827 score_adjust(score0, x); 828 if (score0 < 0 && flag) 829 goto undo; 830 jp += x->length; 831 } 832 undop->pos = ip; 833 undop->val = 0; 834 undop++; 835 *ip = p; 836 ccount++; 837 score += score0; 838 continue; 839 undo: 840 while (--undop >= undop1) 841 if ((*undop->pos = x = undop->val)) 842 x->ccount++; 843 undop++; 844 } 845 if (score > 0) { 846#ifdef STATS 847 ccount_stat += ccount - p->ccount; 848 ntoken_stat++; 849 ncover_stat += ncover; 850#endif 851 p->ccount = ccount; 852 } else { 853 struct cc_undo *u = cc_undo; 854 while (--undop >= u) { 855 struct cc *x; 856 if ((*undop->pos = x = undop->val)) 857 x->ccount++; 858 } 859 } 860 } while ((p = *pp++) != 0); 861 return pp - tokens; 862} 863 864void 865cc_output_phase(char *buffer, struct cc **output, int bufsize) 866{ 867 int i; 868 struct cc *p, *p1; 869 870 for (i = 0; i < bufsize;) { 871 if ((p = output[i]) == 0) { 872 ttputc(buffer[i]); 873 i++; 874 } else if (p->code >= 0) { 875 if (--p->ccount == 0) 876 qinsert(p, &cc_q0a); 877 (*tt.tt_put_token)(p->code, p->string, p->length); 878 wwntokuse++; 879 wwntoksave += put_token_score(p->length); 880 i += p->length; 881 } else if ((p1 = cc_q0a.qback) != &cc_q0a) { 882 p->code = p1->code; 883 p1->code = -1; 884 qinsert(p1, &cc_q1a); 885 if (--p->ccount == 0) 886 qinsert(p, &cc_q0a); 887 else 888 qinsert(p, &cc_q0b); 889 (*tt.tt_set_token)(p->code, p->string, p->length); 890 wwntokdef++; 891 wwntoksave -= tt.tt_set_token_cost; 892 i += p->length; 893 } else { 894 p->ccount--; 895 ttwrite(p->string, p->length); 896 wwntokbad++; 897 i += p->length; 898 } 899 } 900 wwntokc += bufsize; 901} 902 903int 904cc_token_compare(const void *p1, const void *p2) 905{ 906 const struct cc * const * vp1 = p1; 907 const struct cc * const * vp2 = p2; 908 return (*vp2)->bcount - (*vp1)->bcount; 909} 910