1/*	$NetBSD: compress.c,v 1.6 2003/08/07 11:17:24 agc Exp $	*/
2
3/*
4 * Copyright (c) 1989, 1993
5 *	The Regents of the University of California.  All rights reserved.
6 *
7 * This code is derived from software contributed to Berkeley by
8 * Edward Wang at The University of California, Berkeley.
9 *
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
12 * are met:
13 * 1. Redistributions of source code must retain the above copyright
14 *    notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 *    notice, this list of conditions and the following disclaimer in the
17 *    documentation and/or other materials provided with the distribution.
18 * 3. Neither the name of the University nor the names of its contributors
19 *    may be used to endorse or promote products derived from this software
20 *    without specific prior written permission.
21 *
22 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
23 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
24 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
25 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
26 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
27 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
28 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
29 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
30 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
31 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
32 * SUCH DAMAGE.
33 */
34
35#include <sys/cdefs.h>
36#ifndef lint
37#if 0
38static char sccsid[] = "@(#)compress.c	8.1 (Berkeley) 6/6/93";
39#else
40__RCSID("$NetBSD: compress.c,v 1.6 2003/08/07 11:17:24 agc Exp $");
41#endif
42#endif /* not lint */
43
44#include <fcntl.h>
45#include <stdio.h>
46#include <stdlib.h>
47#include <string.h>
48#include "defs.h"
49#include "tt.h"
50
51	/* special */
52int cc_trace = 0;
53FILE *cc_trace_fp;
54
55	/* tunable parameters */
56
57int cc_reverse = 1;
58int cc_sort = 0;
59int cc_chop = 0;
60
61int cc_token_max = 8;		/* <= TOKEN_MAX */
62int cc_token_min = 2;		/* > tt.tt_put_token_cost */
63int cc_npass0 = 1;
64int cc_npass1 = 1;
65
66int cc_bufsize = 1024 * 3;	/* XXX, or 80 * 24 * 2 */
67
68int cc_ntoken = 8192;
69
70#define cc_weight XXX
71#ifndef cc_weight
72int cc_weight = 0;
73#endif
74
75#define TOKEN_MAX 16
76
77struct cc {
78	char string[TOKEN_MAX];
79	char length;
80	char flag;
81#ifndef cc_weight
82	short weight;
83#endif
84	long time;		/* time last seen */
85	short bcount;		/* count in this buffer */
86	short ccount;		/* count in compression */
87	short places;		/* places in the buffer */
88	short code;		/* token code */
89	struct cc *qforw, *qback;
90	struct cc *hforw, **hback;
91};
92
93short cc_thresholds[TOKEN_MAX + 1];
94#define thresh(length) (cc_thresholds[length])
95#define threshp(code, count, length) \
96	((code) >= 0 || (short) (count) >= cc_thresholds[length])
97
98#ifndef cc_weight
99short cc_wthresholds[TOKEN_MAX + 1];
100#define wthresh(length) (cc_wthresholds[length])
101#define wthreshp(weight, length) ((short) (weight) >= cc_wthresholds[length])
102#else
103#define wthreshp(weight, length) (0)
104#endif
105
106#ifndef cc_weight
107short cc_wlimits[TOKEN_MAX + 1];
108#define wlimit(length) (cc_wlimits[length])
109#endif
110
111#define put_token_score(length) ((length) - tt.tt_put_token_cost)
112
113int cc_score_adjustments[TOKEN_MAX + 1][8]; /* XXX, 8 > max of cc_thresholds */
114#define score_adjust(score, p) \
115	do { \
116		int _length = (p)->length; \
117		int _ccount = (p)->ccount; \
118		if (threshp((p)->code, _ccount, _length) || \
119		    wthreshp((p)->weight, _length)) /* XXX */ \
120			(score) -= _length - tt.tt_put_token_cost; \
121		else \
122			(score) += cc_score_adjustments[_length][_ccount]; \
123	} while (0)
124
125int cc_initial_scores[TOKEN_MAX + 1][8]; /* XXX, 8 > max of cc_thresholds */
126
127struct cc cc_q0a, cc_q0b, cc_q1a, cc_q1b;
128
129#define qinsert(p1, p2) \
130	do { \
131		struct cc *forw = (p1)->qforw; \
132		struct cc *back = (p1)->qback; \
133		back->qforw = forw; \
134		forw->qback = back; \
135		forw = (p2)->qforw; \
136		(p1)->qforw = forw; \
137		forw->qback = (p1); \
138		(p2)->qforw = (p1); \
139		(p1)->qback = (p2); \
140	} while (0)
141
142#define qinsertq(q, p) \
143	((q)->qforw == (q) ? 0 : \
144	 ((q)->qback->qforw = (p)->qforw, \
145	  (p)->qforw->qback = (q)->qback, \
146	  (q)->qforw->qback = (p), \
147	  (p)->qforw = (q)->qforw, \
148	  (q)->qforw = (q), \
149	  (q)->qback = (q)))
150
151#define H		(14)
152#define HSIZE		(1 << H)
153#define hash(h, c)	(((((h) >> (H - 8)) | (h) << 8) ^ (c)) & (HSIZE - 1))
154
155char *cc_buffer;
156struct cc **cc_output;			/* the output array */
157short *cc_places[TOKEN_MAX + 1];
158short *cc_hashcodes;			/* for computing hashcodes */
159struct cc **cc_htab;			/* the hash table */
160struct cc **cc_tokens;			/* holds all the active tokens */
161struct cc_undo {
162	struct cc **pos;
163	struct cc *val;
164} *cc_undo;
165
166long cc_time, cc_time0;
167
168char *cc_tt_ob, *cc_tt_obe;
169
170int	cc_compress(struct cc **, struct cc **, char);
171void	cc_compress_cleanup(struct cc **, int);
172void	cc_compress_phase(struct cc **, int, struct cc **, int);
173void	cc_compress_phase1(struct cc **, struct cc **, int, int);
174void	cc_output_phase(char *, struct cc **, int);
175int	cc_sweep(char *, int, struct cc **, int);
176void	cc_sweep0(char *, int, int);
177int	cc_sweep_phase(char *, int, struct cc **);
178void	cc_sweep_reverse(struct cc **, short *);
179int	cc_token_compare(const void *, const void *);
180
181int
182ccinit(void)
183{
184	int i, j;
185	struct cc *p;
186
187	if (tt.tt_token_max > cc_token_max)
188		tt.tt_token_max = cc_token_max;
189	if (tt.tt_token_min < cc_token_min)
190		tt.tt_token_min = cc_token_min;
191	if (tt.tt_token_min > tt.tt_token_max) {
192		tt.tt_ntoken = 0;
193		return 0;
194	}
195	if (tt.tt_ntoken > cc_ntoken / 2)	/* not likely */
196		tt.tt_ntoken = cc_ntoken / 2;
197#define C(x) (sizeof (x) / sizeof *(x))
198	for (i = 0; i < (int)C(cc_thresholds); i++) {
199		int h = i - tt.tt_put_token_cost;
200		if (h > 0)
201			cc_thresholds[i] =
202				(tt.tt_set_token_cost + 1 + h - 1) / h + 1;
203		else
204			cc_thresholds[i] = 0;
205	}
206	for (i = 0; i < (int)C(cc_score_adjustments); i++) {
207		int t = cc_thresholds[i];
208		for (j = 0; j < (int)C(*cc_score_adjustments); j++) {
209			if (j >= t)
210				cc_score_adjustments[i][j] =
211					- (i - tt.tt_put_token_cost);
212			else if (j < t - 1)
213				cc_score_adjustments[i][j] = 0;
214			else
215				/*
216				 * cost now is
217				 *	length * (ccount + 1)		a
218				 * cost before was
219				 *	set-token-cost + length +
220				 *		ccount * put-token-cost	b
221				 * the score adjustment is (b - a)
222				 */
223				cc_score_adjustments[i][j] =
224					tt.tt_set_token_cost + i +
225						j * tt.tt_put_token_cost -
226							i * (j + 1);
227			if (j >= t)
228				cc_initial_scores[i][j] = 0;
229			else
230				/*
231				 * - (set-token-cost +
232				 *	(length - put-token-cost) -
233				 *	(length - put-token-cost) * ccount)
234				 */
235				cc_initial_scores[i][j] =
236					- (tt.tt_set_token_cost +
237					   (i - tt.tt_put_token_cost) -
238					   (i - tt.tt_put_token_cost) * j);
239		}
240	}
241#ifndef cc_weight
242	for (i = 1; i < C(cc_wthresholds); i++) {
243		cc_wthresholds[i] =
244			((tt.tt_set_token_cost + tt.tt_put_token_cost) / i +
245				i / 5 + 1) *
246				cc_weight + 1;
247		cc_wlimits[i] = cc_wthresholds[i] + cc_weight;
248	}
249#endif
250#undef C
251	if ((cc_output = (struct cc **)
252	     malloc((unsigned) cc_bufsize * sizeof *cc_output)) == 0)
253		goto nomem;
254	if ((cc_hashcodes = (short *)
255	     malloc((unsigned) cc_bufsize * sizeof *cc_hashcodes)) == 0)
256		goto nomem;
257	if ((cc_htab = (struct cc **) malloc(HSIZE * sizeof *cc_htab)) == 0)
258		goto nomem;
259	if ((cc_tokens = (struct cc **)
260	     malloc((unsigned)
261	            (cc_ntoken + tt.tt_token_max - tt.tt_token_min + 1) *
262		    sizeof *cc_tokens)) == 0)
263		goto nomem;
264	if ((cc_undo = (struct cc_undo *)
265	     malloc((unsigned) cc_bufsize * sizeof *cc_undo)) == 0)
266		goto nomem;
267	for (i = tt.tt_token_min; i <= tt.tt_token_max; i++)
268		if ((cc_places[i] = (short *)
269		     malloc((unsigned) cc_bufsize * sizeof **cc_places)) == 0)
270			goto nomem;
271	cc_q0a.qforw = cc_q0a.qback = &cc_q0a;
272	cc_q0b.qforw = cc_q0b.qback = &cc_q0b;
273	cc_q1a.qforw = cc_q1a.qback = &cc_q1a;
274	cc_q1b.qforw = cc_q1b.qback = &cc_q1b;
275	if ((p = (struct cc *) malloc((unsigned) cc_ntoken * sizeof *p)) == 0)
276		goto nomem;
277	for (i = 0; i < tt.tt_ntoken; i++) {
278		p->code = i;
279		p->time = -1;
280		p->qback = cc_q0a.qback;
281		p->qforw = &cc_q0a;
282		p->qback->qforw = p;
283		cc_q0a.qback = p;
284		p++;
285	}
286	for (; i < cc_ntoken; i++) {
287		p->code = -1;
288		p->time = -1;
289		p->qback = cc_q1a.qback;
290		p->qforw = &cc_q1a;
291		p->qback->qforw = p;
292		cc_q1a.qback = p;
293		p++;
294	}
295	cc_tt_ob = tt_ob;
296	cc_tt_obe = tt_obe;
297	if ((cc_buffer = malloc((unsigned) cc_bufsize)) == 0)
298		goto nomem;
299	return 0;
300nomem:
301	wwerrno = WWE_NOMEM;
302	return -1;
303}
304
305void
306ccstart(void)
307{
308	ttflush();
309	tt_obp = tt_ob = cc_buffer;
310	tt_obe = tt_ob + cc_bufsize;
311	tt.tt_flush = ccflush;
312	if (cc_trace) {
313		cc_trace_fp = fopen("window-trace", "a");
314		(void) fcntl(fileno(cc_trace_fp), F_SETFD, 1);
315	}
316	ccreset();
317}
318
319void
320ccreset(void)
321{
322	struct cc *p;
323
324	memset((char *) cc_htab, 0, HSIZE * sizeof *cc_htab);
325	for (p = cc_q0a.qforw; p != &cc_q0a; p = p->qforw)
326		p->hback = 0;
327	for (p = cc_q1a.qforw; p != &cc_q1a; p = p->qforw)
328		p->hback = 0;
329}
330
331void
332ccend(void)
333{
334
335	ttflush();
336	tt_obp = tt_ob = cc_tt_ob;
337	tt_obe = cc_tt_obe;
338	tt.tt_flush = 0;
339	if (cc_trace_fp != NULL) {
340		(void) fclose(cc_trace_fp);
341		cc_trace_fp = NULL;
342	}
343}
344
345void
346ccflush(void)
347{
348	int bufsize = tt_obp - tt_ob;
349	int n;
350
351	if (tt_ob != cc_buffer)
352		abort();
353	if (cc_trace_fp != NULL) {
354		(void) fwrite(tt_ob, 1, bufsize, cc_trace_fp);
355		(void) putc(-1, cc_trace_fp);
356	}
357	tt.tt_flush = 0;
358	(*tt.tt_compress)(1);
359	if (bufsize < tt.tt_token_min) {
360		ttflush();
361		goto out;
362	}
363	tt_obp = tt_ob = cc_tt_ob;
364	tt_obe = cc_tt_obe;
365	cc_time0 = cc_time;
366	cc_time += bufsize;
367	n = cc_sweep_phase(cc_buffer, bufsize, cc_tokens);
368	cc_compress_phase(cc_output, bufsize, cc_tokens, n);
369	cc_output_phase(cc_buffer, cc_output, bufsize);
370	ttflush();
371	tt_obp = tt_ob = cc_buffer;
372	tt_obe = cc_buffer + cc_bufsize;
373out:
374	(*tt.tt_compress)(0);
375	tt.tt_flush = ccflush;
376}
377
378int
379cc_sweep_phase(char *buffer, int bufsize, struct cc **tokens)
380{
381	struct cc **pp = tokens;
382	int i, n;
383#ifdef STATS
384	int nn, ii;
385#endif
386
387#ifdef STATS
388	if (verbose >= 0)
389		time_begin();
390	if (verbose > 0)
391		printf("Sweep:");
392#endif
393	cc_sweep0(buffer, bufsize, tt.tt_token_min - 1);
394#ifdef STATS
395	ntoken_stat = 0;
396	nn = 0;
397	ii = 0;
398#endif
399	for (i = tt.tt_token_min; i <= tt.tt_token_max; i++) {
400#ifdef STATS
401		if (verbose > 0) {
402			if (ii > 7) {
403				printf("\n      ");
404				ii = 0;
405			}
406			ii++;
407			printf(" (%d", i);
408			(void) fflush(stdout);
409		}
410#endif
411		n = cc_sweep(buffer, bufsize, pp, i);
412		pp += n;
413#ifdef STATS
414		if (verbose > 0) {
415			if (--n > 0) {
416				printf(" %d", n);
417				nn += n;
418			}
419			putchar(')');
420		}
421#endif
422	}
423	qinsertq(&cc_q1b, &cc_q1a);
424#ifdef STATS
425	if (verbose > 0)
426		printf("\n       %d tokens, %d candidates\n",
427			ntoken_stat, nn);
428	if (verbose >= 0)
429		time_end();
430#endif
431	return pp - tokens;
432}
433
434void
435cc_sweep0(char *buffer, int n, int length)
436{
437	char *p;
438	short *hc;
439	int i;
440	short c;
441	short pc = tt.tt_padc;
442
443	/* n and length are at least 1 */
444	p = buffer++;
445	hc = cc_hashcodes;
446	i = n;
447	do {
448		if ((*hc++ = *p++) == pc)
449			hc[-1] = -1;
450	} while (--i);
451	while (--length) {
452		p = buffer++;
453		hc = cc_hashcodes;
454		for (i = n--; --i;) {
455			if ((c = *p++) == pc || *hc < 0)
456				c = -1;
457			else
458				c = hash(*hc, c);
459			*hc++ = c;
460		}
461	}
462}
463
464int
465cc_sweep(char *buffer, int bufsize, struct cc **tokens, int length)
466{
467	struct cc *p;
468	char *cp;
469	int i;
470	short *hc;
471	short *places = cc_places[length];
472	struct cc **pp = tokens;
473	short threshold = thresh(length);
474#ifndef cc_weight
475	short wthreshold = wthresh(length);
476	short limit = wlimit(length);
477#endif
478	int time0;
479	short pc = tt.tt_padc;
480
481	i = length - 1;
482	bufsize -= i;
483	cp = buffer + i;
484	hc = cc_hashcodes;
485	time0 = cc_time0;
486	for (i = 0; i < bufsize; i++, time0++) {
487		struct cc **h;
488
489		{
490			short *hc1 = hc;
491			short c = *cp++;
492			short hh;
493			if ((hh = *hc1) < 0 || c == pc) {
494				*hc1++ = -1;
495				hc = hc1;
496				continue;
497			}
498			h = cc_htab + (*hc1++ = hash(hh, c));
499			hc = hc1;
500		}
501		for (p = *h; p != 0; p = p->hforw)
502			if (p->length == (char) length) {
503				char *p1 = p->string;
504				char *p2 = cp - length;
505				int n = length;
506				do
507					if (*p1++ != *p2++)
508						goto fail;
509				while (--n);
510				break;
511			fail:;
512			}
513		if (p == 0) {
514			p = cc_q1a.qback;
515			if (p == &cc_q1a ||
516			    (p->time >= cc_time0 && p->length == (char) length))
517				continue;
518			if (p->hback != 0)
519				if ((*p->hback = p->hforw) != 0)
520					p->hforw->hback = p->hback;
521			{
522				char *p1 = p->string;
523				char *p2 = cp - length;
524				int n = length;
525				do
526					*p1++ = *p2++;
527				while (--n);
528			}
529			p->length = length;
530#ifndef cc_weight
531			p->weight = cc_weight;
532#endif
533			p->time = time0;
534			p->bcount = 1;
535			p->ccount = 0;
536			p->flag = 0;
537			if ((p->hforw = *h) != 0)
538				p->hforw->hback = &p->hforw;
539			*h = p;
540			p->hback = h;
541			qinsert(p, &cc_q1a);
542			places[i] = -1;
543			p->places = i;
544#ifdef STATS
545			ntoken_stat++;
546#endif
547		} else if (p->time < cc_time0) {
548#ifndef cc_weight
549			if ((p->weight += p->time - time0) < 0)
550				p->weight = cc_weight;
551			else if ((p->weight += cc_weight) > limit)
552				p->weight = limit;
553#endif
554			p->time = time0;
555			p->bcount = 1;
556			p->ccount = 0;
557			if (p->code >= 0) {
558				p->flag = 1;
559				*pp++ = p;
560			} else
561#ifndef cc_weight
562			if (p->weight >= wthreshold) {
563				p->flag = 1;
564				*pp++ = p;
565				qinsert(p, &cc_q1b);
566			} else
567#endif
568			{
569				p->flag = 0;
570				qinsert(p, &cc_q1a);
571			}
572			places[i] = -1;
573			p->places = i;
574#ifdef STATS
575			ntoken_stat++;
576#endif
577		} else if (p->time + length > time0) {
578			/*
579			 * overlapping token, don't count as two and
580			 * don't update time0, but do adjust weight to offset
581			 * the difference
582			 */
583#ifndef cc_weight
584			if (cc_weight != 0) {	/* XXX */
585				p->weight += time0 - p->time;
586				if (!p->flag && p->weight >= wthreshold) {
587					p->flag = 1;
588					*pp++ = p;
589					qinsert(p, &cc_q1b);
590				}
591			}
592#endif
593			places[i] = p->places;
594			p->places = i;
595		} else {
596#ifndef cc_weight
597			if ((p->weight += p->time - time0) < 0)
598				p->weight = cc_weight;
599			else if ((p->weight += cc_weight) > limit)
600				p->weight = limit;
601#endif
602			p->time = time0;
603			p->bcount++;
604			if (!p->flag &&
605			    /* code must be < 0 if flag false here */
606			    (p->bcount >= threshold
607#ifndef cc_weight
608			     || p->weight >= wthreshold
609#endif
610			     )) {
611				p->flag = 1;
612				*pp++ = p;
613				qinsert(p, &cc_q1b);
614			}
615			places[i] = p->places;
616			p->places = i;
617		}
618	}
619	if ((i = pp - tokens) > 0) {
620		*pp = 0;
621		if (cc_reverse)
622			cc_sweep_reverse(tokens, places);
623		if (cc_sort && i > 1) {
624			qsort((char *) tokens, i, sizeof *tokens,
625			      cc_token_compare);
626		}
627		if (cc_chop) {
628			if ((i = i * cc_chop / 100) == 0)
629				i = 1;
630			tokens[i] = 0;
631		}
632		i++;
633	}
634	return i;
635}
636
637void
638cc_sweep_reverse(struct cc **pp, short int *places)
639{
640	struct cc *p;
641	short frnt, back, t;
642
643	while ((p = *pp++) != 0) {
644		back = -1;
645		t = p->places;
646		/* the list is never empty */
647		do {
648			frnt = places[t];
649			places[t] = back;
650			back = t;
651		} while ((t = frnt) >= 0);
652		p->places = back;
653	}
654}
655
656void
657cc_compress_phase(struct cc **output, int bufsize, struct cc **tokens, int ntoken)
658{
659	int i;
660
661	memset((char *) output, 0, bufsize * sizeof *output);
662	for (i = 0; i < cc_npass0; i++)
663		cc_compress_phase1(output, tokens, ntoken, 0);
664	for (i = 0; i < cc_npass1; i++)
665		cc_compress_phase1(output, tokens, ntoken, 1);
666	cc_compress_cleanup(output, bufsize);
667}
668
669void
670cc_compress_phase1(struct cc **output, struct cc **tokens, int ntoken, int flag)
671{
672	struct cc **pp;
673#ifdef STATS
674	int i = 0;
675	int nt = 0, cc = 0, nc = 0;
676#endif
677
678#ifdef STATS
679	if (verbose >= 0)
680		time_begin();
681	if (verbose > 0)
682		printf("Compress:");
683#endif
684	pp = tokens;
685	while (pp < tokens + ntoken) {
686#ifdef STATS
687		if (verbose > 0) {
688			ntoken_stat = 0;
689			ccount_stat = 0;
690			ncover_stat = 0;
691			if (i > 2) {
692				printf("\n         ");
693				i = 0;
694			}
695			i++;
696			printf(" (%d", (*pp)->length);
697			(void) fflush(stdout);
698		}
699#endif
700		pp += cc_compress(output, pp, flag);
701#ifdef STATS
702		if (verbose > 0) {
703			printf(" %dt %du %dc)", ntoken_stat, ccount_stat,
704			       ncover_stat);
705			nt += ntoken_stat;
706			cc += ccount_stat;
707			nc += ncover_stat;
708		}
709#endif
710	}
711#ifdef STATS
712	if (verbose > 0)
713		printf("\n   total: (%dt %du %dc)\n", nt, cc, nc);
714	if (verbose >= 0)
715		time_end();
716#endif
717}
718
719void
720cc_compress_cleanup(struct cc **output, int bufsize)
721{
722	struct cc **end;
723
724	/* the previous output phase may have been interrupted */
725	qinsertq(&cc_q0b, &cc_q0a);
726	for (end = output + bufsize; output < end;) {
727		struct cc *p;
728		int length;
729		if ((p = *output) == 0) {
730			output++;
731			continue;
732		}
733		length = p->length;
734		if (!p->flag) {
735		} else if (p->code >= 0) {
736			qinsert(p, &cc_q0b);
737			p->flag = 0;
738		} else if (p->ccount == 0) {
739			*output = 0;
740		} else if (p->ccount >= thresh(length)
741#ifndef cc_weight
742			   || wthreshp(p->weight, length)
743#endif
744			   ) {
745			p->flag = 0;
746		} else {
747			p->ccount = 0;
748			*output = 0;
749		}
750		output += length;
751	}
752}
753
754int
755cc_compress(struct cc **output, struct cc **tokens, char flag)
756{
757	struct cc **pp = tokens;
758	struct cc *p = *pp++;
759	int length = p->length;
760	int threshold = thresh(length);
761#ifndef cc_weight
762	short wthreshold = wthresh(length);
763#endif
764	short *places = cc_places[length];
765	int *initial_scores = cc_initial_scores[length];
766	int initial_score0 = put_token_score(length);
767
768	do {
769		int score;
770		struct cc_undo *undop;
771		int ccount;
772#ifdef STATS
773		int ncover;
774#endif
775		int i;
776
777		ccount = p->ccount;
778		if ((short) ccount >= p->bcount)
779			continue;
780		if (p->code >= 0 || ccount >= threshold)
781			score = 0;
782#ifndef cc_weight
783		else if (p->weight >= wthreshold)
784			/* allow one fewer match than normal */
785			/* XXX, should adjust for ccount */
786			score = - tt.tt_set_token_cost;
787#endif
788		else
789			score = initial_scores[ccount];
790		undop = cc_undo;
791#ifdef STATS
792		ncover = 0;
793#endif
794		for (i = p->places; i >= 0; i = places[i]) {
795			struct cc **jp;
796			struct cc *x;
797			struct cc **ip = output + i;
798			int score0 = initial_score0;
799			struct cc **iip = ip + length;
800			struct cc_undo *undop1 = undop;
801
802			if ((x = *(jp = ip)) != 0)
803				goto z;
804			while (--jp >= output)
805				if ((x = *jp) != 0) {
806					if (jp + x->length > ip)
807						goto z;
808					break;
809				}
810			jp = ip + 1;
811			while (jp < iip) {
812				if ((x = *jp) == 0) {
813					jp++;
814					continue;
815				}
816			z:
817				if (x == p)
818					goto undo;
819#ifdef STATS
820				ncover++;
821#endif
822				undop->pos = jp;
823				undop->val = x;
824				undop++;
825				*jp = 0;
826				x->ccount--;
827				score_adjust(score0, x);
828				if (score0 < 0 && flag)
829					goto undo;
830				jp += x->length;
831			}
832			undop->pos = ip;
833			undop->val = 0;
834			undop++;
835			*ip = p;
836			ccount++;
837			score += score0;
838			continue;
839		undo:
840			while (--undop >= undop1)
841				if ((*undop->pos = x = undop->val))
842					x->ccount++;
843			undop++;
844		}
845		if (score > 0) {
846#ifdef STATS
847			ccount_stat += ccount - p->ccount;
848			ntoken_stat++;
849			ncover_stat += ncover;
850#endif
851			p->ccount = ccount;
852		} else {
853			struct cc_undo *u = cc_undo;
854			while (--undop >= u) {
855				struct cc *x;
856				if ((*undop->pos = x = undop->val))
857					x->ccount++;
858			}
859		}
860	} while ((p = *pp++) != 0);
861	return pp - tokens;
862}
863
864void
865cc_output_phase(char *buffer, struct cc **output, int bufsize)
866{
867	int i;
868	struct cc *p, *p1;
869
870	for (i = 0; i < bufsize;) {
871		if ((p = output[i]) == 0) {
872			ttputc(buffer[i]);
873			i++;
874		} else if (p->code >= 0) {
875			if (--p->ccount == 0)
876				qinsert(p, &cc_q0a);
877			(*tt.tt_put_token)(p->code, p->string, p->length);
878			wwntokuse++;
879			wwntoksave += put_token_score(p->length);
880			i += p->length;
881		} else if ((p1 = cc_q0a.qback) != &cc_q0a) {
882			p->code = p1->code;
883			p1->code = -1;
884			qinsert(p1, &cc_q1a);
885			if (--p->ccount == 0)
886				qinsert(p, &cc_q0a);
887			else
888				qinsert(p, &cc_q0b);
889			(*tt.tt_set_token)(p->code, p->string, p->length);
890			wwntokdef++;
891			wwntoksave -= tt.tt_set_token_cost;
892			i += p->length;
893		} else {
894			p->ccount--;
895			ttwrite(p->string, p->length);
896			wwntokbad++;
897			i += p->length;
898		}
899	}
900	wwntokc += bufsize;
901}
902
903int
904cc_token_compare(const void *p1, const void *p2)
905{
906	const struct cc * const * vp1 = p1;
907	const struct cc * const * vp2 = p2;
908	return (*vp2)->bcount - (*vp1)->bcount;
909}
910