11590Srgrimes/*- 21590Srgrimes * Copyright (c) 1985, 1986, 1992, 1993 31590Srgrimes * The Regents of the University of California. All rights reserved. 41590Srgrimes * 51590Srgrimes * This code is derived from software contributed to Berkeley by 61590Srgrimes * Diomidis Spinellis and James A. Woods, derived from original 71590Srgrimes * work by Spencer Thomas and Joseph Orost. 81590Srgrimes * 91590Srgrimes * Redistribution and use in source and binary forms, with or without 101590Srgrimes * modification, are permitted provided that the following conditions 111590Srgrimes * are met: 121590Srgrimes * 1. Redistributions of source code must retain the above copyright 131590Srgrimes * notice, this list of conditions and the following disclaimer. 141590Srgrimes * 2. Redistributions in binary form must reproduce the above copyright 151590Srgrimes * notice, this list of conditions and the following disclaimer in the 161590Srgrimes * documentation and/or other materials provided with the distribution. 171590Srgrimes * 4. Neither the name of the University nor the names of its contributors 181590Srgrimes * may be used to endorse or promote products derived from this software 191590Srgrimes * without specific prior written permission. 201590Srgrimes * 211590Srgrimes * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 221590Srgrimes * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 231590Srgrimes * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 241590Srgrimes * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 251590Srgrimes * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 261590Srgrimes * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 271590Srgrimes * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 281590Srgrimes * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 291590Srgrimes * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 301590Srgrimes * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 311590Srgrimes * SUCH DAMAGE. 321590Srgrimes */ 331590Srgrimes 341590Srgrimes#if defined(LIBC_SCCS) && !defined(lint) 351590Srgrimesstatic char sccsid[] = "@(#)zopen.c 8.1 (Berkeley) 6/27/93"; 361590Srgrimes#endif /* LIBC_SCCS and not lint */ 371590Srgrimes 3887628Sdwmalone#include <sys/cdefs.h> 3987628Sdwmalone__FBSDID("$FreeBSD$"); 4087628Sdwmalone 411590Srgrimes/*- 421590Srgrimes * fcompress.c - File compression ala IEEE Computer, June 1984. 431590Srgrimes * 441590Srgrimes * Compress authors: 451590Srgrimes * Spencer W. Thomas (decvax!utah-cs!thomas) 461590Srgrimes * Jim McKie (decvax!mcvax!jim) 471590Srgrimes * Steve Davies (decvax!vax135!petsd!peora!srd) 481590Srgrimes * Ken Turkowski (decvax!decwrl!turtlevax!ken) 491590Srgrimes * James A. Woods (decvax!ihnp4!ames!jaw) 501590Srgrimes * Joe Orost (decvax!vax135!petsd!joe) 511590Srgrimes * 521590Srgrimes * Cleaned up and converted to library returning I/O streams by 531590Srgrimes * Diomidis Spinellis <dds@doc.ic.ac.uk>. 541590Srgrimes * 551590Srgrimes * zopen(filename, mode, bits) 561590Srgrimes * Returns a FILE * that can be used for read or write. The modes 571590Srgrimes * supported are only "r" and "w". Seeking is not allowed. On 581590Srgrimes * reading the file is decompressed, on writing it is compressed. 591590Srgrimes * The output is compatible with compress(1) with 16 bit tables. 601590Srgrimes * Any file produced by compress(1) can be read. 611590Srgrimes */ 621590Srgrimes 631590Srgrimes#include <sys/param.h> 641590Srgrimes#include <sys/stat.h> 651590Srgrimes 66200462Sdelphij#include <ctype.h> 671590Srgrimes#include <errno.h> 681590Srgrimes#include <signal.h> 691590Srgrimes#include <stdio.h> 701590Srgrimes#include <stdlib.h> 711590Srgrimes#include <string.h> 72200462Sdelphij#include <unistd.h> 7317717Swosch#include "zopen.h" 741590Srgrimes 751590Srgrimes#define BITS 16 /* Default bits. */ 761590Srgrimes#define HSIZE 69001 /* 95% occupancy */ 771590Srgrimes 781590Srgrimes/* A code_int must be able to hold 2**BITS values of type int, and also -1. */ 791590Srgrimestypedef long code_int; 801590Srgrimestypedef long count_int; 811590Srgrimes 821590Srgrimestypedef u_char char_type; 831590Srgrimesstatic char_type magic_header[] = 841590Srgrimes {'\037', '\235'}; /* 1F 9D */ 851590Srgrimes 861590Srgrimes#define BIT_MASK 0x1f /* Defines for third byte of header. */ 871590Srgrimes#define BLOCK_MASK 0x80 881590Srgrimes 891590Srgrimes/* 901590Srgrimes * Masks 0x40 and 0x20 are free. I think 0x20 should mean that there is 911590Srgrimes * a fourth header byte (for expansion). 921590Srgrimes */ 931590Srgrimes#define INIT_BITS 9 /* Initial number of bits/code. */ 941590Srgrimes 951590Srgrimes#define MAXCODE(n_bits) ((1 << (n_bits)) - 1) 961590Srgrimes 971590Srgrimesstruct s_zstate { 981590Srgrimes FILE *zs_fp; /* File stream for I/O */ 991590Srgrimes char zs_mode; /* r or w */ 1001590Srgrimes enum { 1011590Srgrimes S_START, S_MIDDLE, S_EOF 1021590Srgrimes } zs_state; /* State of computation */ 10387247Smarkm u_int zs_n_bits; /* Number of bits/code. */ 10487247Smarkm u_int zs_maxbits; /* User settable max # bits/code. */ 1051590Srgrimes code_int zs_maxcode; /* Maximum code, given n_bits. */ 1061590Srgrimes code_int zs_maxmaxcode; /* Should NEVER generate this code. */ 1071590Srgrimes count_int zs_htab [HSIZE]; 1081590Srgrimes u_short zs_codetab [HSIZE]; 1091590Srgrimes code_int zs_hsize; /* For dynamic table sizing. */ 1101590Srgrimes code_int zs_free_ent; /* First unused entry. */ 1111590Srgrimes /* 1121590Srgrimes * Block compression parameters -- after all codes are used up, 1131590Srgrimes * and compression rate changes, start over. 1141590Srgrimes */ 1151590Srgrimes int zs_block_compress; 1161590Srgrimes int zs_clear_flg; 1171590Srgrimes long zs_ratio; 1181590Srgrimes count_int zs_checkpoint; 11987247Smarkm u_int zs_offset; 1201590Srgrimes long zs_in_count; /* Length of input. */ 1211590Srgrimes long zs_bytes_out; /* Length of compressed output. */ 1221590Srgrimes long zs_out_count; /* # of codes output (for debugging). */ 1231590Srgrimes char_type zs_buf[BITS]; 1241590Srgrimes union { 1251590Srgrimes struct { 1261590Srgrimes long zs_fcode; 1271590Srgrimes code_int zs_ent; 1281590Srgrimes code_int zs_hsize_reg; 1291590Srgrimes int zs_hshift; 130213927Sbcr } w; /* Write parameters */ 1311590Srgrimes struct { 1321590Srgrimes char_type *zs_stackp; 1331590Srgrimes int zs_finchar; 1341590Srgrimes code_int zs_code, zs_oldcode, zs_incode; 1351590Srgrimes int zs_roffset, zs_size; 1361590Srgrimes char_type zs_gbuf[BITS]; 1371590Srgrimes } r; /* Read parameters */ 1381590Srgrimes } u; 1391590Srgrimes}; 1401590Srgrimes 1411590Srgrimes/* Definitions to retain old variable names */ 1421590Srgrimes#define fp zs->zs_fp 1431590Srgrimes#define zmode zs->zs_mode 1441590Srgrimes#define state zs->zs_state 1451590Srgrimes#define n_bits zs->zs_n_bits 1461590Srgrimes#define maxbits zs->zs_maxbits 1471590Srgrimes#define maxcode zs->zs_maxcode 1481590Srgrimes#define maxmaxcode zs->zs_maxmaxcode 1491590Srgrimes#define htab zs->zs_htab 1501590Srgrimes#define codetab zs->zs_codetab 1511590Srgrimes#define hsize zs->zs_hsize 1521590Srgrimes#define free_ent zs->zs_free_ent 1531590Srgrimes#define block_compress zs->zs_block_compress 1541590Srgrimes#define clear_flg zs->zs_clear_flg 1551590Srgrimes#define ratio zs->zs_ratio 1561590Srgrimes#define checkpoint zs->zs_checkpoint 1571590Srgrimes#define offset zs->zs_offset 1581590Srgrimes#define in_count zs->zs_in_count 1591590Srgrimes#define bytes_out zs->zs_bytes_out 1601590Srgrimes#define out_count zs->zs_out_count 1611590Srgrimes#define buf zs->zs_buf 1621590Srgrimes#define fcode zs->u.w.zs_fcode 1631590Srgrimes#define hsize_reg zs->u.w.zs_hsize_reg 1641590Srgrimes#define ent zs->u.w.zs_ent 1651590Srgrimes#define hshift zs->u.w.zs_hshift 1661590Srgrimes#define stackp zs->u.r.zs_stackp 1671590Srgrimes#define finchar zs->u.r.zs_finchar 1681590Srgrimes#define code zs->u.r.zs_code 1691590Srgrimes#define oldcode zs->u.r.zs_oldcode 1701590Srgrimes#define incode zs->u.r.zs_incode 1711590Srgrimes#define roffset zs->u.r.zs_roffset 1721590Srgrimes#define size zs->u.r.zs_size 1731590Srgrimes#define gbuf zs->u.r.zs_gbuf 1741590Srgrimes 1751590Srgrimes/* 1761590Srgrimes * To save much memory, we overlay the table used by compress() with those 1771590Srgrimes * used by decompress(). The tab_prefix table is the same size and type as 1781590Srgrimes * the codetab. The tab_suffix table needs 2**BITS characters. We get this 1791590Srgrimes * from the beginning of htab. The output stack uses the rest of htab, and 1801590Srgrimes * contains characters. There is plenty of room for any possible stack 1811590Srgrimes * (stack used to be 8000 characters). 1821590Srgrimes */ 1831590Srgrimes 1841590Srgrimes#define htabof(i) htab[i] 1851590Srgrimes#define codetabof(i) codetab[i] 1861590Srgrimes 1871590Srgrimes#define tab_prefixof(i) codetabof(i) 1881590Srgrimes#define tab_suffixof(i) ((char_type *)(htab))[i] 1891590Srgrimes#define de_stack ((char_type *)&tab_suffixof(1 << BITS)) 1901590Srgrimes 1911590Srgrimes#define CHECK_GAP 10000 /* Ratio check interval. */ 1921590Srgrimes 1931590Srgrimes/* 1941590Srgrimes * the next two codes should not be changed lightly, as they must not 1951590Srgrimes * lie within the contiguous general code space. 1961590Srgrimes */ 1971590Srgrimes#define FIRST 257 /* First free entry. */ 1981590Srgrimes#define CLEAR 256 /* Table clear output code. */ 1991590Srgrimes 20092920Simpstatic int cl_block(struct s_zstate *); 20192920Simpstatic void cl_hash(struct s_zstate *, count_int); 20292920Simpstatic code_int getcode(struct s_zstate *); 20392920Simpstatic int output(struct s_zstate *, code_int); 20492920Simpstatic int zclose(void *); 20592920Simpstatic int zread(void *, char *, int); 20692920Simpstatic int zwrite(void *, const char *, int); 2071590Srgrimes 2081590Srgrimes/*- 2091590Srgrimes * Algorithm from "A Technique for High Performance Data Compression", 2101590Srgrimes * Terry A. Welch, IEEE Computer Vol 17, No 6 (June 1984), pp 8-19. 2111590Srgrimes * 2121590Srgrimes * Algorithm: 2131590Srgrimes * Modified Lempel-Ziv method (LZW). Basically finds common 2141590Srgrimes * substrings and replaces them with a variable size code. This is 2151590Srgrimes * deterministic, and can be done on the fly. Thus, the decompression 2161590Srgrimes * procedure needs no input table, but tracks the way the table was built. 2171590Srgrimes */ 2181590Srgrimes 2191590Srgrimes/*- 2201590Srgrimes * compress write 2211590Srgrimes * 2221590Srgrimes * Algorithm: use open addressing double hashing (no chaining) on the 2231590Srgrimes * prefix code / next character combination. We do a variant of Knuth's 2241590Srgrimes * algorithm D (vol. 3, sec. 6.4) along with G. Knott's relatively-prime 2251590Srgrimes * secondary probe. Here, the modular division first probe is gives way 2261590Srgrimes * to a faster exclusive-or manipulation. Also do block compression with 2271590Srgrimes * an adaptive reset, whereby the code table is cleared when the compression 2281590Srgrimes * ratio decreases, but after the table fills. The variable-length output 2291590Srgrimes * codes are re-sized at this point, and a special CLEAR code is generated 2301590Srgrimes * for the decompressor. Late addition: construct the table according to 2311590Srgrimes * file size for noticeable speed improvement on small files. Please direct 2321590Srgrimes * questions about this implementation to ames!jaw. 2331590Srgrimes */ 2341590Srgrimesstatic int 235100820Sdwmalonezwrite(void *cookie, const char *wbp, int num) 2361590Srgrimes{ 23787214Smarkm code_int i; 23887214Smarkm int c, disp; 2391590Srgrimes struct s_zstate *zs; 240146336Skan const u_char *bp; 2411590Srgrimes u_char tmp; 2421590Srgrimes int count; 2431590Srgrimes 2441590Srgrimes if (num == 0) 2451590Srgrimes return (0); 2461590Srgrimes 2471590Srgrimes zs = cookie; 2481590Srgrimes count = num; 249146336Skan bp = (const u_char *)wbp; 2501590Srgrimes if (state == S_MIDDLE) 2511590Srgrimes goto middle; 2521590Srgrimes state = S_MIDDLE; 2531590Srgrimes 2545342Sats maxmaxcode = 1L << maxbits; 2551590Srgrimes if (fwrite(magic_header, 2561590Srgrimes sizeof(char), sizeof(magic_header), fp) != sizeof(magic_header)) 2571590Srgrimes return (-1); 2585302Sjkh tmp = (u_char)((maxbits) | block_compress); 2591590Srgrimes if (fwrite(&tmp, sizeof(char), sizeof(tmp), fp) != sizeof(tmp)) 2601590Srgrimes return (-1); 2611590Srgrimes 2621590Srgrimes offset = 0; 2631590Srgrimes bytes_out = 3; /* Includes 3-byte header mojo. */ 2641590Srgrimes out_count = 0; 2651590Srgrimes clear_flg = 0; 2661590Srgrimes ratio = 0; 2671590Srgrimes in_count = 1; 2681590Srgrimes checkpoint = CHECK_GAP; 2691590Srgrimes maxcode = MAXCODE(n_bits = INIT_BITS); 2701590Srgrimes free_ent = ((block_compress) ? FIRST : 256); 2711590Srgrimes 2721590Srgrimes ent = *bp++; 2731590Srgrimes --count; 2741590Srgrimes 2751590Srgrimes hshift = 0; 2761590Srgrimes for (fcode = (long)hsize; fcode < 65536L; fcode *= 2L) 2771590Srgrimes hshift++; 2781590Srgrimes hshift = 8 - hshift; /* Set hash code range bound. */ 2791590Srgrimes 2801590Srgrimes hsize_reg = hsize; 2811590Srgrimes cl_hash(zs, (count_int)hsize_reg); /* Clear hash table. */ 2821590Srgrimes 2831590Srgrimesmiddle: for (i = 0; count--;) { 2841590Srgrimes c = *bp++; 2851590Srgrimes in_count++; 2861590Srgrimes fcode = (long)(((long)c << maxbits) + ent); 2871590Srgrimes i = ((c << hshift) ^ ent); /* Xor hashing. */ 2881590Srgrimes 2891590Srgrimes if (htabof(i) == fcode) { 2901590Srgrimes ent = codetabof(i); 2911590Srgrimes continue; 2921590Srgrimes } else if ((long)htabof(i) < 0) /* Empty slot. */ 2931590Srgrimes goto nomatch; 2941590Srgrimes disp = hsize_reg - i; /* Secondary hash (after G. Knott). */ 2951590Srgrimes if (i == 0) 2961590Srgrimes disp = 1; 2971590Srgrimesprobe: if ((i -= disp) < 0) 2981590Srgrimes i += hsize_reg; 2991590Srgrimes 3001590Srgrimes if (htabof(i) == fcode) { 3011590Srgrimes ent = codetabof(i); 3021590Srgrimes continue; 3031590Srgrimes } 3041590Srgrimes if ((long)htabof(i) >= 0) 3051590Srgrimes goto probe; 3061590Srgrimesnomatch: if (output(zs, (code_int) ent) == -1) 3071590Srgrimes return (-1); 3081590Srgrimes out_count++; 3091590Srgrimes ent = c; 3101590Srgrimes if (free_ent < maxmaxcode) { 3111590Srgrimes codetabof(i) = free_ent++; /* code -> hashtable */ 3121590Srgrimes htabof(i) = fcode; 3131590Srgrimes } else if ((count_int)in_count >= 3141590Srgrimes checkpoint && block_compress) { 3151590Srgrimes if (cl_block(zs) == -1) 3161590Srgrimes return (-1); 3171590Srgrimes } 3181590Srgrimes } 3191590Srgrimes return (num); 3201590Srgrimes} 3211590Srgrimes 3221590Srgrimesstatic int 323100820Sdwmalonezclose(void *cookie) 3241590Srgrimes{ 3251590Srgrimes struct s_zstate *zs; 3261590Srgrimes int rval; 3271590Srgrimes 3281590Srgrimes zs = cookie; 3291590Srgrimes if (zmode == 'w') { /* Put out the final code. */ 3301590Srgrimes if (output(zs, (code_int) ent) == -1) { 3311590Srgrimes (void)fclose(fp); 3321590Srgrimes free(zs); 3331590Srgrimes return (-1); 3341590Srgrimes } 3351590Srgrimes out_count++; 3361590Srgrimes if (output(zs, (code_int) - 1) == -1) { 3371590Srgrimes (void)fclose(fp); 3381590Srgrimes free(zs); 3391590Srgrimes return (-1); 3401590Srgrimes } 3411590Srgrimes } 3421590Srgrimes rval = fclose(fp) == EOF ? -1 : 0; 3431590Srgrimes free(zs); 3441590Srgrimes return (rval); 3451590Srgrimes} 3461590Srgrimes 3471590Srgrimes/*- 3481590Srgrimes * Output the given code. 3491590Srgrimes * Inputs: 3501590Srgrimes * code: A n_bits-bit integer. If == -1, then EOF. This assumes 3511590Srgrimes * that n_bits =< (long)wordsize - 1. 3521590Srgrimes * Outputs: 3531590Srgrimes * Outputs code to the file. 3541590Srgrimes * Assumptions: 3551590Srgrimes * Chars are 8 bits long. 3561590Srgrimes * Algorithm: 3571590Srgrimes * Maintain a BITS character long buffer (so that 8 codes will 3581590Srgrimes * fit in it exactly). Use the VAX insv instruction to insert each 3591590Srgrimes * code in turn. When the buffer fills up empty it and start over. 3601590Srgrimes */ 3611590Srgrimes 3621590Srgrimesstatic char_type lmask[9] = 3631590Srgrimes {0xff, 0xfe, 0xfc, 0xf8, 0xf0, 0xe0, 0xc0, 0x80, 0x00}; 3641590Srgrimesstatic char_type rmask[9] = 3651590Srgrimes {0x00, 0x01, 0x03, 0x07, 0x0f, 0x1f, 0x3f, 0x7f, 0xff}; 3661590Srgrimes 3671590Srgrimesstatic int 368100820Sdwmaloneoutput(struct s_zstate *zs, code_int ocode) 3691590Srgrimes{ 37087214Smarkm int r_off; 37187247Smarkm u_int bits; 37287214Smarkm char_type *bp; 3731590Srgrimes 3741590Srgrimes r_off = offset; 3751590Srgrimes bits = n_bits; 3761590Srgrimes bp = buf; 3771590Srgrimes if (ocode >= 0) { 3781590Srgrimes /* Get to the first byte. */ 3791590Srgrimes bp += (r_off >> 3); 3801590Srgrimes r_off &= 7; 3811590Srgrimes /* 3821590Srgrimes * Since ocode is always >= 8 bits, only need to mask the first 3831590Srgrimes * hunk on the left. 3841590Srgrimes */ 38541568Sarchie *bp = (*bp & rmask[r_off]) | ((ocode << r_off) & lmask[r_off]); 3861590Srgrimes bp++; 3871590Srgrimes bits -= (8 - r_off); 3881590Srgrimes ocode >>= 8 - r_off; 3891590Srgrimes /* Get any 8 bit parts in the middle (<=1 for up to 16 bits). */ 3901590Srgrimes if (bits >= 8) { 3911590Srgrimes *bp++ = ocode; 3921590Srgrimes ocode >>= 8; 3931590Srgrimes bits -= 8; 3941590Srgrimes } 3951590Srgrimes /* Last bits. */ 3961590Srgrimes if (bits) 3971590Srgrimes *bp = ocode; 3981590Srgrimes offset += n_bits; 3991590Srgrimes if (offset == (n_bits << 3)) { 4001590Srgrimes bp = buf; 4011590Srgrimes bits = n_bits; 4021590Srgrimes bytes_out += bits; 4031590Srgrimes if (fwrite(bp, sizeof(char), bits, fp) != bits) 4041590Srgrimes return (-1); 4051590Srgrimes bp += bits; 4061590Srgrimes bits = 0; 4071590Srgrimes offset = 0; 4081590Srgrimes } 4091590Srgrimes /* 4101590Srgrimes * If the next entry is going to be too big for the ocode size, 4111590Srgrimes * then increase it, if possible. 4121590Srgrimes */ 4131590Srgrimes if (free_ent > maxcode || (clear_flg > 0)) { 4141590Srgrimes /* 4151590Srgrimes * Write the whole buffer, because the input side won't 4161590Srgrimes * discover the size increase until after it has read it. 4171590Srgrimes */ 4181590Srgrimes if (offset > 0) { 4191590Srgrimes if (fwrite(buf, 1, n_bits, fp) != n_bits) 4201590Srgrimes return (-1); 4211590Srgrimes bytes_out += n_bits; 4221590Srgrimes } 4231590Srgrimes offset = 0; 4241590Srgrimes 4251590Srgrimes if (clear_flg) { 4261590Srgrimes maxcode = MAXCODE(n_bits = INIT_BITS); 4271590Srgrimes clear_flg = 0; 4281590Srgrimes } else { 4291590Srgrimes n_bits++; 4301590Srgrimes if (n_bits == maxbits) 4311590Srgrimes maxcode = maxmaxcode; 4321590Srgrimes else 4331590Srgrimes maxcode = MAXCODE(n_bits); 4341590Srgrimes } 4351590Srgrimes } 4361590Srgrimes } else { 4371590Srgrimes /* At EOF, write the rest of the buffer. */ 4381590Srgrimes if (offset > 0) { 4391590Srgrimes offset = (offset + 7) / 8; 4401590Srgrimes if (fwrite(buf, 1, offset, fp) != offset) 4411590Srgrimes return (-1); 4421590Srgrimes bytes_out += offset; 4431590Srgrimes } 4441590Srgrimes offset = 0; 4451590Srgrimes } 4461590Srgrimes return (0); 4471590Srgrimes} 4481590Srgrimes 4491590Srgrimes/* 4501590Srgrimes * Decompress read. This routine adapts to the codes in the file building 4511590Srgrimes * the "string" table on-the-fly; requiring no table to be stored in the 4521590Srgrimes * compressed file. The tables used herein are shared with those of the 4531590Srgrimes * compress() routine. See the definitions above. 4541590Srgrimes */ 4551590Srgrimesstatic int 456100820Sdwmalonezread(void *cookie, char *rbp, int num) 4571590Srgrimes{ 45887214Smarkm u_int count; 4591590Srgrimes struct s_zstate *zs; 4601590Srgrimes u_char *bp, header[3]; 4611590Srgrimes 4621590Srgrimes if (num == 0) 4631590Srgrimes return (0); 4641590Srgrimes 4651590Srgrimes zs = cookie; 4661590Srgrimes count = num; 4671590Srgrimes bp = (u_char *)rbp; 4681590Srgrimes switch (state) { 4691590Srgrimes case S_START: 4701590Srgrimes state = S_MIDDLE; 4711590Srgrimes break; 4721590Srgrimes case S_MIDDLE: 4731590Srgrimes goto middle; 4741590Srgrimes case S_EOF: 4751590Srgrimes goto eof; 4761590Srgrimes } 4771590Srgrimes 4781590Srgrimes /* Check the magic number */ 4791590Srgrimes if (fread(header, 4801590Srgrimes sizeof(char), sizeof(header), fp) != sizeof(header) || 4811590Srgrimes memcmp(header, magic_header, sizeof(magic_header)) != 0) { 4821590Srgrimes errno = EFTYPE; 4831590Srgrimes return (-1); 4841590Srgrimes } 4851590Srgrimes maxbits = header[2]; /* Set -b from file. */ 4861590Srgrimes block_compress = maxbits & BLOCK_MASK; 4871590Srgrimes maxbits &= BIT_MASK; 4881590Srgrimes maxmaxcode = 1L << maxbits; 489225827Sbz if (maxbits > BITS || maxbits < 12) { 4901590Srgrimes errno = EFTYPE; 4911590Srgrimes return (-1); 4921590Srgrimes } 4931590Srgrimes /* As above, initialize the first 256 entries in the table. */ 4941590Srgrimes maxcode = MAXCODE(n_bits = INIT_BITS); 4951590Srgrimes for (code = 255; code >= 0; code--) { 4961590Srgrimes tab_prefixof(code) = 0; 4971590Srgrimes tab_suffixof(code) = (char_type) code; 4981590Srgrimes } 4991590Srgrimes free_ent = block_compress ? FIRST : 256; 5001590Srgrimes 5011590Srgrimes finchar = oldcode = getcode(zs); 5021590Srgrimes if (oldcode == -1) /* EOF already? */ 5031590Srgrimes return (0); /* Get out of here */ 5041590Srgrimes 5051590Srgrimes /* First code must be 8 bits = char. */ 5061590Srgrimes *bp++ = (u_char)finchar; 5071590Srgrimes count--; 5081590Srgrimes stackp = de_stack; 5091590Srgrimes 5101590Srgrimes while ((code = getcode(zs)) > -1) { 5111590Srgrimes 5121590Srgrimes if ((code == CLEAR) && block_compress) { 5131590Srgrimes for (code = 255; code >= 0; code--) 5141590Srgrimes tab_prefixof(code) = 0; 5151590Srgrimes clear_flg = 1; 516225827Sbz free_ent = FIRST; 517225827Sbz oldcode = -1; 518225827Sbz continue; 5191590Srgrimes } 5201590Srgrimes incode = code; 5211590Srgrimes 522225827Sbz /* Special case for kWkWk string. */ 5231590Srgrimes if (code >= free_ent) { 524225827Sbz if (code > free_ent || oldcode == -1) { 525225827Sbz /* Bad stream. */ 526225827Sbz errno = EINVAL; 527225827Sbz return (-1); 528225827Sbz } 5291590Srgrimes *stackp++ = finchar; 5301590Srgrimes code = oldcode; 5311590Srgrimes } 532225827Sbz /* 533225827Sbz * The above condition ensures that code < free_ent. 534225827Sbz * The construction of tab_prefixof in turn guarantees that 535225827Sbz * each iteration decreases code and therefore stack usage is 536225827Sbz * bound by 1 << BITS - 256. 537225827Sbz */ 5381590Srgrimes 5391590Srgrimes /* Generate output characters in reverse order. */ 5401590Srgrimes while (code >= 256) { 5411590Srgrimes *stackp++ = tab_suffixof(code); 5421590Srgrimes code = tab_prefixof(code); 5431590Srgrimes } 5441590Srgrimes *stackp++ = finchar = tab_suffixof(code); 5451590Srgrimes 5461590Srgrimes /* And put them out in forward order. */ 5471590Srgrimesmiddle: do { 5481590Srgrimes if (count-- == 0) 5491590Srgrimes return (num); 5501590Srgrimes *bp++ = *--stackp; 5511590Srgrimes } while (stackp > de_stack); 5521590Srgrimes 5531590Srgrimes /* Generate the new entry. */ 554225827Sbz if ((code = free_ent) < maxmaxcode && oldcode != -1) { 5551590Srgrimes tab_prefixof(code) = (u_short) oldcode; 5561590Srgrimes tab_suffixof(code) = finchar; 5571590Srgrimes free_ent = code + 1; 5581590Srgrimes } 5591590Srgrimes 5601590Srgrimes /* Remember previous code. */ 5611590Srgrimes oldcode = incode; 5621590Srgrimes } 5631590Srgrimes state = S_EOF; 5641590Srgrimeseof: return (num - count); 5651590Srgrimes} 5661590Srgrimes 5671590Srgrimes/*- 5681590Srgrimes * Read one code from the standard input. If EOF, return -1. 5691590Srgrimes * Inputs: 5701590Srgrimes * stdin 5711590Srgrimes * Outputs: 5721590Srgrimes * code or -1 is returned. 5731590Srgrimes */ 5741590Srgrimesstatic code_int 575100820Sdwmalonegetcode(struct s_zstate *zs) 5761590Srgrimes{ 57787214Smarkm code_int gcode; 57887214Smarkm int r_off, bits; 57987214Smarkm char_type *bp; 5801590Srgrimes 5811590Srgrimes bp = gbuf; 5821590Srgrimes if (clear_flg > 0 || roffset >= size || free_ent > maxcode) { 5831590Srgrimes /* 5841590Srgrimes * If the next entry will be too big for the current gcode 5851590Srgrimes * size, then we must increase the size. This implies reading 5861590Srgrimes * a new buffer full, too. 5871590Srgrimes */ 5881590Srgrimes if (free_ent > maxcode) { 5891590Srgrimes n_bits++; 5901590Srgrimes if (n_bits == maxbits) /* Won't get any bigger now. */ 5911590Srgrimes maxcode = maxmaxcode; 5921590Srgrimes else 5931590Srgrimes maxcode = MAXCODE(n_bits); 5941590Srgrimes } 5951590Srgrimes if (clear_flg > 0) { 5961590Srgrimes maxcode = MAXCODE(n_bits = INIT_BITS); 5971590Srgrimes clear_flg = 0; 5981590Srgrimes } 5991590Srgrimes size = fread(gbuf, 1, n_bits, fp); 6001590Srgrimes if (size <= 0) /* End of file. */ 6011590Srgrimes return (-1); 6021590Srgrimes roffset = 0; 6031590Srgrimes /* Round size down to integral number of codes. */ 6041590Srgrimes size = (size << 3) - (n_bits - 1); 6051590Srgrimes } 6061590Srgrimes r_off = roffset; 6071590Srgrimes bits = n_bits; 6081590Srgrimes 6091590Srgrimes /* Get to the first byte. */ 6101590Srgrimes bp += (r_off >> 3); 6111590Srgrimes r_off &= 7; 6121590Srgrimes 6131590Srgrimes /* Get first part (low order bits). */ 6141590Srgrimes gcode = (*bp++ >> r_off); 6151590Srgrimes bits -= (8 - r_off); 6161590Srgrimes r_off = 8 - r_off; /* Now, roffset into gcode word. */ 6171590Srgrimes 6181590Srgrimes /* Get any 8 bit parts in the middle (<=1 for up to 16 bits). */ 6191590Srgrimes if (bits >= 8) { 6201590Srgrimes gcode |= *bp++ << r_off; 6211590Srgrimes r_off += 8; 6221590Srgrimes bits -= 8; 6231590Srgrimes } 6241590Srgrimes 6251590Srgrimes /* High order bits. */ 6261590Srgrimes gcode |= (*bp & rmask[bits]) << r_off; 6271590Srgrimes roffset += n_bits; 6281590Srgrimes 6291590Srgrimes return (gcode); 6301590Srgrimes} 6311590Srgrimes 6321590Srgrimesstatic int 633100820Sdwmalonecl_block(struct s_zstate *zs) /* Table clear for block compress. */ 6341590Srgrimes{ 63587214Smarkm long rat; 6361590Srgrimes 6371590Srgrimes checkpoint = in_count + CHECK_GAP; 6381590Srgrimes 6391590Srgrimes if (in_count > 0x007fffff) { /* Shift will overflow. */ 6401590Srgrimes rat = bytes_out >> 8; 6411590Srgrimes if (rat == 0) /* Don't divide by zero. */ 6421590Srgrimes rat = 0x7fffffff; 6431590Srgrimes else 6441590Srgrimes rat = in_count / rat; 6451590Srgrimes } else 6461590Srgrimes rat = (in_count << 8) / bytes_out; /* 8 fractional bits. */ 6471590Srgrimes if (rat > ratio) 6481590Srgrimes ratio = rat; 6491590Srgrimes else { 6501590Srgrimes ratio = 0; 6511590Srgrimes cl_hash(zs, (count_int) hsize); 6521590Srgrimes free_ent = FIRST; 6531590Srgrimes clear_flg = 1; 6541590Srgrimes if (output(zs, (code_int) CLEAR) == -1) 6551590Srgrimes return (-1); 6561590Srgrimes } 6571590Srgrimes return (0); 6581590Srgrimes} 6591590Srgrimes 6601590Srgrimesstatic void 661100820Sdwmalonecl_hash(struct s_zstate *zs, count_int cl_hsize) /* Reset code table. */ 6621590Srgrimes{ 66387214Smarkm count_int *htab_p; 66487214Smarkm long i, m1; 6651590Srgrimes 6661590Srgrimes m1 = -1; 6671590Srgrimes htab_p = htab + cl_hsize; 6681590Srgrimes i = cl_hsize - 16; 6691590Srgrimes do { /* Might use Sys V memset(3) here. */ 6701590Srgrimes *(htab_p - 16) = m1; 6711590Srgrimes *(htab_p - 15) = m1; 6721590Srgrimes *(htab_p - 14) = m1; 6731590Srgrimes *(htab_p - 13) = m1; 6741590Srgrimes *(htab_p - 12) = m1; 6751590Srgrimes *(htab_p - 11) = m1; 6761590Srgrimes *(htab_p - 10) = m1; 6771590Srgrimes *(htab_p - 9) = m1; 6781590Srgrimes *(htab_p - 8) = m1; 6791590Srgrimes *(htab_p - 7) = m1; 6801590Srgrimes *(htab_p - 6) = m1; 6811590Srgrimes *(htab_p - 5) = m1; 6821590Srgrimes *(htab_p - 4) = m1; 6831590Srgrimes *(htab_p - 3) = m1; 6841590Srgrimes *(htab_p - 2) = m1; 6851590Srgrimes *(htab_p - 1) = m1; 6861590Srgrimes htab_p -= 16; 6871590Srgrimes } while ((i -= 16) >= 0); 6881590Srgrimes for (i += 16; i > 0; i--) 6891590Srgrimes *--htab_p = m1; 6901590Srgrimes} 6911590Srgrimes 6921590SrgrimesFILE * 693100820Sdwmalonezopen(const char *fname, const char *mode, int bits) 6941590Srgrimes{ 6951590Srgrimes struct s_zstate *zs; 6961590Srgrimes 69741568Sarchie if ((mode[0] != 'r' && mode[0] != 'w') || mode[1] != '\0' || 6981590Srgrimes bits < 0 || bits > BITS) { 6991590Srgrimes errno = EINVAL; 7001590Srgrimes return (NULL); 7011590Srgrimes } 7021590Srgrimes 7031590Srgrimes if ((zs = calloc(1, sizeof(struct s_zstate))) == NULL) 7041590Srgrimes return (NULL); 7051590Srgrimes 7061590Srgrimes maxbits = bits ? bits : BITS; /* User settable max # bits/code. */ 7075342Sats maxmaxcode = 1L << maxbits; /* Should NEVER generate this code. */ 7081590Srgrimes hsize = HSIZE; /* For dynamic table sizing. */ 7091590Srgrimes free_ent = 0; /* First unused entry. */ 7101590Srgrimes block_compress = BLOCK_MASK; 7111590Srgrimes clear_flg = 0; 7121590Srgrimes ratio = 0; 7131590Srgrimes checkpoint = CHECK_GAP; 7141590Srgrimes in_count = 1; /* Length of input. */ 7151590Srgrimes out_count = 0; /* # of codes output (for debugging). */ 7161590Srgrimes state = S_START; 7171590Srgrimes roffset = 0; 7181590Srgrimes size = 0; 7191590Srgrimes 7201590Srgrimes /* 7211590Srgrimes * Layering compress on top of stdio in order to provide buffering, 7221590Srgrimes * and ensure that reads and write work with the data specified. 7231590Srgrimes */ 7241590Srgrimes if ((fp = fopen(fname, mode)) == NULL) { 7251590Srgrimes free(zs); 7261590Srgrimes return (NULL); 7271590Srgrimes } 7281590Srgrimes switch (*mode) { 7291590Srgrimes case 'r': 7301590Srgrimes zmode = 'r'; 7311590Srgrimes return (funopen(zs, zread, NULL, NULL, zclose)); 7321590Srgrimes case 'w': 7331590Srgrimes zmode = 'w'; 7341590Srgrimes return (funopen(zs, NULL, zwrite, NULL, zclose)); 7351590Srgrimes } 7361590Srgrimes /* NOTREACHED */ 73741568Sarchie return (NULL); 7381590Srgrimes} 739