bzipfs.c revision 174741
1235368Sgnn/*
2 * Copyright (c) 1998 Michael Smith.
3 * Copyright (c) 2000 Maxim Sobolev
4 * All rights reserved.
5 *
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions
8 * are met:
9 * 1. Redistributions of source code must retain the above copyright
10 *    notice, this list of conditions and the following disclaimer.
11 * 2. Redistributions in binary form must reproduce the above copyright
12 *    notice, this list of conditions and the following disclaimer in the
13 *    documentation and/or other materials provided with the distribution.
14 *
15 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
16 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
17 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
18 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
19 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
20 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
21 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
22 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
23 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
24 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
25 * SUCH DAMAGE.
26 */
27
28#include <sys/cdefs.h>
29__FBSDID("$FreeBSD: head/lib/libstand/bzipfs.c 174741 2007-12-18 01:50:49Z sobomax $");
30
31#ifndef REGRESSION
32#include "stand.h"
33#else
34#include <sys/errno.h>
35#include <sys/fcntl.h>
36#include <sys/types.h>
37#include <sys/unistd.h>
38
/*
 * Cut-down struct open_file for REGRESSION builds, which do not include
 * stand.h.  It declares only the two members this file accesses.
 */
struct open_file {
    /* open-mode flags; see F_* below */
    int		f_flags;
    /* file system specific data */
    void	*f_fsdata;
};
/* f_flags value: file opened for reading */
#define F_READ		0x0001
/* errno value: relative seek not supported */
#define EOFFSET		(ELAST+8)

/* Return the smaller of two unsigned values. */
static inline u_int
min(u_int a, u_int b)
{
    if (a < b)
	return (a);
    return (b);
}

/* REGRESSION builds turn panic() into abort(); both arguments are dropped. */
#define panic(x, y)	abort()
47#endif
48
49#include <sys/stat.h>
50#include <string.h>
51#include <bzlib.h>
52
53#define BZ_BUFSIZE 2048	/* XXX larger? */
54
55struct bz_file
56{
57    int			bzf_rawfd;
58    bz_stream		bzf_bzstream;
59    char		bzf_buf[BZ_BUFSIZE];
60    int			bzf_endseen;
61};
62
/* Forward declarations for the file-local bzipfs routines. */
static int	bzf_fill(struct bz_file *bzf);
static int	bzf_open(const char *path, struct open_file *f);
static int	bzf_close(struct open_file *f);
static int	bzf_read(struct open_file *f, void *buf, size_t size,
		    size_t *resid);
static off_t	bzf_seek(struct open_file *f, off_t offset, int where);
static int	bzf_stat(struct open_file *f, struct stat *sb);
69
70#ifndef REGRESSION
71struct fs_ops bzipfs_fsops = {
72    "bzip",
73    bzf_open,
74    bzf_close,
75    bzf_read,
76    null_write,
77    bzf_seek,
78    bzf_stat,
79    null_readdir
80};
81#endif
82
83#if 0
/*
 * Fallback calloc() built on malloc(); currently compiled out by the
 * surrounding #if 0.
 *
 * Allocates room for `items` objects of `size` bytes each and clears it,
 * as the calloc() contract requires.  Returns NULL if the count is
 * negative, if items * size would overflow size_t, or if malloc() fails.
 */
void *
calloc(int items, size_t size)
{
    void	*p;
    size_t	total;

    /* reject negative counts and products that do not fit in size_t */
    if (items < 0 || (size != 0 && (size_t)items > (size_t)-1 / size))
	return(NULL);

    total = (size_t)items * size;
    p = malloc(total);
    if (p != NULL)
	bzero(p, total);
    return(p);
}
90
91static int
92bzf_fill(struct bz_file *bzf)
93{
94    int		result;
95    int		req;
96
97    req = BZ_BUFSIZE - bzf->bzf_bzstream.avail_in;
98    result = 0;
99
100    /* If we need more */
101    if (req > 0) {
102	/* move old data to bottom of buffer */
103	if (req < BZ_BUFSIZE)
104	    bcopy(bzf->bzf_buf + req, bzf->bzf_buf, BZ_BUFSIZE - req);
105
106	/* read to fill buffer and update availibility data */
107	result = read(bzf->bzf_rawfd, bzf->bzf_buf + bzf->bzf_bzstream.avail_in, req);
108	bzf->bzf_bzstream.next_in = bzf->bzf_buf;
109	if (result >= 0)
110	    bzf->bzf_bzstream.avail_in += result;
111    }
112    return(result);
113}
114
/*
 * The two routines below are adapted from get_byte/check_header in libz.
 *
 * check_header() returns 0 if the header is OK, nonzero if not.
 */
120static int
121get_byte(struct bz_file *bzf)
122{
123    if ((bzf->bzf_bzstream.avail_in == 0) && (bzf_fill(bzf) == -1))
124	return(-1);
125    bzf->bzf_bzstream.avail_in--;
126    return(*(bzf->bzf_bzstream.next_in)++);
127}
128
129static int bz_magic[3] = {'B', 'Z', 'h'}; /* bzip2 magic header */
130
131static int
132check_header(struct bz_file *bzf)
133{
134    unsigned int len;
135    int		 c;
136
137    /* Check the bzip2 magic header */
138    for (len = 0; len < 3; len++) {
139	c = get_byte(bzf);
140	if (c != bz_magic[len]) {
141	    return(1);
142	}
143    }
144    /* Check that the block size is valid */
145    c = get_byte(bzf);
146    if (c < '1' || c > '9')
147	return(1);
148
149    /* Put back bytes that we've took from the input stream */
150    bzf->bzf_bzstream.next_in -= 4;
151    bzf->bzf_bzstream.avail_in += 4;
152
153    return(0);
154}
155
156static int
157bzf_open(const char *fname, struct open_file *f)
158{
159    static char		*bzfname;
160    int			rawfd;
161    struct bz_file	*bzf;
162    char		*cp;
163    int			error;
164    struct stat		sb;
165
166    /* Have to be in "just read it" mode */
167    if (f->f_flags != F_READ)
168	return(EPERM);
169
170    /* If the name already ends in .gz or .bz2, ignore it */
171    if ((cp = strrchr(fname, '.')) && (!strcmp(cp, ".gz")
172	    || !strcmp(cp, ".bz2") || !strcmp(cp, ".split")))
173	return(ENOENT);
174
175    /* Construct new name */
176    bzfname = malloc(strlen(fname) + 5);
177    sprintf(bzfname, "%s.bz2", fname);
178
179    /* Try to open the compressed datafile */
180    rawfd = open(bzfname, O_RDONLY);
181    free(bzfname);
182    if (rawfd == -1)
183	return(ENOENT);
184
185    if (fstat(rawfd, &sb) < 0) {
186	printf("bzf_open: stat failed\n");
187	close(rawfd);
188	return(ENOENT);
189    }
190    if (!S_ISREG(sb.st_mode)) {
191	printf("bzf_open: not a file\n");
192	close(rawfd);
193	return(EISDIR);			/* best guess */
194    }
195
196    /* Allocate a bz_file structure, populate it */
197    bzf = malloc(sizeof(struct bz_file));
198    bzero(bzf, sizeof(struct bz_file));
199    bzf->bzf_rawfd = rawfd;
200
201    /* Verify that the file is bzipped (XXX why do this afterwards?) */
202    if (check_header(bzf)) {
203	close(bzf->bzf_rawfd);
204	BZ2_bzDecompressEnd(&(bzf->bzf_bzstream));
205	free(bzf);
206	return(EFTYPE);
207    }
208
209    /* Initialise the inflation engine */
210    if ((error = BZ2_bzDecompressInit(&(bzf->bzf_bzstream), 0, 1)) != BZ_OK) {
211	printf("bzf_open: BZ2_bzDecompressInit returned %d\n", error);
212	close(bzf->bzf_rawfd);
213	free(bzf);
214	return(EIO);
215    }
216
217    /* Looks OK, we'll take it */
218    f->f_fsdata = bzf;
219    return(0);
220}
221
222static int
223bzf_close(struct open_file *f)
224{
225    struct bz_file	*bzf = (struct bz_file *)f->f_fsdata;
226
227    BZ2_bzDecompressEnd(&(bzf->bzf_bzstream));
228    close(bzf->bzf_rawfd);
229    free(bzf);
230    return(0);
231}
232
233static int
234bzf_read(struct open_file *f, void *buf, size_t size, size_t *resid)
235{
236    struct bz_file	*bzf = (struct bz_file *)f->f_fsdata;
237    int			error;
238
239    bzf->bzf_bzstream.next_out = buf;			/* where and how much */
240    bzf->bzf_bzstream.avail_out = size;
241
242    while (bzf->bzf_bzstream.avail_out && bzf->bzf_endseen == 0) {
243	if ((bzf->bzf_bzstream.avail_in == 0) && (bzf_fill(bzf) == -1)) {
244	    printf("bzf_read: fill error\n");
245	    return(EIO);
246	}
247	if (bzf->bzf_bzstream.avail_in == 0) {		/* oops, unexpected EOF */
248	    printf("bzf_read: unexpected EOF\n");
249	    if (bzf->bzf_bzstream.avail_out == size)
250		return (EIO);
251	    break;
252	}
253
254	error = BZ2_bzDecompress(&bzf->bzf_bzstream);	/* decompression pass */
255	if (error == BZ_STREAM_END) {			/* EOF, all done */
256	    bzf->bzf_endseen = 1;
257	    break;
258	}
259	if (error != BZ_OK) {				/* argh, decompression error */
260	    printf("bzf_read: BZ2_bzDecompress returned %d\n", error);
261	    return(EIO);
262	}
263    }
264    if (resid != NULL)
265	*resid = bzf->bzf_bzstream.avail_out;
266    return(0);
267}
268
269static off_t
270bzf_seek(struct open_file *f, off_t offset, int where)
271{
272    struct bz_file	*bzf = (struct bz_file *)f->f_fsdata;
273    off_t		target;
274    char		discard[16];
275
276    switch (where) {
277    case SEEK_SET:
278	target = offset;
279	break;
280    case SEEK_CUR:
281	target = offset + bzf->bzf_bzstream.total_out_lo32;
282	break;
283    case SEEK_END:
284	target = -1;
285    default:
286	errno = EINVAL;
287	return (-1);
288    }
289
290    /* Can we get there from here? */
291    if (target < bzf->bzf_bzstream.total_out_lo32) {
292	errno = EOFFSET;
293	return -1;
294    }
295
296    /* skip forwards if required */
297    while (target > bzf->bzf_bzstream.total_out_lo32) {
298	errno = bzf_read(f, discard, min(sizeof(discard),
299	    target - bzf->bzf_bzstream.total_out_lo32), NULL);
300	if (errno)
301	    return(-1);
302    }
303    /* This is where we are (be honest if we overshot) */
304    return (bzf->bzf_bzstream.total_out_lo32);
305}
306
307static int
308bzf_stat(struct open_file *f, struct stat *sb)
309{
310    struct bz_file	*bzf = (struct bz_file *)f->f_fsdata;
311    int			result;
312
313    /* stat as normal, but indicate that size is unknown */
314    if ((result = fstat(bzf->bzf_rawfd, sb)) == 0)
315	sb->st_size = -1;
316    return(result);
317}
318
/*
 * Fatal-error hook with external linkage: passes the given code to
 * panic().  Presumably called by the bzip2 library when it is built
 * without stdio support -- confirm against the libbz2 sources.
 */
void
bz_internal_error(int errorcode)
{

    panic("bzipfs: critical error %d in bzip2 library occured\n", errorcode);
}
324
325#ifdef REGRESSION
326/* Small test case, open and decompress test.bz2 */
327int main()
328{
329    struct open_file f;
330    char buf[1024];
331    size_t resid;
332    int err;
333
334    memset(&f, '\0', sizeof(f));
335    f.f_flags = F_READ;
336    err = bzf_open("test", &f);
337    if (err != 0)
338	exit(1);
339    do {
340	err = bzf_read(&f, buf, sizeof(buf), &resid);
341    } while (err == 0 && resid != sizeof(buf));
342
343    if (err != 0)
344	exit(2);
345    exit(0);
346}
347#endif
348