/* deflate_buckets.c revision 289166 */
/* Copyright 2002-2004 Justin Erenkrantz and Greg Stein
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include <string.h>

#include <apr_strings.h>

#include <zlib.h>

/* This conditional isn't defined anywhere yet. */
#ifdef HAVE_ZUTIL_H
#include <zutil.h>
#endif

#include "serf.h"
#include "serf_bucket_util.h"

/* The two identification bytes that must open a gzip stream
 * (0x1f 0x8b, written here as octal escapes).  Read-only.
 */
static const char deflate_magic[2] = { '\037', '\213' };

/* Number of bytes collected as the gzip header before inflating starts. */
#define DEFLATE_MAGIC_SIZE 10
/* Number of bytes collected as the gzip trailer: a 4-byte CRC followed by a
 * 4-byte length.
 */
#define DEFLATE_VERIFY_SIZE 8
/* Size, in bytes, of the buffer that zlib inflates into. */
#define DEFLATE_BUFFER_SIZE 8096

/* windowBits value handed to inflateInit2().  Per the zlib manual a negative
 * value selects raw deflate data, i.e. zlib does not parse a header or
 * trailer itself.
 */
static const int DEFLATE_WINDOW_SIZE = -15;
/* Copied into every new context as memLevel; nothing else in this file
 * reads it.
 */
static const int DEFLATE_MEMLEVEL = 9;

37typedef struct {
38    serf_bucket_t *stream;
39    serf_bucket_t *inflate_stream;
40
41    int format;                 /* Are we 'deflate' or 'gzip'? */
42
43    enum {
44        STATE_READING_HEADER,   /* reading the gzip header */
45        STATE_HEADER,           /* read the gzip header */
46        STATE_INIT,             /* init'ing zlib functions */
47        STATE_INFLATE,          /* inflating the content now */
48        STATE_READING_VERIFY,   /* reading the final gzip CRC */
49        STATE_VERIFY,           /* verifying the final gzip CRC */
50        STATE_FINISH,           /* clean up after reading body */
51        STATE_DONE,             /* body is done; we'll return EOF here */
52    } state;
53
54    z_stream zstream;
55    char hdr_buffer[DEFLATE_MAGIC_SIZE];
56    unsigned char buffer[DEFLATE_BUFFER_SIZE];
57    unsigned long crc;
58    int windowSize;
59    int memLevel;
60    int bufferSize;
61
62    /* How much of the chunk, or the terminator, do we have left to read? */
63    apr_size_t stream_left;
64
65    /* How much are we supposed to read? */
66    apr_size_t stream_size;
67
68    int stream_status; /* What was the last status we read? */
69
70} deflate_context_t;
71
/* Decodes the first four bytes at STRING as a little-endian unsigned value:
 * string[0] is the least significant byte, string[3] the most significant.
 *
 * STRING must point to at least four readable bytes; it is not modified.
 * Returns the decoded value, which always fits in 32 bits.
 */
static unsigned long getLong(const unsigned char *string)
{
    return ((unsigned long)string[0])
          | (((unsigned long)string[1]) << 8)
          | (((unsigned long)string[2]) << 16)
          | (((unsigned long)string[3]) << 24);
}

81serf_bucket_t *serf_bucket_deflate_create(
82    serf_bucket_t *stream,
83    serf_bucket_alloc_t *allocator,
84    int format)
85{
86    deflate_context_t *ctx;
87
88    ctx = serf_bucket_mem_alloc(allocator, sizeof(*ctx));
89    ctx->stream = stream;
90    ctx->stream_status = APR_SUCCESS;
91    ctx->inflate_stream = serf_bucket_aggregate_create(allocator);
92    ctx->format = format;
93    ctx->crc = 0;
94    /* zstream must be NULL'd out. */
95    memset(&ctx->zstream, 0, sizeof(ctx->zstream));
96
97    switch (ctx->format) {
98        case SERF_DEFLATE_GZIP:
99            ctx->state = STATE_READING_HEADER;
100            break;
101        case SERF_DEFLATE_DEFLATE:
102            /* deflate doesn't have a header. */
103            ctx->state = STATE_INIT;
104            break;
105        default:
106            /* Not reachable */
107            return NULL;
108    }
109
110    /* Initial size of gzip header. */
111    ctx->stream_left = ctx->stream_size = DEFLATE_MAGIC_SIZE;
112
113    ctx->windowSize = DEFLATE_WINDOW_SIZE;
114    ctx->memLevel = DEFLATE_MEMLEVEL;
115    ctx->bufferSize = DEFLATE_BUFFER_SIZE;
116
117    return serf_bucket_create(&serf_bucket_type_deflate, allocator, ctx);
118}
119
120static void serf_deflate_destroy_and_data(serf_bucket_t *bucket)
121{
122    deflate_context_t *ctx = bucket->data;
123
124    if (ctx->state > STATE_INIT &&
125        ctx->state <= STATE_FINISH)
126        inflateEnd(&ctx->zstream);
127
128    /* We may have appended inflate_stream into the stream bucket.
129     * If so, avoid free'ing it twice.
130     */
131    if (ctx->inflate_stream) {
132        serf_bucket_destroy(ctx->inflate_stream);
133    }
134    serf_bucket_destroy(ctx->stream);
135
136    serf_default_destroy_and_data(bucket);
137}
138
139static apr_status_t serf_deflate_read(serf_bucket_t *bucket,
140                                      apr_size_t requested,
141                                      const char **data, apr_size_t *len)
142{
143    deflate_context_t *ctx = bucket->data;
144    apr_status_t status;
145    const char *private_data;
146    apr_size_t private_len;
147    int zRC;
148
149    while (1) {
150        switch (ctx->state) {
151        case STATE_READING_HEADER:
152        case STATE_READING_VERIFY:
153            status = serf_bucket_read(ctx->stream, ctx->stream_left,
154                                      &private_data, &private_len);
155
156            if (SERF_BUCKET_READ_ERROR(status)) {
157                return status;
158            }
159
160            memcpy(ctx->hdr_buffer + (ctx->stream_size - ctx->stream_left),
161                   private_data, private_len);
162
163            ctx->stream_left -= private_len;
164
165            if (ctx->stream_left == 0) {
166                ctx->state++;
167                if (APR_STATUS_IS_EAGAIN(status)) {
168                    *len = 0;
169                    return status;
170                }
171            }
172            else if (status) {
173                *len = 0;
174                return status;
175            }
176            break;
177        case STATE_HEADER:
178            if (ctx->hdr_buffer[0] != deflate_magic[0] ||
179                ctx->hdr_buffer[1] != deflate_magic[1]) {
180                return SERF_ERROR_DECOMPRESSION_FAILED;
181            }
182            if (ctx->hdr_buffer[3] != 0) {
183                return SERF_ERROR_DECOMPRESSION_FAILED;
184            }
185            ctx->state++;
186            break;
187        case STATE_VERIFY:
188        {
189            unsigned long compCRC, compLen, actualLen;
190
191            /* Do the checksum computation. */
192            compCRC = getLong((unsigned char*)ctx->hdr_buffer);
193            if (ctx->crc != compCRC) {
194                return SERF_ERROR_DECOMPRESSION_FAILED;
195            }
196            compLen = getLong((unsigned char*)ctx->hdr_buffer + 4);
197            /* The length in the trailer is module 2^32, so do the same for
198               the actual length. */
199            actualLen = ctx->zstream.total_out;
200            actualLen &= 0xFFFFFFFF;
201            if (actualLen != compLen) {
202                return SERF_ERROR_DECOMPRESSION_FAILED;
203            }
204            ctx->state++;
205            break;
206        }
207        case STATE_INIT:
208            zRC = inflateInit2(&ctx->zstream, ctx->windowSize);
209            if (zRC != Z_OK) {
210                return SERF_ERROR_DECOMPRESSION_FAILED;
211            }
212            ctx->zstream.next_out = ctx->buffer;
213            ctx->zstream.avail_out = ctx->bufferSize;
214            ctx->state++;
215            break;
216        case STATE_FINISH:
217            inflateEnd(&ctx->zstream);
218            serf_bucket_aggregate_prepend(ctx->stream, ctx->inflate_stream);
219            ctx->inflate_stream = 0;
220            ctx->state++;
221            break;
222        case STATE_INFLATE:
223            /* Do we have anything already uncompressed to read? */
224            status = serf_bucket_read(ctx->inflate_stream, requested, data,
225                                      len);
226            if (SERF_BUCKET_READ_ERROR(status)) {
227                return status;
228            }
229            /* Hide EOF. */
230            if (APR_STATUS_IS_EOF(status)) {
231                status = ctx->stream_status;
232                if (APR_STATUS_IS_EOF(status)) {
233                    /* We've read all of the data from our stream, but we
234                     * need to continue to iterate until we flush
235                     * out the zlib buffer.
236                     */
237                    status = APR_SUCCESS;
238                }
239            }
240            if (*len != 0) {
241                return status;
242            }
243
244            /* We tried; but we have nothing buffered. Fetch more. */
245
246            /* It is possible that we maxed out avail_out before
247             * exhausting avail_in; therefore, continue using the
248             * previous buffer.  Otherwise, fetch more data from
249             * our stream bucket.
250             */
251            if (ctx->zstream.avail_in == 0) {
252                /* When we empty our inflated stream, we'll return this
253                 * status - this allow us to eventually pass up EAGAINs.
254                 */
255                ctx->stream_status = serf_bucket_read(ctx->stream,
256                                                      ctx->bufferSize,
257                                                      &private_data,
258                                                      &private_len);
259
260                if (SERF_BUCKET_READ_ERROR(ctx->stream_status)) {
261                    return ctx->stream_status;
262                }
263
264                if (!private_len && APR_STATUS_IS_EAGAIN(ctx->stream_status)) {
265                    *len = 0;
266                    status = ctx->stream_status;
267                    ctx->stream_status = APR_SUCCESS;
268                    return status;
269                }
270
271                ctx->zstream.next_in = (unsigned char*)private_data;
272                ctx->zstream.avail_in = private_len;
273            }
274
275            while (1) {
276
277                zRC = inflate(&ctx->zstream, Z_NO_FLUSH);
278
279                /* We're full or zlib requires more space. Either case, clear
280                   out our buffer, reset, and return. */
281                if (zRC == Z_BUF_ERROR || ctx->zstream.avail_out == 0) {
282                    serf_bucket_t *tmp;
283                    ctx->zstream.next_out = ctx->buffer;
284                    private_len = ctx->bufferSize - ctx->zstream.avail_out;
285
286                    ctx->crc = crc32(ctx->crc, (const Bytef *)ctx->buffer,
287                                     private_len);
288
289                    /* FIXME: There probably needs to be a free func. */
290                    tmp = SERF_BUCKET_SIMPLE_STRING_LEN((char *)ctx->buffer,
291                                                        private_len,
292                                                        bucket->allocator);
293                    serf_bucket_aggregate_append(ctx->inflate_stream, tmp);
294                    ctx->zstream.avail_out = ctx->bufferSize;
295                    break;
296                }
297
298                if (zRC == Z_STREAM_END) {
299                    serf_bucket_t *tmp;
300
301                    private_len = ctx->bufferSize - ctx->zstream.avail_out;
302                    ctx->crc = crc32(ctx->crc, (const Bytef *)ctx->buffer,
303                                     private_len);
304                    /* FIXME: There probably needs to be a free func. */
305                    tmp = SERF_BUCKET_SIMPLE_STRING_LEN((char *)ctx->buffer,
306                                                        private_len,
307                                                        bucket->allocator);
308                    serf_bucket_aggregate_append(ctx->inflate_stream, tmp);
309
310                    ctx->zstream.avail_out = ctx->bufferSize;
311
312                    /* Push back the remaining data to be read. */
313                    tmp = serf_bucket_aggregate_create(bucket->allocator);
314                    serf_bucket_aggregate_prepend(tmp, ctx->stream);
315                    ctx->stream = tmp;
316
317                    /* We now need to take the remaining avail_in and
318                     * throw it in ctx->stream so our next read picks it up.
319                     */
320                    tmp = SERF_BUCKET_SIMPLE_STRING_LEN(
321                                        (const char*)ctx->zstream.next_in,
322                                                     ctx->zstream.avail_in,
323                                                     bucket->allocator);
324                    serf_bucket_aggregate_prepend(ctx->stream, tmp);
325
326                    switch (ctx->format) {
327                    case SERF_DEFLATE_GZIP:
328                        ctx->stream_left = ctx->stream_size =
329                            DEFLATE_VERIFY_SIZE;
330                        ctx->state++;
331                        break;
332                    case SERF_DEFLATE_DEFLATE:
333                        /* Deflate does not have a verify footer. */
334                        ctx->state = STATE_FINISH;
335                        break;
336                    default:
337                        /* Not reachable */
338                        return APR_EGENERAL;
339                    }
340
341                    break;
342                }
343
344                /* Any other error? */
345                if (zRC != Z_OK) {
346                    return SERF_ERROR_DECOMPRESSION_FAILED;
347                }
348
349                /* As long as zRC == Z_OK, just keep looping. */
350            }
351            /* Okay, we've inflated.  Try to read. */
352            status = serf_bucket_read(ctx->inflate_stream, requested, data,
353                                      len);
354            /* Hide EOF. */
355            if (APR_STATUS_IS_EOF(status)) {
356                status = ctx->stream_status;
357
358                /* If the inflation wasn't finished, return APR_SUCCESS. */
359                if (zRC != Z_STREAM_END)
360                    return APR_SUCCESS;
361
362                /* If our stream is finished too and all data was inflated,
363                 * return SUCCESS so we'll iterate one more time.
364                 */
365                if (APR_STATUS_IS_EOF(status)) {
366                    /* No more data to read from the stream, and everything
367                       inflated. If all data was received correctly, state
368                       should have been advanced to STATE_READING_VERIFY or
369                       STATE_FINISH. If not, then the data was incomplete
370                       and we have an error. */
371                    if (ctx->state != STATE_INFLATE)
372                        return APR_SUCCESS;
373                    else
374                        return SERF_ERROR_DECOMPRESSION_FAILED;
375                }
376            }
377            return status;
378        case STATE_DONE:
379            /* We're done inflating.  Use our finished buffer. */
380            return serf_bucket_read(ctx->stream, requested, data, len);
381        default:
382            /* Not reachable */
383            return APR_EGENERAL;
384        }
385    }
386
387    /* NOTREACHED */
388}
389
390/* ### need to implement */
391#define serf_deflate_readline NULL
392#define serf_deflate_peek NULL
393
394const serf_bucket_type_t serf_bucket_type_deflate = {
395    "DEFLATE",
396    serf_deflate_read,
397    serf_deflate_readline,
398    serf_default_read_iovec,
399    serf_default_read_for_sendfile,
400    serf_default_read_bucket,
401    serf_deflate_peek,
402    serf_deflate_destroy_and_data,
403};
404