response_buckets.c revision 262339
/* Copyright 2002-2004 Justin Erenkrantz and Greg Stein
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
15
16#include <apr_lib.h>
17#include <apr_strings.h>
18#include <apr_date.h>
19
20#include "serf.h"
21#include "serf_bucket_util.h"
22#include "serf_private.h"
23
24typedef struct {
25    serf_bucket_t *stream;
26    serf_bucket_t *body;        /* Pointer to the stream wrapping the body. */
27    serf_bucket_t *headers;     /* holds parsed headers */
28
29    enum {
30        STATE_STATUS_LINE,      /* reading status line */
31        STATE_HEADERS,          /* reading headers */
32        STATE_BODY,             /* reading body */
33        STATE_TRAILERS,         /* reading trailers */
34        STATE_DONE              /* we've sent EOF */
35    } state;
36
37    /* Buffer for accumulating a line from the response. */
38    serf_linebuf_t linebuf;
39
40    serf_status_line sl;
41
42    int chunked;                /* Do we need to read trailers? */
43    int head_req;               /* Was this a HEAD request? */
44} response_context_t;
45
46/* Returns 1 if according to RFC2626 this response can have a body, 0 if it
47   must not have a body. */
48static int expect_body(response_context_t *ctx)
49{
50    if (ctx->head_req)
51        return 0;
52
53    /* 100 Continue and 101 Switching Protocols */
54    if (ctx->sl.code >= 100 && ctx->sl.code < 200)
55        return 0;
56
57    /* 204 No Content */
58    if (ctx->sl.code == 204)
59        return 0;
60
61    /* 205? */
62
63    /* 304 Not Modified */
64    if (ctx->sl.code == 304)
65        return 0;
66
67    return 1;
68}
69
70serf_bucket_t *serf_bucket_response_create(
71    serf_bucket_t *stream,
72    serf_bucket_alloc_t *allocator)
73{
74    response_context_t *ctx;
75
76    ctx = serf_bucket_mem_alloc(allocator, sizeof(*ctx));
77    ctx->stream = stream;
78    ctx->body = NULL;
79    ctx->headers = serf_bucket_headers_create(allocator);
80    ctx->state = STATE_STATUS_LINE;
81    ctx->chunked = 0;
82    ctx->head_req = 0;
83
84    serf_linebuf_init(&ctx->linebuf);
85
86    return serf_bucket_create(&serf_bucket_type_response, allocator, ctx);
87}
88
89void serf_bucket_response_set_head(
90    serf_bucket_t *bucket)
91{
92    response_context_t *ctx = bucket->data;
93
94    ctx->head_req = 1;
95}
96
97serf_bucket_t *serf_bucket_response_get_headers(
98    serf_bucket_t *bucket)
99{
100    return ((response_context_t *)bucket->data)->headers;
101}
102
103
104static void serf_response_destroy_and_data(serf_bucket_t *bucket)
105{
106    response_context_t *ctx = bucket->data;
107
108    if (ctx->state != STATE_STATUS_LINE) {
109        serf_bucket_mem_free(bucket->allocator, (void*)ctx->sl.reason);
110    }
111
112    serf_bucket_destroy(ctx->stream);
113    if (ctx->body != NULL)
114        serf_bucket_destroy(ctx->body);
115    serf_bucket_destroy(ctx->headers);
116
117    serf_default_destroy_and_data(bucket);
118}
119
120static apr_status_t fetch_line(response_context_t *ctx, int acceptable)
121{
122    return serf_linebuf_fetch(&ctx->linebuf, ctx->stream, acceptable);
123}
124
125static apr_status_t parse_status_line(response_context_t *ctx,
126                                      serf_bucket_alloc_t *allocator)
127{
128    int res;
129    char *reason; /* ### stupid APR interface makes this non-const */
130
131    /* ctx->linebuf.line should be of form: HTTP/1.1 200 OK */
132    res = apr_date_checkmask(ctx->linebuf.line, "HTTP/#.# ###*");
133    if (!res) {
134        /* Not an HTTP response?  Well, at least we won't understand it. */
135        return SERF_ERROR_BAD_HTTP_RESPONSE;
136    }
137
138    ctx->sl.version = SERF_HTTP_VERSION(ctx->linebuf.line[5] - '0',
139                                        ctx->linebuf.line[7] - '0');
140    ctx->sl.code = apr_strtoi64(ctx->linebuf.line + 8, &reason, 10);
141
142    /* Skip leading spaces for the reason string. */
143    if (apr_isspace(*reason)) {
144        reason++;
145    }
146
147    /* Copy the reason value out of the line buffer. */
148    ctx->sl.reason = serf_bstrmemdup(allocator, reason,
149                                     ctx->linebuf.used
150                                     - (reason - ctx->linebuf.line));
151
152    return APR_SUCCESS;
153}
154
155/* This code should be replaced with header buckets. */
156static apr_status_t fetch_headers(serf_bucket_t *bkt, response_context_t *ctx)
157{
158    apr_status_t status;
159
160    /* RFC 2616 says that CRLF is the only line ending, but we can easily
161     * accept any kind of line ending.
162     */
163    status = fetch_line(ctx, SERF_NEWLINE_ANY);
164    if (SERF_BUCKET_READ_ERROR(status)) {
165        return status;
166    }
167    /* Something was read. Process it. */
168
169    if (ctx->linebuf.state == SERF_LINEBUF_READY && ctx->linebuf.used) {
170        const char *end_key;
171        const char *c;
172
173        end_key = c = memchr(ctx->linebuf.line, ':', ctx->linebuf.used);
174        if (!c) {
175            /* Bad headers? */
176            return SERF_ERROR_BAD_HTTP_RESPONSE;
177        }
178
179        /* Skip over initial ':' */
180        c++;
181
182        /* And skip all whitespaces. */
183        for(; c < ctx->linebuf.line + ctx->linebuf.used; c++)
184        {
185            if (!apr_isspace(*c))
186            {
187              break;
188            }
189        }
190
191        /* Always copy the headers (from the linebuf into new mem). */
192        /* ### we should be able to optimize some mem copies */
193        serf_bucket_headers_setx(
194            ctx->headers,
195            ctx->linebuf.line, end_key - ctx->linebuf.line, 1,
196            c, ctx->linebuf.line + ctx->linebuf.used - c, 1);
197    }
198
199    return status;
200}
201
202/* Perform one iteration of the state machine.
203 *
204 * Will return when one the following conditions occurred:
205 *  1) a state change
206 *  2) an error
207 *  3) the stream is not ready or at EOF
208 *  4) APR_SUCCESS, meaning the machine can be run again immediately
209 */
210static apr_status_t run_machine(serf_bucket_t *bkt, response_context_t *ctx)
211{
212    apr_status_t status = APR_SUCCESS; /* initialize to avoid gcc warnings */
213
214    switch (ctx->state) {
215    case STATE_STATUS_LINE:
216        /* RFC 2616 says that CRLF is the only line ending, but we can easily
217         * accept any kind of line ending.
218         */
219        status = fetch_line(ctx, SERF_NEWLINE_ANY);
220        if (SERF_BUCKET_READ_ERROR(status))
221            return status;
222
223        if (ctx->linebuf.state == SERF_LINEBUF_READY) {
224            /* The Status-Line is in the line buffer. Process it. */
225            status = parse_status_line(ctx, bkt->allocator);
226            if (status)
227                return status;
228
229            /* Good times ahead: we're switching protocols! */
230            if (ctx->sl.code == 101) {
231                ctx->body =
232                    serf_bucket_barrier_create(ctx->stream, bkt->allocator);
233                ctx->state = STATE_DONE;
234                break;
235            }
236
237            /* Okay... move on to reading the headers. */
238            ctx->state = STATE_HEADERS;
239        }
240        else {
241            /* The connection closed before we could get the next
242             * response.  Treat the request as lost so that our upper
243             * end knows the server never tried to give us a response.
244             */
245            if (APR_STATUS_IS_EOF(status)) {
246                return SERF_ERROR_REQUEST_LOST;
247            }
248        }
249        break;
250    case STATE_HEADERS:
251        status = fetch_headers(bkt, ctx);
252        if (SERF_BUCKET_READ_ERROR(status))
253            return status;
254
255        /* If an empty line was read, then we hit the end of the headers.
256         * Move on to the body.
257         */
258        if (ctx->linebuf.state == SERF_LINEBUF_READY && !ctx->linebuf.used) {
259            const void *v;
260
261            /* Advance the state. */
262            ctx->state = STATE_BODY;
263
264            /* If this is a response to a HEAD request, or code == 1xx,204 or304
265               then we don't receive a real body. */
266            if (!expect_body(ctx)) {
267                ctx->body = serf_bucket_simple_create(NULL, 0, NULL, NULL,
268                                                      bkt->allocator);
269                ctx->state = STATE_BODY;
270                break;
271            }
272
273            ctx->body =
274                serf_bucket_barrier_create(ctx->stream, bkt->allocator);
275
276            /* Are we C-L, chunked, or conn close? */
277            v = serf_bucket_headers_get(ctx->headers, "Content-Length");
278            if (v) {
279                apr_uint64_t length;
280                length = apr_strtoi64(v, NULL, 10);
281                if (errno == ERANGE) {
282                    return APR_FROM_OS_ERROR(ERANGE);
283                }
284                ctx->body = serf_bucket_response_body_create(
285                              ctx->body, length, bkt->allocator);
286            }
287            else {
288                v = serf_bucket_headers_get(ctx->headers, "Transfer-Encoding");
289
290                /* Need to handle multiple transfer-encoding. */
291                if (v && strcasecmp("chunked", v) == 0) {
292                    ctx->chunked = 1;
293                    ctx->body = serf_bucket_dechunk_create(ctx->body,
294                                                           bkt->allocator);
295                }
296            }
297            v = serf_bucket_headers_get(ctx->headers, "Content-Encoding");
298            if (v) {
299                /* Need to handle multiple content-encoding. */
300                if (v && strcasecmp("gzip", v) == 0) {
301                    ctx->body =
302                        serf_bucket_deflate_create(ctx->body, bkt->allocator,
303                                                   SERF_DEFLATE_GZIP);
304                }
305                else if (v && strcasecmp("deflate", v) == 0) {
306                    ctx->body =
307                        serf_bucket_deflate_create(ctx->body, bkt->allocator,
308                                                   SERF_DEFLATE_DEFLATE);
309                }
310            }
311        }
312        break;
313    case STATE_BODY:
314        /* Don't do anything. */
315        break;
316    case STATE_TRAILERS:
317        status = fetch_headers(bkt, ctx);
318        if (SERF_BUCKET_READ_ERROR(status))
319            return status;
320
321        /* If an empty line was read, then we're done. */
322        if (ctx->linebuf.state == SERF_LINEBUF_READY && !ctx->linebuf.used) {
323            ctx->state = STATE_DONE;
324            return APR_EOF;
325        }
326        break;
327    case STATE_DONE:
328        return APR_EOF;
329    default:
330        /* Not reachable */
331        return APR_EGENERAL;
332    }
333
334    return status;
335}
336
337static apr_status_t wait_for_body(serf_bucket_t *bkt, response_context_t *ctx)
338{
339    apr_status_t status;
340
341    /* Keep reading and moving through states if we aren't at the BODY */
342    while (ctx->state != STATE_BODY) {
343        status = run_machine(bkt, ctx);
344
345        /* Anything other than APR_SUCCESS means that we cannot immediately
346         * read again (for now).
347         */
348        if (status)
349            return status;
350    }
351    /* in STATE_BODY */
352
353    return APR_SUCCESS;
354}
355
356apr_status_t serf_bucket_response_wait_for_headers(
357    serf_bucket_t *bucket)
358{
359    response_context_t *ctx = bucket->data;
360
361    return wait_for_body(bucket, ctx);
362}
363
364apr_status_t serf_bucket_response_status(
365    serf_bucket_t *bkt,
366    serf_status_line *sline)
367{
368    response_context_t *ctx = bkt->data;
369    apr_status_t status;
370
371    if (ctx->state != STATE_STATUS_LINE) {
372        /* We already read it and moved on. Just return it. */
373        *sline = ctx->sl;
374        return APR_SUCCESS;
375    }
376
377    /* Running the state machine once will advance the machine, or state
378     * that the stream isn't ready with enough data. There isn't ever a
379     * need to run the machine more than once to try and satisfy this. We
380     * have to look at the state to tell whether it advanced, though, as
381     * it is quite possible to advance *and* to return APR_EAGAIN.
382     */
383    status = run_machine(bkt, ctx);
384    if (ctx->state == STATE_HEADERS) {
385        *sline = ctx->sl;
386    }
387    else {
388        /* Indicate that we don't have the information yet. */
389        sline->version = 0;
390    }
391
392    return status;
393}
394
395static apr_status_t serf_response_read(serf_bucket_t *bucket,
396                                       apr_size_t requested,
397                                       const char **data, apr_size_t *len)
398{
399    response_context_t *ctx = bucket->data;
400    apr_status_t rv;
401
402    rv = wait_for_body(bucket, ctx);
403    if (rv) {
404        /* It's not possible to have read anything yet! */
405        if (APR_STATUS_IS_EOF(rv) || APR_STATUS_IS_EAGAIN(rv)) {
406            *len = 0;
407        }
408        return rv;
409    }
410
411    rv = serf_bucket_read(ctx->body, requested, data, len);
412    if (SERF_BUCKET_READ_ERROR(rv))
413        return rv;
414
415    if (APR_STATUS_IS_EOF(rv)) {
416        if (ctx->chunked) {
417            ctx->state = STATE_TRAILERS;
418            /* Mask the result. */
419            rv = APR_SUCCESS;
420        } else {
421            ctx->state = STATE_DONE;
422        }
423    }
424    return rv;
425}
426
427static apr_status_t serf_response_readline(serf_bucket_t *bucket,
428                                           int acceptable, int *found,
429                                           const char **data, apr_size_t *len)
430{
431    response_context_t *ctx = bucket->data;
432    apr_status_t rv;
433
434    rv = wait_for_body(bucket, ctx);
435    if (rv) {
436        return rv;
437    }
438
439    /* Delegate to the stream bucket to do the readline. */
440    return serf_bucket_readline(ctx->body, acceptable, found, data, len);
441}
442
443apr_status_t serf_response_full_become_aggregate(serf_bucket_t *bucket)
444{
445    response_context_t *ctx = bucket->data;
446    serf_bucket_t *bkt;
447    char buf[256];
448    int size;
449
450    serf_bucket_aggregate_become(bucket);
451
452    /* Add reconstructed status line. */
453    size = apr_snprintf(buf, 256, "HTTP/%d.%d %d ",
454                        SERF_HTTP_VERSION_MAJOR(ctx->sl.version),
455                        SERF_HTTP_VERSION_MINOR(ctx->sl.version),
456                        ctx->sl.code);
457    bkt = serf_bucket_simple_copy_create(buf, size,
458                                         bucket->allocator);
459    serf_bucket_aggregate_append(bucket, bkt);
460    bkt = serf_bucket_simple_copy_create(ctx->sl.reason, strlen(ctx->sl.reason),
461                                         bucket->allocator);
462    serf_bucket_aggregate_append(bucket, bkt);
463    bkt = SERF_BUCKET_SIMPLE_STRING_LEN("\r\n", 2,
464                                        bucket->allocator);
465    serf_bucket_aggregate_append(bucket, bkt);
466
467    /* Add headers and stream buckets in order. */
468    serf_bucket_aggregate_append(bucket, ctx->headers);
469    serf_bucket_aggregate_append(bucket, ctx->stream);
470
471    serf_bucket_mem_free(bucket->allocator, ctx);
472
473    return APR_SUCCESS;
474}
475
476/* ### need to implement */
477#define serf_response_peek NULL
478
479const serf_bucket_type_t serf_bucket_type_response = {
480    "RESPONSE",
481    serf_response_read,
482    serf_response_readline,
483    serf_default_read_iovec,
484    serf_default_read_for_sendfile,
485    serf_default_read_bucket,
486    serf_response_peek,
487    serf_response_destroy_and_data,
488};
489