http.c revision 40975
1243730Srwatson/*-
 * Copyright (c) 1998 Dag-Erling Coïdan Smørgrav
3243730Srwatson * All rights reserved.
4243730Srwatson *
5243730Srwatson * Redistribution and use in source and binary forms, with or without
6243730Srwatson * modification, are permitted provided that the following conditions
7243730Srwatson * are met:
8243730Srwatson * 1. Redistributions of source code must retain the above copyright
9243730Srwatson *    notice, this list of conditions and the following disclaimer
10243730Srwatson *    in this position and unchanged.
11243730Srwatson * 2. Redistributions in binary form must reproduce the above copyright
12243730Srwatson *    notice, this list of conditions and the following disclaimer in the
13243730Srwatson *    documentation and/or other materials provided with the distribution.
14243730Srwatson * 3. The name of the author may not be used to endorse or promote products
15243730Srwatson *    derived from this software without specific prior written permission
16243730Srwatson *
17243730Srwatson * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
18243730Srwatson * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
19243730Srwatson * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20243730Srwatson * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
21243730Srwatson * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
22243730Srwatson * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23243730Srwatson * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24243730Srwatson * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25243730Srwatson * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
26243730Srwatson * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27243730Srwatson *
28243730Srwatson *	$Id: http.c,v 1.6 1998/11/05 19:48:17 des Exp $
29243734Srwatson */
30243730Srwatson
31243730Srwatson/*
32243734Srwatson * The base64 code in this file is based on code from MIT fetch, which
33243730Srwatson * has the following copyright and license:
34243730Srwatson *
35243730Srwatson *-
36243730Srwatson * Copyright 1997 Massachusetts Institute of Technology
37243730Srwatson *
38243730Srwatson * Permission to use, copy, modify, and distribute this software and
39243730Srwatson * its documentation for any purpose and without fee is hereby
40243730Srwatson * granted, provided that both the above copyright notice and this
41243730Srwatson * permission notice appear in all copies, that both the above
42243730Srwatson * copyright notice and this permission notice appear in all
43243730Srwatson * supporting documentation, and that the name of M.I.T. not be used
44243730Srwatson * in advertising or publicity pertaining to distribution of the
45243730Srwatson * software without specific, written prior permission.  M.I.T. makes
46243730Srwatson * no representations about the suitability of this software for any
47243730Srwatson * purpose.  It is provided "as is" without express or implied
48243730Srwatson * warranty.
49243730Srwatson *
50243730Srwatson * THIS SOFTWARE IS PROVIDED BY M.I.T. ``AS IS''.  M.I.T. DISCLAIMS
51243730Srwatson * ALL EXPRESS OR IMPLIED WARRANTIES WITH REGARD TO THIS SOFTWARE,
52243730Srwatson * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
53243730Srwatson * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. IN NO EVENT
54243730Srwatson * SHALL M.I.T. BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
55243730Srwatson * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
56243730Srwatson * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
57243730Srwatson * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
58243730Srwatson * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
59243730Srwatson * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
60243730Srwatson * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
61243730Srwatson * SUCH DAMAGE. */
62243730Srwatson
63243730Srwatson#include <sys/param.h>
64243730Srwatson#include <sys/errno.h>
65243730Srwatson#include <sys/socket.h>
66243730Srwatson#include <sys/types.h>
67243730Srwatson
68243730Srwatson#include <netinet/in.h>
69243730Srwatson
70243730Srwatson#include <err.h>
71243730Srwatson#include <ctype.h>
72243730Srwatson#include <netdb.h>
73243730Srwatson#include <stdarg.h>
74243730Srwatson#include <stdio.h>
75243730Srwatson#include <stdlib.h>
76243730Srwatson#include <string.h>
77243730Srwatson#include <unistd.h>
78243730Srwatson
79243730Srwatson#include "fetch.h"
80243730Srwatson#include "common.h"
81243730Srwatson#include "httperr.inc"
82243730Srwatson
/*
 * DEBUG(x) runs the statement x in debugging builds and expands to an
 * empty statement when NDEBUG is defined.
 */
#ifdef NDEBUG
#define DEBUG(x) do { } while (0)
#else
#define DEBUG(x) do x; while (0)
#endif

/* Program name; only declared here, the definition lives outside this file */
extern char *__progname;

/* Line terminator appended to every request and header line we send */
#define ENDL "\r\n"
92243730Srwatson
/* Transfer encodings the reader understands */
#define ENC_NONE 0
#define ENC_CHUNKED 1

/* Maximum number of content type characters kept (not counting the NUL) */
#define HTTPCTYPELEN 59

/*
 * Per-connection state handed to funopen(); the read, write and close
 * callbacks in this file all operate on one of these.
 */
struct cookie
{
    FILE *real_f;			/* stream wrapped around the socket */
    int encoding;			/* ENC_CHUNKED or ENC_NONE */
    char content_type[HTTPCTYPELEN+1];	/* NUL-terminated Content-Type value */
    char *buf;				/* current fgetln() line, not NUL-terminated */
    int b_cur, eof;			/* read offset in buf; set after the last chunk */
    /*
     * b_len is a size_t because its address is passed to fgetln(),
     * which stores a size_t through the pointer it is given.
     */
    size_t b_len;			/* number of valid bytes in buf */
    unsigned chunksize;			/* bytes left in the current chunk */
};
105243730Srwatson
/*
 * Send a formatted line; optionally echo to terminal
 *
 * Formats fmt and the following arguments onto f.  Unless NDEBUG is
 * defined, the same text is also written to stderr between terminal
 * highlighting escapes.
 *
 * Returns 0 on success and -1 if writing to f failed.
 */
static int
_http_cmd(FILE *f, char *fmt, ...)
{
    va_list ap;
    int r;

    va_start(ap, fmt);
    r = vfprintf(f, fmt, ap);
    va_end(ap);
#ifndef NDEBUG
    fprintf(stderr, "\033[1m>>> ");
    /*
     * A va_list that has been handed to vfprintf() may not be used
     * again, so restart the argument traversal for the echo.
     */
    va_start(ap, fmt);
    vfprintf(stderr, fmt, ap);
    va_end(ap);
    fprintf(stderr, "\033[m");
#endif

    return (r < 0) ? -1 : 0;
}
125243730Srwatson
126243730Srwatson/*
127243730Srwatson * Fill the input buffer, do chunk decoding on the fly
128243730Srwatson */
129243730Srwatsonstatic char *
130243730Srwatson_http_fillbuf(struct cookie *c)
131243730Srwatson{
132243730Srwatson    char *ln;
133243730Srwatson    unsigned int len;
134243730Srwatson
135243730Srwatson    if (c->eof)
136243730Srwatson	return NULL;
137243730Srwatson
138243730Srwatson    if (c->encoding == ENC_NONE) {
139243730Srwatson	c->buf = fgetln(c->real_f, &(c->b_len));
140243730Srwatson	c->b_cur = 0;
141243730Srwatson    } else if (c->encoding == ENC_CHUNKED) {
142243730Srwatson	if (c->chunksize == 0) {
143243730Srwatson	    ln = fgetln(c->real_f, &len);
144243730Srwatson	    DEBUG(fprintf(stderr, "\033[1m_http_fillbuf(): new chunk: "
145243730Srwatson			  "%*.*s\033[m\n", (int)len-2, (int)len-2, ln));
146243730Srwatson	    sscanf(ln, "%x", &(c->chunksize));
147243730Srwatson	    if (!c->chunksize) {
148243730Srwatson		DEBUG(fprintf(stderr, "\033[1m_http_fillbuf(): "
149243730Srwatson			      "end of last chunk\033[m\n"));
150243730Srwatson		c->eof = 1;
151243730Srwatson		return NULL;
152243730Srwatson	    }
153243730Srwatson	    DEBUG(fprintf(stderr, "\033[1m_http_fillbuf(): "
154243730Srwatson			  "new chunk: %X\033[m\n", c->chunksize));
155243730Srwatson	}
156243730Srwatson	c->buf = fgetln(c->real_f, &(c->b_len));
157243730Srwatson	if (c->b_len > c->chunksize)
158243730Srwatson	    c->b_len = c->chunksize;
159243730Srwatson	c->chunksize -= c->b_len;
160243730Srwatson	c->b_cur = 0;
161243730Srwatson    }
162243730Srwatson    else return NULL; /* unknown encoding */
163243730Srwatson    return c->buf;
164243730Srwatson}
165243730Srwatson
166243730Srwatson/*
167243730Srwatson * Read function
168243730Srwatson */
169243730Srwatsonstatic int
170243730Srwatson_http_readfn(struct cookie *c, char *buf, int len)
171243730Srwatson{
172243730Srwatson    int l, pos = 0;
173243730Srwatson    while (len) {
174243730Srwatson	/* empty buffer */
175243730Srwatson	if (!c->buf || (c->b_cur == c->b_len))
176243730Srwatson	    if (!_http_fillbuf(c))
177243730Srwatson		break;
178243730Srwatson
179243730Srwatson	l = c->b_len - c->b_cur;
180243730Srwatson	if (len < l) l = len;
181243730Srwatson	memcpy(buf + pos, c->buf + c->b_cur, l);
182243730Srwatson	c->b_cur += l;
183243730Srwatson	pos += l;
184243730Srwatson	len -= l;
185243730Srwatson    }
186243730Srwatson
187243730Srwatson    if (ferror(c->real_f))
188243730Srwatson	return -1;
189243730Srwatson    else return pos;
190243730Srwatson}
191243730Srwatson
192243730Srwatson/*
193243730Srwatson * Write function
194243730Srwatson */
195243730Srwatsonstatic int
196243730Srwatson_http_writefn(struct cookie *c, const char *buf, int len)
197243730Srwatson{
198243730Srwatson    size_t r = fwrite(buf, 1, (size_t)len, c->real_f);
199243730Srwatson    return r ? r : -1;
200243730Srwatson}
201243730Srwatson
202243730Srwatson/*
203243730Srwatson * Close function
204243730Srwatson */
205243730Srwatsonstatic int
206243730Srwatson_http_closefn(struct cookie *c)
207243730Srwatson{
208243730Srwatson    int r = fclose(c->real_f);
209243730Srwatson    free(c);
210243730Srwatson    return (r == EOF) ? -1 : 0;
211243730Srwatson}
212243730Srwatson
213243730Srwatson/*
214243730Srwatson * Extract content type from cookie
215243730Srwatson */
216243730Srwatsonchar *
217243730SrwatsonfetchContentType(FILE *f)
218243730Srwatson{
219243730Srwatson    /*
220243730Srwatson     * We have no way of making sure this really *is* one of our cookies,
221243730Srwatson     * so just check for a null pointer and hope for the best.
222243730Srwatson     */
223243730Srwatson    return f->_cookie ? (((struct cookie *)f->_cookie)->content_type) : NULL;
224243730Srwatson}
225243730Srwatson
/*
 * Base64 encoding
 *
 * Encodes the l bytes at src into dst, padding the last group with '='
 * and appending a terminating NUL.  dst must have room for
 * 4 * ((l + 2) / 3) + 1 bytes.  Returns the number of characters
 * written, not counting the NUL.
 */
int
_http_base64(char *dst, char *src, int l)
{
    static const char base64[] =
	"ABCDEFGHIJKLMNOPQRSTUVWXYZ"
	"abcdefghijklmnopqrstuvwxyz"
	"0123456789+/";
    /*
     * Read the input as unsigned bytes: with a signed char, any byte
     * >= 0x80 would sign-extend and clobber the neighbouring bits.
     */
    const unsigned char *in = (const unsigned char *)src;
    unsigned long t;
    int r = 0;

    /* full groups: three input bytes become four output characters */
    while (l >= 3) {
	t = ((unsigned long)in[0] << 16) | ((unsigned long)in[1] << 8) | in[2];
	dst[0] = base64[(t >> 18) & 0x3f];
	dst[1] = base64[(t >> 12) & 0x3f];
	dst[2] = base64[(t >> 6) & 0x3f];
	dst[3] = base64[(t >> 0) & 0x3f];
	in += 3; l -= 3;
	dst += 4; r += 4;
    }

    /* trailing one or two bytes are padded out to a full group */
    switch (l) {
    case 2:
	t = ((unsigned long)in[0] << 16) | ((unsigned long)in[1] << 8);
	dst[0] = base64[(t >> 18) & 0x3f];
	dst[1] = base64[(t >> 12) & 0x3f];
	dst[2] = base64[(t >> 6) & 0x3f];
	dst[3] = '=';
	dst += 4;
	r += 4;
	break;
    case 1:
	t = (unsigned long)in[0] << 16;
	dst[0] = base64[(t >> 18) & 0x3f];
	dst[1] = base64[(t >> 12) & 0x3f];
	dst[2] = dst[3] = '=';
	dst += 4;
	r += 4;
	break;
    default:
	break;
    }

    *dst = 0;
    return r;
}
273243730Srwatson
/*
 * Encode username and password
 *
 * Builds the credentials for a "Basic" Authorization header: the
 * string "usr:pwd" encoded as a single base64 blob.  Returns a
 * malloc()ed, NUL-terminated string that the caller must free(), or
 * NULL if memory could not be allocated.
 */
char *
_http_auth(char *usr, char *pwd)
{
    size_t lu, lp, rawlen, enclen;
    char *raw, *str;

    lu = strlen(usr);
    lp = strlen(pwd);

    rawlen = lu + 1 + lp;		/* user name, colon, password */
    /* padded base64 emits four characters per (partial) group of three */
    enclen = ((rawlen + 2) / 3) * 4
	+ 1;				/* null */

    if ((raw = malloc(rawlen + 1)) == NULL)
	return NULL;
    memcpy(raw, usr, lu);
    raw[lu] = ':';
    memcpy(raw + lu + 1, pwd, lp);
    raw[rawlen] = 0;

    if ((str = malloc(enclen)) == NULL) {
	free(raw);
	return NULL;
    }

    /* _http_base64() NUL-terminates its output */
    _http_base64(str, raw, (int)rawlen);
    free(raw);

    return str;
}
301243730Srwatson
302243730Srwatson/*
303243730Srwatson * Retrieve a file by HTTP
304243730Srwatson */
305243730SrwatsonFILE *
306243730SrwatsonfetchGetHTTP(struct url *URL, char *flags)
307243730Srwatson{
308243730Srwatson    int sd = -1, err, i, enc = ENC_NONE;
309243730Srwatson    struct cookie *c;
310243730Srwatson    char *ln, *p, *q;
311243730Srwatson    FILE *f, *cf;
312243730Srwatson    size_t len;
313243730Srwatson
314243730Srwatson    /* allocate cookie */
315243730Srwatson    if ((c = calloc(1, sizeof(struct cookie))) == NULL)
316243730Srwatson	return NULL;
317243730Srwatson
318243730Srwatson    /* check port */
319243730Srwatson    if (!URL->port)
320243730Srwatson	URL->port = 80; /* default HTTP port */
321243730Srwatson
322243730Srwatson    /* attempt to connect to proxy server */
323243730Srwatson    if (getenv("HTTP_PROXY")) {
324243730Srwatson	char *px, host[MAXHOSTNAMELEN];
325243730Srwatson	int port = 3128; /* XXX I think 3128 is default... check? */
326243730Srwatson	size_t len;
327243730Srwatson
328243730Srwatson	/* measure length */
329243730Srwatson	px = getenv("HTTP_PROXY");
330243730Srwatson	len = strcspn(px, ":");
331243730Srwatson
332243730Srwatson	/* get port (atoi is a little too tolerant perhaps?) */
333243730Srwatson	if (px[len] == ':')
334243730Srwatson	    port = atoi(px+len+1);
335243730Srwatson
336243730Srwatson	/* get host name */
337243730Srwatson	if (len >= MAXHOSTNAMELEN)
338243730Srwatson	    len = MAXHOSTNAMELEN - 1;
339243730Srwatson	strncpy(host, px, len);
340243730Srwatson	host[len] = 0;
341243730Srwatson
342243730Srwatson	/* connect */
343243730Srwatson	sd = fetchConnect(host, port);
344243730Srwatson    }
345243730Srwatson
346243730Srwatson    /* if no proxy is configured or could be contacted, try direct */
347243730Srwatson    if (sd == -1) {
348243730Srwatson	if ((sd = fetchConnect(URL->host, URL->port)) == -1)
349243730Srwatson	    goto ouch;
350243730Srwatson    }
351243730Srwatson
352243730Srwatson    /* reopen as stream */
353243730Srwatson    if ((f = fdopen(sd, "r+")) == NULL)
354243730Srwatson	goto ouch;
355243730Srwatson    c->real_f = f;
356243730Srwatson
357243730Srwatson    /* send request (proxies require absolute form, so use that) */
358243730Srwatson    _http_cmd(f, "GET http://%s:%d%s HTTP/1.1" ENDL,
359243730Srwatson	      URL->host, URL->port, URL->doc);
360243730Srwatson
361243730Srwatson    /* start sending headers away */
362243730Srwatson    if (URL->user[0] || URL->pwd[0]) {
363243730Srwatson	char *auth_str = _http_auth(URL->user, URL->pwd);
364243730Srwatson	if (!auth_str)
365243730Srwatson	    goto fouch;
366243730Srwatson	_http_cmd(f, "Authorization: Basic %s" ENDL, auth_str);
367243730Srwatson	free(auth_str);
368243730Srwatson    }
369243730Srwatson    _http_cmd(f, "Host: %s:%d" ENDL, URL->host, URL->port);
370243730Srwatson    _http_cmd(f, "User-Agent: %s " _LIBFETCH_VER ENDL, __progname);
371243730Srwatson    _http_cmd(f, "Connection: close" ENDL ENDL);
372243730Srwatson
373243730Srwatson    /* get response */
374243730Srwatson    if ((ln = fgetln(f, &len)) == NULL)
375243730Srwatson	goto fouch;
376243730Srwatson    DEBUG(fprintf(stderr, "response: [\033[1m%*.*s\033[m]\n",
377243730Srwatson		  (int)len-2, (int)len-2, ln));
378243730Srwatson
379243730Srwatson    /* we can't use strchr() and friends since ln isn't NUL-terminated */
380243730Srwatson    p = ln;
381243730Srwatson    while ((p < ln + len) && !isspace(*p))
382243730Srwatson	p++;
383243730Srwatson    while ((p < ln + len) && !isdigit(*p))
384243730Srwatson	p++;
385243730Srwatson    if (!isdigit(*p))
386243730Srwatson	goto fouch;
387243730Srwatson    err = atoi(p);
388243730Srwatson    DEBUG(fprintf(stderr, "code:     [\033[1m%d\033[m]\n", err));
389243730Srwatson
390243730Srwatson    /* add code to handle redirects later */
391243730Srwatson    if (err != 200) {
392243730Srwatson	_http_seterr(err);
393243730Srwatson	goto fouch;
394243730Srwatson    }
395243730Srwatson
396243730Srwatson    /* browse through header */
397243730Srwatson    while (1) {
398243730Srwatson	if ((ln = fgetln(f, &len)) == NULL)
399243730Srwatson	    goto fouch;
400243730Srwatson	if ((ln[0] == '\r') || (ln[0] == '\n'))
401243730Srwatson	    break;
402243730Srwatson	DEBUG(fprintf(stderr, "header:   [\033[1m%*.*s\033[m]\n",
403243730Srwatson		      (int)len-2, (int)len-2, ln));
404243730Srwatson#define XFERENC "Transfer-Encoding:"
405243730Srwatson	if (strncasecmp(ln, XFERENC, sizeof(XFERENC)-1) == 0) {
406243730Srwatson	    p = ln + sizeof(XFERENC) - 1;
407243730Srwatson	    while ((p < ln + len) && isspace(*p))
408243730Srwatson		p++;
409243730Srwatson	    for (q = p; (q < ln + len) && !isspace(*q); q++)
410243730Srwatson		/* VOID */ ;
411243730Srwatson	    *q = 0;
412243730Srwatson	    if (strcasecmp(p, "chunked") == 0)
413243730Srwatson		enc = ENC_CHUNKED;
414243730Srwatson	    DEBUG(fprintf(stderr, "xferenc:  [\033[1m%s\033[m]\n", p));
415243730Srwatson#undef XFERENC
416243730Srwatson#define CONTTYPE "Content-Type:"
417243730Srwatson	} else if (strncasecmp(ln, CONTTYPE, sizeof(CONTTYPE)-1) == 0) {
418243730Srwatson	    p = ln + sizeof(CONTTYPE) - 1;
419243730Srwatson	    while ((p < ln + len) && isspace(*p))
420243730Srwatson		p++;
421243730Srwatson	    for (i = 0; p < ln + len; p++)
422243730Srwatson		if (i < HTTPCTYPELEN)
423243730Srwatson		    c->content_type[i++] = *p;
424243730Srwatson	    do c->content_type[i--] = 0; while (isspace(c->content_type[i]));
425243730Srwatson	    DEBUG(fprintf(stderr, "conttype: [\033[1m%s\033[m]\n",
426243730Srwatson			  c->content_type));
427243730Srwatson#undef CONTTYPE
428243730Srwatson	}
429243730Srwatson    }
430243730Srwatson
431243730Srwatson    /* only body remains */
432243730Srwatson    c->encoding = enc;
433243730Srwatson    cf = funopen(c,
434243730Srwatson		 (int (*)(void *, char *, int))_http_readfn,
435243730Srwatson		 (int (*)(void *, const char *, int))_http_writefn,
436243730Srwatson		 (fpos_t (*)(void *, fpos_t, int))NULL,
437243730Srwatson		 (int (*)(void *))_http_closefn);
438243730Srwatson    if (cf == NULL)
439243730Srwatson	goto fouch;
440243730Srwatson    return cf;
441243730Srwatson
442243730Srwatsonouch:
443243730Srwatson    if (sd >= 0)
444243730Srwatson	close(sd);
445243730Srwatson    free(c);
446243730Srwatson    return NULL;
447243730Srwatsonfouch:
448243730Srwatson    fclose(f);
449243730Srwatson    free(c);
450243730Srwatson    return NULL;
451243730Srwatson}
452243730Srwatson
/*
 * Store a file by HTTP (not implemented)
 *
 * Prints a warning and always returns NULL; both arguments are ignored.
 */
FILE *
fetchPutHTTP(struct url *URL, char *flags)
{
    FILE *result = NULL;

    (void)URL;
    (void)flags;
    warnx("fetchPutHTTP(): not implemented");
    return result;
}
459243730Srwatson
/*
 * Get an HTTP document's metadata (not implemented)
 *
 * Prints a warning and always returns -1; all arguments are ignored.
 */
int
fetchStatHTTP(struct url *url, struct url_stat *us, char *flags)
{
    const int failure = -1;

    (void)url;
    (void)us;
    (void)flags;
    warnx("fetchStatHTTP(): not implemented");
    return failure;
}
469243730Srwatson