http.c revision 40975
1243730Srwatson/*- 2243730Srwatson * Copyright (c) 1998 Dag-Erling Co�dan Sm�rgrav 3243730Srwatson * All rights reserved. 4243730Srwatson * 5243730Srwatson * Redistribution and use in source and binary forms, with or without 6243730Srwatson * modification, are permitted provided that the following conditions 7243730Srwatson * are met: 8243730Srwatson * 1. Redistributions of source code must retain the above copyright 9243730Srwatson * notice, this list of conditions and the following disclaimer 10243730Srwatson * in this position and unchanged. 11243730Srwatson * 2. Redistributions in binary form must reproduce the above copyright 12243730Srwatson * notice, this list of conditions and the following disclaimer in the 13243730Srwatson * documentation and/or other materials provided with the distribution. 14243730Srwatson * 3. The name of the author may not be used to endorse or promote products 15243730Srwatson * derived from this software without specific prior written permission 16243730Srwatson * 17243730Srwatson * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR 18243730Srwatson * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 19243730Srwatson * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 20243730Srwatson * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, 21243730Srwatson * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 22243730Srwatson * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 23243730Srwatson * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 24243730Srwatson * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 25243730Srwatson * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF 26243730Srwatson * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 27243730Srwatson * 28243730Srwatson * $Id: http.c,v 1.6 1998/11/05 19:48:17 des Exp $ 29243734Srwatson */ 30243730Srwatson 31243730Srwatson/* 32243734Srwatson * The base64 code in this file is based on code from MIT fetch, which 33243730Srwatson * has the following copyright and license: 34243730Srwatson * 35243730Srwatson *- 36243730Srwatson * Copyright 1997 Massachusetts Institute of Technology 37243730Srwatson * 38243730Srwatson * Permission to use, copy, modify, and distribute this software and 39243730Srwatson * its documentation for any purpose and without fee is hereby 40243730Srwatson * granted, provided that both the above copyright notice and this 41243730Srwatson * permission notice appear in all copies, that both the above 42243730Srwatson * copyright notice and this permission notice appear in all 43243730Srwatson * supporting documentation, and that the name of M.I.T. not be used 44243730Srwatson * in advertising or publicity pertaining to distribution of the 45243730Srwatson * software without specific, written prior permission. M.I.T. makes 46243730Srwatson * no representations about the suitability of this software for any 47243730Srwatson * purpose. It is provided "as is" without express or implied 48243730Srwatson * warranty. 49243730Srwatson * 50243730Srwatson * THIS SOFTWARE IS PROVIDED BY M.I.T. ``AS IS''. M.I.T. DISCLAIMS 51243730Srwatson * ALL EXPRESS OR IMPLIED WARRANTIES WITH REGARD TO THIS SOFTWARE, 52243730Srwatson * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF 53243730Srwatson * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. IN NO EVENT 54243730Srwatson * SHALL M.I.T. BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 55243730Srwatson * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 56243730Srwatson * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF 57243730Srwatson * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND 58243730Srwatson * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 59243730Srwatson * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT 60243730Srwatson * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 61243730Srwatson * SUCH DAMAGE. */ 62243730Srwatson 63243730Srwatson#include <sys/param.h> 64243730Srwatson#include <sys/errno.h> 65243730Srwatson#include <sys/socket.h> 66243730Srwatson#include <sys/types.h> 67243730Srwatson 68243730Srwatson#include <netinet/in.h> 69243730Srwatson 70243730Srwatson#include <err.h> 71243730Srwatson#include <ctype.h> 72243730Srwatson#include <netdb.h> 73243730Srwatson#include <stdarg.h> 74243730Srwatson#include <stdio.h> 75243730Srwatson#include <stdlib.h> 76243730Srwatson#include <string.h> 77243730Srwatson#include <unistd.h> 78243730Srwatson 79243730Srwatson#include "fetch.h" 80243730Srwatson#include "common.h" 81243730Srwatson#include "httperr.inc" 82243730Srwatson 83243730Srwatson#ifndef NDEBUG 84243730Srwatson#define DEBUG(x) do x; while (0) 85243730Srwatson#else 86243730Srwatson#define DEBUG(x) do { } while (0) 87243730Srwatson#endif 88243730Srwatson 89243730Srwatsonextern char *__progname; 90243730Srwatson 91243730Srwatson#define ENDL "\r\n" 92243730Srwatson 93243730Srwatsonstruct cookie 94243730Srwatson{ 95243730Srwatson FILE *real_f; 96243730Srwatson#define ENC_NONE 0 97243730Srwatson#define ENC_CHUNKED 1 98243730Srwatson int encoding; /* 1 = chunked, 0 = none */ 99243730Srwatson#define HTTPCTYPELEN 59 100243730Srwatson char content_type[HTTPCTYPELEN+1]; 101243730Srwatson char *buf; 102243730Srwatson int b_cur, eof; 103243730Srwatson unsigned b_len, chunksize; 104243730Srwatson}; 105243730Srwatson 106243730Srwatson/* 107243730Srwatson * Send a formatted line; optionally echo to terminal 108243730Srwatson */ 109243730Srwatsonstatic int 110243730Srwatson_http_cmd(FILE *f, char *fmt, ...) 111243730Srwatson{ 112243730Srwatson va_list ap; 113243730Srwatson 114243730Srwatson va_start(ap, fmt); 115243730Srwatson vfprintf(f, fmt, ap); 116243730Srwatson#ifndef NDEBUG 117243730Srwatson fprintf(stderr, "\033[1m>>> "); 118243730Srwatson vfprintf(stderr, fmt, ap); 119243730Srwatson fprintf(stderr, "\033[m"); 120243730Srwatson#endif 121243730Srwatson va_end(ap); 122243730Srwatson 123243730Srwatson return 0; /* XXX */ 124243730Srwatson} 125243730Srwatson 126243730Srwatson/* 127243730Srwatson * Fill the input buffer, do chunk decoding on the fly 128243730Srwatson */ 129243730Srwatsonstatic char * 130243730Srwatson_http_fillbuf(struct cookie *c) 131243730Srwatson{ 132243730Srwatson char *ln; 133243730Srwatson unsigned int len; 134243730Srwatson 135243730Srwatson if (c->eof) 136243730Srwatson return NULL; 137243730Srwatson 138243730Srwatson if (c->encoding == ENC_NONE) { 139243730Srwatson c->buf = fgetln(c->real_f, &(c->b_len)); 140243730Srwatson c->b_cur = 0; 141243730Srwatson } else if (c->encoding == ENC_CHUNKED) { 142243730Srwatson if (c->chunksize == 0) { 143243730Srwatson ln = fgetln(c->real_f, &len); 144243730Srwatson DEBUG(fprintf(stderr, "\033[1m_http_fillbuf(): new chunk: " 145243730Srwatson "%*.*s\033[m\n", (int)len-2, (int)len-2, ln)); 146243730Srwatson sscanf(ln, "%x", &(c->chunksize)); 147243730Srwatson if (!c->chunksize) { 148243730Srwatson DEBUG(fprintf(stderr, "\033[1m_http_fillbuf(): " 149243730Srwatson "end of last chunk\033[m\n")); 150243730Srwatson c->eof = 1; 151243730Srwatson return NULL; 152243730Srwatson } 153243730Srwatson DEBUG(fprintf(stderr, "\033[1m_http_fillbuf(): " 154243730Srwatson "new chunk: %X\033[m\n", c->chunksize)); 155243730Srwatson } 156243730Srwatson c->buf = fgetln(c->real_f, &(c->b_len)); 157243730Srwatson if (c->b_len > c->chunksize) 158243730Srwatson c->b_len = c->chunksize; 159243730Srwatson c->chunksize -= c->b_len; 160243730Srwatson c->b_cur = 0; 161243730Srwatson } 162243730Srwatson else return NULL; /* unknown encoding */ 163243730Srwatson return c->buf; 164243730Srwatson} 165243730Srwatson 166243730Srwatson/* 167243730Srwatson * Read function 168243730Srwatson */ 169243730Srwatsonstatic int 170243730Srwatson_http_readfn(struct cookie *c, char *buf, int len) 171243730Srwatson{ 172243730Srwatson int l, pos = 0; 173243730Srwatson while (len) { 174243730Srwatson /* empty buffer */ 175243730Srwatson if (!c->buf || (c->b_cur == c->b_len)) 176243730Srwatson if (!_http_fillbuf(c)) 177243730Srwatson break; 178243730Srwatson 179243730Srwatson l = c->b_len - c->b_cur; 180243730Srwatson if (len < l) l = len; 181243730Srwatson memcpy(buf + pos, c->buf + c->b_cur, l); 182243730Srwatson c->b_cur += l; 183243730Srwatson pos += l; 184243730Srwatson len -= l; 185243730Srwatson } 186243730Srwatson 187243730Srwatson if (ferror(c->real_f)) 188243730Srwatson return -1; 189243730Srwatson else return pos; 190243730Srwatson} 191243730Srwatson 192243730Srwatson/* 193243730Srwatson * Write function 194243730Srwatson */ 195243730Srwatsonstatic int 196243730Srwatson_http_writefn(struct cookie *c, const char *buf, int len) 197243730Srwatson{ 198243730Srwatson size_t r = fwrite(buf, 1, (size_t)len, c->real_f); 199243730Srwatson return r ? r : -1; 200243730Srwatson} 201243730Srwatson 202243730Srwatson/* 203243730Srwatson * Close function 204243730Srwatson */ 205243730Srwatsonstatic int 206243730Srwatson_http_closefn(struct cookie *c) 207243730Srwatson{ 208243730Srwatson int r = fclose(c->real_f); 209243730Srwatson free(c); 210243730Srwatson return (r == EOF) ? -1 : 0; 211243730Srwatson} 212243730Srwatson 213243730Srwatson/* 214243730Srwatson * Extract content type from cookie 215243730Srwatson */ 216243730Srwatsonchar * 217243730SrwatsonfetchContentType(FILE *f) 218243730Srwatson{ 219243730Srwatson /* 220243730Srwatson * We have no way of making sure this really *is* one of our cookies, 221243730Srwatson * so just check for a null pointer and hope for the best. 222243730Srwatson */ 223243730Srwatson return f->_cookie ? (((struct cookie *)f->_cookie)->content_type) : NULL; 224243730Srwatson} 225243730Srwatson 226243730Srwatson/* 227243730Srwatson * Base64 encoding 228243730Srwatson */ 229243730Srwatsonint 230243730Srwatson_http_base64(char *dst, char *src, int l) 231243730Srwatson{ 232243730Srwatson static const char base64[] = 233243730Srwatson "ABCDEFGHIJKLMNOPQRSTUVWXYZ" 234243730Srwatson "abcdefghijklmnopqrstuvwxyz" 235243730Srwatson "0123456789+/"; 236243730Srwatson int t, r = 0; 237243730Srwatson 238243730Srwatson while (l >= 3) { 239243730Srwatson t = (src[0] << 16) | (src[1] << 8) | src[2]; 240243730Srwatson dst[0] = base64[(t >> 18) & 0x3f]; 241243730Srwatson dst[1] = base64[(t >> 12) & 0x3f]; 242243730Srwatson dst[2] = base64[(t >> 6) & 0x3f]; 243243730Srwatson dst[3] = base64[(t >> 0) & 0x3f]; 244243730Srwatson src += 3; l -= 3; 245243730Srwatson dst += 4; r += 4; 246243730Srwatson } 247243730Srwatson 248243730Srwatson switch (l) { 249243730Srwatson case 2: 250243730Srwatson t = (src[0] << 16) | (src[1] << 8); 251243730Srwatson dst[0] = base64[(t >> 18) & 0x3f]; 252243730Srwatson dst[1] = base64[(t >> 12) & 0x3f]; 253243730Srwatson dst[2] = base64[(t >> 6) & 0x3f]; 254243730Srwatson dst[3] = '='; 255243730Srwatson dst += 4; 256243730Srwatson r += 4; 257243730Srwatson break; 258243730Srwatson case 1: 259243730Srwatson t = src[0] << 16; 260243730Srwatson dst[0] = base64[(t >> 18) & 0x3f]; 261243730Srwatson dst[1] = base64[(t >> 12) & 0x3f]; 262243730Srwatson dst[2] = dst[3] = '='; 263243730Srwatson dst += 4; 264243730Srwatson r += 4; 265243730Srwatson break; 266243730Srwatson case 0: 267243730Srwatson break; 268243730Srwatson } 269243730Srwatson 270243730Srwatson *dst = 0; 271243730Srwatson return r; 272243730Srwatson} 273243730Srwatson 274243730Srwatson/* 275243730Srwatson * Encode username and password 276243730Srwatson */ 277243730Srwatsonchar * 278243730Srwatson_http_auth(char *usr, char *pwd) 279243730Srwatson{ 280243730Srwatson int len, lu, lp; 281243730Srwatson char *str, *s; 282243730Srwatson 283243730Srwatson lu = strlen(usr); 284243730Srwatson lp = strlen(pwd); 285243730Srwatson 286243730Srwatson len = (lu * 4 + 2) / 3 /* user name, round up */ 287243730Srwatson + 1 /* colon */ 288243730Srwatson + (lp * 4 + 2) / 3 /* password, round up */ 289243730Srwatson + 1; /* null */ 290243730Srwatson 291243730Srwatson if ((s = str = (char *)malloc(len)) == NULL) 292243730Srwatson return NULL; 293243730Srwatson 294243730Srwatson s += _http_base64(s, usr, lu); 295243730Srwatson *s++ = ':'; 296243730Srwatson s += _http_base64(s, pwd, lp); 297243730Srwatson *s = 0; 298243730Srwatson 299243730Srwatson return str; 300243730Srwatson} 301243730Srwatson 302243730Srwatson/* 303243730Srwatson * Retrieve a file by HTTP 304243730Srwatson */ 305243730SrwatsonFILE * 306243730SrwatsonfetchGetHTTP(struct url *URL, char *flags) 307243730Srwatson{ 308243730Srwatson int sd = -1, err, i, enc = ENC_NONE; 309243730Srwatson struct cookie *c; 310243730Srwatson char *ln, *p, *q; 311243730Srwatson FILE *f, *cf; 312243730Srwatson size_t len; 313243730Srwatson 314243730Srwatson /* allocate cookie */ 315243730Srwatson if ((c = calloc(1, sizeof(struct cookie))) == NULL) 316243730Srwatson return NULL; 317243730Srwatson 318243730Srwatson /* check port */ 319243730Srwatson if (!URL->port) 320243730Srwatson URL->port = 80; /* default HTTP port */ 321243730Srwatson 322243730Srwatson /* attempt to connect to proxy server */ 323243730Srwatson if (getenv("HTTP_PROXY")) { 324243730Srwatson char *px, host[MAXHOSTNAMELEN]; 325243730Srwatson int port = 3128; /* XXX I think 3128 is default... check? */ 326243730Srwatson size_t len; 327243730Srwatson 328243730Srwatson /* measure length */ 329243730Srwatson px = getenv("HTTP_PROXY"); 330243730Srwatson len = strcspn(px, ":"); 331243730Srwatson 332243730Srwatson /* get port (atoi is a little too tolerant perhaps?) */ 333243730Srwatson if (px[len] == ':') 334243730Srwatson port = atoi(px+len+1); 335243730Srwatson 336243730Srwatson /* get host name */ 337243730Srwatson if (len >= MAXHOSTNAMELEN) 338243730Srwatson len = MAXHOSTNAMELEN - 1; 339243730Srwatson strncpy(host, px, len); 340243730Srwatson host[len] = 0; 341243730Srwatson 342243730Srwatson /* connect */ 343243730Srwatson sd = fetchConnect(host, port); 344243730Srwatson } 345243730Srwatson 346243730Srwatson /* if no proxy is configured or could be contacted, try direct */ 347243730Srwatson if (sd == -1) { 348243730Srwatson if ((sd = fetchConnect(URL->host, URL->port)) == -1) 349243730Srwatson goto ouch; 350243730Srwatson } 351243730Srwatson 352243730Srwatson /* reopen as stream */ 353243730Srwatson if ((f = fdopen(sd, "r+")) == NULL) 354243730Srwatson goto ouch; 355243730Srwatson c->real_f = f; 356243730Srwatson 357243730Srwatson /* send request (proxies require absolute form, so use that) */ 358243730Srwatson _http_cmd(f, "GET http://%s:%d%s HTTP/1.1" ENDL, 359243730Srwatson URL->host, URL->port, URL->doc); 360243730Srwatson 361243730Srwatson /* start sending headers away */ 362243730Srwatson if (URL->user[0] || URL->pwd[0]) { 363243730Srwatson char *auth_str = _http_auth(URL->user, URL->pwd); 364243730Srwatson if (!auth_str) 365243730Srwatson goto fouch; 366243730Srwatson _http_cmd(f, "Authorization: Basic %s" ENDL, auth_str); 367243730Srwatson free(auth_str); 368243730Srwatson } 369243730Srwatson _http_cmd(f, "Host: %s:%d" ENDL, URL->host, URL->port); 370243730Srwatson _http_cmd(f, "User-Agent: %s " _LIBFETCH_VER ENDL, __progname); 371243730Srwatson _http_cmd(f, "Connection: close" ENDL ENDL); 372243730Srwatson 373243730Srwatson /* get response */ 374243730Srwatson if ((ln = fgetln(f, &len)) == NULL) 375243730Srwatson goto fouch; 376243730Srwatson DEBUG(fprintf(stderr, "response: [\033[1m%*.*s\033[m]\n", 377243730Srwatson (int)len-2, (int)len-2, ln)); 378243730Srwatson 379243730Srwatson /* we can't use strchr() and friends since ln isn't NUL-terminated */ 380243730Srwatson p = ln; 381243730Srwatson while ((p < ln + len) && !isspace(*p)) 382243730Srwatson p++; 383243730Srwatson while ((p < ln + len) && !isdigit(*p)) 384243730Srwatson p++; 385243730Srwatson if (!isdigit(*p)) 386243730Srwatson goto fouch; 387243730Srwatson err = atoi(p); 388243730Srwatson DEBUG(fprintf(stderr, "code: [\033[1m%d\033[m]\n", err)); 389243730Srwatson 390243730Srwatson /* add code to handle redirects later */ 391243730Srwatson if (err != 200) { 392243730Srwatson _http_seterr(err); 393243730Srwatson goto fouch; 394243730Srwatson } 395243730Srwatson 396243730Srwatson /* browse through header */ 397243730Srwatson while (1) { 398243730Srwatson if ((ln = fgetln(f, &len)) == NULL) 399243730Srwatson goto fouch; 400243730Srwatson if ((ln[0] == '\r') || (ln[0] == '\n')) 401243730Srwatson break; 402243730Srwatson DEBUG(fprintf(stderr, "header: [\033[1m%*.*s\033[m]\n", 403243730Srwatson (int)len-2, (int)len-2, ln)); 404243730Srwatson#define XFERENC "Transfer-Encoding:" 405243730Srwatson if (strncasecmp(ln, XFERENC, sizeof(XFERENC)-1) == 0) { 406243730Srwatson p = ln + sizeof(XFERENC) - 1; 407243730Srwatson while ((p < ln + len) && isspace(*p)) 408243730Srwatson p++; 409243730Srwatson for (q = p; (q < ln + len) && !isspace(*q); q++) 410243730Srwatson /* VOID */ ; 411243730Srwatson *q = 0; 412243730Srwatson if (strcasecmp(p, "chunked") == 0) 413243730Srwatson enc = ENC_CHUNKED; 414243730Srwatson DEBUG(fprintf(stderr, "xferenc: [\033[1m%s\033[m]\n", p)); 415243730Srwatson#undef XFERENC 416243730Srwatson#define CONTTYPE "Content-Type:" 417243730Srwatson } else if (strncasecmp(ln, CONTTYPE, sizeof(CONTTYPE)-1) == 0) { 418243730Srwatson p = ln + sizeof(CONTTYPE) - 1; 419243730Srwatson while ((p < ln + len) && isspace(*p)) 420243730Srwatson p++; 421243730Srwatson for (i = 0; p < ln + len; p++) 422243730Srwatson if (i < HTTPCTYPELEN) 423243730Srwatson c->content_type[i++] = *p; 424243730Srwatson do c->content_type[i--] = 0; while (isspace(c->content_type[i])); 425243730Srwatson DEBUG(fprintf(stderr, "conttype: [\033[1m%s\033[m]\n", 426243730Srwatson c->content_type)); 427243730Srwatson#undef CONTTYPE 428243730Srwatson } 429243730Srwatson } 430243730Srwatson 431243730Srwatson /* only body remains */ 432243730Srwatson c->encoding = enc; 433243730Srwatson cf = funopen(c, 434243730Srwatson (int (*)(void *, char *, int))_http_readfn, 435243730Srwatson (int (*)(void *, const char *, int))_http_writefn, 436243730Srwatson (fpos_t (*)(void *, fpos_t, int))NULL, 437243730Srwatson (int (*)(void *))_http_closefn); 438243730Srwatson if (cf == NULL) 439243730Srwatson goto fouch; 440243730Srwatson return cf; 441243730Srwatson 442243730Srwatsonouch: 443243730Srwatson if (sd >= 0) 444243730Srwatson close(sd); 445243730Srwatson free(c); 446243730Srwatson return NULL; 447243730Srwatsonfouch: 448243730Srwatson fclose(f); 449243730Srwatson free(c); 450243730Srwatson return NULL; 451243730Srwatson} 452243730Srwatson 453243730SrwatsonFILE * 454243730SrwatsonfetchPutHTTP(struct url *URL, char *flags) 455243730Srwatson{ 456243730Srwatson warnx("fetchPutHTTP(): not implemented"); 457243730Srwatson return NULL; 458243730Srwatson} 459243730Srwatson 460243730Srwatson/* 461243730Srwatson * Get an HTTP document's metadata 462243730Srwatson */ 463243730Srwatsonint 464243730SrwatsonfetchStatHTTP(struct url *url, struct url_stat *us, char *flags) 465243730Srwatson{ 466243730Srwatson warnx("fetchStatHTTP(): not implemented"); 467243730Srwatson return -1; 468243730Srwatson} 469243730Srwatson