/*-
 * Copyright (c) 1998-2004 Dag-Erling Smørgrav
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer
 *    in this position and unchanged.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. The name of the author may not be used to endorse or promote products
 *    derived from this software without specific prior written permission
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
2737535Sdes */ 2837535Sdes 2984203Sdillon#include <sys/cdefs.h> 3084203Sdillon__FBSDID("$FreeBSD$"); 3184203Sdillon 3237535Sdes#include <sys/param.h> 3340975Sdes#include <sys/errno.h> 3437535Sdes 3537535Sdes#include <ctype.h> 3637535Sdes#include <stdio.h> 3737535Sdes#include <stdlib.h> 3837535Sdes#include <string.h> 3937535Sdes 4037535Sdes#include "fetch.h" 4140975Sdes#include "common.h" 4237535Sdes 4377238Sdesauth_t fetchAuthMethod; 4460924Sdesint fetchLastErrCode; 4560924Sdeschar fetchLastErrString[MAXERRSTRING]; 4660924Sdesint fetchTimeout; 4763334Sdesint fetchRestartCalls = 1; 4887560Sdesint fetchDebug; 4941862Sdes 5041862Sdes 5140975Sdes/*** Local data **************************************************************/ 5237535Sdes 5340939Sdes/* 5440975Sdes * Error messages for parser errors 5540975Sdes */ 5640975Sdes#define URL_MALFORMED 1 5740975Sdes#define URL_BAD_SCHEME 2 5840975Sdes#define URL_BAD_PORT 3 59174588Sdesstatic struct fetcherr url_errlist[] = { 6090267Sdes { URL_MALFORMED, FETCH_URL, "Malformed URL" }, 6190267Sdes { URL_BAD_SCHEME, FETCH_URL, "Invalid URL scheme" }, 6290267Sdes { URL_BAD_PORT, FETCH_URL, "Invalid server port" }, 6390267Sdes { -1, FETCH_UNKNOWN, "Unknown parser error" } 6440975Sdes}; 6540975Sdes 6640975Sdes 6740975Sdes/*** Public API **************************************************************/ 6840975Sdes 6940975Sdes/* 7040939Sdes * Select the appropriate protocol for the URL scheme, and return a 7140939Sdes * read-only stream connected to the document referenced by the URL. 7263340Sdes * Also fill out the struct url_stat. 
7340939Sdes */ 7438394SdesFILE * 7575891SarchiefetchXGet(struct url *URL, struct url_stat *us, const char *flags) 7638394Sdes{ 7760587Sume 78109694Sdes if (us != NULL) { 79109694Sdes us->size = -1; 80109694Sdes us->atime = us->mtime = 0; 81109694Sdes } 8290267Sdes if (strcasecmp(URL->scheme, SCHEME_FILE) == 0) 8390267Sdes return (fetchXGetFile(URL, us, flags)); 8497868Sdes else if (strcasecmp(URL->scheme, SCHEME_FTP) == 0) 8597868Sdes return (fetchXGetFTP(URL, us, flags)); 8690267Sdes else if (strcasecmp(URL->scheme, SCHEME_HTTP) == 0) 8790267Sdes return (fetchXGetHTTP(URL, us, flags)); 8897868Sdes else if (strcasecmp(URL->scheme, SCHEME_HTTPS) == 0) 8997868Sdes return (fetchXGetHTTP(URL, us, flags)); 90174588Sdes url_seterr(URL_BAD_SCHEME); 9197868Sdes return (NULL); 9238394Sdes} 9338394Sdes 9440939Sdes/* 9540939Sdes * Select the appropriate protocol for the URL scheme, and return a 9663340Sdes * read-only stream connected to the document referenced by the URL. 9763340Sdes */ 9863340SdesFILE * 9975891SarchiefetchGet(struct url *URL, const char *flags) 10063340Sdes{ 10190267Sdes return (fetchXGet(URL, NULL, flags)); 10263340Sdes} 10363340Sdes 10463340Sdes/* 10563340Sdes * Select the appropriate protocol for the URL scheme, and return a 10640939Sdes * write-only stream connected to the document referenced by the URL. 
10740939Sdes */ 10838394SdesFILE * 10975891SarchiefetchPut(struct url *URL, const char *flags) 11038394Sdes{ 11160587Sume 11290267Sdes if (strcasecmp(URL->scheme, SCHEME_FILE) == 0) 11390267Sdes return (fetchPutFile(URL, flags)); 11497868Sdes else if (strcasecmp(URL->scheme, SCHEME_FTP) == 0) 11597868Sdes return (fetchPutFTP(URL, flags)); 11690267Sdes else if (strcasecmp(URL->scheme, SCHEME_HTTP) == 0) 11790267Sdes return (fetchPutHTTP(URL, flags)); 11897868Sdes else if (strcasecmp(URL->scheme, SCHEME_HTTPS) == 0) 11997868Sdes return (fetchPutHTTP(URL, flags)); 120174588Sdes url_seterr(URL_BAD_SCHEME); 12197868Sdes return (NULL); 12238394Sdes} 12338394Sdes 12440939Sdes/* 12540975Sdes * Select the appropriate protocol for the URL scheme, and return the 12640975Sdes * size of the document referenced by the URL if it exists. 12740975Sdes */ 12840975Sdesint 12975891SarchiefetchStat(struct url *URL, struct url_stat *us, const char *flags) 13040975Sdes{ 13160587Sume 132109694Sdes if (us != NULL) { 133109694Sdes us->size = -1; 134109694Sdes us->atime = us->mtime = 0; 135109694Sdes } 13690267Sdes if (strcasecmp(URL->scheme, SCHEME_FILE) == 0) 13790267Sdes return (fetchStatFile(URL, us, flags)); 13897868Sdes else if (strcasecmp(URL->scheme, SCHEME_FTP) == 0) 13997868Sdes return (fetchStatFTP(URL, us, flags)); 14090267Sdes else if (strcasecmp(URL->scheme, SCHEME_HTTP) == 0) 14190267Sdes return (fetchStatHTTP(URL, us, flags)); 14297868Sdes else if (strcasecmp(URL->scheme, SCHEME_HTTPS) == 0) 14397868Sdes return (fetchStatHTTP(URL, us, flags)); 144174588Sdes url_seterr(URL_BAD_SCHEME); 14590267Sdes return (-1); 14640975Sdes} 14740975Sdes 14840975Sdes/* 14941989Sdes * Select the appropriate protocol for the URL scheme, and return a 15041989Sdes * list of files in the directory pointed to by the URL. 
15141989Sdes */ 15241989Sdesstruct url_ent * 15375891SarchiefetchList(struct url *URL, const char *flags) 15441989Sdes{ 15560587Sume 15690267Sdes if (strcasecmp(URL->scheme, SCHEME_FILE) == 0) 15790267Sdes return (fetchListFile(URL, flags)); 15897868Sdes else if (strcasecmp(URL->scheme, SCHEME_FTP) == 0) 15997868Sdes return (fetchListFTP(URL, flags)); 16090267Sdes else if (strcasecmp(URL->scheme, SCHEME_HTTP) == 0) 16190267Sdes return (fetchListHTTP(URL, flags)); 16297868Sdes else if (strcasecmp(URL->scheme, SCHEME_HTTPS) == 0) 16397868Sdes return (fetchListHTTP(URL, flags)); 164174588Sdes url_seterr(URL_BAD_SCHEME); 16590267Sdes return (NULL); 16641989Sdes} 16741989Sdes 16841989Sdes/* 16963340Sdes * Attempt to parse the given URL; if successful, call fetchXGet(). 17040939Sdes */ 17137535SdesFILE * 17275891SarchiefetchXGetURL(const char *URL, struct url_stat *us, const char *flags) 17337535Sdes{ 17490267Sdes struct url *u; 17590267Sdes FILE *f; 17638394Sdes 17790267Sdes if ((u = fetchParseURL(URL)) == NULL) 17890267Sdes return (NULL); 17990267Sdes 18090267Sdes f = fetchXGet(u, us, flags); 18190267Sdes 18290267Sdes fetchFreeURL(u); 18390267Sdes return (f); 18437535Sdes} 18537535Sdes 18663340Sdes/* 18763340Sdes * Attempt to parse the given URL; if successful, call fetchGet(). 18863340Sdes */ 18963340SdesFILE * 19075891SarchiefetchGetURL(const char *URL, const char *flags) 19163340Sdes{ 19290267Sdes return (fetchXGetURL(URL, NULL, flags)); 19363340Sdes} 19437535Sdes 19540939Sdes/* 19640939Sdes * Attempt to parse the given URL; if successful, call fetchPut(). 
19740939Sdes */ 19837535SdesFILE * 19975891SarchiefetchPutURL(const char *URL, const char *flags) 20037535Sdes{ 20190267Sdes struct url *u; 20290267Sdes FILE *f; 20390267Sdes 20490267Sdes if ((u = fetchParseURL(URL)) == NULL) 20590267Sdes return (NULL); 20690267Sdes 20790267Sdes f = fetchPut(u, flags); 20890267Sdes 20990267Sdes fetchFreeURL(u); 21090267Sdes return (f); 21137535Sdes} 21237535Sdes 21337535Sdes/* 21440975Sdes * Attempt to parse the given URL; if successful, call fetchStat(). 21540975Sdes */ 21640975Sdesint 21775891SarchiefetchStatURL(const char *URL, struct url_stat *us, const char *flags) 21840975Sdes{ 21990267Sdes struct url *u; 22090267Sdes int s; 22140975Sdes 22290267Sdes if ((u = fetchParseURL(URL)) == NULL) 22390267Sdes return (-1); 22440975Sdes 22590267Sdes s = fetchStat(u, us, flags); 22640975Sdes 22790267Sdes fetchFreeURL(u); 22890267Sdes return (s); 22940975Sdes} 23040975Sdes 23140975Sdes/* 23241989Sdes * Attempt to parse the given URL; if successful, call fetchList(). 
23341989Sdes */ 23441989Sdesstruct url_ent * 23575891SarchiefetchListURL(const char *URL, const char *flags) 23641989Sdes{ 23790267Sdes struct url *u; 23890267Sdes struct url_ent *ue; 23941989Sdes 24090267Sdes if ((u = fetchParseURL(URL)) == NULL) 24190267Sdes return (NULL); 24241989Sdes 24390267Sdes ue = fetchList(u, flags); 24441989Sdes 24590267Sdes fetchFreeURL(u); 24690267Sdes return (ue); 24741989Sdes} 24841989Sdes 24941989Sdes/* 25060927Sdes * Make a URL 25160927Sdes */ 25260927Sdesstruct url * 25375891SarchiefetchMakeURL(const char *scheme, const char *host, int port, const char *doc, 25475891Sarchie const char *user, const char *pwd) 25560927Sdes{ 25690267Sdes struct url *u; 25760927Sdes 25890267Sdes if (!scheme || (!host && !doc)) { 259174588Sdes url_seterr(URL_MALFORMED); 26090267Sdes return (NULL); 26190267Sdes } 26260927Sdes 26390267Sdes if (port < 0 || port > 65535) { 264174588Sdes url_seterr(URL_BAD_PORT); 26590267Sdes return (NULL); 26690267Sdes } 26790267Sdes 26890267Sdes /* allocate struct url */ 269109967Sdes if ((u = calloc(1, sizeof(*u))) == NULL) { 270174588Sdes fetch_syserr(); 27190267Sdes return (NULL); 27290267Sdes } 27390267Sdes 27490267Sdes if ((u->doc = strdup(doc ? doc : "/")) == NULL) { 275174588Sdes fetch_syserr(); 27690267Sdes free(u); 27790267Sdes return (NULL); 27890267Sdes } 27990267Sdes 280109967Sdes#define seturl(x) snprintf(u->x, sizeof(u->x), "%s", x) 28190267Sdes seturl(scheme); 28290267Sdes seturl(host); 28390267Sdes seturl(user); 28490267Sdes seturl(pwd); 28560927Sdes#undef seturl 28690267Sdes u->port = port; 28760927Sdes 28890267Sdes return (u); 28960927Sdes} 29060927Sdes 29160927Sdes/* 292235253Semaste * Return value of the given hex digit. 
293235253Semaste */ 294235253Semastestatic int 295235253Semastefetch_hexval(char ch) 296235253Semaste{ 297235253Semaste 298235253Semaste if (ch >= '0' && ch <= '9') 299235253Semaste return (ch - '0'); 300235253Semaste else if (ch >= 'a' && ch <= 'f') 301235253Semaste return (ch - 'a' + 10); 302235253Semaste else if (ch >= 'A' && ch <= 'F') 303235253Semaste return (ch - 'A' + 10); 304235253Semaste return (-1); 305235253Semaste} 306235253Semaste 307235253Semaste/* 308235253Semaste * Decode percent-encoded URL component from src into dst, stopping at end 309235253Semaste * of string, or at @ or : separators. Returns a pointer to the unhandled 310235253Semaste * part of the input string (null terminator, @, or :). No terminator is 311235253Semaste * written to dst (it is the caller's responsibility). 312235253Semaste */ 313235253Semastestatic const char * 314235253Semastefetch_pctdecode(char *dst, const char *src, size_t dlen) 315235253Semaste{ 316235253Semaste int d1, d2; 317235253Semaste char c; 318235253Semaste const char *s; 319235253Semaste 320235253Semaste for (s = src; *s != '\0' && *s != '@' && *s != ':'; s++) { 321235253Semaste if (s[0] == '%' && (d1 = fetch_hexval(s[1])) >= 0 && 322235253Semaste (d2 = fetch_hexval(s[2])) >= 0 && (d1 > 0 || d2 > 0)) { 323235253Semaste c = d1 << 4 | d2; 324235253Semaste s += 2; 325235253Semaste } else { 326235253Semaste c = *s; 327235253Semaste } 328235253Semaste if (dlen-- > 0) 329235253Semaste *dst++ = c; 330235253Semaste } 331235253Semaste return (s); 332235253Semaste} 333235253Semaste 334235253Semaste/* 33537535Sdes * Split an URL into components. URL syntax is: 33667042Sdes * [method:/][/[user[:pwd]@]host[:port]/][document] 33737535Sdes * This almost, but not quite, RFC1738 URL syntax. 
33837535Sdes */ 33940975Sdesstruct url * 34075891SarchiefetchParseURL(const char *URL) 34137535Sdes{ 34290267Sdes char *doc; 34390267Sdes const char *p, *q; 34490267Sdes struct url *u; 34590267Sdes int i; 34637535Sdes 34790267Sdes /* allocate struct url */ 348109967Sdes if ((u = calloc(1, sizeof(*u))) == NULL) { 349174588Sdes fetch_syserr(); 35090267Sdes return (NULL); 35190267Sdes } 35237535Sdes 35390267Sdes /* scheme name */ 35490267Sdes if ((p = strstr(URL, ":/"))) { 35590267Sdes snprintf(u->scheme, URL_SCHEMELEN+1, 35690267Sdes "%.*s", (int)(p - URL), URL); 35790267Sdes URL = ++p; 35890267Sdes /* 35990267Sdes * Only one slash: no host, leave slash as part of document 36090267Sdes * Two slashes: host follows, strip slashes 36190267Sdes */ 36290267Sdes if (URL[1] == '/') 36390267Sdes URL = (p += 2); 36490267Sdes } else { 36590267Sdes p = URL; 36690267Sdes } 36790267Sdes if (!*URL || *URL == '/' || *URL == '.' || 36890267Sdes (u->scheme[0] == '\0' && 36990267Sdes strchr(URL, '/') == NULL && strchr(URL, ':') == NULL)) 37090267Sdes goto nohost; 37137535Sdes 37290267Sdes p = strpbrk(URL, "/@"); 37390267Sdes if (p && *p == '@') { 37490267Sdes /* username */ 375235253Semaste q = fetch_pctdecode(u->user, URL, URL_USERLEN); 37679423Sdes 37790267Sdes /* password */ 37890267Sdes if (*q == ':') 379253152Sdes q = fetch_pctdecode(u->pwd, q + 1, URL_PWDLEN); 38090267Sdes 38190267Sdes p++; 38290267Sdes } else { 38390267Sdes p = URL; 38490267Sdes } 38590267Sdes 38690267Sdes /* hostname */ 38760737Sume#ifdef INET6 38890267Sdes if (*p == '[' && (q = strchr(p + 1, ']')) != NULL && 38990267Sdes (*++q == '\0' || *q == '/' || *q == ':')) { 39090267Sdes if ((i = q - p - 2) > MAXHOSTNAMELEN) 39190267Sdes i = MAXHOSTNAMELEN; 39290267Sdes strncpy(u->host, ++p, i); 39390267Sdes p = q; 39490267Sdes } else 39560737Sume#endif 39690267Sdes for (i = 0; *p && (*p != '/') && (*p != ':'); p++) 39790267Sdes if (i < MAXHOSTNAMELEN) 39890267Sdes u->host[i++] = *p; 39937535Sdes 40090267Sdes /* port */ 
40190267Sdes if (*p == ':') { 40290267Sdes for (q = ++p; *q && (*q != '/'); q++) 403174761Sdes if (isdigit((unsigned char)*q)) 40490267Sdes u->port = u->port * 10 + (*q - '0'); 40590267Sdes else { 40690267Sdes /* invalid port */ 407174588Sdes url_seterr(URL_BAD_PORT); 40890267Sdes goto ouch; 40990267Sdes } 41090267Sdes p = q; 41190267Sdes } 41237535Sdes 41337535Sdesnohost: 41490267Sdes /* document */ 41590267Sdes if (!*p) 41690267Sdes p = "/"; 41767419Sdes 41890267Sdes if (strcasecmp(u->scheme, SCHEME_HTTP) == 0 || 41990267Sdes strcasecmp(u->scheme, SCHEME_HTTPS) == 0) { 42090267Sdes const char hexnums[] = "0123456789abcdef"; 42190267Sdes 42290267Sdes /* percent-escape whitespace. */ 42390267Sdes if ((doc = malloc(strlen(p) * 3 + 1)) == NULL) { 424174588Sdes fetch_syserr(); 42590267Sdes goto ouch; 42690267Sdes } 42790267Sdes u->doc = doc; 42890267Sdes while (*p != '\0') { 429174761Sdes if (!isspace((unsigned char)*p)) { 43090267Sdes *doc++ = *p++; 43190267Sdes } else { 43290267Sdes *doc++ = '%'; 43390267Sdes *doc++ = hexnums[((unsigned int)*p) >> 4]; 43490267Sdes *doc++ = hexnums[((unsigned int)*p) & 0xf]; 43590267Sdes p++; 43690267Sdes } 43790267Sdes } 43890267Sdes *doc = '\0'; 43990267Sdes } else if ((u->doc = strdup(p)) == NULL) { 440174588Sdes fetch_syserr(); 44190267Sdes goto ouch; 44267419Sdes } 44390267Sdes 44490267Sdes DEBUG(fprintf(stderr, 44588769Sdes "scheme: [%s]\n" 44688769Sdes "user: [%s]\n" 44788769Sdes "password: [%s]\n" 44888769Sdes "host: [%s]\n" 44988769Sdes "port: [%d]\n" 45088769Sdes "document: [%s]\n", 45137535Sdes u->scheme, u->user, u->pwd, 45237535Sdes u->host, u->port, u->doc)); 45337535Sdes 45490267Sdes return (u); 45590267Sdes 45637535Sdesouch: 45790267Sdes free(u); 45890267Sdes return (NULL); 45937535Sdes} 46060376Sdes 46160376Sdes/* 46260376Sdes * Free a URL 46360376Sdes */ 46460376Sdesvoid 46560376SdesfetchFreeURL(struct url *u) 46660376Sdes{ 46790267Sdes free(u->doc); 46890267Sdes free(u); 46960376Sdes} 470