137535Sdes/*- 2226537Sdes * Copyright (c) 1998-2004 Dag-Erling Sm��rgrav 337535Sdes * All rights reserved. 437535Sdes * 537535Sdes * Redistribution and use in source and binary forms, with or without 637535Sdes * modification, are permitted provided that the following conditions 737535Sdes * are met: 837535Sdes * 1. Redistributions of source code must retain the above copyright 937535Sdes * notice, this list of conditions and the following disclaimer 1037535Sdes * in this position and unchanged. 1137535Sdes * 2. Redistributions in binary form must reproduce the above copyright 1237535Sdes * notice, this list of conditions and the following disclaimer in the 1337535Sdes * documentation and/or other materials provided with the distribution. 1437535Sdes * 3. The name of the author may not be used to endorse or promote products 1537535Sdes * derived from this software without specific prior written permission 1637535Sdes * 1737535Sdes * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR 1837535Sdes * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 1937535Sdes * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 2037535Sdes * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, 2137535Sdes * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 2237535Sdes * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 2337535Sdes * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 2437535Sdes * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 2537535Sdes * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF 2637535Sdes * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
2737535Sdes */ 2837535Sdes 2984203Sdillon#include <sys/cdefs.h> 3084203Sdillon__FBSDID("$FreeBSD: stable/10/lib/libfetch/fetch.c 357582 2020-02-05 18:26:50Z emaste $"); 3184203Sdillon 3237535Sdes#include <sys/param.h> 3340975Sdes#include <sys/errno.h> 3437535Sdes 3537535Sdes#include <ctype.h> 3637535Sdes#include <stdio.h> 3737535Sdes#include <stdlib.h> 3837535Sdes#include <string.h> 3937535Sdes 4037535Sdes#include "fetch.h" 4140975Sdes#include "common.h" 4237535Sdes 4377238Sdesauth_t fetchAuthMethod; 4460924Sdesint fetchLastErrCode; 4560924Sdeschar fetchLastErrString[MAXERRSTRING]; 4660924Sdesint fetchTimeout; 4763334Sdesint fetchRestartCalls = 1; 4887560Sdesint fetchDebug; 4941862Sdes 5041862Sdes 5140975Sdes/*** Local data **************************************************************/ 5237535Sdes 5340939Sdes/* 5440975Sdes * Error messages for parser errors 5540975Sdes */ 5640975Sdes#define URL_MALFORMED 1 5740975Sdes#define URL_BAD_SCHEME 2 5840975Sdes#define URL_BAD_PORT 3 59174588Sdesstatic struct fetcherr url_errlist[] = { 6090267Sdes { URL_MALFORMED, FETCH_URL, "Malformed URL" }, 6190267Sdes { URL_BAD_SCHEME, FETCH_URL, "Invalid URL scheme" }, 6290267Sdes { URL_BAD_PORT, FETCH_URL, "Invalid server port" }, 6390267Sdes { -1, FETCH_UNKNOWN, "Unknown parser error" } 6440975Sdes}; 6540975Sdes 6640975Sdes 6740975Sdes/*** Public API **************************************************************/ 6840975Sdes 6940975Sdes/* 7040939Sdes * Select the appropriate protocol for the URL scheme, and return a 7140939Sdes * read-only stream connected to the document referenced by the URL. 7263340Sdes * Also fill out the struct url_stat. 
7340939Sdes */ 7438394SdesFILE * 7575891SarchiefetchXGet(struct url *URL, struct url_stat *us, const char *flags) 7638394Sdes{ 7760587Sume 78109694Sdes if (us != NULL) { 79109694Sdes us->size = -1; 80109694Sdes us->atime = us->mtime = 0; 81109694Sdes } 8290267Sdes if (strcasecmp(URL->scheme, SCHEME_FILE) == 0) 8390267Sdes return (fetchXGetFile(URL, us, flags)); 8497868Sdes else if (strcasecmp(URL->scheme, SCHEME_FTP) == 0) 8597868Sdes return (fetchXGetFTP(URL, us, flags)); 8690267Sdes else if (strcasecmp(URL->scheme, SCHEME_HTTP) == 0) 8790267Sdes return (fetchXGetHTTP(URL, us, flags)); 8897868Sdes else if (strcasecmp(URL->scheme, SCHEME_HTTPS) == 0) 8997868Sdes return (fetchXGetHTTP(URL, us, flags)); 90174588Sdes url_seterr(URL_BAD_SCHEME); 9197868Sdes return (NULL); 9238394Sdes} 9338394Sdes 9440939Sdes/* 9540939Sdes * Select the appropriate protocol for the URL scheme, and return a 9663340Sdes * read-only stream connected to the document referenced by the URL. 9763340Sdes */ 9863340SdesFILE * 9975891SarchiefetchGet(struct url *URL, const char *flags) 10063340Sdes{ 10190267Sdes return (fetchXGet(URL, NULL, flags)); 10263340Sdes} 10363340Sdes 10463340Sdes/* 10563340Sdes * Select the appropriate protocol for the URL scheme, and return a 10640939Sdes * write-only stream connected to the document referenced by the URL. 
10740939Sdes */ 10838394SdesFILE * 10975891SarchiefetchPut(struct url *URL, const char *flags) 11038394Sdes{ 11160587Sume 11290267Sdes if (strcasecmp(URL->scheme, SCHEME_FILE) == 0) 11390267Sdes return (fetchPutFile(URL, flags)); 11497868Sdes else if (strcasecmp(URL->scheme, SCHEME_FTP) == 0) 11597868Sdes return (fetchPutFTP(URL, flags)); 11690267Sdes else if (strcasecmp(URL->scheme, SCHEME_HTTP) == 0) 11790267Sdes return (fetchPutHTTP(URL, flags)); 11897868Sdes else if (strcasecmp(URL->scheme, SCHEME_HTTPS) == 0) 11997868Sdes return (fetchPutHTTP(URL, flags)); 120174588Sdes url_seterr(URL_BAD_SCHEME); 12197868Sdes return (NULL); 12238394Sdes} 12338394Sdes 12440939Sdes/* 12540975Sdes * Select the appropriate protocol for the URL scheme, and return the 12640975Sdes * size of the document referenced by the URL if it exists. 12740975Sdes */ 12840975Sdesint 12975891SarchiefetchStat(struct url *URL, struct url_stat *us, const char *flags) 13040975Sdes{ 13160587Sume 132109694Sdes if (us != NULL) { 133109694Sdes us->size = -1; 134109694Sdes us->atime = us->mtime = 0; 135109694Sdes } 13690267Sdes if (strcasecmp(URL->scheme, SCHEME_FILE) == 0) 13790267Sdes return (fetchStatFile(URL, us, flags)); 13897868Sdes else if (strcasecmp(URL->scheme, SCHEME_FTP) == 0) 13997868Sdes return (fetchStatFTP(URL, us, flags)); 14090267Sdes else if (strcasecmp(URL->scheme, SCHEME_HTTP) == 0) 14190267Sdes return (fetchStatHTTP(URL, us, flags)); 14297868Sdes else if (strcasecmp(URL->scheme, SCHEME_HTTPS) == 0) 14397868Sdes return (fetchStatHTTP(URL, us, flags)); 144174588Sdes url_seterr(URL_BAD_SCHEME); 14590267Sdes return (-1); 14640975Sdes} 14740975Sdes 14840975Sdes/* 14941989Sdes * Select the appropriate protocol for the URL scheme, and return a 15041989Sdes * list of files in the directory pointed to by the URL. 
15141989Sdes */ 15241989Sdesstruct url_ent * 15375891SarchiefetchList(struct url *URL, const char *flags) 15441989Sdes{ 15560587Sume 15690267Sdes if (strcasecmp(URL->scheme, SCHEME_FILE) == 0) 15790267Sdes return (fetchListFile(URL, flags)); 15897868Sdes else if (strcasecmp(URL->scheme, SCHEME_FTP) == 0) 15997868Sdes return (fetchListFTP(URL, flags)); 16090267Sdes else if (strcasecmp(URL->scheme, SCHEME_HTTP) == 0) 16190267Sdes return (fetchListHTTP(URL, flags)); 16297868Sdes else if (strcasecmp(URL->scheme, SCHEME_HTTPS) == 0) 16397868Sdes return (fetchListHTTP(URL, flags)); 164174588Sdes url_seterr(URL_BAD_SCHEME); 16590267Sdes return (NULL); 16641989Sdes} 16741989Sdes 16841989Sdes/* 16963340Sdes * Attempt to parse the given URL; if successful, call fetchXGet(). 17040939Sdes */ 17137535SdesFILE * 17275891SarchiefetchXGetURL(const char *URL, struct url_stat *us, const char *flags) 17337535Sdes{ 17490267Sdes struct url *u; 17590267Sdes FILE *f; 17638394Sdes 17790267Sdes if ((u = fetchParseURL(URL)) == NULL) 17890267Sdes return (NULL); 17990267Sdes 18090267Sdes f = fetchXGet(u, us, flags); 18190267Sdes 18290267Sdes fetchFreeURL(u); 18390267Sdes return (f); 18437535Sdes} 18537535Sdes 18663340Sdes/* 18763340Sdes * Attempt to parse the given URL; if successful, call fetchGet(). 18863340Sdes */ 18963340SdesFILE * 19075891SarchiefetchGetURL(const char *URL, const char *flags) 19163340Sdes{ 19290267Sdes return (fetchXGetURL(URL, NULL, flags)); 19363340Sdes} 19437535Sdes 19540939Sdes/* 19640939Sdes * Attempt to parse the given URL; if successful, call fetchPut(). 
/*
 * Attempt to parse the given URL; if successful, call fetchPut().
 * The temporary struct url is released before returning, whether or not
 * fetchPut() succeeded.  Returns NULL if the URL cannot be parsed.
 */
FILE *
fetchPutURL(const char *URL, const char *flags)
{
	struct url *u;
	FILE *f;

	if ((u = fetchParseURL(URL)) == NULL)
		return (NULL);

	f = fetchPut(u, flags);

	fetchFreeURL(u);
	return (f);
}

/*
 * Attempt to parse the given URL; if successful, call fetchStat().
 * The temporary struct url is released before returning.  Returns -1 if
 * the URL cannot be parsed, otherwise the result of fetchStat().
 */
int
fetchStatURL(const char *URL, struct url_stat *us, const char *flags)
{
	struct url *u;
	int s;

	if ((u = fetchParseURL(URL)) == NULL)
		return (-1);

	s = fetchStat(u, us, flags);

	fetchFreeURL(u);
	return (s);
}

23341989Sdes */ 23441989Sdesstruct url_ent * 23575891SarchiefetchListURL(const char *URL, const char *flags) 23641989Sdes{ 23790267Sdes struct url *u; 23890267Sdes struct url_ent *ue; 23941989Sdes 24090267Sdes if ((u = fetchParseURL(URL)) == NULL) 24190267Sdes return (NULL); 24241989Sdes 24390267Sdes ue = fetchList(u, flags); 24441989Sdes 24590267Sdes fetchFreeURL(u); 24690267Sdes return (ue); 24741989Sdes} 24841989Sdes 24941989Sdes/* 25060927Sdes * Make a URL 25160927Sdes */ 25260927Sdesstruct url * 25375891SarchiefetchMakeURL(const char *scheme, const char *host, int port, const char *doc, 25475891Sarchie const char *user, const char *pwd) 25560927Sdes{ 25690267Sdes struct url *u; 25760927Sdes 25890267Sdes if (!scheme || (!host && !doc)) { 259174588Sdes url_seterr(URL_MALFORMED); 26090267Sdes return (NULL); 26190267Sdes } 26260927Sdes 26390267Sdes if (port < 0 || port > 65535) { 264174588Sdes url_seterr(URL_BAD_PORT); 26590267Sdes return (NULL); 26690267Sdes } 26790267Sdes 26890267Sdes /* allocate struct url */ 269109967Sdes if ((u = calloc(1, sizeof(*u))) == NULL) { 270174588Sdes fetch_syserr(); 27190267Sdes return (NULL); 27290267Sdes } 27390267Sdes 27490267Sdes if ((u->doc = strdup(doc ? doc : "/")) == NULL) { 275174588Sdes fetch_syserr(); 27690267Sdes free(u); 27790267Sdes return (NULL); 27890267Sdes } 27990267Sdes 280109967Sdes#define seturl(x) snprintf(u->x, sizeof(u->x), "%s", x) 28190267Sdes seturl(scheme); 28290267Sdes seturl(host); 28390267Sdes seturl(user); 28490267Sdes seturl(pwd); 28560927Sdes#undef seturl 28690267Sdes u->port = port; 287315904Sdes u->netrcfd = -2; 28860927Sdes 28990267Sdes return (u); 29060927Sdes} 29160927Sdes 29260927Sdes/* 293234138Semaste * Return value of the given hex digit. 
/*
 * Return value of the given hex digit, or -1 if ch is not a hex digit.
 */
static int
fetch_hexval(char ch)
{

	if (ch >= '0' && ch <= '9')
		return (ch - '0');
	else if (ch >= 'a' && ch <= 'f')
		return (ch - 'a' + 10);
	else if (ch >= 'A' && ch <= 'F')
		return (ch - 'A' + 10);
	return (-1);
}

/*
 * Decode percent-encoded URL component from src into dst, stopping at end
 * of string, or at @ or : separators.  At most dlen bytes are stored.
 *
 * A '%' is only treated as an escape when it is followed by two hex digits
 * that do not encode NUL; "%00" and malformed escapes are copied verbatim.
 *
 * Returns a pointer to the unhandled part of the input string (null
 * terminator, @, or :), or NULL if the decoded component does not fit in
 * dlen bytes.  No terminator is written to dst (it is the caller's
 * responsibility).
 */
static const char *
fetch_pctdecode(char *dst, const char *src, size_t dlen)
{
	int d1, d2;
	char c;
	const char *s;

	for (s = src; *s != '\0' && *s != '@' && *s != ':'; s++) {
		if (s[0] == '%' && (d1 = fetch_hexval(s[1])) >= 0 &&
		    (d2 = fetch_hexval(s[2])) >= 0 && (d1 > 0 || d2 > 0)) {
			c = (char)((d1 << 4) | d2);
			s += 2;
		} else {
			c = *s;
		}
		/* Out of room: report failure instead of truncating. */
		if (dlen == 0)
			return (NULL);
		dlen--;
		*dst++ = c;
	}
	return (s);
}

34137535Sdes */ 34240975Sdesstruct url * 34375891SarchiefetchParseURL(const char *URL) 34437535Sdes{ 34590267Sdes char *doc; 34690267Sdes const char *p, *q; 34790267Sdes struct url *u; 34890267Sdes int i; 34937535Sdes 35090267Sdes /* allocate struct url */ 351109967Sdes if ((u = calloc(1, sizeof(*u))) == NULL) { 352174588Sdes fetch_syserr(); 35390267Sdes return (NULL); 35490267Sdes } 355315904Sdes u->netrcfd = -2; 35637535Sdes 35790267Sdes /* scheme name */ 35890267Sdes if ((p = strstr(URL, ":/"))) { 35990267Sdes snprintf(u->scheme, URL_SCHEMELEN+1, 36090267Sdes "%.*s", (int)(p - URL), URL); 36190267Sdes URL = ++p; 36290267Sdes /* 36390267Sdes * Only one slash: no host, leave slash as part of document 36490267Sdes * Two slashes: host follows, strip slashes 36590267Sdes */ 36690267Sdes if (URL[1] == '/') 36790267Sdes URL = (p += 2); 36890267Sdes } else { 36990267Sdes p = URL; 37090267Sdes } 37190267Sdes if (!*URL || *URL == '/' || *URL == '.' || 37290267Sdes (u->scheme[0] == '\0' && 37390267Sdes strchr(URL, '/') == NULL && strchr(URL, ':') == NULL)) 37490267Sdes goto nohost; 37537535Sdes 37690267Sdes p = strpbrk(URL, "/@"); 37790267Sdes if (p && *p == '@') { 37890267Sdes /* username */ 379234138Semaste q = fetch_pctdecode(u->user, URL, URL_USERLEN); 380357582Semaste if (q == NULL) 381357582Semaste goto ouch; 38279423Sdes 38390267Sdes /* password */ 384357582Semaste if (*q == ':') { 385252375Skientzle q = fetch_pctdecode(u->pwd, q + 1, URL_PWDLEN); 386357582Semaste if (q == NULL) 387357582Semaste goto ouch; 388357582Semaste } 38990267Sdes p++; 39090267Sdes } else { 39190267Sdes p = URL; 39290267Sdes } 39390267Sdes 39490267Sdes /* hostname */ 39590267Sdes if (*p == '[' && (q = strchr(p + 1, ']')) != NULL && 39690267Sdes (*++q == '\0' || *q == '/' || *q == ':')) { 397315904Sdes if ((i = q - p) > MAXHOSTNAMELEN) 39890267Sdes i = MAXHOSTNAMELEN; 399315904Sdes strncpy(u->host, p, i); 40090267Sdes p = q; 401315904Sdes } else { 40290267Sdes for (i = 0; *p && (*p != '/') && 
(*p != ':'); p++) 40390267Sdes if (i < MAXHOSTNAMELEN) 40490267Sdes u->host[i++] = *p; 405315904Sdes } 40637535Sdes 40790267Sdes /* port */ 40890267Sdes if (*p == ':') { 40990267Sdes for (q = ++p; *q && (*q != '/'); q++) 410174761Sdes if (isdigit((unsigned char)*q)) 41190267Sdes u->port = u->port * 10 + (*q - '0'); 41290267Sdes else { 41390267Sdes /* invalid port */ 414174588Sdes url_seterr(URL_BAD_PORT); 41590267Sdes goto ouch; 41690267Sdes } 41790267Sdes p = q; 41890267Sdes } 41937535Sdes 42037535Sdesnohost: 42190267Sdes /* document */ 42290267Sdes if (!*p) 42390267Sdes p = "/"; 42467419Sdes 42590267Sdes if (strcasecmp(u->scheme, SCHEME_HTTP) == 0 || 42690267Sdes strcasecmp(u->scheme, SCHEME_HTTPS) == 0) { 42790267Sdes const char hexnums[] = "0123456789abcdef"; 42890267Sdes 42990267Sdes /* percent-escape whitespace. */ 43090267Sdes if ((doc = malloc(strlen(p) * 3 + 1)) == NULL) { 431174588Sdes fetch_syserr(); 43290267Sdes goto ouch; 43390267Sdes } 43490267Sdes u->doc = doc; 43590267Sdes while (*p != '\0') { 436174761Sdes if (!isspace((unsigned char)*p)) { 43790267Sdes *doc++ = *p++; 43890267Sdes } else { 43990267Sdes *doc++ = '%'; 44090267Sdes *doc++ = hexnums[((unsigned int)*p) >> 4]; 44190267Sdes *doc++ = hexnums[((unsigned int)*p) & 0xf]; 44290267Sdes p++; 44390267Sdes } 44490267Sdes } 44590267Sdes *doc = '\0'; 44690267Sdes } else if ((u->doc = strdup(p)) == NULL) { 447174588Sdes fetch_syserr(); 44890267Sdes goto ouch; 44967419Sdes } 45090267Sdes 45190267Sdes DEBUG(fprintf(stderr, 452315904Sdes "scheme: \"%s\"\n" 453315904Sdes "user: \"%s\"\n" 454315904Sdes "password: \"%s\"\n" 455315904Sdes "host: \"%s\"\n" 456315904Sdes "port: \"%d\"\n" 457315904Sdes "document: \"%s\"\n", 45837535Sdes u->scheme, u->user, u->pwd, 45937535Sdes u->host, u->port, u->doc)); 46037535Sdes 46190267Sdes return (u); 46290267Sdes 46337535Sdesouch: 46490267Sdes free(u); 46590267Sdes return (NULL); 46637535Sdes} 46760376Sdes 46860376Sdes/* 46960376Sdes * Free a URL 47060376Sdes */ 
47160376Sdesvoid 47260376SdesfetchFreeURL(struct url *u) 47360376Sdes{ 47490267Sdes free(u->doc); 47590267Sdes free(u); 47660376Sdes} 477