137535Sdes/*-
 * Copyright (c) 1998-2004 Dag-Erling Smørgrav
337535Sdes * All rights reserved.
437535Sdes *
537535Sdes * Redistribution and use in source and binary forms, with or without
637535Sdes * modification, are permitted provided that the following conditions
737535Sdes * are met:
837535Sdes * 1. Redistributions of source code must retain the above copyright
937535Sdes *    notice, this list of conditions and the following disclaimer
1037535Sdes *    in this position and unchanged.
1137535Sdes * 2. Redistributions in binary form must reproduce the above copyright
1237535Sdes *    notice, this list of conditions and the following disclaimer in the
1337535Sdes *    documentation and/or other materials provided with the distribution.
1437535Sdes * 3. The name of the author may not be used to endorse or promote products
1537535Sdes *    derived from this software without specific prior written permission
1637535Sdes *
1737535Sdes * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
1837535Sdes * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
1937535Sdes * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
2037535Sdes * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
2137535Sdes * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
2237535Sdes * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
2337535Sdes * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
2437535Sdes * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
2537535Sdes * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
2637535Sdes * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
2737535Sdes */
2837535Sdes
2984203Sdillon#include <sys/cdefs.h>
3084203Sdillon__FBSDID("$FreeBSD$");
3184203Sdillon
3237535Sdes#include <sys/param.h>
3340975Sdes#include <sys/errno.h>
3437535Sdes
3537535Sdes#include <ctype.h>
3637535Sdes#include <stdio.h>
3737535Sdes#include <stdlib.h>
3837535Sdes#include <string.h>
3937535Sdes
4037535Sdes#include "fetch.h"
4140975Sdes#include "common.h"
4237535Sdes
4377238Sdesauth_t	 fetchAuthMethod;
4460924Sdesint	 fetchLastErrCode;
4560924Sdeschar	 fetchLastErrString[MAXERRSTRING];
4660924Sdesint	 fetchTimeout;
4763334Sdesint	 fetchRestartCalls = 1;
4887560Sdesint	 fetchDebug;
4941862Sdes
5041862Sdes
5140975Sdes/*** Local data **************************************************************/
5237535Sdes
5340939Sdes/*
5440975Sdes * Error messages for parser errors
5540975Sdes */
5640975Sdes#define URL_MALFORMED		1
5740975Sdes#define URL_BAD_SCHEME		2
5840975Sdes#define URL_BAD_PORT		3
59174588Sdesstatic struct fetcherr url_errlist[] = {
6090267Sdes	{ URL_MALFORMED,	FETCH_URL,	"Malformed URL" },
6190267Sdes	{ URL_BAD_SCHEME,	FETCH_URL,	"Invalid URL scheme" },
6290267Sdes	{ URL_BAD_PORT,		FETCH_URL,	"Invalid server port" },
6390267Sdes	{ -1,			FETCH_UNKNOWN,	"Unknown parser error" }
6440975Sdes};
6540975Sdes
6640975Sdes
6740975Sdes/*** Public API **************************************************************/
6840975Sdes
6940975Sdes/*
7040939Sdes * Select the appropriate protocol for the URL scheme, and return a
7140939Sdes * read-only stream connected to the document referenced by the URL.
7263340Sdes * Also fill out the struct url_stat.
7340939Sdes */
7438394SdesFILE *
7575891SarchiefetchXGet(struct url *URL, struct url_stat *us, const char *flags)
7638394Sdes{
7760587Sume
78109694Sdes	if (us != NULL) {
79109694Sdes		us->size = -1;
80109694Sdes		us->atime = us->mtime = 0;
81109694Sdes	}
8290267Sdes	if (strcasecmp(URL->scheme, SCHEME_FILE) == 0)
8390267Sdes		return (fetchXGetFile(URL, us, flags));
8497868Sdes	else if (strcasecmp(URL->scheme, SCHEME_FTP) == 0)
8597868Sdes		return (fetchXGetFTP(URL, us, flags));
8690267Sdes	else if (strcasecmp(URL->scheme, SCHEME_HTTP) == 0)
8790267Sdes		return (fetchXGetHTTP(URL, us, flags));
8897868Sdes	else if (strcasecmp(URL->scheme, SCHEME_HTTPS) == 0)
8997868Sdes		return (fetchXGetHTTP(URL, us, flags));
90174588Sdes	url_seterr(URL_BAD_SCHEME);
9197868Sdes	return (NULL);
9238394Sdes}
9338394Sdes
/*
 * Convenience form of fetchXGet() for callers that do not want document
 * statistics: passes NULL for the struct url_stat argument and returns
 * fetchXGet()'s result unchanged.
 */
FILE *
fetchGet(struct url *URL, const char *flags)
{
	FILE *stream;

	stream = fetchXGet(URL, NULL, flags);
	return (stream);
}
10363340Sdes
10463340Sdes/*
10563340Sdes * Select the appropriate protocol for the URL scheme, and return a
10640939Sdes * write-only stream connected to the document referenced by the URL.
10740939Sdes */
10838394SdesFILE *
10975891SarchiefetchPut(struct url *URL, const char *flags)
11038394Sdes{
11160587Sume
11290267Sdes	if (strcasecmp(URL->scheme, SCHEME_FILE) == 0)
11390267Sdes		return (fetchPutFile(URL, flags));
11497868Sdes	else if (strcasecmp(URL->scheme, SCHEME_FTP) == 0)
11597868Sdes		return (fetchPutFTP(URL, flags));
11690267Sdes	else if (strcasecmp(URL->scheme, SCHEME_HTTP) == 0)
11790267Sdes		return (fetchPutHTTP(URL, flags));
11897868Sdes	else if (strcasecmp(URL->scheme, SCHEME_HTTPS) == 0)
11997868Sdes		return (fetchPutHTTP(URL, flags));
120174588Sdes	url_seterr(URL_BAD_SCHEME);
12197868Sdes	return (NULL);
12238394Sdes}
12338394Sdes
12440939Sdes/*
12540975Sdes * Select the appropriate protocol for the URL scheme, and return the
12640975Sdes * size of the document referenced by the URL if it exists.
12740975Sdes */
12840975Sdesint
12975891SarchiefetchStat(struct url *URL, struct url_stat *us, const char *flags)
13040975Sdes{
13160587Sume
132109694Sdes	if (us != NULL) {
133109694Sdes		us->size = -1;
134109694Sdes		us->atime = us->mtime = 0;
135109694Sdes	}
13690267Sdes	if (strcasecmp(URL->scheme, SCHEME_FILE) == 0)
13790267Sdes		return (fetchStatFile(URL, us, flags));
13897868Sdes	else if (strcasecmp(URL->scheme, SCHEME_FTP) == 0)
13997868Sdes		return (fetchStatFTP(URL, us, flags));
14090267Sdes	else if (strcasecmp(URL->scheme, SCHEME_HTTP) == 0)
14190267Sdes		return (fetchStatHTTP(URL, us, flags));
14297868Sdes	else if (strcasecmp(URL->scheme, SCHEME_HTTPS) == 0)
14397868Sdes		return (fetchStatHTTP(URL, us, flags));
144174588Sdes	url_seterr(URL_BAD_SCHEME);
14590267Sdes	return (-1);
14640975Sdes}
14740975Sdes
14840975Sdes/*
14941989Sdes * Select the appropriate protocol for the URL scheme, and return a
15041989Sdes * list of files in the directory pointed to by the URL.
15141989Sdes */
15241989Sdesstruct url_ent *
15375891SarchiefetchList(struct url *URL, const char *flags)
15441989Sdes{
15560587Sume
15690267Sdes	if (strcasecmp(URL->scheme, SCHEME_FILE) == 0)
15790267Sdes		return (fetchListFile(URL, flags));
15897868Sdes	else if (strcasecmp(URL->scheme, SCHEME_FTP) == 0)
15997868Sdes		return (fetchListFTP(URL, flags));
16090267Sdes	else if (strcasecmp(URL->scheme, SCHEME_HTTP) == 0)
16190267Sdes		return (fetchListHTTP(URL, flags));
16297868Sdes	else if (strcasecmp(URL->scheme, SCHEME_HTTPS) == 0)
16397868Sdes		return (fetchListHTTP(URL, flags));
164174588Sdes	url_seterr(URL_BAD_SCHEME);
16590267Sdes	return (NULL);
16641989Sdes}
16741989Sdes
/*
 * String front end to fetchXGet(): parse URL with fetchParseURL(), hand
 * the parsed form to fetchXGet() and release it again before returning.
 * Returns NULL if the URL cannot be parsed, otherwise fetchXGet()'s
 * result.
 */
FILE *
fetchXGetURL(const char *URL, struct url_stat *us, const char *flags)
{
	struct url *parsed;
	FILE *stream;

	parsed = fetchParseURL(URL);
	if (parsed == NULL)
		return (NULL);

	stream = fetchXGet(parsed, us, flags);
	/* The parsed URL is released here, before the stream is returned. */
	fetchFreeURL(parsed);

	return (stream);
}
18537535Sdes
/*
 * Convenience form of fetchXGetURL() for callers that do not want
 * document statistics: passes NULL for the struct url_stat argument and
 * returns fetchXGetURL()'s result unchanged.
 */
FILE *
fetchGetURL(const char *URL, const char *flags)
{
	FILE *stream;

	stream = fetchXGetURL(URL, NULL, flags);
	return (stream);
}
19437535Sdes
/*
 * String front end to fetchPut(): parse URL with fetchParseURL(), hand
 * the parsed form to fetchPut() and release it again before returning.
 * Returns NULL if the URL cannot be parsed, otherwise fetchPut()'s
 * result.
 */
FILE *
fetchPutURL(const char *URL, const char *flags)
{
	struct url *parsed;
	FILE *stream;

	parsed = fetchParseURL(URL);
	if (parsed == NULL)
		return (NULL);

	stream = fetchPut(parsed, flags);
	/* The parsed URL is released here, before the stream is returned. */
	fetchFreeURL(parsed);

	return (stream);
}
21237535Sdes
/*
 * String front end to fetchStat(): parse URL with fetchParseURL(), hand
 * the parsed form to fetchStat() and release it again before returning.
 * Returns -1 if the URL cannot be parsed, otherwise fetchStat()'s
 * result.
 */
int
fetchStatURL(const char *URL, struct url_stat *us, const char *flags)
{
	struct url *parsed;
	int status;

	parsed = fetchParseURL(URL);
	if (parsed == NULL)
		return (-1);

	status = fetchStat(parsed, us, flags);
	fetchFreeURL(parsed);

	return (status);
}
23040975Sdes
/*
 * String front end to fetchList(): parse URL with fetchParseURL(), hand
 * the parsed form to fetchList() and release it again before returning.
 * Returns NULL if the URL cannot be parsed, otherwise fetchList()'s
 * result.
 */
struct url_ent *
fetchListURL(const char *URL, const char *flags)
{
	struct url *parsed;
	struct url_ent *entries;

	parsed = fetchParseURL(URL);
	if (parsed == NULL)
		return (NULL);

	entries = fetchList(parsed, flags);
	fetchFreeURL(parsed);

	return (entries);
}
24841989Sdes
24941989Sdes/*
25060927Sdes * Make a URL
25160927Sdes */
25260927Sdesstruct url *
25375891SarchiefetchMakeURL(const char *scheme, const char *host, int port, const char *doc,
25475891Sarchie    const char *user, const char *pwd)
25560927Sdes{
25690267Sdes	struct url *u;
25760927Sdes
25890267Sdes	if (!scheme || (!host && !doc)) {
259174588Sdes		url_seterr(URL_MALFORMED);
26090267Sdes		return (NULL);
26190267Sdes	}
26260927Sdes
26390267Sdes	if (port < 0 || port > 65535) {
264174588Sdes		url_seterr(URL_BAD_PORT);
26590267Sdes		return (NULL);
26690267Sdes	}
26790267Sdes
26890267Sdes	/* allocate struct url */
269109967Sdes	if ((u = calloc(1, sizeof(*u))) == NULL) {
270174588Sdes		fetch_syserr();
27190267Sdes		return (NULL);
27290267Sdes	}
27390267Sdes
27490267Sdes	if ((u->doc = strdup(doc ? doc : "/")) == NULL) {
275174588Sdes		fetch_syserr();
27690267Sdes		free(u);
27790267Sdes		return (NULL);
27890267Sdes	}
27990267Sdes
280109967Sdes#define seturl(x) snprintf(u->x, sizeof(u->x), "%s", x)
28190267Sdes	seturl(scheme);
28290267Sdes	seturl(host);
28390267Sdes	seturl(user);
28490267Sdes	seturl(pwd);
28560927Sdes#undef seturl
28690267Sdes	u->port = port;
28760927Sdes
28890267Sdes	return (u);
28960927Sdes}
29060927Sdes
/*
 * Return the numeric value (0-15) of the hexadecimal digit ch, accepting
 * both upper and lower case, or -1 if ch is not a hexadecimal digit.
 */
static int
fetch_hexval(char ch)
{

	if (ch >= '0' && ch <= '9')
		return (ch - '0');
	else if (ch >= 'a' && ch <= 'f')
		return (ch - 'a' + 10);
	else if (ch >= 'A' && ch <= 'F')
		return (ch - 'A' + 10);
	return (-1);
}

/*
 * Decode percent-encoded URL component from src into dst, stopping at end
 * of string, or at @ or : separators.  Returns a pointer to the unhandled
 * part of the input string (null terminator, @, or :).  No terminator is
 * written to dst (it is the caller's responsibility).
 *
 * At most dlen bytes are stored in dst; decoded output beyond that is
 * silently dropped, but the input is still consumed up to the separator
 * so that the return value is the same regardless of truncation.
 *
 * A '%' that is not followed by two hex digits, and the sequence "%00",
 * are not treated as escapes: the '%' is copied through literally and
 * the following characters are processed normally.
 */
static const char *
fetch_pctdecode(char *dst, const char *src, size_t dlen)
{
	int d1, d2;
	char c;
	const char *s;

	for (s = src; *s != '\0' && *s != '@' && *s != ':'; s++) {
		/*
		 * The && chain stops at the first non-hex character, so
		 * s[2] is never read when s[1] is the terminator.
		 */
		if (s[0] == '%' && (d1 = fetch_hexval(s[1])) >= 0 &&
		    (d2 = fetch_hexval(s[2])) >= 0 && (d1 > 0 || d2 > 0)) {
			c = d1 << 4 | d2;
			s += 2;
		} else {
			c = *s;
		}
		/*
		 * Only count down while there is room left.  dlen is
		 * unsigned: decrementing it at zero would wrap around to
		 * SIZE_MAX and every later character would be written past
		 * the end of dst.
		 */
		if (dlen > 0) {
			*dst++ = c;
			dlen--;
		}
	}
	return (s);
}
333235253Semaste
334235253Semaste/*
33537535Sdes * Split an URL into components. URL syntax is:
33667042Sdes * [method:/][/[user[:pwd]@]host[:port]/][document]
33737535Sdes * This almost, but not quite, RFC1738 URL syntax.
33837535Sdes */
33940975Sdesstruct url *
34075891SarchiefetchParseURL(const char *URL)
34137535Sdes{
34290267Sdes	char *doc;
34390267Sdes	const char *p, *q;
34490267Sdes	struct url *u;
34590267Sdes	int i;
34637535Sdes
34790267Sdes	/* allocate struct url */
348109967Sdes	if ((u = calloc(1, sizeof(*u))) == NULL) {
349174588Sdes		fetch_syserr();
35090267Sdes		return (NULL);
35190267Sdes	}
35237535Sdes
35390267Sdes	/* scheme name */
35490267Sdes	if ((p = strstr(URL, ":/"))) {
35590267Sdes		snprintf(u->scheme, URL_SCHEMELEN+1,
35690267Sdes		    "%.*s", (int)(p - URL), URL);
35790267Sdes		URL = ++p;
35890267Sdes		/*
35990267Sdes		 * Only one slash: no host, leave slash as part of document
36090267Sdes		 * Two slashes: host follows, strip slashes
36190267Sdes		 */
36290267Sdes		if (URL[1] == '/')
36390267Sdes			URL = (p += 2);
36490267Sdes	} else {
36590267Sdes		p = URL;
36690267Sdes	}
36790267Sdes	if (!*URL || *URL == '/' || *URL == '.' ||
36890267Sdes	    (u->scheme[0] == '\0' &&
36990267Sdes		strchr(URL, '/') == NULL && strchr(URL, ':') == NULL))
37090267Sdes		goto nohost;
37137535Sdes
37290267Sdes	p = strpbrk(URL, "/@");
37390267Sdes	if (p && *p == '@') {
37490267Sdes		/* username */
375235253Semaste		q = fetch_pctdecode(u->user, URL, URL_USERLEN);
37679423Sdes
37790267Sdes		/* password */
37890267Sdes		if (*q == ':')
379253152Sdes			q = fetch_pctdecode(u->pwd, q + 1, URL_PWDLEN);
38090267Sdes
38190267Sdes		p++;
38290267Sdes	} else {
38390267Sdes		p = URL;
38490267Sdes	}
38590267Sdes
38690267Sdes	/* hostname */
38760737Sume#ifdef INET6
38890267Sdes	if (*p == '[' && (q = strchr(p + 1, ']')) != NULL &&
38990267Sdes	    (*++q == '\0' || *q == '/' || *q == ':')) {
39090267Sdes		if ((i = q - p - 2) > MAXHOSTNAMELEN)
39190267Sdes			i = MAXHOSTNAMELEN;
39290267Sdes		strncpy(u->host, ++p, i);
39390267Sdes		p = q;
39490267Sdes	} else
39560737Sume#endif
39690267Sdes		for (i = 0; *p && (*p != '/') && (*p != ':'); p++)
39790267Sdes			if (i < MAXHOSTNAMELEN)
39890267Sdes				u->host[i++] = *p;
39937535Sdes
40090267Sdes	/* port */
40190267Sdes	if (*p == ':') {
40290267Sdes		for (q = ++p; *q && (*q != '/'); q++)
403174761Sdes			if (isdigit((unsigned char)*q))
40490267Sdes				u->port = u->port * 10 + (*q - '0');
40590267Sdes			else {
40690267Sdes				/* invalid port */
407174588Sdes				url_seterr(URL_BAD_PORT);
40890267Sdes				goto ouch;
40990267Sdes			}
41090267Sdes		p = q;
41190267Sdes	}
41237535Sdes
41337535Sdesnohost:
41490267Sdes	/* document */
41590267Sdes	if (!*p)
41690267Sdes		p = "/";
41767419Sdes
41890267Sdes	if (strcasecmp(u->scheme, SCHEME_HTTP) == 0 ||
41990267Sdes	    strcasecmp(u->scheme, SCHEME_HTTPS) == 0) {
42090267Sdes		const char hexnums[] = "0123456789abcdef";
42190267Sdes
42290267Sdes		/* percent-escape whitespace. */
42390267Sdes		if ((doc = malloc(strlen(p) * 3 + 1)) == NULL) {
424174588Sdes			fetch_syserr();
42590267Sdes			goto ouch;
42690267Sdes		}
42790267Sdes		u->doc = doc;
42890267Sdes		while (*p != '\0') {
429174761Sdes			if (!isspace((unsigned char)*p)) {
43090267Sdes				*doc++ = *p++;
43190267Sdes			} else {
43290267Sdes				*doc++ = '%';
43390267Sdes				*doc++ = hexnums[((unsigned int)*p) >> 4];
43490267Sdes				*doc++ = hexnums[((unsigned int)*p) & 0xf];
43590267Sdes				p++;
43690267Sdes			}
43790267Sdes		}
43890267Sdes		*doc = '\0';
43990267Sdes	} else if ((u->doc = strdup(p)) == NULL) {
440174588Sdes		fetch_syserr();
44190267Sdes		goto ouch;
44267419Sdes	}
44390267Sdes
44490267Sdes	DEBUG(fprintf(stderr,
44588769Sdes		  "scheme:   [%s]\n"
44688769Sdes		  "user:     [%s]\n"
44788769Sdes		  "password: [%s]\n"
44888769Sdes		  "host:     [%s]\n"
44988769Sdes		  "port:     [%d]\n"
45088769Sdes		  "document: [%s]\n",
45137535Sdes		  u->scheme, u->user, u->pwd,
45237535Sdes		  u->host, u->port, u->doc));
45337535Sdes
45490267Sdes	return (u);
45590267Sdes
45637535Sdesouch:
45790267Sdes	free(u);
45890267Sdes	return (NULL);
45937535Sdes}
46060376Sdes
46160376Sdes/*
46260376Sdes * Free a URL
46360376Sdes */
46460376Sdesvoid
46560376SdesfetchFreeURL(struct url *u)
46660376Sdes{
46790267Sdes	free(u->doc);
46890267Sdes	free(u);
46960376Sdes}
470