/*-
 * SPDX-License-Identifier: BSD-3-Clause
 *
 * Copyright (c) 1998-2004 Dag-Erling Smørgrav
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer
 *    in this position and unchanged.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. The name of the author may not be used to endorse or promote products
 *    derived from this software without specific prior written permission
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29 */ 30 31#include <sys/param.h> 32 33#include <netinet/in.h> 34 35#include <errno.h> 36#include <ctype.h> 37#include <stdio.h> 38#include <stdlib.h> 39#include <string.h> 40 41#include "fetch.h" 42#include "common.h" 43 44auth_t fetchAuthMethod; 45int fetchLastErrCode; 46char fetchLastErrString[MAXERRSTRING]; 47int fetchTimeout; 48int fetchRestartCalls = 1; 49int fetchDebug; 50 51 52/*** Local data **************************************************************/ 53 54/* 55 * Error messages for parser errors 56 */ 57#define URL_MALFORMED 1 58#define URL_BAD_SCHEME 2 59#define URL_BAD_PORT 3 60static struct fetcherr url_errlist[] = { 61 { URL_MALFORMED, FETCH_URL, "Malformed URL" }, 62 { URL_BAD_SCHEME, FETCH_URL, "Invalid URL scheme" }, 63 { URL_BAD_PORT, FETCH_URL, "Invalid server port" }, 64 { -1, FETCH_UNKNOWN, "Unknown parser error" } 65}; 66 67 68/*** Public API **************************************************************/ 69 70/* 71 * Select the appropriate protocol for the URL scheme, and return a 72 * read-only stream connected to the document referenced by the URL. 73 * Also fill out the struct url_stat. 74 */ 75FILE * 76fetchXGet(struct url *URL, struct url_stat *us, const char *flags) 77{ 78 79 if (us != NULL) { 80 us->size = -1; 81 us->atime = us->mtime = 0; 82 } 83 if (strcmp(URL->scheme, SCHEME_FILE) == 0) 84 return (fetchXGetFile(URL, us, flags)); 85 else if (strcmp(URL->scheme, SCHEME_FTP) == 0) 86 return (fetchXGetFTP(URL, us, flags)); 87 else if (strcmp(URL->scheme, SCHEME_HTTP) == 0) 88 return (fetchXGetHTTP(URL, us, flags)); 89 else if (strcmp(URL->scheme, SCHEME_HTTPS) == 0) 90 return (fetchXGetHTTP(URL, us, flags)); 91 url_seterr(URL_BAD_SCHEME); 92 return (NULL); 93} 94 95/* 96 * Select the appropriate protocol for the URL scheme, and return a 97 * read-only stream connected to the document referenced by the URL. 
98 */ 99FILE * 100fetchGet(struct url *URL, const char *flags) 101{ 102 return (fetchXGet(URL, NULL, flags)); 103} 104 105/* 106 * Select the appropriate protocol for the URL scheme, and return a 107 * write-only stream connected to the document referenced by the URL. 108 */ 109FILE * 110fetchPut(struct url *URL, const char *flags) 111{ 112 113 if (strcmp(URL->scheme, SCHEME_FILE) == 0) 114 return (fetchPutFile(URL, flags)); 115 else if (strcmp(URL->scheme, SCHEME_FTP) == 0) 116 return (fetchPutFTP(URL, flags)); 117 else if (strcmp(URL->scheme, SCHEME_HTTP) == 0) 118 return (fetchPutHTTP(URL, flags)); 119 else if (strcmp(URL->scheme, SCHEME_HTTPS) == 0) 120 return (fetchPutHTTP(URL, flags)); 121 url_seterr(URL_BAD_SCHEME); 122 return (NULL); 123} 124 125/* 126 * Select the appropriate protocol for the URL scheme, and return the 127 * size of the document referenced by the URL if it exists. 128 */ 129int 130fetchStat(struct url *URL, struct url_stat *us, const char *flags) 131{ 132 133 if (us != NULL) { 134 us->size = -1; 135 us->atime = us->mtime = 0; 136 } 137 if (strcmp(URL->scheme, SCHEME_FILE) == 0) 138 return (fetchStatFile(URL, us, flags)); 139 else if (strcmp(URL->scheme, SCHEME_FTP) == 0) 140 return (fetchStatFTP(URL, us, flags)); 141 else if (strcmp(URL->scheme, SCHEME_HTTP) == 0) 142 return (fetchStatHTTP(URL, us, flags)); 143 else if (strcmp(URL->scheme, SCHEME_HTTPS) == 0) 144 return (fetchStatHTTP(URL, us, flags)); 145 url_seterr(URL_BAD_SCHEME); 146 return (-1); 147} 148 149/* 150 * Select the appropriate protocol for the URL scheme, and return a 151 * list of files in the directory pointed to by the URL. 
152 */ 153struct url_ent * 154fetchList(struct url *URL, const char *flags) 155{ 156 157 if (strcmp(URL->scheme, SCHEME_FILE) == 0) 158 return (fetchListFile(URL, flags)); 159 else if (strcmp(URL->scheme, SCHEME_FTP) == 0) 160 return (fetchListFTP(URL, flags)); 161 else if (strcmp(URL->scheme, SCHEME_HTTP) == 0) 162 return (fetchListHTTP(URL, flags)); 163 else if (strcmp(URL->scheme, SCHEME_HTTPS) == 0) 164 return (fetchListHTTP(URL, flags)); 165 url_seterr(URL_BAD_SCHEME); 166 return (NULL); 167} 168 169/* 170 * Attempt to parse the given URL; if successful, call fetchXGet(). 171 */ 172FILE * 173fetchXGetURL(const char *URL, struct url_stat *us, const char *flags) 174{ 175 struct url *u; 176 FILE *f; 177 178 if ((u = fetchParseURL(URL)) == NULL) 179 return (NULL); 180 181 f = fetchXGet(u, us, flags); 182 183 fetchFreeURL(u); 184 return (f); 185} 186 187/* 188 * Attempt to parse the given URL; if successful, call fetchGet(). 189 */ 190FILE * 191fetchGetURL(const char *URL, const char *flags) 192{ 193 return (fetchXGetURL(URL, NULL, flags)); 194} 195 196/* 197 * Attempt to parse the given URL; if successful, call fetchPut(). 198 */ 199FILE * 200fetchPutURL(const char *URL, const char *flags) 201{ 202 struct url *u; 203 FILE *f; 204 205 if ((u = fetchParseURL(URL)) == NULL) 206 return (NULL); 207 208 f = fetchPut(u, flags); 209 210 fetchFreeURL(u); 211 return (f); 212} 213 214/* 215 * Attempt to parse the given URL; if successful, call fetchStat(). 216 */ 217int 218fetchStatURL(const char *URL, struct url_stat *us, const char *flags) 219{ 220 struct url *u; 221 int s; 222 223 if ((u = fetchParseURL(URL)) == NULL) 224 return (-1); 225 226 s = fetchStat(u, us, flags); 227 228 fetchFreeURL(u); 229 return (s); 230} 231 232/* 233 * Attempt to parse the given URL; if successful, call fetchList(). 
234 */ 235struct url_ent * 236fetchListURL(const char *URL, const char *flags) 237{ 238 struct url *u; 239 struct url_ent *ue; 240 241 if ((u = fetchParseURL(URL)) == NULL) 242 return (NULL); 243 244 ue = fetchList(u, flags); 245 246 fetchFreeURL(u); 247 return (ue); 248} 249 250/* 251 * Make a URL 252 */ 253struct url * 254fetchMakeURL(const char *scheme, const char *host, int port, const char *doc, 255 const char *user, const char *pwd) 256{ 257 struct url *u; 258 259 if (!scheme || (!host && !doc)) { 260 url_seterr(URL_MALFORMED); 261 return (NULL); 262 } 263 264 if (port < 0 || port > 65535) { 265 url_seterr(URL_BAD_PORT); 266 return (NULL); 267 } 268 269 /* allocate struct url */ 270 if ((u = calloc(1, sizeof(*u))) == NULL) { 271 fetch_syserr(); 272 return (NULL); 273 } 274 u->netrcfd = -1; 275 276 if ((u->doc = strdup(doc ? doc : "/")) == NULL) { 277 fetch_syserr(); 278 free(u); 279 return (NULL); 280 } 281 282#define seturl(x) snprintf(u->x, sizeof(u->x), "%s", x) 283 seturl(scheme); 284 seturl(host); 285 seturl(user); 286 seturl(pwd); 287#undef seturl 288 u->port = port; 289 290 return (u); 291} 292 293/* 294 * Return value of the given hex digit. 295 */ 296static int 297fetch_hexval(char ch) 298{ 299 300 if (ch >= '0' && ch <= '9') 301 return (ch - '0'); 302 else if (ch >= 'a' && ch <= 'f') 303 return (ch - 'a' + 10); 304 else if (ch >= 'A' && ch <= 'F') 305 return (ch - 'A' + 10); 306 return (-1); 307} 308 309/* 310 * Decode percent-encoded URL component from src into dst, stopping at end 311 * of string, or at @ or : separators. Returns a pointer to the unhandled 312 * part of the input string (null terminator, @, or :). No terminator is 313 * written to dst (it is the caller's responsibility). 
314 */ 315static const char * 316fetch_pctdecode(char *dst, const char *src, size_t dlen) 317{ 318 int d1, d2; 319 char c; 320 const char *s; 321 322 for (s = src; *s != '\0' && *s != '@' && *s != ':'; s++) { 323 if (s[0] == '%' && (d1 = fetch_hexval(s[1])) >= 0 && 324 (d2 = fetch_hexval(s[2])) >= 0 && (d1 > 0 || d2 > 0)) { 325 c = d1 << 4 | d2; 326 s += 2; 327 } else if (s[0] == '%') { 328 /* Invalid escape sequence. */ 329 return (NULL); 330 } else { 331 c = *s; 332 } 333 if (dlen-- > 0) 334 *dst++ = c; 335 else 336 return (NULL); 337 } 338 return (s); 339} 340 341/* 342 * Split an URL into components. URL syntax is: 343 * [method:/][/[user[:pwd]@]host[:port]/][document] 344 * This almost, but not quite, RFC1738 URL syntax. 345 */ 346struct url * 347fetchParseURL(const char *URL) 348{ 349 char *doc; 350 const char *p, *q; 351 struct url *u; 352 int i, n; 353 354 /* allocate struct url */ 355 if ((u = calloc(1, sizeof(*u))) == NULL) { 356 fetch_syserr(); 357 return (NULL); 358 } 359 u->netrcfd = -1; 360 361 /* scheme name */ 362 if ((p = strstr(URL, ":/"))) { 363 if (p - URL > URL_SCHEMELEN) 364 goto ouch; 365 for (i = 0; URL + i < p; i++) 366 u->scheme[i] = tolower((unsigned char)URL[i]); 367 URL = ++p; 368 /* 369 * Only one slash: no host, leave slash as part of document 370 * Two slashes: host follows, strip slashes 371 */ 372 if (URL[1] == '/') 373 URL = (p += 2); 374 } else { 375 p = URL; 376 } 377 if (!*URL || *URL == '/' || *URL == '.' 
|| 378 (u->scheme[0] == '\0' && 379 strchr(URL, '/') == NULL && strchr(URL, ':') == NULL)) 380 goto nohost; 381 382 p = strpbrk(URL, "/@"); 383 if (p && *p == '@') { 384 /* username */ 385 q = fetch_pctdecode(u->user, URL, URL_USERLEN); 386 if (q == NULL) 387 goto ouch; 388 389 /* password */ 390 if (*q == ':') { 391 q = fetch_pctdecode(u->pwd, q + 1, URL_PWDLEN); 392 if (q == NULL) 393 goto ouch; 394 } 395 p++; 396 } else { 397 p = URL; 398 } 399 400 /* hostname */ 401 if (*p == '[') { 402 q = p + 1 + strspn(p + 1, ":0123456789ABCDEFabcdef"); 403 if (*q++ != ']') 404 goto ouch; 405 } else { 406 /* valid characters in a DNS name */ 407 q = p + strspn(p, "-." "0123456789" 408 "ABCDEFGHIJKLMNOPQRSTUVWXYZ" "_" 409 "abcdefghijklmnopqrstuvwxyz"); 410 } 411 if ((*q != '\0' && *q != '/' && *q != ':') || q - p > MAXHOSTNAMELEN) 412 goto ouch; 413 for (i = 0; p + i < q; i++) 414 u->host[i] = tolower((unsigned char)p[i]); 415 u->host[i] = '\0'; 416 p = q; 417 418 /* port */ 419 if (*p == ':') { 420 for (n = 0, q = ++p; *q && (*q != '/'); q++) { 421 if (*q >= '0' && *q <= '9' && n < INT_MAX / 10) { 422 n = n * 10 + (*q - '0'); 423 } else { 424 /* invalid port */ 425 url_seterr(URL_BAD_PORT); 426 goto ouch; 427 } 428 } 429 if (p != q && (n < 1 || n > IPPORT_MAX)) 430 goto ouch; 431 u->port = n; 432 p = q; 433 } 434 435nohost: 436 /* document */ 437 if (!*p) 438 p = "/"; 439 440 if (strcmp(u->scheme, SCHEME_HTTP) == 0 || 441 strcmp(u->scheme, SCHEME_HTTPS) == 0) { 442 const char hexnums[] = "0123456789abcdef"; 443 444 /* percent-escape whitespace. 
*/ 445 if ((doc = malloc(strlen(p) * 3 + 1)) == NULL) { 446 fetch_syserr(); 447 goto ouch; 448 } 449 u->doc = doc; 450 while (*p != '\0') { 451 if (!isspace((unsigned char)*p)) { 452 *doc++ = *p++; 453 } else { 454 *doc++ = '%'; 455 *doc++ = hexnums[((unsigned int)*p) >> 4]; 456 *doc++ = hexnums[((unsigned int)*p) & 0xf]; 457 p++; 458 } 459 } 460 *doc = '\0'; 461 } else if ((u->doc = strdup(p)) == NULL) { 462 fetch_syserr(); 463 goto ouch; 464 } 465 466 DEBUGF("scheme: \"%s\"\n" 467 "user: \"%s\"\n" 468 "password: \"%s\"\n" 469 "host: \"%s\"\n" 470 "port: \"%d\"\n" 471 "document: \"%s\"\n", 472 u->scheme, u->user, u->pwd, 473 u->host, u->port, u->doc); 474 475 return (u); 476 477ouch: 478 free(u); 479 return (NULL); 480} 481 482/* 483 * Free a URL 484 */ 485void 486fetchFreeURL(struct url *u) 487{ 488 free(u->doc); 489 free(u); 490} 491