fetch.c revision 62254
1/*- 2 * Copyright (c) 2000 Dag-Erling Co�dan Sm�rgrav 3 * All rights reserved. 4 * 5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted provided that the following conditions 7 * are met: 8 * 1. Redistributions of source code must retain the above copyright 9 * notice, this list of conditions and the following disclaimer 10 * in this position and unchanged. 11 * 2. Redistributions in binary form must reproduce the above copyright 12 * notice, this list of conditions and the following disclaimer in the 13 * documentation and/or other materials provided with the distribution. 14 * 3. The name of the author may not be used to endorse or promote products 15 * derived from this software without specific prior written permission 16 * 17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR 18 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 19 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 20 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, 21 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 22 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 23 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 24 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 25 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF 26 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 27 * 28 * $FreeBSD: head/usr.bin/fetch/fetch.c 62254 2000-06-29 10:32:56Z des $ 29 */ 30 31#include <sys/param.h> 32#include <sys/stat.h> 33#include <sys/socket.h> 34 35#include <ctype.h> 36#include <err.h> 37#include <errno.h> 38#include <stdio.h> 39#include <stdlib.h> 40#include <string.h> 41#include <sysexits.h> 42#include <unistd.h> 43 44#include <fetch.h> 45 46#define MINBUFSIZE 4096 47 48/* Option flags */ 49int A_flag; /* -A: do not follow 302 redirects */ 50int a_flag; /* -a: auto retry */ 51size_t B_size; /* -B: buffer size */ 52int b_flag; /*! -b: workaround TCP bug */ 53char *c_dirname; /* -c: remote directory */ 54int d_flag; /* -d: direct connection */ 55int F_flag; /* -F: restart without checking mtime */ 56char *f_filename; /* -f: file to fetch */ 57int H_flag; /* -H: use high port */ 58char *h_hostname; /* -h: host to fetch from */ 59int l_flag; /* -l: link rather than copy file: URLs */ 60int m_flag; /* -[Mm]: set local timestamp to remote timestamp */ 61int o_flag; /* -o: specify output file */ 62int o_directory; /* output file is a directory */ 63char *o_filename; /* name of output file */ 64int o_stdout; /* output file is stdout */ 65int once_flag; /* -1: stop at first successful file */ 66int p_flag = 1; /* -[Pp]: use passive FTP */ 67int R_flag; /* -R: don't delete partially transferred files */ 68int r_flag; /* -r: restart previously interrupted transfer */ 69u_int T_secs = 0; /* -T: transfer timeout in seconds */ 70int s_flag; /* -s: show size, don't fetch */ 71off_t S_size; /* -S: require size to match */ 72int t_flag; /*! -t: workaround TCP bug */ 73int v_level = 1; /* -v: verbosity level */ 74int v_tty; /* stdout is a tty */ 75u_int w_secs; /* -w: retry delay */ 76int family = PF_UNSPEC; /* -[46]: address family to use */ 77 78 79u_int ftp_timeout; /* default timeout for FTP transfers */ 80u_int http_timeout; /* default timeout for HTTP transfers */ 81u_char *buf; /* transfer buffer */ 82 83 84void 85sig_handler(int sig) 86{ 87 errx(1, "Transfer timed out"); 88} 89 90struct xferstat { 91 char name[40]; 92 struct timeval start; 93 struct timeval end; 94 struct timeval last; 95 off_t size; 96 off_t offset; 97 off_t rcvd; 98}; 99 100void 101stat_start(struct xferstat *xs, char *name, off_t size, off_t offset) 102{ 103 snprintf(xs->name, sizeof xs->name, "%s", name); 104 xs->size = size; 105 xs->offset = offset; 106 if (v_level) { 107 fprintf(stderr, "Receiving %s", xs->name); 108 if (xs->size != -1) 109 fprintf(stderr, " (%lld bytes)", xs->size - xs->offset); 110 } 111 gettimeofday(&xs->start, NULL); 112 xs->last = xs->start; 113} 114 115void 116stat_update(struct xferstat *xs, off_t rcvd) 117{ 118 struct timeval now; 119 120 xs->rcvd = rcvd; 121 122 if (v_level <= 1 || !v_tty) 123 return; 124 125 gettimeofday(&now, NULL); 126 if (now.tv_sec <= xs->last.tv_sec) 127 return; 128 xs->last = now; 129 130 fprintf(stderr, "\rReceiving %s", xs->name); 131 if (xs->size == -1) 132 fprintf(stderr, ": %lld bytes", xs->rcvd - xs->offset); 133 else 134 fprintf(stderr, " (%lld bytes): %d%%", xs->size - xs->offset, 135 (int)((100.0 * xs->rcvd) / (xs->size - xs->offset))); 136} 137 138void 139stat_end(struct xferstat *xs) 140{ 141 double delta; 142 double bps; 143 144 gettimeofday(&xs->end, NULL); 145 146 if (!v_level) 147 return; 148 149 fputc('\n', stderr); 150 delta = (xs->end.tv_sec + (xs->end.tv_usec / 1.e6)) 151 - (xs->start.tv_sec + (xs->start.tv_usec / 1.e6)); 152 fprintf(stderr, "%lld bytes transferred in %.1f seconds ", 153 xs->size - xs->offset, delta); 154 bps = (xs->size - xs->offset) / delta; 155 if (bps > 1024*1024) 156 fprintf(stderr, "(%.2f MBps)\n", bps / (1024*1024)); 157 else if (bps > 1024) 158 fprintf(stderr, "(%.2f kBps)\n", bps / 1024); 159 else 160 fprintf(stderr, "(%.2f Bps)\n", bps); 161} 162 163int 164fetch(char *URL, char *path) 165{ 166 struct url *url; 167 struct url_stat us; 168 struct stat sb; 169 struct xferstat xs; 170 FILE *f, *of; 171 size_t size; 172 off_t count; 173 char flags[8]; 174 int ch, n, r; 175 u_int timeout; 176 177 f = of = NULL; 178 179 /* parse URL */ 180 if ((url = fetchParseURL(URL)) == NULL) { 181 warnx("%s: parse error", URL); 182 goto failure; 183 } 184 185 timeout = 0; 186 *flags = 0; 187 188 /* common flags */ 189 if (v_level > 2) 190 strcat(flags, "v"); 191 switch (family) { 192 case PF_INET: 193 strcat(flags, "4"); 194 break; 195 case PF_INET6: 196 strcat(flags, "6"); 197 break; 198 } 199 200 /* FTP specific flags */ 201 if (strcmp(url->scheme, "ftp") == 0) { 202 if (p_flag) 203 strcat(flags, "p"); 204 if (d_flag) 205 strcat(flags, "d"); 206 if (H_flag) 207 strcat(flags, "h"); 208 timeout = T_secs ? T_secs : ftp_timeout; 209 } 210 211 /* HTTP specific flags */ 212 if (strcmp(url->scheme, "http") == 0) { 213 if (d_flag) 214 strcat(flags, "d"); 215 if (A_flag) 216 strcat(flags, "A"); 217 timeout = T_secs ? T_secs : http_timeout; 218 } 219 220 /* 221 * Set the protocol timeout. 222 * This currently only works for FTP, so we still use 223 * alarm(timeout) further down. 224 */ 225 fetchTimeout = timeout; 226 227 /* stat remote file */ 228 alarm(timeout); 229 if (fetchStat(url, &us, flags) == -1) 230 warnx("%s: size not known", path); 231 alarm(timeout); 232 233 /* just print size */ 234 if (s_flag) { 235 if (us.size == -1) 236 printf("Unknown\n"); 237 else 238 printf("%lld\n", us.size); 239 goto success; 240 } 241 242 /* check that size is as expected */ 243 if (S_size && us.size != -1 && us.size != S_size) { 244 warnx("%s: size mismatch: expected %lld, actual %lld", 245 path, S_size, us.size); 246 goto failure; 247 } 248 249 /* symlink instead of copy */ 250 if (l_flag && strcmp(url->scheme, "file") == 0 && !o_stdout) { 251 if (symlink(url->doc, path) == -1) { 252 warn("%s: symlink()", path); 253 goto failure; 254 } 255 goto success; 256 } 257 258 if (o_stdout) { 259 /* output to stdout */ 260 of = stdout; 261 } else if (r_flag && us.size != -1 && stat(path, &sb) != -1 262 && (F_flag || (us.mtime && sb.st_mtime == us.mtime))) { 263 /* output to file, restart aborted transfer */ 264 if (us.size == sb.st_size) 265 goto success; 266 else if (sb.st_size > us.size && truncate(path, us.size) == -1) { 267 warn("%s: truncate()", path); 268 goto failure; 269 } 270 if ((of = fopen(path, "a")) == NULL) { 271 warn("%s: open()", path); 272 goto failure; 273 } 274 url->offset = sb.st_size; 275 } else if (m_flag && us.size != -1 && stat(path, &sb) != -1) { 276 /* output to file, mirror mode */ 277 warnx(" local: %lld bytes, mtime %ld", sb.st_size, sb.st_mtime); 278 warnx("remote: %lld bytes, mtime %ld", us.size, us.mtime); 279 if (sb.st_size == us.size && sb.st_mtime == us.mtime) 280 return 0; 281 if ((of = fopen(path, "w")) == NULL) { 282 warn("%s: open()", path); 283 goto failure; 284 } 285 } else { 286 /* output to file, all other cases */ 287 if ((of = fopen(path, "w")) == NULL) { 288 warn("%s: open()", path); 289 goto failure; 290 } 291 } 292 count = url->offset; 293 294 /* start the transfer */ 295 if ((f = fetchGet(url, flags)) == NULL) { 296 warnx("%s", fetchLastErrString); 297 if (!R_flag && !r_flag && !o_stdout) 298 unlink(path); 299 goto failure; 300 } 301 302 /* start the counter */ 303 stat_start(&xs, path, us.size, count); 304 305 n = 0; 306 307 if (us.size == -1) { 308 /* 309 * We have no idea how much data to expect, so do it byte by 310 * byte. This is incredibly inefficient, but there's not much 311 * we can do about it... :( 312 */ 313 while (1) { 314 if (timeout) 315 alarm(timeout); 316#ifdef STDIO_HACK 317 /* 318 * This is a non-portable hack, but it makes things go 319 * faster. Basically, if there is data in the input file's 320 * buffer, write it out; then fall through to the fgetc() 321 * which forces a refill. It saves a memcpy() and reduces 322 * the number of iterations, i.e the number of calls to 323 * alarm(). Empirical evidence shows this can cut user 324 * time by up to 90%. There may be better (even portable) 325 * ways to do this. 326 */ 327 if (f->_r && (f->_ub._base == NULL)) { 328 if (fwrite(f->_p, f->_r, 1, of) < 1) 329 break; 330 count += f->_r; 331 f->_p += f->_r; 332 f->_r = 0; 333 } 334#endif 335 if ((ch = fgetc(f)) == EOF || fputc(ch, of) == EOF) 336 break; 337 stat_update(&xs, count++); 338 n++; 339 } 340 } else { 341 /* we know exactly how much to transfer, so do it efficiently */ 342 for (size = B_size; count != us.size; n++) { 343 if (us.size - count < B_size) 344 size = us.size - count; 345 if (timeout) 346 alarm(timeout); 347 if (fread(buf, size, 1, f) != 1 || fwrite(buf, size, 1, of) != 1) 348 break; 349 stat_update(&xs, count += size); 350 } 351 } 352 353 if (timeout) 354 alarm(0); 355 356 stat_end(&xs); 357 358 /* check the status of our files */ 359 if (ferror(f)) 360 warn("%s", URL); 361 if (ferror(of)) 362 warn("%s", path); 363 if (ferror(f) || ferror(of)) { 364 if (!R_flag && !r_flag && !o_stdout) 365 unlink(path); 366 goto failure; 367 } 368 369 /* need to close the file before setting mtime */ 370 if (of != stdout) { 371 fclose(of); 372 of = NULL; 373 } 374 375 /* Set mtime of local file */ 376 if (m_flag && us.size != -1 && !o_stdout) { 377 struct timeval tv[2]; 378 379 tv[0].tv_sec = (long)us.atime; 380 tv[1].tv_sec = (long)us.mtime; 381 tv[0].tv_usec = tv[1].tv_usec = 0; 382 if (utimes(path, tv)) 383 warn("%s: utimes()", path); 384 } 385 386 success: 387 r = 0; 388 goto done; 389 failure: 390 r = -1; 391 goto done; 392 done: 393 if (f) 394 fclose(f); 395 if (of && of != stdout) 396 fclose(of); 397 fetchFreeURL(url); 398 return r; 399} 400 401void 402usage(void) 403{ 404 /* XXX badly out of synch */ 405 fprintf(stderr, 406 "Usage: fetch [-1AFHMPRabdlmnpqrstv] [-o outputfile] [-S bytes]\n" 407 " [-B bytes] [-T seconds] [-w seconds]\n" 408 " [-f file -h host [-c dir] | URL ...]\n" 409 ); 410} 411 412 413#define PARSENUM(NAME, TYPE) \ 414int \ 415NAME(char *s, TYPE *v) \ 416{ \ 417 *v = 0; \ 418 for (*v = 0; *s; s++) \ 419 if (isdigit(*s)) \ 420 *v = *v * 10 + *s - '0'; \ 421 else \ 422 return -1; \ 423 return 0; \ 424} 425 426PARSENUM(parseint, u_int) 427PARSENUM(parsesize, size_t) 428PARSENUM(parseoff, off_t) 429 430int 431main(int argc, char *argv[]) 432{ 433 struct stat sb; 434 char *p, *q, *s; 435 int c, e, r; 436 437 while ((c = getopt(argc, argv, 438 "146AaB:bc:dFf:h:lHMmnPpo:qRrS:sT:tvw:")) != EOF) 439 switch (c) { 440 case '1': 441 once_flag = 1; 442 break; 443 case '4': 444 family = PF_INET; 445 break; 446 case '6': 447 family = PF_INET6; 448 break; 449 case 'A': 450 A_flag = 1; 451 break; 452 case 'a': 453 a_flag = 1; 454 break; 455 case 'B': 456 if (parsesize(optarg, &B_size) == -1) 457 errx(1, "invalid buffer size"); 458 break; 459 case 'b': 460 warnx("warning: the -b option is deprecated"); 461 b_flag = 1; 462 break; 463 case 'c': 464 c_dirname = optarg; 465 break; 466 case 'd': 467 d_flag = 1; 468 break; 469 case 'F': 470 F_flag = 1; 471 break; 472 case 'f': 473 f_filename = optarg; 474 break; 475 case 'H': 476 H_flag = 1; 477 break; 478 case 'h': 479 h_hostname = optarg; 480 break; 481 case 'l': 482 l_flag = 1; 483 break; 484 case 'o': 485 o_flag = 1; 486 o_filename = optarg; 487 break; 488 case 'M': 489 case 'm': 490 m_flag = 1; 491 break; 492 case 'n': 493 m_flag = 0; 494 break; 495 case 'P': 496 case 'p': 497 p_flag = 1; 498 break; 499 case 'q': 500 v_level = 0; 501 break; 502 case 'R': 503 R_flag = 1; 504 break; 505 case 'r': 506 r_flag = 1; 507 break; 508 case 'S': 509 if (parseoff(optarg, &S_size) == -1) 510 errx(1, "invalid size"); 511 break; 512 case 's': 513 s_flag = 1; 514 break; 515 case 'T': 516 if (parseint(optarg, &T_secs) == -1) 517 errx(1, "invalid timeout"); 518 break; 519 case 't': 520 t_flag = 1; 521 warnx("warning: the -t option is deprecated"); 522 break; 523 case 'v': 524 v_level++; 525 break; 526 case 'w': 527 a_flag = 1; 528 if (parseint(optarg, &w_secs) == -1) 529 errx(1, "invalid delay"); 530 break; 531 default: 532 usage(); 533 exit(EX_USAGE); 534 } 535 536 argc -= optind; 537 argv += optind; 538 539 if (h_hostname || f_filename || c_dirname) { 540 if (!h_hostname || !f_filename || argc) { 541 usage(); 542 exit(EX_USAGE); 543 } 544 /* XXX this is a hack. */ 545 if (strcspn(h_hostname, "@:/") != strlen(h_hostname)) 546 errx(1, "invalid hostname"); 547 if (asprintf(argv, "ftp://%s/%s/%s", h_hostname, 548 c_dirname ? c_dirname : "", f_filename) == -1) 549 errx(1, strerror(ENOMEM)); 550 argc++; 551 } 552 553 if (!argc) { 554 usage(); 555 exit(EX_USAGE); 556 } 557 558 /* allocate buffer */ 559 if (B_size < MINBUFSIZE) 560 B_size = MINBUFSIZE; 561 if ((buf = malloc(B_size)) == NULL) 562 errx(1, strerror(ENOMEM)); 563 564 /* timeout handling */ 565 signal(SIGALRM, sig_handler); 566 if ((s = getenv("FTP_TIMEOUT")) != NULL) { 567 if (parseint(s, &ftp_timeout) == -1) { 568 warnx("FTP_TIMEOUT is not a positive integer"); 569 ftp_timeout = 0; 570 } 571 } 572 if ((s = getenv("HTTP_TIMEOUT")) != NULL) { 573 if (parseint(s, &http_timeout) == -1) { 574 warnx("HTTP_TIMEOUT is not a positive integer"); 575 http_timeout = 0; 576 } 577 } 578 579 /* output file */ 580 if (o_flag) { 581 if (strcmp(o_filename, "-") == 0) { 582 o_stdout = 1; 583 } else if (stat(o_filename, &sb) == -1) { 584 if (errno == ENOENT) { 585 if (argc > 1) 586 errx(EX_USAGE, "%s is not a directory", o_filename); 587 } else { 588 err(EX_IOERR, "%s", o_filename); 589 } 590 } else { 591 if (sb.st_mode & S_IFDIR) 592 o_directory = 1; 593 } 594 } 595 596 /* check if output is to a tty (for progress report) */ 597 v_tty = isatty(STDOUT_FILENO); 598 r = 0; 599 600 while (argc) { 601 if ((p = strrchr(*argv, '/')) == NULL) 602 p = *argv; 603 else 604 p++; 605 606 if (!*p) 607 p = "fetch.out"; 608 609 fetchLastErrCode = 0; 610 611 if (o_flag) { 612 if (o_stdout) { 613 e = fetch(*argv, "-"); 614 } else if (o_directory) { 615 asprintf(&q, "%s/%s", o_filename, p); 616 e = fetch(*argv, q); 617 free(q); 618 } else { 619 e = fetch(*argv, o_filename); 620 } 621 } else { 622 e = fetch(*argv, p); 623 } 624 625 if (e == 0 && once_flag) 626 exit(0); 627 628 if (e) { 629 r = 1; 630 if ((fetchLastErrCode 631 && fetchLastErrCode != FETCH_UNAVAIL 632 && fetchLastErrCode != FETCH_MOVED 633 && fetchLastErrCode != FETCH_URL 634 && fetchLastErrCode != FETCH_RESOLV 635 && fetchLastErrCode != FETCH_UNKNOWN)) { 636 if (w_secs) { 637 if (v_level) 638 fprintf(stderr, "Waiting %d seconds before retrying\n", w_secs); 639 sleep(w_secs); 640 } 641 if (a_flag) 642 continue; 643 fprintf(stderr, "Skipping %s\n", *argv); 644 } 645 } 646 647 argc--, argv++; 648 } 649 650 exit(r); 651} 652