fetch.c revision 62254
1/*-
2 * Copyright (c) 2000 Dag-Erling Coïdan Smørgrav
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 *    notice, this list of conditions and the following disclaimer
10 *    in this position and unchanged.
11 * 2. Redistributions in binary form must reproduce the above copyright
12 *    notice, this list of conditions and the following disclaimer in the
13 *    documentation and/or other materials provided with the distribution.
14 * 3. The name of the author may not be used to endorse or promote products
15 *    derived from this software without specific prior written permission
16 *
17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
18 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
19 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
21 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
22 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
26 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27 *
28 *	$FreeBSD: head/usr.bin/fetch/fetch.c 62254 2000-06-29 10:32:56Z des $
29 */
30
#include <sys/param.h>
#include <sys/types.h>
#include <sys/socket.h>
#include <sys/stat.h>
#include <sys/time.h>

#include <ctype.h>
#include <err.h>
#include <errno.h>
#include <limits.h>
#include <signal.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sysexits.h>
#include <unistd.h>

#include <fetch.h>
45
/* Lower bound applied to the transfer buffer size */
#define MINBUFSIZE	4096

/*
 * Program-wide state filled in from the command line and the
 * environment.  All of it has static storage duration, so anything
 * without an explicit initializer starts out as zero / NULL.
 */

/* Connection and protocol behaviour */
int		 family = PF_UNSPEC;	/* -4 / -6: address family to use */
int		 d_flag;		/* -d: direct connection */
int		 p_flag = 1;		/* -P / -p: passive FTP (default on) */
int		 H_flag;		/* -H: use high port */
int		 A_flag;		/* -A: do not follow 302 redirects */
int		 b_flag;		/* -b: TCP bug workaround (deprecated) */
int		 t_flag;		/* -t: TCP bug workaround (deprecated) */

/* What to fetch when the URL is given piecewise */
char		*h_hostname;		/* -h: host to fetch from */
char		*c_dirname;		/* -c: remote directory */
char		*f_filename;		/* -f: file to fetch */

/* Where the output goes */
int		 o_flag;		/* -o: an output file was specified */
char		*o_filename;		/*     its name */
int		 o_directory;		/*     it is a directory */
int		 o_stdout;		/*     it is stdout */
int		 l_flag;		/* -l: link rather than copy file: URLs */

/* Restart, mirroring and size checks */
int		 r_flag;		/* -r: restart interrupted transfer */
int		 F_flag;		/* -F: restart without checking mtime */
int		 R_flag;		/* -R: keep partially transferred files */
int		 m_flag;		/* -M / -m: copy remote timestamp */
int		 s_flag;		/* -s: show size, don't fetch */
off_t		 S_size;		/* -S: require size to match */

/* Retrying and timing */
int		 once_flag;		/* -1: stop at first successful file */
int		 a_flag;		/* -a: auto retry */
unsigned int	 w_secs;		/* -w: retry delay */
unsigned int	 T_secs = 0;		/* -T: transfer timeout in seconds */
unsigned int	 ftp_timeout;		/* default timeout for FTP transfers */
unsigned int	 http_timeout;		/* default timeout for HTTP transfers */

/* Reporting */
int		 v_level = 1;		/* -v: verbosity level */
int		 v_tty;			/*     progress output is a tty */

/* Transfer buffer */
size_t		 B_size;		/* -B: buffer size */
unsigned char	*buf;			/* the buffer itself */
82
83
/*
 * Signal handler for the transfer timeout: print a diagnostic on stderr
 * and terminate the process with exit status 1.
 *
 * Only async-signal-safe calls (write(2), _exit(2)) are used.  The
 * handler may interrupt the main program in the middle of a stdio
 * operation, so neither a stdio-based message routine nor exit(3), which
 * flushes stdio streams, may be called from here.  As a consequence,
 * data still sitting in stdio buffers is not written out.
 */
void
sig_handler(int sig)
{
    static const char msg[] = "fetch: Transfer timed out\n";

    (void)sig;
    if (write(STDERR_FILENO, msg, sizeof msg - 1) == -1) {
	/* nothing sensible can be done about a failed write here */
    }
    _exit(1);
}
89
90struct xferstat {
91    char		 name[40];
92    struct timeval	 start;
93    struct timeval	 end;
94    struct timeval	 last;
95    off_t		 size;
96    off_t		 offset;
97    off_t		 rcvd;
98};
99
100void
101stat_start(struct xferstat *xs, char *name, off_t size, off_t offset)
102{
103    snprintf(xs->name, sizeof xs->name, "%s", name);
104    xs->size = size;
105    xs->offset = offset;
106    if (v_level) {
107	fprintf(stderr, "Receiving %s", xs->name);
108	if (xs->size != -1)
109	    fprintf(stderr, " (%lld bytes)", xs->size - xs->offset);
110    }
111    gettimeofday(&xs->start, NULL);
112    xs->last = xs->start;
113}
114
115void
116stat_update(struct xferstat *xs, off_t rcvd)
117{
118    struct timeval now;
119
120    xs->rcvd = rcvd;
121
122    if (v_level <= 1 || !v_tty)
123	return;
124
125    gettimeofday(&now, NULL);
126    if (now.tv_sec <= xs->last.tv_sec)
127	return;
128    xs->last = now;
129
130    fprintf(stderr, "\rReceiving %s", xs->name);
131    if (xs->size == -1)
132	fprintf(stderr, ": %lld bytes", xs->rcvd - xs->offset);
133    else
134	fprintf(stderr, " (%lld bytes): %d%%", xs->size - xs->offset,
135		(int)((100.0 * xs->rcvd) / (xs->size - xs->offset)));
136}
137
138void
139stat_end(struct xferstat *xs)
140{
141    double delta;
142    double bps;
143
144    gettimeofday(&xs->end, NULL);
145
146    if (!v_level)
147	return;
148
149    fputc('\n', stderr);
150    delta = (xs->end.tv_sec + (xs->end.tv_usec / 1.e6))
151	- (xs->start.tv_sec + (xs->start.tv_usec / 1.e6));
152    fprintf(stderr, "%lld bytes transferred in %.1f seconds ",
153	    xs->size - xs->offset, delta);
154    bps = (xs->size - xs->offset) / delta;
155    if (bps > 1024*1024)
156	fprintf(stderr, "(%.2f MBps)\n", bps / (1024*1024));
157    else if (bps > 1024)
158	fprintf(stderr, "(%.2f kBps)\n", bps / 1024);
159    else
160	fprintf(stderr, "(%.2f Bps)\n", bps);
161}
162
163int
164fetch(char *URL, char *path)
165{
166    struct url *url;
167    struct url_stat us;
168    struct stat sb;
169    struct xferstat xs;
170    FILE *f, *of;
171    size_t size;
172    off_t count;
173    char flags[8];
174    int ch, n, r;
175    u_int timeout;
176
177    f = of = NULL;
178
179    /* parse URL */
180    if ((url = fetchParseURL(URL)) == NULL) {
181	warnx("%s: parse error", URL);
182	goto failure;
183    }
184
185    timeout = 0;
186    *flags = 0;
187
188    /* common flags */
189    if (v_level > 2)
190	strcat(flags, "v");
191    switch (family) {
192    case PF_INET:
193	strcat(flags, "4");
194	break;
195    case PF_INET6:
196	strcat(flags, "6");
197	break;
198    }
199
200    /* FTP specific flags */
201    if (strcmp(url->scheme, "ftp") == 0) {
202	if (p_flag)
203	    strcat(flags, "p");
204	if (d_flag)
205	    strcat(flags, "d");
206	if (H_flag)
207	    strcat(flags, "h");
208	timeout = T_secs ? T_secs : ftp_timeout;
209    }
210
211    /* HTTP specific flags */
212    if (strcmp(url->scheme, "http") == 0) {
213	if (d_flag)
214	    strcat(flags, "d");
215	if (A_flag)
216	    strcat(flags, "A");
217	timeout = T_secs ? T_secs : http_timeout;
218    }
219
220    /*
221     * Set the protocol timeout.
222     * This currently only works for FTP, so we still use
223     * alarm(timeout) further down.
224     */
225    fetchTimeout = timeout;
226
227    /* stat remote file */
228    alarm(timeout);
229    if (fetchStat(url, &us, flags) == -1)
230	warnx("%s: size not known", path);
231    alarm(timeout);
232
233    /* just print size */
234    if (s_flag) {
235	if (us.size == -1)
236	    printf("Unknown\n");
237	else
238	    printf("%lld\n", us.size);
239	goto success;
240    }
241
242    /* check that size is as expected */
243    if (S_size && us.size != -1 && us.size != S_size) {
244	warnx("%s: size mismatch: expected %lld, actual %lld",
245	      path, S_size, us.size);
246	goto failure;
247    }
248
249    /* symlink instead of copy */
250    if (l_flag && strcmp(url->scheme, "file") == 0 && !o_stdout) {
251	if (symlink(url->doc, path) == -1) {
252	    warn("%s: symlink()", path);
253	    goto failure;
254	}
255	goto success;
256    }
257
258    if (o_stdout) {
259	/* output to stdout */
260	of = stdout;
261    } else if (r_flag && us.size != -1 && stat(path, &sb) != -1
262	       && (F_flag || (us.mtime && sb.st_mtime == us.mtime))) {
263	/* output to file, restart aborted transfer */
264	if (us.size == sb.st_size)
265	    goto success;
266	else if (sb.st_size > us.size && truncate(path, us.size) == -1) {
267	    warn("%s: truncate()", path);
268	    goto failure;
269	}
270	if ((of = fopen(path, "a")) == NULL) {
271	    warn("%s: open()", path);
272	    goto failure;
273	}
274	url->offset = sb.st_size;
275    } else if (m_flag && us.size != -1 && stat(path, &sb) != -1) {
276	/* output to file, mirror mode */
277	warnx(" local: %lld bytes, mtime %ld", sb.st_size, sb.st_mtime);
278	warnx("remote: %lld bytes, mtime %ld", us.size, us.mtime);
279	if (sb.st_size == us.size && sb.st_mtime == us.mtime)
280	    return 0;
281	if ((of = fopen(path, "w")) == NULL) {
282	    warn("%s: open()", path);
283	    goto failure;
284	}
285    } else {
286	/* output to file, all other cases */
287	if ((of = fopen(path, "w")) == NULL) {
288	    warn("%s: open()", path);
289	    goto failure;
290	}
291    }
292    count = url->offset;
293
294    /* start the transfer */
295    if ((f = fetchGet(url, flags)) == NULL) {
296	warnx("%s", fetchLastErrString);
297	if (!R_flag && !r_flag && !o_stdout)
298	    unlink(path);
299	goto failure;
300    }
301
302    /* start the counter */
303    stat_start(&xs, path, us.size, count);
304
305    n = 0;
306
307    if (us.size == -1) {
308	/*
309	 * We have no idea how much data to expect, so do it byte by
310         * byte. This is incredibly inefficient, but there's not much
311         * we can do about it... :(
312	 */
313	while (1) {
314	    if (timeout)
315		alarm(timeout);
316#ifdef STDIO_HACK
317	    /*
318	     * This is a non-portable hack, but it makes things go
319	     * faster. Basically, if there is data in the input file's
320	     * buffer, write it out; then fall through to the fgetc()
321	     * which forces a refill. It saves a memcpy() and reduces
322	     * the number of iterations, i.e the number of calls to
323	     * alarm(). Empirical evidence shows this can cut user
324	     * time by up to 90%. There may be better (even portable)
325	     * ways to do this.
326	     */
327	    if (f->_r && (f->_ub._base == NULL)) {
328		if (fwrite(f->_p, f->_r, 1, of) < 1)
329		    break;
330		count += f->_r;
331		f->_p += f->_r;
332		f->_r = 0;
333	    }
334#endif
335	    if ((ch = fgetc(f)) == EOF || fputc(ch, of) == EOF)
336		break;
337	    stat_update(&xs, count++);
338	    n++;
339	}
340    } else {
341	/* we know exactly how much to transfer, so do it efficiently */
342	for (size = B_size; count != us.size; n++) {
343	    if (us.size - count < B_size)
344		size = us.size - count;
345	    if (timeout)
346		alarm(timeout);
347	    if (fread(buf, size, 1, f) != 1 || fwrite(buf, size, 1, of) != 1)
348		break;
349	    stat_update(&xs, count += size);
350	}
351    }
352
353    if (timeout)
354	alarm(0);
355
356    stat_end(&xs);
357
358    /* check the status of our files */
359    if (ferror(f))
360	warn("%s", URL);
361    if (ferror(of))
362	warn("%s", path);
363    if (ferror(f) || ferror(of)) {
364	if (!R_flag && !r_flag && !o_stdout)
365	    unlink(path);
366	goto failure;
367    }
368
369    /* need to close the file before setting mtime */
370    if (of != stdout) {
371	fclose(of);
372	of = NULL;
373    }
374
375    /* Set mtime of local file */
376    if (m_flag && us.size != -1 && !o_stdout) {
377	struct timeval tv[2];
378
379	tv[0].tv_sec = (long)us.atime;
380	tv[1].tv_sec = (long)us.mtime;
381	tv[0].tv_usec = tv[1].tv_usec = 0;
382	if (utimes(path, tv))
383	    warn("%s: utimes()", path);
384    }
385
386 success:
387    r = 0;
388    goto done;
389 failure:
390    r = -1;
391    goto done;
392 done:
393    if (f)
394	fclose(f);
395    if (of && of != stdout)
396	fclose(of);
397    fetchFreeURL(url);
398    return r;
399}
400
/*
 * Print the command synopsis on stderr.  The option letters listed here
 * mirror the option string passed to getopt(3) in main(); keep the two
 * in step when adding or removing an option.
 */
void
usage(void)
{
    fprintf(stderr,
	    "Usage: fetch [-146AFHMPRabdlmnpqrstv] [-o outputfile] [-S bytes]\n"
	    "             [-B bytes] [-T seconds] [-w seconds]\n"
	    "             [-f file -h host [-c dir] | URL ...]\n"
	);
}
411
412
/*
 * PARSENUM(NAME, TYPE) defines
 *
 *	int NAME(const char *s, TYPE *v);
 *
 * which converts the string s, consisting of decimal digits only (no
 * sign, whitespace or suffix), and stores the value in *v.
 *
 * Returns 0 on success.  Returns -1, with *v set to 0, if s is empty,
 * contains a character that is not a digit, or denotes a value larger
 * than TYPE can hold.  TYPE must be an integer type no wider than
 * unsigned long long; its largest value is worked out from its size and
 * signedness, so the accumulated value is always in range when it is
 * finally converted to TYPE.
 */
#define PARSENUM(NAME, TYPE)						\
int									\
NAME(const char *s, TYPE *v)						\
{									\
    const unsigned long long max = ((TYPE)-1 < 0) ?			\
	(~0ULL >> (CHAR_BIT *						\
	    (sizeof(unsigned long long) - sizeof(TYPE)) + 1)) :		\
	(unsigned long long)((TYPE)-1);					\
    unsigned long long acc = 0;						\
    unsigned int digit;							\
									\
    *v = 0;								\
    if (*s == '\0')							\
	return -1;							\
    for (; *s != '\0'; s++) {						\
	if (!isdigit((unsigned char)*s))				\
	    return -1;							\
	digit = (unsigned int)(*s - '0');				\
	if (acc > (max - digit) / 10)					\
	    return -1;							\
	acc = acc * 10 + digit;						\
    }									\
    *v = (TYPE)acc;							\
    return 0;								\
}

PARSENUM(parseint, unsigned int)
PARSENUM(parsesize, size_t)
PARSENUM(parseoff, off_t)
429
430int
431main(int argc, char *argv[])
432{
433    struct stat sb;
434    char *p, *q, *s;
435    int c, e, r;
436
437    while ((c = getopt(argc, argv,
438		       "146AaB:bc:dFf:h:lHMmnPpo:qRrS:sT:tvw:")) != EOF)
439	switch (c) {
440	case '1':
441	    once_flag = 1;
442	    break;
443	case '4':
444	    family = PF_INET;
445	    break;
446	case '6':
447	    family = PF_INET6;
448	    break;
449	case 'A':
450	    A_flag = 1;
451	    break;
452	case 'a':
453	    a_flag = 1;
454	    break;
455	case 'B':
456	    if (parsesize(optarg, &B_size) == -1)
457		errx(1, "invalid buffer size");
458	    break;
459	case 'b':
460	    warnx("warning: the -b option is deprecated");
461	    b_flag = 1;
462	    break;
463	case 'c':
464	    c_dirname = optarg;
465	    break;
466	case 'd':
467	    d_flag = 1;
468	    break;
469	case 'F':
470	    F_flag = 1;
471	    break;
472	case 'f':
473	    f_filename = optarg;
474	    break;
475	case 'H':
476	    H_flag = 1;
477	    break;
478	case 'h':
479	    h_hostname = optarg;
480	    break;
481	case 'l':
482	    l_flag = 1;
483	    break;
484	case 'o':
485	    o_flag = 1;
486	    o_filename = optarg;
487	    break;
488	case 'M':
489	case 'm':
490	    m_flag = 1;
491	    break;
492	case 'n':
493	    m_flag = 0;
494	    break;
495	case 'P':
496	case 'p':
497	    p_flag = 1;
498	    break;
499	case 'q':
500	    v_level = 0;
501	    break;
502	case 'R':
503	    R_flag = 1;
504	    break;
505	case 'r':
506	    r_flag = 1;
507	    break;
508	case 'S':
509	    if (parseoff(optarg, &S_size) == -1)
510		errx(1, "invalid size");
511	    break;
512	case 's':
513	    s_flag = 1;
514	    break;
515	case 'T':
516	    if (parseint(optarg, &T_secs) == -1)
517		errx(1, "invalid timeout");
518	    break;
519	case 't':
520	    t_flag = 1;
521	    warnx("warning: the -t option is deprecated");
522	    break;
523	case 'v':
524	    v_level++;
525	    break;
526	case 'w':
527	    a_flag = 1;
528	    if (parseint(optarg, &w_secs) == -1)
529		errx(1, "invalid delay");
530	    break;
531	default:
532	    usage();
533	    exit(EX_USAGE);
534	}
535
536    argc -= optind;
537    argv += optind;
538
539    if (h_hostname || f_filename || c_dirname) {
540	if (!h_hostname || !f_filename || argc) {
541	    usage();
542	    exit(EX_USAGE);
543	}
544	/* XXX this is a hack. */
545	if (strcspn(h_hostname, "@:/") != strlen(h_hostname))
546	    errx(1, "invalid hostname");
547	if (asprintf(argv, "ftp://%s/%s/%s", h_hostname,
548		     c_dirname ? c_dirname : "", f_filename) == -1)
549	    errx(1, strerror(ENOMEM));
550	argc++;
551    }
552
553    if (!argc) {
554	usage();
555	exit(EX_USAGE);
556    }
557
558    /* allocate buffer */
559    if (B_size < MINBUFSIZE)
560	B_size = MINBUFSIZE;
561    if ((buf = malloc(B_size)) == NULL)
562	errx(1, strerror(ENOMEM));
563
564    /* timeout handling */
565    signal(SIGALRM, sig_handler);
566    if ((s = getenv("FTP_TIMEOUT")) != NULL) {
567	if (parseint(s, &ftp_timeout) == -1) {
568	    warnx("FTP_TIMEOUT is not a positive integer");
569	    ftp_timeout = 0;
570	}
571    }
572    if ((s = getenv("HTTP_TIMEOUT")) != NULL) {
573	if (parseint(s, &http_timeout) == -1) {
574	    warnx("HTTP_TIMEOUT is not a positive integer");
575	    http_timeout = 0;
576	}
577    }
578
579    /* output file */
580    if (o_flag) {
581	if (strcmp(o_filename, "-") == 0) {
582	    o_stdout = 1;
583	} else if (stat(o_filename, &sb) == -1) {
584	    if (errno == ENOENT) {
585		if (argc > 1)
586		    errx(EX_USAGE, "%s is not a directory", o_filename);
587	    } else {
588		err(EX_IOERR, "%s", o_filename);
589	    }
590	} else {
591	    if (sb.st_mode & S_IFDIR)
592		o_directory = 1;
593	}
594    }
595
596    /* check if output is to a tty (for progress report) */
597    v_tty = isatty(STDOUT_FILENO);
598    r = 0;
599
600    while (argc) {
601	if ((p = strrchr(*argv, '/')) == NULL)
602	    p = *argv;
603	else
604	    p++;
605
606	if (!*p)
607	    p = "fetch.out";
608
609	fetchLastErrCode = 0;
610
611	if (o_flag) {
612	    if (o_stdout) {
613		e = fetch(*argv, "-");
614	    } else if (o_directory) {
615		asprintf(&q, "%s/%s", o_filename, p);
616		e = fetch(*argv, q);
617		free(q);
618	    } else {
619		e = fetch(*argv, o_filename);
620	    }
621	} else {
622	    e = fetch(*argv, p);
623	}
624
625	if (e == 0 && once_flag)
626	    exit(0);
627
628	if (e) {
629	    r = 1;
630	    if ((fetchLastErrCode
631		 && fetchLastErrCode != FETCH_UNAVAIL
632		 && fetchLastErrCode != FETCH_MOVED
633		 && fetchLastErrCode != FETCH_URL
634		 && fetchLastErrCode != FETCH_RESOLV
635		 && fetchLastErrCode != FETCH_UNKNOWN)) {
636		if (w_secs) {
637		    if (v_level)
638			fprintf(stderr, "Waiting %d seconds before retrying\n", w_secs);
639		    sleep(w_secs);
640		}
641		if (a_flag)
642		    continue;
643		fprintf(stderr, "Skipping %s\n", *argv);
644	    }
645	}
646
647	argc--, argv++;
648    }
649
650    exit(r);
651}
652