1/*-
2 * SPDX-License-Identifier: BSD-2-Clause
3 *
4 * Copyright 2005 Colin Percival
5 * All rights reserved
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted providing that the following conditions
9 * are met:
10 * 1. Redistributions of source code must retain the above copyright
11 *    notice, this list of conditions and the following disclaimer.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 *    notice, this list of conditions and the following disclaimer in the
14 *    documentation and/or other materials provided with the distribution.
15 *
16 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
17 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
18 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
20 * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
22 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
23 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
24 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
25 * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
26 * POSSIBILITY OF SUCH DAMAGE.
27 */
28
29#include <sys/types.h>
30#include <sys/time.h>
31#include <sys/socket.h>
32
33#include <ctype.h>
34#include <err.h>
35#include <errno.h>
36#include <fcntl.h>
37#include <limits.h>
38#include <netdb.h>
39#include <stdint.h>
40#include <stdio.h>
41#include <stdlib.h>
42#include <string.h>
43#include <sysexits.h>
44#include <unistd.h>
45
/*
 * Settings filled in by readenv() from the process environment.
 */
/* Proxy host taken from HTTP_PROXY or http_proxy; NULL when neither is set */
static const char *	env_HTTP_PROXY;
/* HTTP_PROXY_AUTH setting; advanced by strsep() in readenv() while parsed */
static char *		env_HTTP_PROXY_AUTH;
/* User-Agent header value; "phttpget/0.1" unless HTTP_USER_AGENT is set */
static const char *	env_HTTP_USER_AGENT;
/* Raw HTTP_TIMEOUT setting, or NULL when the variable is not set */
static char *		env_HTTP_TIMEOUT;
/* Proxy port: the text after ':' in the proxy setting, otherwise "3128" */
static const char *	proxyport;
/* Complete "Proxy-Authorization: Basic ..." header line, or NULL */
static char *		proxyauth;

/* Socket send/receive timeout; 15 seconds unless HTTP_TIMEOUT overrides it */
static struct timeval	timo = { 15, 0};
54
/*
 * Write the command synopsis to stderr and terminate the process with
 * the EX_USAGE exit status.  Never returns.
 */
static void
usage(void)
{

	fputs("usage: phttpget server [file ...]\n", stderr);
	exit(EX_USAGE);
}
62
/*
 * Base64 encode a NUL-terminated string using the standard alphabet and
 * '=' padding.  The string returned, if non-NULL, is allocated using
 * malloc() and must be freed by the caller.  NULL is returned if the
 * encoded length would not fit in a size_t or if malloc() fails.
 */
static char *
b64enc(const char *ptext)
{
	static const char base64[] =
	    "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
	    "abcdefghijklmnopqrstuvwxyz"
	    "0123456789+/";
	const unsigned char *pt;
	char *ctext, *pc;
	size_t ptlen, ctlen, n;
	uint32_t t;
	unsigned int j;

	/*
	 * Encoded length is 4 characters per 3-byte block or partial
	 * block of plaintext, plus one byte for the terminating NUL
	 */
	ptlen = strlen(ptext);
	if (ptlen > ((SIZE_MAX - 1) / 4) * 3 - 2)
		return (NULL);	/* Possible integer overflow */
	ctlen = 4 * ((ptlen + 2) / 3) + 1;
	if ((ctext = malloc(ctlen)) == NULL)
		return (NULL);
	ctext[ctlen - 1] = '\0';

	/*
	 * Read the input as unsigned bytes: with a signed plain char,
	 * bytes >= 0x80 would sign-extend and corrupt the 24-bit group.
	 */
	pt = (const unsigned char *)ptext;

	/*
	 * Scan through ptext, reading up to 3 bytes from ptext and
	 * writing 4 bytes to ctext, until we run out of input.
	 */
	for (pc = ctext; ptlen > 0; ptlen -= n, pt += n, pc += 4) {
		/* Number of input bytes in this (possibly final) block */
		n = (ptlen < 3) ? ptlen : 3;

		/* Pack the block into 24 bits, zero-filling missing bytes */
		for (t = 0, j = 0; j < 3; j++) {
			t <<= 8;
			if (j < n)
				t |= pt[j];
		}

		/*
		 * n input bytes carry n + 1 significant output characters;
		 * the remaining positions are '=' padding.
		 */
		for (j = 0; j < 4; j++) {
			if (j <= n)
				pc[j] = base64[(t >> 18) & 0x3f];
			else
				pc[j] = '=';
			t <<= 6;
		}
	}

	return (ctext);
}
120
121static void
122readenv(void)
123{
124	char *proxy_auth_userpass, *proxy_auth_userpass64, *p;
125	char *proxy_auth_user = NULL;
126	char *proxy_auth_pass = NULL;
127	long http_timeout;
128
129	env_HTTP_PROXY = getenv("HTTP_PROXY");
130	if (env_HTTP_PROXY == NULL)
131		env_HTTP_PROXY = getenv("http_proxy");
132	if (env_HTTP_PROXY != NULL) {
133		if (strncmp(env_HTTP_PROXY, "http://", 7) == 0)
134			env_HTTP_PROXY += 7;
135		p = strchr(env_HTTP_PROXY, '/');
136		if (p != NULL)
137			*p = 0;
138		p = strchr(env_HTTP_PROXY, ':');
139		if (p != NULL) {
140			*p = 0;
141			proxyport = p + 1;
142		} else
143			proxyport = "3128";
144	}
145
146	env_HTTP_PROXY_AUTH = getenv("HTTP_PROXY_AUTH");
147	if ((env_HTTP_PROXY != NULL) &&
148	    (env_HTTP_PROXY_AUTH != NULL) &&
149	    (strncasecmp(env_HTTP_PROXY_AUTH, "basic:" , 6) == 0)) {
150		/* Ignore authentication scheme */
151		(void) strsep(&env_HTTP_PROXY_AUTH, ":");
152
153		/* Ignore realm */
154		(void) strsep(&env_HTTP_PROXY_AUTH, ":");
155
156		/* Obtain username and password */
157		proxy_auth_user = strsep(&env_HTTP_PROXY_AUTH, ":");
158		proxy_auth_pass = env_HTTP_PROXY_AUTH;
159	}
160
161	if ((proxy_auth_user != NULL) && (proxy_auth_pass != NULL)) {
162		asprintf(&proxy_auth_userpass, "%s:%s",
163		    proxy_auth_user, proxy_auth_pass);
164		if (proxy_auth_userpass == NULL)
165			err(1, "asprintf");
166
167		proxy_auth_userpass64 = b64enc(proxy_auth_userpass);
168		if (proxy_auth_userpass64 == NULL)
169			err(1, "malloc");
170
171		asprintf(&proxyauth, "Proxy-Authorization: Basic %s\r\n",
172		    proxy_auth_userpass64);
173		if (proxyauth == NULL)
174			err(1, "asprintf");
175
176		free(proxy_auth_userpass);
177		free(proxy_auth_userpass64);
178	} else
179		proxyauth = NULL;
180
181	env_HTTP_USER_AGENT = getenv("HTTP_USER_AGENT");
182	if (env_HTTP_USER_AGENT == NULL)
183		env_HTTP_USER_AGENT = "phttpget/0.1";
184
185	env_HTTP_TIMEOUT = getenv("HTTP_TIMEOUT");
186	if (env_HTTP_TIMEOUT != NULL) {
187		http_timeout = strtol(env_HTTP_TIMEOUT, &p, 10);
188		if ((*env_HTTP_TIMEOUT == '\0') || (*p != '\0') ||
189		    (http_timeout < 0))
190			warnx("HTTP_TIMEOUT (%s) is not a positive integer",
191			    env_HTTP_TIMEOUT);
192		else
193			timo.tv_sec = http_timeout;
194	}
195}
196
197static int
198makerequest(char ** buf, char * path, char * server, int connclose)
199{
200	int buflen;
201
202	buflen = asprintf(buf,
203	    "GET %s%s/%s HTTP/1.1\r\n"
204	    "Host: %s\r\n"
205	    "User-Agent: %s\r\n"
206	    "%s"
207	    "%s"
208	    "\r\n",
209	    env_HTTP_PROXY ? "http://" : "",
210	    env_HTTP_PROXY ? server : "",
211	    path, server, env_HTTP_USER_AGENT,
212	    proxyauth ? proxyauth : "",
213	    connclose ? "Connection: Close\r\n" : "Connection: Keep-Alive\r\n");
214	if (buflen == -1)
215		err(1, "asprintf");
216	return(buflen);
217}
218
219static int
220readln(int sd, char * resbuf, int * resbuflen, int * resbufpos)
221{
222	ssize_t len;
223
224	while (strnstr(resbuf + *resbufpos, "\r\n",
225	    *resbuflen - *resbufpos) == NULL) {
226		/* Move buffered data to the start of the buffer */
227		if (*resbufpos != 0) {
228			memmove(resbuf, resbuf + *resbufpos,
229			    *resbuflen - *resbufpos);
230			*resbuflen -= *resbufpos;
231			*resbufpos = 0;
232		}
233
234		/* If the buffer is full, complain */
235		if (*resbuflen == BUFSIZ)
236			return -1;
237
238		/* Read more data into the buffer */
239		len = recv(sd, resbuf + *resbuflen, BUFSIZ - *resbuflen, 0);
240		if ((len == 0) ||
241		    ((len == -1) && (errno != EINTR)))
242			return -1;
243
244		if (len != -1)
245			*resbuflen += len;
246	}
247
248	return 0;
249}
250
/*
 * Transfer copylen bytes of the response to the descriptor fd, taking
 * them first from the unread part of resbuf and then from the socket
 * sd.  When fd is -1 the bytes are consumed but not written anywhere.
 *
 * *resbufpos and *resbuflen are updated to describe what is left in the
 * buffer afterwards.  Each refill overwrites resbuf from its start with
 * up to BUFSIZ bytes.
 *
 * Returns 0 when all copylen bytes were transferred, -1 if recv()
 * failed with anything other than EINTR, and -2 if the peer closed the
 * connection first.  A write() failure is fatal (err()).
 */
static int
copybytes(int sd, int fd, off_t copylen, char * resbuf, int * resbuflen,
    int * resbufpos)
{
	ssize_t n;

	while (copylen != 0) {
		/* How much of what is buffered do we still want? */
		n = *resbuflen - *resbufpos;
		if (n > copylen)
			n = copylen;

		if (n <= 0) {
			/* Buffer exhausted: refill it from the socket */
			n = recv(sd, resbuf, BUFSIZ, 0);
			if (n == 0)
				return -2;
			if (n == -1) {
				if (errno == EINTR)
					continue;
				return -1;
			}
			*resbuflen = n;
			*resbufpos = 0;
			continue;
		}

		/* Hand buffered bytes to fd; a short write is retried */
		if (fd != -1)
			n = write(fd, resbuf + *resbufpos, n);
		if (n == -1)
			err(1, "write");
		*resbufpos += n;
		copylen -= n;
	}

	return 0;
}
288
/*
 * Fetch one or more files from a single HTTP server, pipelining the
 * requests once the server has shown that it speaks HTTP/1.1.
 *
 * argv[1] is the server name; every further argument is a path which is
 * requested as "/<path>".  The body of a 200 response is written to a
 * file in the current directory named after the last '/'-separated
 * component of the path; bodies of other responses are read and
 * discarded.  A status line is printed to stderr for every response
 * other than 204 and 304.
 *
 * If a connection fails after at least one response has been received
 * on it, a new connection is opened and the outstanding requests are
 * sent again; a failure on the first request of a connection is fatal.
 *
 * Returns 0 when every response has been processed.  Usage errors exit
 * with EX_USAGE and all other fatal errors exit with status 1.
 */
int
main(int argc, char *argv[])
{
	struct addrinfo hints;	/* Hints to getaddrinfo */
	struct addrinfo *res;	/* Pointer to server address being used */
	struct addrinfo *res0;	/* Pointer to server addresses */
	char * resbuf = NULL;	/* Response buffer */
	int resbufpos = 0;	/* Response buffer position */
	int resbuflen = 0;	/* Response buffer length */
	char * eolp;		/* Pointer to "\r\n" within resbuf */
	char * hln;		/* Pointer within header line */
	char * servername;	/* Name of server */
	char * fname = NULL;	/* Name of downloaded file */
	char * reqbuf = NULL;	/* Request buffer */
	int reqbufpos = 0;	/* Request buffer position */
	int reqbuflen = 0;	/* Request buffer length */
	ssize_t len;		/* Length sent or received */
	int nreq = 0;		/* Number of next request to send */
	int nres = 0;		/* Number of next reply to receive */
	int pipelined = 0;	/* != 0 if connection in pipelined mode. */
	int keepalive;		/* != 0 if HTTP/1.0 keep-alive rcvd. */
	int sd = -1;		/* Socket descriptor */
	int sdflags = 0;	/* Flags on the socket sd */
	int fd = -1;		/* Descriptor for download target file */
	int error;		/* Error code */
	int statuscode;		/* HTTP Status code */
	off_t contentlength;	/* Value from Content-Length header */
	int chunked;		/* != 0 if transfer-encoding is chunked */
	off_t clen;		/* Chunk length */
	int firstreq = 0;	/* # of first request for this connection */
	int val;		/* Value used for setsockopt call */

	/* Check that the arguments are sensible */
	if (argc < 2)
		usage();

	/* Read important environment variables */
	readenv();

	/* Get server name and adjust arg[cv] to point at file names */
	servername = argv[1];
	argv += 2;
	argc -= 2;

	/* Allocate response buffer */
	resbuf = malloc(BUFSIZ);
	if (resbuf == NULL)
		err(1, "malloc");

	/* Look up server (or the proxy, if one is configured) */
	memset(&hints, 0, sizeof(hints));
	hints.ai_family = PF_UNSPEC;
	hints.ai_socktype = SOCK_STREAM;
	error = getaddrinfo(env_HTTP_PROXY ? env_HTTP_PROXY : servername,
	    env_HTTP_PROXY ? proxyport : "http", &hints, &res0);
	if (error)
		errx(1, "host = %s, port = %s: %s",
		    env_HTTP_PROXY ? env_HTTP_PROXY : servername,
		    env_HTTP_PROXY ? proxyport : "http",
		    gai_strerror(error));
	if (res0 == NULL)
		errx(1, "could not look up %s", servername);
	res = res0;

	/* Do the fetching */
	while (nres < argc) {
		/* Make sure we have a connected socket */
		for (; sd == -1; res = res->ai_next) {
			/* No addresses left to try :-( */
			if (res == NULL)
				errx(1, "Could not connect to %s", servername);

			/* Create a socket... */
			sd = socket(res->ai_family, res->ai_socktype,
			    res->ai_protocol);
			if (sd == -1)
				continue;

			/* ... set send and receive timeouts ... */
			setsockopt(sd, SOL_SOCKET, SO_SNDTIMEO,
			    (void *)&timo, (socklen_t)sizeof(timo));
			setsockopt(sd, SOL_SOCKET, SO_RCVTIMEO,
			    (void *)&timo, (socklen_t)sizeof(timo));

			/* ... disable SIGPIPE generation ... */
			val = 1;
			setsockopt(sd, SOL_SOCKET, SO_NOSIGPIPE,
			    (void *)&val, sizeof(int));

			/* ... and connect to the server. */
			if (connect(sd, res->ai_addr, res->ai_addrlen)) {
				close(sd);
				sd = -1;
				continue;
			}

			/* Remember which request opens this connection */
			firstreq = nres;
		}

		/*
		 * If in pipelined HTTP mode, put socket into non-blocking
		 * mode, since we're probably going to want to try to send
		 * several HTTP requests.
		 */
		if (pipelined) {
			sdflags = fcntl(sd, F_GETFL);
			if (sdflags == -1)
				err(1, "fcntl");
			if (fcntl(sd, F_SETFL, sdflags | O_NONBLOCK) == -1)
				err(1, "fcntl");
		}

		/* Construct requests and/or send them without blocking */
		while ((nreq < argc) && ((reqbuf == NULL) || pipelined)) {
			/* If not in the middle of a request, make one */
			if (reqbuf == NULL) {
				reqbuflen = makerequest(&reqbuf, argv[nreq],
				    servername, (nreq == argc - 1));
				reqbufpos = 0;
			}

			/* If in pipelined mode, try to send the request */
			if (pipelined) {
				while (reqbufpos < reqbuflen) {
					len = send(sd, reqbuf + reqbufpos,
					    reqbuflen - reqbufpos, 0);
					if (len == -1)
						break;
					reqbufpos += len;
				}
				if (reqbufpos < reqbuflen) {
					/* EAGAIN: socket full, try later */
					if (errno != EAGAIN)
						goto conndied;
					break;
				} else {
					free(reqbuf);
					reqbuf = NULL;
					nreq++;
				}
			}
		}

		/* Put connection back into blocking mode */
		if (pipelined) {
			if (fcntl(sd, F_SETFL, sdflags) == -1)
				err(1, "fcntl");
		}

		/*
		 * Do we need to blocking-send a request?  That is the case
		 * when no request is outstanding for the response we are
		 * about to wait for.
		 */
		if (nres == nreq) {
			while (reqbufpos < reqbuflen) {
				len = send(sd, reqbuf + reqbufpos,
				    reqbuflen - reqbufpos, 0);
				if (len == -1)
					goto conndied;
				reqbufpos += len;
			}
			free(reqbuf);
			reqbuf = NULL;
			nreq++;
		}

		/* Scan through the response processing headers. */
		statuscode = 0;
		contentlength = -1;
		chunked = 0;
		keepalive = 0;
		do {
			/* Get a header line */
			error = readln(sd, resbuf, &resbuflen, &resbufpos);
			if (error)
				goto conndied;
			hln = resbuf + resbufpos;
			eolp = strnstr(hln, "\r\n", resbuflen - resbufpos);
			resbufpos = (eolp - resbuf) + 2;
			*eolp = '\0';

			/* Make sure it doesn't contain a NUL character */
			if (strchr(hln, '\0') != eolp)
				goto conndied;

			/* statuscode == 0 means the status line is next */
			if (statuscode == 0) {
				/* The first line MUST be HTTP/1.x xxx ... */
				if ((strncmp(hln, "HTTP/1.", 7) != 0) ||
				    !isdigit((unsigned char)hln[7]))
					goto conndied;

				/*
				 * If the minor version number isn't zero,
				 * then we can assume that pipelining our
				 * requests is OK -- as long as we don't
				 * see a "Connection: close" line later
				 * and we either have a Content-Length or
				 * Transfer-Encoding: chunked header to
				 * tell us the length.
				 */
				if (hln[7] != '0')
					pipelined = 1;

				/* Skip over the minor version number */
				hln = strchr(hln + 7, ' ');
				if (hln == NULL)
					goto conndied;
				else
					hln++;

				/*
				 * Read the status code, giving up as soon
				 * as it leaves the valid range so that a
				 * long run of digits cannot overflow.
				 */
				while (isdigit((unsigned char)*hln)) {
					statuscode = statuscode * 10 +
					    *hln - '0';
					if (statuscode > 599)
						goto conndied;
					hln++;
				}

				if (statuscode < 100)
					goto conndied;

				/* Ignore the rest of the line */
				continue;
			}

			/*
			 * Check for "Connection: close" or
			 * "Connection: Keep-Alive" header
			 */
			if (strncasecmp(hln, "Connection:", 11) == 0) {
				hln += 11;
				if (strcasestr(hln, "close") != NULL)
					pipelined = 0;
				if (strcasestr(hln, "Keep-Alive") != NULL)
					keepalive = 1;

				/* Next header... */
				continue;
			}

			/* Check for "Content-Length:" header */
			if (strncasecmp(hln, "Content-Length:", 15) == 0) {
				hln += 15;
				contentlength = 0;

				/* Find the start of the length */
				while (!isdigit((unsigned char)*hln) &&
				    (*hln != '\0'))
					hln++;

				/* Compute the length */
				while (isdigit((unsigned char)*hln)) {
					if (contentlength >= OFF_MAX / 10) {
						/* Nasty people... */
						goto conndied;
					}
					contentlength = contentlength * 10 +
					    *hln - '0';
					hln++;
				}

				/* Next header... */
				continue;
			}

			/* Check for "Transfer-Encoding: chunked" header */
			if (strncasecmp(hln, "Transfer-Encoding:", 18) == 0) {
				hln += 18;
				if (strcasestr(hln, "chunked") != NULL)
					chunked = 1;

				/* Next header... */
				continue;
			}

			/* We blithely ignore any other header lines */

			/* No more header lines */
			if (strlen(hln) == 0) {
				/*
				 * If the status code was 1xx, then there will
				 * be a real header later.  Servers may emit
				 * 1xx header blocks at will, but since we
				 * don't expect one, we should just ignore it.
				 */
				if (100 <= statuscode && statuscode <= 199) {
					statuscode = 0;
					continue;
				}

				/* End of header; message body follows */
				break;
			}
		} while (1);

		/* No message body for 204 or 304 */
		if (statuscode == 204 || statuscode == 304) {
			nres++;
			continue;
		}

		/*
		 * There should be a message body coming, but we only want
		 * to send it to a file if the status code is 200
		 */
		if (statuscode == 200) {
			/* Generate a file name for the download */
			fname = strrchr(argv[nres], '/');
			if (fname == NULL)
				fname = argv[nres];
			else
				fname++;
			if (strlen(fname) == 0)
				errx(1, "Cannot obtain file name from %s",
				    argv[nres]);

			/* err(), not errx(), so the reason is reported */
			fd = open(fname, O_CREAT | O_TRUNC | O_WRONLY, 0644);
			if (fd == -1)
				err(1, "open(%s)", fname);
		}

		/* Read the message and send data to fd if appropriate */
		if (chunked) {
			/* Handle a chunked-encoded entity */

			/*
			 * Read chunks.  Each chunk is a line holding the
			 * hexadecimal size (possibly followed by
			 * extensions, which are ignored), that many bytes
			 * of data, and a CRLF.  A size of zero ends the
			 * list.
			 */
			do {
				error = readln(sd, resbuf, &resbuflen,
				    &resbufpos);
				if (error)
					goto conndied;
				hln = resbuf + resbufpos;
				eolp = strnstr(hln, "\r\n",
				    resbuflen - resbufpos);
				resbufpos = (eolp - resbuf) + 2;

				/* A size line must start with a hex digit */
				if (!isxdigit((unsigned char)*hln))
					goto conndied;

				clen = 0;
				while (isxdigit((unsigned char)*hln)) {
					if (clen >= OFF_MAX / 16) {
						/* Nasty people... */
						goto conndied;
					}
					if (isdigit((unsigned char)*hln))
						clen = clen * 16 + *hln - '0';
					else
						clen = clen * 16 + 10 +
						    tolower((unsigned char)
						    *hln) - 'a';
					hln++;
				}

				error = copybytes(sd, fd, clen, resbuf,
				    &resbuflen, &resbufpos);
				if (error) {
					goto conndied;
				}

				/*
				 * Consume the CRLF which terminates the
				 * chunk data.  Without this the next size
				 * line would be read as empty, i.e. as a
				 * size of zero, and all further chunks
				 * would be lost.
				 */
				if (clen != 0) {
					error = readln(sd, resbuf,
					    &resbuflen, &resbufpos);
					if (error)
						goto conndied;
					hln = resbuf + resbufpos;
					eolp = strnstr(hln, "\r\n",
					    resbuflen - resbufpos);
					if (eolp != hln)
						goto conndied;
					resbufpos += 2;
				}
			} while (clen != 0);

			/* Read trailer and final CRLF */
			do {
				error = readln(sd, resbuf, &resbuflen,
				    &resbufpos);
				if (error)
					goto conndied;
				hln = resbuf + resbufpos;
				eolp = strnstr(hln, "\r\n",
				    resbuflen - resbufpos);
				resbufpos = (eolp - resbuf) + 2;
			} while (hln != eolp);
		} else if (contentlength != -1) {
			error = copybytes(sd, fd, contentlength, resbuf,
			    &resbuflen, &resbufpos);
			if (error)
				goto conndied;
		} else {
			/*
			 * Not chunked, and no content length header.
			 * Read everything until the server closes the
			 * socket (copybytes() then returns -2, which is
			 * the expected outcome here).
			 */
			error = copybytes(sd, fd, OFF_MAX, resbuf,
			    &resbuflen, &resbufpos);
			if (error == -1)
				goto conndied;
			pipelined = 0;
		}

		if (fd != -1) {
			close(fd);
			fd = -1;
		}

		fprintf(stderr, "http://%s/%s: %d ", servername, argv[nres],
		    statuscode);
		if (statuscode == 200)
			fprintf(stderr, "OK\n");
		else if (statuscode < 300)
			fprintf(stderr, "Successful (ignored)\n");
		else if (statuscode < 400)
			fprintf(stderr, "Redirection (ignored)\n");
		else
			fprintf(stderr, "Error (ignored)\n");

		/* We've finished this file! */
		nres++;

		/*
		 * If necessary, clean up this connection so that we
		 * can start a new one.
		 */
		if (pipelined == 0 && keepalive == 0)
			goto cleanupconn;
		continue;

conndied:
		/*
		 * Something went wrong -- our connection died, the server
		 * sent us garbage, etc.  If this happened on the first
		 * request we sent over this connection, give up.  Otherwise,
		 * close this connection, open a new one, and reissue the
		 * request.
		 */
		if (nres == firstreq)
			errx(1, "Connection failure");

cleanupconn:
		/*
		 * Clean up our connection and keep on going.  Requests
		 * which were sent but not answered are issued again, and
		 * the address list is tried from the start.
		 */
		shutdown(sd, SHUT_RDWR);
		close(sd);
		sd = -1;
		if (fd != -1) {
			close(fd);
			fd = -1;
		}
		free(reqbuf);
		reqbuf = NULL;
		nreq = nres;
		res = res0;
		pipelined = 0;
		resbufpos = resbuflen = 0;
		continue;
	}

	/* Release what is still held */
	if (sd != -1)
		close(sd);
	free(reqbuf);
	free(resbuf);
	freeaddrinfo(res0);

	return 0;
}
730