1/*	$NetBSD$	*/
2
3/*
4 * Copyright (c) 1988, 1989, 1991, 1994, 1995, 1996, 1997, 1998, 1999, 2000
5 *	The Regents of the University of California.  All rights reserved.
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that: (1) source code distributions
9 * retain the above copyright notice and this paragraph in its entirety, (2)
10 * distributions including binary code include the above copyright notice and
11 * this paragraph in its entirety in the documentation or other materials
12 * provided with the distribution, and (3) all advertising materials mentioning
13 * features or use of this software display the following acknowledgement:
14 * ``This product includes software developed by the University of California,
15 * Lawrence Berkeley Laboratory and its contributors.'' Neither the name of
16 * the University nor the names of its contributors may be used to endorse
17 * or promote products derived from this software without specific prior
18 * written permission.
19 * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR IMPLIED
20 * WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF
21 * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE.
22 */
23
24#include <sys/cdefs.h>
25#ifndef lint
26#if 0
27static const char rcsid[] =
28    "@(#)Id: traceroute.c,v 1.68 2000/12/14 08:04:33 leres Exp  (LBL)";
29#else
30__COPYRIGHT("@(#) Copyright (c) 1988, 1989, 1991, 1994, 1995, 1996, 1997,\
31 1998, 1999, 2000\
32 The Regents of the University of California.  All rights reserved.");
33__RCSID("$NetBSD$");
34#endif
35#endif
36
37/*
38 * traceroute host  - trace the route ip packets follow going to "host".
39 *
40 * Attempt to trace the route an ip packet would follow to some
41 * internet host.  We find out intermediate hops by launching probe
42 * packets with a small ttl (time to live) then listening for an
43 * icmp "time exceeded" reply from a gateway.  We start our probes
44 * with a ttl of one and increase by one until we get an icmp "port
45 * unreachable" (which means we got to "host") or hit a max (which
46 * defaults to 30 hops & can be changed with the -m flag).  Three
47 * probes (change with -q flag) are sent at each ttl setting and a
48 * line is printed showing the ttl, address of the gateway and
49 * round trip time of each probe.  If the probe answers come from
50 * different gateways, the address of each responding system will
51 * be printed.  If there is no response within a 5 sec. timeout
52 * interval (changed with the -w flag), a "*" is printed for that
53 * probe.
54 *
55 * Probe packets are UDP format.  We don't want the destination
56 * host to process them so the destination port is set to an
57 * unlikely value (if some clod on the destination is using that
58 * value, it can be changed with the -p flag).
59 *
60 * A sample use might be:
61 *
62 *     [yak 71]% traceroute nis.nsf.net.
63 *     traceroute to nis.nsf.net (35.1.1.48), 30 hops max, 56 byte packet
64 *      1  helios.ee.lbl.gov (128.3.112.1)  19 ms  19 ms  0 ms
65 *      2  lilac-dmc.Berkeley.EDU (128.32.216.1)  39 ms  39 ms  19 ms
66 *      3  lilac-dmc.Berkeley.EDU (128.32.216.1)  39 ms  39 ms  19 ms
67 *      4  ccngw-ner-cc.Berkeley.EDU (128.32.136.23)  39 ms  40 ms  39 ms
68 *      5  ccn-nerif22.Berkeley.EDU (128.32.168.22)  39 ms  39 ms  39 ms
69 *      6  128.32.197.4 (128.32.197.4)  40 ms  59 ms  59 ms
70 *      7  131.119.2.5 (131.119.2.5)  59 ms  59 ms  59 ms
71 *      8  129.140.70.13 (129.140.70.13)  99 ms  99 ms  80 ms
72 *      9  129.140.71.6 (129.140.71.6)  139 ms  239 ms  319 ms
73 *     10  129.140.81.7 (129.140.81.7)  220 ms  199 ms  199 ms
74 *     11  nic.merit.edu (35.1.1.48)  239 ms  239 ms  239 ms
75 *
76 * Note that lines 2 & 3 are the same.  This is due to a buggy
77 * kernel on the 2nd hop system -- lbl-csam.arpa -- that forwards
78 * packets with a zero ttl.
79 *
80 * A more interesting example is:
81 *
82 *     [yak 72]% traceroute allspice.lcs.mit.edu.
83 *     traceroute to allspice.lcs.mit.edu (18.26.0.115), 30 hops max
84 *      1  helios.ee.lbl.gov (128.3.112.1)  0 ms  0 ms  0 ms
85 *      2  lilac-dmc.Berkeley.EDU (128.32.216.1)  19 ms  19 ms  19 ms
86 *      3  lilac-dmc.Berkeley.EDU (128.32.216.1)  39 ms  19 ms  19 ms
87 *      4  ccngw-ner-cc.Berkeley.EDU (128.32.136.23)  19 ms  39 ms  39 ms
88 *      5  ccn-nerif22.Berkeley.EDU (128.32.168.22)  20 ms  39 ms  39 ms
89 *      6  128.32.197.4 (128.32.197.4)  59 ms  119 ms  39 ms
90 *      7  131.119.2.5 (131.119.2.5)  59 ms  59 ms  39 ms
91 *      8  129.140.70.13 (129.140.70.13)  80 ms  79 ms  99 ms
92 *      9  129.140.71.6 (129.140.71.6)  139 ms  139 ms  159 ms
93 *     10  129.140.81.7 (129.140.81.7)  199 ms  180 ms  300 ms
94 *     11  129.140.72.17 (129.140.72.17)  300 ms  239 ms  239 ms
95 *     12  * * *
96 *     13  128.121.54.72 (128.121.54.72)  259 ms  499 ms  279 ms
97 *     14  * * *
98 *     15  * * *
99 *     16  * * *
100 *     17  * * *
101 *     18  ALLSPICE.LCS.MIT.EDU (18.26.0.115)  339 ms  279 ms  279 ms
102 *
103 * (I start to see why I'm having so much trouble with mail to
104 * MIT.)  Note that the gateways 12, 14, 15, 16 & 17 hops away
105 * either don't send ICMP "time exceeded" messages or send them
106 * with a ttl too small to reach us.  14 - 17 are running the
107 * MIT C Gateway code that doesn't send "time exceeded"s.  God
108 * only knows what's going on with 12.
109 *
110 * The silent gateway 12 in the above may be the result of a bug in
111 * the 4.[23]BSD network code (and its derivatives):  4.x (x <= 3)
112 * sends an unreachable message using whatever ttl remains in the
113 * original datagram.  Since, for gateways, the remaining ttl is
114 * zero, the icmp "time exceeded" is guaranteed to not make it back
115 * to us.  The behavior of this bug is slightly more interesting
116 * when it appears on the destination system:
117 *
118 *      1  helios.ee.lbl.gov (128.3.112.1)  0 ms  0 ms  0 ms
119 *      2  lilac-dmc.Berkeley.EDU (128.32.216.1)  39 ms  19 ms  39 ms
120 *      3  lilac-dmc.Berkeley.EDU (128.32.216.1)  19 ms  39 ms  19 ms
121 *      4  ccngw-ner-cc.Berkeley.EDU (128.32.136.23)  39 ms  40 ms  19 ms
122 *      5  ccn-nerif35.Berkeley.EDU (128.32.168.35)  39 ms  39 ms  39 ms
123 *      6  csgw.Berkeley.EDU (128.32.133.254)  39 ms  59 ms  39 ms
124 *      7  * * *
125 *      8  * * *
126 *      9  * * *
127 *     10  * * *
128 *     11  * * *
129 *     12  * * *
130 *     13  rip.Berkeley.EDU (128.32.131.22)  59 ms !  39 ms !  39 ms !
131 *
132 * Notice that there are 12 "gateways" (13 is the final
133 * destination) and exactly the last half of them are "missing".
134 * What's really happening is that rip (a Sun-3 running Sun OS3.5)
135 * is using the ttl from our arriving datagram as the ttl in its
136 * icmp reply.  So, the reply will time out on the return path
137 * (with no notice sent to anyone since icmp's aren't sent for
138 * icmp's) until we probe with a ttl that's at least twice the path
139 * length.  I.e., rip is really only 7 hops away.  A reply that
140 * returns with a ttl of 1 is a clue this problem exists.
141 * Traceroute prints a "!" after the time if the ttl is <= 1.
142 * Since vendors ship a lot of obsolete (DEC's Ultrix, Sun 3.x) or
143 * non-standard (HPUX) software, expect to see this problem
144 * frequently and/or take care picking the target host of your
145 * probes.
146 *
147 * Other possible annotations after the time are !H, !N, !P (got a host,
148 * network or protocol unreachable, respectively), !S or !F (source
149 * route failed or fragmentation needed -- neither of these should
150 * ever occur and the associated gateway is busted if you see one).  If
151 * almost all the probes result in some kind of unreachable, traceroute
152 * will give up and exit.
153 *
154 * Notes
155 * -----
156 * This program must be run by root or be setuid.  (I suggest that
157 * you *don't* make it setuid -- casual use could result in a lot
158 * of unnecessary traffic on our poor, congested nets.)
159 *
160 * This program requires a kernel mod that does not appear in any
161 * system available from Berkeley:  A raw ip socket using proto
162 * IPPROTO_RAW must interpret the data sent as an ip datagram (as
 * opposed to data to be wrapped in an ip datagram).  See the README
164 * file that came with the source to this program for a description
165 * of the mods I made to /sys/netinet/raw_ip.c.  Your mileage may
166 * vary.  But, again, ANY 4.x (x < 4) BSD KERNEL WILL HAVE TO BE
167 * MODIFIED TO RUN THIS PROGRAM.
168 *
169 * The udp port usage may appear bizarre (well, ok, it is bizarre).
170 * The problem is that an icmp message only contains 8 bytes of
171 * data from the original datagram.  8 bytes is the size of a udp
172 * header so, if we want to associate replies with the original
173 * datagram, the necessary information must be encoded into the
174 * udp header (the ip id could be used but there's no way to
175 * interlock with the kernel's assignment of ip id's and, anyway,
176 * it would have taken a lot more kernel hacking to allow this
177 * code to set the ip id).  So, to allow two or more users to
178 * use traceroute simultaneously, we use this task's pid as the
179 * source port (the high bit is set to move the port number out
180 * of the "likely" range).  To keep track of which probe is being
181 * replied to (so times and/or hop counts don't get confused by a
182 * reply that was delayed in transit), we increment the destination
183 * port number before each probe.
184 *
185 * Don't use this as a coding example.  I was trying to find a
186 * routing problem and this code sort-of popped out after 48 hours
187 * without sleep.  I was amazed it ever compiled, much less ran.
188 *
189 * I stole the idea for this program from Steve Deering.  Since
190 * the first release, I've learned that had I attended the right
191 * IETF working group meetings, I also could have stolen it from Guy
192 * Almes or Matt Mathis.  I don't know (or care) who came up with
193 * the idea first.  I envy the originators' perspicacity and I'm
194 * glad they didn't keep the idea a secret.
195 *
196 * Tim Seaver, Ken Adelman and C. Philip Wood provided bug fixes and/or
197 * enhancements to the original distribution.
198 *
199 * I've hacked up a round-trip-route version of this that works by
200 * sending a loose-source-routed udp datagram through the destination
201 * back to yourself.  Unfortunately, SO many gateways botch source
202 * routing, the thing is almost worthless.  Maybe one day...
203 *
204 *  -- Van Jacobson (van@ee.lbl.gov)
205 *     Tue Dec 20 03:50:13 PST 1988
206 */
207
208#include <sys/param.h>
209#include <sys/file.h>
210#include <sys/ioctl.h>
211#include <sys/socket.h>
212#include <sys/time.h>
213#include <sys/sysctl.h>
214
215#include <netinet/in_systm.h>
216#include <netinet/in.h>
217#include <netinet/ip.h>
218#include <netinet/ip_var.h>
219#include <netinet/ip_icmp.h>
220#include <netinet/udp.h>
221#include <netinet/udp_var.h>
222
223#include <arpa/inet.h>
224
225#include <ctype.h>
226#include <err.h>
227#include <errno.h>
228#ifdef HAVE_MALLOC_H
229#include <malloc.h>
230#endif
231#include <memory.h>
232#include <netdb.h>
233#include <stdio.h>
234#include <stdlib.h>
235#include <string.h>
236#include <unistd.h>
237#include <poll.h>
238#ifdef IPSEC
239#include <net/route.h>
240#include <netipsec/ipsec.h>
241#endif
242
243#include "gnuc.h"
244#ifdef HAVE_OS_PROTO_H
245#include "os-proto.h"
246#endif
247
/*
 * ICMP "destination unreachable" codes 13-15 from RFC 1716 (superseded
 * by RFC 1812).  Each one is only defined here when the system's ICMP
 * header has not already supplied it.
 */
#ifndef ICMP_UNREACH_FILTER_PROHIB
#define ICMP_UNREACH_FILTER_PROHIB	13	/* admin prohibited filter */
#endif
#ifndef ICMP_UNREACH_HOST_PRECEDENCE
#define ICMP_UNREACH_HOST_PRECEDENCE	14	/* host precedence violation */
#endif
#ifndef ICMP_UNREACH_PRECEDENCE_CUTOFF
#define ICMP_UNREACH_PRECEDENCE_CUTOFF	15	/* precedence cutoff */
#endif
258
259#include "ifaddrlist.h"
260#include "as.h"
261#include "prog_ops.h"
262
/*
 * Maximum number of loose-source-route gateways that fit in the IP
 * option area: the option space left after the LSRR header
 * (IPOPT_MINOFF bytes minus one, i.e. including room for one noop),
 * divided by the size of one IPv4 address.
 */
#define NGATEWAYS ((int)((MAX_IPOPTLEN - IPOPT_MINOFF - 1) / sizeof(u_int32_t)))

/* Fallback for systems whose headers do not define MAXHOSTNAMELEN */
#ifndef MAXHOSTNAMELEN
#define MAXHOSTNAMELEN	64
#endif

/* fprintf/printf with the return value explicitly discarded */
#define Fprintf (void)fprintf
#define Printf (void)printf
272
/*
 * Host name and address list, as returned by gethostinfo() and released
 * with freehostinfo().
 */
struct hostinfo {
	char *name;		/* host name; main() takes it over by NULLing this */
	int n;			/* number of entries in addrs[] */
	u_int32_t *addrs;	/* IPv4 addresses, handed to setsin() as-is */
};
279
/*
 * Data section of the probe packet.
 *
 * The timestamp uses explicit 32-bit fields rather than struct timeval.
 * NOTE(review): presumably so the payload layout does not depend on the
 * host's time_t width -- confirm against send_probe().
 */
struct outdata {
	u_char seq;		/* sequence number of this packet */
	u_char ttl;		/* ttl packet left with */
	struct tv32 {
		int32_t tv32_sec;
		int32_t tv32_usec;
	} tv;			/* time packet left */
};
289
/*
 * Support for ICMP extensions
 *
 * http://www.ietf.org/proceedings/01aug/I-D/draft-ietf-mpls-icmp-02.txt
 *
 * ICMP_EXT_OFFSET is the byte offset of the extension header from the
 * start of the ICMP message.  The expansion is fully parenthesized so
 * the macro is safe inside larger expressions (e.g. multiplication).
 */
#define ICMP_EXT_OFFSET  (8 /* ICMP type, code, checksum, unused */ + \
                         128 /* original datagram */)
#define ICMP_EXT_VERSION 2
/*
 * ICMP extensions, common header
 *
 * A 4-bit version and 4 reserved bits share the first byte; the order
 * of the two bit-field declarations is swapped by host byte order so
 * that `version' always names the same nibble.  A reserved byte and a
 * 16-bit checksum follow.
 */
struct icmp_ext_cmn_hdr {
#if BYTE_ORDER == BIG_ENDIAN
	unsigned char   version:4;
	unsigned char   reserved1:4;
#else
	unsigned char   reserved1:4;
	unsigned char   version:4;
#endif
	unsigned char   reserved2;
	unsigned short  checksum;
};
312
/*
 * ICMP extensions, object header
 *
 * Each extension object starts with its length, a class number and a
 * class sub-type.  The MPLS_* constants are the class/sub-type values
 * that identify an MPLS label stack object.
 */
struct icmp_ext_obj_hdr {
    u_short length;		/* object length */
    u_char  class_num;		/* object class */
#define MPLS_STACK_ENTRY_CLASS 1
    u_char  c_type;		/* sub-type within the class */
#define MPLS_STACK_ENTRY_C_TYPE 1
};
323
/*
 * One 32-bit MPLS label stack entry: a 20-bit label, 3-bit exp field,
 * 1-bit s flag (bottom of stack, in RFC 3032 terms) and 8-bit ttl.
 * The declaration order of the bit-fields is reversed by host byte
 * order so the fields line up with the on-the-wire word.
 */
struct mpls_header {
#if BYTE_ORDER == BIG_ENDIAN
	 uint32_t	label:20;
	 unsigned char  exp:3;
	 unsigned char  s:1;
	 unsigned char  ttl:8;
#else
	 unsigned char  ttl:8;
	 unsigned char  s:1;
	 unsigned char  exp:3;
	 uint32_t	label:20;
#endif
};
337
#ifndef HAVE_ICMP_NEXTMTU
/*
 * Path MTU Discovery (RFC1191)
 *
 * Local layout of the next-hop MTU words, used only when the build does
 * not define HAVE_ICMP_NEXTMTU.
 */
struct my_pmtu {
	u_short ipm_void;	/* unused */
	u_short ipm_nextmtu;	/* next-hop MTU */
};
#endif
345
346static u_char	packet[512];		/* last inbound (icmp) packet */
347
348static struct ip *outip;		/* last output (udp) packet */
349static struct udphdr *outudp;		/* last output (udp) packet */
350static void *outmark;			/* packed location of struct outdata */
351static struct outdata outsetup;	/* setup and copy for alignment */
352
353static struct icmp *outicmp;		/* last output (icmp) packet */
354
355/* loose source route gateway list (including room for final destination) */
356static u_int32_t gwlist[NGATEWAYS + 1];
357
358static int s;				/* receive (icmp) socket file descriptor */
359static int sndsock;			/* send (udp/icmp) socket file descriptor */
360
361static struct sockaddr whereto;		/* Who to try to reach */
362static struct sockaddr wherefrom;	/* Who we are */
363static int packlen;			/* total length of packet */
364static int minpacket;			/* min ip packet size */
365static int maxpacket = 32 * 1024;	/* max ip packet size */
366static int printed_ttl = 0;
367static int pmtu;			/* Path MTU Discovery (RFC1191) */
368static u_int pausemsecs;
369
370static const char *prog;
371static char *source;
372static char *hostname;
373static char *device;
374#ifdef notdef
375static const char devnull[] = "/dev/null";
376#endif
377
378static int nprobes = 3;
379static int max_ttl = 30;
380static int first_ttl = 1;
381static u_int16_t ident;
382static in_port_t port = 32768 + 666;	/* start udp dest port # for probe packets */
383
384static int options;			/* socket options */
385static int verbose;
386static int waittime = 5;		/* time to wait for response (in seconds) */
387static int nflag;			/* print addresses numerically */
388static int dump;
389static int Mflag;			/* show MPLS labels if any */
390static int as_path;			/* print as numbers for each hop */
391static char *as_server = NULL;
392static void *asn;
393static int useicmp = 0;		/* use icmp echo instead of udp packets */
394#ifdef CANT_HACK_CKSUM
395static int doipcksum = 0;		/* don't calculate checksums */
396#else
397static int doipcksum = 1;		/* calculate checksums */
398#endif
399static int optlen;			/* length of ip options */
400
401static int mtus[] = {
402        17914,
403         8166,
404         4464,
405         4352,
406         2048,
407         2002,
408         1536,
409         1500,
410         1492,
411         1480,
412         1280,
413         1006,
414          576,
415          552,
416          544,
417          512,
418          508,
419          296,
420           68,
421            0
422};
423static int *mtuptr = &mtus[0];
424static int mtudisc = 0;
425static int nextmtu;   /* from ICMP error, set by packet_ok(), might be 0 */
426
/*
 * Forwards
 *
 * Prototypes for the file-local helpers, declared up front so main()
 * can call them before their definitions.
 */
static double deltaT(struct timeval *, struct timeval *);
static void freehostinfo(struct hostinfo *);
static void getaddr(u_int32_t *, char *);
static struct hostinfo *gethostinfo(char *);
static u_int16_t in_cksum(u_int16_t *, int);
static u_int16_t in_cksum2(u_int16_t, u_int16_t *, int);
static char *inetname(struct in_addr);
static int packet_ok(u_char *, ssize_t, struct sockaddr_in *, int);
static const char *pr_type(u_char);
static void print(u_char *, int, struct sockaddr_in *);
static void resize_packet(void);
static void dump_packet(void);
static void send_probe(int, int, struct timeval *);
static void setsin(struct sockaddr_in *, u_int32_t);
static int str2val(const char *, const char *, int, int);
static void tvsub(struct timeval *, struct timeval *);
static void usage(void) __attribute__((__noreturn__));
static ssize_t wait_for_reply(int, struct sockaddr_in *, const struct timeval *);
static void decode_extensions(unsigned char *buf, int ip_len);
static void frag_err(void);
static int find_local_ip(struct sockaddr_in *, struct sockaddr_in *);
#ifdef IPSEC
#ifdef IPSEC_POLICY_IPSEC
static int setpolicy(int, const char *);
#endif
#endif
454
455int
456main(int argc, char **argv)
457{
458	int op, code, n;
459	u_char *outp;
460	u_int32_t *ap;
461	struct sockaddr_in *from = (struct sockaddr_in *)&wherefrom;
462	struct sockaddr_in *to = (struct sockaddr_in *)&whereto;
463	struct hostinfo *hi;
464	int on = 1;
465	int ttl, probe, i;
466	int seq = 0;
467	int tos = 0, settos = 0, ttl_flag = 0;
468	int lsrr = 0;
469	u_int16_t off = 0;
470	struct ifaddrlist *al, *al2;
471	char errbuf[132];
472	int mib[4] = { CTL_NET, PF_INET, IPPROTO_IP, IPCTL_DEFTTL };
473	size_t size = sizeof(max_ttl);
474
475	setprogname(argv[0]);
476	prog = getprogname();
477
478	if (prog_init && prog_init() == -1)
479		err(1, "init failed");
480
481#ifdef notdef
482	/* Kernel takes care of it */
483	/* Insure the socket fds won't be 0, 1 or 2 */
484	if (open(devnull, O_RDONLY) < 0 ||
485	    open(devnull, O_RDONLY) < 0 ||
486	    open(devnull, O_RDONLY) < 0)
487		err(1, "Cannot open `%s'", devnull);
488#endif
489	if ((s = prog_socket(AF_INET, SOCK_RAW, IPPROTO_ICMP)) < 0)
490		err(1, "icmp socket");
491
492	/*
493	 * XXX 'useicmp' will always be zero here. I think the HP-UX users
494	 * running our traceroute code will forgive us.
495	 */
496#ifndef __hpux
497	sndsock = prog_socket(AF_INET, SOCK_RAW, IPPROTO_RAW);
498#else
499	sndsock = socket(AF_INET, SOCK_RAW, IPPROTO_RAW
500	    useicmp ? IPPROTO_ICMP : IPPROTO_UDP);
501#endif
502	if (sndsock < 0)
503		err(1, "raw socket");
504
505	(void) prog_sysctl(mib, sizeof(mib)/sizeof(mib[0]), &max_ttl, &size,
506	    NULL, 0);
507
508	opterr = 0;
509	while ((op = getopt(argc, argv, "aA:dDFPIMnlrvxf:g:i:m:p:q:s:t:w:z:")) != -1)
510		switch (op) {
511
512		case 'a':
513			as_path = 1;
514			break;
515
516		case 'A':
517			as_path = 1;
518			as_server = optarg;
519			break;
520
521		case 'd':
522			options |= SO_DEBUG;
523			break;
524
525		case 'D':
526			dump = 1;
527			break;
528
529		case 'f':
530			first_ttl = str2val(optarg, "first ttl", 1, 255);
531			break;
532
533		case 'F':
534			off = IP_DF;
535			break;
536
537		case 'g':
538			if (lsrr >= NGATEWAYS)
539				errx(1, "more than %d gateways", NGATEWAYS);
540			getaddr(gwlist + lsrr, optarg);
541			++lsrr;
542			break;
543
544		case 'i':
545			device = optarg;
546			break;
547
548		case 'I':
549			++useicmp;
550			break;
551
552		case 'l':
553			++ttl_flag;
554			break;
555
556		case 'm':
557			max_ttl = str2val(optarg, "max ttl", 1, 255);
558			break;
559
560		case 'M':
561			Mflag = 1;
562			break;
563
564		case 'n':
565			++nflag;
566			break;
567
568		case 'p':
569			port = (u_short)str2val(optarg, "port",
570			    1, (1 << 16) - 1);
571			break;
572
573		case 'q':
574			nprobes = str2val(optarg, "nprobes", 1, -1);
575			break;
576
577		case 'r':
578			options |= SO_DONTROUTE;
579			break;
580
581		case 's':
582			/*
583			 * set the ip source address of the outbound
584			 * probe (e.g., on a multi-homed host).
585			 */
586			source = optarg;
587			break;
588
589		case 't':
590			tos = str2val(optarg, "tos", 0, 255);
591			++settos;
592			break;
593
594		case 'v':
595			++verbose;
596			break;
597
598		case 'x':
599			doipcksum = (doipcksum == 0);
600			break;
601
602		case 'w':
603			waittime = str2val(optarg, "wait time",
604			    2, 24 * 60 * 60);
605			break;
606
607		case 'z':
608			pausemsecs = str2val(optarg, "pause msecs",
609			    0, 60 * 60 * 1000);
610
611		case 'P':
612			off = IP_DF;
613			mtudisc = 1;
614			break;
615
616		default:
617			usage();
618		}
619
620	if (first_ttl > max_ttl)
621		errx(1, "first ttl (%d) may not be greater than max ttl (%d)",
622		    first_ttl, max_ttl);
623
624	if (!doipcksum)
625		warnx("ip checksums disabled");
626
627	if (lsrr > 0)
628		optlen = (lsrr + 1) * sizeof(gwlist[0]);
629	minpacket = sizeof(*outip) + sizeof(struct outdata) + optlen;
630	if (useicmp)
631		minpacket += 8;			/* XXX magic number */
632	else
633		minpacket += sizeof(*outudp);
634	packlen = minpacket;		/* minimum sized packet */
635
636	if (mtudisc)
637		packlen = *mtuptr++;
638
639	/* Process destination and optional packet size */
640	switch (argc - optind) {
641
642	case 2:
643		packlen = str2val(argv[optind + 1],
644		    "packet length", minpacket, maxpacket);
645		/* Fall through */
646
647	case 1:
648		hostname = argv[optind];
649		hi = gethostinfo(hostname);
650		setsin(to, hi->addrs[0]);
651		if (hi->n > 1)
652			warnx("%s has multiple addresses; using %s",
653			    hostname, inet_ntoa(to->sin_addr));
654		hostname = hi->name;
655		hi->name = NULL;
656		freehostinfo(hi);
657		break;
658
659	default:
660		usage();
661	}
662
663#ifdef HAVE_SETLINEBUF
664	setlinebuf (stdout);
665#else
666	setvbuf(stdout, NULL, _IOLBF, 0);
667#endif
668
669	outip = malloc((unsigned)packlen);
670	if (outip == NULL)
671		err(1, "malloc");
672	memset(outip, 0, packlen);
673
674	outip->ip_v = IPVERSION;
675	if (settos)
676		outip->ip_tos = tos;
677#ifdef BYTESWAP_IP_HDR
678	outip->ip_len = htons(packlen);
679	outip->ip_off = htons(off);
680#else
681	outip->ip_len = packlen;
682	outip->ip_off = off;
683#endif
684	outp = (u_char *)(outip + 1);
685#ifdef HAVE_RAW_OPTIONS
686	if (lsrr > 0) {
687		u_char *optlist;
688
689		optlist = outp;
690		outp += optlen;
691
692		/* final hop */
693		gwlist[lsrr] = to->sin_addr.s_addr;
694
695		outip->ip_dst.s_addr = gwlist[0];
696
697		/* force 4 byte alignment */
698		optlist[0] = IPOPT_NOP;
699		/* loose source route option */
700		optlist[1] = IPOPT_LSRR;
701		i = lsrr * sizeof(gwlist[0]);
702		optlist[2] = i + 3;
703		/* Pointer to LSRR addresses */
704		optlist[3] = IPOPT_MINOFF;
705		memcpy(optlist + 4, gwlist + 1, i);
706	} else
707#endif
708		outip->ip_dst = to->sin_addr;
709
710	outip->ip_hl = (outp - (u_char *)outip) >> 2;
711	ident = htons(arc4random() & 0xffff) | 0x8000;
712	if (useicmp) {
713		outip->ip_p = IPPROTO_ICMP;
714
715		outicmp = (struct icmp *)outp;
716		outicmp->icmp_type = ICMP_ECHO;
717		outicmp->icmp_id = htons(ident);
718
719		outmark = outp + 8;	/* XXX magic number */
720	} else {
721		outip->ip_p = IPPROTO_UDP;
722
723		outudp = (struct udphdr *)outp;
724		outudp->uh_sport = htons(ident);
725		outudp->uh_ulen =
726		    htons((u_int16_t)(packlen - (sizeof(*outip) + optlen)));
727		outmark = outudp + 1;
728	}
729
730	if (options & SO_DEBUG)
731		(void)prog_setsockopt(s, SOL_SOCKET, SO_DEBUG, (char *)&on,
732		    sizeof(on));
733#ifdef IPSEC
734#ifdef IPSEC_POLICY_IPSEC
735	/*
736	 * do not raise error even if setsockopt fails, kernel may have ipsec
737	 * turned off.
738	 */
739	if (setpolicy(s, "in bypass") < 0)
740		exit(1);
741	if (setpolicy(s, "out bypass") < 0)
742		exit(1);
743#else
744    {
745	int level = IPSEC_LEVEL_AVAIL;
746
747	(void)prog_setsockopt(s, IPPROTO_IP, IP_ESP_TRANS_LEVEL, &level,
748		sizeof(level));
749	(void)prog_setsockopt(s, IPPROTO_IP, IP_ESP_NETWORK_LEVEL, &level,
750		sizeof(level));
751#ifdef IP_AUTH_TRANS_LEVEL
752	(void)prog_setsockopt(s, IPPROTO_IP, IP_AUTH_TRANS_LEVEL, &level,
753		sizeof(level));
754#else
755	(void)prog_setsockopt(s, IPPROTO_IP, IP_AUTH_LEVEL, &level,
756		sizeof(level));
757#endif
758#ifdef IP_AUTH_NETWORK_LEVEL
759	(void)prog_setsockopt(s, IPPROTO_IP, IP_AUTH_NETWORK_LEVEL, &level,
760		sizeof(level));
761#endif
762    }
763#endif /*IPSEC_POLICY_IPSEC*/
764#endif /*IPSEC*/
765
766#ifdef IPSEC
767#ifdef IPSEC_POLICY_IPSEC
768	/*
769	 * do not raise error even if setsockopt fails, kernel may have ipsec
770	 * turned off.
771	 */
772	if (setpolicy(sndsock, "in bypass") < 0)
773		exit(1);
774	if (setpolicy(sndsock, "out bypass") < 0)
775		exit(1);
776#else
777    {
778	int level = IPSEC_LEVEL_BYPASS;
779
780	(void)prog_setsockopt(sndsock, IPPROTO_IP, IP_ESP_TRANS_LEVEL, &level,
781		sizeof(level));
782	(void)prog_setsockopt(sndsock, IPPROTO_IP, IP_ESP_NETWORK_LEVEL, &level,
783		sizeof(level));
784#ifdef IP_AUTH_TRANS_LEVEL
785	(void)prog_setsockopt(sndsock, IPPROTO_IP, IP_AUTH_TRANS_LEVEL, &level,
786		sizeof(level));
787#else
788	(void)prog_setsockopt(sndsock, IPPROTO_IP, IP_AUTH_LEVEL, &level,
789		sizeof(level));
790#endif
791#ifdef IP_AUTH_NETWORK_LEVEL
792	(void)prog_setsockopt(sndsock, IPPROTO_IP, IP_AUTH_NETWORK_LEVEL, &level,
793		sizeof(level));
794#endif
795    }
796#endif /*IPSEC_POLICY_IPSEC*/
797#endif /*IPSEC*/
798
799#if defined(IP_OPTIONS) && !defined(HAVE_RAW_OPTIONS)
800	if (lsrr > 0) {
801		u_char optlist[MAX_IPOPTLEN];
802
803		/* final hop */
804		gwlist[lsrr] = to->sin_addr.s_addr;
805		++lsrr;
806
807		/* force 4 byte alignment */
808		optlist[0] = IPOPT_NOP;
809		/* loose source route option */
810		optlist[1] = IPOPT_LSRR;
811		i = lsrr * sizeof(gwlist[0]);
812		optlist[2] = i + 3;
813		/* Pointer to LSRR addresses */
814		optlist[3] = IPOPT_MINOFF;
815		memcpy(optlist + 4, gwlist, i);
816
817		if ((prog_setsockopt(sndsock, IPPROTO_IP, IP_OPTIONS, optlist,
818		    i + sizeof(gwlist[0]))) < 0)
819			err(1, "IP_OPTIONS");
820	}
821#endif
822
823#ifdef SO_SNDBUF
824	if (prog_setsockopt(sndsock, SOL_SOCKET, SO_SNDBUF, (char *)&packlen,
825	    sizeof(packlen)) < 0)
826		err(1, "SO_SNDBUF");
827#endif
828#ifdef IP_HDRINCL
829	if (prog_setsockopt(sndsock, IPPROTO_IP, IP_HDRINCL, (char *)&on,
830	    sizeof(on)) < 0)
831		err(1, "IP_HDRINCL");
832#else
833#ifdef IP_TOS
834	if (settos && prog_setsockopt(sndsock, IPPROTO_IP, IP_TOS,
835	    &tos, sizeof(tos)) < 0)
836		err(1, "setsockopt tos %d", tos);
837#endif
838#endif
839	if (options & SO_DEBUG)
840		if (prog_setsockopt(sndsock, SOL_SOCKET, SO_DEBUG, &on,
841		    sizeof(on)) < 0)
842			err(1, "setsockopt debug %d", tos);
843	if (options & SO_DONTROUTE)
844		if (prog_setsockopt(sndsock, SOL_SOCKET, SO_DONTROUTE, &on,
845		    sizeof(on)) < 0)
846			err(1, "setsockopt dontroute %d", tos);
847
848	/* Get the interface address list */
849	n = ifaddrlist(&al, errbuf, sizeof errbuf);
850	al2 = al;
851	if (n < 0)
852		errx(1, "ifaddrlist (%s)", errbuf);
853	if (n == 0)
854		errx(1, "Can't find any network interfaces");
855
856	/* Look for a specific device */
857	if (device != NULL) {
858		for (i = n; i > 0; --i, ++al2)
859			if (strcmp(device, al2->device) == 0)
860				break;
861		if (i <= 0)
862			errx(1, "Can't find interface %.32s", device);
863	}
864
865	/* Determine our source address */
866	if (source == NULL) {
867		/*
868		 * If a device was specified, use the interface address.
869		 * Otherwise, try to determine our source address.
870		 * Warn if there are more than one.
871		 */
872		setsin(from, al2->addr);
873		if (n > 1 && device == NULL && !find_local_ip(from, to)) {
874			warnx("Multiple interfaces found; using %s @ %s",
875			    inet_ntoa(from->sin_addr), al2->device);
876		}
877	} else {
878		hi = gethostinfo(source);
879		source = hi->name;
880		hi->name = NULL;
881		if (device == NULL) {
882			/*
883			 * Use the first interface found.
884			 * Warn if there are more than one.
885			 */
886			setsin(from, hi->addrs[0]);
887			if (hi->n > 1)
888				warnx("%s has multiple addresses; using %s",
889				    source, inet_ntoa(from->sin_addr));
890		} else {
891			/*
892			 * Make sure the source specified matches the
893			 * interface address.
894			 */
895			for (i = hi->n, ap = hi->addrs; i > 0; --i, ++ap)
896				if (*ap == al2->addr)
897					break;
898			if (i <= 0)
899				errx(1, "%s is not on interface %s",
900				    source, device);
901			setsin(from, *ap);
902		}
903		freehostinfo(hi);
904	}
905
906	/* Revert to non-privileged user after opening sockets */
907	setgid(getgid());
908	setuid(getuid());
909
910	/*
911	 * If not root, make sure source address matches a local interface.
912	 * (The list of addresses produced by ifaddrlist() automatically
913	 * excludes interfaces that are marked down and/or loopback.)
914	 */
915	if (getuid())  {
916		al2 = al;
917		for (i = n; i > 0; --i, ++al2)
918			if (from->sin_addr.s_addr == al2->addr)
919			    break;
920		if (i <= 0)
921			errx(1, "%s is not a valid local address "
922			    "and you are not superuser.",
923			    inet_ntoa(from->sin_addr));
924	}
925
926	outip->ip_src = from->sin_addr;
927#ifndef IP_HDRINCL
928	if (bind(sndsock, (struct sockaddr *)from, sizeof(*from)) < 0)
929		err(1, "bind");
930#endif
931
932	if (as_path) {
933		asn = as_setup(as_server);
934		if (asn == NULL) {
935			warnx("as_setup failed, AS# lookups disabled");
936			(void)fflush(stderr);
937			as_path = 0;
938		}
939	}
940
941	setuid(getuid());
942	Fprintf(stderr, "%s to %s (%s)",
943	    prog, hostname, inet_ntoa(to->sin_addr));
944	if (source)
945		Fprintf(stderr, " from %s", source);
946	Fprintf(stderr, ", %d hops max, %d byte packets\n", max_ttl, packlen);
947	(void)fflush(stderr);
948
949	for (ttl = first_ttl; ttl <= max_ttl; ++ttl) {
950		u_int32_t lastaddr = 0;
951		int gotlastaddr = 0;
952		int got_there = 0;
953		int unreachable = 0;
954		int sentfirst = 0;
955
956again:
957		printed_ttl = 0;
958		for (probe = 0; probe < nprobes; ++probe) {
959			int cc;
960			struct timeval t1, t2;
961			struct ip *ip;
962			if (sentfirst && pausemsecs > 0)
963				usleep(pausemsecs * 1000);
964			(void)gettimeofday(&t1, NULL);
965			if (!useicmp && htons(port + seq + 1) == 0)
966				seq++;
967			send_probe(++seq, ttl, &t1);
968			++sentfirst;
969			while ((cc = wait_for_reply(s, from, &t1)) != 0) {
970				(void)gettimeofday(&t2, NULL);
971				/*
972				 * Since we'll be receiving all ICMP
973				 * messages to this host above, we may
974				 * never end up with cc=0, so we need
975				 * an additional termination check.
976				 */
977				if (t2.tv_sec - t1.tv_sec > waittime) {
978					cc = 0;
979					break;
980				}
981				i = packet_ok(packet, cc, from, seq);
982				/* Skip short packet */
983				if (i == 0)
984					continue;
985				if (!gotlastaddr ||
986				    from->sin_addr.s_addr != lastaddr) {
987					if (gotlastaddr) printf("\n   ");
988					print(packet, cc, from);
989					lastaddr = from->sin_addr.s_addr;
990					++gotlastaddr;
991				}
992				ip = (struct ip *)packet;
993				Printf("  %.3f ms", deltaT(&t1, &t2));
994				if (ttl_flag)
995					Printf(" (ttl = %d)", ip->ip_ttl);
996				if (i == -2) {
997#ifndef ARCHAIC
998					if (ip->ip_ttl <= 1)
999						Printf(" !");
1000#endif
1001					++got_there;
1002					break;
1003				}
1004
1005				/* time exceeded in transit */
1006				if (i == -1)
1007					break;
1008				code = i - 1;
1009				switch (code) {
1010
1011				case ICMP_UNREACH_PORT:
1012#ifndef ARCHAIC
1013					if (ip->ip_ttl <= 1)
1014						Printf(" !");
1015#endif
1016					++got_there;
1017					break;
1018
1019				case ICMP_UNREACH_NET:
1020					++unreachable;
1021					Printf(" !N");
1022					break;
1023
1024				case ICMP_UNREACH_HOST:
1025					++unreachable;
1026					Printf(" !H");
1027					break;
1028
1029				case ICMP_UNREACH_PROTOCOL:
1030					++got_there;
1031					Printf(" !P");
1032					break;
1033
1034				case ICMP_UNREACH_NEEDFRAG:
1035					if (mtudisc) {
1036						frag_err();
1037						goto again;
1038					} else {
1039						++unreachable;
1040						Printf(" !F-%d", pmtu);
1041					}
1042					break;
1043
1044				case ICMP_UNREACH_SRCFAIL:
1045					++unreachable;
1046					Printf(" !S");
1047					break;
1048
1049				case ICMP_UNREACH_FILTER_PROHIB:
1050					++unreachable;
1051					Printf(" !X");
1052					break;
1053
1054				case ICMP_UNREACH_HOST_PRECEDENCE:
1055					++unreachable;
1056					Printf(" !V");
1057					break;
1058
1059				case ICMP_UNREACH_PRECEDENCE_CUTOFF:
1060					++unreachable;
1061					Printf(" !C");
1062					break;
1063
1064				default:
1065					++unreachable;
1066					Printf(" !<%d>", code);
1067					break;
1068				}
1069				break;
1070			}
1071			if (cc == 0)
1072				Printf(" *");
1073			else if (cc && probe == nprobes - 1 && Mflag)
1074				decode_extensions(packet, cc);
1075			(void)fflush(stdout);
1076		}
1077		putchar('\n');
1078		if (got_there ||
1079		    (unreachable > 0 && unreachable >= ((nprobes + 1) / 2)))
1080			break;
1081	}
1082
1083	if (as_path)
1084		as_shutdown(asn);
1085
1086	exit(0);
1087}
1088
1089static ssize_t
1090wait_for_reply(int sock, struct sockaddr_in *fromp, const struct timeval *tp)
1091{
1092	struct pollfd set[1];
1093	struct timeval now, wait;
1094	ssize_t cc = 0;
1095	socklen_t fromlen = sizeof(*fromp);
1096	int retval;
1097
1098	set[0].fd = sock;
1099	set[0].events = POLLIN;
1100
1101	wait.tv_sec = tp->tv_sec + waittime;
1102	wait.tv_usec = tp->tv_usec;
1103	(void)gettimeofday(&now, NULL);
1104	tvsub(&wait, &now);
1105
1106	if (wait.tv_sec < 0) {
1107		wait.tv_sec = 0;
1108		wait.tv_usec = 0;
1109	}
1110
1111	retval = prog_poll(set, 1, wait.tv_sec * 1000 + wait.tv_usec / 1000);
1112	if (retval < 0)
1113		/* If we continue, we probably just flood the remote host. */
1114		err(1, "poll");
1115	if (retval > 0)  {
1116		cc = prog_recvfrom(sock, (char *)packet, sizeof(packet), 0,
1117			    (struct sockaddr *)fromp, &fromlen);
1118	}
1119
1120	return cc;
1121}
1122
1123static void
1124decode_extensions(unsigned char *buf, int ip_len)
1125{
1126        struct icmp_ext_cmn_hdr *cmn_hdr;
1127        struct icmp_ext_obj_hdr *obj_hdr;
1128        union {
1129                struct mpls_header mpls;
1130                uint32_t mpls_h;
1131        } mpls;
1132        size_t datalen, obj_len;
1133        struct ip *ip;
1134
1135        ip = (struct ip *)buf;
1136
1137        if (ip_len < (int)((ip->ip_hl << 2) + ICMP_EXT_OFFSET +
1138	    sizeof(struct icmp_ext_cmn_hdr))) {
1139		/*
1140		 * No support for ICMP extensions on this host
1141		 */
1142		return;
1143        }
1144
1145        /*
1146         * Move forward to the start of the ICMP extensions, if present
1147         */
1148        buf += (ip->ip_hl << 2) + ICMP_EXT_OFFSET;
1149        cmn_hdr = (struct icmp_ext_cmn_hdr *)buf;
1150
1151        if (cmn_hdr->version != ICMP_EXT_VERSION) {
1152		/*
1153		 * Unknown version
1154		 */
1155		return;
1156        }
1157
1158        datalen = ip_len - ((u_char *)cmn_hdr - (u_char *)ip);
1159
1160        /*
1161         * Check the checksum, cmn_hdr->checksum == 0 means no checksum'ing
1162         * done by sender.
1163         *
1164        * If the checksum is ok, we'll get 0, as the checksum is calculated
1165         * with the checksum field being 0'd.
1166         */
1167        if (ntohs(cmn_hdr->checksum) &&
1168            in_cksum((u_short *)cmn_hdr, datalen)) {
1169
1170            return;
1171        }
1172
1173        buf += sizeof(*cmn_hdr);
1174        datalen -= sizeof(*cmn_hdr);
1175
1176        while (datalen >= sizeof(struct icmp_ext_obj_hdr)) {
1177		obj_hdr = (struct icmp_ext_obj_hdr *)buf;
1178		obj_len = ntohs(obj_hdr->length);
1179
1180		/*
1181		 * Sanity check the length field
1182		 */
1183		if (obj_len > datalen)
1184			return;
1185
1186		datalen -= obj_len;
1187
1188		/*
1189		 * Move past the object header
1190		 */
1191		buf += sizeof(struct icmp_ext_obj_hdr);
1192		obj_len -= sizeof(struct icmp_ext_obj_hdr);
1193
1194		switch (obj_hdr->class_num) {
1195		case MPLS_STACK_ENTRY_CLASS:
1196			switch (obj_hdr->c_type) {
1197			case MPLS_STACK_ENTRY_C_TYPE:
1198				while (obj_len >= sizeof(uint32_t)) {
1199					mpls.mpls_h = ntohl(*(uint32_t *)buf);
1200
1201					buf += sizeof(uint32_t);
1202					obj_len -= sizeof(uint32_t);
1203
1204					printf(" [MPLS: Label %d Exp %d]",
1205					    mpls.mpls.label, mpls.mpls.exp);
1206				}
1207				if (obj_len > 0) {
1208					/*
1209					 * Something went wrong, and we're at
1210					 * a unknown offset into the packet,
1211					 * ditch the rest of it.
1212					 */
1213					return;
1214				}
1215				break;
1216			default:
1217				/*
1218				 * Unknown object, skip past it
1219				 */
1220				buf += ntohs(obj_hdr->length) -
1221				    sizeof(struct icmp_ext_obj_hdr);
1222				break;
1223			}
1224			break;
1225
1226		default:
1227			/*
1228			 * Unknown object, skip past it
1229			 */
1230			buf += ntohs(obj_hdr->length) -
1231			    sizeof(struct icmp_ext_obj_hdr);
1232			break;
1233		}
1234	}
1235}
1236
1237static void
1238dump_packet(void)
1239{
1240	u_char *p;
1241	int i;
1242
1243	Fprintf(stderr, "packet data:");
1244
1245#ifdef __hpux
1246	for (p = useicmp ? (u_char *)outicmp : (u_char *)outudp, i = 0; i <
1247	    i < packlen - (sizeof(*outip) + optlen); i++)
1248#else
1249	for (p = (u_char *)outip, i = 0; i < packlen; i++)
1250#endif
1251	{
1252		if ((i % 24) == 0)
1253			Fprintf(stderr, "\n ");
1254		Fprintf(stderr, " %02x", *p++);
1255	}
1256	Fprintf(stderr, "\n");
1257}
1258
1259void
1260send_probe(int seq, int ttl, struct timeval *tp)
1261{
1262	int cc;
1263	struct udpiphdr * ui, *oui;
1264	int oldmtu = packlen;
1265 	struct ip tip;
1266
1267again:
1268#ifdef BYTESWAP_IP_LEN
1269	outip->ip_len = htons(packlen);
1270#else
1271	outip->ip_len = packlen;
1272#endif
1273	outip->ip_ttl = ttl;
1274#ifndef __hpux
1275	outip->ip_id = htons(ident + seq);
1276#endif
1277
1278	/*
1279	 * In most cases, the kernel will recalculate the ip checksum.
1280	 * But we must do it anyway so that the udp checksum comes out
1281	 * right.
1282	 */
1283	if (doipcksum) {
1284		outip->ip_sum =
1285		    in_cksum((u_int16_t *)outip, sizeof(*outip) + optlen);
1286		if (outip->ip_sum == 0)
1287			outip->ip_sum = 0xffff;
1288	}
1289
1290	/* Payload */
1291	outsetup.seq = seq;
1292	outsetup.ttl = ttl;
1293	outsetup.tv.tv32_sec = htonl(tp->tv_sec);
1294	outsetup.tv.tv32_usec = htonl(tp->tv_usec);
1295	memcpy(outmark,&outsetup,sizeof(outsetup));
1296
1297	if (useicmp)
1298		outicmp->icmp_seq = htons(seq);
1299	else
1300		outudp->uh_dport = htons(port + seq);
1301
1302	if (useicmp) {
1303		/* Always calculate checksum for icmp packets */
1304		outicmp->icmp_cksum = 0;
1305		outicmp->icmp_cksum = in_cksum((u_short *)outicmp,
1306		    packlen - (sizeof(*outip) + optlen));
1307		if (outicmp->icmp_cksum == 0)
1308			outicmp->icmp_cksum = 0xffff;
1309	} else if (doipcksum) {
1310		/* Checksum (we must save and restore ip header) */
1311		tip = *outip;
1312		ui = (struct udpiphdr *)outip;
1313		oui = (struct udpiphdr *)&tip;
1314		/* Easier to zero and put back things that are ok */
1315		memset(ui, 0, sizeof(ui->ui_i));
1316		ui->ui_src = oui->ui_src;
1317		ui->ui_dst = oui->ui_dst;
1318		ui->ui_pr = oui->ui_pr;
1319		ui->ui_len = outudp->uh_ulen;
1320		outudp->uh_sum = 0;
1321		outudp->uh_sum = in_cksum((u_short *)ui, packlen);
1322		if (outudp->uh_sum == 0)
1323			outudp->uh_sum = 0xffff;
1324		*outip = tip;
1325	}
1326
1327	/* XXX undocumented debugging hack */
1328	if (verbose > 1) {
1329		const u_int16_t *sp;
1330		int nshorts, i;
1331
1332		sp = (u_int16_t *)outip;
1333		nshorts = (u_int)packlen / sizeof(u_int16_t);
1334		i = 0;
1335		Printf("[ %d bytes", packlen);
1336		while (--nshorts >= 0) {
1337			if ((i++ % 8) == 0)
1338				Printf("\n\t");
1339			Printf(" %04x", ntohs(*sp++));
1340		}
1341		if (packlen & 1) {
1342			if ((i % 8) == 0)
1343				Printf("\n\t");
1344			Printf(" %02x", *(const u_char *)sp);
1345		}
1346		Printf("]\n");
1347	}
1348
1349#if !defined(IP_HDRINCL) && defined(IP_TTL)
1350	if (prog_setsockopt(sndsock, IPPROTO_IP, IP_TTL,
1351	    (char *)&ttl, sizeof(ttl)) < 0)
1352		err(1, "setsockopt ttl %d", ttl);
1353#endif
1354	if (dump)
1355		dump_packet();
1356
1357#ifdef __hpux
1358	cc = sendto(sndsock, useicmp ? (char *)outicmp : (char *)outudp,
1359	    packlen - (sizeof(*outip) + optlen), 0, &whereto, sizeof(whereto));
1360	if (cc > 0)
1361		cc += sizeof(*outip) + optlen;
1362#else
1363	cc = prog_sendto(sndsock, (char *)outip,
1364	    packlen, 0, &whereto, sizeof(whereto));
1365#endif
1366	if (cc < 0 || cc != packlen)  {
1367		if (cc < 0) {
1368			/*
1369			 * An errno of EMSGSIZE means we're writing too big a
1370			 * datagram for the interface.  We have to just
1371			 * decrease the packet size until we find one that
1372			 * works.
1373			 *
1374			 * XXX maybe we should try to read the outgoing if's
1375			 * mtu?
1376			 */
1377			if (errno == EMSGSIZE) {
1378				packlen = *mtuptr++;
1379				resize_packet();
1380				goto again;
1381			} else
1382				warn("sendto");
1383		}
1384
1385		Printf("%s: wrote %s %d chars, ret=%d\n",
1386		    prog, hostname, packlen, cc);
1387		(void)fflush(stdout);
1388	}
1389	if (oldmtu != packlen) {
1390		Printf("message too big, "
1391		    "trying new MTU = %d\n", packlen);
1392		printed_ttl = 0;
1393	}
1394	if (!printed_ttl) {
1395		Printf("%2d ", ttl);
1396		printed_ttl = 1;
1397	}
1398
1399}
1400
/*
 * Return the time from *t1p to *t2p in milliseconds (negative when
 * *t2p is the earlier of the two).
 */
static double
deltaT(struct timeval *t1p, struct timeval *t2p)
{
	double sec_part, usec_part;

	/* whole seconds -> ms, microseconds -> ms */
	sec_part = (double)(t2p->tv_sec - t1p->tv_sec) * 1000.0;
	usec_part = (double)(t2p->tv_usec - t1p->tv_usec) / 1000.0;

	return sec_part + usec_part;
}
1410
/*
 * Map an ICMP "type" value onto a printable name.
 * Values beyond the end of the table yield "OUT-OF-RANGE".
 */
static const char *
pr_type(u_char t)
{
	static const char *ttab[] = {
		"Echo Reply",		/*  0 */
		"ICMP 1",
		"ICMP 2",
		"Dest Unreachable",
		"Source Quench",	/*  4 */
		"Redirect",
		"ICMP 6",
		"ICMP 7",
		"Echo",			/*  8 */
		"ICMP 9",
		"ICMP 10",
		"Time Exceeded",
		"Param Problem",	/* 12 */
		"Timestamp",
		"Timestamp Reply",
		"Info Request",
		"Info Reply"		/* 16 */
	};

	/* Bound comes from the table itself so the two cannot drift apart. */
	if (t >= sizeof(ttab) / sizeof(ttab[0]))
		return "OUT-OF-RANGE";

	return ttab[t];
}
1430
1431static int
1432packet_ok(u_char *buf, ssize_t cc, struct sockaddr_in *from, int seq)
1433{
1434	struct icmp *icp;
1435	u_char type, code;
1436	int hlen;
1437#ifndef ARCHAIC
1438	struct ip *ip;
1439
1440	ip = (struct ip *) buf;
1441	hlen = ip->ip_hl << 2;
1442	if (cc < hlen + ICMP_MINLEN) {
1443		if (verbose)
1444			Printf("packet too short (%zd bytes) from %s\n", cc,
1445				inet_ntoa(from->sin_addr));
1446		return 0;
1447	}
1448	cc -= hlen;
1449	icp = (struct icmp *)(buf + hlen);
1450#else
1451	icp = (struct icmp *)buf;
1452#endif
1453	type = icp->icmp_type;
1454	code = icp->icmp_code;
1455	/* Path MTU Discovery (RFC1191) */
1456	if (code != ICMP_UNREACH_NEEDFRAG)
1457		pmtu = 0;
1458	else {
1459#ifdef HAVE_ICMP_NEXTMTU
1460		pmtu = ntohs(icp->icmp_nextmtu);
1461#else
1462		pmtu = ntohs(((struct my_pmtu *)&icp->icmp_void)->ipm_nextmtu);
1463#endif
1464	}
1465	if ((type == ICMP_TIMXCEED && code == ICMP_TIMXCEED_INTRANS) ||
1466	    type == ICMP_UNREACH || type == ICMP_ECHOREPLY) {
1467		struct ip *hip;
1468		struct udphdr *up;
1469		struct icmp *hicmp;
1470
1471		hip = &icp->icmp_ip;
1472		hlen = hip->ip_hl << 2;
1473
1474		nextmtu = ntohs(icp->icmp_nextmtu);	/* for frag_err() */
1475
1476		if (useicmp) {
1477			/* XXX */
1478			if (type == ICMP_ECHOREPLY &&
1479			    icp->icmp_id == htons(ident) &&
1480			    icp->icmp_seq == htons(seq))
1481				return -2;
1482
1483			hicmp = (struct icmp *)((u_char *)hip + hlen);
1484			/* XXX 8 is a magic number */
1485			if (hlen + 8 <= cc &&
1486			    hip->ip_p == IPPROTO_ICMP &&
1487			    hicmp->icmp_id == htons(ident) &&
1488			    hicmp->icmp_seq == htons(seq))
1489				return type == ICMP_TIMXCEED ? -1 : code + 1;
1490		} else {
1491			up = (struct udphdr *)((u_char *)hip + hlen);
1492			/* XXX 8 is a magic number */
1493			if (hlen + 12 <= cc &&
1494			    hip->ip_p == IPPROTO_UDP &&
1495			    up->uh_sport == htons(ident) &&
1496			    up->uh_dport == htons(port + seq))
1497				return type == ICMP_TIMXCEED ? -1 : code + 1;
1498		}
1499	}
1500#ifndef ARCHAIC
1501	if (verbose) {
1502		int i;
1503		u_int32_t *lp = (u_int32_t *)&icp->icmp_ip;
1504
1505		Printf("\n%zd bytes from %s to ", cc, inet_ntoa(from->sin_addr));
1506		Printf("%s: icmp type %d (%s) code %d\n",
1507		    inet_ntoa(ip->ip_dst), type, pr_type(type), icp->icmp_code);
1508		for (i = 4; i < cc ; i += sizeof(*lp))
1509			Printf("%2d: x%8.8x\n", i, *lp++);
1510	}
1511#endif
1512	return(0);
1513}
1514
1515static void
1516resize_packet(void)
1517{
1518	if (useicmp) {
1519		outicmp->icmp_cksum = 0;
1520		outicmp->icmp_cksum = in_cksum((u_int16_t *)outicmp,
1521		    packlen - (sizeof(*outip) + optlen));
1522		if (outicmp->icmp_cksum == 0)
1523			outicmp->icmp_cksum = 0xffff;
1524	} else {
1525		outudp->uh_ulen =
1526		    htons((u_int16_t)(packlen - (sizeof(*outip) + optlen)));
1527	}
1528}
1529
1530static void
1531print(u_char *buf, int cc, struct sockaddr_in *from)
1532{
1533	struct ip *ip;
1534	int hlen;
1535	char addr[INET_ADDRSTRLEN];
1536
1537	ip = (struct ip *) buf;
1538	hlen = ip->ip_hl << 2;
1539	cc -= hlen;
1540
1541	strlcpy(addr, inet_ntoa(from->sin_addr), sizeof(addr));
1542
1543	if (as_path)
1544		Printf(" [AS%u]", as_lookup(asn, addr, AF_INET));
1545
1546	if (nflag)
1547		Printf(" %s", addr);
1548	else
1549		Printf(" %s (%s)", inetname(from->sin_addr), addr);
1550
1551	if (verbose)
1552		Printf(" %d bytes to %s", cc, inet_ntoa (ip->ip_dst));
1553}
1554
/*
 * Internet checksum of len bytes at addr: the one's complement of the
 * folded 16-bit sum produced by in_cksum2() with a zero seed.
 */
static u_int16_t
in_cksum(u_int16_t *addr, int len)
{
	u_int16_t folded;

	folded = in_cksum2(0, addr, len);
	return ~folded;
}
1561
/*
 * Summing half of the Internet checksum (C version).
 *
 * Adds the len bytes at addr, taken as native 16-bit words, to seed in
 * a 32-bit accumulator and folds the carries back into the low 16
 * bits.  The result is NOT complemented; in_cksum() does that.
 */
static u_int16_t
in_cksum2(u_int16_t seed, u_int16_t *addr, int len)
{
	union {
		u_int16_t w;
		u_int8_t b[2];
	} tail;
	int32_t acc = seed;
	int remaining;

	/* Add up all complete 16-bit words. */
	for (remaining = len; remaining > 1; remaining -= 2)
		acc += *addr++;

	/* A trailing odd byte is summed as a word padded with zero. */
	if (remaining == 1) {
		tail.b[0] = *(u_int8_t *)addr;
		tail.b[1] = 0;
		acc += tail.w;
	}

	/* Fold the upper 16 bits into the lower 16, then the carry. */
	acc = (acc >> 16) + (acc & 0xffff);
	acc += (acc >> 16);

	/* Truncate to 16 bits. */
	return (u_int16_t)acc;
}
1602
/*
 * In-place timeval subtraction: *out = *out - *in.
 * Callers are expected to pass out >= in; the microsecond field is
 * normalised with a single borrow from the seconds.
 */
static void
tvsub(struct timeval *out, struct timeval *in)
{

	out->tv_usec -= in->tv_usec;
	if (out->tv_usec < 0) {
		/* borrow one second */
		out->tv_usec += 1000000;
		out->tv_sec -= 1;
	}
	out->tv_sec -= in->tv_sec;
}
1617
1618/*
1619 * Construct an Internet address representation.
1620 * If the nflag has been supplied, give
1621 * numeric value, otherwise try for symbolic name.
1622 */
1623static char *
1624inetname(struct in_addr in)
1625{
1626	char *cp;
1627	struct hostent *hp;
1628	static int first = 1;
1629	static char domain[MAXHOSTNAMELEN + 1], line[MAXHOSTNAMELEN + 1];
1630
1631	if (first && !nflag) {
1632
1633		first = 0;
1634		if (gethostname(domain, sizeof(domain) - 1) < 0)
1635 			domain[0] = '\0';
1636		else {
1637			cp = strchr(domain, '.');
1638			if (cp == NULL) {
1639				hp = gethostbyname(domain);
1640				if (hp != NULL)
1641					cp = strchr(hp->h_name, '.');
1642			}
1643			if (cp == NULL)
1644				domain[0] = '\0';
1645			else {
1646				++cp;
1647				(void)strlcpy(domain, cp, sizeof(domain));
1648			}
1649		}
1650	}
1651	if (!nflag && in.s_addr != INADDR_ANY) {
1652		hp = gethostbyaddr((char *)&in, sizeof(in), AF_INET);
1653		if (hp != NULL) {
1654			if ((cp = strchr(hp->h_name, '.')) != NULL &&
1655			    strcmp(cp + 1, domain) == 0)
1656				*cp = '\0';
1657			(void)strlcpy(line, hp->h_name, sizeof(line));
1658			return line;
1659		}
1660	}
1661	return inet_ntoa(in);
1662}
1663
1664static struct hostinfo *
1665gethostinfo(char *hname)
1666{
1667	int n;
1668	struct hostent *hp;
1669	struct hostinfo *hi;
1670	char **p;
1671	u_int32_t *ap;
1672	struct in_addr addr;
1673
1674	hi = calloc(1, sizeof(*hi));
1675	if (hi == NULL)
1676		err(1, "calloc");
1677	if (inet_aton(hname, &addr) != 0) {
1678		hi->name = strdup(hname);
1679		if (!hi->name)
1680			err(1, "strdup");
1681		hi->n = 1;
1682		hi->addrs = calloc(1, sizeof(hi->addrs[0]));
1683		if (hi->addrs == NULL)
1684			err(1, "calloc");
1685		hi->addrs[0] = addr.s_addr;
1686		return hi;
1687	}
1688
1689	hp = gethostbyname(hname);
1690	if (hp == NULL)
1691		errx(1, "unknown host %s", hname);
1692	if (hp->h_addrtype != AF_INET || hp->h_length != 4)
1693		errx(1, "bad host %s", hname);
1694	hi->name = strdup(hp->h_name);
1695	if (!hi->name)
1696		err(1, "strdup");
1697	for (n = 0, p = hp->h_addr_list; *p != NULL; ++n, ++p)
1698		continue;
1699	hi->n = n;
1700	hi->addrs = calloc(n, sizeof(hi->addrs[0]));
1701	if (hi->addrs == NULL)
1702		err(1, "calloc");
1703	for (ap = hi->addrs, p = hp->h_addr_list; *p != NULL; ++ap, ++p)
1704		memcpy(ap, *p, sizeof(*ap));
1705	return hi;
1706}
1707
1708static void
1709freehostinfo(struct hostinfo *hi)
1710{
1711	if (hi->name != NULL) {
1712		free(hi->name);
1713		hi->name = NULL;
1714	}
1715	free(hi->addrs);
1716	free(hi);
1717}
1718
1719static void
1720getaddr(u_int32_t *ap, char *hname)
1721{
1722	struct hostinfo *hi;
1723
1724	hi = gethostinfo(hname);
1725	*ap = hi->addrs[0];
1726	freehostinfo(hi);
1727}
1728
/*
 * Reset *sin to an all-zero AF_INET socket address carrying addr.
 * addr is stored as given; the port stays zero.
 */
static void
setsin(struct sockaddr_in *sin, u_int32_t addr)
{

	(void)memset(sin, 0, sizeof(*sin));
	sin->sin_addr.s_addr = addr;
	sin->sin_family = AF_INET;
#ifdef HAVE_SOCKADDR_SA_LEN
	sin->sin_len = sizeof(*sin);
#endif
}
1740
/*
 * String to value with optional min and max. Handles decimal and hex.
 *
 * str  - text to convert; a leading "0x"/"0X" selects base 16
 * what - name of the value, used in error messages
 * mi   - smallest acceptable value, ignored when negative
 * ma   - largest acceptable value, ignored when negative
 *
 * Returns the converted value.  Anything that is not a complete number,
 * does not fit in an int, or lies outside [mi, ma] ends the program
 * with an error message.
 */
static int
str2val(const char *str, const char *what, int mi, int ma)
{
	const char *cp;
	long val;
	char *ep;
	int base;

	cp = str;
	base = 10;
	if (str[0] == '0' && (str[1] == 'x' || str[1] == 'X')) {
		cp = str + 2;
		base = 16;
	}

	errno = 0;
	ep = NULL;
	val = strtol(cp, &ep, base);
	/*
	 * ep == cp means no digits were consumed at all; this also rejects
	 * a bare "0x", which used to be read as 0.  The round trip through
	 * int catches values that the final cast would silently truncate
	 * when no maximum is given.
	 */
	if (errno || ep == cp || *ep != '\0' || (long)(int)val != val)
		errx(1, "\"%s\" bad value for %s", str, what);
	if (val < mi && mi >= 0) {
		if (mi == 0)
			errx(1, "%s must be >= %d", what, mi);
		else
			errx(1, "%s must be > %d", what, mi - 1);
	}
	if (val > ma && ma >= 0)
		errx(1, "%s must be <= %d", what, ma);
	return (int)val;
}
1768
1769__dead void
1770usage(void)
1771{
1772	extern char version[];
1773
1774	Fprintf(stderr, "Version %s\n", version);
1775	Fprintf(stderr, "Usage: %s [-adDFPIlMnrvx] [-g gateway] [-i iface] \
1776[-f first_ttl]\n\t[-m max_ttl] [-p port] [-q nqueries] [-s src_addr] [-t tos]\n\t\
1777[-w waittime] [-z pausemsecs] [-A as_server] host [packetlen]\n",
1778	    getprogname());
1779	exit(1);
1780}
1781
1782/*
1783 * Received ICMP unreachable (fragmentation required and DF set).
1784 * If the ICMP error was from a "new" router, it'll contain the next-hop
1785 * MTU that we should use next.  Otherwise we'll just keep going in the
1786 * mtus[] table, trying until we hit a valid MTU.
1787 */
1788
1789
1790void
1791frag_err()
1792{
1793        int i;
1794
1795        if (nextmtu > 0 && nextmtu < packlen) {
1796                Printf("\nfragmentation required and DF set, "
1797		     "next hop MTU = %d\n",
1798                        nextmtu);
1799                packlen = nextmtu;
1800                for (i = 0; mtus[i] > 0; i++) {
1801                        if (mtus[i] < nextmtu) {
1802                                mtuptr = &mtus[i];    /* next one to try */
1803                                break;
1804                        }
1805                }
1806        } else {
1807                Printf("\nfragmentation required and DF set. ");
1808		if (nextmtu)
1809			Printf("\nBogus next hop MTU = %d > last MTU = %d. ",
1810			    nextmtu, packlen);
1811                packlen = *mtuptr++;
1812		Printf("Trying new MTU = %d\n", packlen);
1813        }
1814	resize_packet();
1815}
1816
/*
 * Find the local address the system would use to reach *to.
 *
 * A throw-away UDP socket is connected to the destination and its
 * local name is read back.  On success *from is set to that address
 * and 1 is returned; on any failure 0 is returned and *from is left
 * untouched.
 */
int
find_local_ip(struct sockaddr_in *from, struct sockaddr_in *to)
{
	int sock, ok;
	struct sockaddr_in help;
	socklen_t help_len;

	sock = prog_socket(AF_INET, SOCK_DGRAM, 0);
	if (sock < 0)
		return 0;

	/*
	 * Build the address with setsin() so that no uninitialised bytes
	 * (padding, or the length field where one exists) reach connect.
	 */
	setsin(&help, to->sin_addr.s_addr);
	/*
	 * At this point the port number doesn't matter
	 * since it only has to be greater than zero.
	 */
	help.sin_port = 42;

	ok = 0;
	if (prog_connect(sock, (struct sockaddr *)&help, sizeof(help)) >= 0) {
		help_len = sizeof(help);
		if (prog_getsockname(sock, (struct sockaddr *)&help,
		    &help_len) >= 0 &&
		    help_len == sizeof(help) &&
		    help.sin_addr.s_addr != INADDR_ANY)
			ok = 1;
	}

	(void)prog_close(sock);
	if (!ok)
		return 0;

	setsin(from, help.sin_addr.s_addr);
	return 1;
}
1851
#ifdef IPSEC
#ifdef IPSEC_POLICY_IPSEC
/*
 * Turn the textual IPsec policy into a request buffer and apply it to
 * socket so.
 *
 * Returns -1 (after a warning) when the policy string cannot be
 * converted, 0 otherwise.  The result of the setsockopt call itself is
 * deliberately not checked.
 */
static int
setpolicy(int so, const char *policy)
{
	char *req;

	req = ipsec_set_policy(policy, strlen(policy));
	if (req != NULL) {
		(void)prog_setsockopt(so, IPPROTO_IP, IP_IPSEC_POLICY,
		    req, ipsec_get_policylen(req));
		free(req);
		return 0;
	}

	warnx("%s", ipsec_strerror());
	return -1;
}
#endif
#endif
1873
1874