1/*
2 * ntp_io.c - input/output routines for ntpd.	The socket-opening code
3 *		   was shamelessly stolen from ntpd.
4 */
5
6#ifdef HAVE_CONFIG_H
7# include <config.h>
8#endif
9
10#include <stdio.h>
11#include <signal.h>
12#ifdef HAVE_FNMATCH_H
13# include <fnmatch.h>
14# if !defined(FNM_CASEFOLD) && defined(FNM_IGNORECASE)
15#  define FNM_CASEFOLD FNM_IGNORECASE
16# endif
17#endif
18#ifdef HAVE_SYS_PARAM_H
19# include <sys/param.h>
20#endif
21#ifdef HAVE_SYS_IOCTL_H
22# include <sys/ioctl.h>
23#endif
24#ifdef HAVE_SYS_SOCKIO_H	/* UXPV: SIOC* #defines (Frank Vance <fvance@waii.com>) */
25# include <sys/sockio.h>
26#endif
27#ifdef HAVE_SYS_UIO_H
28# include <sys/uio.h>
29#endif
30
31#include "ntp_machine.h"
32#include "ntpd.h"
33#include "ntp_io.h"
34#include "iosignal.h"
35#include "ntp_lists.h"
36#include "ntp_refclock.h"
37#include "ntp_stdlib.h"
38#include "ntp_worker.h"
39#include "ntp_request.h"
40#include "ntp_assert.h"
41#include "timevalops.h"
42#include "timespecops.h"
43#include "ntpd-opts.h"
44#include "safecast.h"
45
46/* Don't include ISC's version of IPv6 variables and structures */
47#define ISC_IPV6_H 1
48#include <isc/mem.h>
49#include <isc/interfaceiter.h>
50#include <isc/netaddr.h>
51#include <isc/result.h>
52#include <isc/sockaddr.h>
53
54#ifdef SIM
55#include "ntpsim.h"
56#endif
57
58#ifdef HAS_ROUTING_SOCKET
59# include <net/route.h>
60# ifdef HAVE_RTNETLINK
61#  include <linux/rtnetlink.h>
62# endif
63#endif
64
65/*
66 * setsockopt does not always have the same arg declaration
67 * across all platforms. If it's not defined we make it empty
68 */
69
70#ifndef SETSOCKOPT_ARG_CAST
71#define SETSOCKOPT_ARG_CAST
72#endif
73
74extern int listen_to_virtual_ips;
75
76#ifndef IPTOS_DSCP_EF
77#define IPTOS_DSCP_EF 0xb8
78#endif
79int qos = IPTOS_DSCP_EF;	/* QoS RFC3246 */
80
81#ifdef LEAP_SMEAR
/* TODO burnicki: This should be moved to ntp_timer.c, but if we do so
 * we get a linker error. Since we're running out of time before the leap
 * second occurs, we leave it here where it just works.
 */
86int leap_smear_intv;
87#endif
88
89/*
90 * NIC rule entry
91 */
92typedef struct nic_rule_tag nic_rule;
93
94struct nic_rule_tag {
95	nic_rule *	next;
96	nic_rule_action	action;
97	nic_rule_match	match_type;
98	char *		if_name;
99	sockaddr_u	addr;
100	int		prefixlen;
101};
102
103/*
104 * NIC rule listhead.  Entries are added at the head so that the first
105 * match in the list is the last matching rule specified.
106 */
107nic_rule *nic_rule_list;
108
109
110#if defined(SO_BINTIME) && defined(SCM_BINTIME) && defined(CMSG_FIRSTHDR)
111#  define HAVE_PACKET_TIMESTAMP
112#  define HAVE_BINTIME
113#  ifdef BINTIME_CTLMSGBUF_SIZE
114#   define CMSG_BUFSIZE BINTIME_CTLMSGBUF_SIZE
115#  else
116#   define CMSG_BUFSIZE  1536 /* moderate default */
117#  endif
118#elif defined(SO_TIMESTAMPNS) && defined(SCM_TIMESTAMPNS) && defined(CMSG_FIRSTHDR)
119#  define HAVE_PACKET_TIMESTAMP
120#  define HAVE_TIMESTAMPNS
121#  ifdef TIMESTAMPNS_CTLMSGBUF_SIZE
122#   define CMSG_BUFSIZE TIMESTAMPNS_CTLMSGBUF_SIZE
123#  else
124#   define CMSG_BUFSIZE  1536 /* moderate default */
125#  endif
126#elif defined(SO_TIMESTAMP) && defined(SCM_TIMESTAMP) && defined(CMSG_FIRSTHDR)
127#  define HAVE_PACKET_TIMESTAMP
128#  define HAVE_TIMESTAMP
129#  ifdef TIMESTAMP_CTLMSGBUF_SIZE
130#   define CMSG_BUFSIZE TIMESTAMP_CTLMSGBUF_SIZE
131#  else
132#   define CMSG_BUFSIZE  1536 /* moderate default */
133#  endif
134#else
135/* fill in for old/other timestamp interfaces */
136#endif
137
138#if defined(SYS_WINNT)
139#include "win32_io.h"
140#include <isc/win32os.h>
141#endif
142
143/*
144 * We do asynchronous input using the SIGIO facility.  A number of
145 * recvbuf buffers are preallocated for input.	In the signal
146 * handler we poll to see which sockets are ready and read the
147 * packets from them into the recvbuf's along with a time stamp and
148 * an indication of the source host and the interface it was received
149 * through.  This allows us to get as accurate receive time stamps
150 * as possible independent of other processing going on.
151 *
152 * We watch the number of recvbufs available to the signal handler
153 * and allocate more when this number drops below the low water
154 * mark.  If the signal handler should run out of buffers in the
155 * interim it will drop incoming frames, the idea being that it is
156 * better to drop a packet than to be inaccurate.
157 */
158
159
160/*
161 * Other statistics of possible interest
162 */
163volatile u_long packets_dropped;	/* total number of packets dropped on reception */
164volatile u_long packets_ignored;	/* packets received on wild card interface */
165volatile u_long packets_received;	/* total number of packets received */
166	 u_long packets_sent;		/* total number of packets sent */
167	 u_long packets_notsent;	/* total number of packets which couldn't be sent */
168
169volatile u_long handler_calls;	/* number of calls to interrupt handler */
170volatile u_long handler_pkts;	/* number of pkts received by handler */
171u_long io_timereset;		/* time counters were reset */
172
173/*
174 * Interface stuff
175 */
176endpt *	any_interface;		/* wildcard ipv4 interface */
177endpt *	any6_interface;		/* wildcard ipv6 interface */
178endpt *	loopback_interface;	/* loopback ipv4 interface */
179
180isc_boolean_t broadcast_client_enabled;	/* is broadcast client enabled */
181u_int sys_ifnum;			/* next .ifnum to assign */
182int ninterfaces;			/* Total number of interfaces */
183
184int disable_dynamic_updates;		/* scan interfaces once only */
185
186#ifdef REFCLOCK
187/*
188 * Refclock stuff.	We keep a chain of structures with data concerning
189 * the guys we are doing I/O for.
190 */
191static	struct refclockio *refio;
192#endif /* REFCLOCK */
193
194/*
195 * File descriptor masks etc. for call to select
196 * Not needed for I/O Completion Ports or anything outside this file
197 */
198static fd_set activefds;
199static int maxactivefd;
200
201/*
202 * bit alternating value to detect verified interfaces during an update cycle
203 */
204static  u_short		sys_interphase = 0;
205
206static endpt *	new_interface(endpt *);
207static void	add_interface(endpt *);
208static int	update_interfaces(u_short, interface_receiver_t,
209				  void *);
210static void	remove_interface(endpt *);
211static endpt *	create_interface(u_short, endpt *);
212
213static int	is_wildcard_addr	(const sockaddr_u *);
214
215/*
216 * Multicast functions
217 */
218static	isc_boolean_t	addr_ismulticast	(sockaddr_u *);
219static	isc_boolean_t	is_anycast		(sockaddr_u *,
220						 const char *);
221
222/*
223 * Not all platforms support multicast
224 */
225#ifdef MCAST
226static	isc_boolean_t	socket_multicast_enable	(endpt *, sockaddr_u *);
227static	isc_boolean_t	socket_multicast_disable(endpt *, sockaddr_u *);
228#endif
229
230#ifdef DEBUG
231static void interface_dump	(const endpt *);
232static void sockaddr_dump	(const sockaddr_u *);
233static void print_interface	(const endpt *, const char *, const char *);
234#define DPRINT_INTERFACE(level, args) do { if (debug >= (level)) { print_interface args; } } while (0)
235#else
236#define DPRINT_INTERFACE(level, args) do {} while (0)
237#endif
238
239typedef struct vsock vsock_t;
240enum desc_type { FD_TYPE_SOCKET, FD_TYPE_FILE };
241
242struct vsock {
243	vsock_t	*	link;
244	SOCKET		fd;
245	enum desc_type	type;
246};
247
248vsock_t	*fd_list;
249
250#if !defined(HAVE_IO_COMPLETION_PORT) && defined(HAS_ROUTING_SOCKET)
251/*
252 * async notification processing (e. g. routing sockets)
253 */
254/*
255 * support for receiving data on fd that is not a refclock or a socket
256 * like e. g. routing sockets
257 */
258struct asyncio_reader {
259	struct asyncio_reader *link;		    /* the list this is being kept in */
260	SOCKET fd;				    /* fd to be read */
261	void  *data;				    /* possibly local data */
262	void (*receiver)(struct asyncio_reader *);  /* input handler */
263};
264
265struct asyncio_reader *asyncio_reader_list;
266
267static void delete_asyncio_reader (struct asyncio_reader *);
268static struct asyncio_reader *new_asyncio_reader (void);
269static void add_asyncio_reader (struct asyncio_reader *, enum desc_type);
270static void remove_asyncio_reader (struct asyncio_reader *);
271
272#endif /* !defined(HAVE_IO_COMPLETION_PORT) && defined(HAS_ROUTING_SOCKET) */
273
274static void init_async_notifications (void);
275
276static	int	addr_eqprefix	(const sockaddr_u *, const sockaddr_u *,
277				 int);
278static int	addr_samesubnet	(const sockaddr_u *, const sockaddr_u *,
279				 const sockaddr_u *, const sockaddr_u *);
280static	int	create_sockets	(u_short);
281static	SOCKET	open_socket	(sockaddr_u *, int, int, endpt *);
282static	void	set_reuseaddr	(int);
283static	isc_boolean_t	socket_broadcast_enable	 (struct interface *, SOCKET, sockaddr_u *);
284
285#if !defined(HAVE_IO_COMPLETION_PORT) && !defined(HAVE_SIGNALED_IO)
286static	char *	fdbits		(int, const fd_set *);
287#endif
288#ifdef  OS_MISSES_SPECIFIC_ROUTE_UPDATES
289static	isc_boolean_t	socket_broadcast_disable (struct interface *, sockaddr_u *);
290#endif
291
292typedef struct remaddr remaddr_t;
293
294struct remaddr {
295	remaddr_t *		link;
296	sockaddr_u		addr;
297	endpt *			ep;
298};
299
300remaddr_t *	remoteaddr_list;
301endpt *		ep_list;	/* complete endpt list */
302endpt *		mc4_list;	/* IPv4 mcast-capable unicast endpts */
303endpt *		mc6_list;	/* IPv6 mcast-capable unicast endpts */
304
305static endpt *	wildipv4;
306static endpt *	wildipv6;
307
308#ifdef SYS_WINNT
309int accept_wildcard_if_for_winnt;
310#else
311const int accept_wildcard_if_for_winnt = FALSE;
312#endif
313
314static void	add_fd_to_list		(SOCKET, enum desc_type);
315static endpt *	find_addr_in_list	(sockaddr_u *);
316static endpt *	find_flagged_addr_in_list(sockaddr_u *, u_int32);
317static void	delete_addr_from_list	(sockaddr_u *);
318static void	delete_interface_from_list(endpt *);
319static void	close_and_delete_fd_from_list(SOCKET);
320static void	add_addr_to_list	(sockaddr_u *, endpt *);
321static void	create_wildcards	(u_short);
322static endpt *	findlocalinterface	(sockaddr_u *, int, int);
323static endpt *	findclosestinterface	(sockaddr_u *, int);
324#ifdef DEBUG
325static const char *	action_text	(nic_rule_action);
326#endif
327static nic_rule_action	interface_action(char *, sockaddr_u *, u_int32);
328static void		convert_isc_if	(isc_interface_t *,
329					 endpt *, u_short);
330static void		calc_addr_distance(sockaddr_u *,
331					   const sockaddr_u *,
332					   const sockaddr_u *);
333static int		cmp_addr_distance(const sockaddr_u *,
334					  const sockaddr_u *);
335
336/*
337 * Routines to read the ntp packets
338 */
339#if !defined(HAVE_IO_COMPLETION_PORT)
340static inline int	read_network_packet	(SOCKET, struct interface *, l_fp);
341static void		ntpd_addremove_io_fd	(int, int, int);
342static void 		input_handler_scan	(const l_fp*, const fd_set*);
343static int/*BOOL*/	sanitize_fdset		(int errc);
344#ifdef REFCLOCK
345static inline int	read_refclock_packet	(SOCKET, struct refclockio *, l_fp);
346#endif
347#ifdef HAVE_SIGNALED_IO
348static void 		input_handler		(l_fp*);
349#endif
350#endif
351
352
353#ifndef HAVE_IO_COMPLETION_PORT
354void
355maintain_activefds(
356	int fd,
357	int closing
358	)
359{
360	int i;
361
362	if (fd < 0 || fd >= FD_SETSIZE) {
363		msyslog(LOG_ERR,
364			"Too many sockets in use, FD_SETSIZE %d exceeded by fd %d",
365			FD_SETSIZE, fd);
366		exit(1);
367	}
368
369	if (!closing) {
370		FD_SET(fd, &activefds);
371		maxactivefd = max(fd, maxactivefd);
372	} else {
373		FD_CLR(fd, &activefds);
374		if (maxactivefd && fd == maxactivefd) {
375			for (i = maxactivefd - 1; i >= 0; i--)
376				if (FD_ISSET(i, &activefds)) {
377					maxactivefd = i;
378					break;
379				}
380			INSIST(fd != maxactivefd);
381		}
382	}
383}
384#endif	/* !HAVE_IO_COMPLETION_PORT */
385
386
387#ifdef DEBUG_TIMING
388/*
389 * collect timing information for various processing
390 * paths. currently we only pass them on to the file
391 * for later processing. this could also do histogram
392 * based analysis in other to reduce the load (and skew)
393 * dur to the file output
394 */
395void
396collect_timing(struct recvbuf *rb, const char *tag, int count, l_fp *dts)
397{
398	char buf[256];
399
400	snprintf(buf, sizeof(buf), "%s %d %s %s",
401		 (rb != NULL)
402		     ? ((rb->dstadr != NULL)
403			    ? stoa(&rb->recv_srcadr)
404			    : "-REFCLOCK-")
405		     : "-",
406		 count, lfptoa(dts, 9), tag);
407	record_timing_stats(buf);
408}
409#endif
410
411/*
412 * About dynamic interfaces, sockets, reception and more...
413 *
414 * the code solves following tasks:
415 *
 *   - keep a current list of active interfaces in order
 *     to bind to the interface address on NTP_PORT so that
 *     all wild and specific bindings for NTP_PORT are taken by ntpd
 *     to avoid other daemons messing with the time or sockets.
420 *   - all interfaces keep a list of peers that are referencing
421 *     the interface in order to quickly re-assign the peers to
422 *     new interface in case an interface is deleted (=> gone from system or
423 *     down)
424 *   - have a preconfigured socket ready with the right local address
425 *     for transmission and reception
426 *   - have an address list for all destination addresses used within ntpd
427 *     to find the "right" preconfigured socket.
428 *   - facilitate updating the internal interface list with respect to
429 *     the current kernel state
430 *
431 * special issues:
432 *
433 *   - mapping of multicast addresses to the interface affected is not always
434 *     one to one - especially on hosts with multiple interfaces
435 *     the code here currently allocates a separate interface entry for those
436 *     multicast addresses
437 *     iff it is able to bind to a *new* socket with the multicast address (flags |= MCASTIF)
438 *     in case of failure the multicast address is bound to an existing interface.
439 *   - on some systems it is perfectly legal to assign the same address to
440 *     multiple interfaces. Therefore this code does not keep a list of interfaces
441 *     but a list of interfaces that represent a unique address as determined by the kernel
442 *     by the procedure in findlocalinterface. Thus it is perfectly legal to see only
443 *     one representative of a group of real interfaces if they share the same address.
444 *
445 * Frank Kardel 20050910
446 */
447
448/*
449 * init_io - initialize I/O module.
450 */
451void
452init_io(void)
453{
454	/* Init buffer free list and stat counters */
455	init_recvbuff(RECV_INIT);
456	/* update interface every 5 minutes as default */
457	interface_interval = 300;
458
459#ifdef WORK_PIPE
460	addremove_io_fd = &ntpd_addremove_io_fd;
461#endif
462
463#if defined(SYS_WINNT)
464	init_io_completion_port();
465#elif defined(HAVE_SIGNALED_IO)
466	(void) set_signal(input_handler);
467#endif
468}
469
470
/*
 * ntpd_addremove_io_fd - start or stop watching a descriptor
 *
 * init_io() installs this as addremove_io_fd when WORK_PIPE is
 * defined.
 *
 * fd		descriptor concerned
 * is_pipe	accepted for the callback signature, not used here
 * remove_it	zero to add fd, nonzero to remove it
 */
static void
ntpd_addremove_io_fd(
	int	fd,
	int	is_pipe,
	int	remove_it
	)
{
	UNUSED_ARG(is_pipe);

#ifdef HAVE_SIGNALED_IO
	/* only a descriptor being added needs signal setup */
	if (0 == remove_it)
		init_socket_sig(fd);
#endif /* HAVE_SIGNALED_IO */

	maintain_activefds(fd, remove_it);
}
487
488
489/*
490 * io_open_sockets - call socket creation routine
491 */
492void
493io_open_sockets(void)
494{
495	static int already_opened;
496
497	if (already_opened || HAVE_OPT( SAVECONFIGQUIT ))
498		return;
499
500	already_opened = 1;
501
502	/*
503	 * Create the sockets
504	 */
505	BLOCKIO();
506	create_sockets(NTP_PORT);
507	UNBLOCKIO();
508
509	init_async_notifications();
510
511	DPRINTF(3, ("io_open_sockets: maxactivefd %d\n", maxactivefd));
512}
513
514
515#ifdef DEBUG
516/*
517 * function to dump the contents of the interface structure
518 * for debugging use only.
519 */
520void
521interface_dump(const endpt *itf)
522{
523	printf("Dumping interface: %p\n", itf);
524	printf("fd = %d\n", itf->fd);
525	printf("bfd = %d\n", itf->bfd);
526	printf("sin = %s,\n", stoa(&itf->sin));
527	sockaddr_dump(&itf->sin);
528	printf("bcast = %s,\n", stoa(&itf->bcast));
529	sockaddr_dump(&itf->bcast);
530	printf("mask = %s,\n", stoa(&itf->mask));
531	sockaddr_dump(&itf->mask);
532	printf("name = %s\n", itf->name);
533	printf("flags = 0x%08x\n", itf->flags);
534	printf("last_ttl = %d\n", itf->last_ttl);
535	printf("addr_refid = %08x\n", itf->addr_refid);
536	printf("num_mcast = %d\n", itf->num_mcast);
537	printf("received = %ld\n", itf->received);
538	printf("sent = %ld\n", itf->sent);
539	printf("notsent = %ld\n", itf->notsent);
540	printf("ifindex = %u\n", itf->ifindex);
541	printf("peercnt = %u\n", itf->peercnt);
542	printf("phase = %u\n", itf->phase);
543}
544
545/*
546 * sockaddr_dump - hex dump the start of a sockaddr_u
547 */
548static void
549sockaddr_dump(const sockaddr_u *psau)
550{
551	/* Limit the size of the sockaddr_in6 hex dump */
552	const int maxsize = min(32, sizeof(psau->sa6));
553	const u_char *	cp;
554	int		i;
555
556	/* XXX: Should we limit maxsize based on psau->saX.sin_family? */
557	cp = (const void *)&psau->sa6;
558
559	for(i = 0; i < maxsize; i++) {
560		printf("%02x", *cp++);
561		if (!((i + 1) % 4))
562			printf(" ");
563	}
564	printf("\n");
565}
566
567/*
568 * print_interface - helper to output debug information
569 */
570static void
571print_interface(const endpt *iface, const char *pfx, const char *sfx)
572{
573	printf("%sinterface #%d: fd=%d, bfd=%d, name=%s, flags=0x%x, ifindex=%u, sin=%s",
574	       pfx,
575	       iface->ifnum,
576	       iface->fd,
577	       iface->bfd,
578	       iface->name,
579	       iface->flags,
580	       iface->ifindex,
581	       stoa(&iface->sin));
582	if (AF_INET == iface->family) {
583		if (iface->flags & INT_BROADCAST)
584			printf(", bcast=%s", stoa(&iface->bcast));
585		printf(", mask=%s", stoa(&iface->mask));
586	}
587	printf(", %s:%s",
588	       (iface->ignore_packets)
589		   ? "Disabled"
590		   : "Enabled",
591	       sfx);
592	if (debug > 4)	/* in-depth debugging only */
593		interface_dump(iface);
594}
595#endif
596
597#if !defined(HAVE_IO_COMPLETION_PORT) && defined(HAS_ROUTING_SOCKET)
598/*
599 * create an asyncio_reader structure
600 */
601static struct asyncio_reader *
602new_asyncio_reader(void)
603{
604	struct asyncio_reader *reader;
605
606	reader = emalloc_zero(sizeof(*reader));
607	reader->fd = INVALID_SOCKET;
608
609	return reader;
610}
611
/*
 * delete_asyncio_reader - free a reader's storage
 *
 * Only the structure itself is released; nothing is unlinked or
 * closed here.
 */
static void
delete_asyncio_reader(struct asyncio_reader *reader)
{
	free(reader);
}
622
623/*
624 * add asynchio_reader
625 */
626static void
627add_asyncio_reader(
628	struct asyncio_reader *	reader,
629	enum desc_type		type)
630{
631	LINK_SLIST(asyncio_reader_list, reader, link);
632	add_fd_to_list(reader->fd, type);
633}
634
635/*
636 * remove asynchio_reader
637 */
638static void
639remove_asyncio_reader(
640	struct asyncio_reader *reader
641	)
642{
643	struct asyncio_reader *unlinked;
644
645	UNLINK_SLIST(unlinked, asyncio_reader_list, reader, link,
646	    struct asyncio_reader);
647
648	if (reader->fd != INVALID_SOCKET)
649		close_and_delete_fd_from_list(reader->fd);
650
651	reader->fd = INVALID_SOCKET;
652}
653#endif /* !defined(HAVE_IO_COMPLETION_PORT) && defined(HAS_ROUTING_SOCKET) */
654
655
656/* compare two sockaddr prefixes */
657static int
658addr_eqprefix(
659	const sockaddr_u *	a,
660	const sockaddr_u *	b,
661	int			prefixlen
662	)
663{
664	isc_netaddr_t		isc_a;
665	isc_netaddr_t		isc_b;
666	isc_sockaddr_t		isc_sa;
667
668	ZERO(isc_sa);
669	memcpy(&isc_sa.type, a, min(sizeof(isc_sa.type), sizeof(*a)));
670	isc_netaddr_fromsockaddr(&isc_a, &isc_sa);
671
672	ZERO(isc_sa);
673	memcpy(&isc_sa.type, b, min(sizeof(isc_sa.type), sizeof(*b)));
674	isc_netaddr_fromsockaddr(&isc_b, &isc_sa);
675
676	return (int)isc_netaddr_eqprefix(&isc_a, &isc_b,
677					 (u_int)prefixlen);
678}
679
680
681static int
682addr_samesubnet(
683	const sockaddr_u *	a,
684	const sockaddr_u *	a_mask,
685	const sockaddr_u *	b,
686	const sockaddr_u *	b_mask
687	)
688{
689	const u_int32 *	pa;
690	const u_int32 *	pa_limit;
691	const u_int32 *	pb;
692	const u_int32 *	pm;
693	size_t		loops;
694
695	REQUIRE(AF(a) == AF(a_mask));
696	REQUIRE(AF(b) == AF(b_mask));
697	/*
698	 * With address and mask families verified to match, comparing
699	 * the masks also validates the address's families match.
700	 */
701	if (!SOCK_EQ(a_mask, b_mask))
702		return FALSE;
703
704	if (IS_IPV6(a)) {
705		loops = sizeof(NSRCADR6(a)) / sizeof(*pa);
706		pa = (const void *)&NSRCADR6(a);
707		pb = (const void *)&NSRCADR6(b);
708		pm = (const void *)&NSRCADR6(a_mask);
709	} else {
710		loops = sizeof(NSRCADR(a)) / sizeof(*pa);
711		pa = (const void *)&NSRCADR(a);
712		pb = (const void *)&NSRCADR(b);
713		pm = (const void *)&NSRCADR(a_mask);
714	}
715	for (pa_limit = pa + loops; pa < pa_limit; pa++, pb++, pm++)
716		if ((*pa & *pm) != (*pb & *pm))
717			return FALSE;
718
719	return TRUE;
720}
721
722
723/*
724 * interface list enumerator - visitor pattern
725 */
726void
727interface_enumerate(
728	interface_receiver_t	receiver,
729	void *			data
730	)
731{
732	interface_info_t ifi;
733
734	ifi.action = IFS_EXISTS;
735	for (ifi.ep = ep_list; ifi.ep != NULL; ifi.ep = ifi.ep->elink)
736		(*receiver)(data, &ifi);
737}
738
739/*
740 * do standard initialization of interface structure
741 */
742static void
743init_interface(
744	endpt *ep
745	)
746{
747	ZERO(*ep);
748	ep->fd = INVALID_SOCKET;
749	ep->bfd = INVALID_SOCKET;
750	ep->phase = sys_interphase;
751}
752
753
754/*
755 * create new interface structure initialize from
756 * template structure or via standard initialization
757 * function
758 */
759static struct interface *
760new_interface(
761	struct interface *interface
762	)
763{
764	struct interface *	iface;
765
766	iface = emalloc(sizeof(*iface));
767
768	if (NULL == interface)
769		init_interface(iface);
770	else				/* use the template */
771		memcpy(iface, interface, sizeof(*iface));
772
773	/* count every new instance of an interface in the system */
774	iface->ifnum = sys_ifnum++;
775	iface->starttime = current_time;
776
777#   ifdef HAVE_IO_COMPLETION_PORT
778	if (!io_completion_port_add_interface(iface)) {
779		msyslog(LOG_EMERG, "cannot register interface with IO engine -- will exit now");
780		exit(1);
781	}
782#   endif
783	return iface;
784}
785
786
787/*
788 * return interface storage into free memory pool
789 */
790static void
791delete_interface(
792	endpt *ep
793	)
794{
795#    ifdef HAVE_IO_COMPLETION_PORT
796	io_completion_port_remove_interface(ep);
797#    endif
798	free(ep);
799}
800
801
802/*
803 * link interface into list of known interfaces
804 */
805static void
806add_interface(
807	endpt *	ep
808	)
809{
810	endpt **	pmclisthead;
811	endpt *		scan;
812	endpt *		scan_next;
813	endpt *		unlinked;
814	sockaddr_u *	addr;
815	int		ep_local;
816	int		scan_local;
817	int		same_subnet;
818	int		ep_univ_iid;	/* iface ID from MAC address */
819	int		scan_univ_iid;	/* see RFC 4291 */
820	int		ep_privacy;	/* random local iface ID */
821	int		scan_privacy;	/* see RFC 4941 */
822	int		rc;
823
824	/* Calculate the refid */
825	ep->addr_refid = addr2refid(&ep->sin);
826	/* link at tail so ntpdc -c ifstats index increases each row */
827	LINK_TAIL_SLIST(ep_list, ep, elink, endpt);
828	ninterfaces++;
829#ifdef MCAST
830	/* the rest is for enabled multicast-capable addresses only */
831	if (ep->ignore_packets || !(INT_MULTICAST & ep->flags) ||
832	    INT_LOOPBACK & ep->flags)
833		return;
834# ifndef INCLUDE_IPV6_MULTICAST_SUPPORT
835	if (AF_INET6 == ep->family)
836		return;
837# endif
838	pmclisthead = (AF_INET == ep->family)
839			 ? &mc4_list
840			 : &mc6_list;
841
842	if (AF_INET6 == ep->family) {
843		ep_local =
844		    IN6_IS_ADDR_LINKLOCAL(PSOCK_ADDR6(&ep->sin)) ||
845		    IN6_IS_ADDR_SITELOCAL(PSOCK_ADDR6(&ep->sin));
846		ep_univ_iid = IS_IID_UNIV(&ep->sin);
847		ep_privacy = !!(INT_PRIVACY & ep->flags);
848	} else {
849		ep_local = FALSE;
850		ep_univ_iid = FALSE;
851		ep_privacy = FALSE;
852	}
853	DPRINTF(4, ("add_interface mcast-capable %s%s%s%s\n",
854		    stoa(&ep->sin),
855		    (ep_local) ? " link/scope-local" : "",
856		    (ep_univ_iid) ? " univ-IID" : "",
857		    (ep_privacy) ? " privacy" : ""));
858	/*
859	 * If we have multiple local addresses on the same network
860	 * interface, and some are link- or site-local, do not multicast
861	 * out from the link-/site-local addresses by default, to avoid
862	 * duplicate manycastclient associations between v6 peers using
863	 * link-local and global addresses.  link-local can still be
864	 * chosen using "nic ignore myv6globalprefix::/64".
865	 * Similarly, if we have multiple global addresses from the same
866	 * prefix on the same network interface, multicast from one,
867	 * preferring EUI-64, then static, then least RFC 4941 privacy
868	 * addresses.
869	 */
870	for (scan = *pmclisthead; scan != NULL; scan = scan_next) {
871		scan_next = scan->mclink;
872		if (ep->family != scan->family)
873			continue;
874		if (strcmp(ep->name, scan->name))
875			continue;
876		same_subnet = addr_samesubnet(&ep->sin, &ep->mask,
877					      &scan->sin, &scan->mask);
878		if (AF_INET6 == ep->family) {
879			addr = &scan->sin;
880			scan_local =
881			    IN6_IS_ADDR_LINKLOCAL(PSOCK_ADDR6(addr)) ||
882			    IN6_IS_ADDR_SITELOCAL(PSOCK_ADDR6(addr));
883			scan_univ_iid = IS_IID_UNIV(addr);
884			scan_privacy = !!(INT_PRIVACY & scan->flags);
885		} else {
886			scan_local = FALSE;
887			scan_univ_iid = FALSE;
888			scan_privacy = FALSE;
889		}
890		DPRINTF(4, ("add_interface mcast-capable scan %s%s%s%s\n",
891			    stoa(&scan->sin),
892			    (scan_local) ? " link/scope-local" : "",
893			    (scan_univ_iid) ? " univ-IID" : "",
894			    (scan_privacy) ? " privacy" : ""));
895		if ((ep_local && !scan_local) || (same_subnet &&
896		    ((ep_privacy && !scan_privacy) ||
897		     (!ep_univ_iid && scan_univ_iid)))) {
898			DPRINTF(4, ("did not add %s to %s of IPv6 multicast-capable list which already has %s\n",
899				stoa(&ep->sin),
900				(ep_local)
901				    ? "tail"
902				    : "head",
903				stoa(&scan->sin)));
904			return;
905		}
906		if ((scan_local && !ep_local) || (same_subnet &&
907		    ((scan_privacy && !ep_privacy) ||
908		     (!scan_univ_iid && ep_univ_iid)))) {
909			UNLINK_SLIST(unlinked, *pmclisthead,
910				     scan, mclink, endpt);
911			DPRINTF(4, ("%s %s from IPv6 multicast-capable list to add %s\n",
912				(unlinked != scan)
913				    ? "Failed to remove"
914				    : "removed",
915				stoa(&scan->sin), stoa(&ep->sin)));
916		}
917	}
918	/*
919	 * Add link/site local at the tail of the multicast-
920	 * capable unicast interfaces list, so that ntpd will
921	 * send from global addresses before link-/site-local
922	 * ones.
923	 */
924	if (ep_local)
925		LINK_TAIL_SLIST(*pmclisthead, ep, mclink, endpt);
926	else
927		LINK_SLIST(*pmclisthead, ep, mclink);
928	DPRINTF(4, ("added %s to %s of IPv%s multicast-capable unicast local address list\n",
929		stoa(&ep->sin),
930		(ep_local)
931		    ? "tail"
932		    : "head",
933		(AF_INET == ep->family)
934		    ? "4"
935		    : "6"));
936
937	if (INVALID_SOCKET == ep->fd)
938		return;
939
940	/*
941	 * select the local address from which to send to multicast.
942	 */
943	switch (AF(&ep->sin)) {
944
945	case AF_INET :
946		rc = setsockopt(ep->fd, IPPROTO_IP,
947				IP_MULTICAST_IF,
948				(void *)&NSRCADR(&ep->sin),
949				sizeof(NSRCADR(&ep->sin)));
950		if (rc)
951			msyslog(LOG_ERR,
952				"setsockopt IP_MULTICAST_IF %s fails: %m",
953				stoa(&ep->sin));
954		break;
955
956# ifdef INCLUDE_IPV6_MULTICAST_SUPPORT
957	case AF_INET6 :
958		rc = setsockopt(ep->fd, IPPROTO_IPV6,
959				 IPV6_MULTICAST_IF,
960				 (void *)&ep->ifindex,
961				 sizeof(ep->ifindex));
962		/* do not complain if bound addr scope is ifindex */
963		if (rc && ep->ifindex != SCOPE(&ep->sin))
964			msyslog(LOG_ERR,
965				"setsockopt IPV6_MULTICAST_IF %u for %s fails: %m",
966				ep->ifindex, stoa(&ep->sin));
967		break;
968# endif
969	}
970#endif	/* MCAST */
971}
972
973
974/*
975 * remove interface from known interface list and clean up
976 * associated resources
977 */
978static void
979remove_interface(
980	endpt *	ep
981	)
982{
983	endpt *		unlinked;
984	endpt **	pmclisthead;
985	sockaddr_u	resmask;
986
987	UNLINK_SLIST(unlinked, ep_list, ep, elink, endpt);
988	if (!ep->ignore_packets && INT_MULTICAST & ep->flags) {
989		pmclisthead = (AF_INET == ep->family)
990				 ? &mc4_list
991				 : &mc6_list;
992		UNLINK_SLIST(unlinked, *pmclisthead, ep, mclink, endpt);
993		DPRINTF(4, ("%s %s IPv%s multicast-capable unicast local address list\n",
994			stoa(&ep->sin),
995			(unlinked != NULL)
996			    ? "removed from"
997			    : "not found on",
998			(AF_INET == ep->family)
999			    ? "4"
1000			    : "6"));
1001	}
1002	delete_interface_from_list(ep);
1003
1004	if (ep->fd != INVALID_SOCKET) {
1005		msyslog(LOG_INFO,
1006			"Deleting interface #%d %s, %s#%d, interface stats: received=%ld, sent=%ld, dropped=%ld, active_time=%ld secs",
1007			ep->ifnum,
1008			ep->name,
1009			stoa(&ep->sin),
1010			SRCPORT(&ep->sin),
1011			ep->received,
1012			ep->sent,
1013			ep->notsent,
1014			current_time - ep->starttime);
1015#	    ifdef HAVE_IO_COMPLETION_PORT
1016		io_completion_port_remove_socket(ep->fd, ep);
1017#	    endif
1018		close_and_delete_fd_from_list(ep->fd);
1019		ep->fd = INVALID_SOCKET;
1020	}
1021
1022	if (ep->bfd != INVALID_SOCKET) {
1023		msyslog(LOG_INFO,
1024			"stop listening for broadcasts to %s on interface #%d %s",
1025			stoa(&ep->bcast), ep->ifnum, ep->name);
1026#	    ifdef HAVE_IO_COMPLETION_PORT
1027		io_completion_port_remove_socket(ep->bfd, ep);
1028#	    endif
1029		close_and_delete_fd_from_list(ep->bfd);
1030		ep->bfd = INVALID_SOCKET;
1031	}
1032#   ifdef HAVE_IO_COMPLETION_PORT
1033	io_completion_port_remove_interface(ep);
1034#   endif
1035
1036	ninterfaces--;
1037	mon_clearinterface(ep);
1038
1039	/* remove restrict interface entry */
1040	SET_HOSTMASK(&resmask, AF(&ep->sin));
1041	hack_restrict(RESTRICT_REMOVEIF, &ep->sin, &resmask,
1042		      RESM_NTPONLY | RESM_INTERFACE, RES_IGNORE, 0);
1043}
1044
1045
1046static void
1047log_listen_address(
1048	endpt *	ep
1049	)
1050{
1051	msyslog(LOG_INFO, "%s on %d %s %s",
1052		(ep->ignore_packets)
1053		    ? "Listen and drop"
1054		    : "Listen normally",
1055		ep->ifnum,
1056		ep->name,
1057		sptoa(&ep->sin));
1058}
1059
1060
1061static void
1062create_wildcards(
1063	u_short	port
1064	)
1065{
1066	int			v4wild;
1067#ifdef INCLUDE_IPV6_SUPPORT
1068	int			v6wild;
1069#endif
1070	sockaddr_u		wildaddr;
1071	nic_rule_action		action;
1072	struct interface *	wildif;
1073
1074	/*
1075	 * silence "potentially uninitialized" warnings from VC9
1076	 * failing to follow the logic.  Ideally action could remain
1077	 * uninitialized, and the memset be the first statement under
1078	 * the first if (v4wild).
1079	 */
1080	action = ACTION_LISTEN;
1081	ZERO(wildaddr);
1082
1083#ifdef INCLUDE_IPV6_SUPPORT
1084	/*
1085	 * create pseudo-interface with wildcard IPv6 address
1086	 */
1087	v6wild = ipv6_works;
1088	if (v6wild) {
1089		/* set wildaddr to the v6 wildcard address :: */
1090		ZERO(wildaddr);
1091		AF(&wildaddr) = AF_INET6;
1092		SET_ADDR6N(&wildaddr, in6addr_any);
1093		SET_PORT(&wildaddr, port);
1094		SET_SCOPE(&wildaddr, 0);
1095
1096		/* check for interface/nic rules affecting the wildcard */
1097		action = interface_action(NULL, &wildaddr, 0);
1098		v6wild = (ACTION_IGNORE != action);
1099	}
1100	if (v6wild) {
1101		wildif = new_interface(NULL);
1102
1103		strlcpy(wildif->name, "v6wildcard", sizeof(wildif->name));
1104		memcpy(&wildif->sin, &wildaddr, sizeof(wildif->sin));
1105		wildif->family = AF_INET6;
1106		AF(&wildif->mask) = AF_INET6;
1107		SET_ONESMASK(&wildif->mask);
1108
1109		wildif->flags = INT_UP | INT_WILDCARD;
1110		wildif->ignore_packets = (ACTION_DROP == action);
1111
1112		wildif->fd = open_socket(&wildif->sin, 0, 1, wildif);
1113
1114		if (wildif->fd != INVALID_SOCKET) {
1115			wildipv6 = wildif;
1116			any6_interface = wildif;
1117			add_addr_to_list(&wildif->sin, wildif);
1118			add_interface(wildif);
1119			log_listen_address(wildif);
1120		} else {
1121			msyslog(LOG_ERR,
1122				"unable to bind to wildcard address %s - another process may be running - EXITING",
1123				stoa(&wildif->sin));
1124			exit(1);
1125		}
1126		DPRINT_INTERFACE(2, (wildif, "created ", "\n"));
1127	}
1128#endif
1129
1130	/*
1131	 * create pseudo-interface with wildcard IPv4 address
1132	 */
1133	v4wild = ipv4_works;
1134	if (v4wild) {
1135		/* set wildaddr to the v4 wildcard address 0.0.0.0 */
1136		AF(&wildaddr) = AF_INET;
1137		SET_ADDR4N(&wildaddr, INADDR_ANY);
1138		SET_PORT(&wildaddr, port);
1139
1140		/* check for interface/nic rules affecting the wildcard */
1141		action = interface_action(NULL, &wildaddr, 0);
1142		v4wild = (ACTION_IGNORE != action);
1143	}
1144	if (v4wild) {
1145		wildif = new_interface(NULL);
1146
1147		strlcpy(wildif->name, "v4wildcard", sizeof(wildif->name));
1148		memcpy(&wildif->sin, &wildaddr, sizeof(wildif->sin));
1149		wildif->family = AF_INET;
1150		AF(&wildif->mask) = AF_INET;
1151		SET_ONESMASK(&wildif->mask);
1152
1153		wildif->flags = INT_BROADCAST | INT_UP | INT_WILDCARD;
1154		wildif->ignore_packets = (ACTION_DROP == action);
1155#if defined(MCAST)
1156		/*
1157		 * enable multicast reception on the broadcast socket
1158		 */
1159		AF(&wildif->bcast) = AF_INET;
1160		SET_ADDR4N(&wildif->bcast, INADDR_ANY);
1161		SET_PORT(&wildif->bcast, port);
1162#endif /* MCAST */
1163		wildif->fd = open_socket(&wildif->sin, 0, 1, wildif);
1164
1165		if (wildif->fd != INVALID_SOCKET) {
1166			wildipv4 = wildif;
1167			any_interface = wildif;
1168
1169			add_addr_to_list(&wildif->sin, wildif);
1170			add_interface(wildif);
1171			log_listen_address(wildif);
1172		} else {
1173			msyslog(LOG_ERR,
1174				"unable to bind to wildcard address %s - another process may be running - EXITING",
1175				stoa(&wildif->sin));
1176			exit(1);
1177		}
1178		DPRINT_INTERFACE(2, (wildif, "created ", "\n"));
1179	}
1180}
1181
1182
1183/*
1184 * add_nic_rule() -- insert a rule entry at the head of nic_rule_list.
1185 */
1186void
1187add_nic_rule(
1188	nic_rule_match	match_type,
1189	const char *	if_name,	/* interface name or numeric address */
1190	int		prefixlen,
1191	nic_rule_action	action
1192	)
1193{
1194	nic_rule *	rule;
1195	isc_boolean_t	is_ip;
1196
1197	rule = emalloc_zero(sizeof(*rule));
1198	rule->match_type = match_type;
1199	rule->prefixlen = prefixlen;
1200	rule->action = action;
1201
1202	if (MATCH_IFNAME == match_type) {
1203		REQUIRE(NULL != if_name);
1204		rule->if_name = estrdup(if_name);
1205	} else if (MATCH_IFADDR == match_type) {
1206		REQUIRE(NULL != if_name);
1207		/* set rule->addr */
1208		is_ip = is_ip_address(if_name, AF_UNSPEC, &rule->addr);
1209		REQUIRE(is_ip);
1210	} else
1211		REQUIRE(NULL == if_name);
1212
1213	LINK_SLIST(nic_rule_list, rule, next);
1214}
1215
1216
#ifdef DEBUG
/*
 * action_text - short name of a nic_rule_action for debug output.
 * A value that is not a known action is reported and trips ENSURE().
 */
static const char *
action_text(
	nic_rule_action	action
	)
{
	switch (action) {

	case ACTION_LISTEN:
		return "listen";

	case ACTION_IGNORE:
		return "ignore";

	case ACTION_DROP:
		return "drop";

	default:
		break;
	}

	DPRINTF(1, ("fatal: unknown nic_rule_action %d\n", action));
	ENSURE(0);

	return "ERROR";	/* quiet missing-return warning */
}
#endif	/* DEBUG */
1250
1251
1252static nic_rule_action
1253interface_action(
1254	char *		if_name,
1255	sockaddr_u *	if_addr,
1256	u_int32		if_flags
1257	)
1258{
1259	nic_rule *	rule;
1260	int		isloopback;
1261	int		iswildcard;
1262
1263	DPRINTF(4, ("interface_action: interface %s ",
1264		    (if_name != NULL) ? if_name : "wildcard"));
1265
1266	iswildcard = is_wildcard_addr(if_addr);
1267	isloopback = !!(INT_LOOPBACK & if_flags);
1268
1269	/*
1270	 * Find any matching NIC rule from --interface / -I or ntp.conf
1271	 * interface/nic rules.
1272	 */
1273	for (rule = nic_rule_list; rule != NULL; rule = rule->next) {
1274
1275		switch (rule->match_type) {
1276
1277		case MATCH_ALL:
1278			/* loopback and wildcard excluded from "all" */
1279			if (isloopback || iswildcard)
1280				break;
1281			DPRINTF(4, ("nic all %s\n",
1282			    action_text(rule->action)));
1283			return rule->action;
1284
1285		case MATCH_IPV4:
1286			if (IS_IPV4(if_addr)) {
1287				DPRINTF(4, ("nic ipv4 %s\n",
1288				    action_text(rule->action)));
1289				return rule->action;
1290			}
1291			break;
1292
1293		case MATCH_IPV6:
1294			if (IS_IPV6(if_addr)) {
1295				DPRINTF(4, ("nic ipv6 %s\n",
1296				    action_text(rule->action)));
1297				return rule->action;
1298			}
1299			break;
1300
1301		case MATCH_WILDCARD:
1302			if (iswildcard) {
1303				DPRINTF(4, ("nic wildcard %s\n",
1304				    action_text(rule->action)));
1305				return rule->action;
1306			}
1307			break;
1308
1309		case MATCH_IFADDR:
1310			if (rule->prefixlen != -1) {
1311				if (addr_eqprefix(if_addr, &rule->addr,
1312						  rule->prefixlen)) {
1313
1314					DPRINTF(4, ("subnet address match - %s\n",
1315					    action_text(rule->action)));
1316					return rule->action;
1317				}
1318			} else
1319				if (SOCK_EQ(if_addr, &rule->addr)) {
1320
1321					DPRINTF(4, ("address match - %s\n",
1322					    action_text(rule->action)));
1323					return rule->action;
1324				}
1325			break;
1326
1327		case MATCH_IFNAME:
1328			if (if_name != NULL
1329#if defined(HAVE_FNMATCH) && defined(FNM_CASEFOLD)
1330			    && !fnmatch(rule->if_name, if_name, FNM_CASEFOLD)
1331#else
1332			    && !strcasecmp(if_name, rule->if_name)
1333#endif
1334			    ) {
1335
1336				DPRINTF(4, ("interface name match - %s\n",
1337				    action_text(rule->action)));
1338				return rule->action;
1339			}
1340			break;
1341		}
1342	}
1343
1344	/*
1345	 * Unless explicitly disabled such as with "nic ignore ::1"
1346	 * listen on loopback addresses.  Since ntpq and ntpdc query
1347	 * "localhost" by default, which typically resolves to ::1 and
1348	 * 127.0.0.1, it's useful to default to listening on both.
1349	 */
1350	if (isloopback) {
1351		DPRINTF(4, ("default loopback listen\n"));
1352		return ACTION_LISTEN;
1353	}
1354
1355	/*
1356	 * Treat wildcard addresses specially.  If there is no explicit
1357	 * "nic ... wildcard" or "nic ... 0.0.0.0" or "nic ... ::" rule
1358	 * default to drop.
1359	 */
1360	if (iswildcard) {
1361		DPRINTF(4, ("default wildcard drop\n"));
1362		return ACTION_DROP;
1363	}
1364
1365	/*
1366	 * Check for "virtual IP" (colon in the interface name) after
1367	 * the rules so that "ntpd --interface eth0:1 -novirtualips"
1368	 * does indeed listen on eth0:1's addresses.
1369	 */
1370	if (!listen_to_virtual_ips && if_name != NULL
1371	    && (strchr(if_name, ':') != NULL)) {
1372
1373		DPRINTF(4, ("virtual ip - ignore\n"));
1374		return ACTION_IGNORE;
1375	}
1376
1377	/*
1378	 * If there are no --interface/-I command-line options and no
1379	 * interface/nic rules in ntp.conf, the default action is to
1380	 * listen.  In the presence of rules from either, the default
1381	 * is to ignore.  This implements ntpd's traditional listen-
1382	 * every default with no interface listen configuration, and
1383	 * ensures a single -I eth0 or "nic listen eth0" means do not
1384	 * listen on any other addresses.
1385	 */
1386	if (NULL == nic_rule_list) {
1387		DPRINTF(4, ("default listen\n"));
1388		return ACTION_LISTEN;
1389	}
1390
1391	DPRINTF(4, ("implicit ignore\n"));
1392	return ACTION_IGNORE;
1393}
1394
1395
1396static void
1397convert_isc_if(
1398	isc_interface_t *isc_if,
1399	endpt *itf,
1400	u_short port
1401	)
1402{
1403	const u_char v6loop[16] = {0, 0, 0, 0, 0, 0, 0, 0,
1404				   0, 0, 0, 0, 0, 0, 0, 1};
1405
1406	strlcpy(itf->name, isc_if->name, sizeof(itf->name));
1407	itf->ifindex = isc_if->ifindex;
1408	itf->family = (u_short)isc_if->af;
1409	AF(&itf->sin) = itf->family;
1410	AF(&itf->mask) = itf->family;
1411	AF(&itf->bcast) = itf->family;
1412	SET_PORT(&itf->sin, port);
1413	SET_PORT(&itf->mask, port);
1414	SET_PORT(&itf->bcast, port);
1415
1416	if (IS_IPV4(&itf->sin)) {
1417		NSRCADR(&itf->sin) = isc_if->address.type.in.s_addr;
1418		NSRCADR(&itf->mask) = isc_if->netmask.type.in.s_addr;
1419
1420		if (isc_if->flags & INTERFACE_F_BROADCAST) {
1421			itf->flags |= INT_BROADCAST;
1422			NSRCADR(&itf->bcast) =
1423			    isc_if->broadcast.type.in.s_addr;
1424		}
1425	}
1426#ifdef INCLUDE_IPV6_SUPPORT
1427	else if (IS_IPV6(&itf->sin)) {
1428		SET_ADDR6N(&itf->sin, isc_if->address.type.in6);
1429		SET_ADDR6N(&itf->mask, isc_if->netmask.type.in6);
1430
1431		SET_SCOPE(&itf->sin, isc_if->address.zone);
1432	}
1433#endif /* INCLUDE_IPV6_SUPPORT */
1434
1435
1436	/* Process the rest of the flags */
1437
1438	itf->flags |=
1439		  ((INTERFACE_F_UP & isc_if->flags)
1440			? INT_UP : 0)
1441		| ((INTERFACE_F_LOOPBACK & isc_if->flags)
1442			? INT_LOOPBACK : 0)
1443		| ((INTERFACE_F_POINTTOPOINT & isc_if->flags)
1444			? INT_PPP : 0)
1445		| ((INTERFACE_F_MULTICAST & isc_if->flags)
1446			? INT_MULTICAST : 0)
1447		| ((INTERFACE_F_PRIVACY & isc_if->flags)
1448			? INT_PRIVACY : 0)
1449		;
1450
1451	/*
1452	 * Clear the loopback flag if the address is not localhost.
1453	 * http://bugs.ntp.org/1683
1454	 */
1455	if (INT_LOOPBACK & itf->flags) {
1456		if (AF_INET == itf->family) {
1457			if (127 != (SRCADR(&itf->sin) >> 24))
1458				itf->flags &= ~INT_LOOPBACK;
1459		} else {
1460			if (memcmp(v6loop, NSRCADR6(&itf->sin),
1461				   sizeof(NSRCADR6(&itf->sin))))
1462				itf->flags &= ~INT_LOOPBACK;
1463		}
1464	}
1465}
1466
1467
1468/*
1469 * refresh_interface
1470 *
1471 * some OSes have been observed to keep
1472 * cached routes even when more specific routes
1473 * become available.
1474 * this can be mitigated by re-binding
1475 * the socket.
1476 */
1477static int
1478refresh_interface(
1479	struct interface * interface
1480	)
1481{
1482#ifdef  OS_MISSES_SPECIFIC_ROUTE_UPDATES
1483	if (interface->fd != INVALID_SOCKET) {
1484		int bcast = (interface->flags & INT_BCASTXMIT) != 0;
1485		/* as we forcibly close() the socket remove the
1486		   broadcast permission indication */
1487		if (bcast)
1488			socket_broadcast_disable(interface, &interface->sin);
1489
1490		close_and_delete_fd_from_list(interface->fd);
1491
1492		/* create new socket picking up a new first hop binding
1493		   at connect() time */
1494		interface->fd = open_socket(&interface->sin,
1495					    bcast, 0, interface);
1496		 /*
1497		  * reset TTL indication so TTL is is set again
1498		  * next time around
1499		  */
1500		interface->last_ttl = 0;
1501		return (interface->fd != INVALID_SOCKET);
1502	} else
1503		return 0;	/* invalid sockets are not refreshable */
1504#else /* !OS_MISSES_SPECIFIC_ROUTE_UPDATES */
1505	return (interface->fd != INVALID_SOCKET);
1506#endif /* !OS_MISSES_SPECIFIC_ROUTE_UPDATES */
1507}
1508
1509/*
1510 * interface_update - externally callable update function
1511 */
1512void
1513interface_update(
1514	interface_receiver_t	receiver,
1515	void *			data)
1516{
1517	int new_interface_found;
1518
1519	if (disable_dynamic_updates)
1520		return;
1521
1522	BLOCKIO();
1523	new_interface_found = update_interfaces(NTP_PORT, receiver, data);
1524	UNBLOCKIO();
1525
1526	if (!new_interface_found)
1527		return;
1528
1529#ifdef DEBUG
1530	msyslog(LOG_DEBUG, "new interface(s) found: waking up resolver");
1531#endif
1532	interrupt_worker_sleep();
1533}
1534
1535
1536/*
1537 * sau_from_netaddr() - convert network address on-wire formats.
1538 * Convert from libisc's isc_netaddr_t to NTP's sockaddr_u
1539 */
1540void
1541sau_from_netaddr(
1542	sockaddr_u *psau,
1543	const isc_netaddr_t *pna
1544	)
1545{
1546	ZERO_SOCK(psau);
1547	AF(psau) = (u_short)pna->family;
1548	switch (pna->family) {
1549
1550	case AF_INET:
1551		memcpy(&psau->sa4.sin_addr, &pna->type.in,
1552		       sizeof(psau->sa4.sin_addr));
1553		break;
1554
1555	case AF_INET6:
1556		memcpy(&psau->sa6.sin6_addr, &pna->type.in6,
1557		       sizeof(psau->sa6.sin6_addr));
1558		break;
1559	}
1560}
1561
1562
1563static int
1564is_wildcard_addr(
1565	const sockaddr_u *psau
1566	)
1567{
1568	if (IS_IPV4(psau) && !NSRCADR(psau))
1569		return 1;
1570
1571#ifdef INCLUDE_IPV6_SUPPORT
1572	if (IS_IPV6(psau) && S_ADDR6_EQ(psau, &in6addr_any))
1573		return 1;
1574#endif
1575
1576	return 0;
1577}
1578
1579
#ifdef OS_NEEDS_REUSEADDR_FOR_IFADDRBIND
/*
 * set_wildcard_reuse - switch SO_REUSEADDR on (on != 0) or off on the
 * wildcard socket of the given address family.  Nothing happens if
 * that family has no wildcard endpoint or the endpoint has no valid
 * socket.  A setsockopt() failure is logged, not returned.
 */
static void
set_wildcard_reuse(
	u_short	family,
	int	on
	)
{
	struct interface *	wild;

	wild = ANY_INTERFACE_BYFAM(family);
	if (NULL == wild || INVALID_SOCKET == wild->fd)
		return;

	if (setsockopt(wild->fd, SOL_SOCKET, SO_REUSEADDR,
		       (char *)&on, sizeof(on)))
		msyslog(LOG_ERR,
			"set_wildcard_reuse: setsockopt(SO_REUSEADDR, %s) failed: %m",
			on ? "on" : "off");

	DPRINTF(4, ("set SO_REUSEADDR to %s on %s\n",
		    on ? "on" : "off",
		    stoa(&wild->sin)));
}
#endif /* OS_NEEDS_REUSEADDR_FOR_IFADDRBIND */
1610
1611
1612static isc_boolean_t
1613check_flags6(
1614	sockaddr_u *psau,
1615	const char *name,
1616	u_int32 flags6
1617	)
1618{
1619#if defined(INCLUDE_IPV6_SUPPORT) && defined(SIOCGIFAFLAG_IN6)
1620	struct in6_ifreq ifr6;
1621	int fd;
1622
1623	if (psau->sa.sa_family != AF_INET6)
1624		return ISC_FALSE;
1625	if ((fd = socket(AF_INET6, SOCK_DGRAM, 0)) < 0)
1626		return ISC_FALSE;
1627	ZERO(ifr6);
1628	memcpy(&ifr6.ifr_addr, &psau->sa6, sizeof(ifr6.ifr_addr));
1629	strlcpy(ifr6.ifr_name, name, sizeof(ifr6.ifr_name));
1630	if (ioctl(fd, SIOCGIFAFLAG_IN6, &ifr6) < 0) {
1631		close(fd);
1632		return ISC_FALSE;
1633	}
1634	close(fd);
1635	if ((ifr6.ifr_ifru.ifru_flags6 & flags6) != 0)
1636		return ISC_TRUE;
1637#endif	/* INCLUDE_IPV6_SUPPORT && SIOCGIFAFLAG_IN6 */
1638	return ISC_FALSE;
1639}
1640
1641static isc_boolean_t
1642is_anycast(
1643	sockaddr_u *psau,
1644	const char *name
1645	)
1646{
1647#ifdef IN6_IFF_ANYCAST
1648	return check_flags6(psau, name, IN6_IFF_ANYCAST);
1649#else
1650	return ISC_FALSE;
1651#endif
1652}
1653
1654static isc_boolean_t
1655is_valid(
1656	sockaddr_u *psau,
1657	const char *name
1658	)
1659{
1660	u_int32 flags6;
1661
1662	flags6 = 0;
1663#ifdef IN6_IFF_DEPARTED
1664	flags6 |= IN6_IFF_DEPARTED;
1665#endif
1666#ifdef IN6_IFF_DETACHED
1667	flags6 |= IN6_IFF_DETACHED;
1668#endif
1669#ifdef IN6_IFF_TENTATIVE
1670	flags6 |= IN6_IFF_TENTATIVE;
1671#endif
1672	return check_flags6(psau, name, flags6) ? ISC_FALSE : ISC_TRUE;
1673}
1674
1675/*
1676 * update_interface strategy
1677 *
1678 * toggle configuration phase
1679 *
1680 * Phase 1:
1681 * forall currently existing interfaces
1682 *   if address is known:
1683 *	drop socket - rebind again
1684 *
1685 *   if address is NOT known:
1686 *	attempt to create a new interface entry
1687 *
1688 * Phase 2:
1689 * forall currently known non MCAST and WILDCARD interfaces
1690 *   if interface does not match configuration phase (not seen in phase 1):
1691 *	remove interface from known interface list
1692 *	forall peers associated with this interface
1693 *         disconnect peer from this interface
1694 *
1695 * Phase 3:
1696 *   attempt to re-assign interfaces to peers
1697 *
1698 */
1699
1700static int
1701update_interfaces(
1702	u_short			port,
1703	interface_receiver_t	receiver,
1704	void *			data
1705	)
1706{
1707	isc_mem_t *		mctx = (void *)-1;
1708	interface_info_t	ifi;
1709	isc_interfaceiter_t *	iter;
1710	isc_result_t		result;
1711	isc_interface_t		isc_if;
1712	int			new_interface_found;
1713	unsigned int		family;
1714	endpt			enumep;
1715	endpt *			ep;
1716	endpt *			next_ep;
1717
1718	DPRINTF(3, ("update_interfaces(%d)\n", port));
1719
1720	/*
1721	 * phase one - scan interfaces
1722	 * - create those that are not found
1723	 * - update those that are found
1724	 */
1725
1726	new_interface_found = FALSE;
1727	iter = NULL;
1728	result = isc_interfaceiter_create(mctx, &iter);
1729
1730	if (result != ISC_R_SUCCESS)
1731		return 0;
1732
1733	/*
1734	 * Toggle system interface scan phase to find untouched
1735	 * interfaces to be deleted.
1736	 */
1737	sys_interphase ^= 0x1;
1738
1739	for (result = isc_interfaceiter_first(iter);
1740	     ISC_R_SUCCESS == result;
1741	     result = isc_interfaceiter_next(iter)) {
1742
1743		result = isc_interfaceiter_current(iter, &isc_if);
1744
1745		if (result != ISC_R_SUCCESS)
1746			break;
1747
1748		/* See if we have a valid family to use */
1749		family = isc_if.address.family;
1750		if (AF_INET != family && AF_INET6 != family)
1751			continue;
1752		if (AF_INET == family && !ipv4_works)
1753			continue;
1754		if (AF_INET6 == family && !ipv6_works)
1755			continue;
1756
1757		/* create prototype */
1758		init_interface(&enumep);
1759
1760		convert_isc_if(&isc_if, &enumep, port);
1761
1762		DPRINT_INTERFACE(4, (&enumep, "examining ", "\n"));
1763
1764		/*
1765		 * Check if and how we are going to use the interface.
1766		 */
1767		switch (interface_action(enumep.name, &enumep.sin,
1768					 enumep.flags)) {
1769
1770		case ACTION_IGNORE:
1771			DPRINTF(4, ("ignoring interface %s (%s) - by nic rules\n",
1772				    enumep.name, stoa(&enumep.sin)));
1773			continue;
1774
1775		case ACTION_LISTEN:
1776			DPRINTF(4, ("listen interface %s (%s) - by nic rules\n",
1777				    enumep.name, stoa(&enumep.sin)));
1778			enumep.ignore_packets = ISC_FALSE;
1779			break;
1780
1781		case ACTION_DROP:
1782			DPRINTF(4, ("drop on interface %s (%s) - by nic rules\n",
1783				    enumep.name, stoa(&enumep.sin)));
1784			enumep.ignore_packets = ISC_TRUE;
1785			break;
1786		}
1787
1788		 /* interfaces must be UP to be usable */
1789		if (!(enumep.flags & INT_UP)) {
1790			DPRINTF(4, ("skipping interface %s (%s) - DOWN\n",
1791				    enumep.name, stoa(&enumep.sin)));
1792			continue;
1793		}
1794
1795		/*
1796		 * skip any interfaces UP and bound to a wildcard
1797		 * address - some dhcp clients produce that in the
1798		 * wild
1799		 */
1800		if (is_wildcard_addr(&enumep.sin))
1801			continue;
1802
1803		if (is_anycast(&enumep.sin, isc_if.name))
1804			continue;
1805
1806		/*
1807		 * skip any address that is an invalid state to be used
1808		 */
1809		if (!is_valid(&enumep.sin, isc_if.name))
1810			continue;
1811
1812		/*
1813		 * map to local *address* in order to map all duplicate
1814		 * interfaces to an endpt structure with the appropriate
1815		 * socket.  Our name space is (ip-address), NOT
1816		 * (interface name, ip-address).
1817		 */
1818		ep = getinterface(&enumep.sin, INT_WILDCARD);
1819
1820		if (ep != NULL && refresh_interface(ep)) {
1821			/*
1822			 * found existing and up to date interface -
1823			 * mark present.
1824			 */
1825			if (ep->phase != sys_interphase) {
1826				/*
1827				 * On a new round we reset the name so
1828				 * the interface name shows up again if
1829				 * this address is no longer shared.
1830				 * We reset ignore_packets from the
1831				 * new prototype to respect any runtime
1832				 * changes to the nic rules.
1833				 */
1834				strlcpy(ep->name, enumep.name,
1835					sizeof(ep->name));
1836				ep->ignore_packets =
1837					    enumep.ignore_packets;
1838			} else {
1839				/* name collision - rename interface */
1840				strlcpy(ep->name, "*multiple*",
1841					sizeof(ep->name));
1842			}
1843
1844			DPRINT_INTERFACE(4, (ep, "updating ",
1845					     " present\n"));
1846
1847			if (ep->ignore_packets !=
1848			    enumep.ignore_packets) {
1849				/*
1850				 * We have conflicting configurations
1851				 * for the interface address. This is
1852				 * caused by using -I <interfacename>
1853				 * for an interface that shares its
1854				 * address with other interfaces. We
1855				 * can not disambiguate incoming
1856				 * packets delivered to this socket
1857				 * without extra syscalls/features.
1858				 * These are not (commonly) available.
1859				 * Note this is a more unusual
1860				 * configuration where several
1861				 * interfaces share an address but
1862				 * filtering via interface name is
1863				 * attempted.  We resolve the
1864				 * configuration conflict by disabling
1865				 * the processing of received packets.
1866				 * This leads to no service on the
1867				 * interface address where the conflict
1868				 * occurs.
1869				 */
1870				msyslog(LOG_ERR,
1871					"WARNING: conflicting enable configuration for interfaces %s and %s for address %s - unsupported configuration - address DISABLED",
1872					enumep.name, ep->name,
1873					stoa(&enumep.sin));
1874
1875				ep->ignore_packets = ISC_TRUE;
1876			}
1877
1878			ep->phase = sys_interphase;
1879
1880			ifi.action = IFS_EXISTS;
1881			ifi.ep = ep;
1882			if (receiver != NULL)
1883				(*receiver)(data, &ifi);
1884		} else {
1885			/*
1886			 * This is new or refreshing failed - add to
1887			 * our interface list.  If refreshing failed we
1888			 * will delete the interface structure in phase
1889			 * 2 as the interface was not marked current.
1890			 * We can bind to the address as the refresh
1891			 * code already closed the offending socket
1892			 */
1893			ep = create_interface(port, &enumep);
1894
1895			if (ep != NULL) {
1896				ifi.action = IFS_CREATED;
1897				ifi.ep = ep;
1898				if (receiver != NULL)
1899					(*receiver)(data, &ifi);
1900
1901				new_interface_found = TRUE;
1902				DPRINT_INTERFACE(3,
1903					(ep, "updating ",
1904					 " new - created\n"));
1905			} else {
1906				DPRINT_INTERFACE(3,
1907					(&enumep, "updating ",
1908					 " new - creation FAILED"));
1909
1910				msyslog(LOG_INFO,
1911					"failed to init interface for address %s",
1912					stoa(&enumep.sin));
1913				continue;
1914			}
1915		}
1916	}
1917
1918	isc_interfaceiter_destroy(&iter);
1919
1920	/*
1921	 * phase 2 - delete gone interfaces - reassigning peers to
1922	 * other interfaces
1923	 */
1924	for (ep = ep_list; ep != NULL; ep = next_ep) {
1925		next_ep = ep->elink;
1926
1927		/*
1928		 * if phase does not match sys_phase this interface was
1929		 * not enumerated during the last interface scan - so it
1930		 * is gone and will be deleted here unless it did not
1931		 * originate from interface enumeration (INT_WILDCARD,
1932		 * INT_MCASTIF).
1933		 */
1934		if (((INT_WILDCARD | INT_MCASTIF) & ep->flags) ||
1935		    ep->phase == sys_interphase)
1936			continue;
1937
1938		DPRINT_INTERFACE(3, (ep, "updating ",
1939				     "GONE - deleting\n"));
1940		remove_interface(ep);
1941
1942		ifi.action = IFS_DELETED;
1943		ifi.ep = ep;
1944		if (receiver != NULL)
1945			(*receiver)(data, &ifi);
1946
1947		/* disconnect peers from deleted endpt. */
1948		while (ep->peers != NULL)
1949			set_peerdstadr(ep->peers, NULL);
1950
1951		/*
1952		 * update globals in case we lose
1953		 * a loopback interface
1954		 */
1955		if (ep == loopback_interface)
1956			loopback_interface = NULL;
1957
1958		delete_interface(ep);
1959	}
1960
1961	/*
1962	 * phase 3 - re-configure as the world has possibly changed
1963	 *
1964	 * never ever make this conditional again - it is needed to track
1965	 * routing updates. see bug #2506
1966	 */
1967	refresh_all_peerinterfaces();
1968
1969	if (broadcast_client_enabled)
1970		io_setbclient();
1971
1972	if (sys_bclient)
1973		io_setbclient();
1974
1975#ifdef MCAST
1976	/*
1977	 * Check multicast interfaces and try to join multicast groups if
1978         * not joined yet.
1979         */
1980	for (ep = ep_list; ep != NULL; ep = ep->elink) {
1981		remaddr_t *entry;
1982
1983		if (!(INT_MCASTIF & ep->flags) || (INT_MCASTOPEN & ep->flags))
1984			continue;
1985
1986		/* Find remote address that was linked to this interface */
1987		for (entry = remoteaddr_list;
1988		     entry != NULL;
1989		     entry = entry->link) {
1990			if (entry->ep == ep) {
1991				if (socket_multicast_enable(ep, &entry->addr)) {
1992					msyslog(LOG_INFO,
1993						"Joined %s socket to multicast group %s",
1994						stoa(&ep->sin),
1995						stoa(&entry->addr));
1996				}
1997				break;
1998			}
1999		}
2000	}
2001#endif /* MCAST */
2002
2003	return new_interface_found;
2004}
2005
2006
2007/*
2008 * create_sockets - create a socket for each interface plus a default
2009 *			socket for when we don't know where to send
2010 */
2011static int
2012create_sockets(
2013	u_short port
2014	)
2015{
2016#ifndef HAVE_IO_COMPLETION_PORT
2017	/*
2018	 * I/O Completion Ports don't care about the select and FD_SET
2019	 */
2020	maxactivefd = 0;
2021	FD_ZERO(&activefds);
2022#endif
2023
2024	DPRINTF(2, ("create_sockets(%d)\n", port));
2025
2026	create_wildcards(port);
2027
2028	update_interfaces(port, NULL, NULL);
2029
2030	/*
2031	 * Now that we have opened all the sockets, turn off the reuse
2032	 * flag for security.
2033	 */
2034	set_reuseaddr(0);
2035
2036	DPRINTF(2, ("create_sockets: Total interfaces = %d\n", ninterfaces));
2037
2038	return ninterfaces;
2039}
2040
2041/*
2042 * create_interface - create a new interface for a given prototype
2043 *		      binding the socket.
2044 */
2045static struct interface *
2046create_interface(
2047	u_short			port,
2048	struct interface *	protot
2049	)
2050{
2051	sockaddr_u	resmask;
2052	endpt *		iface;
2053#if defined(MCAST) && defined(MULTICAST_NONEWSOCKET)
2054	remaddr_t *	entry;
2055	remaddr_t *	next_entry;
2056#endif
2057	DPRINTF(2, ("create_interface(%s#%d)\n", stoa(&protot->sin),
2058		    port));
2059
2060	/* build an interface */
2061	iface = new_interface(protot);
2062
2063	/*
2064	 * create socket
2065	 */
2066	iface->fd = open_socket(&iface->sin, 0, 0, iface);
2067
2068	if (iface->fd != INVALID_SOCKET)
2069		log_listen_address(iface);
2070
2071	if ((INT_BROADCAST & iface->flags)
2072	    && iface->bfd != INVALID_SOCKET)
2073		msyslog(LOG_INFO, "Listening on broadcast address %s#%d",
2074			stoa((&iface->bcast)), port);
2075
2076	if (INVALID_SOCKET == iface->fd
2077	    && INVALID_SOCKET == iface->bfd) {
2078		msyslog(LOG_ERR, "unable to create socket on %s (%d) for %s#%d",
2079			iface->name,
2080			iface->ifnum,
2081			stoa((&iface->sin)),
2082			port);
2083		delete_interface(iface);
2084		return NULL;
2085	}
2086
2087	/*
2088	 * Blacklist our own addresses, no use talking to ourself
2089	 */
2090	SET_HOSTMASK(&resmask, AF(&iface->sin));
2091	hack_restrict(RESTRICT_FLAGS, &iface->sin, &resmask,
2092		      RESM_NTPONLY | RESM_INTERFACE, RES_IGNORE, 0);
2093
2094	/*
2095	 * set globals with the first found
2096	 * loopback interface of the appropriate class
2097	 */
2098	if (NULL == loopback_interface && AF_INET == iface->family
2099	    && (INT_LOOPBACK & iface->flags))
2100		loopback_interface = iface;
2101
2102	/*
2103	 * put into our interface list
2104	 */
2105	add_addr_to_list(&iface->sin, iface);
2106	add_interface(iface);
2107
2108#if defined(MCAST) && defined(MULTICAST_NONEWSOCKET)
2109	/*
2110	 * Join any previously-configured compatible multicast groups.
2111	 */
2112	if (INT_MULTICAST & iface->flags &&
2113	    !((INT_LOOPBACK | INT_WILDCARD) & iface->flags) &&
2114	    !iface->ignore_packets) {
2115		for (entry = remoteaddr_list;
2116		     entry != NULL;
2117		     entry = next_entry) {
2118			next_entry = entry->link;
2119			if (AF(&iface->sin) != AF(&entry->addr) ||
2120			    !IS_MCAST(&entry->addr))
2121				continue;
2122			if (socket_multicast_enable(iface,
2123						    &entry->addr))
2124				msyslog(LOG_INFO,
2125					"Joined %s socket to multicast group %s",
2126					stoa(&iface->sin),
2127					stoa(&entry->addr));
2128			else
2129				msyslog(LOG_ERR,
2130					"Failed to join %s socket to multicast group %s",
2131					stoa(&iface->sin),
2132					stoa(&entry->addr));
2133		}
2134	}
2135#endif	/* MCAST && MCAST_NONEWSOCKET */
2136
2137	DPRINT_INTERFACE(2, (iface, "created ", "\n"));
2138	return iface;
2139}
2140
2141
#ifdef SO_EXCLUSIVEADDRUSE
/*
 * set_excladdruse - set SO_EXCLUSIVEADDRUSE on a socket, logging a
 * failure unless it is the tolerated pre-XP Windows case described
 * below.
 */
static void
set_excladdruse(
	SOCKET fd
	)
{
	int enable = 1;
#ifdef SYS_WINNT
	DWORD err;
#endif

	if (0 == setsockopt(fd, SOL_SOCKET, SO_EXCLUSIVEADDRUSE,
			    (char *)&enable, sizeof(enable)))
		return;

#ifdef SYS_WINNT
	/*
	 * Prior to Windows XP setting SO_EXCLUSIVEADDRUSE can fail with
	 * error WSAEINVAL depending on service pack level and whether
	 * the user account is in the Administrators group.  Do not
	 * complain if it fails that way on versions prior to XP (5.1).
	 */
	err = GetLastError();

	if (WSAEINVAL == err
	    && isc_win32os_versioncheck(5, 1, 0, 0) < 0)	/* < 5.1/XP */
		return;

	/* put the error code back for the %m in the message below */
	SetLastError(err);
#endif
	msyslog(LOG_ERR,
		"setsockopt(%d, SO_EXCLUSIVEADDRUSE, on): %m",
		(int)fd);
}
#endif  /* SO_EXCLUSIVEADDRUSE */
2180
2181
2182/*
2183 * set_reuseaddr() - set/clear REUSEADDR on all sockets
2184 *			NB possible hole - should we be doing this on broadcast
2185 *			fd's also?
2186 */
2187static void
2188set_reuseaddr(
2189	int flag
2190	)
2191{
2192#ifndef SO_EXCLUSIVEADDRUSE
2193	endpt *ep;
2194
2195	for (ep = ep_list; ep != NULL; ep = ep->elink) {
2196		if (ep->flags & INT_WILDCARD)
2197			continue;
2198
2199		/*
2200		 * if ep->fd  is INVALID_SOCKET, we might have a adapter
2201		 * configured but not present
2202		 */
2203		DPRINTF(4, ("setting SO_REUSEADDR on %.16s@%s to %s\n",
2204			    ep->name, stoa(&ep->sin),
2205			    flag ? "on" : "off"));
2206
2207		if (ep->fd != INVALID_SOCKET) {
2208			if (setsockopt(ep->fd, SOL_SOCKET, SO_REUSEADDR,
2209				       (char *)&flag, sizeof(flag))) {
2210				msyslog(LOG_ERR, "set_reuseaddr: setsockopt(%s, SO_REUSEADDR, %s) failed: %m",
2211					stoa(&ep->sin), flag ? "on" : "off");
2212			}
2213		}
2214	}
2215#endif /* ! SO_EXCLUSIVEADDRUSE */
2216}
2217
2218/*
2219 * This is just a wrapper around an internal function so we can
2220 * make other changes as necessary later on
2221 */
2222void
2223enable_broadcast(
2224	struct interface *	iface,
2225	sockaddr_u *		baddr
2226	)
2227{
2228#ifdef OPEN_BCAST_SOCKET
2229	socket_broadcast_enable(iface, iface->fd, baddr);
2230#endif
2231}
2232
2233#ifdef OPEN_BCAST_SOCKET
2234/*
2235 * Enable a broadcast address to a given socket
2236 * The socket is in the ep_list all we need to do is enable
2237 * broadcasting. It is not this function's job to select the socket
2238 */
2239static isc_boolean_t
2240socket_broadcast_enable(
2241	struct interface *	iface,
2242	SOCKET			fd,
2243	sockaddr_u *		baddr
2244	)
2245{
2246#ifdef SO_BROADCAST
2247	int on = 1;
2248
2249	if (IS_IPV4(baddr)) {
2250		/* if this interface can support broadcast, set SO_BROADCAST */
2251		if (setsockopt(fd, SOL_SOCKET, SO_BROADCAST,
2252			       (char *)&on, sizeof(on)))
2253			msyslog(LOG_ERR,
2254				"setsockopt(SO_BROADCAST) enable failure on address %s: %m",
2255				stoa(baddr));
2256		else
2257			DPRINTF(2, ("Broadcast enabled on socket %d for address %s\n",
2258				    fd, stoa(baddr)));
2259	}
2260	iface->flags |= INT_BCASTXMIT;
2261	return ISC_TRUE;
2262#else
2263	return ISC_FALSE;
2264#endif /* SO_BROADCAST */
2265}
2266
#ifdef  OS_MISSES_SPECIFIC_ROUTE_UPDATES
/*
 * socket_broadcast_disable - disable broadcasting on an endpoint's
 * socket (iface->fd).
 *
 * The socket is in the ep_list; all this function does is disable
 * broadcasting.  It is not this function's job to select the socket.
 * SO_BROADCAST is only cleared for IPv4 addresses; INT_BCASTXMIT is
 * cleared from the endpoint's flags in any case.
 *
 * Returns ISC_TRUE where SO_BROADCAST is defined, even if the
 * setsockopt() call failed (the failure is logged only), and
 * ISC_FALSE where it is not defined.
 */
static isc_boolean_t
socket_broadcast_disable(
	struct interface *	iface,
	sockaddr_u *		baddr
	)
{
#ifdef SO_BROADCAST
	int disable = 0;	/* This seems to be OK as an int */

	if (IS_IPV4(baddr)) {
		if (setsockopt(iface->fd, SOL_SOCKET, SO_BROADCAST,
			       (char *)&disable, sizeof(disable))) {
			msyslog(LOG_ERR,
				"setsockopt(SO_BROADCAST) disable failure on address %s: %m",
				stoa(baddr));
		}
	}

	iface->flags &= ~INT_BCASTXMIT;
	return ISC_TRUE;
#else
	return ISC_FALSE;
#endif /* SO_BROADCAST */
}
#endif /* OS_MISSES_SPECIFIC_ROUTE_UPDATES */
2295
2296#endif /* OPEN_BCAST_SOCKET */
2297
2298/*
2299 * return the broadcast client flag value
2300 */
2301isc_boolean_t
2302get_broadcastclient_flag(void)
2303{
2304	return (broadcast_client_enabled);
2305}
2306
2307/*
2308 * Check to see if the address is a multicast address
2309 */
2310static isc_boolean_t
2311addr_ismulticast(
2312	sockaddr_u *maddr
2313	)
2314{
2315	isc_boolean_t result;
2316
2317#ifndef INCLUDE_IPV6_MULTICAST_SUPPORT
2318	/*
2319	 * If we don't have IPV6 support any IPV6 addr is not multicast
2320	 */
2321	if (IS_IPV6(maddr))
2322		result = ISC_FALSE;
2323	else
2324#endif
2325		result = IS_MCAST(maddr);
2326
2327	if (!result)
2328		DPRINTF(4, ("address %s is not multicast\n",
2329			    stoa(maddr)));
2330
2331	return result;
2332}
2333
2334/*
2335 * Multicast servers need to set the appropriate Multicast interface
2336 * socket option in order for it to know which interface to use for
2337 * send the multicast packet.
2338 */
2339void
2340enable_multicast_if(
2341	struct interface *	iface,
2342	sockaddr_u *		maddr
2343	)
2344{
2345#ifdef MCAST
2346#ifdef IP_MULTICAST_LOOP
2347	TYPEOF_IP_MULTICAST_LOOP off = 0;
2348#endif
2349#if defined(INCLUDE_IPV6_MULTICAST_SUPPORT) && defined(IPV6_MULTICAST_LOOP)
2350	u_int off6 = 0;
2351#endif
2352
2353	REQUIRE(AF(maddr) == AF(&iface->sin));
2354
2355	switch (AF(&iface->sin)) {
2356
2357	case AF_INET:
2358#ifdef IP_MULTICAST_LOOP
2359		/*
2360		 * Don't send back to itself, but allow failure to set
2361		 */
2362		if (setsockopt(iface->fd, IPPROTO_IP,
2363			       IP_MULTICAST_LOOP,
2364			       SETSOCKOPT_ARG_CAST &off,
2365			       sizeof(off))) {
2366
2367			msyslog(LOG_ERR,
2368				"setsockopt IP_MULTICAST_LOOP failed: %m on socket %d, addr %s for multicast address %s",
2369				iface->fd, stoa(&iface->sin),
2370				stoa(maddr));
2371		}
2372#endif
2373		break;
2374
2375	case AF_INET6:
2376#ifdef INCLUDE_IPV6_MULTICAST_SUPPORT
2377#ifdef IPV6_MULTICAST_LOOP
2378		/*
2379		 * Don't send back to itself, but allow failure to set
2380		 */
2381		if (setsockopt(iface->fd, IPPROTO_IPV6,
2382			       IPV6_MULTICAST_LOOP,
2383			       (char *) &off6, sizeof(off6))) {
2384
2385			msyslog(LOG_ERR,
2386				"setsockopt IPV6_MULTICAST_LOOP failed: %m on socket %d, addr %s for multicast address %s",
2387				iface->fd, stoa(&iface->sin),
2388				stoa(maddr));
2389		}
2390#endif
2391		break;
2392#else
2393		return;
2394#endif	/* INCLUDE_IPV6_MULTICAST_SUPPORT */
2395	}
2396	return;
2397#endif
2398}
2399
/*
 * socket_multicast_enable - join an endpoint socket to a multicast group
 *
 * IPv4: IP_ADD_MEMBERSHIP for maddr with the interface left as
 * INADDR_ANY.  IPv6 (only with INCLUDE_IPV6_MULTICAST_SUPPORT):
 * IPV6_JOIN_GROUP for maddr on the endpoint's ifindex.
 *
 * On success the endpoint gains INT_MCASTOPEN and its num_mcast
 * counter is incremented.  A failed join is only reported through
 * DPRINTF.  Choosing the socket is the caller's responsibility.
 *
 * Returns ISC_TRUE on success, ISC_FALSE if the join failed or the
 * address is IPv6 and IPv6 multicast support is not compiled in.
 */
#if defined(MCAST)
static isc_boolean_t
socket_multicast_enable(
	endpt *		iface,
	sockaddr_u *	maddr
	)
{
	struct ip_mreq		req4;
# ifdef INCLUDE_IPV6_MULTICAST_SUPPORT
	struct ipv6_mreq	req6;
# endif
	int			rc;

	switch (AF(maddr)) {

	case AF_INET:
		ZERO(req4);
		req4.imr_multiaddr = SOCK_ADDR4(maddr);
		req4.imr_interface.s_addr = htonl(INADDR_ANY);
		rc = setsockopt(iface->fd, IPPROTO_IP, IP_ADD_MEMBERSHIP,
				(char *)&req4, sizeof(req4));
		if (rc != 0) {
			DPRINTF(2, (
				"setsockopt IP_ADD_MEMBERSHIP failed: %m on socket %d, addr %s for %x / %x (%s)",
				iface->fd, stoa(&iface->sin),
				req4.imr_multiaddr.s_addr,
				req4.imr_interface.s_addr,
				stoa(maddr)));
			return ISC_FALSE;
		}
		DPRINTF(4, ("Added IPv4 multicast membership on socket %d, addr %s for %x / %x (%s)\n",
			    iface->fd, stoa(&iface->sin),
			    req4.imr_multiaddr.s_addr,
			    req4.imr_interface.s_addr, stoa(maddr)));
		break;

	case AF_INET6:
# ifndef INCLUDE_IPV6_MULTICAST_SUPPORT
		return ISC_FALSE;
# else
		/*
		 * Enable reception of multicast packets on the
		 * interface identified by the endpoint's ifindex.
		 */
		ZERO(req6);
		req6.ipv6mr_multiaddr = SOCK_ADDR6(maddr);
		req6.ipv6mr_interface = iface->ifindex;
		rc = setsockopt(iface->fd, IPPROTO_IPV6, IPV6_JOIN_GROUP,
				(char *)&req6, sizeof(req6));
		if (rc != 0) {
			DPRINTF(2, (
				"setsockopt IPV6_JOIN_GROUP failed: %m on socket %d, addr %s for interface %u (%s)",
				iface->fd, stoa(&iface->sin),
				req6.ipv6mr_interface, stoa(maddr)));
			return ISC_FALSE;
		}
		DPRINTF(4, ("Added IPv6 multicast group on socket %d, addr %s for interface %u (%s)\n",
			    iface->fd, stoa(&iface->sin),
			    req6.ipv6mr_interface, stoa(maddr)));
		break;
# endif	/* INCLUDE_IPV6_MULTICAST_SUPPORT */
	}

	/* book-keeping for a successful join */
	iface->num_mcast++;
	iface->flags |= INT_MCASTOPEN;

	return ISC_TRUE;
}
#endif	/* MCAST */
2476
2477
/*
 * socket_multicast_disable - drop an endpoint socket from a multicast group
 *
 * If maddr is not present in the address list there is nothing to
 * do and ISC_TRUE is returned.  Otherwise the membership is dropped
 * with IP_DROP_MEMBERSHIP (IPv4, interface taken from the endpoint
 * address) or IPV6_LEAVE_GROUP (IPv6, interface taken from the
 * endpoint's ifindex; only with INCLUDE_IPV6_MULTICAST_SUPPORT).
 *
 * After a successful drop num_mcast is decremented and
 * INT_MCASTOPEN is cleared once the counter reaches zero.
 * Choosing the socket is the caller's responsibility.
 *
 * Returns ISC_FALSE when setsockopt() fails (logged) or when the
 * address is IPv6 and IPv6 multicast support is not compiled in;
 * ISC_TRUE otherwise.
 */
#ifdef MCAST
static isc_boolean_t
socket_multicast_disable(
	struct interface *	iface,
	sockaddr_u *		maddr
	)
{
	struct ip_mreq		req4;
# ifdef INCLUDE_IPV6_MULTICAST_SUPPORT
	struct ipv6_mreq	req6;
# endif

	ZERO(req4);

	if (NULL == find_addr_in_list(maddr)) {
		DPRINTF(4, ("socket_multicast_disable(%s): not found\n",
			    stoa(maddr)));
		return ISC_TRUE;
	}

	switch (AF(maddr)) {

	case AF_INET:
		req4.imr_multiaddr = SOCK_ADDR4(maddr);
		req4.imr_interface = SOCK_ADDR4(&iface->sin);
		if (0 != setsockopt(iface->fd, IPPROTO_IP,
				    IP_DROP_MEMBERSHIP, (char *)&req4,
				    sizeof(req4))) {
			msyslog(LOG_ERR,
				"setsockopt IP_DROP_MEMBERSHIP failed: %m on socket %d, addr %s for %x / %x (%s)",
				iface->fd, stoa(&iface->sin),
				SRCADR(maddr), SRCADR(&iface->sin),
				stoa(maddr));
			return ISC_FALSE;
		}
		break;

	case AF_INET6:
# ifndef INCLUDE_IPV6_MULTICAST_SUPPORT
		return ISC_FALSE;
# else
		/*
		 * Disable reception of multicast packets on the
		 * interface identified by the endpoint's ifindex.
		 */
		req6.ipv6mr_multiaddr = SOCK_ADDR6(maddr);
		req6.ipv6mr_interface = iface->ifindex;
		if (0 != setsockopt(iface->fd, IPPROTO_IPV6,
				    IPV6_LEAVE_GROUP, (char *)&req6,
				    sizeof(req6))) {
			msyslog(LOG_ERR,
				"setsockopt IPV6_LEAVE_GROUP failure: %m on socket %d, addr %s for %d (%s)",
				iface->fd, stoa(&iface->sin),
				iface->ifindex, stoa(maddr));
			return ISC_FALSE;
		}
		break;
# endif	/* INCLUDE_IPV6_MULTICAST_SUPPORT */
	}

	/* one membership fewer; clear the flag with the last one */
	iface->num_mcast--;
	if (0 == iface->num_mcast)
		iface->flags &= ~INT_MCASTOPEN;

	return ISC_TRUE;
}
#endif	/* MCAST */
2555
2556/*
2557 * io_setbclient - open the broadcast client sockets
2558 */
2559void
2560io_setbclient(void)
2561{
2562#ifdef OPEN_BCAST_SOCKET
2563	struct interface *	interf;
2564	int			nif;
2565
2566	nif = 0;
2567	set_reuseaddr(1);
2568
2569	for (interf = ep_list;
2570	     interf != NULL;
2571	     interf = interf->elink) {
2572
2573		if (interf->flags & (INT_WILDCARD | INT_LOOPBACK))
2574			continue;
2575
2576		/* use only allowed addresses */
2577		if (interf->ignore_packets)
2578			continue;
2579
2580		/* Need a broadcast-capable interface */
2581		if (!(interf->flags & INT_BROADCAST))
2582			continue;
2583
2584		/* Only IPv4 addresses are valid for broadcast */
2585		REQUIRE(IS_IPV4(&interf->bcast));
2586
2587		/* Do we already have the broadcast address open? */
2588		if (interf->flags & INT_BCASTOPEN) {
2589			/*
2590			 * account for already open interfaces to avoid
2591			 * misleading warning below
2592			 */
2593			nif++;
2594			continue;
2595		}
2596
2597		/*
2598		 * Try to open the broadcast address
2599		 */
2600		interf->family = AF_INET;
2601		interf->bfd = open_socket(&interf->bcast, 1, 0, interf);
2602
2603		/*
2604		 * If we succeeded then we use it otherwise enable
2605		 * broadcast on the interface address
2606		 */
2607		if (interf->bfd != INVALID_SOCKET) {
2608			nif++;
2609			interf->flags |= INT_BCASTOPEN;
2610			msyslog(LOG_INFO,
2611				"Listen for broadcasts to %s on interface #%d %s",
2612				stoa(&interf->bcast), interf->ifnum, interf->name);
2613		} else switch (errno) {
2614			/* Silently ignore EADDRINUSE as we probably
2615			 * opened the socket already for an address in
2616			 * the same network */
2617		case EADDRINUSE:
2618			/* Some systems cannot bind a socket to a broadcast
2619			 * address, as that is not a valid host address. */
2620		case EADDRNOTAVAIL:
2621#		    ifdef SYS_WINNT	/*TODO: use for other systems, too? */
2622			/* avoid recurrence here -- if we already have a
2623			 * regular socket, it's quite useless to try this
2624			 * again.
2625			 */
2626			if (interf->fd != INVALID_SOCKET) {
2627				interf->flags |= INT_BCASTOPEN;
2628				nif++;
2629			}
2630#		    endif
2631			break;
2632
2633		default:
2634			msyslog(LOG_INFO,
2635				"failed to listen for broadcasts to %s on interface #%d %s",
2636				stoa(&interf->bcast), interf->ifnum, interf->name);
2637			break;
2638		}
2639	}
2640	set_reuseaddr(0);
2641	if (nif > 0) {
2642		broadcast_client_enabled = ISC_TRUE;
2643		DPRINTF(1, ("io_setbclient: listening to %d broadcast addresses\n", nif));
2644	}
2645	else if (!nif) {
2646		broadcast_client_enabled = ISC_FALSE;
2647		msyslog(LOG_ERR,
2648			"Unable to listen for broadcasts, no broadcast interfaces available");
2649	}
2650#else
2651	msyslog(LOG_ERR,
2652		"io_setbclient: Broadcast Client disabled by build");
2653#endif	/* OPEN_BCAST_SOCKET */
2654}
2655
2656/*
2657 * io_unsetbclient - close the broadcast client sockets
2658 */
2659void
2660io_unsetbclient(void)
2661{
2662	endpt *ep;
2663
2664	for (ep = ep_list; ep != NULL; ep = ep->elink) {
2665		if (INT_WILDCARD & ep->flags)
2666			continue;
2667		if (!(INT_BCASTOPEN & ep->flags))
2668			continue;
2669
2670		if (ep->bfd != INVALID_SOCKET) {
2671			/* destroy broadcast listening socket */
2672			msyslog(LOG_INFO,
2673				"stop listening for broadcasts to %s on interface #%d %s",
2674				stoa(&ep->bcast), ep->ifnum, ep->name);
2675#		    ifdef HAVE_IO_COMPLETION_PORT
2676			io_completion_port_remove_socket(ep->bfd, ep);
2677#		    endif
2678			close_and_delete_fd_from_list(ep->bfd);
2679			ep->bfd = INVALID_SOCKET;
2680		}
2681		ep->flags &= ~INT_BCASTOPEN;
2682	}
2683	broadcast_client_enabled = ISC_FALSE;
2684}
2685
/*
 * io_multicast_add() - add multicast group address
 *
 * Does nothing if addr is not a multicast address, and only logs a
 * notice if an endpoint flagged INT_MCASTOPEN is already recorded
 * for it.
 *
 * Default build (MULTICAST_NONEWSOCKET not defined): a new endpoint
 * is created and a socket is opened on the group address itself.
 * If that fails, the wildcard endpoint of the matching family is
 * used instead; if there is none, an error is logged and the
 * function returns.  The chosen endpoint is then joined to the
 * group.
 *
 * MULTICAST_NONEWSOCKET build: every eligible endpoint of the same
 * family in ep_list is joined to the group.
 *
 * In both cases addr is finally recorded with add_addr_to_list().
 * Without MCAST only an error is logged.
 */
void
io_multicast_add(
	sockaddr_u *addr
	)
{
#ifdef MCAST
	endpt *	ep;
	endpt *	one_ep;	/* endpoint recorded together with addr */

	/*
	 * Check to see if this is a multicast address
	 */
	if (!addr_ismulticast(addr))
		return;

	/* If we already have it we can just return */
	if (NULL != find_flagged_addr_in_list(addr, INT_MCASTOPEN)) {
		msyslog(LOG_INFO,
			"Duplicate request found for multicast address %s",
			stoa(addr));
		return;
	}

# ifndef MULTICAST_NONEWSOCKET
	ep = new_interface(NULL);

	/*
	 * Open a new socket for the multicast address
	 */
	ep->sin = *addr;
	SET_PORT(&ep->sin, NTP_PORT);
	ep->family = AF(&ep->sin);
	AF(&ep->mask) = ep->family;
	SET_ONESMASK(&ep->mask);

	/*
	 * NOTE(review): no matching set_reuseaddr(0) is visible in
	 * this function, unlike io_setbclient() -- confirm that this
	 * is intended.
	 */
	set_reuseaddr(1);
	ep->bfd = INVALID_SOCKET;
	ep->fd = open_socket(&ep->sin, 0, 0, ep);
	if (ep->fd != INVALID_SOCKET) {
		ep->ignore_packets = ISC_FALSE;
		ep->flags |= INT_MCASTIF;

		strlcpy(ep->name, "multicast", sizeof(ep->name));
		DPRINT_INTERFACE(2, (ep, "multicast add ", "\n"));
		add_interface(ep);
		log_listen_address(ep);
	} else {
		/* bind failed, re-use wildcard interface */
		delete_interface(ep);

		if (IS_IPV4(addr))
			ep = wildipv4;
		else if (IS_IPV6(addr))
			ep = wildipv6;
		else
			ep = NULL;

		if (ep != NULL) {
			/* HACK ! -- stuff in an address */
			/* because we don't bind addr? DH */
			ep->bcast = *addr;
			msyslog(LOG_ERR,
				"multicast address %s using wildcard interface #%d %s",
				stoa(addr), ep->ifnum, ep->name);
		} else {
			msyslog(LOG_ERR,
				"No multicast socket available to use for address %s",
				stoa(addr));
			return;
		}
	}
	/*
	 * The brace opened below pairs with the closing brace after
	 * the #endif, so both build variants share the join code.
	 */
	{	/* in place of the { following for in #else clause */
		one_ep = ep;
# else	/* MULTICAST_NONEWSOCKET follows */
	/*
	 * For the case where we can't use a separate socket (Windows)
	 * join each applicable endpoint socket to the group address.
	 */
	/* fallback for one_ep when no endpoint in the list qualifies */
	if (IS_IPV4(addr))
		one_ep = wildipv4;
	else
		one_ep = wildipv6;
	for (ep = ep_list; ep != NULL; ep = ep->elink) {
		/*
		 * skip ignored endpoints, other address families,
		 * endpoints without INT_MULTICAST, and loopback or
		 * wildcard endpoints
		 */
		if (ep->ignore_packets || AF(&ep->sin) != AF(addr) ||
		    !(INT_MULTICAST & ep->flags) ||
		    (INT_LOOPBACK | INT_WILDCARD) & ep->flags)
			continue;
		one_ep = ep;
# endif	/* MULTICAST_NONEWSOCKET */
		/* a failed join is not reported here */
		if (socket_multicast_enable(ep, addr))
			msyslog(LOG_INFO,
				"Joined %s socket to multicast group %s",
				stoa(&ep->sin),
				stoa(addr));
	}

	/* remember the group address together with its endpoint */
	add_addr_to_list(addr, one_ep);
#else	/* !MCAST  follows*/
	msyslog(LOG_ERR,
		"Can not add multicast address %s: no multicast support",
		stoa(addr));
#endif
	return;
}
2793
2794
2795/*
2796 * io_multicast_del() - delete multicast group address
2797 */
2798void
2799io_multicast_del(
2800	sockaddr_u *	addr
2801	)
2802{
2803#ifdef MCAST
2804	endpt *iface;
2805
2806	/*
2807	 * Check to see if this is a multicast address
2808	 */
2809	if (!addr_ismulticast(addr)) {
2810		msyslog(LOG_ERR, "invalid multicast address %s",
2811			stoa(addr));
2812		return;
2813	}
2814
2815	/*
2816	 * Disable reception of multicast packets
2817	 */
2818	while ((iface = find_flagged_addr_in_list(addr, INT_MCASTOPEN))
2819	       != NULL)
2820		socket_multicast_disable(iface, addr);
2821
2822	delete_addr_from_list(addr);
2823
2824#else /* not MCAST */
2825	msyslog(LOG_ERR,
2826		"Can not delete multicast address %s: no multicast support",
2827		stoa(addr));
2828#endif /* not MCAST */
2829}
2830
2831
2832/*
2833 * open_socket - open a socket, returning the file descriptor
2834 */
2835
2836static SOCKET
2837open_socket(
2838	sockaddr_u *	addr,
2839	int		bcast,
2840	int		turn_off_reuse,
2841	endpt *		interf
2842	)
2843{
2844	SOCKET	fd;
2845	int	errval;
2846	/*
2847	 * int is OK for REUSEADR per
2848	 * http://www.kohala.com/start/mcast.api.txt
2849	 */
2850	int	on = 1;
2851	int	off = 0;
2852
2853	if (IS_IPV6(addr) && !ipv6_works)
2854		return INVALID_SOCKET;
2855
2856	/* create a datagram (UDP) socket */
2857	fd = socket(AF(addr), SOCK_DGRAM, 0);
2858	if (INVALID_SOCKET == fd) {
2859		errval = socket_errno();
2860		msyslog(LOG_ERR,
2861			"socket(AF_INET%s, SOCK_DGRAM, 0) failed on address %s: %m",
2862			IS_IPV6(addr) ? "6" : "", stoa(addr));
2863
2864		if (errval == EPROTONOSUPPORT ||
2865		    errval == EAFNOSUPPORT ||
2866		    errval == EPFNOSUPPORT)
2867			return (INVALID_SOCKET);
2868
2869		errno = errval;
2870		msyslog(LOG_ERR,
2871			"unexpected socket() error %m code %d (not EPROTONOSUPPORT nor EAFNOSUPPORT nor EPFNOSUPPORT) - exiting",
2872			errno);
2873		exit(1);
2874	}
2875
2876#ifdef SYS_WINNT
2877	connection_reset_fix(fd, addr);
2878#endif
2879	/*
2880	 * Fixup the file descriptor for some systems
2881	 * See bug #530 for details of the issue.
2882	 */
2883	fd = move_fd(fd);
2884
2885	/*
2886	 * set SO_REUSEADDR since we will be binding the same port
2887	 * number on each interface according to turn_off_reuse.
2888	 * This is undesirable on Windows versions starting with
2889	 * Windows XP (numeric version 5.1).
2890	 */
2891#ifdef SYS_WINNT
2892	if (isc_win32os_versioncheck(5, 1, 0, 0) < 0)  /* before 5.1 */
2893#endif
2894		if (setsockopt(fd, SOL_SOCKET, SO_REUSEADDR,
2895			       (char *)((turn_off_reuse)
2896					    ? &off
2897					    : &on),
2898			       sizeof(on))) {
2899
2900			msyslog(LOG_ERR,
2901				"setsockopt SO_REUSEADDR %s fails for address %s: %m",
2902				(turn_off_reuse)
2903				    ? "off"
2904				    : "on",
2905				stoa(addr));
2906			closesocket(fd);
2907			return INVALID_SOCKET;
2908		}
2909#ifdef SO_EXCLUSIVEADDRUSE
2910	/*
2911	 * setting SO_EXCLUSIVEADDRUSE on the wildcard we open
2912	 * first will cause more specific binds to fail.
2913	 */
2914	if (!(interf->flags & INT_WILDCARD))
2915		set_excladdruse(fd);
2916#endif
2917
2918	/*
2919	 * IPv4 specific options go here
2920	 */
2921	if (IS_IPV4(addr)) {
2922#if defined(IPPROTO_IP) && defined(IP_TOS)
2923		if (setsockopt(fd, IPPROTO_IP, IP_TOS, (char*)&qos,
2924			       sizeof(qos)))
2925			msyslog(LOG_ERR,
2926				"setsockopt IP_TOS (%02x) fails on address %s: %m",
2927				qos, stoa(addr));
2928#endif /* IPPROTO_IP && IP_TOS */
2929		if (bcast)
2930			socket_broadcast_enable(interf, fd, addr);
2931	}
2932
2933	/*
2934	 * IPv6 specific options go here
2935	 */
2936	if (IS_IPV6(addr)) {
2937#if defined(IPPROTO_IPV6) && defined(IPV6_TCLASS)
2938		if (setsockopt(fd, IPPROTO_IPV6, IPV6_TCLASS, (char*)&qos,
2939			       sizeof(qos)))
2940			msyslog(LOG_ERR,
2941				"setsockopt IPV6_TCLASS (%02x) fails on address %s: %m",
2942				qos, stoa(addr));
2943#endif /* IPPROTO_IPV6 && IPV6_TCLASS */
2944#ifdef IPV6_V6ONLY
2945		if (isc_net_probe_ipv6only() == ISC_R_SUCCESS
2946		    && setsockopt(fd, IPPROTO_IPV6, IPV6_V6ONLY,
2947		    (char*)&on, sizeof(on)))
2948			msyslog(LOG_ERR,
2949				"setsockopt IPV6_V6ONLY on fails on address %s: %m",
2950				stoa(addr));
2951#endif
2952#ifdef IPV6_BINDV6ONLY
2953		if (setsockopt(fd, IPPROTO_IPV6, IPV6_BINDV6ONLY,
2954		    (char*)&on, sizeof(on)))
2955			msyslog(LOG_ERR,
2956				"setsockopt IPV6_BINDV6ONLY on fails on address %s: %m",
2957				stoa(addr));
2958#endif
2959	}
2960
2961#ifdef OS_NEEDS_REUSEADDR_FOR_IFADDRBIND
2962	/*
2963	 * some OSes don't allow binding to more specific
2964	 * addresses if a wildcard address already bound
2965	 * to the port and SO_REUSEADDR is not set
2966	 */
2967	if (!is_wildcard_addr(addr))
2968		set_wildcard_reuse(AF(addr), 1);
2969#endif
2970
2971	/*
2972	 * bind the local address.
2973	 */
2974	errval = bind(fd, &addr->sa, SOCKLEN(addr));
2975
2976#ifdef OS_NEEDS_REUSEADDR_FOR_IFADDRBIND
2977	if (!is_wildcard_addr(addr))
2978		set_wildcard_reuse(AF(addr), 0);
2979#endif
2980
2981	if (errval < 0) {
2982		/*
2983		 * Don't log this under all conditions
2984		 */
2985		if (turn_off_reuse == 0
2986#ifdef DEBUG
2987		    || debug > 1
2988#endif
2989		    ) {
2990			msyslog(LOG_ERR,
2991				"bind(%d) AF_INET%s %s#%d%s flags 0x%x failed: %m",
2992				fd, IS_IPV6(addr) ? "6" : "",
2993				stoa(addr), SRCPORT(addr),
2994				IS_MCAST(addr) ? " (multicast)" : "",
2995				interf->flags);
2996		}
2997
2998		closesocket(fd);
2999
3000		return INVALID_SOCKET;
3001	}
3002
3003#ifdef HAVE_TIMESTAMP
3004	{
3005		if (setsockopt(fd, SOL_SOCKET, SO_TIMESTAMP,
3006			       (char*)&on, sizeof(on)))
3007			msyslog(LOG_DEBUG,
3008				"setsockopt SO_TIMESTAMP on fails on address %s: %m",
3009				stoa(addr));
3010		else
3011			DPRINTF(4, ("setsockopt SO_TIMESTAMP enabled on fd %d address %s\n",
3012				    fd, stoa(addr)));
3013	}
3014#endif
3015#ifdef HAVE_TIMESTAMPNS
3016	{
3017		if (setsockopt(fd, SOL_SOCKET, SO_TIMESTAMPNS,
3018			       (char*)&on, sizeof(on)))
3019			msyslog(LOG_DEBUG,
3020				"setsockopt SO_TIMESTAMPNS on fails on address %s: %m",
3021				stoa(addr));
3022		else
3023			DPRINTF(4, ("setsockopt SO_TIMESTAMPNS enabled on fd %d address %s\n",
3024				    fd, stoa(addr)));
3025	}
3026#endif
3027#ifdef HAVE_BINTIME
3028	{
3029		if (setsockopt(fd, SOL_SOCKET, SO_BINTIME,
3030			       (char*)&on, sizeof(on)))
3031			msyslog(LOG_DEBUG,
3032				"setsockopt SO_BINTIME on fails on address %s: %m",
3033				stoa(addr));
3034		else
3035			DPRINTF(4, ("setsockopt SO_BINTIME enabled on fd %d address %s\n",
3036				    fd, stoa(addr)));
3037	}
3038#endif
3039
3040	DPRINTF(4, ("bind(%d) AF_INET%s, addr %s%%%d#%d, flags 0x%x\n",
3041		   fd, IS_IPV6(addr) ? "6" : "", stoa(addr),
3042		   SCOPE(addr), SRCPORT(addr), interf->flags));
3043
3044	make_socket_nonblocking(fd);
3045
3046#ifdef HAVE_SIGNALED_IO
3047	init_socket_sig(fd);
3048#endif /* not HAVE_SIGNALED_IO */
3049
3050	add_fd_to_list(fd, FD_TYPE_SOCKET);
3051
3052#if !defined(SYS_WINNT) && !defined(VMS)
3053	DPRINTF(4, ("flags for fd %d: 0x%x\n", fd,
3054		    fcntl(fd, F_GETFL, 0)));
3055#endif /* SYS_WINNT || VMS */
3056
3057#if defined(HAVE_IO_COMPLETION_PORT)
3058/*
3059 * Add the socket to the completion port
3060 */
3061	if (!io_completion_port_add_socket(fd, interf, bcast)) {
3062		msyslog(LOG_ERR, "unable to set up io completion port - EXITING");
3063		exit(1);
3064	}
3065#endif
3066	return fd;
3067}
3068
3069
3070
3071/* XXX ELIMINATE sendpkt similar in ntpq.c, ntpdc.c, ntp_io.c, ntptrace.c */
3072/*
3073 * sendpkt - send a packet to the specified destination. Maintain a
3074 * send error cache so that only the first consecutive error for a
3075 * destination is logged.
3076 */
3077void
3078sendpkt(
3079	sockaddr_u *		dest,
3080	struct interface *	ep,
3081	int			ttl,
3082	struct pkt *		pkt,
3083	int			len
3084	)
3085{
3086	endpt *	src;
3087	int	ismcast;
3088	int	cc;
3089	int	rc;
3090	u_char	cttl;
3091
3092	ismcast = IS_MCAST(dest);
3093	if (!ismcast)
3094		src = ep;
3095	else
3096		src = (IS_IPV4(dest))
3097			  ? mc4_list
3098			  : mc6_list;
3099
3100	if (NULL == src) {
3101		/*
3102		 * unbound peer - drop request and wait for better
3103		 * network conditions
3104		 */
3105		DPRINTF(2, ("%ssendpkt(dst=%s, ttl=%d, len=%d): no interface - IGNORED\n",
3106			    ismcast ? "\tMCAST\t***** " : "",
3107			    stoa(dest), ttl, len));
3108		return;
3109	}
3110
3111	do {
3112		DPRINTF(2, ("%ssendpkt(%d, dst=%s, src=%s, ttl=%d, len=%d)\n",
3113			    ismcast ? "\tMCAST\t***** " : "", src->fd,
3114			    stoa(dest), stoa(&src->sin), ttl, len));
3115#ifdef MCAST
3116		/*
3117		 * for the moment we use the bcast option to set multicast ttl
3118		 */
3119		if (ismcast && ttl > 0 && ttl != src->last_ttl) {
3120			/*
3121			 * set the multicast ttl for outgoing packets
3122			 */
3123			switch (AF(&src->sin)) {
3124
3125			case AF_INET :
3126				cttl = (u_char)ttl;
3127				rc = setsockopt(src->fd, IPPROTO_IP,
3128						IP_MULTICAST_TTL,
3129						(void *)&cttl,
3130						sizeof(cttl));
3131				break;
3132
3133# ifdef INCLUDE_IPV6_SUPPORT
3134			case AF_INET6 :
3135				rc = setsockopt(src->fd, IPPROTO_IPV6,
3136						 IPV6_MULTICAST_HOPS,
3137						 (void *)&ttl,
3138						 sizeof(ttl));
3139				break;
3140# endif	/* INCLUDE_IPV6_SUPPORT */
3141
3142			default:
3143				rc = 0;
3144			}
3145
3146			if (!rc)
3147				src->last_ttl = ttl;
3148			else
3149				msyslog(LOG_ERR,
3150					"setsockopt IP_MULTICAST_TTL/IPV6_MULTICAST_HOPS fails on address %s: %m",
3151					stoa(&src->sin));
3152		}
3153#endif	/* MCAST */
3154
3155#ifdef SIM
3156		cc = simulate_server(dest, src, pkt);
3157#elif defined(HAVE_IO_COMPLETION_PORT)
3158		cc = io_completion_port_sendto(src, src->fd, pkt,
3159			(size_t)len, (sockaddr_u *)&dest->sa);
3160#else
3161		cc = sendto(src->fd, (char *)pkt, (u_int)len, 0,
3162			    &dest->sa, SOCKLEN(dest));
3163#endif
3164		if (cc == -1) {
3165			src->notsent++;
3166			packets_notsent++;
3167		} else	{
3168			src->sent++;
3169			packets_sent++;
3170		}
3171		if (ismcast)
3172			src = src->mclink;
3173	} while (ismcast && src != NULL);
3174}
3175
3176
3177#if !defined(HAVE_IO_COMPLETION_PORT)
3178#if !defined(HAVE_SIGNALED_IO)
/*
 * fdbits - generate ascii representation of fd_set (FAU debug support)
 * HFDF format - highest fd first.
 *
 * count	highest descriptor to show; values above 255 are
 *		clamped to 255, a negative value gives an empty string
 * set		descriptor set to render
 *
 * Returns a pointer to a static buffer holding one character per
 * descriptor from count down to 0: '#' if the descriptor is in the
 * set, '-' if not.  The buffer is overwritten by the next call.
 */
static char *
fdbits(
	int		count,
	const fd_set*	set
	)
{
	enum { FDBITS_MAXFD = 255 };
	/*
	 * Descriptors 0..FDBITS_MAXFD need FDBITS_MAXFD + 1 characters,
	 * plus one more byte for the terminating NUL.
	 */
	static char buffer[FDBITS_MAXFD + 2];
	char * buf = buffer;

	if (count > FDBITS_MAXFD)
		count = FDBITS_MAXFD;

	while (count >= 0) {
		*buf++ = FD_ISSET(count, set) ? '#' : '-';
		count--;
	}
	*buf = '\0';

	return buffer;
}
3202#endif
3203
3204#ifdef REFCLOCK
3205/*
3206 * Routine to read the refclock packets for a specific interface
3207 * Return the number of bytes read. That way we know if we should
3208 * read it again or go on to the next one if no bytes returned
3209 */
3210static inline int
3211read_refclock_packet(
3212	SOCKET			fd,
3213	struct refclockio *	rp,
3214	l_fp			ts
3215	)
3216{
3217	u_int			read_count;
3218	int			buflen;
3219	int			saved_errno;
3220	int			consumed;
3221	struct recvbuf *	rb;
3222
3223	rb = get_free_recv_buffer();
3224
3225	if (NULL == rb) {
3226		/*
3227		 * No buffer space available - just drop the packet
3228		 */
3229		char buf[RX_BUFF_SIZE];
3230
3231		buflen = read(fd, buf, sizeof buf);
3232		packets_dropped++;
3233		return (buflen);
3234	}
3235
3236	/* TALOS-CAN-0064: avoid signed/unsigned clashes that can lead
3237	 * to buffer overrun and memory corruption
3238	 */
3239	if (rp->datalen <= 0 || (size_t)rp->datalen > sizeof(rb->recv_space))
3240		read_count = sizeof(rb->recv_space);
3241	else
3242		read_count = (u_int)rp->datalen;
3243	do {
3244		buflen = read(fd, (char *)&rb->recv_space, read_count);
3245	} while (buflen < 0 && EINTR == errno);
3246
3247	if (buflen <= 0) {
3248		saved_errno = errno;
3249		freerecvbuf(rb);
3250		errno = saved_errno;
3251		return buflen;
3252	}
3253
3254	/*
3255	 * Got one. Mark how and when it got here,
3256	 * put it on the full list and do bookkeeping.
3257	 */
3258	rb->recv_length = buflen;
3259	rb->recv_peer = rp->srcclock;
3260	rb->dstadr = 0;
3261	rb->fd = fd;
3262	rb->recv_time = ts;
3263	rb->receiver = rp->clock_recv;
3264
3265	consumed = indicate_refclock_packet(rp, rb);
3266	if (!consumed) {
3267		rp->recvcount++;
3268		packets_received++;
3269	}
3270
3271	return buflen;
3272}
3273#endif	/* REFCLOCK */
3274
3275
3276#ifdef HAVE_PACKET_TIMESTAMP
/*
 * fetch_timestamp - extract timestamps from control message buffer
 *
 * rb		receive buffer; only used for timing statistics in
 *		DEBUG_TIMING builds
 * msghdr	message header whose control messages are scanned
 * ts		time stamp to return when no usable control message
 *		is found
 *
 * Walks all control messages.  For each recognised time stamp type
 * (SCM_BINTIME, SCM_TIMESTAMPNS, SCM_TIMESTAMP, as available in the
 * build) the value is converted to l_fp, random fuzz scaled by
 * sys_fuzz is added, and the result replaces ts.  Other messages are
 * skipped.  Returns ts, i.e. the value from the last recognised
 * message, or the caller's value if there was none.
 *
 * NOTE(review): only cmsg_type is examined, cmsg_level is not --
 * confirm that no other level can deliver a colliding type value.
 */
static l_fp
fetch_timestamp(
	struct recvbuf *	rb,
	struct msghdr *		msghdr,
	l_fp			ts
	)
{
	struct cmsghdr *	cmsghdr;
	unsigned long		ticks;
	double			fuzz;
	l_fp			lfpfuzz;
	l_fp			nts;
#ifdef DEBUG_TIMING
	l_fp			dts;
#endif

	cmsghdr = CMSG_FIRSTHDR(msghdr);
	while (cmsghdr != NULL) {
		/*
		 * The outer switch selects the time stamp messages,
		 * the inner one decodes the specific format; the code
		 * after the inner switch is shared by all formats.
		 */
		switch (cmsghdr->cmsg_type)
		{
#ifdef HAVE_BINTIME
		case SCM_BINTIME:
#endif  /* HAVE_BINTIME */
#ifdef HAVE_TIMESTAMPNS
		case SCM_TIMESTAMPNS:
#endif	/* HAVE_TIMESTAMPNS */
#ifdef HAVE_TIMESTAMP
		case SCM_TIMESTAMP:
#endif	/* HAVE_TIMESTAMP */
#if defined(HAVE_BINTIME) || defined (HAVE_TIMESTAMPNS) || defined(HAVE_TIMESTAMP)
			switch (cmsghdr->cmsg_type)
			{
#ifdef HAVE_BINTIME
			case SCM_BINTIME:
				{
					struct bintime	pbt;
					/* copy out rather than access the control data in place */
					memcpy(&pbt, CMSG_DATA(cmsghdr), sizeof(pbt));
					/*
					 * bintime documentation is at http://phk.freebsd.dk/pubs/timecounter.pdf
					 */
					nts.l_i = pbt.sec + JAN_1970;
					nts.l_uf = (u_int32)(pbt.frac >> 32);
					/* round the fraction down to a multiple of sys_tick */
					if (sys_tick > measured_tick &&
					    sys_tick > 1e-9) {
						ticks = (unsigned long)(nts.l_uf / (unsigned long)(sys_tick * FRAC));
						nts.l_uf = (unsigned long)(ticks * (unsigned long)(sys_tick * FRAC));
					}
					DPRINTF(4, ("fetch_timestamp: system bintime network time stamp: %ld.%09lu\n",
						    pbt.sec, (unsigned long)((nts.l_uf / FRAC) * 1e9)));
				}
				break;
#endif  /* HAVE_BINTIME */
#ifdef HAVE_TIMESTAMPNS
			case SCM_TIMESTAMPNS:
				{
					struct timespec	pts;
					memcpy(&pts, CMSG_DATA(cmsghdr), sizeof(pts));
					/* round the nanoseconds down to a multiple of sys_tick */
					if (sys_tick > measured_tick &&
					    sys_tick > 1e-9) {
						ticks = (unsigned long)((pts.tv_nsec * 1e-9) /
									sys_tick);
						pts.tv_nsec = (long)(ticks * 1e9 *
								     sys_tick);
					}
					DPRINTF(4, ("fetch_timestamp: system nsec network time stamp: %ld.%09ld\n",
						    pts.tv_sec, pts.tv_nsec));
					nts = tspec_stamp_to_lfp(pts);
				}
				break;
#endif	/* HAVE_TIMESTAMPNS */
#ifdef HAVE_TIMESTAMP
			case SCM_TIMESTAMP:
				{
					struct timeval	ptv;
					memcpy(&ptv, CMSG_DATA(cmsghdr), sizeof(ptv));
					/* round the microseconds down to a multiple of sys_tick */
					if (sys_tick > measured_tick &&
					    sys_tick > 1e-6) {
						ticks = (unsigned long)((ptv.tv_usec * 1e-6) /
									sys_tick);
						ptv.tv_usec = (long)(ticks * 1e6 *
								    sys_tick);
					}
					DPRINTF(4, ("fetch_timestamp: system usec network time stamp: %jd.%06ld\n",
						    (intmax_t)ptv.tv_sec, (long)ptv.tv_usec));
					nts = tval_stamp_to_lfp(ptv);
				}
				break;
#endif  /* HAVE_TIMESTAMP */
			}
			/* add random fuzz, scaled by sys_fuzz, to the converted stamp */
			fuzz = ntp_random() * 2. / FRAC * sys_fuzz;
			DTOLFP(fuzz, &lfpfuzz);
			L_ADD(&nts, &lfpfuzz);
#ifdef DEBUG_TIMING
			/* difference between the caller's stamp and the network stamp */
			dts = ts;
			L_SUB(&dts, &nts);
			collect_timing(rb, "input processing delay", 1,
				       &dts);
			DPRINTF(4, ("fetch_timestamp: timestamp delta: %s (incl. fuzz)\n",
				    lfptoa(&dts, 9)));
#endif	/* DEBUG_TIMING */
			ts = nts;  /* network time stamp */
			break;
#endif	/* HAVE_BINTIME || HAVE_TIMESTAMPNS || HAVE_TIMESTAMP */

		default:
			DPRINTF(4, ("fetch_timestamp: skipping control message 0x%x\n",
				    cmsghdr->cmsg_type));
		}
		cmsghdr = CMSG_NXTHDR(msghdr, cmsghdr);
	}
	return ts;
}
3392#endif	/* HAVE_PACKET_TIMESTAMP */
3393
3394
3395/*
3396 * Routine to read the network NTP packets for a specific interface
3397 * Return the number of bytes read. That way we know if we should
3398 * read it again or go on to the next one if no bytes returned
3399 */
3400static inline int
3401read_network_packet(
3402	SOCKET			fd,
3403	struct interface *	itf,
3404	l_fp			ts
3405	)
3406{
3407	GETSOCKNAME_SOCKLEN_TYPE fromlen;
3408	int buflen;
3409	register struct recvbuf *rb;
3410#ifdef HAVE_PACKET_TIMESTAMP
3411	struct msghdr msghdr;
3412	struct iovec iovec;
3413	char control[CMSG_BUFSIZE];
3414#endif
3415
3416	/*
3417	 * Get a buffer and read the frame.  If we
3418	 * haven't got a buffer, or this is received
3419	 * on a disallowed socket, just dump the
3420	 * packet.
3421	 */
3422
3423	rb = get_free_recv_buffer();
3424	if (NULL == rb || itf->ignore_packets) {
3425		char buf[RX_BUFF_SIZE];
3426		sockaddr_u from;
3427
3428		if (rb != NULL)
3429			freerecvbuf(rb);
3430
3431		fromlen = sizeof(from);
3432		buflen = recvfrom(fd, buf, sizeof(buf), 0,
3433				  &from.sa, &fromlen);
3434		DPRINTF(4, ("%s on (%lu) fd=%d from %s\n",
3435			(itf->ignore_packets)
3436			    ? "ignore"
3437			    : "drop",
3438			free_recvbuffs(), fd, stoa(&from)));
3439		if (itf->ignore_packets)
3440			packets_ignored++;
3441		else
3442			packets_dropped++;
3443		return (buflen);
3444	}
3445
3446	fromlen = sizeof(rb->recv_srcadr);
3447
3448#ifndef HAVE_PACKET_TIMESTAMP
3449	rb->recv_length = recvfrom(fd, (char *)&rb->recv_space,
3450				   sizeof(rb->recv_space), 0,
3451				   &rb->recv_srcadr.sa, &fromlen);
3452#else
3453	iovec.iov_base        = &rb->recv_space;
3454	iovec.iov_len         = sizeof(rb->recv_space);
3455	msghdr.msg_name       = &rb->recv_srcadr;
3456	msghdr.msg_namelen    = fromlen;
3457	msghdr.msg_iov        = &iovec;
3458	msghdr.msg_iovlen     = 1;
3459	msghdr.msg_control    = (void *)&control;
3460	msghdr.msg_controllen = sizeof(control);
3461	msghdr.msg_flags      = 0;
3462	rb->recv_length       = recvmsg(fd, &msghdr, 0);
3463#endif
3464
3465	buflen = rb->recv_length;
3466
3467	if (buflen == 0 || (buflen == -1 &&
3468	    (EWOULDBLOCK == errno
3469#ifdef EAGAIN
3470	     || EAGAIN == errno
3471#endif
3472	     ))) {
3473		freerecvbuf(rb);
3474		return (buflen);
3475	} else if (buflen < 0) {
3476		msyslog(LOG_ERR, "recvfrom(%s) fd=%d: %m",
3477			stoa(&rb->recv_srcadr), fd);
3478		DPRINTF(5, ("read_network_packet: fd=%d dropped (bad recvfrom)\n",
3479			    fd));
3480		freerecvbuf(rb);
3481		return (buflen);
3482	}
3483
3484	DPRINTF(3, ("read_network_packet: fd=%d length %d from %s\n",
3485		    fd, buflen, stoa(&rb->recv_srcadr)));
3486
3487#ifdef ENABLE_BUG3020_FIX
3488	if (ISREFCLOCKADR(&rb->recv_srcadr)) {
3489		msyslog(LOG_ERR, "recvfrom(%s) fd=%d: refclock srcadr on a network interface!",
3490			stoa(&rb->recv_srcadr), fd);
3491		DPRINTF(1, ("read_network_packet: fd=%d dropped (refclock srcadr))\n",
3492			    fd));
3493		packets_dropped++;
3494		freerecvbuf(rb);
3495		return (buflen);
3496	}
3497#endif
3498
3499	/*
3500	** Bug 2672: Some OSes (MacOSX and Linux) don't block spoofed ::1
3501	*/
3502
3503	if (AF_INET6 == itf->family) {
3504		DPRINTF(2, ("Got an IPv6 packet, from <%s> (%d) to <%s> (%d)\n",
3505			stoa(&rb->recv_srcadr),
3506			IN6_IS_ADDR_LOOPBACK(PSOCK_ADDR6(&rb->recv_srcadr)),
3507			stoa(&itf->sin),
3508			!IN6_IS_ADDR_LOOPBACK(PSOCK_ADDR6(&itf->sin))
3509			));
3510
3511		if (   IN6_IS_ADDR_LOOPBACK(PSOCK_ADDR6(&rb->recv_srcadr))
3512		    && !IN6_IS_ADDR_LOOPBACK(PSOCK_ADDR6(&itf->sin))
3513		   ) {
3514			packets_dropped++;
3515			DPRINTF(2, ("DROPPING that packet\n"));
3516			freerecvbuf(rb);
3517			return buflen;
3518		}
3519		DPRINTF(2, ("processing that packet\n"));
3520	}
3521
3522	/*
3523	 * Got one.  Mark how and when it got here,
3524	 * put it on the full list and do bookkeeping.
3525	 */
3526	rb->dstadr = itf;
3527	rb->fd = fd;
3528#ifdef HAVE_PACKET_TIMESTAMP
3529	/* pick up a network time stamp if possible */
3530	ts = fetch_timestamp(rb, &msghdr, ts);
3531#endif
3532	rb->recv_time = ts;
3533	rb->receiver = receive;
3534
3535	add_full_recv_buffer(rb);
3536
3537	itf->received++;
3538	packets_received++;
3539	return (buflen);
3540}
3541
3542/*
3543 * attempt to handle io (select()/signaled IO)
3544 */
3545void
3546io_handler(void)
3547{
3548#  ifndef HAVE_SIGNALED_IO
3549	fd_set rdfdes;
3550	int nfound;
3551
3552	/*
3553	 * Use select() on all on all input fd's for unlimited
3554	 * time.  select() will terminate on SIGALARM or on the
3555	 * reception of input.	Using select() means we can't do
3556	 * robust signal handling and we get a potential race
3557	 * between checking for alarms and doing the select().
3558	 * Mostly harmless, I think.
3559	 */
3560	/*
3561	 * On VMS, I suspect that select() can't be interrupted
3562	 * by a "signal" either, so I take the easy way out and
3563	 * have select() time out after one second.
3564	 * System clock updates really aren't time-critical,
3565	 * and - lacking a hardware reference clock - I have
3566	 * yet to learn about anything else that is.
3567	 */
3568	++handler_calls;
3569	rdfdes = activefds;
3570#   if !defined(VMS) && !defined(SYS_VXWORKS)
3571	nfound = select(maxactivefd + 1, &rdfdes, NULL,
3572			NULL, NULL);
3573#   else	/* VMS, VxWorks */
3574	/* make select() wake up after one second */
3575	{
3576		struct timeval t1;
3577		t1.tv_sec  = 1;
3578		t1.tv_usec = 0;
3579		nfound = select(maxactivefd + 1,
3580				&rdfdes, NULL, NULL,
3581				&t1);
3582	}
3583#   endif	/* VMS, VxWorks */
3584	if (nfound < 0 && sanitize_fdset(errno)) {
3585		struct timeval t1;
3586		t1.tv_sec  = 0;
3587		t1.tv_usec = 0;
3588		rdfdes = activefds;
3589		nfound = select(maxactivefd + 1,
3590				&rdfdes, NULL, NULL,
3591				&t1);
3592	}
3593
3594	if (nfound > 0) {
3595		l_fp ts;
3596
3597		get_systime(&ts);
3598
3599		input_handler_scan(&ts, &rdfdes);
3600	} else if (nfound == -1 && errno != EINTR) {
3601		msyslog(LOG_ERR, "select() error: %m");
3602	}
3603#   ifdef DEBUG
3604	else if (debug > 4) {
3605		msyslog(LOG_DEBUG, "select(): nfound=%d, error: %m", nfound);
3606	} else {
3607		DPRINTF(3, ("select() returned %d: %m\n", nfound));
3608	}
3609#   endif /* DEBUG */
3610#  else /* HAVE_SIGNALED_IO */
3611	wait_for_signal();
3612#  endif /* HAVE_SIGNALED_IO */
3613}
3614
3615#ifdef HAVE_SIGNALED_IO
3616/*
3617 * input_handler - receive packets asynchronously
3618 *
3619 * ALWAYS IN SIGNAL HANDLER CONTEXT -- only async-safe functions allowed!
3620 */
3621static RETSIGTYPE
3622input_handler(
3623	l_fp *	cts
3624	)
3625{
3626	int		n;
3627	struct timeval	tvzero;
3628	fd_set		fds;
3629
3630	++handler_calls;
3631
3632	/*
3633	 * Do a poll to see who has data
3634	 */
3635
3636	fds = activefds;
3637	tvzero.tv_sec = tvzero.tv_usec = 0;
3638
3639	n = select(maxactivefd + 1, &fds, NULL, NULL, &tvzero);
3640	if (n < 0 && sanitize_fdset(errno)) {
3641		fds = activefds;
3642		tvzero.tv_sec = tvzero.tv_usec = 0;
3643		n = select(maxactivefd + 1, &fds, NULL, NULL, &tvzero);
3644	}
3645	if (n > 0)
3646		input_handler_scan(cts, &fds);
3647}
3648#endif /* HAVE_SIGNALED_IO */
3649
3650
3651/*
3652 * Try to sanitize the global FD set
3653 *
3654 * SIGNAL HANDLER CONTEXT if HAVE_SIGNALED_IO, ordinary userspace otherwise
3655 */
3656static int/*BOOL*/
3657sanitize_fdset(
3658	int	errc
3659	)
3660{
3661	int j, b, maxscan;
3662
3663#  ifndef HAVE_SIGNALED_IO
3664	/*
3665	 * extended FAU debugging output
3666	 */
3667	if (errc != EINTR) {
3668		msyslog(LOG_ERR,
3669			"select(%d, %s, 0L, 0L, &0.0) error: %m",
3670			maxactivefd + 1,
3671			fdbits(maxactivefd, &activefds));
3672	}
3673#   endif
3674
3675	if (errc != EBADF)
3676		return FALSE;
3677
3678	/* if we have oviously bad FDs, try to sanitize the FD set. */
3679	for (j = 0, maxscan = 0; j <= maxactivefd; j++) {
3680		if (FD_ISSET(j, &activefds)) {
3681			if (-1 != read(j, &b, 0)) {
3682				maxscan = j;
3683				continue;
3684			}
3685#		    ifndef HAVE_SIGNALED_IO
3686			msyslog(LOG_ERR,
3687				"Removing bad file descriptor %d from select set",
3688				j);
3689#		    endif
3690			FD_CLR(j, &activefds);
3691		}
3692	}
3693	if (maxactivefd != maxscan)
3694		maxactivefd = maxscan;
3695	return TRUE;
3696}
3697
3698/*
3699 * scan the known FDs (clocks, servers, ...) for presence in a 'fd_set'.
3700 *
3701 * SIGNAL HANDLER CONTEXT if HAVE_SIGNALED_IO, ordinary userspace otherwise
3702 */
3703static void
3704input_handler_scan(
3705	const l_fp *	cts,
3706	const fd_set *	pfds
3707	)
3708{
3709	int		buflen;
3710	u_int		idx;
3711	int		doing;
3712	SOCKET		fd;
3713	blocking_child *c;
3714	l_fp		ts;	/* Timestamp at BOselect() gob */
3715
3716#if defined(DEBUG_TIMING)
3717	l_fp		ts_e;	/* Timestamp at EOselect() gob */
3718#endif
3719	endpt *		ep;
3720#ifdef REFCLOCK
3721	struct refclockio *rp;
3722	int		saved_errno;
3723	const char *	clk;
3724#endif
3725#ifdef HAS_ROUTING_SOCKET
3726	struct asyncio_reader *	asyncio_reader;
3727	struct asyncio_reader *	next_asyncio_reader;
3728#endif
3729
3730	++handler_pkts;
3731	ts = *cts;
3732
3733#ifdef REFCLOCK
3734	/*
3735	 * Check out the reference clocks first, if any
3736	 */
3737
3738	for (rp = refio; rp != NULL; rp = rp->next) {
3739		fd = rp->fd;
3740
3741		if (!FD_ISSET(fd, pfds))
3742			continue;
3743		buflen = read_refclock_packet(fd, rp, ts);
3744		/*
3745		 * The first read must succeed after select() indicates
3746		 * readability, or we've reached a permanent EOF.
3747		 * http://bugs.ntp.org/1732 reported ntpd munching CPU
3748		 * after a USB GPS was unplugged because select was
3749		 * indicating EOF but ntpd didn't remove the descriptor
3750		 * from the activefds set.
3751		 */
3752		if (buflen < 0 && EAGAIN != errno) {
3753			saved_errno = errno;
3754			clk = refnumtoa(&rp->srcclock->srcadr);
3755			errno = saved_errno;
3756			msyslog(LOG_ERR, "%s read: %m", clk);
3757			maintain_activefds(fd, TRUE);
3758		} else if (0 == buflen) {
3759			clk = refnumtoa(&rp->srcclock->srcadr);
3760			msyslog(LOG_ERR, "%s read EOF", clk);
3761			maintain_activefds(fd, TRUE);
3762		} else {
3763			/* drain any remaining refclock input */
3764			do {
3765				buflen = read_refclock_packet(fd, rp, ts);
3766			} while (buflen > 0);
3767		}
3768	}
3769#endif /* REFCLOCK */
3770
3771	/*
3772	 * Loop through the interfaces looking for data to read.
3773	 */
3774	for (ep = ep_list; ep != NULL; ep = ep->elink) {
3775		for (doing = 0; doing < 2; doing++) {
3776			if (!doing) {
3777				fd = ep->fd;
3778			} else {
3779				if (!(ep->flags & INT_BCASTOPEN))
3780					break;
3781				fd = ep->bfd;
3782			}
3783			if (fd < 0)
3784				continue;
3785			if (FD_ISSET(fd, pfds))
3786				do {
3787					buflen = read_network_packet(
3788							fd, ep, ts);
3789				} while (buflen > 0);
3790			/* Check more interfaces */
3791		}
3792	}
3793
3794#ifdef HAS_ROUTING_SOCKET
3795	/*
3796	 * scan list of asyncio readers - currently only used for routing sockets
3797	 */
3798	asyncio_reader = asyncio_reader_list;
3799
3800	while (asyncio_reader != NULL) {
3801		/* callback may unlink and free asyncio_reader */
3802		next_asyncio_reader = asyncio_reader->link;
3803		if (FD_ISSET(asyncio_reader->fd, pfds))
3804			(*asyncio_reader->receiver)(asyncio_reader);
3805		asyncio_reader = next_asyncio_reader;
3806	}
3807#endif /* HAS_ROUTING_SOCKET */
3808
3809	/*
3810	 * Check for a response from a blocking child
3811	 */
3812	for (idx = 0; idx < blocking_children_alloc; idx++) {
3813		c = blocking_children[idx];
3814		if (NULL == c || -1 == c->resp_read_pipe)
3815			continue;
3816		if (FD_ISSET(c->resp_read_pipe, pfds)) {
3817			++c->resp_ready_seen;
3818			++blocking_child_ready_seen;
3819		}
3820	}
3821
3822	/* We've done our work */
3823#if defined(DEBUG_TIMING)
3824	get_systime(&ts_e);
3825	/*
3826	 * (ts_e - ts) is the amount of time we spent
3827	 * processing this gob of file descriptors.  Log
3828	 * it.
3829	 */
3830	L_SUB(&ts_e, &ts);
3831	collect_timing(NULL, "input handler", 1, &ts_e);
3832	if (debug > 3)
3833		msyslog(LOG_DEBUG,
3834			"input_handler: Processed a gob of fd's in %s msec",
3835			lfptoms(&ts_e, 6));
3836#endif /* DEBUG_TIMING */
3837}
3838#endif /* !HAVE_IO_COMPLETION_PORT */
3839
3840/*
3841 * find an interface suitable for the src address
3842 */
3843endpt *
3844select_peerinterface(
3845	struct peer *	peer,
3846	sockaddr_u *	srcadr,
3847	endpt *		dstadr
3848	)
3849{
3850	endpt *ep;
3851#ifndef SIM
3852	endpt *wild;
3853
3854	wild = ANY_INTERFACE_CHOOSE(srcadr);
3855
3856	/*
3857	 * Initialize the peer structure and dance the interface jig.
3858	 * Reference clocks step the loopback waltz, the others
3859	 * squaredance around the interface list looking for a buddy. If
3860	 * the dance peters out, there is always the wildcard interface.
3861	 * This might happen in some systems and would preclude proper
3862	 * operation with public key cryptography.
3863	 */
3864	if (ISREFCLOCKADR(srcadr)) {
3865		ep = loopback_interface;
3866	} else if (peer->cast_flags &
3867		   (MDF_BCLNT | MDF_ACAST | MDF_MCAST | MDF_BCAST)) {
3868		ep = findbcastinter(srcadr);
3869		if (ep != NULL)
3870			DPRINTF(4, ("Found *-cast interface %s for address %s\n",
3871				stoa(&ep->sin), stoa(srcadr)));
3872		else
3873			DPRINTF(4, ("No *-cast local address found for address %s\n",
3874				stoa(srcadr)));
3875	} else {
3876		ep = dstadr;
3877		if (NULL == ep)
3878			ep = wild;
3879	}
3880	/*
3881	 * If it is a multicast address, findbcastinter() may not find
3882	 * it.  For unicast, we get to find the interface when dstadr is
3883	 * given to us as the wildcard (ANY_INTERFACE_CHOOSE).  Either
3884	 * way, try a little harder.
3885	 */
3886	if (wild == ep)
3887		ep = findinterface(srcadr);
3888	/*
3889	 * we do not bind to the wildcard interfaces for output
3890	 * as our (network) source address would be undefined and
3891	 * crypto will not work without knowing the own transmit address
3892	 */
3893	if (ep != NULL && INT_WILDCARD & ep->flags)
3894		if (!accept_wildcard_if_for_winnt)
3895			ep = NULL;
3896#else	/* SIM follows */
3897	ep = loopback_interface;
3898#endif
3899
3900	return ep;
3901}
3902
3903
3904/*
3905 * findinterface - find local interface corresponding to address
3906 */
3907endpt *
3908findinterface(
3909	sockaddr_u *addr
3910	)
3911{
3912	endpt *iface;
3913
3914	iface = findlocalinterface(addr, INT_WILDCARD, 0);
3915
3916	if (NULL == iface) {
3917		DPRINTF(4, ("Found no interface for address %s - returning wildcard\n",
3918			    stoa(addr)));
3919
3920		iface = ANY_INTERFACE_CHOOSE(addr);
3921	} else
3922		DPRINTF(4, ("Found interface #%d %s for address %s\n",
3923			    iface->ifnum, iface->name, stoa(addr)));
3924
3925	return iface;
3926}
3927
3928/*
3929 * findlocalinterface - find local interface corresponding to addr,
3930 * which does not have any of flags set.  If bast is nonzero, addr is
3931 * a broadcast address.
3932 *
3933 * This code attempts to find the local sending address for an outgoing
3934 * address by connecting a new socket to destinationaddress:NTP_PORT
3935 * and reading the sockname of the resulting connect.
3936 * the complicated sequence simulates the routing table lookup
3937 * for to first hop without duplicating any of the routing logic into
3938 * ntpd. preferably we would have used an API call - but its not there -
3939 * so this is the best we can do here short of duplicating to entire routing
3940 * logic in ntpd which would be a silly and really unportable thing to do.
3941 *
3942 */
3943static endpt *
3944findlocalinterface(
3945	sockaddr_u *	addr,
3946	int		flags,
3947	int		bcast
3948	)
3949{
3950	GETSOCKNAME_SOCKLEN_TYPE	sockaddrlen;
3951	endpt *				iface;
3952	sockaddr_u			saddr;
3953	SOCKET				s;
3954	int				rtn;
3955	int				on;
3956
3957	DPRINTF(4, ("Finding interface for addr %s in list of addresses\n",
3958		    stoa(addr)));
3959
3960	s = socket(AF(addr), SOCK_DGRAM, 0);
3961	if (INVALID_SOCKET == s)
3962		return NULL;
3963
3964	/*
3965	 * If we are looking for broadcast interface we need to set this
3966	 * socket to allow broadcast
3967	 */
3968	if (bcast) {
3969		on = 1;
3970		if (SOCKET_ERROR == setsockopt(s, SOL_SOCKET,
3971						SO_BROADCAST,
3972						(char *)&on,
3973						sizeof(on))) {
3974			closesocket(s);
3975			return NULL;
3976		}
3977	}
3978
3979	rtn = connect(s, &addr->sa, SOCKLEN(addr));
3980	if (SOCKET_ERROR == rtn) {
3981		closesocket(s);
3982		return NULL;
3983	}
3984
3985	sockaddrlen = sizeof(saddr);
3986	rtn = getsockname(s, &saddr.sa, &sockaddrlen);
3987	closesocket(s);
3988	if (SOCKET_ERROR == rtn)
3989		return NULL;
3990
3991	DPRINTF(4, ("findlocalinterface: kernel maps %s to %s\n",
3992		    stoa(addr), stoa(&saddr)));
3993
3994	iface = getinterface(&saddr, flags);
3995
3996	/*
3997	 * if we didn't find an exact match on saddr, find the closest
3998	 * available local address.  This handles the case of the
3999	 * address suggested by the kernel being excluded by nic rules
4000	 * or the user's -I and -L options to ntpd.
4001	 * See http://bugs.ntp.org/1184 and http://bugs.ntp.org/1683
4002	 * for more background.
4003	 */
4004	if (NULL == iface || iface->ignore_packets)
4005		iface = findclosestinterface(&saddr,
4006					     flags | INT_LOOPBACK);
4007
4008	/* Don't use an interface which will ignore replies */
4009	if (iface != NULL && iface->ignore_packets)
4010		iface = NULL;
4011
4012	return iface;
4013}
4014
4015
4016/*
4017 * findclosestinterface
4018 *
4019 * If there are -I/--interface or -L/novirtualips command-line options,
4020 * or "nic" or "interface" rules in ntp.conf, findlocalinterface() may
4021 * find the kernel's preferred local address for a given peer address is
4022 * administratively unavailable to ntpd, and punt to this routine's more
4023 * expensive search.
4024 *
4025 * Find the numerically closest local address to the one connect()
4026 * suggested.  This matches an address on the same subnet first, as
4027 * needed by Bug 1184, and provides a consistent choice if there are
4028 * multiple feasible local addresses, regardless of the order ntpd
4029 * enumerated them.
4030 */
4031endpt *
4032findclosestinterface(
4033	sockaddr_u *	addr,
4034	int		flags
4035	)
4036{
4037	endpt *		ep;
4038	endpt *		winner;
4039	sockaddr_u	addr_dist;
4040	sockaddr_u	min_dist;
4041
4042	ZERO_SOCK(&min_dist);
4043	winner = NULL;
4044
4045	for (ep = ep_list; ep != NULL; ep = ep->elink) {
4046		if (ep->ignore_packets ||
4047		    AF(addr) != ep->family ||
4048		    flags & ep->flags)
4049			continue;
4050
4051		calc_addr_distance(&addr_dist, addr, &ep->sin);
4052		if (NULL == winner ||
4053		    -1 == cmp_addr_distance(&addr_dist, &min_dist)) {
4054			min_dist = addr_dist;
4055			winner = ep;
4056		}
4057	}
4058	if (NULL == winner)
4059		DPRINTF(4, ("findclosestinterface(%s) failed\n",
4060			    stoa(addr)));
4061	else
4062		DPRINTF(4, ("findclosestinterface(%s) -> %s\n",
4063			    stoa(addr), stoa(&winner->sin)));
4064
4065	return winner;
4066}
4067
4068
4069/*
4070 * calc_addr_distance - calculate the distance between two addresses,
4071 *			the absolute value of the difference between
4072 *			the addresses numerically, stored as an address.
4073 */
4074static void
4075calc_addr_distance(
4076	sockaddr_u *		dist,
4077	const sockaddr_u *	a1,
4078	const sockaddr_u *	a2
4079	)
4080{
4081	u_int32	a1val;
4082	u_int32	a2val;
4083	u_int32	v4dist;
4084	int	found_greater;
4085	int	a1_greater;
4086	int	i;
4087
4088	REQUIRE(AF(a1) == AF(a2));
4089
4090	ZERO_SOCK(dist);
4091	AF(dist) = AF(a1);
4092
4093	/* v4 can be done a bit simpler */
4094	if (IS_IPV4(a1)) {
4095		a1val = SRCADR(a1);
4096		a2val = SRCADR(a2);
4097		v4dist = (a1val > a2val)
4098			     ? a1val - a2val
4099			     : a2val - a1val;
4100		SET_ADDR4(dist, v4dist);
4101
4102		return;
4103	}
4104
4105	found_greater = FALSE;
4106	a1_greater = FALSE;	/* suppress pot. uninit. warning */
4107	for (i = 0; i < (int)sizeof(NSRCADR6(a1)); i++) {
4108		if (!found_greater &&
4109		    NSRCADR6(a1)[i] != NSRCADR6(a2)[i]) {
4110			found_greater = TRUE;
4111			a1_greater = (NSRCADR6(a1)[i] > NSRCADR6(a2)[i]);
4112		}
4113		if (!found_greater) {
4114			NSRCADR6(dist)[i] = 0;
4115		} else {
4116			if (a1_greater)
4117				NSRCADR6(dist)[i] = NSRCADR6(a1)[i] -
4118						    NSRCADR6(a2)[i];
4119			else
4120				NSRCADR6(dist)[i] = NSRCADR6(a2)[i] -
4121						    NSRCADR6(a1)[i];
4122		}
4123	}
4124}
4125
4126
4127/*
4128 * cmp_addr_distance - compare two address distances, returning -1, 0,
4129 *		       1 to indicate their relationship.
4130 */
4131static int
4132cmp_addr_distance(
4133	const sockaddr_u *	d1,
4134	const sockaddr_u *	d2
4135	)
4136{
4137	int	i;
4138
4139	REQUIRE(AF(d1) == AF(d2));
4140
4141	if (IS_IPV4(d1)) {
4142		if (SRCADR(d1) < SRCADR(d2))
4143			return -1;
4144		else if (SRCADR(d1) == SRCADR(d2))
4145			return 0;
4146		else
4147			return 1;
4148	}
4149
4150	for (i = 0; i < (int)sizeof(NSRCADR6(d1)); i++) {
4151		if (NSRCADR6(d1)[i] < NSRCADR6(d2)[i])
4152			return -1;
4153		else if (NSRCADR6(d1)[i] > NSRCADR6(d2)[i])
4154			return 1;
4155	}
4156
4157	return 0;
4158}
4159
4160
4161
4162/*
4163 * fetch an interface structure the matches the
4164 * address and has the given flags NOT set
4165 */
4166endpt *
4167getinterface(
4168	sockaddr_u *	addr,
4169	u_int32		flags
4170	)
4171{
4172	endpt *iface;
4173
4174	iface = find_addr_in_list(addr);
4175
4176	if (iface != NULL && (iface->flags & flags))
4177		iface = NULL;
4178
4179	return iface;
4180}
4181
4182
4183/*
4184 * findbcastinter - find broadcast interface corresponding to address
4185 */
4186endpt *
4187findbcastinter(
4188	sockaddr_u *addr
4189	)
4190{
4191	endpt *	iface;
4192
4193	iface = NULL;
4194#if !defined(MPE) && (defined(SIOCGIFCONF) || defined(SYS_WINNT))
4195	DPRINTF(4, ("Finding broadcast/multicast interface for addr %s in list of addresses\n",
4196		    stoa(addr)));
4197
4198	iface = findlocalinterface(addr, INT_LOOPBACK | INT_WILDCARD,
4199				   1);
4200	if (iface != NULL) {
4201		DPRINTF(4, ("Easily found bcast-/mcast- interface index #%d %s\n",
4202			    iface->ifnum, iface->name));
4203		return iface;
4204	}
4205
4206	/*
4207	 * plan B - try to find something reasonable in our lists in
4208	 * case kernel lookup doesn't help
4209	 */
4210	for (iface = ep_list; iface != NULL; iface = iface->elink) {
4211		if (iface->flags & INT_WILDCARD)
4212			continue;
4213
4214		/* Don't bother with ignored interfaces */
4215		if (iface->ignore_packets)
4216			continue;
4217
4218		/*
4219		 * First look if this is the correct family
4220		 */
4221		if(AF(&iface->sin) != AF(addr))
4222			continue;
4223
4224		/* Skip the loopback addresses */
4225		if (iface->flags & INT_LOOPBACK)
4226			continue;
4227
4228		/*
4229		 * If we are looking to match a multicast address and
4230		 * this interface is one...
4231		 */
4232		if (addr_ismulticast(addr)
4233		    && (iface->flags & INT_MULTICAST)) {
4234#ifdef INCLUDE_IPV6_SUPPORT
4235			/*
4236			 * ...it is the winner unless we're looking for
4237			 * an interface to use for link-local multicast
4238			 * and its address is not link-local.
4239			 */
4240			if (IS_IPV6(addr)
4241			    && IN6_IS_ADDR_MC_LINKLOCAL(PSOCK_ADDR6(addr))
4242			    && !IN6_IS_ADDR_LINKLOCAL(PSOCK_ADDR6(&iface->sin)))
4243				continue;
4244#endif
4245			break;
4246		}
4247
4248		/*
4249		 * We match only those interfaces marked as
4250		 * broadcastable and either the explicit broadcast
4251		 * address or the network portion of the IP address.
4252		 * Sloppy.
4253		 */
4254		if (IS_IPV4(addr)) {
4255			if (SOCK_EQ(&iface->bcast, addr))
4256				break;
4257
4258			if ((NSRCADR(&iface->sin) & NSRCADR(&iface->mask))
4259			    == (NSRCADR(addr)	  & NSRCADR(&iface->mask)))
4260				break;
4261		}
4262#ifdef INCLUDE_IPV6_SUPPORT
4263		else if (IS_IPV6(addr)) {
4264			if (SOCK_EQ(&iface->bcast, addr))
4265				break;
4266
4267			if (SOCK_EQ(netof(&iface->sin), netof(addr)))
4268				break;
4269		}
4270#endif
4271	}
4272#endif /* SIOCGIFCONF */
4273	if (NULL == iface) {
4274		DPRINTF(4, ("No bcast interface found for %s\n",
4275			    stoa(addr)));
4276		iface = ANY_INTERFACE_CHOOSE(addr);
4277	} else {
4278		DPRINTF(4, ("Found bcast-/mcast- interface index #%d %s\n",
4279			    iface->ifnum, iface->name));
4280	}
4281
4282	return iface;
4283}
4284
4285
4286/*
4287 * io_clr_stats - clear I/O module statistics
4288 */
4289void
4290io_clr_stats(void)
4291{
4292	packets_dropped = 0;
4293	packets_ignored = 0;
4294	packets_received = 0;
4295	packets_sent = 0;
4296	packets_notsent = 0;
4297
4298	handler_calls = 0;
4299	handler_pkts = 0;
4300	io_timereset = current_time;
4301}
4302
4303
4304#ifdef REFCLOCK
4305/*
4306 * io_addclock - add a reference clock to the list and arrange that we
4307 *				 get SIGIO interrupts from it.
4308 */
4309int
4310io_addclock(
4311	struct refclockio *rio
4312	)
4313{
4314	BLOCKIO();
4315
4316	/*
4317	 * Stuff the I/O structure in the list and mark the descriptor
4318	 * in use.  There is a harmless (I hope) race condition here.
4319	 */
4320	rio->active = TRUE;
4321
4322# ifdef HAVE_SIGNALED_IO
4323	if (init_clock_sig(rio)) {
4324		UNBLOCKIO();
4325		return 0;
4326	}
4327# elif defined(HAVE_IO_COMPLETION_PORT)
4328	if (!io_completion_port_add_clock_io(rio)) {
4329		UNBLOCKIO();
4330		return 0;
4331	}
4332# endif
4333
4334	/*
4335	 * enqueue
4336	 */
4337	LINK_SLIST(refio, rio, next);
4338
4339	/*
4340	 * register fd
4341	 */
4342	add_fd_to_list(rio->fd, FD_TYPE_FILE);
4343
4344	UNBLOCKIO();
4345	return 1;
4346}
4347
4348
4349/*
4350 * io_closeclock - close the clock in the I/O structure given
4351 */
4352void
4353io_closeclock(
4354	struct refclockio *rio
4355	)
4356{
4357	struct refclockio *unlinked;
4358
4359	BLOCKIO();
4360
4361	/*
4362	 * Remove structure from the list
4363	 */
4364	rio->active = FALSE;
4365	UNLINK_SLIST(unlinked, refio, rio, next, struct refclockio);
4366	if (NULL != unlinked) {
4367		/* Close the descriptor. The order of operations is
4368		 * important here in case of async / overlapped IO:
4369		 * only after we have removed the clock from the
4370		 * IO completion port we can be sure no further
4371		 * input is queued. So...
4372		 *  - we first disable feeding to the queu by removing
4373		 *    the clock from the IO engine
4374		 *  - close the file (which brings down any IO on it)
4375		 *  - clear the buffer from results for this fd
4376		 */
4377#	    ifdef HAVE_IO_COMPLETION_PORT
4378		io_completion_port_remove_clock_io(rio);
4379#	    endif
4380		close_and_delete_fd_from_list(rio->fd);
4381		purge_recv_buffers_for_fd(rio->fd);
4382		rio->fd = -1;
4383	}
4384
4385	UNBLOCKIO();
4386}
4387#endif	/* REFCLOCK */
4388
4389
4390/*
4391 * On NT a SOCKET is an unsigned int so we cannot possibly keep it in
4392 * an array. So we use one of the ISC_LIST functions to hold the
4393 * socket value and use that when we want to enumerate it.
4394 *
4395 * This routine is called by the forked intres child process to close
4396 * all open sockets.  On Windows there's no need as intres runs in
4397 * the same process as a thread.
4398 */
4399#ifndef SYS_WINNT
4400void
4401kill_asyncio(
4402	int	startfd
4403	)
4404{
4405	BLOCKIO();
4406
4407	/*
4408	 * In the child process we do not maintain activefds and
4409	 * maxactivefd.  Zeroing maxactivefd disables code which
4410	 * maintains it in close_and_delete_fd_from_list().
4411	 */
4412	maxactivefd = 0;
4413
4414	while (fd_list != NULL)
4415		close_and_delete_fd_from_list(fd_list->fd);
4416
4417	UNBLOCKIO();
4418}
4419#endif	/* !SYS_WINNT */
4420
4421
4422/*
4423 * Add and delete functions for the list of open sockets
4424 */
4425static void
4426add_fd_to_list(
4427	SOCKET fd,
4428	enum desc_type type
4429	)
4430{
4431	vsock_t *lsock = emalloc(sizeof(*lsock));
4432
4433	lsock->fd = fd;
4434	lsock->type = type;
4435
4436	LINK_SLIST(fd_list, lsock, link);
4437	maintain_activefds(fd, 0);
4438}
4439
4440
4441static void
4442close_and_delete_fd_from_list(
4443	SOCKET fd
4444	)
4445{
4446	vsock_t *lsock;
4447
4448	UNLINK_EXPR_SLIST(lsock, fd_list, fd ==
4449	    UNLINK_EXPR_SLIST_CURRENT()->fd, link, vsock_t);
4450
4451	if (NULL == lsock)
4452		return;
4453
4454	switch (lsock->type) {
4455
4456	case FD_TYPE_SOCKET:
4457		closesocket(lsock->fd);
4458		break;
4459
4460	case FD_TYPE_FILE:
4461		closeserial((int)lsock->fd);
4462		break;
4463
4464	default:
4465		msyslog(LOG_ERR,
4466			"internal error - illegal descriptor type %d - EXITING",
4467			(int)lsock->type);
4468		exit(1);
4469	}
4470
4471	free(lsock);
4472	/*
4473	 * remove from activefds
4474	 */
4475	maintain_activefds(fd, 1);
4476}
4477
4478
4479static void
4480add_addr_to_list(
4481	sockaddr_u *	addr,
4482	endpt *		ep
4483	)
4484{
4485	remaddr_t *laddr;
4486
4487#ifdef DEBUG
4488	if (find_addr_in_list(addr) == NULL) {
4489#endif
4490		/* not there yet - add to list */
4491		laddr = emalloc(sizeof(*laddr));
4492		laddr->addr = *addr;
4493		laddr->ep = ep;
4494
4495		LINK_SLIST(remoteaddr_list, laddr, link);
4496
4497		DPRINTF(4, ("Added addr %s to list of addresses\n",
4498			    stoa(addr)));
4499#ifdef DEBUG
4500	} else
4501		DPRINTF(4, ("WARNING: Attempt to add duplicate addr %s to address list\n",
4502			    stoa(addr)));
4503#endif
4504}
4505
4506
4507static void
4508delete_addr_from_list(
4509	sockaddr_u *addr
4510	)
4511{
4512	remaddr_t *unlinked;
4513
4514	UNLINK_EXPR_SLIST(unlinked, remoteaddr_list, SOCK_EQ(addr,
4515		&(UNLINK_EXPR_SLIST_CURRENT()->addr)), link, remaddr_t);
4516
4517	if (unlinked != NULL) {
4518		DPRINTF(4, ("Deleted addr %s from list of addresses\n",
4519			stoa(addr)));
4520		free(unlinked);
4521	}
4522}
4523
4524
4525static void
4526delete_interface_from_list(
4527	endpt *iface
4528	)
4529{
4530	remaddr_t *unlinked;
4531
4532	for (;;) {
4533		UNLINK_EXPR_SLIST(unlinked, remoteaddr_list, iface ==
4534		    UNLINK_EXPR_SLIST_CURRENT()->ep, link,
4535		    remaddr_t);
4536
4537		if (unlinked == NULL)
4538			break;
4539		DPRINTF(4, ("Deleted addr %s for interface #%d %s from list of addresses\n",
4540			    stoa(&unlinked->addr), iface->ifnum,
4541			    iface->name));
4542		free(unlinked);
4543	}
4544}
4545
4546
4547static struct interface *
4548find_addr_in_list(
4549	sockaddr_u *addr
4550	)
4551{
4552	remaddr_t *entry;
4553
4554	DPRINTF(4, ("Searching for addr %s in list of addresses - ",
4555		    stoa(addr)));
4556
4557	for (entry = remoteaddr_list;
4558	     entry != NULL;
4559	     entry = entry->link)
4560		if (SOCK_EQ(&entry->addr, addr)) {
4561			DPRINTF(4, ("FOUND\n"));
4562			return entry->ep;
4563		}
4564
4565	DPRINTF(4, ("NOT FOUND\n"));
4566	return NULL;
4567}
4568
4569
4570/*
4571 * Find the given address with the all given flags set in the list
4572 */
4573static endpt *
4574find_flagged_addr_in_list(
4575	sockaddr_u *	addr,
4576	u_int32		flags
4577	)
4578{
4579	remaddr_t *entry;
4580
4581	DPRINTF(4, ("Finding addr %s with flags %d in list: ",
4582		    stoa(addr), flags));
4583
4584	for (entry = remoteaddr_list;
4585	     entry != NULL;
4586	     entry = entry->link)
4587
4588		if (SOCK_EQ(&entry->addr, addr)
4589		    && (entry->ep->flags & flags) == flags) {
4590
4591			DPRINTF(4, ("FOUND\n"));
4592			return entry->ep;
4593		}
4594
4595	DPRINTF(4, ("NOT FOUND\n"));
4596	return NULL;
4597}
4598
4599
4600const char *
4601localaddrtoa(
4602	endpt *la
4603	)
4604{
4605	return (NULL == la)
4606		   ? "<null>"
4607		   : stoa(&la->sin);
4608}
4609
4610
4611#ifdef HAS_ROUTING_SOCKET
4612# ifndef UPDATE_GRACE
4613#  define UPDATE_GRACE	2	/* wait UPDATE_GRACE seconds before scanning */
4614# endif
4615
/*
 * process_routing_msgs - read the routing socket and schedule an
 * interface update when a relevant message is seen.
 *
 * reader	the asyncio reader owning the routing socket; it is
 *		removed and deleted here when dynamic updates are
 *		disabled, on a read error other than ENOBUFS, or (non
 *		rtnetlink) on a routing message version mismatch.
 */
static void
process_routing_msgs(struct asyncio_reader *reader)
{
	char buffer[5120];
	int cnt, msg_type;
#ifdef HAVE_RTNETLINK
	struct nlmsghdr *nh;
#else
	struct rt_msghdr rtm;
	char *p;
#endif

	if (disable_dynamic_updates) {
		/*
		 * discard ourselves if we are not needed any more
		 * usually happens when running unprivileged
		 */
		remove_asyncio_reader(reader);
		delete_asyncio_reader(reader);
		return;
	}

	cnt = read(reader->fd, buffer, sizeof(buffer));

	if (cnt < 0) {
		/* ENOBUFS is only logged; any other error retires the reader */
		if (errno == ENOBUFS) {
			msyslog(LOG_ERR,
				"routing socket reports: %m");
		} else {
			msyslog(LOG_ERR,
				"routing socket reports: %m - disabling");
			remove_asyncio_reader(reader);
			delete_asyncio_reader(reader);
		}
		return;
	}

	/*
	 * process routing message
	 *
	 * Both variants below open the same loop body: one walks
	 * netlink headers, the other walks rt_msghdr records by
	 * rtm_msglen.
	 */
#ifdef HAVE_RTNETLINK
	for (nh = UA_PTR(struct nlmsghdr, buffer);
	     NLMSG_OK(nh, cnt);
	     nh = NLMSG_NEXT(nh, cnt)) {
		msg_type = nh->nlmsg_type;
#else
	for (p = buffer;
	     (p + sizeof(struct rt_msghdr)) <= (buffer + cnt);
	     p += rtm.rtm_msglen) {
		/* copy the header out of the byte buffer before reading its fields */
		memcpy(&rtm, p, sizeof(rtm));
		if (rtm.rtm_version != RTM_VERSION) {
			msyslog(LOG_ERR,
				"version mismatch (got %d - expected %d) on routing socket - disabling",
				rtm.rtm_version, RTM_VERSION);

			remove_asyncio_reader(reader);
			delete_asyncio_reader(reader);
			return;
		}
		msg_type = rtm.rtm_type;
#endif
		/* each case label exists only if the platform defines that message type */
		switch (msg_type) {
#ifdef RTM_NEWADDR
		case RTM_NEWADDR:
#endif
#ifdef RTM_DELADDR
		case RTM_DELADDR:
#endif
#ifdef RTM_ADD
		case RTM_ADD:
#endif
#ifdef RTM_DELETE
		case RTM_DELETE:
#endif
#ifdef RTM_REDIRECT
		case RTM_REDIRECT:
#endif
#ifdef RTM_CHANGE
		case RTM_CHANGE:
#endif
#ifdef RTM_LOSING
		case RTM_LOSING:
#endif
#ifdef RTM_IFINFO
		case RTM_IFINFO:
#endif
#ifdef RTM_IFANNOUNCE
		case RTM_IFANNOUNCE:
#endif
#ifdef RTM_NEWLINK
		case RTM_NEWLINK:
#endif
#ifdef RTM_DELLINK
		case RTM_DELLINK:
#endif
#ifdef RTM_NEWROUTE
		case RTM_NEWROUTE:
#endif
#ifdef RTM_DELROUTE
		case RTM_DELROUTE:
#endif
			/*
			 * we are keen on new and deleted addresses and
			 * if an interface goes up and down or routing
			 * changes
			 */
			DPRINTF(3, ("routing message op = %d: scheduling interface update\n",
				    msg_type));
			timer_interfacetimeout(current_time + UPDATE_GRACE);
			break;
#ifdef HAVE_RTNETLINK
		case NLMSG_DONE:
			/* end of multipart message */
			return;
#endif
		default:
			/*
			 * the rest doesn't bother us.
			 */
			DPRINTF(4, ("routing message op = %d: ignored\n",
				    msg_type));
			break;
		}
	}
}
4741
4742/*
4743 * set up routing notifications
4744 */
4745static void
4746init_async_notifications()
4747{
4748	struct asyncio_reader *reader;
4749#ifdef HAVE_RTNETLINK
4750	int fd = socket(PF_NETLINK, SOCK_RAW, NETLINK_ROUTE);
4751	struct sockaddr_nl sa;
4752#else
4753	int fd = socket(PF_ROUTE, SOCK_RAW, 0);
4754#endif
4755	if (fd < 0) {
4756		msyslog(LOG_ERR,
4757			"unable to open routing socket (%m) - using polled interface update");
4758		return;
4759	}
4760
4761	fd = move_fd(fd);
4762#ifdef HAVE_RTNETLINK
4763	ZERO(sa);
4764	sa.nl_family = PF_NETLINK;
4765	sa.nl_groups = RTMGRP_LINK | RTMGRP_IPV4_IFADDR
4766		       | RTMGRP_IPV6_IFADDR | RTMGRP_IPV4_ROUTE
4767		       | RTMGRP_IPV4_MROUTE | RTMGRP_IPV6_ROUTE
4768		       | RTMGRP_IPV6_MROUTE;
4769	if (bind(fd, (struct sockaddr *)&sa, sizeof(sa)) < 0) {
4770		msyslog(LOG_ERR,
4771			"bind failed on routing socket (%m) - using polled interface update");
4772		return;
4773	}
4774#endif
4775	make_socket_nonblocking(fd);
4776#if defined(HAVE_SIGNALED_IO)
4777	init_socket_sig(fd);
4778#endif /* HAVE_SIGNALED_IO */
4779
4780	reader = new_asyncio_reader();
4781
4782	reader->fd = fd;
4783	reader->receiver = process_routing_msgs;
4784
4785	add_asyncio_reader(reader, FD_TYPE_SOCKET);
4786	msyslog(LOG_INFO,
4787		"Listening on routing socket on fd #%d for interface updates",
4788		fd);
4789}
4790#else
/*
 * init_async_notifications - variant used when HAS_ROUTING_SOCKET is
 * not defined.  There is no routing socket to listen on, so this is
 * an empty placeholder that keeps the call site unconditional.
 */
static void
init_async_notifications(void)
{
	/* intentionally empty: nothing to set up without a routing socket */
}
4796#endif
4797
4798