listen_dnsport.c revision 285206
1/*
2 * services/listen_dnsport.c - listen on port 53 for incoming DNS queries.
3 *
4 * Copyright (c) 2007, NLnet Labs. All rights reserved.
5 *
6 * This software is open source.
7 *
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
10 * are met:
11 *
12 * Redistributions of source code must retain the above copyright notice,
13 * this list of conditions and the following disclaimer.
14 *
15 * Redistributions in binary form must reproduce the above copyright notice,
16 * this list of conditions and the following disclaimer in the documentation
17 * and/or other materials provided with the distribution.
18 *
19 * Neither the name of the NLNET LABS nor the names of its contributors may
20 * be used to endorse or promote products derived from this software without
21 * specific prior written permission.
22 *
23 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
24 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
25 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
26 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
27 * HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
28 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
29 * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
30 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
31 * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
32 * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
33 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
34 */
35
36/**
37 * \file
38 *
39 * This file has functions to get queries from clients.
40 */
41#include "config.h"
42#ifdef HAVE_SYS_TYPES_H
43#  include <sys/types.h>
44#endif
45#include <sys/time.h>
46#include "services/listen_dnsport.h"
47#include "services/outside_network.h"
48#include "util/netevent.h"
49#include "util/log.h"
50#include "util/config_file.h"
51#include "util/net_help.h"
52#include "ldns/sbuffer.h"
53
54#ifdef HAVE_NETDB_H
55#include <netdb.h>
56#endif
57#include <fcntl.h>
58
59#ifdef HAVE_SYS_UN_H
60#include <sys/un.h>
61#endif
62
63/** number of queued TCP connections for listen() */
64#define TCP_BACKLOG 256
65
66/**
67 * Debug print of the getaddrinfo returned address.
68 * @param addr: the address returned.
69 */
70static void
71verbose_print_addr(struct addrinfo *addr)
72{
73	if(verbosity >= VERB_ALGO) {
74		char buf[100];
75		void* sinaddr = &((struct sockaddr_in*)addr->ai_addr)->sin_addr;
76#ifdef INET6
77		if(addr->ai_family == AF_INET6)
78			sinaddr = &((struct sockaddr_in6*)addr->ai_addr)->
79				sin6_addr;
80#endif /* INET6 */
81		if(inet_ntop(addr->ai_family, sinaddr, buf,
82			(socklen_t)sizeof(buf)) == 0) {
83			(void)strlcpy(buf, "(null)", sizeof(buf));
84		}
85		buf[sizeof(buf)-1] = 0;
86		verbose(VERB_ALGO, "creating %s%s socket %s %d",
87			addr->ai_socktype==SOCK_DGRAM?"udp":
88			addr->ai_socktype==SOCK_STREAM?"tcp":"otherproto",
89			addr->ai_family==AF_INET?"4":
90			addr->ai_family==AF_INET6?"6":
91			"_otherfam", buf,
92			ntohs(((struct sockaddr_in*)addr->ai_addr)->sin_port));
93	}
94}
95
96int
97create_udp_sock(int family, int socktype, struct sockaddr* addr,
98        socklen_t addrlen, int v6only, int* inuse, int* noproto,
99	int rcv, int snd, int listen, int* reuseport)
100{
101	int s;
102#if defined(SO_REUSEADDR) || defined(SO_REUSEPORT) || defined(IPV6_USE_MIN_MTU)
103	int on=1;
104#endif
105#ifdef IPV6_MTU
106	int mtu = IPV6_MIN_MTU;
107#endif
108#if !defined(SO_RCVBUFFORCE) && !defined(SO_RCVBUF)
109	(void)rcv;
110#endif
111#if !defined(SO_SNDBUFFORCE) && !defined(SO_SNDBUF)
112	(void)snd;
113#endif
114#ifndef IPV6_V6ONLY
115	(void)v6only;
116#endif
117	if((s = socket(family, socktype, 0)) == -1) {
118		*inuse = 0;
119#ifndef USE_WINSOCK
120		if(errno == EAFNOSUPPORT || errno == EPROTONOSUPPORT) {
121			*noproto = 1;
122			return -1;
123		}
124		log_err("can't create socket: %s", strerror(errno));
125#else
126		if(WSAGetLastError() == WSAEAFNOSUPPORT ||
127			WSAGetLastError() == WSAEPROTONOSUPPORT) {
128			*noproto = 1;
129			return -1;
130		}
131		log_err("can't create socket: %s",
132			wsa_strerror(WSAGetLastError()));
133#endif
134		*noproto = 0;
135		return -1;
136	}
137	if(listen) {
138#ifdef SO_REUSEADDR
139		if(setsockopt(s, SOL_SOCKET, SO_REUSEADDR, (void*)&on,
140			(socklen_t)sizeof(on)) < 0) {
141#ifndef USE_WINSOCK
142			log_err("setsockopt(.. SO_REUSEADDR ..) failed: %s",
143				strerror(errno));
144			if(errno != ENOSYS) {
145				close(s);
146				*noproto = 0;
147				*inuse = 0;
148				return -1;
149			}
150#else
151			log_err("setsockopt(.. SO_REUSEADDR ..) failed: %s",
152				wsa_strerror(WSAGetLastError()));
153			closesocket(s);
154			*noproto = 0;
155			*inuse = 0;
156			return -1;
157#endif
158		}
159#endif /* SO_REUSEADDR */
160#ifdef SO_REUSEPORT
161		/* try to set SO_REUSEPORT so that incoming
162		 * queries are distributed evenly among the receiving threads.
163		 * Each thread must have its own socket bound to the same port,
164		 * with SO_REUSEPORT set on each socket.
165		 */
166		if (reuseport && *reuseport &&
167		    setsockopt(s, SOL_SOCKET, SO_REUSEPORT, (void*)&on,
168			(socklen_t)sizeof(on)) < 0) {
169#ifdef ENOPROTOOPT
170			if(errno != ENOPROTOOPT || verbosity >= 3)
171				log_warn("setsockopt(.. SO_REUSEPORT ..) failed: %s",
172					strerror(errno));
173#endif
174			/* this option is not essential, we can continue */
175			*reuseport = 0;
176		}
177#else
178		(void)reuseport;
179#endif /* defined(SO_REUSEPORT) */
180	}
181	if(rcv) {
182#ifdef SO_RCVBUF
183		int got;
184		socklen_t slen = (socklen_t)sizeof(got);
185#  ifdef SO_RCVBUFFORCE
186		/* Linux specific: try to use root permission to override
187		 * system limits on rcvbuf. The limit is stored in
188		 * /proc/sys/net/core/rmem_max or sysctl net.core.rmem_max */
189		if(setsockopt(s, SOL_SOCKET, SO_RCVBUFFORCE, (void*)&rcv,
190			(socklen_t)sizeof(rcv)) < 0) {
191			if(errno != EPERM) {
192#    ifndef USE_WINSOCK
193				log_err("setsockopt(..., SO_RCVBUFFORCE, "
194					"...) failed: %s", strerror(errno));
195				close(s);
196#    else
197				log_err("setsockopt(..., SO_RCVBUFFORCE, "
198					"...) failed: %s",
199					wsa_strerror(WSAGetLastError()));
200				closesocket(s);
201#    endif
202				*noproto = 0;
203				*inuse = 0;
204				return -1;
205			}
206#  endif /* SO_RCVBUFFORCE */
207			if(setsockopt(s, SOL_SOCKET, SO_RCVBUF, (void*)&rcv,
208				(socklen_t)sizeof(rcv)) < 0) {
209#  ifndef USE_WINSOCK
210				log_err("setsockopt(..., SO_RCVBUF, "
211					"...) failed: %s", strerror(errno));
212				close(s);
213#  else
214				log_err("setsockopt(..., SO_RCVBUF, "
215					"...) failed: %s",
216					wsa_strerror(WSAGetLastError()));
217				closesocket(s);
218#  endif
219				*noproto = 0;
220				*inuse = 0;
221				return -1;
222			}
223			/* check if we got the right thing or if system
224			 * reduced to some system max.  Warn if so */
225			if(getsockopt(s, SOL_SOCKET, SO_RCVBUF, (void*)&got,
226				&slen) >= 0 && got < rcv/2) {
227				log_warn("so-rcvbuf %u was not granted. "
228					"Got %u. To fix: start with "
229					"root permissions(linux) or sysctl "
230					"bigger net.core.rmem_max(linux) or "
231					"kern.ipc.maxsockbuf(bsd) values.",
232					(unsigned)rcv, (unsigned)got);
233			}
234#  ifdef SO_RCVBUFFORCE
235		}
236#  endif
237#endif /* SO_RCVBUF */
238	}
239	/* first do RCVBUF as the receive buffer is more important */
240	if(snd) {
241#ifdef SO_SNDBUF
242		int got;
243		socklen_t slen = (socklen_t)sizeof(got);
244#  ifdef SO_SNDBUFFORCE
245		/* Linux specific: try to use root permission to override
246		 * system limits on sndbuf. The limit is stored in
247		 * /proc/sys/net/core/wmem_max or sysctl net.core.wmem_max */
248		if(setsockopt(s, SOL_SOCKET, SO_SNDBUFFORCE, (void*)&snd,
249			(socklen_t)sizeof(snd)) < 0) {
250			if(errno != EPERM) {
251#    ifndef USE_WINSOCK
252				log_err("setsockopt(..., SO_SNDBUFFORCE, "
253					"...) failed: %s", strerror(errno));
254				close(s);
255#    else
256				log_err("setsockopt(..., SO_SNDBUFFORCE, "
257					"...) failed: %s",
258					wsa_strerror(WSAGetLastError()));
259				closesocket(s);
260#    endif
261				*noproto = 0;
262				*inuse = 0;
263				return -1;
264			}
265#  endif /* SO_SNDBUFFORCE */
266			if(setsockopt(s, SOL_SOCKET, SO_SNDBUF, (void*)&snd,
267				(socklen_t)sizeof(snd)) < 0) {
268#  ifndef USE_WINSOCK
269				log_err("setsockopt(..., SO_SNDBUF, "
270					"...) failed: %s", strerror(errno));
271				close(s);
272#  else
273				log_err("setsockopt(..., SO_SNDBUF, "
274					"...) failed: %s",
275					wsa_strerror(WSAGetLastError()));
276				closesocket(s);
277#  endif
278				*noproto = 0;
279				*inuse = 0;
280				return -1;
281			}
282			/* check if we got the right thing or if system
283			 * reduced to some system max.  Warn if so */
284			if(getsockopt(s, SOL_SOCKET, SO_SNDBUF, (void*)&got,
285				&slen) >= 0 && got < snd/2) {
286				log_warn("so-sndbuf %u was not granted. "
287					"Got %u. To fix: start with "
288					"root permissions(linux) or sysctl "
289					"bigger net.core.wmem_max(linux) or "
290					"kern.ipc.maxsockbuf(bsd) values.",
291					(unsigned)snd, (unsigned)got);
292			}
293#  ifdef SO_SNDBUFFORCE
294		}
295#  endif
296#endif /* SO_SNDBUF */
297	}
298	if(family == AF_INET6) {
299# if defined(IPV6_V6ONLY)
300		if(v6only) {
301			int val=(v6only==2)?0:1;
302			if (setsockopt(s, IPPROTO_IPV6, IPV6_V6ONLY,
303				(void*)&val, (socklen_t)sizeof(val)) < 0) {
304#ifndef USE_WINSOCK
305				log_err("setsockopt(..., IPV6_V6ONLY"
306					", ...) failed: %s", strerror(errno));
307				close(s);
308#else
309				log_err("setsockopt(..., IPV6_V6ONLY"
310					", ...) failed: %s",
311					wsa_strerror(WSAGetLastError()));
312				closesocket(s);
313#endif
314				*noproto = 0;
315				*inuse = 0;
316				return -1;
317			}
318		}
319# endif
320# if defined(IPV6_USE_MIN_MTU)
321		/*
322		 * There is no fragmentation of IPv6 datagrams
323		 * during forwarding in the network. Therefore
324		 * we do not send UDP datagrams larger than
325		 * the minimum IPv6 MTU of 1280 octets. The
326		 * EDNS0 message length can be larger if the
327		 * network stack supports IPV6_USE_MIN_MTU.
328		 */
329		if (setsockopt(s, IPPROTO_IPV6, IPV6_USE_MIN_MTU,
330			(void*)&on, (socklen_t)sizeof(on)) < 0) {
331#  ifndef USE_WINSOCK
332			log_err("setsockopt(..., IPV6_USE_MIN_MTU, "
333				"...) failed: %s", strerror(errno));
334			close(s);
335#  else
336			log_err("setsockopt(..., IPV6_USE_MIN_MTU, "
337				"...) failed: %s",
338				wsa_strerror(WSAGetLastError()));
339			closesocket(s);
340#  endif
341			*noproto = 0;
342			*inuse = 0;
343			return -1;
344		}
345# elif defined(IPV6_MTU)
346		/*
347		 * On Linux, to send no larger than 1280, the PMTUD is
348		 * disabled by default for datagrams anyway, so we set
349		 * the MTU to use.
350		 */
351		if (setsockopt(s, IPPROTO_IPV6, IPV6_MTU,
352			(void*)&mtu, (socklen_t)sizeof(mtu)) < 0) {
353#  ifndef USE_WINSOCK
354			log_err("setsockopt(..., IPV6_MTU, ...) failed: %s",
355				strerror(errno));
356			close(s);
357#  else
358			log_err("setsockopt(..., IPV6_MTU, ...) failed: %s",
359				wsa_strerror(WSAGetLastError()));
360			closesocket(s);
361#  endif
362			*noproto = 0;
363			*inuse = 0;
364			return -1;
365		}
366# endif /* IPv6 MTU */
367	} else if(family == AF_INET) {
368#  if defined(IP_MTU_DISCOVER) && defined(IP_PMTUDISC_DONT)
369/* linux 3.15 has IP_PMTUDISC_OMIT, Hannes Frederic Sowa made it so that
370 * PMTU information is not accepted, but fragmentation is allowed
371 * if and only if the packet size exceeds the outgoing interface MTU
372 * (and also uses the interface mtu to determine the size of the packets).
373 * So there won't be any EMSGSIZE error.  Against DNS fragmentation attacks.
374 * FreeBSD already has same semantics without setting the option. */
375		int omit_set = 0;
376		int action;
377#   if defined(IP_PMTUDISC_OMIT)
378		action = IP_PMTUDISC_OMIT;
379		if (setsockopt(s, IPPROTO_IP, IP_MTU_DISCOVER,
380			&action, (socklen_t)sizeof(action)) < 0) {
381
382			if (errno != EINVAL) {
383				log_err("setsockopt(..., IP_MTU_DISCOVER, IP_PMTUDISC_OMIT...) failed: %s",
384					strerror(errno));
385
386#    ifndef USE_WINSOCK
387				close(s);
388#    else
389				closesocket(s);
390#    endif
391				*noproto = 0;
392				*inuse = 0;
393				return -1;
394			}
395		}
396		else
397		{
398		    omit_set = 1;
399		}
400#   endif
401		if (omit_set == 0) {
402   			action = IP_PMTUDISC_DONT;
403			if (setsockopt(s, IPPROTO_IP, IP_MTU_DISCOVER,
404				&action, (socklen_t)sizeof(action)) < 0) {
405				log_err("setsockopt(..., IP_MTU_DISCOVER, IP_PMTUDISC_DONT...) failed: %s",
406					strerror(errno));
407#    ifndef USE_WINSOCK
408				close(s);
409#    else
410				closesocket(s);
411#    endif
412				*noproto = 0;
413				*inuse = 0;
414				return -1;
415			}
416		}
417#  elif defined(IP_DONTFRAG)
418		int off = 0;
419		if (setsockopt(s, IPPROTO_IP, IP_DONTFRAG,
420			&off, (socklen_t)sizeof(off)) < 0) {
421			log_err("setsockopt(..., IP_DONTFRAG, ...) failed: %s",
422				strerror(errno));
423#    ifndef USE_WINSOCK
424			close(s);
425#    else
426			closesocket(s);
427#    endif
428			*noproto = 0;
429			*inuse = 0;
430			return -1;
431		}
432#  endif /* IPv4 MTU */
433	}
434	if(bind(s, (struct sockaddr*)addr, addrlen) != 0) {
435		*noproto = 0;
436		*inuse = 0;
437#ifndef USE_WINSOCK
438#ifdef EADDRINUSE
439		*inuse = (errno == EADDRINUSE);
440		/* detect freebsd jail with no ipv6 permission */
441		if(family==AF_INET6 && errno==EINVAL)
442			*noproto = 1;
443		else if(errno != EADDRINUSE) {
444			log_err_addr("can't bind socket", strerror(errno),
445				(struct sockaddr_storage*)addr, addrlen);
446		}
447#endif /* EADDRINUSE */
448		close(s);
449#else /* USE_WINSOCK */
450		if(WSAGetLastError() != WSAEADDRINUSE &&
451			WSAGetLastError() != WSAEADDRNOTAVAIL) {
452			log_err_addr("can't bind socket",
453				wsa_strerror(WSAGetLastError()),
454				(struct sockaddr_storage*)addr, addrlen);
455		}
456		closesocket(s);
457#endif
458		return -1;
459	}
460	if(!fd_set_nonblock(s)) {
461		*noproto = 0;
462		*inuse = 0;
463#ifndef USE_WINSOCK
464		close(s);
465#else
466		closesocket(s);
467#endif
468		return -1;
469	}
470	return s;
471}
472
473int
474create_tcp_accept_sock(struct addrinfo *addr, int v6only, int* noproto,
475	int* reuseport)
476{
477	int s;
478#if defined(SO_REUSEADDR) || defined(SO_REUSEPORT) || defined(IPV6_V6ONLY)
479	int on = 1;
480#endif /* SO_REUSEADDR || IPV6_V6ONLY */
481	verbose_print_addr(addr);
482	*noproto = 0;
483	if((s = socket(addr->ai_family, addr->ai_socktype, 0)) == -1) {
484#ifndef USE_WINSOCK
485		if(errno == EAFNOSUPPORT || errno == EPROTONOSUPPORT) {
486			*noproto = 1;
487			return -1;
488		}
489		log_err("can't create socket: %s", strerror(errno));
490#else
491		if(WSAGetLastError() == WSAEAFNOSUPPORT ||
492			WSAGetLastError() == WSAEPROTONOSUPPORT) {
493			*noproto = 1;
494			return -1;
495		}
496		log_err("can't create socket: %s",
497			wsa_strerror(WSAGetLastError()));
498#endif
499		return -1;
500	}
501#ifdef SO_REUSEADDR
502	if(setsockopt(s, SOL_SOCKET, SO_REUSEADDR, (void*)&on,
503		(socklen_t)sizeof(on)) < 0) {
504#ifndef USE_WINSOCK
505		log_err("setsockopt(.. SO_REUSEADDR ..) failed: %s",
506			strerror(errno));
507		close(s);
508#else
509		log_err("setsockopt(.. SO_REUSEADDR ..) failed: %s",
510			wsa_strerror(WSAGetLastError()));
511		closesocket(s);
512#endif
513		return -1;
514	}
515#endif /* SO_REUSEADDR */
516#ifdef SO_REUSEPORT
517	/* try to set SO_REUSEPORT so that incoming
518	 * connections are distributed evenly among the receiving threads.
519	 * Each thread must have its own socket bound to the same port,
520	 * with SO_REUSEPORT set on each socket.
521	 */
522	if (reuseport && *reuseport &&
523		setsockopt(s, SOL_SOCKET, SO_REUSEPORT, (void*)&on,
524		(socklen_t)sizeof(on)) < 0) {
525#ifdef ENOPROTOOPT
526		if(errno != ENOPROTOOPT || verbosity >= 3)
527			log_warn("setsockopt(.. SO_REUSEPORT ..) failed: %s",
528				strerror(errno));
529#endif
530		/* this option is not essential, we can continue */
531		*reuseport = 0;
532	}
533#else
534	(void)reuseport;
535#endif /* defined(SO_REUSEPORT) */
536#if defined(IPV6_V6ONLY)
537	if(addr->ai_family == AF_INET6 && v6only) {
538		if(setsockopt(s, IPPROTO_IPV6, IPV6_V6ONLY,
539			(void*)&on, (socklen_t)sizeof(on)) < 0) {
540#ifndef USE_WINSOCK
541			log_err("setsockopt(..., IPV6_V6ONLY, ...) failed: %s",
542				strerror(errno));
543			close(s);
544#else
545			log_err("setsockopt(..., IPV6_V6ONLY, ...) failed: %s",
546				wsa_strerror(WSAGetLastError()));
547			closesocket(s);
548#endif
549			return -1;
550		}
551	}
552#else
553	(void)v6only;
554#endif /* IPV6_V6ONLY */
555	if(bind(s, addr->ai_addr, addr->ai_addrlen) != 0) {
556#ifndef USE_WINSOCK
557		/* detect freebsd jail with no ipv6 permission */
558		if(addr->ai_family==AF_INET6 && errno==EINVAL)
559			*noproto = 1;
560		else {
561			log_err_addr("can't bind socket", strerror(errno),
562				(struct sockaddr_storage*)addr->ai_addr,
563				addr->ai_addrlen);
564		}
565		close(s);
566#else
567		log_err_addr("can't bind socket",
568			wsa_strerror(WSAGetLastError()),
569			(struct sockaddr_storage*)addr->ai_addr,
570			addr->ai_addrlen);
571		closesocket(s);
572#endif
573		return -1;
574	}
575	if(!fd_set_nonblock(s)) {
576#ifndef USE_WINSOCK
577		close(s);
578#else
579		closesocket(s);
580#endif
581		return -1;
582	}
583	if(listen(s, TCP_BACKLOG) == -1) {
584#ifndef USE_WINSOCK
585		log_err("can't listen: %s", strerror(errno));
586		close(s);
587#else
588		log_err("can't listen: %s", wsa_strerror(WSAGetLastError()));
589		closesocket(s);
590#endif
591		return -1;
592	}
593	return s;
594}
595
/**
 * Create a nonblocking, listening unix domain (AF_LOCAL) stream socket.
 * Any existing filesystem entry at path is unlinked before binding.
 * @param path: filesystem path of the socket; it must fit in sun_path.
 * @param noproto: set to 1 if local sockets are not supported by this
 *	build; otherwise unused.
 * @return the socket descriptor, or -1 on failure. The descriptor is
 *	closed again on every failure path.
 */
int
create_local_accept_sock(const char *path, int* noproto)
{
#ifdef HAVE_SYS_UN_H
	int s;
	struct sockaddr_un usock;

	(void)noproto; /*unused*/
	verbose(VERB_ALGO, "creating unix socket %s", path);

	/* refuse a path that would be truncated: bind() would otherwise
	 * create a differently named socket than the one unlinked below */
	if(strlen(path) >= sizeof(usock.sun_path)) {
		log_err("Cannot create local socket %s (path too long)",
			path);
		return -1;
	}

	memset(&usock, 0, sizeof(usock));
#ifdef HAVE_STRUCT_SOCKADDR_UN_SUN_LEN
	/* this member exists on BSDs, not Linux */
	usock.sun_len = (socklen_t)sizeof(usock);
#endif
	usock.sun_family = AF_LOCAL;
	/* length is 92-108, 104 on FreeBSD */
	(void)strlcpy(usock.sun_path, path, sizeof(usock.sun_path));

	if ((s = socket(AF_LOCAL, SOCK_STREAM, 0)) == -1) {
		log_err("Cannot create local socket %s (%s)",
			path, strerror(errno));
		return -1;
	}

	if (unlink(path) && errno != ENOENT) {
		/* The socket already exists and cannot be removed */
		log_err("Cannot remove old local socket %s (%s)",
			path, strerror(errno));
		goto err;
	}

	if (bind(s, (struct sockaddr *)&usock,
		(socklen_t)sizeof(struct sockaddr_un)) == -1) {
		log_err("Cannot bind local socket %s (%s)",
			path, strerror(errno));
		goto err;
	}

	if (!fd_set_nonblock(s)) {
		log_err("Cannot set non-blocking mode");
		goto err;
	}

	if (listen(s, TCP_BACKLOG) == -1) {
		log_err("can't listen: %s", strerror(errno));
		goto err;
	}

	return s;

err:
	/* do not leak the descriptor when setup fails after socket() */
#ifndef USE_WINSOCK
	close(s);
#else
	closesocket(s);
#endif
	return -1;
#else
	(void)path;
	log_err("Local sockets are not supported");
	*noproto = 1;
	return -1;
#endif
}
651
652
/**
 * Resolve an interface/port pair with getaddrinfo and create the matching
 * UDP or TCP accept socket from the first result.
 * @param stype: SOCK_DGRAM for a UDP socket, anything else for TCP accept.
 * @param ifname: interface address, or NULL for the default.
 * @param port: port number as a string.
 * @param hints: getaddrinfo hints; ai_socktype is overwritten with stype.
 * @param v6only: passed on to the socket creation routine.
 * @param noip6: set to 0 on entry, set to 1 if the socket could not be
 *	made because IPv6 is not available.
 * @param rcv: receive buffer size for UDP.
 * @param snd: send buffer size for UDP.
 * @param reuseport: passed on to the socket creation routine.
 * @return the socket descriptor, or -1 on failure.
 */
static int
make_sock(int stype, const char* ifname, const char* port,
	struct addrinfo *hints, int v6only, int* noip6, size_t rcv, size_t snd,
	int* reuseport)
{
	struct addrinfo* found = NULL;
	int fd, gai, in_use, no_proto;

	hints->ai_socktype = stype;
	*noip6 = 0;

	gai = getaddrinfo(ifname, port, hints, &found);
	if(gai != 0 || found == NULL) {
		const char* syserr = "";
#ifdef USE_WINSOCK
		if(gai == EAI_NONAME && hints->ai_family == AF_INET6){
			*noip6 = 1; /* 'Host not found' for IP6 on winXP */
			return -1;
		}
#endif
#ifdef EAI_SYSTEM
		/* only EAI_SYSTEM leaves a meaningful errno behind */
		if(gai == EAI_SYSTEM)
			syserr = (char*)strerror(errno);
#endif
		log_err("node %s:%s getaddrinfo: %s %s",
			ifname?ifname:"default", port, gai_strerror(gai),
			syserr);
		return -1;
	}

	if(stype == SOCK_DGRAM) {
		verbose_print_addr(found);
		fd = create_udp_sock(found->ai_family, found->ai_socktype,
			(struct sockaddr*)found->ai_addr, found->ai_addrlen,
			v6only, &in_use, &no_proto, (int)rcv, (int)snd, 1,
			reuseport);
		if(fd == -1) {
			if(in_use)
				log_err("bind: address already in use");
			else if(no_proto && hints->ai_family == AF_INET6)
				*noip6 = 1;
		}
	} else {
		fd = create_tcp_accept_sock(found, v6only, &no_proto,
			reuseport);
		if(fd == -1 && no_proto && hints->ai_family == AF_INET6)
			*noip6 = 1;
	}

	freeaddrinfo(found);
	return fd;
}
702
/**
 * Make a socket, honouring a port override of the form "ifspec@port" in
 * ifname. Without an '@' the given port is used unchanged.
 * Fails with -1 (and *noip6 set to 0) if the interface part does not fit
 * in 127 characters or the port part does not fit in 15 characters.
 */
static int
make_sock_port(int stype, const char* ifname, const char* port,
	struct addrinfo *hints, int v6only, int* noip6, size_t rcv, size_t snd,
	int* reuseport)
{
	char portpart[16];
	char ifpart[128];
	size_t iflen;
	const char* at = strchr(ifname, '@');

	/* common case: plain interface name, use the configured port */
	if(at == NULL)
		return make_sock(stype, ifname, port, hints, v6only, noip6,
			rcv, snd, reuseport);

	/* override port with ifspec@port */
	iflen = (size_t)(at - ifname);
	if(iflen >= sizeof(ifpart)) {
		log_err("ifname too long: %s", ifname);
		*noip6 = 0;
		return -1;
	}
	if(strlen(at+1) >= sizeof(portpart)) {
		log_err("portnumber too long: %s", ifname);
		*noip6 = 0;
		return -1;
	}
	/* both pieces are known to fit, including the terminating zero */
	memcpy(ifpart, ifname, iflen);
	ifpart[iflen] = 0;
	(void)strlcpy(portpart, at+1, sizeof(portpart));
	return make_sock(stype, ifpart, portpart, hints, v6only, noip6,
		rcv, snd, reuseport);
}
734
735/**
736 * Add port to open ports list.
737 * @param list: list head. changed.
738 * @param s: fd.
739 * @param ftype: if fd is UDP.
740 * @return false on failure. list in unchanged then.
741 */
742static int
743port_insert(struct listen_port** list, int s, enum listen_type ftype)
744{
745	struct listen_port* item = (struct listen_port*)malloc(
746		sizeof(struct listen_port));
747	if(!item)
748		return 0;
749	item->next = *list;
750	item->fd = s;
751	item->ftype = ftype;
752	*list = item;
753	return 1;
754}
755
/**
 * Ask the kernel to deliver destination address information with every
 * datagram received on the socket.
 * @param s: the socket.
 * @param family: AF_INET6 or AF_INET select which option is set; for any
 *	other family nothing is done.
 * @return false if the option could not be set or no suitable option
 *	exists on this platform, true otherwise.
 */
static int
set_recvpktinfo(int s, int family)
{
#if defined(IPV6_RECVPKTINFO) || defined(IPV6_PKTINFO) || (defined(IP_RECVDSTADDR) && defined(IP_SENDSRCADDR)) || defined(IP_PKTINFO)
	int enable = 1;
#else
	(void)s;
#endif
	switch(family) {
	case AF_INET6:
#           ifdef IPV6_RECVPKTINFO
		if(setsockopt(s, IPPROTO_IPV6, IPV6_RECVPKTINFO,
			(void*)&enable, (socklen_t)sizeof(enable)) < 0) {
			log_err("setsockopt(..., IPV6_RECVPKTINFO, ...) failed: %s",
				strerror(errno));
			return 0;
		}
		break;
#           elif defined(IPV6_PKTINFO)
		if(setsockopt(s, IPPROTO_IPV6, IPV6_PKTINFO,
			(void*)&enable, (socklen_t)sizeof(enable)) < 0) {
			log_err("setsockopt(..., IPV6_PKTINFO, ...) failed: %s",
				strerror(errno));
			return 0;
		}
		break;
#           else
		log_err("no IPV6_RECVPKTINFO and no IPV6_PKTINFO option, please "
			"disable interface-automatic in config");
		return 0;
#           endif /* defined IPV6_RECVPKTINFO */

	case AF_INET:
#           ifdef IP_PKTINFO
		if(setsockopt(s, IPPROTO_IP, IP_PKTINFO,
			(void*)&enable, (socklen_t)sizeof(enable)) < 0) {
			log_err("setsockopt(..., IP_PKTINFO, ...) failed: %s",
				strerror(errno));
			return 0;
		}
		break;
#           elif defined(IP_RECVDSTADDR) && defined(IP_SENDSRCADDR)
		if(setsockopt(s, IPPROTO_IP, IP_RECVDSTADDR,
			(void*)&enable, (socklen_t)sizeof(enable)) < 0) {
			log_err("setsockopt(..., IP_RECVDSTADDR, ...) failed: %s",
				strerror(errno));
			return 0;
		}
		break;
#           else
		log_err("no IP_SENDSRCADDR or IP_PKTINFO option, please disable "
			"interface-automatic in config");
		return 0;
#           endif /* IP_PKTINFO */

	default:
		/* other families need no option */
		break;
	}
	return 1;
}
810
811/**
812 * Helper for ports_open. Creates one interface (or NULL for default).
813 * @param ifname: The interface ip address.
814 * @param do_auto: use automatic interface detection.
815 * 	If enabled, then ifname must be the wildcard name.
816 * @param do_udp: if udp should be used.
817 * @param do_tcp: if udp should be used.
818 * @param hints: for getaddrinfo. family and flags have to be set by caller.
819 * @param port: Port number to use (as string).
820 * @param list: list of open ports, appended to, changed to point to list head.
821 * @param rcv: receive buffer size for UDP
822 * @param snd: send buffer size for UDP
823 * @param ssl_port: ssl service port number
824 * @param reuseport: try to set SO_REUSEPORT if nonNULL and true.
825 * 	set to false on exit if reuseport failed due to no kernel support.
826 * @return: returns false on error.
827 */
828static int
829ports_create_if(const char* ifname, int do_auto, int do_udp, int do_tcp,
830	struct addrinfo *hints, const char* port, struct listen_port** list,
831	size_t rcv, size_t snd, int ssl_port, int* reuseport)
832{
833	int s, noip6=0;
834	if(!do_udp && !do_tcp)
835		return 0;
836	if(do_auto) {
837		if((s = make_sock_port(SOCK_DGRAM, ifname, port, hints, 1,
838			&noip6, rcv, snd, reuseport)) == -1) {
839			if(noip6) {
840				log_warn("IPv6 protocol not available");
841				return 1;
842			}
843			return 0;
844		}
845		/* getting source addr packet info is highly non-portable */
846		if(!set_recvpktinfo(s, hints->ai_family)) {
847#ifndef USE_WINSOCK
848			close(s);
849#else
850			closesocket(s);
851#endif
852			return 0;
853		}
854		if(!port_insert(list, s, listen_type_udpancil)) {
855#ifndef USE_WINSOCK
856			close(s);
857#else
858			closesocket(s);
859#endif
860			return 0;
861		}
862	} else if(do_udp) {
863		/* regular udp socket */
864		if((s = make_sock_port(SOCK_DGRAM, ifname, port, hints, 1,
865			&noip6, rcv, snd, reuseport)) == -1) {
866			if(noip6) {
867				log_warn("IPv6 protocol not available");
868				return 1;
869			}
870			return 0;
871		}
872		if(!port_insert(list, s, listen_type_udp)) {
873#ifndef USE_WINSOCK
874			close(s);
875#else
876			closesocket(s);
877#endif
878			return 0;
879		}
880	}
881	if(do_tcp) {
882		int is_ssl = ((strchr(ifname, '@') &&
883			atoi(strchr(ifname, '@')+1) == ssl_port) ||
884			(!strchr(ifname, '@') && atoi(port) == ssl_port));
885		if((s = make_sock_port(SOCK_STREAM, ifname, port, hints, 1,
886			&noip6, 0, 0, reuseport)) == -1) {
887			if(noip6) {
888				/*log_warn("IPv6 protocol not available");*/
889				return 1;
890			}
891			return 0;
892		}
893		if(is_ssl)
894			verbose(VERB_ALGO, "setup TCP for SSL service");
895		if(!port_insert(list, s, is_ssl?listen_type_ssl:
896			listen_type_tcp)) {
897#ifndef USE_WINSOCK
898			close(s);
899#else
900			closesocket(s);
901#endif
902			return 0;
903		}
904	}
905	return 1;
906}
907
908/**
909 * Add items to commpoint list in front.
910 * @param c: commpoint to add.
911 * @param front: listen struct.
912 * @return: false on failure.
913 */
914static int
915listen_cp_insert(struct comm_point* c, struct listen_dnsport* front)
916{
917	struct listen_list* item = (struct listen_list*)malloc(
918		sizeof(struct listen_list));
919	if(!item)
920		return 0;
921	item->com = c;
922	item->next = front->cps;
923	front->cps = item;
924	return 1;
925}
926
927struct listen_dnsport*
928listen_create(struct comm_base* base, struct listen_port* ports,
929	size_t bufsize, int tcp_accept_count, void* sslctx,
930	struct dt_env* dtenv, comm_point_callback_t* cb, void *cb_arg)
931{
932	struct listen_dnsport* front = (struct listen_dnsport*)
933		malloc(sizeof(struct listen_dnsport));
934	if(!front)
935		return NULL;
936	front->cps = NULL;
937	front->udp_buff = sldns_buffer_new(bufsize);
938	if(!front->udp_buff) {
939		free(front);
940		return NULL;
941	}
942
943	/* create comm points as needed */
944	while(ports) {
945		struct comm_point* cp = NULL;
946		if(ports->ftype == listen_type_udp)
947			cp = comm_point_create_udp(base, ports->fd,
948				front->udp_buff, cb, cb_arg);
949		else if(ports->ftype == listen_type_tcp)
950			cp = comm_point_create_tcp(base, ports->fd,
951				tcp_accept_count, bufsize, cb, cb_arg);
952		else if(ports->ftype == listen_type_ssl) {
953			cp = comm_point_create_tcp(base, ports->fd,
954				tcp_accept_count, bufsize, cb, cb_arg);
955			cp->ssl = sslctx;
956		} else if(ports->ftype == listen_type_udpancil)
957			cp = comm_point_create_udp_ancil(base, ports->fd,
958				front->udp_buff, cb, cb_arg);
959		if(!cp) {
960			log_err("can't create commpoint");
961			listen_delete(front);
962			return NULL;
963		}
964		cp->dtenv = dtenv;
965		cp->do_not_close = 1;
966		if(!listen_cp_insert(cp, front)) {
967			log_err("malloc failed");
968			comm_point_delete(cp);
969			listen_delete(front);
970			return NULL;
971		}
972		ports = ports->next;
973	}
974	if(!front->cps) {
975		log_err("Could not open sockets to accept queries.");
976		listen_delete(front);
977		return NULL;
978	}
979
980	return front;
981}
982
983void
984listen_list_delete(struct listen_list* list)
985{
986	struct listen_list *p = list, *pn;
987	while(p) {
988		pn = p->next;
989		comm_point_delete(p->com);
990		free(p);
991		p = pn;
992	}
993}
994
995void
996listen_delete(struct listen_dnsport* front)
997{
998	if(!front)
999		return;
1000	listen_list_delete(front->cps);
1001	sldns_buffer_free(front->udp_buff);
1002	free(front);
1003}
1004
1005struct listen_port*
1006listening_ports_open(struct config_file* cfg, int* reuseport)
1007{
1008	struct listen_port* list = NULL;
1009	struct addrinfo hints;
1010	int i, do_ip4, do_ip6;
1011	int do_tcp, do_auto;
1012	char portbuf[32];
1013	snprintf(portbuf, sizeof(portbuf), "%d", cfg->port);
1014	do_ip4 = cfg->do_ip4;
1015	do_ip6 = cfg->do_ip6;
1016	do_tcp = cfg->do_tcp;
1017	do_auto = cfg->if_automatic && cfg->do_udp;
1018	if(cfg->incoming_num_tcp == 0)
1019		do_tcp = 0;
1020
1021	/* getaddrinfo */
1022	memset(&hints, 0, sizeof(hints));
1023	hints.ai_flags = AI_PASSIVE;
1024	/* no name lookups on our listening ports */
1025	if(cfg->num_ifs > 0)
1026		hints.ai_flags |= AI_NUMERICHOST;
1027	hints.ai_family = AF_UNSPEC;
1028#ifndef INET6
1029	do_ip6 = 0;
1030#endif
1031	if(!do_ip4 && !do_ip6) {
1032		return NULL;
1033	}
1034	/* create ip4 and ip6 ports so that return addresses are nice. */
1035	if(do_auto || cfg->num_ifs == 0) {
1036		if(do_ip6) {
1037			hints.ai_family = AF_INET6;
1038			if(!ports_create_if(do_auto?"::0":"::1",
1039				do_auto, cfg->do_udp, do_tcp,
1040				&hints, portbuf, &list,
1041				cfg->so_rcvbuf, cfg->so_sndbuf,
1042				cfg->ssl_port, reuseport)) {
1043				listening_ports_free(list);
1044				return NULL;
1045			}
1046		}
1047		if(do_ip4) {
1048			hints.ai_family = AF_INET;
1049			if(!ports_create_if(do_auto?"0.0.0.0":"127.0.0.1",
1050				do_auto, cfg->do_udp, do_tcp,
1051				&hints, portbuf, &list,
1052				cfg->so_rcvbuf, cfg->so_sndbuf,
1053				cfg->ssl_port, reuseport)) {
1054				listening_ports_free(list);
1055				return NULL;
1056			}
1057		}
1058	} else for(i = 0; i<cfg->num_ifs; i++) {
1059		if(str_is_ip6(cfg->ifs[i])) {
1060			if(!do_ip6)
1061				continue;
1062			hints.ai_family = AF_INET6;
1063			if(!ports_create_if(cfg->ifs[i], 0, cfg->do_udp,
1064				do_tcp, &hints, portbuf, &list,
1065				cfg->so_rcvbuf, cfg->so_sndbuf,
1066				cfg->ssl_port, reuseport)) {
1067				listening_ports_free(list);
1068				return NULL;
1069			}
1070		} else {
1071			if(!do_ip4)
1072				continue;
1073			hints.ai_family = AF_INET;
1074			if(!ports_create_if(cfg->ifs[i], 0, cfg->do_udp,
1075				do_tcp, &hints, portbuf, &list,
1076				cfg->so_rcvbuf, cfg->so_sndbuf,
1077				cfg->ssl_port, reuseport)) {
1078				listening_ports_free(list);
1079				return NULL;
1080			}
1081		}
1082	}
1083	return list;
1084}
1085
1086void listening_ports_free(struct listen_port* list)
1087{
1088	struct listen_port* nx;
1089	while(list) {
1090		nx = list->next;
1091		if(list->fd != -1) {
1092#ifndef USE_WINSOCK
1093			close(list->fd);
1094#else
1095			closesocket(list->fd);
1096#endif
1097		}
1098		free(list);
1099		list = nx;
1100	}
1101}
1102
1103size_t listen_get_mem(struct listen_dnsport* listen)
1104{
1105	size_t s = sizeof(*listen) + sizeof(*listen->base) +
1106		sizeof(*listen->udp_buff) +
1107		sldns_buffer_capacity(listen->udp_buff);
1108	struct listen_list* p;
1109	for(p = listen->cps; p; p = p->next) {
1110		s += sizeof(*p);
1111		s += comm_point_get_mem(p->com);
1112	}
1113	return s;
1114}
1115
1116void listen_stop_accept(struct listen_dnsport* listen)
1117{
1118	/* do not stop the ones that have no tcp_free list
1119	 * (they have already stopped listening) */
1120	struct listen_list* p;
1121	for(p=listen->cps; p; p=p->next) {
1122		if(p->com->type == comm_tcp_accept &&
1123			p->com->tcp_free != NULL) {
1124			comm_point_stop_listening(p->com);
1125		}
1126	}
1127}
1128
1129void listen_start_accept(struct listen_dnsport* listen)
1130{
1131	/* do not start the ones that have no tcp_free list, it is no
1132	 * use to listen to them because they have no free tcp handlers */
1133	struct listen_list* p;
1134	for(p=listen->cps; p; p=p->next) {
1135		if(p->com->type == comm_tcp_accept &&
1136			p->com->tcp_free != NULL) {
1137			comm_point_start_listening(p->com, -1, -1);
1138		}
1139	}
1140}
1141
1142