listen_dnsport.c revision 269257
1/*
2 * services/listen_dnsport.c - listen on port 53 for incoming DNS queries.
3 *
4 * Copyright (c) 2007, NLnet Labs. All rights reserved.
5 *
6 * This software is open source.
7 *
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
10 * are met:
11 *
12 * Redistributions of source code must retain the above copyright notice,
13 * this list of conditions and the following disclaimer.
14 *
15 * Redistributions in binary form must reproduce the above copyright notice,
16 * this list of conditions and the following disclaimer in the documentation
17 * and/or other materials provided with the distribution.
18 *
19 * Neither the name of the NLNET LABS nor the names of its contributors may
20 * be used to endorse or promote products derived from this software without
21 * specific prior written permission.
22 *
23 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
24 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
25 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
26 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
27 * HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
28 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
29 * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
30 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
31 * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
32 * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
33 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
34 */
35
36/**
37 * \file
38 *
39 * This file has functions to get queries from clients.
40 */
41#include "config.h"
42#ifdef HAVE_SYS_TYPES_H
43#  include <sys/types.h>
44#endif
45#include <sys/time.h>
46#include "services/listen_dnsport.h"
47#include "services/outside_network.h"
48#include "util/netevent.h"
49#include "util/log.h"
50#include "util/config_file.h"
51#include "util/net_help.h"
52#include "ldns/sbuffer.h"
53
54#ifdef HAVE_NETDB_H
55#include <netdb.h>
56#endif
57#include <fcntl.h>
58
59/** number of queued TCP connections for listen() */
60#define TCP_BACKLOG 5
61
62/**
63 * Debug print of the getaddrinfo returned address.
64 * @param addr: the address returned.
65 */
66static void
67verbose_print_addr(struct addrinfo *addr)
68{
69	if(verbosity >= VERB_ALGO) {
70		char buf[100];
71		void* sinaddr = &((struct sockaddr_in*)addr->ai_addr)->sin_addr;
72#ifdef INET6
73		if(addr->ai_family == AF_INET6)
74			sinaddr = &((struct sockaddr_in6*)addr->ai_addr)->
75				sin6_addr;
76#endif /* INET6 */
77		if(inet_ntop(addr->ai_family, sinaddr, buf,
78			(socklen_t)sizeof(buf)) == 0) {
79			(void)strlcpy(buf, "(null)", sizeof(buf));
80		}
81		buf[sizeof(buf)-1] = 0;
82		verbose(VERB_ALGO, "creating %s%s socket %s %d",
83			addr->ai_socktype==SOCK_DGRAM?"udp":
84			addr->ai_socktype==SOCK_STREAM?"tcp":"otherproto",
85			addr->ai_family==AF_INET?"4":
86			addr->ai_family==AF_INET6?"6":
87			"_otherfam", buf,
88			ntohs(((struct sockaddr_in*)addr->ai_addr)->sin_port));
89	}
90}
91
92int
93create_udp_sock(int family, int socktype, struct sockaddr* addr,
94        socklen_t addrlen, int v6only, int* inuse, int* noproto,
95	int rcv, int snd, int listen, int* reuseport)
96{
97	int s;
98#if defined(SO_REUSEADDR) || defined(SO_REUSEPORT) || defined(IPV6_USE_MIN_MTU)
99	int on=1;
100#endif
101#ifdef IPV6_MTU
102	int mtu = IPV6_MIN_MTU;
103#endif
104#if !defined(SO_RCVBUFFORCE) && !defined(SO_RCVBUF)
105	(void)rcv;
106#endif
107#if !defined(SO_SNDBUFFORCE) && !defined(SO_SNDBUF)
108	(void)snd;
109#endif
110#ifndef IPV6_V6ONLY
111	(void)v6only;
112#endif
113	if((s = socket(family, socktype, 0)) == -1) {
114		*inuse = 0;
115#ifndef USE_WINSOCK
116		if(errno == EAFNOSUPPORT || errno == EPROTONOSUPPORT) {
117			*noproto = 1;
118			return -1;
119		}
120		log_err("can't create socket: %s", strerror(errno));
121#else
122		if(WSAGetLastError() == WSAEAFNOSUPPORT ||
123			WSAGetLastError() == WSAEPROTONOSUPPORT) {
124			*noproto = 1;
125			return -1;
126		}
127		log_err("can't create socket: %s",
128			wsa_strerror(WSAGetLastError()));
129#endif
130		*noproto = 0;
131		return -1;
132	}
133	if(listen) {
134#ifdef SO_REUSEADDR
135		if(setsockopt(s, SOL_SOCKET, SO_REUSEADDR, (void*)&on,
136			(socklen_t)sizeof(on)) < 0) {
137#ifndef USE_WINSOCK
138			log_err("setsockopt(.. SO_REUSEADDR ..) failed: %s",
139				strerror(errno));
140			if(errno != ENOSYS) {
141				close(s);
142				*noproto = 0;
143				*inuse = 0;
144				return -1;
145			}
146#else
147			log_err("setsockopt(.. SO_REUSEADDR ..) failed: %s",
148				wsa_strerror(WSAGetLastError()));
149			closesocket(s);
150			*noproto = 0;
151			*inuse = 0;
152			return -1;
153#endif
154		}
155#endif /* SO_REUSEADDR */
156#if defined(__linux__) && defined(SO_REUSEPORT)
157		/* Linux specific: try to set SO_REUSEPORT so that incoming
158		 * queries are distributed evenly among the receiving threads.
159		 * Each thread must have its own socket bound to the same port,
160		 * with SO_REUSEPORT set on each socket.
161		 */
162		if (reuseport && *reuseport &&
163		    setsockopt(s, SOL_SOCKET, SO_REUSEPORT, (void*)&on,
164			(socklen_t)sizeof(on)) < 0) {
165#ifdef ENOPROTOOPT
166			if(errno != ENOPROTOOPT || verbosity >= 3)
167				log_warn("setsockopt(.. SO_REUSEPORT ..) failed: %s",
168					strerror(errno));
169#endif
170			/* this option is not essential, we can continue */
171			*reuseport = 0;
172		}
173#else
174		(void)reuseport;
175#endif /* defined(__linux__) && defined(SO_REUSEPORT) */
176	}
177	if(rcv) {
178#ifdef SO_RCVBUF
179		int got;
180		socklen_t slen = (socklen_t)sizeof(got);
181#  ifdef SO_RCVBUFFORCE
182		/* Linux specific: try to use root permission to override
183		 * system limits on rcvbuf. The limit is stored in
184		 * /proc/sys/net/core/rmem_max or sysctl net.core.rmem_max */
185		if(setsockopt(s, SOL_SOCKET, SO_RCVBUFFORCE, (void*)&rcv,
186			(socklen_t)sizeof(rcv)) < 0) {
187			if(errno != EPERM) {
188#    ifndef USE_WINSOCK
189				log_err("setsockopt(..., SO_RCVBUFFORCE, "
190					"...) failed: %s", strerror(errno));
191				close(s);
192#    else
193				log_err("setsockopt(..., SO_RCVBUFFORCE, "
194					"...) failed: %s",
195					wsa_strerror(WSAGetLastError()));
196				closesocket(s);
197#    endif
198				*noproto = 0;
199				*inuse = 0;
200				return -1;
201			}
202#  endif /* SO_RCVBUFFORCE */
203			if(setsockopt(s, SOL_SOCKET, SO_RCVBUF, (void*)&rcv,
204				(socklen_t)sizeof(rcv)) < 0) {
205#  ifndef USE_WINSOCK
206				log_err("setsockopt(..., SO_RCVBUF, "
207					"...) failed: %s", strerror(errno));
208				close(s);
209#  else
210				log_err("setsockopt(..., SO_RCVBUF, "
211					"...) failed: %s",
212					wsa_strerror(WSAGetLastError()));
213				closesocket(s);
214#  endif
215				*noproto = 0;
216				*inuse = 0;
217				return -1;
218			}
219			/* check if we got the right thing or if system
220			 * reduced to some system max.  Warn if so */
221			if(getsockopt(s, SOL_SOCKET, SO_RCVBUF, (void*)&got,
222				&slen) >= 0 && got < rcv/2) {
223				log_warn("so-rcvbuf %u was not granted. "
224					"Got %u. To fix: start with "
225					"root permissions(linux) or sysctl "
226					"bigger net.core.rmem_max(linux) or "
227					"kern.ipc.maxsockbuf(bsd) values.",
228					(unsigned)rcv, (unsigned)got);
229			}
230#  ifdef SO_RCVBUFFORCE
231		}
232#  endif
233#endif /* SO_RCVBUF */
234	}
235	/* first do RCVBUF as the receive buffer is more important */
236	if(snd) {
237#ifdef SO_SNDBUF
238		int got;
239		socklen_t slen = (socklen_t)sizeof(got);
240#  ifdef SO_SNDBUFFORCE
241		/* Linux specific: try to use root permission to override
242		 * system limits on sndbuf. The limit is stored in
243		 * /proc/sys/net/core/wmem_max or sysctl net.core.wmem_max */
244		if(setsockopt(s, SOL_SOCKET, SO_SNDBUFFORCE, (void*)&snd,
245			(socklen_t)sizeof(snd)) < 0) {
246			if(errno != EPERM) {
247#    ifndef USE_WINSOCK
248				log_err("setsockopt(..., SO_SNDBUFFORCE, "
249					"...) failed: %s", strerror(errno));
250				close(s);
251#    else
252				log_err("setsockopt(..., SO_SNDBUFFORCE, "
253					"...) failed: %s",
254					wsa_strerror(WSAGetLastError()));
255				closesocket(s);
256#    endif
257				*noproto = 0;
258				*inuse = 0;
259				return -1;
260			}
261#  endif /* SO_SNDBUFFORCE */
262			if(setsockopt(s, SOL_SOCKET, SO_SNDBUF, (void*)&snd,
263				(socklen_t)sizeof(snd)) < 0) {
264#  ifndef USE_WINSOCK
265				log_err("setsockopt(..., SO_SNDBUF, "
266					"...) failed: %s", strerror(errno));
267				close(s);
268#  else
269				log_err("setsockopt(..., SO_SNDBUF, "
270					"...) failed: %s",
271					wsa_strerror(WSAGetLastError()));
272				closesocket(s);
273#  endif
274				*noproto = 0;
275				*inuse = 0;
276				return -1;
277			}
278			/* check if we got the right thing or if system
279			 * reduced to some system max.  Warn if so */
280			if(getsockopt(s, SOL_SOCKET, SO_SNDBUF, (void*)&got,
281				&slen) >= 0 && got < snd/2) {
282				log_warn("so-sndbuf %u was not granted. "
283					"Got %u. To fix: start with "
284					"root permissions(linux) or sysctl "
285					"bigger net.core.wmem_max(linux) or "
286					"kern.ipc.maxsockbuf(bsd) values.",
287					(unsigned)snd, (unsigned)got);
288			}
289#  ifdef SO_SNDBUFFORCE
290		}
291#  endif
292#endif /* SO_SNDBUF */
293	}
294	if(family == AF_INET6) {
295# if defined(IPV6_V6ONLY)
296		if(v6only) {
297			int val=(v6only==2)?0:1;
298			if (setsockopt(s, IPPROTO_IPV6, IPV6_V6ONLY,
299				(void*)&val, (socklen_t)sizeof(val)) < 0) {
300#ifndef USE_WINSOCK
301				log_err("setsockopt(..., IPV6_V6ONLY"
302					", ...) failed: %s", strerror(errno));
303				close(s);
304#else
305				log_err("setsockopt(..., IPV6_V6ONLY"
306					", ...) failed: %s",
307					wsa_strerror(WSAGetLastError()));
308				closesocket(s);
309#endif
310				*noproto = 0;
311				*inuse = 0;
312				return -1;
313			}
314		}
315# endif
316# if defined(IPV6_USE_MIN_MTU)
317		/*
318		 * There is no fragmentation of IPv6 datagrams
319		 * during forwarding in the network. Therefore
320		 * we do not send UDP datagrams larger than
321		 * the minimum IPv6 MTU of 1280 octets. The
322		 * EDNS0 message length can be larger if the
323		 * network stack supports IPV6_USE_MIN_MTU.
324		 */
325		if (setsockopt(s, IPPROTO_IPV6, IPV6_USE_MIN_MTU,
326			(void*)&on, (socklen_t)sizeof(on)) < 0) {
327#  ifndef USE_WINSOCK
328			log_err("setsockopt(..., IPV6_USE_MIN_MTU, "
329				"...) failed: %s", strerror(errno));
330			close(s);
331#  else
332			log_err("setsockopt(..., IPV6_USE_MIN_MTU, "
333				"...) failed: %s",
334				wsa_strerror(WSAGetLastError()));
335			closesocket(s);
336#  endif
337			*noproto = 0;
338			*inuse = 0;
339			return -1;
340		}
341# elif defined(IPV6_MTU)
342		/*
343		 * On Linux, to send no larger than 1280, the PMTUD is
344		 * disabled by default for datagrams anyway, so we set
345		 * the MTU to use.
346		 */
347		if (setsockopt(s, IPPROTO_IPV6, IPV6_MTU,
348			(void*)&mtu, (socklen_t)sizeof(mtu)) < 0) {
349#  ifndef USE_WINSOCK
350			log_err("setsockopt(..., IPV6_MTU, ...) failed: %s",
351				strerror(errno));
352			close(s);
353#  else
354			log_err("setsockopt(..., IPV6_MTU, ...) failed: %s",
355				wsa_strerror(WSAGetLastError()));
356			closesocket(s);
357#  endif
358			*noproto = 0;
359			*inuse = 0;
360			return -1;
361		}
362# endif /* IPv6 MTU */
363	} else if(family == AF_INET) {
364#  if defined(IP_MTU_DISCOVER) && defined(IP_PMTUDISC_DONT)
365		int action = IP_PMTUDISC_DONT;
366		if (setsockopt(s, IPPROTO_IP, IP_MTU_DISCOVER,
367			&action, (socklen_t)sizeof(action)) < 0) {
368			log_err("setsockopt(..., IP_MTU_DISCOVER, "
369				"IP_PMTUDISC_DONT...) failed: %s",
370				strerror(errno));
371#    ifndef USE_WINSOCK
372			close(s);
373#    else
374			closesocket(s);
375#    endif
376			*noproto = 0;
377			*inuse = 0;
378			return -1;
379		}
380#  elif defined(IP_DONTFRAG)
381		int off = 0;
382		if (setsockopt(s, IPPROTO_IP, IP_DONTFRAG,
383			&off, (socklen_t)sizeof(off)) < 0) {
384			log_err("setsockopt(..., IP_DONTFRAG, ...) failed: %s",
385				strerror(errno));
386#    ifndef USE_WINSOCK
387			close(s);
388#    else
389			closesocket(s);
390#    endif
391			*noproto = 0;
392			*inuse = 0;
393			return -1;
394		}
395#  endif /* IPv4 MTU */
396	}
397	if(bind(s, (struct sockaddr*)addr, addrlen) != 0) {
398		*noproto = 0;
399		*inuse = 0;
400#ifndef USE_WINSOCK
401#ifdef EADDRINUSE
402		*inuse = (errno == EADDRINUSE);
403		/* detect freebsd jail with no ipv6 permission */
404		if(family==AF_INET6 && errno==EINVAL)
405			*noproto = 1;
406		else if(errno != EADDRINUSE) {
407			log_err("can't bind socket: %s", strerror(errno));
408			log_addr(0, "failed address",
409				(struct sockaddr_storage*)addr, addrlen);
410		}
411#endif /* EADDRINUSE */
412		close(s);
413#else /* USE_WINSOCK */
414		if(WSAGetLastError() != WSAEADDRINUSE &&
415			WSAGetLastError() != WSAEADDRNOTAVAIL) {
416			log_err("can't bind socket: %s",
417				wsa_strerror(WSAGetLastError()));
418			log_addr(0, "failed address",
419				(struct sockaddr_storage*)addr, addrlen);
420		}
421		closesocket(s);
422#endif
423		return -1;
424	}
425	if(!fd_set_nonblock(s)) {
426		*noproto = 0;
427		*inuse = 0;
428#ifndef USE_WINSOCK
429		close(s);
430#else
431		closesocket(s);
432#endif
433		return -1;
434	}
435	return s;
436}
437
438int
439create_tcp_accept_sock(struct addrinfo *addr, int v6only, int* noproto,
440	int* reuseport)
441{
442	int s;
443#if defined(SO_REUSEADDR) || defined(SO_REUSEPORT) || defined(IPV6_V6ONLY)
444	int on = 1;
445#endif /* SO_REUSEADDR || IPV6_V6ONLY */
446	verbose_print_addr(addr);
447	*noproto = 0;
448	if((s = socket(addr->ai_family, addr->ai_socktype, 0)) == -1) {
449#ifndef USE_WINSOCK
450		if(errno == EAFNOSUPPORT || errno == EPROTONOSUPPORT) {
451			*noproto = 1;
452			return -1;
453		}
454		log_err("can't create socket: %s", strerror(errno));
455#else
456		if(WSAGetLastError() == WSAEAFNOSUPPORT ||
457			WSAGetLastError() == WSAEPROTONOSUPPORT) {
458			*noproto = 1;
459			return -1;
460		}
461		log_err("can't create socket: %s",
462			wsa_strerror(WSAGetLastError()));
463#endif
464		return -1;
465	}
466#ifdef SO_REUSEADDR
467	if(setsockopt(s, SOL_SOCKET, SO_REUSEADDR, (void*)&on,
468		(socklen_t)sizeof(on)) < 0) {
469#ifndef USE_WINSOCK
470		log_err("setsockopt(.. SO_REUSEADDR ..) failed: %s",
471			strerror(errno));
472		close(s);
473#else
474		log_err("setsockopt(.. SO_REUSEADDR ..) failed: %s",
475			wsa_strerror(WSAGetLastError()));
476		closesocket(s);
477#endif
478		return -1;
479	}
480#endif /* SO_REUSEADDR */
481#if defined(__linux__) && defined(SO_REUSEPORT)
482	/* Linux specific: try to set SO_REUSEPORT so that incoming
483	 * connections are distributed evenly among the receiving threads.
484	 * Each thread must have its own socket bound to the same port,
485	 * with SO_REUSEPORT set on each socket.
486	 */
487	if (reuseport && *reuseport &&
488		setsockopt(s, SOL_SOCKET, SO_REUSEPORT, (void*)&on,
489		(socklen_t)sizeof(on)) < 0) {
490#ifdef ENOPROTOOPT
491		if(errno != ENOPROTOOPT || verbosity >= 3)
492			log_warn("setsockopt(.. SO_REUSEPORT ..) failed: %s",
493				strerror(errno));
494#endif
495		/* this option is not essential, we can continue */
496		*reuseport = 0;
497	}
498#else
499	(void)reuseport;
500#endif /* defined(__linux__) && defined(SO_REUSEPORT) */
501#if defined(IPV6_V6ONLY)
502	if(addr->ai_family == AF_INET6 && v6only) {
503		if(setsockopt(s, IPPROTO_IPV6, IPV6_V6ONLY,
504			(void*)&on, (socklen_t)sizeof(on)) < 0) {
505#ifndef USE_WINSOCK
506			log_err("setsockopt(..., IPV6_V6ONLY, ...) failed: %s",
507				strerror(errno));
508			close(s);
509#else
510			log_err("setsockopt(..., IPV6_V6ONLY, ...) failed: %s",
511				wsa_strerror(WSAGetLastError()));
512			closesocket(s);
513#endif
514			return -1;
515		}
516	}
517#else
518	(void)v6only;
519#endif /* IPV6_V6ONLY */
520	if(bind(s, addr->ai_addr, addr->ai_addrlen) != 0) {
521#ifndef USE_WINSOCK
522		/* detect freebsd jail with no ipv6 permission */
523		if(addr->ai_family==AF_INET6 && errno==EINVAL)
524			*noproto = 1;
525		else {
526			log_err("can't bind socket: %s", strerror(errno));
527			log_addr(0, "failed address",
528				(struct sockaddr_storage*)addr->ai_addr,
529				addr->ai_addrlen);
530		}
531		close(s);
532#else
533		log_err("can't bind socket: %s",
534			wsa_strerror(WSAGetLastError()));
535		log_addr(0, "failed address",
536			(struct sockaddr_storage*)addr->ai_addr,
537			addr->ai_addrlen);
538		closesocket(s);
539#endif
540		return -1;
541	}
542	if(!fd_set_nonblock(s)) {
543#ifndef USE_WINSOCK
544		close(s);
545#else
546		closesocket(s);
547#endif
548		return -1;
549	}
550	if(listen(s, TCP_BACKLOG) == -1) {
551#ifndef USE_WINSOCK
552		log_err("can't listen: %s", strerror(errno));
553		close(s);
554#else
555		log_err("can't listen: %s", wsa_strerror(WSAGetLastError()));
556		closesocket(s);
557#endif
558		return -1;
559	}
560	return s;
561}
562
/**
 * Resolve ifname/port with getaddrinfo and create a bound socket from the
 * first result.
 * @param stype: SOCK_DGRAM for a UDP socket, otherwise a TCP accept socket
 *	is created. Stored into hints->ai_socktype.
 * @param ifname: node name for getaddrinfo, or NULL.
 * @param port: service (port) string for getaddrinfo.
 * @param hints: getaddrinfo hints; ai_socktype is overwritten.
 * @param v6only: passed on to the socket creation routine.
 * @param noip6: set to true if the failure is because IPv6 is not
 *	available, false otherwise.
 * @param rcv: receive buffer size, used for UDP.
 * @param snd: send buffer size, used for UDP.
 * @param reuseport: passed on to the socket creation routine.
 * @return the socket, or -1 on failure.
 */
static int
make_sock(int stype, const char* ifname, const char* port,
	struct addrinfo *hints, int v6only, int* noip6, size_t rcv, size_t snd,
	int* reuseport)
{
	struct addrinfo *ai = NULL;
	int gai, fd, addr_inuse, proto_missing;

	hints->ai_socktype = stype;
	*noip6 = 0;

	gai = getaddrinfo(ifname, port, hints, &ai);
	if(gai != 0 || !ai) {
		const char* syserr = "";
#ifdef USE_WINSOCK
		if(gai == EAI_NONAME && hints->ai_family == AF_INET6){
			*noip6 = 1; /* 'Host not found' for IP6 on winXP */
			return -1;
		}
#endif
#ifdef EAI_SYSTEM
		/* for EAI_SYSTEM the real cause is in errno */
		if(gai == EAI_SYSTEM)
			syserr = (char*)strerror(errno);
#endif
		log_err("node %s:%s getaddrinfo: %s %s",
			ifname?ifname:"default", port, gai_strerror(gai),
			syserr);
		return -1;
	}

	if(stype != SOCK_DGRAM) {
		fd = create_tcp_accept_sock(ai, v6only, &proto_missing,
			reuseport);
		if(fd == -1 && proto_missing && hints->ai_family == AF_INET6)
			*noip6 = 1;
	} else {
		verbose_print_addr(ai);
		fd = create_udp_sock(ai->ai_family, ai->ai_socktype,
			(struct sockaddr*)ai->ai_addr, ai->ai_addrlen,
			v6only, &addr_inuse, &proto_missing, (int)rcv,
			(int)snd, 1, reuseport);
		if(fd == -1) {
			/* the status flags are only valid after a failure */
			if(addr_inuse)
				log_err("bind: address already in use");
			else if(proto_missing &&
				hints->ai_family == AF_INET6)
				*noip6 = 1;
		}
	}
	freeaddrinfo(ai);
	return fd;
}
612
/**
 * Make a socket; an interface spec of the form "addr@port" overrides the
 * given port. Without '@' the arguments go to make_sock unchanged.
 * On a too long address or port part, an error is logged, *noip6 is set to
 * false and -1 is returned.
 */
static int
make_sock_port(int stype, const char* ifname, const char* port,
	struct addrinfo *hints, int v6only, int* noip6, size_t rcv, size_t snd,
	int* reuseport)
{
	char portpart[16];
	char addrpart[128];
	size_t addrlen, portlen;
	const char* at = strchr(ifname, '@');

	if(!at) {
		/* no override present */
		return make_sock(stype, ifname, port, hints, v6only, noip6,
			rcv, snd, reuseport);
	}

	addrlen = (size_t)(at - ifname);
	if(addrlen >= sizeof(addrpart)) {
		log_err("ifname too long: %s", ifname);
		*noip6 = 0;
		return -1;
	}
	portlen = strlen(at+1);
	if(portlen >= sizeof(portpart)) {
		log_err("portnumber too long: %s", ifname);
		*noip6 = 0;
		return -1;
	}

	/* split "addr@port" into its two zero terminated parts */
	memcpy(addrpart, ifname, addrlen);
	addrpart[addrlen] = 0;
	memcpy(portpart, at+1, portlen);
	portpart[portlen] = 0;

	return make_sock(stype, addrpart, portpart, hints, v6only, noip6,
		rcv, snd, reuseport);
}
644
645/**
646 * Add port to open ports list.
647 * @param list: list head. changed.
648 * @param s: fd.
649 * @param ftype: if fd is UDP.
650 * @return false on failure. list in unchanged then.
651 */
652static int
653port_insert(struct listen_port** list, int s, enum listen_type ftype)
654{
655	struct listen_port* item = (struct listen_port*)malloc(
656		sizeof(struct listen_port));
657	if(!item)
658		return 0;
659	item->next = *list;
660	item->fd = s;
661	item->ftype = ftype;
662	*list = item;
663	return 1;
664}
665
/**
 * Enable delivery of packet info (destination address) on a socket.
 * For families other than AF_INET6 and AF_INET nothing is done.
 * @param s: the socket.
 * @param family: AF_INET6 or AF_INET.
 * @return false if the option could not be set or no suitable option
 *	exists on this platform (an error is logged), true otherwise.
 */
static int
set_recvpktinfo(int s, int family)
{
#if defined(IPV6_RECVPKTINFO) || defined(IPV6_PKTINFO) || (defined(IP_RECVDSTADDR) && defined(IP_SENDSRCADDR)) || defined(IP_PKTINFO)
	int on = 1;
#else
	(void)s;
#endif
	if(family == AF_INET6) {
#if defined(IPV6_RECVPKTINFO)
		if(setsockopt(s, IPPROTO_IPV6, IPV6_RECVPKTINFO,
			(void*)&on, (socklen_t)sizeof(on)) >= 0)
			return 1;
		log_err("setsockopt(..., IPV6_RECVPKTINFO, ...) failed: %s",
			strerror(errno));
		return 0;
#elif defined(IPV6_PKTINFO)
		if(setsockopt(s, IPPROTO_IPV6, IPV6_PKTINFO,
			(void*)&on, (socklen_t)sizeof(on)) >= 0)
			return 1;
		log_err("setsockopt(..., IPV6_PKTINFO, ...) failed: %s",
			strerror(errno));
		return 0;
#else
		log_err("no IPV6_RECVPKTINFO and no IPV6_PKTINFO option, please "
			"disable interface-automatic in config");
		return 0;
#endif /* IPv6 packet info option */
	}
	if(family == AF_INET) {
#if defined(IP_PKTINFO)
		if(setsockopt(s, IPPROTO_IP, IP_PKTINFO,
			(void*)&on, (socklen_t)sizeof(on)) >= 0)
			return 1;
		log_err("setsockopt(..., IP_PKTINFO, ...) failed: %s",
			strerror(errno));
		return 0;
#elif defined(IP_RECVDSTADDR) && defined(IP_SENDSRCADDR)
		if(setsockopt(s, IPPROTO_IP, IP_RECVDSTADDR,
			(void*)&on, (socklen_t)sizeof(on)) >= 0)
			return 1;
		log_err("setsockopt(..., IP_RECVDSTADDR, ...) failed: %s",
			strerror(errno));
		return 0;
#else
		log_err("no IP_SENDSRCADDR or IP_PKTINFO option, please disable "
			"interface-automatic in config");
		return 0;
#endif /* IPv4 packet info option */
	}
	return 1;
}
720
721/**
722 * Helper for ports_open. Creates one interface (or NULL for default).
723 * @param ifname: The interface ip address.
724 * @param do_auto: use automatic interface detection.
725 * 	If enabled, then ifname must be the wildcard name.
726 * @param do_udp: if udp should be used.
727 * @param do_tcp: if udp should be used.
728 * @param hints: for getaddrinfo. family and flags have to be set by caller.
729 * @param port: Port number to use (as string).
730 * @param list: list of open ports, appended to, changed to point to list head.
731 * @param rcv: receive buffer size for UDP
732 * @param snd: send buffer size for UDP
733 * @param ssl_port: ssl service port number
734 * @param reuseport: try to set SO_REUSEPORT if nonNULL and true.
735 * 	set to false on exit if reuseport failed due to no kernel support.
736 * @return: returns false on error.
737 */
738static int
739ports_create_if(const char* ifname, int do_auto, int do_udp, int do_tcp,
740	struct addrinfo *hints, const char* port, struct listen_port** list,
741	size_t rcv, size_t snd, int ssl_port, int* reuseport)
742{
743	int s, noip6=0;
744	if(!do_udp && !do_tcp)
745		return 0;
746	if(do_auto) {
747		if((s = make_sock_port(SOCK_DGRAM, ifname, port, hints, 1,
748			&noip6, rcv, snd, reuseport)) == -1) {
749			if(noip6) {
750				log_warn("IPv6 protocol not available");
751				return 1;
752			}
753			return 0;
754		}
755		/* getting source addr packet info is highly non-portable */
756		if(!set_recvpktinfo(s, hints->ai_family)) {
757#ifndef USE_WINSOCK
758			close(s);
759#else
760			closesocket(s);
761#endif
762			return 0;
763		}
764		if(!port_insert(list, s, listen_type_udpancil)) {
765#ifndef USE_WINSOCK
766			close(s);
767#else
768			closesocket(s);
769#endif
770			return 0;
771		}
772	} else if(do_udp) {
773		/* regular udp socket */
774		if((s = make_sock_port(SOCK_DGRAM, ifname, port, hints, 1,
775			&noip6, rcv, snd, reuseport)) == -1) {
776			if(noip6) {
777				log_warn("IPv6 protocol not available");
778				return 1;
779			}
780			return 0;
781		}
782		if(!port_insert(list, s, listen_type_udp)) {
783#ifndef USE_WINSOCK
784			close(s);
785#else
786			closesocket(s);
787#endif
788			return 0;
789		}
790	}
791	if(do_tcp) {
792		int is_ssl = ((strchr(ifname, '@') &&
793			atoi(strchr(ifname, '@')+1) == ssl_port) ||
794			(!strchr(ifname, '@') && atoi(port) == ssl_port));
795		if((s = make_sock_port(SOCK_STREAM, ifname, port, hints, 1,
796			&noip6, 0, 0, reuseport)) == -1) {
797			if(noip6) {
798				/*log_warn("IPv6 protocol not available");*/
799				return 1;
800			}
801			return 0;
802		}
803		if(is_ssl)
804			verbose(VERB_ALGO, "setup TCP for SSL service");
805		if(!port_insert(list, s, is_ssl?listen_type_ssl:
806			listen_type_tcp)) {
807#ifndef USE_WINSOCK
808			close(s);
809#else
810			closesocket(s);
811#endif
812			return 0;
813		}
814	}
815	return 1;
816}
817
818/**
819 * Add items to commpoint list in front.
820 * @param c: commpoint to add.
821 * @param front: listen struct.
822 * @return: false on failure.
823 */
824static int
825listen_cp_insert(struct comm_point* c, struct listen_dnsport* front)
826{
827	struct listen_list* item = (struct listen_list*)malloc(
828		sizeof(struct listen_list));
829	if(!item)
830		return 0;
831	item->com = c;
832	item->next = front->cps;
833	front->cps = item;
834	return 1;
835}
836
837struct listen_dnsport*
838listen_create(struct comm_base* base, struct listen_port* ports,
839	size_t bufsize, int tcp_accept_count, void* sslctx,
840	comm_point_callback_t* cb, void *cb_arg)
841{
842	struct listen_dnsport* front = (struct listen_dnsport*)
843		malloc(sizeof(struct listen_dnsport));
844	if(!front)
845		return NULL;
846	front->cps = NULL;
847	front->udp_buff = sldns_buffer_new(bufsize);
848	if(!front->udp_buff) {
849		free(front);
850		return NULL;
851	}
852
853	/* create comm points as needed */
854	while(ports) {
855		struct comm_point* cp = NULL;
856		if(ports->ftype == listen_type_udp)
857			cp = comm_point_create_udp(base, ports->fd,
858				front->udp_buff, cb, cb_arg);
859		else if(ports->ftype == listen_type_tcp)
860			cp = comm_point_create_tcp(base, ports->fd,
861				tcp_accept_count, bufsize, cb, cb_arg);
862		else if(ports->ftype == listen_type_ssl) {
863			cp = comm_point_create_tcp(base, ports->fd,
864				tcp_accept_count, bufsize, cb, cb_arg);
865			cp->ssl = sslctx;
866		} else if(ports->ftype == listen_type_udpancil)
867			cp = comm_point_create_udp_ancil(base, ports->fd,
868				front->udp_buff, cb, cb_arg);
869		if(!cp) {
870			log_err("can't create commpoint");
871			listen_delete(front);
872			return NULL;
873		}
874		cp->do_not_close = 1;
875		if(!listen_cp_insert(cp, front)) {
876			log_err("malloc failed");
877			comm_point_delete(cp);
878			listen_delete(front);
879			return NULL;
880		}
881		ports = ports->next;
882	}
883	if(!front->cps) {
884		log_err("Could not open sockets to accept queries.");
885		listen_delete(front);
886		return NULL;
887	}
888
889	return front;
890}
891
892void
893listen_list_delete(struct listen_list* list)
894{
895	struct listen_list *p = list, *pn;
896	while(p) {
897		pn = p->next;
898		comm_point_delete(p->com);
899		free(p);
900		p = pn;
901	}
902}
903
904void
905listen_delete(struct listen_dnsport* front)
906{
907	if(!front)
908		return;
909	listen_list_delete(front->cps);
910	sldns_buffer_free(front->udp_buff);
911	free(front);
912}
913
914struct listen_port*
915listening_ports_open(struct config_file* cfg, int* reuseport)
916{
917	struct listen_port* list = NULL;
918	struct addrinfo hints;
919	int i, do_ip4, do_ip6;
920	int do_tcp, do_auto;
921	char portbuf[32];
922	snprintf(portbuf, sizeof(portbuf), "%d", cfg->port);
923	do_ip4 = cfg->do_ip4;
924	do_ip6 = cfg->do_ip6;
925	do_tcp = cfg->do_tcp;
926	do_auto = cfg->if_automatic && cfg->do_udp;
927	if(cfg->incoming_num_tcp == 0)
928		do_tcp = 0;
929
930	/* getaddrinfo */
931	memset(&hints, 0, sizeof(hints));
932	hints.ai_flags = AI_PASSIVE;
933	/* no name lookups on our listening ports */
934	if(cfg->num_ifs > 0)
935		hints.ai_flags |= AI_NUMERICHOST;
936	hints.ai_family = AF_UNSPEC;
937#ifndef INET6
938	do_ip6 = 0;
939#endif
940	if(!do_ip4 && !do_ip6) {
941		return NULL;
942	}
943	/* create ip4 and ip6 ports so that return addresses are nice. */
944	if(do_auto || cfg->num_ifs == 0) {
945		if(do_ip6) {
946			hints.ai_family = AF_INET6;
947			if(!ports_create_if(do_auto?"::0":"::1",
948				do_auto, cfg->do_udp, do_tcp,
949				&hints, portbuf, &list,
950				cfg->so_rcvbuf, cfg->so_sndbuf,
951				cfg->ssl_port, reuseport)) {
952				listening_ports_free(list);
953				return NULL;
954			}
955		}
956		if(do_ip4) {
957			hints.ai_family = AF_INET;
958			if(!ports_create_if(do_auto?"0.0.0.0":"127.0.0.1",
959				do_auto, cfg->do_udp, do_tcp,
960				&hints, portbuf, &list,
961				cfg->so_rcvbuf, cfg->so_sndbuf,
962				cfg->ssl_port, reuseport)) {
963				listening_ports_free(list);
964				return NULL;
965			}
966		}
967	} else for(i = 0; i<cfg->num_ifs; i++) {
968		if(str_is_ip6(cfg->ifs[i])) {
969			if(!do_ip6)
970				continue;
971			hints.ai_family = AF_INET6;
972			if(!ports_create_if(cfg->ifs[i], 0, cfg->do_udp,
973				do_tcp, &hints, portbuf, &list,
974				cfg->so_rcvbuf, cfg->so_sndbuf,
975				cfg->ssl_port, reuseport)) {
976				listening_ports_free(list);
977				return NULL;
978			}
979		} else {
980			if(!do_ip4)
981				continue;
982			hints.ai_family = AF_INET;
983			if(!ports_create_if(cfg->ifs[i], 0, cfg->do_udp,
984				do_tcp, &hints, portbuf, &list,
985				cfg->so_rcvbuf, cfg->so_sndbuf,
986				cfg->ssl_port, reuseport)) {
987				listening_ports_free(list);
988				return NULL;
989			}
990		}
991	}
992	return list;
993}
994
995void listening_ports_free(struct listen_port* list)
996{
997	struct listen_port* nx;
998	while(list) {
999		nx = list->next;
1000		if(list->fd != -1) {
1001#ifndef USE_WINSOCK
1002			close(list->fd);
1003#else
1004			closesocket(list->fd);
1005#endif
1006		}
1007		free(list);
1008		list = nx;
1009	}
1010}
1011
1012size_t listen_get_mem(struct listen_dnsport* listen)
1013{
1014	size_t s = sizeof(*listen) + sizeof(*listen->base) +
1015		sizeof(*listen->udp_buff) +
1016		sldns_buffer_capacity(listen->udp_buff);
1017	struct listen_list* p;
1018	for(p = listen->cps; p; p = p->next) {
1019		s += sizeof(*p);
1020		s += comm_point_get_mem(p->com);
1021	}
1022	return s;
1023}
1024
1025void listen_stop_accept(struct listen_dnsport* listen)
1026{
1027	/* do not stop the ones that have no tcp_free list
1028	 * (they have already stopped listening) */
1029	struct listen_list* p;
1030	for(p=listen->cps; p; p=p->next) {
1031		if(p->com->type == comm_tcp_accept &&
1032			p->com->tcp_free != NULL) {
1033			comm_point_stop_listening(p->com);
1034		}
1035	}
1036}
1037
1038void listen_start_accept(struct listen_dnsport* listen)
1039{
1040	/* do not start the ones that have no tcp_free list, it is no
1041	 * use to listen to them because they have no free tcp handlers */
1042	struct listen_list* p;
1043	for(p=listen->cps; p; p=p->next) {
1044		if(p->com->type == comm_tcp_accept &&
1045			p->com->tcp_free != NULL) {
1046			comm_point_start_listening(p->com, -1, -1);
1047		}
1048	}
1049}
1050
1051