ntp_intres.c revision 310419
1/*
2 * ntp_intres.c - Implements a generic blocking worker child or thread,
3 *		  initially to provide a nonblocking solution for DNS
4 *		  name to address lookups available with getaddrinfo().
5 *
6 * This is a new implementation as of 2009 sharing the filename and
7 * very little else with the prior implementation, which used a
8 * temporary file to receive a single set of requests from the parent,
9 * and a NTP mode 7 authenticated request to push back responses.
10 *
11 * A primary goal in rewriting this code was the need to support the
12 * pool configuration directive's requirement to retrieve multiple
13 * addresses resolving a single name, which has previously been
14 * satisfied with blocking resolver calls from the ntpd mainline code.
15 *
16 * A secondary goal is to provide a generic mechanism for other
17 * blocking operations to be delegated to a worker using a common
18 * model for both Unix and Windows ntpd.  ntp_worker.c, work_fork.c,
19 * and work_thread.c implement the generic mechanism.  This file
20 * implements the two current consumers, getaddrinfo_sometime() and the
21 * presently unused getnameinfo_sometime().
22 *
23 * Both routines deliver results to a callback and manage memory
24 * allocation, meaning there is no freeaddrinfo_sometime().
25 *
26 * The initial implementation for Unix uses a pair of unidirectional
27 * pipes, one each for requests and responses, connecting the forked
28 * blocking child worker with the ntpd mainline.  The threaded code
29 * uses arrays of pointers to queue requests and responses.
30 *
31 * The parent drives the process, including scheduling sleeps between
32 * retries.
33 *
34 * Memory is managed differently for a child process, which mallocs
35 * request buffers to read from the pipe into, whereas the threaded
36 * code mallocs a copy of the request to hand off to the worker via
37 * the queueing array.  The resulting request buffer is free()d by
38 * platform-independent code.  A wrinkle is the request needs to be
39 * available to the requestor during response processing.
40 *
41 * Response memory allocation is also platform-dependent.  With a
42 * separate process and pipes, the response is free()d after being
43 * written to the pipe.  With threads, the same memory is handed
44 * over and the requestor frees it after processing is completed.
45 *
46 * The code should be generalized to support threads on Unix using
47 * much of the same code used for Windows initially.
48 *
49 */
50#ifdef HAVE_CONFIG_H
51# include <config.h>
52#endif
53
54#include "ntp_workimpl.h"
55
56#ifdef WORKER
57
58#include <stdio.h>
59#include <ctype.h>
60#include <signal.h>
61
62/**/
63#ifdef HAVE_SYS_TYPES_H
64# include <sys/types.h>
65#endif
66#ifdef HAVE_NETINET_IN_H
67#include <netinet/in.h>
68#endif
69#include <arpa/inet.h>
70/**/
71#ifdef HAVE_SYS_PARAM_H
72# include <sys/param.h>
73#endif
74
75#if !defined(HAVE_RES_INIT) && defined(HAVE___RES_INIT)
76# define HAVE_RES_INIT
77#endif
78
79#if defined(HAVE_RESOLV_H) && defined(HAVE_RES_INIT)
80# ifdef HAVE_ARPA_NAMESER_H
81#  include <arpa/nameser.h> /* DNS HEADER struct */
82# endif
83# ifdef HAVE_NETDB_H
84#  include <netdb.h>
85# endif
86# include <resolv.h>
87# ifdef HAVE_INT32_ONLY_WITH_DNS
88#  define HAVE_INT32
89# endif
90# ifdef HAVE_U_INT32_ONLY_WITH_DNS
91#  define HAVE_U_INT32
92# endif
93#endif
94
95#include "ntp.h"
96#include "ntp_debug.h"
97#include "ntp_malloc.h"
98#include "ntp_syslog.h"
99#include "ntp_unixtime.h"
100#include "ntp_intres.h"
101#include "intreswork.h"
102
103
104/*
105 * Following are implementations of getaddrinfo_sometime() and
106 * getnameinfo_sometime().  Each is implemented in three routines:
107 *
108 * getaddrinfo_sometime()		getnameinfo_sometime()
109 * blocking_getaddrinfo()		blocking_getnameinfo()
110 * getaddrinfo_sometime_complete()	getnameinfo_sometime_complete()
111 *
112 * The first runs in the parent and marshalls (or serializes) request
113 * parameters into a request blob which is processed in the child by
114 * the second routine, blocking_*(), which serializes the results into
115 * a response blob unpacked by the third routine, *_complete(), which
116 * calls the callback routine provided with the request and frees
117 * _request_ memory allocated by the first routine.  Response memory
118 * is managed by the code which calls the *_complete routines.
119 */
120
121
122/* === typedefs === */
/*
 * blocking_gai_req - marshalled getaddrinfo() request.
 *
 * Built by getaddrinfo_sometime_ex() and consumed by
 * blocking_getaddrinfo().  This fixed-size header is followed, in the
 * same allocation, by nodesize octets of NUL-terminated node name and
 * then servsize octets of NUL-terminated service name.
 */
struct blocking_gai_req_tag {
	size_t			octets;		/* header plus both strings */
	u_int			dns_idx;	/* index into dnschild_contexts[] */
	time_t			scheduled;	/* time() when this attempt was scheduled */
	time_t			earliest;	/* worker sleeps until this time */
	int			retry;		/* retry interval, seconds; <= 0 means no retry */
	struct addrinfo		hints;		/* getaddrinfo() hints, copied by value */
	u_int			qflags;		/* query flags, e.g. GAIR_F_IGNDNSERR */
	gai_sometime_callback	callback;	/* completion routine */
	void *			context;	/* opaque, handed back to callback */
	size_t			nodesize;	/* strlen(node) + 1 */
	size_t			servsize;	/* strlen(service) + 1 */
};
typedef struct blocking_gai_req_tag blocking_gai_req;
136
/*
 * blocking_gai_resp - marshalled getaddrinfo() response header.
 *
 * Filled in by blocking_getaddrinfo() and unpacked by
 * getaddrinfo_sometime_complete().  The header is followed by
 * ai_count struct addrinfo, then ai_count sockaddr_u, and finally
 * the canonical name strings.
 */
struct blocking_gai_resp_tag {
	size_t	octets;		/* this header plus all data following it */
	int	retcode;	/* getaddrinfo() return value */
	int	retry;		/* copy of the request's retry value */
	int	gai_errno;	/* errno, for EAI_SYSTEM case */
	int	ai_count;	/* number of addrinfo results serialized */
};
typedef struct blocking_gai_resp_tag blocking_gai_resp;
148
149typedef struct blocking_gni_req_tag {
150	size_t			octets;
151	u_int			dns_idx;
152	time_t			scheduled;
153	time_t			earliest;
154	int			retry;
155	size_t			hostoctets;
156	size_t			servoctets;
157	int			flags;
158	gni_sometime_callback	callback;
159	void *			context;
160	sockaddr_u		socku;
161} blocking_gni_req;
162
/*
 * blocking_gni_resp - marshalled getnameinfo() response header.
 *
 * Filled in by blocking_getnameinfo() and unpacked by
 * getnameinfo_sometime_complete().  The header is followed by
 * hostoctets bytes of null-terminated host, then servoctets bytes
 * of null-terminated service.
 */
struct blocking_gni_resp_tag {
	size_t	octets;		/* this header plus both strings */
	int	retcode;	/* getnameinfo() return value */
	int	gni_errno;	/* errno, for EAI_SYSTEM case */
	int	retry;		/* copy of the request's retry value */
	size_t	hostoctets;	/* octets of host string, 0 on failure */
	size_t	servoctets;	/* octets of service string, 0 on failure */
};
typedef struct blocking_gni_resp_tag blocking_gni_resp;
175
176/* per-DNS-worker state in parent */
177typedef struct dnschild_ctx_tag {
178	u_int	index;
179	time_t	next_dns_timeslot;
180} dnschild_ctx;
181
182/* per-DNS-worker state in worker */
183typedef struct dnsworker_ctx_tag {
184	blocking_child *	c;
185	time_t			ignore_scheduled_before;
186#ifdef HAVE_RES_INIT
187	time_t	next_res_init;
188#endif
189} dnsworker_ctx;
190
191
192/* === variables === */
193dnschild_ctx **		dnschild_contexts;		/* parent */
194u_int			dnschild_contexts_alloc;
195dnsworker_ctx **	dnsworker_contexts;		/* child */
196u_int			dnsworker_contexts_alloc;
197
198#ifdef HAVE_RES_INIT
199static	time_t		next_res_init;
200#endif
201
202
203/* === forward declarations === */
static	u_int		reserve_dnschild_ctx(void);
static	u_int		get_dnschild_ctx(void);
static	dnsworker_ctx *	get_worker_context(blocking_child *c, u_int idx);
static	void		scheduled_sleep(time_t scheduled, time_t earliest,
					dnsworker_ctx *worker_ctx);
static	void		manage_dns_retry_interval(time_t *pscheduled,
						  time_t *pwhen,
						  int *pretry,
						  time_t *pnext_timeslot,
						  int forever /*BOOL*/);
static	int		should_retry_dns(int rescode, int res_errno);
#ifdef HAVE_RES_INIT
static	void		reload_resolv_conf(dnsworker_ctx *worker_ctx);
#else
/* no res_init() available: nothing to reload, just consume the argument */
# define		reload_resolv_conf(wc)		\
	do {						\
		(void)(wc);				\
	} while (FALSE)
#endif
static	void		getaddrinfo_sometime_complete(blocking_work_req rtype,
						      void *context,
						      size_t respsize,
						      void *resp);
static	void		getnameinfo_sometime_complete(blocking_work_req rtype,
						      void *context,
						      size_t respsize,
						      void *resp);
227
228
229/* === functions === */
230/*
231 * getaddrinfo_sometime - uses blocking child to call getaddrinfo then
232 *			  invokes provided callback completion function.
233 */
234int
235getaddrinfo_sometime_ex(
236	const char *		node,
237	const char *		service,
238	const struct addrinfo *	hints,
239	int			retry,
240	gai_sometime_callback	callback,
241	void *			context,
242	u_int			qflags
243	)
244{
245	blocking_gai_req *	gai_req;
246	u_int			idx;
247	dnschild_ctx *		child_ctx;
248	size_t			req_size;
249	size_t			nodesize;
250	size_t			servsize;
251	time_t			now;
252
253	REQUIRE(NULL != node);
254	if (NULL != hints) {
255		REQUIRE(0 == hints->ai_addrlen);
256		REQUIRE(NULL == hints->ai_addr);
257		REQUIRE(NULL == hints->ai_canonname);
258		REQUIRE(NULL == hints->ai_next);
259	}
260
261	idx = get_dnschild_ctx();
262	child_ctx = dnschild_contexts[idx];
263
264	nodesize = strlen(node) + 1;
265	servsize = strlen(service) + 1;
266	req_size = sizeof(*gai_req) + nodesize + servsize;
267
268	gai_req = emalloc_zero(req_size);
269
270	gai_req->octets = req_size;
271	gai_req->dns_idx = idx;
272	now = time(NULL);
273	gai_req->scheduled = now;
274	gai_req->earliest = max(now, child_ctx->next_dns_timeslot);
275	child_ctx->next_dns_timeslot = gai_req->earliest;
276	if (hints != NULL)
277		gai_req->hints = *hints;
278	gai_req->retry = retry;
279	gai_req->callback = callback;
280	gai_req->context = context;
281	gai_req->nodesize = nodesize;
282	gai_req->servsize = servsize;
283	gai_req->qflags = qflags;
284
285	memcpy((char *)gai_req + sizeof(*gai_req), node, nodesize);
286	memcpy((char *)gai_req + sizeof(*gai_req) + nodesize, service,
287	       servsize);
288
289	if (queue_blocking_request(
290		BLOCKING_GETADDRINFO,
291		gai_req,
292		req_size,
293		&getaddrinfo_sometime_complete,
294		gai_req)) {
295
296		msyslog(LOG_ERR, "unable to queue getaddrinfo request");
297		errno = EFAULT;
298		return -1;
299	}
300
301	return 0;
302}
303
304int
305blocking_getaddrinfo(
306	blocking_child *	c,
307	blocking_pipe_header *	req
308	)
309{
310	blocking_gai_req *	gai_req;
311	dnsworker_ctx *		worker_ctx;
312	blocking_pipe_header *	resp;
313	blocking_gai_resp *	gai_resp;
314	char *			node;
315	char *			service;
316	struct addrinfo *	ai_res;
317	struct addrinfo *	ai;
318	struct addrinfo *	serialized_ai;
319	size_t			canons_octets;
320	size_t			this_octets;
321	size_t			resp_octets;
322	char *			cp;
323	time_t			time_now;
324
325	gai_req = (void *)((char *)req + sizeof(*req));
326	node = (char *)gai_req + sizeof(*gai_req);
327	service = node + gai_req->nodesize;
328
329	worker_ctx = get_worker_context(c, gai_req->dns_idx);
330	scheduled_sleep(gai_req->scheduled, gai_req->earliest,
331			worker_ctx);
332	reload_resolv_conf(worker_ctx);
333
334	/*
335	 * Take a shot at the final size, better to overestimate
336	 * at first and then realloc to a smaller size.
337	 */
338
339	resp_octets = sizeof(*resp) + sizeof(*gai_resp) +
340		      16 * (sizeof(struct addrinfo) +
341			    sizeof(sockaddr_u)) +
342		      256;
343	resp = emalloc_zero(resp_octets);
344	gai_resp = (void *)(resp + 1);
345
346	TRACE(2, ("blocking_getaddrinfo given node %s serv %s fam %d flags %x\n",
347		  node, service, gai_req->hints.ai_family,
348		  gai_req->hints.ai_flags));
349#ifdef DEBUG
350	if (debug >= 2)
351		fflush(stdout);
352#endif
353	ai_res = NULL;
354	gai_resp->retcode = getaddrinfo(node, service, &gai_req->hints,
355					&ai_res);
356	gai_resp->retry = gai_req->retry;
357#ifdef EAI_SYSTEM
358	if (EAI_SYSTEM == gai_resp->retcode)
359		gai_resp->gai_errno = errno;
360#endif
361	canons_octets = 0;
362
363	if (0 == gai_resp->retcode) {
364		ai = ai_res;
365		while (NULL != ai) {
366			gai_resp->ai_count++;
367			if (ai->ai_canonname)
368				canons_octets += strlen(ai->ai_canonname) + 1;
369			ai = ai->ai_next;
370		}
371		/*
372		 * If this query succeeded only after retrying, DNS may have
373		 * just become responsive.  Ignore previously-scheduled
374		 * retry sleeps once for each pending request, similar to
375		 * the way scheduled_sleep() does when its worker_sleep()
376		 * is interrupted.
377		 */
378		if (gai_resp->retry > INITIAL_DNS_RETRY) {
379			time_now = time(NULL);
380			worker_ctx->ignore_scheduled_before = time_now;
381			TRACE(1, ("DNS success after retry, ignoring sleeps scheduled before now (%s)\n",
382				  humantime(time_now)));
383		}
384	}
385
386	/*
387	 * Our response consists of a header, followed by ai_count
388	 * addrinfo structs followed by ai_count sockaddr_storage
389	 * structs followed by the canonical names.
390	 */
391	gai_resp->octets = sizeof(*gai_resp)
392			    + gai_resp->ai_count
393				* (sizeof(gai_req->hints)
394				   + sizeof(sockaddr_u))
395			    + canons_octets;
396
397	resp_octets = sizeof(*resp) + gai_resp->octets;
398	resp = erealloc(resp, resp_octets);
399	gai_resp = (void *)(resp + 1);
400
401	/* cp serves as our current pointer while serializing */
402	cp = (void *)(gai_resp + 1);
403	canons_octets = 0;
404
405	if (0 == gai_resp->retcode) {
406		ai = ai_res;
407		while (NULL != ai) {
408			memcpy(cp, ai, sizeof(*ai));
409			serialized_ai = (void *)cp;
410			cp += sizeof(*ai);
411
412			/* transform ai_canonname into offset */
413			if (NULL != serialized_ai->ai_canonname) {
414				serialized_ai->ai_canonname = (char *)canons_octets;
415				canons_octets += strlen(ai->ai_canonname) + 1;
416			}
417
418			/* leave fixup of ai_addr pointer for receiver */
419
420			ai = ai->ai_next;
421		}
422
423		ai = ai_res;
424		while (NULL != ai) {
425			INSIST(ai->ai_addrlen <= sizeof(sockaddr_u));
426			memcpy(cp, ai->ai_addr, ai->ai_addrlen);
427			cp += sizeof(sockaddr_u);
428
429			ai = ai->ai_next;
430		}
431
432		ai = ai_res;
433		while (NULL != ai) {
434			if (NULL != ai->ai_canonname) {
435				this_octets = strlen(ai->ai_canonname) + 1;
436				memcpy(cp, ai->ai_canonname, this_octets);
437				cp += this_octets;
438			}
439
440			ai = ai->ai_next;
441		}
442		freeaddrinfo(ai_res);
443	}
444
445	/*
446	 * make sure our walk and earlier calc match
447	 */
448	DEBUG_INSIST((size_t)(cp - (char *)resp) == resp_octets);
449
450	if (queue_blocking_response(c, resp, resp_octets, req)) {
451		msyslog(LOG_ERR, "blocking_getaddrinfo can not queue response");
452		return -1;
453	}
454
455	return 0;
456}
457
458int
459getaddrinfo_sometime(
460	const char *		node,
461	const char *		service,
462	const struct addrinfo *	hints,
463	int			retry,
464	gai_sometime_callback	callback,
465	void *			context
466	)
467{
468	return getaddrinfo_sometime_ex(node, service, hints, retry,
469				       callback, context, 0);
470}
471
472
473static void
474getaddrinfo_sometime_complete(
475	blocking_work_req	rtype,
476	void *			context,
477	size_t			respsize,
478	void *			resp
479	)
480{
481	blocking_gai_req *	gai_req;
482	blocking_gai_resp *	gai_resp;
483	dnschild_ctx *		child_ctx;
484	struct addrinfo *	ai;
485	struct addrinfo *	next_ai;
486	sockaddr_u *		psau;
487	char *			node;
488	char *			service;
489	char *			canon_start;
490	time_t			time_now;
491	int			again, noerr;
492	int			af;
493	const char *		fam_spec;
494	int			i;
495
496	gai_req = context;
497	gai_resp = resp;
498
499	DEBUG_REQUIRE(BLOCKING_GETADDRINFO == rtype);
500	DEBUG_REQUIRE(respsize == gai_resp->octets);
501
502	node = (char *)gai_req + sizeof(*gai_req);
503	service = node + gai_req->nodesize;
504
505	child_ctx = dnschild_contexts[gai_req->dns_idx];
506
507	if (0 == gai_resp->retcode) {
508		/*
509		 * If this query succeeded only after retrying, DNS may have
510		 * just become responsive.
511		 */
512		if (gai_resp->retry > INITIAL_DNS_RETRY) {
513			time_now = time(NULL);
514			child_ctx->next_dns_timeslot = time_now;
515			TRACE(1, ("DNS success after retry, %u next_dns_timeslot reset (%s)\n",
516				  gai_req->dns_idx, humantime(time_now)));
517		}
518	} else {
519		noerr = !!(gai_req->qflags & GAIR_F_IGNDNSERR);
520		again = noerr || should_retry_dns(
521					gai_resp->retcode, gai_resp->gai_errno);
522		/*
523		 * exponential backoff of DNS retries to 64s
524		 */
525		if (gai_req->retry > 0 && again) {
526			/* log the first retry only */
527			if (INITIAL_DNS_RETRY == gai_req->retry)
528				NLOG(NLOG_SYSINFO) {
529					af = gai_req->hints.ai_family;
530					fam_spec = (AF_INET6 == af)
531						       ? " (AAAA)"
532						       : (AF_INET == af)
533							     ? " (A)"
534							     : "";
535#ifdef EAI_SYSTEM
536					if (EAI_SYSTEM == gai_resp->retcode) {
537						errno = gai_resp->gai_errno;
538						msyslog(LOG_INFO,
539							"retrying DNS %s%s: EAI_SYSTEM %d: %m",
540							node, fam_spec,
541							gai_resp->gai_errno);
542					} else
543#endif
544						msyslog(LOG_INFO,
545							"retrying DNS %s%s: %s (%d)",
546							node, fam_spec,
547							gai_strerror(gai_resp->retcode),
548							gai_resp->retcode);
549				}
550			manage_dns_retry_interval(
551				&gai_req->scheduled, &gai_req->earliest,
552				&gai_req->retry, &child_ctx->next_dns_timeslot,
553				noerr);
554			if (!queue_blocking_request(
555					BLOCKING_GETADDRINFO,
556					gai_req,
557					gai_req->octets,
558					&getaddrinfo_sometime_complete,
559					gai_req))
560				return;
561			else
562				msyslog(LOG_ERR,
563					"unable to retry hostname %s",
564					node);
565		}
566	}
567
568	/*
569	 * fixup pointers in returned addrinfo array
570	 */
571	ai = (void *)((char *)gai_resp + sizeof(*gai_resp));
572	next_ai = NULL;
573	for (i = gai_resp->ai_count - 1; i >= 0; i--) {
574		ai[i].ai_next = next_ai;
575		next_ai = &ai[i];
576	}
577
578	psau = (void *)((char *)ai + gai_resp->ai_count * sizeof(*ai));
579	canon_start = (char *)psau + gai_resp->ai_count * sizeof(*psau);
580
581	for (i = 0; i < gai_resp->ai_count; i++) {
582		if (NULL != ai[i].ai_addr)
583			ai[i].ai_addr = &psau->sa;
584		psau++;
585		if (NULL != ai[i].ai_canonname)
586			ai[i].ai_canonname += (size_t)canon_start;
587	}
588
589	ENSURE((char *)psau == canon_start);
590
591	if (!gai_resp->ai_count)
592		ai = NULL;
593
594	(*gai_req->callback)(gai_resp->retcode, gai_resp->gai_errno,
595			     gai_req->context, node, service,
596			     &gai_req->hints, ai);
597
598	free(gai_req);
599	/* gai_resp is part of block freed by process_blocking_resp() */
600}
601
602
603#ifdef TEST_BLOCKING_WORKER
604void gai_test_callback(int rescode, int gai_errno, void *context, const char *name, const char *service, const struct addrinfo *hints, const struct addrinfo *ai_res)
605{
606	sockaddr_u addr;
607
608	if (rescode) {
609		TRACE(1, ("gai_test_callback context %p error rescode %d %s serv %s\n",
610			  context, rescode, name, service));
611		return;
612	}
613	while (!rescode && NULL != ai_res) {
614		ZERO_SOCK(&addr);
615		memcpy(&addr, ai_res->ai_addr, ai_res->ai_addrlen);
616		TRACE(1, ("ctx %p fam %d addr %s canon '%s' type %s at %p ai_addr %p ai_next %p\n",
617			  context,
618			  AF(&addr),
619			  stoa(&addr),
620			  (ai_res->ai_canonname)
621			      ? ai_res->ai_canonname
622			      : "",
623			  (SOCK_DGRAM == ai_res->ai_socktype)
624			      ? "DGRAM"
625			      : (SOCK_STREAM == ai_res->ai_socktype)
626				    ? "STREAM"
627				    : "(other)",
628			  ai_res,
629			  ai_res->ai_addr,
630			  ai_res->ai_next));
631
632		getnameinfo_sometime((sockaddr_u *)ai_res->ai_addr, 128, 32, 0, gni_test_callback, context);
633
634		ai_res = ai_res->ai_next;
635	}
636}
637#endif	/* TEST_BLOCKING_WORKER */
638
639
640int
641getnameinfo_sometime(
642	sockaddr_u *		psau,
643	size_t			hostoctets,
644	size_t			servoctets,
645	int			flags,
646	gni_sometime_callback	callback,
647	void *			context
648	)
649{
650	blocking_gni_req *	gni_req;
651	u_int			idx;
652	dnschild_ctx *		child_ctx;
653	time_t			time_now;
654
655	REQUIRE(hostoctets);
656	REQUIRE(hostoctets + servoctets < 1024);
657
658	idx = get_dnschild_ctx();
659	child_ctx = dnschild_contexts[idx];
660
661	gni_req = emalloc_zero(sizeof(*gni_req));
662
663	gni_req->octets = sizeof(*gni_req);
664	gni_req->dns_idx = idx;
665	time_now = time(NULL);
666	gni_req->scheduled = time_now;
667	gni_req->earliest = max(time_now, child_ctx->next_dns_timeslot);
668	child_ctx->next_dns_timeslot = gni_req->earliest;
669	memcpy(&gni_req->socku, psau, SOCKLEN(psau));
670	gni_req->hostoctets = hostoctets;
671	gni_req->servoctets = servoctets;
672	gni_req->flags = flags;
673	gni_req->retry = INITIAL_DNS_RETRY;
674	gni_req->callback = callback;
675	gni_req->context = context;
676
677	if (queue_blocking_request(
678		BLOCKING_GETNAMEINFO,
679		gni_req,
680		sizeof(*gni_req),
681		&getnameinfo_sometime_complete,
682		gni_req)) {
683
684		msyslog(LOG_ERR, "unable to queue getnameinfo request");
685		errno = EFAULT;
686		return -1;
687	}
688
689	return 0;
690}
691
692
693int
694blocking_getnameinfo(
695	blocking_child *	c,
696	blocking_pipe_header *	req
697	)
698{
699	blocking_gni_req *	gni_req;
700	dnsworker_ctx *		worker_ctx;
701	blocking_pipe_header *	resp;
702	blocking_gni_resp *	gni_resp;
703	size_t			octets;
704	size_t			resp_octets;
705	char *			service;
706	char *			cp;
707	int			rc;
708	time_t			time_now;
709	char			host[1024];
710
711	gni_req = (void *)((char *)req + sizeof(*req));
712
713	octets = gni_req->hostoctets + gni_req->servoctets;
714
715	/*
716	 * Some alloca() implementations are fragile regarding
717	 * large allocations.  We only need room for the host
718	 * and service names.
719	 */
720	REQUIRE(octets < sizeof(host));
721	service = host + gni_req->hostoctets;
722
723	worker_ctx = get_worker_context(c, gni_req->dns_idx);
724	scheduled_sleep(gni_req->scheduled, gni_req->earliest,
725			worker_ctx);
726	reload_resolv_conf(worker_ctx);
727
728	/*
729	 * Take a shot at the final size, better to overestimate
730	 * then realloc to a smaller size.
731	 */
732
733	resp_octets = sizeof(*resp) + sizeof(*gni_resp) + octets;
734	resp = emalloc_zero(resp_octets);
735	gni_resp = (void *)((char *)resp + sizeof(*resp));
736
737	TRACE(2, ("blocking_getnameinfo given addr %s flags 0x%x hostlen %lu servlen %lu\n",
738		  stoa(&gni_req->socku), gni_req->flags,
739		  (u_long)gni_req->hostoctets, (u_long)gni_req->servoctets));
740
741	gni_resp->retcode = getnameinfo(&gni_req->socku.sa,
742					SOCKLEN(&gni_req->socku),
743					host,
744					gni_req->hostoctets,
745					service,
746					gni_req->servoctets,
747					gni_req->flags);
748	gni_resp->retry = gni_req->retry;
749#ifdef EAI_SYSTEM
750	if (EAI_SYSTEM == gni_resp->retcode)
751		gni_resp->gni_errno = errno;
752#endif
753
754	if (0 != gni_resp->retcode) {
755		gni_resp->hostoctets = 0;
756		gni_resp->servoctets = 0;
757	} else {
758		gni_resp->hostoctets = strlen(host) + 1;
759		gni_resp->servoctets = strlen(service) + 1;
760		/*
761		 * If this query succeeded only after retrying, DNS may have
762		 * just become responsive.  Ignore previously-scheduled
763		 * retry sleeps once for each pending request, similar to
764		 * the way scheduled_sleep() does when its worker_sleep()
765		 * is interrupted.
766		 */
767		if (gni_req->retry > INITIAL_DNS_RETRY) {
768			time_now = time(NULL);
769			worker_ctx->ignore_scheduled_before = time_now;
770			TRACE(1, ("DNS success after retrying, ignoring sleeps scheduled before now (%s)\n",
771				humantime(time_now)));
772		}
773	}
774	octets = gni_resp->hostoctets + gni_resp->servoctets;
775	/*
776	 * Our response consists of a header, followed by the host and
777	 * service strings, each null-terminated.
778	 */
779	resp_octets = sizeof(*resp) + sizeof(*gni_resp) + octets;
780
781	resp = erealloc(resp, resp_octets);
782	gni_resp = (void *)(resp + 1);
783
784	gni_resp->octets = sizeof(*gni_resp) + octets;
785
786	/* cp serves as our current pointer while serializing */
787	cp = (void *)(gni_resp + 1);
788
789	if (0 == gni_resp->retcode) {
790		memcpy(cp, host, gni_resp->hostoctets);
791		cp += gni_resp->hostoctets;
792		memcpy(cp, service, gni_resp->servoctets);
793		cp += gni_resp->servoctets;
794	}
795
796	INSIST((size_t)(cp - (char *)resp) == resp_octets);
797	INSIST(resp_octets - sizeof(*resp) == gni_resp->octets);
798
799	rc = queue_blocking_response(c, resp, resp_octets, req);
800	if (rc)
801		msyslog(LOG_ERR, "blocking_getnameinfo unable to queue response");
802	return rc;
803}
804
805
806static void
807getnameinfo_sometime_complete(
808	blocking_work_req	rtype,
809	void *			context,
810	size_t			respsize,
811	void *			resp
812	)
813{
814	blocking_gni_req *	gni_req;
815	blocking_gni_resp *	gni_resp;
816	dnschild_ctx *		child_ctx;
817	char *			host;
818	char *			service;
819	time_t			time_now;
820	int			again;
821
822	gni_req = context;
823	gni_resp = resp;
824
825	DEBUG_REQUIRE(BLOCKING_GETNAMEINFO == rtype);
826	DEBUG_REQUIRE(respsize == gni_resp->octets);
827
828	child_ctx = dnschild_contexts[gni_req->dns_idx];
829
830	if (0 == gni_resp->retcode) {
831		/*
832		 * If this query succeeded only after retrying, DNS may have
833		 * just become responsive.
834		 */
835		if (gni_resp->retry > INITIAL_DNS_RETRY) {
836			time_now = time(NULL);
837			child_ctx->next_dns_timeslot = time_now;
838			TRACE(1, ("DNS success after retry, %u next_dns_timeslot reset (%s)\n",
839				  gni_req->dns_idx, humantime(time_now)));
840		}
841	} else {
842		again = should_retry_dns(gni_resp->retcode, gni_resp->gni_errno);
843		/*
844		 * exponential backoff of DNS retries to 64s
845		 */
846		if (gni_req->retry > 0)
847			manage_dns_retry_interval(&gni_req->scheduled,
848			    &gni_req->earliest, &gni_req->retry,
849						  &child_ctx->next_dns_timeslot, FALSE);
850
851		if (gni_req->retry > 0 && again) {
852			if (!queue_blocking_request(
853				BLOCKING_GETNAMEINFO,
854				gni_req,
855				gni_req->octets,
856				&getnameinfo_sometime_complete,
857				gni_req))
858				return;
859
860			msyslog(LOG_ERR, "unable to retry reverse lookup of %s", stoa(&gni_req->socku));
861		}
862	}
863
864	if (!gni_resp->hostoctets) {
865		host = NULL;
866		service = NULL;
867	} else {
868		host = (char *)gni_resp + sizeof(*gni_resp);
869		service = (gni_resp->servoctets)
870			      ? host + gni_resp->hostoctets
871			      : NULL;
872	}
873
874	(*gni_req->callback)(gni_resp->retcode, gni_resp->gni_errno,
875			     &gni_req->socku, gni_req->flags, host,
876			     service, gni_req->context);
877
878	free(gni_req);
879	/* gni_resp is part of block freed by process_blocking_resp() */
880}
881
882
883#ifdef TEST_BLOCKING_WORKER
884void gni_test_callback(int rescode, int gni_errno, sockaddr_u *psau, int flags, const char *host, const char *service, void *context)
885{
886	if (!rescode)
887		TRACE(1, ("gni_test_callback got host '%s' serv '%s' for addr %s context %p\n",
888			  host, service, stoa(psau), context));
889	else
890		TRACE(1, ("gni_test_callback context %p rescode %d gni_errno %d flags 0x%x addr %s\n",
891			  context, rescode, gni_errno, flags, stoa(psau)));
892}
893#endif	/* TEST_BLOCKING_WORKER */
894
895
896#ifdef HAVE_RES_INIT
897static void
898reload_resolv_conf(
899	dnsworker_ctx *	worker_ctx
900	)
901{
902	time_t	time_now;
903
904	/*
905	 * This is ad-hoc.  Reload /etc/resolv.conf once per minute
906	 * to pick up on changes from the DHCP client.  [Bug 1226]
907	 * When using threads for the workers, this needs to happen
908	 * only once per minute process-wide.
909	 */
910	time_now = time(NULL);
911# ifdef WORK_THREAD
912	worker_ctx->next_res_init = next_res_init;
913# endif
914	if (worker_ctx->next_res_init <= time_now) {
915		if (worker_ctx->next_res_init != 0)
916			res_init();
917		worker_ctx->next_res_init = time_now + 60;
918# ifdef WORK_THREAD
919		next_res_init = worker_ctx->next_res_init;
920# endif
921	}
922}
923#endif	/* HAVE_RES_INIT */
924
925
926static u_int
927reserve_dnschild_ctx(void)
928{
929	const size_t	ps = sizeof(dnschild_contexts[0]);
930	const size_t	cs = sizeof(*dnschild_contexts[0]);
931	u_int		c;
932	u_int		new_alloc;
933	size_t		octets;
934	size_t		new_octets;
935
936	c = 0;
937	while (TRUE) {
938		for ( ; c < dnschild_contexts_alloc; c++) {
939			if (NULL == dnschild_contexts[c]) {
940				dnschild_contexts[c] = emalloc_zero(cs);
941
942				return c;
943			}
944		}
945		new_alloc = dnschild_contexts_alloc + 20;
946		new_octets = new_alloc * ps;
947		octets = dnschild_contexts_alloc * ps;
948		dnschild_contexts = erealloc_zero(dnschild_contexts,
949						  new_octets, octets);
950		dnschild_contexts_alloc = new_alloc;
951	}
952}
953
954
955static u_int
956get_dnschild_ctx(void)
957{
958	static u_int	shared_ctx = UINT_MAX;
959
960	if (worker_per_query)
961		return reserve_dnschild_ctx();
962
963	if (UINT_MAX == shared_ctx)
964		shared_ctx = reserve_dnschild_ctx();
965
966	return shared_ctx;
967}
968
969
970static dnsworker_ctx *
971get_worker_context(
972	blocking_child *	c,
973	u_int			idx
974	)
975{
976	u_int		min_new_alloc;
977	u_int		new_alloc;
978	size_t		octets;
979	size_t		new_octets;
980	dnsworker_ctx *	retv;
981
982	worker_global_lock(TRUE);
983
984	if (dnsworker_contexts_alloc <= idx) {
985		min_new_alloc = 1 + idx;
986		/* round new_alloc up to nearest multiple of 4 */
987		new_alloc = (min_new_alloc + 4) & ~(4 - 1);
988		new_octets = new_alloc * sizeof(dnsworker_ctx*);
989		octets = dnsworker_contexts_alloc * sizeof(dnsworker_ctx*);
990		dnsworker_contexts = erealloc_zero(dnsworker_contexts,
991						   new_octets, octets);
992		dnsworker_contexts_alloc = new_alloc;
993		retv = emalloc_zero(sizeof(dnsworker_ctx));
994		dnsworker_contexts[idx] = retv;
995	} else if (NULL == (retv = dnsworker_contexts[idx])) {
996		retv = emalloc_zero(sizeof(dnsworker_ctx));
997		dnsworker_contexts[idx] = retv;
998	}
999
1000	worker_global_lock(FALSE);
1001
1002	ZERO(*retv);
1003	retv->c = c;
1004	return retv;
1005}
1006
1007
1008static void
1009scheduled_sleep(
1010	time_t		scheduled,
1011	time_t		earliest,
1012	dnsworker_ctx *	worker_ctx
1013	)
1014{
1015	time_t now;
1016
1017	if (scheduled < worker_ctx->ignore_scheduled_before) {
1018		TRACE(1, ("ignoring sleep until %s scheduled at %s (before %s)\n",
1019			  humantime(earliest), humantime(scheduled),
1020			  humantime(worker_ctx->ignore_scheduled_before)));
1021		return;
1022	}
1023
1024	now = time(NULL);
1025
1026	if (now < earliest) {
1027		TRACE(1, ("sleep until %s scheduled at %s (>= %s)\n",
1028			  humantime(earliest), humantime(scheduled),
1029			  humantime(worker_ctx->ignore_scheduled_before)));
1030		if (-1 == worker_sleep(worker_ctx->c, earliest - now)) {
1031			/* our sleep was interrupted */
1032			now = time(NULL);
1033			worker_ctx->ignore_scheduled_before = now;
1034#ifdef HAVE_RES_INIT
1035			worker_ctx->next_res_init = now + 60;
1036			next_res_init = worker_ctx->next_res_init;
1037			res_init();
1038#endif
1039			TRACE(1, ("sleep interrupted by daemon, ignoring sleeps scheduled before now (%s)\n",
1040				  humantime(worker_ctx->ignore_scheduled_before)));
1041		}
1042	}
1043}
1044
1045
/*
 * manage_dns_retry_interval is a helper used by
 * getaddrinfo_sometime_complete and getnameinfo_sometime_complete
 * to calculate the new retry interval and schedule the next query.
 *
 * On entry *pretry is the current interval in seconds and
 * *pnext_timeslot the worker's next free timeslot.  On return:
 *   *pscheduled	the current time
 *   *pwhen		later of now + old interval and the old timeslot
 *   *pnext_timeslot	same value as *pwhen
 *   *pretry		the next, longer interval
 */
static void
manage_dns_retry_interval(
	time_t *	pscheduled,
	time_t *	pwhen,
	int *		pretry,
	time_t *	pnext_timeslot,
	int		forever
	)
{
	const time_t	now = time(NULL);
	const int	ceiling = forever ? 1024 : 64;
	const int	old_retry = *pretry;
	time_t		due;
	int		doubled;
	int		low_bit_cleared;
	int		next_retry;

	/* never run earlier than the worker's already-promised slot */
	due = now + old_retry;
	if (*pnext_timeslot > due)
		due = *pnext_timeslot;
	*pnext_timeslot = due;

	/* this exponential backoff is slower than doubling up: The
	 * sequence goes 2-3-4-6-8-12-16-24-32... and the upper limit is
	 * 64 seconds for things that should not repeat forever, and
	 * 1024 when repeated forever.
	 *
	 * After doubling, a value with two bits set drops its low bit
	 * (6 -> 4, 12 -> 8), while a power of two gives back a quarter
	 * (4 -> 3, 8 -> 6).
	 */
	doubled = old_retry << 1;
	low_bit_cleared = doubled & (doubled - 1);
	if (0 != low_bit_cleared)
		next_retry = low_bit_cleared;
	else
		next_retry = doubled - (doubled >> 2);
	if (ceiling < next_retry)
		next_retry = ceiling;

	*pscheduled = now;
	*pwhen = due;
	*pretry = next_retry;
}
1087
/*
 * should_retry_dns is a helper used by getaddrinfo_sometime_complete
 * and getnameinfo_sometime_complete which implements ntpd's DNS retry
 * policy.
 *
 * rescode is the resolver's EAI_* result and res_errno the errno
 * saved for the EAI_SYSTEM case.  Returns nonzero if the lookup
 * should be tried again.  EAI_NONAME (and EAI_NODATA) are retried
 * only until the first EAI_AGAIN has been seen by this process.
 */
static int
should_retry_dns(
	int	rescode,
	int	res_errno
	)
{
	/* latched once any lookup has returned EAI_AGAIN, [Bug 1178] */
	static int	eai_again_seen;
	int		again = 0;
#if defined (EAI_SYSTEM) && defined(DEBUG)
	char		msg[256];
#endif

	/*
	 * If the resolver failed, see if the failure is
	 * temporary. If so, return success.
	 */
	switch (rescode) {

	case EAI_AGAIN:
		eai_again_seen = 1;		/* [Bug 1178] */
		/* FALLTHROUGH */
	case EAI_FAIL:
		again = 1;
		break;

	case EAI_NONAME:
#if defined(EAI_NODATA) && (EAI_NODATA != EAI_NONAME)
	case EAI_NODATA:
#endif
		again = !eai_again_seen;	/* [Bug 1178] */
		break;

#ifdef EAI_SYSTEM
	case EAI_SYSTEM:
		/*
		 * EAI_SYSTEM means the real error is in errno.  We should be more
		 * discriminating about which errno values require retrying, but
		 * this matches existing behavior.
		 */
		again = 1;
# ifdef DEBUG
		errno_to_str(res_errno, msg, sizeof(msg));
		TRACE(1, ("intres: EAI_SYSTEM errno %d (%s) means try again, right?\n",
			  res_errno, msg));
# endif
		break;
#endif

	default:
		/* anything else is treated as permanent */
		break;
	}

	TRACE(2, ("intres: resolver returned: %s (%d), %sretrying\n",
		  gai_strerror(rescode), rescode, again ? "" : "not "));

	return again;
}
1151
1152#else	/* !WORKER follows */
1153int ntp_intres_nonempty_compilation_unit;
1154#endif
1155