1/*
2 * services/cache/dns.c - Cache services for DNS using msg and rrset caches.
3 *
4 * Copyright (c) 2007, NLnet Labs. All rights reserved.
5 *
6 * This software is open source.
7 *
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
10 * are met:
11 *
12 * Redistributions of source code must retain the above copyright notice,
13 * this list of conditions and the following disclaimer.
14 *
15 * Redistributions in binary form must reproduce the above copyright notice,
16 * this list of conditions and the following disclaimer in the documentation
17 * and/or other materials provided with the distribution.
18 *
19 * Neither the name of the NLNET LABS nor the names of its contributors may
20 * be used to endorse or promote products derived from this software without
21 * specific prior written permission.
22 *
23 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
24 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
25 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
26 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
27 * HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
28 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
29 * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
30 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
31 * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
32 * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
33 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
34 */
35
36/**
37 * \file
38 *
39 * This file contains the DNS cache.
40 */
41#include "config.h"
42#include "iterator/iter_delegpt.h"
43#include "iterator/iter_utils.h"
44#include "validator/val_nsec.h"
45#include "validator/val_utils.h"
46#include "services/cache/dns.h"
47#include "services/cache/rrset.h"
48#include "util/data/msgparse.h"
49#include "util/data/msgreply.h"
50#include "util/data/packed_rrset.h"
51#include "util/data/dname.h"
52#include "util/module.h"
53#include "util/net_help.h"
54#include "util/regional.h"
55#include "util/config_file.h"
56#include "sldns/sbuffer.h"
57
58/** store rrsets in the rrset cache.
59 * @param env: module environment with caches.
60 * @param rep: contains list of rrsets to store.
61 * @param now: current time.
62 * @param leeway: during prefetch how much leeway to update TTLs.
63 * 	This makes rrsets (other than type NS) timeout sooner so they get
64 * 	updated with a new full TTL.
65 * 	Type NS does not get this, because it must not be refreshed from the
66 * 	child domain, but keep counting down properly.
67 * @param pside: if from parentside discovered NS, so that its NS is okay
68 * 	in a prefetch situation to be updated (without becoming sticky).
69 * @param qrep: update rrsets here if cache is better
70 * @param region: for qrep allocs.
71 * @param qstarttime: time when delegations were looked up, this is perhaps
72 *	earlier than the time in now. The time is used to determine if RRsets
73 *	of type NS have expired, so that they can only be updated using
74 *	lookups of delegation points that did not use them, since they had
75 *	expired then.
76 */
77static void
78store_rrsets(struct module_env* env, struct reply_info* rep, time_t now,
79	time_t leeway, int pside, struct reply_info* qrep,
80	struct regional* region, time_t qstarttime)
81{
82	size_t i;
83	time_t ttl, min_ttl = rep->ttl;
84	/* see if rrset already exists in cache, if not insert it. */
85	for(i=0; i<rep->rrset_count; i++) {
86		rep->ref[i].key = rep->rrsets[i];
87		rep->ref[i].id = rep->rrsets[i]->id;
88		/* update ref if it was in the cache */
89		switch(rrset_cache_update(env->rrset_cache, &rep->ref[i],
90				env->alloc, ((ntohs(rep->ref[i].key->rk.type)==
91				LDNS_RR_TYPE_NS && !pside)?qstarttime:now + leeway))) {
92		case 0: /* ref unchanged, item inserted */
93			break;
94		case 2: /* ref updated, cache is superior */
95			if(region) {
96				struct ub_packed_rrset_key* ck;
97				lock_rw_rdlock(&rep->ref[i].key->entry.lock);
98				/* if deleted rrset, do not copy it */
99				if(rep->ref[i].key->id == 0)
100					ck = NULL;
101				else 	ck = packed_rrset_copy_region(
102					rep->ref[i].key, region, now);
103				lock_rw_unlock(&rep->ref[i].key->entry.lock);
104				if(ck) {
105					/* use cached copy if memory allows */
106					qrep->rrsets[i] = ck;
107				}
108			}
109			/* no break: also copy key item */
110			/* the line below is matched by gcc regex and silences
111			 * the fallthrough warning */
112			/* fallthrough */
113		case 1: /* ref updated, item inserted */
114			rep->rrsets[i] = rep->ref[i].key;
115		}
116		/* if ref was updated make sure the message ttl is updated to
117		 * the minimum of the current rrsets. */
118		ttl = ((struct packed_rrset_data*)rep->rrsets[i]->entry.data)->ttl;
119		if(ttl < min_ttl) min_ttl = ttl;
120	}
121	if(min_ttl < rep->ttl) {
122		rep->ttl = min_ttl;
123		rep->prefetch_ttl = PREFETCH_TTL_CALC(rep->ttl);
124		rep->serve_expired_ttl = rep->ttl + SERVE_EXPIRED_TTL;
125	}
126}
127
128/** delete message from message cache */
129void
130msg_cache_remove(struct module_env* env, uint8_t* qname, size_t qnamelen,
131	uint16_t qtype, uint16_t qclass, uint16_t flags)
132{
133	struct query_info k;
134	hashvalue_type h;
135
136	k.qname = qname;
137	k.qname_len = qnamelen;
138	k.qtype = qtype;
139	k.qclass = qclass;
140	k.local_alias = NULL;
141	h = query_info_hash(&k, flags);
142	slabhash_remove(env->msg_cache, h, &k);
143}
144
145void
146dns_cache_store_msg(struct module_env* env, struct query_info* qinfo,
147	hashvalue_type hash, struct reply_info* rep, time_t leeway, int pside,
148	struct reply_info* qrep, uint32_t flags, struct regional* region,
149	time_t qstarttime)
150{
151	struct msgreply_entry* e;
152	time_t ttl = rep->ttl;
153	size_t i;
154
155	/* store RRsets */
156        for(i=0; i<rep->rrset_count; i++) {
157		rep->ref[i].key = rep->rrsets[i];
158		rep->ref[i].id = rep->rrsets[i]->id;
159	}
160
161	/* there was a reply_info_sortref(rep) here but it seems to be
162	 * unnecessary, because the cache gets locked per rrset. */
163	reply_info_set_ttls(rep, *env->now);
164	store_rrsets(env, rep, *env->now, leeway, pside, qrep, region,
165		qstarttime);
166	if(ttl == 0 && !(flags & DNSCACHE_STORE_ZEROTTL)) {
167		/* we do not store the message, but we did store the RRs,
168		 * which could be useful for delegation information */
169		verbose(VERB_ALGO, "TTL 0: dropped msg from cache");
170		reply_info_delete(rep, NULL);
171		/* if the message is in the cache, remove that msg,
172		 * so that the TTL 0 response can be returned for future
173		 * responses (i.e. don't get answered from
174		 * cache, but instead go to recursion to get this TTL0
175		 * response).
176		 * Possible messages that could be in the cache:
177		 * - SERVFAIL
178		 * - NXDOMAIN
179		 * - NODATA
180		 * - an older record that is expired
181		 * - an older record that did not yet expire */
182		msg_cache_remove(env, qinfo->qname, qinfo->qname_len,
183			qinfo->qtype, qinfo->qclass, flags);
184		return;
185	}
186
187	/* store msg in the cache */
188	reply_info_sortref(rep);
189	if(!(e = query_info_entrysetup(qinfo, rep, hash))) {
190		log_err("store_msg: malloc failed");
191		return;
192	}
193	slabhash_insert(env->msg_cache, hash, &e->entry, rep, env->alloc);
194}
195
/** find closest NS or DNAME and returns the rrset (locked)
 * The name is searched in the rrset cache, and labels are removed from the
 * front until an rrset of the searched type is found or the name runs out.
 * @param env: module environment with the rrset cache.
 * @param qname: name to start the search at, in wire format.
 * @param qnamelen: length of qname.
 * @param qclass: class to search for.
 * @param now: current time, passed to the cache lookups.
 * @param searchtype: rrset type to search for.
 * @param stripfront: if true, the first label is removed before the search
 *	starts, so that only strict parent names of qname can match.
 * @param noexpiredabove: if true, a found rrset is only returned if
 *	rrset_cache_expired_above reports no expired rrsets above it;
 *	otherwise the search continues higher up.
 * @param expiretop: if not NULL, the name where the expired-above check
 *	stops; an rrset found at exactly this name is returned directly.
 * @param expiretoplen: length of expiretop.
 * @return the rrset with its entry lock held (the caller unlocks it),
 *	or NULL if nothing was found.
 */
static struct ub_packed_rrset_key*
find_closest_of_type(struct module_env* env, uint8_t* qname, size_t qnamelen,
	uint16_t qclass, time_t now, uint16_t searchtype, int stripfront,
	int noexpiredabove, uint8_t* expiretop, size_t expiretoplen)
{
	struct ub_packed_rrset_key *rrset;
	uint8_t lablen;

	if(stripfront) {
		/* strip off so that DNAMEs have strict subdomain match */
		/* the first octet is the length of the first label */
		lablen = *qname;
		qname += lablen + 1;
		qnamelen -= lablen + 1;
	}

	/* snip off front part of qname until the type is found */
	while(qnamelen > 0) {
		if((rrset = rrset_cache_lookup(env->rrset_cache, qname,
			qnamelen, searchtype, qclass, 0, now, 0))) {
			/* remember where the rrset was found, because qname
			 * and qnamelen are modified by the expiry check */
			uint8_t* origqname = qname;
			size_t origqnamelen = qnamelen;
			if(!noexpiredabove)
				return rrset;
			/* if expiretop set, do not look above it, but
			 * qname is equal, so the just found result is also
			 * the nonexpired above part. */
			if(expiretop && qnamelen == expiretoplen &&
				query_dname_compare(qname, expiretop)==0)
				return rrset;
			/* check for expiry, but we have to let go of the rrset
			 * for the lock ordering */
			lock_rw_unlock(&rrset->entry.lock);
			/* the rrset_cache_expired_above function always takes
			 * off one label (if qnamelen>0) and returns the final
			 * qname where it searched, so we can continue from
			 * there turning the O N*N search into O N. */
			if(!rrset_cache_expired_above(env->rrset_cache, &qname,
				&qnamelen, searchtype, qclass, now, expiretop,
				expiretoplen)) {
				/* we want to return rrset, but it may be
				 * gone from cache, if so, just loop like
				 * it was not in the cache in the first place.
				 */
				if((rrset = rrset_cache_lookup(env->
					rrset_cache, origqname, origqnamelen,
					searchtype, qclass, 0, now, 0))) {
					return rrset;
				}
			}
			log_nametypeclass(VERB_ALGO, "ignoring rrset because expired rrsets exist above it", origqname, searchtype, qclass);
			/* no label is snipped here; qname and qnamelen were
			 * already moved by rrset_cache_expired_above */
			continue;
		}

		/* snip off front label */
		lablen = *qname;
		qname += lablen + 1;
		qnamelen -= lablen + 1;
	}
	return NULL;
}
257
258/** add addr to additional section */
259static void
260addr_to_additional(struct ub_packed_rrset_key* rrset, struct regional* region,
261	struct dns_msg* msg, time_t now)
262{
263	if((msg->rep->rrsets[msg->rep->rrset_count] =
264		packed_rrset_copy_region(rrset, region, now))) {
265		msg->rep->ar_numrrsets++;
266		msg->rep->rrset_count++;
267	}
268}
269
270/** lookup message in message cache */
271struct msgreply_entry*
272msg_cache_lookup(struct module_env* env, uint8_t* qname, size_t qnamelen,
273	uint16_t qtype, uint16_t qclass, uint16_t flags, time_t now, int wr)
274{
275	struct lruhash_entry* e;
276	struct query_info k;
277	hashvalue_type h;
278
279	k.qname = qname;
280	k.qname_len = qnamelen;
281	k.qtype = qtype;
282	k.qclass = qclass;
283	k.local_alias = NULL;
284	h = query_info_hash(&k, flags);
285	e = slabhash_lookup(env->msg_cache, h, &k, wr);
286
287	if(!e) return NULL;
288	if( now > ((struct reply_info*)e->data)->ttl ) {
289		lock_rw_unlock(&e->lock);
290		return NULL;
291	}
292	return (struct msgreply_entry*)e->key;
293}
294
295/** find and add A and AAAA records for nameservers in delegpt */
296static int
297find_add_addrs(struct module_env* env, uint16_t qclass,
298	struct regional* region, struct delegpt* dp, time_t now,
299	struct dns_msg** msg)
300{
301	struct delegpt_ns* ns;
302	struct msgreply_entry* neg;
303	struct ub_packed_rrset_key* akey;
304	for(ns = dp->nslist; ns; ns = ns->next) {
305		akey = rrset_cache_lookup(env->rrset_cache, ns->name,
306			ns->namelen, LDNS_RR_TYPE_A, qclass, 0, now, 0);
307		if(akey) {
308			if(!delegpt_add_rrset_A(dp, region, akey, 0, NULL)) {
309				lock_rw_unlock(&akey->entry.lock);
310				return 0;
311			}
312			if(msg)
313				addr_to_additional(akey, region, *msg, now);
314			lock_rw_unlock(&akey->entry.lock);
315		} else {
316			/* BIT_CD on false because delegpt lookup does
317			 * not use dns64 translation */
318			neg = msg_cache_lookup(env, ns->name, ns->namelen,
319				LDNS_RR_TYPE_A, qclass, 0, now, 0);
320			if(neg) {
321				delegpt_add_neg_msg(dp, neg);
322				lock_rw_unlock(&neg->entry.lock);
323			}
324		}
325		akey = rrset_cache_lookup(env->rrset_cache, ns->name,
326			ns->namelen, LDNS_RR_TYPE_AAAA, qclass, 0, now, 0);
327		if(akey) {
328			if(!delegpt_add_rrset_AAAA(dp, region, akey, 0, NULL)) {
329				lock_rw_unlock(&akey->entry.lock);
330				return 0;
331			}
332			if(msg)
333				addr_to_additional(akey, region, *msg, now);
334			lock_rw_unlock(&akey->entry.lock);
335		} else {
336			/* BIT_CD on false because delegpt lookup does
337			 * not use dns64 translation */
338			neg = msg_cache_lookup(env, ns->name, ns->namelen,
339				LDNS_RR_TYPE_AAAA, qclass, 0, now, 0);
340			if(neg) {
341				delegpt_add_neg_msg(dp, neg);
342				lock_rw_unlock(&neg->entry.lock);
343			}
344		}
345	}
346	return 1;
347}
348
349/** find and add A and AAAA records for missing nameservers in delegpt */
350int
351cache_fill_missing(struct module_env* env, uint16_t qclass,
352	struct regional* region, struct delegpt* dp)
353{
354	struct delegpt_ns* ns;
355	struct msgreply_entry* neg;
356	struct ub_packed_rrset_key* akey;
357	time_t now = *env->now;
358	for(ns = dp->nslist; ns; ns = ns->next) {
359		if(ns->cache_lookup_count > ITERATOR_NAME_CACHELOOKUP_MAX)
360			continue;
361		ns->cache_lookup_count++;
362		akey = rrset_cache_lookup(env->rrset_cache, ns->name,
363			ns->namelen, LDNS_RR_TYPE_A, qclass, 0, now, 0);
364		if(akey) {
365			if(!delegpt_add_rrset_A(dp, region, akey, ns->lame,
366				NULL)) {
367				lock_rw_unlock(&akey->entry.lock);
368				return 0;
369			}
370			log_nametypeclass(VERB_ALGO, "found in cache",
371				ns->name, LDNS_RR_TYPE_A, qclass);
372			lock_rw_unlock(&akey->entry.lock);
373		} else {
374			/* BIT_CD on false because delegpt lookup does
375			 * not use dns64 translation */
376			neg = msg_cache_lookup(env, ns->name, ns->namelen,
377				LDNS_RR_TYPE_A, qclass, 0, now, 0);
378			if(neg) {
379				delegpt_add_neg_msg(dp, neg);
380				lock_rw_unlock(&neg->entry.lock);
381			}
382		}
383		akey = rrset_cache_lookup(env->rrset_cache, ns->name,
384			ns->namelen, LDNS_RR_TYPE_AAAA, qclass, 0, now, 0);
385		if(akey) {
386			if(!delegpt_add_rrset_AAAA(dp, region, akey, ns->lame,
387				NULL)) {
388				lock_rw_unlock(&akey->entry.lock);
389				return 0;
390			}
391			log_nametypeclass(VERB_ALGO, "found in cache",
392				ns->name, LDNS_RR_TYPE_AAAA, qclass);
393			lock_rw_unlock(&akey->entry.lock);
394		} else {
395			/* BIT_CD on false because delegpt lookup does
396			 * not use dns64 translation */
397			neg = msg_cache_lookup(env, ns->name, ns->namelen,
398				LDNS_RR_TYPE_AAAA, qclass, 0, now, 0);
399			if(neg) {
400				delegpt_add_neg_msg(dp, neg);
401				lock_rw_unlock(&neg->entry.lock);
402			}
403		}
404	}
405	return 1;
406}
407
408/** find and add DS or NSEC to delegation msg */
409static void
410find_add_ds(struct module_env* env, struct regional* region,
411	struct dns_msg* msg, struct delegpt* dp, time_t now)
412{
413	/* Lookup the DS or NSEC at the delegation point. */
414	struct ub_packed_rrset_key* rrset = rrset_cache_lookup(
415		env->rrset_cache, dp->name, dp->namelen, LDNS_RR_TYPE_DS,
416		msg->qinfo.qclass, 0, now, 0);
417	if(!rrset) {
418		/* NOTE: this won't work for alternate NSEC schemes
419		 *	(opt-in, NSEC3) */
420		rrset = rrset_cache_lookup(env->rrset_cache, dp->name,
421			dp->namelen, LDNS_RR_TYPE_NSEC, msg->qinfo.qclass,
422			0, now, 0);
423		/* Note: the PACKED_RRSET_NSEC_AT_APEX flag is not used.
424		 * since this is a referral, we need the NSEC at the parent
425		 * side of the zone cut, not the NSEC at apex side. */
426		if(rrset && nsec_has_type(rrset, LDNS_RR_TYPE_DS)) {
427			lock_rw_unlock(&rrset->entry.lock);
428			rrset = NULL; /* discard wrong NSEC */
429		}
430	}
431	if(rrset) {
432		/* add it to auth section. This is the second rrset. */
433		if((msg->rep->rrsets[msg->rep->rrset_count] =
434			packed_rrset_copy_region(rrset, region, now))) {
435			msg->rep->ns_numrrsets++;
436			msg->rep->rrset_count++;
437		}
438		lock_rw_unlock(&rrset->entry.lock);
439	}
440}
441
442struct dns_msg*
443dns_msg_create(uint8_t* qname, size_t qnamelen, uint16_t qtype,
444	uint16_t qclass, struct regional* region, size_t capacity)
445{
446	struct dns_msg* msg = (struct dns_msg*)regional_alloc(region,
447		sizeof(struct dns_msg));
448	if(!msg)
449		return NULL;
450	msg->qinfo.qname = regional_alloc_init(region, qname, qnamelen);
451	if(!msg->qinfo.qname)
452		return NULL;
453	msg->qinfo.qname_len = qnamelen;
454	msg->qinfo.qtype = qtype;
455	msg->qinfo.qclass = qclass;
456	msg->qinfo.local_alias = NULL;
457	/* non-packed reply_info, because it needs to grow the array */
458	msg->rep = (struct reply_info*)regional_alloc_zero(region,
459		sizeof(struct reply_info)-sizeof(struct rrset_ref));
460	if(!msg->rep)
461		return NULL;
462	if(capacity > RR_COUNT_MAX)
463		return NULL; /* integer overflow protection */
464	msg->rep->flags = BIT_QR; /* with QR, no AA */
465	msg->rep->qdcount = 1;
466	msg->rep->reason_bogus = LDNS_EDE_NONE;
467	msg->rep->rrsets = (struct ub_packed_rrset_key**)
468		regional_alloc(region,
469		capacity*sizeof(struct ub_packed_rrset_key*));
470	if(!msg->rep->rrsets)
471		return NULL;
472	return msg;
473}
474
475int
476dns_msg_authadd(struct dns_msg* msg, struct regional* region,
477	struct ub_packed_rrset_key* rrset, time_t now)
478{
479	if(!(msg->rep->rrsets[msg->rep->rrset_count++] =
480		packed_rrset_copy_region(rrset, region, now)))
481		return 0;
482	msg->rep->ns_numrrsets++;
483	return 1;
484}
485
486int
487dns_msg_ansadd(struct dns_msg* msg, struct regional* region,
488	struct ub_packed_rrset_key* rrset, time_t now)
489{
490	if(!(msg->rep->rrsets[msg->rep->rrset_count++] =
491		packed_rrset_copy_region(rrset, region, now)))
492		return 0;
493	msg->rep->an_numrrsets++;
494	return 1;
495}
496
497struct delegpt*
498dns_cache_find_delegation(struct module_env* env, uint8_t* qname,
499	size_t qnamelen, uint16_t qtype, uint16_t qclass,
500	struct regional* region, struct dns_msg** msg, time_t now,
501	int noexpiredabove, uint8_t* expiretop, size_t expiretoplen)
502{
503	/* try to find closest NS rrset */
504	struct ub_packed_rrset_key* nskey;
505	struct packed_rrset_data* nsdata;
506	struct delegpt* dp;
507
508	nskey = find_closest_of_type(env, qname, qnamelen, qclass, now,
509		LDNS_RR_TYPE_NS, 0, noexpiredabove, expiretop, expiretoplen);
510	if(!nskey) /* hope the caller has hints to prime or something */
511		return NULL;
512	nsdata = (struct packed_rrset_data*)nskey->entry.data;
513	/* got the NS key, create delegation point */
514	dp = delegpt_create(region);
515	if(!dp || !delegpt_set_name(dp, region, nskey->rk.dname)) {
516		lock_rw_unlock(&nskey->entry.lock);
517		log_err("find_delegation: out of memory");
518		return NULL;
519	}
520	/* create referral message */
521	if(msg) {
522		/* allocate the array to as much as we could need:
523		 *	NS rrset + DS/NSEC rrset +
524		 *	A rrset for every NS RR
525		 *	AAAA rrset for every NS RR
526		 */
527		*msg = dns_msg_create(qname, qnamelen, qtype, qclass, region,
528			2 + nsdata->count*2);
529		if(!*msg || !dns_msg_authadd(*msg, region, nskey, now)) {
530			lock_rw_unlock(&nskey->entry.lock);
531			log_err("find_delegation: out of memory");
532			return NULL;
533		}
534	}
535	if(!delegpt_rrset_add_ns(dp, region, nskey, 0))
536		log_err("find_delegation: addns out of memory");
537	lock_rw_unlock(&nskey->entry.lock); /* first unlock before next lookup*/
538	/* find and add DS/NSEC (if any) */
539	if(msg)
540		find_add_ds(env, region, *msg, dp, now);
541	/* find and add A entries */
542	if(!find_add_addrs(env, qclass, region, dp, now, msg))
543		log_err("find_delegation: addrs out of memory");
544	return dp;
545}
546
547/** allocate dns_msg from query_info and reply_info */
548static struct dns_msg*
549gen_dns_msg(struct regional* region, struct query_info* q, size_t num)
550{
551	struct dns_msg* msg = (struct dns_msg*)regional_alloc(region,
552		sizeof(struct dns_msg));
553	if(!msg)
554		return NULL;
555	memcpy(&msg->qinfo, q, sizeof(struct query_info));
556	msg->qinfo.qname = regional_alloc_init(region, q->qname, q->qname_len);
557	if(!msg->qinfo.qname)
558		return NULL;
559	/* allocate replyinfo struct and rrset key array separately */
560	msg->rep = (struct reply_info*)regional_alloc(region,
561		sizeof(struct reply_info) - sizeof(struct rrset_ref));
562	if(!msg->rep)
563		return NULL;
564	msg->rep->reason_bogus = LDNS_EDE_NONE;
565	msg->rep->reason_bogus_str = NULL;
566	if(num > RR_COUNT_MAX)
567		return NULL; /* integer overflow protection */
568	msg->rep->rrsets = (struct ub_packed_rrset_key**)
569		regional_alloc(region,
570		num * sizeof(struct ub_packed_rrset_key*));
571	if(!msg->rep->rrsets)
572		return NULL;
573	return msg;
574}
575
576struct dns_msg*
577tomsg(struct module_env* env, struct query_info* q, struct reply_info* r,
578	struct regional* region, time_t now, int allow_expired,
579	struct regional* scratch)
580{
581	struct dns_msg* msg;
582	size_t i;
583	int is_expired = 0;
584	time_t now_control = now;
585	if(now > r->ttl) {
586		/* Check if we are allowed to serve expired */
587		if(allow_expired) {
588			if(env->cfg->serve_expired_ttl &&
589				r->serve_expired_ttl < now) {
590				return NULL;
591			}
592			/* Ignore expired failure answers */
593			if(FLAGS_GET_RCODE(r->flags) !=
594				LDNS_RCODE_NOERROR &&
595				FLAGS_GET_RCODE(r->flags) !=
596				LDNS_RCODE_NXDOMAIN &&
597				FLAGS_GET_RCODE(r->flags) !=
598				LDNS_RCODE_YXDOMAIN)
599				return 0;
600		} else {
601			return NULL;
602		}
603		/* Change the current time so we can pass the below TTL checks when
604		 * serving expired data. */
605		now_control = r->ttl - env->cfg->serve_expired_reply_ttl;
606		is_expired = 1;
607	}
608
609	msg = gen_dns_msg(region, q, r->rrset_count);
610	if(!msg) return NULL;
611	msg->rep->flags = r->flags;
612	msg->rep->qdcount = r->qdcount;
613	msg->rep->ttl = is_expired
614		?SERVE_EXPIRED_REPLY_TTL
615		:r->ttl - now;
616	if(r->prefetch_ttl > now)
617		msg->rep->prefetch_ttl = r->prefetch_ttl - now;
618	else
619		msg->rep->prefetch_ttl = PREFETCH_TTL_CALC(msg->rep->ttl);
620	msg->rep->serve_expired_ttl = msg->rep->ttl + SERVE_EXPIRED_TTL;
621	msg->rep->security = r->security;
622	msg->rep->an_numrrsets = r->an_numrrsets;
623	msg->rep->ns_numrrsets = r->ns_numrrsets;
624	msg->rep->ar_numrrsets = r->ar_numrrsets;
625	msg->rep->rrset_count = r->rrset_count;
626	msg->rep->authoritative = r->authoritative;
627	msg->rep->reason_bogus = r->reason_bogus;
628	if(r->reason_bogus_str) {
629		msg->rep->reason_bogus_str = regional_strdup(region, r->reason_bogus_str);
630	}
631
632	if(!rrset_array_lock(r->ref, r->rrset_count, now_control)) {
633		return NULL;
634	}
635	if(r->an_numrrsets > 0 && (r->rrsets[0]->rk.type == htons(
636		LDNS_RR_TYPE_CNAME) || r->rrsets[0]->rk.type == htons(
637		LDNS_RR_TYPE_DNAME)) && !reply_check_cname_chain(q, r)) {
638		/* cname chain is now invalid, reconstruct msg */
639		rrset_array_unlock(r->ref, r->rrset_count);
640		return NULL;
641	}
642	if(r->security == sec_status_secure && !reply_all_rrsets_secure(r)) {
643		/* message rrsets have changed status, revalidate */
644		rrset_array_unlock(r->ref, r->rrset_count);
645		return NULL;
646	}
647	for(i=0; i<msg->rep->rrset_count; i++) {
648		msg->rep->rrsets[i] = packed_rrset_copy_region(r->rrsets[i],
649			region, now);
650		if(!msg->rep->rrsets[i]) {
651			rrset_array_unlock(r->ref, r->rrset_count);
652			return NULL;
653		}
654	}
655	if(env)
656		rrset_array_unlock_touch(env->rrset_cache, scratch, r->ref,
657		r->rrset_count);
658	else
659		rrset_array_unlock(r->ref, r->rrset_count);
660	return msg;
661}
662
663struct dns_msg*
664dns_msg_deepcopy_region(struct dns_msg* origin, struct regional* region)
665{
666	size_t i;
667	struct dns_msg* res = NULL;
668	res = gen_dns_msg(region, &origin->qinfo, origin->rep->rrset_count);
669	if(!res) return NULL;
670	*res->rep = *origin->rep;
671	if(origin->rep->reason_bogus_str) {
672		res->rep->reason_bogus_str = regional_strdup(region,
673			origin->rep->reason_bogus_str);
674	}
675	for(i=0; i<res->rep->rrset_count; i++) {
676		res->rep->rrsets[i] = packed_rrset_copy_region(
677			origin->rep->rrsets[i], region, 0);
678		if(!res->rep->rrsets[i]) {
679			return NULL;
680		}
681	}
682	return res;
683}
684
685/** synthesize RRset-only response from cached RRset item */
686static struct dns_msg*
687rrset_msg(struct ub_packed_rrset_key* rrset, struct regional* region,
688	time_t now, struct query_info* q)
689{
690	struct dns_msg* msg;
691	struct packed_rrset_data* d = (struct packed_rrset_data*)
692		rrset->entry.data;
693	if(now > d->ttl)
694		return NULL;
695	msg = gen_dns_msg(region, q, 1); /* only the CNAME (or other) RRset */
696	if(!msg)
697		return NULL;
698	msg->rep->flags = BIT_QR; /* reply, no AA, no error */
699        msg->rep->authoritative = 0; /* reply stored in cache can't be authoritative */
700	msg->rep->qdcount = 1;
701	msg->rep->ttl = d->ttl - now;
702	msg->rep->prefetch_ttl = PREFETCH_TTL_CALC(msg->rep->ttl);
703	msg->rep->serve_expired_ttl = msg->rep->ttl + SERVE_EXPIRED_TTL;
704	msg->rep->security = sec_status_unchecked;
705	msg->rep->an_numrrsets = 1;
706	msg->rep->ns_numrrsets = 0;
707	msg->rep->ar_numrrsets = 0;
708	msg->rep->rrset_count = 1;
709	msg->rep->reason_bogus = LDNS_EDE_NONE;
710	msg->rep->rrsets[0] = packed_rrset_copy_region(rrset, region, now);
711	if(!msg->rep->rrsets[0]) /* copy CNAME */
712		return NULL;
713	return msg;
714}
715
/** synthesize DNAME+CNAME response from cached DNAME item
 * The answer section gets a copy of the DNAME rrset and a CNAME rrset that
 * is created here, with the query name as owner and as target the query
 * name with the DNAME owner part replaced by the DNAME target.
 * @param rrset: the cached DNAME rrset.
 * @param region: where the message and the CNAME are allocated.
 * @param now: current time, TTLs in the message are relative to it.
 * @param q: the query; its qname must end in the DNAME owner name,
 *	the length calculations below rely on that.
 * @param sec_status: set to the security status of the DNAME rrset.
 *	It is not written if the rrset TTL is before now.
 * @return the message; NULL if the rrset TTL is before now, the DNAME has
 *	no target or on allocation failure. If the synthesized name would be
 *	longer than LDNS_MAX_DOMAINLEN the message is returned with only the
 *	DNAME in it and the YXDOMAIN rcode set.
 */
static struct dns_msg*
synth_dname_msg(struct ub_packed_rrset_key* rrset, struct regional* region,
	time_t now, struct query_info* q, enum sec_status* sec_status)
{
	struct dns_msg* msg;
	struct ub_packed_rrset_key* ck;
	struct packed_rrset_data* newd, *d = (struct packed_rrset_data*)
		rrset->entry.data;
	uint8_t* newname, *dtarg = NULL;
	size_t newlen, dtarglen;
	if(now > d->ttl)
		return NULL;
	/* only allow validated (with DNSSEC) DNAMEs used from cache
	 * for insecure DNAMEs, query again. */
	*sec_status = d->security;
	/* return sec status, so the status of the CNAME can be checked
	 * by the calling routine. */
	msg = gen_dns_msg(region, q, 2); /* DNAME + CNAME RRset */
	if(!msg)
		return NULL;
	msg->rep->flags = BIT_QR; /* reply, no AA, no error */
        msg->rep->authoritative = 0; /* reply stored in cache can't be authoritative */
	msg->rep->qdcount = 1;
	msg->rep->ttl = d->ttl - now;
	msg->rep->prefetch_ttl = PREFETCH_TTL_CALC(msg->rep->ttl);
	msg->rep->serve_expired_ttl = msg->rep->ttl + SERVE_EXPIRED_TTL;
	msg->rep->security = sec_status_unchecked;
	/* start with only the DNAME; the counts are raised at the end when
	 * the CNAME has been created */
	msg->rep->an_numrrsets = 1;
	msg->rep->ns_numrrsets = 0;
	msg->rep->ar_numrrsets = 0;
	msg->rep->rrset_count = 1;
	msg->rep->reason_bogus = LDNS_EDE_NONE;
	msg->rep->rrsets[0] = packed_rrset_copy_region(rrset, region, now);
	if(!msg->rep->rrsets[0]) /* copy DNAME */
		return NULL;
	/* synth CNAME rrset */
	get_cname_target(rrset, &dtarg, &dtarglen);
	if(!dtarg)
		return NULL;
	/* length of the qname with the DNAME owner replaced by the target */
	newlen = q->qname_len + dtarglen - rrset->rk.dname_len;
	if(newlen > LDNS_MAX_DOMAINLEN) {
		/* name too long, answer with the DNAME only */
		msg->rep->flags |= LDNS_RCODE_YXDOMAIN;
		return msg;
	}
	newname = (uint8_t*)regional_alloc(region, newlen);
	if(!newname)
		return NULL;
	/* new name is concatenation of qname front (without DNAME owner)
	 * and DNAME target name */
	memcpy(newname, q->qname, q->qname_len-rrset->rk.dname_len);
	memmove(newname+(q->qname_len-rrset->rk.dname_len), dtarg, dtarglen);
	/* create rest of CNAME rrset */
	ck = (struct ub_packed_rrset_key*)regional_alloc(region,
		sizeof(struct ub_packed_rrset_key));
	if(!ck)
		return NULL;
	memset(&ck->entry, 0, sizeof(ck->entry));
	msg->rep->rrsets[1] = ck;
	ck->entry.key = ck;
	ck->rk.type = htons(LDNS_RR_TYPE_CNAME);
	ck->rk.rrset_class = rrset->rk.rrset_class;
	ck->rk.flags = 0;
	/* the owner name of the CNAME is the query name */
	ck->rk.dname = regional_alloc_init(region, q->qname, q->qname_len);
	if(!ck->rk.dname)
		return NULL;
	ck->rk.dname_len = q->qname_len;
	ck->entry.hash = rrset_key_hash(&ck->rk);
	/* one allocation for the data of a single RR: the struct, one rr_len
	 * entry, one rr_data pointer, one rr_ttl entry, and the rdata that
	 * consists of a uint16 length followed by the new name */
	newd = (struct packed_rrset_data*)regional_alloc_zero(region,
		sizeof(struct packed_rrset_data) + sizeof(size_t) +
		sizeof(uint8_t*) + sizeof(time_t) + sizeof(uint16_t)
		+ newlen);
	if(!newd)
		return NULL;
	ck->entry.data = newd;
	newd->ttl = d->ttl - now; /* RFC6672: synth CNAME TTL == DNAME TTL */
	newd->count = 1;
	newd->rrsig_count = 0;
	newd->trust = rrset_trust_ans_noAA;
	/* the rr_len array starts directly after the struct */
	newd->rr_len = (size_t*)((uint8_t*)newd +
		sizeof(struct packed_rrset_data));
	newd->rr_len[0] = newlen + sizeof(uint16_t);
	/* NOTE(review): the fixup is presumably what sets the rr_data and
	 * rr_ttl pointers that are used below - confirm in packed_rrset.h */
	packed_rrset_ptr_fixup(newd);
	newd->rr_ttl[0] = newd->ttl;
	msg->rep->ttl = newd->ttl;
	msg->rep->prefetch_ttl = PREFETCH_TTL_CALC(newd->ttl);
	msg->rep->serve_expired_ttl = newd->ttl + SERVE_EXPIRED_TTL;
	/* rdata is the uint16 length followed by the target name */
	sldns_write_uint16(newd->rr_data[0], newlen);
	memmove(newd->rr_data[0] + sizeof(uint16_t), newname, newlen);
	/* the CNAME is complete, count it in the answer section */
	msg->rep->an_numrrsets ++;
	msg->rep->rrset_count ++;
	return msg;
}
809
810/** Fill TYPE_ANY response with some data from cache */
811static struct dns_msg*
812fill_any(struct module_env* env,
813	uint8_t* qname, size_t qnamelen, uint16_t qtype, uint16_t qclass,
814	struct regional* region)
815{
816	time_t now = *env->now;
817	struct dns_msg* msg = NULL;
818	uint16_t lookup[] = {LDNS_RR_TYPE_A, LDNS_RR_TYPE_AAAA,
819		LDNS_RR_TYPE_MX, LDNS_RR_TYPE_SOA, LDNS_RR_TYPE_NS,
820		LDNS_RR_TYPE_DNAME, 0};
821	int i, num=6; /* number of RR types to look up */
822	log_assert(lookup[num] == 0);
823
824	if(env->cfg->deny_any) {
825		/* return empty message */
826		msg = dns_msg_create(qname, qnamelen, qtype, qclass,
827			region, 0);
828		if(!msg) {
829			return NULL;
830		}
831		/* set NOTIMPL for RFC 8482 */
832		msg->rep->flags |= LDNS_RCODE_NOTIMPL;
833		msg->rep->security = sec_status_indeterminate;
834		return msg;
835	}
836
837	for(i=0; i<num; i++) {
838		/* look up this RR for inclusion in type ANY response */
839		struct ub_packed_rrset_key* rrset = rrset_cache_lookup(
840			env->rrset_cache, qname, qnamelen, lookup[i],
841			qclass, 0, now, 0);
842		struct packed_rrset_data *d;
843		if(!rrset)
844			continue;
845
846		/* only if rrset from answer section */
847		d = (struct packed_rrset_data*)rrset->entry.data;
848		if(d->trust == rrset_trust_add_noAA ||
849			d->trust == rrset_trust_auth_noAA ||
850			d->trust == rrset_trust_add_AA ||
851			d->trust == rrset_trust_auth_AA) {
852			lock_rw_unlock(&rrset->entry.lock);
853			continue;
854		}
855
856		/* create msg if none */
857		if(!msg) {
858			msg = dns_msg_create(qname, qnamelen, qtype, qclass,
859				region, (size_t)(num-i));
860			if(!msg) {
861				lock_rw_unlock(&rrset->entry.lock);
862				return NULL;
863			}
864		}
865
866		/* add RRset to response */
867		if(!dns_msg_ansadd(msg, region, rrset, now)) {
868			lock_rw_unlock(&rrset->entry.lock);
869			return NULL;
870		}
871		lock_rw_unlock(&rrset->entry.lock);
872	}
873	return msg;
874}
875
876struct dns_msg*
877dns_cache_lookup(struct module_env* env,
878	uint8_t* qname, size_t qnamelen, uint16_t qtype, uint16_t qclass,
879	uint16_t flags, struct regional* region, struct regional* scratch,
880	int no_partial, uint8_t* dpname, size_t dpnamelen)
881{
882	struct lruhash_entry* e;
883	struct query_info k;
884	hashvalue_type h;
885	time_t now = *env->now;
886	struct ub_packed_rrset_key* rrset;
887
888	/* lookup first, this has both NXdomains and ANSWER responses */
889	k.qname = qname;
890	k.qname_len = qnamelen;
891	k.qtype = qtype;
892	k.qclass = qclass;
893	k.local_alias = NULL;
894	h = query_info_hash(&k, flags);
895	e = slabhash_lookup(env->msg_cache, h, &k, 0);
896	if(e) {
897		struct msgreply_entry* key = (struct msgreply_entry*)e->key;
898		struct reply_info* data = (struct reply_info*)e->data;
899		struct dns_msg* msg = tomsg(env, &key->key, data, region, now, 0,
900			scratch);
901		if(msg) {
902			lock_rw_unlock(&e->lock);
903			return msg;
904		}
905		/* could be msg==NULL; due to TTL or not all rrsets available */
906		lock_rw_unlock(&e->lock);
907	}
908
909	/* see if a DNAME exists. Checked for first, to enforce that DNAMEs
910	 * are more important, the CNAME is resynthesized and thus
911	 * consistent with the DNAME */
912	if(!no_partial &&
913		(rrset=find_closest_of_type(env, qname, qnamelen, qclass, now,
914		LDNS_RR_TYPE_DNAME, 1, 0, NULL, 0))) {
915		/* synthesize a DNAME+CNAME message based on this */
916		enum sec_status sec_status = sec_status_unchecked;
917		struct dns_msg* msg = synth_dname_msg(rrset, region, now, &k,
918			&sec_status);
919		if(msg) {
920			struct ub_packed_rrset_key* cname_rrset;
921			lock_rw_unlock(&rrset->entry.lock);
922			/* now, after unlocking the DNAME rrset lock,
923			 * check the sec_status, and see if we need to look
924			 * up the CNAME record associated before it can
925			 * be used */
926			/* normally, only secure DNAMEs allowed from cache*/
927			if(sec_status == sec_status_secure)
928				return msg;
929			/* but if we have a CNAME cached with this name, then we
930			 * have previously already allowed this name to pass.
931			 * the next cache lookup is going to fetch that CNAME itself,
932			 * but it is better to have the (unsigned)DNAME + CNAME in
933			 * that case */
934			cname_rrset = rrset_cache_lookup(
935				env->rrset_cache, qname, qnamelen,
936				LDNS_RR_TYPE_CNAME, qclass, 0, now, 0);
937			if(cname_rrset) {
938				/* CNAME already synthesized by
939				 * synth_dname_msg routine, so we can
940				 * straight up return the msg */
941				lock_rw_unlock(&cname_rrset->entry.lock);
942				return msg;
943			}
944		} else {
945			lock_rw_unlock(&rrset->entry.lock);
946		}
947	}
948
949	/* see if we have CNAME for this domain,
950	 * but not for DS records (which are part of the parent) */
951	if(!no_partial && qtype != LDNS_RR_TYPE_DS &&
952	   (rrset=rrset_cache_lookup(env->rrset_cache, qname, qnamelen,
953		LDNS_RR_TYPE_CNAME, qclass, 0, now, 0))) {
954		uint8_t* wc = NULL;
955		size_t wl;
956		/* if the rrset is not a wildcard expansion, with wcname */
957		/* because, if we return that CNAME rrset on its own, it is
958		 * missing the NSEC or NSEC3 proof */
959		if(!(val_rrset_wildcard(rrset, &wc, &wl) && wc != NULL)) {
960			struct dns_msg* msg = rrset_msg(rrset, region, now, &k);
961			if(msg) {
962				lock_rw_unlock(&rrset->entry.lock);
963				return msg;
964			}
965		}
966		lock_rw_unlock(&rrset->entry.lock);
967	}
968
969	/* construct DS, DNSKEY messages from rrset cache. */
970	if((qtype == LDNS_RR_TYPE_DS || qtype == LDNS_RR_TYPE_DNSKEY) &&
971		(rrset=rrset_cache_lookup(env->rrset_cache, qname, qnamelen,
972		qtype, qclass, 0, now, 0))) {
973		/* if the rrset is from the additional section, and the
974		 * signatures have fallen off, then do not synthesize a msg
975		 * instead, allow a full query for signed results to happen.
976		 * Forego all rrset data from additional section, because
977		 * some signatures may not be present and cause validation
978		 * failure.
979		 */
980		struct packed_rrset_data *d = (struct packed_rrset_data*)
981			rrset->entry.data;
982		if(d->trust != rrset_trust_add_noAA &&
983			d->trust != rrset_trust_add_AA &&
984			(qtype == LDNS_RR_TYPE_DS ||
985				(d->trust != rrset_trust_auth_noAA
986				&& d->trust != rrset_trust_auth_AA) )) {
987			struct dns_msg* msg = rrset_msg(rrset, region, now, &k);
988			if(msg) {
989				lock_rw_unlock(&rrset->entry.lock);
990				return msg;
991			}
992		}
993		lock_rw_unlock(&rrset->entry.lock);
994	}
995
996	/* stop downwards cache search on NXDOMAIN.
997	 * Empty nonterminals are NOERROR, so an NXDOMAIN for foo
998	 * means bla.foo also does not exist.  The DNSSEC proofs are
999	 * the same.  We search upwards for NXDOMAINs. */
1000	if(env->cfg->harden_below_nxdomain) {
1001		while(!dname_is_root(k.qname)) {
1002			if(dpname && dpnamelen
1003				&& !dname_subdomain_c(k.qname, dpname))
1004				break; /* no synth nxdomain above the stub */
1005			dname_remove_label(&k.qname, &k.qname_len);
1006			h = query_info_hash(&k, flags);
1007			e = slabhash_lookup(env->msg_cache, h, &k, 0);
1008			if(!e && k.qtype != LDNS_RR_TYPE_A &&
1009				env->cfg->qname_minimisation) {
1010				k.qtype = LDNS_RR_TYPE_A;
1011				h = query_info_hash(&k, flags);
1012				e = slabhash_lookup(env->msg_cache, h, &k, 0);
1013			}
1014			if(e) {
1015				struct reply_info* data = (struct reply_info*)e->data;
1016				struct dns_msg* msg;
1017				if(FLAGS_GET_RCODE(data->flags) == LDNS_RCODE_NXDOMAIN
1018					&& data->security == sec_status_secure
1019					&& (data->an_numrrsets == 0 ||
1020						ntohs(data->rrsets[0]->rk.type) != LDNS_RR_TYPE_CNAME)
1021					&& (msg=tomsg(env, &k, data, region, now, 0, scratch))) {
1022					lock_rw_unlock(&e->lock);
1023					msg->qinfo.qname=qname;
1024					msg->qinfo.qname_len=qnamelen;
1025					/* check that DNSSEC really works out */
1026					msg->rep->security = sec_status_unchecked;
1027					iter_scrub_nxdomain(msg);
1028					return msg;
1029				}
1030				lock_rw_unlock(&e->lock);
1031			}
1032			k.qtype = qtype;
1033		}
1034	}
1035
1036	/* fill common RR types for ANY response to avoid requery */
1037	if(qtype == LDNS_RR_TYPE_ANY) {
1038		return fill_any(env, qname, qnamelen, qtype, qclass, region);
1039	}
1040
1041	return NULL;
1042}
1043
1044int
1045dns_cache_store(struct module_env* env, struct query_info* msgqinf,
1046        struct reply_info* msgrep, int is_referral, time_t leeway, int pside,
1047	struct regional* region, uint32_t flags, time_t qstarttime)
1048{
1049	struct reply_info* rep = NULL;
1050	/* alloc, malloc properly (not in region, like msg is) */
1051	rep = reply_info_copy(msgrep, env->alloc, NULL);
1052	if(!rep)
1053		return 0;
1054	/* ttl must be relative ;i.e. 0..86400 not  time(0)+86400.
1055	 * the env->now is added to message and RRsets in this routine. */
1056	/* the leeway is used to invalidate other rrsets earlier */
1057	if(is_referral) {
1058		/* store rrsets */
1059		struct rrset_ref ref;
1060		size_t i;
1061		for(i=0; i<rep->rrset_count; i++) {
1062			packed_rrset_ttl_add((struct packed_rrset_data*)
1063				rep->rrsets[i]->entry.data, *env->now);
1064			ref.key = rep->rrsets[i];
1065			ref.id = rep->rrsets[i]->id;
1066			/*ignore ret: it was in the cache, ref updated */
1067			/* no leeway for typeNS */
1068			(void)rrset_cache_update(env->rrset_cache, &ref,
1069				env->alloc,
1070				((ntohs(ref.key->rk.type)==LDNS_RR_TYPE_NS
1071				 && !pside) ? qstarttime:*env->now + leeway));
1072		}
1073		reply_info_delete(rep, NULL);
1074		return 1;
1075	} else {
1076		/* store msg, and rrsets */
1077		struct query_info qinf;
1078		hashvalue_type h;
1079
1080		qinf = *msgqinf;
1081		qinf.qname = memdup(msgqinf->qname, msgqinf->qname_len);
1082		if(!qinf.qname) {
1083			reply_info_parsedelete(rep, env->alloc);
1084			return 0;
1085		}
1086		/* fixup flags to be sensible for a reply based on the cache */
1087		/* this module means that RA is available. It is an answer QR.
1088		 * Not AA from cache. Not CD in cache (depends on client bit). */
1089		rep->flags |= (BIT_RA | BIT_QR);
1090		rep->flags &= ~(BIT_AA | BIT_CD);
1091		h = query_info_hash(&qinf, (uint16_t)flags);
1092		dns_cache_store_msg(env, &qinf, h, rep, leeway, pside, msgrep,
1093			flags, region, qstarttime);
1094		/* qname is used inside query_info_entrysetup, and set to
1095		 * NULL. If it has not been used, free it. free(0) is safe. */
1096		free(qinf.qname);
1097	}
1098	return 1;
1099}
1100
1101int
1102dns_cache_prefetch_adjust(struct module_env* env, struct query_info* qinfo,
1103        time_t adjust, uint16_t flags)
1104{
1105	struct msgreply_entry* msg;
1106	msg = msg_cache_lookup(env, qinfo->qname, qinfo->qname_len,
1107		qinfo->qtype, qinfo->qclass, flags, *env->now, 1);
1108	if(msg) {
1109		struct reply_info* rep = (struct reply_info*)msg->entry.data;
1110		if(rep) {
1111			rep->prefetch_ttl += adjust;
1112			lock_rw_unlock(&msg->entry.lock);
1113			return 1;
1114		}
1115		lock_rw_unlock(&msg->entry.lock);
1116	}
1117	return 0;
1118}
1119