/*
 * services/cache/rrset.c - Resource record set cache.
 *
 * Copyright (c) 2007, NLnet Labs. All rights reserved.
 *
 * This software is open source.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 * Redistributions of source code must retain the above copyright notice,
 * this list of conditions and the following disclaimer.
 *
 * Redistributions in binary form must reproduce the above copyright notice,
 * this list of conditions and the following disclaimer in the documentation
 * and/or other materials provided with the distribution.
 *
 * Neither the name of the NLNET LABS nor the names of its contributors may
 * be used to endorse or promote products derived from this software without
 * specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 * HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
 * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
 * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
 * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

/**
 * \file
 *
 * This file contains the rrset cache.
 */
#include "config.h"
#include "services/cache/rrset.h"
#include "sldns/rrdef.h"
#include "util/storage/slabhash.h"
#include "util/config_file.h"
#include "util/data/packed_rrset.h"
#include "util/data/msgreply.h"
#include "util/data/msgparse.h"
#include "util/data/dname.h"
#include "util/regional.h"
#include "util/alloc.h"
#include "util/net_help.h"

void
rrset_markdel(void* key)
{
	struct ub_packed_rrset_key* r = (struct ub_packed_rrset_key*)key;
	r->id = 0;
}

struct rrset_cache* rrset_cache_create(struct config_file* cfg,
	struct alloc_cache* alloc)
{
	size_t slabs = (cfg?cfg->rrset_cache_slabs:HASH_DEFAULT_SLABS);
	size_t startarray = HASH_DEFAULT_STARTARRAY;
	size_t maxmem = (cfg?cfg->rrset_cache_size:HASH_DEFAULT_MAXMEM);

	struct rrset_cache *r = (struct rrset_cache*)slabhash_create(slabs,
		startarray, maxmem, ub_rrset_sizefunc, ub_rrset_compare,
		ub_rrset_key_delete, rrset_data_delete, alloc);
	if(!r)
		return NULL; /* out of memory creating the hash table */
	slabhash_setmarkdel(&r->table, &rrset_markdel);
	return r;
}

void rrset_cache_delete(struct rrset_cache* r)
{
	if(!r)
		return;
	slabhash_delete(&r->table);
	/* slabhash delete also does free(r), since table is first in struct */
}

struct rrset_cache* rrset_cache_adjust(struct rrset_cache *r,
	struct config_file* cfg, struct alloc_cache* alloc)
{
	if(!r || !cfg || !slabhash_is_size(&r->table, cfg->rrset_cache_size,
		cfg->rrset_cache_slabs))
	{
		rrset_cache_delete(r);
		r = rrset_cache_create(cfg, alloc);
	}
	return r;
}

void
rrset_cache_touch(struct rrset_cache* r, struct ub_packed_rrset_key* key,
	hashvalue_type hash, rrset_id_type id)
{
	struct lruhash* table = slabhash_gettable(&r->table, hash);
	/*
	 * This leads to locking problems, deadlocks, if the caller is
	 * holding any other rrset lock.
	 * A lookup through the hashtable does:
	 *	tablelock -> entrylock	(for the entry the caller holds)
	 * while this routine would do:
	 *	entrylock(already held) -> tablelock
	 * If two threads do this, the inverted lock order results in
	 * deadlock. So, the caller must not hold the entrylock.
	 */
	lock_quick_lock(&table->lock);
	/* We have locked the hash table, but the item can still be deleted:
	 * it could already have been reclaimed with id not yet set to 0,
	 * because some lruhash routines perform lazy deletion.
	 * So we must acquire a lock on the item and verify that id != 0.
	 * Also, since the hash is unchanged, we are using the right slab.
	 */
	lock_rw_rdlock(&key->entry.lock);
	if(key->id == id && key->entry.hash == hash) {
		lru_touch(table, &key->entry);
	}
	lock_rw_unlock(&key->entry.lock);
	lock_quick_unlock(&table->lock);
}
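
/*
 * Illustrative sketch (not part of the library build): how a caller can
 * use rrset_cache_touch given the lock order described above. The hash
 * and id are read while holding the entry lock, the lock is released,
 * and only then is the touch performed. The function name and variables
 * here are hypothetical.
 */
#if 0
static void
example_touch(struct rrset_cache* r, struct ub_packed_rrset_key* key)
{
	hashvalue_type h;
	rrset_id_type id;
	lock_rw_rdlock(&key->entry.lock);
	h = key->entry.hash;
	id = key->id;
	lock_rw_unlock(&key->entry.lock);
	/* no entry lock is held here, so the tablelock -> entrylock order
	 * inside rrset_cache_touch cannot deadlock */
	rrset_cache_touch(r, key, h, id);
}
#endif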

/** see if rrset needs to be updated in the cache */
static int
need_to_update_rrset(void* nd, void* cd, time_t timenow, int equal, int ns)
{
	struct packed_rrset_data* newd = (struct packed_rrset_data*)nd;
	struct packed_rrset_data* cached = (struct packed_rrset_data*)cd;
	/*	o if new data is expired, current data is better */
	if( newd->ttl < timenow && cached->ttl >= timenow)
		return 0;
	/*	o store if rrset has been validated
	 *		everything is better than bogus data
	 *		secure is preferred */
	if( newd->security == sec_status_secure &&
		cached->security != sec_status_secure)
		return 1;
	if( cached->security == sec_status_bogus &&
		newd->security != sec_status_bogus && !equal)
		return 1;
	/*	o if the new RRset is more trustworthy - insert it */
	if( newd->trust > cached->trust ) {
		/* if the cached rrset is bogus, and this one is equal,
		 * do not update the TTL - let it expire. */
		if(equal && cached->ttl >= timenow &&
			cached->security == sec_status_bogus)
			return 0;
		return 1;
	}
	/*	o item in cache has expired */
	if( cached->ttl < timenow )
		return 1;
	/*	o same trust, but different in data - insert it */
	if( newd->trust == cached->trust && !equal ) {
		/* if this is type NS, do not 'stick' to the owner that
		 * changes the NS RRset, but use the old TTL for the new
		 * data, and update to fetch the latest data. The TTL is not
		 * expired, because that check was before this one. */
		if(ns) {
			size_t i;
			newd->ttl = cached->ttl;
			for(i=0; i<(newd->count+newd->rrsig_count); i++)
				if(newd->rr_ttl[i] > newd->ttl)
					newd->rr_ttl[i] = newd->ttl;
		}
		return 1;
	}
	return 0;
}
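
/*
 * Summary of the decision order above (the first matching rule decides):
 *  1. new data expired while cached data is not: keep the cache.
 *  2. new data is secure and cached data is not: update.
 *  3. cached data is bogus and new, differing data is not: update.
 *  4. new data has higher trust: update, except when the data is equal
 *     to an unexpired bogus cached rrset, which is left to expire.
 *  5. cached rrset has expired: update.
 *  6. same trust but different data: update (for NS, keep the old TTL).
 *  7. otherwise: keep the cache.
 */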

/** Update RRSet special key ID */
static void
rrset_update_id(struct rrset_ref* ref, struct alloc_cache* alloc)
{
	/* this may clear the cache and invalidate lock below */
	uint64_t newid = alloc_get_id(alloc);
	/* obtain writelock */
	lock_rw_wrlock(&ref->key->entry.lock);
	/* check if it was deleted in the meantime, if so, skip update */
	if(ref->key->id == ref->id) {
		ref->key->id = newid;
		ref->id = newid;
	}
	lock_rw_unlock(&ref->key->entry.lock);
}

int
rrset_cache_update(struct rrset_cache* r, struct rrset_ref* ref,
	struct alloc_cache* alloc, time_t timenow)
{
	struct lruhash_entry* e;
	struct ub_packed_rrset_key* k = ref->key;
	hashvalue_type h = k->entry.hash;
	uint16_t rrset_type = ntohs(k->rk.type);
	int equal = 0;
	log_assert(ref->id != 0 && k->id != 0);
	log_assert(k->rk.dname != NULL);
	/* looks up item with a readlock - no editing! */
	if((e=slabhash_lookup(&r->table, h, k, 0)) != 0) {
		/* Return this id and key, as they will be used in the cache:
		 * lruhash_insert, if the item already exists, deallocates
		 * the passed key in favor of the already stored key.
		 * Because of the small gap (see below), this key ptr and id
		 * may later prove to be already deleted, which is no problem
		 * as it only causes a cache miss.
		 */
		ref->key = (struct ub_packed_rrset_key*)e->key;
		ref->id = ref->key->id;
		equal = rrsetdata_equal((struct packed_rrset_data*)k->entry.
			data, (struct packed_rrset_data*)e->data);
		if(!need_to_update_rrset(k->entry.data, e->data, timenow,
			equal, (rrset_type==LDNS_RR_TYPE_NS))) {
			/* cache is superior, return that value */
			lock_rw_unlock(&e->lock);
			ub_packed_rrset_parsedelete(k, alloc);
			if(equal) return 2;
			return 1;
		}
		lock_rw_unlock(&e->lock);
		/* Go on and insert the passed item.
		 * There is a small gap here where the entry is not locked;
		 * possibly the entry is updated with something else and we
		 * then overwrite that with our data. That is acceptable,
		 * it is a cache anyway. */
		/* use insert to update the entry, so that lruhash manages
		 * the cache size values nicely. */
	}
	log_assert(ref->key->id != 0);
	slabhash_insert(&r->table, h, &k->entry, k->entry.data, alloc);
	if(e) {
		/* For NSEC, NSEC3 and DNAME, when the rdata is updated,
		 * update the ID number so that proofs in the message cache
		 * are invalidated */
		if((rrset_type == LDNS_RR_TYPE_NSEC
			|| rrset_type == LDNS_RR_TYPE_NSEC3
			|| rrset_type == LDNS_RR_TYPE_DNAME) && !equal) {
			rrset_update_id(ref, alloc);
		}
		return 1;
	}
	return 0;
}
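
/*
 * Illustrative sketch (not compiled in): typical use of
 * rrset_cache_update. The key k must be allocated and filled in, with
 * entry.hash and a nonzero id set; after the call, ownership of k has
 * passed to the cache and ref.key/ref.id refer to the stored entry.
 * The function name here is hypothetical.
 */
#if 0
static void
example_update(struct rrset_cache* r, struct ub_packed_rrset_key* k,
	struct alloc_cache* alloc, time_t now)
{
	struct rrset_ref ref;
	int ret;
	ref.key = k;
	ref.id = k->id;
	ret = rrset_cache_update(r, &ref, alloc, now);
	/* ret 0: item was newly inserted, reference unchanged.
	 * ret 1: existing entry used; ref now points at the cached key.
	 * ret 2: existing entry used and the rdata was equal.
	 * In all cases, do not use k afterwards; use ref.key instead. */
	(void)ret;
}
#endif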

void rrset_cache_update_wildcard(struct rrset_cache* rrset_cache,
	struct ub_packed_rrset_key* rrset, uint8_t* ce, size_t ce_len,
	struct alloc_cache* alloc, time_t timenow)
{
	struct rrset_ref ref;
	uint8_t wc_dname[LDNS_MAX_DOMAINLEN+3];
	rrset = packed_rrset_copy_alloc(rrset, alloc, timenow);
	if(!rrset) {
		log_err("malloc failure in rrset_cache_update_wildcard");
		return;
	}
	/* ce has at least one label less than qname; we can therefore
	 * safely prepend the wildcard label. */
	wc_dname[0] = 1;
	wc_dname[1] = (uint8_t)'*';
	memmove(wc_dname+2, ce, ce_len);

	free(rrset->rk.dname);
	rrset->rk.dname_len = ce_len + 2;
	rrset->rk.dname = (uint8_t*)memdup(wc_dname, rrset->rk.dname_len);
	if(!rrset->rk.dname) {
		alloc_special_release(alloc, rrset);
		log_err("memdup failure in rrset_cache_update_wildcard");
		return;
	}

	rrset->entry.hash = rrset_key_hash(&rrset->rk);
	ref.key = rrset;
	ref.id = rrset->id;
	/* ignore the return value: if it was in the cache, ref is updated */
	(void)rrset_cache_update(rrset_cache, &ref, alloc, timenow);
}
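
/*
 * Worked example of the wildcard name built above: for the closest
 * encloser "example.com." the uncompressed wireformat is
 *	\007 e x a m p l e \003 c o m \000		(ce_len = 13)
 * and prepending the length-1 label "*" yields "*.example.com.":
 *	\001 * \007 e x a m p l e \003 c o m \000	(ce_len + 2)
 */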

struct ub_packed_rrset_key*
rrset_cache_lookup(struct rrset_cache* r, uint8_t* qname, size_t qnamelen,
	uint16_t qtype, uint16_t qclass, uint32_t flags, time_t timenow,
	int wr)
{
	struct lruhash_entry* e;
	struct ub_packed_rrset_key key;

	key.entry.key = &key;
	key.entry.data = NULL;
	key.rk.dname = qname;
	key.rk.dname_len = qnamelen;
	key.rk.type = htons(qtype);
	key.rk.rrset_class = htons(qclass);
	key.rk.flags = flags;

	key.entry.hash = rrset_key_hash(&key.rk);

	if((e = slabhash_lookup(&r->table, key.entry.hash, &key, wr))) {
		/* check TTL */
		struct packed_rrset_data* data =
			(struct packed_rrset_data*)e->data;
		if(timenow > data->ttl) {
			lock_rw_unlock(&e->lock);
			return NULL;
		}
		/* we're done */
		return (struct ub_packed_rrset_key*)e->key;
	}
	return NULL;
}
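
/*
 * Illustrative sketch (not compiled in): using rrset_cache_lookup. The
 * returned key is locked (a read lock here, since wr=0) and the caller
 * must unlock it when done. The function name is hypothetical.
 */
#if 0
static void
example_lookup(struct rrset_cache* r, uint8_t* nm, size_t nmlen,
	time_t now)
{
	struct ub_packed_rrset_key* k = rrset_cache_lookup(r, nm, nmlen,
		LDNS_RR_TYPE_A, LDNS_RR_CLASS_IN, 0, now, 0);
	if(k) {
		struct packed_rrset_data* d =
			(struct packed_rrset_data*)k->entry.data;
		/* read-only inspection while holding the read lock */
		log_info("found rrset with %u rrs", (unsigned)d->count);
		lock_rw_unlock(&k->entry.lock);
	}
}
#endif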

int
rrset_array_lock(struct rrset_ref* ref, size_t count, time_t timenow)
{
	size_t i;
	for(i=0; i<count; i++) {
		if(i>0 && ref[i].key == ref[i-1].key)
			continue; /* only lock items once */
		lock_rw_rdlock(&ref[i].key->entry.lock);
		if(ref[i].id != ref[i].key->id || timenow >
			((struct packed_rrset_data*)(ref[i].key->entry.data))
			->ttl) {
			/* failure! rollback our readlocks */
			rrset_array_unlock(ref, i+1);
			return 0;
		}
	}
	return 1;
}

void
rrset_array_unlock(struct rrset_ref* ref, size_t count)
{
	size_t i;
	for(i=0; i<count; i++) {
		if(i>0 && ref[i].key == ref[i-1].key)
			continue; /* only unlock items once */
		lock_rw_unlock(&ref[i].key->entry.lock);
	}
}
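
/*
 * Illustrative sketch (not compiled in): validating a set of rrset
 * references with rrset_array_lock. On success all (distinct) entries
 * are read-locked with their ids and TTLs verified, and stay locked
 * until rrset_array_unlock. The function name is hypothetical.
 */
#if 0
static int
example_use_refs(struct rrset_ref* refs, size_t count, time_t now)
{
	if(!rrset_array_lock(refs, count, now))
		return 0; /* some rrset was deleted or expired */
	/* ... read the locked rrsets here ... */
	rrset_array_unlock(refs, count);
	return 1;
}
#endif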

void
rrset_array_unlock_touch(struct rrset_cache* r, struct regional* scratch,
	struct rrset_ref* ref, size_t count)
{
	hashvalue_type* h;
	size_t i;
	if(count > RR_COUNT_MAX || !(h = (hashvalue_type*)regional_alloc(
		scratch, sizeof(hashvalue_type)*count))) {
		log_warn("rrset LRU: memory allocation failed");
		h = NULL;
	} else	/* store hash values */
		for(i=0; i<count; i++)
			h[i] = ref[i].key->entry.hash;
	/* unlock */
	for(i=0; i<count; i++) {
		if(i>0 && ref[i].key == ref[i-1].key)
			continue; /* only unlock items once */
		lock_rw_unlock(&ref[i].key->entry.lock);
	}
	if(h) {
		/* LRU touch, with no rrset locks held */
		for(i=0; i<count; i++) {
			if(i>0 && ref[i].key == ref[i-1].key)
				continue; /* only touch items once */
			rrset_cache_touch(r, ref[i].key, h[i], ref[i].id);
		}
	}
}
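
/*
 * Design note for the routine above: the hash values are copied to the
 * scratch region first because rrset_cache_touch takes the table lock,
 * and taking it while still holding the entry locks would invert the
 * tablelock -> entrylock order and could deadlock (see the comment in
 * rrset_cache_touch). If the scratch allocation fails, the touch is
 * simply skipped; only LRU bookkeeping is lost, not correctness.
 */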

void
rrset_update_sec_status(struct rrset_cache* r,
	struct ub_packed_rrset_key* rrset, time_t now)
{
	struct packed_rrset_data* updata =
		(struct packed_rrset_data*)rrset->entry.data;
	struct lruhash_entry* e;
	struct packed_rrset_data* cachedata;

	/* hash it again to make sure it has a hash */
	rrset->entry.hash = rrset_key_hash(&rrset->rk);

	e = slabhash_lookup(&r->table, rrset->entry.hash, rrset, 1);
	if(!e)
		return; /* not in the cache anymore */
	cachedata = (struct packed_rrset_data*)e->data;
	if(!rrsetdata_equal(updata, cachedata)) {
		lock_rw_unlock(&e->lock);
		return; /* rrset has changed in the meantime */
	}
	/* update the cached rrset */
	if(updata->security > cachedata->security) {
		size_t i;
		if(updata->trust > cachedata->trust)
			cachedata->trust = updata->trust;
		cachedata->security = updata->security;
		/* For NS records, only allow shorter TTLs (or expired or
		 * bogus data); for other types, always update the TTL. */
		if(ntohs(rrset->rk.type) != LDNS_RR_TYPE_NS ||
			updata->ttl+now < cachedata->ttl ||
			cachedata->ttl < now ||
			updata->security == sec_status_bogus) {
			cachedata->ttl = updata->ttl + now;
			for(i=0; i<cachedata->count+cachedata->rrsig_count; i++)
				cachedata->rr_ttl[i] = updata->rr_ttl[i]+now;
			cachedata->ttl_add = now;
		}
	}
	lock_rw_unlock(&e->lock);
}

void
rrset_check_sec_status(struct rrset_cache* r,
	struct ub_packed_rrset_key* rrset, time_t now)
{
	struct packed_rrset_data* updata =
		(struct packed_rrset_data*)rrset->entry.data;
	struct lruhash_entry* e;
	struct packed_rrset_data* cachedata;

	/* hash it again to make sure it has a hash */
	rrset->entry.hash = rrset_key_hash(&rrset->rk);

	e = slabhash_lookup(&r->table, rrset->entry.hash, rrset, 0);
	if(!e)
		return; /* not in the cache anymore */
	cachedata = (struct packed_rrset_data*)e->data;
	if(now > cachedata->ttl || !rrsetdata_equal(updata, cachedata)) {
		lock_rw_unlock(&e->lock);
		return; /* expired, or rrset has changed in the meantime */
	}
	if(cachedata->security > updata->security) {
		updata->security = cachedata->security;
		if(cachedata->security == sec_status_bogus) {
			size_t i;
			updata->ttl = cachedata->ttl - now;
			for(i=0; i<cachedata->count+cachedata->rrsig_count; i++)
				if(cachedata->rr_ttl[i] < now)
					updata->rr_ttl[i] = 0;
				else updata->rr_ttl[i] =
					cachedata->rr_ttl[i]-now;
		}
		if(cachedata->trust > updata->trust)
			updata->trust = cachedata->trust;
	}
	lock_rw_unlock(&e->lock);
}

void
rrset_cache_remove_above(struct rrset_cache* r, uint8_t** qname, size_t*
	qnamelen, uint16_t searchtype, uint16_t qclass, time_t now, uint8_t*
	qnametop, size_t qnametoplen)
{
	struct ub_packed_rrset_key *rrset;
	uint8_t lablen;

	while(*qnamelen > 0) {
		/* look one label higher */
		lablen = **qname;
		*qname += lablen + 1;
		*qnamelen -= lablen + 1;
		if(*qnamelen <= 0)
			return;

		/* stop at qnametop */
		if(qnametop && *qnamelen == qnametoplen &&
			query_dname_compare(*qname, qnametop)==0)
			return;

		if(verbosity >= VERB_ALGO) {
			/* looks up with a time of 0, to see expired entries */
			if((rrset = rrset_cache_lookup(r, *qname,
				*qnamelen, searchtype, qclass, 0, 0, 0))) {
				struct packed_rrset_data* data =
					(struct packed_rrset_data*)rrset->entry.data;
				int expired = (now > data->ttl);
				lock_rw_unlock(&rrset->entry.lock);
				if(expired)
					log_nametypeclass(verbosity, "this "
						"(grand)parent rrset will be "
						"removed (expired)",
						*qname, searchtype, qclass);
				else	log_nametypeclass(verbosity, "this "
						"(grand)parent rrset will be "
						"removed",
						*qname, searchtype, qclass);
			}
		}
		rrset_cache_remove(r, *qname, *qnamelen, searchtype, qclass, 0);
	}
}

int
rrset_cache_expired_above(struct rrset_cache* r, uint8_t** qname, size_t*
	qnamelen, uint16_t searchtype, uint16_t qclass, time_t now, uint8_t*
	qnametop, size_t qnametoplen)
{
	struct ub_packed_rrset_key *rrset;
	uint8_t lablen;

	while(*qnamelen > 0) {
		/* look one label higher */
		lablen = **qname;
		*qname += lablen + 1;
		*qnamelen -= lablen + 1;
		if(*qnamelen <= 0)
			break;

		/* looks up with a time of 0, to see expired entries */
		if((rrset = rrset_cache_lookup(r, *qname,
			*qnamelen, searchtype, qclass, 0, 0, 0))) {
			struct packed_rrset_data* data =
				(struct packed_rrset_data*)rrset->entry.data;
			if(now > data->ttl) {
				/* it is expired, this is not wanted */
				lock_rw_unlock(&rrset->entry.lock);
				log_nametypeclass(VERB_ALGO,
					"this rrset is expired",
					*qname, searchtype, qclass);
				return 1;
			}
			/* it is not expired, continue looking */
			lock_rw_unlock(&rrset->entry.lock);
		}

		/* do not look above the qnametop. */
		if(qnametop && *qnamelen == qnametoplen &&
			query_dname_compare(*qname, qnametop)==0)
			break;
	}
	return 0;
}

void rrset_cache_remove(struct rrset_cache* r, uint8_t* nm, size_t nmlen,
	uint16_t type, uint16_t dclass, uint32_t flags)
{
	struct ub_packed_rrset_key key;
	key.entry.key = &key;
	key.rk.dname = nm;
	key.rk.dname_len = nmlen;
	key.rk.rrset_class = htons(dclass);
	key.rk.type = htons(type);
	key.rk.flags = flags;
	key.entry.hash = rrset_key_hash(&key.rk);
	slabhash_remove(&r->table, key.entry.hash, &key);
}
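
/*
 * Illustrative sketch (not compiled in): removing the A rrset for a
 * name from the cache, for example after it was found to be bad. The
 * function name is hypothetical; nm must be uncompressed wireformat.
 */
#if 0
static void
example_remove(struct rrset_cache* r, uint8_t* nm, size_t nmlen)
{
	rrset_cache_remove(r, nm, nmlen, LDNS_RR_TYPE_A,
		LDNS_RR_CLASS_IN, 0);
}
#endif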