1/*
2 * services/cache/infra.c - infrastructure cache, server rtt and capabilities
3 *
4 * Copyright (c) 2007, NLnet Labs. All rights reserved.
5 *
6 * This software is open source.
7 *
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
10 * are met:
11 *
12 * Redistributions of source code must retain the above copyright notice,
13 * this list of conditions and the following disclaimer.
14 *
15 * Redistributions in binary form must reproduce the above copyright notice,
16 * this list of conditions and the following disclaimer in the documentation
17 * and/or other materials provided with the distribution.
18 *
19 * Neither the name of the NLNET LABS nor the names of its contributors may
20 * be used to endorse or promote products derived from this software without
21 * specific prior written permission.
22 *
23 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
24 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
25 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
26 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE
27 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
28 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
29 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
30 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
31 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
32 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
33 * POSSIBILITY OF SUCH DAMAGE.
34 */
35
36/**
37 * \file
38 *
39 * This file contains the infrastructure cache.
40 */
41#include "config.h"
42#include <ldns/rr.h>
43#include "services/cache/infra.h"
44#include "util/storage/slabhash.h"
45#include "util/storage/lookup3.h"
46#include "util/data/dname.h"
47#include "util/log.h"
48#include "util/net_help.h"
49#include "util/config_file.h"
50#include "iterator/iterator.h"
51
52/** Timeout when only a single probe query per IP is allowed. */
53#define PROBE_MAXRTO 12000 /* in msec */
54
/** Number of timeouts for a query type at which the domain can be blocked;
 * even if another type has pushed the rtt all the way to its maximum, a
 * different type can still send this number of packets (until those all
 * time out too) */
58#define TIMEOUT_COUNT_MAX 3
59
60size_t
61infra_sizefunc(void* k, void* ATTR_UNUSED(d))
62{
63	struct infra_key* key = (struct infra_key*)k;
64	return sizeof(*key) + sizeof(struct infra_data) + key->namelen
65		+ lock_get_mem(&key->entry.lock);
66}
67
68int
69infra_compfunc(void* key1, void* key2)
70{
71	struct infra_key* k1 = (struct infra_key*)key1;
72	struct infra_key* k2 = (struct infra_key*)key2;
73	int r = sockaddr_cmp(&k1->addr, k1->addrlen, &k2->addr, k2->addrlen);
74	if(r != 0)
75		return r;
76	if(k1->namelen != k2->namelen) {
77		if(k1->namelen < k2->namelen)
78			return -1;
79		return 1;
80	}
81	return query_dname_compare(k1->zonename, k2->zonename);
82}
83
84void
85infra_delkeyfunc(void* k, void* ATTR_UNUSED(arg))
86{
87	struct infra_key* key = (struct infra_key*)k;
88	if(!key)
89		return;
90	lock_rw_destroy(&key->entry.lock);
91	free(key->zonename);
92	free(key);
93}
94
/**
 * Data delete callback handed to the slabhash.
 * @param d: the data, a struct infra_data; it is freed.
 * @param arg: unused.
 */
void
infra_deldatafunc(void* d, void* ATTR_UNUSED(arg))
{
	struct infra_data* hostdata = (struct infra_data*)d;
	free(hostdata);
}
101
102struct infra_cache*
103infra_create(struct config_file* cfg)
104{
105	struct infra_cache* infra = (struct infra_cache*)calloc(1,
106		sizeof(struct infra_cache));
107	size_t maxmem = cfg->infra_cache_numhosts * (sizeof(struct infra_key)+
108		sizeof(struct infra_data)+INFRA_BYTES_NAME);
109	infra->hosts = slabhash_create(cfg->infra_cache_slabs,
110		INFRA_HOST_STARTSIZE, maxmem, &infra_sizefunc, &infra_compfunc,
111		&infra_delkeyfunc, &infra_deldatafunc, NULL);
112	if(!infra->hosts) {
113		free(infra);
114		return NULL;
115	}
116	infra->host_ttl = cfg->host_ttl;
117	return infra;
118}
119
120void
121infra_delete(struct infra_cache* infra)
122{
123	if(!infra)
124		return;
125	slabhash_delete(infra->hosts);
126	free(infra);
127}
128
129struct infra_cache*
130infra_adjust(struct infra_cache* infra, struct config_file* cfg)
131{
132	size_t maxmem;
133	if(!infra)
134		return infra_create(cfg);
135	infra->host_ttl = cfg->host_ttl;
136	maxmem = cfg->infra_cache_numhosts * (sizeof(struct infra_key)+
137		sizeof(struct infra_data)+INFRA_BYTES_NAME);
138	if(maxmem != slabhash_get_size(infra->hosts) ||
139		cfg->infra_cache_slabs != infra->hosts->size) {
140		infra_delete(infra);
141		infra = infra_create(cfg);
142	}
143	return infra;
144}
145
146/** calculate the hash value for a host key */
147static hashvalue_t
148hash_addr(struct sockaddr_storage* addr, socklen_t addrlen)
149{
150	hashvalue_t h = 0xab;
151	/* select the pieces to hash, some OS have changing data inside */
152	if(addr_is_ip6(addr, addrlen)) {
153		struct sockaddr_in6* in6 = (struct sockaddr_in6*)addr;
154		h = hashlittle(&in6->sin6_family, sizeof(in6->sin6_family), h);
155		h = hashlittle(&in6->sin6_port, sizeof(in6->sin6_port), h);
156		h = hashlittle(&in6->sin6_addr, INET6_SIZE, h);
157	} else {
158		struct sockaddr_in* in = (struct sockaddr_in*)addr;
159		h = hashlittle(&in->sin_family, sizeof(in->sin_family), h);
160		h = hashlittle(&in->sin_port, sizeof(in->sin_port), h);
161		h = hashlittle(&in->sin_addr, INET_SIZE, h);
162	}
163	return h;
164}
165
166/** calculate infra hash for a key */
167static hashvalue_t
168hash_infra(struct sockaddr_storage* addr, socklen_t addrlen, uint8_t* name)
169{
170	return dname_query_hash(name, hash_addr(addr, addrlen));
171}
172
173/** lookup version that does not check host ttl (you check it) */
174struct lruhash_entry*
175infra_lookup_nottl(struct infra_cache* infra, struct sockaddr_storage* addr,
176	socklen_t addrlen, uint8_t* name, size_t namelen, int wr)
177{
178	struct infra_key k;
179	k.addrlen = addrlen;
180	memcpy(&k.addr, addr, addrlen);
181	k.namelen = namelen;
182	k.zonename = name;
183	k.entry.hash = hash_infra(addr, addrlen, name);
184	k.entry.key = (void*)&k;
185	k.entry.data = NULL;
186	return slabhash_lookup(infra->hosts, k.entry.hash, &k, wr);
187}
188
189/** init the data elements */
190static void
191data_entry_init(struct infra_cache* infra, struct lruhash_entry* e,
192	uint32_t timenow)
193{
194	struct infra_data* data = (struct infra_data*)e->data;
195	data->ttl = timenow + infra->host_ttl;
196	rtt_init(&data->rtt);
197	data->edns_version = 0;
198	data->edns_lame_known = 0;
199	data->probedelay = 0;
200	data->isdnsseclame = 0;
201	data->rec_lame = 0;
202	data->lame_type_A = 0;
203	data->lame_other = 0;
204	data->timeout_A = 0;
205	data->timeout_AAAA = 0;
206	data->timeout_other = 0;
207}
208
209/**
210 * Create and init a new entry for a host
211 * @param infra: infra structure with config parameters.
212 * @param addr: host address.
213 * @param addrlen: length of addr.
214 * @param name: name of zone
215 * @param namelen: length of name.
216 * @param tm: time now.
217 * @return: the new entry or NULL on malloc failure.
218 */
219static struct lruhash_entry*
220new_entry(struct infra_cache* infra, struct sockaddr_storage* addr,
221	socklen_t addrlen, uint8_t* name, size_t namelen, uint32_t tm)
222{
223	struct infra_data* data;
224	struct infra_key* key = (struct infra_key*)malloc(sizeof(*key));
225	if(!key)
226		return NULL;
227	data = (struct infra_data*)malloc(sizeof(struct infra_data));
228	if(!data) {
229		free(key);
230		return NULL;
231	}
232	key->zonename = memdup(name, namelen);
233	if(!key->zonename) {
234		free(key);
235		free(data);
236		return NULL;
237	}
238	key->namelen = namelen;
239	lock_rw_init(&key->entry.lock);
240	key->entry.hash = hash_infra(addr, addrlen, name);
241	key->entry.key = (void*)key;
242	key->entry.data = (void*)data;
243	key->addrlen = addrlen;
244	memcpy(&key->addr, addr, addrlen);
245	data_entry_init(infra, &key->entry, tm);
246	return &key->entry;
247}
248
/**
 * Find host information to send a packet: EDNS state and timeout to use.
 * Creates a new entry if the host is not in the cache, and re-initialises
 * an expired entry. When the timeout is at probe level, the probedelay of
 * the entry is set so that other queries are delayed while this one probes.
 * @param infra: infrastructure cache.
 * @param addr: host address.
 * @param addrlen: length of addr.
 * @param nm: zone name.
 * @param nmlen: length of nm.
 * @param timenow: time now.
 * @param edns_vs: returns the stored edns version.
 * @param edns_lame_known: returns the stored edns_lame_known flag.
 * @param to: returns the timeout to use, from rtt_timeout().
 * @return: 0 if a new entry could not be allocated, otherwise 1.
 */
int
infra_host(struct infra_cache* infra, struct sockaddr_storage* addr,
        socklen_t addrlen, uint8_t* nm, size_t nmlen, uint32_t timenow,
	int* edns_vs, uint8_t* edns_lame_known, int* to)
{
	/* start with a lookup with wr=0; upgraded below only when needed */
	struct lruhash_entry* e = infra_lookup_nottl(infra, addr, addrlen,
		nm, nmlen, 0);
	struct infra_data* data;
	/* set to 1 once e was obtained by a lookup with wr=1 */
	int wr = 0;
	if(e && ((struct infra_data*)e->data)->ttl < timenow) {
		/* it expired, try to reuse existing entry */
		/* remember rto and timeout counters before the lock is
		 * released, they may be restored after re-initialisation */
		int old = ((struct infra_data*)e->data)->rtt.rto;
		uint8_t tA = ((struct infra_data*)e->data)->timeout_A;
		uint8_t tAAAA = ((struct infra_data*)e->data)->timeout_AAAA;
		uint8_t tother = ((struct infra_data*)e->data)->timeout_other;
		/* unlock and look up again with wr=1; the entry may be
		 * gone by then, in which case e becomes NULL */
		lock_rw_unlock(&e->lock);
		e = infra_lookup_nottl(infra, addr, addrlen, nm, nmlen, 1);
		if(e) {
			/* if its still there we have a writelock, init */
			/* re-initialise */
			/* do not touch lameness, it may be valid still */
			/* NOTE(review): data_entry_init() in this file does
			 * reset the lame flags to 0, which seems to contradict
			 * the line above - confirm which one is intended */
			data_entry_init(infra, e, timenow);
			wr = 1;
			/* TOP_TIMEOUT remains on reuse */
			if(old >= USEFUL_SERVER_TOP_TIMEOUT) {
				((struct infra_data*)e->data)->rtt.rto
					= USEFUL_SERVER_TOP_TIMEOUT;
				((struct infra_data*)e->data)->timeout_A = tA;
				((struct infra_data*)e->data)->timeout_AAAA = tAAAA;
				((struct infra_data*)e->data)->timeout_other = tother;
			}
		}
	}
	if(!e) {
		/* insert new entry */
		if(!(e = new_entry(infra, addr, addrlen, nm, nmlen, timenow)))
			return 0;
		data = (struct infra_data*)e->data;
		/* the results are read before the entry is handed over to
		 * the cache; e and data are not used after the insert */
		*edns_vs = data->edns_version;
		*edns_lame_known = data->edns_lame_known;
		*to = rtt_timeout(&data->rtt);
		slabhash_insert(infra->hosts, e->hash, e, data, NULL);
		return 1;
	}
	/* use existing entry */
	data = (struct infra_data*)e->data;
	*edns_vs = data->edns_version;
	*edns_lame_known = data->edns_lame_known;
	*to = rtt_timeout(&data->rtt);
	if(*to >= PROBE_MAXRTO && rtt_notimeout(&data->rtt)*4 <= *to) {
		/* delay other queries, this is the probe query */
		if(!wr) {
			/* probedelay is written below, so look the entry up
			 * again with wr=1 */
			lock_rw_unlock(&e->lock);
			e = infra_lookup_nottl(infra, addr,addrlen,nm,nmlen, 1);
			if(!e) { /* flushed from cache real fast, no use to
				allocate just for the probedelay */
				return 1;
			}
			data = (struct infra_data*)e->data;
		}
		/* add 999 to round up the timeout value from msec to sec,
		 * then add a whole second so it is certain that this probe
		 * has timed out before the next is allowed */
		data->probedelay = timenow + ((*to)+1999)/1000;
	}
	lock_rw_unlock(&e->lock);
	return 1;
}
317
318int
319infra_set_lame(struct infra_cache* infra, struct sockaddr_storage* addr,
320	socklen_t addrlen, uint8_t* nm, size_t nmlen, uint32_t timenow,
321	int dnsseclame, int reclame, uint16_t qtype)
322{
323	struct infra_data* data;
324	struct lruhash_entry* e;
325	int needtoinsert = 0;
326	e = infra_lookup_nottl(infra, addr, addrlen, nm, nmlen, 1);
327	if(!e) {
328		/* insert it */
329		if(!(e = new_entry(infra, addr, addrlen, nm, nmlen, timenow))) {
330			log_err("set_lame: malloc failure");
331			return 0;
332		}
333		needtoinsert = 1;
334	} else if( ((struct infra_data*)e->data)->ttl < timenow) {
335		/* expired, reuse existing entry */
336		data_entry_init(infra, e, timenow);
337	}
338	/* got an entry, now set the zone lame */
339	data = (struct infra_data*)e->data;
340	/* merge data (if any) */
341	if(dnsseclame)
342		data->isdnsseclame = 1;
343	if(reclame)
344		data->rec_lame = 1;
345	if(!dnsseclame && !reclame && qtype == LDNS_RR_TYPE_A)
346		data->lame_type_A = 1;
347	if(!dnsseclame  && !reclame && qtype != LDNS_RR_TYPE_A)
348		data->lame_other = 1;
349	/* done */
350	if(needtoinsert)
351		slabhash_insert(infra->hosts, e->hash, e, e->data, NULL);
352	else 	{ lock_rw_unlock(&e->lock); }
353	return 1;
354}
355
356void
357infra_update_tcp_works(struct infra_cache* infra,
358        struct sockaddr_storage* addr, socklen_t addrlen, uint8_t* nm,
359	size_t nmlen)
360{
361	struct lruhash_entry* e = infra_lookup_nottl(infra, addr, addrlen,
362		nm, nmlen, 1);
363	struct infra_data* data;
364	if(!e)
365		return; /* doesn't exist */
366	data = (struct infra_data*)e->data;
367	if(data->rtt.rto >= RTT_MAX_TIMEOUT)
368		/* do not disqualify this server altogether, it is better
369		 * than nothing */
370		data->rtt.rto = RTT_MAX_TIMEOUT-1000;
371	lock_rw_unlock(&e->lock);
372}
373
374int
375infra_rtt_update(struct infra_cache* infra, struct sockaddr_storage* addr,
376	socklen_t addrlen, uint8_t* nm, size_t nmlen, int qtype,
377	int roundtrip, int orig_rtt, uint32_t timenow)
378{
379	struct lruhash_entry* e = infra_lookup_nottl(infra, addr, addrlen,
380		nm, nmlen, 1);
381	struct infra_data* data;
382	int needtoinsert = 0;
383	int rto = 1;
384	if(!e) {
385		if(!(e = new_entry(infra, addr, addrlen, nm, nmlen, timenow)))
386			return 0;
387		needtoinsert = 1;
388	} else if(((struct infra_data*)e->data)->ttl < timenow) {
389		data_entry_init(infra, e, timenow);
390	}
391	/* have an entry, update the rtt */
392	data = (struct infra_data*)e->data;
393	if(roundtrip == -1) {
394		rtt_lost(&data->rtt, orig_rtt);
395		if(qtype == LDNS_RR_TYPE_A) {
396			if(data->timeout_A < TIMEOUT_COUNT_MAX)
397				data->timeout_A++;
398		} else if(qtype == LDNS_RR_TYPE_AAAA) {
399			if(data->timeout_AAAA < TIMEOUT_COUNT_MAX)
400				data->timeout_AAAA++;
401		} else {
402			if(data->timeout_other < TIMEOUT_COUNT_MAX)
403				data->timeout_other++;
404		}
405	} else {
406		/* if we got a reply, but the old timeout was above server
407		 * selection height, delete the timeout so the server is
408		 * fully available again */
409		if(rtt_unclamped(&data->rtt) >= USEFUL_SERVER_TOP_TIMEOUT)
410			rtt_init(&data->rtt);
411		rtt_update(&data->rtt, roundtrip);
412		data->probedelay = 0;
413		if(qtype == LDNS_RR_TYPE_A)
414			data->timeout_A = 0;
415		else if(qtype == LDNS_RR_TYPE_AAAA)
416			data->timeout_AAAA = 0;
417		else	data->timeout_other = 0;
418	}
419	if(data->rtt.rto > 0)
420		rto = data->rtt.rto;
421
422	if(needtoinsert)
423		slabhash_insert(infra->hosts, e->hash, e, e->data, NULL);
424	else 	{ lock_rw_unlock(&e->lock); }
425	return rto;
426}
427
428int infra_get_host_rto(struct infra_cache* infra,
429        struct sockaddr_storage* addr, socklen_t addrlen, uint8_t* nm,
430	size_t nmlen, struct rtt_info* rtt, int* delay, uint32_t timenow,
431	int* tA, int* tAAAA, int* tother)
432{
433	struct lruhash_entry* e = infra_lookup_nottl(infra, addr, addrlen,
434		nm, nmlen, 0);
435	struct infra_data* data;
436	int ttl = -2;
437	if(!e) return -1;
438	data = (struct infra_data*)e->data;
439	if(data->ttl >= timenow) {
440		ttl = (int)(data->ttl - timenow);
441		memmove(rtt, &data->rtt, sizeof(*rtt));
442		if(timenow < data->probedelay)
443			*delay = (int)(data->probedelay - timenow);
444		else	*delay = 0;
445	}
446	*tA = (int)data->timeout_A;
447	*tAAAA = (int)data->timeout_AAAA;
448	*tother = (int)data->timeout_other;
449	lock_rw_unlock(&e->lock);
450	return ttl;
451}
452
453int
454infra_edns_update(struct infra_cache* infra, struct sockaddr_storage* addr,
455	socklen_t addrlen, uint8_t* nm, size_t nmlen, int edns_version,
456	uint32_t timenow)
457{
458	struct lruhash_entry* e = infra_lookup_nottl(infra, addr, addrlen,
459		nm, nmlen, 1);
460	struct infra_data* data;
461	int needtoinsert = 0;
462	if(!e) {
463		if(!(e = new_entry(infra, addr, addrlen, nm, nmlen, timenow)))
464			return 0;
465		needtoinsert = 1;
466	} else if(((struct infra_data*)e->data)->ttl < timenow) {
467		data_entry_init(infra, e, timenow);
468	}
469	/* have an entry, update the rtt, and the ttl */
470	data = (struct infra_data*)e->data;
471	/* do not update if noEDNS and stored is yesEDNS */
472	if(!(edns_version == -1 && (data->edns_version != -1 &&
473		data->edns_lame_known))) {
474		data->edns_version = edns_version;
475		data->edns_lame_known = 1;
476	}
477
478	if(needtoinsert)
479		slabhash_insert(infra->hosts, e->hash, e, e->data, NULL);
480	else 	{ lock_rw_unlock(&e->lock); }
481	return 1;
482}
483
484int
485infra_get_lame_rtt(struct infra_cache* infra,
486        struct sockaddr_storage* addr, socklen_t addrlen,
487        uint8_t* name, size_t namelen, uint16_t qtype,
488	int* lame, int* dnsseclame, int* reclame, int* rtt, uint32_t timenow)
489{
490	struct infra_data* host;
491	struct lruhash_entry* e = infra_lookup_nottl(infra, addr, addrlen,
492		name, namelen, 0);
493	if(!e)
494		return 0;
495	host = (struct infra_data*)e->data;
496	*rtt = rtt_unclamped(&host->rtt);
497	if(host->rtt.rto >= PROBE_MAXRTO && timenow < host->probedelay
498		&& rtt_notimeout(&host->rtt)*4 <= host->rtt.rto) {
499		/* single probe for this domain, and we are not probing */
500		/* unless the query type allows a probe to happen */
501		if(qtype == LDNS_RR_TYPE_A) {
502			if(host->timeout_A >= TIMEOUT_COUNT_MAX)
503				*rtt = USEFUL_SERVER_TOP_TIMEOUT;
504			else	*rtt = USEFUL_SERVER_TOP_TIMEOUT-1000;
505		} else if(qtype == LDNS_RR_TYPE_AAAA) {
506			if(host->timeout_AAAA >= TIMEOUT_COUNT_MAX)
507				*rtt = USEFUL_SERVER_TOP_TIMEOUT;
508			else	*rtt = USEFUL_SERVER_TOP_TIMEOUT-1000;
509		} else {
510			if(host->timeout_other >= TIMEOUT_COUNT_MAX)
511				*rtt = USEFUL_SERVER_TOP_TIMEOUT;
512			else	*rtt = USEFUL_SERVER_TOP_TIMEOUT-1000;
513		}
514	}
515	if(timenow > host->ttl) {
516		/* expired entry */
517		/* see if this can be a re-probe of an unresponsive server */
518		/* minus 1000 because that is outside of the RTTBAND, so
519		 * blacklisted servers stay blacklisted if this is chosen */
520		if(host->rtt.rto >= USEFUL_SERVER_TOP_TIMEOUT) {
521			lock_rw_unlock(&e->lock);
522			*rtt = USEFUL_SERVER_TOP_TIMEOUT-1000;
523			*lame = 0;
524			*dnsseclame = 0;
525			*reclame = 0;
526			return 1;
527		}
528		lock_rw_unlock(&e->lock);
529		return 0;
530	}
531	/* check lameness first */
532	if(host->lame_type_A && qtype == LDNS_RR_TYPE_A) {
533		lock_rw_unlock(&e->lock);
534		*lame = 1;
535		*dnsseclame = 0;
536		*reclame = 0;
537		return 1;
538	} else if(host->lame_other && qtype != LDNS_RR_TYPE_A) {
539		lock_rw_unlock(&e->lock);
540		*lame = 1;
541		*dnsseclame = 0;
542		*reclame = 0;
543		return 1;
544	} else if(host->isdnsseclame) {
545		lock_rw_unlock(&e->lock);
546		*lame = 0;
547		*dnsseclame = 1;
548		*reclame = 0;
549		return 1;
550	} else if(host->rec_lame) {
551		lock_rw_unlock(&e->lock);
552		*lame = 0;
553		*dnsseclame = 0;
554		*reclame = 1;
555		return 1;
556	}
557	/* no lameness for this type of query */
558	lock_rw_unlock(&e->lock);
559	*lame = 0;
560	*dnsseclame = 0;
561	*reclame = 0;
562	return 1;
563}
564
565size_t
566infra_get_mem(struct infra_cache* infra)
567{
568	return sizeof(*infra) + slabhash_get_mem(infra->hosts);
569}
570