1/*
2 * edns-subnet/subnetmod.c - edns subnet module. Must be called before validator
3 * and iterator.
4 *
5 * Copyright (c) 2013, NLnet Labs. All rights reserved.
6 *
7 * This software is open source.
8 *
9 * Redistribution and use in source and binary forms, with or without
10 * modification, are permitted provided that the following conditions
11 * are met:
12 *
13 * Redistributions of source code must retain the above copyright notice,
14 * this list of conditions and the following disclaimer.
15 *
16 * Redistributions in binary form must reproduce the above copyright notice,
17 * this list of conditions and the following disclaimer in the documentation
18 * and/or other materials provided with the distribution.
19 *
20 * Neither the name of the NLNET LABS nor the names of its contributors may
21 * be used to endorse or promote products derived from this software without
22 * specific prior written permission.
23 *
24 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
25 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
26 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
27 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
28 * HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
29 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
30 * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
31 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
32 * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
33 * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
34 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
35 */
36 /**
37 * \file
38 * subnet module for unbound.
39 */
40
41#include "config.h"
42
43#ifdef CLIENT_SUBNET /* keeps splint happy */
44
45#include "edns-subnet/subnetmod.h"
46#include "edns-subnet/edns-subnet.h"
47#include "edns-subnet/addrtree.h"
48#include "edns-subnet/subnet-whitelist.h"
49
50#include "services/mesh.h"
51#include "services/cache/dns.h"
52#include "util/module.h"
53#include "util/regional.h"
54#include "util/storage/slabhash.h"
55#include "util/config_file.h"
56#include "util/data/msgreply.h"
57#include "sldns/sbuffer.h"
58#include "sldns/wire2str.h"
59#include "iterator/iter_utils.h"
60#ifdef USE_CACHEDB
61#include "cachedb/cachedb.h"
62#endif
63
64/** externally called */
65void
66subnet_data_delete(void *d, void *ATTR_UNUSED(arg))
67{
68	struct subnet_msg_cache_data *r;
69	r = (struct subnet_msg_cache_data*)d;
70	addrtree_delete(r->tree4);
71	addrtree_delete(r->tree6);
72	free(r);
73}
74
75/** externally called */
76size_t
77msg_cache_sizefunc(void *k, void *d)
78{
79	struct msgreply_entry *q = (struct msgreply_entry*)k;
80	struct subnet_msg_cache_data *r = (struct subnet_msg_cache_data*)d;
81	size_t s = sizeof(struct msgreply_entry)
82		+ sizeof(struct subnet_msg_cache_data)
83		+ q->key.qname_len + lock_get_mem(&q->entry.lock);
84	s += addrtree_size(r->tree4);
85	s += addrtree_size(r->tree6);
86	return s;
87}
88
89/** new query for ecs module */
90static int
91subnet_new_qstate(struct module_qstate *qstate, int id)
92{
93	struct subnet_qstate *sq = (struct subnet_qstate*)regional_alloc(
94		qstate->region, sizeof(struct subnet_qstate));
95	if(!sq)
96		return 0;
97	qstate->minfo[id] = sq;
98	memset(sq, 0, sizeof(*sq));
99	sq->started_no_cache_store = qstate->no_cache_store;
100	sq->started_no_cache_lookup = qstate->no_cache_lookup;
101	return 1;
102}
103
104/** Add ecs struct to edns list, after parsing it to wire format. */
105void
106subnet_ecs_opt_list_append(struct ecs_data* ecs, struct edns_option** list,
107	struct module_qstate *qstate, struct regional *region)
108{
109	size_t sn_octs, sn_octs_remainder;
110	sldns_buffer* buf = qstate->env->scratch_buffer;
111
112	if(ecs->subnet_validdata) {
113		log_assert(ecs->subnet_addr_fam == EDNSSUBNET_ADDRFAM_IP4 ||
114			ecs->subnet_addr_fam == EDNSSUBNET_ADDRFAM_IP6);
115		log_assert(ecs->subnet_addr_fam != EDNSSUBNET_ADDRFAM_IP4 ||
116			ecs->subnet_source_mask <=  INET_SIZE*8);
117		log_assert(ecs->subnet_addr_fam != EDNSSUBNET_ADDRFAM_IP6 ||
118			ecs->subnet_source_mask <= INET6_SIZE*8);
119
120		sn_octs = ecs->subnet_source_mask / 8;
121		sn_octs_remainder =
122			(size_t)((ecs->subnet_source_mask % 8)>0?1:0);
123
124		log_assert(sn_octs + sn_octs_remainder <= INET6_SIZE);
125
126		sldns_buffer_clear(buf);
127		sldns_buffer_write_u16(buf, ecs->subnet_addr_fam);
128		sldns_buffer_write_u8(buf, ecs->subnet_source_mask);
129		sldns_buffer_write_u8(buf, ecs->subnet_scope_mask);
130		sldns_buffer_write(buf, ecs->subnet_addr, sn_octs);
131		if(sn_octs_remainder)
132			sldns_buffer_write_u8(buf, ecs->subnet_addr[sn_octs] &
133				~(0xFF >> (ecs->subnet_source_mask % 8)));
134		sldns_buffer_flip(buf);
135
136		edns_opt_list_append(list,
137				qstate->env->cfg->client_subnet_opcode,
138				sn_octs + sn_octs_remainder + 4,
139				sldns_buffer_begin(buf), region);
140	}
141}
142
/**
 * Query callback, registered with inplace_cb_register in subnetmod_init.
 * Decides whether the ECS option goes out to the server at addr, and adds
 * it to or removes it from the outgoing EDNS option list accordingly.
 * The outcome is recorded in the subnet_sent and subnet_sent_no_subnet
 * fields of the module's query state.
 * @param qinfo: query info; its qname and qclass go into the whitelist
 *	lookup.
 * @param flags: unused.
 * @param qstate: query state.
 * @param addr: address the query is about to be sent to.
 * @param addrlen: length of addr.
 * @param zone: unused.
 * @param zonelen: unused.
 * @param region: region handed on for the creation of the option.
 * @param id: module id.
 * @param cbargs: unused.
 * @return always 1.
 */
int ecs_whitelist_check(struct query_info* qinfo,
	uint16_t ATTR_UNUSED(flags), struct module_qstate* qstate,
	struct sockaddr_storage* addr, socklen_t addrlen,
	uint8_t* ATTR_UNUSED(zone), size_t ATTR_UNUSED(zonelen),
	struct regional *region, int id, void* ATTR_UNUSED(cbargs))
{
	struct subnet_qstate *sq;
	struct subnet_env *sn_env;

	/* No module state for this query, nothing to decide. */
	if(!(sq=(struct subnet_qstate*)qstate->minfo[id]))
		return 1;
	sn_env = (struct subnet_env*)qstate->env->modinfo[id];

	/* Cache by default, might be disabled after parsing EDNS option
	 * received from nameserver. */
	if(!iter_stub_fwd_no_cache(qstate, &qstate->qinfo, NULL, NULL, NULL, 0)) {
		qstate->no_cache_store = 0;
	}

	sq->subnet_sent_no_subnet = 0;
	/* Send ECS if we have data to send and either the client sent an ECS
	 * option with client-subnet-always-forward enabled, or the
	 * destination is on the whitelist. */
	if(sq->ecs_server_out.subnet_validdata && ((sq->subnet_downstream &&
		qstate->env->cfg->client_subnet_always_forward) ||
		ecs_is_whitelisted(sn_env->whitelist,
		addr, addrlen, qinfo->qname, qinfo->qname_len,
		qinfo->qclass))) {
		/* Address on whitelist or client query contains ECS option, we
		 * want to send out ECS. Only add option if it is not already
		 * set. */
		if(!edns_opt_list_find(qstate->edns_opts_back_out,
			qstate->env->cfg->client_subnet_opcode)) {
			/* if the client is not wanting an EDNS subnet option,
			 * omit it and store that we omitted it but actually
			 * are doing EDNS subnet to the server. */
			if(sq->ecs_server_out.subnet_source_mask == 0) {
				sq->subnet_sent_no_subnet = 1;
				sq->subnet_sent = 0;
				return 1;
			}
			subnet_ecs_opt_list_append(&sq->ecs_server_out,
				&qstate->edns_opts_back_out, qstate, region);
		}
		sq->subnet_sent = 1;
	}
	else {
		/* Outgoing ECS option is set, but we don't want to send it to
		 * this address, remove option. */
		if(edns_opt_list_find(qstate->edns_opts_back_out,
			qstate->env->cfg->client_subnet_opcode)) {
			edns_opt_list_remove(&qstate->edns_opts_back_out,
				qstate->env->cfg->client_subnet_opcode);
		}
		sq->subnet_sent = 0;
	}
	return 1;
}
198
199
200void
201subnet_markdel(void* key)
202{
203	struct msgreply_entry *e = (struct msgreply_entry*)key;
204	e->key.qtype = 0;
205	e->key.qclass = 0;
206}
207
208int
209subnetmod_init(struct module_env *env, int id)
210{
211	struct subnet_env *sn_env = (struct subnet_env*)calloc(1,
212		sizeof(struct subnet_env));
213	if(!sn_env) {
214		log_err("malloc failure");
215		return 0;
216	}
217	alloc_init(&sn_env->alloc, NULL, 0);
218	env->modinfo[id] = (void*)sn_env;
219
220	/* Warn that serve-expired and prefetch do not work with the subnet
221	 * module cache. */
222	if(env->cfg->serve_expired)
223		log_warn(
224			"subnetcache: serve-expired is set but not working "
225			"for data originating from the subnet module cache.");
226	if(env->cfg->prefetch)
227		log_warn(
228			"subnetcache: prefetch is set but not working "
229			"for data originating from the subnet module cache.");
230	/* Copy msg_cache settings */
231	sn_env->subnet_msg_cache = slabhash_create(env->cfg->msg_cache_slabs,
232		HASH_DEFAULT_STARTARRAY, env->cfg->msg_cache_size,
233		msg_cache_sizefunc, query_info_compare, query_entry_delete,
234		subnet_data_delete, NULL);
235	slabhash_setmarkdel(sn_env->subnet_msg_cache, &subnet_markdel);
236	if(!sn_env->subnet_msg_cache) {
237		log_err("subnetcache: could not create cache");
238		free(sn_env);
239		env->modinfo[id] = NULL;
240		return 0;
241	}
242	/* whitelist for edns subnet capable servers */
243	sn_env->whitelist = ecs_whitelist_create();
244	if(!sn_env->whitelist ||
245		!ecs_whitelist_apply_cfg(sn_env->whitelist, env->cfg)) {
246		log_err("subnetcache: could not create ECS whitelist");
247		slabhash_delete(sn_env->subnet_msg_cache);
248		free(sn_env);
249		env->modinfo[id] = NULL;
250		return 0;
251	}
252
253	verbose(VERB_QUERY, "subnetcache: option registered (%d)",
254		env->cfg->client_subnet_opcode);
255	/* Create new mesh state for all queries. */
256	env->unique_mesh = 1;
257	if(!edns_register_option(env->cfg->client_subnet_opcode,
258		env->cfg->client_subnet_always_forward /* bypass cache */,
259		1 /* no aggregation */, env)) {
260		log_err("subnetcache: could not register opcode");
261		ecs_whitelist_delete(sn_env->whitelist);
262		slabhash_delete(sn_env->subnet_msg_cache);
263		free(sn_env);
264		env->modinfo[id] = NULL;
265		return 0;
266	}
267	inplace_cb_register((void*)ecs_whitelist_check, inplace_cb_query, NULL,
268		env, id);
269	inplace_cb_register((void*)ecs_edns_back_parsed,
270		inplace_cb_edns_back_parsed, NULL, env, id);
271	inplace_cb_register((void*)ecs_query_response,
272		inplace_cb_query_response, NULL, env, id);
273	lock_rw_init(&sn_env->biglock);
274	return 1;
275}
276
277void
278subnetmod_deinit(struct module_env *env, int id)
279{
280	struct subnet_env *sn_env;
281	if(!env || !env->modinfo[id])
282		return;
283	sn_env = (struct subnet_env*)env->modinfo[id];
284	lock_rw_destroy(&sn_env->biglock);
285	inplace_cb_delete(env, inplace_cb_edns_back_parsed, id);
286	inplace_cb_delete(env, inplace_cb_query, id);
287	inplace_cb_delete(env, inplace_cb_query_response, id);
288	ecs_whitelist_delete(sn_env->whitelist);
289	slabhash_delete(sn_env->subnet_msg_cache);
290	alloc_clear(&sn_env->alloc);
291	free(sn_env);
292	env->modinfo[id] = NULL;
293}
294
295/** Tells client that upstream has no/improper support */
296static void
297cp_edns_bad_response(struct ecs_data *target, struct ecs_data *source)
298{
299	target->subnet_scope_mask  = 0;
300	target->subnet_source_mask = source->subnet_source_mask;
301	target->subnet_addr_fam    = source->subnet_addr_fam;
302	memcpy(target->subnet_addr, source->subnet_addr, INET6_SIZE);
303	target->subnet_validdata = 1;
304}
305
306static void
307delfunc(void *envptr, void *elemptr) {
308	struct reply_info *elem = (struct reply_info *)elemptr;
309	struct subnet_env *env = (struct subnet_env *)envptr;
310	reply_info_parsedelete(elem, &env->alloc);
311}
312
313static size_t
314sizefunc(void *elemptr) {
315	struct reply_info *elem  = (struct reply_info *)elemptr;
316	size_t s = sizeof (struct reply_info) - sizeof (struct rrset_ref)
317		+ elem->rrset_count * sizeof (struct rrset_ref)
318		+ elem->rrset_count * sizeof (struct ub_packed_rrset_key *);
319	size_t i;
320	for (i = 0; i < elem->rrset_count; i++) {
321		struct ub_packed_rrset_key *key = elem->rrsets[i];
322		struct packed_rrset_data *data = key->entry.data;
323		s += ub_rrset_sizefunc(key, data);
324	}
325	if(elem->reason_bogus_str)
326		s += strlen(elem->reason_bogus_str)+1;
327	return s;
328}
329
330/**
331 * Select tree from cache entry based on edns data.
332 * If for address family not present it will create a new one.
333 * NULL on failure to create. */
334static struct addrtree*
335get_tree(struct subnet_msg_cache_data *data, struct ecs_data *edns,
336	struct subnet_env *env, struct config_file* cfg)
337{
338	struct addrtree *tree;
339	if (edns->subnet_addr_fam == EDNSSUBNET_ADDRFAM_IP4) {
340		if (!data->tree4)
341			data->tree4 = addrtree_create(
342				cfg->max_client_subnet_ipv4, &delfunc,
343				&sizefunc, env, cfg->max_ecs_tree_size_ipv4);
344		tree = data->tree4;
345	} else {
346		if (!data->tree6)
347			data->tree6 = addrtree_create(
348				cfg->max_client_subnet_ipv6, &delfunc,
349				&sizefunc, env, cfg->max_ecs_tree_size_ipv6);
350		tree = data->tree6;
351	}
352	return tree;
353}
354
355static void
356update_cache(struct module_qstate *qstate, int id)
357{
358	struct msgreply_entry *mrep_entry;
359	struct addrtree *tree;
360	struct reply_info *rep;
361	struct query_info qinf;
362	struct subnet_env *sne = qstate->env->modinfo[id];
363	struct subnet_qstate *sq = (struct subnet_qstate*)qstate->minfo[id];
364	struct slabhash *subnet_msg_cache = sne->subnet_msg_cache;
365	struct ecs_data *edns = &sq->ecs_client_in;
366	size_t i;
367	int only_match_scope_zero, diff_size;
368
369	/* We already calculated hash upon lookup (lookup_and_reply) if we were
370	 * allowed to look in the ECS cache */
371	hashvalue_type h = qstate->minfo[id] &&
372		((struct subnet_qstate*)qstate->minfo[id])->qinfo_hash_calculated?
373		((struct subnet_qstate*)qstate->minfo[id])->qinfo_hash :
374		query_info_hash(&qstate->qinfo, qstate->query_flags);
375	/* Step 1, general qinfo lookup */
376	struct lruhash_entry* lru_entry = slabhash_lookup(subnet_msg_cache, h,
377		&qstate->qinfo, 1);
378	int need_to_insert = (lru_entry == NULL);
379	if (!lru_entry) {
380		void* data = calloc(1,
381			sizeof(struct subnet_msg_cache_data));
382		if(!data) {
383			log_err("malloc failed");
384			return;
385		}
386		qinf = qstate->qinfo;
387		qinf.qname = memdup(qstate->qinfo.qname,
388			qstate->qinfo.qname_len);
389		if(!qinf.qname) {
390			free(data);
391			log_err("memdup failed");
392			return;
393		}
394		mrep_entry = query_info_entrysetup(&qinf, data, h);
395		free(qinf.qname); /* if qname 'consumed', it is set to NULL */
396		if (!mrep_entry) {
397			free(data);
398			log_err("query_info_entrysetup failed");
399			return;
400		}
401		lru_entry = &mrep_entry->entry;
402		lock_rw_wrlock(&lru_entry->lock);
403	}
404	/* lru_entry->lock is locked regardless of how we got here,
405	 * either from the slabhash_lookup, or above in the new allocated */
406	/* Step 2, find the correct tree */
407	if (!(tree = get_tree(lru_entry->data, edns, sne, qstate->env->cfg))) {
408		lock_rw_unlock(&lru_entry->lock);
409		log_err("subnetcache: cache insertion failed");
410		return;
411	}
412	lock_quick_lock(&sne->alloc.lock);
413	rep = reply_info_copy(qstate->return_msg->rep, &sne->alloc, NULL);
414	lock_quick_unlock(&sne->alloc.lock);
415	if (!rep) {
416		lock_rw_unlock(&lru_entry->lock);
417		log_err("subnetcache: cache insertion failed");
418		return;
419	}
420
421	/* store RRsets */
422	for(i=0; i<rep->rrset_count; i++) {
423		rep->ref[i].key = rep->rrsets[i];
424		rep->ref[i].id = rep->rrsets[i]->id;
425	}
426	reply_info_set_ttls(rep, *qstate->env->now);
427	reply_info_sortref(rep);
428	rep->flags |= (BIT_RA | BIT_QR); /* fix flags to be sensible for */
429	rep->flags &= ~(BIT_AA | BIT_CD);/* a reply based on the cache   */
430	if(edns->subnet_source_mask == 0 && edns->subnet_scope_mask == 0)
431		only_match_scope_zero = 1;
432	else only_match_scope_zero = 0;
433	diff_size = (int)tree->size_bytes;
434	addrtree_insert(tree, (addrkey_t*)edns->subnet_addr,
435		edns->subnet_source_mask, sq->max_scope, rep,
436		rep->ttl, *qstate->env->now, only_match_scope_zero);
437	diff_size = (int)tree->size_bytes - diff_size;
438
439	lock_rw_unlock(&lru_entry->lock);
440	if (need_to_insert) {
441		slabhash_insert(subnet_msg_cache, h, lru_entry, lru_entry->data,
442			NULL);
443	} else {
444		slabhash_update_space_used(subnet_msg_cache, h, NULL,
445			diff_size);
446	}
447}
448
449/** Lookup in cache and reply true iff reply is sent. */
450static int
451lookup_and_reply(struct module_qstate *qstate, int id, struct subnet_qstate *sq, int prefetch)
452{
453	struct lruhash_entry *e;
454	struct module_env *env = qstate->env;
455	struct subnet_env *sne = (struct subnet_env*)env->modinfo[id];
456	hashvalue_type h = query_info_hash(&qstate->qinfo, qstate->query_flags);
457	struct subnet_msg_cache_data *data;
458	struct ecs_data *ecs = &sq->ecs_client_in;
459	struct addrtree *tree;
460	struct addrnode *node;
461	uint8_t scope;
462
463	memset(&sq->ecs_client_out, 0, sizeof(sq->ecs_client_out));
464
465	if (sq) {
466		sq->qinfo_hash = h; /* Might be useful on cache miss */
467		sq->qinfo_hash_calculated = 1;
468	}
469	e = slabhash_lookup(sne->subnet_msg_cache, h, &qstate->qinfo, 1);
470	if (!e) return 0; /* qinfo not in cache */
471	data = e->data;
472	tree = (ecs->subnet_addr_fam == EDNSSUBNET_ADDRFAM_IP4)?
473		data->tree4 : data->tree6;
474	if (!tree) { /* qinfo in cache but not for this family */
475		lock_rw_unlock(&e->lock);
476		return 0;
477	}
478	node = addrtree_find(tree, (addrkey_t*)ecs->subnet_addr,
479		ecs->subnet_source_mask, *env->now);
480	if (!node) { /* plain old cache miss */
481		lock_rw_unlock(&e->lock);
482		return 0;
483	}
484
485	qstate->return_msg = tomsg(NULL, &qstate->qinfo,
486		(struct reply_info *)node->elem, qstate->region, *env->now, 0,
487		env->scratch);
488	scope = (uint8_t)node->scope;
489	lock_rw_unlock(&e->lock);
490
491	if (!qstate->return_msg) { /* Failed allocation or expired TTL */
492		return 0;
493	}
494
495	if (sq->subnet_downstream) { /* relay to interested client */
496		sq->ecs_client_out.subnet_scope_mask = scope;
497		sq->ecs_client_out.subnet_addr_fam = ecs->subnet_addr_fam;
498		sq->ecs_client_out.subnet_source_mask = ecs->subnet_source_mask;
499		memcpy(&sq->ecs_client_out.subnet_addr, &ecs->subnet_addr,
500			INET6_SIZE);
501		sq->ecs_client_out.subnet_validdata = 1;
502	}
503
504	if (prefetch && *qstate->env->now >= ((struct reply_info *)node->elem)->prefetch_ttl) {
505		qstate->need_refetch = 1;
506	}
507	return 1;
508}
509
/**
 * Test first bits of addresses for equality. Caller is responsible
 * for making sure that both a and b are at least (net+7)/8 octets long.
 * Only the first net bits take part in the comparison; when net is not a
 * multiple of 8, the remaining low bits of the last octet are ignored.
 * @param a: first address.
 * @param b: second address.
 * @param net: Number of bits to test.
 * @return: 1 if equal, 0 otherwise.
 */
static int
common_prefix(uint8_t *a, uint8_t *b, uint8_t net)
{
	size_t n = (size_t)net / 8;
	unsigned int rem = (unsigned int)net % 8;
	uint8_t mask;

	if(memcmp(a, b, n) != 0)
		return 0;
	if(rem == 0)
		return 1;
	/* mask with the leading rem bits of the partial octet set */
	mask = (uint8_t)(0xFF << (8 - rem));
	return ((a[n] ^ b[n]) & mask) == 0;
}
524
/**
 * Evaluate the answer that the next module handed back: decide what ECS
 * data the client gets and whether the answer goes into the subnet cache.
 * The ECS data for the client (ecs_client_out) is cleared first and only
 * filled in on the paths that have something to tell the client.
 * @param qstate: query state.
 * @param id: module id.
 * @param sq: subnet module state of this query.
 * @return module_finished when this module is done with the query, or
 *	module_restart_next when the ECS data in the answer does not match
 *	what was sent and the query is restarted without the option.
 */
static enum module_ext_state
eval_response(struct module_qstate *qstate, int id, struct subnet_qstate *sq)
{
	struct subnet_env *sne = qstate->env->modinfo[id];

	struct ecs_data *c_in  = &sq->ecs_client_in; /* rcvd from client */
	struct ecs_data *c_out = &sq->ecs_client_out;/* will send to client */
	struct ecs_data *s_in  = &sq->ecs_server_in; /* rcvd from auth */
	struct ecs_data *s_out = &sq->ecs_server_out;/* sent to auth */

	memset(c_out, 0, sizeof(*c_out));

	if (!qstate->return_msg) {
		/* already an answer and its not a message, but retain
		 * the actual rcode, instead of module_error, so send
		 * module_finished */
		return module_finished;
	}

	/* We have not asked for subnet data */
	if (!sq->subnet_sent && !sq->subnet_sent_no_subnet) {
		if (s_in->subnet_validdata)
			verbose(VERB_QUERY, "subnetcache: received spurious data");
		if (sq->subnet_downstream) /* Copy back to client */
			cp_edns_bad_response(c_out, c_in);
		return module_finished;
	}

	/* subnet sent but nothing came back */
	if (!s_in->subnet_validdata && !sq->subnet_sent_no_subnet) {
		/* The authority indicated no support for edns subnet. As a
		 * consequence the answer ended up in the regular cache. It
		 * is still useful to put it in the edns subnet cache for
		 * when a client explicitly asks for subnet specific answer. */
		verbose(VERB_QUERY, "subnetcache: Authority indicates no support");
		if(!sq->started_no_cache_store) {
			lock_rw_wrlock(&sne->biglock);
			update_cache(qstate, id);
			lock_rw_unlock(&sne->biglock);
		}
		if (sq->subnet_downstream)
			cp_edns_bad_response(c_out, c_in);
		return module_finished;
	}

	/* Purposefully there was no sent subnet, and there is consequently
	 * no subnet in the answer. If there was, use the subnet in the answer
	 * anyway. But if there is not, treat it as a prefix 0 answer. */
	if(sq->subnet_sent_no_subnet && !s_in->subnet_validdata) {
		/* Fill in 0.0.0.0/0 scope 0, or ::0/0 scope 0, for caching. */
		s_in->subnet_addr_fam = s_out->subnet_addr_fam;
		s_in->subnet_source_mask = 0;
		s_in->subnet_scope_mask = 0;
		memset(s_in->subnet_addr, 0, INET6_SIZE);
		s_in->subnet_validdata = 1;
	}

	/* Being here means we have asked for and got a subnet specific
	 * answer. Also, the answer from the authority is not yet cached
	 * anywhere. */

	/* can we accept response? */
	/* The answer must echo the family, the source mask and the address
	 * prefix that were sent. */
	if(s_out->subnet_addr_fam != s_in->subnet_addr_fam ||
		s_out->subnet_source_mask != s_in->subnet_source_mask ||
		!common_prefix(s_out->subnet_addr, s_in->subnet_addr,
			s_out->subnet_source_mask))
	{
		/* we can not accept, restart query without option */
		verbose(VERB_QUERY, "subnetcache: forged data");
		s_out->subnet_validdata = 0;
		(void)edns_opt_list_remove(&qstate->edns_opts_back_out,
			qstate->env->cfg->client_subnet_opcode);
		sq->subnet_sent = 0;
		sq->subnet_sent_no_subnet = 0;
		return module_restart_next;
	}

	/* The counter is updated under the same lock as the cache. */
	lock_rw_wrlock(&sne->biglock);
	if(!sq->started_no_cache_store) {
		update_cache(qstate, id);
	}
	sne->num_msg_nocache++;
	lock_rw_unlock(&sne->biglock);

	/* If there is an expired answer in the global cache, remove that,
	 * because expired answers would otherwise resurface once the ecs data
	 * expires, giving once in a while global data responses for ecs
	 * domains, with serve expired enabled. */
	if(qstate->env->cfg->serve_expired) {
		msg_cache_remove(qstate->env, qstate->qinfo.qname,
			qstate->qinfo.qname_len, qstate->qinfo.qtype,
			qstate->qinfo.qclass, 0);
#ifdef USE_CACHEDB
		if(qstate->env->cachedb_enabled)
			cachedb_msg_remove(qstate);
#endif
	}

	if (sq->subnet_downstream) {
		/* Client wants to see the answer, echo option back
		 * and adjust the scope. */
		c_out->subnet_addr_fam = c_in->subnet_addr_fam;
		c_out->subnet_source_mask = c_in->subnet_source_mask;
		memcpy(&c_out->subnet_addr, &c_in->subnet_addr, INET6_SIZE);
		c_out->subnet_scope_mask = sq->max_scope;
		/* Limit scope returned to client to scope used for caching. */
		if(c_out->subnet_addr_fam == EDNSSUBNET_ADDRFAM_IP4) {
			if(c_out->subnet_scope_mask >
				qstate->env->cfg->max_client_subnet_ipv4) {
				c_out->subnet_scope_mask =
					qstate->env->cfg->max_client_subnet_ipv4;
			}
		}
		else if(c_out->subnet_scope_mask >
				qstate->env->cfg->max_client_subnet_ipv6) {
				c_out->subnet_scope_mask =
					qstate->env->cfg->max_client_subnet_ipv6;
		}
		c_out->subnet_validdata = 1;
	}
	return module_finished;
}
647
648/** Parse EDNS opt data containing ECS */
649static int
650parse_subnet_option(struct edns_option* ecs_option, struct ecs_data* ecs)
651{
652	memset(ecs, 0, sizeof(*ecs));
653	if (ecs_option->opt_len < 4)
654		return 0;
655
656	ecs->subnet_addr_fam = sldns_read_uint16(ecs_option->opt_data);
657	ecs->subnet_source_mask = ecs_option->opt_data[2];
658	ecs->subnet_scope_mask = ecs_option->opt_data[3];
659	/* remaining bytes indicate address */
660
661	/* validate input*/
662	/* option length matches calculated length? */
663	if (ecs_option->opt_len != (size_t)((ecs->subnet_source_mask+7)/8 + 4))
664		return 0;
665	if (ecs_option->opt_len - 4 > INET6_SIZE || ecs_option->opt_len == 0)
666		return 0;
667	if (ecs->subnet_addr_fam == EDNSSUBNET_ADDRFAM_IP4) {
668		if (ecs->subnet_source_mask > 32 || ecs->subnet_scope_mask > 32)
669			return 0;
670	} else if (ecs->subnet_addr_fam == EDNSSUBNET_ADDRFAM_IP6) {
671		if (ecs->subnet_source_mask > 128 ||
672			ecs->subnet_scope_mask > 128)
673			return 0;
674	} else
675		return 0;
676
677	/* valid ECS data, write to ecs_data */
678	if (copy_clear(ecs->subnet_addr, INET6_SIZE, ecs_option->opt_data + 4,
679		ecs_option->opt_len - 4, ecs->subnet_source_mask))
680		return 0;
681	ecs->subnet_validdata = 1;
682	return 1;
683}
684
685void
686subnet_option_from_ss(struct sockaddr_storage *ss, struct ecs_data* ecs,
687	struct config_file* cfg)
688{
689	void* sinaddr;
690
691	/* Construct subnet option from original query */
692	if(((struct sockaddr_in*)ss)->sin_family == AF_INET) {
693		ecs->subnet_source_mask = cfg->max_client_subnet_ipv4;
694		ecs->subnet_addr_fam = EDNSSUBNET_ADDRFAM_IP4;
695		sinaddr = &((struct sockaddr_in*)ss)->sin_addr;
696		if (!copy_clear( ecs->subnet_addr, INET6_SIZE,
697			(uint8_t *)sinaddr, INET_SIZE,
698			ecs->subnet_source_mask)) {
699			ecs->subnet_validdata = 1;
700		}
701	}
702#ifdef INET6
703	else {
704		ecs->subnet_source_mask = cfg->max_client_subnet_ipv6;
705		ecs->subnet_addr_fam = EDNSSUBNET_ADDRFAM_IP6;
706		sinaddr = &((struct sockaddr_in6*)ss)->sin6_addr;
707		if (!copy_clear( ecs->subnet_addr, INET6_SIZE,
708			(uint8_t *)sinaddr, INET6_SIZE,
709			ecs->subnet_source_mask)) {
710			ecs->subnet_validdata = 1;
711		}
712	}
713#else
714			/* We don't know how to handle ip6, just pass */
715#endif /* INET6 */
716}
717
718int
719ecs_query_response(struct module_qstate* qstate, struct dns_msg* response,
720	int id, void* ATTR_UNUSED(cbargs))
721{
722	struct subnet_qstate *sq;
723
724	if(!response || !(sq=(struct subnet_qstate*)qstate->minfo[id]))
725		return 1;
726
727	if(sq->subnet_sent &&
728		FLAGS_GET_RCODE(response->rep->flags) == LDNS_RCODE_REFUSED) {
729		/* REFUSED response to ECS query, remove ECS option. */
730		edns_opt_list_remove(&qstate->edns_opts_back_out,
731			qstate->env->cfg->client_subnet_opcode);
732		sq->subnet_sent = 0;
733		sq->subnet_sent_no_subnet = 0;
734		memset(&sq->ecs_server_out, 0, sizeof(sq->ecs_server_out));
735	} else if (!sq->track_max_scope &&
736		FLAGS_GET_RCODE(response->rep->flags) == LDNS_RCODE_NOERROR &&
737		response->rep->an_numrrsets > 0
738		) {
739		struct ub_packed_rrset_key* s = response->rep->rrsets[0];
740		if(ntohs(s->rk.type) == LDNS_RR_TYPE_CNAME &&
741			query_dname_compare(qstate->qinfo.qname,
742			s->rk.dname) == 0) {
743			/* CNAME response for QNAME. From now on keep track of
744			 * longest received ECS prefix for all queries on this
745			 * qstate. */
746			sq->track_max_scope = 1;
747		}
748	}
749	return 1;
750}
751
752/** verbose print edns subnet option in pretty print */
753static void
754subnet_log_print(const char* s, struct edns_option* ecs_opt)
755{
756	if(verbosity >= VERB_ALGO) {
757		char buf[256];
758		char* str = buf;
759		size_t str_len = sizeof(buf);
760		if(!ecs_opt) {
761			verbose(VERB_ALGO, "%s (null)", s);
762			return;
763		}
764		(void)sldns_wire2str_edns_subnet_print(&str, &str_len,
765			ecs_opt->opt_data, ecs_opt->opt_len);
766		verbose(VERB_ALGO, "%s %s", s, buf);
767	}
768}
769
770int
771ecs_edns_back_parsed(struct module_qstate* qstate, int id,
772	void* ATTR_UNUSED(cbargs))
773{
774	struct subnet_qstate *sq;
775	struct edns_option* ecs_opt;
776
777	if(!(sq=(struct subnet_qstate*)qstate->minfo[id]))
778		return 1;
779	if((ecs_opt = edns_opt_list_find(
780		qstate->edns_opts_back_in,
781		qstate->env->cfg->client_subnet_opcode)) &&
782		parse_subnet_option(ecs_opt, &sq->ecs_server_in) &&
783		sq->subnet_sent && sq->ecs_server_in.subnet_validdata) {
784			subnet_log_print("answer has edns subnet", ecs_opt);
785			/* Only skip global cache store if we sent an ECS option
786			 * and received one back. Answers from non-whitelisted
787			 * servers will end up in global cache. Answers for
788			 * queries with 0 source will not (unless nameserver
789			 * does not support ECS). */
790			qstate->no_cache_store = 1;
791			if(!sq->track_max_scope || (sq->track_max_scope &&
792				sq->ecs_server_in.subnet_scope_mask >
793				sq->max_scope))
794				sq->max_scope = sq->ecs_server_in.subnet_scope_mask;
795	} else if(sq->subnet_sent_no_subnet) {
796		/* The answer can be stored as scope 0, not in global cache. */
797		qstate->no_cache_store = 1;
798	}
799
800	return 1;
801}
802
/**
 * Handle an event for a query in the subnet module.
 * For a new query (new or pass event without module state) the module
 * state is created, the client ECS data is taken from the ECS option of
 * the query or else from the client address, the subnet cache is
 * consulted and, on a miss, the ECS data for the server is prepared and
 * the query is passed to the next module.
 * For a moddone event the answer is evaluated with eval_response and the
 * ECS option for the client is appended to the reply.
 * The resulting module state is written to qstate->ext_state[id].
 * @param qstate: query state.
 * @param event: the event to handle.
 * @param id: module id.
 * @param outbound: outbound entry; an event with module state and an
 *	outbound entry that is not handled earlier is ignored.
 */
void
subnetmod_operate(struct module_qstate *qstate, enum module_ev event,
	int id, struct outbound_entry* outbound)
{
	struct subnet_env *sne = qstate->env->modinfo[id];
	struct subnet_qstate *sq = (struct subnet_qstate*)qstate->minfo[id];

	verbose(VERB_QUERY, "subnetcache[module %d] operate: extstate:%s "
		"event:%s", id, strextstate(qstate->ext_state[id]),
		strmodulevent(event));
	log_query_info(VERB_QUERY, "subnetcache operate: query", &qstate->qinfo);

	/* First time this module sees the query. */
	if((event == module_event_new || event == module_event_pass) &&
		sq == NULL) {
		struct edns_option* ecs_opt;
		if(!subnet_new_qstate(qstate, id)) {
			qstate->return_msg = NULL;
			qstate->ext_state[id] = module_finished;
			return;
		}

		sq = (struct subnet_qstate*)qstate->minfo[id];

		/* Source of the client ECS data, in order of preference: the
		 * ECS option of the query, the address of the first client in
		 * the reply list, the client address stored on the qstate. */
		if((ecs_opt = edns_opt_list_find(
			qstate->edns_opts_front_in,
			qstate->env->cfg->client_subnet_opcode))) {
			if(!parse_subnet_option(ecs_opt, &sq->ecs_client_in)) {
				/* Wrongly formatted ECS option. RFC mandates to
				 * return FORMERROR. */
				qstate->return_rcode = LDNS_RCODE_FORMERR;
				qstate->ext_state[id] = module_finished;
				return;
			}
			subnet_log_print("query has edns subnet", ecs_opt);
			sq->subnet_downstream = 1;
		}
		else if(qstate->mesh_info->reply_list) {
			subnet_option_from_ss(
				&qstate->mesh_info->reply_list->query_reply.client_addr,
				&sq->ecs_client_in, qstate->env->cfg);
		}
		else if(qstate->client_addr.ss_family != AF_UNSPEC) {
			subnet_option_from_ss(
				&qstate->client_addr,
				&sq->ecs_client_in, qstate->env->cfg);
		}

		if(sq->ecs_client_in.subnet_validdata == 0) {
			/* No clients are interested in result or we could not
			 * parse it, we don't do client subnet */
			sq->ecs_server_out.subnet_validdata = 0;
			verbose(VERB_ALGO, "subnetcache: pass to next module");
			qstate->ext_state[id] = module_wait_module;
			return;
		}

		/* Limit to minimum allowed source mask */
		if(sq->ecs_client_in.subnet_source_mask != 0 && (
			(sq->ecs_client_in.subnet_addr_fam == EDNSSUBNET_ADDRFAM_IP4 &&
			 sq->ecs_client_in.subnet_source_mask < qstate->env->cfg->min_client_subnet_ipv4) ||
			(sq->ecs_client_in.subnet_addr_fam == EDNSSUBNET_ADDRFAM_IP6 &&
			 sq->ecs_client_in.subnet_source_mask < qstate->env->cfg->min_client_subnet_ipv6))) {
				qstate->return_rcode = LDNS_RCODE_REFUSED;
				qstate->ext_state[id] = module_finished;
				return;
		}

		/* Try the subnet cache, unless the query started with cache
		 * lookups disabled or has a blacklist. */
		if(!sq->started_no_cache_lookup && !qstate->blacklist) {
			lock_rw_wrlock(&sne->biglock);
			if(qstate->mesh_info->reply_list &&
				lookup_and_reply(qstate, id, sq,
				qstate->env->cfg->prefetch)) {
				sne->num_msg_cache++;
				lock_rw_unlock(&sne->biglock);
				verbose(VERB_QUERY, "subnetcache: answered from cache");
				qstate->ext_state[id] = module_finished;

				subnet_ecs_opt_list_append(&sq->ecs_client_out,
					&qstate->edns_opts_front_out, qstate,
					qstate->region);
				if(verbosity >= VERB_ALGO) {
					subnet_log_print("reply has edns subnet",
						edns_opt_list_find(
						qstate->edns_opts_front_out,
						qstate->env->cfg->
						client_subnet_opcode));
				}
				return;
			}
			lock_rw_unlock(&sne->biglock);
		}

		/* Cache miss: prepare the ECS data that goes to the server
		 * from what the client gave us. */
		sq->ecs_server_out.subnet_addr_fam =
			sq->ecs_client_in.subnet_addr_fam;
		sq->ecs_server_out.subnet_source_mask =
			sq->ecs_client_in.subnet_source_mask;
		/* Limit source prefix to configured maximum */
		if(sq->ecs_server_out.subnet_addr_fam == EDNSSUBNET_ADDRFAM_IP4
			&& sq->ecs_server_out.subnet_source_mask >
			qstate->env->cfg->max_client_subnet_ipv4)
			sq->ecs_server_out.subnet_source_mask =
				qstate->env->cfg->max_client_subnet_ipv4;
		else if(sq->ecs_server_out.subnet_addr_fam == EDNSSUBNET_ADDRFAM_IP6
			&& sq->ecs_server_out.subnet_source_mask >
			qstate->env->cfg->max_client_subnet_ipv6)
			sq->ecs_server_out.subnet_source_mask =
				qstate->env->cfg->max_client_subnet_ipv6;
		/* Safe to copy completely, even if the source is limited by the
		 * configuration. subnet_ecs_opt_list_append() will limit the address.
		 * */
		memcpy(&sq->ecs_server_out.subnet_addr,
			sq->ecs_client_in.subnet_addr, INET6_SIZE);
		sq->ecs_server_out.subnet_scope_mask = 0;
		sq->ecs_server_out.subnet_validdata = 1;
		if(sq->ecs_server_out.subnet_source_mask != 0 &&
			qstate->env->cfg->client_subnet_always_forward &&
			sq->subnet_downstream)
			/* ECS specific data required, do not look at the global
			 * cache in other modules. */
			qstate->no_cache_lookup = 1;

		/* pass request to next module */
		verbose(VERB_ALGO,
			"subnetcache: not found in cache. pass to next module");
		qstate->ext_state[id] = module_wait_module;
		return;
	}
	/* Query handed back by next module, we have a 'final' answer */
	if(sq && event == module_event_moddone) {
		qstate->ext_state[id] = eval_response(qstate, id, sq);
		if(qstate->ext_state[id] == module_finished &&
			qstate->return_msg) {
			subnet_ecs_opt_list_append(&sq->ecs_client_out,
				&qstate->edns_opts_front_out, qstate,
				qstate->region);
			if(verbosity >= VERB_ALGO) {
				subnet_log_print("reply has edns subnet",
					edns_opt_list_find(
					qstate->edns_opts_front_out,
					qstate->env->cfg->
					client_subnet_opcode));
			}
		}
		/* Put back the cache flags the query started with. */
		qstate->no_cache_store = sq->started_no_cache_store;
		qstate->no_cache_lookup = sq->started_no_cache_lookup;
		return;
	}
	/* Nothing to do for an event that carries an outbound entry. */
	if(sq && outbound) {
		return;
	}
	/* We are being revisited */
	if(event == module_event_pass || event == module_event_new) {
		/* Just pass it on, we already did the work */
		verbose(VERB_ALGO, "subnetcache: pass to next module");
		qstate->ext_state[id] = module_wait_module;
		return;
	}
	if(!sq && (event == module_event_moddone)) {
		/* during priming, module done but we never started */
		qstate->ext_state[id] = module_finished;
		return;
	}
	log_err("subnetcache: bad event %s", strmodulevent(event));
	qstate->ext_state[id] = module_error;
	return;
}
969
/**
 * Clear the module state of a query. Nothing to do for this module.
 * @param qstate: unused.
 * @param id: unused.
 */
void
subnetmod_clear(struct module_qstate *ATTR_UNUSED(qstate),
	int ATTR_UNUSED(id))
{
	/* qstate has no data outside region */
}
976
/**
 * Inform-super entry of the module function block. This module does
 * nothing here.
 * @param qstate: unused.
 * @param id: unused.
 * @param super: unused.
 */
void
subnetmod_inform_super(struct module_qstate *ATTR_UNUSED(qstate),
	int ATTR_UNUSED(id), struct module_qstate *ATTR_UNUSED(super))
{
	/* Not used */
}
983
984size_t
985subnetmod_get_mem(struct module_env *env, int id)
986{
987	struct subnet_env *sn_env = env->modinfo[id];
988	if (!sn_env) return 0;
989	return sizeof(*sn_env) +
990		slabhash_get_mem(sn_env->subnet_msg_cache) +
991		ecs_whitelist_get_mem(sn_env->whitelist);
992}
993
/**
 * The module function block.
 * Holds the module name "subnetcache" followed by the init, deinit,
 * operate, inform_super, clear and get_mem functions of this module.
 */
static struct module_func_block subnetmod_block = {
	"subnetcache", &subnetmod_init, &subnetmod_deinit, &subnetmod_operate,
	&subnetmod_inform_super, &subnetmod_clear, &subnetmod_get_mem
};
1001
/**
 * Get the function block of the subnet module.
 * @return pointer to the static function block of this file.
 */
struct module_func_block*
subnetmod_get_funcblock(void)
{
	return &subnetmod_block;
}
1007
/**
 * Wrappers for static functions to unit test.
 * This one exposes the static sizefunc of this file.
 * @param elemptr: handed to sizefunc unchanged.
 * @return the result of sizefunc.
 */
size_t
unittest_wrapper_subnetmod_sizefunc(void *elemptr)
{
	return sizefunc(elemptr);
}
1014
1015#endif  /* CLIENT_SUBNET */
1016