1/*
2 * services/mesh.h - deal with mesh of query states and handle events for that.
3 *
4 * Copyright (c) 2007, NLnet Labs. All rights reserved.
5 *
6 * This software is open source.
7 *
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
10 * are met:
11 *
12 * Redistributions of source code must retain the above copyright notice,
13 * this list of conditions and the following disclaimer.
14 *
15 * Redistributions in binary form must reproduce the above copyright notice,
16 * this list of conditions and the following disclaimer in the documentation
17 * and/or other materials provided with the distribution.
18 *
19 * Neither the name of the NLNET LABS nor the names of its contributors may
20 * be used to endorse or promote products derived from this software without
21 * specific prior written permission.
22 *
23 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
24 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
25 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
26 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
27 * HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
28 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
29 * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
30 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
31 * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
32 * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
33 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
34 */
35
36/**
37 * \file
38 *
39 * This file contains functions to assist in dealing with a mesh of
40 * query states. This mesh is supposed to be thread-specific.
41 * It consists of query states (per qname, qtype, qclass) and connections
42 * between query states and the super and subquery states, and replies to
43 * send back to clients.
44 */
45
46#ifndef SERVICES_MESH_H
47#define SERVICES_MESH_H
48
49#include "util/rbtree.h"
50#include "util/netevent.h"
51#include "util/data/msgparse.h"
52#include "util/module.h"
53#include "services/modstack.h"
54struct sldns_buffer;
55struct mesh_state;
56struct mesh_reply;
57struct mesh_cb;
58struct query_info;
59struct reply_info;
60struct outbound_entry;
61struct timehist;
62
63/**
64 * Maximum number of mesh state activations. Any more is likely an
65 * infinite loop in the module. It is then terminated.
66 */
67#define MESH_MAX_ACTIVATION 3000
68
69/**
70 * Max number of references-to-references-to-references.. search size.
71 * Any more is treated like 'too large', and the creation of a new
72 * dependency is failed (so that no loops can be created).
73 */
74#define MESH_MAX_SUBSUB 1024
75
76/**
77 * Mesh of query states
78 */
79struct mesh_area {
80	/** active module stack */
81	struct module_stack mods;
82	/** environment for new states */
83	struct module_env* env;
84
85	/** set of runnable queries (mesh_state.run_node) */
86	rbtree_t run;
87	/** rbtree of all current queries (mesh_state.node)*/
88	rbtree_t all;
89
90	/** count of the total number of mesh_reply entries */
91	size_t num_reply_addrs;
92	/** count of the number of mesh_states that have mesh_replies
93	 * Because a state can send results to multiple reply addresses,
94	 * this number must be equal or lower than num_reply_addrs. */
95	size_t num_reply_states;
96	/** number of mesh_states that have no mesh_replies, and also
97	 * an empty set of super-states, thus are 'toplevel' or detached
98	 * internal opportunistic queries */
99	size_t num_detached_states;
100	/** number of reply states in the forever list */
101	size_t num_forever_states;
102
103	/** max total number of reply states to have */
104	size_t max_reply_states;
105	/** max forever number of reply states to have */
106	size_t max_forever_states;
107
108	/** stats, cumulative number of reply states jostled out */
109	size_t stats_jostled;
110	/** stats, cumulative number of incoming client msgs dropped */
111	size_t stats_dropped;
112	/** number of replies sent */
113	size_t replies_sent;
114	/** sum of waiting times for the replies */
115	struct timeval replies_sum_wait;
116	/** histogram of time values */
117	struct timehist* histogram;
118	/** (extended stats) secure replies */
119	size_t ans_secure;
120	/** (extended stats) bogus replies */
121	size_t ans_bogus;
122	/** (extended stats) rcodes in replies */
123	size_t ans_rcode[16];
124	/** (extended stats) rcode nodata in replies */
125	size_t ans_nodata;
126
127	/** backup of query if other operations recurse and need the
128	 * network buffers */
129	struct sldns_buffer* qbuf_bak;
130
131	/** double linked list of the run-to-completion query states.
132	 * These are query states with a reply */
133	struct mesh_state* forever_first;
134	/** last entry in run forever list */
135	struct mesh_state* forever_last;
136
137	/** double linked list of the query states that can be jostled out
138	 * by new queries if too old.  These are query states with a reply */
139	struct mesh_state* jostle_first;
140	/** last entry in jostle list - this is the entry that is newest */
141	struct mesh_state* jostle_last;
142	/** timeout for jostling. if age is lower, it does not get jostled. */
143	struct timeval jostle_max;
144};
145
146/**
147 * A mesh query state
148 * Unique per qname, qtype, qclass (from the qstate).
149 * And RD / CD flag; in case a client turns it off.
150 * And priming queries are different from ordinary queries (because of hints).
151 *
152 * The entire structure is allocated in a region, this region is the qstate
153 * region. All parts (rbtree nodes etc) are also allocated in the region.
154 */
155struct mesh_state {
156	/** node in mesh_area all tree, key is this struct. Must be first. */
157	rbnode_t node;
158	/** node in mesh_area runnable tree, key is this struct */
159	rbnode_t run_node;
160	/** the query state. Note that the qinfo and query_flags
161	 * may not change. */
162	struct module_qstate s;
163	/** the list of replies to clients for the results */
164	struct mesh_reply* reply_list;
165	/** the list of callbacks for the results */
166	struct mesh_cb* cb_list;
167	/** set of superstates (that want this state's result)
168	 * contains struct mesh_state_ref* */
169	rbtree_t super_set;
170	/** set of substates (that this state needs to continue)
171	 * contains struct mesh_state_ref* */
172	rbtree_t sub_set;
173	/** number of activations for the mesh state */
174	size_t num_activated;
175
176	/** previous in linked list for reply states */
177	struct mesh_state* prev;
178	/** next in linked list for reply states */
179	struct mesh_state* next;
180	/** if this state is in the forever list, jostle list, or neither */
181	enum mesh_list_select { mesh_no_list, mesh_forever_list,
182		mesh_jostle_list } list_select;
183
184	/** true if replies have been sent out (at end for alignment) */
185	uint8_t replies_sent;
186};
187
188/**
189 * Rbtree reference to a mesh_state.
190 * Used in super_set and sub_set.
191 */
192struct mesh_state_ref {
193	/** node in rbtree for set, key is this structure */
194	rbnode_t node;
195	/** the mesh state */
196	struct mesh_state* s;
197};
198
199/**
200 * Reply to a client
201 */
202struct mesh_reply {
203	/** next in reply list */
204	struct mesh_reply* next;
205	/** the query reply destination, packet buffer and where to send. */
206	struct comm_reply query_reply;
207	/** edns data from query */
208	struct edns_data edns;
209	/** the time when request was entered */
210	struct timeval start_time;
211	/** id of query, in network byteorder. */
212	uint16_t qid;
213	/** flags of query, for reply flags */
214	uint16_t qflags;
215	/** qname from this query. len same as mesh qinfo. */
216	uint8_t* qname;
217};
218
219/**
220 * Mesh result callback func.
221 * called as func(cb_arg, rcode, buffer_with_reply, security, why_bogus);
222 */
223typedef void (*mesh_cb_func_t)(void*, int, struct sldns_buffer*, enum sec_status,
224	char*);
225
226/**
227 * Callback to result routine
228 */
229struct mesh_cb {
230	/** next in list */
231	struct mesh_cb* next;
232	/** edns data from query */
233	struct edns_data edns;
234	/** id of query, in network byteorder. */
235	uint16_t qid;
236	/** flags of query, for reply flags */
237	uint16_t qflags;
238	/** buffer for reply */
239	struct sldns_buffer* buf;
240
241	/** callback routine for results. if rcode != 0 buf has message.
242	 * called as cb(cb_arg, rcode, buf, sec_state);
243	 */
244	mesh_cb_func_t cb;
245	/** user arg for callback */
246	void* cb_arg;
247};
248
249/* ------------------- Functions for worker -------------------- */
250
251/**
252 * Allocate mesh, to empty.
253 * @param stack: module stack to activate, copied (as readonly reference).
254 * @param env: environment for new queries.
255 * @return mesh: the new mesh or NULL on error.
256 */
257struct mesh_area* mesh_create(struct module_stack* stack,
258	struct module_env* env);
259
260/**
261 * Delete mesh, and all query states and replies in it.
262 * @param mesh: the mesh to delete.
263 */
264void mesh_delete(struct mesh_area* mesh);
265
266/**
267 * New query incoming from clients. Create new query state if needed, and
268 * add mesh_reply to it. Returns error to client on malloc failures.
269 * Will run the mesh area queries to process if a new query state is created.
270 *
271 * @param mesh: the mesh.
272 * @param qinfo: query from client.
273 * @param qflags: flags from client query.
274 * @param edns: edns data from client query.
275 * @param rep: where to reply to.
276 * @param qid: query id to reply with.
277 */
278void mesh_new_client(struct mesh_area* mesh, struct query_info* qinfo,
279	uint16_t qflags, struct edns_data* edns, struct comm_reply* rep,
280	uint16_t qid);
281
282/**
283 * New query with callback. Create new query state if needed, and
284 * add mesh_cb to it.
285 * Will run the mesh area queries to process if a new query state is created.
286 *
287 * @param mesh: the mesh.
288 * @param qinfo: query from client.
289 * @param qflags: flags from client query.
290 * @param edns: edns data from client query.
291 * @param buf: buffer for reply contents.
292 * @param qid: query id to reply with.
293 * @param cb: callback function.
294 * @param cb_arg: callback user arg.
295 * @return 0 on error.
296 */
297int mesh_new_callback(struct mesh_area* mesh, struct query_info* qinfo,
298	uint16_t qflags, struct edns_data* edns, struct sldns_buffer* buf,
299	uint16_t qid, mesh_cb_func_t cb, void* cb_arg);
300
301/**
302 * New prefetch message. Create new query state if needed.
303 * Will run the mesh area queries to process if a new query state is created.
304 *
305 * @param mesh: the mesh.
306 * @param qinfo: query from client.
307 * @param qflags: flags from client query.
308 * @param leeway: TTL leeway what to expire earlier for this update.
309 */
310void mesh_new_prefetch(struct mesh_area* mesh, struct query_info* qinfo,
311	uint16_t qflags, time_t leeway);
312
313/**
314 * Handle new event from the wire. A serviced query has returned.
315 * The query state will be made runnable, and the mesh_area will process
316 * query states until processing is complete.
317 *
318 * @param mesh: the query mesh.
319 * @param e: outbound entry, with query state to run and reply pointer.
320 * @param reply: the comm point reply info.
321 * @param what: NETEVENT_* error code (if not 0, what is wrong, TIMEOUT).
322 */
323void mesh_report_reply(struct mesh_area* mesh, struct outbound_entry* e,
324	struct comm_reply* reply, int what);
325
326/* ------------------- Functions for module environment --------------- */
327
328/**
329 * Detach-subqueries.
330 * Remove all sub-query references from this query state.
331 * Keeps super-references of those sub-queries correct.
332 * Updates stat items in mesh_area structure.
333 * @param qstate: used to find mesh state.
334 */
335void mesh_detach_subs(struct module_qstate* qstate);
336
337/**
338 * Attach subquery.
339 * Creates it if it does not exist already.
340 * Keeps sub and super references correct.
341 * Performs a cycle detection - for double check - and fails if there is one.
342 * Also fails if the sub-sub-references become too large.
343 * Updates stat items in mesh_area structure.
344 * Pass if it is priming query or not.
345 * return:
346 * 	o if error (malloc) happened.
347 * 	o need to initialise the new state (module init; it is a new state).
348 * 	  so that the next run of the query with this module is successful.
349 * 	o no init needed, attachment successful.
350 *
351 * @param qstate: the state to find mesh state, and that wants to receive
352 * 	the results from the new subquery.
353 * @param qinfo: what to query for (copied).
354 * @param qflags: what flags to use (RD / CD flag or not).
355 * @param prime: if it is a (stub) priming query.
356 * @param newq: If the new subquery needs initialisation, it is returned,
357 * 	otherwise NULL is returned.
358 * @return: false on error, true if success (and init may be needed).
359 */
360int mesh_attach_sub(struct module_qstate* qstate, struct query_info* qinfo,
361	uint16_t qflags, int prime, struct module_qstate** newq);
362
363/**
364 * Query state is done, send messages to reply entries.
365 * Encode messages using reply entry values and the querystate (with original
366 * qinfo), using given reply_info.
367 * Pass errcode != 0 if an error reply is needed.
368 * If no reply entries, nothing is done.
369 * Must be called before a module can module_finished or return module_error.
370 * The module must handle the super query states itself as well.
371 *
372 * @param mstate: mesh state that is done. return_rcode and return_msg
373 * 	are used for replies.
374 * 	return_rcode: if not 0 (NOERROR) an error is sent back (and
375 * 		return_msg is ignored).
376 * 	return_msg: reply to encode and send back to clients.
377 */
378void mesh_query_done(struct mesh_state* mstate);
379
380/**
381 * Call inform_super for the super query states that are interested in the
382 * results from this query state. These can then be changed for error
383 * or results.
384 * Called when a module is module_finished or returns module_error.
385 * The super query states become runnable with event module_event_pass,
386 * it calls the current module for the super with the inform_super event.
387 *
388 * @param mesh: mesh area to add newly runnable modules to.
389 * @param mstate: the state that has results, used to find mesh state.
390 */
391void mesh_walk_supers(struct mesh_area* mesh, struct mesh_state* mstate);
392
393/**
394 * Delete mesh state, cleanup and also rbtrees and so on.
395 * Will detach from all super/subnodes.
396 * @param qstate: to remove.
397 */
398void mesh_state_delete(struct module_qstate* qstate);
399
400/* ------------------- Functions for mesh -------------------- */
401
402/**
403 * Create and initialize a new mesh state and its query state
404 * Does not put the mesh state into rbtrees and so on.
405 * @param env: module environment to set.
406 * @param qinfo: query info that the mesh is for.
407 * @param qflags: flags for query (RD / CD flag).
408 * @param prime: if true, it is a priming query, set is_priming on mesh state.
409 * @return: new mesh state or NULL on allocation error.
410 */
411struct mesh_state* mesh_state_create(struct module_env* env,
412	struct query_info* qinfo, uint16_t qflags, int prime);
413
414/**
415 * Cleanup a mesh state and its query state. Does not do rbtree or
416 * reference cleanup.
417 * @param mstate: mesh state to cleanup. Its pointer may no longer be used
418 * 	afterwards. Cleanup rbtrees before calling this function.
419 */
420void mesh_state_cleanup(struct mesh_state* mstate);
421
422/**
423 * Delete all mesh states from the mesh.
424 * @param mesh: the mesh area to clear
425 */
426void mesh_delete_all(struct mesh_area* mesh);
427
428/**
429 * Find a mesh state in the mesh area. Pass relevant flags.
430 *
431 * @param mesh: the mesh area to look in.
432 * @param qinfo: what query
433 * @param qflags: if RD / CD bit is set or not.
434 * @param prime: if it is a priming query.
435 * @return: mesh state or NULL if not found.
436 */
437struct mesh_state* mesh_area_find(struct mesh_area* mesh,
438	struct query_info* qinfo, uint16_t qflags, int prime);
439
440/**
441 * Setup attachment super/sub relation between super and sub mesh state.
442 * The relation must not be present when calling the function.
443 * Does not update stat items in mesh_area.
444 * @param super: super state.
445 * @param sub: sub state.
446 * @return: 0 on alloc error.
447 */
448int mesh_state_attachment(struct mesh_state* super, struct mesh_state* sub);
449
450/**
451 * Create new reply structure and attach it to a mesh state.
452 * Does not update stat items in mesh area.
453 * @param s: the mesh state.
454 * @param edns: edns data for reply (bufsize).
455 * @param rep: comm point reply info.
456 * @param qid: ID of reply.
457 * @param qflags: original query flags.
458 * @param qname: original query name.
459 * @return: 0 on alloc error.
460 */
461int mesh_state_add_reply(struct mesh_state* s, struct edns_data* edns,
462	struct comm_reply* rep, uint16_t qid, uint16_t qflags, uint8_t* qname);
463
464/**
465 * Create new callback structure and attach it to a mesh state.
466 * Does not update stat items in mesh area.
467 * @param s: the mesh state.
468 * @param edns: edns data for reply (bufsize).
469 * @param buf: buffer for reply
470 * @param cb: callback to call with results.
471 * @param cb_arg: callback user arg.
472 * @param qid: ID of reply.
473 * @param qflags: original query flags.
474 * @return: 0 on alloc error.
475 */
476int mesh_state_add_cb(struct mesh_state* s, struct edns_data* edns,
477        struct sldns_buffer* buf, mesh_cb_func_t cb, void* cb_arg, uint16_t qid,
478	uint16_t qflags);
479
480/**
481 * Run the mesh. Run all runnable mesh states. Which can create new
482 * runnable mesh states. Until completion. Automatically called by
483 * mesh_report_reply and mesh_new_client as needed.
484 * @param mesh: mesh area.
485 * @param mstate: first mesh state to run.
486 * @param ev: event the mstate. Others get event_pass.
487 * @param e: if a reply, its outbound entry.
488 */
489void mesh_run(struct mesh_area* mesh, struct mesh_state* mstate,
490	enum module_ev ev, struct outbound_entry* e);
491
492/**
493 * Print some stats about the mesh to the log.
494 * @param mesh: the mesh to print it for.
495 * @param str: descriptive string to go with it.
496 */
497void mesh_stats(struct mesh_area* mesh, const char* str);
498
499/**
500 * Clear the stats that the mesh keeps (number of queries serviced)
501 * @param mesh: the mesh
502 */
503void mesh_stats_clear(struct mesh_area* mesh);
504
505/**
506 * Print all the states in the mesh to the log.
507 * @param mesh: the mesh to print all states of.
508 */
509void mesh_log_list(struct mesh_area* mesh);
510
511/**
512 * Calculate memory size in use by mesh and all queries inside it.
513 * @param mesh: the mesh to examine.
514 * @return size in bytes.
515 */
516size_t mesh_get_mem(struct mesh_area* mesh);
517
518/**
519 * Find cycle; see if the given mesh is in the targets sub, or sub-sub, ...
520 * trees.
521 * If the sub-sub structure is too large, it returns 'a cycle'=2.
522 * @param qstate: given mesh querystate.
523 * @param qinfo: query info for dependency.
524 * @param flags: query flags of dependency.
525 * @param prime: if dependency is a priming query or not.
526 * @return true if the name,type,class exists and the given qstate mesh exists
527 * 	as a dependency of that name. Thus if qstate becomes dependent on
528 * 	name,type,class then a cycle is created, this is return value 1.
529 * 	Too large to search is value 2 (also true).
530 */
531int mesh_detect_cycle(struct module_qstate* qstate, struct query_info* qinfo,
532	uint16_t flags, int prime);
533
534/** compare two mesh_states */
535int mesh_state_compare(const void* ap, const void* bp);
536
537/** compare two mesh references */
538int mesh_state_ref_compare(const void* ap, const void* bp);
539
540/**
541 * Make space for another recursion state for a reply in the mesh
542 * @param mesh: mesh area
543 * @param qbuf: query buffer to save if recursion is invoked to make space.
544 *    This buffer is necessary, because the following sequence in calls
545 *    can result in an overwrite of the incoming query:
546 *    delete_other_mesh_query - iter_clean - serviced_delete - waiting
547 *    udp query is sent - on error callback - callback sends SERVFAIL reply
548 *    over the same network channel, and shared UDP buffer is overwritten.
549 *    You can pass NULL if there is no buffer that must be backed up.
550 * @return false if no space is available.
551 */
552int mesh_make_new_space(struct mesh_area* mesh, struct sldns_buffer* qbuf);
553
554/**
555 * Insert mesh state into a double linked list.  Inserted at end.
556 * @param m: mesh state.
557 * @param fp: pointer to the first-elem-pointer of the list.
558 * @param lp: pointer to the last-elem-pointer of the list.
559 */
560void mesh_list_insert(struct mesh_state* m, struct mesh_state** fp,
561	struct mesh_state** lp);
562
563/**
564 * Remove mesh state from a double linked list.  Remove from any position.
565 * @param m: mesh state.
566 * @param fp: pointer to the first-elem-pointer of the list.
567 * @param lp: pointer to the last-elem-pointer of the list.
568 */
569void mesh_list_remove(struct mesh_state* m, struct mesh_state** fp,
570	struct mesh_state** lp);
571
572#endif /* SERVICES_MESH_H */
573