stats.c revision 291767
1204076Spjd/* 2204076Spjd * daemon/stats.c - collect runtime performance indicators. 3220272Spjd * 4204076Spjd * Copyright (c) 2007, NLnet Labs. All rights reserved. 5204076Spjd * 6204076Spjd * This software is open source. 7204076Spjd * 8204076Spjd * Redistribution and use in source and binary forms, with or without 9204076Spjd * modification, are permitted provided that the following conditions 10204076Spjd * are met: 11204076Spjd * 12204076Spjd * Redistributions of source code must retain the above copyright notice, 13204076Spjd * this list of conditions and the following disclaimer. 14204076Spjd * 15204076Spjd * Redistributions in binary form must reproduce the above copyright notice, 16204076Spjd * this list of conditions and the following disclaimer in the documentation 17204076Spjd * and/or other materials provided with the distribution. 18204076Spjd * 19204076Spjd * Neither the name of the NLNET LABS nor the names of its contributors may 20204076Spjd * be used to endorse or promote products derived from this software without 21204076Spjd * specific prior written permission. 22204076Spjd * 23204076Spjd * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 24204076Spjd * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 25204076Spjd * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 26204076Spjd * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 27204076Spjd * HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 28204076Spjd * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED 29204076Spjd * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 30204076Spjd * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF 31204076Spjd * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING 32204076Spjd * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 33204076Spjd * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 34204076Spjd */ 35204076Spjd 36204076Spjd/** 37204076Spjd * \file 38220272Spjd * 39220272Spjd * This file describes the data structure used to collect runtime performance 40204076Spjd * numbers. These 'statistics' may be of interest to the operator. 41204076Spjd */ 42220272Spjd#include "config.h" 43204076Spjd#ifdef HAVE_TIME_H 44218138Spjd#include <time.h> 45204076Spjd#endif 46204076Spjd#include <sys/time.h> 47204076Spjd#include <sys/types.h> 48204076Spjd#include "daemon/stats.h" 49211452Spjd#include "daemon/worker.h" 50204076Spjd#include "daemon/daemon.h" 51204076Spjd#include "services/mesh.h" 52220272Spjd#include "services/outside_network.h" 53220272Spjd#include "services/listen_dnsport.h" 54220272Spjd#include "util/config_file.h" 55220272Spjd#include "util/tube.h" 56220272Spjd#include "util/timehist.h" 57220272Spjd#include "util/net_help.h" 58220272Spjd#include "validator/validator.h" 59220272Spjd#include "sldns/sbuffer.h" 60220272Spjd#include "services/cache/rrset.h" 61220272Spjd#include "services/cache/infra.h" 62218194Spjd#include "validator/val_kcache.h" 63218194Spjd 64204076Spjd/** add timers and the values do not overflow or become negative */ 65218139Spjdstatic void 66218139Spjdtimeval_add(struct timeval* d, const struct timeval* add) 67218139Spjd{ 68218139Spjd#ifndef S_SPLINT_S 69218139Spjd d->tv_sec += add->tv_sec; 70218139Spjd d->tv_usec += add->tv_usec; 71218139Spjd if(d->tv_usec > 1000000) { 72218139Spjd d->tv_usec -= 1000000; 73218139Spjd d->tv_sec++; 74218139Spjd } 75218139Spjd#endif 76218139Spjd} 77218139Spjd 78218139Spjdvoid server_stats_init(struct server_stats* stats, struct config_file* cfg) 79218139Spjd{ 80218139Spjd memset(stats, 0, sizeof(*stats)); 81218139Spjd stats->extended = cfg->stat_extended; 82218139Spjd} 83218139Spjd 84218148Spjdvoid server_stats_querymiss(struct server_stats* stats, struct worker* worker) 85218139Spjd{ 86218139Spjd stats->num_queries_missed_cache++; 87218139Spjd stats->sum_query_list_size += worker->env.mesh->all.count; 88218139Spjd if(worker->env.mesh->all.count > stats->max_query_list_size) 89218139Spjd stats->max_query_list_size = worker->env.mesh->all.count; 90218139Spjd} 91218139Spjd 92218139Spjdvoid server_stats_prefetch(struct server_stats* stats, struct worker* worker) 93218194Spjd{ 94218139Spjd stats->num_queries_prefetch++; 95218194Spjd /* changes the query list size so account that, like a querymiss */ 96218194Spjd stats->sum_query_list_size += worker->env.mesh->all.count; 97220273Spjd if(worker->env.mesh->all.count > stats->max_query_list_size) 98218194Spjd stats->max_query_list_size = worker->env.mesh->all.count; 99218194Spjd} 100220270Spjd 101220270Spjdvoid server_stats_log(struct server_stats* stats, struct worker* worker, 102220270Spjd int threadnum) 103220270Spjd{ 104220270Spjd log_info("server stats for thread %d: %u queries, " 105220270Spjd "%u answers from cache, %u recursions, %u prefetch", 106220270Spjd threadnum, (unsigned)stats->num_queries, 107220270Spjd (unsigned)(stats->num_queries - 108220270Spjd stats->num_queries_missed_cache), 109220270Spjd (unsigned)stats->num_queries_missed_cache, 110220270Spjd (unsigned)stats->num_queries_prefetch); 111218194Spjd log_info("server stats for thread %d: requestlist max %u avg %g " 112218194Spjd "exceeded %u jostled %u", threadnum, 113218194Spjd (unsigned)stats->max_query_list_size, 114218194Spjd (stats->num_queries_missed_cache+stats->num_queries_prefetch)? 115218194Spjd (double)stats->sum_query_list_size/ 116218194Spjd (stats->num_queries_missed_cache+ 117220272Spjd stats->num_queries_prefetch) : 0.0, 118218194Spjd (unsigned)worker->env.mesh->stats_dropped, 119231017Strociny (unsigned)worker->env.mesh->stats_jostled); 120218194Spjd} 121218194Spjd 122220273Spjd/** get rrsets bogus number from validator */ 123220273Spjdstatic size_t 124220273Spjdget_rrset_bogus(struct worker* worker) 125220273Spjd{ 126220273Spjd int m = modstack_find(&worker->env.mesh->mods, "validator"); 127220273Spjd struct val_env* ve; 128220273Spjd size_t r; 129220273Spjd if(m == -1) 130220273Spjd return 0; 131220273Spjd ve = (struct val_env*)worker->env.modinfo[m]; 132220273Spjd lock_basic_lock(&ve->bogus_lock); 133220273Spjd r = ve->num_rrset_bogus; 134220273Spjd if(!worker->env.cfg->stat_cumulative) 135220273Spjd ve->num_rrset_bogus = 0; 136220273Spjd lock_basic_unlock(&ve->bogus_lock); 137220273Spjd return r; 138220273Spjd} 139220272Spjd 140220272Spjdvoid 141220272Spjdserver_stats_compile(struct worker* worker, struct stats_info* s, int reset) 142220272Spjd{ 143220272Spjd int i; 144220272Spjd struct listen_list* lp; 145220272Spjd 146220272Spjd s->svr = worker->stats; 147218194Spjd s->mesh_num_states = worker->env.mesh->all.count; 148218194Spjd s->mesh_num_reply_states = worker->env.mesh->num_reply_states; 149218194Spjd s->mesh_jostled = worker->env.mesh->stats_jostled; 150218194Spjd s->mesh_dropped = worker->env.mesh->stats_dropped; 151218194Spjd s->mesh_replies_sent = worker->env.mesh->replies_sent; 152220273Spjd s->mesh_replies_sum_wait = worker->env.mesh->replies_sum_wait; 153220273Spjd s->mesh_time_median = timehist_quartile(worker->env.mesh->histogram, 154220273Spjd 0.50); 155220273Spjd 156218194Spjd /* add in the values from the mesh */ 157218194Spjd s->svr.ans_secure += worker->env.mesh->ans_secure; 158218194Spjd s->svr.ans_bogus += worker->env.mesh->ans_bogus; 159218194Spjd s->svr.ans_rcode_nodata += worker->env.mesh->ans_nodata; 160218194Spjd for(i=0; i<16; i++) 161218194Spjd s->svr.ans_rcode[i] += worker->env.mesh->ans_rcode[i]; 162218194Spjd timehist_export(worker->env.mesh->histogram, s->svr.hist, 163218194Spjd NUM_BUCKETS_HIST); 164218194Spjd /* values from outside network */ 165218139Spjd s->svr.unwanted_replies = worker->back->unwanted_replies; 166218139Spjd s->svr.qtcp_outgoing = worker->back->num_tcp_outgoing; 167218139Spjd 168218139Spjd /* get and reset validator rrset bogus number */ 169218139Spjd s->svr.rrset_bogus = get_rrset_bogus(worker); 170218139Spjd 171218139Spjd /* get cache sizes */ 172218139Spjd s->svr.msg_cache_count = count_slabhash_entries(worker->env.msg_cache); 173218139Spjd s->svr.rrset_cache_count = count_slabhash_entries(&worker->env.rrset_cache->table); 174218139Spjd s->svr.infra_cache_count = count_slabhash_entries(worker->env.infra_cache->hosts); 175218139Spjd if(worker->env.key_cache) 176218139Spjd s->svr.key_cache_count = count_slabhash_entries(worker->env.key_cache->slab); 177218139Spjd else s->svr.key_cache_count = 0; 178218139Spjd 179218139Spjd /* get tcp accept usage */ 180218139Spjd s->svr.tcp_accept_usage = 0; 181218139Spjd for(lp = worker->front->cps; lp; lp = lp->next) { 182218139Spjd if(lp->com->type == comm_tcp_accept) 183218148Spjd s->svr.tcp_accept_usage += lp->com->cur_tcp_count; 184218148Spjd } 185218194Spjd 186218148Spjd if(reset && !worker->env.cfg->stat_cumulative) { 187218139Spjd worker_stats_clear(worker); 188218148Spjd } 189218139Spjd} 190218148Spjd 191218139Spjdvoid server_stats_obtain(struct worker* worker, struct worker* who, 192218194Spjd struct stats_info* s, int reset) 193218194Spjd{ 194218194Spjd uint8_t *reply = NULL; 195218194Spjd uint32_t len = 0; 196218194Spjd if(worker == who) { 197218194Spjd /* just fill it in */ 198218194Spjd server_stats_compile(worker, s, reset); 199220270Spjd return; 200220270Spjd } 201220270Spjd /* communicate over tube */ 202220270Spjd verbose(VERB_ALGO, "write stats cmd"); 203220270Spjd if(reset) 204220270Spjd worker_send_cmd(who, worker_cmd_stats); 205220270Spjd else worker_send_cmd(who, worker_cmd_stats_noreset); 206220270Spjd verbose(VERB_ALGO, "wait for stats reply"); 207220270Spjd if(!tube_read_msg(worker->cmd, &reply, &len, 0)) 208220270Spjd fatal_exit("failed to read stats over cmd channel"); 209220270Spjd if(len != (uint32_t)sizeof(*s)) 210218194Spjd fatal_exit("stats on cmd channel wrong length %d %d", 211218194Spjd (int)len, (int)sizeof(*s)); 212218194Spjd memcpy(s, reply, (size_t)len); 213218194Spjd free(reply); 214223143Ssobomax} 215223143Ssobomax 216220272Spjdvoid server_stats_reply(struct worker* worker, int reset) 217218194Spjd{ 218231017Strociny struct stats_info s; 219220272Spjd server_stats_compile(worker, &s, reset); 220220272Spjd verbose(VERB_ALGO, "write stats replymsg"); 221220272Spjd if(!tube_write_msg(worker->daemon->workers[0]->cmd, 222220272Spjd (uint8_t*)&s, sizeof(s), 0)) 223220272Spjd fatal_exit("could not write stat values over cmd channel"); 224220272Spjd} 225220272Spjd 226220272Spjdvoid server_stats_add(struct stats_info* total, struct stats_info* a) 227218194Spjd{ 228220272Spjd total->svr.num_queries += a->svr.num_queries; 229218194Spjd total->svr.num_queries_missed_cache += a->svr.num_queries_missed_cache; 230218194Spjd total->svr.num_queries_prefetch += a->svr.num_queries_prefetch; 231218194Spjd total->svr.sum_query_list_size += a->svr.sum_query_list_size; 232218194Spjd /* the max size reached is upped to higher of both */ 233 if(a->svr.max_query_list_size > total->svr.max_query_list_size) 234 total->svr.max_query_list_size = a->svr.max_query_list_size; 235 236 if(a->svr.extended) { 237 int i; 238 total->svr.qtype_big += a->svr.qtype_big; 239 total->svr.qclass_big += a->svr.qclass_big; 240 total->svr.qtcp += a->svr.qtcp; 241 total->svr.qtcp_outgoing += a->svr.qtcp_outgoing; 242 total->svr.qipv6 += a->svr.qipv6; 243 total->svr.qbit_QR += a->svr.qbit_QR; 244 total->svr.qbit_AA += a->svr.qbit_AA; 245 total->svr.qbit_TC += a->svr.qbit_TC; 246 total->svr.qbit_RD += a->svr.qbit_RD; 247 total->svr.qbit_RA += a->svr.qbit_RA; 248 total->svr.qbit_Z += a->svr.qbit_Z; 249 total->svr.qbit_AD += a->svr.qbit_AD; 250 total->svr.qbit_CD += a->svr.qbit_CD; 251 total->svr.qEDNS += a->svr.qEDNS; 252 total->svr.qEDNS_DO += a->svr.qEDNS_DO; 253 total->svr.ans_rcode_nodata += a->svr.ans_rcode_nodata; 254 total->svr.ans_secure += a->svr.ans_secure; 255 total->svr.ans_bogus += a->svr.ans_bogus; 256 total->svr.rrset_bogus += a->svr.rrset_bogus; 257 total->svr.unwanted_replies += a->svr.unwanted_replies; 258 total->svr.unwanted_queries += a->svr.unwanted_queries; 259 total->svr.tcp_accept_usage += a->svr.tcp_accept_usage; 260 for(i=0; i<STATS_QTYPE_NUM; i++) 261 total->svr.qtype[i] += a->svr.qtype[i]; 262 for(i=0; i<STATS_QCLASS_NUM; i++) 263 total->svr.qclass[i] += a->svr.qclass[i]; 264 for(i=0; i<STATS_OPCODE_NUM; i++) 265 total->svr.qopcode[i] += a->svr.qopcode[i]; 266 for(i=0; i<STATS_RCODE_NUM; i++) 267 total->svr.ans_rcode[i] += a->svr.ans_rcode[i]; 268 for(i=0; i<NUM_BUCKETS_HIST; i++) 269 total->svr.hist[i] += a->svr.hist[i]; 270 } 271 272 total->mesh_num_states += a->mesh_num_states; 273 total->mesh_num_reply_states += a->mesh_num_reply_states; 274 total->mesh_jostled += a->mesh_jostled; 275 total->mesh_dropped += a->mesh_dropped; 276 total->mesh_replies_sent += a->mesh_replies_sent; 277 timeval_add(&total->mesh_replies_sum_wait, &a->mesh_replies_sum_wait); 278 /* the medians are averaged together, this is not as accurate as 279 * taking the median over all of the data, but is good and fast 280 * added up here, division later*/ 281 total->mesh_time_median += a->mesh_time_median; 282} 283 284void server_stats_insquery(struct server_stats* stats, struct comm_point* c, 285 uint16_t qtype, uint16_t qclass, struct edns_data* edns, 286 struct comm_reply* repinfo) 287{ 288 uint16_t flags = sldns_buffer_read_u16_at(c->buffer, 2); 289 if(qtype < STATS_QTYPE_NUM) 290 stats->qtype[qtype]++; 291 else stats->qtype_big++; 292 if(qclass < STATS_QCLASS_NUM) 293 stats->qclass[qclass]++; 294 else stats->qclass_big++; 295 stats->qopcode[ LDNS_OPCODE_WIRE(sldns_buffer_begin(c->buffer)) ]++; 296 if(c->type != comm_udp) 297 stats->qtcp++; 298 if(repinfo && addr_is_ip6(&repinfo->addr, repinfo->addrlen)) 299 stats->qipv6++; 300 if( (flags&BIT_QR) ) 301 stats->qbit_QR++; 302 if( (flags&BIT_AA) ) 303 stats->qbit_AA++; 304 if( (flags&BIT_TC) ) 305 stats->qbit_TC++; 306 if( (flags&BIT_RD) ) 307 stats->qbit_RD++; 308 if( (flags&BIT_RA) ) 309 stats->qbit_RA++; 310 if( (flags&BIT_Z) ) 311 stats->qbit_Z++; 312 if( (flags&BIT_AD) ) 313 stats->qbit_AD++; 314 if( (flags&BIT_CD) ) 315 stats->qbit_CD++; 316 if(edns->edns_present) { 317 stats->qEDNS++; 318 if( (edns->bits & EDNS_DO) ) 319 stats->qEDNS_DO++; 320 } 321} 322 323void server_stats_insrcode(struct server_stats* stats, sldns_buffer* buf) 324{ 325 if(stats->extended && sldns_buffer_limit(buf) != 0) { 326 int r = (int)LDNS_RCODE_WIRE( sldns_buffer_begin(buf) ); 327 stats->ans_rcode[r] ++; 328 if(r == 0 && LDNS_ANCOUNT( sldns_buffer_begin(buf) ) == 0) 329 stats->ans_rcode_nodata ++; 330 } 331} 332