diff --git a/daemon/remote.c b/daemon/remote.c index 855e3b175..730d30f03 100644 --- a/daemon/remote.c +++ b/daemon/remote.c @@ -46,6 +46,7 @@ #include "daemon/remote.h" #include "daemon/worker.h" #include "daemon/daemon.h" +#include "daemon/stats.h" #include "util/log.h" #include "util/config_file.h" #include "util/net_help.h" @@ -73,6 +74,25 @@ log_crypto_err(const char* str) } } +/** divide sum of timers to get average */ +static void +timeval_divide(struct timeval* avg, struct timeval* sum, size_t d) +{ +#ifndef S_SPLINT_S + size_t leftover; + if(d == 0) { + avg->tv_sec = 0; + avg->tv_usec = 0; + return; + } + avg->tv_sec = sum->tv_sec / d; + avg->tv_usec = sum->tv_usec / d; + /* handle fraction from seconds divide */ + leftover = sum->tv_sec - avg->tv_sec*d; + avg->tv_usec += (leftover*1000000)/d; +#endif +} + struct daemon_remote* daemon_remote_create(struct worker* worker) { @@ -452,6 +472,16 @@ ssl_read_line(SSL* ssl, char* buf, size_t max) return 0; } +/** skip whitespace, return new pointer into string */ +static char* +skipwhite(char* str) +{ + /* EOS \0 is not a space */ + while( isspace(*str) ) + str++; + return str; +} + /** send the OK to the control client */ static void send_ok(SSL* ssl) { @@ -460,7 +490,7 @@ static void send_ok(SSL* ssl) /** do the stop command */ static void -do_stop(struct daemon_remote* rc, SSL* ssl) +do_stop(SSL* ssl, struct daemon_remote* rc) { rc->worker->need_to_exit = 1; comm_base_exit(rc->worker->base); @@ -469,25 +499,108 @@ do_stop(struct daemon_remote* rc, SSL* ssl) /** do the reload command */ static void -do_reload(struct daemon_remote* rc, SSL* ssl) +do_reload(SSL* ssl, struct daemon_remote* rc) { rc->worker->need_to_exit = 0; comm_base_exit(rc->worker->base); send_ok(ssl); } +/** do the verbosity command */ +static void +do_verbosity(SSL* ssl, char* str) +{ + int val = atoi(str); + if(val == 0 && strcmp(str, "0") != 0) { + ssl_printf(ssl, "error in verbosity number syntax: %s\n", str); + return; + } + verbosity = val; + send_ok(ssl); +} + +/** print stats from statinfo */ +static int +print_stats(SSL* ssl, char* nm, struct stats_info* s) +{ + struct timeval avg; + if(!ssl_printf(ssl, "%s.num.queries: %u\n", nm, + (unsigned)s->svr.num_queries)) return 0; + if(!ssl_printf(ssl, "%s.num.cachehits: %u\n", nm, + (unsigned)(s->svr.num_queries + - s->svr.num_queries_missed_cache))) return 0; + if(!ssl_printf(ssl, "%s.num.cachemiss: %u\n", nm, + (unsigned)s->svr.num_queries_missed_cache)) return 0; + if(!ssl_printf(ssl, "%s.num.recursivereplies: %u\n", nm, + (unsigned)s->mesh_replies_sent)) return 0; + if(!ssl_printf(ssl, "%s.requestlist.avg: %g\n", nm, + s->svr.num_queries_missed_cache? + (double)s->svr.sum_query_list_size/ + s->svr.num_queries_missed_cache : 0.0)) return 0; + if(!ssl_printf(ssl, "%s.requestlist.max: %u\n", nm, + (unsigned)s->svr.max_query_list_size)) return 0; + if(!ssl_printf(ssl, "%s.requestlist.overwritten: %u\n", nm, + (unsigned)s->mesh_jostled)) return 0; + if(!ssl_printf(ssl, "%s.requestlist.exceeded: %u\n", nm, + (unsigned)s->mesh_dropped)) return 0; + if(!ssl_printf(ssl, "%s.requestlist.current.all: %u\n", nm, + (unsigned)s->mesh_num_states)) return 0; + if(!ssl_printf(ssl, "%s.requestlist.current.user: %u\n", nm, + (unsigned)s->mesh_num_reply_states)) return 0; + timeval_divide(&avg, &s->mesh_replies_sum_wait, s->mesh_replies_sent); + if(!ssl_printf(ssl, "%s.recursion.time.avg: %d.%6.6d\n", nm, + (int)avg.tv_sec, (int)avg.tv_usec)) return 0; + if(!ssl_printf(ssl, "%s.recursion.time.median: %g\n", nm, + s->mesh_time_median)) return 0; + return 1; +} + +/** print stats for one thread */ +static int +print_thread_stats(SSL* ssl, int i, struct stats_info* s) +{ + char nm[16]; + snprintf(nm, sizeof(nm), "thread%d", i); + nm[sizeof(nm)-1]=0; + return print_stats(ssl, nm, s); +} + +/** do the stats command */ +static void +do_stats(SSL* ssl, struct daemon_remote* rc) +{ + struct daemon* daemon = rc->worker->daemon; + struct stats_info total; + struct stats_info s; + int i; + /* gather all thread statistics in one place */ + for(i=0; inum; i++) { + server_stats_obtain(rc->worker, daemon->workers[i], &s); + if(!print_thread_stats(ssl, i, &s)) + return; + if(i == 0) + total = s; + else server_stats_add(&total, &s); + } + /* print the thread statistics */ + total.mesh_time_median /= (double)daemon->num; + print_stats(ssl, "total", &total); +} + /** execute a remote control command */ static void execute_cmd(struct daemon_remote* rc, SSL* ssl, char* cmd) { - char* p = cmd; - /* skip whitespace */ - while( isspace(*p) ) p++; - /* compare command - check longer strings first */ + char* p = skipwhite(cmd); + /* compare command - check longer strings first in case of substrings*/ if(strncmp(p, "stop", 4) == 0) { - do_stop(rc, ssl); + do_stop(ssl, rc); } else if(strncmp(p, "reload", 6) == 0) { - do_reload(rc, ssl); + do_reload(ssl, rc); + } else if(strncmp(p, "verbosity", 9) == 0) { + do_verbosity(ssl, skipwhite(p+9)); + } else if(strncmp(p, "stats", 5) == 0) { + do_stats(ssl, rc); } else { (void)ssl_printf(ssl, "error unknown command '%s'\n", p); } diff --git a/daemon/stats.c b/daemon/stats.c index 8c2b917b4..8fa9d44b1 100644 --- a/daemon/stats.c +++ b/daemon/stats.c @@ -42,7 +42,25 @@ #include "config.h" #include "daemon/stats.h" #include "daemon/worker.h" +#include "daemon/daemon.h" #include "services/mesh.h" +#include "util/config_file.h" +#include "util/tube.h" +#include "util/timehist.h" + +/** add timers and the values do not overflow or become negative */ +static void +timeval_add(struct timeval* d, struct timeval* add) +{ +#ifndef S_SPLINT_S + d->tv_sec += add->tv_sec; + d->tv_usec += add->tv_usec; + while(d->tv_usec > 1000000 ) { + d->tv_usec -= 1000000; + d->tv_sec++; + } +#endif +} void server_stats_init(struct server_stats* stats) { @@ -57,7 +75,8 @@ void server_stats_querymiss(struct server_stats* stats, struct worker* worker) stats->max_query_list_size = worker->env.mesh->all.count; } -void server_stats_log(struct server_stats* stats, int threadnum) +void server_stats_log(struct server_stats* stats, struct worker* worker, + int threadnum) { log_info("server stats for thread %d: %u queries, " "%u answers from cache, %u recursions", @@ -70,5 +89,77 @@ void server_stats_log(struct server_stats* stats, int threadnum) stats->num_queries_missed_cache? (double)stats->sum_query_list_size/ stats->num_queries_missed_cache : 0.0, - (unsigned)stats->num_query_list_exceeded); + (unsigned)worker->env.mesh->stats_dropped); +} + +void +server_stats_compile(struct worker* worker, struct stats_info* s) +{ + s->svr = worker->stats; + s->mesh_num_states = worker->env.mesh->all.count; + s->mesh_num_reply_states = worker->env.mesh->num_reply_states; + s->mesh_jostled = worker->env.mesh->stats_jostled; + s->mesh_dropped = worker->env.mesh->stats_dropped; + s->mesh_replies_sent = worker->env.mesh->replies_sent; + s->mesh_replies_sum_wait = worker->env.mesh->replies_sum_wait; + s->mesh_time_median = timehist_quartile(worker->env.mesh->histogram, + 0.50); + if(!worker->env.cfg->stat_cumulative) { + server_stats_init(&worker->stats); + mesh_stats_clear(worker->env.mesh); + } +} + +void server_stats_obtain(struct worker* worker, struct worker* who, + struct stats_info* s) +{ + uint8_t *reply = NULL; + size_t len = 0; + if(worker == who) { + /* just fill it in */ + server_stats_compile(worker, s); + return; + } + /* communicate over tube */ + verbose(VERB_ALGO, "write stats cmd"); + worker_send_cmd(who, worker_cmd_stats); + verbose(VERB_ALGO, "wait for stats reply"); + if(!tube_read_msg(worker->cmd, &reply, &len, 0)) + fatal_exit("failed to read stats over cmd channel"); + if(len != sizeof(*s)) + fatal_exit("stats on cmd channel wrong length %d %d", + (int)len, (int)sizeof(*s)); + memcpy(s, reply, len); + free(reply); +} + +void server_stats_reply(struct worker* worker) +{ + struct stats_info s; + server_stats_compile(worker, &s); + verbose(VERB_ALGO, "write stats replymsg"); + if(!tube_write_msg(worker->daemon->workers[0]->cmd, + (uint8_t*)&s, sizeof(s), 0)) + fatal_exit("could not write stat values over cmd channel"); +} + +void server_stats_add(struct stats_info* total, struct stats_info* a) +{ + total->svr.num_queries += a->svr.num_queries; + total->svr.num_queries_missed_cache += a->svr.num_queries_missed_cache; + total->svr.sum_query_list_size += a->svr.sum_query_list_size; + /* the max size reached is upped to higher of both */ + if(a->svr.max_query_list_size > total->svr.max_query_list_size) + total->svr.max_query_list_size = a->svr.max_query_list_size; + + total->mesh_num_states += a->mesh_num_states; + total->mesh_num_reply_states += a->mesh_num_reply_states; + total->mesh_jostled += a->mesh_jostled; + total->mesh_dropped += a->mesh_dropped; + total->mesh_replies_sent += a->mesh_replies_sent; + timeval_add(&total->mesh_replies_sum_wait, &a->mesh_replies_sum_wait); + /* the medians are averaged together, this is not as accurate as + * taking the median over all of the data, but is good and fast + * added up here, division later*/ + total->mesh_time_median += a->mesh_time_median; } diff --git a/daemon/stats.h b/daemon/stats.h index a3f2bc674..cc0f6d91d 100644 --- a/daemon/stats.h +++ b/daemon/stats.h @@ -58,8 +58,30 @@ struct server_stats { size_t sum_query_list_size; /** max value of query list size reached. */ size_t max_query_list_size; - /** number of times that the query_list_size was insufficient */ - size_t num_query_list_exceeded; +}; + +/** + * Statistics to send over the control pipe when asked + * This struct is made to be memcpied, sent in binary. + */ +struct stats_info { + /** the thread stats */ + struct server_stats svr; + + /** mesh stats: current number of states */ + size_t mesh_num_states; + /** mesh stats: current number of reply (user) states */ + size_t mesh_num_reply_states; + /** mesh stats: number of reply states overwritten with a new one */ + size_t mesh_jostled; + /** mesh stats: number of incoming queries dropped */ + size_t mesh_dropped; + /** mesh stats: replies sent */ + size_t mesh_replies_sent; + /** mesh stats: sum of waiting times for the replies */ + struct timeval mesh_replies_sum_wait; + /** mesh stats: median of waiting times for replies (in sec) */ + double mesh_time_median; }; /** @@ -72,6 +94,37 @@ void server_stats_init(struct server_stats* stats); void server_stats_querymiss(struct server_stats* stats, struct worker* worker); /** display the stats to the log */ -void server_stats_log(struct server_stats* stats, int threadnum); +void server_stats_log(struct server_stats* stats, struct worker* worker, + int threadnum); + +/** + * Obtain the stats info for a given thread. Uses pipe to communicate. + * @param worker: the worker that is executing (the first worker). + * @param who: on who to get the statistics info. + * @param s: the stats block to fill in. + */ +void server_stats_obtain(struct worker* worker, struct worker* who, + struct stats_info* s); + +/** + * Compile stats into structure for this thread worker. + * Also clears the statistics counters (if that is set by config file). + * @param worker: the worker to compile stats for, also the executing worker. + * @param s: stats block. + */ +void server_stats_compile(struct worker* worker, struct stats_info* s); + +/** + * Send stats over comm tube in reply to query cmd + * @param worker: this worker. + */ +void server_stats_reply(struct worker* worker); + +/** + * Addup stat blocks. + * @param total: sum of the two entries. + * @param a: to add to it. + */ +void server_stats_add(struct stats_info* total, struct stats_info* a); #endif /* DAEMON_STATS_H */ diff --git a/daemon/worker.c b/daemon/worker.c index 83e0462ca..6ca3bca2c 100644 --- a/daemon/worker.c +++ b/daemon/worker.c @@ -194,7 +194,7 @@ worker_mem_report(struct worker* ATTR_UNUSED(worker), void worker_send_cmd(struct worker* worker, enum worker_commands cmd) { - uint32_t c = (uint32_t)cmd; + uint32_t c = (uint32_t)htonl(cmd); if(!tube_write_msg(worker->cmd, (uint8_t*)&c, sizeof(c), 0)) { log_err("worker send cmd %d failed", (int)cmd); } @@ -338,6 +338,10 @@ worker_handle_control_cmd(struct tube* ATTR_UNUSED(tube), uint8_t* msg, verbose(VERB_ALGO, "got control cmd quit"); comm_base_exit(worker->base); break; + case worker_cmd_stats: + verbose(VERB_ALGO, "got control cmd stats"); + server_stats_reply(worker); + break; default: log_err("bad command %d", (int)cmd); break; @@ -831,7 +835,7 @@ worker_handle_request(struct comm_point* c, void* arg, int error, /* protect our memory usage from storing reply addresses */ verbose(VERB_ALGO, "Too many requests queued. " "dropping incoming query."); - worker->stats.num_query_list_exceeded++; + worker->env.mesh->stats_dropped++; comm_point_drop_reply(repinfo); return 0; } @@ -895,7 +899,7 @@ worker_restart_timer(struct worker* worker) void worker_stat_timer_cb(void* arg) { struct worker* worker = (struct worker*)arg; - server_stats_log(&worker->stats, worker->thread_num); + server_stats_log(&worker->stats, worker, worker->thread_num); mesh_stats(worker->env.mesh, "mesh has"); worker_mem_report(worker, NULL); if(!worker->daemon->cfg->stat_cumulative) { @@ -921,12 +925,10 @@ worker_create(struct daemon* daemon, int id, int* ports, int n) } worker->daemon = daemon; worker->thread_num = id; - if(id != 0) { - if(!(worker->cmd = tube_create())) { - free(worker->ports); - free(worker); - return NULL; - } + if(!(worker->cmd = tube_create())) { + free(worker->ports); + free(worker); + return NULL; } return worker; } @@ -1011,14 +1013,12 @@ worker_init(struct worker* worker, struct config_file *cfg, worker_delete(worker); return 0; } - if(worker->thread_num != 0) { - /* start listening to commands */ - if(!tube_setup_bg_listen(worker->cmd, worker->base, - &worker_handle_control_cmd, worker)) { - log_err("could not create control compt."); - worker_delete(worker); - return 0; - } + /* start listening to commands */ + if(!tube_setup_bg_listen(worker->cmd, worker->base, + &worker_handle_control_cmd, worker)) { + log_err("could not create control compt."); + worker_delete(worker); + return 0; } worker->stat_timer = comm_timer_create(worker->base, worker_stat_timer_cb, worker); @@ -1082,7 +1082,7 @@ worker_delete(struct worker* worker) if(!worker) return; if(worker->env.mesh && verbosity >= VERB_OPS) { - server_stats_log(&worker->stats, worker->thread_num); + server_stats_log(&worker->stats, worker, worker->thread_num); mesh_stats(worker->env.mesh, "mesh has"); worker_mem_report(worker, NULL); } diff --git a/daemon/worker.h b/daemon/worker.h index 25801d16a..6beb607f8 100644 --- a/daemon/worker.h +++ b/daemon/worker.h @@ -64,7 +64,9 @@ struct daemon_remote; /** worker commands */ enum worker_commands { /** make the worker quit */ - worker_cmd_quit + worker_cmd_quit, + /** obtain statistics */ + worker_cmd_stats }; /** diff --git a/doc/Changelog b/doc/Changelog index f14071c43..e09d766f0 100644 --- a/doc/Changelog +++ b/doc/Changelog @@ -1,6 +1,8 @@ 15 September 2008: Wouter - working start, stop, reload commands for unbound-control. - test for unbound-control working; better exit value for control. + - verbosity control via unbound-control. + - unbound-control stats. 12 September 2008: Wouter - removed browser control mentions. Proto speccy. diff --git a/doc/control_proto_spec.txt b/doc/control_proto_spec.txt index cf9078622..1ad8d1cf9 100644 --- a/doc/control_proto_spec.txt +++ b/doc/control_proto_spec.txt @@ -25,6 +25,8 @@ stop stops the server. reload reloads the config file, and flushes the cache. +verbosity + Change logging verbosity to new value. stats output is a list of [name]=[value] lines. clears the counters. @@ -61,7 +63,5 @@ local_data local_data_add www.example.com. IN A 192.0.2.2 if no local_zone exists for it; a transparent zone with the same name as the data is created. -verbosity - Change logging verbosity to new value. diff --git a/doc/unbound-control.8.in b/doc/unbound-control.8.in index d4ed17195..92e71865b 100644 --- a/doc/unbound-control.8.in +++ b/doc/unbound-control.8.in @@ -50,6 +50,16 @@ Stop the server. The server daemon exits. .TP .B reload Reload the server. This flushes the cache and reads the config file fresh. +.TP +.B verbosity \fInumber +Change verbosity value for logging. Same values as \fBverbosity\fR keyword in +\fIunbound.conf\fR(5). This new setting lasts until the server is issued +a reload (taken from config file again), or the next verbosity control command. +.TP +.B stats +Print statistics. Resets the internal counters to zero, this can be +controlled using the \fBstatistics\-cumulative\fR config statement. +Statistics are printed with one [name]: [value] per line. .SH "EXIT CODE" The unbound-control program exits with status code 1 on error. .SH "SET UP" diff --git a/smallapp/unbound-control.c b/smallapp/unbound-control.c index af34cb78c..b50410f73 100644 --- a/smallapp/unbound-control.c +++ b/smallapp/unbound-control.c @@ -61,6 +61,8 @@ usage() printf(" start start server; runs unbound(8)\n"); printf(" stop stops the server\n"); printf(" reload reloads the server\n"); + printf(" stats print statistics\n"); + printf(" verbosity [number] change logging detail\n"); printf("Version %s\n", PACKAGE_VERSION); printf("BSD licensed, see LICENSE in source package for details.\n"); printf("Report bugs to %s\n", PACKAGE_BUGREPORT); @@ -118,6 +120,14 @@ contact_server(char* svr, struct config_file* cfg) if(cfg->control_ifs) svr = cfg->control_ifs->str; else svr = "127.0.0.1"; + /* config 0 addr (everything), means ask localhost */ + if(strcmp(svr, "0.0.0.0") == 0) + svr = "127.0.0.1"; + else if(strcmp(svr, "::0") == 0 || + strcmp(svr, "0::0") == 0 || + strcmp(svr, "0::") == 0 || + strcmp(svr, "::") == 0) + svr = "::1"; } if(strchr(svr, '@')) { if(!extstrtoaddr(svr, &addr, &addrlen))