verbosity and stats control.

git-svn-id: file:///svn/unbound/trunk@1237 be551aaa-1e26-0410-a405-d3ace91eadb9
This commit is contained in:
Wouter Wijngaards 2008-09-15 14:19:41 +00:00
parent 757eff88f5
commit 0b3c133dd3
9 changed files with 315 additions and 34 deletions

View file

@ -46,6 +46,7 @@
#include "daemon/remote.h"
#include "daemon/worker.h"
#include "daemon/daemon.h"
#include "daemon/stats.h"
#include "util/log.h"
#include "util/config_file.h"
#include "util/net_help.h"
@ -73,6 +74,25 @@ log_crypto_err(const char* str)
}
}
/** divide sum of timers to get average */
static void
timeval_divide(struct timeval* avg, struct timeval* sum, size_t d)
{
#ifndef S_SPLINT_S
size_t leftover;
if(d == 0) {
avg->tv_sec = 0;
avg->tv_usec = 0;
return;
}
avg->tv_sec = sum->tv_sec / d;
avg->tv_usec = sum->tv_usec / d;
/* handle fraction from seconds divide */
leftover = sum->tv_sec - avg->tv_sec*d;
avg->tv_usec += (leftover*1000000)/d;
#endif
}
struct daemon_remote*
daemon_remote_create(struct worker* worker)
{
@ -452,6 +472,16 @@ ssl_read_line(SSL* ssl, char* buf, size_t max)
return 0;
}
/** skip whitespace, return new pointer into string */
static char*
skipwhite(char* str)
{
/* EOS \0 is not a space */
while( isspace(*str) )
str++;
return str;
}
/** send the OK to the control client */
static void send_ok(SSL* ssl)
{
@ -460,7 +490,7 @@ static void send_ok(SSL* ssl)
/** do the stop command */
static void
do_stop(struct daemon_remote* rc, SSL* ssl)
do_stop(SSL* ssl, struct daemon_remote* rc)
{
rc->worker->need_to_exit = 1;
comm_base_exit(rc->worker->base);
@ -469,25 +499,108 @@ do_stop(struct daemon_remote* rc, SSL* ssl)
/** do the reload command */
static void
do_reload(struct daemon_remote* rc, SSL* ssl)
do_reload(SSL* ssl, struct daemon_remote* rc)
{
rc->worker->need_to_exit = 0;
comm_base_exit(rc->worker->base);
send_ok(ssl);
}
/** do the verbosity command */
static void
do_verbosity(SSL* ssl, char* str)
{
int val = atoi(str);
if(val == 0 && strcmp(str, "0") != 0) {
ssl_printf(ssl, "error in verbosity number syntax: %s\n", str);
return;
}
verbosity = val;
send_ok(ssl);
}
/** print stats from statinfo */
static int
print_stats(SSL* ssl, char* nm, struct stats_info* s)
{
struct timeval avg;
if(!ssl_printf(ssl, "%s.num.queries: %u\n", nm,
(unsigned)s->svr.num_queries)) return 0;
if(!ssl_printf(ssl, "%s.num.cachehits: %u\n", nm,
(unsigned)(s->svr.num_queries
- s->svr.num_queries_missed_cache))) return 0;
if(!ssl_printf(ssl, "%s.num.cachemiss: %u\n", nm,
(unsigned)s->svr.num_queries_missed_cache)) return 0;
if(!ssl_printf(ssl, "%s.num.recursivereplies: %u\n", nm,
(unsigned)s->mesh_replies_sent)) return 0;
if(!ssl_printf(ssl, "%s.requestlist.avg: %g\n", nm,
s->svr.num_queries_missed_cache?
(double)s->svr.sum_query_list_size/
s->svr.num_queries_missed_cache : 0.0)) return 0;
if(!ssl_printf(ssl, "%s.requestlist.max: %u\n", nm,
(unsigned)s->svr.max_query_list_size)) return 0;
if(!ssl_printf(ssl, "%s.requestlist.overwritten: %u\n", nm,
(unsigned)s->mesh_jostled)) return 0;
if(!ssl_printf(ssl, "%s.requestlist.exceeded: %u\n", nm,
(unsigned)s->mesh_dropped)) return 0;
if(!ssl_printf(ssl, "%s.requestlist.current.all: %u\n", nm,
(unsigned)s->mesh_num_states)) return 0;
if(!ssl_printf(ssl, "%s.requestlist.current.user: %u\n", nm,
(unsigned)s->mesh_num_reply_states)) return 0;
timeval_divide(&avg, &s->mesh_replies_sum_wait, s->mesh_replies_sent);
if(!ssl_printf(ssl, "%s.recursion.time.avg: %d.%6.6d\n", nm,
(int)avg.tv_sec, (int)avg.tv_usec)) return 0;
if(!ssl_printf(ssl, "%s.recursion.time.median: %g\n", nm,
s->mesh_time_median)) return 0;
return 1;
}
/** print stats for one thread */
static int
print_thread_stats(SSL* ssl, int i, struct stats_info* s)
{
char nm[16];
snprintf(nm, sizeof(nm), "thread%d", i);
nm[sizeof(nm)-1]=0;
return print_stats(ssl, nm, s);
}
/** do the stats command */
static void
do_stats(SSL* ssl, struct daemon_remote* rc)
{
struct daemon* daemon = rc->worker->daemon;
struct stats_info total;
struct stats_info s;
int i;
/* gather all thread statistics in one place */
for(i=0; i<daemon->num; i++) {
server_stats_obtain(rc->worker, daemon->workers[i], &s);
if(!print_thread_stats(ssl, i, &s))
return;
if(i == 0)
total = s;
else server_stats_add(&total, &s);
}
/* print the thread statistics */
total.mesh_time_median /= (double)daemon->num;
print_stats(ssl, "total", &total);
}
/** execute a remote control command */
static void
execute_cmd(struct daemon_remote* rc, SSL* ssl, char* cmd)
{
char* p = cmd;
/* skip whitespace */
while( isspace(*p) ) p++;
/* compare command - check longer strings first */
char* p = skipwhite(cmd);
/* compare command - check longer strings first in case of substrings*/
if(strncmp(p, "stop", 4) == 0) {
do_stop(rc, ssl);
do_stop(ssl, rc);
} else if(strncmp(p, "reload", 6) == 0) {
do_reload(rc, ssl);
do_reload(ssl, rc);
} else if(strncmp(p, "verbosity", 9) == 0) {
do_verbosity(ssl, skipwhite(p+9));
} else if(strncmp(p, "stats", 5) == 0) {
do_stats(ssl, rc);
} else {
(void)ssl_printf(ssl, "error unknown command '%s'\n", p);
}

View file

@ -42,7 +42,25 @@
#include "config.h"
#include "daemon/stats.h"
#include "daemon/worker.h"
#include "daemon/daemon.h"
#include "services/mesh.h"
#include "util/config_file.h"
#include "util/tube.h"
#include "util/timehist.h"
/** add timers and the values do not overflow or become negative */
static void
timeval_add(struct timeval* d, struct timeval* add)
{
#ifndef S_SPLINT_S
d->tv_sec += add->tv_sec;
d->tv_usec += add->tv_usec;
while(d->tv_usec > 1000000 ) {
d->tv_usec -= 1000000;
d->tv_sec++;
}
#endif
}
void server_stats_init(struct server_stats* stats)
{
@ -57,7 +75,8 @@ void server_stats_querymiss(struct server_stats* stats, struct worker* worker)
stats->max_query_list_size = worker->env.mesh->all.count;
}
void server_stats_log(struct server_stats* stats, int threadnum)
void server_stats_log(struct server_stats* stats, struct worker* worker,
int threadnum)
{
log_info("server stats for thread %d: %u queries, "
"%u answers from cache, %u recursions",
@ -70,5 +89,77 @@ void server_stats_log(struct server_stats* stats, int threadnum)
stats->num_queries_missed_cache?
(double)stats->sum_query_list_size/
stats->num_queries_missed_cache : 0.0,
(unsigned)stats->num_query_list_exceeded);
(unsigned)worker->env.mesh->stats_dropped);
}
void
server_stats_compile(struct worker* worker, struct stats_info* s)
{
s->svr = worker->stats;
s->mesh_num_states = worker->env.mesh->all.count;
s->mesh_num_reply_states = worker->env.mesh->num_reply_states;
s->mesh_jostled = worker->env.mesh->stats_jostled;
s->mesh_dropped = worker->env.mesh->stats_dropped;
s->mesh_replies_sent = worker->env.mesh->replies_sent;
s->mesh_replies_sum_wait = worker->env.mesh->replies_sum_wait;
s->mesh_time_median = timehist_quartile(worker->env.mesh->histogram,
0.50);
if(!worker->env.cfg->stat_cumulative) {
server_stats_init(&worker->stats);
mesh_stats_clear(worker->env.mesh);
}
}
void server_stats_obtain(struct worker* worker, struct worker* who,
struct stats_info* s)
{
uint8_t *reply = NULL;
size_t len = 0;
if(worker == who) {
/* just fill it in */
server_stats_compile(worker, s);
return;
}
/* communicate over tube */
verbose(VERB_ALGO, "write stats cmd");
worker_send_cmd(who, worker_cmd_stats);
verbose(VERB_ALGO, "wait for stats reply");
if(!tube_read_msg(worker->cmd, &reply, &len, 0))
fatal_exit("failed to read stats over cmd channel");
if(len != sizeof(*s))
fatal_exit("stats on cmd channel wrong length %d %d",
(int)len, (int)sizeof(*s));
memcpy(s, reply, len);
free(reply);
}
void server_stats_reply(struct worker* worker)
{
struct stats_info s;
server_stats_compile(worker, &s);
verbose(VERB_ALGO, "write stats replymsg");
if(!tube_write_msg(worker->daemon->workers[0]->cmd,
(uint8_t*)&s, sizeof(s), 0))
fatal_exit("could not write stat values over cmd channel");
}
void server_stats_add(struct stats_info* total, struct stats_info* a)
{
total->svr.num_queries += a->svr.num_queries;
total->svr.num_queries_missed_cache += a->svr.num_queries_missed_cache;
total->svr.sum_query_list_size += a->svr.sum_query_list_size;
/* the max size reached is upped to higher of both */
if(a->svr.max_query_list_size > total->svr.max_query_list_size)
total->svr.max_query_list_size = a->svr.max_query_list_size;
total->mesh_num_states += a->mesh_num_states;
total->mesh_num_reply_states += a->mesh_num_reply_states;
total->mesh_jostled += a->mesh_jostled;
total->mesh_dropped += a->mesh_dropped;
total->mesh_replies_sent += a->mesh_replies_sent;
timeval_add(&total->mesh_replies_sum_wait, &a->mesh_replies_sum_wait);
/* the medians are averaged together, this is not as accurate as
* taking the median over all of the data, but is good and fast
* added up here, division later*/
total->mesh_time_median += a->mesh_time_median;
}

View file

@ -58,8 +58,30 @@ struct server_stats {
size_t sum_query_list_size;
/** max value of query list size reached. */
size_t max_query_list_size;
/** number of times that the query_list_size was insufficient */
size_t num_query_list_exceeded;
};
/**
* Statistics to send over the control pipe when asked
* This struct is made to be memcpied, sent in binary.
*/
struct stats_info {
/** the thread stats */
struct server_stats svr;
/** mesh stats: current number of states */
size_t mesh_num_states;
/** mesh stats: current number of reply (user) states */
size_t mesh_num_reply_states;
/** mesh stats: number of reply states overwritten with a new one */
size_t mesh_jostled;
/** mesh stats: number of incoming queries dropped */
size_t mesh_dropped;
/** mesh stats: replies sent */
size_t mesh_replies_sent;
/** mesh stats: sum of waiting times for the replies */
struct timeval mesh_replies_sum_wait;
/** mesh stats: median of waiting times for replies (in sec) */
double mesh_time_median;
};
/**
@ -72,6 +94,37 @@ void server_stats_init(struct server_stats* stats);
void server_stats_querymiss(struct server_stats* stats, struct worker* worker);
/** display the stats to the log */
void server_stats_log(struct server_stats* stats, int threadnum);
void server_stats_log(struct server_stats* stats, struct worker* worker,
int threadnum);
/**
* Obtain the stats info for a given thread. Uses pipe to communicate.
* @param worker: the worker that is executing (the first worker).
* @param who: on who to get the statistics info.
* @param s: the stats block to fill in.
*/
void server_stats_obtain(struct worker* worker, struct worker* who,
struct stats_info* s);
/**
* Compile stats into structure for this thread worker.
* Also clears the statistics counters (if that is set by config file).
* @param worker: the worker to compile stats for, also the executing worker.
* @param s: stats block.
*/
void server_stats_compile(struct worker* worker, struct stats_info* s);
/**
* Send stats over comm tube in reply to query cmd
* @param worker: this worker.
*/
void server_stats_reply(struct worker* worker);
/**
* Addup stat blocks.
* @param total: sum of the two entries.
* @param a: to add to it.
*/
void server_stats_add(struct stats_info* total, struct stats_info* a);
#endif /* DAEMON_STATS_H */

View file

@ -194,7 +194,7 @@ worker_mem_report(struct worker* ATTR_UNUSED(worker),
void
worker_send_cmd(struct worker* worker, enum worker_commands cmd)
{
uint32_t c = (uint32_t)cmd;
uint32_t c = (uint32_t)htonl(cmd);
if(!tube_write_msg(worker->cmd, (uint8_t*)&c, sizeof(c), 0)) {
log_err("worker send cmd %d failed", (int)cmd);
}
@ -338,6 +338,10 @@ worker_handle_control_cmd(struct tube* ATTR_UNUSED(tube), uint8_t* msg,
verbose(VERB_ALGO, "got control cmd quit");
comm_base_exit(worker->base);
break;
case worker_cmd_stats:
verbose(VERB_ALGO, "got control cmd stats");
server_stats_reply(worker);
break;
default:
log_err("bad command %d", (int)cmd);
break;
@ -831,7 +835,7 @@ worker_handle_request(struct comm_point* c, void* arg, int error,
/* protect our memory usage from storing reply addresses */
verbose(VERB_ALGO, "Too many requests queued. "
"dropping incoming query.");
worker->stats.num_query_list_exceeded++;
worker->env.mesh->stats_dropped++;
comm_point_drop_reply(repinfo);
return 0;
}
@ -895,7 +899,7 @@ worker_restart_timer(struct worker* worker)
void worker_stat_timer_cb(void* arg)
{
struct worker* worker = (struct worker*)arg;
server_stats_log(&worker->stats, worker->thread_num);
server_stats_log(&worker->stats, worker, worker->thread_num);
mesh_stats(worker->env.mesh, "mesh has");
worker_mem_report(worker, NULL);
if(!worker->daemon->cfg->stat_cumulative) {
@ -921,13 +925,11 @@ worker_create(struct daemon* daemon, int id, int* ports, int n)
}
worker->daemon = daemon;
worker->thread_num = id;
if(id != 0) {
if(!(worker->cmd = tube_create())) {
free(worker->ports);
free(worker);
return NULL;
}
}
return worker;
}
@ -1011,7 +1013,6 @@ worker_init(struct worker* worker, struct config_file *cfg,
worker_delete(worker);
return 0;
}
if(worker->thread_num != 0) {
/* start listening to commands */
if(!tube_setup_bg_listen(worker->cmd, worker->base,
&worker_handle_control_cmd, worker)) {
@ -1019,7 +1020,6 @@ worker_init(struct worker* worker, struct config_file *cfg,
worker_delete(worker);
return 0;
}
}
worker->stat_timer = comm_timer_create(worker->base,
worker_stat_timer_cb, worker);
if(!worker->stat_timer) {
@ -1082,7 +1082,7 @@ worker_delete(struct worker* worker)
if(!worker)
return;
if(worker->env.mesh && verbosity >= VERB_OPS) {
server_stats_log(&worker->stats, worker->thread_num);
server_stats_log(&worker->stats, worker, worker->thread_num);
mesh_stats(worker->env.mesh, "mesh has");
worker_mem_report(worker, NULL);
}

View file

@ -64,7 +64,9 @@ struct daemon_remote;
/** worker commands */
enum worker_commands {
/** make the worker quit */
worker_cmd_quit
worker_cmd_quit,
/** obtain statistics */
worker_cmd_stats
};
/**

View file

@ -1,6 +1,8 @@
15 September 2008: Wouter
- working start, stop, reload commands for unbound-control.
- test for unbound-control working; better exit value for control.
- verbosity control via unbound-control.
- unbound-control stats.
12 September 2008: Wouter
- removed browser control mentions. Proto speccy.

View file

@ -25,6 +25,8 @@ stop
stops the server.
reload
reloads the config file, and flushes the cache.
verbosity <new value>
Change logging verbosity to new value.
stats
output is a list of [name]=[value] lines.
clears the counters.
@ -61,7 +63,5 @@ local_data <resource record string>
local_data_add www.example.com. IN A 192.0.2.2
if no local_zone exists for it; a transparent zone with the same
name as the data is created.
verbosity <new value>
Change logging verbosity to new value.

View file

@ -50,6 +50,16 @@ Stop the server. The server daemon exits.
.TP
.B reload
Reload the server. This flushes the cache and reads the config file fresh.
.TP
.B verbosity \fInumber
Change verbosity value for logging. Same values as \fBverbosity\fR keyword in
\fIunbound.conf\fR(5). This new setting lasts until the server is issued
a reload (taken from config file again), or the next verbosity control command.
.TP
.B stats
Print statistics. Resets the internal counters to zero, this can be
controlled using the \fBstatistics\-cumulative\fR config statement.
Statistics are printed with one [name]: [value] per line.
.SH "EXIT CODE"
The unbound-control program exits with status code 1 on error.
.SH "SET UP"

View file

@ -61,6 +61,8 @@ usage()
printf(" start start server; runs unbound(8)\n");
printf(" stop stops the server\n");
printf(" reload reloads the server\n");
printf(" stats print statistics\n");
printf(" verbosity [number] change logging detail\n");
printf("Version %s\n", PACKAGE_VERSION);
printf("BSD licensed, see LICENSE in source package for details.\n");
printf("Report bugs to %s\n", PACKAGE_BUGREPORT);
@ -118,6 +120,14 @@ contact_server(char* svr, struct config_file* cfg)
if(cfg->control_ifs)
svr = cfg->control_ifs->str;
else svr = "127.0.0.1";
/* config 0 addr (everything), means ask localhost */
if(strcmp(svr, "0.0.0.0") == 0)
svr = "127.0.0.1";
else if(strcmp(svr, "::0") == 0 ||
strcmp(svr, "0::0") == 0 ||
strcmp(svr, "0::") == 0 ||
strcmp(svr, "::") == 0)
svr = "::1";
}
if(strchr(svr, '@')) {
if(!extstrtoaddr(svr, &addr, &addrlen))