mirror of
https://github.com/NLnetLabs/unbound.git
synced 2025-12-18 23:06:06 -05:00
Mesh reply counters (#1374)
* Statistics counter for number of queries dropped by limit on reply addresses

  Request list entries can be associated with multiple pending "reply addresses". Basically, each request list entry keeps its own list of clients that should receive the response once the recursion is finished. This requires keeping allocations around for each client, and there is a global limit on the number of *additional* reply addresses that can be allocated. (Each new request list entry seems to get its own initial reply address, which is not counted against the limit.)

  This commit adds a statistics counter "num_queries_replyaddr_limit" that counts the number of incoming client queries that have been dropped due to the restriction on allocating additional reply addresses. This allows distinguishing these drops from other kinds of drops.

* Statistics counter for number of mesh reply entries

  Request list entries can be associated with multiple pending "reply addresses". Since there is a limit on the number of additional reply addresses that can be allocated, and exceeding it can cause incoming queries to be dropped, it would be nice to be able to track this number.

  This commit basically exports the mesh_area's internal counter `num_reply_addrs` as "threadX.requestlist.current.replies" / "total.requestlist.current.replies".
This commit is contained in:
parent
98f4257890
commit
fceb4e8585
8 changed files with 71 additions and 1 deletions
|
|
@ -801,6 +801,8 @@ print_stats(RES* ssl, const char* nm, struct ub_stats_info* s)
|
||||||
(unsigned long)s->svr.num_queries_cookie_invalid)) return 0;
|
(unsigned long)s->svr.num_queries_cookie_invalid)) return 0;
|
||||||
if(!ssl_printf(ssl, "%s.num.queries_discard_timeout"SQ"%lu\n", nm,
|
if(!ssl_printf(ssl, "%s.num.queries_discard_timeout"SQ"%lu\n", nm,
|
||||||
(unsigned long)s->svr.num_queries_discard_timeout)) return 0;
|
(unsigned long)s->svr.num_queries_discard_timeout)) return 0;
|
||||||
|
if(!ssl_printf(ssl, "%s.num.queries_replyaddr_limit"SQ"%lu\n", nm,
|
||||||
|
(unsigned long)s->svr.num_queries_replyaddr_limit)) return 0;
|
||||||
if(!ssl_printf(ssl, "%s.num.queries_wait_limit"SQ"%lu\n", nm,
|
if(!ssl_printf(ssl, "%s.num.queries_wait_limit"SQ"%lu\n", nm,
|
||||||
(unsigned long)s->svr.num_queries_wait_limit)) return 0;
|
(unsigned long)s->svr.num_queries_wait_limit)) return 0;
|
||||||
if(!ssl_printf(ssl, "%s.num.cachehits"SQ"%lu\n", nm,
|
if(!ssl_printf(ssl, "%s.num.cachehits"SQ"%lu\n", nm,
|
||||||
|
|
@ -845,6 +847,8 @@ print_stats(RES* ssl, const char* nm, struct ub_stats_info* s)
|
||||||
(unsigned long)s->mesh_num_states)) return 0;
|
(unsigned long)s->mesh_num_states)) return 0;
|
||||||
if(!ssl_printf(ssl, "%s.requestlist.current.user"SQ"%lu\n", nm,
|
if(!ssl_printf(ssl, "%s.requestlist.current.user"SQ"%lu\n", nm,
|
||||||
(unsigned long)s->mesh_num_reply_states)) return 0;
|
(unsigned long)s->mesh_num_reply_states)) return 0;
|
||||||
|
if(!ssl_printf(ssl, "%s.requestlist.current.replies"SQ"%lu\n", nm,
|
||||||
|
(unsigned long)s->mesh_num_reply_addrs)) return 0;
|
||||||
#ifndef S_SPLINT_S
|
#ifndef S_SPLINT_S
|
||||||
sumwait.tv_sec = s->mesh_replies_sum_wait_sec;
|
sumwait.tv_sec = s->mesh_replies_sum_wait_sec;
|
||||||
sumwait.tv_usec = s->mesh_replies_sum_wait_usec;
|
sumwait.tv_usec = s->mesh_replies_sum_wait_usec;
|
||||||
|
|
|
||||||
|
|
@ -262,6 +262,7 @@ server_stats_compile(struct worker* worker, struct ub_stats_info* s, int reset)
|
||||||
s->svr = worker->stats;
|
s->svr = worker->stats;
|
||||||
s->mesh_num_states = (long long)worker->env.mesh->all.count;
|
s->mesh_num_states = (long long)worker->env.mesh->all.count;
|
||||||
s->mesh_num_reply_states = (long long)worker->env.mesh->num_reply_states;
|
s->mesh_num_reply_states = (long long)worker->env.mesh->num_reply_states;
|
||||||
|
s->mesh_num_reply_addrs = (long long)worker->env.mesh->num_reply_addrs;
|
||||||
s->mesh_jostled = (long long)worker->env.mesh->stats_jostled;
|
s->mesh_jostled = (long long)worker->env.mesh->stats_jostled;
|
||||||
s->mesh_dropped = (long long)worker->env.mesh->stats_dropped;
|
s->mesh_dropped = (long long)worker->env.mesh->stats_dropped;
|
||||||
s->mesh_replies_sent = (long long)worker->env.mesh->replies_sent;
|
s->mesh_replies_sent = (long long)worker->env.mesh->replies_sent;
|
||||||
|
|
@ -284,6 +285,8 @@ server_stats_compile(struct worker* worker, struct ub_stats_info* s, int reset)
|
||||||
NUM_BUCKETS_HIST);
|
NUM_BUCKETS_HIST);
|
||||||
s->svr.num_queries_discard_timeout +=
|
s->svr.num_queries_discard_timeout +=
|
||||||
(long long)worker->env.mesh->num_queries_discard_timeout;
|
(long long)worker->env.mesh->num_queries_discard_timeout;
|
||||||
|
s->svr.num_queries_replyaddr_limit +=
|
||||||
|
(long long)worker->env.mesh->num_queries_replyaddr_limit;
|
||||||
s->svr.num_queries_wait_limit +=
|
s->svr.num_queries_wait_limit +=
|
||||||
(long long)worker->env.mesh->num_queries_wait_limit;
|
(long long)worker->env.mesh->num_queries_wait_limit;
|
||||||
s->svr.num_dns_error_reports +=
|
s->svr.num_dns_error_reports +=
|
||||||
|
|
@ -448,6 +451,8 @@ void server_stats_add(struct ub_stats_info* total, struct ub_stats_info* a)
|
||||||
total->svr.num_queries_cookie_invalid += a->svr.num_queries_cookie_invalid;
|
total->svr.num_queries_cookie_invalid += a->svr.num_queries_cookie_invalid;
|
||||||
total->svr.num_queries_discard_timeout +=
|
total->svr.num_queries_discard_timeout +=
|
||||||
a->svr.num_queries_discard_timeout;
|
a->svr.num_queries_discard_timeout;
|
||||||
|
total->svr.num_queries_replyaddr_limit +=
|
||||||
|
a->svr.num_queries_replyaddr_limit;
|
||||||
total->svr.num_queries_wait_limit += a->svr.num_queries_wait_limit;
|
total->svr.num_queries_wait_limit += a->svr.num_queries_wait_limit;
|
||||||
total->svr.num_dns_error_reports += a->svr.num_dns_error_reports;
|
total->svr.num_dns_error_reports += a->svr.num_dns_error_reports;
|
||||||
total->svr.num_queries_missed_cache += a->svr.num_queries_missed_cache;
|
total->svr.num_queries_missed_cache += a->svr.num_queries_missed_cache;
|
||||||
|
|
@ -519,6 +524,7 @@ void server_stats_add(struct ub_stats_info* total, struct ub_stats_info* a)
|
||||||
|
|
||||||
total->mesh_num_states += a->mesh_num_states;
|
total->mesh_num_states += a->mesh_num_states;
|
||||||
total->mesh_num_reply_states += a->mesh_num_reply_states;
|
total->mesh_num_reply_states += a->mesh_num_reply_states;
|
||||||
|
total->mesh_num_reply_addrs += a->mesh_num_reply_addrs;
|
||||||
total->mesh_jostled += a->mesh_jostled;
|
total->mesh_jostled += a->mesh_jostled;
|
||||||
total->mesh_dropped += a->mesh_dropped;
|
total->mesh_dropped += a->mesh_dropped;
|
||||||
total->mesh_replies_sent += a->mesh_replies_sent;
|
total->mesh_replies_sent += a->mesh_replies_sent;
|
||||||
|
|
|
||||||
|
|
@ -880,6 +880,11 @@ number of queries removed due to discard\-timeout by thread
|
||||||
.UNINDENT
|
.UNINDENT
|
||||||
.INDENT 0.0
|
.INDENT 0.0
|
||||||
.TP
|
.TP
|
||||||
|
.B threadX.num.queries_replyaddr_limit
|
||||||
|
number of queries dropped due to the limit on additional reply addresses by thread
|
||||||
|
.UNINDENT
|
||||||
|
.INDENT 0.0
|
||||||
|
.TP
|
||||||
.B threadX.num.queries_wait_limit
|
.B threadX.num.queries_wait_limit
|
||||||
number of queries removed due to wait\-limit by thread
|
number of queries removed due to wait\-limit by thread
|
||||||
.UNINDENT
|
.UNINDENT
|
||||||
|
|
@ -994,6 +999,13 @@ Current size of the request list, only the requests from client queries.
|
||||||
.UNINDENT
|
.UNINDENT
|
||||||
.INDENT 0.0
|
.INDENT 0.0
|
||||||
.TP
|
.TP
|
||||||
|
.B threadX.requestlist.current.replies
|
||||||
|
Current number of reply entries waiting on request list
|
||||||
|
entries. Because a request list entry can send results to multiple reply
|
||||||
|
addresses, this number may be larger than the size of the request list.
|
||||||
|
.UNINDENT
|
||||||
|
.INDENT 0.0
|
||||||
|
.TP
|
||||||
.B threadX.recursion.time.avg
|
.B threadX.recursion.time.avg
|
||||||
Average time it took to answer queries that needed recursive processing.
|
Average time it took to answer queries that needed recursive processing.
|
||||||
Note that queries that were answered from the cache are not in this average.
|
Note that queries that were answered from the cache are not in this average.
|
||||||
|
|
@ -1048,6 +1060,11 @@ summed over threads.
|
||||||
.UNINDENT
|
.UNINDENT
|
||||||
.INDENT 0.0
|
.INDENT 0.0
|
||||||
.TP
|
.TP
|
||||||
|
.B total.num.queries_replyaddr_limit
|
||||||
|
summed over threads.
|
||||||
|
.UNINDENT
|
||||||
|
.INDENT 0.0
|
||||||
|
.TP
|
||||||
.B total.num.queries_wait_limit
|
.B total.num.queries_wait_limit
|
||||||
summed over threads.
|
summed over threads.
|
||||||
.UNINDENT
|
.UNINDENT
|
||||||
|
|
@ -1138,6 +1155,16 @@ summed over threads.
|
||||||
.UNINDENT
|
.UNINDENT
|
||||||
.INDENT 0.0
|
.INDENT 0.0
|
||||||
.TP
|
.TP
|
||||||
|
.B total.requestlist.current.user
|
||||||
|
summed over threads.
|
||||||
|
.UNINDENT
|
||||||
|
.INDENT 0.0
|
||||||
|
.TP
|
||||||
|
.B total.requestlist.current.replies
|
||||||
|
summed over threads.
|
||||||
|
.UNINDENT
|
||||||
|
.INDENT 0.0
|
||||||
|
.TP
|
||||||
.B total.recursion.time.median
|
.B total.recursion.time.median
|
||||||
averaged over threads.
|
averaged over threads.
|
||||||
.UNINDENT
|
.UNINDENT
|
||||||
|
|
|
||||||
|
|
@ -815,6 +815,10 @@ number of statistic counters:
|
||||||
number of queries removed due to discard-timeout by thread
|
number of queries removed due to discard-timeout by thread
|
||||||
|
|
||||||
|
|
||||||
|
@@UAHL@unbound-control.stats@threadX.num.queries_replyaddr_limit@@
|
||||||
|
number of queries dropped due to the limit on additional reply addresses by thread
|
||||||
|
|
||||||
|
|
||||||
@@UAHL@unbound-control.stats@threadX.num.queries_wait_limit@@
|
@@UAHL@unbound-control.stats@threadX.num.queries_wait_limit@@
|
||||||
number of queries removed due to wait-limit by thread
|
number of queries removed due to wait-limit by thread
|
||||||
|
|
||||||
|
|
@ -910,6 +914,12 @@ number of statistic counters:
|
||||||
Current size of the request list, only the requests from client queries.
|
Current size of the request list, only the requests from client queries.
|
||||||
|
|
||||||
|
|
||||||
|
@@UAHL@unbound-control.stats@threadX.requestlist.current.replies@@
|
||||||
|
Current number of reply entries waiting on request list
|
||||||
|
entries. Because a request list entry can send results to multiple reply
|
||||||
|
addresses, this number may be larger than the size of the request list.
|
||||||
|
|
||||||
|
|
||||||
@@UAHL@unbound-control.stats@threadX.recursion.time.avg@@
|
@@UAHL@unbound-control.stats@threadX.recursion.time.avg@@
|
||||||
Average time it took to answer queries that needed recursive processing.
|
Average time it took to answer queries that needed recursive processing.
|
||||||
Note that queries that were answered from the cache are not in this average.
|
Note that queries that were answered from the cache are not in this average.
|
||||||
|
|
@ -955,6 +965,10 @@ number of statistic counters:
|
||||||
summed over threads.
|
summed over threads.
|
||||||
|
|
||||||
|
|
||||||
|
@@UAHL@unbound-control.stats@total.num.queries_replyaddr_limit@@
|
||||||
|
summed over threads.
|
||||||
|
|
||||||
|
|
||||||
@@UAHL@unbound-control.stats@total.num.queries_wait_limit@@
|
@@UAHL@unbound-control.stats@total.num.queries_wait_limit@@
|
||||||
summed over threads.
|
summed over threads.
|
||||||
|
|
||||||
|
|
@ -1027,6 +1041,14 @@ number of statistic counters:
|
||||||
summed over threads.
|
summed over threads.
|
||||||
|
|
||||||
|
|
||||||
|
@@UAHL@unbound-control.stats@total.requestlist.current.user@@
|
||||||
|
summed over threads.
|
||||||
|
|
||||||
|
|
||||||
|
@@UAHL@unbound-control.stats@total.requestlist.current.replies@@
|
||||||
|
summed over threads.
|
||||||
|
|
||||||
|
|
||||||
@@UAHL@unbound-control.stats@total.recursion.time.median@@
|
@@UAHL@unbound-control.stats@total.recursion.time.median@@
|
||||||
averaged over threads.
|
averaged over threads.
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -853,6 +853,8 @@ struct ub_server_stats {
|
||||||
long long qquic;
|
long long qquic;
|
||||||
/** number of queries removed due to discard-timeout */
|
/** number of queries removed due to discard-timeout */
|
||||||
long long num_queries_discard_timeout;
|
long long num_queries_discard_timeout;
|
||||||
|
/** number of queries removed due to replyaddr limit */
|
||||||
|
long long num_queries_replyaddr_limit;
|
||||||
/** number of queries removed due to wait-limit */
|
/** number of queries removed due to wait-limit */
|
||||||
long long num_queries_wait_limit;
|
long long num_queries_wait_limit;
|
||||||
/** number of dns error reports generated */
|
/** number of dns error reports generated */
|
||||||
|
|
@ -872,6 +874,8 @@ struct ub_stats_info {
|
||||||
long long mesh_num_states;
|
long long mesh_num_states;
|
||||||
/** mesh stats: current number of reply (user) states */
|
/** mesh stats: current number of reply (user) states */
|
||||||
long long mesh_num_reply_states;
|
long long mesh_num_reply_states;
|
||||||
|
/** mesh stats: current number of reply entries */
|
||||||
|
long long mesh_num_reply_addrs;
|
||||||
/** mesh stats: number of reply states overwritten with a new one */
|
/** mesh stats: number of reply states overwritten with a new one */
|
||||||
long long mesh_jostled;
|
long long mesh_jostled;
|
||||||
/** mesh stats: number of incoming queries dropped */
|
/** mesh stats: number of incoming queries dropped */
|
||||||
|
|
|
||||||
|
|
@ -231,6 +231,7 @@ mesh_create(struct module_stack* stack, struct module_env* env)
|
||||||
mesh->ans_expired = 0;
|
mesh->ans_expired = 0;
|
||||||
mesh->ans_cachedb = 0;
|
mesh->ans_cachedb = 0;
|
||||||
mesh->num_queries_discard_timeout = 0;
|
mesh->num_queries_discard_timeout = 0;
|
||||||
|
mesh->num_queries_replyaddr_limit = 0;
|
||||||
mesh->num_queries_wait_limit = 0;
|
mesh->num_queries_wait_limit = 0;
|
||||||
mesh->num_dns_error_reports = 0;
|
mesh->num_dns_error_reports = 0;
|
||||||
mesh->max_reply_states = env->cfg->num_queries_per_thread;
|
mesh->max_reply_states = env->cfg->num_queries_per_thread;
|
||||||
|
|
@ -474,7 +475,7 @@ void mesh_new_client(struct mesh_area* mesh, struct query_info* qinfo,
|
||||||
verbose(VERB_ALGO, "Too many requests queued. "
|
verbose(VERB_ALGO, "Too many requests queued. "
|
||||||
"dropping incoming query.");
|
"dropping incoming query.");
|
||||||
comm_point_drop_reply(rep);
|
comm_point_drop_reply(rep);
|
||||||
mesh->stats_dropped++;
|
mesh->num_queries_replyaddr_limit++;
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
@ -2295,6 +2296,7 @@ mesh_stats_clear(struct mesh_area* mesh)
|
||||||
memset(&mesh->rpz_action[0], 0, sizeof(size_t)*UB_STATS_RPZ_ACTION_NUM);
|
memset(&mesh->rpz_action[0], 0, sizeof(size_t)*UB_STATS_RPZ_ACTION_NUM);
|
||||||
mesh->ans_nodata = 0;
|
mesh->ans_nodata = 0;
|
||||||
mesh->num_queries_discard_timeout = 0;
|
mesh->num_queries_discard_timeout = 0;
|
||||||
|
mesh->num_queries_replyaddr_limit = 0;
|
||||||
mesh->num_queries_wait_limit = 0;
|
mesh->num_queries_wait_limit = 0;
|
||||||
mesh->num_dns_error_reports = 0;
|
mesh->num_dns_error_reports = 0;
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -141,6 +141,8 @@ struct mesh_area {
|
||||||
size_t rpz_action[UB_STATS_RPZ_ACTION_NUM];
|
size_t rpz_action[UB_STATS_RPZ_ACTION_NUM];
|
||||||
/** stats, number of queries removed due to discard-timeout */
|
/** stats, number of queries removed due to discard-timeout */
|
||||||
size_t num_queries_discard_timeout;
|
size_t num_queries_discard_timeout;
|
||||||
|
/** stats, number of queries removed due to replyaddr limit */
|
||||||
|
size_t num_queries_replyaddr_limit;
|
||||||
/** stats, number of queries removed due to wait-limit */
|
/** stats, number of queries removed due to wait-limit */
|
||||||
size_t num_queries_wait_limit;
|
size_t num_queries_wait_limit;
|
||||||
/** stats, number of dns error reports generated */
|
/** stats, number of dns error reports generated */
|
||||||
|
|
|
||||||
|
|
@ -236,6 +236,8 @@ static void pr_stats(const char* nm, struct ub_stats_info* s)
|
||||||
s->svr.num_queries_cookie_invalid);
|
s->svr.num_queries_cookie_invalid);
|
||||||
PR_UL_NM("num.queries_discard_timeout",
|
PR_UL_NM("num.queries_discard_timeout",
|
||||||
s->svr.num_queries_discard_timeout);
|
s->svr.num_queries_discard_timeout);
|
||||||
|
PR_UL_NM("num.queries_replyaddr_limit",
|
||||||
|
s->svr.num_queries_replyaddr_limit);
|
||||||
PR_UL_NM("num.queries_wait_limit", s->svr.num_queries_wait_limit);
|
PR_UL_NM("num.queries_wait_limit", s->svr.num_queries_wait_limit);
|
||||||
PR_UL_NM("num.cachehits",
|
PR_UL_NM("num.cachehits",
|
||||||
s->svr.num_queries - s->svr.num_queries_missed_cache);
|
s->svr.num_queries - s->svr.num_queries_missed_cache);
|
||||||
|
|
@ -263,6 +265,7 @@ static void pr_stats(const char* nm, struct ub_stats_info* s)
|
||||||
PR_UL_NM("requestlist.exceeded", s->mesh_dropped);
|
PR_UL_NM("requestlist.exceeded", s->mesh_dropped);
|
||||||
PR_UL_NM("requestlist.current.all", s->mesh_num_states);
|
PR_UL_NM("requestlist.current.all", s->mesh_num_states);
|
||||||
PR_UL_NM("requestlist.current.user", s->mesh_num_reply_states);
|
PR_UL_NM("requestlist.current.user", s->mesh_num_reply_states);
|
||||||
|
PR_UL_NM("requestlist.current.replies", s->mesh_num_reply_addrs);
|
||||||
#ifndef S_SPLINT_S
|
#ifndef S_SPLINT_S
|
||||||
sumwait.tv_sec = s->mesh_replies_sum_wait_sec;
|
sumwait.tv_sec = s->mesh_replies_sum_wait_sec;
|
||||||
sumwait.tv_usec = s->mesh_replies_sum_wait_usec;
|
sumwait.tv_usec = s->mesh_replies_sum_wait_usec;
|
||||||
|
|
|
||||||
Loading…
Reference in a new issue