mirror of
https://github.com/NLnetLabs/unbound.git
synced 2025-12-20 23:00:56 -05:00
Blacklist when 16 queries fail in a row.
git-svn-id: file:///svn/unbound/trunk@1764 be551aaa-1e26-0410-a405-d3ace91eadb9
This commit is contained in:
parent
1cd84ce84e
commit
c42056b9aa
6 changed files with 37 additions and 11 deletions
|
|
@ -786,7 +786,7 @@ print_dp_details(SSL* ssl, struct worker* worker, struct delegpt* dp)
|
||||||
{
|
{
|
||||||
char buf[257];
|
char buf[257];
|
||||||
struct delegpt_addr* a;
|
struct delegpt_addr* a;
|
||||||
int lame, dlame, rlame, rtt, edns_vs, to;
|
int lame, dlame, rlame, rtt, edns_vs, to, lost;
|
||||||
uint8_t edns_lame_known;
|
uint8_t edns_lame_known;
|
||||||
for(a = dp->target_list; a; a = a->next_target) {
|
for(a = dp->target_list; a; a = a->next_target) {
|
||||||
addr_to_str(&a->addr, a->addrlen, buf, sizeof(buf));
|
addr_to_str(&a->addr, a->addrlen, buf, sizeof(buf));
|
||||||
|
|
@ -801,15 +801,15 @@ print_dp_details(SSL* ssl, struct worker* worker, struct delegpt* dp)
|
||||||
* lameness won't be reported then */
|
* lameness won't be reported then */
|
||||||
if(!infra_get_lame_rtt(worker->env.infra_cache,
|
if(!infra_get_lame_rtt(worker->env.infra_cache,
|
||||||
&a->addr, a->addrlen, dp->name, dp->namelen,
|
&a->addr, a->addrlen, dp->name, dp->namelen,
|
||||||
LDNS_RR_TYPE_A, &lame, &dlame, &rlame, &rtt,
|
LDNS_RR_TYPE_A, &lame, &dlame, &rlame, &rtt, &lost,
|
||||||
*worker->env.now)) {
|
*worker->env.now)) {
|
||||||
if(!ssl_printf(ssl, "not in infra cache.\n"))
|
if(!ssl_printf(ssl, "not in infra cache.\n"))
|
||||||
return;
|
return;
|
||||||
continue; /* skip stuff not in infra cache */
|
continue; /* skip stuff not in infra cache */
|
||||||
}
|
}
|
||||||
if(!ssl_printf(ssl, "%s%s%srtt %d msec. ",
|
if(!ssl_printf(ssl, "%s%s%srtt %d msec, %d lost. ",
|
||||||
lame?"LAME ":"", dlame?"NoDNSSEC ":"",
|
lame?"LAME ":"", dlame?"NoDNSSEC ":"",
|
||||||
rlame?"NoAuthButRecursive ":"", rtt))
|
rlame?"NoAuthButRecursive ":"", rtt, lost))
|
||||||
return;
|
return;
|
||||||
if(infra_host(worker->env.infra_cache, &a->addr, a->addrlen,
|
if(infra_host(worker->env.infra_cache, &a->addr, a->addrlen,
|
||||||
*worker->env.now, &edns_vs, &edns_lame_known, &to)) {
|
*worker->env.now, &edns_vs, &edns_lame_known, &to)) {
|
||||||
|
|
|
||||||
|
|
@ -1,3 +1,7 @@
|
||||||
|
17 August 2009: Wouter
|
||||||
|
- Fix so that servers are only blacklisted if they fail to reply
|
||||||
|
to 16 queries in a row and the timeout gets above 2 minutes.
|
||||||
|
|
||||||
14 August 2009: Wouter
|
14 August 2009: Wouter
|
||||||
- unbound-control lookup prints out infra cache information, like RTT.
|
- unbound-control lookup prints out infra cache information, like RTT.
|
||||||
- Fix bug in DLV lookup reported by Amanda from Secure64.
|
- Fix bug in DLV lookup reported by Amanda from Secure64.
|
||||||
|
|
|
||||||
|
|
@ -151,6 +151,8 @@ iter_apply_cfg(struct iter_env* iter_env, struct config_file* cfg)
|
||||||
* values 0 .. 49 are not used, unless that is changed.
|
* values 0 .. 49 are not used, unless that is changed.
|
||||||
* USEFUL_SERVER_TOP_TIMEOUT
|
* USEFUL_SERVER_TOP_TIMEOUT
|
||||||
* This value exactly is given for unresponsive blacklisted.
|
* This value exactly is given for unresponsive blacklisted.
|
||||||
|
* USEFUL_SERVER_TOP_TIMEOUT+1
|
||||||
|
* For non-blacklisted servers: huge timeout, but has traffic.
|
||||||
* USEFUL_SERVER_TOP_TIMEOUT ..
|
* USEFUL_SERVER_TOP_TIMEOUT ..
|
||||||
* dnsseclame servers get penalty
|
* dnsseclame servers get penalty
|
||||||
* USEFUL_SERVER_TOP_TIMEOUT*2 ..
|
* USEFUL_SERVER_TOP_TIMEOUT*2 ..
|
||||||
|
|
@ -170,7 +172,7 @@ iter_filter_unsuitable(struct iter_env* iter_env, struct module_env* env,
|
||||||
uint8_t* name, size_t namelen, uint16_t qtype, uint32_t now,
|
uint8_t* name, size_t namelen, uint16_t qtype, uint32_t now,
|
||||||
struct delegpt_addr* a)
|
struct delegpt_addr* a)
|
||||||
{
|
{
|
||||||
int rtt, lame, reclame, dnsseclame;
|
int rtt, lame, reclame, dnsseclame, lost;
|
||||||
if(a->bogus)
|
if(a->bogus)
|
||||||
return -1; /* address of server is bogus */
|
return -1; /* address of server is bogus */
|
||||||
if(donotq_lookup(iter_env->donotq, &a->addr, a->addrlen)) {
|
if(donotq_lookup(iter_env->donotq, &a->addr, a->addrlen)) {
|
||||||
|
|
@ -182,7 +184,7 @@ iter_filter_unsuitable(struct iter_env* iter_env, struct module_env* env,
|
||||||
/* check lameness - need zone , class info */
|
/* check lameness - need zone , class info */
|
||||||
if(infra_get_lame_rtt(env->infra_cache, &a->addr, a->addrlen,
|
if(infra_get_lame_rtt(env->infra_cache, &a->addr, a->addrlen,
|
||||||
name, namelen, qtype, &lame, &dnsseclame, &reclame,
|
name, namelen, qtype, &lame, &dnsseclame, &reclame,
|
||||||
&rtt, now)) {
|
&rtt, &lost, now)) {
|
||||||
log_addr(VERB_ALGO, "servselect", &a->addr, a->addrlen);
|
log_addr(VERB_ALGO, "servselect", &a->addr, a->addrlen);
|
||||||
verbose(VERB_ALGO, " rtt=%d%s%s%s", rtt,
|
verbose(VERB_ALGO, " rtt=%d%s%s%s", rtt,
|
||||||
lame?" LAME":"",
|
lame?" LAME":"",
|
||||||
|
|
@ -190,9 +192,12 @@ iter_filter_unsuitable(struct iter_env* iter_env, struct module_env* env,
|
||||||
reclame?" REC_LAME":"");
|
reclame?" REC_LAME":"");
|
||||||
if(lame)
|
if(lame)
|
||||||
return -1; /* server is lame */
|
return -1; /* server is lame */
|
||||||
else if(rtt >= USEFUL_SERVER_TOP_TIMEOUT)
|
else if(rtt >= USEFUL_SERVER_TOP_TIMEOUT &&
|
||||||
|
lost >= USEFUL_SERVER_MAX_LOST)
|
||||||
/* server is unresponsive */
|
/* server is unresponsive */
|
||||||
return USEFUL_SERVER_TOP_TIMEOUT;
|
return USEFUL_SERVER_TOP_TIMEOUT;
|
||||||
|
else if(rtt >= USEFUL_SERVER_TOP_TIMEOUT) /* not blacklisted*/
|
||||||
|
return USEFUL_SERVER_TOP_TIMEOUT+1;
|
||||||
else if(reclame)
|
else if(reclame)
|
||||||
return rtt+USEFUL_SERVER_TOP_TIMEOUT*2; /* nonpref */
|
return rtt+USEFUL_SERVER_TOP_TIMEOUT*2; /* nonpref */
|
||||||
else if(dnsseclame )
|
else if(dnsseclame )
|
||||||
|
|
|
||||||
|
|
@ -65,6 +65,10 @@ struct iter_priv;
|
||||||
* Equals RTT_MAX_TIMEOUT
|
* Equals RTT_MAX_TIMEOUT
|
||||||
*/
|
*/
|
||||||
#define USEFUL_SERVER_TOP_TIMEOUT 120000
|
#define USEFUL_SERVER_TOP_TIMEOUT 120000
|
||||||
|
/** Number of lost messages in a row that get a host blacklisted.
|
||||||
|
* With 16, a couple different queries have to time out and no working
|
||||||
|
* queries are happening */
|
||||||
|
#define USEFUL_SERVER_MAX_LOST 16
|
||||||
/** number of retries on outgoing queries */
|
/** number of retries on outgoing queries */
|
||||||
#define OUTBOUND_MSG_RETRY 5
|
#define OUTBOUND_MSG_RETRY 5
|
||||||
/** RTT band, within this amount from the best, servers are chosen randomly.
|
/** RTT band, within this amount from the best, servers are chosen randomly.
|
||||||
|
|
|
||||||
14
services/cache/infra.c
vendored
14
services/cache/infra.c
vendored
|
|
@ -219,6 +219,7 @@ new_host_entry(struct infra_cache* infra, struct sockaddr_storage* addr,
|
||||||
data->lameness = NULL;
|
data->lameness = NULL;
|
||||||
data->edns_version = 0;
|
data->edns_version = 0;
|
||||||
data->edns_lame_known = 0;
|
data->edns_lame_known = 0;
|
||||||
|
data->num_timeouts = 0;
|
||||||
rtt_init(&data->rtt);
|
rtt_init(&data->rtt);
|
||||||
return &key->entry;
|
return &key->entry;
|
||||||
}
|
}
|
||||||
|
|
@ -471,9 +472,14 @@ infra_rtt_update(struct infra_cache* infra,
|
||||||
/* have an entry, update the rtt, and the ttl */
|
/* have an entry, update the rtt, and the ttl */
|
||||||
data = (struct infra_host_data*)e->data;
|
data = (struct infra_host_data*)e->data;
|
||||||
data->ttl = timenow + infra->host_ttl;
|
data->ttl = timenow + infra->host_ttl;
|
||||||
if(roundtrip == -1)
|
if(roundtrip == -1) {
|
||||||
rtt_lost(&data->rtt, orig_rtt);
|
rtt_lost(&data->rtt, orig_rtt);
|
||||||
else rtt_update(&data->rtt, roundtrip);
|
if(data->num_timeouts<255)
|
||||||
|
data->num_timeouts++;
|
||||||
|
} else {
|
||||||
|
rtt_update(&data->rtt, roundtrip);
|
||||||
|
data->num_timeouts = 0;
|
||||||
|
}
|
||||||
if(data->rtt.rto > 0)
|
if(data->rtt.rto > 0)
|
||||||
rto = data->rtt.rto;
|
rto = data->rtt.rto;
|
||||||
|
|
||||||
|
|
@ -513,7 +519,8 @@ int
|
||||||
infra_get_lame_rtt(struct infra_cache* infra,
|
infra_get_lame_rtt(struct infra_cache* infra,
|
||||||
struct sockaddr_storage* addr, socklen_t addrlen,
|
struct sockaddr_storage* addr, socklen_t addrlen,
|
||||||
uint8_t* name, size_t namelen, uint16_t qtype,
|
uint8_t* name, size_t namelen, uint16_t qtype,
|
||||||
int* lame, int* dnsseclame, int* reclame, int* rtt, uint32_t timenow)
|
int* lame, int* dnsseclame, int* reclame, int* rtt, int* lost,
|
||||||
|
uint32_t timenow)
|
||||||
{
|
{
|
||||||
struct infra_host_data* host;
|
struct infra_host_data* host;
|
||||||
struct lruhash_entry* e = infra_lookup_host_nottl(infra, addr,
|
struct lruhash_entry* e = infra_lookup_host_nottl(infra, addr,
|
||||||
|
|
@ -523,6 +530,7 @@ infra_get_lame_rtt(struct infra_cache* infra,
|
||||||
return 0;
|
return 0;
|
||||||
host = (struct infra_host_data*)e->data;
|
host = (struct infra_host_data*)e->data;
|
||||||
*rtt = rtt_unclamped(&host->rtt);
|
*rtt = rtt_unclamped(&host->rtt);
|
||||||
|
*lost = (int)host->num_timeouts;
|
||||||
/* check lameness first, if so, ttl on host does not matter anymore */
|
/* check lameness first, if so, ttl on host does not matter anymore */
|
||||||
if(infra_lookup_lame(host, name, namelen, timenow,
|
if(infra_lookup_lame(host, name, namelen, timenow,
|
||||||
&dlm, &rlm, &alm, &olm)) {
|
&dlm, &rlm, &alm, &olm)) {
|
||||||
|
|
|
||||||
7
services/cache/infra.h
vendored
7
services/cache/infra.h
vendored
|
|
@ -74,6 +74,8 @@ struct infra_host_data {
|
||||||
* EDNS lame is when EDNS queries or replies are dropped,
|
* EDNS lame is when EDNS queries or replies are dropped,
|
||||||
* and cause a timeout */
|
* and cause a timeout */
|
||||||
uint8_t edns_lame_known;
|
uint8_t edns_lame_known;
|
||||||
|
/** Number of consecutive timeouts; reset when reply arrives OK. */
|
||||||
|
uint8_t num_timeouts;
|
||||||
};
|
};
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|
@ -270,13 +272,16 @@ int infra_edns_update(struct infra_cache* infra,
|
||||||
* @param reclame: if function returns true, this is if it is recursion lame.
|
* @param reclame: if function returns true, this is if it is recursion lame.
|
||||||
* @param rtt: if function returns true, this returns avg rtt of the server.
|
* @param rtt: if function returns true, this returns avg rtt of the server.
|
||||||
* The rtt value is unclamped and reflects recent timeouts.
|
* The rtt value is unclamped and reflects recent timeouts.
|
||||||
|
* @param lost: number of queries lost in a row. Reset to 0 when an answer
|
||||||
|
* gets back. Gives a connectivity number.
|
||||||
* @param timenow: what time it is now.
|
* @param timenow: what time it is now.
|
||||||
* @return if found in cache, or false if not (or TTL bad).
|
* @return if found in cache, or false if not (or TTL bad).
|
||||||
*/
|
*/
|
||||||
int infra_get_lame_rtt(struct infra_cache* infra,
|
int infra_get_lame_rtt(struct infra_cache* infra,
|
||||||
struct sockaddr_storage* addr, socklen_t addrlen,
|
struct sockaddr_storage* addr, socklen_t addrlen,
|
||||||
uint8_t* name, size_t namelen, uint16_t qtype,
|
uint8_t* name, size_t namelen, uint16_t qtype,
|
||||||
int* lame, int* dnsseclame, int* reclame, int* rtt, uint32_t timenow);
|
int* lame, int* dnsseclame, int* reclame, int* rtt, int* lost,
|
||||||
|
uint32_t timenow);
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Get memory used by the infra cache.
|
* Get memory used by the infra cache.
|
||||||
|
|
|
||||||
Loading…
Reference in a new issue