- Fix bug where fallback_tcp causes wrong roundtrip and edns
  observation to be noted in cache. Fix bug where EDNSprobe halted
  exponential backoff if EDNS status unknown.
- new unresponsive host method, exponentially increasing block backoff.
- iana portlist updated.


git-svn-id: file:///svn/unbound/trunk@2303 be551aaa-1e26-0410-a405-d3ace91eadb9
This commit is contained in:
Wouter Wijngaards 2010-10-21 15:11:39 +00:00
parent 25a2e1da5a
commit 2bdb094f7b
10 changed files with 121 additions and 28 deletions

View file

@ -803,6 +803,7 @@ print_dp_details(SSL* ssl, struct worker* worker, struct delegpt* dp)
char buf[257];
struct delegpt_addr* a;
int lame, dlame, rlame, rtt, edns_vs, to, lost;
int entry_ttl, clean_rtt, backoff;
uint8_t edns_lame_known;
for(a = dp->target_list; a; a = a->next_target) {
addr_to_str(&a->addr, a->addrlen, buf, sizeof(buf));
@ -813,6 +814,20 @@ print_dp_details(SSL* ssl, struct worker* worker, struct delegpt* dp)
return;
}
/* lookup in infra cache */
entry_ttl = infra_get_host_rto(worker->env.infra_cache,
&a->addr, a->addrlen, &clean_rtt, &rtt, &backoff,
*worker->env.now);
if(entry_ttl == -1) {
if(!ssl_printf(ssl, "not in infra cache.\n"))
return;
continue; /* skip stuff not in infra cache */
} else if(entry_ttl == -2) {
if(!ssl_printf(ssl, "not in infra cache "
"(backoff %d).\n", backoff))
return;
continue; /* skip stuff not in infra cache */
}
/* uses type_A because most often looked up, but other
* lameness won't be reported then */
if(!infra_get_lame_rtt(worker->env.infra_cache,
@ -823,20 +838,28 @@ print_dp_details(SSL* ssl, struct worker* worker, struct delegpt* dp)
return;
continue; /* skip stuff not in infra cache */
}
if(!ssl_printf(ssl, "%s%s%s%srtt %d msec, %d lost. ",
if(!ssl_printf(ssl, "%s%s%s%srtt %d msec, %d lost, ttl %d",
lame?"LAME ":"", dlame?"NoDNSSEC ":"",
a->lame?"AddrWasParentSide ":"",
rlame?"NoAuthButRecursive ":"", rtt, lost))
rlame?"NoAuthButRecursive ":"", rtt, lost, entry_ttl))
return;
if(rtt != clean_rtt && clean_rtt != 376 /* unknown */) {
if(!ssl_printf(ssl, ", ping %d", clean_rtt))
return;
}
if(backoff != INFRA_BACKOFF_INITIAL) {
if(!ssl_printf(ssl, ", backoff %d", backoff))
return;
}
if(infra_host(worker->env.infra_cache, &a->addr, a->addrlen,
*worker->env.now, &edns_vs, &edns_lame_known, &to)) {
if(edns_vs == -1) {
if(!ssl_printf(ssl, "noEDNS%s.",
edns_lame_known?" probed":""))
if(!ssl_printf(ssl, ", noEDNS%s.",
edns_lame_known?" probed":" assumed"))
return;
} else {
if(!ssl_printf(ssl, "EDNS %d%s.",
edns_vs, edns_lame_known?" probed":""))
if(!ssl_printf(ssl, ", EDNS %d%s.", edns_vs,
edns_lame_known?" probed":" assumed"))
return;
}
}

View file

@ -1,3 +1,10 @@
21 October 2010: Wouter
- Fix bug where fallback_tcp causes wrong roundtrip and edns
observation to be noted in cache. Fix bug where EDNSprobe halted
exponential backoff if EDNS status unknown.
- new unresponsive host method, exponentially increasing block backoff.
- iana portlist updated.
20 October 2010: Wouter
- interface automatic works for some people with ip6 disabled.
Therefore the error check is removed, so they can use the option.

View file

@ -206,11 +206,9 @@ iter_filter_unsuitable(struct iter_env* iter_env, struct module_env* env,
a->lame?" ADDR_LAME":"");
if(lame)
return -1; /* server is lame */
else if(rtt >= USEFUL_SERVER_TOP_TIMEOUT &&
lost >= USEFUL_SERVER_MAX_LOST) {
else if(rtt >= USEFUL_SERVER_TOP_TIMEOUT)
/* server is unresponsive */
return USEFUL_SERVER_TOP_TIMEOUT;
}
/* select remainder from worst to best */
else if(reclame)
return rtt+USEFUL_SERVER_TOP_TIMEOUT*3; /* nonpref */
@ -218,8 +216,6 @@ iter_filter_unsuitable(struct iter_env* iter_env, struct module_env* env,
return rtt+USEFUL_SERVER_TOP_TIMEOUT*2; /* nonpref */
else if(a->lame)
return rtt+USEFUL_SERVER_TOP_TIMEOUT+1; /* nonpref */
else if(rtt >= USEFUL_SERVER_TOP_TIMEOUT) /* not blacklisted*/
return USEFUL_SERVER_TOP_TIMEOUT+1;
else return rtt;
}
/* no server information present */

View file

@ -75,6 +75,8 @@ struct iter_priv;
* Chosen so that the UNKNOWN_SERVER_NICENESS falls within the band of a
* fast server, this causes server exploration as a side benefit. msec. */
#define RTT_BAND 400
/** Start value for blacklisting a host, 2*USEFUL_SERVER_TOP_TIMEOUT in sec.
 * New infra host entries start with this backoff; infra_rtt_update doubles
 * it (capped at 24 hours) when a host crosses into the unresponsive range. */
#define INFRA_BACKOFF_INITIAL 240
/**
* Global state for the iterator.

View file

@ -190,7 +190,7 @@ infra_lookup_host(struct infra_cache* infra,
return data;
}
/** init the host elements (not lame elems) */
/** init the host elements (not lame elems, not backoff) */
static void
host_entry_init(struct infra_cache* infra, struct lruhash_entry* e,
uint32_t timenow)
@ -233,6 +233,7 @@ new_host_entry(struct infra_cache* infra, struct sockaddr_storage* addr,
key->addrlen = addrlen;
memcpy(&key->addr, addr, addrlen);
data->lameness = NULL;
data->backoff = INFRA_BACKOFF_INITIAL;
host_entry_init(infra, &key->entry, tm);
return &key->entry;
}
@ -270,14 +271,6 @@ infra_host(struct infra_cache* infra, struct sockaddr_storage* addr,
/* use existing entry */
data = (struct infra_host_data*)e->data;
*to = rtt_timeout(&data->rtt);
if(*to >= USEFUL_SERVER_TOP_TIMEOUT &&
data->num_timeouts < USEFUL_SERVER_MAX_LOST)
/* use smaller timeout, backoff does not work
* The server seems to still reply but sporadically.
* Perhaps it has rate-limited the traffic, or it
* drops particular queries (AAAA). ignore timeouts,
* and use the jostle timeout for rtt estimate. */
*to = (int)infra->jostle;
*edns_vs = data->edns_version;
*edns_lame_known = data->edns_lame_known;
lock_rw_unlock(&e->lock);
@ -491,11 +484,29 @@ infra_rtt_update(struct infra_cache* infra,
/* have an entry, update the rtt */
data = (struct infra_host_data*)e->data;
if(roundtrip == -1) {
int o = rtt_timeout(&data->rtt);
rtt_lost(&data->rtt, orig_rtt);
if(rtt_timeout(&data->rtt) >= USEFUL_SERVER_TOP_TIMEOUT
&& o < USEFUL_SERVER_TOP_TIMEOUT) {
/* backoff the blacklisted timeout */
log_addr(VERB_ALGO, "backoff for", addr, addrlen);
data->backoff *= 2;
if(data->backoff >= 24*3600)
data->backoff = 24*3600;
verbose(VERB_ALGO, "backoff to %d", data->backoff);
/* increase the infra item TTL */
data->ttl = timenow + data->backoff;
}
if(data->num_timeouts<255)
data->num_timeouts++;
} else {
rtt_update(&data->rtt, roundtrip);
/* un-backoff the element */
if(data->backoff > (uint32_t)infra->host_ttl*2)
data->backoff = (uint32_t)infra->host_ttl*2;
else data->backoff = INFRA_BACKOFF_INITIAL;
data->num_timeouts = 0;
}
if(data->rtt.rto > 0)
@ -507,6 +518,26 @@ infra_rtt_update(struct infra_cache* infra,
return rto;
}
int infra_get_host_rto(struct infra_cache* infra,
struct sockaddr_storage* addr, socklen_t addrlen,
int* rtt, int* rto, int* backoff, uint32_t timenow)
{
struct lruhash_entry* e = infra_lookup_host_nottl(infra, addr,
addrlen, 0);
struct infra_host_data* data;
int ttl = -2;
if(!e) return -1;
data = (struct infra_host_data*)e->data;
*backoff = (int)data->backoff;
if(data->ttl >= timenow) {
ttl = (int)(data->ttl - timenow);
*rtt = rtt_notimeout(&data->rtt);
*rto = rtt_unclamped(&data->rtt);
}
lock_rw_unlock(&e->lock);
return ttl;
}
int
infra_edns_update(struct infra_cache* infra,
struct sockaddr_storage* addr, socklen_t addrlen,

View file

@ -64,6 +64,8 @@ struct infra_host_key {
struct infra_host_data {
/** TTL value for this entry. absolute time. */
uint32_t ttl;
/** backoff time if blacklisted unresponsive. in seconds. */
uint32_t backoff;
/** round trip times for timeout calculation */
struct rtt_info rtt;
/** Names of the zones that are lame. NULL=no lame zones. */
@ -286,6 +288,22 @@ int infra_get_lame_rtt(struct infra_cache* infra,
int* lame, int* dnsseclame, int* reclame, int* rtt, int* lost,
uint32_t timenow);
/**
 * Get additional (debug) info on timing.
 * @param infra: infra cache.
 * @param addr: host address.
 * @param addrlen: length of addr.
 * @param rtt: the clean rtt time (of working replies).
 *	Only written when a TTL is returned (not for -1 or -2).
 * @param rto: the rtt with timeouts applied. (rtt as returned by other funcs).
 *	Only written when a TTL is returned (not for -1 or -2).
 * @param backoff: the backoff time for blacklisted entries, in seconds.
 *	Written whenever the host is found, also when its TTL has expired.
 * @param timenow: what time it is now.
 * @return TTL the infra host element is valid for. If -1: not found in cache,
 *	no output parameter is written.
 * 	If -2: found in cache, but TTL was not valid, only backoff is filled.
 */
int infra_get_host_rto(struct infra_cache* infra,
struct sockaddr_storage* addr, socklen_t addrlen,
int* rtt, int* rto, int* backoff, uint32_t timenow);
/**
* Get memory used by the infra cache.
* @param infra: infrastructure cache.

View file

@ -1305,6 +1305,7 @@ serviced_udp_send(struct serviced_query* sq, ldns_buffer* buff)
if(!infra_host(sq->outnet->infra, &sq->addr, sq->addrlen, now, &vs,
&edns_lame_known, &rtt))
return 0;
sq->last_rtt = rtt;
if(sq->status == serviced_initial) {
if(edns_lame_known == 0 && rtt > 5000 && rtt < 10001) {
/* perform EDNS lame probe - check if server is
@ -1321,7 +1322,6 @@ serviced_udp_send(struct serviced_query* sq, ldns_buffer* buff)
}
serviced_encode(sq, buff, sq->status == serviced_query_UDP_EDNS);
sq->last_sent_time = *sq->outnet->now_tv;
sq->last_rtt = rtt;
sq->edns_lame_known = (int)edns_lame_known;
verbose(VERB_ALGO, "serviced query UDP timeout=%d msec", rtt);
sq->pending = pending_udp_query(sq->outnet, buff, &sq->addr,
@ -1584,7 +1584,8 @@ serviced_udp_callback(struct comm_point* c, void* arg, int error,
serviced_callbacks(sq, error, c, rep);
return 0;
}
if(sq->status == serviced_query_UDP_EDNS
if(!fallback_tcp) {
if(sq->status == serviced_query_UDP_EDNS
&& (LDNS_RCODE_WIRE(ldns_buffer_begin(c->buffer))
== LDNS_RCODE_FORMERR || LDNS_RCODE_WIRE(
ldns_buffer_begin(c->buffer)) == LDNS_RCODE_NOTIMPL)) {
@ -1595,7 +1596,7 @@ serviced_udp_callback(struct comm_point* c, void* arg, int error,
serviced_callbacks(sq, NETEVENT_CLOSED, c, rep);
}
return 0;
} else if(sq->status == serviced_query_PROBE_EDNS) {
} else if(sq->status == serviced_query_PROBE_EDNS) {
/* probe without EDNS succeeds, so we conclude that this
* host likely has EDNS packets dropped */
log_addr(VERB_DETAIL, "timeouts, concluded that connection to "
@ -1607,7 +1608,7 @@ serviced_udp_callback(struct comm_point* c, void* arg, int error,
log_err("Out of memory caching no edns for host");
}
sq->status = serviced_query_UDP;
} else if(sq->status == serviced_query_UDP_EDNS &&
} else if(sq->status == serviced_query_UDP_EDNS &&
!sq->edns_lame_known) {
/* now we know that edns queries received answers store that */
if(!infra_edns_update(outnet->infra, &sq->addr, sq->addrlen,
@ -1615,7 +1616,7 @@ serviced_udp_callback(struct comm_point* c, void* arg, int error,
log_err("Out of memory caching edns works");
}
sq->edns_lame_known = 1;
} else if(sq->status == serviced_query_UDP_EDNS_fallback &&
} else if(sq->status == serviced_query_UDP_EDNS_fallback &&
!sq->edns_lame_known && (LDNS_RCODE_WIRE(
ldns_buffer_begin(c->buffer)) == LDNS_RCODE_NOERROR ||
LDNS_RCODE_WIRE(ldns_buffer_begin(c->buffer)) ==
@ -1630,8 +1631,8 @@ serviced_udp_callback(struct comm_point* c, void* arg, int error,
log_err("Out of memory caching no edns for host");
}
sq->status = serviced_query_UDP;
}
if(now.tv_sec > sq->last_sent_time.tv_sec ||
}
if(now.tv_sec > sq->last_sent_time.tv_sec ||
(now.tv_sec == sq->last_sent_time.tv_sec &&
now.tv_usec > sq->last_sent_time.tv_usec)) {
/* convert from microseconds to milliseconds */
@ -1642,7 +1643,8 @@ serviced_udp_callback(struct comm_point* c, void* arg, int error,
if(!infra_rtt_update(outnet->infra, &sq->addr, sq->addrlen,
roundtime, sq->last_rtt, (uint32_t)now.tv_sec))
log_err("out of memory noting rtt.");
}
}
} /* end of if_!fallback_tcp */
/* perform TC flag check and TCP fallback after updating our
* cache entries for EDNS status and RTT times */
if(LDNS_TC_WIRE(ldns_buffer_begin(c->buffer)) || fallback_tcp) {

View file

@ -4079,6 +4079,8 @@
5100,
5101,
5102,
5104,
5105,
5111,
5112,
5113,

View file

@ -109,3 +109,8 @@ rtt_lost(struct rtt_info* rtt, int orig)
rtt->rto = RTT_MAX_TIMEOUT;
}
}
/* RTT for valid responses, without timeouts: the value is recomputed with
 * calc_rto from the statistics instead of being read from the stored rto. */
int rtt_notimeout(const struct rtt_info* rtt)
{
	int clean_rto = calc_rto(rtt);
	return clean_rto;
}

View file

@ -81,6 +81,13 @@ int rtt_timeout(const struct rtt_info* rtt);
*/
int rtt_unclamped(const struct rtt_info* rtt);
/**
 * RTT for valid responses. Without timeouts.
 * Used for debug output, to show the clean roundtrip estimate of a host
 * next to its timeout value.
 * @param rtt: round trip statistics structure.
 * @return: value in msec.
 */
int rtt_notimeout(const struct rtt_info* rtt);
/**
* Update the statistics with a new roundtrip estimate observation.
* @param rtt: round trip statistics structure.