- Fix that auth zone fails over to next master for timeout in tcp.

git-svn-id: file:///svn/unbound/trunk@5155 be551aaa-1e26-0410-a405-d3ace91eadb9
This commit is contained in:
Wouter Wijngaards 2019-04-11 13:41:53 +00:00
parent 474afc9016
commit c6369e9ffa
5 changed files with 105 additions and 31 deletions

View file

@ -2,6 +2,7 @@
- Fix that auth zone uses correct network type for sockets for
SOA serial probes. This fixes that probes fail because earlier
probe addresses are unreachable.
- Fix that auth zone fails over to next master for timeout in tcp.
8 April 2019: Wouter
- Fix to use event_assign with libevent for thread-safety.

View file

@ -2042,11 +2042,13 @@ auth_xfer_delete(struct auth_xfer* xfr)
if(xfr->task_probe) {
auth_free_masters(xfr->task_probe->masters);
comm_point_delete(xfr->task_probe->cp);
comm_timer_delete(xfr->task_probe->timer);
free(xfr->task_probe);
}
if(xfr->task_transfer) {
auth_free_masters(xfr->task_transfer->masters);
comm_point_delete(xfr->task_transfer->cp);
comm_timer_delete(xfr->task_transfer->timer);
if(xfr->task_transfer->chunks_first) {
auth_chunks_delete(xfr->task_transfer);
}
@ -4973,6 +4975,9 @@ xfr_process_chunk_list(struct auth_xfer* xfr, struct module_env* env,
static void
xfr_transfer_disown(struct auth_xfer* xfr)
{
/* remove timer (from this worker's event base) */
comm_timer_delete(xfr->task_transfer->timer);
xfr->task_transfer->timer = NULL;
/* remove the commpoint */
comm_point_delete(xfr->task_transfer->cp);
xfr->task_transfer->cp = NULL;
@ -5054,6 +5059,8 @@ xfr_transfer_init_fetch(struct auth_xfer* xfr, struct module_env* env)
struct sockaddr_storage addr;
socklen_t addrlen = 0;
struct auth_master* master = xfr->task_transfer->master;
struct timeval t;
int timeout;
if(!master) return 0;
if(master->allow_notify) return 0; /* only for notify */
@ -5079,17 +5086,31 @@ xfr_transfer_init_fetch(struct auth_xfer* xfr, struct module_env* env)
comm_point_delete(xfr->task_transfer->cp);
xfr->task_transfer->cp = NULL;
}
if(!xfr->task_transfer->timer) {
xfr->task_transfer->timer = comm_timer_create(env->worker_base,
auth_xfer_transfer_timer_callback, xfr);
if(!xfr->task_transfer->timer) {
log_err("malloc failure");
return 0;
}
}
timeout = AUTH_TRANSFER_TIMEOUT;
#ifndef S_SPLINT_S
t.tv_sec = timeout/1000;
t.tv_usec = (timeout%1000)*1000;
#endif
if(master->http) {
/* perform http fetch */
/* store http port number into sockaddr,
* unless someone used unbound's host@port notation */
xfr->task_transfer->on_ixfr = 0;
if(strchr(master->host, '@') == NULL)
sockaddr_store_port(&addr, addrlen, master->port);
xfr->task_transfer->cp = outnet_comm_point_for_http(
env->outnet, auth_xfer_transfer_http_callback, xfr,
&addr, addrlen, AUTH_TRANSFER_TIMEOUT, master->ssl,
master->host, master->file);
&addr, addrlen, -1, master->ssl, master->host,
master->file);
if(!xfr->task_transfer->cp) {
char zname[255+1], as[256];
dname_str(xfr->name, zname);
@ -5098,6 +5119,7 @@ xfr_transfer_init_fetch(struct auth_xfer* xfr, struct module_env* env)
"connection for %s to %s", zname, as);
return 0;
}
comm_timer_set(xfr->task_transfer->timer, &t);
if(verbosity >= VERB_ALGO) {
char zname[255+1], as[256];
dname_str(xfr->name, zname);
@ -5117,7 +5139,7 @@ xfr_transfer_init_fetch(struct auth_xfer* xfr, struct module_env* env)
/* connect on fd */
xfr->task_transfer->cp = outnet_comm_point_for_tcp(env->outnet,
auth_xfer_transfer_tcp_callback, xfr, &addr, addrlen,
env->scratch_buffer, AUTH_TRANSFER_TIMEOUT);
env->scratch_buffer, -1);
if(!xfr->task_transfer->cp) {
char zname[255+1], as[256];
dname_str(xfr->name, zname);
@ -5126,6 +5148,7 @@ xfr_transfer_init_fetch(struct auth_xfer* xfr, struct module_env* env)
"xfr %s to %s", zname, as);
return 0;
}
comm_timer_set(xfr->task_transfer->timer, &t);
if(verbosity >= VERB_ALGO) {
char zname[255+1], as[256];
dname_str(xfr->name, zname);
@ -5678,6 +5701,47 @@ process_list_end_transfer(struct auth_xfer* xfr, struct module_env* env)
xfr_transfer_nexttarget_or_end(xfr, env);
}
/**
 * Callback for the task_transfer timer.
 * Called when the timer set for the transfer connection expires, i.e. the
 * connection to the current master timed out.  Discards the partially
 * transferred data and the connection, then continues with the next
 * target (or ends the transfer).
 * @param arg: the struct auth_xfer that the timer was created for.
 */
void
auth_xfer_transfer_timer_callback(void* arg)
{
	struct auth_xfer* xfr = (struct auth_xfer*)arg;
	struct module_env* env;
	/* set when repeated IXFR timeouts make us switch to AXFR */
	int fallback_to_axfr = 0;
	log_assert(xfr->task_transfer);
	lock_basic_lock(&xfr->lock);
	env = xfr->task_transfer->env;
	if(env->outnet->want_to_quit) {
		/* stop on quit */
		lock_basic_unlock(&xfr->lock);
		return;
	}

	verbose(VERB_ALGO, "xfr stopped, connection timeout to %s",
		xfr->task_transfer->master->host);

	/* a timeout while on IXFR counts towards the fallback limit; once
	 * the limit is reached IXFR is marked failed so that AXFR is tried,
	 * and the master list is not advanced for that retry */
	if(xfr->task_transfer->on_ixfr) {
		xfr->task_transfer->ixfr_possible_timeout_count += 1;
		fallback_to_axfr =
			(xfr->task_transfer->ixfr_possible_timeout_count >=
			NUM_TIMEOUTS_FALLBACK_IXFR);
		if(fallback_to_axfr) {
			verbose(VERB_ALGO, "xfr to %s, fallback "
				"from IXFR to AXFR (because of timeouts)",
				xfr->task_transfer->master->host);
			xfr->task_transfer->ixfr_fail = 1;
		}
	}

	/* drop the chunks received so far and the timed out connection */
	auth_chunks_delete(xfr->task_transfer);
	comm_point_delete(xfr->task_transfer->cp);
	xfr->task_transfer->cp = NULL;

	if(!fallback_to_axfr)
		xfr_transfer_nextmaster(xfr);
	/* NOTE(review): xfr->lock is not released in this function on this
	 * path; presumably xfr_transfer_nexttarget_or_end takes care of
	 * that - confirm against its definition */
	xfr_transfer_nexttarget_or_end(xfr, env);
}
/** callback for task_transfer tcp connections */
int
auth_xfer_transfer_tcp_callback(struct comm_point* c, void* arg, int err,
@ -5694,6 +5758,8 @@ auth_xfer_transfer_tcp_callback(struct comm_point* c, void* arg, int err,
lock_basic_unlock(&xfr->lock);
return 0; /* stop on quit */
}
/* stop the timer */
comm_timer_disable(xfr->task_transfer->timer);
if(err != NETEVENT_NOERROR) {
/* connection failed, closed, or timeout */
@ -5774,6 +5840,8 @@ auth_xfer_transfer_http_callback(struct comm_point* c, void* arg, int err,
return 0; /* stop on quit */
}
verbose(VERB_ALGO, "auth zone transfer http callback");
/* stop the timer */
comm_timer_disable(xfr->task_transfer->timer);
if(err != NETEVENT_NOERROR && err != NETEVENT_DONE) {
/* connection failed, closed, or timeout */
@ -5973,13 +6041,12 @@ auth_xfer_probe_timer_callback(void* arg)
return; /* stop on quit */
}
if(verbosity >= VERB_ALGO) {
char zname[255+1];
dname_str(xfr->name, zname);
verbose(VERB_ALGO, "auth zone %s soa probe timeout", zname);
}
if(xfr->task_probe->timeout <= AUTH_PROBE_TIMEOUT_STOP) {
if(verbosity >= VERB_ALGO) {
char zname[255+1];
dname_str(xfr->name, zname);
verbose(VERB_ALGO, "auth zone %s soa probe timeout",
zname);
}
/* try again with bigger timeout */
if(xfr_probe_send_probe(xfr, env, xfr->task_probe->timeout*2)) {
lock_basic_unlock(&xfr->lock);

View file

@ -400,6 +400,9 @@ struct auth_transfer {
/** the transfer (TCP) to the master.
* on the workers event base. */
struct comm_point* cp;
/** timeout for the transfer.
* on the workers event base. */
struct comm_timer* timer;
};
/** list of addresses */
@ -649,6 +652,8 @@ int auth_xfer_transfer_http_callback(struct comm_point* c, void* arg, int err,
struct comm_reply* repinfo);
/** xfer probe timeout callback, part of task_probe */
void auth_xfer_probe_timer_callback(void* arg);
/** xfer transfer timeout callback, part of task_transfer */
void auth_xfer_transfer_timer_callback(void* arg);
/** mesh callback for task_probe on lookup of host names */
void auth_xfer_probe_lookup_callback(void* arg, int rcode,
struct sldns_buffer* buf, enum sec_status sec, char* why_bogus,

View file

@ -127,6 +127,7 @@ fptr_whitelist_comm_timer(void (*fptr)(void*))
#endif
else if(fptr == &auth_xfer_timer) return 1;
else if(fptr == &auth_xfer_probe_timer_callback) return 1;
else if(fptr == &auth_xfer_transfer_timer_callback) return 1;
return 0;
}

View file

@ -1746,6 +1746,16 @@ comm_point_tcp_handle_callback(int fd, short event, void* arg)
}
#endif
if(event&UB_EV_TIMEOUT) {
verbose(VERB_QUERY, "tcp took too long, dropped");
reclaim_tcp_handler(c);
if(!c->tcp_do_close) {
fptr_ok(fptr_whitelist_comm_point(c->callback));
(void)(*c->callback)(c, c->cb_arg,
NETEVENT_TIMEOUT, NULL);
}
return;
}
if(event&UB_EV_READ) {
int has_tcpq = (c->tcp_req_info != NULL);
if(!comm_point_tcp_handle_read(fd, c, 0)) {
@ -1776,16 +1786,6 @@ comm_point_tcp_handle_callback(int fd, short event, void* arg)
tcp_req_info_read_again(fd, c);
return;
}
if(event&UB_EV_TIMEOUT) {
verbose(VERB_QUERY, "tcp took too long, dropped");
reclaim_tcp_handler(c);
if(!c->tcp_do_close) {
fptr_ok(fptr_whitelist_comm_point(c->callback));
(void)(*c->callback)(c, c->cb_arg,
NETEVENT_TIMEOUT, NULL);
}
return;
}
log_err("Ignored event %d for tcphdl.", event);
}
@ -2390,6 +2390,16 @@ comm_point_http_handle_callback(int fd, short event, void* arg)
log_assert(c->type == comm_http);
ub_comm_base_now(c->ev->base);
if(event&UB_EV_TIMEOUT) {
verbose(VERB_QUERY, "http took too long, dropped");
reclaim_http_handler(c);
if(!c->tcp_do_close) {
fptr_ok(fptr_whitelist_comm_point(c->callback));
(void)(*c->callback)(c, c->cb_arg,
NETEVENT_TIMEOUT, NULL);
}
return;
}
if(event&UB_EV_READ) {
if(!comm_point_http_handle_read(fd, c)) {
reclaim_http_handler(c);
@ -2414,16 +2424,6 @@ comm_point_http_handle_callback(int fd, short event, void* arg)
}
return;
}
if(event&UB_EV_TIMEOUT) {
verbose(VERB_QUERY, "http took too long, dropped");
reclaim_http_handler(c);
if(!c->tcp_do_close) {
fptr_ok(fptr_whitelist_comm_point(c->callback));
(void)(*c->callback)(c, c->cb_arg,
NETEVENT_TIMEOUT, NULL);
}
return;
}
log_err("Ignored event %d for httphdl.", event);
}
@ -3146,8 +3146,8 @@ comm_point_stop_listening(struct comm_point* c)
void
comm_point_start_listening(struct comm_point* c, int newfd, int msec)
{
verbose(VERB_ALGO, "comm point start listening %d",
c->fd==-1?newfd:c->fd);
verbose(VERB_ALGO, "comm point start listening %d (%d msec)",
c->fd==-1?newfd:c->fd, msec);
if(c->type == comm_tcp_accept && !c->tcp_free) {
/* no use to start listening no free slots. */
return;