Merge pull request #612 from NLnetLabs/tcp-race-condition

TCP race condition
This commit is contained in:
gthess 2022-01-25 17:26:30 +01:00 committed by GitHub
commit ddc3c754b0
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
5 changed files with 123 additions and 27 deletions

View file

@ -94,6 +94,16 @@ static void waiting_list_remove(struct outside_network* outnet,
static uint16_t tcp_select_id(struct outside_network* outnet,
struct reuse_tcp* reuse);
/** Perform serviced query UDP sending operation */
static int serviced_udp_send(struct serviced_query* sq, sldns_buffer* buff);
/** Send serviced query over TCP return false on initial failure */
static int serviced_tcp_send(struct serviced_query* sq, sldns_buffer* buff);
/** call the callbacks for a serviced query */
static void serviced_callbacks(struct serviced_query* sq, int error,
struct comm_point* c, struct comm_reply* rep);
int
pending_cmp(const void* key1, const void* key2)
{
@ -836,6 +846,7 @@ outnet_add_tcp_waiting_first(struct outside_network* outnet,
if(w->on_tcp_waiting_list)
return;
w->next_waiting = outnet->tcp_wait_first;
log_assert(w->next_waiting != w);
if(!outnet->tcp_wait_last)
outnet->tcp_wait_last = w;
outnet->tcp_wait_first = w;
@ -1136,6 +1147,22 @@ static void reuse_cb_readwait_for_failure(rbtree_type* tree_by_id, int err)
}
}
/** mark the entry for being in the cb_and_decommission stage */
static void mark_for_cb_and_decommission(rbnode_type* node,
void* ATTR_UNUSED(arg))
{
struct waiting_tcp* w = (struct waiting_tcp*)node->key;
/* Mark the waiting_tcp to signal later code (serviced_delete) that
* this item is part of the backed up tree_by_id and will be deleted
* later. */
w->in_cb_and_decommission = 1;
/* Mark the serviced_query for deletion so that later code through
* callbacks (iter_clear .. outnet_serviced_query_stop) won't
* prematurely delete it. */
if(w->cb)
((struct serviced_query*)w->cb_arg)->to_be_deleted = 1;
}
/** perform callbacks for failure and also decommission pending tcp.
* the callbacks remove references in sq->pending to the waiting_tcp
* members of the tree_by_id in the pending tcp. The pending_tcp is
@ -1151,6 +1178,9 @@ static void reuse_cb_and_decommission(struct outside_network* outnet,
pend->reuse.write_wait_first = NULL;
pend->reuse.write_wait_last = NULL;
decommission_pending_tcp(outnet, pend);
if(store.root != NULL && store.root != RBTREE_NULL) {
traverse_postorder(&store, &mark_for_cb_and_decommission, NULL);
}
reuse_cb_readwait_for_failure(&store, error);
reuse_del_readwait(&store);
}
@ -1271,6 +1301,8 @@ outnet_tcp_cb(struct comm_point* c, void* arg, int error,
}
}
if(w) {
log_assert(!w->on_tcp_waiting_list);
log_assert(!w->write_wait_queued);
reuse_tree_by_id_delete(&pend->reuse, w);
verbose(VERB_CLIENT, "outnet tcp callback query err %d buflen %d",
error, (int)sldns_buffer_limit(c->buffer));
@ -1330,7 +1362,7 @@ outnet_send_wait_udp(struct outside_network* outnet)
{
struct pending* pend;
/* process waiting queries */
while(outnet->udp_wait_first && outnet->unused_fds
while(outnet->udp_wait_first && outnet->unused_fds
&& !outnet->want_to_quit) {
pend = outnet->udp_wait_first;
outnet->udp_wait_first = pend->next_waiting;
@ -1339,8 +1371,10 @@ outnet_send_wait_udp(struct outside_network* outnet)
sldns_buffer_write(outnet->udp_buff, pend->pkt, pend->pkt_len);
sldns_buffer_flip(outnet->udp_buff);
free(pend->pkt); /* freeing now makes get_mem correct */
pend->pkt = NULL;
pend->pkt = NULL;
pend->pkt_len = 0;
log_assert(!pend->sq->busy);
pend->sq->busy = 1;
if(!randomize_and_send_udp(pend, outnet->udp_buff,
pend->timeout)) {
/* callback error on pending */
@ -1351,6 +1385,7 @@ outnet_send_wait_udp(struct outside_network* outnet)
}
pending_delete(outnet, pend);
}
pend->sq->busy = 0;
}
}
@ -1460,7 +1495,6 @@ calc_num46(char** ifs, int num_ifs, int do_ip4, int do_ip6,
(*num_ip4)++;
}
}
}
void
@ -1715,6 +1749,8 @@ serviced_node_del(rbnode_type* node, void* ATTR_UNUSED(arg))
{
struct serviced_query* sq = (struct serviced_query*)node;
alloc_reg_release(sq->alloc, sq->region);
if(sq->timer)
comm_timer_delete(sq->timer);
free(sq);
}
@ -2171,10 +2207,13 @@ pending_udp_query(struct serviced_query* sq, struct sldns_buffer* packet,
sq->outnet->udp_wait_last = pend;
return pend;
}
log_assert(!sq->busy);
sq->busy = 1;
if(!randomize_and_send_udp(pend, packet, timeout)) {
pending_delete(sq->outnet, pend);
return NULL;
}
sq->busy = 0;
return pend;
}
@ -2244,7 +2283,7 @@ reuse_tcp_select_id(struct reuse_tcp* reuse, struct outside_network* outnet)
}
/* equally pick a random unused element from the tree that is
* not in use. Pick a the n-th index of an ununused number,
* not in use. Pick a the n-th index of an unused number,
* then loop over the empty spaces in the tree and find it */
log_assert(reuse->tree_by_id.count < 0xffff);
select = ub_random_max(outnet->rnd, 0xffff - reuse->tree_by_id.count);
@ -2357,6 +2396,7 @@ pending_tcp_query(struct serviced_query* sq, sldns_buffer* packet,
#ifdef USE_DNSTAP
w->sq = NULL;
#endif
w->in_cb_and_decommission = 0;
if(pend) {
/* we have a buffer available right now */
if(reuse) {
@ -2453,6 +2493,32 @@ lookup_serviced(struct outside_network* outnet, sldns_buffer* buff, int dnssec,
return (struct serviced_query*)rbtree_search(outnet->serviced, &key);
}
void
serviced_timer_cb(void* arg)
{
struct serviced_query* sq = (struct serviced_query*)arg;
struct outside_network* outnet = sq->outnet;
verbose(VERB_ALGO, "serviced send timer");
/* By the time this cb is called, if we don't have any registered
* callbacks for this serviced_query anymore; do not send. */
if(!sq->cblist)
goto delete;
/* perform first network action */
if(outnet->do_udp && !(sq->tcp_upstream || sq->ssl_upstream)) {
if(!serviced_udp_send(sq, outnet->udp_buff))
goto delete;
} else {
if(!serviced_tcp_send(sq, outnet->udp_buff))
goto delete;
}
/* Maybe by this time we don't have callbacks attached anymore. Don't
* proactively try to delete; let it run and maybe another callback
* will get attached by the time we get an answer. */
return;
delete:
serviced_callbacks(sq, NETEVENT_CLOSED, NULL, NULL);
}
/** Create new serviced entry */
static struct serviced_query*
serviced_create(struct outside_network* outnet, sldns_buffer* buff, int dnssec,
@ -2463,6 +2529,7 @@ serviced_create(struct outside_network* outnet, sldns_buffer* buff, int dnssec,
struct regional* region)
{
struct serviced_query* sq = (struct serviced_query*)malloc(sizeof(*sq));
struct timeval t;
#ifdef UNBOUND_DEBUG
rbnode_type* ins;
#endif
@ -2505,6 +2572,15 @@ serviced_create(struct outside_network* outnet, sldns_buffer* buff, int dnssec,
memcpy(&sq->addr, addr, addrlen);
sq->addrlen = addrlen;
sq->opt_list = opt_list;
sq->busy = 0;
sq->timer = comm_timer_create(outnet->base, serviced_timer_cb, sq);
if(!sq->timer) {
alloc_reg_release(alloc, region);
free(sq);
return NULL;
}
memset(&t, 0, sizeof(t));
comm_timer_set(sq->timer, &t);
sq->outnet = outnet;
sq->cblist = NULL;
sq->pending = NULL;
@ -2611,29 +2687,38 @@ serviced_delete(struct serviced_query* sq)
struct waiting_tcp* w = (struct waiting_tcp*)
sq->pending;
verbose(VERB_CLIENT, "serviced_delete: TCP");
log_assert(!(w->write_wait_queued && w->on_tcp_waiting_list));
/* if on stream-write-waiting list then
* remove from waiting list and waiting_tcp_delete */
if(w->write_wait_queued) {
struct pending_tcp* pend =
(struct pending_tcp*)w->next_waiting;
verbose(VERB_CLIENT, "serviced_delete: writewait");
reuse_tree_by_id_delete(&pend->reuse, w);
if(!w->in_cb_and_decommission)
reuse_tree_by_id_delete(&pend->reuse, w);
reuse_write_wait_remove(&pend->reuse, w);
waiting_tcp_delete(w);
if(!w->in_cb_and_decommission)
waiting_tcp_delete(w);
} else if(!w->on_tcp_waiting_list) {
struct pending_tcp* pend =
(struct pending_tcp*)w->next_waiting;
verbose(VERB_CLIENT, "serviced_delete: tcpreusekeep");
/* w needs to stay on tree_by_id to not assign
* the same ID; remove the callback since its
* serviced_query will be gone. */
w->cb = NULL;
if(!reuse_tcp_remove_serviced_keep(w, sq)) {
reuse_cb_and_decommission(sq->outnet,
pend, NETEVENT_CLOSED);
if(!w->in_cb_and_decommission)
reuse_cb_and_decommission(sq->outnet,
pend, NETEVENT_CLOSED);
use_free_buffer(sq->outnet);
}
sq->pending = NULL;
} else {
verbose(VERB_CLIENT, "serviced_delete: tcpwait");
waiting_list_remove(sq->outnet, w);
waiting_tcp_delete(w);
if(!w->in_cb_and_decommission)
waiting_tcp_delete(w);
}
}
}
@ -2921,7 +3006,7 @@ serviced_tcp_callback(struct comm_point* c, void* arg, int error,
struct waiting_tcp* w = (struct waiting_tcp*)sq->pending;
struct pending_tcp* pend_tcp = NULL;
struct port_if* pi = NULL;
if(!w->on_tcp_waiting_list && w->next_waiting) {
if(w && !w->on_tcp_waiting_list && w->next_waiting) {
pend_tcp = (struct pending_tcp*)w->next_waiting;
pi = pend_tcp->pi;
}
@ -3017,8 +3102,11 @@ serviced_tcp_initiate(struct serviced_query* sq, sldns_buffer* buff)
sq->status==serviced_query_TCP_EDNS?"EDNS":"");
serviced_encode(sq, buff, sq->status == serviced_query_TCP_EDNS);
sq->last_sent_time = *sq->outnet->now_tv;
log_assert(!sq->busy);
sq->busy = 1;
sq->pending = pending_tcp_query(sq, buff, sq->outnet->tcp_auth_query_timeout,
serviced_tcp_callback, sq);
sq->busy = 0;
if(!sq->pending) {
/* delete from tree so that a retry by above layer does not
* clash with this entry */
@ -3050,8 +3138,11 @@ serviced_tcp_send(struct serviced_query* sq, sldns_buffer* buff)
} else {
timeout = sq->outnet->tcp_auth_query_timeout;
}
log_assert(!sq->busy);
sq->busy = 1;
sq->pending = pending_tcp_query(sq, buff, timeout,
serviced_tcp_callback, sq);
sq->busy = 0;
return sq->pending != NULL;
}
@ -3313,20 +3404,8 @@ outnet_serviced_query(struct outside_network* outnet,
serviced_node_del(&sq->node, NULL);
return NULL;
}
/* perform first network action */
if(outnet->do_udp && !(tcp_upstream || ssl_upstream)) {
if(!serviced_udp_send(sq, buff)) {
(void)rbtree_delete(outnet->serviced, sq);
serviced_node_del(&sq->node, NULL);
return NULL;
}
} else {
if(!serviced_tcp_send(sq, buff)) {
(void)rbtree_delete(outnet->serviced, sq);
serviced_node_del(&sq->node, NULL);
return NULL;
}
}
/* No network action at this point; it will be invoked with the
* serviced_query timer instead to run outside of the mesh. */
} else {
/* We don't need this region anymore. */
alloc_reg_release(env->alloc, region);
@ -3363,13 +3442,13 @@ callback_list_remove(struct serviced_query* sq, void* cb_arg)
void outnet_serviced_query_stop(struct serviced_query* sq, void* cb_arg)
{
if(!sq)
if(!sq)
return;
callback_list_remove(sq, cb_arg);
/* if callbacks() routine scheduled deletion, let it do that */
if(!sq->cblist && !sq->to_be_deleted) {
if(!sq->cblist && !sq->busy && !sq->to_be_deleted) {
(void)rbtree_delete(sq->outnet->serviced, sq);
serviced_delete(sq);
serviced_delete(sq);
}
}

View file

@ -414,6 +414,8 @@ struct waiting_tcp {
char* tls_auth_name;
/** the packet was involved in an error, to stop looping errors */
int error_count;
/** if true, the item is at the cb_and_decommission stage */
int in_cb_and_decommission;
#ifdef USE_DNSTAP
/** serviced query pointer for dnstap to get logging info, if nonNULL*/
struct serviced_query* sq;
@ -519,6 +521,10 @@ struct serviced_query {
struct regional* region;
/** allocation service for the region */
struct alloc_cache* alloc;
/** flash timer to start the net I/O as a separate event */
struct comm_timer* timer;
/** true if serviced_query is currently doing net I/O and may block */
int busy;
};
/**
@ -792,6 +798,9 @@ void pending_udp_timer_delay_cb(void *arg);
/** callback for outgoing TCP timer event */
void outnet_tcptimer(void* arg);
/** callback to send serviced queries */
void serviced_timer_cb(void *arg);
/** callback for serviced query UDP answers */
int serviced_udp_callback(struct comm_point* c, void* arg, int error,
struct comm_reply* rep);

View file

@ -1442,6 +1442,11 @@ void pending_udp_timer_cb(void *ATTR_UNUSED(arg))
log_assert(0);
}
void serviced_timer_cb(void *ATTR_UNUSED(arg))
{
log_assert(0);
}
void pending_udp_timer_delay_cb(void *ATTR_UNUSED(arg))
{
log_assert(0);

View file

@ -138,6 +138,7 @@ fptr_whitelist_comm_timer(void (*fptr)(void*))
else if(fptr == &auth_xfer_probe_timer_callback) return 1;
else if(fptr == &auth_xfer_transfer_timer_callback) return 1;
else if(fptr == &mesh_serve_expired_callback) return 1;
else if(fptr == &serviced_timer_cb) return 1;
#ifdef USE_DNSTAP
else if(fptr == &mq_wakeup_cb) return 1;
#endif

View file

@ -1146,6 +1146,8 @@ reclaim_tcp_handler(struct comm_point* c)
}
c->tcp_more_read_again = NULL;
c->tcp_more_write_again = NULL;
c->tcp_byte_count = 0;
sldns_buffer_clear(c->buffer);
}
/** do the callback when writing is done */