Merge branch '3451-handle-transient-TCP-connect-EADDRINUSE-on-BSDs' into 'main'

Handle the transient TCP connect() failures on FreeBSD

Closes #3451 and #3452

See merge request isc-projects/bind9!6562
This commit is contained in:
Ondřej Surý 2022-07-14 19:38:33 +00:00
commit 6b15eb45df
7 changed files with 78 additions and 26 deletions

View file

@ -1,3 +1,7 @@
5926. [func] Handle transient TCP connect() EADDRINUSE failures
on FreeBSD (and possibly other BSDs) by trying three
times before giving up. [GL #3451]
5925. [bug] With a forwarder configured for all queries, resolution
failures encountered during DS chasing could trigger
assertion failures due to a logic bug in

View file

@ -377,6 +377,7 @@ struct isc__nm_uvreq {
isc__nm_cb_t cb; /* callback */
void *cbarg; /* callback argument */
isc_nm_timer_t *timer; /* TCP write timer */
int connect_tries; /* connect retries */
union {
uv_handle_t handle;

View file

@ -1919,6 +1919,8 @@ isc__nm_failed_connect_cb(isc_nmsocket_t *sock, isc__nm_uvreq_t *req,
REQUIRE(sock->tid == isc_nm_tid());
REQUIRE(req->cb.connect != NULL);
isc__nm_incstats(sock, STATID_CONNECTFAIL);
isc__nmsocket_timer_stop(sock);
uv_handle_set_data((uv_handle_t *)&sock->read_timer, sock);
@ -2465,7 +2467,10 @@ isc___nm_uvreq_get(isc_nm_t *mgr, isc_nmsocket_t *sock FLARG) {
req = isc_mem_get(mgr->mctx, sizeof(*req));
}
*req = (isc__nm_uvreq_t){ .magic = 0 };
*req = (isc__nm_uvreq_t){
.magic = 0,
.connect_tries = 3,
};
ISC_LINK_INIT(req, link);
req->uv_req.req.data = req;
isc___nmsocket_attach(sock, &req->sock FLARG_PASS);

View file

@ -165,7 +165,6 @@ tcp_connect_direct(isc_nmsocket_t *sock, isc__nm_uvreq_t *req) {
isc__nm_incstats(sock, STATID_CONNECTFAIL);
goto done;
}
isc__nm_incstats(sock, STATID_CONNECT);
uv_handle_set_data((uv_handle_t *)&sock->read_timer,
&req->uv_req.connect);
@ -219,7 +218,7 @@ isc__nm_async_tcpconnect(isc__networker_t *worker, isc__netievent_t *ev0) {
static void
tcp_connect_cb(uv_connect_t *uvreq, int status) {
isc_result_t result;
isc_result_t result = ISC_R_UNSET;
isc__nm_uvreq_t *req = NULL;
isc_nmsocket_t *sock = uv_handle_get_data((uv_handle_t *)uvreq->handle);
struct sockaddr_storage ss;
@ -228,9 +227,6 @@ tcp_connect_cb(uv_connect_t *uvreq, int status) {
REQUIRE(VALID_NMSOCK(sock));
REQUIRE(sock->tid == isc_nm_tid());
isc__nmsocket_timer_stop(sock);
uv_handle_set_data((uv_handle_t *)&sock->read_timer, sock);
req = uv_handle_get_data((uv_handle_t *)uvreq);
REQUIRE(VALID_UVREQ(req));
@ -239,9 +235,7 @@ tcp_connect_cb(uv_connect_t *uvreq, int status) {
if (atomic_load(&sock->timedout)) {
result = ISC_R_TIMEDOUT;
goto error;
}
if (!atomic_load(&sock->connecting)) {
} else if (!atomic_load(&sock->connecting)) {
/*
* The connect was cancelled from timeout; just clean up
* the req.
@ -260,11 +254,32 @@ tcp_connect_cb(uv_connect_t *uvreq, int status) {
/* Timeout status code here indicates hard error */
result = ISC_R_TIMEDOUT;
goto error;
} else if (status == UV_EADDRINUSE) {
/*
* On FreeBSD the TCP connect() call sometimes results in a
* spurious transient EADDRINUSE. Try a few more times before
* giving up.
*/
if (--req->connect_tries > 0) {
r = uv_tcp_connect(&req->uv_req.connect,
&sock->uv_handle.tcp,
&req->peer.type.sa, tcp_connect_cb);
if (r != 0) {
result = isc_uverr2result(r);
goto error;
}
return;
}
result = isc_uverr2result(status);
goto error;
} else if (status != 0) {
result = isc_uverr2result(status);
goto error;
}
isc__nmsocket_timer_stop(sock);
uv_handle_set_data((uv_handle_t *)&sock->read_timer, sock);
isc__nm_incstats(sock, STATID_CONNECT);
r = uv_tcp_getpeername(&sock->uv_handle.tcp, (struct sockaddr *)&ss,
&(int){ sizeof(ss) });
@ -281,7 +296,6 @@ tcp_connect_cb(uv_connect_t *uvreq, int status) {
isc__nm_connectcb(sock, req, ISC_R_SUCCESS, false);
return;
error:
isc__nm_failed_connect_cb(sock, req, result, false);
}

View file

@ -135,7 +135,6 @@ tcpdns_connect_direct(isc_nmsocket_t *sock, isc__nm_uvreq_t *req) {
isc__nm_incstats(sock, STATID_CONNECTFAIL);
goto done;
}
isc__nm_incstats(sock, STATID_CONNECT);
uv_handle_set_data((uv_handle_t *)&sock->read_timer,
&req->uv_req.connect);
@ -189,7 +188,7 @@ isc__nm_async_tcpdnsconnect(isc__networker_t *worker, isc__netievent_t *ev0) {
static void
tcpdns_connect_cb(uv_connect_t *uvreq, int status) {
isc_result_t result;
isc_result_t result = ISC_R_UNSET;
isc__nm_uvreq_t *req = NULL;
isc_nmsocket_t *sock = uv_handle_get_data((uv_handle_t *)uvreq->handle);
struct sockaddr_storage ss;
@ -198,9 +197,6 @@ tcpdns_connect_cb(uv_connect_t *uvreq, int status) {
REQUIRE(VALID_NMSOCK(sock));
REQUIRE(sock->tid == isc_nm_tid());
isc__nmsocket_timer_stop(sock);
uv_handle_set_data((uv_handle_t *)&sock->read_timer, sock);
req = uv_handle_get_data((uv_handle_t *)uvreq);
REQUIRE(VALID_UVREQ(req));
@ -209,9 +205,7 @@ tcpdns_connect_cb(uv_connect_t *uvreq, int status) {
if (atomic_load(&sock->timedout)) {
result = ISC_R_TIMEDOUT;
goto error;
}
if (isc__nm_closing(sock)) {
} else if (isc__nm_closing(sock)) {
/* Network manager shutting down */
result = ISC_R_SHUTTINGDOWN;
goto error;
@ -223,11 +217,32 @@ tcpdns_connect_cb(uv_connect_t *uvreq, int status) {
/* Timeout status code here indicates hard error */
result = ISC_R_TIMEDOUT;
goto error;
} else if (status == UV_EADDRINUSE) {
/*
* On FreeBSD the TCP connect() call sometimes results in a
* spurious transient EADDRINUSE. Try a few more times before
* giving up.
*/
if (--req->connect_tries > 0) {
r = uv_tcp_connect(
&req->uv_req.connect, &sock->uv_handle.tcp,
&req->peer.type.sa, tcpdns_connect_cb);
if (r != 0) {
result = isc_uverr2result(r);
goto error;
}
return;
}
result = isc_uverr2result(status);
goto error;
} else if (status != 0) {
result = isc_uverr2result(status);
goto error;
}
isc__nmsocket_timer_stop(sock);
uv_handle_set_data((uv_handle_t *)&sock->read_timer, sock);
isc__nm_incstats(sock, STATID_CONNECT);
r = uv_tcp_getpeername(&sock->uv_handle.tcp, (struct sockaddr *)&ss,
&(int){ sizeof(ss) });
@ -244,7 +259,6 @@ tcpdns_connect_cb(uv_connect_t *uvreq, int status) {
isc__nm_connectcb(sock, req, ISC_R_SUCCESS, false);
return;
error:
isc__nm_failed_connect_cb(sock, req, result, false);
}

View file

@ -173,7 +173,6 @@ tlsdns_connect_direct(isc_nmsocket_t *sock, isc__nm_uvreq_t *req) {
isc__nm_incstats(sock, STATID_CONNECTFAIL);
goto done;
}
isc__nm_incstats(sock, STATID_CONNECT);
uv_handle_set_data((uv_handle_t *)&sock->read_timer,
&req->uv_req.connect);
@ -229,7 +228,7 @@ isc__nm_async_tlsdnsconnect(isc__networker_t *worker, isc__netievent_t *ev0) {
static void
tlsdns_connect_cb(uv_connect_t *uvreq, int status) {
isc_result_t result;
isc_result_t result = ISC_R_UNSET;
isc__nm_uvreq_t *req = NULL;
isc_nmsocket_t *sock = uv_handle_get_data((uv_handle_t *)uvreq->handle);
struct sockaddr_storage ss;
@ -246,9 +245,7 @@ tlsdns_connect_cb(uv_connect_t *uvreq, int status) {
if (atomic_load(&sock->timedout)) {
result = ISC_R_TIMEDOUT;
goto error;
}
if (isc__nm_closing(sock)) {
} else if (isc__nm_closing(sock)) {
/* Network manager shutting down */
result = ISC_R_SHUTTINGDOWN;
goto error;
@ -260,6 +257,24 @@ tlsdns_connect_cb(uv_connect_t *uvreq, int status) {
/* Timeout status code here indicates hard error */
result = ISC_R_TIMEDOUT;
goto error;
} else if (status == UV_EADDRINUSE) {
/*
* On FreeBSD the TCP connect() call sometimes results in a
* spurious transient EADDRINUSE. Try a few more times before
* giving up.
*/
if (--req->connect_tries > 0) {
r = uv_tcp_connect(
&req->uv_req.connect, &sock->uv_handle.tcp,
&req->peer.type.sa, tlsdns_connect_cb);
if (r != 0) {
result = isc_uverr2result(r);
goto error;
}
return;
}
result = isc_uverr2result(status);
goto error;
} else if (status != 0) {
result = isc_uverr2result(status);
goto error;

View file

@ -846,7 +846,6 @@ udp_connect_direct(isc_nmsocket_t *sock, isc__nm_uvreq_t *req) {
isc__networker_t *worker = NULL;
int uv_bind_flags = UV_UDP_REUSEADDR;
isc_result_t result = ISC_R_UNSET;
int tries = 3;
int r;
REQUIRE(isc__nm_in_netthread());
@ -901,7 +900,7 @@ udp_connect_direct(isc_nmsocket_t *sock, isc__nm_uvreq_t *req) {
do {
r = isc_uv_udp_connect(&sock->uv_handle.udp,
&req->peer.type.sa);
} while (r == UV_EADDRINUSE && --tries > 0);
} while (r == UV_EADDRINUSE && --req->connect_tries > 0);
if (r != 0) {
isc__nm_incstats(sock, STATID_CONNECTFAIL);
goto done;