Fix a race condition in xfrin_recv_done() when calling xfrin_reset()

When the xfrin_recv_done() function decides to retry the transfer
using AXFR because of a previous error, it calls the xfrin_reset()
function which calls dns_db_closeversion() on 'xfr->ver'. The problem
is that the ixfr processing of a previous message could be still
in process in a worker thread, which then can use freed 'xfr->ver'.

If there is an ongoing worker thread delay the AXFR retry until after
the worker thread has finished its work.
This commit is contained in:
Aram Sargsyan 2026-03-04 16:25:33 +00:00
parent 5c248e7d1a
commit 141ff7bfa7

View file

@ -141,7 +141,7 @@ struct dns_xfrin {
_Atomic xfrin_state_t state;
uint32_t expireopt;
bool edns, expireoptset;
bool edns, expireoptset, retry_axfr;
atomic_bool is_ixfr;
/*
@ -270,6 +270,10 @@ xfrin_idledout(void *);
static void
xfrin_minratecheck(void *);
static void
xfrin_reset(dns_xfrin_t *xfr);
static void
xfrin_ixfrcleanup(dns_xfrin_t *xfr);
static void
xfrin_fail(dns_xfrin_t *xfr, isc_result_t result, const char *msg);
static isc_result_t
render(dns_message_t *msg, isc_mem_t *mctx, isc_buffer_t *buf);
@ -627,7 +631,9 @@ ixfr_apply_done(void *arg) {
CHECK(result);
/* Reschedule */
if (!cds_wfcq_empty(&xfr->diff_head, &xfr->diff_tail)) {
if (!xfr->retry_axfr &&
!cds_wfcq_empty(&xfr->diff_head, &xfr->diff_tail))
{
isc_work_enqueue(xfr->loop, ixfr_apply, ixfr_apply_done, work);
return;
}
@ -637,7 +643,18 @@ cleanup:
isc_mem_put(xfr->mctx, work, sizeof(*work));
if (result == ISC_R_SUCCESS) {
/*
* Don't retry with AXFR (even if it was requested) because there was
* an error or the transfer is shutting down. In case if it _was_ an
* error, xfrin_fail() will return a special result code which will
* still result in AXFR retry from the initiator of the transfer after
* the failure has been is logged.
*/
if (result != ISC_R_SUCCESS) {
xfr->retry_axfr = false;
}
if (!xfr->retry_axfr && result == ISC_R_SUCCESS) {
dns_db_closeversion(xfr->db, &xfr->ver, true);
dns_zone_markdirty(xfr->zone);
@ -647,7 +664,21 @@ cleanup:
} else {
dns_db_closeversion(xfr->db, &xfr->ver, false);
xfrin_fail(xfr, result, "failed while processing responses");
if (result != ISC_R_SUCCESS) {
xfrin_fail(xfr, result,
"failed while processing responses");
}
}
if (xfr->retry_axfr) {
xfr->reqtype = dns_rdatatype_soa;
atomic_store(&xfr->state, XFRST_SOAQUERY);
xfrin_reset(xfr);
result = xfrin_start(xfr);
if (result != ISC_R_SUCCESS) {
xfrin_fail(xfr, result, "failed setting up socket");
}
}
dns_xfrin_detach(&xfr);
@ -1175,13 +1206,18 @@ xfrin_cancelio(dns_xfrin_t *xfr) {
static void
xfrin_reset(dns_xfrin_t *xfr) {
REQUIRE(VALID_XFRIN(xfr));
REQUIRE(!xfr->diff_running);
xfrin_log(xfr, ISC_LOG_INFO, "resetting");
xfr->retry_axfr = false;
if (xfr->lasttsig != NULL) {
isc_buffer_free(&xfr->lasttsig);
}
xfrin_ixfrcleanup(xfr);
dns_diff_clear(&xfr->diff);
xfr->ixfr.diffs = 0;
@ -1844,6 +1880,11 @@ xfrin_recv_done(isc_result_t result, isc_region_t *region, void *arg) {
{
xfr->edns = false;
dns_message_detach(&msg);
/*
* With these states (see the conditions above) the diff
* process can't be currently in the running state, so
* it is safe to reset the 'xfr' and retry right away.
*/
xfrin_reset(xfr);
goto try_again;
} else if (result == ISC_R_SUCCESS &&
@ -1873,6 +1914,12 @@ xfrin_recv_done(isc_result_t result, isc_region_t *region, void *arg) {
try_axfr:
LIBDNS_XFRIN_RECV_TRY_AXFR(xfr, xfr->info, result);
dns_message_detach(&msg);
/* If there is a running worker thread then delay the retry. */
if (xfr->diff_running) {
xfr->retry_axfr = true;
dns_xfrin_detach(&xfr);
return;
}
xfrin_reset(xfr);
xfr->reqtype = dns_rdatatype_soa;
atomic_store(&xfr->state, XFRST_SOAQUERY);
@ -2070,6 +2117,19 @@ cleanup:
dns_xfrin_detach(&xfr);
}
static void
xfrin_ixfrcleanup(dns_xfrin_t *xfr) {
struct cds_wfcq_node *node, *next;
__cds_wfcq_for_each_blocking_safe(&xfr->diff_head, &xfr->diff_tail,
node, next) {
ixfr_apply_data_t *data =
caa_container_of(node, ixfr_apply_data_t, wfcq_node);
/* We need to clear and free all data chunks */
dns_diff_clear(&data->diff);
isc_mem_put(xfr->mctx, data, sizeof(*data));
}
}
static void
xfrin_destroy(dns_xfrin_t *xfr) {
uint64_t msecs, persec;
@ -2120,15 +2180,7 @@ xfrin_destroy(dns_xfrin_t *xfr) {
sep, expireopt);
/* Cleanup unprocessed IXFR data */
struct cds_wfcq_node *node, *next;
__cds_wfcq_for_each_blocking_safe(&xfr->diff_head, &xfr->diff_tail,
node, next) {
ixfr_apply_data_t *data =
caa_container_of(node, ixfr_apply_data_t, wfcq_node);
/* We need to clear and free all data chunks */
dns_diff_clear(&data->diff);
isc_mem_put(xfr->mctx, data, sizeof(*data));
}
xfrin_ixfrcleanup(xfr);
/* Cleanup unprocessed AXFR data */
dns_diff_clear(&xfr->diff);