mirror of
https://github.com/isc-projects/bind9.git
synced 2026-05-28 04:34:54 -04:00
[9.20] fix: usr: Fix a possible race condition during zone transfers
The :iscman:`named` process could terminate unexpectedly when processing an IXFR message during a zone transfer. This has been fixed. Closes #5767 Backport of MR !11781 Merge branch 'backport-5767-use-after-free-xfrin_reset-9.20' into 'bind-9.20' See merge request isc-projects/bind9!11799
This commit is contained in:
commit
a48b287d9f
4 changed files with 394 additions and 13 deletions
299
bin/tests/system/xfer/ans11/ans.py
Normal file
299
bin/tests/system/xfer/ans11/ans.py
Normal file
|
|
@ -0,0 +1,299 @@
|
|||
"""
|
||||
Copyright (C) Internet Systems Consortium, Inc. ("ISC")
|
||||
|
||||
SPDX-License-Identifier: MPL-2.0
|
||||
|
||||
This Source Code Form is subject to the terms of the Mozilla Public
|
||||
License, v. 2.0. If a copy of the MPL was not distributed with this
|
||||
file, you can obtain one at https://mozilla.org/MPL/2.0/.
|
||||
|
||||
See the COPYRIGHT file distributed with this work for additional
|
||||
information regarding copyright ownership.
|
||||
"""
|
||||
|
||||
from collections.abc import AsyncGenerator
|
||||
|
||||
import struct
|
||||
|
||||
import dns.flags
|
||||
import dns.rcode
|
||||
import dns.rdatatype
|
||||
|
||||
from isctest.asyncserver import (
|
||||
AsyncDnsServer,
|
||||
BytesResponseSend,
|
||||
DnsProtocol,
|
||||
DnsResponseSend,
|
||||
QueryContext,
|
||||
ResponseAction,
|
||||
ResponseHandler,
|
||||
)
|
||||
|
||||
# Numeric DNS protocol values used by the raw wire builder functions below.

# Record / query types
DNS_TYPE_A = 1
DNS_TYPE_NS = 2
DNS_TYPE_SOA = 6
DNS_TYPE_IXFR = 251
DNS_TYPE_AXFR = 252

# Record class
DNS_CLASS_IN = 1

# Header flag bits
DNS_FLAG_QR = 1 << 15  # 0x8000
DNS_FLAG_AA = 1 << 10  # 0x0400

# Response codes; the builders OR these straight into the flags word
DNS_RCODE_NOERROR = 0
DNS_RCODE_SERVFAIL = 2

# Zone served by this server and the number of host-N A records it holds
ZONE_NAME = "ixfr-race."
NUM_RECORDS = 400
|
||||
|
||||
|
||||
def encode_name(name):
    """
    Encode a DNS name in wire format (no compression).

    Each dot-separated label is emitted as a one-byte length followed by
    the ASCII bytes of the label; the name is terminated by a single zero
    byte.  Trailing dots are optional.  The root name ("." or "") encodes
    to just the terminating zero byte.
    """
    stripped = name.rstrip(".")
    if not stripped:
        # "".split(".") yields [""], which would otherwise put a spurious
        # zero-length label in front of the terminator.
        return b"\x00"
    labels = (label.encode("ascii") for label in stripped.split("."))
    return b"".join(struct.pack("B", len(raw)) + raw for raw in labels) + b"\x00"
|
||||
|
||||
|
||||
def build_soa_rdata(
    mname, rname, serial, refresh=3600, retry=900, expire=604800, minimum=86400
):
    """
    Build SOA record rdata.

    The result is the wire-encoded MNAME and RNAME followed by five 32-bit
    unsigned network-order values: serial, refresh, retry, expire, minimum.
    """
    names = encode_name(mname) + encode_name(rname)
    counters = struct.pack("!IIIII", serial, refresh, retry, expire, minimum)
    return names + counters
|
||||
|
||||
|
||||
def build_a_rdata(ip_str):
    """Build A record rdata (four octets) from a dotted-quad string."""
    octets = map(int, ip_str.split("."))
    return struct.pack("4B", *octets)
|
||||
|
||||
|
||||
def build_rr(name_bytes, rtype, rclass, ttl, rdata):
    """
    Build a complete resource record.

    Layout: the already wire-encoded owner name, then TYPE, CLASS, TTL and
    RDLENGTH in network byte order, then the rdata itself.
    """
    fixed_part = struct.pack("!HHIH", rtype, rclass, ttl, len(rdata))
    return name_bytes + fixed_part + rdata
|
||||
|
||||
|
||||
def build_dns_header(qid, flags, qdcount, ancount, nscount=0, arcount=0):
    """Build the DNS message header: six 16-bit fields in network order."""
    fields = (qid, flags, qdcount, ancount, nscount, arcount)
    return struct.pack("!6H", *fields)
|
||||
|
||||
|
||||
def build_ixfr_message1(qid, zone_name, num_records):
    """
    Build IXFR Message 1: a valid IXFR diff that triggers ixfr_commit().

    The message carries one complete, deliberately large diff (diff 1).
    Processing it makes the secondary call ixfr_commit(), which goes
    through isc_work_enqueue() and starts a worker thread.

    The last record is a boundary SOA that opens diff 2, so the receiving
    state machine is left in XFRST_IXFR_DEL, waiting for more records.

    Answer section, in order:
      1. Initial SOA (end_serial=3)      -- XFRST_ZONEXFRREQUEST
      2. Old SOA (serial=1)              -- XFRST_FIRSTDATA -> IXFR -> DELSOA
      3. DEL A records (num_records)     -- XFRST_IXFR_DEL (diffs++)
      4. Mid SOA (serial=2)              -- XFRST_IXFR_ADDSOA (diffs++)
      5. ADD A records (num_records)     -- XFRST_IXFR_ADD (diffs++)
      6. Boundary SOA (serial=2)         -- ixfr_commit()! Worker enqueued.
                                            Then goto redo -> DELSOA of diff 2
    """
    origin = encode_name(zone_name)
    question = origin + struct.pack("!HH", DNS_TYPE_IXFR, DNS_CLASS_IN)

    primary = "ns." + zone_name
    contact = "admin." + zone_name

    def soa_rr(serial):
        # SOA record at the zone apex carrying the given serial.
        rdata = build_soa_rdata(primary, contact, serial)
        return build_rr(origin, DNS_TYPE_SOA, DNS_CLASS_IN, 3600, rdata)

    end_soa = soa_rr(3)
    old_soa = soa_rr(1)
    mid_soa = soa_rr(2)

    # Section 3: the A records being deleted (10.0.x.y addresses).
    deletions = [
        build_rr(
            encode_name(f"host-{i}.{zone_name}"),
            DNS_TYPE_A,
            DNS_CLASS_IN,
            3600,
            build_a_rdata(f"10.0.{(i >> 8) & 0xFF}.{i & 0xFF}"),
        )
        for i in range(num_records)
    ]

    # Section 5: the same owner names re-added with 10.1.x.y addresses.
    additions = [
        build_rr(
            encode_name(f"host-{i}.{zone_name}"),
            DNS_TYPE_A,
            DNS_CLASS_IN,
            3600,
            build_a_rdata(f"10.1.{(i >> 8) & 0xFF}.{i & 0xFF}"),
        )
        for i in range(num_records)
    ]

    # The trailing mid SOA (serial=2 == current_serial) is the boundary that
    # triggers ixfr_commit() and the worker thread via isc_work_enqueue();
    # "goto redo" then processes it as the DELSOA of diff 2.
    answers = [end_soa, old_soa, *deletions, mid_soa, *additions, mid_soa]

    flags = DNS_FLAG_QR | DNS_FLAG_AA | DNS_RCODE_NOERROR
    header = build_dns_header(qid, flags, 1, len(answers))
    return header + question + b"".join(answers)
|
||||
|
||||
|
||||
def build_bad_rcode_message2(qid, zone_name):
    """
    Build Message 2: a DNS response with rcode=SERVFAIL and no answers.

    When BIND receives this during an active IXFR transfer:

      xfrin_recv_done():
        msg->rcode != dns_rcode_noerror (SERVFAIL != NOERROR) ->
        result = dns_result_fromrcode(msg->rcode) ->
        reqtype == dns_rdatatype_ixfr (not axfr/soa) ->
        falls through to try_axfr: ->
        xfrin_reset() -> destroys journal/version

    Meanwhile the ixfr_apply worker from Message 1 is still running -> UAF.

    This works with DEFAULT secondary configuration (no special options).
    """
    question = encode_name(zone_name) + struct.pack(
        "!HH", DNS_TYPE_IXFR, DNS_CLASS_IN
    )
    servfail_flags = DNS_FLAG_QR | DNS_FLAG_AA | DNS_RCODE_SERVFAIL
    # One question, zero answers: only the header and question are sent.
    return build_dns_header(qid, servfail_flags, 1, 0) + question
|
||||
|
||||
|
||||
def build_soa_response(qid, zone_name, serial):
    """Build a SOA response for the zone: one question, one SOA answer."""
    apex = encode_name(zone_name)
    question = apex + struct.pack("!HH", DNS_TYPE_SOA, DNS_CLASS_IN)

    rdata = build_soa_rdata("ns." + zone_name, "admin." + zone_name, serial)
    soa_rr = build_rr(apex, DNS_TYPE_SOA, DNS_CLASS_IN, 3600, rdata)

    flags = DNS_FLAG_QR | DNS_FLAG_AA | DNS_RCODE_NOERROR
    return build_dns_header(qid, flags, 1, 1) + question + soa_rr
|
||||
|
||||
|
||||
def build_axfr_response(qid, zone_name, serial, num_records):
    """
    Build a complete AXFR response for initial zone load.

    AXFR format: SOA, NS, A records, ..., SOA (trailing SOA marks end).
    """
    apex = encode_name(zone_name)
    question = apex + struct.pack("!HH", DNS_TYPE_AXFR, DNS_CLASS_IN)

    soa_rdata = build_soa_rdata("ns." + zone_name, "admin." + zone_name, serial)
    # The same SOA record both opens and closes the transfer.
    soa_rr = build_rr(apex, DNS_TYPE_SOA, DNS_CLASS_IN, 3600, soa_rdata)

    # NS record at the apex and the address record of the name server.
    ns_wire = encode_name("ns." + zone_name)
    ns_rr = build_rr(apex, DNS_TYPE_NS, DNS_CLASS_IN, 3600, ns_wire)
    ns_a_rr = build_rr(
        ns_wire, DNS_TYPE_A, DNS_CLASS_IN, 3600, build_a_rdata("127.0.0.1")
    )

    # A records (matching gen_zone.py output)
    host_rrs = [
        build_rr(
            encode_name(f"host-{i}.{zone_name}"),
            DNS_TYPE_A,
            DNS_CLASS_IN,
            3600,
            build_a_rdata(f"10.0.{(i >> 8) & 0xFF}.{i & 0xFF}"),
        )
        for i in range(num_records)
    ]

    answers = [soa_rr, ns_rr, ns_a_rr, *host_rrs, soa_rr]

    flags = DNS_FLAG_QR | DNS_FLAG_AA | DNS_RCODE_NOERROR
    header = build_dns_header(qid, flags, 1, len(answers))
    return header + question + b"".join(answers)
|
||||
|
||||
|
||||
class IxfrRaceHandler(ResponseHandler):
    """
    Answer SOA, AXFR and IXFR queries so as to provoke the IXFR->AXFR race.

    Phase 1: SOA queries are answered with serial=1 and an AXFR is served
             so that the secondary loads the zone.
    Phase 2: Once the AXFR has been served, SOA queries are answered with
             serial=3, which makes the secondary request an IXFR.
             The IXFR reply is a valid large diff (msg1) followed
             immediately by a SERVFAIL response (msg2), racing
             ixfr_commit() against xfrin_reset().
    """

    def __init__(self) -> None:
        # Set once an AXFR response has been sent; selects the SOA serial.
        self._axfr_done = False

    async def get_responses(
        self, qctx: QueryContext
    ) -> AsyncGenerator[ResponseAction, None]:
        query_id = qctx.query.id
        qtype = qctx.qtype

        if qtype == dns.rdatatype.SOA:
            advertised_serial = 1 if not self._axfr_done else 3
            yield BytesResponseSend(
                build_soa_response(query_id, ZONE_NAME, advertised_serial)
            )
            return

        if qtype == dns.rdatatype.AXFR:
            yield BytesResponseSend(
                build_axfr_response(query_id, ZONE_NAME, 1, NUM_RECORDS)
            )
            # Only flip the phase after the AXFR response has been consumed.
            self._axfr_done = True
            return

        if qtype != dns.rdatatype.IXFR:
            # Any other query type produces no response from this handler.
            return

        if qctx.protocol == DnsProtocol.UDP:
            # Force TCP retry by setting the TC bit
            qctx.response.flags |= dns.flags.TC
            yield DnsResponseSend(qctx.response)
            return

        # Message 1: Valid IXFR diff -> triggers ixfr_commit()
        yield BytesResponseSend(
            build_ixfr_message1(query_id, ZONE_NAME, NUM_RECORDS)
        )
        # Message 2: SERVFAIL -> triggers xfrin_reset() while the
        # ixfr_apply worker from Message 1 is still running -> UAF
        yield BytesResponseSend(build_bad_rcode_message2(query_id, ZONE_NAME))
|
||||
|
||||
|
||||
def main() -> None:
    """Create the DNS server, install the IXFR race handler and run it."""
    dns_server = AsyncDnsServer(default_aa=True, default_rcode=dns.rcode.NOERROR)
    race_handler = IxfrRaceHandler()
    dns_server.install_response_handler(race_handler)
    dns_server.run()


if __name__ == "__main__":
    main()
|
||||
|
|
@ -111,3 +111,10 @@ zone "xfr-and-reconfig" {
|
|||
file "xfr-and-reconfig.bk";
|
||||
request-ixfr no; # ans9 supports only axfr
|
||||
};
|
||||
|
||||
# GL#5767
|
||||
zone "ixfr-race" {
|
||||
type secondary;
|
||||
primaries { 10.53.0.11; };
|
||||
file "ixfr-race.bk";
|
||||
};
|
||||
|
|
|
|||
|
|
@ -549,3 +549,26 @@ def test_reconfiguration_when_zone_transfer_is_in_the_middle_of_soa_query(ns6):
|
|||
isctest.log.info("Try to reload the zone from the primary")
|
||||
ns6.rndc("reload xfr-and-reconfig")
|
||||
watcher_transfer_started.wait_for_line("Transfer started")
|
||||
|
||||
|
||||
# See #5767
|
||||
def test_ixfr_race(ns6):
    """
    Check that ns6 transfers ixfr-race and that a reload-triggered transfer
    also finishes successfully.
    """
    success_line = "zone ixfr-race/IN: zone transfer finished: success"

    isctest.log.info(
        "Check that ixfr-race has been successfully transferred by the secondary"
    )
    already_transferred = success_line in ns6.log
    if not already_transferred:
        # ns11 is started after ns6, so the zone transfer might not have
        # happened by the time this test is started: if not, use retransfer
        # to do the initial fetch now
        with ns6.watch_log_from_start() as initial_watcher:
            ns6.rndc("retransfer ixfr-race.")
            initial_watcher.wait_for_line(success_line)

    isctest.log.info("Try to reload the zone from the primary")
    with ns6.watch_log_from_here() as reload_watcher:
        ns6.rndc("reload ixfr-race")
        reload_watcher.wait_for_line(success_line)
|
||||
|
|
|
|||
|
|
@ -132,7 +132,7 @@ struct dns_xfrin {
|
|||
|
||||
_Atomic xfrin_state_t state;
|
||||
uint32_t expireopt;
|
||||
bool edns, expireoptset;
|
||||
bool edns, expireoptset, retry_axfr;
|
||||
atomic_bool is_ixfr;
|
||||
|
||||
/*
|
||||
|
|
@ -264,6 +264,10 @@ xfrin_idledout(void *);
|
|||
static void
|
||||
xfrin_minratecheck(void *);
|
||||
static void
|
||||
xfrin_reset(dns_xfrin_t *xfr);
|
||||
static void
|
||||
xfrin_ixfrcleanup(dns_xfrin_t *xfr);
|
||||
static void
|
||||
xfrin_fail(dns_xfrin_t *xfr, isc_result_t result, const char *msg);
|
||||
static isc_result_t
|
||||
render(dns_message_t *msg, isc_mem_t *mctx, isc_buffer_t *buf);
|
||||
|
|
@ -617,7 +621,9 @@ ixfr_apply_done(void *arg) {
|
|||
CHECK(result);
|
||||
|
||||
/* Reschedule */
|
||||
if (!cds_wfcq_empty(&xfr->diff_head, &xfr->diff_tail)) {
|
||||
if (!xfr->retry_axfr &&
|
||||
!cds_wfcq_empty(&xfr->diff_head, &xfr->diff_tail))
|
||||
{
|
||||
isc_work_enqueue(xfr->loop, ixfr_apply, ixfr_apply_done, work);
|
||||
return;
|
||||
}
|
||||
|
|
@ -627,7 +633,18 @@ cleanup:
|
|||
|
||||
isc_mem_put(xfr->mctx, work, sizeof(*work));
|
||||
|
||||
if (result == ISC_R_SUCCESS) {
|
||||
/*
|
||||
* Don't retry with AXFR (even if it was requested) because there was
|
||||
* an error or the transfer is shutting down. In case it _was_ an
|
||||
* error, xfrin_fail() will return a special result code which will
|
||||
* still result in AXFR retry from the initiator of the transfer after
|
||||
* the failure has been logged.
|
||||
*/
|
||||
if (result != ISC_R_SUCCESS) {
|
||||
xfr->retry_axfr = false;
|
||||
}
|
||||
|
||||
if (!xfr->retry_axfr && result == ISC_R_SUCCESS) {
|
||||
dns_db_closeversion(xfr->db, &xfr->ver, true);
|
||||
dns_zone_markdirty(xfr->zone);
|
||||
|
||||
|
|
@ -637,7 +654,21 @@ cleanup:
|
|||
} else {
|
||||
dns_db_closeversion(xfr->db, &xfr->ver, false);
|
||||
|
||||
xfrin_fail(xfr, result, "failed while processing responses");
|
||||
if (result != ISC_R_SUCCESS) {
|
||||
xfrin_fail(xfr, result,
|
||||
"failed while processing responses");
|
||||
}
|
||||
}
|
||||
|
||||
if (xfr->retry_axfr) {
|
||||
xfr->reqtype = dns_rdatatype_soa;
|
||||
atomic_store(&xfr->state, XFRST_SOAQUERY);
|
||||
|
||||
xfrin_reset(xfr);
|
||||
result = xfrin_start(xfr);
|
||||
if (result != ISC_R_SUCCESS) {
|
||||
xfrin_fail(xfr, result, "failed setting up socket");
|
||||
}
|
||||
}
|
||||
|
||||
dns_xfrin_detach(&xfr);
|
||||
|
|
@ -1165,13 +1196,18 @@ xfrin_cancelio(dns_xfrin_t *xfr) {
|
|||
static void
|
||||
xfrin_reset(dns_xfrin_t *xfr) {
|
||||
REQUIRE(VALID_XFRIN(xfr));
|
||||
REQUIRE(!xfr->diff_running);
|
||||
|
||||
xfrin_log(xfr, ISC_LOG_INFO, "resetting");
|
||||
|
||||
xfr->retry_axfr = false;
|
||||
|
||||
if (xfr->lasttsig != NULL) {
|
||||
isc_buffer_free(&xfr->lasttsig);
|
||||
}
|
||||
|
||||
xfrin_ixfrcleanup(xfr);
|
||||
|
||||
dns_diff_clear(&xfr->diff);
|
||||
|
||||
if (xfr->ixfr.journal != NULL) {
|
||||
|
|
@ -1838,6 +1874,11 @@ xfrin_recv_done(isc_result_t result, isc_region_t *region, void *arg) {
|
|||
{
|
||||
xfr->edns = false;
|
||||
dns_message_detach(&msg);
|
||||
/*
|
||||
* With these states (see the conditions above) the diff
|
||||
* process can't be currently in the running state, so
|
||||
* it is safe to reset the 'xfr' and retry right away.
|
||||
*/
|
||||
xfrin_reset(xfr);
|
||||
goto try_again;
|
||||
} else if (result == ISC_R_SUCCESS &&
|
||||
|
|
@ -1867,6 +1908,12 @@ xfrin_recv_done(isc_result_t result, isc_region_t *region, void *arg) {
|
|||
try_axfr:
|
||||
LIBDNS_XFRIN_RECV_TRY_AXFR(xfr, xfr->info, result);
|
||||
dns_message_detach(&msg);
|
||||
/* If there is a running worker thread then delay the retry. */
|
||||
if (xfr->diff_running) {
|
||||
xfr->retry_axfr = true;
|
||||
dns_xfrin_detach(&xfr);
|
||||
return;
|
||||
}
|
||||
xfrin_reset(xfr);
|
||||
xfr->reqtype = dns_rdatatype_soa;
|
||||
atomic_store(&xfr->state, XFRST_SOAQUERY);
|
||||
|
|
@ -2075,6 +2122,19 @@ cleanup:
|
|||
dns_xfrin_detach(&xfr);
|
||||
}
|
||||
|
||||
static void
|
||||
xfrin_ixfrcleanup(dns_xfrin_t *xfr) {
|
||||
struct cds_wfcq_node *node, *next;
|
||||
__cds_wfcq_for_each_blocking_safe(&xfr->diff_head, &xfr->diff_tail,
|
||||
node, next) {
|
||||
ixfr_apply_data_t *data =
|
||||
caa_container_of(node, ixfr_apply_data_t, wfcq_node);
|
||||
/* We need to clear and free all data chunks */
|
||||
dns_diff_clear(&data->diff);
|
||||
isc_mem_put(xfr->mctx, data, sizeof(*data));
|
||||
}
|
||||
}
|
||||
|
||||
static void
|
||||
xfrin_destroy(dns_xfrin_t *xfr) {
|
||||
uint64_t msecs, persec;
|
||||
|
|
@ -2125,15 +2185,7 @@ xfrin_destroy(dns_xfrin_t *xfr) {
|
|||
sep, expireopt);
|
||||
|
||||
/* Cleanup unprocessed IXFR data */
|
||||
struct cds_wfcq_node *node, *next;
|
||||
__cds_wfcq_for_each_blocking_safe(&xfr->diff_head, &xfr->diff_tail,
|
||||
node, next) {
|
||||
ixfr_apply_data_t *data =
|
||||
caa_container_of(node, ixfr_apply_data_t, wfcq_node);
|
||||
/* We need to clear and free all data chunks */
|
||||
dns_diff_clear(&data->diff);
|
||||
isc_mem_put(xfr->mctx, data, sizeof(*data));
|
||||
}
|
||||
xfrin_ixfrcleanup(xfr);
|
||||
|
||||
/* Cleanup unprocessed AXFR data */
|
||||
dns_diff_clear(&xfr->diff);
|
||||
|
|
|
|||
Loading…
Reference in a new issue