From 2b20db05e38b7d7ba7f07a45dc286e2ca478b5ed Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ond=C5=99ej=20Sur=C3=BD?= Date: Thu, 21 Sep 2023 15:20:58 +0200 Subject: [PATCH 1/5] Remove dns_adblameinfo from dns_adb Keeping the information about lame server in the ADB was done in !322 to fix following security issue: [CVE-2021-25219] Disable "lame-ttl" cache The handling of the lame servers needs to be redesigned and it is not going to be enabled any time soon, and the current code is just dead code that takes up space, code and stands in the way of making ADB work faster. Remove all the internals needed for handling the lame servers in the ADB for now. It might get reintroduced later if and when we redesign ADB. --- lib/dns/adb.c | 185 ++------------------------------------ lib/dns/include/dns/adb.h | 47 +--------- lib/dns/resolver.c | 13 --- lib/dns/zone.c | 6 +- 4 files changed, 8 insertions(+), 243 deletions(-) diff --git a/lib/dns/adb.c b/lib/dns/adb.c index c47b2219aa..226b10e3ea 100644 --- a/lib/dns/adb.c +++ b/lib/dns/adb.c @@ -50,8 +50,6 @@ #define DNS_ADBNAME_VALID(x) ISC_MAGIC_VALID(x, DNS_ADBNAME_MAGIC) #define DNS_ADBNAMEHOOK_MAGIC ISC_MAGIC('a', 'd', 'N', 'H') #define DNS_ADBNAMEHOOK_VALID(x) ISC_MAGIC_VALID(x, DNS_ADBNAMEHOOK_MAGIC) -#define DNS_ADBLAMEINFO_MAGIC ISC_MAGIC('a', 'd', 'b', 'Z') -#define DNS_ADBLAMEINFO_VALID(x) ISC_MAGIC_VALID(x, DNS_ADBLAMEINFO_MAGIC) #define DNS_ADBENTRY_MAGIC ISC_MAGIC('a', 'd', 'b', 'E') #define DNS_ADBENTRY_VALID(x) ISC_MAGIC_VALID(x, DNS_ADBENTRY_MAGIC) #define DNS_ADBFETCH_MAGIC ISC_MAGIC('a', 'd', 'F', '4') @@ -87,7 +85,6 @@ typedef ISC_LIST(dns_adbname_t) dns_adbnamelist_t; typedef struct dns_adbnamehook dns_adbnamehook_t; typedef ISC_LIST(dns_adbnamehook_t) dns_adbnamehooklist_t; -typedef struct dns_adblameinfo dns_adblameinfo_t; typedef ISC_LIST(dns_adbentry_t) dns_adbentrylist_t; typedef struct dns_adbfetch dns_adbfetch_t; typedef struct dns_adbfetch6 dns_adbfetch6_t; @@ -201,23 +198,6 @@ struct dns_adbnamehook { ISC_LINK(dns_adbnamehook_t) entry_link; }; -/*% - * dns_adblameinfo structure: - * - * This is a small widget that holds qname-specific information about an - * address. Currently limited to lameness, but could just as easily be - * extended to other types of information about zones. - */ -struct dns_adblameinfo { - unsigned int magic; - - dns_name_t qname; - dns_rdatatype_t qtype; - isc_stdtime_t lame_timer; - - ISC_LINK(dns_adblameinfo_t) plink; -}; - /*% * dns_adbentry structure: * @@ -269,9 +249,6 @@ struct dns_adbentry { * entry. */ - /* FIXME */ - ISC_LIST(dns_adblameinfo_t) lameinfo; - ISC_LINK(dns_adbentry_t) link; }; @@ -304,10 +281,6 @@ static dns_adbnamehook_t * new_adbnamehook(dns_adb_t *adb); static void free_adbnamehook(dns_adb_t *adb, dns_adbnamehook_t **namehookp); -static dns_adblameinfo_t * -new_adblameinfo(dns_adb_t *, const dns_name_t *, dns_rdatatype_t); -static void -free_adblameinfo(dns_adb_t *, dns_adblameinfo_t **); static dns_adbentry_t * new_adbentry(dns_adb_t *adb, const isc_sockaddr_t *addr); static void @@ -444,7 +417,6 @@ enum { #define FIND_AVOIDFETCHES(fn) (((fn)->options & DNS_ADBFIND_AVOIDFETCHES) != 0) #define FIND_STARTATZONE(fn) (((fn)->options & DNS_ADBFIND_STARTATZONE) != 0) #define FIND_HAS_ADDRS(fn) (!ISC_LIST_EMPTY((fn)->list)) -#define FIND_RETURNLAME(fn) (((fn)->options & DNS_ADBFIND_RETURNLAME) != 0) #define FIND_NOFETCH(fn) (((fn)->options & DNS_ADBFIND_NOFETCH) != 0) /* @@ -1087,39 +1059,6 @@ free_adbnamehook(dns_adb_t *adb, dns_adbnamehook_t **namehook) { isc_mem_put(adb->mctx, nh, sizeof(*nh)); } -static dns_adblameinfo_t * -new_adblameinfo(dns_adb_t *adb, const dns_name_t *qname, - dns_rdatatype_t qtype) { - dns_adblameinfo_t *li = isc_mem_get(adb->mctx, sizeof(*li)); - - dns_name_init(&li->qname, NULL); - dns_name_dup(qname, adb->mctx, &li->qname); - li->magic = DNS_ADBLAMEINFO_MAGIC; - li->lame_timer = 0; - li->qtype = qtype; - ISC_LINK_INIT(li, plink); - - return (li); -} - -static void -free_adblameinfo(dns_adb_t *adb, dns_adblameinfo_t **lameinfo) { - dns_adblameinfo_t *li = NULL; - - REQUIRE(lameinfo != NULL && DNS_ADBLAMEINFO_VALID(*lameinfo)); - - li = *lameinfo; - *lameinfo = NULL; - - REQUIRE(!ISC_LINK_LINKED(li, plink)); - - dns_name_free(&li->qname, adb->mctx); - - li->magic = 0; - - isc_mem_put(adb->mctx, li, sizeof(*li)); -} - static dns_adbentry_t * new_adbentry(dns_adb_t *adb, const isc_sockaddr_t *addr) { dns_adbentry_t *entry = NULL; @@ -1128,7 +1067,6 @@ new_adbentry(dns_adb_t *adb, const isc_sockaddr_t *addr) { *entry = (dns_adbentry_t){ .srtt = isc_random_uniform(0x1f) + 1, .sockaddr = *addr, - .lameinfo = ISC_LIST_INITIALIZER, .link = ISC_LINK_INITIALIZER, .magic = DNS_ADBENTRY_MAGIC, }; @@ -1154,7 +1092,6 @@ static void destroy_adbentry(dns_adbentry_t *entry) { REQUIRE(DNS_ADBENTRY_VALID(entry)); - dns_adblameinfo_t *li = NULL; dns_adb_t *adb = entry->adb; uint_fast32_t active; @@ -1171,13 +1108,6 @@ destroy_adbentry(dns_adbentry_t *entry) { isc_mem_put(adb->mctx, entry->cookie, entry->cookielen); } - li = ISC_LIST_HEAD(entry->lameinfo); - while (li != NULL) { - ISC_LIST_UNLINK(entry->lameinfo, li, plink); - free_adblameinfo(adb, &li); - li = ISC_LIST_HEAD(entry->lameinfo); - } - isc_mutex_destroy(&entry->lock); isc_mem_put(adb->mctx, entry, sizeof(*entry)); @@ -1531,48 +1461,6 @@ get_attached_and_locked_entry(dns_adb_t *adb, isc_stdtime_t now, return (adbentry); } -/* - * The entry must be locked. - */ -static bool -entry_is_lame(dns_adb_t *adb, dns_adbentry_t *entry, const dns_name_t *qname, - dns_rdatatype_t qtype, isc_stdtime_t now) { - dns_adblameinfo_t *li = NULL, *next_li = NULL; - bool is_bad = false; - - li = ISC_LIST_HEAD(entry->lameinfo); - if (li == NULL) { - return (false); - } - while (li != NULL) { - next_li = ISC_LIST_NEXT(li, plink); - - /* - * Has the entry expired? - */ - if (li->lame_timer < now) { - ISC_LIST_UNLINK(entry->lameinfo, li, plink); - free_adblameinfo(adb, &li); - } - - /* - * Order tests from least to most expensive. - * - * We do not break out of the main loop here as - * we use the loop for house keeping. - */ - if (li != NULL && !is_bad && li->qtype == qtype && - dns_name_equal(qname, &li->qname)) - { - is_bad = true; - } - - li = next_li; - } - - return (is_bad); -} - static void log_quota(dns_adbentry_t *entry, const char *fmt, ...) { va_list ap; @@ -1595,9 +1483,7 @@ log_quota(dns_adbentry_t *entry, const char *fmt, ...) { } static void -copy_namehook_lists(dns_adb_t *adb, dns_adbfind_t *find, - const dns_name_t *qname, dns_rdatatype_t qtype, - dns_adbname_t *name, isc_stdtime_t now) { +copy_namehook_lists(dns_adb_t *adb, dns_adbfind_t *find, dns_adbname_t *name) { dns_adbnamehook_t *namehook = NULL; dns_adbentry_t *entry = NULL; @@ -1609,15 +1495,7 @@ copy_namehook_lists(dns_adb_t *adb, dns_adbfind_t *find, LOCK(&entry->lock); if (adbentry_overquota(entry)) { - find->options |= (DNS_ADBFIND_LAMEPRUNED | - DNS_ADBFIND_OVERQUOTA); - goto nextv4; - } - - if (!FIND_RETURNLAME(find) && - entry_is_lame(adb, entry, qname, qtype, now)) - { - find->options |= DNS_ADBFIND_LAMEPRUNED; + find->options |= DNS_ADBFIND_OVERQUOTA; goto nextv4; } @@ -1641,17 +1519,10 @@ copy_namehook_lists(dns_adb_t *adb, dns_adbfind_t *find, LOCK(&entry->lock); if (adbentry_overquota(entry)) { - find->options |= (DNS_ADBFIND_LAMEPRUNED | - DNS_ADBFIND_OVERQUOTA); + find->options |= DNS_ADBFIND_OVERQUOTA; goto nextv6; } - if (!FIND_RETURNLAME(find) && - entry_is_lame(adb, entry, qname, qtype, now)) - { - find->options |= DNS_ADBFIND_LAMEPRUNED; - goto nextv6; - } addrinfo = new_adbaddrinfo(adb, entry, find->port); /* @@ -2067,7 +1938,7 @@ dns_adb_shutdown(dns_adb_t *adb) { isc_result_t dns_adb_createfind(dns_adb_t *adb, isc_loop_t *loop, isc_job_cb cb, void *cbarg, const dns_name_t *name, const dns_name_t *qname, - dns_rdatatype_t qtype, unsigned int options, + dns_rdatatype_t qtype ISC_ATTR_UNUSED, unsigned int options, isc_stdtime_t now, dns_name_t *target, in_port_t port, unsigned int depth, isc_counter_t *qc, dns_adbfind_t **findp) { @@ -2299,7 +2170,7 @@ fetch: * Run through the name and copy out the bits we are * interested in. */ - copy_namehook_lists(adb, find, qname, qtype, adbname, now); + copy_namehook_lists(adb, find, adbname); post_copy: if (NAME_FETCH_A(adbname)) { @@ -2568,9 +2439,7 @@ static void dump_entry(FILE *f, dns_adb_t *adb, dns_adbentry_t *entry, bool debug, isc_stdtime_t now) { char addrbuf[ISC_NETADDR_FORMATSIZE]; - char typebuf[DNS_RDATATYPE_FORMATSIZE]; isc_netaddr_t netaddr; - dns_adblameinfo_t *li = NULL; isc_netaddr_fromsockaddr(&netaddr, &entry->sockaddr); isc_netaddr_format(&netaddr, addrbuf, sizeof(addrbuf)); @@ -2607,15 +2476,6 @@ dump_entry(FILE *f, dns_adb_t *adb, dns_adbentry_t *entry, bool debug, } fprintf(f, "\n"); - for (li = ISC_LIST_HEAD(entry->lameinfo); li != NULL; - li = ISC_LIST_NEXT(li, plink)) - { - fprintf(f, ";\t\t"); - dns_name_print(&li->qname, f); - dns_rdatatype_format(li->qtype, typebuf, sizeof(typebuf)); - fprintf(f, " %s [lame TTL %d]\n", typebuf, - (int)(li->lame_timer - now)); - } } static void @@ -3164,41 +3024,6 @@ cleanup: return (result); } -isc_result_t -dns_adb_marklame(dns_adb_t *adb, dns_adbaddrinfo_t *addr, - const dns_name_t *qname, dns_rdatatype_t qtype, - isc_stdtime_t expire_time) { - REQUIRE(DNS_ADB_VALID(adb)); - REQUIRE(DNS_ADBADDRINFO_VALID(addr)); - REQUIRE(qname != NULL); - - isc_result_t result = ISC_R_SUCCESS; - dns_adblameinfo_t *li = NULL; - dns_adbentry_t *entry = addr->entry; - - LOCK(&entry->lock); - li = ISC_LIST_HEAD(entry->lameinfo); - while (li != NULL && - (li->qtype != qtype || !dns_name_equal(qname, &li->qname))) - { - li = ISC_LIST_NEXT(li, plink); - } - if (li != NULL) { - if (expire_time > li->lame_timer) { - li->lame_timer = expire_time; - } - goto unlock; - } - li = new_adblameinfo(adb, qname, qtype); - li->lame_timer = expire_time; - - ISC_LIST_PREPEND(addr->entry->lameinfo, li, plink); - -unlock: - UNLOCK(&entry->lock); - return (result); -} - void dns_adb_adjustsrtt(dns_adb_t *adb, dns_adbaddrinfo_t *addr, unsigned int rtt, unsigned int factor) { diff --git a/lib/dns/include/dns/adb.h b/lib/dns/include/dns/adb.h index f7dee889e1..e1b94ed369 100644 --- a/lib/dns/include/dns/adb.h +++ b/lib/dns/include/dns/adb.h @@ -43,11 +43,6 @@ * Records are stored internally until a timer expires. The timer is the * smaller of the TTL or signature validity period. * - * Lameness is stored per tuple, and this data hangs off each - * address field. When an address is marked lame for a given tuple the address - * will not be returned to a caller. - * - * * MP: * *\li The ADB takes care of all necessary locking. @@ -159,13 +154,6 @@ struct dns_adbfind { * _STARTATZONE: * Fetches will start using the closest zone data or use the root servers. * This is useful for reestablishing glue that has expired. - * - * _RETURNLAME: - * Return lame servers in a find, so that all addresses are returned. - * - * _LAMEPRUNED: - * At least one address was omitted from the list because it was lame. - * This bit will NEVER be set if _RETURNLAME is set in the createfind(). */ /*% Return addresses of type INET. */ #define DNS_ADBFIND_INET 0x00000001 @@ -192,15 +180,6 @@ struct dns_adbfind { * This is useful for reestablishing glue that has expired. */ #define DNS_ADBFIND_STARTATZONE 0x00000020 -/*% - * Return lame servers in a find, so that all addresses are returned. - */ -#define DNS_ADBFIND_RETURNLAME 0x00000100 -/*% - * Only schedule an event if no addresses are known. - * Must set _WANTEVENT for this to be meaningful. - */ -#define DNS_ADBFIND_LAMEPRUNED 0x00000200 /*% * The server's fetch quota is exceeded; it will be treated as * lame for this query. @@ -320,8 +299,7 @@ dns_adb_createfind(dns_adb_t *adb, isc_loop_t *loop, isc_job_cb cb, void *cbarg, * * The list of addresses returned is unordered. The caller must impose * any ordering required. The list will not contain "known bad" addresses, - * however. For instance, it will not return hosts that are known to be - * lame for the zone in question. + * however. * * The caller cannot (directly) modify the contents of the address list's * fields other than the "link" field. All values can be read at any @@ -441,29 +419,6 @@ dns_adb_dump(dns_adb_t *adb, FILE *f); *\li f != NULL, and is a file open for writing. */ -isc_result_t -dns_adb_marklame(dns_adb_t *adb, dns_adbaddrinfo_t *addr, - const dns_name_t *qname, dns_rdatatype_t type, - isc_stdtime_t expire_time); -/*%< - * Mark the given address as lame for the . expire_time should - * be set to the time when the entry should expire. That is, if it is to - * expire 10 minutes in the future, it should set it to (now + 10 * 60). - * - * Requires: - * - *\li adb be valid. - * - *\li addr be valid. - * - *\li qname be the qname used in the dns_adb_createfind() call. - * - * Returns: - * - *\li #ISC_R_SUCCESS -- all is well. - *\li #ISC_R_NOMEMORY -- could not mark address as lame. - */ - /* * Reasonable defaults for RTT adjustments * diff --git a/lib/dns/resolver.c b/lib/dns/resolver.c index bd467d5269..2c91606b16 100644 --- a/lib/dns/resolver.c +++ b/lib/dns/resolver.c @@ -3393,8 +3393,6 @@ findname(fetchctx_t *fctx, const dns_name_t *name, in_port_t port, *overquota = true; } fctx->quotacount++; /* quota exceeded */ - } else if ((find->options & DNS_ADBFIND_LAMEPRUNED) != 0) { - fctx->lamecount++; /* cached lame server */ } else { fctx->adberr++; /* unreachable server, etc. */ } @@ -9829,17 +9827,6 @@ rctx_lameserver(respctx_t *rctx) { inc_stats(fctx->res, dns_resstatscounter_lame); log_lame(fctx, query->addrinfo); - if (fctx->res->lame_ttl != 0) { - result = dns_adb_marklame(fctx->adb, query->addrinfo, - fctx->name, fctx->type, - rctx->now + fctx->res->lame_ttl); - if (result != ISC_R_SUCCESS) { - isc_log_write(dns_lctx, DNS_LOGCATEGORY_RESOLVER, - DNS_LOGMODULE_RESOLVER, ISC_LOG_ERROR, - "could not mark server as lame: %s", - isc_result_totext(result)); - } - } rctx->broken_server = DNS_R_LAME; rctx->next_server = true; FCTXTRACE("lame server"); diff --git a/lib/dns/zone.c b/lib/dns/zone.c index eb023ec967..b9bea598ef 100644 --- a/lib/dns/zone.c +++ b/lib/dns/zone.c @@ -12051,8 +12051,7 @@ notify_find_address(dns_notify_t *notify) { dns_adb_t *adb = NULL; REQUIRE(DNS_NOTIFY_VALID(notify)); - options = DNS_ADBFIND_WANTEVENT | DNS_ADBFIND_INET | DNS_ADBFIND_INET6 | - DNS_ADBFIND_RETURNLAME; + options = DNS_ADBFIND_WANTEVENT | DNS_ADBFIND_INET | DNS_ADBFIND_INET6; dns_view_getadb(notify->zone->view, &adb); if (adb == NULL) { @@ -20491,8 +20490,7 @@ checkds_find_address(dns_checkds_t *checkds) { dns_adb_t *adb = NULL; REQUIRE(DNS_CHECKDS_VALID(checkds)); - options = DNS_ADBFIND_WANTEVENT | DNS_ADBFIND_INET | DNS_ADBFIND_INET6 | - DNS_ADBFIND_RETURNLAME; + options = DNS_ADBFIND_WANTEVENT | DNS_ADBFIND_INET | DNS_ADBFIND_INET6; dns_view_getadb(checkds->zone->view, &adb); if (adb == NULL) { From cb0db600e78e88ca6e804a95382b2dd99fbbbeb8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ond=C5=99ej=20Sur=C3=BD?= Date: Thu, 21 Sep 2023 11:59:01 +0200 Subject: [PATCH 2/5] Replace some ADB entry locking with atomics to reduce ADB contention Use atomics on couple of ADB entry members (.srtt, .flags, .expires, and .lastage) to remove ADB entry locking from couple of hot spots. The most prominent place is copy_namehook_lists() that gets called under ADB name lock and if the namehook list is long it acquires-releases quite a few ADB entry locks. Changing those ADB entry members to atomics allowed us to new_adbaddrinfo() not require locked ADB entry and since adbentry_overquota() already used atomics and handling lame information was dropped in the previous commit, we could not make the copy_namehook_lists() lockless. The other hotspot is dns_adb_adjustsrtt() and dns_adb_agesrtt() that can now use atomics because .srtt is already atomic_uint. And the last place that could now use atomics is dns_adb_changeflags(). --- lib/dns/adb.c | 100 +++++++++++++++++++++++--------------------------- 1 file changed, 46 insertions(+), 54 deletions(-) diff --git a/lib/dns/adb.c b/lib/dns/adb.c index 226b10e3ea..26f9885ba3 100644 --- a/lib/dns/adb.c +++ b/lib/dns/adb.c @@ -220,8 +220,8 @@ struct dns_adbentry { isc_refcount_t references; dns_adbnamehooklist_t nhs; - unsigned int flags; - unsigned int srtt; + atomic_uint flags; + atomic_uint srtt; unsigned int completed; unsigned int timeouts; unsigned char plain; @@ -239,8 +239,8 @@ struct dns_adbentry { unsigned char *cookie; uint16_t cookielen; - isc_stdtime_t expires; - isc_stdtime_t lastage; + _Atomic(isc_stdtime_t) expires; + _Atomic(isc_stdtime_t) lastage; /*%< * A nonzero 'expires' field indicates that the entry should * persist until that time. This allows entries found @@ -391,7 +391,7 @@ enum { enum { ENTRY_IS_DEAD = 1 << 31, }; -#define ENTRY_DEAD(e) (((e)->flags & ENTRY_IS_DEAD) != 0) +#define ENTRY_DEAD(e) ((atomic_load(&(e)->flags) & ENTRY_IS_DEAD) != 0) /* * To the name, address classes are all that really exist. If it has a @@ -1068,6 +1068,9 @@ new_adbentry(dns_adb_t *adb, const isc_sockaddr_t *addr) { .srtt = isc_random_uniform(0x1f) + 1, .sockaddr = *addr, .link = ISC_LINK_INITIALIZER, + .quota = adb->quota, + .references = ISC_REFCOUNT_INITIALIZER(1), + .adb = dns_adb_ref(adb), .magic = DNS_ADBENTRY_MAGIC, }; @@ -1075,14 +1078,8 @@ new_adbentry(dns_adb_t *adb, const isc_sockaddr_t *addr) { fprintf(stderr, "dns_adbentry__init:%s:%s:%d:%p->references = 1\n", __func__, __FILE__, __LINE__ + 1, entry); #endif - isc_refcount_init(&entry->references, 1); isc_mutex_init(&entry->lock); - atomic_init(&entry->active, 0); - atomic_init(&entry->quota, adb->quota); - - dns_adb_attach(adb, &entry->adb); - inc_adbstats(adb, dns_adbstats_entriescnt); return (entry); @@ -1210,8 +1207,8 @@ new_adbaddrinfo(dns_adb_t *adb, dns_adbentry_t *entry, in_port_t port) { ai = isc_mem_get(adb->mctx, sizeof(*ai)); *ai = (dns_adbaddrinfo_t){ - .srtt = entry->srtt, - .flags = entry->flags, + .srtt = atomic_load(&entry->srtt), + .flags = atomic_load(&entry->flags), .publink = ISC_LINK_INITIALIZER, .sockaddr = entry->sockaddr, .entry = dns_adbentry_ref(entry), @@ -1492,7 +1489,6 @@ copy_namehook_lists(dns_adb_t *adb, dns_adbfind_t *find, dns_adbname_t *name) { while (namehook != NULL) { dns_adbaddrinfo_t *addrinfo = NULL; entry = namehook->entry; - LOCK(&entry->lock); if (adbentry_overquota(entry)) { find->options |= DNS_ADBFIND_OVERQUOTA; @@ -1506,7 +1502,6 @@ copy_namehook_lists(dns_adb_t *adb, dns_adbfind_t *find, dns_adbname_t *name) { */ ISC_LIST_APPEND(find->list, addrinfo, publink); nextv4: - UNLOCK(&entry->lock); namehook = ISC_LIST_NEXT(namehook, name_link); } } @@ -1516,7 +1511,6 @@ copy_namehook_lists(dns_adb_t *adb, dns_adbfind_t *find, dns_adbname_t *name) { while (namehook != NULL) { dns_adbaddrinfo_t *addrinfo = NULL; entry = namehook->entry; - LOCK(&entry->lock); if (adbentry_overquota(entry)) { find->options |= DNS_ADBFIND_OVERQUOTA; @@ -1530,7 +1524,6 @@ copy_namehook_lists(dns_adb_t *adb, dns_adbfind_t *find, dns_adbname_t *name) { */ ISC_LIST_APPEND(find->list, addrinfo, publink); nextv6: - UNLOCK(&entry->lock); namehook = ISC_LIST_NEXT(namehook, name_link); } } @@ -1572,7 +1565,7 @@ expire_entry(dns_adbentry_t *adbentry) { dns_adb_t *adb = adbentry->adb; if (!ENTRY_DEAD(adbentry)) { - adbentry->flags |= ENTRY_IS_DEAD; + (void)atomic_fetch_or(&adbentry->flags, ENTRY_IS_DEAD); result = isc_hashmap_delete( adb->entries, @@ -1591,7 +1584,9 @@ entry_expired(dns_adbentry_t *adbentry, isc_stdtime_t now) { return (false); } - if (adbentry->expires == 0 || adbentry->expires > now) { + if (atomic_load(&adbentry->expires) == 0 || + atomic_load(&adbentry->expires) > now) + { return (false); } @@ -2452,8 +2447,8 @@ dump_entry(FILE *f, dns_adb_t *adb, dns_adbentry_t *entry, bool debug, fprintf(f, ";\t%s [srtt %u] [flags %08x] [edns %u/%u] " "[plain %u/%u]", - addrbuf, entry->srtt, entry->flags, entry->edns, entry->ednsto, - entry->plain, entry->plainto); + addrbuf, atomic_load(&entry->srtt), atomic_load(&entry->flags), + entry->edns, entry->ednsto, entry->plain, entry->plainto); if (entry->udpsize != 0U) { fprintf(f, " [udpsize %u]", entry->udpsize); } @@ -2465,8 +2460,9 @@ dump_entry(FILE *f, dns_adb_t *adb, dns_adbentry_t *entry, bool debug, } fprintf(f, "]"); } - if (entry->expires != 0) { - fprintf(f, " [ttl %d]", (int)(entry->expires - now)); + if (atomic_load(&entry->expires) != 0) { + fprintf(f, " [ttl %d]", + (int)(atomic_load(&entry->expires) - now)); } if (adb != NULL && adb->quota != 0 && adb->atr_freq != 0) { @@ -3031,16 +3027,14 @@ dns_adb_adjustsrtt(dns_adb_t *adb, dns_adbaddrinfo_t *addr, unsigned int rtt, REQUIRE(DNS_ADBADDRINFO_VALID(addr)); REQUIRE(factor <= 10); - isc_stdtime_t now = 0; dns_adbentry_t *entry = addr->entry; - LOCK(&entry->lock); - if (entry->expires == 0 || factor == DNS_ADB_RTTADJAGE) { + isc_stdtime_t now = 0; + if (atomic_load(&entry->expires) == 0 || factor == DNS_ADB_RTTADJAGE) { now = isc_stdtime_now(); } - adjustsrtt(addr, rtt, factor, now); - UNLOCK(&entry->lock); + adjustsrtt(addr, rtt, factor, now); } void @@ -3048,40 +3042,36 @@ dns_adb_agesrtt(dns_adb_t *adb, dns_adbaddrinfo_t *addr, isc_stdtime_t now) { REQUIRE(DNS_ADB_VALID(adb)); REQUIRE(DNS_ADBADDRINFO_VALID(addr)); - dns_adbentry_t *entry = addr->entry; - LOCK(&entry->lock); - adjustsrtt(addr, 0, DNS_ADB_RTTADJAGE, now); - - UNLOCK(&entry->lock); } static void adjustsrtt(dns_adbaddrinfo_t *addr, unsigned int rtt, unsigned int factor, isc_stdtime_t now) { - uint64_t new_srtt; + unsigned int new_srtt; if (factor == DNS_ADB_RTTADJAGE) { - if (addr->entry->lastage != now) { + if (atomic_load(&addr->entry->lastage) != now) { new_srtt = addr->entry->srtt; new_srtt <<= 9; new_srtt -= addr->entry->srtt; new_srtt >>= 9; - addr->entry->lastage = now; + atomic_store(&addr->entry->lastage, now); } else { - new_srtt = addr->entry->srtt; + new_srtt = atomic_load(&addr->entry->srtt) } } else { - new_srtt = ((uint64_t)addr->entry->srtt / 10 * factor) + + new_srtt = ((uint64_t)atomic_load(&addr->entry->srtt) / 10 * + factor) + ((uint64_t)rtt / 10 * (10 - factor)); } - addr->entry->srtt = (unsigned int)new_srtt; - addr->srtt = (unsigned int)new_srtt; + atomic_store(&addr->entry->srtt, new_srtt); + addr->srtt = new_srtt; - if (addr->entry->expires == 0) { - addr->entry->expires = now + ADB_ENTRY_WINDOW; - } + (void)atomic_compare_exchange_strong(&addr->entry->expires, + &(isc_stdtime_t){ 0 }, + now + ADB_ENTRY_WINDOW); } void @@ -3093,12 +3083,16 @@ dns_adb_changeflags(dns_adb_t *adb, dns_adbaddrinfo_t *addr, unsigned int bits, isc_stdtime_t now; dns_adbentry_t *entry = addr->entry; - LOCK(&entry->lock); + unsigned int flags = atomic_load(&entry->flags); + while (!atomic_compare_exchange_strong(&entry->flags, &flags, + (flags & ~mask) | (bits & mask))) + { + /* repeat */ + } - entry->flags = (entry->flags & ~mask) | (bits & mask); - if (entry->expires == 0) { + if (atomic_load(&entry->expires) == 0) { now = isc_stdtime_now(); - entry->expires = now + ADB_ENTRY_WINDOW; + atomic_store(&entry->expires, now + ADB_ENTRY_WINDOW); } /* @@ -3106,8 +3100,6 @@ dns_adb_changeflags(dns_adb_t *adb, dns_adbaddrinfo_t *addr, unsigned int bits, * the most recent values from addr->entry->flags. */ addr->flags = (addr->flags & ~mask) | (bits & mask); - - UNLOCK(&entry->lock); } /* @@ -3365,11 +3357,12 @@ dns_adb_findaddrinfo(dns_adb_t *adb, const isc_sockaddr_t *sa, entry = get_attached_and_locked_entry(adb, now, sa); INSIST(entry != NULL); + UNLOCK(&entry->lock); + port = isc_sockaddr_getport(sa); addr = new_adbaddrinfo(adb, entry, port); *addrp = addr; - UNLOCK(&entry->lock); dns_adbentry_detach(&entry); return (result); @@ -3393,10 +3386,9 @@ dns_adb_freeaddrinfo(dns_adb_t *adb, dns_adbaddrinfo_t **addrp) { REQUIRE(DNS_ADBENTRY_VALID(entry)); - if (entry->expires == 0) { - now = isc_stdtime_now(); - entry->expires = now + ADB_ENTRY_WINDOW; - } + now = isc_stdtime_now(); + (void)atomic_compare_exchange_strong( + &entry->expires, &(isc_stdtime_t){ 0 }, now + ADB_ENTRY_WINDOW); free_adbaddrinfo(adb, &addr); } From 0635bd01cbf47cf05cfa7670fe0833077ccfe6fe Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ond=C5=99ej=20Sur=C3=BD?= Date: Thu, 12 Oct 2023 09:17:40 +0200 Subject: [PATCH 3/5] Skip the no-op code in adjustsrtt() If factor == DNS_ADB_RTTADJAGE and addr->entry->lastage == now we would load value into new_srtt and then immediatelly store it back which triggers the synchronization between threads using .srtt values. --- lib/dns/adb.c | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/lib/dns/adb.c b/lib/dns/adb.c index 26f9885ba3..a589519f6e 100644 --- a/lib/dns/adb.c +++ b/lib/dns/adb.c @@ -3057,18 +3057,17 @@ adjustsrtt(dns_adbaddrinfo_t *addr, unsigned int rtt, unsigned int factor, new_srtt -= addr->entry->srtt; new_srtt >>= 9; atomic_store(&addr->entry->lastage, now); - } else { - new_srtt = atomic_load(&addr->entry->srtt) + atomic_store(&addr->entry->srtt, new_srtt); + addr->srtt = new_srtt; } } else { new_srtt = ((uint64_t)atomic_load(&addr->entry->srtt) / 10 * factor) + ((uint64_t)rtt / 10 * (10 - factor)); + atomic_store(&addr->entry->srtt, new_srtt); + addr->srtt = new_srtt; } - atomic_store(&addr->entry->srtt, new_srtt); - addr->srtt = new_srtt; - (void)atomic_compare_exchange_strong(&addr->entry->expires, &(isc_stdtime_t){ 0 }, now + ADB_ENTRY_WINDOW); From 91f3b0edee9873b3f727bc9fc372a362b59480b1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ond=C5=99ej=20Sur=C3=BD?= Date: Thu, 12 Oct 2023 09:20:42 +0200 Subject: [PATCH 4/5] Use mul and div instead of bitshifts to calculate srtt There was a microoptimization for smoothing srtt with bitshifts. Revert the code to use * 98 / 100, it doesn't really make that difference on modern CPUs, for comparison here: muldiv: imul eax, edi, 98 imul rax, rax, 1374389535 shr rax, 37 ret shift: mov eax, edi sal eax, 9 sub eax, edi shr eax, 9 ret --- lib/dns/adb.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/lib/dns/adb.c b/lib/dns/adb.c index a589519f6e..65904e0f5d 100644 --- a/lib/dns/adb.c +++ b/lib/dns/adb.c @@ -3052,10 +3052,8 @@ adjustsrtt(dns_adbaddrinfo_t *addr, unsigned int rtt, unsigned int factor, if (factor == DNS_ADB_RTTADJAGE) { if (atomic_load(&addr->entry->lastage) != now) { - new_srtt = addr->entry->srtt; - new_srtt <<= 9; - new_srtt -= addr->entry->srtt; - new_srtt >>= 9; + new_srtt = (uint64_t)atomic_load(&addr->entry->srtt) * + 98 / 100; atomic_store(&addr->entry->lastage, now); atomic_store(&addr->entry->srtt, new_srtt); addr->srtt = new_srtt; From 6b306b9debaa88581a716dd0501e588215457612 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ond=C5=99ej=20Sur=C3=BD?= Date: Thu, 21 Sep 2023 15:31:47 +0200 Subject: [PATCH 5/5] Add CHANGES note for [GL #4326] --- CHANGES | 3 +++ 1 file changed, 3 insertions(+) diff --git a/CHANGES b/CHANGES index cdfeecaaa9..b2edb6163c 100644 --- a/CHANGES +++ b/CHANGES @@ -1,3 +1,6 @@ +6264. [func] Use atomics to handle some ADB entry members + to reduce ADB locking contention. [GL #4326] + 6263. [func] Convert the RPZ summary database to use a QP trie instead of an RBT. [GL !8352]