Refactor the dns_adb unit

The dns_adb unit has been refactored to be much simpler.  Following
changes have been made:

1. Simplify the ADB to always allow GLUE and hints

   There were only two places where dns_adb_createfind() was used - in
   the dns_resolver unit where hints and GLUE addresses were ok, and in
   the dns_zone where dns_adb_createfind() would be called without
   DNS_ADBFIND_HINTOK and DNS_ADBFIND_GLUEOK set.

   Simplify the logic by allowing hint and GLUE addresses when looking
   up the nameserver addresses to notify.  The difference is negligible
   and would cause a difference in the notified addresses only when
   there's mismatch between the parent and child addresses and we
   haven't cached the child addresses yet.

2. Drop the namebuckets and entrybuckets

   Formerly, the namebuckets and entrybuckets were used to reduced the
   lock contention when accessing the double-linked lists stored in each
   bucket.  In the previous refactoring, the custom hashtable for the
   buckets has been replaced with isc_ht/isc_hashmap, so only a single
   item (mostly, see below) would end up in each bucket.

   Removing the entrybuckets has been straightforward, the only matching
   was done on the isc_sockaddr_t member of the dns_adbentry.

   Removing the zonebuckets required GLUEOK and HINTOK bits to be
   removed because the find could match entries with-or-without the bits
   set, and creating a custom key that stores the
   DNS_ADBFIND_STARTATZONE in the first byte of the key, so we can do a
   straightforward lookup into the hashtable without traversing a list
   that contains items with different flags.

3. Remove unassociated entries from ADB database

   Previously, the adbentries could live in the ADB database even after
   unlinking them from dns_adbnames.  Such entries would show up as
   "Unassociated entries" in the ADB dump.  The benefit of keeping such
   entries is little - the chance that we link such entry to a adbname
   is small, and it's simpler to evict unlinked entries from the ADB
   cache (and the hashtable) than create second LRU cleaning mechanism.

   Unlinked ADB entries are now directly deleted from the hash
   table (hashmap) upon destruction.

4. Cleanup expired entries from the hash table

   When buckets were still in place, the code would keep the buckets
   always allocated and never shrink the hash table (hashmap).  With
   proper reference counting in place, we can delete the adbnames from
   the hash table and the LRU list.

5. Stop purging the names early when we hit the time limit

   Because the LRU list is now time ordered, we can stop purging the
   names when we find a first entry that doesn't fullfil our time-based
   eviction criteria because no further entry on the LRU list will meet
   the criteria.

Future work:

1. Lock contention

   In this commit, the focus was on correctness of the data structure,
   but in the future, the lock contention in the ADB database needs to
   be addressed.  Currently, we use simple mutex to lock the hash
   tables, because we almost always need to use a write lock for
   properly purging the hashtables.  The ADB database needs to be
   sharded (similar to the effect that buckets had in the past).  Each
   shard would contain own hashmap and own LRU list.

2. Time-based purging

   The ADB names and entries stay intact when there are no lookups.
   When we add separate shards, a timer needs to be added for time-based
   cleaning in case there's no traffic hashing to the inactive shard.

3. Revisit the 30 minutes limit

   The ADB cache is capped at 30 minutes.  This needs to be revisited,
   and at least the limit should be configurable (in both directions).
This commit is contained in:
Ondřej Surý 2022-10-13 12:00:54 +02:00
parent 66d8bb03cb
commit 50f357cb36
No known key found for this signature in database
GPG key ID: 2820F37E873DEA41
5 changed files with 700 additions and 1246 deletions

View file

@ -234,7 +234,6 @@ mv ns2/named_dump.db.test$n ns2/named_dump.db.test$n.a
sed -n '/plain success\/timeout/,/Unassociated entries/p' \
ns2/named_dump.db.test$n.a > sed.out.$n.a
grep 'plain success/timeout' sed.out.$n.a > /dev/null 2>&1 || ret=1
grep 'Unassociated entries' sed.out.$n.a > /dev/null 2>&1 || ret=1
grep 'ns.flushtest.example' sed.out.$n.a > /dev/null 2>&1 || ret=1
$RNDC $RNDCOPTS flushtree flushtest.example || ret=1
dump_cache
@ -242,7 +241,6 @@ mv ns2/named_dump.db.test$n ns2/named_dump.db.test$n.b
sed -n '/plain success\/timeout/,/Unassociated entries/p' \
ns2/named_dump.db.test$n.b > sed.out.$n.b
grep 'plain success/timeout' sed.out.$n.b > /dev/null 2>&1 || ret=1
grep 'Unassociated entries' sed.out.$n.b > /dev/null 2>&1 || ret=1
grep 'ns.flushtest.example' sed.out.$n.b > /dev/null 2>&1 && ret=1
if [ $ret != 0 ]; then echo_i "failed"; fi
status=`expr $status + $ret`

File diff suppressed because it is too large Load diff

View file

@ -66,6 +66,9 @@
*** Imports
***/
/* Define to 1 for detailed reference tracing */
#undef DNS_ADB_TRACE
#include <inttypes.h>
#include <stdbool.h>
@ -92,9 +95,7 @@ ISC_LANG_BEGINDECLS
*** TYPES
***/
typedef struct dns_adbname dns_adbname_t;
typedef struct dns_adbnamebucket dns_adbnamebucket_t;
typedef struct dns_adbentrybucket dns_adbentrybucket_t;
typedef struct dns_adbname dns_adbname_t;
/*!
*\brief
@ -146,11 +147,6 @@ struct dns_adbfind {
* Fetches will start using the closest zone data or use the root servers.
* This is useful for reestablishing glue that has expired.
*
* _GLUEOK:
* _HINTOK:
* Glue or hints are ok. These are used when matching names already
* in the adb, and when dns databases are searched.
*
* _RETURNLAME:
* Return lame servers in a find, so that all addresses are returned.
*
@ -183,16 +179,6 @@ struct dns_adbfind {
* This is useful for reestablishing glue that has expired.
*/
#define DNS_ADBFIND_STARTATZONE 0x00000020
/*%
* Glue or hints are ok. These are used when matching names already
* in the adb, and when dns databases are searched.
*/
#define DNS_ADBFIND_GLUEOK 0x00000040
/*%
* Glue or hints are ok. These are used when matching names already
* in the adb, and when dns databases are searched.
*/
#define DNS_ADBFIND_HINTOK 0x00000080
/*%
* Return lame servers in a find, so that all addresses are returned.
*/
@ -281,32 +267,16 @@ dns_adb_create(isc_mem_t *mem, dns_view_t *view, isc_loopmgr_t *loopmgr,
*\li #ISC_R_NOMEMORY after resource allocation failure.
*/
#define dns_adb_attach(a, ap) \
dns__adb_attach(a, ap, __func__, __FILE__, __LINE__)
void
dns__adb_attach(dns_adb_t *adb, dns_adb_t **adbp, const char *func,
const char *file, unsigned int line);
/*%
* Attach to an 'adb' to 'adbp'.
*
* Requires:
*\li 'adb' to be a valid dns_adb_t, created via dns_adb_create().
*\li 'adbp' to be a valid pointer to a *dns_adb_t which is initialized
* to NULL.
*/
#define dns_adb_detach(ap) dns__adb_detach(ap, __func__, __FILE__, __LINE__)
void
dns__adb_detach(dns_adb_t **adb, const char *func, const char *file,
unsigned int line);
/*%
* Delete the ADB. Sets *ADB to NULL. Cancels any outstanding requests.
*
* Requires:
*
*\li 'adb' be non-NULL and '*adb' be a valid dns_adb_t, created via
* dns_adb_create().
*/
#if DNS_ADB_TRACE
#define dns_adb_ref(ptr) dns_adb__ref(ptr, __func__, __FILE__, __LINE__)
#define dns_adb_unref(ptr) dns_adb__unref(ptr, __func__, __FILE__, __LINE__)
#define dns_adb_attach(ptr, ptrp) \
dns_adb__attach(ptr, ptrp, __func__, __FILE__, __LINE__)
#define dns_adb_detach(ptrp) dns_adb__detach(ptrp, __func__, __FILE__, __LINE__)
ISC_REFCOUNT_TRACE_DECL(dns_adb);
#else
ISC_REFCOUNT_DECL(dns_adb);
#endif
void
dns_adb_shutdown(dns_adb_t *adb);

View file

@ -3444,8 +3444,6 @@ findname(fetchctx_t *fctx, const dns_name_t *name, in_port_t port,
if (dns_name_issubdomain(name, fctx->domain)) {
options |= DNS_ADBFIND_STARTATZONE;
}
options |= DNS_ADBFIND_GLUEOK;
options |= DNS_ADBFIND_HINTOK;
/*
* See what we know about this address.

View file

@ -150,8 +150,8 @@ isc_refcount_decrement(isc_refcount_t *target) {
} while (0)
#define ISC_REFCOUNT_TRACE_DECL(name) \
void name##__ref(name##_t *ptr, const char *func, const char *file, \
unsigned int line); \
name##_t *name##__ref(name##_t *ptr, const char *func, \
const char *file, unsigned int line); \
void name##__unref(name##_t *ptr, const char *func, const char *file, \
unsigned int line); \
void name##__attach(name##_t *ptr, name##_t **ptrp, const char *func, \
@ -160,31 +160,37 @@ isc_refcount_decrement(isc_refcount_t *target) {
const char *file, unsigned int line)
#define ISC_REFCOUNT_TRACE_IMPL(name, destroy) \
void name##__ref(name##_t *ptr, const char *func, const char *file, \
unsigned int line) { \
uint_fast32_t refs; \
name##_t *name##__ref(name##_t *ptr, const char *func, \
const char *file, unsigned int line) { \
REQUIRE(ptr != NULL); \
refs = isc_refcount_increment(&ptr->references); \
uint_fast32_t refs = \
isc_refcount_increment(&ptr->references) + 1; \
fprintf(stderr, \
"%s:%s:%s:%u:%p->references = %" PRIuFAST32 "\n", \
__func__, func, file, line, ptr, refs + 1); \
__func__, func, file, line, ptr, refs); \
return (ptr); \
} \
\
void name##__unref(name##_t *ptr, const char *func, const char *file, \
unsigned int line) { \
uint_fast32_t refs; \
REQUIRE(ptr != NULL); \
if ((refs = isc_refcount_decrement(&ptr->references)) == 1) { \
uint_fast32_t refs = \
isc_refcount_decrement(&ptr->references) - 1; \
if (refs == 0) { \
destroy(ptr); \
} \
fprintf(stderr, \
"%s:%s:%s:%u:%p->references = %" PRIuFAST32 "\n", \
__func__, func, file, line, ptr, refs - 1); \
__func__, func, file, line, ptr, refs); \
} \
void name##__attach(name##_t *ptr, name##_t **ptrp, const char *func, \
const char *file, unsigned int line) { \
REQUIRE(ptrp != NULL && *ptrp == NULL); \
name##__ref(ptr, func, file, line); \
uint_fast32_t refs = \
isc_refcount_increment(&ptr->references) + 1; \
fprintf(stderr, \
"%s:%s:%s:%u:%p->references = %" PRIuFAST32 "\n", \
__func__, func, file, line, ptr, refs); \
*ptrp = ptr; \
} \
\
@ -193,19 +199,27 @@ isc_refcount_decrement(isc_refcount_t *target) {
REQUIRE(ptrp != NULL && *ptrp != NULL); \
name##_t *ptr = *ptrp; \
*ptrp = NULL; \
name##__unref(ptr, func, file, line); \
uint_fast32_t refs = \
isc_refcount_decrement(&ptr->references) - 1; \
if (refs == 0) { \
destroy(ptr); \
} \
fprintf(stderr, \
"%s:%s:%s:%u:%p->references = %" PRIuFAST32 "\n", \
__func__, func, file, line, ptr, refs); \
}
#define ISC_REFCOUNT_DECL(name) \
void name##_ref(name##_t *ptr); \
void name##_unref(name##_t *ptr); \
void name##_attach(name##_t *ptr, name##_t **ptrp); \
void name##_detach(name##_t **ptrp)
#define ISC_REFCOUNT_DECL(name) \
name##_t *name##_ref(name##_t *ptr); \
void name##_unref(name##_t *ptr); \
void name##_attach(name##_t *ptr, name##_t **ptrp); \
void name##_detach(name##_t **ptrp)
#define ISC_REFCOUNT_IMPL(name, destroy) \
void name##_ref(name##_t *ptr) { \
name##_t *name##_ref(name##_t *ptr) { \
REQUIRE(ptr != NULL); \
isc_refcount_increment(&ptr->references); \
return (ptr); \
} \
\
void name##_unref(name##_t *ptr) { \