diff --git a/CHANGES b/CHANGES index 081f6572b6..b673261f55 100644 --- a/CHANGES +++ b/CHANGES @@ -1,3 +1,7 @@ +6099. [performance] Change the internal read-write lock to modified C-RW-WP + algorithm that is more reader-writer fair and has better + performance for our workloads. [GL #1609] + 6098. [test] Don't test HMAC-MD5 when not supported by libcrypto. [GL #3871] diff --git a/bin/dnssec/dnssec-signzone.c b/bin/dnssec/dnssec-signzone.c index 6f2328c72e..39ae3a35b9 100644 --- a/bin/dnssec/dnssec-signzone.c +++ b/bin/dnssec/dnssec-signzone.c @@ -3778,7 +3778,7 @@ main(int argc, char *argv[]) { * of keys rather early. */ ISC_LIST_INIT(keylist); - isc_rwlock_init(&keylist_lock, 0, 0); + isc_rwlock_init(&keylist_lock); /* * Fill keylist with: diff --git a/lib/dns/acl.c b/lib/dns/acl.c index 09f39bf28f..8678490906 100644 --- a/lib/dns/acl.c +++ b/lib/dns/acl.c @@ -696,7 +696,7 @@ dns_aclenv_create(isc_mem_t *mctx, dns_aclenv_t **envp) { isc_mem_attach(mctx, &env->mctx); isc_refcount_init(&env->references, 1); - isc_rwlock_init(&env->rwlock, 0, 0); + isc_rwlock_init(&env->rwlock); result = dns_acl_create(mctx, 0, &env->localhost); if (result != ISC_R_SUCCESS) { diff --git a/lib/dns/adb.c b/lib/dns/adb.c index 146c5cecca..6b32df2a0f 100644 --- a/lib/dns/adb.c +++ b/lib/dns/adb.c @@ -33,6 +33,7 @@ #include #include #include +#include #include #include #include @@ -118,12 +119,12 @@ struct dns_adb { dns_adbnamelist_t names_lru; isc_stdtime_t names_last_update; isc_hashmap_t *names; - isc_mutex_t names_lock; + isc_rwlock_t names_lock; dns_adbentrylist_t entries_lru; isc_stdtime_t entries_last_update; isc_hashmap_t *entries; - isc_mutex_t entries_lock; + isc_rwlock_t entries_lock; isc_stats_t *stats; @@ -372,6 +373,8 @@ maybe_expire_name(dns_adbname_t *adbname, isc_stdtime_t now); static void expire_name(dns_adbname_t *adbname, isc_eventtype_t evtype); static bool +entry_expired(dns_adbentry_t *adbentry, isc_stdtime_t now); +static bool maybe_expire_entry(dns_adbentry_t 
*adbentry, isc_stdtime_t now); static void expire_entry(dns_adbentry_t *adbentry); @@ -762,7 +765,7 @@ static void shutdown_names(dns_adb_t *adb) { dns_adbname_t *next = NULL; - LOCK(&adb->names_lock); + RWLOCK(&adb->names_lock, isc_rwlocktype_write); for (dns_adbname_t *name = ISC_LIST_HEAD(adb->names_lru); name != NULL; name = next) { @@ -779,20 +782,20 @@ shutdown_names(dns_adb_t *adb) { UNLOCK(&name->lock); dns_adbname_detach(&name); } - UNLOCK(&adb->names_lock); + RWUNLOCK(&adb->names_lock, isc_rwlocktype_write); } static void shutdown_entries(dns_adb_t *adb) { dns_adbentry_t *next = NULL; - LOCK(&adb->entries_lock); + RWLOCK(&adb->entries_lock, isc_rwlocktype_write); for (dns_adbentry_t *adbentry = ISC_LIST_HEAD(adb->entries_lru); adbentry != NULL; adbentry = next) { next = ISC_LIST_NEXT(adbentry, link); expire_entry(adbentry); } - UNLOCK(&adb->entries_lock); + RWUNLOCK(&adb->entries_lock, isc_rwlocktype_write); } /* @@ -1342,6 +1345,7 @@ get_attached_and_locked_name(dns_adb_t *adb, const dns_name_t *name, isc_time_t timenow; isc_stdtime_t last_update; adbnamekey_t key; + isc_rwlocktype_t locktype = isc_rwlocktype_read; isc_time_set(&timenow, now, 0); @@ -1351,54 +1355,65 @@ get_attached_and_locked_name(dns_adb_t *adb, const dns_name_t *name, hashval = isc_hashmap_hash(adb->names, &key.key, key.size); - LOCK(&adb->names_lock); + RWLOCK(&adb->names_lock, locktype); last_update = adb->names_last_update; - if (last_update + ADB_STALE_MARGIN < now || + + if (last_update + ADB_STALE_MARGIN < now || atomic_load_relaxed(&adb->is_overmem)) { - last_update = adb->names_last_update = now; - + last_update = now; + UPGRADELOCK(&adb->names_lock, locktype); purge_stale_names(adb, now); + adb->names_last_update = last_update; } result = isc_hashmap_find(adb->names, &hashval, key.key, key.size, (void **)&adbname); switch (result) { case ISC_R_NOTFOUND: + UPGRADELOCK(&adb->names_lock, locktype); + /* Allocate a new name and add it to the hash table. 
*/ adbname = new_adbname(adb, name, start_at_zone); result = isc_hashmap_add(adb->names, &hashval, &adbname->key.key, adbname->key.size, adbname); + if (result == ISC_R_EXISTS) { + destroy_adbname(adbname); + adbname = NULL; + result = isc_hashmap_find(adb->names, &hashval, key.key, + key.size, (void **)&adbname); + ISC_LIST_UNLINK(adb->names_lru, adbname, link); + } INSIST(result == ISC_R_SUCCESS); - dns_adbname_ref(adbname); - LOCK(&adbname->lock); /* Must be unlocked by the caller */ - adbname->last_used = now; - - ISC_LIST_PREPEND(adb->names_lru, adbname, link); break; case ISC_R_SUCCESS: - dns_adbname_ref(adbname); - LOCK(&adbname->lock); /* Must be unlocked by the caller */ - if (adbname->last_used + ADB_CACHE_MINIMUM <= last_update) { - adbname->last_used = now; - + if (locktype == isc_rwlocktype_write) { ISC_LIST_UNLINK(adb->names_lru, adbname, link); - ISC_LIST_PREPEND(adb->names_lru, adbname, link); } break; default: UNREACHABLE(); } + dns_adbname_ref(adbname); + + LOCK(&adbname->lock); /* Must be unlocked by the caller */ + if (adbname->last_used + ADB_CACHE_MINIMUM <= last_update) { + adbname->last_used = now; + } + if (locktype == isc_rwlocktype_write) { + ISC_LIST_PREPEND(adb->names_lru, adbname, link); + } + /* * The refcount is now 2 and the final detach will happen in * expire_name() - the unused adbname stored in the hashtable and lru * has always refcount == 1 */ - UNLOCK(&adb->names_lock); + RWUNLOCK(&adb->names_lock, locktype); return (adbname); } @@ -1415,64 +1430,93 @@ get_attached_and_locked_entry(dns_adb_t *adb, isc_stdtime_t now, isc_stdtime_t last_update; uint32_t hashval = isc_hashmap_hash( adb->entries, (const unsigned char *)addr, sizeof(*addr)); + isc_rwlocktype_t locktype = isc_rwlocktype_read; isc_time_set(&timenow, now, 0); - LOCK(&adb->entries_lock); + RWLOCK(&adb->entries_lock, locktype); last_update = adb->entries_last_update; + if (now - last_update > ADB_STALE_MARGIN || atomic_load_relaxed(&adb->is_overmem)) { - last_update = 
adb->entries_last_update = now; + last_update = now; + + UPGRADELOCK(&adb->entries_lock, locktype); purge_stale_entries(adb, now); + adb->entries_last_update = now; } +again: result = isc_hashmap_find(adb->entries, &hashval, (const unsigned char *)addr, sizeof(*addr), (void **)&adbentry); switch (result) { case ISC_R_NOTFOUND: { create: + UPGRADELOCK(&adb->entries_lock, locktype); + /* Allocate a new entry and add it to the hash table. */ adbentry = new_adbentry(adb, addr); result = isc_hashmap_add(adb->entries, &hashval, &adbentry->sockaddr, sizeof(adbentry->sockaddr), adbentry); + if (result == ISC_R_EXISTS) { + dns_adbentry_detach(&adbentry); + + result = isc_hashmap_find(adb->entries, &hashval, + (const unsigned char *)addr, + sizeof(*addr), + (void **)&adbentry); + ISC_LIST_UNLINK(adb->entries_lru, adbentry, link); + } INSIST(result == ISC_R_SUCCESS); - - dns_adbentry_ref(adbentry); - LOCK(&adbentry->lock); /* Must be unlocked by the caller */ - adbentry->last_used = now; - - ISC_LIST_PREPEND(adb->entries_lru, adbentry, link); break; } case ISC_R_SUCCESS: - /* - * The dns_adbentry_ref() must stay here before trying to expire - * the ADB entry, so it is not destroyed under the lock. - */ - dns_adbentry_ref(adbentry); - LOCK(&adbentry->lock); /* Must be unlocked by the caller */ - if (maybe_expire_entry(adbentry, now)) { - UNLOCK(&adbentry->lock); - dns_adbentry_detach(&adbentry); - goto create; - } - if (adbentry->last_used + ADB_CACHE_MINIMUM <= last_update) { - adbentry->last_used = now; - + if (locktype == isc_rwlocktype_write) { ISC_LIST_UNLINK(adb->entries_lru, adbentry, link); - ISC_LIST_PREPEND(adb->entries_lru, adbentry, link); } break; default: UNREACHABLE(); } - UNLOCK(&adb->entries_lock); + /* + * The dns_adbentry_ref() must stay here before trying to expire + * the ADB entry, so it is not destroyed under the lock. 
+ */ + dns_adbentry_ref(adbentry); + LOCK(&adbentry->lock); /* Must be unlocked by the caller */ + switch (locktype) { + case isc_rwlocktype_read: + if (entry_expired(adbentry, now)) { + UNLOCK(&adbentry->lock); + dns_adbentry_detach(&adbentry); + goto again; + } + break; + case isc_rwlocktype_write: + if (maybe_expire_entry(adbentry, now)) { + UNLOCK(&adbentry->lock); + dns_adbentry_detach(&adbentry); + goto create; + } + break; + default: + UNREACHABLE(); + } + + if (adbentry->last_used + ADB_CACHE_MINIMUM <= last_update) { + adbentry->last_used = now; + } + if (locktype == isc_rwlocktype_write) { + ISC_LIST_PREPEND(adb->entries_lru, adbentry, link); + } + + RWUNLOCK(&adb->entries_lock, locktype); return (adbentry); } @@ -1657,9 +1701,7 @@ expire_entry(dns_adbentry_t *adbentry) { } static bool -maybe_expire_entry(dns_adbentry_t *adbentry, isc_stdtime_t now) { - REQUIRE(DNS_ADBENTRY_VALID(adbentry)); - +entry_expired(dns_adbentry_t *adbentry, isc_stdtime_t now) { if (!ISC_LIST_EMPTY(adbentry->nhs)) { return (false); } @@ -1668,11 +1710,21 @@ maybe_expire_entry(dns_adbentry_t *adbentry, isc_stdtime_t now) { return (false); } - expire_entry(adbentry); - return (true); } +static bool +maybe_expire_entry(dns_adbentry_t *adbentry, isc_stdtime_t now) { + REQUIRE(DNS_ADBENTRY_VALID(adbentry)); + + if (entry_expired(adbentry, now)) { + expire_entry(adbentry); + return (true); + } + + return (false); +} + /*% * Examine the tail entry of the LRU list to see if it expires or is stale * (unused for some period); if so, the name entry will be freed. 
If the ADB @@ -1756,7 +1808,7 @@ static void cleanup_names(dns_adb_t *adb, isc_stdtime_t now) { dns_adbname_t *next = NULL; - LOCK(&adb->names_lock); + RWLOCK(&adb->names_lock, isc_rwlocktype_write); for (dns_adbname_t *adbname = ISC_LIST_HEAD(adb->names_lru); adbname != NULL; adbname = next) { @@ -1775,7 +1827,7 @@ cleanup_names(dns_adb_t *adb, isc_stdtime_t now) { UNLOCK(&adbname->lock); dns_adbname_detach(&adbname); } - UNLOCK(&adb->names_lock); + RWUNLOCK(&adb->names_lock, isc_rwlocktype_write); } /*% @@ -1859,7 +1911,7 @@ static void cleanup_entries(dns_adb_t *adb, isc_stdtime_t now) { dns_adbentry_t *next = NULL; - LOCK(&adb->entries_lock); + RWLOCK(&adb->entries_lock, isc_rwlocktype_write); for (dns_adbentry_t *adbentry = ISC_LIST_HEAD(adb->entries_lru); adbentry != NULL; adbentry = next) { @@ -1871,7 +1923,7 @@ cleanup_entries(dns_adb_t *adb, isc_stdtime_t now) { UNLOCK(&adbentry->lock); dns_adbentry_detach(&adbentry); } - UNLOCK(&adb->entries_lock); + RWUNLOCK(&adb->entries_lock, isc_rwlocktype_write); } static void @@ -1880,18 +1932,18 @@ destroy(dns_adb_t *adb) { adb->magic = 0; - LOCK(&adb->names_lock); + RWLOCK(&adb->names_lock, isc_rwlocktype_write); INSIST(isc_hashmap_count(adb->names) == 0); isc_hashmap_destroy(&adb->names); - UNLOCK(&adb->names_lock); - isc_mutex_destroy(&adb->names_lock); + RWUNLOCK(&adb->names_lock, isc_rwlocktype_write); + isc_rwlock_destroy(&adb->names_lock); - LOCK(&adb->entries_lock); + RWLOCK(&adb->entries_lock, isc_rwlocktype_write); /* There are no unassociated entries */ INSIST(isc_hashmap_count(adb->entries) == 0); isc_hashmap_destroy(&adb->entries); - UNLOCK(&adb->entries_lock); - isc_mutex_destroy(&adb->entries_lock); + RWUNLOCK(&adb->entries_lock, isc_rwlocktype_write); + isc_rwlock_destroy(&adb->entries_lock); isc_mem_destroy(&adb->hmctx); @@ -1956,11 +2008,11 @@ dns_adb_create(isc_mem_t *mem, dns_view_t *view, isc_loopmgr_t *loopmgr, isc_hashmap_create(adb->hmctx, ADB_HASH_BITS, ISC_HASHMAP_CASE_INSENSITIVE, 
&adb->names); - isc_mutex_init(&adb->names_lock); + isc_rwlock_init(&adb->names_lock); isc_hashmap_create(adb->hmctx, ADB_HASH_BITS, ISC_HASHMAP_CASE_SENSITIVE, &adb->entries); - isc_mutex_init(&adb->entries_lock); + isc_rwlock_init(&adb->entries_lock); isc_mutex_init(&adb->lock); @@ -2002,11 +2054,11 @@ free_tasks: isc_mutex_destroy(&adb->lock); - isc_mutex_destroy(&adb->entries_lock); + isc_rwlock_destroy(&adb->entries_lock); isc_hashmap_destroy(&adb->entries); INSIST(ISC_LIST_EMPTY(adb->entries_lru)); - isc_mutex_destroy(&adb->names_lock); + isc_rwlock_destroy(&adb->names_lock); isc_hashmap_destroy(&adb->names); INSIST(ISC_LIST_EMPTY(adb->names_lru)); @@ -2507,7 +2559,7 @@ dump_adb(dns_adb_t *adb, FILE *f, bool debug, isc_stdtime_t now) { /* * Ensure this operation is applied to both hash tables at once. */ - LOCK(&adb->names_lock); + RWLOCK(&adb->names_lock, isc_rwlocktype_write); for (dns_adbname_t *name = ISC_LIST_HEAD(adb->names_lru); name != NULL; name = ISC_LIST_NEXT(name, link)) @@ -2546,7 +2598,7 @@ dump_adb(dns_adb_t *adb, FILE *f, bool debug, isc_stdtime_t now) { UNLOCK(&name->lock); } - LOCK(&adb->entries_lock); + RWLOCK(&adb->entries_lock, isc_rwlocktype_write); fprintf(f, ";\n; Unassociated entries\n;\n"); for (dns_adbentry_t *adbentry = ISC_LIST_HEAD(adb->entries_lru); adbentry != NULL; adbentry = ISC_LIST_NEXT(adbentry, link)) @@ -2558,8 +2610,8 @@ dump_adb(dns_adb_t *adb, FILE *f, bool debug, isc_stdtime_t now) { UNLOCK(&adbentry->lock); } - UNLOCK(&adb->entries_lock); - UNLOCK(&adb->names_lock); + RWUNLOCK(&adb->entries_lock, isc_rwlocktype_write); + RWUNLOCK(&adb->names_lock, isc_rwlocktype_write); } static void @@ -2735,7 +2787,7 @@ dns_adb_dumpquota(dns_adb_t *adb, isc_buffer_t **buf) { isc_hashmap_iter_t *it = NULL; isc_result_t result; - LOCK(&adb->entries_lock); + RWLOCK(&adb->entries_lock, isc_rwlocktype_read); isc_hashmap_iter_create(adb->entries, &it); for (result = isc_hashmap_iter_first(it); result == ISC_R_SUCCESS; result = 
isc_hashmap_iter_next(it)) @@ -2764,7 +2816,7 @@ dns_adb_dumpquota(dns_adb_t *adb, isc_buffer_t **buf) { UNLOCK(&entry->lock); } isc_hashmap_iter_destroy(&it); - UNLOCK(&adb->entries_lock); + RWUNLOCK(&adb->entries_lock, isc_rwlocktype_read); return (ISC_R_SUCCESS); } @@ -3612,7 +3664,7 @@ dns_adb_flushname(dns_adb_t *adb, const dns_name_t *name) { return; } - LOCK(&adb->names_lock); + RWLOCK(&adb->names_lock, isc_rwlocktype_write); again: /* * Delete both entries - without and with NAME_STARTATZONE set. @@ -3636,7 +3688,7 @@ again: start_at_zone = true; goto again; } - UNLOCK(&adb->names_lock); + RWUNLOCK(&adb->names_lock, isc_rwlocktype_write); } void @@ -3650,7 +3702,7 @@ dns_adb_flushnames(dns_adb_t *adb, const dns_name_t *name) { return; } - LOCK(&adb->names_lock); + RWLOCK(&adb->names_lock, isc_rwlocktype_write); for (dns_adbname_t *adbname = ISC_LIST_HEAD(adb->names_lru); adbname != NULL; adbname = next) { @@ -3663,7 +3715,7 @@ dns_adb_flushnames(dns_adb_t *adb, const dns_name_t *name) { UNLOCK(&adbname->lock); dns_adbname_detach(&adbname); } - UNLOCK(&adb->names_lock); + RWUNLOCK(&adb->names_lock, isc_rwlocktype_write); } static void diff --git a/lib/dns/badcache.c b/lib/dns/badcache.c index 3ef1dfaeb6..cff5dcadf0 100644 --- a/lib/dns/badcache.c +++ b/lib/dns/badcache.c @@ -16,6 +16,7 @@ #include #include +#include #include #include #include @@ -82,7 +83,7 @@ dns_badcache_init(isc_mem_t *mctx, unsigned int size, dns_badcache_t **bcp) { }; isc_mem_attach(mctx, &bc->mctx); - isc_rwlock_init(&bc->lock, 0, 0); + isc_rwlock_init(&bc->lock); bc->table = isc_mem_getx(bc->mctx, sizeof(bc->table[0]) * size, ISC_MEM_ZERO); diff --git a/lib/dns/db.c b/lib/dns/db.c index e0a46c9856..87549bbcb4 100644 --- a/lib/dns/db.c +++ b/lib/dns/db.c @@ -70,7 +70,7 @@ static dns_dbimplementation_t rbtimp; static void initialize(void) { - isc_rwlock_init(&implock, 0, 0); + isc_rwlock_init(&implock); rbtimp.name = "rbt"; rbtimp.create = dns_rbtdb_create; diff --git a/lib/dns/dlz.c 
b/lib/dns/dlz.c index 7515943912..3cd390600c 100644 --- a/lib/dns/dlz.c +++ b/lib/dns/dlz.c @@ -83,7 +83,7 @@ static isc_once_t once = ISC_ONCE_INIT; static void dlz_initialize(void) { - isc_rwlock_init(&dlz_implock, 0, 0); + isc_rwlock_init(&dlz_implock); ISC_LIST_INIT(dlz_implementations); } diff --git a/lib/dns/forward.c b/lib/dns/forward.c index ef05529d83..20cb709472 100644 --- a/lib/dns/forward.c +++ b/lib/dns/forward.c @@ -54,7 +54,7 @@ dns_fwdtable_create(isc_mem_t *mctx, dns_fwdtable_t **fwdtablep) { goto cleanup_fwdtable; } - isc_rwlock_init(&fwdtable->rwlock, 0, 0); + isc_rwlock_init(&fwdtable->rwlock); fwdtable->mctx = NULL; isc_mem_attach(mctx, &fwdtable->mctx); fwdtable->magic = FWDTABLEMAGIC; diff --git a/lib/dns/keytable.c b/lib/dns/keytable.c index 2e9922509b..deb6ca01de 100644 --- a/lib/dns/keytable.c +++ b/lib/dns/keytable.c @@ -158,7 +158,7 @@ dns_keytable_create(isc_mem_t *mctx, dns_keytable_t **keytablep) { goto cleanup_keytable; } - isc_rwlock_init(&keytable->rwlock, 0, 0); + isc_rwlock_init(&keytable->rwlock); isc_refcount_init(&keytable->references, 1); keytable->mctx = NULL; @@ -342,7 +342,7 @@ new_keynode(dns_rdata_ds_t *ds, dns_keytable_t *keytable, bool managed, dns_rdataset_init(&knode->dsset); isc_refcount_init(&knode->refcount, 1); - isc_rwlock_init(&knode->rwlock, 0, 0); + isc_rwlock_init(&knode->rwlock); /* * If a DS was supplied, initialize an rdatalist. 
diff --git a/lib/dns/nta.c b/lib/dns/nta.c index 92fb14c535..c913c7b281 100644 --- a/lib/dns/nta.c +++ b/lib/dns/nta.c @@ -136,7 +136,7 @@ dns_ntatable_create(dns_view_t *view, isc_taskmgr_t *taskmgr, goto cleanup_task; } - isc_rwlock_init(&ntatable->rwlock, 0, 0); + isc_rwlock_init(&ntatable->rwlock); isc_refcount_init(&ntatable->references, 1); diff --git a/lib/dns/rbtdb.c b/lib/dns/rbtdb.c index b41f05d73d..a416ab66f7 100644 --- a/lib/dns/rbtdb.c +++ b/lib/dns/rbtdb.c @@ -104,7 +104,7 @@ typedef uint32_t rbtdb_rdatatype_t; RBTDB_RDATATYPE_VALUE(dns_rdatatype_rrsig, dns_rdatatype_soa) #define RBTDB_RDATATYPE_NCACHEANY RBTDB_RDATATYPE_VALUE(0, dns_rdatatype_any) -#define RBTDB_INITLOCK(l) isc_rwlock_init((l), 0, 0) +#define RBTDB_INITLOCK(l) isc_rwlock_init((l)) #define RBTDB_DESTROYLOCK(l) isc_rwlock_destroy(l) #define RBTDB_LOCK(l, t) RWLOCK((l), (t)) #define RBTDB_UNLOCK(l, t) RWUNLOCK((l), (t)) @@ -117,7 +117,7 @@ typedef uint32_t rbtdb_rdatatype_t; typedef isc_rwlock_t nodelock_t; -#define NODE_INITLOCK(l) isc_rwlock_init((l), 0, 0) +#define NODE_INITLOCK(l) isc_rwlock_init((l)) #define NODE_DESTROYLOCK(l) isc_rwlock_destroy(l) #define NODE_LOCK(l, t, tp) \ { \ @@ -161,7 +161,7 @@ typedef isc_rwlock_t nodelock_t; typedef isc_rwlock_t treelock_t; -#define TREE_INITLOCK(l) isc_rwlock_init(l, 0, 0) +#define TREE_INITLOCK(l) isc_rwlock_init(l) #define TREE_DESTROYLOCK(l) isc_rwlock_destroy(l) #define TREE_LOCK(l, t, tp) \ { \ @@ -1294,7 +1294,7 @@ allocate_version(isc_mem_t *mctx, rbtdb_serial_t serial, version->serial = serial; isc_refcount_init(&version->references, references); - isc_rwlock_init(&version->glue_rwlock, 0, 0); + isc_rwlock_init(&version->glue_rwlock); version->glue_table_bits = ISC_HASH_MIN_BITS; version->glue_table_nodecount = 0U; @@ -1343,7 +1343,7 @@ newversion(dns_db_t *db, dns_dbversion_t **versionp) { version->salt_length = 0; memset(version->salt, 0, sizeof(version->salt)); } - isc_rwlock_init(&version->rwlock, 0, 0); + 
isc_rwlock_init(&version->rwlock); RWLOCK(&rbtdb->current_version->rwlock, isc_rwlocktype_read); version->records = rbtdb->current_version->records; version->xfrsize = rbtdb->current_version->xfrsize; @@ -2207,7 +2207,7 @@ prune_tree(void *arg) { node = parent; } while (node != NULL); NODE_UNLOCK(&rbtdb->node_locks[locknum].lock, &nlocktype); - RWUNLOCK(&rbtdb->tree_lock, tlocktype); + TREE_UNLOCK(&rbtdb->tree_lock, &tlocktype); detach((dns_db_t **)&rbtdb); } @@ -8431,7 +8431,7 @@ dns_rbtdb_create(isc_mem_t *mctx, const dns_name_t *origin, dns_dbtype_t type, rbtdb->current_version->salt_length = 0; memset(rbtdb->current_version->salt, 0, sizeof(rbtdb->current_version->salt)); - isc_rwlock_init(&rbtdb->current_version->rwlock, 0, 0); + isc_rwlock_init(&rbtdb->current_version->rwlock); rbtdb->current_version->records = 0; rbtdb->current_version->xfrsize = 0; rbtdb->future_version = NULL; diff --git a/lib/dns/resolver.c b/lib/dns/resolver.c index cf1ac6e069..060a0a41e7 100644 --- a/lib/dns/resolver.c +++ b/lib/dns/resolver.c @@ -30,6 +30,7 @@ #include #include #include +#include #include #include #include @@ -325,6 +326,7 @@ struct fctxkey { typedef struct fctxcount fctxcount_t; struct fctxcount { unsigned int magic; + isc_mutex_t lock; dns_fixedname_t dfname; dns_name_t *domain; uint_fast32_t count; @@ -558,10 +560,10 @@ struct dns_resolver { dns_dispatchset_t *dispatches6; isc_hashmap_t *fctxs; - isc_mutex_t fctxs_lock; + isc_rwlock_t fctxs_lock; isc_hashmap_t *counters; - isc_mutex_t counters_lock; + isc_rwlock_t counters_lock; unsigned int ntasks; isc_task_t **tasks; @@ -1633,6 +1635,7 @@ fcount_incr(fetchctx_t *fctx, bool force) { fctxcount_t *counter = NULL; uint32_t hashval; uint_fast32_t spill; + isc_rwlocktype_t locktype = isc_rwlocktype_read; REQUIRE(fctx != NULL); res = fctx->res; @@ -1647,7 +1650,7 @@ fcount_incr(fetchctx_t *fctx, bool force) { hashval = isc_hashmap_hash(res->counters, fctx->domain->ndata, fctx->domain->length); - 
LOCK(&res->counters_lock); + RWLOCK(&res->counters_lock, locktype); result = isc_hashmap_find(res->counters, &hashval, fctx->domain->ndata, fctx->domain->length, (void **)&counter); switch (result) { @@ -1660,12 +1663,24 @@ fcount_incr(fetchctx_t *fctx, bool force) { .count = 0, .allowed = 0, }; + isc_mutex_init(&counter->lock); counter->domain = dns_fixedname_initname(&counter->dfname); dns_name_copy(fctx->domain, counter->domain); + UPGRADELOCK(&res->counters_lock, locktype); + result = isc_hashmap_add(res->counters, &hashval, counter->domain->ndata, counter->domain->length, counter); + if (result == ISC_R_EXISTS) { + isc_mutex_destroy(&counter->lock); + isc_mem_put(fctx->mctx, counter, sizeof(*counter)); + counter = NULL; + result = isc_hashmap_find( + res->counters, &hashval, fctx->domain->ndata, + fctx->domain->length, (void **)&counter); + } + INSIST(result == ISC_R_SUCCESS); break; default: @@ -1674,6 +1689,7 @@ fcount_incr(fetchctx_t *fctx, bool force) { INSIST(VALID_FCTXCOUNT(counter)); INSIST(spill > 0); + LOCK(&counter->lock); if (++counter->count > spill) { counter->count--; INSIST(counter->count > 0); @@ -1684,7 +1700,8 @@ fcount_incr(fetchctx_t *fctx, bool force) { counter->allowed++; fctx->counter = counter; } - UNLOCK(&res->counters_lock); + UNLOCK(&counter->lock); + RWUNLOCK(&res->counters_lock, locktype); return (result); } @@ -1699,19 +1716,35 @@ fcount_decr(fetchctx_t *fctx) { } fctx->counter = NULL; - LOCK(&fctx->res->counters_lock); + /* + * FIXME: This should not require a write lock, but should be + * implemented using reference counting later, otherwise we could + * encounter an ABA problem here - the count could go up and down when we + * switch from read to write lock. 
+ */ + RWLOCK(&fctx->res->counters_lock, isc_rwlocktype_write); + + LOCK(&counter->lock); INSIST(VALID_FCTXCOUNT(counter)); INSIST(counter->count > 0); - if (--counter->count == 0) { - isc_result_t result = isc_hashmap_delete( - fctx->res->counters, NULL, counter->domain->ndata, - counter->domain->length); - INSIST(result == ISC_R_SUCCESS); - - fcount_logspill(fctx, counter, true); - isc_mem_put(fctx->mctx, counter, sizeof(*counter)); + if (--counter->count > 0) { + UNLOCK(&counter->lock); + RWUNLOCK(&fctx->res->counters_lock, isc_rwlocktype_write); + return; } - UNLOCK(&fctx->res->counters_lock); + + isc_result_t result = isc_hashmap_delete(fctx->res->counters, NULL, + counter->domain->ndata, + counter->domain->length); + INSIST(result == ISC_R_SUCCESS); + + fcount_logspill(fctx, counter, true); + UNLOCK(&counter->lock); + + isc_mutex_destroy(&counter->lock); + isc_mem_put(fctx->mctx, counter, sizeof(*counter)); + + RWUNLOCK(&fctx->res->counters_lock, isc_rwlocktype_write); } static void @@ -7201,12 +7234,12 @@ release_fctx(fetchctx_t *fctx) { return; } - LOCK(&res->fctxs_lock); + RWLOCK(&res->fctxs_lock, isc_rwlocktype_write); result = isc_hashmap_delete(res->fctxs, &hashval, fctx->key.key, fctx->key.size); INSIST(result == ISC_R_SUCCESS); fctx->hashed = false; - UNLOCK(&res->fctxs_lock); + RWUNLOCK(&res->fctxs_lock, isc_rwlocktype_write); } static void @@ -10072,11 +10105,11 @@ dns_resolver__destroy(dns_resolver_t *res) { INSIST(isc_hashmap_count(res->fctxs) == 0); isc_hashmap_destroy(&res->fctxs); - isc_mutex_destroy(&res->fctxs_lock); + isc_rwlock_destroy(&res->fctxs_lock); INSIST(isc_hashmap_count(res->counters) == 0); isc_hashmap_destroy(&res->counters); - isc_mutex_destroy(&res->counters_lock); + isc_rwlock_destroy(&res->counters_lock); if (res->dispatches4 != NULL) { dns_dispatchset_destroy(&res->dispatches4); @@ -10206,11 +10239,11 @@ dns_resolver_create(dns_view_t *view, isc_loopmgr_t *loopmgr, /* This needs to be case sensitive to not lowercase options 
and type */ isc_hashmap_create(view->mctx, RES_DOMAIN_HASH_BITS, ISC_HASHMAP_CASE_SENSITIVE, &res->fctxs); - isc_mutex_init(&res->fctxs_lock); + isc_rwlock_init(&res->fctxs_lock); isc_hashmap_create(view->mctx, RES_DOMAIN_HASH_BITS, ISC_HASHMAP_CASE_INSENSITIVE, &res->counters); - isc_mutex_init(&res->counters_lock); + isc_rwlock_init(&res->counters_lock); if (dispatchv4 != NULL) { dns_dispatchset_create(res->mctx, dispatchv4, &res->dispatches4, @@ -10370,7 +10403,7 @@ dns_resolver_shutdown(dns_resolver_t *res) { RTRACE("exiting"); - LOCK(&res->fctxs_lock); + RWLOCK(&res->fctxs_lock, isc_rwlocktype_write); isc_hashmap_iter_create(res->fctxs, &it); for (result = isc_hashmap_iter_first(it); result == ISC_R_SUCCESS; @@ -10386,7 +10419,7 @@ dns_resolver_shutdown(dns_resolver_t *res) { fctx); } isc_hashmap_iter_destroy(&it); - UNLOCK(&res->fctxs_lock); + RWUNLOCK(&res->fctxs_lock, isc_rwlocktype_write); LOCK(&res->lock); if (res->spillattimer != NULL) { @@ -10525,6 +10558,7 @@ get_attached_fctx(dns_resolver_t *res, const dns_name_t *name, name->length, }; fetchctx_t *fctx = NULL; + isc_rwlocktype_t locktype = isc_rwlocktype_read; STATIC_ASSERT(sizeof(key.options) == sizeof(options), "key options size mismatch"); @@ -10538,7 +10572,7 @@ get_attached_fctx(dns_resolver_t *res, const dns_name_t *name, hashval = isc_hashmap_hash(res->fctxs, key.key, key.size); again: - LOCK(&res->fctxs_lock); + RWLOCK(&res->fctxs_lock, locktype); result = isc_hashmap_find(res->fctxs, &hashval, key.key, key.size, (void **)&fctx); switch (result) { @@ -10552,13 +10586,17 @@ again: goto unlock; } - *new_fctx = true; - + UPGRADELOCK(&res->fctxs_lock, locktype); result = isc_hashmap_add(res->fctxs, &hashval, fctx->key.key, fctx->key.size, fctx); - - fctx->hashed = true; - + if (result == ISC_R_SUCCESS) { + *new_fctx = true; + fctx->hashed = true; + } else { + fctx_done_detach(&fctx, result); + result = isc_hashmap_find(res->fctxs, &hashval, key.key, + key.size, (void **)&fctx); + } INSIST(result 
== ISC_R_SUCCESS); break; default: @@ -10566,8 +10604,7 @@ again: } fetchctx_ref(fctx); unlock: - UNLOCK(&res->fctxs_lock); - + RWUNLOCK(&res->fctxs_lock, locktype); if (result == ISC_R_SUCCESS) { LOCK(&fctx->lock); if (SHUTTINGDOWN(fctx) || fctx->cloned) { @@ -11388,7 +11425,7 @@ dns_resolver_dumpfetches(dns_resolver_t *res, isc_statsformat_t format, REQUIRE(fp != NULL); REQUIRE(format == isc_statsformat_file); - LOCK(&res->counters_lock); + RWLOCK(&res->counters_lock, isc_rwlocktype_read); isc_hashmap_iter_create(res->counters, &it); for (result = isc_hashmap_iter_first(it); result == ISC_R_SUCCESS; result = isc_hashmap_iter_next(it)) @@ -11402,7 +11439,7 @@ dns_resolver_dumpfetches(dns_resolver_t *res, isc_statsformat_t format, " spilled, %" PRIuFAST32 " allowed)\n", counter->count, counter->dropped, counter->allowed); } - UNLOCK(&res->counters_lock); + RWUNLOCK(&res->counters_lock, isc_rwlocktype_read); isc_hashmap_iter_destroy(&it); } @@ -11419,7 +11456,7 @@ dns_resolver_dumpquota(dns_resolver_t *res, isc_buffer_t **buf) { return (ISC_R_SUCCESS); } - LOCK(&res->counters_lock); + RWLOCK(&res->counters_lock, isc_rwlocktype_read); isc_hashmap_iter_create(res->counters, &it); for (result = isc_hashmap_iter_first(it); result == ISC_R_SUCCESS; result = isc_hashmap_iter_next(it)) @@ -11451,7 +11488,7 @@ dns_resolver_dumpquota(dns_resolver_t *res, isc_buffer_t **buf) { } cleanup: - UNLOCK(&res->counters_lock); + RWUNLOCK(&res->counters_lock, isc_rwlocktype_read); isc_hashmap_iter_destroy(&it); return (result); } diff --git a/lib/dns/rpz.c b/lib/dns/rpz.c index 8dc62d29e2..75ee525f7d 100644 --- a/lib/dns/rpz.c +++ b/lib/dns/rpz.c @@ -1481,7 +1481,7 @@ dns_rpz_new_zones(isc_mem_t *mctx, isc_loopmgr_t *loopmgr, char *rps_cstr, .magic = DNS_RPZ_ZONES_MAGIC, }; - isc_rwlock_init(&rpzs->search_lock, 0, 0); + isc_rwlock_init(&rpzs->search_lock); isc_mutex_init(&rpzs->maint_lock); isc_refcount_init(&rpzs->references, 1); diff --git a/lib/dns/transport.c b/lib/dns/transport.c 
index 9c6a123081..d76f62f3d5 100644 --- a/lib/dns/transport.c +++ b/lib/dns/transport.c @@ -652,7 +652,7 @@ dns_transport_list_new(isc_mem_t *mctx) { *list = (dns_transport_list_t){ 0 }; - isc_rwlock_init(&list->lock, 0, 0); + isc_rwlock_init(&list->lock); isc_mem_attach(mctx, &list->mctx); isc_refcount_init(&list->references, 1); diff --git a/lib/dns/tsig.c b/lib/dns/tsig.c index 3dca68930b..1bf652a159 100644 --- a/lib/dns/tsig.c +++ b/lib/dns/tsig.c @@ -1812,7 +1812,7 @@ dns_tsigkeyring_create(isc_mem_t *mctx, dns_tsig_keyring_t **ringp) { ring = isc_mem_get(mctx, sizeof(dns_tsig_keyring_t)); - isc_rwlock_init(&ring->lock, 0, 0); + isc_rwlock_init(&ring->lock); ring->keys = NULL; result = dns_rbt_create(mctx, free_tsignode, NULL, &ring->keys); if (result != ISC_R_SUCCESS) { diff --git a/lib/dns/view.c b/lib/dns/view.c index e8a53aa108..0c7de74463 100644 --- a/lib/dns/view.c +++ b/lib/dns/view.c @@ -133,7 +133,7 @@ dns_view_create(isc_mem_t *mctx, dns_rdataclass_t rdclass, const char *name, isc_mutex_init(&view->lock); - isc_rwlock_init(&view->sfd_lock, 0, 0); + isc_rwlock_init(&view->sfd_lock); view->zonetable = NULL; result = dns_zt_create(mctx, rdclass, &view->zonetable); diff --git a/lib/dns/zone.c b/lib/dns/zone.c index 951bbfd34c..3a261d0b49 100644 --- a/lib/dns/zone.c +++ b/lib/dns/zone.c @@ -214,7 +214,7 @@ typedef struct dns_include dns_include_t; } while (0) #endif /* ifdef DNS_ZONE_CHECKLOCK */ -#define ZONEDB_INITLOCK(l) isc_rwlock_init((l), 0, 0) +#define ZONEDB_INITLOCK(l) isc_rwlock_init((l)) #define ZONEDB_DESTROYLOCK(l) isc_rwlock_destroy(l) #define ZONEDB_LOCK(l, t) RWLOCK((l), (t)) #define ZONEDB_UNLOCK(l, t) RWUNLOCK((l), (t)) @@ -17942,7 +17942,7 @@ zonemgr_keymgmt_init(dns_zonemgr_t *zmgr) { }; isc_mem_attach(zmgr->mctx, &mgmt->mctx); - isc_rwlock_init(&mgmt->lock, 0, 0); + isc_rwlock_init(&mgmt->lock); isc_hashmap_create(mgmt->mctx, DNS_KEYMGMT_HASH_BITS, ISC_HASHMAP_CASE_SENSITIVE, &mgmt->table); @@ -18074,10 +18074,10 @@ 
dns_zonemgr_create(isc_mem_t *mctx, isc_loopmgr_t *loopmgr, for (size_t i = 0; i < UNREACH_CACHE_SIZE; i++) { atomic_init(&zmgr->unreachable[i].expire, 0); } - isc_rwlock_init(&zmgr->rwlock, 0, 0); + isc_rwlock_init(&zmgr->rwlock); /* Unreachable lock. */ - isc_rwlock_init(&zmgr->urlock, 0, 0); + isc_rwlock_init(&zmgr->urlock); isc_ratelimiter_create(loop, &zmgr->checkdsrl); isc_ratelimiter_create(loop, &zmgr->notifyrl); diff --git a/lib/dns/zt.c b/lib/dns/zt.c index a0c300aaf0..da4eac74ce 100644 --- a/lib/dns/zt.c +++ b/lib/dns/zt.c @@ -95,7 +95,7 @@ dns_zt_create(isc_mem_t *mctx, dns_rdataclass_t rdclass, dns_zt_t **ztp) { goto cleanup_zt; } - isc_rwlock_init(&zt->rwlock, 0, 0); + isc_rwlock_init(&zt->rwlock); zt->mctx = NULL; isc_mem_attach(mctx, &zt->mctx); isc_refcount_init(&zt->references, 1); diff --git a/lib/isc/hashmap.c b/lib/isc/hashmap.c index 2ea7a25182..1a8f15e0a2 100644 --- a/lib/isc/hashmap.c +++ b/lib/isc/hashmap.c @@ -332,8 +332,9 @@ isc_hashmap_find(const isc_hashmap_t *hashmap, const uint32_t *hashvalp, const void *key, uint32_t keysize, void **valuep) { REQUIRE(ISC_HASHMAP_VALID(hashmap)); REQUIRE(key != NULL && keysize <= UINT16_MAX); + REQUIRE(valuep == NULL || *valuep == NULL); - hashmap_node_t *node; + hashmap_node_t *node = NULL; uint8_t idx = hashmap->hindex; uint32_t hashval = (hashvalp != NULL) ? *hashvalp diff --git a/lib/isc/include/isc/mutex.h b/lib/isc/include/isc/mutex.h index df24922336..45256a6fac 100644 --- a/lib/isc/include/isc/mutex.h +++ b/lib/isc/include/isc/mutex.h @@ -23,6 +23,8 @@ #include /* for ISC_R_ codes */ #include +#define ISC_MUTEX_INITIALIZER PTHREAD_MUTEX_INITIALIZER + ISC_LANG_BEGINDECLS /* diff --git a/lib/isc/include/isc/rwlock.h b/lib/isc/include/isc/rwlock.h index 44a44a86cc..9d5c54053e 100644 --- a/lib/isc/include/isc/rwlock.h +++ b/lib/isc/include/isc/rwlock.h @@ -18,8 +18,6 @@ /*! 
\file isc/rwlock.h */ -#include -#include #include #include #include @@ -46,10 +44,10 @@ typedef enum { typedef pthread_rwlock_t *isc_rwlock_t; typedef pthread_rwlock_t isc__rwlock_t; -#define isc_rwlock_init(rwl, rq, wq) \ - { \ - *rwl = malloc(sizeof(**rwl)); \ - isc__rwlock_init(*rwl, rq, wq); \ +#define isc_rwlock_init(rwl) \ + { \ + *rwl = malloc(sizeof(**rwl)); \ + isc__rwlock_init(*rwl); \ } #define isc_rwlock_lock(rwl, type) isc__rwlock_lock(*rwl, type) #define isc_rwlock_trylock(rwl, type) isc__rwlock_trylock(*rwl, type) @@ -66,7 +64,7 @@ typedef pthread_rwlock_t isc__rwlock_t; typedef pthread_rwlock_t isc_rwlock_t; typedef pthread_rwlock_t isc__rwlock_t; -#define isc_rwlock_init(rwl, rq, wq) isc__rwlock_init(rwl, rq, wq) +#define isc_rwlock_init(rwl) isc__rwlock_init(rwl) #define isc_rwlock_lock(rwl, type) isc__rwlock_lock(rwl, type) #define isc_rwlock_trylock(rwl, type) isc__rwlock_trylock(rwl, type) #define isc_rwlock_unlock(rwl, type) isc__rwlock_unlock(rwl, type) @@ -75,7 +73,7 @@ typedef pthread_rwlock_t isc__rwlock_t; #endif /* ISC_TRACK_PTHREADS_OBJECTS */ -#define isc__rwlock_init(rwl, read_quota, write_quote) \ +#define isc__rwlock_init(rwl) \ { \ int _ret = pthread_rwlock_init(rwl, NULL); \ PTHREADS_RUNTIME_CHECK(pthread_rwlock_init, _ret); \ @@ -158,72 +156,99 @@ typedef pthread_rwlock_t isc__rwlock_t; PTHREADS_RUNTIME_CHECK(pthread_rwlock_destroy, _ret); \ } +#define isc_rwlock_setworkers(workers) + #else /* USE_PTHREAD_RWLOCK */ +#include +#include +#include + struct isc_rwlock { - /* Unlocked. */ - unsigned int magic; - isc_mutex_t lock; - atomic_int_fast32_t spins; - - /* - * When some atomic instructions with hardware assistance are - * available, rwlock will use those so that concurrent readers do not - * interfere with each other through mutex as long as no writers - * appear, massively reducing the lock overhead in the typical case. 
- * - * The basic algorithm of this approach is the "simple - * writer-preference lock" shown in the following URL: - * http://www.cs.rochester.edu/u/scott/synchronization/pseudocode/rw.html - * but our implementation does not rely on the spin lock unlike the - * original algorithm to be more portable as a user space application. - */ - - /* Read or modified atomically. */ - atomic_int_fast32_t write_requests; - atomic_int_fast32_t write_completions; - atomic_int_fast32_t cnt_and_flag; - - /* Locked by lock. */ - isc_condition_t readable; - isc_condition_t writeable; - unsigned int readers_waiting; - - /* Locked by rwlock itself. */ - atomic_uint_fast32_t write_granted; - - /* Unlocked. */ - unsigned int write_quota; + alignas(ISC_OS_CACHELINE_SIZE) atomic_uint_fast32_t readers_ingress; + alignas(ISC_OS_CACHELINE_SIZE) atomic_uint_fast32_t readers_egress; + alignas(ISC_OS_CACHELINE_SIZE) atomic_int_fast32_t writers_barrier; + alignas(ISC_OS_CACHELINE_SIZE) atomic_bool writers_lock; }; typedef struct isc_rwlock isc_rwlock_t; -typedef struct isc_rwlock isc__rwlock_t; - -#define isc_rwlock_init(rwl, rq, wq) isc__rwlock_init(rwl, rq, wq) -#define isc_rwlock_lock(rwl, type) isc__rwlock_lock(rwl, type) -#define isc_rwlock_trylock(rwl, type) isc__rwlock_trylock(rwl, type) -#define isc_rwlock_unlock(rwl, type) isc__rwlock_unlock(rwl, type) -#define isc_rwlock_tryupgrade(rwl) isc__rwlock_tryupgrade(rwl) -#define isc_rwlock_destroy(rwl) isc__rwlock_destroy(rwl) void -isc__rwlock_init(isc__rwlock_t *rwl, unsigned int read_quota, - unsigned int write_quota); +isc_rwlock_init(isc_rwlock_t *rwl); void -isc__rwlock_lock(isc__rwlock_t *rwl, isc_rwlocktype_t type); +isc_rwlock_rdlock(isc_rwlock_t *rwl); + +void +isc_rwlock_wrlock(isc_rwlock_t *rwl); isc_result_t -isc__rwlock_trylock(isc__rwlock_t *rwl, isc_rwlocktype_t type); - -void -isc__rwlock_unlock(isc__rwlock_t *rwl, isc_rwlocktype_t type); +isc_rwlock_tryrdlock(isc_rwlock_t *rwl); isc_result_t 
-isc__rwlock_tryupgrade(isc__rwlock_t *rwl); +isc_rwlock_trywrlock(isc_rwlock_t *rwl); void -isc__rwlock_destroy(isc__rwlock_t *rwl); +isc_rwlock_rdunlock(isc_rwlock_t *rwl); + +void +isc_rwlock_wrunlock(isc_rwlock_t *rwl); + +isc_result_t +isc_rwlock_tryupgrade(isc_rwlock_t *rwl); + +void +isc_rwlock_downgrade(isc_rwlock_t *rwl); + +void +isc_rwlock_destroy(isc_rwlock_t *rwl); + +void +isc_rwlock_setworkers(uint16_t workers); + +#define isc_rwlock_lock(rwl, type) \ + { \ + switch (type) { \ + case isc_rwlocktype_read: \ + isc_rwlock_rdlock(rwl); \ + break; \ + case isc_rwlocktype_write: \ + isc_rwlock_wrlock(rwl); \ + break; \ + default: \ + UNREACHABLE(); \ + } \ + } + +#define isc_rwlock_trylock(rwl, type) \ + ({ \ + int __result; \ + switch (type) { \ + case isc_rwlocktype_read: \ + __result = isc_rwlock_tryrdlock(rwl); \ + break; \ + case isc_rwlocktype_write: \ + __result = isc_rwlock_trywrlock(rwl); \ + break; \ + default: \ + UNREACHABLE(); \ + } \ + __result; \ + }) + +#define isc_rwlock_unlock(rwl, type) \ + { \ + switch (type) { \ + case isc_rwlocktype_read: \ + isc_rwlock_rdunlock(rwl); \ + break; \ + case isc_rwlocktype_write: \ + isc_rwlock_wrunlock(rwl); \ + break; \ + default: \ + UNREACHABLE(); \ + } \ + } #endif /* USE_PTHREAD_RWLOCK */ diff --git a/lib/isc/include/isc/util.h b/lib/isc/include/isc/util.h index f24a8da2d5..35d9050158 100644 --- a/lib/isc/include/isc/util.h +++ b/lib/isc/include/isc/util.h @@ -198,6 +198,20 @@ #define WRLOCK(lp) RWLOCK(lp, isc_rwlocktype_write) #define WRUNLOCK(lp) RWUNLOCK(lp, isc_rwlocktype_write) +#define UPGRADELOCK(lock, locktype) \ + { \ + if (locktype == isc_rwlocktype_read) { \ + if (isc_rwlock_tryupgrade(lock) == ISC_R_SUCCESS) { \ + locktype = isc_rwlocktype_write; \ + } else { \ + RWUNLOCK(lock, locktype); \ + locktype = isc_rwlocktype_write; \ + RWLOCK(lock, locktype); \ + } \ + } \ + INSIST(locktype == isc_rwlocktype_write); \ + } + /* * List Macros. 
*/ diff --git a/lib/isc/log.c b/lib/isc/log.c index 5547495f7f..43e371a813 100644 --- a/lib/isc/log.c +++ b/lib/isc/log.c @@ -264,7 +264,7 @@ isc_log_create(isc_mem_t *mctx, isc_log_t **lctxp, isc_logconfig_t **lcfgp) { ISC_LIST_INIT(lctx->messages); isc_mutex_init(&lctx->lock); - isc_rwlock_init(&lctx->lcfg_rwl, 0, 0); + isc_rwlock_init(&lctx->lcfg_rwl); /* * Normally setting the magic number is the last step done diff --git a/lib/isc/managers.c b/lib/isc/managers.c index 2fa8177720..9fcade3e61 100644 --- a/lib/isc/managers.c +++ b/lib/isc/managers.c @@ -12,7 +12,9 @@ */ #include +#include #include +#include void isc_managers_create(isc_mem_t **mctxp, uint32_t workers, @@ -33,6 +35,8 @@ isc_managers_create(isc_mem_t **mctxp, uint32_t workers, REQUIRE(taskmgrp != NULL && *taskmgrp == NULL); isc_taskmgr_create(*mctxp, *loopmgrp, taskmgrp); INSIST(*taskmgrp != NULL); + + isc_rwlock_setworkers(workers); } void diff --git a/lib/isc/rwlock.c b/lib/isc/rwlock.c index 62bdbf1a62..ae4fc388a0 100644 --- a/lib/isc/rwlock.c +++ b/lib/isc/rwlock.c @@ -11,396 +11,247 @@ * information regarding copyright ownership. */ +/* + * Modified C-RW-WP Implementation from NUMA-Aware Reader-Writer Locks paper: + * http://dl.acm.org/citation.cfm?id=2442532 + * + * This work is based on C++ code available from + * https://github.com/pramalhe/ConcurrencyFreaks/ + * + * Copyright (c) 2014-2016, Pedro Ramalhete, Andreia Correia + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. 
+ * * Neither the name of Concurrency Freaks nor the + * names of its contributors may be used to endorse or promote products + * derived from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS + * IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED + * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A + * PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL + * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF + * THE POSSIBILITY OF SUCH DAMAGE. + */ + /*! \file */ #include #include #include +#include +#include #include -#include +#include #include -#include #include +#include +#include #include -#define RWLOCK_MAGIC ISC_MAGIC('R', 'W', 'L', 'k') -#define VALID_RWLOCK(rwl) ISC_MAGIC_VALID(rwl, RWLOCK_MAGIC) +static atomic_uint_fast16_t isc__crwlock_workers = 128; -#ifndef RWLOCK_DEFAULT_READ_QUOTA -#define RWLOCK_DEFAULT_READ_QUOTA 4 -#endif /* ifndef RWLOCK_DEFAULT_READ_QUOTA */ - -#ifndef RWLOCK_DEFAULT_WRITE_QUOTA -#define RWLOCK_DEFAULT_WRITE_QUOTA 4 -#endif /* ifndef RWLOCK_DEFAULT_WRITE_QUOTA */ - -#ifndef RWLOCK_MAX_ADAPTIVE_COUNT -#define RWLOCK_MAX_ADAPTIVE_COUNT 100 -#endif /* ifndef RWLOCK_MAX_ADAPTIVE_COUNT */ - -#ifdef ISC_RWLOCK_TRACE -#include /* Required for fprintf/stderr. */ - -#include /* Required for isc_thread_self(). 
*/ - -static void -print_lock(const char *operation, isc__rwlock_t *rwl, isc_rwlocktype_t type) { - fprintf(stderr, - "rwlock %p thread %" PRIuPTR " %s(%s): " - "write_requests=%u, write_completions=%u, " - "cnt_and_flag=0x%x, readers_waiting=%u, " - "write_granted=%u, write_quota=%u\n", - rwl, isc_thread_self(), operation, - (type == isc_rwlocktype_read ? "read" : "write"), - atomic_load_acquire(&rwl->write_requests), - atomic_load_acquire(&rwl->write_completions), - atomic_load_acquire(&rwl->cnt_and_flag), rwl->readers_waiting, - atomic_load_acquire(&rwl->write_granted), rwl->write_quota); -} -#endif /* ISC_RWLOCK_TRACE */ - -void -isc__rwlock_init(isc__rwlock_t *rwl, unsigned int read_quota, - unsigned int write_quota) { - REQUIRE(rwl != NULL); - - /* - * In case there's trouble initializing, we zero magic now. If all - * goes well, we'll set it to RWLOCK_MAGIC. - */ - rwl->magic = 0; - - atomic_init(&rwl->spins, 0); - atomic_init(&rwl->write_requests, 0); - atomic_init(&rwl->write_completions, 0); - atomic_init(&rwl->cnt_and_flag, 0); - rwl->readers_waiting = 0; - atomic_init(&rwl->write_granted, 0); - if (read_quota != 0) { - UNEXPECTED_ERROR("read quota is not supported"); - } - if (write_quota == 0) { - write_quota = RWLOCK_DEFAULT_WRITE_QUOTA; - } - rwl->write_quota = write_quota; - - isc_mutex_init(&rwl->lock); - - isc_condition_init(&rwl->readable); - isc_condition_init(&rwl->writeable); - - rwl->magic = RWLOCK_MAGIC; -} - -void -isc__rwlock_destroy(isc__rwlock_t *rwl) { - REQUIRE(VALID_RWLOCK(rwl)); - - REQUIRE(atomic_load_acquire(&rwl->write_requests) == - atomic_load_acquire(&rwl->write_completions) && - atomic_load_acquire(&rwl->cnt_and_flag) == 0 && - rwl->readers_waiting == 0); - - rwl->magic = 0; - isc_condition_destroy(&rwl->readable); - isc_condition_destroy(&rwl->writeable); - isc_mutex_destroy(&rwl->lock); -} +#define ISC_RWLOCK_UNLOCKED false +#define ISC_RWLOCK_LOCKED true /* - * When some architecture-dependent atomic operations are 
available, - * rwlock can be more efficient than the generic algorithm defined below. - * The basic algorithm is described in the following URL: - * http://www.cs.rochester.edu/u/scott/synchronization/pseudocode/rw.html - * - * The key is to use the following integer variables modified atomically: - * write_requests, write_completions, and cnt_and_flag. - * - * write_requests and write_completions act as a waiting queue for writers - * in order to ensure the FIFO order. Both variables begin with the initial - * value of 0. When a new writer tries to get a write lock, it increments - * write_requests and gets the previous value of the variable as a "ticket". - * When write_completions reaches the ticket number, the new writer can start - * writing. When the writer completes its work, it increments - * write_completions so that another new writer can start working. If the - * write_requests is not equal to write_completions, it means a writer is now - * working or waiting. In this case, a new readers cannot start reading, or - * in other words, this algorithm basically prefers writers. - * - * cnt_and_flag is a "lock" shared by all readers and writers. This integer - * variable is a kind of structure with two members: writer_flag (1 bit) and - * reader_count (31 bits). The writer_flag shows whether a writer is working, - * and the reader_count shows the number of readers currently working or almost - * ready for working. A writer who has the current "ticket" tries to get the - * lock by exclusively setting the writer_flag to 1, provided that the whole - * 32-bit is 0 (meaning no readers or writers working). On the other hand, - * a new reader tries to increment the "reader_count" field provided that - * the writer_flag is 0 (meaning there is no writer working). - * - * If some of the above operations fail, the reader or the writer sleeps - * until the related condition changes. 
When a working reader or writer - * completes its work, some readers or writers are sleeping, and the condition - * that suspended the reader or writer has changed, it wakes up the sleeping - * readers or writers. - * - * As already noted, this algorithm basically prefers writers. In order to - * prevent readers from starving, however, the algorithm also introduces the - * "writer quota" (Q). When Q consecutive writers have completed their work, - * suspending readers, the last writer will wake up the readers, even if a new - * writer is waiting. - * - * Implementation specific note: due to the combination of atomic operations - * and a mutex lock, ordering between the atomic operation and locks can be - * very sensitive in some cases. In particular, it is generally very important - * to check the atomic variable that requires a reader or writer to sleep after - * locking the mutex and before actually sleeping; otherwise, it could be very - * likely to cause a deadlock. For example, assume "var" is a variable - * atomically modified, then the corresponding code would be: - * if (var == need_sleep) { - * LOCK(lock); - * if (var == need_sleep) - * WAIT(cond, lock); - * UNLOCK(lock); - * } - * The second check is important, since "var" is protected by the atomic - * operation, not by the mutex, and can be changed just before sleeping. - * (The first "if" could be omitted, but this is also important in order to - * make the code efficient by avoiding the use of the mutex unless it is - * really necessary.) 
+ * See https://csce.ucmss.com/cr/books/2017/LFS/CSREA2017/FCS3701.pdf for + * guidance on patience level */ - -#define WRITER_ACTIVE 0x1 -#define READER_INCR 0x2 +#ifndef RWLOCK_MAX_READER_PATIENCE +#define RWLOCK_MAX_READER_PATIENCE 500 +#endif /* ifndef RWLOCK_MAX_READER_PATIENCE */ static void -rwlock_lock(isc__rwlock_t *rwl, isc_rwlocktype_t type) { - int32_t cntflag; +read_indicator_wait_until_empty(isc_rwlock_t *rwl); - REQUIRE(VALID_RWLOCK(rwl)); +#include -#ifdef ISC_RWLOCK_TRACE - print_lock("prelock", rwl, type); -#endif /* ifdef ISC_RWLOCK_TRACE */ - - if (type == isc_rwlocktype_read) { - if (atomic_load_acquire(&rwl->write_requests) != - atomic_load_acquire(&rwl->write_completions)) - { - /* there is a waiting or active writer */ - LOCK(&rwl->lock); - if (atomic_load_acquire(&rwl->write_requests) != - atomic_load_acquire(&rwl->write_completions)) - { - rwl->readers_waiting++; - WAIT(&rwl->readable, &rwl->lock); - rwl->readers_waiting--; - } - UNLOCK(&rwl->lock); - } - - cntflag = atomic_fetch_add_release(&rwl->cnt_and_flag, - READER_INCR); - POST(cntflag); - while (1) { - if ((atomic_load_acquire(&rwl->cnt_and_flag) & - WRITER_ACTIVE) == 0) - { - break; - } - - /* A writer is still working */ - LOCK(&rwl->lock); - rwl->readers_waiting++; - if ((atomic_load_acquire(&rwl->cnt_and_flag) & - WRITER_ACTIVE) != 0) - { - WAIT(&rwl->readable, &rwl->lock); - } - rwl->readers_waiting--; - UNLOCK(&rwl->lock); - - /* - * Typically, the reader should be able to get a lock - * at this stage: - * (1) there should have been no pending writer when - * the reader was trying to increment the - * counter; otherwise, the writer should be in - * the waiting queue, preventing the reader from - * proceeding to this point. - * (2) once the reader increments the counter, no - * more writer can get a lock. - * Still, it is possible another writer can work at - * this point, e.g. in the following scenario: - * A previous writer unlocks the writer lock. 
- * This reader proceeds to point (1). - * A new writer appears, and gets a new lock before - * the reader increments the counter. - * The reader then increments the counter. - * The previous writer notices there is a waiting - * reader who is almost ready, and wakes it up. - * So, the reader needs to confirm whether it can now - * read explicitly (thus we loop). Note that this is - * not an infinite process, since the reader has - * incremented the counter at this point. - */ - } - - /* - * If we are temporarily preferred to writers due to the writer - * quota, reset the condition (race among readers doesn't - * matter). - */ - atomic_store_release(&rwl->write_granted, 0); - } else { - int32_t prev_writer; - - /* enter the waiting queue, and wait for our turn */ - prev_writer = atomic_fetch_add_release(&rwl->write_requests, 1); - while (atomic_load_acquire(&rwl->write_completions) != - prev_writer) - { - LOCK(&rwl->lock); - if (atomic_load_acquire(&rwl->write_completions) != - prev_writer) - { - WAIT(&rwl->writeable, &rwl->lock); - UNLOCK(&rwl->lock); - continue; - } - UNLOCK(&rwl->lock); - break; - } - - while (!atomic_compare_exchange_weak_acq_rel( - &rwl->cnt_and_flag, &(int_fast32_t){ 0 }, - WRITER_ACTIVE)) - { - /* Another active reader or writer is working. 
*/ - LOCK(&rwl->lock); - if (atomic_load_acquire(&rwl->cnt_and_flag) != 0) { - WAIT(&rwl->writeable, &rwl->lock); - } - UNLOCK(&rwl->lock); - } - - INSIST((atomic_load_acquire(&rwl->cnt_and_flag) & - WRITER_ACTIVE)); - atomic_fetch_add_release(&rwl->write_granted, 1); - } - -#ifdef ISC_RWLOCK_TRACE - print_lock("postlock", rwl, type); -#endif /* ifdef ISC_RWLOCK_TRACE */ +static void +read_indicator_arrive(isc_rwlock_t *rwl) { + (void)atomic_fetch_add_release(&rwl->readers_ingress, 1); } -void -isc__rwlock_lock(isc__rwlock_t *rwl, isc_rwlocktype_t type) { - int32_t cnt = 0; - int32_t spins = atomic_load_acquire(&rwl->spins) * 2 + 10; - int32_t max_cnt = ISC_MAX(spins, RWLOCK_MAX_ADAPTIVE_COUNT); +static void +read_indicator_depart(isc_rwlock_t *rwl) { + (void)atomic_fetch_add_release(&rwl->readers_egress, 1); +} - do { - if (cnt++ >= max_cnt) { - rwlock_lock(rwl, type); +static bool +read_indicator_isempty(isc_rwlock_t *rwl) { + return (atomic_load_acquire(&rwl->readers_egress) == + atomic_load_acquire(&rwl->readers_ingress)); +} + +static void +writers_barrier_raise(isc_rwlock_t *rwl) { + (void)atomic_fetch_add_release(&rwl->writers_barrier, 1); +} + +static void +writers_barrier_lower(isc_rwlock_t *rwl) { + (void)atomic_fetch_sub_release(&rwl->writers_barrier, 1); +} + +static bool +writers_barrier_israised(isc_rwlock_t *rwl) { + return (atomic_load_acquire(&rwl->writers_barrier) > 0); +} + +static bool +writers_lock_islocked(isc_rwlock_t *rwl) { + return (atomic_load_acquire(&rwl->writers_lock) == ISC_RWLOCK_LOCKED); +} + +static bool +writers_lock_acquire(isc_rwlock_t *rwl) { + return (atomic_compare_exchange_weak_acq_rel( + &rwl->writers_lock, &(bool){ ISC_RWLOCK_UNLOCKED }, + ISC_RWLOCK_LOCKED)); +} + +static void +writers_lock_release(isc_rwlock_t *rwl) { + REQUIRE(atomic_compare_exchange_strong_acq_rel( + &rwl->writers_lock, &(bool){ ISC_RWLOCK_LOCKED }, + ISC_RWLOCK_UNLOCKED)); +} + +#define ran_out_of_patience(cnt) (cnt >= RWLOCK_MAX_READER_PATIENCE) + 
+void +isc_rwlock_rdlock(isc_rwlock_t *rwl) { + uint32_t cnt = 0; + bool barrier_raised = false; + + while (true) { + read_indicator_arrive(rwl); + if (!writers_lock_islocked(rwl)) { + /* Acquired lock in read-only mode */ break; } - isc_pause(); - } while (isc_rwlock_trylock(rwl, type) != ISC_R_SUCCESS); - atomic_fetch_add_release(&rwl->spins, (cnt - spins) / 8); + /* Writer has acquired the lock, must reset to 0 and wait */ + read_indicator_depart(rwl); + + while (writers_lock_islocked(rwl)) { + isc_pause(); + if (ran_out_of_patience(cnt++) && !barrier_raised) { + writers_barrier_raise(rwl); + barrier_raised = true; + } + } + } + if (barrier_raised) { + writers_barrier_lower(rwl); + } } isc_result_t -isc__rwlock_trylock(isc__rwlock_t *rwl, isc_rwlocktype_t type) { - int32_t cntflag; +isc_rwlock_tryrdlock(isc_rwlock_t *rwl) { + read_indicator_arrive(rwl); + if (writers_lock_islocked(rwl)) { + /* Writer has acquired the lock, release the read lock */ + read_indicator_depart(rwl); - REQUIRE(VALID_RWLOCK(rwl)); - -#ifdef ISC_RWLOCK_TRACE - print_lock("prelock", rwl, type); -#endif /* ifdef ISC_RWLOCK_TRACE */ - - if (type == isc_rwlocktype_read) { - /* If a writer is waiting or working, we fail. */ - if (atomic_load_acquire(&rwl->write_requests) != - atomic_load_acquire(&rwl->write_completions)) - { - return (ISC_R_LOCKBUSY); - } - - /* Otherwise, be ready for reading. */ - cntflag = atomic_fetch_add_release(&rwl->cnt_and_flag, - READER_INCR); - if ((cntflag & WRITER_ACTIVE) != 0) { - /* - * A writer is working. We lose, and cancel the read - * request. - */ - cntflag = atomic_fetch_sub_release(&rwl->cnt_and_flag, - READER_INCR); - /* - * If no other readers are waiting and we've suspended - * new writers in this short period, wake them up. 
- */ - if (cntflag == READER_INCR && - atomic_load_acquire(&rwl->write_completions) != - atomic_load_acquire(&rwl->write_requests)) - { - LOCK(&rwl->lock); - BROADCAST(&rwl->writeable); - UNLOCK(&rwl->lock); - } - - return (ISC_R_LOCKBUSY); - } - } else { - /* Try locking without entering the waiting queue. */ - int_fast32_t zero = 0; - if (!atomic_compare_exchange_strong_acq_rel( - &rwl->cnt_and_flag, &zero, WRITER_ACTIVE)) - { - return (ISC_R_LOCKBUSY); - } - - /* - * XXXJT: jump into the queue, possibly breaking the writer - * order. - */ - atomic_fetch_sub_release(&rwl->write_completions, 1); - atomic_fetch_add_release(&rwl->write_granted, 1); + return (ISC_R_LOCKBUSY); } -#ifdef ISC_RWLOCK_TRACE - print_lock("postlock", rwl, type); -#endif /* ifdef ISC_RWLOCK_TRACE */ - + /* Acquired lock in read-only mode */ return (ISC_R_SUCCESS); } +void +isc_rwlock_rdunlock(isc_rwlock_t *rwl) { + read_indicator_depart(rwl); +} + isc_result_t -isc__rwlock_tryupgrade(isc__rwlock_t *rwl) { - REQUIRE(VALID_RWLOCK(rwl)); +isc_rwlock_tryupgrade(isc_rwlock_t *rwl) { + /* Write Barriers has been raised */ + if (writers_barrier_israised(rwl)) { + return (ISC_R_LOCKBUSY); + } - int_fast32_t reader_incr = READER_INCR; + /* Try to acquire the write-lock */ + if (!writers_lock_acquire(rwl)) { + return (ISC_R_LOCKBUSY); + } - /* Try to acquire write access. */ - atomic_compare_exchange_strong_acq_rel(&rwl->cnt_and_flag, &reader_incr, - WRITER_ACTIVE); - /* - * There must have been no writer, and there must have - * been at least one reader. 
- */ - INSIST((reader_incr & WRITER_ACTIVE) == 0 && - (reader_incr & ~WRITER_ACTIVE) != 0); + /* Unlock the read-lock */ + read_indicator_depart(rwl); + + if (!read_indicator_isempty(rwl)) { + /* Re-acquire the read-lock */ + read_indicator_arrive(rwl); + + /* Unlock the write-lock */ + writers_lock_release(rwl); + return (ISC_R_LOCKBUSY); + } + return (ISC_R_SUCCESS); +} + +static void +read_indicator_wait_until_empty(isc_rwlock_t *rwl) { + /* Write-lock was acquired; now wait for running readers to finish */ + while (true) { + if (read_indicator_isempty(rwl)) { + break; + } + isc_pause(); + } +} + +void +isc_rwlock_wrlock(isc_rwlock_t *rwl) { + /* The writers' barrier has been raised; wait until it is lowered */ + while (writers_barrier_israised(rwl)) { + isc_pause(); + } + + /* Try to acquire the write-lock */ + while (!writers_lock_acquire(rwl)) { + isc_pause(); + } + + read_indicator_wait_until_empty(rwl); +} + +void +isc_rwlock_wrunlock(isc_rwlock_t *rwl) { + writers_lock_release(rwl); +} + +isc_result_t +isc_rwlock_trywrlock(isc_rwlock_t *rwl) { + /* The writers' barrier has been raised */ + if (writers_barrier_israised(rwl)) { + return (ISC_R_LOCKBUSY); + } + + /* Try to acquire the write-lock */ + if (!writers_lock_acquire(rwl)) { + return (ISC_R_LOCKBUSY); + } + + if (!read_indicator_isempty(rwl)) { + /* Unlock the write-lock */ + writers_lock_release(rwl); 
- */ - atomic_fetch_sub_release(&rwl->write_completions, 1); - } else { return (ISC_R_LOCKBUSY); } @@ -408,74 +259,30 @@ isc__rwlock_tryupgrade(isc__rwlock_t *rwl) { } void -isc__rwlock_unlock(isc__rwlock_t *rwl, isc_rwlocktype_t type) { - int32_t prev_cnt; +isc_rwlock_downgrade(isc_rwlock_t *rwl) { + read_indicator_arrive(rwl); - REQUIRE(VALID_RWLOCK(rwl)); - -#ifdef ISC_RWLOCK_TRACE - print_lock("preunlock", rwl, type); -#endif /* ifdef ISC_RWLOCK_TRACE */ - - if (type == isc_rwlocktype_read) { - prev_cnt = atomic_fetch_sub_release(&rwl->cnt_and_flag, - READER_INCR); - /* - * If we're the last reader and any writers are waiting, wake - * them up. We need to wake up all of them to ensure the - * FIFO order. - */ - if (prev_cnt == READER_INCR && - atomic_load_acquire(&rwl->write_completions) != - atomic_load_acquire(&rwl->write_requests)) - { - LOCK(&rwl->lock); - BROADCAST(&rwl->writeable); - UNLOCK(&rwl->lock); - } - } else { - bool wakeup_writers = true; - - /* - * Reset the flag, and (implicitly) tell other writers - * we are done. - */ - atomic_fetch_sub_release(&rwl->cnt_and_flag, WRITER_ACTIVE); - atomic_fetch_add_release(&rwl->write_completions, 1); - - if ((atomic_load_acquire(&rwl->write_granted) >= - rwl->write_quota) || - (atomic_load_acquire(&rwl->write_requests) == - atomic_load_acquire(&rwl->write_completions)) || - (atomic_load_acquire(&rwl->cnt_and_flag) & ~WRITER_ACTIVE)) - { - /* - * We have passed the write quota, no writer is - * waiting, or some readers are almost ready, pending - * possible writers. Note that the last case can - * happen even if write_requests != write_completions - * (which means a new writer in the queue), so we need - * to catch the case explicitly. 
- */ - LOCK(&rwl->lock); - if (rwl->readers_waiting > 0) { - wakeup_writers = false; - BROADCAST(&rwl->readable); - } - UNLOCK(&rwl->lock); - } - - if ((atomic_load_acquire(&rwl->write_requests) != - atomic_load_acquire(&rwl->write_completions)) && - wakeup_writers) - { - LOCK(&rwl->lock); - BROADCAST(&rwl->writeable); - UNLOCK(&rwl->lock); - } - } - -#ifdef ISC_RWLOCK_TRACE - print_lock("postunlock", rwl, type); -#endif /* ifdef ISC_RWLOCK_TRACE */ + writers_lock_release(rwl); +} + +void +isc_rwlock_init(isc_rwlock_t *rwl) { + REQUIRE(rwl != NULL); + + atomic_init(&rwl->writers_lock, ISC_RWLOCK_UNLOCKED); + atomic_init(&rwl->writers_barrier, 0); + atomic_init(&rwl->readers_ingress, 0); + atomic_init(&rwl->readers_egress, 0); +} + +void +isc_rwlock_destroy(isc_rwlock_t *rwl) { + /* Check whether write lock has been unlocked */ + REQUIRE(atomic_load(&rwl->writers_lock) == ISC_RWLOCK_UNLOCKED); + REQUIRE(read_indicator_isempty(rwl)); +} + +void +isc_rwlock_setworkers(uint16_t workers) { + atomic_store(&isc__crwlock_workers, workers); } diff --git a/lib/isc/tls.c b/lib/isc/tls.c index 8c4768c46b..569a041a8d 100644 --- a/lib/isc/tls.c +++ b/lib/isc/tls.c @@ -1186,7 +1186,7 @@ isc_tlsctx_cache_create(isc_mem_t *mctx, isc_tlsctx_cache_t **cachep) { isc_mem_attach(mctx, &nc->mctx); isc_ht_init(&nc->data, mctx, 5, ISC_HT_CASE_SENSITIVE); - isc_rwlock_init(&nc->rwlock, 0, 0); + isc_rwlock_init(&nc->rwlock); *cachep = nc; } diff --git a/tests/isc/hashmap_test.c b/tests/isc/hashmap_test.c index 99b7951f57..b7e755a1fe 100644 --- a/tests/isc/hashmap_test.c +++ b/tests/isc/hashmap_test.c @@ -381,7 +381,6 @@ ISC_RUN_TEST_IMPL(isc_hashmap_case) { test_node_t lower = { .key = "isc_hashmap_case" }; test_node_t upper = { .key = "ISC_HASHMAP_CASE" }; test_node_t mixed = { .key = "IsC_hAsHmAp_CaSe" }; - test_node_t *value; isc_hashmap_create(mctx, 1, ISC_HASHMAP_CASE_SENSITIVE, &hashmap); @@ -398,7 +397,7 @@ ISC_RUN_TEST_IMPL(isc_hashmap_case) { assert_int_equal(result, ISC_R_SUCCESS); 
result = isc_hashmap_find(hashmap, NULL, mixed.key, strlen(mixed.key), - (void *)&value); + &(void *){ NULL }); assert_int_equal(result, ISC_R_NOTFOUND); isc_hashmap_destroy(&hashmap); @@ -418,7 +417,7 @@ ISC_RUN_TEST_IMPL(isc_hashmap_case) { assert_int_equal(result, ISC_R_EXISTS); result = isc_hashmap_find(hashmap, NULL, mixed.key, strlen(mixed.key), - (void *)&value); + &(void *){ NULL }); assert_int_equal(result, ISC_R_SUCCESS); isc_hashmap_destroy(&hashmap);