Merge branch 'fanf-urcu-qp' into 'main'

Replace isc_qsbr with liburcu-qsbr

Closes #3936 and #4019

See merge request isc-projects/bind9!7668
This commit is contained in:
Tony Finch 2023-05-12 20:43:30 +00:00
commit 562697e703
49 changed files with 302 additions and 1598 deletions

View file

@ -18,14 +18,20 @@ variables:
CLANG_VERSION: 16
CLANG: "clang-${CLANG_VERSION}"
SCAN_BUILD: "scan-build-${CLANG_VERSION}"
ASAN_SYMBOLIZER_PATH: "/usr/lib/llvm-${CLANG_VERSION}/bin/llvm-symbolizer"
LLVM_SYMBOLIZER: "/usr/lib/llvm-${CLANG_VERSION}/bin/llvm-symbolizer"
CLANG_FORMAT: "clang-format-${CLANG_VERSION}"
CFLAGS_COMMON: -fno-omit-frame-pointer -fno-optimize-sibling-calls -O1 -g -Wall -Wextra
# Pass run-time flags to AddressSanitizer to get core dumps on error.
ASAN_OPTIONS: abort_on_error=1:disable_coredump=0:unmap_shadow_on_exit=1
TSAN_OPTIONS_COMMON: "disable_coredump=0 second_deadlock_stack=1 history_size=7 log_exe_name=true log_path=tsan"
ASAN_SYMBOLIZER_PATH: "${LLVM_SYMBOLIZER}"
TSAN_OPTIONS_COMMON: "disable_coredump=0 second_deadlock_stack=1 atexit_sleep_ms=1000 history_size=7 log_exe_name=true log_path=tsan"
TSAN_SYMBOLIZER: "external_symbolizer_path=/usr/bin/llvm-symbolizer"
TSAN_SUPPRESSIONS: "suppressions=${CI_PROJECT_DIR}/.tsan-suppress"
TSAN_OPTIONS_DEFAULT: "${TSAN_OPTIONS_COMMON} ${TSAN_SUPPRESSIONS} ${TSAN_SYMBOLIZER}"
UBSAN_OPTIONS: "halt_on_error=1:abort_on_error=1:disable_coredump=0"
TARBALL_EXTENSION: xz
@ -1044,7 +1050,7 @@ gcc:tsan:
system:gcc:tsan:
variables:
TSAN_OPTIONS: "${TSAN_OPTIONS_COMMON} external_symbolizer_path=/usr/bin/llvm-symbolizer"
TSAN_OPTIONS: "${TSAN_OPTIONS_DEFAULT}"
<<: *fedora_37_amd64_image
<<: *system_test_tsan_job
needs:
@ -1053,7 +1059,7 @@ system:gcc:tsan:
unit:gcc:tsan:
variables:
TSAN_OPTIONS: "${TSAN_OPTIONS_COMMON} external_symbolizer_path=/usr/bin/llvm-symbolizer"
TSAN_OPTIONS: "${TSAN_OPTIONS_DEFAULT}"
<<: *fedora_37_amd64_image
<<: *unit_test_tsan_job
needs:
@ -1071,7 +1077,7 @@ clang:tsan:
system:clang:tsan:
variables:
TSAN_OPTIONS: "${TSAN_OPTIONS_COMMON} external_symbolizer_path=/usr/lib/llvm-${CLANG_VERSION}/bin/llvm-symbolizer"
TSAN_OPTIONS: "${TSAN_OPTIONS_COMMON} ${TSAN_SUPPRESSIONS} external_symbolizer_path=${LLVM_SYMBOLIZER}"
<<: *base_image
<<: *system_test_tsan_job
needs:
@ -1080,7 +1086,7 @@ system:clang:tsan:
unit:clang:tsan:
variables:
TSAN_OPTIONS: "${TSAN_OPTIONS_COMMON} external_symbolizer_path=/usr/lib/llvm-${CLANG_VERSION}/bin/llvm-symbolizer suppressions=$CI_PROJECT_DIR/tsan-suppressions.txt"
TSAN_OPTIONS: "${TSAN_OPTIONS_COMMON} suppressions=$CI_PROJECT_DIR/.tsan-suppress-extra external_symbolizer_path=${LLVM_SYMBOLIZER}"
<<: *base_image
<<: *unit_test_tsan_job
needs:
@ -1323,7 +1329,7 @@ respdiff-short:tsan:
LDFLAGS: "-fsanitize=thread"
EXTRA_CONFIGURE: "--enable-pthread-rwlock --without-jemalloc"
MAX_DISAGREEMENTS_PERCENTAGE: "0.5"
TSAN_OPTIONS: "${TSAN_OPTIONS_COMMON} external_symbolizer_path=/usr/bin/llvm-symbolizer"
TSAN_OPTIONS: "${TSAN_OPTIONS_DEFAULT}"
script:
- bash respdiff.sh -s named -q "${PWD}/10k_a.txt" -c 3 -w "${PWD}/rspworkdir" "${CI_PROJECT_DIR}" "/usr/local/respdiff-reference-bind/sbin/named"
after_script:
@ -1361,7 +1367,7 @@ respdiff-long:tsan:
LDFLAGS: "-fsanitize=thread"
EXTRA_CONFIGURE: "--enable-pthread-rwlock --without-jemalloc"
MAX_DISAGREEMENTS_PERCENTAGE: "0.5"
TSAN_OPTIONS: "${TSAN_OPTIONS_COMMON} external_symbolizer_path=/usr/bin/llvm-symbolizer"
TSAN_OPTIONS: "${TSAN_OPTIONS_DEFAULT}"
script:
- bash respdiff.sh -s named -q "${PWD}/100k_mixed.txt" -c 3 -w "${PWD}/rspworkdir" "${CI_PROJECT_DIR}" "/usr/local/respdiff-reference-bind/sbin/named"
after_script:

View file

@ -163,11 +163,12 @@ Files: **/.clang-format
.gitlab-ci.yml
.lgtm.yml
.pylintrc
.tsan-suppress
.tsan-suppress-extra
.uncrustify.cfg
doc/misc/*.zoneopt
doc/misc/options
doc/misc/rndc.grammar
tsan-suppressions.txt
sonar-project.properties
Copyright: Internet Systems Consortium, Inc. ("ISC")
License: CC0-1.0

4
.tsan-suppress Normal file
View file

@ -0,0 +1,4 @@
# be more selective with liburcu
race:rcu_barrier
race:rcu_memb_barrier
thread:*

7
.tsan-suppress-extra Normal file
View file

@ -0,0 +1,7 @@
# Uninstrumented libraries
called_from_lib:libfstrm.so
called_from_lib:libdummyrpz.so
# be more selective with liburcu
race:rcu_barrier
race:rcu_memb_barrier
thread:*

View file

@ -1,3 +1,10 @@
6172. [cleanup] Refactor the loop manager and qp-trie code to remove
isc_qsbr and use liburcu instead. [GL #3936]
6171. [cleanup] Remove the stack implementation added in change 6108:
we are using the liburcu concurrent data structures
instead. [GL !7920]
6170. [func] The 'rndc -t' option allows a timeout to be set in
seconds, so that commands that take a long time to
complete (e.g., reloading a very large configuration)

View file

@ -2147,8 +2147,7 @@ run_server(void *arg) {
CHECK(ns_interfacemgr_create(mctx, sctx, loopmgr, netmgr, dispatchmgr,
NULL, false, &interfacemgr));
CHECK(dns_view_create(mctx, loopmgr, dns_rdataclass_in, "_default",
&view));
CHECK(dns_view_create(mctx, dns_rdataclass_in, "_default", &view));
CHECK(dns_cache_create(loopmgr, dns_rdataclass_in, "", &cache));
dns_view_setcache(view, cache, false);
dns_cache_detach(&cache);

View file

@ -6437,8 +6437,7 @@ create_view(const cfg_obj_t *vconfig, dns_viewlist_t *viewlist,
}
INSIST(view == NULL);
result = dns_view_create(named_g_mctx, named_g_loopmgr, viewclass,
viewname, &view);
result = dns_view_create(named_g_mctx, viewclass, viewname, &view);
if (result != ISC_R_SUCCESS) {
return (result);
}

View file

@ -256,7 +256,7 @@ main(int argc, char *argv[]) {
RUNCHECK(dns_requestmgr_create(mctx, dispatchmgr, dispatchv4, NULL,
&requestmgr));
RUNCHECK(dns_view_create(mctx, loopmgr, 0, "_test", &view));
RUNCHECK(dns_view_create(mctx, 0, "_test", &view));
isc_loopmgr_setup(loopmgr, sendqueries, NULL);
isc_loopmgr_run(loopmgr);

View file

@ -89,6 +89,7 @@ if ! $do_run; then
LOG_FLAGS="$log_flags" \
TEST_LARGE_MAP="${TEST_LARGE_MAP}" \
CI_ENABLE_ALL_TESTS="${CI_ENABLE_ALL_TESTS}" \
${TSAN_OPTIONS:+"TSAN_OPTIONS=${TSAN_OPTIONS}"} \
${VIRTUAL_ENV:+"VIRTUAL_ENV=${VIRTUAL_ENV}"} \
${PERL5LIB:+"PERL5LIB=${PERL5LIB}"} \
make -e check

View file

@ -2141,7 +2141,7 @@ main(int argc, char *argv[]) {
mctx, dispatchmgr, have_ipv4 ? dispatchvx : NULL,
have_ipv6 ? dispatchvx : NULL, &requestmgr));
RUNCHECK(dns_view_create(mctx, loopmgr, 0, "_mdig", &view));
RUNCHECK(dns_view_create(mctx, 0, "_mdig", &view));
query = ISC_LIST_HEAD(queries);
isc_loopmgr_setup(loopmgr, sendqueries, query);

View file

@ -657,45 +657,6 @@ Items can be removed from the list using `ISC_LIST_UNLINK`:
ISC_LIST_UNLINK(foolist, foo, link);
##### Atomic stacks
In `<isc/stack.h>`, there are also similar macros for singly-linked
stacks (`ISC_STACK`) and atomic stacks (`ISC_ASTACK`).
Lock-free linked data structures have pitfalls that the `ISC_ASTACK`
macros avoid by having a restricted API. Firstly, it is difficult to
implement atomic doubly-linked lists without at least a double-width
compare-exchange, which is not a widely supported primitive, so this
stack is singly-linked. Secondly, when individual elements can be
removed from a list (whether doubly-linked or singly-linked), we run
into the ABA problem, where removes and (re-)inserts race and as a
result the links get in a tangle. Safe support for removing individual
elements requires some kind of garbage collection, or extending link
pointers with generation counters. Instead the ASTACK macros only
provide a function for emptying the entire stack in one go,
`ISC_ASTACK_TO_STACK()`. This only requires setting the `top` pointer to
`NULL`, which cannot cause `ISC_ASTACK_PUSH()` to go wrong.
The `ISC_ASTACK` macros are used the same way as the non-atomic
doubly-linked lists described above, except that not all of the
`ISC_LIST` macros have `ISC_ASTACK` equivalents, and stacks have a
`TOP` instead of a `HEAD`.
As well as read-only iteration similar to `ISC_LIST` described above,
there is an idiom for emptying an atomic stack into a regular stack,
and processing its contents:
ISC_STACK(isc_foo_t) drain = ISC_ASTACK_TO_STACK(foo_work);
while (!ISC_STACK_EMPTY(drain)) {
isc_foo_t *foo = ISC_STACK_POP(drain, link);
/* do things */
/* maybe again? ISC_ASTACK_PUSH(foo_work, foo, link) */
}
There is also an `ISC_ASTACK_ADD()` macro, which is a convenience
wrapper for pushing an element onto a stack if it has not already been
added.
#### <a name="names"></a>Names
The `dns_name` API has facilities for processing DNS names and labels,

View file

@ -1,397 +0,0 @@
<!--
Copyright (C) Internet Systems Consortium, Inc. ("ISC")
SPDX-License-Identifier: MPL-2.0
This Source Code Form is subject to the terms of the Mozilla Public
License, v. 2.0. If a copy of the MPL was not distributed with this
file, you can obtain one at https://mozilla.org/MPL/2.0/.
See the COPYRIGHT file distributed with this work for additional
information regarding copyright ownership.
-->
QSBR: quiescent state based reclamation
=======================================
QSBR is a safe memory reclamation (SMR) algorithm for lock-free data
structures such as a qp-trie. (See `doc/dev/qp.md`.)
When an object is unlinked from a lock-free data structure, it
cannot be `free()`ed immediately, because there can still be readers
accessing the object via an old version of the data structure. SMR
algorithms determine when it is safe to reclaim memory after it has
been unlinked.
Introductions and overviews
---------------------------
There is a terse overview in `include/isc/qsbr.h`.
Jeff Preshing has a nice introduction to QSBR,
_<https://preshing.com/20160726/using-quiescent-states-to-reclaim-memory/>_
At the end of this note is a copy of a blog post about writing BIND's
`isc_qsbr`, _<https://dotat.at/@/2023-01-10-qsbr.html>_
[Paul McKenney's web page][paulmck] has links to his book on
concurrent programming, the [Userspace RCU library][urcu], and more.
McKenney invented RCU and QSBR. RCU is the Linux kernel's machinery
for lock-free data structures and safe memory reclamation, based on
QSBR.
[paulmck]: http://www.rdrop.com/~paulmck/
[urcu]: https://liburcu.org/
Example code
------------
If you are implementing a lock-free data structure that needs safe
memory reclamation, here's a guide to using `isc_qsbr`, based on how
QSBR is used by `dns_qp`.
### registration
When the program starts up you need to register a global callback
function that will reclaim unused memory. You can do so using an
ISC_CONSTRUCTOR function that runs automatically at startup.
static void
qp_qsbr_register(void) ISC_CONSTRUCTOR;
static void
qp_qsbr_register(void) {
isc_qsbr_register(qp_qsbr_reclaimer);
}
### work list
Your module will need somewhere that your callback can find the work
it needs to do. The qp-trie has an atomic list of `dns_qpmulti_t`
objects for this purpose.
/* a global variable */
static ISC_ASTACK(dns_qpmulti_t) qsbr_work;
The reason for using global variables is so that we don't need to
allocate a thunk every time we have memory reclamation work to do.
### read-only access
You should design your data structure so that it has a single atomic
root pointer referring to its current version. A lock-free reader
_must_ run in an `isc_loop` callback. It gains access to the data
structure by taking a copy of this pointer:
qp_node_t *reader = atomic_load_acquire(&multi->reader);
During an `isc_loop` callback, a reader should keep using the same
pointer go get a consistent view of the data structure. If it reloads
the pointer it can get a different version changed by concurrent
writers.
A reader _must_ stop using the root pointer and any interior pointers
obtained via the root pointer before it returns to the `isc_loop`.
### modifications and writes
All changes to the data structure must be copy-on-write (aka
read-copy-update) so that concurrent readers are not disturbed.
When a new version of the data structure has been prepared, it is
committed by overwriting the atomic root pointer,
atomic_store_release(&multi->reader, reader); /* COMMIT */
### scheduling cleanup
After committing a change, your data structure may have memory that
will become free, after concurrent readers have stopped accessing it.
To reclaim the memory when it is safe, use code like:
isc_qsbr_phase_t phase = isc_qsbr_phase(multi->loopmgr);
if (defer_chunk_reclamation(qp, phase)) {
ISC_ASTACK_ADD(qsbr_work, multi, cleanup);
isc_qsbr_activate(multi->loopmgr, phase);
}
* First, get the current QSBR phase
* Second, mark free memory with the phase number. The qp-trie scans
its chunks and marks those that will become free, and returns
`true` if there is cleanup work to do.
* If so, the qp-trie is added to the work list. (`ISC_ALIST_ADD()`
is idempotent).
* Finally, QSBR is informed that there is work to do.
In other cases it might not make sense to scan the data structure
after committing, and instead you might make note of which memory to
clean up while making changes before you know what the phase will be.
You can then have per-phase work lists, like:
static ISC_ASTACK(my_work_t) qsbr_work[ISC_QSBR_PHASES];
isc_qsbr_phase_t phase = isc_qsbr_phase(loopmgr);
ISC_ASTACK_ADD(qsbr_work[phase], cleanup_work, link);
isc_qsbr_activate(loopmgr, phase);
In general, there will be several (maybe many) write operations during
a grace period. Your lock-free data structure should collect its
reclamation work from all these writes into a batch per phase, i.e.
per grace period.
### reclaiming
Inside the reclaimer callback, we iterate over the work list and clean
up each item on it. If there is more cleanup work to do in another
phase, we put the qp-trie back on the work list for another go.
static void
qsbreclaimer(void *arg, isc_qsbr_phase_t phase) {
UNUSED(arg);
ISC_STACK(dns_qpmulti_t) drain = ISC_ASTACK_TO_STACK(qsbr_work);
while (!ISC_STACK_EMPTY(drain)) {
dns_qpmulti_t *multi = ISC_STACK_POP(drain, cleanup);
INSIST(QPMULTI_VALID(multi));
LOCK(&multi->mutex);
if (reclaim_chunks(&multi->writer, phase)) {
/* more to do next time */
ISC_ALIST_PUSH(qsbr_work, multi, cleanup);
}
UNLOCK(&multi->mutex);
}
}
### reclaim marks
In the qp-trie data structure, each chunk has some metadata which
includes a bitfield for the reclaim phase:
isc_qsbr_phase_t phase : ISC_QSBR_PHASE_BITS;
We use a bitfield so that all the metadata fits in a single word.
------------------------------------------------------------------------
Safe memory reclamation for BIND
================================
At the end of October 2022, I _finally_ got [my multithreaded
qp-trie][qp-gc] working! It could be built with two different
concurrency control mechanisms:
* A reader/writer lock
This has poor read-side scalability, because every thread is
hammering on the same shared location. But its write performance
is reasonably good: concurrent readers don't slow it down too much.
* [`liburcu`, userland read-copy-update][urcu]
RCU has a fast and scalable read side, nice! But on the write side
I used `synchronize_rcu()`, which is blocking and rather slow, so
my write performance was terrible.
OK, but I want the best of both worlds! To fix it, I needed to change
the qp-trie code to use safe memory reclamation more effectively:
instead of blocking inside `synchronize_rcu()` before cleaning up, use
`call_rcu()` to clean up asynchronously. I expect I'll write about the
qp-trie changes another time.
Another issue is that I want the best of both worlds _by default_,
but `liburcu` is [LGPL][] and we don't want BIND to depend on
code whose licence demands more from our users than the [MPL][].
[qp-gc]: https://dotat.at/@/2021-06-23-page-based-gc-for-qp-trie-rcu.html
[LGPL]: https://opensource.org/licenses/LGPL-2.1
[MPL]: https://opensource.org/licenses/MPL-2.0
So I set out to write my own safe memory reclamation support code.
lock freedom
------------
In a [multithreaded qp-trie][qp-gc], there can be many concurrent
readers, but there can be only one writer at a time and modifications
are strictly serialized. When I have got it working properly, readers
are completely wait-free, unaffected by other readers, and almost
unaffected by writers. Writers need to get a mutex to ensure there is
only one at a time, but once the mutex is acquired, a writer is not
obstructed by readers.
The way this works is that readers use an atomic load to get a pointer
to the root of the current version of the trie. Readers can make
multiple queries using this root pointer and the results will be
consistent wrt that particular version, regardless of what changes
writers might be making concurrently. Writers do not affect readers
because all changes are made by copy-on-write. When a writer is ready
to commit a new version of the trie, it uses an atomic store to flip
the root pointer.
safe memory reclamation
-----------------------
We can't copy-on-write indefinitely: we need to reclaim the memory
used by old versions of the trie. And we must do so "safely", i.e.
without `free()`ing memory that readers are still using.
So, before `free()`ing memory, a writer must wait for a _"grace
period"_, which is a jargon term meaning "until readers are not using
the old version". There are a bunch of algorithms for determining when
a grace period is over, with varying amounts of over-approximation,
CPU overhead, and memory backlog.
The [RCU][urcu] function `synchronize_rcu()` is slow because it blocks
waiting for a grace period; the `call_rcu()` function runs a callback
asynchronously after a grace period has passed. I wanted to avoid
blocking my writers, so I needed to implement something like
`call_rcu()`.
aversions
---------
When I started trying to work out how to do safe memory reclamation,
it all seemed quite intimidating. But as I learned more, I found that
my circumstances make it easier than it appeared at first.
The [`liburcu`][urcu] homepage has a long list of supported CPU
architectures and operating systems. Do I have to care about those
details too? No! The RCU code dates back to before the age of
standardized concurrent memory models, so the RCU developers had to
invent their own atomic primitives and correctness rules. Twenty-ish
years later the state of the art has advanced, so I can use
`<stdatomic.h>` without having to re-do it like `liburcu`.
You can also choose between several algorithms implemented by
[`liburcu`][urcu], involving questions about kernel support, specially
reserved signals, and intrusiveness in application code. But while I
was working out how to schedule asynchronous memory reclamation work,
I realised that BIND is already well-suited to the fastest flavour of
RCU, called "QSBR".
QSBR
----
QSBR stands for "quiescent state based reclamation". A _"quiescent
state"_ is a fancy name for a point when a thread is not accessing a
lock-free data structure, and does not retain any root pointers or
interior pointers.
When a thread has passed through a quiescent state, it no longer has
access to older versions of the data structures. When _all_ threads
have passed through quiescent states, then nothing in the program has
access to old versions. This is how QSBR detects grace periods: after
a writer commits a new version, it waits for all threads to pass
through quiescent states, and therefore a grace period has definitely
elapsed, and so it is then safe to reclaim the old version's memory.
QSBR is fast because readers do not need to explicitly mark the
critical section surrounding the atomic load that I mentioned earlier.
Threads just need to pass through a quiescent state frequently enough
that there isn't a huge build-up of unreclaimed memory.
Inside an operating system kernel (RCU's native environment), a
context switch provides a natural quiescent state. In a userland
application, you need to find a good place to call
`rcu_quiescent_state()`. You could call it every time you have
finished using a root pointer, but marking a quiescent state is not
completely free, so there are probably more efficient ways.
`libuv`
-------
BIND is multithreaded, and (basically) each thread runs an event loop.
Recent versions of BIND use [`libuv`][uv] for the event loops.
A lot of things started falling into place when I realised that the
`libuv` event loop gives BIND a [natural quiescent state][uv-loop]:
when the event callbacks have finished running, and `libuv` is about
to call `select()` or `poll()` or whatever, we can mark a quiescent
state. We can require that event-handling functions do not stash root
pointers in the heap, but only use them via local variables, so we
know that old versions are inaccessible after the callback returns.
My design marks a quiescent state once per loop, so on a busy server
where each loop has lots to do, the cost of marking a quiescent state
is amortized across several I/O events.
[uv]: http://libuv.org/
[uv-loop]: http://docs.libuv.org/en/v1.x/design.html#the-i-o-loop
fuzzy barrier
-------------
So, how do we mark a quiescent state? Using a _"fuzzy barrier"_.
When a thread reaches a normal barrier, it blocks until all the other
threads have reached the barrier, after which exactly one of the
threads can enter a protected section of code, and the others are
unblocked and can proceed as normal.
When a thread encounters a fuzzy barrier, it never blocks. It either
proceeds immediately as normal, or if it is the last thread to reach
the barrier, it enters the protected code.
RCU does not actually use a fuzzy barrier as I have described it. Like
a fuzzy barrier, each thread keeps track of whether it has passed
through a quiescent state in the current grace period, without
blocking; but unlike a fuzzy barrier, no thread is diverted to the
protected code. Instead, code that wants to enter a protected section
uses the blocking `synchronize_rcu()` function.
EBR-ish
-------
As in the paper ["performance of memory reclamation for lockless
synchronization"][HMBW], my implementation of QSBR uses a fuzzy
barrier designed for another safe memory reclamation algorithm, EBR,
epoch based reclamation. (EBR was invented here in Cambridge by [Keir
Fraser][tr579].)
[HMBW]: http://csng.cs.toronto.edu/publication_files/0000/0159/jpdc07.pdf
[tr579]: https://www.cl.cam.ac.uk/techreports/UCAM-CL-TR-579.html
Actually, my fuzzy barrier is slightly different to EBR's. In EBR, the
fuzzy barrier is used every time the program enters a critical
section. (In qp-trie terms, that would be every time a reader fetches
a root pointer.) So it is vital that EBR's barrier avoids mutating
shared state, because that would wreck multithreaded performance.
Because BIND will only pass through the fuzzy barrier when it is about
to use a blocking system call, my version mutates shared state more
frequently (typically, once per CPU per grace period, instead of once
per grace period). If this turns out to be a problem, it won't be too
hard to make it work more like EBR.
More trivially, I'm using the term "phase" instead of "epoch", because
it's nothing to do with the unix epoch, because there are three
phases, and because I can talk about phase transitions and threads
being out of phase with each other.
coda
----
While reading various RCU-related papers, I was amused by ["user-level
implementations of read-copy update"][DMSDW], which says:
> BIND, a major domain-name server used for Internet domain-name
> resolution, is facing scalability issues. Since domain names
> are read often but rarely updated, using user-level RCU might be
> beneficial.
Yes, I think it might :-)
[DMSDW]: https://www.efficios.com/publications/

View file

@ -6,6 +6,7 @@ AM_CFLAGS += \
AM_CPPFLAGS += \
$(LIBISC_CFLAGS) \
$(LIBDNS_CFLAGS) \
$(LIBURCU_CFLAGS) \
$(LIBUV_CFLAGS) \
-DFUZZDIR=\"$(abs_srcdir)\" \
-I$(top_srcdir)/lib/dns \

View file

@ -236,8 +236,7 @@ LLVMFuzzerInitialize(int *argc ISC_ATTR_UNUSED, char ***argv ISC_ATTR_UNUSED) {
isc_loopmgr_create(mctx, 1, &loopmgr);
result = dns_view_create(mctx, loopmgr, dns_rdataclass_in, "view",
&view);
result = dns_view_create(mctx, dns_rdataclass_in, "view", &view);
if (result != ISC_R_SUCCESS) {
fprintf(stderr, "dns_view_create failed: %s\n",
isc_result_totext(result));

View file

@ -16,10 +16,10 @@
#include <stdbool.h>
#include <stdint.h>
#include <isc/qsbr.h>
#include <isc/random.h>
#include <isc/refcount.h>
#include <isc/rwlock.h>
#include <isc/urcu.h>
#include <isc/util.h>
#include <dns/qp.h>

View file

@ -263,6 +263,7 @@ libdns_la_CPPFLAGS = \
$(AM_CPPFLAGS) \
$(LIBDNS_CFLAGS) \
$(LIBISC_CFLAGS) \
$(LIBURCU_CFLAGS) \
$(LIBUV_CFLAGS) \
$(OPENSSL_CFLAGS)
@ -272,6 +273,7 @@ libdns_la_LDFLAGS = \
libdns_la_LIBADD = \
$(LIBISC_LIBS) \
$(LIBURCU_LIBS) \
$(LIBUV_LIBS) \
$(OPENSSL_LIBS)

View file

@ -206,8 +206,7 @@ createview(isc_mem_t *mctx, dns_rdataclass_t rdclass, isc_loopmgr_t *loopmgr,
isc_result_t result;
dns_view_t *view = NULL;
result = dns_view_create(mctx, loopmgr, rdclass, DNS_CLIENTVIEW_NAME,
&view);
result = dns_view_create(mctx, rdclass, DNS_CLIENTVIEW_NAME, &view);
if (result != ISC_R_SUCCESS) {
return (result);
}

View file

@ -47,17 +47,11 @@
* version of the trie.
*
* For fast concurrent reads, call `dns_qpmulti_query()` to fill in a
* `dns_qpread_t`, which must be allocated on the stack. Readers can
* access a single version of the trie within the scope of an isc_loop
* thread (NOT an isc_work thread). We rely on the loop to bound the
* lifetime of a `dns_qpread_t`, instead of using locks. Readers are
* not blocked by any write activity, and vice versa.
*
* For read-only access outside the scope of a loop, such as from an
* isc_work callback, `dns_qpmulti_lockedread()`. This looks like a
* query transaction; the difference is that a locked read transaction
* takes the `dns_qpmulti_t` mutex. When you have finished with a
* `dns_qpread_t`, call `dns_qpread_destroy()` to release the mutex.
* `dns_qpread_t`, which must be allocated on the stack. This gives
* the reader access to a single version of the trie. The reader's
* thread must be registered with `liburcu`, which is normally taken
* care of by `libisc`. Readers are not blocked by any write activity,
* and vice versa.
*
* For reads that need a stable view of the trie for multiple cycles
* of an isc_loop, or which can be used from any thread, call
@ -314,17 +308,15 @@ dns_qp_destroy(dns_qp_t **qptp);
*/
void
dns_qpmulti_create(isc_mem_t *mctx, isc_loopmgr_t *loopmgr,
const dns_qpmethods_t *methods, void *uctx,
dns_qpmulti_create(isc_mem_t *mctx, const dns_qpmethods_t *methods, void *uctx,
dns_qpmulti_t **qpmp);
/*%<
* Create a multi-threaded qp-trie.
*
* Requires:
* \li `mctx` is a pointer to a valid memory context.
* \li 'loopmgr' is a valid loop manager.
* \li `qpmp != NULL && *qpmp == NULL`
* \li all the methods are non-NULL
* \li `qpmp != NULL && *qpmp == NULL`
*
* Ensures:
* \li `*qpmp` is a pointer to a valid multi-threaded qp-trie

View file

@ -259,8 +259,8 @@ struct dns_view {
#endif /* HAVE_LMDB */
isc_result_t
dns_view_create(isc_mem_t *mctx, isc_loopmgr_t *loopmgr,
dns_rdataclass_t rdclass, const char *name, dns_view_t **viewp);
dns_view_create(isc_mem_t *mctx, dns_rdataclass_t rdclass, const char *name,
dns_view_t **viewp);
/*%<
* Create a view.
*

View file

@ -34,8 +34,7 @@ typedef isc_result_t
dns_zt_callback_t(void *arg);
void
dns_zt_create(isc_mem_t *mctx, isc_loopmgr_t *loopmgr, dns_view_t *view,
dns_zt_t **ztp);
dns_zt_create(isc_mem_t *mctx, dns_view_t *view, dns_zt_t **ztp);
/*%<
* Creates a new zone table for a view.
*

View file

@ -28,17 +28,16 @@
#include <isc/atomic.h>
#include <isc/buffer.h>
#include <isc/loop.h>
#include <isc/magic.h>
#include <isc/mem.h>
#include <isc/mutex.h>
#include <isc/qsbr.h>
#include <isc/refcount.h>
#include <isc/result.h>
#include <isc/rwlock.h>
#include <isc/tid.h>
#include <isc/time.h>
#include <isc/types.h>
#include <isc/urcu.h>
#include <isc/util.h>
#include <dns/fixedname.h>
@ -426,17 +425,18 @@ static void
realloc_chunk_arrays(dns_qp_t *qp, qp_chunk_t newmax) {
size_t oldptrs = sizeof(qp->base->ptr[0]) * qp->chunk_max;
size_t newptrs = sizeof(qp->base->ptr[0]) * newmax;
size_t allbytes = sizeof(dns_qpbase_t) + newptrs;
size_t size = STRUCT_FLEX_SIZE(qp->base, ptr, newmax);
if (qp->base == NULL || qpbase_unref(qp)) {
qp->base = isc_mem_reallocate(qp->mctx, qp->base, allbytes);
qp->base = isc_mem_reallocate(qp->mctx, qp->base, size);
} else {
dns_qpbase_t *oldbase = qp->base;
qp->base = isc_mem_allocate(qp->mctx, allbytes);
qp->base = isc_mem_allocate(qp->mctx, size);
memmove(&qp->base->ptr[0], &oldbase->ptr[0], oldptrs);
}
memset(&qp->base->ptr[qp->chunk_max], 0, newptrs - oldptrs);
isc_refcount_init(&qp->base->refcount, 1);
qp->base->magic = QPBASE_MAGIC;
/* usage array is exclusive to the writer */
size_t oldusage = sizeof(qp->usage[0]) * qp->chunk_max;
@ -555,12 +555,14 @@ chunk_usage(dns_qp_t *qp, qp_chunk_t chunk) {
*/
static void
chunk_discount(dns_qp_t *qp, qp_chunk_t chunk) {
if (qp->usage[chunk].phase == 0) {
INSIST(qp->used_count >= qp->usage[chunk].used);
INSIST(qp->free_count >= qp->usage[chunk].free);
qp->used_count -= qp->usage[chunk].used;
qp->free_count -= qp->usage[chunk].free;
if (qp->usage[chunk].discounted) {
return;
}
INSIST(qp->used_count >= qp->usage[chunk].used);
INSIST(qp->free_count >= qp->usage[chunk].free);
qp->used_count -= qp->usage[chunk].used;
qp->free_count -= qp->usage[chunk].free;
qp->usage[chunk].discounted = true;
}
/*
@ -622,109 +624,95 @@ recycle(dns_qp_t *qp) {
}
/*
* At the end of a transaction, mark empty but immutable chunks for
* reclamation later. Returns true when chunks need reclaiming later.
* asynchronous cleanup
*/
static bool
defer_chunk_reclamation(dns_qp_t *qp, isc_qsbr_phase_t phase) {
unsigned int reclaim = 0;
static void
reclaim_chunks_cb(struct rcu_head *arg) {
qp_rcuctx_t *rcuctx = isc_urcu_container(arg, qp_rcuctx_t, rcu_head);
REQUIRE(QPRCU_VALID(rcuctx));
dns_qpmulti_t *multi = rcuctx->multi;
REQUIRE(QPMULTI_VALID(multi));
for (qp_chunk_t chunk = 0; chunk < qp->chunk_max; chunk++) {
if (chunk != qp->bump && chunk_usage(qp, chunk) == 0 &&
qp->usage[chunk].exists && qp->usage[chunk].immutable &&
qp->usage[chunk].phase == 0)
{
chunk_discount(qp, chunk);
qp->usage[chunk].phase = phase;
reclaim++;
}
}
LOCK(&multi->mutex);
if (reclaim > 0) {
LOG_STATS("qp will reclaim %u chunks in phase %u", reclaim,
phase);
}
dns_qp_t *qp = &multi->writer;
REQUIRE(QP_VALID(qp));
return (reclaim > 0);
}
static bool
reclaim_chunks(dns_qp_t *qp, isc_qsbr_phase_t phase) {
unsigned int free = 0;
bool more = false;
isc_nanosecs_t start = isc_time_monotonic();
for (qp_chunk_t chunk = 0; chunk < qp->chunk_max; chunk++) {
if (qp->usage[chunk].phase == phase) {
if (qp->usage[chunk].snapshot) {
/* cleanup when snapshot is destroyed */
qp->usage[chunk].snapfree = true;
} else {
chunk_free(qp, chunk);
free++;
}
} else if (qp->usage[chunk].phase != 0) {
/*
* We need to reclaim more of this trie's memory
* on a later qsbr callback.
*/
more = true;
for (unsigned int i = 0; i < rcuctx->count; i++) {
qp_chunk_t chunk = rcuctx->chunk[i];
if (qp->usage[chunk].snapshot) {
/* cleanup when snapshot is destroyed */
qp->usage[chunk].snapfree = true;
} else {
chunk_free(qp, chunk);
free++;
}
}
isc_mem_putanddetach(&rcuctx->mctx, rcuctx,
STRUCT_FLEX_SIZE(rcuctx, chunk, rcuctx->count));
isc_nanosecs_t time = isc_time_monotonic() - start;
recycle_time += time;
if (free > 0) {
LOG_STATS("qp reclaim" PRItime "phase %u free %u chunks", time,
phase, free);
LOG_STATS("qp reclaim" PRItime "free %u chunks", time, free);
LOG_STATS("qp reclaim leaf %u live %u used %u free %u hold %u",
qp->leaf_count, qp->used_count - qp->free_count,
qp->used_count, qp->free_count, qp->hold_count);
}
return (more);
UNLOCK(&multi->mutex);
}
/*
* List of `dns_qpmulti_t`s that have chunks to be reclaimed.
*/
static ISC_ASTACK(dns_qpmulti_t) qsbr_work;
/*
* When a grace period has passed, this function reclaims any unused memory.
* At the end of a transaction, schedule empty but immutable chunks
* for reclamation later.
*/
static void
qp_qsbr_reclaimer(isc_qsbr_phase_t phase) {
ISC_STACK(dns_qpmulti_t) drain = ISC_ASTACK_TO_STACK(qsbr_work);
while (!ISC_STACK_EMPTY(drain)) {
/* lock before pop */
dns_qpmulti_t *multi = ISC_STACK_TOP(drain);
INSIST(QPMULTI_VALID(multi));
LOCK(&multi->mutex);
ISC_STACK_POP(drain, cleanup);
if (multi->writer.destroy) {
UNLOCK(&multi->mutex);
dns_qpmulti_destroy(&multi);
} else {
if (reclaim_chunks(&multi->writer, phase)) {
/* more to do next time */
ISC_ASTACK_PUSH(qsbr_work, multi, cleanup);
}
UNLOCK(&multi->mutex);
reclaim_chunks(dns_qpmulti_t *multi) {
dns_qp_t *qp = &multi->writer;
unsigned int count = 0;
for (qp_chunk_t chunk = 0; chunk < qp->chunk_max; chunk++) {
if (chunk != qp->bump && chunk_usage(qp, chunk) == 0 &&
qp->usage[chunk].exists && qp->usage[chunk].immutable &&
!qp->usage[chunk].discounted)
{
count++;
}
}
}
/*
* Register `qp_qsbr_reclaimer()` with QSBR at startup.
*/
static void
qp_qsbr_register(void) ISC_CONSTRUCTOR;
static void
qp_qsbr_register(void) {
isc_qsbr_register(qp_qsbr_reclaimer);
if (count == 0) {
return;
}
qp_rcuctx_t *rcuctx =
isc_mem_get(qp->mctx, STRUCT_FLEX_SIZE(rcuctx, chunk, count));
*rcuctx = (qp_rcuctx_t){
.magic = QPRCU_MAGIC,
.multi = multi,
.count = count,
};
isc_mem_attach(qp->mctx, &rcuctx->mctx);
unsigned int i = 0;
for (qp_chunk_t chunk = 0; chunk < qp->chunk_max; chunk++) {
if (chunk != qp->bump && chunk_usage(qp, chunk) == 0 &&
qp->usage[chunk].exists && qp->usage[chunk].immutable &&
!qp->usage[chunk].discounted)
{
rcuctx->chunk[i++] = chunk;
chunk_discount(qp, chunk);
}
}
isc_urcu_cleanup(rcuctx, rcu_head, reclaim_chunks_cb);
LOG_STATS("qp will reclaim %u chunks", count);
}
/*
@ -1121,6 +1109,8 @@ dns_qpmulti_update(dns_qpmulti_t *multi, dns_qp_t **qptp) {
memmove(rollback, qp, sizeof(*rollback));
/* can be uninitialized on the first transaction */
if (rollback->base != NULL) {
INSIST(QPBASE_VALID(rollback->base));
INSIST(qp->usage != NULL && qp->chunk_max > 0);
/* paired with either _commit() or _rollback() */
isc_refcount_increment(&rollback->base->refcount);
size_t usage_bytes = sizeof(qp->usage[0]) * qp->chunk_max;
@ -1187,12 +1177,8 @@ dns_qpmulti_commit(dns_qpmulti_t *multi, dns_qp_t **qptp) {
recycle(qp);
}
/* the reclamation phase must be sampled after the commit */
isc_qsbr_phase_t phase = isc_qsbr_phase(multi->loopmgr);
if (defer_chunk_reclamation(qp, phase)) {
ISC_ASTACK_ADD(qsbr_work, multi, cleanup);
isc_qsbr_activate(multi->loopmgr, phase);
}
/* schedule the rest for later */
reclaim_chunks(multi);
*qptp = NULL;
UNLOCK(&multi->mutex);
@ -1282,31 +1268,11 @@ dns_qpmulti_query(dns_qpmulti_t *multi, dns_qpread_t *qp) {
REQUIRE(QPMULTI_VALID(multi));
REQUIRE(qp != NULL);
/* we MUST be in an isc_loop thread */
qp->tid = isc_tid();
REQUIRE(qp->tid != ISC_TID_UNKNOWN);
dns_qpmulti_t *whence = reader_open(multi, qp);
INSIST(whence == multi);
}
/*
* a locked read takes the mutex
*/
void
dns_qpmulti_lockedread(dns_qpmulti_t *multi, dns_qpread_t *qp) {
REQUIRE(QPMULTI_VALID(multi));
REQUIRE(qp != NULL);
/* we MUST NOT be in an isc_loop thread */
qp->tid = isc_tid();
REQUIRE(qp->tid == ISC_TID_UNKNOWN);
LOCK(&multi->mutex);
dns_qpmulti_t *whence = reader_open(multi, qp);
INSIST(whence == multi);
rcu_read_lock();
}
void
@ -1314,10 +1280,8 @@ dns_qpread_destroy(dns_qpmulti_t *multi, dns_qpread_t *qp) {
REQUIRE(QPMULTI_VALID(multi));
REQUIRE(QP_VALID(qp));
REQUIRE(qp->tid == isc_tid());
if (qp->tid == ISC_TID_UNKNOWN) {
UNLOCK(&multi->mutex);
}
*qp = (dns_qpread_t){};
rcu_read_unlock();
}
/*
@ -1406,8 +1370,7 @@ dns_qp_create(isc_mem_t *mctx, const dns_qpmethods_t *methods, void *uctx,
}
void
dns_qpmulti_create(isc_mem_t *mctx, isc_loopmgr_t *loopmgr,
const dns_qpmethods_t *methods, void *uctx,
dns_qpmulti_create(isc_mem_t *mctx, const dns_qpmethods_t *methods, void *uctx,
dns_qpmulti_t **qpmp) {
REQUIRE(qpmp != NULL && *qpmp == NULL);
@ -1415,8 +1378,6 @@ dns_qpmulti_create(isc_mem_t *mctx, isc_loopmgr_t *loopmgr,
*multi = (dns_qpmulti_t){
.magic = QPMULTI_MAGIC,
.reader_ref = INVALID_REF,
.loopmgr = loopmgr,
.cleanup = ISC_SLINK_INITIALIZER,
};
isc_mutex_init(&multi->mutex);
ISC_LIST_INIT(multi->snapshots);
@ -1468,29 +1429,48 @@ dns_qp_destroy(dns_qp_t **qptp) {
isc_mem_putanddetach(&qp->mctx, qp, sizeof(*qp));
}
static void
qpmulti_destroy_cb(struct rcu_head *arg) {
qp_rcuctx_t *rcuctx = isc_urcu_container(arg, qp_rcuctx_t, rcu_head);
REQUIRE(QPRCU_VALID(rcuctx));
dns_qpmulti_t *multi = rcuctx->multi;
REQUIRE(QPMULTI_VALID(multi));
dns_qp_t *qp = &multi->writer;
REQUIRE(QP_VALID(qp));
REQUIRE(rcuctx->count == 0);
destroy_guts(qp);
isc_mutex_destroy(&multi->mutex);
isc_mem_putanddetach(&rcuctx->mctx, rcuctx,
STRUCT_FLEX_SIZE(rcuctx, chunk, rcuctx->count));
isc_mem_putanddetach(&qp->mctx, multi, sizeof(*multi));
}
void
dns_qpmulti_destroy(dns_qpmulti_t **qpmp) {
dns_qp_t *qp = NULL;
dns_qpmulti_t *multi = NULL;
qp_rcuctx_t *rcuctx = NULL;
REQUIRE(qpmp != NULL);
REQUIRE(QPMULTI_VALID(*qpmp));
dns_qpmulti_t *multi = *qpmp;
dns_qp_t *qp = &multi->writer;
multi = *qpmp;
qp = &multi->writer;
*qpmp = NULL;
REQUIRE(QP_VALID(qp));
REQUIRE(multi->rollback == NULL);
REQUIRE(ISC_LIST_EMPTY(multi->snapshots));
LOCK(&multi->mutex);
if (ISC_SLINK_LINKED(multi, cleanup)) {
qp->destroy = true;
UNLOCK(&multi->mutex);
} else {
destroy_guts(qp);
UNLOCK(&multi->mutex);
isc_mutex_destroy(&multi->mutex);
isc_mem_putanddetach(&qp->mctx, multi, sizeof(*multi));
}
rcuctx = isc_mem_get(qp->mctx, STRUCT_FLEX_SIZE(rcuctx, chunk, 0));
*rcuctx = (qp_rcuctx_t){
.magic = QPRCU_MAGIC,
.multi = multi,
};
isc_mem_attach(qp->mctx, &rcuctx->mctx);
isc_urcu_cleanup(rcuctx, rcu_head, qpmulti_destroy_cb);
}
/***********************************************************************

View file

@ -301,11 +301,9 @@ ref_cell(qp_ref_t ref) {
* transactions, but it must become immutable when an update
* transaction is opened.)
*
* When a chunk becomes empty (wrt the latest version of the trie), we
* note the QSBR phase after which no old versions of the trie will
* need the chunk and it will be safe to free(). There are a few flags
* used to mark which chunks are still needed by snapshots after the
* chunks have passed their normal reclamation phase.
* There are a few flags used to mark which chunks are still needed by
* snapshots after the chunks have passed their normal reclamation
* phase.
*/
typedef struct qp_usage {
/*% the allocation point, increases monotonically */
@ -316,14 +314,14 @@ typedef struct qp_usage {
bool exists : 1;
/*% is this chunk shared? [MT] */
bool immutable : 1;
/*% already subtracted from multi->*_count [MT] */
bool discounted : 1;
/*% is a snapshot using this chunk? [MT] */
bool snapshot : 1;
/*% tried to free it but a snapshot needs it [MT] */
bool snapfree : 1;
/*% for mark/sweep snapshot flag updates [MT] */
bool snapmark : 1;
/*% in which phase did this chunk become unused? [MT] */
isc_qsbr_phase_t phase : ISC_QSBR_PHASE_BITS;
} qp_usage_t;
/*
@ -335,7 +333,7 @@ typedef struct qp_usage {
* A `dns_qpbase_t` counts references from `dns_qp_t` objects and
* from packed readers, but not from `dns_qpread_t` nor from
* `dns_qpsnap_t` objects. Refcount adjustments for `dns_qpread_t`
* would wreck multicore scalability; instead we rely on QSBR.
* would wreck multicore scalability; instead we rely on RCU.
*
* The `usage` array determines when a chunk is no longer needed: old
* chunk pointers in old `base` arrays are ignored. (They can become
@ -349,10 +347,25 @@ typedef struct qp_usage {
* busy slots that are in use by readers.
*/
struct dns_qpbase {
unsigned int magic;
isc_refcount_t refcount;
qp_node_t *ptr[];
};
/*
* Chunks that may be in use by readers are reclaimed asynchronously.
* When a transaction commits, immutable chunks that are now empty are
* listed in a `qp_rcuctx_t` structure and passed to `call_rcu()`.
*/
typedef struct qp_rcuctx {
unsigned int magic;
struct rcu_head rcu_head;
isc_mem_t *mctx;
dns_qpmulti_t *multi;
qp_chunk_t count;
qp_chunk_t chunk[];
} qp_rcuctx_t;
/*
* Returns true when the base array can be free()d.
*/
@ -382,10 +395,14 @@ ref_ptr(dns_qpreadable_t qpr, qp_ref_t ref) {
#define QPITER_MAGIC ISC_MAGIC('q', 'p', 'i', 't')
#define QPMULTI_MAGIC ISC_MAGIC('q', 'p', 'm', 'v')
#define QPREADER_MAGIC ISC_MAGIC('q', 'p', 'r', 'x')
#define QPBASE_MAGIC ISC_MAGIC('q', 'p', 'b', 'p')
#define QPRCU_MAGIC ISC_MAGIC('q', 'p', 'c', 'b')
#define QP_VALID(p) ISC_MAGIC_VALID(p, QP_MAGIC)
#define QPITER_VALID(p) ISC_MAGIC_VALID(p, QPITER_MAGIC)
#define QPMULTI_VALID(p) ISC_MAGIC_VALID(p, QPMULTI_MAGIC)
#define QP_VALID(qp) ISC_MAGIC_VALID(qp, QP_MAGIC)
#define QPITER_VALID(qp) ISC_MAGIC_VALID(qp, QPITER_MAGIC)
#define QPMULTI_VALID(qp) ISC_MAGIC_VALID(qp, QPMULTI_MAGIC)
#define QPBASE_VALID(qp) ISC_MAGIC_VALID(qp, QPBASE_MAGIC)
#define QPRCU_VALID(qp) ISC_MAGIC_VALID(qp, QPRCU_MAGIC)
/*
* Polymorphic initialization of the `dns_qpreader_t` prefix.
@ -504,8 +521,6 @@ struct dns_qp {
enum { QP_NONE, QP_WRITE, QP_UPDATE } transaction_mode : 2;
/*% compact the entire trie [MT] */
bool compact_all : 1;
/*% destroy the trie asynchronously [MT] */
bool destroy : 1;
/*% optionally when compiled with fuzzing support [MT] */
bool write_protect : 1;
};
@ -517,9 +532,6 @@ struct dns_qp {
* of the trie. See the "packed reader nodes" section below for a
* description of what it points to.
*
* We need access to the loopmgr to hook into QSBR safe memory reclamation.
* It is constant after initialization.
*
* The main object under the protection of the mutex is the `writer`
* containing all the allocator state. There can be a backup copy when
* we want to be able to rollback an update transaction.
@ -537,8 +549,6 @@ struct dns_qp {
*/
struct dns_qpmulti {
uint32_t magic;
/*% safe memory reclamation context (const) */
isc_loopmgr_t *loopmgr;
/*% pointer to current packed reader */
atomic_ptr(qp_node_t) reader;
/*% the mutex protects the rest of this structure */
@ -551,8 +561,6 @@ struct dns_qpmulti {
dns_qp_t *rollback;
/*% all snapshots of this trie */
ISC_LIST(dns_qpsnap_t) snapshots;
/*% safe memory reclamation work list */
ISC_SLINK(dns_qpmulti_t) cleanup;
};
/***********************************************************************
@ -878,13 +886,15 @@ static inline dns_qpmulti_t *
unpack_reader(dns_qpreader_t *qp, qp_node_t *reader) {
INSIST(reader_valid(reader));
dns_qpmulti_t *multi = node_pointer(&reader[0]);
dns_qpbase_t *base = node_pointer(&reader[1]);
INSIST(QPMULTI_VALID(multi));
INSIST(QPBASE_VALID(base));
*qp = (dns_qpreader_t){
.magic = QP_MAGIC,
.uctx = multi->writer.uctx,
.methods = multi->writer.methods,
.root_ref = node32(&reader[1]),
.base = node_pointer(&reader[1]),
.base = base,
};
return (multi);
}

View file

@ -9583,8 +9583,8 @@ new_gluelist(isc_mem_t *mctx, dns_name_t *name) {
static void
free_gluelist_rcu(struct rcu_head *rcu_head) {
rbtdb_glue_t *glue = caa_container_of(rcu_head, rbtdb_glue_t, rcu_head);
__tsan_acquire(glue);
rbtdb_glue_t *glue = isc_urcu_container(rcu_head, rbtdb_glue_t,
rcu_head);
free_gluelist(glue);
}
@ -9600,8 +9600,7 @@ free_gluetable(rbtdb_version_t *rbtversion) {
caa_container_of(node, rdatasetheader_t, wfs_node);
rbtdb_glue_t *glue = rcu_xchg_pointer(&header->glue_list, NULL);
__tsan_release(glue);
call_rcu(&glue->rcu_head, free_gluelist_rcu);
isc_urcu_cleanup(glue, rcu_head, free_gluelist_rcu);
}
rcu_read_unlock();
}

View file

@ -79,8 +79,7 @@
#define DEFAULT_EDNS_BUFSIZE 1232
isc_result_t
dns_view_create(isc_mem_t *mctx, isc_loopmgr_t *loopmgr,
dns_rdataclass_t rdclass, const char *name,
dns_view_create(isc_mem_t *mctx, dns_rdataclass_t rdclass, const char *name,
dns_view_t **viewp) {
dns_view_t *view = NULL;
isc_result_t result;
@ -135,7 +134,7 @@ dns_view_create(isc_mem_t *mctx, isc_loopmgr_t *loopmgr,
isc_rwlock_init(&view->sfd_lock);
view->zonetable = NULL;
dns_zt_create(mctx, loopmgr, view, &view->zonetable);
dns_zt_create(mctx, view, &view->zonetable);
result = dns_fwdtable_create(mctx, &view->fwdtable);
if (result != ISC_R_SUCCESS) {

View file

@ -10619,8 +10619,6 @@ failure:
cleanup:
dns_db_detach(&kfetch->db);
/* The zone must be managed */
INSIST(kfetch->zone->loop != NULL);
isc_refcount_decrement(&zone->irefs);
if (dns_rdataset_isassociated(keydataset)) {
@ -14658,12 +14656,15 @@ zone_timer_set(dns_zone_t *zone, isc_time_t *next, isc_time_t *now) {
isc_time_subtract(next, now, &interval);
}
if (zone->timer == NULL) {
if (zone->loop == NULL) {
zone_debuglog(zone, __func__, 10, "zone is not managed");
} else if (zone->timer == NULL) {
isc_refcount_increment0(&zone->irefs);
isc_timer_create(zone->loop, zone_timer, zone, &zone->timer);
}
isc_timer_start(zone->timer, isc_timertype_once, &interval);
if (zone->timer != NULL) {
isc_timer_start(zone->timer, isc_timertype_once, &interval);
}
}
static void
@ -18169,7 +18170,8 @@ dns_zonemgr_managezone(dns_zonemgr_t *zmgr, dns_zone_t *zone) {
REQUIRE(zone->timer == NULL);
REQUIRE(zone->zmgr == NULL);
zone->loop = isc_loop_get(zmgr->loopmgr, zone->tid);
isc_loop_t *loop = isc_loop_get(zmgr->loopmgr, zone->tid);
isc_loop_attach(loop, &zone->loop);
zonemgr_keymgmt_add(zmgr, zone, &zone->kfio);
INSIST(zone->kfio != NULL);
@ -18200,6 +18202,13 @@ dns_zonemgr_releasezone(dns_zonemgr_t *zmgr, dns_zone_t *zone) {
ENSURE(zone->kfio == NULL);
}
if (zone->timer != NULL) {
isc_refcount_decrement(&zone->irefs);
isc_timer_destroy(&zone->timer);
}
isc_loop_detach(&zone->loop);
/* Detach below, outside of the write lock. */
zone->zmgr = NULL;
@ -20658,8 +20667,6 @@ done:
}
cleanup:
/* The zone must be managed */
INSIST(nsfetch->zone->loop != NULL);
isc_refcount_decrement(&zone->irefs);
if (dns_rdataset_isassociated(nsrrset)) {
@ -21922,7 +21929,7 @@ dns_zone_link(dns_zone_t *zone, dns_zone_t *raw) {
LOCK_ZONE(zone);
LOCK_ZONE(raw);
raw->loop = zone->loop;
isc_loop_attach(zone->loop, &raw->loop);
/* dns_zone_attach(raw, &zone->raw); */
isc_refcount_increment(&raw->references);

View file

@ -94,15 +94,14 @@ static dns_qpmethods_t ztqpmethods = {
};
void
dns_zt_create(isc_mem_t *mctx, isc_loopmgr_t *loopmgr, dns_view_t *view,
dns_zt_t **ztp) {
dns_zt_create(isc_mem_t *mctx, dns_view_t *view, dns_zt_t **ztp) {
dns_qpmulti_t *multi = NULL;
dns_zt_t *zt = NULL;
REQUIRE(ztp != NULL && *ztp == NULL);
REQUIRE(view != NULL);
dns_qpmulti_create(mctx, loopmgr, &ztqpmethods, view, &multi);
dns_qpmulti_create(mctx, &ztqpmethods, view, &multi);
zt = isc_mem_get(mctx, sizeof(*zt));
*zt = (dns_zt_t){
@ -177,11 +176,8 @@ dns_zt_find(dns_zt_t *zt, const dns_name_t *name, dns_ztfind_t options,
REQUIRE(VALID_ZT(zt));
REQUIRE(exactopts != exactmask);
if (isc_tid() == ISC_TID_UNKNOWN) {
dns_qpmulti_lockedread(zt->multi, &qpr);
} else {
dns_qpmulti_query(zt->multi, &qpr);
}
dns_qpmulti_query(zt->multi, &qpr);
if (exactopts == DNS_ZTFIND_EXACT) {
result = dns_qp_getname(&qpr, name, &pval, &ival);
} else if (exactopts == DNS_ZTFIND_NOEXACT) {

View file

@ -66,7 +66,6 @@ libisc_la_HEADERS = \
include/isc/pause.h \
include/isc/portset.h \
include/isc/quota.h \
include/isc/qsbr.h \
include/isc/radix.h \
include/isc/random.h \
include/isc/ratelimiter.h \
@ -81,7 +80,6 @@ libisc_la_HEADERS = \
include/isc/siphash.h \
include/isc/sockaddr.h \
include/isc/spinlock.h \
include/isc/stack.h \
include/isc/stats.h \
include/isc/stdio.h \
include/isc/stdtime.h \
@ -174,7 +172,6 @@ libisc_la_SOURCES = \
picohttpparser.h \
portset.c \
quota.c \
qsbr.c \
radix.c \
random.c \
ratelimiter.c \

View file

@ -107,8 +107,7 @@ isc__async_cb(uv_async_t *handle) {
*/
struct cds_wfcq_node *node, *next;
__cds_wfcq_for_each_blocking_safe(&jobs.head, &jobs.tail, node, next) {
isc_job_t *job = caa_container_of(node, isc_job_t, wfcq_node);
__tsan_acquire(job);
isc_job_t *job = isc_urcu_container(node, isc_job_t, wfcq_node);
job->cb(job->cbarg);

View file

@ -27,13 +27,6 @@
#include <isc/mem.h>
#include <isc/tid.h>
/*
* XXXFANF to be added to <isc/util.h> by a commmit in a qp-trie
* feature branch
*/
#define STRUCT_FLEX_SIZE(pointer, member, count) \
(sizeof(*(pointer)) + sizeof(*(pointer)->member) * (count))
/*
* XXXFANF this should probably be in <isc/util.h> too
*/

View file

@ -287,7 +287,7 @@ isc__ht_add(isc_ht_t *ht, const unsigned char *key, const uint32_t keysize,
hash = hash_32(hashval, ht->hashbits[idx]);
node = isc_mem_get(ht->mctx, sizeof(*node) + keysize);
node = isc_mem_get(ht->mctx, STRUCT_FLEX_SIZE(node, key, keysize));
*node = (isc_ht_node_t){
.keysize = keysize,
.hashval = hashval,
@ -396,7 +396,7 @@ isc__ht_delete(isc_ht_t *ht, const unsigned char *key, const uint32_t keysize,
prev->next = node->next;
}
isc_mem_put(ht->mctx, node,
sizeof(*node) + node->keysize);
STRUCT_FLEX_SIZE(node, key, node->keysize));
ht->count--;
return (ISC_R_SUCCESS);

View file

@ -69,17 +69,6 @@ isc_loopmgr_run(isc_loopmgr_t *loopmgr);
*\li 'loopmgr' is a valid loop manager.
*/
void
isc_loopmgr_wakeup(isc_loopmgr_t *loopmgr);
/*%<
* Send no-op events to wake up all running loops in 'loopmgr' except
* the current one. (See <isc/qsbr.h>.)
*
* Requires:
*\li 'loopmgr' is a valid loop manager.
*\li We are in a running loop.
*/
void
isc_loopmgr_pause(isc_loopmgr_t *loopmgr);
/*%<

View file

@ -1,282 +0,0 @@
/*
* Copyright (C) Internet Systems Consortium, Inc. ("ISC")
*
* SPDX-License-Identifier: MPL-2.0
*
* This Source Code Form is subject to the terms of the Mozilla Public
* License, v. 2.0. If a copy of the MPL was not distributed with this
* file, you can obtain one at https://mozilla.org/MPL/2.0/.
*
* See the COPYRIGHT file distributed with this work for additional
* information regarding copyright ownership.
*/
#pragma once
#include <isc/atomic.h>
#include <isc/stack.h>
#include <isc/types.h>
#include <isc/uv.h>
/*
* Quiescent state based reclamation
* =================================
*
* QSBR is a safe memory reclamation algorithm for lock-free data
* structures such as a qp-trie.
*
* When an object is unlinked from a lock-free data structure, it
* cannot be free()d immediately, because there can still be readers
* accessing the object via an old version of the data structure. SMR
* algorithms determine when it is safe to reclaim memory after it has
* been unlinked.
*
* With QSBR, reading a data structure is wait-free. All that is
* required is an atomic load to get the data structure's current
* root; there is no need to explicitly mark any read-side critical
* section.
*
* QSBR is used by RCU (read-copy-update) in the Linux kernel. BIND's
* implementation also uses some ideas from EBR (epoch-based reclamation).
* The following summary is based on the overview in the paper
* "performance of memory reclamation for lockless synchronization",
* (http://csng.cs.toronto.edu/publication_files/0000/0159/jpdc07.pdf).
*
* Aside: This QSBR implementation is somewhat different from the one
* in liburcu, described in the paper "user-level implementations of
* read-copy update", (https://www.efficios.com/publications/), which
* contains the amusing comment:
*
* BIND, a major domain-name server used for Internet domain-name
* resolution, is facing scalability issues. Since domain names
* are read often but rarely updated, using user-level RCU might
* be beneficial.
*
* A "quiescent state" is a point when a thread is not accessing any
* lock-free data structure. After passing through a quiescent state,
* a thread can no longer access versions of a data structure that
* were replaced before that point. In BIND, we use a point in the
* event loop (a uv_prepare_t callback) to identify a quiescent state.
*
* Aside: a prepare handle runs its callbacks before the loop sleeps,
* which reduces reclaim latency (unlike a check handle) and it does
* not affect timeout calculations (unlike an idle handle).
*
* A "grace period" is any time interval such that after the end of
* the grace period, all objects removed before the start of the grace
* period can safely be reclaimed. Different SMR algorithms detect
* grace periods with varying degrees of tightness or looseness.
*
* QSBR uses quiescent states to detect grace periods: a grace period
* is a time interval in which every thread passes through a quiescent
* state. (This is a safe over-estimate.) A "fuzzy barrier" is used to
* find out when all threads have passed through a quiescent state.
*
* NOTE: In BIND this means that code which is not running in an event
* loop thread (such as an isc_work / uv_work_t callback) must use
* locking (not lock-free) data structure accessors.
*
* Because a quiescent state happens once per event loop, a grace
* period takes roughly the same amount of time as the slowest event
* loop in each cycle.
*
* Similar to the paper linked above, this QSBR implementation uses a
* variant of the EBR fuzzy barrier. Like EBR, each grace period is
* numbered with a "phase", which cycles round 1,2,3,1,2,3,... (Phases
* are called epochs in EBR, but I think "phase" is a better metaphor.)
* When entering the fuzzy barrier, each thread updates its local phase
* to match the global phase, keeping a global count of the number of
* threads still to pass. When this count reaches zero, it is the end of
* the grace period; the global phase is updated and reclamation is
* triggered.
*
* Note that threads are usually slightly out-of-phase wrt the global
* grace period. At any particular point in time, there will be some
* threads in the current global phase, and some in the previous
* global phase. EBR has three phases because that is the minimum
* number that leaves one phase unoccupied by readers. Any objects that
* were detached from the data structure in the third phase can be
* reclaimed after the start of the current phase, because a grace
* period (the previous phase) has elapsed since the objects were
* detached.
*
* A phase number can be used by a lock-free data structure (such as a
* qp-trie) to record when an object was detached. QSBR calls the data
* structure's reclaimer function, passing a phase number indicating
* that objects detached in that phase can now be reclaimed
*
* In general, there will be several (maybe many) write operations
* during a grace period. The lock-free data structures that use QSBR
* will collect their reclamation work from all these writes into a
* batch per phase, i.e. per grace period.
*
* There is some example code in `doc/dev/qsbr.md`, with pointers to
* less terse introductions to QSBR and other overview material.
*/
#define ISC_QSBR_PHASE_BITS 2
typedef unsigned int isc_qsbr_phase_t;
/*%<
* A grace period phase number. It can be stored in a bitfield of size
* ISC_QSBR_PHASE_BITS. You can use zero to indicate "no phase".
* (Don't assume the maximum is three: We might want to increase the
* number of phases so that there is more than one unoccupied phase.
* This would allow concurrent reclamation of objects released in
* multiple unoccupied phases.)
*/
typedef void
isc_qsbreclaimer_t(isc_qsbr_phase_t phase);
/*%<
* The type of memory reclaimer callback functions.
*
* The `phase` identifies which objects are to be reclaimed.
*
* An isc_qsbreclaimer_t can call isc_qsbr_activate() if it could not
* reclaim everything and needs to be called again.
*/
typedef struct isc_qsbr_registered {
ISC_SLINK(struct isc_qsbr_registered) link;
isc_qsbreclaimer_t *func;
} isc_qsbr_registered_t;
/*%<
* Each reclaimer callback has a static `isc_qsbr_registered_t` object
* so that QSBR can find it.
*/
void
isc__qsbr_register(isc_qsbr_registered_t *reg);
/*%<
* Requires:
* \li reclaimer->link is not linked
* \li reclaimer->func is not NULL
*/
#define isc_qsbr_register(cb) \
do { \
static isc_qsbr_registered_t registration = { \
.link = ISC_SLINK_INITIALIZER, \
.func = cb, \
}; \
isc__qsbr_register(&registration); \
} while (0)
/*%<
* Register a callback function with QSBR. This macro should be used
* inside an `ISC_CONSTRUCTOR` function. There should be one callback
* for eack lock-free data structure implementation, which is able to
* reclaim all the unused memory across all instances of its data
* structure.
*/
isc_qsbr_phase_t
isc_qsbr_phase(isc_loopmgr_t *loopmgr);
/*%<
* Get the current phase, to use for marking detached objects.
*
* To commit a write that requires cleanup, the ordering must be:
*
* - Use atomic_store_release() to commit the data structure's new
* root pointer; release ordering ensures that the interior changes
* are written before the root pointer.
*
* - Call isc_qsbr_phase() to get the phase to be used for marking
* objects to reclaim. This must happen after the commit, to ensure
* there is at least one grace period between commit and cleanup.
*
* - Pass the same phase to isc_qsbr_activate() so that the reclaimer
* will be called after a grace period has passed.
*/
void
isc_qsbr_activate(isc_loopmgr_t *loopmgr, isc_qsbr_phase_t phase);
/*%<
* Tell QSBR that objects have been detached and will need reclaiming
* after a grace period.
*/
/***********************************************************************
*
* private parts
*/
/*
* Accessors and constructors for the `grace` variable.
* It contains two bit fields:
*
* - the global phase in the lower ISC_QSBR_PHASE_BITS
*
* - a thread counter in the upper bits
*/
#define ISC_QSBR_ONE_THREAD (1 << ISC_QSBR_PHASE_BITS)
#define ISC_QSBR_PHASE_MAX (ISC_QSBR_ONE_THREAD - 1)
#define ISC_QSBR_GRACE_PHASE(grace) (grace & ISC_QSBR_PHASE_MAX)
#define ISC_QSBR_GRACE_THREADS(grace) (grace >> ISC_QSBR_PHASE_BITS)
#define ISC_QSBR_GRACE(threads, phase) \
((threads << ISC_QSBR_PHASE_BITS) | phase)
typedef struct isc_qsbr {
/*
* The `grace` variable keeps track of the current grace period.
* When the phase changes, the thread counter is set to the number of
* threads that need to observe the new phase before the grace period
* can end.
*
* The thread counter is an add-on to the usual EBR fuzzy barrier.
* Counting threads through the barrier adds multi-thread update
* contention, and in EBR the fuzzy barrier runs frequently enough
* (on every access) that it's important to minimize its cost. With
* QSBR, the fuzzy barrier runs less frequently (roughly, per loop,
* instead of per-callback) so contention is less of a concern. The
* thread counter helps to reduce reclaim latency, because unlike EBR
* we don't probabilistically check, we know deterministically when
* all threads have changed phase.
*/
atomic_uint_fast32_t grace;
/*
* A flag for each phase indicating that there will be work to
* do, so we don't invoke the reclaim machinery unnecessarily.
* Set by `isc_qsbr_activate()` and cleared before the reclaimer
* functions are invoked (so they can re-set their flag if
* necessary).
*/
atomic_uint_fast32_t activated;
/*
* The time of the last phase transition (isc_nanosecs_t). Used
* to ensure that grace periods do not last forever. We use
* `isc_time_monotonic()` because we need the same time in all
* threads. (`uv_now()` is different in different threads.)
*/
atomic_uint_fast64_t transition_time;
} isc_qsbr_t;
/*
* When we start there is no worker thread yet, so the thread
* count is equal to the number of loops. The global phase starts
* off at one (it must always be nonzero).
*/
#define ISC_QSBR_INITIALIZER(nloops) \
(isc_qsbr_t) { \
.grace = ISC_QSBR_GRACE(nloops, 1), \
.transition_time = isc_time_monotonic(), \
}
/*
* For use by tests that need to explicitly drive QSBR phase transitions.
*/
void
isc__qsbr_quiescent_state(isc_loop_t *loop);
/*
* Used by the loopmgr
*/
void
isc__qsbr_quiescent_cb(uv_prepare_t *handle);
void
isc__qsbr_destroy(isc_loopmgr_t *loopmgr);

View file

@ -1,189 +0,0 @@
/*
* Copyright (C) Internet Systems Consortium, Inc. ("ISC")
*
* SPDX-License-Identifier: MPL-2.0
*
* This Source Code Form is subject to the terms of the Mozilla Public
* License, v. 2.0. If a copy of the MPL was not distributed with this
* file, you can obtain one at https://mozilla.org/MPL/2.0/.
*
* See the COPYRIGHT file distributed with this work for additional
* information regarding copyright ownership.
*/
#pragma once
#include <isc/assertions.h>
#include <isc/atomic.h>
#include <isc/list.h>
#include <isc/pause.h>
/*
* Single links, aka stack links
*/
#define ISC_SLINK(type) \
struct { \
type *next; \
}
#define ISC_SLINK_INITIALIZER_TYPE(type) \
{ \
.next = ISC_LINK_TOMBSTONE(type), \
}
#define ISC_SLINK_INIT_TYPE(elt, link, type) \
{ \
(elt)->link.next = ISC_LINK_TOMBSTONE(type); \
}
#define ISC_SLINK_INITIALIZER ISC_SLINK_INITIALIZER_TYPE(void)
#define ISC_SLINK_INIT(elt, link) ISC_SLINK_INIT_TYPE(elt, link, void)
#define ISC_SLINK_LINKED_TYPE(elt, link, type) \
((type *)((elt)->link.next) != ISC_LINK_TOMBSTONE(type))
#define ISC_SLINK_LINKED(elt, link) ISC_SLINK_LINKED_TYPE(elt, link, void)
#define ISC_SLINK_NEXT(elt, link) ((elt)->link.next)
/*
* Simple singly-linked stack implementation
*/
#define ISC_STACK(type) \
struct { \
type *top; \
}
#define ISC_STACK_INITIALIZER \
{ \
.top = NULL, \
}
#define ISC_STACK_INIT(stack) \
{ \
(stack).top = NULL; \
}
#define ISC_STACK_TOP(stack) ((stack).top)
#define ISC_STACK_EMPTY(stack) ((stack).top == NULL)
#define ISC_STACK_PUSHUNSAFE(stack, elt, link) \
{ \
(elt)->link.next = (stack).top; \
(stack).top = (elt); \
}
#define ISC_STACK_PUSH(stack, elt, link) \
{ \
INSIST(!ISC_SLINK_LINKED(elt, link)); \
ISC_STACK_PUSHUNSAFE(stack, elt, link); \
}
/*
* This is slightly round about because when `type` is `void`
* we can't directly access `__top->link.next`.
*/
#define ISC_STACK_POP_TYPE(stack, link, type) \
({ \
type *__top = (stack).top; \
if (__top != NULL) { \
type *__next = ISC_SLINK_NEXT((stack).top, link); \
ISC_SLINK_INIT_TYPE((stack).top, link, type); \
(stack).top = __next; \
} \
__top; \
})
#define ISC_STACK_POP(stack, link) ISC_STACK_POP_TYPE(stack, link, void)
/*
* Helper to add element if not already on stack.
*/
#define ISC_STACK_ADD(stack, elt, link) \
{ \
if (!ISC_STACK_LINKED(elt, link)) { \
ISC_STACK_PUSHUNSAFE(stack, elt, link); \
} \
}
/*
* This is a simplified implementation of the Treiber stack algorithm with
* back-off.
*
* The original paper which describes the algorithm can be found here:
* https://dominoweb.draco.res.ibm.com/58319a2ed2b1078985257003004617ef.html
*
* We sidestep the ABA problem when removing the elements by adding additional
* constraint: once the element has been removed from the stack, it cannot be
* re-added.
*
* This is actually not a problem for planned usage of the ISC_ASTACK where we
* only use ISC_ASTACK_PUSH() to add individual elements and ISC_ASTACK_DRAIN()
* to empty the stack in a single atomic operation.
*
* To make things simple, there's no implementation for ISC_ASTACK_POP() because
* that's complex to implement and requires double-word CAS (for ABA counter).
*
* The pointers are wrapped in structs so that their types are
* distinct, and to match the ISC_LIST macros.
*
* See doc/dev/dev.md for examples.
*/
#define ISC_ASTACK(type) \
struct { \
atomic_ptr(type) atop; \
}
#define ISC_ASTACK_INITIALIZER \
{ \
.atop = NULL, \
}
#define ISC_ASTACK_INIT(stack) \
{ \
(stack).atop = NULL; \
}
/*
* ATOMIC: for performance, this kind of retry loop should use a weak
* CAS with relaxed ordering in the failure case; on success, release
* ordering ensures that writing the element contents happens before
* reading them, following the acquire in ISC_ASTACK_TOP() and _DRAIN().
*/
#define ISC_ASTACK_PUSHUNSAFE(stack, elt, link) \
{ \
(elt)->link.next = atomic_load_relaxed(&(stack).atop); \
while (!atomic_compare_exchange_weak_explicit( \
&(stack).atop, &ISC_SLINK_NEXT(elt, link), elt, \
memory_order_release, memory_order_relaxed)) \
{ \
isc_pause(); \
} \
}
#define ISC_ASTACK_PUSH(stack, elt, link) \
{ \
INSIST(!ISC_SLINK_LINKED(elt, link)); \
ISC_ASTACK_PUSHUNSAFE(stack, elt, link); \
}
/*
* Helper to add element if not already on stack.
*/
#define ISC_ASTACK_ADD(stack, elt, link) \
{ \
if (!ISC_SLINK_LINKED(elt, link)) { \
ISC_ASTACK_PUSHUNSAFE(stack, elt, link); \
} \
}
/*
* ATOMIC: acquire ordering pairs with ISC_ASTACK_PUSHUNSAFEe()
*/
#define ISC_ASTACK_TOP(stack) atomic_load_acquire(&(stack).atop)
#define ISC_ASTACK_EMPTY(stack) (ISC_ASTACK_TOP(stack) == NULL)
/*
* ATOMIC: acquire ordering pairs with ISC_ASTACK_PUSHUNSAFE()
*/
#define ISC_ASTACK_TO_STACK(stack) \
{ \
.top = atomic_exchange_acquire(&(stack).atop, NULL), \
}

View file

@ -39,6 +39,35 @@
#pragma GCC diagnostic pop
/*
* Help thread sanitizer to understand `call_rcu()`:
*
* The `rcu_head` argument to `call_rcu()` is expected to be embedded
* in a structure defined by the caller, which is named `_rcuctx` in
* these macros. The callback function uses `caa_container_of()` to
* recover the `rcuctx` pointer from the `_rcu_head` pointer that is
* passed to the callback.
*
* We explain the ordering dependency to tsan by releasing `_rcuctx`
* pointer before `call_rcu()` and acquiring it in the callback
* funtion. We pass the outer `_rcuctx` pointer to the `__tsan_`
* barriers, because it should match a pointer that is known by tsan
* to have been returned by `malloc()`.
*/
#define isc_urcu_cleanup(ptr, member, func) \
{ \
__tsan_release(ptr); \
call_rcu(&(ptr)->member, func); \
}
#define isc_urcu_container(ptr, type, member) \
({ \
type *_ptr = caa_container_of(ptr, type, member); \
__tsan_acquire(_ptr); \
_ptr; \
})
#if defined(RCU_QSBR)
/*

View file

@ -96,6 +96,14 @@
#define ARRAY_SIZE(x) (sizeof(x) / sizeof(x[0]))
/*%
* Get the allocation size for a struct with a flexible array member
* containing `count` elements. The struct is identified by a pointer,
* typically the one that points to (or will point to) the allocation.
*/
#define STRUCT_FLEX_SIZE(pointer, member, count) \
(sizeof(*(pointer)) + sizeof(*(pointer)->member) * (count))
/*%
* Use this in translation units that would otherwise be empty, to
* suppress compiler warnings.

View file

@ -26,14 +26,12 @@
#include <isc/magic.h>
#include <isc/mem.h>
#include <isc/mutex.h>
#include <isc/qsbr.h>
#include <isc/refcount.h>
#include <isc/result.h>
#include <isc/signal.h>
#include <isc/strerr.h>
#include <isc/thread.h>
#include <isc/tid.h>
#include <isc/time.h>
#include <isc/urcu.h>
#include <isc/util.h>
#include <isc/uv.h>
@ -151,7 +149,6 @@ destroy_cb(uv_async_t *handle) {
uv_close(&loop->run_trigger, isc__job_close);
uv_close(&loop->destroy_trigger, NULL);
uv_close(&loop->pause_trigger, NULL);
uv_close(&loop->wakeup_trigger, NULL);
uv_close(&loop->quiescent, NULL);
uv_walk(&loop->loop, loop_walk_cb, (char *)"destroy_cb");
@ -162,8 +159,6 @@ shutdown_cb(uv_async_t *handle) {
isc_loop_t *loop = uv_handle_get_data(handle);
isc_loopmgr_t *loopmgr = loop->loopmgr;
loop->shuttingdown = true;
/* Make sure, we can't be called again */
uv_close(&loop->shutdown_trigger, shutdown_trigger_close_cb);
@ -185,12 +180,6 @@ shutdown_cb(uv_async_t *handle) {
UV_RUNTIME_CHECK(uv_async_send, r);
}
static void
wakeup_cb(uv_async_t *handle) {
/* we only woke up to make the loop take a spin */
UNUSED(handle);
}
static void
loop_init(isc_loop_t *loop, isc_loopmgr_t *loopmgr, uint32_t tid) {
*loop = (isc_loop_t){
@ -226,9 +215,6 @@ loop_init(isc_loop_t *loop, isc_loopmgr_t *loopmgr, uint32_t tid) {
UV_RUNTIME_CHECK(uv_async_init, r);
uv_handle_set_data(&loop->destroy_trigger, loop);
r = uv_async_init(&loop->loop, &loop->wakeup_trigger, wakeup_cb);
UV_RUNTIME_CHECK(uv_async_init, r);
r = uv_prepare_init(&loop->loop, &loop->quiescent);
UV_RUNTIME_CHECK(uv_prepare_init, r);
uv_handle_set_data(&loop->quiescent, loop);
@ -245,7 +231,7 @@ loop_init(isc_loop_t *loop, isc_loopmgr_t *loopmgr, uint32_t tid) {
static void
quiescent_cb(uv_prepare_t *handle) {
isc__qsbr_quiescent_cb(handle);
UNUSED(handle);
#if defined(RCU_QSBR)
/* safe memory reclamation */
@ -340,7 +326,6 @@ isc_loopmgr_create(isc_mem_t *mctx, uint32_t nloops, isc_loopmgr_t **loopmgrp) {
loopmgr = isc_mem_get(mctx, sizeof(*loopmgr));
*loopmgr = (isc_loopmgr_t){
.nloops = nloops,
.qsbr = ISC_QSBR_INITIALIZER(nloops),
};
isc_mem_attach(mctx, &loopmgr->mctx);
@ -449,6 +434,10 @@ isc_loopmgr_run(isc_loopmgr_t *loopmgr) {
*/
ignore_signal(SIGPIPE, SIG_IGN);
bool free_call_rcu_data = !create_all_cpu_call_rcu_data(0);
rcu_register_thread();
/*
* The thread 0 is this one.
*/
@ -463,21 +452,13 @@ isc_loopmgr_run(isc_loopmgr_t *loopmgr) {
}
isc_thread_main(loop_thread, &loopmgr->loops[0]);
}
void
isc_loopmgr_wakeup(isc_loopmgr_t *loopmgr) {
REQUIRE(VALID_LOOPMGR(loopmgr));
rcu_unregister_thread();
for (size_t i = 0; i < loopmgr->nloops; i++) {
isc_loop_t *loop = &loopmgr->loops[i];
rcu_barrier();
/* Skip current loop */
if (i == isc_tid()) {
continue;
}
uv_async_send(&loop->wakeup_trigger);
if (free_call_rcu_data) {
free_all_cpu_call_rcu_data();
}
}

View file

@ -21,7 +21,6 @@
#include <isc/loop.h>
#include <isc/magic.h>
#include <isc/mem.h>
#include <isc/qsbr.h>
#include <isc/refcount.h>
#include <isc/result.h>
#include <isc/signal.h>
@ -76,9 +75,7 @@ struct isc_loop {
uv_async_t destroy_trigger;
/* safe memory reclamation */
uv_async_t wakeup_trigger;
uv_prepare_t quiescent;
isc_qsbr_phase_t qsbr_phase;
};
/*
@ -113,9 +110,6 @@ struct isc_loopmgr {
/* per-thread objects */
isc_loop_t *loops;
/* safe memory reclamation */
isc_qsbr_t qsbr;
};
/*

View file

@ -32,6 +32,7 @@
#include <isc/strerr.h>
#include <isc/string.h>
#include <isc/types.h>
#include <isc/urcu.h>
#include <isc/util.h>
#ifdef HAVE_LIBXML2
@ -578,6 +579,12 @@ isc__mem_destroy(isc_mem_t **ctxp FLARG) {
ctx = *ctxp;
*ctxp = NULL;
/*
* wait for asynchronous memory reclamation to complete
* before checking for memory leaks
*/
rcu_barrier();
#if ISC_MEM_TRACKLINES
if ((ctx->debugging & ISC_MEM_DEBUGTRACE) != 0) {
fprintf(stderr, "destroy mctx %p file %s line %u\n", ctx, file,

View file

@ -1,393 +0,0 @@
/*
* Copyright (C) Internet Systems Consortium, Inc. ("ISC")
*
* SPDX-License-Identifier: MPL-2.0
*
* This Source Code Form is subject to the terms of the Mozilla Public
* License, v. 2.0. If a copy of the MPL was not distributed with this
* file, you can obtain one at https://mozilla.org/MPL/2.0/.
*
* See the COPYRIGHT file distributed with this work for additional
* information regarding copyright ownership.
*/
#include <isc/atomic.h>
#include <isc/log.h>
#include <isc/loop.h>
#include <isc/qsbr.h>
#include <isc/stack.h>
#include <isc/tid.h>
#include <isc/time.h>
#include <isc/types.h>
#include <isc/uv.h>
#include "loop_p.h"
#define MAX_GRACE_PERIOD_NS 53 * NS_PER_MS
#if 0
#define TRACE(fmt, ...) \
isc_log_write(isc_lctx, ISC_LOGCATEGORY_GENERAL, ISC_LOGMODULE_OTHER, \
ISC_LOG_DEBUG(7), "%s:%u:%s():t%u: " fmt, __FILE__, \
__LINE__, __func__, isc_tid(), ##__VA_ARGS__)
#else
#define TRACE(...)
#endif
static ISC_STACK(isc_qsbr_registered_t) qsbreclaimers = ISC_STACK_INITIALIZER;
static void
reclaim_cb(void *arg);
static void
reclaimed_cb(void *arg);
/**********************************************************************/
/*
* 3,2,1,3,2,1,...
*/
static isc_qsbr_phase_t
change_phase(isc_qsbr_phase_t phase) {
return (--phase > 0 ? phase : ISC_QSBR_PHASE_MAX);
}
/*
* For marking or checking that a phase has cleanup work to do.
*/
static unsigned int
active_bit(isc_qsbr_phase_t phase) {
return (1 << phase);
}
/*
* Extract the global phase from the grace period state.
*/
static isc_qsbr_phase_t
global_phase(isc_qsbr_t *qsbr, memory_order m_o) {
uint32_t grace = atomic_load_explicit(&qsbr->grace, m_o);
return (ISC_QSBR_GRACE_PHASE(grace));
}
/*
* Record that the current thread has passed the barrier.
* Returns true if more threads still need to pass.
*
* ATOMIC: acquire-release, to ensure that this is not reordered wrt
* read-only accesses to lock-free data structures. This implements the
* ordering requirements of a quiescent state.
*/
static bool
fuzzy_barrier_not_yet(isc_qsbr_t *qsbr) {
uint32_t grace = atomic_fetch_sub_acq_rel(&qsbr->grace,
ISC_QSBR_ONE_THREAD);
uint32_t threads = ISC_QSBR_GRACE_THREADS(grace);
return (threads > 1);
}
/*
* Ungracefully drive all cleanup work to completion.
*
* ATOMIC: everything is relaxed, because we assume that concurrent
* readers have already finished. `reclaim_cb()` uses the `activated`
* flags to ensure it is OK that threads will race to complete the
* cleanup.
*/
static void
qsbr_shutdown(isc_loopmgr_t *loopmgr) {
isc_qsbr_t *qsbr = &loopmgr->qsbr;
isc_qsbr_phase_t phase = global_phase(qsbr, memory_order_relaxed);
uint32_t threads = isc_loopmgr_nloops(loopmgr);
uint32_t grace;
while (atomic_load_relaxed(&qsbr->activated) != 0) {
reclaim_cb(loopmgr);
phase = change_phase(phase);
grace = ISC_QSBR_GRACE(threads, phase);
atomic_store_relaxed(&qsbr->grace, grace);
}
}
/*
* On a quiet server that does not have enough network traffic to keep
* all its threads spinning, grace periods might extend indefinitely.
* So check if we have been waiting an unreasonably long time since
* the last phase change. If so, send a no-op async request to every
* thread to make them all cycle through a quiescent state.
*/
static void
maybe_wakeup(isc_loop_t *loop) {
isc_loopmgr_t *loopmgr = loop->loopmgr;
isc_qsbr_t *qsbr = &loopmgr->qsbr;
/*
* ATOMIC: relaxed is OK here because we don't use any values guarded
* by the `activated` flags.
*/
if (atomic_load_relaxed(&qsbr->activated) == 0) {
return;
}
if (loop->shuttingdown) {
qsbr_shutdown(loopmgr);
return;
}
/*
* ATOMIC: relaxed, because the `transition_time` doesn't guard any
* other values, just the isc_loopmgr_wakeup() call below.
*/
atomic_uint_fast64_t *qsbr_ttp = &qsbr->transition_time;
isc_nanosecs_t now = isc_time_monotonic();
isc_nanosecs_t start = atomic_load_relaxed(qsbr_ttp);
if (now < start + MAX_GRACE_PERIOD_NS) {
return;
}
/*
* To stop other threads from also invoking `isc_loopmgr_wakeup()`,
* we try to push the timer into the future (expecting that it will
* not trigger again), and quit if someone else got there first.
* ATOMIC: relaxed, as before; strong, because there is no retry loop.
*/
if (!atomic_compare_exchange_strong_relaxed(qsbr_ttp, &start, now)) {
return;
}
TRACE("long grace period of %llu ns, waking up other threads",
(unsigned long long)(now - start));
isc_loopmgr_wakeup(loopmgr);
}
/*
* Callers use the fuzzy barrier to ensure only one thread can enter
* this function at a time.
*
* Phase transitions happen at roughly the same frequency that IO
* event loops cycle, limited by the slowest loop in each cycle.
*/
static void
phase_transition(isc_loop_t *loop, isc_qsbr_phase_t current_phase) {
isc_loopmgr_t *loopmgr = loop->loopmgr;
isc_qsbr_t *qsbr = &loopmgr->qsbr;
if (loop->shuttingdown) {
qsbr_shutdown(loopmgr);
return;
}
/*
* After we change phase, threads will be in either the `current_phase`
* or the `next_phase`. We will reclaim memory from the `third_phase`.
*
* ATOMIC: relaxed is OK here because the necessary synchronization
* happens in `reclaim_cb()`.
*/
isc_qsbr_phase_t next_phase = change_phase(current_phase);
isc_qsbr_phase_t third_phase = change_phase(next_phase);
bool activated = atomic_load_relaxed(&qsbr->activated) &
active_bit(third_phase);
/*
* Reset the wakeup timer, and log the length of the grace period.
* ATOMIC: relaxed, per the commentary in `maybe_wakeup()`.
*/
atomic_uint_fast64_t *qsbr_tt = &qsbr->transition_time;
isc_nanosecs_t now = isc_time_monotonic();
isc_nanosecs_t start = atomic_exchange_relaxed(qsbr_tt, now);
TRACE("phase %u -> %u after grace period of %f ms", current_phase,
next_phase, (double)(now - start) / NS_PER_MS);
UNUSED(start); /* ifndef TRACE() */
/*
* Work out the threads counter for this grace period.
*
* We need to add one for any reclamation worker thread, to
* prevent us from changing phase before the work is done. If
* we change too early, any newly detached objects will be
* marked with the same phase as the running reclaimer, which
* might lead to them being free()d too soon.
*/
uint32_t threads = isc_loopmgr_nloops(loopmgr) + (activated ? 1 : 0);
/*
* Start the new grace period.
*
* ATOMIC: release, to pair with the load-acquire in `reclaim_cb()`
* which is spawned in a separate worker thread.
*/
uint32_t grace = ISC_QSBR_GRACE(threads, next_phase);
atomic_store_release(&qsbr->grace, grace);
if (activated) {
isc_work_enqueue(loop, reclaim_cb, reclaimed_cb, loopmgr);
}
}
/*
* This function is called once per cycle of each IO event loop by the
* `uv_prepare` callback below.
*/
void
isc__qsbr_quiescent_state(isc_loop_t *loop) {
isc_loopmgr_t *loopmgr = loop->loopmgr;
isc_qsbr_t *qsbr = &loopmgr->qsbr;
/*
* ATOMIC: relaxed. If we are in phase then we don't need to
* synchronize; if we are not then this thread's presence in
* the thread counter will prevent the phase from changing
* before we get to the fuzzy barrier.
*/
isc_qsbr_phase_t phase = global_phase(qsbr, memory_order_relaxed);
if (loop->qsbr_phase == phase) {
maybe_wakeup(loop);
return;
}
/*
* Enter the current phase and count us out of the previous phase.
*/
loop->qsbr_phase = phase;
if (fuzzy_barrier_not_yet(qsbr)) {
maybe_wakeup(loop);
return;
}
/*
* We were the last thread to enter the current phase so the
* grace period is up. No other thread can reach this point.
*/
phase_transition(loop, phase);
}
void
isc__qsbr_quiescent_cb(uv_prepare_t *handle) {
isc_loop_t *loop = uv_handle_get_data((uv_handle_t *)handle);
isc__qsbr_quiescent_state(loop);
}
static void
reclaimed_cb(void *arg) {
/* we are back on a loop thread */
isc_loopmgr_t *loopmgr = arg;
isc_qsbr_t *qsbr = &loopmgr->qsbr;
isc_loop_t *loop = CURRENT_LOOP(loopmgr);
/*
* Remove the reclaimers from the thread count, so that the
* next grace period can start.
*/
if (fuzzy_barrier_not_yet(qsbr)) {
return;
}
/*
* The reclaimers were the last thread to be counted out: every
* other thread already passed through a quiescent state.
*
* We expect loop->qsbr_phase == global_phase() at this point,
* except during shutdown when the phase shifts rapidly. Also,
* the current loop might not have received the shutdown
* message yet, so it seems easiest to omit the assertion.
*
* ATOMIC: relaxed, the fuzzy barrier already synchronized.
*/
TRACE("reclaimers overran");
phase_transition(loop, global_phase(qsbr, memory_order_relaxed));
}
static void
reclaim_cb(void *arg) {
/* we are on a work thread not a loop thread */
isc_loopmgr_t *loopmgr = arg;
isc_qsbr_t *qsbr = &loopmgr->qsbr;
/*
* The global phase has just been bumped by a `phase_transition()`
* and it cannot change again until the grace period is up, which
* cannot happen until we have finished working.
*
* ATOMIC: acquire, to pair with the release in `phase_transition()`.
*
* The phase we are to clean up is 2 before the current phase,
* which is the same as the one after the current phase (mod 3).
*/
isc_qsbr_phase_t cur_phase = global_phase(qsbr, memory_order_acquire);
isc_qsbr_phase_t third_phase = change_phase(cur_phase);
unsigned int third_bit = active_bit(third_phase);
/*
* If any reclaimers need to be called again later, they can use
* `isc_qsbr_activate()`, so we need to clear the bit first.
*
* ATOMIC: acquire, so that `isc_qsbr_activate()` happens before
* the callbacks are invoked.
*/
uint32_t activated = atomic_fetch_and_explicit(
&qsbr->activated, ~third_bit, memory_order_acquire);
/* this can happen when we are racing to clean up on shutdown */
if ((activated & third_bit) == 0) {
return;
}
isc_qsbr_registered_t *reclaimer = ISC_STACK_TOP(qsbreclaimers);
while (reclaimer != NULL) {
reclaimer->func(third_phase);
reclaimer = ISC_SLINK_NEXT(reclaimer, link);
}
}
void
isc__qsbr_register(isc_qsbr_registered_t *reclaimer) {
REQUIRE(reclaimer->func != NULL);
ISC_STACK_PUSH(qsbreclaimers, reclaimer, link);
}
/*
* ATOMIC: This function needs to ensure that the global phase is read
* after a write has committed. Acquire/release ordering is not sufficient
* for ordering between separate atomics (the data structure's root pointer
* and the global phase), so it must be sequentially consistent.
*
* In general, the phases up to and including the next phase transition
* look like:
*
* 1. local phase
* 2. global phase
* 3. next phase
* 1. third phase
*
* i.e. some threads are still one behind the global phase, on the same
* phase that will be cleaned up immediately after the phase transition.
*
* This function is called just after a write commits. It's likely that
* some threads on the global phase (2) are using a version of the data
* structure from before the write, and they can continue using it while
* the straggler threads (1) catch up and cause a phase transition.
*
* The writer can be one of the straggler threads. If it incorrectly marks
* cleanup work with its local phase (1), memory will be reclaimed
* immediately after the next phase transition (when the third phase is
* also 1), which could be almost immediately when the writer returns to
* the event loop. This will cause a use-after-free for existing readers
* (in phase 2).
*
* More straightforwardly, we need to be able to queue up reclaim work from
* a thread that isn't running a loop, which also means this function has
* to return the global phase.
*/
isc_qsbr_phase_t
isc_qsbr_phase(isc_loopmgr_t *loopmgr) {
isc_qsbr_t *qsbr = &loopmgr->qsbr;
return (global_phase(qsbr, memory_order_seq_cst));
}
void
isc_qsbr_activate(isc_loopmgr_t *loopmgr, isc_qsbr_phase_t phase) {
/*
* ATOMIC: release ordering ensures that writing the cleanup lists
* happens before the callback is invoked from a worker thread.
*/
atomic_fetch_or_release(&loopmgr->qsbr.activated, active_bit(phase));
}

View file

@ -79,8 +79,7 @@ isc_quota_release(isc_quota_t *quota) {
return;
}
isc_job_t *job = caa_container_of(node, isc_job_t, wfcq_node);
__tsan_acquire(job);
isc_job_t *job = isc_urcu_container(node, isc_job_t, wfcq_node);
job->cb(job->cbarg);
}

View file

@ -61,7 +61,7 @@ thread_wrap(isc_threadfunc_t func, void *arg) {
.func = func,
.arg = arg,
};
__tsan_release(wrap);
return (wrap);
}
@ -84,12 +84,8 @@ thread_body(struct thread_wrap *wrap) {
__tsan_acquire(wrap);
free(wrap);
rcu_register_thread();
ret = func(arg);
rcu_unregister_thread();
return (ret);
}
@ -101,10 +97,14 @@ thread_run(void *wrap) {
*/
isc__iterated_hash_initialize();
rcu_register_thread();
void *ret = thread_body(wrap);
isc__iterated_hash_shutdown();
rcu_unregister_thread();
return (ret);
}

View file

@ -18,8 +18,8 @@
#include <isc/hashmap.h>
#include <isc/ht.h>
#include <isc/list.h>
#include <isc/qsbr.h>
#include <isc/rwlock.h>
#include <isc/urcu.h>
#include <isc/util.h>
#include <dns/fixedname.h>

View file

@ -18,6 +18,7 @@
#include <unistd.h>
#include <isc/async.h>
#include <isc/barrier.h>
#include <isc/list.h>
#include <isc/log.h>
#include <isc/loop.h>
@ -25,7 +26,6 @@
#include <isc/mem.h>
#include <isc/mutex.h>
#include <isc/os.h>
#include <isc/qsbr.h>
#include <isc/random.h>
#include <isc/refcount.h>
#include <isc/rwlock.h>
@ -33,6 +33,7 @@
#include <isc/tid.h>
#include <isc/time.h>
#include <isc/timer.h>
#include <isc/urcu.h>
#include <isc/util.h>
#include <isc/uv.h>
@ -380,8 +381,7 @@ load_multi(struct bench_state *bctx) {
size_t count = 0;
uint64_t start;
dns_qpmulti_create(bctx->mctx, bctx->loopmgr, &item_methods, NULL,
&bctx->multi);
dns_qpmulti_create(bctx->mctx, &item_methods, NULL, &bctx->multi);
/* initial contents of the trie */
start = isc_time_monotonic();
@ -884,6 +884,9 @@ int
main(void) {
isc_loopmgr_t *loopmgr = NULL;
isc_mem_t *mctx = NULL;
setlinebuf(stdout);
uint32_t nloops;
const char *env_workers = getenv("ISC_TASK_WORKERS");

View file

@ -22,11 +22,11 @@
#define UNIT_TESTING
#include <cmocka.h>
#include <isc/qsbr.h>
#include <isc/random.h>
#include <isc/refcount.h>
#include <isc/result.h>
#include <isc/string.h>
#include <isc/urcu.h>
#include <isc/util.h>
#include <dns/name.h>

View file

@ -27,10 +27,10 @@
#include <isc/loop.h>
#include <isc/magic.h>
#include <isc/mem.h>
#include <isc/qsbr.h>
#include <isc/random.h>
#include <isc/refcount.h>
#include <isc/rwlock.h>
#include <isc/urcu.h>
#include <isc/util.h>
#include <dns/log.h>
@ -367,14 +367,13 @@ many_transactions(void *arg) {
UNUSED(arg);
dns_qpmulti_t *qpm = NULL;
dns_qpmulti_create(mctx, loopmgr, &test_methods, NULL, &qpm);
dns_qpmulti_create(mctx, &test_methods, NULL, &qpm);
qpm->writer.write_protect = true;
for (size_t n = 0; n < TRANSACTION_COUNT; n++) {
TRACE("transaction %zu", n);
one_transaction(qpm);
isc__qsbr_quiescent_state(isc_loop_current(loopmgr));
isc_loopmgr_wakeup(loopmgr);
rcu_quiescent_state();
}
dns_qpmulti_destroy(&qpm);
@ -387,6 +386,7 @@ ISC_RUN_TEST_IMPL(qpmulti) {
setup_items();
isc_loop_setup(isc_loop_main(loopmgr), many_transactions, NULL);
isc_loopmgr_run(loopmgr);
rcu_barrier();
isc_loopmgr_destroy(&loopmgr);
isc_log_destroy(&dns_lctx);
}

View file

@ -4,6 +4,7 @@ AM_CPPFLAGS += \
$(LIBISC_CFLAGS) \
$(LIBDNS_CFLAGS) \
$(LIBNS_CFLAGS) \
$(LIBURCU_CFLAGS) \
$(LIBUV_CFLAGS) \
-I$(top_srcdir)/lib/isc \
-I$(top_srcdir)/lib/dns

View file

@ -64,7 +64,7 @@ dns_test_makeview(const char *name, bool with_cache, dns_view_t **viewp) {
dns_view_t *view = NULL;
dns_cache_t *cache = NULL;
result = dns_view_create(mctx, loopmgr, dns_rdataclass_in, name, &view);
result = dns_view_create(mctx, dns_rdataclass_in, name, &view);
if (result != ISC_R_SUCCESS) {
return (result);
}

View file

@ -18,9 +18,9 @@
#include <isc/buffer.h>
#include <isc/loop.h>
#include <isc/magic.h>
#include <isc/qsbr.h>
#include <isc/refcount.h>
#include <isc/rwlock.h>
#include <isc/urcu.h>
#include <isc/util.h>
#include <dns/name.h>
@ -255,10 +255,10 @@ qp_test_dumpchunks(dns_qp_t *qp) {
dumpqp(qp, "qp");
for (qp_chunk_t c = 0; c < qp->chunk_max; c++) {
printf("qp %p chunk %u base %p "
"used %u free %u immutable %u phase %u\n",
"used %u free %u immutable %u discounted %u\n",
qp, c, qp->base->ptr[c], qp->usage[c].used,
qp->usage[c].free, qp->usage[c].immutable,
qp->usage[c].phase);
qp->usage[c].discounted);
used += qp->usage[c].used;
free += qp->usage[c].free;
}

View file

@ -1,3 +0,0 @@
# Uninstrumented library.
called_from_lib:libfstrm.so
called_from_lib:libdummyrpz.so