diff --git a/configure.ac b/configure.ac index 1cc2a537c8..255dfe10dd 100644 --- a/configure.ac +++ b/configure.ac @@ -451,6 +451,11 @@ AC_COMPILE_IFELSE( # AX_GCC_FUNC_ATTRIBUTE([returns_nonnull]) +# +# how to link math functions? +# +AC_SEARCH_LIBS([sqrt],[m]) + # # check if we have kqueue # diff --git a/lib/isc/Makefile.am b/lib/isc/Makefile.am index 87ae209b8d..bf934a8899 100644 --- a/lib/isc/Makefile.am +++ b/lib/isc/Makefile.am @@ -36,6 +36,7 @@ libisc_la_HEADERS = \ include/isc/hashmap.h \ include/isc/heap.h \ include/isc/hex.h \ + include/isc/histo.h \ include/isc/hmac.h \ include/isc/ht.h \ include/isc/httpd.h \ @@ -136,6 +137,7 @@ libisc_la_SOURCES = \ hashmap.c \ heap.c \ hex.c \ + histo.c \ hmac.c \ ht.c \ httpd.c \ diff --git a/lib/isc/histo.c b/lib/isc/histo.c new file mode 100644 index 0000000000..6d6fc65ec7 --- /dev/null +++ b/lib/isc/histo.c @@ -0,0 +1,599 @@ +/* + * Copyright (C) Internet Systems Consortium, Inc. ("ISC") + * + * SPDX-License-Identifier: MPL-2.0 + * + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, you can obtain one at https://mozilla.org/MPL/2.0/. + * + * See the COPYRIGHT file distributed with this work for additional + * information regarding copyright ownership. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include + +/* + * XXXFANF to be added to isc/util.h by a commmit in a qp-trie + * feature branch + */ +#define STRUCT_FLEX_SIZE(pointer, member, count) \ + (sizeof(*(pointer)) + sizeof(*(pointer)->member) * (count)) + +#define HISTO_MAGIC ISC_MAGIC('H', 's', 't', 'o') +#define HISTO_VALID(p) ISC_MAGIC_VALID(p, HISTO_MAGIC) + +/* + * Natural logarithms of 2 and 10 for converting precisions between + * binary and decimal significant figures + */ +#define LN_2 0.693147180559945309 +#define LN_10 2.302585092994045684 + +/* + * The chunks array has a static size for simplicity, fixed as the + * number of bits in a value. That means we waste a little extra space + * that could be saved by omitting the exponents that are covered by + * `sigbits`. The following macros calculate (at run time) the exact + * number of buckets when we need to do accurate bounds checks. + * + * For a discussion of the floating point terminology, see the + * commmentary on `value_to_key()` below. + * + * We often use the variable names `c` for chunk and `b` for bucket. + */ +#define CHUNKS 64 + +#define DENORMALS(hg) ((hg)->sigbits - 1) +#define MANTISSAS(hg) (1 << (hg)->sigbits) +#define EXPONENTS(hg) (CHUNKS - DENORMALS(hg)) +#define BUCKETS(hg) (EXPONENTS(hg) * MANTISSAS(hg)) + +#define MAXCHUNK(hg) EXPONENTS(hg) +#define CHUNKSIZE(hg) MANTISSAS(hg) +#define CHUNKBYTES(hg) (CHUNKSIZE(hg) * sizeof(hg_bucket_t)) + +typedef atomic_uint_fast64_t hg_bucket_t; +typedef atomic_ptr(hg_bucket_t) hg_chunk_t; + +#define ISC_HISTO_FIELDS \ + uint magic; \ + uint sigbits; \ + isc_mem_t *mctx + +struct isc_histo { + ISC_HISTO_FIELDS; + /* chunk array must be first after common fields */ + hg_chunk_t chunk[CHUNKS]; +}; + +/* + * To convert between ranks and values, we scan the histogram to find the + * required rank. Each per-chunk total contains the sum of all the buckets + * in that chunk, so we can scan a chunk at a time rather than a bucket at + * a time. + * + * XXXFANF When `sigbits` is large, the chunks get large and slow to scan. + * If this turns out to be a problem, we could store ranks as well as + * values in the summary, and use a binary search. + */ +struct isc_histosummary { + ISC_HISTO_FIELDS; + /* chunk array must be first after common fields */ + uint64_t *chunk[CHUNKS]; + uint64_t total[CHUNKS]; + uint64_t population; + uint64_t maximum; + size_t size; + uint64_t buckets[]; +}; + +/**********************************************************************/ + +#define OUTARG(ptr, val) \ + ({ \ + if ((ptr) != NULL) { \ + *(ptr) = (val); \ + } \ + }) + +static inline uint64_t +interpolate(uint64_t span, uint64_t mul, uint64_t div) { + double frac = div > 0 ? (double)mul / (double)div : mul > 0 ? 1 : 0; + return ((uint64_t)round(span * frac)); +} + +/**********************************************************************/ + +void +isc_histo_create(isc_mem_t *mctx, uint sigbits, isc_histo_t **hgp) { + REQUIRE(sigbits >= ISC_HISTO_MINBITS); + REQUIRE(sigbits <= ISC_HISTO_MAXBITS); + REQUIRE(hgp != NULL); + REQUIRE(*hgp == NULL); + + isc_histo_t *hg = isc_mem_get(mctx, sizeof(*hg)); + *hg = (isc_histo_t){ + .magic = HISTO_MAGIC, + .sigbits = sigbits, + }; + isc_mem_attach(mctx, &hg->mctx); + + *hgp = hg; +} + +void +isc_histo_destroy(isc_histo_t **hgp) { + REQUIRE(hgp != NULL); + REQUIRE(HISTO_VALID(*hgp)); + + isc_histo_t *hg = *hgp; + *hgp = NULL; + + for (uint c = 0; c < CHUNKS; c++) { + if (hg->chunk[c] != NULL) { + isc_mem_put(hg->mctx, hg->chunk[c], CHUNKBYTES(hg)); + } + } + isc_mem_putanddetach(&hg->mctx, hg, sizeof(*hg)); +} + +/**********************************************************************/ + +uint +isc_histo_sigbits(isc_historead_t hr) { + REQUIRE(HISTO_VALID(hr.hg)); + return (hr.hg->sigbits); +} + +/* + * use precomputed logs and builtins to avoid linking with libm + */ + +uint +isc_histo_bits_to_digits(uint bits) { + REQUIRE(bits >= ISC_HISTO_MINBITS); + REQUIRE(bits <= ISC_HISTO_MAXBITS); + return (floor(1.0 - (1.0 - bits) * LN_2 / LN_10)); +} + +uint +isc_histo_digits_to_bits(uint digits) { + REQUIRE(digits >= ISC_HISTO_MINDIGITS); + REQUIRE(digits <= ISC_HISTO_MAXDIGITS); + return (ceil(1.0 - (1.0 - digits) * LN_10 / LN_2)); +} + +/**********************************************************************/ + +/* + * The way we map buckets to keys is what gives the histogram a + * consistent relative error across the whole range of `uint64_t`. + * The mapping is log-linear: a chunk key is the logarithm of part + * of the value (in other words, chunks are spaced exponentially); + * and a bucket within a chunk is a linear function of another part + * of the value. + * + * This log-linear spacing is similar to the size classes used by + * jemalloc. It is also the way floating point numbers work: the + * exponent is the log part, and the mantissa is the linear part. + * + * So, a chunk number is the log (base 2) of a `uint64_t`, which is + * between 0 and 63, which is why there are up to 64 chunks. In + * floating point terms the chunk number is the exponent. The + * histogram's number of significant bits is the size of the + * mantissa, which indexes buckets within each chunk. + * + * A fast way to get the logarithm of a positive integer is CLZ, + * count leading zeroes. + * + * Chunk zero is special. Chunk 1 covers values between `CHUNKSIZE` + * and `CHUNKSIZE * 2 - 1`, where `CHUNKSIZE == exponent << sigbits + * == 1 << sigbits`. Each chunk has CHUNKSIZE buckets, so chunk 1 has + * one value per bucket. There are CHUNKSIZE values before chunk 1 + * which map to chunk 0, so it also has one value per bucket. (Hence + * the first two chunks have one value per bucket.) The values in + * chunk 0 correspond to denormal nubers in floating point terms. + * They are also the values where `63 - sigbits - clz` would be less + * than one if denormals were not handled specially. + * + * This branchless conversion is due to Paul Khuong: see bin_down_of() in + * https://pvk.ca/Blog/2015/06/27/linear-log-bucketing-fast-versatile-simple/ + */ +static inline uint +value_to_key(isc_historead_t hr, uint64_t value) { + /* fast path */ + const isc_histo_t *hg = hr.hg; + /* ensure that denormal numbers are all in chunk zero */ + uint64_t chunked = value | CHUNKSIZE(hg); + int clz = __builtin_clzll((unsigned long long)(chunked)); + /* actually 1 less than the exponent except for denormals */ + uint exponent = 63 - hg->sigbits - clz; + /* mantissa has leading bit set except for denormals */ + uint mantissa = value >> exponent; + /* leading bit of mantissa adds one to exponent */ + return ((exponent << hg->sigbits) + mantissa); +} + +/* + * Inverse functions of `value_to_key()`, to get the minimum and + * maximum values that map to a particular key. + * + * We must not cause undefined behaviour by hitting integer limits, + * which is a risk when we aim to cover the entire range of `uint64_t`. + * + * The maximum value in the last bucket is UINT64_MAX, which + * `key_to_maxval()` gets by deliberately subtracting `0 - 1`, + * undeflowing a `uint64_t`. That is OK when unsigned. + * + * We must take care not to shift too much in `key_to_minval()`. + * The largest key passed by `key_to_maxval()` is `BUCKETS(hg)`, so + * `exponent == EXPONENTS(hg) - 1 == 64 - sigbits` + * which is always less than 64, so the size of the shift is OK. + * + * The `mantissa` in this edge case is just `chunksize`, which when + * shifted becomes `1 << 64` which overflows `uint64_t` Again this is + * OK when unsigned, so the return value is zero. + */ + +static inline uint64_t +key_to_minval(isc_historead_t hr, uint key) { + uint chunksize = CHUNKSIZE(hr.hg); + uint exponent = (key / chunksize) - 1; + uint64_t mantissa = (key % chunksize) + chunksize; + return (key < chunksize ? key : mantissa << exponent); +} + +static inline uint64_t +key_to_maxval(isc_historead_t hr, uint key) { + return (key_to_minval(hr, key + 1) - 1); +} + +/**********************************************************************/ + +static hg_bucket_t * +key_to_new_bucket(isc_histo_t *hg, uint key) { + /* slow path */ + uint chunksize = CHUNKSIZE(hg); + uint chunk = key / chunksize; + uint bucket = key % chunksize; + size_t bytes = CHUNKBYTES(hg); + hg_bucket_t *old_cp = NULL; + hg_bucket_t *new_cp = isc_mem_getx(hg->mctx, bytes, ISC_MEM_ZERO); + hg_chunk_t *cpp = &hg->chunk[chunk]; + if (atomic_compare_exchange_strong_acq_rel(cpp, &old_cp, new_cp)) { + return (&new_cp[bucket]); + } else { + /* lost the race, so use the winner's chunk */ + isc_mem_put(hg->mctx, new_cp, bytes); + return (&old_cp[bucket]); + } +} + +static hg_bucket_t * +get_chunk(isc_historead_t hr, uint chunk) { + const hg_chunk_t *cpp = &hr.hg->chunk[chunk]; + return (atomic_load_acquire(cpp)); +} + +static inline hg_bucket_t * +key_to_bucket(isc_historead_t hr, uint key) { + /* fast path */ + uint chunksize = CHUNKSIZE(hr.hg); + uint chunk = key / chunksize; + uint bucket = key % chunksize; + hg_bucket_t *cp = get_chunk(hr, chunk); + return (cp == NULL ? NULL : &cp[bucket]); +} + +static inline uint64_t +get_key_count(isc_historead_t hr, uint key) { + hg_bucket_t *bp = key_to_bucket(hr, key); + return (bp == NULL ? 0 : atomic_load_relaxed(bp)); +} + +static inline void +add_key_count(isc_histo_t *hg, uint key, uint64_t inc) { + if (inc > 0) { + hg_bucket_t *bp = key_to_bucket(hg, key); + bp = bp != NULL ? bp : key_to_new_bucket(hg, key); + atomic_fetch_add_relaxed(bp, inc); + } +} + +/**********************************************************************/ + +void +isc_histo_add(isc_histo_t *hg, uint64_t value, uint64_t inc) { + REQUIRE(HISTO_VALID(hg)); + add_key_count(hg, value_to_key(hg, value), inc); +} + +void +isc_histo_inc(isc_histo_t *hg, uint64_t value) { + isc_histo_add(hg, value, 1); +} + +void +isc_histo_put(isc_histo_t *hg, uint64_t min, uint64_t max, uint64_t count) { + REQUIRE(HISTO_VALID(hg)); + + uint kmin = value_to_key(hg, min); + uint kmax = value_to_key(hg, max); + + for (uint key = kmin; key <= kmax; key++) { + uint64_t mid = ISC_MIN(max, key_to_maxval(hg, key)); + double in_bucket = mid - min + 1; + double remaining = max - min + 1; + uint64_t inc = ceil(count * in_bucket / remaining); + add_key_count(hg, key, inc); + count -= inc; + min = mid + 1; + } +} + +isc_result_t +isc_histo_get(isc_historead_t hr, uint key, uint64_t *minp, uint64_t *maxp, + uint64_t *countp) { + REQUIRE(HISTO_VALID(hr.hg)); + + if (key < BUCKETS(hr.hg)) { + OUTARG(minp, key_to_minval(hr, key)); + OUTARG(maxp, key_to_maxval(hr, key)); + OUTARG(countp, get_key_count(hr, key)); + return (ISC_R_SUCCESS); + } else { + return (ISC_R_RANGE); + } +} + +void +isc_histo_next(isc_historead_t hr, uint *keyp) { + const isc_histo_t *hg = hr.hg; + + REQUIRE(HISTO_VALID(hg)); + REQUIRE(keyp != NULL); + + uint chunksize = CHUNKSIZE(hg); + uint buckets = BUCKETS(hg); + uint key = *keyp; + + key++; + while (key < buckets && key % chunksize == 0 && + key_to_bucket(hr, key) == NULL) + { + key += chunksize; + } + *keyp = key; +} + +void +isc_histo_merge(isc_histo_t **targetp, isc_historead_t source) { + REQUIRE(HISTO_VALID(source.hg)); + REQUIRE(targetp != NULL); + + if (*targetp != NULL) { + REQUIRE(HISTO_VALID(*targetp)); + } else { + isc_histo_create(source.hg->mctx, source.hg->sigbits, targetp); + } + + uint64_t min, max, count; + for (uint key = 0; + isc_histo_get(source, key, &min, &max, &count) == ISC_R_SUCCESS; + isc_histo_next(source, &key)) + { + isc_histo_put(*targetp, min, max, count); + } +} + +/**********************************************************************/ + +/* + * https://fanf2.user.srcf.net/hermes/doc/antiforgery/stats.pdf + * equation 4 (incremental mean) and equation 44 (incremental variance) + */ +void +isc_histo_moments(isc_historead_t hr, double *pm0, double *pm1, double *pm2) { + REQUIRE(HISTO_VALID(hr.hg)); + + double pop = 0.0; + double mean = 0.0; + double sigma = 0.0; + + uint64_t min, max, count; + for (uint key = 0; + isc_histo_get(hr, key, &min, &max, &count) == ISC_R_SUCCESS; + isc_histo_next(hr, &key)) + { + if (count == 0) { /* avoid division by zero */ + continue; + } + double value = min / 2.0 + max / 2.0; + double delta = value - mean; + pop += count; + mean += count * delta / pop; + sigma += count * delta * (value - mean); + } + + OUTARG(pm0, pop); + OUTARG(pm1, mean); + OUTARG(pm2, sqrt(sigma / pop)); +} + +/**********************************************************************/ + +void +isc_histosummary_create(isc_historead_t hr, isc_histosummary_t **hsp) { + const isc_histo_t *hg = hr.hg; + + REQUIRE(HISTO_VALID(hg)); + REQUIRE(hsp != NULL); + REQUIRE(*hsp == NULL); + + uint chunksize = CHUNKSIZE(hg); + hg_bucket_t *chunk[CHUNKS] = { NULL }; + + /* + * First, find out which chunks we will copy across and how much + * space they need. We take a copy of the chunk pointers because + * concurrent threads may add new chunks before we have finished. + */ + uint size = 0; + for (uint c = 0; c < CHUNKS; c++) { + chunk[c] = get_chunk(hg, c); + if (chunk[c] != NULL) { + size += chunksize; + } + } + + isc_histosummary_t *hs = + isc_mem_get(hg->mctx, STRUCT_FLEX_SIZE(hs, buckets, size)); + *hs = (isc_histosummary_t){ + .magic = HISTO_MAGIC, + .sigbits = hg->sigbits, + .size = size, + }; + isc_mem_attach(hg->mctx, &hs->mctx); + + /* + * Second, copy the contents of the buckets. The copied pointers + * are faster than get_key_count() because get_chunk()'s atomics + * would require re-fetching the chunk pointer for every bucket. + */ + uint maxkey = 0; + uint chunkbase = 0; + for (uint c = 0; c < CHUNKS; c++) { + if (chunk[c] == NULL) { + continue; + } + hs->chunk[c] = &hs->buckets[chunkbase]; + chunkbase += chunksize; + for (uint b = 0; b < chunksize; b++) { + uint64_t count = atomic_load_relaxed(&chunk[c][b]); + hs->chunk[c][b] = count; + hs->total[c] += count; + hs->population += count; + maxkey = (count == 0) ? maxkey : chunksize * c + b; + } + } + hs->maximum = key_to_maxval(hs, maxkey); + + *hsp = hs; +} + +void +isc_histosummary_destroy(isc_histosummary_t **hsp) { + REQUIRE(hsp != NULL); + REQUIRE(HISTO_VALID(*hsp)); + + isc_histosummary_t *hs = *hsp; + *hsp = NULL; + + isc_mem_putanddetach(&hs->mctx, hs, + STRUCT_FLEX_SIZE(hs, buckets, hs->size)); +} + +/**********************************************************************/ + +isc_result_t +isc_histo_value_at_rank(const isc_histosummary_t *hs, uint64_t rank, + uint64_t *valuep) { + REQUIRE(HISTO_VALID(hs)); + REQUIRE(valuep != NULL); + + uint maxchunk = MAXCHUNK(hs); + uint chunksize = CHUNKSIZE(hs); + uint64_t count = 0; + uint b, c; + + if (rank > hs->population) { + return (ISC_R_RANGE); + } + if (rank == hs->population) { + *valuep = hs->maximum; + return (ISC_R_SUCCESS); + } + + for (c = 0; c < maxchunk; c++) { + count = hs->total[c]; + if (rank < count) { + break; + } + rank -= count; + } + INSIST(c < maxchunk); + + for (b = 0; b < chunksize; b++) { + count = hs->chunk[c][b]; + if (rank < count) { + break; + } + rank -= count; + } + INSIST(b < chunksize); + + uint key = chunksize * c + b; + uint64_t min = key_to_minval(hs, key); + uint64_t max = key_to_maxval(hs, key); + *valuep = min + interpolate(max - min, rank, count); + + return (ISC_R_SUCCESS); +} + +void +isc_histo_rank_of_value(const isc_histosummary_t *hs, uint64_t value, + uint64_t *rankp) { + REQUIRE(HISTO_VALID(hs)); + REQUIRE(rankp != NULL); + + uint key = value_to_key(hs, value); + uint chunksize = CHUNKSIZE(hs); + uint kc = key / chunksize; + uint kb = key % chunksize; + uint64_t rank = 0; + + for (uint c = 0; c < kc; c++) { + rank += hs->total[c]; + } + for (uint b = 0; b < kb; b++) { + rank += hs->chunk[kc][b]; + } + + uint64_t count = hs->chunk[kc][kb]; + uint64_t min = key_to_minval(hs, key); + uint64_t max = key_to_maxval(hs, key); + + *rankp = rank + interpolate(count, value - min, max - min); +} + +isc_result_t +isc_histo_quantile(const isc_histosummary_t *hs, double p, uint64_t *valuep) { + if (p < 0.0 || p > 1.0) { + return (ISC_R_RANGE); + } + double rank = round(hs->population * p); + return (isc_histo_value_at_rank(hs, (uint64_t)rank, valuep)); +} + +void +isc_histo_cdf(const isc_histosummary_t *hs, uint64_t value, double *pp) { + uint64_t rank; + isc_histo_rank_of_value(hs, value, &rank); + *pp = (double)rank / (double)hs->population; +} + +/**********************************************************************/ diff --git a/lib/isc/include/isc/histo.h b/lib/isc/include/isc/histo.h new file mode 100644 index 0000000000..ad55dac889 --- /dev/null +++ b/lib/isc/include/isc/histo.h @@ -0,0 +1,408 @@ +/* + * Copyright (C) Internet Systems Consortium, Inc. ("ISC") + * + * SPDX-License-Identifier: MPL-2.0 + * + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, you can obtain one at https://mozilla.org/MPL/2.0/. + * + * See the COPYRIGHT file distributed with this work for additional + * information regarding copyright ownership. + */ + +#pragma once + +#include + +#include + +/* + * An `isc_histo_t` is a thread-safe histogram of `uint64_t` values. + * It keeps a count of how many values land in each bucket. Use the + * `isc_histo_inc()`, `isc_histo_acc()`, and `isc_histo_put()` + * functions to add values to the histogram. + * + * Values are mapped to buckets by rounding them according to a + * configurable precision, expressed as a number of significant bits. + * The bits <-> digits functions convert betwen decimal significant + * digits (as in scientific notation) and binary significant bits. + * + * At the low end (near zero) there is one value per bucket, then two + * values, four, etc. + * + * You can use the `isc_histo_get()` function to export data from the + * histogram. The range of a bucket is returned as its minimum and + * maximum values, inclusive, i.e. a closed interval. Closed intervals + * are more inconvenient than half-open intervals, and half-open + * intervals are more common in C. We use closed intervals so we are + * able to express the maximum of the last bucket, UINT64_MAX, and + * because OpenMetrics histograms describe buckets as + * less-than-or-equal to a particular value. + * + * The size of a histogram depends on the range of values in the + * stream of samples, not the number of samples. Bucket counters are + * 64 bits each, and are allocated in chunks of `1 << sigbits` where + * `sigbits` is the histogram's configured precision. There are at + * most 64 chunks, one for each bit of a 64 bit value. Histograms with + * greater precision have larger chunks. + * + * The number of values that map to a bucket (1, 2, 4, 8, ...) is the + * same in each chunk. Chunks 0 and 1 have one value per bucket, (see + * `ISC_HISTO_UNITBUCKETS()` below), chunk 2 has 2 values per bucket, + * and they increase by a factor of 2 in each successive bucket. + * + * The update cost is roughly constant and very small (not much more + * than an atomic increment). It mostly depends on cache locality and + * thread contention. + * + * To get statistical properties of a histogram (population, mean, + * standard deviation, CDF, quantiles, ranks) you must first construct + * an `isc_histosummary_t`. A summary is a read-only snapshot of a + * histogram augmented with information for calculating statistics + * more efficiently. + * + * There is no overflow checking for bucket counters. It takes a few + * nanoseconds to add a sample to the histogram, so it would take at + * least a few CPU-centuries to cause an overflow. Aggregate + * statistics from a quarter of a million CPUs might overflow in a + * day. (Provided that in both examples the CPUs are doing nothing + * apart from repeatedly adding 1 to histogram buckets.) + */ + +typedef struct isc_histo isc_histo_t; +typedef struct isc_histosummary isc_histosummary_t; + +/* + * For functions that can take either type. + */ +typedef union isc_historead { + const isc_histo_t *hg; + const isc_histosummary_t *hs; +} isc_historead_t __attribute__((__transparent_union__)); + +#define ISC_HISTO_MINBITS 1 +#define ISC_HISTO_MAXBITS 18 +#define ISC_HISTO_MINDIGITS 1 +#define ISC_HISTO_MAXDIGITS 6 + +/* + * How many values map 1:1 to buckets for a given number of sigbits? + * These are the buckets at the low end, starting from zero. + */ +#define ISC_HISTO_UNITBUCKETS(sigbits) (2 << (sigbits)) + +void +isc_histo_create(isc_mem_t *mctx, uint sigbits, isc_histo_t **hgp); +/*%< + * Create a histogram. + * + * The relative error of values stored in the histogram is less than + * `pow(2.0, -sigbits)`. + * + * Requires: + *\li `sigbits >= ISC_HISTO_MINBITS` + *\li `sigbits <= ISC_HISTO_MAXBITS` + *\li `hgp != NULL` + *\li `*hgp == NULL` + * + * Ensures: + *\li `*hgp` is a pointer to a histogram. + */ + +void +isc_histo_destroy(isc_histo_t **hgp); +/*%< + * Destroy a histogram + * + * Requires: + *\li `hgp != NULL` + *\li `*hgp` is a pointer to a valid histogram + * + * Ensures: + *\li all memory allocated by the histogram has been released + *\li `*hgp` is NULL + */ + +uint +isc_histo_sigbits(isc_historead_t hr); +/*%< + * Get the histogram's `sigbits` setting + * + * Requires: + *\li `hg` is a pointer to a valid histogram + */ + +uint +isc_histo_bits_to_digits(uint bits); +/*%< + * Convert binary significant figures to decimal significant figures, + * rounding down, i.e. get the decimal precision you can expect from a + * given number of significant bits. + * + * Requires: + *\li `bits >= ISC_HISTO_MINBITS` + *\li `bits <= ISC_HISTO_MAXBITS` + */ + +uint +isc_histo_digits_to_bits(uint digits); +/*%< + * Convert decimal significant figures to binary significant figures, + * rounding up, i.e. get the number of significant bits required to + * achieve the given decimal precision. + * + * Requires: + *\li `digits >= ISC_HISTO_MINDIGS` + *\li `digits <= ISC_HISTO_MAXDIGS` + */ + +void +isc_histo_inc(isc_histo_t *hg, uint64_t value); +/*%< + * Add 1 to the value's bucket + * + * Requires: + *\li `hg` is a pointer to a valid histogram + */ + +void +isc_histo_add(isc_histo_t *hg, uint64_t value, uint64_t inc); +/*%< + * Add an arbitrary increment to the value's bucket + * + * Note: there is no counter overflow checking + * + * Requires: + *\li `hg` is a pointer to a valid histogram + */ + +void +isc_histo_put(isc_histo_t *hg, uint64_t min, uint64_t max, uint64_t count); +/* + * Import a collection of samples, where values between `min` and + * `max` inclusive occurred `count` times. This function is a + * counterpart to `isc_histo_get()`. + * + * Note: there is no counter overflow checking + * + * Requires: + *\li `min <= max` + *\li `hg` is a pointer to a valid histogram + */ + +isc_result_t +isc_histo_get(isc_historead_t hr, uint key, uint64_t *minp, uint64_t *maxp, + uint64_t *countp); +/*%< + * Export information about a bucket. + * + * This can be used as an iterator, by initializing `key` to zero + * and incrementing by one or using `isc_histo_next()` until + * `isc_histo_get()` returns ISC_R_RANGE. The number of iterations is + * less than `64 << sigbits`. (64 for the maximum number of chunks, + * multiplied by the size of each chunk.) + * + * It is also a counterpart to `isc_histo_put()`. + * + * If `minp` is non-NULL it is set to the minimum inclusive value + * that maps to this bucket. + * + * If `maxp` is non-NULL it is set to the maximum inclusive value + * that maps to this bucket. + * + * If `countp` is non-NULL it is set to the bucket's counter, + * which can be zero. + * + * Requires: + *\li `hr` is a pointer to a valid histogram or summary + * + * Returns: + *\li ISC_R_SUCCESS, if `key` is valid + *\li ISC_R_RANGE, otherwise + */ + +void +isc_histo_next(isc_historead_t hr, uint *keyp); +/*%< + * Skip to the next key, omitting chunks of unallocated buckets. + * + * This function does not skip buckets that have been allocated but + * are zero. A chunk contains `1 << sigbits` buckets, and buckets + * are created in bulk one chunk at a time. + * + * Example: + * + * uint64_t min, max, count; + * for (uint key = 0; + * isc_histo_get(hg, key, &min, &max, &count) == ISC_R_SUCCESS; + * isc_histo_next(hg, &key)) + * { + * // do something with the bucket + * } + * + * Requires: + *\li `hr` is a pointer to a valid histogram or summary + *\li `keyp != NULL` + */ + +void +isc_histo_merge(isc_histo_t **targetp, isc_historead_t source); +/*%< + * Increase the counts in `*ptarget` by the counts recorded in `source` + * + * If `*targetp == NULL` then `*ptarget` is set to point to a new + * histogram with the same `sigbits` as the `source`. + * + * This function uses `isc_histo_get()` and `isc_histo_next()` to + * export the data from `source`, and `isc_histo_put()` to import it + * into `*ptarget`. + * + * Requires: + *\li `targetp != NULL` + *\li `*targetp` is NULL or a pointer to a valid histogram + *\li `source` is a pointer to a valid histogram or summary + * + * Ensures: + *\li `*targetp` is a pointer to a valid histogram + */ + +/**********************************************************************/ + +void +isc_histosummary_create(const isc_histo_t *hg, isc_histosummary_t **hsp); +/*%< + * Summarize a histogram for rank and quantile calculations. + * + * Requires: + *\li `hg` is a pointer to a valid histogram + *\li `hsp != NULL` + *\li `*hsp == NULL` + * + * Ensures: + *\li `*hsp` is a pointer to a histogram summary + */ + +void +isc_histosummary_destroy(isc_histosummary_t **hsp); +/*%< + * Destroy a histogram summary + * + * Requires: + *\li `hsp != NULL` + *\li `*hsp` is a pointer to a valid histogram summary + * + * Ensures: + *\li all memory allocated by the summary has been released + *\li `*hsp == NULL` + */ + +void +isc_histo_moments(isc_historead_t hr, double *pm0, double *pm1, double *pm2); +/*%< + * Get the population, mean, and standard deviation of a histogram + * + * If `pm0` is non-NULL it is set to the population of the histogram. + * (Strictly speaking, the zeroth moment is `pop / pop == 1`.) + * + * If `pm1` is non-NULL it is set to the mean (first moment) of the + * recorded data. + * + * If `pm2` is non-NULL it is set to the standard deviation of the + * recorded data. The standard deviation is the square root of the + * variance, which is the second moment about the mean. + * + * Requires: + *\li `hr` is a pointer to a valid histogram or summary + */ + +isc_result_t +isc_histo_value_at_rank(const isc_histosummary_t *hs, uint64_t rank, + uint64_t *valuep); +/*%< + * Get the approximate value at a given rank in the recorded data. + * + * The value at rank 0 is the minimum of the bucket containing the + * smallest value added to the histogram. + * + * The value at rank equal to the population is the maximum of the + * bucket containing the largest value added to the histogram. + * + * Greater ranks return a range error. + * + * Note: this function is slow for high-precision histograms + * (more than 3 significant digits). + * + * Requires: + *\li `hs` is a pointer to a valid histogram summary + *\li `valuep != NULL` + * + * Returns: + *\li ISC_R_SUCCESS, if `rank` is within bounds + *\li ISC_R_RANGE, otherwise + */ + +void +isc_histo_rank_of_value(const isc_histosummary_t *hs, uint64_t value, + uint64_t *rankp); +/*%< + * Get the approximate rank of a value in the recorded data. + * + * You can query the rank of any value. + * + * Note: this function is slow for high-precision histograms + * (more than 3 significant digits). + * + * Requires: + *\li `hs` is a pointer to a valid histogram summary + *\li `rankp != NULL` + */ + +isc_result_t +isc_histo_quantile(const isc_histosummary_t *hs, double proportion, + uint64_t *valuep); +/*%< + * The quantile function (aka inverse cumulative distribution function) + * of the histogram. What value is greater than the given proportion of + * the population? + * + * A proportion of 0.5 gets the median value: it is greater than half + * the population. 0.75 gets the third quartile value, and 0.99 gets + * the 99th percentile value. The proportion should be between 0.0 and + * 1.0 inclusive. + * + * https://enwp.org/Quantile_function + * + * Note: this function is slow for high-precision histograms + * (more than 3 significant digits). + * + * Requires: + *\li `hs` is a pointer to a valid histogram summary + *\li `valuep != NULL` + * + * Returns: + *\li ISC_R_SUCCESS, if the proportion is in bounds + *\li ISC_R_RANGE, otherwise + */ + +void +isc_histo_cdf(const isc_histosummary_t *hs, uint64_t value, + double *proportionp); +/*%< + * The cumulative distribution function of the histogram. Given a + * value, what proportion of the population have smaller values? + * You can query any value. + * + * If the value is the median, the proportion is 0.5. The proportion + * of the third quartile value is 0.75, and the proportion of the 99th + * percentile value is 0.99. + * + * https://enwp.org/Cumulative_distribution_function + * + * Note: this function is slow for high-precision histograms + * (more than 3 significant digits). + * + * Requires: + *\li `hs` is a pointer to a valid histogram summary + *\li `proportionp != NULL` + */ diff --git a/tests/isc/Makefile.am b/tests/isc/Makefile.am index c68c1b7da9..4d72be6bec 100644 --- a/tests/isc/Makefile.am +++ b/tests/isc/Makefile.am @@ -22,6 +22,7 @@ check_PROGRAMS = \ hash_test \ hashmap_test \ heap_test \ + histo_test \ hmac_test \ ht_test \ job_test \ diff --git a/tests/isc/histo_test.c b/tests/isc/histo_test.c new file mode 100644 index 0000000000..2355ab1f32 --- /dev/null +++ b/tests/isc/histo_test.c @@ -0,0 +1,340 @@ +/* + * Copyright (C) Internet Systems Consortium, Inc. ("ISC") + * + * SPDX-License-Identifier: MPL-2.0 + * + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, you can obtain one at https://mozilla.org/MPL/2.0/. + * + * See the COPYRIGHT file distributed with this work for additional + * information regarding copyright ownership. + */ + +/* ! \file */ + +#include +#include /* IWYU pragma: keep */ +#include +#include +#include +#include +#include + +#define UNIT_TESTING +#include + +#include +#include +#include + +#include + +#define TIME_LIMIT (123 * NS_PER_MS) + +#if VERBOSE + +#define TRACE(fmt, ...) \ + fprintf(stderr, "%s:%u:%s(): " fmt "\n", __FILE__, __LINE__, __func__, \ + __VA_ARGS__) + +#define TRACETIME(fmt, ...) \ + TRACE("%u bits %.1f ms " fmt, bits, millis_since(start), ##__VA_ARGS__) + +static double +millis_since(isc_nanosecs_t start) { + isc_nanosecs_t end = isc_time_monotonic(); + return ((double)(end - start) / NS_PER_MS); +} + +#else +#define TRACE(...) +#define TRACETIME(...) UNUSED(start) +#endif + +/* + * Note: in many of these tests when adding data to a histogram, + * we need to iterate using `key++` instead of `isc_histo_next()` + * because the latter skips chunks that we want to fill but have + * not yet done so. + */ + +ISC_RUN_TEST_IMPL(basics) { + isc_result_t result; + for (uint bits = ISC_HISTO_MINBITS; bits <= ISC_HISTO_MAXBITS; bits++) { + isc_nanosecs_t start = isc_time_monotonic(); + + isc_histo_t *hg = NULL; + isc_histo_create(mctx, bits, &hg); + + isc_histo_inc(hg, 0); + + uint64_t min, max, count; + + uint64_t prev_max = 0; + uint key = 0; + result = isc_histo_get(hg, key, &min, &max, &count); + while (result == ISC_R_SUCCESS) { + /* previous iteration already bumped this bucket */ + assert_int_equal(count, 1); + + /* min maps to this bucket */ + isc_histo_inc(hg, min); + result = isc_histo_get(hg, key, &min, &max, &count); + assert_int_equal(result, ISC_R_SUCCESS); + assert_int_equal(count, 2); + + /* max maps to this bucket */ + isc_histo_add(hg, max, 2); + result = isc_histo_get(hg, key, &min, &max, &count); + assert_int_equal(result, ISC_R_SUCCESS); + assert_int_equal(count, 4); + + /* put range covers this bucket */ + isc_histo_put(hg, min, max, 4); + result = isc_histo_get(hg, key, &min, &max, &count); + assert_int_equal(result, ISC_R_SUCCESS); + assert_int_equal(count, 8); + + if (max < UINT64_MAX) { + /* max + 1 maps to next bucket */ + isc_histo_inc(hg, max + 1); + result = isc_histo_get(hg, key, &min, &max, + &count); + assert_int_equal(result, ISC_R_SUCCESS); + /* this bucket was not bumped */ + assert_int_equal(count, 8); + } + + if (key == 0) { + assert_int_equal(min, 0); + assert_int_equal(max, 0); + } else { + /* no gap between buckets */ + assert_int_equal(min, prev_max + 1); + } + + prev_max = max; + key++; + result = isc_histo_get(hg, key, &min, &max, &count); + } + + /* last bucket goes up to last possible value */ + assert_int_equal(max, UINT64_MAX); + + double pop; + isc_histo_moments(hg, &pop, NULL, NULL); + assert_int_equal((uint64_t)pop, key * 8); + + isc_histo_destroy(&hg); + + TRACETIME("%u keys", key); + } +} + +/* + * ensure relative error is as expected + */ +ISC_RUN_TEST_IMPL(sigfigs) { + assert_int_equal(ISC_HISTO_MINBITS, + isc_histo_digits_to_bits(ISC_HISTO_MINDIGITS)); + assert_int_equal(ISC_HISTO_MINDIGITS, + isc_histo_bits_to_digits(ISC_HISTO_MINBITS)); + assert_int_equal(ISC_HISTO_MAXBITS, + isc_histo_digits_to_bits(ISC_HISTO_MAXDIGITS)); + assert_int_equal(ISC_HISTO_MAXDIGITS, + isc_histo_bits_to_digits(ISC_HISTO_MAXBITS)); + + uint log10 = 1; + double exp10 = 1.0; /* sigdigs == 1 gives relative error of 1 */ + + for (uint bits = ISC_HISTO_MINBITS; bits <= ISC_HISTO_MAXBITS; bits++) { + isc_histo_t *hg = NULL; + isc_histo_create(mctx, bits, &hg); + + uint digits = isc_histo_bits_to_digits(bits); + assert_true(bits >= isc_histo_digits_to_bits(digits)); + + if (log10 < digits) { + log10 += 1; + exp10 *= 10.0; + assert_int_equal(log10, digits); + } + + TRACE("%u binary %f decimal", 1 << bits, exp10); + + /* binary precision is better than decimal precision */ + double nominal = 1.0 / (double)(1 << bits); + assert_true(nominal < 1.0 / exp10); + + /* start with key = 1 to avoid division by zero */ + uint64_t imin, imax; + for (uint key = 1; isc_histo_get(hg, key, &imin, &imax, NULL) == + ISC_R_SUCCESS; + key++) + { + double min = (double)imin; + double max = (double)imax; + double error = (max - min) / (max + min); + assert_true(error < nominal); + } + + isc_histo_destroy(&hg); + } +} + +ISC_RUN_TEST_IMPL(summary) { + for (uint bits = ISC_HISTO_MINBITS; bits <= ISC_HISTO_MAXBITS; bits++) { + isc_result_t result; + uint64_t min, max, count, value, rank, lorank, hirank; + double pop; + uint key; + + isc_nanosecs_t start = isc_time_monotonic(); + + isc_histo_t *hg = NULL; + isc_histo_create(mctx, bits, &hg); + + for (key = 0; isc_histo_get(hg, key, &min, &max, &count) == + ISC_R_SUCCESS; + key++) + { + /* inc twice so we can check bucket's midpoint */ + assert_int_equal(count, 0); + isc_histo_inc(hg, min); + isc_histo_inc(hg, max); + } + + isc_histosummary_t *hs = NULL; + isc_histosummary_create(hg, &hs); + + /* no incs were lost */ + isc_histo_moments(hg, &pop, NULL, NULL); + assert_float_equal(pop, 2 * key, 0.5); + + isc_histo_destroy(&hg); + + for (key = 0; isc_histo_get(hs, key, &min, &max, &count) == + ISC_R_SUCCESS; + isc_histo_next(hs, &key)) + { + uint64_t lomin = min == 0 ? min : min - 1; + uint64_t himin = min; + uint64_t lomid = floor(min / 2.0 + max / 2.0); + uint64_t himid = ceil(min / 2.0 + max / 2.0); + uint64_t lomax = max; + uint64_t himax = max == UINT64_MAX ? max : max + 1; + + assert_int_equal(count, 2); + + rank = key * 2; + + /* check fenceposts */ + result = isc_histo_value_at_rank(hs, rank, &value); + assert_int_equal(result, ISC_R_SUCCESS); + assert_in_range(value, lomin, himin); + result = isc_histo_value_at_rank(hs, rank + 1, &value); + assert_int_equal(result, ISC_R_SUCCESS); + assert_in_range(value, lomid, himid); + result = isc_histo_value_at_rank(hs, rank + 2, &value); + assert_int_equal(result, ISC_R_SUCCESS); + assert_in_range(value, lomax, himax); + + isc_histo_rank_of_value(hs, min, &rank); + assert_int_equal(rank, key * 2); + + /* only if the bucket covers enough distinct values */ + + if (min < lomid) { + rank = key * 2 + 1; + isc_histo_rank_of_value(hs, lomid, &lorank); + isc_histo_rank_of_value(hs, himid, &hirank); + assert_in_range(rank, lorank, hirank); + } + + if (himid < max) { + rank = key * 2 + 2; + isc_histo_rank_of_value(hs, lomax, &lorank); + isc_histo_rank_of_value(hs, himax, &hirank); + assert_in_range(rank, lorank, hirank); + } + + /* these tests can be slow */ + if (isc_time_monotonic() > start + TIME_LIMIT) { + break; + } + } + + isc_histosummary_destroy(&hs); + + TRACETIME(""); + } +} + +ISC_RUN_TEST_IMPL(subrange) { + for (uint bits = ISC_HISTO_MINBITS; bits <= ISC_HISTO_MAXBITS; bits++) { + isc_result_t result; + uint64_t min, max, count, value; + + isc_nanosecs_t start = isc_time_monotonic(); + + isc_histo_t *hg = NULL; + isc_histo_create(mctx, bits, &hg); + + uint buckets = 64; + for (uint key = 0, top = buckets - 1;; key++, top++) { + if (isc_histo_get(hg, key, &min, NULL, NULL) != + ISC_R_SUCCESS) + { + break; + } + if (isc_histo_get(hg, top, NULL, &max, NULL) != + ISC_R_SUCCESS) + { + break; + } + isc_histo_put(hg, min, max, buckets); + + isc_histosummary_t *hs = NULL; + isc_histosummary_create(hg, &hs); + + for (uint bucket = 0; bucket < buckets; bucket++) { + result = isc_histo_get(hg, key + bucket, &min, + &max, &count); + assert_int_equal(result, ISC_R_SUCCESS); + /* did isc_histo_put() spread evenly? */ + assert_int_equal(count, 1); + result = isc_histo_value_at_rank(hs, bucket, + &value); + assert_int_equal(result, ISC_R_SUCCESS); + assert_int_equal(value, min); + } + result = isc_histo_value_at_rank(hs, buckets, &value); + assert_int_equal(result, ISC_R_SUCCESS); + assert_int_equal(value, max); + + isc_histosummary_destroy(&hs); + isc_histo_destroy(&hg); + isc_histo_create(mctx, bits, &hg); + + /* these tests can be slow */ + if (isc_time_monotonic() > start + TIME_LIMIT) { + break; + } + } + isc_histo_destroy(&hg); + + TRACETIME(""); + } +} + +ISC_TEST_LIST_START + +ISC_TEST_ENTRY(basics) +ISC_TEST_ENTRY(sigfigs) +ISC_TEST_ENTRY(summary) +ISC_TEST_ENTRY(subrange) + +ISC_TEST_LIST_END + +ISC_TEST_MAIN