Merge branch 'fanf-histo' into 'main'

Add isc_histo histograms, and use it for message size statistics

Closes #3464

See merge request isc-projects/bind9!7696
This commit is contained in:
Tony Finch 2023-04-03 11:41:25 +00:00
commit c74f389100
16 changed files with 1708 additions and 1124 deletions

View file

@ -1,3 +1,6 @@
6139. [func] Add isc_histo_t general-purpose log-linear histograms,
and use them for message size statistics. [GL !7696]
6138. [doc] Fix the DF-flag documentation on the outgoing
UDP packets. [GL #3710]

File diff suppressed because it is too large Load diff

View file

@ -451,6 +451,11 @@ AC_COMPILE_IFELSE(
#
AX_GCC_FUNC_ATTRIBUTE([returns_nonnull])
#
# how to link math functions?
#
AC_SEARCH_LIBS([sqrt],[m])
#
# check if we have kqueue
#

View file

@ -17,6 +17,8 @@
#include <inttypes.h>
#include <isc/histo.h>
#include <dns/types.h>
/*%
@ -156,310 +158,49 @@ enum {
};
/*%
* Traffic size statistics counters. Used as isc_statscounter_t values.
* Traffic size statistics, according to RSSAC002 section 2.4
* https://www.icann.org/en/system/files/files/rssac-002-measurements-root-20nov14-en.pdf
*
* The RSSAC002 linear bucketing does not directly match the log-linear
* bucketing of an `isc_histo_t`, so we need to adjust some parameters
* to fit.
*
* To map a message size to an `isc_histo_t`, first divide by
* DNS_SIZEHISTO_QUANTUM so that `isc_histo_inc()` is presented with
* one value per RSSAC002 bucket.
*
* Configure the `isc_histo_t` with large enough `sigbits` that its
* one-value-per-bucket range (its `UNITBUCKETS`) covers the range
* required by RSSAC002.
*/
/*
 * Message sizes are counted in buckets DNS_SIZEHISTO_QUANTUM bytes wide.
 * The MAX values are the number of the last bucket in each direction,
 * and the SIGBITS values are the histogram precision used for each.
 */
#define DNS_SIZEHISTO_QUANTUM	 16
#define DNS_SIZEHISTO_MAXIN	 (288 / DNS_SIZEHISTO_QUANTUM)
#define DNS_SIZEHISTO_MAXOUT	 (4096 / DNS_SIZEHISTO_QUANTUM)
#define DNS_SIZEHISTO_SIGBITSIN	 4
#define DNS_SIZEHISTO_SIGBITSOUT 7
/*
 * Map a message size in bytes to its bucket number, clamping oversized
 * messages into the last bucket. The argument is parenthesized so that
 * an expression such as `a + b` is divided as a whole.
 */
#define DNS_SIZEHISTO_BUCKETIN(size) \
	ISC_MIN((size) / DNS_SIZEHISTO_QUANTUM, DNS_SIZEHISTO_MAXIN)
#define DNS_SIZEHISTO_BUCKETOUT(size) \
	ISC_MIN((size) / DNS_SIZEHISTO_QUANTUM, DNS_SIZEHISTO_MAXOUT)
STATIC_ASSERT(DNS_SIZEHISTO_MAXIN <=
ISC_HISTO_UNITBUCKETS(DNS_SIZEHISTO_SIGBITSIN),
"must be enough histogram buckets for RSSAC002");
STATIC_ASSERT(DNS_SIZEHISTO_MAXOUT <=
ISC_HISTO_UNITBUCKETS(DNS_SIZEHISTO_SIGBITSOUT),
"must be enough histogram buckets for RSSAC002");
/*
* For consistency with other stats counters
*/
enum {
dns_sizecounter_in_0 = 0,
dns_sizecounter_in_16 = 1,
dns_sizecounter_in_32 = 2,
dns_sizecounter_in_48 = 3,
dns_sizecounter_in_64 = 4,
dns_sizecounter_in_80 = 5,
dns_sizecounter_in_96 = 6,
dns_sizecounter_in_112 = 7,
dns_sizecounter_in_128 = 8,
dns_sizecounter_in_144 = 9,
dns_sizecounter_in_160 = 10,
dns_sizecounter_in_176 = 11,
dns_sizecounter_in_192 = 12,
dns_sizecounter_in_208 = 13,
dns_sizecounter_in_224 = 14,
dns_sizecounter_in_240 = 15,
dns_sizecounter_in_256 = 16,
dns_sizecounter_in_272 = 17,
dns_sizecounter_in_288 = 18,
dns_sizecounter_in_max = 19,
dns_sizecounter_in_max = DNS_SIZEHISTO_MAXIN + 1,
dns_sizecounter_out_max = DNS_SIZEHISTO_MAXOUT + 1,
};
enum {
dns_sizecounter_out_0 = 0,
dns_sizecounter_out_16 = 1,
dns_sizecounter_out_32 = 2,
dns_sizecounter_out_48 = 3,
dns_sizecounter_out_64 = 4,
dns_sizecounter_out_80 = 5,
dns_sizecounter_out_96 = 6,
dns_sizecounter_out_112 = 7,
dns_sizecounter_out_128 = 8,
dns_sizecounter_out_144 = 9,
dns_sizecounter_out_160 = 10,
dns_sizecounter_out_176 = 11,
dns_sizecounter_out_192 = 12,
dns_sizecounter_out_208 = 13,
dns_sizecounter_out_224 = 14,
dns_sizecounter_out_240 = 15,
dns_sizecounter_out_256 = 16,
dns_sizecounter_out_272 = 17,
dns_sizecounter_out_288 = 18,
dns_sizecounter_out_304 = 19,
dns_sizecounter_out_320 = 20,
dns_sizecounter_out_336 = 21,
dns_sizecounter_out_352 = 22,
dns_sizecounter_out_368 = 23,
dns_sizecounter_out_384 = 24,
dns_sizecounter_out_400 = 25,
dns_sizecounter_out_416 = 26,
dns_sizecounter_out_432 = 27,
dns_sizecounter_out_448 = 28,
dns_sizecounter_out_464 = 29,
dns_sizecounter_out_480 = 30,
dns_sizecounter_out_496 = 31,
dns_sizecounter_out_512 = 32,
dns_sizecounter_out_528 = 33,
dns_sizecounter_out_544 = 34,
dns_sizecounter_out_560 = 35,
dns_sizecounter_out_576 = 36,
dns_sizecounter_out_592 = 37,
dns_sizecounter_out_608 = 38,
dns_sizecounter_out_624 = 39,
dns_sizecounter_out_640 = 40,
dns_sizecounter_out_656 = 41,
dns_sizecounter_out_672 = 42,
dns_sizecounter_out_688 = 43,
dns_sizecounter_out_704 = 44,
dns_sizecounter_out_720 = 45,
dns_sizecounter_out_736 = 46,
dns_sizecounter_out_752 = 47,
dns_sizecounter_out_768 = 48,
dns_sizecounter_out_784 = 49,
dns_sizecounter_out_800 = 50,
dns_sizecounter_out_816 = 51,
dns_sizecounter_out_832 = 52,
dns_sizecounter_out_848 = 53,
dns_sizecounter_out_864 = 54,
dns_sizecounter_out_880 = 55,
dns_sizecounter_out_896 = 56,
dns_sizecounter_out_912 = 57,
dns_sizecounter_out_928 = 58,
dns_sizecounter_out_944 = 59,
dns_sizecounter_out_960 = 60,
dns_sizecounter_out_976 = 61,
dns_sizecounter_out_992 = 62,
dns_sizecounter_out_1008 = 63,
dns_sizecounter_out_1024 = 64,
dns_sizecounter_out_1040 = 65,
dns_sizecounter_out_1056 = 66,
dns_sizecounter_out_1072 = 67,
dns_sizecounter_out_1088 = 68,
dns_sizecounter_out_1104 = 69,
dns_sizecounter_out_1120 = 70,
dns_sizecounter_out_1136 = 71,
dns_sizecounter_out_1152 = 72,
dns_sizecounter_out_1168 = 73,
dns_sizecounter_out_1184 = 74,
dns_sizecounter_out_1200 = 75,
dns_sizecounter_out_1216 = 76,
dns_sizecounter_out_1232 = 77,
dns_sizecounter_out_1248 = 78,
dns_sizecounter_out_1264 = 79,
dns_sizecounter_out_1280 = 80,
dns_sizecounter_out_1296 = 81,
dns_sizecounter_out_1312 = 82,
dns_sizecounter_out_1328 = 83,
dns_sizecounter_out_1344 = 84,
dns_sizecounter_out_1360 = 85,
dns_sizecounter_out_1376 = 86,
dns_sizecounter_out_1392 = 87,
dns_sizecounter_out_1408 = 88,
dns_sizecounter_out_1424 = 89,
dns_sizecounter_out_1440 = 90,
dns_sizecounter_out_1456 = 91,
dns_sizecounter_out_1472 = 92,
dns_sizecounter_out_1488 = 93,
dns_sizecounter_out_1504 = 94,
dns_sizecounter_out_1520 = 95,
dns_sizecounter_out_1536 = 96,
dns_sizecounter_out_1552 = 97,
dns_sizecounter_out_1568 = 98,
dns_sizecounter_out_1584 = 99,
dns_sizecounter_out_1600 = 100,
dns_sizecounter_out_1616 = 101,
dns_sizecounter_out_1632 = 102,
dns_sizecounter_out_1648 = 103,
dns_sizecounter_out_1664 = 104,
dns_sizecounter_out_1680 = 105,
dns_sizecounter_out_1696 = 106,
dns_sizecounter_out_1712 = 107,
dns_sizecounter_out_1728 = 108,
dns_sizecounter_out_1744 = 109,
dns_sizecounter_out_1760 = 110,
dns_sizecounter_out_1776 = 111,
dns_sizecounter_out_1792 = 112,
dns_sizecounter_out_1808 = 113,
dns_sizecounter_out_1824 = 114,
dns_sizecounter_out_1840 = 115,
dns_sizecounter_out_1856 = 116,
dns_sizecounter_out_1872 = 117,
dns_sizecounter_out_1888 = 118,
dns_sizecounter_out_1904 = 119,
dns_sizecounter_out_1920 = 120,
dns_sizecounter_out_1936 = 121,
dns_sizecounter_out_1952 = 122,
dns_sizecounter_out_1968 = 123,
dns_sizecounter_out_1984 = 124,
dns_sizecounter_out_2000 = 125,
dns_sizecounter_out_2016 = 126,
dns_sizecounter_out_2032 = 127,
dns_sizecounter_out_2048 = 128,
dns_sizecounter_out_2064 = 129,
dns_sizecounter_out_2080 = 130,
dns_sizecounter_out_2096 = 131,
dns_sizecounter_out_2112 = 132,
dns_sizecounter_out_2128 = 133,
dns_sizecounter_out_2144 = 134,
dns_sizecounter_out_2160 = 135,
dns_sizecounter_out_2176 = 136,
dns_sizecounter_out_2192 = 137,
dns_sizecounter_out_2208 = 138,
dns_sizecounter_out_2224 = 139,
dns_sizecounter_out_2240 = 140,
dns_sizecounter_out_2256 = 141,
dns_sizecounter_out_2272 = 142,
dns_sizecounter_out_2288 = 143,
dns_sizecounter_out_2304 = 144,
dns_sizecounter_out_2320 = 145,
dns_sizecounter_out_2336 = 146,
dns_sizecounter_out_2352 = 147,
dns_sizecounter_out_2368 = 148,
dns_sizecounter_out_2384 = 149,
dns_sizecounter_out_2400 = 150,
dns_sizecounter_out_2416 = 151,
dns_sizecounter_out_2432 = 152,
dns_sizecounter_out_2448 = 153,
dns_sizecounter_out_2464 = 154,
dns_sizecounter_out_2480 = 155,
dns_sizecounter_out_2496 = 156,
dns_sizecounter_out_2512 = 157,
dns_sizecounter_out_2528 = 158,
dns_sizecounter_out_2544 = 159,
dns_sizecounter_out_2560 = 160,
dns_sizecounter_out_2576 = 161,
dns_sizecounter_out_2592 = 162,
dns_sizecounter_out_2608 = 163,
dns_sizecounter_out_2624 = 164,
dns_sizecounter_out_2640 = 165,
dns_sizecounter_out_2656 = 166,
dns_sizecounter_out_2672 = 167,
dns_sizecounter_out_2688 = 168,
dns_sizecounter_out_2704 = 169,
dns_sizecounter_out_2720 = 170,
dns_sizecounter_out_2736 = 171,
dns_sizecounter_out_2752 = 172,
dns_sizecounter_out_2768 = 173,
dns_sizecounter_out_2784 = 174,
dns_sizecounter_out_2800 = 175,
dns_sizecounter_out_2816 = 176,
dns_sizecounter_out_2832 = 177,
dns_sizecounter_out_2848 = 178,
dns_sizecounter_out_2864 = 179,
dns_sizecounter_out_2880 = 180,
dns_sizecounter_out_2896 = 181,
dns_sizecounter_out_2912 = 182,
dns_sizecounter_out_2928 = 183,
dns_sizecounter_out_2944 = 184,
dns_sizecounter_out_2960 = 185,
dns_sizecounter_out_2976 = 186,
dns_sizecounter_out_2992 = 187,
dns_sizecounter_out_3008 = 188,
dns_sizecounter_out_3024 = 189,
dns_sizecounter_out_3040 = 190,
dns_sizecounter_out_3056 = 191,
dns_sizecounter_out_3072 = 192,
dns_sizecounter_out_3088 = 193,
dns_sizecounter_out_3104 = 194,
dns_sizecounter_out_3120 = 195,
dns_sizecounter_out_3136 = 196,
dns_sizecounter_out_3152 = 197,
dns_sizecounter_out_3168 = 198,
dns_sizecounter_out_3184 = 199,
dns_sizecounter_out_3200 = 200,
dns_sizecounter_out_3216 = 201,
dns_sizecounter_out_3232 = 202,
dns_sizecounter_out_3248 = 203,
dns_sizecounter_out_3264 = 204,
dns_sizecounter_out_3280 = 205,
dns_sizecounter_out_3296 = 206,
dns_sizecounter_out_3312 = 207,
dns_sizecounter_out_3328 = 208,
dns_sizecounter_out_3344 = 209,
dns_sizecounter_out_3360 = 210,
dns_sizecounter_out_3376 = 211,
dns_sizecounter_out_3392 = 212,
dns_sizecounter_out_3408 = 213,
dns_sizecounter_out_3424 = 214,
dns_sizecounter_out_3440 = 215,
dns_sizecounter_out_3456 = 216,
dns_sizecounter_out_3472 = 217,
dns_sizecounter_out_3488 = 218,
dns_sizecounter_out_3504 = 219,
dns_sizecounter_out_3520 = 220,
dns_sizecounter_out_3536 = 221,
dns_sizecounter_out_3552 = 222,
dns_sizecounter_out_3568 = 223,
dns_sizecounter_out_3584 = 224,
dns_sizecounter_out_3600 = 225,
dns_sizecounter_out_3616 = 226,
dns_sizecounter_out_3632 = 227,
dns_sizecounter_out_3648 = 228,
dns_sizecounter_out_3664 = 229,
dns_sizecounter_out_3680 = 230,
dns_sizecounter_out_3696 = 231,
dns_sizecounter_out_3712 = 232,
dns_sizecounter_out_3728 = 233,
dns_sizecounter_out_3744 = 234,
dns_sizecounter_out_3760 = 235,
dns_sizecounter_out_3776 = 236,
dns_sizecounter_out_3792 = 237,
dns_sizecounter_out_3808 = 238,
dns_sizecounter_out_3824 = 239,
dns_sizecounter_out_3840 = 240,
dns_sizecounter_out_3856 = 241,
dns_sizecounter_out_3872 = 242,
dns_sizecounter_out_3888 = 243,
dns_sizecounter_out_3904 = 244,
dns_sizecounter_out_3920 = 245,
dns_sizecounter_out_3936 = 246,
dns_sizecounter_out_3952 = 247,
dns_sizecounter_out_3968 = 248,
dns_sizecounter_out_3984 = 249,
dns_sizecounter_out_4000 = 250,
dns_sizecounter_out_4016 = 251,
dns_sizecounter_out_4032 = 252,
dns_sizecounter_out_4048 = 253,
dns_sizecounter_out_4064 = 254,
dns_sizecounter_out_4080 = 255,
dns_sizecounter_out_4096 = 256,
dns_sizecounter_out_max = 257
};
#define DNS_STATS_NCOUNTERS 8
#if 0
/*%<
* Flag(s) for dns_xxxstats_dump(). DNS_STATSDUMP_VERBOSE is obsolete.
* ISC_STATSDUMP_VERBOSE should be used instead. These two values are
* intentionally defined to be the same value to ensure binary compatibility.
*/
#define DNS_STATSDUMP_VERBOSE 0x00000001 /*%< dump 0-value counters */
#endif /* if 0 */
/*%<
* (Obsoleted)
*/
extern const char *dns_statscounter_names[];
/*%
* Attributes for statistics counters of RRset and Rdatatype types.
*
@ -803,22 +544,4 @@ dns_rcodestats_dump(dns_stats_t *stats, dns_rcodestats_dumper_t dump_fn,
*\li 'stats' is a valid dns_stats_t created by dns_generalstats_create().
*/
isc_result_t
dns_stats_alloccounters(isc_mem_t *mctx, uint64_t **ctrp);
/*%<
* Allocate an array of query statistics counters from the memory
* context 'mctx'.
*
* This function is obsoleted. Use dns_xxxstats_create() instead.
*/
void
dns_stats_freecounters(isc_mem_t *mctx, uint64_t **ctrp);
/*%<
* Free an array of query statistics counters allocated from the memory
* context 'mctx'.
*
* This function is obsoleted. Use dns_stats_destroy() instead.
*/
ISC_LANG_ENDDECLS

View file

@ -618,31 +618,3 @@ dns_rcodestats_dump(dns_stats_t *stats, dns_rcodestats_dumper_t dump_fn,
arg.arg = arg0;
isc_stats_dump(stats->counters, rcode_dumpcb, &arg, options);
}
/***
*** Obsolete variables and functions follow:
***/
const char *dns_statscounter_names[DNS_STATS_NCOUNTERS] = {
"success", "referral", "nxrrset", "nxdomain",
"recursion", "failure", "duplicate", "dropped"
};
isc_result_t
dns_stats_alloccounters(isc_mem_t *mctx, uint64_t **ctrp) {
int i;
uint64_t *p = isc_mem_get(mctx, DNS_STATS_NCOUNTERS * sizeof(uint64_t));
if (p == NULL) {
return (ISC_R_NOMEMORY);
}
for (i = 0; i < DNS_STATS_NCOUNTERS; i++) {
p[i] = 0;
}
*ctrp = p;
return (ISC_R_SUCCESS);
}
void
dns_stats_freecounters(isc_mem_t *mctx, uint64_t **ctrp) {
isc_mem_put(mctx, *ctrp, DNS_STATS_NCOUNTERS * sizeof(uint64_t));
*ctrp = NULL;
}

View file

@ -36,6 +36,7 @@ libisc_la_HEADERS = \
include/isc/hashmap.h \
include/isc/heap.h \
include/isc/hex.h \
include/isc/histo.h \
include/isc/hmac.h \
include/isc/ht.h \
include/isc/httpd.h \
@ -136,6 +137,7 @@ libisc_la_SOURCES = \
hashmap.c \
heap.c \
hex.c \
histo.c \
hmac.c \
ht.c \
httpd.c \

588
lib/isc/histo.c Normal file
View file

@ -0,0 +1,588 @@
/*
* Copyright (C) Internet Systems Consortium, Inc. ("ISC")
*
* SPDX-License-Identifier: MPL-2.0
*
* This Source Code Form is subject to the terms of the Mozilla Public
* License, v. 2.0. If a copy of the MPL was not distributed with this
* file, you can obtain one at https://mozilla.org/MPL/2.0/.
*
* See the COPYRIGHT file distributed with this work for additional
* information regarding copyright ownership.
*/
#include <assert.h>
#include <errno.h>
#include <math.h>
#include <stdatomic.h>
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <isc/atomic.h>
#include <isc/histo.h>
#include <isc/magic.h>
#include <isc/mem.h>
#include <isc/tid.h>
/*
 * XXXFANF to be added to <isc/util.h> by a commit in a qp-trie
 * feature branch
 *
 * Size in bytes of the structure that `pointer` points to, plus `count`
 * elements of its trailing flexible array `member`.
 */
#define STRUCT_FLEX_SIZE(pointer, member, count) \
	(sizeof(*(pointer)) + sizeof(*(pointer)->member) * (count))
/*
 * XXXFANF this should probably be in <isc/util.h> too
 *
 * Store `val` through the optional output pointer `ptr`; do nothing
 * when `ptr` is NULL. This is a `do { } while (0)` statement rather
 * than a GNU statement expression, so it is standard C and acts as a
 * single statement after `if`/`else`. Note that `ptr` is evaluated
 * twice.
 */
#define OUTARG(ptr, val)                \
	do {                            \
		if ((ptr) != NULL) {    \
			*(ptr) = (val); \
		}                       \
	} while (0)
/*
 * Magic numbers stored in the `magic` member of a histogram and of a
 * multi-histogram, and the matching validity checks used by REQUIRE().
 */
#define HISTO_MAGIC	    ISC_MAGIC('H', 's', 't', 'o')
#define HISTO_VALID(p)	    ISC_MAGIC_VALID(p, HISTO_MAGIC)
#define HISTOMULTI_MAGIC    ISC_MAGIC('H', 'g', 'M', 't')
#define HISTOMULTI_VALID(p) ISC_MAGIC_VALID(p, HISTOMULTI_MAGIC)
/*
 * Natural logarithms of 2 and 10 for converting precisions between
 * binary and decimal significant figures. Their ratio is the number of
 * decimal digits per bit (or bits per digit when inverted).
 */
#define LN_2  0.693147180559945309
#define LN_10 2.302585092994045684
/*
 * The chunks array has a static size for simplicity, fixed as the
 * number of bits in a value. That means we waste a little extra space
 * that could be saved by omitting the exponents that are covered by
 * `sigbits`. The following macros calculate (at run time) the exact
 * number of buckets when we need to do accurate bounds checks.
 *
 * For a discussion of the floating point terminology, see the
 * commentary on `value_to_key()` below.
 *
 * We often use the variable names `c` for chunk and `b` for bucket.
 *
 * CHUNKS	 size of the static chunk pointer array
 * DENORMALS	 how many of those array slots go unused at this precision
 * MANTISSAS	 number of distinct mantissa values, i.e. buckets per chunk
 * EXPONENTS	 number of chunks that can actually be used
 * BUCKETS	 total number of valid keys
 * MAXCHUNK	 exclusive upper bound on chunk numbers
 * CHUNKSIZE	 number of buckets in each chunk
 * CHUNKBYTES	 allocation size of one chunk
 */
#define CHUNKS	       64
#define DENORMALS(hg)  ((hg)->sigbits - 1)
#define MANTISSAS(hg)  (1 << (hg)->sigbits)
#define EXPONENTS(hg)  (CHUNKS - DENORMALS(hg))
#define BUCKETS(hg)    (EXPONENTS(hg) * MANTISSAS(hg))
#define MAXCHUNK(hg)   EXPONENTS(hg)
#define CHUNKSIZE(hg)  MANTISSAS(hg)
#define CHUNKBYTES(hg) (CHUNKSIZE(hg) * sizeof(hg_bucket_t))
/* A bucket is one atomic 64-bit counter */
typedef atomic_uint_fast64_t hg_bucket_t;
/* A chunk is an atomic pointer to an array of CHUNKSIZE buckets */
typedef atomic_ptr(hg_bucket_t) hg_chunk_t;
/*
 * A single histogram. Chunks are allocated on demand by
 * `key_to_new_bucket()`, so a NULL chunk pointer means that none of
 * the chunk's buckets has been counted in yet.
 */
struct isc_histo {
	uint magic;		  /* HISTO_MAGIC */
	uint sigbits;		  /* precision, set at creation */
	isc_mem_t *mctx;	  /* attached at creation, owns all chunks */
	hg_chunk_t chunk[CHUNKS]; /* indexed by key / CHUNKSIZE */
};
/*
 * A collection of histograms, one per thread ID: `isc_histomulti_add()`
 * selects the element at index `isc_tid()`.
 */
struct isc_histomulti {
	uint magic;	   /* HISTOMULTI_MAGIC */
	uint size;	   /* number of elements in hg[] */
	isc_histo_t *hg[]; /* flexible array of histograms */
};
/**********************************************************************/
/*
 * Allocate an empty histogram with `sigbits` bits of precision from
 * `mctx` and return it through `hgp`. No chunks are allocated here:
 * every chunk pointer starts out NULL.
 */
void
isc_histo_create(isc_mem_t *mctx, uint sigbits, isc_histo_t **hgp) {
	REQUIRE(sigbits >= ISC_HISTO_MINBITS);
	REQUIRE(sigbits <= ISC_HISTO_MAXBITS);
	REQUIRE(hgp != NULL);
	REQUIRE(*hgp == NULL);

	isc_histo_t *histo = isc_mem_get(mctx, sizeof(*histo));

	/* members not named here (mctx, chunk[]) are zero-initialized */
	*histo = (isc_histo_t){
		.sigbits = sigbits,
		.magic = HISTO_MAGIC,
	};
	isc_mem_attach(mctx, &histo->mctx);

	*hgp = histo;
}
/*
 * Free every allocated chunk and then the histogram itself, detaching
 * from its memory context. `*hgp` is cleared before anything is freed.
 */
void
isc_histo_destroy(isc_histo_t **hgp) {
	REQUIRE(hgp != NULL);
	REQUIRE(HISTO_VALID(*hgp));

	isc_histo_t *histo = *hgp;
	*hgp = NULL;

	for (uint slot = 0; slot < CHUNKS; slot++) {
		/* chunks that were never used were never allocated */
		if (histo->chunk[slot] == NULL) {
			continue;
		}
		isc_mem_put(histo->mctx, histo->chunk[slot],
			    CHUNKBYTES(histo));
	}

	isc_mem_putanddetach(&histo->mctx, histo, sizeof(*histo));
}
/**********************************************************************/
/*
 * Report the precision, in significant bits, that the histogram was
 * created with.
 */
uint
isc_histo_sigbits(isc_histo_t *hg) {
	REQUIRE(HISTO_VALID(hg));

	const uint precision = hg->sigbits;
	return (precision);
}
/*
 * The conversions between bits and digits use precomputed logs and
 * builtins to avoid linking with libm.
 *
 * This one gives the decimal precision available from `bits`
 * significant bits, rounded down.
 */
uint
isc_histo_bits_to_digits(uint bits) {
	REQUIRE(bits >= ISC_HISTO_MINBITS);
	REQUIRE(bits <= ISC_HISTO_MAXBITS);

	double digits = 1.0 - (1.0 - bits) * LN_2 / LN_10;
	return (floor(digits));
}
/*
 * Number of significant bits needed to achieve `digits` decimal
 * significant figures, rounded up.
 */
uint
isc_histo_digits_to_bits(uint digits) {
	REQUIRE(digits >= ISC_HISTO_MINDIGITS);
	REQUIRE(digits <= ISC_HISTO_MAXDIGITS);

	double bits = 1.0 - (1.0 - digits) * LN_10 / LN_2;
	return (ceil(bits));
}
/**********************************************************************/
/*
* The way we map buckets to keys is what gives the histogram a
* consistent relative error across the whole range of `uint64_t`.
* The mapping is log-linear: a chunk key is the logarithm of part
* of the value (in other words, chunks are spaced exponentially);
* and a bucket within a chunk is a linear function of another part
* of the value.
*
* This log-linear spacing is similar to the size classes used by
* jemalloc. It is also the way floating point numbers work: the
* exponent is the log part, and the mantissa is the linear part.
*
* So, a chunk number is the log (base 2) of a `uint64_t`, which is
* between 0 and 63, which is why there are up to 64 chunks. In
* floating point terms the chunk number is the exponent. The
* histogram's number of significant bits is the size of the
* mantissa, which indexes buckets within each chunk.
*
* A fast way to get the logarithm of a positive integer is CLZ,
* count leading zeroes.
*
* Chunk zero is special. Chunk 1 covers values between `CHUNKSIZE`
* and `CHUNKSIZE * 2 - 1`, where `CHUNKSIZE == exponent << sigbits
* == 1 << sigbits`. Each chunk has CHUNKSIZE buckets, so chunk 1 has
* one value per bucket. There are CHUNKSIZE values before chunk 1
* which map to chunk 0, so it also has one value per bucket. (Hence
* the first two chunks have one value per bucket.) The values in
* chunk 0 correspond to denormal numbers in floating point terms.
* They are also the values where `63 - sigbits - clz` would be less
* than one if denormals were not handled specially.
*
* This branchless conversion is due to Paul Khuong: see bin_down_of() in
* https://pvk.ca/Blog/2015/06/27/linear-log-bucketing-fast-versatile-simple/
*
* This function is in the `isc_histo_inc()` fast path.
*/
/*
 * Map a value to its bucket key: the chunk number (the exponent) in
 * the high part and the bucket within the chunk (the mantissa) in the
 * low `sigbits` bits. Branchless, because it is on the fast path.
 */
static inline uint
value_to_key(const isc_histo_t *hg, uint64_t value) {
	const uint sigbits = hg->sigbits;
	/*
	 * Setting the CHUNKSIZE bit caps the leading-zero count, which
	 * ensures that denormal numbers are all in chunk zero.
	 */
	unsigned long long floored = value | CHUNKSIZE(hg);
	int zeroes = __builtin_clzll(floored);
	/* actually 1 less than the exponent except for denormals */
	uint exponent = 63 - sigbits - zeroes;
	/* mantissa has leading bit set except for denormals */
	uint mantissa = value >> exponent;
	/* leading bit of mantissa adds one to exponent */
	uint key = (exponent << sigbits) + mantissa;
	return (key);
}
/*
* Inverse functions of `value_to_key()`, to get the minimum and
* maximum values that map to a particular key.
*
* We must not cause undefined behaviour by hitting integer limits,
* which is a risk when we aim to cover the entire range of `uint64_t`.
*
* The maximum value in the last bucket is UINT64_MAX, which
* `key_to_maxval()` gets by deliberately subtracting `0 - 1`,
* underflowing a `uint64_t`. That is OK when unsigned.
*
* We must take care not to shift too much in `key_to_minval()`.
* The largest key passed by `key_to_maxval()` is `BUCKETS(hg)`, so
* `exponent == EXPONENTS(hg) - 1 == 64 - sigbits`
* which is always less than 64, so the size of the shift is OK.
*
* The `mantissa` in this edge case is just `chunksize`, which when
* shifted becomes `1 << 64`, which overflows `uint64_t`. Again this is
* OK when unsigned, so the return value is zero.
*/
/*
 * Smallest value that maps to `key`. Keys in chunk zero are their own
 * value; other keys have the leading mantissa bit restored and are
 * shifted back up by the exponent.
 */
static inline uint64_t
key_to_minval(const isc_histo_t *hg, uint key) {
	const uint width = CHUNKSIZE(hg);

	if (key < width) {
		return (key);
	}

	uint shift = (key / width) - 1;
	uint64_t mantissa = (key % width) + width;
	return (mantissa << shift);
}
/*
 * Largest value that maps to `key`: one less than the minimum of the
 * following key, relying on unsigned wrap-around for the last bucket.
 */
static inline uint64_t
key_to_maxval(const isc_histo_t *hg, uint key) {
	uint64_t next_min = key_to_minval(hg, key + 1);
	return (next_min - 1);
}
/**********************************************************************/
/*
 * Allocate the zeroed chunk that contains `key` and publish it with a
 * compare-and-swap. If another thread published a chunk first, the
 * new allocation is freed and the other thread's chunk is used.
 */
static hg_bucket_t *
key_to_new_bucket(isc_histo_t *hg, uint key) {
	/* slow path */
	const uint width = CHUNKSIZE(hg);
	const size_t nbytes = CHUNKBYTES(hg);
	const uint offset = key % width;
	hg_chunk_t *slot = &hg->chunk[key / width];
	hg_bucket_t *expected = NULL;
	hg_bucket_t *fresh = isc_mem_getx(hg->mctx, nbytes, ISC_MEM_ZERO);

	if (!atomic_compare_exchange_strong_acq_rel(slot, &expected, fresh)) {
		/* lost the race, so use the winner's chunk */
		isc_mem_put(hg->mctx, fresh, nbytes);
		return (&expected[offset]);
	}
	return (&fresh[offset]);
}
/*
 * Load chunk number `chunk` with acquire ordering. The result is NULL
 * if the chunk has not been allocated.
 */
static hg_bucket_t *
get_chunk(const isc_histo_t *hg, uint chunk) {
	hg_bucket_t *counters = atomic_load_acquire(&hg->chunk[chunk]);
	return (counters);
}
/*
 * Find the counter for `key`, or NULL if its chunk has not been
 * allocated yet.
 */
static inline hg_bucket_t *
key_to_bucket(const isc_histo_t *hg, uint key) {
	/* fast path */
	const uint width = CHUNKSIZE(hg);
	hg_bucket_t *counters = get_chunk(hg, key / width);

	if (counters == NULL) {
		return (NULL);
	}
	return (&counters[key % width]);
}
/*
 * Read a counter with relaxed ordering. A NULL bucket pointer (no
 * chunk allocated) counts as zero.
 */
static inline uint64_t
bucket_count(const hg_bucket_t *bp) {
	if (bp == NULL) {
		return (0);
	}
	return (atomic_load_relaxed(bp));
}
/*
 * Current count in the bucket for `key`, or zero if that bucket's
 * chunk has not been allocated.
 */
static inline uint64_t
get_key_count(const isc_histo_t *hg, uint key) {
	const hg_bucket_t *counter = key_to_bucket(hg, key);
	return (bucket_count(counter));
}
/*
 * Add `inc` to the bucket for `key`, allocating its chunk on first
 * use. A zero increment does nothing, so it never allocates a chunk.
 */
static inline void
add_key_count(isc_histo_t *hg, uint key, uint64_t inc) {
	/* fast path */
	if (inc == 0) {
		return;
	}

	hg_bucket_t *counter = key_to_bucket(hg, key);
	if (counter == NULL) {
		counter = key_to_new_bucket(hg, key);
	}
	atomic_fetch_add_relaxed(counter, inc);
}
/**********************************************************************/
/*
 * Add `inc` to the bucket that `value` maps to.
 */
void
isc_histo_add(isc_histo_t *hg, uint64_t value, uint64_t inc) {
	REQUIRE(HISTO_VALID(hg));

	uint key = value_to_key(hg, value);
	add_key_count(hg, key, inc);
}
/*
 * Count a single sample: add 1 to the bucket that `value` maps to.
 */
void
isc_histo_inc(isc_histo_t *hg, uint64_t value) {
	const uint64_t one = 1;
	isc_histo_add(hg, value, one);
}
/*
 * Spread `count` samples over every bucket that overlaps the closed
 * range `min` to `max`, in proportion to how much of the remaining
 * range each bucket covers. Nothing is added when `min > max`.
 *
 * The proportions are calculated in floating point, so care is needed
 * at the limits of `uint64_t`: the width of a range is converted to
 * double before adding 1, a bucket is never given more than is left
 * of `count`, and the last bucket takes the exact remainder.
 */
void
isc_histo_put(isc_histo_t *hg, uint64_t min, uint64_t max, uint64_t count) {
	REQUIRE(HISTO_VALID(hg));

	/* an inverted range within one bucket would divide zero by zero */
	if (min > max) {
		return;
	}

	uint kmin = value_to_key(hg, min);
	uint kmax = value_to_key(hg, max);

	for (uint key = kmin; key <= kmax; key++) {
		uint64_t mid = ISC_MIN(max, key_to_maxval(hg, key));
		/* the last bucket takes everything that is left over */
		uint64_t inc = count;
		if (mid < max) {
			/*
			 * `max - min + 1` wraps to zero as a `uint64_t`
			 * when the range is all of 0 ..= UINT64_MAX, so
			 * add the 1 in floating point instead.
			 */
			double in_bucket = (double)(mid - min) + 1.0;
			double remaining = (double)(max - min) + 1.0;
			double share = ceil(count * in_bucket / remaining);
			/*
			 * Only convert back to an integer when the share
			 * is in range, and never exceed what is left.
			 */
			if (share < (double)count) {
				inc = ISC_MIN((uint64_t)share, count);
			}
		}
		add_key_count(hg, key, inc);
		count -= inc;
		/* wraps harmlessly after the final bucket */
		min = mid + 1;
	}
}
/*
 * Export the bucket identified by `key`: its minimum and maximum
 * values (inclusive) and its current count. Any of the output
 * pointers may be NULL. Returns ISC_R_RANGE when `key` is past the
 * last bucket, in which case nothing is written.
 */
isc_result_t
isc_histo_get(const isc_histo_t *hg, uint key, uint64_t *minp, uint64_t *maxp,
	      uint64_t *countp) {
	REQUIRE(HISTO_VALID(hg));

	if (key >= BUCKETS(hg)) {
		return (ISC_R_RANGE);
	}

	OUTARG(minp, key_to_minval(hg, key));
	OUTARG(maxp, key_to_maxval(hg, key));
	OUTARG(countp, get_key_count(hg, key));
	return (ISC_R_SUCCESS);
}
/*
 * Advance `*keyp` to the next key worth visiting. Whenever the new
 * key is the first bucket of a chunk that has not been allocated, the
 * whole chunk is skipped. The result can be equal to or greater than
 * the number of buckets, which `isc_histo_get()` reports as
 * ISC_R_RANGE.
 */
void
isc_histo_next(const isc_histo_t *hg, uint *keyp) {
	REQUIRE(HISTO_VALID(hg));
	REQUIRE(keyp != NULL);

	const uint width = CHUNKSIZE(hg);
	const uint limit = BUCKETS(hg);
	uint next = *keyp + 1;

	for (;;) {
		/* stop at the end, or anywhere inside a chunk */
		if (next >= limit || next % width != 0) {
			break;
		}
		/* stop at the start of a chunk that exists */
		if (key_to_bucket(hg, next) != NULL) {
			break;
		}
		next += width;
	}

	*keyp = next;
}
/*
 * Add all the counts in `source` to `*targetp`. If `*targetp` is NULL
 * a new histogram is created first, using the memory context and
 * precision of `source`.
 */
void
isc_histo_merge(isc_histo_t **targetp, const isc_histo_t *source) {
	REQUIRE(HISTO_VALID(source));
	REQUIRE(targetp != NULL);

	if (*targetp == NULL) {
		isc_histo_create(source->mctx, source->sigbits, targetp);
	} else {
		REQUIRE(HISTO_VALID(*targetp));
	}

	uint key = 0;
	uint64_t lo, hi, n;
	while (isc_histo_get(source, key, &lo, &hi, &n) == ISC_R_SUCCESS) {
		/* re-bucket by value range, not by key */
		isc_histo_put(*targetp, lo, hi, n);
		isc_histo_next(source, &key);
	}
}
/**********************************************************************/
/*
 * Create a multi-histogram containing `isc_tid_count()` histograms,
 * each with `sigbits` bits of precision, all allocated from `mctx`.
 */
void
isc_histomulti_create(isc_mem_t *mctx, uint sigbits, isc_histomulti_t **hmp) {
	REQUIRE(hmp != NULL);
	REQUIRE(*hmp == NULL);

	uint size = isc_tid_count();
	INSIST(size > 0);

	const size_t nbytes = STRUCT_FLEX_SIZE(*hmp, hg, size);
	isc_histomulti_t *multi = isc_mem_getx(mctx, nbytes, ISC_MEM_ZERO);
	*multi = (isc_histomulti_t){
		.size = size,
		.magic = HISTOMULTI_MAGIC,
	};

	for (uint t = 0; t < size; t++) {
		isc_histo_create(mctx, sigbits, &multi->hg[t]);
	}

	*hmp = multi;
}
/*
 * Destroy every histogram in the multi-histogram, then free the
 * multi-histogram itself. `*hmp` is cleared before anything is freed.
 */
void
isc_histomulti_destroy(isc_histomulti_t **hmp) {
	REQUIRE(hmp != NULL);
	REQUIRE(HISTOMULTI_VALID(*hmp));

	isc_histomulti_t *multi = *hmp;
	/*
	 * The multi-histogram has no memory context pointer of its own,
	 * so take the first histogram's before it is destroyed.
	 */
	isc_mem_t *mctx = multi->hg[0]->mctx;
	*hmp = NULL;

	uint t = 0;
	while (t < multi->size) {
		isc_histo_destroy(&multi->hg[t]);
		t++;
	}

	isc_mem_put(mctx, multi, STRUCT_FLEX_SIZE(multi, hg, multi->size));
}
/*
 * Merge every histogram in the multi-histogram into `*hgp`, one after
 * another, using `isc_histo_merge()`.
 */
void
isc_histomulti_merge(isc_histo_t **hgp, const isc_histomulti_t *hm) {
	REQUIRE(HISTOMULTI_VALID(hm));

	uint t = 0;
	while (t < hm->size) {
		isc_histo_merge(hgp, hm->hg[t]);
		t++;
	}
}
/*
 * Add `inc` to the bucket for `value` in the histogram selected by
 * the calling thread's `isc_tid()`.
 */
void
isc_histomulti_add(isc_histomulti_t *hm, uint64_t value, uint64_t inc) {
	REQUIRE(HISTOMULTI_VALID(hm));

	isc_histo_t *mine = hm->hg[isc_tid()];
	uint key = value_to_key(mine, value);
	add_key_count(mine, key, inc);
}
/*
 * Count a single sample in the calling thread's histogram.
 */
void
isc_histomulti_inc(isc_histomulti_t *hm, uint64_t value) {
	const uint64_t one = 1;
	isc_histomulti_add(hm, value, one);
}
/**********************************************************************/
/*
 * Calculate the population, mean, and standard deviation of the
 * samples in a histogram, treating every sample as if it were at the
 * midpoint of its bucket. The results are returned via `pm0`
 * (population), `pm1` (mean), and `pm2` (standard deviation); any of
 * them may be NULL if that result is not wanted.
 *
 * An empty histogram yields zero for all three results, rather than a
 * NaN standard deviation from dividing zero by zero.
 *
 * https://fanf2.user.srcf.net/hermes/doc/antiforgery/stats.pdf
 * equation 4 (incremental mean) and equation 44 (incremental variance)
 */
void
isc_histo_moments(const isc_histo_t *hg, double *pm0, double *pm1,
		  double *pm2) {
	REQUIRE(HISTO_VALID(hg));

	uint64_t pop = 0;
	double mean = 0.0;
	double sigma = 0.0;

	uint64_t min, max, count;
	for (uint key = 0;
	     isc_histo_get(hg, key, &min, &max, &count) == ISC_R_SUCCESS;
	     isc_histo_next(hg, &key))
	{
		if (count == 0) { /* avoid division by zero */
			continue;
		}
		/* halve before adding so the midpoint cannot overflow */
		double value = min / 2.0 + max / 2.0;
		double delta = value - mean;
		pop += count;
		mean += count * delta / pop;
		sigma += count * delta * (value - mean);
	}

	OUTARG(pm0, pop);
	OUTARG(pm1, mean);
	/* no samples means no spread: avoid 0.0 / 0 */
	OUTARG(pm2, (pop > 0) ? sqrt(sigma / pop) : 0.0);
}
/*
 * Clamped linear interpolation: map `in`, which lies in the input
 * range `lo` to `hi`, to the corresponding point in the output range
 * `min` to `max`.
 *
 * The output range should be `((1 << n) - 1)` for some `n`; when `n`
 * is larger than 53 it can get rounded up to a power of 2 as a double,
 * so we clamp the result to keep within bounds (extra important when
 * `max == UINT64_MAX`)
 */
static inline uint64_t
lerp(uint64_t min, uint64_t max, uint64_t lo, uint64_t in, uint64_t hi) {
	const uint64_t span = max - min;
	double numer = (double)(in - lo);
	double denom = (double)(hi - lo);
	double scaled = round((double)span * numer / denom);
	uint64_t offset = (uint64_t)scaled;
	return (min + ISC_MIN(offset, span));
}
/*
 * True when the bounds enclose a non-empty range (`lo < hi`) and `in`
 * lies within them, both ends included.
 */
static inline bool
inside(uint64_t lo, uint64_t in, uint64_t hi) {
	if (lo >= hi) {
		return (false);
	}
	return (in >= lo && in <= hi);
}
/*
 * Calculate `size` quantiles of the histogram in one pass. Each
 * `fraction[i]` is converted to a rank in the population, and
 * `value[i]` is set by linear interpolation within the bucket that
 * contains that rank.
 *
 * The fractions must be between 0.0 and 1.0 inclusive, and in strictly
 * decreasing order.
 *
 * Returns ISC_R_SUCCESS once all `size` values have been written, or
 * ISC_R_UNSET if the scan finishes first, which is what happens when
 * the histogram is empty.
 */
isc_result_t
isc_histo_quantiles(const isc_histo_t *hg, uint size, const double *fraction,
		    uint64_t *value) {
	hg_bucket_t *chunk[CHUNKS];
	uint64_t total[CHUNKS];
	uint64_t rank[ISC_HISTO_MAXQUANTILES];

	REQUIRE(HISTO_VALID(hg));
	REQUIRE(0 < size && size <= ISC_HISTO_MAXQUANTILES);
	REQUIRE(fraction != NULL);
	REQUIRE(value != NULL);

	const uint maxchunk = MAXCHUNK(hg);
	const uint chunksize = CHUNKSIZE(hg);

	/*
	 * Find out which chunks exist and what their totals are. We take a
	 * copy of the chunk pointers to reduce the need for atomic ops
	 * later on. Scan from low to high so that higher buckets are more
	 * likely to be in the CPU cache when we scan from high to low.
	 */
	uint64_t population = 0;
	for (uint c = 0; c < maxchunk; c++) {
		chunk[c] = get_chunk(hg, c);
		total[c] = 0;
		if (chunk[c] != NULL) {
			for (uint b = chunksize; b-- > 0;) {
				total[c] += bucket_count(&chunk[c][b]);
			}
			population += total[c];
		}
	}

	/*
	 * Now we know the population, we can convert fractions to ranks.
	 * Also ensure they are within bounds and in decreasing order.
	 */
	for (uint i = 0; i < size; i++) {
		REQUIRE(0.0 <= fraction[i] && fraction[i] <= 1.0);
		REQUIRE(i == 0 || fraction[i - 1] > fraction[i]);
		rank[i] = round(fraction[i] * population);
	}

	/*
	 * Scan chunks from high to low, keeping track of the bounds on
	 * each chunk's ranks. Each time we match `rank[i]`, move on to the
	 * next rank and continue the scan from the same place.
	 */
	uint i = 0;
	uint64_t chunk_lo = population;
	for (uint c = maxchunk; c-- > 0;) {
		uint64_t chunk_hi = chunk_lo;
		chunk_lo = chunk_hi - total[c];

		/*
		 * Scan buckets backwards within this chunk, in a similar
		 * manner to the chunk scan. Skip all or part of the loop
		 * if the current rank is not in the chunk.
		 *
		 * `inside()` is false for a chunk with a zero total, so
		 * the buckets of a NULL chunk are never dereferenced.
		 */
		uint64_t bucket_lo = chunk_hi;
		for (uint b = chunksize;
		     b-- > 0 && inside(chunk_lo, rank[i], chunk_hi);)
		{
			uint64_t bucket_hi = bucket_lo;
			bucket_lo = bucket_hi - bucket_count(&chunk[c][b]);

			/*
			 * Convert all ranks that fall in this bucket.
			 */
			while (inside(bucket_lo, rank[i], bucket_hi)) {
				uint key = chunksize * c + b;
				value[i] = lerp(key_to_minval(hg, key),
						key_to_maxval(hg, key),
						bucket_lo, rank[i], bucket_hi);
				/* return before `rank[size]` can be read */
				if (++i == size) {
					return (ISC_R_SUCCESS);
				}
			}
		}
	}

	return (ISC_R_UNSET);
}
/**********************************************************************/

391
lib/isc/include/isc/histo.h Normal file
View file

@ -0,0 +1,391 @@
/*
* Copyright (C) Internet Systems Consortium, Inc. ("ISC")
*
* SPDX-License-Identifier: MPL-2.0
*
* This Source Code Form is subject to the terms of the Mozilla Public
* License, v. 2.0. If a copy of the MPL was not distributed with this
* file, you can obtain one at https://mozilla.org/MPL/2.0/.
*
* See the COPYRIGHT file distributed with this work for additional
* information regarding copyright ownership.
*/
#pragma once
#include <sys/types.h>
#include <isc/mem.h>
/*
* An `isc_histo_t` is a thread-safe histogram of `uint64_t` values.
* It keeps a count of how many values land in each bucket. Use the
* `isc_histo_inc()`, `isc_histo_add()`, and `isc_histo_put()`
* functions to add values to the histogram.
*
* Values are mapped to buckets by rounding them according to a
* configurable precision, expressed as a number of significant bits.
* The bits <-> digits functions convert between decimal significant
* digits (as in scientific notation) and binary significant bits.
*
* You can use the `isc_histo_get()` function to export data from the
* histogram. The range of a bucket is returned as its minimum and
* maximum values, inclusive, i.e. a closed interval. We use closed
* intervals so we are able to express the maximum of the last bucket,
* UINT64_MAX, although half-open intervals are more common in C.
*
* You can calculate some basic statistics directly from a histogram.
* The `isc_histo_quantiles()` function can get a histogram's median,
* 99th percentile, etc. The `isc_histo_moments()` function gets a
* histogram's population, mean, and standard deviation.
*
* The size of a histogram depends on the range of values in the
* stream of samples, not the number of samples. Bucket counters are
* 64 bits each, and are allocated in chunks of `1 << sigbits` where
* `sigbits` is the histogram's configured precision. There are at
* most 64 chunks, one for each bit of a 64 bit value. Histograms with
* greater precision have larger chunks.
*
* At the low end (values near zero) there is one value per bucket,
* then two values, four, eight, etc. The number of values that map to
* a bucket is the same in each chunk. Chunks 0 and 1 have one value
* per bucket, (see `ISC_HISTO_UNITBUCKETS()` below), chunk 2 has 2
* values per bucket, chunk 3 has 4, etc.
*
* The update cost is roughly constant and very small (not much more
* than an atomic increment). It mostly depends on cache locality and
* thread contention.
*
* There is no overflow checking for the 64 bit bucket counters. It
* takes a few nanoseconds to add a sample to the histogram, so it
* would take at least a few CPU-centuries to cause an overflow.
* Aggregate statistics from a quarter of a million CPUs might
* overflow in a day. (Provided that in both examples the CPUs are
* doing nothing apart from repeatedly adding 1 to histogram buckets.)
*/
typedef struct isc_histo isc_histo_t;
typedef struct isc_histomulti isc_histomulti_t;
#define ISC_HISTO_MINBITS 1
#define ISC_HISTO_MAXBITS 18
#define ISC_HISTO_MINDIGITS 1
#define ISC_HISTO_MAXDIGITS 6
#define ISC_HISTO_MAXQUANTILES 101 /* enough for all the percentiles */
/*
* How many values map 1:1 to buckets for a given number of sigbits?
* These are the buckets at the low end, starting from zero.
*/
#define ISC_HISTO_UNITBUCKETS(sigbits) (2 << (sigbits))
void
isc_histo_create(isc_mem_t *mctx, uint sigbits, isc_histo_t **hgp);
/*%<
* Create a histogram.
*
* The relative error of values stored in the histogram is less than
* `pow(2.0, -sigbits)`.
*
* Requires:
*\li `sigbits >= ISC_HISTO_MINBITS`
*\li `sigbits <= ISC_HISTO_MAXBITS`
*\li `hgp != NULL`
*\li `*hgp == NULL`
*
* Ensures:
*\li `*hgp` is a pointer to a histogram.
*/
void
isc_histo_destroy(isc_histo_t **hgp);
/*%<
* Destroy a histogram
*
* Requires:
*\li `hgp != NULL`
*\li `*hgp` is a pointer to a valid histogram
*
* Ensures:
*\li all memory allocated by the histogram has been released
*\li `*hgp` is NULL
*/
uint
isc_histo_sigbits(isc_histo_t *hg);
/*%<
* Get the histogram's `sigbits` setting
*
* Requires:
*\li `hg` is a pointer to a valid histogram
*/
uint
isc_histo_bits_to_digits(uint bits);
/*%<
* Convert binary significant figures to decimal significant figures,
* rounding down, i.e. get the decimal precision you can expect from a
* given number of significant bits.
*
* Requires:
*\li `bits >= ISC_HISTO_MINBITS`
*\li `bits <= ISC_HISTO_MAXBITS`
*/
uint
isc_histo_digits_to_bits(uint digits);
/*%<
* Convert decimal significant figures to binary significant figures,
* rounding up, i.e. get the number of significant bits required to
* achieve the given decimal precision.
*
* Requires:
*\li `digits >= ISC_HISTO_MINDIGITS`
*\li `digits <= ISC_HISTO_MAXDIGITS`
*/
/**********************************************************************/
void
isc_histo_inc(isc_histo_t *hg, uint64_t value);
/*%<
* Add 1 to the value's bucket
*
* Requires:
*\li `hg` is a pointer to a valid histogram
*/
void
isc_histo_add(isc_histo_t *hg, uint64_t value, uint64_t inc);
/*%<
* Add an arbitrary increment to the value's bucket
*
* Note: there is no counter overflow checking
*
* Requires:
*\li `hg` is a pointer to a valid histogram
*/
void
isc_histo_put(isc_histo_t *hg, uint64_t min, uint64_t max, uint64_t count);
/*%<
* Import a collection of samples, where values between `min` and
* `max` inclusive occurred `count` times. This function is a
* counterpart to `isc_histo_get()`.
*
* Note: there is no counter overflow checking
*
* Requires:
*\li `min <= max`
*\li `hg` is a pointer to a valid histogram
*/
isc_result_t
isc_histo_get(const isc_histo_t *hg, uint key, uint64_t *minp, uint64_t *maxp,
uint64_t *countp);
/*%<
* Export information about a bucket.
*
* This can be used as an iterator, by initializing `key` to zero
* and incrementing by one or using `isc_histo_next()` until
* `isc_histo_get()` returns ISC_R_RANGE. The number of iterations is
* less than `64 << sigbits`. (64 for the maximum number of chunks,
* multiplied by the size of each chunk.)
*
* It is also a counterpart to `isc_histo_put()`.
*
* If `minp` is non-NULL it is set to the minimum inclusive value
* that maps to this bucket.
*
* If `maxp` is non-NULL it is set to the maximum inclusive value
* that maps to this bucket.
*
* If `countp` is non-NULL it is set to the bucket's counter,
* which can be zero.
*
* Requires:
*\li `hg` is a pointer to a valid histogram
*
* Returns:
*\li ISC_R_SUCCESS, if `key` is valid
*\li ISC_R_RANGE, otherwise
*/
void
isc_histo_next(const isc_histo_t *hg, uint *keyp);
/*%<
* Skip to the next key, omitting chunks of unallocated buckets.
*
* This function does not skip buckets that have been allocated but
* are zero. A chunk contains `1 << sigbits` buckets, and buckets
* are created in bulk one chunk at a time.
*
* Example:
*
* uint64_t min, max, count;
* for (uint key = 0;
* isc_histo_get(hg, key, &min, &max, &count) == ISC_R_SUCCESS;
* isc_histo_next(hg, &key))
* {
* // do something with the bucket
* }
*
* Requires:
*\li `hg` is a pointer to a valid histogram
*\li `keyp != NULL`
*/
void
isc_histo_merge(isc_histo_t **targetp, const isc_histo_t *source);
/*%<
* Increase the counts in `*targetp` by the counts recorded in `source`
*
* If `*targetp == NULL` then `*targetp` is set to point to a new
* histogram with the same `sigbits` as the `source`.
*
* This function uses `isc_histo_get()` and `isc_histo_next()` to
* export the data from `source`, and `isc_histo_put()` to import it
* into `*targetp`.
*
* Requires:
*\li `targetp != NULL`
*\li `*targetp` is NULL or a pointer to a valid histogram
*\li `source` is a pointer to a valid histogram
*
* Ensures:
*\li `*targetp` is a pointer to a valid histogram
*/
/**********************************************************************/
void
isc_histomulti_create(isc_mem_t *mctx, uint sigbits, isc_histomulti_t **hmp);
/*%<
* Create a multithreaded sharded histogram.
*
* Although an `isc_histo_t` is thread-safe, it can suffer
* from cache contention under heavy load. To avoid this,
* an `isc_histomulti_t` contains a histogram per thread,
* so updates are local and low-contention.
*
* Requires:
*\li `sigbits >= ISC_HISTO_MINBITS`
*\li `sigbits <= ISC_HISTO_MAXBITS`
*\li `hmp != NULL`
*\li `*hmp == NULL`
*
* Ensures:
*\li `*hmp` is a pointer to a multithreaded sharded histogram.
*/
void
isc_histomulti_destroy(isc_histomulti_t **hmp);
/*%<
* Destroy a multithreaded sharded histogram
*
* Requires:
*\li `hmp != NULL`
*\li `*hmp` is a pointer to a valid multithreaded sharded histogram
*
* Ensures:
*\li all memory allocated by the histogram has been released
*\li `*hmp == NULL`
*/
void
isc_histomulti_merge(isc_histo_t **targetp, const isc_histomulti_t *source);
/*%<
* Increase the counts in `*targetp` by the counts recorded in `source`
*
* The target histogram is created if `*targetp` is NULL.
*
* Requires:
*\li `targetp != NULL`
*\li `*targetp` is NULL or a pointer to a valid histogram
*\li `source` is a pointer to a valid multithreaded sharded histogram
*
* Ensures:
*\li `*targetp` is a pointer to a valid histogram
*/
void
isc_histomulti_inc(isc_histomulti_t *hm, uint64_t value);
/*%<
* Add 1 to the value's bucket
*
* Requires:
*\li `hm` is a pointer to a valid histomulti
*/
void
isc_histomulti_add(isc_histomulti_t *hm, uint64_t value, uint64_t inc);
/*%<
* Add an arbitrary increment to the value's bucket
*
* Requires:
*\li `hm` is a pointer to a valid histomulti
*/
/**********************************************************************/
void
isc_histo_moments(const isc_histo_t *hg, double *pm0, double *pm1, double *pm2);
/*%<
* Get the population, mean, and standard deviation of a histogram.
*
* If `pm0` is non-NULL it is set to the population of the histogram.
* (Strictly speaking, the zeroth moment is `pop / pop == 1`.)
*
* If `pm1` is non-NULL it is set to the mean (first moment) of the
* recorded data.
*
* If `pm2` is non-NULL it is set to the standard deviation of the
* recorded data. The standard deviation is the square root of the
* variance, which is the second moment about the mean.
*
* It is safe if the histogram is concurrently modified.
*
* Requires:
*\li `hg` is a pointer to a valid histogram
*/
isc_result_t
isc_histo_quantiles(const isc_histo_t *hg, uint size, const double *fraction,
uint64_t *value);
/*%<
* The quantile function (aka inverse cumulative distribution function)
* of the histogram. What value is greater than the given fraction of
* the population?
*
* A fraction of 0.5 gets the median value: it is greater than half
* the population. 0.75 gets the third quartile value, and 0.99 gets
* the 99th percentile value. The fraction must be between 0.0 and 1.0
* inclusive.
*
* https://enwp.org/Quantile_function
*
* This implementation allows you to query quantile values for
* multiple fractions in one function call. Internally, it makes one
* linear scan over the histogram's buckets to find all the fractions.
* Buckets are scanned from high to low, so that querying large
* quantiles is more efficient. The `fraction` array must be sorted in
* decreasing order. The results are stored in the `value` array. Both
* arrays have `size` elements.
*
* The results may be nonsense if the histogram is concurrently
* modified. To get a stable copy you can call `isc_histo_merge()`.
*
* Requires:
*\li `hg` is a pointer to a valid histogram
*\li `0 < size && size <= ISC_HISTO_MAXQUANTILES`
*\li `fraction != NULL`
*\li `value != NULL`
*\li `0.0 <= fraction[i] && fraction[i] <= 1.0` for every element
*\li `fraction[i - 1] > fraction[i]` for every pair of elements
*
* Returns:
*\li ISC_R_SUCCESS, if results were stored in the `value` array
*\li ISC_R_UNSET, if the histogram is empty
*/
/**********************************************************************/

View file

@ -21,6 +21,12 @@ ISC_LANG_BEGINDECLS
#define ISC_TID_UNKNOWN UINT32_MAX
uint32_t
isc_tid_count(void);
/*%<
* Returns the number of threads.
*/
uint32_t
isc_tid(void);
/*%<
@ -32,4 +38,7 @@ isc_tid(void);
void
isc__tid_init(uint32_t tid);
void
isc__tid_initcount(uint32_t count);
ISC_LANG_ENDDECLS

View file

@ -346,6 +346,7 @@ isc_loopmgr_create(isc_mem_t *mctx, uint32_t nloops, isc_loopmgr_t **loopmgrp) {
REQUIRE(nloops > 0);
threadpool_initialize(nloops);
isc__tid_initcount(nloops);
loopmgr = isc_mem_get(mctx, sizeof(*loopmgr));
*loopmgr = (isc_loopmgr_t){

View file

@ -26,7 +26,13 @@
#define ISC_TID_UNKNOWN UINT32_MAX
static thread_local uint32_t isc__tid_v = ISC_TID_UNKNOWN;
static thread_local uint32_t tid_local = ISC_TID_UNKNOWN;
/*
* Zero is a better nonsense value in this case than ISC_TID_UNKNOWN;
* avoids things like trying to allocate 32GB of per-thread counters.
*/
static uint32_t tid_count = 0;
/**
* Protected
@ -34,9 +40,14 @@ static thread_local uint32_t isc__tid_v = ISC_TID_UNKNOWN;
void
isc__tid_init(uint32_t tid) {
REQUIRE(isc__tid_v == ISC_TID_UNKNOWN || isc__tid_v == tid);
REQUIRE(tid_local == ISC_TID_UNKNOWN || tid_local == tid);
tid_local = tid;
}
isc__tid_v = tid;
void
isc__tid_initcount(uint32_t count) {
REQUIRE(tid_count == 0 || tid_count == count);
tid_count = count;
}
/**
@ -45,5 +56,10 @@ isc__tid_init(uint32_t tid) {
uint32_t
isc_tid(void) {
return (isc__tid_v);
return (tid_local);
}
uint32_t
isc_tid_count(void) {
return (tid_count);
}

View file

@ -655,12 +655,12 @@ renderend:
switch (isc_sockaddr_pf(&client->peeraddr)) {
case AF_INET:
isc_stats_increment(client->manager->sctx->tcpoutstats4,
ISC_MIN((int)respsize / 16, 256));
isc_histomulti_inc(client->manager->sctx->tcpoutstats4,
DNS_SIZEHISTO_BUCKETOUT(respsize));
break;
case AF_INET6:
isc_stats_increment(client->manager->sctx->tcpoutstats6,
ISC_MIN((int)respsize / 16, 256));
isc_histomulti_inc(client->manager->sctx->tcpoutstats6,
DNS_SIZEHISTO_BUCKETOUT(respsize));
break;
default:
UNREACHABLE();
@ -684,12 +684,12 @@ renderend:
switch (isc_sockaddr_pf(&client->peeraddr)) {
case AF_INET:
isc_stats_increment(client->manager->sctx->udpoutstats4,
ISC_MIN((int)respsize / 16, 256));
isc_histomulti_inc(client->manager->sctx->udpoutstats4,
DNS_SIZEHISTO_BUCKETOUT(respsize));
break;
case AF_INET6:
isc_stats_increment(client->manager->sctx->udpoutstats6,
ISC_MIN((int)respsize / 16, 256));
isc_histomulti_inc(client->manager->sctx->udpoutstats6,
DNS_SIZEHISTO_BUCKETOUT(respsize));
break;
default:
UNREACHABLE();
@ -1861,12 +1861,12 @@ ns_client_request(isc_nmhandle_t *handle, isc_result_t eresult,
ns_statscounter_requesttcp);
switch (isc_sockaddr_pf(&client->peeraddr)) {
case AF_INET:
isc_stats_increment(client->manager->sctx->tcpinstats4,
ISC_MIN((int)reqsize / 16, 18));
isc_histomulti_inc(client->manager->sctx->tcpinstats4,
DNS_SIZEHISTO_BUCKETIN(reqsize));
break;
case AF_INET6:
isc_stats_increment(client->manager->sctx->tcpinstats6,
ISC_MIN((int)reqsize / 16, 18));
isc_histomulti_inc(client->manager->sctx->tcpinstats6,
DNS_SIZEHISTO_BUCKETIN(reqsize));
break;
default:
UNREACHABLE();
@ -1874,12 +1874,12 @@ ns_client_request(isc_nmhandle_t *handle, isc_result_t eresult,
} else {
switch (isc_sockaddr_pf(&client->peeraddr)) {
case AF_INET:
isc_stats_increment(client->manager->sctx->udpinstats4,
ISC_MIN((int)reqsize / 16, 18));
isc_histomulti_inc(client->manager->sctx->udpinstats4,
DNS_SIZEHISTO_BUCKETIN(reqsize));
break;
case AF_INET6:
isc_stats_increment(client->manager->sctx->udpinstats6,
ISC_MIN((int)reqsize / 16, 18));
isc_histomulti_inc(client->manager->sctx->udpinstats6,
DNS_SIZEHISTO_BUCKETIN(reqsize));
break;
default:
UNREACHABLE();

View file

@ -19,6 +19,7 @@
#include <stdbool.h>
#include <isc/fuzz.h>
#include <isc/histo.h>
#include <isc/log.h>
#include <isc/magic.h>
#include <isc/quota.h>
@ -113,15 +114,15 @@ struct ns_server {
dns_stats_t *opcodestats;
dns_stats_t *rcodestats;
isc_stats_t *udpinstats4;
isc_stats_t *udpoutstats4;
isc_stats_t *udpinstats6;
isc_stats_t *udpoutstats6;
isc_histomulti_t *udpinstats4;
isc_histomulti_t *udpoutstats4;
isc_histomulti_t *udpinstats6;
isc_histomulti_t *udpoutstats6;
isc_stats_t *tcpinstats4;
isc_stats_t *tcpoutstats4;
isc_stats_t *tcpinstats6;
isc_stats_t *tcpoutstats6;
isc_histomulti_t *tcpinstats4;
isc_histomulti_t *tcpoutstats4;
isc_histomulti_t *tcpinstats6;
isc_histomulti_t *tcpoutstats6;
};
struct ns_altsecret {

View file

@ -75,29 +75,29 @@ ns_server_create(isc_mem_t *mctx, ns_matchview_t matchingview,
CHECKFATAL(dns_rcodestats_create(mctx, &sctx->rcodestats));
CHECKFATAL(isc_stats_create(mctx, &sctx->udpinstats4,
dns_sizecounter_in_max));
isc_histomulti_create(mctx, DNS_SIZEHISTO_SIGBITSIN,
&sctx->udpinstats4);
CHECKFATAL(isc_stats_create(mctx, &sctx->udpoutstats4,
dns_sizecounter_out_max));
isc_histomulti_create(mctx, DNS_SIZEHISTO_SIGBITSOUT,
&sctx->udpoutstats4);
CHECKFATAL(isc_stats_create(mctx, &sctx->udpinstats6,
dns_sizecounter_in_max));
isc_histomulti_create(mctx, DNS_SIZEHISTO_SIGBITSIN,
&sctx->udpinstats6);
CHECKFATAL(isc_stats_create(mctx, &sctx->udpoutstats6,
dns_sizecounter_out_max));
isc_histomulti_create(mctx, DNS_SIZEHISTO_SIGBITSOUT,
&sctx->udpoutstats6);
CHECKFATAL(isc_stats_create(mctx, &sctx->tcpinstats4,
dns_sizecounter_in_max));
isc_histomulti_create(mctx, DNS_SIZEHISTO_SIGBITSIN,
&sctx->tcpinstats4);
CHECKFATAL(isc_stats_create(mctx, &sctx->tcpoutstats4,
dns_sizecounter_out_max));
isc_histomulti_create(mctx, DNS_SIZEHISTO_SIGBITSOUT,
&sctx->tcpoutstats4);
CHECKFATAL(isc_stats_create(mctx, &sctx->tcpinstats6,
dns_sizecounter_in_max));
isc_histomulti_create(mctx, DNS_SIZEHISTO_SIGBITSIN,
&sctx->tcpinstats6);
CHECKFATAL(isc_stats_create(mctx, &sctx->tcpoutstats6,
dns_sizecounter_out_max));
isc_histomulti_create(mctx, DNS_SIZEHISTO_SIGBITSOUT,
&sctx->tcpoutstats6);
ISC_LIST_INIT(sctx->altsecrets);
@ -178,29 +178,29 @@ ns_server_detach(ns_server_t **sctxp) {
}
if (sctx->udpinstats4 != NULL) {
isc_stats_detach(&sctx->udpinstats4);
isc_histomulti_destroy(&sctx->udpinstats4);
}
if (sctx->tcpinstats4 != NULL) {
isc_stats_detach(&sctx->tcpinstats4);
isc_histomulti_destroy(&sctx->tcpinstats4);
}
if (sctx->udpoutstats4 != NULL) {
isc_stats_detach(&sctx->udpoutstats4);
isc_histomulti_destroy(&sctx->udpoutstats4);
}
if (sctx->tcpoutstats4 != NULL) {
isc_stats_detach(&sctx->tcpoutstats4);
isc_histomulti_destroy(&sctx->tcpoutstats4);
}
if (sctx->udpinstats6 != NULL) {
isc_stats_detach(&sctx->udpinstats6);
isc_histomulti_destroy(&sctx->udpinstats6);
}
if (sctx->tcpinstats6 != NULL) {
isc_stats_detach(&sctx->tcpinstats6);
isc_histomulti_destroy(&sctx->tcpinstats6);
}
if (sctx->udpoutstats6 != NULL) {
isc_stats_detach(&sctx->udpoutstats6);
isc_histomulti_destroy(&sctx->udpoutstats6);
}
if (sctx->tcpoutstats6 != NULL) {
isc_stats_detach(&sctx->tcpoutstats6);
isc_histomulti_destroy(&sctx->tcpoutstats6);
}
sctx->magic = 0;

View file

@ -22,6 +22,7 @@ check_PROGRAMS = \
hash_test \
hashmap_test \
heap_test \
histo_test \
hmac_test \
ht_test \
job_test \

351
tests/isc/histo_test.c Normal file
View file

@ -0,0 +1,351 @@
/*
* Copyright (C) Internet Systems Consortium, Inc. ("ISC")
*
* SPDX-License-Identifier: MPL-2.0
*
* This Source Code Form is subject to the terms of the Mozilla Public
* License, v. 2.0. If a copy of the MPL was not distributed with this
* file, you can obtain one at https://mozilla.org/MPL/2.0/.
*
* See the COPYRIGHT file distributed with this work for additional
* information regarding copyright ownership.
*/
/*! \file */
#include <math.h>
#include <sched.h> /* IWYU pragma: keep */
#include <setjmp.h>
#include <stdarg.h>
#include <stddef.h>
#include <stdlib.h>
#include <string.h>
#define UNIT_TESTING
#include <cmocka.h>
#include <isc/histo.h>
#include <isc/result.h>
#include <isc/time.h>
#include <tests/isc.h>
#define TIME_LIMIT (123 * NS_PER_MS)
#define SUBRANGE 69
#if VERBOSE
#define TRACE(fmt, ...) \
fprintf(stderr, "%s:%u:%s(): " fmt "\n", __FILE__, __LINE__, __func__, \
__VA_ARGS__)
#define TRACETIME(fmt, ...) \
TRACE("%u bits %.1f ms " fmt, bits, millis_since(start), ##__VA_ARGS__)
/*
 * Milliseconds elapsed between `start` and the current reading of
 * isc_time_monotonic(), as a double for use with "%.1f" in TRACETIME().
 */
static double
millis_since(isc_nanosecs_t start) {
	const isc_nanosecs_t elapsed = isc_time_monotonic() - start;

	return ((double)elapsed / NS_PER_MS);
}
#else
#define TRACE(...)
#define TRACETIME(...) UNUSED(start)
#endif
/*
* Note: in many of these tests when adding data to a histogram,
* we need to iterate using `key++` instead of `isc_histo_next()`
* because the latter skips chunks that we want to fill but have
* not yet done so.
*/
/*
 * For every supported precision, walk the buckets in key order and check
 * that a bucket's `min` and `max` both map to it, that `max + 1` maps to
 * the following bucket, and that consecutive buckets leave no gap.
 *
 * Each precision is abandoned once TIME_LIMIT has elapsed, in which case
 * the end-of-range checks are skipped.
 */
ISC_RUN_TEST_IMPL(basics) {
isc_result_t result;
for (uint bits = ISC_HISTO_MINBITS; bits <= ISC_HISTO_MAXBITS; bits++) {
isc_nanosecs_t start = isc_time_monotonic();
isc_histo_t *hg = NULL;
isc_histo_create(mctx, bits, &hg);
/*
 * Seed bucket 0 so that the "count == 1" check at the top of the
 * loop also holds on the first iteration.
 */
isc_histo_inc(hg, 0);
uint64_t min, max, count;
uint64_t prev_max = 0;
uint key = 0;
result = isc_histo_get(hg, key, &min, &max, &count);
while (result == ISC_R_SUCCESS) {
/* previous iteration already bumped this bucket */
assert_int_equal(count, 1);
/* min maps to this bucket */
isc_histo_inc(hg, min);
result = isc_histo_get(hg, key, &min, &max, &count);
assert_int_equal(result, ISC_R_SUCCESS);
assert_int_equal(count, 2);
/* max maps to this bucket */
isc_histo_add(hg, max, 2);
result = isc_histo_get(hg, key, &min, &max, &count);
assert_int_equal(result, ISC_R_SUCCESS);
assert_int_equal(count, 4);
/* put range covers this bucket */
isc_histo_put(hg, min, max, 4);
result = isc_histo_get(hg, key, &min, &max, &count);
assert_int_equal(result, ISC_R_SUCCESS);
assert_int_equal(count, 8);
/* guard: max + 1 would wrap around for the last bucket */
if (max < UINT64_MAX) {
/* max + 1 maps to next bucket */
isc_histo_inc(hg, max + 1);
result = isc_histo_get(hg, key, &min, &max,
&count);
assert_int_equal(result, ISC_R_SUCCESS);
/* this bucket was not bumped */
assert_int_equal(count, 8);
}
if (key == 0) {
/* the first bucket holds only the value zero */
assert_int_equal(min, 0);
assert_int_equal(max, 0);
} else {
/* no gap between buckets */
assert_int_equal(min, prev_max + 1);
}
prev_max = max;
/* advance with key++, see the note above these tests */
key++;
result = isc_histo_get(hg, key, &min, &max, &count);
/* these tests can be slow */
if (isc_time_monotonic() > start + TIME_LIMIT) {
break;
}
}
/* if we did not stop early */
if (result != ISC_R_SUCCESS) {
/* last bucket goes up to last possible value */
assert_int_equal(max, UINT64_MAX);
double pop;
isc_histo_moments(hg, &pop, NULL, NULL);
/* every bucket received 1 + 1 + 2 + 4 = 8 samples in total */
assert_int_equal((uint64_t)pop, key * 8);
}
isc_histo_destroy(&hg);
TRACETIME("%u keys", key);
}
}
/*
 * For every supported precision, put exactly two samples in each bucket
 * (its `min` and its `max`), then query isc_histo_quantiles() at the rank
 * boundaries of each bucket and check that the returned values land on
 * the bucket's minimum, midpoint, and maximum (give or take one).
 *
 * Quantiles are requested in batches of `quantum` buckets at a time.
 * Each precision is abandoned once TIME_LIMIT has elapsed.
 */
ISC_RUN_TEST_IMPL(quantiles) {
for (uint bits = ISC_HISTO_MINBITS; bits <= ISC_HISTO_MAXBITS; bits++) {
isc_result_t result;
uint64_t min, max, count;
double pop;
uint key;
isc_nanosecs_t start = isc_time_monotonic();
isc_histo_t *hg = NULL;
isc_histo_create(mctx, bits, &hg);
for (key = 0; isc_histo_get(hg, key, &min, &max, &count) ==
ISC_R_SUCCESS;
key++)
{
/* inc twice so we can check bucket's midpoint */
assert_int_equal(count, 0);
isc_histo_inc(hg, min);
isc_histo_inc(hg, max);
}
/* the loop above stops at the first invalid key */
const uint buckets = key;
/* no incs were lost */
isc_histo_moments(hg, &pop, NULL, NULL);
assert_float_equal(pop, buckets * 2, 0.5);
/* two ranks per bucket */
const uint quantum = ISC_HISTO_MAXQUANTILES / 2 - 1;
uint64_t value[ISC_HISTO_MAXQUANTILES];
double frac[ISC_HISTO_MAXQUANTILES];
uint base = 0;
for (key = 0; key < buckets; key++) {
/* fill in the values one quantum at a time */
if (key == 0 || key % quantum == buckets % quantum) {
base = key;
/*
 * Bucket base + k gets two fractions: rank / pop at index
 * (quantum - k) * 2 and (rank + 1) / pop at the index below.
 * Larger k therefore lands at smaller indexes, so frac[] is
 * in decreasing order as isc_histo_quantiles() requires.
 */
for (uint k = 0; k < quantum; k++) {
double rank = (base + k) * 2;
uint i = (quantum - k) * 2;
frac[i - 1] = (rank + 1.0) / pop;
frac[i - 0] = rank / pop;
}
/* upper fencepost: the rank just past the last bucket of the batch */
frac[0] = (base + quantum) * 2 / pop;
result = isc_histo_quantiles(
hg, quantum * 2 + 1, frac, value);
assert_int_equal(result, ISC_R_SUCCESS);
}
result = isc_histo_get(hg, key, &min, &max, &count);
assert_int_equal(result, ISC_R_SUCCESS);
assert_int_equal(count, 2);
/*
 * Acceptable ranges for the three quantile values; the
 * ternaries avoid wrapping below 0 and above UINT64_MAX.
 */
uint64_t lomin = min == 0 ? min : min - 1;
uint64_t himin = min;
uint64_t lomid = floor(min / 2.0 + max / 2.0);
uint64_t himid = ceil(min / 2.0 + max / 2.0);
uint64_t lomax = max;
uint64_t himax = max == UINT64_MAX ? max : max + 1;
/* same index mapping as when frac[] was filled, with k = key - base */
uint i = (quantum + base - key) * 2;
/* check fenceposts */
assert_in_range(value[i - 0], lomin, himin);
assert_in_range(value[i - 1], lomid, himid);
assert_in_range(value[i - 2], lomax, himax);
/* these tests can be slow */
if (isc_time_monotonic() > start + TIME_LIMIT) {
break;
}
}
isc_histo_destroy(&hg);
TRACETIME("");
}
}
/*
 * Check the bits <-> digits conversions at both ends of their ranges,
 * then for every precision confirm that the relative error of each
 * bucket (other than bucket 0) is below 2^-bits, and that 2^-bits is
 * in turn below the corresponding decimal precision.
 */
ISC_RUN_TEST_IMPL(sigfigs) {
	/* the conversion functions agree at the limits */
	assert_int_equal(ISC_HISTO_MINBITS,
			 isc_histo_digits_to_bits(ISC_HISTO_MINDIGITS));
	assert_int_equal(ISC_HISTO_MINDIGITS,
			 isc_histo_bits_to_digits(ISC_HISTO_MINBITS));
	assert_int_equal(ISC_HISTO_MAXBITS,
			 isc_histo_digits_to_bits(ISC_HISTO_MAXDIGITS));
	assert_int_equal(ISC_HISTO_MAXDIGITS,
			 isc_histo_bits_to_digits(ISC_HISTO_MAXBITS));

	/*
	 * Running decimal precision: `decimal_digits` tracks the digit
	 * count seen so far and `exp10` is multiplied by ten each time
	 * it grows; one digit starts out with a relative error of 1.
	 */
	uint decimal_digits = 1;
	double exp10 = 1.0;

	for (uint bits = ISC_HISTO_MINBITS; bits <= ISC_HISTO_MAXBITS; bits++) {
		isc_histo_t *histo = NULL;
		isc_histo_create(mctx, bits, &histo);

		uint digits = isc_histo_bits_to_digits(bits);
		assert_true(bits >= isc_histo_digits_to_bits(digits));

		/* digits may only ever grow one step at a time */
		if (decimal_digits < digits) {
			exp10 *= 10.0;
			decimal_digits += 1;
			assert_int_equal(decimal_digits, digits);
		}

		TRACE("%u binary %f decimal", 1 << bits, exp10);

		/* binary precision is better than decimal precision */
		double nominal = 1.0 / (double)(1 << bits);
		assert_true(nominal < 1.0 / exp10);

		/* bucket 0 is skipped: its min + max would be zero */
		uint64_t lo, hi;
		uint key = 1;
		while (isc_histo_get(histo, key, &lo, &hi, NULL) ==
		       ISC_R_SUCCESS)
		{
			double width = (double)hi - (double)lo;
			double error = width / ((double)hi + (double)lo);
			assert_true(error < nominal);
			key++;
		}

		isc_histo_destroy(&histo);
	}
}
/*
 * For every supported precision, slide a window of SUBRANGE consecutive
 * buckets along the histogram. For each window position, start from an
 * empty histogram, import SUBRANGE samples across the window with one
 * isc_histo_put() call, and check that each bucket in the window got
 * exactly one sample and that the quantile values match the bucket minima.
 *
 * Each precision is abandoned once TIME_LIMIT has elapsed.
 */
ISC_RUN_TEST_IMPL(subrange) {
for (uint bits = ISC_HISTO_MINBITS; bits <= ISC_HISTO_MAXBITS; bits++) {
isc_result_t result;
uint64_t min, max, count;
isc_nanosecs_t start = isc_time_monotonic();
isc_histo_t *hg = NULL;
isc_histo_create(mctx, bits, &hg);
uint64_t value[SUBRANGE + 1];
double frac[SUBRANGE + 1];
/* SUBRANGE + 1 evenly spaced fractions, descending from 1.0 to 0.0 */
for (uint i = 0; i <= SUBRANGE; i++) {
frac[i] = (double)(SUBRANGE - i) / (double)(SUBRANGE);
}
/* nothing has been recorded yet, so there are no quantiles */
result = isc_histo_quantiles(hg, ARRAY_SIZE(frac), frac, value);
assert_int_equal(result, ISC_R_UNSET);
/* [key, top] is a window of SUBRANGE buckets; stop when it runs off the end */
for (uint key = 0, top = SUBRANGE - 1;; key++, top++) {
if (isc_histo_get(hg, key, &min, NULL, NULL) !=
ISC_R_SUCCESS)
{
break;
}
if (isc_histo_get(hg, top, NULL, &max, NULL) !=
ISC_R_SUCCESS)
{
break;
}
/*
* If we try adding more than one sample per bucket
* here, the test fails when buckets have different
* sizes because [min,max] spans multiple chunks.
*/
isc_histo_put(hg, min, max, SUBRANGE);
result = isc_histo_quantiles(hg, ARRAY_SIZE(frac), frac,
value);
assert_int_equal(result, ISC_R_SUCCESS);
/* note: this loop reuses min and max for each bucket in the window */
for (uint bucket = 0; bucket < SUBRANGE; bucket++) {
result = isc_histo_get(hg, key + bucket, &min,
&max, &count);
assert_int_equal(result, ISC_R_SUCCESS);
/* did isc_histo_put() spread evenly? */
assert_int_equal(count, 1);
/* do the quantile values match? */
assert_int_equal(value[SUBRANGE - bucket], min);
}
/* max now belongs to the last bucket of the window */
assert_int_equal(value[0], max);
/* start the next window position from an empty histogram */
isc_histo_destroy(&hg);
isc_histo_create(mctx, bits, &hg);
/* these tests can be slow */
if (isc_time_monotonic() > start + TIME_LIMIT) {
break;
}
}
isc_histo_destroy(&hg);
TRACETIME("");
}
}
/* Test list: one entry for each ISC_RUN_TEST_IMPL() test defined above. */
ISC_TEST_LIST_START
ISC_TEST_ENTRY(basics)
ISC_TEST_ENTRY(quantiles)
ISC_TEST_ENTRY(sigfigs)
ISC_TEST_ENTRY(subrange)
ISC_TEST_LIST_END
ISC_TEST_MAIN