mirror of
https://github.com/isc-projects/bind9.git
synced 2026-06-11 07:40:00 -04:00
Merge branch 'fanf-lemire-nearly-divisionless' into 'main'
Make isc_random_uniform() nearly divisionless See merge request isc-projects/bind9!6161
This commit is contained in:
commit
870f785e49
8 changed files with 133 additions and 75 deletions
4
CHANGES
4
CHANGES
|
|
@ -1,3 +1,7 @@
|
|||
5868. [cleanup] Use Daniel Lemire's "nearly divisionless" algorithm
|
||||
for unbiased bounded random numbers, and move
|
||||
re-seeding out of the hot path. [GL !6161]
|
||||
|
||||
5867. [bug] Fix assertion failure triggered by attaching to dns_adb
|
||||
in dns_adb_createfind() that has been triggered to shut
|
||||
down in different thread between the check for shutting
|
||||
|
|
|
|||
|
|
@ -170,6 +170,7 @@ libisc_la_SOURCES = \
|
|||
quota.c \
|
||||
radix.c \
|
||||
random.c \
|
||||
random_p.h \
|
||||
ratelimiter.c \
|
||||
regex.c \
|
||||
region.c \
|
||||
|
|
|
|||
|
|
@ -54,12 +54,18 @@ isc_random_buf(void *buf, size_t buflen);
|
|||
uint32_t
|
||||
isc_random_uniform(uint32_t upper_bound);
|
||||
/*!<
|
||||
* \brief Will return a single 32-bit value, uniformly distributed but
|
||||
* less than upper_bound. This is recommended over
|
||||
* constructions like ``isc_random() % upper_bound'' as it
|
||||
* avoids "modulo bias" when the upper bound is not a power of
|
||||
* two. In the worst case, this function may require multiple
|
||||
* iterations to ensure uniformity.
|
||||
* \brief Returns a single 32-bit uniformly distributed random value
|
||||
* less than upper_bound.
|
||||
*
|
||||
* This is better than ``isc_random() % upper_bound'' as it avoids
|
||||
* "modulo bias" when the upper bound is not a power of two. This
|
||||
* function is also faster, because it usually avoids doing any
|
||||
* divisions (which are typically very slow).
|
||||
*
|
||||
* It uses rejection sampling to ensure uniformity, so it may require
|
||||
* multiple iterations to get a result; the probability of needing to
|
||||
* resample is very small when the upper_bound is small, rising to 0.5
|
||||
* when upper_bound is UINT32_MAX/2.
|
||||
*/
|
||||
|
||||
ISC_LANG_ENDDECLS
|
||||
|
|
|
|||
|
|
@ -22,6 +22,7 @@
|
|||
#include "config.h"
|
||||
#include "mem_p.h"
|
||||
#include "os_p.h"
|
||||
#include "random_p.h"
|
||||
#include "tls_p.h"
|
||||
#include "trampoline_p.h"
|
||||
|
||||
|
|
@ -42,6 +43,7 @@ void
|
|||
isc__initialize(void) {
|
||||
isc__os_initialize();
|
||||
isc__mem_initialize();
|
||||
isc__random_initialize();
|
||||
isc__tls_initialize();
|
||||
isc__trampoline_initialize();
|
||||
(void)isc_os_ncpus();
|
||||
|
|
|
|||
142
lib/isc/random.c
142
lib/isc/random.c
|
|
@ -35,7 +35,6 @@
|
|||
#include <string.h>
|
||||
#include <unistd.h>
|
||||
|
||||
#include <isc/once.h>
|
||||
#include <isc/random.h>
|
||||
#include <isc/result.h>
|
||||
#include <isc/thread.h>
|
||||
|
|
@ -43,22 +42,7 @@
|
|||
#include <isc/util.h>
|
||||
|
||||
#include "entropy_private.h"
|
||||
|
||||
/*
|
||||
* The specific implementation for PRNG is included as a C file
|
||||
* that has to provide a static variable named seed, and a function
|
||||
* uint32_t next(void) that provides next random number.
|
||||
*
|
||||
* The implementation must be thread-safe.
|
||||
*/
|
||||
|
||||
/*
|
||||
* Two contestants have been considered: the xoroshiro family of the
|
||||
* functions by Villa&Blackman, and PCG by O'Neill. After
|
||||
* consideration, the xoshiro128starstar function has been chosen as
|
||||
* the uint32_t random number provider because it is very fast and has
|
||||
* good enough properties for our usage pattern.
|
||||
*/
|
||||
#include "random_p.h"
|
||||
|
||||
/*
|
||||
* Written in 2018 by David Blackman and Sebastiano Vigna (vigna@acm.org)
|
||||
|
|
@ -76,11 +60,9 @@
|
|||
* It has excellent (sub-ns) speed, a state size (128 bits) that is large
|
||||
* enough for mild parallelism, and it passes all tests we are aware of.
|
||||
*
|
||||
* For generating just single-precision (i.e., 32-bit) floating-point
|
||||
* numbers, xoshiro128+ is even faster.
|
||||
*
|
||||
* The state must be seeded so that it is not everywhere zero.
|
||||
*/
|
||||
|
||||
static thread_local uint32_t seed[4] = { 0 };
|
||||
|
||||
static uint32_t
|
||||
|
|
@ -106,11 +88,8 @@ next(void) {
|
|||
|
||||
return (result_starstar);
|
||||
}
|
||||
|
||||
static thread_local isc_once_t isc_random_once = ISC_ONCE_INIT;
|
||||
|
||||
static void
|
||||
isc_random_initialize(void) {
|
||||
void
|
||||
isc__random_initialize(void) {
|
||||
int useed[4] = { 0, 0, 0, 1 };
|
||||
#if FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION
|
||||
/*
|
||||
|
|
@ -126,22 +105,16 @@ isc_random_initialize(void) {
|
|||
|
||||
uint8_t
|
||||
isc_random8(void) {
|
||||
RUNTIME_CHECK(isc_once_do(&isc_random_once, isc_random_initialize) ==
|
||||
ISC_R_SUCCESS);
|
||||
return (next() & 0xff);
|
||||
return ((uint8_t)next());
|
||||
}
|
||||
|
||||
uint16_t
|
||||
isc_random16(void) {
|
||||
RUNTIME_CHECK(isc_once_do(&isc_random_once, isc_random_initialize) ==
|
||||
ISC_R_SUCCESS);
|
||||
return (next() & 0xffff);
|
||||
return ((uint16_t)next());
|
||||
}
|
||||
|
||||
uint32_t
|
||||
isc_random32(void) {
|
||||
RUNTIME_CHECK(isc_once_do(&isc_random_once, isc_random_initialize) ==
|
||||
ISC_R_SUCCESS);
|
||||
return (next());
|
||||
}
|
||||
|
||||
|
|
@ -153,9 +126,6 @@ isc_random_buf(void *buf, size_t buflen) {
|
|||
REQUIRE(buf != NULL);
|
||||
REQUIRE(buflen > 0);
|
||||
|
||||
RUNTIME_CHECK(isc_once_do(&isc_random_once, isc_random_initialize) ==
|
||||
ISC_R_SUCCESS);
|
||||
|
||||
for (i = 0; i + sizeof(r) <= buflen; i += sizeof(r)) {
|
||||
r = next();
|
||||
memmove((uint8_t *)buf + i, &r, sizeof(r));
|
||||
|
|
@ -166,41 +136,75 @@ isc_random_buf(void *buf, size_t buflen) {
|
|||
}
|
||||
|
||||
uint32_t
|
||||
isc_random_uniform(uint32_t upper_bound) {
|
||||
/* Copy of arc4random_uniform from OpenBSD */
|
||||
uint32_t r, min;
|
||||
|
||||
RUNTIME_CHECK(isc_once_do(&isc_random_once, isc_random_initialize) ==
|
||||
ISC_R_SUCCESS);
|
||||
|
||||
if (upper_bound < 2) {
|
||||
return (0);
|
||||
}
|
||||
|
||||
#if (ULONG_MAX > 0xffffffffUL)
|
||||
min = 0x100000000UL % upper_bound;
|
||||
#else /* if (ULONG_MAX > 0xffffffffUL) */
|
||||
/* Calculate (2**32 % upper_bound) avoiding 64-bit math */
|
||||
if (upper_bound > 0x80000000) {
|
||||
min = 1 + ~upper_bound; /* 2**32 - upper_bound */
|
||||
} else {
|
||||
/* (2**32 - (x * 2)) % x == 2**32 % x when x <= 2**31 */
|
||||
min = ((0xffffffff - (upper_bound * 2)) + 1) % upper_bound;
|
||||
}
|
||||
#endif /* if (ULONG_MAX > 0xffffffffUL) */
|
||||
|
||||
isc_random_uniform(uint32_t limit) {
|
||||
/*
|
||||
* This could theoretically loop forever but each retry has
|
||||
* p > 0.5 (worst case, usually far better) of selecting a
|
||||
* number inside the range we need, so it should rarely need
|
||||
* to re-roll.
|
||||
* Daniel Lemire's nearly-divisionless unbiased bounded random numbers.
|
||||
*
|
||||
* https://lemire.me/blog/?p=17551
|
||||
*
|
||||
* The raw random number generator `next()` returns a 32-bit value.
|
||||
* We do a 64-bit multiply `next() * limit` and treat the product as a
|
||||
* 32.32 fixed-point value less than the limit. Our result will be the
|
||||
* integer part (upper 32 bits), and we will use the fraction part
|
||||
* (lower 32 bits) to determine whether or not we need to resample.
|
||||
*/
|
||||
for (;;) {
|
||||
r = next();
|
||||
if (r >= min) {
|
||||
break;
|
||||
uint64_t num = (uint64_t)next() * (uint64_t)limit;
|
||||
/*
|
||||
* In the fast path, we avoid doing a division in most cases by
|
||||
* comparing the fraction part of `num` with the limit, which is
|
||||
* a slight over-estimate for the exact resample threshold.
|
||||
*/
|
||||
if ((uint32_t)(num) < limit) {
|
||||
/*
|
||||
* We are in the slow path where we re-do the approximate test
|
||||
* more accurately. The exact threshold for the resample loop
|
||||
* is the remainder after dividing the raw RNG limit `1 << 32`
|
||||
* by the caller's limit. We use a trick to calculate it
|
||||
* within 32 bits:
|
||||
*
|
||||
* (1 << 32) % limit
|
||||
* == ((1 << 32) - limit) % limit
|
||||
* == (uint32_t)(-limit) % limit
|
||||
*
|
||||
* This division is safe: we know that `limit` is strictly
|
||||
* greater than zero because of the slow-path test above.
|
||||
*/
|
||||
uint32_t residue = (uint32_t)(-limit) % limit;
|
||||
/*
|
||||
* Unless we get one of `N = (1 << 32) - residue` valid
|
||||
* values, we reject the sample. This `N` is a multiple of
|
||||
* `limit`, so our results will be unbiased; and `N` is the
|
||||
* largest multiple that fits in 32 bits, so rejections are as
|
||||
* rare as possible.
|
||||
*
|
||||
* There are `limit` possible values for the integer part of
|
||||
* our fixed-point number. Each one corresponds to `N/limit`
|
||||
* or `N/limit + 1` possible fraction parts. For our result to
|
||||
* be unbiased, every possible integer part must have the same
|
||||
* number of possible valid fraction parts. So, when we get
|
||||
* the superfluous value in the `N/limit + 1` cases, we need
|
||||
* to reject and resample.
|
||||
*
|
||||
* Because of the multiplication, the possible values in the
|
||||
* fraction part are equally spaced by `limit`, with varying
|
||||
* gaps at each end of the fraction's 32-bit range. We will
|
||||
* choose a range of size `N` (a multiple of `limit`) into
|
||||
* which valid fraction values must fall, with the rest of the
|
||||
* 32-bit range covered by the `residue`. Lemire's paper says
|
||||
* that exactly `N/limit` possible values spaced apart by
|
||||
* `limit` will fit into our size `N` valid range, regardless
|
||||
* of the size of the end gaps, the phase alignment of the
|
||||
* values, or the position of the range.
|
||||
*
|
||||
* So, when a fraction value falls in the `residue` outside
|
||||
* our valid range, it is superfluous, and we resample.
|
||||
*/
|
||||
while ((uint32_t)(num) < residue) {
|
||||
num = (uint64_t)next() * (uint64_t)limit;
|
||||
}
|
||||
}
|
||||
|
||||
return (r % upper_bound);
|
||||
/*
|
||||
* Return the integer part (upper 32 bits).
|
||||
*/
|
||||
return ((uint32_t)(num >> 32));
|
||||
}
|
||||
|
|
|
|||
30
lib/isc/random_p.h
Normal file
30
lib/isc/random_p.h
Normal file
|
|
@ -0,0 +1,30 @@
|
|||
/*
|
||||
* Copyright (C) Internet Systems Consortium, Inc. ("ISC")
|
||||
*
|
||||
* SPDX-License-Identifier: MPL-2.0
|
||||
*
|
||||
* This Source Code Form is subject to the terms of the Mozilla Public
|
||||
* License, v. 2.0. If a copy of the MPL was not distributed with this
|
||||
* file, you can obtain one at https://mozilla.org/MPL/2.0/.
|
||||
*
|
||||
* See the COPYRIGHT file distributed with this work for additional
|
||||
* information regarding copyright ownership.
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <isc/lang.h>
|
||||
|
||||
/*! \file isc/random_p.h
|
||||
* \brief For automatically seeding and re-seeding when required.
|
||||
*/
|
||||
|
||||
ISC_LANG_BEGINDECLS
|
||||
|
||||
void
|
||||
isc__random_initialize(void);
|
||||
/*!<
|
||||
* \brief Seed the thread-local random number state with fresh entropy.
|
||||
*/
|
||||
|
||||
ISC_LANG_ENDDECLS
|
||||
|
|
@ -87,6 +87,11 @@ _setup(void **state) {
|
|||
result = isc_test_begin(NULL, true, 0);
|
||||
assert_int_equal(result, ISC_R_SUCCESS);
|
||||
|
||||
/*
|
||||
* Ensure the RNG has been seeded.
|
||||
*/
|
||||
assert_int_not_equal(isc_random32(), 0);
|
||||
|
||||
return (0);
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -22,6 +22,7 @@
|
|||
#include <isc/thread.h>
|
||||
#include <isc/util.h>
|
||||
|
||||
#include "random_p.h"
|
||||
#include "trampoline_p.h"
|
||||
|
||||
#define ISC__TRAMPOLINE_UNUSED 0
|
||||
|
|
@ -185,6 +186,11 @@ isc__trampoline_attach(isc__trampoline_t *trampoline) {
|
|||
* malloc() + free() calls altogether, as it would foil the fix.
|
||||
*/
|
||||
trampoline->jemalloc_enforce_init = malloc(8);
|
||||
|
||||
/*
|
||||
* Re-seed the random number generator in each thread.
|
||||
*/
|
||||
isc__random_initialize();
|
||||
}
|
||||
|
||||
isc_threadresult_t
|
||||
|
|
|
|||
Loading…
Reference in a new issue