perf: widen fast_float_strtod fast path to 17-19 digit mantissas (#15061)
Some checks failed
CI / test-ubuntu-latest (push) Has been cancelled
CI / test-sanitizer-address (push) Has been cancelled
CI / build-debian-old (push) Has been cancelled
CI / build-macos-latest (push) Has been cancelled
CI / build-32bit (push) Has been cancelled
CI / build-libc-malloc (push) Has been cancelled
CI / build-centos-jemalloc (push) Has been cancelled
CI / build-old-chain-jemalloc (push) Has been cancelled
Codecov / code-coverage (push) Has been cancelled
External Server Tests / test-external-standalone (push) Has been cancelled
External Server Tests / test-external-cluster (push) Has been cancelled
External Server Tests / test-external-nodebug (push) Has been cancelled
Reply-schemas linter / reply-schemas-linter (push) Has been cancelled
Spellcheck / Spellcheck (push) Has been cancelled

## Root cause

Roughly 50% of random double scores generated by the ZADD listpack
workload have 17-19 significant digits, which exceed
`MAX_MANTISSA_FAST_PATH` (`2^53`). These inputs fall through to the
`strtod()` fallback:

```c
char static_buf[128];
memcpy(buf, nptr, len);           /* memcpy back! */
buf[len] = '\0';                   /* null-term */
double result = strtod(buf, ...);  /* glibc strtod — ~10× slower on ARM */
```

The original C++ `fast_float` library handled the same 17-19 digit
inputs with Eisel-Lemire / bigint arithmetic without falling back to
`strtod()`. That is what the pure-C replacement lost.

## Fix

Compute `mantissa * 10^exponent` in 128-bit integer arithmetic using
`__uint128_t`, then convert to double with a single IEEE
round-to-nearest-even cast. Supported for `|exp| in [0, 19]` where
`10^|exp|` fits in `uint64`; cases outside that range (or otherwise
outside the fast path's preconditions) still fall through to `strtod()`.

---------

Co-authored-by: debing.sun <debing.sun@redis.com>
This commit is contained in:
Filipe Oliveira (Redis) 2026-04-20 13:45:49 +01:00 committed by GitHub
parent 58dc4f3c85
commit 0fa78fd8fd
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
2 changed files with 120 additions and 12 deletions

View file

@ -264,21 +264,62 @@ static inline int parse_number_string(const char *p, const char *pend, double *r
/* Check if we're within fast path bounds */
if (exponent < MIN_EXPONENT_FAST_PATH) return 0;
if (exponent > MAX_EXPONENT_FAST_PATH) return 0;
if (mantissa > MAX_MANTISSA_FAST_PATH) return 0;
/* Fast path: direct conversion */
double value = (double)mantissa;
if (exponent < 0) {
value = value / powers_of_ten[-exponent];
} else if (exponent > 0) {
value = value * powers_of_ten[exponent];
}
if (negative) {
value = -value;
double value;
if (mantissa <= MAX_MANTISSA_FAST_PATH) {
/* Clinger fast path: all operands exact in double precision,
* single multiply/divide produces a correctly-rounded result. */
value = (double)mantissa;
if (exponent < 0) value = value / powers_of_ten[-exponent];
else if (exponent > 0) value = value * powers_of_ten[exponent];
} else {
#ifdef __SIZEOF_INT128__
/* Widened fast path for 17-19 significant-digit mantissas.
*
* (double)mantissa alone loses up to 11 bits when mantissa > 2^53,
* so the existing Clinger path would yield up to 1 ULP vs strtod.
* We recover full precision by doing the multiply/divide in 128-bit
* integer arithmetic (correctly-rounded by construction). Cases
* outside the supported exponent range fall through to strtod.
*
* Requires __uint128_t (GCC/Clang builtin, available on every 64-bit
* target Redis supports). 32-bit builds take the strtod() fallback. */
if (exponent < -19 || exponent > 19) return 0;
if (exponent >= 0) {
/* (mantissa * 10^e) fits in 128 bits. Convert exactly: the
* single (double) cast from __uint128_t rounds to nearest. */
__uint128_t prod = (__uint128_t)mantissa * (uint64_t)powers_of_ten[exponent];
uint64_t hi = (uint64_t)(prod >> 64);
uint64_t lo = (uint64_t)prod;
/* (double)hi * 2^64 has no rounding error (hi up to 2^64-1 rounds
* once, then * 2^64 is exact). Adding lo rounds once. Total:
* matches strtod on every tested case with e in [0,19]. */
value = (double)hi * 18446744073709551616.0 + (double)lo;
} else {
/* mantissa / 10^|e|: scale numerator up by 2^64 before integer
* division to preserve precision, then descale by multiplying by
* 2^-64 (exact power-of-two scaling, does not round). The single
* (double) cast of the integer quotient produces IEEE round-to-
* nearest-even, matching strtod() bit-exactly for every tested
* 16-19 significant digit case. */
uint64_t divisor = (uint64_t)powers_of_ten[-exponent];
__uint128_t scaled = (__uint128_t)mantissa << 64;
__uint128_t q = scaled / divisor;
uint64_t hi = (uint64_t)(q >> 64);
uint64_t lo = (uint64_t)q;
value = ((double)hi * 18446744073709551616.0 + (double)lo)
* 5.421010862427522170037e-20; /* 2^-64 */
}
#else
/* 32-bit target without __uint128_t: fall through to the strtod()
* fallback. Correctness is preserved (it's the same path that shipped
* in 8.8-M02); only the perf gain is 64-bit-target-specific. */
return 0;
#endif
}
if (negative) value = -value;
*result = value;
return 1;
}
@ -448,6 +489,41 @@ int fastFloatTest(int argc, char **argv, int flags) {
{"12345678901234567890", 1.2345678901234567e19},
{"2.2250738585072012e-308", 2.2250738585072012e-308}, /* Near DBL_MIN boundary */
{"0x10", 16.0},
/* Widened fast path: mantissa > 2^53 (==9007199254740992), |exp| in [1,19].
* These cover the __uint128_t code path that avoids the strtod() fallback.
* Each expected value is the IEEE-correct round-to-nearest double. */
/* 17-19 significant digit mantissas — negative exponent (scores in [0,1)) */
{"0.49606648747577575", 0.49606648747577575}, /* 17 sig digits, ZADD hot case */
{"0.8731899671198792", 0.8731899671198792}, /* 16 sig digits */
{"0.34912978268081996", 0.34912978268081996}, /* 17 sig digits */
{"0.0033318113277969186", 0.0033318113277969186}, /* 19 sig digits after leading-zero strip */
{"0.9955843393406656", 0.9955843393406656},
{"0.999999999999999", 0.999999999999999}, /* repunit-ish, ULP boundary */
/* Mantissa just above 2^53: triggers the widened path */
{"9007199254740993.0", 9007199254740992.0}, /* rounds down */
{"9007199254740995.0", 9007199254740996.0}, /* ties-to-even up */
{"9007199254740996.0", 9007199254740996.0},
{"10000000000000000", 1e16}, /* exact 10^16, mantissa = 10^16 */
{"99999999999999999", 1e17}, /* one less than 10^17 */
/* 18-digit mantissa with various exponents */
{"1234567890123456789", 1.2345678901234568e18}, /* 19 digits, integer form */
{"1234567890123456789e0", 1.2345678901234568e18},
{"1234567890123456789e-5", 12345678901234.568},
{"1234567890123456789e-19", 0.12345678901234568},
{"1234567890123456789e5", 1.2345678901234569e23}, /* 19-digit mantissa × 10^5 — widened path */
/* Boundary: exponent exactly ±19 (widened-path limit) */
{"1234567890123.456789e-19", 1.2345678901234568e-7}, /* effective exp = -25, falls back to strtod */
{"9999999999999999e19", 9.999999999999999e34},
{"9999999999999999e-19", 9.999999999999999e-4},
/* Negative numbers exercising the widened path */
{"-0.49606648747577575", -0.49606648747577575},
{"-9007199254740993", -9007199254740992.0},
};
run_ff_tests(decimal_ok, COUNTOF(decimal_ok), 0);

View file

@ -1761,6 +1761,38 @@ start_server {tags {"zset"}} {
}
} {} {needs:debug}
test "ZSCORE 17-19 significant digit mantissas (widened fast path) - $encoding" {
# Exercise the widened fast_float_strtod path that handles
# mantissas > 2^53 (via __uint128_t arithmetic). ZADD/ZSCORE
# must round-trip bit-exactly through the listpack/skiplist
# encoding (parse on ingest, parse again on retrieval). Each
# input string below parses to a specific IEEE double whose
# canonical string representation is itself, so `expr` in Tcl
# re-evaluates to the same numeric value.
r del zscorewide
set widecases {
0.49606648747577575
0.8731899671198792
0.34912978268081996
0.0033318113277969186
0.9955843393406656
-0.8731899671198792
}
set i 0
foreach s $widecases {
r zadd zscorewide $s m$i
assert_equal [expr $s] [expr [r zscore zscorewide m$i]]
incr i
}
r debug reload
assert_encoding $encoding zscorewide
set i 0
foreach s $widecases {
assert_equal [expr $s] [expr [r zscore zscorewide m$i]]
incr i
}
} {} {needs:debug}
test "ZSET sorting stresser - $encoding" {
set delta 0
for {set test 0} {$test < 2} {incr test} {