From 5b38d54ca7d89c76ed3a8c344fbd94b526ac9b5b Mon Sep 17 00:00:00 2001 From: Henry Filgueiras <33638747+henry-filgueiras@users.noreply.github.com> Date: Mon, 20 Apr 2026 13:55:25 -0700 Subject: [PATCH 1/2] Add dict-rehash-during-command LATENCY event Operators can observe slow commands via SLOWLOG and LATENCY HISTOGRAM but have no signal for dict rehash work that runs synchronously inside command execution. _dictRehashStep and _dictRehashStepIfNeeded amortize rehash work across lookups/inserts/deletes during an active resize; that cost is currently invisible. This change adds a new named LATENCY event, "dict-rehash-during-command", reusing the existing latency-monitor-threshold gating. Implementation: * dict.c: a file-scope uint64_t accumulator (dictRehashStepElapsedUs), declared extern in dict.h. A second file-scope int flag (dictRehashStepTiming), also extern, gates the timing path end-to-end so that when latency monitoring is disabled no clock is read and no accumulator write happens - only a single predictable branch remains. Both _dictRehashStep and _dictRehashStepIfNeeded time the bucket-rehash work with the existing elapsedStart/elapsedUs helpers and add into the accumulator. Timing is also skipped while getMonotonicUs is still NULL, since core module registration can run dictAdd before monotonicInit() in initServer(). * server.c call(): at the outermost call() entry (execution_nesting == 0) the accumulator is zeroed and dictRehashStepTiming is set to (latency_monitor_threshold != 0). At the outermost exit, adjacent to the existing durationAddSample, the event is emitted via latencyAddSampleIfNeeded and dictRehashStepTiming is cleared so cron work between commands does not pay the timing cost. Nested call() from EXEC, Lua, and module RM_Call accumulate into the shared counter; only the outermost command emits, matching durationAddSample semantics. No new config, no new command, no SLOWLOG schema change. Cron rehash goes through dictRehashMicroseconds (a distinct code path) and is not instrumented by this event. FLUSHDB's bulk rehash is likewise not covered. --- src/dict.c | 33 ++++++++++++++++++++++++++++++++- src/dict.h | 2 ++ src/server.c | 21 ++++++++++++++++++++- 3 files changed, 54 insertions(+), 2 deletions(-) diff --git a/src/dict.c b/src/dict.c index d0885ff88..79b1ab25e 100644 --- a/src/dict.c +++ b/src/dict.c @@ -44,6 +44,18 @@ static dictResizeEnable dict_can_resize = DICT_RESIZE_ENABLE; static unsigned int dict_force_resize_ratio = 4; +/* Accumulator of microseconds spent in incremental rehash steps performed + * inside dict lookup/insert/delete operations. Read and reset by higher + * layers (server.c call()) to attribute per-command rehash cost. */ +uint64_t dictRehashStepElapsedUs = 0; + +/* Non-zero when latency monitoring is active + * (server.latency_monitor_threshold != 0). Set by server.c at the outermost + * call() entry and cleared at the outermost call() exit. While zero, the + * rehash timing path is skipped entirely: no clock reads, no accumulator + * writes - just one predictable branch. */ +int dictRehashStepTiming = 0; + /* -------------------------- types ----------------------------------------- */ struct dictEntry { struct dictEntry *next; /* Must be first */ @@ -466,7 +478,19 @@ int dictRehashMicroseconds(dict *d, uint64_t us) { * dictionary so that the hash table automatically migrates from H1 to H2 * while it is actively used. */ static void _dictRehashStep(dict *d) { - if (d->pauserehash == 0) dictRehash(d,1); + if (d->pauserehash == 0) { + /* Skip the timing path when latency monitoring is off, and also + * when getMonotonicUs is still NULL during early startup (core + * module registration runs dictAdd before monotonicInit). */ + if (!dictRehashStepTiming || getMonotonicUs == NULL) { + dictRehash(d,1); + return; + } + monotime t; + elapsedStart(&t); + dictRehash(d,1); + dictRehashStepElapsedUs += elapsedUs(t); + } } /* Performs rehashing on a single bucket. */ @@ -1705,6 +1729,12 @@ static void _dictShrinkIfNeeded(dict *d) static void _dictRehashStepIfNeeded(dict *d, uint64_t visitedIdx) { if ((!dictIsRehashing(d)) || (d->pauserehash != 0)) return; + /* Skip the timing path when latency monitoring is off, and also when + * getMonotonicUs is still NULL during early startup (core module + * registration runs dictAdd before monotonicInit). */ + const int time_it = dictRehashStepTiming && (getMonotonicUs != NULL); + monotime t; + if (time_it) elapsedStart(&t); /* rehashing not in progress if rehashidx == -1 */ if ((long)visitedIdx >= d->rehashidx && d->ht_table[0][visitedIdx]) { /* If we have a valid hash entry at `idx` in ht0, we perform @@ -1715,6 +1745,7 @@ static void _dictRehashStepIfNeeded(dict *d, uint64_t visitedIdx) { * on the rehashidx (not CPU cache friendly). */ dictRehash(d,1); } + if (time_it) dictRehashStepElapsedUs += elapsedUs(t); } /* Our hash table capability is a power of two */ diff --git a/src/dict.h b/src/dict.h index 25e4cf1bd..30e204492 100644 --- a/src/dict.h +++ b/src/dict.h @@ -273,6 +273,8 @@ void dictEmpty(dict *d, void(callback)(dict*)); void dictSetResizeEnabled(dictResizeEnable enable); int dictRehash(dict *d, int n); int dictRehashMicroseconds(dict *d, uint64_t us); +extern uint64_t dictRehashStepElapsedUs; +extern int dictRehashStepTiming; void dictSetHashFunctionSeed(uint8_t *seed); unsigned long dictScan(dict *d, unsigned long v, dictScanFunction *fn, void *privdata); unsigned long dictScanDefrag(dict *d, unsigned long v, dictScanFunction *fn, dictDefragFunctions *defragfns, void *privdata); diff --git a/src/server.c b/src/server.c index a284f5784..e3c122966 100644 --- a/src/server.c +++ b/src/server.c @@ -3949,6 +3949,17 @@ void call(client *c, int flags) { /* Pass current server.ustime to avoid ustime() call if monotonic clock is used * and time will be updated before command execution based on monotonic clock. */ const long long call_timer = use_hw_clock ? server.ustime : ustime(); + /* Arm dict-rehash attribution at the outermost call() only. The + * accumulator starts at zero so that only rehash work performed during + * this top-level command's proc is attributed to it, and the timing + * flag is set so that the dict-side path starts collecting samples + * while this command runs. When the feature is disabled + * (latency_monitor_threshold == 0), the flag stays 0 and the entire + * timing path is a single predictable branch. */ + if (server.execution_nesting == 0) { + dictRehashStepElapsedUs = 0; + dictRehashStepTiming = (server.latency_monitor_threshold != 0); + } enterExecutionUnit(1, call_timer); /* setting the CLIENT_EXECUTING_COMMAND flag so we will avoid @@ -4005,8 +4016,16 @@ void call(client *c, int flags) { char *latency_event = (real_cmd->flags & CMD_FAST) ? "fast-command" : "command"; latencyAddSampleIfNeeded(latency_event,duration/1000); - if (server.execution_nesting == 0) + if (server.execution_nesting == 0) { durationAddSample(EL_DURATION_TYPE_CMD, duration); + latencyAddSampleIfNeeded("dict-rehash-during-command", + (long long)(dictRehashStepElapsedUs/1000)); + /* Disarm the dict-rehash timing path between top-level calls so + * any rehash work done in cron (e.g. via active expire) does not + * pay the timing cost. The accumulator is zeroed again at the + * next top-level entry. */ + dictRehashStepTiming = 0; + } } /* Log the command into the Slow log if needed. From dd97d597448a1c8158dc96826466cd46ba1bc135 Mon Sep 17 00:00:00 2001 From: Henry Filgueiras <33638747+henry-filgueiras@users.noreply.github.com> Date: Mon, 20 Apr 2026 15:56:28 -0700 Subject: [PATCH 2/2] Disarm dict-rehash timing unconditionally at top-level call() exit The disarm of dictRehashStepTiming was inside the if (update_command_stats) block, which is skipped during AOF loading and other contexts where command stats are suppressed. Across those paths the flag stayed armed, so every incremental rehash step continued to pay the getMonotonicUs() cost and wrote into the accumulator that was never read back. The arm/disarm pair is state-machine cleanup, not a stats emission. Move the disarm out of the stats block, guarded only by execution_nesting == 0, so it runs on every outermost call() exit. Emission remains where it was: inside the stats block, adjacent to durationAddSample. Restores the "zero-cost when latency monitoring is disabled" invariant on all code paths through call(), including AOF replay. --- src/server.c | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/src/server.c b/src/server.c index e3c122966..06a10334c 100644 --- a/src/server.c +++ b/src/server.c @@ -4020,14 +4020,16 @@ void call(client *c, int flags) { durationAddSample(EL_DURATION_TYPE_CMD, duration); latencyAddSampleIfNeeded("dict-rehash-during-command", (long long)(dictRehashStepElapsedUs/1000)); - /* Disarm the dict-rehash timing path between top-level calls so - * any rehash work done in cron (e.g. via active expire) does not - * pay the timing cost. The accumulator is zeroed again at the - * next top-level entry. */ - dictRehashStepTiming = 0; } } + /* Disarm the dict-rehash timing path unconditionally at outermost call() + * exit, so cron work between commands does not pay the timing cost. This + * must run regardless of update_command_stats (e.g. during AOF loading) + * so the flag cannot stay armed across paths that skip the stats block. */ + if (server.execution_nesting == 0) + dictRehashStepTiming = 0; + /* Log the command into the Slow log if needed. * If the client is blocked we will handle slowlog when it is unblocked. */ if (update_command_stats && !(c->flags & CLIENT_BLOCKED))