From d22c68f9042e966bdbcaeab7bd3e62c9ad5ea3f4 Mon Sep 17 00:00:00 2001 From: Moti Cohen Date: Tue, 7 Apr 2026 12:15:26 +0300 Subject: [PATCH 01/32] Partial support set keymeta on ksn (#15004) As part of KSN, modules must not modify keys. However, RediSearch modifies key metadata in some flows, which may invalidate the local kvobj pointer. Introduce KSN_INVALIDATE_KVOBJ() to explicitly invalidate kvobj after notifications, preventing further access by Redis core. Currently relevant for hash keys without HFE. Changes: - Add KSN_INVALIDATE_KVOBJ() to guard unsafe flows - Apply invalidation beyond hash-specific paths - Extend KSN side-effect coverage for DELEX and MOVE - Rearrange flows to avoid kvobj access after notification - Include additional tests from @JoanFM (#14939) Behavior: No intended behavior change and no reordering of notifications. --- src/cluster.c | 1 + src/db.c | 16 +- src/expire.c | 2 + src/keymeta.c | 16 +- src/server.h | 8 + src/t_hash.c | 89 +++++++---- tests/modules/keymeta_notify.c | 23 ++- .../unit/moduleapi/ksn_notify_side_effect.tcl | 151 ++++++++++++++++-- 8 files changed, 237 insertions(+), 69 deletions(-) diff --git a/src/cluster.c b/src/cluster.c index 04ba14647..b831c203a 100644 --- a/src/cluster.c +++ b/src/cluster.c @@ -321,6 +321,7 @@ void restoreCommand(client *c) { objectSetLRUOrLFU(kv, lfu_freq, lru_idle, lru_clock, 1000); keyModified(c,c->db,key,NULL,1); notifyKeyspaceEvent(NOTIFY_GENERIC,"restore",key,c->db->id); + KSN_INVALIDATE_KVOBJ(kv); /* If we deleted a key that means REPLACE parameter was passed and the * destination key existed. 
*/ diff --git a/src/db.c b/src/db.c index 1565b7bef..7d7a3e96b 100644 --- a/src/db.c +++ b/src/db.c @@ -1522,6 +1522,7 @@ void delexCommand(client *c) { rewriteClientCommandVector(c, 2, shared.del, key); keyModified(c, c->db, key, NULL, 1); notifyKeyspaceEvent(NOTIFY_GENERIC, "del", key, c->db->id); + KSN_INVALIDATE_KVOBJ(o); server.dirty++; } @@ -2248,10 +2249,9 @@ void renameGenericCommand(client *c, int nx) { keyModified(c,c->db,c->argv[1],NULL,1); keyModified(c,c->db,c->argv[2],o,1); - notifyKeyspaceEvent(NOTIFY_GENERIC,"rename_from", - c->argv[1],c->db->id); - notifyKeyspaceEvent(NOTIFY_GENERIC,"rename_to", - c->argv[2],c->db->id); + notifyKeyspaceEvent(NOTIFY_GENERIC, "rename_from", c->argv[1],c->db->id); + notifyKeyspaceEvent(NOTIFY_GENERIC, "rename_to", c->argv[2],c->db->id); + KSN_INVALIDATE_KVOBJ(o); if (overwritten) { notifyKeyspaceEvent(NOTIFY_OVERWRITTEN, "overwritten", c->argv[2], c->db->id); if (desttype != srctype) @@ -2346,10 +2346,9 @@ void moveCommand(client *c) { keyModified(c,src,c->argv[1],NULL,1); keyModified(c,dst,c->argv[1],kv,1); - notifyKeyspaceEvent(NOTIFY_GENERIC, - "move_from",c->argv[1],src->id); - notifyKeyspaceEvent(NOTIFY_GENERIC, - "move_to",c->argv[1],dst->id); + notifyKeyspaceEvent(NOTIFY_GENERIC, "move_from", c->argv[1],src->id); + notifyKeyspaceEvent(NOTIFY_GENERIC, "move_to", c->argv[1],dst->id); + KSN_INVALIDATE_KVOBJ(kv); server.dirty++; addReply(c,shared.cone); @@ -2471,6 +2470,7 @@ void copyCommand(client *c) { /* OK! key copied. 
Signal modification */ keyModified(c,dst,c->argv[2],kvCopy,1); notifyKeyspaceEvent(NOTIFY_GENERIC,"copy_to",c->argv[2],dst->id); + KSN_INVALIDATE_KVOBJ(kvCopy); /* `delete` implies the destination key was overwritten */ if (delete) { diff --git a/src/expire.c b/src/expire.c index f784c9f88..0400fedc2 100644 --- a/src/expire.c +++ b/src/expire.c @@ -836,6 +836,7 @@ void expireGenericCommand(client *c, long long basetime, int unit) { keyModified(c,c->db,key,kv,1); notifyKeyspaceEvent(NOTIFY_GENERIC,"expire",key,c->db->id); + KSN_INVALIDATE_KVOBJ(kv); server.dirty++; return; } @@ -913,6 +914,7 @@ void persistCommand(client *c) { if (removeExpire(c->db,c->argv[1])) { keyModified(c,c->db,c->argv[1],kv,1); notifyKeyspaceEvent(NOTIFY_GENERIC,"persist",c->argv[1],c->db->id); + KSN_INVALIDATE_KVOBJ(kv); addReply(c,shared.cone); server.dirty++; } else { diff --git a/src/keymeta.c b/src/keymeta.c index ca6878307..e4430da2c 100644 --- a/src/keymeta.c +++ b/src/keymeta.c @@ -743,25 +743,25 @@ KeyMetaClassId keyMetaClassCreate(RedisModule *context, const char *name, /* Check for name conflicts using 4-char name. Allow reuse of RELEASED; forbid if INUSE. */ int alreayReleased; - int slot = keyMetaClassLookupByName(name, &alreayReleased); + int keyMetaId = keyMetaClassLookupByName(name, &alreayReleased); if (alreayReleased) { - /* If already released, then reuse the slot. */ + /* If already released, then reuse the keyMetaId. 
*/ } else { /* Assert class is registered for first time */ - serverAssert(slot == -1); + serverAssert(keyMetaId == -1); - /* Find free slot */ + /* Find free keyMetaId */ for (int i = KEY_META_ID_MODULE_FIRST; i <= KEY_META_ID_MODULE_LAST; i++) { if (keyMetaClass[i].state == CLASS_STATE_FREE) { - slot = i; + keyMetaId = i; break; } } - if (slot == -1) return 0; /* no free slots */ + if (keyMetaId == -1) return 0; /* no free keyMetaId */ } - KeyMetaClass *pKeyMetaClass = &keyMetaClass[slot]; + KeyMetaClass *pKeyMetaClass = &keyMetaClass[keyMetaId]; /* Store 4-char short name */ memcpy(pKeyMetaClass->name, name, KM_NAME_LEN); @@ -774,7 +774,7 @@ KeyMetaClassId keyMetaClassCreate(RedisModule *context, const char *name, pKeyMetaClass->state = CLASS_STATE_INUSE; pKeyMetaClass->classSpecEncoded = classSpecEncoded; KM_SET_CONST_CONF(pKeyMetaClass->conf) = *conf; /* Copy config as is. */ - return slot; /* Return handle (1..7). */ + return keyMetaId; /* Return handle (1..7). */ } /* Destroy (release) a class by its ID. Returns 1 on success, 0 on failure. */ diff --git a/src/server.h b/src/server.h index 72036f6b6..f191e068e 100644 --- a/src/server.h +++ b/src/server.h @@ -3825,6 +3825,14 @@ void notifyKeyspaceEvent(int type, const char *event, robj *key, int dbid); int keyspaceEventsStringToFlags(char *classes); sds keyspaceEventsFlagsToString(int flags); +/* As part of KSN the module should not attempt to modify the key. Nevertheless, + * RediSearch does it in some specific flows and modifies key metadata which in + * turn might invalidate the local kvobj pointer. 
Those specific flows are + * protected by the following macro which invalidates the local kvobj pointer + * after the notification to prevent further access to it (Currently it is only + * using it with hash type keys, without hash field expiration) */ +#define KSN_INVALIDATE_KVOBJ(o) do { (o) = NULL; } while (0) + /* Configuration */ /* Configuration Flags */ #define MODIFIABLE_CONFIG 0 /* This is the implied default for a standard diff --git a/src/t_hash.c b/src/t_hash.c index 01e4a7807..acfa6c6a9 100644 --- a/src/t_hash.c +++ b/src/t_hash.c @@ -2105,6 +2105,7 @@ void hsetnxCommand(client *c) { if (server.memory_tracking_enabled) updateSlotAllocSize(c->db, getKeySlot(c->argv[1]->ptr), kv, oldsize, kvobjAllocSize(kv)); notifyKeyspaceEvent(NOTIFY_HASH, "hset", c->argv[1], c->db->id); + KSN_INVALIDATE_KVOBJ(kv); server.dirty++; } @@ -2142,6 +2143,7 @@ void hsetCommand(client *c) { if (server.memory_tracking_enabled) updateSlotAllocSize(c->db, getKeySlot(c->argv[1]->ptr), kv, oldsize, kvobjAllocSize(kv)); notifyKeyspaceEvent(NOTIFY_HASH,"hset",c->argv[1],c->db->id); + KSN_INVALIDATE_KVOBJ(kv); server.dirty += (c->argc - 2)/2; } @@ -2470,6 +2472,7 @@ out: updateSlotAllocSize(c->db, getKeySlot(c->argv[1]->ptr), o, oldsize, kvobjAllocSize(o)); /* Emit keyspace notifications based on field expiry, mutation, or key deletion */ if (fields_set || expired) { + newlen = (int64_t) hashTypeLength(o, 0); keyModified(c, c->db, c->argv[1], o, 1); if (expired) notifyKeyspaceEvent(NOTIFY_HASH, "hexpired", c->argv[1], c->db->id); @@ -2478,18 +2481,20 @@ out: if (deleted || updated) notifyKeyspaceEvent(NOTIFY_HASH, deleted ? "hdel" : "hexpire", c->argv[1], c->db->id); } + + KSN_INVALIDATE_KVOBJ(o); + + /* Key may become empty due to lazy expiry in hashTypeGetValue() + * or the new expiration time is in the past.*/ + if (newlen == 0) { + newlen = -1; + /* Del key but don't update KEYSIZES. 
else it will decr wrong bin in histogram */ + dbDeleteSkipKeysizesUpdate(c->db, c->argv[1]); + notifyKeyspaceEvent(NOTIFY_GENERIC, "del", c->argv[1], c->db->id); + } + if (oldlen != newlen) + updateKeysizesHist(c->db, OBJ_HASH, oldlen, newlen); } - /* Key may become empty due to lazy expiry in hashTypeExists() - * or the new expiration time is in the past.*/ - newlen = (int64_t) hashTypeLength(o, 0); - if (newlen == 0) { - newlen = -1; - /* Del key but don't update KEYSIZES. else it will decr wrong bin in histogram */ - dbDeleteSkipKeysizesUpdate(c->db, c->argv[1]); - notifyKeyspaceEvent(NOTIFY_GENERIC, "del", c->argv[1], c->db->id); - } - if (oldlen != newlen) - updateKeysizesHist(c->db, OBJ_HASH, oldlen, newlen); } void hincrbyCommand(client *c) { @@ -2540,6 +2545,7 @@ void hincrbyCommand(client *c) { addReplyLongLong(c,value); keyModified(c,c->db,c->argv[1], o, 1); notifyKeyspaceEvent(NOTIFY_HASH,"hincrby",c->argv[1],c->db->id); + KSN_INVALIDATE_KVOBJ(o); server.dirty++; } @@ -2598,6 +2604,7 @@ void hincrbyfloatCommand(client *c) { addReplyBulkCBuffer(c,buf,len); keyModified(c,c->db,c->argv[1],o,1); notifyKeyspaceEvent(NOTIFY_HASH,"hincrbyfloat",c->argv[1],c->db->id); + KSN_INVALIDATE_KVOBJ(o); server.dirty++; /* Always replicate HINCRBYFLOAT as an HSETEX command with the final value @@ -2737,8 +2744,20 @@ void hgetdelCommand(client *c) { if (expired == 0 && deleted == 0) return; + int64_t newlen = (int64_t) hashTypeLength(o, 0); + /* del key if become empty */ + int delete_key = (newlen == 0); + /* update new len for keysizes histogram */ + int64_t hist_newlen = delete_key ? 
-1 : newlen; + if (oldlen != hist_newlen) + updateKeysizesHist(c->db, OBJ_HASH, oldlen, hist_newlen); + /* update memory tracking */ if (server.memory_tracking_enabled) updateSlotAllocSize(c->db, getKeySlot(c->argv[1]->ptr), o, oldsize, kvobjAllocSize(o)); + /* is it last HFE */ + if (!delete_key && hfe && (hashTypeIsFieldsWithExpire(o) == 0)) + estoreRemove(c->db->subexpires, getKeySlot(c->argv[1]->ptr), o); + keyModified(c, c->db, c->argv[1], o, 1); if (expired) @@ -2755,21 +2774,14 @@ void hgetdelCommand(client *c) { rewriteClientCommandArgument(c, 2, NULL); /* Delete arg */ } + KSN_INVALIDATE_KVOBJ(o); + /* Key may have become empty because of deleting fields or lazy expire. */ - int64_t newlen = (int64_t) hashTypeLength(o, 0); - if (newlen == 0) { - newlen = -1; + if (delete_key) { /* Del key but don't update KEYSIZES. else it will decr wrong bin in histogram */ dbDeleteSkipKeysizesUpdate(c->db, c->argv[1]); notifyKeyspaceEvent(NOTIFY_GENERIC, "del", c->argv[1], c->db->id); - } else { - if (hfe && (hashTypeIsFieldsWithExpire(o) == 0)) { /*is it last HFE*/ - estoreRemove(c->db->subexpires, getKeySlot(kvobjGetKey(o)), o); - } } - - if (oldlen != newlen) - updateKeysizesHist(c->db, OBJ_HASH, oldlen, newlen); } /* Get the value of one or more fields of a given hash key and optionally set @@ -2938,33 +2950,40 @@ void hdelCommand(client *c) { if (hashTypeDelete(o,c->argv[j]->ptr)) { deleted++; if (hashTypeLength(o, 0) == 0) { - if (server.memory_tracking_enabled) - updateSlotAllocSize(c->db, getKeySlot(c->argv[1]->ptr), o, oldsize, kvobjAllocSize(o)); - /* del key but don't update KEYSIZES. 
Else it will decr wrong bin in histogram */ - dbDeleteSkipKeysizesUpdate(c->db, c->argv[1]); keyremoved = 1; break; } } } + if (!keyremoved && o->encoding == OBJ_ENCODING_HT) { dictResumeAutoResize((dict*)o->ptr); dictShrinkIfNeeded((dict*)o->ptr); } - if (server.memory_tracking_enabled && !keyremoved) + if (server.memory_tracking_enabled) updateSlotAllocSize(c->db, getKeySlot(c->argv[1]->ptr), o, oldsize, kvobjAllocSize(o)); if (deleted) { - int64_t newLen = -1; /* The value -1 indicates that the key is deleted. */ + /* Update keysizes histogram */ + int64_t newLen = (int64_t) hashTypeLength(o, 0); + updateKeysizesHist(c->db, OBJ_HASH, oldLen, keyremoved ? -1 : newLen); + + if (keyremoved) { + /* del key but don't update KEYSIZES. Else it will decr wrong bin in histogram */ + dbDeleteSkipKeysizesUpdate(c->db, c->argv[1]); + } else { + /* is it last HFE */ + if (isHFE && (hashTypeIsFieldsWithExpire(o) == 0)) + estoreRemove(c->db->subexpires, getKeySlot(c->argv[1]->ptr), o); + } + + /* Signal key modification */ keyModified(c, c->db, c->argv[1], keyremoved ? 
NULL : o, 1); notifyKeyspaceEvent(NOTIFY_HASH,"hdel",c->argv[1],c->db->id); - if (keyremoved) { - notifyKeyspaceEvent(NOTIFY_GENERIC, "del", c->argv[1], c->db->id); - } else { - if (isHFE && (hashTypeIsFieldsWithExpire(o) == 0)) /* is it last HFE */ - estoreRemove(c->db->subexpires, getKeySlot(c->argv[1]->ptr), o); - newLen = oldLen - deleted; - } - updateKeysizesHist(c->db, OBJ_HASH, oldLen, newLen); + + KSN_INVALIDATE_KVOBJ(o); /* Invalidate local kvobj pointer */ + + /* Notify del event if key was deleted */ + if (keyremoved) notifyKeyspaceEvent(NOTIFY_GENERIC, "del", c->argv[1], c->db->id); server.dirty += deleted; } addReplyLongLong(c,deleted); diff --git a/tests/modules/keymeta_notify.c b/tests/modules/keymeta_notify.c index 189646766..b02949c6e 100644 --- a/tests/modules/keymeta_notify.c +++ b/tests/modules/keymeta_notify.c @@ -1,10 +1,10 @@ -/* Test module: SetKeyMeta during keyspace notification callback. +/* Test module for KSN paths that must tolerate keymeta writes. * - * This module registers keyspace notification callbacks for HASH, STRING, - * GENERIC, EXPIRED, and EVICTED events that write to key metadata (via - * RedisModule_SetKeyMeta). It is used to verify that commands remain safe - * when a notification callback modifies key metadata, which may trigger - * kvobj reallocation. + * In general, keyspace notification callbacks must not perform write + * operations. However, Search module modifies key metadata as part of KSN, so + * this module exercises the subset of KSN flows that must remain resilient to + * such keymeta modifications, including cases that may trigger kvobj + * reallocation. 
* * Commands: * KEYMETANOTIFY.GET - Get the metadata value attached to a key @@ -146,3 +146,14 @@ int RedisModule_OnLoad(RedisModuleCtx *ctx, RedisModuleString **argv, int argc) return REDISMODULE_OK; } + +int RedisModule_OnUnload(RedisModuleCtx *ctx) { + REDISMODULE_NOT_USED(ctx); + + if (meta_class_id >= 0) { + RedisModule_ReleaseKeyMetaClass(meta_class_id); + meta_class_id = -1; + } + + return REDISMODULE_OK; +} diff --git a/tests/unit/moduleapi/ksn_notify_side_effect.tcl b/tests/unit/moduleapi/ksn_notify_side_effect.tcl index 5d920a4fd..22f837589 100644 --- a/tests/unit/moduleapi/ksn_notify_side_effect.tcl +++ b/tests/unit/moduleapi/ksn_notify_side_effect.tcl @@ -1,17 +1,12 @@ # Test for SetKeyMeta during keyspace notification (KSN) callbacks. # -# This test loads a module that registers KSN callbacks for HASH, STRING, -# GENERIC, EXPIRED, and EVICTED events. The callback writes to key metadata -# (via RedisModule_SetKeyMeta), which may trigger kvobj reallocation. -# It exercises various commands across these notification types to catch -# regressions where the kvobj pointer becomes stale after a notification -# callback reallocates it. -# -# Important: each test uses a fresh key so that SetKeyMeta triggers an actual -# kvobj reallocation (the first metadata attachment grows the kvobj). We verify -# this by checking that the setcount increases after each command. -# Each test also validates that the metadata is properly accessible after the -# operation by reading it back via RedisModule_GetKeyMeta. +# On key space notification, the module shouldn't modify the key. This focused +# regression tests makes an exception for RediSearch which uses SetKeyMeta +# as part of its KSN callback (Currently only for hash keys without hash field +# expiration). The test module mutates key metadata during selected notifications, +# which may reallocate the underlying kvobj and invalidates any local pointer to +# it. 
Each test uses fresh keys when possible so the first metadata write forces +# the reallocation-sensitive path, then verifies the command still completes. set testmodule [file normalize tests/modules/keymeta_notify.so] @@ -91,6 +86,107 @@ start_server {tags {"modules" "external:skip"}} { assert {[r keymetanotify.setcount] >= $before + 100} } + test {HGETDEL with SetKeyMeta in notification does not crash} { + # To test the "first SetKeyMeta causes kvobj reallocation" scenario, + # create the key BEFORE loading the module so the first metadata + # attachment happens during HGETDEL, not during HSET. + r module unload keymetanotify + r HSET hgetdel_key f1 v1 f2 v2 f3 v3 + r module load $testmodule + + # HGETDEL returns the value and deletes the field + # This is the first SetKeyMeta call for this key, triggering kvobj reallocation + set before [r keymetanotify.setcount] + set result [r HGETDEL hgetdel_key FIELDS 1 f1] + assert_equal $result "v1" + assert_equal [r HEXISTS hgetdel_key f1] 0 + assert_equal [r HLEN hgetdel_key] 2 + # SetKeyMeta should be called during the hdel notification + assert {[r keymetanotify.setcount] > $before} + assert_equal [r keymetanotify.get hgetdel_key] "notified" + + # HGETDEL multiple fields + set result [r HGETDEL hgetdel_key FIELDS 2 f2 f3] + assert_equal [lindex $result 0] "v2" + assert_equal [lindex $result 1] "v3" + assert_equal [r HLEN hgetdel_key] 0 + } + + test {HDEL with SetKeyMeta in notification does not crash} { + # To test the "first SetKeyMeta causes kvobj reallocation" scenario, + # create the key BEFORE loading the module so the first metadata + # attachment happens during HDEL, not during HSET. 
+ r module unload keymetanotify + r HSET hdel_key f1 v1 f2 v2 f3 v3 + r module load $testmodule + + # HDEL single field - this is the first SetKeyMeta call for this key, + # triggering kvobj reallocation during the hdel notification + set before [r keymetanotify.setcount] + r HDEL hdel_key f1 + assert_equal [r HEXISTS hdel_key f1] 0 + assert_equal [r HLEN hdel_key] 2 + # SetKeyMeta should be called during the hdel notification + assert {[r keymetanotify.setcount] > $before} + assert_equal [r keymetanotify.get hdel_key] "notified" + + # HDEL multiple fields (in-place metadata update) + r HDEL hdel_key f2 f3 + assert_equal [r HLEN hdel_key] 0 + } + + # --- GENERIC notification tests --- + + test {PERSIST with SetKeyMeta in notification does not crash} { + # Create key with expiration + set before [r keymetanotify.setcount] + r SET persist_key "value" + r EXPIRE persist_key 1000 + assert_equal [r keymetanotify.get persist_key] "notified" + assert {[r keymetanotify.setcount] > $before} + + # Verify TTL is set + assert {[r TTL persist_key] > 0} + + # PERSIST removes expiration + set before [r keymetanotify.setcount] + r PERSIST persist_key + # persist notification triggers SetKeyMeta + assert {[r keymetanotify.setcount] > $before} + + # Verify TTL is removed + assert_equal [r TTL persist_key] -1 + assert_equal [r GET persist_key] "value" + } + + test {COPY with SetKeyMeta in notification does not crash} { + # Create source key + set before [r keymetanotify.setcount] + r HSET copy_src_key f1 v1 f2 v2 + assert_equal [r keymetanotify.get copy_src_key] "notified" + assert {[r keymetanotify.setcount] > $before} + + # COPY to new key + set before [r keymetanotify.setcount] + r COPY copy_src_key copy_dst_key + # copy_to notification triggers SetKeyMeta on destination + assert_equal [r keymetanotify.get copy_dst_key] "notified" + assert {[r keymetanotify.setcount] > $before} + + # Verify both keys have same content + assert_equal [r HGET copy_src_key f1] "v1" + assert_equal [r 
HGET copy_dst_key f1] "v1" + assert_equal [r HGET copy_src_key f2] "v2" + assert_equal [r HGET copy_dst_key f2] "v2" + + # COPY with REPLACE + r HSET copy_src_key f3 v3 + set before [r keymetanotify.setcount] + r COPY copy_src_key copy_dst_key REPLACE + assert {[r keymetanotify.setcount] > $before} + assert_equal [r HGET copy_dst_key f3] "v3" + } + # --- STRING notification tests --- # Each test uses a fresh key for actual kvobj reallocation. @@ -164,6 +260,37 @@ start_server {tags {"modules" "external:skip"}} { assert_equal [r EXISTS del_key] 0 } + test {DELEX with SetKeyMeta in notification does not crash} { + r SET delex_key "value" + assert_equal [r keymetanotify.get delex_key] "notified" + r DELEX delex_key IFEQ value + assert_equal [r EXISTS delex_key] 0 + } + + test {MOVE with SetKeyMeta in notification does not crash} { + r select 10 + r DEL move_key + r select 9 + + # Create the key before loading the module so the first metadata + # attachment happens during MOVE, not during SET. 
+ r module unload keymetanotify + r SET move_key "value" + r module load $testmodule + + set before [r keymetanotify.setcount] + r MOVE move_key 10 + assert_equal [r EXISTS move_key] 0 + + r select 10 + assert_equal [r GET move_key] "value" + assert_equal [r keymetanotify.get move_key] "notified" + assert {[r keymetanotify.setcount] > $before} + r DEL move_key + r select 9 + set _ {} + } {} {singledb:skip} + test {RENAME with SetKeyMeta in notification does not crash} { r SET rename_src "value" r RENAME rename_src rename_dst From 153b79a290bd37c5ab2ff45737fee1db2e29e8f3 Mon Sep 17 00:00:00 2001 From: Moti Cohen Date: Tue, 7 Apr 2026 12:31:53 +0300 Subject: [PATCH 02/32] keymeta: add DEBUG flag for runtime keymeta class registration (#14968) RM_CreateKeyMetaClass() allows registration only on: - 'DEBUG enable-keymeta-runtime-registration 1' (replaces server.enable_debug_cmd) - REDISMODULE_CTX_FLAGS_SERVER_STARTUP, in addition to module->onload --- src/debug.c | 7 +++++++ src/module.c | 14 +++++++++----- src/server.c | 1 + src/server.h | 1 + tests/unit/moduleapi/keymeta.tcl | 10 +++++++--- tests/unit/moduleapi/ksn_notify_side_effect.tcl | 3 ++- 6 files changed, 27 insertions(+), 9 deletions(-) diff --git a/src/debug.c b/src/debug.c index 14f8cfe73..29ae88298 100644 --- a/src/debug.c +++ b/src/debug.c @@ -436,6 +436,8 @@ void debugCommand(client *c) { " Show low level info about `key` and associated value.", "DROP-CLUSTER-PACKET-FILTER ", " Drop all packets that match the filtered type. 
Set to -1 allow all packets.", +"ENABLE-KEYMETA-RUNTIME-REGISTRATION <0|1>", +" Allow keymeta class registration outside server startup (for testing).", "OOM", " Crash the server simulating an out-of-memory error.", "PANIC", @@ -927,6 +929,11 @@ NULL { server.skip_checksum_validation = atoi(c->argv[2]->ptr); addReply(c,shared.ok); + } else if (!strcasecmp(c->argv[1]->ptr,"enable-keymeta-runtime-registration") && + c->argc == 3) + { + server.allow_keymeta_registration = atoi(c->argv[2]->ptr); + addReply(c,shared.ok); } else if (!strcasecmp(c->argv[1]->ptr,"aof-flush-sleep") && c->argc == 3) { diff --git a/src/module.c b/src/module.c index e3971195b..fc28b37a1 100644 --- a/src/module.c +++ b/src/module.c @@ -4480,9 +4480,10 @@ int RM_SetAbsExpire(RedisModuleKey *key, mstime_t expire) { * * Note: the metadata class name "AAAAAAAAA" is reserved and produces an error. * - * If RM_CreateKeyMetaClass() is called outside of RedisModule_OnLoad() function, - * there is already a metadata class registered with the same name, - * or if the metadata class name or metaver is invalid, a negative value is returned. + * If RM_CreateKeyMetaClass() is called outside of RedisModule_OnLoad() function + * and outside of server startup, there is already a metadata class registered + * with the same name, or if the metadata class name or metaver is invalid, + * a negative value is returned. 
* Otherwise the new metadata class is registered into Redis, and a reference of * type RedisModuleKeyMetaClassId is returned: the caller of the function should store * this reference into a global variable to make future use of it in the @@ -4503,8 +4504,11 @@ RedisModuleKeyMetaClassId RM_CreateKeyMetaClass(RedisModuleCtx *ctx, { RedisModuleKeyMetaClassId id; - /* Allow registration only OnLoad (and when debug commands disabled) */ - if ((!ctx->module->onload) && (server.enable_debug_cmd == PROTECTED_ACTION_ALLOWED_NO)) + /* Allow registration during OnLoad, server startup, or when debug flag is set */ + int ctx_flags = RM_GetContextFlags(ctx); + if (!ctx->module->onload && + !(ctx_flags & REDISMODULE_CTX_FLAGS_SERVER_STARTUP) && + !server.allow_keymeta_registration) return -1; if (!confPtr) diff --git a/src/server.c b/src/server.c index fc5eb135a..7568e242c 100644 --- a/src/server.c +++ b/src/server.c @@ -2342,6 +2342,7 @@ void initServerConfig(void) { server.allow_access_expired = 0; server.allow_access_trimmed = 0; server.skip_checksum_validation = 0; + server.allow_keymeta_registration = 0; server.loading = 0; server.async_loading = 0; server.loading_rdb_used_mem = 0; diff --git a/src/server.h b/src/server.h index f191e068e..055a516c4 100644 --- a/src/server.h +++ b/src/server.h @@ -2158,6 +2158,7 @@ struct redisServer { int active_defrag_enabled; int sanitize_dump_payload; /* Enables deep sanitization for ziplist and listpack in RDB and RESTORE. */ int skip_checksum_validation; /* Disable checksum validation for RDB and RESTORE payload. */ + int allow_keymeta_registration; /* Allow keymeta class registration outside server startup (for testing). */ int jemalloc_bg_thread; /* Enable jemalloc background thread */ int active_defrag_configuration_changed; /* defrag configuration has been changed and need to reconsider * active_defrag_running in computeDefragCycles. 
*/ diff --git a/tests/unit/moduleapi/keymeta.tcl b/tests/unit/moduleapi/keymeta.tcl index ebb778426..cc3462389 100644 --- a/tests/unit/moduleapi/keymeta.tcl +++ b/tests/unit/moduleapi/keymeta.tcl @@ -99,6 +99,7 @@ proc flushallAndVerifyCleanup {} { start_server {tags {"modules" "external:skip" "cluster:skip"} overrides {enable-debug-command yes}} { r module load $testmodule + r debug enable-keymeta-runtime-registration 1 array set classesSpec {} set classesSpec(1) "KEEPONCOPY:KEEPONRENAME:KEEPONMOVE:ALLOWIGNORE:RDBLOAD:RDBSAVE" @@ -763,6 +764,7 @@ test "RDB: Load with different module registration order preserves metadata corr # metadata values should still be correctly associated with their classes. start_server {tags {"modules" "external:skip" "cluster:skip"} overrides {enable-debug-command yes}} { r module load $testmodule + r debug enable-keymeta-runtime-registration 1 # Helper function to generate class names (needed in inner scope) proc cname {id} { return "CLS$id" } @@ -805,6 +807,7 @@ test "RDB: Load with different module registration order preserves metadata corr # INNER SERVER: Start new server, register classes in DIFFERENT order, then load RDB start_server [list overrides [list dir $rdb_dir enable-debug-command yes]] { r module load $testmodule + r debug enable-keymeta-runtime-registration 1 # Helper function to generate class names (needed in inner scope) proc cname {id} { return "CLS$id" } @@ -866,6 +869,7 @@ test "RDB: File size same with/without metadata when no rdb_save callback" { start_server {tags {"modules" "external:skip" "cluster:skip"} overrides {enable-debug-command yes}} { r module load $testmodule + r debug enable-keymeta-runtime-registration 1 # Get RDB directory set rdb_dir [lindex [r config get dir] 1] @@ -900,11 +904,11 @@ test "RDB: File size same with/without metadata when no rdb_save callback" { } {} {external:skip needs:save} test "Creating key metadata not during OnLoad should fail" { - # This time start_server without 
"enable-debug-command yes" + # Start server without enabling keymeta runtime registration debug flag start_server {tags {"modules" "external:skip" "cluster:skip"} overrides {enable-debug-command no}} { r module load $testmodule - # Creating a class not during OnLoad should fail + # Creating a class not during server startup should fail catch {r keymeta.register [cname 1] 1 "ALLOWIGNORE"} err - assert_match {*failed to create metadata class*} $err + assert_match {*failed to create metadata class*} $err } } {} {external:skip needs:save} diff --git a/tests/unit/moduleapi/ksn_notify_side_effect.tcl b/tests/unit/moduleapi/ksn_notify_side_effect.tcl index 22f837589..f17a1c688 100644 --- a/tests/unit/moduleapi/ksn_notify_side_effect.tcl +++ b/tests/unit/moduleapi/ksn_notify_side_effect.tcl @@ -10,7 +10,8 @@ set testmodule [file normalize tests/modules/keymeta_notify.so] -start_server {tags {"modules" "external:skip"}} { +start_server {tags {"modules" "external:skip"} overrides {enable-debug-command yes}} { + r debug enable-keymeta-runtime-registration 1 r module load $testmodule # --- HASH notification tests --- From 747dfe578e6d204e288c7915edd919bc791d9d9b Mon Sep 17 00:00:00 2001 From: Sergei Georgiev Date: Tue, 7 Apr 2026 14:17:53 +0300 Subject: [PATCH 03/32] Add XNACK command for releasing stream messages back to the group (#14797) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ### Overview This PR enhances Redis Streams consumer groups by adding a new `XNACK` command that allows consumers to explicitly release pending messages back to the group without acknowledging them. Released (NACKed) entries become immediately available for re-delivery to other consumers, eliminating the idle-timeout delay currently required for message recovery. 
The command supports three modes — SILENT, FAIL, and FATAL — giving consumers fine-grained control over delivery counter semantics to handle graceful shutdowns, transient failures, and poison messages respectively. ### Problem Statement For developers using Redis Streams with consumer groups, there are several common scenarios where a consumer needs to release a message it has claimed without acknowledging it: 1. **Transient internal failures**: A consumer may fail to process a message because of problems unrelated to the message itself — for example, it cannot connect to an external service to fetch required context. The message is perfectly valid and should be retried promptly by another consumer. 2. **Resource pressure**: A consumer under resource stress (low CPU, low memory) may be unable to handle a specific message (e.g., a complex or large message) within acceptable QoS. It should leave the opportunity to other consumers in the group, with minimal delay. 3. **Graceful shutdown**: A consumer about to shut down would like to immediately release all unprocessed messages it has claimed, so they can be picked up by remaining consumers without waiting for idle timeouts. 4. **Poison / malicious messages**: A consumer may detect or suspect that a claimed message is invalid or malicious and wants to mark it as permanently failed (for dead-letter queue processing when available). **Currently, a consumer cannot NACK a message.** It can either: - **XACK** it — marks it as "processed" and removes it from the PEL entirely, losing the ability to redeliver it - **Leave it pending** — requires other consumers to discover it via `XPENDING` and claim it via `XCLAIM`/`XAUTOCLAIM` or `XREADGROUP CLAIM` after the idle timeout expires, introducing a long, unnecessary delay In all these cases, the logic that applications must implement introduces **message handling delays**, **implementation complexity**, and **code duplication** across consumer implementations. 
### Solution Introduces a new `XNACK` (Negative ACKnowledge) command that explicitly releases pending messages from their owning consumer back to the group's PEL, making them immediately claimable via `XCLAIM` and `XAUTOCLAIM`, and prioritized for re-delivery in `XREADGROUP CLAIM`: ``` XNACK key group <SILENT | FAIL | FATAL> IDS numids id [id ...] [RETRYCOUNT count] [FORCE] ``` When executed, the command: 1. **Disassociates** the entry from its owning consumer (`consumer = NULL`) 2. **Repositions** the entry to the head of the PEL time-ordered list (`delivery_time = 0`), making it immediately claimable with any `min-idle-time` threshold 3. **Adjusts the delivery counter** based on the specified mode, giving consumers fine-grained control over retry semantics 4. **Returns** the count of successfully NACKed entries **Mode** controls the delivery counter adjustment and communicates the reason for the NACK: | Mode | Delivery Counter Behavior | Use Case | |----------|---------------------------------------------------|---------------------------------------------| | `SILENT` | Decrement by 1 (undo the delivery increment) | Consumer shutdown / transient internal error — the delivery "didn't count" | | `FAIL` | No change (keep the incremented value) | Message too complex for this consumer, but may work for others — count this as an attempt | | `FATAL` | Set to `LLONG_MAX` | Invalid / suspected malicious message — mark as permanently failed | The three modes map directly to the real-world scenarios above: - **SILENT** for graceful shutdown or transient failures unrelated to the message - **FAIL** for resource-constrained consumers that cannot handle a specific message - **FATAL** for poison message detection and dead-letter queue integration **Optional parameters:** - **`RETRYCOUNT count`**: Directly sets `delivery_count` to the specified value, overriding the mode-based adjustment - **`FORCE`**: Creates new unowned PEL entries for IDs that are not already in the group PEL (the entry must exist
in the stream). When `FORCE` creates an entry, the delivery counter is set to `0` (or to `RETRYCOUNT` if specified, or to `LLONG_MAX` if mode is `FATAL`). This is used internally for AOF rewrite and replication. ### Response Format The command returns an integer — the number of messages successfully NACKed (released back to the group PEL): ``` 127.0.0.1:6379> XADD mystream 1-0 f v1 "1-0" 127.0.0.1:6379> XADD mystream 2-0 f v2 "2-0" 127.0.0.1:6379> XGROUP CREATE mystream grp 0 OK 127.0.0.1:6379> XREADGROUP GROUP grp c1 STREAMS mystream > 1) 1) "mystream" 2) 1) 1) "1-0" 2) 1) "f" 2) "v1" 2) 1) "2-0" 2) 1) "f" 2) "v2" 127.0.0.1:6379> XNACK mystream grp FAIL IDS 2 1-0 2-0 (integer) 2 ``` After XNACK, the entries appear with an empty consumer in XPENDING output: ``` 127.0.0.1:6379> XPENDING mystream grp - + 10 1) 1) "1-0" 2) "" 3) (integer) -1 4) (integer) 1 2) 1) "2-0" 2) "" 3) (integer) -1 4) (integer) 1 ``` ### NACK Zone: Data Structure Extension To support unowned PEL entries and ensure they are prioritized for re-delivery, a **NACK zone** is introduced at the head of the existing PEL time-ordered doubly-linked list. A new `pel_nack_tail` pointer is added to the `streamCG` structure: **PEL ordering:** ``` [pel_time_head] <-> ... <-> [pel_nack_tail] <-> [owned entries...] <-> [pel_time_tail] |_____________ NACK zone ______________| |_______ normal PEL ________| ``` The head of the PEL contains all NACKed messages (FIFO-ordered), followed by all delivered messages that were not NACKed (same order as today). This ensures NACKed messages are always prioritized over idle pending messages. The delivery order for `XREADGROUP` is therefore: 1. If `CLAIM` was specified: first deliver NACKed messages, then deliver due pending messages (current behavior) 2. 
Deliver new entries after the group's last-delivered-id (current behavior) **Structure Design:** - NACKed entries occupy positions from `pel_time_head` to `pel_nack_tail` in the time-ordered list - Their `delivery_time` is set to `0`, ensuring they always appear "oldest" and are immediately claimable - Their `consumer` pointer is set to `NULL`, marking them as unowned - `pel_nack_tail` is `NULL` when no NACKed entries exist **Key Properties:** - **O(1) insertion**: New NACKed entries are inserted right after `pel_nack_tail` (or at the list head if the zone is empty) - **FIFO ordering** among NACKed entries: entries stay in the order in which they were NACKed - **Immediate claimability**: Since `delivery_time = 0`, NACKed entries have maximum idle time and satisfy any `min-idle-time` threshold in `XCLAIM` and `XAUTOCLAIM`. In `XREADGROUP CLAIM`, NACKed entries are also prioritized over other pending entries due to their position at the head of the PEL. - **Zone integrity**: The `pelListInsertSorted` function is updated to stop scanning at the `pel_nack_tail` boundary, ensuring owned entries are never placed inside the NACK zone ### Impact on Existing Commands All commands that interact with the PEL are updated to handle unowned (`consumer = NULL`) entries: - **XPENDING**: Shows NACKed entries with an empty consumer name - **XCLAIM / XAUTOCLAIM**: Can claim NACKed entries (they satisfy any min-idle-time since `delivery_time = 0`) - **XREADGROUP CLAIM**: NACKed entries are picked up by the claim phase - **XACK**: Works correctly on NACKed entries (removes from group PEL) - **XINFO STREAM FULL**: Displays NACKed entries with an empty consumer name - **XGROUP DELCONSUMER**: Unaffected — NACKed entries are not in any consumer's PEL Propagation is also updated: when `XCLAIM` or `XAUTOCLAIM` encounters a deleted stream entry for an unowned NACK, it propagates `XACK` (instead of `XCLAIM`) to replicas and AOF, since there is no source consumer to reference.
### Persistence **RDB:** - A new RDB type `RDB_TYPE_STREAM_LISTPACKS_5` (type 27) is introduced - After saving consumer PEL entries, the NACK zone stream IDs are saved separately (count + encoded IDs) - On load, NACK zone entries are reconstructed by looking them up in the group PEL, unlinking from their sorted position, and re-inserting into the NACK zone via `pelListInsertNacked` - Backward compatibility is preserved: old RDB types continue to load with the existing validation (all entries must have consumers) **AOF:** - AOF rewrite emits `XNACK <key> <group> FAIL IDS <numids> <id> [<id> ...] RETRYCOUNT <delivery_count> FORCE` commands for entries in the NACK zone - Consecutive entries with the same `delivery_count` are batched into a single command (up to `AOF_REWRITE_ITEMS_PER_CMD` IDs per command) ### Defragmentation The defragmentation logic is restructured to handle unowned entries: - **`defragStreamCGPendingEntry`** (new): Walks the group-level PEL rax, defragments each NACK, updates the doubly-linked list pointers (`pel_prev`, `pel_next`), `pel_time_head`, `pel_time_tail`, `pel_nack_tail`, and the consumer PEL back-pointer for owned entries - **`defragStreamConsumerPendingEntry`** (simplified): Only fixes up the back-pointer to the possibly-relocated consumer; the CG back-pointer update and the actual defragmentation are now done by the group-level walk.
Unowned (NACK zone) entries have no consumer PEL walk, so the group-level pass is their only chance ### Key Benefits - **Immediate re-delivery**: NACKed entries are instantly claimable by other consumers via `XCLAIM` and `XAUTOCLAIM` (since `delivery_time = 0` satisfies any `min-idle-time`), and prioritized for re-delivery in `XREADGROUP CLAIM`, eliminating idle-time delays that can range from seconds to minutes - **Explicit release semantics**: Consumers can release messages intentionally, with fine-grained control over retry behavior — a capability that exists in competing systems like RabbitMQ - **Flexible retry control**: Three modes (SILENT, FAIL, FATAL) plus RETRYCOUNT cover the full spectrum of failure handling strategies, from graceful shutdown to poison message detection - **Reduced application complexity**: Eliminates the need for application-level workarounds involving XPENDING polling, arbitrary idle timeouts, and manual XCLAIM orchestration - **Dead-letter queue readiness**: FATAL mode + delivery count enables straightforward poison message detection and future DLQ integration - **Backward compatibility**: Fully optional new command with zero breaking changes to existing behavior --- src/aof.c | 66 ++ src/commands.def | 46 +- src/commands/xinfo-stream.json | 8 + src/commands/xnack.json | 102 ++ src/defrag.c | 66 +- src/rdb.c | 96 +- src/rdb.h | 5 +- src/redis-check-rdb.c | 1 + src/server.c | 1 + src/server.h | 3 +- src/stream.h | 8 + src/t_stream.c | 420 +++++++-- tests/support/util.tcl | 2 +- tests/unit/type/stream-cgroups.tcl | 1396 +++++++++++++++++++++++++++- 14 files changed, 2084 insertions(+), 136 deletions(-) create mode 100644 src/commands/xnack.json diff --git a/src/aof.c b/src/aof.c index b489608d4..a094d11ca 100644 --- a/src/aof.c +++ b/src/aof.c @@ -2197,6 +2197,35 @@ int rioWriteStreamPendingEntry(rio *r, robj *key, const char *groupname, size_t return 1; } +/* Helper for rewriteStreamObject(): emit a single XNACK FORCE command that + * 
reconstructs one or more NACKed (unowned) PEL entries sharing the same + * delivery_count. `ids` points to an array of `count` streamIDs (at most + * AOF_REWRITE_ITEMS_PER_CMD). Returns 0 on error, 1 on success. */ +int rioWriteStreamNackedEntries(rio *r, robj *key, const char *groupname, + size_t groupname_len, streamID *ids, + int count, uint64_t delivery_count) { + serverAssert(count > 0 && count <= AOF_REWRITE_ITEMS_PER_CMD); + + /* XNACK FAIL IDS RETRYCOUNT FORCE + * 6 fixed tokens before IDs + count IDs + 3 fixed tokens after. */ + if (rioWriteBulkCount(r,'*',6+count+3) == 0) return 0; + if (rioWriteBulkString(r,"XNACK",5) == 0) return 0; + if (rioWriteBulkObject(r,key) == 0) return 0; + if (rioWriteBulkString(r,groupname,groupname_len) == 0) return 0; + if (rioWriteBulkString(r,"FAIL",4) == 0) return 0; + if (rioWriteBulkString(r,"IDS",3) == 0) return 0; + if (rioWriteBulkLongLong(r,count) == 0) return 0; + + for (int i = 0; i < count; i++) { + if (rioWriteBulkStreamID(r,&ids[i]) == 0) return 0; + } + + if (rioWriteBulkString(r,"RETRYCOUNT",10) == 0) return 0; + if (rioWriteBulkLongLong(r,delivery_count) == 0) return 0; + if (rioWriteBulkString(r,"FORCE",5) == 0) return 0; + return 1; +} + /* Helper for rewriteStreamObject(): emit the XGROUP CREATECONSUMER is * needed in order to create consumers that do not have any pending entries. * All this in the context of the specified key and group. */ @@ -2354,6 +2383,43 @@ int rewriteStreamObject(rio *r, robj *key, robj *o) { raxStop(&ri_pel); } raxStop(&ri_cons); + + /* Emit XNACK FORCE for NACKed (unowned) entries from the + * NACK zone of the PEL time-ordered list + * (pel_time_head..pel_nack_tail). Consecutive entries with + * the same delivery_count are batched into a single command. + * + * nack_stop is the first node outside the NACK zone (or NULL + * when the zone extends to the end of the PEL). When + * pel_nack_tail is NULL (no NACKed entries) the guard below + * skips the whole block. 
*/ + streamNACK *nack_end = group->pel_nack_tail; + if (nack_end != NULL) { + streamID batch_ids[AOF_REWRITE_ITEMS_PER_CMD]; + streamNACK *nack_stop = nack_end->pel_next; + streamNACK *nack = group->pel_time_head; + int batch_count = 0; + uint64_t batch_dc = 0; + while (nack && nack != nack_stop) { + if (batch_count == 0) batch_dc = nack->delivery_count; + batch_ids[batch_count++] = nack->id; + streamNACK *next = nack->pel_next; + if (batch_count >= AOF_REWRITE_ITEMS_PER_CMD || + !next || next == nack_stop || + next->delivery_count != batch_dc) + { + if (rioWriteStreamNackedEntries(r,key,(char*)ri.key, + ri.key_len,batch_ids, + batch_count,batch_dc) == 0) + { + raxStop(&ri); + return 0; + } + batch_count = 0; + } + nack = next; + } + } } raxStop(&ri); } diff --git a/src/commands.def b/src/commands.def index 3980365d4..07e1dccc6 100644 --- a/src/commands.def +++ b/src/commands.def @@ -10574,6 +10574,7 @@ commandHistory XINFO_STREAM_History[] = { {"7.0.0","Added the `max-deleted-entry-id`, `entries-added`, `recorded-first-entry-id`, `entries-read` and `lag` fields"}, {"7.2.0","Added the `active-time` field, and changed the meaning of `seen-time`."}, {"8.6.0","Added the `idmp-duration`, `idmp-maxsize`, `pids-tracked`, `iids-tracked`, `iids-added` and `iids-duplicates` fields for IDMP tracking."}, +{"8.8.0","Added the `nacked-count` field to consumer groups in `FULL` output."}, }; #endif @@ -10606,7 +10607,7 @@ struct COMMAND_STRUCT XINFO_Subcommands[] = { {MAKE_CMD("consumers","Returns a list of the consumers in a consumer group.","O(1)","5.0.0",CMD_DOC_NONE,NULL,NULL,"stream",COMMAND_GROUP_STREAM,XINFO_CONSUMERS_History,1,XINFO_CONSUMERS_Tips,1,xinfoCommand,4,CMD_READONLY,ACL_CATEGORY_STREAM,XINFO_CONSUMERS_Keyspecs,1,NULL,2),.args=XINFO_CONSUMERS_Args}, {MAKE_CMD("groups","Returns a list of the consumer groups of a 
stream.","O(1)","5.0.0",CMD_DOC_NONE,NULL,NULL,"stream",COMMAND_GROUP_STREAM,XINFO_GROUPS_History,1,XINFO_GROUPS_Tips,0,xinfoCommand,3,CMD_READONLY,ACL_CATEGORY_STREAM,XINFO_GROUPS_Keyspecs,1,NULL,1),.args=XINFO_GROUPS_Args}, {MAKE_CMD("help","Returns helpful text about the different subcommands.","O(1)","5.0.0",CMD_DOC_NONE,NULL,NULL,"stream",COMMAND_GROUP_STREAM,XINFO_HELP_History,0,XINFO_HELP_Tips,0,xinfoCommand,2,CMD_LOADING|CMD_STALE,ACL_CATEGORY_STREAM,XINFO_HELP_Keyspecs,0,NULL,0)}, -{MAKE_CMD("stream","Returns information about a stream.","O(1)","5.0.0",CMD_DOC_NONE,NULL,NULL,"stream",COMMAND_GROUP_STREAM,XINFO_STREAM_History,4,XINFO_STREAM_Tips,0,xinfoCommand,-3,CMD_READONLY,ACL_CATEGORY_STREAM,XINFO_STREAM_Keyspecs,1,NULL,2),.args=XINFO_STREAM_Args}, +{MAKE_CMD("stream","Returns information about a stream.","O(1)","5.0.0",CMD_DOC_NONE,NULL,NULL,"stream",COMMAND_GROUP_STREAM,XINFO_STREAM_History,5,XINFO_STREAM_Tips,0,xinfoCommand,-3,CMD_READONLY,ACL_CATEGORY_STREAM,XINFO_STREAM_Keyspecs,1,NULL,2),.args=XINFO_STREAM_Args}, {0} }; @@ -10651,6 +10652,48 @@ struct COMMAND_ARG XLEN_Args[] = { {MAKE_ARG("key",ARG_TYPE_KEY,0,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)}, }; +/********** XNACK ********************/ + +#ifndef SKIP_CMD_HISTORY_TABLE +/* XNACK history */ +#define XNACK_History NULL +#endif + +#ifndef SKIP_CMD_TIPS_TABLE +/* XNACK tips */ +#define XNACK_Tips NULL +#endif + +#ifndef SKIP_CMD_KEY_SPECS_TABLE +/* XNACK key specs */ +keySpec XNACK_Keyspecs[1] = { +{NULL,CMD_KEY_RW|CMD_KEY_UPDATE,KSPEC_BS_INDEX,.bs.index={1},KSPEC_FK_RANGE,.fk.range={0,1,0}} +}; +#endif + +/* XNACK mode argument table */ +struct COMMAND_ARG XNACK_mode_Subargs[] = { +{MAKE_ARG("silent",ARG_TYPE_PURE_TOKEN,-1,"SILENT",NULL,NULL,CMD_ARG_NONE,0,NULL)}, +{MAKE_ARG("fail",ARG_TYPE_PURE_TOKEN,-1,"FAIL",NULL,NULL,CMD_ARG_NONE,0,NULL)}, +{MAKE_ARG("fatal",ARG_TYPE_PURE_TOKEN,-1,"FATAL",NULL,NULL,CMD_ARG_NONE,0,NULL)}, +}; + +/* XNACK ids argument table */ +struct COMMAND_ARG 
XNACK_ids_Subargs[] = { +{MAKE_ARG("numids",ARG_TYPE_INTEGER,-1,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)}, +{MAKE_ARG("id",ARG_TYPE_STRING,-1,NULL,NULL,NULL,CMD_ARG_MULTIPLE,0,NULL)}, +}; + +/* XNACK argument table */ +struct COMMAND_ARG XNACK_Args[] = { +{MAKE_ARG("key",ARG_TYPE_KEY,0,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)}, +{MAKE_ARG("group",ARG_TYPE_STRING,-1,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)}, +{MAKE_ARG("mode",ARG_TYPE_ONEOF,-1,NULL,NULL,NULL,CMD_ARG_NONE,3,NULL),.subargs=XNACK_mode_Subargs}, +{MAKE_ARG("ids",ARG_TYPE_BLOCK,-1,"IDS",NULL,NULL,CMD_ARG_NONE,2,NULL),.subargs=XNACK_ids_Subargs}, +{MAKE_ARG("count",ARG_TYPE_INTEGER,-1,"RETRYCOUNT",NULL,NULL,CMD_ARG_OPTIONAL,0,NULL)}, +{MAKE_ARG("force",ARG_TYPE_PURE_TOKEN,-1,"FORCE",NULL,NULL,CMD_ARG_OPTIONAL,0,NULL)}, +}; + /********** XPENDING ********************/ #ifndef SKIP_CMD_HISTORY_TABLE @@ -11984,6 +12027,7 @@ struct COMMAND_STRUCT redisCommandTable[] = { {MAKE_CMD("xidmprecord","An internal command for setting IDMP metadata on an existing stream message.","O(1)","8.6.2",CMD_DOC_NONE,NULL,NULL,"stream",COMMAND_GROUP_STREAM,XIDMPRECORD_History,0,XIDMPRECORD_Tips,0,xidmprecordCommand,5,CMD_WRITE|CMD_DENYOOM|CMD_FAST,ACL_CATEGORY_STREAM,XIDMPRECORD_Keyspecs,1,NULL,4),.args=XIDMPRECORD_Args}, {MAKE_CMD("xinfo","A container for stream introspection commands.","Depends on subcommand.","5.0.0",CMD_DOC_NONE,NULL,NULL,"stream",COMMAND_GROUP_STREAM,XINFO_History,0,XINFO_Tips,0,NULL,-2,0,0,XINFO_Keyspecs,0,NULL,0),.subcommands=XINFO_Subcommands}, {MAKE_CMD("xlen","Return the number of messages in a stream.","O(1)","5.0.0",CMD_DOC_NONE,NULL,NULL,"stream",COMMAND_GROUP_STREAM,XLEN_History,0,XLEN_Tips,0,xlenCommand,2,CMD_READONLY|CMD_FAST,ACL_CATEGORY_STREAM,XLEN_Keyspecs,1,NULL,1),.args=XLEN_Args}, +{MAKE_CMD("xnack","Releases claimed messages back to the group's PEL without acknowledging them, making them available for re-delivery.","O(1) for each message ID 
processed.","8.8.0",CMD_DOC_NONE,NULL,NULL,"stream",COMMAND_GROUP_STREAM,XNACK_History,0,XNACK_Tips,0,xnackCommand,-7,CMD_WRITE|CMD_FAST,ACL_CATEGORY_STREAM,XNACK_Keyspecs,1,NULL,6),.args=XNACK_Args}, {MAKE_CMD("xpending","Returns the information and entries from a stream consumer group's pending entries list.","O(N) with N being the number of elements returned, so asking for a small fixed number of entries per call is O(1). O(M), where M is the total number of entries scanned when used with the IDLE filter. When the command returns just the summary and the list of consumers is small, it runs in O(1) time; otherwise, an additional O(N) time for iterating every consumer.","5.0.0",CMD_DOC_NONE,NULL,NULL,"stream",COMMAND_GROUP_STREAM,XPENDING_History,1,XPENDING_Tips,1,xpendingCommand,-3,CMD_READONLY,ACL_CATEGORY_STREAM,XPENDING_Keyspecs,1,NULL,3),.args=XPENDING_Args}, {MAKE_CMD("xrange","Returns the messages from a stream within a range of IDs.","O(N) with N being the number of elements being returned. If N is constant (e.g. always asking for the first 10 elements with COUNT), you can consider it O(1).","5.0.0",CMD_DOC_NONE,NULL,NULL,"stream",COMMAND_GROUP_STREAM,XRANGE_History,1,XRANGE_Tips,0,xrangeCommand,-4,CMD_READONLY,ACL_CATEGORY_STREAM,XRANGE_Keyspecs,1,NULL,4),.args=XRANGE_Args}, {MAKE_CMD("xread","Returns messages from multiple streams with IDs greater than the ones requested. 
Blocks until a message is available otherwise.",NULL,"5.0.0",CMD_DOC_NONE,NULL,NULL,"stream",COMMAND_GROUP_STREAM,XREAD_History,0,XREAD_Tips,0,xreadCommand,-4,CMD_BLOCKING|CMD_READONLY,ACL_CATEGORY_STREAM,XREAD_Keyspecs,1,xreadGetKeys,3),.args=XREAD_Args}, diff --git a/src/commands/xinfo-stream.json b/src/commands/xinfo-stream.json index 50f2dacca..65b948a6e 100644 --- a/src/commands/xinfo-stream.json +++ b/src/commands/xinfo-stream.json @@ -22,6 +22,10 @@ [ "8.6.0", "Added the `idmp-duration`, `idmp-maxsize`, `pids-tracked`, `iids-tracked`, `iids-added` and `iids-duplicates` fields for IDMP tracking." + ], + [ + "8.8.0", + "Added the `nacked-count` field to consumer groups in `FULL` output." ] ], "function": "xinfoCommand", @@ -298,6 +302,10 @@ "description": "total number of unacknowledged entries", "type": "integer" }, + "nacked-count": { + "description": "number of entries currently in the nacked zone", + "type": "integer" + }, "pending": { "description": "data about all of the unacknowledged entries", "type": "array", diff --git a/src/commands/xnack.json b/src/commands/xnack.json new file mode 100644 index 000000000..59c6a3477 --- /dev/null +++ b/src/commands/xnack.json @@ -0,0 +1,102 @@ +{ + "XNACK": { + "summary": "Releases claimed messages back to the group's PEL without acknowledging them, making them available for re-delivery.", + "complexity": "O(1) for each message ID processed.", + "group": "stream", + "since": "8.8.0", + "arity": -7, + "function": "xnackCommand", + "command_flags": [ + "WRITE", + "FAST" + ], + "acl_categories": [ + "STREAM" + ], + "key_specs": [ + { + "flags": [ + "RW", + "UPDATE" + ], + "begin_search": { + "index": { + "pos": 1 + } + }, + "find_keys": { + "range": { + "lastkey": 0, + "step": 1, + "limit": 0 + } + } + } + ], + "arguments": [ + { + "name": "key", + "type": "key", + "key_spec_index": 0 + }, + { + "name": "group", + "type": "string" + }, + { + "name": "mode", + "type": "oneof", + "arguments": [ + { + "name": "silent", + 
"type": "pure-token", + "token": "SILENT" + }, + { + "name": "fail", + "type": "pure-token", + "token": "FAIL" + }, + { + "name": "fatal", + "type": "pure-token", + "token": "FATAL" + } + ] + }, + { + "name": "ids", + "token": "IDS", + "type": "block", + "arguments": [ + { + "name": "numids", + "type": "integer" + }, + { + "name": "id", + "type": "string", + "multiple": true + } + ] + }, + { + "token": "RETRYCOUNT", + "name": "count", + "type": "integer", + "optional": true + }, + { + "name": "force", + "token": "FORCE", + "type": "pure-token", + "optional": true + } + ], + "reply_schema": { + "description": "The number of messages successfully NACKed (released back to the group PEL).", + "type": "integer", + "minimum": 0 + } + } +} diff --git a/src/defrag.c b/src/defrag.c index b80d1ef28..f3ca5acba 100644 --- a/src/defrag.c +++ b/src/defrag.c @@ -855,51 +855,52 @@ void defragRadixTree(rax **raxref, int defrag_data, raxDefragFunction *element_c raxStop(&ri); } -typedef struct { - streamCG *cg; - streamConsumer *c; -} PendingEntryContext; - void* defragStreamConsumerPendingEntry(raxIterator *ri, void *privdata) { - PendingEntryContext *ctx = privdata; + streamConsumer *c = privdata; + streamNACK *nack = ri->data; + /* NACKs are already defragged by the CG PEL walk (defragStreamCGPendingEntry). + * cgroup_ref_node->value is also updated there for all NACKs (including + * unowned NACK-zone entries that have no consumer PEL walk). + * Here we only fix up the back-pointer to the possibly-relocated consumer. */ + nack->consumer = c; + return NULL; +} + +void* defragStreamCGPendingEntry(raxIterator *ri, void *privdata) { + streamCG *cg = privdata; streamNACK *nack = ri->data, *newnack; - nack->consumer = ctx->c; /* update nack pointer to consumer */ - nack->cgroup_ref_node->value = ctx->cg; /* Update the value of cgroups_ref node to the consumer group. */ + /* Update cgroup_ref_node to the possibly-relocated CG for every NACK. 
+ * The consumer PEL walk (defragStreamConsumerPendingEntry) only fixes the + * consumer back-pointer, and unowned (NACK zone) entries have no + * consumer PEL walk at all, so this is the only place it is updated. */ + nack->cgroup_ref_node->value = cg; newnack = activeDefragAlloc(nack); if (newnack) { - /* Update consumer group pointer to the nack. */ - void *prev; - raxInsert(ctx->cg->pel, ri->key, ri->key_len, newnack, &prev); - serverAssert(prev==nack); - - /* Update the doubly-linked list pointers in adjacent nacks. - * When we move a nack to a new address, we need to update the - * pel_prev->pel_next and pel_next->pel_prev pointers. */ + /* If this NACK is owned by a consumer, update the consumer's PEL. */ + if (newnack->consumer) { + void *prev; + raxInsert(newnack->consumer->pel, ri->key, ri->key_len, newnack, &prev); + serverAssert(prev == nack); + } if (newnack->pel_prev) { newnack->pel_prev->pel_next = newnack; } else { - /* This is the head of the list */ - ctx->cg->pel_time_head = newnack; + cg->pel_time_head = newnack; } if (newnack->pel_next) { newnack->pel_next->pel_prev = newnack; } else { - /* This is the tail of the list */ - ctx->cg->pel_time_tail = newnack; + cg->pel_time_tail = newnack; + } + if (cg->pel_nack_tail == nack) { + cg->pel_nack_tail = newnack; } } return newnack; } -typedef struct { - stream *s; - streamCG *cg; -} StreamConsumerContext; - void* defragStreamConsumer(raxIterator *ri, void *privdata) { - StreamConsumerContext *ctx = privdata; - stream *s = ctx->s; - streamCG *cg = ctx->cg; + stream *s = privdata; streamConsumer *c = ri->data; void *newc = activeDefragAlloc(c); if (newc) { @@ -911,8 +912,7 @@ void* defragStreamConsumer(raxIterator *ri, void *privdata) { if (c->pel) { /* Update pel back-pointer to new stream */ c->pel->alloc_size = &s->alloc_size; - PendingEntryContext pel_ctx = {cg, c}; - defragRadixTree(&c->pel, 0, defragStreamConsumerPendingEntry, &pel_ctx); + defragRadixTree(&c->pel, 0, defragStreamConsumerPendingEntry, c); }
return newc; /* returns NULL if c was not defragged */ } @@ -925,14 +925,12 @@ void* defragStreamConsumerGroup(raxIterator *ri, void *privdata) { if (cg->pel) { /* Update pel back-pointer to new stream */ cg->pel->alloc_size = &s->alloc_size; - defragRadixTree(&cg->pel, 0, NULL, NULL); + defragRadixTree(&cg->pel, 0, defragStreamCGPendingEntry, cg); } - /* pel_time_head/tail are just pointers to NACKs in pel, no separate defrag needed */ if (cg->consumers) { /* Update consumers back-pointer to new stream */ cg->consumers->alloc_size = &s->alloc_size; - StreamConsumerContext consumer_ctx = {s, cg}; - defragRadixTree(&cg->consumers, 0, defragStreamConsumer, &consumer_ctx); + defragRadixTree(&cg->consumers, 0, defragStreamConsumer, s); } return cg; } diff --git a/src/rdb.c b/src/rdb.c index e9429d28f..574f96b9a 100644 --- a/src/rdb.c +++ b/src/rdb.c @@ -712,7 +712,7 @@ int rdbSaveObjectType(rio *rdb, robj *o) { } else serverPanic("Unknown hash encoding"); case OBJ_STREAM: - return rdbSaveType(rdb,RDB_TYPE_STREAM_LISTPACKS_4); + return rdbSaveType(rdb,RDB_TYPE_STREAM_LISTPACKS_5); case OBJ_MODULE: return rdbSaveType(rdb,RDB_TYPE_MODULE_2); default: @@ -1351,6 +1351,29 @@ ssize_t rdbSaveObject(rio *rdb, robj *o, robj *key, int dbid) { return -1; } nwritten += n; + + /* Save NACK zone: count followed by the IDs of NACKed entries. 
*/ + uint64_t nacked_count = pelListNackedCount(cg); + if ((n = rdbSaveLen(rdb, nacked_count)) == -1) { + raxStop(&ri); + return -1; + } + nwritten += n; + + if (cg->pel_nack_tail) { + streamNACK *nack = cg->pel_time_head; + while (nack) { + unsigned char buf[sizeof(streamID)]; + streamEncodeID(buf, &nack->id); + if ((n = rdbWriteRaw(rdb, buf, sizeof(buf))) == -1) { + raxStop(&ri); + return -1; + } + nwritten += n; + if (nack == cg->pel_nack_tail) break; + nack = nack->pel_next; + } + } } raxStop(&ri); } @@ -3092,7 +3115,8 @@ robj *rdbLoadObject(int rdbtype, rio *rdb, sds key, int dbid, int *error) } else if (rdbtype == RDB_TYPE_STREAM_LISTPACKS || rdbtype == RDB_TYPE_STREAM_LISTPACKS_2 || rdbtype == RDB_TYPE_STREAM_LISTPACKS_3 || - rdbtype == RDB_TYPE_STREAM_LISTPACKS_4) + rdbtype == RDB_TYPE_STREAM_LISTPACKS_4 || + rdbtype == RDB_TYPE_STREAM_LISTPACKS_5) { o = createStreamObject(); stream *s = o->ptr; @@ -3387,21 +3411,67 @@ robj *rdbLoadObject(int rdbtype, rio *rdb, sds key, int dbid, int *error) } } - /* Verify that each PEL eventually got a consumer assigned to it. */ - if (deep_integrity_validation) { - raxIterator ri_cg_pel; - raxStart(&ri_cg_pel,cgroup->pel); - raxSeek(&ri_cg_pel,"^",NULL,0); - while(raxNext(&ri_cg_pel)) { - streamNACK *nack = ri_cg_pel.data; - if (!nack->consumer) { - raxStop(&ri_cg_pel); - rdbReportCorruptRDB("Stream CG PEL entry without consumer"); + /* For RDB_TYPE_STREAM_LISTPACKS_5 and above, load the NACK + * zone stream IDs and reconstruct the NACK zone. Entries with + * delivery_time == 0 may exist for both nacked and owned PEL + * entries, so we cannot rely on a simple walk — we use the + * stored IDs to unlink each nacked entry from its sorted + * position and re-insert it into the NACK zone. 
*/ + if (rdbtype >= RDB_TYPE_STREAM_LISTPACKS_5) { + uint64_t nacked_count = rdbLoadLen(rdb, NULL); + if (nacked_count == RDB_LENERR) { + rdbReportReadError("Stream NACK zone count loading failed."); + decrRefCount(o); + return NULL; + } + + /* Load each NACKed entry's stream ID, look it up in the + * group PEL, unlink from its current time-list position, + * and re-insert into the NACK zone. */ + for (uint64_t i = 0; i < nacked_count; i++) { + unsigned char rawid[sizeof(streamID)]; + if (rioRead(rdb, rawid, sizeof(rawid)) == 0) { + rdbReportReadError("Stream NACK zone entry ID loading failed."); decrRefCount(o); return NULL; } + + void *result; + if (!raxFind(cgroup->pel, rawid, sizeof(rawid), &result)) { + rdbReportCorruptRDB("Stream NACK zone entry not found " + "in group global PEL"); + decrRefCount(o); + return NULL; + } + streamNACK *nack = result; + if (nack->consumer != NULL) { + rdbReportCorruptRDB("Stream NACK zone entry has a " + "consumer assigned"); + decrRefCount(o); + return NULL; + } + pelListUnlink(cgroup, nack); + pelListInsertNacked(cgroup, nack); + } + + } + + /* Verify entries outside the NACK zone all have a consumer + * assigned. For old RDB types pel_nack_tail is NULL, so + * this walks the entire PEL — equivalent to checking all. */ + if (deep_integrity_validation) { + streamNACK *cur = cgroup->pel_nack_tail ? + cgroup->pel_nack_tail->pel_next : + cgroup->pel_time_head; + while (cur) { + if (!cur->consumer) { + rdbReportCorruptRDB("Stream CG PEL entry without " + "consumer outside NACK zone"); + decrRefCount(o); + return NULL; + } + cur = cur->pel_next; } - raxStop(&ri_cg_pel); } } diff --git a/src/rdb.h b/src/rdb.h index a020ff62c..5d92f8430 100644 --- a/src/rdb.h +++ b/src/rdb.h @@ -18,7 +18,7 @@ /* The current RDB version. When the format changes in a way that is no longer * backward compatible this number gets incremented. */ -#define RDB_VERSION 13 +#define RDB_VERSION 14 /* Defines related to the dump file format. 
To store 32 bits lengths for short * keys requires a lot of space, so we check the most significant 2 bits of @@ -79,10 +79,11 @@ #define RDB_TYPE_HASH_METADATA 24 /* Hash with HFEs. Attach min TTL at start */ #define RDB_TYPE_HASH_LISTPACK_EX 25 /* Hash LP with HFEs. Attach min TTL at start */ #define RDB_TYPE_STREAM_LISTPACKS_4 26 /* Stream with IDMP support */ +#define RDB_TYPE_STREAM_LISTPACKS_5 27 /* Stream with XNACK support (NACKed entries) */ /* NOTE: WHEN ADDING NEW RDB TYPE, UPDATE rdbIsObjectType(), and rdb_type_string[] */ /* Test if a type is an object type. */ -#define rdbIsObjectType(t) (((t) >= 0 && (t) <= 7) || ((t) >= 9 && (t) <= 26)) +#define rdbIsObjectType(t) (((t) >= 0 && (t) <= 7) || ((t) >= 9 && (t) <= 27)) /* Special RDB opcodes (saved/loaded with rdbSaveType/rdbLoadType). */ #define RDB_OPCODE_KEY_META 243 /* Key metadata (module metadata classes). */ diff --git a/src/redis-check-rdb.c b/src/redis-check-rdb.c index 11781dd61..4fe226474 100644 --- a/src/redis-check-rdb.c +++ b/src/redis-check-rdb.c @@ -87,6 +87,7 @@ char *rdb_type_string[] = { "hash-hashtable-md", "hash-listpack-md", "stream-v4", + "stream-v5", }; /* Show a few stats collected into 'rdbstate' */ diff --git a/src/server.c b/src/server.c index 7568e242c..aa012918b 100644 --- a/src/server.c +++ b/src/server.c @@ -2232,6 +2232,7 @@ void createSharedObjects(void) { shared.srem = createStringObject("SREM",4); shared.xgroup = createStringObject("XGROUP",6); shared.xclaim = createStringObject("XCLAIM",6); + shared.xack = createStringObject("XACK",4); shared.script = createStringObject("SCRIPT",6); shared.replconf = createStringObject("REPLCONF",8); shared.pexpireat = createStringObject("PEXPIREAT",9); diff --git a/src/server.h b/src/server.h index 055a516c4..8e2753300 100644 --- a/src/server.h +++ b/src/server.h @@ -1690,7 +1690,7 @@ struct sharedObjectsStruct { *busykeyerr, *oomerr, *plus, *messagebulk, *pmessagebulk, *subscribebulk, *unsubscribebulk, *psubscribebulk, 
*punsubscribebulk, *del, *unlink, *rpop, *lpop, *lpush, *rpoplpush, *lmove, *blmove, *zpopmin, *zpopmax, - *emptyscan, *multi, *exec, *left, *right, *hset, *srem, *xgroup, *xclaim, + *emptyscan, *multi, *exec, *left, *right, *hset, *srem, *xgroup, *xclaim, *xack, *script, *replconf, *eval, *persist, *set, *pexpireat, *pexpire, *hdel, *hpexpireat, *hpersist, *hsetex, *time, *pxat, *absttl, *retrycount, *force, *justid, *entriesread, @@ -4448,6 +4448,7 @@ void xgroupCommand(client *c); void xsetidCommand(client *c); void xidmprecordCommand(client *c); void xackCommand(client *c); +void xnackCommand(client *c); void xackdelCommand(client *c); void xpendingCommand(client *c); void xclaimCommand(client *c); diff --git a/src/stream.h b/src/stream.h index da9d41a69..8adbeffe4 100644 --- a/src/stream.h +++ b/src/stream.h @@ -111,6 +111,10 @@ typedef struct streamCG { streamNACK *pel_time_tail; /* Tail of time-ordered doubly-linked list of pending entries (newest delivery_time). O(1) append for updates that set delivery_time to current time. */ + streamNACK *pel_nack_tail; /* Tail of the NACK zone at the head of the + PEL time-ordered list. NACKed entries occupy + positions from pel_time_head to pel_nack_tail. + NULL if no NACKed entries exist. */ rax *consumers; /* A radix tree representing the consumers by name and their associated representation in the form of streamConsumer structures. 
*/ @@ -175,6 +179,7 @@ streamConsumer *streamLookupConsumer(streamCG *cg, sds name); streamConsumer *streamCreateConsumer(stream *s, streamCG *cg, sds name, robj *key, int dbid, int flags); streamCG *streamCreateCG(stream *s, char *name, size_t namelen, streamID *id, long long entries_read); streamNACK *streamCreateNACK(stream *s, streamConsumer *consumer, streamID *id); +void streamEncodeID(void *buf, streamID *id); void streamDecodeID(void *buf, streamID *id); int streamCompareID(streamID *a, streamID *b); void streamFreeNACK(stream *s, streamNACK *na); @@ -200,6 +205,9 @@ listNode *streamLinkCGroupToEntry(stream *s, streamCG *cg, unsigned char *key); /* PEL time list management (used by RDB loading) */ void pelListInsertSorted(streamCG *cg, streamNACK *nack); +void pelListUnlink(streamCG *cg, streamNACK *nack); +void pelListInsertNacked(streamCG *cg, streamNACK *nack); +uint64_t pelListNackedCount(streamCG *cg); /* IDMP functions */ idmpEntry *idmpEntryCreate(const char *iid, size_t iid_len, size_t *alloc_size); diff --git a/src/t_stream.c b/src/t_stream.c index 1ea7dbf0e..e000df144 100644 --- a/src/t_stream.c +++ b/src/t_stream.c @@ -57,8 +57,8 @@ static int createIdempotencyHash(robj **argv, int64_t numfields, XXH128_hash_t * static void idmpEvictOldestEntry(stream *s, idmpProducer *producer); /* Forward declarations for PEL time list functions */ +static void pelListInsertAfter(streamCG *cg, streamNACK *after, streamNACK *nack); static void pelListInsertAtTail(streamCG *cg, streamNACK *nack); -static void pelListUnlink(streamCG *cg, streamNACK *nack); static void pelListUpdate(streamCG *cg, streamNACK *nack, mstime_t new_delivery_time); /* ----------------------------------------------------------------------- @@ -281,24 +281,19 @@ robj *streamDup(robj *o) { serverAssert(new_cg != NULL); - /* Consumer Group PEL */ - raxIterator ri_cg_pel; - raxStart(&ri_cg_pel,cg->pel); - raxSeek(&ri_cg_pel,"^",NULL,0); - while(raxNext(&ri_cg_pel)){ - streamNACK *nack = 
ri_cg_pel.data; - streamID nack_id; - streamDecodeID(ri_cg_pel.key, &nack_id); - streamNACK *new_nack = streamCreateNACK(new_s, NULL, &nack_id); + /* Consumer Group PEL -- walk the time-ordered list so we can + * append directly and preserve NACK zone structure. */ + for (streamNACK *nack = cg->pel_time_head; nack; nack = nack->pel_next) { + unsigned char buf[sizeof(streamID)]; + streamEncodeID(buf, &nack->id); + streamNACK *new_nack = streamCreateNACK(new_s, NULL, &nack->id); new_nack->delivery_time = nack->delivery_time; new_nack->delivery_count = nack->delivery_count; - new_nack->cgroup_ref_node = streamLinkCGroupToEntry(new_s, new_cg, ri_cg_pel.key); - raxInsert(new_cg->pel, ri_cg_pel.key, sizeof(streamID), new_nack, NULL); - - /* Insert in sorted order to preserve ordering */ - pelListInsertSorted(new_cg, new_nack); + new_nack->cgroup_ref_node = streamLinkCGroupToEntry(new_s, new_cg, buf); + raxInsert(new_cg->pel, buf, sizeof(streamID), new_nack, NULL); + pelListInsertAtTail(new_cg, new_nack); + if (nack == cg->pel_nack_tail) new_cg->pel_nack_tail = new_nack; } - raxStop(&ri_cg_pel); /* Consumers */ raxIterator ri_consumers; @@ -802,6 +797,33 @@ typedef struct { #define DELETE_STRATEGY_DELREF 2 /* Delete from pending entries list */ #define DELETE_STRATEGY_ACKED 3 /* Only delete messages that are acknowledged */ +/* XNACK mode flags – control how the delivery counter is adjusted when + * a pending entry is released back to the group (NACKed). */ +#define XNACK_SILENT 0 /* Decrement delivery_count by 1 (undo the delivery) */ +#define XNACK_FAIL 1 /* Keep delivery_count unchanged */ +#define XNACK_FATAL 2 /* Set delivery_count to LLONG_MAX (permanent failure) */ + +/* Set the delivery attempts counter on a NACK entry. When retrycount >= 0 + * the counter is set to that explicit value; otherwise it is adjusted + * according to the XNACK mode (SILENT/FAIL/FATAL). 
*/ +static void nackSetDeliveryCount(streamNACK *nack, int mode, long long retrycount) { + if (retrycount >= 0) { + nack->delivery_count = (uint64_t)retrycount; + } else { + switch (mode) { + case XNACK_SILENT: + if (nack->delivery_count > 0) + nack->delivery_count--; + break; + case XNACK_FAIL: + break; + case XNACK_FATAL: + nack->delivery_count = LLONG_MAX; + break; + } + } +} + /* Trim the stream 's' according to args->trim_strategy, and return the * number of elements removed from the stream. The 'approx' option, if non-zero, * specifies that the trimming must be performed in a approximated way in @@ -1869,6 +1891,18 @@ static inline void streamPropagateXCLAIMCopyFree(int dbid, robj *key, robj *grou alsoPropagate(dbid,argv,14,PROPAGATE_AOF|PROPAGATE_REPL); } +/* Propagate an XACK command to AOF and replicas. Used when a PEL entry is + * removed implicitly (e.g. entry no longer exists during XCLAIM/XAUTOCLAIM) + * and the NACK has no consumer, so XCLAIM propagation is not applicable. */ +static inline void streamPropagateXACK(int dbid, robj *key, robj *groupname, robj *id) { + robj *argv[4]; + argv[0] = shared.xack; + argv[1] = key; + argv[2] = groupname; + argv[3] = id; + alsoPropagate(dbid,argv,4,PROPAGATE_AOF|PROPAGATE_REPL); +} + /* As a result of an explicit XCLAIM or XREADGROUP command, new entries * are created in the pending list of the stream and consumers. We need * to propagate this changes in the form of XCLAIM commands. */ @@ -2062,11 +2096,12 @@ size_t streamReplyWithRange(client *c, stream *s, streamID *start, streamID *end if (nack->consumer != consumer) { unsigned char buf[sizeof(streamID)]; streamEncodeID(buf, &nack->id); - raxRemove(nack->consumer->pel,buf,sizeof(buf),NULL); + if (nack->consumer) + raxRemove(nack->consumer->pel,buf,sizeof(buf),NULL); nack->consumer = consumer; raxInsert(consumer->pel,buf,sizeof(buf),nack,NULL); } - nack->delivery_count++; + nack->delivery_count += nack->delivery_count == LLONG_MAX ? 
0 : 1; pelListUpdate(group, nack, cmd_time_snapshot); /* Moves element from beginning to end of list */ consumer->active_time = cmd_time_snapshot; @@ -2204,7 +2239,8 @@ size_t streamReplyWithRange(client *c, stream *s, streamID *start, streamID *end nack = result; /* Only transfer between consumers if they're different */ if (nack->consumer != consumer) { - raxRemove(nack->consumer->pel,buf,sizeof(buf),NULL); + if (nack->consumer) + raxRemove(nack->consumer->pel,buf,sizeof(buf),NULL); nack->consumer = consumer; raxInsert(consumer->pel,buf,sizeof(buf),nack,NULL); } @@ -2289,7 +2325,7 @@ size_t streamReplyWithRangeFromConsumerPEL(client *c, stream *s, streamID *start addReplyNullArray(c); } else { streamNACK *nack = ri.data; - nack->delivery_count++; + nack->delivery_count += nack->delivery_count == LLONG_MAX ? 0 : 1; pelListUpdate(group, nack, commandTimeSnapshot()); } arraylen++; @@ -3133,7 +3169,8 @@ void streamCleanupEntryCGroupRefs(stream *s, streamID *id) { /* Remove from group and consumer PELs */ pelListUnlink(group, nack); raxRemove(group->pel, buf, sizeof(buf), NULL); - raxRemove(nack->consumer->pel, buf, sizeof(buf), NULL); + if (nack->consumer) + raxRemove(nack->consumer->pel, buf, sizeof(buf), NULL); /* Since we're removing all references from the cgroups_ref, we can directly * free the NACK without unlinking it from the cgroups_ref. 
*/ streamFreeNACK(s, nack); @@ -3266,6 +3303,7 @@ streamCG *streamCreateCG(stream *s, char *name, size_t namelen, streamID *id, lo cg->pel = raxNewWithMetadata(0, &s->alloc_size); cg->pel_time_head = NULL; cg->pel_time_tail = NULL; + cg->pel_nack_tail = NULL; cg->consumers = raxNewWithMetadata(0, &s->alloc_size); cg->last_id.ms = 0; cg->last_id.seq = 0; @@ -3281,8 +3319,8 @@ static void streamFreeCG(stream *s, streamCG *cg) { streamFreeNACKCtx ctx = {s, cg}; raxFreeWithCbAndContext(cg->pel, streamFreeNACKGeneric, &ctx); - /* pel_time_head/tail should now be NULL after unlinking all NACKs */ - serverAssert(cg->pel_time_head == NULL && cg->pel_time_tail == NULL); + /* pel_time_head/tail/pel_nack_tail should now be NULL after unlinking all NACKs */ + serverAssert(cg->pel_time_head == NULL && cg->pel_time_tail == NULL && cg->pel_nack_tail == NULL); raxFreeWithCbAndContext(cg->consumers, streamFreeConsumerGeneric, s); size_t usable; @@ -3773,7 +3811,8 @@ void xackCommand(client *c) { streamNACK *nack = result; pelListUnlink(group, nack); raxRemove(group->pel,buf,sizeof(buf),NULL); - raxRemove(nack->consumer->pel,buf,sizeof(buf),NULL); + if (nack->consumer) + raxRemove(nack->consumer->pel,buf,sizeof(buf),NULL); streamDestroyNACK(kv->ptr, nack, buf); acknowledged++; server.dirty++; @@ -3787,6 +3826,161 @@ cleanup: if (ids != static_ids) zfree(ids); } +/* XNACK key group IDS numids id [id ...] + * [RETRYCOUNT count] [FORCE] + * + * Release pending messages back to the group's PEL without acknowledging them. + * Entries are disassociated from their consumer (consumer = NULL) and + * repositioned to the head of the PEL time-ordered list (delivery_time = 0), + * making them immediately claimable by other consumers. 
+ * + * Delivery counter behavior (when RETRYCOUNT is not specified): + * SILENT: decrement by 1 (undo the delivery increment) + * FAIL: no change (already incremented during delivery) + * FATAL: set to LLONG_MAX + * + * RETRYCOUNT count: directly sets delivery_count to the specified value, + * overriding the mode-based adjustment. + * + * FORCE: create new unowned PEL entries (consumer = NULL) for IDs that + * are not already in the group PEL. When FORCE creates an entry, the + * delivery counter is set to 0 (or to RETRYCOUNT if specified, or to + * LLONG_MAX if mode is FATAL). */ +void xnackCommand(client *c) { + streamCG *group = NULL; + kvobj *kv = lookupKeyWrite(c->db,c->argv[1]); + if (kv) { + if (checkType(c,kv,OBJ_STREAM)) return; + group = streamLookupCG(kv->ptr,c->argv[2]->ptr); + } + + if (kv == NULL || group == NULL) { + addReplyErrorFormat(c,"-NOGROUP No such key '%s' or " + "consumer group '%s'", (char*)c->argv[1]->ptr, + (char*)c->argv[2]->ptr); + return; + } + + int mode; + if (!strcasecmp(c->argv[3]->ptr,"SILENT")) { + mode = XNACK_SILENT; + } else if (!strcasecmp(c->argv[3]->ptr,"FAIL")) { + mode = XNACK_FAIL; + } else if (!strcasecmp(c->argv[3]->ptr,"FATAL")) { + mode = XNACK_FATAL; + } else { + addReplyError(c,"ERR mode must be SILENT, FAIL, or FATAL"); + return; + } + + int ids_start = 0; + int numids = 0; + int force = 0; + long long retrycount = -1; + for (int i = 4; i < c->argc; i++) { + int moreargs = (c->argc-1) - i; /* Number of additional arguments. 
*/ + char *opt = c->argv[i]->ptr; + if (!strcasecmp(opt,"IDS") && moreargs) { + long numids_long; + if (getRangeLongFromObjectOrReply(c,c->argv[i+1],1,INT_MAX, + &numids_long,"numids must be a positive integer") != C_OK) + return; + numids = (int)numids_long; + ids_start = i + 2; + if (numids > (c->argc - ids_start)) { + addReplyError(c,"ERR number of IDs doesn't match numids"); + return; + } + i = ids_start + numids - 1; + } else if (!strcasecmp(opt,"FORCE")) { + force = 1; + } else if (!strcasecmp(opt,"RETRYCOUNT") && moreargs) { + i++; + if (getLongLongFromObjectOrReply(c,c->argv[i],&retrycount,NULL) != C_OK) + return; + if (retrycount < 0) { + addReplyError(c,"ERR Invalid RETRYCOUNT value, must be >= 0"); + return; + } + } else { + addReplyErrorFormat(c,"ERR Unrecognized XNACK option '%s'", + (char *)c->argv[i]->ptr); + return; + } + } + + if (ids_start == 0) { + addReplyError(c,"ERR syntax error, expected IDS keyword"); + return; + } + + streamID static_ids[STREAMID_STATIC_VECTOR_LEN]; + streamID *ids = static_ids; + if (numids > STREAMID_STATIC_VECTOR_LEN) + ids = zmalloc(sizeof(streamID)*numids); + for (int j = 0; j < numids; j++) { + if (streamParseStrictIDOrReply(c,c->argv[ids_start+j],&ids[j],0,NULL) != C_OK) goto cleanup; + } + + stream *s = kv->ptr; + int nacked = 0; + size_t old_alloc = server.memory_tracking_enabled ? kvobjAllocSize(kv) : 0; + for (int j = 0; j < numids; j++) { + unsigned char buf[sizeof(streamID)]; + streamEncodeID(buf,&ids[j]); + + void *result; + int found = raxFind(group->pel,buf,sizeof(buf),&result); + if (found) { + streamNACK *nack = result; + nackSetDeliveryCount(nack, mode, retrycount); + if (nack->consumer != NULL) { + raxRemove(nack->consumer->pel,buf,sizeof(buf),NULL); + nack->consumer = NULL; + } + + /* Move to NACK zone: unlink from current position, insert at + * end of NACK zone (head region of PEL). 
*/ + pelListUnlink(group, nack); + pelListInsertNacked(group, nack); + } else if (force) { + /* FORCE: create new unowned PEL entry only if the stream + * entry exists, otherwise skip silently (same as XCLAIM). */ + if (!streamEntryExists(s, &ids[j])) + continue; + streamNACK *nack = streamCreateNACK(s, NULL, &ids[j]); + + /* streamCreateNACK() initialises delivery_count to 1 (a real + * delivery), but FORCE creates a synthetic entry with no actual + * delivery, so reset to 0 before letting nackSetDeliveryCount() + * apply the mode/retrycount logic on a clean baseline. */ + nack->delivery_count = 0; + nackSetDeliveryCount(nack, mode, retrycount); + + raxInsert(group->pel, buf, sizeof(buf), nack, NULL); + pelListInsertNacked(group, nack); + nack->cgroup_ref_node = streamLinkCGroupToEntry(s, group, buf); + } else { + continue; + } + nacked++; + } + + if (nacked > 0) { + server.dirty += nacked; + keyModified(c,c->db,c->argv[1],kv,0); + /* XNACK can make entries immediately claimable. */ + signalKeyAsReady(c->db, c->argv[1], OBJ_STREAM); + } + if (server.memory_tracking_enabled) + updateSlotAllocSize(c->db,getKeySlot(c->argv[1]->ptr),kv,old_alloc,kvobjAllocSize(kv)); + + addReplyLongLong(c,nacked); + +cleanup: + if (ids != static_ids) zfree(ids); +} + /* Used by xackdelCommand() */ typedef enum XAckDelRes { XACKDEL_NO_ID = -1, /* ID not found in PEL. 
*/ @@ -3849,7 +4043,8 @@ void xackdelCommand(client *c) { streamNACK *nack = result; pelListUnlink(group, nack); raxRemove(group->pel,buf,sizeof(buf),NULL); - raxRemove(nack->consumer->pel,buf,sizeof(buf),NULL); + if (nack->consumer) + raxRemove(nack->consumer->pel,buf,sizeof(buf),NULL); streamDestroyNACK(s, nack, buf); server.dirty++; @@ -4059,7 +4254,7 @@ void xpendingCommand(client *c) { while(count && raxNext(&ri) && memcmp(ri.key,endkey,ri.key_len) <= 0) { streamNACK *nack = ri.data; - if (minidle) { + if (nack->consumer && minidle) { mstime_t this_idle = now - nack->delivery_time; if (this_idle < minidle) continue; } @@ -4073,13 +4268,22 @@ void xpendingCommand(client *c) { streamDecodeID(ri.key,&id); addReplyStreamID(c,&id); - /* Consumer name. */ - addReplyBulkCBuffer(c,nack->consumer->name, - sdslen(nack->consumer->name)); + /* Consumer name (empty string if NACKed / unowned). */ + if (nack->consumer) { + addReplyBulkCBuffer(c,nack->consumer->name, + sdslen(nack->consumer->name)); + } else { + addReplyBulkCBuffer(c,"",0); + } - /* Milliseconds elapsed since last delivery. */ - mstime_t elapsed = now - nack->delivery_time; - if (elapsed < 0) elapsed = 0; + /* Milliseconds elapsed since last delivery (-1 if unowned / NACKed). */ + mstime_t elapsed; + if (nack->consumer) { + elapsed = now - nack->delivery_time; + if (elapsed < 0) elapsed = 0; + } else { + elapsed = -1; + } addReplyLongLong(c,elapsed); /* Number of deliveries. */ @@ -4283,13 +4487,20 @@ void xclaimCommand(client *c) { /* Clear this entry from the PEL, it no longer exists */ if (nack != NULL) { /* Propagate this change (we are going to delete the NACK). */ - streamPropagateXCLAIM(c,c->argv[1],group,c->argv[2],c->argv[j],nack); - propagate_last_id = 0; /* Will be propagated by XCLAIM itself. */ + if (nack->consumer) { + streamPropagateXCLAIM(c,c->argv[1],group,c->argv[2],c->argv[j],nack); + propagate_last_id = 0; /* Will be propagated by XCLAIM itself. 
*/ + } else { + /* Unowned NACK (NACK zone entry from XNACK) — can't use + * XCLAIM propagation without a consumer; use XACK instead. */ + streamPropagateXACK(c->db->id,c->argv[1],c->argv[2],c->argv[j]); + } server.dirty++; /* Release the NACK */ pelListUnlink(group, nack); raxRemove(group->pel,buf,sizeof(buf),NULL); - raxRemove(nack->consumer->pel,buf,sizeof(buf),NULL); + if (nack->consumer) + raxRemove(nack->consumer->pel,buf,sizeof(buf),NULL); streamDestroyNACK(s, nack, buf); } continue; @@ -4336,7 +4547,7 @@ void xclaimCommand(client *c) { if (retrycount >= 0) { nack->delivery_count = retrycount; } else if (!justid) { - nack->delivery_count++; + nack->delivery_count += nack->delivery_count == LLONG_MAX ? 0 : 1; } if (nack->consumer != consumer) { /* Add the entry in the new consumer local PEL. */ @@ -4482,14 +4693,23 @@ void xautoclaimCommand(client *c) { /* Item must exist for us to transfer it to another consumer. */ if (!streamEntryExists(s,&id)) { /* Propagate this change (we are going to delete the NACK). */ - robj *idstr = createObjectFromStreamID(&id); - streamPropagateXCLAIM(c,c->argv[1],group,c->argv[2],idstr,nack); - decrRefCount(idstr); + if (nack->consumer) { + robj *idstr = createObjectFromStreamID(&id); + streamPropagateXCLAIM(c,c->argv[1],group,c->argv[2],idstr,nack); + decrRefCount(idstr); + } else { + /* Unowned NACK (NACK zone entry from XNACK) — can't use + * XCLAIM propagation without a consumer; use XACK instead. 
*/ + robj *idstr = createObjectFromStreamID(&id); + streamPropagateXACK(c->db->id,c->argv[1],c->argv[2],idstr); + decrRefCount(idstr); + } server.dirty++; /* Clear this entry from the PEL, it no longer exists */ pelListUnlink(group, nack); raxRemove(group->pel,ri.key,ri.key_len,NULL); - raxRemove(nack->consumer->pel,ri.key,ri.key_len,NULL); + if (nack->consumer) + raxRemove(nack->consumer->pel,ri.key,ri.key_len,NULL); streamDestroyNACK(s, nack, ri.key); /* Remember the ID for later */ deleted_ids[deleted_id_num++] = id; @@ -4498,7 +4718,7 @@ void xautoclaimCommand(client *c) { continue; } - if (minidle) { + if (nack->consumer && minidle) { mstime_t this_idle = now - nack->delivery_time; if (this_idle < minidle) continue; @@ -4518,7 +4738,7 @@ void xautoclaimCommand(client *c) { /* Increment the delivery attempts counter unless JUSTID option provided */ if (!justid) - nack->delivery_count++; + nack->delivery_count += nack->delivery_count == LLONG_MAX ? 0 : 1; if (nack->consumer != consumer) { /* Add the entry in the new consumer local PEL. */ @@ -4922,7 +5142,7 @@ void xinfoReplyWithStreamInfo(client *c, kvobj *kv) { raxSeek(&ri_cgroups,"^",NULL,0); while(raxNext(&ri_cgroups)) { streamCG *cg = ri_cgroups.data; - addReplyMapLen(c,7); + addReplyMapLen(c,8); /* Name */ addReplyBulkCString(c,"name"); @@ -4948,6 +5168,10 @@ void xinfoReplyWithStreamInfo(client *c, kvobj *kv) { addReplyBulkCString(c,"pel-count"); addReplyLongLong(c,raxSize(cg->pel)); + /* NACKed entries count (entries in the NACK zone) */ + addReplyBulkCString(c,"nacked-count"); + addReplyLongLong(c,pelListNackedCount(cg)); + /* Group PEL */ addReplyBulkCString(c,"pending"); long long arraylen_cg_pel = 0; @@ -4964,10 +5188,13 @@ void xinfoReplyWithStreamInfo(client *c, kvobj *kv) { streamDecodeID(ri_cg_pel.key,&id); addReplyStreamID(c,&id); - /* Consumer name. 
*/ - serverAssert(nack->consumer); /* assertion for valgrind (avoid NPD) */ - addReplyBulkCBuffer(c,nack->consumer->name, - sdslen(nack->consumer->name)); + /* Consumer name (empty string if NACKed / unowned). */ + if (nack->consumer) { + addReplyBulkCBuffer(c,nack->consumer->name, + sdslen(nack->consumer->name)); + } else { + addReplyBulkCBuffer(c,"",0); + } /* Last delivery. */ addReplyLongLong(c,nack->delivery_time); @@ -5340,21 +5567,39 @@ int streamValidateListpackIntegrity(unsigned char *lp, size_t size, int deep) { * O(1) unlink from any position, O(1) append to tail, O(1) access to oldest * entries for CLAIM operations. */ +/* Insert a NACK after 'after' in the time-ordered list. + * If after is NULL, insert at the head. */ +static void pelListInsertAfter(streamCG *cg, streamNACK *after, streamNACK *nack) { + if (after) { + nack->pel_prev = after; + nack->pel_next = after->pel_next; + if (after->pel_next) + after->pel_next->pel_prev = nack; + else + cg->pel_time_tail = nack; + after->pel_next = nack; + } else { + nack->pel_prev = NULL; + nack->pel_next = cg->pel_time_head; + if (cg->pel_time_head) + cg->pel_time_head->pel_prev = nack; + else + cg->pel_time_tail = nack; + cg->pel_time_head = nack; + } +} + /* Insert a NACK at the tail of the PEL time-ordered list. This is used when * delivery_time is set to current time, which is the common case. */ static void pelListInsertAtTail(streamCG *cg, streamNACK *nack) { - nack->pel_prev = cg->pel_time_tail; - nack->pel_next = NULL; - if (cg->pel_time_tail) { - cg->pel_time_tail->pel_next = nack; - } else { - cg->pel_time_head = nack; - } - cg->pel_time_tail = nack; + pelListInsertAfter(cg, cg->pel_time_tail, nack); } /* Unlink a NACK from the PEL time-ordered list. 
*/ -static void pelListUnlink(streamCG *cg, streamNACK *nack) { +void pelListUnlink(streamCG *cg, streamNACK *nack) { + if (nack == cg->pel_nack_tail) { + cg->pel_nack_tail = nack->pel_prev; + } if (nack->pel_prev) { nack->pel_prev->pel_next = nack->pel_next; } else { @@ -5373,43 +5618,52 @@ static void pelListUnlink(streamCG *cg, streamNACK *nack) { /* Insert a NACK in sorted order by delivery_time. Used for edge cases where * delivery_time is set to a past time, and also by RDB loading where entries * may not be time-ordered. We scan backwards from the tail since most times - * are recent, so the common case is still fast. */ + * are recent, so the common case is still fast. + * + * The NACK zone (pel_time_head..pel_nack_tail) is skipped: new entries are + * never placed before pel_nack_tail, so the NACK zone stays intact. */ void pelListInsertSorted(streamCG *cg, streamNACK *nack) { - /* Empty list. */ - if (cg->pel_time_head == NULL) { - cg->pel_time_head = cg->pel_time_tail = nack; - nack->pel_prev = nack->pel_next = NULL; - return; - } - - /* Append to tail (common case: delivery_time >= tail time). */ - if (nack->delivery_time >= cg->pel_time_tail->delivery_time) { + /* Empty list or append to tail (common case). */ + if (cg->pel_time_head == NULL || + nack->delivery_time >= cg->pel_time_tail->delivery_time) { pelListInsertAtTail(cg, nack); return; } - /* Prepend to head (rare: delivery_time < head time). */ - if (nack->delivery_time < cg->pel_time_head->delivery_time) { - nack->pel_next = cg->pel_time_head; - nack->pel_prev = NULL; - cg->pel_time_head->pel_prev = nack; - cg->pel_time_head = nack; - return; - } - - /* Insert in middle: scan backwards from tail since most times are recent. */ + /* Scan backwards from tail, stopping at the NACK-zone boundary + * (pel_nack_tail) so we never insert inside the zone. If boundary + * is NULL (no NACK zone), the scan may reach the list head. 
*/ + streamNACK *boundary = cg->pel_nack_tail; streamNACK *curr = cg->pel_time_tail; - while (curr && curr->delivery_time > nack->delivery_time) { + while (curr != boundary && curr->delivery_time > nack->delivery_time) { curr = curr->pel_prev; } - /* Insert after curr. */ - nack->pel_next = curr->pel_next; - nack->pel_prev = curr; - if (curr->pel_next) { - curr->pel_next->pel_prev = nack; + pelListInsertAfter(cg, curr, nack); +} + +/* Insert a NACKed entry at the end of the NACK zone (head region of the PEL + * time-ordered list). The NACK zone occupies positions from pel_time_head to + * pel_nack_tail. This is O(1) and maintains FIFO order among NACKed entries. */ +void pelListInsertNacked(streamCG *cg, streamNACK *nack) { + nack->delivery_time = 0; + pelListInsertAfter(cg, cg->pel_nack_tail, nack); + cg->pel_nack_tail = nack; +} + +/* Return the number of entries in the NACK zone (pel_time_head..pel_nack_tail). + * Returns 0 when no NACKed entries exist. */ +uint64_t pelListNackedCount(streamCG *cg) { + uint64_t count = 0; + if (cg->pel_nack_tail) { + streamNACK *nack = cg->pel_time_head; + while (nack) { + count++; + if (nack == cg->pel_nack_tail) break; + nack = nack->pel_next; + } } - curr->pel_next = nack; + return count; } /* Update a NACK's delivery_time and reposition it in the time-ordered list. 
*/ diff --git a/tests/support/util.tcl b/tests/support/util.tcl index 5d06c8cd9..0c9f64836 100644 --- a/tests/support/util.tcl +++ b/tests/support/util.tcl @@ -796,7 +796,7 @@ proc generate_fuzzy_traffic_on_key {key type duration} { set zset_commands {ZADD ZCARD ZCOUNT ZINCRBY ZINTERSTORE ZLEXCOUNT ZPOPMAX ZPOPMIN ZRANGE ZRANGEBYLEX ZRANGEBYSCORE ZRANK ZREM ZREMRANGEBYLEX ZREMRANGEBYRANK ZREMRANGEBYSCORE ZREVRANGE ZREVRANGEBYLEX ZREVRANGEBYSCORE ZREVRANK ZSCAN ZSCORE ZUNIONSTORE ZRANDMEMBER} set list_commands {LINDEX LINSERT LLEN LPOP LPOS LPUSH LPUSHX LRANGE LREM LSET LTRIM RPOP RPOPLPUSH RPUSH RPUSHX} set set_commands {SADD SCARD SDIFF SDIFFSTORE SINTER SINTERSTORE SISMEMBER SMEMBERS SMOVE SPOP SRANDMEMBER SREM SSCAN SUNION SUNIONSTORE} - set stream_commands {XACK XADD XCLAIM XDEL XGROUP XINFO XLEN XPENDING XRANGE XREAD XREADGROUP XREVRANGE XTRIM XDELEX XACKDEL} + set stream_commands {XACK XADD XCLAIM XDEL XGROUP XINFO XLEN XPENDING XRANGE XREAD XREADGROUP XREVRANGE XTRIM XDELEX XACKDEL XNACK} set vset_commands {VADD VREM} set commands [dict create string $string_commands hash $hash_commands zset $zset_commands list $list_commands set $set_commands stream $stream_commands vectorset $vset_commands] diff --git a/tests/unit/type/stream-cgroups.tcl b/tests/unit/type/stream-cgroups.tcl index 4990275e2..9adb7c705 100644 --- a/tests/unit/type/stream-cgroups.tcl +++ b/tests/unit/type/stream-cgroups.tcl @@ -1904,7 +1904,7 @@ start_server { } } } - + start_server {} { if {!$::force_resp3} { test "XREADGROUP CLAIM field types are correct" { @@ -3290,4 +3290,1398 @@ start_server { assert_error "*ERR The CLAIM option is only supported*" {r XREAD COUNT 2 CLAIM 10 STREAMS mystream 0-0} } } + + # Verify that XNACK rejects every invalid invocation with the correct error. 
+ # Covers: wrong argument count, nonexistent key/group (NOGROUP), wrong key + # type (WRONGTYPE), unrecognized options at every position the parser + # accepts them, invalid mode names, duplicate mode words, missing/bad IDS + # keyword, bad numids (non-integer, zero, negative, mismatch), invalid + # stream-ID format, RETRYCOUNT edge cases (non-integer, negative, overflow, + # missing value, missing IDS), and extra trailing arguments. + test "XNACK argument and error validation" { + # Wrong number of arguments (no stream needed) + assert_error "*wrong number of arguments*" {r XNACK} + assert_error "*wrong number of arguments*" {r XNACK key} + assert_error "*wrong number of arguments*" {r XNACK key group} + assert_error "*wrong number of arguments*" {r XNACK key group SILENT} + assert_error "*wrong number of arguments*" {r XNACK key group SILENT IDS} + assert_error "*wrong number of arguments*" {r XNACK key group SILENT IDS 1} + + # Non-existent key / group + r DEL nosuchkey + assert_error "*NOGROUP*" {r XNACK nosuchkey grp SILENT IDS 1 1-0} + + r DEL mystream + r XADD mystream 1-0 f v + assert_error "*NOGROUP*" {r XNACK mystream nogroup SILENT IDS 1 1-0} + + # Wrong key type + r DEL mykey + r SET mykey "not a stream" + assert_error "*WRONGTYPE*" {r XNACK mykey grp FAIL IDS 1 1-0} + + # All remaining checks need a stream + group + consumer + r DEL mystream + r XADD mystream 1-0 f v1 + r XADD mystream 2-0 f v2 + r XGROUP CREATE mystream grp 0 + r XREADGROUP GROUP grp c1 STREAMS mystream > + + # Unrecognized option at various positions — the parser accepts options + # both before and after the IDS block, so verify rejection in each slot. 
+ assert_error "*Unrecognized XNACK option*" {r XNACK mystream grp FAIL BADOPT IDS 1 1-0} + assert_error "*Unrecognized XNACK option*" {r XNACK mystream grp FAIL IDS 1 1-0 BADOPT} + assert_error "*Unrecognized XNACK option*" {r XNACK mystream grp SILENT BADOPT IDS 1 1-0 FORCE} + assert_error "*Unrecognized XNACK option*" {r XNACK mystream grp SILENT FORCE BADOPT IDS 1 1-0} + assert_error "*Unrecognized XNACK option*" {r XNACK mystream grp FAIL RETRYCOUNT 5 BADOPT IDS 1 1-0} + assert_error "*Unrecognized XNACK option*" {r XNACK mystream grp FAIL IDS 1 1-0 RETRYCOUNT 5 BADOPT} + assert_error "*Unrecognized XNACK option*" {r XNACK mystream grp FAIL FORCE IDS 1 1-0 BADOPT RETRYCOUNT 5} + + # Invalid mode + assert_error "*mode must be SILENT, FAIL, or FATAL*" {r XNACK mystream grp BADMODE IDS 1 1-0} + + # Multiple mode words — only one mode is allowed per invocation. + assert_error "*Unrecognized XNACK option*" {r XNACK mystream grp FAIL FATAL IDS 1 1-0} + assert_error "*Unrecognized XNACK option*" {r XNACK mystream grp SILENT FAIL IDS 1 1-0} + assert_error "*Unrecognized XNACK option*" {r XNACK mystream grp FATAL SILENT IDS 1 1-0} + assert_error "*Unrecognized XNACK option*" {r XNACK mystream grp FAIL SILENT FATAL IDS 1 1-0} + + # IDS keyword validation + assert_error "*Unrecognized XNACK option*" {r XNACK mystream grp SILENT NOTIDS 1 1-0} + assert_error "*expected IDS keyword*" {r XNACK mystream grp SILENT FORCE RETRYCOUNT 5} + + # numids validation + assert_error "*numids must be a positive integer*" {r XNACK mystream grp SILENT IDS abc 1-0} + assert_error "*numids must be a positive integer*" {r XNACK mystream grp SILENT IDS 0 1-0} + assert_error "*numids must be a positive integer*" {r XNACK mystream grp SILENT IDS -1 1-0} + assert_error "*number of IDs doesn't match numids*" {r XNACK mystream grp SILENT IDS 2 1-0} + + # Invalid stream ID format + assert_error "*Invalid stream ID*" {r XNACK mystream grp FAIL IDS 1 not-a-valid-id} + + # RETRYCOUNT validation — 
non-integer, negative, overflow, missing value + assert_error "*value is not an integer or out of range*" {r XNACK mystream grp FAIL IDS 1 1-0 RETRYCOUNT abc} + assert_error "*Invalid RETRYCOUNT*" {r XNACK mystream grp FAIL IDS 1 1-0 RETRYCOUNT -1} + assert_error "*value is not an integer or out of range*" {r XNACK mystream grp FAIL IDS 1 1-0 RETRYCOUNT 99999999999999999999} + # RETRYCOUNT without a following value — consumed as trailing option + assert_error "*Unrecognized XNACK option*" {r XNACK mystream grp FAIL IDS 1 1-0 RETRYCOUNT} + # RETRYCOUNT right after mode with no IDS — too few arguments + assert_error "*wrong number of arguments*" {r XNACK mystream grp FAIL RETRYCOUNT} + + # Extra args after numids IDs — the surplus ID is parsed as an option + assert_error "*Unrecognized XNACK option*" {r XNACK mystream grp FAIL IDS 1 1-0 2-0} + } + + # Verify SILENT mode decrements delivery_count by 1, clamped at 0. + # XPENDING format per entry: {id consumer idle delivery_count}. + # After XNACK, consumer becomes {} (unowned) and idle becomes -1 + # (delivery_time reset to 0). 
+ test "XNACK SILENT mode delivery_count behavior" { + r DEL mystream + r XADD mystream 1-0 f v + r XGROUP CREATE mystream grp 0 + r XREADGROUP GROUP grp c1 STREAMS mystream > + + # delivery_count is 1 after XREADGROUP; SILENT decrements to 0 + set pending [r XPENDING mystream grp - + 10] + assert_equal [lindex $pending 0 3] 1 + assert_equal 1 [r XNACK mystream grp SILENT IDS 1 1-0] + set pending [r XPENDING mystream grp - + 10] + assert_equal [llength $pending] 1 + assert_equal [lindex $pending 0] {1-0 {} -1 0} + + # Clamp at 0: reclaim with RETRYCOUNT 0, then SILENT must not go below 0 + r XCLAIM mystream grp c2 0 1-0 RETRYCOUNT 0 + r XNACK mystream grp SILENT IDS 1 1-0 + set pending [r XPENDING mystream grp - + 10] + assert_equal [lindex $pending 0 3] 0 + + # Decrement from higher value: XCLAIM bumps delivery_count each time + r XCLAIM mystream grp c1 0 1-0 + r XCLAIM mystream grp c1 0 1-0 + r XCLAIM mystream grp c1 0 1-0 + set pending [r XPENDING mystream grp - + 10] + assert_equal [lindex $pending 0 3] 3 + assert_equal 1 [r XNACK mystream grp SILENT IDS 1 1-0] + set pending [r XPENDING mystream grp - + 10] + # 3 - 1 = 2 + assert_equal [lindex $pending 0] {1-0 {} -1 2} + } + + # Verify FAIL mode NACKs the entry (makes it unowned) but preserves the + # original delivery_count. The count stays at 1 (set by XREADGROUP). + test "XNACK FAIL mode keeps delivery_count unchanged" { + r DEL mystream + r XADD mystream 1-0 f v + r XGROUP CREATE mystream grp 0 + r XREADGROUP GROUP grp c1 STREAMS mystream > + + set pending [r XPENDING mystream grp - + 10] + assert_equal [lindex $pending 0 3] 1 + + assert_equal 1 [r XNACK mystream grp FAIL IDS 1 1-0] + + set pending [r XPENDING mystream grp - + 10] + assert_equal [llength $pending] 1 + # delivery_count unchanged at 1 + assert_equal [lindex $pending 0] {1-0 {} -1 1} + } + + # Verify FATAL mode sets delivery_count to LLONG_MAX (9223372036854775807), + # signaling permanent/unrecoverable failure for this entry. 
+ test "XNACK FATAL mode sets delivery_count to max" { + r DEL mystream + r XADD mystream 1-0 f v + r XGROUP CREATE mystream grp 0 + r XREADGROUP GROUP grp c1 STREAMS mystream > + + assert_equal 1 [r XNACK mystream grp FATAL IDS 1 1-0] + + set pending [r XPENDING mystream grp - + 10] + assert_equal [llength $pending] 1 + # 9223372036854775807 == LLONG_MAX + assert_equal [lindex $pending 0] {1-0 {} -1 9223372036854775807} + } + + # Verify that XNACK removes entries from the consumer-level PEL (the entry + # becomes unowned) while keeping them in the group-level PEL. + # Setup: c1 owns {1-0, 2-0}, c2 owns {3-0}. NACK entries from both + # consumers and confirm the ownership transfer. + # Also verifies that XNACK does not auto-create or destroy consumers. + test "XNACK releases entries and removes from consumer PEL" { + r DEL mystream + r XADD mystream 1-0 f v1 + r XADD mystream 2-0 f v2 + r XADD mystream 3-0 f v3 + r XGROUP CREATE mystream grp 0 + r XREADGROUP GROUP grp c1 COUNT 2 STREAMS mystream > + r XREADGROUP GROUP grp c2 COUNT 1 STREAMS mystream > + + # XNACK entries owned by different consumers + assert_equal 1 [r XNACK mystream grp FAIL IDS 1 3-0] + assert_equal 1 [r XNACK mystream grp FAIL IDS 1 1-0] + + # Both NACKed entries should be unowned in the group PEL + set pending [r XPENDING mystream grp - + 10] + assert_equal [llength $pending] 3 + assert_equal [lindex $pending 0] {1-0 {} -1 1} + assert_equal [lindex $pending 2] {3-0 {} -1 1} + + # Consumer-level PEL: c1 only has 2-0 left, c2 has nothing + set c1_pending [r XPENDING mystream grp - + 10 c1] + assert_equal [llength $c1_pending] 1 + assert_equal [lindex $c1_pending 0 0] 2-0 + set c2_pending [r XPENDING mystream grp - + 10 c2] + assert_equal [llength $c2_pending] 0 + + # XNACK does not auto-create or destroy consumers + set info [r XINFO GROUPS mystream] + assert_equal [dict get [lindex $info 0] consumers] 2 + } + + # Verify the integer return value of XNACK (number of entries successfully + # 
NACKed) and several edge cases: + # - IDs not in the PEL are silently skipped (return 0). + # - Multiple IDs can be NACKed in a single call. + # - When valid and non-PEL IDs are mixed, only valid ones are counted. + # - Duplicate IDs: each occurrence is counted separately. + # - NACKing against an empty PEL returns 0. + test "XNACK return count and edge cases" { + r DEL mystream + r XADD mystream 1-0 f v1 + r XADD mystream 2-0 f v2 + r XADD mystream 3-0 f v3 + r XGROUP CREATE mystream grp 0 + r XREADGROUP GROUP grp c1 STREAMS mystream > + + # Skips IDs not in PEL + assert_equal 0 [r XNACK mystream grp FAIL IDS 1 9-9] + + # Multiple IDs at once + assert_equal 3 [r XNACK mystream grp SILENT IDS 3 1-0 2-0 3-0] + set pending [r XPENDING mystream grp - + 10] + assert_equal $pending {{1-0 {} -1 0} {2-0 {} -1 0} {3-0 {} -1 0}} + + # Reclaim all entries back to c1 for further sub-tests + r XCLAIM mystream grp c1 0 1-0 2-0 3-0 + + # Mixed valid and invalid IDs: only the 3 valid ones are counted + assert_equal 3 [r XNACK mystream grp FAIL IDS 5 1-0 9-9 2-0 8-8 3-0] + set pending [r XPENDING mystream grp - + 10] + foreach entry $pending { + assert_equal [lindex $entry 1] {} + } + + # Duplicate IDs: the first NACK finds a consumer-owned entry, the + # second finds an already-NACKed entry — both count as successful. + r XCLAIM mystream grp c1 0 1-0 + assert_equal 2 [r XNACK mystream grp FAIL IDS 2 1-0 1-0] + set pending [r XPENDING mystream grp - + 10] + assert_equal [llength $pending] 3 + assert_equal [lindex $pending 0] {1-0 {} -1 2} + + # Empty PEL returns 0 + r XACK mystream grp 1-0 2-0 3-0 + set info [r XPENDING mystream grp] + assert_equal [lindex $info 0] 0 + assert_equal 0 [r XNACK mystream grp FAIL IDS 1 1-0] + } + + # Verify behavior when re-NACKing an entry that is already in the NACK + # zone (unowned). Each mode still applies its delivery_count semantics: + # - FAIL is idempotent (count unchanged, returns 1). + # - SILENT still decrements. 
+ # - FATAL still sets to LLONG_MAX. + # Mode transitions on already-NACKed entries work correctly. + test "XNACK on already-NACKed entry: idempotency and mode changes" { + r DEL mystream + r XADD mystream 1-0 f v + r XGROUP CREATE mystream grp 0 + r XREADGROUP GROUP grp c1 STREAMS mystream > + + # Re-NACK with FAIL: returns 1, count unchanged + assert_equal 1 [r XNACK mystream grp FAIL IDS 1 1-0] + assert_equal 1 [r XNACK mystream grp FAIL IDS 1 1-0] + set pending [r XPENDING mystream grp - + 10] + assert_equal [llength $pending] 1 + assert_equal [lindex $pending 0] {1-0 {} -1 1} + + # SILENT on already-NACKed: decrements 1 to 0 + assert_equal 1 [r XNACK mystream grp SILENT IDS 1 1-0] + set pending [r XPENDING mystream grp - + 10] + assert_equal [lindex $pending 0 3] 0 + + # FATAL on already-NACKed: sets to LLONG_MAX + assert_equal 1 [r XNACK mystream grp FATAL IDS 1 1-0] + set pending [r XPENDING mystream grp - + 10] + assert_equal [llength $pending] 1 + assert_equal [lindex $pending 0] {1-0 {} -1 9223372036854775807} + + # FAIL on already-NACKed: returns success (idempotent) + assert_equal 1 [r XNACK mystream grp FAIL IDS 1 1-0] + } + + # Verify that NACKed entries form a "NACK zone" at the head of the + # time-ordered PEL with FIFO insertion order. + # NACKed entries have delivery_time=0, so XPENDING reports idle=-1. + # XINFO STREAM FULL iterates the PEL rax by stream-ID order (not NACK + # order), so we check both views to confirm correct state. 
+ test "XNACK ordering: NACKed entries at head of PEL with FIFO order" { + r DEL mystream + r XADD mystream 1-0 f v1 + r XADD mystream 2-0 f v2 + r XADD mystream 3-0 f v3 + r XADD mystream 4-0 f v4 + r XGROUP CREATE mystream grp 0 + r XREADGROUP GROUP grp c1 STREAMS mystream > + + # XNACK in non-sequential stream-ID order: 3-0 first, then 1-0 + r XNACK mystream grp FAIL IDS 1 3-0 + r XNACK mystream grp FAIL IDS 1 1-0 + + # NACKed entries should have delivery_time=0 (idle=-1 in XPENDING) + set pending [r XPENDING mystream grp - + 10] + assert_equal [llength $pending] 4 + + foreach entry $pending { + set id [lindex $entry 0] + if {$id eq "3-0" || $id eq "1-0"} { + assert_equal [lindex $entry 1] {} ;# unowned + assert_equal [lindex $entry 2] -1 ;# idle is -1 because delivery_time is 0 + } else { + assert_equal [lindex $entry 1] c1 ;# still owned + } + } + + # XINFO STREAM FULL iterates the PEL rax by stream ID order. + # NACKed entries show delivery_time=0 and consumer={}. + set info [r XINFO STREAM mystream FULL] + set group [lindex [dict get $info groups] 0] + set pel [dict get $group pending] + + assert_equal [lindex $pel 0] {1-0 {} 0 1} + assert_match {2-0 c1 * 1} [lindex $pel 1] + assert_equal [lindex $pel 2] {3-0 {} 0 1} + assert_match {4-0 c1 * 1} [lindex $pel 3] + } + + # Verify that NACKed PEL entries survive deletion of the underlying stream + # entry. Both XDEL (single entry removal) and XTRIM (bulk trimming) must + # not remove PEL entries — they become "ghost" entries that are cleaned up + # only when claimed (XCLAIM/XAUTOCLAIM) or acknowledged (XACK). 
+ test "XNACK NACKed entries persist after XDEL and XTRIM" { + # XDEL case: delete the stream entry, PEL entry stays + r DEL mystream + r XADD mystream 1-0 f v1 + r XADD mystream 2-0 f v2 + r XGROUP CREATE mystream grp 0 + r XREADGROUP GROUP grp c1 STREAMS mystream > + r XNACK mystream grp FAIL IDS 1 1-0 + r XDEL mystream 1-0 + set pending [r XPENDING mystream grp - + 10] + assert_equal [llength $pending] 2 + assert_equal [lindex $pending 0] {1-0 {} -1 1} + + # XTRIM case: trim all but the last entry, PEL entries remain + r DEL mystream + r XADD mystream 1-0 f v1 + r XADD mystream 2-0 f v2 + r XADD mystream 3-0 f v3 + r XGROUP CREATE mystream grp 0 + r XREADGROUP GROUP grp c1 STREAMS mystream > + r XNACK mystream grp FAIL IDS 1 1-0 + r XTRIM mystream MAXLEN 1 + set pending [r XPENDING mystream grp - + 10] + assert_equal [llength $pending] 3 + assert_equal [lindex $pending 0] {1-0 {} -1 1} + } + + # Verify that XNACK handles more IDs than fit in the stack-allocated + # static vector (STREAMID_STATIC_VECTOR_LEN), forcing a heap allocation + # for the ID array. Uses 50 IDs to exceed the typical static limit. + test "XNACK with IDs exceeding STREAMID_STATIC_VECTOR_LEN for heap allocation" { + r DEL mystream + r XGROUP CREATE mystream grp $ MKSTREAM + + set ids {} + for {set i 1} {$i <= 50} {incr i} { + r XADD mystream $i-0 f v$i + lappend ids "$i-0" + } + r XREADGROUP GROUP grp c1 COUNT 50 STREAMS mystream > + + set result [r XNACK mystream grp FAIL IDS 50 {*}$ids] + assert_equal $result 50 + + set pending [r XPENDING mystream grp - + 100] + assert_equal [llength $pending] 50 + foreach entry $pending { + assert_equal [lindex $entry 1] {} + } + } + + # Verify that the RETRYCOUNT option overrides the delivery_count that + # the mode would normally set. It takes precedence over FATAL (would + # set LLONG_MAX), SILENT (would decrement), and FAIL (would keep). + # Also works when applied to an already-NACKed entry. 
+ test "XNACK RETRYCOUNT overrides delivery_count" { + # RETRYCOUNT overrides FATAL: count is 42 instead of LLONG_MAX + r DEL mystream + r XADD mystream 1-0 f v + r XGROUP CREATE mystream grp 0 + r XREADGROUP GROUP grp c1 STREAMS mystream > + + assert_equal 1 [r XNACK mystream grp FATAL IDS 1 1-0 RETRYCOUNT 42] + set pending [r XPENDING mystream grp - + 10] + assert_equal [lindex $pending 0] {1-0 {} -1 42} + + # RETRYCOUNT overrides SILENT: count is 10 instead of decrement + r XCLAIM mystream grp c1 0 1-0 + assert_equal 1 [r XNACK mystream grp SILENT IDS 1 1-0 RETRYCOUNT 10] + set pending [r XPENDING mystream grp - + 10] + assert_equal [lindex $pending 0 3] 10 + + # RETRYCOUNT 0: explicitly set count to zero + r XCLAIM mystream grp c1 0 1-0 + assert_equal 1 [r XNACK mystream grp FAIL IDS 1 1-0 RETRYCOUNT 0] + set pending [r XPENDING mystream grp - + 10] + assert_equal [lindex $pending 0] {1-0 {} -1 0} + + # RETRYCOUNT on already-NACKed entry: overwrites the existing count + assert_equal 1 [r XNACK mystream grp FAIL IDS 1 1-0 RETRYCOUNT 99] + set pending [r XPENDING mystream grp - + 10] + assert_equal [lindex $pending 0] {1-0 {} -1 99} + } + + # Verify FORCE option behavior. FORCE creates an unowned PEL entry for an + # ID that is not currently in any consumer's PEL, as long as the + # corresponding stream entry exists. Covers: + # - Creating a new NACKed PEL entry without prior XREADGROUP. + # - Skipping non-existent stream entries (returns 0). + # - FATAL and SILENT modes apply their delivery_count logic on FORCE-created entries. + # - On already-owned entries, FORCE follows the normal NACK path. + # - On already-NACKed entries, FORCE is a no-op (found-path applies). + # - FORCE on an empty stream returns 0 and creates no PEL entry. 
+ test "XNACK FORCE behavior" { + # FORCE creates a new unowned PEL entry (no prior XREADGROUP) + r DEL mystream + r XADD mystream 1-0 f v1 + r XADD mystream 2-0 f v2 + r XADD mystream 3-0 f v3 + r XGROUP CREATE mystream grp 0 + + assert_equal 1 [r XNACK mystream grp FAIL IDS 1 1-0 FORCE] + set pending [r XPENDING mystream grp - + 10] + assert_equal [llength $pending] 1 + # FAIL + FORCE on a new entry: delivery_count defaults to 0 + assert_equal [lindex $pending 0] {1-0 {} -1 0} + # Verify the FORCE-created entry is claimable + set claimed [r XCLAIM mystream grp c1 0 1-0] + assert_equal [llength $claimed] 1 + assert_equal [lindex $claimed 0 0] 1-0 + + # FORCE skips non-existent stream entries + assert_equal 0 [r XNACK mystream grp FAIL IDS 1 9-9 FORCE] + + # FATAL + FORCE sets delivery_count to LLONG_MAX + r XACK mystream grp 1-0 + assert_equal 1 [r XNACK mystream grp FATAL IDS 1 1-0 FORCE] + set pending [r XPENDING mystream grp - + 10] + assert_equal [lindex $pending 0] {1-0 {} -1 9223372036854775807} + + # SILENT + FORCE: no prior count to decrement, so clamped to 0 + r XACK mystream grp 1-0 + assert_equal 2 [r XNACK mystream grp SILENT IDS 2 1-0 2-0 FORCE] + set pending [r XPENDING mystream grp - + 10] + assert_equal [llength $pending] 2 + assert_equal [lindex $pending 0] {1-0 {} -1 0} + assert_equal [lindex $pending 1] {2-0 {} -1 0} + + # On already-owned PEL entries: FORCE follows the normal NACK path + r XACK mystream grp 1-0 2-0 + r XREADGROUP GROUP grp c1 STREAMS mystream > + assert_equal 3 [r XNACK mystream grp FAIL IDS 3 1-0 2-0 3-0 FORCE] + set pending [r XPENDING mystream grp - + 10] + assert_equal [llength $pending] 3 + assert_equal [lindex $pending 0] {1-0 {} -1 1} + assert_equal [lindex $pending 1] {2-0 {} -1 1} + assert_equal [lindex $pending 2] {3-0 {} -1 1} + set c1_pending [r XPENDING mystream grp - + 10 c1] + assert_equal [llength $c1_pending] 0 + + # On already-NACKed entry: found-path applies, no duplicate created + assert_equal 1 [r XNACK 
mystream grp FAIL IDS 1 1-0 FORCE] + set pending [r XPENDING mystream grp - + 10] + assert_equal [llength $pending] 3 + assert_equal [lindex $pending 0] {1-0 {} -1 1} + + # FORCE on empty stream (MKSTREAM group): entry doesn't exist, returns 0 + r DEL mystream + r XGROUP CREATE mystream grp $ MKSTREAM + assert_equal 0 [r XNACK mystream grp FAIL IDS 1 1-0 FORCE] + set info [r XPENDING mystream grp] + assert_equal [lindex $info 0] 0 + } + + # Verify that FORCE and RETRYCOUNT work together: FORCE creates the PEL + # entry for IDs not currently in the PEL, and RETRYCOUNT overrides the + # delivery_count that the mode would normally assign. Tests all three + # modes (FAIL, SILENT, FATAL) combined with FORCE + RETRYCOUNT. + test "XNACK FORCE + RETRYCOUNT combination" { + r DEL mystream + r XADD mystream 1-0 f v1 + r XADD mystream 2-0 f v2 + r XADD mystream 3-0 f v3 + r XGROUP CREATE mystream grp 0 + + assert_equal 1 [r XNACK mystream grp FAIL IDS 1 1-0 RETRYCOUNT 7 FORCE] + assert_equal 1 [r XNACK mystream grp SILENT IDS 1 2-0 RETRYCOUNT 5 FORCE] + assert_equal 1 [r XNACK mystream grp FATAL IDS 1 3-0 RETRYCOUNT 99 FORCE] + + set pending [r XPENDING mystream grp - + 10] + assert_equal [llength $pending] 3 + + # RETRYCOUNT overrides all modes: each has the explicitly set count + assert_equal [lindex $pending 0] {1-0 {} -1 7} + assert_equal [lindex $pending 1] {2-0 {} -1 5} + assert_equal [lindex $pending 2] {3-0 {} -1 99} + } + + # Verify that FORCE and RETRYCOUNT options are accepted both before and + # after the "IDS numids id..." block, in any permutation. + # Each sub-case ACKs the entry afterward so the next sub-case starts clean. 
+ test "XNACK flexible IDS position - options accepted before and after IDS block" { + r DEL mystream + r XADD mystream 1-0 f v1 + r XADD mystream 2-0 f v2 + r XADD mystream 3-0 f v3 + r XGROUP CREATE mystream grp 0 + + # FORCE before IDS + assert_equal 1 [r XNACK mystream grp FAIL FORCE IDS 1 1-0] + r XACK mystream grp 1-0 + + # FORCE + RETRYCOUNT both before IDS + assert_equal 1 [r XNACK mystream grp FAIL FORCE RETRYCOUNT 42 IDS 1 1-0] + r XACK mystream grp 1-0 + + # RETRYCOUNT before IDS, FORCE after IDS + assert_equal 1 [r XNACK mystream grp FAIL RETRYCOUNT 5 IDS 1 1-0 FORCE] + r XACK mystream grp 1-0 + + # FORCE before IDS, RETRYCOUNT after IDS + assert_equal 1 [r XNACK mystream grp FAIL FORCE IDS 1 1-0 RETRYCOUNT 3] + r XACK mystream grp 1-0 + + # Multiple IDs with options before IDS + assert_equal 3 [r XNACK mystream grp FAIL RETRYCOUNT 10 IDS 3 1-0 2-0 3-0 FORCE] + r XACK mystream grp 1-0 2-0 3-0 + + # Canonical order (IDS first, options after) still works + assert_equal 1 [r XNACK mystream grp FAIL IDS 1 1-0 RETRYCOUNT 20 FORCE] + } + + # Verify that re-NACKing an already-NACKed entry moves it to the tail + # of the NACK zone. The NACK zone is time-ordered (FIFO insertion), + # so moving to the tail means it will be claimed last. + # Initial NACK order: 1-0, 2-0, 3-0. After re-NACKing 1-0 the order + # becomes: 2-0, 3-0, 1-0. Verified via XREADGROUP CLAIM which walks + # the PEL from pel_time_head to pel_time_tail. 
+ test "XNACK re-NACK moves entry to end of NACK zone" { + r DEL mystream + r XADD mystream 1-0 f v1 + r XADD mystream 2-0 f v2 + r XADD mystream 3-0 f v3 + r XGROUP CREATE mystream grp 0 + r XREADGROUP GROUP grp c1 STREAMS mystream > + + r XNACK mystream grp FAIL IDS 3 1-0 2-0 3-0 + + set info [r XINFO STREAM mystream FULL] + set group [lindex [dict get $info groups] 0] + assert_equal [dict get $group nacked-count] 3 + + # Re-NACK 1-0 — moves it to end of NACK zone + r XNACK mystream grp FAIL IDS 1 1-0 + + set info [r XINFO STREAM mystream FULL] + set group [lindex [dict get $info groups] 0] + assert_equal [dict get $group nacked-count] 3 + assert_equal [dict get $group pel-count] 3 + + # XREADGROUP CLAIM walks from pel_time_head to pel_time_tail. + # After the re-NACK, zone order is: 2-0, 3-0, 1-0. + # `after 10` ensures enough idle time for the CLAIM min-idle threshold. + after 10 + set r1 [r XREADGROUP GROUP grp c2 COUNT 1 CLAIM 5 STREAMS mystream >] + set msg1 [lindex [lindex $r1 0] 1 0 0] + assert_equal $msg1 2-0 + + after 10 + set r2 [r XREADGROUP GROUP grp c2 COUNT 1 CLAIM 5 STREAMS mystream >] + set msg2 [lindex [lindex $r2 0] 1 0 0] + assert_equal $msg2 3-0 + + after 10 + set r3 [r XREADGROUP GROUP grp c2 COUNT 1 CLAIM 5 STREAMS mystream >] + set msg3 [lindex [lindex $r3 0] 1 0 0] + assert_equal $msg3 1-0 + } + + # Verify that NACKed entries are claimable by all three claim mechanisms. + # NACKed entries have delivery_time=0 which means effectively infinite + # idle time, so they always satisfy any min-idle-time threshold. + # Each sub-test sets up a fresh stream, NACKs an entry, then claims it. 
+ test "XNACK NACKed entries claimable via XCLAIM, XAUTOCLAIM, and XREADGROUP CLAIM" { + # XCLAIM with large min-idle-time: succeeds because idle is infinite + r DEL mystream + r XADD mystream 1-0 f v1 + r XADD mystream 2-0 f v2 + r XGROUP CREATE mystream grp 0 + r XREADGROUP GROUP grp c1 STREAMS mystream > + r XNACK mystream grp FAIL IDS 1 1-0 + set claimed [r XCLAIM mystream grp c2 99999 1-0] + assert_equal [llength $claimed] 1 + assert_equal [lindex $claimed 0 0] 1-0 + set pending [r XPENDING mystream grp - + 10 c2] + assert_equal [llength $pending] 1 + assert_equal [lindex $pending 0 0] 1-0 + + # XAUTOCLAIM with large min-idle-time: also succeeds + r DEL mystream + r XADD mystream 1-0 f v1 + r XADD mystream 2-0 f v2 + r XGROUP CREATE mystream grp 0 + r XREADGROUP GROUP grp c1 STREAMS mystream > + r XNACK mystream grp FAIL IDS 1 1-0 + set result [r XAUTOCLAIM mystream grp c2 99999 0-0] + set claimed_msgs [lindex $result 1] + assert_equal [llength $claimed_msgs] 1 + assert_equal [lindex $claimed_msgs 0 0] 1-0 + + # XCLAIM with min-idle-time 0: trivially satisfied + r DEL mystream + r XADD mystream 1-0 f v + r XGROUP CREATE mystream grp 0 + r XREADGROUP GROUP grp c1 STREAMS mystream > + r XNACK mystream grp FAIL IDS 1 1-0 + set claimed [r XCLAIM mystream grp c2 0 1-0] + assert_equal [llength $claimed] 1 + assert_equal [lindex $claimed 0 0] 1-0 + set pending [r XPENDING mystream grp - + 10 c2] + assert_equal [llength $pending] 1 + assert_equal [lindex $pending 0 0] 1-0 + + # XAUTOCLAIM with min-idle-time 0 + r DEL mystream + r XADD mystream 1-0 f v + r XGROUP CREATE mystream grp 0 + r XREADGROUP GROUP grp c1 STREAMS mystream > + r XNACK mystream grp FAIL IDS 1 1-0 + set result [r XAUTOCLAIM mystream grp c2 0 0-0] + set claimed_msgs [lindex $result 1] + assert_equal [llength $claimed_msgs] 1 + assert_equal [lindex $claimed_msgs 0 0] 1-0 + set pending [r XPENDING mystream grp - + 10 c2] + assert_equal [llength $pending] 1 + assert_equal [lindex $pending 0 0] 1-0 + + 
# XREADGROUP CLAIM: `after 10` ensures idle time exceeds the 5ms threshold + r DEL mystream + r XADD mystream 1-0 f v + r XGROUP CREATE mystream grp 0 + r XREADGROUP GROUP grp c1 STREAMS mystream > + r XNACK mystream grp FAIL IDS 1 1-0 + after 10 + set result [r XREADGROUP GROUP grp c2 CLAIM 5 STREAMS mystream >] + set pending [r XPENDING mystream grp - + 10 c2] + assert_equal [llength $pending] 1 + assert_equal [lindex $pending 0 0] 1-0 + } + + # Verify that claiming a NACKed entry whose underlying stream data has + # been deleted (a "ghost" PEL entry) cleans the PEL entry instead of + # returning data. + # - XCLAIM on a deleted NACKed entry: returns empty, removes the PEL + # entry (exercises the streamPropagateXACK path for unowned NACKs). + # - XAUTOCLAIM: claims the surviving owned entry (2-0) and reports the + # deleted NACKed entry (3-0) in its deleted-IDs list. + test "XNACK XCLAIM/XAUTOCLAIM of deleted NACKed entries cleans PEL" { + r DEL mystream + r XADD mystream 1-0 f v1 + r XADD mystream 2-0 f v2 + r XADD mystream 3-0 f v3 + r XGROUP CREATE mystream grp 0 + r XREADGROUP GROUP grp c1 STREAMS mystream > + + r XNACK mystream grp FAIL IDS 2 1-0 3-0 + r XDEL mystream 1-0 + r XDEL mystream 3-0 + + # XCLAIM of deleted unowned NACK: returns empty but cleans PEL + # (exercises the streamPropagateXACK path for unowned NACKs) + set claimed [r XCLAIM mystream grp c2 0 1-0] + assert_equal [llength $claimed] 0 + + # 1-0 was cleaned from PEL; 3-0 still a ghost NACKed entry + set pending [r XPENDING mystream grp - + 10] + assert_equal [llength $pending] 2 + assert_match {2-0 c1 * 1} [lindex $pending 0] + assert_equal [lindex $pending 1] {3-0 {} -1 1} + + # XAUTOCLAIM walks the entire PEL: claims surviving 2-0, reports deleted 3-0 + set result [r XAUTOCLAIM mystream grp c2 0 0-0] + set claimed_msgs [lindex $result 1] + set deleted_ids [lindex $result 2] + + assert_equal [llength $claimed_msgs] 1 + assert_equal [lindex $claimed_msgs 0 0] 2-0 + assert_equal [llength 
$deleted_ids] 1 + assert_equal [lindex $deleted_ids 0] 3-0 + + set pending [r XPENDING mystream grp - + 10] + assert_equal [llength $pending] 1 + assert_match {2-0 c2 * 2} [lindex $pending 0] + } + + # Verify that a client blocked on XREADGROUP BLOCK CLAIM is woken up + # when entries are NACKed. NACKed entries have delivery_time=0 (infinite + # idle), so they immediately satisfy the CLAIM min-idle-time threshold. + # Uses a deferring client (non-blocking Tcl socket) to simulate a + # blocked consumer waiting for claimable entries. + test "XNACK XREADGROUP BLOCK CLAIM wakes up on NACKed entries" { + r DEL mystream + r XADD mystream 1-0 f v1 + r XADD mystream 2-0 f v2 + r XGROUP CREATE mystream grp 0 + r XREADGROUP GROUP grp c1 STREAMS mystream > + + # c2 blocks waiting for claimable entries with min-idle 1000ms + set rd [redis_deferring_client] + $rd XREADGROUP GROUP grp c2 BLOCK 5000 CLAIM 1000 STREAMS mystream > + wait_for_blocked_client + + # XNACK makes entries immediately claimable, waking c2 + r XNACK mystream grp FAIL IDS 2 1-0 2-0 + + wait_for_blocked_clients_count 0 + set result [$rd read] + assert_equal [llength $result] 1 + lassign [lindex $result 0] stream_name messages + assert_equal $stream_name "mystream" + assert_equal [llength $messages] 2 + assert_equal [lindex $messages 0 0] 1-0 + assert_equal [lindex $messages 1 0] 2-0 + + # Entries are now owned by c2 + set pending [r XPENDING mystream grp - + 10 c2] + assert_equal [llength $pending] 2 + assert_equal [lindex $pending 0 0] 1-0 + assert_equal [lindex $pending 1 0] 2-0 + + $rd close + } + + # Verify that when a consumer reads its own pending entries via + # `XREADGROUP ... 0-0` (pending-entry scan), NACKed entries are + # excluded because they are no longer owned by any consumer. + # Only 2-0 (still owned by c1) should be returned. 
+ test "XNACK XREADGROUP pending read excludes NACKed entries" { + r DEL mystream + r XADD mystream 1-0 f v1 + r XADD mystream 2-0 f v2 + r XADD mystream 3-0 f v3 + r XGROUP CREATE mystream grp 0 + r XREADGROUP GROUP grp c1 STREAMS mystream > + + r XNACK mystream grp FAIL IDS 2 1-0 3-0 + + set result [r XREADGROUP GROUP grp c1 STREAMS mystream 0-0] + set entries [lindex $result 0 1] + assert_equal [llength $entries] 1 + assert_equal [lindex $entries 0 0] 2-0 + } + + # Verify that XINFO CONSUMERS reflects the reduced pending count after + # XNACK, and that a consumer is not destroyed even when all its entries + # are NACKed (0 pending). Consumer cleanup is only done by explicit + # XGROUP DELCONSUMER. + test "XNACK effect on consumer state and XINFO CONSUMERS" { + r DEL mystream + r XADD mystream 1-0 f v1 + r XADD mystream 2-0 f v2 + r XADD mystream 3-0 f v3 + r XGROUP CREATE mystream grp 0 + r XREADGROUP GROUP grp c1 STREAMS mystream > + + # c1 initially has 3 pending + set consumers [r XINFO CONSUMERS mystream grp] + set c1_info [lindex $consumers 0] + assert_equal [dict get $c1_info pending] 3 + + # XNACK 2 entries: c1 pending drops to 1 + r XNACK mystream grp FAIL IDS 2 1-0 2-0 + set consumers [r XINFO CONSUMERS mystream grp] + set c1_info [lindex $consumers 0] + assert_equal [dict get $c1_info pending] 1 + + # XNACK the last entry: c1 has 0 pending but still exists + r XNACK mystream grp FAIL IDS 1 3-0 + set c1_pending [r XPENDING mystream grp - + 10 c1] + assert_equal [llength $c1_pending] 0 + set info [r XINFO GROUPS mystream] + set grp [lindex $info 0] + assert_equal [dict get $grp consumers] 1 + } + + # Verify that XGROUP DESTROY removes all PEL entries including NACKed + # (unowned) ones. After destroying the group and creating a new one, + # the PEL is empty. 
+ test "XNACK XGROUP DESTROY cleans up NACKed entries" { + r DEL mystream + r XADD mystream 1-0 f v1 + r XADD mystream 2-0 f v2 + r XGROUP CREATE mystream grp 0 + r XREADGROUP GROUP grp c1 STREAMS mystream > + + r XNACK mystream grp FAIL IDS 2 1-0 2-0 + + set info [r XPENDING mystream grp] + assert_equal [lindex $info 0] 2 + + r XGROUP DESTROY mystream grp + + # New group has a clean PEL + r XGROUP CREATE mystream grp2 0 + set info [r XPENDING mystream grp2] + assert_equal [lindex $info 0] 0 + } + + # Verify that XGROUP DELCONSUMER only removes consumer-owned PEL entries. + # NACKed (unowned) entries are not affected — they remain in the group + # PEL and can still be claimed by other consumers. + # Setup: c1 owns {1-0, 2-0}. NACK 1-0. Delete c1. Only 2-0 (owned) + # is removed; 1-0 (NACKed/unowned) survives. + test "XNACK XGROUP DELCONSUMER works when group PEL has NACKed entries" { + r DEL mystream + r XADD mystream 1-0 f v1 + r XADD mystream 2-0 f v2 + r XGROUP CREATE mystream grp 0 + r XREADGROUP GROUP grp c1 STREAMS mystream > + + r XNACK mystream grp FAIL IDS 1 1-0 + + # DELCONSUMER returns the count of consumer-owned entries removed (1: only 2-0) + set deleted_pending [r XGROUP DELCONSUMER mystream grp c1] + assert_equal $deleted_pending 1 + + # Group PEL still has the NACKed entry (1-0) + set info [r XPENDING mystream grp] + assert_equal [lindex $info 0] 1 + + set pending [r XPENDING mystream grp - + 10] + assert_equal [llength $pending] 1 + assert_equal [lindex $pending 0] {1-0 {} -1 1} + + set stream_info [r XINFO STREAM mystream FULL] + set group [lindex [dict get $stream_info groups] 0] + assert_equal [dict get $group nacked-count] 1 + + # The surviving NACKed entry can still be claimed + set claimed [r XCLAIM mystream grp c2 0 1-0] + assert_equal [llength $claimed] 1 + } + + # Verify that the `nacked-count` field reported by XINFO STREAM FULL + # accurately tracks the number of entries in the NACK zone through + # various operations: + # - XNACK 
increases nacked-count (pel-count stays the same). + # - XCLAIM (reclaim) decreases nacked-count (moves entry back to owned). + # - XACK of a NACKed entry decreases both nacked-count and pel-count. + # - nacked-count is per-group (independent across groups). + test "XNACK XINFO STREAM FULL nacked-count reflects nack zone size" { + r DEL mystream + r XADD mystream 1-0 f v1 + r XADD mystream 2-0 f v2 + r XADD mystream 3-0 f v3 + r XADD mystream 4-0 f v4 + r XGROUP CREATE mystream grp 0 + r XREADGROUP GROUP grp c1 COUNT 4 STREAMS mystream > + + # Before any XNACK: all entries owned, nacked-count is 0 + set info [r XINFO STREAM mystream FULL] + set group [lindex [dict get $info groups] 0] + assert_equal [dict get $group nacked-count] 0 + assert_equal [dict get $group pel-count] 4 + + # NACK one entry: nacked-count goes up, pel-count unchanged + r XNACK mystream grp FAIL IDS 1 1-0 + set info [r XINFO STREAM mystream FULL] + set group [lindex [dict get $info groups] 0] + assert_equal [dict get $group nacked-count] 1 + assert_equal [dict get $group pel-count] 4 + + # NACK two more + r XNACK mystream grp FAIL IDS 2 2-0 3-0 + set info [r XINFO STREAM mystream FULL] + set group [lindex [dict get $info groups] 0] + assert_equal [dict get $group nacked-count] 3 + assert_equal [dict get $group pel-count] 4 + + # Reclaim via XCLAIM: nacked-count decreases, pel-count unchanged + r XCLAIM mystream grp c1 0 1-0 + set info [r XINFO STREAM mystream FULL] + set group [lindex [dict get $info groups] 0] + assert_equal [dict get $group nacked-count] 2 + assert_equal [dict get $group pel-count] 4 + + # XACK a NACKed entry: both counts decrease + r XACK mystream grp 2-0 + set info [r XINFO STREAM mystream FULL] + set group [lindex [dict get $info groups] 0] + assert_equal [dict get $group nacked-count] 1 + assert_equal [dict get $group pel-count] 3 + + # XACK last NACKed entry: nacked-count reaches 0 + r XACK mystream grp 3-0 + set info [r XINFO STREAM mystream FULL] + set group [lindex 
[dict get $info groups] 0] + assert_equal [dict get $group nacked-count] 0 + assert_equal [dict get $group pel-count] 2 + + # Multiple groups: nacked-count is per-group + r XNACK mystream grp FAIL IDS 1 1-0 + r XGROUP CREATE mystream grp2 0 + r XREADGROUP GROUP grp2 c2 COUNT 4 STREAMS mystream > + set info [r XINFO STREAM mystream FULL] + set grp1 [lindex [dict get $info groups] 0] + set grp2 [lindex [dict get $info groups] 1] + assert_equal [dict get $grp1 nacked-count] 1 + assert_equal [dict get $grp2 nacked-count] 0 + } + + # Verify that NACKed entries survive an RDB save/reload cycle. + # Uses all three modes (FAIL, FATAL, SILENT) plus FORCE-created entries + # in a second group (grp2) with RETRYCOUNT. After DEBUG RELOAD: + # - delivery_counts are preserved (FAIL=1, FATAL=LLONG_MAX, SILENT=0). + # - NACK zone order is preserved (verified via XREADGROUP CLAIM order). + # - FORCE-created entries in grp2 are intact and claimable. + test "XNACK RDB save and load preserves NACKed entries" { + r DEL mystream + r XADD mystream 1-0 f v1 + r XADD mystream 2-0 f v2 + r XADD mystream 3-0 f v3 + r XADD mystream 4-0 f v4 + r XGROUP CREATE mystream grp 0 + r XREADGROUP GROUP grp c1 STREAMS mystream > + + # NACK with different modes + r XNACK mystream grp FAIL IDS 1 1-0 + r XNACK mystream grp FATAL IDS 1 2-0 + r XNACK mystream grp SILENT IDS 1 3-0 + + # Separate group: FORCE-created entries (no prior XREADGROUP in grp2) + r XGROUP CREATE mystream grp2 0 + r XNACK mystream grp2 FAIL IDS 1 1-0 FORCE + r XNACK mystream grp2 FATAL IDS 1 2-0 RETRYCOUNT 77 FORCE + + r SAVE + r DEBUG RELOAD + + # Verify grp state after reload + set pending [r XPENDING mystream grp - + 10] + assert_equal [llength $pending] 4 + assert_equal [lindex $pending 0] {1-0 {} -1 1} + assert_equal [lindex $pending 1] {2-0 {} -1 9223372036854775807} + assert_equal [lindex $pending 2] {3-0 {} -1 0} + assert_match {4-0 c1 * 1} [lindex $pending 3] + + # Verify NACK zone order is preserved: 1-0, 2-0, 3-0 + set r1 
[r XREADGROUP GROUP grp c2 COUNT 1 CLAIM 0 STREAMS mystream >] + assert_equal [lindex [lindex $r1 0] 1 0 0] 1-0 + set r2 [r XREADGROUP GROUP grp c2 COUNT 1 CLAIM 0 STREAMS mystream >] + assert_equal [lindex [lindex $r2 0] 1 0 0] 2-0 + + # Verify grp2 FORCE-created entries survived the reload + set pending2 [r XPENDING mystream grp2 - + 10] + assert_equal [llength $pending2] 2 + assert_equal [lindex $pending2 0] {1-0 {} -1 0} + assert_equal [lindex $pending2 1] {2-0 {} -1 77} + + set claimed [r XCLAIM mystream grp2 c1 0 1-0 2-0] + assert_equal [llength $claimed] 2 + } {} {external:skip needs:debug} + + # Verify that NACKed entries survive DUMP/RESTORE serialization. + # After DUMP + DEL + RESTORE, the PEL state (delivery_counts, unowned + # status, nacked-count, and NACK zone claim order) is identical to the + # original. + test "XNACK NACKed entries survive DUMP and RESTORE" { + r DEL mystream + r XADD mystream 1-0 f v1 + r XADD mystream 2-0 f v2 + r XADD mystream 3-0 f v3 + r XGROUP CREATE mystream grp 0 + r XREADGROUP GROUP grp c1 STREAMS mystream > + + r XNACK mystream grp SILENT IDS 1 1-0 + r XNACK mystream grp FATAL IDS 1 3-0 + + set pending_before [r XPENDING mystream grp - + 10] + assert_equal [llength $pending_before] 3 + + set dump [r DUMP mystream] + r DEL mystream + r RESTORE mystream 0 $dump + + # PEL state must match pre-DUMP state + set pending_after [r XPENDING mystream grp - + 10] + assert_equal [llength $pending_after] 3 + + assert_equal [lindex $pending_after 0] {1-0 {} -1 0} + assert_match {2-0 c1 * 1} [lindex $pending_after 1] + assert_equal [lindex $pending_after 2] {3-0 {} -1 9223372036854775807} + + set info [r XINFO STREAM mystream FULL] + set group [lindex [dict get $info groups] 0] + assert_equal [dict get $group nacked-count] 2 + + # NACK zone claim order preserved: 1-0 first, then 3-0 + set r1 [r XREADGROUP GROUP grp c2 COUNT 1 CLAIM 0 STREAMS mystream >] + assert_equal [lindex [lindex $r1 0] 1 0 0] 1-0 + set r2 [r XREADGROUP GROUP grp 
c2 COUNT 1 CLAIM 0 STREAMS mystream >] + assert_equal [lindex [lindex $r2 0] 1 0 0] 3-0 + } + + # Verify that COPY creates an independent copy that preserves NACKed + # entries (delivery_counts, unowned status, nacked-count, NACK zone + # order). Also confirms the original stream is unaffected by operations + # on the copy. + # Uses hash-tag keys {t} to ensure same slot for cluster compatibility. + test "XNACK COPY preserves NACKed entries" { + r DEL mystream{t} mystream{t}_copy + r XADD mystream{t} 1-0 f v1 + r XADD mystream{t} 2-0 f v2 + r XADD mystream{t} 3-0 f v3 + r XGROUP CREATE mystream{t} grp 0 + r XREADGROUP GROUP grp c1 STREAMS mystream{t} > + + r XNACK mystream{t} grp FAIL IDS 1 1-0 + r XNACK mystream{t} grp FATAL IDS 1 3-0 + + r COPY mystream{t} mystream{t}_copy + + # Copied stream has the same NACKed state + set pending [r XPENDING mystream{t}_copy grp - + 10] + assert_equal [llength $pending] 3 + assert_equal [lindex $pending 0] {1-0 {} -1 1} + assert_match {2-0 c1 * 1} [lindex $pending 1] + assert_equal [lindex $pending 2] {3-0 {} -1 9223372036854775807} + + set info [r XINFO STREAM mystream{t}_copy FULL] + set group [lindex [dict get $info groups] 0] + assert_equal [dict get $group nacked-count] 2 + + # NACK zone order is preserved in the copy + set r1 [r XREADGROUP GROUP grp c2 COUNT 1 CLAIM 0 STREAMS mystream{t}_copy >] + assert_equal [lindex [lindex $r1 0] 1 0 0] 1-0 + set r2 [r XREADGROUP GROUP grp c2 COUNT 1 CLAIM 0 STREAMS mystream{t}_copy >] + assert_equal [lindex [lindex $r2 0] 1 0 0] 3-0 + + # Original stream is unaffected: 1-0 still NACKed/unowned + set orig_pending [r XPENDING mystream{t} grp - + 10] + assert_equal [lindex $orig_pending 0 1] {} + } } + +start_server {tags {"stream needs:debug"} overrides {appendonly yes aof-use-rdb-preamble no}} { + # Verify that NACKed entries are correctly emitted during AOF rewrite + # and fully restored via `debug loadaof`. 
After rewrite + reload, + # delivery_counts, unowned status, and NACK zone claim order must + # match the pre-rewrite state. + test "XNACK entries survive AOF rewrite" { + r DEL mystream + r XADD mystream 1-0 f v1 + r XADD mystream 2-0 f v2 + r XADD mystream 3-0 f v3 + r XGROUP CREATE mystream grp 0 + r XREADGROUP GROUP grp c1 STREAMS mystream > + + r XNACK mystream grp SILENT IDS 1 1-0 + r XNACK mystream grp FAIL IDS 1 3-0 + + set pending_before [r XPENDING mystream grp - + 10] + assert_equal [llength $pending_before] 3 + assert_equal [lindex $pending_before 0] {1-0 {} -1 0} + assert_match {2-0 c1 * 1} [lindex $pending_before 1] + assert_equal [lindex $pending_before 2] {3-0 {} -1 1} + + r bgrewriteaof + waitForBgrewriteaof r + r debug loadaof + + # Verify state matches pre-rewrite + set pending_after [r XPENDING mystream grp - + 10] + assert_equal [llength $pending_after] 3 + assert_equal [lindex $pending_after 0] {1-0 {} -1 0} + assert_match {2-0 c1 * 1} [lindex $pending_after 1] + assert_equal [lindex $pending_after 2] {3-0 {} -1 1} + + # NACK zone claim order preserved + set r1 [r XREADGROUP GROUP grp c2 COUNT 1 CLAIM 0 STREAMS mystream >] + assert_equal [lindex [lindex $r1 0] 1 0 0] 1-0 + set r2 [r XREADGROUP GROUP grp c2 COUNT 1 CLAIM 0 STREAMS mystream >] + assert_equal [lindex [lindex $r2 0] 1 0 0] 3-0 + } + + # Test AOF rewrite when the NACK zone has more entries than the AOF + # batch size (64 entries per XNACK FORCE batch in the AOF emitter). + # With 65 NACKed entries + 1 owned entry, the rewriter must emit + # multiple XNACK FORCE batches for the NACK zone and a separate + # XCLAIM batch for the owned tail. After rewrite + reload, all 66 + # PEL entries must be intact with correct ownership and delivery_counts. 
+ test "XNACK AOF rewrite batch split -- 65 NACKed entries with owned tail" { + r DEL mystream + + set total_nack 65 + set total [expr {$total_nack + 1}] + + for {set i 1} {$i <= $total} {incr i} { + r XADD mystream $i-0 f v$i + } + r XGROUP CREATE mystream grp 0 + r XREADGROUP GROUP grp c1 COUNT $total STREAMS mystream > + + set nack_ids {} + for {set i 1} {$i <= $total_nack} {incr i} { + lappend nack_ids $i-0 + } + r XNACK mystream grp FAIL IDS $total_nack {*}$nack_ids + + set pending_before [r XPENDING mystream grp - + 200] + assert_equal [llength $pending_before] $total + + r bgrewriteaof + waitForBgrewriteaof r + r debug loadaof + + set pending_after [r XPENDING mystream grp - + 200] + assert_equal [llength $pending_after] $total + + # All 65 NACKed entries: unowned with delivery_count=1 + for {set i 0} {$i < $total_nack} {incr i} { + set entry [lindex $pending_after $i] + assert_equal [lindex $entry 0] "[expr {$i + 1}]-0" + assert_equal [lindex $entry 1] {} + assert_equal [lindex $entry 3] 1 + } + + # The last entry (66-0) is still owned by c1 + set last [lindex $pending_after $total_nack] + assert_equal [lindex $last 0] "$total-0" + assert_equal [lindex $last 1] c1 + + set claimed [r XCLAIM mystream grp c2 0 1-0 65-0] + assert_equal [llength $claimed] 2 + } + + # Edge case: the entire PEL consists of NACKed entries (no owned + # entries at all). With 65 entries exceeding the 64-entry AOF batch + # limit, the rewriter must split into multiple batches even though + # there is no owned tail. After reload all entries are unowned. 
+ test "XNACK AOF rewrite batch split -- entire PEL is NACK zone" { + r DEL mystream + + set total 65 + + for {set i 1} {$i <= $total} {incr i} { + r XADD mystream $i-0 f v$i + } + r XGROUP CREATE mystream grp 0 + r XREADGROUP GROUP grp c1 COUNT $total STREAMS mystream > + + set nack_ids {} + for {set i 1} {$i <= $total} {incr i} { + lappend nack_ids $i-0 + } + r XNACK mystream grp FAIL IDS $total {*}$nack_ids + + set pending_before [r XPENDING mystream grp - + 200] + assert_equal [llength $pending_before] $total + + r bgrewriteaof + waitForBgrewriteaof r + r debug loadaof + + set pending_after [r XPENDING mystream grp - + 200] + assert_equal [llength $pending_after] $total + + # Every entry is unowned with delivery_count=1 + for {set i 0} {$i < $total} {incr i} { + assert_equal [lindex $pending_after $i] "[expr {$i + 1}]-0 {} -1 1" + } + } + + # Verify that AOF rewrite correctly batches NACKed entries that have + # different delivery_counts. The AOF emitter groups consecutive entries + # with the same delivery_count into a single XNACK FORCE command; + # entries with different counts require separate batches. + # Setup: 6 entries NACKed with mixed modes/RETRYCOUNT: + # 1-0,2-0 = FATAL (LLONG_MAX), 3-0,4-0 = SILENT (0), + # 5-0 = RETRYCOUNT 42, 6-0 = FAIL (1). 
+ test "XNACK AOF rewrite with mixed delivery_counts batches correctly" { + r DEL mystream + + for {set i 1} {$i <= 6} {incr i} { + r XADD mystream $i-0 f v$i + } + r XGROUP CREATE mystream grp 0 + r XREADGROUP GROUP grp c1 COUNT 6 STREAMS mystream > + + r XNACK mystream grp FATAL IDS 2 1-0 2-0 + r XNACK mystream grp SILENT IDS 2 3-0 4-0 + r XNACK mystream grp FAIL IDS 1 5-0 RETRYCOUNT 42 + r XNACK mystream grp FAIL IDS 1 6-0 + + set pending_before [r XPENDING mystream grp - + 10] + assert_equal [llength $pending_before] 6 + + r bgrewriteaof + waitForBgrewriteaof r + r debug loadaof + + set pending_after [r XPENDING mystream grp - + 10] + assert_equal [llength $pending_after] 6 + + # Verify each entry retained its specific delivery_count + foreach entry $pending_after { + set id [lindex $entry 0] + set consumer [lindex $entry 1] + set dc [lindex $entry 3] + + assert_equal $consumer {} + + switch $id { + 1-0 - 2-0 { + assert_equal $dc 9223372036854775807 + } + 3-0 - 4-0 { + assert_equal $dc 0 + } + 5-0 { + assert_equal $dc 42 + } + 6-0 { + assert_equal $dc 1 + } + } + } + } + + # Verify that FORCE-created PEL entries (which were never delivered + # to a consumer via XREADGROUP) survive AOF rewrite. These entries + # only exist in the group PEL, not in any consumer PEL, so the AOF + # emitter must handle them specially. 
+ test "XNACK FORCE-created entries survive AOF rewrite" { + r DEL mystream + r XADD mystream 1-0 f v1 + r XADD mystream 2-0 f v2 + r XADD mystream 3-0 f v3 + r XGROUP CREATE mystream grp 0 + + r XNACK mystream grp FAIL IDS 1 1-0 FORCE + r XNACK mystream grp FATAL IDS 1 2-0 FORCE + r XNACK mystream grp SILENT IDS 1 3-0 RETRYCOUNT 33 FORCE + + set pending_before [r XPENDING mystream grp - + 10] + assert_equal [llength $pending_before] 3 + + r bgrewriteaof + waitForBgrewriteaof r + r debug loadaof + + set pending_after [r XPENDING mystream grp - + 10] + assert_equal [llength $pending_after] 3 + assert_equal [lindex $pending_after 0] {1-0 {} -1 0} + assert_equal [lindex $pending_after 1] {2-0 {} -1 9223372036854775807} + assert_equal [lindex $pending_after 2] {3-0 {} -1 33} + + # FORCE-created entries are still claimable after reload + set claimed [r XCLAIM mystream grp c1 0 1-0 2-0 3-0] + assert_equal [llength $claimed] 3 + } +} + +start_server {tags {"repl external:skip" "stream"}} { + # Verify that XNACK commands replicate correctly to replicas. + # Tests all three modes (FAIL, FATAL, SILENT) and FORCE option. + # After wait_for_ofs_sync, the replica's PEL state must match the + # master's: same delivery_counts, same unowned status. 
+ test "XNACK replication of modes and FORCE" { + set master [srv 0 client] + set master_host [srv 0 host] + set master_port [srv 0 port] + + start_server {tags {"stream"}} { + set replica [srv 0 client] + + $replica replicaof $master_host $master_port + wait_for_sync $replica + + # Mode replication: FAIL, FATAL, SILENT on consumer-owned entries + $master DEL mystream + $master XADD mystream 1-0 f v1 + $master XADD mystream 2-0 f v2 + $master XADD mystream 3-0 f v3 + $master XADD mystream 4-0 f v4 + $master XGROUP CREATE mystream grp 0 + $master XREADGROUP GROUP grp c1 STREAMS mystream > + wait_for_ofs_sync $master $replica + + $master XNACK mystream grp FAIL IDS 1 1-0 + $master XNACK mystream grp FATAL IDS 1 3-0 + $master XNACK mystream grp SILENT IDS 1 4-0 + wait_for_ofs_sync $master $replica + + # Verify replica state matches master + set pending [$replica XPENDING mystream grp - + 10] + assert_equal [llength $pending] 4 + assert_equal [lindex $pending 0] {1-0 {} -1 1} + assert_match {2-0 c1 * 1} [lindex $pending 1] + assert_equal [lindex $pending 2] {3-0 {} -1 9223372036854775807} + assert_equal [lindex $pending 3] {4-0 {} -1 0} + + # FORCE replication: create PEL entries without prior XREADGROUP + $master DEL mystream2 + $master XADD mystream2 1-0 f v1 + $master XADD mystream2 2-0 f v2 + $master XGROUP CREATE mystream2 grp 0 + wait_for_ofs_sync $master $replica + + $master XNACK mystream2 grp FAIL IDS 1 1-0 FORCE + $master XNACK mystream2 grp FATAL IDS 1 2-0 FORCE + wait_for_ofs_sync $master $replica + + set pending [$replica XPENDING mystream2 grp - + 10] + assert_equal [llength $pending] 2 + + assert_equal [lindex $pending 0] {1-0 {} -1 0} + assert_equal [lindex $pending 1] {2-0 {} -1 9223372036854775807} + } + } +} + +start_server {tags {"repl external:skip" "stream"}} { + # Verify that reclaim/acknowledge operations on NACKed entries + # propagate correctly to replicas. Tests four operations: + # 1. 
XCLAIM a NACKed entry — replica sees new consumer ownership. + # 2. XACK a NACKed entry — replica sees it removed from PEL. + # 3. XAUTOCLAIM NACKed entries — replica sees new consumer ownership. + # 4. XREADGROUP CLAIM NACKed entries — replica sees new consumer ownership. + # Each step uses wait_for_ofs_sync to ensure replication completes + # before reading from the replica. + test "XNACK reclaim operations propagate correctly to replica" { + set master [srv 0 client] + set master_host [srv 0 host] + set master_port [srv 0 port] + + start_server {tags {"stream"}} { + set replica [srv 0 client] + + $replica replicaof $master_host $master_port + wait_for_sync $replica + + $master DEL mystream + $master XADD mystream 1-0 f v1 + $master XADD mystream 2-0 f v2 + $master XADD mystream 3-0 f v3 + $master XGROUP CREATE mystream grp 0 + $master XREADGROUP GROUP grp c1 STREAMS mystream > + wait_for_ofs_sync $master $replica + + $master XNACK mystream grp FAIL IDS 2 1-0 2-0 + wait_for_ofs_sync $master $replica + + # 1. XCLAIM a NACKed entry: replica sees c2 owning 1-0 + $master XCLAIM mystream grp c2 0 1-0 + wait_for_ofs_sync $master $replica + + set pending [$replica XPENDING mystream grp - + 10 c2] + assert_equal [llength $pending] 1 + assert_equal [lindex $pending 0 0] 1-0 + + # 2. XACK a NACKed entry: 2-0 removed from replica PEL + $master XACK mystream grp 2-0 + wait_for_ofs_sync $master $replica + + set all_pending [$replica XPENDING mystream grp - + 10] + assert_equal [llength $all_pending] 2 + foreach entry $all_pending { + assert {[lindex $entry 0] ne "2-0"} + } + + # 3. XAUTOCLAIM NACKed entries: replica sees c3 owning 3-0 + $master XNACK mystream grp FAIL IDS 1 3-0 + wait_for_ofs_sync $master $replica + + $master XAUTOCLAIM mystream grp c3 99999 0-0 + wait_for_ofs_sync $master $replica + + set c3_pending [$replica XPENDING mystream grp - + 10 c3] + assert_equal [llength $c3_pending] 1 + assert_equal [lindex $c3_pending 0 0] 3-0 + + # 4. 
XREADGROUP CLAIM NACKed entries: replica sees c4 owning 1-0 + $master XNACK mystream grp FAIL IDS 1 1-0 + wait_for_ofs_sync $master $replica + + $master XREADGROUP GROUP grp c4 CLAIM 99999 STREAMS mystream > + wait_for_ofs_sync $master $replica + + set c4_pending [$replica XPENDING mystream grp - + 10 c4] + assert_equal [llength $c4_pending] 1 + assert_equal [lindex $c4_pending 0 0] 1-0 + } + } +} + From c77d60d6b8c0dfa67b938ea50929ffb1661612df Mon Sep 17 00:00:00 2001 From: charsyam Date: Wed, 8 Apr 2026 10:50:51 +0900 Subject: [PATCH 04/32] fix trivial double-free issue in rdbLoadObject (#15011) --- src/rdb.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/src/rdb.c b/src/rdb.c index 574f96b9a..61ca7f7cf 100644 --- a/src/rdb.c +++ b/src/rdb.c @@ -2901,11 +2901,13 @@ robj *rdbLoadObject(int rdbtype, rio *rdb, sds key, int dbid, int *error) /* search for duplicate records */ sds field = sdstrynewlen(fstr, flen); - if (!field || dictAdd(dupSearchDict, field, NULL) != DICT_OK || - !lpSafeToAdd(lp, (size_t)flen + vlen)) { + int field_added = (field != NULL && dictAdd(dupSearchDict, field, NULL) == DICT_OK); + if (!field_added || !lpSafeToAdd(lp, (size_t)flen + vlen)) { rdbReportCorruptRDB("Hash zipmap with dup elements, or big length (%u)", flen); + /* If field was not added to dict, we still own it. + * If it was added, dict owns it and dictRelease will free it. */ + if (!field_added) sdsfree(field); dictRelease(dupSearchDict); - sdsfree(field); lpFree(lp); zfree(encoded); o->ptr = NULL; From 0be39e503260324013ec1293501d5b5acaf34acf Mon Sep 17 00:00:00 2001 From: Sergei Georgiev Date: Wed, 8 Apr 2026 14:59:22 +0300 Subject: [PATCH 05/32] Fix missing consumer propagation on empty XREADGROUP (#14963) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ## Summary Fixes consumer replication inconsistency when `XREADGROUP` is called for a new consumer but no `XCLAIM` commands are propagated to the replica. 
Previously, consumer creation was only propagated to replicas when `noack=true`, relying on `XCLAIM` propagation to implicitly create the consumer in the non-NOACK path. However, if no messages exist to read, no `XCLAIM` is generated, and the consumer is silently lost on the replica. This is a follow-up to the original fix in [redis/redis#7140](https://github.com/redis/redis/issues/7140) / [redis/redis#7526](https://github.com/redis/redis/pull/7526), which introduced `XGROUP CREATECONSUMER` propagation but only for the `NOACK` case. ## Changes - **`xreadCommand` (src/t_stream.c, the handler that also serves `XREADGROUP`):** Replaced the `if (noack)` guard around the `streamPropagateConsumerCreation()` call with a deferred check after `streamReplyWithRange()`. Consumer creation is now propagated when `noack || propCount == 0` — that is, only when no `XCLAIM` commands were generated. This avoids redundant propagation in the common case where `XCLAIM` already implicitly creates the consumer on the replica, while correctly handling both the NOACK path (where PEL/XCLAIM is skipped entirely) and the no-messages path (where there is nothing to XCLAIM). - **Test (tests/unit/type/stream-cgroups.tcl):** Added replication test `"XREADGROUP propagates new consumer to replica"` that sets up a master-replica pair and verifies consumer propagation in two cases: (1) without NOACK when no messages are available to deliver, and (2) with NOACK when messages are delivered but XCLAIM is skipped. ## Benefits - **Master-replica consistency:** Consumers created by `XREADGROUP` are now visible on replicas whenever no `XCLAIM` would otherwise create them — covering both the NOACK path and the no-messages path. - **No redundant propagation:** The `noack || propCount == 0` condition avoids emitting a superfluous `XGROUP CREATECONSUMER` when `XCLAIM` commands are already propagated and would implicitly create the consumer on the replica.
--- src/t_stream.c | 27 +++++++---- tests/unit/type/stream-cgroups.tcl | 73 ++++++++++++++++++++++++++++++ 2 files changed, 91 insertions(+), 9 deletions(-) diff --git a/src/t_stream.c b/src/t_stream.c index e000df144..faa8aba0f 100644 --- a/src/t_stream.c +++ b/src/t_stream.c @@ -1942,11 +1942,14 @@ void streamPropagateGroupID(client *c, robj *key, streamCG *group, robj *groupna decrRefCount(argv[6]); } -/* We need this when we want to propagate creation of consumer that was created - * by XREADGROUP with the NOACK option. In that case, the only way to create - * the consumer at the replica is by using XGROUP CREATECONSUMER (see issue #7140) +/* Propagate creation of a consumer that was implicitly created by XREADGROUP. + * Called only when no XCLAIM commands were propagated for this consumer, + * since XCLAIM implicitly creates the consumer on the replica. This covers + * two cases: + * (1) NOACK, where the PEL/XCLAIM path is skipped entirely. + * (2) no messages were available to deliver (see #7140). * - * XGROUP CREATECONSUMER + * XGROUP CREATECONSUMER */ void streamPropagateConsumerCreation(client *c, robj *key, robj *groupname, sds consumername) { robj *argv[5]; @@ -2910,6 +2913,7 @@ void xreadCommand(client *c) { int serve_claimed = 0; int serve_synchronously = 0; int serve_history = 0; /* True for XREADGROUP with ID != ">". 
*/ + int consumer_created = 0; streamConsumer *consumer = NULL; /* Unused if XREAD */ streamPropInfo spi = {c->argv[streams_arg+i],groupname}; /* Unused if XREAD */ @@ -2970,10 +2974,7 @@ void xreadCommand(client *c) { c->db->id,SCC_DEFAULT); if (server.memory_tracking_enabled) updateSlotAllocSize(c->db,getKeySlot(c->argv[streams_arg+i]->ptr),o,old_alloc,kvobjAllocSize(o)); - if (noack) - streamPropagateConsumerCreation(c,spi.keyname, - spi.groupname, - consumer->name); + consumer_created = 1; } consumer->seen_time = commandTimeSnapshot(); keyModified(c,c->db,c->argv[streams_arg+i],o,0); /* only update LRM */ @@ -2999,6 +3000,7 @@ void xreadCommand(client *c) { flags |= STREAM_RWR_CLAIMED; } + unsigned long propCount = 0; if (serve_synchronously) { arraylen++; if (arraylen == 1) arraylen_ptr = addReplyDeferredLen(c); @@ -3013,7 +3015,6 @@ void xreadCommand(client *c) { if (c->resp == 2) addReplyArrayLen(c,2); addReplyBulk(c,c->argv[streams_arg+i]); - unsigned long propCount = 0; if (noack) flags |= STREAM_RWR_NOACK; if (serve_history) flags |= STREAM_RWR_HISTORY; if (server.memory_tracking_enabled) @@ -3028,6 +3029,14 @@ void xreadCommand(client *c) { keyModified(c,c->db,c->argv[streams_arg+i],o,0); /* only update LRM */ } } + + /* Propagate consumer creation only when no XCLAIM was generated, + * since XCLAIM implicitly creates the consumer on the replica. + * With NOACK the PEL/XCLAIM path is skipped entirely, so we + * always need explicit propagation regardless of propCount. */ + if (consumer_created && (noack || propCount == 0)) { + streamPropagateConsumerCreation(c,spi.keyname, spi.groupname, consumer->name); + } } /* We replied synchronously! Set the top array len and return to caller. 
*/ diff --git a/tests/unit/type/stream-cgroups.tcl b/tests/unit/type/stream-cgroups.tcl index 9adb7c705..357e3baea 100644 --- a/tests/unit/type/stream-cgroups.tcl +++ b/tests/unit/type/stream-cgroups.tcl @@ -1905,6 +1905,79 @@ start_server { } } + start_server {tags {"repl external:skip" "stream"}} { + # Verify that XREADGROUP propagates a newly created consumer to + # the replica in cases where no XCLAIM is generated (XCLAIM + # implicitly creates the consumer, so explicit propagation is + # only needed when it is absent). Two cases are tested: + # 1. Without NOACK and no messages to deliver — no XCLAIM at all. + # 2. With NOACK and messages delivered — NOACK skips PEL/XCLAIM. + test "XREADGROUP propagates new consumer to replica" { + set master [srv 0 client] + set master_host [srv 0 host] + set master_port [srv 0 port] + + start_server {tags {"stream"}} { + set replica [srv 0 client] + + $replica replicaof $master_host $master_port + wait_for_sync $replica + + $master DEL mystream + $master XADD mystream 1-0 f v + $master XGROUP CREATE mystream grp 0 + + # Consume the only message so the stream has no + # new messages pending for delivery. + $master XREADGROUP GROUP grp c1 STREAMS mystream > + $master XACK mystream grp 1-0 + + wait_for_ofs_sync $master $replica + + # Case 1: XREADGROUP without NOACK for a brand-new + # consumer when there are NO messages to deliver. + # No XCLAIM is generated, so the consumer must be + # explicitly propagated. 
+ set reply [$master XREADGROUP GROUP grp c2 STREAMS mystream >] + assert_equal $reply {} + + set master_consumers [$master XINFO CONSUMERS mystream grp] + set master_names [lmap c $master_consumers {dict get $c name}] + assert {[lsearch $master_names "c2"] >= 0} + + wait_for_ofs_sync $master $replica + + set replica_consumers [$replica XINFO CONSUMERS mystream grp] + set replica_names [lmap c $replica_consumers {dict get $c name}] + if {[lsearch $replica_names "c2"] < 0} { + fail "Consumer 'c2' not found on replica (have: $replica_names)" + } + + # Case 2: XREADGROUP with NOACK for a brand-new consumer + # when a message IS available. NOACK skips PEL/XCLAIM + # entirely, so the consumer must be explicitly propagated + # even though messages were delivered. + $master XADD mystream 2-0 f v + wait_for_ofs_sync $master $replica + + set reply [$master XREADGROUP GROUP grp c3 NOACK STREAMS mystream >] + assert {$reply ne {}} + + set master_consumers [$master XINFO CONSUMERS mystream grp] + set master_names [lmap c $master_consumers {dict get $c name}] + assert {[lsearch $master_names "c3"] >= 0} + + wait_for_ofs_sync $master $replica + + set replica_consumers [$replica XINFO CONSUMERS mystream grp] + set replica_names [lmap c $replica_consumers {dict get $c name}] + if {[lsearch $replica_names "c3"] < 0} { + fail "Consumer 'c3' not found on replica (have: $replica_names)" + } + } + } + } + start_server {} { if {!$::force_resp3} { test "XREADGROUP CLAIM field types are correct" { From e97fe246aa13976231501561027a6b83d9b37bdb Mon Sep 17 00:00:00 2001 From: dagecko Date: Wed, 8 Apr 2026 22:17:39 -0400 Subject: [PATCH 06/32] Pin third-party action to commit SHA and move secrets to step env (#14937) --- .github/workflows/codecov.yml | 2 +- .github/workflows/coverity.yml | 11 ++++++++--- .github/workflows/daily.yml | 2 +- 3 files changed, 10 insertions(+), 5 deletions(-) diff --git a/.github/workflows/codecov.yml b/.github/workflows/codecov.yml index 82656ac31..5108ec907 
100644 --- a/.github/workflows/codecov.yml +++ b/.github/workflows/codecov.yml @@ -18,7 +18,7 @@ jobs: make lcov - name: Upload coverage reports to Codecov - uses: codecov/codecov-action@v4 + uses: codecov/codecov-action@57e3a136b779b570ffcdbf80b3bdc90e7fab3de2 # v6 with: token: ${{ secrets.CODECOV_TOKEN }} file: ./src/redis.info diff --git a/.github/workflows/coverity.yml b/.github/workflows/coverity.yml index 0237c8739..f5d37ae5c 100644 --- a/.github/workflows/coverity.yml +++ b/.github/workflows/coverity.yml @@ -14,9 +14,11 @@ jobs: - uses: actions/checkout@main - name: Download and extract the Coverity Build Tool run: | - wget -q https://scan.coverity.com/download/cxx/linux64 --post-data "token=${{ secrets.COVERITY_SCAN_TOKEN }}&project=redis-unstable" -O cov-analysis-linux64.tar.gz + wget -q https://scan.coverity.com/download/cxx/linux64 --post-data "token=${COVERITY_SCAN_TOKEN}&project=redis-unstable" -O cov-analysis-linux64.tar.gz mkdir cov-analysis-linux64 tar xzf cov-analysis-linux64.tar.gz --strip 1 -C cov-analysis-linux64 + env: + COVERITY_SCAN_TOKEN: ${{ secrets.COVERITY_SCAN_TOKEN }} - name: Install Redis dependencies run: sudo apt install -y gcc tcl8.6 tclx procps libssl-dev - name: Build with cov-build @@ -26,7 +28,10 @@ jobs: tar czvf cov-int.tgz cov-int curl \ --form project=redis-unstable \ - --form email=${{ secrets.COVERITY_SCAN_EMAIL }} \ - --form token=${{ secrets.COVERITY_SCAN_TOKEN }} \ + --form email="${COVERITY_SCAN_EMAIL}" \ + --form token="${COVERITY_SCAN_TOKEN}" \ --form file=@cov-int.tgz \ https://scan.coverity.com/builds + env: + COVERITY_SCAN_EMAIL: ${{ secrets.COVERITY_SCAN_EMAIL }} + COVERITY_SCAN_TOKEN: ${{ secrets.COVERITY_SCAN_TOKEN }} diff --git a/.github/workflows/daily.yml b/.github/workflows/daily.yml index b70f98618..2f0572444 100644 --- a/.github/workflows/daily.yml +++ b/.github/workflows/daily.yml @@ -1224,7 +1224,7 @@ jobs: if: true && !contains(github.event.inputs.skiptests, 'cluster') run: ./runtest-cluster 
--log-req-res --dont-clean --force-resp3 ${{github.event.inputs.cluster_test_args}} - name: Install Python dependencies - uses: py-actions/py-dependency-install@v4 + uses: py-actions/py-dependency-install@30aa0023464ed4b5b116bd9fbdab87acf01a484e # v4.1.0 with: path: "./utils/req-res-validator/requirements.txt" - name: validator From ae9552663d383f7e884dd7aea175f20e1b0c4be5 Mon Sep 17 00:00:00 2001 From: Momchil Marinov Date: Thu, 9 Apr 2026 17:58:37 +0300 Subject: [PATCH 07/32] RED-183356: Automate tarball creation (#14911) This PR implements the tarball creation job by reusing the `utils/releasetools/01_create_tarball.sh` script. It splits the original job into smaller jobs and moves the gate and test jobs before the upload job. The tarball job outputs the SHA256 checksum of the tarball and its size. Link to a run: https://github.com/m-marinov/redis/actions/runs/23437802059 --- .github/workflows/post-release-automation.yml | 244 +++++++++--------- 1 file changed, 127 insertions(+), 117 deletions(-) diff --git a/.github/workflows/post-release-automation.yml b/.github/workflows/post-release-automation.yml index b7bf20235..94d9cc52a 100644 --- a/.github/workflows/post-release-automation.yml +++ b/.github/workflows/post-release-automation.yml @@ -5,17 +5,15 @@ on: types: [published] jobs: - automate-release-scripts: - # Only run for the main redis/redis repository (not forks) - # Note: Only users with write access can publish releases, providing implicit authorization + extract-release-info: if: github.repository == 'redis/redis' runs-on: ubuntu-latest - + outputs: + tag_name: ${{ steps.release-info.outputs.tag_name }} + release_type: ${{ steps.release-info.outputs.release_type }} steps: - name: Checkout repository - uses: actions/checkout@v4 - with: - fetch-depth: 0 # Fetch all history for git archive + uses: actions/checkout@v5 - name: Extract and validate release information id: release-info @@ -23,15 +21,12 @@ jobs: TAG_NAME: ${{ github.event.release.tag_name }} GH_TOKEN: ${{ github.token }} run: | - # Extract tag name from the
release event (via env var to prevent injection) echo "tag_name=${TAG_NAME}" >> $GITHUB_OUTPUT echo "Release tag: ${TAG_NAME}" - # Get the latest release tag LATEST_TAG=$(gh release view --json tagName --jq '.tagName') - echo "Latest release tag from gh cli: ${LATEST_TAG}" + echo "Latest release tag(from gh release): ${LATEST_TAG}" - # Determine release type by comparing with latest release if [[ "${TAG_NAME}" == "${LATEST_TAG}" ]]; then echo "release_type=latest" >> $GITHUB_OUTPUT echo "Detected latest release: ${TAG_NAME}" @@ -40,116 +35,131 @@ jobs: echo "Detected non-latest release: ${TAG_NAME} (latest is ${LATEST_TAG})" fi - - name: Set up environment variables - run: | - echo "RELEASE_TAG=${{ steps.release-info.outputs.tag_name }}" >> $GITHUB_ENV - echo "RELEASE_TYPE=${{ steps.release-info.outputs.release_type }}" >> $GITHUB_ENV - echo "Environment variables set:" - echo " RELEASE_TAG: ${{ steps.release-info.outputs.tag_name }}" - echo " RELEASE_TYPE: ${{ steps.release-info.outputs.release_type }}" - + create-tarball: + needs: extract-release-info + runs-on: ubuntu-latest + env: + TAG_NAME: ${{ needs.extract-release-info.outputs.tag_name }} + outputs: + sha256: ${{ steps.checksum.outputs.sha256 }} + size_mb: ${{ steps.size.outputs.size_mb }} + size_warning: ${{ steps.size.outputs.size_warning }} + steps: + - name: Checkout repository + uses: actions/checkout@v5 + with: + ref: ${{ env.TAG_NAME }} + fetch-depth: 0 + - name: Create tarball - id: create-tarball + run: ./utils/releasetools/01_create_tarball.sh "$TAG_NAME" + + - name: Verify tarball size + id: size run: | - echo "Creating tarball for version ${RELEASE_TAG}..." - # TODO: Implement tarball creation using utils/releasetools/01_create_tarball.sh - # ./utils/releasetools/01_create_tarball.sh ${RELEASE_TAG} - - # Placeholder: Verify tarball was created - # TARBALL_PATH="/tmp/redis-${RELEASE_TAG}.tar.gz" - # if [ ! 
-f "${TARBALL_PATH}" ]; then - # echo "Error: Tarball not found at ${TARBALL_PATH}" - # exit 1 - # fi - # echo "tarball_path=${TARBALL_PATH}" >> $GITHUB_OUTPUT - - echo "✓ Tarball creation step (placeholder)" - - - name: Upload tarball - id: upload-tarball - run: | - echo "Uploading tarball for version ${RELEASE_TAG}..." - # TODO: Implement tarball upload - # This will require: - # - SSH credentials/keys for upload to download.redis.io - # - Adaptation of utils/releasetools/02_upload_tarball.sh for CI environment - - echo "✓ Tarball upload step (placeholder)" - - - name: Test release tarball - id: test-release - run: | - echo "Testing release tarball for version ${RELEASE_TAG}..." - # TODO: Implement release testing using utils/releasetools/03_test_release.sh - # This will: - # - Download the uploaded tarball - # - Extract and build Redis - - echo "✓ Release testing step (placeholder)" - - - name: Update release hashes - id: update-hashes - run: | - echo "Updating release hashes for version ${RELEASE_TAG}..." - # TODO: Implement hash update using utils/releasetools/04_release_hash.sh - # This will require: - # - Access to redis-hashes repository - # - Git credentials for committing and pushing - - echo "✓ Release hashes update step (placeholder)" - - - name: Approval gate for latest releases - if: steps.release-info.outputs.release_type == 'latest' - run: | - echo "Latest release detected. Manual approval required for production deployment." - # TODO: Implement approval workflow - # This could use GitHub Environments with required reviewers - # or a manual approval step - - echo "✓ Approval gate (placeholder)" - - - name: Update stable symlink (latest releases only) - if: steps.release-info.outputs.release_type == 'latest' - id: update-stable - run: | - echo "This is a latest release. Updating stable symlink after approval." 
- # TODO: Implement stable symlink update - # This step should only run for latest releases (not non-latest) - # It will update the redis-stable symlink on download.redis.io - # This is part of the upload script (02_upload_tarball.sh) - - echo "✓ Stable symlink update step (placeholder)" - - - name: Summary - if: always() - run: | - echo "## Post-Release Automation Summary" >> $GITHUB_STEP_SUMMARY - echo "" >> $GITHUB_STEP_SUMMARY - echo "- **Release Tag:** ${{ steps.release-info.outputs.tag_name }}" >> $GITHUB_STEP_SUMMARY - echo "- **Release Type:** ${{ steps.release-info.outputs.release_type }}" >> $GITHUB_STEP_SUMMARY - echo "" >> $GITHUB_STEP_SUMMARY - echo "### Steps Status" >> $GITHUB_STEP_SUMMARY - echo "- Create tarball: ${{ steps.create-tarball.outcome }}" >> $GITHUB_STEP_SUMMARY - echo "- Upload tarball: ${{ steps.upload-tarball.outcome }}" >> $GITHUB_STEP_SUMMARY - echo "- Test release: ${{ steps.test-release.outcome }}" >> $GITHUB_STEP_SUMMARY - echo "- Update hashes: ${{ steps.update-hashes.outcome }}" >> $GITHUB_STEP_SUMMARY - if [[ "${{ steps.release-info.outputs.release_type }}" == "latest" ]]; then - echo "- Update stable symlink: ${{ steps.update-stable.outcome }}" >> $GITHUB_STEP_SUMMARY + TARBALL="/tmp/redis-${TAG_NAME}.tar.gz" + SIZE_MB=$(du -m "$TARBALL" | cut -f1) + echo "Tarball size: ${SIZE_MB} MB" + echo "size_mb=${SIZE_MB}" >> $GITHUB_OUTPUT + if [ "$SIZE_MB" -lt 3 ] || [ "$SIZE_MB" -gt 5 ]; then + echo "::warning::Tarball size ${SIZE_MB} MB is outside expected range (3-5 MB)" + echo "size_warning=true" >> $GITHUB_OUTPUT + else + echo "size_warning=false" >> $GITHUB_OUTPUT fi - - name: Send Slack notification - if: always() + - name: Calculate SHA256 checksum + id: checksum run: | - echo "Sending Slack notification for release ${RELEASE_TAG}..." 
- # TODO: Implement Slack notification - # This will require: - # - Slack webhook URL or bot token (stored in secrets) - # - Determine appropriate channel (e.g., #releases, #redis-releases) - # - Craft message with release information and workflow status - # Example using webhook: - # curl -X POST -H 'Content-type: application/json' \ - # --data '{"channel":"#releases","text":"Release ${RELEASE_TAG} automation completed"}' \ - # ${{ secrets.SLACK_WEBHOOK_URL }} + TARBALL="/tmp/redis-${TAG_NAME}.tar.gz" + SHA256=$(shasum -a 256 "$TARBALL" | cut -d' ' -f1) + echo "SHA256: $SHA256" + echo "sha256=$SHA256" >> $GITHUB_OUTPUT - echo "✓ Slack notification step (placeholder)" + - name: Upload tarball as artifact + uses: actions/upload-artifact@v6 + with: + name: redis-${{ env.TAG_NAME }}-tarball + path: /tmp/redis-${{ env.TAG_NAME }}.tar.gz + compression-level: 0 + # approval-gate: + # needs: [extract-release-info, create-tarball] + # if: needs.extract-release-info.outputs.release_type == 'latest' + # runs-on: ubuntu-latest + # steps: + # - name: Approval gate + # run: | + # echo "Latest release detected. Manual approval required for production deployment." 
+ # # TODO: Implement approval workflow + # # This could use GitHub Environments with required reviewers + # # or a manual approval step + + # upload-tarball: + # needs: [extract-release-info, create-tarball, approval-gate] + # if: always() && !cancelled() && needs.create-tarball.result == 'success' && (needs.approval-gate.result == 'success' || needs.approval-gate.result == 'skipped') + # runs-on: ubuntu-latest + # steps: + # - name: Upload tarball + # run: | + # echo "TODO: Implement tarball upload" + # # This will require: + # # - SSH credentials/keys for upload to download.redis.io + # # - Adaptation of utils/releasetools/02_upload_tarball.sh for CI environment + + # test-release-tarball: + # needs: upload-tarball + # runs-on: ubuntu-latest + # steps: + # - name: Test release tarball + # run: | + # echo "TODO: Implement release testing using utils/releasetools/03_test_release.sh" + # # This will: + # # - Download the uploaded tarball + # # - Extract and build Redis + + # update-release-hashes: + # needs: test-release-tarball + # runs-on: ubuntu-latest + # steps: + # - name: Update release hashes + # run: | + # echo "TODO: Implement hash update using utils/releasetools/04_release_hash.sh" + # # This will require: + # # - Access to redis-hashes repository + # # - Git credentials for committing and pushing + + summary-and-notify: + needs: [extract-release-info, create-tarball] # update-release-hashes + if: always() && github.repository == 'redis/redis' + runs-on: ubuntu-latest + env: + TAG_NAME: ${{ needs.extract-release-info.outputs.tag_name }} + RELEASE_TYPE: ${{ needs.extract-release-info.outputs.release_type }} + SHA256: ${{ needs.create-tarball.outputs.sha256 }} + SIZE_MB: ${{ needs.create-tarball.outputs.size_mb }} + SIZE_WARNING: ${{ needs.create-tarball.outputs.size_warning }} + steps: + - name: Summary + run: | + { + echo "## Post-Release Automation Summary" + echo "" + echo "- **Release Tag:** ${TAG_NAME}" + echo "- **Release Type:** ${RELEASE_TYPE}" + 
echo "- **Tarball SHA256:** ${SHA256}" + echo "- **Tarball Size:** ${SIZE_MB} MB" + if [ "${SIZE_WARNING}" == "true" ]; then + echo "" + echo "> [!WARNING]" + echo "> Tarball size is outside expected range, check the logs for details." + fi + } >> $GITHUB_STEP_SUMMARY + + # - name: Send Slack notification + # run: | + # echo "TODO: Implement Slack notification" + # # This will require: + # # - Slack webhook URL or bot token (stored in secrets) + # # - Determine appropriate channel (e.g., #releases, #redis-releases) + # # - Craft message with release information and workflow status From 0d85627bf06a20561bfd9c3e739cfaba4422e091 Mon Sep 17 00:00:00 2001 From: ShubhamTaple <155555100+ShubhamTaple@users.noreply.github.com> Date: Fri, 10 Apr 2026 20:55:56 +0530 Subject: [PATCH 08/32] Use no_value dict type for stream_idmp_keys to explicitly mark it as a key-only set (#14987) Fixes #14985 ### Problem dict stream_idmp_keys was using objectKeyPointerValueDictType, in this dict type dicts are expected to have RObj as keys and Pointers as values, but stream_idmp_keys was not using the value field at all. 
### Solution This PR fixes the above issue by implementing new dict type (objectKeyNoValueDictType) for stream_idmp_keys --------- Co-authored-by: debing.sun --- src/cluster_asm.c | 2 +- src/db.c | 4 ++-- src/lazyfree.c | 2 +- src/server.c | 15 ++++++++++++++- src/server.h | 1 + src/t_stream.c | 2 +- 6 files changed, 20 insertions(+), 6 deletions(-) diff --git a/src/cluster_asm.c b/src/cluster_asm.c index db801a9a1..13e0fd44e 100644 --- a/src/cluster_asm.c +++ b/src/cluster_asm.c @@ -3033,7 +3033,7 @@ void asmTriggerBackgroundTrim(asmTrimCtx *trim_ctx, int migration_cleanup) { CLUSTER_SLOT_MASK_BITS, KVSTORE_ALLOCATE_DICTS_ON_DEMAND); estore *subexpires = estoreCreate(&subexpiresBucketsType, CLUSTER_SLOT_MASK_BITS); - dict *stream_idmp_keys = dictCreate(&objectKeyPointerValueDictType); + dict *stream_idmp_keys = dictCreate(&objectKeyNoValueDictType); size_t total_keys = 0; diff --git a/src/db.c b/src/db.c index 7d7a3e96b..18e9c47a9 100644 --- a/src/db.c +++ b/src/db.c @@ -1083,7 +1083,7 @@ redisDb *initTempDb(void) { tempDb[i].expires = kvstoreCreate(&kvstoreBaseType, &dbExpiresDictType, slot_count_bits, flags); tempDb[i].subexpires = estoreCreate(&subexpiresBucketsType, slot_count_bits); - tempDb[i].stream_idmp_keys = dictCreate(&objectKeyPointerValueDictType); + tempDb[i].stream_idmp_keys = dictCreate(&objectKeyNoValueDictType); } return tempDb; @@ -1117,7 +1117,7 @@ void streamMoveIdmpKeys(dict *src, dict *dst, int slot) { while ((de = dictNext(di)) != NULL) { robj *key = dictGetKey(de); if (calculateKeySlot(key->ptr) == slot) { - if (dictAdd(dst, key, dictGetVal(de)) == DICT_OK) { + if (dictAddRaw(dst, key, NULL)) { incrRefCount(key); } dictDelete(src, key); diff --git a/src/lazyfree.c b/src/lazyfree.c index 5d89d00aa..8d291bc9a 100644 --- a/src/lazyfree.c +++ b/src/lazyfree.c @@ -332,7 +332,7 @@ void emptyDbAsync(redisDb *db) { db->keys = kvstoreCreate(&kvstoreExType, &dbDictType, slot_count_bits, flags); db->expires = kvstoreCreate(&kvstoreBaseType, 
&dbExpiresDictType, slot_count_bits, flags); db->subexpires = estoreCreate(&subexpiresBucketsType, slot_count_bits); - db->stream_idmp_keys = dictCreate(&objectKeyPointerValueDictType); + db->stream_idmp_keys = dictCreate(&objectKeyNoValueDictType); protectClientReplyObjects(); /* Protect client reply objects before async free. */ emptyDbDataAsync(oldkeys, oldexpires, oldsubexpires, old_stream_idmp_keys, NULL); } diff --git a/src/server.c b/src/server.c index aa012918b..c7f415717 100644 --- a/src/server.c +++ b/src/server.c @@ -581,6 +581,19 @@ dictType objectKeyPointerValueDictType = { NULL /* allow to expand */ }; +/* Dict type with robj pointer keys and no values. */ +dictType objectKeyNoValueDictType = { + dictEncObjHash, /* hash function */ + NULL, /* key dup */ + NULL, /* val dup */ + dictEncObjKeyCompare, /* key compare */ + dictObjectDestructor, /* key destructor */ + NULL, /* val destructor */ + NULL, /* allow to expand */ + .no_value = 1, /* no values in this dict */ + .keys_are_odd = 0, /* robj pointers are not odd */ +}; + /* Like objectKeyPointerValueDictType(), but values can be destroyed, if * not NULL, calling zfree(). 
*/ dictType objectKeyHeapPointerValueDictType = { @@ -2996,7 +3009,7 @@ void initServer(void) { server.db[j].blocking_keys = dictCreate(&keylistDictType); server.db[j].blocking_keys_unblock_on_nokey = dictCreate(&objectKeyPointerValueDictType); server.db[j].stream_claim_pending_keys = dictCreate(&objectKeyPointerValueDictType); - server.db[j].stream_idmp_keys = dictCreate(&objectKeyPointerValueDictType); + server.db[j].stream_idmp_keys = dictCreate(&objectKeyNoValueDictType); server.db[j].ready_keys = dictCreate(&objectKeyPointerValueDictType); server.db[j].watched_keys = dictCreate(&keylistDictType); server.db[j].id = j; diff --git a/src/server.h b/src/server.h index 8e2753300..506191327 100644 --- a/src/server.h +++ b/src/server.h @@ -3004,6 +3004,7 @@ typedef struct { extern struct redisServer server; extern struct sharedObjectsStruct shared; extern dictType objectKeyPointerValueDictType; +extern dictType objectKeyNoValueDictType; extern dictType objectKeyHeapPointerValueDictType; extern dictType setDictType; extern dictType BenchmarkDictType; diff --git a/src/t_stream.c b/src/t_stream.c index faa8aba0f..2fe882572 100644 --- a/src/t_stream.c +++ b/src/t_stream.c @@ -5983,7 +5983,7 @@ void streamKeyLoaded(redisDb *db, robj *key, robj *val) { } } -/* To be used when a steam key was removed from ram, un-redigster from stream_idmp_keys if needed */ +/* To be used when a stream key was removed from ram, un-register from stream_idmp_keys if needed */ void streamKeyRemoved(redisDb *db, robj *key, robj *val) { UNUSED(val); dictDelete(db->stream_idmp_keys, key); From e8da0e5b47eb7a5872f21898712d22b71ae23dc7 Mon Sep 17 00:00:00 2001 From: "h.o.t. 
neglected" Date: Mon, 13 Apr 2026 02:45:14 -0400 Subject: [PATCH 09/32] Fix brittle assert_match patterns for unexpected slowlog fields (#14948) --- tests/cluster/tests/18-info.tcl | 2 +- tests/unit/acl.tcl | 2 +- tests/unit/info.tcl | 30 +++++++------- tests/unit/moduleapi/blockedclient.tcl | 8 ++-- tests/unit/moduleapi/moduleauth.tcl | 54 +++++++++++++------------- tests/unit/type/list.tcl | 4 +- tests/unit/type/stream-cgroups.tcl | 2 +- 7 files changed, 51 insertions(+), 51 deletions(-) diff --git a/tests/cluster/tests/18-info.tcl b/tests/cluster/tests/18-info.tcl index 68c62d357..744934990 100644 --- a/tests/cluster/tests/18-info.tcl +++ b/tests/cluster/tests/18-info.tcl @@ -41,5 +41,5 @@ test "errorstats: rejected call due to MOVED Redirection" { } assert_match {} [errorstat $pok MOVED] assert_match {*count=1*} [errorstat $perr MOVED] - assert_match {*calls=0,*,rejected_calls=1,failed_calls=0} [cmdstat $perr set] + assert_match {*calls=0,*,rejected_calls=1,failed_calls=0*} [cmdstat $perr set] } diff --git a/tests/unit/acl.tcl b/tests/unit/acl.tcl index 6f9d94f29..77bb37095 100644 --- a/tests/unit/acl.tcl +++ b/tests/unit/acl.tcl @@ -357,7 +357,7 @@ start_server {tags {"acl external:skip"}} { assert_error {*NOPERM No permissions to access a key*} {$rd read} $rd ping $rd close - assert_match {*calls=0,usec=0,*,rejected_calls=1,failed_calls=0} [cmdrstat blpop r] + assert_match {*calls=0,usec=0,*,rejected_calls=1,failed_calls=0*} [cmdrstat blpop r] } test {Users can be configured to authenticate with any password} { diff --git a/tests/unit/info.tcl b/tests/unit/info.tcl index 0dee39d55..07543b3ad 100644 --- a/tests/unit/info.tcl +++ b/tests/unit/info.tcl @@ -121,7 +121,7 @@ start_server {tags {"info" "external:skip"}} { catch {r auth k} e assert_match {ERR AUTH*} $e assert_match {*count=1*} [errorstat ERR] - assert_match {*calls=1,*,rejected_calls=0,failed_calls=1} [cmdstat auth] + assert_match {*calls=1,*,rejected_calls=0,failed_calls=1*} [cmdstat auth] 
assert_equal [s total_error_replies] 1 r config resetstat assert_match {} [errorstat ERR] @@ -137,15 +137,15 @@ start_server {tags {"info" "external:skip"}} { catch {r exec} e assert_match {ERR AUTH*} $e assert_match {*count=1*} [errorstat ERR] - assert_match {*calls=1,*,rejected_calls=0,failed_calls=0} [cmdstat set] - assert_match {*calls=1,*,rejected_calls=0,failed_calls=1} [cmdstat auth] - assert_match {*calls=1,*,rejected_calls=0,failed_calls=0} [cmdstat exec] + assert_match {*calls=1,*,rejected_calls=0,failed_calls=0*} [cmdstat set] + assert_match {*calls=1,*,rejected_calls=0,failed_calls=1*} [cmdstat auth] + assert_match {*calls=1,*,rejected_calls=0,failed_calls=0*} [cmdstat exec] assert_equal [s total_error_replies] 1 # MULTI/EXEC command errors should still be pinpointed to him catch {r exec} e assert_match {ERR EXEC without MULTI} $e - assert_match {*calls=2,*,rejected_calls=0,failed_calls=1} [cmdstat exec] + assert_match {*calls=2,*,rejected_calls=0,failed_calls=1*} [cmdstat exec] assert_match {*count=2*} [errorstat ERR] assert_equal [s total_error_replies] 2 } @@ -174,7 +174,7 @@ start_server {tags {"info" "external:skip"}} { catch {r evalsha NotValidShaSUM 0} e assert_match {NOSCRIPT*} $e assert_match {*count=1*} [errorstat NOSCRIPT] - assert_match {*calls=1,*,rejected_calls=0,failed_calls=1} [cmdstat evalsha] + assert_match {*calls=1,*,rejected_calls=0,failed_calls=1*} [cmdstat evalsha] assert_equal [s total_error_replies] 1 r config resetstat assert_match {} [errorstat NOSCRIPT] @@ -188,7 +188,7 @@ start_server {tags {"info" "external:skip"}} { catch {r XGROUP CREATECONSUMER mystream mygroup consumer} e assert_match {NOGROUP*} $e assert_match {*count=1*} [errorstat NOGROUP] - assert_match {*calls=1,*,rejected_calls=0,failed_calls=1} [cmdstat xgroup\\|createconsumer] + assert_match {*calls=1,*,rejected_calls=0,failed_calls=1*} [cmdstat xgroup\\|createconsumer] r config resetstat assert_match {} [errorstat NOGROUP] } @@ -217,9 +217,9 @@ start_server 
{tags {"info" "external:skip"}} { assert_match {*count=1*} [errorstat ERR] assert_match {*count=1*} [errorstat EXECABORT] assert_equal [s total_error_replies] 2 - assert_match {*calls=0,*,rejected_calls=1,failed_calls=0} [cmdstat set] - assert_match {*calls=1,*,rejected_calls=0,failed_calls=0} [cmdstat multi] - assert_match {*calls=1,*,rejected_calls=0,failed_calls=1} [cmdstat exec] + assert_match {*calls=0,*,rejected_calls=1,failed_calls=0*} [cmdstat set] + assert_match {*calls=1,*,rejected_calls=0,failed_calls=0*} [cmdstat multi] + assert_match {*calls=1,*,rejected_calls=0,failed_calls=1*} [cmdstat exec] assert_equal [s total_error_replies] 2 r config resetstat assert_match {} [errorstat ERR] @@ -232,11 +232,11 @@ start_server {tags {"info" "external:skip"}} { catch {r set k} e assert_match {ERR wrong number of arguments for 'set' command} $e assert_match {*count=1*} [errorstat ERR] - assert_match {*calls=0,*,rejected_calls=1,failed_calls=0} [cmdstat set] + assert_match {*calls=0,*,rejected_calls=1,failed_calls=0*} [cmdstat set] # ensure that after a rejected command, valid ones are counted properly r set k1 v1 r set k2 v2 - assert_match {calls=2,*,rejected_calls=1,failed_calls=0} [cmdstat set] + assert_match {calls=2,*,rejected_calls=1,failed_calls=0*} [cmdstat set] assert_equal [s total_error_replies] 1 } @@ -248,7 +248,7 @@ start_server {tags {"info" "external:skip"}} { catch {r set a b} e assert_match {OOM*} $e assert_match {*count=1*} [errorstat OOM] - assert_match {*calls=0,*,rejected_calls=1,failed_calls=0} [cmdstat set] + assert_match {*calls=0,*,rejected_calls=1,failed_calls=0*} [cmdstat set] assert_equal [s total_error_replies] 1 r config resetstat assert_match {} [errorstat OOM] @@ -264,7 +264,7 @@ start_server {tags {"info" "external:skip"}} { catch {r set a b} e assert_match {NOPERM*} $e assert_match {*count=1*} [errorstat NOPERM] - assert_match {*calls=0,*,rejected_calls=1,failed_calls=0} [cmdstat set] + assert_match 
{*calls=0,*,rejected_calls=1,failed_calls=0*} [cmdstat set] assert_equal [s total_error_replies] 1 r config resetstat assert_match {} [errorstat NOPERM] @@ -283,7 +283,7 @@ start_server {tags {"info" "external:skip"}} { r client unblock $rd_id error assert_error {UNBLOCKED*} {$rd read} assert_match {*count=1*} [errorstat UNBLOCKED] - assert_match {*calls=1,*,rejected_calls=0,failed_calls=1} [cmdstat blpop] + assert_match {*calls=1,*,rejected_calls=0,failed_calls=1*} [cmdstat blpop] assert_equal [s total_error_replies] 1 $rd close } diff --git a/tests/unit/moduleapi/blockedclient.tcl b/tests/unit/moduleapi/blockedclient.tcl index 7dcc1d6a6..71aafd444 100644 --- a/tests/unit/moduleapi/blockedclient.tcl +++ b/tests/unit/moduleapi/blockedclient.tcl @@ -248,16 +248,16 @@ foreach call_type {nested normal} { # RM_Call that propagates an error assert_error "WRONGTYPE*" {r do_rm_call hgetall x} assert_equal [errorrstat WRONGTYPE r] {count=1} - assert_match {*calls=1,*,rejected_calls=0,failed_calls=1} [cmdrstat hgetall r] + assert_match {*calls=1,*,rejected_calls=0,failed_calls=1*} [cmdrstat hgetall r] # RM_Call from bg thread that propagates an error assert_error "WRONGTYPE*" {r do_bg_rm_call hgetall x} assert_equal [errorrstat WRONGTYPE r] {count=2} - assert_match {*calls=2,*,rejected_calls=0,failed_calls=2} [cmdrstat hgetall r] + assert_match {*calls=2,*,rejected_calls=0,failed_calls=2*} [cmdrstat hgetall r] assert_equal [s total_error_replies] 6 - assert_match {*calls=5,*,rejected_calls=0,failed_calls=4} [cmdrstat do_rm_call r] - assert_match {*calls=2,*,rejected_calls=0,failed_calls=2} [cmdrstat do_bg_rm_call r] + assert_match {*calls=5,*,rejected_calls=0,failed_calls=4*} [cmdrstat do_rm_call r] + assert_match {*calls=2,*,rejected_calls=0,failed_calls=2*} [cmdrstat do_bg_rm_call r] } set master [srv 0 client] diff --git a/tests/unit/moduleapi/moduleauth.tcl b/tests/unit/moduleapi/moduleauth.tcl index f8b721ce1..16951175c 100644 --- a/tests/unit/moduleapi/moduleauth.tcl 
+++ b/tests/unit/moduleapi/moduleauth.tcl @@ -36,15 +36,15 @@ start_server {tags {"modules external:skip"}} { r acl setuser foo >pwd on ~* &* +@all assert_equal {OK} [r AUTH foo allow] assert_error {*Auth denied by Misc Module*} {r AUTH foo deny} - assert_match {*calls=2,*,rejected_calls=0,failed_calls=1} [cmdstat auth] + assert_match {*calls=2,*,rejected_calls=0,failed_calls=1*} [cmdstat auth] assert_error {*WRONGPASS*} {r AUTH foo nomatch} - assert_match {*calls=3,*,rejected_calls=0,failed_calls=2} [cmdstat auth] + assert_match {*calls=3,*,rejected_calls=0,failed_calls=2*} [cmdstat auth] assert_equal {OK} [r AUTH foo pwd] # Test for No Pass user r acl setuser foo on ~* &* +@all nopass assert_equal {OK} [r AUTH foo allow] assert_error {*Auth denied by Misc Module*} {r AUTH foo deny} - assert_match {*calls=6,*,rejected_calls=0,failed_calls=3} [cmdstat auth] + assert_match {*calls=6,*,rejected_calls=0,failed_calls=3*} [cmdstat auth] assert_equal {OK} [r AUTH foo nomatch] # Validate that the Module added an ACL Log entry. 
@@ -67,13 +67,13 @@ start_server {tags {"modules external:skip"}} { assert_equal $hello3_response [r HELLO 3 AUTH foo allow] # Validate denying AUTH for the HELLO cmd assert_error {*Auth denied by Misc Module*} {r HELLO 2 AUTH foo deny} - assert_match {*calls=5,*,rejected_calls=0,failed_calls=1} [cmdstat hello] + assert_match {*calls=5,*,rejected_calls=0,failed_calls=1*} [cmdstat hello] assert_error {*WRONGPASS*} {r HELLO 2 AUTH foo nomatch} - assert_match {*calls=6,*,rejected_calls=0,failed_calls=2} [cmdstat hello] + assert_match {*calls=6,*,rejected_calls=0,failed_calls=2*} [cmdstat hello] assert_error {*Auth denied by Misc Module*} {r HELLO 3 AUTH foo deny} - assert_match {*calls=7,*,rejected_calls=0,failed_calls=3} [cmdstat hello] + assert_match {*calls=7,*,rejected_calls=0,failed_calls=3*} [cmdstat hello] assert_error {*WRONGPASS*} {r HELLO 3 AUTH foo nomatch} - assert_match {*calls=8,*,rejected_calls=0,failed_calls=4} [cmdstat hello] + assert_match {*calls=8,*,rejected_calls=0,failed_calls=4*} [cmdstat hello] # Validate that the Module added an ACL Log entry. 
set entry [lindex [r ACL LOG] 1] @@ -97,10 +97,10 @@ start_server {tags {"modules external:skip"}} { r client setname client0 assert_error {*Auth denied by Misc Module*} {r HELLO 2 AUTH foo deny setname client1} assert {[r client getname] eq {client0}} - assert_match {*calls=3,*,rejected_calls=0,failed_calls=1} [cmdstat hello] + assert_match {*calls=3,*,rejected_calls=0,failed_calls=1*} [cmdstat hello] assert_error {*WRONGPASS*} {r HELLO 2 AUTH foo nomatch setname client2} assert {[r client getname] eq {client0}} - assert_match {*calls=4,*,rejected_calls=0,failed_calls=2} [cmdstat hello] + assert_match {*calls=4,*,rejected_calls=0,failed_calls=2*} [cmdstat hello] } test {test blocking module AUTH} { @@ -109,15 +109,15 @@ start_server {tags {"modules external:skip"}} { r acl setuser foo >pwd on ~* &* +@all assert_equal {OK} [r AUTH foo block_allow] assert_error {*Auth denied by Misc Module*} {r AUTH foo block_deny} - assert_match {*calls=2,*,rejected_calls=0,failed_calls=1} [cmdstat auth] + assert_match {*calls=2,*,rejected_calls=0,failed_calls=1*} [cmdstat auth] assert_error {*WRONGPASS*} {r AUTH foo nomatch} - assert_match {*calls=3,*,rejected_calls=0,failed_calls=2} [cmdstat auth] + assert_match {*calls=3,*,rejected_calls=0,failed_calls=2*} [cmdstat auth] assert_equal {OK} [r AUTH foo pwd] # Test for No Pass user r acl setuser foo on ~* &* +@all nopass assert_equal {OK} [r AUTH foo block_allow] assert_error {*Auth denied by Misc Module*} {r AUTH foo block_deny} - assert_match {*calls=6,*,rejected_calls=0,failed_calls=3} [cmdstat auth] + assert_match {*calls=6,*,rejected_calls=0,failed_calls=3*} [cmdstat auth] assert_equal {OK} [r AUTH foo nomatch] # Validate that every Blocking AUTH command took at least 500000 usec. 
set stats [cmdstat auth] @@ -144,13 +144,13 @@ start_server {tags {"modules external:skip"}} { assert_equal $hello3_response [r HELLO 3 AUTH foo block_allow] # validate denying AUTH for the HELLO cmd assert_error {*Auth denied by Misc Module*} {r HELLO 2 AUTH foo block_deny} - assert_match {*calls=5,*,rejected_calls=0,failed_calls=1} [cmdstat hello] + assert_match {*calls=5,*,rejected_calls=0,failed_calls=1*} [cmdstat hello] assert_error {*WRONGPASS*} {r HELLO 2 AUTH foo nomatch} - assert_match {*calls=6,*,rejected_calls=0,failed_calls=2} [cmdstat hello] + assert_match {*calls=6,*,rejected_calls=0,failed_calls=2*} [cmdstat hello] assert_error {*Auth denied by Misc Module*} {r HELLO 3 AUTH foo block_deny} - assert_match {*calls=7,*,rejected_calls=0,failed_calls=3} [cmdstat hello] + assert_match {*calls=7,*,rejected_calls=0,failed_calls=3*} [cmdstat hello] assert_error {*WRONGPASS*} {r HELLO 3 AUTH foo nomatch} - assert_match {*calls=8,*,rejected_calls=0,failed_calls=4} [cmdstat hello] + assert_match {*calls=8,*,rejected_calls=0,failed_calls=4*} [cmdstat hello] # Validate that every HELLO AUTH command took at least 500000 usec. set stats [cmdstat hello] regexp "usec_per_call=(\[0-9]{1,})\.*," $stats all usec_per_call @@ -178,10 +178,10 @@ start_server {tags {"modules external:skip"}} { r client setname client0 assert_error {*Auth denied by Misc Module*} {r HELLO 2 AUTH foo block_deny setname client1} assert {[r client getname] eq {client0}} - assert_match {*calls=3,*,rejected_calls=0,failed_calls=1} [cmdstat hello] + assert_match {*calls=3,*,rejected_calls=0,failed_calls=1*} [cmdstat hello] assert_error {*WRONGPASS*} {r HELLO 2 AUTH foo nomatch setname client2} assert {[r client getname] eq {client0}} - assert_match {*calls=4,*,rejected_calls=0,failed_calls=2} [cmdstat hello] + assert_match {*calls=4,*,rejected_calls=0,failed_calls=2*} [cmdstat hello] # Validate that every HELLO AUTH SETNAME command took at least 500000 usec. 
set stats [cmdstat hello] regexp "usec_per_call=(\[0-9]{1,})\.*," $stats all usec_per_call @@ -205,7 +205,7 @@ start_server {tags {"modules external:skip"}} { # Case 2 - Non Blocking Deny assert_error {*Auth denied by Misc Module*} {r AUTH foo deny} - assert_match {*calls=2,*,rejected_calls=0,failed_calls=1} [cmdstat auth] + assert_match {*calls=2,*,rejected_calls=0,failed_calls=1*} [cmdstat auth] r config resetstat @@ -214,7 +214,7 @@ start_server {tags {"modules external:skip"}} { # Case 4 - Blocking Deny assert_error {*Auth denied by Misc Module*} {r AUTH foo block_deny} - assert_match {*calls=2,*,rejected_calls=0,failed_calls=1} [cmdstat auth] + assert_match {*calls=2,*,rejected_calls=0,failed_calls=1*} [cmdstat auth] # Validate that every Blocking AUTH command took at least 500000 usec. set stats [cmdstat auth] @@ -228,13 +228,13 @@ start_server {tags {"modules external:skip"}} { # Case 6 - Non Blocking Deny via the second module. assert_error {*Auth denied by Misc Module*} {r AUTH foo deny_two} - assert_match {*calls=2,*,rejected_calls=0,failed_calls=1} [cmdstat auth] + assert_match {*calls=2,*,rejected_calls=0,failed_calls=1*} [cmdstat auth] r config resetstat # Case 7 - All four auth callbacks "Skip" by not explicitly allowing or denying. 
assert_error {*WRONGPASS*} {r AUTH foo nomatch} - assert_match {*calls=1,*,rejected_calls=0,failed_calls=1} [cmdstat auth] + assert_match {*calls=1,*,rejected_calls=0,failed_calls=1*} [cmdstat auth] assert_equal {OK} [r AUTH foo pwd] # Because we had to attempt all 4 callbacks, validate that the AUTH command took at least @@ -283,7 +283,7 @@ start_server {tags {"modules external:skip"}} { r multi r AUTH foo block_allow assert_error {*ERR Blocking module command called from transaction*} {r exec} - assert_match {*calls=2,*,rejected_calls=0,failed_calls=1} [cmdstat auth] + assert_match {*calls=2,*,rejected_calls=0,failed_calls=1*} [cmdstat auth] } test {Disabling Redis User during blocking module auth} { @@ -300,7 +300,7 @@ start_server {tags {"modules external:skip"}} { wait_for_blocked_clients_count 0 500 10 $rd flush assert_error {*WRONGPASS*} { $rd read } - assert_match {*calls=1,*,rejected_calls=0,failed_calls=1} [cmdstat auth] + assert_match {*calls=1,*,rejected_calls=0,failed_calls=1*} [cmdstat auth] } test {Killing a client in the middle of blocking module auth} { @@ -354,7 +354,7 @@ start_server {tags {"modules external:skip"}} { $rd flush assert_equal [$rd read] "OK" set stats [cmdstat auth] - assert_match {*calls=1,*,rejected_calls=0,failed_calls=0} $stats + assert_match {*calls=1,*,rejected_calls=0,failed_calls=0*} $stats # Validate that even the new blocking module auth cb which was registered in the middle of # blocking module auth is attempted - making it take twice the duration (2x 500000 us). @@ -387,7 +387,7 @@ start_server {tags {"modules external:skip"}} { wait_for_blocked_clients_count 0 500 10 $rd flush assert_equal [$rd read] "OK" - assert_match {*calls=1,*,rejected_calls=0,failed_calls=0} [cmdstat auth] + assert_match {*calls=1,*,rejected_calls=0,failed_calls=0*} [cmdstat auth] # Validate that unloading the moduleauthtwo module does not unregister module auth cbs of # of the testacl module. Module based auth should succeed. 
@@ -400,6 +400,6 @@ start_server {tags {"modules external:skip"}} { assert_error {*WRONGPASS*} {r AUTH foo block_allow} assert_error {*WRONGPASS*} {r AUTH foo allow_two} assert_error {*WRONGPASS*} {r AUTH foo allow} - assert_match {*calls=5,*,rejected_calls=0,failed_calls=3} [cmdstat auth] + assert_match {*calls=5,*,rejected_calls=0,failed_calls=3*} [cmdstat auth] } } diff --git a/tests/unit/type/list.tcl b/tests/unit/type/list.tcl index 8d17c5c38..96f80e9fb 100644 --- a/tests/unit/type/list.tcl +++ b/tests/unit/type/list.tcl @@ -2366,7 +2366,7 @@ foreach {pop} {BLPOP BLMPOP_RIGHT} { r LPUSH mylist 1 wait_for_blocked_clients_count 0 - assert_match {*calls=1,*,rejected_calls=0,failed_calls=0} [cmdrstat blpop r] + assert_match {*calls=1,*,rejected_calls=0,failed_calls=0*} [cmdrstat blpop r] $rd close } @@ -2390,7 +2390,7 @@ foreach {pop} {BLPOP BLMPOP_RIGHT} { # unblock the client on timeout r client unblock $id timeout - assert_match {*calls=1,*,rejected_calls=0,failed_calls=0} [cmdrstat blpop r] + assert_match {*calls=1,*,rejected_calls=0,failed_calls=0*} [cmdrstat blpop r] $rd close } diff --git a/tests/unit/type/stream-cgroups.tcl b/tests/unit/type/stream-cgroups.tcl index 357e3baea..60e40596b 100644 --- a/tests/unit/type/stream-cgroups.tcl +++ b/tests/unit/type/stream-cgroups.tcl @@ -690,7 +690,7 @@ start_server { # verify command stats, error stats and error counter work on failed blocked command assert_match {*count=1*} [errorrstat NOGROUP r] - assert_match {*calls=1,*,rejected_calls=0,failed_calls=1} [cmdrstat xreadgroup r] + assert_match {*calls=1,*,rejected_calls=0,failed_calls=1*} [cmdrstat xreadgroup r] assert_equal [s total_error_replies] 1 } From e1d35aca01c4240fa6c3feac55b00e9c1640abc0 Mon Sep 17 00:00:00 2001 From: Moti Cohen Date: Mon, 13 Apr 2026 09:46:46 +0300 Subject: [PATCH 10/32] Fix HEXPIRE numfields overflow (#15021) Validate HEXPIRE-family field counts without parser overflow keep flexible option order; only require fields fit in argv add 
tests for INT_MAX numfields across HEXPIRE/HPEXPIRE/HEXPIREAT/HPEXPIREAT --- src/t_hash.c | 5 +++-- tests/unit/type/hash-field-expire.tcl | 5 +++++ 2 files changed, 8 insertions(+), 2 deletions(-) diff --git a/src/t_hash.c b/src/t_hash.c index acfa6c6a9..e258eb71f 100644 --- a/src/t_hash.c +++ b/src/t_hash.c @@ -3608,15 +3608,16 @@ static int parseHashCommandArgs(client *c, HashCommandArgs *args, &numFields, "Parameter `numFields` should be greater than 0") != C_OK) return C_ERR; - args->fieldCount = (int)numFields; args->firstFieldPos = i + 2; /* Check bounds - we must have exactly the right number of fields */ - if (args->firstFieldPos + args->fieldCount > c->argc) { + if (numFields > c->argc - args->firstFieldPos) { addReplyError(c, "wrong number of arguments"); return C_ERR; } + args->fieldCount = (int)numFields; + /* Skip over the field arguments */ i = args->firstFieldPos + args->fieldCount - 1; continue; diff --git a/tests/unit/type/hash-field-expire.tcl b/tests/unit/type/hash-field-expire.tcl index 7f3520e80..e1ba72019 100644 --- a/tests/unit/type/hash-field-expire.tcl +++ b/tests/unit/type/hash-field-expire.tcl @@ -2359,6 +2359,11 @@ start_server {tags {"hash"}} { assert_error {*Parameter*numFields*should be greater than 0*} {r HEXPIRE myhash 60 FIELDS -1 f1} assert_error {*invalid number of fields*} {r HSETEX myhash FIELDS 0 f1 v1 EX 60} assert_error {*invalid number of fields*} {r HGETEX myhash FIELDS 0 f1 EX 60} + set future_sec [expr {[clock seconds] + 60}] + set future_ms [expr {[clock milliseconds] + 60000}] + foreach {cmd expire} [list HEXPIRE 60 HPEXPIRE 60000 HEXPIREAT $future_sec HPEXPIREAT $future_ms] { + assert_error {*wrong number of arguments*} [list r $cmd myhash $expire FIELDS 2147483647 f1] + } # Test missing FIELDS keyword assert_error {*unknown argument*} {r HEXPIRE myhash 60 2 f1 f2} From 80f1ebda8850d8c149111e4fa6c976edf63b0706 Mon Sep 17 00:00:00 2001 From: Sergei Georgiev Date: Tue, 14 Apr 2026 09:21:53 +0300 Subject: [PATCH 11/32] 
Add AGGREGATE COUNT option to ZUNION, ZINTER, ZUNIONSTORE, and ZINTERSTORE (#14892) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ### Overview This PR adds a new `COUNT` aggregation mode to the `ZUNIONSTORE`, `ZINTERSTORE`, `ZUNION`, and `ZINTER` sorted set commands. When `AGGREGATE COUNT` is specified, the resulting score for each element reflects how many input sets contain it (optionally scaled by `WEIGHTS`), rather than combining the actual scores of the elements. This enables a common use case — counting set membership frequency — directly at the command level, without application-side workarounds. ### Problem Statement For developers who need to know **how many input sorted sets contain each element**, there is no single-command solution today. **Example:** given several game leaderboards, find how many leaderboards each player appears in. The existing aggregation modes (`SUM`, `MIN`, `MAX`) all operate on the elements' scores. To ignore scores and just count set membership, you'd currently need to copy each sorted set with all scores set to 1, then run `ZUNIONSTORE`/`ZINTERSTORE` with `SUM` — requiring multiple round trips, temporary keys, and application-level locking to avoid races. A `COUNT` aggregation mode solves this directly. ### Solution Introduces `AGGREGATE COUNT` as a fourth aggregation mode: - `ZINTER numkeys key [key ...] [WEIGHTS weight [weight ...]] [AGGREGATE ] [WITHSCORES]` - `ZINTERSTORE destination numkeys key [key ...] [WEIGHTS weight [weight ...]] [AGGREGATE ]` - `ZUNION numkeys key [key ...] [WEIGHTS weight [weight ...]] [AGGREGATE ] [WITHSCORES]` - `ZUNIONSTORE destination numkeys key [key ...] [WEIGHTS weight [weight ...]] [AGGREGATE ]` When `COUNT` is specified, **the scores in the input sets are ignored**. Note that `WEIGHTS` is **not** ignored — each set contributes its weight (default 1) per element, and the contributions are summed. 
**Implementation details:** A new helper function `zuiWeightedScore()` computes the per-set contribution: ```c inline static double zuiWeightedScore(double score, double weight, int aggregate) { return (aggregate == REDIS_AGGR_COUNT) ? weight : weight * score; } ``` The `zunionInterAggregate()` function treats `COUNT` identically to `SUM` — it adds the per-set contributions. All four call sites where `weight * score` was previously computed inline are updated to use `zuiWeightedScore()`. ### Examples ``` > ZADD s1 1 foo 1 bar > ZADD s2 2 foo 2 bar > ZADD s3 3 foo ``` **With `SUM` (existing behavior, for comparison):** ``` > ZINTERSTORE t1 3 s1 s2 s3 WEIGHTS 10 5 3 AGGREGATE SUM (integer) 1 > ZRANGE t1 0 -1 WITHSCORES 1) "foo" 2) "29" > ZUNIONSTORE t1 3 s1 s2 s3 WEIGHTS 10 5 3 AGGREGATE SUM (integer) 2 > ZRANGE t1 0 -1 WITHSCORES 1) "bar" 2) "20" 3) "foo" 4) "29" ``` **With `COUNT` and `WEIGHTS`:** ``` > ZINTERSTORE t1 3 s1 s2 s3 WEIGHTS 10 5 3 AGGREGATE COUNT (integer) 1 > ZRANGE t1 0 -1 WITHSCORES 1) "foo" 2) "18" > ZUNIONSTORE t1 3 s1 s2 s3 WEIGHTS 10 5 3 AGGREGATE COUNT (integer) 2 > ZRANGE t1 0 -1 WITHSCORES 1) "bar" 2) "15" 3) "foo" 4) "18" ``` **With `COUNT` and no specified `WEIGHTS`** — resulting score equals the number of input sorted sets containing the element: ``` > ZINTERSTORE t1 3 s1 s2 s3 AGGREGATE COUNT (integer) 1 > ZRANGE t1 0 -1 WITHSCORES 1) "foo" 2) "3" > ZUNIONSTORE t1 3 s1 s2 s3 AGGREGATE COUNT (integer) 2 > ZRANGE t1 0 -1 WITHSCORES 1) "bar" 2) "2" 3) "foo" 4) "3" ``` ### Backward Compatibility This is a fully additive change. The new `COUNT` keyword is only recognized after the `AGGREGATE` token in the four affected commands. Existing commands, arguments, and default behavior (`AGGREGATE SUM`) are completely unchanged. No new command is introduced, and no existing response format is modified. 
--- src/commands.def | 36 ++++++++++++++++-------- src/commands/zinter.json | 12 ++++++++ src/commands/zinterstore.json | 12 ++++++++ src/commands/zunion.json | 12 ++++++++ src/commands/zunionstore.json | 12 ++++++++ src/t_zset.c | 24 +++++++++++++--- tests/unit/type/zset.tcl | 53 +++++++++++++++++++++++++++++++++++ 7 files changed, 145 insertions(+), 16 deletions(-) diff --git a/src/commands.def b/src/commands.def index 07e1dccc6..9f1f88b0d 100644 --- a/src/commands.def +++ b/src/commands.def @@ -9165,7 +9165,9 @@ struct COMMAND_ARG ZINCRBY_Args[] = { #ifndef SKIP_CMD_HISTORY_TABLE /* ZINTER history */ -#define ZINTER_History NULL +commandHistory ZINTER_History[] = { +{"8.8.0","Added `COUNT` aggregate option."}, +}; #endif #ifndef SKIP_CMD_TIPS_TABLE @@ -9185,6 +9187,7 @@ struct COMMAND_ARG ZINTER_aggregate_Subargs[] = { {MAKE_ARG("sum",ARG_TYPE_PURE_TOKEN,-1,"SUM",NULL,NULL,CMD_ARG_NONE,0,NULL)}, {MAKE_ARG("min",ARG_TYPE_PURE_TOKEN,-1,"MIN",NULL,NULL,CMD_ARG_NONE,0,NULL)}, {MAKE_ARG("max",ARG_TYPE_PURE_TOKEN,-1,"MAX",NULL,NULL,CMD_ARG_NONE,0,NULL)}, +{MAKE_ARG("count",ARG_TYPE_PURE_TOKEN,-1,"COUNT",NULL,"8.8.0",CMD_ARG_NONE,0,NULL)}, }; /* ZINTER argument table */ @@ -9192,7 +9195,7 @@ struct COMMAND_ARG ZINTER_Args[] = { {MAKE_ARG("numkeys",ARG_TYPE_INTEGER,-1,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)}, {MAKE_ARG("key",ARG_TYPE_KEY,0,NULL,NULL,NULL,CMD_ARG_MULTIPLE,0,NULL)}, {MAKE_ARG("weight",ARG_TYPE_INTEGER,-1,"WEIGHTS",NULL,NULL,CMD_ARG_OPTIONAL|CMD_ARG_MULTIPLE,0,NULL)}, -{MAKE_ARG("aggregate",ARG_TYPE_ONEOF,-1,"AGGREGATE",NULL,NULL,CMD_ARG_OPTIONAL,3,NULL),.subargs=ZINTER_aggregate_Subargs}, +{MAKE_ARG("aggregate",ARG_TYPE_ONEOF,-1,"AGGREGATE",NULL,NULL,CMD_ARG_OPTIONAL,4,NULL),.subargs=ZINTER_aggregate_Subargs}, {MAKE_ARG("withscores",ARG_TYPE_PURE_TOKEN,-1,"WITHSCORES",NULL,NULL,CMD_ARG_OPTIONAL,0,NULL)}, }; @@ -9226,7 +9229,9 @@ struct COMMAND_ARG ZINTERCARD_Args[] = { #ifndef SKIP_CMD_HISTORY_TABLE /* ZINTERSTORE history */ -#define ZINTERSTORE_History NULL 
+commandHistory ZINTERSTORE_History[] = { +{"8.8.0","Added `COUNT` aggregate option."}, +}; #endif #ifndef SKIP_CMD_TIPS_TABLE @@ -9246,6 +9251,7 @@ struct COMMAND_ARG ZINTERSTORE_aggregate_Subargs[] = { {MAKE_ARG("sum",ARG_TYPE_PURE_TOKEN,-1,"SUM",NULL,NULL,CMD_ARG_NONE,0,NULL)}, {MAKE_ARG("min",ARG_TYPE_PURE_TOKEN,-1,"MIN",NULL,NULL,CMD_ARG_NONE,0,NULL)}, {MAKE_ARG("max",ARG_TYPE_PURE_TOKEN,-1,"MAX",NULL,NULL,CMD_ARG_NONE,0,NULL)}, +{MAKE_ARG("count",ARG_TYPE_PURE_TOKEN,-1,"COUNT",NULL,"8.8.0",CMD_ARG_NONE,0,NULL)}, }; /* ZINTERSTORE argument table */ @@ -9254,7 +9260,7 @@ struct COMMAND_ARG ZINTERSTORE_Args[] = { {MAKE_ARG("numkeys",ARG_TYPE_INTEGER,-1,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)}, {MAKE_ARG("key",ARG_TYPE_KEY,1,NULL,NULL,NULL,CMD_ARG_MULTIPLE,0,NULL)}, {MAKE_ARG("weight",ARG_TYPE_INTEGER,-1,"WEIGHTS",NULL,NULL,CMD_ARG_OPTIONAL|CMD_ARG_MULTIPLE,0,NULL)}, -{MAKE_ARG("aggregate",ARG_TYPE_ONEOF,-1,"AGGREGATE",NULL,NULL,CMD_ARG_OPTIONAL,3,NULL),.subargs=ZINTERSTORE_aggregate_Subargs}, +{MAKE_ARG("aggregate",ARG_TYPE_ONEOF,-1,"AGGREGATE",NULL,NULL,CMD_ARG_OPTIONAL,4,NULL),.subargs=ZINTERSTORE_aggregate_Subargs}, }; /********** ZLEXCOUNT ********************/ @@ -9894,7 +9900,9 @@ struct COMMAND_ARG ZSCORE_Args[] = { #ifndef SKIP_CMD_HISTORY_TABLE /* ZUNION history */ -#define ZUNION_History NULL +commandHistory ZUNION_History[] = { +{"8.8.0","Added `COUNT` aggregate option."}, +}; #endif #ifndef SKIP_CMD_TIPS_TABLE @@ -9914,6 +9922,7 @@ struct COMMAND_ARG ZUNION_aggregate_Subargs[] = { {MAKE_ARG("sum",ARG_TYPE_PURE_TOKEN,-1,"SUM",NULL,NULL,CMD_ARG_NONE,0,NULL)}, {MAKE_ARG("min",ARG_TYPE_PURE_TOKEN,-1,"MIN",NULL,NULL,CMD_ARG_NONE,0,NULL)}, {MAKE_ARG("max",ARG_TYPE_PURE_TOKEN,-1,"MAX",NULL,NULL,CMD_ARG_NONE,0,NULL)}, +{MAKE_ARG("count",ARG_TYPE_PURE_TOKEN,-1,"COUNT",NULL,"8.8.0",CMD_ARG_NONE,0,NULL)}, }; /* ZUNION argument table */ @@ -9921,7 +9930,7 @@ struct COMMAND_ARG ZUNION_Args[] = { 
{MAKE_ARG("numkeys",ARG_TYPE_INTEGER,-1,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)}, {MAKE_ARG("key",ARG_TYPE_KEY,0,NULL,NULL,NULL,CMD_ARG_MULTIPLE,0,NULL)}, {MAKE_ARG("weight",ARG_TYPE_INTEGER,-1,"WEIGHTS",NULL,NULL,CMD_ARG_OPTIONAL|CMD_ARG_MULTIPLE,0,NULL)}, -{MAKE_ARG("aggregate",ARG_TYPE_ONEOF,-1,"AGGREGATE",NULL,NULL,CMD_ARG_OPTIONAL,3,NULL),.subargs=ZUNION_aggregate_Subargs}, +{MAKE_ARG("aggregate",ARG_TYPE_ONEOF,-1,"AGGREGATE",NULL,NULL,CMD_ARG_OPTIONAL,4,NULL),.subargs=ZUNION_aggregate_Subargs}, {MAKE_ARG("withscores",ARG_TYPE_PURE_TOKEN,-1,"WITHSCORES",NULL,NULL,CMD_ARG_OPTIONAL,0,NULL)}, }; @@ -9929,7 +9938,9 @@ struct COMMAND_ARG ZUNION_Args[] = { #ifndef SKIP_CMD_HISTORY_TABLE /* ZUNIONSTORE history */ -#define ZUNIONSTORE_History NULL +commandHistory ZUNIONSTORE_History[] = { +{"8.8.0","Added `COUNT` aggregate option."}, +}; #endif #ifndef SKIP_CMD_TIPS_TABLE @@ -9949,6 +9960,7 @@ struct COMMAND_ARG ZUNIONSTORE_aggregate_Subargs[] = { {MAKE_ARG("sum",ARG_TYPE_PURE_TOKEN,-1,"SUM",NULL,NULL,CMD_ARG_NONE,0,NULL)}, {MAKE_ARG("min",ARG_TYPE_PURE_TOKEN,-1,"MIN",NULL,NULL,CMD_ARG_NONE,0,NULL)}, {MAKE_ARG("max",ARG_TYPE_PURE_TOKEN,-1,"MAX",NULL,NULL,CMD_ARG_NONE,0,NULL)}, +{MAKE_ARG("count",ARG_TYPE_PURE_TOKEN,-1,"COUNT",NULL,"8.8.0",CMD_ARG_NONE,0,NULL)}, }; /* ZUNIONSTORE argument table */ @@ -9957,7 +9969,7 @@ struct COMMAND_ARG ZUNIONSTORE_Args[] = { {MAKE_ARG("numkeys",ARG_TYPE_INTEGER,-1,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)}, {MAKE_ARG("key",ARG_TYPE_KEY,1,NULL,NULL,NULL,CMD_ARG_MULTIPLE,0,NULL)}, {MAKE_ARG("weight",ARG_TYPE_INTEGER,-1,"WEIGHTS",NULL,NULL,CMD_ARG_OPTIONAL|CMD_ARG_MULTIPLE,0,NULL)}, -{MAKE_ARG("aggregate",ARG_TYPE_ONEOF,-1,"AGGREGATE",NULL,NULL,CMD_ARG_OPTIONAL,3,NULL),.subargs=ZUNIONSTORE_aggregate_Subargs}, +{MAKE_ARG("aggregate",ARG_TYPE_ONEOF,-1,"AGGREGATE",NULL,NULL,CMD_ARG_OPTIONAL,4,NULL),.subargs=ZUNIONSTORE_aggregate_Subargs}, }; /********** XACK ********************/ @@ -11988,9 +12000,9 @@ struct COMMAND_STRUCT redisCommandTable[] = { 
{MAKE_CMD("zdiff","Returns the difference between multiple sorted sets.","O(L + (N-K)log(N)) worst case where L is the total number of elements in all the sets, N is the size of the first set, and K is the size of the result set.","6.2.0",CMD_DOC_NONE,NULL,NULL,"sorted_set",COMMAND_GROUP_SORTED_SET,ZDIFF_History,0,ZDIFF_Tips,0,zdiffCommand,-3,CMD_READONLY,ACL_CATEGORY_SORTEDSET,ZDIFF_Keyspecs,1,zunionInterDiffGetKeys,3),.args=ZDIFF_Args}, {MAKE_CMD("zdiffstore","Stores the difference of multiple sorted sets in a key.","O(L + (N-K)log(N)) worst case where L is the total number of elements in all the sets, N is the size of the first set, and K is the size of the result set.","6.2.0",CMD_DOC_NONE,NULL,NULL,"sorted_set",COMMAND_GROUP_SORTED_SET,ZDIFFSTORE_History,0,ZDIFFSTORE_Tips,0,zdiffstoreCommand,-4,CMD_WRITE|CMD_DENYOOM,ACL_CATEGORY_SORTEDSET,ZDIFFSTORE_Keyspecs,2,zunionInterDiffStoreGetKeys,3),.args=ZDIFFSTORE_Args}, {MAKE_CMD("zincrby","Increments the score of a member in a sorted set.","O(log(N)) where N is the number of elements in the sorted set.","1.2.0",CMD_DOC_NONE,NULL,NULL,"sorted_set",COMMAND_GROUP_SORTED_SET,ZINCRBY_History,0,ZINCRBY_Tips,0,zincrbyCommand,4,CMD_WRITE|CMD_DENYOOM|CMD_FAST,ACL_CATEGORY_SORTEDSET,ZINCRBY_Keyspecs,1,NULL,3),.args=ZINCRBY_Args}, -{MAKE_CMD("zinter","Returns the intersect of multiple sorted sets.","O(N*K)+O(M*log(M)) worst case with N being the smallest input sorted set, K being the number of input sorted sets and M being the number of elements in the resulting sorted set.","6.2.0",CMD_DOC_NONE,NULL,NULL,"sorted_set",COMMAND_GROUP_SORTED_SET,ZINTER_History,0,ZINTER_Tips,0,zinterCommand,-3,CMD_READONLY,ACL_CATEGORY_SORTEDSET,ZINTER_Keyspecs,1,zunionInterDiffGetKeys,5),.args=ZINTER_Args}, +{MAKE_CMD("zinter","Returns the intersect of multiple sorted sets.","O(N*K)+O(M*log(M)) worst case with N being the smallest input sorted set, K being the number of input sorted sets and M being the number of elements in the resulting sorted 
set.","6.2.0",CMD_DOC_NONE,NULL,NULL,"sorted_set",COMMAND_GROUP_SORTED_SET,ZINTER_History,1,ZINTER_Tips,0,zinterCommand,-3,CMD_READONLY,ACL_CATEGORY_SORTEDSET,ZINTER_Keyspecs,1,zunionInterDiffGetKeys,5),.args=ZINTER_Args}, {MAKE_CMD("zintercard","Returns the number of members of the intersect of multiple sorted sets.","O(N*K) worst case with N being the smallest input sorted set, K being the number of input sorted sets.","7.0.0",CMD_DOC_NONE,NULL,NULL,"sorted_set",COMMAND_GROUP_SORTED_SET,ZINTERCARD_History,0,ZINTERCARD_Tips,0,zinterCardCommand,-3,CMD_READONLY,ACL_CATEGORY_SORTEDSET,ZINTERCARD_Keyspecs,1,zunionInterDiffGetKeys,3),.args=ZINTERCARD_Args}, -{MAKE_CMD("zinterstore","Stores the intersect of multiple sorted sets in a key.","O(N*K)+O(M*log(M)) worst case with N being the smallest input sorted set, K being the number of input sorted sets and M being the number of elements in the resulting sorted set.","2.0.0",CMD_DOC_NONE,NULL,NULL,"sorted_set",COMMAND_GROUP_SORTED_SET,ZINTERSTORE_History,0,ZINTERSTORE_Tips,0,zinterstoreCommand,-4,CMD_WRITE|CMD_DENYOOM,ACL_CATEGORY_SORTEDSET,ZINTERSTORE_Keyspecs,2,zunionInterDiffStoreGetKeys,5),.args=ZINTERSTORE_Args}, +{MAKE_CMD("zinterstore","Stores the intersect of multiple sorted sets in a key.","O(N*K)+O(M*log(M)) worst case with N being the smallest input sorted set, K being the number of input sorted sets and M being the number of elements in the resulting sorted set.","2.0.0",CMD_DOC_NONE,NULL,NULL,"sorted_set",COMMAND_GROUP_SORTED_SET,ZINTERSTORE_History,1,ZINTERSTORE_Tips,0,zinterstoreCommand,-4,CMD_WRITE|CMD_DENYOOM,ACL_CATEGORY_SORTEDSET,ZINTERSTORE_Keyspecs,2,zunionInterDiffStoreGetKeys,5),.args=ZINTERSTORE_Args}, {MAKE_CMD("zlexcount","Returns the number of members in a sorted set within a lexicographical range.","O(log(N)) with N being the number of elements in the sorted 
set.","2.8.9",CMD_DOC_NONE,NULL,NULL,"sorted_set",COMMAND_GROUP_SORTED_SET,ZLEXCOUNT_History,0,ZLEXCOUNT_Tips,0,zlexcountCommand,4,CMD_READONLY|CMD_FAST,ACL_CATEGORY_SORTEDSET,ZLEXCOUNT_Keyspecs,1,NULL,3),.args=ZLEXCOUNT_Args}, {MAKE_CMD("zmpop","Returns the highest- or lowest-scoring members from one or more sorted sets after removing them. Deletes the sorted set if the last member was popped.","O(K) + O(M*log(N)) where K is the number of provided keys, N being the number of elements in the sorted set, and M being the number of elements popped.","7.0.0",CMD_DOC_NONE,NULL,NULL,"sorted_set",COMMAND_GROUP_SORTED_SET,ZMPOP_History,0,ZMPOP_Tips,0,zmpopCommand,-4,CMD_WRITE,ACL_CATEGORY_SORTEDSET,ZMPOP_Keyspecs,1,zmpopGetKeys,4),.args=ZMPOP_Args}, {MAKE_CMD("zmscore","Returns the score of one or more members in a sorted set.","O(N) where N is the number of members being requested.","6.2.0",CMD_DOC_NONE,NULL,NULL,"sorted_set",COMMAND_GROUP_SORTED_SET,ZMSCORE_History,0,ZMSCORE_Tips,0,zmscoreCommand,-3,CMD_READONLY|CMD_FAST,ACL_CATEGORY_SORTEDSET,ZMSCORE_Keyspecs,1,NULL,2),.args=ZMSCORE_Args}, @@ -12012,8 +12024,8 @@ struct COMMAND_STRUCT redisCommandTable[] = { {MAKE_CMD("zrevrank","Returns the index of a member in a sorted set ordered by descending scores.","O(log(N))","2.0.0",CMD_DOC_NONE,NULL,NULL,"sorted_set",COMMAND_GROUP_SORTED_SET,ZREVRANK_History,1,ZREVRANK_Tips,0,zrevrankCommand,-3,CMD_READONLY|CMD_FAST,ACL_CATEGORY_SORTEDSET,ZREVRANK_Keyspecs,1,NULL,3),.args=ZREVRANK_Args}, {MAKE_CMD("zscan","Iterates over members and scores of a sorted set.","O(1) for every call. O(N) for a complete iteration, including enough command calls for the cursor to return back to 0. 
N is the number of elements inside the collection.","2.8.0",CMD_DOC_NONE,NULL,NULL,"sorted_set",COMMAND_GROUP_SORTED_SET,ZSCAN_History,0,ZSCAN_Tips,1,zscanCommand,-3,CMD_READONLY,ACL_CATEGORY_SORTEDSET,ZSCAN_Keyspecs,1,NULL,4),.args=ZSCAN_Args}, {MAKE_CMD("zscore","Returns the score of a member in a sorted set.","O(1)","1.2.0",CMD_DOC_NONE,NULL,NULL,"sorted_set",COMMAND_GROUP_SORTED_SET,ZSCORE_History,0,ZSCORE_Tips,0,zscoreCommand,3,CMD_READONLY|CMD_FAST,ACL_CATEGORY_SORTEDSET,ZSCORE_Keyspecs,1,NULL,2),.args=ZSCORE_Args}, -{MAKE_CMD("zunion","Returns the union of multiple sorted sets.","O(N)+O(M*log(M)) with N being the sum of the sizes of the input sorted sets, and M being the number of elements in the resulting sorted set.","6.2.0",CMD_DOC_NONE,NULL,NULL,"sorted_set",COMMAND_GROUP_SORTED_SET,ZUNION_History,0,ZUNION_Tips,0,zunionCommand,-3,CMD_READONLY,ACL_CATEGORY_SORTEDSET,ZUNION_Keyspecs,1,zunionInterDiffGetKeys,5),.args=ZUNION_Args}, -{MAKE_CMD("zunionstore","Stores the union of multiple sorted sets in a key.","O(N)+O(M log(M)) with N being the sum of the sizes of the input sorted sets, and M being the number of elements in the resulting sorted set.","2.0.0",CMD_DOC_NONE,NULL,NULL,"sorted_set",COMMAND_GROUP_SORTED_SET,ZUNIONSTORE_History,0,ZUNIONSTORE_Tips,0,zunionstoreCommand,-4,CMD_WRITE|CMD_DENYOOM,ACL_CATEGORY_SORTEDSET,ZUNIONSTORE_Keyspecs,2,zunionInterDiffStoreGetKeys,5),.args=ZUNIONSTORE_Args}, +{MAKE_CMD("zunion","Returns the union of multiple sorted sets.","O(N)+O(M*log(M)) with N being the sum of the sizes of the input sorted sets, and M being the number of elements in the resulting sorted set.","6.2.0",CMD_DOC_NONE,NULL,NULL,"sorted_set",COMMAND_GROUP_SORTED_SET,ZUNION_History,1,ZUNION_Tips,0,zunionCommand,-3,CMD_READONLY,ACL_CATEGORY_SORTEDSET,ZUNION_Keyspecs,1,zunionInterDiffGetKeys,5),.args=ZUNION_Args}, +{MAKE_CMD("zunionstore","Stores the union of multiple sorted sets in a key.","O(N)+O(M log(M)) with N being the sum of the sizes of the input 
sorted sets, and M being the number of elements in the resulting sorted set.","2.0.0",CMD_DOC_NONE,NULL,NULL,"sorted_set",COMMAND_GROUP_SORTED_SET,ZUNIONSTORE_History,1,ZUNIONSTORE_Tips,0,zunionstoreCommand,-4,CMD_WRITE|CMD_DENYOOM,ACL_CATEGORY_SORTEDSET,ZUNIONSTORE_Keyspecs,2,zunionInterDiffStoreGetKeys,5),.args=ZUNIONSTORE_Args}, /* stream */ {MAKE_CMD("xack","Returns the number of messages that were successfully acknowledged by the consumer group member of a stream.","O(1) for each message ID processed.","5.0.0",CMD_DOC_NONE,NULL,NULL,"stream",COMMAND_GROUP_STREAM,XACK_History,0,XACK_Tips,0,xackCommand,-4,CMD_WRITE|CMD_FAST,ACL_CATEGORY_STREAM,XACK_Keyspecs,1,NULL,3),.args=XACK_Args}, {MAKE_CMD("xackdel","Acknowledges and deletes one or multiple messages for a stream consumer group.","O(1) for each message ID processed.","8.2.0",CMD_DOC_NONE,NULL,NULL,"stream",COMMAND_GROUP_STREAM,XACKDEL_History,0,XACKDEL_Tips,0,xackdelCommand,-6,CMD_WRITE|CMD_FAST,ACL_CATEGORY_STREAM,XACKDEL_Keyspecs,1,NULL,4),.args=XACKDEL_Args}, diff --git a/src/commands/zinter.json b/src/commands/zinter.json index 4828e21d6..1b192cdb2 100644 --- a/src/commands/zinter.json +++ b/src/commands/zinter.json @@ -7,6 +7,12 @@ "arity": -3, "function": "zinterCommand", "get_keys_function": "zunionInterDiffGetKeys", + "history": [ + [ + "8.8.0", + "Added `COUNT` aggregate option." + ] + ], "command_flags": [ "READONLY" ], @@ -101,6 +107,12 @@ "name": "max", "type": "pure-token", "token": "MAX" + }, + { + "name": "count", + "type": "pure-token", + "token": "COUNT", + "since": "8.8.0" } ] }, diff --git a/src/commands/zinterstore.json b/src/commands/zinterstore.json index 5bd940c65..0404bf749 100644 --- a/src/commands/zinterstore.json +++ b/src/commands/zinterstore.json @@ -7,6 +7,12 @@ "arity": -4, "function": "zinterstoreCommand", "get_keys_function": "zunionInterDiffStoreGetKeys", + "history": [ + [ + "8.8.0", + "Added `COUNT` aggregate option." 
+ ] + ], "command_flags": [ "WRITE", "DENYOOM" @@ -100,6 +106,12 @@ "name": "max", "type": "pure-token", "token": "MAX" + }, + { + "name": "count", + "type": "pure-token", + "token": "COUNT", + "since": "8.8.0" } ] } diff --git a/src/commands/zunion.json b/src/commands/zunion.json index 1ce3dc5ee..366e0e8f9 100644 --- a/src/commands/zunion.json +++ b/src/commands/zunion.json @@ -7,6 +7,12 @@ "arity": -3, "function": "zunionCommand", "get_keys_function": "zunionInterDiffGetKeys", + "history": [ + [ + "8.8.0", + "Added `COUNT` aggregate option." + ] + ], "command_flags": [ "READONLY" ], @@ -101,6 +107,12 @@ "name": "max", "type": "pure-token", "token": "MAX" + }, + { + "name": "count", + "type": "pure-token", + "token": "COUNT", + "since": "8.8.0" } ] }, diff --git a/src/commands/zunionstore.json b/src/commands/zunionstore.json index 65e7b5469..fd208a6c0 100644 --- a/src/commands/zunionstore.json +++ b/src/commands/zunionstore.json @@ -7,6 +7,12 @@ "arity": -4, "function": "zunionstoreCommand", "get_keys_function": "zunionInterDiffStoreGetKeys", + "history": [ + [ + "8.8.0", + "Added `COUNT` aggregate option." + ] + ], "command_flags": [ "WRITE", "DENYOOM" @@ -99,6 +105,12 @@ "name": "max", "type": "pure-token", "token": "MAX" + }, + { + "name": "count", + "type": "pure-token", + "token": "COUNT", + "since": "8.8.0" } ] } diff --git a/src/t_zset.c b/src/t_zset.c index 346bcd38c..b4cd47c23 100644 --- a/src/t_zset.c +++ b/src/t_zset.c @@ -2653,6 +2653,15 @@ static int zuiCompareByRevCardinality(const void *s1, const void *s2) { #define REDIS_AGGR_SUM 1 #define REDIS_AGGR_MIN 2 #define REDIS_AGGR_MAX 3 +#define REDIS_AGGR_COUNT 4 + +/* Return the weighted contribution of a single sorted set member. + * For COUNT aggregation the actual score is irrelevant — each member + * contributes its set's weight (i.e. "one occurrence worth "). + * For all other aggregation modes the contribution is weight * score. 
*/ +inline static double zuiWeightedScore(double score, double weight, int aggregate) { + return (aggregate == REDIS_AGGR_COUNT) ? weight : weight * score; +} inline static void zunionInterAggregate(double *target, double val, int aggregate) { if (aggregate == REDIS_AGGR_SUM) { @@ -2661,6 +2670,11 @@ inline static void zunionInterAggregate(double *target, double val, int aggregat * is +inf and the other is -inf. When these numbers are added, * we maintain the convention of the result being 0.0. */ if (isnan(*target)) *target = 0.0; + } else if (aggregate == REDIS_AGGR_COUNT) { + *target += val; + /* The val is zuiWeightedScore(…) == weight, which can be +inf/-inf, + * so the NaN guard applies here. */ + if (isnan(*target)) *target = 0.0; } else if (aggregate == REDIS_AGGR_MIN) { *target = val < *target ? val : *target; } else if (aggregate == REDIS_AGGR_MAX) { @@ -2962,6 +2976,8 @@ void zunionInterDiffGenericCommand(client *c, robj *dstkey, int numkeysIndex, in aggregate = REDIS_AGGR_MIN; } else if (!strcasecmp(c->argv[j]->ptr,"max")) { aggregate = REDIS_AGGR_MAX; + } else if (!strcasecmp(c->argv[j]->ptr,"count")) { + aggregate = REDIS_AGGR_COUNT; } else { zfree(src); addReplyErrorObject(c,shared.syntaxerr); @@ -3018,17 +3034,17 @@ void zunionInterDiffGenericCommand(client *c, robj *dstkey, int numkeysIndex, in while (zuiNext(&src[0],&zval)) { double score, value; - score = src[0].weight * zval.score; + score = zuiWeightedScore(zval.score, src[0].weight, aggregate); if (isnan(score)) score = 0; for (j = 1; j < setnum; j++) { /* It is not safe to access the zset we are * iterating, so explicitly check for equal object. 
*/ if (src[j].subject == src[0].subject) { - value = zval.score*src[j].weight; + value = zuiWeightedScore(zval.score, src[j].weight, aggregate); zunionInterAggregate(&score,value,aggregate); } else if (zuiFind(&src[j],&zval,&value)) { - value *= src[j].weight; + value = zuiWeightedScore(value, src[j].weight, aggregate); zunionInterAggregate(&score,value,aggregate); } else { break; @@ -3075,7 +3091,7 @@ void zunionInterDiffGenericCommand(client *c, robj *dstkey, int numkeysIndex, in zuiInitIterator(&src[i]); while (zuiNext(&src[i],&zval)) { /* Initialize value */ - score = src[i].weight * zval.score; + score = zuiWeightedScore(zval.score, src[i].weight, aggregate); if (isnan(score)) score = 0; /* Search for this element in the dict (which stores node pointers). */ diff --git a/tests/unit/type/zset.tcl b/tests/unit/type/zset.tcl index ad9483b2d..f08ddf70c 100644 --- a/tests/unit/type/zset.tcl +++ b/tests/unit/type/zset.tcl @@ -971,6 +971,26 @@ start_server {tags {"zset"}} { assert_equal {b 2 c 3} [r zinter 2 zseta{t} zsetb{t} aggregate max withscores] } + test "ZUNIONSTORE with AGGREGATE COUNT - $encoding" { + assert_equal 4 [r zunionstore zsetc{t} 2 zseta{t} zsetb{t} aggregate count] + assert_equal {a 1 d 1 b 2 c 2} [r zrange zsetc{t} 0 -1 withscores] + } + + test "ZUNION/ZINTER with AGGREGATE COUNT - $encoding" { + assert_equal {a 1 d 1 b 2 c 2} [r zunion 2 zseta{t} zsetb{t} aggregate count withscores] + assert_equal {b 2 c 2} [r zinter 2 zseta{t} zsetb{t} aggregate count withscores] + } + + test "ZUNIONSTORE with AGGREGATE COUNT and WEIGHTS - $encoding" { + assert_equal 4 [r zunionstore zsetc{t} 2 zseta{t} zsetb{t} weights 2 3 aggregate count] + assert_equal {a 2 d 3 b 5 c 5} [r zrange zsetc{t} 0 -1 withscores] + } + + test "ZUNION/ZINTER with AGGREGATE COUNT and WEIGHTS - $encoding" { + assert_equal {a 2 d 3 b 5 c 5} [r zunion 2 zseta{t} zsetb{t} weights 2 3 aggregate count withscores] + assert_equal {b 5 c 5} [r zinter 2 zseta{t} zsetb{t} weights 2 3 aggregate 
count withscores] + } + test "ZINTERSTORE basics - $encoding" { assert_equal 2 [r zinterstore zsetc{t} 2 zseta{t} zsetb{t}] assert_equal {b 3 c 5} [r zrange zsetc{t} 0 -1 withscores] @@ -1030,6 +1050,39 @@ start_server {tags {"zset"}} { assert_equal {b 2 c 3} [r zrange zsetc{t} 0 -1 withscores] } + test "ZINTERSTORE with AGGREGATE COUNT - $encoding" { + assert_equal 2 [r zinterstore zsetc{t} 2 zseta{t} zsetb{t} aggregate count] + assert_equal {b 2 c 2} [r zrange zsetc{t} 0 -1 withscores] + } + + test "ZINTERSTORE with AGGREGATE COUNT and WEIGHTS - $encoding" { + assert_equal 2 [r zinterstore zsetc{t} 2 zseta{t} zsetb{t} weights 2 3 aggregate count] + assert_equal {b 5 c 5} [r zrange zsetc{t} 0 -1 withscores] + } + + test "ZUNIONSTORE/ZINTERSTORE with AGGREGATE COUNT - 3 sets - $encoding" { + r del s1{t} s2{t} s3{t} t1{t} + r zadd s1{t} 1 foo 1 bar + r zadd s2{t} 2 foo 2 bar + r zadd s3{t} 3 foo + + assert_equal 1 [r zinterstore t1{t} 3 s1{t} s2{t} s3{t} aggregate count] + assert_equal {foo 3} [r zrange t1{t} 0 -1 withscores] + + assert_equal 2 [r zunionstore t1{t} 3 s1{t} s2{t} s3{t} aggregate count] + assert_equal {bar 2 foo 3} [r zrange t1{t} 0 -1 withscores] + } + + test "ZUNIONSTORE/ZINTERSTORE with AGGREGATE COUNT and WEIGHTS - 3 sets - $encoding" { + assert_equal 1 [r zinterstore t1{t} 3 s1{t} s2{t} s3{t} weights 10 5 3 aggregate count] + assert_equal {foo 18} [r zrange t1{t} 0 -1 withscores] + + assert_equal 2 [r zunionstore t1{t} 3 s1{t} s2{t} s3{t} weights 10 5 3 aggregate count] + assert_equal {bar 15 foo 18} [r zrange t1{t} 0 -1 withscores] + + r del s1{t} s2{t} s3{t} t1{t} + } + foreach cmd {ZUNIONSTORE ZINTERSTORE} { test "$cmd with +inf/-inf scores - $encoding" { r del zsetinf1{t} zsetinf2{t} From 2049c7fe32f08841c57d50b595986760ed8d9bd0 Mon Sep 17 00:00:00 2001 From: "debing.sun" Date: Tue, 14 Apr 2026 19:26:42 +0800 Subject: [PATCH 12/32] Fix wrong argv index in xinfoReplyWithStreamInfo for slot alloc size tracking (#15037) 
`xinfoReplyWithStreamInfo` passed the wrong key(c->argv[1]) instead of `c->argv[2]` to `updateSlotAllocSize` when updating per-slot memory tracking. Fix by passing the key explicitly to `xinfoReplyWithStreamInfo` instead of relying on a hardcoded argv index. Also, add the `-DDEBUG_ASSERTIONS` flag to the test-ubuntu-jemalloc CI to cover this debug assertion. --- .github/workflows/daily.yml | 2 +- src/t_stream.c | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/.github/workflows/daily.yml b/.github/workflows/daily.yml index 2f0572444..fdac6d994 100644 --- a/.github/workflows/daily.yml +++ b/.github/workflows/daily.yml @@ -52,7 +52,7 @@ jobs: repository: ${{ env.GITHUB_REPOSITORY }} ref: ${{ env.GITHUB_HEAD_REF }} - name: make - run: make REDIS_CFLAGS='-Werror -DREDIS_TEST' + run: make REDIS_CFLAGS='-Werror -DREDIS_TEST -DDEBUG_ASSERTIONS' - name: testprep run: sudo apt-get install tcl8.6 tclx - name: test diff --git a/src/t_stream.c b/src/t_stream.c index 2fe882572..ac070247a 100644 --- a/src/t_stream.c +++ b/src/t_stream.c @@ -5038,7 +5038,7 @@ void xtrimCommand(client *c) { /* Helper function for xinfoCommand. * Handles the variants of XINFO STREAM */ -void xinfoReplyWithStreamInfo(client *c, kvobj *kv) { +void xinfoReplyWithStreamInfo(client *c, robj *key, kvobj *kv) { stream *s = kv->ptr; int full = 1; long long count = 10; /* Default COUNT is 10 so we don't block the server */ @@ -5275,7 +5275,7 @@ void xinfoReplyWithStreamInfo(client *c, kvobj *kv) { } } if (server.memory_tracking_enabled) - updateSlotAllocSize(c->db,getKeySlot(c->argv[1]->ptr),kv,old_alloc,kvobjAllocSize(kv)); + updateSlotAllocSize(c->db,getKeySlot(key->ptr),kv,old_alloc,kvobjAllocSize(kv)); } /* XINFO CONSUMERS @@ -5379,7 +5379,7 @@ NULL raxStop(&ri); } else if (!strcasecmp(opt,"STREAM")) { /* XINFO STREAM [FULL [COUNT ]]. 
*/ - xinfoReplyWithStreamInfo(c,kv); + xinfoReplyWithStreamInfo(c,key,kv); } else { addReplySubcommandSyntaxError(c); } From 3f810d35bf7de04667d87391013387d82f88aec1 Mon Sep 17 00:00:00 2001 From: Moti Cohen Date: Tue, 14 Apr 2026 18:45:48 +0300 Subject: [PATCH 13/32] Introduce internal append-only pointer vector DS (#15039) Refactoring work for follow-ups (e.g. subkey notifications #14958), splitting reusable infrastructure from feature logic. Optimized for stack allocation with optional growth to heap. Usage: Start on stack (grow to heap): vec v; void *vstack[8]; vecInit(&v, vstack, 8); Start embedded (grow to heap): typedef struct { vec v; void *vembedded[8]; } obj; vecInit(&obj.v, obj.vembedded, 8); Heap only (capacity 8 or 0): vecInit(&v, NULL, 8); vecInit(&v, NULL, 0); Reserve based on size: vecInit(&v, vstack, 8); vecReserve(&v, varsize); // <=8 uses stack, else heap --- src/Makefile | 2 +- src/server.c | 2 + src/vector.c | 173 +++++++++++++++++++++++++++++++++++++++++++++++++++ src/vector.h | 92 +++++++++++++++++++++++++++ 4 files changed, 268 insertions(+), 1 deletion(-) create mode 100644 src/vector.c create mode 100644 src/vector.h diff --git a/src/Makefile b/src/Makefile index b3ebd13b8..c202a233d 100644 --- a/src/Makefile +++ b/src/Makefile @@ -382,7 +382,7 @@ endif REDIS_SERVER_NAME=redis-server$(PROG_SUFFIX) REDIS_SENTINEL_NAME=redis-sentinel$(PROG_SUFFIX) -REDIS_SERVER_OBJ=threads_mngr.o memory_prefetch.o adlist.o quicklist.o ae.o anet.o dict.o ebuckets.o eventnotifier.o iothread.o mstr.o entry.o kvstore.o fwtree.o estore.o server.o sds.o zmalloc.o lzf_c.o lzf_d.o pqsort.o zipmap.o sha1.o ziplist.o release.o networking.o util.o object.o db.o replication.o rdb.o t_string.o t_list.o t_set.o t_zset.o t_hash.o config.o aof.o pubsub.o multi.o debug.o sort.o intset.o syncio.o cluster.o cluster_asm.o cluster_legacy.o cluster_slot_stats.o crc16.o endianconv.o slowlog.o eval.o bio.o rio.o rand.o memtest.o syscheck.o crcspeed.o crccombine.o crc64.o bitops.o 
sentinel.o notify.o setproctitle.o blocked.o hyperloglog.o latency.o sparkline.o redis-check-rdb.o redis-check-aof.o geo.o lazyfree.o module.o evict.o expire.o geohash.o geohash_helper.o childinfo.o defrag.o siphash.o rax.o t_stream.o listpack.o localtime.o lolwut.o lolwut5.o lolwut6.o lolwut8.o acl.o tracking.o socket.o tls.o sha256.o timeout.o setcpuaffinity.o monotonic.o mt19937-64.o resp_parser.o call_reply.o script_lua.o script.o functions.o function_lua.o commands.o strl.o connection.o unix.o logreqres.o keymeta.o chk.o hotkeys.o gcra.o +REDIS_SERVER_OBJ=threads_mngr.o memory_prefetch.o adlist.o quicklist.o ae.o anet.o dict.o ebuckets.o eventnotifier.o iothread.o mstr.o entry.o kvstore.o fwtree.o estore.o server.o sds.o zmalloc.o lzf_c.o lzf_d.o pqsort.o zipmap.o sha1.o ziplist.o release.o networking.o util.o object.o db.o replication.o rdb.o t_string.o t_list.o t_set.o t_zset.o t_hash.o config.o aof.o pubsub.o multi.o debug.o sort.o intset.o syncio.o cluster.o cluster_asm.o cluster_legacy.o cluster_slot_stats.o crc16.o endianconv.o slowlog.o eval.o bio.o rio.o rand.o memtest.o syscheck.o crcspeed.o crccombine.o crc64.o bitops.o sentinel.o notify.o setproctitle.o blocked.o hyperloglog.o latency.o sparkline.o redis-check-rdb.o redis-check-aof.o geo.o lazyfree.o module.o evict.o expire.o geohash.o geohash_helper.o childinfo.o defrag.o siphash.o rax.o t_stream.o listpack.o localtime.o lolwut.o lolwut5.o lolwut6.o lolwut8.o acl.o tracking.o socket.o tls.o sha256.o timeout.o setcpuaffinity.o monotonic.o mt19937-64.o resp_parser.o call_reply.o script_lua.o script.o functions.o function_lua.o commands.o strl.o connection.o unix.o logreqres.o keymeta.o chk.o hotkeys.o gcra.o vector.o REDIS_CLI_NAME=redis-cli$(PROG_SUFFIX) REDIS_CLI_OBJ=anet.o adlist.o dict.o redis-cli.o zmalloc.o release.o ae.o redisassert.o crcspeed.o crccombine.o crc64.o siphash.o crc16.o monotonic.o cli_common.o mt19937-64.o strl.o cli_commands.o REDIS_BENCHMARK_NAME=redis-benchmark$(PROG_SUFFIX) 
diff --git a/src/server.c b/src/server.c index c7f415717..4b2d0191c 100644 --- a/src/server.c +++ b/src/server.c @@ -7797,6 +7797,7 @@ int __test_num = 0; typedef int redisTestProc(int argc, char **argv, int flags); int bitopsTest(int argc, char **argv, int flags); int zsetTest(int argc, char **argv, int flags); +int vectorTest(int argc, char **argv, int flags); struct redisTest { char *name; redisTestProc *proc; @@ -7820,6 +7821,7 @@ struct redisTest { {"fwtree", fwtreeTest}, {"estore", estoreTest}, {"ebuckets", ebucketsTest}, + {"vector", vectorTest}, {"bitmap", bitopsTest}, {"rax", raxTest}, {"zset", zsetTest}, diff --git a/src/vector.c b/src/vector.c new file mode 100644 index 000000000..e5809dabb --- /dev/null +++ b/src/vector.c @@ -0,0 +1,173 @@ +/* vector.c - Simple append-only vector implementation + * + * Copyright (c) 2026-Present, Redis Ltd. + * All rights reserved. + * + * Licensed under your choice of (a) the Redis Source Available License 2.0 + * (RSALv2); or (b) the Server Side Public License v1 (SSPLv1); or (c) the + * GNU Affero General Public License v3 (AGPLv3). + */ + +#include +#include +#include + +#include "vector.h" +#include "redisassert.h" +#include "zmalloc.h" + +#define VEC_DEFAULT_INITCAP 8 + +/* + * Vector initialization. + * + * Modes: + * - stack != NULL: use caller-provided storage for the first initcap items. + * - stack == NULL && initcap > 0: start heap-backed with an initial 'initcap' capacity. + * - stack == NULL && initcap == 0: start heap-backed with no initial storage. + * + * The vector never frees the stack buffer: vecRelease() releases heap storage only. + */ +void vecInit(vec *v, void **stack, size_t initcap) { + /* If stack is provided, initcap must be > 0 and must not exceed the number of slots in the stack buffer (only initcap > 0 is asserted here) */ + assert(initcap > 0 || stack == NULL); + + v->size = 0; + v->cap = initcap; + v->stack = stack; /* stack is NULL if not used */ + + /* Point data at the stack buffer, a new heap allocation, or NULL */ + v->data = (stack) ? stack : ((initcap > 0) ? 
zmalloc(initcap * sizeof(void *)) : NULL); +} + +/* Free heap storage, if any, and reset every field, including the stack + * pointer, so the vector ends up empty with no storage. */ +void vecRelease(vec *v) { + /* if data is not stack-allocated and is not NULL, free it */ + if (v->data && v->data != v->stack) + zfree(v->data); + v->size = 0; + v->cap = 0; + v->data = NULL; + v->stack = NULL; +} + +/* Reset the logical length to zero while preserving allocated storage. */ +void vecClear(vec *v) { + v->size = 0; +} + +/* Return the number of elements in the vector. */ +size_t vecSize(const vec *v) { + return v->size; +} + +/* Get element at index. index must be < vecSize(v); this is enforced with assert(). */ +void *vecGet(const vec *v, size_t index) { + assert(index < v->size); + return v->data[index]; +} + +/* Return the contiguous backing array. It is NULL when the vector has no + * storage, and it may change whenever vecReserve() or vecPush() grows the vector. */ +void **vecData(vec *v) { + return v->data; +} + +/* Ensure capacity is at least mincap. Never shrinks: a no-op when mincap <= cap, + * otherwise capacity becomes exactly mincap. */ +void vecReserve(vec *v, size_t mincap) { + void **newdata; + + if (mincap <= v->cap) return; + + /* If no heap storage is used yet, allocate and copy from stack if needed. + * Note data == stack is also true when both are NULL (heap-only vector + * without storage); the memcpy is skipped when size is 0. */ + if (v->data == v->stack) { + newdata = zmalloc(mincap * sizeof(void *)); + if (v->size) memcpy(newdata, v->data, v->size * sizeof(void *)); + } else { + newdata = zrealloc(v->data, mincap * sizeof(void *)); + } + + v->data = newdata; + v->cap = mincap; +} + +/* Append one element, growing storage as needed: capacity doubles when full, + * or becomes VEC_DEFAULT_INITCAP when it was 0. */ +void vecPush(vec *v, void *value) { + if (v->size == v->cap) { + size_t newcap = (v->cap > 0) ? 
v->cap * 2 : VEC_DEFAULT_INITCAP; + vecReserve(v, newcap); + } + + v->data[v->size++] = value; +} + +#ifdef REDIS_TEST + +#include +#include + +#include "testhelp.h" + +#define UNUSED(x) (void)(x) + +int vectorTest(int argc, char **argv, int flags) +{ + UNUSED(argc); + UNUSED(argv); + UNUSED(flags); + + vec v; + void *vstack[2]; + int one = 1, two = 2, three = 3, four = 4, five = 5, six = 6; + + vecInit(&v, vstack, 2); + test_cond("vecInit() stack-backed size is 0", vecSize(&v) == 0); + test_cond("vecInit() uses stack buffer", vecData(&v) == vstack); + vecReserve(&v, 1); + test_cond("vecReserve() no-ops when capacity is already sufficient", + v.cap == 2 && vecData(&v) == vstack); + vecPush(&v, &one); + vecPush(&v, &two); + test_cond("vecPush() appends into stack storage", + vecSize(&v) == 2 && vecData(&v) == vstack && + vecGet(&v, 0) == &one && vecGet(&v, 1) == &two); + vecReserve(&v, 4); + test_cond("vecReserve() spills from stack to heap preserving values", + v.cap == 4 && vecData(&v) != vstack && + vecGet(&v, 0) == &one && vecGet(&v, 1) == &two); + vecPush(&v, &three); + test_cond("vecPush() spills from stack to heap preserving values", + vecSize(&v) == 3 && + vecData(&v) != vstack && vecGet(&v, 0) == &one && + vecGet(&v, 1) == &two && vecGet(&v, 2) == &three); + + void **heap_data = vecData(&v); + vecClear(&v); + test_cond("vecClear() resets size but preserves storage", + vecSize(&v) == 0 && vecData(&v) == heap_data); + vecRelease(&v); + test_cond("vecRelease() resets vector state", + vecSize(&v) == 0 && vecData(&v) == NULL && v.cap == 0); + + vecInit(&v, NULL, 4); + test_cond("vecInit() heap-backed hint allocates storage", + vecSize(&v) == 0 && vecData(&v) != NULL && v.cap == 4); + vecPush(&v, &four); + test_cond("vecPush() works in heap-backed mode", + vecGet(&v, 0) == &four); + vecReserve(&v, 8); + test_cond("vecReserve() grows heap-backed storage preserving values", + v.cap == 8 && vecGet(&v, 0) == &four); + vecRelease(&v); + + vecInit(&v, NULL, 0); + 
vecReserve(&v, 6); + test_cond("vecReserve() allocates heap storage from empty vector", + v.cap == 6 && vecData(&v) != NULL); + vecPush(&v, &five); + vecPush(&v, &six); + test_cond("vecPush() works after vecReserve() on empty vector", + vecSize(&v) == 2 && + vecGet(&v, 0) == &five && vecGet(&v, 1) == &six); + vecRelease(&v); + + return 0; +} +#endif diff --git a/src/vector.h b/src/vector.h new file mode 100644 index 000000000..a3ea28505 --- /dev/null +++ b/src/vector.h @@ -0,0 +1,92 @@ +#ifndef REDIS_VECTOR_H +#define REDIS_VECTOR_H + +#include + +/* + * Simple append-only vector (dynamic array) of void * elements. + * + * Design: + * -------- + * - Stores elements in a contiguous array (void **). + * - Supports append (vecPush) and read access. + * - Optionally uses a caller-provided stack buffer to avoid heap allocations. + * - See also the comment above vecInit() in vector.c for more details. + * + * Memory: + * ------- + * - vecRelease() frees heap memory if used. + * - Stack buffer is never freed. + * - Stored elements are never freed. + * + * Modes: + * ------- + * 1. Start On Stack (grow to heap): vec v; + * void *vstack[8]; + * ... + * vecInit(&v, vstack, 8); + * + * Start Embedded (grow to heap): typedef struct { + * vec v; + * void *vembedded[8]; + * } obj; + * ... + * vecInit(&obj->v, obj->vembedded, 8); + * + * 2. Heap only, init capacity 8: vec v; + * ... + * vecInit(&v, NULL, 8); + * + * Heap only, init capacity 0: vec v; + * ... + * vecInit(&v, NULL, 0); + * + * 3. Depends on var size: vec v; + * void *vstack[8]; + * vecInit(&v, vstack, 8); + * vecReserve(&v, varsize); // varsize <= 8 ? stack : heap + * + * Notes: + * ------ + * - Not thread-safe. + * - If stack == NULL and initcap > 0, vecInit() allocates heap storage for + * initcap elements right away. + * - When used in Redis core, the implementation should use the Redis allocator + * wrappers (zmalloc / zrealloc / zfree) rather than libc allocation APIs. 
+ */ + +typedef struct vec { + size_t size; /* Number of elements in the vector. */ + size_t cap; /* Number of slots available in data. */ + void **data; /* Backing array: the stack buffer, heap storage, or NULL. */ + void **stack; /* Caller-provided buffer or NULL; never freed by the vector. */ +} vec; + +/* Initialize a vector. If stack is non-NULL it backs the first initcap items; otherwise initcap > 0 preallocates heap storage. */ +void vecInit(vec *v, void **stack, size_t initcap); + +/* Free heap storage, if any, and reset all fields. */ +void vecRelease(vec *v); + +/* Reset the logical length to zero while preserving allocated storage. */ +void vecClear(vec *v); + +size_t vecSize(const vec *v); + +/* Requires index < vecSize(v). */ +void *vecGet(const vec *v, size_t index); + +/* Return the contiguous backing array. */ +void **vecData(vec *v); + +/* Ensure capacity is at least mincap. */ +void vecReserve(vec *v, size_t mincap); + +/* Append one element, growing storage as needed. */ +void vecPush(vec *v, void *value); + +#ifdef REDIS_TEST +int vectorTest(int argc, char **argv, int flags); +#endif + +#endif /* REDIS_VECTOR_H */ From 3cd464263b03b425ffae2e23db24df3dc9346871 Mon Sep 17 00:00:00 2001 From: Vitah Lin Date: Wed, 15 Apr 2026 08:34:40 +0800 Subject: [PATCH 14/32] Fix gen_write_load error on MOVED/ASK during atomic-slot-migration tests (#15016) --- tests/helpers/gen_write_load.tcl | 26 +++++++++++++---- tests/support/util.tcl | 6 ++-- tests/unit/cluster/atomic-slot-migration.tcl | 30 ++++++++------------ 3 files changed, 36 insertions(+), 26 deletions(-) diff --git a/tests/helpers/gen_write_load.tcl b/tests/helpers/gen_write_load.tcl index 60d954e5d..e9f430ae1 100644 --- a/tests/helpers/gen_write_load.tcl +++ b/tests/helpers/gen_write_load.tcl @@ -18,7 +18,9 @@ set ::tlsdir "tests/tls" # Continuously sends SET commands to the server. If key is omitted, a random key # is used for every SET command. The value is always random. 
-proc gen_write_load {host port seconds tls {key ""} {size 0} {sleep 0}} { +# ignore_error_reply (default 0): when non-zero, MOVED/ASK replies are tolerated +# while draining pipelined responses (periodic 500-reply batches and final drain); any other read error is re-raised. +proc gen_write_load {host port seconds tls {key ""} {size 0} {sleep 0} {ignore_error_reply 0}} { set start_time [clock seconds] set r [redis $host $port 1 $tls] $r client setname LOAD_HANDLER @@ -44,12 +46,19 @@ proc gen_write_load {host port seconds tls {key ""} {size 0} {sleep 0}} { } else { $r set $key $value } - + incr count if {$count % 500 == 0} { for {set i 0} {$i < 500} {incr i} { - $r read + # Capture opts to preserve original errorInfo/errorCode on re-raise. + if {[catch {$r read} err opts]} { + if {$ignore_error_reply && ([string match {MOVED*} $err] || [string match {ASK*} $err])} { + continue + } + return -options $opts $err + } } + set count 0 } if {[clock seconds]-$start_time > $seconds} { @@ -59,12 +68,17 @@ proc gen_write_load {host port seconds tls {key ""} {size 0} {sleep 0}} { after $sleep } } - + # Read remaining replies for {set i 0} {$i < $count} {incr i} { - $r read + if {[catch {$r read} err opts]} { + if {$ignore_error_reply && ([string match {MOVED*} $err] || [string match {ASK*} $err])} { + continue + } + return -options $opts $err + } } exit 0 } -gen_write_load [lindex $argv 0] [lindex $argv 1] [lindex $argv 2] [lindex $argv 3] [lindex $argv 4] [lindex $argv 5] [lindex $argv 6] +gen_write_load [lindex $argv 0] [lindex $argv 1] [lindex $argv 2] [lindex $argv 3] [lindex $argv 4] [lindex $argv 5] [lindex $argv 6] [lindex $argv 7] diff --git a/tests/support/util.tcl b/tests/support/util.tcl index 0c9f64836..16eb80008 100644 --- a/tests/support/util.tcl +++ b/tests/support/util.tcl @@ -604,9 +604,11 @@ proc find_valgrind_errors {stderr on_termination} { # Execute a background process writing random data for the specified number # of seconds to the specified Redis instance. 
If key is omitted, a random key # is used for every SET command. -proc start_write_load {host port seconds {key ""} {size 0} {sleep 0}} { +# ignore_error_reply (default 0): set non-zero in cluster slot-migration tests to tolerate +# MOVED/ASK replies while draining pipelined writes in the load helper; the flag is passed to gen_write_load.tcl as its last argument. +proc start_write_load {host port seconds {key ""} {size 0} {sleep 0} {ignore_error_reply 0}} { set tclsh [info nameofexecutable] - exec $tclsh tests/helpers/gen_write_load.tcl $host $port $seconds $::tls $key $size $sleep & + exec $tclsh tests/helpers/gen_write_load.tcl $host $port $seconds $::tls $key $size $sleep $ignore_error_reply & } # Stop a process generating write load executed with start_write_load. diff --git a/tests/unit/cluster/atomic-slot-migration.tcl b/tests/unit/cluster/atomic-slot-migration.tcl index 826f0d69c..74eee55f0 100644 --- a/tests/unit/cluster/atomic-slot-migration.tcl +++ b/tests/unit/cluster/atomic-slot-migration.tcl @@ -577,23 +577,16 @@ start_cluster 3 3 {tags {external:skip cluster} overrides {cluster-node-timeout R 1 debug asm-trim-method none populate_slot 10000 -idx 1 -slot 6000 - # Start write traffic on node-0 - # Throws -MOVED error once asm is completed, catch block will ignore it. - catch { - # Start the slot 0 write load on the R 0 - set port [get_port 0] - set key [slot_key 0 mykey] - set load_handle0 [start_write_load "127.0.0.1" $port 100 $key 0 5] - } + # Start write traffic on node-0 (ignore_error_reply=1 tolerates MOVED/ASK + # replies while slots are being migrated). + set port [get_port 0] + set key [slot_key 0 mykey] + set load_handle0 [start_write_load "127.0.0.1" $port 100 $key 0 5 1] - # Start write traffic on node-1 - # Throws -MOVED error once asm is completed, catch block will ignore it. 
- catch { - # Start the slot 6000 write load on the R 1 - set port [get_port 1] - set key [slot_key 6000 mykey] - set load_handle1 [start_write_load "127.0.0.1" $port 100 $key 0 5] - } + # Start write traffic on node-1 (ignore_error_reply=1 for migration redirects). + set port [get_port 1] + set key [slot_key 6000 mykey] + set load_handle1 [start_write_load "127.0.0.1" $port 100 $key 0 5 1] # Migrate keys R 1 CLUSTER MIGRATION IMPORT 0 100 @@ -801,8 +794,9 @@ start_cluster 3 3 {tags {external:skip cluster} overrides {cluster-node-timeout # we set a delay to write incremental data R 1 config set rdb-key-save-delay 1000000 - # Start the slot 0 write load on the R 1 - set load_handle [start_write_load "127.0.0.1" [get_port 1] 100 $slot0_key] + # Start slot 0 write load on R1. ignore_error_reply=1 tolerates MOVED/ASK + # replies that can appear while slot 0 is being migrated. + set load_handle [start_write_load "127.0.0.1" [get_port 1] 100 $slot0_key 0 0 1] # Clear all fail points assert_equal {OK} [R 0 debug asm-failpoint "" ""] From 670993a89de6ecd8751161d0f866d66f010e76d4 Mon Sep 17 00:00:00 2001 From: Salvatore Sanfilippo Date: Wed, 15 Apr 2026 14:33:55 +0200 Subject: [PATCH 15/32] Replace fast_float C++ library with pure C implementation (#14661) The fast_float dependency required C++ (libstdc++) to build Redis. This commit replaces the 3800-line C++ template library with a minimal pure C implementation (~360 lines) that provides the same functionality needed by Redis. This is **very important** because Redis build process would fail without g++ installed, a common situation in Linux distributions even after installing the basic build tools: we want the build process of Redis to be the simplest possible. Also Redis sometimes is compiled in embedded systems lacking the g++ toolchain. There is no reason to depend on C++ in a project written in C. ## The C implementation uses 1. 
Fast path (Clinger's algorithm) for numbers with mantissa <= 2^53 and exponent in [-22, 22], covering ~99% of real-world cases. 2. Fallback to strtod() for complex cases to ensure correctly-rounded results. ## Changes - Move the new fast_float_strtod.c (C implementation) from deps into Redis core since it is now a single file and no longer needs a separate directory. - Remove all C++ dependencies The implementation was tested against both strtod and the original C++ implementation with 10,000+ test cases including edge cases, special values (inf/nan), and random inputs. --------- Co-authored-by: debing.sun Co-authored-by: Mincho Paskalev Co-authored-by: Moti Cohen --- .github/workflows/ci.yml | 7 +- .github/workflows/daily.yml | 28 +- .gitignore | 1 - deps/Makefile | 7 - deps/fast_float/Makefile | 27 - deps/fast_float/README.md | 21 - deps/fast_float/fast_float.h | 3838 ------------------------- deps/fast_float/fast_float_strtod.cpp | 32 - deps/fast_float/fast_float_strtod.h | 15 - src/Makefile | 10 +- src/debug.c | 2 +- src/fast_float_strtod.c | 544 ++++ src/fast_float_strtod.h | 13 + src/resp_parser.c | 5 +- src/server.c | 2 + src/sort.c | 7 +- src/t_zset.c | 19 +- src/util.c | 9 +- tests/unit/sort.tcl | 8 + 19 files changed, 600 insertions(+), 3995 deletions(-) delete mode 100644 deps/fast_float/Makefile delete mode 100644 deps/fast_float/README.md delete mode 100644 deps/fast_float/fast_float.h delete mode 100644 deps/fast_float/fast_float_strtod.cpp delete mode 100644 deps/fast_float/fast_float_strtod.h create mode 100644 src/fast_float_strtod.c create mode 100644 src/fast_float_strtod.h diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 4fe75a6fa..75a8ff62d 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -62,7 +62,7 @@ jobs: - uses: actions/checkout@v4 - name: make run: | - sudo apt-get update && sudo apt-get install libc6-dev-i386 gcc-multilib g++-multilib + sudo apt-get update && sudo apt-get install libc6-dev-i386 
gcc-multilib make REDIS_CFLAGS='-Werror' 32bit build-libc-malloc: @@ -79,7 +79,7 @@ jobs: - uses: actions/checkout@v4 - name: make run: | - dnf -y install which gcc gcc-c++ make + dnf -y install which gcc make make REDIS_CFLAGS='-Werror' build-old-chain-jemalloc: @@ -96,7 +96,6 @@ jobs: apt-key adv --keyserver keyserver.ubuntu.com --recv-keys 40976EAF437D05B5 apt-key adv --keyserver keyserver.ubuntu.com --recv-keys 3B4FE6ACC0B21F32 apt-get update - apt-get install -y make gcc-4.8 g++-4.8 + apt-get install -y make gcc-4.8 update-alternatives --install /usr/bin/gcc gcc /usr/bin/gcc-4.8 100 - update-alternatives --install /usr/bin/g++ g++ /usr/bin/g++-4.8 100 make CC=gcc REDIS_CFLAGS='-Werror' diff --git a/.github/workflows/daily.yml b/.github/workflows/daily.yml index fdac6d994..36edb7529 100644 --- a/.github/workflows/daily.yml +++ b/.github/workflows/daily.yml @@ -240,7 +240,7 @@ jobs: ref: ${{ env.GITHUB_HEAD_REF }} - name: make run: | - apt-get update && apt-get install -y make gcc g++ + apt-get update && apt-get install -y make gcc make CC=gcc REDIS_CFLAGS='-Werror -DREDIS_TEST -U_FORTIFY_SOURCE -D_FORTIFY_SOURCE=3' - name: testprep run: sudo apt-get install -y tcl8.6 tclx procps @@ -347,7 +347,7 @@ jobs: ref: ${{ env.GITHUB_HEAD_REF }} - name: make run: | - sudo apt-get update && sudo apt-get install libc6-dev-i386 g++ gcc-multilib g++-multilib + sudo apt-get update && sudo apt-get install libc6-dev-i386 gcc-multilib make 32bit REDIS_CFLAGS='-Werror -DREDIS_TEST' make -C tests/modules 32bit # the script below doesn't have an argument, we must build manually ahead of time - name: testprep @@ -580,7 +580,7 @@ jobs: - name: testprep run: | sudo apt-get update - sudo apt-get install tcl8.6 tclx valgrind g++ -y + sudo apt-get install tcl8.6 tclx valgrind -y - name: test if: true && !contains(github.event.inputs.skiptests, 'redis') # Note that valgrind's overhead doesn't pair well with io-threads so we @@ -645,7 +645,7 @@ jobs: - name: testprep run: | sudo apt-get 
update - sudo apt-get install tcl8.6 tclx valgrind g++ -y + sudo apt-get install tcl8.6 tclx valgrind -y - name: test if: true && !contains(github.event.inputs.skiptests, 'redis') run: ./runtest --valgrind --tags -iothreads --no-latency --verbose --clients 1 --timeout 2400 --dump-logs ${{github.event.inputs.test_args}} @@ -878,7 +878,7 @@ jobs: ref: ${{ env.GITHUB_HEAD_REF }} - name: make run: | - dnf -y install which gcc make g++ + dnf -y install which gcc make make REDIS_CFLAGS='-Werror' - name: testprep run: | @@ -917,7 +917,7 @@ jobs: ref: ${{ env.GITHUB_HEAD_REF }} - name: make run: | - dnf -y install which gcc make openssl-devel openssl g++ + dnf -y install which gcc make openssl-devel openssl make BUILD_TLS=module REDIS_CFLAGS='-Werror' - name: testprep run: | @@ -960,7 +960,7 @@ jobs: ref: ${{ env.GITHUB_HEAD_REF }} - name: make run: | - dnf -y install which gcc make openssl-devel openssl g++ + dnf -y install which gcc make openssl-devel openssl make BUILD_TLS=module REDIS_CFLAGS='-Werror' - name: testprep run: | @@ -1093,9 +1093,6 @@ jobs: (github.event_name == 'workflow_dispatch' || (github.event_name != 'workflow_dispatch' && github.repository == 'redis/redis')) && !contains(github.event.inputs.skipjobs, 'freebsd') timeout-minutes: 360 - env: - CC: clang - CXX: clang++ steps: - name: prep if: github.event_name == 'workflow_dispatch' @@ -1260,9 +1257,8 @@ jobs: apt-key adv --keyserver keyserver.ubuntu.com --recv-keys 40976EAF437D05B5 apt-key adv --keyserver keyserver.ubuntu.com --recv-keys 3B4FE6ACC0B21F32 apt-get update - apt-get install -y make gcc-4.8 g++-4.8 + apt-get install -y make gcc-4.8 update-alternatives --install /usr/bin/gcc gcc /usr/bin/gcc-4.8 100 - update-alternatives --install /usr/bin/g++ g++ /usr/bin/g++-4.8 100 make CC=gcc REDIS_CFLAGS='-Werror' - name: testprep run: apt-get install -y tcl tcltls tclx @@ -1306,10 +1302,9 @@ jobs: apt-key adv --keyserver keyserver.ubuntu.com --recv-keys 40976EAF437D05B5 apt-key adv --keyserver 
keyserver.ubuntu.com --recv-keys 3B4FE6ACC0B21F32 apt-get update - apt-get install -y make gcc-4.8 g++-4.8 openssl libssl-dev + apt-get install -y make gcc-4.8 openssl libssl-dev update-alternatives --install /usr/bin/gcc gcc /usr/bin/gcc-4.8 100 - update-alternatives --install /usr/bin/g++ g++ /usr/bin/g++-4.8 100 - make CC=gcc CXX=g++ BUILD_TLS=module REDIS_CFLAGS='-Werror' + make CC=gcc BUILD_TLS=module REDIS_CFLAGS='-Werror' - name: testprep run: | apt-get install -y tcl tcltls tclx @@ -1357,9 +1352,8 @@ jobs: apt-key adv --keyserver keyserver.ubuntu.com --recv-keys 40976EAF437D05B5 apt-key adv --keyserver keyserver.ubuntu.com --recv-keys 3B4FE6ACC0B21F32 apt-get update - apt-get install -y make gcc-4.8 g++-4.8 openssl libssl-dev + apt-get install -y make gcc-4.8 openssl libssl-dev update-alternatives --install /usr/bin/gcc gcc /usr/bin/gcc-4.8 100 - update-alternatives --install /usr/bin/g++ g++ /usr/bin/g++-4.8 100 make BUILD_TLS=module CC=gcc REDIS_CFLAGS='-Werror' - name: testprep run: | diff --git a/.gitignore b/.gitignore index 507aad8e0..5ed94f1da 100644 --- a/.gitignore +++ b/.gitignore @@ -30,7 +30,6 @@ deps/lua/src/luac deps/lua/src/liblua.a deps/hdr_histogram/libhdrhistogram.a deps/fpconv/libfpconv.a -deps/fast_float/libfast_float.a tests/tls/* .make-* .prerequisites diff --git a/deps/Makefile b/deps/Makefile index c1d13bd85..60e0e569e 100644 --- a/deps/Makefile +++ b/deps/Makefile @@ -59,7 +59,6 @@ distclean: -(cd jemalloc && [ -f Makefile ] && $(MAKE) distclean) > /dev/null || true -(cd hdr_histogram && $(MAKE) clean) > /dev/null || true -(cd fpconv && $(MAKE) clean) > /dev/null || true - -(cd fast_float && $(MAKE) clean) > /dev/null || true -(cd xxhash && $(MAKE) clean) > /dev/null || true -(rm -f .make-*) @@ -95,12 +94,6 @@ fpconv: .make-prerequisites .PHONY: fpconv -fast_float: .make-prerequisites - @printf '%b %b\n' $(MAKECOLOR)MAKE$(ENDCOLOR) $(BINCOLOR)$@$(ENDCOLOR) - cd fast_float && $(MAKE) libfast_float CFLAGS="$(DEPS_CFLAGS)" 
LDFLAGS="$(DEPS_LDFLAGS)" - -.PHONY: fast_float - XXHASH_CFLAGS = -fPIC $(DEPS_CFLAGS) xxhash: .make-prerequisites @printf '%b %b\n' $(MAKECOLOR)MAKE$(ENDCOLOR) $(BINCOLOR)$@$(ENDCOLOR) diff --git a/deps/fast_float/Makefile b/deps/fast_float/Makefile deleted file mode 100644 index e3acaa500..000000000 --- a/deps/fast_float/Makefile +++ /dev/null @@ -1,27 +0,0 @@ -# Fallback to gcc/g++ when $CC or $CXX is not in $PATH. -CC ?= gcc -CXX ?= g++ - -WARN=-Wall -OPT=-O3 -STD=-std=c++11 -DEFS=-DFASTFLOAT_ALLOWS_LEADING_PLUS - -FASTFLOAT_CFLAGS=$(WARN) $(OPT) $(STD) $(DEFS) $(CFLAGS) -FASTFLOAT_LDFLAGS=$(LDFLAGS) - -libfast_float: fast_float_strtod.o - $(AR) -r libfast_float.a fast_float_strtod.o - -32bit: FASTFLOAT_CFLAGS += -m32 -32bit: FASTFLOAT_LDFLAGS += -m32 -32bit: libfast_float - -fast_float_strtod.o: fast_float_strtod.cpp - $(CXX) $(FASTFLOAT_CFLAGS) -c fast_float_strtod.cpp $(FASTFLOAT_LDFLAGS) - -clean: - rm -f *.o - rm -f *.a - rm -f *.h.gch - rm -rf *.dSYM diff --git a/deps/fast_float/README.md b/deps/fast_float/README.md deleted file mode 100644 index 90462d3bf..000000000 --- a/deps/fast_float/README.md +++ /dev/null @@ -1,21 +0,0 @@ -README for fast_float v6.1.4 - ----------------------------------------------- - -We're using the fast_float library[1] in our (compiled-in) -floating-point fast_float_strtod implementation for faster and more -portable parsing of 64 decimal strings. 
- -The single file fast_float.h is an amalgamation of the entire library, -which can be (re)generated with the amalgamate.py script (from the -fast_float repository) via the command - -``` -git clone https://github.com/fastfloat/fast_float -cd fast_float -git checkout v6.1.4 -python3 ./script/amalgamate.py --license=MIT \ - > $REDIS_SRC/deps/fast_float/fast_float.h -``` - -[1]: https://github.com/fastfloat/fast_float diff --git a/deps/fast_float/fast_float.h b/deps/fast_float/fast_float.h deleted file mode 100644 index 81d9da50f..000000000 --- a/deps/fast_float/fast_float.h +++ /dev/null @@ -1,3838 +0,0 @@ -// fast_float by Daniel Lemire -// fast_float by João Paulo Magalhaes -// -// -// with contributions from Eugene Golushkov -// with contributions from Maksim Kita -// with contributions from Marcin Wojdyr -// with contributions from Neal Richardson -// with contributions from Tim Paine -// with contributions from Fabio Pellacini -// with contributions from Lénárd Szolnoki -// with contributions from Jan Pharago -// with contributions from Maya Warrier -// with contributions from Taha Khokhar -// -// -// MIT License Notice -// -// MIT License -// -// Copyright (c) 2021 The fast_float authors -// -// Permission is hereby granted, free of charge, to any -// person obtaining a copy of this software and associated -// documentation files (the "Software"), to deal in the -// Software without restriction, including without -// limitation the rights to use, copy, modify, merge, -// publish, distribute, sublicense, and/or sell copies of -// the Software, and to permit persons to whom the Software -// is furnished to do so, subject to the following -// conditions: -// -// The above copyright notice and this permission notice -// shall be included in all copies or substantial portions -// of the Software. 
-// -// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF -// ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED -// TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A -// PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT -// SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY -// CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION -// OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR -// IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER -// DEALINGS IN THE SOFTWARE. -// - -#ifndef FASTFLOAT_CONSTEXPR_FEATURE_DETECT_H -#define FASTFLOAT_CONSTEXPR_FEATURE_DETECT_H - -#ifdef __has_include -#if __has_include() -#include -#endif -#endif - -// Testing for https://wg21.link/N3652, adopted in C++14 -#if __cpp_constexpr >= 201304 -#define FASTFLOAT_CONSTEXPR14 constexpr -#else -#define FASTFLOAT_CONSTEXPR14 -#endif - -#if defined(__cpp_lib_bit_cast) && __cpp_lib_bit_cast >= 201806L -#define FASTFLOAT_HAS_BIT_CAST 1 -#else -#define FASTFLOAT_HAS_BIT_CAST 0 -#endif - -#if defined(__cpp_lib_is_constant_evaluated) && \ - __cpp_lib_is_constant_evaluated >= 201811L -#define FASTFLOAT_HAS_IS_CONSTANT_EVALUATED 1 -#else -#define FASTFLOAT_HAS_IS_CONSTANT_EVALUATED 0 -#endif - -// Testing for relevant C++20 constexpr library features -#if FASTFLOAT_HAS_IS_CONSTANT_EVALUATED && FASTFLOAT_HAS_BIT_CAST && \ - __cpp_lib_constexpr_algorithms >= 201806L /*For std::copy and std::fill*/ -#define FASTFLOAT_CONSTEXPR20 constexpr -#define FASTFLOAT_IS_CONSTEXPR 1 -#else -#define FASTFLOAT_CONSTEXPR20 -#define FASTFLOAT_IS_CONSTEXPR 0 -#endif - -#endif // FASTFLOAT_CONSTEXPR_FEATURE_DETECT_H - -#ifndef FASTFLOAT_FLOAT_COMMON_H -#define FASTFLOAT_FLOAT_COMMON_H - -#include -#include -#include -#include -#include -#include -#ifdef __has_include -#if __has_include() && (__cplusplus > 202002L || _MSVC_LANG > 202002L) -#include -#endif -#endif - -namespace fast_float { - -#define FASTFLOAT_JSONFMT (1 << 5) -#define FASTFLOAT_FORTRANFMT (1 << 6) - -enum chars_format { - 
scientific = 1 << 0, - fixed = 1 << 2, - hex = 1 << 3, - no_infnan = 1 << 4, - // RFC 8259: https://datatracker.ietf.org/doc/html/rfc8259#section-6 - json = FASTFLOAT_JSONFMT | fixed | scientific | no_infnan, - // Extension of RFC 8259 where, e.g., "inf" and "nan" are allowed. - json_or_infnan = FASTFLOAT_JSONFMT | fixed | scientific, - fortran = FASTFLOAT_FORTRANFMT | fixed | scientific, - general = fixed | scientific -}; - -template struct from_chars_result_t { - UC const *ptr; - std::errc ec; -}; -using from_chars_result = from_chars_result_t; - -template struct parse_options_t { - constexpr explicit parse_options_t(chars_format fmt = chars_format::general, - UC dot = UC('.')) - : format(fmt), decimal_point(dot) {} - - /** Which number formats are accepted */ - chars_format format; - /** The character used as decimal point */ - UC decimal_point; -}; -using parse_options = parse_options_t; - -} // namespace fast_float - -#if FASTFLOAT_HAS_BIT_CAST -#include -#endif - -#if (defined(__x86_64) || defined(__x86_64__) || defined(_M_X64) || \ - defined(__amd64) || defined(__aarch64__) || defined(_M_ARM64) || \ - defined(__MINGW64__) || defined(__s390x__) || \ - (defined(__ppc64__) || defined(__PPC64__) || defined(__ppc64le__) || \ - defined(__PPC64LE__)) || \ - defined(__loongarch64)) -#define FASTFLOAT_64BIT 1 -#elif (defined(__i386) || defined(__i386__) || defined(_M_IX86) || \ - defined(__arm__) || defined(_M_ARM) || defined(__ppc__) || \ - defined(__MINGW32__) || defined(__EMSCRIPTEN__)) -#define FASTFLOAT_32BIT 1 -#else - // Need to check incrementally, since SIZE_MAX is a size_t, avoid overflow. -// We can never tell the register width, but the SIZE_MAX is a good -// approximation. UINTPTR_MAX and INTPTR_MAX are optional, so avoid them for max -// portability. 
-#if SIZE_MAX == 0xffff -#error Unknown platform (16-bit, unsupported) -#elif SIZE_MAX == 0xffffffff -#define FASTFLOAT_32BIT 1 -#elif SIZE_MAX == 0xffffffffffffffff -#define FASTFLOAT_64BIT 1 -#else -#error Unknown platform (not 32-bit, not 64-bit?) -#endif -#endif - -#if ((defined(_WIN32) || defined(_WIN64)) && !defined(__clang__)) || \ - (defined(_M_ARM64) && !defined(__MINGW32__)) -#include -#endif - -#if defined(_MSC_VER) && !defined(__clang__) -#define FASTFLOAT_VISUAL_STUDIO 1 -#endif - -#if defined __BYTE_ORDER__ && defined __ORDER_BIG_ENDIAN__ -#define FASTFLOAT_IS_BIG_ENDIAN (__BYTE_ORDER__ == __ORDER_BIG_ENDIAN__) -#elif defined _WIN32 -#define FASTFLOAT_IS_BIG_ENDIAN 0 -#else -#if defined(__APPLE__) || defined(__FreeBSD__) -#include -#elif defined(sun) || defined(__sun) -#include -#elif defined(__MVS__) -#include -#else -#ifdef __has_include -#if __has_include() -#include -#endif //__has_include() -#endif //__has_include -#endif -# -#ifndef __BYTE_ORDER__ -// safe choice -#define FASTFLOAT_IS_BIG_ENDIAN 0 -#endif -# -#ifndef __ORDER_LITTLE_ENDIAN__ -// safe choice -#define FASTFLOAT_IS_BIG_ENDIAN 0 -#endif -# -#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ -#define FASTFLOAT_IS_BIG_ENDIAN 0 -#else -#define FASTFLOAT_IS_BIG_ENDIAN 1 -#endif -#endif - -#if defined(__SSE2__) || (defined(FASTFLOAT_VISUAL_STUDIO) && \ - (defined(_M_AMD64) || defined(_M_X64) || \ - (defined(_M_IX86_FP) && _M_IX86_FP == 2))) -#define FASTFLOAT_SSE2 1 -#endif - -#if defined(__aarch64__) || defined(_M_ARM64) -#define FASTFLOAT_NEON 1 -#endif - -#if defined(FASTFLOAT_SSE2) || defined(FASTFLOAT_NEON) -#define FASTFLOAT_HAS_SIMD 1 -#endif - -#if defined(__GNUC__) -// disable -Wcast-align=strict (GCC only) -#define FASTFLOAT_SIMD_DISABLE_WARNINGS \ - _Pragma("GCC diagnostic push") \ - _Pragma("GCC diagnostic ignored \"-Wcast-align\"") -#else -#define FASTFLOAT_SIMD_DISABLE_WARNINGS -#endif - -#if defined(__GNUC__) -#define FASTFLOAT_SIMD_RESTORE_WARNINGS _Pragma("GCC diagnostic pop") 
-#else -#define FASTFLOAT_SIMD_RESTORE_WARNINGS -#endif - -#ifdef FASTFLOAT_VISUAL_STUDIO -#define fastfloat_really_inline __forceinline -#else -#define fastfloat_really_inline inline __attribute__((always_inline)) -#endif - -#ifndef FASTFLOAT_ASSERT -#define FASTFLOAT_ASSERT(x) \ - { ((void)(x)); } -#endif - -#ifndef FASTFLOAT_DEBUG_ASSERT -#define FASTFLOAT_DEBUG_ASSERT(x) \ - { ((void)(x)); } -#endif - -// rust style `try!()` macro, or `?` operator -#define FASTFLOAT_TRY(x) \ - { \ - if (!(x)) \ - return false; \ - } - -#define FASTFLOAT_ENABLE_IF(...) \ - typename std::enable_if<(__VA_ARGS__), int>::type - -namespace fast_float { - -fastfloat_really_inline constexpr bool cpp20_and_in_constexpr() { -#if FASTFLOAT_HAS_IS_CONSTANT_EVALUATED - return std::is_constant_evaluated(); -#else - return false; -#endif -} - -template -fastfloat_really_inline constexpr bool is_supported_float_type() { - return std::is_same::value || std::is_same::value -#if __STDCPP_FLOAT32_T__ - || std::is_same::value -#endif -#if __STDCPP_FLOAT64_T__ - || std::is_same::value -#endif - ; -} - -template -fastfloat_really_inline constexpr bool is_supported_char_type() { - return std::is_same::value || std::is_same::value || - std::is_same::value || std::is_same::value; -} - -// Compares two ASCII strings in a case insensitive manner. -template -inline FASTFLOAT_CONSTEXPR14 bool -fastfloat_strncasecmp(UC const *input1, UC const *input2, size_t length) { - char running_diff{0}; - for (size_t i = 0; i < length; ++i) { - running_diff |= (char(input1[i]) ^ char(input2[i])); - } - return (running_diff == 0) || (running_diff == 32); -} - -#ifndef FLT_EVAL_METHOD -#error "FLT_EVAL_METHOD should be defined, please include cfloat." 
-#endif - -// a pointer and a length to a contiguous block of memory -template struct span { - const T *ptr; - size_t length; - constexpr span(const T *_ptr, size_t _length) : ptr(_ptr), length(_length) {} - constexpr span() : ptr(nullptr), length(0) {} - - constexpr size_t len() const noexcept { return length; } - - FASTFLOAT_CONSTEXPR14 const T &operator[](size_t index) const noexcept { - FASTFLOAT_DEBUG_ASSERT(index < length); - return ptr[index]; - } -}; - -struct value128 { - uint64_t low; - uint64_t high; - constexpr value128(uint64_t _low, uint64_t _high) : low(_low), high(_high) {} - constexpr value128() : low(0), high(0) {} -}; - -/* Helper C++14 constexpr generic implementation of leading_zeroes */ -fastfloat_really_inline FASTFLOAT_CONSTEXPR14 int -leading_zeroes_generic(uint64_t input_num, int last_bit = 0) { - if (input_num & uint64_t(0xffffffff00000000)) { - input_num >>= 32; - last_bit |= 32; - } - if (input_num & uint64_t(0xffff0000)) { - input_num >>= 16; - last_bit |= 16; - } - if (input_num & uint64_t(0xff00)) { - input_num >>= 8; - last_bit |= 8; - } - if (input_num & uint64_t(0xf0)) { - input_num >>= 4; - last_bit |= 4; - } - if (input_num & uint64_t(0xc)) { - input_num >>= 2; - last_bit |= 2; - } - if (input_num & uint64_t(0x2)) { /* input_num >>= 1; */ - last_bit |= 1; - } - return 63 - last_bit; -} - -/* result might be undefined when input_num is zero */ -fastfloat_really_inline FASTFLOAT_CONSTEXPR20 int -leading_zeroes(uint64_t input_num) { - assert(input_num > 0); - if (cpp20_and_in_constexpr()) { - return leading_zeroes_generic(input_num); - } -#ifdef FASTFLOAT_VISUAL_STUDIO -#if defined(_M_X64) || defined(_M_ARM64) - unsigned long leading_zero = 0; - // Search the mask data from most significant bit (MSB) - // to least significant bit (LSB) for a set bit (1). 
- _BitScanReverse64(&leading_zero, input_num); - return (int)(63 - leading_zero); -#else - return leading_zeroes_generic(input_num); -#endif -#else - return __builtin_clzll(input_num); -#endif -} - -// slow emulation routine for 32-bit -fastfloat_really_inline constexpr uint64_t emulu(uint32_t x, uint32_t y) { - return x * (uint64_t)y; -} - -fastfloat_really_inline FASTFLOAT_CONSTEXPR14 uint64_t -umul128_generic(uint64_t ab, uint64_t cd, uint64_t *hi) { - uint64_t ad = emulu((uint32_t)(ab >> 32), (uint32_t)cd); - uint64_t bd = emulu((uint32_t)ab, (uint32_t)cd); - uint64_t adbc = ad + emulu((uint32_t)ab, (uint32_t)(cd >> 32)); - uint64_t adbc_carry = (uint64_t)(adbc < ad); - uint64_t lo = bd + (adbc << 32); - *hi = emulu((uint32_t)(ab >> 32), (uint32_t)(cd >> 32)) + (adbc >> 32) + - (adbc_carry << 32) + (uint64_t)(lo < bd); - return lo; -} - -#ifdef FASTFLOAT_32BIT - -// slow emulation routine for 32-bit -#if !defined(__MINGW64__) -fastfloat_really_inline FASTFLOAT_CONSTEXPR14 uint64_t _umul128(uint64_t ab, - uint64_t cd, - uint64_t *hi) { - return umul128_generic(ab, cd, hi); -} -#endif // !__MINGW64__ - -#endif // FASTFLOAT_32BIT - -// compute 64-bit a*b -fastfloat_really_inline FASTFLOAT_CONSTEXPR20 value128 -full_multiplication(uint64_t a, uint64_t b) { - if (cpp20_and_in_constexpr()) { - value128 answer; - answer.low = umul128_generic(a, b, &answer.high); - return answer; - } - value128 answer; -#if defined(_M_ARM64) && !defined(__MINGW32__) - // ARM64 has native support for 64-bit multiplications, no need to emulate - // But MinGW on ARM64 doesn't have native support for 64-bit multiplications - answer.high = __umulh(a, b); - answer.low = a * b; -#elif defined(FASTFLOAT_32BIT) || (defined(_WIN64) && !defined(__clang__)) - answer.low = _umul128(a, b, &answer.high); // _umul128 not available on ARM64 -#elif defined(FASTFLOAT_64BIT) && defined(__SIZEOF_INT128__) - __uint128_t r = ((__uint128_t)a) * b; - answer.low = uint64_t(r); - answer.high = uint64_t(r >> 64); 
-#else - answer.low = umul128_generic(a, b, &answer.high); -#endif - return answer; -} - -struct adjusted_mantissa { - uint64_t mantissa{0}; - int32_t power2{0}; // a negative value indicates an invalid result - adjusted_mantissa() = default; - constexpr bool operator==(const adjusted_mantissa &o) const { - return mantissa == o.mantissa && power2 == o.power2; - } - constexpr bool operator!=(const adjusted_mantissa &o) const { - return mantissa != o.mantissa || power2 != o.power2; - } -}; - -// Bias so we can get the real exponent with an invalid adjusted_mantissa. -constexpr static int32_t invalid_am_bias = -0x8000; - -// used for binary_format_lookup_tables::max_mantissa -constexpr uint64_t constant_55555 = 5 * 5 * 5 * 5 * 5; - -template struct binary_format_lookup_tables; - -template struct binary_format : binary_format_lookup_tables { - using equiv_uint = - typename std::conditional::type; - - static inline constexpr int mantissa_explicit_bits(); - static inline constexpr int minimum_exponent(); - static inline constexpr int infinite_power(); - static inline constexpr int sign_index(); - static inline constexpr int - min_exponent_fast_path(); // used when fegetround() == FE_TONEAREST - static inline constexpr int max_exponent_fast_path(); - static inline constexpr int max_exponent_round_to_even(); - static inline constexpr int min_exponent_round_to_even(); - static inline constexpr uint64_t max_mantissa_fast_path(int64_t power); - static inline constexpr uint64_t - max_mantissa_fast_path(); // used when fegetround() == FE_TONEAREST - static inline constexpr int largest_power_of_ten(); - static inline constexpr int smallest_power_of_ten(); - static inline constexpr T exact_power_of_ten(int64_t power); - static inline constexpr size_t max_digits(); - static inline constexpr equiv_uint exponent_mask(); - static inline constexpr equiv_uint mantissa_mask(); - static inline constexpr equiv_uint hidden_bit_mask(); -}; - -template struct binary_format_lookup_tables { - 
static constexpr double powers_of_ten[] = { - 1e0, 1e1, 1e2, 1e3, 1e4, 1e5, 1e6, 1e7, 1e8, 1e9, 1e10, 1e11, - 1e12, 1e13, 1e14, 1e15, 1e16, 1e17, 1e18, 1e19, 1e20, 1e21, 1e22}; - - // Largest integer value v so that (5**index * v) <= 1<<53. - // 0x20000000000000 == 1 << 53 - static constexpr uint64_t max_mantissa[] = { - 0x20000000000000, - 0x20000000000000 / 5, - 0x20000000000000 / (5 * 5), - 0x20000000000000 / (5 * 5 * 5), - 0x20000000000000 / (5 * 5 * 5 * 5), - 0x20000000000000 / (constant_55555), - 0x20000000000000 / (constant_55555 * 5), - 0x20000000000000 / (constant_55555 * 5 * 5), - 0x20000000000000 / (constant_55555 * 5 * 5 * 5), - 0x20000000000000 / (constant_55555 * 5 * 5 * 5 * 5), - 0x20000000000000 / (constant_55555 * constant_55555), - 0x20000000000000 / (constant_55555 * constant_55555 * 5), - 0x20000000000000 / (constant_55555 * constant_55555 * 5 * 5), - 0x20000000000000 / (constant_55555 * constant_55555 * 5 * 5 * 5), - 0x20000000000000 / (constant_55555 * constant_55555 * constant_55555), - 0x20000000000000 / (constant_55555 * constant_55555 * constant_55555 * 5), - 0x20000000000000 / - (constant_55555 * constant_55555 * constant_55555 * 5 * 5), - 0x20000000000000 / - (constant_55555 * constant_55555 * constant_55555 * 5 * 5 * 5), - 0x20000000000000 / - (constant_55555 * constant_55555 * constant_55555 * 5 * 5 * 5 * 5), - 0x20000000000000 / - (constant_55555 * constant_55555 * constant_55555 * constant_55555), - 0x20000000000000 / (constant_55555 * constant_55555 * constant_55555 * - constant_55555 * 5), - 0x20000000000000 / (constant_55555 * constant_55555 * constant_55555 * - constant_55555 * 5 * 5), - 0x20000000000000 / (constant_55555 * constant_55555 * constant_55555 * - constant_55555 * 5 * 5 * 5), - 0x20000000000000 / (constant_55555 * constant_55555 * constant_55555 * - constant_55555 * 5 * 5 * 5 * 5)}; -}; - -template -constexpr double binary_format_lookup_tables::powers_of_ten[]; - -template -constexpr uint64_t 
binary_format_lookup_tables::max_mantissa[]; - -template struct binary_format_lookup_tables { - static constexpr float powers_of_ten[] = {1e0f, 1e1f, 1e2f, 1e3f, 1e4f, 1e5f, - 1e6f, 1e7f, 1e8f, 1e9f, 1e10f}; - - // Largest integer value v so that (5**index * v) <= 1<<24. - // 0x1000000 == 1<<24 - static constexpr uint64_t max_mantissa[] = { - 0x1000000, - 0x1000000 / 5, - 0x1000000 / (5 * 5), - 0x1000000 / (5 * 5 * 5), - 0x1000000 / (5 * 5 * 5 * 5), - 0x1000000 / (constant_55555), - 0x1000000 / (constant_55555 * 5), - 0x1000000 / (constant_55555 * 5 * 5), - 0x1000000 / (constant_55555 * 5 * 5 * 5), - 0x1000000 / (constant_55555 * 5 * 5 * 5 * 5), - 0x1000000 / (constant_55555 * constant_55555), - 0x1000000 / (constant_55555 * constant_55555 * 5)}; -}; - -template -constexpr float binary_format_lookup_tables::powers_of_ten[]; - -template -constexpr uint64_t binary_format_lookup_tables::max_mantissa[]; - -template <> -inline constexpr int binary_format::min_exponent_fast_path() { -#if (FLT_EVAL_METHOD != 1) && (FLT_EVAL_METHOD != 0) - return 0; -#else - return -22; -#endif -} - -template <> -inline constexpr int binary_format::min_exponent_fast_path() { -#if (FLT_EVAL_METHOD != 1) && (FLT_EVAL_METHOD != 0) - return 0; -#else - return -10; -#endif -} - -template <> -inline constexpr int binary_format::mantissa_explicit_bits() { - return 52; -} -template <> -inline constexpr int binary_format::mantissa_explicit_bits() { - return 23; -} - -template <> -inline constexpr int binary_format::max_exponent_round_to_even() { - return 23; -} - -template <> -inline constexpr int binary_format::max_exponent_round_to_even() { - return 10; -} - -template <> -inline constexpr int binary_format::min_exponent_round_to_even() { - return -4; -} - -template <> -inline constexpr int binary_format::min_exponent_round_to_even() { - return -17; -} - -template <> inline constexpr int binary_format::minimum_exponent() { - return -1023; -} -template <> inline constexpr int 
binary_format::minimum_exponent() { - return -127; -} - -template <> inline constexpr int binary_format::infinite_power() { - return 0x7FF; -} -template <> inline constexpr int binary_format::infinite_power() { - return 0xFF; -} - -template <> inline constexpr int binary_format::sign_index() { - return 63; -} -template <> inline constexpr int binary_format::sign_index() { - return 31; -} - -template <> -inline constexpr int binary_format::max_exponent_fast_path() { - return 22; -} -template <> -inline constexpr int binary_format::max_exponent_fast_path() { - return 10; -} - -template <> -inline constexpr uint64_t binary_format::max_mantissa_fast_path() { - return uint64_t(2) << mantissa_explicit_bits(); -} -template <> -inline constexpr uint64_t -binary_format::max_mantissa_fast_path(int64_t power) { - // caller is responsible to ensure that - // power >= 0 && power <= 22 - // - // Work around clang bug https://godbolt.org/z/zedh7rrhc - return (void)max_mantissa[0], max_mantissa[power]; -} -template <> -inline constexpr uint64_t binary_format::max_mantissa_fast_path() { - return uint64_t(2) << mantissa_explicit_bits(); -} -template <> -inline constexpr uint64_t -binary_format::max_mantissa_fast_path(int64_t power) { - // caller is responsible to ensure that - // power >= 0 && power <= 10 - // - // Work around clang bug https://godbolt.org/z/zedh7rrhc - return (void)max_mantissa[0], max_mantissa[power]; -} - -template <> -inline constexpr double -binary_format::exact_power_of_ten(int64_t power) { - // Work around clang bug https://godbolt.org/z/zedh7rrhc - return (void)powers_of_ten[0], powers_of_ten[power]; -} -template <> -inline constexpr float binary_format::exact_power_of_ten(int64_t power) { - // Work around clang bug https://godbolt.org/z/zedh7rrhc - return (void)powers_of_ten[0], powers_of_ten[power]; -} - -template <> inline constexpr int binary_format::largest_power_of_ten() { - return 308; -} -template <> inline constexpr int 
binary_format::largest_power_of_ten() { - return 38; -} - -template <> -inline constexpr int binary_format::smallest_power_of_ten() { - return -342; -} -template <> inline constexpr int binary_format::smallest_power_of_ten() { - return -64; -} - -template <> inline constexpr size_t binary_format::max_digits() { - return 769; -} -template <> inline constexpr size_t binary_format::max_digits() { - return 114; -} - -template <> -inline constexpr binary_format::equiv_uint -binary_format::exponent_mask() { - return 0x7F800000; -} -template <> -inline constexpr binary_format::equiv_uint -binary_format::exponent_mask() { - return 0x7FF0000000000000; -} - -template <> -inline constexpr binary_format::equiv_uint -binary_format::mantissa_mask() { - return 0x007FFFFF; -} -template <> -inline constexpr binary_format::equiv_uint -binary_format::mantissa_mask() { - return 0x000FFFFFFFFFFFFF; -} - -template <> -inline constexpr binary_format::equiv_uint -binary_format::hidden_bit_mask() { - return 0x00800000; -} -template <> -inline constexpr binary_format::equiv_uint -binary_format::hidden_bit_mask() { - return 0x0010000000000000; -} - -template -fastfloat_really_inline FASTFLOAT_CONSTEXPR20 void -to_float(bool negative, adjusted_mantissa am, T &value) { - using fastfloat_uint = typename binary_format::equiv_uint; - fastfloat_uint word = (fastfloat_uint)am.mantissa; - word |= fastfloat_uint(am.power2) - << binary_format::mantissa_explicit_bits(); - word |= fastfloat_uint(negative) << binary_format::sign_index(); -#if FASTFLOAT_HAS_BIT_CAST - value = std::bit_cast(word); -#else - ::memcpy(&value, &word, sizeof(T)); -#endif -} - -#ifdef FASTFLOAT_SKIP_WHITE_SPACE // disabled by default -template struct space_lut { - static constexpr bool value[] = { - 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}; -}; - -template constexpr bool space_lut::value[]; - -inline constexpr bool is_space(uint8_t c) { return space_lut<>::value[c]; } -#endif - -template static constexpr uint64_t int_cmp_zeros() { - static_assert((sizeof(UC) == 1) || (sizeof(UC) == 2) || (sizeof(UC) == 4), - "Unsupported character size"); - return (sizeof(UC) == 1) ? 0x3030303030303030 - : (sizeof(UC) == 2) - ? (uint64_t(UC('0')) << 48 | uint64_t(UC('0')) << 32 | - uint64_t(UC('0')) << 16 | UC('0')) - : (uint64_t(UC('0')) << 32 | UC('0')); -} -template static constexpr int int_cmp_len() { - return sizeof(uint64_t) / sizeof(UC); -} -template static constexpr UC const *str_const_nan() { - return nullptr; -} -template <> constexpr char const *str_const_nan() { return "nan"; } -template <> constexpr wchar_t const *str_const_nan() { return L"nan"; } -template <> constexpr char16_t const *str_const_nan() { - return u"nan"; -} -template <> constexpr char32_t const *str_const_nan() { - return U"nan"; -} -template static constexpr UC const *str_const_inf() { - return nullptr; -} -template <> constexpr char const *str_const_inf() { return "infinity"; } -template <> constexpr wchar_t const *str_const_inf() { - return L"infinity"; -} -template <> constexpr char16_t const *str_const_inf() { - return u"infinity"; -} -template <> constexpr char32_t const *str_const_inf() { - return U"infinity"; -} - -template struct int_luts { - static constexpr uint8_t chdigit[] = { - 
255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, - 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, - 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, - 255, 255, 255, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 255, 255, - 255, 255, 255, 255, 255, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, - 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, - 35, 255, 255, 255, 255, 255, 255, 10, 11, 12, 13, 14, 15, 16, 17, - 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, - 33, 34, 35, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, - 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, - 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, - 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, - 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, - 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, - 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, - 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, - 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, - 255}; - - static constexpr size_t maxdigits_u64[] = { - 64, 41, 32, 28, 25, 23, 22, 21, 20, 19, 18, 18, 17, 17, 16, 16, 16, 16, - 15, 15, 15, 15, 14, 14, 14, 14, 14, 14, 14, 13, 13, 13, 13, 13, 13}; - - static constexpr uint64_t min_safe_u64[] = { - 9223372036854775808ull, 12157665459056928801ull, 4611686018427387904, - 7450580596923828125, 4738381338321616896, 3909821048582988049, - 9223372036854775808ull, 12157665459056928801ull, 10000000000000000000ull, - 5559917313492231481, 2218611106740436992, 8650415919381337933, - 2177953337809371136, 6568408355712890625, 1152921504606846976, - 2862423051509815793, 6746640616477458432, 15181127029874798299ull, - 1638400000000000000, 3243919932521508681, 6221821273427820544, - 11592836324538749809ull, 
876488338465357824, 1490116119384765625, - 2481152873203736576, 4052555153018976267, 6502111422497947648, - 10260628712958602189ull, 15943230000000000000ull, 787662783788549761, - 1152921504606846976, 1667889514952984961, 2386420683693101056, - 3379220508056640625, 4738381338321616896}; -}; - -template constexpr uint8_t int_luts::chdigit[]; - -template constexpr size_t int_luts::maxdigits_u64[]; - -template constexpr uint64_t int_luts::min_safe_u64[]; - -template -fastfloat_really_inline constexpr uint8_t ch_to_digit(UC c) { - return int_luts<>::chdigit[static_cast(c)]; -} - -fastfloat_really_inline constexpr size_t max_digits_u64(int base) { - return int_luts<>::maxdigits_u64[base - 2]; -} - -// If a u64 is exactly max_digits_u64() in length, this is -// the value below which it has definitely overflowed. -fastfloat_really_inline constexpr uint64_t min_safe_u64(int base) { - return int_luts<>::min_safe_u64[base - 2]; -} - -} // namespace fast_float - -#endif - - -#ifndef FASTFLOAT_FAST_FLOAT_H -#define FASTFLOAT_FAST_FLOAT_H - - -namespace fast_float { -/** - * This function parses the character sequence [first,last) for a number. It - * parses floating-point numbers expecting a locale-indepent format equivalent - * to what is used by std::strtod in the default ("C") locale. The resulting - * floating-point value is the closest floating-point values (using either float - * or double), using the "round to even" convention for values that would - * otherwise fall right in-between two values. That is, we provide exact parsing - * according to the IEEE standard. - * - * Given a successful parse, the pointer (`ptr`) in the returned value is set to - * point right after the parsed number, and the `value` referenced is set to the - * parsed value. In case of error, the returned `ec` contains a representative - * error, otherwise the default (`std::errc()`) value is stored. 
- * - * The implementation does not throw and does not allocate memory (e.g., with - * `new` or `malloc`). - * - * Like the C++17 standard, the `fast_float::from_chars` functions take an - * optional last argument of the type `fast_float::chars_format`. It is a bitset - * value: we check whether `fmt & fast_float::chars_format::fixed` and `fmt & - * fast_float::chars_format::scientific` are set to determine whether we allow - * the fixed point and scientific notation respectively. The default is - * `fast_float::chars_format::general` which allows both `fixed` and - * `scientific`. - */ -template ())> -FASTFLOAT_CONSTEXPR20 from_chars_result_t -from_chars(UC const *first, UC const *last, T &value, - chars_format fmt = chars_format::general) noexcept; - -/** - * Like from_chars, but accepts an `options` argument to govern number parsing. - */ -template -FASTFLOAT_CONSTEXPR20 from_chars_result_t -from_chars_advanced(UC const *first, UC const *last, T &value, - parse_options_t options) noexcept; -/** - * from_chars for integer types. - */ -template ())> -FASTFLOAT_CONSTEXPR20 from_chars_result_t -from_chars(UC const *first, UC const *last, T &value, int base = 10) noexcept; - -} // namespace fast_float -#endif // FASTFLOAT_FAST_FLOAT_H - -#ifndef FASTFLOAT_ASCII_NUMBER_H -#define FASTFLOAT_ASCII_NUMBER_H - -#include -#include -#include -#include -#include -#include - - -#ifdef FASTFLOAT_SSE2 -#include -#endif - -#ifdef FASTFLOAT_NEON -#include -#endif - -namespace fast_float { - -template fastfloat_really_inline constexpr bool has_simd_opt() { -#ifdef FASTFLOAT_HAS_SIMD - return std::is_same::value; -#else - return false; -#endif -} - -// Next function can be micro-optimized, but compilers are entirely -// able to optimize it well. 
-template -fastfloat_really_inline constexpr bool is_integer(UC c) noexcept { - return !(c > UC('9') || c < UC('0')); -} - -fastfloat_really_inline constexpr uint64_t byteswap(uint64_t val) { - return (val & 0xFF00000000000000) >> 56 | (val & 0x00FF000000000000) >> 40 | - (val & 0x0000FF0000000000) >> 24 | (val & 0x000000FF00000000) >> 8 | - (val & 0x00000000FF000000) << 8 | (val & 0x0000000000FF0000) << 24 | - (val & 0x000000000000FF00) << 40 | (val & 0x00000000000000FF) << 56; -} - -// Read 8 UC into a u64. Truncates UC if not char. -template -fastfloat_really_inline FASTFLOAT_CONSTEXPR20 uint64_t -read8_to_u64(const UC *chars) { - if (cpp20_and_in_constexpr() || !std::is_same::value) { - uint64_t val = 0; - for (int i = 0; i < 8; ++i) { - val |= uint64_t(uint8_t(*chars)) << (i * 8); - ++chars; - } - return val; - } - uint64_t val; - ::memcpy(&val, chars, sizeof(uint64_t)); -#if FASTFLOAT_IS_BIG_ENDIAN == 1 - // Need to read as-if the number was in little-endian order. - val = byteswap(val); -#endif - return val; -} - -#ifdef FASTFLOAT_SSE2 - -fastfloat_really_inline uint64_t simd_read8_to_u64(const __m128i data) { - FASTFLOAT_SIMD_DISABLE_WARNINGS - const __m128i packed = _mm_packus_epi16(data, data); -#ifdef FASTFLOAT_64BIT - return uint64_t(_mm_cvtsi128_si64(packed)); -#else - uint64_t value; - // Visual Studio + older versions of GCC don't support _mm_storeu_si64 - _mm_storel_epi64(reinterpret_cast<__m128i *>(&value), packed); - return value; -#endif - FASTFLOAT_SIMD_RESTORE_WARNINGS -} - -fastfloat_really_inline uint64_t simd_read8_to_u64(const char16_t *chars) { - FASTFLOAT_SIMD_DISABLE_WARNINGS - return simd_read8_to_u64( - _mm_loadu_si128(reinterpret_cast(chars))); - FASTFLOAT_SIMD_RESTORE_WARNINGS -} - -#elif defined(FASTFLOAT_NEON) - -fastfloat_really_inline uint64_t simd_read8_to_u64(const uint16x8_t data) { - FASTFLOAT_SIMD_DISABLE_WARNINGS - uint8x8_t utf8_packed = vmovn_u16(data); - return vget_lane_u64(vreinterpret_u64_u8(utf8_packed), 0); - 
FASTFLOAT_SIMD_RESTORE_WARNINGS -} - -fastfloat_really_inline uint64_t simd_read8_to_u64(const char16_t *chars) { - FASTFLOAT_SIMD_DISABLE_WARNINGS - return simd_read8_to_u64( - vld1q_u16(reinterpret_cast(chars))); - FASTFLOAT_SIMD_RESTORE_WARNINGS -} - -#endif // FASTFLOAT_SSE2 - -// MSVC SFINAE is broken pre-VS2017 -#if defined(_MSC_VER) && _MSC_VER <= 1900 -template -#else -template ()) = 0> -#endif -// dummy for compile -uint64_t simd_read8_to_u64(UC const *) { - return 0; -} - -// credit @aqrit -fastfloat_really_inline FASTFLOAT_CONSTEXPR14 uint32_t -parse_eight_digits_unrolled(uint64_t val) { - const uint64_t mask = 0x000000FF000000FF; - const uint64_t mul1 = 0x000F424000000064; // 100 + (1000000ULL << 32) - const uint64_t mul2 = 0x0000271000000001; // 1 + (10000ULL << 32) - val -= 0x3030303030303030; - val = (val * 10) + (val >> 8); // val = (val * 2561) >> 8; - val = (((val & mask) * mul1) + (((val >> 16) & mask) * mul2)) >> 32; - return uint32_t(val); -} - -// Call this if chars are definitely 8 digits. -template -fastfloat_really_inline FASTFLOAT_CONSTEXPR20 uint32_t -parse_eight_digits_unrolled(UC const *chars) noexcept { - if (cpp20_and_in_constexpr() || !has_simd_opt()) { - return parse_eight_digits_unrolled(read8_to_u64(chars)); // truncation okay - } - return parse_eight_digits_unrolled(simd_read8_to_u64(chars)); -} - -// credit @aqrit -fastfloat_really_inline constexpr bool -is_made_of_eight_digits_fast(uint64_t val) noexcept { - return !((((val + 0x4646464646464646) | (val - 0x3030303030303030)) & - 0x8080808080808080)); -} - -#ifdef FASTFLOAT_HAS_SIMD - -// Call this if chars might not be 8 digits. -// Using this style (instead of is_made_of_eight_digits_fast() then -// parse_eight_digits_unrolled()) ensures we don't load SIMD registers twice. 
-fastfloat_really_inline FASTFLOAT_CONSTEXPR20 bool -simd_parse_if_eight_digits_unrolled(const char16_t *chars, - uint64_t &i) noexcept { - if (cpp20_and_in_constexpr()) { - return false; - } -#ifdef FASTFLOAT_SSE2 - FASTFLOAT_SIMD_DISABLE_WARNINGS - const __m128i data = - _mm_loadu_si128(reinterpret_cast(chars)); - - // (x - '0') <= 9 - // http://0x80.pl/articles/simd-parsing-int-sequences.html - const __m128i t0 = _mm_add_epi16(data, _mm_set1_epi16(32720)); - const __m128i t1 = _mm_cmpgt_epi16(t0, _mm_set1_epi16(-32759)); - - if (_mm_movemask_epi8(t1) == 0) { - i = i * 100000000 + parse_eight_digits_unrolled(simd_read8_to_u64(data)); - return true; - } else - return false; - FASTFLOAT_SIMD_RESTORE_WARNINGS -#elif defined(FASTFLOAT_NEON) - FASTFLOAT_SIMD_DISABLE_WARNINGS - const uint16x8_t data = vld1q_u16(reinterpret_cast(chars)); - - // (x - '0') <= 9 - // http://0x80.pl/articles/simd-parsing-int-sequences.html - const uint16x8_t t0 = vsubq_u16(data, vmovq_n_u16('0')); - const uint16x8_t mask = vcltq_u16(t0, vmovq_n_u16('9' - '0' + 1)); - - if (vminvq_u16(mask) == 0xFFFF) { - i = i * 100000000 + parse_eight_digits_unrolled(simd_read8_to_u64(data)); - return true; - } else - return false; - FASTFLOAT_SIMD_RESTORE_WARNINGS -#else - (void)chars; - (void)i; - return false; -#endif // FASTFLOAT_SSE2 -} - -#endif // FASTFLOAT_HAS_SIMD - -// MSVC SFINAE is broken pre-VS2017 -#if defined(_MSC_VER) && _MSC_VER <= 1900 -template -#else -template ()) = 0> -#endif -// dummy for compile -bool simd_parse_if_eight_digits_unrolled(UC const *, uint64_t &) { - return 0; -} - -template ::value) = 0> -fastfloat_really_inline FASTFLOAT_CONSTEXPR20 void -loop_parse_if_eight_digits(const UC *&p, const UC *const pend, uint64_t &i) { - if (!has_simd_opt()) { - return; - } - while ((std::distance(p, pend) >= 8) && - simd_parse_if_eight_digits_unrolled( - p, i)) { // in rare cases, this will overflow, but that's ok - p += 8; - } -} - -fastfloat_really_inline FASTFLOAT_CONSTEXPR20 void 
-loop_parse_if_eight_digits(const char *&p, const char *const pend, - uint64_t &i) { - // optimizes better than parse_if_eight_digits_unrolled() for UC = char. - while ((std::distance(p, pend) >= 8) && - is_made_of_eight_digits_fast(read8_to_u64(p))) { - i = i * 100000000 + - parse_eight_digits_unrolled(read8_to_u64( - p)); // in rare cases, this will overflow, but that's ok - p += 8; - } -} - -enum class parse_error { - no_error, - // [JSON-only] The minus sign must be followed by an integer. - missing_integer_after_sign, - // A sign must be followed by an integer or dot. - missing_integer_or_dot_after_sign, - // [JSON-only] The integer part must not have leading zeros. - leading_zeros_in_integer_part, - // [JSON-only] The integer part must have at least one digit. - no_digits_in_integer_part, - // [JSON-only] If there is a decimal point, there must be digits in the - // fractional part. - no_digits_in_fractional_part, - // The mantissa must have at least one digit. - no_digits_in_mantissa, - // Scientific notation requires an exponential part. - missing_exponential_part, -}; - -template struct parsed_number_string_t { - int64_t exponent{0}; - uint64_t mantissa{0}; - UC const *lastmatch{nullptr}; - bool negative{false}; - bool valid{false}; - bool too_many_digits{false}; - // contains the range of the significant digits - span integer{}; // non-nullable - span fraction{}; // nullable - parse_error error{parse_error::no_error}; -}; - -using byte_span = span; -using parsed_number_string = parsed_number_string_t; - -template -fastfloat_really_inline FASTFLOAT_CONSTEXPR20 parsed_number_string_t -report_parse_error(UC const *p, parse_error error) { - parsed_number_string_t answer; - answer.valid = false; - answer.lastmatch = p; - answer.error = error; - return answer; -} - -// Assuming that you use no more than 19 digits, this will -// parse an ASCII string. 
-template -fastfloat_really_inline FASTFLOAT_CONSTEXPR20 parsed_number_string_t -parse_number_string(UC const *p, UC const *pend, - parse_options_t options) noexcept { - chars_format const fmt = options.format; - UC const decimal_point = options.decimal_point; - - parsed_number_string_t answer; - answer.valid = false; - answer.too_many_digits = false; - answer.negative = (*p == UC('-')); -#ifdef FASTFLOAT_ALLOWS_LEADING_PLUS // disabled by default - if ((*p == UC('-')) || (!(fmt & FASTFLOAT_JSONFMT) && *p == UC('+'))) { -#else - if (*p == UC('-')) { // C++17 20.19.3.(7.1) explicitly forbids '+' sign here -#endif - ++p; - if (p == pend) { - return report_parse_error( - p, parse_error::missing_integer_or_dot_after_sign); - } - if (fmt & FASTFLOAT_JSONFMT) { - if (!is_integer(*p)) { // a sign must be followed by an integer - return report_parse_error(p, - parse_error::missing_integer_after_sign); - } - } else { - if (!is_integer(*p) && - (*p != - decimal_point)) { // a sign must be followed by an integer or the dot - return report_parse_error( - p, parse_error::missing_integer_or_dot_after_sign); - } - } - } - UC const *const start_digits = p; - - uint64_t i = 0; // an unsigned int avoids signed overflows (which are bad) - - while ((p != pend) && is_integer(*p)) { - // a multiplication by 10 is cheaper than an arbitrary integer - // multiplication - i = 10 * i + - uint64_t(*p - - UC('0')); // might overflow, we will handle the overflow later - ++p; - } - UC const *const end_of_integer_part = p; - int64_t digit_count = int64_t(end_of_integer_part - start_digits); - answer.integer = span(start_digits, size_t(digit_count)); - if (fmt & FASTFLOAT_JSONFMT) { - // at least 1 digit in integer part, without leading zeros - if (digit_count == 0) { - return report_parse_error(p, parse_error::no_digits_in_integer_part); - } - if ((start_digits[0] == UC('0') && digit_count > 1)) { - return report_parse_error(start_digits, - parse_error::leading_zeros_in_integer_part); - } - } - - 
int64_t exponent = 0; - const bool has_decimal_point = (p != pend) && (*p == decimal_point); - if (has_decimal_point) { - ++p; - UC const *before = p; - // can occur at most twice without overflowing, but let it occur more, since - // for integers with many digits, digit parsing is the primary bottleneck. - loop_parse_if_eight_digits(p, pend, i); - - while ((p != pend) && is_integer(*p)) { - uint8_t digit = uint8_t(*p - UC('0')); - ++p; - i = i * 10 + digit; // in rare cases, this will overflow, but that's ok - } - exponent = before - p; - answer.fraction = span(before, size_t(p - before)); - digit_count -= exponent; - } - if (fmt & FASTFLOAT_JSONFMT) { - // at least 1 digit in fractional part - if (has_decimal_point && exponent == 0) { - return report_parse_error(p, - parse_error::no_digits_in_fractional_part); - } - } else if (digit_count == - 0) { // we must have encountered at least one integer! - return report_parse_error(p, parse_error::no_digits_in_mantissa); - } - int64_t exp_number = 0; // explicit exponential part - if (((fmt & chars_format::scientific) && (p != pend) && - ((UC('e') == *p) || (UC('E') == *p))) || - ((fmt & FASTFLOAT_FORTRANFMT) && (p != pend) && - ((UC('+') == *p) || (UC('-') == *p) || (UC('d') == *p) || - (UC('D') == *p)))) { - UC const *location_of_e = p; - if ((UC('e') == *p) || (UC('E') == *p) || (UC('d') == *p) || - (UC('D') == *p)) { - ++p; - } - bool neg_exp = false; - if ((p != pend) && (UC('-') == *p)) { - neg_exp = true; - ++p; - } else if ((p != pend) && - (UC('+') == - *p)) { // '+' on exponent is allowed by C++17 20.19.3.(7.1) - ++p; - } - if ((p == pend) || !is_integer(*p)) { - if (!(fmt & chars_format::fixed)) { - // The exponential part is invalid for scientific notation, so it must - // be a trailing token for fixed notation. However, fixed notation is - // disabled, so report a scientific notation error. 
- return report_parse_error(p, parse_error::missing_exponential_part); - } - // Otherwise, we will be ignoring the 'e'. - p = location_of_e; - } else { - while ((p != pend) && is_integer(*p)) { - uint8_t digit = uint8_t(*p - UC('0')); - if (exp_number < 0x10000000) { - exp_number = 10 * exp_number + digit; - } - ++p; - } - if (neg_exp) { - exp_number = -exp_number; - } - exponent += exp_number; - } - } else { - // If it scientific and not fixed, we have to bail out. - if ((fmt & chars_format::scientific) && !(fmt & chars_format::fixed)) { - return report_parse_error(p, parse_error::missing_exponential_part); - } - } - answer.lastmatch = p; - answer.valid = true; - - // If we frequently had to deal with long strings of digits, - // we could extend our code by using a 128-bit integer instead - // of a 64-bit integer. However, this is uncommon. - // - // We can deal with up to 19 digits. - if (digit_count > 19) { // this is uncommon - // It is possible that the integer had an overflow. - // We have to handle the case where we have 0.0000somenumber. - // We need to be mindful of the case where we only have zeroes... - // E.g., 0.000000000...000. - UC const *start = start_digits; - while ((start != pend) && (*start == UC('0') || *start == decimal_point)) { - if (*start == UC('0')) { - digit_count--; - } - start++; - } - - if (digit_count > 19) { - answer.too_many_digits = true; - // Let us start again, this time, avoiding overflows. - // We don't need to check if is_integer, since we use the - // pre-tokenized spans from above. - i = 0; - p = answer.integer.ptr; - UC const *int_end = p + answer.integer.len(); - const uint64_t minimal_nineteen_digit_integer{1000000000000000000}; - while ((i < minimal_nineteen_digit_integer) && (p != int_end)) { - i = i * 10 + uint64_t(*p - UC('0')); - ++p; - } - if (i >= minimal_nineteen_digit_integer) { // We have a big integers - exponent = end_of_integer_part - p + exp_number; - } else { // We have a value with a fractional component. 
- p = answer.fraction.ptr; - UC const *frac_end = p + answer.fraction.len(); - while ((i < minimal_nineteen_digit_integer) && (p != frac_end)) { - i = i * 10 + uint64_t(*p - UC('0')); - ++p; - } - exponent = answer.fraction.ptr - p + exp_number; - } - // We have now corrected both exponent and i, to a truncated value - } - } - answer.exponent = exponent; - answer.mantissa = i; - return answer; -} - -template -fastfloat_really_inline FASTFLOAT_CONSTEXPR20 from_chars_result_t -parse_int_string(UC const *p, UC const *pend, T &value, int base) { - from_chars_result_t answer; - - UC const *const first = p; - - bool negative = (*p == UC('-')); - if (!std::is_signed::value && negative) { - answer.ec = std::errc::invalid_argument; - answer.ptr = first; - return answer; - } -#ifdef FASTFLOAT_ALLOWS_LEADING_PLUS // disabled by default - if ((*p == UC('-')) || (*p == UC('+'))) { -#else - if (*p == UC('-')) { -#endif - ++p; - } - - UC const *const start_num = p; - - while (p != pend && *p == UC('0')) { - ++p; - } - - const bool has_leading_zeros = p > start_num; - - UC const *const start_digits = p; - - uint64_t i = 0; - if (base == 10) { - loop_parse_if_eight_digits(p, pend, i); // use SIMD if possible - } - while (p != pend) { - uint8_t digit = ch_to_digit(*p); - if (digit >= base) { - break; - } - i = uint64_t(base) * i + digit; // might overflow, check this later - p++; - } - - size_t digit_count = size_t(p - start_digits); - - if (digit_count == 0) { - if (has_leading_zeros) { - value = 0; - answer.ec = std::errc(); - answer.ptr = p; - } else { - answer.ec = std::errc::invalid_argument; - answer.ptr = first; - } - return answer; - } - - answer.ptr = p; - - // check u64 overflow - size_t max_digits = max_digits_u64(base); - if (digit_count > max_digits) { - answer.ec = std::errc::result_out_of_range; - return answer; - } - // this check can be eliminated for all other types, but they will all require - // a max_digits(base) equivalent - if (digit_count == max_digits && i < 
min_safe_u64(base)) { - answer.ec = std::errc::result_out_of_range; - return answer; - } - - // check other types overflow - if (!std::is_same::value) { - if (i > uint64_t(std::numeric_limits::max()) + uint64_t(negative)) { - answer.ec = std::errc::result_out_of_range; - return answer; - } - } - - if (negative) { -#ifdef FASTFLOAT_VISUAL_STUDIO -#pragma warning(push) -#pragma warning(disable : 4146) -#endif - // this weird workaround is required because: - // - converting unsigned to signed when its value is greater than signed max - // is UB pre-C++23. - // - reinterpret_casting (~i + 1) would work, but it is not constexpr - // this is always optimized into a neg instruction (note: T is an integer - // type) - value = T(-std::numeric_limits::max() - - T(i - uint64_t(std::numeric_limits::max()))); -#ifdef FASTFLOAT_VISUAL_STUDIO -#pragma warning(pop) -#endif - } else { - value = T(i); - } - - answer.ec = std::errc(); - return answer; -} - -} // namespace fast_float - -#endif - -#ifndef FASTFLOAT_FAST_TABLE_H -#define FASTFLOAT_FAST_TABLE_H - -#include - -namespace fast_float { - -/** - * When mapping numbers from decimal to binary, - * we go from w * 10^q to m * 2^p but we have - * 10^q = 5^q * 2^q, so effectively - * we are trying to match - * w * 2^q * 5^q to m * 2^p. Thus the powers of two - * are not a concern since they can be represented - * exactly using the binary notation, only the powers of five - * affect the binary significand. - */ - -/** - * The smallest non-zero float (binary64) is 2^-1074. - * We take as input numbers of the form w x 10^q where w < 2^64. - * We have that w * 10^-343 < 2^(64-344) 5^-343 < 2^-1076. - * However, we have that - * (2^64-1) * 10^-342 = (2^64-1) * 2^-342 * 5^-342 > 2^-1074. - * Thus it is possible for a number of the form w * 10^-342 where - * w is a 64-bit value to be a non-zero floating-point number. 
- ********* - * Any number of form w * 10^309 where w>= 1 is going to be - * infinite in binary64 so we never need to worry about powers - * of 5 greater than 308. - */ -template struct powers_template { - - constexpr static int smallest_power_of_five = - binary_format::smallest_power_of_ten(); - constexpr static int largest_power_of_five = - binary_format::largest_power_of_ten(); - constexpr static int number_of_entries = - 2 * (largest_power_of_five - smallest_power_of_five + 1); - // Powers of five from 5^-342 all the way to 5^308 rounded toward one. - constexpr static uint64_t power_of_five_128[number_of_entries] = { - 0xeef453d6923bd65a, 0x113faa2906a13b3f, - 0x9558b4661b6565f8, 0x4ac7ca59a424c507, - 0xbaaee17fa23ebf76, 0x5d79bcf00d2df649, - 0xe95a99df8ace6f53, 0xf4d82c2c107973dc, - 0x91d8a02bb6c10594, 0x79071b9b8a4be869, - 0xb64ec836a47146f9, 0x9748e2826cdee284, - 0xe3e27a444d8d98b7, 0xfd1b1b2308169b25, - 0x8e6d8c6ab0787f72, 0xfe30f0f5e50e20f7, - 0xb208ef855c969f4f, 0xbdbd2d335e51a935, - 0xde8b2b66b3bc4723, 0xad2c788035e61382, - 0x8b16fb203055ac76, 0x4c3bcb5021afcc31, - 0xaddcb9e83c6b1793, 0xdf4abe242a1bbf3d, - 0xd953e8624b85dd78, 0xd71d6dad34a2af0d, - 0x87d4713d6f33aa6b, 0x8672648c40e5ad68, - 0xa9c98d8ccb009506, 0x680efdaf511f18c2, - 0xd43bf0effdc0ba48, 0x212bd1b2566def2, - 0x84a57695fe98746d, 0x14bb630f7604b57, - 0xa5ced43b7e3e9188, 0x419ea3bd35385e2d, - 0xcf42894a5dce35ea, 0x52064cac828675b9, - 0x818995ce7aa0e1b2, 0x7343efebd1940993, - 0xa1ebfb4219491a1f, 0x1014ebe6c5f90bf8, - 0xca66fa129f9b60a6, 0xd41a26e077774ef6, - 0xfd00b897478238d0, 0x8920b098955522b4, - 0x9e20735e8cb16382, 0x55b46e5f5d5535b0, - 0xc5a890362fddbc62, 0xeb2189f734aa831d, - 0xf712b443bbd52b7b, 0xa5e9ec7501d523e4, - 0x9a6bb0aa55653b2d, 0x47b233c92125366e, - 0xc1069cd4eabe89f8, 0x999ec0bb696e840a, - 0xf148440a256e2c76, 0xc00670ea43ca250d, - 0x96cd2a865764dbca, 0x380406926a5e5728, - 0xbc807527ed3e12bc, 0xc605083704f5ecf2, - 0xeba09271e88d976b, 0xf7864a44c633682e, - 0x93445b8731587ea3, 
0x7ab3ee6afbe0211d, - 0xb8157268fdae9e4c, 0x5960ea05bad82964, - 0xe61acf033d1a45df, 0x6fb92487298e33bd, - 0x8fd0c16206306bab, 0xa5d3b6d479f8e056, - 0xb3c4f1ba87bc8696, 0x8f48a4899877186c, - 0xe0b62e2929aba83c, 0x331acdabfe94de87, - 0x8c71dcd9ba0b4925, 0x9ff0c08b7f1d0b14, - 0xaf8e5410288e1b6f, 0x7ecf0ae5ee44dd9, - 0xdb71e91432b1a24a, 0xc9e82cd9f69d6150, - 0x892731ac9faf056e, 0xbe311c083a225cd2, - 0xab70fe17c79ac6ca, 0x6dbd630a48aaf406, - 0xd64d3d9db981787d, 0x92cbbccdad5b108, - 0x85f0468293f0eb4e, 0x25bbf56008c58ea5, - 0xa76c582338ed2621, 0xaf2af2b80af6f24e, - 0xd1476e2c07286faa, 0x1af5af660db4aee1, - 0x82cca4db847945ca, 0x50d98d9fc890ed4d, - 0xa37fce126597973c, 0xe50ff107bab528a0, - 0xcc5fc196fefd7d0c, 0x1e53ed49a96272c8, - 0xff77b1fcbebcdc4f, 0x25e8e89c13bb0f7a, - 0x9faacf3df73609b1, 0x77b191618c54e9ac, - 0xc795830d75038c1d, 0xd59df5b9ef6a2417, - 0xf97ae3d0d2446f25, 0x4b0573286b44ad1d, - 0x9becce62836ac577, 0x4ee367f9430aec32, - 0xc2e801fb244576d5, 0x229c41f793cda73f, - 0xf3a20279ed56d48a, 0x6b43527578c1110f, - 0x9845418c345644d6, 0x830a13896b78aaa9, - 0xbe5691ef416bd60c, 0x23cc986bc656d553, - 0xedec366b11c6cb8f, 0x2cbfbe86b7ec8aa8, - 0x94b3a202eb1c3f39, 0x7bf7d71432f3d6a9, - 0xb9e08a83a5e34f07, 0xdaf5ccd93fb0cc53, - 0xe858ad248f5c22c9, 0xd1b3400f8f9cff68, - 0x91376c36d99995be, 0x23100809b9c21fa1, - 0xb58547448ffffb2d, 0xabd40a0c2832a78a, - 0xe2e69915b3fff9f9, 0x16c90c8f323f516c, - 0x8dd01fad907ffc3b, 0xae3da7d97f6792e3, - 0xb1442798f49ffb4a, 0x99cd11cfdf41779c, - 0xdd95317f31c7fa1d, 0x40405643d711d583, - 0x8a7d3eef7f1cfc52, 0x482835ea666b2572, - 0xad1c8eab5ee43b66, 0xda3243650005eecf, - 0xd863b256369d4a40, 0x90bed43e40076a82, - 0x873e4f75e2224e68, 0x5a7744a6e804a291, - 0xa90de3535aaae202, 0x711515d0a205cb36, - 0xd3515c2831559a83, 0xd5a5b44ca873e03, - 0x8412d9991ed58091, 0xe858790afe9486c2, - 0xa5178fff668ae0b6, 0x626e974dbe39a872, - 0xce5d73ff402d98e3, 0xfb0a3d212dc8128f, - 0x80fa687f881c7f8e, 0x7ce66634bc9d0b99, - 0xa139029f6a239f72, 0x1c1fffc1ebc44e80, - 
0xc987434744ac874e, 0xa327ffb266b56220, - 0xfbe9141915d7a922, 0x4bf1ff9f0062baa8, - 0x9d71ac8fada6c9b5, 0x6f773fc3603db4a9, - 0xc4ce17b399107c22, 0xcb550fb4384d21d3, - 0xf6019da07f549b2b, 0x7e2a53a146606a48, - 0x99c102844f94e0fb, 0x2eda7444cbfc426d, - 0xc0314325637a1939, 0xfa911155fefb5308, - 0xf03d93eebc589f88, 0x793555ab7eba27ca, - 0x96267c7535b763b5, 0x4bc1558b2f3458de, - 0xbbb01b9283253ca2, 0x9eb1aaedfb016f16, - 0xea9c227723ee8bcb, 0x465e15a979c1cadc, - 0x92a1958a7675175f, 0xbfacd89ec191ec9, - 0xb749faed14125d36, 0xcef980ec671f667b, - 0xe51c79a85916f484, 0x82b7e12780e7401a, - 0x8f31cc0937ae58d2, 0xd1b2ecb8b0908810, - 0xb2fe3f0b8599ef07, 0x861fa7e6dcb4aa15, - 0xdfbdcece67006ac9, 0x67a791e093e1d49a, - 0x8bd6a141006042bd, 0xe0c8bb2c5c6d24e0, - 0xaecc49914078536d, 0x58fae9f773886e18, - 0xda7f5bf590966848, 0xaf39a475506a899e, - 0x888f99797a5e012d, 0x6d8406c952429603, - 0xaab37fd7d8f58178, 0xc8e5087ba6d33b83, - 0xd5605fcdcf32e1d6, 0xfb1e4a9a90880a64, - 0x855c3be0a17fcd26, 0x5cf2eea09a55067f, - 0xa6b34ad8c9dfc06f, 0xf42faa48c0ea481e, - 0xd0601d8efc57b08b, 0xf13b94daf124da26, - 0x823c12795db6ce57, 0x76c53d08d6b70858, - 0xa2cb1717b52481ed, 0x54768c4b0c64ca6e, - 0xcb7ddcdda26da268, 0xa9942f5dcf7dfd09, - 0xfe5d54150b090b02, 0xd3f93b35435d7c4c, - 0x9efa548d26e5a6e1, 0xc47bc5014a1a6daf, - 0xc6b8e9b0709f109a, 0x359ab6419ca1091b, - 0xf867241c8cc6d4c0, 0xc30163d203c94b62, - 0x9b407691d7fc44f8, 0x79e0de63425dcf1d, - 0xc21094364dfb5636, 0x985915fc12f542e4, - 0xf294b943e17a2bc4, 0x3e6f5b7b17b2939d, - 0x979cf3ca6cec5b5a, 0xa705992ceecf9c42, - 0xbd8430bd08277231, 0x50c6ff782a838353, - 0xece53cec4a314ebd, 0xa4f8bf5635246428, - 0x940f4613ae5ed136, 0x871b7795e136be99, - 0xb913179899f68584, 0x28e2557b59846e3f, - 0xe757dd7ec07426e5, 0x331aeada2fe589cf, - 0x9096ea6f3848984f, 0x3ff0d2c85def7621, - 0xb4bca50b065abe63, 0xfed077a756b53a9, - 0xe1ebce4dc7f16dfb, 0xd3e8495912c62894, - 0x8d3360f09cf6e4bd, 0x64712dd7abbbd95c, - 0xb080392cc4349dec, 0xbd8d794d96aacfb3, - 0xdca04777f541c567, 
0xecf0d7a0fc5583a0, - 0x89e42caaf9491b60, 0xf41686c49db57244, - 0xac5d37d5b79b6239, 0x311c2875c522ced5, - 0xd77485cb25823ac7, 0x7d633293366b828b, - 0x86a8d39ef77164bc, 0xae5dff9c02033197, - 0xa8530886b54dbdeb, 0xd9f57f830283fdfc, - 0xd267caa862a12d66, 0xd072df63c324fd7b, - 0x8380dea93da4bc60, 0x4247cb9e59f71e6d, - 0xa46116538d0deb78, 0x52d9be85f074e608, - 0xcd795be870516656, 0x67902e276c921f8b, - 0x806bd9714632dff6, 0xba1cd8a3db53b6, - 0xa086cfcd97bf97f3, 0x80e8a40eccd228a4, - 0xc8a883c0fdaf7df0, 0x6122cd128006b2cd, - 0xfad2a4b13d1b5d6c, 0x796b805720085f81, - 0x9cc3a6eec6311a63, 0xcbe3303674053bb0, - 0xc3f490aa77bd60fc, 0xbedbfc4411068a9c, - 0xf4f1b4d515acb93b, 0xee92fb5515482d44, - 0x991711052d8bf3c5, 0x751bdd152d4d1c4a, - 0xbf5cd54678eef0b6, 0xd262d45a78a0635d, - 0xef340a98172aace4, 0x86fb897116c87c34, - 0x9580869f0e7aac0e, 0xd45d35e6ae3d4da0, - 0xbae0a846d2195712, 0x8974836059cca109, - 0xe998d258869facd7, 0x2bd1a438703fc94b, - 0x91ff83775423cc06, 0x7b6306a34627ddcf, - 0xb67f6455292cbf08, 0x1a3bc84c17b1d542, - 0xe41f3d6a7377eeca, 0x20caba5f1d9e4a93, - 0x8e938662882af53e, 0x547eb47b7282ee9c, - 0xb23867fb2a35b28d, 0xe99e619a4f23aa43, - 0xdec681f9f4c31f31, 0x6405fa00e2ec94d4, - 0x8b3c113c38f9f37e, 0xde83bc408dd3dd04, - 0xae0b158b4738705e, 0x9624ab50b148d445, - 0xd98ddaee19068c76, 0x3badd624dd9b0957, - 0x87f8a8d4cfa417c9, 0xe54ca5d70a80e5d6, - 0xa9f6d30a038d1dbc, 0x5e9fcf4ccd211f4c, - 0xd47487cc8470652b, 0x7647c3200069671f, - 0x84c8d4dfd2c63f3b, 0x29ecd9f40041e073, - 0xa5fb0a17c777cf09, 0xf468107100525890, - 0xcf79cc9db955c2cc, 0x7182148d4066eeb4, - 0x81ac1fe293d599bf, 0xc6f14cd848405530, - 0xa21727db38cb002f, 0xb8ada00e5a506a7c, - 0xca9cf1d206fdc03b, 0xa6d90811f0e4851c, - 0xfd442e4688bd304a, 0x908f4a166d1da663, - 0x9e4a9cec15763e2e, 0x9a598e4e043287fe, - 0xc5dd44271ad3cdba, 0x40eff1e1853f29fd, - 0xf7549530e188c128, 0xd12bee59e68ef47c, - 0x9a94dd3e8cf578b9, 0x82bb74f8301958ce, - 0xc13a148e3032d6e7, 0xe36a52363c1faf01, - 0xf18899b1bc3f8ca1, 0xdc44e6c3cb279ac1, - 
0x96f5600f15a7b7e5, 0x29ab103a5ef8c0b9, - 0xbcb2b812db11a5de, 0x7415d448f6b6f0e7, - 0xebdf661791d60f56, 0x111b495b3464ad21, - 0x936b9fcebb25c995, 0xcab10dd900beec34, - 0xb84687c269ef3bfb, 0x3d5d514f40eea742, - 0xe65829b3046b0afa, 0xcb4a5a3112a5112, - 0x8ff71a0fe2c2e6dc, 0x47f0e785eaba72ab, - 0xb3f4e093db73a093, 0x59ed216765690f56, - 0xe0f218b8d25088b8, 0x306869c13ec3532c, - 0x8c974f7383725573, 0x1e414218c73a13fb, - 0xafbd2350644eeacf, 0xe5d1929ef90898fa, - 0xdbac6c247d62a583, 0xdf45f746b74abf39, - 0x894bc396ce5da772, 0x6b8bba8c328eb783, - 0xab9eb47c81f5114f, 0x66ea92f3f326564, - 0xd686619ba27255a2, 0xc80a537b0efefebd, - 0x8613fd0145877585, 0xbd06742ce95f5f36, - 0xa798fc4196e952e7, 0x2c48113823b73704, - 0xd17f3b51fca3a7a0, 0xf75a15862ca504c5, - 0x82ef85133de648c4, 0x9a984d73dbe722fb, - 0xa3ab66580d5fdaf5, 0xc13e60d0d2e0ebba, - 0xcc963fee10b7d1b3, 0x318df905079926a8, - 0xffbbcfe994e5c61f, 0xfdf17746497f7052, - 0x9fd561f1fd0f9bd3, 0xfeb6ea8bedefa633, - 0xc7caba6e7c5382c8, 0xfe64a52ee96b8fc0, - 0xf9bd690a1b68637b, 0x3dfdce7aa3c673b0, - 0x9c1661a651213e2d, 0x6bea10ca65c084e, - 0xc31bfa0fe5698db8, 0x486e494fcff30a62, - 0xf3e2f893dec3f126, 0x5a89dba3c3efccfa, - 0x986ddb5c6b3a76b7, 0xf89629465a75e01c, - 0xbe89523386091465, 0xf6bbb397f1135823, - 0xee2ba6c0678b597f, 0x746aa07ded582e2c, - 0x94db483840b717ef, 0xa8c2a44eb4571cdc, - 0xba121a4650e4ddeb, 0x92f34d62616ce413, - 0xe896a0d7e51e1566, 0x77b020baf9c81d17, - 0x915e2486ef32cd60, 0xace1474dc1d122e, - 0xb5b5ada8aaff80b8, 0xd819992132456ba, - 0xe3231912d5bf60e6, 0x10e1fff697ed6c69, - 0x8df5efabc5979c8f, 0xca8d3ffa1ef463c1, - 0xb1736b96b6fd83b3, 0xbd308ff8a6b17cb2, - 0xddd0467c64bce4a0, 0xac7cb3f6d05ddbde, - 0x8aa22c0dbef60ee4, 0x6bcdf07a423aa96b, - 0xad4ab7112eb3929d, 0x86c16c98d2c953c6, - 0xd89d64d57a607744, 0xe871c7bf077ba8b7, - 0x87625f056c7c4a8b, 0x11471cd764ad4972, - 0xa93af6c6c79b5d2d, 0xd598e40d3dd89bcf, - 0xd389b47879823479, 0x4aff1d108d4ec2c3, - 0x843610cb4bf160cb, 0xcedf722a585139ba, - 0xa54394fe1eedb8fe, 
0xc2974eb4ee658828, - 0xce947a3da6a9273e, 0x733d226229feea32, - 0x811ccc668829b887, 0x806357d5a3f525f, - 0xa163ff802a3426a8, 0xca07c2dcb0cf26f7, - 0xc9bcff6034c13052, 0xfc89b393dd02f0b5, - 0xfc2c3f3841f17c67, 0xbbac2078d443ace2, - 0x9d9ba7832936edc0, 0xd54b944b84aa4c0d, - 0xc5029163f384a931, 0xa9e795e65d4df11, - 0xf64335bcf065d37d, 0x4d4617b5ff4a16d5, - 0x99ea0196163fa42e, 0x504bced1bf8e4e45, - 0xc06481fb9bcf8d39, 0xe45ec2862f71e1d6, - 0xf07da27a82c37088, 0x5d767327bb4e5a4c, - 0x964e858c91ba2655, 0x3a6a07f8d510f86f, - 0xbbe226efb628afea, 0x890489f70a55368b, - 0xeadab0aba3b2dbe5, 0x2b45ac74ccea842e, - 0x92c8ae6b464fc96f, 0x3b0b8bc90012929d, - 0xb77ada0617e3bbcb, 0x9ce6ebb40173744, - 0xe55990879ddcaabd, 0xcc420a6a101d0515, - 0x8f57fa54c2a9eab6, 0x9fa946824a12232d, - 0xb32df8e9f3546564, 0x47939822dc96abf9, - 0xdff9772470297ebd, 0x59787e2b93bc56f7, - 0x8bfbea76c619ef36, 0x57eb4edb3c55b65a, - 0xaefae51477a06b03, 0xede622920b6b23f1, - 0xdab99e59958885c4, 0xe95fab368e45eced, - 0x88b402f7fd75539b, 0x11dbcb0218ebb414, - 0xaae103b5fcd2a881, 0xd652bdc29f26a119, - 0xd59944a37c0752a2, 0x4be76d3346f0495f, - 0x857fcae62d8493a5, 0x6f70a4400c562ddb, - 0xa6dfbd9fb8e5b88e, 0xcb4ccd500f6bb952, - 0xd097ad07a71f26b2, 0x7e2000a41346a7a7, - 0x825ecc24c873782f, 0x8ed400668c0c28c8, - 0xa2f67f2dfa90563b, 0x728900802f0f32fa, - 0xcbb41ef979346bca, 0x4f2b40a03ad2ffb9, - 0xfea126b7d78186bc, 0xe2f610c84987bfa8, - 0x9f24b832e6b0f436, 0xdd9ca7d2df4d7c9, - 0xc6ede63fa05d3143, 0x91503d1c79720dbb, - 0xf8a95fcf88747d94, 0x75a44c6397ce912a, - 0x9b69dbe1b548ce7c, 0xc986afbe3ee11aba, - 0xc24452da229b021b, 0xfbe85badce996168, - 0xf2d56790ab41c2a2, 0xfae27299423fb9c3, - 0x97c560ba6b0919a5, 0xdccd879fc967d41a, - 0xbdb6b8e905cb600f, 0x5400e987bbc1c920, - 0xed246723473e3813, 0x290123e9aab23b68, - 0x9436c0760c86e30b, 0xf9a0b6720aaf6521, - 0xb94470938fa89bce, 0xf808e40e8d5b3e69, - 0xe7958cb87392c2c2, 0xb60b1d1230b20e04, - 0x90bd77f3483bb9b9, 0xb1c6f22b5e6f48c2, - 0xb4ecd5f01a4aa828, 0x1e38aeb6360b1af3, - 
0xe2280b6c20dd5232, 0x25c6da63c38de1b0, - 0x8d590723948a535f, 0x579c487e5a38ad0e, - 0xb0af48ec79ace837, 0x2d835a9df0c6d851, - 0xdcdb1b2798182244, 0xf8e431456cf88e65, - 0x8a08f0f8bf0f156b, 0x1b8e9ecb641b58ff, - 0xac8b2d36eed2dac5, 0xe272467e3d222f3f, - 0xd7adf884aa879177, 0x5b0ed81dcc6abb0f, - 0x86ccbb52ea94baea, 0x98e947129fc2b4e9, - 0xa87fea27a539e9a5, 0x3f2398d747b36224, - 0xd29fe4b18e88640e, 0x8eec7f0d19a03aad, - 0x83a3eeeef9153e89, 0x1953cf68300424ac, - 0xa48ceaaab75a8e2b, 0x5fa8c3423c052dd7, - 0xcdb02555653131b6, 0x3792f412cb06794d, - 0x808e17555f3ebf11, 0xe2bbd88bbee40bd0, - 0xa0b19d2ab70e6ed6, 0x5b6aceaeae9d0ec4, - 0xc8de047564d20a8b, 0xf245825a5a445275, - 0xfb158592be068d2e, 0xeed6e2f0f0d56712, - 0x9ced737bb6c4183d, 0x55464dd69685606b, - 0xc428d05aa4751e4c, 0xaa97e14c3c26b886, - 0xf53304714d9265df, 0xd53dd99f4b3066a8, - 0x993fe2c6d07b7fab, 0xe546a8038efe4029, - 0xbf8fdb78849a5f96, 0xde98520472bdd033, - 0xef73d256a5c0f77c, 0x963e66858f6d4440, - 0x95a8637627989aad, 0xdde7001379a44aa8, - 0xbb127c53b17ec159, 0x5560c018580d5d52, - 0xe9d71b689dde71af, 0xaab8f01e6e10b4a6, - 0x9226712162ab070d, 0xcab3961304ca70e8, - 0xb6b00d69bb55c8d1, 0x3d607b97c5fd0d22, - 0xe45c10c42a2b3b05, 0x8cb89a7db77c506a, - 0x8eb98a7a9a5b04e3, 0x77f3608e92adb242, - 0xb267ed1940f1c61c, 0x55f038b237591ed3, - 0xdf01e85f912e37a3, 0x6b6c46dec52f6688, - 0x8b61313bbabce2c6, 0x2323ac4b3b3da015, - 0xae397d8aa96c1b77, 0xabec975e0a0d081a, - 0xd9c7dced53c72255, 0x96e7bd358c904a21, - 0x881cea14545c7575, 0x7e50d64177da2e54, - 0xaa242499697392d2, 0xdde50bd1d5d0b9e9, - 0xd4ad2dbfc3d07787, 0x955e4ec64b44e864, - 0x84ec3c97da624ab4, 0xbd5af13bef0b113e, - 0xa6274bbdd0fadd61, 0xecb1ad8aeacdd58e, - 0xcfb11ead453994ba, 0x67de18eda5814af2, - 0x81ceb32c4b43fcf4, 0x80eacf948770ced7, - 0xa2425ff75e14fc31, 0xa1258379a94d028d, - 0xcad2f7f5359a3b3e, 0x96ee45813a04330, - 0xfd87b5f28300ca0d, 0x8bca9d6e188853fc, - 0x9e74d1b791e07e48, 0x775ea264cf55347e, - 0xc612062576589dda, 0x95364afe032a819e, - 0xf79687aed3eec551, 
0x3a83ddbd83f52205, - 0x9abe14cd44753b52, 0xc4926a9672793543, - 0xc16d9a0095928a27, 0x75b7053c0f178294, - 0xf1c90080baf72cb1, 0x5324c68b12dd6339, - 0x971da05074da7bee, 0xd3f6fc16ebca5e04, - 0xbce5086492111aea, 0x88f4bb1ca6bcf585, - 0xec1e4a7db69561a5, 0x2b31e9e3d06c32e6, - 0x9392ee8e921d5d07, 0x3aff322e62439fd0, - 0xb877aa3236a4b449, 0x9befeb9fad487c3, - 0xe69594bec44de15b, 0x4c2ebe687989a9b4, - 0x901d7cf73ab0acd9, 0xf9d37014bf60a11, - 0xb424dc35095cd80f, 0x538484c19ef38c95, - 0xe12e13424bb40e13, 0x2865a5f206b06fba, - 0x8cbccc096f5088cb, 0xf93f87b7442e45d4, - 0xafebff0bcb24aafe, 0xf78f69a51539d749, - 0xdbe6fecebdedd5be, 0xb573440e5a884d1c, - 0x89705f4136b4a597, 0x31680a88f8953031, - 0xabcc77118461cefc, 0xfdc20d2b36ba7c3e, - 0xd6bf94d5e57a42bc, 0x3d32907604691b4d, - 0x8637bd05af6c69b5, 0xa63f9a49c2c1b110, - 0xa7c5ac471b478423, 0xfcf80dc33721d54, - 0xd1b71758e219652b, 0xd3c36113404ea4a9, - 0x83126e978d4fdf3b, 0x645a1cac083126ea, - 0xa3d70a3d70a3d70a, 0x3d70a3d70a3d70a4, - 0xcccccccccccccccc, 0xcccccccccccccccd, - 0x8000000000000000, 0x0, - 0xa000000000000000, 0x0, - 0xc800000000000000, 0x0, - 0xfa00000000000000, 0x0, - 0x9c40000000000000, 0x0, - 0xc350000000000000, 0x0, - 0xf424000000000000, 0x0, - 0x9896800000000000, 0x0, - 0xbebc200000000000, 0x0, - 0xee6b280000000000, 0x0, - 0x9502f90000000000, 0x0, - 0xba43b74000000000, 0x0, - 0xe8d4a51000000000, 0x0, - 0x9184e72a00000000, 0x0, - 0xb5e620f480000000, 0x0, - 0xe35fa931a0000000, 0x0, - 0x8e1bc9bf04000000, 0x0, - 0xb1a2bc2ec5000000, 0x0, - 0xde0b6b3a76400000, 0x0, - 0x8ac7230489e80000, 0x0, - 0xad78ebc5ac620000, 0x0, - 0xd8d726b7177a8000, 0x0, - 0x878678326eac9000, 0x0, - 0xa968163f0a57b400, 0x0, - 0xd3c21bcecceda100, 0x0, - 0x84595161401484a0, 0x0, - 0xa56fa5b99019a5c8, 0x0, - 0xcecb8f27f4200f3a, 0x0, - 0x813f3978f8940984, 0x4000000000000000, - 0xa18f07d736b90be5, 0x5000000000000000, - 0xc9f2c9cd04674ede, 0xa400000000000000, - 0xfc6f7c4045812296, 0x4d00000000000000, - 0x9dc5ada82b70b59d, 0xf020000000000000, - 
0xc5371912364ce305, 0x6c28000000000000, - 0xf684df56c3e01bc6, 0xc732000000000000, - 0x9a130b963a6c115c, 0x3c7f400000000000, - 0xc097ce7bc90715b3, 0x4b9f100000000000, - 0xf0bdc21abb48db20, 0x1e86d40000000000, - 0x96769950b50d88f4, 0x1314448000000000, - 0xbc143fa4e250eb31, 0x17d955a000000000, - 0xeb194f8e1ae525fd, 0x5dcfab0800000000, - 0x92efd1b8d0cf37be, 0x5aa1cae500000000, - 0xb7abc627050305ad, 0xf14a3d9e40000000, - 0xe596b7b0c643c719, 0x6d9ccd05d0000000, - 0x8f7e32ce7bea5c6f, 0xe4820023a2000000, - 0xb35dbf821ae4f38b, 0xdda2802c8a800000, - 0xe0352f62a19e306e, 0xd50b2037ad200000, - 0x8c213d9da502de45, 0x4526f422cc340000, - 0xaf298d050e4395d6, 0x9670b12b7f410000, - 0xdaf3f04651d47b4c, 0x3c0cdd765f114000, - 0x88d8762bf324cd0f, 0xa5880a69fb6ac800, - 0xab0e93b6efee0053, 0x8eea0d047a457a00, - 0xd5d238a4abe98068, 0x72a4904598d6d880, - 0x85a36366eb71f041, 0x47a6da2b7f864750, - 0xa70c3c40a64e6c51, 0x999090b65f67d924, - 0xd0cf4b50cfe20765, 0xfff4b4e3f741cf6d, - 0x82818f1281ed449f, 0xbff8f10e7a8921a4, - 0xa321f2d7226895c7, 0xaff72d52192b6a0d, - 0xcbea6f8ceb02bb39, 0x9bf4f8a69f764490, - 0xfee50b7025c36a08, 0x2f236d04753d5b4, - 0x9f4f2726179a2245, 0x1d762422c946590, - 0xc722f0ef9d80aad6, 0x424d3ad2b7b97ef5, - 0xf8ebad2b84e0d58b, 0xd2e0898765a7deb2, - 0x9b934c3b330c8577, 0x63cc55f49f88eb2f, - 0xc2781f49ffcfa6d5, 0x3cbf6b71c76b25fb, - 0xf316271c7fc3908a, 0x8bef464e3945ef7a, - 0x97edd871cfda3a56, 0x97758bf0e3cbb5ac, - 0xbde94e8e43d0c8ec, 0x3d52eeed1cbea317, - 0xed63a231d4c4fb27, 0x4ca7aaa863ee4bdd, - 0x945e455f24fb1cf8, 0x8fe8caa93e74ef6a, - 0xb975d6b6ee39e436, 0xb3e2fd538e122b44, - 0xe7d34c64a9c85d44, 0x60dbbca87196b616, - 0x90e40fbeea1d3a4a, 0xbc8955e946fe31cd, - 0xb51d13aea4a488dd, 0x6babab6398bdbe41, - 0xe264589a4dcdab14, 0xc696963c7eed2dd1, - 0x8d7eb76070a08aec, 0xfc1e1de5cf543ca2, - 0xb0de65388cc8ada8, 0x3b25a55f43294bcb, - 0xdd15fe86affad912, 0x49ef0eb713f39ebe, - 0x8a2dbf142dfcc7ab, 0x6e3569326c784337, - 0xacb92ed9397bf996, 0x49c2c37f07965404, - 0xd7e77a8f87daf7fb, 
0xdc33745ec97be906, - 0x86f0ac99b4e8dafd, 0x69a028bb3ded71a3, - 0xa8acd7c0222311bc, 0xc40832ea0d68ce0c, - 0xd2d80db02aabd62b, 0xf50a3fa490c30190, - 0x83c7088e1aab65db, 0x792667c6da79e0fa, - 0xa4b8cab1a1563f52, 0x577001b891185938, - 0xcde6fd5e09abcf26, 0xed4c0226b55e6f86, - 0x80b05e5ac60b6178, 0x544f8158315b05b4, - 0xa0dc75f1778e39d6, 0x696361ae3db1c721, - 0xc913936dd571c84c, 0x3bc3a19cd1e38e9, - 0xfb5878494ace3a5f, 0x4ab48a04065c723, - 0x9d174b2dcec0e47b, 0x62eb0d64283f9c76, - 0xc45d1df942711d9a, 0x3ba5d0bd324f8394, - 0xf5746577930d6500, 0xca8f44ec7ee36479, - 0x9968bf6abbe85f20, 0x7e998b13cf4e1ecb, - 0xbfc2ef456ae276e8, 0x9e3fedd8c321a67e, - 0xefb3ab16c59b14a2, 0xc5cfe94ef3ea101e, - 0x95d04aee3b80ece5, 0xbba1f1d158724a12, - 0xbb445da9ca61281f, 0x2a8a6e45ae8edc97, - 0xea1575143cf97226, 0xf52d09d71a3293bd, - 0x924d692ca61be758, 0x593c2626705f9c56, - 0xb6e0c377cfa2e12e, 0x6f8b2fb00c77836c, - 0xe498f455c38b997a, 0xb6dfb9c0f956447, - 0x8edf98b59a373fec, 0x4724bd4189bd5eac, - 0xb2977ee300c50fe7, 0x58edec91ec2cb657, - 0xdf3d5e9bc0f653e1, 0x2f2967b66737e3ed, - 0x8b865b215899f46c, 0xbd79e0d20082ee74, - 0xae67f1e9aec07187, 0xecd8590680a3aa11, - 0xda01ee641a708de9, 0xe80e6f4820cc9495, - 0x884134fe908658b2, 0x3109058d147fdcdd, - 0xaa51823e34a7eede, 0xbd4b46f0599fd415, - 0xd4e5e2cdc1d1ea96, 0x6c9e18ac7007c91a, - 0x850fadc09923329e, 0x3e2cf6bc604ddb0, - 0xa6539930bf6bff45, 0x84db8346b786151c, - 0xcfe87f7cef46ff16, 0xe612641865679a63, - 0x81f14fae158c5f6e, 0x4fcb7e8f3f60c07e, - 0xa26da3999aef7749, 0xe3be5e330f38f09d, - 0xcb090c8001ab551c, 0x5cadf5bfd3072cc5, - 0xfdcb4fa002162a63, 0x73d9732fc7c8f7f6, - 0x9e9f11c4014dda7e, 0x2867e7fddcdd9afa, - 0xc646d63501a1511d, 0xb281e1fd541501b8, - 0xf7d88bc24209a565, 0x1f225a7ca91a4226, - 0x9ae757596946075f, 0x3375788de9b06958, - 0xc1a12d2fc3978937, 0x52d6b1641c83ae, - 0xf209787bb47d6b84, 0xc0678c5dbd23a49a, - 0x9745eb4d50ce6332, 0xf840b7ba963646e0, - 0xbd176620a501fbff, 0xb650e5a93bc3d898, - 0xec5d3fa8ce427aff, 0xa3e51f138ab4cebe, - 
0x93ba47c980e98cdf, 0xc66f336c36b10137, - 0xb8a8d9bbe123f017, 0xb80b0047445d4184, - 0xe6d3102ad96cec1d, 0xa60dc059157491e5, - 0x9043ea1ac7e41392, 0x87c89837ad68db2f, - 0xb454e4a179dd1877, 0x29babe4598c311fb, - 0xe16a1dc9d8545e94, 0xf4296dd6fef3d67a, - 0x8ce2529e2734bb1d, 0x1899e4a65f58660c, - 0xb01ae745b101e9e4, 0x5ec05dcff72e7f8f, - 0xdc21a1171d42645d, 0x76707543f4fa1f73, - 0x899504ae72497eba, 0x6a06494a791c53a8, - 0xabfa45da0edbde69, 0x487db9d17636892, - 0xd6f8d7509292d603, 0x45a9d2845d3c42b6, - 0x865b86925b9bc5c2, 0xb8a2392ba45a9b2, - 0xa7f26836f282b732, 0x8e6cac7768d7141e, - 0xd1ef0244af2364ff, 0x3207d795430cd926, - 0x8335616aed761f1f, 0x7f44e6bd49e807b8, - 0xa402b9c5a8d3a6e7, 0x5f16206c9c6209a6, - 0xcd036837130890a1, 0x36dba887c37a8c0f, - 0x802221226be55a64, 0xc2494954da2c9789, - 0xa02aa96b06deb0fd, 0xf2db9baa10b7bd6c, - 0xc83553c5c8965d3d, 0x6f92829494e5acc7, - 0xfa42a8b73abbf48c, 0xcb772339ba1f17f9, - 0x9c69a97284b578d7, 0xff2a760414536efb, - 0xc38413cf25e2d70d, 0xfef5138519684aba, - 0xf46518c2ef5b8cd1, 0x7eb258665fc25d69, - 0x98bf2f79d5993802, 0xef2f773ffbd97a61, - 0xbeeefb584aff8603, 0xaafb550ffacfd8fa, - 0xeeaaba2e5dbf6784, 0x95ba2a53f983cf38, - 0x952ab45cfa97a0b2, 0xdd945a747bf26183, - 0xba756174393d88df, 0x94f971119aeef9e4, - 0xe912b9d1478ceb17, 0x7a37cd5601aab85d, - 0x91abb422ccb812ee, 0xac62e055c10ab33a, - 0xb616a12b7fe617aa, 0x577b986b314d6009, - 0xe39c49765fdf9d94, 0xed5a7e85fda0b80b, - 0x8e41ade9fbebc27d, 0x14588f13be847307, - 0xb1d219647ae6b31c, 0x596eb2d8ae258fc8, - 0xde469fbd99a05fe3, 0x6fca5f8ed9aef3bb, - 0x8aec23d680043bee, 0x25de7bb9480d5854, - 0xada72ccc20054ae9, 0xaf561aa79a10ae6a, - 0xd910f7ff28069da4, 0x1b2ba1518094da04, - 0x87aa9aff79042286, 0x90fb44d2f05d0842, - 0xa99541bf57452b28, 0x353a1607ac744a53, - 0xd3fa922f2d1675f2, 0x42889b8997915ce8, - 0x847c9b5d7c2e09b7, 0x69956135febada11, - 0xa59bc234db398c25, 0x43fab9837e699095, - 0xcf02b2c21207ef2e, 0x94f967e45e03f4bb, - 0x8161afb94b44f57d, 0x1d1be0eebac278f5, - 0xa1ba1ba79e1632dc, 
0x6462d92a69731732, - 0xca28a291859bbf93, 0x7d7b8f7503cfdcfe, - 0xfcb2cb35e702af78, 0x5cda735244c3d43e, - 0x9defbf01b061adab, 0x3a0888136afa64a7, - 0xc56baec21c7a1916, 0x88aaa1845b8fdd0, - 0xf6c69a72a3989f5b, 0x8aad549e57273d45, - 0x9a3c2087a63f6399, 0x36ac54e2f678864b, - 0xc0cb28a98fcf3c7f, 0x84576a1bb416a7dd, - 0xf0fdf2d3f3c30b9f, 0x656d44a2a11c51d5, - 0x969eb7c47859e743, 0x9f644ae5a4b1b325, - 0xbc4665b596706114, 0x873d5d9f0dde1fee, - 0xeb57ff22fc0c7959, 0xa90cb506d155a7ea, - 0x9316ff75dd87cbd8, 0x9a7f12442d588f2, - 0xb7dcbf5354e9bece, 0xc11ed6d538aeb2f, - 0xe5d3ef282a242e81, 0x8f1668c8a86da5fa, - 0x8fa475791a569d10, 0xf96e017d694487bc, - 0xb38d92d760ec4455, 0x37c981dcc395a9ac, - 0xe070f78d3927556a, 0x85bbe253f47b1417, - 0x8c469ab843b89562, 0x93956d7478ccec8e, - 0xaf58416654a6babb, 0x387ac8d1970027b2, - 0xdb2e51bfe9d0696a, 0x6997b05fcc0319e, - 0x88fcf317f22241e2, 0x441fece3bdf81f03, - 0xab3c2fddeeaad25a, 0xd527e81cad7626c3, - 0xd60b3bd56a5586f1, 0x8a71e223d8d3b074, - 0x85c7056562757456, 0xf6872d5667844e49, - 0xa738c6bebb12d16c, 0xb428f8ac016561db, - 0xd106f86e69d785c7, 0xe13336d701beba52, - 0x82a45b450226b39c, 0xecc0024661173473, - 0xa34d721642b06084, 0x27f002d7f95d0190, - 0xcc20ce9bd35c78a5, 0x31ec038df7b441f4, - 0xff290242c83396ce, 0x7e67047175a15271, - 0x9f79a169bd203e41, 0xf0062c6e984d386, - 0xc75809c42c684dd1, 0x52c07b78a3e60868, - 0xf92e0c3537826145, 0xa7709a56ccdf8a82, - 0x9bbcc7a142b17ccb, 0x88a66076400bb691, - 0xc2abf989935ddbfe, 0x6acff893d00ea435, - 0xf356f7ebf83552fe, 0x583f6b8c4124d43, - 0x98165af37b2153de, 0xc3727a337a8b704a, - 0xbe1bf1b059e9a8d6, 0x744f18c0592e4c5c, - 0xeda2ee1c7064130c, 0x1162def06f79df73, - 0x9485d4d1c63e8be7, 0x8addcb5645ac2ba8, - 0xb9a74a0637ce2ee1, 0x6d953e2bd7173692, - 0xe8111c87c5c1ba99, 0xc8fa8db6ccdd0437, - 0x910ab1d4db9914a0, 0x1d9c9892400a22a2, - 0xb54d5e4a127f59c8, 0x2503beb6d00cab4b, - 0xe2a0b5dc971f303a, 0x2e44ae64840fd61d, - 0x8da471a9de737e24, 0x5ceaecfed289e5d2, - 0xb10d8e1456105dad, 0x7425a83e872c5f47, - 
0xdd50f1996b947518, 0xd12f124e28f77719, - 0x8a5296ffe33cc92f, 0x82bd6b70d99aaa6f, - 0xace73cbfdc0bfb7b, 0x636cc64d1001550b, - 0xd8210befd30efa5a, 0x3c47f7e05401aa4e, - 0x8714a775e3e95c78, 0x65acfaec34810a71, - 0xa8d9d1535ce3b396, 0x7f1839a741a14d0d, - 0xd31045a8341ca07c, 0x1ede48111209a050, - 0x83ea2b892091e44d, 0x934aed0aab460432, - 0xa4e4b66b68b65d60, 0xf81da84d5617853f, - 0xce1de40642e3f4b9, 0x36251260ab9d668e, - 0x80d2ae83e9ce78f3, 0xc1d72b7c6b426019, - 0xa1075a24e4421730, 0xb24cf65b8612f81f, - 0xc94930ae1d529cfc, 0xdee033f26797b627, - 0xfb9b7cd9a4a7443c, 0x169840ef017da3b1, - 0x9d412e0806e88aa5, 0x8e1f289560ee864e, - 0xc491798a08a2ad4e, 0xf1a6f2bab92a27e2, - 0xf5b5d7ec8acb58a2, 0xae10af696774b1db, - 0x9991a6f3d6bf1765, 0xacca6da1e0a8ef29, - 0xbff610b0cc6edd3f, 0x17fd090a58d32af3, - 0xeff394dcff8a948e, 0xddfc4b4cef07f5b0, - 0x95f83d0a1fb69cd9, 0x4abdaf101564f98e, - 0xbb764c4ca7a4440f, 0x9d6d1ad41abe37f1, - 0xea53df5fd18d5513, 0x84c86189216dc5ed, - 0x92746b9be2f8552c, 0x32fd3cf5b4e49bb4, - 0xb7118682dbb66a77, 0x3fbc8c33221dc2a1, - 0xe4d5e82392a40515, 0xfabaf3feaa5334a, - 0x8f05b1163ba6832d, 0x29cb4d87f2a7400e, - 0xb2c71d5bca9023f8, 0x743e20e9ef511012, - 0xdf78e4b2bd342cf6, 0x914da9246b255416, - 0x8bab8eefb6409c1a, 0x1ad089b6c2f7548e, - 0xae9672aba3d0c320, 0xa184ac2473b529b1, - 0xda3c0f568cc4f3e8, 0xc9e5d72d90a2741e, - 0x8865899617fb1871, 0x7e2fa67c7a658892, - 0xaa7eebfb9df9de8d, 0xddbb901b98feeab7, - 0xd51ea6fa85785631, 0x552a74227f3ea565, - 0x8533285c936b35de, 0xd53a88958f87275f, - 0xa67ff273b8460356, 0x8a892abaf368f137, - 0xd01fef10a657842c, 0x2d2b7569b0432d85, - 0x8213f56a67f6b29b, 0x9c3b29620e29fc73, - 0xa298f2c501f45f42, 0x8349f3ba91b47b8f, - 0xcb3f2f7642717713, 0x241c70a936219a73, - 0xfe0efb53d30dd4d7, 0xed238cd383aa0110, - 0x9ec95d1463e8a506, 0xf4363804324a40aa, - 0xc67bb4597ce2ce48, 0xb143c6053edcd0d5, - 0xf81aa16fdc1b81da, 0xdd94b7868e94050a, - 0x9b10a4e5e9913128, 0xca7cf2b4191c8326, - 0xc1d4ce1f63f57d72, 0xfd1c2f611f63a3f0, - 0xf24a01a73cf2dccf, 
0xbc633b39673c8cec, - 0x976e41088617ca01, 0xd5be0503e085d813, - 0xbd49d14aa79dbc82, 0x4b2d8644d8a74e18, - 0xec9c459d51852ba2, 0xddf8e7d60ed1219e, - 0x93e1ab8252f33b45, 0xcabb90e5c942b503, - 0xb8da1662e7b00a17, 0x3d6a751f3b936243, - 0xe7109bfba19c0c9d, 0xcc512670a783ad4, - 0x906a617d450187e2, 0x27fb2b80668b24c5, - 0xb484f9dc9641e9da, 0xb1f9f660802dedf6, - 0xe1a63853bbd26451, 0x5e7873f8a0396973, - 0x8d07e33455637eb2, 0xdb0b487b6423e1e8, - 0xb049dc016abc5e5f, 0x91ce1a9a3d2cda62, - 0xdc5c5301c56b75f7, 0x7641a140cc7810fb, - 0x89b9b3e11b6329ba, 0xa9e904c87fcb0a9d, - 0xac2820d9623bf429, 0x546345fa9fbdcd44, - 0xd732290fbacaf133, 0xa97c177947ad4095, - 0x867f59a9d4bed6c0, 0x49ed8eabcccc485d, - 0xa81f301449ee8c70, 0x5c68f256bfff5a74, - 0xd226fc195c6a2f8c, 0x73832eec6fff3111, - 0x83585d8fd9c25db7, 0xc831fd53c5ff7eab, - 0xa42e74f3d032f525, 0xba3e7ca8b77f5e55, - 0xcd3a1230c43fb26f, 0x28ce1bd2e55f35eb, - 0x80444b5e7aa7cf85, 0x7980d163cf5b81b3, - 0xa0555e361951c366, 0xd7e105bcc332621f, - 0xc86ab5c39fa63440, 0x8dd9472bf3fefaa7, - 0xfa856334878fc150, 0xb14f98f6f0feb951, - 0x9c935e00d4b9d8d2, 0x6ed1bf9a569f33d3, - 0xc3b8358109e84f07, 0xa862f80ec4700c8, - 0xf4a642e14c6262c8, 0xcd27bb612758c0fa, - 0x98e7e9cccfbd7dbd, 0x8038d51cb897789c, - 0xbf21e44003acdd2c, 0xe0470a63e6bd56c3, - 0xeeea5d5004981478, 0x1858ccfce06cac74, - 0x95527a5202df0ccb, 0xf37801e0c43ebc8, - 0xbaa718e68396cffd, 0xd30560258f54e6ba, - 0xe950df20247c83fd, 0x47c6b82ef32a2069, - 0x91d28b7416cdd27e, 0x4cdc331d57fa5441, - 0xb6472e511c81471d, 0xe0133fe4adf8e952, - 0xe3d8f9e563a198e5, 0x58180fddd97723a6, - 0x8e679c2f5e44ff8f, 0x570f09eaa7ea7648, - }; -}; - -template -constexpr uint64_t - powers_template::power_of_five_128[number_of_entries]; - -using powers = powers_template<>; - -} // namespace fast_float - -#endif - -#ifndef FASTFLOAT_DECIMAL_TO_BINARY_H -#define FASTFLOAT_DECIMAL_TO_BINARY_H - -#include -#include -#include -#include -#include -#include - -namespace fast_float { - -// This will compute or rather 
approximate w * 5**q and return a pair of 64-bit -// words approximating the result, with the "high" part corresponding to the -// most significant bits and the low part corresponding to the least significant -// bits. -// -template -fastfloat_really_inline FASTFLOAT_CONSTEXPR20 value128 -compute_product_approximation(int64_t q, uint64_t w) { - const int index = 2 * int(q - powers::smallest_power_of_five); - // For small values of q, e.g., q in [0,27], the answer is always exact - // because The line value128 firstproduct = full_multiplication(w, - // power_of_five_128[index]); gives the exact answer. - value128 firstproduct = - full_multiplication(w, powers::power_of_five_128[index]); - static_assert((bit_precision >= 0) && (bit_precision <= 64), - " precision should be in (0,64]"); - constexpr uint64_t precision_mask = - (bit_precision < 64) ? (uint64_t(0xFFFFFFFFFFFFFFFF) >> bit_precision) - : uint64_t(0xFFFFFFFFFFFFFFFF); - if ((firstproduct.high & precision_mask) == - precision_mask) { // could further guard with (lower + w < lower) - // regarding the second product, we only need secondproduct.high, but our - // expectation is that the compiler will optimize this extra work away if - // needed. 
- value128 secondproduct = - full_multiplication(w, powers::power_of_five_128[index + 1]); - firstproduct.low += secondproduct.high; - if (secondproduct.high > firstproduct.low) { - firstproduct.high++; - } - } - return firstproduct; -} - -namespace detail { -/** - * For q in (0,350), we have that - * f = (((152170 + 65536) * q ) >> 16); - * is equal to - * floor(p) + q - * where - * p = log(5**q)/log(2) = q * log(5)/log(2) - * - * For negative values of q in (-400,0), we have that - * f = (((152170 + 65536) * q ) >> 16); - * is equal to - * -ceil(p) + q - * where - * p = log(5**-q)/log(2) = -q * log(5)/log(2) - */ -constexpr fastfloat_really_inline int32_t power(int32_t q) noexcept { - return (((152170 + 65536) * q) >> 16) + 63; -} -} // namespace detail - -// create an adjusted mantissa, biased by the invalid power2 -// for significant digits already multiplied by 10 ** q. -template -fastfloat_really_inline FASTFLOAT_CONSTEXPR14 adjusted_mantissa -compute_error_scaled(int64_t q, uint64_t w, int lz) noexcept { - int hilz = int(w >> 63) ^ 1; - adjusted_mantissa answer; - answer.mantissa = w << hilz; - int bias = binary::mantissa_explicit_bits() - binary::minimum_exponent(); - answer.power2 = int32_t(detail::power(int32_t(q)) + bias - hilz - lz - 62 + - invalid_am_bias); - return answer; -} - -// w * 10 ** q, without rounding the representation up. -// the power2 in the exponent will be adjusted by invalid_am_bias. -template -fastfloat_really_inline FASTFLOAT_CONSTEXPR20 adjusted_mantissa -compute_error(int64_t q, uint64_t w) noexcept { - int lz = leading_zeroes(w); - w <<= lz; - value128 product = - compute_product_approximation(q, w); - return compute_error_scaled(q, product.high, lz); -} - -// w * 10 ** q -// The returned value should be a valid ieee64 number that simply need to be -// packed. However, in some very rare cases, the computation will fail. 
In such -// cases, we return an adjusted_mantissa with a negative power of 2: the caller -// should recompute in such cases. -template -fastfloat_really_inline FASTFLOAT_CONSTEXPR20 adjusted_mantissa -compute_float(int64_t q, uint64_t w) noexcept { - adjusted_mantissa answer; - if ((w == 0) || (q < binary::smallest_power_of_ten())) { - answer.power2 = 0; - answer.mantissa = 0; - // result should be zero - return answer; - } - if (q > binary::largest_power_of_ten()) { - // we want to get infinity: - answer.power2 = binary::infinite_power(); - answer.mantissa = 0; - return answer; - } - // At this point in time q is in [powers::smallest_power_of_five, - // powers::largest_power_of_five]. - - // We want the most significant bit of i to be 1. Shift if needed. - int lz = leading_zeroes(w); - w <<= lz; - - // The required precision is binary::mantissa_explicit_bits() + 3 because - // 1. We need the implicit bit - // 2. We need an extra bit for rounding purposes - // 3. We might lose a bit due to the "upperbit" routine (result too small, - // requiring a shift) - - value128 product = - compute_product_approximation(q, w); - // The computed 'product' is always sufficient. - // Mathematical proof: - // Noble Mushtak and Daniel Lemire, Fast Number Parsing Without Fallback (to - // appear) See script/mushtak_lemire.py - - // The "compute_product_approximation" function can be slightly slower than a - // branchless approach: value128 product = compute_product(q, w); but in - // practice, we can win big with the compute_product_approximation if its - // additional branch is easily predicted. Which is best is data specific. - int upperbit = int(product.high >> 63); - int shift = upperbit + 64 - binary::mantissa_explicit_bits() - 3; - - answer.mantissa = product.high >> shift; - - answer.power2 = int32_t(detail::power(int32_t(q)) + upperbit - lz - - binary::minimum_exponent()); - if (answer.power2 <= 0) { // we have a subnormal? 
- // Here have that answer.power2 <= 0 so -answer.power2 >= 0 - if (-answer.power2 + 1 >= - 64) { // if we have more than 64 bits below the minimum exponent, you - // have a zero for sure. - answer.power2 = 0; - answer.mantissa = 0; - // result should be zero - return answer; - } - // next line is safe because -answer.power2 + 1 < 64 - answer.mantissa >>= -answer.power2 + 1; - // Thankfully, we can't have both "round-to-even" and subnormals because - // "round-to-even" only occurs for powers close to 0. - answer.mantissa += (answer.mantissa & 1); // round up - answer.mantissa >>= 1; - // There is a weird scenario where we don't have a subnormal but just. - // Suppose we start with 2.2250738585072013e-308, we end up - // with 0x3fffffffffffff x 2^-1023-53 which is technically subnormal - // whereas 0x40000000000000 x 2^-1023-53 is normal. Now, we need to round - // up 0x3fffffffffffff x 2^-1023-53 and once we do, we are no longer - // subnormal, but we can only know this after rounding. - // So we only declare a subnormal if we are smaller than the threshold. - answer.power2 = - (answer.mantissa < (uint64_t(1) << binary::mantissa_explicit_bits())) - ? 0 - : 1; - return answer; - } - - // usually, we round *up*, but if we fall right in between and and we have an - // even basis, we need to round down - // We are only concerned with the cases where 5**q fits in single 64-bit word. - if ((product.low <= 1) && (q >= binary::min_exponent_round_to_even()) && - (q <= binary::max_exponent_round_to_even()) && - ((answer.mantissa & 3) == 1)) { // we may fall between two floats! - // To be in-between two floats we need that in doing - // answer.mantissa = product.high >> (upperbit + 64 - - // binary::mantissa_explicit_bits() - 3); - // ... we dropped out only zeroes. But if this happened, then we can go - // back!!! 
- if ((answer.mantissa << shift) == product.high) { - answer.mantissa &= ~uint64_t(1); // flip it so that we do not round up - } - } - - answer.mantissa += (answer.mantissa & 1); // round up - answer.mantissa >>= 1; - if (answer.mantissa >= (uint64_t(2) << binary::mantissa_explicit_bits())) { - answer.mantissa = (uint64_t(1) << binary::mantissa_explicit_bits()); - answer.power2++; // undo previous addition - } - - answer.mantissa &= ~(uint64_t(1) << binary::mantissa_explicit_bits()); - if (answer.power2 >= binary::infinite_power()) { // infinity - answer.power2 = binary::infinite_power(); - answer.mantissa = 0; - } - return answer; -} - -} // namespace fast_float - -#endif - -#ifndef FASTFLOAT_BIGINT_H -#define FASTFLOAT_BIGINT_H - -#include -#include -#include -#include - - -namespace fast_float { - -// the limb width: we want efficient multiplication of double the bits in -// limb, or for 64-bit limbs, at least 64-bit multiplication where we can -// extract the high and low parts efficiently. this is every 64-bit -// architecture except for sparc, which emulates 128-bit multiplication. -// we might have platforms where `CHAR_BIT` is not 8, so let's avoid -// doing `8 * sizeof(limb)`. -#if defined(FASTFLOAT_64BIT) && !defined(__sparc) -#define FASTFLOAT_64BIT_LIMB 1 -typedef uint64_t limb; -constexpr size_t limb_bits = 64; -#else -#define FASTFLOAT_32BIT_LIMB -typedef uint32_t limb; -constexpr size_t limb_bits = 32; -#endif - -typedef span limb_span; - -// number of bits in a bigint. this needs to be at least the number -// of bits required to store the largest bigint, which is -// `log2(10**(digits + max_exp))`, or `log2(10**(767 + 342))`, or -// ~3600 bits, so we round to 4000. -constexpr size_t bigint_bits = 4000; -constexpr size_t bigint_limbs = bigint_bits / limb_bits; - -// vector-like type that is allocated on the stack. the entire -// buffer is pre-allocated, and only the length changes. 
-template struct stackvec { - limb data[size]; - // we never need more than 150 limbs - uint16_t length{0}; - - stackvec() = default; - stackvec(const stackvec &) = delete; - stackvec &operator=(const stackvec &) = delete; - stackvec(stackvec &&) = delete; - stackvec &operator=(stackvec &&other) = delete; - - // create stack vector from existing limb span. - FASTFLOAT_CONSTEXPR20 stackvec(limb_span s) { - FASTFLOAT_ASSERT(try_extend(s)); - } - - FASTFLOAT_CONSTEXPR14 limb &operator[](size_t index) noexcept { - FASTFLOAT_DEBUG_ASSERT(index < length); - return data[index]; - } - FASTFLOAT_CONSTEXPR14 const limb &operator[](size_t index) const noexcept { - FASTFLOAT_DEBUG_ASSERT(index < length); - return data[index]; - } - // index from the end of the container - FASTFLOAT_CONSTEXPR14 const limb &rindex(size_t index) const noexcept { - FASTFLOAT_DEBUG_ASSERT(index < length); - size_t rindex = length - index - 1; - return data[rindex]; - } - - // set the length, without bounds checking. - FASTFLOAT_CONSTEXPR14 void set_len(size_t len) noexcept { - length = uint16_t(len); - } - constexpr size_t len() const noexcept { return length; } - constexpr bool is_empty() const noexcept { return length == 0; } - constexpr size_t capacity() const noexcept { return size; } - // append item to vector, without bounds checking - FASTFLOAT_CONSTEXPR14 void push_unchecked(limb value) noexcept { - data[length] = value; - length++; - } - // append item to vector, returning if item was added - FASTFLOAT_CONSTEXPR14 bool try_push(limb value) noexcept { - if (len() < capacity()) { - push_unchecked(value); - return true; - } else { - return false; - } - } - // add items to the vector, from a span, without bounds checking - FASTFLOAT_CONSTEXPR20 void extend_unchecked(limb_span s) noexcept { - limb *ptr = data + length; - std::copy_n(s.ptr, s.len(), ptr); - set_len(len() + s.len()); - } - // try to add items to the vector, returning if items were added - FASTFLOAT_CONSTEXPR20 bool 
try_extend(limb_span s) noexcept { - if (len() + s.len() <= capacity()) { - extend_unchecked(s); - return true; - } else { - return false; - } - } - // resize the vector, without bounds checking - // if the new size is longer than the vector, assign value to each - // appended item. - FASTFLOAT_CONSTEXPR20 - void resize_unchecked(size_t new_len, limb value) noexcept { - if (new_len > len()) { - size_t count = new_len - len(); - limb *first = data + len(); - limb *last = first + count; - ::std::fill(first, last, value); - set_len(new_len); - } else { - set_len(new_len); - } - } - // try to resize the vector, returning if the vector was resized. - FASTFLOAT_CONSTEXPR20 bool try_resize(size_t new_len, limb value) noexcept { - if (new_len > capacity()) { - return false; - } else { - resize_unchecked(new_len, value); - return true; - } - } - // check if any limbs are non-zero after the given index. - // this needs to be done in reverse order, since the index - // is relative to the most significant limbs. - FASTFLOAT_CONSTEXPR14 bool nonzero(size_t index) const noexcept { - while (index < len()) { - if (rindex(index) != 0) { - return true; - } - index++; - } - return false; - } - // normalize the big integer, so most-significant zero limbs are removed. 
- FASTFLOAT_CONSTEXPR14 void normalize() noexcept { - while (len() > 0 && rindex(0) == 0) { - length--; - } - } -}; - -fastfloat_really_inline FASTFLOAT_CONSTEXPR14 uint64_t -empty_hi64(bool &truncated) noexcept { - truncated = false; - return 0; -} - -fastfloat_really_inline FASTFLOAT_CONSTEXPR20 uint64_t -uint64_hi64(uint64_t r0, bool &truncated) noexcept { - truncated = false; - int shl = leading_zeroes(r0); - return r0 << shl; -} - -fastfloat_really_inline FASTFLOAT_CONSTEXPR20 uint64_t -uint64_hi64(uint64_t r0, uint64_t r1, bool &truncated) noexcept { - int shl = leading_zeroes(r0); - if (shl == 0) { - truncated = r1 != 0; - return r0; - } else { - int shr = 64 - shl; - truncated = (r1 << shl) != 0; - return (r0 << shl) | (r1 >> shr); - } -} - -fastfloat_really_inline FASTFLOAT_CONSTEXPR20 uint64_t -uint32_hi64(uint32_t r0, bool &truncated) noexcept { - return uint64_hi64(r0, truncated); -} - -fastfloat_really_inline FASTFLOAT_CONSTEXPR20 uint64_t -uint32_hi64(uint32_t r0, uint32_t r1, bool &truncated) noexcept { - uint64_t x0 = r0; - uint64_t x1 = r1; - return uint64_hi64((x0 << 32) | x1, truncated); -} - -fastfloat_really_inline FASTFLOAT_CONSTEXPR20 uint64_t -uint32_hi64(uint32_t r0, uint32_t r1, uint32_t r2, bool &truncated) noexcept { - uint64_t x0 = r0; - uint64_t x1 = r1; - uint64_t x2 = r2; - return uint64_hi64(x0, (x1 << 32) | x2, truncated); -} - -// add two small integers, checking for overflow. -// we want an efficient operation. for msvc, where -// we don't have built-in intrinsics, this is still -// pretty fast. -fastfloat_really_inline FASTFLOAT_CONSTEXPR20 limb -scalar_add(limb x, limb y, bool &overflow) noexcept { - limb z; -// gcc and clang -#if defined(__has_builtin) -#if __has_builtin(__builtin_add_overflow) - if (!cpp20_and_in_constexpr()) { - overflow = __builtin_add_overflow(x, y, &z); - return z; - } -#endif -#endif - - // generic, this still optimizes correctly on MSVC. 
- z = x + y; - overflow = z < x; - return z; -} - -// multiply two small integers, getting both the high and low bits. -fastfloat_really_inline FASTFLOAT_CONSTEXPR20 limb -scalar_mul(limb x, limb y, limb &carry) noexcept { -#ifdef FASTFLOAT_64BIT_LIMB -#if defined(__SIZEOF_INT128__) - // GCC and clang both define it as an extension. - __uint128_t z = __uint128_t(x) * __uint128_t(y) + __uint128_t(carry); - carry = limb(z >> limb_bits); - return limb(z); -#else - // fallback, no native 128-bit integer multiplication with carry. - // on msvc, this optimizes identically, somehow. - value128 z = full_multiplication(x, y); - bool overflow; - z.low = scalar_add(z.low, carry, overflow); - z.high += uint64_t(overflow); // cannot overflow - carry = z.high; - return z.low; -#endif -#else - uint64_t z = uint64_t(x) * uint64_t(y) + uint64_t(carry); - carry = limb(z >> limb_bits); - return limb(z); -#endif -} - -// add scalar value to bigint starting from offset. -// used in grade school multiplication -template -inline FASTFLOAT_CONSTEXPR20 bool small_add_from(stackvec &vec, limb y, - size_t start) noexcept { - size_t index = start; - limb carry = y; - bool overflow; - while (carry != 0 && index < vec.len()) { - vec[index] = scalar_add(vec[index], carry, overflow); - carry = limb(overflow); - index += 1; - } - if (carry != 0) { - FASTFLOAT_TRY(vec.try_push(carry)); - } - return true; -} - -// add scalar value to bigint. -template -fastfloat_really_inline FASTFLOAT_CONSTEXPR20 bool -small_add(stackvec &vec, limb y) noexcept { - return small_add_from(vec, y, 0); -} - -// multiply bigint by scalar value. -template -inline FASTFLOAT_CONSTEXPR20 bool small_mul(stackvec &vec, - limb y) noexcept { - limb carry = 0; - for (size_t index = 0; index < vec.len(); index++) { - vec[index] = scalar_mul(vec[index], y, carry); - } - if (carry != 0) { - FASTFLOAT_TRY(vec.try_push(carry)); - } - return true; -} - -// add bigint to bigint starting from index. 
-// used in grade school multiplication -template -FASTFLOAT_CONSTEXPR20 bool large_add_from(stackvec &x, limb_span y, - size_t start) noexcept { - // the effective x buffer is from `xstart..x.len()`, so exit early - // if we can't get that current range. - if (x.len() < start || y.len() > x.len() - start) { - FASTFLOAT_TRY(x.try_resize(y.len() + start, 0)); - } - - bool carry = false; - for (size_t index = 0; index < y.len(); index++) { - limb xi = x[index + start]; - limb yi = y[index]; - bool c1 = false; - bool c2 = false; - xi = scalar_add(xi, yi, c1); - if (carry) { - xi = scalar_add(xi, 1, c2); - } - x[index + start] = xi; - carry = c1 | c2; - } - - // handle overflow - if (carry) { - FASTFLOAT_TRY(small_add_from(x, 1, y.len() + start)); - } - return true; -} - -// add bigint to bigint. -template -fastfloat_really_inline FASTFLOAT_CONSTEXPR20 bool -large_add_from(stackvec &x, limb_span y) noexcept { - return large_add_from(x, y, 0); -} - -// grade-school multiplication algorithm -template -FASTFLOAT_CONSTEXPR20 bool long_mul(stackvec &x, limb_span y) noexcept { - limb_span xs = limb_span(x.data, x.len()); - stackvec z(xs); - limb_span zs = limb_span(z.data, z.len()); - - if (y.len() != 0) { - limb y0 = y[0]; - FASTFLOAT_TRY(small_mul(x, y0)); - for (size_t index = 1; index < y.len(); index++) { - limb yi = y[index]; - stackvec zi; - if (yi != 0) { - // re-use the same buffer throughout - zi.set_len(0); - FASTFLOAT_TRY(zi.try_extend(zs)); - FASTFLOAT_TRY(small_mul(zi, yi)); - limb_span zis = limb_span(zi.data, zi.len()); - FASTFLOAT_TRY(large_add_from(x, zis, index)); - } - } - } - - x.normalize(); - return true; -} - -// grade-school multiplication algorithm -template -FASTFLOAT_CONSTEXPR20 bool large_mul(stackvec &x, limb_span y) noexcept { - if (y.len() == 1) { - FASTFLOAT_TRY(small_mul(x, y[0])); - } else { - FASTFLOAT_TRY(long_mul(x, y)); - } - return true; -} - -template struct pow5_tables { - static constexpr uint32_t large_step = 135; - static 
constexpr uint64_t small_power_of_5[] = { - 1UL, - 5UL, - 25UL, - 125UL, - 625UL, - 3125UL, - 15625UL, - 78125UL, - 390625UL, - 1953125UL, - 9765625UL, - 48828125UL, - 244140625UL, - 1220703125UL, - 6103515625UL, - 30517578125UL, - 152587890625UL, - 762939453125UL, - 3814697265625UL, - 19073486328125UL, - 95367431640625UL, - 476837158203125UL, - 2384185791015625UL, - 11920928955078125UL, - 59604644775390625UL, - 298023223876953125UL, - 1490116119384765625UL, - 7450580596923828125UL, - }; -#ifdef FASTFLOAT_64BIT_LIMB - constexpr static limb large_power_of_5[] = { - 1414648277510068013UL, 9180637584431281687UL, 4539964771860779200UL, - 10482974169319127550UL, 198276706040285095UL}; -#else - constexpr static limb large_power_of_5[] = { - 4279965485U, 329373468U, 4020270615U, 2137533757U, 4287402176U, - 1057042919U, 1071430142U, 2440757623U, 381945767U, 46164893U}; -#endif -}; - -template constexpr uint32_t pow5_tables::large_step; - -template constexpr uint64_t pow5_tables::small_power_of_5[]; - -template constexpr limb pow5_tables::large_power_of_5[]; - -// big integer type. implements a small subset of big integer -// arithmetic, using simple algorithms since asymptotically -// faster algorithms are slower for a small number of limbs. -// all operations assume the big-integer is normalized. -struct bigint : pow5_tables<> { - // storage of the limbs, in little-endian order. - stackvec vec; - - FASTFLOAT_CONSTEXPR20 bigint() : vec() {} - bigint(const bigint &) = delete; - bigint &operator=(const bigint &) = delete; - bigint(bigint &&) = delete; - bigint &operator=(bigint &&other) = delete; - - FASTFLOAT_CONSTEXPR20 bigint(uint64_t value) : vec() { -#ifdef FASTFLOAT_64BIT_LIMB - vec.push_unchecked(value); -#else - vec.push_unchecked(uint32_t(value)); - vec.push_unchecked(uint32_t(value >> 32)); -#endif - vec.normalize(); - } - - // get the high 64 bits from the vector, and if bits were truncated. - // this is to get the significant digits for the float. 
- FASTFLOAT_CONSTEXPR20 uint64_t hi64(bool &truncated) const noexcept { -#ifdef FASTFLOAT_64BIT_LIMB - if (vec.len() == 0) { - return empty_hi64(truncated); - } else if (vec.len() == 1) { - return uint64_hi64(vec.rindex(0), truncated); - } else { - uint64_t result = uint64_hi64(vec.rindex(0), vec.rindex(1), truncated); - truncated |= vec.nonzero(2); - return result; - } -#else - if (vec.len() == 0) { - return empty_hi64(truncated); - } else if (vec.len() == 1) { - return uint32_hi64(vec.rindex(0), truncated); - } else if (vec.len() == 2) { - return uint32_hi64(vec.rindex(0), vec.rindex(1), truncated); - } else { - uint64_t result = - uint32_hi64(vec.rindex(0), vec.rindex(1), vec.rindex(2), truncated); - truncated |= vec.nonzero(3); - return result; - } -#endif - } - - // compare two big integers, returning the large value. - // assumes both are normalized. if the return value is - // negative, other is larger, if the return value is - // positive, this is larger, otherwise they are equal. - // the limbs are stored in little-endian order, so we - // must compare the limbs in ever order. - FASTFLOAT_CONSTEXPR20 int compare(const bigint &other) const noexcept { - if (vec.len() > other.vec.len()) { - return 1; - } else if (vec.len() < other.vec.len()) { - return -1; - } else { - for (size_t index = vec.len(); index > 0; index--) { - limb xi = vec[index - 1]; - limb yi = other.vec[index - 1]; - if (xi > yi) { - return 1; - } else if (xi < yi) { - return -1; - } - } - return 0; - } - } - - // shift left each limb n bits, carrying over to the new limb - // returns true if we were able to shift all the digits. - FASTFLOAT_CONSTEXPR20 bool shl_bits(size_t n) noexcept { - // Internally, for each item, we shift left by n, and add the previous - // right shifted limb-bits. 
- // For example, we transform (for u8) shifted left 2, to: - // b10100100 b01000010 - // b10 b10010001 b00001000 - FASTFLOAT_DEBUG_ASSERT(n != 0); - FASTFLOAT_DEBUG_ASSERT(n < sizeof(limb) * 8); - - size_t shl = n; - size_t shr = limb_bits - shl; - limb prev = 0; - for (size_t index = 0; index < vec.len(); index++) { - limb xi = vec[index]; - vec[index] = (xi << shl) | (prev >> shr); - prev = xi; - } - - limb carry = prev >> shr; - if (carry != 0) { - return vec.try_push(carry); - } - return true; - } - - // move the limbs left by `n` limbs. - FASTFLOAT_CONSTEXPR20 bool shl_limbs(size_t n) noexcept { - FASTFLOAT_DEBUG_ASSERT(n != 0); - if (n + vec.len() > vec.capacity()) { - return false; - } else if (!vec.is_empty()) { - // move limbs - limb *dst = vec.data + n; - const limb *src = vec.data; - std::copy_backward(src, src + vec.len(), dst + vec.len()); - // fill in empty limbs - limb *first = vec.data; - limb *last = first + n; - ::std::fill(first, last, 0); - vec.set_len(n + vec.len()); - return true; - } else { - return true; - } - } - - // move the limbs left by `n` bits. - FASTFLOAT_CONSTEXPR20 bool shl(size_t n) noexcept { - size_t rem = n % limb_bits; - size_t div = n / limb_bits; - if (rem != 0) { - FASTFLOAT_TRY(shl_bits(rem)); - } - if (div != 0) { - FASTFLOAT_TRY(shl_limbs(div)); - } - return true; - } - - // get the number of leading zeros in the bigint. - FASTFLOAT_CONSTEXPR20 int ctlz() const noexcept { - if (vec.is_empty()) { - return 0; - } else { -#ifdef FASTFLOAT_64BIT_LIMB - return leading_zeroes(vec.rindex(0)); -#else - // no use defining a specialized leading_zeroes for a 32-bit type. - uint64_t r0 = vec.rindex(0); - return leading_zeroes(r0 << 32); -#endif - } - } - - // get the number of bits in the bigint. 
- FASTFLOAT_CONSTEXPR20 int bit_length() const noexcept { - int lz = ctlz(); - return int(limb_bits * vec.len()) - lz; - } - - FASTFLOAT_CONSTEXPR20 bool mul(limb y) noexcept { return small_mul(vec, y); } - - FASTFLOAT_CONSTEXPR20 bool add(limb y) noexcept { return small_add(vec, y); } - - // multiply as if by 2 raised to a power. - FASTFLOAT_CONSTEXPR20 bool pow2(uint32_t exp) noexcept { return shl(exp); } - - // multiply as if by 5 raised to a power. - FASTFLOAT_CONSTEXPR20 bool pow5(uint32_t exp) noexcept { - // multiply by a power of 5 - size_t large_length = sizeof(large_power_of_5) / sizeof(limb); - limb_span large = limb_span(large_power_of_5, large_length); - while (exp >= large_step) { - FASTFLOAT_TRY(large_mul(vec, large)); - exp -= large_step; - } -#ifdef FASTFLOAT_64BIT_LIMB - uint32_t small_step = 27; - limb max_native = 7450580596923828125UL; -#else - uint32_t small_step = 13; - limb max_native = 1220703125U; -#endif - while (exp >= small_step) { - FASTFLOAT_TRY(small_mul(vec, max_native)); - exp -= small_step; - } - if (exp != 0) { - // Work around clang bug https://godbolt.org/z/zedh7rrhc - // This is similar to https://github.com/llvm/llvm-project/issues/47746, - // except the workaround described there don't work here - FASTFLOAT_TRY(small_mul( - vec, limb(((void)small_power_of_5[0], small_power_of_5[exp])))); - } - - return true; - } - - // multiply as if by 10 raised to a power. 
- FASTFLOAT_CONSTEXPR20 bool pow10(uint32_t exp) noexcept { - FASTFLOAT_TRY(pow5(exp)); - return pow2(exp); - } -}; - -} // namespace fast_float - -#endif - -#ifndef FASTFLOAT_DIGIT_COMPARISON_H -#define FASTFLOAT_DIGIT_COMPARISON_H - -#include -#include -#include -#include - - -namespace fast_float { - -// 1e0 to 1e19 -constexpr static uint64_t powers_of_ten_uint64[] = {1UL, - 10UL, - 100UL, - 1000UL, - 10000UL, - 100000UL, - 1000000UL, - 10000000UL, - 100000000UL, - 1000000000UL, - 10000000000UL, - 100000000000UL, - 1000000000000UL, - 10000000000000UL, - 100000000000000UL, - 1000000000000000UL, - 10000000000000000UL, - 100000000000000000UL, - 1000000000000000000UL, - 10000000000000000000UL}; - -// calculate the exponent, in scientific notation, of the number. -// this algorithm is not even close to optimized, but it has no practical -// effect on performance: in order to have a faster algorithm, we'd need -// to slow down performance for faster algorithms, and this is still fast. -template -fastfloat_really_inline FASTFLOAT_CONSTEXPR14 int32_t -scientific_exponent(parsed_number_string_t &num) noexcept { - uint64_t mantissa = num.mantissa; - int32_t exponent = int32_t(num.exponent); - while (mantissa >= 10000) { - mantissa /= 10000; - exponent += 4; - } - while (mantissa >= 100) { - mantissa /= 100; - exponent += 2; - } - while (mantissa >= 10) { - mantissa /= 10; - exponent += 1; - } - return exponent; -} - -// this converts a native floating-point number to an extended-precision float. 
-template -fastfloat_really_inline FASTFLOAT_CONSTEXPR20 adjusted_mantissa -to_extended(T value) noexcept { - using equiv_uint = typename binary_format::equiv_uint; - constexpr equiv_uint exponent_mask = binary_format::exponent_mask(); - constexpr equiv_uint mantissa_mask = binary_format::mantissa_mask(); - constexpr equiv_uint hidden_bit_mask = binary_format::hidden_bit_mask(); - - adjusted_mantissa am; - int32_t bias = binary_format::mantissa_explicit_bits() - - binary_format::minimum_exponent(); - equiv_uint bits; -#if FASTFLOAT_HAS_BIT_CAST - bits = std::bit_cast(value); -#else - ::memcpy(&bits, &value, sizeof(T)); -#endif - if ((bits & exponent_mask) == 0) { - // denormal - am.power2 = 1 - bias; - am.mantissa = bits & mantissa_mask; - } else { - // normal - am.power2 = int32_t((bits & exponent_mask) >> - binary_format::mantissa_explicit_bits()); - am.power2 -= bias; - am.mantissa = (bits & mantissa_mask) | hidden_bit_mask; - } - - return am; -} - -// get the extended precision value of the halfway point between b and b+u. -// we are given a native float that represents b, so we need to adjust it -// halfway between b and b+u. -template -fastfloat_really_inline FASTFLOAT_CONSTEXPR20 adjusted_mantissa -to_extended_halfway(T value) noexcept { - adjusted_mantissa am = to_extended(value); - am.mantissa <<= 1; - am.mantissa += 1; - am.power2 -= 1; - return am; -} - -// round an extended-precision float to the nearest machine float. -template -fastfloat_really_inline FASTFLOAT_CONSTEXPR14 void round(adjusted_mantissa &am, - callback cb) noexcept { - int32_t mantissa_shift = 64 - binary_format::mantissa_explicit_bits() - 1; - if (-am.power2 >= mantissa_shift) { - // have a denormal float - int32_t shift = -am.power2 + 1; - cb(am, std::min(shift, 64)); - // check for round-up: if rounding-nearest carried us to the hidden bit. - am.power2 = (am.mantissa < - (uint64_t(1) << binary_format::mantissa_explicit_bits())) - ? 
0 - : 1; - return; - } - - // have a normal float, use the default shift. - cb(am, mantissa_shift); - - // check for carry - if (am.mantissa >= - (uint64_t(2) << binary_format::mantissa_explicit_bits())) { - am.mantissa = (uint64_t(1) << binary_format::mantissa_explicit_bits()); - am.power2++; - } - - // check for infinite: we could have carried to an infinite power - am.mantissa &= ~(uint64_t(1) << binary_format::mantissa_explicit_bits()); - if (am.power2 >= binary_format::infinite_power()) { - am.power2 = binary_format::infinite_power(); - am.mantissa = 0; - } -} - -template -fastfloat_really_inline FASTFLOAT_CONSTEXPR14 void -round_nearest_tie_even(adjusted_mantissa &am, int32_t shift, - callback cb) noexcept { - const uint64_t mask = (shift == 64) ? UINT64_MAX : (uint64_t(1) << shift) - 1; - const uint64_t halfway = (shift == 0) ? 0 : uint64_t(1) << (shift - 1); - uint64_t truncated_bits = am.mantissa & mask; - bool is_above = truncated_bits > halfway; - bool is_halfway = truncated_bits == halfway; - - // shift digits into position - if (shift == 64) { - am.mantissa = 0; - } else { - am.mantissa >>= shift; - } - am.power2 += shift; - - bool is_odd = (am.mantissa & 1) == 1; - am.mantissa += uint64_t(cb(is_odd, is_halfway, is_above)); -} - -fastfloat_really_inline FASTFLOAT_CONSTEXPR14 void -round_down(adjusted_mantissa &am, int32_t shift) noexcept { - if (shift == 64) { - am.mantissa = 0; - } else { - am.mantissa >>= shift; - } - am.power2 += shift; -} -template -fastfloat_really_inline FASTFLOAT_CONSTEXPR20 void -skip_zeros(UC const *&first, UC const *last) noexcept { - uint64_t val; - while (!cpp20_and_in_constexpr() && - std::distance(first, last) >= int_cmp_len()) { - ::memcpy(&val, first, sizeof(uint64_t)); - if (val != int_cmp_zeros()) { - break; - } - first += int_cmp_len(); - } - while (first != last) { - if (*first != UC('0')) { - break; - } - first++; - } -} - -// determine if any non-zero digits were truncated. -// all characters must be valid digits. 
-template -fastfloat_really_inline FASTFLOAT_CONSTEXPR20 bool -is_truncated(UC const *first, UC const *last) noexcept { - // do 8-bit optimizations, can just compare to 8 literal 0s. - uint64_t val; - while (!cpp20_and_in_constexpr() && - std::distance(first, last) >= int_cmp_len()) { - ::memcpy(&val, first, sizeof(uint64_t)); - if (val != int_cmp_zeros()) { - return true; - } - first += int_cmp_len(); - } - while (first != last) { - if (*first != UC('0')) { - return true; - } - ++first; - } - return false; -} -template -fastfloat_really_inline FASTFLOAT_CONSTEXPR20 bool -is_truncated(span s) noexcept { - return is_truncated(s.ptr, s.ptr + s.len()); -} - -template -fastfloat_really_inline FASTFLOAT_CONSTEXPR20 void -parse_eight_digits(const UC *&p, limb &value, size_t &counter, - size_t &count) noexcept { - value = value * 100000000 + parse_eight_digits_unrolled(p); - p += 8; - counter += 8; - count += 8; -} - -template -fastfloat_really_inline FASTFLOAT_CONSTEXPR14 void -parse_one_digit(UC const *&p, limb &value, size_t &counter, - size_t &count) noexcept { - value = value * 10 + limb(*p - UC('0')); - p++; - counter++; - count++; -} - -fastfloat_really_inline FASTFLOAT_CONSTEXPR20 void -add_native(bigint &big, limb power, limb value) noexcept { - big.mul(power); - big.add(value); -} - -fastfloat_really_inline FASTFLOAT_CONSTEXPR20 void -round_up_bigint(bigint &big, size_t &count) noexcept { - // need to round-up the digits, but need to avoid rounding - // ....9999 to ...10000, which could cause a false halfway point. - add_native(big, 10, 1); - count++; -} - -// parse the significant digits into a big integer -template -inline FASTFLOAT_CONSTEXPR20 void -parse_mantissa(bigint &result, parsed_number_string_t &num, - size_t max_digits, size_t &digits) noexcept { - // try to minimize the number of big integer and scalar multiplication. - // therefore, try to parse 8 digits at a time, and multiply by the largest - // scalar value (9 or 19 digits) for each step. 
- size_t counter = 0; - digits = 0; - limb value = 0; -#ifdef FASTFLOAT_64BIT_LIMB - size_t step = 19; -#else - size_t step = 9; -#endif - - // process all integer digits. - UC const *p = num.integer.ptr; - UC const *pend = p + num.integer.len(); - skip_zeros(p, pend); - // process all digits, in increments of step per loop - while (p != pend) { - while ((std::distance(p, pend) >= 8) && (step - counter >= 8) && - (max_digits - digits >= 8)) { - parse_eight_digits(p, value, counter, digits); - } - while (counter < step && p != pend && digits < max_digits) { - parse_one_digit(p, value, counter, digits); - } - if (digits == max_digits) { - // add the temporary value, then check if we've truncated any digits - add_native(result, limb(powers_of_ten_uint64[counter]), value); - bool truncated = is_truncated(p, pend); - if (num.fraction.ptr != nullptr) { - truncated |= is_truncated(num.fraction); - } - if (truncated) { - round_up_bigint(result, digits); - } - return; - } else { - add_native(result, limb(powers_of_ten_uint64[counter]), value); - counter = 0; - value = 0; - } - } - - // add our fraction digits, if they're available. 
- if (num.fraction.ptr != nullptr) { - p = num.fraction.ptr; - pend = p + num.fraction.len(); - if (digits == 0) { - skip_zeros(p, pend); - } - // process all digits, in increments of step per loop - while (p != pend) { - while ((std::distance(p, pend) >= 8) && (step - counter >= 8) && - (max_digits - digits >= 8)) { - parse_eight_digits(p, value, counter, digits); - } - while (counter < step && p != pend && digits < max_digits) { - parse_one_digit(p, value, counter, digits); - } - if (digits == max_digits) { - // add the temporary value, then check if we've truncated any digits - add_native(result, limb(powers_of_ten_uint64[counter]), value); - bool truncated = is_truncated(p, pend); - if (truncated) { - round_up_bigint(result, digits); - } - return; - } else { - add_native(result, limb(powers_of_ten_uint64[counter]), value); - counter = 0; - value = 0; - } - } - } - - if (counter != 0) { - add_native(result, limb(powers_of_ten_uint64[counter]), value); - } -} - -template -inline FASTFLOAT_CONSTEXPR20 adjusted_mantissa -positive_digit_comp(bigint &bigmant, int32_t exponent) noexcept { - FASTFLOAT_ASSERT(bigmant.pow10(uint32_t(exponent))); - adjusted_mantissa answer; - bool truncated; - answer.mantissa = bigmant.hi64(truncated); - int bias = binary_format::mantissa_explicit_bits() - - binary_format::minimum_exponent(); - answer.power2 = bigmant.bit_length() - 64 + bias; - - round(answer, [truncated](adjusted_mantissa &a, int32_t shift) { - round_nearest_tie_even( - a, shift, - [truncated](bool is_odd, bool is_halfway, bool is_above) -> bool { - return is_above || (is_halfway && truncated) || - (is_odd && is_halfway); - }); - }); - - return answer; -} - -// the scaling here is quite simple: we have, for the real digits `m * 10^e`, -// and for the theoretical digits `n * 2^f`. Since `e` is always negative, -// to scale them identically, we do `n * 2^f * 5^-f`, so we now have `m * 2^e`. 
-// we then need to scale by `2^(f- e)`, and then the two significant digits -// are of the same magnitude. -template -inline FASTFLOAT_CONSTEXPR20 adjusted_mantissa negative_digit_comp( - bigint &bigmant, adjusted_mantissa am, int32_t exponent) noexcept { - bigint &real_digits = bigmant; - int32_t real_exp = exponent; - - // get the value of `b`, rounded down, and get a bigint representation of b+h - adjusted_mantissa am_b = am; - // gcc7 buf: use a lambda to remove the noexcept qualifier bug with - // -Wnoexcept-type. - round(am_b, - [](adjusted_mantissa &a, int32_t shift) { round_down(a, shift); }); - T b; - to_float(false, am_b, b); - adjusted_mantissa theor = to_extended_halfway(b); - bigint theor_digits(theor.mantissa); - int32_t theor_exp = theor.power2; - - // scale real digits and theor digits to be same power. - int32_t pow2_exp = theor_exp - real_exp; - uint32_t pow5_exp = uint32_t(-real_exp); - if (pow5_exp != 0) { - FASTFLOAT_ASSERT(theor_digits.pow5(pow5_exp)); - } - if (pow2_exp > 0) { - FASTFLOAT_ASSERT(theor_digits.pow2(uint32_t(pow2_exp))); - } else if (pow2_exp < 0) { - FASTFLOAT_ASSERT(real_digits.pow2(uint32_t(-pow2_exp))); - } - - // compare digits, and use it to director rounding - int ord = real_digits.compare(theor_digits); - adjusted_mantissa answer = am; - round(answer, [ord](adjusted_mantissa &a, int32_t shift) { - round_nearest_tie_even( - a, shift, [ord](bool is_odd, bool _, bool __) -> bool { - (void)_; // not needed, since we've done our comparison - (void)__; // not needed, since we've done our comparison - if (ord > 0) { - return true; - } else if (ord < 0) { - return false; - } else { - return is_odd; - } - }); - }); - - return answer; -} - -// parse the significant digits as a big integer to unambiguously round the -// the significant digits. here, we are trying to determine how to round -// an extended float representation close to `b+h`, halfway between `b` -// (the float rounded-down) and `b+u`, the next positive float. 
this -// algorithm is always correct, and uses one of two approaches. when -// the exponent is positive relative to the significant digits (such as -// 1234), we create a big-integer representation, get the high 64-bits, -// determine if any lower bits are truncated, and use that to direct -// rounding. in case of a negative exponent relative to the significant -// digits (such as 1.2345), we create a theoretical representation of -// `b` as a big-integer type, scaled to the same binary exponent as -// the actual digits. we then compare the big integer representations -// of both, and use that to direct rounding. -template -inline FASTFLOAT_CONSTEXPR20 adjusted_mantissa -digit_comp(parsed_number_string_t &num, adjusted_mantissa am) noexcept { - // remove the invalid exponent bias - am.power2 -= invalid_am_bias; - - int32_t sci_exp = scientific_exponent(num); - size_t max_digits = binary_format::max_digits(); - size_t digits = 0; - bigint bigmant; - parse_mantissa(bigmant, num, max_digits, digits); - // can't underflow, since digits is at most max_digits. - int32_t exponent = sci_exp + 1 - int32_t(digits); - if (exponent >= 0) { - return positive_digit_comp(bigmant, exponent); - } else { - return negative_digit_comp(bigmant, am, exponent); - } -} - -} // namespace fast_float - -#endif - -#ifndef FASTFLOAT_PARSE_NUMBER_H -#define FASTFLOAT_PARSE_NUMBER_H - - -#include -#include -#include -#include -namespace fast_float { - -namespace detail { -/** - * Special case +inf, -inf, nan, infinity, -infinity. - * The case comparisons could be made much faster given that we know that the - * strings a null-free and fixed. 
- **/ -template -from_chars_result_t FASTFLOAT_CONSTEXPR14 parse_infnan(UC const *first, - UC const *last, - T &value) noexcept { - from_chars_result_t answer{}; - answer.ptr = first; - answer.ec = std::errc(); // be optimistic - bool minusSign = false; - if (*first == - UC('-')) { // assume first < last, so dereference without checks; - // C++17 20.19.3.(7.1) explicitly forbids '+' here - minusSign = true; - ++first; - } -#ifdef FASTFLOAT_ALLOWS_LEADING_PLUS // disabled by default - if (*first == UC('+')) { - ++first; - } -#endif - if (last - first >= 3) { - if (fastfloat_strncasecmp(first, str_const_nan(), 3)) { - answer.ptr = (first += 3); - value = minusSign ? -std::numeric_limits::quiet_NaN() - : std::numeric_limits::quiet_NaN(); - // Check for possible nan(n-char-seq-opt), C++17 20.19.3.7, - // C11 7.20.1.3.3. At least MSVC produces nan(ind) and nan(snan). - if (first != last && *first == UC('(')) { - for (UC const *ptr = first + 1; ptr != last; ++ptr) { - if (*ptr == UC(')')) { - answer.ptr = ptr + 1; // valid nan(n-char-seq-opt) - break; - } else if (!((UC('a') <= *ptr && *ptr <= UC('z')) || - (UC('A') <= *ptr && *ptr <= UC('Z')) || - (UC('0') <= *ptr && *ptr <= UC('9')) || *ptr == UC('_'))) - break; // forbidden char, not nan(n-char-seq-opt) - } - } - return answer; - } - if (fastfloat_strncasecmp(first, str_const_inf(), 3)) { - if ((last - first >= 8) && - fastfloat_strncasecmp(first + 3, str_const_inf() + 3, 5)) { - answer.ptr = first + 8; - } else { - answer.ptr = first + 3; - } - value = minusSign ? -std::numeric_limits::infinity() - : std::numeric_limits::infinity(); - return answer; - } - } - answer.ec = std::errc::invalid_argument; - return answer; -} - -/** - * Returns true if the floating-pointing rounding mode is to 'nearest'. - * It is the default on most system. This function is meant to be inexpensive. 
- * Credit : @mwalcott3 - */ -fastfloat_really_inline bool rounds_to_nearest() noexcept { - // https://lemire.me/blog/2020/06/26/gcc-not-nearest/ -#if (FLT_EVAL_METHOD != 1) && (FLT_EVAL_METHOD != 0) - return false; -#endif - // See - // A fast function to check your floating-point rounding mode - // https://lemire.me/blog/2022/11/16/a-fast-function-to-check-your-floating-point-rounding-mode/ - // - // This function is meant to be equivalent to : - // prior: #include - // return fegetround() == FE_TONEAREST; - // However, it is expected to be much faster than the fegetround() - // function call. - // - // The volatile keywoard prevents the compiler from computing the function - // at compile-time. - // There might be other ways to prevent compile-time optimizations (e.g., - // asm). The value does not need to be std::numeric_limits::min(), any - // small value so that 1 + x should round to 1 would do (after accounting for - // excess precision, as in 387 instructions). - static volatile float fmin = std::numeric_limits::min(); - float fmini = fmin; // we copy it so that it gets loaded at most once. -// -// Explanation: -// Only when fegetround() == FE_TONEAREST do we have that -// fmin + 1.0f == 1.0f - fmin. -// -// FE_UPWARD: -// fmin + 1.0f > 1 -// 1.0f - fmin == 1 -// -// FE_DOWNWARD or FE_TOWARDZERO: -// fmin + 1.0f == 1 -// 1.0f - fmin < 1 -// -// Note: This may fail to be accurate if fast-math has been -// enabled, as rounding conventions may not apply. -#ifdef FASTFLOAT_VISUAL_STUDIO -#pragma warning(push) -// todo: is there a VS warning? 
-// see -// https://stackoverflow.com/questions/46079446/is-there-a-warning-for-floating-point-equality-checking-in-visual-studio-2013 -#elif defined(__clang__) -#pragma clang diagnostic push -#pragma clang diagnostic ignored "-Wfloat-equal" -#elif defined(__GNUC__) -#pragma GCC diagnostic push -#pragma GCC diagnostic ignored "-Wfloat-equal" -#endif - return (fmini + 1.0f == 1.0f - fmini); -#ifdef FASTFLOAT_VISUAL_STUDIO -#pragma warning(pop) -#elif defined(__clang__) -#pragma clang diagnostic pop -#elif defined(__GNUC__) -#pragma GCC diagnostic pop -#endif -} - -} // namespace detail - -template struct from_chars_caller { - template - FASTFLOAT_CONSTEXPR20 static from_chars_result_t - call(UC const *first, UC const *last, T &value, - parse_options_t options) noexcept { - return from_chars_advanced(first, last, value, options); - } -}; - -#if __STDCPP_FLOAT32_T__ == 1 -template <> struct from_chars_caller { - template - FASTFLOAT_CONSTEXPR20 static from_chars_result_t - call(UC const *first, UC const *last, std::float32_t &value, - parse_options_t options) noexcept { - // if std::float32_t is defined, and we are in C++23 mode; macro set for - // float32; set value to float due to equivalence between float and - // float32_t - float val; - auto ret = from_chars_advanced(first, last, val, options); - value = val; - return ret; - } -}; -#endif - -#if __STDCPP_FLOAT64_T__ == 1 -template <> struct from_chars_caller { - template - FASTFLOAT_CONSTEXPR20 static from_chars_result_t - call(UC const *first, UC const *last, std::float64_t &value, - parse_options_t options) noexcept { - // if std::float64_t is defined, and we are in C++23 mode; macro set for - // float64; set value as double due to equivalence between double and - // float64_t - double val; - auto ret = from_chars_advanced(first, last, val, options); - value = val; - return ret; - } -}; -#endif - -template -FASTFLOAT_CONSTEXPR20 from_chars_result_t -from_chars(UC const *first, UC const *last, T &value, - 
chars_format fmt /*= chars_format::general*/) noexcept { - return from_chars_caller::call(first, last, value, - parse_options_t(fmt)); -} - -/** - * This function overload takes parsed_number_string_t structure that is created - * and populated either by from_chars_advanced function taking chars range and - * parsing options or other parsing custom function implemented by user. - */ -template -FASTFLOAT_CONSTEXPR20 from_chars_result_t -from_chars_advanced(parsed_number_string_t &pns, T &value) noexcept { - - static_assert(is_supported_float_type(), - "only some floating-point types are supported"); - static_assert(is_supported_char_type(), - "only char, wchar_t, char16_t and char32_t are supported"); - - from_chars_result_t answer; - - answer.ec = std::errc(); // be optimistic - answer.ptr = pns.lastmatch; - // The implementation of the Clinger's fast path is convoluted because - // we want round-to-nearest in all cases, irrespective of the rounding mode - // selected on the thread. - // We proceed optimistically, assuming that detail::rounds_to_nearest() - // returns true. - if (binary_format::min_exponent_fast_path() <= pns.exponent && - pns.exponent <= binary_format::max_exponent_fast_path() && - !pns.too_many_digits) { - // Unfortunately, the conventional Clinger's fast path is only possible - // when the system rounds to the nearest float. - // - // We expect the next branch to almost always be selected. - // We could check it first (before the previous branch), but - // there might be performance advantages at having the check - // be last. - if (!cpp20_and_in_constexpr() && detail::rounds_to_nearest()) { - // We have that fegetround() == FE_TONEAREST. - // Next is Clinger's fast path. 
- if (pns.mantissa <= binary_format::max_mantissa_fast_path()) { - value = T(pns.mantissa); - if (pns.exponent < 0) { - value = value / binary_format::exact_power_of_ten(-pns.exponent); - } else { - value = value * binary_format::exact_power_of_ten(pns.exponent); - } - if (pns.negative) { - value = -value; - } - return answer; - } - } else { - // We do not have that fegetround() == FE_TONEAREST. - // Next is a modified Clinger's fast path, inspired by Jakub Jelínek's - // proposal - if (pns.exponent >= 0 && - pns.mantissa <= - binary_format::max_mantissa_fast_path(pns.exponent)) { -#if defined(__clang__) || defined(FASTFLOAT_32BIT) - // Clang may map 0 to -0.0 when fegetround() == FE_DOWNWARD - if (pns.mantissa == 0) { - value = pns.negative ? T(-0.) : T(0.); - return answer; - } -#endif - value = T(pns.mantissa) * - binary_format::exact_power_of_ten(pns.exponent); - if (pns.negative) { - value = -value; - } - return answer; - } - } - } - adjusted_mantissa am = - compute_float>(pns.exponent, pns.mantissa); - if (pns.too_many_digits && am.power2 >= 0) { - if (am != compute_float>(pns.exponent, pns.mantissa + 1)) { - am = compute_error>(pns.exponent, pns.mantissa); - } - } - // If we called compute_float>(pns.exponent, pns.mantissa) - // and we have an invalid power (am.power2 < 0), then we need to go the long - // way around again. This is very uncommon. - if (am.power2 < 0) { - am = digit_comp(pns, am); - } - to_float(pns.negative, am, value); - // Test for over/underflow. 
- if ((pns.mantissa != 0 && am.mantissa == 0 && am.power2 == 0) || - am.power2 == binary_format::infinite_power()) { - answer.ec = std::errc::result_out_of_range; - } - return answer; -} - -template -FASTFLOAT_CONSTEXPR20 from_chars_result_t -from_chars_advanced(UC const *first, UC const *last, T &value, - parse_options_t options) noexcept { - - static_assert(is_supported_float_type(), - "only some floating-point types are supported"); - static_assert(is_supported_char_type(), - "only char, wchar_t, char16_t and char32_t are supported"); - - from_chars_result_t answer; -#ifdef FASTFLOAT_SKIP_WHITE_SPACE // disabled by default - while ((first != last) && fast_float::is_space(uint8_t(*first))) { - first++; - } -#endif - if (first == last) { - answer.ec = std::errc::invalid_argument; - answer.ptr = first; - return answer; - } - parsed_number_string_t pns = - parse_number_string(first, last, options); - if (!pns.valid) { - if (options.format & chars_format::no_infnan) { - answer.ec = std::errc::invalid_argument; - answer.ptr = first; - return answer; - } else { - return detail::parse_infnan(first, last, value); - } - } - - // call overload that takes parsed_number_string_t directly. 
- return from_chars_advanced(pns, value); -} - -template -FASTFLOAT_CONSTEXPR20 from_chars_result_t -from_chars(UC const *first, UC const *last, T &value, int base) noexcept { - static_assert(is_supported_char_type(), - "only char, wchar_t, char16_t and char32_t are supported"); - - from_chars_result_t answer; -#ifdef FASTFLOAT_SKIP_WHITE_SPACE // disabled by default - while ((first != last) && fast_float::is_space(uint8_t(*first))) { - first++; - } -#endif - if (first == last || base < 2 || base > 36) { - answer.ec = std::errc::invalid_argument; - answer.ptr = first; - return answer; - } - return parse_int_string(first, last, value, base); -} - -} // namespace fast_float - -#endif - diff --git a/deps/fast_float/fast_float_strtod.cpp b/deps/fast_float/fast_float_strtod.cpp deleted file mode 100644 index 7f4235c7e..000000000 --- a/deps/fast_float/fast_float_strtod.cpp +++ /dev/null @@ -1,32 +0,0 @@ -#include "fast_float.h" -#include -#include -#include -#include - -/* Convert NPTR to a double using the fast_float library. - * - * This function behaves similarly to the standard strtod function, converting - * the initial portion of the string pointed to by `nptr` to a `double` value, - * using the fast_float library for high performance. If the conversion fails, - * errno is set to EINVAL error code. - * - * @param nptr A pointer to the null-terminated byte string to be interpreted. - * @param endptr A pointer to a pointer to character. If `endptr` is not NULL, - * it will point to the character after the last character used - * in the conversion. - * @return The converted value as a double. If no valid conversion could - * be performed, returns 0.0. - * If ENDPTR is not NULL, a pointer to the character after the last one used - * in the number is put in *ENDPTR. 
*/ -extern "C" double fast_float_strtod(const char *nptr, char **endptr) { - double result = 0.0; - auto answer = fast_float::from_chars(nptr, nptr + strlen(nptr), result); - if (answer.ec != std::errc()) { - errno = EINVAL; // Fallback to for other errors - } - if (endptr != NULL) { - *endptr = (char *)answer.ptr; - } - return result; -} diff --git a/deps/fast_float/fast_float_strtod.h b/deps/fast_float/fast_float_strtod.h deleted file mode 100644 index 1755076a1..000000000 --- a/deps/fast_float/fast_float_strtod.h +++ /dev/null @@ -1,15 +0,0 @@ - -#ifndef __FAST_FLOAT_STRTOD_H__ -#define __FAST_FLOAT_STRTOD_H__ - -#if defined(__cplusplus) -extern "C" -{ -#endif - double fast_float_strtod(const char *in, char **out); - -#if defined(__cplusplus) -} -#endif - -#endif /* __FAST_FLOAT_STRTOD_H__ */ diff --git a/src/Makefile b/src/Makefile index c202a233d..bb69f5dae 100644 --- a/src/Makefile +++ b/src/Makefile @@ -35,7 +35,7 @@ endif ifneq ($(OPTIMIZATION),-O0) OPTIMIZATION+=-fno-omit-frame-pointer endif -DEPENDENCY_TARGETS=hiredis linenoise lua hdr_histogram fpconv fast_float xxhash +DEPENDENCY_TARGETS=hiredis linenoise lua hdr_histogram fpconv xxhash NODEPS:=clean distclean # Default settings @@ -149,7 +149,7 @@ endif FINAL_CFLAGS=$(STD) $(WARN) $(OPT) $(DEBUG) $(CFLAGS) $(REDIS_CFLAGS) FINAL_LDFLAGS=$(LDFLAGS) $(OPT) $(REDIS_LDFLAGS) $(DEBUG) -FINAL_LIBS=-lm -lstdc++ +FINAL_LIBS=-lm DEBUG=-g -ggdb # Linux ARM32 needs -latomic at linking time @@ -257,7 +257,7 @@ ifdef OPENSSL_PREFIX endif # Include paths to dependencies -FINAL_CFLAGS+= -I../deps/hiredis -I../deps/linenoise -I../deps/lua/src -I../deps/hdr_histogram -I../deps/fpconv -I../deps/fast_float -I../deps/xxhash +FINAL_CFLAGS+= -I../deps/hiredis -I../deps/linenoise -I../deps/lua/src -I../deps/hdr_histogram -I../deps/fpconv -I../deps/xxhash # Determine systemd support and/or build preference (defaulting to auto-detection) BUILD_WITH_SYSTEMD=no @@ -382,7 +382,7 @@ endif 
REDIS_SERVER_NAME=redis-server$(PROG_SUFFIX) REDIS_SENTINEL_NAME=redis-sentinel$(PROG_SUFFIX) -REDIS_SERVER_OBJ=threads_mngr.o memory_prefetch.o adlist.o quicklist.o ae.o anet.o dict.o ebuckets.o eventnotifier.o iothread.o mstr.o entry.o kvstore.o fwtree.o estore.o server.o sds.o zmalloc.o lzf_c.o lzf_d.o pqsort.o zipmap.o sha1.o ziplist.o release.o networking.o util.o object.o db.o replication.o rdb.o t_string.o t_list.o t_set.o t_zset.o t_hash.o config.o aof.o pubsub.o multi.o debug.o sort.o intset.o syncio.o cluster.o cluster_asm.o cluster_legacy.o cluster_slot_stats.o crc16.o endianconv.o slowlog.o eval.o bio.o rio.o rand.o memtest.o syscheck.o crcspeed.o crccombine.o crc64.o bitops.o sentinel.o notify.o setproctitle.o blocked.o hyperloglog.o latency.o sparkline.o redis-check-rdb.o redis-check-aof.o geo.o lazyfree.o module.o evict.o expire.o geohash.o geohash_helper.o childinfo.o defrag.o siphash.o rax.o t_stream.o listpack.o localtime.o lolwut.o lolwut5.o lolwut6.o lolwut8.o acl.o tracking.o socket.o tls.o sha256.o timeout.o setcpuaffinity.o monotonic.o mt19937-64.o resp_parser.o call_reply.o script_lua.o script.o functions.o function_lua.o commands.o strl.o connection.o unix.o logreqres.o keymeta.o chk.o hotkeys.o gcra.o vector.o +REDIS_SERVER_OBJ=threads_mngr.o memory_prefetch.o adlist.o quicklist.o ae.o anet.o dict.o ebuckets.o eventnotifier.o iothread.o mstr.o entry.o kvstore.o fwtree.o estore.o server.o sds.o zmalloc.o lzf_c.o lzf_d.o pqsort.o zipmap.o sha1.o ziplist.o release.o networking.o util.o object.o db.o replication.o rdb.o t_string.o t_list.o t_set.o t_zset.o t_hash.o config.o aof.o pubsub.o multi.o debug.o sort.o intset.o syncio.o cluster.o cluster_asm.o cluster_legacy.o cluster_slot_stats.o crc16.o endianconv.o slowlog.o eval.o bio.o rio.o rand.o memtest.o syscheck.o crcspeed.o crccombine.o crc64.o bitops.o sentinel.o notify.o setproctitle.o blocked.o hyperloglog.o latency.o sparkline.o redis-check-rdb.o redis-check-aof.o geo.o lazyfree.o 
module.o evict.o expire.o geohash.o geohash_helper.o childinfo.o defrag.o siphash.o rax.o t_stream.o listpack.o localtime.o lolwut.o lolwut5.o lolwut6.o lolwut8.o acl.o tracking.o socket.o tls.o sha256.o timeout.o setcpuaffinity.o monotonic.o mt19937-64.o resp_parser.o call_reply.o script_lua.o script.o functions.o function_lua.o commands.o strl.o connection.o unix.o logreqres.o keymeta.o chk.o hotkeys.o gcra.o vector.o fast_float_strtod.o REDIS_CLI_NAME=redis-cli$(PROG_SUFFIX) REDIS_CLI_OBJ=anet.o adlist.o dict.o redis-cli.o zmalloc.o release.o ae.o redisassert.o crcspeed.o crccombine.o crc64.o siphash.o crc16.o monotonic.o cli_common.o mt19937-64.o strl.o cli_commands.o REDIS_BENCHMARK_NAME=redis-benchmark$(PROG_SUFFIX) @@ -442,7 +442,7 @@ endif # redis-server $(REDIS_SERVER_NAME): $(REDIS_SERVER_OBJ) $(REDIS_VEC_SETS_OBJ) - $(REDIS_LD) -o $@ $^ ../deps/hiredis/libhiredis.a ../deps/lua/src/liblua.a ../deps/hdr_histogram/libhdrhistogram.a ../deps/fpconv/libfpconv.a ../deps/fast_float/libfast_float.a ../deps/xxhash/libxxhash.a $(FINAL_LIBS) + $(REDIS_LD) -o $@ $^ ../deps/hiredis/libhiredis.a ../deps/lua/src/liblua.a ../deps/hdr_histogram/libhdrhistogram.a ../deps/fpconv/libfpconv.a ../deps/xxhash/libxxhash.a $(FINAL_LIBS) # redis-sentinel $(REDIS_SENTINEL_NAME): $(REDIS_SERVER_NAME) diff --git a/src/debug.c b/src/debug.c index 29ae88298..6c8e1e4db 100644 --- a/src/debug.c +++ b/src/debug.c @@ -896,7 +896,7 @@ NULL addReplyError(c,"Wrong protocol type name. 
Please use one of the following: string|integer|double|bignum|null|array|set|map|attrib|push|verbatim|true|false"); } } else if (!strcasecmp(c->argv[1]->ptr,"sleep") && c->argc == 3) { - double dtime = fast_float_strtod(c->argv[2]->ptr,NULL); + double dtime = fast_float_strtod(c->argv[2]->ptr,sdslen(c->argv[2]->ptr),NULL); long long utime = dtime*1000000; struct timespec tv; diff --git a/src/fast_float_strtod.c b/src/fast_float_strtod.c new file mode 100644 index 000000000..48a5df502 --- /dev/null +++ b/src/fast_float_strtod.c @@ -0,0 +1,544 @@ +/* fast_float_strtod.c - Fast string to double conversion + * + * This is a C conversion of a subset of the fast_float C++ library, + * implementing only what Redis needs: parsing decimal floating-point strings. + * + * Original fast_float library: + * https://github.com/fastfloat/fast_float + * by Daniel Lemire and João Paulo Magalhaes + * + * MIT License + * + * Copyright (c) 2021 The fast_float authors + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include +#include +#include +#include +#include +#include + +#include "fast_float_strtod.h" +#include "config.h" +#include "zmalloc.h" + +/* Powers of 10 from 10^0 to 10^22 (exact in double precision). + * These are the only powers of 10 that can be exactly represented as doubles. */ +static const double powers_of_ten[] = { + 1e0, 1e1, 1e2, 1e3, 1e4, 1e5, 1e6, 1e7, 1e8, 1e9, 1e10, 1e11, + 1e12, 1e13, 1e14, 1e15, 1e16, 1e17, 1e18, 1e19, 1e20, 1e21, 1e22 +}; + +/* Maximum mantissa for fast path: 2^53 */ +#define MAX_MANTISSA_FAST_PATH 9007199254740992ULL /* 2^53 */ + +/* Exponent limits for fast path */ +#define MIN_EXPONENT_FAST_PATH -22 +#define MAX_EXPONENT_FAST_PATH 22 + +/* Maximum number of significant digits we track before overflow */ +#define MAX_DIGITS 19 + +/* Case-insensitive match against known lowercase literals using `| 0x20`. + * Only valid when the target characters are ASCII letters (a-z). */ +static inline int strcasecmp_3(const char *s, char c0, char c1, char c2) { + return ((s[0] | 0x20) == c0) & ((s[1] | 0x20) == c1) & ((s[2] | 0x20) == c2); +} + +/* Case-insensitive comparison for first n characters. + * Only valid when the target characters are ASCII letters (a-z). */ +static int strncasecmp_local(const char *s1, const char *s2, size_t n) { + for (size_t i = 0; i < n; i++) { + int diff = (s1[i] | 0x20) - s2[i]; + if (diff) return diff; + } + return 0; +} + +/* Parse inf/nan special values. + * Returns 1 if parsed successfully, 0 otherwise. + * On success, *endptr points past the parsed value. 
*/ +static inline int parse_infnan(const char *p, const char *pend, double *result, const char **endptr) { + int negative = (*p == '-'); + if (*p == '-' || *p == '+') p++; + size_t remaining = pend - p; + + if (remaining >= 3) { + if (strcasecmp_3(p, 'n', 'a', 'n')) { + *result = negative ? -NAN : NAN; + p += 3; + /* Check for optional nan(n-char-seq) */ + if (p < pend && *p == '(') { + const char *start = p; + p++; + while (p < pend) { + char c = *p; + if (c == ')') { + p++; + break; + } + if (!((c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') || + (c >= '0' && c <= '9') || c == '_')) { + /* Invalid character, revert to position after "nan" */ + p = start; + break; + } + p++; + } + /* If we didn't find closing ')', revert */ + if (p[-1] != ')') { + p = start; + } + } + if (endptr) *endptr = (char *)p; + return 1; + } + if (strcasecmp_3(p, 'i', 'n', 'f')) { + *result = negative ? -INFINITY : INFINITY; + p += 3; + /* Check for optional "inity" suffix */ + if (remaining == 8 && strncasecmp_local(p, "inity", 5) == 0) { + p += 5; + } + if (endptr) *endptr = (char *)p; + return 1; + } + } + return 0; +} + +/* SWAR (SIMD Within A Register) helpers for batch digit parsing. */ + +static inline uint64_t read8_to_u64(const char *p) { + uint64_t val; + memcpy(&val, p, sizeof(uint64_t)); +#if BYTE_ORDER == BIG_ENDIAN + /* SWAR digit parsing assumes first char in LSB (little-endian layout). 
*/ +#if defined(__GNUC__) || defined(__clang__) + val = __builtin_bswap64(val); +#else + val = ((val & 0x00000000FFFFFFFFULL) << 32) | ((val & 0xFFFFFFFF00000000ULL) >> 32); + val = ((val & 0x0000FFFF0000FFFFULL) << 16) | ((val & 0xFFFF0000FFFF0000ULL) >> 16); + val = ((val & 0x00FF00FF00FF00FFULL) << 8) | ((val & 0xFF00FF00FF00FF00ULL) >> 8); +#endif +#endif + return val; +} + +static inline int is_made_of_eight_digits(uint64_t val) { + return !((((val + 0x4646464646464646ULL) | (val - 0x3030303030303030ULL)) & + 0x8080808080808080ULL)); +} + +static inline uint32_t parse_eight_digits_swar(uint64_t val) { + uint64_t const mask = 0x000000FF000000FFULL; + uint64_t const mul1 = 0x000F424000000064ULL; /* 100 + (1000000ULL << 32) */ + uint64_t const mul2 = 0x0000271000000001ULL; /* 1 + (10000ULL << 32) */ + val -= 0x3030303030303030ULL; + val = (val * 10) + (val >> 8); + val = (((val & mask) * mul1) + (((val >> 16) & mask) * mul2)) >> 32; + return (uint32_t)val; +} + +/* Parse a decimal number in [p, pend), following the fast_float algorithm; + * returns 1 with *result set, or 0 if it is not fast-path eligible.
*/ +static inline int parse_number_string(const char *p, const char *pend, double *result, const char **endptr) { + uint64_t mantissa = 0; /* Mantissa digits as uint64 */ + int64_t exponent = 0; /* Decimal exponent (adjusted for decimal point) */ + int negative = 0; /* Sign flag */ + *endptr = p; + + if (p == pend) return 0; + + /* Parse sign */ + negative = (*p == '-'); + if (*p == '-' || *p == '+') { + p++; + if (p == pend) return 0; + } + + const char *start_digits = p; + + /* Parse integer part */ + mantissa = 0; + while (pend - p >= 8) { + uint64_t val = read8_to_u64(p); + if (!is_made_of_eight_digits(val)) break; + mantissa = mantissa * 100000000 + parse_eight_digits_swar(val); + p += 8; + } + while (p != pend && *p >= '0' && *p <= '9') { + mantissa = mantissa * 10 + (*p - '0'); + p++; + } + + int64_t digit_count = p - start_digits; + + /* Parse decimal point and fractional part */ + exponent = 0; + int has_decimal = (p != pend && *p == '.'); + + if (has_decimal) { + p++; + const char *before = p; + while (pend - p >= 8) { + uint64_t val = read8_to_u64(p); + if (!is_made_of_eight_digits(val)) break; + mantissa = mantissa * 100000000 + parse_eight_digits_swar(val); + p += 8; + } + while (p != pend && *p >= '0' && *p <= '9') { + mantissa = mantissa * 10 + (*p - '0'); + p++; + } + exponent = before - p; /* Negative: number of fractional digits */ + digit_count += (p - before); + } + + /* Must have at least one digit */ + if (digit_count == 0) return 0; + + /* Parse exponent */ + int64_t exp_number = 0; + if (p != pend && (*p == 'e' || *p == 'E')) { + const char *exp_start = p; + p++; + + int neg_exp = 0; + if (p != pend && *p == '-') { + neg_exp = 1; + p++; + } else if (p != pend && *p == '+') { + p++; + } + + if (p == pend || *p < '0' || *p > '9') { + /* No digits after e/E, revert to position before 'e' */ + p = exp_start; + } else { + while (p != pend && *p >= '0' && *p <= '9') { + if (exp_number < 0x10000000) { + exp_number = exp_number * 10 + (*p - '0'); + 
} + p++; + } + if (neg_exp) exp_number = -exp_number; + exponent += exp_number; + } + } + + *endptr = p; + + /* Handle overflow in mantissa: with more than MAX_DIGITS digits the + * uint64 may have wrapped, unless the excess is only leading zeros */ + if (digit_count > MAX_DIGITS) { + /* Skip leading zeros to get actual digit count */ + const char *s = start_digits; + while (s != pend && (*s == '0' || *s == '.')) { + if (*s == '0') digit_count--; + s++; + } + + if (digit_count > MAX_DIGITS) return 0; + } + + /* Check if we're within fast path bounds */ + if (exponent < MIN_EXPONENT_FAST_PATH) return 0; + if (exponent > MAX_EXPONENT_FAST_PATH) return 0; + if (mantissa > MAX_MANTISSA_FAST_PATH) return 0; + + /* Fast path: direct conversion */ + double value = (double)mantissa; + + if (exponent < 0) { + value = value / powers_of_ten[-exponent]; + } else if (exponent > 0) { + value = value * powers_of_ten[exponent]; + } + + if (negative) { + value = -value; + } + + *result = value; + return 1; +} + +/* Fast-path attempt over the byte range [nptr, pend). + * + * Tries parse_number_string() first and parse_infnan() second. Neither needs + * a terminating NUL; both stop at `pend`. An empty range sets errno to + * EINVAL and stores `nptr` in *endptr. + * + * @param nptr First byte of the input (need not be null-terminated). + * @param pend One past the last byte of the input. + * @param result Receives the parsed value when 1 is returned. + * @param endptr On success, receives the position just past the parsed text. + * @return 1 if a value was produced, 0 if the caller must fall back + * to strtod (not a fast-path number, or nothing parsable).
+ */ +static inline int fast_float_try_fast(const char *nptr, const char *pend, double *result, const char **endptr) { + if (nptr == pend) { + errno = EINVAL; + if (endptr) *endptr = (char *)nptr; + return 0; + } + + /* Parse the number string */ + if (parse_number_string(nptr, pend, result, endptr)) { + return 1; + } + + /* Not a valid decimal number, try inf/nan special values */ + if (parse_infnan(nptr, pend, result, endptr)) { + return 1; + } + + return 0; +} + +static double fast_float_strtod_fallback(const char *nptr, size_t len, char **endptr) { + /* Since the input may not be null-terminated, we must copy it into a temporary buffer. */ + char static_buf[128]; + char *buf = static_buf; + if (len >= sizeof(static_buf)) + buf = zmalloc(len + 1); + memcpy(buf, nptr, len); + buf[len] = '\0'; + + char *fallback_end; + double result = strtod(buf, &fallback_end); + if (endptr) *endptr = (char *)nptr + (fallback_end - buf); + + /* If strtod failed to parse, set errno */ + if (fallback_end == buf) { + errno = EINVAL; + } + + if (buf != static_buf) zfree(buf); + return result; +} + +/* Convert string to double, with explicit length (string need NOT be null-terminated). + * Falls back to strtod by copying to a temporary null-terminated buffer. */ +double fast_float_strtod(const char *nptr, size_t len, char **endptr) { + double result = 0.0; + const char *pend = nptr + len; + const char *eptr; + + /* Use fast path for non-null-terminated strings */ + if (likely(fast_float_try_fast(nptr, pend, &result, &eptr) && eptr == pend)) { + if (endptr) *endptr = (char *)eptr; +#if UINTPTR_MAX == 0xffffffff + /* On 32-bit x86 with x87 FPU, the fast-path fdiv/fmul result lives in + * an 80-bit extended-precision register. With optimisation the compiler + * may return that value in st(0) without ever storing it to a 64-bit + * memory slot, so the caller would receive an 80-bit value that differs + * from the correctly-rounded 64-bit double. 
Writing through a volatile + * forces a real fstpl (store + pop to 64-bit memory) followed by fldl + * (reload into st(0) from that 64-bit slot), ensuring the return value + * is truncated to double precision before it reaches the caller. */ + volatile double ret = result; + return ret; +#else + return result; +#endif + } + + /* Fall back to strtod for complex cases: + * - Very large or very small exponents + * - Too many digits (need precise rounding) + * This ensures we get correctly-rounded results for edge cases. */ + return fast_float_strtod_fallback(nptr, len, endptr); +} + +#ifdef REDIS_TEST +#include +#include "testhelp.h" + +#define UNUSED(x) (void)(x) +#define COUNTOF(arr) (int)(sizeof(arr) / sizeof((arr)[0])) + +typedef struct { + const char *input; + double expected; +} ff_testcase; + +static int ff_eq(double a, double b) { + if (isnan(a)) return isnan(b); + if (isinf(a)) return isinf(b) && (a > 0) == (b > 0); + return a == b; +} + +static void run_ff_tests(ff_testcase *cases, int n, int expect_failed) { + for (int i = 0; i < n; i++) { + const char *s = cases[i].input; + size_t len = strlen(s); + char *eptr; + + errno = 0; + double d = fast_float_strtod(s, len, &eptr); + int failed = ((size_t)(eptr - s) != len) || errno == EINVAL || + (errno == ERANGE && (d == HUGE_VAL || d == -HUGE_VAL || fpclassify(d) == FP_ZERO)); + int ok = (expect_failed == failed) && ff_eq(d, cases[i].expected); + char descr[128]; + if (ok) + snprintf(descr, sizeof(descr), "\"%s\" -> expect %s(%.20g)", + s, expect_failed ? "fail" : "ok", cases[i].expected); + else + snprintf(descr, sizeof(descr), "\"%s\" -> expect %s(%.20g) but got %s(%.20g)", + s, expect_failed ? "fail" : "ok", cases[i].expected, failed ? "fail" : "ok", d); + test_cond(descr, ok); + } +} + +int fastFloatTest(int argc, char **argv, int flags) { + UNUSED(argc); + UNUSED(argv); + UNUSED(flags); + + /* Finite decimals: fast path, exponent ±22 edges, mantissa 2^53, strtod fallback. 
*/ + ff_testcase decimal_ok[] = { + {"0", 0.0}, + {"+0", 0.0}, + {"-0", -0.0}, + {"42", 42.0}, + {"+42", 42.0}, + {"-42", -42.0}, + {"00007", 7.0}, + {"00.25", 0.25}, + {"3.14", 3.14}, + {".5", 0.5}, + {"+.5", 0.5}, + {"1.", 1.0}, + {"0.", 0.0}, + {".0", 0.0}, + {"-1.5e2", -150.0}, + {"1e5", 1e5}, + {"1E5", 1e5}, + {"2E3", 2000.0}, + {"3e+5", 3e5}, + {"1e-10", 1e-10}, + {"1e-22", 1e-22}, + {"1e+22", 1e22}, + {"1e-23", 1e-23}, + {"1e+100", 1e100}, + {"1e-100", 1e-100}, + {"9007199254740992", 9007199254740992.0}, + {"9007199254740993", 9007199254740992.0}, + {"12345678901234567890", 1.2345678901234567e19}, + {"2.2250738585072012e-308", 2.2250738585072012e-308}, /* Near DBL_MIN boundary */ + {"0x10", 16.0}, + }; + run_ff_tests(decimal_ok, COUNTOF(decimal_ok), 0); + + /* No valid prefix for full buffer, or trailing junk. */ + ff_testcase decimal_bad[] = { + {"1abc", 1.0}, + {"1e", 1.0}, + {"1e+", 1.0}, + {"1e-", 1.0}, + {"1e+z", 1.0}, + {"12.34.56", 12.34}, + {"..1", 0.0}, + {"e10", 0.0}, + {"E10", 0.0}, + {"+", 0.0}, + {"-", 0.0}, + {"foo", 0.0}, + {"1 ", 1.0}, + {"3.14!", 3.14}, + }; + run_ff_tests(decimal_bad, COUNTOF(decimal_bad), 1); + + ff_testcase inf_valid[] = { + {"inf", INFINITY}, + {"INF", INFINITY}, + {"Inf", INFINITY}, + {"infinity", INFINITY}, + {"INFINITY", INFINITY}, + {"Infinity", INFINITY}, + {"+inf", INFINITY}, + {"-inf", -INFINITY}, + {"+infinity", INFINITY}, + {"-INFINITY", -INFINITY}, + }; + run_ff_tests(inf_valid, COUNTOF(inf_valid), 0); + + ff_testcase inf_invalid[] = { + {"in", 0}, + {"infin", INFINITY}, + {"infini1", INFINITY}, + {"infinitx", INFINITY}, + {"infinityy", INFINITY}, + {"info", INFINITY}, + {"ina", 0}, + {"INFI", INFINITY}, + {"iNf0", INFINITY}, + }; + run_ff_tests(inf_invalid, COUNTOF(inf_invalid), 1); + + ff_testcase nan_valid[] = { + {"nan", NAN}, + {"NAN", NAN}, + {"Nan", NAN}, + {"nan(123)", NAN}, + {"nan(abc)", NAN}, + {"nan(123abc)", NAN}, + }; + run_ff_tests(nan_valid, COUNTOF(nan_valid), 0); + + ff_testcase nan_invalid[] 
= { + {"na", 0}, + {"nan(", NAN}, /* unclosed paren */ + {"nan(abc", NAN}, /* missing closing paren */ + {"nan(ab!c)", NAN}, /* invalid char in paren */ + {"nan(ab c)", NAN}, /* space in paren */ + {"nanx", NAN}, /* trailing garbage */ + }; + run_ff_tests(nan_invalid, COUNTOF(nan_invalid), 1); + + /* Large input that exceeds static_buf (128 bytes), exercising the zmalloc fallback path. */ + { + /* Build a string "000...00042.0" with total length > 128. */ + char big[256]; + memset(big, '0', sizeof(big)); + big[sizeof(big) - 4] = '2'; + big[sizeof(big) - 3] = '.'; + big[sizeof(big) - 2] = '0'; + big[sizeof(big) - 1] = '\0'; + char *eptr; + double d = fast_float_strtod(big, strlen(big), &eptr); + test_cond("large input (>128 bytes) zmalloc fallback path", + (size_t)(eptr - big) == strlen(big) && ff_eq(d, 2.0)); + + /* Large input that is completely invalid. */ + memset(big, 'x', sizeof(big) - 1); + big[sizeof(big) - 1] = '\0'; + d = fast_float_strtod(big, strlen(big), &eptr); + test_cond("invalid large input (>128 bytes) zmalloc fallback path", + eptr == big && ff_eq(d, 0.0)); + } + + return 0; +} +#endif diff --git a/src/fast_float_strtod.h b/src/fast_float_strtod.h new file mode 100644 index 000000000..91ab9cfbf --- /dev/null +++ b/src/fast_float_strtod.h @@ -0,0 +1,13 @@ + +#ifndef __FAST_FLOAT_STRTOD_H__ +#define __FAST_FLOAT_STRTOD_H__ + +#include + +double fast_float_strtod(const char *nptr, size_t len, char **endptr); + +#ifdef REDIS_TEST +int fastFloatTest(int argc, char **argv, int flags); +#endif + +#endif /* __FAST_FLOAT_STRTOD_H__ */ diff --git a/src/resp_parser.c b/src/resp_parser.c index 8c0f17d39..fd1b5acc1 100644 --- a/src/resp_parser.c +++ b/src/resp_parser.c @@ -128,13 +128,10 @@ static int parseDouble(ReplyParser *parser, void *p_ctx) { const char *proto = parser->curr_location; char *p = strchr(proto+1,'\r'); parser->curr_location = p + 2; /* for \r\n */ - char buf[MAX_LONG_DOUBLE_CHARS+1]; size_t len = p-proto-1; double d; if (len <= 
MAX_LONG_DOUBLE_CHARS) { - memcpy(buf,proto+1,len); - buf[len] = '\0'; - d = fast_float_strtod(buf,NULL); /* We expect a valid representation. */ + d = fast_float_strtod(proto+1,len,NULL); /* We expect a valid representation. */ } else { d = 0; } diff --git a/src/server.c b/src/server.c index 4b2d0191c..ef7f79f6e 100644 --- a/src/server.c +++ b/src/server.c @@ -32,6 +32,7 @@ #include "fwtree.h" #include "estore.h" #include "chk.h" +#include "fast_float_strtod.h" #include #include @@ -7826,6 +7827,7 @@ struct redisTest { {"rax", raxTest}, {"zset", zsetTest}, {"topk", chkTopKTest}, + {"fastfloat", fastFloatTest}, }; redisTestProc *getTestProcByName(const char *name) { int numtests = sizeof(redisTests)/sizeof(struct redisTest); diff --git a/src/sort.c b/src/sort.c index c6b32624e..0d8dcdd9b 100644 --- a/src/sort.c +++ b/src/sort.c @@ -518,12 +518,7 @@ void sortCommandGeneric(client *c, int readonly) { if (sortby) vector[j].u.cmpobj = getDecodedObject(byval); } else { if (sdsEncodedObject(byval)) { - char *eptr; - - vector[j].u.score = fast_float_strtod(byval->ptr,&eptr); - if (eptr[0] != '\0' || errno == ERANGE || - isnan(vector[j].u.score)) - { + if (string2d(byval->ptr,sdslen(byval->ptr),&vector[j].u.score) == 0) { int_conversion_error = 1; } } else if (byval->encoding == OBJ_ENCODING_INT) { diff --git a/src/t_zset.c b/src/t_zset.c index b4cd47c23..ff61afdd3 100644 --- a/src/t_zset.c +++ b/src/t_zset.c @@ -721,24 +721,26 @@ static int zslParseRange(robj *min, robj *max, zrangespec *spec) { if (min->encoding == OBJ_ENCODING_INT) { spec->min = (long)min->ptr; } else { + size_t len = sdslen(min->ptr); if (((char*)min->ptr)[0] == '(') { - spec->min = fast_float_strtod((char*)min->ptr+1,&eptr); + spec->min = fast_float_strtod((char*)min->ptr+1,len-1,&eptr); if (eptr[0] != '\0' || isnan(spec->min)) return C_ERR; spec->minex = 1; } else { - spec->min = fast_float_strtod((char*)min->ptr,&eptr); + spec->min = fast_float_strtod((char*)min->ptr,len,&eptr); if (eptr[0] != '\0' 
|| isnan(spec->min)) return C_ERR; } } if (max->encoding == OBJ_ENCODING_INT) { spec->max = (long)max->ptr; } else { + size_t len = sdslen(max->ptr); if (((char*)max->ptr)[0] == '(') { - spec->max = fast_float_strtod((char*)max->ptr+1,&eptr); + spec->max = fast_float_strtod((char*)max->ptr+1,len-1,&eptr); if (eptr[0] != '\0' || isnan(spec->max)) return C_ERR; spec->maxex = 1; } else { - spec->max = fast_float_strtod((char*)max->ptr,&eptr); + spec->max = fast_float_strtod((char*)max->ptr,len,&eptr); if (eptr[0] != '\0' || isnan(spec->max)) return C_ERR; } } @@ -945,13 +947,8 @@ zskiplistNode *zslNthInLexRange(zskiplist *zsl, zlexrangespec *range, long n, un *----------------------------------------------------------------------------*/ static double zzlStrtod(unsigned char *vstr, unsigned int vlen) { - char buf[128]; - if (vlen > sizeof(buf) - 1) - vlen = sizeof(buf) - 1; - memcpy(buf,vstr,vlen); - buf[vlen] = '\0'; - return fast_float_strtod(buf,NULL); - } + return fast_float_strtod((char*)vstr, vlen, NULL); +} double zzlGetScore(unsigned char *sptr) { unsigned char *vstr; diff --git a/src/util.c b/src/util.c index ba3d9d072..becf1486a 100644 --- a/src/util.c +++ b/src/util.c @@ -664,13 +664,10 @@ int string2d(const char *s, size_t slen, double *dp) { if (unlikely(slen == 0 || isspace(((const char*)s)[0]))) return 0; - *dp = fast_float_strtod(s, &eptr); - /* If `fast_float_strtod` didn't consume full input, try `strtod` - * Given fast_float does not support hexadecimal strings representation */ + *dp = fast_float_strtod(s, slen, &eptr); + /* Reject if not all characters were consumed by the parser. 
*/ if (unlikely((size_t)(eptr - (char*)s) != slen)) { - char *fallback_eptr; - *dp = strtod(s, &fallback_eptr); - if ((size_t)(fallback_eptr - (char*)s) != slen) return 0; + return 0; } if (unlikely(errno == EINVAL || (errno == ERANGE && diff --git a/tests/unit/sort.tcl b/tests/unit/sort.tcl index 35ec1606e..0dee25b29 100644 --- a/tests/unit/sort.tcl +++ b/tests/unit/sort.tcl @@ -204,6 +204,14 @@ foreach command {SORT SORT_RO} { assert_equal [lsort -real $floats] [r sort mylist] } + test "SORT BY with smallest normal double 2.2250738585072012e-308" { + r flushdb + r lpush mylist a b + r set weight_a 2.2250738585072012e-308 + r set weight_b 1 + assert_equal {a b} [r sort mylist BY weight_*] + } {} {cluster:skip} + test "SORT with STORE returns zero if result is empty (github issue 224)" { r flushdb r sort foo{t} store bar{t} From 2f1a8b2bad47007a4df73e8f643405ab62a9620b Mon Sep 17 00:00:00 2001 From: Yuan Wang Date: Wed, 15 Apr 2026 20:34:36 +0800 Subject: [PATCH 16/32] Dismiss dict bucket arrays in fork child to reduce CoW (#14979) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit During RDB saving and AOF rewriting, the fork child already dismisses (madvise(MADV_DONTNEED)) individual key-value objects after serializing them. However, the hash table bucket arrays of each dict were never dismissed, leaving large contiguous allocations subject to CoW when the parent modifies them. This PR extends the dismiss mechanism to cover dict bucket arrays, reducing CoW memory overhead. - **Expires kvstore** — dismissed upfront before saving starts, since the child never accesses expires directly, after embedding expire time in the key object. - **Slot dicts** (cluster mode) — dismissed per-slot as the iterator moves to the next slot during RDB saving or AOF rewriting. - **DB keys kvstore** (standalone mode) — dismissed per-DB after each DB is fully serialized during RDB saving or AOF rewriting.
--- src/aof.c | 26 +++++++++---- src/object.c | 10 ++--- src/rdb.c | 11 +++++- src/server.c | 22 +++++++++++ src/server.h | 2 + tests/integration/dismiss-mem.tcl | 61 +++++++++++++++++++++++++++---- 6 files changed, 110 insertions(+), 22 deletions(-) diff --git a/src/aof.c b/src/aof.c index a094d11ca..fe8336061 100644 --- a/src/aof.c +++ b/src/aof.c @@ -2570,10 +2570,20 @@ int rewriteAppendOnlyFileRio(rio *aof) { if (rioWriteBulkLongLong(aof,j) == 0) goto werr; kvstoreIteratorInit(&kvs_it, db->keys); + int last_slot = -1; /* Iterate this DB writing every entry */ while((de = kvstoreIteratorNext(&kvs_it)) != NULL) { long long expiretime; size_t aof_bytes_before_key = aof->processed_bytes; + int curr_slot = kvstoreIteratorGetCurrentDictIndex(&kvs_it); + + /* In cluster mode, dismiss bucket arrays of the previous slot + * which won't be accessed again, to avoid CoW. */ + if (server.cluster_enabled && curr_slot != last_slot) { + if (server.in_fork_child && last_slot != -1) + dismissDictBucketsMemory(kvstoreGetDict(db->keys, last_slot)); + last_slot = curr_slot; + } /* Get the value object (of type kvobj) */ kvobj *o = dictGetKV(de); @@ -2582,12 +2592,9 @@ int rewriteAppendOnlyFileRio(rio *aof) { expiretime = kvobjGetExpire(o); /* Skip keys that are being trimmed */ - if (server.cluster_enabled) { - int curr_slot = kvstoreIteratorGetCurrentDictIndex(&kvs_it); - if (isSlotInTrimJob(curr_slot)) { - skipped++; - continue; - } + if (server.cluster_enabled && isSlotInTrimJob(curr_slot)) { + skipped++; + continue; } /* Set on stack string object for key */ @@ -2600,7 +2607,8 @@ int rewriteAppendOnlyFileRio(rio *aof) { * OS and possibly avoid or decrease COW. We give the dismiss * mechanism a hint about an estimated size of the object we stored. 
*/ size_t dump_size = aof->processed_bytes - aof_bytes_before_key; - if (server.in_fork_child) dismissObject(o, dump_size); + if (server.in_fork_child && dump_size > server.page_size/2) + dismissObject(o, dump_size); /* Update info every 1 second (approximately). * in order to avoid calling mstime() on each iteration, we will @@ -2618,6 +2626,10 @@ int rewriteAppendOnlyFileRio(rio *aof) { debugDelay(server.rdb_key_save_delay); } kvstoreIteratorReset(&kvs_it); + + /* Dismiss bucket arrays of kvstore in standalone mode. */ + if (server.in_fork_child && !server.cluster_enabled) + dismissKvstoreBucketsMemory(db->keys); } serverLog(LL_NOTICE, "AOF rewrite done, %ld keys saved, %llu keys skipped.", key_count, skipped); return C_OK; diff --git a/src/object.c b/src/object.c index 9a9e6c257..1fa922679 100644 --- a/src/object.c +++ b/src/object.c @@ -691,8 +691,7 @@ void dismissSetObject(robj *o, size_t size_hint) { } /* Dismiss hash table memory. */ - dismissMemory(set->ht_table[0], DICTHT_SIZE(set->ht_size_exp[0])*sizeof(dictEntry*)); - dismissMemory(set->ht_table[1], DICTHT_SIZE(set->ht_size_exp[1])*sizeof(dictEntry*)); + dismissDictBucketsMemory(set); } else if (o->encoding == OBJ_ENCODING_INTSET) { dismissMemory(o->ptr, intsetBlobLen((intset*)o->ptr)); } else if (o->encoding == OBJ_ENCODING_LISTPACK) { @@ -720,9 +719,7 @@ void dismissZsetObject(robj *o, size_t size_hint) { } /* Dismiss hash table memory. */ - dict *d = zs->dict; - dismissMemory(d->ht_table[0], DICTHT_SIZE(d->ht_size_exp[0])*sizeof(dictEntry*)); - dismissMemory(d->ht_table[1], DICTHT_SIZE(d->ht_size_exp[1])*sizeof(dictEntry*)); + dismissDictBucketsMemory(zs->dict); } else if (o->encoding == OBJ_ENCODING_LISTPACK) { dismissMemory(o->ptr, lpBytes((unsigned char*)o->ptr)); } else { @@ -748,8 +745,7 @@ void dismissHashObject(robj *o, size_t size_hint) { } /* Dismiss hash table memory. 
*/ - dismissMemory(d->ht_table[0], DICTHT_SIZE(d->ht_size_exp[0])*sizeof(dictEntry*)); - dismissMemory(d->ht_table[1], DICTHT_SIZE(d->ht_size_exp[1])*sizeof(dictEntry*)); + dismissDictBucketsMemory(d); } else if (o->encoding == OBJ_ENCODING_LISTPACK) { dismissMemory(o->ptr, lpBytes((unsigned char*)o->ptr)); } else if (o->encoding == OBJ_ENCODING_LISTPACK_EX) { diff --git a/src/rdb.c b/src/rdb.c index 61ca7f7cf..52dd686d2 100644 --- a/src/rdb.c +++ b/src/rdb.c @@ -1680,6 +1680,10 @@ ssize_t rdbSaveDb(rio *rdb, int dbid, int rdbflags, long *key_counter, unsigned written += res; if ((res = rdbSaveLen(rdb, kvstoreDictSize(db->expires, curr_slot))) < 0) goto werr2; written += res; + /* Dismiss bucket arrays of the previous slot to reduce CoW. + * The final slot is not dismissed since the child exits shortly after. */ + if (server.in_fork_child && last_slot != -1) + dismissDictBucketsMemory(kvstoreGetDict(db->keys, last_slot)); last_slot = curr_slot; } kvobj *kv = dictGetKV(de); @@ -1707,7 +1711,8 @@ ssize_t rdbSaveDb(rio *rdb, int dbid, int rdbflags, long *key_counter, unsigned * OS and possibly avoid or decrease COW. We give the dismiss * mechanism a hint about an estimated size of the object we stored. */ size_t dump_size = rdb->processed_bytes - rdb_bytes_before_key; - if (server.in_fork_child) dismissObject(kv, dump_size); + if (server.in_fork_child && dump_size > server.page_size/2) + dismissObject(kv, dump_size); /* Update child info every 1 second (approximately). * in order to avoid calling mstime() on each iteration, we will @@ -1758,6 +1763,10 @@ int rdbSaveRio(int req, rio *rdb, int *error, int rdbflags, rdbSaveInfo *rsi) { if (!(req & SLAVE_REQ_RDB_EXCLUDE_DATA)) { for (j = 0; j < server.dbnum; j++) { if (rdbSaveDb(rdb, j, rdbflags, &key_counter, &skipped) == -1) goto werr; + /* In standalone mode, dismiss bucket arrays of the saved DB's + * kvstore to reduce CoW. In cluster mode this is done per-slot. 
*/ + if (server.in_fork_child && !server.cluster_enabled) + dismissKvstoreBucketsMemory(server.db[j].keys); } } diff --git a/src/server.c b/src/server.c index ef7f79f6e..a284f5784 100644 --- a/src/server.c +++ b/src/server.c @@ -7497,6 +7497,20 @@ void dismissClientMemory(client *c) { } } +/* Dismiss the hash table bucket arrays of a dict. */ +void dismissDictBucketsMemory(dict *d) { + if (!d) return; + dismissMemory(d->ht_table[0], DICTHT_SIZE(d->ht_size_exp[0]) * sizeof(dictEntry*)); + dismissMemory(d->ht_table[1], DICTHT_SIZE(d->ht_size_exp[1]) * sizeof(dictEntry*)); +} + +/* Dismiss the hash table bucket arrays for all dicts in the given kvstore. */ +void dismissKvstoreBucketsMemory(kvstore *kvs) { + for (int didx = 0; didx < kvstoreNumDicts(kvs); didx++) { + dismissDictBucketsMemory(kvstoreGetDict(kvs, didx)); + } +} + /* In the child process, we don't need some buffers anymore, and these are * likely to change in the parent when there's heavy write traffic. * We dismiss them right away, to avoid CoW. @@ -7535,6 +7549,14 @@ void dismissMemoryInChild(void) { client *c = listNodeValue(ln); dismissClientMemory(c); } + + /* Dismiss expires kvstore bucket arrays since the child process never + * accesses them, expire times are embedded in key objects. 
*/ + if (server.in_fork_child == CHILD_TYPE_RDB || server.in_fork_child == CHILD_TYPE_AOF) { + for (int dbid = 0; dbid < server.dbnum; dbid++) { + dismissKvstoreBucketsMemory(server.db[dbid].expires); + } + } #endif } diff --git a/src/server.h b/src/server.h index 506191327..8a0ba3cbd 100644 --- a/src/server.h +++ b/src/server.h @@ -3682,6 +3682,8 @@ void activeDefragFreeRaw(void *ptr); robj *activeDefragStringOb(robj* ob); void dismissSds(sds s); void dismissMemory(void* ptr, size_t size_hint); +void dismissDictBucketsMemory(dict *d); +void dismissKvstoreBucketsMemory(kvstore *kvs); void dismissMemoryInChild(void); int clientsCronRunClient(client *c); diff --git a/tests/integration/dismiss-mem.tcl b/tests/integration/dismiss-mem.tcl index 6e790665a..2b0fbb3e4 100644 --- a/tests/integration/dismiss-mem.tcl +++ b/tests/integration/dismiss-mem.tcl @@ -4,7 +4,7 @@ # Actually, we may not have many asserts in the test, since we just check for # crashes and the dump file inconsistencies. -start_server {tags {"dismiss external:skip"}} { +start_server {tags {"dismiss external:skip needs:debug"}} { # In other tests, although we test child process dumping RDB file, but # memory allocations of key/values are usually small, they couldn't cover # the "dismiss" object methods, in this test, we create big size key/values @@ -47,12 +47,15 @@ start_server {tags {"dismiss external:skip"}} { r xadd bigstream * entry1 $bigstr entry2 $bigstr set digest [debug_digest] - r config set aof-use-rdb-preamble no - r bgrewriteaof - waitForBgrewriteaof r - r debug loadaof - set newdigest [debug_digest] - assert {$digest eq $newdigest} + # Test both RDB (yes) and AOF (no) rewrite paths. 
+ foreach preamble {yes no} { + r config set aof-use-rdb-preamble $preamble + r bgrewriteaof + waitForBgrewriteaof r + r debug loadaof + set newdigest [debug_digest] + assert {$digest eq $newdigest} + } } test {dismiss client output buffer} { @@ -99,4 +102,48 @@ start_server {tags {"dismiss external:skip"}} { waitForBgsave $master } } + + test {dismiss multi-db kvstore bucket memory in standalone mode} { + r flushall + regexp {db=(\d+)} [r client info] -> curdb + # Populate multiple DBs to verify each DB's bucket arrays can be dismissed. + foreach db {0 1 2 3} { + r select $db + populate 2000 "db${db}key:" 3 0 false 3600 + } + set digest [debug_digest] + + # Test both RDB (yes) and AOF (no) rewrite paths. + foreach preamble {yes no} { + r config set aof-use-rdb-preamble $preamble + r bgrewriteaof + waitForBgrewriteaof r + r debug loadaof + set newdigest [debug_digest] + assert {$digest eq $newdigest} + } + r select $curdb + } +} + +start_cluster 1 0 {tags {dismiss external:skip cluster needs:debug}} { + test {dismiss slot dict bucket memory in cluster mode} { + # Concentrate keys into a few slots using hash tags so each slot's + # bucket array is large enough to be dismissed. + # {06S} -> slot 0, {Qi} -> slot 1, {5L5} -> slot 2 + foreach tag {{06S} {Qi} {5L5}} { + populate 2000 "${tag}key:" 3 0 false 3600 + } + set digest [r debug digest] + + # Test both RDB (yes) and AOF (no) rewrite paths. + foreach preamble {yes no} { + r config set aof-use-rdb-preamble $preamble + r bgrewriteaof + waitForBgrewriteaof r + r debug loadaof + set newdigest [r debug digest] + assert {$digest eq $newdigest} + } + } } From b89bc044a3e58f1a8973df9f766254e048bca248 Mon Sep 17 00:00:00 2001 From: Ozan Tezcan Date: Wed, 15 Apr 2026 17:08:36 +0300 Subject: [PATCH 17/32] Reduce overhead in command propagation (#15003) Refactor command propagation code to reduce overhead on master Currently, the main bottleneck is `feedReplicationBuffer()`. 
It is called for each argument in the command and has bookkeeping overhead on every call (e.g. checking whether to attach replicas to the replication backlog). It is also not inlined by the compiler. These costs become more visible with pipelining and commands with many arguments (e.g. HSET with many fields). Changes: - Defer all bookkeeping to be done once per command instead of once per command argument. - Refactor the hot path so the compiler can inline `replBufWriterAppend()`. - Add `replBufWriterAppendBulkLen()` that uses shared RESP headers for small values, avoiding formatting overhead. These changes should not introduce any behavioral change. **TODO:** In a follow-up PR, explore forwarding the exact command from the client querybuf to avoid re-serialization. Many commands are propagated without modification and can benefit from this. -- | Benchmark | Before (ops/s) | After (ops/s) | Improvement | |---|---|---|---| | SET | 256,048 | 265,131 | **+3%** | | SET (pipeline) | 1,477,310 | 1,671,272 | **+13%** | | HSET 10 fields | 145,000 | 158,000 | **+9%** | | HSET 10 fields (pipeline) | 363,483 | 430,855 | **+18%** | | HSET 10 fields, 15B values (pipeline) | 387,443 | 487,135 | **+26%** | | ZADD 5 members | 180,700 | 193,519 | **+7%** | | ZADD 5 members (pipeline) | 466,453 | 564,872 | **+21%** | ------ Co-authored-by: Yuan Wang --- src/replication.c | 303 ++++++++++++++++++++++++++-------------------- src/server.h | 1 - 2 files changed, 173 insertions(+), 131 deletions(-) diff --git a/src/replication.c b/src/replication.c index f27ab8b7d..2ad39ab6f 100644 --- a/src/replication.c +++ b/src/replication.c @@ -376,23 +376,6 @@ int prepareReplicasToWrite(void) { return prepared; } -/* Wrapper for feedReplicationBuffer() that takes Redis string objects - * as input.
*/ -void feedReplicationBufferWithObject(robj *o) { - char llstr[LONG_STR_SIZE]; - void *p; - size_t len; - - if (o->encoding == OBJ_ENCODING_INT) { - len = ll2string(llstr,sizeof(llstr),(long)o->ptr); - p = llstr; - } else { - len = sdslen(o->ptr); - p = o->ptr; - } - feedReplicationBuffer(p,len); -} - /* Generally, we only have one replication buffer block to trim when replication * backlog size exceeds our setting and no replica reference it. But if replica * clients disconnect, we need to free many replication buffer blocks that are @@ -468,115 +451,175 @@ void freeReplicaReferencedReplBuffer(client *replica) { replica->ref_block_pos = 0; } -/* Append bytes into the global replication buffer list, replication backlog and - * all replica clients use replication buffers collectively, this function replace - * 'addReply*', 'feedReplicationBacklog' for replicas and replication backlog, - * First we add buffer into global replication buffer block list, and then - * update replica / replication-backlog referenced node and block position. */ -void feedReplicationBuffer(char *s, size_t len) { +/* Batched write API for the global replication backlog, optimized for minimal + * overhead per append: data writes are just memcpys into the tail block. + * All bookkeeping is deferred to replBufWriterEnd(). */ +typedef struct replBufWriter { + listNode *start_node; /* First repl buffer block written to. */ + size_t start_pos; /* Byte offset within start_node where writing began. */ + size_t total_len; /* Total bytes written across all writes. */ + int new_blocks; /* Number of new blocks allocated during this stream. */ + replBufBlock *tail; /* Current tail block. */ +} replBufWriter; + +/* Initialize the writer, cache the current tail position. */ +static void replBufWriterBegin(replBufWriter *wr) { + listNode *ln = listLast(server.repl_buffer_blocks); + replBufBlock *tail = ln ? 
listNodeValue(ln) : NULL; + + if (tail && tail->used < tail->size) { + wr->start_node = ln; + wr->start_pos = tail->used; + } else { + wr->start_node = NULL; + wr->start_pos = 0; + } + + wr->total_len = 0; + wr->new_blocks = 0; + wr->tail = tail; +} + +/* Allocate a new replication backlog block. Called when current block is full. */ +static void replBufWriterAllocBlock(replBufWriter *wr, size_t hint) { static long long repl_block_id = 0; + size_t usable_size; + /* Avoid creating nodes smaller than PROTO_REPLY_CHUNK_BYTES, so that we can append more data into them, + * and also avoid creating nodes bigger than repl_backlog_size / 16, so that we won't have huge nodes that can't + * trim when we only still need to hold a small portion from them. */ + size_t limit = max((size_t)server.repl_backlog_size / 16, (size_t)PROTO_REPLY_CHUNK_BYTES); + size_t bsize = min(max(hint, (size_t)PROTO_REPLY_CHUNK_BYTES), limit); + replBufBlock *tail = zmalloc_usable(bsize + sizeof(replBufBlock), &usable_size); + /* Take over the allocation's internal fragmentation */ + tail->size = usable_size - sizeof(replBufBlock); + tail->used = 0; + tail->refcount = 0; + tail->repl_offset = server.master_repl_offset + wr->total_len + 1; + tail->id = repl_block_id++; + listAddNodeTail(server.repl_buffer_blocks, tail); + server.repl_buffer_mem += (usable_size + sizeof(listNode)); + createReplicationBacklogIndex(listLast(server.repl_buffer_blocks)); - if (server.repl_backlog == NULL) return; + /* Update stream state. */ + wr->tail = tail; + wr->new_blocks++; + if (wr->start_node == NULL) { + wr->start_node = listLast(server.repl_buffer_blocks); + wr->start_pos = 0; + } +} - clusterSlotStatsIncrNetworkBytesOutForReplication(len); +/* Slow path: fill remainder of current block + allocate as needed. */ +static void replBufWriterAppendSlow(replBufWriter *wr, const char *buf, size_t len) { + while (len > 0) { + size_t avail = wr->tail ? 
wr->tail->size - wr->tail->used : 0; + if (avail > 0) { + size_t copy = (avail >= len) ? len : avail; + memcpy(wr->tail->buf + wr->tail->used, buf, copy); + wr->tail->used += copy; + wr->total_len += copy; + buf += copy; + len -= copy; + } + + if (len > 0) + replBufWriterAllocBlock(wr, len); + } +} + +/* Write data into the replication buffer. The slow path is split out to give + * the compiler a chance to inline the common case where the write fits entirely + * in the current block. */ +static inline void replBufWriterAppend(replBufWriter *wr, const char *buf, size_t len) { + size_t avail = wr->tail ? wr->tail->size - wr->tail->used : 0; + if (len > 0 && avail >= len) { + memcpy(wr->tail->buf + wr->tail->used, buf, len); + wr->tail->used += len; + wr->total_len += len; + return; + } + replBufWriterAppendSlow(wr, buf, len); +} + +/* Write a RESP header prefix\r\n (e.g. "$12\r\n" or "*3\r\n"). + * Uses pre-built shared objects for small values, formats manually otherwise. */ +static inline void replBufWriterAppendBulkLen(replBufWriter *wr, char prefix, long long value) { + serverAssert(prefix == '$' || prefix == '*'); + if (value >= 0 && value < OBJ_SHARED_BULKHDR_LEN) { + robj **tbl = (prefix == '$') ? shared.bulkhdr : shared.mbulkhdr; + replBufWriterAppend(wr, tbl[value]->ptr, OBJ_SHARED_HDR_STRLEN(value)); + return; + } + char buf[LONG_STR_SIZE+3]; + buf[0] = prefix; + int len = ll2string(buf+1, sizeof(buf)-1, value); + buf[len+1] = '\r'; + buf[len+2] = '\n'; + replBufWriterAppend(wr, buf, len+3); +} + + +/* Finalize the replication buffer write: update global offsets, set up replica + * references for new data, check output buffer limits, and trim the + * backlog if new blocks were allocated. 
*/ +static void replBufWriterEnd(replBufWriter *wr) { + if (wr->total_len == 0) return; + + serverAssert(wr->start_node != NULL); + clusterSlotStatsIncrNetworkBytesOutForReplication(wr->total_len); /* Update the current cmd's keys with the commands replication bytes*/ - hotkeyMetrics metrics = {0, len}; + hotkeyMetrics metrics = {0, wr->total_len}; hotkeyStatsUpdateCurrentCmd(server.hotkeys, metrics); - while(len > 0) { - size_t start_pos = 0; /* The position of referenced block to start sending. */ - listNode *start_node = NULL; /* Replica/backlog starts referenced node. */ - int add_new_block = 0; /* Create new block if current block is total used. */ - listNode *ln = listLast(server.repl_buffer_blocks); - replBufBlock *tail = ln ? listNodeValue(ln) : NULL; + server.master_repl_offset += wr->total_len; + server.repl_backlog->histlen += wr->total_len; - /* Append to tail string when possible. */ - if (tail && tail->size > tail->used) { - start_node = listLast(server.repl_buffer_blocks); - start_pos = tail->used; - /* Copy the part we can fit into the tail, and leave the rest for a - * new node */ - size_t avail = tail->size - tail->used; - size_t copy = (avail >= len) ? len : avail; - memcpy(tail->buf + tail->used, s, copy); - tail->used += copy; - s += copy; - len -= copy; - server.master_repl_offset += copy; - server.repl_backlog->histlen += copy; - } - if (len) { - /* Create a new node, make sure it is allocated to at - * least PROTO_REPLY_CHUNK_BYTES */ - size_t usable_size; - /* Avoid creating nodes smaller than PROTO_REPLY_CHUNK_BYTES, so that we can append more data into them, - * and also avoid creating nodes bigger than repl_backlog_size / 16, so that we won't have huge nodes that can't - * trim when we only still need to hold a small portion from them. 
*/ - size_t limit = max((size_t)server.repl_backlog_size / 16, (size_t)PROTO_REPLY_CHUNK_BYTES); - size_t size = min(max(len, (size_t)PROTO_REPLY_CHUNK_BYTES), limit); - tail = zmalloc_usable(size + sizeof(replBufBlock), &usable_size); - /* Take over the allocation's internal fragmentation */ - tail->size = usable_size - sizeof(replBufBlock); - size_t copy = (tail->size >= len) ? len : tail->size; - tail->used = copy; - tail->refcount = 0; - tail->repl_offset = server.master_repl_offset + 1; - tail->id = repl_block_id++; - memcpy(tail->buf, s, copy); - listAddNodeTail(server.repl_buffer_blocks, tail); - /* We also count the list node memory into replication buffer memory. */ - server.repl_buffer_mem += (usable_size + sizeof(listNode)); - add_new_block = 1; - if (start_node == NULL) { - start_node = listLast(server.repl_buffer_blocks); - start_pos = 0; - } - s += copy; - len -= copy; - server.master_repl_offset += copy; - server.repl_backlog->histlen += copy; - } + /* For output buffer of replicas. */ + listIter li; + listNode *ln; + listRewind(server.slaves,&li); + while((ln = listNext(&li))) { + client *slave = ln->value; + if (!canFeedReplicaReplBuffer(slave)) continue; - /* For output buffer of replicas. */ - listIter li; - listRewind(server.slaves,&li); - while((ln = listNext(&li))) { - client *slave = ln->value; - if (!canFeedReplicaReplBuffer(slave)) continue; - - /* Update shared replication buffer start position. */ - if (slave->ref_repl_buf_node == NULL) { - slave->ref_repl_buf_node = start_node; - slave->ref_block_pos = start_pos; - /* Only increase the start block reference count. */ - ((replBufBlock *)listNodeValue(start_node))->refcount++; - } - - /* Check output buffer limit only when add new block. 
*/ - if (add_new_block) closeClientOnOutputBufferLimitReached(slave, 1); - } - - /* For replication backlog */ - if (server.repl_backlog->ref_repl_buf_node == NULL) { - server.repl_backlog->ref_repl_buf_node = start_node; + /* Update shared replication buffer start position. */ + if (slave->ref_repl_buf_node == NULL) { + slave->ref_repl_buf_node = wr->start_node; + slave->ref_block_pos = wr->start_pos; /* Only increase the start block reference count. */ - ((replBufBlock *)listNodeValue(start_node))->refcount++; - - /* Replication buffer must be empty before adding replication stream - * into replication backlog. */ - serverAssert(add_new_block == 1 && start_pos == 0); + ((replBufBlock *)listNodeValue(wr->start_node))->refcount++; } - if (add_new_block) { - createReplicationBacklogIndex(listLast(server.repl_buffer_blocks)); - /* It is important to trim after adding replication data to keep the backlog size close to - * repl_backlog_size in the common case. We wait until we add a new block to avoid repeated - * unnecessary trimming attempts when small amounts of data are added. See comments in - * freeMemoryGetNotCountedMemory() for details on replication backlog memory tracking. */ - incrementalTrimReplicationBacklog(REPL_BACKLOG_TRIM_BLOCKS_PER_CALL); - } + /* Check output buffer limit only when new blocks were added. */ + if (wr->new_blocks) closeClientOnOutputBufferLimitReached(slave, 1); } + + /* For replication backlog */ + if (server.repl_backlog->ref_repl_buf_node == NULL) { + server.repl_backlog->ref_repl_buf_node = wr->start_node; + /* Only increase the start block reference count. */ + ((replBufBlock *)listNodeValue(wr->start_node))->refcount++; + + /* Replication buffer must be empty before adding replication stream + * into replication backlog. 
*/ + serverAssert(wr->new_blocks > 0 && wr->start_pos == 0); + } + if (wr->new_blocks) { + /* It is important to trim after adding replication data to keep the backlog size close to + * repl_backlog_size in the common case. We wait until we add a new block to avoid repeated + * unnecessary trimming attempts when small amounts of data are added. See comments in + * freeMemoryGetNotCountedMemory() for details on replication backlog memory tracking. */ + incrementalTrimReplicationBacklog(REPL_BACKLOG_TRIM_BLOCKS_PER_CALL); + } +} + +/* Append bytes into the global replication buffer. */ +static void feedReplicationBuffer(const char *buf, size_t len) { + replBufWriter wr; + replBufWriterBegin(&wr); + replBufWriterAppend(&wr, buf, len); + replBufWriterEnd(&wr); } /* Propagate write commands to replication stream. @@ -642,7 +685,7 @@ void replicationFeedSlaves(list *slaves, int dictid, robj **argv, int argc) { dictid_len, llstr)); } - feedReplicationBufferWithObject(selectcmd); + feedReplicationBuffer(selectcmd->ptr, sdslen(selectcmd->ptr)); /* Although the SELECT command is not associated with any slot, * its per-slot network-bytes-out accumulation is made by the above function call. @@ -657,28 +700,28 @@ void replicationFeedSlaves(list *slaves, int dictid, robj **argv, int argc) { /* Write the command to the replication buffer if any. */ char aux[LONG_STR_SIZE+3]; + replBufWriter wr; + replBufWriterBegin(&wr); - /* Add the multi bulk reply length. 
*/ - aux[0] = '*'; - len = ll2string(aux+1,sizeof(aux)-1,argc); - aux[len+1] = '\r'; - aux[len+2] = '\n'; - feedReplicationBuffer(aux,len+3); + /* Write the multi bulk count */ + replBufWriterAppendBulkLen(&wr, '*', argc); for (j = 0; j < argc; j++) { + /* Write the bulk count */ long objlen = stringObjectLen(argv[j]); + replBufWriterAppendBulkLen(&wr, '$', objlen); - /* We need to feed the buffer with the object as a bulk reply - * not just as a plain string, so create the $..CRLF payload len - * and add the final CRLF */ - aux[0] = '$'; - len = ll2string(aux+1,sizeof(aux)-1,objlen); - aux[len+1] = '\r'; - aux[len+2] = '\n'; - feedReplicationBuffer(aux,len+3); - feedReplicationBufferWithObject(argv[j]); - feedReplicationBuffer(aux+len+1,2); + /* Write the bulk data */ + if (argv[j]->encoding == OBJ_ENCODING_INT) { + len = ll2string(aux, sizeof(aux), (long)argv[j]->ptr); + replBufWriterAppend(&wr, aux, len); + } else { + replBufWriterAppend(&wr, argv[j]->ptr, objlen); + } + replBufWriterAppend(&wr, "\r\n", 2); } + + replBufWriterEnd(&wr); } /* This is a debugging function that gets called when we detect something diff --git a/src/server.h b/src/server.h index 8a0ba3cbd..091f7c9ac 100644 --- a/src/server.h +++ b/src/server.h @@ -3360,7 +3360,6 @@ ssize_t syncReadLine(int fd, char *ptr, ssize_t size, long long timeout); void replicationFeedSlaves(list *slaves, int dictid, robj **argv, int argc); void replicationFeedStreamFromMasterStream(char *buf, size_t buflen); void resetReplicationBuffer(void); -void feedReplicationBuffer(char *buf, size_t len); void freeReplicaReferencedReplBuffer(client *replica); void replicationFeedMonitors(client *c, list *monitors, int dictid, robj **argv, int argc); void updateSlavesWaitingBgsave(int bgsaveerr, int type); From 3bcfbbe92a60b28c85a163dd920f6777c974bdc4 Mon Sep 17 00:00:00 2001 From: Mincho Paskalev Date: Wed, 15 Apr 2026 17:46:22 +0300 Subject: [PATCH 18/32] Add new OBJ_GCRA type (#14905) [PR 
](https://github.com/redis/redis/pull/14826) introduced a new rate limiting command which stores its internal implementation-detail data into a string key. Since this will prevent a client from detecting type errors or accidental overwrites or value invalidations, f.e via SET or INCR this PR introduces a new data type - OBJ_GCRA specifically created for that new command. Furthermore, a new RATE_LIMIT KSN type was introduced for emitting "gcra" events on such keys. GCRASETTAT was renamed to GCRASETVALUE. --------- Co-authored-by: debing.sun --- redis.conf | 3 +- src/acl.c | 1 + src/aof.c | 14 ++++ src/commands.def | 88 ++++++++++++++-------- src/commands/gcra.json | 4 +- src/commands/gcrasetvalue.json | 52 +++++++++++++ src/config.c | 2 +- src/db.c | 4 +- src/debug.c | 14 ++++ src/defrag.c | 7 ++ src/gcra.c | 77 ++++++++++++++----- src/module.c | 6 +- src/notify.c | 2 + src/object.c | 73 +++++++++++++++++- src/object.h | 6 +- src/rdb.c | 14 ++++ src/rdb.h | 3 +- src/redis-check-rdb.c | 1 + src/redismodule.h | 4 +- src/server.h | 19 ++++- tests/unit/gcra.tcl | 132 ++++++++++++++++++++++++++++----- utils/generate-command-code.py | 4 +- 22 files changed, 450 insertions(+), 80 deletions(-) create mode 100644 src/commands/gcrasetvalue.json diff --git a/redis.conf b/redis.conf index 30a3d8b57..845be292f 100644 --- a/redis.conf +++ b/redis.conf @@ -2039,8 +2039,9 @@ latency-monitor-threshold 0 # (Note: not included in the 'A' class) # c Type-changed events generated every time a key's type changes # (Note: not included in the 'A' class) +# r rate limit event # A Alias for g$lshzxetd, so that the "AKE" string means all the events -# except key-miss, new key, overwritten and type-changed. +# except key-miss, new key, overwritten, type-changed and rate-limit. # # The "notify-keyspace-events" takes as argument a string that is composed # of zero or multiple characters. 
The empty string means that notifications diff --git a/src/acl.c b/src/acl.c index e7c61aacf..79a900200 100644 --- a/src/acl.c +++ b/src/acl.c @@ -70,6 +70,7 @@ struct ACLCategoryItem { {"connection", ACL_CATEGORY_CONNECTION}, {"transaction", ACL_CATEGORY_TRANSACTION}, {"scripting", ACL_CATEGORY_SCRIPTING}, + {"ratelimit", ACL_CATEGORY_RATE_LIMIT}, {NULL,0} /* Terminator. */ }; diff --git a/src/aof.c b/src/aof.c index fe8336061..a2bf945f2 100644 --- a/src/aof.c +++ b/src/aof.c @@ -2467,6 +2467,18 @@ int rewriteStreamObject(rio *r, robj *key, robj *o) { return 1; } +int rewriteGCRAObject(rio *r, robj *key, robj *o) { + long long val; + getLongLongFromGCRAObject(o, &val); + + /* GCRASETVALUE */ + if (rioWriteBulkCount(r,'*',3) == 0) return 0; + if (rioWriteBulkString(r,"GCRASETVALUE",12) == 0) return 0; + if (rioWriteBulkObject(r,key) == 0) return 0; + if (rioWriteBulkLongLong(r,val) == 0) return 0; + return 1; +} + /* Call the module type callback in order to rewrite a data type * that is exported by a module and is not handled by Redis itself. * The function returns 0 on error, 1 on success. 
*/ @@ -2522,6 +2534,8 @@ int rewriteObject(rio *r, robj *key, robj *o, int dbid, long long expiretime) { if (rewriteHashObject(r,key,o) == 0) return C_ERR; } else if (o->type == OBJ_STREAM) { if (rewriteStreamObject(r,key,o) == 0) return C_ERR; + } else if (o->type == OBJ_GCRA) { + if (rewriteGCRAObject(r,key,o) == 0) return C_ERR; } else if (o->type == OBJ_MODULE) { if (rewriteModuleObject(r,key,o,dbid) == 0) return C_ERR; } else { diff --git a/src/commands.def b/src/commands.def index 9f1f88b0d..fed08eda3 100644 --- a/src/commands.def +++ b/src/commands.def @@ -24,7 +24,8 @@ const char *COMMAND_GROUP_STR[] = { "geo", "stream", "bitmap", - "module" + "module", + "rate_limit" }; const char *commandGroupStr(int index) { @@ -5379,6 +5380,59 @@ struct COMMAND_ARG UNSUBSCRIBE_Args[] = { {MAKE_ARG("channel",ARG_TYPE_STRING,-1,NULL,NULL,NULL,CMD_ARG_OPTIONAL|CMD_ARG_MULTIPLE,0,NULL)}, }; +/********** GCRA ********************/ + +#ifndef SKIP_CMD_HISTORY_TABLE +/* GCRA history */ +#define GCRA_History NULL +#endif + +#ifndef SKIP_CMD_TIPS_TABLE +/* GCRA tips */ +#define GCRA_Tips NULL +#endif + +#ifndef SKIP_CMD_KEY_SPECS_TABLE +/* GCRA key specs */ +keySpec GCRA_Keyspecs[1] = { +{NULL,CMD_KEY_RW|CMD_KEY_ACCESS|CMD_KEY_UPDATE,KSPEC_BS_INDEX,.bs.index={1},KSPEC_FK_RANGE,.fk.range={0,1,0}} +}; +#endif + +/* GCRA argument table */ +struct COMMAND_ARG GCRA_Args[] = { +{MAKE_ARG("key",ARG_TYPE_KEY,0,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)}, +{MAKE_ARG("max-burst",ARG_TYPE_INTEGER,-1,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)}, +{MAKE_ARG("tokens-per-period",ARG_TYPE_INTEGER,-1,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)}, +{MAKE_ARG("period",ARG_TYPE_DOUBLE,-1,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)}, +{MAKE_ARG("count",ARG_TYPE_INTEGER,-1,"TOKENS",NULL,NULL,CMD_ARG_OPTIONAL,0,NULL)}, +}; + +/********** GCRASETVALUE ********************/ + +#ifndef SKIP_CMD_HISTORY_TABLE +/* GCRASETVALUE history */ +#define GCRASETVALUE_History NULL +#endif + +#ifndef SKIP_CMD_TIPS_TABLE +/* GCRASETVALUE tips */ 
+#define GCRASETVALUE_Tips NULL +#endif + +#ifndef SKIP_CMD_KEY_SPECS_TABLE +/* GCRASETVALUE key specs */ +keySpec GCRASETVALUE_Keyspecs[1] = { +{NULL,CMD_KEY_OW|CMD_KEY_UPDATE,KSPEC_BS_INDEX,.bs.index={1},KSPEC_FK_RANGE,.fk.range={0,1,0}} +}; +#endif + +/* GCRASETVALUE argument table */ +struct COMMAND_ARG GCRASETVALUE_Args[] = { +{MAKE_ARG("key",ARG_TYPE_KEY,0,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)}, +{MAKE_ARG("tat",ARG_TYPE_INTEGER,-1,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)}, +}; + /********** EVAL ********************/ #ifndef SKIP_CMD_HISTORY_TABLE @@ -11094,34 +11148,6 @@ struct COMMAND_ARG DIGEST_Args[] = { {MAKE_ARG("key",ARG_TYPE_KEY,0,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)}, }; -/********** GCRA ********************/ - -#ifndef SKIP_CMD_HISTORY_TABLE -/* GCRA history */ -#define GCRA_History NULL -#endif - -#ifndef SKIP_CMD_TIPS_TABLE -/* GCRA tips */ -#define GCRA_Tips NULL -#endif - -#ifndef SKIP_CMD_KEY_SPECS_TABLE -/* GCRA key specs */ -keySpec GCRA_Keyspecs[1] = { -{NULL,CMD_KEY_RW|CMD_KEY_ACCESS|CMD_KEY_UPDATE,KSPEC_BS_INDEX,.bs.index={1},KSPEC_FK_RANGE,.fk.range={0,1,0}} -}; -#endif - -/* GCRA argument table */ -struct COMMAND_ARG GCRA_Args[] = { -{MAKE_ARG("key",ARG_TYPE_KEY,0,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)}, -{MAKE_ARG("max-burst",ARG_TYPE_INTEGER,-1,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)}, -{MAKE_ARG("tokens-per-period",ARG_TYPE_INTEGER,-1,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)}, -{MAKE_ARG("period",ARG_TYPE_DOUBLE,-1,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)}, -{MAKE_ARG("count",ARG_TYPE_INTEGER,-1,"TOKENS",NULL,NULL,CMD_ARG_OPTIONAL,0,NULL)}, -}; - /********** GET ********************/ #ifndef SKIP_CMD_HISTORY_TABLE @@ -11929,6 +11955,9 @@ struct COMMAND_STRUCT redisCommandTable[] = { {MAKE_CMD("subscribe","Listens for messages published to channels.","O(N) where N is the number of channels to subscribe 
to.","2.0.0",CMD_DOC_NONE,NULL,NULL,"pubsub",COMMAND_GROUP_PUBSUB,SUBSCRIBE_History,0,SUBSCRIBE_Tips,0,subscribeCommand,-2,CMD_PUBSUB|CMD_NOSCRIPT|CMD_LOADING|CMD_STALE|CMD_SENTINEL,0,SUBSCRIBE_Keyspecs,0,NULL,1),.args=SUBSCRIBE_Args}, {MAKE_CMD("sunsubscribe","Stops listening to messages posted to shard channels.","O(N) where N is the number of shard channels to unsubscribe.","7.0.0",CMD_DOC_NONE,NULL,NULL,"pubsub",COMMAND_GROUP_PUBSUB,SUNSUBSCRIBE_History,0,SUNSUBSCRIBE_Tips,0,sunsubscribeCommand,-1,CMD_PUBSUB|CMD_NOSCRIPT|CMD_LOADING|CMD_STALE,0,SUNSUBSCRIBE_Keyspecs,1,NULL,1),.args=SUNSUBSCRIBE_Args}, {MAKE_CMD("unsubscribe","Stops listening to messages posted to channels.","O(N) where N is the number of channels to unsubscribe.","2.0.0",CMD_DOC_NONE,NULL,NULL,"pubsub",COMMAND_GROUP_PUBSUB,UNSUBSCRIBE_History,0,UNSUBSCRIBE_Tips,0,unsubscribeCommand,-1,CMD_PUBSUB|CMD_NOSCRIPT|CMD_LOADING|CMD_STALE|CMD_SENTINEL,0,UNSUBSCRIBE_Keyspecs,0,NULL,1),.args=UNSUBSCRIBE_Args}, +/* rate_limit */ +{MAKE_CMD("gcra","Rate limit via GCRA (Generic Cell Rate Algorithm).","O(1)","8.8.0",CMD_DOC_NONE,NULL,NULL,"rate_limit",COMMAND_GROUP_RATE_LIMIT,GCRA_History,0,GCRA_Tips,0,gcraCommand,-5,CMD_WRITE|CMD_DENYOOM|CMD_FAST,ACL_CATEGORY_RATE_LIMIT,GCRA_Keyspecs,1,NULL,5),.args=GCRA_Args}, +{MAKE_CMD("gcrasetvalue","An internal command for recording a GCRA TAT value during AOF rewrite and replication.","O(1)","8.8.0",CMD_DOC_NONE,NULL,NULL,"rate_limit",COMMAND_GROUP_RATE_LIMIT,GCRASETVALUE_History,0,GCRASETVALUE_Tips,0,gcraSetValueCommand,3,CMD_WRITE|CMD_DENYOOM|CMD_FAST,ACL_CATEGORY_RATE_LIMIT,GCRASETVALUE_Keyspecs,1,NULL,2),.args=GCRASETVALUE_Args}, /* scripting */ {MAKE_CMD("eval","Executes a server-side Lua script.","Depends on the script that is 
executed.","2.6.0",CMD_DOC_NONE,NULL,NULL,"scripting",COMMAND_GROUP_SCRIPTING,EVAL_History,0,EVAL_Tips,0,evalCommand,-3,CMD_NOSCRIPT|CMD_SKIP_MONITOR|CMD_MAY_REPLICATE|CMD_NO_MANDATORY_KEYS|CMD_STALE,ACL_CATEGORY_SCRIPTING,EVAL_Keyspecs,1,evalGetKeys,4),.args=EVAL_Args}, {MAKE_CMD("evalsha","Executes a server-side Lua script by SHA1 digest.","Depends on the script that is executed.","2.6.0",CMD_DOC_NONE,NULL,NULL,"scripting",COMMAND_GROUP_SCRIPTING,EVALSHA_History,0,EVALSHA_Tips,0,evalShaCommand,-3,CMD_NOSCRIPT|CMD_SKIP_MONITOR|CMD_MAY_REPLICATE|CMD_NO_MANDATORY_KEYS|CMD_STALE,ACL_CATEGORY_SCRIPTING,EVALSHA_Keyspecs,1,evalGetKeys,4),.args=EVALSHA_Args}, @@ -12053,7 +12082,6 @@ struct COMMAND_STRUCT redisCommandTable[] = { {MAKE_CMD("decrby","Decrements a number from the integer value of a key. Uses 0 as initial value if the key doesn't exist.","O(1)","1.0.0",CMD_DOC_NONE,NULL,NULL,"string",COMMAND_GROUP_STRING,DECRBY_History,0,DECRBY_Tips,0,decrbyCommand,3,CMD_WRITE|CMD_DENYOOM|CMD_FAST,ACL_CATEGORY_STRING,DECRBY_Keyspecs,1,NULL,2),.args=DECRBY_Args}, {MAKE_CMD("delex","Conditionally removes the specified key based on value or digest comparison.","O(1) for IFEQ/IFNE, O(N) for IFDEQ/IFDNE where N is the length of the string value.","8.4.0",CMD_DOC_NONE,NULL,NULL,"string",COMMAND_GROUP_STRING,DELEX_History,0,DELEX_Tips,0,delexCommand,-2,CMD_WRITE|CMD_FAST,ACL_CATEGORY_STRING,DELEX_Keyspecs,1,delexGetKeys,2),.args=DELEX_Args}, {MAKE_CMD("digest","Returns the XXH3 hash of a string value.","O(N) where N is the length of the string value.","8.4.0",CMD_DOC_NONE,NULL,NULL,"string",COMMAND_GROUP_STRING,DIGEST_History,0,DIGEST_Tips,0,digestCommand,2,CMD_READONLY|CMD_FAST,ACL_CATEGORY_STRING,DIGEST_Keyspecs,1,NULL,1),.args=DIGEST_Args}, -{MAKE_CMD("gcra","Rate limit via GCRA (Generic Cell Rate 
Algorithm).","O(1)","8.8.0",CMD_DOC_NONE,NULL,NULL,"string",COMMAND_GROUP_STRING,GCRA_History,0,GCRA_Tips,0,gcraCommand,-5,CMD_WRITE|CMD_DENYOOM|CMD_FAST,ACL_CATEGORY_STRING,GCRA_Keyspecs,1,NULL,5),.args=GCRA_Args}, {MAKE_CMD("get","Returns the string value of a key.","O(1)","1.0.0",CMD_DOC_NONE,NULL,NULL,"string",COMMAND_GROUP_STRING,GET_History,0,GET_Tips,0,getCommand,2,CMD_READONLY|CMD_FAST,ACL_CATEGORY_STRING,GET_Keyspecs,1,NULL,1),.args=GET_Args}, {MAKE_CMD("getdel","Returns the string value of a key after deleting the key.","O(1)","6.2.0",CMD_DOC_NONE,NULL,NULL,"string",COMMAND_GROUP_STRING,GETDEL_History,0,GETDEL_Tips,0,getdelCommand,2,CMD_WRITE|CMD_FAST,ACL_CATEGORY_STRING,GETDEL_Keyspecs,1,NULL,1),.args=GETDEL_Args}, {MAKE_CMD("getex","Returns the string value of a key after setting its expiration time.","O(1)","6.2.0",CMD_DOC_NONE,NULL,NULL,"string",COMMAND_GROUP_STRING,GETEX_History,0,GETEX_Tips,0,getexCommand,-2,CMD_WRITE|CMD_FAST,ACL_CATEGORY_STRING,GETEX_Keyspecs,1,NULL,2),.args=GETEX_Args}, diff --git a/src/commands/gcra.json b/src/commands/gcra.json index cc0e029c2..6980af1ac 100644 --- a/src/commands/gcra.json +++ b/src/commands/gcra.json @@ -2,7 +2,7 @@ "GCRA": { "summary": "Rate limit via GCRA (Generic Cell Rate Algorithm).", "complexity": "O(1)", - "group": "string", + "group": "rate_limit", "since": "8.8.0", "arity": -5, "function": "gcraCommand", @@ -12,7 +12,7 @@ "FAST" ], "acl_categories": [ - "STRING" + "RATE_LIMIT" ], "key_specs": [ { diff --git a/src/commands/gcrasetvalue.json b/src/commands/gcrasetvalue.json new file mode 100644 index 000000000..5cce15cf4 --- /dev/null +++ b/src/commands/gcrasetvalue.json @@ -0,0 +1,52 @@ +{ + "GCRASETVALUE": { + "summary": "An internal command for recording a GCRA TAT value during AOF rewrite and replication.", + "complexity": "O(1)", + "group": "rate_limit", + "since": "8.8.0", + "arity": 3, + "function": "gcraSetValueCommand", + "command_flags": [ + "WRITE", + "DENYOOM", + "FAST" + ], + 
"acl_categories": [ + "RATE_LIMIT" + ], + "key_specs": [ + { + "flags": [ + "OW", + "UPDATE" + ], + "begin_search": { + "index": { + "pos": 1 + } + }, + "find_keys": { + "range": { + "lastkey": 0, + "step": 1, + "limit": 0 + } + } + } + ], + "reply_schema": { + "const": "OK" + }, + "arguments": [ + { + "name": "key", + "type": "key", + "key_spec_index": 0 + }, + { + "name": "tat", + "type": "integer" + } + ] + } +} diff --git a/src/config.c b/src/config.c index e02cd64e5..0ad28ef5b 100644 --- a/src/config.c +++ b/src/config.c @@ -2917,7 +2917,7 @@ static int setConfigNotifyKeyspaceEventsOption(standardConfig *config, sds *argv } int flags = keyspaceEventsStringToFlags(argv[0]); if (flags == -1) { - *err = "Invalid event class character. Use 'Ag$lshzxeKEtmdn'."; + *err = "Invalid event class character. Use 'Ag$lshzxeKEtmdnocr'."; return 0; } server.notify_keyspace_events = flags; diff --git a/src/db.c b/src/db.c index 18e9c47a9..32c058dab 100644 --- a/src/db.c +++ b/src/db.c @@ -1756,7 +1756,8 @@ char *obj_type_name[OBJ_TYPE_MAX] = { "zset", "hash", NULL, /* module type is special */ - "stream" + "stream", + "gcra" }; /* Helper function to get type from a string in scan commands */ @@ -2438,6 +2439,7 @@ void copyCommand(client *c) { case OBJ_ZSET: newobj = zsetDup(o); break; case OBJ_HASH: newobj = hashTypeDup(o, &minHashExpire); break; case OBJ_STREAM: newobj = streamDup(o); break; + case OBJ_GCRA: newobj = gcraDup(o); break; case OBJ_MODULE: newobj = moduleTypeDupOrReply(c, key, newkey, dst->id, o); if (!newobj) return; diff --git a/src/debug.c b/src/debug.c index 6c8e1e4db..c6baf4b4d 100644 --- a/src/debug.c +++ b/src/debug.c @@ -123,6 +123,14 @@ void mixStringObjectDigest(unsigned char *digest, robj *o) { decrRefCount(o); } +void mixGCRAObjectDigest(unsigned char *digest, robj *o) { + char buf[LONG_STR_SIZE]; + long long val; + getLongLongFromGCRAObject(o, &val); + int len = ll2string(buf, sizeof(buf), val); + mixDigest(digest,buf,len); +} + /* This function 
computes the digest of a data structure stored in the * object 'o'. It is the core of the DEBUG DIGEST command: when taking the * digest of a whole dataset, we take the digest of the key and the value @@ -255,6 +263,8 @@ void xorObjectDigest(redisDb *db, robj *keyobj, unsigned char *digest, robj *o) } } streamIteratorStop(&si); + } else if (o->type == OBJ_GCRA) { + mixGCRAObjectDigest(digest, o); } else if (o->type == OBJ_MODULE) { RedisModuleDigest md = {{0},{0},keyobj,db->id}; moduleValue *mv = o->ptr; @@ -1302,6 +1312,10 @@ void serverLogObjectDebugInfo(const robj *o) { serverLog(LL_WARNING,"Skiplist level: %d", (int) ((const zset*)o->ptr)->zsl->level); } else if (o->type == OBJ_STREAM) { serverLog(LL_WARNING,"Stream size: %d", (int) streamLength(o)); + } else if (o->type == OBJ_GCRA) { +#if UINTPTR_MAX == 0xffffffffffffffff + serverLog(LL_WARNING, "GCRA object: %lld", (long long)o->ptr); +#endif } #endif } diff --git a/src/defrag.c b/src/defrag.c index f3ca5acba..93a7389d9 100644 --- a/src/defrag.c +++ b/src/defrag.c @@ -1163,6 +1163,13 @@ void defragKey(defragKeysCtx *ctx, dictEntry *de, dictEntryLink link) { } } else if (ob->type == OBJ_STREAM) { defragStream(ctx, ob); + } else if (ob->type == OBJ_GCRA) { + /* GCRA object is just an allocation to a long long value */ +#if UINTPTR_MAX == 0xffffffff + void *newptr, *ptr = ob->ptr; + if ((newptr = activeDefragAlloc(ptr))) + ob->ptr = newptr; +#endif } else if (ob->type == OBJ_MODULE) { defragModule(ctx,db, ob); } else { diff --git a/src/gcra.c b/src/gcra.c index 9a2c23df5..488fad5ce 100644 --- a/src/gcra.c +++ b/src/gcra.c @@ -129,23 +129,11 @@ void gcraCommand(client *c) { long long tat_us, new_tat_us; dictEntryLink link; kvobj *kv = lookupKeyWriteWithLink(c->db, key, &link); - if (checkType(c, kv, OBJ_STRING)) { + if (checkType(c, kv, OBJ_GCRA)) { return; } if (kv != NULL) { - /* Note the value of the key may have been overwritten outside of the - * GCRA command (f.e by calling SET). 
We don't try to catch such errors - * as this would be possible only with a dedicated structures for GCRA, - * while using STRING gives us all the benefits of a redis key - - * replication, setting expiration, etc. */ - if (getLongLongFromObject(kv, &tat_us) != C_OK) { - addReplyError(c, "Invalid GCRA key"); - return; - } - if (tat_us <= 0) { - addReplyError(c, "Negative time is invalid value for GCRA"); - return; - } + getLongLongFromGCRAObject(kv, &tat_us); } else { tat_us = now; } @@ -208,10 +196,18 @@ void gcraCommand(client *c) { } else { limited = 0; ttl_us = new_tat_us - now; - robj *tatobj = createStringObjectFromLongLong(new_tat_us); + robj *tatobj = createGCRAObject(new_tat_us); setKeyByLink(c, c->db, key, &tatobj, kv ? SETKEY_ALREADY_EXIST : SETKEY_DOESNT_EXIST, &link); - notifyKeyspaceEvent(NOTIFY_STRING,"set",key,c->db->id); + notifyKeyspaceEvent(NOTIFY_RATE_LIMIT,"gcra",key,c->db->id); + /* The key implicitly sets its own expiry time (which is basically the + * TaT after which time the value is no longer of any use). That way even + * if only one GCRA command is called on a key it will automatically + * expire after reaching its TaT without user needing to explicitly call + * DEL on it. + * These keys are expected to be numerous and short lived thus the + * decision to keep the implicit expiraty. + * NOTE: idea is same as in redis-cell. */ long long when = new_tat_us / 1000; kv = setExpireByLink(c, c->db, key->ptr, when, link); notifyKeyspaceEvent(NOTIFY_GENERIC,"expire",key,c->db->id); @@ -219,9 +215,11 @@ void gcraCommand(client *c) { /* Replicating the command directly would mess up TaT as we use * commandTimeSnapshot. We instead rewrite the command as SET with the * appropriate expire time. 
*/ - robj *pexat_obj = createStringObjectFromLongLong(when); - rewriteClientCommandVector(c, 5, shared.set, key, kv, shared.pxat, pexat_obj); - decrRefCount(pexat_obj); + robj *gcrasetvalue = createStringObject("GCRASETVALUE", 12); + robj *newtatstr = createStringObjectFromLongLong(new_tat_us); + rewriteClientCommandVector(c, 3, gcrasetvalue, key, newtatstr); + decrRefCount(gcrasetvalue); + decrRefCount(newtatstr); server.dirty++; } @@ -239,3 +237,44 @@ void gcraCommand(client *c) { addReplyLongLong(c, retry_after_s); addReplyLongLong(c, reset_after_s); } + +/* GCRASETVALUE key tat + * + * Internal command used during AOF rewrite to record a GCRA TAT value. The GCRA + * command is also rewritten as GCRASETVALUE for replication since GCRA uses + * commandTimeSnapshot. */ +void gcraSetValueCommand(client *c) { + robj *key = c->argv[1]; + robj *tat = c->argv[2]; + long long when; + + dictEntryLink link; + kvobj *kv = lookupKeyWriteWithLink(c->db, key, &link); + if (checkType(c, kv, OBJ_GCRA)) return; + + if (getLongLongFromObjectOrReply(c, tat, &when, "Invalid TaT value") == C_ERR) { + return; + } + if (when < 0) { + addReplyError(c, "Invalid negative TaT value"); + return; + } + + robj *tatobj = createGCRAObject(when); + setKeyByLink(c, c->db, key, &tatobj, kv ? SETKEY_ALREADY_EXIST : SETKEY_DOESNT_EXIST, &link); + notifyKeyspaceEvent(NOTIFY_RATE_LIMIT,"gcra",key,c->db->id); + + /* Just like the base GCRA command we set the expire time of the key implicitly. 
*/ + long long when_ms = when / 1000; + kv = setExpireByLink(c, c->db, key->ptr, when_ms, link); + notifyKeyspaceEvent(NOTIFY_GENERIC,"expire",key,c->db->id); + server.dirty++; + + addReply(c, shared.ok); +} + +robj *gcraDup(robj *o) { + long long val; + getLongLongFromGCRAObject(o, &val); + return createGCRAObject(val); +} diff --git a/src/module.c b/src/module.c index fc28b37a1..d4a857a2e 100644 --- a/src/module.c +++ b/src/module.c @@ -4225,6 +4225,7 @@ int RM_KeyType(RedisModuleKey *key) { case OBJ_HASH: return REDISMODULE_KEYTYPE_HASH; case OBJ_MODULE: return REDISMODULE_KEYTYPE_MODULE; case OBJ_STREAM: return REDISMODULE_KEYTYPE_STREAM; + case OBJ_GCRA: return REDISMODULE_KEYTYPE_GCRA; default: return REDISMODULE_KEYTYPE_EMPTY; } } @@ -9202,10 +9203,11 @@ void moduleReleaseGIL(void) { * - REDISMODULE_NOTIFY_OVERWRITTEN: Overwritten events * - REDISMODULE_NOTIFY_TYPE_CHANGED: Type-changed events * - REDISMODULE_NOTIFY_KEY_TRIMMED: Key trimmed events after a slot migration operation + * - REDISMODULE_NOTIFY_RATE_LIMIT: Rate limit event * - REDISMODULE_NOTIFY_ALL: All events (Excluding REDISMODULE_NOTIFY_KEYMISS, * REDISMODULE_NOTIFY_NEW, REDISMODULE_NOTIFY_OVERWRITTEN, - * REDISMODULE_NOTIFY_TYPE_CHANGED - * and REDISMODULE_NOTIFY_KEY_TRIMMED) + * REDISMODULE_NOTIFY_TYPE_CHANGED, REDISMODULE_NOTIFY_KEY_TRIMMED + * and REDISMODULE_NOTIFY_RATE_LIMIT) * - REDISMODULE_NOTIFY_LOADED: A special notification available only for modules, * indicates that the key was loaded from persistence. 
* Notice, when this event fires, the given key diff --git a/src/notify.c b/src/notify.c index 00dd4a090..11ea53241 100644 --- a/src/notify.c +++ b/src/notify.c @@ -40,6 +40,7 @@ int keyspaceEventsStringToFlags(char *classes) { case 'n': flags |= NOTIFY_NEW; break; case 'o': flags |= NOTIFY_OVERWRITTEN; break; case 'c': flags |= NOTIFY_TYPE_CHANGED; break; + case 'r': flags |= NOTIFY_RATE_LIMIT; break; default: return -1; } } @@ -70,6 +71,7 @@ sds keyspaceEventsFlagsToString(int flags) { if (flags & NOTIFY_NEW) res = sdscatlen(res,"n",1); if (flags & NOTIFY_OVERWRITTEN) res = sdscatlen(res,"o",1); if (flags & NOTIFY_TYPE_CHANGED) res = sdscatlen(res,"c",1); + if (flags & NOTIFY_RATE_LIMIT) res = sdscatlen(res,"r",1); } if (flags & NOTIFY_KEYSPACE) res = sdscatlen(res,"K",1); if (flags & NOTIFY_KEYEVENT) res = sdscatlen(res,"E",1); diff --git a/src/object.c b/src/object.c index 1fa922679..cfcfa4844 100644 --- a/src/object.c +++ b/src/object.c @@ -514,6 +514,23 @@ robj *createStreamObject(void) { return o; } +robj *createGCRAObject(long long value) { + /* NOTE: for 32-bit systems we can't use integer encoding (as OBJ_STRING does) + * as the GCRA object is a unixtime value in microseconds, which as of the + * time of writing is already much more than 32-bit's LONG_MAX. 
*/ +#if UINTPTR_MAX == 0xffffffff + long long *v = zmalloc(sizeof(long long)); + *v = value; + robj *o = createObject(OBJ_GCRA,v); +#else + robj *o = createObject(OBJ_GCRA,NULL); + o->ptr = (void*)value; +#endif + + o->encoding = OBJ_ENCODING_INT; + return o; +} + robj *createModuleObject(moduleType *mt, void *value) { moduleValue *mv = zmalloc(sizeof(*mv)); mv->type = mt; @@ -586,6 +603,14 @@ void freeStreamObject(robj *o) { freeStream(o->ptr); } +void freeGCRAObject(robj *o) { +#if UINTPTR_MAX == 0xffffffff + zfree(o->ptr); +#else + (void)o; +#endif +} + void incrRefCount(robj *o) { if (o->refcount < OBJ_FIRST_SPECIAL_REFCOUNT - 1) { o->refcount++; @@ -629,6 +654,7 @@ void decrRefCount(robj *o) { case OBJ_HASH: freeHashObject(o); break; case OBJ_MODULE: freeModuleObject(o); break; case OBJ_STREAM: freeStreamObject(o); break; + case OBJ_GCRA: freeGCRAObject(o); break; default: serverPanic("Unknown object type"); break; } } @@ -776,6 +802,13 @@ void dismissStreamObject(robj *o, size_t size_hint) { } } +void dismissGCRAObject(robj *o, size_t size_hint) { + /* GCRA is a single allocation of a long long thus way smaller than a + * page-size. 
The dismiss mechanism is not needed for it - hence NOOP.*/ + (void)o; + (void)size_hint; +} + /* When creating a snapshot in a fork child process, the main process and child * process share the same physical memory pages, and if / when the parent * modifies any keys due to write traffic, it'll cause CoW which consume @@ -804,6 +837,7 @@ void dismissObject(robj *o, size_t size_hint) { case OBJ_ZSET: dismissZsetObject(o, size_hint); break; case OBJ_HASH: dismissHashObject(o, size_hint); break; case OBJ_STREAM: dismissStreamObject(o, size_hint); break; + case OBJ_GCRA: dismissGCRAObject(o, size_hint); break; default: break; } #else @@ -925,6 +959,7 @@ size_t getObjectLength(robj *o) { case OBJ_ZSET: return zsetLength(o); case OBJ_HASH: return hashTypeLength(o, 0); case OBJ_STREAM: return streamLength(o); + case OBJ_GCRA: return gcraObjectLength(o); default: return 0; } } @@ -1133,6 +1168,22 @@ int getLongLongFromObject(robj *o, long long *target) { return C_OK; } +int getLongLongFromGCRAObject(robj *o, long long *target) { + long long res; + serverAssertWithInfo(NULL, o, o->type == OBJ_GCRA); + serverAssert(o->encoding == OBJ_ENCODING_INT); +#if UINTPTR_MAX == 0xffffffff + res = *((long long*)o->ptr); +#else + res = (long long)o->ptr; +#endif + if (unlikely(res < 0)) { + serverPanic("Invalid negative GCRA value"); + } + *target = res; + return C_OK; +} + int getLongLongFromObjectOrReply(client *c, robj *o, long long *target, const char *msg) { long long value; if (getLongLongFromObject(o, &value) != C_OK) { @@ -1223,7 +1274,8 @@ size_t kvobjComputeSize(robj *key, kvobj *o, size_t sample_size, int dbid) { o->type == OBJ_SET || o->type == OBJ_ZSET || o->type == OBJ_HASH || - o->type == OBJ_STREAM) + o->type == OBJ_STREAM || + o->type == OBJ_GCRA) { return kvobjAllocSize(o); } else if (o->type == OBJ_MODULE) { @@ -1249,12 +1301,31 @@ size_t kvobjAllocSize(kvobj *o) { } else if (o->type == OBJ_STREAM) { stream *s = o->ptr; asize += s->alloc_size; + } else if (o->type == 
OBJ_GCRA) { + asize += gcraTypeAllocSize(o); } else if (o->type == OBJ_MODULE) { /* TODO: Provide moduleGetAllocSize() module API for O(1) allocation size retrieval */ } return asize; } +size_t gcraTypeAllocSize(robj *o) { + (void)o; +#if UINTPTR_MAX == 0xffffffff + return sizeof(long long); +#else + /* Same as string with int encoding there is no allocation as the value is + * cast to void* and stored in o->ptr */ + return 0; +#endif +} + +/* The gcra object is a single long long value */ +size_t gcraObjectLength(robj *o) { + (void)o; + return 1; +} + /* Release data obtained with getMemoryOverheadData(). */ void freeMemoryOverheadData(struct redisMemOverhead *mh) { zfree(mh->db); diff --git a/src/object.h b/src/object.h index 1e761175d..6b2591877 100644 --- a/src/object.h +++ b/src/object.h @@ -5,7 +5,7 @@ * values of different logical types (strings, lists, sets, hashes, sorted sets, * streams, modules, ...). It contains: * - type: one of OBJ_STRING, OBJ_LIST, OBJ_SET, OBJ_ZSET, OBJ_HASH, OBJ_STREAM, - * OBJ_MODULE, ... + * OBJ_GCRA, OBJ_MODULE, ... * - encoding: an implementation detail of how the value is represented in * memory for the given type (see OBJ_ENCODING_* below). For example, * strings may be RAW/EMBSTR/INT, sets may be INTSET or HT, etc. 
@@ -161,6 +161,7 @@ robj *createHashObject(void); robj *createZsetObject(void); robj *createZsetListpackObject(void); robj *createStreamObject(void); +robj *createGCRAObject(long long value); robj *createModuleObject(struct RedisModuleType *mt, void *value); int getLongFromObjectOrReply(struct client *c, robj *o, long *target, const char *msg); int getPositiveLongFromObjectOrReply(struct client *c, robj *o, long *target, const char *msg); @@ -170,6 +171,7 @@ int getLongLongFromObjectOrReply(struct client *c, robj *o, long long *target, c int getDoubleFromObjectOrReply(struct client *c, robj *o, double *target, const char *msg); int getDoubleFromObject(const robj *o, double *target); int getLongLongFromObject(robj *o, long long *target); +int getLongLongFromGCRAObject(robj *o, long long *target); int getLongDoubleFromObject(robj *o, long double *target); int getLongDoubleFromObjectOrReply(struct client *c, robj *o, long double *target, const char *msg); int getIntFromObjectOrReply(struct client *c, robj *o, int *target, const char *msg); @@ -179,6 +181,8 @@ int collateStringObjects(const robj *a, const robj *b); int equalStringObjects(robj *a, robj *b); void trimStringObjectIfNeeded(robj *o, int trim_small_values); size_t kvobjAllocSize(kvobj *o); +size_t gcraTypeAllocSize(robj *o); +size_t gcraObjectLength(robj *o); int objectSetLRUOrLFU(robj *val, long long lfu_freq, long long lru_idle, long long lru_clock, int lru_multiplier); diff --git a/src/rdb.c b/src/rdb.c index 52dd686d2..14f865142 100644 --- a/src/rdb.c +++ b/src/rdb.c @@ -713,6 +713,8 @@ int rdbSaveObjectType(rio *rdb, robj *o) { serverPanic("Unknown hash encoding"); case OBJ_STREAM: return rdbSaveType(rdb,RDB_TYPE_STREAM_LISTPACKS_5); + case OBJ_GCRA: + return rdbSaveType(rdb,RDB_TYPE_GCRA); case OBJ_MODULE: return rdbSaveType(rdb,RDB_TYPE_MODULE_2); default: @@ -1399,6 +1401,11 @@ ssize_t rdbSaveObject(rio *rdb, robj *o, robj *key, int dbid) { /* Save the all-time count of duplicate IIDs detected. 
*/ if ((n = rdbSaveLen(rdb,s->iids_duplicates)) == -1) return -1; nwritten += n; + } else if (o->type == OBJ_GCRA) { + long long t; + getLongLongFromGCRAObject(o, &t); + if ((n = rdbSaveLen(rdb,t)) == -1) return -1; + nwritten += n; } else if (o->type == OBJ_MODULE) { /* Save a module-specific value. */ RedisModuleIO io; @@ -3601,6 +3608,13 @@ robj *rdbLoadObject(int rdbtype, rio *rdb, sds key, int dbid, int *error) return NULL; } o = createModuleObject(mt, ptr); + } else if (rdbtype == RDB_TYPE_GCRA) { + uint64_t time = rdbLoadLen(rdb, NULL); + if (time == RDB_LENERR || time > LLONG_MAX) { + rdbReportReadError("Failed loading GCRA TaT value"); + return NULL; + } + o = createGCRAObject((long long)time); } else { rdbReportReadError("Unknown RDB encoding type %d",rdbtype); return NULL; diff --git a/src/rdb.h b/src/rdb.h index 5d92f8430..4898d82af 100644 --- a/src/rdb.h +++ b/src/rdb.h @@ -80,10 +80,11 @@ #define RDB_TYPE_HASH_LISTPACK_EX 25 /* Hash LP with HFEs. Attach min TTL at start */ #define RDB_TYPE_STREAM_LISTPACKS_4 26 /* Stream with IDMP support */ #define RDB_TYPE_STREAM_LISTPACKS_5 27 /* Stream with XNACK support (NACKed entries) */ +#define RDB_TYPE_GCRA 28 /* GCRA object */ /* NOTE: WHEN ADDING NEW RDB TYPE, UPDATE rdbIsObjectType(), and rdb_type_string[] */ /* Test if a type is an object type. */ -#define rdbIsObjectType(t) (((t) >= 0 && (t) <= 7) || ((t) >= 9 && (t) <= 27)) +#define rdbIsObjectType(t) (((t) >= 0 && (t) <= 7) || ((t) >= 9 && (t) <= 28)) /* Special RDB opcodes (saved/loaded with rdbSaveType/rdbLoadType). */ #define RDB_OPCODE_KEY_META 243 /* Key metadata (module metadata classes). 
*/ diff --git a/src/redis-check-rdb.c b/src/redis-check-rdb.c index 4fe226474..eea78290d 100644 --- a/src/redis-check-rdb.c +++ b/src/redis-check-rdb.c @@ -88,6 +88,7 @@ char *rdb_type_string[] = { "hash-listpack-md", "stream-v4", "stream-v5", + "gcra", }; /* Show a few stats collected into 'rdbstate' */ diff --git a/src/redismodule.h b/src/redismodule.h index 71579d3c3..c1040f12f 100644 --- a/src/redismodule.h +++ b/src/redismodule.h @@ -89,6 +89,7 @@ typedef long long ustime_t; #define REDISMODULE_KEYTYPE_ZSET 5 #define REDISMODULE_KEYTYPE_MODULE 6 #define REDISMODULE_KEYTYPE_STREAM 7 +#define REDISMODULE_KEYTYPE_GCRA 8 /* Reply types. */ #define REDISMODULE_REPLY_UNKNOWN -1 @@ -247,11 +248,12 @@ This flag should not be used directly by the module. #define REDISMODULE_NOTIFY_OVERWRITTEN (1<<15) /* o, key overwrite notification */ #define REDISMODULE_NOTIFY_TYPE_CHANGED (1<<16) /* c, key type changed notification */ #define REDISMODULE_NOTIFY_KEY_TRIMMED (1<<17) /* module only key space notification, indicates a key trimmed during slot migration */ +#define REDISMODULE_NOTIFY_RATE_LIMIT (1<<18) /* r, rate limit event */ /* Next notification flag, must be updated when adding new flags above! This flag should not be used directly by the module. * Use RedisModule_GetKeyspaceNotificationFlagsAll instead. 
*/ -#define _REDISMODULE_NOTIFY_NEXT (1<<18) +#define _REDISMODULE_NOTIFY_NEXT (1<<19) #define REDISMODULE_NOTIFY_ALL (REDISMODULE_NOTIFY_GENERIC | REDISMODULE_NOTIFY_STRING | REDISMODULE_NOTIFY_LIST | REDISMODULE_NOTIFY_SET | REDISMODULE_NOTIFY_HASH | REDISMODULE_NOTIFY_ZSET | REDISMODULE_NOTIFY_EXPIRED | REDISMODULE_NOTIFY_EVICTED | REDISMODULE_NOTIFY_STREAM | REDISMODULE_NOTIFY_MODULE) /* A */ diff --git a/src/server.h b/src/server.h index 091f7c9ac..33556e3a9 100644 --- a/src/server.h +++ b/src/server.h @@ -287,6 +287,7 @@ extern int configOOMScoreAdjValuesDefaults[CONFIG_OOM_COUNT]; #define ACL_CATEGORY_CONNECTION (1ULL<<18) #define ACL_CATEGORY_TRANSACTION (1ULL<<19) #define ACL_CATEGORY_SCRIPTING (1ULL<<20) +#define ACL_CATEGORY_RATE_LIMIT (1ULL<<21) /* Key-spec flags * * -------------- */ @@ -795,6 +796,7 @@ typedef enum { #define NOTIFY_OVERWRITTEN (1<<15) /* o, key overwrite notification (Note: excluded from NOTIFY_ALL) */ #define NOTIFY_TYPE_CHANGED (1<<16) /* c, key type changed notification (Note: excluded from NOTIFY_ALL) */ #define NOTIFY_KEY_TRIMMED (1<<17) /* module only key space notification, indicates a key trimmed during slot migration */ +#define NOTIFY_RATE_LIMIT (1<<18) /* r, notify rate limit event (Note: excluded from NOTIFY_ALL)*/ #define NOTIFY_ALL (NOTIFY_GENERIC | NOTIFY_STRING | NOTIFY_LIST | NOTIFY_SET | NOTIFY_HASH | NOTIFY_ZSET | NOTIFY_EXPIRED | NOTIFY_EVICTED | NOTIFY_STREAM | NOTIFY_MODULE) /* A flag */ /* Using the following macro you can run code inside serverCron() with the @@ -859,7 +861,17 @@ typedef enum { * encoding version. */ #define OBJ_MODULE 5 /* Module object. */ #define OBJ_STREAM 6 /* Stream object. */ -#define OBJ_TYPE_MAX 7 /* Maximum number of object types */ +#define OBJ_GCRA 7 /* GCRA object. 
*/ +#define OBJ_TYPE_MAX 8 /* Maximum number of object types */ + +/* NOTE: adding a new object requires changes in the following places: + * - rdb.c - save/load (also bump RDB_VERSION if needed) + * - aof.c - rewrite + * - db.c - obj_type_name, copyCommand + * - debug.c - xorObjectDigest, serverLogObjectDebugInfo + * - defrag.c - defragKey + * - module.c - RM_KeyType (and add the new keytype to redismodule.h) + * - object.c - object(create/free/dismiss/allocSize/Length) */ /* Extract encver / signature from a module type ID. */ #define REDISMODULE_TYPE_ENCVER_BITS 10 @@ -2771,6 +2783,7 @@ typedef enum { COMMAND_GROUP_STREAM, COMMAND_GROUP_BITMAP, COMMAND_GROUP_MODULE, + COMMAND_GROUP_RATE_LIMIT, } redisCommandGroup; typedef void redisCommandProc(client *c); @@ -3597,6 +3610,9 @@ int zzlLexValueLteMax(unsigned char *p, zlexrangespec *spec); int zslLexValueGteMin(sds value, zlexrangespec *spec); int zslLexValueLteMax(sds value, zlexrangespec *spec); +/* gcra related */ +robj *gcraDup(robj *o); + /* Core functions */ int getMaxmemoryState(size_t *total, size_t *logical, size_t *tofree, float *level); void updatePeakMemory(void); @@ -4469,6 +4485,7 @@ void resetCommand(client *c); void failoverCommand(client *c); void digestCommand(client *c); void gcraCommand(client *c); +void gcraSetValueCommand(client *c); #if defined(__GNUC__) void *calloc(size_t count, size_t size) __attribute__ ((deprecated)); diff --git a/tests/unit/gcra.tcl b/tests/unit/gcra.tcl index e11101484..b012a0fc4 100644 --- a/tests/unit/gcra.tcl +++ b/tests/unit/gcra.tcl @@ -229,6 +229,103 @@ start_server {tags {"gcra" "external:skip"}} { } } +start_server {tags {"gcra" "external:skip"}} { + test {GCRA - RDB save and reload preserves value} { + r del mykey + r gcra mykey 5 1 60 + r gcra mykey 5 1 60 + + set dump_before [r dump mykey] + + r debug reload + + assert_equal [r type mykey] "gcra" + set dump_after [r dump mykey] + assert_equal $dump_before $dump_after + } {} {needs:debug} + + test {GCRA - 
RDB save and reload preserves TTL} { + r del mykey + r gcra mykey 5 1 60 + set ttl_before [r pexpiretime mykey] + assert_morethan $ttl_before 0 + + r debug reload + + set ttl_after [r pexpiretime mykey] + assert_morethan $ttl_after 0 + assert_equal $ttl_after $ttl_before + } {} {needs:debug} + + test {GCRA - DUMP and RESTORE roundtrip} { + r del mykey mykey2 + r gcra mykey 5 1 60 + r gcra mykey 5 1 60 + + set dump [r dump mykey] + set ttl [r pttl mykey] + r restore mykey2 $ttl $dump + + assert_equal [r type mykey2] "gcra" + + set result_orig [r gcra mykey 5 1 60] + set result_restored [r gcra mykey2 5 1 60] + assert_equal [lindex $result_orig 2] [lindex $result_restored 2] + } + + test {GCRA - AOF rewrite preserves value} { + r del mykey + r config set appendonly yes + waitForBgrewriteaof r + + r gcra mykey 5 1 60 + r gcra mykey 5 1 60 + + set dump_before [r dump mykey] + + r BGREWRITEAOF + waitForBgrewriteaof r + r debug reload + + assert_equal [r type mykey] "gcra" + set dump_after [r dump mykey] + assert_equal $dump_before $dump_after + } {} {external:skip needs:debug} + + test {GCRA - AOF rewrite preserves TTL} { + r del mykey + r config set appendonly yes + waitForBgrewriteaof r + + r gcra mykey 5 1 60 + + r BGREWRITEAOF + waitForBgrewriteaof r + + set ttl_before [r pttl mykey] + assert {$ttl_before > 0} + + r debug reload + + set ttl_after [r pttl mykey] + assert {$ttl_after > 0} + assert {$ttl_after <= $ttl_before} + } {} {external:skip needs:debug} + + test {GCRA - DEBUG DIGEST consistent after RDB reload} { + r del mykey + r gcra mykey 5 1 60 + r gcra mykey 5 1 60 + + set digest_before [r debug digest] + + r debug reload + + set digest_after [r debug digest] + assert_equal $digest_before $digest_after + } {} {needs:debug} +} + start_server {tags {"gcra repl" "external:skip"}} { set replica [srv 0 client] set replica_host [srv 0 host] @@ -240,27 +337,26 @@ start_server {tags {"gcra repl" "external:skip"}} { set master_host [srv 0 host] set master_port [srv 
0 port] - $master flushdb - $replica flushdb + test {GCRA - Replication works} { + $master flushdb + $replica flushdb - $replica replicaof $master_host $master_port - wait_for_condition 100 100 { - [s -1 master_link_status] eq "up" - } else { - fail "Master <-> Replica didn't finish sync" - } + $replica replicaof $master_host $master_port + wait_for_condition 100 100 { + [s -1 master_link_status] eq "up" + } else { + fail "Master <-> Replica didn't finish sync" + } - set cmdinfo [$replica info commandstats] - assert_equal [lsearch -glob $cmdinfo "cmdstat_gcra:*"] -1 - assert_equal [lsearch -glob $cmdinfo "cmdstat_set:*"] -1 + set cmdinfo [$replica info commandstats] + assert_equal [lsearch -glob $cmdinfo "cmdstat_gcrasetvalue:*"] -1 - $master del mykey - $master gcra mykey 2 1 1000 TOKENS 2 + $master del mykey + $master gcra mykey 2 1 1000 TOKENS 2 + wait_for_ofs_sync $master $replica - wait_for_ofs_sync $master $replica - - set cmdinfo [$replica info commandstats] - assert_equal [lsearch -glob $cmdinfo "cmdstat_gcra:*"] -1 - assert_morethan_equal [lsearch -glob $cmdinfo "cmdstat_set:*"] 0 + set cmdinfo [$replica info commandstats] + assert_morethan_equal [lsearch -glob $cmdinfo "cmdstat_gcrasetvalue:*"] 0 + } {} {external:skip} } } diff --git a/utils/generate-command-code.py b/utils/generate-command-code.py index 76c8c3b15..8a25039ad 100755 --- a/utils/generate-command-code.py +++ b/utils/generate-command-code.py @@ -34,6 +34,7 @@ GROUPS = { "geo": "COMMAND_GROUP_GEO", "stream": "COMMAND_GROUP_STREAM", "bitmap": "COMMAND_GROUP_BITMAP", + "rate_limit": "COMMAND_GROUP_RATE_LIMIT", } @@ -602,7 +603,8 @@ const char *COMMAND_GROUP_STR[] = { "geo", "stream", "bitmap", - "module" + "module", + "rate_limit" }; const char *commandGroupStr(int index) { From eb74450fcacd205b7e1e5a95df0ec3622914def1 Mon Sep 17 00:00:00 2001 From: Ozan Tezcan Date: Thu, 16 Apr 2026 12:13:01 +0300 Subject: [PATCH 19/32] Log node address when ASM starts (#15056) Log source/destination address on 
import/migrate start events for easier debugging. --- src/cluster_asm.c | 20 +++++++++++++++++--- 1 file changed, 17 insertions(+), 3 deletions(-) diff --git a/src/cluster_asm.c b/src/cluster_asm.c index 13e0fd44e..01a071167 100644 --- a/src/cluster_asm.c +++ b/src/cluster_asm.c @@ -1057,13 +1057,27 @@ void clusterMigrationCommand(client *c) { } } +/* Returns the address of the node in the format "ip:port". */ +static const char *getNodeAddressStr(const char *node_id, int len) { + serverAssert(node_id != NULL); + static char buf[NET_HOST_PORT_STR_LEN]; + + clusterNode *n = clusterLookupNode(node_id, len); + char *ip = n ? clusterNodeIp(n) : "?"; + int port = n ? (server.tls_replication ? clusterNodeTlsPort(n) : + clusterNodeTcpPort(n)) : 0; + formatAddr(buf, sizeof(buf), ip, port); + return buf; +} + /* Log a human-readable message for ASM task lifecycle events. */ void asmLogTaskEvent(asmTask *task, int event) { sds str = slotRangeArrayToString(task->slots); switch (event) { case ASM_EVENT_IMPORT_STARTED: - serverLog(LL_NOTICE, "Import task %s started for slots: %s", task->id, str); + serverLog(LL_NOTICE, "Import task %s started for slots: %s, source address: %s", + task->id, str, getNodeAddressStr(task->source, CLUSTER_NAMELEN)); break; case ASM_EVENT_IMPORT_FAILED: serverLog(LL_NOTICE, "Import task %s failed for slots: %s", task->id, str); @@ -1076,8 +1090,8 @@ void asmLogTaskEvent(asmTask *task, int event) { task->id, str, getKeyCountInSlotRangeArray(task->slots)); break; case ASM_EVENT_MIGRATE_STARTED: - serverLog(LL_NOTICE, "Migrate task %s started for slots: %s (number of keys at start: %llu)", - task->id, str, getKeyCountInSlotRangeArray(task->slots)); + serverLog(LL_NOTICE, "Migrate task %s started for slots: %s, destination address: %s, (number of keys at start: %llu)", + task->id, str, getNodeAddressStr(task->dest, CLUSTER_NAMELEN), getKeyCountInSlotRangeArray(task->slots)); break; case ASM_EVENT_MIGRATE_FAILED: serverLog(LL_NOTICE, "Migrate task %s 
failed for slots: %s", task->id, str); From fa6d4c3d63cf84299167fdd89eb40d9cf8a64a0f Mon Sep 17 00:00:00 2001 From: Moti Cohen Date: Thu, 16 Apr 2026 13:16:52 +0300 Subject: [PATCH 20/32] Fix SIGABRT in HSETEX when a field appears twice in the FIELDS list (#14956) HSETEX crashed on assert() with a SIGABRT when the same field appeared more than once in the FIELDS list and an expiry time was given (EX/PX/EXAT/PXAT). Root cause: hfieldPersist() and the KEEP_TTL path in hashTypeSet() both asserted that dictExpireMeta->expireMeta.trash == 0, meaning the hash must be globally registered in the HFE DS. This is incorrect during HSETEX execution because hashTypeSetExDone(), which registers the hash globally and clears trash, is called only at the end of the flow. The private per-field ebuckets are fully valid regardless of the global registration state. Fix: Remove both incorrect assertions. The operations on the private ebuckets (ebRemove in hfieldPersist, ebAdd in the KEEP_TTL path) are correct and do not require the hash to be globally registered.
Tests: Added two regression tests covering the crash scenarios: - HSETEX EX with a duplicate field (existing field, expiry given) - HSETEX FNX EX with a duplicate field (no prior field, FNX condition passes) --- src/t_hash.c | 4 ---- tests/unit/type/hash-field-expire.tcl | 18 ++++++++++++++++++ 2 files changed, 18 insertions(+), 4 deletions(-) diff --git a/src/t_hash.c b/src/t_hash.c index e258eb71f..5ea456597 100644 --- a/src/t_hash.c +++ b/src/t_hash.c @@ -1022,7 +1022,6 @@ int hashTypeSet(redisDb *db, kvobj *o, sds field, sds value, int flags) { if (newExpireAt != EB_EXPIRE_TIME_INVALID) { dict *d = o->ptr; htMetadataEx *dictExpireMeta = htGetMetadataEx(d); - serverAssert(dictExpireMeta->expireMeta.trash == 0); ebAdd(&dictExpireMeta->hfe, &hashFieldExpireBucketsType, newEntry, newExpireAt); } @@ -3478,9 +3477,6 @@ static void hfieldPersist(robj *hashObj, Entry *entry) { dict *d = hashObj->ptr; htMetadataEx *dictExpireMeta = htGetMetadataEx(d); - /* If field has valid expiry then dict must have valid metadata as well */ - serverAssert(dictExpireMeta->expireMeta.trash == 0); - /* Remove field from private HFE DS */ ebRemove(&dictExpireMeta->hfe, &hashFieldExpireBucketsType, entry); diff --git a/tests/unit/type/hash-field-expire.tcl b/tests/unit/type/hash-field-expire.tcl index e1ba72019..402a9ad72 100644 --- a/tests/unit/type/hash-field-expire.tcl +++ b/tests/unit/type/hash-field-expire.tcl @@ -1277,6 +1277,24 @@ start_server {tags {"external:skip needs:debug"}} { assert_range [r hpttl myhash FIELDS 1 f3] 4500 5000 } + test "HSETEX EX - field appears twice in FIELDS list with EX is allowed ($type)" { + # The EX condition passes, so all fields must be set, and the last value wins. 
+ r del myhash + r hset myhash f1 v1 + r hsetex myhash EX 100 FIELDS 2 f1 new1 f1 new2 + # Last value wins (same as plain HSET behavior with duplicate fields) + assert_equal "new2" [r hget myhash f1] + assert_range [r httl myhash FIELDS 1 f1] 80 100 + } + + test "HSETEX FNX - field appears twice in FIELDS list with EX is allowed ($type)" { + # The FNX condition passes, so all fields must be set, and the last value wins. + r del myhash + r hsetex myhash FNX EX 100 FIELDS 2 f1 new1 f1 new2 + assert_equal "new2" [r hget myhash f1] + assert_range [r httl myhash FIELDS 1 f1] 80 100 + } + test "HSETEX - Test 'EX' flag ($type)" { r del myhash r hset myhash f1 v1 f2 v2 From 6339fd739e480f9a1318efb5f24e1999ab8537df Mon Sep 17 00:00:00 2001 From: Aviv David <40210928+AvivDavid23@users.noreply.github.com> Date: Thu, 16 Apr 2026 13:43:10 +0300 Subject: [PATCH 21/32] DataTypes update 8.8 RC1 (#15036) --- modules/redisbloom/Makefile | 2 +- modules/redisjson/Makefile | 2 +- modules/redistimeseries/Makefile | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/modules/redisbloom/Makefile b/modules/redisbloom/Makefile index 1e113b400..f40cc7c1f 100644 --- a/modules/redisbloom/Makefile +++ b/modules/redisbloom/Makefile @@ -1,5 +1,5 @@ SRC_DIR = src -MODULE_VERSION = v8.7.80 +MODULE_VERSION = v8.7.90 MODULE_REPO = https://github.com/redisbloom/redisbloom TARGET_MODULE = $(SRC_DIR)/bin/$(FULL_VARIANT)/redisbloom.so diff --git a/modules/redisjson/Makefile b/modules/redisjson/Makefile index 46ee46b88..4d13ed7bc 100644 --- a/modules/redisjson/Makefile +++ b/modules/redisjson/Makefile @@ -1,5 +1,5 @@ SRC_DIR = src -MODULE_VERSION = v8.7.80 +MODULE_VERSION = v8.7.90 MODULE_REPO = https://github.com/redisjson/redisjson TARGET_MODULE = $(SRC_DIR)/bin/$(FULL_VARIANT)/rejson.so diff --git a/modules/redistimeseries/Makefile b/modules/redistimeseries/Makefile index 030e73aaf..1bd8b46ca 100644 --- a/modules/redistimeseries/Makefile +++ b/modules/redistimeseries/Makefile @@ -1,5 
+1,5 @@ SRC_DIR = src -MODULE_VERSION = v8.7.80 +MODULE_VERSION = v8.7.90 MODULE_REPO = https://github.com/redistimeseries/redistimeseries TARGET_MODULE = $(SRC_DIR)/bin/$(FULL_VARIANT)/redistimeseries.so From ca6e471a3fe69c7b0af04c7fb6827c6cd88b5a6e Mon Sep 17 00:00:00 2001 From: "debing.sun" Date: Thu, 16 Apr 2026 21:50:49 +0800 Subject: [PATCH 22/32] Fix decrRefCount on NULL robj on corrupt KEY_META payload (#15034) ## Summary This PR fixes two issues when processing corrupt data in rdbLoadCheckModuleValue(): 1. When handling `RDB_MODULE_OPCODE_STRING` opcode, rdbGenericLoadStringObject() can return NULL on a corrupt payload. The code called decrRefCount(o) unconditionally without a NULL check, resulting in a NULL pointer dereference crash. 2. The while loop condition was `!= RDB_MODULE_OPCODE_EOF`, which means a truncated payload (causing rdbLoadLen to return RDB_LENERR) would never exit the loop, since `RDB_LENERR != RDB_MODULE_OPCODE_EOF` is always true, potentially causing an infinite hang. --- src/keymeta.c | 2 +- src/rdb.c | 27 ++++++++++++++++++++++----- src/rdb.h | 2 +- src/redis-check-rdb.c | 4 ++-- tests/integration/corrupt-dump.tcl | 9 +++++++++ 5 files changed, 35 insertions(+), 9 deletions(-) diff --git a/src/keymeta.c b/src/keymeta.c index e4430da2c..fba77a2d6 100644 --- a/src/keymeta.c +++ b/src/keymeta.c @@ -416,7 +416,7 @@ int rdbLoadSkipMetaIfAllowed(rio *rdb, char *cname, int flags) { * * Note: rdbLoadCheckModuleValue() reads opcodes until it finds RDB_MODULE_OPCODE_EOF, * so it consumes the EOF marker as well. We don't need to read it separately. 
*/ - robj *dummy = rdbLoadCheckModuleValue(rdb, cname); + robj *dummy = rdbLoadCheckModuleValue(rdb, cname, 1); if (dummy == NULL) { serverLog(LL_WARNING, "Corrupted metadata value for class '%s'", cname); return -1; diff --git a/src/rdb.c b/src/rdb.c index 14f865142..222314a2c 100644 --- a/src/rdb.c +++ b/src/rdb.c @@ -2002,11 +2002,18 @@ void rdbRemoveTempFile(pid_t childpid, int from_signal) { /* This function is called by rdbLoadObject() when the code is in RDB-check * mode and we find a module value of type 2 that can be parsed without - * the need of the actual module. The value is parsed for errors, finally - * a dummy redis object is returned just to conform to the API. */ -robj *rdbLoadCheckModuleValue(rio *rdb, char *modulename) { + * the need of the actual module. The value is parsed for errors. + * If null_on_error is true, NULL is returned when data corruption is detected; + * otherwise a dummy redis object is always returned regardless of success or + * failure. */ +robj *rdbLoadCheckModuleValue(rio *rdb, char *modulename, int null_on_error) { uint64_t opcode; while((opcode = rdbLoadLen(rdb,NULL)) != RDB_MODULE_OPCODE_EOF) { + if (opcode == RDB_LENERR) { + rdbReportCorruptRDB("Error reading module opcode length from module %s value", modulename); + goto error; + } + if (opcode == RDB_MODULE_OPCODE_SINT || opcode == RDB_MODULE_OPCODE_UINT) { @@ -2014,12 +2021,14 @@ robj *rdbLoadCheckModuleValue(rio *rdb, char *modulename) { if (rdbLoadLenByRef(rdb,NULL,&len) == -1) { rdbReportCorruptRDB( "Error reading integer from module %s value", modulename); + goto error; } } else if (opcode == RDB_MODULE_OPCODE_STRING) { robj *o = rdbGenericLoadStringObject(rdb,RDB_LOAD_NONE,NULL); if (o == NULL) { rdbReportCorruptRDB( "Error reading string from module %s value", modulename); + goto error; } decrRefCount(o); } else if (opcode == RDB_MODULE_OPCODE_FLOAT) { @@ -2027,16 +2036,24 @@ robj *rdbLoadCheckModuleValue(rio *rdb, char *modulename) { if 
(rdbLoadBinaryFloatValue(rdb,&val) == -1) { rdbReportCorruptRDB( "Error reading float from module %s value", modulename); + goto error; } } else if (opcode == RDB_MODULE_OPCODE_DOUBLE) { double val; if (rdbLoadBinaryDoubleValue(rdb,&val) == -1) { rdbReportCorruptRDB( "Error reading double from module %s value", modulename); + goto error; } + } else { + rdbReportCorruptRDB( + "Unknown module opcode %llu reading module %s value", (unsigned long long)opcode, modulename); + goto error; } } return createStringObject("module-dummy-value",18); +error: + return null_on_error ? NULL : createStringObject("module-dummy-value",18); } /* Load object type and optional key metadata (into `keymeta`) from RDB stream. @@ -3561,7 +3578,7 @@ robj *rdbLoadObject(int rdbtype, rio *rdb, sds key, int dbid, int *error) if (rdbCheckMode) { char name[10]; moduleTypeNameByID(name,moduleid); - return rdbLoadCheckModuleValue(rdb,name); + return rdbLoadCheckModuleValue(rdb, name, 0); } if (mt == NULL) { @@ -4020,7 +4037,7 @@ int rdbLoadRioWithLoadingCtx(rio *rdb, int rdbflags, rdbSaveInfo *rsi, rdbLoadin continue; } else { /* RDB check mode. */ - robj *aux = rdbLoadCheckModuleValue(rdb,name); + robj *aux = rdbLoadCheckModuleValue(rdb, name, 0); decrRefCount(aux); continue; /* Read next opcode. 
*/ } diff --git a/src/rdb.h b/src/rdb.h index 4898d82af..f1ea72150 100644 --- a/src/rdb.h +++ b/src/rdb.h @@ -152,7 +152,7 @@ robj *rdbLoadObject(int rdbtype, rio *rdb, sds key, int dbid, int *error); void backgroundSaveDoneHandler(int exitcode, int bysignal); int rdbSaveKeyValuePair(rio *rdb, robj *key, robj *val, long long expiretime,int dbid); ssize_t rdbSaveSingleModuleAux(rio *rdb, int when, moduleType *mt); -robj *rdbLoadCheckModuleValue(rio *rdb, char *modulename); +robj *rdbLoadCheckModuleValue(rio *rdb, char *modulename, int null_on_error); int rdbResolveKeyType(rio *rdb, int *type, int dbid, KeyMetaSpec *keymeta); robj *rdbLoadStringObject(rio *rdb); ssize_t rdbSaveStringObject(rio *rdb, robj *obj); diff --git a/src/redis-check-rdb.c b/src/redis-check-rdb.c index eea78290d..1bbebb691 100644 --- a/src/redis-check-rdb.c +++ b/src/redis-check-rdb.c @@ -256,7 +256,7 @@ int redis_check_rdb(char *rdbfilename, FILE *fp) { uint32_t classSpec; if (rioRead(&rdb, &classSpec, 4) == 0) goto eoferr; /* Skip module value using rdbLoadCheckModuleValue */ - robj *o = rdbLoadCheckModuleValue(&rdb, "metadata"); + robj *o = rdbLoadCheckModuleValue(&rdb, "metadata", 1); if (o == NULL) goto eoferr; decrRefCount(o); } @@ -326,7 +326,7 @@ int redis_check_rdb(char *rdbfilename, FILE *fp) { moduleTypeNameByID(name,moduleid); rdbCheckInfo("MODULE AUX for: %s", name); - robj *o = rdbLoadCheckModuleValue(&rdb,name); + robj *o = rdbLoadCheckModuleValue(&rdb, name, 0); decrRefCount(o); continue; /* Read type again. 
*/ } else if (type == RDB_OPCODE_FUNCTION_PRE_GA) { diff --git a/tests/integration/corrupt-dump.tcl b/tests/integration/corrupt-dump.tcl index 59c7c8b3d..412d8a018 100644 --- a/tests/integration/corrupt-dump.tcl +++ b/tests/integration/corrupt-dump.tcl @@ -989,6 +989,15 @@ test {corrupt payload: fuzzer findings - vector sets with wrong encoding} { } } +test {corrupt payload: fuzzer findings - decrRefCount on NULL robj on corrupt KEY_META payload} { + start_server [list overrides [list loglevel verbose use-exit-on-panic yes crash-memcheck-enabled no] ] { + r config set sanitize-dump-payload no + r debug set-skip-checksum-validation 1 + catch {r restore key 0 "\xF3\x02\x01\x0D\x00\x54\x23\x3F\xC9\x82\x32\x05\x8D" replace} err + assert_match "*Bad data format*" $err + r ping + } +} } ;# tags From 47575618616b6419562356b0cfbd4b17bb443bdd Mon Sep 17 00:00:00 2001 From: Yuan Wang Date: Fri, 17 Apr 2026 13:39:04 +0800 Subject: [PATCH 23/32] Subkey notification for hash fields (#14958) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ## Motivation Redis's existing keyspace notification system operates at the **key level** only — when a hash field is modified via `HSET`, `HDEL`, or `HEXPIRE`, the subscriber receives the key name and the event type, but not **which fields** were affected. As a result, these notifications have very little practical value. This PR introduces a subkey notification system that extends keyspace events to include field-level (subkey) details for hash operations, through both Pub/Sub channels and the Module API.
## New Pub/Sub Notification Channels Four new channels are added: | Channel Format | Payload | |---------------|---------| | `__subkeyspace@<db>__:<key>` | `<event>\|<len>:<subkey>[,...]` | | `__subkeyevent@<db>__:<event>` | `<key_len>:<key>\|<len>:<subkey>[,...]` | | `__subkeyspaceitem@<db>__:<key>\n<subkey>` | `<event>` | | `__subkeyspaceevent@<db>__:<event>\|<key>` | `<len>:<subkey>[,...]` | **Design rationale for 4 channels:** - **Subkeyspace**: Subscribe to a specific key, receive all field changes in a single message — efficient for key-centric consumers. - **Subkeyevent**: Subscribe to a specific event type, receive key+fields — efficient for event-centric consumers. - **Subkeyspaceitem**: Subscribe to a specific key+field combination — the most selective, one message per field, no parsing needed. - **Subkeyspaceevent**: Subscribe to event+key combination, receiving only the affected fields — server-side filtering on both dimensions. Subkeys are encoded in a length-prefixed format (`<len>:<subkey>`) to support binary-safe field names containing delimiters. **Safety guards:** - Events containing `|` are skipped for `__subkeyspace` and `__subkeyspaceevent` channels (to avoid parsing ambiguity). - Keys containing `\n` are skipped for the `__subkeyspaceitem` channel (newline is the key/subkey separator). - Subkey channels are only published when `subkeys != NULL && count > 0`.
## Hash Command Integration The following hash operations now emit subkey level notifications with the affected field names: | Command | Event | Subkeys | |---------|-------|---------| | `HSET` / `HMSET` | `hset` | All fields being set | | `HSETNX` | `hset` | The field (if set) | | `HDEL` | `hdel` | All fields deleted | | `HGETDEL` | `hdel` / `hexpired` | Deleted or lazily expired fields | | `HGETEX` | `hexpire` / `hpersist` / `hdel` / `hexpired` | Affected fields per event | | `HINCRBY` | `hincrby` | The field | | `HINCRBYFLOAT` | `hincrbyfloat` | The field | | `HEXPIRE` / `HPEXPIRE` / `HEXPIREAT` / `HPEXPIREAT` | `hexpire` | Updated fields | | `HPERSIST` | `hpersist` | Persisted fields | | `HSETEX` | `hset` / `hdel` / `hexpire` / `hexpired` | Affected fields per event | | Field expiration (active/lazy) | `hexpired` | All expired fields (batched) | For field expiration, expired fields are collected into a dynamic array and sent as a single batched notification after the expiration loop, rather than one notification per field. ## Module API Three new APIs and one new callback type: ```c /* Function pointer type for keyspace event notifications with subkeys from modules. */ typedef void (*RedisModuleNotificationWithSubkeysFunc)( RedisModuleCtx *ctx, int type, const char *event, RedisModuleString *key, RedisModuleString **subkeys, int count); /* Subscribe to keyspace notifications with subkey information. * * This is the extended version of RM_SubscribeToKeyspaceEvents. When subkeys * are available, the `subkeys` array and `count` are passed to the callback. * `subkeys` contains only the names of affected subkeys (values are not included), * and `count` is the number of elements. The array may contain duplicates when * the same subkey appears more than once in a command (e.g. HSET key f1 v1 f1 v2 * produces subkeys=["f1","f1"], count=2). When no subkeys are present, `subkeys` * will be NULL and `count` will be 0. 
Whether events without subkeys are delivered * depends on the `flags` parameter (see below). * * `types` is a bit mask of event types the module is interested in * (using the same REDISMODULE_NOTIFY_* flags as RM_SubscribeToKeyspaceEvents). * * `flags` controls delivery filtering: * - REDISMODULE_NOTIFY_FLAG_NONE: The callback is invoked for all matching * events regardless of whether subkeys are present, so a separate * RM_SubscribeToKeyspaceEvents registration can be omitted. * - REDISMODULE_NOTIFY_FLAG_SUBKEYS_REQUIRED: The callback is only invoked * when subkeys are not empty. Events without subkey information (e.g. SET, * EXPIRE, DEL) are skipped. * * The callback signature is: * void callback(RedisModuleCtx *ctx, int type, const char *event, * RedisModuleString *key, RedisModuleString **subkeys, int count); * * The subkeys array and its contents are only valid during the callback. * The underlying objects may be stack-allocated or temporary, so * RM_RetainString must NOT be used on them. To keep a subkey beyond * the callback (e.g. in a RM_AddPostNotificationJob callback), use * RM_HoldString (which handles static objects by copying) or * RM_CreateStringFromString to make a deep copy before returning. */ int RM_SubscribeToKeyspaceEventsWithSubkeys(RedisModuleCtx *ctx, int types, int flags, RedisModuleNotificationWithSubkeysFunc callback); /* Unregister a module's callback from keyspace notifications with subkeys * for specific event types. * * This function removes a previously registered subscription identified by * the event mask, delivery flags, and the callback function. * * Parameters: * - ctx: The RedisModuleCtx associated with the calling module. * - types: The event mask representing the notification types to unsubscribe from. * - flags: The delivery flags that were used during registration. * - callback: The callback function pointer that was originally registered. * * Returns: * - REDISMODULE_OK on successful removal of the subscription. 
* - REDISMODULE_ERR if no matching subscription was found. */ int RM_UnsubscribeFromKeyspaceEventsWithSubkeys( RedisModuleCtx *ctx, int types, int flags, RedisModuleNotificationWithSubkeysFunc cb); /* Like RM_NotifyKeyspaceEvent, but also triggers subkey-level notifications * when subkeys are provided. Both key-level (keyspace/keyevent) and * subkey-level (subkeyspace/subkeyevent/subkeyspaceitem/subkeyspaceevent) * channels are published to, depending on the server configuration. * * This is the extended version of RM_NotifyKeyspaceEvent and can actually * replace it. When called with subkeys=NULL and count=0, it behaves * identically to RM_NotifyKeyspaceEvent. */ int RM_NotifyKeyspaceEventWithSubkeys( RedisModuleCtx *ctx, int type, const char *event, RedisModuleString *key, RedisModuleString **subkeys, int count); ``` ## Configuration Subkey notifications are controlled via the existing `notify-keyspace-events` configuration string with four new characters: `notify-keyspace-events` "STIV" **S** -> Subkeyspace events, published on the `__subkeyspace@<db>__:<key>` channel. **T** -> Subkeyevent events, published on the `__subkeyevent@<db>__:<event>` channel. **I** -> Subkeyspaceitem events, published once per subkey on the `__subkeyspaceitem@<db>__:<key>\n<subkey>` channel. **V** -> Subkeyspaceevent events, published on the `__subkeyspaceevent@<db>__:<event>|<key>` channel. These flags are **independent** from the existing key-level flags (`K`, `E`, etc.). Enabling subkey notifications does **not** implicitly enable or depend on keyspace/keyevent notifications, and vice versa. ## Known Limitations - **Duplicate fields in subkey notifications**: Subkey notification payloads may contain duplicate field names when the same field is affected more than once within a single command. Since duplicate fields are not the common case and deduplication would introduce significant overhead on every notification, we chose not to deduplicate at this time.
- **Subkeys must be sds-encoded objects**: The implementation assumes every subkey is an sds-encoded string object and accesses it directly through `subkey->ptr`. An assertion enforces this, so Redis will crash if a subkey with any other encoding is passed. --- redis.conf | 6 + src/cluster.c | 2 +- src/cluster_asm.c | 2 +- src/config.c | 2 +- src/module.c | 172 ++++++++++- src/notify.c | 171 ++++++++++- src/rdb.c | 2 +- src/redismodule.h | 19 +- src/server.h | 11 +- src/t_hash.c | 318 ++++++++++++++------ src/vector.c | 12 +- src/vector.h | 11 +- tests/modules/keyspace_events.c | 131 +++++++++ tests/unit/moduleapi/keyspace_events.tcl | 165 +++++++++++ tests/unit/pubsub.tcl | 359 ++++++++++++++++++++++- 15 files changed, 1256 insertions(+), 127 deletions(-) diff --git a/redis.conf b/redis.conf index 845be292f..6de8c4d4a 100644 --- a/redis.conf +++ b/redis.conf @@ -2040,6 +2040,12 @@ latency-monitor-threshold 0 # c Type-changed events generated every time a key's type changes # (Note: not included in the 'A' class) # r rate limit event +# S Subkeyspace events, published with __subkeyspace@__: prefix. +# T Subkeyevent events, published with __subkeyevent@__: prefix. +# I Subkeyspaceitem events, published per subkey with +# __subkeyspaceitem@__:\n prefix. +# V Subkeyspaceevent events, published with +# __subkeyspaceevent@__:| prefix. # A Alias for g$lshzxetd, so that the "AKE" string means all the events # except key-miss, new key, overwritten, type-changed and rate-limit. # diff --git a/src/cluster.c b/src/cluster.c index b831c203a..98bb0ebda 100644 --- a/src/cluster.c +++ b/src/cluster.c @@ -1741,7 +1741,7 @@ unsigned int clusterDelKeysInSlot(unsigned int hashslot, int by_command) { * just moved to another node. The modules needs to know that these * keys are no longer available locally, so just send the keyspace * notification to the modules, but not to clients.
*/ - moduleNotifyKeyspaceEvent(NOTIFY_GENERIC, "del", key, server.db[0].id); + moduleNotifyKeyspaceEvent(NOTIFY_GENERIC, "del", key, server.db[0].id, NULL, 0); } exitExecutionUnit(); postExecutionUnitOperations(); diff --git a/src/cluster_asm.c b/src/cluster_asm.c index 01a071167..78d90919c 100644 --- a/src/cluster_asm.c +++ b/src/cluster_asm.c @@ -3662,7 +3662,7 @@ void asmActiveTrimDeleteKey(redisDb *db, robj *keyobj, int migration_cleanup) { * to another node. The modules need to know that these keys are no longer * available locally, so just send the keyspace notification to the modules, * but not to clients. */ - moduleNotifyKeyspaceEvent(NOTIFY_KEY_TRIMMED, "key_trimmed", keyobj, db->id); + moduleNotifyKeyspaceEvent(NOTIFY_KEY_TRIMMED, "key_trimmed", keyobj, db->id, NULL, 0); } else { /* Not a migration cleanup, the key is really deleted from the database, * need to notify the clients. */ diff --git a/src/config.c b/src/config.c index 0ad28ef5b..0062708bd 100644 --- a/src/config.c +++ b/src/config.c @@ -2917,7 +2917,7 @@ static int setConfigNotifyKeyspaceEventsOption(standardConfig *config, sds *argv } int flags = keyspaceEventsStringToFlags(argv[0]); if (flags == -1) { - *err = "Invalid event class character. Use 'Ag$lshzxeKEtmdnocr'."; + *err = "Invalid event class character. Use 'Ag$lshzxeKEtmdnocrSTIV'."; return 0; } server.notify_keyspace_events = flags; diff --git a/src/module.c b/src/module.c index d4a857a2e..e69c4f490 100644 --- a/src/module.c +++ b/src/module.c @@ -303,6 +303,9 @@ static pthread_mutex_t moduleGIL = PTHREAD_MUTEX_INITIALIZER; /* Function pointer type for keyspace event notification subscriptions from modules. */ typedef int (*RedisModuleNotificationFunc) (RedisModuleCtx *ctx, int type, const char *event, RedisModuleString *key); +/* Function pointer type for keyspace event notifications with subkeys from modules. 
*/ +typedef void (*RedisModuleNotificationWithSubkeysFunc)(RedisModuleCtx *ctx, int type, const char *event, RedisModuleString *key, RedisModuleString **subkeys, int count); + /* Function pointer type for post jobs */ typedef void (*RedisModulePostNotificationJobFunc) (RedisModuleCtx *ctx, void *pd); @@ -313,8 +316,12 @@ typedef struct RedisModuleKeyspaceSubscriber { RedisModule *module; /* Notification callback in the module*/ RedisModuleNotificationFunc notify_callback; + /* Extended notification callback with subkeys */ + RedisModuleNotificationWithSubkeysFunc notify_callback_with_subkeys; /* A bit mask of the events the module is interested in */ int event_mask; + /* Delivery flags for subkey notifications, controlling when the callback is invoked. */ + int flags; /* Active flag set on entry, to avoid reentrant subscribers * calling themselves */ int active; @@ -332,6 +339,11 @@ typedef struct RedisModulePostExecUnitJob { /* The module keyspace notification subscribers list */ static list *moduleKeyspaceSubscribers; +/* Cached event types that have at least one subscriber. + * Updated on subscribe/unsubscribe to avoid traversing the list on every event. */ +static int moduleKeyspaceSubscribersTypes = 0; +static int moduleKeyspaceSubscribersWithSubkeysTypes = 0; + /* The module post keyspace jobs list */ static list *modulePostExecUnitJobs; @@ -783,6 +795,23 @@ int moduleDelKeyIfEmpty(RedisModuleKey *key) { } } +/* Update the cached subscriber types by walking the subscriber list. + * Called after subscribe/unsubscribe operations. 
*/ +static void moduleUpdateKeyspaceSubscribersTypes(void) { + int mask = 0, subkeys_mask = 0; + listIter li; + listNode *ln; + listRewind(moduleKeyspaceSubscribers,&li); + while((ln = listNext(&li))) { + RedisModuleKeyspaceSubscriber *sub = ln->value; + mask |= sub->event_mask; + if (sub->notify_callback_with_subkeys) + subkeys_mask |= sub->event_mask; + } + moduleKeyspaceSubscribersTypes = mask; + moduleKeyspaceSubscribersWithSubkeysTypes = subkeys_mask; +} + /* -------------------------------------------------------------------------- * Service API exported to modules * @@ -9250,10 +9279,13 @@ int RM_SubscribeToKeyspaceEvents(RedisModuleCtx *ctx, int types, RedisModuleNoti RedisModuleKeyspaceSubscriber *sub = zmalloc(sizeof(*sub)); sub->module = ctx->module; sub->event_mask = types; + sub->flags = REDISMODULE_NOTIFY_FLAG_NONE; sub->notify_callback = callback; + sub->notify_callback_with_subkeys = NULL; sub->active = 0; listAddNodeTail(moduleKeyspaceSubscribers, sub); + moduleUpdateKeyspaceSubscribersTypes(); return REDISMODULE_OK; } @@ -9286,19 +9318,101 @@ int RM_UnsubscribeFromKeyspaceEvents(RedisModuleCtx *ctx, int types, RedisModule removed++; } } + if (removed > 0) moduleUpdateKeyspaceSubscribersTypes(); return removed > 0 ? REDISMODULE_OK : REDISMODULE_ERR; } -/* Check any subscriber for event */ -int moduleHasSubscribersForKeyspaceEvent(int type) { +/* Subscribe to keyspace notifications with subkey information. + * + * This is the extended version of RM_SubscribeToKeyspaceEvents. When subkeys + * are available, the `subkeys` array and `count` are passed to the callback. + * `subkeys` contains only the names of affected subkeys (values are not included), + * and `count` is the number of elements. The array may contain duplicates when + * the same subkey appears more than once in a command (e.g. HSET key f1 v1 f1 v2 + * produces subkeys=["f1","f1"], count=2). When no subkeys are present, `subkeys` + * will be NULL and `count` will be 0. 
Whether events without subkeys are delivered + * depends on the `flags` parameter (see below). + * + * `types` is a bit mask of event types the module is interested in + * (using the same REDISMODULE_NOTIFY_* flags as RM_SubscribeToKeyspaceEvents). + * + * `flags` controls delivery filtering: + * - REDISMODULE_NOTIFY_FLAG_NONE: The callback is invoked for all matching + * events regardless of whether subkeys are present, so a separate + * RM_SubscribeToKeyspaceEvents registration can be omitted. + * - REDISMODULE_NOTIFY_FLAG_SUBKEYS_REQUIRED: The callback is only invoked + * when subkeys are not empty. Events without subkey information (e.g. SET, + * EXPIRE, DEL) are skipped. + * + * The callback signature is: + * void callback(RedisModuleCtx *ctx, int type, const char *event, + * RedisModuleString *key, RedisModuleString **subkeys, int count); + * + * The subkeys array and its contents are only valid during the callback. + * The underlying objects may be stack-allocated or temporary, so + * RM_RetainString must NOT be used on them. To keep a subkey beyond + * the callback (e.g. in a RM_AddPostNotificationJob callback), use + * RM_HoldString (which handles static objects by copying) or + * RM_CreateStringFromString to make a deep copy before returning. + */ +int RM_SubscribeToKeyspaceEventsWithSubkeys(RedisModuleCtx *ctx, int types, int flags, RedisModuleNotificationWithSubkeysFunc callback) { + RedisModuleKeyspaceSubscriber *sub = zmalloc(sizeof(*sub)); + sub->module = ctx->module; + sub->event_mask = types; + sub->flags = flags; + sub->notify_callback = NULL; + sub->notify_callback_with_subkeys = callback; + sub->active = 0; + + listAddNodeTail(moduleKeyspaceSubscribers, sub); + moduleUpdateKeyspaceSubscribersTypes(); + return REDISMODULE_OK; +} + +/* Unregister a module's callback from keyspace notifications with subkeys + * for specific event types. 
+ * + * This function removes a previously registered subscription identified by + * the event mask, delivery flags, and the callback function. + * + * Parameters: + * - ctx: The RedisModuleCtx associated with the calling module. + * - types: The event mask representing the notification types to unsubscribe from. + * - flags: The delivery flags that were used during registration. + * - callback: The callback function pointer that was originally registered. + * + * Returns: + * - REDISMODULE_OK on successful removal of the subscription. + * - REDISMODULE_ERR if no matching subscription was found. */ +int RM_UnsubscribeFromKeyspaceEventsWithSubkeys(RedisModuleCtx *ctx, int types, int flags, RedisModuleNotificationWithSubkeysFunc callback) { + if (!ctx || !callback) return REDISMODULE_ERR; + int removed = 0; listIter li; listNode *ln; listRewind(moduleKeyspaceSubscribers,&li); - while((ln = listNext(&li))) { + while ((ln = listNext(&li))) { RedisModuleKeyspaceSubscriber *sub = ln->value; - if (sub->event_mask & type) return 1; + if (sub->event_mask == types && sub->flags == flags && + sub->notify_callback_with_subkeys == callback && + sub->module == ctx->module) + { + zfree(sub); + listDelNode(moduleKeyspaceSubscribers, ln); + removed++; + } } - return 0; + if (removed > 0) moduleUpdateKeyspaceSubscribersTypes(); + return removed > 0 ? REDISMODULE_OK : REDISMODULE_ERR; +} + +/* Check any subscriber for event. */ +int moduleHasSubscribersForKeyspaceEvent(int type) { + return (moduleKeyspaceSubscribersTypes & type) != 0; +} + +/* Check any subscriber for event with subkeys. 
*/ +int moduleHasSubscribersForKeyspaceEventWithSubkeys(int type) { + return (moduleKeyspaceSubscribersWithSubkeysTypes & type) != 0; } void firePostExecutionUnitJobs(void) { @@ -9372,10 +9486,29 @@ int RM_NotifyKeyspaceEvent(RedisModuleCtx *ctx, int type, const char *event, Red return REDISMODULE_OK; } +/* Like RM_NotifyKeyspaceEvent, but also triggers subkey-level notifications + * when subkeys are provided. Both key-level (keyspace/keyevent) and + * subkey-level (subkeyspace/subkeyevent/subkeyspaceitem/subkeyspaceevent) + * channels are published to, depending on the server configuration. + * + * This is the extended version of RM_NotifyKeyspaceEvent and can actually + * replace it. When called with subkeys=NULL and count=0, it behaves + * identically to RM_NotifyKeyspaceEvent. */ +int RM_NotifyKeyspaceEventWithSubkeys(RedisModuleCtx *ctx, int type, const char *event, + RedisModuleString *key, RedisModuleString **subkeys, int count) { + if (!ctx || !ctx->client) + return REDISMODULE_ERR; + notifyKeyspaceEventWithSubkeys(type, (char *)event, key, ctx->client->db->id, subkeys, count); + return REDISMODULE_OK; +} + /* Dispatcher for keyspace notifications to module subscriber functions. - * This gets called only if at least one module requested to be notified on - * keyspace notifications */ -void moduleNotifyKeyspaceEvent(int type, const char *event, robj *key, int dbid) { + * This gets called only if at least one module requested to be notified on + * keyspace notifications. For each subscriber, if notify_callback is set it + * is called; otherwise if notify_callback_with_subkeys is set it is called + * for all events (subkeys may be NULL/0 when not applicable). 
*/ +void moduleNotifyKeyspaceEvent(int type, const char *event, robj *key, int dbid, + robj **subkeys, int count) { /* Don't do anything if there aren't any subscribers */ if (listLength(moduleKeyspaceSubscribers) == 0) return; @@ -9403,7 +9536,9 @@ void moduleNotifyKeyspaceEvent(int type, const char *event, robj *key, int dbid) listRewind(moduleKeyspaceSubscribers,&li); /* Remove irrelevant flags from the type mask */ - type &= ~(NOTIFY_KEYEVENT | NOTIFY_KEYSPACE); + type &= ~(NOTIFY_KEYEVENT | NOTIFY_KEYSPACE | + NOTIFY_SUBKEYSPACE | NOTIFY_SUBKEYEVENT | + NOTIFY_SUBKEYSPACEITEM | NOTIFY_SUBKEYSPACEEVENT); while((ln = listNext(&li))) { RedisModuleKeyspaceSubscriber *sub = ln->value; @@ -9411,6 +9546,15 @@ void moduleNotifyKeyspaceEvent(int type, const char *event, robj *key, int dbid) * and avoid subscribers triggering themselves */ if ((sub->event_mask & type) && (sub->active == 0 || (sub->module->options & REDISMODULE_OPTIONS_ALLOW_NESTED_KEYSPACE_NOTIFICATIONS))) { + + /* If SUBKEYS_REQUIRED is set, skip events without subkeys. 
*/ + if (sub->notify_callback_with_subkeys && + (sub->flags & REDISMODULE_NOTIFY_FLAG_SUBKEYS_REQUIRED) && + (subkeys == NULL || count == 0)) + { + continue; + } + RedisModuleCtx ctx; moduleCreateContext(&ctx, sub->module, REDISMODULE_CTX_TEMP_CLIENT); selectDb(ctx.client, dbid); @@ -9422,7 +9566,11 @@ void moduleNotifyKeyspaceEvent(int type, const char *event, robj *key, int dbid) sub->active = 1; server.allow_access_expired++; server.allow_access_trimmed++; - sub->notify_callback(&ctx, type, event, key); + if (sub->notify_callback) { + sub->notify_callback(&ctx, type, event, key); + } else if (sub->notify_callback_with_subkeys) { + sub->notify_callback_with_subkeys(&ctx, type, event, key, subkeys, count); + } server.allow_access_expired--; server.allow_access_trimmed--; sub->active = prev_active; @@ -9445,6 +9593,7 @@ void moduleUnsubscribeNotifications(RedisModule *module) { zfree(sub); } } + moduleUpdateKeyspaceSubscribersTypes(); } /* -------------------------------------------------------------------------- @@ -15414,9 +15563,12 @@ void moduleRegisterCoreAPI(void) { REGISTER_API(DigestAddLongLong); REGISTER_API(DigestEndSequence); REGISTER_API(NotifyKeyspaceEvent); + REGISTER_API(NotifyKeyspaceEventWithSubkeys); REGISTER_API(GetNotifyKeyspaceEvents); REGISTER_API(SubscribeToKeyspaceEvents); REGISTER_API(UnsubscribeFromKeyspaceEvents); + REGISTER_API(SubscribeToKeyspaceEventsWithSubkeys); + REGISTER_API(UnsubscribeFromKeyspaceEventsWithSubkeys); REGISTER_API(AddPostNotificationJob); REGISTER_API(RegisterClusterMessageReceiver); REGISTER_API(SendClusterMessage); diff --git a/src/notify.c b/src/notify.c index 11ea53241..729865f0e 100644 --- a/src/notify.c +++ b/src/notify.c @@ -41,6 +41,10 @@ int keyspaceEventsStringToFlags(char *classes) { case 'o': flags |= NOTIFY_OVERWRITTEN; break; case 'c': flags |= NOTIFY_TYPE_CHANGED; break; case 'r': flags |= NOTIFY_RATE_LIMIT; break; + case 'S': flags |= NOTIFY_SUBKEYSPACE; break; + case 'T': flags |= 
NOTIFY_SUBKEYEVENT; break; + case 'I': flags |= NOTIFY_SUBKEYSPACEITEM; break; + case 'V': flags |= NOTIFY_SUBKEYSPACEEVENT; break; default: return -1; } } @@ -76,41 +80,88 @@ sds keyspaceEventsFlagsToString(int flags) { if (flags & NOTIFY_KEYSPACE) res = sdscatlen(res,"K",1); if (flags & NOTIFY_KEYEVENT) res = sdscatlen(res,"E",1); if (flags & NOTIFY_KEY_MISS) res = sdscatlen(res,"m",1); + if (flags & NOTIFY_SUBKEYSPACE) res = sdscatlen(res,"S",1); + if (flags & NOTIFY_SUBKEYEVENT) res = sdscatlen(res,"T",1); + if (flags & NOTIFY_SUBKEYSPACEITEM) res = sdscatlen(res,"I",1); + if (flags & NOTIFY_SUBKEYSPACEEVENT) res = sdscatlen(res,"V",1); return res; } -/* The API provided to the rest of the Redis core is a simple function: +/* Append subkeys in length-prefixed format to 'dst'. + * If 'dst' is NULL, a new sds is created. + * Format: :[,:...] + * Example: 3:abc,2:xx,5:hello */ +static sds catSubkeysPayload(sds dst, robj **subkeys, int count) { + if (dst == NULL) dst = sdsempty(); + char lenbuf[32]; + + for (int i = 0; i < count; i++) { + serverAssert(sdsEncodedObject(subkeys[i])); + if (i > 0) dst = sdscatlen(dst, ",", 1); + size_t subkeylen = sdslen(subkeys[i]->ptr); + int lenlen = ll2string(lenbuf, sizeof(lenbuf), subkeylen); + dst = sdscatlen(dst, lenbuf, lenlen); + dst = sdscatlen(dst, ":", 1); + dst = sdscatsds(dst, subkeys[i]->ptr); + } + return dst; +} + +/* Internal implementation for keyspace event notifications. + * + * The API provided to the rest of the Redis core is: * * notifyKeyspaceEvent(int type, char *event, robj *key, int dbid); + * notifyKeyspaceEventWithSubkeys(int type, char *event, robj *key, int dbid, + * robj **subkeys, int count); * * 'type' is the notification class we define in `server.h`. * 'event' is a C string representing the event name. * 'key' is a Redis object representing the key name. * 'dbid' is the database ID where the key lives. + * 'subkeys' is an array of Redis objects representing the subkey names (can be NULL). 
+ * 'count' is the number of subkeys in the array. + * + * For subkey notifications (4 channel types): + * - __subkeyspace@__: payload: | + * - __subkeyevent@__: payload: :| + * - __subkeyspaceitem@__:\n payload: + * - __subkeyspaceevent@__:| payload: + * + * Where is in length-prefixed format: :[,:...] + * Example: 3:foo,5:hello * * NOTE: This function may invoke module notification callbacks, which may * cause the key's kvobj to be reallocated. */ -void notifyKeyspaceEvent(int type, const char *event, robj *key, int dbid) { +static void notifyKeyspaceEventImpl(int type, const char *event, robj *key, int dbid, + robj **subkeys, int count) +{ sds chan; robj *chanobj, *eventobj; - int len = -1; char buf[24]; + serverAssert(sdsEncodedObject(key)); /* If any modules are interested in events, notify the module system now. * This bypasses the notifications configuration, but the module engine * will only call event subscribers if the event type matches the types - * they are interested in. */ - moduleNotifyKeyspaceEvent(type, event, key, dbid); + * they are interested in. Subkeys are passed through so that subscribers + * with a subkey callback receive them. */ + moduleNotifyKeyspaceEvent(type, event, key, dbid, subkeys, count); /* If notifications for this class of events are off, return ASAP. */ if (!(server.notify_keyspace_events & type)) return; + /* If there are no Pub/Sub subscribers (neither pattern nor channel), + * skip the remaining notification work since nobody would receive it. */ + if (dictSize(server.pubsub_patterns) == 0 && kvstoreSize(server.pubsub_channels) == 0) + return; + eventobj = createStringObject(event,strlen(event)); + int len = ll2string(buf,sizeof(buf),dbid); /* __keyspace@__: notifications. 
*/ if (server.notify_keyspace_events & NOTIFY_KEYSPACE) { chan = sdsnewlen("__keyspace@",11); - len = ll2string(buf,sizeof(buf),dbid); chan = sdscatlen(chan, buf, len); chan = sdscatlen(chan, "__:", 3); chan = sdscatsds(chan, key->ptr); @@ -122,7 +173,6 @@ void notifyKeyspaceEvent(int type, const char *event, robj *key, int dbid) { /* __keyevent@__: notifications. */ if (server.notify_keyspace_events & NOTIFY_KEYEVENT) { chan = sdsnewlen("__keyevent@",11); - if (len == -1) len = ll2string(buf,sizeof(buf),dbid); chan = sdscatlen(chan, buf, len); chan = sdscatlen(chan, "__:", 3); chan = sdscatsds(chan, eventobj->ptr); @@ -130,5 +180,112 @@ void notifyKeyspaceEvent(int type, const char *event, robj *key, int dbid) { pubsubPublishMessage(chanobj, key, 0); decrRefCount(chanobj); } + + /* Subkey-level notifications (only when subkeys are provided). */ + if (subkeys != NULL && count > 0) { + /* __subkeyspace@__: |:[,...] notifications. + * Skip if the event contains '|' to avoid parsing ambiguity since '|' + * is used as a separator between event and subkeys in the payload. */ + if (server.notify_keyspace_events & NOTIFY_SUBKEYSPACE && !strchr(event, '|')) { + chan = sdsnewlen("__subkeyspace@", 14); + chan = sdscatlen(chan, buf, len); + chan = sdscatlen(chan, "__:", 3); + chan = sdscatsds(chan, key->ptr); + chanobj = createObject(OBJ_STRING, chan); + + /* Build payload: | */ + sds payload = sdsdup(eventobj->ptr); + payload = sdscatlen(payload, "|", 1); + payload = catSubkeysPayload(payload, subkeys, count); + robj *payloadobj = createObject(OBJ_STRING, payload); + pubsubPublishMessage(chanobj, payloadobj, 0); + decrRefCount(chanobj); + decrRefCount(payloadobj); + } + + /* __subkeyevent@__: :|:[,...] notifications. 
*/ + if (server.notify_keyspace_events & NOTIFY_SUBKEYEVENT) { + chan = sdsnewlen("__subkeyevent@", 14); + chan = sdscatlen(chan, buf, len); + chan = sdscatlen(chan, "__:", 3); + chan = sdscatsds(chan, eventobj->ptr); + chanobj = createObject(OBJ_STRING, chan); + + /* Build payload: :| */ + size_t keylen = sdslen(key->ptr); + char keylenbuf[32]; + int keylenlen = ll2string(keylenbuf, sizeof(keylenbuf), keylen); + sds payload = sdsnewlen(keylenbuf, keylenlen); + payload = sdscatlen(payload, ":", 1); + payload = sdscatsds(payload, key->ptr); + payload = sdscatlen(payload, "|", 1); + payload = catSubkeysPayload(payload, subkeys, count); + robj *payloadobj = createObject(OBJ_STRING, payload); + pubsubPublishMessage(chanobj, payloadobj, 0); + decrRefCount(chanobj); + decrRefCount(payloadobj); + } + + /* __subkeyspaceitem@__:\n notifications (per subkey). + * Skip if the key contains '\n' to avoid parsing ambiguity in the channel name. */ + if (server.notify_keyspace_events & NOTIFY_SUBKEYSPACEITEM && + memchr(key->ptr, '\n', sdslen(key->ptr)) == NULL) + { + for (int i = 0; i < count; i++) { + serverAssert(sdsEncodedObject(subkeys[i])); + chan = sdsnewlen("__subkeyspaceitem@", 18); + chan = sdscatlen(chan, buf, len); + chan = sdscatlen(chan, "__:", 3); + chan = sdscatsds(chan, key->ptr); + chan = sdscatlen(chan, "\n", 1); + chan = sdscatsds(chan, subkeys[i]->ptr); + chanobj = createObject(OBJ_STRING, chan); + pubsubPublishMessage(chanobj, eventobj, 0); + decrRefCount(chanobj); + } + } + + /* __subkeyspaceevent@__:| notifications. + * Skip if the event contains '|' to avoid parsing ambiguity since '|' + * is used as a separator between event and key in the channel name. 
*/ + if (server.notify_keyspace_events & NOTIFY_SUBKEYSPACEEVENT && !strchr(event, '|')) { + chan = sdsnewlen("__subkeyspaceevent@", 19); + chan = sdscatlen(chan, buf, len); + chan = sdscatlen(chan, "__:", 3); + chan = sdscatsds(chan, eventobj->ptr); + chan = sdscatlen(chan, "|", 1); + chan = sdscatsds(chan, key->ptr); + chanobj = createObject(OBJ_STRING, chan); + robj *payloadobj = createObject(OBJ_STRING, catSubkeysPayload(NULL, subkeys, count)); + pubsubPublishMessage(chanobj, payloadobj, 0); + decrRefCount(chanobj); + decrRefCount(payloadobj); + } + } + decrRefCount(eventobj); } + +/* Public API for key-level notifications (backward compatible). */ +void notifyKeyspaceEvent(int type, const char *event, robj *key, int dbid) { + notifyKeyspaceEventImpl(type, event, key, dbid, NULL, 0); +} + +/* Public API for notifications with subkeys (key-level + subkey-level). */ +void notifyKeyspaceEventWithSubkeys(int type, const char *event, robj *key, int dbid, + robj **subkeys, int count) { + notifyKeyspaceEventImpl(type, event, key, dbid, subkeys, count); +} + +/* Check if subkey information should be collected for the given event type. + * Returns true if any module subscribed to this event with subkeys, or if + * there are Pub/Sub subscribers and any subkey-level notification channel is + * enabled for this event type. 
*/ +int isSubkeyNotifyEnabled(int type) { + if (moduleHasSubscribersForKeyspaceEventWithSubkeys(type)) return 1; + if (dictSize(server.pubsub_patterns) == 0 && kvstoreSize(server.pubsub_channels) == 0) + return 0; + return (server.notify_keyspace_events & type) && + (server.notify_keyspace_events & (NOTIFY_SUBKEYSPACE | NOTIFY_SUBKEYEVENT | + NOTIFY_SUBKEYSPACEITEM | NOTIFY_SUBKEYSPACEEVENT)); +} diff --git a/src/rdb.c b/src/rdb.c index 222314a2c..470de9806 100644 --- a/src/rdb.c +++ b/src/rdb.c @@ -4156,7 +4156,7 @@ int rdbLoadRioWithLoadingCtx(rio *rdb, int rdbflags, rdbSaveInfo *rsi, rdbLoadin objectSetLRUOrLFU(val,lfu_freq,lru_idle,lru_clock,1000); /* call key space notification on key loaded for modules only */ - moduleNotifyKeyspaceEvent(NOTIFY_LOADED, "loaded", &keyobj, db->id); + moduleNotifyKeyspaceEvent(NOTIFY_LOADED, "loaded", &keyobj, db->id, NULL, 0); /* Release key (sds), dictEntry stores a copy of it in embedded data */ sdsfree(key); diff --git a/src/redismodule.h b/src/redismodule.h index c1040f12f..56e3d4f91 100644 --- a/src/redismodule.h +++ b/src/redismodule.h @@ -250,10 +250,20 @@ This flag should not be used directly by the module. #define REDISMODULE_NOTIFY_KEY_TRIMMED (1<<17) /* module only key space notification, indicates a key trimmed during slot migration */ #define REDISMODULE_NOTIFY_RATE_LIMIT (1<<18) /* r, rate limit event */ +#define REDISMODULE_NOTIFY_SUBKEYSPACE (1<<19) /* S */ +#define REDISMODULE_NOTIFY_SUBKEYEVENT (1<<20) /* T */ +#define REDISMODULE_NOTIFY_SUBKEYSPACEITEM (1<<21) /* I */ +#define REDISMODULE_NOTIFY_SUBKEYSPACEEVENT (1<<22) /* V */ + /* Next notification flag, must be updated when adding new flags above! This flag should not be used directly by the module. * Use RedisModule_GetKeyspaceNotificationFlagsAll instead. */ -#define _REDISMODULE_NOTIFY_NEXT (1<<19) +#define _REDISMODULE_NOTIFY_NEXT (1<<23) + +/* Delivery flags for RM_SubscribeToKeyspaceEventsWithSubkeys. 
+ * These are passed in the 'flags' parameter, not in 'types'. */ +#define REDISMODULE_NOTIFY_FLAG_NONE 0 /* Invoke callback for all matching events */ +#define REDISMODULE_NOTIFY_FLAG_SUBKEYS_REQUIRED (1<<0) /* Only invoke callback when subkeys are present */ #define REDISMODULE_NOTIFY_ALL (REDISMODULE_NOTIFY_GENERIC | REDISMODULE_NOTIFY_STRING | REDISMODULE_NOTIFY_LIST | REDISMODULE_NOTIFY_SET | REDISMODULE_NOTIFY_HASH | REDISMODULE_NOTIFY_ZSET | REDISMODULE_NOTIFY_EXPIRED | REDISMODULE_NOTIFY_EVICTED | REDISMODULE_NOTIFY_STREAM | REDISMODULE_NOTIFY_MODULE) /* A */ @@ -977,6 +987,7 @@ typedef struct RedisModuleConfigIterator RedisModuleConfigIterator; typedef int (*RedisModuleCmdFunc)(RedisModuleCtx *ctx, RedisModuleString **argv, int argc); typedef void (*RedisModuleDisconnectFunc)(RedisModuleCtx *ctx, RedisModuleBlockedClient *bc); typedef int (*RedisModuleNotificationFunc)(RedisModuleCtx *ctx, int type, const char *event, RedisModuleString *key); +typedef void (*RedisModuleNotificationWithSubkeysFunc)(RedisModuleCtx *ctx, int type, const char *event, RedisModuleString *key, RedisModuleString **subkeys, int count); typedef void (*RedisModulePostNotificationJobFunc) (RedisModuleCtx *ctx, void *pd); typedef void *(*RedisModuleTypeLoadFunc)(RedisModuleIO *rdb, int encver); typedef void (*RedisModuleTypeSaveFunc)(RedisModuleIO *rdb, void *value); @@ -1362,8 +1373,11 @@ REDISMODULE_API int (*RedisModule_ThreadSafeContextTryLock)(RedisModuleCtx *ctx) REDISMODULE_API void (*RedisModule_ThreadSafeContextUnlock)(RedisModuleCtx *ctx) REDISMODULE_ATTR; REDISMODULE_API int (*RedisModule_SubscribeToKeyspaceEvents)(RedisModuleCtx *ctx, int types, RedisModuleNotificationFunc cb) REDISMODULE_ATTR; REDISMODULE_API int (*RedisModule_UnsubscribeFromKeyspaceEvents)(RedisModuleCtx *ctx, int types, RedisModuleNotificationFunc cb) REDISMODULE_ATTR; +REDISMODULE_API int (*RedisModule_SubscribeToKeyspaceEventsWithSubkeys)(RedisModuleCtx *ctx, int types, int flags, 
RedisModuleNotificationWithSubkeysFunc cb) REDISMODULE_ATTR; +REDISMODULE_API int (*RedisModule_UnsubscribeFromKeyspaceEventsWithSubkeys)(RedisModuleCtx *ctx, int types, int flags, RedisModuleNotificationWithSubkeysFunc cb) REDISMODULE_ATTR; REDISMODULE_API int (*RedisModule_AddPostNotificationJob)(RedisModuleCtx *ctx, RedisModulePostNotificationJobFunc callback, void *pd, void (*free_pd)(void*)) REDISMODULE_ATTR; REDISMODULE_API int (*RedisModule_NotifyKeyspaceEvent)(RedisModuleCtx *ctx, int type, const char *event, RedisModuleString *key) REDISMODULE_ATTR; +REDISMODULE_API int (*RedisModule_NotifyKeyspaceEventWithSubkeys)(RedisModuleCtx *ctx, int type, const char *event, RedisModuleString *key, RedisModuleString **subkeys, int count) REDISMODULE_ATTR; REDISMODULE_API int (*RedisModule_GetNotifyKeyspaceEvents)(void) REDISMODULE_ATTR; REDISMODULE_API int (*RedisModule_BlockedClientDisconnected)(RedisModuleCtx *ctx) REDISMODULE_ATTR; REDISMODULE_API void (*RedisModule_RegisterClusterMessageReceiver)(RedisModuleCtx *ctx, uint8_t type, RedisModuleClusterMessageReceiver callback) REDISMODULE_ATTR; @@ -1764,8 +1778,11 @@ static int RedisModule_Init(RedisModuleCtx *ctx, const char *name, int ver, int REDISMODULE_GET_API(SetDisconnectCallback); REDISMODULE_GET_API(SubscribeToKeyspaceEvents); REDISMODULE_GET_API(UnsubscribeFromKeyspaceEvents); + REDISMODULE_GET_API(SubscribeToKeyspaceEventsWithSubkeys); + REDISMODULE_GET_API(UnsubscribeFromKeyspaceEventsWithSubkeys); REDISMODULE_GET_API(AddPostNotificationJob); REDISMODULE_GET_API(NotifyKeyspaceEvent); + REDISMODULE_GET_API(NotifyKeyspaceEventWithSubkeys); REDISMODULE_GET_API(GetNotifyKeyspaceEvents); REDISMODULE_GET_API(BlockedClientDisconnected); REDISMODULE_GET_API(RegisterClusterMessageReceiver); diff --git a/src/server.h b/src/server.h index 33556e3a9..d7a6e4215 100644 --- a/src/server.h +++ b/src/server.h @@ -796,7 +796,11 @@ typedef enum { #define NOTIFY_OVERWRITTEN (1<<15) /* o, key overwrite notification (Note: 
excluded from NOTIFY_ALL) */ #define NOTIFY_TYPE_CHANGED (1<<16) /* c, key type changed notification (Note: excluded from NOTIFY_ALL) */ #define NOTIFY_KEY_TRIMMED (1<<17) /* module only key space notification, indicates a key trimmed during slot migration */ -#define NOTIFY_RATE_LIMIT (1<<18) /* r, notify rate limit event (Note: excluded from NOTIFY_ALL)*/ +#define NOTIFY_RATE_LIMIT (1<<18) /* r, notify rate limit event (Note: excluded from NOTIFY_ALL)*/ +#define NOTIFY_SUBKEYSPACE (1<<19) /* S, subkey-level keyspace notification */ +#define NOTIFY_SUBKEYEVENT (1<<20) /* T, subkey-level keyevent notification */ +#define NOTIFY_SUBKEYSPACEITEM (1<<21) /* I, subkey-level notification per item: channel=key\nsubkey */ +#define NOTIFY_SUBKEYSPACEEVENT (1<<22) /* V, subkey-level notification: channel=event|key */ #define NOTIFY_ALL (NOTIFY_GENERIC | NOTIFY_STRING | NOTIFY_LIST | NOTIFY_SET | NOTIFY_HASH | NOTIFY_ZSET | NOTIFY_EXPIRED | NOTIFY_EVICTED | NOTIFY_STREAM | NOTIFY_MODULE) /* A flag */ /* Using the following macro you can run code inside serverCron() with the @@ -3078,7 +3082,7 @@ size_t moduleCount(void); void moduleAcquireGIL(void); int moduleTryAcquireGIL(void); void moduleReleaseGIL(void); -void moduleNotifyKeyspaceEvent(int type, const char *event, robj *key, int dbid); +void moduleNotifyKeyspaceEvent(int type, const char *event, robj *key, int dbid, robj **subkeys, int count); void firePostExecutionUnitJobs(void); void moduleCallCommandFilters(client *c); void modulePostExecutionUnitOperations(void); @@ -3106,6 +3110,7 @@ void moduleDefragEnd(void); void *moduleGetHandleByName(char *modulename); int moduleIsModuleCommand(void *module_handle, struct redisCommand *cmd); int moduleHasSubscribersForKeyspaceEvent(int type); +int moduleHasSubscribersForKeyspaceEventWithSubkeys(int type); /* pcmd */ void initPendingCommand(pendingCommand *pcmd); @@ -3841,8 +3846,10 @@ dict *getClientPubSubShardChannels(client *c); /* Keyspace events notification */ void 
notifyKeyspaceEvent(int type, const char *event, robj *key, int dbid); +void notifyKeyspaceEventWithSubkeys(int type, const char *event, robj *key, int dbid, robj **subkeys, int count); int keyspaceEventsStringToFlags(char *classes); sds keyspaceEventsFlagsToString(int flags); +int isSubkeyNotifyEnabled(int type); /* As part of KSN the module should not attempt to modify the key. Nevertheless, * RediSearch does it in some specific flows and modifies key metadata which in diff --git a/src/t_hash.c b/src/t_hash.c index 5ea456597..ac2219898 100644 --- a/src/t_hash.c +++ b/src/t_hash.c @@ -15,6 +15,7 @@ #include "ebuckets.h" #include "entry.h" #include "cluster_asm.h" +#include "vector.h" #include <math.h> /* Threshold for HEXPIRE and HPERSIST to be considered whether it is worth to @@ -45,6 +46,18 @@ typedef enum GetFieldRes { typedef listpackEntry CommonEntry; /* extend usage beyond lp */ +#define FIELDS_STACK_SIZE 16 + +/* A vec with an embedded stack buffer, used to collect field robj pointers + * for subkey notifications without heap allocation in the common case. */ +typedef struct fieldvec { vec v; void *buf[FIELDS_STACK_SIZE]; } fieldvec; + +static inline vec *fieldvecInit(fieldvec *fv, size_t cap) { + vecInit(&fv->v, fv->buf, FIELDS_STACK_SIZE); + vecReserve(&fv->v, cap); + return &fv->v; +} + /* hash field expiration (HFE) funcs */ static ExpireAction onFieldExpire(eItem item, void *ctx); static ExpireMeta* hentryGetExpireMeta(const eItem field); @@ -126,6 +139,7 @@ typedef struct OnFieldExpireCtx { robj *hashObj; redisDb *db; int activeEx; /* 1 for active expire, 0 for lazy expire */ + vec *vexpired; /* Expired fields vector */ } OnFieldExpireCtx; /* The implementation of hashes by dict was modified from storing fields as sds @@ -360,7 +374,8 @@ static uint64_t listpackExGetMinExpire(robj *o) { } /* Walk over fields and delete the expired ones. 
*/ -void listpackExExpire(redisDb *db, kvobj *kv, ExpireInfo *info, int activeEx) { +void listpackExExpire(redisDb *db, kvobj *kv, ExpireInfo *info) { + OnFieldExpireCtx *ctx = info->ctx; serverAssert(kv->encoding == OBJ_ENCODING_LISTPACK_EX); uint64_t expired = 0, min = EB_EXPIRE_TIME_INVALID; unsigned char *ptr; @@ -387,9 +402,15 @@ void listpackExExpire(redisDb *db, kvobj *kv, ExpireInfo *info, int activeEx) { if (val == HASH_LP_NO_TTL || (uint64_t) val > info->now) break; + /* Collect expired field for subkey notification. */ + if (ctx->vexpired) { + char *fstr = (char *)(fref ? fref : intbuf); + vecPush(ctx->vexpired, createStringObject(fstr, flen)); + } + propagateHashFieldDeletion(db, key, (char *)((fref) ? fref : intbuf), flen); server.stat_expired_subkeys++; - if (activeEx) server.stat_expired_subkeys_active++; + if (ctx->activeEx) server.stat_expired_subkeys_active++; ptr = lpNext(lpt->lp, ptr); @@ -780,9 +801,13 @@ GetFieldRes hashTypeGetValue(redisDb *db, kvobj *o, sds field, unsigned char **v /* If the field is the last one in the hash, then the hash will be deleted */ res = GETF_EXPIRED; robj *keyObj = createStringObject(key, sdslen(key)); - if (!(hfeFlags & HFE_LAZY_NO_NOTIFICATION)) - notifyKeyspaceEvent(NOTIFY_HASH, "hexpired", keyObj, db->id); - if ((hashTypeLength(o, 0) == 0) && (!(hfeFlags & HFE_LAZY_AVOID_HASH_DEL))) { + unsigned long length = hashTypeLength(o, 0); + if ((length != 0) && !(hfeFlags & HFE_LAZY_NO_NOTIFICATION)) { + robj fobj, *farr[1] = {&fobj}; + initStaticStringObject(fobj, field); + notifyKeyspaceEventWithSubkeys(NOTIFY_HASH, "hexpired", keyObj, db->id, farr, 1); + } + if ((length == 0) && (!(hfeFlags & HFE_LAZY_AVOID_HASH_DEL))) { if (!(hfeFlags & HFE_LAZY_NO_NOTIFICATION)) notifyKeyspaceEvent(NOTIFY_GENERIC, "del", keyObj, db->id); dbDelete(db,keyObj); @@ -1876,30 +1901,29 @@ void hashTypeRandomElement(robj *hashobj, unsigned long hashsize, CommonEntry *k */ uint64_t hashTypeExpire(redisDb *db, kvobj *o, uint32_t *quota, 
int updateSubexpires, int activeEx) { uint64_t noExpireLeftRes = EB_EXPIRE_TIME_INVALID; - ExpireInfo info = {0}; - if (o->encoding == OBJ_ENCODING_LISTPACK_EX) { - info = (ExpireInfo) { + /* Collect expired field names for batched subkey notification. + * Skip allocation entirely when subkey notifications are disabled. */ + fieldvec fvexpired; + vec *vexpired = isSubkeyNotifyEnabled(NOTIFY_HASH) ? + fieldvecInit(&fvexpired, FIELDS_STACK_SIZE) : NULL; + + OnFieldExpireCtx onFieldExpireCtx = { .hashObj = o, .db = db, .activeEx = activeEx, .vexpired = vexpired }; + ExpireInfo info = (ExpireInfo) { .maxToExpire = *quota, .now = commandTimeSnapshot(), + .ctx = &onFieldExpireCtx, .itemsExpired = 0}; - listpackExExpire(db, o, &info, activeEx); + if (o->encoding == OBJ_ENCODING_LISTPACK_EX) { + listpackExExpire(db, o, &info); } else { serverAssert(o->encoding == OBJ_ENCODING_HT); dict *d = o->ptr; htMetadataEx *dictExpireMeta = htGetMetadataEx(d); - OnFieldExpireCtx onFieldExpireCtx = { .hashObj = o, .db = db, .activeEx = activeEx }; - - info = (ExpireInfo){ - .maxToExpire = *quota, - .onExpireItem = onFieldExpire, - .ctx = &onFieldExpireCtx, - .now = commandTimeSnapshot() - }; - + info.onExpireItem = onFieldExpire; ebExpire(&dictExpireMeta->hfe, &hashFieldExpireBucketsType, &info); } @@ -1912,7 +1936,11 @@ uint64_t hashTypeExpire(redisDb *db, kvobj *o, uint32_t *quota, int updateSubexp if (info.itemsExpired) { sds keystr = kvobjGetKey(o); robj *key = createStringObject(keystr, sdslen(keystr)); - notifyKeyspaceEvent(NOTIFY_HASH, "hexpired", key, db->id); + + /* Send subkey notification with all expired fields */ + notifyKeyspaceEventWithSubkeys(NOTIFY_HASH, "hexpired", key, db->id, + vexpired ? (robj**)vecData(vexpired) : NULL, vexpired ? 
vecSize(vexpired) : 0); + int slot; int deleted = 0; @@ -1935,6 +1963,14 @@ uint64_t hashTypeExpire(redisDb *db, kvobj *o, uint32_t *quota, int updateSubexp decrRefCount(key); } + /* Free collected expired fields */ + if (vexpired) { + for (size_t i = 0; i < vecSize(vexpired); i++) { + decrRefCount(vecGet(vexpired, i)); + } + vecRelease(vexpired); + } + /* return 0 if hash got deleted, EB_EXPIRE_TIME_INVALID if no more fields * with expiration. Else return next expiration time */ return (info.nextExpireTime == EB_EXPIRE_TIME_INVALID) ? noExpireLeftRes : info.nextExpireTime; @@ -2103,7 +2139,7 @@ void hsetnxCommand(client *c) { updateKeysizesHist(c->db, OBJ_HASH, hlen - 1, hlen); if (server.memory_tracking_enabled) updateSlotAllocSize(c->db, getKeySlot(c->argv[1]->ptr), kv, oldsize, kvobjAllocSize(kv)); - notifyKeyspaceEvent(NOTIFY_HASH, "hset", c->argv[1], c->db->id); + notifyKeyspaceEventWithSubkeys(NOTIFY_HASH,"hset",c->argv[1],c->db->id,&c->argv[2],1); KSN_INVALIDATE_KVOBJ(kv); server.dirty++; } @@ -2141,7 +2177,16 @@ void hsetCommand(client *c) { updateKeysizesHist(c->db, OBJ_HASH, l - created, l); if (server.memory_tracking_enabled) updateSlotAllocSize(c->db, getKeySlot(c->argv[1]->ptr), kv, oldsize, kvobjAllocSize(kv)); - notifyKeyspaceEvent(NOTIFY_HASH,"hset",c->argv[1],c->db->id); + + /* Collect field pointers for subkey notification. Fields are at argv[2,4,6...]. 
*/ + int numfields = (c->argc - 2) / 2; + fieldvec fvset; + vec *vset = fieldvecInit(&fvset, numfields); + for (i = 0; i < numfields; i++) { + vecPush(vset, c->argv[2 + i * 2]); + } + notifyKeyspaceEventWithSubkeys(NOTIFY_HASH,"hset",c->argv[1],c->db->id,(robj**)vecData(vset),numfields); + vecRelease(vset); KSN_INVALIDATE_KVOBJ(kv); server.dirty += (c->argc - 2)/2; } @@ -2355,8 +2400,7 @@ err_expiration: */ void hsetexCommand(client *c) { int flags = 0, first_field_pos = 0, field_count = 0, expire_time_pos = -1; - int updated = 0, deleted = 0, set_expiry; - int expired = 0, fields_set = 0; + int set_expiry; long long expire_time = EB_EXPIRE_TIME_INVALID; int64_t oldlen, newlen; HashTypeSetEx setex; @@ -2383,6 +2427,13 @@ void hsetexCommand(client *c) { if (server.memory_tracking_enabled) oldsize = kvobjAllocSize(o); + /* Track fields for subkey notifications by event type. */ + fieldvec fvexpired, fvset, fvdeleted, fvupdated; + vec *vexpired = fieldvecInit(&fvexpired, field_count); + vec *vset = fieldvecInit(&fvset, field_count); + vec *vdeleted = fieldvecInit(&fvdeleted, field_count); + vec *vupdated = fieldvecInit(&fvupdated, field_count); + if (flags & (HFE_FXX | HFE_FNX)) { int found = 0; for (int i = 0; i < field_count; i++) { @@ -2398,7 +2449,9 @@ void hsetexCommand(client *c) { GetFieldRes res = hashTypeGetValue(c->db, o, field, &vstr, &vlen, &vll, opt, NULL); int exists = (res == GETF_OK); - expired += (res == GETF_EXPIRED); + if (res == GETF_EXPIRED) { + vecPush(vexpired, c->argv[first_field_pos + (i * 2)]); + } found += exists; /* Check for early exit if the condition is already invalid. */ @@ -2435,12 +2488,15 @@ void hsetexCommand(client *c) { opt |= HASH_SET_KEEP_TTL; hashTypeSet(c->db, o, field, value, opt); - fields_set = 1; + vecPush(vset, c->argv[first_field_pos + (i * 2)]); /* Update the expiration time. 
*/ if (set_expiry) { int ret = hashTypeSetEx(o, field, expire_time, &setex); - updated += (ret == HSETEX_OK); - deleted += (ret == HSETEX_DELETED); + if (ret == HSETEX_OK) { + vecPush(vupdated, c->argv[first_field_pos + (i * 2)]); + } else if (ret == HSETEX_DELETED) { + vecPush(vdeleted, c->argv[first_field_pos + (i * 2)]); + } } } @@ -2449,7 +2505,7 @@ void hsetexCommand(client *c) { server.dirty += field_count; - if (deleted) { + if (vecSize(vdeleted)) { /* If fields are deleted due to timestamp is being in the past, hdel's * are already propagated. No need to propagate the command itself. */ preventCommandPropagation(c); @@ -2470,15 +2526,23 @@ out: if (server.memory_tracking_enabled) updateSlotAllocSize(c->db, getKeySlot(c->argv[1]->ptr), o, oldsize, kvobjAllocSize(o)); /* Emit keyspace notifications based on field expiry, mutation, or key deletion */ - if (fields_set || expired) { + if (vecSize(vset) || vecSize(vexpired)) { newlen = (int64_t) hashTypeLength(o, 0); keyModified(c, c->db, c->argv[1], o, 1); - if (expired) - notifyKeyspaceEvent(NOTIFY_HASH, "hexpired", c->argv[1], c->db->id); - if (fields_set) { - notifyKeyspaceEvent(NOTIFY_HASH, "hset", c->argv[1], c->db->id); - if (deleted || updated) - notifyKeyspaceEvent(NOTIFY_HASH, deleted ? 
"hdel" : "hexpire", c->argv[1], c->db->id); + if (vecSize(vexpired)) { + notifyKeyspaceEventWithSubkeys(NOTIFY_HASH, "hexpired", c->argv[1], + c->db->id, (robj**)vecData(vexpired), vecSize(vexpired)); + } + if (vecSize(vset)) { + notifyKeyspaceEventWithSubkeys(NOTIFY_HASH, "hset", c->argv[1], + c->db->id, (robj**)vecData(vset), vecSize(vset)); + if (vecSize(vdeleted)) { + notifyKeyspaceEventWithSubkeys(NOTIFY_HASH, "hdel", c->argv[1], + c->db->id, (robj**)vecData(vdeleted), vecSize(vdeleted)); + } else if (vecSize(vupdated)) { + notifyKeyspaceEventWithSubkeys(NOTIFY_HASH, "hexpire", c->argv[1], + c->db->id, (robj**)vecData(vupdated), vecSize(vupdated)); + } } KSN_INVALIDATE_KVOBJ(o); @@ -2494,6 +2558,11 @@ out: if (oldlen != newlen) updateKeysizesHist(c->db, OBJ_HASH, oldlen, newlen); } + + vecRelease(vexpired); + vecRelease(vset); + vecRelease(vdeleted); + vecRelease(vupdated); } void hincrbyCommand(client *c) { @@ -2543,7 +2612,7 @@ void hincrbyCommand(client *c) { updateSlotAllocSize(c->db, getKeySlot(c->argv[1]->ptr), o, oldsize, kvobjAllocSize(o)); addReplyLongLong(c,value); keyModified(c,c->db,c->argv[1], o, 1); - notifyKeyspaceEvent(NOTIFY_HASH,"hincrby",c->argv[1],c->db->id); + notifyKeyspaceEventWithSubkeys(NOTIFY_HASH,"hincrby",c->argv[1],c->db->id,&c->argv[2],1); KSN_INVALIDATE_KVOBJ(o); server.dirty++; } @@ -2602,7 +2671,7 @@ void hincrbyfloatCommand(client *c) { updateSlotAllocSize(c->db, getKeySlot(c->argv[1]->ptr), o, oldsize, kvobjAllocSize(o)); addReplyBulkCBuffer(c,buf,len); keyModified(c,c->db,c->argv[1],o,1); - notifyKeyspaceEvent(NOTIFY_HASH,"hincrbyfloat",c->argv[1],c->db->id); + notifyKeyspaceEventWithSubkeys(NOTIFY_HASH,"hincrbyfloat",c->argv[1],c->db->id,&c->argv[2],1); KSN_INVALIDATE_KVOBJ(o); server.dirty++; @@ -2651,19 +2720,24 @@ void hgetCommand(client *c) { void hmgetCommand(client *c) { GetFieldRes res = GETF_OK; - int i; - int expired = 0, deleted = 0; + int i, deleted = 0; /* Don't abort when the key cannot be found. 
Non-existing keys are empty * hashes, where HMGET should respond with a series of null bulks. */ kvobj *o = lookupKeyRead(c->db, c->argv[1]); if (checkType(c,o,OBJ_HASH)) return; + /* Track expired fields for subkey notification. */ + fieldvec fvexpired; + vec *vexpired = fieldvecInit(&fvexpired, c->argc-2); + addReplyArrayLen(c, c->argc-2); for (i = 2; i < c->argc ; i++) { if (!deleted) { res = addHashFieldToReply(c, o, c->argv[i]->ptr, HFE_LAZY_NO_NOTIFICATION); - expired += (res == GETF_EXPIRED); + if (res == GETF_EXPIRED) { + vecPush(vexpired, c->argv[i]); + } deleted += (res == GETF_EXPIRED_HASH); } else { /* If hash got lazy expired since all fields are expired (o is invalid), @@ -2672,11 +2746,14 @@ void hmgetCommand(client *c) { } } - if (expired) { - notifyKeyspaceEvent(NOTIFY_HASH, "hexpired", c->argv[1], c->db->id); - if (deleted) - notifyKeyspaceEvent(NOTIFY_GENERIC, "del", c->argv[1], c->db->id); + if (vecSize(vexpired)) { + notifyKeyspaceEventWithSubkeys(NOTIFY_HASH, "hexpired", c->argv[1], + c->db->id, (robj**)vecData(vexpired), vecSize(vexpired)); } + if (deleted) + notifyKeyspaceEvent(NOTIFY_GENERIC, "del", c->argv[1], c->db->id); + + vecRelease(vexpired); } /* Get and delete the value of one or more fields of a given hash key. @@ -2685,7 +2762,7 @@ void hmgetCommand(client *c) { * doesn’t exist. */ void hgetdelCommand(client *c) { - int res = 0, hfe = 0, deleted = 0, expired = 0; + int res = 0, hfe = 0; int64_t oldlen = -1; /* not exists as long as it is not set */ long num_fields = 0; size_t oldsize = 0; @@ -2723,6 +2800,11 @@ void hgetdelCommand(client *c) { oldsize = kvobjAllocSize(o); } + /* Track fields for subkey notifications. 
*/ + fieldvec fvexpired, fvdeleted; + vec *vexpired = fieldvecInit(&fvexpired, num_fields); + vec *vdeleted = fieldvecInit(&fvdeleted, num_fields); + addReplyArrayLen(c, num_fields); for (int i = 4; i < c->argc; i++) { const int flags = HFE_LAZY_NO_NOTIFICATION | @@ -2731,17 +2813,22 @@ void hgetdelCommand(client *c) { HFE_LAZY_NO_UPDATE_KEYSIZES | HFE_LAZY_NO_UPDATE_ALLOCSIZES; res = addHashFieldToReply(c, o, c->argv[i]->ptr, flags); - expired += (res == GETF_EXPIRED); + if (res == GETF_EXPIRED) { + vecPush(vexpired, c->argv[i]); + } /* Try to delete only if it's found and not expired lazily. */ if (res == GETF_OK) { - deleted++; + vecPush(vdeleted, c->argv[i]); serverAssert(hashTypeDelete(o, c->argv[i]->ptr) == 1); } } /* Return if no modification has been made. */ - if (expired == 0 && deleted == 0) + if (vecSize(vexpired) == 0 && vecSize(vdeleted) == 0) { + vecRelease(vexpired); + vecRelease(vdeleted); return; + } int64_t newlen = (int64_t) hashTypeLength(o, 0); /* del key if become empty */ @@ -2759,11 +2846,14 @@ void hgetdelCommand(client *c) { keyModified(c, c->db, c->argv[1], o, 1); - if (expired) - notifyKeyspaceEvent(NOTIFY_HASH, "hexpired", c->argv[1], c->db->id); - if (deleted) { - notifyKeyspaceEvent(NOTIFY_HASH, "hdel", c->argv[1], c->db->id); - server.dirty += deleted; + if (vecSize(vexpired)) { + notifyKeyspaceEventWithSubkeys(NOTIFY_HASH, "hexpired", c->argv[1], + c->db->id, (robj**)vecData(vexpired), vecSize(vexpired)); + } + if (vecSize(vdeleted)) { + notifyKeyspaceEventWithSubkeys(NOTIFY_HASH, "hdel", c->argv[1], + c->db->id, (robj**)vecData(vdeleted), vecSize(vdeleted)); + server.dirty += vecSize(vdeleted); /* Propagate as HDEL command. * Orig: HGETDEL FIELDS field1 field2 ... @@ -2773,6 +2863,8 @@ void hgetdelCommand(client *c) { rewriteClientCommandArgument(c, 2, NULL); /* Delete arg */ } + vecRelease(vexpired); + vecRelease(vdeleted); KSN_INVALIDATE_KVOBJ(o); /* Key may have become empty because of deleting fields or lazy expire. 
*/ @@ -2794,7 +2886,6 @@ void hgetdelCommand(client *c) { * doesn’t exist. */ void hgetexCommand(client *c) { - int expired = 0, deleted = 0, updated = 0; int parse_flags = 0, expire_time_pos = -1, first_field_pos = -1, num_fields = -1; long long expire_time = 0; int64_t oldlen = 0, newlen = -1; @@ -2824,6 +2915,12 @@ void hgetexCommand(client *c) { if (parse_flags) hashTypeSetExInit(c->argv[1], o, c, c->db, 0, &setex); + /* Track fields for subkey notifications by event type. */ + fieldvec fvexpired, fvdeleted, fvupdated; + vec *vexpired = fieldvecInit(&fvexpired, num_fields); + vec *vdeleted = fieldvecInit(&fvdeleted, num_fields); + vec *vupdated = fieldvecInit(&fvupdated, num_fields); + addReplyArrayLen(c, num_fields); for (int i = first_field_pos; i < first_field_pos + num_fields; i++) { const int flags = HFE_LAZY_NO_NOTIFICATION | @@ -2833,7 +2930,9 @@ void hgetexCommand(client *c) { HFE_LAZY_NO_UPDATE_ALLOCSIZES; sds field = c->argv[i]->ptr; int res = addHashFieldToReply(c, o, c->argv[i]->ptr, flags); - expired += (res == GETF_EXPIRED); + if (res == GETF_EXPIRED) { + vecPush(vexpired, c->argv[i]); + } /* Set expiration only if the field exists and not expired lazily. */ if (res == GETF_OK && parse_flags) { @@ -2841,8 +2940,11 @@ void hgetexCommand(client *c) { expire_time = EB_EXPIRE_TIME_INVALID; res = hashTypeSetEx(o, field, expire_time, &setex); - deleted += (res == HSETEX_DELETED); - updated += (res == HSETEX_OK); + if (res == HSETEX_DELETED) { + vecPush(vdeleted, c->argv[i]); + } else if (res == HSETEX_OK) { + vecPush(vupdated, c->argv[i]); + } } } @@ -2853,10 +2955,14 @@ void hgetexCommand(client *c) { updateSlotAllocSize(c->db, getKeySlot(c->argv[1]->ptr), o, oldsize, kvobjAllocSize(o)); /* Exit early if no modification has been made. 
*/ - if (expired == 0 && deleted == 0 && updated == 0) + if (vecSize(vexpired) == 0 && vecSize(vdeleted) == 0 && vecSize(vupdated) == 0) { + vecRelease(vexpired); + vecRelease(vdeleted); + vecRelease(vupdated); return; + } - server.dirty += deleted + updated; + server.dirty += vecSize(vdeleted) + vecSize(vupdated); keyModified(c, c->db, c->argv[1], o, 1); /* This command will never be propagated as it is. It will be propagated as @@ -2867,16 +2973,19 @@ void hgetexCommand(client *c) { * If PERSIST flags is used, it will be propagated as HPERSIST command. * IF EX/EXAT/PX/PXAT flags are used, it will be replicated as HPEXPRITEAT. */ - if (expired) - notifyKeyspaceEvent(NOTIFY_HASH, "hexpired", c->argv[1], c->db->id); - if (updated) { + if (vecSize(vexpired)) { + notifyKeyspaceEventWithSubkeys(NOTIFY_HASH, "hexpired", c->argv[1], + c->db->id, (robj**)vecData(vexpired), vecSize(vexpired)); + } + if (vecSize(vupdated)) { /* Build canonical command for propagation */ int canonical_argc; robj **canonical_argv; int idx = 0; if (parse_flags & HFE_PERSIST) { - notifyKeyspaceEvent(NOTIFY_HASH, "hpersist", c->argv[1], c->db->id); + notifyKeyspaceEventWithSubkeys(NOTIFY_HASH, "hpersist", c->argv[1], + c->db->id, (robj**)vecData(vupdated), vecSize(vupdated)); /* Build canonical HPERSIST command: HPERSIST key FIELDS numfields field1 field2 ... */ canonical_argc = 4 + num_fields; canonical_argv = zmalloc(sizeof(robj*) * canonical_argc); @@ -2885,7 +2994,8 @@ void hgetexCommand(client *c) { canonical_argv[idx++] = c->argv[1]; /* key */ incrRefCount(c->argv[1]); } else { - notifyKeyspaceEvent(NOTIFY_HASH, "hexpire", c->argv[1], c->db->id); + notifyKeyspaceEventWithSubkeys(NOTIFY_HASH, "hexpire", c->argv[1], + c->db->id, (robj**)vecData(vupdated), vecSize(vupdated)); /* Build canonical HPEXPIREAT command: HPEXPIREAT key timestamp FIELDS numfields field1 field2 ... 
*/ canonical_argc = 5 + num_fields; canonical_argv = zmalloc(sizeof(robj*) * canonical_argc); @@ -2905,13 +3015,18 @@ void hgetexCommand(client *c) { } replaceClientCommandVector(c, canonical_argc, canonical_argv); - } else if (deleted) { + } else if (vecSize(vdeleted)) { /* If we are here, fields are deleted because new timestamp was in the * past. HDELs are already propagated as part of hashTypeSetEx(). */ - notifyKeyspaceEvent(NOTIFY_HASH, "hdel", c->argv[1], c->db->id); + notifyKeyspaceEventWithSubkeys(NOTIFY_HASH, "hdel", c->argv[1], + c->db->id, (robj**)vecData(vdeleted), vecSize(vdeleted)); preventCommandPropagation(c); } + vecRelease(vexpired); + vecRelease(vdeleted); + vecRelease(vupdated); + /* Key may become empty due to lazy expiry in addHashFieldToReply() * or the new expiration time is in the past.*/ newlen = hashTypeLength(o, 0); @@ -2925,7 +3040,7 @@ void hgetexCommand(client *c) { void hdelCommand(client *c) { kvobj *o; - int j, deleted = 0, keyremoved = 0; + int j, keyremoved = 0; size_t oldsize = 0; if ((o = lookupKeyWriteOrReply(c,c->argv[1],shared.czero)) == NULL || @@ -2943,11 +3058,15 @@ void hdelCommand(client *c) { * field with expiration and removes it from global HFE DS. */ int isHFE = hashTypeIsFieldsWithExpire(o); + /* Track which fields were actually deleted for subkey notification. 
*/ + fieldvec fvdeleted; + vec *vdeleted = fieldvecInit(&fvdeleted, c->argc - 2); + if (o->encoding == OBJ_ENCODING_HT) dictPauseAutoResize((dict*)o->ptr); for (j = 2; j < c->argc; j++) { if (hashTypeDelete(o,c->argv[j]->ptr)) { - deleted++; + vecPush(vdeleted, c->argv[j]); if (hashTypeLength(o, 0) == 0) { keyremoved = 1; break; @@ -2961,7 +3080,7 @@ void hdelCommand(client *c) { } if (server.memory_tracking_enabled) updateSlotAllocSize(c->db, getKeySlot(c->argv[1]->ptr), o, oldsize, kvobjAllocSize(o)); - if (deleted) { + if (vecSize(vdeleted)) { /* Update keysizes histogram */ int64_t newLen = (int64_t) hashTypeLength(o, 0); updateKeysizesHist(c->db, OBJ_HASH, oldLen, keyremoved ? -1 : newLen); @@ -2977,15 +3096,16 @@ void hdelCommand(client *c) { /* Signal key modification */ keyModified(c, c->db, c->argv[1], keyremoved ? NULL : o, 1); - notifyKeyspaceEvent(NOTIFY_HASH,"hdel",c->argv[1],c->db->id); + notifyKeyspaceEventWithSubkeys(NOTIFY_HASH,"hdel",c->argv[1],c->db->id,(robj**)vecData(vdeleted),vecSize(vdeleted)); KSN_INVALIDATE_KVOBJ(o); /* Invalidate local kvobj pointer */ /* Notify del event if key was deleted */ if (keyremoved) notifyKeyspaceEvent(NOTIFY_GENERIC, "del", c->argv[1], c->db->id); - server.dirty += deleted; + server.dirty += vecSize(vdeleted); } - addReplyLongLong(c,deleted); + addReplyLongLong(c,vecSize(vdeleted)); + vecRelease(vdeleted); } void hlenCommand(client *c) { @@ -3523,6 +3643,11 @@ static ExpireAction onFieldExpire(eItem item, void *ctx) { if (server.memory_tracking_enabled) oldsize = kvobjAllocSize(kv); sds field = entryGetField(e); + + /* Collect expired field for subkey notification (before deletion) */ + if (expCtx->vexpired) + vecPush(expCtx->vexpired, createStringObject(field, sdslen(field))); + propagateHashFieldDeletion(expCtx->db, key, field, sdslen(field)); /* update keysizes */ @@ -3816,7 +3941,7 @@ static void httlGenericCommand(client *c, const char *cmd, long long basetime, i */ static void hexpireGenericCommand(client 
*c, long long basetime, int unit) { HashCommandArgs args; - int fieldsNotSet = 0, updated = 0, deleted = 0; + int fieldsNotSet = 0; int64_t oldlen, newlen; robj *keyArg = c->argv[1]; size_t oldsize = 0; @@ -3852,12 +3977,20 @@ static void hexpireGenericCommand(client *c, long long basetime, int unit) { int *fieldsToRemove = NULL; int removeCount = 0; + /* Track fields for subkey notifications. */ + fieldvec fvupdated, fvdeleted; + vec *vupdated = fieldvecInit(&fvupdated, args.fieldCount); + vec *vdeleted = fieldvecInit(&fvdeleted, args.fieldCount); + for (int i = 0; i < args.fieldCount; i++) { int fieldPos = args.firstFieldPos + i; sds field = c->argv[fieldPos]->ptr; SetExRes res = hashTypeSetEx(hashObj, field, args.expireTime, &exCtx); - updated += (res == HSETEX_OK); - deleted += (res == HSETEX_DELETED); + if (res == HSETEX_OK) { + vecPush(vupdated, c->argv[fieldPos]); + } else if (res == HSETEX_DELETED) { + vecPush(vdeleted, c->argv[fieldPos]); + } if (unlikely(res != HSETEX_OK)) { if (fieldsToRemove == NULL) { @@ -3875,11 +4008,13 @@ static void hexpireGenericCommand(client *c, long long basetime, int unit) { if (server.memory_tracking_enabled) updateSlotAllocSize(c->db, getKeySlot(keyArg->ptr), hashObj, oldsize, kvobjAllocSize(hashObj)); - if (deleted + updated > 0) { - server.dirty += deleted + updated; + if (vecSize(vdeleted) + vecSize(vupdated) > 0) { + server.dirty += vecSize(vdeleted) + vecSize(vupdated); keyModified(c, c->db, keyArg, hashObj, 1); - notifyKeyspaceEvent(NOTIFY_HASH, deleted ? 
"hdel" : "hexpire", - keyArg, c->db->id); + if (vecSize(vdeleted)) notifyKeyspaceEventWithSubkeys(NOTIFY_HASH, "hdel", + keyArg, c->db->id, (robj**)vecData(vdeleted), vecSize(vdeleted)); + if (vecSize(vupdated)) notifyKeyspaceEventWithSubkeys(NOTIFY_HASH, "hexpire", + keyArg, c->db->id, (robj**)vecData(vupdated), vecSize(vupdated)); } newlen = (int64_t) hashTypeLength(hashObj, 0); @@ -3896,7 +4031,9 @@ static void hexpireGenericCommand(client *c, long long basetime, int unit) { /* Avoid propagating command if not even one field was updated (Either because * the time is in the past, and corresponding HDELs were sent, or conditions * not met) then it is useless and invalid to propagate command with no fields */ - if (updated == 0) { + if (vecSize(vupdated) == 0) { + vecRelease(vupdated); + vecRelease(vdeleted); preventCommandPropagation(c); zfree(fieldsToRemove); return; @@ -3917,13 +4054,16 @@ static void hexpireGenericCommand(client *c, long long basetime, int unit) { for (int i = removeCount - 1; i >= 0; i--) { rewriteClientCommandArgument(c, fieldsToRemove[i], NULL); } - robj *newFieldCount = createStringObjectFromLongLong(updated); + robj *newFieldCount = createStringObjectFromLongLong(vecSize(vupdated)); rewriteClientCommandArgument(c, args.fieldsPos + 1, newFieldCount); decrRefCount(newFieldCount); } if (fieldsToRemove) zfree(fieldsToRemove); + + vecRelease(vupdated); + vecRelease(vdeleted); } /* HPEXPIRE key milliseconds [ NX | XX | GT | LT] FIELDS numfields */ @@ -3970,7 +4110,6 @@ void hpexpiretimeCommand(client *c) { /* HPERSIST key FIELDS numfields */ void hpersistCommand(client *c) { long numFields = 0, numFieldsAt = 3; - int changed = 0; /* Used to determine whether to send a notification. */ /* Read the hash object */ kvobj *hashObj = lookupKeyWrite(c->db, c->argv[1]); @@ -4003,6 +4142,10 @@ void hpersistCommand(client *c) { return; } + /* Track which fields were successfully persisted for subkey notification. 
*/ + fieldvec fvpersisted; + vec *vpersisted = fieldvecInit(&fvpersisted, numFields); + if (hashObj->encoding == OBJ_ENCODING_LISTPACK) { addReplyArrayLen(c, numFields); for (int i = 0 ; i < numFields ; i++) { @@ -4018,6 +4161,7 @@ void hpersistCommand(client *c) { else addReplyLongLong(c, HFE_PERSIST_NO_TTL); } + vecRelease(vpersisted); return; } else if (hashObj->encoding == OBJ_ENCODING_LISTPACK_EX) { long long prevExpire; @@ -4059,7 +4203,7 @@ void hpersistCommand(client *c) { if (server.memory_tracking_enabled) updateSlotAllocSize(c->db, getKeySlot(c->argv[1]->ptr), hashObj, oldsize, kvobjAllocSize(hashObj)); addReplyLongLong(c, HFE_PERSIST_OK); - changed = 1; + vecPush(vpersisted, c->argv[numFieldsAt + 1 + i]); } } else if (hashObj->encoding == OBJ_ENCODING_HT) { dict *d = hashObj->ptr; @@ -4091,7 +4235,7 @@ void hpersistCommand(client *c) { hfieldPersist(hashObj, entry); addReplyLongLong(c, HFE_PERSIST_OK); - changed = 1; + vecPush(vpersisted, c->argv[numFieldsAt + 1 + i]); } if (server.memory_tracking_enabled) updateSlotAllocSize(c->db, getKeySlot(c->argv[1]->ptr), hashObj, oldsize, kvobjAllocSize(hashObj)); @@ -4101,9 +4245,11 @@ void hpersistCommand(client *c) { /* Generates a hpersist event if the expiry time associated with any field * has been successfully deleted. */ - if (changed) { - notifyKeyspaceEvent(NOTIFY_HASH, "hpersist", c->argv[1], c->db->id); + if (vecSize(vpersisted)) { + notifyKeyspaceEventWithSubkeys(NOTIFY_HASH, "hpersist", c->argv[1], + c->db->id, (robj**)vecData(vpersisted), vecSize(vpersisted)); keyModified(c, c->db, c->argv[1], hashObj, 1); server.dirty++; } + vecRelease(vpersisted); } diff --git a/src/vector.c b/src/vector.c index e5809dabb..11859dbfe 100644 --- a/src/vector.c +++ b/src/vector.c @@ -54,22 +54,12 @@ void vecClear(vec *v) { v->size = 0; } -/* Return the number of elements in the vector. */ -size_t vecSize(const vec *v) { - return v->size; -} - /* Get element at index. index must be < vecSize(v). 
*/ void *vecGet(const vec *v, size_t index) { assert(index < v->size); return v->data[index]; } -/* Return the contiguous backing array. */ -void **vecData(vec *v) { - return v->data; -} - /* Ensure capacity is at least mincap. */ void vecReserve(vec *v, size_t mincap) { void **newdata; @@ -90,7 +80,7 @@ void vecReserve(vec *v, size_t mincap) { /* Append one element, growing storage as needed. */ void vecPush(vec *v, void *value) { - if (v->size == v->cap) { + if (unlikely(v->size == v->cap)) { size_t newcap = (v->cap > 0) ? v->cap * 2 : VEC_DEFAULT_INITCAP; vecReserve(v, newcap); } diff --git a/src/vector.h b/src/vector.h index a3ea28505..cdffa792b 100644 --- a/src/vector.h +++ b/src/vector.h @@ -62,6 +62,12 @@ typedef struct vec { void **stack; /* Optional stack buffer. */ } vec; +/* Return the contiguous backing array. */ +#define vecData(v) ((v)->data) + +/* Return the number of elements in the vector. */ +#define vecSize(v) ((v)->size) + /* Initialize a vector */ void vecInit(vec *v, void **stack, size_t initcap); @@ -71,14 +77,9 @@ void vecRelease(vec *v); /* Reset the logical length to zero while preserving allocated storage. */ void vecClear(vec *v); -size_t vecSize(const vec *v); - /* Requires index < vecSize(v). */ void *vecGet(const vec *v, size_t index); -/* Return the contiguous backing array. */ -void **vecData(vec *v); - /* Ensure capacity is at least mincap. 
*/ void vecReserve(vec *v, size_t mincap); diff --git a/tests/modules/keyspace_events.c b/tests/modules/keyspace_events.c index 146261f6e..8dc9e1d1c 100644 --- a/tests/modules/keyspace_events.c +++ b/tests/modules/keyspace_events.c @@ -29,6 +29,11 @@ RedisModuleDict *module_event_log = NULL; /** Counts how many deleted KSN we got on keys with a prefix of "count_dels_" **/ static size_t dels = 0; +/* Subkey notification log */ +#define SUBKEY_LOG_MAX 256 +static char subkey_log[SUBKEY_LOG_MAX][512]; +static int subkey_log_count = 0; + static int KeySpace_NotificationLoaded(RedisModuleCtx *ctx, int type, const char *event, RedisModuleString *key){ REDISMODULE_NOT_USED(ctx); REDISMODULE_NOT_USED(type); @@ -298,6 +303,104 @@ static int cmdGetDels(RedisModuleCtx *ctx, RedisModuleString **argv, int argc) { return RedisModule_ReplyWithLongLong(ctx, dels); } +/* Subkey notification callback */ +static void KeySpace_NotificationSubkeys(RedisModuleCtx *ctx, int type, const char *event, + RedisModuleString *key, RedisModuleString **subkeys, int count) { + REDISMODULE_NOT_USED(ctx); + REDISMODULE_NOT_USED(type); + + if (subkey_log_count >= SUBKEY_LOG_MAX) return; + + const char *key_str = RedisModule_StringPtrLen(key, NULL); + + /* Format: "<event> <key> <count> <subkey1> ..." 
or "<event> <key> 0" */ + char buf[512]; + int off = snprintf(buf, sizeof(buf), "%s %s %d", event, key_str, count); + for (int i = 0; i < count && (size_t)off < sizeof(buf) - 1; i++) { + const char *sk = RedisModule_StringPtrLen(subkeys[i], NULL); + off += snprintf(buf + off, sizeof(buf) - off, " %s", sk); + } + snprintf(subkey_log[subkey_log_count], sizeof(subkey_log[0]), "%s", buf); + subkey_log_count++; +} + +/* keyspace.get_subkey_events — return all logged subkey events as an array */ +static int cmdGetSubkeyEvents(RedisModuleCtx *ctx, RedisModuleString **argv, int argc) { + REDISMODULE_NOT_USED(argv); + REDISMODULE_NOT_USED(argc); + RedisModule_ReplyWithArray(ctx, subkey_log_count); + for (int i = 0; i < subkey_log_count; i++) { + RedisModule_ReplyWithCString(ctx, subkey_log[i]); + } + return REDISMODULE_OK; +} + +/* keyspace.reset_subkey_events — clear the log */ +static int cmdResetSubkeyEvents(RedisModuleCtx *ctx, RedisModuleString **argv, int argc) { + REDISMODULE_NOT_USED(argv); + REDISMODULE_NOT_USED(argc); + subkey_log_count = 0; + return RedisModule_ReplyWithSimpleString(ctx, "OK"); +} + +/* keyspace.notify_with_subkeys <key> <subkey1> [subkey2 ...] 
— trigger a module subkey notification */ +static int cmdNotifyWithSubkeys(RedisModuleCtx *ctx, RedisModuleString **argv, int argc) { + if (argc < 3) return RedisModule_WrongArity(ctx); + + RedisModuleString *key = argv[1]; + RedisModuleString **subkeys = &argv[2]; + int count = argc - 2; + + RedisModule_NotifyKeyspaceEventWithSubkeys(ctx, REDISMODULE_NOTIFY_HASH, "module_subkey_event", key, subkeys, count); + return RedisModule_ReplyWithSimpleString(ctx, "OK"); +} + +/* keyspace.subscribe_subkeys — subscribe with NONE flag (all events) */ +static int cmdSubscribeSubkeys(RedisModuleCtx *ctx, RedisModuleString **argv, int argc) { + REDISMODULE_NOT_USED(argv); + REDISMODULE_NOT_USED(argc); + if (RedisModule_SubscribeToKeyspaceEventsWithSubkeys(ctx, REDISMODULE_NOTIFY_HASH | REDISMODULE_NOTIFY_GENERIC, + REDISMODULE_NOTIFY_FLAG_NONE, KeySpace_NotificationSubkeys) != REDISMODULE_OK) { + return RedisModule_ReplyWithError(ctx, "ERR subscribe failed"); + } + return RedisModule_ReplyWithSimpleString(ctx, "OK"); +} + +/* keyspace.unsubscribe_subkeys — unsubscribe the subkey callback */ +static int cmdUnsubscribeSubkeys(RedisModuleCtx *ctx, RedisModuleString **argv, int argc) { + REDISMODULE_NOT_USED(argv); + REDISMODULE_NOT_USED(argc); + if (RedisModule_UnsubscribeFromKeyspaceEventsWithSubkeys(ctx, REDISMODULE_NOTIFY_HASH | REDISMODULE_NOTIFY_GENERIC, + REDISMODULE_NOTIFY_FLAG_NONE, KeySpace_NotificationSubkeys) != REDISMODULE_OK) { + return RedisModule_ReplyWithError(ctx, "ERR unsubscribe failed"); + } + return RedisModule_ReplyWithSimpleString(ctx, "OK"); +} + +/* keyspace.subscribe_require_subkeys — subscribe with SUBKEYS_REQUIRED flag */ +static int cmdSubscribeRequireSubkeys(RedisModuleCtx *ctx, RedisModuleString **argv, int argc) { + REDISMODULE_NOT_USED(argv); + REDISMODULE_NOT_USED(argc); + if (RedisModule_SubscribeToKeyspaceEventsWithSubkeys(ctx, REDISMODULE_NOTIFY_HASH | REDISMODULE_NOTIFY_GENERIC, + REDISMODULE_NOTIFY_FLAG_SUBKEYS_REQUIRED, + 
KeySpace_NotificationSubkeys) != REDISMODULE_OK) { + return RedisModule_ReplyWithError(ctx, "ERR subscribe failed"); + } + return RedisModule_ReplyWithSimpleString(ctx, "OK"); +} + +/* keyspace.unsubscribe_require_subkeys — unsubscribe the SUBKEYS_REQUIRED callback */ +static int cmdUnsubscribeRequireSubkeys(RedisModuleCtx *ctx, RedisModuleString **argv, int argc) { + REDISMODULE_NOT_USED(argv); + REDISMODULE_NOT_USED(argc); + if (RedisModule_UnsubscribeFromKeyspaceEventsWithSubkeys(ctx, REDISMODULE_NOTIFY_HASH | REDISMODULE_NOTIFY_GENERIC, + REDISMODULE_NOTIFY_FLAG_SUBKEYS_REQUIRED, + KeySpace_NotificationSubkeys) != REDISMODULE_OK) { + return RedisModule_ReplyWithError(ctx, "ERR unsubscribe failed"); + } + return RedisModule_ReplyWithSimpleString(ctx, "OK"); +} + static RedisModuleNotificationFunc get_callback_for_event(int event_mask) { switch(event_mask) { case REDISMODULE_NOTIFY_LOADED: @@ -442,6 +545,34 @@ int RedisModule_OnLoad(RedisModuleCtx *ctx, RedisModuleString **argv, int argc) return REDISMODULE_ERR; } + if (RedisModule_CreateCommand(ctx, "keyspace.subscribe_subkeys", cmdSubscribeSubkeys, "", 0, 0, 0) == REDISMODULE_ERR) { + return REDISMODULE_ERR; + } + + if (RedisModule_CreateCommand(ctx, "keyspace.unsubscribe_subkeys", cmdUnsubscribeSubkeys, "", 0, 0, 0) == REDISMODULE_ERR) { + return REDISMODULE_ERR; + } + + if (RedisModule_CreateCommand(ctx, "keyspace.get_subkey_events", cmdGetSubkeyEvents, "readonly", 0, 0, 0) == REDISMODULE_ERR) { + return REDISMODULE_ERR; + } + + if (RedisModule_CreateCommand(ctx, "keyspace.reset_subkey_events", cmdResetSubkeyEvents, "", 0, 0, 0) == REDISMODULE_ERR) { + return REDISMODULE_ERR; + } + + if (RedisModule_CreateCommand(ctx, "keyspace.notify_with_subkeys", cmdNotifyWithSubkeys, "write", 0, 0, 0) == REDISMODULE_ERR) { + return REDISMODULE_ERR; + } + + if (RedisModule_CreateCommand(ctx, "keyspace.subscribe_require_subkeys", cmdSubscribeRequireSubkeys, "", 0, 0, 0) == REDISMODULE_ERR) { + return REDISMODULE_ERR; + } + 
+ if (RedisModule_CreateCommand(ctx, "keyspace.unsubscribe_require_subkeys", cmdUnsubscribeRequireSubkeys, "", 0, 0, 0) == REDISMODULE_ERR) { + return REDISMODULE_ERR; + } + if (argc == 1) { const char *ptr = RedisModule_StringPtrLen(argv[0], NULL); if (!strcasecmp(ptr, "noload")) { diff --git a/tests/unit/moduleapi/keyspace_events.tcl b/tests/unit/moduleapi/keyspace_events.tcl index 5d62a7178..49c4d5da1 100644 --- a/tests/unit/moduleapi/keyspace_events.tcl +++ b/tests/unit/moduleapi/keyspace_events.tcl @@ -116,6 +116,139 @@ tags "modules external:skip" { assert_equal [r get testkeyspace:expired] 1 } + test "Subkey notification: subscribe starts callback" { + r keyspace.subscribe_subkeys + r keyspace.reset_subkey_events + r config set notify-keyspace-events "" + } + + test "Subkey notification: HSET triggers module subkey callback" { + r keyspace.reset_subkey_events + r hset myhash f1 v1 f2 v2 + set events [r keyspace.get_subkey_events] + assert_equal 1 [llength $events] + assert_equal "hset myhash 2 f1 f2" [lindex $events 0] + r del myhash + } + + test "Subkey notification: HDEL triggers module subkey callback" { + r hset myhash f1 v1 f2 v2 + r keyspace.reset_subkey_events + r hdel myhash f1 + set events [r keyspace.get_subkey_events] + assert_equal 1 [llength $events] + assert_equal "hdel myhash 1 f1" [lindex $events 0] + r del myhash + } + + test "Subkey notification: non-subkey event calls subkey callback with count=0" { + r hset myhash f1 v1 + r keyspace.reset_subkey_events + r del myhash + set events [r keyspace.get_subkey_events] + # DEL is NOTIFY_GENERIC — our callback is registered for + # HASH|GENERIC, so it should be called with subkeys=NULL, count=0. 
+ assert_equal 1 [llength $events] + assert_equal "del myhash 0" [lindex $events 0] + } + + test "Subkey notification: module-triggered NotifyKeyspaceEventWithSubkeys" { + r keyspace.reset_subkey_events + r keyspace.notify_with_subkeys mykey sk1 sk2 sk3 + set events [r keyspace.get_subkey_events] + assert_equal 1 [llength $events] + assert_equal "module_subkey_event mykey 3 sk1 sk2 sk3" [lindex $events 0] + } + + test "Subkey notification: lazy hash field expiry triggers hexpired with subkeys" { + r debug set-active-expire 0 + r del myhash + r hset myhash f1 v1 f2 v2 f3 v3 + r hpexpire myhash 10 FIELDS 2 f1 f2 + r keyspace.reset_subkey_events + after 100 + r hmget myhash f1 f2 + assert_equal "hexpired myhash 2 f1 f2" [lindex [r keyspace.get_subkey_events] 0] + r debug set-active-expire 1 + } {OK} {needs:debug} + + test "Subkey notification: active hash field expiry triggers hexpired with subkeys" { + r del myhash + r hset myhash f1 v1 f2 v2 + r keyspace.reset_subkey_events + r hpexpire myhash 10 FIELDS 2 f1 f2 + # wait for active expiry to kick in + wait_for_condition 50 100 { + [r exists myhash] == 0 + } else { + fail "Fields not expired by active expiry" + } + # fields order is undefined + assert_match "hexpired myhash 2 f* f*" [lindex [r keyspace.get_subkey_events] 1] + r del myhash + } + + test "Subkey notification: unsubscribe stops callback and resubscribe resumes" { + r keyspace.reset_subkey_events + r hset myhash f1 v1 + set events [r keyspace.get_subkey_events] + assert_equal 1 [llength $events] + + # Unsubscribe — events should stop + r keyspace.unsubscribe_subkeys + r keyspace.reset_subkey_events + r hset myhash f2 v2 + set events [r keyspace.get_subkey_events] + assert_equal 0 [llength $events] + # active expire should not trigger subkey callback + r hpexpire myhash 10 FIELDS 2 f1 f2 + wait_for_condition 50 100 { + [r exists myhash] == 0 + } else { + fail "Fields not expired by active expiry" + } + set events [r keyspace.get_subkey_events] + 
assert_equal 0 [llength $events] + + # Re-subscribe — events should resume + r keyspace.subscribe_subkeys + r del myhash + r hset myhash f1 v1 f2 v2 + r keyspace.reset_subkey_events + r hpexpire myhash 10 FIELDS 2 f1 f2 + assert_match "hexpire myhash 2 f* f*" [lindex [r keyspace.get_subkey_events] 0] + # active expire should also resume subkey callback + wait_for_condition 50 100 { + [r exists myhash] == 0 + } else { + fail "Fields not expired by active expiry" + } + assert_match "hexpired myhash 2 f* f*" [lindex [r keyspace.get_subkey_events] 1] + + r keyspace.unsubscribe_subkeys + r keyspace.reset_subkey_events + r del myhash + } + + test "Subkey notification: SUBKEYS_REQUIRED flag skips events without subkeys" { + r keyspace.subscribe_require_subkeys + r keyspace.reset_subkey_events + + # HSET has subkeys — should trigger callback + r hset myhash f1 v1 f2 v2 + set events [r keyspace.get_subkey_events] + assert_equal 1 [llength $events] + assert_equal "hset myhash 2 f1 f2" [lindex $events 0] + + # DEL has no subkeys — the callback should be skipped. 
+ r keyspace.reset_subkey_events + r del myhash + set events [r keyspace.get_subkey_events] + assert_equal 0 [llength $events] + + r keyspace.unsubscribe_require_subkeys + } + test "Unload the module - testkeyspace" { assert_equal {OK} [r module unload testkeyspace] } @@ -125,6 +258,38 @@ tags "modules external:skip" { } } + # Replication test: replica module receives subkey notifications + start_server [list overrides [list loadmodule "$testmodule"]] { + set master [srv 0 client] + set master_host [srv 0 host] + set master_port [srv 0 port] + + start_server [list overrides [list loadmodule "$testmodule"]] { + set replica [srv 0 client] + + $replica replicaof $master_host $master_port + wait_for_sync $replica + + test "Subkey notification: replica module receives subkey callback after replication" { + $master keyspace.subscribe_subkeys + $replica keyspace.subscribe_subkeys + $replica keyspace.reset_subkey_events + + $master hset myhash f1 v1 f2 v2 + + wait_for_ofs_sync $master $replica + + set events [$replica keyspace.get_subkey_events] + assert_equal 1 [llength $events] + assert_equal "hset myhash 2 f1 f2" [lindex $events 0] + + $master del myhash + $master keyspace.unsubscribe_subkeys + $replica keyspace.unsubscribe_subkeys + } + } + } + start_server {} { test {OnLoad failure will handle un-registration} { catch {r module load $testmodule noload} diff --git a/tests/unit/pubsub.tcl b/tests/unit/pubsub.tcl index 24f779ffc..115970a31 100644 --- a/tests/unit/pubsub.tcl +++ b/tests/unit/pubsub.tcl @@ -602,7 +602,6 @@ start_server {tags {"pubsub network"}} { after 15 r hget myhash f2 assert_equal "pmessage * __keyspace@${db}__:myhash hexpire" [$rd1 read] - assert_equal "pmessage * __keyspace@${db}__:myhash hexpired" [$rd1 read] assert_equal "pmessage * __keyspace@${db}__:myhash del" [$rd1 read] # FNX on logically expired field @@ -962,6 +961,364 @@ start_server {tags {"pubsub network"}} { $rd1 close } + ### Subkey-level notification tests for HASH type ### + + # 
Helper: build expected payload "event|len:field0,len:field1,..." + proc build_expected_payload {event prefix count} { + set parts {} + for {set i 0} {$i < $count} {incr i} { + set f "${prefix}${i}" + lappend parts "[string length $f]:$f" + } + return "${event}|[join $parts ,]" + } + + # Compare subkey notification payloads as sets (order-insensitive). + # Parses "event|f1,f2,..." and checks event matches and fields match as sets. + proc assert_subkey_payload_equal {expected actual} { + set ep [split $expected "|"] + set ap [split $actual "|"] + assert_equal [lindex $ep 0] [lindex $ap 0] ;# event name + set ef [lsort [split [lindex $ep 1] ","]] + set af [lsort [split [lindex $ap 1] ","]] + assert_equal $ef $af + } + + # Generate N field-value pairs: {f0 v0 f1 v1 ...} + proc gen_field_values {prefix n} { + set args {} + for {set i 0} {$i < $n} {incr i} { + lappend args "${prefix}${i}" "v${i}" + } + return $args + } + + # Generate N field names: {f0 f1 ...} + proc gen_fields {prefix n} { + set fields {} + for {set i 0} {$i < $n} {incr i} { + lappend fields "${prefix}${i}" + } + return $fields + } + + # Subkey notification: subkeyspace channel + foreach {type max_lp_entries} {listpackex 512 hashtable 0} { + r config set hash-max-listpack-entries $max_lp_entries + r config set notify-keyspace-events Sh + set rd1 [redis_deferring_client] + assert_equal {1} [subscribe $rd1 "__subkeyspace@${db}__:myhash"] + + test "Subkey notifications: subkeyspace - HSET single field ($type)" { + r del myhash + r hset myhash f1 v1 + assert_equal "message __subkeyspace@${db}__:myhash hset|2:f1" [$rd1 read] + } + + test "Subkey notifications: subkeyspace - HINCRBY ($type)" { + r del myhash + r hset myhash counter 10 + r hincrby myhash counter 5 + assert_equal "message __subkeyspace@${db}__:myhash hset|7:counter" [$rd1 read] + assert_equal "message __subkeyspace@${db}__:myhash hincrby|7:counter" [$rd1 read] + } + + test "Subkey notifications: subkeyspace - HSETNX ($type)" { + r del myhash + 
r hsetnx myhash newfield val + assert_equal "message __subkeyspace@${db}__:myhash hset|8:newfield" [$rd1 read] + } + + test "Subkey notifications: subkeyspace - HINCRBYFLOAT ($type)" { + r del myhash + r hset myhash counter 10.5 + r hincrbyfloat myhash counter 2.5 + assert_equal "message __subkeyspace@${db}__:myhash hset|7:counter" [$rd1 read] + assert_equal "message __subkeyspace@${db}__:myhash hincrbyfloat|7:counter" [$rd1 read] + } + + # Test with N=3 (stack path, within FIELDS_STACK_SIZE=16) and + # N=32 (heap path, exceeds FIELDS_STACK_SIZE). + foreach N {3 32} { + + test "Subkey notifications: HSET $N fields ($type, [expr {$N <= 16 ? {stack} : {heap}}])" { + r del myhash + r hset myhash {*}[gen_field_values "f" $N] + set expected [build_expected_payload "hset" "f" $N] + assert_equal "message __subkeyspace@${db}__:myhash $expected" [$rd1 read] + } + + test "Subkey notifications: HDEL $N fields ($type, [expr {$N <= 16 ? {stack} : {heap}}])" { + r del myhash + r hset myhash {*}[gen_field_values "f" $N] + $rd1 read ;# consume hset notification + r hdel myhash {*}[gen_fields "f" $N] + set expected [build_expected_payload "hdel" "f" $N] + assert_equal "message __subkeyspace@${db}__:myhash $expected" [$rd1 read] + } + + test "Subkey notifications: HGETDEL $N fields ($type, [expr {$N <= 16 ? {stack} : {heap}}])" { + r del myhash + r hset myhash {*}[gen_field_values "f" $N] + $rd1 read ;# consume hset notification + r hgetdel myhash FIELDS $N {*}[gen_fields "f" $N] + set expected [build_expected_payload "hdel" "f" $N] + assert_equal "message __subkeyspace@${db}__:myhash $expected" [$rd1 read] + } + + test "Subkey notifications: HEXPIRE $N fields ($type, [expr {$N <= 16 ? 
{stack} : {heap}}])" { + r del myhash + r hset myhash {*}[gen_field_values "f" $N] + $rd1 read ;# consume hset notification + r hexpire myhash 1000 FIELDS $N {*}[gen_fields "f" $N] + set expected [build_expected_payload "hexpire" "f" $N] + assert_equal "message __subkeyspace@${db}__:myhash $expected" [$rd1 read] + } + + test "Subkey notifications: HEXPIRE past timestamp $N fields ($type, [expr {$N <= 16 ? {stack} : {heap}}])" { + r del myhash + r hset myhash {*}[gen_field_values "f" $N] + $rd1 read ;# consume hset notification + r hexpireat myhash 1 FIELDS $N {*}[gen_fields "f" $N] + set expected [build_expected_payload "hdel" "f" $N] + assert_equal "message __subkeyspace@${db}__:myhash $expected" [$rd1 read] + } + + test "Subkey notifications: HPERSIST $N fields ($type, [expr {$N <= 16 ? {stack} : {heap}}])" { + r del myhash + set fields [gen_fields "f" $N] + r hset myhash {*}[gen_field_values "f" $N] + r hexpire myhash 1000 FIELDS $N {*}$fields + $rd1 read ;# consume hset + $rd1 read ;# consume hexpire + r hpersist myhash FIELDS $N {*}$fields + set expected [build_expected_payload "hpersist" "f" $N] + assert_equal "message __subkeyspace@${db}__:myhash $expected" [$rd1 read] + } + + test "Subkey notifications: HGETEX with expire $N fields ($type, [expr {$N <= 16 ? {stack} : {heap}}])" { + r del myhash + r hset myhash {*}[gen_field_values "f" $N] + $rd1 read ;# consume hset + r hgetex myhash EX 1000 FIELDS $N {*}[gen_fields "f" $N] + set expected [build_expected_payload "hexpire" "f" $N] + assert_equal "message __subkeyspace@${db}__:myhash $expected" [$rd1 read] + } + + test "Subkey notifications: HGETEX with persist $N fields ($type, [expr {$N <= 16 ? 
{stack} : {heap}}])" { + r del myhash + set fields [gen_fields "f" $N] + r hset myhash {*}[gen_field_values "f" $N] + r hexpire myhash 1000 FIELDS $N {*}$fields + $rd1 read ;# consume hset + $rd1 read ;# consume hexpire + r hgetex myhash PERSIST FIELDS $N {*}$fields + set expected [build_expected_payload "hpersist" "f" $N] + assert_equal "message __subkeyspace@${db}__:myhash $expected" [$rd1 read] + } + + test "Subkey notifications: HGETEX past timestamp $N fields ($type, [expr {$N <= 16 ? {stack} : {heap}}])" { + r del myhash + r hset myhash {*}[gen_field_values "f" $N] + $rd1 read ;# consume hset + r hgetex myhash PX 0 FIELDS $N {*}[gen_fields "f" $N] + set expected [build_expected_payload "hdel" "f" $N] + assert_equal "message __subkeyspace@${db}__:myhash $expected" [$rd1 read] + } + + test "Subkey notifications: HSETEX $N fields ($type, [expr {$N <= 16 ? {stack} : {heap}}])" { + r del myhash + r hsetex myhash EX 1000 FIELDS $N {*}[gen_field_values "f" $N] + set expected_hset [build_expected_payload "hset" "f" $N] + set expected_hexpire [build_expected_payload "hexpire" "f" $N] + assert_equal "message __subkeyspace@${db}__:myhash $expected_hset" [$rd1 read] + assert_equal "message __subkeyspace@${db}__:myhash $expected_hexpire" [$rd1 read] + } + + test "Subkey notifications: HSETEX past timestamp $N fields ($type, [expr {$N <= 16 ? {stack} : {heap}}])" { + r del myhash + r hsetex myhash PX 0 FIELDS $N {*}[gen_field_values "f" $N] + set expected_hset [build_expected_payload "hset" "f" $N] + set expected_hdel [build_expected_payload "hdel" "f" $N] + assert_equal "message __subkeyspace@${db}__:myhash $expected_hset" [$rd1 read] + assert_equal "message __subkeyspace@${db}__:myhash $expected_hdel" [$rd1 read] + } + + test "Subkey notifications: lazy field expiry triggers hexpired $N fields ($type, [expr {$N <= 16 ? {stack} : {heap}}])" { + r del myhash + # Create N+1 fields, expire N of them; keep one to prevent hash deletion. 
+ set fields [gen_fields "f" $N] + set args [gen_field_values "f" $N] + lappend args "keep" "val" + r hset myhash {*}$args + r debug set-active-expire 0 + r hpexpire myhash 10 FIELDS $N {*}$fields + $rd1 read ;# consume hset + $rd1 read ;# consume hexpire + # Trigger lazy expiry by reading the fields + after 100 + r hmget myhash {*}$fields + set expected_hexpired [build_expected_payload "hexpired" "f" $N] + assert_equal "message __subkeyspace@${db}__:myhash $expected_hexpired" [$rd1 read] + r debug set-active-expire 1 + } {OK} {needs:debug} + + test "Subkey notifications: active field expiry triggers hexpired $N fields ($type, [expr {$N <= 16 ? {stack} : {heap}}])" { + r del myhash + # Create N+1 fields, expire N of them; keep one to prevent hash deletion. + set fields [gen_fields "f" $N] + set args [gen_field_values "f" $N] + lappend args "keep" "val" + r hset myhash {*}$args + r hpexpire myhash 10 FIELDS $N {*}$fields + $rd1 read ;# consume hset + $rd1 read ;# consume hexpire + # Wait for active expiry; field order depends on hash table iteration, + # so compare as set. + set expected_hexpired [build_expected_payload "hexpired" "f" $N] + set actual [$rd1 read] + set prefix "message __subkeyspace@${db}__:myhash " + assert_equal $prefix [string range $actual 0 [expr {[string length $prefix]-1}]] + assert_subkey_payload_equal $expected_hexpired [string range $actual [string length $prefix] end] + } + } ;# end foreach N + $rd1 close + } ;# end foreach type + + # Subkey notification format tests for subkeyevent/subkeyspaceitem/subkeyspaceevent + # Full command coverage is done via subkeyspace channel above; here we only verify channel format.
+ foreach {type max_lp_entries} {listpackex 512 hashtable 0} { + r config set hash-max-listpack-entries $max_lp_entries + + test "Subkey notifications: subkeyevent format ($type)" { + r config set notify-keyspace-events Th + r del myhash + set rd1 [redis_deferring_client] + assert_equal {1} [subscribe $rd1 "__subkeyevent@${db}__:hset"] + r hset myhash f1 v1 f2 v2 f3 v3 + assert_equal "message __subkeyevent@${db}__:hset 6:myhash|2:f1,2:f2,2:f3" [$rd1 read] + $rd1 close + } + + test "Subkey notifications: subkeyspaceitem format ($type)" { + r config set notify-keyspace-events Ih + r del myhash + set rd1 [redis_deferring_client] + $rd1 subscribe "__subkeyspaceitem@${db}__:myhash\nf1" + $rd1 read ;# consume subscribe confirmation + r hset myhash f1 v1 + set msg [$rd1 read] + assert_equal "message" [lindex $msg 0] + assert_equal "__subkeyspaceitem@${db}__:myhash\nf1" [lindex $msg 1] + assert_equal "hset" [lindex $msg 2] + $rd1 close + } + + test "Subkey notifications: subkeyspaceitem per-subkey delivery with psubscribe ($type)" { + r config set notify-keyspace-events Ih + r del myhash + set rd1 [redis_deferring_client] + assert_equal {1} [psubscribe $rd1 "__subkeyspaceitem@${db}__:myhash*"] + r hset myhash f1 v1 f2 v2 + # Should get one notification per subkey + set msg1 [$rd1 read] + set msg2 [$rd1 read] + assert_equal "pmessage" [lindex $msg1 0] + assert_equal "__subkeyspaceitem@${db}__:myhash\nf1" [lindex $msg1 2] + assert_equal "hset" [lindex $msg1 3] + assert_equal "pmessage" [lindex $msg2 0] + assert_equal "__subkeyspaceitem@${db}__:myhash\nf2" [lindex $msg2 2] + assert_equal "hset" [lindex $msg2 3] + $rd1 close + } + + test "Subkey notifications: subkeyspaceitem skips key with newline ($type)" { + r config set notify-keyspace-events Ih + r del "key\nwith\nnewline" + set rd1 [redis_deferring_client] + assert_equal {1} [psubscribe $rd1 "__subkeyspaceitem@${db}__:*"] + r hset "key\nwith\nnewline" f1 v1 + # Normal key to verify notifications still work + r hset 
normalkey f1 v1 + # Should only get notification for normalkey + set msg [$rd1 read] + assert_equal "pmessage" [lindex $msg 0] + assert_equal "__subkeyspaceitem@${db}__:normalkey\nf1" [lindex $msg 2] + assert_equal "hset" [lindex $msg 3] + r del "key\nwith\nnewline" + r del normalkey + $rd1 close + } + + test "Subkey notifications: subkeyspaceevent format ($type)" { + r config set notify-keyspace-events Vh + r del myhash + set rd1 [redis_deferring_client] + assert_equal {1} [subscribe $rd1 "__subkeyspaceevent@${db}__:hset|myhash"] + r hset myhash f1 v1 f2 v2 + assert_equal "message __subkeyspaceevent@${db}__:hset|myhash 2:f1,2:f2" [$rd1 read] + $rd1 close + } + } ; + + # Test all 4 channels enabled simultaneously + test "Subkey notifications: all 4 channels enabled simultaneously" { + r config set notify-keyspace-events STIVh + r del myhash + set rd_s [redis_deferring_client] + set rd_t [redis_deferring_client] + set rd_i [redis_deferring_client] + set rd_v [redis_deferring_client] + assert_equal {1} [subscribe $rd_s "__subkeyspace@${db}__:myhash"] + assert_equal {1} [subscribe $rd_t "__subkeyevent@${db}__:hset"] + assert_equal {1} [subscribe $rd_v "__subkeyspaceevent@${db}__:hset|myhash"] + $rd_i subscribe "__subkeyspaceitem@${db}__:myhash\nf1" + $rd_i read ;# consume subscribe confirmation + r hset myhash f1 v1 + assert_equal "message __subkeyspace@${db}__:myhash hset|2:f1" [$rd_s read] + assert_equal "message __subkeyevent@${db}__:hset 6:myhash|2:f1" [$rd_t read] + assert_equal "message __subkeyspaceevent@${db}__:hset|myhash 2:f1" [$rd_v read] + set msg_i [$rd_i read] + assert_equal "message" [lindex $msg_i 0] + assert_equal "__subkeyspaceitem@${db}__:myhash\nf1" [lindex $msg_i 1] + assert_equal "hset" [lindex $msg_i 2] + $rd_s close + $rd_t close + $rd_i close + $rd_v close + } + + # Test that subkey notifications are triggered on replica after replication + test "Subkey notifications: replica receives subkey notifications after replication" { + start_server 
{tags {"repl external:skip"}} { + set master [srv -1 client] + set master_host [srv -1 host] + set master_port [srv -1 port] + set replica [srv 0 client] + + $replica replicaof $master_host $master_port + wait_for_sync $replica + + # Enable subkeyspace notifications on replica + $replica config set notify-keyspace-events Sh + + # Subscribe on replica + set rd1 [redis_deferring_client -1] + assert_equal {1} [subscribe $rd1 "__subkeyspace@${db}__:myhash"] + + # Write on master + $master hset myhash f1 v1 f2 v2 + $master hpexpire myhash 100 FIELDS 2 f1 f2 + + # Replica should receive subkey notification + assert_equal "message __subkeyspace@${db}__:myhash hset|2:f1,2:f2" [$rd1 read] + assert_equal "message __subkeyspace@${db}__:myhash hexpire|2:f1,2:f2" [$rd1 read] + assert_equal "message __subkeyspace@${db}__:myhash hexpired|2:f1,2:f2" [$rd1 read] + $rd1 close + $master del myhash + } + } + test "publish to self inside multi" { r hello 3 r subscribe foo From 15cb40dac26527092bf2560b8f39e9ff59da39f4 Mon Sep 17 00:00:00 2001 From: Vitah Lin Date: Fri, 17 Apr 2026 15:30:43 +0800 Subject: [PATCH 24/32] Fix command-docs and corrupt-dump-fuzzer of OBJ_GCRA type (#15055) ### Problem While the new type `OBJ_GCRA` was added, several related code paths were not updated accordingly, leading to failures in the `reply-schemas-validator` CI job and `corrupt-dump-fuzzer.tcl` ##### reply-schemas-validator Failed CI: https://github.com/redis/redis/actions/runs/24485248057/job/71558533290#step:10:903 ```shell Traceback (most recent call last): File "/home/runner/work/redis/redis/./utils/req-res-log-validator.py", line 238, in process_file jsonschema.validate(instance=res.json, schema=req.schema, cls=schema_validator) File "/home/runner/.local/lib/python3.12/site-packages/jsonschema/validators.py", line 1121, in validate raise error jsonschema.exceptions.ValidationError: 'rate_limit' is not valid under any of the given schemas Failed validating 'oneOf' in 
schema['patternProperties']['^.*$']['properties']['group']: {'description': 'the functional group to which the command belongs', 'oneOf': [{'const': 'bitmap'}, {'const': 'cluster'}, {'const': 'connection'}, {'const': 'generic'}, {'const': 'geo'}, {'const': 'hash'}, {'const': 'hyperloglog'}, {'const': 'list'}, {'const': 'module'}, {'const': 'pubsub'}, {'const': 'scripting'}, {'const': 'sentinel'}, {'const': 'server'}, {'const': 'set'}, {'const': 'sorted-set'}, {'const': 'stream'}, {'const': 'string'}, {'const': 'transactions'}]} On instance['gcrasetvalue']['group']: 'rate_limit' ``` ##### `corrupt-dump-fuzzer.tcl` Also fixed `: Fuzzer corrupt restore payloads - sanitize_dump: yes in tests/integration/corrupt-dump-fuzzer.tcl` Failed daily test : https://github.com/redis/redis/actions/runs/24485248057/job/71558533312#step:6:8652 ```shell Server crashed (by signal: 0, err: key "gcra" not known in dictionary), with payload: "\x1C\x0A\x02\x5F\x37\xC0\x06\xC0\x00\x02\x5F\x39\xC0\x08\x02\x5F\x33\x02\x5F\x35\x02\x5F\x31\xC0\x02\xC0\x04\x0E\x00\xA9\x71\xBF\xEE\x6F\x46\xEF\xA6" violating commands: Done 1434 cycles in 600 seconds. RESTORE: successful: 601, rejected: 833 Total commands sent in traffic: 1194776, crashes during traffic: 1 (0 by signal). [: Fuzzer corrupt restore payloads - sanitize_dump: yes in tests/integration/corrupt-dump-fuzzer.tcl Expected '1' to be equal to '0' (context: type eval line 155 cmd {assert_equal $stat_terminated_in_traffic 0} proc ::test) [147/147 done]: integration/corrupt-dump-fuzzer (1201 seconds) ``` ### Changed This change completes the necessary updates across all relevant components to ensure consistent handling of the rate_limit group and restores CI stability. 
--- src/commands/command-docs.json | 3 +++ src/server.h | 10 +++++++++- tests/integration/corrupt-dump-fuzzer.tcl | 1 + tests/support/util.tcl | 3 ++- tests/unit/gcra.tcl | 8 ++++++++ 5 files changed, 23 insertions(+), 2 deletions(-) diff --git a/src/commands/command-docs.json b/src/commands/command-docs.json index 75df5b4c6..5e76c806c 100644 --- a/src/commands/command-docs.json +++ b/src/commands/command-docs.json @@ -91,6 +91,9 @@ }, { "const": "transactions" + }, + { + "const": "rate_limit" } ] }, diff --git a/src/server.h b/src/server.h index d7a6e4215..eaaf08b60 100644 --- a/src/server.h +++ b/src/server.h @@ -875,7 +875,15 @@ typedef enum { * - debug.c - xorObjectDigest, serverLogObjectDebugInfo * - defrag.c - defragKey * - module.c - RM_KeyType (and add the new keytype to redismodule.h) - * - object.c - object(create/free/dismiss/allocSize/Length) */ + * - object.c - object(create/free/dismiss/allocSize/Length) + * - tests/support/util.tcl:generate_fuzzy_traffic_on_key - add command(s) for the new object type to the `commands` dict. + * + * If the new object type requires new command group make sure to update the following: + * - src/commands/command-docs.json - update the group:oneOf map with the new group + * - utils/generate-command-code.py - add the new group to GROUPS and COMMAND_GROUP_STR arrays + * - src/acl.c - add the new group to ACLDefaultCommandCategories array + * - src/server.h - add the new group to redisCommandGroup enum + * - if needed add new KSN type related to the group - search for NOTIFY_* and REDISMODULE_NOTIFY_* defines. */ /* Extract encver / signature from a module type ID. 
*/ #define REDISMODULE_TYPE_ENCVER_BITS 10 diff --git a/tests/integration/corrupt-dump-fuzzer.tcl b/tests/integration/corrupt-dump-fuzzer.tcl index a6d911324..e69a2221b 100644 --- a/tests/integration/corrupt-dump-fuzzer.tcl +++ b/tests/integration/corrupt-dump-fuzzer.tcl @@ -59,6 +59,7 @@ proc generate_types {} { # create other non-collection types r incr int r set string str + r gcra gcra 10 5 60000 # create bigger objects with 10 items (more than a single ziplist / listpack) generate_collections big 10 diff --git a/tests/support/util.tcl b/tests/support/util.tcl index 16eb80008..6a011380f 100644 --- a/tests/support/util.tcl +++ b/tests/support/util.tcl @@ -800,7 +800,8 @@ proc generate_fuzzy_traffic_on_key {key type duration} { set set_commands {SADD SCARD SDIFF SDIFFSTORE SINTER SINTERSTORE SISMEMBER SMEMBERS SMOVE SPOP SRANDMEMBER SREM SSCAN SUNION SUNIONSTORE} set stream_commands {XACK XADD XCLAIM XDEL XGROUP XINFO XLEN XPENDING XRANGE XREAD XREADGROUP XREVRANGE XTRIM XDELEX XACKDEL XNACK} set vset_commands {VADD VREM} - set commands [dict create string $string_commands hash $hash_commands zset $zset_commands list $list_commands set $set_commands stream $stream_commands vectorset $vset_commands] + set gcra_commands {GCRA} + set commands [dict create string $string_commands hash $hash_commands zset $zset_commands list $list_commands set $set_commands stream $stream_commands vectorset $vset_commands gcra $gcra_commands] set cmds [dict get $commands $type] set start_time [clock seconds] diff --git a/tests/unit/gcra.tcl b/tests/unit/gcra.tcl index b012a0fc4..1080e76f7 100644 --- a/tests/unit/gcra.tcl +++ b/tests/unit/gcra.tcl @@ -227,6 +227,14 @@ start_server {tags {"gcra" "external:skip"}} { catch {r gcra mykey 1 1 2147483647 TOKENS 2147483647} err assert_match "*would cause an overflow*" $err } + + test {GCRASETVALUE - basic functionality} { + r del mykey + set tat_us [expr {[clock microseconds] + 60000000}] + assert_equal {OK} [r gcrasetvalue mykey $tat_us] + 
assert_equal {gcra} [r type mykey] + assert {[r pttl mykey] > 0} + } } start_server {tags {"gcra" "external:skip"}} { From 8aeea8c210af55709e3aa4c2ec936f25ea3edb87 Mon Sep 17 00:00:00 2001 From: Vitah Lin Date: Fri, 17 Apr 2026 16:36:02 +0800 Subject: [PATCH 25/32] Increase threshold for HPEXPIRETIME persists after RDB reload test (#15047) --- tests/unit/type/hash-field-expire.tcl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/unit/type/hash-field-expire.tcl b/tests/unit/type/hash-field-expire.tcl index 402a9ad72..b69130db4 100644 --- a/tests/unit/type/hash-field-expire.tcl +++ b/tests/unit/type/hash-field-expire.tcl @@ -296,7 +296,7 @@ start_server {tags {"external:skip needs:debug"}} { test "HPEXPIRETIME persists after RDB reload ($type)" { r del myhash r hset myhash field1 value1 field2 value2 - r hpexpire myhash 300 NX FIELDS 1 field1 + r hpexpire myhash 500 NX FIELDS 1 field1 set before [r HPEXPIRETIME myhash FIELDS 1 field1] r debug reload set after [r HPEXPIRETIME myhash FIELDS 1 field1] From 8677971360e131c9e294c08b423dd85e83f4ae90 Mon Sep 17 00:00:00 2001 From: charsyam Date: Fri, 17 Apr 2026 18:28:13 +0900 Subject: [PATCH 26/32] Remove unnecessary `-ERR` and `\r\n` for addReplyErrorFormat in extractLongLatOrReply() (#14995) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In addReplyErrorLength and addReplyErrorFormatInternal, `-ERR` is automatically prepended if the message doesn’t start with `-`, so the initial `-ERR` is unnecessary. Also, trailing `\r\n` will be trimmed, so it doesn’t need to be included. 
--------- Signed-off-by: charsyam Signed-off-by: DaeMyung Kang Co-authored-by: debing.sun --- src/geo.c | 2 +- tests/unit/geo.tcl | 8 ++++++++ 2 files changed, 9 insertions(+), 1 deletion(-) diff --git a/src/geo.c b/src/geo.c index ce890f7f0..978270bb6 100644 --- a/src/geo.c +++ b/src/geo.c @@ -108,7 +108,7 @@ int extractLongLatOrReply(client *c, robj **argv, double *xy) { if (xy[0] < GEO_LONG_MIN || xy[0] > GEO_LONG_MAX || xy[1] < GEO_LAT_MIN || xy[1] > GEO_LAT_MAX) { addReplyErrorFormat(c, - "-ERR invalid longitude,latitude pair %f,%f\r\n",xy[0],xy[1]); + "invalid longitude,latitude pair %f,%f",xy[0],xy[1]); return C_ERR; } return C_OK; diff --git a/tests/unit/geo.tcl b/tests/unit/geo.tcl index 6175329da..8ae201df9 100644 --- a/tests/unit/geo.tcl +++ b/tests/unit/geo.tcl @@ -223,6 +223,14 @@ start_server {tags {"geo"}} { set err } {*valid*} + test {GEOADD out-of-range longitude/latitude error reply is well-formed} { + r readraw 1 + set reply [r geoadd nyc 200 40 "bad lon"] + r readraw 0 + # RESP simple error: single line starting with '-', no duplicated "-ERR" prefix. + assert_match {-ERR invalid longitude,latitude pair*} $reply + } + test {GEOADD multi add} { r geoadd nyc -73.9733487 40.7648057 "central park n/q/r" -73.9903085 40.7362513 "union square" -74.0131604 40.7126674 "wtc one" -73.7858139 40.6428986 "jfk" -73.9375699 40.7498929 "q4" -73.9564142 40.7480973 4545 } {6} From 58dc4f3c854a5abd18a826f97f39048dd1a6abd2 Mon Sep 17 00:00:00 2001 From: Omer Shadmi <76992134+oshadmi@users.noreply.github.com> Date: Sun, 19 Apr 2026 10:49:34 +0300 Subject: [PATCH 27/32] Update RediSearch to 8.8 RC1 (v8.7.90) (#15072) Update RediSearch module version to 8.8 RC1 (v8.7.90) Made with [Cursor](https://cursor.com) --- > [!NOTE] > **Low Risk** > Low risk: a single version bump that changes which RediSearch git tag is cloned/built; main risk is build/runtime incompatibility from the upstream RC update. 
> > **Overview** > Updates the RediSearch module build configuration to fetch and build upstream `redisearch` tag `v8.7.90` (8.8 RC1) instead of `v8.5.90`. > > Reviewed by [Cursor Bugbot](https://cursor.com/bugbot) for commit 21e121c7380568130846f9d202c90f72ad93e0f3. Bugbot is set up for automated code reviews on this repo. Configure [here](https://www.cursor.com/dashboard/bugbot). --- modules/redisearch/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/redisearch/Makefile b/modules/redisearch/Makefile index 1672d7454..e301c29a7 100644 --- a/modules/redisearch/Makefile +++ b/modules/redisearch/Makefile @@ -1,5 +1,5 @@ SRC_DIR = src -MODULE_VERSION = v8.5.90 +MODULE_VERSION = v8.7.90 MODULE_REPO = https://github.com/redisearch/redisearch TARGET_MODULE = $(SRC_DIR)/bin/$(FULL_VARIANT)/search-community/redisearch.so From 0fa78fd8fddaa1576070ac430d4d796d7396835e Mon Sep 17 00:00:00 2001 From: "Filipe Oliveira (Redis)" Date: Mon, 20 Apr 2026 13:45:49 +0100 Subject: [PATCH 28/32] perf: widen fast_float_strtod fast path to 17-19 digit mantissas (#15061) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ## Root cause Roughly 50% of random double scores generated by the ZADD listpack workload have 17-19 significant digits, which exceed `MAX_MANTISSA_FAST_PATH` (`2^53`). These inputs fall through to the `strtod()` fallback: ```c char static_buf[128]; memcpy(buf, nptr, len); /* memcpy back! */ buf[len] = '\0'; /* null-term */ double result = strtod(buf, ...); /* glibc strtod — ~10× slower on ARM */ ``` The original C++ `fast_float` library handled the same 17-19 digit inputs with Eisel-Lemire / bigint arithmetic without falling back to `strtod()`. That is what the pure-C replacement lost. ## Fix Compute `mantissa * 10^exponent` in 128-bit integer arithmetic using `__uint128_t`, then convert to double with a single IEEE round-to-nearest-even cast. 
Supported for `|exp| in [0, 19]` where `10^|exp|` fits in `uint64`; cases outside that range (or otherwise outside the fast path's preconditions) still fall through to `strtod()`. --------- Co-authored-by: debing.sun --- src/fast_float_strtod.c | 100 ++++++++++++++++++++++++++++++++++----- tests/unit/type/zset.tcl | 32 +++++++++++++ 2 files changed, 120 insertions(+), 12 deletions(-) diff --git a/src/fast_float_strtod.c b/src/fast_float_strtod.c index 48a5df502..25bddba79 100644 --- a/src/fast_float_strtod.c +++ b/src/fast_float_strtod.c @@ -264,21 +264,62 @@ static inline int parse_number_string(const char *p, const char *pend, double *r /* Check if we're within fast path bounds */ if (exponent < MIN_EXPONENT_FAST_PATH) return 0; if (exponent > MAX_EXPONENT_FAST_PATH) return 0; - if (mantissa > MAX_MANTISSA_FAST_PATH) return 0; - - /* Fast path: direct conversion */ - double value = (double)mantissa; - if (exponent < 0) { - value = value / powers_of_ten[-exponent]; - } else if (exponent > 0) { - value = value * powers_of_ten[exponent]; - } - - if (negative) { - value = -value; + double value; + if (mantissa <= MAX_MANTISSA_FAST_PATH) { + /* Clinger fast path: all operands exact in double precision, + * single multiply/divide produces a correctly-rounded result. */ + value = (double)mantissa; + if (exponent < 0) value = value / powers_of_ten[-exponent]; + else if (exponent > 0) value = value * powers_of_ten[exponent]; + } else { +#ifdef __SIZEOF_INT128__ + /* Widened fast path for 17-19 significant-digit mantissas. + * + * (double)mantissa alone loses up to 11 bits when mantissa > 2^53, + * so the existing Clinger path would yield up to 1 ULP vs strtod. + * We recover full precision by doing the multiply/divide in 128-bit + * integer arithmetic (correctly-rounded by construction). Cases + * outside the supported exponent range fall through to strtod. + * + * Requires __uint128_t (GCC/Clang builtin, available on every 64-bit + * target Redis supports). 
32-bit builds take the strtod() fallback. */ + if (exponent < -19 || exponent > 19) return 0; + + if (exponent >= 0) { + /* (mantissa * 10^e) fits in 128 bits. Convert exactly: the + * single (double) cast from __uint128_t rounds to nearest. */ + __uint128_t prod = (__uint128_t)mantissa * (uint64_t)powers_of_ten[exponent]; + uint64_t hi = (uint64_t)(prod >> 64); + uint64_t lo = (uint64_t)prod; + /* (double)hi * 2^64 has no rounding error (hi up to 2^64-1 rounds + * once, then * 2^64 is exact). Adding lo rounds once. Total: + * matches strtod on every tested case with e in [0,19]. */ + value = (double)hi * 18446744073709551616.0 + (double)lo; + } else { + /* mantissa / 10^|e|: scale numerator up by 2^64 before integer + * division to preserve precision, then descale by multiplying by + * 2^-64 (exact power-of-two scaling, does not round). The single + * (double) cast of the integer quotient produces IEEE round-to- + * nearest-even, matching strtod() bit-exactly for every tested + * 16-19 significant digit case. */ + uint64_t divisor = (uint64_t)powers_of_ten[-exponent]; + __uint128_t scaled = (__uint128_t)mantissa << 64; + __uint128_t q = scaled / divisor; + uint64_t hi = (uint64_t)(q >> 64); + uint64_t lo = (uint64_t)q; + value = ((double)hi * 18446744073709551616.0 + (double)lo) + * 5.421010862427522170037e-20; /* 2^-64 */ + } +#else + /* 32-bit target without __uint128_t: fall through to the strtod() + * fallback. Correctness is preserved (it's the same path that shipped + * in 8.8-M02); only the perf gain is 64-bit-target-specific. */ + return 0; +#endif } + if (negative) value = -value; *result = value; return 1; } @@ -448,6 +489,41 @@ int fastFloatTest(int argc, char **argv, int flags) { {"12345678901234567890", 1.2345678901234567e19}, {"2.2250738585072012e-308", 2.2250738585072012e-308}, /* Near DBL_MIN boundary */ {"0x10", 16.0}, + + /* Widened fast path: mantissa > 2^53 (==9007199254740992), |exp| in [1,19]. 
+ * These cover the __uint128_t code path that avoids the strtod() fallback. + * Each expected value is the IEEE-correct round-to-nearest double. */ + + /* 17-19 significant digit mantissas — negative exponent (scores in [0,1)) */ + {"0.49606648747577575", 0.49606648747577575}, /* 17 sig digits, ZADD hot case */ + {"0.8731899671198792", 0.8731899671198792}, /* 16 sig digits */ + {"0.34912978268081996", 0.34912978268081996}, /* 17 sig digits */ + {"0.0033318113277969186", 0.0033318113277969186}, /* 19 sig digits after leading-zero strip */ + {"0.9955843393406656", 0.9955843393406656}, + {"0.999999999999999", 0.999999999999999}, /* repunit-ish, ULP boundary */ + + /* Mantissa just above 2^53: triggers the widened path */ + {"9007199254740993.0", 9007199254740992.0}, /* rounds down */ + {"9007199254740995.0", 9007199254740996.0}, /* ties-to-even up */ + {"9007199254740996.0", 9007199254740996.0}, + {"10000000000000000", 1e16}, /* exact 10^16, mantissa = 10^16 */ + {"99999999999999999", 1e17}, /* one less than 10^17 */ + + /* 18-digit mantissa with various exponents */ + {"1234567890123456789", 1.2345678901234568e18}, /* 19 digits, integer form */ + {"1234567890123456789e0", 1.2345678901234568e18}, + {"1234567890123456789e-5", 12345678901234.568}, + {"1234567890123456789e-19", 0.12345678901234568}, + {"1234567890123456789e5", 1.2345678901234569e23}, /* 19-digit mantissa × 10^5 — widened path */ + + /* Boundary: exponent exactly ±19 (widened-path limit) */ + {"1234567890123.456789e-19", 1.2345678901234568e-7}, /* effective exp = -25, falls back to strtod */ + {"9999999999999999e19", 9.999999999999999e34}, + {"9999999999999999e-19", 9.999999999999999e-4}, + + /* Negative numbers exercising the widened path */ + {"-0.49606648747577575", -0.49606648747577575}, + {"-9007199254740993", -9007199254740992.0}, }; run_ff_tests(decimal_ok, COUNTOF(decimal_ok), 0); diff --git a/tests/unit/type/zset.tcl b/tests/unit/type/zset.tcl index f08ddf70c..e840b2a16 100644 --- 
a/tests/unit/type/zset.tcl +++ b/tests/unit/type/zset.tcl @@ -1761,6 +1761,38 @@ start_server {tags {"zset"}} { } } {} {needs:debug} + test "ZSCORE 17-19 significant digit mantissas (widened fast path) - $encoding" { + # Exercise the widened fast_float_strtod path that handles + # mantissas > 2^53 (via __uint128_t arithmetic). ZADD/ZSCORE + # must round-trip bit-exactly through the listpack/skiplist + # encoding (parse on ingest, parse again on retrieval). Each + # input string below parses to a specific IEEE double whose + # canonical string representation is itself, so `expr` in Tcl + # re-evaluates to the same numeric value. + r del zscorewide + set widecases { + 0.49606648747577575 + 0.8731899671198792 + 0.34912978268081996 + 0.0033318113277969186 + 0.9955843393406656 + -0.8731899671198792 + } + set i 0 + foreach s $widecases { + r zadd zscorewide $s m$i + assert_equal [expr $s] [expr [r zscore zscorewide m$i]] + incr i + } + r debug reload + assert_encoding $encoding zscorewide + set i 0 + foreach s $widecases { + assert_equal [expr $s] [expr [r zscore zscorewide m$i]] + incr i + } + } {} {needs:debug} + test "ZSET sorting stresser - $encoding" { set delta 0 for {set test 0} {$test < 2} {incr test} { From 63f02e7876e7b25dad6a63a81a02022df215a505 Mon Sep 17 00:00:00 2001 From: sggeorgiev Date: Wed, 22 Apr 2026 09:12:04 +0300 Subject: [PATCH 29/32] Fix double ERR prefix in XNACK error replies (#15091) Several `addReplyError` and `addReplyErrorFormat` calls in `xnackCommand` included a redundant `"ERR "` prefix in the message string. Since `addReplyErrorLength` already prepends `-ERR ` to the RESP reply, clients received `ERR ERR ...` for these error paths. This PR removes the redundant prefix from all five affected calls and tightens the corresponding test patterns to match from the beginning of the error message (`"ERR ..."` instead of `"*...*"`), so any future double-prefix regression will be caught. 
--- src/t_stream.c | 10 +++--- tests/unit/type/stream-cgroups.tcl | 50 +++++++++++++++--------------- 2 files changed, 30 insertions(+), 30 deletions(-) diff --git a/src/t_stream.c b/src/t_stream.c index ac070247a..09e623911 100644 --- a/src/t_stream.c +++ b/src/t_stream.c @@ -3878,7 +3878,7 @@ void xnackCommand(client *c) { } else if (!strcasecmp(c->argv[3]->ptr,"FATAL")) { mode = XNACK_FATAL; } else { - addReplyError(c,"ERR mode must be SILENT, FAIL, or FATAL"); + addReplyError(c,"mode must be SILENT, FAIL, or FATAL"); return; } @@ -3897,7 +3897,7 @@ void xnackCommand(client *c) { numids = (int)numids_long; ids_start = i + 2; if (numids > (c->argc - ids_start)) { - addReplyError(c,"ERR number of IDs doesn't match numids"); + addReplyError(c,"number of IDs doesn't match numids"); return; } i = ids_start + numids - 1; @@ -3908,18 +3908,18 @@ void xnackCommand(client *c) { if (getLongLongFromObjectOrReply(c,c->argv[i],&retrycount,NULL) != C_OK) return; if (retrycount < 0) { - addReplyError(c,"ERR Invalid RETRYCOUNT value, must be >= 0"); + addReplyError(c,"Invalid RETRYCOUNT value, must be >= 0"); return; } } else { - addReplyErrorFormat(c,"ERR Unrecognized XNACK option '%s'", + addReplyErrorFormat(c,"Unrecognized XNACK option '%s'", (char *)c->argv[i]->ptr); return; } } if (ids_start == 0) { - addReplyError(c,"ERR syntax error, expected IDS keyword"); + addReplyError(c,"syntax error, expected IDS keyword"); return; } diff --git a/tests/unit/type/stream-cgroups.tcl b/tests/unit/type/stream-cgroups.tcl index 60e40596b..a300e6dbf 100644 --- a/tests/unit/type/stream-cgroups.tcl +++ b/tests/unit/type/stream-cgroups.tcl @@ -3402,47 +3402,47 @@ start_server { # Unrecognized option at various positions — the parser accepts options # both before and after the IDS block, so verify rejection in each slot. 
- assert_error "*Unrecognized XNACK option*" {r XNACK mystream grp FAIL BADOPT IDS 1 1-0} - assert_error "*Unrecognized XNACK option*" {r XNACK mystream grp FAIL IDS 1 1-0 BADOPT} - assert_error "*Unrecognized XNACK option*" {r XNACK mystream grp SILENT BADOPT IDS 1 1-0 FORCE} - assert_error "*Unrecognized XNACK option*" {r XNACK mystream grp SILENT FORCE BADOPT IDS 1 1-0} - assert_error "*Unrecognized XNACK option*" {r XNACK mystream grp FAIL RETRYCOUNT 5 BADOPT IDS 1 1-0} - assert_error "*Unrecognized XNACK option*" {r XNACK mystream grp FAIL IDS 1 1-0 RETRYCOUNT 5 BADOPT} - assert_error "*Unrecognized XNACK option*" {r XNACK mystream grp FAIL FORCE IDS 1 1-0 BADOPT RETRYCOUNT 5} + assert_error "ERR Unrecognized XNACK option*" {r XNACK mystream grp FAIL BADOPT IDS 1 1-0} + assert_error "ERR Unrecognized XNACK option*" {r XNACK mystream grp FAIL IDS 1 1-0 BADOPT} + assert_error "ERR Unrecognized XNACK option*" {r XNACK mystream grp SILENT BADOPT IDS 1 1-0 FORCE} + assert_error "ERR Unrecognized XNACK option*" {r XNACK mystream grp SILENT FORCE BADOPT IDS 1 1-0} + assert_error "ERR Unrecognized XNACK option*" {r XNACK mystream grp FAIL RETRYCOUNT 5 BADOPT IDS 1 1-0} + assert_error "ERR Unrecognized XNACK option*" {r XNACK mystream grp FAIL IDS 1 1-0 RETRYCOUNT 5 BADOPT} + assert_error "ERR Unrecognized XNACK option*" {r XNACK mystream grp FAIL FORCE IDS 1 1-0 BADOPT RETRYCOUNT 5} # Invalid mode - assert_error "*mode must be SILENT, FAIL, or FATAL*" {r XNACK mystream grp BADMODE IDS 1 1-0} + assert_error "ERR mode must be SILENT, FAIL, or FATAL" {r XNACK mystream grp BADMODE IDS 1 1-0} # Multiple mode words — only one mode is allowed per invocation. 
- assert_error "*Unrecognized XNACK option*" {r XNACK mystream grp FAIL FATAL IDS 1 1-0} - assert_error "*Unrecognized XNACK option*" {r XNACK mystream grp SILENT FAIL IDS 1 1-0} - assert_error "*Unrecognized XNACK option*" {r XNACK mystream grp FATAL SILENT IDS 1 1-0} - assert_error "*Unrecognized XNACK option*" {r XNACK mystream grp FAIL SILENT FATAL IDS 1 1-0} + assert_error "ERR Unrecognized XNACK option*" {r XNACK mystream grp FAIL FATAL IDS 1 1-0} + assert_error "ERR Unrecognized XNACK option*" {r XNACK mystream grp SILENT FAIL IDS 1 1-0} + assert_error "ERR Unrecognized XNACK option*" {r XNACK mystream grp FATAL SILENT IDS 1 1-0} + assert_error "ERR Unrecognized XNACK option*" {r XNACK mystream grp FAIL SILENT FATAL IDS 1 1-0} # IDS keyword validation - assert_error "*Unrecognized XNACK option*" {r XNACK mystream grp SILENT NOTIDS 1 1-0} - assert_error "*expected IDS keyword*" {r XNACK mystream grp SILENT FORCE RETRYCOUNT 5} + assert_error "ERR Unrecognized XNACK option*" {r XNACK mystream grp SILENT NOTIDS 1 1-0} + assert_error "ERR syntax error, expected IDS keyword" {r XNACK mystream grp SILENT FORCE RETRYCOUNT 5} # numids validation - assert_error "*numids must be a positive integer*" {r XNACK mystream grp SILENT IDS abc 1-0} - assert_error "*numids must be a positive integer*" {r XNACK mystream grp SILENT IDS 0 1-0} - assert_error "*numids must be a positive integer*" {r XNACK mystream grp SILENT IDS -1 1-0} - assert_error "*number of IDs doesn't match numids*" {r XNACK mystream grp SILENT IDS 2 1-0} + assert_error "ERR numids must be a positive integer*" {r XNACK mystream grp SILENT IDS abc 1-0} + assert_error "ERR numids must be a positive integer*" {r XNACK mystream grp SILENT IDS 0 1-0} + assert_error "ERR numids must be a positive integer*" {r XNACK mystream grp SILENT IDS -1 1-0} + assert_error "ERR number of IDs doesn't match numids" {r XNACK mystream grp SILENT IDS 2 1-0} # Invalid stream ID format - assert_error "*Invalid stream ID*" {r XNACK 
mystream grp FAIL IDS 1 not-a-valid-id} + assert_error "ERR Invalid stream ID*" {r XNACK mystream grp FAIL IDS 1 not-a-valid-id} # RETRYCOUNT validation — non-integer, negative, overflow, missing value - assert_error "*value is not an integer or out of range*" {r XNACK mystream grp FAIL IDS 1 1-0 RETRYCOUNT abc} - assert_error "*Invalid RETRYCOUNT*" {r XNACK mystream grp FAIL IDS 1 1-0 RETRYCOUNT -1} - assert_error "*value is not an integer or out of range*" {r XNACK mystream grp FAIL IDS 1 1-0 RETRYCOUNT 99999999999999999999} + assert_error "ERR value is not an integer or out of range" {r XNACK mystream grp FAIL IDS 1 1-0 RETRYCOUNT abc} + assert_error "ERR Invalid RETRYCOUNT value, must be >= 0" {r XNACK mystream grp FAIL IDS 1 1-0 RETRYCOUNT -1} + assert_error "ERR value is not an integer or out of range" {r XNACK mystream grp FAIL IDS 1 1-0 RETRYCOUNT 99999999999999999999} # RETRYCOUNT without a following value — consumed as trailing option - assert_error "*Unrecognized XNACK option*" {r XNACK mystream grp FAIL IDS 1 1-0 RETRYCOUNT} + assert_error "ERR Unrecognized XNACK option*" {r XNACK mystream grp FAIL IDS 1 1-0 RETRYCOUNT} # RETRYCOUNT right after mode with no IDS — too few arguments - assert_error "*wrong number of arguments*" {r XNACK mystream grp FAIL RETRYCOUNT} + assert_error "ERR wrong number of arguments for 'xnack' command" {r XNACK mystream grp FAIL RETRYCOUNT} # Extra args after numids IDs — the surplus ID is parsed as an option - assert_error "*Unrecognized XNACK option*" {r XNACK mystream grp FAIL IDS 1 1-0 2-0} + assert_error "ERR Unrecognized XNACK option*" {r XNACK mystream grp FAIL IDS 1 1-0 2-0} } # Verify SILENT mode decrements delivery_count by 1, clamped at 0. 
From 303667a40cdf5032b1044e94dfc6860a15414e03 Mon Sep 17 00:00:00 2001 From: Darsheel Rathore Date: Thu, 23 Apr 2026 14:14:36 +0530 Subject: [PATCH 30/32] Fix use-after-free in RM_RegisterClusterMessageReceiver() (#15059) RM_RegisterClusterMessageReceiver() unlinks a receiver node from the clusterReceivers[type] linked list when the callback is set to NULL, but when removing the head node (prev == NULL), the code updates clusterReceivers[type]->next instead of clusterReceivers[type] itself. This leaves clusterReceivers[type] pointing to the freed node, so any later traversal through clusterReceivers[type] dereferences a dangling pointer. Fix by updating clusterReceivers[type] directly when prev == NULL. Fixes #15057 --------- Co-authored-by: debing.sun --- src/module.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/module.c b/src/module.c index e69c4f490..cb9edd67e 100644 --- a/src/module.c +++ b/src/module.c @@ -9661,7 +9661,7 @@ void RM_RegisterClusterMessageReceiver(RedisModuleCtx *ctx, uint8_t type, RedisM if (prev) prev->next = r->next; else - clusterReceivers[type]->next = r->next; + clusterReceivers[type] = r->next; /* Update the head */ zfree(r); } return; From fafc47251afce2e55b917c088fbb63217b5241cb Mon Sep 17 00:00:00 2001 From: Vitah Lin Date: Thu, 23 Apr 2026 17:38:42 +0800 Subject: [PATCH 31/32] Fix signed integer overflow in scan count parameter (#14982) ### Problem In `scanGenericCommand`, `maxiterations = count * 10` overflows when `count > LONG_MAX / 10`, causing undefined behavior. ### Changed 1. Use saturating arithmetic to prevent overflow. 2. Added a test to trigger the overflow path, detectable by UBSan. 
--- src/db.c | 2 +- tests/unit/scan.tcl | 15 +++++++++++++++ 2 files changed, 16 insertions(+), 1 deletion(-) diff --git a/src/db.c b/src/db.c index 32c058dab..7a142cb5d 100644 --- a/src/db.c +++ b/src/db.c @@ -1912,7 +1912,7 @@ void scanGenericCommand(client *c, robj *o, unsigned long long cursor) { * COUNT, so if the hash table is in a pathological state (very * sparsely populated) we avoid to block too much time at the cost * of returning no or very few elements. */ - long maxiterations = count*10; + long maxiterations = (count > LONG_MAX / 10) ? LONG_MAX : count * 10; /* We pass scanData which have three pointers to the callback: * 1. data.keys: the list to which it will add new elements; diff --git a/tests/unit/scan.tcl b/tests/unit/scan.tcl index 6a092cb4e..c3ec5f273 100644 --- a/tests/unit/scan.tcl +++ b/tests/unit/scan.tcl @@ -471,6 +471,21 @@ proc test_scan {type} { } } + test "{$type} SCAN COUNT overflow" { + r flushdb + populate 10 + + # count = LONG_MAX/10 + 1, within LONG_MAX so it parses fine, + # but count*10 overflows signed long which is undefined behavior. + # Compute dynamically to support both 32-bit and 64-bit builds. + set long_max [expr {[s arch_bits] == 32 ? 2147483647 : 9223372036854775807}] + set big_count [expr {$long_max / 10 + 1}] + set res [r scan 0 count $big_count] + assert {[llength $res] == 2} + assert_equal 0 [lindex $res 0] + assert_equal 10 [llength [lindex $res 1]] + } + test "{$type} SCAN MATCH pattern implies cluster slot" { # Tests the code path for an optimization for patterns like "{foo}-*" # which implies that all matching keys belong to one slot. 
From 47c51369eeffd55e1baf20df7955a3dfbe842fc4 Mon Sep 17 00:00:00 2001 From: sggeorgiev Date: Thu, 23 Apr 2026 15:46:48 +0300 Subject: [PATCH 32/32] Reject corrupt stream RDB with shared NACK across consumers (#15081) **Summary** Detects and rejects corrupt stream RDB payloads where the same NACK (pending entry) is referenced by more than one consumer, which violates a stream data-structure invariant. **Changes** - **`rdbLoadObject` (stream consumer PEL loading)**: Added a guard that checks `nack->consumer != NULL` before assigning the consumer pointer. When a second consumer's PEL references a NACK that was already claimed by a prior consumer, the loader now reports a corrupt RDB error and aborts instead of silently overwriting the pointer. Without this check, two consumers share the same `streamNACK`, and freeing the first consumer's PEL leaves the second with a dangling pointer. - **`corrupt-dump.tcl`**: Added a regression test that crafts a stream with two consumers (`consumerA`, `consumerB`) whose PELs both reference the same entry (`1-0`). The `RESTORE` command is expected to fail with `"Bad data format"`, and the server must remain responsive (`PING` succeeds). **Benefits** - **Fail-fast on corrupt data**: The invariant violation is caught at load time with a clear diagnostic message rather than manifesting as a crash later during normal operation. - **Regression coverage**: The crafted payload in the test ensures this class of corruption is permanently guarded against. --- src/rdb.c | 8 ++++++++ tests/integration/corrupt-dump.tcl | 16 ++++++++++++++++ 2 files changed, 24 insertions(+) diff --git a/src/rdb.c b/src/rdb.c index 470de9806..192a8825a 100644 --- a/src/rdb.c +++ b/src/rdb.c @@ -3431,6 +3431,14 @@ robj *rdbLoadObject(int rdbtype, rio *rdb, sds key, int dbid, int *error) } streamNACK *nack = result; + /* If the NACK already has a consumer assigned, the + * payload is corrupt — each global PEL entry must be + * claimed by exactly one consumer.
*/ + if (nack->consumer != NULL) { + rdbReportCorruptRDB("Stream consumer PEL entry already has a consumer assigned"); + decrRefCount(o); + return NULL; + } /* Set the NACK consumer, that was left to NULL when * loading the global PEL. Then set the same shared * NACK structure also in the consumer-specific PEL. */ diff --git a/tests/integration/corrupt-dump.tcl b/tests/integration/corrupt-dump.tcl index 412d8a018..c693da91d 100644 --- a/tests/integration/corrupt-dump.tcl +++ b/tests/integration/corrupt-dump.tcl @@ -999,5 +999,21 @@ test {corrupt payload: fuzzer findings - decrRefCount on NULL robj on corrupt KE } } +test {corrupt payload: stream with NACK shared between two consumers} { + start_server [list overrides [list loglevel verbose use-exit-on-panic yes crash-memcheck-enabled no]] { + r debug set-skip-checksum-validation 1 + # Payload: stream with entry 1-0, one consumer group (mygroup), + # two consumers whose PELs both reference 1-0 (shared NACK). + # XACK on one consumer frees the NACK, leaving a dangling + # pointer in the other consumer's PEL (use-after-free). 
+ catch {r RESTORE mystream 0 "\x1a\x01\x10\x00\x00\x00\x00\x00\x00\x00\x01\x00\x00\x00\x00\x00\x00\x00\x00\x1d\x1d\x00\x00\x00\x0a\x00\x01\x01\x00\x01\x01\x01\x81\x6b\x02\x00\x01\x02\x01\x00\x01\x00\x01\x81\x76\x02\x04\x01\xff\x01\x01\x00\x01\x00\x00\x00\x01\x01\x07\x6d\x79\x67\x72\x6f\x75\x70\x01\x00\x01\x01\x00\x00\x00\x00\x00\x00\x00\x01\x00\x00\x00\x00\x00\x00\x00\x00\x01\x64\x42\xb9\x9d\x01\x00\x00\x01\x02\x09\x63\x6f\x6e\x73\x75\x6d\x65\x72\x41\x01\x64\x42\xb9\x9d\x01\x00\x00\x01\x64\x42\xb9\x9d\x01\x00\x00\x01\x00\x00\x00\x00\x00\x00\x00\x01\x00\x00\x00\x00\x00\x00\x00\x00\x09\x63\x6f\x6e\x73\x75\x6d\x65\x72\x42\x01\x64\x42\xb9\x9d\x01\x00\x00\xff\xff\xff\xff\xff\xff\xff\xff\x01\x00\x00\x00\x00\x00\x00\x00\x01\x00\x00\x00\x00\x00\x00\x00\x00\x40\x64\x40\x64\x00\x00\x00\x0d\x00\xe7\x12\xf7\xcc\x25\xd5\x0e\x44"} err + catch {r XACK mystream mygroup 1-0} _ + catch {r XREADGROUP GROUP mygroup consumerA COUNT 10 STREAMS mystream 0} _ + catch {r DEL mystream} _ + assert_match "*Bad data format*" $err + r ping + } +} + } ;# tags