From 87d8e717087293073b676565b2cd3c0a094543bb Mon Sep 17 00:00:00 2001 From: "debing.sun" Date: Thu, 27 Mar 2025 08:58:57 +0800 Subject: [PATCH] Fix defrag when type/encoding changes during scan (#13883) This PR is based on: https://github.com/valkey-io/valkey/pull/1801 [SoftlyRaining](https://github.com/SoftlyRaining) was hunting for defrag bugs with Jim and found a couple of improvements to make. Jim pointed out that in several of the callbacks, if the encoding were to change it simply returns without doing anything to `cursor` to make it reach 0, meaning that it would continue no-op working on that item without making any progress. Type and encoding can change while the defrag scan is in progress if the value is mutated or replaced by something else with the same key. --------- Signed-off-by: Rain Valentine Co-authored-by: Rain Valentine --- src/defrag.c | 29 +++++++++++------------------ 1 file changed, 11 insertions(+), 18 deletions(-) diff --git a/src/defrag.c b/src/defrag.c index 8867b301f..7d375305c 100644 --- a/src/defrag.c +++ b/src/defrag.c @@ -482,8 +482,7 @@ long scanLaterList(robj *ob, unsigned long *cursor, monotime endtime) { quicklistNode *node; long iterations = 0; int bookmark_failed = 0; - if (ob->type != OBJ_LIST || ob->encoding != OBJ_ENCODING_QUICKLIST) - return 0; + serverAssert(ob->type == OBJ_LIST && ob->encoding == OBJ_ENCODING_QUICKLIST); if (*cursor == 0) { /* if cursor is 0, we start new iteration */ @@ -532,8 +531,7 @@ void scanLaterZsetCallback(void *privdata, const dictEntry *_de) { } void scanLaterZset(robj *ob, unsigned long *cursor) { - if (ob->type != OBJ_ZSET || ob->encoding != OBJ_ENCODING_SKIPLIST) - return; + serverAssert(ob->type == OBJ_ZSET && ob->encoding == OBJ_ENCODING_SKIPLIST); zset *zs = (zset*)ob->ptr; dict *d = zs->dict; scanLaterZsetData data = {zs}; @@ -549,8 +547,7 @@ void scanCallbackCountScanned(void *privdata, const dictEntry *de) { } void scanLaterSet(robj *ob, unsigned long *cursor) { - if (ob->type != OBJ_SET || ob->encoding != OBJ_ENCODING_HT) - return; + serverAssert(ob->type == OBJ_SET && ob->encoding == OBJ_ENCODING_HT); dict *d = ob->ptr; dictDefragFunctions defragfns = { .defragAlloc = activeDefragAlloc, @@ -560,8 +557,7 @@ void scanLaterSet(robj *ob, unsigned long *cursor) { } void scanLaterHash(robj *ob, unsigned long *cursor) { - if (ob->type != OBJ_HASH || ob->encoding != OBJ_ENCODING_HT) - return; + serverAssert(ob->type == OBJ_HASH && ob->encoding == OBJ_ENCODING_HT); dict *d = ob->ptr; dictDefragFunctions defragfns = { .defragAlloc = activeDefragAlloc, @@ -656,10 +652,7 @@ int scanLaterStreamListpacks(robj *ob, unsigned long *cursor, monotime endtime) static unsigned char next[sizeof(streamID)]; raxIterator ri; long iterations = 0; - if (ob->type != OBJ_STREAM || ob->encoding != OBJ_ENCODING_STREAM) { - *cursor = 0; - return 0; - } + serverAssert(ob->type == OBJ_STREAM && ob->encoding == OBJ_ENCODING_STREAM); stream *s = ob->ptr; raxStart(&ri,s->rax); @@ -1005,22 +998,22 @@ void defragPubsubScanCallback(void *privdata, const dictEntry *de) { int defragLaterItem(dictEntry *de, unsigned long *cursor, monotime endtime, int dbid) { if (de) { robj *ob = dictGetVal(de); - if (ob->type == OBJ_LIST) { + if (ob->type == OBJ_LIST && ob->encoding == OBJ_ENCODING_QUICKLIST) { return scanLaterList(ob, cursor, endtime); - } else if (ob->type == OBJ_SET) { + } else if (ob->type == OBJ_SET && ob->encoding == OBJ_ENCODING_HT) { scanLaterSet(ob, cursor); - } else if (ob->type == OBJ_ZSET) { + } else if (ob->type == OBJ_ZSET && ob->encoding == OBJ_ENCODING_SKIPLIST) { scanLaterZset(ob, cursor); - } else if (ob->type == OBJ_HASH) { + } else if (ob->type == OBJ_HASH && ob->encoding == OBJ_ENCODING_HT) { scanLaterHash(ob, cursor); - } else if (ob->type == OBJ_STREAM) { + } else if (ob->type == OBJ_STREAM && ob->encoding == OBJ_ENCODING_STREAM) { return scanLaterStreamListpacks(ob, cursor, endtime); } else if (ob->type == OBJ_MODULE) { robj keyobj; initStaticStringObject(keyobj, dictGetKey(de)); return moduleLateDefrag(&keyobj, ob, cursor, endtime, dbid); } else { - *cursor = 0; /* object type may have changed since we schedule it for later */ + *cursor = 0; /* object type/encoding may have changed since we schedule it for later */ } } else { *cursor = 0; /* object may have been deleted already */