From 3cd464263b03b425ffae2e23db24df3dc9346871 Mon Sep 17 00:00:00 2001 From: Vitah Lin Date: Wed, 15 Apr 2026 08:34:40 +0800 Subject: [PATCH] Fix gen_write_load error on MOVED/ASK during atomic-slot-migration tests (#15016) --- tests/helpers/gen_write_load.tcl | 26 +++++++++++++---- tests/support/util.tcl | 6 ++-- tests/unit/cluster/atomic-slot-migration.tcl | 30 ++++++++------------ 3 files changed, 36 insertions(+), 26 deletions(-) diff --git a/tests/helpers/gen_write_load.tcl b/tests/helpers/gen_write_load.tcl index 60d954e5db..e9f430ae11 100644 --- a/tests/helpers/gen_write_load.tcl +++ b/tests/helpers/gen_write_load.tcl @@ -18,7 +18,9 @@ set ::tlsdir "tests/tls" # Continuously sends SET commands to the server. If key is omitted, a random key # is used for every SET command. The value is always random. -proc gen_write_load {host port seconds tls {key ""} {size 0} {sleep 0}} { +# ignore_error_reply (default 0): when non-zero, MOVED/ASK replies are tolerated +# while draining pipelined responses (periodic 500-reply batches and final drain). +proc gen_write_load {host port seconds tls {key ""} {size 0} {sleep 0} {ignore_error_reply 0}} { set start_time [clock seconds] set r [redis $host $port 1 $tls] $r client setname LOAD_HANDLER @@ -44,12 +46,19 @@ proc gen_write_load {host port seconds tls {key ""} {size 0} {sleep 0}} { } else { $r set $key $value } - + incr count if {$count % 500 == 0} { for {set i 0} {$i < 500} {incr i} { - $r read + # Capture opts to preserve original errorInfo/errorCode on re-raise. + if {[catch {$r read} err opts]} { + if {$ignore_error_reply && ([string match {MOVED*} $err] || [string match {ASK*} $err])} { + continue + } + return -options $opts $err + } } + set count 0 } if {[clock seconds]-$start_time > $seconds} { @@ -59,12 +68,17 @@ proc gen_write_load {host port seconds tls {key ""} {size 0} {sleep 0}} { after $sleep } } - + # Read remaining replies for {set i 0} {$i < $count} {incr i} { - $r read + if {[catch {$r read} err opts]} { + if {$ignore_error_reply && ([string match {MOVED*} $err] || [string match {ASK*} $err])} { + continue + } + return -options $opts $err + } } exit 0 } -gen_write_load [lindex $argv 0] [lindex $argv 1] [lindex $argv 2] [lindex $argv 3] [lindex $argv 4] [lindex $argv 5] [lindex $argv 6] +gen_write_load [lindex $argv 0] [lindex $argv 1] [lindex $argv 2] [lindex $argv 3] [lindex $argv 4] [lindex $argv 5] [lindex $argv 6] [lindex $argv 7] diff --git a/tests/support/util.tcl b/tests/support/util.tcl index 0c9f648368..16eb80008f 100644 --- a/tests/support/util.tcl +++ b/tests/support/util.tcl @@ -604,9 +604,11 @@ proc find_valgrind_errors {stderr on_termination} { # Execute a background process writing random data for the specified number # of seconds to the specified Redis instance. If key is omitted, a random key # is used for every SET command. -proc start_write_load {host port seconds {key ""} {size 0} {sleep 0}} { +# ignore_error_reply (default 0): set non-zero in cluster slot-migration tests to tolerate +# MOVED/ASK replies while draining pipelined writes in the load helper. +proc start_write_load {host port seconds {key ""} {size 0} {sleep 0} {ignore_error_reply 0}} { set tclsh [info nameofexecutable] - exec $tclsh tests/helpers/gen_write_load.tcl $host $port $seconds $::tls $key $size $sleep & + exec $tclsh tests/helpers/gen_write_load.tcl $host $port $seconds $::tls $key $size $sleep $ignore_error_reply & } # Stop a process generating write load executed with start_write_load. diff --git a/tests/unit/cluster/atomic-slot-migration.tcl b/tests/unit/cluster/atomic-slot-migration.tcl index 826f0d69c4..74eee55f0c 100644 --- a/tests/unit/cluster/atomic-slot-migration.tcl +++ b/tests/unit/cluster/atomic-slot-migration.tcl @@ -577,23 +577,16 @@ start_cluster 3 3 {tags {external:skip cluster} overrides {cluster-node-timeout R 1 debug asm-trim-method none populate_slot 10000 -idx 1 -slot 6000 - # Start write traffic on node-0 - # Throws -MOVED error once asm is completed, catch block will ignore it. - catch { - # Start the slot 0 write load on the R 0 - set port [get_port 0] - set key [slot_key 0 mykey] - set load_handle0 [start_write_load "127.0.0.1" $port 100 $key 0 5] - } + # Start write traffic on node-0 (ignore_error_reply=1 tolerates MOVED/ASK + # replies while slots are being migrated). + set port [get_port 0] + set key [slot_key 0 mykey] + set load_handle0 [start_write_load "127.0.0.1" $port 100 $key 0 5 1] - # Start write traffic on node-1 - # Throws -MOVED error once asm is completed, catch block will ignore it. - catch { - # Start the slot 6000 write load on the R 1 - set port [get_port 1] - set key [slot_key 6000 mykey] - set load_handle1 [start_write_load "127.0.0.1" $port 100 $key 0 5] - } + # Start write traffic on node-1 (ignore_error_reply=1 for migration redirects). + set port [get_port 1] + set key [slot_key 6000 mykey] + set load_handle1 [start_write_load "127.0.0.1" $port 100 $key 0 5 1] # Migrate keys R 1 CLUSTER MIGRATION IMPORT 0 100 @@ -801,8 +794,9 @@ start_cluster 3 3 {tags {external:skip cluster} overrides {cluster-node-timeout # we set a delay to write incremental data R 1 config set rdb-key-save-delay 1000000 - # Start the slot 0 write load on the R 1 - set load_handle [start_write_load "127.0.0.1" [get_port 1] 100 $slot0_key] + # Start slot 0 write load on R1. ignore_error_reply=1 tolerates MOVED/ASK + # replies that can appear while slot 0 is being migrated. + set load_handle [start_write_load "127.0.0.1" [get_port 1] 100 $slot0_key 0 0 1] # Clear all fail points assert_equal {OK} [R 0 debug asm-failpoint "" ""]