From 4a3a1e7cc019899fcf70634636474dc2066a184a Mon Sep 17 00:00:00 2001 From: Sebastien Tardif Date: Thu, 7 May 2026 19:56:14 -0700 Subject: [PATCH 1/2] Fix integer overflow in cluster bus PUBLISH and MODULE message length validation Add overflow checks for channel_len + message_len in CLUSTERMSG_TYPE_PUBLISH/ PUBLISHSHARD and for len in CLUSTERMSG_TYPE_MODULE before adding them to explen. Without these checks, attacker-controlled uint32_t length fields can wrap around, producing a small explen that passes the totlen != explen validation. The processing path then uses the original huge lengths to create string objects or pass to module callbacks, causing heap out-of-bounds reads or OOM aborts. The fix extracts the ntohl'd lengths into local variables, checks each addition against UINT32_MAX before performing it, and rejects the packet with a warning log if overflow is detected. Signed-off-by: Sebastien Tardif --- src/cluster_legacy.c | 23 +++++++++++++++++------ 1 file changed, 17 insertions(+), 6 deletions(-) diff --git a/src/cluster_legacy.c b/src/cluster_legacy.c index c93aea2ad..a3d8dccff 100644 --- a/src/cluster_legacy.c +++ b/src/cluster_legacy.c @@ -2839,11 +2839,16 @@ int clusterProcessPacket(clusterLink *link) { explen = sizeof(clusterMsg)-sizeof(union clusterMsgData); explen += sizeof(clusterMsgDataFail); } else if (type == CLUSTERMSG_TYPE_PUBLISH || type == CLUSTERMSG_TYPE_PUBLISHSHARD) { + uint32_t ch_len = ntohl(hdr->data.publish.msg.channel_len); + uint32_t msg_len = ntohl(hdr->data.publish.msg.message_len); explen = sizeof(clusterMsg)-sizeof(union clusterMsgData); - explen += sizeof(clusterMsgDataPublish) - - 8 + - ntohl(hdr->data.publish.msg.channel_len) + - ntohl(hdr->data.publish.msg.message_len); + explen += sizeof(clusterMsgDataPublish) - 8; + if (ch_len > UINT32_MAX - explen || msg_len > UINT32_MAX - explen - ch_len) { + serverLog(LL_WARNING, "Received invalid %s packet with overflow in length fields " + "(channel_len:%u, message_len:%u)", clusterGetMessageTypeString(type), ch_len, msg_len); + return 1; + } + explen += ch_len + msg_len; } else if (type == CLUSTERMSG_TYPE_FAILOVER_AUTH_REQUEST || type == CLUSTERMSG_TYPE_FAILOVER_AUTH_ACK || type == CLUSTERMSG_TYPE_MFSTART) @@ -2853,9 +2858,15 @@ int clusterProcessPacket(clusterLink *link) { explen = sizeof(clusterMsg)-sizeof(union clusterMsgData); explen += sizeof(clusterMsgDataUpdate); } else if (type == CLUSTERMSG_TYPE_MODULE) { + uint32_t module_len = ntohl(hdr->data.module.msg.len); explen = sizeof(clusterMsg)-sizeof(union clusterMsgData); - explen += sizeof(clusterMsgModule) - - 3 + ntohl(hdr->data.module.msg.len); + explen += sizeof(clusterMsgModule) - 3; + if (module_len > UINT32_MAX - explen) { + serverLog(LL_WARNING, "Received invalid %s packet with overflow in length field " + "(len:%u)", clusterGetMessageTypeString(type), module_len); + return 1; + } + explen += module_len; } else { /* We don't know this type of packet, so we assume it's well formed. */ explen = totlen; From 7a307141ab344dcee1d442b476a10a1b039b6248 Mon Sep 17 00:00:00 2001 From: Sebastien Tardif Date: Tue, 26 May 2026 21:32:43 -0700 Subject: [PATCH 2/2] Add regression tests for PUBLISH and MODULE length overflow Two new tests exercise the overflow checks added to clusterProcessPacket: 1. PUBLISH with channel_len + message_len chosen to overflow uint32_t. Both set to 0x80000000, their sum wraps to 0. 2. MODULE with bulk_data_len (0xFFFFFFFF) exceeding sdslen, causing explen to wrap when added to the struct base size. Each test builds a raw cluster bus packet with the crafted length fields, sends it to a node's bus port, verifies the warning is logged, and checks the server is still responsive. Modeled on the approach in #15263 (raw Tcl packet construction). Signed-off-by: Sebastien Tardif --- tests/unit/cluster/packet-validation.tcl | 113 +++++++++++++++++++++++ 1 file changed, 113 insertions(+) create mode 100644 tests/unit/cluster/packet-validation.tcl diff --git a/tests/unit/cluster/packet-validation.tcl b/tests/unit/cluster/packet-validation.tcl new file mode 100644 index 000000000..bc72c2c89 --- /dev/null +++ b/tests/unit/cluster/packet-validation.tcl @@ -0,0 +1,113 @@ +start_cluster 2 0 {tags {external:skip cluster}} { + +# Build the 2256-byte cluster bus header (CLUSTERMSG_MIN_LEN) common to all +# message types. Only the type field and the data that follows differ. +proc build_cluster_bus_header {sender_name sender_port sender_cport msg_type totlen} { + set CLUSTER_NAMELEN 40 + set NET_IP_STR_LEN 46 + + set sender_padded [binary format a${CLUSTER_NAMELEN} $sender_name] + set myslots [string repeat \x00 [expr {16384/8}]] + set slaveof [string repeat \x00 $CLUSTER_NAMELEN] + set myip [string repeat \x00 $NET_IP_STR_LEN] + set notused1 [string repeat \x00 30] + + set hdr "" + append hdr "RCmb" + append hdr [binary format I $totlen] + append hdr [binary format S 1] ;# ver + append hdr [binary format S $sender_port] ;# port + append hdr [binary format S $msg_type] ;# type + append hdr [binary format S 0] ;# count + append hdr [binary format W 1] ;# currentEpoch + append hdr [binary format W 2] ;# configEpoch + append hdr [binary format W 0] ;# offset + append hdr $sender_padded ;# sender + append hdr $myslots ;# myslots + append hdr $slaveof ;# slaveof + append hdr $myip ;# myip + append hdr [binary format S 0] ;# extensions + append hdr $notused1 ;# notused1 + append hdr [binary format S 0] ;# pport + append hdr [binary format S $sender_cport] ;# cport + append hdr [binary format S 0] ;# flags + append hdr [binary format c 0] ;# state + append hdr [binary format ccc 0 0 0] ;# mflags + + return $hdr +} + +test "PUBLISH with channel_len + message_len overflow is rejected" { + set CLUSTERMSG_TYPE_PUBLISH 4 + set CLUSTERMSG_MIN_LEN 2256 + + set target_host [srv 0 host] + set target_bus_port [expr {[srv 0 port] + 10000}] + set node_id [R 1 CLUSTER MYID] + set sender_port [srv -1 port] + set sender_cport [expr {$sender_port + 10000}] + + # sizeof(clusterMsgDataPublish) - 8 = 16 - 8 = 8 + # With channel_len=0x80000000 and message_len=0x80000000, their sum + # overflows uint32 to 0, making explen = 2256 + 8 = 2264 without the fix. + # The fix detects the overflow before computing explen. + set channel_len 0x80000000 + set message_len 0x80000000 + set totlen [expr {$CLUSTERMSG_MIN_LEN + 8}] ;# 2264 + + set packet [build_cluster_bus_header $node_id $sender_port $sender_cport \ + $CLUSTERMSG_TYPE_PUBLISH $totlen] + append packet [binary format I $channel_len] + append packet [binary format I $message_len] + + set fd [socket $target_host $target_bus_port] + fconfigure $fd -translation binary -buffering full + puts -nonewline $fd $packet + flush $fd + after 500 + close $fd + + # The fix rejects the packet and logs a warning about the overflow. + wait_for_log_messages 0 {"*publish*overflow in length fields*"} 0 20 500 + + # Verify the server is still responsive after the malformed packet. + assert_equal [R 0 PING] {PONG} +} + +test "MODULE with len overflow is rejected" { + set CLUSTERMSG_TYPE_MODULE 9 + set CLUSTERMSG_MIN_LEN 2256 + + set target_host [srv 0 host] + set target_bus_port [expr {[srv 0 port] + 10000}] + set node_id [R 1 CLUSTER MYID] + set sender_port [srv -1 port] + set sender_cport [expr {$sender_port + 10000}] + + # sizeof(clusterMsgModule) - 3 = 16 - 3 = 13 + # With module_len=0xFFFFFFFF, adding to base explen (2256+13=2269) + # overflows uint32, wrapping to 2268 without the fix. + # The fix detects the overflow before computing explen. + set module_len 0xFFFFFFFF + set totlen [expr {($CLUSTERMSG_MIN_LEN + 13 + $module_len) & 0xFFFFFFFF}] ;# 2268 + + set packet [build_cluster_bus_header $node_id $sender_port $sender_cport \ + $CLUSTERMSG_TYPE_MODULE $totlen] + append packet [binary format W 0] ;# module_id + append packet [binary format I $module_len] ;# len + + set fd [socket $target_host $target_bus_port] + fconfigure $fd -translation binary -buffering full + puts -nonewline $fd $packet + flush $fd + after 500 + close $fd + + # The fix rejects the packet and logs a warning about the overflow. + wait_for_log_messages 0 {"*module*overflow in length field*"} 0 20 500 + + # Verify the server is still responsive after the malformed packet. + assert_equal [R 0 PING] {PONG} +} + +}