From 64a77caa11ee6c0e76e5dff28e8c1a27be9826de Mon Sep 17 00:00:00 2001 From: "yang.l" Date: Wed, 27 Aug 2025 16:13:52 +0800 Subject: [PATCH] Fix: fully drain cluster link send buffer to prevent OOM (#14310) Under a high rate of PUBLISH commands, a Redis cluster node needs to broadcast these messages to other nodes. The `clusterWriteHandler`, which handles writing to the cluster link, previously limited the total bytes written per event loop cycle. When the message ingress rate is higher than the egress rate allowed by this limit, the link's send queue (`send_msg_queue`) grows continuously. This unbounded growth leads to excessive memory consumption and can ultimately cause an Out-Of-Memory (OOM) error, crashing the node. This commit removes the write limit in `clusterWriteHandler`. By doing so, the handler will now attempt to drain the entire send queue in each invocation. This prevents the buffer from bloating and ensures the stability of the cluster under heavy broadcast loads. --- src/cluster_legacy.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/src/cluster_legacy.c b/src/cluster_legacy.c index 46dd8bce6..af60a531c 100644 --- a/src/cluster_legacy.c +++ b/src/cluster_legacy.c @@ -3320,9 +3320,8 @@ void handleLinkIOError(clusterLink *link) { void clusterWriteHandler(connection *conn) { clusterLink *link = connGetPrivateData(conn); ssize_t nwritten; - size_t totwritten = 0; - while (totwritten < NET_MAX_WRITES_PER_EVENT && listLength(link->send_msg_queue) > 0) { + while (listLength(link->send_msg_queue) > 0) { listNode *head = listFirst(link->send_msg_queue); clusterMsgSendBlock *msgblock = (clusterMsgSendBlock*)head->value; clusterMsg *msg = getMessageFromSendBlock(msgblock); @@ -3350,8 +3349,6 @@ void clusterWriteHandler(connection *conn) { listDelNode(link->send_msg_queue, head); server.stat_cluster_links_memory -= sizeof(listNode); link->send_msg_queue_mem -= sizeof(listNode) + blocklen; - - totwritten += nwritten; } if
(listLength(link->send_msg_queue) == 0)