diff --git a/src/config.c b/src/config.c
index 025c312c0..4ecbc7e7f 100644
--- a/src/config.c
+++ b/src/config.c
@@ -3186,7 +3186,7 @@ standardConfig static_configs[] = {
     createIntConfig("databases", NULL, IMMUTABLE_CONFIG, 1, INT_MAX, server.dbnum, 16, INTEGER_CONFIG, NULL, NULL),
     createIntConfig("port", NULL, MODIFIABLE_CONFIG, 0, 65535, server.port, 6379, INTEGER_CONFIG, NULL, updatePort), /* TCP port. */
     createIntConfig("io-threads", NULL, DEBUG_CONFIG | IMMUTABLE_CONFIG, 1, 128, server.io_threads_num, 1, INTEGER_CONFIG, NULL, NULL), /* Single threaded by default */
-    createIntConfig("prefetch-batch-max-size", NULL, MODIFIABLE_CONFIG | HIDDEN_CONFIG, 0, 128, server.prefetch_batch_max_size, 16, INTEGER_CONFIG, NULL, NULL),
+    createIntConfig("prefetch-batch-max-size", NULL, MODIFIABLE_CONFIG | HIDDEN_CONFIG, 0, PREFETCH_BATCH_MAX_SIZE, server.prefetch_batch_max_size, 16, INTEGER_CONFIG, NULL, NULL),
     createIntConfig("auto-aof-rewrite-percentage", NULL, MODIFIABLE_CONFIG, 0, INT_MAX, server.aof_rewrite_perc, 100, INTEGER_CONFIG, NULL, NULL),
     createIntConfig("cluster-replica-validity-factor", "cluster-slave-validity-factor", MODIFIABLE_CONFIG, 0, INT_MAX, server.cluster_slave_validity_factor, 10, INTEGER_CONFIG, NULL, NULL), /* Slave max data age factor. */
     createIntConfig("list-max-listpack-size", "list-max-ziplist-size", MODIFIABLE_CONFIG, INT_MIN, INT_MAX, server.list_max_listpack_size, -2, INTEGER_CONFIG, NULL, NULL),
diff --git a/src/dict.c b/src/dict.c
index 18258c36f..d0885ff88 100644
--- a/src/dict.c
+++ b/src/dict.c
@@ -783,17 +783,11 @@ static dictEntryLink dictFindLinkInternal(dict *d, const void *key, dictEntryLin
         if (table == 0 && (long)idx < d->rehashidx) continue;
         idx = hash & DICTHT_SIZE_MASK(d->ht_size_exp[table]);
 
-        /* Prefetch the bucket at the calculated index */
-        redis_prefetch_read(&d->ht_table[table][idx]);
-
         link = &(d->ht_table[table][idx]);
         if (bucket) *bucket = link;
         while(link && *link) {
             const void *visitedKey = dictStoredKey2Key(d, dictGetKey(*link));
 
-            /* Prefetch the next entry to improve cache efficiency */
-            redis_prefetch_read(dictGetNext(*link));
-
             if (key == visitedKey || cmpFunc( &cmpCache, key, visitedKey))                
                 return link;
 
diff --git a/src/iothread.c b/src/iothread.c
index 81014b3d0..9dd808b32 100644
--- a/src/iothread.c
+++ b/src/iothread.c
@@ -351,18 +351,40 @@ int prefetchIOThreadCommands(IOThread *t) {
     int to_prefetch = determinePrefetchCount(len);
     if (to_prefetch == 0) return 0;
 
+    /* Two-phase approach to optimize cache utilization:
+     * Phase 1: issue prefetch hints for the client structures.
+     * Phase 2: access the now likely cached client data and add the
+     *          commands to the batch.
+     *
+     * The array has PREFETCH_BATCH_MAX_SIZE*2 slots since the configured batch
+     * size is doubled for better performance, see `determinePrefetchCount`.
+     * It is static, so slots that are not rewritten by phase 1 may still hold
+     * pointers from a previous call and must never be read. */
+    static client *c[PREFETCH_BATCH_MAX_SIZE*2];
+    serverAssert(to_prefetch <= PREFETCH_BATCH_MAX_SIZE*2);
     int clients = 0;
     listIter li;
     listNode *ln;
     listRewind(mainThreadProcessingClients[t->id], &li);
-    while((ln = listNext(&li)) && clients < to_prefetch) {
-        client *c = listNodeValue(ln);
-        /* A single command may contain multiple keys. If the batch is full,
-         * we stop adding clients to it. */
-        if (addCommandToBatch(c) == C_ERR) break;
-        clients++;
+    /* Phase 1: issue prefetch hints for the client struct and its pending_cmds.
+     * `fetched` counts the clients actually taken from the list, which bounds
+     * phase 2 even if the list yields fewer than `to_prefetch` nodes. */
+    int fetched = 0;
+    for (; fetched < to_prefetch && (ln = listNext(&li)); fetched++) {
+        c[fetched] = listNodeValue(ln);
+        redis_prefetch_read(c[fetched]);
+        redis_prefetch_read(&c[fetched]->pending_cmds);
     }
-
+    /* Phase 2: access the client data (now likely in cache) and add it to the
+     * batch. A single command may contain multiple keys, so when the batch is
+     * full (C_ERR) we stop adding clients to it. Also prefetch fields (reply,
+     * mem_usage_bucket) that will be needed later during command execution. */
+    for (int i = 0; i < fetched; i++) {
+        if (addCommandToBatch(c[i]) == C_ERR) break;
+        if (c[i]->reply) redis_prefetch_read(c[i]->reply);
+        redis_prefetch_read(&c[i]->mem_usage_bucket);
+        clients++;
+    }
     /* Prefetch the commands in the batch. */
     prefetchCommands();
     return clients;
diff --git a/src/server.h b/src/server.h
index 1ac7127ec..943400122 100644
--- a/src/server.h
+++ b/src/server.h
@@ -814,6 +814,9 @@ typedef enum {
 #define BUSY_MODULE_YIELD_EVENTS (1<<0)
 #define BUSY_MODULE_YIELD_CLIENTS (1<<1)
 
+/* Key prefetch configs: upper bound of the prefetch-batch-max-size config. */
+#define PREFETCH_BATCH_MAX_SIZE 128
+
 /*-----------------------------------------------------------------------------
  * Data types
  *----------------------------------------------------------------------------*/
diff --git a/tests/unit/networking.tcl b/tests/unit/networking.tcl
index e1338106c..6a04c22f0 100644
--- a/tests/unit/networking.tcl
+++ b/tests/unit/networking.tcl
@@ -332,6 +332,40 @@ start_server {config "minimal.conf" tags {"external:skip"} overrides {enable-deb
             # With slower machines, the number of prefetch entries can be lower
             assert_range $new_prefetch_entries [expr {$prefetch_entries + 2}] [expr {$prefetch_entries + 16}]
         }
+
+        test {Prefetch works with batch size greater than 16 (buffer overflow regression test)} {
+            # Snapshot the prefetch counter so the increase caused by this test can be measured.
+            set info [r info stats]
+            set prefetch_entries [getInfoProperty $info io_threaded_total_prefetch_entries]
+            # Raise the batch size above the old hardcoded limit of 16.
+            r config set prefetch-batch-max-size 64
+
+            # Create a batch with more than 16 clients (64) to trigger the old buffer overflow.
+            do_prefetch_batch $server_pid 64
+
+            # Re-read the counter and verify that the prefetch entries increased.
+            set info [r info stats]
+            set new_prefetch_entries [getInfoProperty $info io_threaded_total_prefetch_entries]
+            # On slower machines fewer entries may be prefetched, hence the range from +2 to +64.
+            assert_range $new_prefetch_entries [expr {$prefetch_entries + 2}] [expr {$prefetch_entries + 64}]
+        }
+
+        test {Prefetch works with maximum batch size of 128 and client number larger than batch size} {
+            # Snapshot the prefetch counter so the increase caused by this test can be measured.
+            set info [r info stats]
+            set prefetch_entries [getInfoProperty $info io_threaded_total_prefetch_entries]
+            # Set the batch size to 128, the maximum value the config accepts.
+            r config set prefetch-batch-max-size 128
+
+            # Create a batch with 300 clients, more than the 2*128 slots of the prefetch client array.
+            do_prefetch_batch $server_pid 300
+
+            # Re-read the counter and verify that the prefetch entries increased.
+            set info [r info stats]
+            set new_prefetch_entries [getInfoProperty $info io_threaded_total_prefetch_entries]
+            # On slower machines fewer entries may be prefetched, hence the range from +2 to +300.
+            assert_range $new_prefetch_entries [expr {$prefetch_entries + 2}] [expr {$prefetch_entries + 300}]
+        }
     }
 }