diff --git a/src/config.c b/src/config.c
index 025c312c0..4ecbc7e7f 100644
--- a/src/config.c
+++ b/src/config.c
@@ -3186,7 +3186,7 @@ standardConfig static_configs[] = {
     createIntConfig("databases", NULL, IMMUTABLE_CONFIG, 1, INT_MAX, server.dbnum, 16, INTEGER_CONFIG, NULL, NULL),
     createIntConfig("port", NULL, MODIFIABLE_CONFIG, 0, 65535, server.port, 6379, INTEGER_CONFIG, NULL, updatePort), /* TCP port. */
     createIntConfig("io-threads", NULL, DEBUG_CONFIG | IMMUTABLE_CONFIG, 1, 128, server.io_threads_num, 1, INTEGER_CONFIG, NULL, NULL), /* Single threaded by default */
-    createIntConfig("prefetch-batch-max-size", NULL, MODIFIABLE_CONFIG | HIDDEN_CONFIG, 0, 128, server.prefetch_batch_max_size, 16, INTEGER_CONFIG, NULL, NULL),
+    createIntConfig("prefetch-batch-max-size", NULL, MODIFIABLE_CONFIG | HIDDEN_CONFIG, 0, PREFETCH_BATCH_MAX_SIZE, server.prefetch_batch_max_size, 16, INTEGER_CONFIG, NULL, NULL),
     createIntConfig("auto-aof-rewrite-percentage", NULL, MODIFIABLE_CONFIG, 0, INT_MAX, server.aof_rewrite_perc, 100, INTEGER_CONFIG, NULL, NULL),
     createIntConfig("cluster-replica-validity-factor", "cluster-slave-validity-factor", MODIFIABLE_CONFIG, 0, INT_MAX, server.cluster_slave_validity_factor, 10, INTEGER_CONFIG, NULL, NULL), /* Slave max data age factor. */
     createIntConfig("list-max-listpack-size", "list-max-ziplist-size", MODIFIABLE_CONFIG, INT_MIN, INT_MAX, server.list_max_listpack_size, -2, INTEGER_CONFIG, NULL, NULL),
diff --git a/src/dict.c b/src/dict.c
index 18258c36f..d0885ff88 100644
--- a/src/dict.c
+++ b/src/dict.c
@@ -783,17 +783,11 @@ static dictEntryLink dictFindLinkInternal(dict *d, const void *key, dictEntryLin
         if (table == 0 && (long)idx < d->rehashidx) continue;
         idx = hash & DICTHT_SIZE_MASK(d->ht_size_exp[table]);
 
-        /* Prefetch the bucket at the calculated index */
-        redis_prefetch_read(&d->ht_table[table][idx]);
-
         link = &(d->ht_table[table][idx]);
         if (bucket) *bucket = link;
         while(link && *link) {
             const void *visitedKey = dictStoredKey2Key(d, dictGetKey(*link));
 
-            /* Prefetch the next entry to improve cache efficiency */
-            redis_prefetch_read(dictGetNext(*link));
-
             if (key == visitedKey || cmpFunc( &cmpCache, key, visitedKey))
                 return link;
 
diff --git a/src/iothread.c b/src/iothread.c
index 81014b3d0..9dd808b32 100644
--- a/src/iothread.c
+++ b/src/iothread.c
@@ -351,18 +351,38 @@ int prefetchIOThreadCommands(IOThread *t) {
     int to_prefetch = determinePrefetchCount(len);
     if (to_prefetch == 0) return 0;
 
+    /* Two-phase approach to optimize cache utilization:
+     * Phase 1: Issue prefetch hints for client structures
+     * Phase 2: Access the now-cached client data and add commands to batch */
+    /* Since we double the configured size for better performance,
+     * see also `determinePrefetchCount` */
+    static client *c[PREFETCH_BATCH_MAX_SIZE*2];
+    serverAssert(PREFETCH_BATCH_MAX_SIZE*2 >= to_prefetch );
+    /* Number of slots of c[] filled by phase 1 in this call. c[] is static, so
+     * slots past this count may still hold pointers from a previous call. */
+    int collected = 0;
     int clients = 0;
     listIter li;
     listNode *ln;
     listRewind(mainThreadProcessingClients[t->id], &li);
-    while((ln = listNext(&li)) && clients < to_prefetch) {
-        client *c = listNodeValue(ln);
-        /* A single command may contain multiple keys. If the batch is full,
-         * we stop adding clients to it. */
-        if (addCommandToBatch(c) == C_ERR) break;
-        clients++;
+    /* Phase 1: Issue prefetch instructions for client struct and pending_cmds.
+     * These prefetches will bring data into cache asynchronously. */
+    for (int i = 0; i < to_prefetch && (ln = listNext(&li)); i++) {
+        c[i] = listNodeValue(ln);
+        redis_prefetch_read(c[i]);
+        redis_prefetch_read(&c[i]->pending_cmds);
+        collected++;
     }
-
+    /* Phase 2: Access client data (now likely in cache) and add to batch.
+     * A single command may contain multiple keys, so stop as soon as the
+     * batch is full. Also prefetch additional fields (reply, mem_usage_bucket)
+     * that will be needed later during command execution. */
+    for (int i = 0; i < collected; i++) {
+        if (addCommandToBatch(c[i]) == C_ERR) break;
+        if (c[i]->reply) redis_prefetch_read(c[i]->reply);
+        redis_prefetch_read(&c[i]->mem_usage_bucket);
+        clients++;
+    }
     /* Prefetch the commands in the batch. */
     prefetchCommands();
     return clients;
diff --git a/src/server.h b/src/server.h
index 1ac7127ec..943400122 100644
--- a/src/server.h
+++ b/src/server.h
@@ -814,6 +814,9 @@ typedef enum {
 #define BUSY_MODULE_YIELD_EVENTS (1<<0)
 #define BUSY_MODULE_YIELD_CLIENTS (1<<1)
 
+/* Key prefetch configs */
+#define PREFETCH_BATCH_MAX_SIZE 128
+
 /*-----------------------------------------------------------------------------
  * Data types
  *----------------------------------------------------------------------------*/
diff --git a/tests/unit/networking.tcl b/tests/unit/networking.tcl
index e1338106c..6a04c22f0 100644
--- a/tests/unit/networking.tcl
+++ b/tests/unit/networking.tcl
@@ -332,6 +332,40 @@ start_server {config "minimal.conf" tags {"external:skip"} overrides {enable-deb
             # With slower machines, the number of prefetch entries can be lower
             assert_range $new_prefetch_entries [expr {$prefetch_entries + 2}] [expr {$prefetch_entries + 16}]
         }
+
+        test {Prefetch works with batch size greater than 16 (buffer overflow regression test)} {
+            # save the current value of prefetch entries
+            set info [r info stats]
+            set prefetch_entries [getInfoProperty $info io_threaded_total_prefetch_entries]
+            # set the batch size to a value greater than the old hardcoded limit of 16
+            r config set prefetch-batch-max-size 64
+
+            # Create a batch with more than 16 clients to trigger the old buffer overflow
+            do_prefetch_batch $server_pid 64
+
+            # verify the prefetch entries increased
+            set info [r info stats]
+            set new_prefetch_entries [getInfoProperty $info io_threaded_total_prefetch_entries]
+            # With slower machines, the number of prefetch entries can be lower
+            assert_range $new_prefetch_entries [expr {$prefetch_entries + 2}] [expr {$prefetch_entries + 64}]
+        }
+
+        test {Prefetch works with maximum batch size of 128 and client number larger than batch size} {
+            # save the current value of prefetch entries
+            set info [r info stats]
+            set prefetch_entries [getInfoProperty $info io_threaded_total_prefetch_entries]
+            # set the batch size to the maximum allowed value
+            r config set prefetch-batch-max-size 128
+
+            # Create a batch with 300 clients to test the maximum limit
+            do_prefetch_batch $server_pid 300
+
+            # verify the prefetch entries increased
+            set info [r info stats]
+            set new_prefetch_entries [getInfoProperty $info io_threaded_total_prefetch_entries]
+            # With slower machines, the number of prefetch entries can be lower
+            assert_range $new_prefetch_entries [expr {$prefetch_entries + 2}] [expr {$prefetch_entries + 300}]
+        }
     }
 }
 