mirror of
https://github.com/redis/redis.git
synced 2026-05-28 04:02:46 -04:00
Prefetch client fields before prefetching command-related data (#14700)
This PR refines the prefetch strategy by removing ineffective (too close in the pipeline) dictionary-level prefetching and improving prefetch usage in IO threads. The goal is to better align prefetches with predictable access patterns. ## Changes - Removed speculative prefetching from `dictFindLinkInternal()`, simplifying the dictionary lookup hot path. - Introduced a two-phase prefetch approach in `prefetchIOThreadCommands()`: - Phase 1: Prefetch client structures and `pending_cmds` - Phase 2: Add commands to the batch and prefetch follow-up fields (`reply`, `mem_usage_bucket`) ## Performance Measured with `memtier_benchmark-1Mkeys-string-setget2000c-1KiB-pipeline-16`. | Environment | % change | |-----------------------------|----------| | oss-standalone | -0.1% | | oss-standalone-02-io-threads | +0.4% | | oss-standalone-04-io-threads | +1.6% | | oss-standalone-08-io-threads | +2.3% | | oss-standalone-12-io-threads | +0.7% | | oss-standalone-16-io-threads | +1.9% | Overall, this shows a ~2% throughput improvement on IO-threaded configurations, with no meaningful impact on non-IO-threaded setups. --------- Co-authored-by: Yuan Wang <wangyuancode@163.com>
This commit is contained in:
parent
c93e4a62c6
commit
7f541b9607
5 changed files with 60 additions and 14 deletions
|
|
@ -3186,7 +3186,7 @@ standardConfig static_configs[] = {
|
|||
createIntConfig("databases", NULL, IMMUTABLE_CONFIG, 1, INT_MAX, server.dbnum, 16, INTEGER_CONFIG, NULL, NULL),
|
||||
createIntConfig("port", NULL, MODIFIABLE_CONFIG, 0, 65535, server.port, 6379, INTEGER_CONFIG, NULL, updatePort), /* TCP port. */
|
||||
createIntConfig("io-threads", NULL, DEBUG_CONFIG | IMMUTABLE_CONFIG, 1, 128, server.io_threads_num, 1, INTEGER_CONFIG, NULL, NULL), /* Single threaded by default */
|
||||
createIntConfig("prefetch-batch-max-size", NULL, MODIFIABLE_CONFIG | HIDDEN_CONFIG, 0, 128, server.prefetch_batch_max_size, 16, INTEGER_CONFIG, NULL, NULL),
|
||||
createIntConfig("prefetch-batch-max-size", NULL, MODIFIABLE_CONFIG | HIDDEN_CONFIG, 0, PREFETCH_BATCH_MAX_SIZE, server.prefetch_batch_max_size, 16, INTEGER_CONFIG, NULL, NULL),
|
||||
createIntConfig("auto-aof-rewrite-percentage", NULL, MODIFIABLE_CONFIG, 0, INT_MAX, server.aof_rewrite_perc, 100, INTEGER_CONFIG, NULL, NULL),
|
||||
createIntConfig("cluster-replica-validity-factor", "cluster-slave-validity-factor", MODIFIABLE_CONFIG, 0, INT_MAX, server.cluster_slave_validity_factor, 10, INTEGER_CONFIG, NULL, NULL), /* Slave max data age factor. */
|
||||
createIntConfig("list-max-listpack-size", "list-max-ziplist-size", MODIFIABLE_CONFIG, INT_MIN, INT_MAX, server.list_max_listpack_size, -2, INTEGER_CONFIG, NULL, NULL),
|
||||
|
|
|
|||
|
|
@ -783,17 +783,11 @@ static dictEntryLink dictFindLinkInternal(dict *d, const void *key, dictEntryLin
|
|||
if (table == 0 && (long)idx < d->rehashidx) continue;
|
||||
idx = hash & DICTHT_SIZE_MASK(d->ht_size_exp[table]);
|
||||
|
||||
/* Prefetch the bucket at the calculated index */
|
||||
redis_prefetch_read(&d->ht_table[table][idx]);
|
||||
|
||||
link = &(d->ht_table[table][idx]);
|
||||
if (bucket) *bucket = link;
|
||||
while(link && *link) {
|
||||
const void *visitedKey = dictStoredKey2Key(d, dictGetKey(*link));
|
||||
|
||||
/* Prefetch the next entry to improve cache efficiency */
|
||||
redis_prefetch_read(dictGetNext(*link));
|
||||
|
||||
if (key == visitedKey || cmpFunc( &cmpCache, key, visitedKey))
|
||||
return link;
|
||||
|
||||
|
|
|
|||
|
|
@ -351,18 +351,33 @@ int prefetchIOThreadCommands(IOThread *t) {
|
|||
int to_prefetch = determinePrefetchCount(len);
|
||||
if (to_prefetch == 0) return 0;
|
||||
|
||||
/* Two-phase approach to optimize cache utilization:
|
||||
* Phase 1: Issue prefetch hints for client structures
|
||||
* Phase 2: Access the now-cached client data and add commands to batch */
|
||||
/* Since we double the configured size for better performance,
|
||||
* see also `determinePrefetchCount` */
|
||||
static client *c[PREFETCH_BATCH_MAX_SIZE*2];
|
||||
serverAssert(PREFETCH_BATCH_MAX_SIZE*2 >= to_prefetch );
|
||||
int clients = 0;
|
||||
listIter li;
|
||||
listNode *ln;
|
||||
listRewind(mainThreadProcessingClients[t->id], &li);
|
||||
while((ln = listNext(&li)) && clients < to_prefetch) {
|
||||
client *c = listNodeValue(ln);
|
||||
/* A single command may contain multiple keys. If the batch is full,
|
||||
* we stop adding clients to it. */
|
||||
if (addCommandToBatch(c) == C_ERR) break;
|
||||
clients++;
|
||||
/* Phase 1: Issue prefetch instructions for client struct and pending_cmds.
|
||||
* These prefetches will bring data into cache asynchronously. */
|
||||
for (int i = 0; i < to_prefetch && (ln = listNext(&li)); i++) {
|
||||
c[i] = listNodeValue(ln);
|
||||
redis_prefetch_read(c[i]);
|
||||
redis_prefetch_read(&c[i]->pending_cmds);
|
||||
}
|
||||
|
||||
/* Phase 2: Access client data (now likely in cache) and add to batch.
|
||||
* Also prefetch additional fields (reply, mem_usage_bucket) that will be
|
||||
* needed later during command execution. */
|
||||
for (int i = 0; i < to_prefetch; i++) {
|
||||
if (addCommandToBatch(c[i]) == C_ERR) break;
|
||||
if (c[i]->reply) redis_prefetch_read(c[i]->reply);
|
||||
redis_prefetch_read(&c[i]->mem_usage_bucket);
|
||||
clients++;
|
||||
}
|
||||
/* Prefetch the commands in the batch. */
|
||||
prefetchCommands();
|
||||
return clients;
|
||||
|
|
|
|||
|
|
@ -814,6 +814,9 @@ typedef enum {
|
|||
#define BUSY_MODULE_YIELD_EVENTS (1<<0)
|
||||
#define BUSY_MODULE_YIELD_CLIENTS (1<<1)
|
||||
|
||||
/* Key prefetch configs */
|
||||
#define PREFETCH_BATCH_MAX_SIZE 128
|
||||
|
||||
/*-----------------------------------------------------------------------------
|
||||
* Data types
|
||||
*----------------------------------------------------------------------------*/
|
||||
|
|
|
|||
|
|
@ -332,6 +332,40 @@ start_server {config "minimal.conf" tags {"external:skip"} overrides {enable-deb
|
|||
# With slower machines, the number of prefetch entries can be lower
|
||||
assert_range $new_prefetch_entries [expr {$prefetch_entries + 2}] [expr {$prefetch_entries + 16}]
|
||||
}
|
||||
|
||||
test {Prefetch works with batch size greater than 16 (buffer overflow regression test)} {
|
||||
# save the current value of prefetch entries
|
||||
set info [r info stats]
|
||||
set prefetch_entries [getInfoProperty $info io_threaded_total_prefetch_entries]
|
||||
# set the batch size to a value greater than the old hardcoded limit of 16
|
||||
r config set prefetch-batch-max-size 64
|
||||
|
||||
# Create a batch with more than 16 clients to trigger the old buffer overflow
|
||||
do_prefetch_batch $server_pid 64
|
||||
|
||||
# verify the prefetch entries increased
|
||||
set info [r info stats]
|
||||
set new_prefetch_entries [getInfoProperty $info io_threaded_total_prefetch_entries]
|
||||
# With slower machines, the number of prefetch entries can be lower
|
||||
assert_range $new_prefetch_entries [expr {$prefetch_entries + 2}] [expr {$prefetch_entries + 64}]
|
||||
}
|
||||
|
||||
test {Prefetch works with maximum batch size of 128 and client number larger than batch size} {
|
||||
# save the current value of prefetch entries
|
||||
set info [r info stats]
|
||||
set prefetch_entries [getInfoProperty $info io_threaded_total_prefetch_entries]
|
||||
# set the batch size to the maximum allowed value
|
||||
r config set prefetch-batch-max-size 128
|
||||
|
||||
# Create a batch with 300 clients to test the maximum limit
|
||||
do_prefetch_batch $server_pid 300
|
||||
|
||||
# verify the prefetch entries increased
|
||||
set info [r info stats]
|
||||
set new_prefetch_entries [getInfoProperty $info io_threaded_total_prefetch_entries]
|
||||
# With slower machines, the number of prefetch entries can be lower
|
||||
assert_range $new_prefetch_entries [expr {$prefetch_entries + 2}] [expr {$prefetch_entries + 300}]
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
|||
Loading…
Reference in a new issue