mirror of
https://github.com/redis/redis.git
synced 2026-05-28 04:02:46 -04:00
reduce getNodeByQuery CPU time by using less cache lines (from 2064 Bytes struct to 64 Bytes): reduces LLC misses and Memory Loads (#13296)
This PR shrinks the getKeysResult struct from 33 cache lines (with the
default static buffer of 256 keys):
```
root@hpe10:~/redis# pahole -p ./src/server.o -C getKeysResult
typedef struct {
keyReference keysbuf[256]; /* 0 2048 */
/* --- cacheline 32 boundary (2048 bytes) --- */
/* typedef keyReference */ struct {
int pos;
int flags;
} *keys; /* 2048 8 */
int numkeys; /* 2056 4 */
int size; /* 2060 4 */
/* size: 2064, cachelines: 33, members: 4 */
/* last cacheline: 16 bytes */
} getKeysResult;
```
to 1 cache line (with a static buffer of 6 keys per command):
```
root@hpe10:~/redis# pahole -p ./src/server.o -C getKeysResult
typedef struct {
int numkeys; /* 0 4 */
int size; /* 4 4 */
keyReference keysbuf[6]; /* 8 48 */
/* typedef keyReference */ struct {
int pos;
int flags;
} *keys; /* 56 8 */
/* size: 64, cachelines: 1, members: 4 */
} getKeysResult;
```
With this change we get around 1.5% higher ops/sec, and profiling confirms
around 15% fewer LLC loads on getNodeByQuery and 37% fewer stores.
Function / Call Stack | CPU Time: Difference | CPU Time: 9462436fa4 | CPU Time: this PR | Loads: Difference | Loads: 9462436fa4 | Loads: this PR | Stores: Difference | Stores: 9462436fa4 | Stores: this PR
-- | -- | -- | -- | -- | -- | -- | -- | -- | --
getNodeByQuery | 0.753767 | 1.57118 | 0.817416 | 144297829 (15% less loads) | 920575969 | 776278140 | 367607824 (37% less stores) | 991642384 | 624034560
## results on client side
### baseline
```
taskset -c 2,3 memtier_benchmark -s 192.168.1.200 --port 6379 --authenticate perf --cluster-mode --pipeline 10 --data-size 100 --ratio 1:0 --key-pattern P:P --key-minimum=1 --key-maximum 1000000 --test-time 180 -c 25 -t 2 --hide-histogram
Writing results to stdout
[RUN #1] Preparing benchmark client...
[RUN #1] Launching threads now...
[RUN #1 100%, 180 secs] 0 threads: 110333450 ops, 604992 (avg: 612942) ops/sec, 84.75MB/sec (avg: 85.86MB/sec), 0.82 (avg: 0.81) msec latency
2 Threads
25 Connections per thread
180 Seconds
ALL STATS
======================================================================================================================================================
Type Ops/sec Hits/sec Misses/sec MOVED/sec ASK/sec Avg. Latency p50 Latency p99 Latency p99.9 Latency KB/sec
------------------------------------------------------------------------------------------------------------------------------------------------------
Sets 612942.14 --- --- 0.00 0.00 0.81332 0.80700 1.26300 2.92700 87924.12
Gets 0.00 0.00 0.00 0.00 0.00 --- --- --- --- 0.00
Waits 0.00 --- --- --- --- --- --- --- --- ---
Totals 612942.14 0.00 0.00 0.00 0.00 0.81332 0.80700 1.26300 2.92700 87924.12
```
### comparison
```
taskset -c 2,3 memtier_benchmark -s 192.168.1.200 --port 6379 --authenticate perf --cluster-mode --pipeline 10 --data-size 100 --ratio 1:0 --key-pattern P:P --key-minimum=1 --key-maximum 1000000 --test-time 180 -c 25 -t 2 --hide-histogram
Writing results to stdout
[RUN #1] Preparing benchmark client...
[RUN #1] Launching threads now...
[RUN #1 100%, 180 secs] 0 threads: 111731310 ops, 610195 (avg: 620707) ops/sec, 85.48MB/sec (avg: 86.95MB/sec), 0.82 (avg: 0.80) msec latency
2 Threads
25 Connections per thread
180 Seconds
ALL STATS
======================================================================================================================================================
Type Ops/sec Hits/sec Misses/sec MOVED/sec ASK/sec Avg. Latency p50 Latency p99 Latency p99.9 Latency KB/sec
------------------------------------------------------------------------------------------------------------------------------------------------------
Sets 620707.72 --- --- 0.00 0.00 0.80312 0.79900 1.23900 2.87900 89037.78
Gets 0.00 0.00 0.00 0.00 0.00 --- --- --- --- 0.00
Waits 0.00 --- --- --- --- --- --- --- --- ---
Totals 620707.72 0.00 0.00 0.00 0.00 0.80312 0.79900 1.23900 2.87900 89037.78
```
Co-authored-by: filipecosta90 <filipecosta.90@gmail.com>
This commit is contained in:
parent
4aa25d042c
commit
24c85cc368
1 changed files with 5 additions and 4 deletions
|
|
@ -2071,7 +2071,8 @@ struct redisServer {
|
|||
char *locale_collate;
|
||||
};
|
||||
|
||||
#define MAX_KEYS_BUFFER 256
|
||||
/* We use 6 so that the whole getKeysResult struct fits in a single cache line */
|
||||
#define MAX_KEYS_BUFFER 6
|
||||
|
||||
typedef struct {
|
||||
int pos; /* The position of the key within the client array */
|
||||
|
|
@ -2084,12 +2085,12 @@ typedef struct {
|
|||
* for returning channel information.
|
||||
*/
|
||||
typedef struct {
|
||||
int numkeys; /* Number of key indices return */
|
||||
int size; /* Available array size */
|
||||
keyReference keysbuf[MAX_KEYS_BUFFER]; /* Pre-allocated buffer, to save heap allocations */
|
||||
keyReference *keys; /* Key indices array, points to keysbuf or heap */
|
||||
int numkeys; /* Number of key indices return */
|
||||
int size; /* Available array size */
|
||||
} getKeysResult;
|
||||
#define GETKEYS_RESULT_INIT { {{0}}, NULL, 0, MAX_KEYS_BUFFER }
|
||||
#define GETKEYS_RESULT_INIT { 0, MAX_KEYS_BUFFER, {{0}}, NULL }
|
||||
|
||||
/* Key specs definitions.
|
||||
*
|
||||
|
|
|
|||
Loading…
Reference in a new issue