List snapshot data outside of lock

This optimizes the exact resource version list logic (StoreList/.../RV=Exact)
by only looking up the snapshot while holding the watch cache read lock and
performing the B-Tree traversal after the lock has been released.

The benchstat evidence below is limited to the exact resource version
scenarios (`RV=Exact` for StoreList and `Background=ListerExactRV` for
StoreWriteThroughput), which are the only paths executing the modified code.

```text

goos: linux
goarch: amd64
pkg: k8s.io/apiserver/pkg/storage/cacher
cpu: AMD Ryzen Threadripper PRO 3945WX 12-Cores
                                                                                                                                         │ before_exact_fixed.txt │        after_exact_fixed.txt         │
                                                                                                                                         │         sec/op         │    sec/op      vs base               │
StoreWriteThroughput/Namespaces=50/Pods=150000/Nodes=5000/Traffic=DeleteCreate/Parallelism=25/Background=ListerExactRV/UseIndex=false-24           218.4µ ±   37%   109.9µ ±  46%  -49.68% (p=0.004 n=6)
StoreWriteThroughput/Namespaces=50/Pods=150000/Nodes=5000/Traffic=DeleteCreate/Parallelism=25/Background=ListerExactRV/UseIndex=true-24            215.2µ ±   48%   218.9µ ±  47%        ~ (p=0.937 n=6)
StoreWriteThroughput/Namespaces=50/Pods=150000/Nodes=5000/Traffic=Patch/Parallelism=25/Background=ListerExactRV/UseIndex=false-24                  50.88µ ± 1960%   40.65µ ± 157%        ~ (p=0.818 n=6)
StoreWriteThroughput/Namespaces=50/Pods=150000/Nodes=5000/Traffic=Patch/Parallelism=25/Background=ListerExactRV/UseIndex=true-24                   37.41µ ±   32%   25.05µ ±  58%        ~ (p=0.180 n=6)
StoreList/Namespaces=50/Pods=150000/Nodes=5000/Indexed=true/RV=Exact/Scope=Cluster/Paginate=0-24                                                   30.33m ±   24%   24.79m ±  35%        ~ (p=0.240 n=6)
StoreList/Namespaces=50/Pods=150000/Nodes=5000/Indexed=true/RV=Exact/Scope=Cluster/Paginate=1000-24                                                 2.254 ±    9%    2.194 ±  12%        ~ (p=0.589 n=6)
StoreList/Namespaces=50/Pods=150000/Nodes=5000/Indexed=true/RV=Exact/Scope=Node/Paginate=0-24                                                      7.960m ±   15%   8.118m ±  15%        ~ (p=0.589 n=6)
StoreList/Namespaces=50/Pods=150000/Nodes=5000/Indexed=true/RV=Exact/Scope=Namespace/Paginate=0-24                                                 453.6µ ±    5%   429.1µ ±  18%   -5.40% (p=0.041 n=6)
StoreList/Namespaces=50/Pods=150000/Nodes=5000/Indexed=true/RV=Exact/Scope=Namespace/Paginate=1000-24                                              497.3µ ±   15%   514.2µ ±  13%        ~ (p=1.000 n=6)
StoreList/Namespaces=50/Pods=150000/Nodes=5000/Indexed=false/RV=Exact/Scope=Cluster/Paginate=0-24                                                  27.38m ±   10%   25.53m ±  39%        ~ (p=0.485 n=6)
StoreList/Namespaces=50/Pods=150000/Nodes=5000/Indexed=false/RV=Exact/Scope=Cluster/Paginate=1000-24                                                2.355 ±   16%    2.057 ±  18%        ~ (p=0.240 n=6)
StoreList/Namespaces=50/Pods=150000/Nodes=5000/Indexed=false/RV=Exact/Scope=Node/Paginate=0-24                                                     25.19m ±   47%   24.64m ±  27%        ~ (p=0.699 n=6)
StoreList/Namespaces=50/Pods=150000/Nodes=5000/Indexed=false/RV=Exact/Scope=Namespace/Paginate=0-24                                                422.5µ ±   20%   423.2µ ±  11%        ~ (p=0.699 n=6)
StoreList/Namespaces=50/Pods=150000/Nodes=5000/Indexed=false/RV=Exact/Scope=Namespace/Paginate=1000-24                                             499.4µ ±   11%   530.0µ ±  26%        ~ (p=0.132 n=6)
geomean                                                                                                                                            2.949m           2.613m         -11.39%

                                                                                                                                         │ before_exact_fixed.txt │       after_exact_fixed.txt        │
                                                                                                                                         │      list-calls/s      │ list-calls/s  vs base              │
StoreWriteThroughput/Namespaces=50/Pods=150000/Nodes=5000/Traffic=DeleteCreate/Parallelism=25/Background=ListerExactRV/UseIndex=false-24            8.180 ±  102%    9.119 ± 38%       ~ (p=0.485 n=6)
StoreWriteThroughput/Namespaces=50/Pods=150000/Nodes=5000/Traffic=DeleteCreate/Parallelism=25/Background=ListerExactRV/UseIndex=true-24             8.334 ±   67%    7.505 ± 52%       ~ (p=0.699 n=6)
StoreWriteThroughput/Namespaces=50/Pods=150000/Nodes=5000/Traffic=Patch/Parallelism=25/Background=ListerExactRV/UseIndex=false-24                   8.194 ± 2200%    8.233 ± 27%       ~ (p=0.937 n=6)
StoreWriteThroughput/Namespaces=50/Pods=150000/Nodes=5000/Traffic=Patch/Parallelism=25/Background=ListerExactRV/UseIndex=true-24                    9.043 ±   31%    9.500 ± 22%       ~ (p=0.818 n=6)
StoreList/Namespaces=50/Pods=150000/Nodes=5000/Indexed=true/RV=Exact/Scope=Cluster/Paginate=0-24                                                    32.99 ±   31%    40.41 ± 44%       ~ (p=0.240 n=6)
StoreList/Namespaces=50/Pods=150000/Nodes=5000/Indexed=true/RV=Exact/Scope=Cluster/Paginate=1000-24                                                 66.54 ±    9%    68.38 ± 14%       ~ (p=0.589 n=6)
StoreList/Namespaces=50/Pods=150000/Nodes=5000/Indexed=true/RV=Exact/Scope=Node/Paginate=0-24                                                       125.7 ±   13%    123.2 ± 17%       ~ (p=0.589 n=6)
StoreList/Namespaces=50/Pods=150000/Nodes=5000/Indexed=true/RV=Exact/Scope=Namespace/Paginate=0-24                                                 2.205k ±    5%   2.331k ± 23%  +5.71% (p=0.041 n=6)
StoreList/Namespaces=50/Pods=150000/Nodes=5000/Indexed=true/RV=Exact/Scope=Namespace/Paginate=1000-24                                              6.034k ±   13%   5.835k ± 15%       ~ (p=1.000 n=6)
StoreList/Namespaces=50/Pods=150000/Nodes=5000/Indexed=false/RV=Exact/Scope=Cluster/Paginate=0-24                                                   36.52 ±   11%    39.28 ± 28%       ~ (p=0.485 n=6)
StoreList/Namespaces=50/Pods=150000/Nodes=5000/Indexed=false/RV=Exact/Scope=Cluster/Paginate=1000-24                                                63.70 ±   19%    72.92 ± 15%       ~ (p=0.240 n=6)
StoreList/Namespaces=50/Pods=150000/Nodes=5000/Indexed=false/RV=Exact/Scope=Node/Paginate=0-24                                                      39.73 ±   32%    40.83 ± 22%       ~ (p=0.699 n=6)
StoreList/Namespaces=50/Pods=150000/Nodes=5000/Indexed=false/RV=Exact/Scope=Namespace/Paginate=0-24                                                2.366k ±   25%   2.363k ± 10%       ~ (p=0.699 n=6)
StoreList/Namespaces=50/Pods=150000/Nodes=5000/Indexed=false/RV=Exact/Scope=Namespace/Paginate=1000-24                                             6.007k ±   12%   5.665k ± 35%       ~ (p=0.132 n=6)
geomean                                                                                                                                             106.6            110.2        +3.38%

                                                                                                                                         │ before_exact_fixed.txt │       after_exact_fixed.txt        │
                                                                                                                                         │      list-objs/s       │ list-objs/s   vs base              │
StoreWriteThroughput/Namespaces=50/Pods=150000/Nodes=5000/Traffic=DeleteCreate/Parallelism=25/Background=ListerExactRV/UseIndex=false-24           1.225M ±  102%   1.366M ± 38%       ~ (p=0.394 n=6)
StoreWriteThroughput/Namespaces=50/Pods=150000/Nodes=5000/Traffic=DeleteCreate/Parallelism=25/Background=ListerExactRV/UseIndex=true-24             275.0 ±   67%    216.4 ± 53%       ~ (p=0.485 n=6)
StoreWriteThroughput/Namespaces=50/Pods=150000/Nodes=5000/Traffic=Patch/Parallelism=25/Background=ListerExactRV/UseIndex=false-24                  1.229M ± 2200%   1.235M ± 27%       ~ (p=0.937 n=6)
StoreWriteThroughput/Namespaces=50/Pods=150000/Nodes=5000/Traffic=Patch/Parallelism=25/Background=ListerExactRV/UseIndex=true-24                    298.4 ±   31%    275.5 ± 22%       ~ (p=0.180 n=6)
StoreList/Namespaces=50/Pods=150000/Nodes=5000/Indexed=true/RV=Exact/Scope=Cluster/Paginate=0-24                                                   4.947M ±   31%   6.059M ± 44%       ~ (p=0.240 n=6)
StoreList/Namespaces=50/Pods=150000/Nodes=5000/Indexed=true/RV=Exact/Scope=Cluster/Paginate=1000-24                                                66.50k ±    9%   68.34k ± 14%       ~ (p=0.589 n=6)
StoreList/Namespaces=50/Pods=150000/Nodes=5000/Indexed=true/RV=Exact/Scope=Node/Paginate=0-24                                                      3.764k ±   13%   3.735k ± 17%       ~ (p=0.589 n=6)
StoreList/Namespaces=50/Pods=150000/Nodes=5000/Indexed=true/RV=Exact/Scope=Namespace/Paginate=0-24                                                 6.614M ±    5%   6.992M ± 23%  +5.72% (p=0.041 n=6)
StoreList/Namespaces=50/Pods=150000/Nodes=5000/Indexed=true/RV=Exact/Scope=Namespace/Paginate=1000-24                                              6.032M ±   13%   5.835M ± 15%       ~ (p=1.000 n=6)
StoreList/Namespaces=50/Pods=150000/Nodes=5000/Indexed=false/RV=Exact/Scope=Cluster/Paginate=0-24                                                  5.476M ±   11%   5.890M ± 28%       ~ (p=0.485 n=6)
StoreList/Namespaces=50/Pods=150000/Nodes=5000/Indexed=false/RV=Exact/Scope=Cluster/Paginate=1000-24                                               63.66k ±   19%   72.88k ± 15%       ~ (p=0.240 n=6)
StoreList/Namespaces=50/Pods=150000/Nodes=5000/Indexed=false/RV=Exact/Scope=Node/Paginate=0-24                                                     5.958M ±   32%   6.121M ± 22%       ~ (p=0.699 n=6)
StoreList/Namespaces=50/Pods=150000/Nodes=5000/Indexed=false/RV=Exact/Scope=Namespace/Paginate=0-24                                                7.099M ±   25%   7.088M ± 10%       ~ (p=0.699 n=6)
StoreList/Namespaces=50/Pods=150000/Nodes=5000/Indexed=false/RV=Exact/Scope=Namespace/Paginate=1000-24                                             6.006M ±   12%   5.664M ± 35%       ~ (p=0.132 n=6)
geomean                                                                                                                                            356.7k           362.2k        +1.54%

                                                                                                                                         │ before_exact_fixed.txt │         after_exact_fixed.txt          │
                                                                                                                                         │     seconds-delay      │  seconds-delay   vs base               │
StoreWriteThroughput/Namespaces=50/Pods=150000/Nodes=5000/Traffic=DeleteCreate/Parallelism=25/Background=ListerExactRV/UseIndex=false-24          616.95m ±  157%    30.28m ± 1951%  -95.09% (p=0.026 n=6)
StoreWriteThroughput/Namespaces=50/Pods=150000/Nodes=5000/Traffic=DeleteCreate/Parallelism=25/Background=ListerExactRV/UseIndex=true-24            649.4m ±  164%   1287.5m ±   85%        ~ (p=0.394 n=6)
StoreWriteThroughput/Namespaces=50/Pods=150000/Nodes=5000/Traffic=Patch/Parallelism=25/Background=ListerExactRV/UseIndex=false-24                  30.70m ± 1818%    55.93m ± 1053%        ~ (p=0.589 n=6)
StoreWriteThroughput/Namespaces=50/Pods=150000/Nodes=5000/Traffic=Patch/Parallelism=25/Background=ListerExactRV/UseIndex=true-24                   27.02m ±   38%    27.83m ± 1378%        ~ (p=0.589 n=6)
geomean                                                                                                                                            135.0m            88.26m          -34.64%

                                                                                                                                         │ before_exact_fixed.txt │        after_exact_fixed.txt        │
                                                                                                                                         │        writes/s        │   writes/s    vs base               │
StoreWriteThroughput/Namespaces=50/Pods=150000/Nodes=5000/Traffic=DeleteCreate/Parallelism=25/Background=ListerExactRV/UseIndex=false-24             19.92k ± 41%   22.37k ± 16%        ~ (p=0.394 n=6)
StoreWriteThroughput/Namespaces=50/Pods=150000/Nodes=5000/Traffic=DeleteCreate/Parallelism=25/Background=ListerExactRV/UseIndex=true-24              22.23k ± 22%   23.36k ± 27%        ~ (p=0.485 n=6)
StoreWriteThroughput/Namespaces=50/Pods=150000/Nodes=5000/Traffic=Patch/Parallelism=25/Background=ListerExactRV/UseIndex=false-24                    25.92k ± 59%   27.93k ± 44%        ~ (p=1.000 n=6)
StoreWriteThroughput/Namespaces=50/Pods=150000/Nodes=5000/Traffic=Patch/Parallelism=25/Background=ListerExactRV/UseIndex=true-24                     32.92k ± 55%   48.04k ± 36%        ~ (p=0.132 n=6)
geomean                                                                                                                                              24.79k         28.93k        +16.71%

```

Signed-off-by: Marek Siarkowicz <siarkowicz@google.com>
This commit is contained in:
Marek Siarkowicz 2026-06-06 12:26:43 +02:00
parent a006a3c610
commit 44500eca4f

View file

@ -587,21 +587,10 @@ func (w *watchCache) waitUntilFreshAndList(ctx context.Context, key string, opts
}
func (w *watchCache) waitAndListExactRV(ctx context.Context, key, continueKey string, resourceVersion uint64) (resp listResp, index string, err error) {
consistentReadSupported := delegator.ConsistentReadSupported()
w.RLock()
defer w.RUnlock()
err = w.waitUntilFreshLocked(ctx, consistentReadSupported, resourceVersion)
store, err := w.waitAndGetExactSnapshot(ctx, resourceVersion)
if err != nil {
return listResp{}, "", err
}
if w.snapshots == nil {
return listResp{}, "", errors.NewResourceExpired(fmt.Sprintf("too old resource version: %d", resourceVersion))
}
store, ok := w.snapshots.GetLessOrEqual(resourceVersion)
if !ok {
return listResp{}, "", errors.NewResourceExpired(fmt.Sprintf("too old resource version: %d", resourceVersion))
}
items := store.OrderedListPrefix(key, continueKey)
return listResp{
Items: items,
@ -609,6 +598,25 @@ func (w *watchCache) waitAndListExactRV(ctx context.Context, key, continueKey st
}, "", nil
}
// waitAndGetExactSnapshot waits until the cache is fresh enough for
// resourceVersion and then looks up the snapshot to serve the request from.
//
// The watch cache read lock is held only for the freshness wait and the
// snapshot lookup; it is released before returning, so the caller works with
// the returned lister without holding the lock.
// NOTE(review): this relies on a returned snapshot staying safe to read after
// the lock is released — confirm against the snapshot store implementation.
//
// It returns the error from waitUntilFreshLocked unchanged, or a
// ResourceExpired error ("too old resource version") when w.snapshots is nil
// or GetLessOrEqual reports no snapshot for resourceVersion.
func (w *watchCache) waitAndGetExactSnapshot(ctx context.Context, resourceVersion uint64) (store.OrderedLister, error) {
	// Evaluated before taking the lock, as in the original ordering.
	consistentReadSupported := delegator.ConsistentReadSupported()

	w.RLock()
	defer w.RUnlock()

	if err := w.waitUntilFreshLocked(ctx, consistentReadSupported, resourceVersion); err != nil {
		return nil, err
	}
	if w.snapshots != nil {
		// Named snapshot (not store) so the local does not shadow the
		// imported store package.
		if snapshot, ok := w.snapshots.GetLessOrEqual(resourceVersion); ok {
			return snapshot, nil
		}
	}
	return nil, errors.NewResourceExpired(fmt.Sprintf("too old resource version: %d", resourceVersion))
}
func (w *watchCache) waitAndListConsistent(ctx context.Context, key, continueKey string, matchValues []storage.MatchValue) (resp listResp, index string, err error) {
resourceVersion, err := w.getCurrentRV(ctx)
if err != nil {