From 44500eca4f10cd710366a89ea8f54a84a86b8fda Mon Sep 17 00:00:00 2001 From: Marek Siarkowicz Date: Sat, 6 Jun 2026 12:26:43 +0200 Subject: [PATCH] List snapshot data outside of lock MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This optimizes the exact resource version list logic (StoreList/.../RV=Exact) by extracting the snapshot and performing the B-Tree traversal outside of the watch cache lock. The benchstat evidence below is limited to the `RV=Exact` scenarios, which are the only paths executing the modified code. ```text goos: linux goarch: amd64 pkg: k8s.io/apiserver/pkg/storage/cacher cpu: AMD Ryzen Threadripper PRO 3945WX 12-Cores │ before_exact_fixed.txt │ after_exact_fixed.txt │ │ sec/op │ sec/op vs base │ StoreWriteThroughput/Namespaces=50/Pods=150000/Nodes=5000/Traffic=DeleteCreate/Parallelism=25/Background=ListerExactRV/UseIndex=false-24 218.4µ ± 37% 109.9µ ± 46% -49.68% (p=0.004 n=6) StoreWriteThroughput/Namespaces=50/Pods=150000/Nodes=5000/Traffic=DeleteCreate/Parallelism=25/Background=ListerExactRV/UseIndex=true-24 215.2µ ± 48% 218.9µ ± 47% ~ (p=0.937 n=6) StoreWriteThroughput/Namespaces=50/Pods=150000/Nodes=5000/Traffic=Patch/Parallelism=25/Background=ListerExactRV/UseIndex=false-24 50.88µ ± 1960% 40.65µ ± 157% ~ (p=0.818 n=6) StoreWriteThroughput/Namespaces=50/Pods=150000/Nodes=5000/Traffic=Patch/Parallelism=25/Background=ListerExactRV/UseIndex=true-24 37.41µ ± 32% 25.05µ ± 58% ~ (p=0.180 n=6) StoreList/Namespaces=50/Pods=150000/Nodes=5000/Indexed=true/RV=Exact/Scope=Cluster/Paginate=0-24 30.33m ± 24% 24.79m ± 35% ~ (p=0.240 n=6) StoreList/Namespaces=50/Pods=150000/Nodes=5000/Indexed=true/RV=Exact/Scope=Cluster/Paginate=1000-24 2.254 ± 9% 2.194 ± 12% ~ (p=0.589 n=6) StoreList/Namespaces=50/Pods=150000/Nodes=5000/Indexed=true/RV=Exact/Scope=Node/Paginate=0-24 7.960m ± 15% 8.118m ± 15% ~ (p=0.589 n=6) StoreList/Namespaces=50/Pods=150000/Nodes=5000/Indexed=true/RV=Exact/Scope=Namespace/Paginate=0-24 
453.6µ ± 5% 429.1µ ± 18% -5.40% (p=0.041 n=6)
StoreList/Namespaces=50/Pods=150000/Nodes=5000/Indexed=true/RV=Exact/Scope=Namespace/Paginate=1000-24 497.3µ ± 15% 514.2µ ± 13% ~ (p=1.000 n=6)
StoreList/Namespaces=50/Pods=150000/Nodes=5000/Indexed=false/RV=Exact/Scope=Cluster/Paginate=0-24 27.38m ± 10% 25.53m ± 39% ~ (p=0.485 n=6)
StoreList/Namespaces=50/Pods=150000/Nodes=5000/Indexed=false/RV=Exact/Scope=Cluster/Paginate=1000-24 2.355 ± 16% 2.057 ± 18% ~ (p=0.240 n=6)
StoreList/Namespaces=50/Pods=150000/Nodes=5000/Indexed=false/RV=Exact/Scope=Node/Paginate=0-24 25.19m ± 47% 24.64m ± 27% ~ (p=0.699 n=6)
StoreList/Namespaces=50/Pods=150000/Nodes=5000/Indexed=false/RV=Exact/Scope=Namespace/Paginate=0-24 422.5µ ± 20% 423.2µ ± 11% ~ (p=0.699 n=6)
StoreList/Namespaces=50/Pods=150000/Nodes=5000/Indexed=false/RV=Exact/Scope=Namespace/Paginate=1000-24 499.4µ ± 11% 530.0µ ± 26% ~ (p=0.132 n=6)
geomean 2.949m 2.613m -11.39%

│ before_exact_fixed.txt │ after_exact_fixed.txt │
│ list-calls/s │ list-calls/s vs base │
StoreWriteThroughput/Namespaces=50/Pods=150000/Nodes=5000/Traffic=DeleteCreate/Parallelism=25/Background=ListerExactRV/UseIndex=false-24 8.180 ± 102% 9.119 ± 38% ~ (p=0.485 n=6)
StoreWriteThroughput/Namespaces=50/Pods=150000/Nodes=5000/Traffic=DeleteCreate/Parallelism=25/Background=ListerExactRV/UseIndex=true-24 8.334 ± 67% 7.505 ± 52% ~ (p=0.699 n=6)
StoreWriteThroughput/Namespaces=50/Pods=150000/Nodes=5000/Traffic=Patch/Parallelism=25/Background=ListerExactRV/UseIndex=false-24 8.194 ± 2200% 8.233 ± 27% ~ (p=0.937 n=6)
StoreWriteThroughput/Namespaces=50/Pods=150000/Nodes=5000/Traffic=Patch/Parallelism=25/Background=ListerExactRV/UseIndex=true-24 9.043 ± 31% 9.500 ± 22% ~ (p=0.818 n=6)
StoreList/Namespaces=50/Pods=150000/Nodes=5000/Indexed=true/RV=Exact/Scope=Cluster/Paginate=0-24 32.99 ± 31% 40.41 ± 44% ~ (p=0.240 n=6)
StoreList/Namespaces=50/Pods=150000/Nodes=5000/Indexed=true/RV=Exact/Scope=Cluster/Paginate=1000-24 66.54 ± 9% 68.38 ± 14% ~ (p=0.589 n=6)
StoreList/Namespaces=50/Pods=150000/Nodes=5000/Indexed=true/RV=Exact/Scope=Node/Paginate=0-24 125.7 ± 13% 123.2 ± 17% ~ (p=0.589 n=6)
StoreList/Namespaces=50/Pods=150000/Nodes=5000/Indexed=true/RV=Exact/Scope=Namespace/Paginate=0-24 2.205k ± 5% 2.331k ± 23% +5.71% (p=0.041 n=6)
StoreList/Namespaces=50/Pods=150000/Nodes=5000/Indexed=true/RV=Exact/Scope=Namespace/Paginate=1000-24 6.034k ± 13% 5.835k ± 15% ~ (p=1.000 n=6)
StoreList/Namespaces=50/Pods=150000/Nodes=5000/Indexed=false/RV=Exact/Scope=Cluster/Paginate=0-24 36.52 ± 11% 39.28 ± 28% ~ (p=0.485 n=6)
StoreList/Namespaces=50/Pods=150000/Nodes=5000/Indexed=false/RV=Exact/Scope=Cluster/Paginate=1000-24 63.70 ± 19% 72.92 ± 15% ~ (p=0.240 n=6)
StoreList/Namespaces=50/Pods=150000/Nodes=5000/Indexed=false/RV=Exact/Scope=Node/Paginate=0-24 39.73 ± 32% 40.83 ± 22% ~ (p=0.699 n=6)
StoreList/Namespaces=50/Pods=150000/Nodes=5000/Indexed=false/RV=Exact/Scope=Namespace/Paginate=0-24 2.366k ± 25% 2.363k ± 10% ~ (p=0.699 n=6)
StoreList/Namespaces=50/Pods=150000/Nodes=5000/Indexed=false/RV=Exact/Scope=Namespace/Paginate=1000-24 6.007k ± 12% 5.665k ± 35% ~ (p=0.132 n=6)
geomean 106.6 110.2 +3.38%

│ before_exact_fixed.txt │ after_exact_fixed.txt │
│ list-objs/s │ list-objs/s vs base │
StoreWriteThroughput/Namespaces=50/Pods=150000/Nodes=5000/Traffic=DeleteCreate/Parallelism=25/Background=ListerExactRV/UseIndex=false-24 1.225M ± 102% 1.366M ± 38% ~ (p=0.394 n=6)
StoreWriteThroughput/Namespaces=50/Pods=150000/Nodes=5000/Traffic=DeleteCreate/Parallelism=25/Background=ListerExactRV/UseIndex=true-24 275.0 ± 67% 216.4 ± 53% ~ (p=0.485 n=6)
StoreWriteThroughput/Namespaces=50/Pods=150000/Nodes=5000/Traffic=Patch/Parallelism=25/Background=ListerExactRV/UseIndex=false-24 1.229M ± 2200% 1.235M ± 27% ~ (p=0.937 n=6)
StoreWriteThroughput/Namespaces=50/Pods=150000/Nodes=5000/Traffic=Patch/Parallelism=25/Background=ListerExactRV/UseIndex=true-24 298.4 ± 31% 275.5 ± 22% ~ (p=0.180 n=6)
StoreList/Namespaces=50/Pods=150000/Nodes=5000/Indexed=true/RV=Exact/Scope=Cluster/Paginate=0-24 4.947M ± 31% 6.059M ± 44% ~ (p=0.240 n=6)
StoreList/Namespaces=50/Pods=150000/Nodes=5000/Indexed=true/RV=Exact/Scope=Cluster/Paginate=1000-24 66.50k ± 9% 68.34k ± 14% ~ (p=0.589 n=6)
StoreList/Namespaces=50/Pods=150000/Nodes=5000/Indexed=true/RV=Exact/Scope=Node/Paginate=0-24 3.764k ± 13% 3.735k ± 17% ~ (p=0.589 n=6)
StoreList/Namespaces=50/Pods=150000/Nodes=5000/Indexed=true/RV=Exact/Scope=Namespace/Paginate=0-24 6.614M ± 5% 6.992M ± 23% +5.72% (p=0.041 n=6)
StoreList/Namespaces=50/Pods=150000/Nodes=5000/Indexed=true/RV=Exact/Scope=Namespace/Paginate=1000-24 6.032M ± 13% 5.835M ± 15% ~ (p=1.000 n=6)
StoreList/Namespaces=50/Pods=150000/Nodes=5000/Indexed=false/RV=Exact/Scope=Cluster/Paginate=0-24 5.476M ± 11% 5.890M ± 28% ~ (p=0.485 n=6)
StoreList/Namespaces=50/Pods=150000/Nodes=5000/Indexed=false/RV=Exact/Scope=Cluster/Paginate=1000-24 63.66k ± 19% 72.88k ± 15% ~ (p=0.240 n=6)
StoreList/Namespaces=50/Pods=150000/Nodes=5000/Indexed=false/RV=Exact/Scope=Node/Paginate=0-24 5.958M ± 32% 6.121M ± 22% ~ (p=0.699 n=6)
StoreList/Namespaces=50/Pods=150000/Nodes=5000/Indexed=false/RV=Exact/Scope=Namespace/Paginate=0-24 7.099M ± 25% 7.088M ± 10% ~ (p=0.699 n=6)
StoreList/Namespaces=50/Pods=150000/Nodes=5000/Indexed=false/RV=Exact/Scope=Namespace/Paginate=1000-24 6.006M ± 12% 5.664M ± 35% ~ (p=0.132 n=6)
geomean 356.7k 362.2k +1.54%

│ before_exact_fixed.txt │ after_exact_fixed.txt │
│ seconds-delay │ seconds-delay vs base │
StoreWriteThroughput/Namespaces=50/Pods=150000/Nodes=5000/Traffic=DeleteCreate/Parallelism=25/Background=ListerExactRV/UseIndex=false-24 616.95m ± 157% 30.28m ± 1951% -95.09% (p=0.026 n=6)
StoreWriteThroughput/Namespaces=50/Pods=150000/Nodes=5000/Traffic=DeleteCreate/Parallelism=25/Background=ListerExactRV/UseIndex=true-24 649.4m ± 164% 1287.5m ± 85% ~ (p=0.394 n=6)
StoreWriteThroughput/Namespaces=50/Pods=150000/Nodes=5000/Traffic=Patch/Parallelism=25/Background=ListerExactRV/UseIndex=false-24 30.70m ± 1818% 55.93m ± 1053% ~ (p=0.589 n=6) StoreWriteThroughput/Namespaces=50/Pods=150000/Nodes=5000/Traffic=Patch/Parallelism=25/Background=ListerExactRV/UseIndex=true-24 27.02m ± 38% 27.83m ± 1378% ~ (p=0.589 n=6) geomean 135.0m 88.26m -34.64% │ before_exact_fixed.txt │ after_exact_fixed.txt │ │ writes/s │ writes/s vs base │ StoreWriteThroughput/Namespaces=50/Pods=150000/Nodes=5000/Traffic=DeleteCreate/Parallelism=25/Background=ListerExactRV/UseIndex=false-24 19.92k ± 41% 22.37k ± 16% ~ (p=0.394 n=6) StoreWriteThroughput/Namespaces=50/Pods=150000/Nodes=5000/Traffic=DeleteCreate/Parallelism=25/Background=ListerExactRV/UseIndex=true-24 22.23k ± 22% 23.36k ± 27% ~ (p=0.485 n=6) StoreWriteThroughput/Namespaces=50/Pods=150000/Nodes=5000/Traffic=Patch/Parallelism=25/Background=ListerExactRV/UseIndex=false-24 25.92k ± 59% 27.93k ± 44% ~ (p=1.000 n=6) StoreWriteThroughput/Namespaces=50/Pods=150000/Nodes=5000/Traffic=Patch/Parallelism=25/Background=ListerExactRV/UseIndex=true-24 32.92k ± 55% 48.04k ± 36% ~ (p=0.132 n=6) geomean 24.79k 28.93k +16.71% Signed-off-by: Marek Siarkowicz --- .../pkg/storage/cacher/watch_cache.go | 32 ++++++++++++------- 1 file changed, 20 insertions(+), 12 deletions(-) diff --git a/staging/src/k8s.io/apiserver/pkg/storage/cacher/watch_cache.go b/staging/src/k8s.io/apiserver/pkg/storage/cacher/watch_cache.go index 5419be91139..465da5141ca 100644 --- a/staging/src/k8s.io/apiserver/pkg/storage/cacher/watch_cache.go +++ b/staging/src/k8s.io/apiserver/pkg/storage/cacher/watch_cache.go @@ -587,21 +587,10 @@ func (w *watchCache) waitUntilFreshAndList(ctx context.Context, key string, opts } func (w *watchCache) waitAndListExactRV(ctx context.Context, key, continueKey string, resourceVersion uint64) (resp listResp, index string, err error) { - consistentReadSupported := delegator.ConsistentReadSupported() - w.RLock() - 
defer w.RUnlock() - err = w.waitUntilFreshLocked(ctx, consistentReadSupported, resourceVersion) + store, err := w.waitAndGetExactSnapshot(ctx, resourceVersion) if err != nil { return listResp{}, "", err } - - if w.snapshots == nil { - return listResp{}, "", errors.NewResourceExpired(fmt.Sprintf("too old resource version: %d", resourceVersion)) - } - store, ok := w.snapshots.GetLessOrEqual(resourceVersion) - if !ok { - return listResp{}, "", errors.NewResourceExpired(fmt.Sprintf("too old resource version: %d", resourceVersion)) - } items := store.OrderedListPrefix(key, continueKey) return listResp{ Items: items, @@ -609,6 +598,25 @@ func (w *watchCache) waitAndListExactRV(ctx context.Context, key, continueKey st }, "", nil } +func (w *watchCache) waitAndGetExactSnapshot(ctx context.Context, resourceVersion uint64) (store store.OrderedLister, err error) { + consistentReadSupported := delegator.ConsistentReadSupported() + w.RLock() + defer w.RUnlock() + err = w.waitUntilFreshLocked(ctx, consistentReadSupported, resourceVersion) + if err != nil { + return nil, err + } + + if w.snapshots == nil { + return nil, errors.NewResourceExpired(fmt.Sprintf("too old resource version: %d", resourceVersion)) + } + store, ok := w.snapshots.GetLessOrEqual(resourceVersion) + if !ok { + return nil, errors.NewResourceExpired(fmt.Sprintf("too old resource version: %d", resourceVersion)) + } + return store, nil +} + func (w *watchCache) waitAndListConsistent(ctx context.Context, key, continueKey string, matchValues []storage.MatchValue) (resp listResp, index string, err error) { resourceVersion, err := w.getCurrentRV(ctx) if err != nil {