DRA device taints: avoid unnecessary Pod lookup

When rapidly processing informer events it can happen that a pod gets queued
for eviction twice (seen only in the TestEviction/update unit test):

- Claim update observed, pod from informer cache with NodeName from update -> queue pod for eviction.
- Pod update observed, claim from informer cache -> queue pod again.

The effect is one additional Get call to the apiserver. We can avoid it by
maintaining an LRU cache with the UIDs of the pods which we have evicted and
thus don't need to do anything for.
This commit is contained in:
Patrick Ohly 2026-02-27 14:34:56 +01:00
parent 017a53a1a9
commit 29e92367db
2 changed files with 61 additions and 3 deletions

View file

@ -67,6 +67,8 @@ const (
// updates while eviction is in progress. Once it is done, it no longer gets
// updated until in progress again.
ruleStatusPeriod = 10 * time.Second
maxUIDCacheEntries = 500
)
// Controller listens to Taint changes of DRA devices and Toleration changes of ResourceClaims,
@ -105,6 +107,10 @@ type Controller struct {
metrics metrics.Metrics
workqueue workqueue.TypedRateLimitingInterface[workItem]
// The evictedPods cache keeps track of Pods for which we know that
// they have been evicted.
evictedPods *uidCache
evictPodHook func(pod tainteviction.NamespacedObject, eviction evictionAndReason)
cancelEvictHook func(pod tainteviction.NamespacedObject) bool
@ -383,10 +389,11 @@ func (tc *Controller) maybeDeletePod(ctx context.Context, podRef tainteviction.N
tc.mutex.Lock()
tc.maybeDeletePodCount++
eviction, ok := tc.deletePodAt[podRef]
evicted := tc.evictedPods.has(podRef.UID)
tc.mutex.Unlock()
logger.V(5).Info("Processing pod deletion work item", "active", ok, "eviction", eviction)
logger.V(5).Info("Processing pod deletion work item", "active", ok, "eviction", eviction, "evicted", evicted)
if !ok {
if !ok || evicted {
logger.V(5).Info("Work item for pod deletion obsolete, nothing to do")
return 0, nil
}
@ -401,8 +408,12 @@ func (tc *Controller) maybeDeletePod(ctx context.Context, podRef tainteviction.N
defer func() {
if finalErr == nil {
// Forget the deletion time, we are done.
// Also remember that we don't even need to
// check the pod again, should it have been
// added to the queue again in the meantime.
tc.mutex.Lock()
delete(tc.deletePodAt, podRef)
tc.evictedPods.add(podRef.UID)
tc.mutex.Unlock()
}
}()
@ -739,7 +750,8 @@ func New(c clientset.Interface, podInformer coreinformers.PodInformer, claimInfo
sliceInformer.Informer().HasSyncedChecker(),
classInformer.Informer().HasSyncedChecker(),
},
metrics: metrics.Global,
metrics: metrics.Global,
evictedPods: newUIDCache(maxUIDCacheEntries),
}
// The informer for DeviceTaintRules only gets instantiated if the corresponding

View file

@ -0,0 +1,46 @@
/*
Copyright 2022 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package devicetainteviction
import (
"k8s.io/utils/lru"
"k8s.io/apimachinery/pkg/types"
)
// uidCache is a set of UIDs backed by an LRU cache. Only the presence of a
// key is meaningful: add stores no value and has ignores the stored value.
type uidCache struct {
// cache holds the UIDs as keys. It is created with a maximum entry count
// by newUIDCache.
cache *lru.Cache
}
// newUIDCache creates an empty uidCache whose underlying LRU cache is
// constructed with maxCacheEntries as its size.
func newUIDCache(maxCacheEntries int) *uidCache {
	c := new(uidCache)
	c.cache = lru.New(maxCacheEntries)
	return c
}
// add records uid in the cache. No value is associated with the key because
// only membership is ever queried.
func (c *uidCache) add(uid types.UID) {
	var noValue any
	c.cache.Add(uid, noValue)
}
// has reports whether uid is currently present in the cache.
//
// NOTE(review): the lookup goes through lru.Cache.Get, which presumably also
// marks the entry as recently used — confirm against the lru package.
func (c *uidCache) has(uid types.UID) (found bool) {
	_, found = c.cache.Get(uid)
	return found
}