diff --git a/pkg/controller/devicetainteviction/device_taint_eviction.go b/pkg/controller/devicetainteviction/device_taint_eviction.go index 2569486fa58..2b385efad90 100644 --- a/pkg/controller/devicetainteviction/device_taint_eviction.go +++ b/pkg/controller/devicetainteviction/device_taint_eviction.go @@ -67,6 +67,8 @@ const ( // updates while eviction is in progress. Once it is done, it no longer gets // updated until in progress again. ruleStatusPeriod = 10 * time.Second + + maxUIDCacheEntries = 500 ) // Controller listens to Taint changes of DRA devices and Toleration changes of ResourceClaims, @@ -105,6 +107,10 @@ type Controller struct { metrics metrics.Metrics workqueue workqueue.TypedRateLimitingInterface[workItem] + // The evictedPods cache keeps track of Pods for which we know that + // they have been evicted. + evictedPods *uidCache + evictPodHook func(pod tainteviction.NamespacedObject, eviction evictionAndReason) cancelEvictHook func(pod tainteviction.NamespacedObject) bool @@ -383,10 +389,11 @@ func (tc *Controller) maybeDeletePod(ctx context.Context, podRef tainteviction.N tc.mutex.Lock() tc.maybeDeletePodCount++ eviction, ok := tc.deletePodAt[podRef] + evicted := tc.evictedPods.has(podRef.UID) tc.mutex.Unlock() - logger.V(5).Info("Processing pod deletion work item", "active", ok, "eviction", eviction) + logger.V(5).Info("Processing pod deletion work item", "active", ok, "eviction", eviction, "evicted", evicted) - if !ok { + if !ok || evicted { logger.V(5).Info("Work item for pod deletion obsolete, nothing to do") return 0, nil } @@ -401,8 +408,12 @@ func (tc *Controller) maybeDeletePod(ctx context.Context, podRef tainteviction.N defer func() { if finalErr == nil { // Forget the deletion time, we are done. + // Also remember that we don't even need to + // check the pod again, should it have been + // added to the queue again in the meantime. 
tc.mutex.Lock() delete(tc.deletePodAt, podRef) + tc.evictedPods.add(podRef.UID) tc.mutex.Unlock() } }() @@ -739,7 +750,8 @@ func New(c clientset.Interface, podInformer coreinformers.PodInformer, claimInfo sliceInformer.Informer().HasSyncedChecker(), classInformer.Informer().HasSyncedChecker(), }, - metrics: metrics.Global, + metrics: metrics.Global, + evictedPods: newUIDCache(maxUIDCacheEntries), } // The informer for DeviceTaintRules only gets instantiated if the corresponding diff --git a/pkg/controller/devicetainteviction/uid_cache.go b/pkg/controller/devicetainteviction/uid_cache.go new file mode 100644 index 00000000000..9d33ef11680 --- /dev/null +++ b/pkg/controller/devicetainteviction/uid_cache.go @@ -0,0 +1,46 @@ +/* +Copyright 2022 The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package devicetainteviction + +import ( + "k8s.io/utils/lru" + + "k8s.io/apimachinery/pkg/types" +) + +// uidCache is an LRU cache used as a set of UIDs; the stored values are unused. +type uidCache struct { + cache *lru.Cache +} + +// newUIDCache returns a uidCache backed by an LRU created with size maxCacheEntries. +func newUIDCache(maxCacheEntries int) *uidCache { + return &uidCache{ + cache: lru.New(maxCacheEntries), + } +} + +// add records uid in the cache. +func (c *uidCache) add(uid types.UID) { + c.cache.Add(uid, nil) +} + +// has reports whether uid is in the cache. +func (c *uidCache) has(uid types.UID) bool { + _, found := c.cache.Get(uid) + return found +}