Refactor plugin clearing to use ClearRejectorPlugins method

This commit is contained in:
Mohammad Varmazyar 2025-11-26 09:54:32 +01:00
parent d64e09c697
commit 4f455c9c0d
3 changed files with 15 additions and 9 deletions

View file

@ -324,14 +324,9 @@ func (aq *activeQueue) unlockedPop(logger klog.Logger) (*framework.QueuedPodInfo
aq.schedCycle++
// Update metrics for unschedulable plugins.
// Note: We don't clear UnschedulablePlugins and PendingPlugins here because:
// 1. If the pod schedules successfully, we need them for logging/debugging
// 2. If the pod fails to schedule, they will be cleared and repopulated in handleSchedulingFailure
for plugin := range pInfo.UnschedulablePlugins.Union(pInfo.PendingPlugins) {
metrics.UnschedulableReason(plugin, pInfo.Pod.Spec.SchedulerName).Dec()
}
pInfo.GatingPlugin = ""
pInfo.GatingPluginEvents = nil
return pInfo, nil
}

View file

@ -619,6 +619,16 @@ func (pqi *QueuedPodInfo) DeepCopy() *QueuedPodInfo {
}
}
// ClearRejectorPlugins resets the plugin-related fields of the QueuedPodInfo
// that track why the pod was rejected in a previous scheduling attempt:
// UnschedulablePlugins, PendingPlugins, GatingPlugin and GatingPluginEvents.
// Invoke it when a new scheduling attempt begins so that data left over from
// an earlier attempt does not persist.
func (pqi *QueuedPodInfo) ClearRejectorPlugins() {
	// Reset the gating information back to its zero values.
	pqi.GatingPluginEvents = nil
	pqi.GatingPlugin = ""

	// Clear both plugin sets.
	pqi.PendingPlugins.Clear()
	pqi.UnschedulablePlugins.Clear()
}
// PodInfo is a wrapper to a Pod with additional pre-computed information to
// accelerate processing. This information is typically immutable (e.g., pre-processed
// inter-pod affinity selectors).

View file

@ -1063,10 +1063,11 @@ func (sched *Scheduler) handleSchedulingFailure(ctx context.Context, fwk framewo
err := status.AsError()
errMsg := status.Message()
// Clear plugin sets to avoid stale data from previous scheduling attempts.
// They will be repopulated below for FitError cases.
podInfo.UnschedulablePlugins.Clear()
podInfo.PendingPlugins.Clear()
// Clear plugin-related fields to avoid stale data from previous scheduling attempts.
// These fields will be repopulated below for FitError cases.
// We clear them here (rather than at Pop) because we sometimes want to use them
// for logging when a pod schedules successfully (e.g., after being flushed).
podInfo.ClearRejectorPlugins()
if err == ErrNoNodesAvailable {
logger.V(2).Info("Unable to schedule pod; no nodes are registered to the cluster; waiting", "pod", klog.KObj(pod))