scheduler: add metric for pods scheduled after flush

Add a counter metric to track pods that are scheduled immediately after
being flushed from unschedulablePods due to timeout. The metric relies on
a boolean flag that is cleared when a pod returns to the queue after a
scheduling attempt or is moved by an event other than the timeout flush.
This commit is contained in:
Mohammad Varmazyar 2025-11-05 19:48:20 +01:00
parent b2a399cf30
commit bc632c72d0
3 changed files with 17 additions and 6 deletions

View file

@ -894,6 +894,8 @@ func (p *PriorityQueue) AddUnschedulableIfNotPresent(logger klog.Logger, pInfo *
// We changed ConsecutiveErrorsCount or UnschedulableCount plus Timestamp, and now the calculated backoff time should be different,
// removing the cached backoff time.
pInfo.BackoffExpiration = time.Time{}
// Clear the flush flag since the pod is returning to the queue after a scheduling attempt.
pInfo.WasFlushedFromUnschedulable = false
if !p.isSchedulingQueueHintEnabled {
// fall back to the old behavior which doesn't depend on the queueing hint.
@ -949,7 +951,7 @@ func (p *PriorityQueue) flushUnschedulablePodsLeftover(logger klog.Logger) {
lastScheduleTime := pInfo.Timestamp
if currentTime.Sub(lastScheduleTime) > p.podMaxInUnschedulablePodsDuration {
// Mark this pod as flushed so we can detect if it schedules soon after
pInfo.FlushedFromUnschedulableAt = &currentTime
pInfo.WasFlushedFromUnschedulable = true
podsToMove = append(podsToMove, pInfo)
}
}
@ -1237,6 +1239,13 @@ func (p *PriorityQueue) movePodsToActiveOrBackoffQueue(logger klog.Logger, podIn
continue
}
// Clear the flush flag if this pod is being moved by an event (not by timeout flush).
// EventUnschedulableTimeout is the event used by flushUnschedulablePodsLeftover,
// where the flag is set to true before calling this function.
if event != framework.EventUnschedulableTimeout {
pInfo.WasFlushedFromUnschedulable = false
}
p.unschedulablePods.delete(pInfo.Pod, pInfo.Gated())
queue := p.requeuePodWithQueueingStrategy(logger, pInfo, schedulingHint, event.Label())
if queue == activeQ || (p.isPopFromBackoffQEnabled && queue == backoffQ) {

View file

@ -526,10 +526,12 @@ type QueuedPodInfo struct {
// That's why we need to distinguish ConsecutiveErrorsCount for the error status and UnschedulableCount for the unschedulable status.
// See https://github.com/kubernetes/kubernetes/issues/128744 for the discussion.
ConsecutiveErrorsCount int
// FlushedFromUnschedulableAt tracks when this pod was last flushed from unschedulablePods
// due to timeout. This is used to detect if the pod becomes schedulable soon after flush,
// which may indicate missing queue hint optimizations or event handling bugs.
FlushedFromUnschedulableAt *time.Time
// WasFlushedFromUnschedulable tracks whether this pod was most recently moved out of
// unschedulablePods by the periodic flush due to timeout (rather than by an event).
// This is used to detect if the pod becomes schedulable soon after the flush, which may
// indicate missing queue hint optimizations or event handling bugs.
// This flag is cleared when the pod returns to the queue after a scheduling attempt,
// or when the pod is moved by any event other than the unschedulable timeout.
WasFlushedFromUnschedulable bool
// The time when the pod is added to the queue for the first time. The pod may be added
// back to the queue multiple times before it's successfully scheduled.
// It shouldn't be updated once initialized. It's used to record the e2e scheduling

View file

@ -341,7 +341,7 @@ func (sched *Scheduler) bindingCycle(
metrics.PodSchedulingSLIDuration.WithLabelValues(getAttemptsLabel(assumedPodInfo)).Observe(metrics.SinceInSeconds(*assumedPodInfo.InitialAttemptTimestamp))
}
// Count pods scheduled after being flushed from unschedulablePods
if assumedPodInfo.FlushedFromUnschedulableAt != nil {
if assumedPodInfo.WasFlushedFromUnschedulable {
metrics.PodScheduledAfterFlush.Inc()
}
// Run "postbind" plugins.