mirror of
https://github.com/kubernetes/kubernetes.git
synced 2026-05-28 04:04:39 -04:00
scheduler: add metric for pods scheduled after flush
Add counter metric to track pods that schedule immediately after being flushed from unschedulablePods due to timeout. Uses a boolean flag that is cleared when pods return to queue or move via events.
This commit is contained in:
parent
b2a399cf30
commit
bc632c72d0
3 changed files with 17 additions and 6 deletions
|
|
@@ -894,6 +894,8 @@ func (p *PriorityQueue) AddUnschedulableIfNotPresent(logger klog.Logger, pInfo *
|
|||
// We changed ConsecutiveErrorsCount or UnschedulableCount plus Timestamp, and now the calculated backoff time should be different,
|
||||
// removing the cached backoff time.
|
||||
pInfo.BackoffExpiration = time.Time{}
|
||||
// Clear the flush flag since the pod is returning to the queue after a scheduling attempt.
|
||||
pInfo.WasFlushedFromUnschedulable = false
|
||||
|
||||
if !p.isSchedulingQueueHintEnabled {
|
||||
// fall back to the old behavior which doesn't depend on the queueing hint.
|
||||
|
|
@@ -949,7 +951,7 @@ func (p *PriorityQueue) flushUnschedulablePodsLeftover(logger klog.Logger) {
|
|||
lastScheduleTime := pInfo.Timestamp
|
||||
if currentTime.Sub(lastScheduleTime) > p.podMaxInUnschedulablePodsDuration {
|
||||
// Mark this pod as flushed so we can detect if it schedules soon after
|
||||
pInfo.FlushedFromUnschedulableAt = &currentTime
|
||||
pInfo.WasFlushedFromUnschedulable = true
|
||||
podsToMove = append(podsToMove, pInfo)
|
||||
}
|
||||
}
|
||||
|
|
@@ -1237,6 +1239,13 @@ func (p *PriorityQueue) movePodsToActiveOrBackoffQueue(logger klog.Logger, podIn
|
|||
continue
|
||||
}
|
||||
|
||||
// Clear the flush flag if this pod is being moved by an event (not by timeout flush).
|
||||
// EventUnschedulableTimeout is the event used by flushUnschedulablePodsLeftover,
|
||||
// where the flag is set to true before calling this function.
|
||||
if event != framework.EventUnschedulableTimeout {
|
||||
pInfo.WasFlushedFromUnschedulable = false
|
||||
}
|
||||
|
||||
p.unschedulablePods.delete(pInfo.Pod, pInfo.Gated())
|
||||
queue := p.requeuePodWithQueueingStrategy(logger, pInfo, schedulingHint, event.Label())
|
||||
if queue == activeQ || (p.isPopFromBackoffQEnabled && queue == backoffQ) {
|
||||
|
|
|
|||
|
|
@@ -526,10 +526,12 @@ type QueuedPodInfo struct {
|
|||
// That's why we need to distinguish ConsecutiveErrorsCount for the error status and UnschedulableCount for the unschedulable status.
|
||||
// See https://github.com/kubernetes/kubernetes/issues/128744 for the discussion.
|
||||
ConsecutiveErrorsCount int
|
||||
// FlushedFromUnschedulableAt tracks when this pod was last flushed from unschedulablePods
|
||||
// due to timeout. This is used to detect if the pod becomes schedulable soon after flush,
|
||||
// which may indicate missing queue hint optimizations or event handling bugs.
|
||||
FlushedFromUnschedulableAt *time.Time
|
||||
// WasFlushedFromUnschedulable tracks whether this pod was most recently moved to activeQ
|
||||
// by the periodic flush from unschedulablePods due to timeout (rather than by an event).
|
||||
// This is used to detect if the pod becomes schedulable soon after flush, which may
|
||||
// indicate missing queue hint optimizations or event handling bugs.
|
||||
// This flag is cleared when the pod returns to the queue for any reason.
|
||||
WasFlushedFromUnschedulable bool
|
||||
// The time when the pod is added to the queue for the first time. The pod may be added
|
||||
// back to the queue multiple times before it's successfully scheduled.
|
||||
// It shouldn't be updated once initialized. It's used to record the e2e scheduling
|
||||
|
|
|
|||
|
|
@@ -341,7 +341,7 @@ func (sched *Scheduler) bindingCycle(
|
|||
metrics.PodSchedulingSLIDuration.WithLabelValues(getAttemptsLabel(assumedPodInfo)).Observe(metrics.SinceInSeconds(*assumedPodInfo.InitialAttemptTimestamp))
|
||||
}
|
||||
// Count pods scheduled after being flushed from unschedulablePods
|
||||
if assumedPodInfo.FlushedFromUnschedulableAt != nil {
|
||||
if assumedPodInfo.WasFlushedFromUnschedulable {
|
||||
metrics.PodScheduledAfterFlush.Inc()
|
||||
}
|
||||
// Run "postbind" plugins.
|
||||
|
|
|
|||
Loading…
Reference in a new issue