From 38f536c71349a1dd314713465395e2dd96fc4031 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Maciej=20Skocze=C5=84?= Date: Tue, 17 Mar 2026 09:10:52 +0000 Subject: [PATCH] Use CycleState.IsPodGroupSchedulingCycle instead of NeedsPodGroupScheduling for pods --- .../backend/queue/scheduling_queue.go | 3 -- pkg/scheduler/framework/cycle_state.go | 3 ++ pkg/scheduler/framework/types.go | 6 --- pkg/scheduler/schedule_one.go | 10 ++--- pkg/scheduler/schedule_one_podgroup.go | 2 - pkg/scheduler/schedule_one_podgroup_test.go | 44 +++++++++---------- pkg/scheduler/scheduler.go | 2 + .../kube-scheduler/framework/cycle_state.go | 4 ++ 8 files changed, 36 insertions(+), 38 deletions(-) diff --git a/pkg/scheduler/backend/queue/scheduling_queue.go b/pkg/scheduler/backend/queue/scheduling_queue.go index 186bb082e29..189a5295c01 100644 --- a/pkg/scheduler/backend/queue/scheduling_queue.go +++ b/pkg/scheduler/backend/queue/scheduling_queue.go @@ -931,8 +931,6 @@ func (p *PriorityQueue) AddUnschedulableIfNotPresent(logger klog.Logger, pInfo * pInfo.BackoffExpiration = time.Time{} // Clear the flush flag since the pod is returning to the queue after a scheduling attempt. pInfo.WasFlushedFromUnschedulable = false - // Pod with scheduling group should always need the cycle after got unschedulable for some reason. - pInfo.NeedsPodGroupScheduling = p.isGenericWorkloadEnabled && pod.Spec.SchedulingGroup != nil if !p.isSchedulingQueueHintEnabled { // fall back to the old behavior which doesn't depend on the queueing hint. 
@@ -1547,7 +1545,6 @@ func (p *PriorityQueue) newQueuedPodInfo(ctx context.Context, pod *v1.Pod, plugi InitialAttemptTimestamp: nil, PodSignature: p.signPod(ctx, pod), UnschedulablePlugins: sets.New(plugins...), - NeedsPodGroupScheduling: p.isGenericWorkloadEnabled && pod.Spec.SchedulingGroup != nil, } } diff --git a/pkg/scheduler/framework/cycle_state.go b/pkg/scheduler/framework/cycle_state.go index a2a3078e6d7..667beddd680 100644 --- a/pkg/scheduler/framework/cycle_state.go +++ b/pkg/scheduler/framework/cycle_state.go @@ -40,6 +40,9 @@ type CycleState struct { // in the PreBind extension point. parallelPreBindPlugins sets.Set[string] // isPodGroupSchedulingCycle indicates whether this cycle is a pod group scheduling cycle or not. + // If set to false, it means that the pod referencing this CycleState either passed the pod group cycle + // or doesn't belong to any pod group. + // This field can only be set to true when GenericWorkload feature flag is enabled. isPodGroupSchedulingCycle bool } diff --git a/pkg/scheduler/framework/types.go b/pkg/scheduler/framework/types.go index e743bd8cfcc..d3010b5aaaa 100644 --- a/pkg/scheduler/framework/types.go +++ b/pkg/scheduler/framework/types.go @@ -556,11 +556,6 @@ type QueuedPodInfo struct { // GatingPluginEvents records the events registered by the plugin that gated the Pod at PreEnqueue. // We have it as a cache purpose to avoid re-computing which event(s) might ungate the Pod. GatingPluginEvents []fwk.ClusterEvent - // NeedsPodGroupScheduling says whether the pod needs to pass a pod group scheduling cycle or not. - // If set to false, it means that the pod either passed the pod group cycle - // or doesn't belong to any pod group. - // This field can only be set to true when GenericWorkload feature flag is enabled. 
- NeedsPodGroupScheduling bool // PodSignature for opportunistic batching PodSignature fwk.PodSignature } @@ -628,7 +623,6 @@ func (pqi *QueuedPodInfo) DeepCopy() *QueuedPodInfo { GatingPluginEvents: slices.Clone(pqi.GatingPluginEvents), PendingPlugins: pqi.PendingPlugins.Clone(), ConsecutiveErrorsCount: pqi.ConsecutiveErrorsCount, - NeedsPodGroupScheduling: pqi.NeedsPodGroupScheduling, PodSignature: pqi.PodSignature, } } diff --git a/pkg/scheduler/schedule_one.go b/pkg/scheduler/schedule_one.go index 2570ae9a3b3..07c2d357997 100644 --- a/pkg/scheduler/schedule_one.go +++ b/pkg/scheduler/schedule_one.go @@ -74,7 +74,7 @@ func (sched *Scheduler) ScheduleOne(ctx context.Context) { if podInfo == nil || podInfo.Pod == nil { return } - if podInfo.NeedsPodGroupScheduling { + if sched.genericWorkloadEnabled && podInfo.Pod.Spec.SchedulingGroup != nil { podGroupInfo, err := sched.podGroupInfoForPod(ctx, podInfo) if err != nil { podFwk, err := sched.frameworkForPod(podInfo.Pod) @@ -322,7 +322,7 @@ func (sched *Scheduler) assumeAndReserve( assumedPodInfo := podInfo.DeepCopy() assumedPod := assumedPodInfo.Pod // assume modifies `assumedPod` by setting NodeName=scheduleResult.SuggestedHost - err := sched.assume(logger, assumedPodInfo, scheduleResult.SuggestedHost) + err := sched.assume(logger, state, assumedPodInfo, scheduleResult.SuggestedHost) if err != nil { // This is most probably result of a BUG in retrying logic. // We report an error here so that pod scheduling can be retried. 
@@ -370,7 +370,7 @@ func (sched *Scheduler) unreserveAndForget( logger := klog.FromContext(ctx) schedFramework.RunReservePluginsUnreserve(ctx, state, assumedPodInfo.Pod, nodeName) - if assumedPodInfo.NeedsPodGroupScheduling { + if state.IsPodGroupSchedulingCycle() { err := sched.nodeInfoSnapshot.ForgetPod(logger, assumedPodInfo.Pod) if err != nil { return err @@ -1103,14 +1103,14 @@ func (h *nodeScoreHeap) Pop() interface{} { // assume signals to the cache that a pod is already in the cache, so that binding can be asynchronous. // When called during pod group scheduling cycle, pod is assumed in the snapshot instead. -func (sched *Scheduler) assume(logger klog.Logger, assumedPodInfo *framework.QueuedPodInfo, host string) error { +func (sched *Scheduler) assume(logger klog.Logger, state fwk.CycleState, assumedPodInfo *framework.QueuedPodInfo, host string) error { // Optimistically assume that the binding will succeed and send it to apiserver // in the background. // If the binding fails, scheduler will release resources allocated to assumed pod // immediately. assumedPodInfo.Pod.Spec.NodeName = host - if assumedPodInfo.NeedsPodGroupScheduling { + if state.IsPodGroupSchedulingCycle() { err := sched.nodeInfoSnapshot.AssumePod(assumedPodInfo.PodInfo) if err != nil { logger.Error(err, "Scheduler snapshot AssumePod failed") diff --git a/pkg/scheduler/schedule_one_podgroup.go b/pkg/scheduler/schedule_one_podgroup.go index 89978399438..9e7c474cc1b 100644 --- a/pkg/scheduler/schedule_one_podgroup.go +++ b/pkg/scheduler/schedule_one_podgroup.go @@ -440,8 +440,6 @@ func (sched *Scheduler) submitPodGroupAlgorithmResult(ctx context.Context, sched } switch { case podGroupResult.status.IsSuccess(): - // Pod no longer needs a pod group scheduling cycle. Setting it to false to disable any checks in further functions. - pInfo.NeedsPodGroupScheduling = false // Disable pod group scheduling in cycle state before binding. 
podCtx.state.SetPodGroupSchedulingCycle(false) // Schedule result is applied for pod and its binding cycle executes. diff --git a/pkg/scheduler/schedule_one_podgroup_test.go b/pkg/scheduler/schedule_one_podgroup_test.go index 3c6714744bc..d4531680c15 100644 --- a/pkg/scheduler/schedule_one_podgroup_test.go +++ b/pkg/scheduler/schedule_one_podgroup_test.go @@ -196,13 +196,13 @@ func TestPodGroupInfoForPod(t *testing.T) { } func TestFrameworkForPodGroup(t *testing.T) { - p1 := st.MakePod().Name("p1").SchedulerName("sched1").Obj() - p2 := st.MakePod().Name("p2").SchedulerName("sched1").Obj() - p3 := st.MakePod().Name("p3").SchedulerName("sched2").Obj() + p1 := st.MakePod().Name("p1").PodGroupName("pg").SchedulerName("sched1").Obj() + p2 := st.MakePod().Name("p2").PodGroupName("pg").SchedulerName("sched1").Obj() + p3 := st.MakePod().Name("p3").PodGroupName("pg").SchedulerName("sched2").Obj() - qInfo1 := &framework.QueuedPodInfo{PodInfo: &framework.PodInfo{Pod: p1}, NeedsPodGroupScheduling: true} - qInfo2 := &framework.QueuedPodInfo{PodInfo: &framework.PodInfo{Pod: p2}, NeedsPodGroupScheduling: true} - qInfo3 := &framework.QueuedPodInfo{PodInfo: &framework.PodInfo{Pod: p3}, NeedsPodGroupScheduling: true} + qInfo1 := &framework.QueuedPodInfo{PodInfo: &framework.PodInfo{Pod: p1}} + qInfo2 := &framework.QueuedPodInfo{PodInfo: &framework.PodInfo{Pod: p2}} + qInfo3 := &framework.QueuedPodInfo{PodInfo: &framework.PodInfo{Pod: p3}} tests := []struct { name string @@ -256,13 +256,13 @@ func TestFrameworkForPodGroup(t *testing.T) { } func TestSkipPodGroupPodSchedule(t *testing.T) { - p1 := st.MakePod().Name("p1").UID("p1").Obj() - p2 := st.MakePod().Name("p2").UID("p2").Terminating().Obj() - p3 := st.MakePod().Name("p3").UID("p3").Obj() + p1 := st.MakePod().Name("p1").UID("p1").PodGroupName("pg").Obj() + p2 := st.MakePod().Name("p2").UID("p2").PodGroupName("pg").Terminating().Obj() + p3 := st.MakePod().Name("p3").UID("p3").PodGroupName("pg").Obj() - qInfo1 := 
&framework.QueuedPodInfo{PodInfo: &framework.PodInfo{Pod: p1}, NeedsPodGroupScheduling: true} - qInfo2 := &framework.QueuedPodInfo{PodInfo: &framework.PodInfo{Pod: p2}, NeedsPodGroupScheduling: true} - qInfo3 := &framework.QueuedPodInfo{PodInfo: &framework.PodInfo{Pod: p3}, NeedsPodGroupScheduling: true} + qInfo1 := &framework.QueuedPodInfo{PodInfo: &framework.PodInfo{Pod: p1}} + qInfo2 := &framework.QueuedPodInfo{PodInfo: &framework.PodInfo{Pod: p2}} + qInfo3 := &framework.QueuedPodInfo{PodInfo: &framework.PodInfo{Pod: p3}} pgInfo := &framework.QueuedPodGroupInfo{ QueuedPodInfos: []*framework.QueuedPodInfo{qInfo1, qInfo2, qInfo3}, @@ -323,10 +323,10 @@ func TestSkipPodGroupPodSchedule(t *testing.T) { } func TestPodGroupCycle_UpdateSnapshotError(t *testing.T) { - p1 := st.MakePod().Name("p1").UID("p1").SchedulerName("test-scheduler").Obj() - p2 := st.MakePod().Name("p2").UID("p2").SchedulerName("test-scheduler").Obj() - qInfo1 := &framework.QueuedPodInfo{PodInfo: &framework.PodInfo{Pod: p1}, NeedsPodGroupScheduling: true} - qInfo2 := &framework.QueuedPodInfo{PodInfo: &framework.PodInfo{Pod: p2}, NeedsPodGroupScheduling: true} + p1 := st.MakePod().Name("p1").UID("p1").PodGroupName("pg").SchedulerName("test-scheduler").Obj() + p2 := st.MakePod().Name("p2").UID("p2").PodGroupName("pg").SchedulerName("test-scheduler").Obj() + qInfo1 := &framework.QueuedPodInfo{PodInfo: &framework.PodInfo{Pod: p1}} + qInfo2 := &framework.QueuedPodInfo{PodInfo: &framework.PodInfo{Pod: p2}} pgInfo := &framework.QueuedPodGroupInfo{ QueuedPodInfos: []*framework.QueuedPodInfo{qInfo1, qInfo2}, @@ -394,9 +394,9 @@ func TestPodGroupSchedulingAlgorithm(t *testing.T) { p2 := st.MakePod().Name("p2").UID("p2").PodGroupName("pg").SchedulerName("test-scheduler").Obj() p3 := st.MakePod().Name("p3").UID("p3").PodGroupName("pg").SchedulerName("test-scheduler").Obj() - qInfo1 := &framework.QueuedPodInfo{PodInfo: &framework.PodInfo{Pod: p1}, NeedsPodGroupScheduling: true} - qInfo2 := 
&framework.QueuedPodInfo{PodInfo: &framework.PodInfo{Pod: p2}, NeedsPodGroupScheduling: true} - qInfo3 := &framework.QueuedPodInfo{PodInfo: &framework.PodInfo{Pod: p3}, NeedsPodGroupScheduling: true} + qInfo1 := &framework.QueuedPodInfo{PodInfo: &framework.PodInfo{Pod: p1}} + qInfo2 := &framework.QueuedPodInfo{PodInfo: &framework.PodInfo{Pod: p2}} + qInfo3 := &framework.QueuedPodInfo{PodInfo: &framework.PodInfo{Pod: p3}} pgInfo := &framework.QueuedPodGroupInfo{ QueuedPodInfos: []*framework.QueuedPodInfo{qInfo1, qInfo2, qInfo3}, @@ -893,9 +893,9 @@ func TestSubmitPodGroupAlgorithmResult(t *testing.T) { p2 := st.MakePod().Name("p2").UID("p2").PodGroupName("pg").SchedulerName("test-scheduler").Obj() p3 := st.MakePod().Name("p3").UID("p3").PodGroupName("pg").SchedulerName("test-scheduler").Obj() - qInfo1 := &framework.QueuedPodInfo{PodInfo: &framework.PodInfo{Pod: p1}, NeedsPodGroupScheduling: true} - qInfo2 := &framework.QueuedPodInfo{PodInfo: &framework.PodInfo{Pod: p2}, NeedsPodGroupScheduling: true} - qInfo3 := &framework.QueuedPodInfo{PodInfo: &framework.PodInfo{Pod: p3}, NeedsPodGroupScheduling: true} + qInfo1 := &framework.QueuedPodInfo{PodInfo: &framework.PodInfo{Pod: p1}} + qInfo2 := &framework.QueuedPodInfo{PodInfo: &framework.PodInfo{Pod: p2}} + qInfo3 := &framework.QueuedPodInfo{PodInfo: &framework.PodInfo{Pod: p3}} pgInfo := &framework.QueuedPodGroupInfo{ QueuedPodInfos: []*framework.QueuedPodInfo{qInfo1, qInfo2, qInfo3}, diff --git a/pkg/scheduler/scheduler.go b/pkg/scheduler/scheduler.go index 541e91a45f1..226e9ce8a85 100644 --- a/pkg/scheduler/scheduler.go +++ b/pkg/scheduler/scheduler.go @@ -117,6 +117,7 @@ type Scheduler struct { registeredHandlers []cache.ResourceEventHandlerRegistration nominatedNodeNameForExpectationEnabled bool + genericWorkloadEnabled bool } func (sched *Scheduler) applyDefaultHandlers() { @@ -444,6 +445,7 @@ func New(ctx context.Context, logger: logger, APIDispatcher: apiDispatcher, nominatedNodeNameForExpectationEnabled: 
feature.DefaultFeatureGate.Enabled(features.NominatedNodeNameForExpectation), + genericWorkloadEnabled: feature.DefaultFeatureGate.Enabled(features.GenericWorkload), } sched.NextPod = podQueue.Pop sched.applyDefaultHandlers() diff --git a/staging/src/k8s.io/kube-scheduler/framework/cycle_state.go b/staging/src/k8s.io/kube-scheduler/framework/cycle_state.go index 6c4335b3980..4aba7f355d6 100644 --- a/staging/src/k8s.io/kube-scheduler/framework/cycle_state.go +++ b/staging/src/k8s.io/kube-scheduler/framework/cycle_state.go @@ -90,8 +90,12 @@ type CycleState interface { // nil if the context being cloned is nil. Clone() CycleState // IsPodGroupSchedulingCycle returns true if this cycle is a pod group scheduling cycle. + // If it returns false, it means that the pod referencing this CycleState either passed the pod group cycle + // or doesn't belong to any pod group. + // It can only return true when the GenericWorkload feature flag is enabled. IsPodGroupSchedulingCycle() bool // SetPodGroupSchedulingCycle sets whether this cycle is a pod group scheduling cycle or not. + // This should only be used when the GenericWorkload feature flag is enabled. SetPodGroupSchedulingCycle(bool) }