From 3b905ae4b5cd9bae2030b5f22a7f0d984bea4733 Mon Sep 17 00:00:00 2001 From: yliao Date: Thu, 30 Oct 2025 20:10:17 +0000 Subject: [PATCH 1/7] added device class add/update events to noderesources plugin when DRAExtendedResource feature is enabled --- .../framework/plugins/noderesources/fit.go | 10 ++++++++-- .../plugins/noderesources/fit_test.go | 20 ++++++++++++++++++- 2 files changed, 27 insertions(+), 3 deletions(-) diff --git a/pkg/scheduler/framework/plugins/noderesources/fit.go b/pkg/scheduler/framework/plugins/noderesources/fit.go index 839b8ca1704..79398dbe0ff 100644 --- a/pkg/scheduler/framework/plugins/noderesources/fit.go +++ b/pkg/scheduler/framework/plugins/noderesources/fit.go @@ -314,10 +314,16 @@ func (f *Fit) EventsToRegister(_ context.Context) ([]fwk.ClusterEventWithHint, e nodeActionType = fwk.Add | fwk.UpdateNodeAllocatable } - return []fwk.ClusterEventWithHint{ + events := []fwk.ClusterEventWithHint{ {Event: fwk.ClusterEvent{Resource: fwk.Pod, ActionType: podActionType}, QueueingHintFn: f.isSchedulableAfterPodEvent}, {Event: fwk.ClusterEvent{Resource: fwk.Node, ActionType: nodeActionType}, QueueingHintFn: f.isSchedulableAfterNodeChange}, - }, nil + } + if f.enableDRAExtendedResource { + events = append(events, + // A pod might be waiting for an exteneded resurce fom a class to get created or modified. + fwk.ClusterEventWithHint{Event: fwk.ClusterEvent{Resource: fwk.DeviceClass, ActionType: fwk.Add | fwk.Update}}) + } + return events, nil } // isSchedulableAfterPodEvent is invoked whenever a pod deleted or scaled down. 
It checks whether diff --git a/pkg/scheduler/framework/plugins/noderesources/fit_test.go b/pkg/scheduler/framework/plugins/noderesources/fit_test.go index 2be8ed60457..e31d621dc94 100644 --- a/pkg/scheduler/framework/plugins/noderesources/fit_test.go +++ b/pkg/scheduler/framework/plugins/noderesources/fit_test.go @@ -1416,6 +1416,7 @@ func TestEventsToRegister(t *testing.T) { name string enableInPlacePodVerticalScaling bool enableSchedulingQueueHint bool + enableDRAExtendedResource bool expectedClusterEvents []fwk.ClusterEventWithHint }{ { @@ -1442,11 +1443,28 @@ func TestEventsToRegister(t *testing.T) { {Event: fwk.ClusterEvent{Resource: "Node", ActionType: fwk.Add | fwk.UpdateNodeAllocatable | fwk.UpdateNodeTaint | fwk.UpdateNodeLabel}}, }, }, + { + name: "Register events with DRAExtendedResource feature enabled", + enableDRAExtendedResource: true, + expectedClusterEvents: []fwk.ClusterEventWithHint{ + {Event: fwk.ClusterEvent{Resource: "Pod", ActionType: fwk.Delete}}, + {Event: fwk.ClusterEvent{Resource: "Node", ActionType: fwk.Add | fwk.UpdateNodeAllocatable | fwk.UpdateNodeTaint | fwk.UpdateNodeLabel}}, + {Event: fwk.ClusterEvent{Resource: fwk.DeviceClass, ActionType: fwk.Add | fwk.Update}}, + }, + }, + { + name: "Register events with DRAExtendedResource feature disabled", + enableDRAExtendedResource: false, + expectedClusterEvents: []fwk.ClusterEventWithHint{ + {Event: fwk.ClusterEvent{Resource: "Pod", ActionType: fwk.Delete}}, + {Event: fwk.ClusterEvent{Resource: "Node", ActionType: fwk.Add | fwk.UpdateNodeAllocatable | fwk.UpdateNodeTaint | fwk.UpdateNodeLabel}}, + }, + }, } for _, test := range tests { t.Run(test.name, func(t *testing.T) { - fp := &Fit{enableInPlacePodVerticalScaling: test.enableInPlacePodVerticalScaling, enableSchedulingQueueHint: test.enableSchedulingQueueHint} + fp := &Fit{enableInPlacePodVerticalScaling: test.enableInPlacePodVerticalScaling, enableSchedulingQueueHint: test.enableSchedulingQueueHint, enableDRAExtendedResource: 
test.enableDRAExtendedResource} _, ctx := ktesting.NewTestContext(t) actualClusterEvents, err := fp.EventsToRegister(ctx) if err != nil { From 7aa849160a860251696204e499b048f5f1830825 Mon Sep 17 00:00:00 2001 From: yliao Date: Fri, 31 Oct 2025 22:08:07 +0000 Subject: [PATCH 2/7] added queue hint function --- .../framework/plugins/noderesources/fit.go | 36 +++++- .../plugins/noderesources/fit_test.go | 107 ++++++++++++++++++ 2 files changed, 142 insertions(+), 1 deletion(-) diff --git a/pkg/scheduler/framework/plugins/noderesources/fit.go b/pkg/scheduler/framework/plugins/noderesources/fit.go index 79398dbe0ff..0fa61f4f6b6 100644 --- a/pkg/scheduler/framework/plugins/noderesources/fit.go +++ b/pkg/scheduler/framework/plugins/noderesources/fit.go @@ -22,6 +22,7 @@ import ( "strings" v1 "k8s.io/api/core/v1" + resourceapi "k8s.io/api/resource/v1" "k8s.io/apimachinery/pkg/runtime" "k8s.io/apimachinery/pkg/util/diff" "k8s.io/apimachinery/pkg/util/sets" @@ -321,7 +322,7 @@ func (f *Fit) EventsToRegister(_ context.Context) ([]fwk.ClusterEventWithHint, e if f.enableDRAExtendedResource { events = append(events, // A pod might be waiting for an exteneded resurce fom a class to get created or modified. - fwk.ClusterEventWithHint{Event: fwk.ClusterEvent{Resource: fwk.DeviceClass, ActionType: fwk.Add | fwk.Update}}) + fwk.ClusterEventWithHint{Event: fwk.ClusterEvent{Resource: fwk.DeviceClass, ActionType: fwk.Add | fwk.Update}, QueueingHintFn: f.isSchedulableAfterDeviceClassEvent}) } return events, nil } @@ -451,6 +452,39 @@ func (f *Fit) isSchedulableAfterNodeChange(logger klog.Logger, pod *v1.Pod, oldO return fwk.Queue, nil } +// isSchedulableAfterDeviceClassChange is invoked whenever a device class added or changed. It checks whether +// that change could make a previously unschedulable pod schedulable. 
+func (f *Fit) isSchedulableAfterDeviceClassEvent(logger klog.Logger, pod *v1.Pod, oldObj, newObj interface{}) (fwk.QueueingHint, error) { + originalClass, modifiedClass, err := schedutil.As[*resourceapi.DeviceClass](oldObj, newObj) + if err != nil { + return fwk.Queue, err + } + if originalClass != nil && modifiedClass != nil && originalClass.Spec.ExtendedResourceName == modifiedClass.Spec.ExtendedResourceName { + return fwk.QueueSkip, nil + } + if originalClass != nil && modifiedClass != nil && originalClass.Spec.ExtendedResourceName != nil && modifiedClass.Spec.ExtendedResourceName != nil && *originalClass.Spec.ExtendedResourceName == *modifiedClass.Spec.ExtendedResourceName { + return fwk.QueueSkip, nil + } + if modifiedClass != nil { + if originalClass == nil { + // only check implicit extended resource name for Add, as device class name does not change during Update. + reqs := resource.PodRequests(pod, resource.PodResourcesOptions{}) + if _, ok := reqs[v1.ResourceName(resourceapi.ResourceDeviceClassPrefix+modifiedClass.Name)]; ok { + logger.V(5).Info("device class was added, and may now fit the pod's resource requests", "pod", klog.KObj(pod), "deviceclass", klog.KObj(modifiedClass)) + return fwk.Queue, nil + } + } + if modifiedClass.Spec.ExtendedResourceName != nil { + reqs := resource.PodRequests(pod, resource.PodResourcesOptions{}) + if _, ok := reqs[v1.ResourceName(*modifiedClass.Spec.ExtendedResourceName)]; ok { + logger.V(5).Info("deivce class was created or updated, and may not fit the pod's resoruce requests", "pod", klog.KObj(pod), "node", klog.KObj(modifiedClass)) + return fwk.Queue, nil + } + } + } + return fwk.QueueSkip, nil +} + // haveAnyRequestedResourcesIncreased returns true if any of the resources requested by the pod have increased or if allowed pod number increased. 
func haveAnyRequestedResourcesIncreased(pod *v1.Pod, originalNode, modifiedNode *v1.Node, draManager fwk.SharedDRAManager, opts ResourceRequestsOptions) bool { podRequest := computePodResourceRequest(pod, opts) diff --git a/pkg/scheduler/framework/plugins/noderesources/fit_test.go b/pkg/scheduler/framework/plugins/noderesources/fit_test.go index e31d621dc94..57ea5fa3ef8 100644 --- a/pkg/scheduler/framework/plugins/noderesources/fit_test.go +++ b/pkg/scheduler/framework/plugins/noderesources/fit_test.go @@ -48,6 +48,7 @@ import ( st "k8s.io/kubernetes/pkg/scheduler/testing" tf "k8s.io/kubernetes/pkg/scheduler/testing/framework" "k8s.io/kubernetes/pkg/scheduler/util/assumecache" + "k8s.io/utils/ptr" ) var ( @@ -1700,6 +1701,112 @@ func Test_isSchedulableAfterNodeChange(t *testing.T) { } } +func Test_isSchedulableAfterDeviceClassChange(t *testing.T) { + testcases := map[string]struct { + pod *v1.Pod + oldObj, newObj interface{} + expectedHint fwk.QueueingHint + expectedErr bool + }{ + "backoff-wrong-new-object": { + pod: &v1.Pod{}, + newObj: "not-a-class", + expectedHint: fwk.Queue, + expectedErr: true, + }, + "backoff-wrong-old-object": { + pod: &v1.Pod{}, + oldObj: "not-a-class", + newObj: &resourceapi.DeviceClass{}, + expectedHint: fwk.Queue, + expectedErr: true, + }, + "skip-queue-on-class-same-extended-resource-name-pointer": { + pod: newResourcePod(framework.Resource{Memory: 2}), + newObj: &resourceapi.DeviceClass{ + Spec: resourceapi.DeviceClassSpec{}, + }, + oldObj: &resourceapi.DeviceClass{ + Spec: resourceapi.DeviceClassSpec{}, + }, + expectedHint: fwk.QueueSkip, + }, + "skip-queue-on-class-same-extended-resource-name": { + pod: newResourcePod(framework.Resource{Memory: 2}), + newObj: &resourceapi.DeviceClass{ + Spec: resourceapi.DeviceClassSpec{ExtendedResourceName: ptr.To("example.com/gpu")}, + }, + oldObj: &resourceapi.DeviceClass{ + Spec: resourceapi.DeviceClassSpec{ExtendedResourceName: ptr.To("example.com/gpu")}, + }, + expectedHint: fwk.QueueSkip, + 
}, + "queue-on-class-add-with-implicit-extended-resource-name": { + pod: newResourcePod(framework.Resource{ + ScalarResources: map[v1.ResourceName]int64{"deviceclass.resource.kubernetes.io/gpuclass": 1}, + }), + newObj: &resourceapi.DeviceClass{ + ObjectMeta: metav1.ObjectMeta{ + Name: "gpuclass", + }, + Spec: resourceapi.DeviceClassSpec{ExtendedResourceName: ptr.To("example.com/gpu")}, + }, + expectedHint: fwk.Queue, + }, + "skip-on-class-add-with-implicit-extended-resource-name": { + pod: newResourcePod(framework.Resource{ + ScalarResources: map[v1.ResourceName]int64{"deviceclass.resource.kubernetes.io/gpuclass": 1}, + }), + newObj: &resourceapi.DeviceClass{ + ObjectMeta: metav1.ObjectMeta{ + Name: "gpuclass", + }, + Spec: resourceapi.DeviceClassSpec{ExtendedResourceName: ptr.To("example.com/gpu")}, + }, + oldObj: &resourceapi.DeviceClass{}, + expectedHint: fwk.QueueSkip, + }, + "queue-on-class-add-with-extended-resource-name": { + pod: newResourcePod(framework.Resource{ + ScalarResources: map[v1.ResourceName]int64{"example.com/gpu": 1}, + }), + newObj: &resourceapi.DeviceClass{ + Spec: resourceapi.DeviceClassSpec{ExtendedResourceName: ptr.To("example.com/gpu")}, + }, + expectedHint: fwk.Queue, + }, + "queue-on-class-update-with-extended-resource-name": { + pod: newResourcePod(framework.Resource{ + ScalarResources: map[v1.ResourceName]int64{"example.com/gpu": 1}, + }), + newObj: &resourceapi.DeviceClass{ + Spec: resourceapi.DeviceClassSpec{ExtendedResourceName: ptr.To("example.com/gpu")}, + }, + oldObj: &resourceapi.DeviceClass{ + Spec: resourceapi.DeviceClassSpec{ExtendedResourceName: ptr.To("example.com/gpu1")}, + }, + expectedHint: fwk.Queue, + }, + } + + for name, tc := range testcases { + t.Run(name, func(t *testing.T) { + logger, ctx := ktesting.NewTestContext(t) + p, err := NewFit(ctx, &config.NodeResourcesFitArgs{ScoringStrategy: defaultScoringStrategy}, nil, plfeature.Features{}) + if err != nil { + t.Fatal(err) + } + actualHint, err := 
p.(*Fit).isSchedulableAfterDeviceClassEvent(logger, tc.pod, tc.oldObj, tc.newObj) + if tc.expectedErr { + require.Error(t, err) + return + } + require.NoError(t, err) + require.Equal(t, tc.expectedHint, actualHint) + }) + } +} + func TestIsFit(t *testing.T) { testCases := map[string]struct { pod *v1.Pod From b609d4713c26d24e1513a3288bd002f24631df2a Mon Sep 17 00:00:00 2001 From: yliao Date: Sat, 1 Nov 2025 01:10:14 +0000 Subject: [PATCH 3/7] added integration test case --- .../framework/plugins/noderesources/fit.go | 2 +- test/integration/scheduler/queueing/queue.go | 55 +++++++++++++++++++ 2 files changed, 56 insertions(+), 1 deletion(-) diff --git a/pkg/scheduler/framework/plugins/noderesources/fit.go b/pkg/scheduler/framework/plugins/noderesources/fit.go index 0fa61f4f6b6..e080775b9d6 100644 --- a/pkg/scheduler/framework/plugins/noderesources/fit.go +++ b/pkg/scheduler/framework/plugins/noderesources/fit.go @@ -477,7 +477,7 @@ func (f *Fit) isSchedulableAfterDeviceClassEvent(logger klog.Logger, pod *v1.Pod if modifiedClass.Spec.ExtendedResourceName != nil { reqs := resource.PodRequests(pod, resource.PodResourcesOptions{}) if _, ok := reqs[v1.ResourceName(*modifiedClass.Spec.ExtendedResourceName)]; ok { - logger.V(5).Info("deivce class was created or updated, and may not fit the pod's resoruce requests", "pod", klog.KObj(pod), "node", klog.KObj(modifiedClass)) + logger.V(5).Info("deivce class was created or updated, and may fit the pod's resoruce requests", "pod", klog.KObj(pod), "deviceclass", klog.KObj(modifiedClass)) return fwk.Queue, nil } } diff --git a/test/integration/scheduler/queueing/queue.go b/test/integration/scheduler/queueing/queue.go index 2a697520c71..f50613d4c47 100644 --- a/test/integration/scheduler/queueing/queue.go +++ b/test/integration/scheduler/queueing/queue.go @@ -49,6 +49,8 @@ import ( type CoreResourceEnqueueTestCase struct { Name string + // InitialDeviceClasses is the list of DeviceClasses to be created at first. 
+ InitialDeviceClasses []*resourceapi.DeviceClass // InitialNodes is the list of Nodes to be created at first. InitialNodes []*v1.Node // InitialPods is the list of Pods to be created at first if it's not empty. @@ -462,6 +464,54 @@ var CoreResourceEnqueueTestCases = []*CoreResourceEnqueueTestCase{ EnableSchedulingQueueHint: sets.New(true), EnableDRAExtendedResource: true, }, + { + Name: "Pod rejected by the NodeResourcesFit plugin isn't requeued when a DeviceClass have the extended resource not matching pod's requests, and DRAExtendedResource is enabled", + EnablePlugins: []string{names.NodeResourcesFit, names.NodeAffinity}, + InitialDeviceClasses: []*resourceapi.DeviceClass{{ObjectMeta: metav1.ObjectMeta{Name: "fake-class"}, Spec: resourceapi.DeviceClassSpec{ExtendedResourceName: nil}}}, + InitialNodes: []*v1.Node{ + st.MakeNode().Name("fake-node1").Capacity(map[v1.ResourceName]string{v1.ResourceCPU: "4"}).Obj(), + st.MakeNode().Name("fake-node2").Capacity(map[v1.ResourceName]string{v1.ResourceCPU: "2"}).Label("group", "b").Obj(), + }, + Pods: []*v1.Pod{ + // - Pod1 requests available amount of CPU (in fake-node1), but will be rejected by NodeAffinity plugin. Note that the NodeResourceFit plugin will register for QHints because it rejected fake-node2. + st.MakePod().Name("pod1").Res(map[v1.ResourceName]string{v1.ResourceCPU: "4", "example.com/gpu": "1"}).NodeAffinityIn("group", []string{"b"}, st.NodeSelectorTypeMatchExpressions).Container("image").Obj(), + }, + TriggerFn: func(testCtx *testutils.TestContext) (map[fwk.ClusterEvent]uint64, error) { + // Trigger a DeviceClass Update event that adds the extended resource name that matches pod's resource request. 
+ if _, err := testCtx.ClientSet.ResourceV1().DeviceClasses().Update(testCtx.Ctx, &resourceapi.DeviceClass{ObjectMeta: metav1.ObjectMeta{Name: "fake-class"}, Spec: resourceapi.DeviceClassSpec{ExtendedResourceName: ptr.To("example.com/other-gpu")}}, metav1.UpdateOptions{}); err != nil { + return nil, fmt.Errorf("failed to update the fake-class: %w", err) + } + + return map[fwk.ClusterEvent]uint64{{Resource: fwk.DeviceClass, ActionType: fwk.Update}: 1}, nil + }, + WantRequeuedPods: sets.Set[string]{}, + EnableSchedulingQueueHint: sets.New(true), + EnableDRAExtendedResource: true, + }, + { + Name: "Pod rejected by the NodeResourcesFit plugin is requeued when a DeviceClass have the extended resource, and DRAExtendedResource is enabled", + EnablePlugins: []string{names.NodeResourcesFit, names.NodeAffinity}, + InitialDeviceClasses: []*resourceapi.DeviceClass{{ObjectMeta: metav1.ObjectMeta{Name: "fake-class"}, Spec: resourceapi.DeviceClassSpec{ExtendedResourceName: nil}}}, + InitialNodes: []*v1.Node{ + st.MakeNode().Name("fake-node1").Capacity(map[v1.ResourceName]string{v1.ResourceCPU: "4"}).Obj(), + st.MakeNode().Name("fake-node2").Capacity(map[v1.ResourceName]string{v1.ResourceCPU: "2"}).Label("group", "b").Obj(), + }, + Pods: []*v1.Pod{ + // - Pod1 requests available amount of CPU (in fake-node1), but will be rejected by NodeAffinity plugin. Note that the NodeResourceFit plugin will register for QHints because it rejected fake-node2. + st.MakePod().Name("pod1").Res(map[v1.ResourceName]string{v1.ResourceCPU: "4", "example.com/gpu": "1"}).NodeAffinityIn("group", []string{"b"}, st.NodeSelectorTypeMatchExpressions).Container("image").Obj(), + }, + TriggerFn: func(testCtx *testutils.TestContext) (map[fwk.ClusterEvent]uint64, error) { + // Trigger a DeviceClass Update event that adds the extended resource name that matches pod's resource request. 
+ if _, err := testCtx.ClientSet.ResourceV1().DeviceClasses().Update(testCtx.Ctx, &resourceapi.DeviceClass{ObjectMeta: metav1.ObjectMeta{Name: "fake-class"}, Spec: resourceapi.DeviceClassSpec{ExtendedResourceName: ptr.To("example.com/gpu")}}, metav1.UpdateOptions{}); err != nil { + return nil, fmt.Errorf("failed to update the fake-class: %w", err) + } + + return map[fwk.ClusterEvent]uint64{{Resource: fwk.DeviceClass, ActionType: fwk.Update}: 1}, nil + }, + WantRequeuedPods: sets.New("pod1"), + EnableSchedulingQueueHint: sets.New(true), + EnableDRAExtendedResource: true, + }, { Name: "Pod rejected by the NodeResourcesFit plugin isn't requeued when a Node is updated without increase in the requested resources", EnablePlugins: []string{names.NodeResourcesFit, names.NodeAffinity}, @@ -2477,6 +2527,11 @@ func RunTestCoreResourceEnqueue(t *testing.T, tt *CoreResourceEnqueueTestCase) { defer testCtx.Scheduler.SchedulingQueue.Close() cs, ns, ctx := testCtx.ClientSet, testCtx.NS.Name, testCtx.Ctx + for _, class := range tt.InitialDeviceClasses { + if _, err := cs.ResourceV1().DeviceClasses().Create(ctx, class, metav1.CreateOptions{}); err != nil { + t.Fatalf("Failed to create an initial DeviceClass %q: %v", class.Name, err) + } + } // Create one Node with a taint. 
for _, node := range tt.InitialNodes { if _, err := cs.CoreV1().Nodes().Create(ctx, node, metav1.CreateOptions{}); err != nil { From 14f17a3809a3370be7d0e1e1476c2669ce1486a7 Mon Sep 17 00:00:00 2001 From: yliao Date: Mon, 3 Nov 2025 22:41:54 +0000 Subject: [PATCH 4/7] addressed review feedback --- .../framework/plugins/noderesources/fit.go | 39 ++++++++++--------- .../plugins/noderesources/fit_test.go | 39 +++++++++++++++++-- test/integration/scheduler/queueing/queue.go | 31 ++++++--------- 3 files changed, 68 insertions(+), 41 deletions(-) diff --git a/pkg/scheduler/framework/plugins/noderesources/fit.go b/pkg/scheduler/framework/plugins/noderesources/fit.go index e080775b9d6..7f9946ac394 100644 --- a/pkg/scheduler/framework/plugins/noderesources/fit.go +++ b/pkg/scheduler/framework/plugins/noderesources/fit.go @@ -36,6 +36,7 @@ import ( "k8s.io/kubernetes/pkg/scheduler/framework/plugins/feature" "k8s.io/kubernetes/pkg/scheduler/framework/plugins/names" schedutil "k8s.io/kubernetes/pkg/scheduler/util" + "k8s.io/utils/ptr" ) var _ fwk.PreFilterPlugin = &Fit{} @@ -321,7 +322,7 @@ func (f *Fit) EventsToRegister(_ context.Context) ([]fwk.ClusterEventWithHint, e } if f.enableDRAExtendedResource { events = append(events, - // A pod might be waiting for an exteneded resurce fom a class to get created or modified. + // A pod might be waiting for an exteneded resurce from a class to get created or modified. fwk.ClusterEventWithHint{Event: fwk.ClusterEvent{Resource: fwk.DeviceClass, ActionType: fwk.Add | fwk.Update}, QueueingHintFn: f.isSchedulableAfterDeviceClassEvent}) } return events, nil @@ -432,6 +433,7 @@ func (f *Fit) isSchedulableAfterNodeChange(logger klog.Logger, pod *v1.Pod, oldO EnablePodLevelResources: f.enablePodLevelResources, EnableDRAExtendedResource: f.enableDRAExtendedResource, } + // Leaving in the queue, since the pod won't fit into the modified node anyway. 
if !isFit(pod, modifiedNode, draManager, opts) { logger.V(5).Info("node was created or updated, but it doesn't have enough resource(s) to accommodate this pod", "pod", klog.KObj(pod), "node", klog.KObj(modifiedNode)) @@ -459,29 +461,30 @@ func (f *Fit) isSchedulableAfterDeviceClassEvent(logger klog.Logger, pod *v1.Pod if err != nil { return fwk.Queue, err } - if originalClass != nil && modifiedClass != nil && originalClass.Spec.ExtendedResourceName == modifiedClass.Spec.ExtendedResourceName { + if originalClass != nil && originalClass.Spec.ExtendedResourceName == modifiedClass.Spec.ExtendedResourceName { + logger.V(5).Info("device class has identical extended resource name pointer", "pod", klog.KObj(pod), "deviceclass", klog.KObj(modifiedClass)) return fwk.QueueSkip, nil } - if originalClass != nil && modifiedClass != nil && originalClass.Spec.ExtendedResourceName != nil && modifiedClass.Spec.ExtendedResourceName != nil && *originalClass.Spec.ExtendedResourceName == *modifiedClass.Spec.ExtendedResourceName { + if originalClass != nil && ptr.Deref(originalClass.Spec.ExtendedResourceName, "") == ptr.Deref(modifiedClass.Spec.ExtendedResourceName, "") { + logger.V(5).Info("device class has identical extended resource name string", "pod", klog.KObj(pod), "deviceclass", klog.KObj(modifiedClass)) return fwk.QueueSkip, nil } - if modifiedClass != nil { - if originalClass == nil { - // only check implicit extended resource name for Add, as device class name does not change during Update. 
- reqs := resource.PodRequests(pod, resource.PodResourcesOptions{}) - if _, ok := reqs[v1.ResourceName(resourceapi.ResourceDeviceClassPrefix+modifiedClass.Name)]; ok { - logger.V(5).Info("device class was added, and may now fit the pod's resource requests", "pod", klog.KObj(pod), "deviceclass", klog.KObj(modifiedClass)) - return fwk.Queue, nil - } - } - if modifiedClass.Spec.ExtendedResourceName != nil { - reqs := resource.PodRequests(pod, resource.PodResourcesOptions{}) - if _, ok := reqs[v1.ResourceName(*modifiedClass.Spec.ExtendedResourceName)]; ok { - logger.V(5).Info("deivce class was created or updated, and may fit the pod's resoruce requests", "pod", klog.KObj(pod), "deviceclass", klog.KObj(modifiedClass)) - return fwk.Queue, nil - } + if originalClass == nil { + // only check implicit extended resource name for Add, as device class name does not change during Update. + reqs := resource.PodRequests(pod, resource.PodResourcesOptions{}) + if _, ok := reqs[v1.ResourceName(resourceapi.ResourceDeviceClassPrefix+modifiedClass.Name)]; ok { + logger.V(5).Info("device class was added, and may now fit the pod's resource requests", "pod", klog.KObj(pod), "deviceclass", klog.KObj(modifiedClass)) + return fwk.Queue, nil } } + if modifiedClass.Spec.ExtendedResourceName != nil { + reqs := resource.PodRequests(pod, resource.PodResourcesOptions{}) + if _, ok := reqs[v1.ResourceName(*modifiedClass.Spec.ExtendedResourceName)]; ok { + logger.V(5).Info("deivce class was created or updated, and may fit the pod's resoruce requests", "pod", klog.KObj(pod), "deviceclass", klog.KObj(modifiedClass)) + return fwk.Queue, nil + } + } + logger.V(5).Info("updated deivce class extended resource name is either nil, or does not match pod's resource request", "pod", klog.KObj(pod), "deviceclass", klog.KObj(modifiedClass)) return fwk.QueueSkip, nil } diff --git a/pkg/scheduler/framework/plugins/noderesources/fit_test.go b/pkg/scheduler/framework/plugins/noderesources/fit_test.go index 
57ea5fa3ef8..0916d073f36 100644 --- a/pkg/scheduler/framework/plugins/noderesources/fit_test.go +++ b/pkg/scheduler/framework/plugins/noderesources/fit_test.go @@ -1702,9 +1702,10 @@ func Test_isSchedulableAfterNodeChange(t *testing.T) { } func Test_isSchedulableAfterDeviceClassChange(t *testing.T) { + ern := "example.com/gpu" testcases := map[string]struct { pod *v1.Pod - oldObj, newObj interface{} + oldObj, newObj any expectedHint fwk.QueueingHint expectedErr bool }{ @@ -1721,7 +1722,7 @@ func Test_isSchedulableAfterDeviceClassChange(t *testing.T) { expectedHint: fwk.Queue, expectedErr: true, }, - "skip-queue-on-class-same-extended-resource-name-pointer": { + "skip-queue-on-class-nil-extended-resource-name-pointer": { pod: newResourcePod(framework.Resource{Memory: 2}), newObj: &resourceapi.DeviceClass{ Spec: resourceapi.DeviceClassSpec{}, @@ -1731,6 +1732,20 @@ func Test_isSchedulableAfterDeviceClassChange(t *testing.T) { }, expectedHint: fwk.QueueSkip, }, + "skip-queue-on-class-same-extended-resource-name-pointer": { + pod: newResourcePod(framework.Resource{Memory: 2}), + newObj: &resourceapi.DeviceClass{ + Spec: resourceapi.DeviceClassSpec{ + ExtendedResourceName: &ern, + }, + }, + oldObj: &resourceapi.DeviceClass{ + Spec: resourceapi.DeviceClassSpec{ + ExtendedResourceName: &ern, + }, + }, + expectedHint: fwk.QueueSkip, + }, "skip-queue-on-class-same-extended-resource-name": { pod: newResourcePod(framework.Resource{Memory: 2}), newObj: &resourceapi.DeviceClass{ @@ -1753,7 +1768,19 @@ func Test_isSchedulableAfterDeviceClassChange(t *testing.T) { }, expectedHint: fwk.Queue, }, - "skip-on-class-add-with-implicit-extended-resource-name": { + "queue-on-class-add-with-implicit-extended-resource-name-not-matching": { + pod: newResourcePod(framework.Resource{ + ScalarResources: map[v1.ResourceName]int64{"deviceclass.resource.kubernetes.io/gpuclass": 1}, + }), + newObj: &resourceapi.DeviceClass{ + ObjectMeta: metav1.ObjectMeta{ + Name: "myclass", + }, + Spec: 
resourceapi.DeviceClassSpec{ExtendedResourceName: ptr.To("example.com/gpu")}, + }, + expectedHint: fwk.QueueSkip, + }, + "skip-on-class-update-with-implicit-extended-resource-name": { pod: newResourcePod(framework.Resource{ ScalarResources: map[v1.ResourceName]int64{"deviceclass.resource.kubernetes.io/gpuclass": 1}, }), @@ -1763,7 +1790,11 @@ func Test_isSchedulableAfterDeviceClassChange(t *testing.T) { }, Spec: resourceapi.DeviceClassSpec{ExtendedResourceName: ptr.To("example.com/gpu")}, }, - oldObj: &resourceapi.DeviceClass{}, + oldObj: &resourceapi.DeviceClass{ + ObjectMeta: metav1.ObjectMeta{ + Name: "gpuclass", + }, + }, expectedHint: fwk.QueueSkip, }, "queue-on-class-add-with-extended-resource-name": { diff --git a/test/integration/scheduler/queueing/queue.go b/test/integration/scheduler/queueing/queue.go index f50613d4c47..173b645d00d 100644 --- a/test/integration/scheduler/queueing/queue.go +++ b/test/integration/scheduler/queueing/queue.go @@ -49,8 +49,6 @@ import ( type CoreResourceEnqueueTestCase struct { Name string - // InitialDeviceClasses is the list of DeviceClasses to be created at first. - InitialDeviceClasses []*resourceapi.DeviceClass // InitialNodes is the list of Nodes to be created at first. InitialNodes []*v1.Node // InitialPods is the list of Pods to be created at first if it's not empty. 
@@ -465,9 +463,8 @@ var CoreResourceEnqueueTestCases = []*CoreResourceEnqueueTestCase{ EnableDRAExtendedResource: true, }, { - Name: "Pod rejected by the NodeResourcesFit plugin isn't requeued when a DeviceClass have the extended resource not matching pod's requests, and DRAExtendedResource is enabled", - EnablePlugins: []string{names.NodeResourcesFit, names.NodeAffinity}, - InitialDeviceClasses: []*resourceapi.DeviceClass{{ObjectMeta: metav1.ObjectMeta{Name: "fake-class"}, Spec: resourceapi.DeviceClassSpec{ExtendedResourceName: nil}}}, + Name: "Pod rejected by the NodeResourcesFit plugin is requeued when created a DeviceClass having the extended resource matching pod's requests, and DRAExtendedResource is enabled", + EnablePlugins: []string{names.NodeResourcesFit, names.NodeAffinity}, InitialNodes: []*v1.Node{ st.MakeNode().Name("fake-node1").Capacity(map[v1.ResourceName]string{v1.ResourceCPU: "4"}).Obj(), st.MakeNode().Name("fake-node2").Capacity(map[v1.ResourceName]string{v1.ResourceCPU: "2"}).Label("group", "b").Obj(), @@ -477,19 +474,19 @@ var CoreResourceEnqueueTestCases = []*CoreResourceEnqueueTestCase{ st.MakePod().Name("pod1").Res(map[v1.ResourceName]string{v1.ResourceCPU: "4", "example.com/gpu": "1"}).NodeAffinityIn("group", []string{"b"}, st.NodeSelectorTypeMatchExpressions).Container("image").Obj(), }, TriggerFn: func(testCtx *testutils.TestContext) (map[fwk.ClusterEvent]uint64, error) { - // Trigger a DeviceClass Update event that adds the extended resource name that matches pod's resource request. 
- if _, err := testCtx.ClientSet.ResourceV1().DeviceClasses().Update(testCtx.Ctx, &resourceapi.DeviceClass{ObjectMeta: metav1.ObjectMeta{Name: "fake-class"}, Spec: resourceapi.DeviceClassSpec{ExtendedResourceName: ptr.To("example.com/other-gpu")}}, metav1.UpdateOptions{}); err != nil { - return nil, fmt.Errorf("failed to update the fake-class: %w", err) + // Trigger a DeviceClass Create event that has the extended resource name that matches pod's resource request. + if _, err := testCtx.ClientSet.ResourceV1().DeviceClasses().Create(testCtx.Ctx, &resourceapi.DeviceClass{ObjectMeta: metav1.ObjectMeta{Name: "fake-class"}, Spec: resourceapi.DeviceClassSpec{ExtendedResourceName: ptr.To("example.com/gpu")}}, metav1.CreateOptions{}); err != nil { + return nil, fmt.Errorf("failed to create the fake-class: %w", err) } - return map[fwk.ClusterEvent]uint64{{Resource: fwk.DeviceClass, ActionType: fwk.Update}: 1}, nil + return map[fwk.ClusterEvent]uint64{{Resource: fwk.DeviceClass, ActionType: fwk.Add}: 1}, nil }, - WantRequeuedPods: sets.Set[string]{}, + WantRequeuedPods: sets.New("pod1"), EnableSchedulingQueueHint: sets.New(true), EnableDRAExtendedResource: true, }, { - Name: "Pod rejected by the NodeResourcesFit plugin is requeued when a DeviceClass have the extended resource, and DRAExtendedResource is enabled", + Name: "Pod rejected by the NodeResourcesFit plugin is requeued when a DeviceClass has the extended resource, and DRAExtendedResource is enabled", EnablePlugins: []string{names.NodeResourcesFit, names.NodeAffinity}, InitialDeviceClasses: []*resourceapi.DeviceClass{{ObjectMeta: metav1.ObjectMeta{Name: "fake-class"}, Spec: resourceapi.DeviceClassSpec{ExtendedResourceName: nil}}}, InitialNodes: []*v1.Node{ @@ -499,6 +496,7 @@ var CoreResourceEnqueueTestCases = []*CoreResourceEnqueueTestCase{ Pods: []*v1.Pod{ // - Pod1 requests available amount of CPU (in fake-node1), but will be rejected by NodeAffinity plugin. 
Note that the NodeResourceFit plugin will register for QHints because it rejected fake-node2. st.MakePod().Name("pod1").Res(map[v1.ResourceName]string{v1.ResourceCPU: "4", "example.com/gpu": "1"}).NodeAffinityIn("group", []string{"b"}, st.NodeSelectorTypeMatchExpressions).Container("image").Obj(), + st.MakePod().Name("pod2").Res(map[v1.ResourceName]string{v1.ResourceCPU: "4", "example.com/othergpu": "1"}).NodeAffinityIn("group", []string{"b"}, st.NodeSelectorTypeMatchExpressions).Container("image").Obj(), }, TriggerFn: func(testCtx *testutils.TestContext) (map[fwk.ClusterEvent]uint64, error) { // Trigger a DeviceClass Update event that adds the extended resource name that matches pod's resource request. @@ -2527,11 +2525,6 @@ func RunTestCoreResourceEnqueue(t *testing.T, tt *CoreResourceEnqueueTestCase) { defer testCtx.Scheduler.SchedulingQueue.Close() cs, ns, ctx := testCtx.ClientSet, testCtx.NS.Name, testCtx.Ctx - for _, class := range tt.InitialDeviceClasses { - if _, err := cs.ResourceV1().DeviceClasses().Create(ctx, class, metav1.CreateOptions{}); err != nil { - t.Fatalf("Failed to create an initial DeviceClass %q: %v", class.Name, err) - } - } // Create one Node with a taint. 
for _, node := range tt.InitialNodes { if _, err := cs.CoreV1().Nodes().Create(ctx, node, metav1.CreateOptions{}); err != nil { @@ -2539,9 +2532,9 @@ func RunTestCoreResourceEnqueue(t *testing.T, tt *CoreResourceEnqueueTestCase) { } } - for _, deviceClass := range tt.InitialDeviceClasses { - if _, err := cs.ResourceV1().DeviceClasses().Create(ctx, deviceClass, metav1.CreateOptions{}); err != nil { - t.Fatalf("Failed to create a DeviceClass %q: %v", deviceClass.Name, err) + for _, class := range tt.InitialDeviceClasses { + if _, err := cs.ResourceV1().DeviceClasses().Create(ctx, class, metav1.CreateOptions{}); err != nil { + t.Fatalf("Failed to create an initial DeviceClass %q: %v", class.Name, err) } } From 2e479e00f4e81f2c7121f01a33cb1abd4ba16f51 Mon Sep 17 00:00:00 2001 From: yliao Date: Tue, 4 Nov 2025 16:27:26 +0000 Subject: [PATCH 5/7] refactored the hint function, added test cases --- .../framework/plugins/noderesources/fit.go | 19 ++++++++----------- .../plugins/noderesources/fit_test.go | 14 +++++++++++++- test/integration/scheduler/queueing/queue.go | 8 ++++---- 3 files changed, 25 insertions(+), 16 deletions(-) diff --git a/pkg/scheduler/framework/plugins/noderesources/fit.go b/pkg/scheduler/framework/plugins/noderesources/fit.go index 7f9946ac394..c4d42553203 100644 --- a/pkg/scheduler/framework/plugins/noderesources/fit.go +++ b/pkg/scheduler/framework/plugins/noderesources/fit.go @@ -461,15 +461,12 @@ func (f *Fit) isSchedulableAfterDeviceClassEvent(logger klog.Logger, pod *v1.Pod if err != nil { return fwk.Queue, err } - if originalClass != nil && originalClass.Spec.ExtendedResourceName == modifiedClass.Spec.ExtendedResourceName { - logger.V(5).Info("device class has identical extended resource name pointer", "pod", klog.KObj(pod), "deviceclass", klog.KObj(modifiedClass)) - return fwk.QueueSkip, nil - } - if originalClass != nil && ptr.Deref(originalClass.Spec.ExtendedResourceName, "") == ptr.Deref(modifiedClass.Spec.ExtendedResourceName, "") { - 
logger.V(5).Info("device class has identical extended resource name string", "pod", klog.KObj(pod), "deviceclass", klog.KObj(modifiedClass)) - return fwk.QueueSkip, nil - } - if originalClass == nil { + if originalClass != nil { + if ptr.Deref(originalClass.Spec.ExtendedResourceName, "") == ptr.Deref(modifiedClass.Spec.ExtendedResourceName, "") { + logger.V(5).Info("device class has identical extended resource name", "pod", klog.KObj(pod), "deviceclass", klog.KObj(modifiedClass)) + return fwk.QueueSkip, nil + } + } else { // only check implicit extended resource name for Add, as device class name does not change during Update. reqs := resource.PodRequests(pod, resource.PodResourcesOptions{}) if _, ok := reqs[v1.ResourceName(resourceapi.ResourceDeviceClassPrefix+modifiedClass.Name)]; ok { @@ -480,11 +477,11 @@ func (f *Fit) isSchedulableAfterDeviceClassEvent(logger klog.Logger, pod *v1.Pod if modifiedClass.Spec.ExtendedResourceName != nil { reqs := resource.PodRequests(pod, resource.PodResourcesOptions{}) if _, ok := reqs[v1.ResourceName(*modifiedClass.Spec.ExtendedResourceName)]; ok { - logger.V(5).Info("deivce class was created or updated, and may fit the pod's resoruce requests", "pod", klog.KObj(pod), "deviceclass", klog.KObj(modifiedClass)) + logger.V(5).Info("device class was created or updated, and may fit the pod's resoruce requests", "pod", klog.KObj(pod), "deviceclass", klog.KObj(modifiedClass)) return fwk.Queue, nil } } - logger.V(5).Info("updated deivce class extended resource name is either nil, or does not match pod's resource request", "pod", klog.KObj(pod), "deviceclass", klog.KObj(modifiedClass)) + logger.V(5).Info("created or updated deivce class extended resource name is either nil, or does not match pod's resource request", "pod", klog.KObj(pod), "deviceclass", klog.KObj(modifiedClass)) return fwk.QueueSkip, nil } diff --git a/pkg/scheduler/framework/plugins/noderesources/fit_test.go b/pkg/scheduler/framework/plugins/noderesources/fit_test.go 
index 0916d073f36..8088591815b 100644 --- a/pkg/scheduler/framework/plugins/noderesources/fit_test.go +++ b/pkg/scheduler/framework/plugins/noderesources/fit_test.go @@ -1768,7 +1768,7 @@ func Test_isSchedulableAfterDeviceClassChange(t *testing.T) { }, expectedHint: fwk.Queue, }, - "queue-on-class-add-with-implicit-extended-resource-name-not-matching": { + "skip-on-class-add-with-implicit-extended-resource-name-not-matching": { pod: newResourcePod(framework.Resource{ ScalarResources: map[v1.ResourceName]int64{"deviceclass.resource.kubernetes.io/gpuclass": 1}, }), @@ -1780,6 +1780,18 @@ func Test_isSchedulableAfterDeviceClassChange(t *testing.T) { }, expectedHint: fwk.QueueSkip, }, + "skip-on-class-add-with-explicit-extended-resource-name-not-matching": { + pod: newResourcePod(framework.Resource{ + ScalarResources: map[v1.ResourceName]int64{"example.com/othergpu": 1}, + }), + newObj: &resourceapi.DeviceClass{ + ObjectMeta: metav1.ObjectMeta{ + Name: "myclass", + }, + Spec: resourceapi.DeviceClassSpec{ExtendedResourceName: ptr.To("example.com/gpu")}, + }, + expectedHint: fwk.QueueSkip, + }, "skip-on-class-update-with-implicit-extended-resource-name": { pod: newResourcePod(framework.Resource{ ScalarResources: map[v1.ResourceName]int64{"deviceclass.resource.kubernetes.io/gpuclass": 1}, diff --git a/test/integration/scheduler/queueing/queue.go b/test/integration/scheduler/queueing/queue.go index 173b645d00d..f76987dde4c 100644 --- a/test/integration/scheduler/queueing/queue.go +++ b/test/integration/scheduler/queueing/queue.go @@ -463,15 +463,15 @@ var CoreResourceEnqueueTestCases = []*CoreResourceEnqueueTestCase{ EnableDRAExtendedResource: true, }, { - Name: "Pod rejected by the NodeResourcesFit plugin is requeued when created a DeviceClass having the extended resource matching pod's requests, and DRAExtendedResource is enabled", - EnablePlugins: []string{names.NodeResourcesFit, names.NodeAffinity}, + Name: "Pod rejected by the NodeResourcesFit plugin is requeued when 
created DeviceClass having the extended resource matching pod's requests, and DRAExtendedResource is enabled", + EnablePlugins: []string{names.NodeResourcesFit}, InitialNodes: []*v1.Node{ st.MakeNode().Name("fake-node1").Capacity(map[v1.ResourceName]string{v1.ResourceCPU: "4"}).Obj(), - st.MakeNode().Name("fake-node2").Capacity(map[v1.ResourceName]string{v1.ResourceCPU: "2"}).Label("group", "b").Obj(), }, Pods: []*v1.Pod{ // - Pod1 requests available amount of CPU (in fake-node1), but will be rejected by NodeAffinity plugin. Note that the NodeResourceFit plugin will register for QHints because it rejected fake-node2. st.MakePod().Name("pod1").Res(map[v1.ResourceName]string{v1.ResourceCPU: "4", "example.com/gpu": "1"}).NodeAffinityIn("group", []string{"b"}, st.NodeSelectorTypeMatchExpressions).Container("image").Obj(), + st.MakePod().Name("pod2").Res(map[v1.ResourceName]string{v1.ResourceCPU: "4", "example.com/othergpu": "1"}).NodeAffinityIn("group", []string{"b"}, st.NodeSelectorTypeMatchExpressions).Container("image").Obj(), }, TriggerFn: func(testCtx *testutils.TestContext) (map[fwk.ClusterEvent]uint64, error) { // Trigger a DeviceClass Create event that has the extended resource name that matches pod's resource request. 
@@ -486,7 +486,7 @@ var CoreResourceEnqueueTestCases = []*CoreResourceEnqueueTestCase{ EnableDRAExtendedResource: true, }, { - Name: "Pod rejected by the NodeResourcesFit plugin is requeued when a DeviceClass has the extended resource, and DRAExtendedResource is enabled", + Name: "Pod rejected by the NodeResourcesFit plugin is requeued when updated DeviceClass has the extended resource, and DRAExtendedResource is enabled", EnablePlugins: []string{names.NodeResourcesFit, names.NodeAffinity}, InitialDeviceClasses: []*resourceapi.DeviceClass{{ObjectMeta: metav1.ObjectMeta{Name: "fake-class"}, Spec: resourceapi.DeviceClassSpec{ExtendedResourceName: nil}}}, InitialNodes: []*v1.Node{ From 372328f281422df128e2e5c487ee907ab30e3ca1 Mon Sep 17 00:00:00 2001 From: yliao Date: Tue, 4 Nov 2025 16:43:40 +0000 Subject: [PATCH 6/7] reverted the initial device class change --- test/integration/scheduler/queueing/queue.go | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/test/integration/scheduler/queueing/queue.go b/test/integration/scheduler/queueing/queue.go index f76987dde4c..d05a92a049b 100644 --- a/test/integration/scheduler/queueing/queue.go +++ b/test/integration/scheduler/queueing/queue.go @@ -2532,9 +2532,9 @@ func RunTestCoreResourceEnqueue(t *testing.T, tt *CoreResourceEnqueueTestCase) { } } - for _, class := range tt.InitialDeviceClasses { - if _, err := cs.ResourceV1().DeviceClasses().Create(ctx, class, metav1.CreateOptions{}); err != nil { - t.Fatalf("Failed to create an initial DeviceClass %q: %v", class.Name, err) + for _, deviceClass := range tt.InitialDeviceClasses { + if _, err := cs.ResourceV1().DeviceClasses().Create(ctx, deviceClass, metav1.CreateOptions{}); err != nil { + t.Fatalf("Failed to create a DeviceClass %q: %v", deviceClass.Name, err) } } From a181fd2eb81e628de09b8090bbf040f02d7f8801 Mon Sep 17 00:00:00 2001 From: yliao Date: Tue, 4 Nov 2025 18:47:37 +0000 Subject: [PATCH 7/7] removed NodeAffinity in the test cases as it is
not needed --- test/integration/scheduler/queueing/queue.go | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) diff --git a/test/integration/scheduler/queueing/queue.go b/test/integration/scheduler/queueing/queue.go index d05a92a049b..1acd94a079a 100644 --- a/test/integration/scheduler/queueing/queue.go +++ b/test/integration/scheduler/queueing/queue.go @@ -469,9 +469,9 @@ var CoreResourceEnqueueTestCases = []*CoreResourceEnqueueTestCase{ st.MakeNode().Name("fake-node1").Capacity(map[v1.ResourceName]string{v1.ResourceCPU: "4"}).Obj(), }, Pods: []*v1.Pod{ - // - Pod1 requests available amount of CPU (in fake-node1), but will be rejected by NodeAffinity plugin. Note that the NodeResourceFit plugin will register for QHints because it rejected fake-node2. - st.MakePod().Name("pod1").Res(map[v1.ResourceName]string{v1.ResourceCPU: "4", "example.com/gpu": "1"}).NodeAffinityIn("group", []string{"b"}, st.NodeSelectorTypeMatchExpressions).Container("image").Obj(), - st.MakePod().Name("pod2").Res(map[v1.ResourceName]string{v1.ResourceCPU: "4", "example.com/othergpu": "1"}).NodeAffinityIn("group", []string{"b"}, st.NodeSelectorTypeMatchExpressions).Container("image").Obj(), + // - Pod1 requests available amount of CPU (in fake-node1), but will be rejected due to lack of extended resource example.com/gpu. + st.MakePod().Name("pod1").Res(map[v1.ResourceName]string{v1.ResourceCPU: "4", "example.com/gpu": "1"}).Container("image").Obj(), + st.MakePod().Name("pod2").Res(map[v1.ResourceName]string{v1.ResourceCPU: "4", "example.com/othergpu": "1"}).Container("image").Obj(), }, TriggerFn: func(testCtx *testutils.TestContext) (map[fwk.ClusterEvent]uint64, error) { // Trigger a DeviceClass Create event that has the extended resource name that matches pod's resource request.
@@ -487,16 +487,15 @@ var CoreResourceEnqueueTestCases = []*CoreResourceEnqueueTestCase{ }, { Name: "Pod rejected by the NodeResourcesFit plugin is requeued when updated DeviceClass has the extended resource, and DRAExtendedResource is enabled", - EnablePlugins: []string{names.NodeResourcesFit, names.NodeAffinity}, + EnablePlugins: []string{names.NodeResourcesFit}, InitialDeviceClasses: []*resourceapi.DeviceClass{{ObjectMeta: metav1.ObjectMeta{Name: "fake-class"}, Spec: resourceapi.DeviceClassSpec{ExtendedResourceName: nil}}}, InitialNodes: []*v1.Node{ st.MakeNode().Name("fake-node1").Capacity(map[v1.ResourceName]string{v1.ResourceCPU: "4"}).Obj(), - st.MakeNode().Name("fake-node2").Capacity(map[v1.ResourceName]string{v1.ResourceCPU: "2"}).Label("group", "b").Obj(), }, Pods: []*v1.Pod{ - // - Pod1 requests available amount of CPU (in fake-node1), but will be rejected by NodeAffinity plugin. Note that the NodeResourceFit plugin will register for QHints because it rejected fake-node2. - st.MakePod().Name("pod1").Res(map[v1.ResourceName]string{v1.ResourceCPU: "4", "example.com/gpu": "1"}).NodeAffinityIn("group", []string{"b"}, st.NodeSelectorTypeMatchExpressions).Container("image").Obj(), - st.MakePod().Name("pod2").Res(map[v1.ResourceName]string{v1.ResourceCPU: "4", "example.com/othergpu": "1"}).NodeAffinityIn("group", []string{"b"}, st.NodeSelectorTypeMatchExpressions).Container("image").Obj(), + // - Pod1 requests available amount of CPU (in fake-node1), but will be rejected due to lack of extended resource example.com/gpu. 
+ st.MakePod().Name("pod1").Res(map[v1.ResourceName]string{v1.ResourceCPU: "4", "example.com/gpu": "1"}).Container("image").Obj(), + st.MakePod().Name("pod2").Res(map[v1.ResourceName]string{v1.ResourceCPU: "4", "example.com/othergpu": "1"}).Container("image").Obj(), }, TriggerFn: func(testCtx *testutils.TestContext) (map[fwk.ClusterEvent]uint64, error) { // Trigger a DeviceClass Update event that adds the extended resource name that matches pod's resource request.