Merge pull request #139056 from iomarsayed/add-performance-tests-for-tas

Add performance tests for TAS
2026-05-28 04:04:39 -04:00 · 2026-05-18 16:53:58 +05:30 · 2026-05-18 16:53:58 +05:30 · 8993230c71
commit 8993230c71
parent 1619326c6a 430306b3d5
10 changed files with 226 additions and 34 deletions
--- a/test/integration/scheduler_perf/executor.go
+++ b/test/integration/scheduler_perf/executor.go
@ -265,7 +265,14 @@ func (e *WorkloadExecutor) runCreatePodsOp(tCtx ktesting.TContext, opIndex int,
 			return err
 		}
 	default:
-		if err := waitUntilPodsScheduledInNamespace(tCtx, e.podInformer, nil, namespace, op.Count); err != nil {
+		// Default timeout is 10 minutes because even at the lowest observed QPS of ~10 pods/sec,
+		// a standard 5000-node test completes within that time. Heavy test suites (e.g. TAS) can
+		// set the podsSchedulingTimeout option to raise this default ceiling.
+		timeout := 10 * time.Minute
+		if e.opts != nil && e.opts.podsSchedulingTimeout > 0 {
+			timeout = e.opts.podsSchedulingTimeout
+		}
+		if err := waitUntilPodsScheduledInNamespace(tCtx, e.podInformer, nil, namespace, op.Count, timeout); err != nil {
 			return fmt.Errorf("error in waiting for pods to get scheduled: %w", err)
 		}
 	}
@ -809,7 +816,7 @@ func waitUntilPodsScheduled(tCtx ktesting.TContext, podInformer coreinformers.Po
 		if !ok {
 			return fmt.Errorf("unknown namespace %s", namespace)
 		}
-		if err := waitUntilPodsScheduledInNamespace(tCtx, podInformer, labelSelector, namespace, wantCount); err != nil {
+		if err := waitUntilPodsScheduledInNamespace(tCtx, podInformer, labelSelector, namespace, wantCount, 10*time.Minute); err != nil {
 			return fmt.Errorf("error waiting for pods in namespace %q: %w", namespace, err)
 		}
 	}
@ -900,12 +907,14 @@ func getNodePreparer(prefix string, cno *createNodesOp, clientset clientset.Inte
 }

 // waitUntilPodsScheduledInNamespace blocks until all pods in the given
-// namespace are scheduled. Times out after 10 minutes because even at the
+// namespace are scheduled. Callers pass 10 minutes as the default timeout because even at the
 // lowest observed QPS of ~10 pods/sec, a 5000-node test should complete.
-func waitUntilPodsScheduledInNamespace(tCtx ktesting.TContext, podInformer coreinformers.PodInformer, labelSelector map[string]string, namespace string, wantCount int) error {
+// Complex test suites (e.g. TAS where each pod gets scheduled multiple times for placements)
+// may override this timeout via schedulerPerfOptions.
+func waitUntilPodsScheduledInNamespace(tCtx ktesting.TContext, podInformer coreinformers.PodInformer, labelSelector map[string]string, namespace string, wantCount int, timeout time.Duration) error {
 	var pendingPod *v1.Pod

-	err := wait.PollUntilContextTimeout(tCtx, 1*time.Second, 10*time.Minute, true, func(ctx context.Context) (bool, error) {
+	err := wait.PollUntilContextTimeout(tCtx, 1*time.Second, timeout, true, func(ctx context.Context) (bool, error) {
 		select {
 		case <-ctx.Done():
 			return true, ctx.Err()
--- a/test/integration/scheduler_perf/gangscheduling/performance-config.yaml
+++ b/test/integration/scheduler_perf/gangscheduling/performance-config.yaml
@ -20,6 +20,8 @@
    countParam: $initPodGroups
    namespace: gang-0
    templatePath: templates/podgroup.yaml
+    templateParams:
+      podsPerGroup: $podsPerGroup
  - opcode: waitForPodGroups
    # Wait for the scheduler's informer cache to reflect the newly created PodGroup objects.
    namespace: gang-0
@ -48,46 +50,37 @@
      podsPerGroup: 3
  - name: 5000Nodes_1000Gangs_3000Pods
    labels: [performance]
-    # https://perf-dash.k8s.io/#/?jobname=scheduler-perf-benchmark&metriccategoryname=Scheduler&metricname=BenchmarkPerfScheduling&Metric=scheduler_podgroup_scheduling_attempt_duration_seconds&Name=BenchmarkPerfScheduling%2FGangScheduling%2F5000Nodes_1000Gangs_3000Pods%2Ftest&event=not%20applicable&extension_point=not%20applicable&plugin=not%20applicable&result=not%20applicable
-    # Measured scheduler_podgroup_scheduling_attempt_duration_seconds/Average ~3.7 ms; threshold set conservatively at 8.
-    threshold: 8
-    thresholdMetricSelector:
-      name: scheduler_podgroup_scheduling_attempt_duration_seconds
-      labels:
-        result: scheduled
-      dataBucket: Average
-      expectLower: true
    params:
      initNodes: 5000
      initPodGroups: 1000
      podsPerGroup: 3
  - name: 5000Nodes_2000Gangs_6000Pods
    labels: [performance]
-    # https://perf-dash.k8s.io/#/?jobname=scheduler-perf-benchmark&metriccategoryname=Scheduler&metricname=BenchmarkPerfScheduling&Metric=scheduler_podgroup_scheduling_attempt_duration_seconds&Name=BenchmarkPerfScheduling%2FGangScheduling%2F5000Nodes_2000Gangs_6000Pods%2Ftest&event=not%20applicable&extension_point=not%20applicable&plugin=not%20applicable&result=not%20applicable
-    # Measured scheduler_podgroup_scheduling_attempt_duration_seconds/Average ~5.0 ms; threshold set conservatively at 10.
-    threshold: 10
-    thresholdMetricSelector:
-      name: scheduler_podgroup_scheduling_attempt_duration_seconds
-      labels:
-        result: scheduled
-      dataBucket: Average
-      expectLower: true
    params:
      initNodes: 5000
      initPodGroups: 2000
      podsPerGroup: 3
  - name: 5000Nodes_3000Gangs_9000Pods
    labels: [performance]
-    # https://perf-dash.k8s.io/#/?jobname=scheduler-perf-benchmark&metriccategoryname=Scheduler&metricname=BenchmarkPerfScheduling&Metric=scheduler_podgroup_scheduling_attempt_duration_seconds&Name=BenchmarkPerfScheduling%2FGangScheduling%2F5000Nodes_3000Gangs_9000Pods%2Ftest&event=not%20applicable&extension_point=not%20applicable&plugin=not%20applicable&result=not%20applicable
-    # Measured scheduler_podgroup_scheduling_attempt_duration_seconds/Average ~5.7 ms; threshold set conservatively at 12.
-    threshold: 12
-    thresholdMetricSelector:
-      name: scheduler_podgroup_scheduling_attempt_duration_seconds
-      labels:
-        result: scheduled
-      dataBucket: Average
-      expectLower: true
    params:
      initNodes: 5000
      initPodGroups: 3000
      podsPerGroup: 3
+  - name: 5000Nodes_3Gangs_3000Pods_1000PerGroup
+    labels: [performance]
+    params:
+      initNodes: 5000
+      initPodGroups: 3
+      podsPerGroup: 1000
+  - name: 5000Nodes_6Gangs_6000Pods_1000PerGroup
+    labels: [performance]
+    params:
+      initNodes: 5000
+      initPodGroups: 6
+      podsPerGroup: 1000
+  - name: 5000Nodes_9Gangs_9000Pods_1000PerGroup
+    labels: [performance]
+    params:
+      initNodes: 5000
+      initPodGroups: 9
+      podsPerGroup: 1000
--- a/test/integration/scheduler_perf/gangscheduling/templates/gang-pod.yaml
+++ b/test/integration/scheduler_perf/gangscheduling/templates/gang-pod.yaml
@ -4,7 +4,6 @@ metadata:
  name: test-gang-scheduling-{{.Index}}
 spec:
  schedulingGroup:
-    # Three pods share the same pod group.
    podGroupName: gang-{{DivideInt .Index .podsPerGroup}}
  containers:
  - image: registry.k8s.io/pause:3.10.1
--- a/test/integration/scheduler_perf/gangscheduling/templates/podgroup.yaml
+++ b/test/integration/scheduler_perf/gangscheduling/templates/podgroup.yaml
@ -5,4 +5,4 @@ metadata:
 spec:
  schedulingPolicy:
    gang:
-      minCount: 3
+      minCount: {{.podsPerGroup}}
--- a/test/integration/scheduler_perf/options.go
+++ b/test/integration/scheduler_perf/options.go
@ -17,6 +17,8 @@ limitations under the License.
 package benchmark

 import (
+	"time"
+
 	v1 "k8s.io/api/core/v1"
 	"k8s.io/kubernetes/pkg/scheduler"
 	frameworkruntime "k8s.io/kubernetes/pkg/scheduler/framework/runtime"
@ -42,6 +44,7 @@ type schedulerPerfOptions struct {
 	preRunFn                PreRunFn
 	prepareFn               HookFn
 	nodeUpdateFn            NodeUpdateFn
+	podsSchedulingTimeout   time.Duration
 }

 // WithPrepareFn is the option to set a function that is called
@ -69,3 +72,11 @@ func WithPreRunFn(preRunFn PreRunFn) SchedulerPerfOption {
 		s.preRunFn = preRunFn
 	}
 }
+
+// WithPodsSchedulingTimeout is the option to set a custom timeout for waiting
+// for pods to be scheduled. Values <= 0 keep the 10-minute default.
+func WithPodsSchedulingTimeout(timeout time.Duration) SchedulerPerfOption {
+	return func(s *schedulerPerfOptions) {
+		s.podsSchedulingTimeout = timeout
+	}
+}
--- a/test/integration/scheduler_perf/tas/performance-config.yaml
+++ b/test/integration/scheduler_perf/tas/performance-config.yaml
@ -0,0 +1,90 @@
+# The following labels are used in this file:
+#
+# - integration-test: test cases to run as the integration test.
+# - performance: test cases to run in the performance test.
+# - short: supplemental label for the above two labels (must not be used alone), marking test cases with a short execution time.
+
+- name: TopologyAwareScheduling
+  featureGates:
+    GenericWorkload: true
+    GangScheduling: true
+    TopologyAwareWorkloadScheduling: true
+  workloadTemplate:
+  - opcode: createNodes
+    countParam: $initNodes
+    nodeTemplatePath: templates/node.yaml
+  - opcode: createNamespaces
+    prefix: tas
+    count: 1
+  - opcode: createAny
+    # Create pod groups (gangs), each with a min count policy and a topology constraint specified in the pod group template.
+    # Each pod group is named gang-0, gang-1, ... gang-(n-1).
+    countParam: $initPodGroups
+    namespace: tas-0
+    templatePath: templates/podgroup.yaml
+    templateParams:
+      podsPerGroup: $podsPerGroup
+  - opcode: waitForPodGroups
+    # Wait for the scheduler's informer cache to reflect the newly created PodGroup objects.
+    namespace: tas-0
+    countParam: $initPodGroups
+  - opcode: createPods
+    # Create pods with reference to the pod groups (gangs) according to their indices (e.g., with podsPerGroup=3: pods 0-2 → gang-0, pods 3-5 → gang-1, etc.).
+    countParam: $initPodGroups
+    countMultiplierParam: $podsPerGroup
+    namespace: tas-0
+    podTemplatePath: templates/gang-pod.yaml
+    collectMetrics: true
+    templateParams:
+      podsPerGroup: $podsPerGroup
+  workloads:
+  - name: 10Nodes_3Gangs
+    labels: [integration-test, short]
+    params:
+      initNodes: 10
+      initPodGroups: 3
+      podsPerGroup: 3
+  - name: 100Nodes_10Gangs
+    labels: [integration-test]
+    params:
+      initNodes: 100
+      initPodGroups: 10
+      podsPerGroup: 3
+  - name: 5000Nodes_750Gangs_3000Pods
+    labels: [performance]
+    params:
+      initNodes: 5000
+      initPodGroups: 750
+      podsPerGroup: 4
+  - name: 5000Nodes_1500Gangs_6000Pods
+    labels: [performance]
+    params:
+      initNodes: 5000
+      initPodGroups: 1500
+      podsPerGroup: 4
+  - name: 5000Nodes_2250Gangs_9000Pods
+    labels: [performance]
+    params:
+      initNodes: 5000
+      initPodGroups: 2250
+      podsPerGroup: 4
+  - name: 5000Nodes_3Gangs_3000Pods_1000PerGroup
+    labels: [performance]
+    params:
+      initNodes: 5000
+      initPodGroups: 3
+      podsPerGroup: 1000
+  - name: 5000Nodes_6Gangs_6000Pods_1000PerGroup
+    labels: [performance]
+    params:
+      initNodes: 5000
+      initPodGroups: 6
+      podsPerGroup: 1000
+  - name: 5000Nodes_9Gangs_9000Pods_1000PerGroup
+    labels: [performance]
+    params:
+      initNodes: 5000
+      initPodGroups: 9
+      podsPerGroup: 1000
+
+
--- a/test/integration/scheduler_perf/tas/tas_test.go
+++ b/test/integration/scheduler_perf/tas/tas_test.go
@ -0,0 +1,44 @@
+/*
+Copyright The Kubernetes Authors.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+package tas
+
+import (
+	"fmt"
+	"os"
+	"testing"
+	"time"
+
+	_ "k8s.io/component-base/logs/json/register"
+	perf "k8s.io/kubernetes/test/integration/scheduler_perf"
+)
+
+func TestMain(m *testing.M) {
+	if err := perf.InitTests(); err != nil {
+		fmt.Fprintf(os.Stderr, "%v\n", err)
+		os.Exit(1)
+	}
+
+	m.Run()
+}
+
+func TestSchedulerPerf(t *testing.T) {
+	perf.RunIntegrationPerfScheduling(t, "performance-config.yaml", perf.WithPodsSchedulingTimeout(20*time.Minute))
+}
+
+func BenchmarkPerfScheduling(b *testing.B) {
+	perf.RunBenchmarkPerfScheduling(b, "performance-config.yaml", "tas", nil, perf.WithPodsSchedulingTimeout(20*time.Minute))
+}
--- a/test/integration/scheduler_perf/tas/templates/gang-pod.yaml
+++ b/test/integration/scheduler_perf/tas/templates/gang-pod.yaml
@ -0,0 +1,14 @@
+apiVersion: v1
+kind: Pod
+metadata:
+  name: test-tas-scheduling-{{.Index}}
+spec:
+  schedulingGroup:
+    podGroupName: gang-{{DivideInt .Index .podsPerGroup}}
+  containers:
+  - image: registry.k8s.io/pause:3.10.1
+    name: pause
+    resources:
+      requests:
+        cpu: 100m
+        memory: 100Mi
--- a/test/integration/scheduler_perf/tas/templates/node.yaml
+++ b/test/integration/scheduler_perf/tas/templates/node.yaml
@ -0,0 +1,21 @@
+apiVersion: v1
+kind: Node
+metadata:
+  name: node-{{.Index}}
+  labels:
+    kubernetes.io/hostname: node-{{.Index}}
+    topology.kubernetes.io/zone: zone-{{DivideInt .Index 100}}
+    topology.kubernetes.io/rack: rack-{{DivideInt .Index 100}}
+status:
+  capacity:
+    cpu: "4"
+    memory: 32Gi
+    pods: "110"
+  allocatable:
+    cpu: "4"
+    memory: 32Gi
+    pods: "110"
+  phase: Running
+  conditions:
+  - type: Ready
+    status: "True"
--- a/test/integration/scheduler_perf/tas/templates/podgroup.yaml
+++ b/test/integration/scheduler_perf/tas/templates/podgroup.yaml
@ -0,0 +1,11 @@
+apiVersion: scheduling.k8s.io/v1alpha2
+kind: PodGroup
+metadata:
+  name: gang-{{.Index}}
+spec:
+  schedulingPolicy:
+    gang:
+      minCount: {{.podsPerGroup}}
+  schedulingConstraints:
+    topology:
+    - key: topology.kubernetes.io/rack