mirror of
https://github.com/kubernetes/kubernetes.git
synced 2026-05-28 04:04:39 -04:00
Promote scheduler metrics to beta
This commit is contained in:
parent
9cfdbc0d6e
commit
d66ae77959
4 changed files with 208 additions and 4 deletions
|
|
@ -18,7 +18,12 @@ package parallelize
|
|||
|
||||
import (
|
||||
"fmt"
|
||||
"sync"
|
||||
"testing"
|
||||
|
||||
"k8s.io/component-base/metrics/testutil"
|
||||
"k8s.io/klog/v2/ktesting"
|
||||
"k8s.io/kubernetes/pkg/scheduler/metrics"
|
||||
)
|
||||
|
||||
func TestChunkSize(t *testing.T) {
|
||||
|
|
@ -52,3 +57,36 @@ func TestChunkSize(t *testing.T) {
|
|||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestGoroutinesMetric(t *testing.T) {
|
||||
metrics.Register()
|
||||
metrics.Goroutines.Reset()
|
||||
|
||||
const (
|
||||
operation = "test-operation"
|
||||
pieces = 32
|
||||
)
|
||||
|
||||
var (
|
||||
mu sync.Mutex
|
||||
peakValue float64
|
||||
)
|
||||
|
||||
_, ctx := ktesting.NewTestContext(t)
|
||||
p := NewParallelizer(DefaultParallelism)
|
||||
p.Until(ctx, pieces, func(_ int) {
|
||||
val, err := testutil.GetGaugeMetricValue(metrics.Goroutines.WithLabelValues(operation))
|
||||
if err != nil {
|
||||
t.Fatalf("failed to read goroutines metric inside Until: %v", err)
|
||||
}
|
||||
mu.Lock()
|
||||
if val > peakValue {
|
||||
peakValue = val
|
||||
}
|
||||
mu.Unlock()
|
||||
}, operation)
|
||||
|
||||
if peakValue <= 0 {
|
||||
t.Errorf("expected goroutines metric to be >0 during Until, peak was %v", peakValue)
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -4205,3 +4205,119 @@ func TestRunPlacementScorePlugins(t *testing.T) {
|
|||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestPluginEvaluationTotalMetric(t *testing.T) {
|
||||
_, ctx := ktesting.NewTestContext(t)
|
||||
ctx, cancel := context.WithCancel(ctx)
|
||||
defer cancel()
|
||||
|
||||
metrics.PluginEvaluationTotal.Reset()
|
||||
|
||||
registry := Registry{}
|
||||
|
||||
const (
|
||||
preFilterPluginName = "plugin-eval-prefilter"
|
||||
filterPluginNameA = "plugin-eval-filter-a"
|
||||
filterPluginNameB = "plugin-eval-filter-b"
|
||||
preScorePluginName = "plugin-eval-prescore"
|
||||
scorePluginName = "plugin-eval-score"
|
||||
profileName2 = "test-profile-2"
|
||||
)
|
||||
|
||||
preFilterPl := &TestPlugin{name: preFilterPluginName, inj: injectedResult{PreFilterStatus: int(fwk.Success)}}
|
||||
if err := registry.Register(preFilterPluginName, func(_ context.Context, _ runtime.Object, _ fwk.Handle) (fwk.Plugin, error) {
|
||||
return preFilterPl, nil
|
||||
}); err != nil {
|
||||
t.Fatalf("failed to register prefilter plugin %q: %v", preFilterPluginName, err)
|
||||
}
|
||||
|
||||
filterPlA := &TestPlugin{name: filterPluginNameA, inj: injectedResult{FilterStatus: int(fwk.Success)}}
|
||||
if err := registry.Register(filterPluginNameA, func(_ context.Context, _ runtime.Object, _ fwk.Handle) (fwk.Plugin, error) {
|
||||
return filterPlA, nil
|
||||
}); err != nil {
|
||||
t.Fatalf("failed to register filter plugin %q: %v", filterPluginNameA, err)
|
||||
}
|
||||
|
||||
filterPlB := &TestPlugin{name: filterPluginNameB, inj: injectedResult{FilterStatus: int(fwk.Success)}}
|
||||
if err := registry.Register(filterPluginNameB, func(_ context.Context, _ runtime.Object, _ fwk.Handle) (fwk.Plugin, error) {
|
||||
return filterPlB, nil
|
||||
}); err != nil {
|
||||
t.Fatalf("failed to register filter plugin %q: %v", filterPluginNameB, err)
|
||||
}
|
||||
|
||||
preScorePl := &TestPlugin{name: preScorePluginName, inj: injectedResult{PreScoreStatus: int(fwk.Success)}}
|
||||
if err := registry.Register(preScorePluginName, func(_ context.Context, _ runtime.Object, _ fwk.Handle) (fwk.Plugin, error) {
|
||||
return preScorePl, nil
|
||||
}); err != nil {
|
||||
t.Fatalf("failed to register prescore plugin %q: %v", preScorePluginName, err)
|
||||
}
|
||||
|
||||
scorePl := &TestPlugin{name: scorePluginName, inj: injectedResult{}}
|
||||
if err := registry.Register(scorePluginName, func(_ context.Context, _ runtime.Object, _ fwk.Handle) (fwk.Plugin, error) {
|
||||
return scorePl, nil
|
||||
}); err != nil {
|
||||
t.Fatalf("failed to register score plugin %q: %v", scorePluginName, err)
|
||||
}
|
||||
|
||||
// Profile 1: exercise PreFilter, Filter, PreScore and Score extension points.
|
||||
cfgPls1 := &config.Plugins{}
|
||||
cfgPls1.PreFilter.Enabled = append(cfgPls1.PreFilter.Enabled, config.Plugin{Name: preFilterPluginName})
|
||||
cfgPls1.Filter.Enabled = append(cfgPls1.Filter.Enabled, config.Plugin{Name: filterPluginNameA})
|
||||
cfgPls1.PreScore.Enabled = append(cfgPls1.PreScore.Enabled, config.Plugin{Name: preScorePluginName})
|
||||
cfgPls1.Score.Enabled = append(cfgPls1.Score.Enabled, config.Plugin{Name: scorePluginName})
|
||||
profile1 := config.KubeSchedulerProfile{
|
||||
SchedulerName: testProfileName,
|
||||
Plugins: cfgPls1,
|
||||
}
|
||||
|
||||
f1, err := newFrameworkWithQueueSortAndBind(ctx, registry, profile1, WithSnapshotSharedLister(cache.NewEmptySnapshot()))
|
||||
if err != nil {
|
||||
t.Fatalf("failed to create framework (profile=%q): %v", testProfileName, err)
|
||||
}
|
||||
defer func() { _ = f1.Close() }()
|
||||
|
||||
state1 := framework.NewCycleState()
|
||||
if _, st, _ := f1.RunPreFilterPlugins(ctx, state1, pod); st != nil && !st.IsSuccess() {
|
||||
t.Fatalf("RunPreFilterPlugins returned unexpected status: %v", st)
|
||||
}
|
||||
if st := f1.RunFilterPlugins(ctx, state1, pod, nil); st != nil && !st.IsSuccess() {
|
||||
t.Fatalf("RunFilterPlugins returned unexpected status: %v", st)
|
||||
}
|
||||
if st := f1.RunPreScorePlugins(ctx, state1, pod, nil); st != nil && !st.IsSuccess() {
|
||||
t.Fatalf("RunPreScorePlugins returned unexpected status: %v", st)
|
||||
}
|
||||
if _, st := f1.RunScorePlugins(ctx, state1, pod, BuildNodeInfos(nodes)); st != nil && !st.IsSuccess() {
|
||||
t.Fatalf("RunScorePlugins returned unexpected status: %v", st)
|
||||
}
|
||||
|
||||
// Profile 2: exercise a different plugin and profile label on Filter.
|
||||
cfgPls2 := &config.Plugins{}
|
||||
cfgPls2.Filter.Enabled = append(cfgPls2.Filter.Enabled, config.Plugin{Name: filterPluginNameB})
|
||||
profile2 := config.KubeSchedulerProfile{
|
||||
SchedulerName: profileName2,
|
||||
Plugins: cfgPls2,
|
||||
}
|
||||
|
||||
f2, err := newFrameworkWithQueueSortAndBind(ctx, registry, profile2, WithSnapshotSharedLister(cache.NewEmptySnapshot()))
|
||||
if err != nil {
|
||||
t.Fatalf("failed to create framework (profile=%q): %v", profileName2, err)
|
||||
}
|
||||
defer func() { _ = f2.Close() }()
|
||||
|
||||
state2 := framework.NewCycleState()
|
||||
if st := f2.RunFilterPlugins(ctx, state2, pod, nil); st != nil && !st.IsSuccess() {
|
||||
t.Fatalf("RunFilterPlugins returned unexpected status: %v", st)
|
||||
}
|
||||
|
||||
want := `# HELP scheduler_plugin_evaluation_total Number of attempts to schedule pods by each plugin and the extension point (available only in PreFilter, Filter, PreScore, and Score).
|
||||
# TYPE scheduler_plugin_evaluation_total counter
|
||||
scheduler_plugin_evaluation_total{extension_point="Filter",plugin="plugin-eval-filter-a",profile="test-profile"} 1
|
||||
scheduler_plugin_evaluation_total{extension_point="Filter",plugin="plugin-eval-filter-b",profile="test-profile-2"} 1
|
||||
scheduler_plugin_evaluation_total{extension_point="PreFilter",plugin="plugin-eval-prefilter",profile="test-profile"} 1
|
||||
scheduler_plugin_evaluation_total{extension_point="PreScore",plugin="plugin-eval-prescore",profile="test-profile"} 1
|
||||
scheduler_plugin_evaluation_total{extension_point="Score",plugin="plugin-eval-score",profile="test-profile"} 1
|
||||
`
|
||||
if err := testutil.GatherAndCompare(metrics.GetGather(), strings.NewReader(want), metrics.PluginEvaluationTotal.Name); err != nil {
|
||||
t.Fatalf("unexpected plugin_evaluation_total metric output:\n%v", err)
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -290,7 +290,7 @@ func InitMetrics() {
|
|||
Subsystem: SchedulerSubsystem,
|
||||
Name: "goroutines",
|
||||
Help: "Number of running goroutines split by the work they do such as binding.",
|
||||
StabilityLevel: metrics.ALPHA,
|
||||
StabilityLevel: metrics.BETA,
|
||||
}, []string{"operation"})
|
||||
BatchAttemptStats = metrics.NewCounterVec(
|
||||
&metrics.CounterOpts{
|
||||
|
|
@ -385,7 +385,7 @@ func InitMetrics() {
|
|||
Name: "permit_wait_duration_seconds",
|
||||
Help: "Duration of waiting on permit.",
|
||||
Buckets: metrics.ExponentialBuckets(0.001, 2, 15),
|
||||
StabilityLevel: metrics.ALPHA,
|
||||
StabilityLevel: metrics.BETA,
|
||||
},
|
||||
[]string{"result"})
|
||||
|
||||
|
|
@ -402,7 +402,7 @@ func InitMetrics() {
|
|||
Subsystem: SchedulerSubsystem,
|
||||
Name: "unschedulable_pods",
|
||||
Help: "The number of unschedulable pods broken down by plugin name. A pod will increment the gauge for all plugins that caused it to not schedule and so this metric have meaning only when broken down by plugin.",
|
||||
StabilityLevel: metrics.ALPHA,
|
||||
StabilityLevel: metrics.BETA,
|
||||
}, []string{"plugin", "profile"})
|
||||
|
||||
PluginEvaluationTotal = metrics.NewCounterVec(
|
||||
|
|
@ -410,7 +410,7 @@ func InitMetrics() {
|
|||
Subsystem: SchedulerSubsystem,
|
||||
Name: "plugin_evaluation_total",
|
||||
Help: "Number of attempts to schedule pods by each plugin and the extension point (available only in PreFilter, Filter, PreScore, and Score).",
|
||||
StabilityLevel: metrics.ALPHA,
|
||||
StabilityLevel: metrics.BETA,
|
||||
}, []string{"plugin", "extension_point", "profile"})
|
||||
|
||||
PreemptionGoroutinesDuration = metrics.NewHistogramVec(
|
||||
|
|
|
|||
|
|
@ -422,6 +422,46 @@
|
|||
labels:
|
||||
- manager
|
||||
- name
|
||||
- name: goroutines
|
||||
subsystem: scheduler
|
||||
help: Number of running goroutines split by the work they do such as binding.
|
||||
type: Gauge
|
||||
stabilityLevel: BETA
|
||||
labels:
|
||||
- operation
|
||||
- name: permit_wait_duration_seconds
|
||||
subsystem: scheduler
|
||||
help: Duration of waiting on permit.
|
||||
type: Histogram
|
||||
stabilityLevel: BETA
|
||||
labels:
|
||||
- result
|
||||
buckets:
|
||||
- 0.001
|
||||
- 0.002
|
||||
- 0.004
|
||||
- 0.008
|
||||
- 0.016
|
||||
- 0.032
|
||||
- 0.064
|
||||
- 0.128
|
||||
- 0.256
|
||||
- 0.512
|
||||
- 1.024
|
||||
- 2.048
|
||||
- 4.096
|
||||
- 8.192
|
||||
- 16.384
|
||||
- name: plugin_evaluation_total
|
||||
subsystem: scheduler
|
||||
help: Number of attempts to schedule pods by each plugin and the extension point
|
||||
(available only in PreFilter, Filter, PreScore, and Score).
|
||||
type: Counter
|
||||
stabilityLevel: BETA
|
||||
labels:
|
||||
- extension_point
|
||||
- plugin
|
||||
- profile
|
||||
- name: pod_scheduling_sli_duration_seconds
|
||||
subsystem: scheduler
|
||||
help: E2e latency for a pod being scheduled, from the time the pod enters the scheduling
|
||||
|
|
@ -451,6 +491,16 @@
|
|||
- 1310.72
|
||||
- 2621.44
|
||||
- 5242.88
|
||||
- name: unschedulable_pods
|
||||
subsystem: scheduler
|
||||
help: The number of unschedulable pods broken down by plugin name. A pod will increment
|
||||
the gauge for all plugins that caused it to not schedule and so this metric have
|
||||
meaning only when broken down by plugin.
|
||||
type: Gauge
|
||||
stabilityLevel: BETA
|
||||
labels:
|
||||
- plugin
|
||||
- profile
|
||||
- name: adds_total
|
||||
subsystem: workqueue
|
||||
help: Total number of adds handled by workqueue
|
||||
|
|
|
|||
Loading…
Reference in a new issue