mirror of
https://github.com/kubernetes/kubernetes.git
synced 2026-05-28 04:04:39 -04:00
Merge pull request #137284 from ttsuuubasa/dra-dbc-metrics
DRA Device Binding Conditions: add metrics for prebind flow
This commit is contained in:
commit
03d1c996c7
3 changed files with 288 additions and 7 deletions
|
|
@ -50,6 +50,7 @@ import (
|
|||
"k8s.io/kubernetes/pkg/scheduler/framework/plugins/feature"
|
||||
"k8s.io/kubernetes/pkg/scheduler/framework/plugins/helper"
|
||||
"k8s.io/kubernetes/pkg/scheduler/framework/plugins/names"
|
||||
schedmetrics "k8s.io/kubernetes/pkg/scheduler/metrics"
|
||||
schedutil "k8s.io/kubernetes/pkg/scheduler/util"
|
||||
"k8s.io/utils/ptr"
|
||||
)
|
||||
|
|
@ -146,6 +147,11 @@ type DynamicResources struct {
|
|||
draManager fwk.SharedDRAManager
|
||||
}
|
||||
|
||||
var (
|
||||
ErrDeviceBindingTimeout = errors.New("device binding timeout")
|
||||
ErrDeviceBindingFailed = errors.New("device binding failed")
|
||||
)
|
||||
|
||||
// New initializes a new plugin and returns it.
|
||||
func New(ctx context.Context, plArgs runtime.Object, fh fwk.Handle, fts feature.Features) (fwk.Plugin, error) {
|
||||
if !fts.EnableDynamicResourceAllocation {
|
||||
|
|
@ -1095,18 +1101,61 @@ func (pl *DynamicResources) PreBind(ctx context.Context, cs fwk.CycleState, pod
|
|||
|
||||
// We need to wait for the device to be attached to the node.
|
||||
pl.fh.EventRecorder().Eventf(pod, nil, v1.EventTypeNormal, "BindingConditionsPending", "Scheduling", "waiting for binding conditions for device on node %s", nodeName)
|
||||
// START: Record start time for metrics duration calculation
|
||||
start := time.Now()
|
||||
err = wait.PollUntilContextTimeout(ctx, 5*time.Second, pl.bindingTimeout, true,
|
||||
func(ctx context.Context) (bool, error) {
|
||||
return pl.isPodReadyForBinding(state)
|
||||
})
|
||||
if err != nil {
|
||||
if errors.Is(err, context.DeadlineExceeded) {
|
||||
err = errors.New("device binding timeout")
|
||||
// GOAL: Calculate duration since start for metrics recording
|
||||
duration := time.Since(start)
|
||||
if errors.Is(err, context.DeadlineExceeded) {
|
||||
err = fmt.Errorf("%w: %w", ErrDeviceBindingTimeout, err)
|
||||
}
|
||||
|
||||
// Determine status label based on error
|
||||
statusLabel := schedmetrics.BindingConditionsStatusSuccess
|
||||
switch {
|
||||
case err == nil:
|
||||
logger.V(5).Info("BindingConditions is met",
|
||||
"pod", klog.KObj(pod),
|
||||
"node", nodeName,
|
||||
"devices", bindingConditionsStatusFormatter{state, statusLabel},
|
||||
)
|
||||
// keep success
|
||||
case errors.Is(err, ErrDeviceBindingTimeout):
|
||||
statusLabel = schedmetrics.BindingConditionsStatusTimeout
|
||||
case errors.Is(err, ErrDeviceBindingFailed):
|
||||
statusLabel = schedmetrics.BindingConditionsStatusFailed
|
||||
default:
|
||||
statusLabel = schedmetrics.BindingConditionsStatusError
|
||||
}
|
||||
|
||||
// Record metrics for binding conditions outcome (success/failure/timeout/error)
|
||||
profileLabel := pl.fh.ProfileName()
|
||||
for _, claim := range state.claims.all() {
|
||||
if claim.Status.Allocation == nil {
|
||||
continue
|
||||
}
|
||||
for _, res := range claim.Status.Allocation.Devices.Results {
|
||||
schedmetrics.DRABindingConditionsPreBindDuration.
|
||||
WithLabelValues(profileLabel, res.Driver, statusLabel).
|
||||
Observe(duration.Seconds())
|
||||
}
|
||||
|
||||
// Count scheduling attempts that used devices with BindingConditions.
|
||||
for _, bcStatus := range listBindingConditionsStatus(claim) {
|
||||
schedmetrics.DRABindingConditionsAllocationsTotal.
|
||||
WithLabelValues(profileLabel, bcStatus.Driver, statusLabel).
|
||||
Inc()
|
||||
}
|
||||
}
|
||||
|
||||
if err != nil {
|
||||
// Returning an error here causes another scheduling attempt.
|
||||
// In that next attempt, PreFilter will detect the timeout or
|
||||
// error and try to recover.
|
||||
return statusError(logger, err)
|
||||
return statusError(logger, err, "devices", formatBCStatusOneLine(getBindingConditionsStatusForStatus(state, statusLabel)))
|
||||
}
|
||||
|
||||
// If we get here, we know that reserving the claim for
|
||||
|
|
@ -1307,7 +1356,8 @@ func (pl *DynamicResources) isClaimReadyForBinding(claim *resourceapi.ResourceCl
|
|||
for _, cond := range deviceRequest.BindingFailureConditions {
|
||||
failedCond := apimeta.FindStatusCondition(deviceStatus.Conditions, cond)
|
||||
if failedCond != nil && failedCond.Status == metav1.ConditionTrue {
|
||||
return false, fmt.Errorf("claim %s binding failed: reason=%s, message=%q",
|
||||
return false, fmt.Errorf("%w: claim=%s, reason=%s, message=%q",
|
||||
ErrDeviceBindingFailed,
|
||||
claim.Name,
|
||||
failedCond.Reason,
|
||||
failedCond.Message)
|
||||
|
|
@ -1363,7 +1413,7 @@ func (pl *DynamicResources) isPodReadyForBinding(state *stateData) (bool, error)
|
|||
}
|
||||
if !ready {
|
||||
if pl.isClaimTimeout(claim) {
|
||||
return false, fmt.Errorf("claim %s binding timeout", claim.Name)
|
||||
return false, fmt.Errorf("%w: claim=%s", ErrDeviceBindingTimeout, claim.Name)
|
||||
}
|
||||
return false, nil
|
||||
}
|
||||
|
|
@ -1422,3 +1472,130 @@ func getAllocatedDeviceStatus(claim *resourceapi.ResourceClaim, deviceRequest *r
|
|||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// bindingConditionsStatusFormatter implements fmt.Stringer for lazy evaluation of device status formatting.
|
||||
// This avoids expensive processing when logging is disabled.
|
||||
type bindingConditionsStatusFormatter struct {
|
||||
state *stateData
|
||||
statusLabel string
|
||||
}
|
||||
|
||||
func (d bindingConditionsStatusFormatter) String() string {
|
||||
return formatBCStatusOneLine(getBindingConditionsStatusForStatus(d.state, d.statusLabel))
|
||||
}
|
||||
|
||||
// BindingConditionsStatus is a compact per-device summary for BindingConditions logging.
|
||||
type BindingConditionsStatus struct {
|
||||
Driver string `json:"driver"`
|
||||
Pool string `json:"pool"`
|
||||
Device string `json:"device"`
|
||||
Pending []string `json:"pending,omitempty"`
|
||||
Failed []string `json:"failed,omitempty"` // only failure conditions that are true
|
||||
}
|
||||
|
||||
// listBindingConditionsStatus inspects ONE claim and returns a summary per allocated device
|
||||
// that has BindingConditions.
|
||||
// - Pending: BindingConditions that are not yet true (or all conditions if device status is missing)
|
||||
// - Failed: BindingFailureConditions that are true
|
||||
func listBindingConditionsStatus(claim *resourceapi.ResourceClaim) []BindingConditionsStatus {
|
||||
if claim == nil || claim.Status.Allocation == nil {
|
||||
return nil
|
||||
}
|
||||
|
||||
var out []BindingConditionsStatus
|
||||
for _, res := range claim.Status.Allocation.Devices.Results {
|
||||
if len(res.BindingConditions) == 0 {
|
||||
continue
|
||||
}
|
||||
|
||||
sum := BindingConditionsStatus{
|
||||
Driver: res.Driver,
|
||||
Pool: res.Pool,
|
||||
Device: res.Device,
|
||||
}
|
||||
|
||||
ds := getAllocatedDeviceStatus(claim, &res)
|
||||
if ds == nil {
|
||||
// No status yet => all BindingConditions are pending.
|
||||
sum.Pending = append(sum.Pending, res.BindingConditions...)
|
||||
out = append(out, sum)
|
||||
continue
|
||||
}
|
||||
|
||||
// Failed conditions that are true.
|
||||
for _, c := range res.BindingFailureConditions {
|
||||
if apimeta.IsStatusConditionTrue(ds.Conditions, c) {
|
||||
sum.Failed = append(sum.Failed, c)
|
||||
}
|
||||
}
|
||||
|
||||
// Pending conditions: those not yet true.
|
||||
for _, c := range res.BindingConditions {
|
||||
if !apimeta.IsStatusConditionTrue(ds.Conditions, c) {
|
||||
sum.Pending = append(sum.Pending, c)
|
||||
}
|
||||
}
|
||||
|
||||
out = append(out, sum)
|
||||
}
|
||||
|
||||
return out
|
||||
}
|
||||
|
||||
// getBindingConditionsStatusForStatus selects which devices to log across ALL claims in the state.
|
||||
func getBindingConditionsStatusForStatus(state *stateData, statusLabel string) []BindingConditionsStatus {
|
||||
var out []BindingConditionsStatus
|
||||
for _, claim := range state.claims.all() {
|
||||
for _, s := range listBindingConditionsStatus(claim) {
|
||||
switch statusLabel {
|
||||
case schedmetrics.BindingConditionsStatusSuccess:
|
||||
// success: show all BC devices
|
||||
if len(s.Failed) == 0 && len(s.Pending) == 0 {
|
||||
out = append(out, s)
|
||||
}
|
||||
case schedmetrics.BindingConditionsStatusFailed:
|
||||
// failure: only devices that have at least one failure condition true
|
||||
if len(s.Failed) > 0 {
|
||||
out = append(out, BindingConditionsStatus{
|
||||
Driver: s.Driver,
|
||||
Pool: s.Pool,
|
||||
Device: s.Device,
|
||||
Failed: append([]string(nil), s.Failed...),
|
||||
})
|
||||
}
|
||||
|
||||
case schedmetrics.BindingConditionsStatusTimeout:
|
||||
// timeout/error: only devices still pending, and not failed
|
||||
if len(s.Pending) > 0 && len(s.Failed) == 0 {
|
||||
out = append(out, s)
|
||||
}
|
||||
|
||||
default:
|
||||
// Treat unknown like timeout/error
|
||||
if len(s.Pending) > 0 && len(s.Failed) == 0 {
|
||||
out = append(out, s)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
return out
|
||||
}
|
||||
|
||||
// formatBCStatusOneLine formats the selected devices into a single log-friendly line.
|
||||
func formatBCStatusOneLine(devs []BindingConditionsStatus) string {
|
||||
if len(devs) == 0 {
|
||||
return ""
|
||||
}
|
||||
parts := make([]string, 0, len(devs))
|
||||
for _, d := range devs {
|
||||
base := fmt.Sprintf("<driver=%s, pool=%s> %s", d.Driver, d.Pool, d.Device)
|
||||
if len(d.Failed) > 0 {
|
||||
base += fmt.Sprintf(" failed=[%s]", strings.Join(d.Failed, ","))
|
||||
}
|
||||
if len(d.Pending) > 0 {
|
||||
base += fmt.Sprintf(" pending=[%s]", strings.Join(d.Pending, ","))
|
||||
}
|
||||
parts = append(parts, base)
|
||||
}
|
||||
return strings.Join(parts, "; ")
|
||||
}
|
||||
|
|
|
|||
|
|
@ -2334,6 +2334,35 @@ func testPlugin(tCtx ktesting.TContext) {
|
|||
status: nil,
|
||||
},
|
||||
},
|
||||
metrics: func(tCtx ktesting.TContext, g compbasemetrics.Gatherer) {
|
||||
// Counter: allocations_total should have exactly one event
|
||||
allocs, err := testutil.GetCounterValuesFromGatherer(
|
||||
g,
|
||||
"scheduler_dra_bindingconditions_allocations_total",
|
||||
map[string]string{
|
||||
"status": "success",
|
||||
},
|
||||
"driver", // group by driver label
|
||||
)
|
||||
require.NoError(tCtx, err)
|
||||
|
||||
var totalAllocs float64
|
||||
for _, v := range allocs {
|
||||
totalAllocs += v
|
||||
}
|
||||
require.InEpsilon(tCtx, float64(1), totalAllocs, 0.1, "expected exactly one successful allocation with BindingConditions")
|
||||
|
||||
// Histogram: one success sample with requires_bindingconditions=true
|
||||
hist, err := testutil.GetHistogramVecFromGatherer(
|
||||
g,
|
||||
"scheduler_dra_bindingconditions_wait_duration_seconds",
|
||||
map[string]string{
|
||||
"status": "success",
|
||||
},
|
||||
)
|
||||
require.NoError(tCtx, err)
|
||||
require.Equal(tCtx, uint64(1), hist.GetAggregatedSampleCount(), "expected one success sample in wait duration histogram")
|
||||
},
|
||||
},
|
||||
"bound-claim-with-failed-binding": {
|
||||
enableDRADeviceBindingConditions: true,
|
||||
|
|
@ -2452,9 +2481,38 @@ func testPlugin(tCtx ktesting.TContext) {
|
|||
},
|
||||
},
|
||||
// From isPodReadyForBinding.
|
||||
status: fwk.AsStatus(errors.New("claim " + claim.Name + " binding timeout")),
|
||||
status: fwk.AsStatus(fmt.Errorf("%w: claim=%s", ErrDeviceBindingTimeout, claim.Name)),
|
||||
},
|
||||
},
|
||||
metrics: func(tCtx ktesting.TContext, g compbasemetrics.Gatherer) {
|
||||
// Counter: timeouts_total should have exactly one event
|
||||
timeouts, err := testutil.GetCounterValuesFromGatherer(
|
||||
g,
|
||||
"scheduler_dra_bindingconditions_allocations_total",
|
||||
map[string]string{
|
||||
"status": "timeout",
|
||||
},
|
||||
"driver",
|
||||
)
|
||||
require.NoError(tCtx, err)
|
||||
|
||||
var totalTimeouts float64
|
||||
for _, v := range timeouts {
|
||||
totalTimeouts += v
|
||||
}
|
||||
require.InEpsilon(tCtx, float64(1), totalTimeouts, 0.1, "expected exactly one timeout with BindingConditions")
|
||||
|
||||
// Histogram: one timeout sample with requires_bindingconditions=true
|
||||
hist, err := testutil.GetHistogramVecFromGatherer(
|
||||
g,
|
||||
"scheduler_dra_bindingconditions_wait_duration_seconds",
|
||||
map[string]string{
|
||||
"status": "timeout",
|
||||
},
|
||||
)
|
||||
require.NoError(tCtx, err)
|
||||
require.Equal(tCtx, uint64(1), hist.GetAggregatedSampleCount(), "expected one timeout sample in wait duration histogram")
|
||||
},
|
||||
},
|
||||
"bound-claim-with-mixed-binding-conditions": {
|
||||
enableDRADeviceBindingConditions: true,
|
||||
|
|
@ -2942,6 +3000,14 @@ func setupMetrics(features feature.Features) compbasemetrics.KubeRegistry {
|
|||
testRegistry.MustRegister(metrics.ResourceClaimCreatesTotal)
|
||||
metrics.ResourceClaimCreatesTotal.Reset()
|
||||
}
|
||||
// DRA DeviceBindingConditions metrics.
|
||||
if features.EnableDRADeviceBindingConditions {
|
||||
testRegistry.MustRegister(metrics.DRABindingConditionsAllocationsTotal)
|
||||
testRegistry.MustRegister(metrics.DRABindingConditionsPreBindDuration)
|
||||
|
||||
metrics.DRABindingConditionsAllocationsTotal.Reset()
|
||||
metrics.DRABindingConditionsPreBindDuration.Reset()
|
||||
}
|
||||
return testRegistry
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -115,6 +115,14 @@ const (
|
|||
BatchFlushPodNotBatchable = "pod_not_batchable"
|
||||
)
|
||||
|
||||
// DRADeviceBindingConditions status labels
|
||||
const (
|
||||
BindingConditionsStatusSuccess = "success"
|
||||
BindingConditionsStatusFailed = "failure"
|
||||
BindingConditionsStatusTimeout = "timeout"
|
||||
BindingConditionsStatusError = "error"
|
||||
)
|
||||
|
||||
// All the histogram based metrics have 1ms as size for the smallest bucket.
|
||||
var (
|
||||
scheduleAttempts *metrics.CounterVec
|
||||
|
|
@ -161,6 +169,9 @@ var (
|
|||
podGroupScheduleAttempts *metrics.CounterVec
|
||||
podGroupSchedulingLatency *metrics.HistogramVec
|
||||
PodGroupSchedulingAlgorithmLatency *metrics.Histogram
|
||||
// The below are only available when the DRADeviceBindingConditions feature gate is enabled.
|
||||
DRABindingConditionsAllocationsTotal *metrics.CounterVec
|
||||
DRABindingConditionsPreBindDuration *metrics.HistogramVec
|
||||
|
||||
// metricsList is a list of all metrics that should be registered always, regardless of any feature gate's value.
|
||||
metricsList []metrics.Registerable
|
||||
|
|
@ -199,6 +210,12 @@ func Register() {
|
|||
PodGroupSchedulingAlgorithmLatency,
|
||||
)
|
||||
}
|
||||
if utilfeature.DefaultFeatureGate.Enabled(features.DRADeviceBindingConditions) {
|
||||
RegisterMetrics(
|
||||
DRABindingConditionsAllocationsTotal,
|
||||
DRABindingConditionsPreBindDuration,
|
||||
)
|
||||
}
|
||||
})
|
||||
}
|
||||
|
||||
|
|
@ -453,6 +470,27 @@ func InitMetrics() {
|
|||
},
|
||||
[]string{"status"})
|
||||
|
||||
DRABindingConditionsAllocationsTotal = metrics.NewCounterVec(
|
||||
&metrics.CounterOpts{
|
||||
Subsystem: SchedulerSubsystem,
|
||||
Name: "dra_bindingconditions_allocations_total",
|
||||
Help: "Number of allocations using devices with BindingConditions, counted per driver per scheduling attempt",
|
||||
StabilityLevel: metrics.ALPHA,
|
||||
},
|
||||
[]string{"profile", "driver", "status"},
|
||||
)
|
||||
|
||||
DRABindingConditionsPreBindDuration = metrics.NewHistogramVec(
|
||||
&metrics.HistogramOpts{
|
||||
Subsystem: SchedulerSubsystem,
|
||||
Name: "dra_bindingconditions_wait_duration_seconds",
|
||||
Help: "Time in seconds spent waiting for BindingConditions to be satisfied during PreBind.",
|
||||
Buckets: metrics.ExponentialBuckets(0.1, 2, 14),
|
||||
StabilityLevel: metrics.ALPHA,
|
||||
},
|
||||
[]string{"profile", "driver", "status"},
|
||||
)
|
||||
|
||||
GetNodeHintDuration = metrics.NewHistogramVec(
|
||||
&metrics.HistogramOpts{
|
||||
Subsystem: SchedulerSubsystem,
|
||||
|
|
|
|||
Loading…
Reference in a new issue