Merge pull request #138542 from pohly/dra-create-resource-claim

DRA: harmonize ResourceClaim creation metric
This commit is contained in:
Kubernetes Prow Robot 2026-05-12 05:12:09 +05:30 committed by GitHub
commit f830e4a2b8
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
13 changed files with 148 additions and 107 deletions

View file

@ -21,10 +21,12 @@ coreComponents:
- "cmd/kube-controller-manager/"
- "pkg/controller/"
- "staging/src/k8s.io/controller-manager/"
- "staging/src/k8s.io/dynamic-resource-allocation/resourceclaim/"
- "staging/src/k8s.io/endpointslice/"
kube-scheduler:
- "cmd/kube-scheduler/"
- "pkg/scheduler/"
- "staging/src/k8s.io/dynamic-resource-allocation/resourceclaim/"
- "staging/src/k8s.io/kube-scheduler/"
kube-proxy:
- "cmd/kube-proxy/"

View file

@ -18,6 +18,7 @@ package main
import (
"os"
"slices"
"sort"
"strings"
@ -59,26 +60,24 @@ func (c *endpointMappingConfig) inferComponentEndpoints(filePath string) []metri
endpoint := c.inferEndpoint(filePath)
if c.isSharedPath(filePath) {
// The assumption here is that none of the standalone components
// use the metrics under the path.
return c.allCoreComponentEndpoints(endpoint)
}
component := c.inferComponent(filePath, c.CoreComponents)
if component != "" {
return []metric.ComponentEndpoint{{
// Core and standalone components may explicitly share the same metrics through their path patterns.
components := c.inferComponents(filePath, c.CoreComponents)
components = append(components, c.inferComponents(filePath, c.StandaloneComponents)...)
var endpoints []metric.ComponentEndpoint
for _, component := range components {
endpoints = append(endpoints, metric.ComponentEndpoint{
Component: component,
Endpoint: endpoint,
}}
})
}
component = c.inferComponent(filePath, c.StandaloneComponents)
if component != "" {
return []metric.ComponentEndpoint{{
Component: component,
Endpoint: endpoint,
}}
}
return nil
return endpoints
}
func (c *endpointMappingConfig) isSharedPath(filePath string) bool {
@ -90,23 +89,18 @@ func (c *endpointMappingConfig) isSharedPath(filePath string) bool {
return false
}
func (c *endpointMappingConfig) inferComponent(filePath string, components map[string][]string) string {
// Sort component names for deterministic iteration order
componentNames := make([]string, 0, len(components))
for name := range components {
componentNames = append(componentNames, name)
}
sort.Strings(componentNames)
for _, component := range componentNames {
patterns := components[component]
// inferComponents returns the names of all components in the given map which
// have at least one path pattern that occurs as a substring of filePath.
//
// Each component is reported at most once, even if several of its patterns
// match. The result is sorted to make it independent of map iteration order;
// it is nil when nothing matches.
func (c *endpointMappingConfig) inferComponents(filePath string, components map[string][]string) []string {
	var matchingComponents []string
	for component, patterns := range components {
		for _, pattern := range patterns {
			if strings.Contains(filePath, pattern) {
				matchingComponents = append(matchingComponents, component)
				// One hit is enough. Checking the remaining patterns would add
				// the same component again and lead to duplicate endpoints.
				break
			}
		}
	}
	// Sort to ensure consistent result, regardless of map iteration order.
	slices.Sort(matchingComponents)
	return matchingComponents
}
func (c *endpointMappingConfig) inferEndpoint(filePath string) string {

View file

@ -52,9 +52,10 @@ import (
"k8s.io/client-go/util/workqueue"
"k8s.io/component-base/metrics"
"k8s.io/dynamic-resource-allocation/resourceclaim"
resourceclaimmetrics "k8s.io/dynamic-resource-allocation/resourceclaim/metrics"
"k8s.io/klog/v2"
podutil "k8s.io/kubernetes/pkg/api/v1/pod"
resourceclaimmetrics "k8s.io/kubernetes/pkg/controller/resourceclaim/metrics"
controllermetrics "k8s.io/kubernetes/pkg/controller/resourceclaim/metrics"
"k8s.io/utils/ptr"
)
@ -187,7 +188,8 @@ func NewController(
deletedObjects: newUIDCache(maxUIDCacheEntries),
}
resourceclaimmetrics.RegisterMetrics(newCustomCollector(ec.claimLister, getAdminAccessMetricLabel, logger))
resourceclaimmetrics.RegisterMetrics()
controllermetrics.RegisterMetrics(newCustomCollector(ec.claimLister, getAdminAccessMetricLabel, logger))
if _, err := podInformer.Informer().AddEventHandlerWithOptions(cache.ResourceEventHandlerFuncs{
AddFunc: func(obj interface{}) {
@ -1768,11 +1770,11 @@ type customCollector struct {
var _ metrics.StableCollector = &customCollector{}
func (collector *customCollector) DescribeWithStability(ch chan<- *metrics.Desc) {
ch <- resourceclaimmetrics.NumResourceClaimsDesc
ch <- controllermetrics.NumResourceClaimsDesc
}
func (collector *customCollector) CollectWithStability(ch chan<- metrics.Metric) {
rcMetrics := make(map[resourceclaimmetrics.NumResourceClaimLabels]int)
rcMetrics := make(map[controllermetrics.NumResourceClaimLabels]int)
rcList, err := collector.rcLister.List(labels.Everything())
if err != nil {
collector.logger.Error(err, "failed to list resource claims for metrics collection")
@ -1791,11 +1793,11 @@ func (collector *customCollector) CollectWithStability(ch chan<- metrics.Metric)
} else if val, ok := rc.Annotations[resourceapi.PodResourceClaimAnnotation]; ok && val != "" {
source = "resource_claim_template"
}
rcMetrics[resourceclaimmetrics.NumResourceClaimLabels{Allocated: allocated, AdminAccess: adminAccess, Source: source}]++
rcMetrics[controllermetrics.NumResourceClaimLabels{Allocated: allocated, AdminAccess: adminAccess, Source: source}]++
}
for rcLabels, count := range rcMetrics {
ch <- metrics.NewLazyConstMetric(
resourceclaimmetrics.NumResourceClaimsDesc,
controllermetrics.NumResourceClaimsDesc,
metrics.GaugeValue,
float64(count),
rcLabels.Allocated,

View file

@ -45,9 +45,10 @@ import (
featuregatetesting "k8s.io/component-base/featuregate/testing"
"k8s.io/component-base/metrics"
"k8s.io/component-base/metrics/testutil"
resourceclaimmetrics "k8s.io/dynamic-resource-allocation/resourceclaim/metrics"
"k8s.io/klog/v2"
"k8s.io/kubernetes/pkg/controller"
resourceclaimmetrics "k8s.io/kubernetes/pkg/controller/resourceclaim/metrics"
controllermetrics "k8s.io/kubernetes/pkg/controller/resourceclaim/metrics"
"k8s.io/kubernetes/pkg/features"
"k8s.io/kubernetes/test/utils/ktesting"
"k8s.io/utils/ptr"
@ -1060,7 +1061,7 @@ func TestResourceClaimEventHandler(t *testing.T) {
expectQueue(tCtx, []string{})
_, err = claimClient.Create(tCtx, testClaim, metav1.CreateOptions{})
em = em.withUpdates(resourceclaimmetrics.NumResourceClaimLabels{Allocated: "false", AdminAccess: "false", Source: ""}, 1)
em = em.withUpdates(controllermetrics.NumResourceClaimLabels{Allocated: "false", AdminAccess: "false", Source: ""}, 1)
tCtx.Step("create claim", func(tCtx ktesting.TContext) {
tCtx.ExpectNoError(err)
em.Eventually(tCtx)
@ -1077,8 +1078,8 @@ func TestResourceClaimEventHandler(t *testing.T) {
})
_, err = claimClient.Update(tCtx, testClaimAllocated, metav1.UpdateOptions{})
em = em.withUpdates(resourceclaimmetrics.NumResourceClaimLabels{Allocated: "false", AdminAccess: "false", Source: ""}, -1)
em = em.withUpdates(resourceclaimmetrics.NumResourceClaimLabels{Allocated: "true", AdminAccess: "false", Source: ""}, 1)
em = em.withUpdates(controllermetrics.NumResourceClaimLabels{Allocated: "false", AdminAccess: "false", Source: ""}, -1)
em = em.withUpdates(controllermetrics.NumResourceClaimLabels{Allocated: "true", AdminAccess: "false", Source: ""}, 1)
tCtx.Step("allocate claim", func(tCtx ktesting.TContext) {
tCtx.ExpectNoError(err)
em.Eventually(tCtx)
@ -1097,7 +1098,7 @@ func TestResourceClaimEventHandler(t *testing.T) {
otherClaimAllocated := testClaimAllocated.DeepCopy()
otherClaimAllocated.Name += "2"
_, err = claimClient.Create(tCtx, otherClaimAllocated, metav1.CreateOptions{})
em = em.withUpdates(resourceclaimmetrics.NumResourceClaimLabels{Allocated: "true", AdminAccess: "false", Source: ""}, 1)
em = em.withUpdates(controllermetrics.NumResourceClaimLabels{Allocated: "true", AdminAccess: "false", Source: ""}, 1)
tCtx.Step("create allocated claim", func(tCtx ktesting.TContext) {
tCtx.ExpectNoError(err)
em.Eventually(tCtx)
@ -1105,8 +1106,8 @@ func TestResourceClaimEventHandler(t *testing.T) {
})
_, err = claimClient.Update(tCtx, testClaim, metav1.UpdateOptions{})
em = em.withUpdates(resourceclaimmetrics.NumResourceClaimLabels{Allocated: "false", AdminAccess: "false", Source: ""}, 1)
em = em.withUpdates(resourceclaimmetrics.NumResourceClaimLabels{Allocated: "true", AdminAccess: "false", Source: ""}, -1)
em = em.withUpdates(controllermetrics.NumResourceClaimLabels{Allocated: "false", AdminAccess: "false", Source: ""}, 1)
em = em.withUpdates(controllermetrics.NumResourceClaimLabels{Allocated: "true", AdminAccess: "false", Source: ""}, -1)
tCtx.Step("deallocate claim", func(tCtx ktesting.TContext) {
tCtx.ExpectNoError(err)
em.Eventually(tCtx)
@ -1114,7 +1115,7 @@ func TestResourceClaimEventHandler(t *testing.T) {
})
err = claimClient.Delete(tCtx, testClaim.Name, metav1.DeleteOptions{})
em = em.withUpdates(resourceclaimmetrics.NumResourceClaimLabels{Allocated: "false", AdminAccess: "false", Source: ""}, -1)
em = em.withUpdates(controllermetrics.NumResourceClaimLabels{Allocated: "false", AdminAccess: "false", Source: ""}, -1)
tCtx.Step("delete deallocated claim", func(tCtx ktesting.TContext) {
tCtx.ExpectNoError(err)
em.Eventually(tCtx)
@ -1122,7 +1123,7 @@ func TestResourceClaimEventHandler(t *testing.T) {
})
err = claimClient.Delete(tCtx, otherClaimAllocated.Name, metav1.DeleteOptions{})
em = em.withUpdates(resourceclaimmetrics.NumResourceClaimLabels{Allocated: "true", AdminAccess: "false", Source: ""}, -1)
em = em.withUpdates(controllermetrics.NumResourceClaimLabels{Allocated: "true", AdminAccess: "false", Source: ""}, -1)
tCtx.Step("delete allocated claim", func(tCtx ktesting.TContext) {
tCtx.ExpectNoError(err)
em.Eventually(tCtx)
@ -1130,7 +1131,7 @@ func TestResourceClaimEventHandler(t *testing.T) {
})
_, err = claimClient.Create(tCtx, templatedTestClaimWithAdmin, metav1.CreateOptions{})
em = em.withUpdates(resourceclaimmetrics.NumResourceClaimLabels{Allocated: "false", AdminAccess: "true", Source: "resource_claim_template"}, 1)
em = em.withUpdates(controllermetrics.NumResourceClaimLabels{Allocated: "false", AdminAccess: "true", Source: "resource_claim_template"}, 1)
tCtx.Step("create claim with admin access", func(tCtx ktesting.TContext) {
tCtx.ExpectNoError(err)
em.Eventually(tCtx)
@ -1145,8 +1146,8 @@ func TestResourceClaimEventHandler(t *testing.T) {
})
_, err = claimClient.Update(tCtx, templatedTestClaimWithAdminAllocated, metav1.UpdateOptions{})
em = em.withUpdates(resourceclaimmetrics.NumResourceClaimLabels{Allocated: "false", AdminAccess: "true", Source: "resource_claim_template"}, -1)
em = em.withUpdates(resourceclaimmetrics.NumResourceClaimLabels{Allocated: "true", AdminAccess: "true", Source: "resource_claim_template"}, 1)
em = em.withUpdates(controllermetrics.NumResourceClaimLabels{Allocated: "false", AdminAccess: "true", Source: "resource_claim_template"}, -1)
em = em.withUpdates(controllermetrics.NumResourceClaimLabels{Allocated: "true", AdminAccess: "true", Source: "resource_claim_template"}, 1)
tCtx.Step("allocate claim with admin access", func(tCtx ktesting.TContext) {
tCtx.ExpectNoError(err)
em.Eventually(tCtx)
@ -1163,59 +1164,59 @@ func TestResourceClaimEventHandler(t *testing.T) {
otherClaimAllocated = templatedTestClaimWithAdminAllocated.DeepCopy()
otherClaimAllocated.Name += "2"
_, err = claimClient.Create(tCtx, otherClaimAllocated, metav1.CreateOptions{})
em = em.withUpdates(resourceclaimmetrics.NumResourceClaimLabels{Allocated: "true", AdminAccess: "true", Source: "resource_claim_template"}, 1)
em = em.withUpdates(controllermetrics.NumResourceClaimLabels{Allocated: "true", AdminAccess: "true", Source: "resource_claim_template"}, 1)
tCtx.Step("create allocated claim with admin access", func(tCtx ktesting.TContext) {
tCtx.ExpectNoError(err)
em.Eventually(tCtx)
})
_, err = claimClient.Update(tCtx, templatedTestClaimWithAdmin, metav1.UpdateOptions{})
em = em.withUpdates(resourceclaimmetrics.NumResourceClaimLabels{Allocated: "false", AdminAccess: "true", Source: "resource_claim_template"}, 1)
em = em.withUpdates(resourceclaimmetrics.NumResourceClaimLabels{Allocated: "true", AdminAccess: "true", Source: "resource_claim_template"}, -1)
em = em.withUpdates(controllermetrics.NumResourceClaimLabels{Allocated: "false", AdminAccess: "true", Source: "resource_claim_template"}, 1)
em = em.withUpdates(controllermetrics.NumResourceClaimLabels{Allocated: "true", AdminAccess: "true", Source: "resource_claim_template"}, -1)
tCtx.Step("deallocate claim with admin access", func(tCtx ktesting.TContext) {
tCtx.ExpectNoError(err)
em.Eventually(tCtx)
})
err = claimClient.Delete(tCtx, templatedTestClaimWithAdmin.Name, metav1.DeleteOptions{})
em = em.withUpdates(resourceclaimmetrics.NumResourceClaimLabels{Allocated: "false", AdminAccess: "true", Source: "resource_claim_template"}, -1)
em = em.withUpdates(controllermetrics.NumResourceClaimLabels{Allocated: "false", AdminAccess: "true", Source: "resource_claim_template"}, -1)
tCtx.Step("delete deallocated claim with admin access", func(tCtx ktesting.TContext) {
tCtx.ExpectNoError(err)
em.Eventually(tCtx)
})
err = claimClient.Delete(tCtx, otherClaimAllocated.Name, metav1.DeleteOptions{})
em = em.withUpdates(resourceclaimmetrics.NumResourceClaimLabels{Allocated: "true", AdminAccess: "true", Source: "resource_claim_template"}, -1)
em = em.withUpdates(controllermetrics.NumResourceClaimLabels{Allocated: "true", AdminAccess: "true", Source: "resource_claim_template"}, -1)
tCtx.Step("delete allocated claim with admin access", func(tCtx ktesting.TContext) {
tCtx.ExpectNoError(err)
em.Eventually(tCtx)
})
_, err = claimClient.Create(tCtx, extendedTestClaim, metav1.CreateOptions{})
em = em.withUpdates(resourceclaimmetrics.NumResourceClaimLabels{Allocated: "false", AdminAccess: "false", Source: "extended_resource"}, 1)
em = em.withUpdates(controllermetrics.NumResourceClaimLabels{Allocated: "false", AdminAccess: "false", Source: "extended_resource"}, 1)
tCtx.Step("create extended resource claim", func(tCtx ktesting.TContext) {
tCtx.ExpectNoError(err)
em.Eventually(tCtx)
})
_, err = claimClient.Update(tCtx, extendedTestClaimAllocated, metav1.UpdateOptions{})
em = em.withUpdates(resourceclaimmetrics.NumResourceClaimLabels{Allocated: "false", AdminAccess: "false", Source: "extended_resource"}, -1)
em = em.withUpdates(resourceclaimmetrics.NumResourceClaimLabels{Allocated: "true", AdminAccess: "false", Source: "extended_resource"}, 1)
em = em.withUpdates(controllermetrics.NumResourceClaimLabels{Allocated: "false", AdminAccess: "false", Source: "extended_resource"}, -1)
em = em.withUpdates(controllermetrics.NumResourceClaimLabels{Allocated: "true", AdminAccess: "false", Source: "extended_resource"}, 1)
tCtx.Step("allocate extended resource claim", func(tCtx ktesting.TContext) {
tCtx.ExpectNoError(err)
em.Eventually(tCtx)
})
_, err = claimClient.Update(tCtx, extendedTestClaim, metav1.UpdateOptions{})
em = em.withUpdates(resourceclaimmetrics.NumResourceClaimLabels{Allocated: "false", AdminAccess: "false", Source: "extended_resource"}, 1)
em = em.withUpdates(resourceclaimmetrics.NumResourceClaimLabels{Allocated: "true", AdminAccess: "false", Source: "extended_resource"}, -1)
em = em.withUpdates(controllermetrics.NumResourceClaimLabels{Allocated: "false", AdminAccess: "false", Source: "extended_resource"}, 1)
em = em.withUpdates(controllermetrics.NumResourceClaimLabels{Allocated: "true", AdminAccess: "false", Source: "extended_resource"}, -1)
tCtx.Step("deallocate extended resource claim", func(tCtx ktesting.TContext) {
tCtx.ExpectNoError(err)
em.Eventually(tCtx)
})
err = claimClient.Delete(tCtx, extendedTestClaim.Name, metav1.DeleteOptions{})
em = em.withUpdates(resourceclaimmetrics.NumResourceClaimLabels{Allocated: "false", AdminAccess: "false", Source: "extended_resource"}, -1)
em = em.withUpdates(controllermetrics.NumResourceClaimLabels{Allocated: "false", AdminAccess: "false", Source: "extended_resource"}, -1)
tCtx.Step("delete extended resource claim", func(tCtx ktesting.TContext) {
tCtx.ExpectNoError(err)
em.Eventually(tCtx)
@ -1238,7 +1239,7 @@ func testEventHandlers(tCtx ktesting.TContext) {
updateObjects []object
deleteObjects []object
expectedKeys []string
expectedMetrics map[resourceclaimmetrics.NumResourceClaimLabels]float64
expectedMetrics map[controllermetrics.NumResourceClaimLabels]float64
}{
"nothing": {},
"new-podgroup-feature-disabled": {
@ -1256,7 +1257,7 @@ func testEventHandlers(tCtx ktesting.TContext) {
initialObjects: []runtime.Object{testPodGroupClaim},
createObjects: []object{testPodGroupWithResourceInStatus},
expectedKeys: []string{},
expectedMetrics: map[resourceclaimmetrics.NumResourceClaimLabels]float64{
expectedMetrics: map[controllermetrics.NumResourceClaimLabels]float64{
{Allocated: "false", AdminAccess: "false"}: 1,
},
},
@ -1265,7 +1266,7 @@ func testEventHandlers(tCtx ktesting.TContext) {
initialObjects: []runtime.Object{testPodGroupWithResourceInStatus},
createObjects: []object{testPodGroupClaim},
expectedKeys: []string{testClaimKey},
expectedMetrics: map[resourceclaimmetrics.NumResourceClaimLabels]float64{
expectedMetrics: map[controllermetrics.NumResourceClaimLabels]float64{
{Allocated: "false", AdminAccess: "false"}: 1,
},
},
@ -1748,7 +1749,7 @@ func createResourceClaimReactor() func(action k8stesting.Action) (handled bool,
}
type numMetrics struct {
metrics map[resourceclaimmetrics.NumResourceClaimLabels]float64
metrics map[controllermetrics.NumResourceClaimLabels]float64
lister resourcelisters.ResourceClaimLister
}
@ -1767,7 +1768,7 @@ func getNumMetric(lister resourcelisters.ResourceClaimLister, logger klog.Logger
return numMetrics{}, fmt.Errorf("failed to gather metrics: %w", err)
}
metricName := "resourceclaim_controller_resource_claims"
metricName := "dynamic_resource_allocation_resource_claims"
em = newNumMetrics(lister)
@ -1786,7 +1787,7 @@ func getNumMetric(lister resourcelisters.ResourceClaimLister, logger klog.Logger
source := labels["source"]
value := metric.GetGauge().GetValue()
em.metrics[resourceclaimmetrics.NumResourceClaimLabels{
em.metrics[controllermetrics.NumResourceClaimLabels{
Allocated: allocated,
AdminAccess: adminAccess,
Source: source,
@ -1871,18 +1872,18 @@ func handleErr(t *testing.T, err error, metricName string) {
}
func setupMetrics() {
// Enable test mode to prevent global custom collector registration
resourceclaimmetrics.SetTestMode(true)
controllermetrics.SetTestMode(true)
// Reset counter metrics for each test (they are registered by the controller itself)
resourceclaimmetrics.ResourceClaimCreate.Reset()
}
func newNumMetrics(lister resourcelisters.ResourceClaimLister) numMetrics {
metrics := make(map[resourceclaimmetrics.NumResourceClaimLabels]float64)
metrics := make(map[controllermetrics.NumResourceClaimLabels]float64)
for _, allocated := range []string{"false", "true"} {
for _, adminAccess := range []string{"false", "true"} {
for _, source := range []string{"", "extended_resource", "resource_claim_template"} {
metrics[resourceclaimmetrics.NumResourceClaimLabels{
metrics[controllermetrics.NumResourceClaimLabels{
Allocated: allocated,
AdminAccess: adminAccess,
Source: source,
@ -1896,7 +1897,7 @@ func newNumMetrics(lister resourcelisters.ResourceClaimLister) numMetrics {
}
}
func (em numMetrics) withUpdates(rcLabels resourceclaimmetrics.NumResourceClaimLabels, n float64) numMetrics {
func (em numMetrics) withUpdates(rcLabels controllermetrics.NumResourceClaimLabels, n float64) numMetrics {
em.metrics[rcLabels] += n
return numMetrics{
metrics: em.metrics,

View file

@ -23,8 +23,8 @@ import (
"k8s.io/component-base/metrics/legacyregistry"
)
// ResourceClaimSubsystem - subsystem name used for ResourceClaim creation
const ResourceClaimSubsystem = "resourceclaim_controller"
// subsystem is intentionally generic because similar metrics also exist elsewhere.
const subsystem = "dynamic_resource_allocation"
type NumResourceClaimLabels struct {
Allocated string
@ -33,25 +33,12 @@ type NumResourceClaimLabels struct {
}
var (
// ResourceClaimCreate tracks the total number of
// ResourceClaims creation requests
// categorized by their creation status and admin access.
ResourceClaimCreate = metrics.NewCounterVec(
&metrics.CounterOpts{
Subsystem: ResourceClaimSubsystem,
Name: "creates_total",
Help: "Number of ResourceClaims creation requests, categorized by creation status and admin access",
StabilityLevel: metrics.ALPHA,
},
[]string{"status", "admin_access"},
)
// NumResourceClaimsDesc tracks the number of ResourceClaims,
// categorized by their allocation status, admin access, and source.
// Source can be 'resource_claim_template' (created from a template),
// 'extended_resource' (extended resources), or empty (manually created by a user).
NumResourceClaimsDesc = metrics.NewDesc(
metrics.BuildFQName("", ResourceClaimSubsystem, "resource_claims"),
metrics.BuildFQName("", subsystem, "resource_claims"),
"Number of ResourceClaims, categorized by allocation status, admin access, and source. "+
"Source can be 'resource_claim_template' (created from a template), "+
"'extended_resource' (extended resources), or empty (manually created by a user).",
@ -73,7 +60,6 @@ func SetTestMode(enabled bool) {
// RegisterMetrics registers ResourceClaim metrics.
func RegisterMetrics(collector metrics.StableCollector) {
registerMetrics.Do(func() {
legacyregistry.MustRegister(ResourceClaimCreate)
if !testMode && collector != nil {
// Only register custom collector in non-test mode
legacyregistry.CustomMustRegister(collector)

View file

@ -2232,7 +2232,7 @@ func testPlugin(tCtx ktesting.TContext) {
classes: []*resourceapi.DeviceClass{deviceClassWithExtendResourceName},
want: want{},
metrics: func(tCtx ktesting.TContext, g compbasemetrics.Gatherer) {
_, err := testutil.GetCounterValuesFromGatherer(g, "scheduler_resourceclaim_creates_total", map[string]string{}, "status")
_, err := testutil.GetCounterValuesFromGatherer(g, "dynamic_resource_allocation_resourceclaim_creates_total", map[string]string{}, "status")
require.ErrorContains(tCtx, err, "not found")
},
},
@ -2302,7 +2302,7 @@ func testPlugin(tCtx ktesting.TContext) {
},
},
metrics: func(tCtx ktesting.TContext, g compbasemetrics.Gatherer) {
_, err := testutil.GetCounterValuesFromGatherer(g, "scheduler_resourceclaim_creates_total", map[string]string{}, "status")
_, err := testutil.GetCounterValuesFromGatherer(g, "dynamic_resource_allocation_resourceclaim_creates_total", map[string]string{}, "status")
require.ErrorContains(tCtx, err, "not found")
},
},
@ -2324,7 +2324,7 @@ func testPlugin(tCtx ktesting.TContext) {
},
},
metrics: func(tCtx ktesting.TContext, g compbasemetrics.Gatherer) {
metric, err := testutil.GetCounterValuesFromGatherer(g, "scheduler_resourceclaim_creates_total", map[string]string{}, "status")
metric, err := testutil.GetCounterValuesFromGatherer(g, "dynamic_resource_allocation_resourceclaim_creates_total", map[string]string{}, "status")
require.NoError(tCtx, err)
require.Equal(tCtx, 1, int(metric["success"]))
},
@ -2347,7 +2347,7 @@ func testPlugin(tCtx ktesting.TContext) {
},
},
metrics: func(tCtx ktesting.TContext, g compbasemetrics.Gatherer) {
metric, err := testutil.GetCounterValuesFromGatherer(g, "scheduler_resourceclaim_creates_total", map[string]string{}, "status")
metric, err := testutil.GetCounterValuesFromGatherer(g, "dynamic_resource_allocation_resourceclaim_creates_total", map[string]string{}, "status")
require.NoError(tCtx, err)
require.Equal(tCtx, 1, int(metric["success"]))
},
@ -2370,7 +2370,7 @@ func testPlugin(tCtx ktesting.TContext) {
},
},
metrics: func(tCtx ktesting.TContext, g compbasemetrics.Gatherer) {
metric, err := testutil.GetCounterValuesFromGatherer(g, "scheduler_resourceclaim_creates_total", map[string]string{}, "status")
metric, err := testutil.GetCounterValuesFromGatherer(g, "dynamic_resource_allocation_resourceclaim_creates_total", map[string]string{}, "status")
require.NoError(tCtx, err)
require.Equal(tCtx, 1, int(metric["success"]))
},
@ -2395,7 +2395,7 @@ func testPlugin(tCtx ktesting.TContext) {
},
},
metrics: func(tCtx ktesting.TContext, g compbasemetrics.Gatherer) {
metric, err := testutil.GetCounterValuesFromGatherer(g, "scheduler_resourceclaim_creates_total", map[string]string{}, "status")
metric, err := testutil.GetCounterValuesFromGatherer(g, "dynamic_resource_allocation_resourceclaim_creates_total", map[string]string{}, "status")
require.NoError(tCtx, err)
require.Equal(tCtx, 1, int(metric["success"]))
},
@ -2418,7 +2418,7 @@ func testPlugin(tCtx ktesting.TContext) {
},
},
metrics: func(tCtx ktesting.TContext, g compbasemetrics.Gatherer) {
_, err := testutil.GetCounterValuesFromGatherer(g, "scheduler_resourceclaim_creates_total", map[string]string{}, "status")
_, err := testutil.GetCounterValuesFromGatherer(g, "dynamic_resource_allocation_resourceclaim_creates_total", map[string]string{}, "status")
require.ErrorContains(tCtx, err, "not found")
},
},
@ -2440,7 +2440,7 @@ func testPlugin(tCtx ktesting.TContext) {
},
},
metrics: func(tCtx ktesting.TContext, g compbasemetrics.Gatherer) {
_, err := testutil.GetCounterValuesFromGatherer(g, "scheduler_resourceclaim_creates_total", map[string]string{}, "status")
_, err := testutil.GetCounterValuesFromGatherer(g, "dynamic_resource_allocation_resourceclaim_creates_total", map[string]string{}, "status")
require.ErrorContains(tCtx, err, "not found")
},
},
@ -2462,7 +2462,7 @@ func testPlugin(tCtx ktesting.TContext) {
},
},
metrics: func(tCtx ktesting.TContext, g compbasemetrics.Gatherer) {
metric, err := testutil.GetCounterValuesFromGatherer(g, "scheduler_resourceclaim_creates_total", map[string]string{}, "status")
metric, err := testutil.GetCounterValuesFromGatherer(g, "dynamic_resource_allocation_resourceclaim_creates_total", map[string]string{}, "status")
require.NoError(tCtx, err)
require.Equal(tCtx, 1, int(metric["success"]))
},
@ -2479,7 +2479,7 @@ func testPlugin(tCtx ktesting.TContext) {
unreserveBeforePreBind: &result{},
},
metrics: func(tCtx ktesting.TContext, g compbasemetrics.Gatherer) {
metric, err := testutil.GetCounterValuesFromGatherer(g, "scheduler_resourceclaim_creates_total", map[string]string{}, "status")
metric, err := testutil.GetCounterValuesFromGatherer(g, "dynamic_resource_allocation_resourceclaim_creates_total", map[string]string{}, "status")
require.NoError(tCtx, err)
require.Equal(tCtx, 1, int(metric["success"]))
},
@ -2510,7 +2510,7 @@ func testPlugin(tCtx ktesting.TContext) {
},
},
metrics: func(tCtx ktesting.TContext, g compbasemetrics.Gatherer) {
metric, err := testutil.GetCounterValuesFromGatherer(g, "scheduler_resourceclaim_creates_total", map[string]string{}, "status")
metric, err := testutil.GetCounterValuesFromGatherer(g, "dynamic_resource_allocation_resourceclaim_creates_total", map[string]string{}, "status")
require.NoError(tCtx, err)
require.Equal(tCtx, 1, int(metric["failure"]))
},

View file

@ -548,10 +548,10 @@ func (pl *DynamicResources) createExtendedResourceClaimInAPI(
createdClaim, err := pl.clientset.ResourceV1().ResourceClaims(claim.Namespace).Create(ctx, claim, metav1.CreateOptions{})
if err != nil {
metrics.ResourceClaimCreatesTotal.WithLabelValues("failure").Inc()
metrics.ResourceClaimCreatesTotal.WithLabelValues("failure", "false").Inc()
return nil, fmt.Errorf("create claim for extended resources %v: %w", klog.KObj(claim), err)
}
metrics.ResourceClaimCreatesTotal.WithLabelValues("success").Inc()
metrics.ResourceClaimCreatesTotal.WithLabelValues("success", "false").Inc()
logger.V(5).Info("created claim for extended resources", "pod", klog.KObj(pod), "node", nodeName, "resourceclaim", klog.Format(createdClaim))
return createdClaim, nil

View file

@ -23,6 +23,7 @@ import (
utilfeature "k8s.io/apiserver/pkg/util/feature"
"k8s.io/component-base/metrics"
"k8s.io/component-base/metrics/legacyregistry"
resourceclaimmetrics "k8s.io/dynamic-resource-allocation/resourceclaim/metrics"
"k8s.io/kubernetes/pkg/features"
volumebindingmetrics "k8s.io/kubernetes/pkg/scheduler/framework/plugins/volumebinding/metrics"
)
@ -166,7 +167,8 @@ var (
AsyncAPIPendingCalls *metrics.GaugeVec
// The below is only available when the DRAExtendedResource feature gate is enabled.
ResourceClaimCreatesTotal *metrics.CounterVec
// This is the same metric that also gets recorded in the kube-controller-manager.
ResourceClaimCreatesTotal = resourceclaimmetrics.ResourceClaimCreate
podGroupScheduleAttempts *metrics.CounterVec
podGroupSchedulingLatency *metrics.HistogramVec
@ -200,7 +202,7 @@ func Register() {
)
}
if utilfeature.DefaultFeatureGate.Enabled(features.DRAExtendedResource) {
RegisterMetrics(ResourceClaimCreatesTotal)
resourceclaimmetrics.RegisterMetrics()
}
if utilfeature.DefaultFeatureGate.Enabled(features.GenericWorkload) {
RegisterMetrics(
@ -459,15 +461,6 @@ func InitMetrics() {
},
[]string{"call_type"})
ResourceClaimCreatesTotal = metrics.NewCounterVec(
&metrics.CounterOpts{
Subsystem: SchedulerSubsystem,
Name: "resourceclaim_creates_total",
Help: "Number of ResourceClaims creation requests within scheduler",
StabilityLevel: metrics.ALPHA,
},
[]string{"status"})
DRABindingConditionsAllocationsTotal = metrics.NewCounterVec(
&metrics.CounterOpts{
Subsystem: SchedulerSubsystem,

View file

@ -270,6 +270,8 @@
- k8s.io/apiserver/pkg/cel
- k8s.io/apiserver/pkg/cel/environment
- k8s.io/client-go
- k8s.io/component-base/metrics
- k8s.io/component-base/metrics/legacyregistry
- k8s.io/component-helpers/scheduling/corev1/nodeaffinity
- k8s.io/dynamic-resource-allocation
- k8s.io/klog

View file

@ -19,6 +19,7 @@ require (
k8s.io/apimachinery v0.0.0
k8s.io/apiserver v0.0.0
k8s.io/client-go v0.0.0
k8s.io/component-base v0.0.0
k8s.io/component-helpers v0.0.0
k8s.io/klog/v2 v2.140.0
k8s.io/kubelet v0.0.0
@ -83,7 +84,6 @@ require (
gopkg.in/evanphx/json-patch.v4 v4.13.0 // indirect
gopkg.in/inf.v0 v0.9.1 // indirect
gopkg.in/yaml.v3 v3.0.1 // indirect
k8s.io/component-base v0.0.0 // indirect
k8s.io/kube-openapi v0.0.0-20260509150519-312035bf509b // indirect
sigs.k8s.io/json v0.0.0-20250730193827-2d320260d730 // indirect
sigs.k8s.io/structured-merge-diff/v6 v6.3.2 // indirect

View file

@ -0,0 +1,8 @@
# See the OWNERS docs at https://go.k8s.io/owners
approvers:
- sig-instrumentation-approvers
reviewers:
- sig-instrumentation-reviewers
labels:
- sig/instrumentation

View file

@ -0,0 +1,53 @@
/*
Copyright The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package metrics
import (
"sync"
"k8s.io/component-base/metrics"
"k8s.io/component-base/metrics/legacyregistry"
)
// subsystem is used as the Subsystem of the metrics declared in this package.
// It is intentionally generic because these metrics are exposed in kube-controller-manager and kube-scheduler.
const subsystem = "dynamic_resource_allocation"
var (
// ResourceClaimCreate counts ResourceClaim creation requests,
// labeled by their creation status ("status") and admin access
// ("admin_access").
//
// Used by kube-controller-manager and kube-scheduler, so
// the component where this metric gets collected is another dimension.
// With the subsystem prefix, the full metric name is
// dynamic_resource_allocation_resourceclaim_creates_total.
ResourceClaimCreate = metrics.NewCounterVec(
&metrics.CounterOpts{
Subsystem: subsystem,
Name: "resourceclaim_creates_total",
Help: "Number of ResourceClaims creation requests, categorized by creation status and admin access",
StabilityLevel: metrics.ALPHA,
},
[]string{"status", "admin_access"},
)
)
var registerMetrics sync.Once
// RegisterMetrics registers ResourceClaim metrics.
func RegisterMetrics() {
registerMetrics.Do(func() {
legacyregistry.MustRegister(ResourceClaimCreate)
})
}

View file

@ -42,10 +42,10 @@ import (
"k8s.io/component-base/featuregate"
"k8s.io/component-base/metrics/testutil"
draclient "k8s.io/dynamic-resource-allocation/client"
resourceclaimmetrics "k8s.io/dynamic-resource-allocation/resourceclaim/metrics"
"k8s.io/dynamic-resource-allocation/resourceslice"
"k8s.io/klog/v2"
"k8s.io/kubernetes/pkg/controller/resourceclaim"
resourceclaimmetrics "k8s.io/kubernetes/pkg/controller/resourceclaim/metrics"
"k8s.io/kubernetes/pkg/features"
st "k8s.io/kubernetes/pkg/scheduler/testing"
e2epod "k8s.io/kubernetes/test/e2e/framework/pod"