mirror of
https://github.com/kubernetes/kubernetes.git
synced 2026-06-13 19:01:10 -04:00
Merge pull request #138542 from pohly/dra-create-resource-claim
DRA: harmonize ResourceClaim creation metric
This commit is contained in:
commit
f830e4a2b8
13 changed files with 148 additions and 107 deletions
|
|
@ -21,10 +21,12 @@ coreComponents:
|
|||
- "cmd/kube-controller-manager/"
|
||||
- "pkg/controller/"
|
||||
- "staging/src/k8s.io/controller-manager/"
|
||||
- "staging/src/k8s.io/dynamic-resource-allocation/resourceclaim/"
|
||||
- "staging/src/k8s.io/endpointslice/"
|
||||
kube-scheduler:
|
||||
- "cmd/kube-scheduler/"
|
||||
- "pkg/scheduler/"
|
||||
- "staging/src/k8s.io/dynamic-resource-allocation/resourceclaim/"
|
||||
- "staging/src/k8s.io/kube-scheduler/"
|
||||
kube-proxy:
|
||||
- "cmd/kube-proxy/"
|
||||
|
|
|
|||
|
|
@ -18,6 +18,7 @@ package main
|
|||
|
||||
import (
|
||||
"os"
|
||||
"slices"
|
||||
"sort"
|
||||
"strings"
|
||||
|
||||
|
|
@ -59,26 +60,24 @@ func (c *endpointMappingConfig) inferComponentEndpoints(filePath string) []metri
|
|||
endpoint := c.inferEndpoint(filePath)
|
||||
|
||||
if c.isSharedPath(filePath) {
|
||||
// The assumption here is that none of the standalone components
|
||||
// use the metrics under the path.
|
||||
return c.allCoreComponentEndpoints(endpoint)
|
||||
}
|
||||
|
||||
component := c.inferComponent(filePath, c.CoreComponents)
|
||||
if component != "" {
|
||||
return []metric.ComponentEndpoint{{
|
||||
// Core and standalone components may explicitly share the same metrics through their path patterns.
|
||||
components := c.inferComponents(filePath, c.CoreComponents)
|
||||
components = append(components, c.inferComponents(filePath, c.StandaloneComponents)...)
|
||||
|
||||
var endpoints []metric.ComponentEndpoint
|
||||
for _, component := range components {
|
||||
endpoints = append(endpoints, metric.ComponentEndpoint{
|
||||
Component: component,
|
||||
Endpoint: endpoint,
|
||||
}}
|
||||
})
|
||||
}
|
||||
|
||||
component = c.inferComponent(filePath, c.StandaloneComponents)
|
||||
if component != "" {
|
||||
return []metric.ComponentEndpoint{{
|
||||
Component: component,
|
||||
Endpoint: endpoint,
|
||||
}}
|
||||
}
|
||||
|
||||
return nil
|
||||
return endpoints
|
||||
}
|
||||
|
||||
func (c *endpointMappingConfig) isSharedPath(filePath string) bool {
|
||||
|
|
@ -90,23 +89,18 @@ func (c *endpointMappingConfig) isSharedPath(filePath string) bool {
|
|||
return false
|
||||
}
|
||||
|
||||
func (c *endpointMappingConfig) inferComponent(filePath string, components map[string][]string) string {
|
||||
// Sort component names for deterministic iteration order
|
||||
componentNames := make([]string, 0, len(components))
|
||||
for name := range components {
|
||||
componentNames = append(componentNames, name)
|
||||
}
|
||||
sort.Strings(componentNames)
|
||||
|
||||
for _, component := range componentNames {
|
||||
patterns := components[component]
|
||||
func (c *endpointMappingConfig) inferComponents(filePath string, components map[string][]string) []string {
|
||||
var matchingComponents []string
|
||||
for component, patterns := range components {
|
||||
for _, pattern := range patterns {
|
||||
if strings.Contains(filePath, pattern) {
|
||||
return component
|
||||
matchingComponents = append(matchingComponents, component)
|
||||
}
|
||||
}
|
||||
}
|
||||
return ""
|
||||
// Sort to ensure consistent result, regardless of map iteration order.
|
||||
slices.Sort(matchingComponents)
|
||||
return matchingComponents
|
||||
}
|
||||
|
||||
func (c *endpointMappingConfig) inferEndpoint(filePath string) string {
|
||||
|
|
|
|||
|
|
@ -52,9 +52,10 @@ import (
|
|||
"k8s.io/client-go/util/workqueue"
|
||||
"k8s.io/component-base/metrics"
|
||||
"k8s.io/dynamic-resource-allocation/resourceclaim"
|
||||
resourceclaimmetrics "k8s.io/dynamic-resource-allocation/resourceclaim/metrics"
|
||||
"k8s.io/klog/v2"
|
||||
podutil "k8s.io/kubernetes/pkg/api/v1/pod"
|
||||
resourceclaimmetrics "k8s.io/kubernetes/pkg/controller/resourceclaim/metrics"
|
||||
controllermetrics "k8s.io/kubernetes/pkg/controller/resourceclaim/metrics"
|
||||
"k8s.io/utils/ptr"
|
||||
)
|
||||
|
||||
|
|
@ -187,7 +188,8 @@ func NewController(
|
|||
deletedObjects: newUIDCache(maxUIDCacheEntries),
|
||||
}
|
||||
|
||||
resourceclaimmetrics.RegisterMetrics(newCustomCollector(ec.claimLister, getAdminAccessMetricLabel, logger))
|
||||
resourceclaimmetrics.RegisterMetrics()
|
||||
controllermetrics.RegisterMetrics(newCustomCollector(ec.claimLister, getAdminAccessMetricLabel, logger))
|
||||
|
||||
if _, err := podInformer.Informer().AddEventHandlerWithOptions(cache.ResourceEventHandlerFuncs{
|
||||
AddFunc: func(obj interface{}) {
|
||||
|
|
@ -1768,11 +1770,11 @@ type customCollector struct {
|
|||
var _ metrics.StableCollector = &customCollector{}
|
||||
|
||||
func (collector *customCollector) DescribeWithStability(ch chan<- *metrics.Desc) {
|
||||
ch <- resourceclaimmetrics.NumResourceClaimsDesc
|
||||
ch <- controllermetrics.NumResourceClaimsDesc
|
||||
}
|
||||
|
||||
func (collector *customCollector) CollectWithStability(ch chan<- metrics.Metric) {
|
||||
rcMetrics := make(map[resourceclaimmetrics.NumResourceClaimLabels]int)
|
||||
rcMetrics := make(map[controllermetrics.NumResourceClaimLabels]int)
|
||||
rcList, err := collector.rcLister.List(labels.Everything())
|
||||
if err != nil {
|
||||
collector.logger.Error(err, "failed to list resource claims for metrics collection")
|
||||
|
|
@ -1791,11 +1793,11 @@ func (collector *customCollector) CollectWithStability(ch chan<- metrics.Metric)
|
|||
} else if val, ok := rc.Annotations[resourceapi.PodResourceClaimAnnotation]; ok && val != "" {
|
||||
source = "resource_claim_template"
|
||||
}
|
||||
rcMetrics[resourceclaimmetrics.NumResourceClaimLabels{Allocated: allocated, AdminAccess: adminAccess, Source: source}]++
|
||||
rcMetrics[controllermetrics.NumResourceClaimLabels{Allocated: allocated, AdminAccess: adminAccess, Source: source}]++
|
||||
}
|
||||
for rcLabels, count := range rcMetrics {
|
||||
ch <- metrics.NewLazyConstMetric(
|
||||
resourceclaimmetrics.NumResourceClaimsDesc,
|
||||
controllermetrics.NumResourceClaimsDesc,
|
||||
metrics.GaugeValue,
|
||||
float64(count),
|
||||
rcLabels.Allocated,
|
||||
|
|
|
|||
|
|
@ -45,9 +45,10 @@ import (
|
|||
featuregatetesting "k8s.io/component-base/featuregate/testing"
|
||||
"k8s.io/component-base/metrics"
|
||||
"k8s.io/component-base/metrics/testutil"
|
||||
resourceclaimmetrics "k8s.io/dynamic-resource-allocation/resourceclaim/metrics"
|
||||
"k8s.io/klog/v2"
|
||||
"k8s.io/kubernetes/pkg/controller"
|
||||
resourceclaimmetrics "k8s.io/kubernetes/pkg/controller/resourceclaim/metrics"
|
||||
controllermetrics "k8s.io/kubernetes/pkg/controller/resourceclaim/metrics"
|
||||
"k8s.io/kubernetes/pkg/features"
|
||||
"k8s.io/kubernetes/test/utils/ktesting"
|
||||
"k8s.io/utils/ptr"
|
||||
|
|
@ -1060,7 +1061,7 @@ func TestResourceClaimEventHandler(t *testing.T) {
|
|||
expectQueue(tCtx, []string{})
|
||||
|
||||
_, err = claimClient.Create(tCtx, testClaim, metav1.CreateOptions{})
|
||||
em = em.withUpdates(resourceclaimmetrics.NumResourceClaimLabels{Allocated: "false", AdminAccess: "false", Source: ""}, 1)
|
||||
em = em.withUpdates(controllermetrics.NumResourceClaimLabels{Allocated: "false", AdminAccess: "false", Source: ""}, 1)
|
||||
tCtx.Step("create claim", func(tCtx ktesting.TContext) {
|
||||
tCtx.ExpectNoError(err)
|
||||
em.Eventually(tCtx)
|
||||
|
|
@ -1077,8 +1078,8 @@ func TestResourceClaimEventHandler(t *testing.T) {
|
|||
})
|
||||
|
||||
_, err = claimClient.Update(tCtx, testClaimAllocated, metav1.UpdateOptions{})
|
||||
em = em.withUpdates(resourceclaimmetrics.NumResourceClaimLabels{Allocated: "false", AdminAccess: "false", Source: ""}, -1)
|
||||
em = em.withUpdates(resourceclaimmetrics.NumResourceClaimLabels{Allocated: "true", AdminAccess: "false", Source: ""}, 1)
|
||||
em = em.withUpdates(controllermetrics.NumResourceClaimLabels{Allocated: "false", AdminAccess: "false", Source: ""}, -1)
|
||||
em = em.withUpdates(controllermetrics.NumResourceClaimLabels{Allocated: "true", AdminAccess: "false", Source: ""}, 1)
|
||||
tCtx.Step("allocate claim", func(tCtx ktesting.TContext) {
|
||||
tCtx.ExpectNoError(err)
|
||||
em.Eventually(tCtx)
|
||||
|
|
@ -1097,7 +1098,7 @@ func TestResourceClaimEventHandler(t *testing.T) {
|
|||
otherClaimAllocated := testClaimAllocated.DeepCopy()
|
||||
otherClaimAllocated.Name += "2"
|
||||
_, err = claimClient.Create(tCtx, otherClaimAllocated, metav1.CreateOptions{})
|
||||
em = em.withUpdates(resourceclaimmetrics.NumResourceClaimLabels{Allocated: "true", AdminAccess: "false", Source: ""}, 1)
|
||||
em = em.withUpdates(controllermetrics.NumResourceClaimLabels{Allocated: "true", AdminAccess: "false", Source: ""}, 1)
|
||||
tCtx.Step("create allocated claim", func(tCtx ktesting.TContext) {
|
||||
tCtx.ExpectNoError(err)
|
||||
em.Eventually(tCtx)
|
||||
|
|
@ -1105,8 +1106,8 @@ func TestResourceClaimEventHandler(t *testing.T) {
|
|||
})
|
||||
|
||||
_, err = claimClient.Update(tCtx, testClaim, metav1.UpdateOptions{})
|
||||
em = em.withUpdates(resourceclaimmetrics.NumResourceClaimLabels{Allocated: "false", AdminAccess: "false", Source: ""}, 1)
|
||||
em = em.withUpdates(resourceclaimmetrics.NumResourceClaimLabels{Allocated: "true", AdminAccess: "false", Source: ""}, -1)
|
||||
em = em.withUpdates(controllermetrics.NumResourceClaimLabels{Allocated: "false", AdminAccess: "false", Source: ""}, 1)
|
||||
em = em.withUpdates(controllermetrics.NumResourceClaimLabels{Allocated: "true", AdminAccess: "false", Source: ""}, -1)
|
||||
tCtx.Step("deallocate claim", func(tCtx ktesting.TContext) {
|
||||
tCtx.ExpectNoError(err)
|
||||
em.Eventually(tCtx)
|
||||
|
|
@ -1114,7 +1115,7 @@ func TestResourceClaimEventHandler(t *testing.T) {
|
|||
})
|
||||
|
||||
err = claimClient.Delete(tCtx, testClaim.Name, metav1.DeleteOptions{})
|
||||
em = em.withUpdates(resourceclaimmetrics.NumResourceClaimLabels{Allocated: "false", AdminAccess: "false", Source: ""}, -1)
|
||||
em = em.withUpdates(controllermetrics.NumResourceClaimLabels{Allocated: "false", AdminAccess: "false", Source: ""}, -1)
|
||||
tCtx.Step("delete deallocated claim", func(tCtx ktesting.TContext) {
|
||||
tCtx.ExpectNoError(err)
|
||||
em.Eventually(tCtx)
|
||||
|
|
@ -1122,7 +1123,7 @@ func TestResourceClaimEventHandler(t *testing.T) {
|
|||
})
|
||||
|
||||
err = claimClient.Delete(tCtx, otherClaimAllocated.Name, metav1.DeleteOptions{})
|
||||
em = em.withUpdates(resourceclaimmetrics.NumResourceClaimLabels{Allocated: "true", AdminAccess: "false", Source: ""}, -1)
|
||||
em = em.withUpdates(controllermetrics.NumResourceClaimLabels{Allocated: "true", AdminAccess: "false", Source: ""}, -1)
|
||||
tCtx.Step("delete allocated claim", func(tCtx ktesting.TContext) {
|
||||
tCtx.ExpectNoError(err)
|
||||
em.Eventually(tCtx)
|
||||
|
|
@ -1130,7 +1131,7 @@ func TestResourceClaimEventHandler(t *testing.T) {
|
|||
})
|
||||
|
||||
_, err = claimClient.Create(tCtx, templatedTestClaimWithAdmin, metav1.CreateOptions{})
|
||||
em = em.withUpdates(resourceclaimmetrics.NumResourceClaimLabels{Allocated: "false", AdminAccess: "true", Source: "resource_claim_template"}, 1)
|
||||
em = em.withUpdates(controllermetrics.NumResourceClaimLabels{Allocated: "false", AdminAccess: "true", Source: "resource_claim_template"}, 1)
|
||||
tCtx.Step("create claim with admin access", func(tCtx ktesting.TContext) {
|
||||
tCtx.ExpectNoError(err)
|
||||
em.Eventually(tCtx)
|
||||
|
|
@ -1145,8 +1146,8 @@ func TestResourceClaimEventHandler(t *testing.T) {
|
|||
})
|
||||
|
||||
_, err = claimClient.Update(tCtx, templatedTestClaimWithAdminAllocated, metav1.UpdateOptions{})
|
||||
em = em.withUpdates(resourceclaimmetrics.NumResourceClaimLabels{Allocated: "false", AdminAccess: "true", Source: "resource_claim_template"}, -1)
|
||||
em = em.withUpdates(resourceclaimmetrics.NumResourceClaimLabels{Allocated: "true", AdminAccess: "true", Source: "resource_claim_template"}, 1)
|
||||
em = em.withUpdates(controllermetrics.NumResourceClaimLabels{Allocated: "false", AdminAccess: "true", Source: "resource_claim_template"}, -1)
|
||||
em = em.withUpdates(controllermetrics.NumResourceClaimLabels{Allocated: "true", AdminAccess: "true", Source: "resource_claim_template"}, 1)
|
||||
tCtx.Step("allocate claim with admin access", func(tCtx ktesting.TContext) {
|
||||
tCtx.ExpectNoError(err)
|
||||
em.Eventually(tCtx)
|
||||
|
|
@ -1163,59 +1164,59 @@ func TestResourceClaimEventHandler(t *testing.T) {
|
|||
otherClaimAllocated = templatedTestClaimWithAdminAllocated.DeepCopy()
|
||||
otherClaimAllocated.Name += "2"
|
||||
_, err = claimClient.Create(tCtx, otherClaimAllocated, metav1.CreateOptions{})
|
||||
em = em.withUpdates(resourceclaimmetrics.NumResourceClaimLabels{Allocated: "true", AdminAccess: "true", Source: "resource_claim_template"}, 1)
|
||||
em = em.withUpdates(controllermetrics.NumResourceClaimLabels{Allocated: "true", AdminAccess: "true", Source: "resource_claim_template"}, 1)
|
||||
tCtx.Step("create allocated claim with admin access", func(tCtx ktesting.TContext) {
|
||||
tCtx.ExpectNoError(err)
|
||||
em.Eventually(tCtx)
|
||||
})
|
||||
|
||||
_, err = claimClient.Update(tCtx, templatedTestClaimWithAdmin, metav1.UpdateOptions{})
|
||||
em = em.withUpdates(resourceclaimmetrics.NumResourceClaimLabels{Allocated: "false", AdminAccess: "true", Source: "resource_claim_template"}, 1)
|
||||
em = em.withUpdates(resourceclaimmetrics.NumResourceClaimLabels{Allocated: "true", AdminAccess: "true", Source: "resource_claim_template"}, -1)
|
||||
em = em.withUpdates(controllermetrics.NumResourceClaimLabels{Allocated: "false", AdminAccess: "true", Source: "resource_claim_template"}, 1)
|
||||
em = em.withUpdates(controllermetrics.NumResourceClaimLabels{Allocated: "true", AdminAccess: "true", Source: "resource_claim_template"}, -1)
|
||||
tCtx.Step("deallocate claim with admin access", func(tCtx ktesting.TContext) {
|
||||
tCtx.ExpectNoError(err)
|
||||
em.Eventually(tCtx)
|
||||
})
|
||||
|
||||
err = claimClient.Delete(tCtx, templatedTestClaimWithAdmin.Name, metav1.DeleteOptions{})
|
||||
em = em.withUpdates(resourceclaimmetrics.NumResourceClaimLabels{Allocated: "false", AdminAccess: "true", Source: "resource_claim_template"}, -1)
|
||||
em = em.withUpdates(controllermetrics.NumResourceClaimLabels{Allocated: "false", AdminAccess: "true", Source: "resource_claim_template"}, -1)
|
||||
tCtx.Step("delete deallocated claim with admin access", func(tCtx ktesting.TContext) {
|
||||
tCtx.ExpectNoError(err)
|
||||
em.Eventually(tCtx)
|
||||
})
|
||||
|
||||
err = claimClient.Delete(tCtx, otherClaimAllocated.Name, metav1.DeleteOptions{})
|
||||
em = em.withUpdates(resourceclaimmetrics.NumResourceClaimLabels{Allocated: "true", AdminAccess: "true", Source: "resource_claim_template"}, -1)
|
||||
em = em.withUpdates(controllermetrics.NumResourceClaimLabels{Allocated: "true", AdminAccess: "true", Source: "resource_claim_template"}, -1)
|
||||
tCtx.Step("delete allocated claim with admin access", func(tCtx ktesting.TContext) {
|
||||
tCtx.ExpectNoError(err)
|
||||
em.Eventually(tCtx)
|
||||
})
|
||||
|
||||
_, err = claimClient.Create(tCtx, extendedTestClaim, metav1.CreateOptions{})
|
||||
em = em.withUpdates(resourceclaimmetrics.NumResourceClaimLabels{Allocated: "false", AdminAccess: "false", Source: "extended_resource"}, 1)
|
||||
em = em.withUpdates(controllermetrics.NumResourceClaimLabels{Allocated: "false", AdminAccess: "false", Source: "extended_resource"}, 1)
|
||||
tCtx.Step("create extended resource claim", func(tCtx ktesting.TContext) {
|
||||
tCtx.ExpectNoError(err)
|
||||
em.Eventually(tCtx)
|
||||
})
|
||||
|
||||
_, err = claimClient.Update(tCtx, extendedTestClaimAllocated, metav1.UpdateOptions{})
|
||||
em = em.withUpdates(resourceclaimmetrics.NumResourceClaimLabels{Allocated: "false", AdminAccess: "false", Source: "extended_resource"}, -1)
|
||||
em = em.withUpdates(resourceclaimmetrics.NumResourceClaimLabels{Allocated: "true", AdminAccess: "false", Source: "extended_resource"}, 1)
|
||||
em = em.withUpdates(controllermetrics.NumResourceClaimLabels{Allocated: "false", AdminAccess: "false", Source: "extended_resource"}, -1)
|
||||
em = em.withUpdates(controllermetrics.NumResourceClaimLabels{Allocated: "true", AdminAccess: "false", Source: "extended_resource"}, 1)
|
||||
tCtx.Step("allocate extended resource claim", func(tCtx ktesting.TContext) {
|
||||
tCtx.ExpectNoError(err)
|
||||
em.Eventually(tCtx)
|
||||
})
|
||||
|
||||
_, err = claimClient.Update(tCtx, extendedTestClaim, metav1.UpdateOptions{})
|
||||
em = em.withUpdates(resourceclaimmetrics.NumResourceClaimLabels{Allocated: "false", AdminAccess: "false", Source: "extended_resource"}, 1)
|
||||
em = em.withUpdates(resourceclaimmetrics.NumResourceClaimLabels{Allocated: "true", AdminAccess: "false", Source: "extended_resource"}, -1)
|
||||
em = em.withUpdates(controllermetrics.NumResourceClaimLabels{Allocated: "false", AdminAccess: "false", Source: "extended_resource"}, 1)
|
||||
em = em.withUpdates(controllermetrics.NumResourceClaimLabels{Allocated: "true", AdminAccess: "false", Source: "extended_resource"}, -1)
|
||||
tCtx.Step("deallocate extended resource claim", func(tCtx ktesting.TContext) {
|
||||
tCtx.ExpectNoError(err)
|
||||
em.Eventually(tCtx)
|
||||
})
|
||||
|
||||
err = claimClient.Delete(tCtx, extendedTestClaim.Name, metav1.DeleteOptions{})
|
||||
em = em.withUpdates(resourceclaimmetrics.NumResourceClaimLabels{Allocated: "false", AdminAccess: "false", Source: "extended_resource"}, -1)
|
||||
em = em.withUpdates(controllermetrics.NumResourceClaimLabels{Allocated: "false", AdminAccess: "false", Source: "extended_resource"}, -1)
|
||||
tCtx.Step("delete extended resource claim", func(tCtx ktesting.TContext) {
|
||||
tCtx.ExpectNoError(err)
|
||||
em.Eventually(tCtx)
|
||||
|
|
@ -1238,7 +1239,7 @@ func testEventHandlers(tCtx ktesting.TContext) {
|
|||
updateObjects []object
|
||||
deleteObjects []object
|
||||
expectedKeys []string
|
||||
expectedMetrics map[resourceclaimmetrics.NumResourceClaimLabels]float64
|
||||
expectedMetrics map[controllermetrics.NumResourceClaimLabels]float64
|
||||
}{
|
||||
"nothing": {},
|
||||
"new-podgroup-feature-disabled": {
|
||||
|
|
@ -1256,7 +1257,7 @@ func testEventHandlers(tCtx ktesting.TContext) {
|
|||
initialObjects: []runtime.Object{testPodGroupClaim},
|
||||
createObjects: []object{testPodGroupWithResourceInStatus},
|
||||
expectedKeys: []string{},
|
||||
expectedMetrics: map[resourceclaimmetrics.NumResourceClaimLabels]float64{
|
||||
expectedMetrics: map[controllermetrics.NumResourceClaimLabels]float64{
|
||||
{Allocated: "false", AdminAccess: "false"}: 1,
|
||||
},
|
||||
},
|
||||
|
|
@ -1265,7 +1266,7 @@ func testEventHandlers(tCtx ktesting.TContext) {
|
|||
initialObjects: []runtime.Object{testPodGroupWithResourceInStatus},
|
||||
createObjects: []object{testPodGroupClaim},
|
||||
expectedKeys: []string{testClaimKey},
|
||||
expectedMetrics: map[resourceclaimmetrics.NumResourceClaimLabels]float64{
|
||||
expectedMetrics: map[controllermetrics.NumResourceClaimLabels]float64{
|
||||
{Allocated: "false", AdminAccess: "false"}: 1,
|
||||
},
|
||||
},
|
||||
|
|
@ -1748,7 +1749,7 @@ func createResourceClaimReactor() func(action k8stesting.Action) (handled bool,
|
|||
}
|
||||
|
||||
type numMetrics struct {
|
||||
metrics map[resourceclaimmetrics.NumResourceClaimLabels]float64
|
||||
metrics map[controllermetrics.NumResourceClaimLabels]float64
|
||||
lister resourcelisters.ResourceClaimLister
|
||||
}
|
||||
|
||||
|
|
@ -1767,7 +1768,7 @@ func getNumMetric(lister resourcelisters.ResourceClaimLister, logger klog.Logger
|
|||
return numMetrics{}, fmt.Errorf("failed to gather metrics: %w", err)
|
||||
}
|
||||
|
||||
metricName := "resourceclaim_controller_resource_claims"
|
||||
metricName := "dynamic_resource_allocation_resource_claims"
|
||||
|
||||
em = newNumMetrics(lister)
|
||||
|
||||
|
|
@ -1786,7 +1787,7 @@ func getNumMetric(lister resourcelisters.ResourceClaimLister, logger klog.Logger
|
|||
source := labels["source"]
|
||||
value := metric.GetGauge().GetValue()
|
||||
|
||||
em.metrics[resourceclaimmetrics.NumResourceClaimLabels{
|
||||
em.metrics[controllermetrics.NumResourceClaimLabels{
|
||||
Allocated: allocated,
|
||||
AdminAccess: adminAccess,
|
||||
Source: source,
|
||||
|
|
@ -1871,18 +1872,18 @@ func handleErr(t *testing.T, err error, metricName string) {
|
|||
}
|
||||
func setupMetrics() {
|
||||
// Enable test mode to prevent global custom collector registration
|
||||
resourceclaimmetrics.SetTestMode(true)
|
||||
controllermetrics.SetTestMode(true)
|
||||
|
||||
// Reset counter metrics for each test (they are registered by the controller itself)
|
||||
resourceclaimmetrics.ResourceClaimCreate.Reset()
|
||||
}
|
||||
|
||||
func newNumMetrics(lister resourcelisters.ResourceClaimLister) numMetrics {
|
||||
metrics := make(map[resourceclaimmetrics.NumResourceClaimLabels]float64)
|
||||
metrics := make(map[controllermetrics.NumResourceClaimLabels]float64)
|
||||
for _, allocated := range []string{"false", "true"} {
|
||||
for _, adminAccess := range []string{"false", "true"} {
|
||||
for _, source := range []string{"", "extended_resource", "resource_claim_template"} {
|
||||
metrics[resourceclaimmetrics.NumResourceClaimLabels{
|
||||
metrics[controllermetrics.NumResourceClaimLabels{
|
||||
Allocated: allocated,
|
||||
AdminAccess: adminAccess,
|
||||
Source: source,
|
||||
|
|
@ -1896,7 +1897,7 @@ func newNumMetrics(lister resourcelisters.ResourceClaimLister) numMetrics {
|
|||
}
|
||||
}
|
||||
|
||||
func (em numMetrics) withUpdates(rcLabels resourceclaimmetrics.NumResourceClaimLabels, n float64) numMetrics {
|
||||
func (em numMetrics) withUpdates(rcLabels controllermetrics.NumResourceClaimLabels, n float64) numMetrics {
|
||||
em.metrics[rcLabels] += n
|
||||
return numMetrics{
|
||||
metrics: em.metrics,
|
||||
|
|
|
|||
|
|
@ -23,8 +23,8 @@ import (
|
|||
"k8s.io/component-base/metrics/legacyregistry"
|
||||
)
|
||||
|
||||
// ResourceClaimSubsystem - subsystem name used for ResourceClaim creation
|
||||
const ResourceClaimSubsystem = "resourceclaim_controller"
|
||||
// subsystem is intentionally generic because similar metrics also exist elsewhere.
|
||||
const subsystem = "dynamic_resource_allocation"
|
||||
|
||||
type NumResourceClaimLabels struct {
|
||||
Allocated string
|
||||
|
|
@ -33,25 +33,12 @@ type NumResourceClaimLabels struct {
|
|||
}
|
||||
|
||||
var (
|
||||
// ResourceClaimCreate tracks the total number of
|
||||
// ResourceClaims creation requests
|
||||
// categorized by their creation status and admin access.
|
||||
ResourceClaimCreate = metrics.NewCounterVec(
|
||||
&metrics.CounterOpts{
|
||||
Subsystem: ResourceClaimSubsystem,
|
||||
Name: "creates_total",
|
||||
Help: "Number of ResourceClaims creation requests, categorized by creation status and admin access",
|
||||
StabilityLevel: metrics.ALPHA,
|
||||
},
|
||||
[]string{"status", "admin_access"},
|
||||
)
|
||||
|
||||
// NumResourceClaimsDesc tracks the number of ResourceClaims,
|
||||
// categorized by their allocation status, admin access, and source.
|
||||
// Source can be 'resource_claim_template' (created from a template),
|
||||
// 'extended_resource' (extended resources), or empty (manually created by a user).
|
||||
NumResourceClaimsDesc = metrics.NewDesc(
|
||||
metrics.BuildFQName("", ResourceClaimSubsystem, "resource_claims"),
|
||||
metrics.BuildFQName("", subsystem, "resource_claims"),
|
||||
"Number of ResourceClaims, categorized by allocation status, admin access, and source. "+
|
||||
"Source can be 'resource_claim_template' (created from a template), "+
|
||||
"'extended_resource' (extended resources), or empty (manually created by a user).",
|
||||
|
|
@ -73,7 +60,6 @@ func SetTestMode(enabled bool) {
|
|||
// RegisterMetrics registers ResourceClaim metrics.
|
||||
func RegisterMetrics(collector metrics.StableCollector) {
|
||||
registerMetrics.Do(func() {
|
||||
legacyregistry.MustRegister(ResourceClaimCreate)
|
||||
if !testMode && collector != nil {
|
||||
// Only register custom collector in non-test mode
|
||||
legacyregistry.CustomMustRegister(collector)
|
||||
|
|
|
|||
|
|
@ -2232,7 +2232,7 @@ func testPlugin(tCtx ktesting.TContext) {
|
|||
classes: []*resourceapi.DeviceClass{deviceClassWithExtendResourceName},
|
||||
want: want{},
|
||||
metrics: func(tCtx ktesting.TContext, g compbasemetrics.Gatherer) {
|
||||
_, err := testutil.GetCounterValuesFromGatherer(g, "scheduler_resourceclaim_creates_total", map[string]string{}, "status")
|
||||
_, err := testutil.GetCounterValuesFromGatherer(g, "dynamic_resource_allocation_resourceclaim_creates_total", map[string]string{}, "status")
|
||||
require.ErrorContains(tCtx, err, "not found")
|
||||
},
|
||||
},
|
||||
|
|
@ -2302,7 +2302,7 @@ func testPlugin(tCtx ktesting.TContext) {
|
|||
},
|
||||
},
|
||||
metrics: func(tCtx ktesting.TContext, g compbasemetrics.Gatherer) {
|
||||
_, err := testutil.GetCounterValuesFromGatherer(g, "scheduler_resourceclaim_creates_total", map[string]string{}, "status")
|
||||
_, err := testutil.GetCounterValuesFromGatherer(g, "dynamic_resource_allocation_resourceclaim_creates_total", map[string]string{}, "status")
|
||||
require.ErrorContains(tCtx, err, "not found")
|
||||
},
|
||||
},
|
||||
|
|
@ -2324,7 +2324,7 @@ func testPlugin(tCtx ktesting.TContext) {
|
|||
},
|
||||
},
|
||||
metrics: func(tCtx ktesting.TContext, g compbasemetrics.Gatherer) {
|
||||
metric, err := testutil.GetCounterValuesFromGatherer(g, "scheduler_resourceclaim_creates_total", map[string]string{}, "status")
|
||||
metric, err := testutil.GetCounterValuesFromGatherer(g, "dynamic_resource_allocation_resourceclaim_creates_total", map[string]string{}, "status")
|
||||
require.NoError(tCtx, err)
|
||||
require.Equal(tCtx, 1, int(metric["success"]))
|
||||
},
|
||||
|
|
@ -2347,7 +2347,7 @@ func testPlugin(tCtx ktesting.TContext) {
|
|||
},
|
||||
},
|
||||
metrics: func(tCtx ktesting.TContext, g compbasemetrics.Gatherer) {
|
||||
metric, err := testutil.GetCounterValuesFromGatherer(g, "scheduler_resourceclaim_creates_total", map[string]string{}, "status")
|
||||
metric, err := testutil.GetCounterValuesFromGatherer(g, "dynamic_resource_allocation_resourceclaim_creates_total", map[string]string{}, "status")
|
||||
require.NoError(tCtx, err)
|
||||
require.Equal(tCtx, 1, int(metric["success"]))
|
||||
},
|
||||
|
|
@ -2370,7 +2370,7 @@ func testPlugin(tCtx ktesting.TContext) {
|
|||
},
|
||||
},
|
||||
metrics: func(tCtx ktesting.TContext, g compbasemetrics.Gatherer) {
|
||||
metric, err := testutil.GetCounterValuesFromGatherer(g, "scheduler_resourceclaim_creates_total", map[string]string{}, "status")
|
||||
metric, err := testutil.GetCounterValuesFromGatherer(g, "dynamic_resource_allocation_resourceclaim_creates_total", map[string]string{}, "status")
|
||||
require.NoError(tCtx, err)
|
||||
require.Equal(tCtx, 1, int(metric["success"]))
|
||||
},
|
||||
|
|
@ -2395,7 +2395,7 @@ func testPlugin(tCtx ktesting.TContext) {
|
|||
},
|
||||
},
|
||||
metrics: func(tCtx ktesting.TContext, g compbasemetrics.Gatherer) {
|
||||
metric, err := testutil.GetCounterValuesFromGatherer(g, "scheduler_resourceclaim_creates_total", map[string]string{}, "status")
|
||||
metric, err := testutil.GetCounterValuesFromGatherer(g, "dynamic_resource_allocation_resourceclaim_creates_total", map[string]string{}, "status")
|
||||
require.NoError(tCtx, err)
|
||||
require.Equal(tCtx, 1, int(metric["success"]))
|
||||
},
|
||||
|
|
@ -2418,7 +2418,7 @@ func testPlugin(tCtx ktesting.TContext) {
|
|||
},
|
||||
},
|
||||
metrics: func(tCtx ktesting.TContext, g compbasemetrics.Gatherer) {
|
||||
_, err := testutil.GetCounterValuesFromGatherer(g, "scheduler_resourceclaim_creates_total", map[string]string{}, "status")
|
||||
_, err := testutil.GetCounterValuesFromGatherer(g, "dynamic_resource_allocation_resourceclaim_creates_total", map[string]string{}, "status")
|
||||
require.ErrorContains(tCtx, err, "not found")
|
||||
},
|
||||
},
|
||||
|
|
@ -2440,7 +2440,7 @@ func testPlugin(tCtx ktesting.TContext) {
|
|||
},
|
||||
},
|
||||
metrics: func(tCtx ktesting.TContext, g compbasemetrics.Gatherer) {
|
||||
_, err := testutil.GetCounterValuesFromGatherer(g, "scheduler_resourceclaim_creates_total", map[string]string{}, "status")
|
||||
_, err := testutil.GetCounterValuesFromGatherer(g, "dynamic_resource_allocation_resourceclaim_creates_total", map[string]string{}, "status")
|
||||
require.ErrorContains(tCtx, err, "not found")
|
||||
},
|
||||
},
|
||||
|
|
@ -2462,7 +2462,7 @@ func testPlugin(tCtx ktesting.TContext) {
|
|||
},
|
||||
},
|
||||
metrics: func(tCtx ktesting.TContext, g compbasemetrics.Gatherer) {
|
||||
metric, err := testutil.GetCounterValuesFromGatherer(g, "scheduler_resourceclaim_creates_total", map[string]string{}, "status")
|
||||
metric, err := testutil.GetCounterValuesFromGatherer(g, "dynamic_resource_allocation_resourceclaim_creates_total", map[string]string{}, "status")
|
||||
require.NoError(tCtx, err)
|
||||
require.Equal(tCtx, 1, int(metric["success"]))
|
||||
},
|
||||
|
|
@ -2479,7 +2479,7 @@ func testPlugin(tCtx ktesting.TContext) {
|
|||
unreserveBeforePreBind: &result{},
|
||||
},
|
||||
metrics: func(tCtx ktesting.TContext, g compbasemetrics.Gatherer) {
|
||||
metric, err := testutil.GetCounterValuesFromGatherer(g, "scheduler_resourceclaim_creates_total", map[string]string{}, "status")
|
||||
metric, err := testutil.GetCounterValuesFromGatherer(g, "dynamic_resource_allocation_resourceclaim_creates_total", map[string]string{}, "status")
|
||||
require.NoError(tCtx, err)
|
||||
require.Equal(tCtx, 1, int(metric["success"]))
|
||||
},
|
||||
|
|
@ -2510,7 +2510,7 @@ func testPlugin(tCtx ktesting.TContext) {
|
|||
},
|
||||
},
|
||||
metrics: func(tCtx ktesting.TContext, g compbasemetrics.Gatherer) {
|
||||
metric, err := testutil.GetCounterValuesFromGatherer(g, "scheduler_resourceclaim_creates_total", map[string]string{}, "status")
|
||||
metric, err := testutil.GetCounterValuesFromGatherer(g, "dynamic_resource_allocation_resourceclaim_creates_total", map[string]string{}, "status")
|
||||
require.NoError(tCtx, err)
|
||||
require.Equal(tCtx, 1, int(metric["failure"]))
|
||||
},
|
||||
|
|
|
|||
|
|
@ -548,10 +548,10 @@ func (pl *DynamicResources) createExtendedResourceClaimInAPI(
|
|||
|
||||
createdClaim, err := pl.clientset.ResourceV1().ResourceClaims(claim.Namespace).Create(ctx, claim, metav1.CreateOptions{})
|
||||
if err != nil {
|
||||
metrics.ResourceClaimCreatesTotal.WithLabelValues("failure").Inc()
|
||||
metrics.ResourceClaimCreatesTotal.WithLabelValues("failure", "false").Inc()
|
||||
return nil, fmt.Errorf("create claim for extended resources %v: %w", klog.KObj(claim), err)
|
||||
}
|
||||
metrics.ResourceClaimCreatesTotal.WithLabelValues("success").Inc()
|
||||
metrics.ResourceClaimCreatesTotal.WithLabelValues("success", "false").Inc()
|
||||
logger.V(5).Info("created claim for extended resources", "pod", klog.KObj(pod), "node", nodeName, "resourceclaim", klog.Format(createdClaim))
|
||||
|
||||
return createdClaim, nil
|
||||
|
|
|
|||
|
|
@ -23,6 +23,7 @@ import (
|
|||
utilfeature "k8s.io/apiserver/pkg/util/feature"
|
||||
"k8s.io/component-base/metrics"
|
||||
"k8s.io/component-base/metrics/legacyregistry"
|
||||
resourceclaimmetrics "k8s.io/dynamic-resource-allocation/resourceclaim/metrics"
|
||||
"k8s.io/kubernetes/pkg/features"
|
||||
volumebindingmetrics "k8s.io/kubernetes/pkg/scheduler/framework/plugins/volumebinding/metrics"
|
||||
)
|
||||
|
|
@ -166,7 +167,8 @@ var (
|
|||
AsyncAPIPendingCalls *metrics.GaugeVec
|
||||
|
||||
// The below is only available when the DRAExtendedResource feature gate is enabled.
|
||||
ResourceClaimCreatesTotal *metrics.CounterVec
|
||||
// This is the same metric that also gets recorded in the kube-controller-manager.
|
||||
ResourceClaimCreatesTotal = resourceclaimmetrics.ResourceClaimCreate
|
||||
|
||||
podGroupScheduleAttempts *metrics.CounterVec
|
||||
podGroupSchedulingLatency *metrics.HistogramVec
|
||||
|
|
@ -200,7 +202,7 @@ func Register() {
|
|||
)
|
||||
}
|
||||
if utilfeature.DefaultFeatureGate.Enabled(features.DRAExtendedResource) {
|
||||
RegisterMetrics(ResourceClaimCreatesTotal)
|
||||
resourceclaimmetrics.RegisterMetrics()
|
||||
}
|
||||
if utilfeature.DefaultFeatureGate.Enabled(features.GenericWorkload) {
|
||||
RegisterMetrics(
|
||||
|
|
@ -459,15 +461,6 @@ func InitMetrics() {
|
|||
},
|
||||
[]string{"call_type"})
|
||||
|
||||
ResourceClaimCreatesTotal = metrics.NewCounterVec(
|
||||
&metrics.CounterOpts{
|
||||
Subsystem: SchedulerSubsystem,
|
||||
Name: "resourceclaim_creates_total",
|
||||
Help: "Number of ResourceClaims creation requests within scheduler",
|
||||
StabilityLevel: metrics.ALPHA,
|
||||
},
|
||||
[]string{"status"})
|
||||
|
||||
DRABindingConditionsAllocationsTotal = metrics.NewCounterVec(
|
||||
&metrics.CounterOpts{
|
||||
Subsystem: SchedulerSubsystem,
|
||||
|
|
|
|||
|
|
@ -270,6 +270,8 @@
|
|||
- k8s.io/apiserver/pkg/cel
|
||||
- k8s.io/apiserver/pkg/cel/environment
|
||||
- k8s.io/client-go
|
||||
- k8s.io/component-base/metrics
|
||||
- k8s.io/component-base/metrics/legacyregistry
|
||||
- k8s.io/component-helpers/scheduling/corev1/nodeaffinity
|
||||
- k8s.io/dynamic-resource-allocation
|
||||
- k8s.io/klog
|
||||
|
|
|
|||
|
|
@ -19,6 +19,7 @@ require (
|
|||
k8s.io/apimachinery v0.0.0
|
||||
k8s.io/apiserver v0.0.0
|
||||
k8s.io/client-go v0.0.0
|
||||
k8s.io/component-base v0.0.0
|
||||
k8s.io/component-helpers v0.0.0
|
||||
k8s.io/klog/v2 v2.140.0
|
||||
k8s.io/kubelet v0.0.0
|
||||
|
|
@ -83,7 +84,6 @@ require (
|
|||
gopkg.in/evanphx/json-patch.v4 v4.13.0 // indirect
|
||||
gopkg.in/inf.v0 v0.9.1 // indirect
|
||||
gopkg.in/yaml.v3 v3.0.1 // indirect
|
||||
k8s.io/component-base v0.0.0 // indirect
|
||||
k8s.io/kube-openapi v0.0.0-20260509150519-312035bf509b // indirect
|
||||
sigs.k8s.io/json v0.0.0-20250730193827-2d320260d730 // indirect
|
||||
sigs.k8s.io/structured-merge-diff/v6 v6.3.2 // indirect
|
||||
|
|
|
|||
|
|
@ -0,0 +1,8 @@
|
|||
# See the OWNERS docs at https://go.k8s.io/owners
|
||||
|
||||
approvers:
|
||||
- sig-instrumentation-approvers
|
||||
reviewers:
|
||||
- sig-instrumentation-reviewers
|
||||
labels:
|
||||
- sig/instrumentation
|
||||
|
|
@ -0,0 +1,53 @@
|
|||
/*
|
||||
Copyright The Kubernetes Authors.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
*/
|
||||
|
||||
package metrics
|
||||
|
||||
import (
|
||||
"sync"
|
||||
|
||||
"k8s.io/component-base/metrics"
|
||||
"k8s.io/component-base/metrics/legacyregistry"
|
||||
)
|
||||
|
||||
// subsystem is intentionally generic because these metrics are exposed in kube-controller-manager and kube-scheduler.
|
||||
const subsystem = "dynamic_resource_allocation"
|
||||
|
||||
var (
|
||||
// ResourceClaimCreate tracks the total number of
|
||||
// ResourceClaim creation requests
|
||||
// categorized by their creation status and admin access.
|
||||
// Used by kube-controller-manager and kube-scheduler, so
|
||||
// the component where this metric gets collected is another dimension.
|
||||
ResourceClaimCreate = metrics.NewCounterVec(
|
||||
&metrics.CounterOpts{
|
||||
Subsystem: subsystem,
|
||||
Name: "resourceclaim_creates_total",
|
||||
Help: "Number of ResourceClaims creation requests, categorized by creation status and admin access",
|
||||
StabilityLevel: metrics.ALPHA,
|
||||
},
|
||||
[]string{"status", "admin_access"},
|
||||
)
|
||||
)
|
||||
|
||||
var registerMetrics sync.Once
|
||||
|
||||
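// RegisterMetrics registers the ResourceClaim metrics with the legacy registry. It may be called more than once; registration only happens on the first call.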
|
||||
func RegisterMetrics() {
|
||||
registerMetrics.Do(func() {
|
||||
legacyregistry.MustRegister(ResourceClaimCreate)
|
||||
})
|
||||
}
|
||||
|
|
@ -42,10 +42,10 @@ import (
|
|||
"k8s.io/component-base/featuregate"
|
||||
"k8s.io/component-base/metrics/testutil"
|
||||
draclient "k8s.io/dynamic-resource-allocation/client"
|
||||
resourceclaimmetrics "k8s.io/dynamic-resource-allocation/resourceclaim/metrics"
|
||||
"k8s.io/dynamic-resource-allocation/resourceslice"
|
||||
"k8s.io/klog/v2"
|
||||
"k8s.io/kubernetes/pkg/controller/resourceclaim"
|
||||
resourceclaimmetrics "k8s.io/kubernetes/pkg/controller/resourceclaim/metrics"
|
||||
"k8s.io/kubernetes/pkg/features"
|
||||
st "k8s.io/kubernetes/pkg/scheduler/testing"
|
||||
e2epod "k8s.io/kubernetes/test/e2e/framework/pod"
|
||||
|
|
|
|||
Loading…
Reference in a new issue