Merge pull request #136368 from bhope/metrics-beta-endpointslice-controller

Promote endpointslice metrics to beta
This commit is contained in:
Kubernetes Prow Robot 2026-03-04 04:30:20 +05:30 committed by GitHub
commit cb01fb2641
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
3 changed files with 249 additions and 29 deletions

View file

@ -34,7 +34,7 @@ var (
Subsystem: EndpointSliceSubsystem,
Name: "endpoints_added_per_sync",
Help: "Number of endpoints added on each Service sync",
StabilityLevel: metrics.ALPHA,
StabilityLevel: metrics.BETA,
Buckets: metrics.ExponentialBuckets(2, 2, 15),
},
[]string{},
@ -46,7 +46,7 @@ var (
Subsystem: EndpointSliceSubsystem,
Name: "endpoints_removed_per_sync",
Help: "Number of endpoints removed on each Service sync",
StabilityLevel: metrics.ALPHA,
StabilityLevel: metrics.BETA,
Buckets: metrics.ExponentialBuckets(2, 2, 15),
},
[]string{},
@ -57,7 +57,7 @@ var (
Subsystem: EndpointSliceSubsystem,
Name: "endpoints_desired",
Help: "Number of endpoints desired",
StabilityLevel: metrics.ALPHA,
StabilityLevel: metrics.BETA,
},
[]string{},
)
@ -67,7 +67,7 @@ var (
Subsystem: EndpointSliceSubsystem,
Name: "num_endpoint_slices",
Help: "Number of EndpointSlices",
StabilityLevel: metrics.ALPHA,
StabilityLevel: metrics.BETA,
},
[]string{},
)
@ -78,7 +78,7 @@ var (
Subsystem: EndpointSliceSubsystem,
Name: "desired_endpoint_slices",
Help: "Number of EndpointSlices that would exist with perfect endpoint allocation",
StabilityLevel: metrics.ALPHA,
StabilityLevel: metrics.BETA,
},
[]string{},
)
@ -127,7 +127,7 @@ var (
Subsystem: EndpointSliceSubsystem,
Name: "services_count_by_traffic_distribution",
Help: "Number of Services using some specific trafficDistribution",
StabilityLevel: metrics.ALPHA,
StabilityLevel: metrics.BETA,
},
[]string{"traffic_distribution"}, // A trafficDistribution value
)

View file

@ -39,6 +39,7 @@ import (
corelisters "k8s.io/client-go/listers/core/v1"
k8stesting "k8s.io/client-go/testing"
"k8s.io/client-go/tools/record"
"k8s.io/component-base/metrics/legacyregistry"
"k8s.io/component-base/metrics/testutil"
"k8s.io/endpointslice/metrics"
"k8s.io/endpointslice/topologycache"
@ -51,6 +52,10 @@ const (
controllerName = "endpointslice-controller.k8s.io"
)
// init calls metrics.RegisterMetrics when this test package is initialized,
// before any test function runs.
func init() {
metrics.RegisterMetrics()
}
func expectAction(t *testing.T, actions []k8stesting.Action, index int, verb, resource string) {
t.Helper()
if len(actions) <= index {
@ -114,7 +119,7 @@ var defaultMaxEndpointsPerSlice = int32(100)
// Even when there are no pods, we want to have a placeholder slice for each service
func TestReconcileEmpty(t *testing.T) {
client := newClientset()
setupMetrics()
resetMetrics()
namespace := "test"
svc, _ := newServiceAndEndpointMeta("foo", namespace)
@ -468,7 +473,7 @@ func TestReconcile1Pod(t *testing.T) {
for name, testCase := range testCases {
t.Run(name, func(t *testing.T) {
client := newClientset()
setupMetrics()
resetMetrics()
triggerTime := time.Now().UTC()
r := newReconciler(client, []*corev1.Node{node1}, defaultMaxEndpointsPerSlice)
reconcileHelper(t, r, &testCase.service, []*corev1.Pod{pod1}, []*discovery.EndpointSlice{}, triggerTime)
@ -574,7 +579,7 @@ func TestReconcile1EndpointSlice(t *testing.T) {
for _, tc := range testCases {
t.Run(tc.desc, func(t *testing.T) {
client := newClientset()
setupMetrics()
resetMetrics()
existingSlices := []*discovery.EndpointSlice{}
if tc.existing != nil {
@ -727,7 +732,7 @@ func TestReconcile1EndpointSlicePublishNotReadyAddresses(t *testing.T) {
// reconcile should create 3 slices, completely filling 2 of them
func TestReconcileManyPods(t *testing.T) {
client := newClientset()
setupMetrics()
resetMetrics()
namespace := "test"
svc, _ := newServiceAndEndpointMeta("foo", namespace)
@ -760,7 +765,7 @@ func TestReconcileManyPods(t *testing.T) {
// this approach requires 1 update + 1 create instead of 2 updates + 1 create
func TestReconcileEndpointSlicesSomePreexisting(t *testing.T) {
client := newClientset()
setupMetrics()
resetMetrics()
namespace := "test"
svc, endpointMeta := newServiceAndEndpointMeta("foo", namespace)
@ -816,7 +821,7 @@ func TestReconcileEndpointSlicesSomePreexisting(t *testing.T) {
// this approach requires 2 creates instead of 2 updates + 1 create
func TestReconcileEndpointSlicesSomePreexistingWorseAllocation(t *testing.T) {
client := newClientset()
setupMetrics()
resetMetrics()
namespace := "test"
svc, endpointMeta := newServiceAndEndpointMeta("foo", namespace)
@ -973,7 +978,7 @@ func TestReconcileEndpointSlicesServicesReservedLabels(t *testing.T) {
// reconcile repacks the endpoints into 3 slices, and deletes the extras
func TestReconcileEndpointSlicesRecycling(t *testing.T) {
client := newClientset()
setupMetrics()
resetMetrics()
namespace := "test"
svc, endpointMeta := newServiceAndEndpointMeta("foo", namespace)
@ -1023,7 +1028,7 @@ func TestReconcileEndpointSlicesRecycling(t *testing.T) {
// for update.
func TestReconcileEndpointSlicesUpdatePacking(t *testing.T) {
client := newClientset()
setupMetrics()
resetMetrics()
namespace := "test"
svc, endpointMeta := newServiceAndEndpointMeta("foo", namespace)
@ -1082,7 +1087,7 @@ func TestReconcileEndpointSlicesUpdatePacking(t *testing.T) {
// address type will be replaced with a newer IPv4 type.
func TestReconcileEndpointSlicesReplaceDeprecated(t *testing.T) {
client := newClientset()
setupMetrics()
resetMetrics()
namespace := "test"
svc, endpointMeta := newServiceAndEndpointMeta("foo", namespace)
@ -1155,7 +1160,7 @@ func TestReconcileEndpointSlicesRecreation(t *testing.T) {
for _, tc := range testCases {
t.Run(tc.name, func(t *testing.T) {
client := newClientset()
setupMetrics()
resetMetrics()
namespace := "test"
svc, endpointMeta := newServiceAndEndpointMeta("foo", namespace)
@ -1204,7 +1209,7 @@ func TestReconcileEndpointSlicesRecreation(t *testing.T) {
// This test ensures that EndpointSlices are grouped correctly in that case.
func TestReconcileEndpointSlicesNamedPorts(t *testing.T) {
client := newClientset()
setupMetrics()
resetMetrics()
namespace := "test"
portNameIntStr := intstr.IntOrString{
@ -1312,7 +1317,7 @@ func TestReconcileMaxEndpointsPerSlice(t *testing.T) {
for _, testCase := range testCases {
t.Run(fmt.Sprintf("maxEndpointsPerSlice: %d", testCase.maxEndpointsPerSlice), func(t *testing.T) {
client := newClientset()
setupMetrics()
resetMetrics()
r := newReconciler(client, []*corev1.Node{{ObjectMeta: metav1.ObjectMeta{Name: "node-1"}}}, testCase.maxEndpointsPerSlice)
reconcileHelper(t, r, &svc, pods, []*discovery.EndpointSlice{}, time.Now())
expectUnorderedSlicesWithLengths(t, fetchEndpointSlices(t, client, namespace), testCase.expectedSliceLengths)
@ -1323,7 +1328,7 @@ func TestReconcileMaxEndpointsPerSlice(t *testing.T) {
func TestReconcileEndpointSlicesMetrics(t *testing.T) {
client := newClientset()
setupMetrics()
resetMetrics()
namespace := "test"
svc, _ := newServiceAndEndpointMeta("foo", namespace)
@ -1347,6 +1352,154 @@ func TestReconcileEndpointSlicesMetrics(t *testing.T) {
expectMetrics(t, expectedMetrics{desiredSlices: 1, actualSlices: 1, desiredEndpoints: 10, addedPerSync: 20, removedPerSync: 10, numCreated: 1, numUpdated: 1, numDeleted: 0, slicesChangedPerSync: 2})
}
// TestReconcileEndpointSlicesMetrics_EndpointsAddedPerSync_CollectAndCompare
// reconciles a Service whose pre-existing EndpointSlice already carries one of
// the two desired endpoints, then compares the collected output of
// metrics.EndpointsAddedPerSync against the expected text below: a single
// observation of value 1 in every bucket.
func TestReconcileEndpointSlicesMetrics_EndpointsAddedPerSync_CollectAndCompare(t *testing.T) {
	resetMetrics()

	client := newClientset()
	ns := "test"
	svc, endpointMeta := newServiceAndEndpointMeta("foo", ns)

	retainedPod := newPod(1, ns, true, 1, false)
	addedPod := newPod(2, ns, true, 1, false)

	// Seed a slice holding only the retained pod's endpoint, so that the
	// reconcile below should add exactly 1 endpoint.
	seedSlice := newEmptyEndpointSlice(1, ns, endpointMeta, svc)
	retainedEndpoint := podToEndpoint(retainedPod, &corev1.Node{}, &svc, discovery.AddressTypeIPv4)
	seedSlice.Endpoints = []discovery.Endpoint{retainedEndpoint}

	storedSlice, err := client.DiscoveryV1().EndpointSlices(ns).Create(context.TODO(), seedSlice, metav1.CreateOptions{})
	if err != nil {
		t.Fatalf("Expected no error creating EndpointSlice, got: %v", err)
	}

	nodes := []*corev1.Node{{ObjectMeta: metav1.ObjectMeta{Name: "node-1"}}}
	reconciler := newReconciler(client, nodes, defaultMaxEndpointsPerSlice)

	desiredPods := []*corev1.Pod{retainedPod, addedPod}
	currentSlices := []*discovery.EndpointSlice{storedSlice}
	reconcileHelper(t, reconciler, &svc, desiredPods, currentSlices, time.Now())

	expected := `
# HELP endpoint_slice_controller_endpoints_added_per_sync [BETA] Number of endpoints added on each Service sync
# TYPE endpoint_slice_controller_endpoints_added_per_sync histogram
endpoint_slice_controller_endpoints_added_per_sync_bucket{le="2"} 1
endpoint_slice_controller_endpoints_added_per_sync_bucket{le="4"} 1
endpoint_slice_controller_endpoints_added_per_sync_bucket{le="8"} 1
endpoint_slice_controller_endpoints_added_per_sync_bucket{le="16"} 1
endpoint_slice_controller_endpoints_added_per_sync_bucket{le="32"} 1
endpoint_slice_controller_endpoints_added_per_sync_bucket{le="64"} 1
endpoint_slice_controller_endpoints_added_per_sync_bucket{le="128"} 1
endpoint_slice_controller_endpoints_added_per_sync_bucket{le="256"} 1
endpoint_slice_controller_endpoints_added_per_sync_bucket{le="512"} 1
endpoint_slice_controller_endpoints_added_per_sync_bucket{le="1024"} 1
endpoint_slice_controller_endpoints_added_per_sync_bucket{le="2048"} 1
endpoint_slice_controller_endpoints_added_per_sync_bucket{le="4096"} 1
endpoint_slice_controller_endpoints_added_per_sync_bucket{le="8192"} 1
endpoint_slice_controller_endpoints_added_per_sync_bucket{le="16384"} 1
endpoint_slice_controller_endpoints_added_per_sync_bucket{le="32768"} 1
endpoint_slice_controller_endpoints_added_per_sync_bucket{le="+Inf"} 1
endpoint_slice_controller_endpoints_added_per_sync_sum 1
endpoint_slice_controller_endpoints_added_per_sync_count 1
`

	err = testutil.CollectAndCompare(
		metrics.EndpointsAddedPerSync,
		strings.NewReader(expected),
		"endpoint_slice_controller_endpoints_added_per_sync",
	)
	if err != nil {
		t.Fatal(err)
	}
}
// TestReconcileEndpointSlicesMetrics_EndpointsRemovedPerSync_CollectAndCompare
// reconciles a Service whose pre-existing EndpointSlice carries one endpoint
// that is still desired and one that is not, then compares the collected
// output of metrics.EndpointsRemovedPerSync against the expected text below:
// a single observation of value 1 in every bucket.
func TestReconcileEndpointSlicesMetrics_EndpointsRemovedPerSync_CollectAndCompare(t *testing.T) {
	resetMetrics()

	client := newClientset()
	ns := "test"
	svc, endpointMeta := newServiceAndEndpointMeta("foo", ns)

	obsoletePod := newPod(0, ns, true, 1, false)
	retainedPod := newPod(1, ns, true, 1, false)

	// Seed a slice with the obsolete endpoint followed by the retained one.
	// Only the retained pod is passed to the reconcile below, so it should
	// remove exactly 1 endpoint.
	seedSlice := newEmptyEndpointSlice(1, ns, endpointMeta, svc)
	obsoleteEndpoint := podToEndpoint(obsoletePod, &corev1.Node{}, &svc, discovery.AddressTypeIPv4)
	retainedEndpoint := podToEndpoint(retainedPod, &corev1.Node{}, &svc, discovery.AddressTypeIPv4)
	seedSlice.Endpoints = []discovery.Endpoint{obsoleteEndpoint, retainedEndpoint}

	storedSlice, err := client.DiscoveryV1().EndpointSlices(ns).Create(context.TODO(), seedSlice, metav1.CreateOptions{})
	if err != nil {
		t.Fatalf("Expected no error creating EndpointSlice, got: %v", err)
	}

	nodes := []*corev1.Node{{ObjectMeta: metav1.ObjectMeta{Name: "node-1"}}}
	reconciler := newReconciler(client, nodes, defaultMaxEndpointsPerSlice)

	desiredPods := []*corev1.Pod{retainedPod}
	currentSlices := []*discovery.EndpointSlice{storedSlice}
	reconcileHelper(t, reconciler, &svc, desiredPods, currentSlices, time.Now())

	expected := `
# HELP endpoint_slice_controller_endpoints_removed_per_sync [BETA] Number of endpoints removed on each Service sync
# TYPE endpoint_slice_controller_endpoints_removed_per_sync histogram
endpoint_slice_controller_endpoints_removed_per_sync_bucket{le="2"} 1
endpoint_slice_controller_endpoints_removed_per_sync_bucket{le="4"} 1
endpoint_slice_controller_endpoints_removed_per_sync_bucket{le="8"} 1
endpoint_slice_controller_endpoints_removed_per_sync_bucket{le="16"} 1
endpoint_slice_controller_endpoints_removed_per_sync_bucket{le="32"} 1
endpoint_slice_controller_endpoints_removed_per_sync_bucket{le="64"} 1
endpoint_slice_controller_endpoints_removed_per_sync_bucket{le="128"} 1
endpoint_slice_controller_endpoints_removed_per_sync_bucket{le="256"} 1
endpoint_slice_controller_endpoints_removed_per_sync_bucket{le="512"} 1
endpoint_slice_controller_endpoints_removed_per_sync_bucket{le="1024"} 1
endpoint_slice_controller_endpoints_removed_per_sync_bucket{le="2048"} 1
endpoint_slice_controller_endpoints_removed_per_sync_bucket{le="4096"} 1
endpoint_slice_controller_endpoints_removed_per_sync_bucket{le="8192"} 1
endpoint_slice_controller_endpoints_removed_per_sync_bucket{le="16384"} 1
endpoint_slice_controller_endpoints_removed_per_sync_bucket{le="32768"} 1
endpoint_slice_controller_endpoints_removed_per_sync_bucket{le="+Inf"} 1
endpoint_slice_controller_endpoints_removed_per_sync_sum 1
endpoint_slice_controller_endpoints_removed_per_sync_count 1
`

	err = testutil.CollectAndCompare(
		metrics.EndpointsRemovedPerSync,
		strings.NewReader(expected),
		"endpoint_slice_controller_endpoints_removed_per_sync",
	)
	if err != nil {
		t.Fatal(err)
	}
}
// TestReconcileEndpointSlicesMetrics_GaugeUpdates reconciles a Service that
// uses the PreferClose traffic distribution with two pods and no existing
// slices, then gathers four gauges from legacyregistry.DefaultGatherer and
// compares them against the expected text below.
func TestReconcileEndpointSlicesMetrics_GaugeUpdates(t *testing.T) {
	resetMetrics()

	client := newClientset()
	ns := "test"
	svc, _ := newServiceAndEndpointMeta("foo", ns)

	nodes := []*corev1.Node{{ObjectMeta: metav1.ObjectMeta{Name: "node-1"}}}
	reconciler := newReconciler(client, nodes, defaultMaxEndpointsPerSlice)

	svc.Spec.TrafficDistribution = ptr.To(corev1.ServiceTrafficDistributionPreferClose)
	_, createErr := client.CoreV1().Services(ns).Create(context.TODO(), &svc, metav1.CreateOptions{})
	if createErr != nil {
		t.Fatalf("Expected no error creating Service, got: %v", createErr)
	}

	// Exactly 2 pods are reconciled so the gauge values are deterministic.
	desiredPods := []*corev1.Pod{
		newPod(1, ns, true, 1, false),
		newPod(2, ns, true, 1, false),
	}
	reconcileHelper(t, reconciler, &svc, desiredPods, []*discovery.EndpointSlice{}, time.Unix(0, 0))

	expected := `
# HELP endpoint_slice_controller_desired_endpoint_slices [BETA] Number of EndpointSlices that would exist with perfect endpoint allocation
# TYPE endpoint_slice_controller_desired_endpoint_slices gauge
endpoint_slice_controller_desired_endpoint_slices 1
# HELP endpoint_slice_controller_endpoints_desired [BETA] Number of endpoints desired
# TYPE endpoint_slice_controller_endpoints_desired gauge
endpoint_slice_controller_endpoints_desired 2
# HELP endpoint_slice_controller_num_endpoint_slices [BETA] Number of EndpointSlices
# TYPE endpoint_slice_controller_num_endpoint_slices gauge
endpoint_slice_controller_num_endpoint_slices 1
# HELP endpoint_slice_controller_services_count_by_traffic_distribution [BETA] Number of Services using some specific trafficDistribution
# TYPE endpoint_slice_controller_services_count_by_traffic_distribution gauge
endpoint_slice_controller_services_count_by_traffic_distribution{traffic_distribution="PreferClose"} 1
`

	// Only these metric families are compared; the argument order of the
	// original call is kept.
	gaugeNames := []string{
		"endpoint_slice_controller_endpoints_desired",
		"endpoint_slice_controller_num_endpoint_slices",
		"endpoint_slice_controller_desired_endpoint_slices",
		"endpoint_slice_controller_services_count_by_traffic_distribution",
	}
	compareErr := testutil.GatherAndCompare(legacyregistry.DefaultGatherer, strings.NewReader(expected), gaugeNames...)
	if compareErr != nil {
		t.Fatal(compareErr)
	}
}
// When a Service has a non-nil deletionTimestamp we want to avoid creating any
// new EndpointSlices but continue to allow updates and deletes through. This
// test uses 3 EndpointSlices, 1 "to-create", 1 "to-update", and 1 "to-delete".
@ -1417,7 +1570,7 @@ func TestReconcilerFinalizeSvcDeletionTimestamp(t *testing.T) {
for _, tc := range testCases {
t.Run(tc.name, func(t *testing.T) {
client := newClientset()
setupMetrics()
resetMetrics()
r := newReconciler(client, []*corev1.Node{{ObjectMeta: metav1.ObjectMeta{Name: "node-1"}}}, defaultMaxEndpointsPerSlice)
namespace := "test"
@ -1670,7 +1823,7 @@ func TestReconcilerPodMissingNode(t *testing.T) {
for _, tc := range testCases {
t.Run(tc.name, func(t *testing.T) {
client := newClientset()
setupMetrics()
resetMetrics()
r := newReconciler(client, tc.existingNodes, defaultMaxEndpointsPerSlice)
logger, _ := ktesting.NewTestContext(t)
@ -1900,7 +2053,7 @@ func TestReconcileTopology(t *testing.T) {
createEndpointSlices(t, client, ns, tc.existingSlices)
logger, _ := ktesting.NewTestContext(t)
setupMetrics()
resetMetrics()
r := newReconciler(client, tc.nodes, defaultMaxEndpointsPerSlice)
if tc.topologyCacheEnabled {
r.topologyCache = topologycache.NewTopologyCache()
@ -2162,7 +2315,7 @@ func TestReconcile_TrafficDistribution(t *testing.T) {
t.Run(tc.name, func(t *testing.T) {
client := newClientset()
logger, _ := ktesting.NewTestContext(t)
setupMetrics()
resetMetrics()
r := newReconciler(client, nodes, defaultMaxEndpointsPerSlice)
r.preferSameTrafficDistribution = tc.preferSameFeatureGateEnabled
@ -2214,8 +2367,7 @@ func TestReconcile_TrafficDistribution(t *testing.T) {
func TestReconcileHeadlessServiceNoPorts(t *testing.T) {
namespace := "test"
client := newClientset()
setupMetrics()
resetMetrics()
svc := corev1.Service{
ObjectMeta: metav1.ObjectMeta{
Name: "headless-no-ports",
@ -2479,7 +2631,7 @@ func expectMetrics(t *testing.T, em expectedMetrics) {
handleErr(t, err, "slicesChangedPreferSameNode")
actualSlicesChangedPerSyncTrafficDist := actualSlicesChangedPreferClose + actualSlicesChangedPreferSameZone + actualSlicesChangedPreferSameNode
if actualSlicesChangedPerSyncTrafficDist != float64(em.slicesChangedPerSyncTrafficDist) {
t.Errorf("Expected slicesChangedPerSyncTrafficDist to be %d, got %v", em.slicesChangedPerSyncTrafficDist, actualSlicesChangedPerSyncTopology)
t.Errorf("Expected slicesChangedPerSyncTrafficDist to be %d, got %v", em.slicesChangedPerSyncTrafficDist, actualSlicesChangedPerSyncTrafficDist)
}
actualSyncSuccesses, err := testutil.GetCounterMetricValue(metrics.EndpointSliceSyncs.WithLabelValues("success"))
@ -2494,7 +2646,12 @@ func expectMetrics(t *testing.T, em expectedMetrics) {
t.Errorf("Expected endpointSliceSyncErrors to be %d, got %v", em.syncErrors, actualSyncErrors)
}
for _, trafficDistribution := range []string{"PreferClose", "ImplementationSpecific"} {
for _, trafficDistribution := range []string{
"PreferClose",
"PreferSameZone",
"PreferSameNode",
"ImplementationSpecific",
} {
gotServicesCount, err := testutil.GetGaugeMetricValue(metrics.ServicesCountByTrafficDistribution.WithLabelValues(trafficDistribution))
var wantServicesCount int
if em.servicesCountByTrafficDistribution != nil {
@ -2513,8 +2670,7 @@ func handleErr(t *testing.T, err error, metricName string) {
}
}
func setupMetrics() {
metrics.RegisterMetrics()
func resetMetrics() {
metrics.NumEndpointSlices.Reset()
metrics.DesiredEndpointSlices.Reset()
metrics.EndpointsDesired.Reset()

View file

@ -208,6 +208,70 @@
help: The count of disabled metrics.
type: Counter
stabilityLevel: BETA
- name: desired_endpoint_slices
subsystem: endpoint_slice_controller
help: Number of EndpointSlices that would exist with perfect endpoint allocation
type: Gauge
stabilityLevel: BETA
- name: endpoints_added_per_sync
subsystem: endpoint_slice_controller
help: Number of endpoints added on each Service sync
type: Histogram
stabilityLevel: BETA
buckets:
- 2
- 4
- 8
- 16
- 32
- 64
- 128
- 256
- 512
- 1024
- 2048
- 4096
- 8192
- 16384
- 32768
- name: endpoints_desired
subsystem: endpoint_slice_controller
help: Number of endpoints desired
type: Gauge
stabilityLevel: BETA
- name: endpoints_removed_per_sync
subsystem: endpoint_slice_controller
help: Number of endpoints removed on each Service sync
type: Histogram
stabilityLevel: BETA
buckets:
- 2
- 4
- 8
- 16
- 32
- 64
- 128
- 256
- 512
- 1024
- 2048
- 4096
- 8192
- 16384
- 32768
- name: num_endpoint_slices
subsystem: endpoint_slice_controller
help: Number of EndpointSlices
type: Gauge
stabilityLevel: BETA
- name: services_count_by_traffic_distribution
subsystem: endpoint_slice_controller
help: Number of Services using some specific trafficDistribution
type: Gauge
stabilityLevel: BETA
labels:
- traffic_distribution
- name: hidden_metrics_total
help: The count of hidden metrics.
type: Counter