From 973779a398ff24891e8c5ccbedb215fac1794bef Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Stanislav=20L=C3=A1zni=C4=8Dka?= Date: Tue, 1 Jul 2025 10:59:21 +0200 Subject: [PATCH 1/2] kubelet/image_manager: add metrics for EnsureImageExists MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Stanislav Láznička --- pkg/kubelet/images/image_manager.go | 39 +++++++-- pkg/kubelet/images/image_manager_test.go | 103 +++++++++++++++++------ pkg/kubelet/images/metrics.go | 65 ++++++++++++++ pkg/kubelet/images/metrics_test.go | 63 ++++++++++++++ 4 files changed, 235 insertions(+), 35 deletions(-) create mode 100644 pkg/kubelet/images/metrics.go create mode 100644 pkg/kubelet/images/metrics_test.go diff --git a/pkg/kubelet/images/image_manager.go b/pkg/kubelet/images/image_manager.go index b318b347984..9a64910548b 100644 --- a/pkg/kubelet/images/image_manager.go +++ b/pkg/kubelet/images/image_manager.go @@ -42,8 +42,13 @@ import ( "k8s.io/kubernetes/pkg/kubelet/images/pullmanager" "k8s.io/kubernetes/pkg/kubelet/metrics" "k8s.io/kubernetes/pkg/util/parsers" + "k8s.io/utils/ptr" ) +func init() { + registerMetrics() +} + type ImagePodPullingTimeRecorder interface { RecordImageStartedPulling(podUID types.UID) RecordImageFinishedPulling(podUID types.UID) @@ -101,25 +106,33 @@ func NewImageManager( // imagePullPrecheck inspects the pull policy and checks for image presence accordingly, // returning (imageRef, error msg, err) and logging any errors. -func (m *imageManager) imagePullPrecheck(ctx context.Context, objRef *v1.ObjectReference, logPrefix string, pullPolicy v1.PullPolicy, spec *kubecontainer.ImageSpec, requestedImage string) (imageRef string, msg string, err error) { +func (m *imageManager) imagePullPrecheck( + ctx context.Context, + objRef *v1.ObjectReference, + logPrefix string, + pullPolicy v1.PullPolicy, + spec *kubecontainer.ImageSpec, + requestedImage string, +) (imageRef string, imageFound *bool, msg string, err error) { switch pullPolicy { case v1.PullAlways: - return "", msg, nil + return "", nil, msg, nil case v1.PullIfNotPresent, v1.PullNever: imageRef, err = m.imageService.GetImageRef(ctx, *spec) if err != nil { msg = fmt.Sprintf("Failed to inspect image %q: %v", imageRef, err) m.logIt(objRef, v1.EventTypeWarning, events.FailedToInspectImage, logPrefix, msg, klog.Warning) - return "", msg, ErrImageInspect + return "", nil, msg, ErrImageInspect } } - if len(imageRef) == 0 && pullPolicy == v1.PullNever { + imageFound = ptr.To(len(imageRef) > 0) + if !*imageFound && pullPolicy == v1.PullNever { msg, err = m.imageNotPresentOnNeverPolicyError(logPrefix, objRef, requestedImage) - return "", msg, err + return "", ptr.To(false), msg, err } - return imageRef, msg, nil + return imageRef, imageFound, msg, nil } // records an event using ref, event msg. log to glog using prefix, msg, logFn @@ -151,6 +164,14 @@ func (m *imageManager) imageNotPresentOnNeverPolicyError(logPrefix string, objRe func (m *imageManager) EnsureImageExists(ctx context.Context, objRef *v1.ObjectReference, pod *v1.Pod, requestedImage string, pullSecrets []v1.Secret, podSandboxConfig *runtimeapi.PodSandboxConfig, podRuntimeHandler string, pullPolicy v1.PullPolicy) (imageRef, message string, err error) { logPrefix := fmt.Sprintf("%s/%s/%s", pod.Namespace, pod.Name, requestedImage) + var ( // variables used to record metrics + imagePresentLocally *bool + imagePullRequired *bool + ) + defer func() { + recordEnsureImageRequest(pullPolicy, imagePresentLocally, imagePullRequired) + }() + // If the image contains no tag or digest, a default tag should be applied. image, err := applyDefaultImageTag(requestedImage) if err != nil { @@ -173,12 +194,13 @@ func (m *imageManager) EnsureImageExists(ctx context.Context, objRef *v1.ObjectR RuntimeHandler: podRuntimeHandler, } - imageRef, message, err = m.imagePullPrecheck(ctx, objRef, logPrefix, pullPolicy, &spec, requestedImage) + imageRef, imagePresentLocally, message, err = m.imagePullPrecheck(ctx, objRef, logPrefix, pullPolicy, &spec, requestedImage) if err != nil { return "", message, err } if imageRef != "" && !utilfeature.DefaultFeatureGate.Enabled(features.KubeletEnsureSecretPulledImages) { + imagePullRequired = ptr.To(false) msg := fmt.Sprintf("Container image %q already present on machine", requestedImage) m.logIt(objRef, v1.EventTypeNormal, events.PulledImage, logPrefix, msg, klog.Info) @@ -218,6 +240,8 @@ func (m *imageManager) EnsureImageExists(ctx context.Context, objRef *v1.ObjectR pullCredentials, _ := keyring.Lookup(repoToPull) if imageRef != "" { + imagePullRequired = ptr.To(false) // should be overridden right after this if-block in case pull was required + var imagePullSecrets []kubeletconfiginternal.ImagePullSecret // we don't take the audience of the service account into account, so there can only // be one imagePullServiceAccount per pod when we try to make a decision. @@ -256,6 +280,7 @@ func (m *imageManager) EnsureImageExists(ctx context.Context, objRef *v1.ObjectR return imageRef, msg, nil } } + imagePullRequired = ptr.To(true) if pullPolicy == v1.PullNever { // The image is present as confirmed by imagePullPrecheck but it apparently diff --git a/pkg/kubelet/images/image_manager_test.go b/pkg/kubelet/images/image_manager_test.go index ad460e097f9..e8ef0c545c1 100644 --- a/pkg/kubelet/images/image_manager_test.go +++ b/pkg/kubelet/images/image_manager_test.go @@ -61,19 +61,20 @@ type pullerExpects struct { } type pullerTestCase struct { - testName string - containerImage string - policy v1.PullPolicy - pullSecrets []v1.Secret - allowedCredentials *mockImagePullManagerConfig // controls what the image pull manager considers "allowed" - serviceAccountName string // for testing service account coordinates - registryCredentials map[string][]credentialprovider.TrackedAuthConfig // image -> registry credentials (obtained from credential providers using SA tokens) - inspectErr error - pullerErr error - qps float32 - burst int - expected []pullerExpects - enableFeatures []featuregate.Feature + testName string + containerImage string + policy v1.PullPolicy + pullSecrets []v1.Secret + allowedCredentials *mockImagePullManagerConfig // controls what the image pull manager considers "allowed" + serviceAccountName string // for testing service account coordinates + registryCredentials map[string][]credentialprovider.TrackedAuthConfig // image -> registry credentials (obtained from credential providers using SA tokens) + inspectErr error + pullerErr error + qps float32 + burst int + expected []pullerExpects + expectedEnsureImageMetrics string + enableFeatures []featuregate.Feature } // mockImagePullManagerConfig configures what credentials the mock pull manager considers "allowed" @@ -108,7 +109,9 @@ func noFGPullerTestCases() []pullerTestCase { {Reason: "Pulling"}, {Reason: "Pulled"}, }, ""}, - }}, + }, + expectedEnsureImageMetrics: ensureExistsMetricForLabels("ifnotpresent", "false", "true"), + }, { // image present, don't pull testName: "image present, allow all, don't pull ", @@ -131,10 +134,12 @@ func noFGPullerTestCases() []pullerTestCase { []v1.Event{ {Reason: "Pulled"}, }, ""}, - }}, + }, + expectedEnsureImageMetrics: ensureExistsMetricForLabels("ifnotpresent", "true", "false"), + }, // image present, pull it {containerImage: "present_image", - testName: "image present, pull ", + testName: "image present, pull", policy: v1.PullAlways, inspectErr: nil, pullerErr: nil, @@ -156,7 +161,9 @@ func noFGPullerTestCases() []pullerTestCase { {Reason: "Pulling"}, {Reason: "Pulled"}, }, ""}, - }}, + }, + expectedEnsureImageMetrics: ensureExistsMetricForLabels("always", "unknown", "true"), + }, // missing image, error PullNever {containerImage: "missing_image", testName: "image missing, never pull", @@ -178,10 +185,12 @@ func noFGPullerTestCases() []pullerTestCase { []v1.Event{ {Reason: "ErrImageNeverPull"}, }, ""}, - }}, + }, + expectedEnsureImageMetrics: ensureExistsMetricForLabels("never", "false", "unknown"), + }, // missing image, unable to inspect {containerImage: "missing_image", - testName: "image missing, pull if not present", + testName: "image missing, pull if not present, fail on image inspect", policy: v1.PullIfNotPresent, inspectErr: errors.New("unknown inspectError"), pullerErr: nil, @@ -200,7 +209,9 @@ func noFGPullerTestCases() []pullerTestCase { []v1.Event{ {Reason: "InspectFailed"}, }, ""}, - }}, + }, + expectedEnsureImageMetrics: ensureExistsMetricForLabels("ifnotpresent", "unknown", "unknown"), + }, // missing image, unable to fetch {containerImage: "typo_image", testName: "image missing, unable to fetch", @@ -237,7 +248,9 @@ func noFGPullerTestCases() []pullerTestCase { []v1.Event{ {Reason: "BackOff"}, }, ""}, - }}, + }, + expectedEnsureImageMetrics: ensureExistsMetricForLabels("ifnotpresent", "false", "true"), + }, // image present, non-zero qps, try to pull {containerImage: "present_image", testName: "image present and qps>0, pull", @@ -262,7 +275,9 @@ func noFGPullerTestCases() []pullerTestCase { {Reason: "Pulling"}, {Reason: "Pulled"}, }, ""}, - }}, + }, + expectedEnsureImageMetrics: ensureExistsMetricForLabels("always", "unknown", "true"), + }, // image present, non-zero qps, try to pull when qps exceeded {containerImage: "present_image", testName: "image present and excessive qps rate, pull", @@ -286,7 +301,9 @@ func noFGPullerTestCases() []pullerTestCase { []v1.Event{ {Reason: "BackOff"}, }, ""}, - }}, + }, + expectedEnsureImageMetrics: ensureExistsMetricForLabels("always", "unknown", "true"), + }, // error case if image name fails validation due to invalid reference format {containerImage: "FAILED_IMAGE", testName: "invalid image name, no pull", @@ -300,7 +317,9 @@ func noFGPullerTestCases() []pullerTestCase { []v1.Event{ {Reason: "InspectFailed"}, }, ""}, - }}, + }, + expectedEnsureImageMetrics: ensureExistsMetricForLabels("always", "unknown", "unknown"), + }, // error case if image name contains http {containerImage: "http://url", testName: "invalid image name with http, no pull", @@ -314,7 +333,9 @@ func noFGPullerTestCases() []pullerTestCase { []v1.Event{ {Reason: "InspectFailed"}, }, ""}, - }}, + }, + expectedEnsureImageMetrics: ensureExistsMetricForLabels("always", "unknown", "unknown"), + }, // error case if image name contains sha256 {containerImage: "ba7816bf8f01cfea414140de5dae2223b00361a396177a9cb410ff61f20015ad", testName: "invalid image name with sha256, no pull", @@ -328,7 +349,9 @@ func noFGPullerTestCases() []pullerTestCase { []v1.Event{ {Reason: "InspectFailed"}, }, ""}, - }}, + }, + expectedEnsureImageMetrics: ensureExistsMetricForLabels("always", "unknown", "unknown"), + }, {containerImage: "typo_image", testName: "image missing, SignatureValidationFailed", policy: v1.PullIfNotPresent, @@ -360,7 +383,9 @@ func noFGPullerTestCases() []pullerTestCase { []v1.Event{ {Reason: "BackOff"}, }, "Back-off pulling image \"typo_image\": SignatureValidationFailed: image pull failed for typo_image because the signature validation failed"}, - }}, + }, + expectedEnsureImageMetrics: ensureExistsMetricForLabels("ifnotpresent", "false", "true"), + }, } } @@ -401,7 +426,9 @@ func ensureSecretImagesTestCases() []pullerTestCase { {Reason: "Pulling"}, {Reason: "Pulled"}, }, ""}, - }}, + }, + expectedEnsureImageMetrics: ensureExistsMetricForLabels("ifnotpresent", "true", "true"), + }, { testName: "[KubeletEnsureSecretPulledImages] image present, unknown secret to image pull manager, pull", containerImage: "present_image", @@ -435,6 +462,7 @@ func ensureSecretImagesTestCases() []pullerTestCase { {Reason: "Pulled"}, }, ""}, }, + expectedEnsureImageMetrics: ensureExistsMetricForLabels("ifnotpresent", "true", "true"), }, { testName: "[KubeletEnsureSecretPulledImages] image present, unknown secret to image pull manager, never pull policy -> fail", @@ -466,6 +494,7 @@ func ensureSecretImagesTestCases() []pullerTestCase { {Reason: "ErrImageNeverPull"}, }, ""}, }, + expectedEnsureImageMetrics: ensureExistsMetricForLabels("never", "true", "true"), }, { testName: "[KubeletEnsureSecretPulledImages] image present, a secret matches one of known to the image pull manager, don't pull", @@ -500,6 +529,7 @@ func ensureSecretImagesTestCases() []pullerTestCase { {Reason: "Pulled"}, }, ""}, }, + expectedEnsureImageMetrics: ensureExistsMetricForLabels("ifnotpresent", "true", "false"), }, { testName: "[KubeletEnsureSecretPulledImages] image present, service account credentials available, don't pull", @@ -535,6 +565,7 @@ func ensureSecretImagesTestCases() []pullerTestCase { {Reason: "Pulled"}, }, ""}, }, + expectedEnsureImageMetrics: ensureExistsMetricForLabels("ifnotpresent", "true", "false"), }, { testName: "[KubeletEnsureSecretPulledImages] image present, service account allowed by pull manager, don't pull", @@ -580,6 +611,7 @@ func ensureSecretImagesTestCases() []pullerTestCase { {Reason: "Pulled"}, }, ""}, }, + expectedEnsureImageMetrics: ensureExistsMetricForLabels("ifnotpresent", "true", "false"), }, { testName: "[KubeletEnsureSecretPulledImages] image present, mixed credentials (secrets + service accounts), pull required", @@ -623,6 +655,7 @@ func ensureSecretImagesTestCases() []pullerTestCase { {Reason: "Pulled"}, }, ""}, }, + expectedEnsureImageMetrics: ensureExistsMetricForLabels("ifnotpresent", "true", "true"), }, { testName: "[KubeletEnsureSecretPulledImages] image present, only node credentials (no source), proceed without tracking", @@ -651,6 +684,7 @@ func ensureSecretImagesTestCases() []pullerTestCase { {Reason: "Pulled"}, }, ""}, }, + expectedEnsureImageMetrics: ensureExistsMetricForLabels("ifnotpresent", "true", "false"), }, } @@ -1594,3 +1628,16 @@ func TestEnsureImageExistsWithNodeCredentialsOnly(t *testing.T) { // Image should not be pulled since it's present and accessible fakeRuntime.AssertCalls([]string{"GetImageRef"}) } + +func ensureExistsMetricForLabels(pullPolicy, imagePresentLocally, pullRequired string) string { + const desc = ` +# HELP kubelet_image_manager_ensure_image_requests_total [ALPHA] Number of ensure-image requests processed by the kubelet. +# TYPE kubelet_image_manager_ensure_image_requests_total counter +` + return desc + fmt.Sprintf( + "kubelet_image_manager_ensure_image_requests_total{present_locally=\"%s\", pull_policy=\"%s\", pull_required=\"%s\"} 1\n", + imagePresentLocally, + pullPolicy, + pullRequired, + ) +} diff --git a/pkg/kubelet/images/metrics.go b/pkg/kubelet/images/metrics.go new file mode 100644 index 00000000000..0a93f12bab1 --- /dev/null +++ b/pkg/kubelet/images/metrics.go @@ -0,0 +1,65 @@ +/* +Copyright 2025 The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package images + +import ( + "strconv" + "strings" + "sync" + + v1 "k8s.io/api/core/v1" + "k8s.io/component-base/metrics" + "k8s.io/component-base/metrics/legacyregistry" + kubeletmetrics "k8s.io/kubernetes/pkg/kubelet/metrics" +) + +var ( + ensureImageRequestsCounter = metrics.NewCounterVec( + &metrics.CounterOpts{ + Subsystem: kubeletmetrics.KubeletSubsystem + "_" + "image_manager", + Name: "ensure_image_requests_total", + Help: "Number of ensure-image requests processed by the kubelet.", + StabilityLevel: metrics.ALPHA, + }, + []string{"pull_policy", "present_locally", "pull_required"}, + ) +) + +var once sync.Once + +func registerMetrics() { + once.Do(func() { + legacyregistry.MustRegister(ensureImageRequestsCounter) + }) +} + +func recordEnsureImageRequest(pullPolicy v1.PullPolicy, imagePresentLocally, imagePullRequired *bool) { + presentLocally := "unknown" + if imagePresentLocally != nil { + presentLocally = strconv.FormatBool(*imagePresentLocally) + } + pullRequired := "unknown" + if imagePullRequired != nil { + pullRequired = strconv.FormatBool(*imagePullRequired) + } + + ensureImageRequestsCounter.WithLabelValues( + strings.ToLower(string(pullPolicy)), + presentLocally, + pullRequired, + ).Inc() +} diff --git a/pkg/kubelet/images/metrics_test.go b/pkg/kubelet/images/metrics_test.go new file mode 100644 index 00000000000..8625dc0774c --- /dev/null +++ b/pkg/kubelet/images/metrics_test.go @@ -0,0 +1,63 @@ +/* +Copyright 2025 The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package images + +import ( + "strings" + "testing" + + v1 "k8s.io/api/core/v1" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + legacyregistry "k8s.io/component-base/metrics/legacyregistry" + metricstestutil "k8s.io/component-base/metrics/testutil" + runtimeapi "k8s.io/cri-api/pkg/apis/runtime/v1" + "k8s.io/kubernetes/test/utils/ktesting" +) + +func TestEnsureImageRequestsMetrics(t *testing.T) { + pod := &v1.Pod{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test_pod", + Namespace: "test-ns", + UID: "bar", + ResourceVersion: "42", + }} + + podSandboxConfig := &runtimeapi.PodSandboxConfig{ + Metadata: &runtimeapi.PodSandboxMetadata{ + Name: pod.Name, + Namespace: pod.Namespace, + Uid: string(pod.UID), + }, + } + + cases := pullerTestCases() + + useSerializedEnv := true + for _, tt := range cases { + t.Run(tt.testName, func(t *testing.T) { + ctx := ktesting.Init(t) + puller, _, _, container, _, _ := pullerTestEnv(t, tt, useSerializedEnv, nil) + + ensureImageRequestsCounter.Reset() + _, _, _ = puller.EnsureImageExists(ctx, &v1.ObjectReference{}, pod, container.Image, tt.pullSecrets, podSandboxConfig, "", container.ImagePullPolicy) + if err := metricstestutil.GatherAndCompare(legacyregistry.DefaultGatherer, strings.NewReader(tt.expectedEnsureImageMetrics), "kubelet_image_manager_ensure_image_requests_total"); err != nil { + t.Fatal(err) + } + }) + } +} From 89f1edd5169316ff979639e4a031e4c5d112af13 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Stanislav=20L=C3=A1zni=C4=8Dka?= Date: Tue, 1 Jul 2025 11:00:43 +0200 Subject: [PATCH 2/2] kubelet/image_manager: rename EnsureImageExists arguments at the interface level MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit These were previously only renamed at the implementation and the interface itself was forgotten about. Signed-off-by: Stanislav Láznička --- pkg/kubelet/images/types.go | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pkg/kubelet/images/types.go b/pkg/kubelet/images/types.go index 244451cf5fc..c37d0a554bc 100644 --- a/pkg/kubelet/images/types.go +++ b/pkg/kubelet/images/types.go @@ -47,8 +47,8 @@ var ( // Implementations are expected to abstract the underlying runtimes. // Implementations are expected to be thread safe. type ImageManager interface { - // EnsureImageExists ensures that image specified by `imgRef` exists. - EnsureImageExists(ctx context.Context, objRef *v1.ObjectReference, pod *v1.Pod, imgRef string, pullSecrets []v1.Secret, podSandboxConfig *runtimeapi.PodSandboxConfig, podRuntimeHandler string, pullPolicy v1.PullPolicy) (string, string, error) + // EnsureImageExists ensures that image specified by `requestedImage` exists. + EnsureImageExists(ctx context.Context, objRef *v1.ObjectReference, pod *v1.Pod, requestedImage string, pullSecrets []v1.Secret, podSandboxConfig *runtimeapi.PodSandboxConfig, podRuntimeHandler string, pullPolicy v1.PullPolicy) (imageRef, message string, err error) // TODO(ronl): consolidating image managing and deleting operation in this interface }