Merge pull request #132644 from stlaz/ensure-secret-images-metrics

kubelet: add metrics for EnsureImageExists
This commit is contained in:
Kubernetes Prow Robot 2025-11-03 12:34:05 -08:00 committed by GitHub
commit e86c0171a7
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
5 changed files with 237 additions and 37 deletions

View file

@ -42,8 +42,13 @@ import (
"k8s.io/kubernetes/pkg/kubelet/images/pullmanager"
"k8s.io/kubernetes/pkg/kubelet/metrics"
"k8s.io/kubernetes/pkg/util/parsers"
"k8s.io/utils/ptr"
)
func init() {
registerMetrics()
}
type ImagePodPullingTimeRecorder interface {
RecordImageStartedPulling(podUID types.UID)
RecordImageFinishedPulling(podUID types.UID)
@ -101,25 +106,33 @@ func NewImageManager(
// imagePullPrecheck inspects the pull policy and checks for image presence accordingly,
// returning (imageRef, error msg, err) and logging any errors.
func (m *imageManager) imagePullPrecheck(ctx context.Context, objRef *v1.ObjectReference, logPrefix string, pullPolicy v1.PullPolicy, spec *kubecontainer.ImageSpec, requestedImage string) (imageRef string, msg string, err error) {
func (m *imageManager) imagePullPrecheck(
ctx context.Context,
objRef *v1.ObjectReference,
logPrefix string,
pullPolicy v1.PullPolicy,
spec *kubecontainer.ImageSpec,
requestedImage string,
) (imageRef string, imageFound *bool, msg string, err error) {
switch pullPolicy {
case v1.PullAlways:
return "", msg, nil
return "", nil, msg, nil
case v1.PullIfNotPresent, v1.PullNever:
imageRef, err = m.imageService.GetImageRef(ctx, *spec)
if err != nil {
msg = fmt.Sprintf("Failed to inspect image %q: %v", imageRef, err)
m.logIt(objRef, v1.EventTypeWarning, events.FailedToInspectImage, logPrefix, msg, klog.Warning)
return "", msg, ErrImageInspect
return "", nil, msg, ErrImageInspect
}
}
if len(imageRef) == 0 && pullPolicy == v1.PullNever {
imageFound = ptr.To(len(imageRef) > 0)
if !*imageFound && pullPolicy == v1.PullNever {
msg, err = m.imageNotPresentOnNeverPolicyError(logPrefix, objRef, requestedImage)
return "", msg, err
return "", ptr.To(false), msg, err
}
return imageRef, msg, nil
return imageRef, imageFound, msg, nil
}
// records an event using ref, event msg. log to glog using prefix, msg, logFn
@ -151,6 +164,14 @@ func (m *imageManager) imageNotPresentOnNeverPolicyError(logPrefix string, objRe
func (m *imageManager) EnsureImageExists(ctx context.Context, objRef *v1.ObjectReference, pod *v1.Pod, requestedImage string, pullSecrets []v1.Secret, podSandboxConfig *runtimeapi.PodSandboxConfig, podRuntimeHandler string, pullPolicy v1.PullPolicy) (imageRef, message string, err error) {
logPrefix := fmt.Sprintf("%s/%s/%s", pod.Namespace, pod.Name, requestedImage)
var ( // variables used to record metrics
imagePresentLocally *bool
imagePullRequired *bool
)
defer func() {
recordEnsureImageRequest(pullPolicy, imagePresentLocally, imagePullRequired)
}()
// If the image contains no tag or digest, a default tag should be applied.
image, err := applyDefaultImageTag(requestedImage)
if err != nil {
@ -173,12 +194,13 @@ func (m *imageManager) EnsureImageExists(ctx context.Context, objRef *v1.ObjectR
RuntimeHandler: podRuntimeHandler,
}
imageRef, message, err = m.imagePullPrecheck(ctx, objRef, logPrefix, pullPolicy, &spec, requestedImage)
imageRef, imagePresentLocally, message, err = m.imagePullPrecheck(ctx, objRef, logPrefix, pullPolicy, &spec, requestedImage)
if err != nil {
return "", message, err
}
if imageRef != "" && !utilfeature.DefaultFeatureGate.Enabled(features.KubeletEnsureSecretPulledImages) {
imagePullRequired = ptr.To(false)
msg := fmt.Sprintf("Container image %q already present on machine", requestedImage)
m.logIt(objRef, v1.EventTypeNormal, events.PulledImage, logPrefix, msg, klog.Info)
@ -218,6 +240,8 @@ func (m *imageManager) EnsureImageExists(ctx context.Context, objRef *v1.ObjectR
pullCredentials, _ := keyring.Lookup(repoToPull)
if imageRef != "" {
imagePullRequired = ptr.To(false) // should be overridden right after this if-block in case pull was required
var imagePullSecrets []kubeletconfiginternal.ImagePullSecret
// we don't take the audience of the service account into account, so there can only
// be one imagePullServiceAccount per pod when we try to make a decision.
@ -256,6 +280,7 @@ func (m *imageManager) EnsureImageExists(ctx context.Context, objRef *v1.ObjectR
return imageRef, msg, nil
}
}
imagePullRequired = ptr.To(true)
if pullPolicy == v1.PullNever {
// The image is present as confirmed by imagePullPrecheck but it apparently

View file

@ -61,19 +61,20 @@ type pullerExpects struct {
}
type pullerTestCase struct {
testName string
containerImage string
policy v1.PullPolicy
pullSecrets []v1.Secret
allowedCredentials *mockImagePullManagerConfig // controls what the image pull manager considers "allowed"
serviceAccountName string // for testing service account coordinates
registryCredentials map[string][]credentialprovider.TrackedAuthConfig // image -> registry credentials (obtained from credential providers using SA tokens)
inspectErr error
pullerErr error
qps float32
burst int
expected []pullerExpects
enableFeatures []featuregate.Feature
testName string
containerImage string
policy v1.PullPolicy
pullSecrets []v1.Secret
allowedCredentials *mockImagePullManagerConfig // controls what the image pull manager considers "allowed"
serviceAccountName string // for testing service account coordinates
registryCredentials map[string][]credentialprovider.TrackedAuthConfig // image -> registry credentials (obtained from credential providers using SA tokens)
inspectErr error
pullerErr error
qps float32
burst int
expected []pullerExpects
expectedEnsureImageMetrics string
enableFeatures []featuregate.Feature
}
// mockImagePullManagerConfig configures what credentials the mock pull manager considers "allowed"
@ -108,7 +109,9 @@ func noFGPullerTestCases() []pullerTestCase {
{Reason: "Pulling"},
{Reason: "Pulled"},
}, ""},
}},
},
expectedEnsureImageMetrics: ensureExistsMetricForLabels("ifnotpresent", "false", "true"),
},
{ // image present, don't pull
testName: "image present, allow all, don't pull ",
@ -131,10 +134,12 @@ func noFGPullerTestCases() []pullerTestCase {
[]v1.Event{
{Reason: "Pulled"},
}, ""},
}},
},
expectedEnsureImageMetrics: ensureExistsMetricForLabels("ifnotpresent", "true", "false"),
},
// image present, pull it
{containerImage: "present_image",
testName: "image present, pull ",
testName: "image present, pull",
policy: v1.PullAlways,
inspectErr: nil,
pullerErr: nil,
@ -156,7 +161,9 @@ func noFGPullerTestCases() []pullerTestCase {
{Reason: "Pulling"},
{Reason: "Pulled"},
}, ""},
}},
},
expectedEnsureImageMetrics: ensureExistsMetricForLabels("always", "unknown", "true"),
},
// missing image, error PullNever
{containerImage: "missing_image",
testName: "image missing, never pull",
@ -178,10 +185,12 @@ func noFGPullerTestCases() []pullerTestCase {
[]v1.Event{
{Reason: "ErrImageNeverPull"},
}, ""},
}},
},
expectedEnsureImageMetrics: ensureExistsMetricForLabels("never", "false", "unknown"),
},
// missing image, unable to inspect
{containerImage: "missing_image",
testName: "image missing, pull if not present",
testName: "image missing, pull if not present, fail on image inspect",
policy: v1.PullIfNotPresent,
inspectErr: errors.New("unknown inspectError"),
pullerErr: nil,
@ -200,7 +209,9 @@ func noFGPullerTestCases() []pullerTestCase {
[]v1.Event{
{Reason: "InspectFailed"},
}, ""},
}},
},
expectedEnsureImageMetrics: ensureExistsMetricForLabels("ifnotpresent", "unknown", "unknown"),
},
// missing image, unable to fetch
{containerImage: "typo_image",
testName: "image missing, unable to fetch",
@ -237,7 +248,9 @@ func noFGPullerTestCases() []pullerTestCase {
[]v1.Event{
{Reason: "BackOff"},
}, ""},
}},
},
expectedEnsureImageMetrics: ensureExistsMetricForLabels("ifnotpresent", "false", "true"),
},
// image present, non-zero qps, try to pull
{containerImage: "present_image",
testName: "image present and qps>0, pull",
@ -262,7 +275,9 @@ func noFGPullerTestCases() []pullerTestCase {
{Reason: "Pulling"},
{Reason: "Pulled"},
}, ""},
}},
},
expectedEnsureImageMetrics: ensureExistsMetricForLabels("always", "unknown", "true"),
},
// image present, non-zero qps, try to pull when qps exceeded
{containerImage: "present_image",
testName: "image present and excessive qps rate, pull",
@ -286,7 +301,9 @@ func noFGPullerTestCases() []pullerTestCase {
[]v1.Event{
{Reason: "BackOff"},
}, ""},
}},
},
expectedEnsureImageMetrics: ensureExistsMetricForLabels("always", "unknown", "true"),
},
// error case if image name fails validation due to invalid reference format
{containerImage: "FAILED_IMAGE",
testName: "invalid image name, no pull",
@ -300,7 +317,9 @@ func noFGPullerTestCases() []pullerTestCase {
[]v1.Event{
{Reason: "InspectFailed"},
}, ""},
}},
},
expectedEnsureImageMetrics: ensureExistsMetricForLabels("always", "unknown", "unknown"),
},
// error case if image name contains http
{containerImage: "http://url",
testName: "invalid image name with http, no pull",
@ -314,7 +333,9 @@ func noFGPullerTestCases() []pullerTestCase {
[]v1.Event{
{Reason: "InspectFailed"},
}, ""},
}},
},
expectedEnsureImageMetrics: ensureExistsMetricForLabels("always", "unknown", "unknown"),
},
// error case if image name contains sha256
{containerImage: "ba7816bf8f01cfea414140de5dae2223b00361a396177a9cb410ff61f20015ad",
testName: "invalid image name with sha256, no pull",
@ -328,7 +349,9 @@ func noFGPullerTestCases() []pullerTestCase {
[]v1.Event{
{Reason: "InspectFailed"},
}, ""},
}},
},
expectedEnsureImageMetrics: ensureExistsMetricForLabels("always", "unknown", "unknown"),
},
{containerImage: "typo_image",
testName: "image missing, SignatureValidationFailed",
policy: v1.PullIfNotPresent,
@ -360,7 +383,9 @@ func noFGPullerTestCases() []pullerTestCase {
[]v1.Event{
{Reason: "BackOff"},
}, "Back-off pulling image \"typo_image\": SignatureValidationFailed: image pull failed for typo_image because the signature validation failed"},
}},
},
expectedEnsureImageMetrics: ensureExistsMetricForLabels("ifnotpresent", "false", "true"),
},
}
}
@ -401,7 +426,9 @@ func ensureSecretImagesTestCases() []pullerTestCase {
{Reason: "Pulling"},
{Reason: "Pulled"},
}, ""},
}},
},
expectedEnsureImageMetrics: ensureExistsMetricForLabels("ifnotpresent", "true", "true"),
},
{
testName: "[KubeletEnsureSecretPulledImages] image present, unknown secret to image pull manager, pull",
containerImage: "present_image",
@ -435,6 +462,7 @@ func ensureSecretImagesTestCases() []pullerTestCase {
{Reason: "Pulled"},
}, ""},
},
expectedEnsureImageMetrics: ensureExistsMetricForLabels("ifnotpresent", "true", "true"),
},
{
testName: "[KubeletEnsureSecretPulledImages] image present, unknown secret to image pull manager, never pull policy -> fail",
@ -466,6 +494,7 @@ func ensureSecretImagesTestCases() []pullerTestCase {
{Reason: "ErrImageNeverPull"},
}, ""},
},
expectedEnsureImageMetrics: ensureExistsMetricForLabels("never", "true", "true"),
},
{
testName: "[KubeletEnsureSecretPulledImages] image present, a secret matches one of known to the image pull manager, don't pull",
@ -500,6 +529,7 @@ func ensureSecretImagesTestCases() []pullerTestCase {
{Reason: "Pulled"},
}, ""},
},
expectedEnsureImageMetrics: ensureExistsMetricForLabels("ifnotpresent", "true", "false"),
},
{
testName: "[KubeletEnsureSecretPulledImages] image present, service account credentials available, don't pull",
@ -535,6 +565,7 @@ func ensureSecretImagesTestCases() []pullerTestCase {
{Reason: "Pulled"},
}, ""},
},
expectedEnsureImageMetrics: ensureExistsMetricForLabels("ifnotpresent", "true", "false"),
},
{
testName: "[KubeletEnsureSecretPulledImages] image present, service account allowed by pull manager, don't pull",
@ -580,6 +611,7 @@ func ensureSecretImagesTestCases() []pullerTestCase {
{Reason: "Pulled"},
}, ""},
},
expectedEnsureImageMetrics: ensureExistsMetricForLabels("ifnotpresent", "true", "false"),
},
{
testName: "[KubeletEnsureSecretPulledImages] image present, mixed credentials (secrets + service accounts), pull required",
@ -623,6 +655,7 @@ func ensureSecretImagesTestCases() []pullerTestCase {
{Reason: "Pulled"},
}, ""},
},
expectedEnsureImageMetrics: ensureExistsMetricForLabels("ifnotpresent", "true", "true"),
},
{
testName: "[KubeletEnsureSecretPulledImages] image present, only node credentials (no source), proceed without tracking",
@ -651,6 +684,7 @@ func ensureSecretImagesTestCases() []pullerTestCase {
{Reason: "Pulled"},
}, ""},
},
expectedEnsureImageMetrics: ensureExistsMetricForLabels("ifnotpresent", "true", "false"),
},
}
@ -1594,3 +1628,16 @@ func TestEnsureImageExistsWithNodeCredentialsOnly(t *testing.T) {
// Image should not be pulled since it's present and accessible
fakeRuntime.AssertCalls([]string{"GetImageRef"})
}
func ensureExistsMetricForLabels(pullPolicy, imagePresentLocally, pullRequired string) string {
const desc = `
# HELP kubelet_image_manager_ensure_image_requests_total [ALPHA] Number of ensure-image requests processed by the kubelet.
# TYPE kubelet_image_manager_ensure_image_requests_total counter
`
return desc + fmt.Sprintf(
"kubelet_image_manager_ensure_image_requests_total{present_locally=\"%s\", pull_policy=\"%s\", pull_required=\"%s\"} 1\n",
imagePresentLocally,
pullPolicy,
pullRequired,
)
}

View file

@ -0,0 +1,65 @@
/*
Copyright 2025 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package images
import (
"strconv"
"strings"
"sync"
v1 "k8s.io/api/core/v1"
"k8s.io/component-base/metrics"
"k8s.io/component-base/metrics/legacyregistry"
kubeletmetrics "k8s.io/kubernetes/pkg/kubelet/metrics"
)
var (
ensureImageRequestsCounter = metrics.NewCounterVec(
&metrics.CounterOpts{
Subsystem: kubeletmetrics.KubeletSubsystem + "_" + "image_manager",
Name: "ensure_image_requests_total",
Help: "Number of ensure-image requests processed by the kubelet.",
StabilityLevel: metrics.ALPHA,
},
[]string{"pull_policy", "present_locally", "pull_required"},
)
)
var once sync.Once
func registerMetrics() {
once.Do(func() {
legacyregistry.MustRegister(ensureImageRequestsCounter)
})
}
func recordEnsureImageRequest(pullPolicy v1.PullPolicy, imagePresentLocally, imagePullRequired *bool) {
presentLocally := "unknown"
if imagePresentLocally != nil {
presentLocally = strconv.FormatBool(*imagePresentLocally)
}
pullRequired := "unknown"
if imagePullRequired != nil {
pullRequired = strconv.FormatBool(*imagePullRequired)
}
ensureImageRequestsCounter.WithLabelValues(
strings.ToLower(string(pullPolicy)),
presentLocally,
pullRequired,
).Inc()
}

View file

@ -0,0 +1,63 @@
/*
Copyright 2025 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package images
import (
"strings"
"testing"
v1 "k8s.io/api/core/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
legacyregistry "k8s.io/component-base/metrics/legacyregistry"
metricstestutil "k8s.io/component-base/metrics/testutil"
runtimeapi "k8s.io/cri-api/pkg/apis/runtime/v1"
"k8s.io/kubernetes/test/utils/ktesting"
)
func TestEnsureImageRequestsMetrics(t *testing.T) {
pod := &v1.Pod{
ObjectMeta: metav1.ObjectMeta{
Name: "test_pod",
Namespace: "test-ns",
UID: "bar",
ResourceVersion: "42",
}}
podSandboxConfig := &runtimeapi.PodSandboxConfig{
Metadata: &runtimeapi.PodSandboxMetadata{
Name: pod.Name,
Namespace: pod.Namespace,
Uid: string(pod.UID),
},
}
cases := pullerTestCases()
useSerializedEnv := true
for _, tt := range cases {
t.Run(tt.testName, func(t *testing.T) {
ctx := ktesting.Init(t)
puller, _, _, container, _, _ := pullerTestEnv(t, tt, useSerializedEnv, nil)
ensureImageRequestsCounter.Reset()
_, _, _ = puller.EnsureImageExists(ctx, &v1.ObjectReference{}, pod, container.Image, tt.pullSecrets, podSandboxConfig, "", container.ImagePullPolicy)
if err := metricstestutil.GatherAndCompare(legacyregistry.DefaultGatherer, strings.NewReader(tt.expectedEnsureImageMetrics), "kubelet_image_manager_ensure_image_requests_total"); err != nil {
t.Fatal(err)
}
})
}
}

View file

@ -47,8 +47,8 @@ var (
// Implementations are expected to abstract the underlying runtimes.
// Implementations are expected to be thread safe.
type ImageManager interface {
// EnsureImageExists ensures that image specified by `imgRef` exists.
EnsureImageExists(ctx context.Context, objRef *v1.ObjectReference, pod *v1.Pod, imgRef string, pullSecrets []v1.Secret, podSandboxConfig *runtimeapi.PodSandboxConfig, podRuntimeHandler string, pullPolicy v1.PullPolicy) (string, string, error)
// EnsureImageExists ensures that image specified by `requestedImage` exists.
EnsureImageExists(ctx context.Context, objRef *v1.ObjectReference, pod *v1.Pod, requestedImage string, pullSecrets []v1.Secret, podSandboxConfig *runtimeapi.PodSandboxConfig, podRuntimeHandler string, pullPolicy v1.PullPolicy) (imageRef, message string, err error)
// TODO(ronl): consolidating image managing and deleting operation in this interface
}