diff --git a/pkg/kubelet/kuberuntime/kuberuntime_manager.go b/pkg/kubelet/kuberuntime/kuberuntime_manager.go index 8b5143e4cc0..758cdabc010 100644 --- a/pkg/kubelet/kuberuntime/kuberuntime_manager.go +++ b/pkg/kubelet/kuberuntime/kuberuntime_manager.go @@ -1250,6 +1250,25 @@ func (m *kubeGenericRuntimeManager) SyncPod(ctx context.Context, pod *v1.Pod, po logger.V(4).Info("Creating PodSandbox for pod", "pod", klog.KObj(pod)) metrics.StartedPodsTotal.Inc() + if utilfeature.DefaultFeatureGate.Enabled(features.UserNamespacesSupport) && pod.Spec.HostUsers != nil && !*pod.Spec.HostUsers { + metrics.StartedUserNamespacedPodsTotal.Inc() + // Failures in user namespace creation could happen at any point in the pod lifecycle, + // but usually will be caught in container creation. + // To avoid specifically handling each error case, inspect the sync result once the sync has finished. + defer func() { + // A failed sync step counts as one error for this sync, however many steps failed. + for _, res := range result.SyncResults { + if res.Error != nil { + metrics.StartedUserNamespacedPodsErrorsTotal.Inc() + return + } + } + // No step reported an error, so count an overall SyncError instead. + if result.SyncError != nil { + metrics.StartedUserNamespacedPodsErrorsTotal.Inc() + } + }() + } createSandboxResult := kubecontainer.NewSyncResult(kubecontainer.CreatePodSandbox, format.Pod(pod)) result.AddSyncResult(createSandboxResult) diff --git a/pkg/kubelet/metrics/metrics.go b/pkg/kubelet/metrics/metrics.go index 684c912f420..09f18f5d11e 100644 --- a/pkg/kubelet/metrics/metrics.go +++ b/pkg/kubelet/metrics/metrics.go @@ -105,6 +105,10 @@ const ( StartedHostProcessContainersTotalKey = "started_host_process_containers_total" StartedHostProcessContainersErrorsTotalKey = "started_host_process_containers_errors_total" + // Metrics to track UserNamespaced (hostUsers = false) pods. 
+ StartedUserNamespacedPodsTotalKey = "started_user_namespaced_pods_total" + StartedUserNamespacedPodsErrorsTotalKey = "started_user_namespaced_pods_errors_total" + // Metrics to track ephemeral container usage by this kubelet ManagedEphemeralContainersKey = "managed_ephemeral_containers" @@ -751,6 +755,24 @@ var ( }, []string{"container_type", "code"}, ) + // StartedUserNamespacedPodsTotal is a counter that tracks the number of attempts to start user namespaced pods (hostUsers = false). + StartedUserNamespacedPodsTotal = metrics.NewCounter( + &metrics.CounterOpts{ + Subsystem: KubeletSubsystem, + Name: StartedUserNamespacedPodsTotalKey, + Help: "Cumulative number of pods with user namespaces started. This metric will only be collected on Linux.", + StabilityLevel: metrics.ALPHA, + }, + ) + // StartedUserNamespacedPodsErrorsTotal is a counter that tracks the number of errors when starting user namespaced pods. + StartedUserNamespacedPodsErrorsTotal = metrics.NewCounter( + &metrics.CounterOpts{ + Subsystem: KubeletSubsystem, + Name: StartedUserNamespacedPodsErrorsTotalKey, + Help: "Cumulative number of errors when starting pods with user namespaces. This metric will only be collected on Linux.", + StabilityLevel: metrics.ALPHA, + }, + ) // ManagedEphemeralContainers is a gauge that indicates how many ephemeral containers are managed by this kubelet. 
ManagedEphemeralContainers = metrics.NewGauge( &metrics.GaugeOpts{ @@ -1214,6 +1236,10 @@ func Register(collectors ...metrics.StableCollector) { legacyregistry.MustRegister(PodResourcesEndpointRequestsGetCount) legacyregistry.MustRegister(PodResourcesEndpointErrorsGetCount) } + if utilfeature.DefaultFeatureGate.Enabled(features.UserNamespacesSupport) { + legacyregistry.MustRegister(StartedUserNamespacedPodsTotal) + legacyregistry.MustRegister(StartedUserNamespacedPodsErrorsTotal) + } legacyregistry.MustRegister(StartedPodsTotal) legacyregistry.MustRegister(StartedPodsErrorsTotal) legacyregistry.MustRegister(StartedContainersTotal) diff --git a/test/e2e/common/node/security_context.go b/test/e2e/common/node/security_context.go index 8e4213b7752..43be7c531a5 100644 --- a/test/e2e/common/node/security_context.go +++ b/test/e2e/common/node/security_context.go @@ -33,8 +33,10 @@ import ( "k8s.io/kubernetes/pkg/features" "k8s.io/kubernetes/pkg/kubelet/events" "k8s.io/kubernetes/pkg/kubelet/lifecycle" + "k8s.io/kubernetes/pkg/kubelet/metrics" "k8s.io/kubernetes/test/e2e/feature" "k8s.io/kubernetes/test/e2e/framework" + e2emetrics "k8s.io/kubernetes/test/e2e/framework/metrics" e2epod "k8s.io/kubernetes/test/e2e/framework/pod" e2epodoutput "k8s.io/kubernetes/test/e2e/framework/pod/output" e2eskipper "k8s.io/kubernetes/test/e2e/framework/skipper" @@ -387,6 +389,38 @@ var _ = SIGDescribe("Security Context", func() { strings.Repeat(fmt.Sprintf("=%v\n", fsGroup), len(configMap.Data)), }) }) + f.It("metrics should report count of started and failed user namespaced pods [LinuxOnly]", feature.UserNamespacesSupport, framework.WithFeatureGate(features.UserNamespacesSupport), func(ctx context.Context) { + targetNode, err := findLinuxNode(ctx, f) + framework.ExpectNoError(err, "Error finding Linux node") + framework.Logf("Using node: %v", targetNode.Name) + + ginkgo.By("Getting initial kubelet metrics values") + beforeMetrics, err := getCurrentUserNamespacedPodsMetrics(ctx, f, 
targetNode.Name) + framework.ExpectNoError(err, "Error getting initial kubelet metrics for node") + framework.Logf("Initial UserNamespaced pods metrics -- StartedPods: %v, StartedPodsErrors: %v", beforeMetrics.StartedPods, beforeMetrics.StartedPodsErrors) + + ginkgo.By("Scheduling a pod with a UserNamespace that will fail") + + createdPod := makePod(false) + createdPod.Spec.NodeName = targetNode.Name + createdPod.Spec.Containers[0].Command = []string{"bogus"} + + createdPod = e2epod.NewPodClient(f).Create(ctx, createdPod) + ev, err := e2epod.NewPodClient(f).WaitForErrorEventOrSuccess(ctx, createdPod) + framework.ExpectNoError(err) + gomega.Expect(ev).NotTo(gomega.BeNil()) + gomega.Expect(ev.Reason).To(gomega.Equal(events.FailedToCreateContainer)) + + ginkgo.By("Getting subsequent kubelet metrics values") + + afterMetrics, err := getCurrentUserNamespacedPodsMetrics(ctx, f, targetNode.Name) + framework.ExpectNoError(err, "Error getting subsequent kubelet metrics for node") + framework.Logf("Subsequent UserNamespaced pods metrics -- StartedPods: %v, StartedPodsErrors: %v", afterMetrics.StartedPods, afterMetrics.StartedPodsErrors) + + ginkgo.By("Ensuring metrics were updated") + gomega.Expect(beforeMetrics.StartedPods).To(gomega.BeNumerically("<", afterMetrics.StartedPods), "Count of started UserNamespaced pods should increase") + gomega.Expect(beforeMetrics.StartedPodsErrors).To(gomega.BeNumerically("<", afterMetrics.StartedPodsErrors), "Count of started UserNamespaced pods errors should increase") + }) }) ginkgo.Context("When creating a container with runAsUser", func() { @@ -1065,3 +1099,31 @@ func kubeletUsernsMappings(subuidBinary string) (uint32, uint32, error) { return parseGetSubIdsOutput(string(outUids)) } + +// getCurrentUserNamespacedPodsMetrics returns a UserNamespacedPodsMetrics object. Any metrics that do not have any +// values reported will be set to 0. 
+func getCurrentUserNamespacedPodsMetrics(ctx context.Context, f *framework.Framework, nodeName string) (UserNamespacedPodsMetrics, error) {
+	var totals UserNamespacedPodsMetrics
+
+	kubeletMetrics, err := e2emetrics.GetKubeletMetrics(ctx, f.ClientSet, nodeName)
+	if err != nil {
+		return totals, err
+	}
+
+	// Sum every sample of each counter; a key with no samples leaves its field at 0.
+	for _, sample := range kubeletMetrics[metrics.StartedUserNamespacedPodsTotalKey] {
+		totals.StartedPods += int(sample.Value)
+	}
+	for _, sample := range kubeletMetrics[metrics.StartedUserNamespacedPodsErrorsTotalKey] {
+		totals.StartedPodsErrors += int(sample.Value)
+	}
+
+	return totals, nil
+}
+
+// UserNamespacedPodsMetrics holds the user namespaced pod counters read from a kubelet,
+// each summed over all samples reported for its metric key.
+type UserNamespacedPodsMetrics struct {
+	StartedPods       int
+	StartedPodsErrors int
+}