Merge pull request #131317 from bitoku/fix-static-init

Fix: Static pod status is always Init:0/1 if unable to get init container status
This commit is contained in:
Kubernetes Prow Robot 2026-01-15 00:27:38 +05:30 committed by GitHub
commit 616fff8247
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
6 changed files with 423 additions and 6 deletions

View file

@ -2672,6 +2672,31 @@ func (kl *Kubelet) convertToAPIContainerStatuses(pod *v1.Pod, podStatus *kubecon
if s != nil && s.State == kubecontainer.ContainerStateExited && s.ExitCode == 0 {
continue
}
// When pod status is not persisted, init container status may be lost.
// This can occur in the following scenarios:
// 1. Static pods
// 2. Regular pods where init container status failed to sync to the API server
// (e.g., due to control plane issues, though this is rare)
//
// In these cases, if the init container has already been removed from the runtime,
// kubelet cannot determine its status. However, we can infer that non-restartable
// init containers have completed successfully if regular containers have been created.
// Note: Restartable init containers (sidecars) should remain in waiting state and
// will be restarted by the kubelet.
isSidecar := container.RestartPolicy != nil && *container.RestartPolicy == v1.ContainerRestartPolicyAlways
if s == nil &&
kuberuntime.HasAnyRegularContainerCreated(pod, podStatus) &&
!isSidecar &&
statuses[container.Name].State.Waiting != nil {
statuses[container.Name].State = v1.ContainerState{
Terminated: &v1.ContainerStateTerminated{
Reason: "Completed",
Message: "Unable to get init container status from container runtime and pod has been initialized, treat it as exited normally",
ExitCode: 0,
},
}
continue
}
}
// If a container should be restarted in next syncpod, it is *Waiting*.
if !kubecontainer.ShouldContainerBeRestarted(logger, &container, pod, podStatus) {

View file

@ -3797,6 +3797,29 @@ func TestConvertToAPIContainerStatuses(t *testing.T) {
},
RestartPolicy: v1.RestartPolicyAlways,
}
desiredStateWithInitContainer := v1.PodSpec{
NodeName: "machine",
InitContainers: []v1.Container{
{Name: "init-1"},
},
Containers: []v1.Container{
{Name: "containerA"},
},
RestartPolicy: v1.RestartPolicyAlways,
}
desiredStateWithSidecarContainer := v1.PodSpec{
NodeName: "machine",
InitContainers: []v1.Container{
{
Name: "sidecar-1",
RestartPolicy: ptr.To(v1.ContainerRestartPolicyAlways),
},
},
Containers: []v1.Container{
{Name: "containerA"},
},
RestartPolicy: v1.RestartPolicyAlways,
}
now := metav1.Now()
tests := []struct {
@ -4058,6 +4081,81 @@ func TestConvertToAPIContainerStatuses(t *testing.T) {
withRestartCount(waitingStateWithRestartingAllContainers("containerB"), 1),
},
},
{
name: "Unable to get init container status from container runtime and pod has been initialized, treat it as exited normally",
pod: &v1.Pod{
ObjectMeta: metav1.ObjectMeta{
Name: "my-pod",
},
Spec: desiredStateWithInitContainer,
Status: v1.PodStatus{
ContainerStatuses: []v1.ContainerStatus{},
},
},
currentStatus: &kubecontainer.PodStatus{
ContainerStatuses: []*kubecontainer.Status{
{
ID: kubecontainer.ContainerID{ID: "foo"},
Name: "containerA",
StartedAt: time.Unix(1, 0).UTC(),
State: kubecontainer.ContainerStateRunning,
},
},
},
previousStatus: []v1.ContainerStatus{},
containers: desiredStateWithInitContainer.InitContainers,
expected: []v1.ContainerStatus{
{
Name: "init-1",
State: v1.ContainerState{
Terminated: &v1.ContainerStateTerminated{
Reason: "Completed",
Message: "Unable to get init container status from container runtime and pod has been initialized, treat it as exited normally",
ExitCode: 0,
},
},
},
},
hasInitContainers: true,
isInitContainer: true,
},
{
name: "Unable to get sidecar container status from container runtime and pod has been initialized, sidecar container should be waiting",
pod: &v1.Pod{
ObjectMeta: metav1.ObjectMeta{
Name: "my-pod",
},
Spec: desiredStateWithSidecarContainer,
Status: v1.PodStatus{
ContainerStatuses: []v1.ContainerStatus{},
},
},
currentStatus: &kubecontainer.PodStatus{
ContainerStatuses: []*kubecontainer.Status{
{
ID: kubecontainer.ContainerID{ID: "foo"},
Name: "containerA",
StartedAt: time.Unix(1, 0).UTC(),
State: kubecontainer.ContainerStateRunning,
},
},
},
previousStatus: []v1.ContainerStatus{},
containers: desiredStateWithSidecarContainer.InitContainers,
expected: []v1.ContainerStatus{
{
Name: "sidecar-1",
State: v1.ContainerState{
Waiting: &v1.ContainerStateWaiting{
Reason: "PodInitializing",
Message: "",
},
},
},
},
hasInitContainers: true,
isInitContainer: true,
},
}
featuregatetesting.SetFeatureGatesDuringTest(t, utilfeature.DefaultFeatureGate, featuregatetesting.FeatureOverrides{
features.ContainerRestartRules: true,

View file

@ -1008,9 +1008,9 @@ func (m *kubeGenericRuntimeManager) purgeInitContainers(ctx context.Context, pod
}
}
// hasAnyRegularContainerCreated returns true if any regular container has been
// HasAnyRegularContainerCreated returns true if any regular container has been
// created, which indicates all init containers have been initialized.
func hasAnyRegularContainerCreated(pod *v1.Pod, podStatus *kubecontainer.PodStatus) bool {
func HasAnyRegularContainerCreated(pod *v1.Pod, podStatus *kubecontainer.PodStatus) bool {
for _, container := range pod.Spec.Containers {
status := podStatus.FindContainerStatusByName(container.Name)
if status == nil {

View file

@ -1188,7 +1188,7 @@ func (m *kubeGenericRuntimeManager) computePodActions(ctx context.Context, pod *
// If there is any regular container, it means all init containers have
// been initialized.
hasInitialized = hasAnyRegularContainerCreated(pod, podStatus)
hasInitialized = HasAnyRegularContainerCreated(pod, podStatus)
if hasInitialized {
changes.CreateSandbox = false

View file

@ -28,6 +28,8 @@ import (
"strings"
"time"
"github.com/onsi/ginkgo/v2"
"github.com/onsi/gomega"
v1 "k8s.io/api/core/v1"
apierrors "k8s.io/apimachinery/pkg/api/errors"
"k8s.io/apimachinery/pkg/api/resource"
@ -43,9 +45,6 @@ import (
testutils "k8s.io/kubernetes/test/utils"
imageutils "k8s.io/kubernetes/test/utils/image"
admissionapi "k8s.io/pod-security-admission/api"
"github.com/onsi/ginkgo/v2"
"github.com/onsi/gomega"
)
var _ = SIGDescribe(feature.StandaloneMode, framework.WithFeatureGate(features.EnvFiles), func() {
@ -333,6 +332,79 @@ var _ = SIGDescribe(feature.StandaloneMode, func() {
return fmt.Errorf("pod (%v/%v) still exists", ns, staticPodName)
}).Should(gomega.Succeed())
})
f.Context("when the static pod has init container", f.WithSerial(), func() {
f.It("should be ready after init container is removed and kubelet restarts", func(ctx context.Context) {
ginkgo.By("create static pod")
staticPod := &v1.Pod{
TypeMeta: metav1.TypeMeta{
Kind: "Pod",
APIVersion: "v1",
},
ObjectMeta: metav1.ObjectMeta{
Name: "static",
Namespace: f.Namespace.Name,
},
Spec: v1.PodSpec{
Containers: []v1.Container{
{
Name: "main",
Image: imageutils.GetE2EImage(imageutils.Pause),
},
},
InitContainers: []v1.Container{
{
Name: "init",
Image: imageutils.GetE2EImage(imageutils.BusyBox),
Command: []string{"ls"},
},
},
},
}
staticPodName = staticPod.Name
podPath = kubeletCfg.StaticPodPath
ns = staticPod.Namespace
err := scheduleStaticPod(podPath, staticPod.Name, ns, staticPod)
framework.ExpectNoError(err)
var initCtrID string
var startTime *metav1.Time
ginkgo.By("wait for the mirror pod to be updated")
gomega.Eventually(ctx, func(g gomega.Gomega) {
pod, err := getPodFromStandaloneKubelet(ctx, staticPod.Namespace, staticPod.Name)
g.Expect(err).Should(gomega.Succeed())
g.Expect(pod.Status.InitContainerStatuses).To(gomega.HaveLen(1))
cstatus := pod.Status.InitContainerStatuses[0]
// Wait until the init container is terminated.
g.Expect(cstatus.State.Terminated).NotTo(gomega.BeNil())
g.Expect(cstatus.State.Terminated.ContainerID).NotTo(gomega.BeEmpty())
initCtrID = cstatus.ContainerID
startTime = pod.Status.StartTime
}, 2*time.Minute, 5*time.Second).Should(gomega.Succeed())
ginkgo.By("remove init container")
removeInitContainer(ctx, initCtrID)
ginkgo.By("restart kubelet")
restartKubelet(ctx, true)
gomega.Eventually(ctx, func() bool {
return kubeletHealthCheck(kubeletHealthCheckURL)
}, f.Timeouts.PodStart, f.Timeouts.Poll).Should(gomega.BeTrueBecause("kubelet should be started"))
ginkgo.By("wait for the mirror pod to be updated")
gomega.Eventually(ctx, func(g gomega.Gomega) {
pod, err := getPodFromStandaloneKubelet(ctx, staticPod.Namespace, staticPod.Name)
g.Expect(pod.Status.StartTime).NotTo(gomega.Equal(startTime))
g.Expect(err).Should(gomega.Succeed())
g.Expect(pod.Status.InitContainerStatuses).To(gomega.HaveLen(1))
cstatus := pod.Status.InitContainerStatuses[0]
// Init container should be completed.
g.Expect(cstatus.State.Terminated).NotTo(gomega.BeNil())
g.Expect(cstatus.State.Terminated.Reason).To(gomega.Equal("Completed"))
g.Expect(cstatus.State.Terminated.ExitCode).To(gomega.BeZero())
}, 2*time.Minute, 5*time.Second).Should(gomega.Succeed())
})
})
})
})

View file

@ -0,0 +1,222 @@
/*
Copyright 2025 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package e2enode
import (
"context"
"fmt"
"os"
"strings"
"time"
v1 "k8s.io/api/core/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/cli-runtime/pkg/printers"
"k8s.io/kubernetes/test/e2e/framework"
imageutils "k8s.io/kubernetes/test/utils/image"
admissionapi "k8s.io/pod-security-admission/api"
"k8s.io/utils/ptr"
"github.com/onsi/ginkgo/v2"
"github.com/onsi/gomega"
)
// StaticPod e2e node tests.
//
// Both specs follow the same outline: write a static pod manifest with one
// init container into the kubelet's static pod directory, wait for the mirror
// pod to report the init container in the expected state, remove that
// container through the CRI, restart the kubelet, and then verify what the
// mirror pod reports for the init container afterwards.
var _ = SIGDescribe("StaticPod", framework.WithSerial(), func() {
	f := framework.NewDefaultFramework("static-pod")
	f.NamespacePodSecurityLevel = admissionapi.LevelPrivileged

	f.Context("when the static pod has init container", func() {
		f.It("should be ready after init container is removed and kubelet restarts", f.WithNodeConformance(), func(ctx context.Context) {
			ginkgo.By("create static pod")
			staticPod := &v1.Pod{
				TypeMeta: metav1.TypeMeta{
					Kind:       "Pod",
					APIVersion: "v1",
				},
				ObjectMeta: metav1.ObjectMeta{
					Name:      "static",
					Namespace: f.Namespace.Name,
				},
				Spec: v1.PodSpec{
					Containers: []v1.Container{
						{
							Name:  "main",
							Image: imageutils.GetE2EImage(imageutils.Pause),
						},
					},
					InitContainers: []v1.Container{
						{
							Name:    "init",
							Image:   imageutils.GetE2EImage(imageutils.BusyBox),
							Command: []string{"ls"},
						},
					},
				},
			}

			// Named podFile (not staticPodPath) so the package-level
			// staticPodPath helper is not shadowed inside this spec.
			podFile, err := createStaticPodFromPod(kubeletCfg.StaticPodPath, staticPod)
			framework.ExpectNoError(err)
			ginkgo.DeferCleanup(func() {
				ginkgo.By("delete static pod")
				framework.ExpectNoError(os.Remove(podFile))
			})

			var initCtrID string
			var startTime *metav1.Time
			// The mirror pod is looked up as "<static pod name>-<node name>".
			mirrorPodName := fmt.Sprintf("%s-%s", staticPod.Name, framework.TestContext.NodeName)

			ginkgo.By("wait for the mirror pod to be updated")
			gomega.Eventually(ctx, func(g gomega.Gomega) {
				pod, err := f.ClientSet.CoreV1().Pods(staticPod.Namespace).Get(ctx, mirrorPodName, metav1.GetOptions{})
				g.Expect(err).Should(gomega.Succeed())
				g.Expect(pod.Status.InitContainerStatuses).To(gomega.HaveLen(1))
				cstatus := pod.Status.InitContainerStatuses[0]
				// Wait until the init container is terminated.
				g.Expect(cstatus.State.Terminated).NotTo(gomega.BeNil())
				g.Expect(cstatus.State.Terminated.ContainerID).NotTo(gomega.BeEmpty())
				// Remember the container ID and the pod start time for the
				// removal step and the post-restart comparison below.
				initCtrID = cstatus.ContainerID
				startTime = pod.Status.StartTime
			}, 2*time.Minute, 5*time.Second).Should(gomega.Succeed())

			ginkgo.By("remove init container")
			removeInitContainer(ctx, initCtrID)

			ginkgo.By("restart kubelet")
			startKubelet := mustStopKubelet(ctx, f)
			startKubelet(ctx)

			gomega.Eventually(ctx, func() bool {
				return kubeletHealthCheck(kubeletHealthCheckURL)
			}, f.Timeouts.PodStart, f.Timeouts.Poll).Should(gomega.BeTrueBecause("kubelet should be started"))

			ginkgo.By("wait for the mirror pod to be updated")
			gomega.Eventually(ctx, func(g gomega.Gomega) {
				pod, err := f.ClientSet.CoreV1().Pods(staticPod.Namespace).Get(ctx, mirrorPodName, metav1.GetOptions{})
				// Check the error before looking at any field of the result.
				g.Expect(err).Should(gomega.Succeed())
				// Only accept a status whose start time differs from the one
				// recorded before the kubelet restart.
				g.Expect(pod.Status.StartTime).NotTo(gomega.Equal(startTime))
				g.Expect(pod.Status.InitContainerStatuses).To(gomega.HaveLen(1))
				cstatus := pod.Status.InitContainerStatuses[0]
				// Init container should be completed.
				g.Expect(cstatus.State.Terminated).NotTo(gomega.BeNil())
				g.Expect(cstatus.State.Terminated.Reason).To(gomega.Equal("Completed"))
				g.Expect(cstatus.State.Terminated.ExitCode).To(gomega.BeZero())
			}, 2*time.Minute, 5*time.Second).Should(gomega.Succeed())
		})
	})

	f.Context("when the static pod has sidecar container", func() {
		f.It("should be ready after sidecar container is removed and kubelet restarts", f.WithNodeConformance(), func(ctx context.Context) {
			ginkgo.By("create static pod")
			staticPod := &v1.Pod{
				TypeMeta: metav1.TypeMeta{
					Kind:       "Pod",
					APIVersion: "v1",
				},
				ObjectMeta: metav1.ObjectMeta{
					Name:      "static",
					Namespace: f.Namespace.Name,
				},
				Spec: v1.PodSpec{
					Containers: []v1.Container{
						{
							Name:  "main",
							Image: imageutils.GetE2EImage(imageutils.Pause),
						},
					},
					InitContainers: []v1.Container{
						{
							// RestartPolicy Always on an init container makes
							// it a restartable (sidecar) init container.
							Name:          "init",
							Image:         imageutils.GetE2EImage(imageutils.Pause),
							RestartPolicy: ptr.To(v1.ContainerRestartPolicyAlways),
						},
					},
				},
			}

			// Named podFile (not staticPodPath) so the package-level
			// staticPodPath helper is not shadowed inside this spec.
			podFile, err := createStaticPodFromPod(kubeletCfg.StaticPodPath, staticPod)
			framework.ExpectNoError(err)
			ginkgo.DeferCleanup(func() {
				ginkgo.By("delete static pod")
				framework.ExpectNoError(os.Remove(podFile))
			})

			var sidecarCtrID string
			var startTime *metav1.Time
			// The mirror pod is looked up as "<static pod name>-<node name>".
			mirrorPodName := fmt.Sprintf("%s-%s", staticPod.Name, framework.TestContext.NodeName)

			ginkgo.By("wait for the mirror pod to be updated")
			gomega.Eventually(ctx, func(g gomega.Gomega) {
				pod, err := f.ClientSet.CoreV1().Pods(staticPod.Namespace).Get(ctx, mirrorPodName, metav1.GetOptions{})
				g.Expect(err).Should(gomega.Succeed())
				g.Expect(pod.Status.InitContainerStatuses).To(gomega.HaveLen(1))
				cstatus := pod.Status.InitContainerStatuses[0]
				// Wait until the sidecar container starts running.
				g.Expect(cstatus.State.Running).NotTo(gomega.BeNil())
				sidecarCtrID = cstatus.ContainerID
				startTime = pod.Status.StartTime
			}, 2*time.Minute, 5*time.Second).Should(gomega.Succeed())

			// Stop kubelet first not to restart the sidecar container.
			ginkgo.By("stop kubelet")
			startKubelet := mustStopKubelet(ctx, f)

			ginkgo.By("remove sidecar container")
			removeInitContainer(ctx, sidecarCtrID)

			ginkgo.By("start kubelet")
			startKubelet(ctx)

			gomega.Eventually(ctx, func() bool {
				return kubeletHealthCheck(kubeletHealthCheckURL)
			}, f.Timeouts.PodStart, f.Timeouts.Poll).Should(gomega.BeTrueBecause("kubelet should be started"))

			ginkgo.By("wait for the mirror pod to be updated")
			gomega.Eventually(ctx, func(g gomega.Gomega) {
				pod, err := f.ClientSet.CoreV1().Pods(staticPod.Namespace).Get(ctx, mirrorPodName, metav1.GetOptions{})
				// Check the error before looking at any field of the result.
				g.Expect(err).Should(gomega.Succeed())
				// Only accept a status whose start time differs from the one
				// recorded before the kubelet restart.
				g.Expect(pod.Status.StartTime).NotTo(gomega.Equal(startTime))
				g.Expect(pod.Status.InitContainerStatuses).To(gomega.HaveLen(1))
				cstatus := pod.Status.InitContainerStatuses[0]
				// Sidecar container should be restarted and running.
				g.Expect(cstatus.State.Running).NotTo(gomega.BeNil())
			}, 2*time.Minute, 5*time.Second).Should(gomega.Succeed())
		})
	})
})
// createStaticPodFromPod writes pod as YAML to the file returned by
// staticPodPath(dir, pod.Name, pod.Namespace) and returns that file's path.
//
// The file is created if it does not exist and truncated if it does. An error
// is returned when the file cannot be opened, when printing the pod fails, or
// when closing the file fails. On any failure after the file was opened, the
// partially written file is removed on a best-effort basis and the returned
// path is empty, so callers never see a path next to a non-nil error.
func createStaticPodFromPod(dir string, pod *v1.Pod) (path string, err error) {
	file := staticPodPath(dir, pod.Name, pod.Namespace)

	f, err := os.OpenFile(file, os.O_RDWR|os.O_TRUNC|os.O_CREATE, 0666)
	if err != nil {
		return "", err
	}
	defer func() {
		// A Close failure on a file opened for writing can mean the data did
		// not make it to disk, so report it unless an earlier error wins.
		if cerr := f.Close(); err == nil {
			err = cerr
		}
		if err != nil {
			// Callers register their own cleanup only after a successful
			// return; do not leave a broken manifest behind. Removal is best
			// effort because the original error is the one worth reporting.
			_ = os.Remove(file)
			path = ""
		}
	}()

	y := printers.YAMLPrinter{}
	return file, y.PrintObj(pod, f)
}
// removeInitContainer stops and then removes the container identified by
// ctrID using the client returned by getCRIClient.
//
// ctrID must contain exactly one "://" separator; the part after it is the ID
// handed to the CRI client. Any failure to obtain the client, an ID that does
// not split into exactly two parts, or a failed removal fails the running
// test through the framework/gomega assertions.
func removeInitContainer(ctx context.Context, ctrID string) {
	runtimeClient, _, err := getCRIClient()
	framework.ExpectNoError(err)

	// Strip the "<prefix>://" part so only the bare container ID remains.
	parts := strings.Split(ctrID, "://")
	gomega.Expect(parts).To(gomega.HaveLen(2))
	rawID := parts[1]

	// Make sure the container is stopped before removing it. This may fail,
	// so the result is deliberately ignored.
	_ = runtimeClient.StopContainer(ctx, rawID, 0)

	framework.ExpectNoError(runtimeClient.RemoveContainer(ctx, rawID))
}