Fix: static pod status is always Init:0/1 if the init container status cannot be retrieved from the container runtime.

Signed-off-by: Ayato Tokubi <atokubi@redhat.com>
This commit is contained in:
V0dik 2024-01-21 16:55:32 +00:00 committed by Ayato Tokubi
parent c180d6762d
commit ce3f6b1d0e
5 changed files with 348 additions and 3 deletions

View file

@ -60,6 +60,7 @@ import (
kubecontainer "k8s.io/kubernetes/pkg/kubelet/container"
"k8s.io/kubernetes/pkg/kubelet/envvars"
"k8s.io/kubernetes/pkg/kubelet/images"
"k8s.io/kubernetes/pkg/kubelet/kuberuntime"
"k8s.io/kubernetes/pkg/kubelet/metrics"
"k8s.io/kubernetes/pkg/kubelet/status"
kubetypes "k8s.io/kubernetes/pkg/kubelet/types"
@ -2620,6 +2621,24 @@ func (kl *Kubelet) convertToAPIContainerStatuses(pod *v1.Pod, podStatus *kubecon
if s != nil && s.State == kubecontainer.ContainerStateExited && s.ExitCode == 0 {
continue
}
// When the pod is static, init container status might be lost.
// We can assume that the init containers are already completed if the main containers are created.
// When the init container is a sidecar container, it should be waiting (and will be restarted).
isSidecar := container.RestartPolicy != nil && *container.RestartPolicy == v1.ContainerRestartPolicyAlways
if s == nil &&
kubetypes.IsStaticPod(pod) &&
kuberuntime.HasAnyRegularContainerCreated(pod, podStatus) &&
!isSidecar &&
statuses[container.Name].State.Waiting != nil {
statuses[container.Name].State = v1.ContainerState{
Terminated: &v1.ContainerStateTerminated{
Reason: "Completed",
Message: "Unable to get init container status from container runtime and pod has been initialized, treat it as exited normally",
ExitCode: 0,
},
}
continue
}
}
// If a container should be restarted in next syncpod, it is *Waiting*.
if !kubecontainer.ShouldContainerBeRestarted(logger, &container, pod, podStatus) {

View file

@ -3796,6 +3796,29 @@ func TestConvertToAPIContainerStatuses(t *testing.T) {
},
RestartPolicy: v1.RestartPolicyAlways,
}
desiredStateWithInitContainer := v1.PodSpec{
NodeName: "machine",
InitContainers: []v1.Container{
{Name: "init-1"},
},
Containers: []v1.Container{
{Name: "containerA"},
},
RestartPolicy: v1.RestartPolicyAlways,
}
desiredStateWithSidecarContainer := v1.PodSpec{
NodeName: "machine",
InitContainers: []v1.Container{
{
Name: "sidecar-1",
RestartPolicy: ptr.To(v1.ContainerRestartPolicyAlways),
},
},
Containers: []v1.Container{
{Name: "containerA"},
},
RestartPolicy: v1.RestartPolicyAlways,
}
now := metav1.Now()
tests := []struct {
@ -4057,6 +4080,87 @@ func TestConvertToAPIContainerStatuses(t *testing.T) {
withRestartCount(waitingStateWithRestartingAllContainers("containerB"), 1),
},
},
{
name: "Unable to get init container status from container runtime and pod has been initialized, treat it as exited normally",
pod: &v1.Pod{
ObjectMeta: metav1.ObjectMeta{
Name: "my-pod",
Annotations: map[string]string{
kubetypes.ConfigSourceAnnotationKey: "file", // static pod
},
},
Spec: desiredStateWithInitContainer,
Status: v1.PodStatus{
ContainerStatuses: []v1.ContainerStatus{},
},
},
currentStatus: &kubecontainer.PodStatus{
ContainerStatuses: []*kubecontainer.Status{
{
ID: kubecontainer.ContainerID{ID: "foo"},
Name: "containerA",
StartedAt: time.Unix(1, 0).UTC(),
State: kubecontainer.ContainerStateRunning,
},
},
},
previousStatus: []v1.ContainerStatus{},
containers: desiredStateWithInitContainer.InitContainers,
expected: []v1.ContainerStatus{
{
Name: "init-1",
State: v1.ContainerState{
Terminated: &v1.ContainerStateTerminated{
Reason: "Completed",
Message: "Unable to get init container status from container runtime and pod has been initialized, treat it as exited normally",
ExitCode: 0,
},
},
},
},
hasInitContainers: true,
isInitContainer: true,
},
{
name: "Unable to get sidecar container status from container runtime and pod has been initialized, sidecar container should be waiting",
pod: &v1.Pod{
ObjectMeta: metav1.ObjectMeta{
Name: "my-pod",
Annotations: map[string]string{
kubetypes.ConfigSourceAnnotationKey: "file", // static pod
},
},
Spec: desiredStateWithSidecarContainer,
Status: v1.PodStatus{
ContainerStatuses: []v1.ContainerStatus{},
},
},
currentStatus: &kubecontainer.PodStatus{
ContainerStatuses: []*kubecontainer.Status{
{
ID: kubecontainer.ContainerID{ID: "foo"},
Name: "containerA",
StartedAt: time.Unix(1, 0).UTC(),
State: kubecontainer.ContainerStateRunning,
},
},
},
previousStatus: []v1.ContainerStatus{},
containers: desiredStateWithSidecarContainer.InitContainers,
expected: []v1.ContainerStatus{
{
Name: "sidecar-1",
State: v1.ContainerState{
Waiting: &v1.ContainerStateWaiting{
Reason: "PodInitializing",
Message: "",
},
},
},
},
hasInitContainers: true,
isInitContainer: true,
},
}
featuregatetesting.SetFeatureGatesDuringTest(t, utilfeature.DefaultFeatureGate, featuregatetesting.FeatureOverrides{
features.ContainerRestartRules: true,

View file

@ -1009,9 +1009,9 @@ func (m *kubeGenericRuntimeManager) purgeInitContainers(ctx context.Context, pod
}
}
// hasAnyRegularContainerCreated returns true if any regular container has been
// HasAnyRegularContainerCreated returns true if any regular container has been
// created, which indicates all init containers have been initialized.
func hasAnyRegularContainerCreated(pod *v1.Pod, podStatus *kubecontainer.PodStatus) bool {
func HasAnyRegularContainerCreated(pod *v1.Pod, podStatus *kubecontainer.PodStatus) bool {
for _, container := range pod.Spec.Containers {
status := podStatus.FindContainerStatusByName(container.Name)
if status == nil {

View file

@ -1188,7 +1188,7 @@ func (m *kubeGenericRuntimeManager) computePodActions(ctx context.Context, pod *
// If there is any regular container, it means all init containers have
// been initialized.
hasInitialized = hasAnyRegularContainerCreated(pod, podStatus)
hasInitialized = HasAnyRegularContainerCreated(pod, podStatus)
if hasInitialized {
changes.CreateSandbox = false

View file

@ -0,0 +1,222 @@
/*
Copyright 2025 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package e2enode
import (
"context"
"fmt"
"os"
"strings"
"time"
v1 "k8s.io/api/core/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/cli-runtime/pkg/printers"
"k8s.io/kubernetes/test/e2e/framework"
imageutils "k8s.io/kubernetes/test/utils/image"
admissionapi "k8s.io/pod-security-admission/api"
"k8s.io/utils/ptr"
"github.com/onsi/ginkgo/v2"
"github.com/onsi/gomega"
)
// StaticPod exercises static pods whose init (or sidecar) container has been
// removed from the container runtime while the pod keeps running. After the
// kubelet is restarted, the mirror pod must still report a sensible init
// container status. The specs are registered as serial because they stop and
// start the kubelet.
var _ = SIGDescribe("StaticPod", framework.WithSerial(), func() {
	f := framework.NewDefaultFramework("static-pod")
	f.NamespacePodSecurityLevel = admissionapi.LevelPrivileged

	f.Context("when the static pod has init container", func() {
		f.It("should be ready after init container is removed and kubelet restarts", f.WithNodeConformance(), func(ctx context.Context) {
			ginkgo.By("create static pod")
			staticPod := &v1.Pod{
				TypeMeta: metav1.TypeMeta{
					Kind:       "Pod",
					APIVersion: "v1",
				},
				ObjectMeta: metav1.ObjectMeta{
					Name:      "static",
					Namespace: f.Namespace.Name,
				},
				Spec: v1.PodSpec{
					Containers: []v1.Container{
						{
							Name:  "main",
							Image: imageutils.GetE2EImage(imageutils.Pause),
						},
					},
					InitContainers: []v1.Container{
						{
							Name:    "init",
							Image:   imageutils.GetE2EImage(imageutils.BusyBox),
							Command: []string{"ls"},
						},
					},
				},
			}
			staticPodPath, err := createStaticPodFromPod(kubeletCfg.StaticPodPath, staticPod)
			framework.ExpectNoError(err)
			ginkgo.DeferCleanup(func() {
				ginkgo.By("delete static pod")
				framework.ExpectNoError(os.Remove(staticPodPath))
			})

			var initCtrID string
			var startTime *metav1.Time
			mirrorPodName := fmt.Sprintf("%s-%s", staticPod.Name, framework.TestContext.NodeName)

			ginkgo.By("wait for the mirror pod to be updated")
			gomega.Eventually(ctx, func(g gomega.Gomega) {
				pod, err := f.ClientSet.CoreV1().Pods(staticPod.Namespace).Get(ctx, mirrorPodName, metav1.GetOptions{})
				g.Expect(err).Should(gomega.Succeed())
				g.Expect(pod.Status.InitContainerStatuses).To(gomega.HaveLen(1))
				cstatus := pod.Status.InitContainerStatuses[0]
				// Wait until the init container is terminated.
				g.Expect(cstatus.State.Terminated).NotTo(gomega.BeNil())
				g.Expect(cstatus.State.Terminated.ContainerID).NotTo(gomega.BeEmpty())
				// The ID recorded below is the one handed to the runtime, so
				// validate it here where a failure is still retried.
				g.Expect(cstatus.ContainerID).NotTo(gomega.BeEmpty())
				initCtrID = cstatus.ContainerID
				startTime = pod.Status.StartTime
			}, 2*time.Minute, 5*time.Second).Should(gomega.Succeed())

			ginkgo.By("remove init container")
			removeInitContainer(ctx, initCtrID)

			ginkgo.By("restart kubelet")
			startKubelet := mustStopKubelet(ctx, f)
			startKubelet(ctx)
			gomega.Eventually(ctx, func() bool {
				return kubeletHealthCheck(kubeletHealthCheckURL)
			}, f.Timeouts.PodStart, f.Timeouts.Poll).Should(gomega.BeTrueBecause("kubelet should be started"))

			ginkgo.By("wait for the mirror pod to be updated")
			gomega.Eventually(ctx, func(g gomega.Gomega) {
				pod, err := f.ClientSet.CoreV1().Pods(staticPod.Namespace).Get(ctx, mirrorPodName, metav1.GetOptions{})
				// Check the error before inspecting the returned pod so an API
				// failure is reported as such.
				g.Expect(err).Should(gomega.Succeed())
				g.Expect(pod.Status.StartTime).NotTo(gomega.Equal(startTime))
				g.Expect(pod.Status.InitContainerStatuses).To(gomega.HaveLen(1))
				cstatus := pod.Status.InitContainerStatuses[0]
				// Init container should be completed.
				g.Expect(cstatus.State.Terminated).NotTo(gomega.BeNil())
				g.Expect(cstatus.State.Terminated.Reason).To(gomega.Equal("Completed"))
				g.Expect(cstatus.State.Terminated.ExitCode).To(gomega.BeZero())
			}, 2*time.Minute, 5*time.Second).Should(gomega.Succeed())
		})
	})

	f.Context("when the static pod has sidecar container", func() {
		f.It("should be ready after sidecar container is removed and kubelet restarts", f.WithNodeConformance(), func(ctx context.Context) {
			ginkgo.By("create static pod")
			staticPod := &v1.Pod{
				TypeMeta: metav1.TypeMeta{
					Kind:       "Pod",
					APIVersion: "v1",
				},
				ObjectMeta: metav1.ObjectMeta{
					Name:      "static",
					Namespace: f.Namespace.Name,
				},
				Spec: v1.PodSpec{
					Containers: []v1.Container{
						{
							Name:  "main",
							Image: imageutils.GetE2EImage(imageutils.Pause),
						},
					},
					InitContainers: []v1.Container{
						{
							Name:  "init",
							Image: imageutils.GetE2EImage(imageutils.Pause),
							// RestartPolicy Always on an init container makes it a sidecar.
							RestartPolicy: ptr.To(v1.ContainerRestartPolicyAlways),
						},
					},
				},
			}
			staticPodPath, err := createStaticPodFromPod(kubeletCfg.StaticPodPath, staticPod)
			framework.ExpectNoError(err)
			ginkgo.DeferCleanup(func() {
				ginkgo.By("delete static pod")
				framework.ExpectNoError(os.Remove(staticPodPath))
			})

			var sidecarCtrID string
			var startTime *metav1.Time
			mirrorPodName := fmt.Sprintf("%s-%s", staticPod.Name, framework.TestContext.NodeName)

			ginkgo.By("wait for the mirror pod to be updated")
			gomega.Eventually(ctx, func(g gomega.Gomega) {
				pod, err := f.ClientSet.CoreV1().Pods(staticPod.Namespace).Get(ctx, mirrorPodName, metav1.GetOptions{})
				g.Expect(err).Should(gomega.Succeed())
				g.Expect(pod.Status.InitContainerStatuses).To(gomega.HaveLen(1))
				cstatus := pod.Status.InitContainerStatuses[0]
				// Wait until the sidecar container starts running.
				g.Expect(cstatus.State.Running).NotTo(gomega.BeNil())
				// The ID recorded below is the one handed to the runtime, so
				// validate it here where a failure is still retried.
				g.Expect(cstatus.ContainerID).NotTo(gomega.BeEmpty())
				sidecarCtrID = cstatus.ContainerID
				startTime = pod.Status.StartTime
			}, 2*time.Minute, 5*time.Second).Should(gomega.Succeed())

			// Stop the kubelet before removing the container so that it cannot
			// restart the sidecar in between.
			ginkgo.By("stop kubelet")
			startKubelet := mustStopKubelet(ctx, f)

			ginkgo.By("remove sidecar container")
			removeInitContainer(ctx, sidecarCtrID)

			ginkgo.By("start kubelet")
			startKubelet(ctx)
			gomega.Eventually(ctx, func() bool {
				return kubeletHealthCheck(kubeletHealthCheckURL)
			}, f.Timeouts.PodStart, f.Timeouts.Poll).Should(gomega.BeTrueBecause("kubelet should be started"))

			ginkgo.By("wait for the mirror pod to be updated")
			gomega.Eventually(ctx, func(g gomega.Gomega) {
				pod, err := f.ClientSet.CoreV1().Pods(staticPod.Namespace).Get(ctx, mirrorPodName, metav1.GetOptions{})
				// Check the error before inspecting the returned pod so an API
				// failure is reported as such.
				g.Expect(err).Should(gomega.Succeed())
				g.Expect(pod.Status.StartTime).NotTo(gomega.Equal(startTime))
				g.Expect(pod.Status.InitContainerStatuses).To(gomega.HaveLen(1))
				cstatus := pod.Status.InitContainerStatuses[0]
				// Sidecar container should be restarted and running.
				g.Expect(cstatus.State.Running).NotTo(gomega.BeNil())
			}, 2*time.Minute, 5*time.Second).Should(gomega.Succeed())
		})
	})
})
// createStaticPodFromPod serializes pod as YAML into the manifest file whose
// location is computed by staticPodPath from dir and the pod's name and
// namespace. The file is created if missing and truncated otherwise.
//
// It returns the manifest path together with any error from writing or
// closing the file. If the file cannot be opened, the returned path is empty.
func createStaticPodFromPod(dir string, pod *v1.Pod) (path string, err error) {
	file := staticPodPath(dir, pod.Name, pod.Namespace)

	f, err := os.OpenFile(file, os.O_RDWR|os.O_TRUNC|os.O_CREATE, 0666)
	if err != nil {
		return "", err
	}
	defer func() {
		// A failed Close on a freshly written file can mean the manifest was
		// not fully persisted, so surface it unless an earlier error already
		// explains the failure.
		if closeErr := f.Close(); closeErr != nil && err == nil {
			err = closeErr
		}
	}()

	y := printers.YAMLPrinter{}
	return file, y.PrintObj(pod, f)
}
// removeInitContainer deletes a container from the container runtime via the
// CRI client. ctrID is expected in the "<prefix>://<id>" form; only the part
// after "://" is passed to the runtime. Any failure other than stopping the
// container aborts the running spec.
func removeInitContainer(ctx context.Context, ctrID string) {
	runtimeClient, _, err := getCRIClient()
	framework.ExpectNoError(err)

	parts := strings.Split(ctrID, "://")
	gomega.Expect(parts).To(gomega.HaveLen(2))
	rawID := parts[1]

	// Stopping is best effort: the error is deliberately ignored, and the
	// removal below is what must succeed.
	_ = runtimeClient.StopContainer(ctx, rawID, 0)

	framework.ExpectNoError(runtimeClient.RemoveContainer(ctx, rawID))
}