diff --git a/pkg/kubelet/prober/worker.go b/pkg/kubelet/prober/worker.go index 35404eff758..4863cf91f7a 100644 --- a/pkg/kubelet/prober/worker.go +++ b/pkg/kubelet/prober/worker.go @@ -243,12 +243,17 @@ func (w *worker) doProbe(ctx context.Context) (keepGoing bool) { } } + isRestartableInitContainer := w.isInitContainer() && + w.container.RestartPolicy != nil && + *w.container.RestartPolicy == v1.ContainerRestartPolicyAlways + if w.containerID.String() != c.ContainerID { if !w.containerID.IsEmpty() { w.resultsManager.Remove(w.containerID) } w.containerID = kubecontainer.ParseContainerID(c.ContainerID) + if !utilfeature.DefaultFeatureGate.Enabled(features.ChangeContainerStatusOnKubeletRestart) { // On kubelet restart, we don't want to immediately set the probe result to Failure, // as this could cause a container that was Ready to become NotReady. @@ -259,6 +264,18 @@ func (w *worker) doProbe(ctx context.Context) (keepGoing bool) { isRestart = true } } + + // For restartable init containers (sidecars) with a startup probe, seed the + // startup result with Success if the container had already started before the + // kubelet restarted. This ensures startupManager.Get() returns a valid result, + // allowing computeInitContainerActions to detect pod initialization. + // See https://github.com/kubernetes/kubernetes/issues/136910 + if isRestartableInitContainer && w.probeType == startup { + if c.Started != nil && *c.Started { + w.resultsManager.Set(w.containerID, results.Success, w.pod) + } + } + if !isRestart { w.resultsManager.Set(w.containerID, w.initialValue, w.pod) } @@ -282,9 +299,6 @@ func (w *worker) doProbe(ctx context.Context) (keepGoing bool) { w.resultsManager.Set(w.containerID, results.Failure, w.pod) } - isRestartableInitContainer := w.isInitContainer() && - w.container.RestartPolicy != nil && *w.container.RestartPolicy == v1.ContainerRestartPolicyAlways - // Abort if the container will not be restarted. if utilfeature.DefaultFeatureGate.Enabled(features.ContainerRestartRules) { if c.State.Terminated == nil { diff --git a/pkg/kubelet/prober/worker_test.go b/pkg/kubelet/prober/worker_test.go index 243140b7a10..a9d5e8802d1 100644 --- a/pkg/kubelet/prober/worker_test.go +++ b/pkg/kubelet/prober/worker_test.go @@ -908,6 +908,8 @@ func TestChangeContainerStatusOnKubeletRestart(t *testing.T) { probeType probeType initialValue results.Result expectSet bool + isSidecar bool + expectedResult results.Result // only checked if expectSet is true }{ { name: "feature enabled, is restart, readiness", @@ -916,6 +918,8 @@ func TestChangeContainerStatusOnKubeletRestart(t *testing.T) { probeType: readiness, initialValue: results.Failure, expectSet: true, + isSidecar: false, + expectedResult: results.Failure, }, { name: "feature enabled, is restart, liveness", @@ -924,6 +928,8 @@ func TestChangeContainerStatusOnKubeletRestart(t *testing.T) { probeType: liveness, initialValue: results.Success, expectSet: true, + isSidecar: false, + expectedResult: results.Success, }, { name: "feature enabled, is restart, startup", @@ -932,6 +938,8 @@ func TestChangeContainerStatusOnKubeletRestart(t *testing.T) { probeType: startup, initialValue: results.Unknown, expectSet: true, + isSidecar: false, + expectedResult: results.Unknown, }, { name: "feature enabled, not restart, readiness", @@ -940,6 +948,8 @@ func TestChangeContainerStatusOnKubeletRestart(t *testing.T) { probeType: readiness, initialValue: results.Failure, expectSet: true, + isSidecar: false, + expectedResult: results.Failure, }, { name: "feature enabled, not restart, liveness", @@ -948,6 +958,8 @@ func TestChangeContainerStatusOnKubeletRestart(t *testing.T) { probeType: liveness, initialValue: results.Success, expectSet: true, + isSidecar: false, + expectedResult: results.Success, }, { name: "feature enabled, not restart, startup", @@ -956,6 +968,8 @@ func TestChangeContainerStatusOnKubeletRestart(t *testing.T) { probeType: startup, initialValue: results.Unknown, expectSet: true, + isSidecar: false, + expectedResult: results.Unknown, }, { name: "feature disabled, is restart, readiness", @@ -963,6 +977,7 @@ func TestChangeContainerStatusOnKubeletRestart(t *testing.T) { isRestart: true, probeType: readiness, expectSet: false, + isSidecar: false, }, { name: "feature disabled, is restart, liveness", @@ -970,6 +985,7 @@ func TestChangeContainerStatusOnKubeletRestart(t *testing.T) { isRestart: true, probeType: liveness, expectSet: false, + isSidecar: false, }, { name: "feature disabled, is restart, startup", @@ -977,6 +993,7 @@ func TestChangeContainerStatusOnKubeletRestart(t *testing.T) { isRestart: true, probeType: startup, expectSet: false, + isSidecar: false, }, { name: "feature disabled, not restart, readiness", @@ -985,6 +1002,8 @@ func TestChangeContainerStatusOnKubeletRestart(t *testing.T) { probeType: readiness, initialValue: results.Failure, expectSet: true, + isSidecar: false, + expectedResult: results.Failure, }, { name: "feature disabled, not restart, liveness", @@ -993,6 +1012,8 @@ func TestChangeContainerStatusOnKubeletRestart(t *testing.T) { probeType: liveness, initialValue: results.Success, expectSet: true, + isSidecar: false, + expectedResult: results.Success, }, { name: "feature disabled, not restart, startup", @@ -1001,6 +1022,37 @@ func TestChangeContainerStatusOnKubeletRestart(t *testing.T) { probeType: startup, initialValue: results.Unknown, expectSet: true, + isSidecar: false, + expectedResult: results.Unknown, + }, + // Sidecar tests - regression for https://github.com/kubernetes/kubernetes/issues/136910 + { + name: "feature disabled, is restart, startup, sidecar", + featureEnabled: false, + isRestart: true, + probeType: startup, + initialValue: results.Unknown, + expectSet: true, + isSidecar: true, + expectedResult: results.Success, // Sidecars get Success, not initialValue + }, + { + name: "feature disabled, is restart, liveness, sidecar", + featureEnabled: false, + isRestart: true, + probeType: liveness, + initialValue: results.Success, + expectSet: false, // Readiness/liveness probes not set for sidecars on restart + isSidecar: true, + }, + { + name: "feature disabled, is restart, readiness, sidecar", + featureEnabled: false, + isRestart: true, + probeType: readiness, + initialValue: results.Failure, + expectSet: false, // Readiness/liveness probes not set for sidecars on restart + isSidecar: true, }, } @@ -1009,27 +1061,48 @@ func TestChangeContainerStatusOnKubeletRestart(t *testing.T) { featuregatetesting.SetFeatureGateDuringTest(t, utilfeature.DefaultFeatureGate, features.ChangeContainerStatusOnKubeletRestart, tc.featureEnabled) m := newTestManager() - podStatus := getTestRunningStatus() - podStatus.ContainerStatuses[0].ContainerID = "test://container-id" - if tc.isRestart { - podStatus.ContainerStatuses[0].State.Running.StartedAt = metav1.Time{Time: m.start.Add(-5 * time.Minute)} - } else { - podStatus.ContainerStatuses[0].State.Running.StartedAt = metav1.Time{Time: m.start.Add(5 * time.Minute)} - } - w := newTestWorker(m, tc.probeType, v1.Probe{InitialDelaySeconds: 1000}) - m.statusManager.SetPodStatus(logger, w.pod, podStatus) + var w *worker + var containerID kubecontainer.ContainerID + + if tc.isSidecar { + w = newTestWorkerWithRestartableInitContainer(m, tc.probeType) + w.spec = &v1.Probe{InitialDelaySeconds: 1000} + // For sidecar, we need init container status + podStatus := getTestRunningStatus() + // Move container from regular to init containers (it's a sidecar) + podStatus.InitContainerStatuses = []v1.ContainerStatus{podStatus.ContainerStatuses[0]} + podStatus.ContainerStatuses = nil + podStatus.InitContainerStatuses[0].ContainerID = "test://container-id" + if tc.isRestart { + podStatus.InitContainerStatuses[0].State.Running.StartedAt = metav1.Time{Time: m.start.Add(-5 * time.Minute)} + } else { + podStatus.InitContainerStatuses[0].State.Running.StartedAt = metav1.Time{Time: m.start.Add(5 * time.Minute)} + } + m.statusManager.SetPodStatus(logger, w.pod, podStatus) + containerID = kubecontainer.ParseContainerID(podStatus.InitContainerStatuses[0].ContainerID) + } else { + podStatus := getTestRunningStatus() + podStatus.ContainerStatuses[0].ContainerID = "test://container-id" + if tc.isRestart { + podStatus.ContainerStatuses[0].State.Running.StartedAt = metav1.Time{Time: m.start.Add(-5 * time.Minute)} + } else { + podStatus.ContainerStatuses[0].State.Running.StartedAt = metav1.Time{Time: m.start.Add(5 * time.Minute)} + } + w = newTestWorker(m, tc.probeType, v1.Probe{InitialDelaySeconds: 1000}) + m.statusManager.SetPodStatus(logger, w.pod, podStatus) + containerID = kubecontainer.ParseContainerID(podStatus.ContainerStatuses[0].ContainerID) + } w.doProbe(ctx) - containerID := kubecontainer.ParseContainerID(podStatus.ContainerStatuses[0].ContainerID) result, ok := resultsManager(m, tc.probeType).Get(containerID) if ok != tc.expectSet { t.Errorf("Expected result to be set: %v, but got: %v", tc.expectSet, ok) } - if tc.expectSet && result != tc.initialValue { - t.Errorf("Expected result %v, but got: %v", tc.initialValue, result) + if tc.expectSet && result != tc.expectedResult { + t.Errorf("Expected result %v, but got: %v", tc.expectedResult, result) } }) } diff --git a/test/e2e_node/container_lifecycle_test.go b/test/e2e_node/container_lifecycle_test.go index 6e680764ab5..2376b062067 100644 --- a/test/e2e_node/container_lifecycle_test.go +++ b/test/e2e_node/container_lifecycle_test.go @@ -6931,4 +6931,104 @@ var _ = SIGDescribe(framework.WithSerial(), "Not Change Container Status", frame testKubeletRestartForRestartableInit(ctx, pod) }) }) + + // Regression test for https://github.com/kubernetes/kubernetes/issues/136910 + var _ = SIGDescribe(framework.WithSerial(), "Sidecar container restart after kubelet restart", framework.WithFeatureGate(features.ChangeContainerStatusOnKubeletRestart), func() { + f := framework.NewDefaultFramework("sidecar-container-restart-test-serial") + addAfterEachForCleaningUpPods(f) + f.NamespacePodSecurityLevel = admissionapi.LevelPrivileged + + ginkgo.It("should restart regular containers when sidecar with startupProbe is running after kubelet restart", func(ctx context.Context) { + containerRestartPolicyAlways := v1.ContainerRestartPolicyAlways + pod := &v1.Pod{ + ObjectMeta: metav1.ObjectMeta{ + Name: "pod-with-sidecar-and-crashing-container", + }, + Spec: v1.PodSpec{ + RestartPolicy: v1.RestartPolicyAlways, + InitContainers: []v1.Container{ + { + Name: "sidecar", + Image: defaultImage, + Command: []string{"sh", "-c", "while true; do sleep 1; done"}, + RestartPolicy: &containerRestartPolicyAlways, + StartupProbe: &v1.Probe{ + ProbeHandler: v1.ProbeHandler{ + Exec: &v1.ExecAction{ + Command: []string{"/bin/true"}, + }, + }, + InitialDelaySeconds: 1, + PeriodSeconds: 1, + }, + }, + }, + Containers: []v1.Container{ + { + Name: "crasher", + Image: defaultImage, + Command: []string{"/bin/sh", "-c", "trap 'exit 0' TERM; sleep 3600 & wait $!"}, + }, + }, + }, + } + + client := e2epod.NewPodClient(f) + pod = client.Create(ctx, pod) + + ginkgo.By("Waiting for the pod to be running and ready") + err := e2epod.WaitForPodCondition(ctx, f.ClientSet, pod.Namespace, pod.Name, "PodReady", f.Timeouts.PodStart, + func(p *v1.Pod) (bool, error) { + if p.Status.Phase != v1.PodRunning { + return false, nil + } + for _, cond := range p.Status.Conditions { + if cond.Type == v1.PodReady && cond.Status == v1.ConditionTrue { + return true, nil + } + } + return false, nil + }) + framework.ExpectNoError(err) + + // The grace period for kubelet startup is 10 seconds, so we wait here for 11 seconds. + time.Sleep(time.Second * 11) + + ginkgo.By("restarting the kubelet") + restartKubelet := mustStopKubelet(ctx, f) + restartKubelet(ctx) + + ginkgo.By("ensuring kubelet is healthy") + gomega.Eventually(ctx, func() bool { + return kubeletHealthCheck(kubeletHealthCheckURL) + }, f.Timeouts.PodStart, f.Timeouts.Poll).Should(gomega.BeTrueBecause("kubelet should be started")) + + ginkgo.By("Sending SIGTERM to PID 1 in the crasher container") + _, _, err = e2epod.ExecCommandInContainerWithFullOutput(f, pod.Name, "crasher", "kill", "1") + framework.ExpectNoError(err, "failed to send SIGTERM to PID 1 in crasher container") + + ginkgo.By("Waiting for the container to restart and be running again") + err = e2epod.WaitForPodCondition(ctx, f.ClientSet, pod.Namespace, pod.Name, "ContainerRunningAfterRestart", f.Timeouts.PodStart, + func(p *v1.Pod) (bool, error) { + for _, status := range p.Status.ContainerStatuses { + if status.Name == "crasher" && status.RestartCount > 0 && status.State.Running != nil { + return true, nil + } + } + return false, nil + }) + framework.ExpectNoError(err, "container should be restarted and running again") + + ginkgo.By("Verifying sidecar is still running") + p, err := client.Get(ctx, pod.Name, metav1.GetOptions{}) + framework.ExpectNoError(err) + gomega.Expect(p.Status.InitContainerStatuses).ToNot(gomega.BeEmpty()) + for _, status := range p.Status.InitContainerStatuses { + if status.Name == "sidecar" { + gomega.Expect(status.State.Running).ToNot(gomega.BeNil(), "sidecar should still be running") + gomega.Expect(status.RestartCount).To(gomega.BeZero(), "sidecar should not have restarted") + } + } + }) + }) })