mirror of
https://github.com/kubernetes/kubernetes.git
synced 2026-05-28 04:04:39 -04:00
Merge pull request #137146 from george-angel/fix/container-restart-sidecar-exited
kubelet: fix containers not restarting when sidecar keeps running
This commit is contained in:
commit
88573def71
3 changed files with 202 additions and 15 deletions
|
|
@ -243,12 +243,17 @@ func (w *worker) doProbe(ctx context.Context) (keepGoing bool) {
|
|||
}
|
||||
}
|
||||
|
||||
isRestartableInitContainer := w.isInitContainer() &&
|
||||
w.container.RestartPolicy != nil &&
|
||||
*w.container.RestartPolicy == v1.ContainerRestartPolicyAlways
|
||||
|
||||
if w.containerID.String() != c.ContainerID {
|
||||
if !w.containerID.IsEmpty() {
|
||||
w.resultsManager.Remove(w.containerID)
|
||||
}
|
||||
|
||||
w.containerID = kubecontainer.ParseContainerID(c.ContainerID)
|
||||
|
||||
if !utilfeature.DefaultFeatureGate.Enabled(features.ChangeContainerStatusOnKubeletRestart) {
|
||||
// On kubelet restart, we don't want to immediately set the probe result to Failure,
|
||||
// as this could cause a container that was Ready to become NotReady.
|
||||
|
|
@ -259,6 +264,18 @@ func (w *worker) doProbe(ctx context.Context) (keepGoing bool) {
|
|||
isRestart = true
|
||||
}
|
||||
}
|
||||
|
||||
// For restartable init containers (sidecars) with a startup probe, seed the
|
||||
// startup result with Success if the container had already started before the
|
||||
// kubelet restarted. This ensures startupManager.Get() returns a valid result,
|
||||
// allowing computeInitContainerActions to detect pod initialization.
|
||||
// See https://github.com/kubernetes/kubernetes/issues/136910
|
||||
if isRestartableInitContainer && w.probeType == startup {
|
||||
if c.Started != nil && *c.Started {
|
||||
w.resultsManager.Set(w.containerID, results.Success, w.pod)
|
||||
}
|
||||
}
|
||||
|
||||
if !isRestart {
|
||||
w.resultsManager.Set(w.containerID, w.initialValue, w.pod)
|
||||
}
|
||||
|
|
@ -282,9 +299,6 @@ func (w *worker) doProbe(ctx context.Context) (keepGoing bool) {
|
|||
w.resultsManager.Set(w.containerID, results.Failure, w.pod)
|
||||
}
|
||||
|
||||
isRestartableInitContainer := w.isInitContainer() &&
|
||||
w.container.RestartPolicy != nil && *w.container.RestartPolicy == v1.ContainerRestartPolicyAlways
|
||||
|
||||
// Abort if the container will not be restarted.
|
||||
if utilfeature.DefaultFeatureGate.Enabled(features.ContainerRestartRules) {
|
||||
if c.State.Terminated == nil {
|
||||
|
|
|
|||
|
|
@ -908,6 +908,8 @@ func TestChangeContainerStatusOnKubeletRestart(t *testing.T) {
|
|||
probeType probeType
|
||||
initialValue results.Result
|
||||
expectSet bool
|
||||
isSidecar bool
|
||||
expectedResult results.Result // only checked if expectSet is true
|
||||
}{
|
||||
{
|
||||
name: "feature enabled, is restart, readiness",
|
||||
|
|
@ -916,6 +918,8 @@ func TestChangeContainerStatusOnKubeletRestart(t *testing.T) {
|
|||
probeType: readiness,
|
||||
initialValue: results.Failure,
|
||||
expectSet: true,
|
||||
isSidecar: false,
|
||||
expectedResult: results.Failure,
|
||||
},
|
||||
{
|
||||
name: "feature enabled, is restart, liveness",
|
||||
|
|
@ -924,6 +928,8 @@ func TestChangeContainerStatusOnKubeletRestart(t *testing.T) {
|
|||
probeType: liveness,
|
||||
initialValue: results.Success,
|
||||
expectSet: true,
|
||||
isSidecar: false,
|
||||
expectedResult: results.Success,
|
||||
},
|
||||
{
|
||||
name: "feature enabled, is restart, startup",
|
||||
|
|
@ -932,6 +938,8 @@ func TestChangeContainerStatusOnKubeletRestart(t *testing.T) {
|
|||
probeType: startup,
|
||||
initialValue: results.Unknown,
|
||||
expectSet: true,
|
||||
isSidecar: false,
|
||||
expectedResult: results.Unknown,
|
||||
},
|
||||
{
|
||||
name: "feature enabled, not restart, readiness",
|
||||
|
|
@ -940,6 +948,8 @@ func TestChangeContainerStatusOnKubeletRestart(t *testing.T) {
|
|||
probeType: readiness,
|
||||
initialValue: results.Failure,
|
||||
expectSet: true,
|
||||
isSidecar: false,
|
||||
expectedResult: results.Failure,
|
||||
},
|
||||
{
|
||||
name: "feature enabled, not restart, liveness",
|
||||
|
|
@ -948,6 +958,8 @@ func TestChangeContainerStatusOnKubeletRestart(t *testing.T) {
|
|||
probeType: liveness,
|
||||
initialValue: results.Success,
|
||||
expectSet: true,
|
||||
isSidecar: false,
|
||||
expectedResult: results.Success,
|
||||
},
|
||||
{
|
||||
name: "feature enabled, not restart, startup",
|
||||
|
|
@ -956,6 +968,8 @@ func TestChangeContainerStatusOnKubeletRestart(t *testing.T) {
|
|||
probeType: startup,
|
||||
initialValue: results.Unknown,
|
||||
expectSet: true,
|
||||
isSidecar: false,
|
||||
expectedResult: results.Unknown,
|
||||
},
|
||||
{
|
||||
name: "feature disabled, is restart, readiness",
|
||||
|
|
@ -963,6 +977,7 @@ func TestChangeContainerStatusOnKubeletRestart(t *testing.T) {
|
|||
isRestart: true,
|
||||
probeType: readiness,
|
||||
expectSet: false,
|
||||
isSidecar: false,
|
||||
},
|
||||
{
|
||||
name: "feature disabled, is restart, liveness",
|
||||
|
|
@ -970,6 +985,7 @@ func TestChangeContainerStatusOnKubeletRestart(t *testing.T) {
|
|||
isRestart: true,
|
||||
probeType: liveness,
|
||||
expectSet: false,
|
||||
isSidecar: false,
|
||||
},
|
||||
{
|
||||
name: "feature disabled, is restart, startup",
|
||||
|
|
@ -977,6 +993,7 @@ func TestChangeContainerStatusOnKubeletRestart(t *testing.T) {
|
|||
isRestart: true,
|
||||
probeType: startup,
|
||||
expectSet: false,
|
||||
isSidecar: false,
|
||||
},
|
||||
{
|
||||
name: "feature disabled, not restart, readiness",
|
||||
|
|
@ -985,6 +1002,8 @@ func TestChangeContainerStatusOnKubeletRestart(t *testing.T) {
|
|||
probeType: readiness,
|
||||
initialValue: results.Failure,
|
||||
expectSet: true,
|
||||
isSidecar: false,
|
||||
expectedResult: results.Failure,
|
||||
},
|
||||
{
|
||||
name: "feature disabled, not restart, liveness",
|
||||
|
|
@ -993,6 +1012,8 @@ func TestChangeContainerStatusOnKubeletRestart(t *testing.T) {
|
|||
probeType: liveness,
|
||||
initialValue: results.Success,
|
||||
expectSet: true,
|
||||
isSidecar: false,
|
||||
expectedResult: results.Success,
|
||||
},
|
||||
{
|
||||
name: "feature disabled, not restart, startup",
|
||||
|
|
@ -1001,6 +1022,37 @@ func TestChangeContainerStatusOnKubeletRestart(t *testing.T) {
|
|||
probeType: startup,
|
||||
initialValue: results.Unknown,
|
||||
expectSet: true,
|
||||
isSidecar: false,
|
||||
expectedResult: results.Unknown,
|
||||
},
|
||||
// Sidecar tests - regression for https://github.com/kubernetes/kubernetes/issues/136910
|
||||
{
|
||||
name: "feature disabled, is restart, startup, sidecar",
|
||||
featureEnabled: false,
|
||||
isRestart: true,
|
||||
probeType: startup,
|
||||
initialValue: results.Unknown,
|
||||
expectSet: true,
|
||||
isSidecar: true,
|
||||
expectedResult: results.Success, // Sidecars get Success, not initialValue
|
||||
},
|
||||
{
|
||||
name: "feature disabled, is restart, liveness, sidecar",
|
||||
featureEnabled: false,
|
||||
isRestart: true,
|
||||
probeType: liveness,
|
||||
initialValue: results.Success,
|
||||
expectSet: false, // Readiness/liveness probes not set for sidecars on restart
|
||||
isSidecar: true,
|
||||
},
|
||||
{
|
||||
name: "feature disabled, is restart, readiness, sidecar",
|
||||
featureEnabled: false,
|
||||
isRestart: true,
|
||||
probeType: readiness,
|
||||
initialValue: results.Failure,
|
||||
expectSet: false, // Readiness/liveness probes not set for sidecars on restart
|
||||
isSidecar: true,
|
||||
},
|
||||
}
|
||||
|
||||
|
|
@ -1009,27 +1061,48 @@ func TestChangeContainerStatusOnKubeletRestart(t *testing.T) {
|
|||
featuregatetesting.SetFeatureGateDuringTest(t, utilfeature.DefaultFeatureGate, features.ChangeContainerStatusOnKubeletRestart, tc.featureEnabled)
|
||||
|
||||
m := newTestManager()
|
||||
podStatus := getTestRunningStatus()
|
||||
podStatus.ContainerStatuses[0].ContainerID = "test://container-id"
|
||||
if tc.isRestart {
|
||||
podStatus.ContainerStatuses[0].State.Running.StartedAt = metav1.Time{Time: m.start.Add(-5 * time.Minute)}
|
||||
} else {
|
||||
podStatus.ContainerStatuses[0].State.Running.StartedAt = metav1.Time{Time: m.start.Add(5 * time.Minute)}
|
||||
}
|
||||
|
||||
w := newTestWorker(m, tc.probeType, v1.Probe{InitialDelaySeconds: 1000})
|
||||
m.statusManager.SetPodStatus(logger, w.pod, podStatus)
|
||||
var w *worker
|
||||
var containerID kubecontainer.ContainerID
|
||||
|
||||
if tc.isSidecar {
|
||||
w = newTestWorkerWithRestartableInitContainer(m, tc.probeType)
|
||||
w.spec = &v1.Probe{InitialDelaySeconds: 1000}
|
||||
// For sidecar, we need init container status
|
||||
podStatus := getTestRunningStatus()
|
||||
// Move container from regular to init containers (it's a sidecar)
|
||||
podStatus.InitContainerStatuses = []v1.ContainerStatus{podStatus.ContainerStatuses[0]}
|
||||
podStatus.ContainerStatuses = nil
|
||||
podStatus.InitContainerStatuses[0].ContainerID = "test://container-id"
|
||||
if tc.isRestart {
|
||||
podStatus.InitContainerStatuses[0].State.Running.StartedAt = metav1.Time{Time: m.start.Add(-5 * time.Minute)}
|
||||
} else {
|
||||
podStatus.InitContainerStatuses[0].State.Running.StartedAt = metav1.Time{Time: m.start.Add(5 * time.Minute)}
|
||||
}
|
||||
m.statusManager.SetPodStatus(logger, w.pod, podStatus)
|
||||
containerID = kubecontainer.ParseContainerID(podStatus.InitContainerStatuses[0].ContainerID)
|
||||
} else {
|
||||
podStatus := getTestRunningStatus()
|
||||
podStatus.ContainerStatuses[0].ContainerID = "test://container-id"
|
||||
if tc.isRestart {
|
||||
podStatus.ContainerStatuses[0].State.Running.StartedAt = metav1.Time{Time: m.start.Add(-5 * time.Minute)}
|
||||
} else {
|
||||
podStatus.ContainerStatuses[0].State.Running.StartedAt = metav1.Time{Time: m.start.Add(5 * time.Minute)}
|
||||
}
|
||||
w = newTestWorker(m, tc.probeType, v1.Probe{InitialDelaySeconds: 1000})
|
||||
m.statusManager.SetPodStatus(logger, w.pod, podStatus)
|
||||
containerID = kubecontainer.ParseContainerID(podStatus.ContainerStatuses[0].ContainerID)
|
||||
}
|
||||
|
||||
w.doProbe(ctx)
|
||||
|
||||
containerID := kubecontainer.ParseContainerID(podStatus.ContainerStatuses[0].ContainerID)
|
||||
result, ok := resultsManager(m, tc.probeType).Get(containerID)
|
||||
|
||||
if ok != tc.expectSet {
|
||||
t.Errorf("Expected result to be set: %v, but got: %v", tc.expectSet, ok)
|
||||
}
|
||||
if tc.expectSet && result != tc.initialValue {
|
||||
t.Errorf("Expected result %v, but got: %v", tc.initialValue, result)
|
||||
if tc.expectSet && result != tc.expectedResult {
|
||||
t.Errorf("Expected result %v, but got: %v", tc.expectedResult, result)
|
||||
}
|
||||
})
|
||||
}
|
||||
|
|
|
|||
|
|
@ -6931,4 +6931,104 @@ var _ = SIGDescribe(framework.WithSerial(), "Not Change Container Status", frame
|
|||
testKubeletRestartForRestartableInit(ctx, pod)
|
||||
})
|
||||
})
|
||||
|
||||
// Regression test for https://github.com/kubernetes/kubernetes/issues/136910
|
||||
var _ = SIGDescribe(framework.WithSerial(), "Sidecar container restart after kubelet restart", framework.WithFeatureGate(features.ChangeContainerStatusOnKubeletRestart), func() {
|
||||
f := framework.NewDefaultFramework("sidecar-container-restart-test-serial")
|
||||
addAfterEachForCleaningUpPods(f)
|
||||
f.NamespacePodSecurityLevel = admissionapi.LevelPrivileged
|
||||
|
||||
ginkgo.It("should restart regular containers when sidecar with startupProbe is running after kubelet restart", func(ctx context.Context) {
|
||||
containerRestartPolicyAlways := v1.ContainerRestartPolicyAlways
|
||||
pod := &v1.Pod{
|
||||
ObjectMeta: metav1.ObjectMeta{
|
||||
Name: "pod-with-sidecar-and-crashing-container",
|
||||
},
|
||||
Spec: v1.PodSpec{
|
||||
RestartPolicy: v1.RestartPolicyAlways,
|
||||
InitContainers: []v1.Container{
|
||||
{
|
||||
Name: "sidecar",
|
||||
Image: defaultImage,
|
||||
Command: []string{"sh", "-c", "while true; do sleep 1; done"},
|
||||
RestartPolicy: &containerRestartPolicyAlways,
|
||||
StartupProbe: &v1.Probe{
|
||||
ProbeHandler: v1.ProbeHandler{
|
||||
Exec: &v1.ExecAction{
|
||||
Command: []string{"/bin/true"},
|
||||
},
|
||||
},
|
||||
InitialDelaySeconds: 1,
|
||||
PeriodSeconds: 1,
|
||||
},
|
||||
},
|
||||
},
|
||||
Containers: []v1.Container{
|
||||
{
|
||||
Name: "crasher",
|
||||
Image: defaultImage,
|
||||
Command: []string{"/bin/sh", "-c", "trap 'exit 0' TERM; sleep 3600 & wait $!"},
|
||||
},
|
||||
},
|
||||
},
|
||||
}
|
||||
|
||||
client := e2epod.NewPodClient(f)
|
||||
pod = client.Create(ctx, pod)
|
||||
|
||||
ginkgo.By("Waiting for the pod to be running and ready")
|
||||
err := e2epod.WaitForPodCondition(ctx, f.ClientSet, pod.Namespace, pod.Name, "PodReady", f.Timeouts.PodStart,
|
||||
func(p *v1.Pod) (bool, error) {
|
||||
if p.Status.Phase != v1.PodRunning {
|
||||
return false, nil
|
||||
}
|
||||
for _, cond := range p.Status.Conditions {
|
||||
if cond.Type == v1.PodReady && cond.Status == v1.ConditionTrue {
|
||||
return true, nil
|
||||
}
|
||||
}
|
||||
return false, nil
|
||||
})
|
||||
framework.ExpectNoError(err)
|
||||
|
||||
// The grace period for kubelet startup is 10 seconds, so we wait here for 11 seconds.
|
||||
time.Sleep(time.Second * 11)
|
||||
|
||||
ginkgo.By("restarting the kubelet")
|
||||
restartKubelet := mustStopKubelet(ctx, f)
|
||||
restartKubelet(ctx)
|
||||
|
||||
ginkgo.By("ensuring kubelet is healthy")
|
||||
gomega.Eventually(ctx, func() bool {
|
||||
return kubeletHealthCheck(kubeletHealthCheckURL)
|
||||
}, f.Timeouts.PodStart, f.Timeouts.Poll).Should(gomega.BeTrueBecause("kubelet should be started"))
|
||||
|
||||
ginkgo.By("Sending SIGTERM to PID 1 in the crasher container")
|
||||
_, _, err = e2epod.ExecCommandInContainerWithFullOutput(f, pod.Name, "crasher", "kill", "1")
|
||||
framework.ExpectNoError(err, "failed to send SIGTERM to PID 1 in crasher container")
|
||||
|
||||
ginkgo.By("Waiting for the container to restart and be running again")
|
||||
err = e2epod.WaitForPodCondition(ctx, f.ClientSet, pod.Namespace, pod.Name, "ContainerRunningAfterRestart", f.Timeouts.PodStart,
|
||||
func(p *v1.Pod) (bool, error) {
|
||||
for _, status := range p.Status.ContainerStatuses {
|
||||
if status.Name == "crasher" && status.RestartCount > 0 && status.State.Running != nil {
|
||||
return true, nil
|
||||
}
|
||||
}
|
||||
return false, nil
|
||||
})
|
||||
framework.ExpectNoError(err, "container should be restarted and running again")
|
||||
|
||||
ginkgo.By("Verifying sidecar is still running")
|
||||
p, err := client.Get(ctx, pod.Name, metav1.GetOptions{})
|
||||
framework.ExpectNoError(err)
|
||||
gomega.Expect(p.Status.InitContainerStatuses).ToNot(gomega.BeEmpty())
|
||||
for _, status := range p.Status.InitContainerStatuses {
|
||||
if status.Name == "sidecar" {
|
||||
gomega.Expect(status.State.Running).ToNot(gomega.BeNil(), "sidecar should still be running")
|
||||
gomega.Expect(status.RestartCount).To(gomega.BeZero(), "sidecar should not have restarted")
|
||||
}
|
||||
}
|
||||
})
|
||||
})
|
||||
})
|
||||
|
|
|
|||
Loading…
Reference in a new issue