Merge pull request #134746 from HirazawaUi/fix-restart-kubelet-1

KEP-4781: Restarting kubelet does not change pod status
This commit is contained in:
Kubernetes Prow Robot 2025-11-05 01:48:52 -08:00 committed by GitHub
commit 0452b0aec7
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
9 changed files with 1129 additions and 3 deletions

View file

@ -128,6 +128,11 @@ const (
// Enables kubelet to detect CSI volume condition and send the event of the abnormal volume to the corresponding pod that is using it.
CSIVolumeHealth featuregate.Feature = "CSIVolumeHealth"
// owner: @HirazawaUi
//
// Enabling this feature gate will cause the pod's status to change due to a kubelet restart.
ChangeContainerStatusOnKubeletRestart = "ChangeContainerStatusOnKubeletRestart"
// owner: @sanposhiho @wojtek-t
// kep: https://kep.k8s.io/5278
//
@ -1107,6 +1112,11 @@ var defaultVersionedKubernetesFeatureGates = map[featuregate.Feature]featuregate
{Version: version.MustParse("1.21"), Default: false, PreRelease: featuregate.Alpha},
},
ChangeContainerStatusOnKubeletRestart: {
{Version: version.MustParse("1.0"), Default: true, PreRelease: featuregate.GA},
{Version: version.MustParse("1.35"), Default: false, PreRelease: featuregate.Deprecated},
},
ClearingNominatedNodeNameAfterBinding: {
{Version: version.MustParse("1.34"), Default: false, PreRelease: featuregate.Alpha},
},
@ -2042,6 +2052,8 @@ var defaultKubernetesFeatureGateDependencies = map[featuregate.Feature][]feature
CSIVolumeHealth: {},
ChangeContainerStatusOnKubeletRestart: {},
ClearingNominatedNodeNameAfterBinding: {},
ClusterTrustBundle: {},

View file

@ -2409,6 +2409,14 @@ func (kl *Kubelet) convertToAPIContainerStatuses(pod *v1.Pod, podStatus *kubecon
oldStatusPtr = &oldStatus
}
status := convertContainerStatus(cStatus, oldStatusPtr)
if !utilfeature.DefaultFeatureGate.Enabled(features.ChangeContainerStatusOnKubeletRestart) {
if cStatus.State == kubecontainer.ContainerStateRunning {
if oldStatus, ok := oldStatuses[status.Name]; ok && oldStatus.Started != nil {
status.Started = oldStatus.Started
}
}
}
if utilfeature.DefaultFeatureGate.Enabled(features.InPlacePodVerticalScaling) {
allocatedContainer := kubecontainer.GetContainerSpec(pod, cName)
if allocatedContainer != nil {

View file

@ -18,16 +18,19 @@ package prober
import (
"context"
"sync"
"time"
v1 "k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/types"
"k8s.io/apimachinery/pkg/util/sets"
utilfeature "k8s.io/apiserver/pkg/util/feature"
"k8s.io/client-go/tools/record"
"k8s.io/component-base/metrics"
"k8s.io/klog/v2"
podutil "k8s.io/kubernetes/pkg/api/v1/pod"
"k8s.io/kubernetes/pkg/features"
kubecontainer "k8s.io/kubernetes/pkg/kubelet/container"
"k8s.io/kubernetes/pkg/kubelet/prober/results"
"k8s.io/kubernetes/pkg/kubelet/status"
@ -278,6 +281,10 @@ func (m *manager) isContainerStarted(pod *v1.Pod, containerStatus *v1.ContainerS
return result == results.Success
}
if !utilfeature.DefaultFeatureGate.Enabled(features.ChangeContainerStatusOnKubeletRestart) && containerStatus.Started != nil && *containerStatus.Started {
return true
}
// if there is a startup probe which hasn't run yet, the container is not
// started.
if _, exists := m.getWorker(pod.UID, containerStatus.Name, startup); exists {
@ -288,6 +295,40 @@ func (m *manager) isContainerStarted(pod *v1.Pod, containerStatus *v1.ContainerS
return true
}
// setReadyStateOnKubeletRestart sets the ready state of a container to false if it was started
// before kubelet restarted and has a readiness probe, but the pod is not ready yet.
// This is to avoid flapping ready status of containers that were ready before kubelet restarted.
func (m *manager) setReadyStateOnKubeletRestart(ready *bool, pod *v1.Pod, containerStatus *v1.ContainerStatus, containerSpec *v1.Container) {
var containerStartTime time.Time
if containerStatus.State.Running != nil {
containerStartTime = containerStatus.State.Running.StartedAt.Time
}
if !containerStartTime.IsZero() && containerStartTime.Before(kubeletRestartGracePeriod(m.start)) {
// At this point, the Pod may be in one of the following two states:
// - It has not yet been added to the readinessManager. In this case, we directly set the container status to Ready.
// - It has been added to the readinessManager, but the probe has not yet started execution.
// Therefore, in this case, we also need to set the container status to Ready.
if !*ready {
if _, ok := m.readinessManager.Get(kubecontainer.ParseContainerID(containerStatus.ContainerID)); !ok {
*ready = true
}
}
if containerSpec.ReadinessProbe != nil {
podIsReady := false
for _, c := range pod.Status.Conditions {
if c.Type == v1.PodReady && c.Status == v1.ConditionTrue {
podIsReady = true
break
}
}
if !podIsReady {
*ready = false
}
}
}
}
func (m *manager) UpdatePodStatus(ctx context.Context, pod *v1.Pod, podStatus *v1.PodStatus) {
logger := klog.FromContext(ctx)
for i, c := range podStatus.ContainerStatuses {
@ -315,6 +356,20 @@ func (m *manager) UpdatePodStatus(ctx context.Context, pod *v1.Pod, podStatus *v
logger.Info("Failed to trigger a manual run", "probe", w.probeType.String())
}
}
if !utilfeature.DefaultFeatureGate.Enabled(features.ChangeContainerStatusOnKubeletRestart) {
// Find the container spec for the container status.
var containerSpec *v1.Container
for j := range pod.Spec.Containers {
if pod.Spec.Containers[j].Name == c.Name {
containerSpec = &pod.Spec.Containers[j]
break
}
}
if containerSpec != nil {
m.setReadyStateOnKubeletRestart(&ready, pod, &podStatus.ContainerStatuses[i], containerSpec)
}
}
}
podStatus.ContainerStatuses[i].Ready = ready
}
@ -356,6 +411,9 @@ func (m *manager) UpdatePodStatus(ctx context.Context, pod *v1.Pod, podStatus *v
logger.Info("Failed to trigger a manual run", "probe", w.probeType.String())
}
}
if !utilfeature.DefaultFeatureGate.Enabled(features.ChangeContainerStatusOnKubeletRestart) {
m.setReadyStateOnKubeletRestart(&ready, pod, &podStatus.InitContainerStatuses[i], &initContainer)
}
}
podStatus.InitContainerStatuses[i].Ready = ready
}
@ -381,3 +439,12 @@ func (m *manager) workerCount() int {
defer m.workerLock.RUnlock()
return len(m.workers)
}
// kubeletRestartGracePeriod returns a time point that is 10 seconds before the kubelet start time.
// This grace period is used to determine if a container was already running before kubelet restarted.
// If a container's start time is before this grace period, it indicates the container was running
// prior to kubelet restart and should not be immediately marked as failed to avoid unnecessary
// status changes for containers that were previously ready.
func kubeletRestartGracePeriod(start time.Time) time.Time {
return start.Add(-time.Second * 10)
}

View file

@ -247,8 +247,25 @@ func (w *worker) doProbe(ctx context.Context) (keepGoing bool) {
if !w.containerID.IsEmpty() {
w.resultsManager.Remove(w.containerID)
}
w.containerID = kubecontainer.ParseContainerID(c.ContainerID)
w.resultsManager.Set(w.containerID, w.initialValue, w.pod)
if !utilfeature.DefaultFeatureGate.Enabled(features.ChangeContainerStatusOnKubeletRestart) {
// On kubelet restart, we don't want to immediately set the probe result to Failure,
// as this could cause a container that was Ready to become NotReady.
isRestart := false
if c.State.Running != nil {
containerStartTime := c.State.Running.StartedAt.Time
if !containerStartTime.IsZero() && containerStartTime.Before(kubeletRestartGracePeriod(w.probeManager.start)) {
isRestart = true
}
}
if !isRestart {
w.resultsManager.Set(w.containerID, w.initialValue, w.pod)
}
} else {
w.resultsManager.Set(w.containerID, w.initialValue, w.pod)
}
// We've got a new container; resume probing.
w.onHold = false
}

View file

@ -28,6 +28,7 @@ import (
"k8s.io/client-go/kubernetes/fake"
featuregatetesting "k8s.io/component-base/featuregate/testing"
"k8s.io/kubernetes/pkg/features"
kubecontainer "k8s.io/kubernetes/pkg/kubelet/container"
kubepod "k8s.io/kubernetes/pkg/kubelet/pod"
"k8s.io/kubernetes/pkg/kubelet/prober/results"
"k8s.io/kubernetes/pkg/kubelet/status"
@ -779,3 +780,140 @@ func TestStartupProbeDisabledByStarted(t *testing.T) {
expectContinue(t, w, w.doProbe(ctx), msg)
expectResult(t, w, results.Success, msg)
}
func TestChangeContainerStatusOnKubeletRestart(t *testing.T) {
logger, ctx := ktesting.NewTestContext(t)
tests := []struct {
name string
featureEnabled bool
isRestart bool
probeType probeType
initialValue results.Result
expectSet bool
}{
{
name: "feature enabled, is restart, readiness",
featureEnabled: true,
isRestart: true,
probeType: readiness,
initialValue: results.Failure,
expectSet: true,
},
{
name: "feature enabled, is restart, liveness",
featureEnabled: true,
isRestart: true,
probeType: liveness,
initialValue: results.Success,
expectSet: true,
},
{
name: "feature enabled, is restart, startup",
featureEnabled: true,
isRestart: true,
probeType: startup,
initialValue: results.Unknown,
expectSet: true,
},
{
name: "feature enabled, not restart, readiness",
featureEnabled: true,
isRestart: false,
probeType: readiness,
initialValue: results.Failure,
expectSet: true,
},
{
name: "feature enabled, not restart, liveness",
featureEnabled: true,
isRestart: false,
probeType: liveness,
initialValue: results.Success,
expectSet: true,
},
{
name: "feature enabled, not restart, startup",
featureEnabled: true,
isRestart: false,
probeType: startup,
initialValue: results.Unknown,
expectSet: true,
},
{
name: "feature disabled, is restart, readiness",
featureEnabled: false,
isRestart: true,
probeType: readiness,
expectSet: false,
},
{
name: "feature disabled, is restart, liveness",
featureEnabled: false,
isRestart: true,
probeType: liveness,
expectSet: false,
},
{
name: "feature disabled, is restart, startup",
featureEnabled: false,
isRestart: true,
probeType: startup,
expectSet: false,
},
{
name: "feature disabled, not restart, readiness",
featureEnabled: false,
isRestart: false,
probeType: readiness,
initialValue: results.Failure,
expectSet: true,
},
{
name: "feature disabled, not restart, liveness",
featureEnabled: false,
isRestart: false,
probeType: liveness,
initialValue: results.Success,
expectSet: true,
},
{
name: "feature disabled, not restart, startup",
featureEnabled: false,
isRestart: false,
probeType: startup,
initialValue: results.Unknown,
expectSet: true,
},
}
for _, tc := range tests {
t.Run(tc.name, func(t *testing.T) {
featuregatetesting.SetFeatureGateDuringTest(t, utilfeature.DefaultFeatureGate, features.ChangeContainerStatusOnKubeletRestart, tc.featureEnabled)
m := newTestManager()
podStatus := getTestRunningStatus()
podStatus.ContainerStatuses[0].ContainerID = "test://container-id"
if tc.isRestart {
podStatus.ContainerStatuses[0].State.Running.StartedAt = metav1.Time{Time: m.start.Add(-5 * time.Minute)}
} else {
podStatus.ContainerStatuses[0].State.Running.StartedAt = metav1.Time{Time: m.start.Add(5 * time.Minute)}
}
w := newTestWorker(m, tc.probeType, v1.Probe{InitialDelaySeconds: 1000})
m.statusManager.SetPodStatus(logger, w.pod, podStatus)
w.doProbe(ctx)
containerID := kubecontainer.ParseContainerID(podStatus.ContainerStatuses[0].ContainerID)
result, ok := resultsManager(m, tc.probeType).Get(containerID)
if ok != tc.expectSet {
t.Errorf("Expected result to be set: %v, but got: %v", tc.expectSet, ok)
}
if tc.expectSet && result != tc.initialValue {
t.Errorf("Expected result %v, but got: %v", tc.initialValue, result)
}
})
}
}

View file

@ -175,6 +175,16 @@
lockToDefault: false
preRelease: Alpha
version: "1.32"
- name: ChangeContainerStatusOnKubeletRestart
versionedSpecs:
- default: true
lockToDefault: false
preRelease: GA
version: "1.0"
- default: false
lockToDefault: false
preRelease: Deprecated
version: "1.35"
- name: ClearingNominatedNodeNameAfterBinding
versionedSpecs:
- default: false

View file

@ -26,12 +26,13 @@ import (
"github.com/onsi/gomega"
v1 "k8s.io/api/core/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/watch"
runtimeapi "k8s.io/cri-api/pkg/apis/runtime/v1"
admissionapi "k8s.io/pod-security-admission/api"
"k8s.io/kubernetes/pkg/features"
"k8s.io/kubernetes/test/e2e/framework"
e2epod "k8s.io/kubernetes/test/e2e/framework/pod"
imageutils "k8s.io/kubernetes/test/utils/image"
admissionapi "k8s.io/pod-security-admission/api"
"k8s.io/utils/ptr"
)
@ -6304,3 +6305,630 @@ var _ = SIGDescribe(framework.WithNodeConformance(), framework.WithSerial(), "Co
})
})
})
var _ = SIGDescribe(framework.WithSerial(), "Not Change Container Status", framework.WithFeatureGate(features.ChangeContainerStatusOnKubeletRestart), func() {
f := framework.NewDefaultFramework("not-change-container-status-test-serial")
addAfterEachForCleaningUpPods(f)
f.NamespacePodSecurityLevel = admissionapi.LevelPrivileged
ginkgo.When("a Pod is running", func() {
testKubeletRestart := func(ctx context.Context, pod *v1.Pod) {
client := e2epod.NewPodClient(f)
pod = client.Create(ctx, pod)
ginkgo.By("Waiting for the pod to be running and ready")
err := e2epod.WaitForPodCondition(ctx, f.ClientSet, pod.Namespace, pod.Name, "PodReady", f.Timeouts.PodStart,
func(p *v1.Pod) (bool, error) {
if p.Status.Phase != v1.PodRunning {
return false, nil
}
for _, cond := range p.Status.Conditions {
if cond.Type == v1.PodReady && cond.Status == v1.ConditionTrue {
return true, nil
}
}
return false, nil
})
framework.ExpectNoError(err)
// Double check the initial state before starting the concurrent check
p, err := client.Get(ctx, pod.Name, metav1.GetOptions{})
framework.ExpectNoError(err)
gomega.Expect(p.Status.ContainerStatuses).ToNot(gomega.BeEmpty())
for _, status := range p.Status.ContainerStatuses {
gomega.Expect(status.RestartCount).To(gomega.BeZero())
gomega.Expect(status.Started).ToNot(gomega.BeNil())
gomega.Expect(*status.Started).To(gomega.BeTrueBecause("The Started field should be set to true when a pod enters the Ready condition."))
gomega.Expect(status.Ready).To(gomega.BeTrueBecause("The Ready field should be set to true when a pod enters the Ready condition."))
}
// The grace period for kubelet startup is 10 seconds, so we wait here for 11 seconds.
time.Sleep(time.Second * 11)
stopCh := make(chan struct{})
errCh := make(chan error, 1)
go func() {
watcher, err := f.ClientSet.CoreV1().Pods(pod.Namespace).Watch(ctx, metav1.ListOptions{
FieldSelector: "metadata.name=" + pod.Name,
})
if err != nil {
errCh <- fmt.Errorf("failed to watch pod: %w", err)
return
}
defer watcher.Stop()
for {
select {
case event, ok := <-watcher.ResultChan():
if !ok {
return
}
if event.Type != watch.Modified {
continue
}
p, ok := event.Object.(*v1.Pod)
if !ok {
continue
}
if p.Status.Phase != v1.PodRunning {
errCh <- fmt.Errorf("pod phase is %v, expected %v", p.Status.Phase, v1.PodRunning)
return
}
if len(p.Status.ContainerStatuses) < len(pod.Spec.Containers) {
continue
}
for _, containerStatus := range p.Status.ContainerStatuses {
if containerStatus.RestartCount > 0 {
errCh <- fmt.Errorf("container %q restarted %d times", containerStatus.Name, containerStatus.RestartCount)
return
}
if containerStatus.Started == nil || !*containerStatus.Started {
errCh <- fmt.Errorf("container %q started status is not true", containerStatus.Name)
return
}
if !containerStatus.Ready {
errCh <- fmt.Errorf("container %q ready status is not true", containerStatus.Name)
return
}
}
case <-stopCh:
close(errCh)
return
}
}
}()
ginkgo.By("restarting the kubelet")
restartKubelet := mustStopKubelet(ctx, f)
restartKubelet(ctx)
ginkgo.By("ensuring kubelet is healthy")
gomega.Eventually(ctx, func() bool {
return kubeletHealthCheck(kubeletHealthCheckURL)
}, f.Timeouts.PodStart, f.Timeouts.Poll).Should(gomega.BeTrueBecause("kubelet should be started"))
// Let the goroutine run for a few more seconds to catch any delayed changes
time.Sleep(5 * time.Second)
close(stopCh)
// Check for errors from the goroutine
for err := range errCh {
framework.ExpectNoError(err, "pod status check failed during kubelet restart")
}
}
ginkgo.It("should not affect pod status when pod has no probe", func(ctx context.Context) {
pod := &v1.Pod{
ObjectMeta: metav1.ObjectMeta{
Name: "pod-no-probe",
},
Spec: v1.PodSpec{
Containers: []v1.Container{
{
Name: "container",
Image: defaultImage,
Command: []string{"sleep", "3600"},
},
},
},
}
testKubeletRestart(ctx, pod)
})
ginkgo.It("should not affect pod status when pod has startupProbe", func(ctx context.Context) {
pod := &v1.Pod{
ObjectMeta: metav1.ObjectMeta{
Name: "pod-with-startup-probe",
},
Spec: v1.PodSpec{
Containers: []v1.Container{
{
Name: "container",
Image: defaultImage,
Command: []string{"sleep", "3600"},
StartupProbe: &v1.Probe{
ProbeHandler: v1.ProbeHandler{
Exec: &v1.ExecAction{
Command: []string{"/bin/true"},
},
},
InitialDelaySeconds: 1,
PeriodSeconds: 1,
},
},
},
},
}
testKubeletRestart(ctx, pod)
})
ginkgo.It("should not affect pod status when pod has readinessProbe", func(ctx context.Context) {
pod := &v1.Pod{
ObjectMeta: metav1.ObjectMeta{
Name: "pod-with-readiness-probe",
},
Spec: v1.PodSpec{
Containers: []v1.Container{
{
Name: "container",
Image: defaultImage,
Command: []string{"sleep", "3600"},
ReadinessProbe: &v1.Probe{
ProbeHandler: v1.ProbeHandler{
Exec: &v1.ExecAction{
Command: []string{"/bin/true"},
},
},
InitialDelaySeconds: 1,
PeriodSeconds: 1,
},
},
},
},
}
testKubeletRestart(ctx, pod)
})
ginkgo.It("should not affect pod status when pod has startupProbe and readinessProbe", func(ctx context.Context) {
pod := &v1.Pod{
ObjectMeta: metav1.ObjectMeta{
Name: "pod-with-startup-and-readiness-probe",
},
Spec: v1.PodSpec{
Containers: []v1.Container{
{
Name: "container",
Image: defaultImage,
Command: []string{"sleep", "3600"},
StartupProbe: &v1.Probe{
ProbeHandler: v1.ProbeHandler{
Exec: &v1.ExecAction{
Command: []string{"/bin/true"},
},
},
InitialDelaySeconds: 1,
PeriodSeconds: 1,
},
ReadinessProbe: &v1.Probe{
ProbeHandler: v1.ProbeHandler{
Exec: &v1.ExecAction{
Command: []string{"/bin/true"},
},
},
InitialDelaySeconds: 1,
PeriodSeconds: 1,
},
},
},
},
}
testKubeletRestart(ctx, pod)
})
ginkgo.It("should not affect pod status when pod has multiple containers", func(ctx context.Context) {
pod := &v1.Pod{
ObjectMeta: metav1.ObjectMeta{
Name: "pod-with-multiple-containers",
},
Spec: v1.PodSpec{
Containers: []v1.Container{
{
Name: "container1",
Image: defaultImage,
Command: []string{"sleep", "3600"},
},
{
Name: "container2",
Image: defaultImage,
Command: []string{"sleep", "3600"},
},
},
},
}
testKubeletRestart(ctx, pod)
})
ginkgo.It("should not affect pod status when pod has multiple containers with startupProbes", func(ctx context.Context) {
pod := &v1.Pod{
ObjectMeta: metav1.ObjectMeta{
Name: "pod-mc-with-startup-probes",
},
Spec: v1.PodSpec{
Containers: []v1.Container{
{
Name: "container1",
Image: defaultImage,
Command: []string{"sleep", "3600"},
StartupProbe: &v1.Probe{
ProbeHandler: v1.ProbeHandler{
Exec: &v1.ExecAction{
Command: []string{"/bin/true"},
},
},
InitialDelaySeconds: 1,
PeriodSeconds: 1,
},
},
{
Name: "container2",
Image: defaultImage,
Command: []string{"sleep", "3600"},
StartupProbe: &v1.Probe{
ProbeHandler: v1.ProbeHandler{
Exec: &v1.ExecAction{
Command: []string{"/bin/true"},
},
},
InitialDelaySeconds: 1,
PeriodSeconds: 1,
},
},
},
},
}
testKubeletRestart(ctx, pod)
})
ginkgo.It("should not affect pod status when pod has multiple containers with readinessProbes", func(ctx context.Context) {
pod := &v1.Pod{
ObjectMeta: metav1.ObjectMeta{
Name: "pod-mc-with-readiness-probes",
},
Spec: v1.PodSpec{
Containers: []v1.Container{
{
Name: "container1",
Image: defaultImage,
Command: []string{"sleep", "3600"},
ReadinessProbe: &v1.Probe{
ProbeHandler: v1.ProbeHandler{
Exec: &v1.ExecAction{
Command: []string{"/bin/true"},
},
},
InitialDelaySeconds: 1,
PeriodSeconds: 1,
},
},
{
Name: "container2",
Image: defaultImage,
Command: []string{"sleep", "3600"},
ReadinessProbe: &v1.Probe{
ProbeHandler: v1.ProbeHandler{
Exec: &v1.ExecAction{
Command: []string{"/bin/true"},
},
},
InitialDelaySeconds: 1,
PeriodSeconds: 1,
},
},
},
},
}
testKubeletRestart(ctx, pod)
})
ginkgo.It("should not affect pod status when pod has multiple containers with startup and readiness probes", func(ctx context.Context) {
pod := &v1.Pod{
ObjectMeta: metav1.ObjectMeta{
Name: "pod-mc-with-startup-and-readiness-probes",
},
Spec: v1.PodSpec{
Containers: []v1.Container{
{
Name: "container1",
Image: defaultImage,
Command: []string{"sleep", "3600"},
StartupProbe: &v1.Probe{
ProbeHandler: v1.ProbeHandler{
Exec: &v1.ExecAction{
Command: []string{"/bin/true"},
},
},
InitialDelaySeconds: 1,
PeriodSeconds: 1,
},
ReadinessProbe: &v1.Probe{
ProbeHandler: v1.ProbeHandler{
Exec: &v1.ExecAction{
Command: []string{"/bin/true"},
},
},
InitialDelaySeconds: 1,
PeriodSeconds: 1,
},
},
{
Name: "container2",
Image: defaultImage,
Command: []string{"sleep", "3600"},
StartupProbe: &v1.Probe{
ProbeHandler: v1.ProbeHandler{
Exec: &v1.ExecAction{
Command: []string{"/bin/true"},
},
},
InitialDelaySeconds: 1,
PeriodSeconds: 1,
},
ReadinessProbe: &v1.Probe{
ProbeHandler: v1.ProbeHandler{
Exec: &v1.ExecAction{
Command: []string{"/bin/true"},
},
},
InitialDelaySeconds: 1,
PeriodSeconds: 1,
},
},
},
},
}
testKubeletRestart(ctx, pod)
})
})
ginkgo.When("a Pod is running with a restartable init container", func() {
testKubeletRestartForRestartableInit := func(ctx context.Context, pod *v1.Pod) {
client := e2epod.NewPodClient(f)
pod = client.Create(ctx, pod)
ginkgo.By("Waiting for the pod to be running and ready")
err := e2epod.WaitForPodCondition(ctx, f.ClientSet, pod.Namespace, pod.Name, "PodReady", f.Timeouts.PodStart,
func(p *v1.Pod) (bool, error) {
if p.Status.Phase != v1.PodRunning {
return false, nil
}
for _, cond := range p.Status.Conditions {
if cond.Type == v1.PodReady && cond.Status == v1.ConditionTrue {
return true, nil
}
}
return false, nil
})
framework.ExpectNoError(err)
// Double check the initial state before starting the concurrent check
p, err := client.Get(ctx, pod.Name, metav1.GetOptions{})
framework.ExpectNoError(err)
gomega.Expect(p.Status.InitContainerStatuses).ToNot(gomega.BeEmpty())
gomega.Expect(p.Status.InitContainerStatuses[0].RestartCount).To(gomega.BeZero())
gomega.Expect(p.Status.InitContainerStatuses[0].Started).ToNot(gomega.BeNil())
gomega.Expect(*p.Status.InitContainerStatuses[0].Started).To(gomega.BeTrueBecause("The Started field should be set to true when a pod enters the Ready condition."))
gomega.Expect(p.Status.InitContainerStatuses[0].Ready).To(gomega.BeTrueBecause("The Ready field should be set to true when a pod enters the Ready condition."))
// The grace period for kubelet startup is 10 seconds, so we wait here for 11 seconds.
time.Sleep(time.Second * 11)
stopCh := make(chan struct{})
errCh := make(chan error, 1)
go func() {
watcher, err := f.ClientSet.CoreV1().Pods(pod.Namespace).Watch(ctx, metav1.ListOptions{
FieldSelector: "metadata.name=" + pod.Name,
})
if err != nil {
errCh <- fmt.Errorf("failed to watch pod: %w", err)
return
}
defer watcher.Stop()
for {
select {
case event, ok := <-watcher.ResultChan():
if !ok {
return
}
if event.Type != watch.Modified {
continue
}
p, ok := event.Object.(*v1.Pod)
if !ok {
continue
}
if p.Status.Phase != v1.PodRunning {
errCh <- fmt.Errorf("pod phase is %v, expected %v", p.Status.Phase, v1.PodRunning)
return
}
if len(p.Status.InitContainerStatuses) == 0 {
errCh <- fmt.Errorf("pod has no init container statuses")
return
}
containerStatus := p.Status.InitContainerStatuses[0]
if containerStatus.RestartCount > 0 {
errCh <- fmt.Errorf("container restarted %d times", containerStatus.RestartCount)
return
}
if containerStatus.Started == nil || !*containerStatus.Started {
errCh <- fmt.Errorf("container started status is not true")
return
}
if !containerStatus.Ready {
errCh <- fmt.Errorf("container ready status is not true")
return
}
case <-stopCh:
close(errCh)
return
}
}
}()
ginkgo.By("restarting the kubelet")
restartKubelet := mustStopKubelet(ctx, f)
restartKubelet(ctx)
ginkgo.By("ensuring kubelet is healthy")
gomega.Eventually(ctx, func() bool {
return kubeletHealthCheck(kubeletHealthCheckURL)
}, f.Timeouts.PodStart, f.Timeouts.Poll).Should(gomega.BeTrueBecause("kubelet should be started"))
// Let the goroutine run for a few more seconds to catch any delayed changes
time.Sleep(5 * time.Second)
close(stopCh)
// Check for errors from the goroutine
for err := range errCh {
framework.ExpectNoError(err, "pod status check failed during kubelet restart")
}
}
ginkgo.It("should not affect pod status when restartable init container has no probe", func(ctx context.Context) {
pod := &v1.Pod{
ObjectMeta: metav1.ObjectMeta{
Name: "pod-restartable-init-no-probe",
},
Spec: v1.PodSpec{
InitContainers: []v1.Container{
{
Name: "restartable-init",
Image: defaultImage,
Command: []string{"sleep", "3600"},
RestartPolicy: &containerRestartPolicyAlways,
},
},
Containers: []v1.Container{
{
Name: "container",
Image: imageutils.GetPauseImageName(),
},
},
},
}
testKubeletRestartForRestartableInit(ctx, pod)
})
ginkgo.It("should not affect pod status when restartable init container has startupProbe", func(ctx context.Context) {
pod := &v1.Pod{
ObjectMeta: metav1.ObjectMeta{
Name: "pod-restartable-init-with-startup-probe",
},
Spec: v1.PodSpec{
InitContainers: []v1.Container{
{
Name: "restartable-init",
Image: defaultImage,
Command: []string{"sleep", "3600"},
RestartPolicy: &containerRestartPolicyAlways,
StartupProbe: &v1.Probe{
ProbeHandler: v1.ProbeHandler{
Exec: &v1.ExecAction{
Command: []string{"/bin/true"},
},
},
InitialDelaySeconds: 1,
PeriodSeconds: 1,
},
},
},
Containers: []v1.Container{
{
Name: "container",
Image: imageutils.GetPauseImageName(),
},
},
},
}
testKubeletRestartForRestartableInit(ctx, pod)
})
ginkgo.It("should not affect pod status when restartable init container has readinessProbe", func(ctx context.Context) {
pod := &v1.Pod{
ObjectMeta: metav1.ObjectMeta{
Name: "pod-restartable-init-with-readiness-probe",
},
Spec: v1.PodSpec{
InitContainers: []v1.Container{
{
Name: "restartable-init",
Image: defaultImage,
Command: []string{"sleep", "3600"},
RestartPolicy: &containerRestartPolicyAlways,
ReadinessProbe: &v1.Probe{
ProbeHandler: v1.ProbeHandler{
Exec: &v1.ExecAction{
Command: []string{"/bin/true"},
},
},
InitialDelaySeconds: 1,
PeriodSeconds: 1,
},
},
},
Containers: []v1.Container{
{
Name: "container",
Image: imageutils.GetPauseImageName(),
},
},
},
}
testKubeletRestartForRestartableInit(ctx, pod)
})
ginkgo.It("should not affect pod status when restartable init container has startupProbe and readinessProbe", func(ctx context.Context) {
pod := &v1.Pod{
ObjectMeta: metav1.ObjectMeta{
Name: "pod-restartable-init-with-startup-and-readiness-probe",
},
Spec: v1.PodSpec{
InitContainers: []v1.Container{
{
Name: "restartable-init",
Image: defaultImage,
Command: []string{"sleep", "3600"},
RestartPolicy: &containerRestartPolicyAlways,
StartupProbe: &v1.Probe{
ProbeHandler: v1.ProbeHandler{
Exec: &v1.ExecAction{
Command: []string{"/bin/true"},
},
},
InitialDelaySeconds: 1,
PeriodSeconds: 1,
},
ReadinessProbe: &v1.Probe{
ProbeHandler: v1.ProbeHandler{
Exec: &v1.ExecAction{
Command: []string{"/bin/true"},
},
},
InitialDelaySeconds: 1,
PeriodSeconds: 1,
},
},
},
Containers: []v1.Container{
{
Name: "container",
Image: imageutils.GetPauseImageName(),
},
},
},
}
testKubeletRestartForRestartableInit(ctx, pod)
})
})
})

View file

@ -41,7 +41,9 @@ import (
"github.com/google/go-cmp/cmp"
"github.com/onsi/ginkgo/v2"
"github.com/onsi/gomega"
"k8s.io/apimachinery/pkg/watch"
"k8s.io/cli-runtime/pkg/printers"
e2epod "k8s.io/kubernetes/test/e2e/framework/pod"
e2evolume "k8s.io/kubernetes/test/e2e/framework/volume"
)
@ -671,3 +673,169 @@ func checkMirrorPodRecreated(ctx context.Context, cl clientset.Interface, name,
}
return nil
}
var _ = SIGDescribe("MirrorPod", framework.WithSerial(), func() {
f := framework.NewDefaultFramework("mirror-pod-serial")
f.NamespacePodSecurityLevel = admissionapi.LevelPrivileged
ginkgo.Context("when kubelet restarts", func() {
var ns, podPath, staticPodName, mirrorPodName string
ginkgo.BeforeEach(func(ctx context.Context) {
ns = f.Namespace.Name
staticPodName = "static-pod-" + string(uuid.NewUUID())
mirrorPodName = staticPodName + "-" + framework.TestContext.NodeName
podPath = kubeletCfg.StaticPodPath
ginkgo.By("create the static pod")
podSpec := v1.PodSpec{
Containers: []v1.Container{
{
Name: "container",
Image: defaultImage,
Command: []string{"sleep", "3600"},
StartupProbe: &v1.Probe{
ProbeHandler: v1.ProbeHandler{
Exec: &v1.ExecAction{
Command: []string{"/bin/true"},
},
},
InitialDelaySeconds: 1,
PeriodSeconds: 1,
},
ReadinessProbe: &v1.Probe{
ProbeHandler: v1.ProbeHandler{
Exec: &v1.ExecAction{
Command: []string{"/bin/true"},
},
},
InitialDelaySeconds: 1,
PeriodSeconds: 1,
},
},
},
}
err := createStaticPodWithSpec(podPath, staticPodName, ns, podSpec)
framework.ExpectNoError(err)
ginkgo.By("wait for the mirror pod to be running")
gomega.Eventually(ctx, func(ctx context.Context) error {
return checkMirrorPodRunning(ctx, f.ClientSet, mirrorPodName, ns)
}, 2*time.Minute, time.Second*4).Should(gomega.Succeed())
})
ginkgo.AfterEach(func(ctx context.Context) {
ginkgo.By("delete the static pod")
err := deleteStaticPod(podPath, staticPodName, ns)
framework.ExpectNoError(err)
ginkgo.By("wait for the mirror pod to disappear")
gomega.Eventually(ctx, func(ctx context.Context) error {
return checkMirrorPodDisappear(ctx, f.ClientSet, mirrorPodName, ns)
}, 2*time.Minute, time.Second*4).Should(gomega.Succeed())
})
f.It("should not change container status", f.WithNodeConformance(), func(ctx context.Context) {
ginkgo.By("Waiting for the pod to be running and ready")
err := e2epod.WaitForPodCondition(ctx, f.ClientSet, ns, mirrorPodName, "PodReady", f.Timeouts.PodStart,
func(p *v1.Pod) (bool, error) {
if p.Status.Phase != v1.PodRunning {
return false, nil
}
for _, cond := range p.Status.Conditions {
if cond.Type == v1.PodReady && cond.Status == v1.ConditionTrue {
return true, nil
}
}
return false, nil
})
framework.ExpectNoError(err)
pod, err := f.ClientSet.CoreV1().Pods(ns).Get(ctx, mirrorPodName, metav1.GetOptions{})
framework.ExpectNoError(err)
ginkgo.By("Double check the initial state before starting the concurrent check")
gomega.Expect(pod.Status.ContainerStatuses).ToNot(gomega.BeEmpty())
for _, status := range pod.Status.ContainerStatuses {
gomega.Expect(status.RestartCount).To(gomega.BeZero())
gomega.Expect(status.Started).ToNot(gomega.BeNil())
gomega.Expect(*status.Started).To(gomega.BeTrueBecause("The Started field should be set to true when a pod enters the Ready condition."))
gomega.Expect(status.Ready).To(gomega.BeTrueBecause("The Ready field should be set to true when a pod enters the Ready condition."))
}
// The grace period for kubelet startup is 10 seconds, so we wait here for 11 seconds.
time.Sleep(time.Second * 11)
stopCh := make(chan struct{})
errCh := make(chan error, 1)
go func() {
defer ginkgo.GinkgoRecover()
watcher, err := f.ClientSet.CoreV1().Pods(ns).Watch(ctx, metav1.ListOptions{
FieldSelector: "metadata.name=" + mirrorPodName,
})
if err != nil {
errCh <- fmt.Errorf("failed to watch pod: %w", err)
return
}
defer watcher.Stop()
for {
select {
case event, ok := <-watcher.ResultChan():
if !ok {
return
}
if event.Type != watch.Modified {
continue
}
p, ok := event.Object.(*v1.Pod)
if !ok {
continue
}
if p.Status.Phase != v1.PodRunning {
errCh <- fmt.Errorf("pod phase is %v, expected %v", p.Status.Phase, v1.PodRunning)
return
}
if len(p.Status.ContainerStatuses) < len(pod.Spec.Containers) {
continue
}
for _, containerStatus := range p.Status.ContainerStatuses {
if containerStatus.RestartCount > 0 {
errCh <- fmt.Errorf("container %q restarted %d times", containerStatus.Name, containerStatus.RestartCount)
return
}
if containerStatus.Started == nil || !*containerStatus.Started {
errCh <- fmt.Errorf("container %q started status is not true", containerStatus.Name)
return
}
if !containerStatus.Ready {
errCh <- fmt.Errorf("container %q ready status is not true", containerStatus.Name)
return
}
}
case <-stopCh:
close(errCh)
return
}
}
}()
ginkgo.By("restarting the kubelet")
restartKubelet := mustStopKubelet(ctx, f)
restartKubelet(ctx)
ginkgo.By("ensuring kubelet is healthy")
gomega.Eventually(ctx, func() bool {
return kubeletHealthCheck(kubeletHealthCheckURL)
}, f.Timeouts.PodStart, f.Timeouts.Poll).Should(gomega.BeTrueBecause("kubelet should be started"))
// Let the goroutine run for a few more seconds to catch any delayed changes
time.Sleep(5 * time.Second)
close(stopCh)
for err := range errCh {
framework.ExpectNoError(err, "pod status check failed during kubelet restart")
}
})
})
})

View file

@ -328,3 +328,81 @@ func decodePods(respBody []byte) (*v1.PodList, error) {
return &pods, nil
}
var _ = SIGDescribe(feature.StandaloneMode, framework.WithSerial(), func() {
f := framework.NewDefaultFramework("static-pod-serial")
f.NamespacePodSecurityLevel = admissionapi.LevelBaseline
ginkgo.Context("when creating a static pod and restarting kubelet", func() {
var ns, podPath, staticPodName string
ginkgo.BeforeEach(func() {
ns = f.Namespace.Name
staticPodName = "static-pod-" + string(uuid.NewUUID())
podPath = kubeletCfg.StaticPodPath
})
ginkgo.AfterEach(func(ctx context.Context) {
ginkgo.By(fmt.Sprintf("delete the static pod (%v/%v)", ns, staticPodName))
err := deleteStaticPod(podPath, staticPodName, ns)
framework.ExpectNoError(err)
ginkgo.By(fmt.Sprintf("wait for pod to disappear (%v/%v)", ns, staticPodName))
gomega.Eventually(ctx, func(ctx context.Context) error {
_, err := getPodFromStandaloneKubelet(ctx, ns, staticPodName)
if apierrors.IsNotFound(err) {
return nil
}
return fmt.Errorf("pod (%v/%v) still exists", ns, staticPodName)
}).Should(gomega.Succeed())
})
ginkgo.It("the pod should be running and kubelet not panic", func(ctx context.Context) {
err := scheduleStaticPod(podPath, staticPodName, ns, createBasicStaticPodSpec(staticPodName, ns))
framework.ExpectNoError(err)
ginkgo.By("Waiting for the pod to be running")
gomega.Eventually(ctx, func(ctx context.Context) error {
pod, err := getPodFromStandaloneKubelet(ctx, ns, staticPodName)
if err != nil {
return fmt.Errorf("error getting pod(%v/%v) from standalone kubelet: %w", ns, staticPodName, err)
}
isReady, err := testutils.PodRunningReady(pod)
if err != nil {
return fmt.Errorf("error checking if pod (%v/%v) is running ready: %w", ns, staticPodName, err)
}
if !isReady {
return fmt.Errorf("pod (%v/%v) is not running", ns, staticPodName)
}
return nil
}, f.Timeouts.PodStart, time.Second*5).Should(gomega.Succeed())
ginkgo.By("stopping the kubelet")
restartKubelet := mustStopKubelet(ctx, f)
ginkgo.By("restarting the kubelet")
restartKubelet(ctx)
gomega.Eventually(ctx, func() bool {
return kubeletHealthCheck(kubeletHealthCheckURL)
}, f.Timeouts.PodStart, f.Timeouts.Poll).Should(gomega.BeTrueBecause("kubelet should be started"))
ginkgo.By("ensuring that pod is running")
gomega.Eventually(ctx, func(ctx context.Context) error {
pod, err := getPodFromStandaloneKubelet(ctx, ns, staticPodName)
if err != nil {
return fmt.Errorf("error getting pod(%v/%v) from standalone kubelet: %w", ns, staticPodName, err)
}
isReady, err := testutils.PodRunningReady(pod)
if err != nil {
return fmt.Errorf("error checking if pod (%v/%v) is running ready: %w", ns, staticPodName, err)
}
if !isReady {
return fmt.Errorf("pod (%v/%v) is not running", ns, staticPodName)
}
return nil
}, f.Timeouts.PodStart, time.Second*30).Should(gomega.Succeed())
})
})
})