diff --git a/pkg/api/pod/util.go b/pkg/api/pod/util.go index b4e7a8e5484..94de0b59db0 100644 --- a/pkg/api/pod/util.go +++ b/pkg/api/pod/util.go @@ -428,7 +428,7 @@ func GetValidationOptionsFromPodSpecAndMeta(podSpec, oldPodSpec *api.PodSpec, po OldPodViolatesLegacyMatchLabelKeysValidation: false, AllowContainerRestartPolicyRules: utilfeature.DefaultFeatureGate.Enabled(features.ContainerRestartRules), AllowUserNamespacesWithVolumeDevices: false, - // The RestartAllContainers rule action is allowed on sidecars. + // This also allows restart rules on sidecar containers. AllowRestartAllContainers: utilfeature.DefaultFeatureGate.Enabled(features.RestartAllContainersOnContainerExits), } @@ -477,7 +477,7 @@ func GetValidationOptionsFromPodSpecAndMeta(podSpec, oldPodSpec *api.PodSpec, po opts.AllowSidecarResizePolicy = opts.AllowSidecarResizePolicy || hasRestartableInitContainerResizePolicy(oldPodSpec) opts.AllowContainerRestartPolicyRules = opts.AllowContainerRestartPolicyRules || containerRestartRulesInUse(oldPodSpec) - opts.AllowRestartAllContainers = opts.AllowRestartAllContainers || containerRestartRulesInUseOnSidecar(oldPodSpec) + opts.AllowRestartAllContainers = opts.AllowRestartAllContainers || restartAllContainersActionInUse(oldPodSpec) // If old spec has userns and volume devices (doesn't work), we still allow // modifications to it. @@ -1807,18 +1807,27 @@ func workloadRefInUse(podSpec *api.PodSpec) bool { return podSpec.WorkloadRef != nil } -func containerRestartRulesInUseOnSidecar(oldPodSpec *api.PodSpec) bool { +func restartAllContainersActionInUse(oldPodSpec *api.PodSpec) bool { if oldPodSpec == nil { return false } - for _, c := range oldPodSpec.InitContainers { - if c.RestartPolicy != nil && *c.RestartPolicy == api.ContainerRestartPolicyAlways { - for _, rule := range c.RestartPolicyRules { - if rule.Action == api.ContainerRestartRuleActionRestartAllContainers { - return true - } + for _, c := range oldPodSpec.Containers { + for _, rule := range c.RestartPolicyRules { + if rule.Action == api.ContainerRestartRuleActionRestartAllContainers { + return true } } } + for _, c := range oldPodSpec.InitContainers { + for _, rule := range c.RestartPolicyRules { + if rule.Action == api.ContainerRestartRuleActionRestartAllContainers { + return true + } + } + // This feature also allows sidecar containers to have rules. + if c.RestartPolicy != nil && *c.RestartPolicy == api.ContainerRestartPolicyAlways && len(c.RestartPolicyRules) > 0 { + return true + } + } return false } diff --git a/pkg/api/pod/util_test.go b/pkg/api/pod/util_test.go index a0dabb9f5cf..4af5b6dd7ed 100644 --- a/pkg/api/pod/util_test.go +++ b/pkg/api/pod/util_test.go @@ -6314,7 +6314,7 @@ func TestDisabledWorkload(t *testing.T) { } } -func TestValidateContainerRestartRulesOnSidecarOption(t *testing.T) { +func TestValidateRestartAllContainersOption(t *testing.T) { policyAlways := api.ContainerRestartPolicyAlways testCases := []struct { name string @@ -6333,15 +6333,32 @@ func TestValidateContainerRestartRulesOnSidecarOption(t *testing.T) { want: false, }, { - name: "old pod spec has sidecar container without rules", + name: "old pod spec has container without action", oldPodSpec: &api.PodSpec{ - InitContainers: []api.Container{{ - RestartPolicy: &policyAlways, + Containers: []api.Container{{ + Name: "container", }}, }, featureEnabled: false, want: false, }, + { + name: "old pod spec has container with action", + oldPodSpec: &api.PodSpec{ + Containers: []api.Container{{ + Name: "container", + RestartPolicyRules: []api.ContainerRestartRule{{ + Action: api.ContainerRestartRuleActionRestartAllContainers, + ExitCodes: &api.ContainerRestartRuleOnExitCodes{ + Operator: api.ContainerRestartRuleOnExitCodesOpIn, + Values: []int32{42}, + }, + }}, + }}, + }, + featureEnabled: false, + want: true, + }, { name: "old pod spec has sidecar containers with rules", oldPodSpec: &api.PodSpec{ @@ -6363,11 +6380,15 @@ func TestValidateContainerRestartRulesOnSidecarOption(t *testing.T) { for _, tc := range testCases { t.Run(tc.name, func(t *testing.T) { - featuregatetesting.SetFeatureGateDuringTest(t, utilfeature.DefaultFeatureGate, features.RestartAllContainersOnContainerExits, tc.featureEnabled) + featuregatetesting.SetFeatureGatesDuringTest(t, utilfeature.DefaultFeatureGate, featuregatetesting.FeatureOverrides{ + features.ContainerRestartRules: tc.featureEnabled, + features.NodeDeclaredFeatures: tc.featureEnabled, + features.RestartAllContainersOnContainerExits: tc.featureEnabled, + }) // The new pod doesn't impact the outcome. gotOptions := GetValidationOptionsFromPodSpecAndMeta(nil, tc.oldPodSpec, nil, nil) if tc.want != gotOptions.AllowRestartAllContainers { - t.Errorf("unexpected diff, want: %v, got: %v", tc.want, gotOptions.AllowInvalidPodDeletionCost) + t.Errorf("unexpected diff, want: %v, got: %v", tc.want, gotOptions.AllowRestartAllContainers) } }) } diff --git a/pkg/api/v1/pod/util.go b/pkg/api/v1/pod/util.go index 9dceec4a610..6533b38ee89 100644 --- a/pkg/api/v1/pod/util.go +++ b/pkg/api/v1/pod/util.go @@ -492,6 +492,9 @@ func FindMatchingContainerRestartRule(container v1.Container, exitCode int32) (r // for the given pod. This is true if any container has a RestartAllContainers // action. func AllContainersCouldRestart(pod *v1.PodSpec) bool { + if pod == nil { + return false + } for _, container := range pod.InitContainers { for _, rule := range container.RestartPolicyRules { if rule.Action == v1.ContainerRestartRuleActionRestartAllContainers { diff --git a/pkg/api/v1/pod/util_test.go b/pkg/api/v1/pod/util_test.go index c5489ed16e2..bd3044e195e 100644 --- a/pkg/api/v1/pod/util_test.go +++ b/pkg/api/v1/pod/util_test.go @@ -1332,7 +1332,11 @@ func TestContainerHasRestartablePolicy(t *testing.T) { expected: true, }, } - featuregatetesting.SetFeatureGateDuringTest(t, utilfeature.DefaultFeatureGate, features.RestartAllContainersOnContainerExits, true) + featuregatetesting.SetFeatureGatesDuringTest(t, utilfeature.DefaultFeatureGate, featuregatetesting.FeatureOverrides{ + features.ContainerRestartRules: true, + features.NodeDeclaredFeatures: true, + features.RestartAllContainersOnContainerExits: true, + }) for _, tc := range testCases { t.Run(tc.name, func(t *testing.T) { diff --git a/pkg/apis/core/types.go b/pkg/apis/core/types.go index 6f88a950d7f..92fa8eb5433 100644 --- a/pkg/apis/core/types.go +++ b/pkg/apis/core/types.go @@ -3245,7 +3245,12 @@ type ContainerRestartRuleAction string // These are valid restart rule actions. const ( - ContainerRestartRuleActionRestart ContainerRestartRuleAction = "Restart" + // The container will be restarted if the rule matches. Only valid on normal init container and + // regular containers. Not valid on sidecar containers and ephemeral containers. + ContainerRestartRuleActionRestart ContainerRestartRuleAction = "Restart" + // All containers (except ephemeral containers) inside the pod will be terminated and restarted. + // Valid on normal init container, sidecar containers, and regular containers. Not valid on + // ephemeral containers. ContainerRestartRuleActionRestartAllContainers ContainerRestartRuleAction = "RestartAllContainers" ) diff --git a/pkg/apis/core/validation/validation.go b/pkg/apis/core/validation/validation.go index f2aa32d377e..5150e1d3971 100644 --- a/pkg/apis/core/validation/validation.go +++ b/pkg/apis/core/validation/validation.go @@ -3316,6 +3316,9 @@ func validateInitContainerRestartPolicy(restartPolicy *core.ContainerRestartPoli var allErrors field.ErrorList if restartPolicy == nil { + if len(restartRules) > 0 { + allErrors = append(allErrors, field.Required(fldPath.Child("restartPolicy"), "must specify restartPolicy when restart rules are used")) + } return allErrors } if opts.AllowContainerRestartPolicyRules { @@ -3665,6 +3668,14 @@ var supportedContainerRestartPolicyOperators = sets.New( core.ContainerRestartRuleOnExitCodesOpNotIn, ) +// Supported actions depend on whether corresponding feature gates are enabled. +var ( + // Without AllowRestartAllContainers + supportedContainerRestartRuleActions = sets.New(core.ContainerRestartRuleActionRestart) + // With AllowRestartAllContainers + supportedContainerRestartRuleActionsWithRestartAllContainers = sets.New(core.ContainerRestartRuleActionRestart, core.ContainerRestartRuleActionRestartAllContainers) +) + // validateContainerRestartPolicy checks the container-level restartPolicy and restartPolicyRules are valid for // regular containers, init containers, and sidecar containers. func validateContainerRestartPolicy(policy *core.ContainerRestartPolicy, rules []core.ContainerRestartRule, fldPath *field.Path, opts PodValidationOptions) field.ErrorList { @@ -3685,11 +3696,11 @@ func validateContainerRestartPolicy(policy *core.ContainerRestartPolicy, rules [ } for i, rule := range rules { policyRulesFld := fldPath.Child("restartPolicyRules").Index(i) - var supportedContainerRestartRuleActions = sets.New(core.ContainerRestartRuleActionRestart) + allowedActions := supportedContainerRestartRuleActions if opts.AllowRestartAllContainers { - supportedContainerRestartRuleActions.Insert(core.ContainerRestartRuleActionRestartAllContainers) + allowedActions = supportedContainerRestartRuleActionsWithRestartAllContainers } - if !supportedContainerRestartRuleActions.Has(rule.Action) { + if !allowedActions.Has(rule.Action) { allErrs = append(allErrs, field.NotSupported(policyRulesFld.Child("action"), rule.Action, sets.List(supportedContainerRestartRuleActions))) } @@ -3809,8 +3820,8 @@ func validateInitContainers(containers []core.Container, os *core.PodOS, regular restartAlways := false // Apply the validation specific to init containers + allErrs = append(allErrs, validateInitContainerRestartPolicy(ctr.RestartPolicy, ctr.RestartPolicyRules, idxPath, opts)...) if ctr.RestartPolicy != nil { - allErrs = append(allErrs, validateInitContainerRestartPolicy(ctr.RestartPolicy, ctr.RestartPolicyRules, idxPath, opts)...) restartAlways = *ctr.RestartPolicy == core.ContainerRestartPolicyAlways } diff --git a/pkg/apis/core/validation/validation_test.go b/pkg/apis/core/validation/validation_test.go index e9f60fec62c..016982d0995 100644 --- a/pkg/apis/core/validation/validation_test.go +++ b/pkg/apis/core/validation/validation_test.go @@ -29205,9 +29205,10 @@ func TestValidateContainerRestartPolicy(t *testing.T) { podRestartPolicyAlways := core.RestartPolicyAlways successCases := []struct { - Name string - RestartPolicy *core.ContainerRestartPolicy - RestartPolicyRules []core.ContainerRestartRule + Name string + RestartPolicy *core.ContainerRestartPolicy + RestartPolicyRules []core.ContainerRestartRule + AllowRestartAllContainers bool }{ { Name: "no-restart-policy-and-rules", @@ -29231,6 +29232,17 @@ func TestValidateContainerRestartPolicy(t *testing.T) { Values: []int32{42}, }, }}, + }, { + Name: "restart-all-containers", + RestartPolicy: &containerRestartPolicyNever, + RestartPolicyRules: []core.ContainerRestartRule{{ + Action: "RestartAllContainers", + ExitCodes: &core.ContainerRestartRuleOnExitCodes{ + Operator: "In", + Values: []int32{42}, + }, + }}, + AllowRestartAllContainers: true, }, } @@ -29246,6 +29258,7 @@ func TestValidateContainerRestartPolicy(t *testing.T) { }} opts := PodValidationOptions{ AllowContainerRestartPolicyRules: true, + AllowRestartAllContainers: tc.AllowRestartAllContainers, } errs := validateContainers(containers, podOS, volumeDevices, nil, defaultGracePeriod, field.NewPath("containers"), opts, &podRestartPolicyAlways, noUserNamespace) if len(errs) > 0 { @@ -29394,39 +29407,29 @@ func TestValidateContainerRestartPolicy(t *testing.T) { }) } - // test cases sidecar containers + containerRestartPolicyAlways := core.ContainerRestartPolicyAlways + // test cases init containers cases := []struct { title string + restartPolicy *core.ContainerRestartPolicy restartPolicyRules []core.ContainerRestartRule opts PodValidationOptions expectedErrors field.ErrorList }{ { "sidecar containers without restart policy rules", + &containerRestartPolicyAlways, nil, PodValidationOptions{ AllowContainerRestartPolicyRules: true, }, nil, }, - { - "restart policy rules are not supported with restart policy Always without option", - []core.ContainerRestartRule{{ - Action: core.ContainerRestartRuleActionRestart, - ExitCodes: &core.ContainerRestartRuleOnExitCodes{ - Operator: core.ContainerRestartRuleOnExitCodesOpIn, - Values: []int32{1}, - }, - }}, - PodValidationOptions{ - AllowContainerRestartPolicyRules: true, - }, - field.ErrorList{{Type: field.ErrorTypeForbidden, Field: "initContainers[0].restartPolicyRules", BadValue: ""}}, - }, { "restart policy rules are supported with restart policy Always with option", + &containerRestartPolicyAlways, []core.ContainerRestartRule{{ - Action: core.ContainerRestartRuleActionRestart, + Action: core.ContainerRestartRuleActionRestartAllContainers, ExitCodes: &core.ContainerRestartRuleOnExitCodes{ Operator: core.ContainerRestartRuleOnExitCodesOpIn, Values: []int32{1}, @@ -29438,6 +29441,37 @@ func TestValidateContainerRestartPolicy(t *testing.T) { }, nil, }, + { + "restart policy rules are not supported with restart policy Always without option", + &containerRestartPolicyAlways, + []core.ContainerRestartRule{{ + Action: core.ContainerRestartRuleActionRestartAllContainers, + ExitCodes: &core.ContainerRestartRuleOnExitCodes{ + Operator: core.ContainerRestartRuleOnExitCodesOpIn, + Values: []int32{1}, + }, + }}, + PodValidationOptions{ + AllowContainerRestartPolicyRules: true, + }, + field.ErrorList{{Type: field.ErrorTypeForbidden, Field: "initContainers[0].restartPolicyRules", BadValue: ""}}, + }, + { + "restart policy rules are not supported if no restart policy is set", + nil, + []core.ContainerRestartRule{{ + Action: core.ContainerRestartRuleActionRestartAllContainers, + ExitCodes: &core.ContainerRestartRuleOnExitCodes{ + Operator: core.ContainerRestartRuleOnExitCodesOpIn, + Values: []int32{1}, + }, + }}, + PodValidationOptions{ + AllowContainerRestartPolicyRules: true, + AllowRestartAllContainers: true, + }, + field.ErrorList{{Type: field.ErrorTypeRequired, Field: "initContainers[0].restartPolicy", BadValue: ""}}, + }, } for _, tc := range cases { @@ -29447,7 +29481,7 @@ func TestValidateContainerRestartPolicy(t *testing.T) { Image: "image", ImagePullPolicy: "IfNotPresent", TerminationMessagePolicy: "File", - RestartPolicy: &containerRestartPolicyAlways, + RestartPolicy: tc.restartPolicy, RestartPolicyRules: tc.restartPolicyRules, }} errs := validateInitContainers(containers, podOS, nil, volumeDevices, nil, defaultGracePeriod, field.NewPath("initContainers"), tc.opts, &podRestartPolicyAlways, noUserNamespace) diff --git a/pkg/features/kube_features.go b/pkg/features/kube_features.go index be67633c1e0..4165f206849 100644 --- a/pkg/features/kube_features.go +++ b/pkg/features/kube_features.go @@ -2373,7 +2373,7 @@ var defaultKubernetesFeatureGateDependencies = map[featuregate.Feature][]feature // RestartAllContainersOnContainerExits introduces a new container restart rule action. // All restart rules will be dropped by API if ContainerRestartRules feature is not enabled. - RestartAllContainersOnContainerExits: {ContainerRestartRules}, + RestartAllContainersOnContainerExits: {ContainerRestartRules, NodeDeclaredFeatures}, RotateKubeletServerCertificate: {}, diff --git a/pkg/kubelet/container/runtime.go b/pkg/kubelet/container/runtime.go index 66bf66e425c..7fb7c6c147a 100644 --- a/pkg/kubelet/container/runtime.go +++ b/pkg/kubelet/container/runtime.go @@ -100,7 +100,7 @@ type Runtime interface { // TODO: Revisit this method and make it cleaner. GarbageCollect(ctx context.Context, gcPolicy GCPolicy, allSourcesReady bool, evictNonDeletedPods bool) error // SyncPod syncs the running pod into the desired pod. - SyncPod(ctx context.Context, pod *v1.Pod, podStatus *PodStatus, pullSecrets []v1.Secret, backOff *flowcontrol.Backoff, apiPodStatus *v1.PodStatus) PodSyncResult + SyncPod(ctx context.Context, pod *v1.Pod, podStatus *PodStatus, pullSecrets []v1.Secret, backOff *flowcontrol.Backoff, restartAllContainers bool) PodSyncResult // KillPod kills all the containers of a pod. Pod may be nil, running pod must not be. // TODO(random-liu): Return PodSyncResult in KillPod. // gracePeriodOverride if specified allows the caller to override the pod default grace period. diff --git a/pkg/kubelet/container/testing/fake_runtime.go b/pkg/kubelet/container/testing/fake_runtime.go index 07e699603d2..b8bdb258e5b 100644 --- a/pkg/kubelet/container/testing/fake_runtime.go +++ b/pkg/kubelet/container/testing/fake_runtime.go @@ -237,7 +237,7 @@ func (f *FakeRuntime) GetPods(_ context.Context, all bool) ([]*kubecontainer.Pod return pods, f.Err } -func (f *FakeRuntime) SyncPod(_ context.Context, pod *v1.Pod, _ *kubecontainer.PodStatus, _ []v1.Secret, backOff *flowcontrol.Backoff, _ *v1.PodStatus) (result kubecontainer.PodSyncResult) { +func (f *FakeRuntime) SyncPod(_ context.Context, pod *v1.Pod, _ *kubecontainer.PodStatus, _ []v1.Secret, backOff *flowcontrol.Backoff, _ bool) (result kubecontainer.PodSyncResult) { f.Lock() defer f.Unlock() diff --git a/pkg/kubelet/container/testing/mocks.go b/pkg/kubelet/container/testing/mocks.go index c717dc49863..c167dec38b9 100644 --- a/pkg/kubelet/container/testing/mocks.go +++ b/pkg/kubelet/container/testing/mocks.go @@ -1487,16 +1487,16 @@ func (_c *MockRuntime_Status_Call) RunAndReturn(run func(ctx context.Context) (* } // SyncPod provides a mock function for the type MockRuntime -func (_mock *MockRuntime) SyncPod(ctx context.Context, pod *v10.Pod, podStatus *container.PodStatus, pullSecrets []v10.Secret, backOff *flowcontrol.Backoff, apiPodStatus *v10.PodStatus) container.PodSyncResult { - ret := _mock.Called(ctx, pod, podStatus, pullSecrets, backOff, apiPodStatus) +func (_mock *MockRuntime) SyncPod(ctx context.Context, pod *v10.Pod, podStatus *container.PodStatus, pullSecrets []v10.Secret, backOff *flowcontrol.Backoff, restartAllContainers bool) container.PodSyncResult { + ret := _mock.Called(ctx, pod, podStatus, pullSecrets, backOff, restartAllContainers) if len(ret) == 0 { panic("no return value specified for SyncPod") } var r0 container.PodSyncResult - if returnFunc, ok := ret.Get(0).(func(context.Context, *v10.Pod, *container.PodStatus, []v10.Secret, *flowcontrol.Backoff, *v10.PodStatus) container.PodSyncResult); ok { - r0 = returnFunc(ctx, pod, podStatus, pullSecrets, backOff, apiPodStatus) + if returnFunc, ok := ret.Get(0).(func(context.Context, *v10.Pod, *container.PodStatus, []v10.Secret, *flowcontrol.Backoff, bool) container.PodSyncResult); ok { + r0 = returnFunc(ctx, pod, podStatus, pullSecrets, backOff, restartAllContainers) } else { r0 = ret.Get(0).(container.PodSyncResult) } @@ -1514,12 +1514,12 @@ type MockRuntime_SyncPod_Call struct { // - podStatus *container.PodStatus // - pullSecrets []v10.Secret // - backOff *flowcontrol.Backoff -// - apiPodStatus *v10.PodStatus -func (_e *MockRuntime_Expecter) SyncPod(ctx interface{}, pod interface{}, podStatus interface{}, pullSecrets interface{}, backOff interface{}, apiPodStatus interface{}) *MockRuntime_SyncPod_Call { - return &MockRuntime_SyncPod_Call{Call: _e.mock.On("SyncPod", ctx, pod, podStatus, pullSecrets, backOff, apiPodStatus)} +// - restartAllContainers bool +func (_e *MockRuntime_Expecter) SyncPod(ctx interface{}, pod interface{}, podStatus interface{}, pullSecrets interface{}, backOff interface{}, restartAllContainers interface{}) *MockRuntime_SyncPod_Call { + return &MockRuntime_SyncPod_Call{Call: _e.mock.On("SyncPod", ctx, pod, podStatus, pullSecrets, backOff, restartAllContainers)} } -func (_c *MockRuntime_SyncPod_Call) Run(run func(ctx context.Context, pod *v10.Pod, podStatus *container.PodStatus, pullSecrets []v10.Secret, backOff *flowcontrol.Backoff, apiPodStatus *v10.PodStatus)) *MockRuntime_SyncPod_Call { +func (_c *MockRuntime_SyncPod_Call) Run(run func(ctx context.Context, pod *v10.Pod, podStatus *container.PodStatus, pullSecrets []v10.Secret, backOff *flowcontrol.Backoff, restartAllContainers bool)) *MockRuntime_SyncPod_Call { _c.Call.Run(func(args mock.Arguments) { var arg0 context.Context if args[0] != nil { @@ -1541,9 +1541,9 @@ func (_c *MockRuntime_SyncPod_Call) Run(run func(ctx context.Context, pod *v10.P if args[4] != nil { arg4 = args[4].(*flowcontrol.Backoff) } - var arg5 *v10.PodStatus + var arg5 bool if args[5] != nil { - arg5 = args[5].(*v10.PodStatus) + arg5 = args[5].(bool) } run( arg0, @@ -1562,7 +1562,7 @@ func (_c *MockRuntime_SyncPod_Call) Return(podSyncResult container.PodSyncResult return _c } -func (_c *MockRuntime_SyncPod_Call) RunAndReturn(run func(ctx context.Context, pod *v10.Pod, podStatus *container.PodStatus, pullSecrets []v10.Secret, backOff *flowcontrol.Backoff, apiPodStatus *v10.PodStatus) container.PodSyncResult) *MockRuntime_SyncPod_Call { +func (_c *MockRuntime_SyncPod_Call) RunAndReturn(run func(ctx context.Context, pod *v10.Pod, podStatus *container.PodStatus, pullSecrets []v10.Secret, backOff *flowcontrol.Backoff, restartAllContainers bool) container.PodSyncResult) *MockRuntime_SyncPod_Call { _c.Call.Return(run) return _c } diff --git a/pkg/kubelet/kubelet.go b/pkg/kubelet/kubelet.go index 15318873724..414bfca2158 100644 --- a/pkg/kubelet/kubelet.go +++ b/pkg/kubelet/kubelet.go @@ -2139,7 +2139,15 @@ func (kl *Kubelet) SyncPod(ctx context.Context, updateType kubetypes.SyncPodType // Use WithoutCancel instead of a new context.TODO() to propagate trace context // Call the container runtime's SyncPod callback sctx := context.WithoutCancel(ctx) - result := kl.containerRuntime.SyncPod(sctx, pod, podStatus, pullSecrets, kl.crashLoopBackOff, &apiPodStatus) + restartingAllContainers := false + if utilfeature.DefaultFeatureGate.Enabled(features.RestartAllContainersOnContainerExits) { + for _, cond := range apiPodStatus.Conditions { + if cond.Type == v1.AllContainersRestarting && cond.Status == v1.ConditionTrue { + restartingAllContainers = true + } + } + } + result := kl.containerRuntime.SyncPod(sctx, pod, podStatus, pullSecrets, kl.crashLoopBackOff, restartingAllContainers) kl.reasonCache.Update(pod.UID, result) if utilfeature.DefaultFeatureGate.Enabled(features.InPlacePodVerticalScaling) { diff --git a/pkg/kubelet/kubelet_pods_test.go b/pkg/kubelet/kubelet_pods_test.go index ce21feba37e..f8890050e79 100644 --- a/pkg/kubelet/kubelet_pods_test.go +++ b/pkg/kubelet/kubelet_pods_test.go @@ -3530,8 +3530,11 @@ func TestPodPhaseWithContainerRestartPolicyInitContainers(t *testing.T) { } func TestPodPhaseWithRestartAllContainers(t *testing.T) { - featuregatetesting.SetFeatureGateDuringTest(t, utilfeature.DefaultFeatureGate, features.ContainerRestartRules, true) - featuregatetesting.SetFeatureGateDuringTest(t, utilfeature.DefaultFeatureGate, features.RestartAllContainersOnContainerExits, true) + featuregatetesting.SetFeatureGatesDuringTest(t, utilfeature.DefaultFeatureGate, featuregatetesting.FeatureOverrides{ + features.ContainerRestartRules: true, + features.NodeDeclaredFeatures: true, + features.RestartAllContainersOnContainerExits: true, + }) var ( containerRestartPolicyAlways = v1.ContainerRestartPolicyAlways containerRestartPolicyNever = v1.ContainerRestartPolicyNever @@ -4046,8 +4049,12 @@ func TestConvertToAPIContainerStatuses(t *testing.T) { }, }, } + featuregatetesting.SetFeatureGatesDuringTest(t, utilfeature.DefaultFeatureGate, featuregatetesting.FeatureOverrides{ + features.ContainerRestartRules: true, + features.NodeDeclaredFeatures: true, + features.RestartAllContainersOnContainerExits: true, + }) - featuregatetesting.SetFeatureGateDuringTest(t, utilfeature.DefaultFeatureGate, features.RestartAllContainersOnContainerExits, true) for _, test := range tests { t.Run(test.name, func(t *testing.T) { testKubelet := newTestKubelet(t, false /* controllerAttachDetachEnabled */) diff --git a/pkg/kubelet/kuberuntime/kuberuntime_manager.go b/pkg/kubelet/kuberuntime/kuberuntime_manager.go index 8b640817f15..ec5ddb75e29 100644 --- a/pkg/kubelet/kuberuntime/kuberuntime_manager.go +++ b/pkg/kubelet/kuberuntime/kuberuntime_manager.go @@ -1020,7 +1020,7 @@ func (m *kubeGenericRuntimeManager) updatePodContainerResources(ctx context.Cont } // computePodActions checks whether the pod spec has changed and returns the changes if true. -func (m *kubeGenericRuntimeManager) computePodActions(ctx context.Context, pod *v1.Pod, podStatus *kubecontainer.PodStatus, apiPodStatus *v1.PodStatus) podActions { +func (m *kubeGenericRuntimeManager) computePodActions(ctx context.Context, pod *v1.Pod, podStatus *kubecontainer.PodStatus, restartAllContainers bool) podActions { logger := klog.FromContext(ctx) logger.V(5).Info("Syncing Pod", "pod", klog.KObj(pod)) @@ -1035,30 +1035,19 @@ func (m *kubeGenericRuntimeManager) computePodActions(ctx context.Context, pod * } // Needs to kill and remove all containers in reverse order when the pod is marked for RestartAllContainers. - if utilfeature.DefaultFeatureGate.Enabled(features.RestartAllContainersOnContainerExits) { - allContainersRestarting := false - if apiPodStatus != nil { - for _, cond := range apiPodStatus.Conditions { - if cond.Type == v1.AllContainersRestarting && cond.Status == v1.ConditionTrue { - allContainersRestarting = true - break - } - } - } - if allContainersRestarting { - logger.V(3).Info("Pod marked for RestartAllContainers", "pod", klog.KObj(pod)) - changes.KillPod = false - changes.CreateSandbox = false - // Kill and remove containers in reverse order. Source containers (which exited and triggered - // RestartAllContainers) are removed last. - sourceInitContainers, targetInitContainers := m.getContainersToRemove(ctx, pod.Spec.InitContainers, podStatus) - sourceContainers, targetContainers := m.getContainersToRemove(ctx, pod.Spec.Containers, podStatus) - changes.ContainersToRemove = append(changes.ContainersToRemove, targetContainers...) - changes.ContainersToRemove = append(changes.ContainersToRemove, targetInitContainers...) - changes.ContainersToRemove = append(changes.ContainersToRemove, sourceContainers...) - changes.ContainersToRemove = append(changes.ContainersToRemove, sourceInitContainers...) - return changes - } + if utilfeature.DefaultFeatureGate.Enabled(features.RestartAllContainersOnContainerExits) && restartAllContainers { + logger.V(3).Info("Pod marked for RestartAllContainers", "pod", klog.KObj(pod)) + changes.KillPod = false + changes.CreateSandbox = false + // Kill and remove containers in reverse order. Source containers (which exited and triggered + // RestartAllContainers) are removed last. + sourceInitContainers, targetInitContainers := m.getContainersToRemove(ctx, pod.Spec.InitContainers, podStatus) + sourceContainers, targetContainers := m.getContainersToRemove(ctx, pod.Spec.Containers, podStatus) + changes.ContainersToRemove = append(changes.ContainersToRemove, targetContainers...) + changes.ContainersToRemove = append(changes.ContainersToRemove, targetInitContainers...) + changes.ContainersToRemove = append(changes.ContainersToRemove, sourceContainers...) + changes.ContainersToRemove = append(changes.ContainersToRemove, sourceInitContainers...) + return changes } // If we need to (re-)create the pod sandbox, everything will need to be @@ -1282,10 +1271,10 @@ func (m *kubeGenericRuntimeManager) getContainersToRemove(ctx context.Context, c // 6. Create init containers. // 7. Resize running containers (if InPlacePodVerticalScaling==true) // 8. Create normal containers. -func (m *kubeGenericRuntimeManager) SyncPod(ctx context.Context, pod *v1.Pod, podStatus *kubecontainer.PodStatus, pullSecrets []v1.Secret, backOff *flowcontrol.Backoff, apiPodStatus *v1.PodStatus) (result kubecontainer.PodSyncResult) { +func (m *kubeGenericRuntimeManager) SyncPod(ctx context.Context, pod *v1.Pod, podStatus *kubecontainer.PodStatus, pullSecrets []v1.Secret, backOff *flowcontrol.Backoff, restartAllContainers bool) (result kubecontainer.PodSyncResult) { logger := klog.FromContext(ctx) // Step 1: Compute sandbox and container changes. - podContainerChanges := m.computePodActions(ctx, pod, podStatus, apiPodStatus) + podContainerChanges := m.computePodActions(ctx, pod, podStatus, restartAllContainers) logger.V(3).Info("computePodActions got for pod", "podActions", podContainerChanges, "pod", klog.KObj(pod)) if podContainerChanges.CreateSandbox { ref, err := ref.GetReference(legacyscheme.Scheme, pod) diff --git a/pkg/kubelet/kuberuntime/kuberuntime_manager_test.go b/pkg/kubelet/kuberuntime/kuberuntime_manager_test.go index 60e7ccca079..d2a75942219 100644 --- a/pkg/kubelet/kuberuntime/kuberuntime_manager_test.go +++ b/pkg/kubelet/kuberuntime/kuberuntime_manager_test.go @@ -661,7 +661,7 @@ func TestSyncPod(t *testing.T) { } backOff := flowcontrol.NewBackOff(time.Second, time.Minute) - result := m.SyncPod(tCtx, pod, &kubecontainer.PodStatus{}, []v1.Secret{}, backOff, &pod.Status) + result := m.SyncPod(tCtx, pod, &kubecontainer.PodStatus{}, []v1.Secret{}, backOff, false) assert.NoError(t, result.Error()) assert.Len(t, fakeRuntime.Containers, 2) assert.Len(t, fakeImage.Images, 2) @@ -722,7 +722,7 @@ func TestSyncPodWithConvertedPodSysctls(t *testing.T) { } backOff := flowcontrol.NewBackOff(time.Second, time.Minute) - result := m.SyncPod(tCtx, pod, &kubecontainer.PodStatus{}, []v1.Secret{}, backOff, &pod.Status) + result := m.SyncPod(tCtx, pod, &kubecontainer.PodStatus{}, []v1.Secret{}, backOff, false) assert.NoError(t, result.Error()) assert.Equal(t, exceptSysctls, pod.Spec.SecurityContext.Sysctls) for _, sandbox := range fakeRuntime.Sandboxes { @@ -812,7 +812,7 @@ func TestSyncPodWithInitContainers(t *testing.T) { // 1. should only create the init container. podStatus, err := m.GetPodStatus(tCtx, pod.UID, pod.Name, pod.Namespace) assert.NoError(t, err) - result := m.SyncPod(tCtx, pod, podStatus, []v1.Secret{}, backOff, &pod.Status) + result := m.SyncPod(tCtx, pod, podStatus, []v1.Secret{}, backOff, false) assert.NoError(t, result.Error()) expected := []*cRecord{ {name: initContainers[0].Name, attempt: 0, state: runtimeapi.ContainerState_CONTAINER_RUNNING}, @@ -822,7 +822,7 @@ func TestSyncPodWithInitContainers(t *testing.T) { // 2. should not create app container because init container is still running. podStatus, err = m.GetPodStatus(tCtx, pod.UID, pod.Name, pod.Namespace) assert.NoError(t, err) - result = m.SyncPod(tCtx, pod, podStatus, []v1.Secret{}, backOff, &pod.Status) + result = m.SyncPod(tCtx, pod, podStatus, []v1.Secret{}, backOff, false) assert.NoError(t, result.Error()) verifyContainerStatuses(t, fakeRuntime, expected, "init container still running; do nothing") @@ -838,7 +838,7 @@ func TestSyncPodWithInitContainers(t *testing.T) { // Sync again. podStatus, err = m.GetPodStatus(tCtx, pod.UID, pod.Name, pod.Namespace) assert.NoError(t, err) - result = m.SyncPod(tCtx, pod, podStatus, []v1.Secret{}, backOff, &pod.Status) + result = m.SyncPod(tCtx, pod, podStatus, []v1.Secret{}, backOff, false) assert.NoError(t, result.Error()) expected = []*cRecord{ {name: initContainers[0].Name, attempt: 0, state: runtimeapi.ContainerState_CONTAINER_EXITED}, @@ -854,7 +854,7 @@ func TestSyncPodWithInitContainers(t *testing.T) { // Sync again. podStatus, err = m.GetPodStatus(tCtx, pod.UID, pod.Name, pod.Namespace) assert.NoError(t, err) - result = m.SyncPod(tCtx, pod, podStatus, []v1.Secret{}, backOff, &pod.Status) + result = m.SyncPod(tCtx, pod, podStatus, []v1.Secret{}, backOff, false) assert.NoError(t, result.Error()) expected = []*cRecord{ // The first init container instance is purged and no longer visible. @@ -869,7 +869,11 @@ func TestSyncPodWithInitContainers(t *testing.T) { func TestSyncPodWithRestartAllContainers(t *testing.T) { tCtx := ktesting.Init(t) - featuregatetesting.SetFeatureGateDuringTest(t, utilfeature.DefaultFeatureGate, features.RestartAllContainersOnContainerExits, true) + featuregatetesting.SetFeatureGatesDuringTest(t, utilfeature.DefaultFeatureGate, featuregatetesting.FeatureOverrides{ + features.ContainerRestartRules: true, + features.NodeDeclaredFeatures: true, + features.RestartAllContainersOnContainerExits: true, + }) fakeRuntime, _, m, err := createTestRuntimeManager(tCtx) require.NoError(t, err) @@ -920,7 +924,7 @@ func TestSyncPodWithRestartAllContainers(t *testing.T) { // 1. Run the pod first. First SyncPod should execute the init container. podStatus, err := m.GetPodStatus(tCtx, pod.UID, pod.Name, pod.Namespace) require.NoError(t, err) - result := m.SyncPod(tCtx, pod, podStatus, []v1.Secret{}, backOff, &pod.Status) + result := m.SyncPod(tCtx, pod, podStatus, []v1.Secret{}, backOff, false) require.NoError(t, result.Error()) expected := []*cRecord{ {name: initContainers[0].Name, attempt: 0, state: runtimeapi.ContainerState_CONTAINER_RUNNING}, @@ -939,7 +943,7 @@ func TestSyncPodWithRestartAllContainers(t *testing.T) { // Sync again. podStatus, err = m.GetPodStatus(tCtx, pod.UID, pod.Name, pod.Namespace) require.NoError(t, err) - result = m.SyncPod(tCtx, pod, podStatus, []v1.Secret{}, backOff, &pod.Status) + result = m.SyncPod(tCtx, pod, podStatus, []v1.Secret{}, backOff, false) require.NoError(t, result.Error()) expected = []*cRecord{ {name: initContainers[0].Name, attempt: 0, state: runtimeapi.ContainerState_CONTAINER_EXITED}, @@ -950,14 +954,6 @@ func TestSyncPodWithRestartAllContainers(t *testing.T) { // 3. Exits the container foo2 with code 42, the pod should be marked for RestartAllContainers, and // should remove all containers. - apiPodStatus := &v1.PodStatus{ - Conditions: []v1.PodCondition{ - { - Type: v1.AllContainersRestarting, - Status: v1.ConditionTrue, - }, - }, - } sandboxIDs, err = m.getSandboxIDByPodUID(tCtx, pod.UID, nil) require.NoError(t, err) sandboxID = sandboxIDs[0] @@ -970,23 +966,15 @@ func TestSyncPodWithRestartAllContainers(t *testing.T) { podStatus, err = m.GetPodStatus(tCtx, pod.UID, pod.Name, pod.Namespace) require.NoError(t, err) - result = m.SyncPod(tCtx, pod, podStatus, []v1.Secret{}, backOff, apiPodStatus) + result = m.SyncPod(tCtx, pod, podStatus, []v1.Secret{}, backOff, true) require.NoError(t, result.Error()) expected = []*cRecord{} verifyContainerStatuses(t, fakeRuntime, expected, "kill all containers") // 4. Unmark the pod. Now it should start the init container first. - apiPodStatus = &v1.PodStatus{ - Conditions: []v1.PodCondition{ - { - Type: v1.AllContainersRestarting, - Status: v1.ConditionFalse, - }, - }, - } podStatus, err = m.GetPodStatus(tCtx, pod.UID, pod.Name, pod.Namespace) require.NoError(t, err) - result = m.SyncPod(tCtx, pod, podStatus, []v1.Secret{}, backOff, apiPodStatus) + result = m.SyncPod(tCtx, pod, podStatus, []v1.Secret{}, backOff, false) require.NoError(t, result.Error()) expected = []*cRecord{ {name: initContainers[0].Name, attempt: 0, state: runtimeapi.ContainerState_CONTAINER_RUNNING}, @@ -1358,7 +1346,7 @@ func TestComputePodActions(t *testing.T) { test.mutateStatusFn(status) } tCtx := ktesting.Init(t) - actions := m.computePodActions(tCtx, pod, status, &pod.Status) + actions := m.computePodActions(tCtx, pod, status, false) verifyActions(t, &test.actions, &actions, desc) if test.resetStatusFn != nil { test.resetStatusFn(status) @@ -1367,8 +1355,11 @@ func TestComputePodActions(t *testing.T) { } func TestComputePodActionsForRestartAllContainers(t *testing.T) { - featuregatetesting.SetFeatureGateDuringTest(t, utilfeature.DefaultFeatureGate, features.ContainerRestartRules, true) - featuregatetesting.SetFeatureGateDuringTest(t, utilfeature.DefaultFeatureGate, features.RestartAllContainersOnContainerExits, true) + featuregatetesting.SetFeatureGatesDuringTest(t, utilfeature.DefaultFeatureGate, featuregatetesting.FeatureOverrides{ + features.ContainerRestartRules: true, + features.NodeDeclaredFeatures: true, + features.RestartAllContainersOnContainerExits: true, + }) TestComputePodActions(t) TestComputePodActionsWithInitContainers(t) @@ -1402,6 +1393,7 @@ func TestComputePodActionsForRestartAllContainers(t *testing.T) { for desc, test := range map[string]struct { podFunc func() *v1.Pod podStatusFunc func() *kubecontainer.PodStatus + restartAllContainers bool containersToRemove []containerToRemoveInfo containersToStart []int initContainersToStart []int @@ -1427,6 +1419,7 @@ func TestComputePodActionsForRestartAllContainers(t *testing.T) { _, status := makeBasePodAndStatus() return status }, + restartAllContainers: true, containersToRemove: []containerToRemoveInfo{ {name: "foo3", kill: true}, {name: "foo2", kill: true}, @@ -1443,6 +1436,7 @@ func TestComputePodActionsForRestartAllContainers(t *testing.T) { _, status := makeBasePodAndStatusWithInitContainers() return status }, + restartAllContainers: true, containersToRemove: []containerToRemoveInfo{ {name: "init3"}, {name: "init2"}, @@ -1459,6 +1453,7 @@ func TestComputePodActionsForRestartAllContainers(t *testing.T) { _, status := makeBasePodAndStatusWithRestartableInitContainers() return status }, + restartAllContainers: true, containersToRemove: []containerToRemoveInfo{ {name: "restartable-init-3", kill: true}, {name: "restartable-init-2", kill: true}, @@ -1492,6 +1487,7 @@ func TestComputePodActionsForRestartAllContainers(t *testing.T) { status.ContainerStatuses[2] = sourceStatus return status }, + restartAllContainers: true, containersToRemove: []containerToRemoveInfo{ {name: "init2", kill: true}, {name: "init1"}, @@ -1525,6 +1521,7 @@ func TestComputePodActionsForRestartAllContainers(t *testing.T) { }) return status }, + restartAllContainers: true, containersToRemove: []containerToRemoveInfo{ {name: "foo1", kill: true}, {name: "restartable-init-2", kill: true}, @@ -1562,6 +1559,7 @@ func TestComputePodActionsForRestartAllContainers(t *testing.T) { }) return status }, + restartAllContainers: true, containersToRemove: []containerToRemoveInfo{ {name: "foo2", kill: true}, {name: "foo1", kill: true}, @@ -1574,7 +1572,6 @@ func TestComputePodActionsForRestartAllContainers(t *testing.T) { "all containers removed, start init container": { podFunc: func() *v1.Pod { pod, _ := makeBasePodAndStatusWithInitContainers() - pod.Status.Conditions = allContainersRestartingFalse return pod }, podStatusFunc: func() *kubecontainer.PodStatus { @@ -1588,7 +1585,6 @@ func TestComputePodActionsForRestartAllContainers(t *testing.T) { "all containers removed, start regular container": { podFunc: func() *v1.Pod { pod, _ := makeBasePodAndStatus() - pod.Status.Conditions = allContainersRestartingFalse return pod }, podStatusFunc: func() *kubecontainer.PodStatus { @@ -1603,7 +1599,7 @@ func TestComputePodActionsForRestartAllContainers(t *testing.T) { pod := test.podFunc() status := test.podStatusFunc() tCtx := ktesting.Init(t) - actions := m.computePodActions(tCtx, pod, status, &pod.Status) + actions := m.computePodActions(tCtx, pod, status, test.restartAllContainers) expected := &podActions{ CreateSandbox: false, @@ -1905,7 +1901,7 @@ func TestComputePodActionsWithInitContainers(t *testing.T) { test.mutateStatusFn(status) } tCtx := ktesting.Init(t) - actions := m.computePodActions(tCtx, pod, status, &pod.Status) + actions := m.computePodActions(tCtx, pod, status, false) verifyActions(t, &test.actions, &actions, desc) }) } @@ -2312,7 +2308,7 @@ func TestComputePodActionsWithRestartableInitContainers(t *testing.T) { test.mutateStatusFn(pod, status) } tCtx := ktesting.Init(t) - actions := m.computePodActions(tCtx, pod, status, &pod.Status) + actions := m.computePodActions(tCtx, pod, status, false) verifyActions(t, &test.actions, &actions, desc) if test.resetStatusFn != nil { test.resetStatusFn(status) @@ -2507,7 +2503,7 @@ func TestComputePodActionsWithInitAndEphemeralContainers(t *testing.T) { test.mutateStatusFn(status) } tCtx := ktesting.Init(t) - actions := m.computePodActions(tCtx, pod, status, &pod.Status) + actions := m.computePodActions(tCtx, pod, status, false) verifyActions(t, &test.actions, &actions, desc) }) } @@ -2648,7 +2644,7 @@ func TestComputePodActionsWithContainerRestartRules(t *testing.T) { test.mutateStatusFn(status) } ctx := context.Background() - actions := m.computePodActions(ctx, pod, status, &pod.Status) + actions := m.computePodActions(ctx, pod, status, false) verifyActions(t, &test.actions, &actions, desc) if test.resetStatusFn != nil { test.resetStatusFn(status) @@ -2688,7 +2684,7 @@ func TestSyncPodWithSandboxAndDeletedPod(t *testing.T) { // the fakePodProvider so they are 'deleted'. podStatus, err := m.GetPodStatus(tCtx, pod.UID, pod.Name, pod.Namespace) assert.NoError(t, err) - result := m.SyncPod(tCtx, pod, podStatus, []v1.Secret{}, backOff, &pod.Status) + result := m.SyncPod(tCtx, pod, podStatus, []v1.Secret{}, backOff, false) // This will return an error if the pod has _not_ been deleted. assert.NoError(t, result.Error()) } @@ -3172,7 +3168,7 @@ func TestComputePodActionsForPodResize(t *testing.T) { tCtx := ktesting.Init(t) expectedActions := test.getExpectedPodActionsFn(pod, status) - actions := m.computePodActions(tCtx, pod, status, &pod.Status) + actions := m.computePodActions(tCtx, pod, status, false) verifyActions(t, expectedActions, &actions, desc) }) } diff --git a/pkg/kubelet/prober/worker_test.go b/pkg/kubelet/prober/worker_test.go index 5d53a7835b6..243140b7a10 100644 --- a/pkg/kubelet/prober/worker_test.go +++ b/pkg/kubelet/prober/worker_test.go @@ -181,7 +181,9 @@ func TestDoProbe(t *testing.T) { } func TestDoProbeWithContainerRestartRules(t *testing.T) { - featuregatetesting.SetFeatureGateDuringTest(t, utilfeature.DefaultFeatureGate, features.ContainerRestartRules, true) + featuregatetesting.SetFeatureGatesDuringTest(t, utilfeature.DefaultFeatureGate, featuregatetesting.FeatureOverrides{ + features.ContainerRestartRules: true, + }) TestDoProbe(t) var ( @@ -298,8 +300,11 @@ func TestDoProbeWithContainerRestartRules(t *testing.T) { } func TestDoProbeWithContainerRestartAllContainers(t *testing.T) { - featuregatetesting.SetFeatureGateDuringTest(t, utilfeature.DefaultFeatureGate, features.ContainerRestartRules, true) - featuregatetesting.SetFeatureGateDuringTest(t, utilfeature.DefaultFeatureGate, features.RestartAllContainersOnContainerExits, true) + featuregatetesting.SetFeatureGatesDuringTest(t, utilfeature.DefaultFeatureGate, featuregatetesting.FeatureOverrides{ + features.ContainerRestartRules: true, + features.NodeDeclaredFeatures: true, + features.RestartAllContainersOnContainerExits: true, + }) TestDoProbe(t) TestDoProbeWithContainerRestartRules(t) diff --git a/pkg/kubelet/status/generate_test.go b/pkg/kubelet/status/generate_test.go index 5cf51d0f947..76915a81505 100644 --- a/pkg/kubelet/status/generate_test.go +++ b/pkg/kubelet/status/generate_test.go @@ -563,8 +563,11 @@ func TestGeneratePodInitializedCondition(t *testing.T) { }, }, } - - featuregatetesting.SetFeatureGateDuringTest(t, utilfeature.DefaultFeatureGate, features.RestartAllContainersOnContainerExits, true) + featuregatetesting.SetFeatureGatesDuringTest(t, utilfeature.DefaultFeatureGate, featuregatetesting.FeatureOverrides{ + features.ContainerRestartRules: true, + features.NodeDeclaredFeatures: true, + features.RestartAllContainersOnContainerExits: true, + }) for _, test := range tests { test.expected.Type = v1.PodInitialized pod := &v1.Pod{Spec: *test.spec} @@ -648,7 +651,11 @@ func TestGeneratePodReadyToStartContainersCondition(t *testing.T) { } func TestGenerateAllContainersRestartingCondition(t *testing.T) { - featuregatetesting.SetFeatureGateDuringTest(t, utilfeature.DefaultFeatureGate, features.RestartAllContainersOnContainerExits, true) + featuregatetesting.SetFeatureGatesDuringTest(t, utilfeature.DefaultFeatureGate, featuregatetesting.FeatureOverrides{ + features.ContainerRestartRules: true, + features.NodeDeclaredFeatures: true, + features.RestartAllContainersOnContainerExits: true, + }) restartPolicyNever := v1.ContainerRestartPolicyNever defaultPod := &v1.Pod{ diff --git a/staging/src/k8s.io/component-helpers/nodedeclaredfeatures/features/registry.go b/staging/src/k8s.io/component-helpers/nodedeclaredfeatures/features/registry.go index eeea59fd378..1dc2e137c8c 100644 --- a/staging/src/k8s.io/component-helpers/nodedeclaredfeatures/features/registry.go +++ b/staging/src/k8s.io/component-helpers/nodedeclaredfeatures/features/registry.go @@ -19,6 +19,7 @@ package features import ( "k8s.io/component-helpers/nodedeclaredfeatures" "k8s.io/component-helpers/nodedeclaredfeatures/features/inplacepodresize" + "k8s.io/component-helpers/nodedeclaredfeatures/features/restartallcontainers" ) // AllFeatures is the central registry for all declared features. @@ -26,4 +27,5 @@ import ( // discovery and inference logic. var AllFeatures = []nodedeclaredfeatures.Feature{ inplacepodresize.Feature, + restartallcontainers.Feature, } diff --git a/staging/src/k8s.io/component-helpers/nodedeclaredfeatures/features/restartallcontainers/restart_all_containers.go b/staging/src/k8s.io/component-helpers/nodedeclaredfeatures/features/restartallcontainers/restart_all_containers.go new file mode 100644 index 00000000000..db496d30792 --- /dev/null +++ b/staging/src/k8s.io/component-helpers/nodedeclaredfeatures/features/restartallcontainers/restart_all_containers.go @@ -0,0 +1,71 @@ +/* +Copyright 2025 The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package restartallcontainers + +import ( + v1 "k8s.io/api/core/v1" + "k8s.io/apimachinery/pkg/util/version" + "k8s.io/component-helpers/nodedeclaredfeatures" +) + +// Ensure the feature struct implements the unified Feature interface. +var _ nodedeclaredfeatures.Feature = &restartAllContainersFeature{} + +const ( + RestartRulesFeatureGate = "ContainerRestartRules" + RestartAllContainersOnContainerExits = "RestartAllContainersOnContainerExits" +) + +// Feature is the implementation of the `GuaranteedQoSPodCPUResize` feature. +var Feature = &restartAllContainersFeature{} + +type restartAllContainersFeature struct{} + +func (f *restartAllContainersFeature) Name() string { + return RestartAllContainersOnContainerExits +} + +func (f *restartAllContainersFeature) Discover(cfg *nodedeclaredfeatures.NodeConfiguration) bool { + return cfg.FeatureGates.Enabled(RestartAllContainersOnContainerExits) +} + +func (f *restartAllContainersFeature) InferForScheduling(podInfo *nodedeclaredfeatures.PodInfo) bool { + for _, c := range podInfo.Spec.Containers { + for _, rule := range c.RestartPolicyRules { + if rule.Action == v1.ContainerRestartRuleActionRestartAllContainers { + return true + } + } + } + for _, c := range podInfo.Spec.InitContainers { + for _, rule := range c.RestartPolicyRules { + if rule.Action == v1.ContainerRestartRuleActionRestartAllContainers { + return true + } + } + } + return false +} + +func (f *restartAllContainersFeature) InferForUpdate(oldPodInfo, newPodInfo *nodedeclaredfeatures.PodInfo) bool { + // container.restartPolicy and container.restartPolicyRules are not mutable. + return false +} + +func (f *restartAllContainersFeature) MaxVersion() *version.Version { + return nil +} diff --git a/staging/src/k8s.io/component-helpers/nodedeclaredfeatures/features/restartallcontainers/restart_all_containers_test.go b/staging/src/k8s.io/component-helpers/nodedeclaredfeatures/features/restartallcontainers/restart_all_containers_test.go new file mode 100644 index 00000000000..5865f566bda --- /dev/null +++ b/staging/src/k8s.io/component-helpers/nodedeclaredfeatures/features/restartallcontainers/restart_all_containers_test.go @@ -0,0 +1,138 @@ +/* +Copyright 2025 The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package restartallcontainers + +import ( + "testing" + + "github.com/stretchr/testify/assert" + v1 "k8s.io/api/core/v1" + "k8s.io/component-helpers/nodedeclaredfeatures" + test "k8s.io/component-helpers/nodedeclaredfeatures/testing" +) + +func TestDiscover(t *testing.T) { + tests := []struct { + name string + featureGateEnabled bool + expected bool + }{ + { + name: "both feature enabled", + featureGateEnabled: true, + expected: true, + }, + { + name: "restartAllContainers feature disabled", + featureGateEnabled: false, + expected: false, + }, + } + + feature := &restartAllContainersFeature{} + for _, tc := range tests { + t.Run(tc.name, func(t *testing.T) { + mockFG := test.NewMockFeatureGate(t) + mockFG.EXPECT().Enabled(RestartAllContainersOnContainerExits).Return(tc.featureGateEnabled) + + config := &nodedeclaredfeatures.NodeConfiguration{ + FeatureGates: mockFG, + } + enabled := feature.Discover(config) + assert.Equal(t, tc.expected, enabled) + }) + } +} + +func TestInferForScheduling(t *testing.T) { + tests := []struct { + name string + pod *v1.PodSpec + expected bool + }{ + { + name: "init container with rules", + pod: &v1.PodSpec{ + InitContainers: []v1.Container{ + containerWithRestartAllContainersAction(), + }, + Containers: []v1.Container{{ + Name: "name", + Image: "image", + }}, + }, + expected: true, + }, + { + name: "regular container with rules", + pod: &v1.PodSpec{ + InitContainers: []v1.Container{{ + Name: "name", + Image: "image", + }}, + Containers: []v1.Container{containerWithRestartAllContainersAction()}, + }, + expected: true, + }, + { + name: "no rules", + pod: &v1.PodSpec{ + InitContainers: []v1.Container{{ + Name: "name", + Image: "image", + }}, + Containers: []v1.Container{{ + Name: "name2", + Image: "image", + }}, + }, + expected: false, + }, + } + + feature := &restartAllContainersFeature{} + for _, tc := range tests { + t.Run(tc.name, func(t *testing.T) { + podInfo := &nodedeclaredfeatures.PodInfo{Spec: tc.pod} + assert.Equal(t, tc.expected, feature.InferForScheduling(podInfo)) + }) + } +} + +func TestInferForUpdate(t *testing.T) { + feature := &restartAllContainersFeature{} + podInfo := &nodedeclaredfeatures.PodInfo{Spec: &v1.PodSpec{}} + assert.False(t, feature.InferForUpdate(nil, podInfo), "expect InferForUpdate to be false") +} + +func containerWithRestartAllContainersAction() v1.Container { + restartPolicy := v1.ContainerRestartPolicyNever + return v1.Container{ + Name: "container", + Image: "image", + RestartPolicy: &restartPolicy, + RestartPolicyRules: []v1.ContainerRestartRule{ + { + Action: v1.ContainerRestartRuleActionRestartAllContainers, + ExitCodes: &v1.ContainerRestartRuleOnExitCodes{ + Operator: v1.ContainerRestartRuleOnExitCodesOpIn, + Values: []int32{1}, + }, + }, + }, + } +} diff --git a/test/e2e/node/pods.go b/test/e2e/node/pods.go index d3ffc515174..cafac5aa726 100644 --- a/test/e2e/node/pods.go +++ b/test/e2e/node/pods.go @@ -1050,6 +1050,59 @@ var _ = SIGDescribe("Pod Extended (RestartAllContainers)", framework.WithFeature validateAllContainersRestarted(ctx, f, pod, []string{"init", "sidecar", "source-init"}) framework.ExpectNoError(e2epod.WaitForContainerRunning(ctx, f.ClientSet, f.Namespace.Name, podName, "regular", 3*time.Minute)) }) + + ginkgo.It("should allow multiple RestartAllContainers actions and not introduce a loop", func(ctx context.Context) { + podName := "restart-rules-exit-code-" + string(uuid.NewUUID()) + pod := &v1.Pod{ + ObjectMeta: metav1.ObjectMeta{ + Name: podName, + }, + Spec: v1.PodSpec{ + RestartPolicy: v1.RestartPolicyNever, + Containers: []v1.Container{ + { + Name: "source-container", + Image: imageutils.GetE2EImage(imageutils.BusyBox), + Command: []string{"/bin/sh", "-c", "if [ -f /mnt/restart-complete ]; then sleep 10000; else touch /mnt/restart-complete; sleep 10; exit 42; fi"}, + RestartPolicy: &containerRestartPolicyNever, + RestartPolicyRules: restartAllContainersRules, + VolumeMounts: []v1.VolumeMount{ + { + Name: "workdir", + MountPath: "/mnt", + }, + }, + }, + { + Name: "regular", + Image: imageutils.GetE2EImage(imageutils.BusyBox), + Command: []string{"/bin/sh", "-c", "sleep 10000"}, + RestartPolicy: &containerRestartPolicyNever, + RestartPolicyRules: restartAllContainersRules, + }, + }, + Volumes: []v1.Volume{ + { + Name: "workdir", + VolumeSource: v1.VolumeSource{ + EmptyDir: &v1.EmptyDirVolumeSource{}, + }, + }, + }, + }, + } + + // All containers should be restarted once + podClient := e2epod.NewPodClient(f) + podClient.Create(ctx, pod) + ginkgo.DeferCleanup(func(ctx context.Context) error { + ginkgo.By("deleting the pod") + return podClient.Delete(ctx, pod.Name, metav1.DeleteOptions{}) + }) + validateAllContainersRestarted(ctx, f, pod, []string{"source-container", "regular"}) + framework.ExpectNoError(e2epod.WaitForContainerRunning(ctx, f.ClientSet, f.Namespace.Name, podName, "source-container", 3*time.Minute)) + framework.ExpectNoError(e2epod.WaitForContainerRunning(ctx, f.ClientSet, f.Namespace.Name, podName, "regular", 3*time.Minute)) + }) }) })