diff --git a/pkg/controller/statefulset/stateful_set_control.go b/pkg/controller/statefulset/stateful_set_control.go index 63a924d6a09..6931bec85e3 100644 --- a/pkg/controller/statefulset/stateful_set_control.go +++ b/pkg/controller/statefulset/stateful_set_control.go @@ -104,21 +104,29 @@ func (ssc *defaultStatefulSetControl) performUpdate( // perform the main update function and get the status currentStatus, err = ssc.updateStatefulSet(ctx, set, currentRevision, updateRevision, collisionCount, pods) - if err != nil { - return currentRevision, updateRevision, currentStatus, err + if err != nil && currentStatus == nil { + return currentRevision, updateRevision, nil, err } - // update the set's status - err = ssc.updateStatefulSetStatus(ctx, set, currentStatus) - if err != nil { - return currentRevision, updateRevision, currentStatus, err + + // make sure to update the latest status even if there is an error with non-nil currentStatus + statusErr := ssc.updateStatefulSetStatus(ctx, set, currentStatus) + if statusErr == nil { + klog.V(4).InfoS("Updated status", "statefulSet", klog.KObj(set), + "replicas", currentStatus.Replicas, + "readyReplicas", currentStatus.ReadyReplicas, + "currentReplicas", currentStatus.CurrentReplicas, + "updatedReplicas", currentStatus.UpdatedReplicas) + } + + switch { + case err != nil && statusErr != nil: + klog.ErrorS(statusErr, "Could not update status", "statefulSet", klog.KObj(set)) + return currentRevision, updateRevision, currentStatus, err + case err != nil: + return currentRevision, updateRevision, currentStatus, err + case statusErr != nil: + return currentRevision, updateRevision, currentStatus, statusErr } - klog.V(4).Infof("StatefulSet %s/%s pod status replicas=%d ready=%d current=%d updated=%d", - set.Namespace, - set.Name, - currentStatus.Replicas, - currentStatus.ReadyReplicas, - currentStatus.CurrentReplicas, - currentStatus.UpdatedReplicas) klog.V(4).Infof("StatefulSet %s/%s revisions current=%s update=%s", set.Namespace, diff --git a/pkg/controller/statefulset/stateful_set_control_test.go b/pkg/controller/statefulset/stateful_set_control_test.go index 9b56c7cb0cd..631deb51d98 100644 --- a/pkg/controller/statefulset/stateful_set_control_test.go +++ b/pkg/controller/statefulset/stateful_set_control_test.go @@ -1800,6 +1800,79 @@ func TestStatefulSetAvailability(t *testing.T) { } } +func TestStatefulSetStatusUpdate(t *testing.T) { + var ( + syncErr = fmt.Errorf("sync error") + statusErr = fmt.Errorf("status error") + ) + + testCases := []struct { + desc string + + hasSyncErr bool + hasStatusErr bool + + expectedErr error + }{ + { + desc: "no error", + hasSyncErr: false, + hasStatusErr: false, + expectedErr: nil, + }, + { + desc: "sync error", + hasSyncErr: true, + hasStatusErr: false, + expectedErr: syncErr, + }, + { + desc: "status error", + hasSyncErr: false, + hasStatusErr: true, + expectedErr: statusErr, + }, + { + desc: "sync and status error", + hasSyncErr: true, + hasStatusErr: true, + expectedErr: syncErr, + }, + } + + for _, tc := range testCases { + t.Run(tc.desc, func(t *testing.T) { + set := newStatefulSet(3) + client := fake.NewSimpleClientset(set) + om, ssu, ssc, stop := setupController(client) + defer close(stop) + + if tc.hasSyncErr { + om.SetCreateStatefulPodError(syncErr, 0) + } + if tc.hasStatusErr { + ssu.SetUpdateStatefulSetStatusError(statusErr, 0) + } + + selector, err := metav1.LabelSelectorAsSelector(set.Spec.Selector) + if err != nil { + t.Error(err) + } + pods, err := om.podsLister.Pods(set.Namespace).List(selector) + if err != nil { + t.Error(err) + } + _, err = ssc.UpdateStatefulSet(context.TODO(), set, pods) + if ssu.updateStatusTracker.requests != 1 { + t.Errorf("Did not update status") + } + if !errors.Is(err, tc.expectedErr) { + t.Errorf("Expected error: %v, got: %v", tc.expectedErr, err) + } + }) + } +} + type requestTracker struct { requests int err error diff --git a/test/integration/statefulset/statefulset_test.go b/test/integration/statefulset/statefulset_test.go index 678e07eae91..01d374eed00 100644 --- a/test/integration/statefulset/statefulset_test.go +++ b/test/integration/statefulset/statefulset_test.go @@ -30,10 +30,13 @@ import ( "k8s.io/apimachinery/pkg/util/wait" utilfeature "k8s.io/apiserver/pkg/util/feature" "k8s.io/client-go/dynamic" + "k8s.io/client-go/informers" clientset "k8s.io/client-go/kubernetes" + restclient "k8s.io/client-go/rest" featuregatetesting "k8s.io/component-base/featuregate/testing" apiservertesting "k8s.io/kubernetes/cmd/kube-apiserver/app/testing" podutil "k8s.io/kubernetes/pkg/api/v1/pod" + "k8s.io/kubernetes/pkg/controller/statefulset" "k8s.io/kubernetes/pkg/features" "k8s.io/kubernetes/test/integration/framework" ) @@ -349,3 +352,57 @@ func setPodsReadyCondition(t *testing.T, clientSet clientset.Interface, pods *v1 t.Fatalf("failed to mark all StatefulSet pods to ready: %v", err) } } + +// add for issue: https://github.com/kubernetes/kubernetes/issues/108837 +func TestStatefulSetStatusWithPodFail(t *testing.T) { + limitedPodNumber := 2 + controlPlaneConfig := framework.NewIntegrationTestControlPlaneConfig() + controlPlaneConfig.GenericConfig.AdmissionControl = &fakePodFailAdmission{ + limitedPodNumber: limitedPodNumber, + } + _, s, closeFn := framework.RunAnAPIServer(controlPlaneConfig) + defer closeFn() + + config := restclient.Config{Host: s.URL} + c, err := clientset.NewForConfig(&config) + if err != nil { + t.Fatalf("Could not create clientset: %v", err) + } + resyncPeriod := 12 * time.Hour + informers := informers.NewSharedInformerFactory(clientset.NewForConfigOrDie(restclient.AddUserAgent(&config, "statefulset-informers")), resyncPeriod) + + ssc := statefulset.NewStatefulSetController( + informers.Core().V1().Pods(), + informers.Apps().V1().StatefulSets(), + informers.Core().V1().PersistentVolumeClaims(), + informers.Apps().V1().ControllerRevisions(), + clientset.NewForConfigOrDie(restclient.AddUserAgent(&config, "statefulset-controller")), + ) + + ns := framework.CreateTestingNamespace("test-pod-fail", s, t) + defer framework.DeleteTestingNamespace(ns, s, t) + + ctx, cancel := context.WithCancel(context.Background()) + defer cancel() + informers.Start(ctx.Done()) + go ssc.Run(ctx, 5) + + sts := newSTS("sts", ns.Name, 4) + _, err = c.AppsV1().StatefulSets(sts.Namespace).Create(context.TODO(), sts, metav1.CreateOptions{}) + if err != nil { + t.Fatalf("Could not create statefuleSet %s: %v", sts.Name, err) + } + + wantReplicas := limitedPodNumber + var gotReplicas int32 + if err := wait.PollImmediate(pollInterval, pollTimeout, func() (bool, error) { + newSTS, err := c.AppsV1().StatefulSets(sts.Namespace).Get(context.TODO(), sts.Name, metav1.GetOptions{}) + if err != nil { + return false, err + } + gotReplicas = newSTS.Status.Replicas + return gotReplicas == int32(wantReplicas), nil + }); err != nil { + t.Fatalf("StatefulSet %s status has %d replicas, want replicas %d: %v", sts.Name, gotReplicas, wantReplicas, err) + } +} diff --git a/test/integration/statefulset/util.go b/test/integration/statefulset/util.go index b2ef1389c39..6d1705e855d 100644 --- a/test/integration/statefulset/util.go +++ b/test/integration/statefulset/util.go @@ -29,12 +29,14 @@ import ( metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/labels" "k8s.io/apimachinery/pkg/util/wait" + "k8s.io/apiserver/pkg/admission" "k8s.io/client-go/informers" clientset "k8s.io/client-go/kubernetes" typedappsv1 "k8s.io/client-go/kubernetes/typed/apps/v1" typedv1 "k8s.io/client-go/kubernetes/typed/core/v1" restclient "k8s.io/client-go/rest" "k8s.io/client-go/util/retry" + api "k8s.io/kubernetes/pkg/apis/core" //svc "k8s.io/kubernetes/pkg/api/v1/service" "k8s.io/kubernetes/pkg/controller/statefulset" @@ -309,3 +311,26 @@ func scaleSTS(t *testing.T, c clientset.Interface, sts *appsv1.StatefulSet, repl } waitSTSStable(t, c, sts) } + +var _ admission.ValidationInterface = &fakePodFailAdmission{} + +type fakePodFailAdmission struct { + limitedPodNumber int + succeedPodsCount int +} + +func (f *fakePodFailAdmission) Handles(operation admission.Operation) bool { + return operation == admission.Create +} + +func (f *fakePodFailAdmission) Validate(ctx context.Context, attr admission.Attributes, o admission.ObjectInterfaces) (err error) { + if attr.GetKind().GroupKind() != api.Kind("Pod") { + return nil + } + + if f.succeedPodsCount >= f.limitedPodNumber { + return fmt.Errorf("fakePodFailAdmission error") + } + f.succeedPodsCount++ + return nil +}