From 2fe49b0cd06a367b2eedc535fc51f0d8a8b1ecd6 Mon Sep 17 00:00:00 2001 From: Akhil Singh Date: Wed, 3 Jun 2026 11:31:43 +0530 Subject: [PATCH] Fix job controller reporting active=0 during pod creation backoff When manageJob() needs to create replacement pods but defers creation because a pod-failure backoff is still active, it returned a hardcoded active=0 to the caller. Because no pods were actually created or deleted, this left Status.Active=0 while Status.Ready still reflected the running pods. The apiserver correctly rejects such updates ("cannot set more ready pods than active") with a 422, which blocks flushing uncounted terminated pods, removing finalizers, and updating job status, leaving pods stuck Terminating with stale status. Return the real active count from both backoff early-returns instead, since the deferral does not change the number of active pods. Issue: https://github.com/kubernetes/kubernetes/issues/139428 --- pkg/controller/job/job_controller.go | 11 +++- pkg/controller/job/job_controller_test.go | 75 +++++++++++++++++++++++ 2 files changed, 84 insertions(+), 2 deletions(-) diff --git a/pkg/controller/job/job_controller.go b/pkg/controller/job/job_controller.go index 3c332110178..db0bf7e9cbd 100644 --- a/pkg/controller/job/job_controller.go +++ b/pkg/controller/job/job_controller.go @@ -1822,7 +1822,12 @@ func (jm *Controller) manageJob(ctx context.Context, job *batch.Job, jobCtx *syn } if remainingTime > 0 { jm.enqueueSyncJobWithDelay(logger, job, remainingTime) - return 0, metrics.JobSyncActionPodsCreated, nil + // No pods were created or deleted, so return the current active + // count rather than 0. Returning 0 here would cause the status + // update to set Active=0 while Ready still reflects the running + // pods, which the API server rejects ("cannot set more ready pods + // than active"), blocking finalizer removal and status flushing. + return active, metrics.JobSyncActionPodsCreated, nil } if diff > int32(MaxPodCreateDeletePerSync) { diff = int32(MaxPodCreateDeletePerSync) @@ -1835,7 +1840,9 @@ func (jm *Controller) manageJob(ctx context.Context, job *batch.Job, jobCtx *syn indexesToAdd, remainingTime = jm.getPodCreationInfoForIndependentIndexes(logger, indexesToAdd, jobCtx.podsWithDelayedDeletionPerIndex) if remainingTime > 0 { jm.enqueueSyncJobWithDelay(logger, job, remainingTime) - return 0, metrics.JobSyncActionPodsCreated, nil + // No pods were created or deleted, so return the current + // active count rather than 0 (see comment above). + return active, metrics.JobSyncActionPodsCreated, nil } } diff = int32(len(indexesToAdd)) diff --git a/pkg/controller/job/job_controller_test.go b/pkg/controller/job/job_controller_test.go index b58f5fb147a..f4819f7c341 100644 --- a/pkg/controller/job/job_controller_test.go +++ b/pkg/controller/job/job_controller_test.go @@ -455,6 +455,37 @@ func TestControllerSyncJob(t *testing.T) { expectedTerminating: ptr.To[int32](0), controllerTime: &referenceTime, }, + // Regression test for https://github.com/kubernetes/kubernetes/issues/139428. + // When replacement pods are needed but pod creation is deferred due to an + // active backoff, manageJob must report the actual active count rather than + // 0. Reporting 0 while Ready still reflects the running pods makes the status + // update fail apiserver validation ("cannot set more ready pods than active"), + // blocking finalizer removal and status flushing. + "too few active pods and active back-off with running pods": { + parallelism: 2, + completions: 2, + backoffLimit: 6, + backoffRecord: &backoffRecord{ + failuresAfterLastSuccess: 1, + lastFailureTime: &referenceTime, + }, + initialStatus: &jobInitialStatus{ + startTime: func() *time.Time { + now := time.Now() + return &now + }(), + }, + activePods: 1, + readyPods: 1, + succeededPods: 0, + expectedCreations: 0, + expectedActive: 1, + expectedSucceeded: 0, + expectedPodPatches: 0, + expectedReady: ptr.To[int32](1), + expectedTerminating: ptr.To[int32](0), + controllerTime: &referenceTime, + }, "too few active pods and no back-offs": { parallelism: 1, completions: 1, @@ -5221,6 +5252,50 @@ func TestSyncJobWithJobBackoffLimitPerIndex(t *testing.T) { FailedIndexes: ptr.To(""), }, }, + // Regression test for https://github.com/kubernetes/kubernetes/issues/139428. + // One index has a running pod while another index's replacement pod + // creation is deferred because its per-index backoff is still active. + // manageJob must report the actual active count (1) rather than 0; a + // status with active=0 while pods are still running is rejected by the + // apiserver ("cannot set more ready pods than active"), blocking + // finalizer removal and status flushing. + "replacement pod creation delayed by per-index backoff while another index runs": { + job: batch.Job{ + TypeMeta: metav1.TypeMeta{Kind: "Job"}, + ObjectMeta: validObjectMeta, + Spec: batch.JobSpec{ + Selector: validSelector, + Template: validTemplate, + Parallelism: ptr.To[int32](2), + Completions: ptr.To[int32](2), + BackoffLimit: ptr.To[int32](math.MaxInt32), + CompletionMode: ptr.To(batch.IndexedCompletion), + BackoffLimitPerIndex: ptr.To[int32](1), + }, + }, + pods: []v1.Pod{ + *buildPod().uid("a").index("0").phase(v1.PodRunning).indexFailureCount("0").trackingFinalizer().Pod, + *buildPod().uid("b").index("1").status(v1.PodStatus{ + Phase: v1.PodFailed, + ContainerStatuses: []v1.ContainerStatus{ + { + Name: "x", + State: v1.ContainerState{ + Terminated: &v1.ContainerStateTerminated{ + FinishedAt: metav1.NewTime(now), + }, + }, + }, + }, + }).indexFailureCount("0").trackingFinalizer().Pod, + }, + wantStatus: batch.JobStatus{ + Active: 1, + Terminating: ptr.To[int32](0), + UncountedTerminatedPods: &batch.UncountedTerminatedPods{}, + FailedIndexes: new(string), + }, + }, "single failed index due to exceeding the backoff limit per index, the job continues": { job: batch.Job{ TypeMeta: metav1.TypeMeta{Kind: "Job"},