Fix job controller reporting active=0 during pod creation backoff

When manageJob() needs to create replacement pods but defers creation
because a pod-failure backoff is still active, it returned a hardcoded
active=0 to the caller. Because no pods were actually created or deleted,
this left Status.Active=0 while Status.Ready still reflected the running
pods. The apiserver correctly rejects such updates ("cannot set more
ready pods than active") with a 422, which blocks flushing uncounted
terminated pods, removing finalizers, and updating job status, leaving
pods stuck Terminating with stale status.

Return the real active count from both backoff early-returns instead,
since the deferral does not change the number of active pods.

Issue: https://github.com/kubernetes/kubernetes/issues/139428
This commit is contained in:
Akhil Singh 2026-06-03 11:31:43 +05:30
parent 4ea9058d21
commit 2fe49b0cd0
2 changed files with 84 additions and 2 deletions

View file

@ -1822,7 +1822,12 @@ func (jm *Controller) manageJob(ctx context.Context, job *batch.Job, jobCtx *syn
}
if remainingTime > 0 {
jm.enqueueSyncJobWithDelay(logger, job, remainingTime)
return 0, metrics.JobSyncActionPodsCreated, nil
// No pods were created or deleted, so return the current active
// count rather than 0. Returning 0 here would cause the status
// update to set Active=0 while Ready still reflects the running
// pods, which the API server rejects ("cannot set more ready pods
// than active"), blocking finalizer removal and status flushing.
return active, metrics.JobSyncActionPodsCreated, nil
}
if diff > int32(MaxPodCreateDeletePerSync) {
diff = int32(MaxPodCreateDeletePerSync)
@ -1835,7 +1840,9 @@ func (jm *Controller) manageJob(ctx context.Context, job *batch.Job, jobCtx *syn
indexesToAdd, remainingTime = jm.getPodCreationInfoForIndependentIndexes(logger, indexesToAdd, jobCtx.podsWithDelayedDeletionPerIndex)
if remainingTime > 0 {
jm.enqueueSyncJobWithDelay(logger, job, remainingTime)
return 0, metrics.JobSyncActionPodsCreated, nil
// No pods were created or deleted, so return the current
// active count rather than 0 (see comment above).
return active, metrics.JobSyncActionPodsCreated, nil
}
}
diff = int32(len(indexesToAdd))

View file

@ -455,6 +455,37 @@ func TestControllerSyncJob(t *testing.T) {
expectedTerminating: ptr.To[int32](0),
controllerTime: &referenceTime,
},
// Regression test for https://github.com/kubernetes/kubernetes/issues/139428.
// When replacement pods are needed but pod creation is deferred due to an
// active backoff, manageJob must report the actual active count rather than
// 0. Reporting 0 while Ready still reflects the running pods makes the status
// update fail apiserver validation ("cannot set more ready pods than active"),
// blocking finalizer removal and status flushing.
"too few active pods and active back-off with running pods": {
parallelism: 2,
completions: 2,
backoffLimit: 6,
backoffRecord: &backoffRecord{
failuresAfterLastSuccess: 1,
lastFailureTime: &referenceTime,
},
initialStatus: &jobInitialStatus{
startTime: func() *time.Time {
now := time.Now()
return &now
}(),
},
activePods: 1,
readyPods: 1,
succeededPods: 0,
expectedCreations: 0,
expectedActive: 1,
expectedSucceeded: 0,
expectedPodPatches: 0,
expectedReady: ptr.To[int32](1),
expectedTerminating: ptr.To[int32](0),
controllerTime: &referenceTime,
},
"too few active pods and no back-offs": {
parallelism: 1,
completions: 1,
@ -5221,6 +5252,50 @@ func TestSyncJobWithJobBackoffLimitPerIndex(t *testing.T) {
FailedIndexes: ptr.To(""),
},
},
// Regression test for https://github.com/kubernetes/kubernetes/issues/139428.
// One index has a running pod while another index's replacement pod
// creation is deferred because its per-index backoff is still active.
// manageJob must report the actual active count (1) rather than 0; a
// status with active=0 while pods are still running is rejected by the
// apiserver ("cannot set more ready pods than active"), blocking
// finalizer removal and status flushing.
"replacement pod creation delayed by per-index backoff while another index runs": {
job: batch.Job{
TypeMeta: metav1.TypeMeta{Kind: "Job"},
ObjectMeta: validObjectMeta,
Spec: batch.JobSpec{
Selector: validSelector,
Template: validTemplate,
Parallelism: ptr.To[int32](2),
Completions: ptr.To[int32](2),
BackoffLimit: ptr.To[int32](math.MaxInt32),
CompletionMode: ptr.To(batch.IndexedCompletion),
BackoffLimitPerIndex: ptr.To[int32](1),
},
},
pods: []v1.Pod{
*buildPod().uid("a").index("0").phase(v1.PodRunning).indexFailureCount("0").trackingFinalizer().Pod,
*buildPod().uid("b").index("1").status(v1.PodStatus{
Phase: v1.PodFailed,
ContainerStatuses: []v1.ContainerStatus{
{
Name: "x",
State: v1.ContainerState{
Terminated: &v1.ContainerStateTerminated{
FinishedAt: metav1.NewTime(now),
},
},
},
},
}).indexFailureCount("0").trackingFinalizer().Pod,
},
wantStatus: batch.JobStatus{
Active: 1,
Terminating: ptr.To[int32](0),
UncountedTerminatedPods: &batch.UncountedTerminatedPods{},
FailedIndexes: new(string),
},
},
"single failed index due to exceeding the backoff limit per index, the job continues": {
job: batch.Job{
TypeMeta: metav1.TypeMeta{Kind: "Job"},