mirror of
https://github.com/kubernetes/kubernetes.git
synced 2026-06-13 19:01:10 -04:00
Fix job controller reporting active=0 during pod creation backoff
When manageJob() needs to create replacement pods but defers creation
because a pod-failure backoff is still active, it returned a hardcoded
active=0 to the caller. Because no pods were actually created or deleted,
this left Status.Active=0 while Status.Ready still reflected the running
pods. The apiserver correctly rejects such updates ("cannot set more
ready pods than active") with a 422, which blocks flushing uncounted
terminated pods, removing finalizers, and updating job status, leaving
pods stuck Terminating with stale status.
Return the real active count from both backoff early-returns instead,
since the deferral does not change the number of active pods.
Issue: https://github.com/kubernetes/kubernetes/issues/139428
This commit is contained in:
parent
4ea9058d21
commit
2fe49b0cd0
2 changed files with 84 additions and 2 deletions
|
|
@ -1822,7 +1822,12 @@ func (jm *Controller) manageJob(ctx context.Context, job *batch.Job, jobCtx *syn
|
|||
}
|
||||
if remainingTime > 0 {
|
||||
jm.enqueueSyncJobWithDelay(logger, job, remainingTime)
|
||||
return 0, metrics.JobSyncActionPodsCreated, nil
|
||||
// No pods were created or deleted, so return the current active
|
||||
// count rather than 0. Returning 0 here would cause the status
|
||||
// update to set Active=0 while Ready still reflects the running
|
||||
// pods, which the API server rejects ("cannot set more ready pods
|
||||
// than active"), blocking finalizer removal and status flushing.
|
||||
return active, metrics.JobSyncActionPodsCreated, nil
|
||||
}
|
||||
if diff > int32(MaxPodCreateDeletePerSync) {
|
||||
diff = int32(MaxPodCreateDeletePerSync)
|
||||
|
|
@ -1835,7 +1840,9 @@ func (jm *Controller) manageJob(ctx context.Context, job *batch.Job, jobCtx *syn
|
|||
indexesToAdd, remainingTime = jm.getPodCreationInfoForIndependentIndexes(logger, indexesToAdd, jobCtx.podsWithDelayedDeletionPerIndex)
|
||||
if remainingTime > 0 {
|
||||
jm.enqueueSyncJobWithDelay(logger, job, remainingTime)
|
||||
return 0, metrics.JobSyncActionPodsCreated, nil
|
||||
// No pods were created or deleted, so return the current
|
||||
// active count rather than 0 (see comment above).
|
||||
return active, metrics.JobSyncActionPodsCreated, nil
|
||||
}
|
||||
}
|
||||
diff = int32(len(indexesToAdd))
|
||||
|
|
|
|||
|
|
@ -455,6 +455,37 @@ func TestControllerSyncJob(t *testing.T) {
|
|||
expectedTerminating: ptr.To[int32](0),
|
||||
controllerTime: &referenceTime,
|
||||
},
|
||||
// Regression test for https://github.com/kubernetes/kubernetes/issues/139428.
|
||||
// When replacement pods are needed but pod creation is deferred due to an
|
||||
// active backoff, manageJob must report the actual active count rather than
|
||||
// 0. Reporting 0 while Ready still reflects the running pods makes the status
|
||||
// update fail apiserver validation ("cannot set more ready pods than active"),
|
||||
// blocking finalizer removal and status flushing.
|
||||
"too few active pods and active back-off with running pods": {
|
||||
parallelism: 2,
|
||||
completions: 2,
|
||||
backoffLimit: 6,
|
||||
backoffRecord: &backoffRecord{
|
||||
failuresAfterLastSuccess: 1,
|
||||
lastFailureTime: &referenceTime,
|
||||
},
|
||||
initialStatus: &jobInitialStatus{
|
||||
startTime: func() *time.Time {
|
||||
now := time.Now()
|
||||
return &now
|
||||
}(),
|
||||
},
|
||||
activePods: 1,
|
||||
readyPods: 1,
|
||||
succeededPods: 0,
|
||||
expectedCreations: 0,
|
||||
expectedActive: 1,
|
||||
expectedSucceeded: 0,
|
||||
expectedPodPatches: 0,
|
||||
expectedReady: ptr.To[int32](1),
|
||||
expectedTerminating: ptr.To[int32](0),
|
||||
controllerTime: &referenceTime,
|
||||
},
|
||||
"too few active pods and no back-offs": {
|
||||
parallelism: 1,
|
||||
completions: 1,
|
||||
|
|
@ -5221,6 +5252,50 @@ func TestSyncJobWithJobBackoffLimitPerIndex(t *testing.T) {
|
|||
FailedIndexes: ptr.To(""),
|
||||
},
|
||||
},
|
||||
// Regression test for https://github.com/kubernetes/kubernetes/issues/139428.
|
||||
// One index has a running pod while another index's replacement pod
|
||||
// creation is deferred because its per-index backoff is still active.
|
||||
// manageJob must report the actual active count (1) rather than 0; a
|
||||
// status with active=0 while pods are still running is rejected by the
|
||||
// apiserver ("cannot set more ready pods than active"), blocking
|
||||
// finalizer removal and status flushing.
|
||||
"replacement pod creation delayed by per-index backoff while another index runs": {
|
||||
job: batch.Job{
|
||||
TypeMeta: metav1.TypeMeta{Kind: "Job"},
|
||||
ObjectMeta: validObjectMeta,
|
||||
Spec: batch.JobSpec{
|
||||
Selector: validSelector,
|
||||
Template: validTemplate,
|
||||
Parallelism: ptr.To[int32](2),
|
||||
Completions: ptr.To[int32](2),
|
||||
BackoffLimit: ptr.To[int32](math.MaxInt32),
|
||||
CompletionMode: ptr.To(batch.IndexedCompletion),
|
||||
BackoffLimitPerIndex: ptr.To[int32](1),
|
||||
},
|
||||
},
|
||||
pods: []v1.Pod{
|
||||
*buildPod().uid("a").index("0").phase(v1.PodRunning).indexFailureCount("0").trackingFinalizer().Pod,
|
||||
*buildPod().uid("b").index("1").status(v1.PodStatus{
|
||||
Phase: v1.PodFailed,
|
||||
ContainerStatuses: []v1.ContainerStatus{
|
||||
{
|
||||
Name: "x",
|
||||
State: v1.ContainerState{
|
||||
Terminated: &v1.ContainerStateTerminated{
|
||||
FinishedAt: metav1.NewTime(now),
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
}).indexFailureCount("0").trackingFinalizer().Pod,
|
||||
},
|
||||
wantStatus: batch.JobStatus{
|
||||
Active: 1,
|
||||
Terminating: ptr.To[int32](0),
|
||||
UncountedTerminatedPods: &batch.UncountedTerminatedPods{},
|
||||
FailedIndexes: new(string),
|
||||
},
|
||||
},
|
||||
"single failed index due to exceeding the backoff limit per index, the job continues": {
|
||||
job: batch.Job{
|
||||
TypeMeta: metav1.TypeMeta{Kind: "Job"},
|
||||
|
|
|
|||
Loading…
Reference in a new issue