From d4181e8c20723a9d6a07ba5a6c40686745d6d432 Mon Sep 17 00:00:00 2001 From: Davanum Srinivas Date: Sat, 14 Mar 2026 14:31:46 -0400 Subject: [PATCH] test/e2e/node: tolerate exit code 2 in pod status flake The fast-delete pod status tests currently require the intentionally failing "fail" container to report exit code 1. In CI, some runtimes occasionally report exit code 2 with reason=Error even though the tested invariant still holds: the container failed and the blocked workload container never started. The latest dims/test-k8s failure on master showed exactly that state: the pod remained Failed, Initialized=False, the blocked container reported started=false, and only the failing init container drifted from exit 1 to exit 2. This matches kubernetes/kubernetes issue 135713 and the related pending-container history in PR 131605. Accept exit code 2 in this verifier so the test continues to assert the behavior it is meant to cover instead of a lower-layer exit-code detail. Fixes issue 135713 Tested: - hack/verify-gofmt.sh - hack/verify-test-code.sh - hack/verify-typecheck.sh ./test/e2e/node/... - go test ./test/e2e/node -run TestNonExistent -count=1 Co-authored-by: Jordan Liggitt --- test/e2e/node/pods.go | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/test/e2e/node/pods.go b/test/e2e/node/pods.go index c61fcb6507e..f5733cc79ee 100644 --- a/test/e2e/node/pods.go +++ b/test/e2e/node/pods.go @@ -1001,6 +1001,11 @@ func (v *podStartVerifier) Verify(event watch.Event) error { switch { case t.ExitCode == 1: // expected + case t.ExitCode == 2 && t.Reason == "Error" && t.Message == "": + // Some runtimes occasionally surface exit code 2 if stopped before execve makes + // it to launching /bin/false in fast-delete scenarios. The test only cares + // that the container failed. + framework.Logf("pod %s on node %s failed with the symptoms of https://github.com/kubernetes/kubernetes/issues/135713", pod.Name, pod.Spec.NodeName) case t.ExitCode == 137 && (t.Reason == "ContainerStatusUnknown" || t.Reason == "Error"): // expected, pod was force-killed after grace period case t.ExitCode == 128 && (t.Reason == "StartError" || t.Reason == "ContainerCannotRun") && reBug88766.MatchString(t.Message):