From 2efffd62367f3480dc2187633382af52cf85fae5 Mon Sep 17 00:00:00 2001 From: KeZhang Date: Thu, 20 Jan 2022 18:35:31 +0800 Subject: [PATCH] Ignore container notfound error while getPodstatuses --- .../kuberuntime/kuberuntime_container.go | 25 ++++-- .../kuberuntime/kuberuntime_manager.go | 15 +++- .../kuberuntime/kuberuntime_manager_test.go | 79 +++++++++++++++++++ vendor/modules.txt | 1 + 4 files changed, 108 insertions(+), 12 deletions(-) diff --git a/pkg/kubelet/kuberuntime/kuberuntime_container.go b/pkg/kubelet/kuberuntime/kuberuntime_container.go index 34337e04b11..9c762ac309e 100644 --- a/pkg/kubelet/kuberuntime/kuberuntime_container.go +++ b/pkg/kubelet/kuberuntime/kuberuntime_container.go @@ -34,6 +34,8 @@ import ( "sync" "time" + crierror "k8s.io/cri-api/pkg/errors" + grpcstatus "google.golang.org/grpc/status" "github.com/armon/circbuf" @@ -502,10 +504,17 @@ func (m *kubeGenericRuntimeManager) getPodContainerStatuses(uid kubetypes.UID, n return nil, err } - statuses := make([]*kubecontainer.Status, len(containers)) + statuses := []*kubecontainer.Status{} // TODO: optimization: set maximum number of containers per container name to examine. - for i, c := range containers { + for _, c := range containers { status, err := m.runtimeService.ContainerStatus(c.Id) + // Between List (ListContainers) and check (ContainerStatus) another thread might remove a container, and that is normal. + // The previous call (ListContainers) never fails due to a pod container not existing. + // Therefore, this method should not either, but instead act as if the previous call failed, + // which means the error should be ignored. + if crierror.IsNotFound(err) { + continue + } if err != nil { // Merely log this here; GetPodStatus will actually report the error out. klog.V(4).InfoS("ContainerStatus return error", "containerID", c.Id, "err", err) @@ -538,7 +547,7 @@ func (m *kubeGenericRuntimeManager) getPodContainerStatuses(uid kubetypes.UID, n cStatus.Message += tMessage } } - statuses[i] = cStatus + statuses = append(statuses, cStatus) } sort.Sort(containerStatusByCreated(statuses)) @@ -715,15 +724,15 @@ func (m *kubeGenericRuntimeManager) killContainer(pod *v1.Pod, containerID kubec "containerName", containerName, "containerID", containerID.String(), "gracePeriod", gracePeriod) err := m.runtimeService.StopContainer(containerID.ID, gracePeriod) - if err != nil { + if err != nil && !crierror.IsNotFound(err) { klog.ErrorS(err, "Container termination failed with gracePeriod", "pod", klog.KObj(pod), "podUID", pod.UID, "containerName", containerName, "containerID", containerID.String(), "gracePeriod", gracePeriod) - } else { - klog.V(3).InfoS("Container exited normally", "pod", klog.KObj(pod), "podUID", pod.UID, - "containerName", containerName, "containerID", containerID.String()) + return err } + klog.V(3).InfoS("Container exited normally", "pod", klog.KObj(pod), "podUID", pod.UID, + "containerName", containerName, "containerID", containerID.String()) - return err + return nil } // killContainersWithSyncResult kills all pod's containers with sync results. diff --git a/pkg/kubelet/kuberuntime/kuberuntime_manager.go b/pkg/kubelet/kuberuntime/kuberuntime_manager.go index 7f4f82aacb6..33a84a12475 100644 --- a/pkg/kubelet/kuberuntime/kuberuntime_manager.go +++ b/pkg/kubelet/kuberuntime/kuberuntime_manager.go @@ -25,6 +25,7 @@ import ( "time" cadvisorapi "github.com/google/cadvisor/info/v1" + crierror "k8s.io/cri-api/pkg/errors" "k8s.io/klog/v2" v1 "k8s.io/api/core/v1" @@ -1007,7 +1008,7 @@ func (m *kubeGenericRuntimeManager) killPodWithSyncResult(pod *v1.Pod, runningPo result.AddSyncResult(killSandboxResult) // Stop all sandboxes belongs to same pod for _, podSandbox := range runningPod.Sandboxes { - if err := m.runtimeService.StopPodSandbox(podSandbox.ID.ID); err != nil { + if err := m.runtimeService.StopPodSandbox(podSandbox.ID.ID); err != nil && !crierror.IsNotFound(err) { killSandboxResult.Fail(kubecontainer.ErrKillPodSandbox, err.Error()) klog.ErrorS(nil, "Failed to stop sandbox", "podSandboxID", podSandbox.ID) } @@ -1049,16 +1050,22 @@ func (m *kubeGenericRuntimeManager) GetPodStatus(uid kubetypes.UID, name, namesp klog.V(4).InfoS("getSandboxIDByPodUID got sandbox IDs for pod", "podSandboxID", podSandboxIDs, "pod", klog.KObj(pod)) - sandboxStatuses := make([]*runtimeapi.PodSandboxStatus, len(podSandboxIDs)) + sandboxStatuses := []*runtimeapi.PodSandboxStatus{} podIPs := []string{} for idx, podSandboxID := range podSandboxIDs { podSandboxStatus, err := m.runtimeService.PodSandboxStatus(podSandboxID) + // Between List (getSandboxIDByPodUID) and check (PodSandboxStatus) another thread might remove a container, and that is normal. + // The previous call (getSandboxIDByPodUID) never fails due to a pod sandbox not existing. + // Therefore, this method should not either, but instead act as if the previous call failed, + // which means the error should be ignored. + if crierror.IsNotFound(err) { + continue + } if err != nil { klog.ErrorS(err, "PodSandboxStatus of sandbox for pod", "podSandboxID", podSandboxID, "pod", klog.KObj(pod)) return nil, err } - sandboxStatuses[idx] = podSandboxStatus - + sandboxStatuses = append(sandboxStatuses, podSandboxStatus) // Only get pod IP from latest sandbox if idx == 0 && podSandboxStatus.State == runtimeapi.PodSandboxState_SANDBOX_READY { podIPs = m.determinePodSandboxIPs(namespace, name, podSandboxStatus) diff --git a/pkg/kubelet/kuberuntime/kuberuntime_manager_test.go b/pkg/kubelet/kuberuntime/kuberuntime_manager_test.go index 49bfeeeb132..8bec90c8f84 100644 --- a/pkg/kubelet/kuberuntime/kuberuntime_manager_test.go +++ b/pkg/kubelet/kuberuntime/kuberuntime_manager_test.go @@ -23,6 +23,9 @@ import ( "testing" "time" + "google.golang.org/grpc/codes" + "google.golang.org/grpc/status" + cadvisorapi "github.com/google/cadvisor/info/v1" "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" @@ -334,6 +337,82 @@ func TestGetPodStatus(t *testing.T) { assert.Equal(t, apitest.FakePodSandboxIPs, podStatus.IPs) } +func TestStopContainerWithNotFoundError(t *testing.T) { + fakeRuntime, _, m, err := createTestRuntimeManager() + assert.NoError(t, err) + + containers := []v1.Container{ + { + Name: "foo1", + Image: "busybox", + ImagePullPolicy: v1.PullIfNotPresent, + }, + { + Name: "foo2", + Image: "busybox", + ImagePullPolicy: v1.PullIfNotPresent, + }, + } + pod := &v1.Pod{ + ObjectMeta: metav1.ObjectMeta{ + UID: "12345678", + Name: "foo", + Namespace: "new", + }, + Spec: v1.PodSpec{ + Containers: containers, + }, + } + + // Set fake sandbox and faked containers to fakeRuntime. + makeAndSetFakePod(t, m, fakeRuntime, pod) + fakeRuntime.InjectError("StopContainer", status.Error(codes.NotFound, "No such container")) + podStatus, err := m.GetPodStatus(pod.UID, pod.Name, pod.Namespace) + require.NoError(t, err) + p := kubecontainer.ConvertPodStatusToRunningPod("", podStatus) + gracePeriod := int64(1) + err = m.KillPod(pod, p, &gracePeriod) + require.NoError(t, err) +} + +func TestGetPodStatusWithNotFoundError(t *testing.T) { + fakeRuntime, _, m, err := createTestRuntimeManager() + assert.NoError(t, err) + + containers := []v1.Container{ + { + Name: "foo1", + Image: "busybox", + ImagePullPolicy: v1.PullIfNotPresent, + }, + { + Name: "foo2", + Image: "busybox", + ImagePullPolicy: v1.PullIfNotPresent, + }, + } + pod := &v1.Pod{ + ObjectMeta: metav1.ObjectMeta{ + UID: "12345678", + Name: "foo", + Namespace: "new", + }, + Spec: v1.PodSpec{ + Containers: containers, + }, + } + + // Set fake sandbox and faked containers to fakeRuntime. + makeAndSetFakePod(t, m, fakeRuntime, pod) + fakeRuntime.InjectError("ContainerStatus", status.Error(codes.NotFound, "No such container")) + podStatus, err := m.GetPodStatus(pod.UID, pod.Name, pod.Namespace) + require.NoError(t, err) + require.Equal(t, pod.UID, podStatus.ID) + require.Equal(t, pod.Name, podStatus.Name) + require.Equal(t, pod.Namespace, podStatus.Namespace) + require.Equal(t, apitest.FakePodSandboxIPs, podStatus.IPs) +} + func TestGetPods(t *testing.T) { fakeRuntime, _, m, err := createTestRuntimeManager() assert.NoError(t, err) diff --git a/vendor/modules.txt b/vendor/modules.txt index 4fafe60d209..70afe6f7b79 100644 --- a/vendor/modules.txt +++ b/vendor/modules.txt @@ -2043,6 +2043,7 @@ k8s.io/cri-api/pkg/apis k8s.io/cri-api/pkg/apis/runtime/v1 k8s.io/cri-api/pkg/apis/runtime/v1alpha2 k8s.io/cri-api/pkg/apis/testing +k8s.io/cri-api/pkg/errors # k8s.io/csi-translation-lib v0.0.0 => ./staging/src/k8s.io/csi-translation-lib ## explicit k8s.io/csi-translation-lib