diff --git a/pkg/kubelet/kuberuntime/kuberuntime_container.go b/pkg/kubelet/kuberuntime/kuberuntime_container.go index 59999a8ba2d..9a47ed6d3b4 100644 --- a/pkg/kubelet/kuberuntime/kuberuntime_container.go +++ b/pkg/kubelet/kuberuntime/kuberuntime_container.go @@ -988,7 +988,7 @@ func (m *kubeGenericRuntimeManager) pruneInitContainersBeforeStart(ctx context.C } // prune all other init containers that match this container name logger.V(4).Info("Removing init container", "containerName", status.Name, "containerID", status.ID.ID, "count", count) - if err := m.removeContainer(ctx, status.ID.ID); err != nil { + if err := m.removeContainer(ctx, status.ID.ID, false); err != nil { utilruntime.HandleError(fmt.Errorf("failed to remove pod init container %q: %v; Skipping pod %q", status.Name, err, format.Pod(pod))) continue } @@ -1014,7 +1014,7 @@ func (m *kubeGenericRuntimeManager) purgeInitContainers(ctx context.Context, pod count++ // Purge all init containers that match this container name logger.V(4).Info("Removing init container", "containerName", status.Name, "containerID", status.ID.ID, "count", count) - if err := m.removeContainer(ctx, status.ID.ID); err != nil { + if err := m.removeContainer(ctx, status.ID.ID, false); err != nil { utilruntime.HandleError(fmt.Errorf("failed to remove pod init container %q: %v; Skipping pod %q", status.Name, err, format.Pod(pod))) continue } @@ -1361,13 +1361,14 @@ func (m *kubeGenericRuntimeManager) RunInContainer(ctx context.Context, id kubec return append(stdout, stderr...), err } -// removeContainer removes the container and the container logs. +// removeContainer removes the container and optionally the container logs. // Notice that we remove the container logs first, so that container will not be removed if // container logs are failed to be removed, and kubelet will retry this later. This guarantees -// that container logs to be removed with the container. +// that container logs to be removed with the container if specified. // Notice that we assume that the container should only be removed in non-running state, and // it will not write container logs anymore in that state. -func (m *kubeGenericRuntimeManager) removeContainer(ctx context.Context, containerID string) error { +// If the logs are kept, they will be preserved until the pod is removed and kubelet GC is triggered. +func (m *kubeGenericRuntimeManager) removeContainer(ctx context.Context, containerID string, keepLogs bool) error { logger := klog.FromContext(ctx) logger.V(4).Info("Removing container", "containerID", containerID) // Call internal container post-stop lifecycle hook. @@ -1377,8 +1378,10 @@ func (m *kubeGenericRuntimeManager) removeContainer(ctx context.Context, contain // Remove the container log. // TODO: Separate log and container lifecycle management. - if err := m.removeContainerLog(ctx, containerID); err != nil { - return err + if !keepLogs { + if err := m.removeContainerLog(ctx, containerID); err != nil { + return err + } } // Remove the container. return m.runtimeService.RemoveContainer(ctx, containerID) @@ -1414,7 +1417,7 @@ func (m *kubeGenericRuntimeManager) removeContainerLog(ctx context.Context, cont // DeleteContainer removes a container. func (m *kubeGenericRuntimeManager) DeleteContainer(ctx context.Context, containerID kubecontainer.ContainerID) error { - return m.removeContainer(ctx, containerID.ID) + return m.removeContainer(ctx, containerID.ID, false) } // setTerminationGracePeriod determines the grace period to use when killing a container diff --git a/pkg/kubelet/kuberuntime/kuberuntime_container_test.go b/pkg/kubelet/kuberuntime/kuberuntime_container_test.go index 9fe06d6ddfa..1cd615d120d 100644 --- a/pkg/kubelet/kuberuntime/kuberuntime_container_test.go +++ b/pkg/kubelet/kuberuntime/kuberuntime_container_test.go @@ -89,7 +89,7 @@ func TestRemoveContainer(t *testing.T) { fakeOS.Create(expectedContainerLogPath) fakeOS.Create(expectedContainerLogPathRotated) - err = m.removeContainer(tCtx, containerID) + err = m.removeContainer(tCtx, containerID, false) assert.NoError(t, err) // Verify container log is removed. @@ -104,6 +104,60 @@ func TestRemoveContainer(t *testing.T) { assert.Empty(t, containers) } +func TestRemoveContainer_keepLogs(t *testing.T) { + tCtx := ktesting.Init(t) + fakeRuntime, _, m, err := createTestRuntimeManager(tCtx) + require.NoError(t, err) + pod := &v1.Pod{ + ObjectMeta: metav1.ObjectMeta{ + UID: "12345678", + Name: "bar", + Namespace: "new", + }, + Spec: v1.PodSpec{ + Containers: []v1.Container{ + { + Name: "foo", + Image: "busybox", + ImagePullPolicy: v1.PullIfNotPresent, + }, + }, + }, + } + + // Create fake sandbox and container + _, fakeContainers := makeAndSetFakePod(t, m, fakeRuntime, pod) + assert.Len(t, fakeContainers, 1) + + containerID := fakeContainers[0].Id + fakeOS := m.osInterface.(*containertest.FakeOS) + fakeOS.GlobFn = func(pattern, path string) bool { + pattern = strings.ReplaceAll(pattern, "*", ".*") + pattern = strings.ReplaceAll(pattern, "\\", "\\\\") + return regexp.MustCompile(pattern).MatchString(path) + } + podLogsDirectory := "/var/log/pods" + expectedContainerLogPath := filepath.Join(podLogsDirectory, "new_bar_12345678", "foo", "0.log") + expectedContainerLogPathRotated := filepath.Join(podLogsDirectory, "new_bar_12345678", "foo", "0.log.20060102-150405") + + _, err = fakeOS.Create(expectedContainerLogPath) + require.NoError(t, err) + _, err = fakeOS.Create(expectedContainerLogPathRotated) + require.NoError(t, err) + + err = m.removeContainer(tCtx, containerID, true) + require.NoError(t, err) + + // Verify container logs are kept. + // We could not predict the order of `fakeOS.Removes`, so we use `assert.ElementsMatch` here. + require.Empty(t, fakeOS.Removes) + // Verify container is removed + require.Contains(t, fakeRuntime.Called, "RemoveContainer") + containers, err := fakeRuntime.ListContainers(tCtx, &runtimeapi.ContainerFilter{Id: containerID}) + require.NoError(t, err) + require.Empty(t, containers) +} + // TestKillContainer tests killing the container in a Pod. func TestKillContainer(t *testing.T) { tCtx := ktesting.Init(t) diff --git a/pkg/kubelet/kuberuntime/kuberuntime_gc.go b/pkg/kubelet/kuberuntime/kuberuntime_gc.go index 777421d962d..0bfb961eee4 100644 --- a/pkg/kubelet/kuberuntime/kuberuntime_gc.go +++ b/pkg/kubelet/kuberuntime/kuberuntime_gc.go @@ -147,7 +147,7 @@ func (cgc *containerGC) removeOldestN(ctx context.Context, containers []containe continue } } - if err := cgc.manager.removeContainer(ctx, containers[i].id); err != nil { + if err := cgc.manager.removeContainer(ctx, containers[i].id, false); err != nil { logger.Error(err, "Failed to remove container", "containerID", containers[i].id) } } diff --git a/pkg/kubelet/kuberuntime/kuberuntime_manager.go b/pkg/kubelet/kuberuntime/kuberuntime_manager.go index 55f6074b198..6a6b69b728c 100644 --- a/pkg/kubelet/kuberuntime/kuberuntime_manager.go +++ b/pkg/kubelet/kuberuntime/kuberuntime_manager.go @@ -1475,8 +1475,8 @@ func (m *kubeGenericRuntimeManager) SyncPod(ctx context.Context, pod *v1.Pod, po return } } - // TODO(yuanwang04): Revisit whether container logs should be persisted. - if err := m.removeContainer(ctx, containerInfo.containerID.ID); err != nil { + // The logs of removed containers will be preserved until the pod is deleted and GC is triggered. + if err := m.removeContainer(ctx, containerInfo.containerID.ID, true); err != nil { removeContainerResult.Fail(kubecontainer.ErrRemoveContainer, err.Error()) logger.Error(err, "removeContainer for pod failed", "containerName", cName, "containerID", containerInfo.containerID, "pod", klog.KObj(pod)) return