Merge pull request #136963 from yuanwang04/restart-pod-keep-logs

Keep logs when containers removed by RestartAllContainers
This commit is contained in:
Kubernetes Prow Robot 2026-03-07 07:22:18 +05:30 committed by GitHub
commit 8782cf135f
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
4 changed files with 69 additions and 12 deletions

View file

@ -988,7 +988,7 @@ func (m *kubeGenericRuntimeManager) pruneInitContainersBeforeStart(ctx context.C
}
// prune all other init containers that match this container name
logger.V(4).Info("Removing init container", "containerName", status.Name, "containerID", status.ID.ID, "count", count)
if err := m.removeContainer(ctx, status.ID.ID); err != nil {
if err := m.removeContainer(ctx, status.ID.ID, false); err != nil {
utilruntime.HandleError(fmt.Errorf("failed to remove pod init container %q: %v; Skipping pod %q", status.Name, err, format.Pod(pod)))
continue
}
@ -1014,7 +1014,7 @@ func (m *kubeGenericRuntimeManager) purgeInitContainers(ctx context.Context, pod
count++
// Purge all init containers that match this container name
logger.V(4).Info("Removing init container", "containerName", status.Name, "containerID", status.ID.ID, "count", count)
if err := m.removeContainer(ctx, status.ID.ID); err != nil {
if err := m.removeContainer(ctx, status.ID.ID, false); err != nil {
utilruntime.HandleError(fmt.Errorf("failed to remove pod init container %q: %v; Skipping pod %q", status.Name, err, format.Pod(pod)))
continue
}
@ -1361,13 +1361,14 @@ func (m *kubeGenericRuntimeManager) RunInContainer(ctx context.Context, id kubec
return append(stdout, stderr...), err
}
// removeContainer removes the container and the container logs.
// removeContainer removes the container and optionally the container logs.
// Notice that we remove the container logs first, so that container will not be removed if
// container logs are failed to be removed, and kubelet will retry this later. This guarantees
// that container logs to be removed with the container.
// that container logs to be removed with the container if specified.
// Notice that we assume that the container should only be removed in non-running state, and
// it will not write container logs anymore in that state.
func (m *kubeGenericRuntimeManager) removeContainer(ctx context.Context, containerID string) error {
// If the logs are kept, they will be preserved until the pod is removed and kubelet GC is triggered.
func (m *kubeGenericRuntimeManager) removeContainer(ctx context.Context, containerID string, keepLogs bool) error {
logger := klog.FromContext(ctx)
logger.V(4).Info("Removing container", "containerID", containerID)
// Call internal container post-stop lifecycle hook.
@ -1377,8 +1378,10 @@ func (m *kubeGenericRuntimeManager) removeContainer(ctx context.Context, contain
// Remove the container log.
// TODO: Separate log and container lifecycle management.
if err := m.removeContainerLog(ctx, containerID); err != nil {
return err
if !keepLogs {
if err := m.removeContainerLog(ctx, containerID); err != nil {
return err
}
}
// Remove the container.
return m.runtimeService.RemoveContainer(ctx, containerID)
@ -1414,7 +1417,7 @@ func (m *kubeGenericRuntimeManager) removeContainerLog(ctx context.Context, cont
// DeleteContainer removes a container.
func (m *kubeGenericRuntimeManager) DeleteContainer(ctx context.Context, containerID kubecontainer.ContainerID) error {
return m.removeContainer(ctx, containerID.ID)
return m.removeContainer(ctx, containerID.ID, false)
}
// setTerminationGracePeriod determines the grace period to use when killing a container

View file

@ -89,7 +89,7 @@ func TestRemoveContainer(t *testing.T) {
fakeOS.Create(expectedContainerLogPath)
fakeOS.Create(expectedContainerLogPathRotated)
err = m.removeContainer(tCtx, containerID)
err = m.removeContainer(tCtx, containerID, false)
assert.NoError(t, err)
// Verify container log is removed.
@ -104,6 +104,60 @@ func TestRemoveContainer(t *testing.T) {
assert.Empty(t, containers)
}
func TestRemoveContainer_keepLogs(t *testing.T) {
tCtx := ktesting.Init(t)
fakeRuntime, _, m, err := createTestRuntimeManager(tCtx)
require.NoError(t, err)
pod := &v1.Pod{
ObjectMeta: metav1.ObjectMeta{
UID: "12345678",
Name: "bar",
Namespace: "new",
},
Spec: v1.PodSpec{
Containers: []v1.Container{
{
Name: "foo",
Image: "busybox",
ImagePullPolicy: v1.PullIfNotPresent,
},
},
},
}
// Create fake sandbox and container
_, fakeContainers := makeAndSetFakePod(t, m, fakeRuntime, pod)
assert.Len(t, fakeContainers, 1)
containerID := fakeContainers[0].Id
fakeOS := m.osInterface.(*containertest.FakeOS)
fakeOS.GlobFn = func(pattern, path string) bool {
pattern = strings.ReplaceAll(pattern, "*", ".*")
pattern = strings.ReplaceAll(pattern, "\\", "\\\\")
return regexp.MustCompile(pattern).MatchString(path)
}
podLogsDirectory := "/var/log/pods"
expectedContainerLogPath := filepath.Join(podLogsDirectory, "new_bar_12345678", "foo", "0.log")
expectedContainerLogPathRotated := filepath.Join(podLogsDirectory, "new_bar_12345678", "foo", "0.log.20060102-150405")
_, err = fakeOS.Create(expectedContainerLogPath)
require.NoError(t, err)
_, err = fakeOS.Create(expectedContainerLogPathRotated)
require.NoError(t, err)
err = m.removeContainer(tCtx, containerID, true)
require.NoError(t, err)
// Verify container logs are kept.
// We could not predict the order of `fakeOS.Removes`, so we use `assert.ElementsMatch` here.
require.Empty(t, fakeOS.Removes)
// Verify container is removed
require.Contains(t, fakeRuntime.Called, "RemoveContainer")
containers, err := fakeRuntime.ListContainers(tCtx, &runtimeapi.ContainerFilter{Id: containerID})
require.NoError(t, err)
require.Empty(t, containers)
}
// TestKillContainer tests killing the container in a Pod.
func TestKillContainer(t *testing.T) {
tCtx := ktesting.Init(t)

View file

@ -147,7 +147,7 @@ func (cgc *containerGC) removeOldestN(ctx context.Context, containers []containe
continue
}
}
if err := cgc.manager.removeContainer(ctx, containers[i].id); err != nil {
if err := cgc.manager.removeContainer(ctx, containers[i].id, false); err != nil {
logger.Error(err, "Failed to remove container", "containerID", containers[i].id)
}
}

View file

@ -1475,8 +1475,8 @@ func (m *kubeGenericRuntimeManager) SyncPod(ctx context.Context, pod *v1.Pod, po
return
}
}
// TODO(yuanwang04): Revisit whether container logs should be persisted.
if err := m.removeContainer(ctx, containerInfo.containerID.ID); err != nil {
// The logs of removed containers will be preserved until the pod is deleted and GC is triggered.
if err := m.removeContainer(ctx, containerInfo.containerID.ID, true); err != nil {
removeContainerResult.Fail(kubecontainer.ErrRemoveContainer, err.Error())
logger.Error(err, "removeContainer for pod failed", "containerName", cName, "containerID", containerInfo.containerID, "pod", klog.KObj(pod))
return