Fix flaky kubelet unit test on Windows during log directory cleanup

Signed-off-by: Mark Rossetti <marosset@microsoft.com>
This commit is contained in:
Mark Rossetti 2026-05-15 10:55:32 -07:00
parent 6b0e464c7a
commit 2984ddee71
No known key found for this signature in database
GPG key ID: 301C3E201D5FD515
2 changed files with 30 additions and 12 deletions

View file

@@ -1694,7 +1694,7 @@ func (kl *Kubelet) setupDataDirs(logger klog.Logger) error {
func (kl *Kubelet) StartGarbageCollection(ctx context.Context) {
logger := klog.FromContext(ctx)
loggedContainerGCFailure := false
go wait.Until(func() {
go wait.UntilWithContext(ctx, func(ctx context.Context) {
if err := kl.containerGC.GarbageCollect(ctx); err != nil {
logger.Error(err, "Container garbage collection failed")
kl.recorder.WithLogger(logger).Eventf(kl.nodeRef, v1.EventTypeWarning, events.ContainerGCFailed, "%s", err.Error())
@@ -1708,7 +1708,7 @@ func (kl *Kubelet) StartGarbageCollection(ctx context.Context) {
logger.V(int(vLevel)).Info("Container garbage collection succeeded")
}
}, ContainerGCPeriod, wait.NeverStop)
}, ContainerGCPeriod)
// when the high threshold is set to 100, and the max age is 0 (or the max age feature is disabled)
// stub the image GC manager
@@ -1719,7 +1719,7 @@ func (kl *Kubelet) StartGarbageCollection(ctx context.Context) {
prevImageGCFailed := false
beganGC := time.Now()
go wait.Until(func() {
go wait.UntilWithContext(ctx, func(ctx context.Context) {
if err := kl.imageManager.GarbageCollect(ctx, beganGC); err != nil {
if prevImageGCFailed {
logger.Error(err, "Image garbage collection failed multiple times in a row")
@@ -1738,7 +1738,7 @@ func (kl *Kubelet) StartGarbageCollection(ctx context.Context) {
logger.V(int(vLevel)).Info("Image garbage collection succeeded")
}
}, ImageGCPeriod, wait.NeverStop)
}, ImageGCPeriod)
}
// initializeModules will initialize internal modules that do not require the container runtime to be up.

View file

@@ -3301,12 +3301,21 @@ func createRemoteRuntimeService(ctx context.Context, endpoint string, t *testing
func TestNewMainKubeletStandAlone(t *testing.T) {
tCtx := ktesting.Init(t)
tempDir, err := os.MkdirTemp("", "logs")
ContainerLogsDir = tempDir
require.NoError(t, err)
defer func() {
err := os.RemoveAll(ContainerLogsDir)
containerLogsDir := ContainerLogsDir
// Point the package-level log directory at this test's temp dir so the
// garbage collection path uses an isolated directory, then restore it during
// cleanup.
ContainerLogsDir = tempDir
// Use t.Cleanup instead of defer so that the cleanup registered by the second
// ktesting.Init in this test cancels the GC context before RemoveAll runs.
// This is needed on Windows because a concurrent os.ReadDir in a GC goroutine
// can keep a handle to this directory in use and cause os.RemoveAll to fail.
t.Cleanup(func() {
ContainerLogsDir = containerLogsDir
err := os.RemoveAll(tempDir)
require.NoError(t, err)
}()
})
ca, cert, key, err := generateCAAndCertKeyWithOptions(
"localhost",
@@ -3453,12 +3462,21 @@ func TestNewMainKubeletStandAlone(t *testing.T) {
func TestNewMainKubeletWithCertAndCAReloadingEnabled(t *testing.T) {
tCtx := ktesting.Init(t)
tempDir, err := os.MkdirTemp("", "logs")
ContainerLogsDir = tempDir
require.NoError(t, err)
defer func() {
err := os.RemoveAll(ContainerLogsDir)
containerLogsDir := ContainerLogsDir
// Point the package-level log directory at this test's temp dir so the
// garbage collection path uses an isolated directory, then restore it during
// cleanup.
ContainerLogsDir = tempDir
// Use t.Cleanup instead of defer so that the cleanup registered by the second
// ktesting.Init in this test cancels the GC context before RemoveAll runs.
// This is needed on Windows because a concurrent os.ReadDir in a GC goroutine
// can keep a handle to this directory in use and cause os.RemoveAll to fail.
t.Cleanup(func() {
ContainerLogsDir = containerLogsDir
err := os.RemoveAll(tempDir)
require.NoError(t, err)
}()
})
ca, cert, key, err := generateCAAndCertKeyWithOptions(
"localhost",