diff --git a/cmd/kubelet/app/server.go b/cmd/kubelet/app/server.go index 588c0deda29..7a8d458170a 100644 --- a/cmd/kubelet/app/server.go +++ b/cmd/kubelet/app/server.go @@ -63,6 +63,7 @@ import ( utilruntime "k8s.io/apimachinery/pkg/util/runtime" "k8s.io/apimachinery/pkg/util/sets" "k8s.io/apimachinery/pkg/util/validation/field" + utilversion "k8s.io/apimachinery/pkg/util/version" "k8s.io/apimachinery/pkg/util/wait" genericapiserver "k8s.io/apiserver/pkg/server" "k8s.io/apiserver/pkg/server/flagz" @@ -120,6 +121,7 @@ import ( "k8s.io/kubernetes/pkg/kubelet/watchdog" utilfs "k8s.io/kubernetes/pkg/util/filesystem" "k8s.io/kubernetes/pkg/util/flock" + utilkernel "k8s.io/kubernetes/pkg/util/kernel" "k8s.io/kubernetes/pkg/util/oom" "k8s.io/kubernetes/pkg/util/rlimit" "k8s.io/kubernetes/pkg/volume/util/hostutil" @@ -642,10 +644,20 @@ func run(ctx context.Context, s *options.KubeletServer, kubeDeps *kubelet.Depend return err } - // Warn if MemoryQoS enabled with cgroups v1 - if utilfeature.DefaultFeatureGate.Enabled(features.MemoryQoS) && - !kubeletutil.IsCgroup2UnifiedMode() { - logger.Info("Warning: MemoryQoS feature only works with cgroups v2 on Linux, but enabled with cgroups v1") + // Warn about MemoryQoS compatibility issues + if utilfeature.DefaultFeatureGate.Enabled(features.MemoryQoS) { + if !kubeletutil.IsCgroup2UnifiedMode() { + logger.Info("Warning: MemoryQoS feature only works with cgroups v2 on Linux, but enabled with cgroups v1") + } else { + kernelVersion, err := utilkernel.GetVersion() + if err != nil { + logger.Error(err, "Failed to detect kernel version for MemoryQoS compatibility check") + } else if kernelVersion.LessThan(utilversion.MustParseGeneric(utilkernel.MemoryQoSMinKernelVersion)) { + logger.Info("Warning: MemoryQoS memory.high throttling may cause process livelock on older kernels", + "currentKernel", kernelVersion, + "minimumKernel", utilkernel.MemoryQoSMinKernelVersion) + } + } } // Obtain Kubelet Lock File if s.ExitOnLockContention && 
s.LockFilePath == "" { diff --git a/pkg/util/kernel/constants.go b/pkg/util/kernel/constants.go index 1467f6c229d..b178110ea91 100644 --- a/pkg/util/kernel/constants.go +++ b/pkg/util/kernel/constants.go @@ -58,3 +58,9 @@ const TCPReceiveMemoryNamespacedKernelVersion = "4.15" // TCPTransmitMemoryNamespacedKernelVersion is the kernel version in which net.ipv4.tcp_wmem was namespaced(netns). // (ref: https://github.com/torvalds/linux/commit/356d1833b638bd465672aefeb71def3ab93fc17d) const TCPTransmitMemoryNamespacedKernelVersion = "4.15" + +// MemoryQoSMinKernelVersion is the minimum kernel version for safe memory.high throttling. +// Kernels older than 5.9 have a livelock bug where processes can get stuck in an infinite +// reclaim loop at the memory.high boundary. The kubelet's run function parses this value with utilversion.MustParseGeneric and, when the MemoryQoS feature gate is enabled on cgroups v2, logs a warning if the running kernel is older. +// (ref: https://github.com/torvalds/linux/commit/b3ff92916af3156df27716bb080a407e4caf9085) +const MemoryQoSMinKernelVersion = "5.9"