diff --git a/pkg/kubelet/cm/qos_container_manager_linux.go b/pkg/kubelet/cm/qos_container_manager_linux.go index 4c29d23ce15..8f563b8c821 100644 --- a/pkg/kubelet/cm/qos_container_manager_linux.go +++ b/pkg/kubelet/cm/qos_container_manager_linux.go @@ -305,12 +305,6 @@ func (m *qosContainerManagerImpl) setMemoryQoS(logger klog.Logger, configs map[v setUnified(v1.PodQOSBurstable, Cgroup2MemoryLow, 0) kubeletmetrics.MemoryQoSNodeMemoryMinBytes.Set(0) kubeletmetrics.MemoryQoSNodeMemoryLowBytes.Set(0) - // Clear per-pod memory protection only when MemoryQoS feature gate is - // enabled but policy is None (rollback scenario). When the gate is off, - // pods never had protection set, so there's nothing to reconcile. - if utilfeature.DefaultFeatureGate.Enabled(kubefeatures.MemoryQoS) { - m.reconcilePodMemoryProtection(logger) - } return } @@ -330,37 +324,6 @@ func (m *qosContainerManagerImpl) setMemoryQoS(logger klog.Logger, configs map[v setUnified(v1.PodQOSBurstable, Cgroup2MemoryLow, burstableRequests) } -// reconcilePodMemoryProtection clears stale memory.min and memory.low on pod-level cgroups -// when MemoryQoS is disabled or memoryReservationPolicy is not TieredReservation. -func (m *qosContainerManagerImpl) reconcilePodMemoryProtection(logger klog.Logger) { - pods := m.activePods() - for _, pod := range pods { - podQOS := v1qos.GetPodQOS(pod) - var parentContainer CgroupName - switch podQOS { - case v1.PodQOSGuaranteed: - parentContainer = m.qosContainersInfo.Guaranteed - case v1.PodQOSBurstable: - parentContainer = m.qosContainersInfo.Burstable - case v1.PodQOSBestEffort: - parentContainer = m.qosContainersInfo.BestEffort - } - podCgroupName := NewCgroupName(parentContainer, GetPodCgroupNameSuffix(pod.UID)) - podConfig := &CgroupConfig{ - Name: podCgroupName, - ResourceParameters: &ResourceConfig{ - Unified: map[string]string{ - Cgroup2MemoryMin: "0", - Cgroup2MemoryLow: "0", - }, - }, - } - if err := m.cgroupManager.Update(logger, podConfig); err != nil { - logger.V(4).Info("Failed to reconcile pod memory protection", "pod", klog.KObj(pod), "err", err) - } - } -} - func (m *qosContainerManagerImpl) UpdateCgroups(logger logr.Logger) error { m.Lock() defer m.Unlock() diff --git a/test/e2e_node/memory_qos_test.go b/test/e2e_node/memory_qos_test.go index e7c0f335b3f..553e3e781db 100644 --- a/test/e2e_node/memory_qos_test.go +++ b/test/e2e_node/memory_qos_test.go @@ -613,16 +613,23 @@ var _ = SIGDescribe("MemoryQoS", framework.WithSerial(), func() { restartKubelet(ctx, true) waitForKubeletToStart(ctx, f) - // Pod cgroup memory.low is reconciled by the periodic setMemoryQoS loop + // QoS-class cgroup memory.low is cleared by the periodic setMemoryQoS loop // (periodicQOSCgroupUpdateInterval = 1 minute) plus kubelet startup time. + var burstableCgroupPath string + if cgroupDriver == "systemd" { + burstableCgroupPath = filepath.Join(cgroupRoot, "kubepods.slice", "kubepods-burstable.slice") + } else { + burstableCgroupPath = filepath.Join(cgroupRoot, "kubepods", "burstable") + } gomega.Eventually(ctx, func() int64 { - val, _ := memqosReadCgroupInt64(podCgroupPath, cgroupMemoryLow) + val, _ := memqosReadCgroupInt64(burstableCgroupPath, cgroupMemoryLow) return val }).WithTimeout(2*time.Minute).WithPolling(5*time.Second).Should(gomega.Equal(int64(0)), - "memory.low should reset to 0 when MemoryQoS is disabled") + "burstable QoS memory.low should reset to 0 when MemoryQoS is disabled") - // TODO(sohankunkerkar): Assert container-level memory.high resets to max once - // CRI runtimes support Unified map in UpdateContainerResources. + // NOTE: Pod-level and container-level memory.low values persist after rollback. + // Pod-level: clearing via systemd SetUnitProperties interferes with other cgroup settings. + // Container-level: requires CRI runtime support for Unified in UpdateContainerResources. }) })