Merge pull request #137889 from sohankunkerkar/memqos-fix-reconcile

Remove reconcilePodMemoryProtection that resets pod cgroup values on systemd
This commit is contained in:
Kubernetes Prow Robot 2026-03-19 21:32:42 +05:30 committed by GitHub
commit b910026535
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
2 changed files with 12 additions and 42 deletions

View file

@ -305,12 +305,6 @@ func (m *qosContainerManagerImpl) setMemoryQoS(logger klog.Logger, configs map[v
setUnified(v1.PodQOSBurstable, Cgroup2MemoryLow, 0)
kubeletmetrics.MemoryQoSNodeMemoryMinBytes.Set(0)
kubeletmetrics.MemoryQoSNodeMemoryLowBytes.Set(0)
// Clear per-pod memory protection only when MemoryQoS feature gate is
// enabled but policy is None (rollback scenario). When the gate is off,
// pods never had protection set, so there's nothing to reconcile.
if utilfeature.DefaultFeatureGate.Enabled(kubefeatures.MemoryQoS) {
m.reconcilePodMemoryProtection(logger)
}
return
}
@ -330,37 +324,6 @@ func (m *qosContainerManagerImpl) setMemoryQoS(logger klog.Logger, configs map[v
setUnified(v1.PodQOSBurstable, Cgroup2MemoryLow, burstableRequests)
}
// reconcilePodMemoryProtection walks every pod returned by m.activePods and
// asks the cgroup manager to write "0" to both Cgroup2MemoryMin and
// Cgroup2MemoryLow on that pod's cgroup, dropping any memory protection that
// was left behind on pod-level cgroups.
//
// The update is best-effort: a failure for one pod is logged at verbosity 4
// and the remaining pods are still processed. Nothing is returned.
func (m *qosContainerManagerImpl) reconcilePodMemoryProtection(logger klog.Logger) {
	for _, p := range m.activePods() {
		// The pod cgroup lives under the cgroup of its QoS class. An
		// unrecognised class leaves qosParent at its zero value.
		var qosParent CgroupName
		switch v1qos.GetPodQOS(p) {
		case v1.PodQOSBestEffort:
			qosParent = m.qosContainersInfo.BestEffort
		case v1.PodQOSBurstable:
			qosParent = m.qosContainersInfo.Burstable
		case v1.PodQOSGuaranteed:
			qosParent = m.qosContainersInfo.Guaranteed
		}

		// Only the two unified (cgroup v2) memory protection keys are set;
		// no other resource parameter is populated in the request.
		zeroed := map[string]string{
			Cgroup2MemoryMin: "0",
			Cgroup2MemoryLow: "0",
		}
		cfg := &CgroupConfig{
			Name:               NewCgroupName(qosParent, GetPodCgroupNameSuffix(p.UID)),
			ResourceParameters: &ResourceConfig{Unified: zeroed},
		}

		err := m.cgroupManager.Update(logger, cfg)
		if err == nil {
			continue
		}
		logger.V(4).Info("Failed to reconcile pod memory protection", "pod", klog.KObj(p), "err", err)
	}
}
func (m *qosContainerManagerImpl) UpdateCgroups(logger logr.Logger) error {
m.Lock()
defer m.Unlock()

View file

@ -613,16 +613,23 @@ var _ = SIGDescribe("MemoryQoS", framework.WithSerial(), func() {
restartKubelet(ctx, true)
waitForKubeletToStart(ctx, f)
// Pod cgroup memory.low is reconciled by the periodic setMemoryQoS loop
// QoS-class cgroup memory.low is cleared by the periodic setMemoryQoS loop
// (periodicQOSCgroupUpdateInterval = 1 minute), so the wait below must cover that interval plus kubelet startup time.
var burstableCgroupPath string
if cgroupDriver == "systemd" {
burstableCgroupPath = filepath.Join(cgroupRoot, "kubepods.slice", "kubepods-burstable.slice")
} else {
burstableCgroupPath = filepath.Join(cgroupRoot, "kubepods", "burstable")
}
gomega.Eventually(ctx, func() int64 {
val, _ := memqosReadCgroupInt64(podCgroupPath, cgroupMemoryLow)
val, _ := memqosReadCgroupInt64(burstableCgroupPath, cgroupMemoryLow)
return val
}).WithTimeout(2*time.Minute).WithPolling(5*time.Second).Should(gomega.Equal(int64(0)),
"memory.low should reset to 0 when MemoryQoS is disabled")
"burstable QoS memory.low should reset to 0 when MemoryQoS is disabled")
// TODO(sohankunkerkar): Assert container-level memory.high resets to max once
// CRI runtimes support Unified map in UpdateContainerResources.
// NOTE: Pod-level and container-level memory.low values persist after rollback.
// Pod-level: clearing via systemd SetUnitProperties interferes with other cgroup settings.
// Container-level: requires CRI runtime support for Unified in UpdateContainerResources.
})
})