From deb23a8f8400807ecdbc509bc897ad69168d63de Mon Sep 17 00:00:00 2001 From: Yuvraj Date: Thu, 15 Jan 2026 19:11:37 +0530 Subject: [PATCH 1/4] kubelet: add unit tests for QoS CPU shares update Add unit test coverage for UpdateCgroups() to verify that CPU shares are correctly computed and applied for Guaranteed, Burstable, and BestEffort QoS classes based on active pods. This improves test coverage in the QoS container manager area, as suggested in tracker #109717. Signed-off-by: Yuvraj --- .../cm/qos_container_manager_linux_test.go | 268 +++++++++++++++++- 1 file changed, 267 insertions(+), 1 deletion(-) diff --git a/pkg/kubelet/cm/qos_container_manager_linux_test.go b/pkg/kubelet/cm/qos_container_manager_linux_test.go index 54439021aa5..f1f176a7ebb 100644 --- a/pkg/kubelet/cm/qos_container_manager_linux_test.go +++ b/pkg/kubelet/cm/qos_container_manager_linux_test.go @@ -19,16 +19,23 @@ limitations under the License. package cm import ( + "context" "fmt" "strconv" + "strings" "testing" + "github.com/go-logr/logr" "github.com/stretchr/testify/assert" v1 "k8s.io/api/core/v1" - "k8s.io/apimachinery/pkg/api/resource" + resource "k8s.io/apimachinery/pkg/api/resource" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/types" + "k8s.io/apimachinery/pkg/util/uuid" + cres "k8s.io/component-helpers/resource" "k8s.io/klog/v2" "k8s.io/klog/v2/ktesting" + v1qos "k8s.io/kubernetes/pkg/apis/core/v1/helper/qos" ) func activeTestPods() []*v1.Pod { @@ -155,3 +162,262 @@ func TestQoSContainerCgroup(t *testing.T) { assert.Equal(t, qosConfigs[v1.PodQOSGuaranteed].ResourceParameters.Unified["memory.min"], strconv.FormatInt(burstableMin.Value()+guaranteedMin.Value(), 10)) assert.Equal(t, qosConfigs[v1.PodQOSBurstable].ResourceParameters.Unified["memory.min"], strconv.FormatInt(burstableMin.Value(), 10)) } + + +// fakeCgroupManager is used because Start() requires a functional +// CgroupManager. All methods are stubbed so that Start() can +// complete successfully without using real cgroups. +type fakeCgroupManager struct { + + created []*CgroupConfig + updates []*CgroupConfig +} + +// Update() is the observation point for this test. +// Capture the updated cgroup config so it can be validated. +func (f *fakeCgroupManager) Update(logger klog.Logger, config *CgroupConfig) error{ + copiedConfig := *config + f.updates = append(f.updates, &copiedConfig) + return nil +} + +// Create() must succeed for Start() to construct QoS cgroups. +// We do not assert on Create() behavior in this test. +func (f *fakeCgroupManager) Create(l klog.Logger, config *CgroupConfig) error { + copiedConfig := *config + f.created = append(f.created, &copiedConfig) + return nil +} + +func (f *fakeCgroupManager) Destroy(l klog.Logger,config *CgroupConfig) error {return nil} +func (f *fakeCgroupManager) Validate(name CgroupName) error {return nil} +func (f *fakeCgroupManager) Exists(name CgroupName) bool {return false} +func (f *fakeCgroupManager) Name(name CgroupName) string {return name.ToCgroupfs()} +func (f *fakeCgroupManager) CgroupName(name string) CgroupName {return ParseCgroupfsToCgroupName(name)} +func (f *fakeCgroupManager) Pids(logger klog.Logger, name CgroupName) []int {return nil} +func (f *fakeCgroupManager) ReduceCPULimits(logger klog.Logger, cgroupName CgroupName) error {return nil} +func (f *fakeCgroupManager) MemoryUsage(name CgroupName) (int64, error) {return int64(0),nil} +func (f *fakeCgroupManager) GetCgroupConfig(name CgroupName, resource v1.ResourceName) (*ResourceConfig, error) {return nil,nil} +func (f *fakeCgroupManager) SetCgroupConfig(logger klog.Logger, name CgroupName, resourceConfig *ResourceConfig) error {return nil} +func (f *fakeCgroupManager) Version() int {return 1} + +func expectedCPUShares(activeTestPods ActivePodsFunc) uint64 { + var pods []*v1.Pod = activeTestPods() + + burstablePodCPURequest := int64(0) + for i := range pods { + pod := pods[i] + qosClass := v1qos.GetPodQOS(pod) + if qosClass != v1.PodQOSBurstable { + // we only care about the burstable qos tier + continue + } + req := cres.PodRequests(pod, cres.PodResourcesOptions{}) + if request, found := req[v1.ResourceCPU]; found { + burstablePodCPURequest += request.MilliValue() + } + } + // set burstable shares based on current observe state + burstableCPUShares := MilliCPUToShares(burstablePodCPURequest) + return burstableCPUShares +} + +func guaranteedTestPods() []*v1.Pod{ + return []*v1.Pod{ + + { + ObjectMeta: metav1.ObjectMeta{ + UID: types.UID(uuid.NewUUID()), + Name: "guaranteed-pod", + Namespace: "test", + }, + Spec: v1.PodSpec{ + Containers: []v1.Container{ + { + Name: "foo", + Image: "busybox", + Resources: v1.ResourceRequirements{ + Requests: v1.ResourceList{ + v1.ResourceCPU:resource.MustParse("1"), + }, + Limits: v1.ResourceList{ + v1.ResourceCPU:resource.MustParse("1"), + }, + }, + }, + }, + }, + }, + + } +} + +func burstableTestPods() []*v1.Pod{ + return []*v1.Pod{ + { + ObjectMeta: metav1.ObjectMeta{ + UID: types.UID(uuid.NewUUID()), + Name: "burstable-pod", + Namespace: "test", + }, + Spec: v1.PodSpec{ + Containers: []v1.Container{ + { + Name: "foo", + Image: "busybox", + Resources: v1.ResourceRequirements{ + Requests: v1.ResourceList{ + v1.ResourceCPU:resource.MustParse("1"), + }, + Limits: v1.ResourceList{ + v1.ResourceCPU:resource.MustParse("2"), + }, + }, + }, + }, + }, + }, + } +} + +func bestEffortTestPods() []*v1.Pod{ + return []*v1.Pod{ + { + ObjectMeta: metav1.ObjectMeta{ + UID: types.UID(uuid.NewUUID()), + Name: "besteffort-pod", + Namespace: "test", + }, + Spec: v1.PodSpec{ + Containers: []v1.Container{ + { + Name: "foo", + Image: "busybox", + }, + }, + }, + }, + } +} + +func guaranteedAndBurstableTestPods() []*v1.Pod { + pods := []*v1.Pod{} + pods = append(pods, guaranteedTestPods()...) + pods = append(pods, burstableTestPods()...) + return pods +} +func besteffortAndBurstableTestPods() []*v1.Pod { + pods := []*v1.Pod{} + pods = append(pods, bestEffortTestPods()...) + pods = append(pods, burstableTestPods()...) + return pods +} + +// TestQOSCPUConfigUpdate verifies that UpdateCgroups() computes and +// updates the correct CPU shares for each QoS class based on the +// currently active pods. +func TestQOSCPUConfigUpdate(t *testing.T){ + + tests := []struct{ + name string + testPods ActivePodsFunc + expectedCPUShares uint64 + }{ + { + name: "guaranteed-pods-only", + testPods: guaranteedTestPods, + expectedCPUShares:expectedCPUShares(guaranteedTestPods), + }, + { + name: "burstable-pods-only", + testPods: burstableTestPods, + expectedCPUShares: expectedCPUShares(burstableTestPods), + }, + { + name: "besteffort-pods-only", + testPods: bestEffortTestPods, + expectedCPUShares: expectedCPUShares(bestEffortTestPods), + }, + { + name: "guaranteed-and-burstable-pods", + testPods: guaranteedAndBurstableTestPods, + expectedCPUShares: expectedCPUShares(guaranteedAndBurstableTestPods), + }, + { + name: "besteffort-and-burstable-pods", + testPods: besteffortAndBurstableTestPods, + expectedCPUShares: expectedCPUShares(besteffortAndBurstableTestPods), + }, + } + + for _,testCase := range tests{ + + t.Run(testCase.name, func(t *testing.T) { + + testContainerManager,err := createTestQOSContainerManager(logr.Logger{}) + if err!=nil{ + t.Fatalf("Unable to create Test Qos Container Manager: %s", err) + return + } + + var fakecgroupManager = &fakeCgroupManager{} + testContainerManager.cgroupManager = fakecgroupManager + + err=testContainerManager.Start(context.Background(), func() v1.ResourceList {return v1.ResourceList{}}, testCase.testPods) + if err!=nil{ + t.Fatalf("Start() failed: %s",err) + } + + // UpdateCgroups() is expected to update all QoS cgroups on each call + // based on the current active pod set. + err = testContainerManager.UpdateCgroups(logr.Logger{}) + if err!=nil{ + t.Fatalf("Error in UpdateCgroups(): %s", err) + } + + //These flags will be used to check whether UpdateCgroups() + //is updating the CPU shares for all QoS classes (cgroups) + foundBurstable := false + foundBestEffort := false + foundGuaranteed := false + + for _,config := range fakecgroupManager.updates { + + if strings.HasSuffix(config.Name.ToCgroupfs(), "burstable"){ + foundBurstable = true + if *config.ResourceParameters.CPUShares != testCase.expectedCPUShares{ + t.Fatalf("Expected CPU Shares for Burstable: %d. Got: %d",testCase.expectedCPUShares, *config.ResourceParameters.CPUShares) + } + continue + } + + if strings.HasSuffix(config.Name.ToCgroupfs(), "besteffort"){ + foundBestEffort = true + if *config.ResourceParameters.CPUShares != MinShares { + t.Fatalf("Expected CPU Shares for BestEffort: %d Got: %d", MinShares, *config.ResourceParameters.CPUShares) + } + continue + } + + if config.Name.ToCgroupfs() == testContainerManager.cgroupRoot.ToCgroupfs() { + foundGuaranteed = true + if config.ResourceParameters != nil && config.ResourceParameters.CPUShares != nil{ + t.Fatalf("Expected CPU Shares for Guaranteed: , Got: %d", *config.ResourceParameters.CPUShares) + } + } + } + + if !foundBurstable { + t.Fatalf("burstable cgroup not found") +} +if !foundBestEffort { + t.Fatalf("besteffort cgroup not found") +} +if !foundGuaranteed { + t.Fatalf("guaranteed cgroup not found") +} + +}) + + } +} \ No newline at end of file From b8af7e8ea8f016df1f74dc2e3ae599912db70061 Mon Sep 17 00:00:00 2001 From: Yuvraj Date: Fri, 16 Jan 2026 11:00:53 +0530 Subject: [PATCH 2/4] kubelet: fix data race in QoS cgroup CPU shares test --- .../cm/qos_container_manager_linux_test.go | 286 ++++++++++-------- 1 file changed, 154 insertions(+), 132 deletions(-) diff --git a/pkg/kubelet/cm/qos_container_manager_linux_test.go b/pkg/kubelet/cm/qos_container_manager_linux_test.go index f1f176a7ebb..6b4fe10ff55 100644 --- a/pkg/kubelet/cm/qos_container_manager_linux_test.go +++ b/pkg/kubelet/cm/qos_container_manager_linux_test.go @@ -23,6 +23,7 @@ import ( "fmt" "strconv" "strings" + "sync" "testing" "github.com/go-logr/logr" @@ -163,19 +164,21 @@ func TestQoSContainerCgroup(t *testing.T) { assert.Equal(t, qosConfigs[v1.PodQOSBurstable].ResourceParameters.Unified["memory.min"], strconv.FormatInt(burstableMin.Value(), 10)) } - // fakeCgroupManager is used because Start() requires a functional // CgroupManager. All methods are stubbed so that Start() can -// complete successfully without using real cgroups. +// complete successfully without using real cgroups. type fakeCgroupManager struct { - + mutex sync.Mutex created []*CgroupConfig updates []*CgroupConfig } // Update() is the observation point for this test. // Capture the updated cgroup config so it can be validated. -func (f *fakeCgroupManager) Update(logger klog.Logger, config *CgroupConfig) error{ +func (f *fakeCgroupManager) Update(logger klog.Logger, config *CgroupConfig) error { + f.mutex.Lock() + defer f.mutex.Unlock() + copiedConfig := *config f.updates = append(f.updates, &copiedConfig) return nil @@ -184,25 +187,36 @@ func (f *fakeCgroupManager) Update(logger klog.Logger, config *CgroupConfig) err // Create() must succeed for Start() to construct QoS cgroups. // We do not assert on Create() behavior in this test. func (f *fakeCgroupManager) Create(l klog.Logger, config *CgroupConfig) error { + f.mutex.Lock() + defer f.mutex.Unlock() + copiedConfig := *config f.created = append(f.created, &copiedConfig) return nil } -func (f *fakeCgroupManager) Destroy(l klog.Logger,config *CgroupConfig) error {return nil} -func (f *fakeCgroupManager) Validate(name CgroupName) error {return nil} -func (f *fakeCgroupManager) Exists(name CgroupName) bool {return false} -func (f *fakeCgroupManager) Name(name CgroupName) string {return name.ToCgroupfs()} -func (f *fakeCgroupManager) CgroupName(name string) CgroupName {return ParseCgroupfsToCgroupName(name)} -func (f *fakeCgroupManager) Pids(logger klog.Logger, name CgroupName) []int {return nil} -func (f *fakeCgroupManager) ReduceCPULimits(logger klog.Logger, cgroupName CgroupName) error {return nil} -func (f *fakeCgroupManager) MemoryUsage(name CgroupName) (int64, error) {return int64(0),nil} -func (f *fakeCgroupManager) GetCgroupConfig(name CgroupName, resource v1.ResourceName) (*ResourceConfig, error) {return nil,nil} -func (f *fakeCgroupManager) SetCgroupConfig(logger klog.Logger, name CgroupName, resourceConfig *ResourceConfig) error {return nil} -func (f *fakeCgroupManager) Version() int {return 1} +func (f *fakeCgroupManager) Destroy(l klog.Logger, config *CgroupConfig) error { return nil } +func (f *fakeCgroupManager) Validate(name CgroupName) error { return nil } +func (f *fakeCgroupManager) Exists(name CgroupName) bool { return false } +func (f *fakeCgroupManager) Name(name CgroupName) string { return name.ToCgroupfs() } +func (f *fakeCgroupManager) CgroupName(name string) CgroupName { + return ParseCgroupfsToCgroupName(name) +} +func (f *fakeCgroupManager) Pids(logger klog.Logger, name CgroupName) []int { return nil } +func (f *fakeCgroupManager) ReduceCPULimits(logger klog.Logger, cgroupName CgroupName) error { + return nil +} +func (f *fakeCgroupManager) MemoryUsage(name CgroupName) (int64, error) { return int64(0), nil } +func (f *fakeCgroupManager) GetCgroupConfig(name CgroupName, resource v1.ResourceName) (*ResourceConfig, error) { + return nil, nil +} +func (f *fakeCgroupManager) SetCgroupConfig(logger klog.Logger, name CgroupName, resourceConfig *ResourceConfig) error { + return nil +} +func (f *fakeCgroupManager) Version() int { return 1 } func expectedCPUShares(activeTestPods ActivePodsFunc) uint64 { - var pods []*v1.Pod = activeTestPods() + pods := activeTestPods() burstablePodCPURequest := int64(0) for i := range pods { @@ -211,7 +225,7 @@ func expectedCPUShares(activeTestPods ActivePodsFunc) uint64 { if qosClass != v1.PodQOSBurstable { // we only care about the burstable qos tier continue - } + } req := cres.PodRequests(pod, cres.PodResourcesOptions{}) if request, found := req[v1.ResourceCPU]; found { burstablePodCPURequest += request.MilliValue() @@ -222,55 +236,26 @@ func expectedCPUShares(activeTestPods ActivePodsFunc) uint64 { return burstableCPUShares } -func guaranteedTestPods() []*v1.Pod{ +func guaranteedTestPods() []*v1.Pod { return []*v1.Pod{ - + { ObjectMeta: metav1.ObjectMeta{ - UID: types.UID(uuid.NewUUID()), - Name: "guaranteed-pod", + UID: types.UID(uuid.NewUUID()), + Name: "guaranteed-pod", Namespace: "test", }, Spec: v1.PodSpec{ Containers: []v1.Container{ { - Name: "foo", + Name: "foo", Image: "busybox", Resources: v1.ResourceRequirements{ Requests: v1.ResourceList{ - v1.ResourceCPU:resource.MustParse("1"), + v1.ResourceCPU: resource.MustParse("1"), }, Limits: v1.ResourceList{ - v1.ResourceCPU:resource.MustParse("1"), - }, - }, - }, - }, - }, - }, - - } -} - -func burstableTestPods() []*v1.Pod{ - return []*v1.Pod{ - { - ObjectMeta: metav1.ObjectMeta{ - UID: types.UID(uuid.NewUUID()), - Name: "burstable-pod", - Namespace: "test", - }, - Spec: v1.PodSpec{ - Containers: []v1.Container{ - { - Name: "foo", - Image: "busybox", - Resources: v1.ResourceRequirements{ - Requests: v1.ResourceList{ - v1.ResourceCPU:resource.MustParse("1"), - }, - Limits: v1.ResourceList{ - v1.ResourceCPU:resource.MustParse("2"), + v1.ResourceCPU: resource.MustParse("1"), }, }, }, @@ -280,25 +265,53 @@ func burstableTestPods() []*v1.Pod{ } } -func bestEffortTestPods() []*v1.Pod{ +func burstableTestPods() []*v1.Pod { return []*v1.Pod{ { ObjectMeta: metav1.ObjectMeta{ - UID: types.UID(uuid.NewUUID()), - Name: "besteffort-pod", + UID: types.UID(uuid.NewUUID()), + Name: "burstable-pod", Namespace: "test", }, Spec: v1.PodSpec{ Containers: []v1.Container{ { - Name: "foo", + Name: "foo", + Image: "busybox", + Resources: v1.ResourceRequirements{ + Requests: v1.ResourceList{ + v1.ResourceCPU: resource.MustParse("1"), + }, + Limits: v1.ResourceList{ + v1.ResourceCPU: resource.MustParse("2"), + }, + }, + }, + }, + }, + }, + } +} + +func bestEffortTestPods() []*v1.Pod { + return []*v1.Pod{ + { + ObjectMeta: metav1.ObjectMeta{ + UID: types.UID(uuid.NewUUID()), + Name: "besteffort-pod", + Namespace: "test", + }, + Spec: v1.PodSpec{ + Containers: []v1.Container{ + { + Name: "foo", Image: "busybox", }, }, }, }, } -} +} func guaranteedAndBurstableTestPods() []*v1.Pod { pods := []*v1.Pod{} @@ -316,108 +329,117 @@ func besteffortAndBurstableTestPods() []*v1.Pod { // TestQOSCPUConfigUpdate verifies that UpdateCgroups() computes and // updates the correct CPU shares for each QoS class based on the // currently active pods. -func TestQOSCPUConfigUpdate(t *testing.T){ +func TestQOSCPUConfigUpdate(t *testing.T) { - tests := []struct{ - name string - testPods ActivePodsFunc + tests := []struct { + name string + testPods ActivePodsFunc expectedCPUShares uint64 }{ { - name: "guaranteed-pods-only", - testPods: guaranteedTestPods, - expectedCPUShares:expectedCPUShares(guaranteedTestPods), + name: "guaranteed-pods-only", + testPods: guaranteedTestPods, + expectedCPUShares: expectedCPUShares(guaranteedTestPods), }, { - name: "burstable-pods-only", - testPods: burstableTestPods, + name: "burstable-pods-only", + testPods: burstableTestPods, expectedCPUShares: expectedCPUShares(burstableTestPods), }, { - name: "besteffort-pods-only", - testPods: bestEffortTestPods, + name: "besteffort-pods-only", + testPods: bestEffortTestPods, expectedCPUShares: expectedCPUShares(bestEffortTestPods), }, { - name: "guaranteed-and-burstable-pods", - testPods: guaranteedAndBurstableTestPods, + name: "guaranteed-and-burstable-pods", + testPods: guaranteedAndBurstableTestPods, expectedCPUShares: expectedCPUShares(guaranteedAndBurstableTestPods), }, { - name: "besteffort-and-burstable-pods", - testPods: besteffortAndBurstableTestPods, + name: "besteffort-and-burstable-pods", + testPods: besteffortAndBurstableTestPods, expectedCPUShares: expectedCPUShares(besteffortAndBurstableTestPods), }, } - for _,testCase := range tests{ + for _, testCase := range tests { t.Run(testCase.name, func(t *testing.T) { - testContainerManager,err := createTestQOSContainerManager(logr.Logger{}) - if err!=nil{ - t.Fatalf("Unable to create Test Qos Container Manager: %s", err) - return - } - - var fakecgroupManager = &fakeCgroupManager{} - testContainerManager.cgroupManager = fakecgroupManager - - err=testContainerManager.Start(context.Background(), func() v1.ResourceList {return v1.ResourceList{}}, testCase.testPods) - if err!=nil{ - t.Fatalf("Start() failed: %s",err) - } - - // UpdateCgroups() is expected to update all QoS cgroups on each call - // based on the current active pod set. - err = testContainerManager.UpdateCgroups(logr.Logger{}) - if err!=nil{ - t.Fatalf("Error in UpdateCgroups(): %s", err) - } - - //These flags will be used to check whether UpdateCgroups() - //is updating the CPU shares for all QoS classes (cgroups) - foundBurstable := false - foundBestEffort := false - foundGuaranteed := false - - for _,config := range fakecgroupManager.updates { - - if strings.HasSuffix(config.Name.ToCgroupfs(), "burstable"){ - foundBurstable = true - if *config.ResourceParameters.CPUShares != testCase.expectedCPUShares{ - t.Fatalf("Expected CPU Shares for Burstable: %d. Got: %d",testCase.expectedCPUShares, *config.ResourceParameters.CPUShares) + testContainerManager, err := createTestQOSContainerManager(logr.Logger{}) + if err != nil { + t.Fatalf("Unable to create Test Qos Container Manager: %s", err) + return } - continue - } - if strings.HasSuffix(config.Name.ToCgroupfs(), "besteffort"){ - foundBestEffort = true - if *config.ResourceParameters.CPUShares != MinShares { - t.Fatalf("Expected CPU Shares for BestEffort: %d Got: %d", MinShares, *config.ResourceParameters.CPUShares) + fakecgroupManager := &fakeCgroupManager{} + testContainerManager.cgroupManager = fakecgroupManager + + ctx, cancel := context.WithCancel(context.Background()) + defer cancel() + + err = testContainerManager.Start(ctx, func() v1.ResourceList { return v1.ResourceList{} }, testCase.testPods) + + if err != nil { + t.Fatalf("Start() failed: %s", err) } - continue - } - if config.Name.ToCgroupfs() == testContainerManager.cgroupRoot.ToCgroupfs() { - foundGuaranteed = true - if config.ResourceParameters != nil && config.ResourceParameters.CPUShares != nil{ - t.Fatalf("Expected CPU Shares for Guaranteed: , Got: %d", *config.ResourceParameters.CPUShares) + // UpdateCgroups() is expected to update all QoS cgroups on each call + // based on the current active pod set. + err = testContainerManager.UpdateCgroups(logr.Logger{}) + if err != nil { + t.Fatalf("Error in UpdateCgroups(): %s", err) } - } - } + cancel() - if !foundBurstable { - t.Fatalf("burstable cgroup not found") -} -if !foundBestEffort { - t.Fatalf("besteffort cgroup not found") -} -if !foundGuaranteed { - t.Fatalf("guaranteed cgroup not found") -} + // These flags will be used to check whether UpdateCgroups() + // is updating the CPU shares for all QoS classes (cgroups) + foundBurstable := false + foundBestEffort := false + foundGuaranteed := false -}) + fakecgroupManager.mutex.Lock() + updates := append([]*CgroupConfig(nil), fakecgroupManager.updates...) + fakecgroupManager.mutex.Unlock() + + for _, config := range updates { + + if strings.HasSuffix(config.Name.ToCgroupfs(), "burstable") { + foundBurstable = true + if *config.ResourceParameters.CPUShares != testCase.expectedCPUShares { + t.Fatalf("Expected CPU Shares for Burstable: %d. Got: %d", testCase.expectedCPUShares, *config.ResourceParameters.CPUShares) + } + continue + } + + if strings.HasSuffix(config.Name.ToCgroupfs(), "besteffort") { + foundBestEffort = true + if *config.ResourceParameters.CPUShares != MinShares { + t.Fatalf("Expected CPU Shares for BestEffort: %d Got: %d", MinShares, *config.ResourceParameters.CPUShares) + } + continue + } + + if config.Name.ToCgroupfs() == testContainerManager.cgroupRoot.ToCgroupfs() { + foundGuaranteed = true + if config.ResourceParameters != nil && config.ResourceParameters.CPUShares != nil { + t.Fatalf("Expected CPU Shares for Guaranteed: , Got: %d", *config.ResourceParameters.CPUShares) + } + } + } + + if !foundBurstable { + t.Fatalf("burstable cgroup not found") + } + if !foundBestEffort { + t.Fatalf("besteffort cgroup not found") + } + if !foundGuaranteed { + t.Fatalf("guaranteed cgroup not found") + } + + }) } -} \ No newline at end of file +} From 9c9b335294eccaeb32eac7de4cd9ed9094e686ab Mon Sep 17 00:00:00 2001 From: Yuvraj Date: Sat, 17 Jan 2026 14:15:48 +0530 Subject: [PATCH 3/4] kubelet: add unit tests for QoS CPU shares update Signed-off-by: Yuvraj --- .../cm/qos_container_manager_linux_test.go | 340 +++++++++++------- 1 file changed, 205 insertions(+), 135 deletions(-) diff --git a/pkg/kubelet/cm/qos_container_manager_linux_test.go b/pkg/kubelet/cm/qos_container_manager_linux_test.go index 6b4fe10ff55..0fa88b913d2 100644 --- a/pkg/kubelet/cm/qos_container_manager_linux_test.go +++ b/pkg/kubelet/cm/qos_container_manager_linux_test.go @@ -33,10 +33,8 @@ import ( metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/types" "k8s.io/apimachinery/pkg/util/uuid" - cres "k8s.io/component-helpers/resource" "k8s.io/klog/v2" "k8s.io/klog/v2/ktesting" - v1qos "k8s.io/kubernetes/pkg/apis/core/v1/helper/qos" ) func activeTestPods() []*v1.Pod { @@ -215,151 +213,218 @@ func (f *fakeCgroupManager) SetCgroupConfig(logger klog.Logger, name CgroupName, } func (f *fakeCgroupManager) Version() int { return 1 } -func expectedCPUShares(activeTestPods ActivePodsFunc) uint64 { - pods := activeTestPods() - - burstablePodCPURequest := int64(0) - for i := range pods { - pod := pods[i] - qosClass := v1qos.GetPodQOS(pod) - if qosClass != v1.PodQOSBurstable { - // we only care about the burstable qos tier - continue - } - req := cres.PodRequests(pod, cres.PodResourcesOptions{}) - if request, found := req[v1.ResourceCPU]; found { - burstablePodCPURequest += request.MilliValue() - } - } - // set burstable shares based on current observe state - burstableCPUShares := MilliCPUToShares(burstablePodCPURequest) - return burstableCPUShares -} - -func guaranteedTestPods() []*v1.Pod { - return []*v1.Pod{ - - { - ObjectMeta: metav1.ObjectMeta{ - UID: types.UID(uuid.NewUUID()), - Name: "guaranteed-pod", - Namespace: "test", - }, - Spec: v1.PodSpec{ - Containers: []v1.Container{ - { - Name: "foo", - Image: "busybox", - Resources: v1.ResourceRequirements{ - Requests: v1.ResourceList{ - v1.ResourceCPU: resource.MustParse("1"), - }, - Limits: v1.ResourceList{ - v1.ResourceCPU: resource.MustParse("1"), - }, - }, - }, - }, - }, - }, - } -} - -func burstableTestPods() []*v1.Pod { - return []*v1.Pod{ - { - ObjectMeta: metav1.ObjectMeta{ - UID: types.UID(uuid.NewUUID()), - Name: "burstable-pod", - Namespace: "test", - }, - Spec: v1.PodSpec{ - Containers: []v1.Container{ - { - Name: "foo", - Image: "busybox", - Resources: v1.ResourceRequirements{ - Requests: v1.ResourceList{ - v1.ResourceCPU: resource.MustParse("1"), - }, - Limits: v1.ResourceList{ - v1.ResourceCPU: resource.MustParse("2"), - }, - }, - }, - }, - }, - }, - } -} - -func bestEffortTestPods() []*v1.Pod { - return []*v1.Pod{ - { - ObjectMeta: metav1.ObjectMeta{ - UID: types.UID(uuid.NewUUID()), - Name: "besteffort-pod", - Namespace: "test", - }, - Spec: v1.PodSpec{ - Containers: []v1.Container{ - { - Name: "foo", - Image: "busybox", - }, - }, - }, - }, - } -} - -func guaranteedAndBurstableTestPods() []*v1.Pod { - pods := []*v1.Pod{} - pods = append(pods, guaranteedTestPods()...) - pods = append(pods, burstableTestPods()...) - return pods -} -func besteffortAndBurstableTestPods() []*v1.Pod { - pods := []*v1.Pod{} - pods = append(pods, bestEffortTestPods()...) - pods = append(pods, burstableTestPods()...) - return pods -} - // TestQOSCPUConfigUpdate verifies that UpdateCgroups() computes and // updates the correct CPU shares for each QoS class based on the // currently active pods. func TestQOSCPUConfigUpdate(t *testing.T) { + // Guaranteed QoS uses fixed CPU shares (requests == limits), so they are not + // recalculated. BestEffort always uses MinShares, and only Burstable CPU shares + // depend on aggregate burstable CPU requests. + tests := []struct { - name string - testPods ActivePodsFunc - expectedCPUShares uint64 + name string + testPods ActivePodsFunc + expectedBurstableCPUShares uint64 // Recalculation will be done only for Burstable QoS class }{ { - name: "guaranteed-pods-only", - testPods: guaranteedTestPods, - expectedCPUShares: expectedCPUShares(guaranteedTestPods), + name: "guaranteed-pods-only", + testPods: func() []*v1.Pod { + return []*v1.Pod{ + + { + ObjectMeta: metav1.ObjectMeta{ + UID: types.UID(uuid.NewUUID()), + Name: "guaranteed-pod", + Namespace: "test", + }, + Spec: v1.PodSpec{ + Containers: []v1.Container{ + { + Name: "foo", + Image: "busybox", + Resources: v1.ResourceRequirements{ + Requests: v1.ResourceList{ + v1.ResourceCPU: resource.MustParse("1"), + v1.ResourceMemory: resource.MustParse("128Mi"), + }, + Limits: v1.ResourceList{ + v1.ResourceCPU: resource.MustParse("1"), + v1.ResourceMemory: resource.MustParse("128Mi"), + }, + }, + }, + }, + }, + }, + } + }, + // MinShares will be given to the Burstable QoS class since kubelet + // creates all QoS cgroups regardless of whether pods of that class exist. + expectedBurstableCPUShares: MinShares, }, { - name: "burstable-pods-only", - testPods: burstableTestPods, - expectedCPUShares: expectedCPUShares(burstableTestPods), + name: "burstable-pods-only", + testPods: func() []*v1.Pod { + return []*v1.Pod{ + { + ObjectMeta: metav1.ObjectMeta{ + UID: types.UID(uuid.NewUUID()), + Name: "burstable-pod", + Namespace: "test", + }, + Spec: v1.PodSpec{ + Containers: []v1.Container{ + { + Name: "foo", + Image: "busybox", + Resources: v1.ResourceRequirements{ + Requests: v1.ResourceList{ + v1.ResourceCPU: resource.MustParse("1"), + v1.ResourceMemory: resource.MustParse("128Mi"), + }, + Limits: v1.ResourceList{ + v1.ResourceCPU: resource.MustParse("2"), + v1.ResourceMemory: resource.MustParse("256Mi"), + }, + }, + }, + }, + }, + }, + } + }, + // 1 CPU Resource = 1024 CPU Shares + expectedBurstableCPUShares: 1024, }, { - name: "besteffort-pods-only", - testPods: bestEffortTestPods, - expectedCPUShares: expectedCPUShares(bestEffortTestPods), + name: "besteffort-pods-only", + testPods: func() []*v1.Pod { + return []*v1.Pod{ + { + ObjectMeta: metav1.ObjectMeta{ + UID: types.UID(uuid.NewUUID()), + Name: "besteffort-pod", + Namespace: "test", + }, + Spec: v1.PodSpec{ + Containers: []v1.Container{ + { + Name: "foo", + Image: "busybox", + }, + }, + }, + }, + } + }, + expectedBurstableCPUShares: MinShares, }, { - name: "guaranteed-and-burstable-pods", - testPods: guaranteedAndBurstableTestPods, - expectedCPUShares: expectedCPUShares(guaranteedAndBurstableTestPods), + name: "guaranteed-and-burstable-pods", + testPods: func() []*v1.Pod { + return []*v1.Pod{ + { + ObjectMeta: metav1.ObjectMeta{ + UID: types.UID(uuid.NewUUID()), + Name: "guaranteed-pod", + Namespace: "test", + }, + Spec: v1.PodSpec{ + Containers: []v1.Container{ + { + Name: "foo", + Image: "busybox", + Resources: v1.ResourceRequirements{ + Requests: v1.ResourceList{ + v1.ResourceCPU: resource.MustParse("1"), + v1.ResourceMemory: resource.MustParse("128Mi"), + }, + Limits: v1.ResourceList{ + v1.ResourceCPU: resource.MustParse("1"), + v1.ResourceMemory: resource.MustParse("128Mi"), + }, + }, + }, + }, + }, + }, + { + ObjectMeta: metav1.ObjectMeta{ + UID: types.UID(uuid.NewUUID()), + Name: "burstable-pod", + Namespace: "test", + }, + Spec: v1.PodSpec{ + Containers: []v1.Container{ + { + Name: "foo", + Image: "busybox", + Resources: v1.ResourceRequirements{ + Requests: v1.ResourceList{ + v1.ResourceCPU: resource.MustParse("1"), + v1.ResourceMemory: resource.MustParse("128Mi"), + }, + Limits: v1.ResourceList{ + v1.ResourceCPU: resource.MustParse("2"), + v1.ResourceMemory: resource.MustParse("256Mi"), + }, + }, + }, + }, + }, + }, + } + }, + expectedBurstableCPUShares: 1024, }, { - name: "besteffort-and-burstable-pods", - testPods: besteffortAndBurstableTestPods, - expectedCPUShares: expectedCPUShares(besteffortAndBurstableTestPods), + name: "besteffort-and-burstable-pods", + testPods: func() []*v1.Pod { + return []*v1.Pod{ + { + ObjectMeta: metav1.ObjectMeta{ + UID: types.UID(uuid.NewUUID()), + Name: "besteffort-pod", + Namespace: "test", + }, + Spec: v1.PodSpec{ + Containers: []v1.Container{ + { + Name: "foo", + Image: "busybox", + }, + }, + }, + }, + { + ObjectMeta: metav1.ObjectMeta{ + UID: types.UID(uuid.NewUUID()), + Name: "burstable-pod", + Namespace: "test", + }, + Spec: v1.PodSpec{ + Containers: []v1.Container{ + { + Name: "foo", + Image: "busybox", + Resources: v1.ResourceRequirements{ + Requests: v1.ResourceList{ + v1.ResourceCPU: resource.MustParse("1"), + v1.ResourceMemory: resource.MustParse("128Mi"), + }, + Limits: v1.ResourceList{ + v1.ResourceCPU: resource.MustParse("2"), + v1.ResourceMemory: resource.MustParse("256Mi"), + }, + }, + }, + }, + }, + }, + } + }, + expectedBurstableCPUShares: 1024, }, } @@ -367,7 +432,9 @@ func TestQOSCPUConfigUpdate(t *testing.T) { t.Run(testCase.name, func(t *testing.T) { - testContainerManager, err := createTestQOSContainerManager(logr.Logger{}) + logger := logr.Discard() + + testContainerManager, err := createTestQOSContainerManager(logger) if err != nil { t.Fatalf("Unable to create Test Qos Container Manager: %s", err) return @@ -387,7 +454,7 @@ func TestQOSCPUConfigUpdate(t *testing.T) { // UpdateCgroups() is expected to update all QoS cgroups on each call // based on the current active pod set. - err = testContainerManager.UpdateCgroups(logr.Logger{}) + err = testContainerManager.UpdateCgroups(logger) if err != nil { t.Fatalf("Error in UpdateCgroups(): %s", err) } @@ -399,6 +466,9 @@ func TestQOSCPUConfigUpdate(t *testing.T) { foundBestEffort := false foundGuaranteed := false + // Start() initiates a background UpdateCgroups() goroutine which may + // still be running. Take a snapshot to avoid observing updates from + // the background UpdateCgroups() instead of the explicit UpdateCgroups() call. fakecgroupManager.mutex.Lock() updates := append([]*CgroupConfig(nil), fakecgroupManager.updates...) fakecgroupManager.mutex.Unlock() @@ -407,8 +477,8 @@ func TestQOSCPUConfigUpdate(t *testing.T) { if strings.HasSuffix(config.Name.ToCgroupfs(), "burstable") { foundBurstable = true - if *config.ResourceParameters.CPUShares != testCase.expectedCPUShares { - t.Fatalf("Expected CPU Shares for Burstable: %d. Got: %d", testCase.expectedCPUShares, *config.ResourceParameters.CPUShares) + if *config.ResourceParameters.CPUShares != testCase.expectedBurstableCPUShares { + t.Fatalf("Expected CPU Shares for Burstable: %d Got: %d", testCase.expectedBurstableCPUShares, *config.ResourceParameters.CPUShares) } continue } From 9b05946801265478ac94947e1f4f7050c3900e0b Mon Sep 17 00:00:00 2001 From: Yuvraj Date: Sun, 25 Jan 2026 13:20:07 +0530 Subject: [PATCH 4/4] kubelet: add unit tests for QoS CPU shares update Signed-off-by: Yuvraj --- .../cm/qos_container_manager_linux_test.go | 110 ++++++++++-------- 1 file changed, 64 insertions(+), 46 deletions(-) diff --git a/pkg/kubelet/cm/qos_container_manager_linux_test.go b/pkg/kubelet/cm/qos_container_manager_linux_test.go index 0fa88b913d2..13377b4a728 100644 --- a/pkg/kubelet/cm/qos_container_manager_linux_test.go +++ b/pkg/kubelet/cm/qos_container_manager_linux_test.go @@ -21,12 +21,6 @@ package cm import ( "context" "fmt" - "strconv" - "strings" - "sync" - "testing" - - "github.com/go-logr/logr" "github.com/stretchr/testify/assert" v1 "k8s.io/api/core/v1" resource "k8s.io/apimachinery/pkg/api/resource" @@ -35,6 +29,11 @@ import ( "k8s.io/apimachinery/pkg/util/uuid" "k8s.io/klog/v2" "k8s.io/klog/v2/ktesting" + "strconv" + "strings" + "sync" + "testing" + "time" ) func activeTestPods() []*v1.Pod { @@ -432,7 +431,7 @@ func TestQOSCPUConfigUpdate(t *testing.T) { t.Run(testCase.name, func(t *testing.T) { - logger := logr.Discard() + logger, ctx := ktesting.NewTestContext(t) testContainerManager, err := createTestQOSContainerManager(logger) if err != nil { @@ -443,7 +442,7 @@ func TestQOSCPUConfigUpdate(t *testing.T) { fakecgroupManager := &fakeCgroupManager{} testContainerManager.cgroupManager = fakecgroupManager - ctx, cancel := context.WithCancel(context.Background()) + ctx, cancel := context.WithCancel(ctx) defer cancel() err = testContainerManager.Start(ctx, func() v1.ResourceList { return v1.ResourceList{} }, testCase.testPods) @@ -460,56 +459,75 @@ func TestQOSCPUConfigUpdate(t *testing.T) { } cancel() - // These flags will be used to check whether UpdateCgroups() - // is updating the CPU shares for all QoS classes (cgroups) - foundBurstable := false - foundBestEffort := false - foundGuaranteed := false + // UpdateCgroups() may also be running in the background reconciliation loop (because of Start()). + // Retry until a consistent snapshot containing all QoS cgroup updates is observed. + // + // A small bounded retry count is sufficient here because UpdateCgroups() + // is synchronous and expected to complete quickly. This avoids test flakiness + // without risking an unbounded wait. + maxRetryAttempts := 5 - // Start() initiates a background UpdateCgroups() goroutine which may - // still be running. Take a snapshot to avoid observing updates from - // the background UpdateCgroups() instead of the explicit UpdateCgroups() call. - fakecgroupManager.mutex.Lock() - updates := append([]*CgroupConfig(nil), fakecgroupManager.updates...) - fakecgroupManager.mutex.Unlock() + var ( + foundBurstable bool + foundBestEffort bool + foundGuaranteed bool + ) - for _, config := range updates { + for i := 0; i < maxRetryAttempts; i++ { - if strings.HasSuffix(config.Name.ToCgroupfs(), "burstable") { - foundBurstable = true - if *config.ResourceParameters.CPUShares != testCase.expectedBurstableCPUShares { - t.Fatalf("Expected CPU Shares for Burstable: %d Got: %d", testCase.expectedBurstableCPUShares, *config.ResourceParameters.CPUShares) + // These flags will be used to check whether UpdateCgroups() + // is updating the CPU shares for all QoS classes (cgroups) + foundBurstable = false + foundBestEffort = false + foundGuaranteed = false + + // Start() initiates a background UpdateCgroups() goroutine which may + // still be running. Take a snapshot to avoid observing updates from + // the background UpdateCgroups() instead of the explicit UpdateCgroups() call. + fakecgroupManager.mutex.Lock() + updates := append([]*CgroupConfig(nil), fakecgroupManager.updates...) + fakecgroupManager.mutex.Unlock() + + for _, config := range updates { + + if strings.HasSuffix(config.Name.ToCgroupfs(), "burstable") { + foundBurstable = true + if *config.ResourceParameters.CPUShares != testCase.expectedBurstableCPUShares { + t.Fatalf("Expected CPU Shares for Burstable: %d Got: %d", testCase.expectedBurstableCPUShares, *config.ResourceParameters.CPUShares) + } + continue } - continue - } - if strings.HasSuffix(config.Name.ToCgroupfs(), "besteffort") { - foundBestEffort = true - if *config.ResourceParameters.CPUShares != MinShares { - t.Fatalf("Expected CPU Shares for BestEffort: %d Got: %d", MinShares, *config.ResourceParameters.CPUShares) + if strings.HasSuffix(config.Name.ToCgroupfs(), "besteffort") { + foundBestEffort = true + if *config.ResourceParameters.CPUShares != MinShares { + t.Fatalf("Expected CPU Shares for BestEffort: %d Got: %d", MinShares, *config.ResourceParameters.CPUShares) + } + continue } - continue - } - if config.Name.ToCgroupfs() == testContainerManager.cgroupRoot.ToCgroupfs() { - foundGuaranteed = true - if config.ResourceParameters != nil && config.ResourceParameters.CPUShares != nil { - t.Fatalf("Expected CPU Shares for Guaranteed: , Got: %d", *config.ResourceParameters.CPUShares) + if config.Name.ToCgroupfs() == testContainerManager.cgroupRoot.ToCgroupfs() { + foundGuaranteed = true + if config.ResourceParameters != nil && config.ResourceParameters.CPUShares != nil { + t.Fatalf("Expected CPU Shares for Guaranteed: , Got: %d", *config.ResourceParameters.CPUShares) + } } } + + if foundBurstable && foundBestEffort && foundGuaranteed { + break + } + + time.Sleep(time.Millisecond * 10) } - if !foundBurstable { - t.Fatalf("burstable cgroup not found") - } - if !foundBestEffort { - t.Fatalf("besteffort cgroup not found") - } - if !foundGuaranteed { - t.Fatalf("guaranteed cgroup not found") - } + if !foundBurstable || !foundBestEffort || !foundGuaranteed { + t.Fatalf( + "did not observe all QoS cgroup updates after %d retries (guaranteed=%v, burstable=%v, besteffort=%v)", + maxRetryAttempts, foundGuaranteed, foundBurstable, foundBestEffort, + ) + } }) - } }