Merge pull request #135249 from bart0sh/PR207-migrate-container-manager-to-contextual-logging

kubelet: migrate container manager to contextual logging
This commit is contained in:
Kubernetes Prow Robot 2025-12-17 20:00:01 -08:00 committed by GitHub
commit c0c81a4258
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
9 changed files with 92 additions and 65 deletions

View file

@ -224,25 +224,25 @@ func int64Slice(in []int) []int64 {
return out
}
func podHasExclusiveCPUs(cr cpuAllocationReader, pod *v1.Pod) bool {
func podHasExclusiveCPUs(logger klog.Logger, cr cpuAllocationReader, pod *v1.Pod) bool {
for _, container := range pod.Spec.InitContainers {
if containerHasExclusiveCPUs(cr, pod, &container) {
if containerHasExclusiveCPUs(logger, cr, pod, &container) {
return true
}
}
for _, container := range pod.Spec.Containers {
if containerHasExclusiveCPUs(cr, pod, &container) {
if containerHasExclusiveCPUs(logger, cr, pod, &container) {
return true
}
}
klog.V(4).InfoS("Pod contains no container with pinned cpus", "podName", pod.Name)
logger.V(4).Info("Pod contains no container with pinned cpus", "podName", pod.Name)
return false
}
func containerHasExclusiveCPUs(cr cpuAllocationReader, pod *v1.Pod, container *v1.Container) bool {
func containerHasExclusiveCPUs(logger klog.Logger, cr cpuAllocationReader, pod *v1.Pod, container *v1.Container) bool {
exclusiveCPUs := cr.GetExclusiveCPUs(string(pod.UID), container.Name)
if !exclusiveCPUs.IsEmpty() {
klog.V(4).InfoS("Container has pinned cpus", "podName", pod.Name, "containerName", container.Name)
logger.V(4).Info("Container has pinned cpus", "podName", pod.Name, "containerName", container.Name)
return true
}
return false

View file

@ -149,7 +149,7 @@ var _ ContainerManager = &containerManagerImpl{}
// checks if the required cgroups subsystems are mounted.
// As of now, only 'cpu' and 'memory' are required.
// cpu quota is a soft requirement.
func validateSystemRequirements(mountUtil mount.Interface) (features, error) {
func validateSystemRequirements(logger klog.Logger, mountUtil mount.Interface) (features, error) {
const (
cgroupMountType = "cgroup"
localErr = "system validation failed"
@ -190,11 +190,11 @@ func validateSystemRequirements(mountUtil mount.Interface) (features, error) {
// CPU cgroup is required and so it expected to be mounted at this point.
periodExists, err := utilpath.Exists(utilpath.CheckFollowSymlink, path.Join(cpuMountPoint, "cpu.cfs_period_us"))
if err != nil {
klog.ErrorS(err, "Failed to detect if CPU cgroup cpu.cfs_period_us is available")
logger.Error(err, "Failed to detect if CPU cgroup cpu.cfs_period_us is available")
}
quotaExists, err := utilpath.Exists(utilpath.CheckFollowSymlink, path.Join(cpuMountPoint, "cpu.cfs_quota_us"))
if err != nil {
klog.ErrorS(err, "Failed to detect if CPU cgroup cpu.cfs_quota_us is available")
logger.Error(err, "Failed to detect if CPU cgroup cpu.cfs_quota_us is available")
}
if quotaExists && periodExists {
f.cpuHardcapping = true
@ -415,11 +415,17 @@ func (cm *containerManagerImpl) NewPodContainerManager() PodContainerManager {
}
func (cm *containerManagerImpl) PodHasExclusiveCPUs(pod *v1.Pod) bool {
return podHasExclusiveCPUs(cm.cpuManager, pod)
// Use klog.TODO() because we currently do not have a proper logger to pass in.
// Replace this with an appropriate logger when refactoring this function to accept a logger parameter.
logger := klog.TODO()
return podHasExclusiveCPUs(logger, cm.cpuManager, pod)
}
func (cm *containerManagerImpl) ContainerHasExclusiveCPUs(pod *v1.Pod, container *v1.Container) bool {
return containerHasExclusiveCPUs(cm.cpuManager, pod, container)
// Use klog.TODO() because we currently do not have a proper logger to pass in.
// Replace this with an appropriate logger when refactoring this function to accept a logger parameter.
logger := klog.TODO()
return containerHasExclusiveCPUs(logger, cm.cpuManager, pod, container)
}
func (cm *containerManagerImpl) InternalContainerLifecycle() InternalContainerLifecycle {
@ -450,7 +456,7 @@ const (
// setupKernelTunables validates kernel tunable flags are set as expected
// depending upon the specified option, it will either warn, error, or modify the kernel tunable flags
func setupKernelTunables(option KernelTunableBehavior) error {
func setupKernelTunables(logger klog.Logger, option KernelTunableBehavior) error {
desiredState := map[string]int{
utilsysctl.VMOvercommitMemory: utilsysctl.VMOvercommitMemoryAlways,
utilsysctl.VMPanicOnOOM: utilsysctl.VMPanicOnOOMInvokeOOMKiller,
@ -477,17 +483,17 @@ func setupKernelTunables(option KernelTunableBehavior) error {
case KernelTunableError:
errList = append(errList, fmt.Errorf("invalid kernel flag: %v, expected value: %v, actual value: %v", flag, expectedValue, val))
case KernelTunableWarn:
klog.V(2).InfoS("Invalid kernel flag", "flag", flag, "expectedValue", expectedValue, "actualValue", val)
logger.V(2).Info("Invalid kernel flag", "flag", flag, "expectedValue", expectedValue, "actualValue", val)
case KernelTunableModify:
klog.V(2).InfoS("Updating kernel flag", "flag", flag, "expectedValue", expectedValue, "actualValue", val)
logger.V(2).Info("Updating kernel flag", "flag", flag, "expectedValue", expectedValue, "actualValue", val)
err = sysctl.SetSysctl(flag, expectedValue)
if err != nil {
if inuserns.RunningInUserNS() {
if utilfeature.DefaultFeatureGate.Enabled(kubefeatures.KubeletInUserNamespace) {
klog.V(2).InfoS("Updating kernel flag failed (running in UserNS, ignoring)", "flag", flag, "err", err)
logger.V(2).Info("Updating kernel flag failed (running in UserNS, ignoring)", "flag", flag, "err", err)
continue
}
klog.ErrorS(err, "Updating kernel flag failed (Hint: enable KubeletInUserNamespace feature flag to ignore the error)", "flag", flag)
logger.Error(err, "Updating kernel flag failed (Hint: enable KubeletInUserNamespace feature flag to ignore the error)", "flag", flag)
}
errList = append(errList, err)
}
@ -499,7 +505,7 @@ func setupKernelTunables(option KernelTunableBehavior) error {
func (cm *containerManagerImpl) setupNode(ctx context.Context, activePods ActivePodsFunc) error {
logger := klog.FromContext(ctx)
f, err := validateSystemRequirements(cm.mountUtil)
f, err := validateSystemRequirements(logger, cm.mountUtil)
if err != nil {
return err
}
@ -510,7 +516,7 @@ func (cm *containerManagerImpl) setupNode(ctx context.Context, activePods Active
if cm.GetNodeConfig().ProtectKernelDefaults {
b = KernelTunableError
}
if err := setupKernelTunables(b); err != nil {
if err := setupKernelTunables(logger, b); err != nil {
return err
}
@ -541,7 +547,7 @@ func (cm *containerManagerImpl) setupNode(ctx context.Context, activePods Active
return err
}
cont.ensureStateFunc = func(manager cgroups.Manager) error {
return ensureSystemCgroups("/", manager)
return ensureSystemCgroups(logger, "/", manager)
}
systemContainers = append(systemContainers, cont)
}
@ -769,23 +775,23 @@ func (cm *containerManagerImpl) SystemCgroupsLimit() v1.ResourceList {
}
}
func isProcessRunningInHost(pid int) (bool, error) {
func isProcessRunningInHost(logger klog.Logger, pid int) (bool, error) {
// Get init pid namespace.
initPidNs, err := os.Readlink("/proc/1/ns/pid")
if err != nil {
return false, fmt.Errorf("failed to find pid namespace of init process")
}
klog.V(10).InfoS("Found init PID namespace", "namespace", initPidNs)
logger.V(10).Info("Found init PID namespace", "namespace", initPidNs)
processPidNs, err := os.Readlink(fmt.Sprintf("/proc/%d/ns/pid", pid))
if err != nil {
return false, fmt.Errorf("failed to find pid namespace of process %q", pid)
}
klog.V(10).InfoS("Process info", "pid", pid, "namespace", processPidNs)
logger.V(10).Info("Process info", "pid", pid, "namespace", processPidNs)
return initPidNs == processPidNs, nil
}
func ensureProcessInContainerWithOOMScore(logger klog.Logger, pid int, oomScoreAdj int, manager cgroups.Manager) error {
if runningInHost, err := isProcessRunningInHost(pid); err != nil {
if runningInHost, err := isProcessRunningInHost(logger, pid); err != nil {
// Err on the side of caution. Avoid moving the docker daemon unless we are able to identify its context.
return err
} else if !runningInHost {
@ -885,7 +891,7 @@ func getContainer(logger klog.Logger, pid int) (string, error) {
//
// The reason of leaving kernel threads at root cgroup is that we don't want to tie the
// execution of these threads with to-be defined /system quota and create priority inversions.
func ensureSystemCgroups(rootCgroupPath string, manager cgroups.Manager) error {
func ensureSystemCgroups(logger klog.Logger, rootCgroupPath string, manager cgroups.Manager) error {
// Move non-kernel PIDs to the system container.
// Only keep errors on latest attempt.
var finalErr error
@ -911,7 +917,7 @@ func ensureSystemCgroups(rootCgroupPath string, manager cgroups.Manager) error {
return nil
}
klog.V(3).InfoS("Moving non-kernel processes", "pids", pids)
logger.V(3).Info("Moving non-kernel processes", "pids", pids)
for _, pid := range pids {
err := manager.Apply(pid)
if err != nil {
@ -940,6 +946,9 @@ func isKernelPid(pid int) bool {
// GetCapacity returns node capacity data for "cpu", "memory", "ephemeral-storage", and "huge-pages*"
// At present this method is only invoked when introspecting ephemeral storage
func (cm *containerManagerImpl) GetCapacity(localStorageCapacityIsolation bool) v1.ResourceList {
// Use klog.TODO() because we currently do not have a proper logger to pass in.
// Replace this with an appropriate logger when refactoring this function to accept a logger parameter.
logger := klog.TODO()
if localStorageCapacityIsolation {
// We store allocatable ephemeral-storage in the capacity property once we Start() the container manager
if _, ok := cm.capacity[v1.ResourceEphemeralStorage]; !ok {
@ -947,7 +956,7 @@ func (cm *containerManagerImpl) GetCapacity(localStorageCapacityIsolation bool)
if cm.cadvisorInterface != nil {
rootfs, err := cm.cadvisorInterface.RootFsInfo()
if err != nil {
klog.ErrorS(err, "Unable to get rootfs data from cAdvisor interface")
logger.Error(err, "Unable to get rootfs data from cAdvisor interface")
// If the rootfsinfo retrieval from cAdvisor fails for any reason, fallback to returning the capacity property with no ephemeral storage data
return cm.capacity
}
@ -1012,6 +1021,9 @@ func (cm *containerManagerImpl) GetAllocatableMemory() []*podresourcesapi.Contai
}
func (cm *containerManagerImpl) GetDynamicResources(pod *v1.Pod, container *v1.Container) []*podresourcesapi.DynamicResource {
// Use klog.TODO() because we currently do not have a proper logger to pass in.
// Replace this with an appropriate logger when refactoring this function to accept a logger parameter.
logger := klog.TODO()
if !utilfeature.DefaultFeatureGate.Enabled(kubefeatures.DynamicResourceAllocation) {
return []*podresourcesapi.DynamicResource{}
}
@ -1019,7 +1031,7 @@ func (cm *containerManagerImpl) GetDynamicResources(pod *v1.Pod, container *v1.C
var containerDynamicResources []*podresourcesapi.DynamicResource
containerClaimInfos, err := cm.draManager.GetContainerClaimInfos(pod, container)
if err != nil {
klog.ErrorS(err, "Unable to get container claim info state")
logger.Error(err, "Unable to get container claim info state")
return []*podresourcesapi.DynamicResource{}
}
for _, containerClaimInfo := range containerClaimInfos {

View file

@ -66,7 +66,8 @@ func fakeContainerMgrMountInt() mount.Interface {
}
func TestCgroupMountValidationSuccess(t *testing.T) {
f, err := validateSystemRequirements(fakeContainerMgrMountInt())
logger, _ := ktesting.NewTestContext(t)
f, err := validateSystemRequirements(logger, fakeContainerMgrMountInt())
assert.NoError(t, err)
if cgroups.IsCgroup2UnifiedMode() {
assert.True(t, f.cpuHardcapping, "cpu hardcapping is expected to be enabled")
@ -79,6 +80,7 @@ func TestCgroupMountValidationMemoryMissing(t *testing.T) {
if cgroups.IsCgroup2UnifiedMode() {
t.Skip("skipping cgroup v1 test on a cgroup v2 system")
}
logger, _ := ktesting.NewTestContext(t)
mountInt := mount.NewFakeMounter(
[]mount.MountPoint{
{
@ -97,7 +99,7 @@ func TestCgroupMountValidationMemoryMissing(t *testing.T) {
Opts: []string{"rw", "relatime", "cpuacct"},
},
})
_, err := validateSystemRequirements(mountInt)
_, err := validateSystemRequirements(logger, mountInt)
assert.Error(t, err)
}
@ -105,6 +107,7 @@ func TestCgroupMountValidationMultipleSubsystem(t *testing.T) {
if cgroups.IsCgroup2UnifiedMode() {
t.Skip("skipping cgroup v1 test on a cgroup v2 system")
}
logger, _ := ktesting.NewTestContext(t)
mountInt := mount.NewFakeMounter(
[]mount.MountPoint{
{
@ -123,7 +126,7 @@ func TestCgroupMountValidationMultipleSubsystem(t *testing.T) {
Opts: []string{"rw", "relatime", "cpuacct"},
},
})
_, err := validateSystemRequirements(mountInt)
_, err := validateSystemRequirements(logger, mountInt)
assert.NoError(t, err)
}
@ -144,6 +147,7 @@ func TestSoftRequirementsValidationSuccess(t *testing.T) {
if cgroups.IsCgroup2UnifiedMode() {
t.Skip("skipping cgroup v1 test on a cgroup v2 system")
}
logger, _ := ktesting.NewTestContext(t)
req := require.New(t)
tempDir, err := os.MkdirTemp("", "")
req.NoError(err)
@ -169,7 +173,7 @@ func TestSoftRequirementsValidationSuccess(t *testing.T) {
Opts: []string{"rw", "relatime", "cpuacct", "memory"},
},
})
f, err := validateSystemRequirements(mountInt)
f, err := validateSystemRequirements(logger, mountInt)
assert.NoError(t, err)
assert.True(t, f.cpuHardcapping, "cpu hardcapping is expected to be enabled")
}

View file

@ -376,9 +376,15 @@ func (cm *containerManagerImpl) PodMightNeedToUnprepareResources(UID types.UID)
}
func (cm *containerManagerImpl) PodHasExclusiveCPUs(pod *v1.Pod) bool {
return podHasExclusiveCPUs(cm.cpuManager, pod)
// Use klog.TODO() because we currently do not have a proper logger to pass in.
// Replace this with an appropriate logger when refactoring this function to accept a logger parameter.
logger := klog.TODO()
return podHasExclusiveCPUs(logger, cm.cpuManager, pod)
}
func (cm *containerManagerImpl) ContainerHasExclusiveCPUs(pod *v1.Pod, container *v1.Container) bool {
return containerHasExclusiveCPUs(cm.cpuManager, pod, container)
// Use klog.TODO() because we currently do not have a proper logger to pass in.
// Replace this with an appropriate logger when refactoring this function to accept a logger parameter.
logger := klog.TODO()
return containerHasExclusiveCPUs(logger, cm.cpuManager, pod, container)
}

View file

@ -55,9 +55,10 @@ var _ ContainerManager = &FakeContainerManager{}
func NewFakeContainerManager() *FakeContainerManager {
return &FakeContainerManager{
PodContainerManager: NewFakePodContainerManager(),
// Using klog.Background() for fake/test implementations where no real context is available
cpuManager: cpumanager.NewFakeManager(klog.Background()),
memoryManager: memorymanager.NewFakeManager(klog.Background()),
// Use klog.TODO() because we currently do not have a proper logger to pass in.
// Replace this with an appropriate logger when refactoring this function to accept a logger parameter.
cpuManager: cpumanager.NewFakeManager(klog.TODO()),
memoryManager: memorymanager.NewFakeManager(klog.TODO()),
}
}

View file

@ -22,7 +22,7 @@ import (
"github.com/go-logr/logr"
v1 "k8s.io/api/core/v1"
"k8s.io/klog/v2"
"k8s.io/klog/v2/ktesting"
"k8s.io/kubernetes/pkg/kubelet/cm/cpumanager"
"k8s.io/kubernetes/pkg/kubelet/cm/memorymanager"
"k8s.io/kubernetes/pkg/kubelet/cm/topologymanager"
@ -89,7 +89,8 @@ func TestPreStartContainer(t *testing.T) {
for _, test := range tests {
t.Run(test.name, func(t *testing.T) {
_ = test.lifecycle.PreStartContainer(klog.Background(), pod, container, "42")
logger, _ := ktesting.NewTestContext(t)
_ = test.lifecycle.PreStartContainer(logger, pod, container, "42")
})
cManager := test.lifecycle.cpuManager

View file

@ -77,7 +77,7 @@ func (m *podContainerManagerImpl) EnsureExists(logger klog.Logger, pod *v1.Pod)
if !alreadyExists {
enforceCPULimits := m.enforceCPULimits
if utilfeature.DefaultFeatureGate.Enabled(kubefeatures.DisableCPUQuotaWithExclusiveCPUs) && m.podContainerManager.PodHasExclusiveCPUs(pod) {
klog.V(2).InfoS("Disabled CFS quota", "pod", klog.KObj(pod))
logger.V(2).Info("Disabled CFS quota", "pod", klog.KObj(pod))
enforceCPULimits = false
}
enforceMemoryQoS := false
@ -95,7 +95,7 @@ func (m *podContainerManagerImpl) EnsureExists(logger klog.Logger, pod *v1.Pod)
containerConfig.ResourceParameters.PidsLimit = &m.podPidsLimit
}
if enforceMemoryQoS {
klog.V(4).InfoS("MemoryQoS config for pod", "pod", klog.KObj(pod), "unified", containerConfig.ResourceParameters.Unified)
logger.V(4).Info("MemoryQoS config for pod", "pod", klog.KObj(pod), "unified", containerConfig.ResourceParameters.Unified)
}
if err := m.cgroupManager.Create(logger, containerConfig); err != nil {
return fmt.Errorf("failed to create container for %v : %v", podContainerName, err)
@ -148,14 +148,14 @@ func (m *podContainerManagerImpl) SetPodCgroupConfig(logger klog.Logger, pod *v1
}
// Kill one process ID
func (m *podContainerManagerImpl) killOnePid(pid int) error {
func (m *podContainerManagerImpl) killOnePid(logger klog.Logger, pid int) error {
// os.FindProcess never returns an error on POSIX
// https://go-review.googlesource.com/c/go/+/19093
p, _ := os.FindProcess(pid)
if err := p.Kill(); err != nil {
// If the process already exited, that's fine.
if errors.Is(err, os.ErrProcessDone) {
klog.V(3).InfoS("Process no longer exists", "pid", pid)
logger.V(3).Info("Process no longer exists", "pid", pid)
return nil
}
return err
@ -178,23 +178,23 @@ func (m *podContainerManagerImpl) tryKillingCgroupProcesses(logger klog.Logger,
removed := map[int]bool{}
for i := 0; i < 5; i++ {
if i != 0 {
klog.V(3).InfoS("Attempt failed to kill all unwanted process from cgroup, retrying", "attempt", i, "cgroupName", podCgroup)
logger.V(3).Info("Attempt failed to kill all unwanted process from cgroup, retrying", "attempt", i, "cgroupName", podCgroup)
}
errlist = []error{}
for _, pid := range pidsToKill {
if _, ok := removed[pid]; ok {
continue
}
klog.V(3).InfoS("Attempting to kill process from cgroup", "pid", pid, "cgroupName", podCgroup)
if err := m.killOnePid(pid); err != nil {
klog.V(3).InfoS("Failed to kill process from cgroup", "pid", pid, "cgroupName", podCgroup, "err", err)
logger.V(3).Info("Attempting to kill process from cgroup", "pid", pid, "cgroupName", podCgroup)
if err := m.killOnePid(logger, pid); err != nil {
logger.V(3).Info("Failed to kill process from cgroup", "pid", pid, "cgroupName", podCgroup, "err", err)
errlist = append(errlist, err)
} else {
removed[pid] = true
}
}
if len(errlist) == 0 {
klog.V(3).InfoS("Successfully killed all unwanted processes from cgroup", "cgroupName", podCgroup)
logger.V(3).Info("Successfully killed all unwanted processes from cgroup", "cgroupName", podCgroup)
return nil
}
}
@ -254,6 +254,9 @@ func (m *podContainerManagerImpl) IsPodCgroup(cgroupfs string) (bool, types.UID)
// GetAllPodsFromCgroups scans through all the subsystems of pod cgroups
// Get list of pods whose cgroup still exist on the cgroup mounts
func (m *podContainerManagerImpl) GetAllPodsFromCgroups() (map[types.UID]CgroupName, error) {
// Use klog.TODO() because we currently do not have a proper logger to pass in.
// Replace this with an appropriate logger when refactoring this function to accept a logger parameter.
logger := klog.TODO()
// Map for storing all the found pods on the disk
foundPods := make(map[types.UID]CgroupName)
qosContainersList := [3]CgroupName{m.qosContainersInfo.BestEffort, m.qosContainersInfo.Burstable, m.qosContainersInfo.Guaranteed}
@ -294,7 +297,7 @@ func (m *podContainerManagerImpl) GetAllPodsFromCgroups() (map[types.UID]CgroupN
parts := strings.Split(basePath, podCgroupNamePrefix)
// the uid is missing, so we log the unexpected cgroup not of form pod<uid>
if len(parts) != 2 {
klog.InfoS("Pod cgroup manager ignored unexpected cgroup because it is not a pod", "path", cgroupfsPath)
logger.Info("Pod cgroup manager ignored unexpected cgroup because it is not a pod", "path", cgroupfsPath)
continue
}
podUID := parts[1]

View file

@ -141,7 +141,7 @@ func (m *qosContainerManagerImpl) Start(ctx context.Context, getNodeAllocatable
go wait.Until(func() {
err := m.UpdateCgroups(logger)
if err != nil {
klog.InfoS("Failed to reserve QoS requests", "err", err)
logger.Info("Failed to reserve QoS requests", "err", err)
}
}, periodicQOSCgroupUpdateInterval, wait.NeverStop)
@ -233,23 +233,23 @@ func (m *qosContainerManagerImpl) getQoSMemoryRequests() map[v1.PodQOSClass]int6
// setMemoryReserve sums the memory limits of all pods in a QOS class,
// calculates QOS class memory limits, and set those limits in the
// CgroupConfig for each QOS class.
func (m *qosContainerManagerImpl) setMemoryReserve(configs map[v1.PodQOSClass]*CgroupConfig, percentReserve int64) {
func (m *qosContainerManagerImpl) setMemoryReserve(logger klog.Logger, configs map[v1.PodQOSClass]*CgroupConfig, percentReserve int64) {
qosMemoryRequests := m.getQoSMemoryRequests()
resources := m.getNodeAllocatable()
allocatableResource, ok := resources[v1.ResourceMemory]
if !ok {
klog.V(2).InfoS("Allocatable memory value could not be determined, not setting QoS memory limits")
logger.V(2).Info("Allocatable memory value could not be determined, not setting QoS memory limits")
return
}
allocatable := allocatableResource.Value()
if allocatable == 0 {
klog.V(2).InfoS("Allocatable memory reported as 0, might be in standalone mode, not setting QoS memory limits")
logger.V(2).Info("Allocatable memory reported as 0, might be in standalone mode, not setting QoS memory limits")
return
}
for qos, limits := range qosMemoryRequests {
klog.V(2).InfoS("QoS pod memory limit", "qos", qos, "limits", limits, "percentReserve", percentReserve)
logger.V(2).Info("QoS pod memory limit", "qos", qos, "limits", limits, "percentReserve", percentReserve)
}
// Calculate QOS memory limits
@ -262,14 +262,14 @@ func (m *qosContainerManagerImpl) setMemoryReserve(configs map[v1.PodQOSClass]*C
// retrySetMemoryReserve checks for any QoS cgroups over the limit
// that was attempted to be set in the first Update() and adjusts
// their memory limit to the usage to prevent further growth.
func (m *qosContainerManagerImpl) retrySetMemoryReserve(configs map[v1.PodQOSClass]*CgroupConfig, percentReserve int64) {
func (m *qosContainerManagerImpl) retrySetMemoryReserve(logger klog.Logger, configs map[v1.PodQOSClass]*CgroupConfig, percentReserve int64) {
// Unreclaimable memory usage may already exceeded the desired limit
// Attempt to set the limit near the current usage to put pressure
// on the cgroup and prevent further growth.
for qos, config := range configs {
usage, err := m.cgroupManager.MemoryUsage(config.Name)
if err != nil {
klog.V(2).InfoS("Failed to get resource stats", "err", err)
logger.V(2).Info("Failed to get resource stats", "err", err)
return
}
@ -287,7 +287,7 @@ func (m *qosContainerManagerImpl) retrySetMemoryReserve(configs map[v1.PodQOSCla
// setMemoryQoS sums the memory requests of all pods in the Burstable class,
// and set the sum memory as the memory.min in the Unified field of CgroupConfig.
func (m *qosContainerManagerImpl) setMemoryQoS(configs map[v1.PodQOSClass]*CgroupConfig) {
func (m *qosContainerManagerImpl) setMemoryQoS(logger klog.Logger, configs map[v1.PodQOSClass]*CgroupConfig) {
qosMemoryRequests := m.getQoSMemoryRequests()
// Calculate the memory.min:
@ -301,7 +301,7 @@ func (m *qosContainerManagerImpl) setMemoryQoS(configs map[v1.PodQOSClass]*Cgrou
configs[v1.PodQOSBurstable].ResourceParameters.Unified = make(map[string]string)
}
configs[v1.PodQOSBurstable].ResourceParameters.Unified[Cgroup2MemoryMin] = strconv.FormatInt(burstableMin, 10)
klog.V(4).InfoS("MemoryQoS config for qos", "qos", v1.PodQOSBurstable, "memoryMin", burstableMin)
logger.V(4).Info("MemoryQoS config for qos", "qos", v1.PodQOSBurstable, "memoryMin", burstableMin)
}
if guaranteedMin > 0 {
@ -309,7 +309,7 @@ func (m *qosContainerManagerImpl) setMemoryQoS(configs map[v1.PodQOSClass]*Cgrou
configs[v1.PodQOSGuaranteed].ResourceParameters.Unified = make(map[string]string)
}
configs[v1.PodQOSGuaranteed].ResourceParameters.Unified[Cgroup2MemoryMin] = strconv.FormatInt(guaranteedMin, 10)
klog.V(4).InfoS("MemoryQoS config for qos", "qos", v1.PodQOSGuaranteed, "memoryMin", guaranteedMin)
logger.V(4).Info("MemoryQoS config for qos", "qos", v1.PodQOSGuaranteed, "memoryMin", guaranteedMin)
}
}
@ -345,14 +345,14 @@ func (m *qosContainerManagerImpl) UpdateCgroups(logger logr.Logger) error {
// update the qos level cgrougs v2 settings of memory qos if feature enabled
if utilfeature.DefaultFeatureGate.Enabled(kubefeatures.MemoryQoS) &&
libcontainercgroups.IsCgroup2UnifiedMode() {
m.setMemoryQoS(qosConfigs)
m.setMemoryQoS(logger, qosConfigs)
}
if utilfeature.DefaultFeatureGate.Enabled(kubefeatures.QOSReserved) {
for resource, percentReserve := range m.qosReserved {
switch resource {
case v1.ResourceMemory:
m.setMemoryReserve(qosConfigs, percentReserve)
m.setMemoryReserve(logger, qosConfigs, percentReserve)
}
}
@ -364,7 +364,7 @@ func (m *qosContainerManagerImpl) UpdateCgroups(logger logr.Logger) error {
}
}
if updateSuccess {
klog.V(4).InfoS("Updated QoS cgroup configuration")
logger.V(4).Info("Updated QoS cgroup configuration")
return nil
}
@ -374,7 +374,7 @@ func (m *qosContainerManagerImpl) UpdateCgroups(logger logr.Logger) error {
for resource, percentReserve := range m.qosReserved {
switch resource {
case v1.ResourceMemory:
m.retrySetMemoryReserve(qosConfigs, percentReserve)
m.retrySetMemoryReserve(logger, qosConfigs, percentReserve)
}
}
}
@ -382,12 +382,12 @@ func (m *qosContainerManagerImpl) UpdateCgroups(logger logr.Logger) error {
for _, config := range qosConfigs {
err := m.cgroupManager.Update(logger, config)
if err != nil {
klog.ErrorS(err, "Failed to update QoS cgroup configuration")
logger.Error(err, "Failed to update QoS cgroup configuration")
return err
}
}
klog.V(4).InfoS("Updated QoS cgroup configuration")
logger.V(4).Info("Updated QoS cgroup configuration")
return nil
}

View file

@ -149,7 +149,7 @@ func TestQoSContainerCgroup(t *testing.T) {
},
}
m.setMemoryQoS(qosConfigs)
m.setMemoryQoS(logger, qosConfigs)
burstableMin := resource.MustParse("384Mi")
guaranteedMin := resource.MustParse("128Mi")